Repository: RadeonOpenCompute/ROCR-Runtime
Branch: amd-staging_deprecated
Commit: ba56a24c6132
Files: 652
Total size: 12.5 MB

Directory structure:
gitextract_zn673rxz/

├── .gitignore
├── CMakeLists.txt
├── DEBIAN/
│   ├── Binary/
│   │   ├── postinst.in
│   │   └── prerm.in
│   ├── Dev/
│   │   ├── postinst.in
│   │   └── prerm.in
│   └── preinst
├── LICENSE.txt
├── README.md
├── RPM/
│   ├── Binary/
│   │   ├── post.in
│   │   └── postun.in
│   ├── Dev/
│   │   ├── post.in
│   │   └── postun.in
│   ├── hsa-rocr.spec.in
│   └── preinst
├── _clang-format
├── clang-format-diff.py
├── cmake_modules/
│   └── utils.cmake
├── format
├── libhsakmt/
│   ├── CMakeLists.txt
│   ├── DEBIAN/
│   │   ├── postinst.in
│   │   └── prerm.in
│   ├── LICENSE.md
│   ├── README.md
│   ├── RPM/
│   │   ├── hsakmt-roct-devel.spec.in
│   │   ├── libhsakmt.spec
│   │   ├── post.in
│   │   └── postun.in
│   ├── cmake_modules/
│   │   └── utils.cmake
│   ├── hsakmt-config.cmake.in
│   ├── include/
│   │   └── hsakmt/
│   │       ├── hsakmt.h
│   │       ├── hsakmt_virtio.h
│   │       ├── hsakmtmodel.h
│   │       ├── hsakmtmodeliface.h
│   │       ├── hsakmttypes.h
│   │       └── linux/
│   │           ├── kfd_ioctl.h
│   │           └── udmabuf.h
│   ├── libhsakmt.pc.in
│   ├── src/
│   │   ├── debug.c
│   │   ├── events.c
│   │   ├── fmm.c
│   │   ├── fmm.h
│   │   ├── globals.c
│   │   ├── hsakmtmodel.c
│   │   ├── libhsakmt.c
│   │   ├── libhsakmt.h
│   │   ├── libhsakmt.ver
│   │   ├── memory.c
│   │   ├── openclose.c
│   │   ├── pc_sampling.c
│   │   ├── perfctr.c
│   │   ├── pmc_table.c
│   │   ├── pmc_table.h
│   │   ├── queues.c
│   │   ├── rbtree.c
│   │   ├── rbtree.h
│   │   ├── rbtree_amd.h
│   │   ├── spm.c
│   │   ├── svm.c
│   │   ├── time.c
│   │   ├── topology.c
│   │   ├── version.c
│   │   └── virtio/
│   │       ├── CMakeLists.txt
│   │       ├── hsakmt_virtio_amdgpu.c
│   │       ├── hsakmt_virtio_device.c
│   │       ├── hsakmt_virtio_device.h
│   │       ├── hsakmt_virtio_events.c
│   │       ├── hsakmt_virtio_memory.c
│   │       ├── hsakmt_virtio_openclose.c
│   │       ├── hsakmt_virtio_proto.h
│   │       ├── hsakmt_virtio_queues.c
│   │       ├── hsakmt_virtio_topology.c
│   │       ├── hsakmt_virtio_vm.c
│   │       ├── include/
│   │       │   └── linux/
│   │       │       └── virtgpu_drm.h
│   │       ├── libhsakmt_virtio.ver
│   │       ├── virtio_gpu.c
│   │       └── virtio_gpu.h
│   └── tests/
│       ├── kfdtest/
│       │   ├── .gitignore
│       │   ├── CMakeLists.txt
│       │   ├── LICENSE.kfdtest
│       │   ├── README.txt
│       │   ├── gtest-1.6.0/
│       │   │   ├── gtest/
│       │   │   │   └── gtest.h
│       │   │   └── gtest-all.cpp
│       │   ├── include/
│       │   │   ├── amdp2ptest.h
│       │   │   ├── asic_reg/
│       │   │   │   ├── gfx_7_2_d.h
│       │   │   │   ├── gfx_7_2_enum.h
│       │   │   │   └── gfx_7_2_sh_mask.h
│       │   │   ├── kfd_pm4_opcodes.h
│       │   │   ├── pm4_pkt_struct_ai.h
│       │   │   ├── pm4_pkt_struct_ci.h
│       │   │   ├── pm4_pkt_struct_common.h
│       │   │   ├── pm4_pkt_struct_nv.h
│       │   │   └── sdma_pkt_struct.h
│       │   ├── scripts/
│       │   │   ├── kfdtest.exclude
│       │   │   └── run_kfdtest.sh
│       │   └── src/
│       │       ├── AqlQueue.cpp
│       │       ├── AqlQueue.hpp
│       │       ├── Assemble.cpp
│       │       ├── Assemble.hpp
│       │       ├── BaseDebug.cpp
│       │       ├── BaseDebug.hpp
│       │       ├── BasePacket.cpp
│       │       ├── BasePacket.hpp
│       │       ├── BaseQueue.cpp
│       │       ├── BaseQueue.hpp
│       │       ├── Dispatch.cpp
│       │       ├── Dispatch.hpp
│       │       ├── GoogleTestExtension.cpp
│       │       ├── GoogleTestExtension.hpp
│       │       ├── IndirectBuffer.cpp
│       │       ├── IndirectBuffer.hpp
│       │       ├── KFDASMTest.cpp
│       │       ├── KFDASMTest.hpp
│       │       ├── KFDBaseComponentTest.cpp
│       │       ├── KFDBaseComponentTest.hpp
│       │       ├── KFDCWSRTest.cpp
│       │       ├── KFDCWSRTest.hpp
│       │       ├── KFDDBGTest.cpp
│       │       ├── KFDDBGTest.hpp
│       │       ├── KFDEventTest.cpp
│       │       ├── KFDEventTest.hpp
│       │       ├── KFDEvictTest.cpp
│       │       ├── KFDEvictTest.hpp
│       │       ├── KFDExceptionTest.cpp
│       │       ├── KFDExceptionTest.hpp
│       │       ├── KFDGWSTest.cpp
│       │       ├── KFDGWSTest.hpp
│       │       ├── KFDGraphicsInterop.cpp
│       │       ├── KFDGraphicsInterop.hpp
│       │       ├── KFDHWSTest.cpp
│       │       ├── KFDHWSTest.hpp
│       │       ├── KFDIPCTest.cpp
│       │       ├── KFDIPCTest.hpp
│       │       ├── KFDLocalMemoryTest.cpp
│       │       ├── KFDLocalMemoryTest.hpp
│       │       ├── KFDMemoryTest.cpp
│       │       ├── KFDMemoryTest.hpp
│       │       ├── KFDMultiProcessTest.cpp
│       │       ├── KFDMultiProcessTest.hpp
│       │       ├── KFDNegativeTest.cpp
│       │       ├── KFDNegativeTest.hpp
│       │       ├── KFDOpenCloseKFDTest.cpp
│       │       ├── KFDOpenCloseKFDTest.hpp
│       │       ├── KFDPCSamplingTest.cpp
│       │       ├── KFDPCSamplingTest.hpp
│       │       ├── KFDPMTest.cpp
│       │       ├── KFDPMTest.hpp
│       │       ├── KFDPerfCounters.cpp
│       │       ├── KFDPerfCounters.hpp
│       │       ├── KFDPerformanceTest.cpp
│       │       ├── KFDQMTest.cpp
│       │       ├── KFDQMTest.hpp
│       │       ├── KFDRASTest.cpp
│       │       ├── KFDRASTest.hpp
│       │       ├── KFDSVMEvictTest.cpp
│       │       ├── KFDSVMEvictTest.hpp
│       │       ├── KFDSVMRangeTest.cpp
│       │       ├── KFDSVMRangeTest.hpp
│       │       ├── KFDTestFlags.hpp
│       │       ├── KFDTestMain.cpp
│       │       ├── KFDTestUtil.cpp
│       │       ├── KFDTestUtil.hpp
│       │       ├── KFDTestUtilQueue.cpp
│       │       ├── KFDTestUtilQueue.hpp
│       │       ├── KFDTopologyTest.cpp
│       │       ├── KFDTopologyTest.hpp
│       │       ├── LinuxOSWrapper.cpp
│       │       ├── OSWrapper.hpp
│       │       ├── PM4Packet.cpp
│       │       ├── PM4Packet.hpp
│       │       ├── PM4Queue.cpp
│       │       ├── PM4Queue.hpp
│       │       ├── RDMATest.cpp
│       │       ├── RDMATest.hpp
│       │       ├── RDMAUtil.cpp
│       │       ├── RDMAUtil.hpp
│       │       ├── SDMAPacket.cpp
│       │       ├── SDMAPacket.hpp
│       │       ├── SDMAQueue.cpp
│       │       ├── SDMAQueue.hpp
│       │       ├── SDMAQueueByEngId.hpp
│       │       ├── ShaderStore.cpp
│       │       ├── ShaderStore.hpp
│       │       └── XgmiOptimizedSDMAQueue.hpp
│       ├── rdma/
│       │   └── simple/
│       │       ├── app/
│       │       │   ├── CMakeLists.txt
│       │       │   └── rdma_test.cpp
│       │       └── drv/
│       │           ├── amdp2ptest.c
│       │           └── amdp2ptest.h
│       └── reopen/
│           ├── CMakeLists.txt
│           └── kmtreopen.c
├── rocrtst/
│   ├── .gitignore
│   ├── Kernels/
│   │   ├── CMakeLists.txt
│   │   ├── binary_search_kernel.cl
│   │   ├── read_kernel.cl
│   │   └── write_kernel.cl
│   ├── README.md
│   ├── common/
│   │   ├── base_rocr.cc
│   │   ├── base_rocr.h
│   │   ├── base_rocr_utils.cc
│   │   ├── base_rocr_utils.h
│   │   ├── common.cc
│   │   ├── common.h
│   │   ├── concurrent_utils.cc
│   │   ├── concurrent_utils.h
│   │   ├── helper_funcs.cc
│   │   ├── helper_funcs.h
│   │   ├── hsatimer.cc
│   │   ├── hsatimer.h
│   │   ├── os.cc
│   │   ├── os.h
│   │   ├── rocr.cc
│   │   ├── rocr.h
│   │   └── utils_test/
│   │       ├── CMakeLists.txt
│   │       ├── utils_cpp11_gtest.cpp
│   │       ├── utils_timer_gtest.cpp
│   │       ├── utils_timer_test.cpp
│   │       └── utils_timer_test.hpp
│   ├── gtest/
│   │   ├── CMakeLists.txt
│   │   ├── include/
│   │   │   └── gtest/
│   │   │       ├── gtest-death-test.h
│   │   │       ├── gtest-message.h
│   │   │       ├── gtest-param-test.h
│   │   │       ├── gtest-printers.h
│   │   │       ├── gtest-spi.h
│   │   │       ├── gtest-test-part.h
│   │   │       ├── gtest-typed-test.h
│   │   │       ├── gtest.h
│   │   │       ├── gtest_pred_impl.h
│   │   │       ├── gtest_prod.h
│   │   │       └── internal/
│   │   │           ├── gtest-death-test-internal.h
│   │   │           ├── gtest-filepath.h
│   │   │           ├── gtest-internal.h
│   │   │           ├── gtest-linked_ptr.h
│   │   │           ├── gtest-param-util-generated.h
│   │   │           ├── gtest-param-util-generated.h.pump
│   │   │           ├── gtest-param-util.h
│   │   │           ├── gtest-port.h
│   │   │           ├── gtest-string.h
│   │   │           ├── gtest-tuple.h
│   │   │           ├── gtest-tuple.h.pump
│   │   │           ├── gtest-type-util.h
│   │   │           └── gtest-type-util.h.pump
│   │   └── src/
│   │       ├── gtest-all.cpp
│   │       ├── gtest-death-test.cpp
│   │       ├── gtest-filepath.cpp
│   │       ├── gtest-internal-inl.h
│   │       ├── gtest-port.cpp
│   │       ├── gtest-printers.cpp
│   │       ├── gtest-test-part.cpp
│   │       ├── gtest-typed-test.cpp
│   │       ├── gtest.cpp
│   │       └── gtest_main.cpp
│   ├── samples/
│   │   ├── CMakeLists.txt
│   │   ├── README.txt
│   │   ├── async_mem_copy/
│   │   │   └── async_mem_copy.cc
│   │   ├── binary_search/
│   │   │   ├── binary_search.cc
│   │   │   └── binary_search_kernels.cl
│   │   ├── ipc/
│   │   │   └── ipc.cc
│   │   ├── rocm_async/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── Readme.txt
│   │   │   ├── base_test.cpp
│   │   │   ├── base_test.hpp
│   │   │   ├── common.cpp
│   │   │   ├── common.hpp
│   │   │   ├── hsatimer.cpp
│   │   │   ├── hsatimer.hpp
│   │   │   ├── main.cpp
│   │   │   ├── os.cpp
│   │   │   ├── os.hpp
│   │   │   ├── rocm_async.cpp
│   │   │   ├── rocm_async.hpp
│   │   │   ├── rocm_async_io.cpp
│   │   │   ├── rocm_async_parse.cpp
│   │   │   ├── rocm_async_print.cpp
│   │   │   ├── rocm_async_report.cpp
│   │   │   ├── rocm_async_topology.cpp
│   │   │   ├── rocm_async_trans.cpp
│   │   │   └── rocm_async_validate.cpp
│   │   └── rocrinfo/
│   │       └── rocrinfo.cc
│   ├── suites/
│   │   ├── functional/
│   │   │   ├── agent_props.cc
│   │   │   ├── agent_props.h
│   │   │   ├── aql_barrier_bit.cc
│   │   │   ├── aql_barrier_bit.h
│   │   │   ├── concurrent_init.cc
│   │   │   ├── concurrent_init.h
│   │   │   ├── concurrent_init_shutdown.cc
│   │   │   ├── concurrent_init_shutdown.h
│   │   │   ├── concurrent_shutdown.cc
│   │   │   ├── concurrent_shutdown.h
│   │   │   ├── cu_masking.cc
│   │   │   ├── cu_masking.h
│   │   │   ├── deallocation_notifier.cc
│   │   │   ├── deallocation_notifier.h
│   │   │   ├── debug_basic.cc
│   │   │   ├── debug_basic.h
│   │   │   ├── ipc.cc
│   │   │   ├── ipc.h
│   │   │   ├── memory_access.cc
│   │   │   ├── memory_access.h
│   │   │   ├── memory_alignment.cc
│   │   │   ├── memory_alignment.h
│   │   │   ├── memory_allocation.cc
│   │   │   ├── memory_allocation.h
│   │   │   ├── memory_atomics.cc
│   │   │   ├── memory_atomics.h
│   │   │   ├── memory_basic.cc
│   │   │   ├── memory_basic.h
│   │   │   ├── reference_count.cc
│   │   │   ├── reference_count.h
│   │   │   ├── signal_concurrent.cc
│   │   │   ├── signal_concurrent.h
│   │   │   ├── signal_kernel.cc
│   │   │   ├── signal_kernel.h
│   │   │   ├── virtual_memory.cc
│   │   │   └── virtual_memory.h
│   │   ├── negative/
│   │   │   ├── memory_allocate_negative_tests.cc
│   │   │   ├── memory_allocate_negative_tests.h
│   │   │   ├── queue_validation.cc
│   │   │   └── queue_validation.h
│   │   ├── performance/
│   │   │   ├── dispatch_time.cc
│   │   │   ├── dispatch_time.h
│   │   │   ├── enqueueLatency.cc
│   │   │   ├── enqueueLatency.h
│   │   │   ├── memory_async_copy.cc
│   │   │   ├── memory_async_copy.h
│   │   │   ├── memory_async_copy_numa.cc
│   │   │   └── memory_async_copy_numa.h
│   │   ├── stress/
│   │   │   ├── memory_concurrent_tests.cc
│   │   │   ├── memory_concurrent_tests.h
│   │   │   ├── queue_write_index_concurrent_tests.cc
│   │   │   └── queue_write_index_concurrent_tests.h
│   │   └── test_common/
│   │       ├── CMakeLists.txt
│   │       ├── kernels/
│   │       │   ├── atomicOperations_kernels.cl
│   │       │   ├── cu_mask_kernels.cl
│   │       │   ├── dispatch_time_kernels.cl
│   │       │   ├── gpuReadWrite_kernels.cl
│   │       │   ├── groupMemoryDynamic_kernels.cl
│   │       │   ├── signal_operations.cl
│   │       │   ├── test_case_template_kernels.cl
│   │       │   ├── vector_add_debug_trap_kernel.cl
│   │       │   └── vector_add_memory_fault_kernel.cl
│   │       ├── main.cc
│   │       ├── main.h
│   │       ├── test_base.cc
│   │       ├── test_base.h
│   │       ├── test_case_template.cc
│   │       ├── test_case_template.h
│   │       ├── test_common.cc
│   │       └── test_common.h
│   └── thirdparty/
│       ├── include/
│       │   ├── LICENSE
│       │   ├── hwloc/
│       │   │   ├── autogen/
│       │   │   │   └── config.h
│       │   │   ├── bitmap.h
│       │   │   ├── cpuset.h
│       │   │   ├── cuda.h
│       │   │   ├── cudart.h
│       │   │   ├── deprecated.h
│       │   │   ├── diff.h
│       │   │   ├── gl.h
│       │   │   ├── glibc-sched.h
│       │   │   ├── helper.h
│       │   │   ├── inlines.h
│       │   │   ├── intel-mic.h
│       │   │   ├── linux-libnuma.h
│       │   │   ├── linux.h
│       │   │   ├── myriexpress.h
│       │   │   ├── nvml.h
│       │   │   ├── opencl.h
│       │   │   ├── openfabrics-verbs.h
│       │   │   ├── plugins.h
│       │   │   └── rename.h
│       │   └── hwloc.h
│       └── lib/
│           ├── LICENSE
│           └── libhwloc.so.5
├── runtime/
│   ├── cmake_modules/
│   │   ├── COPYING-CMAKE-SCRIPTS
│   │   └── FindLibElf.cmake
│   ├── docs/
│   │   ├── api-reference/
│   │   │   ├── api.rst
│   │   │   ├── c-interface-adaptors.rst
│   │   │   └── environment_variables.rst
│   │   ├── conf.py
│   │   ├── contribution/
│   │   │   └── contributing-to-rocr.rst
│   │   ├── data/
│   │   │   └── env_variables.rst
│   │   ├── index.rst
│   │   ├── install/
│   │   │   └── installation.rst
│   │   ├── license.rst
│   │   ├── sphinx/
│   │   │   ├── _toc.yml.in
│   │   │   ├── requirements.in
│   │   │   └── requirements.txt
│   │   └── what-is-rocr-runtime.rst
│   ├── hsa-ext-finalize/
│   │   └── CMakeLists.txt
│   ├── hsa-ext-image/
│   │   └── CMakeLists.txt
│   ├── hsa-runtime/
│   │   ├── CMakeLists.txt
│   │   ├── LICENSE.md
│   │   ├── cmake_modules/
│   │   │   ├── COPYING-CMAKE-SCRIPTS
│   │   │   ├── FindLibElf.cmake
│   │   │   ├── hsa_common.cmake
│   │   │   └── utils.cmake
│   │   ├── core/
│   │   │   ├── common/
│   │   │   │   ├── hsa_table_interface.cpp
│   │   │   │   └── shared.h
│   │   │   ├── driver/
│   │   │   │   ├── driver.cpp
│   │   │   │   ├── kfd/
│   │   │   │   │   └── amd_kfd_driver.cpp
│   │   │   │   ├── virtio/
│   │   │   │   │   └── amd_kfd_virtio_driver.cpp
│   │   │   │   └── xdna/
│   │   │   │       ├── amd_xdna_driver.cpp
│   │   │   │       └── uapi/
│   │   │   │           └── amdxdna_accel.h
│   │   │   ├── inc/
│   │   │   │   ├── agent.h
│   │   │   │   ├── amd_aie_agent.h
│   │   │   │   ├── amd_aie_aql_queue.h
│   │   │   │   ├── amd_aql_queue.h
│   │   │   │   ├── amd_available_drivers.h
│   │   │   │   ├── amd_blit_kernel.h
│   │   │   │   ├── amd_blit_sdma.h
│   │   │   │   ├── amd_blit_shaders.h
│   │   │   │   ├── amd_core_dump.hpp
│   │   │   │   ├── amd_cpu_agent.h
│   │   │   │   ├── amd_elf_image.hpp
│   │   │   │   ├── amd_filter_device.h
│   │   │   │   ├── amd_gpu_agent.h
│   │   │   │   ├── amd_gpu_pm4.h
│   │   │   │   ├── amd_hsa_code.hpp
│   │   │   │   ├── amd_hsa_loader.hpp
│   │   │   │   ├── amd_kfd_driver.h
│   │   │   │   ├── amd_loader_context.hpp
│   │   │   │   ├── amd_memory_region.h
│   │   │   │   ├── amd_topology.h
│   │   │   │   ├── amd_trap_handler_v1.h
│   │   │   │   ├── amd_virtio_driver.h
│   │   │   │   ├── amd_xdna_driver.h
│   │   │   │   ├── blit.h
│   │   │   │   ├── cache.h
│   │   │   │   ├── checked.h
│   │   │   │   ├── default_signal.h
│   │   │   │   ├── driver.h
│   │   │   │   ├── exceptions.h
│   │   │   │   ├── host_queue.h
│   │   │   │   ├── hsa_amd_tool_int.hpp
│   │   │   │   ├── hsa_api_trace_int.h
│   │   │   │   ├── hsa_ext_amd_impl.h
│   │   │   │   ├── hsa_ext_interface.h
│   │   │   │   ├── hsa_internal.h
│   │   │   │   ├── hsa_table_interface.h
│   │   │   │   ├── hsa_ven_amd_loader_impl.h
│   │   │   │   ├── intercept_queue.h
│   │   │   │   ├── interrupt_signal.h
│   │   │   │   ├── ipc_signal.h
│   │   │   │   ├── isa.h
│   │   │   │   ├── memory_region.h
│   │   │   │   ├── queue.h
│   │   │   │   ├── registers.h
│   │   │   │   ├── runtime.h
│   │   │   │   ├── scratch_cache.h
│   │   │   │   ├── sdma_registers.h
│   │   │   │   ├── signal.h
│   │   │   │   ├── svm_profiler.h
│   │   │   │   └── thunk_loader.h
│   │   │   ├── runtime/
│   │   │   │   ├── amd_aie_agent.cpp
│   │   │   │   ├── amd_aie_aql_queue.cpp
│   │   │   │   ├── amd_aql_queue.cpp
│   │   │   │   ├── amd_blit_kernel.cpp
│   │   │   │   ├── amd_blit_sdma.cpp
│   │   │   │   ├── amd_cpu_agent.cpp
│   │   │   │   ├── amd_filter_device.cpp
│   │   │   │   ├── amd_gpu_agent.cpp
│   │   │   │   ├── amd_hsa_loader.cpp
│   │   │   │   ├── amd_loader_context.cpp
│   │   │   │   ├── amd_memory_region.cpp
│   │   │   │   ├── amd_topology.cpp
│   │   │   │   ├── blit_shaders/
│   │   │   │   │   ├── CMakeLists.txt
│   │   │   │   │   ├── blit_copyAligned.s
│   │   │   │   │   ├── blit_copyMisaligned.s
│   │   │   │   │   ├── blit_fill.s
│   │   │   │   │   └── create_blit_shader_header.sh
│   │   │   │   ├── cache.cpp
│   │   │   │   ├── default_signal.cpp
│   │   │   │   ├── host_queue.cpp
│   │   │   │   ├── hsa.cpp
│   │   │   │   ├── hsa_api_trace.cpp
│   │   │   │   ├── hsa_ext_amd.cpp
│   │   │   │   ├── hsa_ext_interface.cpp
│   │   │   │   ├── hsa_ven_amd_loader.cpp
│   │   │   │   ├── intercept_queue.cpp
│   │   │   │   ├── interrupt_signal.cpp
│   │   │   │   ├── ipc_signal.cpp
│   │   │   │   ├── isa.cpp
│   │   │   │   ├── queue.cpp
│   │   │   │   ├── runtime.cpp
│   │   │   │   ├── signal.cpp
│   │   │   │   ├── svm_profiler.cpp
│   │   │   │   ├── thunk_loader.cpp
│   │   │   │   └── trap_handler/
│   │   │   │       ├── CMakeLists.txt
│   │   │   │       ├── create_trap_handler_header.sh
│   │   │   │       ├── trap_handler.s
│   │   │   │       └── trap_handler_gfx12.s
│   │   │   └── util/
│   │   │       ├── atomic_helpers.h
│   │   │       ├── flag.cpp
│   │   │       ├── flag.h
│   │   │       ├── lazy_ptr.h
│   │   │       ├── lnx/
│   │   │       │   └── os_linux.cpp
│   │   │       ├── locks.h
│   │   │       ├── memory.h
│   │   │       ├── os.h
│   │   │       ├── simple_heap.h
│   │   │       ├── small_heap.cpp
│   │   │       ├── small_heap.h
│   │   │       ├── timer.cpp
│   │   │       ├── timer.h
│   │   │       ├── utils.h
│   │   │       └── win/
│   │   │           └── os_win.cpp
│   │   ├── hsa-runtime64-config.cmake.in
│   │   ├── hsacore.so.def
│   │   ├── hsacore.so.link
│   │   ├── image/
│   │   │   ├── addrlib/
│   │   │   │   ├── inc/
│   │   │   │   │   ├── addrinterface.h
│   │   │   │   │   └── addrtypes.h
│   │   │   │   └── src/
│   │   │   │       ├── addrinterface.cpp
│   │   │   │       ├── amdgpu_asic_addr.h
│   │   │   │       ├── chip/
│   │   │   │       │   ├── gfx10/
│   │   │   │       │   │   └── gfx10_gb_reg.h
│   │   │   │       │   ├── gfx11/
│   │   │   │       │   │   └── gfx11_gb_reg.h
│   │   │   │       │   ├── gfx12/
│   │   │   │       │   │   └── gfx12_gb_reg.h
│   │   │   │       │   ├── gfx9/
│   │   │   │       │   │   └── gfx9_gb_reg.h
│   │   │   │       │   └── r800/
│   │   │   │       │       └── si_gb_reg.h
│   │   │   │       ├── core/
│   │   │   │       │   ├── addrcommon.h
│   │   │   │       │   ├── addrelemlib.cpp
│   │   │   │       │   ├── addrelemlib.h
│   │   │   │       │   ├── addrlib.cpp
│   │   │   │       │   ├── addrlib.h
│   │   │   │       │   ├── addrlib1.cpp
│   │   │   │       │   ├── addrlib1.h
│   │   │   │       │   ├── addrlib2.cpp
│   │   │   │       │   ├── addrlib2.h
│   │   │   │       │   ├── addrlib3.cpp
│   │   │   │       │   ├── addrlib3.h
│   │   │   │       │   ├── addrobject.cpp
│   │   │   │       │   ├── addrobject.h
│   │   │   │       │   ├── coord.cpp
│   │   │   │       │   └── coord.h
│   │   │   │       ├── gfx10/
│   │   │   │       │   ├── gfx10SwizzlePattern.h
│   │   │   │       │   ├── gfx10addrlib.cpp
│   │   │   │       │   └── gfx10addrlib.h
│   │   │   │       ├── gfx11/
│   │   │   │       │   ├── gfx11SwizzlePattern.h
│   │   │   │       │   ├── gfx11addrlib.cpp
│   │   │   │       │   └── gfx11addrlib.h
│   │   │   │       ├── gfx12/
│   │   │   │       │   ├── gfx12SwizzlePattern.h
│   │   │   │       │   ├── gfx12addrlib.cpp
│   │   │   │       │   └── gfx12addrlib.h
│   │   │   │       └── gfx9/
│   │   │   │           ├── gfx9addrlib.cpp
│   │   │   │           └── gfx9addrlib.h
│   │   │   ├── blit_kernel.cpp
│   │   │   ├── blit_kernel.h
│   │   │   ├── blit_object_gfx7xx.cpp
│   │   │   ├── blit_object_gfx8xx.cpp
│   │   │   ├── blit_object_gfx9xx.cpp
│   │   │   ├── blit_src/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── README.md
│   │   │   │   ├── create_hsaco_ascii_file.sh
│   │   │   │   └── imageblit_kernels.cl
│   │   │   ├── device_info.cpp
│   │   │   ├── device_info.h
│   │   │   ├── hsa_ext_image.cpp
│   │   │   ├── image_lut.h
│   │   │   ├── image_lut_gfx11.cpp
│   │   │   ├── image_lut_gfx11.h
│   │   │   ├── image_lut_kv.cpp
│   │   │   ├── image_lut_kv.h
│   │   │   ├── image_manager.cpp
│   │   │   ├── image_manager.h
│   │   │   ├── image_manager_ai.cpp
│   │   │   ├── image_manager_ai.h
│   │   │   ├── image_manager_gfx11.cpp
│   │   │   ├── image_manager_gfx11.h
│   │   │   ├── image_manager_gfx12.cpp
│   │   │   ├── image_manager_gfx12.h
│   │   │   ├── image_manager_kv.cpp
│   │   │   ├── image_manager_kv.h
│   │   │   ├── image_manager_nv.cpp
│   │   │   ├── image_manager_nv.h
│   │   │   ├── image_runtime.cpp
│   │   │   ├── image_runtime.h
│   │   │   ├── inc/
│   │   │   │   └── hsa_ext_image_impl.h
│   │   │   ├── resource.h
│   │   │   ├── resource_ai.h
│   │   │   ├── resource_gfx11.h
│   │   │   ├── resource_gfx12.h
│   │   │   ├── resource_kv.h
│   │   │   ├── resource_nv.h
│   │   │   └── util.h
│   │   ├── inc/
│   │   │   ├── Brig.h
│   │   │   ├── amd_hsa_common.h
│   │   │   ├── amd_hsa_elf.h
│   │   │   ├── amd_hsa_kernel_code.h
│   │   │   ├── amd_hsa_queue.h
│   │   │   ├── amd_hsa_signal.h
│   │   │   ├── hsa.h
│   │   │   ├── hsa_amd_tool.h
│   │   │   ├── hsa_api_trace.h
│   │   │   ├── hsa_api_trace_version.h
│   │   │   ├── hsa_ext_amd.h
│   │   │   ├── hsa_ext_finalize.h
│   │   │   ├── hsa_ext_image.h
│   │   │   ├── hsa_ven_amd_aqlprofile.h
│   │   │   ├── hsa_ven_amd_loader.h
│   │   │   └── hsa_ven_amd_pc_sampling.h
│   │   ├── libamdhsacode/
│   │   │   ├── amd_core_dump.cpp
│   │   │   ├── amd_elf_image.cpp
│   │   │   ├── amd_hsa_code.cpp
│   │   │   ├── amd_hsa_code_util.cpp
│   │   │   ├── amd_hsa_code_util.hpp
│   │   │   ├── amd_hsa_locks.cpp
│   │   │   ├── amd_hsa_locks.hpp
│   │   │   ├── amd_options.cpp
│   │   │   └── amd_options.hpp
│   │   ├── loader/
│   │   │   ├── AMDHSAKernelDescriptor.h
│   │   │   ├── executable.cpp
│   │   │   └── executable.hpp
│   │   └── pcs/
│   │       ├── hsa_ven_amd_pc_sampling.cpp
│   │       ├── inc/
│   │       │   └── hsa_ven_amd_pc_sampling_impl.h
│   │       ├── pcs_runtime.cpp
│   │       └── pcs_runtime.h
│   ├── hsa-runtime-tools/
│   │   └── CMakeLists.txt
│   └── packages/
│       ├── hsa-ext-rocr-dev/
│       │   ├── CMakeLists.txt
│       │   ├── Old CMakeLists.txt
│       │   ├── copyright
│       │   ├── description
│       │   ├── postinst
│       │   ├── prerm
│       │   ├── rpm_post
│       │   └── rpm_postun
│       └── rocr_tools_legacy/
│           ├── CMakeLists.txt
│           ├── copyright
│           ├── description
│           ├── postinst
│           ├── prerm
│           ├── rpm_post
│           └── rpm_postun
└── samples/
    ├── GetInfo/
    │   ├── get_info.cpp
    │   └── get_info.h
    └── common/
        ├── common.cpp
        ├── common.hpp
        ├── common_utility.cpp
        ├── common_utility.h
        ├── helper_funcs.cpp
        ├── helper_funcs.hpp
        ├── hsa_base_util.cpp
        ├── hsa_base_util.h
        ├── hsa_perf_cntrs.cpp
        ├── hsa_perf_cntrs.hpp
        ├── hsa_rsrc_factory.cpp
        ├── hsa_rsrc_factory.hpp
        ├── hsa_test.cpp
        ├── hsa_test.h
        ├── hsatimer.cpp
        ├── hsatimer.h
        ├── os.cpp
        ├── os.h
        ├── utilities.cpp
        └── utilities.h

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.*

#
# git files that we don't want to ignore even if they are dot-files
#
!.gitignore
!.mailmap
.github*
patches-*
build/
outgoing/
Makefile

# documentation artifacts
_build/
_doxygen/
_images/
_static/
_templates/
_toc.yml
doxygen


================================================
FILE: CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################


# Oldest CMake release this tree can be configured with.
cmake_minimum_required(VERSION 3.7)

# Name of the top-level project.
project(rocr)

# Ask the generated makefiles to echo the commands they run.
set(CMAKE_VERBOSE_MAKEFILE ON)

# Shared-vs-static switch: fall back to ON when the caller supplied no value,
# then publish whatever value is in effect as a BOOL cache entry.
if(NOT DEFINED BUILD_SHARED_LIBS)
  set(BUILD_SHARED_LIBS ON)
endif()
set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS} CACHE BOOL "Build shared library (.so) or not.")

# BUILD_ROCR gates the runtime/hsa-runtime subdirectory further down in this
# file; it is enabled unless the caller defined it beforehand.
if(NOT DEFINED BUILD_ROCR)
  set(BUILD_ROCR ON)
endif()

# add_rocm_subdir(<subdir> <subdir_assigns>)
#
# Adds <subdir> to the build with add_subdirectory() after applying a list of
# variable assignments.
#
#   subdir          - directory handed to add_subdirectory().
#   subdir_assigns  - ';'-separated list of "VARNAME=VALUE" entries. May be
#                     empty (or whitespace only), in which case nothing is set.
#
# Every entry is split at its FIRST '=' only, so a value that itself contains
# '=' (for example "CMAKE_CXX_FLAGS=-DFOO=1") is preserved in full. The
# variables are set in this function's scope, which is the scope that
# add_subdirectory() hands to the subdirectory; the caller's variables are not
# modified. An entry with no '=' or with an empty variable name stops the
# configure step with an explicit error.
function(add_rocm_subdir subdir subdir_assigns)
    message("add_rocm_subdir() -- " ${subdir})
    # message("  subdir_assigns before:" ${subdir_assigns} "EOM")
    string(STRIP "${subdir_assigns}" subdir_assigns)
    message("  subdir_assigns:" ${subdir_assigns} "EOM")

    # Only walk the list when at least one assignment was supplied.
    if(NOT "${subdir_assigns}" STREQUAL "")
        foreach(assignment IN LISTS subdir_assigns)
            # The format of each entry should be VARNAME=VALUE
            message("assignment: " ${assignment})

            # Locate the first '='; position 0 would mean an empty name and
            # -1 means the separator is missing altogether.
            string(FIND "${assignment}" "=" eq_pos)
            if(eq_pos LESS 1)
                message(FATAL_ERROR
                        "add_rocm_subdir(${subdir}): expected VARNAME=VALUE but got '${assignment}'")
            endif()

            # Name is everything before the separator, value is everything
            # after it (length -1 means "to the end of the string").
            string(SUBSTRING "${assignment}" 0 ${eq_pos} var_name)
            math(EXPR value_start "${eq_pos} + 1")
            string(SUBSTRING "${assignment}" ${value_start} -1 var_value)

            # Set variable locally for this function and for the subdirectory
            set(${var_name} "${var_value}")
            message("The value of ${var_name} is: ${${var_name}}")
        endforeach()
    endif()
    add_subdirectory(${subdir})
endfunction()

# Let include() find the helper modules shipped in cmake_modules/.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
include(utils)


## Get version strings
# get_version() is not defined in this file (presumably it comes from the
# utils module included above); the VERSION_* variables read below are
# expected to be populated by it.
get_version("1.18.0")
# Test the variable itself instead of its expansion. if(${VAR}) re-evaluates
# the expanded text, so a value that is not a plain number or boolean constant
# would be looked up as the name of yet another variable.
if (ROCM_PATCH_VERSION)
  set(VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
# Version string composed from major.minor.patch.
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
# Package version uses the commit count as its third component.
set(PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_COMMIT_COUNT}")

# BUILD_SHARED_LIBS is already given its default and its cache entry near the
# top of this file, so it is always defined by the time this point is reached
# and no second default is needed here.

# Set hsa pkg dependency with rocprofiler-register package
# for Shared Library Only.
# CACHE INTERNAL overwrites any previously cached value of this entry.
if (BUILD_SHARED_LIBS)
  set(HSA_DEP_ROCPROFILER_REGISTER ON CACHE INTERNAL "")
endif()

# NOTE(review): the Debian section later in this file assigns
# CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS with a plain set() and then appends
# rocprofiler-register again, so the Debian append below does not survive to
# packaging in this scope. It is kept because the subdirectories added in
# between inherit the value - confirm nothing there relies on it before
# removing it.
if (HSA_DEP_ROCPROFILER_REGISTER)
  string(APPEND CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS ", rocprofiler-register")
  string(APPEND CPACK_RPM_BINARY_PACKAGE_REQUIRES " rocprofiler-register")
endif()

# The virtio thunk is opt-in: it stays off unless the caller defined the switch.
if (NOT DEFINED BUILD_THUNK_VIRTIO)
  set(BUILD_THUNK_VIRTIO OFF)
endif()

# libhsakmt is always part of the build; THUNK_DEFINITIONS carries the optional
# VARNAME=VALUE overrides forwarded to it.
add_rocm_subdir(libhsakmt "${THUNK_DEFINITIONS}")

# Route every artifact kind of the hsakmt target into its own folder below
# <build>/libhsakmt.
set_property(TARGET hsakmt PROPERTY
             ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/libhsakmt/archive")
set_property(TARGET hsakmt PROPERTY
             LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/libhsakmt/lib")
set_property(TARGET hsakmt PROPERTY
             RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/libhsakmt/runtime")

# The virtio variant of the thunk is only added on request.
if (BUILD_THUNK_VIRTIO)
  add_rocm_subdir(libhsakmt/src/virtio "${THUNK_VIRTIO_DEFINITIONS}")
endif()

if (BUILD_ROCR)
  add_rocm_subdir(runtime/hsa-runtime "${ROCR_DEFINITIONS}")

  # Same layout as above, this time for hsa-runtime64 below <build>/rocr.
  set_property(TARGET hsa-runtime64 PROPERTY
               ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/rocr/archive")
  set_property(TARGET hsa-runtime64 PROPERTY
               LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/rocr/lib")
  set_property(TARGET hsa-runtime64 PROPERTY
               RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/rocr/runtime")

  # Make the runtime build after the thunk flavour that matches the link mode.
  if (NOT BUILD_SHARED_LIBS)
    add_dependencies(hsa-runtime64 hsakmt-staticdrm)
  else()
    add_dependencies(hsa-runtime64 hsakmt)
    if (BUILD_THUNK_VIRTIO)
      add_dependencies(hsa-runtime64 hsakmt_virtio)
    endif()
  endif()
endif()

# Optionally record the package's find module in the user's package cache.
# The switch defaults to "off" when the caller did not define it and is then
# published as a BOOL cache entry.
if ( NOT DEFINED EXPORT_TO_USER_PACKAGE_REGISTRY )
  set ( EXPORT_TO_USER_PACKAGE_REGISTRY "off")
endif()
set ( EXPORT_TO_USER_PACKAGE_REGISTRY ${EXPORT_TO_USER_PACKAGE_REGISTRY} CACHE BOOL "Add cmake package config location to the user's cmake package registry.")
# NOTE(review): CORE_RUNTIME_NAME is not set anywhere in the visible part of
# this file - presumably it is expected to come from the runtime subdirectory;
# confirm it is actually visible in this scope (and that BUILD_ROCR is on)
# whenever this branch is enabled.
if(${EXPORT_TO_USER_PACKAGE_REGISTRY})
  # Enable writing to the registry
  set(CMAKE_EXPORT_PACKAGE_REGISTRY ON)
  # Generate a target file for the build
  export(TARGETS ${CORE_RUNTIME_NAME} NAMESPACE ${CORE_RUNTIME_NAME}:: FILE ${CORE_RUNTIME_NAME}Targets.cmake)
  # Record the package in the user's cache.
  export(PACKAGE ${CORE_RUNTIME_NAME})
endif()

## Packaging directives
set(CPACK_VERBOSE 1)
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")
set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")

# From libhsakmt:
set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}"  CACHE STRING "Default packaging prefix.")

# Keep the RPM from claiming the install prefix and its bin directory.
# CPackRPM consumes this variable as a CMake list (one path per element), so
# the two paths are passed as separate list elements; a single space-separated
# string would be treated as one path that matches nothing.
# NOTE(review): CMAKE_INSTALL_BINDIR is not set in the visible part of this
# file - confirm it is defined by the time this line runs.
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
  set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
      "${CPACK_PACKAGING_INSTALL_PREFIX}"
      "${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}")
endif()

# ASAN Package will have libraries and license file
if (ENABLE_ASAN_PACKAGING)
  # ASAN Package requires only asan component with libraries and license file
  set(CPACK_COMPONENTS_ALL asan)
else()
  set(CPACK_COMPONENTS_ALL binary dev)
endif()
# Produce one package per component for both generators.
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION_STRING})
set(CPACK_PACKAGE_CONTACT "AMD HSA Support <dl.HSA-Runtime-Support@amd.com>")
set(CPACK_COMPONENT_DESCRIPTION "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime for Boltzmann (ROCm) platforms\nIncludes HSAKMT, the user-mode API interfaces used to interact with the ROCk driver.\n Contains the headers, pkgconfig and\n cmake files for ROCT.")
set(CPACK_COMPONENT_BINARY_DESCRIPTION "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime for Boltzmann (ROCm) platforms")
set(CPACK_COMPONENT_DEV_DESCRIPTION "AMD Heterogeneous System Architecture HSA development package.\n This package contains the headers and cmake files for the rocr-runtime package.")
set(CPACK_COMPONENT_ASAN_DESCRIPTION "AMD Heterogeneous System Architecture HSA - Linux HSA instrumented libraries for Boltzmann (ROCm) platforms")

# An optional ROCM_LIBPATCH_VERSION from the environment is appended to the
# package version as an extra dot-separated component.
if (DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
  message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}")
endif()

# Debian package specific variables
# One package name per install component (binary, dev, asan).
set(CPACK_DEBIAN_BINARY_PACKAGE_NAME "hsa-rocr")
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "hsa-rocr-dev")
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "hsa-rocr-asan")
# The package release comes from the environment when provided and falls back
# to "local" otherwise.
if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
  set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
  set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()
message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime")

## Process the Debian install/remove scripts to update the CPACK variables
# postinst/prerm are generated from their .in templates with @VAR@ substitution
# only (@ONLY); preinst is copied into the build tree unchanged.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/Binary/postinst.in DEBIAN/Binary/postinst @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/Binary/prerm.in DEBIAN/Binary/prerm @ONLY)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/preinst DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/DEBIAN)
# Maintainer scripts attached to the binary package.
set (CPACK_DEBIAN_BINARY_PACKAGE_CONTROL_EXTRA "DEBIAN/preinst;DEBIAN/Binary/postinst;DEBIAN/Binary/prerm")
# Needed since some packages still say they need hsakmt-roct
set(CPACK_DEBIAN_DEV_PACKAGE_REPLACES "hsakmt-roct,hsakmt-roct-dev,hsa-ext-rocr-dev")
set(CPACK_DEBIAN_DEV_PACKAGE_PROVIDES "hsakmt-roct,hsakmt-roct-dev,hsa-ext-rocr-dev")
#TODO: hsa-ext-rocr-dev can be added to conflicts list and remove CPACK_DEBIAN_DEV_PACKAGE_BREAKS
set(CPACK_DEBIAN_DEV_PACKAGE_CONFLICTS "hsakmt-roct,hsakmt-roct-dev")
# package dependencies
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core")
set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")
# Dependencies of the devel package; it additionally depends on the hsa-rocr
# binary package.
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core, hsa-rocr")

set(CPACK_DEBIAN_DEV_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")

# This plain set() replaces whatever CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS held
# before this point; the conditional appends further down build on it.
set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "libdrm-amdgpu-amdgpu1 | libdrm-amdgpu1, libnuma1, libelf1")
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core-asan, libdrm-amdgpu-amdgpu1 | libdrm-amdgpu1, libnuma1, libelf1")
set(CPACK_DEBIAN_ASAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")

set(CPACK_DEBIAN_BINARY_PACKAGE_RECOMMENDS "libdrm-amdgpu-amdgpu1")
# NOTE(review): the ASAN depends string above already lists rocm-core-asan, so
# this branch adds it a second time when ROCM_DEP_ROCMCORE is set - confirm
# whether the unconditional entry or this append is the intended one.
if (ROCM_DEP_ROCMCORE)
  string(APPEND CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS ", rocm-core")
  string(APPEND CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ", rocm-core-asan")
endif()
# Add rocprofiler-register to the binary package when the dependency flag is on.
if (HSA_DEP_ROCPROFILER_REGISTER)
  string(APPEND CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS ", rocprofiler-register")
endif()
# Declare package relationships (hsa-ext-rocr-dev is a legacy package that we subsume)
set(CPACK_DEBIAN_DEV_PACKAGE_BREAKS "hsa-ext-rocr-dev")

# ---- RPM packaging: naming ----
# Detect an EL7 host; the result selects the dependency syntax further down.
set(EL7_DISTRO "FALSE")
Checksetel7(EL7_DISTRO)

# One package name per component.
set(CPACK_RPM_BINARY_PACKAGE_NAME "hsa-rocr")
set(CPACK_RPM_DEV_PACKAGE_NAME "hsa-rocr-devel")
set(CPACK_RPM_ASAN_PACKAGE_NAME "hsa-rocr-asan")

# The devel package took over from several differently named packages
# (older "-dev" spellings and the former hsakmt-roct binary package), so it
# both provides and obsoletes all of those names.
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "hsakmt-roct,hsakmt-roct-devel,hsakmt-roct-dev,hsa-ext-rocr-dev")
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "hsakmt-roct,hsakmt-roct-devel,hsakmt-roct-dev,hsa-ext-rocr-dev")

# Release field: "local" unless the build environment overrides it; the
# distribution tag macro is always appended.
set(CPACK_RPM_PACKAGE_RELEASE "local")
if (DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
  set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
endif()
set(CPACK_RPM_PACKAGE_RELEASE "${CPACK_RPM_PACKAGE_RELEASE}%{?dist}")
message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}")

set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
set(CPACK_RPM_PACKAGE_LICENSE "NCSA")

## Process the Rpm install/remove scripts to update the CPACK variables.
## @ONLY restricts substitution to @VAR@ references, so the shell "$1" in the
## scriptlet templates is left alone. Outputs land under the build directory.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/Binary/post.in" RPM/Binary/post @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/Binary/postun.in" RPM/Binary/postun @ONLY)
# preinst contains no @VAR@ placeholders, so it is copied as-is.
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/RPM/preinst DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/RPM)
# Pre-install check script (set through the variable without a component name).
set (CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/preinst")

# Post-install / post-uninstall scriptlets for the binary component.
set(CPACK_RPM_BINARY_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/Binary/post")
set(CPACK_RPM_BINARY_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/Binary/postun")

# package dependencies
# hsa-rocr is listed first so that stripping "rocm-core" below (when
# ROCM_DEP_ROCMCORE is off) also removes the ", " in front of it and cannot
# leave a dangling comma at the start of the list.
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "hsa-rocr, rocm-core")

# EL7 gets plain package names; every other distro gets boolean "(a or b)"
# requirements, Recommends tags and the custom spec template that emits them.
# NOTE(review): presumably EL7's rpm cannot parse boolean dependencies or
# Recommends - confirm.
# The variable name is used (not "${EL7_DISTRO}") so an empty value cannot
# turn the condition into a syntax error.
if (EL7_DISTRO STREQUAL "TRUE")
  set(CPACK_RPM_BINARY_PACKAGE_REQUIRES "libdrm-amdgpu, numactl-libs")
  set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "libdrm-amdgpu, numactl-libs, libdrm-amdgpu-devel")
  set(CPACK_RPM_PACKAGE_REQUIRES "libdrm-amdgpu-devel")
  string(APPEND CPACK_RPM_DEV_PACKAGE_REQUIRES ", libdrm-amdgpu-devel")
else()
  set(CPACK_RPM_BINARY_PACKAGE_REQUIRES "(libdrm-amdgpu or libdrm or libdrm_amdgpu1), (libnuma1 or numactl-libs)")
  set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "(libdrm-amdgpu or libdrm or libdrm_amdgpu1), (libnuma1 or numactl-libs), (libdrm-amdgpu-devel or libdrm-devel)")
  set(CPACK_RPM_USER_BINARY_SPECFILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/hsa-rocr.spec.in")
  set(CPACK_RPM_PACKAGE_RECOMMENDS "libdrm-amdgpu, libdrm-amdgpu-devel")

  set(CPACK_RPM_PACKAGE_REQUIRES "(libdrm-amdgpu-devel or libdrm-devel)")
  string(APPEND CPACK_RPM_DEV_PACKAGE_REQUIRES ", (libdrm-amdgpu-devel or libdrm-devel)")
  set(CPACK_RPM_DEV_PACKAGE_RECOMMENDS "libdrm-amdgpu-devel")
  set(CPACK_RPM_ASAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-devel")

endif()

if (ROCM_DEP_ROCMCORE)
  # Same ", " separator as the rest of the list (and as the Debian appends).
  string(APPEND CPACK_RPM_BINARY_PACKAGE_REQUIRES ", rocm-core")
  string(APPEND CPACK_RPM_ASAN_PACKAGE_REQUIRES ", rocm-core-asan")
else()
  # Remove every rocm-core / rocm-core-asan entry, together with an optional
  # preceding ", ", from both the RPM and the Debian dependency lists.
  # Inputs are quoted so each list is passed as exactly one argument even if
  # it is empty or contains a semicolon.
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES}")
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_PACKAGE_DEPENDS}")
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES "${CPACK_RPM_DEV_PACKAGE_REQUIRES}")
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}")
  string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_RPM_ASAN_PACKAGE_REQUIRES "${CPACK_RPM_ASAN_PACKAGE_REQUIRES}")
  string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS}")
endif()
if (HSA_DEP_ROCPROFILER_REGISTER)
  string(APPEND CPACK_RPM_BINARY_PACKAGE_REQUIRES ", rocprofiler-register")
endif()

# Static (non-shared) builds get a dedicated "static" package whose
# dependencies mirror those of the binary package.
if(NOT BUILD_SHARED_LIBS)
  # Suffix package name with static
  set(CPACK_RPM_STATIC_PACKAGE_NAME "hsa-rocr-static-devel")
  set(CPACK_DEBIAN_STATIC_PACKAGE_NAME "hsa-rocr-static-dev")
  set(CPACK_COMPONENT_STATIC_DESCRIPTION "HSA (Heterogeneous System Architecture) core runtime - Linux static libraries")
  set(CPACK_RPM_STATIC_PACKAGE_REQUIRES "${CPACK_RPM_BINARY_PACKAGE_REQUIRES}")
  set(CPACK_DEBIAN_STATIC_PACKAGE_DEPENDS "${CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS}")
endif()

## Include packaging
# Placed after all CPACK_* variables above have been set.
include(CPack)

# static package generation
# Group binary and dev component to single package
if(NOT BUILD_SHARED_LIBS)
    cpack_add_component_group("static")
    cpack_add_component(binary  GROUP static)
    cpack_add_component(dev GROUP static)
endif()

# The ASAN component is declared for shared and static builds alike.
cpack_add_component(asan
  DISPLAY_NAME "ASAN"
  DESCRIPTION "ASAN libraries for rocr-runtime")


================================================
FILE: DEBIAN/Binary/postinst.in
================================================
#!/bin/bash

################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

set -e

# Register the library directory with the dynamic linker.
# The quoted placeholder in the test is replaced with ON or OFF when the
# package is built; with anything other than ON this is a no-op.
do_ldconfig() {
  [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0
  echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/rocr-runtime.conf
  ldconfig
}

# $1 is the maintainer-script action.
case "$1" in
  configure)
    do_ldconfig
    ;;
  abort-upgrade|abort-remove|abort-deconfigure)
    # Nothing to undo; just report which abort action was requested.
    echo "$1"
    ;;
  *)
    exit 0
    ;;
esac


================================================
FILE: DEBIAN/Binary/prerm.in
================================================
#!/bin/bash

################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

set -e

# Drop the dynamic-linker configuration written at install time.
# The quoted placeholder in the test is replaced with ON or OFF when the
# package is built; with anything other than ON this is a no-op.
rm_ldconfig() {
  [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0
  rm -f /etc/ld.so.conf.d/rocr-runtime.conf
  ldconfig
}

# $1 is the maintainer-script action.
case "$1" in
  remove|upgrade)
    rm_ldconfig
    ;;
  purge)
    # Nothing left to clean up.
    ;;
  *)
    exit 0
    ;;
esac


================================================
FILE: DEBIAN/Dev/postinst.in
================================================
#!/bin/bash

################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

set -e

# $1 is the maintainer-script action; only "configure" does any work.
case "$1" in
   ( configure )
       # Workaround for CPACK directory symlink handling error: the
       # compatibility link <prefix>/hsa/include/hsa is created here instead
       # of being shipped inside the package.
       mkdir -p @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include
       # -n: if the link already exists (and points at a directory), replace
       # the link itself rather than following it and dropping a nested
       # "hsa" link inside the real include directory.
       ln -sfn ../../@CMAKE_INSTALL_INCLUDEDIR@/hsa @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include/hsa
   ;;
   ( * )
       exit 0
   ;;
esac


================================================
FILE: DEBIAN/Dev/prerm.in
================================================
#!/bin/bash

################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

set -e

# $1 is the maintainer-script action.
case "$1" in
  remove|upgrade)
    # The compatibility tree under <prefix>/hsa is created by a script
    # rather than shipped in the package (CPACK directory symlink handling
    # error), so it has to be deleted by hand. This covers upgrades as well,
    # because an upgrade installs into a new folder and the old one would
    # otherwise be left behind.
    rm -rf @CPACK_PACKAGING_INSTALL_PREFIX@/hsa
    ;;
  *)
    exit 0
    ;;
esac


================================================
FILE: DEBIAN/preinst
================================================
#!/bin/bash

echo "Pre-install check for ROCr."

# A leftover libhsa-runtime in /usr/lib stops the installation unless the
# user agrees (by answering exactly "y") to have those files deleted.
if ls /usr/lib/libhsa-runtime* > /dev/null 2>&1; then
  printf '%s\n' \
    "An old version of libhsa-runtime was found in /usr/lib." \
    "This must be uninstalled before proceeding with the installation" \
    "to avoid potential incompatibilities."

  read -r -p "Do you want to uninstall the old version? [y/N] " answer
  case "$answer" in
    y)
      rm -rf /usr/lib/libhsa-runtime* || {
        echo "Failed to remove /usr/lib/libhsa-runtime* files."
        echo "Try to uninstall these files manually."
        exit 1
      }
      echo "Old version uninstalled."
      ;;
    *)
      # Any other answer, including an empty one, aborts the installation.
      echo "The old and new versions of ROCm are incompatible. Installation aborted."
      exit 1
      ;;
  esac
fi


================================================
FILE: LICENSE.txt
================================================
The University of Illinois/NCSA
Open Source License (NCSA)

Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.

Developed by:

                AMD Research and AMD HSA Software Development

                Advanced Micro Devices, Inc.

                www.amd.com

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal with the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

 - Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimers.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimers in
   the documentation and/or other materials provided with the distribution.
 - Neither the names of Advanced Micro Devices, Inc,
   nor the names of its contributors may be used to endorse or promote
   products derived from this Software without specific prior written
   permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.


================================================
FILE: README.md
================================================
# ROCR Runtime

> [!CAUTION]
> The ROCR-Runtime repository is retired. Please use the [ROCm/rocm-systems](https://github.com/ROCm/rocm-systems/tree/develop/projects/rocr-runtime) repository instead.

This ROCm Runtime (ROCr) repo combines 2 previously separate repos into a single repo:
- The HSA Runtime (`hsa-runtime`) for AMD GPU application development and
- The ROCt Thunk Library (`libhsakmt`), a "thunk" interface to the ROCm kernel driver (ROCk), used by the runtime.


================================================
FILE: RPM/Binary/post.in
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2016-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Post-install scriptlet: register the library directory with the dynamic
# linker. The quoted placeholder in the test is replaced with ON or OFF when
# the package is built.
# "=" is used instead of "==" because this scriptlet has no shebang and is run
# by the system shell; "==" inside [ ] is not POSIX.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
  echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/hsa-rocr.conf
  ldconfig
fi


================================================
FILE: RPM/Binary/postun.in
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2016-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Post-uninstall scriptlet: remove the dynamic-linker configuration written by
# the post-install scriptlet. The quoted placeholder in the second test is
# replaced with ON or OFF when the package is built.
# "$1" is quoted so a missing argument yields a clean false instead of a
# malformed test, and "=" replaces the non-POSIX "==".
if [ "$1" -le 1 ] && [ "@ENABLE_LDCONFIG@" = "ON" ]; then
    # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
    # NOTE(review): on an upgrade rpm runs the old package's postun after the
    # new package's post, so this may delete the conf file the new package
    # just wrote - confirm this is intended.
    rm -f /etc/ld.so.conf.d/hsa-rocr.conf
    ldconfig
fi


================================================
FILE: RPM/Dev/post.in
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2016-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Workaround for CPACK directory symlink handling error: the compatibility
# link <prefix>/hsa/include/hsa is created here instead of being shipped
# inside the package.
mkdir -p @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include
# -n: if the link already exists (and points at a directory), replace the link
# itself rather than following it and dropping a nested "hsa" link inside the
# real include directory.
ln -sfn ../../@CMAKE_INSTALL_INCLUDEDIR@/hsa @CPACK_PACKAGING_INSTALL_PREFIX@/hsa/include/hsa


================================================
FILE: RPM/Dev/postun.in
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2016-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Runs when the scriptlet argument is 0 or 1.
if test $1 -le 1
then
  # The compatibility tree under <prefix>/hsa is created by a scriptlet
  # rather than shipped in the package (CPACK directory symlink handling
  # error), so it has to be deleted by hand. This covers upgrades as well,
  # because an upgrade installs into a new folder and the old one would
  # otherwise be left behind.
  rm -rf @CPACK_PACKAGING_INSTALL_PREFIX@/hsa
fi


================================================
FILE: RPM/hsa-rocr.spec.in
================================================
# Custom spec template for the hsa-rocr binary RPM. The top-level
# CMakeLists.txt points CPACK_RPM_USER_BINARY_SPECFILE at this file on
# non-EL7 builds, and the at-sign-delimited placeholders below are filled in
# when the package is generated. Compared with a plain generated spec it adds
# a Recommends tag (see the marked modification in the preamble).

# Restore old style debuginfo creation for rpm >= 4.14.
%undefine _debugsource_packages
%undefine _debuginfo_subpackages

# -*- rpm-spec -*-
BuildRoot:      %_topdir/@CPACK_PACKAGE_FILE_NAME@@CPACK_RPM_PACKAGE_COMPONENT_PART_PATH@
Summary:        @CPACK_RPM_PACKAGE_SUMMARY@
Name:           @CPACK_RPM_PACKAGE_NAME@
Version:        @CPACK_RPM_PACKAGE_VERSION@
Release:        @CPACK_RPM_PACKAGE_RELEASE@
License:        @CPACK_RPM_PACKAGE_LICENSE@
Group:          @CPACK_RPM_PACKAGE_GROUP@
Vendor:         @CPACK_RPM_PACKAGE_VENDOR@

# Modifications to allow recommends to be used (not implemented in cpack):
%if "@CPACK_RPM_PACKAGE_RECOMMENDS@" != ""
Recommends: @CPACK_RPM_PACKAGE_RECOMMENDS@
%endif
# End of modifications

@TMP_RPM_URL@
@TMP_RPM_REQUIRES@
@TMP_RPM_REQUIRES_PRE@
@TMP_RPM_REQUIRES_POST@
@TMP_RPM_REQUIRES_PREUN@
@TMP_RPM_REQUIRES_POSTUN@
@TMP_RPM_PROVIDES@
@TMP_RPM_OBSOLETES@
@TMP_RPM_CONFLICTS@
@TMP_RPM_SUGGESTS@
@TMP_RPM_AUTOPROV@
@TMP_RPM_AUTOREQ@
@TMP_RPM_AUTOREQPROV@
@TMP_RPM_BUILDARCH@
@TMP_RPM_PREFIXES@
@TMP_RPM_EPOCH@

@TMP_RPM_DEBUGINFO@

%define _rpmdir %_topdir/RPMS
%define _srcrpmdir %_topdir/SRPMS
@FILE_NAME_DEFINE@
%define _unpackaged_files_terminate_build 0
@TMP_RPM_SPEC_INSTALL_POST@
@CPACK_RPM_SPEC_MORE_DEFINE@
@CPACK_RPM_COMPRESSION_TYPE_TMP@

%description
@CPACK_RPM_PACKAGE_DESCRIPTION@

# This is a shortcutted spec file generated by CMake RPM generator
# we skip _install step because CPack does that for us.
# We do only save CPack installed tree in _prepr
# and then restore it in build.
%prep
mv $RPM_BUILD_ROOT %_topdir/tmpBBroot

%install
if [ -e $RPM_BUILD_ROOT ];
then
  rm -rf $RPM_BUILD_ROOT
fi
mv %_topdir/tmpBBroot $RPM_BUILD_ROOT

@TMP_RPM_DEBUGINFO_INSTALL@

%clean

%post
@RPM_SYMLINK_POSTINSTALL@
@CPACK_RPM_SPEC_POSTINSTALL@

%posttrans
@CPACK_RPM_SPEC_POSTTRANS@

%postun
@CPACK_RPM_SPEC_POSTUNINSTALL@

%pre
@CPACK_RPM_SPEC_PREINSTALL@

%pretrans
@CPACK_RPM_SPEC_PRETRANS@

%preun
@CPACK_RPM_SPEC_PREUNINSTALL@

%files
%defattr(@TMP_DEFAULT_FILE_PERMISSIONS@,@TMP_DEFAULT_USER@,@TMP_DEFAULT_GROUP@,@TMP_DEFAULT_DIR_PERMISSIONS@)
@CPACK_RPM_INSTALL_FILES@
@CPACK_RPM_ABSOLUTE_INSTALL_FILES@
@CPACK_RPM_USER_INSTALL_FILES@

%changelog
@CPACK_RPM_SPEC_CHANGELOG@

@TMP_OTHER_COMPONENTS@


================================================
FILE: RPM/preinst
================================================
#!/bin/bash
#
# Pre-install check for ROCr.
#
# Looks for legacy libhsa-runtime* files in /usr/lib.  When any are found the
# user is asked whether they may be removed.  The script exits with status 1
# if the user declines or if the removal fails.

echo "Pre-install check for ROCr."

# Check for old installations...
if ls /usr/lib/libhsa-runtime* 1> /dev/null 2>&1; then
  echo "An old version of libhsa-runtime was found in /usr/lib."
  echo "This must be uninstalled before proceeding with the installation"
  echo "to avoid potential incompatibilities."

  read -r -p "Do you want to uninstall the old version? [y/N] " response
  case "$response" in
    # Accept "y"/"yes" in any capitalisation.  Every other reply, including
    # an empty one, is treated as the default answer "No".
    [yY] | [yY][eE][sS])
      if ! rm -rf /usr/lib/libhsa-runtime*; then
        echo "Failed to remove /usr/lib/libhsa-runtime* files."
        echo "Try to uninstall these files manually."
        exit 1
      fi
      echo "Old version uninstalled."
      ;;
    *)
      echo "The old and new versions of ROCm are incompatible. Installation aborted."
      exit 1
      ;;
  esac
fi


================================================
FILE: _clang-format
================================================
---
# clang-format configuration for this repository.
# The options are listed explicitly; the BasedOnStyle entry below is left
# commented out, so it is not applied as a base style.
Language:        Cpp
# BasedOnStyle:  Google
AccessModifierOffset: -1
ConstructorInitializerIndentWidth: 4
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: false
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
# Maximum line width.
ColumnLimit:     100
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
# Penalty weights used by clang-format when choosing between line breaks.
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
# Pointer alignment is fixed to Left rather than derived from the file.
DerivePointerAlignment: false
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard:        Auto
# Indent with two spaces; tabs are never emitted.
IndentWidth:     2
TabWidth:        8
UseTab:          Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInAngles:  false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas:  '^ IWYU pragma:'
ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
# Include blocks are left in their written order.
SortIncludes: false
...


================================================
FILE: clang-format-diff.py
================================================
#!/usr/bin/env python3
#
#===- clang-format-diff.py - ClangFormat Diff Reformatter ----*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

"""
This script reads input from a unified diff and reformats all the changed
lines. This is useful to reformat all the lines touched by a specific patch.
Example usage for git/svn users:

  git diff -U0 --no-color --relative HEAD^ | clang-format-diff.py -p1 -i
  svn diff --diff-cmd=diff -x-U0 | clang-format-diff.py -i

It should be noted that the filename contained in the diff is used unmodified
to determine the source file to update. Users calling this script directly
should be careful to ensure that the path in the diff is correct relative to the
current working directory.
"""
from __future__ import absolute_import, division, print_function

import argparse
import difflib
import re
import subprocess
import sys

if sys.version_info.major >= 3:
    from io import StringIO
else:
    from io import BytesIO as StringIO


def main():
  """Reformat the lines touched by a unified diff read from standard input.

  The diff is scanned for '+++' file headers and '@@' hunk headers.  For every
  file whose path matches the -regex / -iregex filter, the changed line ranges
  are collected and clang-format is run once per file with a '-lines' argument
  for each range.

  With -i the files are rewritten in place.  Without it, a unified diff
  between the current file contents and the formatted output is written to
  standard output.

  Raises:
    RuntimeError: if the clang-format binary cannot be started.

  Exits the process with clang-format's return code when that code is
  non-zero.
  """
  parser = argparse.ArgumentParser(description=__doc__,
                                   formatter_class=
                                           argparse.RawDescriptionHelpFormatter)
  parser.add_argument('-i', action='store_true', default=False,
                      help='apply edits to files instead of displaying a diff')
  parser.add_argument('-p', metavar='NUM', default=0,
                      help='strip the smallest prefix containing P slashes')
  parser.add_argument('-regex', metavar='PATTERN', default=None,
                      help='custom pattern selecting file paths to reformat '
                      '(case sensitive, overrides -iregex)')
  parser.add_argument('-iregex', metavar='PATTERN', default=
                      r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hh|hpp|hxx|m|mm|inc|js|ts'
                      r'|proto|protodevel|java|cs)',
                      help='custom pattern selecting file paths to reformat '
                      '(case insensitive, overridden by -regex)')
  parser.add_argument('-sort-includes', action='store_true', default=False,
                      help='let clang-format sort include blocks')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='be more verbose, ineffective without -i')
  parser.add_argument('-style',
                      help='formatting style to apply (LLVM, GNU, Google, Chromium, '
                      'Microsoft, Mozilla, WebKit)')
  parser.add_argument('-binary', default='clang-format',
                      help='location of binary to use for clang-format')
  args = parser.parse_args()

  # Extract changed lines for each file.
  filename = None
  lines_by_file = {}
  for line in sys.stdin:
    # A '+++' header switches the current file; -p controls how many leading
    # path components are stripped from the name found in the diff.
    match = re.search(r'^\+\+\+\ (.*?/){%s}(\S*)' % args.p, line)
    if match:
      filename = match.group(2)
    if filename is None:
      continue

    # Skip every line that belongs to a file excluded by the path filter.
    if args.regex is not None:
      if not re.match('^%s$' % args.regex, filename):
        continue
    else:
      if not re.match('^%s$' % args.iregex, filename, re.IGNORECASE):
        continue

    # Hunk header: '+<start>[,<count>]' describes the new-file side.
    match = re.search(r'^@@.*\+(\d+)(,(\d+))?', line)
    if match:
      start_line = int(match.group(1))
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      # A count of zero means the hunk only removed lines; nothing to format.
      if line_count == 0:
        continue
      end_line = start_line + line_count - 1
      lines_by_file.setdefault(filename, []).extend(
          ['-lines', str(start_line) + ':' + str(end_line)])

  # Reformat files containing changes in place.
  for filename, lines in lines_by_file.items():
    if args.i and args.verbose:
      print('Formatting {}'.format(filename))
    command = [args.binary, filename]
    if args.i:
      command.append('-i')
    if args.sort_includes:
      command.append('-sort-includes')
    command.extend(lines)
    if args.style:
      command.extend(['-style', args.style])

    try:
      p = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=None,
                           stdin=subprocess.PIPE,
                           universal_newlines=True)
    except OSError as e:
      # Give the user more context when clang-format isn't
      # found/isn't executable, etc.
      raise RuntimeError(
        'Failed to run "%s" - %s' % (" ".join(command), e.strerror))

    # stderr is not piped (stderr=None above), so only stdout carries data.
    stdout, _ = p.communicate()
    if p.returncode != 0:
      sys.exit(p.returncode)

    if not args.i:
      # Show what clang-format would change instead of touching the file.
      with open(filename) as f:
        code = f.readlines()
      formatted_code = StringIO(stdout).readlines()
      diff = difflib.unified_diff(code, formatted_code,
                                  filename, filename,
                                  '(before formatting)', '(after formatting)')
      diff_string = ''.join(diff)
      if len(diff_string) > 0:
        sys.stdout.write(diff_string)

if __name__ == '__main__':
  main()


================================================
FILE: cmake_modules/utils.cmake
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

## Searches for a canary file and records the directory that contains it in
## the cache variable named by CACHED_PATH.
##   LIB          - true: search with find_library(); false: search with find_file().
##   CACHED_PATH  - name of the cache variable; its current value is the first search hint.
##   HELP         - help text stored with the cache entry.
##   NAMES ...    - candidate file names.
##   HINTS ...    - additional search locations.
##   RESULT <var> - optional; <var> receives TRUE or FALSE in the caller's scope.
## A non-empty cached value is never overwritten by the search: if the search
## yields a different directory a warning is printed and the cached value is kept.
function( get_path LIB CACHED_PATH HELP )

    set( options "")
    set( oneValueArgs RESULT )
    set( multiValueArgs HINTS NAMES )
    cmake_parse_arguments(ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )

    # Search for canary file.
    if( ${LIB} )
        find_library( FULLPATH NAMES ${ARGS_NAMES} HINTS ${${CACHED_PATH}} ${ARGS_HINTS} )
    else()
        find_file( FULLPATH NAMES ${ARGS_NAMES} HINTS ${${CACHED_PATH}} ${ARGS_HINTS} )
    endif()

    # Evaluate the outcome now and keep a real boolean, so the value handed
    # back through RESULT also works with plain if(<var>) in the caller.
    if( "${FULLPATH}" MATCHES "NOTFOUND" )
        set( RESULT FALSE )
    else()
        set( RESULT TRUE )
    endif()

    # Extract path
    get_filename_component ( DIRPATH "${FULLPATH}" DIRECTORY )

    # Check path against cache
    if( NOT "${${CACHED_PATH}}" STREQUAL "" )
        if ( NOT "${${CACHED_PATH}}" STREQUAL "${DIRPATH}" )
            message(WARNING "${CACHED_PATH} may be incorrect." )
            set( DIRPATH ${${CACHED_PATH}} )
        endif()
    elseif( NOT RESULT )
        message(WARNING "${CACHED_PATH} not located during path search.")
    endif()

    # Set cache variable and help text.  FULLPATH is removed from the cache so
    # that the next call performs a fresh search.
    set( ${CACHED_PATH} "${DIRPATH}" CACHE PATH "${HELP}" FORCE )
    unset( FULLPATH CACHE )

    # Return success flag (quoted so an absent RESULT argument compares cleanly).
    if( NOT "${ARGS_RESULT}" STREQUAL "" )
        set( ${ARGS_RESULT} ${RESULT} PARENT_SCOPE)
    endif()

endfunction()

## Searches for a file with find_file() (via get_path) and stores the directory containing
## that file in the cache variable <VAR>, keeping the cached value if one is already set.
## <HELP> is the help text recorded with the cache entry.  Search paths are optional.
## A success indicator is passed back through the variable named by RESULT.
## get_include_path(<VAR> <HELP> NAMES name1 [name2...] [HINTS path1 [path2 ... ENV var]] [RESULT <var>])
macro( get_include_path CACHED_PATH HELP )
    get_path( 0 ${ARGV} )
endmacro()

## Searches for a library with find_library() (via get_path) and stores the directory
## containing it in the cache variable <VAR>, keeping the cached value if one is already set.
## <HELP> is the help text recorded with the cache entry.  Search paths are optional.
## A success indicator is passed back through the variable named by RESULT.
## get_library_path(<VAR> <HELP> NAMES name1 [name2...] [HINTS path1 [path2 ... ENV var]] [RESULT <var>])
macro( get_library_path CACHED_PATH HELP )
    get_path( 1 ${ARGV} )
endmacro()

## Splits VERSION_STRING into its numeric components.
## The first, second and third runs of digits are handed back to the caller
## as VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH.  A component that is
## missing from the string leaves the corresponding variable untouched.
function( parse_version VERSION_STRING )

    # Text following the first "-" (if any) is kept in the function-local
    # VERSION_BUILD only; it is not passed back to the caller.
    string ( FIND ${VERSION_STRING} "-" DASH_POS )
    if ( NOT ${DASH_POS} LESS 0 )
        math ( EXPR BUILD_START "${DASH_POS} + 1" )
        string ( SUBSTRING ${VERSION_STRING} ${BUILD_START} -1 VERSION_BUILD )
    endif ()

    # Collect every run of digits, in order of appearance.
    string ( REGEX MATCHALL "[0-9]+" NUMBERS ${VERSION_STRING} )
    list ( LENGTH NUMBERS NUMBER_COUNT )

    # Export as many of MAJOR / MINOR / PATCH as there are numbers available.
    set ( SLOT 0 )
    foreach ( COMPONENT MAJOR MINOR PATCH )
        if ( ${NUMBER_COUNT} GREATER ${SLOT} )
            list ( GET NUMBERS ${SLOT} VALUE )
            set ( VERSION_${COMPONENT} ${VALUE} PARENT_SCOPE )
        endif ()
        math ( EXPR SLOT "${SLOT} + 1" )
    endforeach ()

endfunction ()

## Derives version metadata for the repository and hands it to the caller.
## DEFAULT_VERSION_STRING supplies the major/minor/patch numbers (through
## parse_version).  When git is available it supplies the number of commits
## since the merge base with origin/HEAD and the short hash of HEAD, with
## "-dirty" appended when `git diff --quiet` exits with status 1.
## Sets in the caller's scope: VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH,
## VERSION_COMMIT_COUNT, VERSION_HASH and VERSION_JOB.
function ( get_version DEFAULT_VERSION_STRING )

    # Fallback values used when git or the build-automation ID is unavailable.
    set( VERSION_JOB "local-build" )
    set( VERSION_COMMIT_COUNT 0 )
    set( VERSION_HASH "unknown" )

    find_program( GIT NAMES git )

    if( GIT )

        # Branch point: common ancestor of HEAD and origin/HEAD.
        execute_process(COMMAND git merge-base HEAD origin/HEAD
                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                        OUTPUT_VARIABLE BRANCH_POINT
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE GIT_STATUS )

        if( ${GIT_STATUS} EQUAL 0 )
            # Number of commits made since the branch point.
            execute_process(COMMAND git rev-list --count ${BRANCH_POINT}..HEAD
                            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            OUTPUT_VARIABLE VERSION_COMMIT_COUNT
                            OUTPUT_STRIP_TRAILING_WHITESPACE
                            RESULT_VARIABLE GIT_STATUS )
            if( NOT ${GIT_STATUS} EQUAL 0 )
                # Counting failed: fall back to the default again.
                set( VERSION_COMMIT_COUNT 0 )
            endif()
        endif()

        # Abbreviated hash of the current commit.
        execute_process(COMMAND git rev-parse --short HEAD
                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                        OUTPUT_VARIABLE VERSION_HASH
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE GIT_STATUS )
        if( NOT ${GIT_STATUS} EQUAL 0 )
            set( VERSION_HASH "unknown" )
        else()
            # Exit status 1 from `git diff --quiet` marks a modified workspace.
            execute_process(COMMAND git diff --quiet
                            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            RESULT_VARIABLE GIT_STATUS )
            if( ${GIT_STATUS} EQUAL 1 )
                set( VERSION_HASH "${VERSION_HASH}-dirty" )
            endif()
        endif()
    endif()

    # The build-automation ID, when exported in the environment, replaces "local-build".
    if(DEFINED ENV{ROCM_BUILD_ID})
        set( VERSION_JOB $ENV{ROCM_BUILD_ID} )
    endif()

    # Fills VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH in this scope.
    parse_version(${DEFAULT_VERSION_STRING})

    # Publish every field to the caller.
    foreach( FIELD MAJOR MINOR PATCH COMMIT_COUNT HASH JOB )
        set( VERSION_${FIELD} "${VERSION_${FIELD}}" PARENT_SCOPE )
    endforeach()

endfunction()

## Collects the names of the immediate subdirectories of DIRPATH and returns
## them, relative to DIRPATH, as a list in the variable named by SUBDIRECTORIES.
function ( listsubdirs DIRPATH SUBDIRECTORIES )
    file( GLOB CONTENTS RELATIVE ${DIRPATH} "${DIRPATH}/*" )
    # Start from an empty list so that a FOLDERS variable inherited from the
    # caller's scope cannot leak into the result.
    set ( FOLDERS "" )
    foreach( ITEM IN LISTS CONTENTS)
        if( IS_DIRECTORY "${DIRPATH}/${ITEM}" )
            list( APPEND FOLDERS ${ITEM} )
        endif()
    endforeach()
    set (${SUBDIRECTORIES} ${FOLDERS} PARENT_SCOPE)
endfunction()

## Sets the variable named by EL7_DISTRO to TRUE in the caller's scope when
## `rpm --eval %{?dist}` succeeds and prints ".el7".  In every other case the
## variable is left untouched.
function (Checksetel7 EL7_DISTRO)
    execute_process(COMMAND rpm --eval %{?dist}
                    OUTPUT_VARIABLE DIST_TAG
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    RESULT_VARIABLE RPM_STATUS)
    message("RESULT_VARIABLE ${RPM_STATUS} OUTPUT_VARIABLE: ${DIST_TAG}")

    # An empty dist tag can never equal ".el7", so a single comparison suffices.
    if (RPM_STATUS EQUAL "0" AND "${DIST_TAG}" STREQUAL ".el7")
        set (${EL7_DISTRO} TRUE PARENT_SCOPE)
    endif()
endfunction()


================================================
FILE: format
================================================
#!/bin/bash
# Runs clang-format-diff.py (from the repository root) on the diff between
# HEAD^ and the working tree, rewriting the touched lines in place with
# -style=file.

# Stop early when not inside a git work tree: an empty root would otherwise
# send the directory change below to the wrong place.
root=$(git rev-parse --show-toplevel) || exit 1

# Quote the path so that a checkout location containing spaces works.
pushd "$root" > /dev/null || exit 1
git diff -U0 HEAD^ | ./clang-format-diff.py -p1 -i -style=file
popd > /dev/null


================================================
FILE: libhsakmt/CMakeLists.txt
================================================
################################################################################
##
## Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
##
## MIT LICENSE:
## Permission is hereby granted, free of charge, to any person obtaining a copy of
## this software and associated documentation files (the "Software"), to deal in
## the Software without restriction, including without limitation the rights to
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
## of the Software, and to permit persons to whom the Software is furnished to do
## so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.6.3 )

set(CMAKE_VERBOSE_MAKEFILE ON)

# Base names from which the package, component and target names are derived.
set ( HSAKMT "hsakmt" )
set ( HSAKMT_PACKAGE "hsakmt-roct" )
set ( HSAKMT_COMPONENT "lib${HSAKMT}" )
set ( HSAKMT_TARGET "${HSAKMT}" )
set(HSAKMT_STATIC_DRM_TARGET "${HSAKMT_TARGET}-staticdrm")

project ( ${HSAKMT_TARGET} VERSION 1.9.0)

# Optionally, build HSAKMT with ccache.
set(ROCM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if (ROCM_CCACHE_BUILD)
  find_program(CCACHE_PROGRAM ccache)
  if (CCACHE_PROGRAM)
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM})
  else()
    message(WARNING "Unable to find ccache. Falling back to real compiler")
  endif() # if (CCACHE_PROGRAM)
endif() # if (ROCM_CCACHE_BUILD)

# The project-local cmake_modules directory takes precedence over other module paths.
list( PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )

## Include common cmake modules
include ( utils )
include ( GNUInstallDirs )

## Setup the package version.
# NOTE(review): get_version() comes from the utils module included above and
# is presumably what sets VERSION_MAJOR/MINOR/PATCH -- confirm against
# cmake_modules/utils.cmake in this directory.
get_version ( "1.0.0" )

set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )

# Library version (used for the VERSION/SOVERSION properties and the soname
# below).  It is maintained separately from the build version above.
# ROCM_PATCH_VERSION, when it evaluates to true, replaces the default patch level.
set ( LIB_VERSION_MAJOR 1)
set ( LIB_VERSION_MINOR 0)
if (${ROCM_PATCH_VERSION})
    set ( LIB_VERSION_PATCH ${ROCM_PATCH_VERSION} )
else ()
    set ( LIB_VERSION_PATCH 6)
endif ()
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )

# VERSION_BUILD is not assigned anywhere in this file; when it is defined and
# non-empty it is appended to the patch component of the build version.
if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" )
    message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
    set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )

## Compiler flags
# HSAKMT_C_FLAGS is kept as a CMake list (one element per flag); it is handed
# to target_compile_options() further down.
set (HSAKMT_C_FLAGS -fPIC -W -Wall -Wextra -Wno-unused-parameter -Wformat-security -Wswitch-default -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wunreachable-code -std=gnu99 -fvisibility=hidden)
# -Wlogical-op is only added when compiling with GCC.
if ( CMAKE_COMPILER_IS_GNUCC )
    set ( HSAKMT_C_FLAGS "${HSAKMT_C_FLAGS}" -Wlogical-op)
endif ()
# Warnings become errors only when HSAKMT_WERROR evaluates to true.
if ( ${HSAKMT_WERROR} )
    set ( HSAKMT_C_FLAGS "${HSAKMT_C_FLAGS}" -Werror )
endif ()
# Release builds are optimised with -O2; every other build type gets -g instead.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    set ( HSAKMT_C_FLAGS "${HSAKMT_C_FLAGS}" -O2 )
else ()
    set ( HSAKMT_C_FLAGS "${HSAKMT_C_FLAGS}" -g )
endif ()

set ( HSAKMT_LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/src/libhsakmt.ver" )

## Linker Flags
## Add --enable-new-dtags to generate DT_RUNPATH
# Unlike the compiler flags, HSAKMT_LINK_FLAGS is a single space-separated
# string; it is assigned to the LINK_FLAGS target property further down.
set (HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -Wl,--enable-new-dtags -Wl,--version-script=${HSAKMT_LINKER_SCRIPT} -Wl,-soname=${HSAKMT_COMPONENT}.so.${LIB_VERSION_MAJOR} -Wl,-z,nodelete")

## Address Sanitize Flag
# With ADDRESS_SANITIZER enabled the sanitizer runtime is selected according to
# BUILD_SHARED_LIBS; the undefined-symbol link checks are only added when the
# sanitizer is off.
if ( ${ADDRESS_SANITIZER} )
    set ( HSAKMT_C_FLAGS "${HSAKMT_C_FLAGS}" -fsanitize=address )
    set ( HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -fsanitize=address" )
    if ( BUILD_SHARED_LIBS )
        set ( HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -shared-libsan" )
    else ()
        set ( HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -static-libsan" )
    endif ()
else ()
    if ( CMAKE_COMPILER_IS_GNUCC )
        set ( HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -Wl,-no-undefined" )
    else ()
        set ( HSAKMT_LINK_FLAGS "${HSAKMT_LINK_FLAGS} -Wl,-undefined,error" )
    endif ()
endif ()

## Source files
# Paths are relative to this directory.  The same list is reused for the
# static-drm variant of the library declared later in this file.
set ( HSAKMT_SRC "src/debug.c"
                 "src/events.c"
                 "src/fmm.c"
                 "src/globals.c"
                 "src/hsakmtmodel.c"
                 "src/libhsakmt.c"
                 "src/memory.c"
                 "src/openclose.c"
                 "src/perfctr.c"
                 "src/pmc_table.c"
                 "src/queues.c"
                 "src/time.c"
                 "src/topology.c"
                 "src/rbtree.c"
                 "src/spm.c"
                 "src/version.c"
                 "src/svm.c"
                 "src/pc_sampling.c")

## Declare the library target name
# The target is declared STATIC explicitly, independent of BUILD_SHARED_LIBS.
add_library (${HSAKMT_TARGET} STATIC "")

## Add sources
target_sources ( ${HSAKMT_TARGET} PRIVATE ${HSAKMT_SRC} )

## Add headers.  The public headers need to point at their location in both build and install
## directory layouts.  This declaration allows publishing library use data to downstream clients.
target_include_directories( ${HSAKMT_TARGET}
  PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
  PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/src )

# NOTE(review): LINK_FLAGS, VERSION and SOVERSION are assigned to a target that
# is declared STATIC above -- confirm these properties have the intended effect.
set_property(TARGET ${HSAKMT_TARGET} PROPERTY LINK_FLAGS ${HSAKMT_LINK_FLAGS})

## Set the VERSION and SOVERSION values
set_property ( TARGET ${HSAKMT_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${HSAKMT_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )

# pkg-config is needed by the pkg_check_modules() calls below, so fail here
# with a clear message when it is missing.
find_package(PkgConfig REQUIRED)
# get OS-info for OS-specific build dependencies
get_os_info()

# Check for libraries required for building
find_library(LIBC NAMES c REQUIRED)
# Prefer a NUMA find-module/package; fall back to a plain library search.
find_package(NUMA)
if(NUMA_FOUND)
  set(NUMA "${NUMA_LIBRARIES}")
else()
  find_library(NUMA NAMES numa REQUIRED)
endif()
message(STATUS "LIBC: " ${LIBC})
message(STATUS "NUMA: " ${NUMA})

## If the CMake variable DRM_DIR is set (for example with -DDRM_DIR=<path>),
## it is placed at the front of CMAKE_PREFIX_PATH so that the libdrm lookups
## below can pick up the corresponding libraries from that path.
if(DRM_DIR)
  list (PREPEND CMAKE_PREFIX_PATH "${DRM_DIR}")
endif()

# The module name passed to pkg_check_modules() is determined by the
# name of file *.pc
pkg_check_modules(DRM REQUIRED IMPORTED_TARGET libdrm)
pkg_check_modules(DRM_AMDGPU REQUIRED IMPORTED_TARGET libdrm_amdgpu)
include_directories(${DRM_AMDGPU_INCLUDE_DIRS})
include_directories(${DRM_INCLUDE_DIRS})

target_link_libraries ( ${HSAKMT_TARGET}
  PRIVATE ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} pthread rt ${LIBC} ${NUMA} ${CMAKE_DL_LIBS}
)

target_compile_options(${HSAKMT_TARGET} PRIVATE ${DRM_CFLAGS} ${HSAKMT_C_FLAGS})

# Define HAVE_MEMFD_CREATE for the sources when the C library provides memfd_create().
include(CheckFunctionExists)
set(CMAKE_REQUIRED_DEFINITIONS -D__USE_GNU=1)
set(CMAKE_REQUIRED_INCLUDES sys/mman.h)
check_function_exists(memfd_create HAVE_MEMFD_CREATE)
if(HAVE_MEMFD_CREATE)
  target_compile_definitions(${HSAKMT_TARGET} PRIVATE -DHAVE_MEMFD_CREATE=1)
endif()

## Define default paths and packages.
# /opt/rocm is used only when no install prefix was given by the user; the
# resulting value is then forced back into the cache.
if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
  set ( CMAKE_INSTALL_PREFIX "/opt/rocm" )
endif()
set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX} CACHE STRING "Default installation directory." FORCE )

# Installs binaries and exports the library usage data to ${HSAKMT_TARGET}Targets
# The target is installed twice so that both the "asan" and the "binary"
# component contain it; only the first call records the export set.
install ( TARGETS ${HSAKMT_TARGET} EXPORT ${HSAKMT_TARGET}Targets
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan )
install ( TARGETS ${HSAKMT_TARGET}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary )

# Install public headers
# The "linux" subdirectory and anything matching *virtio* are not installed.
install ( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/${HSAKMT_TARGET} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  COMPONENT dev PATTERN "linux" EXCLUDE PATTERN "*virtio*" EXCLUDE)

# Record our usage data for clients find_package calls.
install ( EXPORT ${HSAKMT_TARGET}Targets
  FILE ${HSAKMT_TARGET}Targets.cmake
  NAMESPACE ${HSAKMT_TARGET}::
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET}
  COMPONENT dev)

# Adds the target alias hsakmt::hsakmt to the local cmake cache.
# This isn't necessary today.  It's harmless preparation for some
# hypothetical future in which the we might be included by add_subdirectory()
# in some other project's cmake file.  It allows uniform use of find_package
# and target_link_library() without regard to whether a target is external or
# a subdirectory of the current build.
add_library( ${HSAKMT_TARGET}::${HSAKMT_TARGET} ALIAS ${HSAKMT_TARGET} )

# Create cmake configuration files
include(CMakePackageConfigHelpers)

# Generates <target>-config.cmake from the .in template in this directory.
configure_package_config_file(${HSAKMT_TARGET}-config.cmake.in
                            ${HSAKMT_TARGET}-config.cmake
                            INSTALL_DESTINATION
                            ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET} )

# The version file advertises BUILD_VERSION_STRING and accepts any request for
# the same or an older version (AnyNewerVersion).
write_basic_package_version_file(${HSAKMT_TARGET}-config-version.cmake
                 VERSION ${BUILD_VERSION_STRING}
                 COMPATIBILITY
                 AnyNewerVersion)

install(FILES
        ${CMAKE_CURRENT_BINARY_DIR}/${HSAKMT_TARGET}-config.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/${HSAKMT_TARGET}-config-version.cmake
        DESTINATION
        ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET}
        COMPONENT dev)

# Optionally record the package's find module in the user's package cache.
# The option defaults to "off" unless the user defined it beforehand.
if ( NOT DEFINED EXPORT_TO_USER_PACKAGE_REGISTRY )
  set ( EXPORT_TO_USER_PACKAGE_REGISTRY "off" )
endif()
set ( EXPORT_TO_USER_PACKAGE_REGISTRY ${EXPORT_TO_USER_PACKAGE_REGISTRY}
             CACHE BOOL "Add cmake package config location to the user's cmake package registry.")
if(${EXPORT_TO_USER_PACKAGE_REGISTRY})
  # Enable writing to the registry
  set(CMAKE_EXPORT_PACKAGE_REGISTRY ON)
  # Generate a target file for the build
  export(TARGETS ${HSAKMT_TARGET} NAMESPACE ${HSAKMT_TARGET}:: FILE ${HSAKMT_TARGET}Targets.cmake)
  # Record the package in the user's cache.
  export(PACKAGE ${HSAKMT_TARGET})
endif()

# CPACK_PACKAGING_INSTALL_PREFIX is needed in libhsakmt.pc.in
# TODO: Add support for relocatable packages.
# @ONLY restricts substitution to @VAR@ references in the template.
configure_file ( libhsakmt.pc.in libhsakmt.pc @ONLY )

install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/libhsakmt.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig COMPONENT dev)

if ( NOT BUILD_SHARED_LIBS)
  ## Create separate target file for static builds
  ## In static builds, libdrm and libdrm_amdgpu need to be linked statically
  add_library (${HSAKMT_STATIC_DRM_TARGET}  STATIC "")
  target_sources (${HSAKMT_STATIC_DRM_TARGET} PRIVATE ${HSAKMT_SRC})

  target_include_directories( ${HSAKMT_STATIC_DRM_TARGET}
    PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src )

  ## Set the link flags plus the VERSION and SOVERSION values.
  ## set_property() accepts a single PROPERTY per call (everything after the
  ## property name is taken as its value), so the three properties are set
  ## through set_target_properties() instead.
  set_target_properties ( ${HSAKMT_STATIC_DRM_TARGET} PROPERTIES
              LINK_FLAGS "${HSAKMT_LINK_FLAGS}"
              VERSION "${LIB_VERSION_STRING}"
              SOVERSION "${LIB_VERSION_MAJOR}" )

  #Additional search path for static libraries
  # DISTRO_ID is quoted so that an unset or empty value selects the generic
  # paths instead of producing a malformed if() condition.
  if("${DISTRO_ID}" MATCHES "ubuntu")
      set(AMDGPU_STATIC_LIB_PATHS "-L/opt/amdgpu/lib/x86_64-linux-gnu")
  else()
      set(AMDGPU_STATIC_LIB_PATHS "-L/opt/amdgpu/lib64" "-L/opt/amdgpu/lib")
  endif()
  # Link drm_amdgpu and drm library statically
  # The -Bstatic/-Bdynamic pair is part of the INTERFACE, i.e. it is applied
  # when a consumer links against this target.
  target_link_libraries ( ${HSAKMT_STATIC_DRM_TARGET}
    PRIVATE pthread rt c numa ${CMAKE_DL_LIBS}
    INTERFACE -Wl,-Bstatic ${AMDGPU_STATIC_LIB_PATHS} ${DRM_AMDGPU_LDFLAGS} ${DRM_LDFLAGS} -Wl,-Bdynamic
  )
  target_compile_options(${HSAKMT_STATIC_DRM_TARGET} PRIVATE ${DRM_CFLAGS} ${HSAKMT_C_FLAGS})

  install ( TARGETS ${HSAKMT_STATIC_DRM_TARGET} EXPORT ${HSAKMT_STATIC_DRM_TARGET}Targets
            ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary
            LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
  install ( EXPORT ${HSAKMT_STATIC_DRM_TARGET}Targets
    FILE ${HSAKMT_STATIC_DRM_TARGET}Targets.cmake
    NAMESPACE ${HSAKMT_STATIC_DRM_TARGET}::
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET}
    COMPONENT dev)

  add_library( ${HSAKMT_STATIC_DRM_TARGET}::${HSAKMT_STATIC_DRM_TARGET} ALIAS ${HSAKMT_STATIC_DRM_TARGET} )
endif()

###########################
# Packaging directives
###########################
# Use component packaging
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")


================================================
FILE: libhsakmt/DEBIAN/postinst.in
================================================
#!/bin/bash

set -e

# Writes the library directory into an ld.so.conf.d entry and refreshes the
# linker cache.  The "@ENABLE_LDCONFIG@" placeholder originates from
# ENABLE_LDCONFIG = ON/OFF at package build; any value other than ON turns
# this function into a no-op.
do_ldconfig() {
  [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

  echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
  ldconfig
}

# Dispatch on the action passed as the first argument.
case "$1" in
  configure)
    do_ldconfig
    ;;
  abort-upgrade|abort-remove|abort-deconfigure)
    # Nothing to undo here; just report the action.
    echo "$1"
    ;;
  *)
    exit 0
    ;;
esac


================================================
FILE: libhsakmt/DEBIAN/prerm.in
================================================
#!/bin/bash

set -e

# Removes the ld.so.conf.d entry for the library and, if that succeeded,
# refreshes the linker cache.  The "@ENABLE_LDCONFIG@" placeholder originates
# from ENABLE_LDCONFIG = ON/OFF at package build; any value other than ON
# turns this function into a no-op.
rm_ldconfig() {
  [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

  rm -f /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf && ldconfig
}

# Dispatch on the action passed as the first argument.
case "$1" in
  remove|upgrade)
    rm_ldconfig
    ;;
  purge)
    # Nothing to do on purge.
    ;;
  *)
    exit 0
    ;;
esac


================================================
FILE: libhsakmt/LICENSE.md
================================================
ROCT-Thunk Interface LICENSE

Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.

MIT LICENSE:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

This product contains software provided by Nginx, Inc. and its contributors.

Copyright (C) 2002-2018 Igor Sysoev
Copyright (C) 2011-2018 Nginx, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.


================================================
FILE: libhsakmt/README.md
================================================
# ROCt Library

This repository includes the user-mode API interfaces used to interact with the ROCk driver.

Starting with the 1.7 release, ROCt uses the DRM render device. This requires the user to belong to the video group. Add the user account to the video group with the "sudo usermod -a -G video _username_" command if the user is not part of the video group yet.
NOTE: Users of Ubuntu 20.04 will need to add the user to the new "render" group, as Ubuntu has changed the owner:group of /dev/kfd to render:render as of that release

## ROCk Driver

The ROCt library is not a standalone product and requires that you have the correct ROCk driver installed, or are using a compatible upstream kernel.
Please refer to <https://rocm.docs.amd.com> under "Getting Started Guide" for a list of supported Operating Systems and kernel versions, as well as supported hardware.

## Building the Thunk

A simple cmake-based system is available for building the thunk. To build the thunk from the ROCT-Thunk-Interface directory, execute:

```bash
    mkdir -p build
    cd build
    cmake ..
    make
```

If the hsakmt-roct and hsakmt-roct-dev packages are desired:

```bash
    mkdir -p build
    cd build
    cmake ..
    make package
```

If you choose not to build and install packages, manual installation of the binaries and header files can be done via:

```bash
    make install
```

NOTE: For older versions of the thunk where hsakmt-dev.txt is present, "make package-dev" and "make install-dev" are required to generate/install the developer packages. Currently, these are created via the "make package" and "make install" commands

## Disclaimer

The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale.

AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies.

Copyright (c) 2014-2023 Advanced Micro Devices, Inc. All rights reserved.


================================================
FILE: libhsakmt/RPM/hsakmt-roct-devel.spec.in
================================================
# Restore old style debuginfo creation for rpm >= 4.14.
%undefine _debugsource_packages
%undefine _debuginfo_subpackages

# -*- rpm-spec -*-
BuildRoot:      %_topdir/@CPACK_PACKAGE_FILE_NAME@@CPACK_RPM_PACKAGE_COMPONENT_PART_PATH@
Summary:        @CPACK_RPM_PACKAGE_SUMMARY@
Name:           @CPACK_RPM_PACKAGE_NAME@
Version:        @CPACK_RPM_PACKAGE_VERSION@
Release:        @CPACK_RPM_PACKAGE_RELEASE@
License:        @CPACK_RPM_PACKAGE_LICENSE@
Group:          @CPACK_RPM_PACKAGE_GROUP@
Vendor:         @CPACK_RPM_PACKAGE_VENDOR@

@TMP_RPM_URL@
@TMP_RPM_REQUIRES@
@TMP_RPM_REQUIRES_PRE@
@TMP_RPM_REQUIRES_POST@
@TMP_RPM_REQUIRES_PREUN@
@TMP_RPM_REQUIRES_POSTUN@
@TMP_RPM_PROVIDES@
@TMP_RPM_OBSOLETES@
@TMP_RPM_CONFLICTS@
@TMP_RPM_SUGGESTS@
@TMP_RPM_AUTOPROV@
@TMP_RPM_AUTOREQ@
@TMP_RPM_AUTOREQPROV@
@TMP_RPM_BUILDARCH@
@TMP_RPM_PREFIXES@
@TMP_RPM_EPOCH@

# Modifications to allow recommends to be used (not implemented in cpack):
%if "@CPACK_RPM_PACKAGE_RECOMMENDS@" != ""
Recommends: @CPACK_RPM_PACKAGE_RECOMMENDS@
%endif
# End of modifications

@TMP_RPM_DEBUGINFO@

%define _rpmdir %_topdir/RPMS
%define _srcrpmdir %_topdir/SRPMS
@FILE_NAME_DEFINE@
%define _unpackaged_files_terminate_build 0
@TMP_RPM_SPEC_INSTALL_POST@
@CPACK_RPM_SPEC_MORE_DEFINE@
@CPACK_RPM_COMPRESSION_TYPE_TMP@

%description
@CPACK_RPM_PACKAGE_DESCRIPTION@

# This is a shortcutted spec file generated by CMake RPM generator
# we skip _install step because CPack does that for us.
# We do only save CPack installed tree in _prepr
# and then restore it in build.
%prep
mv $RPM_BUILD_ROOT %_topdir/tmpBBroot

%install
if [ -e $RPM_BUILD_ROOT ];
then
  rm -rf $RPM_BUILD_ROOT
fi
mv %_topdir/tmpBBroot $RPM_BUILD_ROOT

@TMP_RPM_DEBUGINFO_INSTALL@

%clean

%post
@RPM_SYMLINK_POSTINSTALL@
@CPACK_RPM_SPEC_POSTINSTALL@

%posttrans
@CPACK_RPM_SPEC_POSTTRANS@

%postun
@CPACK_RPM_SPEC_POSTUNINSTALL@

%pre
@CPACK_RPM_SPEC_PREINSTALL@

%pretrans
@CPACK_RPM_SPEC_PRETRANS@

%preun
@CPACK_RPM_SPEC_PREUNINSTALL@

%files
%defattr(@TMP_DEFAULT_FILE_PERMISSIONS@,@TMP_DEFAULT_USER@,@TMP_DEFAULT_GROUP@,@TMP_DEFAULT_DIR_PERMISSIONS@)
@CPACK_RPM_INSTALL_FILES@
@CPACK_RPM_ABSOLUTE_INSTALL_FILES@
@CPACK_RPM_USER_INSTALL_FILES@

%changelog
@CPACK_RPM_SPEC_CHANGELOG@

@TMP_OTHER_COMPONENTS@


================================================
FILE: libhsakmt/RPM/libhsakmt.spec
================================================
%define name        hsakmt-rocm-dev
%define version     %{getenv:PACKAGE_VER}
%define packageroot %{getenv:PACKAGE_DIR}

Name:       %{name}
Version:    %{version}
Release:    1
Summary:    Thunk libraries for AMD KFD

Group:      System Environment/Libraries
License:    Advanced Micro Devices Inc.

%if 0%{?centos} == 6
Requires:   numactl
%else
Requires:   numactl-libs
%endif


%description
This package includes the libhsakmt (Thunk) libraries
for AMD KFD

%prep
%setup -T -D -c -n %{name}

%install
cp -R %packageroot $RPM_BUILD_ROOT
find $RPM_BUILD_ROOT \! -type d | sed "s|$RPM_BUILD_ROOT||"> thunk.list

%post
ldconfig

%postun
ldconfig

%clean
rm -rf $RPM_BUILD_ROOT

%files -f thunk.list

%defattr(-,root,root,-)


================================================
FILE: libhsakmt/RPM/post.in
================================================
# RPM post-install scriptlet fragment: registers the library directory with
# the dynamic linker.
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
  # Write the install library directory to an ld.so.conf.d drop-in, then
  # refresh the linker cache so the new entry takes effect.
  echo -e "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@" > /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
  ldconfig
fi


================================================
FILE: libhsakmt/RPM/postun.in
================================================
# RPM post-uninstall scriptlet fragment: removes the linker configuration
# entry written at install time.
# second term originates from ENABLE_LDCONFIG = ON/OFF at package build
if [ $1 -le 1 ] && [ "@ENABLE_LDCONFIG@" == "ON" ]; then
    # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
    # Delete the ld.so.conf.d drop-in and refresh the linker cache.
    rm -f /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
    ldconfig
fi


================================================
FILE: libhsakmt/cmake_modules/utils.cmake
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and#or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and#or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

## Parses VERSION_STRING and publishes its components to the caller's scope.
##
## The first, second and third runs of decimal digits become VERSION_MAJOR,
## VERSION_MINOR and VERSION_PATCH; each is only set when that many numbers
## are present. VERSION_STRING is set to the found numbers joined with "."
## (empty when there are none). If the input contains a "-", everything
## after the first "-" is published as VERSION_BUILD.
function( parse_version VERSION_STRING )

    # Quote the input everywhere so an empty or ';'-containing string is
    # passed to string() as exactly one argument.
    string ( FIND "${VERSION_STRING}" "-" STRING_INDEX )

    if ( STRING_INDEX GREATER -1 )
        # Skip past the "-" itself; the rest is the build suffix.
        math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
        string ( SUBSTRING "${VERSION_STRING}" ${STRING_INDEX} -1 VERSION_BUILD )
    endif ()

    string ( REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}" )
    list ( LENGTH VERSIONS VERSION_COUNT )

    # Start from an empty result so a same-named variable visible from the
    # caller's scope cannot leak into VERSION_STRING when nothing matches.
    set ( TEMP_VERSION_STRING "" )

    if ( VERSION_COUNT GREATER 0 )
        list ( GET VERSIONS 0 MAJOR )
        set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${MAJOR}" )
    endif ()

    if ( VERSION_COUNT GREATER 1 )
        list ( GET VERSIONS 1 MINOR )
        set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
    endif ()

    if ( VERSION_COUNT GREATER 2 )
        list ( GET VERSIONS 2 PATCH )
        set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
    endif ()

    # VERSION_BUILD may also be visible from the caller's scope; in that
    # case it is simply re-published unchanged.
    if ( DEFINED VERSION_BUILD )
        set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
    endif ()

    set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )

endfunction ()

## Gets the current version of the repository.
##
## DEFAULT_VERSION_STRING is parsed first and provides the fallback values.
## If git is found and `git describe --tags --dirty --long` succeeds in
## CMAKE_CURRENT_SOURCE_DIR, its output is parsed as well and overrides the
## components it provides. The results are passed back to the caller as
## VERSION_STRING, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH and
## VERSION_BUILD.
function ( get_version DEFAULT_VERSION_STRING )

    parse_version ( "${DEFAULT_VERSION_STRING}" )

    find_program ( GIT NAMES git )

    if ( GIT )

        # Run the executable that find_program located rather than relying
        # on a bare "git" being resolvable again at execution time.
        execute_process ( COMMAND "${GIT}" describe --tags --dirty --long
                          WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                          OUTPUT_VARIABLE GIT_TAG_STRING
                          OUTPUT_STRIP_TRAILING_WHITESPACE
                          RESULT_VARIABLE RESULT )

        # RESULT is either an exit code or an error description; referencing
        # it by name keeps the condition well-formed when it is text with
        # spaces. An empty description is ignored so the defaults survive.
        if ( RESULT EQUAL 0 AND NOT "${GIT_TAG_STRING}" STREQUAL "" )

            parse_version ( "${GIT_TAG_STRING}" )

        endif ()

    endif ()

    # parse_version published into this function's scope; forward the
    # values one level further up to our caller.
    set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
    set( VERSION_MAJOR  "${VERSION_MAJOR}" PARENT_SCOPE )
    set( VERSION_MINOR  "${VERSION_MINOR}" PARENT_SCOPE )
    set( VERSION_PATCH  "${VERSION_PATCH}" PARENT_SCOPE )
    set( VERSION_BUILD  "${VERSION_BUILD}" PARENT_SCOPE )

endfunction()

# Detects the host distribution and passes DISTRO_ID and DISTRO_RELEASE back
# to the caller. /etc/os-release is consulted first, /etc/centos-release is
# the fallback; a status line reports the outcome either way.
function(get_os_info)
    set(_os_release_file "/etc/os-release")
    set(_centos_release_file "/etc/centos-release")

    if(EXISTS "${_os_release_file}")
        # Take the ID= and VERSION_ID= lines and strip the key prefixes.
        file(STRINGS "${_os_release_file}" DISTRO_ID REGEX "^ID=")
        string(REPLACE "ID=" "" DISTRO_ID "${DISTRO_ID}")
        file(STRINGS "${_os_release_file}" DISTRO_RELEASE REGEX "^VERSION_ID=")
        string(REPLACE "VERSION_ID=" "" DISTRO_RELEASE "${DISTRO_RELEASE}")
        set(_status_text "Detected distribution: ${DISTRO_ID}:${DISTRO_RELEASE}")
    elseif(EXISTS "${_centos_release_file}")
        # Example: CentOS release 6.10 (Final)
        file(STRINGS "${_centos_release_file}" DISTRO_FULL_STR REGEX "release")
        # Leading word, lower-cased, is the id; first run of digits the release.
        string(REGEX MATCH "^[a-zA-Z]+" DISTRO_ID "${DISTRO_FULL_STR}")
        string(TOLOWER "${DISTRO_ID}" DISTRO_ID)
        string(REGEX MATCH "[0-9]+" DISTRO_RELEASE "${DISTRO_FULL_STR}")
        set(_status_text "Detected distribution: ${DISTRO_ID}:${DISTRO_RELEASE}")
    else()
        set(_status_text "Not able to detect OS")
    endif()

    message(STATUS "${_status_text}")

    set(DISTRO_RELEASE "${DISTRO_RELEASE}" PARENT_SCOPE)
    set(DISTRO_ID "${DISTRO_ID}" PARENT_SCOPE)
endfunction()


================================================
FILE: libhsakmt/hsakmt-config.cmake.in
================================================
@PACKAGE_INIT@

include( CMakeFindDependencyMacro )

# Locate dependent packages here.  Finding them propagates usage requirements,
# if any, to our clients and ensures that their target names are in scope for
# the build.  hsakmt has no cmake project dependencies so there is nothing to
# find.  If we switch to use find_package with external (to ROCm) library
# dependencies (ie libnuma) then those packages should be located here using
# find_dependency as shown below.
#find_dependency(Bar 2.0)

# If the option is ON link other dependent libraries dynamically
# If the option is OFF, then link libdrm and libdrm_amdgpu statically
if(@BUILD_SHARED_LIBS@)
  include( "${CMAKE_CURRENT_LIST_DIR}/@HSAKMT_TARGET@Targets.cmake" )
else()
  include( "${CMAKE_CURRENT_LIST_DIR}/@HSAKMT_STATIC_DRM_TARGET@Targets.cmake" )
endif()


================================================
FILE: libhsakmt/include/hsakmt/hsakmt.h
================================================
/*
 * Copyright © 2024 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _HSAKMT_H_
#define _HSAKMT_H_

#include "hsakmttypes.h"

#ifdef __cplusplus
extern "C" {
#endif


/**
  "Opens" the HSA kernel driver for user-kernel mode communication.

  On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
  is responsible for user-kernel communication, this handle is used internally by
  the thunk library to send device I/O control to the HSA kernel driver.
  No other thunk library function may be called unless the user-kernel communication
  channel is opened first.

  On Linux this call opens the "/dev/kfd" device file to establish a communication
  path to the kernel.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtOpenKFD( void );

/**
  "Closes" the user-kernel communication path.

  On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed;
  no other communication with the kernel driver is possible after the successful
  execution of the hsaKmtCloseKFD() function. Depending on the failure reason,
  the user-kernel communication path may or may not be still active.

  On Linux the function closes the "/dev/kfd" device file.
  No further communication to the kernel driver is allowed until hsaKmtOpenKFD()
  function is called again.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtCloseKFD( void );


/**
  Returns the user-kernel interface version supported by KFD.
  Higher major numbers usually add new features to KFD and may break user-kernel
  compatibility; higher minor numbers define additional functionality associated
  within a major number.
  The calling software should validate that it meets the minimum interface version
  as described in the API specification.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetVersion(
    HsaVersionInfo*  VersionInfo    //OUT
    );

/**
  The function takes a "snapshot" of the topology information within the KFD
  to avoid any changes during the enumeration process.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtAcquireSystemProperties(
    HsaSystemProperties*  SystemProperties    //OUT
    );

/**
  Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties()
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtReleaseSystemProperties( void ) ;

/**
  Retrieves the discoverable sub-properties for a given HSA
  node. The parameters returned allow the application or runtime to size the
  management structures necessary to store the information.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeProperties(
    HSAuint32               NodeId,            //IN
    HsaNodeProperties*      NodeProperties     //OUT
    );

/**
  Retrieves the memory properties of a specific HSA node.
  The memory pointer passed as MemoryProperties is sized as
  NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
  hsaKmtGetNodeProperties() call.

  Some of the data returned is optional. Not all implementations may return all
  parameters in the HsaMemoryProperties.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeMemoryProperties(
    HSAuint32             NodeId,             //IN
    HSAuint32             NumBanks,           //IN
    HsaMemoryProperties*  MemoryProperties    //OUT
    );

/**
  Retrieves the cache properties of a specific HSA node and processor ID.
  ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
  via the hsaKmtGetNodeProperties() call.
  The memory pointer passed as CacheProperties is sized as
  NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
  hsaKmtGetNodeProperties() call.

  The data returned is optional. Not all implementations may return all
  parameters in the CacheProperties.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeCacheProperties(
    HSAuint32           NodeId,         //IN
    HSAuint32           ProcessorId,    //IN
    HSAuint32           NumCaches,      //IN
    HsaCacheProperties* CacheProperties //OUT
    );

/**
  Retrieves the HSA IO affinity properties of a specific HSA node.
  The memory pointer passed as IoLinkProperties is sized as
  NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
  hsaKmtGetNodeProperties() call.

  The data returned is optional. Not all implementations may return all
  parameters in the IoLinkProperties.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeIoLinkProperties(
    HSAuint32            NodeId,            //IN
    HSAuint32            NumIoLinks,        //IN
    HsaIoLinkProperties* IoLinkProperties  //OUT
    );


/**
  Creates an operating system event associated with a HSA event ID
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateEvent(
    HsaEventDescriptor* EventDesc,              //IN
    bool                ManualReset,            //IN
    bool                IsSignaled,             //IN
    HsaEvent**          Event                   //OUT
    );

/**
  Destroys an operating system event associated with a HSA event ID
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDestroyEvent(
    HsaEvent*   Event    //IN
    );

/**
  Sets the specified event object to the signaled state
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetEvent(
    HsaEvent*  Event    //IN
    );

/**
  Sets the specified event object to the non-signaled state
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtResetEvent(
    HsaEvent*  Event    //IN
    );

/**
  Queries the state of the specified event object
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryEventState(
    HsaEvent*  Event    //IN
    );

/**
  Checks the current state of the event object. If the object's state is
  nonsignaled, the calling thread enters the wait state.

 The function returns when one of the following occurs:
- The specified event object is in the signaled state.
- The time-out interval elapses.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnEvent(
    HsaEvent*   Event,          //IN
    HSAuint32   Milliseconds    //IN
    );

/**
  Checks the current state of the event object. If the object's state is
  nonsignaled, the calling thread enters the wait state. event_age can
  help avoid race conditions.

 The function returns when one of the following occurs:
- The specified event object is in the signaled state.
- The time-out interval elapses.
- Tracking event age
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnEvent_Ext(
    HsaEvent*   Event,          //IN
    HSAuint32   Milliseconds,   //IN
    uint64_t   *event_age       //IN/OUT
    );

/**
  Checks the current state of multiple event objects.

 The function returns when one of the following occurs:
- Either any one or all of the specified objects are in the signaled state
  - if "WaitOnAll" is "true" the function returns when the state of all
    objects in array is signaled
  - if "WaitOnAll" is "false" the function returns when the state of any
    one of the objects is set to signaled
- The time-out interval elapses.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnMultipleEvents(
    HsaEvent*   Events[],       //IN
    HSAuint32   NumEvents,      //IN
    bool        WaitOnAll,      //IN
    HSAuint32   Milliseconds    //IN
    );

/**
  Checks the current state of multiple event objects.
  event_age can help avoid race conditions.

 The function returns when one of the following occurs:
- Either any one or all of the specified objects are in the signaled state
  - if "WaitOnAll" is "true" the function returns when the state of all
    objects in array is signaled
  - if "WaitOnAll" is "false" the function returns when the state of any
    one of the objects is set to signaled
- The time-out interval elapses.
- Tracking event age
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnMultipleEvents_Ext(
    HsaEvent*   Events[],       //IN
    HSAuint32   NumEvents,      //IN
    bool        WaitOnAll,      //IN
    HSAuint32   Milliseconds,   //IN
    uint64_t   *event_age       //IN/OUT
    );

/**
  new TEMPORARY function definition - to be used only on "Trinity + Southern Islands" platform
  If used on other platforms the function will return HSAKMT_STATUS_ERROR
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtReportQueue(
    HSA_QUEUEID     QueueId,        //IN
    HsaQueueReport* QueueReport     //OUT
    );

/**
  Creates a GPU queue with user-mode access rights
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateQueue(
    HSAuint32           NodeId,           //IN
    HSA_QUEUE_TYPE      Type,             //IN
    HSAuint32           QueuePercentage,  //IN
    HSA_QUEUE_PRIORITY  Priority,         //IN
    void*               QueueAddress,     //IN
    HSAuint64           QueueSizeInBytes, //IN
    HsaEvent*           Event,            //IN
    HsaQueueResource*   QueueResource     //OUT
    );

/**
  Creates a GPU queue with user-mode access rights
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateQueueExt(
    HSAuint32           NodeId,           //IN
    HSA_QUEUE_TYPE      Type,             //IN
    HSAuint32           QueuePercentage,  //IN
    HSA_QUEUE_PRIORITY  Priority,         //IN
    HSAuint32           SdmaEngineId,     //IN
    void*               QueueAddress,     //IN
    HSAuint64           QueueSizeInBytes, //IN
    HsaEvent*           Event,            //IN
    HsaQueueResource*   QueueResource     //OUT
    );

/**
  Updates a queue
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtUpdateQueue(
    HSA_QUEUEID         QueueId,        //IN
    HSAuint32           QueuePercentage,//IN
    HSA_QUEUE_PRIORITY  Priority,       //IN
    void*               QueueAddress,   //IN
    HSAuint64           QueueSize,      //IN
    HsaEvent*           Event           //IN
    );

/**
  Destroys a queue
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDestroyQueue(
    HSA_QUEUEID         QueueId         //IN
    );

/**
  Set cu mask for a queue
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetQueueCUMask(
    HSA_QUEUEID         QueueId,        //IN
    HSAuint32           CUMaskCount,    //IN
    HSAuint32*          QueueCUMask     //IN
    );

/**
  Retrieves information about the queue identified by QueueId into the
  caller-provided QueueInfo structure
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfo(
    HSA_QUEUEID QueueId,	//IN
    HsaQueueInfo *QueueInfo	//OUT
);

/**
  Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch. 
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryPolicy(
    HSAuint32       Node,                       //IN
    HSAuint32       DefaultPolicy,     	   	    //IN  
    HSAuint32       AlternatePolicy,       	    //IN  
    void*           MemoryAddressAlternate,     //IN (page-aligned)
    HSAuint64       MemorySizeInBytes   	    //IN (page-aligned)
    );
/**
  Allocates a memory buffer that may be accessed by the GPU
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocMemory(
    HSAuint32       PreferredNode,          //IN
    HSAuint64       SizeInBytes,            //IN  (multiple of page size)
    HsaMemFlags     MemFlags,               //IN
    void**          MemoryAddress           //IN/OUT (page-aligned)
    );

/**
  Allocates a memory buffer with specific alignment that may be accessed by the GPU
  If Alignment is 0, the smallest possible alignment will be used
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocMemoryAlign(
    HSAuint32       PreferredNode,          //IN
    HSAuint64       SizeInBytes,            //IN  (multiple of page size)
    HSAuint64       Alignment,              //IN  (power of 2 and >= page size)
    HsaMemFlags     MemFlags,               //IN
    void**          MemoryAddress           //IN/OUT (page-aligned)
    );

/**
  Frees a memory buffer
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtFreeMemory(
    void*       MemoryAddress,      //IN (page-aligned)
    HSAuint64   SizeInBytes         //IN
    );

/**
  Inquires memory available for allocation as a memory buffer
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtAvailableMemory(
    HSAuint32 Node,             //IN
    HSAuint64 *AvailableBytes   //OUT
    );

/**
  Registers with KFD a memory buffer that may be accessed by the GPU
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemory(
    void*       MemoryAddress,      //IN (cache-aligned)
    HSAuint64   MemorySizeInBytes   //IN (cache-aligned)
    );


/**
  Registers with KFD a memory buffer that may be accessed by specific GPUs
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemoryToNodes(
    void        *MemoryAddress,     // IN (cache-aligned)
    HSAuint64   MemorySizeInBytes,  // IN (cache-aligned)
    HSAuint64   NumberOfNodes,      // IN
    HSAuint32*  NodeArray           // IN
    );


/**
  Registers with KFD a memory buffer with memory attributes
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemoryWithFlags(
    void        *MemoryAddress,     // IN (cache-aligned)
    HSAuint64   MemorySizeInBytes,  // IN (cache-aligned)
    HsaMemFlags MemFlags            // IN
    );

/**
  Registers with KFD a graphics buffer and returns graphics metadata
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterGraphicsHandleToNodes(
    HSAuint64       GraphicsResourceHandle,        //IN
    HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
    HSAuint64       NumberOfNodes,                 //IN
    HSAuint32*      NodeArray                      //IN
    );

/**
  Similar to hsaKmtRegisterGraphicsHandleToNodes but provides registration
  options via RegisterFlags.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterGraphicsHandleToNodesExt(
    HSAuint64       GraphicsResourceHandle,        //IN
    HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
    HSAuint64       NumberOfNodes,                 //IN
    HSAuint32*      NodeArray,                     //IN
    HSA_REGISTER_MEM_FLAGS RegisterFlags           //IN
    );

/**
 * Export a dmabuf handle and offset for a given memory address
 *
 * Validates that @MemoryAddress belongs to a valid allocation and that the
 * @MemorySizeInBytes doesn't exceed the end of that allocation. Returns a
 * dmabuf fd of the allocation and the offset of MemoryAddress within that
 * allocation. The memory will remain allocated even after the allocation is
 * freed by hsaKmtFreeMemory for as long as a dmabuf fd remains open or any
 * importer of that fd maintains an active reference to the memory.
 */

HSAKMT_STATUS
HSAKMTAPI
hsaKmtExportDMABufHandle(
    void *MemoryAddress,		//IN
    HSAuint64 MemorySizeInBytes,	//IN
    int *DMABufFd,			//OUT
    HSAuint64 *Offset			//OUT
    );

/**
 Export a memory buffer for sharing with other processes

 NOTE: for the current revision of the thunk spec, SizeInBytes
 must match whole allocation.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtShareMemory(
	void                  *MemoryAddress,     // IN
	HSAuint64             SizeInBytes,        // IN
	HsaSharedMemoryHandle *SharedMemoryHandle // OUT
);

/**
 Register shared memory handle
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterSharedHandle(
	const HsaSharedMemoryHandle *SharedMemoryHandle, // IN
	void                        **MemoryAddress,     // OUT
	HSAuint64                   *SizeInBytes         // OUT
);

/**
 Register shared memory handle to specific nodes only
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterSharedHandleToNodes(
	const HsaSharedMemoryHandle *SharedMemoryHandle, // IN
	void                        **MemoryAddress,     // OUT
	HSAuint64                   *SizeInBytes,        // OUT
	HSAuint64                   NumberOfNodes,       // IN
	HSAuint32*                  NodeArray            // IN
);

/**
 Copy data from the GPU address space of the process identified
 by Pid. SizeCopied returns the actual amount of data copied.
 If the return value is not SUCCESS, partial copies may have happened.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtProcessVMRead(
	HSAuint32                 Pid,                     // IN
	HsaMemoryRange            *LocalMemoryArray,       // IN
	HSAuint64                 LocalMemoryArrayCount,   // IN
	HsaMemoryRange            *RemoteMemoryArray,      // IN
	HSAuint64                 RemoteMemoryArrayCount,  // IN
	HSAuint64                 *SizeCopied              // OUT
);

/**
 Write data to the GPU address space of the process identified
 by Pid. See also hsaKmtProcessVMRead.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtProcessVMWrite(
	HSAuint32                 Pid,                     // IN
	HsaMemoryRange            *LocalMemoryArray,       // IN
	HSAuint64                 LocalMemoryArrayCount,   // IN
	HsaMemoryRange            *RemoteMemoryArray,      // IN
	HSAuint64                 RemoteMemoryArrayCount,  // IN
	HSAuint64                 *SizeCopied              // OUT
);

/**
  Unregisters with KFD a memory buffer
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDeregisterMemory(
    void*       MemoryAddress  //IN
    );


/**
  Ensures that the memory is resident and can be accessed by GPU
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtMapMemoryToGPU(
    void*           MemoryAddress,     //IN (page-aligned)
    HSAuint64       MemorySizeInBytes, //IN (page-aligned)
    HSAuint64*      AlternateVAGPU     //OUT (page-aligned)     
    );

/**
  Ensures that the memory is resident and can be accessed by GPUs
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtMapMemoryToGPUNodes(
    void*           MemoryAddress,         //IN (page-aligned)
    HSAuint64       MemorySizeInBytes,     //IN (page-aligned)
    HSAuint64*      AlternateVAGPU,        //OUT (page-aligned)
    HsaMemMapFlags  MemMapFlags,           //IN
    HSAuint64       NumberOfNodes,         //IN
    HSAuint32*      NodeArray              //IN
    );

/**
  Releases the residency of the memory
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtUnmapMemoryToGPU(
    void*           MemoryAddress       //IN (page-aligned)
    );


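/**
  Map Graphic Handle (counterpart of hsaKmtUnmapGraphicHandle); the resulting
  address is returned through FlatMemoryAddress.
  NOTE(review): the previous description here was a copy of the
  hsaKmtDbgRegister comment; confirm the exact semantics against the
  implementation.
*/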

HSAKMT_STATUS
HSAKMTAPI
hsaKmtMapGraphicHandle(
                HSAuint32          NodeId,                              //IN
                HSAuint64          GraphicDeviceHandle,                 //IN
                HSAuint64          GraphicResourceHandle,               //IN
                HSAuint64          GraphicResourceOffset,               //IN
                HSAuint64          GraphicResourceSize,                 //IN
                HSAuint64*         FlatMemoryAddress            //OUT
                );


/**
  Stub for Unmap Graphic Handle
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtUnmapGraphicHandle(
                HSAuint32          NodeId,                      //IN
                HSAuint64          FlatMemoryAddress,           //IN
                HSAuint64              SizeInBytes              //IN
                );

/**
 * Get an AMDGPU device handle for a GPU node
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetAMDGPUDeviceHandle(
                HSAuint32               NodeId,                    //IN
                HsaAMDGPUDeviceHandle   *DeviceHandle              //OUT
                );

/**
  Allocate GWS resource for a queue
 */

HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocQueueGWS(
                HSA_QUEUEID        QueueId,                     //IN
                HSAuint32          nGWS,                        //IN
                HSAuint32          *firstGWS                    //OUT
                );

/**
  Notifies the kernel driver that a process wants to use GPU debugging facilities
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgRegister(
    HSAuint32       NodeId      //IN
    );

/**
  Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API
*/

HSAKMT_STATUS 
HSAKMTAPI 
hsaKmtDbgUnregister(
    HSAuint32       NodeId      //IN
    );

/**
  Controls a wavefront
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgWavefrontControl(
    HSAuint32           NodeId,         //IN
    HSA_DBG_WAVEOP      Operand,        //IN
    HSA_DBG_WAVEMODE    Mode,           //IN
    HSAuint32           TrapId,         //IN
    HsaDbgWaveMessage*  DbgWaveMsgRing  //IN
    );

/**
  Sets watch points on memory address ranges to generate exception events when the
  watched addresses are  accessed
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgAddressWatch(
    HSAuint32           NodeId,         //IN
    HSAuint32           NumWatchPoints, //IN
    HSA_DBG_WATCH_MODE  WatchMode[],    //IN
    void*               WatchAddress[], //IN
    HSAuint64           WatchMask[],    //IN, optional
    HsaEvent*           WatchEvent[]    //IN, optional
    );

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRuntimeEnable(
    void*     rDebug,    // IN
    bool      setupTtmp  // IN
    );

HSAKMT_STATUS
HSAKMTAPI
hsaKmtRuntimeDisable(void);

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetRuntimeCapabilities(
    HSAuint32	*caps_mask // OUT
    );

/**
  Enable debug trap.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgEnable(
    void **runtime_info, //Out
    HSAuint32 *data_size //Out
    );

/**
  Disable debug trap.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgDisable(void);

/**
  Get device snapshot.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgGetDeviceData(
    void **data, //Out
    HSAuint32 *n_entries, //Out
    HSAuint32 *entry_size //Out
    );

/**
  Get queues snapshot.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgGetQueueData(
    void **data, //Out
    HSAuint32 *n_entries, //Out
    HSAuint32 *entry_size, //Out
    bool suspend_queues //In
    );

/**   
  Check whether gpu firmware and kernel support debugging
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCheckRuntimeDebugSupport(
    void
    );

/**
  Debug ops call primarily used for KFD testing
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(
    struct kfd_ioctl_dbg_trap_args *arg,
    HSA_QUEUEID *Queues,
    HSAuint64 *DebugReturn
    );

/**
  Gets GPU and CPU clock counters for particular Node
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetClockCounters(
    HSAuint32         NodeId,  //IN
    HsaClockCounters* Counters //OUT
    );

/**
  Retrieves information on the available HSA counters
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcGetCounterProperties(
    HSAuint32                   NodeId,             //IN
    HsaCounterProperties**      CounterProperties   //OUT
    );

/**
  Registers a set of (HW) counters to be used for tracing/profiling
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcRegisterTrace(
    HSAuint32           NodeId,             //IN
    HSAuint32           NumberOfCounters,   //IN
    HsaCounter*         Counters,           //IN
    HsaPmcTraceRoot*    TraceRoot           //OUT
    );

/**
  Unregisters a set of (HW) counters used for tracing/profiling
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcUnregisterTrace(
    HSAuint32   NodeId,     //IN
    HSATraceId  TraceId     //IN
    );

/**
  Allows a user mode process to get exclusive access to the defined set of (HW) counters
  used for tracing/profiling
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcAcquireTraceAccess(
    HSAuint32   NodeId,     //IN
    HSATraceId  TraceId     //IN
    );

/**
  Allows a user mode process to release exclusive access to the defined set of (HW) counters
  used for tracing/profiling
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcReleaseTraceAccess(
    HSAuint32   NodeId,     //IN
    HSATraceId  TraceId     //IN
    );

/**
  Starts tracing operation on a previously established set of performance counters
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcStartTrace(
    HSATraceId  TraceId,                //IN
    void*       TraceBuffer,            //IN (page aligned) 
    HSAuint64   TraceBufferSizeBytes    //IN (page aligned)
    );

/**
   Forces an update of all the counters that a previously started trace operation has registered
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcQueryTrace(
    HSATraceId    TraceId   //IN
    );

/**
  Stops tracing operation on a previously established set of performance counters
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcStopTrace(
    HSATraceId  TraceId     //IN
    );

/**
  Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context
*/

HSAKMT_STATUS 
HSAKMTAPI 
hsaKmtSetTrapHandler(
    HSAuint32           NodeId,                   //IN
    void*               TrapHandlerBaseAddress,   //IN
    HSAuint64           TrapHandlerSizeInBytes,   //IN
    void*               TrapBufferBaseAddress,    //IN
    HSAuint64           TrapBufferSizeInBytes     //IN
    );

/**
  Gets image tile configuration.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetTileConfig(
    HSAuint32           NodeId,     // IN
    HsaGpuTileConfig*   config      // IN & OUT
    );

/**
  Returns information about pointers
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryPointerInfo(
    const void *        Pointer,        //IN
    HsaPointerInfo *    PointerInfo     //OUT
    );

/**
  Associates user data with a memory allocation
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryUserData(
    const void *    Pointer,    //IN
    void *          UserData    //IN
    );

/**
  Acquire exclusive use of SPM
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSPMAcquire(
    HSAuint32	PreferredNode	//IN
    );


/**
  Release exclusive use of SPM
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSPMRelease(
    HSAuint32	PreferredNode	//IN
    );

/**
   Set up the destination user mode buffer for stream performance
   counter data.
*/

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSPMSetDestBuffer(
	HSAuint32   PreferredNode,		//IN
	HSAuint32   SizeInBytes,		//IN
	HSAuint32   * timeout,			//IN/OUT
	HSAuint32   * SizeCopied,		//OUT
	void        *DestMemoryAddress,		//IN
	bool        *isSPMDataLoss		//OUT
    );

/* Helper functions for calling KFD SVM ioctl */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSVMSetAttr(
    void *start_addr,   // IN: Start of the virtual address range (page-aligned)
    HSAuint64 size,     // IN: size (page-aligned)
    unsigned int nattr, // IN: number of attributes
    HSA_SVM_ATTRIBUTE *attrs  // IN: array of attributes
);

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSVMGetAttr(
    void *start_addr,   // IN: Start of the virtual address range (page-aligned)
    HSAuint64 size,     // IN: size (page aligned)
    unsigned int nattr, // IN: number of attributes
    HSA_SVM_ATTRIBUTE *attrs  // IN/OUT: array of attributes
);

HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetXNACKMode(
    HSAint32 enable  // IN: enable/disable XNACK mode.
);

HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetXNACKMode(
    HSAint32 * enable  // OUT: returns XNACK value.
);

/**
   Open anonymous file handle to enable events and read SMI events.

   To enable events, write a 64-bit event mask to fd, using the event enums as bit indices.
   For example, the event mask (HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_INDEX_MAX) - 1) enables all events.

   Reading events from fd is non-blocking; use poll with a timeout value to check whether an event is available.
   An event is dropped if the kernel event fifo is full.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtOpenSMI(
    HSAuint32 NodeId,   // IN: GPU node_id to receive the SMI event from
    int *fd             // OUT: anonymous file handle
);

/**
   If this is GPU Mapped memory, remap the first page at this address to be normal system memory

   This is used in ASAN mode to remap the first page of device memory to share host ASAN logic.
   This function is only supported when libhsakmt is compiled in ASAN mode.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtReplaceAsanHeaderPage(
    void *addr     // IN: Start of the virtual address page
);

/**
   If this is GPU Mapped memory, remap the first page back to the original GPU memory

   This is used in ASAN mode to remap the first page back to its original mapping.
   This function is only supported when libhsakmt is compiled in ASAN mode.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtReturnAsanHeaderPage(
    void *addr     // IN: Start of the virtual address page
);

/**
   Check whether the kernel supports PC sampling
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingSupport(
    void
);

/**
 * Query device PC Sampling capabilities
 *
 *  Arguments:
 *   @NodeId        (IN) - GPU node_id
 *   @sample_info   (IN) - Pointer to array of HsaPcSamplingInfo
 *   @sample_info_sz(IN) - Size of sample_info in units of HsaPcSamplingInfo
 *   @sz_needed     (OUT)- If sample_info_sz is too small, sample_info_sz needed
 *
 *  Return:
 *   HSAKMT_STATUS_ERROR             - failed
 *   HSAKMT_STATUS_SUCCESS           - successfully complete
 *   HSAKMT_STATUS_INVALID_PARAMETER - invalid input
 *   HSAKMT_STATUS_BUFFER_TOO_SMALL  - sample buffer size is too small. Retry with sample_info_sz
 *                                     >= sz_needed
 *   HSAKMT_STATUS_NOT_SUPPORTED     - this asic doesn't support pc sampling
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingQueryCapabilities(
    HSAuint32 NodeId,
    void *sample_info,
    HSAuint32 sample_info_sz,
    HSAuint32 *sz_needed
);

/**
 * Create PC Sampling Session
 *
 *  Arguments:
 *   @node_id    (IN)  - GPU node_id
 *   @sample_info(IN)  - PC Sampling configuration requested
 *   @traceId    (OUT) - Unique PC Sampling trace Id
 *
 *  Return:
 *   HSAKMT_STATUS_ERROR             - failed
 *   HSAKMT_STATUS_SUCCESS           - successfully complete
 *   HSAKMT_STATUS_INVALID_PARAMETER - invalid input
 *   HSAKMT_STATUS_NO_MEMORY         - not enough memory to create new pc sampling session
 *   HSAKMT_STATUS_UNAVAILABLE       - a different pc sampling session started on this node
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingCreate(
  HSAuint32 node_id,
  HsaPcSamplingInfo *sample_info,
  HsaPcSamplingTraceId *traceId
);

/**
 * Destroy PC Sampling Session
 *
 *  Arguments:
 *   @NodeId (IN) - GPU node_id
 *   @traceId(IN) - PC Sampling trace Id
 *
 *  Return:
 *   HSAKMT_STATUS_ERROR             - failed
 *   HSAKMT_STATUS_SUCCESS           - successfully complete
 *   HSAKMT_STATUS_INVALID_PARAMETER - invalid input
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingDestroy(
    HSAuint32 NodeId,
    HsaPcSamplingTraceId traceId
);

/**
 * Start PC Sampling Session
 *
 *  Arguments:
 *   @NodeId (IN) - GPU node_id
 *   @traceId(IN) - PC Sampling trace Id
 *
 *  Return:
 *   HSAKMT_STATUS_ERROR             - failed
 *   HSAKMT_STATUS_SUCCESS           - successfully complete
 *   HSAKMT_STATUS_INVALID_PARAMETER - invalid input
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingStart(
    HSAuint32 NodeId,
    HsaPcSamplingTraceId traceId
);

/**
 * Stop PC Sampling Session
 *
 *  Arguments:
 *   @NodeId (IN) - GPU node_id
 *   @traceId(IN) - PC Sampling trace Id
 *
 *  Return:
 *   HSAKMT_STATUS_ERROR                 - failed
 *   HSAKMT_STATUS_SUCCESS               - successfully complete
 *   HSAKMT_STATUS_INVALID_PARAMETER     - invalid input
 *   HSAKMT_STATUS_KERNEL_ALREADY_OPENED - session is already stopped
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPcSamplingStop(
    HSAuint32 NodeId,
    HsaPcSamplingTraceId traceId
);

/**
 * Check if the HSA KMT Model is enabled
 * 
 *  Arguments:
 *   @enable (OUT) - true if the HSA KMT Model is enabled, false otherwise
 * 
 *  Return:
 *   HSAKMT_STATUS_ERROR             - failed
 *   HSAKMT_STATUS_SUCCESS           - successfully complete
 */

HSAKMT_STATUS
HSAKMTAPI
hsaKmtModelEnabled(
    bool* enable // OUT
);

#ifdef __cplusplus
}   //extern "C"
#endif

#endif //_HSAKMT_H_


================================================
FILE: libhsakmt/include/hsakmt/hsakmt_virtio.h
================================================
/*
 * Copyright © 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef HSAKMT_VIRTIO_H
#define HSAKMT_VIRTIO_H

#include "hsakmt/linux/kfd_ioctl.h"
#include "hsakmt/hsakmt.h"
#include <libdrm/amdgpu.h>

#ifdef __cplusplus
extern "C" {
#endif

HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenKFD(void);
HSAKMT_STATUS HSAKMTAPI vhsaKmtCloseKFD(void);
HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes,
                                           HsaMemFlags MemFlags, void** MemoryAddress);
HSAKMT_STATUS HSAKMTAPI vhsaKmtFreeMemory(void* MemoryAddress, HSAuint64 SizeInBytes);
HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint64 MemorySizeInBytes,
                                                   HSAuint64* AlternateVAGPU,
                                                   HsaMemMapFlags MemMapFlags,
                                                   HSAuint64 NumberOfNodes, HSAuint32* NodeArray);
HSAKMT_STATUS HSAKMTAPI vhsaKmtUnmapMemoryToGPU(void* MemoryAddress);
HSAKMT_STATUS HSAKMTAPI vhsaKmtAvailableMemory(HSAuint32 Node, HSAuint64* AvailableBytes);
HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPU(void* MemoryAddress, HSAuint64 MemorySizeInBytes,
                                              HSAuint64* AlternateVAGPU);
HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryWithFlags(void* MemoryAddress,
                                                       HSAuint64 MemorySizeInBytes,
                                                       HsaMemFlags MemFlags);
HSAKMT_STATUS HSAKMTAPI vhsaKmtDeregisterMemory(void* MemoryAddress);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetVersion(HsaVersionInfo* v);
HSAKMT_STATUS HSAKMTAPI vhsaKmtAcquireSystemProperties(HsaSystemProperties* SystemProperties);
HSAKMT_STATUS HSAKMTAPI vhsaKmtReleaseSystemProperties(void);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeProperties(HSAuint32 NodeId,
                                                 HsaNodeProperties* NodeProperties);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetXNACKMode(HSAint32* enable);
HSAKMT_STATUS HSAKMTAPI vhsaKmtRuntimeEnable(void* rDebug, bool setupTtmp);
HSAKMT_STATUS HSAKMTAPI vhsaKmtRuntimeDisable(void);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAuint32 NumBanks,
                                                       HsaMemoryProperties* MemoryProperties);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeCacheProperties(HSAuint32 NodeId, HSAuint32 ProcessorId,
                                                      HSAuint32 NumCaches,
                                                      HsaCacheProperties* CacheProperties);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId, HSAuint32 NumIoLinks,
                                                       HsaIoLinkProperties* IoLinkProperties);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetClockCounters(HSAuint32 NodeId, HsaClockCounters* Counters);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
                                                     HsaAMDGPUDeviceHandle* DeviceHandle);
HSAKMT_STATUS HSAKMTAPI vhsaKmtQueryPointerInfo(const void* Pointer, HsaPointerInfo* PointerInfo);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig* config);
/* Event creation entry point of the virtio interface. The flags are spelled
 * `bool` (available through <stdbool.h> in C, built in for C++) to match the
 * other declarations in this header instead of the C-only `_Bool` keyword. */
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateEvent(HsaEventDescriptor* EventDesc, bool ManualReset,
                                           bool IsSignaled, HsaEvent** Event);
HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyEvent(HsaEvent* Event);
HSAKMT_STATUS HSAKMTAPI vhsaKmtSetEvent(HsaEvent* Event);
HSAKMT_STATUS HSAKMTAPI vhsaKmtResetEvent(HsaEvent* Event);
HSAKMT_STATUS HSAKMTAPI vhsaKmtQueryEventState(HsaEvent* Event);
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnMultipleEvents(HsaEvent* Events[], HSAuint32 NumEvents,
                                                    bool WaitOnAll, HSAuint32 Milliseconds);
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnEvent(HsaEvent* Event, HSAuint32 Milliseconds);
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnEvent_Ext(HsaEvent* Event, HSAuint32 Milliseconds,
                                               uint64_t* event_age);
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnMultipleEvents_Ext(HsaEvent* Events[], HSAuint32 NumEvents,
                                                        bool WaitOnAll, HSAuint32 Milliseconds,
                                                        uint64_t* event_age);
HSAKMT_STATUS HSAKMTAPI vhsaKmtSetTrapHandler(HSAuint32 NodeId, void* TrapHandlerBaseAddress,
                                              HSAuint64 TrapHandlerSizeInBytes,
                                              void* TrapBufferBaseAddress,
                                              HSAuint64 TrapBufferSizeInBytes);
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateQueueExt(HSAuint32 NodeId, HSA_QUEUE_TYPE Type,
                                              HSAuint32 QueuePercentage,
                                              HSA_QUEUE_PRIORITY Priority, HSAuint32 SdmaEngineId,
                                              void* QueueAddress, HSAuint64 QueueSizeInBytes,
                                              HsaEvent* Event, HsaQueueResource* QueueResource);
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateQueue(HSAuint32 NodeId, HSA_QUEUE_TYPE Type,
                                           HSAuint32 QueuePercentage, HSA_QUEUE_PRIORITY Priority,
                                           void* QueueAddress, HSAuint64 QueueSizeInBytes,
                                           HsaEvent* Event, HsaQueueResource* QueueResource);
HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyQueue(HSA_QUEUEID QueueId);
HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes(
    HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo,
    HSAuint64 NumberOfNodes, HSAuint32* NodeArray);
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetRuntimeCapabilities(HSAuint32* caps_mask);

int vamdgpu_query_gpu_info(amdgpu_device_handle dev, void* out);

#ifdef __cplusplus
}
#endif

#endif /* HSAKMT_VIRTIO_H */


================================================
FILE: libhsakmt/include/hsakmt/hsakmtmodel.h
================================================
/*
 * Copyright © 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _HSAKMTMODEL_H_
#define _HSAKMTMODEL_H_
#include <stdbool.h>
/* Globals and entry points of the software-model backend.
 * NOTE(review): the notes below are inferred from the identifiers and
 * signatures only; confirm each one against hsakmtmodel.c. */

/* Presumably true when the software model is used in place of the kernel driver. */
extern bool hsakmt_use_model;
/* Presumably the topology selection/path for the model -- TODO confirm. */
extern char *hsakmt_model_topology;
/* Presumably initializes the two globals above from environment variables. */
void model_init_env_vars(void);
/* Presumably performs the one-time model set-up. */
void model_init(void);
/* Hands the model a pointer to the MMIO page. */
void model_set_mmio_page(void *ptr);
/* Hands the model a pointer to the event page together with an event limit. */
void model_set_event_page(void *ptr, unsigned event_limit);
/* Presumably the model-side handler for KFD ioctl requests (request code + argument). */
int model_kfd_ioctl(unsigned long request, void *arg);
#endif /* _HSAKMTMODEL_H_ */

================================================
FILE: libhsakmt/include/hsakmt/hsakmtmodeliface.h
================================================
/*
 * Copyright © 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _HSAKMTMODELIFACE_H_
#define _HSAKMTMODELIFACE_H_

#include <inttypes.h>

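// Changelog:
//  0.2: Add set_set_event function to hsakmt_model_functions
//  NOTE(review): HSAKMT_MODEL_INTERFACE_VERSION_MINOR is currently 4, but the
//  entries for 0.3 and 0.4 are missing from this changelog.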
#define HSAKMT_MODEL_INTERFACE_VERSION_MAJOR 0
#define HSAKMT_MODEL_INTERFACE_VERSION_MINOR 4

typedef struct hsakmt_model hsakmt_model_t;
typedef struct hsakmt_model_queue hsakmt_model_queue_t;

// Description of a queue to be registered with the model.
//
// Addresses are relative to the global aperture.
struct hsakmt_model_queue_info {
	// Aperture-relative addresses of the ring buffer and of its write and
	// read pointers.
	uint64_t ring_base_address;
	uint64_t write_pointer_address;
	uint64_t read_pointer_address;

	// Unlike the fields above this is a plain pointer, not an
	// aperture-relative address.
	uint64_t *doorbell;

	uint32_t ring_size; // in bytes
	// NOTE(review): encoding of queue_type is not documented here --
	// confirm against the code that fills in this struct.
	uint32_t queue_type;
};

// Pointer to a "set event" function.
//
// data is a user-provided opaque pointer.
// event_id is the ID of the event to set (as in amd_signal_s::event_id).
typedef void (*hsakmt_model_set_event_fn)(void *data, unsigned event_id);

// Interface provided by the software model implementation.
//
// Queried from a shared library by calling an export called
// `get_hsakmt_model_functions`
//
// Interface versioning follows the semantic versioning model: clients that
// know about interface version X.Y can use any implementation that provides
// version X.Z with Z >= Y.
//
// The model is designed to support only one VMID space.
struct hsakmt_model_functions {
	uint32_t version_major; // HSAKMT_MODEL_INTERFACE_VERSION_MAJOR
	uint32_t version_minor; // HSAKMT_MODEL_INTERFACE_VERSION_MINOR

	// Create a GPU device model.
	hsakmt_model_t *(*create)(void);

	// Destroy a GPU device model.
	void (*destroy)(hsakmt_model_t *model);

	// Set the global aperture. GPU virtual address 0 is at CPU address `base`.
	void (*set_global_aperture)(hsakmt_model_t *model, void *base, uint64_t size);
	// NOTE(review): presumably tells the model that the range starting at
	// `base` of `size` bytes was allocated; the meaning of `flags` is not
	// documented here -- confirm against the caller.
	void (*alloced_memory)(hsakmt_model_t *model, void *base, uint64_t size, uint32_t flags);
	// NOTE(review): presumably the counterpart of alloced_memory for a range
	// that has been freed -- confirm against the caller.
	void (*freed_memory)(hsakmt_model_t *model, void *base, uint64_t size);
	// Register a callback that the model should call when an event is signaled.
	// `data` is client data that is opaque to the model.
	//
	// TODO: Deprecated -- remove this!
	void (*set_notify_event)(hsakmt_model_t *model, void (*callback)(void *data), void *data);

	// Register a callback that the model should call in order to wait for an
	// event to be signaled.
	// `data` is client data that is opaque to the model.
	// The callback additionally receives an `address` and an `age` value.
	void (*set_wait_event)(hsakmt_model_t *model, void (*callback)(void *data, uint64_t address, uint64_t age), void *data);

	// Register a queue with the model. The model will immediately begin
	// asynchronous processing of the queue (but by default, the model need not
	// provide forward progress guarantees between multiple queues).
	hsakmt_model_queue_t *(*register_queue)(hsakmt_model_t *model, struct hsakmt_model_queue_info *info);

	// Register a callback that allows the model to set an event.
	// `fn` has the hsakmt_model_set_event_fn signature; `data` is passed back
	// to it as its opaque first argument.
	void (*set_set_event)(hsakmt_model_t *model, hsakmt_model_set_event_fn fn, void *data);

	// Destroy a queue that was returned by register_queue.
	void (*destroy_queue)(hsakmt_model_t *model, hsakmt_model_queue_t *queue);
};

// Type of a shared library export called `get_hsakmt_model_functions`.
typedef const struct hsakmt_model_functions *(*get_hsakmt_model_functions_t)(void);

#endif // _HSAKMTMODELIFACE_H_

================================================
FILE: libhsakmt/include/hsakmt/hsakmttypes.h
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _HSAKMTTYPES_H_
#define _HSAKMTTYPES_H_

//the definitions and THUNK API are version specific - define the version numbers here
#define HSAKMT_VERSION_MAJOR    0
#define HSAKMT_VERSION_MINOR    99


#ifdef __cplusplus
extern "C" {
#endif

#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)

    #if defined(_WIN32)
        #define HSAKMTAPI  __stdcall
    #else
        #define HSAKMTAPI
    #endif

    typedef unsigned char      HSAuint8;
    typedef char               HSAint8;
    typedef unsigned short     HSAuint16;
    typedef signed short       HSAint16;
    typedef unsigned __int32   HSAuint32;
    typedef signed __int32     HSAint32;
    typedef signed __int64     HSAint64;
    typedef unsigned __int64   HSAuint64;

#elif defined(__linux__)

#include <stdbool.h>
#include <stdint.h>

    #define HSAKMTAPI

    typedef uint8_t     HSAuint8;
    typedef int8_t      HSAint8;
    typedef uint16_t	HSAuint16;
    typedef int16_t	HSAint16;
    typedef uint32_t	HSAuint32;
    typedef int32_t 	HSAint32;
    typedef int64_t	HSAint64;
    typedef uint64_t	HSAuint64;

#endif

typedef void*              HSA_HANDLE;
typedef HSAuint64          HSA_QUEUEID;
// An HSA_QUEUEID that is never a valid queue ID.
#define INVALID_QUEUEID 0xFFFFFFFFFFFFFFFFULL

// A PID that is never a valid process ID.
#define INVALID_PID 0xFFFFFFFF

// A HSA_NODEID that is never a valid node ID.
#define INVALID_NODEID 0xFFFFFFFF

// This is included in order to force the alignments to be 4 bytes so that
// it avoids extra padding added by the compiler when a 64-bit binary is generated.
#pragma pack(push, hsakmttypes_h, 4)

//
// HSA STATUS codes returned by the KFD Interfaces
//

typedef enum _HSAKMT_STATUS
{
    // ---- generic results (0..2) ----

    // Operation successful
    HSAKMT_STATUS_SUCCESS = 0,
    // General error return if not otherwise specified
    HSAKMT_STATUS_ERROR = 1,
    // User mode component is not compatible with kernel HSA driver
    HSAKMT_STATUS_DRIVER_MISMATCH = 2,

    // ---- argument validation (3..5) ----

    // KFD identifies input parameters invalid
    HSAKMT_STATUS_INVALID_PARAMETER = 3,
    // KFD identifies handle parameter invalid
    HSAKMT_STATUS_INVALID_HANDLE = 4,
    // KFD identifies node or unit parameter invalid
    HSAKMT_STATUS_INVALID_NODE_UNIT = 5,

    // ---- memory / buffer sizing (6..7) ----

    // No memory available (when allocating queues or memory)
    HSAKMT_STATUS_NO_MEMORY = 6,
    // A buffer needed to handle a request is too small
    HSAKMT_STATUS_BUFFER_TOO_SMALL = 7,

    // ---- availability of a KFD function (10..13) ----

    // KFD function is not implemented for this set of parameters
    HSAKMT_STATUS_NOT_IMPLEMENTED = 10,
    // KFD function is not supported on this node
    HSAKMT_STATUS_NOT_SUPPORTED = 11,
    // KFD function is not available currently on this node
    // (but may be at a later time)
    HSAKMT_STATUS_UNAVAILABLE = 12,
    // KFD function request exceeds the resources currently available.
    HSAKMT_STATUS_OUT_OF_RESOURCES = 13,

    // ---- kernel driver channel (20..23) ----

    // KFD driver path not opened
    HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20,
    // user-kernel mode communication failure
    HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR = 21,
    // KFD driver path already opened
    HSAKMT_STATUS_KERNEL_ALREADY_OPENED = 22,
    // ATS/PRI 1.1 (Address Translation Services) not available
    // (IOMMU driver not installed or not-available)
    HSAKMT_STATUS_HSAMMU_UNAVAILABLE = 23,

    // ---- wait operations (30..31) ----

    // The wait operation failed
    HSAKMT_STATUS_WAIT_FAILURE = 30,
    // The wait operation timed out
    HSAKMT_STATUS_WAIT_TIMEOUT = 31,

    // ---- memory registration (35..37) ----

    // Memory buffer already registered
    HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED = 35,
    // Memory buffer not registered
    HSAKMT_STATUS_MEMORY_NOT_REGISTERED = 36,
    // Memory parameter not aligned
    HSAKMT_STATUS_MEMORY_ALIGNMENT = 37,

} HSAKMT_STATUS;

//
// HSA KFD interface version information. Calling software has to validate that it meets
// the minimum interface version as described in the API specification.
// All future structures will be extended in a backward compatible fashion.
//

// Kernel interface version, expressed as a (major, minor) pair of 32-bit values.
typedef struct _HsaVersionInfo
{
    HSAuint32    KernelInterfaceMajorVersion;    // supported kernel interface major version
    HSAuint32    KernelInterfaceMinorVersion;    // supported kernel interface minor version
} HsaVersionInfo;

//
// HSA Topology Discovery Infrastructure structure definitions.
// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
// in the "Heterogeneous System Architecture Detail Topology" specification.
//
// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
// When the call is made within a process context, a "snapshot" of the topology information
// is taken within the KFD to avoid any changes during the enumeration process.
// The Snapshot is released when hsaKmtReleaseSystemProperties() is called
// or when the process exits or is terminated.
//

// System-wide topology summary: the node count plus three platform
// identification values taken from the CRAT.
typedef struct _HsaSystemProperties
{
    HSAuint32    NumNodes;         // the number of "H-NUMA" memory nodes.
                                   // each node represents a discoverable node of the system
                                   // All other enumeration is done on a per-node basis

    HSAuint32    PlatformOem;      // identifies HSA platform, reflects the OEMID in the CRAT
    HSAuint32    PlatformId;       // HSA platform ID, reflects OEM TableID in the CRAT
    HSAuint32    PlatformRev;      // HSA platform revision, reflects Platform Table Revision ID
} HsaSystemProperties;

// Engine identifier. The same 32 bits can be read either as the raw Value
// or through the ui32 bit-fields (10 + 6 + 8 + 8 = 32 bits).
typedef union
{
    HSAuint32 Value;                 // all fields as one 32-bit word
    struct
    {
        unsigned int uCode    : 10;  // ucode packet processor version
        unsigned int Major    :  6;  // GFXIP Major engine version
        unsigned int Minor    :  8;  // GFXIP Minor engine version
        unsigned int Stepping :  8;  // GFXIP Stepping info
    }ui32;
} HSA_ENGINE_ID;

// Microcode version word. The bit-fields live in an anonymous struct, so they
// are accessed directly on the union (10 + 10 + 12 = 32 bits).
typedef union
{
    HSAuint32 Value;                // all fields as one 32-bit word
    struct
    {
        unsigned int uCodeSDMA: 10; // ucode version SDMA engine
        unsigned int uCodeRes : 10; // ucode version (reserved)
        unsigned int Reserved : 12; // Reserved, must be 0
    };
} HSA_ENGINE_VERSION;

// Node capability flags. The same 32 bits can be read either as the raw Value
// or through the ui32 bit-fields. The declared field widths add up to exactly
// 32, so this word has no spare bits left; additional flags are carried by
// HSA_CAPABILITY2.
typedef union
{
    HSAuint32 Value;                             // all flags as one 32-bit word
    struct
    {
        unsigned int HotPluggable        : 1;    // the node may be removed by some system action
                                                 // (event will be sent)
        unsigned int HSAMMUPresent       : 1;    // This node has an ATS/PRI 1.1 compatible
                                                 // translation agent in the system (e.g. IOMMUv2)
        unsigned int SharedWithGraphics  : 1;    // this HSA nodes' GPU function is also used for OS primary
                                                 // graphics render (= UI)
        unsigned int QueueSizePowerOfTwo : 1;    // This node GPU requires the queue size to be a power of 2 value
        unsigned int QueueSize32bit      : 1;    // This node GPU requires the queue size to be less than 4GB
        unsigned int QueueIdleEvent      : 1;    // This node GPU supports notification on Queue Idle
        unsigned int VALimit             : 1;    // This node GPU has limited VA range for platform
                                                 // (typical 40bit). Affects shared VM use for 64bit apps
        unsigned int WatchPointsSupported: 1;    // Indicates if Watchpoints are available on the node.
        unsigned int WatchPointsTotalBits: 4;    // Watchpoints available. To determine the number use 2^value

        unsigned int DoorbellType        : 2;    // 0: This node has pre-1.0 doorbell characteristic
                                                 // 1: This node has 1.0 doorbell characteristic
                                                 // 2,3: reserved for future use
        unsigned int AQLQueueDoubleMap   : 1;    // The unit needs a VA "double map"
        unsigned int DebugTrapSupported  : 1;    // Indicates if Debug Trap is supported on the node.
        unsigned int WaveLaunchTrapOverrideSupported: 1; // Indicates if Wave Launch Trap Override is supported on the node.
        unsigned int WaveLaunchModeSupported: 1; // Indicates if Wave Launch Mode is supported on the node.
        unsigned int PreciseMemoryOperationsSupported: 1; // Indicates if Precise Memory Operations are supported on the node.
        unsigned int DEPRECATED_SRAM_EDCSupport: 1; // Old buggy user mode depends on this being 0
        unsigned int Mem_EDCSupport: 1;          // Indicates if GFX internal DRAM/HBM EDC/ECC functionality is active
        unsigned int RASEventNotify: 1;          // Indicates if GFX extended RASFeatures and RAS EventNotify status is available
        unsigned int ASICRevision: 4;            // Indicates the ASIC revision of the chip on this node.
        unsigned int SRAM_EDCSupport: 1;         // Indicates if GFX internal SRAM EDC/ECC functionality is active
        unsigned int SVMAPISupported     : 1;    // Whether or not the SVM API is supported
        unsigned int CoherentHostAccess: 1;      // Whether or not device memory can be coherently accessed by the host CPU
        unsigned int DebugSupportedFirmware : 1; // Indicates if HWS firmware supports GPU debugging
        unsigned int PreciseALUOperationsSupported : 1; // Indicates if precise ALU operations are supported for GPU debugging
        unsigned int PerQueueResetSupported : 1; // Indicates per-queue reset supported
    } ui32;
} HSA_CAPABILITY;

// Second node capability word. Same layout convention as HSA_CAPABILITY:
// raw Value or ui32 bit-fields (1 flag + 31 reserved bits = 32 bits).
typedef union
{
    HSAuint32 Value;                                 // all flags as one 32-bit word
    struct
    {
        unsigned int PerSDMAQueueResetSupported : 1; // Indicates per-sdma queue reset supported
        unsigned int Reserved : 31; // Reserved
    } ui32;
} HSA_CAPABILITY2;

// Debug Properties and values
// HSA runtime may expose a subset of the capabilities outlined to the application.
// The fields live in an anonymous struct overlaying the 64-bit Value
// (4 + 6 + 1 + 1 + 52 = 64 bits).
typedef union
{
    HSAuint64 Value;                     // all fields as one 64-bit word
    struct
    {
        HSAuint64 WatchAddrMaskLoBit: 4; // Only bits
                                         // WatchAddrMaskLoBit..WatchAddrMaskHiBit
                                         // of the
        HSAuint64 WatchAddrMaskHiBit: 6; // watch address mask are used.
                                         // 0 is the least significant bit.
        HSAuint64 DispatchInfoAlwaysValid: 1; // 0 if control of TTMP setup is
                                              // controlled on a per process
                                              // basis and is not always enabled
                                              // 1 if TTMP setup is always
                                              // enabled
        HSAuint64 AddressWatchpointShareKind: 1; // whether the address watchpoint
                                                 //     is per process or shared with
                                                 //     all processes
                                                 // 0 if shared or unsupported
                                                 //    (unsupported indicated by
                                                 //    address_watchpoint_count == 0)
                                                 //    All current devices have shared watchpoints
                                                 // 1 if unshared
        HSAuint64 Reserved: 52;              // remaining bits, reserved
    };
} HSA_DEBUG_PROPERTIES;

//
// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
// The application or runtime can use the information herein to size the topology management structures
// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
// of throughput compute units (= SIMDs)) associated with a H-NUMA node.
//

#define HSA_PUBLIC_NAME_SIZE        64   // Marketing name string size (element count of MarketingName and AMDName)

// Per-node properties. In the field names, "CCompute" refers to the latency
// (= CPU) cores and "FCompute" to the throughput (= GPU) cores of the node.
// Field order and types define the binary layout; do not reorder.
typedef struct _HsaNodeProperties
{
    HSAuint32       NumCPUCores;       // # of latency (= CPU) cores present on this HSA node.
                                       // This value is 0 for a HSA node with no such cores,
                                       // e.g a "discrete HSA GPU"
    HSAuint32       NumFComputeCores;  // # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a node.
                                       // This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
    HSAuint32 NumNeuralCores;          // # of HSA neural processing units (= AIE) present in a
                                       // node. This value is 0 if there are no NeuralCores.
    HSAuint32       NumMemoryBanks;    // # of discoverable memory bank affinity properties on this "H-NUMA" node.
    HSAuint32       NumCaches;         // # of discoverable cache affinity properties on this "H-NUMA"  node.

    HSAuint32       NumIOLinks;        // # of discoverable IO link affinity properties of this node
                                       // connecting to other nodes.

    HSAuint32       CComputeIdLo;      // low value of the logical processor ID of the latency (= CPU)
                                       // cores available on this node
    HSAuint32       FComputeIdLo;      // low value of the logical processor ID of the throughput (= GPU)
                                       // units available on this node

    HSA_CAPABILITY  Capability;        // see above
    HSA_CAPABILITY2  Capability2;      // see above

    HSAuint32       MaxWavesPerSIMD;   // This identifies the max. number of launched waves per SIMD.
                                       // If NumFComputeCores is 0, this value is ignored.
    HSAuint32       LDSSizeInKB;       // Size of Local Data Store in Kilobytes per SIMD Wavefront
    HSAuint32       GDSSizeInKB;       // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts

    HSAuint32       WaveFrontSize;     // Number of SIMD cores per wavefront executed, typically 64,
                                       // may be 32 or a different value for some HSA based architectures

    HSAuint32       NumShaderBanks;    // Number of Shader Banks or Shader Engines, typical values are 1 or 2


    HSAuint32       NumArrays;         // Number of SIMD arrays per engine
    HSAuint32       NumCUPerArray;     // Number of Compute Units (CU) per SIMD array
    HSAuint32       NumSIMDPerCU;      // Number of SIMD representing a Compute Unit (CU)

    HSAuint32       MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
                                       // may be 0 if HW has no restrictions

    HSA_ENGINE_ID   EngineId;          // Identifier (rev) of the GPU uEngine or Firmware, may be 0
    HSA_ENGINE_ID   OverrideEngineId;  // Identifier (rev) of the Overrided GPU uEngine or Firmware, may be 0

    HSAuint16       VendorId;          // GPU vendor id; 0 on latency (= CPU)-only nodes
    HSAuint16       DeviceId;          // GPU device id; 0 on latency (= CPU)-only nodes

    HSAuint32       LocationId;        // GPU BDF (Bus/Device/function number) - identifies the device
                                       // location in the overall system
    HSAuint64       LocalMemSize;       // Local memory size
    HSAuint32       MaxEngineClockMhzFCompute;  // maximum engine clock in MHz for the GPU (FCompute) function and
    HSAuint32       MaxEngineClockMhzCCompute;  // for the CPU (CCompute) function, including any boost capabilities
    HSAint32        DrmRenderMinor;             // DRM render device minor device number
    HSAuint16       MarketingName[HSA_PUBLIC_NAME_SIZE];   // Public name of the "device" on the node (board or APU name).
                                       // Unicode string
    HSAuint8        AMDName[HSA_PUBLIC_NAME_SIZE];   // CAL Name of the "device", ASCII
    HSA_ENGINE_VERSION uCodeEngineVersions; // microcode version word, see HSA_ENGINE_VERSION
    HSA_DEBUG_PROPERTIES DebugProperties; // Debug properties of this node.
    HSAuint64       HiveID;            // XGMI Hive the GPU node belongs to in the system. It is an opaque and static
                                       // number hash created by the PSP
    HSAuint32       NumSdmaEngines;    // number of PCIe optimized SDMA engines
    HSAuint32       NumSdmaXgmiEngines;// number of XGMI optimized SDMA engines

    HSAuint8        NumSdmaQueuesPerEngine;// number of SDMA queue per one engine
    HSAuint8        NumCpQueues; // number of Compute queues
    HSAuint8        NumGws;            // number of GWS barriers
    HSAuint8        Integrated;        // 0 - discrete GPU, 1 - integrated GPU (including small APU and APP APU)

    HSAuint32       Domain;            // PCI domain of the GPU
    HSAuint64       UniqueID;          // Globally unique immutable id

    HSAuint32       VGPRSizePerCU;     // VGPR size in bytes per CU
    HSAuint32       SGPRSizePerCU;     // SGPR size in bytes per CU

    HSAuint32       NumXcc;            // Number of XCC
    HSAuint32       KFDGpuID;          // GPU Hash ID generated by KFD

    HSAuint32       FamilyID;          // GPU family id
} HsaNodeProperties;


typedef enum _HSA_HEAPTYPE
{
    HSA_HEAPTYPE_SYSTEM                = 0,
    HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC   = 1, // CPU "visible" part of GPU device local memory (for discrete GPU)
    HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE  = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU)
                                            // All HSA accessible memory is per definition "CPU visible"
                                            // "Private memory" is relevant for graphics interop only.
    HSA_HEAPTYPE_GPU_GDS               = 3, // GPU internal memory (GDS)
    HSA_HEAPTYPE_GPU_LDS               = 4, // GPU internal memory (LDS)
    HSA_HEAPTYPE_GPU_SCRATCH           = 5, // GPU special memory (scratch)
    HSA_HEAPTYPE_DEVICE_SVM            = 6, // sys-memory mapped by device page tables
    HSA_HEAPTYPE_MMIO_REMAP            = 7, // remapped mmio, such as hdp flush registers

    HSA_HEAPTYPE_NUMHEAPTYPES,
    HSA_HEAPTYPE_SIZE                  = 0xFFFFFFFF
} HSA_HEAPTYPE;

// Memory property flags. The same 32 bits can be read either as the raw
// MemoryProperty word or through the ui32 bit-fields (1 + 1 + 30 = 32 bits).
typedef union
{
    HSAuint32 MemoryProperty;               // all flags as one 32-bit word
    struct
    {
        unsigned int HotPluggable      : 1; // the memory may be removed by some system action,
                                            // memory should be used for temporary data
        unsigned int NonVolatile       : 1; // memory content is preserved across a power-off cycle.
        unsigned int Reserved          :30; // remaining bits, reserved
    } ui32;
} HSA_MEMORYPROPERTY;


//
// Discoverable HSA Memory properties.
// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
//

// Properties of one memory range of a node. The size is stored in an anonymous
// union, so it can be read as one 64-bit value (SizeInBytes) or as two 32-bit
// halves (ui32.SizeInBytesLow / ui32.SizeInBytesHigh).
typedef struct _HsaMemoryProperties
{
    HSA_HEAPTYPE    HeapType;          // system or frame buffer,
    union
    {
        HSAuint64   SizeInBytes;       // physical memory size of the memory range in bytes
        struct
        {
            HSAuint32 SizeInBytesLow;  // physical memory size of the memory range in bytes (lower 32bit)
            HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
        } ui32;
    };
    HSA_MEMORYPROPERTY  Flags;         // See definitions above

    HSAuint32    Width;                // memory width - the number of parallel bits of the memory interface
    HSAuint32    MemoryClockMax;       // memory clock for the memory, this allows computing the available bandwidth
                                       // to the memory when needed
    HSAuint64    VirtualBaseAddress;   // if set to value != 0, indicates the virtual base address of the memory
                                       // in process virtual space
} HsaMemoryProperties;

//
// Discoverable Cache Properties. (optional).
// The structure is the output parameter of the hsaKmtGetNodeCacheProperties() function
// Any of the parameters may be 0 (= not defined)
//

#define HSA_CPU_SIBLINGS            256         // element count of the SiblingMap arrays below
#define HSA_PROCESSORID_ALL         0xFFFFFFFF  // all 32 bits set

// Cache type flags. The same 32 bits can be read either as the raw Value
// or through the ui32 bit-fields (4 flags + 28 reserved bits = 32 bits).
typedef union
{
    HSAuint32 Value;                         // all flags as one 32-bit word
    struct
    {
        unsigned int Data           : 1;     // "Data" flag
        unsigned int Instruction    : 1;     // "Instruction" flag
        unsigned int CPU            : 1;     // "CPU" flag
        unsigned int HSACU          : 1;     // "HSACU" flag
        unsigned int Reserved       :28;     // remaining bits, reserved
    } ui32;
} HsaCacheType;

// Properties of one cache. Note that the struct tag is spelled
// _HaCacheProperties (no 's'), while the typedef name is HsaCacheProperties.
typedef struct _HaCacheProperties
{
    HSAuint32    ProcessorIdLow;   // Identifies the processor number

    HSAuint32    CacheLevel;       // Integer representing level: 1, 2, 3, 4, etc
    HSAuint32    CacheSize;        // Size of the cache
    HSAuint32    CacheLineSize;    // Cache line size in bytes
    HSAuint32    CacheLinesPerTag; // Cache lines per Cache Tag
    HSAuint32    CacheAssociativity; // Cache Associativity
    HSAuint32    CacheLatency;     // Cache latency in ns
    HsaCacheType CacheType;        // type flags, see HsaCacheType
    HSAuint32    SiblingMap[HSA_CPU_SIBLINGS]; // HSA_CPU_SIBLINGS entries of 32 bits each
} HsaCacheProperties;


//
// Discoverable CPU Compute Properties. (optional).
// The structure is the output parameter of the hsaKmtGetCComputeProperties() function
// Any of the parameters may be 0 (= not defined)
//

// CPU compute properties: a single sibling map of HSA_CPU_SIBLINGS 32-bit entries.
typedef struct _HsaCComputeProperties
{
    HSAuint32    SiblingMap[HSA_CPU_SIBLINGS];
} HsaCComputeProperties;

//
// Discoverable IoLink Properties (optional).
// The structure is the output parameter of the hsaKmtGetNodeIoLinkProperties() function.
// Any of the parameters may be 0 (= not defined)
//

typedef enum _HSA_IOLINKTYPE {
    HSA_IOLINKTYPE_UNDEFINED      = 0,
    HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
    HSA_IOLINKTYPE_PCIEXPRESS     = 2,
    HSA_IOLINKTYPE_AMBA           = 3,
    HSA_IOLINKTYPE_MIPI           = 4,
    HSA_IOLINK_TYPE_QPI_1_1       = 5,
    HSA_IOLINK_TYPE_RESERVED1     = 6,
    HSA_IOLINK_TYPE_RESERVED2     = 7,
    HSA_IOLINK_TYPE_RAPID_IO      = 8,
    HSA_IOLINK_TYPE_INFINIBAND    = 9,
    HSA_IOLINK_TYPE_RESERVED3     = 10,
    HSA_IOLINK_TYPE_XGMI          = 11,
    HSA_IOLINK_TYPE_XGOP          = 12,
    HSA_IOLINK_TYPE_GZ            = 13,
    HSA_IOLINK_TYPE_ETHERNET_RDMA = 14,
    HSA_IOLINK_TYPE_RDMA_OTHER    = 15,
    HSA_IOLINK_TYPE_OTHER         = 16,
    HSA_IOLINKTYPE_NUMIOLINKTYPES,
    HSA_IOLINKTYPE_SIZE           = 0xFFFFFFFF
} HSA_IOLINKTYPE;

// IO link property flags. The same 32 bits can be read either as the raw
// LinkProperty word or through the ui32 bit-fields (5 flags + 27 reserved = 32 bits).
typedef union
{
    HSAuint32 LinkProperty;                  // all flags as one 32-bit word
    struct
    {
        unsigned int Override          : 1;  // bus link properties are determined by this structure
                                             // not by the HSA_IOLINKTYPE. The other flags are valid
                                             // only if this bit is set to one
        unsigned int NonCoherent       : 1;  // The link doesn't support coherent transactions
                                             // memory accesses across must not be set to "host cacheable"!
        unsigned int NoAtomics32bit    : 1;  // The link doesn't support 32bit-wide atomic transactions
        unsigned int NoAtomics64bit    : 1;  // The link doesn't support 64bit-wide atomic transactions
        unsigned int NoPeerToPeerDMA   : 1;  // The link doesn't allow device P2P access
        unsigned int Reserved          :27;  // remaining bits, reserved
    } ui32;
} HSA_LINKPROPERTY;


// Properties of one IO link between two nodes.
typedef struct _HsaIoLinkProperties
{
    HSA_IOLINKTYPE  IoLinkType;      // see above
    HSAuint32    VersionMajor;       // Bus interface version (optional)
    HSAuint32    VersionMinor;       // Bus interface version (optional)

    HSAuint32    NodeFrom;           // presumably the node the link starts at - TODO confirm
    HSAuint32    NodeTo;             // presumably the node the link leads to - TODO confirm

    HSAuint32    Weight;             // weight factor (derived from CDIT)

    HSAuint32    MinimumLatency;     // minimum cost of time to transfer (rounded to ns)
    HSAuint32    MaximumLatency;     // maximum cost of time to transfer (rounded to ns)
    HSAuint32    MinimumBandwidth;   // minimum interface Bandwidth in MB/s
    HSAuint32    MaximumBandwidth;   // maximum interface Bandwidth in MB/s
    HSAuint32    RecTransferSize;    // recommended transfer size to reach maximum bandwidth in Bytes
    HSAuint32    RecSdmaEngIdMask;   // recommended sdma engine IDs to reach maximum bandwidth
    HSA_LINKPROPERTY Flags;          // override flags (may be active for specific platforms)
} HsaIoLinkProperties;

//
// Memory allocation definitions for the KFD HSA interface
//

// Memory allocation flags. The anonymous union overlays the ui32 bit-fields
// with the raw 32-bit Value. The declared field widths (24 flag bits plus
// 8 reserved bits) add up to 32.
typedef struct _HsaMemFlags
{
    union
    {
        struct
        {
            unsigned int NonPaged    : 1; // default = 0: pageable memory
            unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
            unsigned int ReadOnly    : 1; // default = 0: Read/Write memory
            unsigned int PageSize    : 2; // see HSA_PAGE_SIZE
            unsigned int HostAccess  : 1; // default = 0: GPU access only
            unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
                                          // discrete GPU local), allocation may fall back to system memory node 0
                                          // memory (= always available). Otherwise no allocation is possible.
            unsigned int GDSMemory   : 1; // default = 0: If set, the allocation will occur in GDS heap.
                                          // HostAccess must be 0, all other flags (except NoSubstitute) should
                                          // be 0 when setting this entry to 1. GDS allocation may fail due to
                                          // limited resources. Application code is required to work without
                                          // any allocated GDS memory using regular memory.
                                          // Allocation fails on any node without GPU function.
            unsigned int Scratch     : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
                                          // HostAccess must be 0, all other flags (except NoSubstitute) should be 0
                                          // when setting this entry to 1. Scratch allocation may fail due to limited
                                          // resources. Application code is required to work without any allocation.
                                          // Allocation fails on any node without GPU function.
            unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow
                                              // atomic ops processing. On AMD APU, this will use the ATC path on system
                                              // memory, irrespective of the NonPaged flag setting (= if NonPaged is set,
                                              // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM).
                                              // All atomic ops must be supported on this memory.
            unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however
                                                 // focused on AMD discrete GPU that support PCIe atomics; the memory
                                                 // allocation is mapped to allow for PCIe atomics to operate on system
                                                 // memory, irrespective of NonPaged set or the presence of an ATC path
                                                 // in the system. The atomic operations supported are limited to SWAP,
                                                 // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic
                                                 // increment and decrement via 2-complement arithmetic), which are the
                                                 // only atomic ops directly supported in PCI Express.
                                                 // On AMD APU, setting this flag will allocate the same type of memory
                                                 // as AtomicAccessFull, but it will be considered compatible with
                                                 // discrete GPU atomic operations access.
            unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed
                                           // for executable code (e.g. queue memory) by the host CPU or the device.
                                           // Influences the page attribute setting within the allocation
            unsigned int CoarseGrain : 1;  // default = 0: The memory can be accessed assuming cache
                                           // coherency maintained by link infrastructure and HSA agents.
                                           // 1: memory consistency needs to be enforced at
                                           // synchronization points at dispatch or other software
                                           // enforced synchronization boundaries.
            unsigned int AQLQueueMemory: 1; // default = 0; If 1: The caller indicates that the memory will be used as AQL queue memory.
                                            // The KFD will ensure that the memory returned is allocated in the optimal memory location
                                            // and optimal alignment requirements
            unsigned int FixedAddress : 1; // Allocate memory at specified virtual address. Fail if address is not free.
            unsigned int NoNUMABind:    1; // Don't bind system memory to a specific NUMA node
            unsigned int Uncached:      1; // Caching flag for fine-grained memory on A+A HW platform
            unsigned int NoAddress:     1; // only do vram allocation, return a handle, not allocate virtual address.
            unsigned int OnlyAddress:   1; // only do virtual address allocation without vram allocation.
            unsigned int ExtendedCoherent: 1;  // system-scope coherence on atomic instructions
            unsigned int GTTAccess:     1;  // default = 0; If 1: The caller indicates this memory will be mapped to GART for MES
                                            // KFD will allocate GTT memory with the Preferred_node set as gpu_id for GART mapping
            unsigned int Contiguous:    1; // Allocate contiguous VRAM
            unsigned int ExecuteBlit:   1; // default = 0; If 1: The caller indicates that the memory is for blit kernel object.
            unsigned int Reserved:      8; // remaining bits, reserved

        } ui32;
        HSAuint32 Value;                   // all flags as one 32-bit word
    };
} HsaMemFlags;

// Memory mapping flags. The anonymous union overlays the ui32 bit-fields with
// the raw 32-bit Value (1 + 2 + 1 + 2 + 1 + 1 + 1 + 23 = 32 bits).
typedef struct _HsaMemMapFlags
{
    union
    {
        struct
        {
            unsigned int Reserved1      :  1; // reserved
            unsigned int CachePolicy    :  2; // see HSA_CACHING_TYPE
            unsigned int ReadOnly       :  1; // memory is not modified while mapped
                                              // allows migration scale-out
            unsigned int PageSize       :  2; // see HSA_PAGE_SIZE, hint to use
                                              // this page size if possible and
                                              // smaller than default
            unsigned int HostAccess     :  1; // default = 0: GPU access only
            unsigned int Migrate        :  1; // Hint: Allows migration to local mem
                                              // of mapped GPU(s), instead of mapping
                                              // physical location
            unsigned int Probe          :  1; // default = 0: Indicates that a range
                                              // will be mapped by the process soon,
                                              // but does not initiate a map operation
                                              // may trigger eviction of nonessential
                                              // data from the memory, reduces latency
                                              // "cleanup hint" only, may be ignored
            unsigned int Reserved       : 23; // remaining bits, reserved
        } ui32;
        HSAuint32 Value;                      // all flags as one 32-bit word
    };
} HsaMemMapFlags;

// Description of a graphics resource (buffer) and its metadata.
// The metadata pointer is read-only for the caller; per the field comment
// the metadata itself is owned by the thunk.
typedef struct _HsaGraphicsResourceInfo {
    void       *MemoryAddress;      // For use in hsaKmtMapMemoryToGPU(Nodes)
    HSAuint64  SizeInBytes;         // Buffer size
    const void *Metadata;           // Pointer to metadata owned by Thunk
    HSAuint32  MetadataSizeInBytes; // Size of metadata
    HSAuint32  NodeId;              // GPU exported the buffer
} HsaGraphicsResourceInfo;

typedef enum _HSA_CACHING_TYPE
{
    HSA_CACHING_CACHED        = 0,
    HSA_CACHING_NONCACHED     = 1,
    HSA_CACHING_WRITECOMBINED = 2,
    HSA_CACHING_RESERVED      = 3,
    HSA_CACHING_NUM_CACHING,
    HSA_CACHING_SIZE          = 0xFFFFFFFF
} HSA_CACHING_TYPE;

// Page size selectors. The four values (0..3) fit the 2-bit PageSize fields
// of HsaMemFlags and HsaMemMapFlags.
typedef enum _HSA_PAGE_SIZE
{
    // 4KB pages
    HSA_PAGE_SIZE_4KB = 0,
    // 64KB pages, not generally available in systems
    HSA_PAGE_SIZE_64KB = 1,
    // 2MB pages
    HSA_PAGE_SIZE_2MB = 2,
    // 1GB pages, not generally available in systems
    HSA_PAGE_SIZE_1GB = 3,
} HSA_PAGE_SIZE;


// Device kind selector.
typedef enum _HSA_DEVICE
{
    // CPU device
    HSA_DEVICE_CPU = 0,
    // GPU device
    HSA_DEVICE_GPU = 1,
    // Number of device kinds listed above
    MAX_HSA_DEVICE = 2
} HSA_DEVICE;


typedef enum _HSA_QUEUE_PRIORITY
{
    HSA_QUEUE_PRIORITY_MINIMUM        = -3,
    HSA_QUEUE_PRIORITY_LOW            = -2,
    HSA_QUEUE_PRIORITY_BELOW_NORMAL   = -1,
    HSA_QUEUE_PRIORITY_NORMAL         =  0,
    HSA_QUEUE_PRIORITY_ABOVE_NORMAL   =  1,
    HSA_QUEUE_PRIORITY_HIGH           =  2,
    HSA_QUEUE_PRIORITY_MAXIMUM        =  3,
    HSA_QUEUE_PRIORITY_NUM_PRIORITY,
    HSA_QUEUE_PRIORITY_SIZE           = 0xFFFFFFFF
} HSA_QUEUE_PRIORITY;

// Queue type selector. Values are grouped in decades: 1-6 are the plain queue
// types, 11-14 the "_OS" variants that may reference OS graphics resources,
// and 21-23 the AQL packet compatible queues.
typedef enum _HSA_QUEUE_TYPE
{
    HSA_QUEUE_COMPUTE            = 1,  // AMD PM4 compatible Compute Queue
    HSA_QUEUE_SDMA               = 2,  // PCIe optimized SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
    HSA_QUEUE_MULTIMEDIA_DECODE  = 3,  // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE  = 4,  // reserved, for HSA multimedia encode queue
    HSA_QUEUE_SDMA_XGMI          = 5,  // XGMI optimized SDMA Queue
    HSA_QUEUE_SDMA_BY_ENG_ID     = 6,  // Queue with specified SDMA engine ID

    // The following values indicate a queue type permitted to reference OS graphics
    // resources through the interoperation API. See [5] "HSA Graphics Interoperation
    // specification" for more details on use of such resources.

    HSA_QUEUE_COMPUTE_OS           = 11, // AMD PM4 compatible Compute Queue
    HSA_QUEUE_SDMA_OS              = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
    HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14,  // reserved, for HSA multimedia encode queue

    HSA_QUEUE_COMPUTE_AQL          = 21, // HSA AQL packet compatible Compute Queue
    HSA_QUEUE_DMA_AQL              = 22, // HSA AQL packet compatible DMA Queue
    HSA_QUEUE_DMA_AQL_XGMI         = 23, // HSA AQL packet compatible XGMI optimized DMA Queue

    // more types in the future

    HSA_QUEUE_TYPE_SIZE            = 0xFFFFFFFF     // aligns to 32bit enum
} HSA_QUEUE_TYPE;

/**
  The user context save area is page aligned. The HsaUserContextSaveAreaHeader
  header starts at offset 0. Space for a user space copy of the control stack
  comes next and is immediately followed by the user space wave save state. The
  start of the user space wave save state is page aligned. The debugger reserved
  area comes next and is 64 byte aligned.

  The user context save area is valid for the duration that the associated
  queue exists. When a context save occurs, the HsaUserContextSaveAreaHeader
  header will be updated with information about the context save. The context
  save area is not modified by any other operation, including a context resume.
 */

// Header located at offset 0 of the user context save area. All offsets are
// in bytes, relative to the start of the user context save area.
typedef struct
{
    HSAuint32 ControlStackOffset;  // Byte offset from start of user context
                                   // save area to the last saved top (lowest
                                   // address) of control stack data. Must be
                                   // 4 byte aligned.
    HSAuint32 ControlStackSize;  // Byte size of the last saved control stack
                                 // data. Must be 4 byte aligned.
    HSAuint32 WaveStateOffset;   // Byte offset from start of user context save
                                 // area to the last saved base (lowest address)
                                 // of wave state data. Must be 4 byte aligned.
    HSAuint32 WaveStateSize;     // Byte size of the last saved wave state data.
                                 // Must be 4 byte aligned.
    HSAuint32 DebugOffset;       // Byte offset from start of the user context
                                 // save area to the memory reserved for the
                                 // debugger. Must be 64 byte aligned.
    HSAuint32 DebugSize;         // Byte size of the memory reserved for the
                                 // debugger. Must be 64 byte aligned.
    volatile HSAint64 *ErrorReason;      // Address of the HSA signal payload for
                                         // reporting the error reason bitmask.
                                         // Must be 4 byte aligned.
    HSAuint32 ErrorEventId;      // Event ID used for exception signalling.
                                 // Must be 4 byte aligned.
    HSAuint32 Reserved1;         // Reserved; follows ErrorEventId so the two
                                 // 32-bit fields together occupy 8 bytes.
} HsaUserContextSaveAreaHeader;


// Queue state snapshot: error state, CU assignment and the location/size of
// the user context save area and control stack.
typedef struct
{
	HSAuint32 QueueDetailError;	// HW specific queue error state
	HSAuint32 QueueTypeExtended;	// HW specific queue type info.
					// 0 = no information
	HSAuint32 NumCUAssigned;	// size of *CUMaskInfo bit array, Multiple
					// of 32, 0 = no information
	HSAuint32* CUMaskInfo;		// runtime/system CU assignment for realtime
					// queue & reserved CU priority. Ptr to
					// bit-array, each bit represents one CU.
					// NULL = no information
	HSAuint32* UserContextSaveArea;	// reference to user space context save area
	HSAuint64 SaveAreaSizeInBytes;	// Must be 4-Byte aligned
	HSAuint32* ControlStackTop;	// ptr to the TOS (top of the control stack)
	HSAuint64 ControlStackUsedInBytes; // Must be 4-Byte aligned
	HsaUserContextSaveAreaHeader *SaveAreaHeader; // header of the user context save area
	HSAuint64 Reserved2;		// reserved. NOTE(review): the previous comment
					// here read "runtime/system CU assignment",
					// which looks copied from CUMaskInfo - confirm
} HsaQueueInfo;

// Resources describing a queue: its ID plus the doorbell, write pointer and
// read pointer locations. Each location is an anonymous union that offers
// the same storage as a 32-bit pointer view, a 64-bit ("_aql") pointer view,
// or a raw 64-bit integer value.
typedef struct _HsaQueueResource
{
    HSA_QUEUEID     QueueId;    /** queue ID */
    /** Doorbell address to notify HW of a new dispatch */
    union
    {
        HSAuint32*  Queue_DoorBell;      // doorbell viewed as a 32-bit location
        HSAuint64*  Queue_DoorBell_aql;  // doorbell viewed as a 64-bit location
        HSAuint64   QueueDoorBell;       // doorbell address as an integer
    };

    /** virtual address to notify HW of queue write ptr value */
    union
    {
        HSAuint32*  Queue_write_ptr;      // write pointer viewed as a 32-bit location
        HSAuint64*  Queue_write_ptr_aql;  // write pointer viewed as a 64-bit location
        HSAuint64   QueueWptrValue;       // same storage as an integer
    };

    /** virtual address updated by HW to indicate current read location */
    union
    {
        HSAuint32*  Queue_read_ptr;      // read pointer viewed as a 32-bit location
        HSAuint64*  Queue_read_ptr_aql;  // read pointer viewed as a 64-bit location
        HSAuint64   QueueRptrValue;      // same storage as an integer
    };

    volatile HSAint64* ErrorReason;  /** exception bits signal payload */
} HsaQueueResource;


// TEMPORARY structure definition - to be used only on the "Trinity + Southern
// Islands" platform. Describes the user-mode mapped compute ring.
typedef struct _HsaQueueReport
{
    HSAuint32     VMID;         //Required on SI to dispatch IB in primary ring
    void*         QueueAddress; //virtual address of UM mapped compute ring
    HSAuint64     QueueSize;    //size of the UM mapped compute ring
} HsaQueueReport;


// Debugger operations that can be applied to a wavefront. Operations are
// numbered from 1; 0 is not a valid operation.
typedef enum _HSA_DBG_WAVEOP
{
    HSA_DBG_WAVEOP_HALT        = 1, //Halts a wavefront
    HSA_DBG_WAVEOP_RESUME      = 2, //Resumes a wavefront
    HSA_DBG_WAVEOP_KILL        = 3, //Kills a wavefront
    HSA_DBG_WAVEOP_DEBUG       = 4, //Causes wavefront to enter debug mode
    HSA_DBG_WAVEOP_TRAP        = 5, //Causes wavefront to take a trap
    HSA_DBG_NUM_WAVEOP         = 5, //Number of operations; same value as the last one (TRAP)
    HSA_DBG_MAX_WAVEOP         = 0xFFFFFFFF //presumably pads the enum to 32 bits
} HSA_DBG_WAVEOP;

// Selects which wavefronts a debugger wave operation is sent to. Value 1 is
// intentionally absent (see the comment inside the enum).
typedef enum _HSA_DBG_WAVEMODE
{
    HSA_DBG_WAVEMODE_SINGLE               = 0,  //send command to a single wave
    //Broadcast to all wavefronts of all processes is not supported for HSA user mode
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS    = 2,  //send to waves within current process
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,  //send to waves within current process on CU
    HSA_DBG_NUM_WAVEMODE                  = 3,  //equals the highest mode value (BROADCAST_PROCESS_CU)
    HSA_DBG_MAX_WAVEMODE                  = 0xFFFFFFFF //presumably pads the enum to 32 bits
} HSA_DBG_WAVEMODE;


// Type tag for debugger wave messages.
typedef enum _HSA_DBG_WAVEMSG_TYPE
{
    HSA_DBG_WAVEMSG_AUTO    = 0,
    HSA_DBG_WAVEMSG_USER    = 1,
    HSA_DBG_WAVEMSG_ERROR   = 2,
    HSA_DBG_NUM_WAVEMSG,                  // implicit value 3: number of message types above
    HSA_DBG_MAX_WAVEMSG     = 0xFFFFFFFF  // presumably pads the enum to 32 bits
} HSA_DBG_WAVEMSG_TYPE;

// Kind of memory operation an address watch reacts to.
typedef enum _HSA_DBG_WATCH_MODE
{
    HSA_DBG_WATCH_READ        = 0, //Read operations only
    HSA_DBG_WATCH_NONREAD     = 1, //Write or Atomic operations only
    HSA_DBG_WATCH_ATOMIC      = 2, //Atomic Operations only
    HSA_DBG_WATCH_ALL         = 3, //Read, Write or Atomic operations
    HSA_DBG_WATCH_NUM              //implicit value 4: number of watch modes above
} HSA_DBG_WATCH_MODE;

// How a supplied HSA_DBG_TRAP_MASK is combined with the exception mask.
typedef enum _HSA_DBG_TRAP_OVERRIDE
{
  HSA_DBG_TRAP_OVERRIDE_OR      = 0, // Bitwise OR exception mask with HSA_DBG_TRAP_MASK
  HSA_DBG_TRAP_OVERRIDE_REPLACE = 1, // Replace exception mask with HSA_DBG_TRAP_MASK
  HSA_DBG_TRAP_OVERRIDE_NUM          // implicit value 2: number of override modes above
} HSA_DBG_TRAP_OVERRIDE;

// Exception mask bits. Each enumerator is a distinct single bit, so values
// can be OR-ed together to form a mask.
typedef enum _HSA_DBG_TRAP_MASK
{
  HSA_DBG_TRAP_MASK_FP_INVALID           = 1,   // Floating point invalid operation
  HSA_DBG_TRAP_MASK_FP_INPUT_DENORMAL    = 2,   // Floating point input denormal
  HSA_DBG_TRAP_MASK_FP_INPUT_DENOMAL     = HSA_DBG_TRAP_MASK_FP_INPUT_DENORMAL,
                                                // Misspelled legacy name, kept so
                                                // existing callers keep compiling
  HSA_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO    = 4,   // Floating point divide by zero
  HSA_DBG_TRAP_MASK_FP_OVERFLOW          = 8,   // Floating point overflow
  HSA_DBG_TRAP_MASK_FP_UNDERFLOW         = 16,  // Floating point underflow
  HSA_DBG_TRAP_MASK_FP_INEXACT           = 32,  // Floating point inexact
  HSA_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO   = 64,  // Integer divide by zero
  HSA_DBG_TRAP_MASK_DBG_ADDRESS_WATCH    = 128, // Debug address watch
  HSA_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256  // Memory violation
} HSA_DBG_TRAP_MASK;

// Debug trap exception codes. Codes are partitioned by scope: 1-31 are raised
// per queue, 32-47 per device and 48 and up per process. A code is turned
// into a mask bit with HSA_EC_MASK().
typedef enum _HSA_DBG_TRAP_EXCEPTION_CODE {
    HSA_DBG_EC_NONE                                             = 0,

    /* per queue: wave exceptions */
    HSA_DBG_EC_QUEUE_WAVE_ABORT                                 = 1,
    HSA_DBG_EC_QUEUE_WAVE_TRAP                                  = 2,
    HSA_DBG_EC_QUEUE_WAVE_MATH_ERROR                            = 3,
    HSA_DBG_EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION                   = 4,
    HSA_DBG_EC_QUEUE_WAVE_MEMORY_VIOLATION                      = 5,
    HSA_DBG_EC_QUEUE_WAVE_APERTURE_VIOLATION                    = 6,

    /* per queue: packet exceptions */
    HSA_DBG_EC_QUEUE_PACKET_DISPATCH_DIM_INVALID                = 16,
    HSA_DBG_EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17,
    HSA_DBG_EC_QUEUE_PACKET_DISPATCH_CODE_INVALID               = 18,
    HSA_DBG_EC_QUEUE_PACKET_RESERVED                            = 19,
    HSA_DBG_EC_QUEUE_PACKET_UNSUPPORTED                         = 20,
    HSA_DBG_EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID    = 21,
    HSA_DBG_EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID           = 22,
    HSA_DBG_EC_QUEUE_PACKET_VENDOR_UNSUPPORTED                  = 23,

    /* per queue: queue state */
    HSA_DBG_EC_QUEUE_PREEMPTION_ERROR                           = 30,
    HSA_DBG_EC_QUEUE_NEW                                        = 31,

    /* per device */
    HSA_DBG_EC_DEVICE_QUEUE_DELETE                              = 32,
    HSA_DBG_EC_DEVICE_MEMORY_VIOLATION                          = 33,
    HSA_DBG_EC_DEVICE_RAS_ERROR                                 = 34,
    HSA_DBG_EC_DEVICE_FATAL_HALT                                = 35,
    HSA_DBG_EC_DEVICE_NEW                                       = 36,

    /* per process */
    HSA_DBG_EC_PROCESS_RUNTIME                                  = 48,
    HSA_DBG_EC_PROCESS_DEVICE_REMOVE                            = 49,

    /* one past the highest code defined above */
    HSA_DBG_EC_MAX
} HSA_DBG_TRAP_EXCEPTION_CODE;

/* Mask generated by ecode defined in enum above. */
#define HSA_EC_MASK(ecode)	(1ULL << (ecode - 1))

// Controls how newly launched wavefronts start executing.
typedef enum _HSA_DBG_WAVE_LAUNCH_MODE
{
    HSA_DBG_WAVE_LAUNCH_MODE_NORMAL      = 0, // Wavefront launched normally.
    HSA_DBG_WAVE_LAUNCH_MODE_HALT        = 1, // Wavefront launched in halted mode.
    HSA_DBG_WAVE_LAUNCH_MODE_KILL        = 2, // Wavefront is launched but immediately
                                              // terminated before executing any instructions.
    HSA_DBG_WAVE_LAUNCH_MODE_SINGLE_STEP = 3, // Wavefront is launched in single step (debug)
                                              // mode. If debug trap is enabled by
                                              // hsaKmtDbgEnableDebugTrap() then causes a
                                              // trap after executing each instruction,
                                              // otherwise behaves the same as
                                              // HSA_DBG_WAVE_LAUNCH_MODE_NORMAL.
    HSA_DBG_WAVE_LAUNCH_MODE_DISABLE     = 4, // Disable launching any new waves.
    HSA_DBG_WAVE_LAUNCH_MODE_NUM              // implicit value 5: number of launch modes above
} HSA_DBG_WAVE_LAUNCH_MODE;

/**
 *    Debug node control flags.
 *    There are no flags currently defined; the only enumerator is the
 *    HSA_DBG_NODE_CONTROL_FLAG_MAX marker (0x01).
 */
typedef enum HSA_DBG_NODE_CONTROL {
    HSA_DBG_NODE_CONTROL_FLAG_MAX = 0x01
} HSA_DBG_NODE_CONTROL;

// Bit 31 of a 32-bit capability word. Presumably set in the "runtime enable"
// capabilities to advertise core dump support - confirm against its users.
#define HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK 0x80000000

// This structure is hardware specific and may change in the future.
// Generation-2 wave message payload: one 32-bit value plus a reserved word.
typedef struct _HsaDbgWaveMsgAMDGen2
{
    HSAuint32      Value;      // message value
    HSAuint32      Reserved2;  // reserved

} HsaDbgWaveMsgAMDGen2;

// Union over the hardware-generation specific wave message layouts. Only the
// Gen2 layout exists today; the union leaves room for later generations.
typedef union _HsaDbgWaveMessageAMD
{
    HsaDbgWaveMsgAMDGen2    WaveMsgInfoGen2;
    //for future HsaDbgWaveMsgAMDGen3;
} HsaDbgWaveMessageAMD;

// Debugger wave message: a hardware specific message plus a pointer to
// associated host-accessible data.
typedef struct _HsaDbgWaveMessage
{
    void*                   MemoryVA;         // ptr to associated host-accessible data
    HsaDbgWaveMessageAMD    DbgWaveMsg;       // hardware specific message payload
} HsaDbgWaveMessage;


//
// HSA sync primitive, Event and HW Exception notification API definitions
// The API functions allow the runtime to define a so-called sync-primitive, a SW object
// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
// through a defined GPU interrupt. A syncvar is a process virtual memory location of
// a certain size that can be accessed by CPU and GPU shader code within the process to set
// and query the content within that memory. The definition of the content is determined by
// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
// The syncvar values may be commonly written through a PM4 WRITE_DATA packet in the
// user mode instruction stream.
// The OS scheduler event is typically associated and signaled by an interrupt issued by
// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
// surfaced by the KFD by this mechanism, too.
//

// these are the new definitions for events
// Event types. The type selects which member of HsaEventData::EventData
// carries the event payload.
typedef enum _HSA_EVENTTYPE
{
    HSA_EVENTTYPE_SIGNAL                     = 0, //user-mode generated GPU signal
    HSA_EVENTTYPE_NODECHANGE                 = 1, //HSA node change (attach/detach)
    HSA_EVENTTYPE_DEVICESTATECHANGE          = 2, //HSA device state change( start/stop )
    HSA_EVENTTYPE_HW_EXCEPTION               = 3, //GPU shader exception event
    HSA_EVENTTYPE_SYSTEM_EVENT               = 4, //GPU SYSCALL with parameter info
    HSA_EVENTTYPE_DEBUG_EVENT                = 5, //GPU signal for debugging
    HSA_EVENTTYPE_PROFILE_EVENT              = 6, //GPU signal for profiling
    HSA_EVENTTYPE_QUEUE_EVENT                = 7, //GPU signal queue idle state (EOP pm4)
    HSA_EVENTTYPE_MEMORY                     = 8, //GPU signal for signaling memory access faults and memory subsystem issues
    //...
    HSA_EVENTTYPE_MAXID,                          //implicit value 9: one past the last event type
    HSA_EVENTTYPE_TYPE_SIZE                  = 0xFFFFFFFF //presumably pads the enum to 32 bits
} HSA_EVENTTYPE;


//
// Definitions for types of pending debug events
//
// Pending debug event kinds. TRAP (1) and VMFAULT (2) are distinct bits and
// TRAP_VMFAULT (3) is their bitwise OR.
typedef enum _HSA_DEBUG_EVENT_TYPE
{
    HSA_DEBUG_EVENT_TYPE_NONE         = 0,
    HSA_DEBUG_EVENT_TYPE_TRAP         = 1,
    HSA_DEBUG_EVENT_TYPE_VMFAULT      = 2,
    HSA_DEBUG_EVENT_TYPE_TRAP_VMFAULT = 3
} HSA_DEBUG_EVENT_TYPE;

// Identifier of an event (see HsaEvent::EventId); a plain 32-bit value.
typedef HSAuint32  HSA_EVENTID;

//
// Subdefinitions for various event types: Syncvar
//

// Location and size of a user-mode sync variable. The location is available
// either as a pointer or as a 64-bit integer sharing the same storage.
typedef struct _HsaSyncVar
{
    union
    {
        void*       UserData;           //pointer to user mode data
        HSAuint64   UserDataPtrValue;   //64bit compatibility of value
    } SyncVar;
    HSAuint64       SyncVarSize;        //size of the sync variable (units not stated here; presumably bytes)
} HsaSyncVar;

//
// Subdefinitions for various event types: NodeChange
//

// Payload flag of a node change event: whether a node was added or removed.
typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
{
    HSA_EVENTTYPE_NODECHANGE_ADD     = 0,
    HSA_EVENTTYPE_NODECHANGE_REMOVE  = 1,
    HSA_EVENTTYPE_NODECHANGE_SIZE    = 0xFFFFFFFF  // presumably pads the enum to 32 bits
} HSA_EVENTTYPE_NODECHANGE_FLAGS;

// Event payload for HSA_EVENTTYPE_NODECHANGE.
typedef struct _HsaNodeChange
{
    HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;   // HSA node added/removed on the platform
} HsaNodeChange;

//
// Sub-definitions for various event types: DeviceStateChange
//

// Payload flag of a device state change event. Note the enumerators are
// spelled "DEVICESTATUSCHANGE" while the type is "DEVICESTATECHANGE".
typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
{
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_START     = 0, //device started (and available)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP      = 1, //device stopped (i.e. unavailable)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE      = 0xFFFFFFFF //presumably pads the enum to 32 bits
} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;

// Event payload for HSA_EVENTTYPE_DEVICESTATECHANGE.
typedef struct _HsaDeviceStateChange
{
    HSAuint32                           NodeId;     // F-NUMA node that contains the device
    HSA_DEVICE                          Device;     // device type: GPU or CPU
    HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags;    // event flags
} HsaDeviceStateChange;

//
// Sub-definitions for various event types: Memory exception
//

// Severity of a memory access fault, from recoverable to fatal for the
// whole GPU VA context.
typedef enum _HSA_EVENTID_MEMORYFLAGS
{
    HSA_EVENTID_MEMORY_RECOVERABLE           = 0, //access fault, recoverable after page adjustment
    HSA_EVENTID_MEMORY_FATAL_PROCESS         = 1, //memory access requires process context destruction, unrecoverable
    HSA_EVENTID_MEMORY_FATAL_VM              = 2, //memory access requires all GPU VA context destruction, unrecoverable
} HSA_EVENTID_MEMORYFLAGS;

// Bitfield describing why a memory access failed. The fields add up to
// exactly 32 bits (6 single-bit flags + 3-bit ErrorType + 23 reserved bits).
typedef struct _HsaAccessAttributeFailure
{
    unsigned int NotPresent  : 1;  // Page not present or supervisor privilege
    unsigned int ReadOnly    : 1;  // Write access to a read-only page
    unsigned int NoExecute   : 1;  // Execute access to a page marked NX
    unsigned int GpuAccess   : 1;  // Host access only
    unsigned int ECC         : 1;  // RAS ECC failure (notification of DRAM ECC - non-recoverable - error, if supported by HW)
    unsigned int Imprecise   : 1;  // Can't determine the exact fault address
    unsigned int ErrorType   : 3;  // Indicates RAS errors or other errors causing the access to GPU to fail
                                      // 0 = no RAS error, 1 = ECC_SRAM, 2 = Link_SYNFLOOD (poison), 3 = GPU hang (not attributable to a specific cause), other values reserved
    unsigned int Reserved    : 23; // must be 0
} HsaAccessAttributeFailure;

// Data associated with HSA_EVENTTYPE_MEMORY events.
typedef struct _HsaMemoryAccessFault
{
    HSAuint32                       NodeId;             // H-NUMA node that contains the device where the memory access occurred
    HSAuint64                       VirtualAddress;     // virtual address this occurred on
    HsaAccessAttributeFailure       Failure;            // failure attribute
    HSA_EVENTID_MEMORYFLAGS         Flags;              // event flags
} HsaMemoryAccessFault;

// Cause reported in HsaHwException::ResetCause.
typedef enum _HSA_EVENTID_HW_EXCEPTION_CAUSE
{
    HSA_EVENTID_HW_EXCEPTION_GPU_HANG  = 0, // GPU Hang
    HSA_EVENTID_HW_EXCEPTION_ECC       = 1, // SRAM ECC error
} HSA_EVENTID_HW_EXCEPTION_CAUSE;

// Data associated with HSA_EVENTTYPE_HW_EXCEPTION events.
typedef struct _HsaHwException
{
    HSAuint32                       NodeId;    // Node Id where the memory exception occurred
    HSAuint32                       ResetType;   // reset type; the encoding is not defined in this header
    HSAuint32                       MemoryLost;  // presumably non-zero when memory contents were lost - confirm
    HSA_EVENTID_HW_EXCEPTION_CAUSE  ResetCause;  // what caused the exception (GPU hang or ECC error)
} HsaHwException;

// Event payload. EventType selects which member of the EventData union is
// meaningful; the trailing HWData* fields are private to the KFD and thunk.
typedef struct _HsaEventData
{
    HSA_EVENTTYPE   EventType;      //event type

    union
    {
        // return data associated with HSA_EVENTTYPE_SIGNAL and other events
        HsaSyncVar              SyncVar;

        // data associated with HSA_EVENTTYPE_NODECHANGE
        HsaNodeChange           NodeChangeState;

        // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
        HsaDeviceStateChange    DeviceState;

        // data associated with HSA_EVENTTYPE_MEMORY
        HsaMemoryAccessFault    MemoryAccessFault;

        // data associated with HSA_EVENTTYPE_HW_EXCEPTION
        HsaHwException          HwException;
    } EventData;

    // the following data entries are internal to the KFD & thunk itself.

    HSAuint64       HWData1;                    // internal thunk store for Event data  (OsEventHandle)
    HSAuint64       HWData2;                    // internal thunk store for Event data  (HWAddress)
    HSAuint32       HWData3;                    // internal thunk store for Event data  (HWData)
} HsaEventData;


// Parameters describing an event to allocate.
typedef struct _HsaEventDescriptor
{
    HSA_EVENTTYPE   EventType;                  // event type to allocate
    HSAuint32       NodeId;                     // H-NUMA node containing GPU device that is event source
    HsaSyncVar      SyncVar;                    // pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL
} HsaEventDescriptor;


// An event: its identifier together with its payload.
typedef struct _HsaEvent
{
    HSA_EVENTID     EventId;    // identifier of this event
    HsaEventData    EventData;  // type and payload of this event
} HsaEvent;

// Special timeout values for waiting on events. Note the enum tag is spelled
// "_HsaEventTimeout" while the typedef name is "HsaEventTimeOut".
typedef enum _HsaEventTimeout
{
    HSA_EVENTTIMEOUT_IMMEDIATE  = 0,           // do not wait
    HSA_EVENTTIMEOUT_INFINITE   = 0xFFFFFFFF   // wait without timeout
} HsaEventTimeOut;

// Set of clock counter readings plus the frequency of the system clock.
typedef struct _HsaClockCounters
{
    HSAuint64   GPUClockCounter;         // GPU clock counter value
    HSAuint64   CPUClockCounter;         // CPU clock counter value
    HSAuint64   SystemClockCounter;      // system clock counter value
    HSAuint64   SystemClockFrequencyHz;  // system clock frequency, in Hz
} HsaClockCounters;

// UUID type and definition helper. When the platform already provides
// DEFINE_GUID, HSA_UUID and HSA_DEFINE_UUID map onto GUID / DEFINE_GUID;
// otherwise a layout-equivalent struct (32 + 16 + 16 bits + 8 bytes) is
// declared here and HSA_DEFINE_UUID expands to a "static const" object, so
// every translation unit including this header gets its own copy.
#ifndef DEFINE_GUID
typedef struct _HSA_UUID
{
    HSAuint32   Data1;
    HSAuint16   Data2;
    HSAuint16   Data3;
    HSAuint8    Data4[8];
} HSA_UUID;

// name: identifier to define; dw, w1, w2: Data1..Data3; b1..b8: Data4 bytes.
#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
    static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
#else
#define HSA_UUID GUID
#define HSA_DEFINE_UUID DEFINE_GUID
#endif

// UUIDs identifying the GPU hardware blocks that expose profiling counters.
// Each definition is preceded by the UUID in canonical text form.

// HSA_UUID that identifies the GPU ColorBuffer (CB) block
// {9ba429c6-af2d-4b38-b349-157271beac6a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CB,
0x9ba429c6, 0xaf2d, 0x4b38, 0xb3, 0x49, 0x15, 0x72, 0x71, 0xbe, 0xac, 0x6a);

// HSA_UUID that identifies the GPU (CPF) block
// {2b0ad2b5-1c43-4f46-a7bc-e119411ea6c9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPF,
0x2b0ad2b5, 0x1c43, 0x4f46, 0xa7, 0xbc, 0xe1, 0x19, 0x41, 0x1e, 0xa6, 0xc9);

// HSA_UUID that identifies the GPU (CPG) block
// {590ec94d-20f0-448f-8dff-316c679de7ff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPG,
0x590ec94d, 0x20f0, 0x448f, 0x8d, 0xff, 0x31, 0x6c, 0x67, 0x9d, 0xe7, 0xff);

// HSA_UUID that identifies the GPU (DB) block
// {3d1a47fc-0013-4ed4-8306-822ca0b7a6c2}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_DB,
0x3d1a47fc, 0x0013, 0x4ed4, 0x83, 0x06, 0x82, 0x2c, 0xa0, 0xb7, 0xa6, 0xc2);

// HSA_UUID that identifies the GPU (GDS) block
// {f59276ec-2526-4bf8-8ec0-118f77700dc9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GDS,
0xf59276ec, 0x2526, 0x4bf8, 0x8e, 0xc0, 0x11, 0x8f, 0x77, 0x70, 0x0d, 0xc9);

// HSA_UUID that identifies the GPU (GRBM) block
// {8f00933c-c33d-4801-97b7-7007f78573ad}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBM,
0x8f00933c, 0xc33d, 0x4801, 0x97, 0xb7, 0x70, 0x07, 0xf7, 0x85, 0x73, 0xad);

// HSA_UUID that identifies the GPU (GRBMSE) block
// {34ebd8d7-7c8b-4d15-88fa-0e4e4af59ac1}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBMSE,
0x34ebd8d7, 0x7c8b, 0x4d15, 0x88, 0xfa, 0x0e, 0x4e, 0x4a, 0xf5, 0x9a, 0xc1);

// HSA_UUID that identifies the GPU (IA) block
// {34276944-4264-4fcd-9d6e-ae264582ec51}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IA,
0x34276944, 0x4264, 0x4fcd, 0x9d, 0x6e, 0xae, 0x26, 0x45, 0x82, 0xec, 0x51);

// HSA_UUID that identifies the GPU Memory Controller (MC) block
// {13900B57-4956-4D98-81D0-68521937F59C}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);

// HSA_UUID that identifies the GPU (PASC) block
// {b0e7fb5d-0efc-4744-b516-5d23dc1fd56c}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASC,
0xb0e7fb5d, 0x0efc, 0x4744, 0xb5, 0x16, 0x5d, 0x23, 0xdc, 0x1f, 0xd5, 0x6c);

// HSA_UUID that identifies the GPU (PASU) block
// {9a152b6a-1fad-45f2-a5bf-f163826bd0cd}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASU,
0x9a152b6a, 0x1fad, 0x45f2, 0xa5, 0xbf, 0xf1, 0x63, 0x82, 0x6b, 0xd0, 0xcd);

// HSA_UUID that identifies the GPU (SPI) block
// {eda81044-d62c-47eb-af89-4f6fbf3b38e0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SPI,
0xeda81044, 0xd62c, 0x47eb, 0xaf, 0x89, 0x4f, 0x6f, 0xbf, 0x3b, 0x38, 0xe0);

// HSA_UUID that identifies the GPU (SRBM) block
// {9f8040e0-6830-4019-acc8-463c9e445b89}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SRBM,
0x9f8040e0, 0x6830, 0x4019, 0xac, 0xc8, 0x46, 0x3c, 0x9e, 0x44, 0x5b, 0x89);

// HSA_UUID that identifies the GPU Shader Sequencer (SQ) block
// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);

// HSA_UUID that identifies the GPU (SX) block
// {bdb8d737-43cc-4162-be52-51cfb847beaf}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SX,
0xbdb8d737, 0x43cc, 0x4162, 0xbe, 0x52, 0x51, 0xcf, 0xb8, 0x47, 0xbe, 0xaf);

// HSA_UUID that identifies the GPU (TA) block
// {c01ee43d-ad92-44b1-8ab9-be5e696ceea7}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TA,
0xc01ee43d, 0xad92, 0x44b1, 0x8a, 0xb9, 0xbe, 0x5e, 0x69, 0x6c, 0xee, 0xa7);

// HSA_UUID that identifies the GPU TextureCache (TCA) block
// {333e393f-e147-4f49-a6d1-60914c7086b0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCA,
0x333e393f, 0xe147, 0x4f49, 0xa6, 0xd1,0x60, 0x91, 0x4c, 0x70, 0x86, 0xb0);

// HSA_UUID that identifies the GPU TextureCache (TCC) block
// {848ce855-d805-4566-a8ab-73e884cc6bff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCC,
0x848ce855, 0xd805, 0x4566, 0xa8, 0xab, 0x73, 0xe8, 0x84, 0xcc, 0x6b, 0xff);

// HSA_UUID that identifies the GPU (TCP) block
// {e10a013b-17d4-4bf5-b089-429591059b60}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCP,
0xe10a013b, 0x17d4, 0x4bf5, 0xb0, 0x89, 0x42, 0x95, 0x91, 0x05, 0x9b, 0x60);

// HSA_UUID that identifies the GPU (TCS) block
// {4126245c-4d96-4d1a-8aed-a939d4cc8ec9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCS,
0x4126245c, 0x4d96, 0x4d1a, 0x8a, 0xed, 0xa9, 0x39, 0xd4, 0xcc, 0x8e, 0xc9);

// HSA_UUID that identifies the GPU (TD) block
// {7d7c0fe4-fe41-4fea-92c9-4544d7706dc6}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TD,
0x7d7c0fe4, 0xfe41, 0x4fea, 0x92, 0xc9, 0x45, 0x44, 0xd7, 0x70, 0x6d, 0xc6);

// HSA_UUID that identifies the GPU (VGT) block
// {0b6a8cb7-7a01-409f-a22c-3014854f1359}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_VGT,
0x0b6a8cb7, 0x7a01, 0x409f, 0xa2, 0x2c, 0x30, 0x14, 0x85, 0x4f, 0x13, 0x59);

// HSA_UUID that identifies the GPU (WD) block
// {0e176789-46ed-4b02-972a-916d2fac244a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_WD,
0x0e176789, 0x46ed, 0x4b02, 0x97, 0x2a, 0x91, 0x6d, 0x2f, 0xac, 0x24, 0x4a);

// Counter access type: privileged (KFD only) vs. user-queue accessible, and
// immediate vs. streaming.
typedef enum _HSA_PROFILE_TYPE
{
    HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
                                               //writes to memory on updates (KFD access only)
    HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE    = 2, //user-queue accessible counter
    HSA_PROFILE_TYPE_NONPRIV_STREAMING    = 3, //user-queue accessible counter
    //...
    HSA_PROFILE_TYPE_NUM,                      //implicit value 4: number of types above

    HSA_PROFILE_TYPE_SIZE                 = 0xFFFFFFFF      // In order to align to 32-bit value
} HSA_PROFILE_TYPE;


// Property flags of a counter. The anonymous union exposes the same 32 bits
// either as named bitfields (ui32) or as a single integer (Value).
typedef struct _HsaCounterFlags
{
    union
    {
        struct
        {
            unsigned int  Global       : 1;  // counter is global
                                             // (not tied to VMID/WAVE/CU, ...)
            unsigned int  Resettable   : 1;  // counter can be reset by SW
                                             // (always to 0?)
            unsigned int  ReadOnly     : 1;  // counter is read-only
                                             // (but may be reset, if indicated)
            unsigned int  Stream       : 1;  // counter has streaming capability
                                             // (after trigger, updates buffer)
            unsigned int  Reserved     : 28; // remaining bits of the 32-bit word
        } ui32;
        HSAuint32      Value;                // all flags as one 32-bit value
    };
} HsaCounterFlags;


// Description of a single performance counter.
typedef struct _HsaCounter
{
    HSA_PROFILE_TYPE Type;              // specifies the counter type
    HSAuint64        CounterId;         // indicates counter register offset
    HSAuint32        CounterSizeInBits; // indicates relevant counter bits
    HSAuint64        CounterMask;       // bitmask for counter value (if applicable)
    HsaCounterFlags  Flags;             // Property flags (see above)
    HSAuint32        BlockIndex;        // identifies block the counter belongs to,
                                        // value may be 0 to NumBlocks
} HsaCounter;


// Properties of one profilable hardware block. Variable-length: Counters is
// declared with one element but holds NumCounters elements in total, so the
// object extends past sizeof(HsaCounterBlockProperties) when NumCounters > 1.
typedef struct _HsaCounterBlockProperties
{
    HSA_UUID                    BlockId;        // specifies the block location
    HSAuint32                   NumCounters;    // How many counters are available?
                                                // (sizes Counters[] array below)
    HSAuint32                   NumConcurrent;  // How many counter slots are available
                                                // in block?
    HsaCounter                  Counters[1];    // Start of counter array
                                                // (NumCounters elements total)
} HsaCounterBlockProperties;


// Top-level description of the profilable blocks. Variable-length: Blocks is
// declared with one element but holds NumBlocks elements in total.
// NOTE(review): each block is itself variable-length (see
// HsaCounterBlockProperties), so blocks after the first presumably cannot be
// reached with plain Blocks[i] indexing - confirm against the code that
// fills in this structure.
typedef struct _HsaCounterProperties
{
    HSAuint32                   NumBlocks;      // How many profilable blocks are available?
                                                // (sizes Blocks[] array below)
    HSAuint32                   NumConcurrent;  // How many blocks slots can be queried
                                                // concurrently by HW?
    HsaCounterBlockProperties   Blocks[1];      // Start of block array
                                                // (NumBlocks elements total)
} HsaCounterProperties;

// Identifier of a trace (see HsaPmcTraceRoot::TraceId); a plain 64-bit value.
typedef HSAuint64   HSATraceId;

// Root description of a performance counter trace.
typedef struct _HsaPmcTraceRoot
{
    HSAuint64                   TraceBufferMinSizeBytes;// minimum trace buffer size in bytes (page aligned)
    HSAuint32                   NumberOfPasses;         // number of passes
    HSATraceId                  TraceId;                // identifier of the trace
} HsaPmcTraceRoot;

// GPU tiling configuration. The two pointers refer to arrays whose element
// counts are given by NumTileConfigs and NumMacroTileConfigs respectively.
typedef struct _HsaGpuTileConfig
{
    HSAuint32 *TileConfig;           // array of NumTileConfigs entries
    HSAuint32 *MacroTileConfig;      // array of NumMacroTileConfigs entries
    HSAuint32 NumTileConfigs;
    HSAuint32 NumMacroTileConfigs;

    HSAuint32 GbAddrConfig;

    HSAuint32 NumBanks;
    HSAuint32 NumRanks;
    /* 9 dwords on 64-bit system */
    HSAuint32 Reserved[7]; /* Round up to 16 dwords for future extension */
} HsaGpuTileConfig;

// Classification of a pointer, reported in HsaPointerInfo::Type.
typedef enum _HSA_POINTER_TYPE {
    HSA_POINTER_UNKNOWN = 0,
    HSA_POINTER_ALLOCATED = 1,           // Allocated with hsaKmtAllocMemory (except scratch)
    HSA_POINTER_REGISTERED_USER = 2,     // Registered user pointer
    HSA_POINTER_REGISTERED_GRAPHICS = 3, // Registered graphics buffer
    HSA_POINTER_REGISTERED_SHARED = 4,   // Registered shared buffer (IPC)
                                         // (hsaKmtRegisterGraphicsToNodes)
    HSA_POINTER_RESERVED_ADDR = 5        // address-only reservation VA
} HSA_POINTER_TYPE;

// Information about the memory a pointer refers to. RegisteredNodes and
// MappedNodes point to arrays whose lengths are NRegisteredNodes and
// NMappedNodes respectively.
typedef struct _HsaPointerInfo {
    HSA_POINTER_TYPE   Type;             // Pointer type
    HSAuint32          Node;             // Node where the memory is located
    HsaMemFlags        MemFlags;         // HsaMemFlags used to alloc memory
    void               *CPUAddress;      // Start address for CPU access
    HSAuint64          GPUAddress;       // Start address for GPU access
    HSAuint64          SizeInBytes;      // Size in bytes
    HSAuint32          NRegisteredNodes; // Number of nodes the memory is registered to
    HSAuint32          NMappedNodes;     // Number of nodes the memory is mapped to
    const HSAuint32    *RegisteredNodes; // Array of registered nodes
    const HSAuint32    *MappedNodes;     // Array of mapped nodes
    void               *UserData;        // User data associated with the memory
} HsaPointerInfo;

// Handle made of eight 32-bit words (32 bytes). Presumably an opaque token
// for sharing memory between processes - confirm against the IPC functions.
// Being an array type, it decays to a pointer when passed to a function.
typedef HSAuint32 HsaSharedMemoryHandle[8];

// A contiguous memory range: start address plus size in bytes.
typedef struct _HsaMemoryRange {
	void               *MemoryAddress;   // Pointer to GPU memory
	HSAuint64          SizeInBytes;      // Size of above memory
} HsaMemoryRange;

// SVM range flags. Every enumerator is a distinct single bit; combine them
// with bitwise OR and pass the result as the value of an
// HSA_SVM_ATTR_SET_FLAGS / HSA_SVM_ATTR_CLR_FLAGS attribute.
typedef enum _HSA_SVM_FLAGS {
    // Guarantee host access to memory
    HSA_SVM_FLAG_HOST_ACCESS       = 0x00000001,
    // Fine grained coherency between all devices with access
    HSA_SVM_FLAG_COHERENT          = 0x00000002,
    // Use any GPU in same hive as preferred device
    HSA_SVM_FLAG_HIVE_LOCAL        = 0x00000004,
    // GPUs only read, allows replication
    HSA_SVM_FLAG_GPU_RO            = 0x00000008,
    // Allow execution on GPU
    HSA_SVM_FLAG_GPU_EXEC          = 0x00000010,
    // GPUs mostly read, may allow similar optimizations as RO, but writes fault
    HSA_SVM_FLAG_GPU_READ_MOSTLY   = 0x00000020,
    // Keep GPU memory mapping always valid as if XNACK is disabled
    HSA_SVM_FLAG_GPU_ALWAYS_MAPPED = 0x00000040,
    // Fine grained coherency between all devices using device-scope atomics
    HSA_SVM_FLAG_EXT_COHERENT      = 0x00000080,
} HSA_SVM_FLAGS;

// SVM attribute selectors, stored in HSA_SVM_ATTRIBUTE::type. Values are
// consecutive starting at 0 and are spelled out explicitly here.
typedef enum _HSA_SVM_ATTR_TYPE {
    // gpuid of the preferred location, 0 for system memory,
    // INVALID_NODEID for "don't care"
    HSA_SVM_ATTR_PREFERRED_LOC   = 0,
    // gpuid of the prefetch location, 0 for system memory. Setting this
    // triggers an immediate prefetch (migration)
    HSA_SVM_ATTR_PREFETCH_LOC    = 1,
    // The next three specify memory access for the gpuid given by the
    // attribute value
    HSA_SVM_ATTR_ACCESS          = 2,
    HSA_SVM_ATTR_ACCESS_IN_PLACE = 3,
    HSA_SVM_ATTR_NO_ACCESS       = 4,
    // bitmask of flags to set (see HSA_SVM_FLAGS)
    HSA_SVM_ATTR_SET_FLAGS       = 5,
    // bitmask of flags to clear
    HSA_SVM_ATTR_CLR_FLAGS       = 6,
    // migration granularity (log2 num pages)
    HSA_SVM_ATTR_GRANULARITY     = 7
} HSA_SVM_ATTR_TYPE;

// One SVM attribute as a (type, value) pair. Both fields are fixed-width
// 32-bit integers rather than enums; how value is interpreted depends on type.
typedef struct _HSA_SVM_ATTRIBUTE {
	HSAuint32 type;  // attribute type (see enum HSA_SVM_ATTR_TYPE)
	HSAuint32 value; // attribute value
} HSA_SVM_ATTRIBUTE;

// SMI event indices. Real events start counting at 1; an index is converted
// into a mask bit with HSA_SMI_EVENT_MASK_FROM_INDEX(). Note that the enum
// tag is _HSA_SMI_EVENT while the typedef name is HSA_EVENT_TYPE.
typedef enum _HSA_SMI_EVENT {
    HSA_SMI_EVENT_NONE             = 0,   /* not used */
    HSA_SMI_EVENT_VMFAULT          = 1,   /* event start counting at 1 */
    HSA_SMI_EVENT_THERMAL_THROTTLE = 2,
    HSA_SMI_EVENT_GPU_PRE_RESET    = 3,
    HSA_SMI_EVENT_GPU_POST_RESET   = 4,
    HSA_SMI_EVENT_MIGRATE_START    = 5,
    HSA_SMI_EVENT_MIGRATE_END      = 6,
    HSA_SMI_EVENT_PAGE_FAULT_START = 7,
    HSA_SMI_EVENT_PAGE_FAULT_END   = 8,
    HSA_SMI_EVENT_QUEUE_EVICTION   = 9,
    HSA_SMI_EVENT_QUEUE_RESTORE    = 10,
    HSA_SMI_EVENT_UNMAP_FROM_GPU   = 11,
    HSA_SMI_EVENT_INDEX_MAX        = 12,

    /*
     * Max event number, used as a flag bit to get events from all
     * processes. This requires super user permission; without it no event
     * from any process is received. Without this flag only events from the
     * same process are received.
     */
    HSA_SMI_EVENT_ALL_PROCESS      = 64
} HSA_EVENT_TYPE;

// Reason a migration was triggered. Values are consecutive starting at 0
// and are spelled out explicitly here.
typedef enum _HSA_MIGRATE_TRIGGERS {
    HSA_MIGRATE_TRIGGER_PREFETCH      = 0,
    HSA_MIGRATE_TRIGGER_PAGEFAULT_GPU = 1,
    HSA_MIGRATE_TRIGGER_PAGEFAULT_CPU = 2,
    HSA_MIGRATE_TRIGGER_TTM_EVICTION  = 3
} HSA_MIGRATE_TRIGGERS;

// Reason a queue eviction was triggered. Values are consecutive starting at
// 0 and are spelled out explicitly here. The two CRIU entries do not carry
// the "_TRIGGER_" infix the other names have.
typedef enum _HSA_QUEUE_EVICTION_TRIGGERS {
    HSA_QUEUE_EVICTION_TRIGGER_SVM     = 0,
    HSA_QUEUE_EVICTION_TRIGGER_USERPTR = 1,
    HSA_QUEUE_EVICTION_TRIGGER_TTM     = 2,
    HSA_QUEUE_EVICTION_TRIGGER_SUSPEND = 3,
    HSA_QUEUE_EVICTION_CRIU_CHECKPOINT = 4,
    HSA_QUEUE_EVICTION_CRIU_RESTORE    = 5
} HSA_QUEUE_EVICTION_TRIGGERS;

// Reason an SVM range was unmapped from the GPU. Values are consecutive
// starting at 0 and are spelled out explicitly here.
typedef enum _HSA_SVM_UNMAP_TRIGGERS {
    HSA_SVM_UNMAP_TRIGGER_MMU_NOTIFY         = 0,
    HSA_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE = 1,
    HSA_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU     = 2
} HSA_SVM_UNMAP_TRIGGERS;

/*
 * Convert a 1-based event index into a 64-bit single-bit mask:
 * index 1 -> bit 0, index 64 -> bit 63. The index must be in [1, 64];
 * an index of 0 would shift by -1.
 */
#define HSA_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
/* NOTE(review): presumably the size in bytes of one SMI event message -- confirm. */
#define HSA_SMI_EVENT_MSG_SIZE	96

/* Opaque device handle, carried as an untyped pointer. */
typedef void *HsaAMDGPUDeviceHandle;

/* Identifier of a PC sampling trace, carried as a 32-bit unsigned value (HSAuint32). */
typedef HSAuint32 HsaPcSamplingTraceId;

/* PC sampling method identifiers; numbering starts at 1 (0 is not assigned). */
typedef enum _HSA_PC_SAMPLING_METHOD_KIND {
	HSA_PC_SAMPLING_METHOD_KIND_HOSTTRAP_V1   = 1,
	HSA_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1 = 2
} HSA_PC_SAMPLING_METHOD_KIND;

/* Units in which a PC sampling interval is expressed; numbered from 0. */
typedef enum _HSA_PC_SAMPLING_UNITS {
	HSA_PC_SAMPLING_UNIT_INTERVAL_MICROSECONDS = 0,
	HSA_PC_SAMPLING_UNIT_INTERVAL_CYCLES       = 1,
	HSA_PC_SAMPLING_UNIT_INTERVAL_INSTRUCTIONS = 2
} HSA_PC_SAMPLING_UNIT_INTERVAL;

/*
 * Description of one PC sampling configuration: a sampling method, the unit
 * of the interval, and a value with its minimum/maximum.
 * NOTE(review): whether `value` is an input, an output or a default is not
 * visible in this header -- confirm against the PC sampling API.
 */
typedef struct _HsaPcSamplingInfo
{
    HSAuint64 value;       // interval value, expressed in `units`
    HSAuint64 value_min;   // lower bound for `value`
    HSAuint64 value_max;   // upper bound for `value`
    HSAuint64 flags;       // flag bits; their meaning is not defined in this header
    HSA_PC_SAMPLING_METHOD_KIND method;  // sampling method
    HSA_PC_SAMPLING_UNIT_INTERVAL units; // unit of value/value_min/value_max
}
HsaPcSamplingInfo;

/*
 * Flags for memory registration. The union allows the flags to be accessed
 * either as one raw 32-bit word (Value) or through named bit-fields (ui32).
 */
typedef union
{
    HSAuint32 Value;                     // all flags as a single word
    struct
    {
        unsigned int requiresVAddr : 1;  // Requires virtual address
    } ui32;
} HSA_REGISTER_MEM_FLAGS;

#pragma pack(pop, hsakmttypes_h)


#ifdef __cplusplus
}   //extern "C"
#endif

#endif //_HSAKMTTYPES_H_


================================================
FILE: libhsakmt/include/hsakmt/linux/kfd_ioctl.h
================================================
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_IOCTL_H_INCLUDED
#define KFD_IOCTL_H_INCLUDED

#include <libdrm/drm.h>
#include <linux/ioctl.h>

/*
 * KFD ioctl interface version history (major.minor):
 * - 1.1 - initial version
 * - 1.3 - Add SMI events support
 * - 1.4 - Indicate new SRAM EDC bit in device properties
 * - 1.5 - Add SVM API
 * - 1.6 - Query clear flags in SVM get_attr API
 * - 1.7 - Checkpoint Restore (CRIU) API
 * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
 * - 1.9 - Add available_memory ioctl
 * - 1.10 - Add SMI profiler event log
 * - 1.11 - Add unified memory for ctx save/restore area
 * - 1.12 - Add DMA buf export ioctl
 * - 1.13 - Add debugger API
 * - 1.14 - Update kfd_event_data
 * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl
 * - 1.16 - Add contiguous VRAM allocation flag
 * - 1.17 - Add SDMA queue creation with target SDMA engine ID
 */
/* Interface version described by this header; matches the last history entry. */
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 17

/* Interface version reported by KFD; both fields are outputs. */
struct kfd_ioctl_get_version_args {
	__u32 major_version;	/* from KFD */
	__u32 minor_version;	/* from KFD */
};

/* For kfd_ioctl_create_queue_args.queue_type. */
#define KFD_IOC_QUEUE_TYPE_COMPUTE		0x0
#define KFD_IOC_QUEUE_TYPE_SDMA			0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL		0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI		0x3
#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID	0x4

/*
 * NOTE(review): presumably the upper bounds for the queue_percentage and
 * queue_priority fields of the create/update queue arguments -- confirm.
 */
#define KFD_MAX_QUEUE_PERCENTAGE	100
#define KFD_MAX_QUEUE_PRIORITY		15

/*
 * Arguments for queue creation. "to KFD" fields are inputs supplied by the
 * caller, "from KFD" fields are filled in by KFD.
 */
struct kfd_ioctl_create_queue_args {
	__u64 ring_base_address;	/* to KFD */
	__u64 write_pointer_address;	/* from KFD */
	__u64 read_pointer_address;	/* from KFD */
	__u64 doorbell_offset;	/* from KFD */

	__u32 ring_size;		/* to KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 queue_type;		/* to KFD - one of KFD_IOC_QUEUE_TYPE_* */
	__u32 queue_percentage;	/* to KFD */
	__u32 queue_priority;	/* to KFD */
	__u32 queue_id;		/* from KFD */

	__u64 eop_buffer_address;	/* to KFD */
	__u64 eop_buffer_size;	/* to KFD */
	__u64 ctx_save_restore_address; /* to KFD */
	__u32 ctx_save_restore_size;	/* to KFD */
	__u32 ctl_stack_size;		/* to KFD */
	/* presumably only meaningful for KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID -- confirm */
	__u32 sdma_engine_id;		/* to KFD */
	__u32 pad;			/* padding */
};

/* Arguments for queue destruction: the queue is identified by queue_id. */
struct kfd_ioctl_destroy_queue_args {
	__u32 queue_id;		/* to KFD */
	__u32 pad;		/* padding */
};

/*
 * Arguments for updating an existing queue, identified by queue_id.
 * All fields are inputs.
 */
struct kfd_ioctl_update_queue_args {
	__u64 ring_base_address;	/* to KFD */

	__u32 queue_id;		/* to KFD */
	__u32 ring_size;		/* to KFD */
	__u32 queue_percentage;	/* to KFD */
	__u32 queue_priority;	/* to KFD */
};

/*
 * Arguments for setting the CU mask of a queue. All fields are inputs.
 * NOTE(review): the unit of num_cu_mask (bits vs. words) is not stated in
 * this header -- confirm before relying on it.
 */
struct kfd_ioctl_set_cu_mask_args {
	__u32 queue_id;		/* to KFD */
	__u32 num_cu_mask;		/* to KFD */
	__u64 cu_mask_ptr;		/* to KFD - pointer to the mask, carried as a 64-bit value */
};

/*
 * Arguments for querying the wave state of a queue. The caller passes the
 * queue id and a control stack address; KFD reports the used sizes.
 */
struct kfd_ioctl_get_queue_wave_state_args {
	__u64 ctl_stack_address;	/* to KFD */
	__u32 ctl_stack_used_size;	/* from KFD */
	__u32 save_area_used_size;	/* from KFD */
	__u32 queue_id;			/* to KFD */
	__u32 pad;			/* padding */
};

/*
 * One entry of the queue snapshot buffer used by
 * KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT (see
 * kfd_ioctl_dbg_trap_queue_snapshot_args.snapshot_buf_ptr).
 */
struct kfd_queue_snapshot_entry {
	__u64 exception_status;
	__u64 ring_base_address;
	__u64 write_pointer_address;
	__u64 read_pointer_address;
	__u64 ctx_save_restore_address;
	__u32 queue_id;
	__u32 gpu_id;
	__u32 ring_size;
	__u32 queue_type;
	__u32 ctx_save_restore_area_size;
	__u32 reserved;
};

/*
 * One entry of the device snapshot buffer used by
 * KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT (see
 * kfd_ioctl_dbg_trap_device_snapshot_args.snapshot_buf_ptr).
 * Seven 64-bit fields are followed by sixteen 32-bit fields.
 */
struct kfd_dbg_device_info_entry {
	__u64 exception_status;
	__u64 lds_base;
	__u64 lds_limit;
	__u64 scratch_base;
	__u64 scratch_limit;
	__u64 gpuvm_base;
	__u64 gpuvm_limit;
	__u32 gpu_id;
	__u32 location_id;
	__u32 vendor_id;
	__u32 device_id;
	__u32 revision_id;
	__u32 subsystem_vendor_id;
	__u32 subsystem_device_id;
	__u32 fw_version;
	__u32 gfx_target_version;
	__u32 simd_count;
	__u32 max_waves_per_simd;
	__u32 array_count;
	__u32 simd_arrays_per_engine;
	__u32 num_xcc;
	__u32 capability;
	__u32 debug_prop;
};

/*
 * Values for kfd_ioctl_set_memory_policy_args.default_policy and
 * kfd_ioctl_set_memory_policy_args.alternate_policy.
 */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1

/*
 * Arguments for setting the memory (cache) policy of a GPU. All fields are
 * inputs. The two policy fields take KFD_IOC_CACHE_POLICY_* values.
 */
struct kfd_ioctl_set_memory_policy_args {
	__u64 alternate_aperture_base;	/* to KFD */
	__u64 alternate_aperture_size;	/* to KFD */

	__u32 gpu_id;			/* to KFD */
	__u32 default_policy;		/* to KFD */
	__u32 alternate_policy;		/* to KFD */
	/* presumably takes the "Misc. per process flags" such as ENABLE_MFMA_HIGH_PRECISION -- confirm */
	__u32 misc_process_flag;        /* to KFD */
};

/*
 * Clock counter query for one GPU.
 *
 * All counters are monotonic. They are used for profiling of compute jobs.
 * The profiling is done by userspace.
 *
 * In case of GPU reset, the counters should not be affected.
 */

struct kfd_ioctl_get_clock_counters_args {
	__u64 gpu_clock_counter;	/* from KFD */
	__u64 cpu_clock_counter;	/* from KFD */
	__u64 system_clock_counter;	/* from KFD */
	__u64 system_clock_freq;	/* from KFD */

	__u32 gpu_id;		/* to KFD */
	__u32 pad;		/* padding */
};

/*
 * Per-GPU aperture description filled in by KFD: base/limit pairs for the
 * LDS, scratch and GPUVM apertures, plus the id of the GPU they belong to.
 */
struct kfd_process_device_apertures {
	__u64 lds_base;		/* from KFD */
	__u64 lds_limit;		/* from KFD */
	__u64 scratch_base;		/* from KFD */
	__u64 scratch_limit;		/* from KFD */
	__u64 gpuvm_base;		/* from KFD */
	__u64 gpuvm_limit;		/* from KFD */
	__u32 gpu_id;		/* from KFD */
	__u32 pad;		/* padding */
};

/*
 * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
 * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
 * unlimited number of GPUs.
 *
 * This legacy variant returns the apertures in a fixed-size array of
 * NUM_OF_SUPPORTED_GPUS entries embedded in the argument struct.
 */
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args {
	struct kfd_process_device_apertures
			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */

	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
	__u32 num_of_nodes;
	__u32 pad;	/* padding */
};

/*
 * Aperture query using a caller-allocated array instead of the fixed-size
 * array of kfd_ioctl_get_process_apertures_args.
 */
struct kfd_ioctl_get_process_apertures_new_args {
	/* User allocated. Pointer to an array of
	 * struct kfd_process_device_apertures, filled in by the kernel.
	 */
	__u64 kfd_process_device_apertures_ptr;
	/* to KFD - indicates the amount of memory present in
	 *  kfd_process_device_apertures_ptr
	 * from KFD - Number of entries filled by KFD.
	 */
	__u32 num_of_nodes;
	__u32 pad;	/* padding */
};

/*
 * NOTE(review): presumably limits for the legacy debugger ioctls declared
 * right below (address watch "AW" / wave control "WAC" buffers); their use
 * is not visible in this header -- confirm.
 */
#define MAX_ALLOWED_NUM_POINTS    100
#define MAX_ALLOWED_AW_BUFF_SIZE 4096
#define MAX_ALLOWED_WAC_BUFF_SIZE  128

/* Debugger register arguments: selects the target GPU by gpu_id. */
struct kfd_ioctl_dbg_register_args {
	__u32 gpu_id;		/* to KFD */
	__u32 pad;		/* padding */
};

/* Debugger unregister arguments: selects the target GPU by gpu_id. */
struct kfd_ioctl_dbg_unregister_args {
	__u32 gpu_id;		/* to KFD */
	__u32 pad;		/* padding */
};

/*
 * Debugger address watch arguments: a GPU id plus a variable-size payload
 * passed by pointer.
 */
struct kfd_ioctl_dbg_address_watch_args {
	__u64 content_ptr;		/* a pointer to the actual content */
	__u32 gpu_id;		/* to KFD */
	__u32 buf_size_in_bytes;	/* including gpu_id and buf_size */
};

/*
 * Debugger wave control arguments: a GPU id plus a variable-size payload
 * passed by pointer.
 */
struct kfd_ioctl_dbg_wave_control_args {
	__u64 content_ptr;		/* a pointer to the actual content */
	__u32 gpu_id;		/* to KFD */
	__u32 buf_size_in_bytes;	/* including gpu_id and buf_size */
};
#define	KFD_DBG_EV_FLAG_CLEAR_STATUS	1

/* queue states for suspend/resume */
#define KFD_DBG_QUEUE_ERROR_BIT		30
#define KFD_DBG_QUEUE_INVALID_BIT	31
#define KFD_DBG_QUEUE_ERROR_MASK	(1 << KFD_DBG_QUEUE_ERROR_BIT)
#define KFD_DBG_QUEUE_INVALID_MASK	(1 << KFD_DBG_QUEUE_INVALID_BIT)

#define KFD_INVALID_GPUID	0xffffffff
#define KFD_INVALID_QUEUEID	0xffffffff
#define KFD_INVALID_FD		0xffffffff

/*
 * Override modes for
 * kfd_ioctl_dbg_trap_set_wave_launch_override_args.override_mode.
 */
enum kfd_dbg_trap_override_mode {
	KFD_DBG_TRAP_OVERRIDE_OR = 0,
	KFD_DBG_TRAP_OVERRIDE_REPLACE = 1
};
enum kfd_dbg_trap_mask {
	KFD_DBG_TRAP_MASK_FP_INVALID = 1,
	KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2,
	KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4,
	KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8,
	KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16,
	KFD_DBG_TRAP_MASK_FP_INEXACT = 32,
	KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64,
	KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128,
	KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256,
	KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = (1 << 30),
	KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = (1 << 31)
};

/*
 * Wave launch modes (see kfd_ioctl_dbg_trap_set_wave_launch_mode_args).
 * Note that the value 2 is not assigned.
 */
enum kfd_dbg_trap_wave_launch_mode {
	KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0,
	KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1,
	KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3
};

/*
 * Address watch modes (see the mode field of
 * kfd_ioctl_dbg_trap_set_node_address_watch_args).
 */
enum kfd_dbg_trap_address_watch_mode {
	KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0,
	KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1,
	KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2,
	KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3
};

/*
 * Additional wave settings.
 * Presumably the bits accepted by kfd_ioctl_dbg_trap_set_flags_args.flags
 * -- confirm.
 */
enum kfd_dbg_trap_flags {
	KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1,
};

/*
 * Debug exception codes.
 *
 * Codes are grouped by the object they are raised on: per queue (1-31),
 * per device (32-36) and per process (48-49). EC_NONE is not a real
 * exception and EC_MAX is one past the last code defined here.
 */
enum kfd_dbg_trap_exception_code {
	EC_NONE = 0,
	/* per queue */
	EC_QUEUE_WAVE_ABORT = 1,
	EC_QUEUE_WAVE_TRAP = 2,
	EC_QUEUE_WAVE_MATH_ERROR = 3,
	EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4,
	EC_QUEUE_WAVE_MEMORY_VIOLATION = 5,
	EC_QUEUE_WAVE_APERTURE_VIOLATION = 6,
	EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16,
	EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17,
	EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18,
	EC_QUEUE_PACKET_RESERVED = 19,
	EC_QUEUE_PACKET_UNSUPPORTED = 20,
	EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21,
	EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22,
	EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23,
	EC_QUEUE_PREEMPTION_ERROR = 30,
	EC_QUEUE_NEW = 31,
	/* per device */
	EC_DEVICE_QUEUE_DELETE = 32,
	EC_DEVICE_MEMORY_VIOLATION = 33,
	EC_DEVICE_RAS_ERROR = 34,
	EC_DEVICE_FATAL_HALT = 35,
	EC_DEVICE_NEW = 36,
	/* per process */
	EC_PROCESS_RUNTIME = 48,
	EC_PROCESS_DEVICE_REMOVE = 49,
	EC_MAX
};

/*
 * Mask generated by ecode defined in enum above: code N maps to bit N - 1.
 * The argument is parenthesized so that expressions such as `a | b` or
 * `c ? x : y` expand correctly. The result is only defined for
 * EC_NONE < ecode < EC_MAX; use KFD_DBG_EC_IS_VALID() first when the code
 * comes from an untrusted source.
 */
#define KFD_EC_MASK(ecode)	(1ULL << ((ecode) - 1))

/*
 * Masks for exception code type checks below.
 * NOTE(review): EC_QUEUE_PACKET_RESERVED (19) is not part of
 * KFD_EC_MASK_QUEUE; confirm against the kernel UAPI header whether this
 * omission is intentional.
 */
#define KFD_EC_MASK_QUEUE	(KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) |	\
				 KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) |	\
				 KFD_EC_MASK(EC_QUEUE_WAVE_MATH_ERROR) |	\
				 KFD_EC_MASK(EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION) |	\
				 KFD_EC_MASK(EC_QUEUE_WAVE_MEMORY_VIOLATION) |	\
				 KFD_EC_MASK(EC_QUEUE_WAVE_APERTURE_VIOLATION) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) |	\
				 KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)	|	\
				 KFD_EC_MASK(EC_QUEUE_PREEMPTION_ERROR)	|	\
				 KFD_EC_MASK(EC_QUEUE_NEW))
#define KFD_EC_MASK_DEVICE	(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE) |		\
				 KFD_EC_MASK(EC_DEVICE_RAS_ERROR) |		\
				 KFD_EC_MASK(EC_DEVICE_FATAL_HALT) |		\
				 KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION) |	\
				 KFD_EC_MASK(EC_DEVICE_NEW))
#define KFD_EC_MASK_PROCESS	(KFD_EC_MASK(EC_PROCESS_RUNTIME) |	\
				 KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))

/*
 * Checks for exception code types for KFD search.
 *
 * Each check first rejects codes outside (EC_NONE, EC_MAX), so EC_NONE or an
 * out-of-range value yields 0 instead of an undefined shift by -1 or by 64
 * and more. The argument is evaluated more than once: do not pass an
 * expression with side effects.
 */
#define KFD_DBG_EC_IS_VALID(ecode)					\
			((ecode) > EC_NONE && (ecode) < EC_MAX)
#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode)					\
			(KFD_DBG_EC_IS_VALID(ecode) &&			\
			 !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode)				\
			(KFD_DBG_EC_IS_VALID(ecode) &&			\
			 !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode)				\
			(KFD_DBG_EC_IS_VALID(ecode) &&			\
			 !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))

/*
 * Misc. per process flags (bit 0).
 * Presumably passed in kfd_ioctl_set_memory_policy_args.misc_process_flag
 * -- confirm.
 */
#define ENABLE_MFMA_HIGH_PRECISION              (1 << 0)

/*
 * Runtime states.
 * Presumably the values stored in kfd_runtime_info.runtime_state -- confirm.
 */
enum kfd_dbg_runtime_state {
	DEBUG_RUNTIME_STATE_DISABLED = 0,
	DEBUG_RUNTIME_STATE_ENABLED = 1,
	DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2,
	DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3
};

/*
 * Runtime information saved by KFD. It is copied to the debugger through
 * kfd_ioctl_dbg_trap_enable_args.rinfo_ptr and by
 * KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO for EC_PROCESS_RUNTIME.
 */
struct kfd_runtime_info {
	__u64 r_debug;		/* see kfd_ioctl_runtime_enable_args.r_debug */
	__u32 runtime_state;	/* presumably an enum kfd_dbg_runtime_state value -- confirm */
	__u32 ttmp_setup;
};

/* Enable modes for runtime enable (kfd_ioctl_runtime_enable_args.mode_mask) */
#define KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK	1
#define KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK	2
/* presumably a bit of kfd_ioctl_runtime_enable_args.capabilities_mask -- confirm */
#define KFD_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK 0x80000000

/**
 * kfd_ioctl_runtime_enable_args - Arguments for runtime enable
 *
 * Coordinates debug exception signalling and debug device enablement with runtime.
 *
 * @r_debug - pointer to user struct for sharing information between ROCr and the debugger
 * @mode_mask - mask to set mode
 *	KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK - enable runtime for debugging, otherwise disable
 *	KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK - enable trap temporary setup (ignore on disable)
 * @capabilities_mask - capability bits; presumably
 *	KFD_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK (TODO confirm direction
 *	and meaning, it is not described in this header)
 *
 * Return - 0 on SUCCESS.
 *	  - EBUSY if runtime enable call already pending.
 *	  - EEXIST if user queues already active prior to call.
 *	    If process is debug enabled, runtime enable will enable debug devices and
 *	    wait for debugger process to send runtime exception EC_PROCESS_RUNTIME
 *	    to unblock - see kfd_ioctl_dbg_trap_args.
 *
 */
struct kfd_ioctl_runtime_enable_args {
	__u64 r_debug;
	__u32 mode_mask;
	__u32 capabilities_mask;
};

/*
 * Context save area header information.
 *
 * Copied into the save area base of each queue on
 * KFD_IOC_DBG_TRAP_SUSPEND_QUEUES; exceptions sent with
 * KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT are raised to the runtime through
 * err_payload_addr.
 */
struct kfd_context_save_area_header {
	struct {
		__u32 control_stack_offset;
		__u32 control_stack_size;
		__u32 wave_state_offset;
		__u32 wave_state_size;
	} wave_state;
	__u32 debug_offset;
	__u32 debug_size;
	__u64 err_payload_addr;
	__u32 err_event_id;
	__u32 reserved1;
};

/*
 * Debug operations
 *
 * Values for kfd_ioctl_dbg_trap_args.op. Operations marked DBG_HW_OP are
 * debug hardware operations.
 *
 * For specifics on usage and return values, see documentation per operation
 * below.  Otherwise, generic error returns apply:
 *	- ESRCH if the process to debug does not exist.
 *
 *	- EINVAL (with KFD_IOC_DBG_TRAP_ENABLE exempt) if operation
 *		 KFD_IOC_DBG_TRAP_ENABLE has not succeeded prior.
 *		 Also returns this error if GPU hardware scheduling is not supported.
 *
 *	- EPERM (with KFD_IOC_DBG_TRAP_DISABLE exempt) if target process is not
 *		 PTRACE_ATTACHED.  KFD_IOC_DBG_TRAP_DISABLE is exempt to allow
 *		 clean up of debug mode as long as process is debug enabled.
 *
 *	- EACCES if any DBG_HW_OP (debug hardware operation) is requested when
 *		 AMDKFD_IOC_RUNTIME_ENABLE has not succeeded prior.
 *
 *	- ENODEV if any GPU does not support debugging on a DBG_HW_OP call.
 *
 *	- Other errors may be returned when a DBG_HW_OP occurs while the GPU
 *	  is in a fatal state.
 *
 */
enum kfd_dbg_trap_operations {
	KFD_IOC_DBG_TRAP_ENABLE = 0,
	KFD_IOC_DBG_TRAP_DISABLE = 1,
	KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2,
	KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3,
	KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4,  /* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5,      /* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6,		/* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7,		/* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8,	/* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9,	/* DBG_HW_OP */
	KFD_IOC_DBG_TRAP_SET_FLAGS = 10,
	KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11,
	KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12,
	KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13,
	KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14
};

/**
 * kfd_ioctl_dbg_trap_enable_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_ENABLE.
 *
 *     Enables debug session for target process. Call @op KFD_IOC_DBG_TRAP_DISABLE in
 *     kfd_ioctl_dbg_trap_args to disable debug session.
 *
 *     @exception_mask (IN)	- exceptions to raise to the debugger
 *     @rinfo_ptr      (IN)	- pointer to runtime info buffer (see kfd_runtime_info)
 *     @rinfo_size     (IN/OUT)	- size of runtime info buffer in bytes
 *     @dbg_fd	       (IN)	- fd through which the KFD will notify the debugger
 *				  of raised exceptions set in exception_mask.
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *		Copies KFD saved kfd_runtime_info to @rinfo_ptr on enable.
 *		Size of kfd_runtime_info saved by the KFD is returned in @rinfo_size.
 *            - EBADF if KFD cannot get a reference to dbg_fd.
 *            - EFAULT if KFD cannot copy runtime info to rinfo_ptr.
 *            - EINVAL if target process is already debug enabled.
 *
 */
struct kfd_ioctl_dbg_trap_enable_args {
	__u64 exception_mask;
	__u64 rinfo_ptr;
	__u32 rinfo_size;
	__u32 dbg_fd;
};

/**
 * kfd_ioctl_dbg_trap_send_runtime_event_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT.
 *     Raises exceptions to runtime.
 *
 *     @exception_mask (IN) - exceptions to raise to runtime
 *     @gpu_id	       (IN) - target device id
 *     @queue_id       (IN) - target queue id
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *		If exception_mask contains EC_PROCESS_RUNTIME, unblocks pending
 *		AMDKFD_IOC_RUNTIME_ENABLE call - see kfd_ioctl_runtime_enable_args.
 *		All other exceptions are raised to runtime through err_payload_addr.
 *		See kfd_context_save_area_header.
 *	      - ENODEV if gpu_id not found.
 */
struct kfd_ioctl_dbg_trap_send_runtime_event_args {
	__u64 exception_mask;
	__u32 gpu_id;
	__u32 queue_id;
};

/**
 * kfd_ioctl_dbg_trap_set_exceptions_enabled_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED
 *     Set new exceptions to be raised to the debugger.
 *
 *     @exception_mask (IN) - new exceptions to raise to the debugger
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 */
struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args {
	__u64 exception_mask;
};

/**
 * kfd_ioctl_dbg_trap_set_wave_launch_override_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE
 *     Enable HW exceptions to raise trap.
 *
 *     @override_mode	     (IN)     - see kfd_dbg_trap_override_mode
 *     @enable_mask	     (IN/OUT) - reference kfd_dbg_trap_mask.
 *					IN is the override modes requested to be enabled.
 *					OUT is referenced in Return below.
 *     @support_request_mask (IN/OUT) - reference kfd_dbg_trap_mask.
 *					IN is the override modes requested for support check.
 *					OUT is referenced in Return below.
 *     @pad			      - padding
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *		Previous enablement is returned in @enable_mask.
 *		Actual override support is returned in @support_request_mask.
 *	      - EINVAL if override mode is not supported.
 *	      - EACCES if trap support requested is not actually supported.
 *		i.e. enable_mask (IN) is not a subset of support_request_mask (OUT).
 *		Otherwise it is considered a generic error (see kfd_dbg_trap_operations).
 */
struct kfd_ioctl_dbg_trap_set_wave_launch_override_args {
	__u32 override_mode;
	__u32 enable_mask;
	__u32 support_request_mask;
	__u32 pad;
};

/**
 * kfd_ioctl_dbg_trap_set_wave_launch_mode_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE
 *     Set wave launch mode.
 *
 *     @launch_mode (IN) - see kfd_dbg_trap_wave_launch_mode
 *     @pad		 - padding
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 */
struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args {
	__u32 launch_mode;
	__u32 pad;
};

/**
 * kfd_ioctl_dbg_trap_suspend_queues_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SUSPEND_QUEUES
 *     Suspend queues.
 *
 *     @exception_mask	(IN) - raised exceptions to clear
 *     @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
 *			       to suspend
 *     @num_queues	(IN) - number of queues to suspend in @queue_array_ptr
 *     @grace_period	(IN) - wave time allowance before preemption
 *			       per 1K GPU clock cycle unit
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Destruction of a suspended queue is blocked until the queue is
 *     resumed.  This allows the debugger to access queue information and
 *     its context save area without running into a race condition on
 *     queue destruction.
 *     Automatically copies per queue context save area header information
 *     into the save area base
 *     (see kfd_queue_snapshot_entry and kfd_context_save_area_header).
 *
 *     Return - Number of queues suspended on SUCCESS.
 *		KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked
 *		for each queue id in @queue_array_ptr array reports unsuccessful
 *		suspend reason.
 *		KFD_DBG_QUEUE_ERROR_MASK = HW failure.
 *		KFD_DBG_QUEUE_INVALID_MASK = queue does not exist, is new or
 *		is being destroyed.
 */
struct kfd_ioctl_dbg_trap_suspend_queues_args {
	__u64 exception_mask;
	__u64 queue_array_ptr;
	__u32 num_queues;
	__u32 grace_period;
};

/**
 * kfd_ioctl_dbg_trap_resume_queues_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_RESUME_QUEUES
 *     Resume queues.
 *
 *     @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
 *			       to resume
 *     @num_queues	(IN) - number of queues to resume in @queue_array_ptr
 *     @pad		     - padding
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - Number of queues resumed on SUCCESS.
 *		KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked
 *		for each queue id in @queue_array_ptr array reports unsuccessful
 *		resume reason.
 *		KFD_DBG_QUEUE_ERROR_MASK = HW failure.
 *		KFD_DBG_QUEUE_INVALID_MASK = queue does not exist.
 */
struct kfd_ioctl_dbg_trap_resume_queues_args {
	__u64 queue_array_ptr;
	__u32 num_queues;
	__u32 pad;
};

/**
 * kfd_ioctl_dbg_trap_set_node_address_watch_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH
 *     Sets address watch for device.
 *
 *     @address	(IN)  - watch address to set
 *     @mode    (IN)  - see kfd_dbg_trap_address_watch_mode
 *     @mask    (IN)  - watch address mask
 *     @gpu_id  (IN)  - target gpu to set watch point
 *     @id      (OUT) - watch id allocated
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *		Allocated watch ID returned to @id.
 *	      - ENODEV if gpu_id not found.
 *	      - ENOMEM if no watch ID can be allocated.
 */
struct kfd_ioctl_dbg_trap_set_node_address_watch_args {
	__u64 address;
	__u32 mode;
	__u32 mask;
	__u32 gpu_id;
	__u32 id;
};

/**
 * kfd_ioctl_dbg_trap_clear_node_address_watch_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH
 *     Clear address watch for device.
 *
 *     @gpu_id  (IN) - target device to clear watch point
 *     @id      (IN) - allocated watch id to clear (see @id of
 *		       kfd_ioctl_dbg_trap_set_node_address_watch_args)
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *	      - ENODEV if gpu_id not found.
 *	      - EINVAL if watch ID has not been allocated.
 */
struct kfd_ioctl_dbg_trap_clear_node_address_watch_args {
	__u32 gpu_id;
	__u32 id;
};

/**
 * kfd_ioctl_dbg_trap_set_flags_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_SET_FLAGS
 *     Sets flags for wave behaviour.
 *
 *     @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled
 *     @pad	       - padding
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *	      - EACCES if any debug device does not allow flag options.
 */
struct kfd_ioctl_dbg_trap_set_flags_args {
	__u32 flags;
	__u32 pad;
};

/**
 * kfd_ioctl_dbg_trap_query_debug_event_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT
 *
 *     Find one or more raised exceptions. This function can return multiple
 *     exceptions from a single queue or a single device with one call. To find
 *     all raised exceptions, this function must be called repeatedly until it
 *     returns -EAGAIN. Returned exceptions can optionally be cleared by
 *     setting the corresponding bit in the @exception_mask input parameter.
 *     However, clearing an exception prevents retrieving further information
 *     about it with KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO.
 *
 *     @exception_mask (IN/OUT) - exceptions to clear (IN) and raised (OUT)
 *     @gpu_id	       (OUT)    - gpu id of exceptions raised
 *     @queue_id       (OUT)    - queue id of exceptions raised
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on raised exception found
 *              Raised exceptions found are returned in @exception_mask
 *              with reported source id returned in @gpu_id or @queue_id.
 *            - EAGAIN if no raised exception has been found
 */
struct kfd_ioctl_dbg_trap_query_debug_event_args {
	__u64 exception_mask;
	__u32 gpu_id;
	__u32 queue_id;
};

/**
 * kfd_ioctl_dbg_trap_query_exception_info_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO
 *     Get additional info on raised exception.
 *
 *     @info_ptr	(IN)	 - pointer to exception info buffer to copy to
 *     @info_size	(IN/OUT) - exception info buffer size (bytes)
 *     @source_id	(IN)     - target gpu or queue id
 *     @exception_code	(IN)     - target exception
 *				   (see kfd_dbg_trap_exception_code)
 *     @clear_exception	(IN)     - clear raised @exception_code exception
 *				   (0 = false, 1 = true)
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *              If @exception_code is EC_DEVICE_MEMORY_VIOLATION, copy @info_size(OUT)
 *		bytes of memory exception data to @info_ptr.
 *              If @exception_code is EC_PROCESS_RUNTIME, copy saved
 *              kfd_runtime_info to @info_ptr.
 *              Actual required @info_ptr size (bytes) is returned in @info_size.
 */
struct kfd_ioctl_dbg_trap_query_exception_info_args {
	__u64 info_ptr;
	__u32 info_size;
	__u32 source_id;
	__u32 exception_code;
	__u32 clear_exception;
};

/**
 * kfd_ioctl_dbg_trap_queue_snapshot_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT
 *     Get queue information.
 *
 *     @exception_mask	 (IN)	  - exceptions raised to clear
 *     @snapshot_buf_ptr (IN)	  - queue snapshot entry buffer (see kfd_queue_snapshot_entry)
 *     @num_queues	 (IN/OUT) - number of queue snapshot entries
 *         The debugger specifies the size of the array allocated in @num_queues.
 *         KFD returns the number of queues that actually existed. If this is
 *         larger than the size specified by the debugger, KFD will not overflow
 *         the array allocated by the debugger.
 *
 *     @entry_size	 (IN/OUT) - size per entry in bytes
 *         The debugger specifies sizeof(struct kfd_queue_snapshot_entry) in
 *         @entry_size. KFD returns the number of bytes actually populated per
 *         entry. The debugger should use the KFD_IOCTL_MINOR_VERSION to determine
 *         which fields in struct kfd_queue_snapshot_entry are valid. This allows
 *         growing the ABI in a backwards compatible manner.
 *         Note that entry_size(IN) should still be used to stride the snapshot buffer in the
 *         event that it's larger than actual kfd_queue_snapshot_entry.
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *              Copies @num_queues(IN) queue snapshot entries of size @entry_size(IN)
 *              into @snapshot_buf_ptr if @num_queues(IN) > 0.
 *              Otherwise return @num_queues(OUT) queue snapshot entries that exist.
 */
struct kfd_ioctl_dbg_trap_queue_snapshot_args {
	__u64 exception_mask;
	__u64 snapshot_buf_ptr;
	__u32 num_queues;
	__u32 entry_size;
};

/**
 * kfd_ioctl_dbg_trap_device_snapshot_args
 *
 *     Arguments for KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT
 *     Get device information.
 *
 *     @exception_mask	 (IN)	  - exceptions raised to clear
 *     @snapshot_buf_ptr (IN)	  - pointer to snapshot buffer (see kfd_dbg_device_info_entry)
 *     @num_devices	 (IN/OUT) - number of debug devices to snapshot
 *         The debugger specifies the size of the array allocated in @num_devices.
 *         KFD returns the number of devices that actually existed. If this is
 *         larger than the size specified by the debugger, KFD will not overflow
 *         the array allocated by the debugger.
 *
 *     @entry_size	 (IN/OUT) - size per entry in bytes
 *         The debugger specifies sizeof(struct kfd_dbg_device_info_entry) in
 *         @entry_size. KFD returns the number of bytes actually populated. The
 *         debugger should use KFD_IOCTL_MINOR_VERSION to determine which fields
 *         in struct kfd_dbg_device_info_entry are valid. This allows growing the
 *         ABI in a backwards compatible manner.
 *         Note that entry_size(IN) should still be used to stride the snapshot buffer in the
 *         event that it's larger than actual kfd_dbg_device_info_entry.
 *
 *     Generic errors apply (see kfd_dbg_trap_operations).
 *     Return - 0 on SUCCESS.
 *              Copies @num_devices(IN) device snapshot entries of size @entry_size(IN)
 *              into @snapshot_buf_ptr if @num_devices(IN) > 0.
 *              Otherwise return @num_devices(OUT) device snapshot entries that exist.
 */
struct kfd_ioctl_dbg_trap_device_snapshot_args {
	__u64 exception_mask;
	__u64 snapshot_buf_ptr;
	__u32 num_devices;
	__u32 entry_size;
};

/**
 * kfd_ioctl_dbg_trap_args
 *
 * Arguments to debug target process.
 *
 *     @pid - target process to debug
 *     @op  - debug operation (see kfd_dbg_trap_operations)
 *
 *     @op determines which member of the anonymous union is used.
 *     Refer to the documentation comment of each kfd_ioctl_dbg_trap_*_args
 *     struct. KFD_IOC_DBG_TRAP_DISABLE has no dedicated member in the union.
 */
struct kfd_ioctl_dbg_trap_args {
	__u32 pid;
	__u32 op;

	union {
		struct kfd_ioctl_dbg_trap_enable_args enable;
		struct kfd_ioctl_dbg_trap_send_runtime_event_args send_runtime_event;
		struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args set_exceptions_enabled;
		struct kfd_ioctl_dbg_trap_set_wave_launch_override_args launch_override;
		struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args launch_mode;
		struct kfd_ioctl_dbg_trap_suspend_queues_args suspend_queues;
		struct kfd_ioctl_dbg_trap_resume_queues_args resume_queues;
		struct kfd_ioctl_dbg_trap_set_node_address_watch_args set_node_address_watch;
		struct kfd_ioctl_dbg_trap_clear_node_address_watch_args clear_node_address_watch;
		struct kfd_ioctl_dbg_trap_set_flags_args set_flags;
		struct kfd_ioctl_dbg_trap_query_debug_event_args query_debug_event;
		struct kfd_ioctl_dbg_trap_query_exception_info_args query_exception_info;
		struct kfd_ioctl_dbg_trap_queue_snapshot_args queue_snapshot;
		struct kfd_ioctl_dbg_trap_device_snapshot_args device_snapshot;
	};
};

/*
 * Event types, matching HSA_EVENTTYPE.
 * Presumably the values of kfd_ioctl_create_event_args.event_type -- confirm.
 */
#define KFD_IOC_EVENT_SIGNAL			0
#define KFD_IOC_EVENT_NODECHANGE		1
#define KFD_IOC_EVENT_DEVICESTATECHANGE		2
#define KFD_IOC_EVENT_HW_EXCEPTION		3
#define KFD_IOC_EVENT_SYSTEM_EVENT		4
#define KFD_IOC_EVENT_DEBUG_EVENT		5
#define KFD_IOC_EVENT_PROFILE_EVENT		6
#define KFD_IOC_EVENT_QUEUE_EVENT		7
#define KFD_IOC_EVENT_MEMORY			8

/* Result codes of an event wait. */
#define KFD_IOC_WAIT_RESULT_COMPLETE		0
#define KFD_IOC_WAIT_RESULT_TIMEOUT		1
#define KFD_IOC_WAIT_RESULT_FAIL		2

/* NOTE(review): presumably the maximum number of signal events -- confirm. */
#define KFD_SIGNAL_EVENT_LIMIT			4096

/* For kfd_event_data.hw_exception_data.reset_type. */
#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET	0
#define KFD_HW_EXCEPTION_PER_ENGINE_RESET	1

/* For kfd_event_data.hw_exception_data.reset_cause. */
#define KFD_HW_EXCEPTION_GPU_HANG	0
#define KFD_HW_EXCEPTION_ECC		1

/* For kfd_hsa_memory_exception_data.ErrorType */
#define KFD_MEM_ERR_NO_RAS		0
#define KFD_MEM_ERR_SRAM_ECC		1
#define KFD_MEM_ERR_POISON_CONSUMED	2
#define KFD_MEM_ERR_GPU_HANG		3

/* Arguments for AMDKFD_IOC_CREATE_EVENT. */
struct kfd_ioctl_create_event_args {
	__u64 event_page_offset;	/* from KFD */
	__u32 event_trigger_data;	/* from KFD - signal events only */
	__u32 event_type;		/* to KFD; presumably a KFD_IOC_EVENT_* value - confirm */
	__u32 auto_reset;		/* to KFD */
	__u32 node_id;		/* to KFD - only valid for certain event types */
	__u32 event_id;		/* from KFD; the id used by the destroy/set/reset/wait args */
	__u32 event_slot_index;	/* from KFD */
};

/* Arguments for AMDKFD_IOC_DESTROY_EVENT. */
struct kfd_ioctl_destroy_event_args {
	__u32 event_id;		/* to KFD */
	__u32 pad;		/* explicit padding; keeps the struct at 8 bytes */
};

/* Arguments for AMDKFD_IOC_SET_EVENT. */
struct kfd_ioctl_set_event_args {
	__u32 event_id;		/* to KFD */
	__u32 pad;		/* explicit padding; keeps the struct at 8 bytes */
};

/* Arguments for AMDKFD_IOC_RESET_EVENT. */
struct kfd_ioctl_reset_event_args {
	__u32 event_id;		/* to KFD */
	__u32 pad;		/* explicit padding; keeps the struct at 8 bytes */
};

/* Cause flags of a memory exception; embedded as the first member of
 * struct kfd_hsa_memory_exception_data.
 */
struct kfd_memory_exception_failure {
	__u32 NotPresent;	/* Page not present or supervisor privilege */
	__u32 ReadOnly;	/* Write access to a read-only page */
	__u32 NoExecute;	/* Execute access to a page marked NX */
	__u32 imprecise;	/* Can't determine the exact fault address */
};

/* memory exception data; carried from KFD in the union of struct kfd_event_data */
struct kfd_hsa_memory_exception_data {
	struct kfd_memory_exception_failure failure;
	__u64 va;	/* NOTE(review): presumably the faulting virtual address - confirm */
	__u32 gpu_id;
	__u32 ErrorType; /* One of KFD_MEM_ERR_*:
			  * 0 = no RAS error,
			  * 1 = ECC_SRAM,
			  * 2 = Link_SYNFLOOD (poison),
			  * 3 = GPU hang (not attributable to a specific cause),
			  * other values reserved
			  */
};

/* hw exception data; carried from KFD in the union of struct kfd_event_data */
struct kfd_hsa_hw_exception_data {
	__u32 reset_type;	/* KFD_HW_EXCEPTION_WHOLE_GPU_RESET or _PER_ENGINE_RESET */
	__u32 reset_cause;	/* KFD_HW_EXCEPTION_GPU_HANG or KFD_HW_EXCEPTION_ECC */
	__u32 memory_lost;
	__u32 gpu_id;
};

/* hsa signal event data; the only member of the kfd_event_data union
 * that is passed in both directions
 */
struct kfd_hsa_signal_event_data {
	__u64 last_event_age;	/* to and from KFD */
};

/* Event data: one element of the array that
 * kfd_ioctl_wait_events_args.events_ptr points to.
 */
struct kfd_event_data {
	/* Type-specific payload; the members overlap. */
	union {
		/* From KFD */
		struct kfd_hsa_memory_exception_data memory_exception_data;
		struct kfd_hsa_hw_exception_data hw_exception_data;
		/* To and From KFD */
		struct kfd_hsa_signal_event_data signal_event_data;
	};
	__u64 kfd_event_data_ext;	/* pointer to an extension structure
					   for future exception types */
	__u32 event_id;		/* to KFD */
	__u32 pad;		/* explicit padding */
};

/* Arguments for AMDKFD_IOC_WAIT_EVENTS. */
struct kfd_ioctl_wait_events_args {
	__u64 events_ptr;		/* pointer to an array of
					   struct kfd_event_data, to KFD */
	__u32 num_events;		/* to KFD */
	__u32 wait_for_all;		/* to KFD */
	__u32 timeout;		/* to KFD */
	__u32 wait_result;		/* from KFD */
};

/* Arguments for AMDKFD_IOC_SET_SCRATCH_BACKING_VA. */
struct kfd_ioctl_set_scratch_backing_va_args {
	__u64 va_addr;	/* to KFD */
	__u32 gpu_id;	/* to KFD */
	__u32 pad;	/* explicit padding */
};

/* Arguments for AMDKFD_IOC_GET_TILE_CONFIG.
 *
 * The two array sizes are in/out: user mode passes the capacity it
 * allocated and KFD writes back how many entries it filled.
 */
struct kfd_ioctl_get_tile_config_args {
	/* to KFD: pointer to tile array */
	__u64 tile_config_ptr;
	/* to KFD: pointer to macro tile array */
	__u64 macro_tile_config_ptr;
	/* to KFD: array size allocated by user mode
	 * from KFD: array size filled by kernel
	 */
	__u32 num_tile_configs;
	/* to KFD: array size allocated by user mode
	 * from KFD: array size filled by kernel
	 */
	__u32 num_macro_tile_configs;

	__u32 gpu_id;		/* to KFD */
	__u32 gb_addr_config;	/* from KFD */
	__u32 num_banks;		/* from KFD */
	__u32 num_ranks;		/* from KFD */
	/* struct size can be extended later if needed
	 * without breaking ABI compatibility
	 */
};

/* Arguments for AMDKFD_IOC_SET_TRAP_HANDLER. */
struct kfd_ioctl_set_trap_handler_args {
	__u64 tba_addr;		/* to KFD */
	__u64 tma_addr;		/* to KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 pad;		/* explicit padding */
};

/* Arguments for AMDKFD_IOC_ACQUIRE_VM. */
struct kfd_ioctl_acquire_vm_args {
	__u32 drm_fd;	/* to KFD */
	__u32 gpu_id;	/* to KFD */
};

/* Bits for kfd_ioctl_alloc_memory_of_gpu_args.flags; also reported in
 * kfd_ioctl_get_dmabuf_info_args.flags.
 *
 * Memory-type bits are allocated upwards from bit 0, attribute bits
 * downwards from bit 31.
 */
/* Allocation flags: memory types */
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM		(1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT		(1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP	(1 << 4)
/* Allocation flags: attributes/access options */
/* NOTE(review): (1 << 31) shifts into the sign bit of a 32-bit int;
 * the result is only the intended bit once converted to __u32.
 */
#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC		(1 << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE	(1 << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
#define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED	(1 << 25)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT	(1 << 24)
#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT	(1 << 23)

/* Allocate memory for later SVM (shared virtual memory) mapping.
 * Arguments for AMDKFD_IOC_ALLOC_MEMORY_OF_GPU.
 *
 * @va_addr:     virtual address of the memory to be allocated
 *               all later mappings on all GPUs will use this address
 * @size:        size in bytes
 * @handle:      buffer handle returned to user mode, used to refer to
 *               this allocation for mapping, unmapping and freeing
 * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
 *               for userptrs this is overloaded to specify the CPU address
 * @gpu_id:      device identifier
 * @flags:       memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
 */
struct kfd_ioctl_alloc_memory_of_gpu_args {
	__u64 va_addr;		/* to KFD */
	__u64 size;		/* to KFD */
	__u64 handle;		/* from KFD */
	__u64 mmap_offset;	/* to KFD (userptr), from KFD (mmap offset) */
	__u32 gpu_id;		/* to KFD */
	__u32 flags;		/* KFD_IOC_ALLOC_MEM_FLAGS_* bits */
};

/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu.
 * Arguments for AMDKFD_IOC_FREE_MEMORY_OF_GPU.
 *
 * @handle: memory handle returned by alloc
 */
struct kfd_ioctl_free_memory_of_gpu_args {
	__u64 handle;		/* to KFD */
};

/* Inquire available memory with kfd_ioctl_get_available_memory.
 * Arguments for AMDKFD_IOC_AVAILABLE_MEMORY.
 *
 * @available: memory available for alloc
 * @gpu_id:    device to query
 */
struct  kfd_ioctl_get_available_memory_args {
	__u64 available;	/* from KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 pad;		/* explicit padding */
};

/* Map memory to one or more GPUs.
 * Arguments for AMDKFD_IOC_MAP_MEMORY_TO_GPU.
 *
 * @handle:                memory handle returned by alloc
 * @device_ids_array_ptr:  array of gpu_ids (__u32 per device)
 * @n_devices:             number of devices in the array
 * @n_success:             number of devices mapped successfully
 *
 * @n_success returns information to the caller how many devices from
 * the start of the array have mapped the buffer successfully. It can
 * be passed into a subsequent retry call to skip those devices. For
 * the first call the caller should initialize it to 0.
 *
 * If the ioctl completes with return code 0 (success), n_success ==
 * n_devices.
 */
struct kfd_ioctl_map_memory_to_gpu_args {
	__u64 handle;			/* to KFD */
	__u64 device_ids_array_ptr;	/* to KFD */
	__u32 n_devices;		/* to KFD */
	__u32 n_success;		/* to/from KFD */
};

/* Unmap memory from one or more GPUs.
 * Arguments for AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU.
 *
 * same arguments as for mapping (see struct
 * kfd_ioctl_map_memory_to_gpu_args)
 */
struct kfd_ioctl_unmap_memory_from_gpu_args {
	__u64 handle;			/* to KFD */
	__u64 device_ids_array_ptr;	/* to KFD */
	__u32 n_devices;		/* to KFD */
	__u32 n_success;		/* to/from KFD */
};

/* Allocate GWS for specific queue.
 * Arguments for AMDKFD_IOC_ALLOC_QUEUE_GWS.
 *
 * @queue_id:    queue's id that GWS is allocated for
 * @num_gws:     how many GWS to allocate
 * @first_gws:   index of the first GWS allocated.
 *               only support contiguous GWS allocation
 */
struct kfd_ioctl_alloc_queue_gws_args {
	__u32 queue_id;		/* to KFD */
	__u32 num_gws;		/* to KFD */
	__u32 first_gws;	/* from KFD */
	__u32 pad;		/* explicit padding */
};

/* Arguments for AMDKFD_IOC_GET_DMABUF_INFO. */
struct kfd_ioctl_get_dmabuf_info_args {
	__u64 size;		/* from KFD */
	__u64 metadata_ptr;	/* to KFD */
	__u32 metadata_size;	/* to KFD (space allocated by user)
				 * from KFD (actual metadata size)
				 */
	__u32 gpu_id;	/* from KFD */
	__u32 flags;		/* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
	__u32 dmabuf_fd;	/* to KFD */
};

/* Arguments for AMDKFD_IOC_IMPORT_DMABUF. */
struct kfd_ioctl_import_dmabuf_args {
	__u64 va_addr;	/* to KFD */
	__u64 handle;	/* from KFD */
	__u32 gpu_id;	/* to KFD */
	__u32 dmabuf_fd;	/* to KFD */
};

/* Arguments for AMDKFD_IOC_EXPORT_DMABUF. */
struct kfd_ioctl_export_dmabuf_args {
	__u64 handle;		/* to KFD */
	__u32 flags;		/* to KFD */
	__u32 dmabuf_fd;	/* from KFD */
};

/*
 * KFD SMI(System Management Interface) events
 */
/* Event indices; convert to a mask bit with KFD_SMI_EVENT_MASK_FROM_INDEX(). */
enum kfd_smi_event {
	KFD_SMI_EVENT_NONE = 0, /* not used */
	KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
	KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
	KFD_SMI_EVENT_GPU_PRE_RESET = 3,
	KFD_SMI_EVENT_GPU_POST_RESET = 4,
};

/* Mask bit for event index @i: index 1 maps to bit 0, index 2 to bit 1, ...
 * @i must be >= 1; KFD_SMI_EVENT_NONE (0) would yield a negative shift count.
 */
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
/* NOTE(review): presumably the size in bytes of one SMI event message - confirm */
#define KFD_SMI_EVENT_MSG_SIZE	96

/* Arguments for AMDKFD_IOC_SMI_EVENTS. */
struct kfd_ioctl_smi_events_args {
	__u32 gpuid;	/* to KFD */
	__u32 anon_fd;	/* from KFD */
};

/**
 * kfd_ioctl_spm_op - SPM ioctl operations
 *
 * @KFD_IOCTL_SPM_OP_ACQUIRE: acquire exclusive access to SPM
 * @KFD_IOCTL_SPM_OP_RELEASE: release exclusive access to SPM
 * @KFD_IOCTL_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming
 */
/* Values for kfd_ioctl_spm_args.op; numbering is implicit, starting at 0. */
enum kfd_ioctl_spm_op {
	KFD_IOCTL_SPM_OP_ACQUIRE,	/* 0 */
	KFD_IOCTL_SPM_OP_RELEASE,	/* 1 */
	KFD_IOCTL_SPM_OP_SET_DEST_BUF	/* 2 */
};

/**
 * kfd_ioctl_spm_args - Arguments for SPM ioctl
 *
 * @op[in]:            specifies the operation to perform
 * @gpu_id[in]:        GPU ID of the GPU to profile
 * @dest_buf[in]:      used for the address of the destination buffer
 *                      in @KFD_IOCTL_SPM_OP_SET_DEST_BUF
 * @buf_size[in]:      size of the destination buffer
 * @timeout[in/out]:   [in]: timeout in milliseconds, [out]: amount of time left
 *                      in the timeout window
 * @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
 * @has_data_loss:     total count for sub-block which has data loss
 *
 * This ioctl performs different functions depending on the @op parameter.
 *
 * KFD_IOCTL_SPM_OP_ACQUIRE
 * ------------------------
 *
 * Acquires exclusive access of SPM on the specified @gpu_id for the calling process.
 * This must be called before using KFD_IOCTL_SPM_OP_SET_DEST_BUF.
 *
 * KFD_IOCTL_SPM_OP_RELEASE
 * ------------------------
 *
 * Releases exclusive access of SPM on the specified @gpu_id for the calling process,
 * which allows another process to acquire it in the future.
 *
 * KFD_IOCTL_SPM_OP_SET_DEST_BUF
 * -----------------------------
 *
 * If @dest_buf is NULL, the destination buffer address is unset and copying of counters
 * is stopped.
 *
 * If @dest_buf is not NULL, it specifies the pointer to a new destination buffer.
 * @buf_size specifies the size of the buffer.
 *
 * If @timeout is non-0, the call will wait for up to @timeout ms for the previous
 * buffer to be filled. If the previous buffer is filled before the timeout expires,
 * @timeout will be updated with the time remaining. If the timeout is exceeded, the function
 * copies any partial data available into the previous user buffer and returns success.
 * The amount of valid data in the previous user buffer is indicated by @bytes_copied.
 *
 * If @timeout is 0, the function immediately replaces the previous destination buffer
 * without waiting for the previous buffer to be filled. That means the previous buffer
 * may only be partially filled, and @bytes_copied will indicate how much data has been
 * copied to it.
 *
 * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0.
 *
 * Returns negative error code on failure, 0 on success.
 */
struct kfd_ioctl_spm_args {
	__u64 dest_buf;		/* in: destination buffer address, 0 to unset */
	__u32 buf_size;		/* in: size of the destination buffer */
	__u32 op;		/* in: enum kfd_ioctl_spm_op */
	__u32 timeout;		/* in/out: milliseconds */
	__u32 gpu_id;		/* in: GPU to profile */
	__u32 bytes_copied;	/* out */
	__u32 has_data_loss;	/* out: non-0 if data was lost */
};

/**
 * kfd_ioctl_spm_buffer_header - SPM Buffer header for kfd_ioctl_spm_args->dest_buf
 *
 * @version        [out]: spm version
 * @bytes_copied   [out]: amount of data for each sub-block
 * @has_data_loss: [out]: boolean indicating whether data was lost for each sub-block
 *                        (e.g. due to a ring-buffer overflow)
 */
struct kfd_ioctl_spm_buffer_header {
	__u32 version; /* 0-23: minor 24-31: major */
	__u32 bytes_copied;	/* out: amount of data for the sub-block */
	__u32 has_data_loss;	/* out: whether data was lost for the sub-block */
	__u32 reserved[5];	/* rounds the header up to 8 __u32 (32 bytes) */
};

/**************************************************************************************************
 * CRIU IOCTLs (Checkpoint Restore In Userspace)
 *
 * When checkpointing a process, the userspace application will perform:
 * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts
 *    all the queues.
 * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges)
 * 3. UNPAUSE op to un-evict all the queues
 *
 * When restoring a process, the CRIU userspace application will perform:
 *
 * 1. RESTORE op to restore process contents
 * 2. RESUME op to start the process
 *
 * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User
 * application needs to perform an UNPAUSE operation after calling PROCESS_INFO.
 */

/* Values for kfd_ioctl_criu_args.op; numbering is implicit, starting at 0. */
enum kfd_criu_op {
	KFD_CRIU_OP_PROCESS_INFO,	/* 0 */
	KFD_CRIU_OP_CHECKPOINT,		/* 1 */
	KFD_CRIU_OP_UNPAUSE,		/* 2 */
	KFD_CRIU_OP_RESTORE,		/* 3 */
	KFD_CRIU_OP_RESUME,		/* 4 */
};

/**
 * kfd_ioctl_criu_args - Arguments to perform a CRIU operation
 *			 (AMDKFD_IOC_CRIU_OP)
 * @devices:		[in/out] User pointer to memory location for devices information.
 * 			This is an array of type kfd_criu_device_bucket.
 * @bos:		[in/out] User pointer to memory location for BOs information
 * 			This is an array of type kfd_criu_bo_bucket.
 * @priv_data:		[in/out] User pointer to memory location for private data
 * @priv_data_size:	[in/out] Size of priv_data in bytes
 * @num_devices:	[in/out] Number of GPUs used by process. Size of @devices array.
 * @num_bos:		[in/out] Number of BOs used by process. Size of @bos array.
 * @num_objects:	[in/out] Number of objects used by process. Objects are opaque to
 *				 user application.
 * @pid:		[in/out] PID of the process being checkpointed
 * @op:			[in] Type of operation (kfd_criu_op)
 *
 * Return: 0 on success, -errno on failure
 */
struct kfd_ioctl_criu_args {
	__u64 devices;		/* Used during ops: CHECKPOINT, RESTORE */
	__u64 bos;		/* Used during ops: CHECKPOINT, RESTORE */
	__u64 priv_data;	/* Used during ops: CHECKPOINT, RESTORE */
	__u64 priv_data_size;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_devices;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_bos;		/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_objects;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 pid;		/* Used during ops: PROCESS_INFO, RESUME */
	__u32 op;		/* enum kfd_criu_op */
};

/* Element type of the array that kfd_ioctl_criu_args.devices points to. */
struct kfd_criu_device_bucket {
	__u32 user_gpu_id;
	__u32 actual_gpu_id;
	__u32 drm_fd;
	__u32 pad;	/* explicit padding */
};

/* Element type of the array that kfd_ioctl_criu_args.bos points to. */
struct kfd_criu_bo_bucket {
	__u64 addr;
	__u64 size;
	__u64 offset;
	__u64 restored_offset;    /* During restore, updated offset for BO */
	__u32 gpu_id;             /* This is the user_gpu_id */
	__u32 alloc_flags;        /* NOTE(review): presumably KFD_IOC_ALLOC_MEM_FLAGS_* - confirm */
	__u32 dmabuf_fd;
	__u32 pad;                /* explicit padding */
};

/* CRIU IOCTLs - END */
/**************************************************************************************************/
/* Register offset inside the remapped mmio page.
 * The two entries are 4 bytes apart.
 */
enum kfd_mmio_remap {
	KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
	KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
};

/* Arguments for AMDKFD_IOC_IPC_EXPORT_HANDLE (non-upstream ioctl). */
struct kfd_ioctl_ipc_export_handle_args {
	__u64 handle;		/* to KFD */
	__u32 share_handle[4];	/* from KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 flags;		/* to KFD */
};

/* Arguments for AMDKFD_IOC_IPC_IMPORT_HANDLE (non-upstream ioctl).
 * @share_handle has the same shape as the value produced in
 * kfd_ioctl_ipc_export_handle_args.share_handle.
 */
struct kfd_ioctl_ipc_import_handle_args {
	__u64 handle;		/* from KFD */
	__u64 va_addr;		/* to KFD */
	__u64 mmap_offset;	/* from KFD */
	__u32 share_handle[4];	/* to KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 flags;		/* from KFD */
};

/* An address range given as start address plus size.
 * NOTE(review): presumably the element type of the src/dst range arrays
 * in struct kfd_ioctl_cross_memory_copy_args - confirm.
 */
struct kfd_memory_range {
	__u64 va_addr;
	__u64 size;
};

/* flags definitions
 * BIT0: 0: read operation, 1: write operation.
 * This also identifies if the src or dst array belongs to remote process
 *
 * The helper macros parenthesize their argument so that an expression
 * such as (a | b) is evaluated as a whole before the bit is tested or
 * modified. KFD_SET_* need a modifiable lvalue.
 */
#define KFD_CROSS_MEMORY_RW_BIT (1 << 0)
#define KFD_SET_CROSS_MEMORY_READ(flags) ((flags) &= ~KFD_CROSS_MEMORY_RW_BIT)
#define KFD_SET_CROSS_MEMORY_WRITE(flags) ((flags) |= KFD_CROSS_MEMORY_RW_BIT)
#define KFD_IS_CROSS_MEMORY_WRITE(flags) ((flags) & KFD_CROSS_MEMORY_RW_BIT)

/* Arguments for AMDKFD_IOC_CROSS_MEMORY_COPY (non-upstream ioctl). */
struct kfd_ioctl_cross_memory_copy_args {
	/* to KFD: Process ID of the remote process */
	__u32 pid;
	/* to KFD: See above definition (KFD_CROSS_MEMORY_RW_BIT) */
	__u32 flags;
	/* to KFD: Source GPU VM range */
	__u64 src_mem_range_array;
	/* to KFD: Size of above array */
	__u64 src_mem_array_size;
	/* to KFD: Destination GPU VM range */
	__u64 dst_mem_range_array;
	/* to KFD: Size of above array */
	__u64 dst_mem_array_size;
	/* from KFD: Total amount of bytes copied */
	__u64 bytes_copied;
};

/* SVM flag bits, used as the value of the KFD_IOCTL_SVM_ATTR_SET_FLAGS
 * and KFD_IOCTL_SVM_ATTR_CLR_FLAGS attributes.
 */
/* Guarantee host access to memory */
#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001
/* Fine grained coherency between all devices with access */
#define KFD_IOCTL_SVM_FLAG_COHERENT    0x00000002
/* Use any GPU in same hive as preferred device */
#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL  0x00000004
/* GPUs only read, allows replication */
#define KFD_IOCTL_SVM_FLAG_GPU_RO      0x00000008
/* Allow execution on GPU */
#define KFD_IOCTL_SVM_FLAG_GPU_EXEC    0x00000010
/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */
#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
/* Keep GPU memory mapping always valid as if XNACK is disabled */
#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
/* Fine grained coherency between all devices using device-scope atomics */
#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT        0x00000080

/**
 * kfd_ioctl_svm_op - SVM ioctl operations
 *
 * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes
 * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes
 */
/* Values for kfd_ioctl_svm_args.op; numbering is implicit, starting at 0. */
enum kfd_ioctl_svm_op {
	KFD_IOCTL_SVM_OP_SET_ATTR,	/* 0 */
	KFD_IOCTL_SVM_OP_GET_ATTR	/* 1 */
};

/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations
 *
 * GPU IDs are used to specify GPUs as preferred and prefetch locations.
 * Below definitions are used for system memory or for leaving the preferred
 * location unspecified.
 */
enum kfd_ioctl_svm_location {
	KFD_IOCTL_SVM_LOCATION_SYSMEM = 0,		/* system memory */
	KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff	/* location left unspecified */
};

/**
 * kfd_ioctl_svm_attr_type - SVM attribute types
 *
 * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for
 *                                    system memory
 * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for
 *                                   system memory. Setting this triggers an
 *                                   immediate prefetch (migration).
 * @KFD_IOCTL_SVM_ATTR_ACCESS:
 * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
 * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given
 *                                by the attribute value
 * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see
 *                                KFD_IOCTL_SVM_FLAG_...)
 * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear
 * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity
 *                                  (log2 num pages)
 */
/* Values for kfd_ioctl_svm_attribute.type; numbering is implicit, starting at 0. */
enum kfd_ioctl_svm_attr_type {
	KFD_IOCTL_SVM_ATTR_PREFERRED_LOC,	/* 0 */
	KFD_IOCTL_SVM_ATTR_PREFETCH_LOC,	/* 1 */
	KFD_IOCTL_SVM_ATTR_ACCESS,		/* 2 */
	KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE,	/* 3 */
	KFD_IOCTL_SVM_ATTR_NO_ACCESS,		/* 4 */
	KFD_IOCTL_SVM_ATTR_SET_FLAGS,		/* 5 */
	KFD_IOCTL_SVM_ATTR_CLR_FLAGS,		/* 6 */
	KFD_IOCTL_SVM_ATTR_GRANULARITY		/* 7 */
};

/**
 * kfd_ioctl_svm_attribute - Attributes as pairs of type and value
 *
 * The meaning of the @value depends on the attribute type.
 *
 * @type: attribute type (see enum @kfd_ioctl_svm_attr_type)
 * @value: attribute value
 */
struct kfd_ioctl_svm_attribute {
	__u32 type;	/* enum kfd_ioctl_svm_attr_type */
	__u32 value;	/* interpretation depends on @type */
};

/**
 * kfd_ioctl_svm_args - Arguments for SVM ioctl
 *
 * @op specifies the operation to perform (see enum
 * @kfd_ioctl_svm_op).  @start_addr and @size are common for all
 * operations.
 *
 * A variable number of attributes can be given in @attrs.
 * @nattr specifies the number of attributes. New attributes can be
 * added in the future without breaking the ABI. If unknown attributes
 * are given, the function returns -EINVAL.
 *
 * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address
 * range. It may overlap existing virtual address ranges. If it does,
 * the existing ranges will be split such that the attribute changes
 * only apply to the specified address range.
 *
 * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes
 * over all memory in the given range and returns the result as the
 * attribute value. If different pages have different preferred or
 * prefetch locations, 0xffffffff will be returned for
 * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or
 * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For
 * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be
 * aggregated by bitwise AND. That means, a flag will be set in the
 * output, if that flag is set for all pages in the range. For
 * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be
 * aggregated by bitwise NOR. That means, a flag will be set in the
 * output, if that flag is clear for all pages in the range.
 * The minimum migration granularity throughout the range will be
 * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY.
 *
 * Querying of accessibility attributes works by initializing the
 * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the
 * GPUID being queried. Multiple attributes can be given to allow
 * querying multiple GPUIDs. The ioctl function overwrites the
 * attribute type to indicate the access for the specified GPU.
 */
struct kfd_ioctl_svm_args {
	__u64 start_addr;	/* start of the virtual address range */
	__u64 size;		/* size of the virtual address range */
	__u32 op;		/* enum kfd_ioctl_svm_op */
	__u32 nattr;		/* number of entries in attrs[] */
	/* Variable length array of attributes */
	struct kfd_ioctl_svm_attribute attrs[];
};

/**
 * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode
 *
 * @xnack_enabled:       [in/out] Whether to enable XNACK mode for this process
 *
 * @xnack_enabled indicates whether recoverable page faults should be
 * enabled for the current process. 0 means disabled, positive means
 * enabled, negative means leave unchanged. If enabled, virtual address
 * translations on GFXv9 and later AMD GPUs can return XNACK and retry
 * the access until a valid PTE is available. This is used to implement
 * device page faults.
 *
 * On output, @xnack_enabled returns the (new) current mode (0 or
 * positive). Therefore, a negative input value can be used to query
 * the current mode without changing it.
 *
 * The XNACK mode fundamentally changes the way SVM managed memory works
 * in the driver, with subtle effects on application performance and
 * functionality.
 *
 * Enabling XNACK mode requires shader programs to be compiled
 * differently. Furthermore, not all GPUs support changing the mode
 * per-process. Therefore changing the mode is only allowed while no
 * user mode queues exist in the process. This ensures that no shader
 * code is running that may be compiled for the wrong mode. And GPUs
 * that cannot change to the requested mode will prevent the XNACK
 * mode from occurring. All GPUs used by the process must be in the
 * same XNACK mode.
 *
 * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
 * Therefore those GPUs are not considered for the XNACK mode switch.
 *
 * Return: 0 on success, -errno on failure
 */
struct kfd_ioctl_set_xnack_mode_args {
	/* in: 0 = disable, positive = enable, negative = leave unchanged;
	 * out: the resulting current mode (0 or positive)
	 */
	__s32 xnack_enabled;
};

/**
 * kfd_ioctl_pc_sample_op - PC Sampling ioctl operations
 *
 * @KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES: Query device PC Sampling capabilities
 * @KFD_IOCTL_PCS_OP_CREATE:             Register this process with a per-device PC sampler instance
 * @KFD_IOCTL_PCS_OP_DESTROY:            Unregister from a previously registered PC sampler instance
 * @KFD_IOCTL_PCS_OP_START:              Process begins taking samples from a previously registered PC sampler instance
 * @KFD_IOCTL_PCS_OP_STOP:               Process stops taking samples from a previously registered PC sampler instance
 */
/* Values for kfd_ioctl_pc_sample_args.op; numbering is implicit, starting at 0. */
enum kfd_ioctl_pc_sample_op {
	KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES,	/* 0 */
	KFD_IOCTL_PCS_OP_CREATE,		/* 1 */
	KFD_IOCTL_PCS_OP_DESTROY,		/* 2 */
	KFD_IOCTL_PCS_OP_START,			/* 3 */
	KFD_IOCTL_PCS_OP_STOP,			/* 4 */
};

/* Values have to be a power of 2. Reported in kfd_pc_sample_info.flags. */
#define KFD_IOCTL_PCS_FLAG_POWER_OF_2 0x00000001

/* Values for kfd_pc_sample_info.method. Numbering starts at 1. */
enum kfd_ioctl_pc_sample_method {
	KFD_IOCTL_PCS_METHOD_HOSTTRAP = 1,
	KFD_IOCTL_PCS_METHOD_STOCHASTIC,	/* 2 */
};

/* Values for kfd_pc_sample_info.type; they select the unit of
 * kfd_pc_sample_info.interval. Numbering is implicit, starting at 0.
 */
enum kfd_ioctl_pc_sample_type {
	KFD_IOCTL_PCS_TYPE_TIME_US,		/* 0 */
	KFD_IOCTL_PCS_TYPE_CLOCK_CYCLES,	/* 1 */
	KFD_IOCTL_PCS_TYPE_INSTRUCTIONS		/* 2 */
};

/* One PC sampling configuration; kfd_ioctl_pc_sample_args.sample_info_ptr
 * points to an array of these.
 */
struct kfd_pc_sample_info {
	__u64 interval;      /* [IN] if PCS_TYPE_TIME_US: sample interval in us
	                      * if PCS_TYPE_CLOCK_CYCLES: sample interval in graphics core clk cycles
	                      * if PCS_TYPE_INSTRUCTIONS: sample interval in instructions issued by
	                      * graphics compute units
	                      */
	__u64 interval_min;  /* [OUT] */
	__u64 interval_max;  /* [OUT] */
	__u64 flags;         /* [OUT] indicate potential restrictions e.g FLAG_POWER_OF_2 */
	__u32 method;        /* [IN/OUT] kfd_ioctl_pc_sample_method */
	__u32 type;          /* [IN/OUT] kfd_ioctl_pc_sample_type */
};

/* Bit for kfd_ioctl_pc_sample_args.flags */
#define KFD_IOCTL_PCS_QUERY_TYPE_FULL (1 << 0) /* If not set, return current */

/* Arguments for AMDKFD_IOC_PC_SAMPLE (non-upstream ioctl). */
struct kfd_ioctl_pc_sample_args {
	__u64 sample_info_ptr;   /* array of kfd_pc_sample_info */
	__u32 num_sample_info;   /* NOTE(review): presumably the entry count of that array - confirm */
	__u32 op;                /* kfd_ioctl_pc_sample_op */
	__u32 gpu_id;
	__u32 trace_id;
	__u32 flags;             /* kfd_ioctl_pcs_query flags (KFD_IOCTL_PCS_QUERY_*) */
	__u32 reserved;
};

/* ioctl "magic" character shared by all KFD ioctls, and thin wrappers
 * around _IO/_IOR/_IOW/_IOWR that supply it.
 */
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOW(nr, type)		_IOW(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOWR(nr, type)		_IOWR(AMDKFD_IOCTL_BASE, nr, type)

/* Upstream ioctl numbers 0x01..0x26. Each request code encodes the
 * command number together with its argument struct, so neither may
 * change once published.
 */
#define AMDKFD_IOC_GET_VERSION			\
		AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)

#define AMDKFD_IOC_CREATE_QUEUE			\
		AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)

#define AMDKFD_IOC_DESTROY_QUEUE		\
		AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)

#define AMDKFD_IOC_SET_MEMORY_POLICY		\
		AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)

#define AMDKFD_IOC_GET_CLOCK_COUNTERS		\
		AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)

#define AMDKFD_IOC_GET_PROCESS_APERTURES	\
		AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)

#define AMDKFD_IOC_UPDATE_QUEUE			\
		AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)

#define AMDKFD_IOC_CREATE_EVENT			\
		AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)

#define AMDKFD_IOC_DESTROY_EVENT		\
		AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)

#define AMDKFD_IOC_SET_EVENT			\
		AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)

#define AMDKFD_IOC_RESET_EVENT			\
		AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)

#define AMDKFD_IOC_WAIT_EVENTS			\
		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)

/* 0x0D..0x10: deprecated debugger interface */
#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED	\
		AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)

#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED	\
		AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)

#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED	\
		AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)

#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED	\
		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)

#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
		AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)

#define AMDKFD_IOC_GET_TILE_CONFIG                                      \
		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)

#define AMDKFD_IOC_SET_TRAP_HANDLER		\
		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)

#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW	\
		AMDKFD_IOWR(0x14,		\
			struct kfd_ioctl_get_process_apertures_new_args)

#define AMDKFD_IOC_ACQUIRE_VM			\
		AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)

#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU		\
		AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)

#define AMDKFD_IOC_FREE_MEMORY_OF_GPU		\
		AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)

#define AMDKFD_IOC_MAP_MEMORY_TO_GPU		\
		AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)

#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)

#define AMDKFD_IOC_SET_CU_MASK		\
		AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)

#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE		\
		AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)

#define AMDKFD_IOC_GET_DMABUF_INFO		\
		AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)

#define AMDKFD_IOC_IMPORT_DMABUF		\
		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)

#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)

#define AMDKFD_IOC_SMI_EVENTS			\
		AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)

#define AMDKFD_IOC_SVM	AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)

#define AMDKFD_IOC_SET_XNACK_MODE		\
		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)

#define AMDKFD_IOC_CRIU_OP			\
		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)

#define AMDKFD_IOC_AVAILABLE_MEMORY		\
		AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)

#define AMDKFD_IOC_EXPORT_DMABUF		\
		AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args)

#define AMDKFD_IOC_RUNTIME_ENABLE		\
		AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args)

#define AMDKFD_IOC_DBG_TRAP			\
		AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args)

/* Upstream command number range: START is the first number in use,
 * END is one past the last (0x26).
 */
#define AMDKFD_COMMAND_START		0x01
#define AMDKFD_COMMAND_END		0x27

/* non-upstream ioctls: numbered from 0x80 so they stay clear of the
 * upstream range above. Number 0x82 is not defined here.
 */
#define AMDKFD_IOC_IPC_IMPORT_HANDLE                                    \
		AMDKFD_IOWR(0x80, struct kfd_ioctl_ipc_import_handle_args)

#define AMDKFD_IOC_IPC_EXPORT_HANDLE		\
		AMDKFD_IOWR(0x81, struct kfd_ioctl_ipc_export_handle_args)

#define AMDKFD_IOC_CROSS_MEMORY_COPY		\
		AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args)

#define AMDKFD_IOC_RLC_SPM		\
		AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args)

#define AMDKFD_IOC_PC_SAMPLE		\
		AMDKFD_IOWR(0x85, struct kfd_ioctl_pc_sample_args)

/* Non-upstream command number range: END_2 is one past the last (0x85). */
#define AMDKFD_COMMAND_START_2		0x80
#define AMDKFD_COMMAND_END_2		0x86

#endif


================================================
FILE: libhsakmt/include/hsakmt/linux/udmabuf.h
================================================
/* GPL-2.0 WITH Linux-syscall-note */
/*
 * This file was copied from the linux-libc-dev package.
 * This header provides the interface to the Linux kernel udmabuf driver.
 * Modifications may have been made.
 */
#ifndef _THUNK_UDMABUF_H
#define _THUNK_UDMABUF_H

#include <linux/types.h>
#include <linux/ioctl.h>

/* NOTE(review): presumably a bit for the "flags" field of struct
 * udmabuf_create / struct udmabuf_create_list - confirm.
 */
#define UDMABUF_FLAGS_CLOEXEC   0x01

/* Argument of the UDMABUF_CREATE ioctl. */
struct udmabuf_create {
        __u32 memfd;    /* NOTE(review): presumably a memfd file descriptor - confirm */
        __u32 flags;    /* NOTE(review): presumably UDMABUF_FLAGS_* - confirm */
        __u64 offset;
        __u64 size;
};

/* One entry of udmabuf_create_list.list[]. Same fields as struct
 * udmabuf_create, with padding in place of the flags word.
 */
struct udmabuf_create_item {
        __u32 memfd;
        __u32 __pad;    /* explicit padding */
        __u64 offset;
        __u64 size;
};

/* Argument of the UDMABUF_CREATE_LIST ioctl: a header followed by a
 * variable-length array of items.
 */
struct udmabuf_create_list {
        __u32 flags;
        __u32 count;    /* NOTE(review): presumably the number of entries in list[] - confirm */
        struct udmabuf_create_item list[];
};

/* ioctl request codes of the udmabuf driver (magic 'u', numbers 0x42 and 0x43). */
#define UDMABUF_CREATE       _IOW('u', 0x42, struct udmabuf_create)
#define UDMABUF_CREATE_LIST  _IOW('u', 0x43, struct udmabuf_create_list)

#endif /* _THUNK_UDMABUF_H */


================================================
FILE: libhsakmt/libhsakmt.pc.in
================================================
prefix=${pcfiledir}/../..
exec_prefix=${prefix}
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@

Name: libhsakmt
Description: HSA Kernel Mode Thunk library for AMD KFD support
Version: @LIB_VERSION_STRING@

Libs: -L${libdir} -lhsakmt
Cflags: -I${includedir}


================================================
FILE: libhsakmt/src/debug.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Per-node flag array read by hsakmt_debug_get_reg_status(). It is allocated
 * by hsakmt_init_device_debugging_memory() and is NULL before that call and
 * after hsakmt_destroy_device_debugging_memory().
 */
static bool *is_device_debugged;
/* Capabilities mask stored by the last successful hsaKmtRuntimeEnable() and
 * handed back by hsaKmtGetRuntimeCapabilities().
 */
static uint32_t runtime_capabilities_mask = 0;

/* Allocate the per-node debug flag array and clear every entry.
 *
 * NumNodes: number of flags to allocate.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the allocation fails and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes)
{
	const size_t flag_bytes = NumNodes * sizeof(bool);

	is_device_debugged = malloc(flag_bytes);
	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	/* An all-zero byte pattern is the representation of 'false'. */
	memset(is_device_debugged, 0, flag_bytes);

	return HSAKMT_STATUS_SUCCESS;
}

/* Release the per-node debug flag array and reset the pointer so that later
 * callers can detect that it is gone.
 */
void hsakmt_destroy_device_debugging_memory(void)
{
	/* free(NULL) is a no-op, so no explicit guard is needed. */
	free(is_device_debugged);
	is_device_debugged = NULL;
}

/* Return the debug flag stored for node_id.
 *
 * Returns false when the flag array has not been allocated (or has already
 * been destroyed) instead of dereferencing a NULL pointer.
 *
 * node_id is not range-checked here: the array length is not recorded in
 * this file, so the caller must pass an index below the NumNodes value given
 * to hsakmt_init_device_debugging_memory().
 */
bool hsakmt_debug_get_reg_status(uint32_t node_id)
{
	if (!is_device_debugged)
		return false;

	return is_device_debugged[node_id];
}

/* Issue the deprecated debugger-register ioctl for the given node.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the per-node debug array was never
 * allocated, the status of hsakmt_validate_nodeid() when NodeId cannot be
 * resolved, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
{
	struct kfd_ioctl_dbg_register_args reg_args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	status = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	reg_args.gpu_id = gpu_id;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &reg_args) != 0)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/* Issue the deprecated debugger-unregister ioctl for the given node.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the per-node debug array was never
 * allocated, the status of hsakmt_validate_nodeid() when NodeId cannot be
 * resolved, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
{
	struct kfd_ioctl_dbg_unregister_args unreg_args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;
	long ioctl_ret;

	CHECK_KFD_OPEN();

	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	status = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	unreg_args.gpu_id = gpu_id;
	ioctl_ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &unreg_args);

	return ioctl_ret ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/* Send a wavefront-control request through the deprecated debugger ioctl.
 *
 * The ioctl takes a fixed header (struct kfd_ioctl_dbg_wave_control_args)
 * followed by a packed, variable payload laid out in this order:
 * Operand, Mode, TrapId, DbgWaveMsgRing->DbgWaveMsg, DbgWaveMsgRing->MemoryVA.
 *
 * Returns the status of hsakmt_validate_nodeid() when NodeId cannot be
 * resolved, HSAKMT_STATUS_INVALID_PARAMETER when DbgWaveMsgRing is NULL,
 * HSAKMT_STATUS_ERROR when the buffer cannot be allocated or the ioctl
 * fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
						  HSA_DBG_WAVEOP Operand,
						  HSA_DBG_WAVEMODE Mode,
						  HSAuint32 TrapId,
						  HsaDbgWaveMessage *DbgWaveMsgRing)
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;

	struct kfd_ioctl_dbg_wave_control_args *args;

	CHECK_KFD_OPEN();

	result = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	/* The payload is read from this structure below. */
	if (!DbgWaveMsgRing)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Determine size of the ioctl buffer: fixed header plus payload */
	uint32_t buff_size = sizeof(Operand) + sizeof(Mode) + sizeof(TrapId) +
			     sizeof(DbgWaveMsgRing->DbgWaveMsg) +
			     sizeof(DbgWaveMsgRing->MemoryVA) + sizeof(*args);

	args = calloc(1, buff_size);
	if (!args)
		return HSAKMT_STATUS_ERROR;

	args->gpu_id = gpu_id;
	args->buf_size_in_bytes = buff_size;

	/* pointer to the start of the non fixed part */
	unsigned char *run_ptr = (unsigned char *)args + sizeof(*args);

	/* save variable content pointer for kfd */
	args->content_ptr = (uint64_t)run_ptr;

	/* Insert the items back to back. memcpy() is used instead of a store
	 * through a casted pointer because the packed offsets are not
	 * guaranteed to satisfy the alignment of each item's type.
	 */
	memcpy(run_ptr, &Operand, sizeof(Operand));
	run_ptr += sizeof(Operand);

	memcpy(run_ptr, &Mode, sizeof(Mode));
	run_ptr += sizeof(Mode);

	memcpy(run_ptr, &TrapId, sizeof(TrapId));
	run_ptr += sizeof(TrapId);

	memcpy(run_ptr, &DbgWaveMsgRing->DbgWaveMsg,
	       sizeof(DbgWaveMsgRing->DbgWaveMsg));
	run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg);

	memcpy(run_ptr, &DbgWaveMsgRing->MemoryVA,
	       sizeof(DbgWaveMsgRing->MemoryVA));

	/* send to kernel */
	long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);

	free(args);

	if (err)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/* Program address watch points through the deprecated debugger ioctl.
 *
 * The ioctl takes a fixed header (struct kfd_ioctl_dbg_address_watch_args)
 * followed by a packed payload laid out in this order:
 *   NumWatchPoints,
 *   NumWatchPoints watch modes,
 *   NumWatchPoints watch addresses,
 *   the watch masks (NumWatchPoints of them when WatchMask[0] is non-zero,
 *   otherwise a single entry),
 *   NumWatchPoints event pointers (only when WatchEvent is not NULL).
 *
 * A NULL WatchMask is accepted and encoded exactly like WatchMask[0] == 0,
 * i.e. as one zero mask entry.
 *
 * Returns the status of hsakmt_validate_nodeid() when NodeId cannot be
 * resolved, HSAKMT_STATUS_INVALID_PARAMETER when NumWatchPoints exceeds
 * MAX_ALLOWED_NUM_POINTS or a required array is NULL, HSAKMT_STATUS_ERROR
 * when the buffer cannot be allocated or the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
					      HSAuint32 NumWatchPoints,
					      HSA_DBG_WATCH_MODE WatchMode[],
					      void *WatchAddress[],
					      HSAuint64 WatchMask[],
					      HsaEvent *WatchEvent[])
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	struct kfd_ioctl_dbg_address_watch_args *args;

	CHECK_KFD_OPEN();

	result = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	if (NumWatchPoints > MAX_ALLOWED_NUM_POINTS)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Mode and address vectors are read for every watch point. */
	if (NumWatchPoints && (!WatchMode || !WatchAddress))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Determine the size of the watch mask and event vectors. The mask
	 * is only inspected after the checks above, and only when present.
	 */
	bool has_mask_vector = WatchMask && WatchMask[0] > 0;
	uint32_t watch_mask_items = has_mask_vector ? NumWatchPoints : 1;
	uint32_t watch_event_items = WatchEvent != NULL ? NumWatchPoints : 0;

	/* Size and structure of the ioctl buffer is dynamic in this case
	 * Here we calculate the buff size.
	 */
	uint32_t buff_size = sizeof(NumWatchPoints) +
		(sizeof(WatchMode[0]) + sizeof(WatchAddress[0])) *
			NumWatchPoints +
		watch_mask_items * sizeof(HSAuint64) +
		watch_event_items * sizeof(HsaEvent *) + sizeof(*args);

	/* Zero-filled so that an absent mask is encoded as a zero entry. */
	args = calloc(1, buff_size);
	if (!args)
		return HSAKMT_STATUS_ERROR;

	args->gpu_id = gpu_id;
	args->buf_size_in_bytes = buff_size;

	/* pointer to the start of the non fixed part */
	unsigned char *run_ptr = (unsigned char *)args + sizeof(*args);

	/* save variable content pointer for kfd */
	args->content_ptr = (uint64_t)run_ptr;

	/* Insert the items back to back. memcpy() is used instead of a store
	 * through a casted pointer because the packed offsets are not
	 * guaranteed to satisfy the alignment of each item's type.
	 */
	memcpy(run_ptr, &NumWatchPoints, sizeof(NumWatchPoints));
	run_ptr += sizeof(NumWatchPoints);

	if (NumWatchPoints) {
		memcpy(run_ptr, WatchMode, NumWatchPoints * sizeof(WatchMode[0]));
		run_ptr += NumWatchPoints * sizeof(WatchMode[0]);

		memcpy(run_ptr, WatchAddress,
		       NumWatchPoints * sizeof(WatchAddress[0]));
		run_ptr += NumWatchPoints * sizeof(WatchAddress[0]);
	}

	if (WatchMask && watch_mask_items)
		memcpy(run_ptr, WatchMask, watch_mask_items * sizeof(HSAuint64));
	run_ptr += watch_mask_items * sizeof(HSAuint64);

	if (watch_event_items)
		memcpy(run_ptr, WatchEvent,
		       watch_event_items * sizeof(HsaEvent *));

	/* send to kernel */
	long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);

	free(args);

	if (err)
		return HSAKMT_STATUS_ERROR;
	return HSAKMT_STATUS_SUCCESS;
}

/* Kernel interface version threshold used by hsaKmtCheckRuntimeDebugSupport():
 * interface versions below 1.13 are reported as not supported.
 * NOTE(review): despite its name, HSA_RUNTIME_ENABLE_MAX_MAJOR is compared
 * as a lower bound on the major version.
 */
#define HSA_RUNTIME_ENABLE_MAX_MAJOR   1
#define HSA_RUNTIME_ENABLE_MIN_MINOR   13

/* Check whether runtime debugging can be enabled on this system.
 *
 * Returns HSAKMT_STATUS_ERROR when system or node properties cannot be
 * queried, HSAKMT_STATUS_NOT_SUPPORTED when a non-CPU node lacks the
 * DebugSupportedFirmware capability, when the version query fails or when
 * the kernel interface version is below the required threshold, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void) {
	HsaVersionInfo kfd_version = {0};
	HsaSystemProperties sys_props = {0};
	HsaNodeProperties node_props = {0};
	uint32_t node_idx;

	memset(&node_props, 0x00, sizeof(node_props));
	memset(&sys_props, 0x00, sizeof(sys_props));

	if (hsaKmtAcquireSystemProperties(&sys_props))
		return HSAKMT_STATUS_ERROR;

	/* Debugging is disabled as soon as one GPU node's firmware does not
	 * support the debugger.
	 */
	for (node_idx = 0; node_idx < sys_props.NumNodes; node_idx++) {
		bool cpu_only;

		if (hsaKmtGetNodeProperties(node_idx, &node_props))
			return HSAKMT_STATUS_ERROR;

		/* CPU-only nodes are ignored. */
		cpu_only = node_props.NumCPUCores && !node_props.NumFComputeCores;
		if (!cpu_only && !node_props.Capability.ui32.DebugSupportedFirmware)
			return HSAKMT_STATUS_NOT_SUPPORTED;
	}

	if (hsaKmtGetVersion(&kfd_version))
		return HSAKMT_STATUS_NOT_SUPPORTED;

	if (kfd_version.KernelInterfaceMajorVersion < HSA_RUNTIME_ENABLE_MAX_MAJOR)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	if (kfd_version.KernelInterfaceMajorVersion == HSA_RUNTIME_ENABLE_MAX_MAJOR &&
	    (int)kfd_version.KernelInterfaceMinorVersion < HSA_RUNTIME_ENABLE_MIN_MINOR)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	return HSAKMT_STATUS_SUCCESS;
}

/* Enable runtime debugging via AMDKFD_IOC_RUNTIME_ENABLE.
 *
 * rDebug:    value passed to the kernel in the r_debug field.
 * setupTtmp: when true, the TTMP save mode bit is requested as well.
 *
 * Returns the status of hsaKmtCheckRuntimeDebugSupport() when that check
 * fails, HSAKMT_STATUS_UNAVAILABLE when the ioctl fails with EBUSY,
 * HSAKMT_STATUS_ERROR for any other ioctl failure, and
 * HSAKMT_STATUS_SUCCESS otherwise. On success the capabilities mask returned
 * by the kernel is remembered for hsaKmtGetRuntimeCapabilities().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
					    bool setupTtmp)
{
	struct kfd_ioctl_runtime_enable_args enable_args = {0};
	HSAKMT_STATUS status = hsaKmtCheckRuntimeDebugSupport();

	if (status)
		return status;

	memset(&enable_args, 0x00, sizeof(enable_args));
	enable_args.r_debug = (HSAuint64)rDebug;
	enable_args.mode_mask = KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK;
	if (setupTtmp)
		enable_args.mode_mask |= KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &enable_args))
		return errno == EBUSY ? HSAKMT_STATUS_UNAVAILABLE :
					HSAKMT_STATUS_ERROR;

	runtime_capabilities_mask = enable_args.capabilities_mask;

	return HSAKMT_STATUS_SUCCESS;
}

/* Disable runtime debugging by issuing AMDKFD_IOC_RUNTIME_ENABLE with an
 * empty mode mask.
 *
 * Returns the status of hsaKmtCheckRuntimeDebugSupport() when that check
 * fails, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
{
	struct kfd_ioctl_runtime_enable_args disable_args = {0};
	HSAKMT_STATUS status = hsaKmtCheckRuntimeDebugSupport();
	long ioctl_ret;

	if (status)
		return status;

	memset(&disable_args, 0x00, sizeof(disable_args));
	disable_args.mode_mask = 0; /* no mode bit set: disable */

	ioctl_ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &disable_args);

	return ioctl_ret ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/* Report the capabilities mask stored by the last successful
 * hsaKmtRuntimeEnable() (0 when it never succeeded).
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when caps_mask is NULL and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
{
	if (!caps_mask)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*caps_mask = runtime_capabilities_mask;
	return HSAKMT_STATUS_SUCCESS;
}

static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
					      uint32_t *n_entries,
					      uint32_t entry_size)
{
	struct kfd_ioctl_dbg_trap_args args = {0};

	args.device_snapshot.snapshot_buf_ptr = (uint64_t) data;
	args.device_snapshot.num_devices = *n_entries;
	args.device_snapshot.entry_size = entry_size;
	args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
	args.pid = getpid();
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
		return HSAKMT_STATUS_ERROR;
	*n_entries = args.device_snapshot.num_devices;

	return HSAKMT_STATUS_SUCCESS;
}

/* Fetch a queue snapshot for the calling process through AMDKFD_IOC_DBG_TRAP.
 *
 * data:       buffer handed to the kernel as the snapshot destination; may
 *             be NULL together with *n_entries == 0 to only query the count.
 * n_entries:  in: number of entries the buffer can hold; out: the count the
 *             kernel wrote back, which may be larger than the input value
 *             (the sizing query relies on exactly that).
 * entry_size: entry size passed to the kernel.
 * queue_ids:  optional array with room for the input *n_entries ids; when
 *             given, it receives the queue id of every entry that fits in
 *             the buffer.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails (n_entries is then left
 * untouched) and HSAKMT_STATUS_SUCCESS otherwise.
 */
static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
					     uint32_t *n_entries,
					     uint32_t entry_size,
					     uint32_t *queue_ids)
{
	struct kfd_ioctl_dbg_trap_args args = {0};
	/* Capacity of data/queue_ids as supplied by the caller. */
	const uint32_t capacity = *n_entries;

	args.queue_snapshot.num_queues = capacity;
	args.queue_snapshot.entry_size = entry_size;
	args.queue_snapshot.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW);
	args.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
	args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
	args.pid = getpid();

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
		return HSAKMT_STATUS_ERROR;

	*n_entries = args.queue_snapshot.num_queues;
	if (queue_ids && data) {
		struct kfd_queue_snapshot_entry *queue_entry =
		    (struct kfd_queue_snapshot_entry *) data;
		/* Never index past what the caller's buffers can hold, even
		 * when the reported count grew since the buffers were sized.
		 */
		const uint32_t usable = *n_entries < capacity ? *n_entries : capacity;

		for (uint32_t i = 0; i < usable; i++)
			queue_ids[i] = queue_entry[i].queue_id;
	}

	return HSAKMT_STATUS_SUCCESS;
}

static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
					     uint32_t num_queues)
{
	struct kfd_ioctl_dbg_trap_args args = {0};
	int r;

	args.suspend_queues.queue_array_ptr = (uint64_t) queue_ids;
	args.suspend_queues.num_queues = num_queues;
	args.suspend_queues.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW);
	args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
	args.pid = getpid();

	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
	if (r < 0)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/* Debugger support has been available since KFD ABI 1.13. This is the minor
 * version passed to CHECK_KFD_MINOR_VERSION() by the hsaKmtDbgEnable(),
 * hsaKmtDbgDisable(), hsaKmtDbgGetDeviceData() and hsaKmtDbgGetQueueData()
 * entry points.
 */
#define KFD_MINOR_MIN_DEBUG 13

/* Enable debug trap support for the calling process.
 *
 * runtime_info: out: newly malloc()ed buffer of *data_size bytes that the
 *               kernel is given as the runtime info destination. The caller
 *               owns it on success; it is set to NULL on failure.
 * data_size:    out: sizeof(struct kfd_runtime_info).
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when an out-parameter is NULL,
 * HSAKMT_STATUS_NO_MEMORY when the buffer cannot be allocated,
 * HSAKMT_STATUS_ERROR when the ioctl fails, and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
					     HSAuint32 *data_size)
{
	struct kfd_ioctl_dbg_trap_args args = {0};

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);

	if (!runtime_info || !data_size)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*data_size = sizeof(struct kfd_runtime_info);
	args.enable.rinfo_size = *data_size;
	args.enable.dbg_fd = hsakmt_kfd_fd;
	*runtime_info = malloc(args.enable.rinfo_size);
	if (!*runtime_info)
		return HSAKMT_STATUS_NO_MEMORY;
	args.enable.rinfo_ptr = (uint64_t) *runtime_info;
	args.op = KFD_IOC_DBG_TRAP_ENABLE;
	args.pid = getpid();

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
		free(*runtime_info);
		/* Do not hand a freed pointer back to the caller. */
		*runtime_info = NULL;
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}
/* Disable debug trap support for the calling process.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
{
	struct kfd_ioctl_dbg_trap_args trap_args = {0};
	long ioctl_ret;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);

	trap_args.pid = getpid();
	trap_args.op = KFD_IOC_DBG_TRAP_DISABLE;
	trap_args.enable.dbg_fd = hsakmt_kfd_fd;

	ioctl_ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &trap_args);

	return ioctl_ret ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/* Fetch the debug device snapshot of the calling process.
 *
 * data:       out: newly malloc()ed snapshot buffer. The caller owns it on
 *             success; it is set to NULL on failure.
 * n_entries:  out: entry count written back by the kernel.
 * entry_size: out: sizeof(struct kfd_dbg_device_info_entry).
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when an out-parameter is NULL,
 * HSAKMT_STATUS_NO_MEMORY when the buffer cannot be allocated, the status of
 * the snapshot ioctl helper when it fails, and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
						HSAuint32 *n_entries,
						HSAuint32 *entry_size)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_NO_MEMORY;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);

	if (!data || !n_entries || !entry_size)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*n_entries = UINT32_MAX;
	*entry_size = sizeof(struct kfd_dbg_device_info_entry);
	/* NOTE(review): both operands are HSAuint32, so this product appears
	 * to be evaluated in 32-bit unsigned arithmetic and to wrap. It is
	 * deliberately left as is, because widening it would change the
	 * size of the allocation - confirm the intended sizing.
	 */
	*data = malloc(*entry_size * *n_entries);
	if (!*data)
		return ret;
	ret = dbg_trap_get_device_data(*data, n_entries, *entry_size);
	if (ret) {
		free(*data);
		/* Do not hand a freed pointer back to the caller. */
		*data = NULL;
	}

	return ret;
}

/* Fetch the queue snapshot of the calling process, optionally suspending
 * the queues first.
 *
 * data:           out: newly malloc()ed snapshot buffer. The caller owns it
 *                 on success; it is set to NULL on failure.
 * n_entries:      out: number of snapshot entries in *data.
 * entry_size:     out: sizeof(struct kfd_queue_snapshot_entry).
 * suspend_queues: when true, the queues found by the snapshot are suspended
 *                 and the snapshot is taken again afterwards.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when an out-parameter is NULL,
 * HSAKMT_STATUS_NO_MEMORY when an allocation fails, HSAKMT_STATUS_ERROR
 * when a kernel request fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
						HSAuint32 *n_entries,
						HSAuint32 *entry_size,
						bool suspend_queues)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_ERROR;
	uint32_t *queue_ids = NULL;
	uint32_t capacity;
	size_t snapshot_bytes;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);

	if (!data || !n_entries || !entry_size)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*data = NULL;
	*entry_size = sizeof(struct kfd_queue_snapshot_entry);
	*n_entries = 0;

	/* Sizing query: no buffer, zero capacity, count comes back. */
	if (dbg_trap_get_queue_data(NULL, n_entries, *entry_size, NULL))
		return HSAKMT_STATUS_ERROR;
	capacity = *n_entries;

	/* Always request at least one byte so that a NULL result means a
	 * real allocation failure and *data can always be passed to free().
	 */
	snapshot_bytes = (size_t)capacity * *entry_size;
	*data = malloc(snapshot_bytes ? snapshot_bytes : 1);
	if (!*data)
		return HSAKMT_STATUS_NO_MEMORY;

	/* The id array is only needed when queues are to be suspended; its
	 * absence is an error only if an allocation was actually attempted.
	 */
	if (suspend_queues && capacity) {
		queue_ids = (uint32_t *)malloc(sizeof(uint32_t) * capacity);
		if (!queue_ids) {
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto free_data;
		}
	}

	if (dbg_trap_get_queue_data(*data, n_entries, *entry_size, queue_ids))
		goto free_data;
	/* The reported count can exceed what was allocated if queues were
	 * created in the meantime; never expose more than the buffers hold.
	 */
	if (*n_entries > capacity)
		*n_entries = capacity;

	if (queue_ids) {
		if (dbg_trap_suspend_queues(queue_ids, *n_entries) ||
		    dbg_trap_get_queue_data(*data, n_entries, *entry_size, NULL))
			goto free_data;
		if (*n_entries > capacity)
			*n_entries = capacity;
		free(queue_ids);
	}
	return HSAKMT_STATUS_SUCCESS;

free_data:
	free(*data);
	/* Do not hand a freed pointer back to the caller. */
	*data = NULL;
	free(queue_ids);

	return ret;
}

/* Forward a caller-prepared debug trap request to the kernel.
 *
 * args:        request passed to AMDKFD_IOC_DBG_TRAP.
 * Queues:      optional queue handles. When given, they are converted with
 *              hsakmt_convert_queue_ids() and copied into the queue id array
 *              referenced by args (the suspend array for a suspend request,
 *              the resume array for every other operation).
 * DebugReturn: optional; receives the raw ioctl return value.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the id conversion fails. Otherwise a
 * zero ioctl return is a success; for suspend/resume requests a positive
 * return not larger than the request's num_queues is a success as well.
 * Everything else yields HSAKMT_STATUS_ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
					HSA_QUEUEID *Queues,
					HSAuint64 *DebugReturn)
{
	long ioctl_ret;

	CHECK_KFD_OPEN();

	if (Queues) {
		uint32_t *converted_ids;
		void *dest_ids;
		int num_queues;

		if (args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES) {
			num_queues = args->suspend_queues.num_queues;
			dest_ids = (void *)args->suspend_queues.queue_array_ptr;
		} else {
			num_queues = args->resume_queues.num_queues;
			dest_ids = (void *)args->resume_queues.queue_array_ptr;
		}

		converted_ids = hsakmt_convert_queue_ids(num_queues, Queues);
		if (!converted_ids)
			return HSAKMT_STATUS_NO_MEMORY;

		memcpy(dest_ids, converted_ids, num_queues * sizeof(uint32_t));
		free(converted_ids);
	}

	ioctl_ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, args);
	if (DebugReturn)
		*DebugReturn = ioctl_ret;

	/* Zero is a success for every operation. */
	if (ioctl_ret == 0)
		return HSAKMT_STATUS_SUCCESS;

	/* Suspend and resume additionally accept a positive return value
	 * up to the number of queues in the request.
	 */
	if (ioctl_ret > 0) {
		if (args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES &&
		    ioctl_ret <= args->suspend_queues.num_queues)
			return HSAKMT_STATUS_SUCCESS;
		if (args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES &&
		    ioctl_ret <= args->resume_queues.num_queues)
			return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}


================================================
FILE: libhsakmt/src/events.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include "hsakmt/linux/kfd_ioctl.h"
#include "fmm.h"
#include "hsakmt/hsakmtmodel.h"

/* Base of the event slot array used by hsaKmtCreateEvent(): it is set up on
 * demand there (either allocated or mmap()ed) and indexed with the slot
 * index returned by the kernel. NULL while no page has been set up.
 */
static HSAuint64 *events_page = NULL;

/* Forget the current events page so that the next hsaKmtCreateEvent() sets
 * one up again. Only the pointer is reset here; the memory it referred to is
 * neither freed nor unmapped by this function.
 */
void hsakmt_clear_events_page(void)
{
	events_page = NULL;
}

/* Tell whether an event type is system-defined, i.e. anything other than a
 * signal event or a debug event.
 */
static bool IsSystemEventType(HSA_EVENTTYPE type)
{
	switch (type) {
	case HSA_EVENTTYPE_SIGNAL:
	case HSA_EVENTTYPE_DEBUG_EVENT:
		/* Debug events behave as signal events. */
		return false;
	default:
		return true;
	}
}

/* Create a KFD event.
 *
 * EventDesc:   type, node and sync variable description of the new event.
 * ManualReset: when false, the event is created with auto reset.
 * IsSignaled:  when true and the type is not system-defined, the event is
 *              signaled right after creation.
 * Event:       out: the new event on success, NULL on every failure.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for NULL arguments or an event
 * type outside the valid range, HSAKMT_STATUS_ERROR when an allocation, a
 * mapping or a kernel request fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
					  bool ManualReset, bool IsSignaled,
					  HsaEvent **Event)
{
	unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;

	CHECK_KFD_OPEN();

	if (!EventDesc || !Event)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Never leave a stale handle behind, whichever path fails. */
	*Event = NULL;

	if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	HsaEvent *e = calloc(1, sizeof(HsaEvent));

	if (!e)
		return HSAKMT_STATUS_ERROR;

	struct kfd_ioctl_create_event_args args = {0};

	args.event_type = EventDesc->EventType;
	args.node_id = EventDesc->NodeId;
	args.auto_reset = !ManualReset;

	/* The events page is shared state: set it up under the lock. */
	pthread_mutex_lock(&hsakmt_mutex);

	/* dGPU code: the library allocates the events page itself. */
	if (hsakmt_is_dgpu && !events_page) {
		events_page = hsakmt_allocate_exec_aligned_memory_gpu(
			KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, 0, true, false, true);
		if (!events_page) {
			pthread_mutex_unlock(&hsakmt_mutex);
			free(e);
			return HSAKMT_STATUS_ERROR;
		}
		if (hsakmt_use_model)
			model_set_event_page(events_page, KFD_SIGNAL_EVENT_LIMIT);
		else
			hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset);
	}

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
		pthread_mutex_unlock(&hsakmt_mutex);
		free(e);
		return HSAKMT_STATUS_ERROR;
	}

	e->EventId = args.event_id;

	/* No page yet, but the kernel handed out an offset: map it. */
	if (!events_page && args.event_page_offset > 0) {
		events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
				MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
		if (events_page == MAP_FAILED) {
			/* old kernels only support 256 events */
			event_limit = 256;
			events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
					   MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
		}
		if (events_page == MAP_FAILED) {
			events_page = NULL;
			pthread_mutex_unlock(&hsakmt_mutex);
			/* hsaKmtDestroyEvent() only frees the object when
			 * it succeeds; free it here otherwise.
			 */
			if (hsaKmtDestroyEvent(e) != HSAKMT_STATUS_SUCCESS)
				free(e);
			return HSAKMT_STATUS_ERROR;
		}
	}

	if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
		e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];

	pthread_mutex_unlock(&hsakmt_mutex);

	e->EventData.EventType = EventDesc->EventType;
	e->EventData.HWData1 = args.event_id;

	e->EventData.HWData3 = args.event_trigger_data;
	e->EventData.EventData.SyncVar.SyncVar.UserData =
		EventDesc->SyncVar.SyncVar.UserData;
	e->EventData.EventData.SyncVar.SyncVarSize =
		EventDesc->SyncVar.SyncVarSize;

	if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) {
		struct kfd_ioctl_set_event_args set_args = {0};

		set_args.event_id = args.event_id;

		if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT,
				 &set_args) != 0) {
			/* See above: free the object if destroy fails. */
			if (hsaKmtDestroyEvent(e) != HSAKMT_STATUS_SUCCESS)
				free(e);
			return HSAKMT_STATUS_ERROR;
		}
	}

	*Event = e;

	return HSAKMT_STATUS_SUCCESS;
}

/* Destroy an event in the kernel and release its user-space object.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event and
 * HSAKMT_STATUS_ERROR when the ioctl fails; in both cases the object is not
 * freed. Returns HSAKMT_STATUS_SUCCESS once the event is destroyed and freed.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
{
	struct kfd_ioctl_destroy_event_args destroy_args = {0};

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	destroy_args.event_id = Event->EventId;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &destroy_args) == 0) {
		free(Event);
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}

/* Signal an event.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event,
 * HSAKMT_STATUS_ERROR for a system-defined event type or when the ioctl
 * returns -1, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
{
	struct kfd_ioctl_set_event_args set_args = {0};
	bool ioctl_failed;

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * may not be signaled from user space.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	set_args.event_id = Event->EventId;
	ioctl_failed = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args) == -1;

	return ioctl_failed ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/* Reset an event.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event,
 * HSAKMT_STATUS_ERROR for a system-defined event type or when the ioctl
 * returns -1, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
{
	struct kfd_ioctl_reset_event_args reset_args = {0};
	bool ioctl_failed;

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * may not be reset from user space.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	reset_args.event_id = Event->EventId;
	ioctl_failed = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RESET_EVENT, &reset_args) == -1;

	return ioctl_failed ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/* Validate an event handle. No state is actually queried: the function only
 * distinguishes a NULL handle (HSAKMT_STATUS_INVALID_HANDLE) from a non-NULL
 * one (HSAKMT_STATUS_SUCCESS).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	return Event ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}

/* Wait on a single event without event age tracking; see
 * hsaKmtWaitOnEvent_Ext() for the return values.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
		HSAuint32 Milliseconds)
{
	uint64_t *no_event_age = NULL;

	return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, no_event_age);
}

/* Wait on a single event by delegating to hsaKmtWaitOnMultipleEvents_Ext()
 * with a one-element event list.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event, otherwise whatever
 * the multi-event wait returns.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
		HSAuint32 Milliseconds, uint64_t *event_age)
{
	HsaEvent *single_event[1];

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	single_event[0] = Event;

	return hsaKmtWaitOnMultipleEvents_Ext(single_event, 1, true, Milliseconds, event_age);
}

/* Query the SVM attributes of the page at 'address' and print them.
 *
 * address: start address of the queried range (one PAGE_SIZE long).
 * gpu_id:  GPU id used as the value of the access attribute in the query.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails or an unknown attribute
 * type comes back, and HSAKMT_STATUS_SUCCESS otherwise.
 */
static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
{
	HSA_SVM_ATTRIBUTE query[] = {
					{HSA_SVM_ATTR_PREFERRED_LOC, 0},
					{HSA_SVM_ATTR_PREFETCH_LOC, 0},
					{HSA_SVM_ATTR_ACCESS, gpu_id},
					{HSA_SVM_ATTR_SET_FLAGS, 0},
				    };
	struct kfd_ioctl_svm_args *args;
	HSAuint32 query_bytes;
	/* Kept across iterations, exactly like the value it is derived from. */
	uint32_t node_id = 0;
	HSAuint32 idx;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	/* The attribute list trails the fixed part of the ioctl arguments. */
	query_bytes = sizeof(query);
	args = alloca(sizeof(*args) + query_bytes);
	args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
	args->start_addr = address;
	args->size = PAGE_SIZE;
	args->nattr = query_bytes / sizeof(*query);
	memcpy(args->attrs, query, query_bytes);

	/* The request code carries the size of the trailing attribute list. */
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (query_bytes << _IOC_SIZESHIFT), args)) {
		pr_debug("op get range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	pr_err("GPU address 0x%lx, is Unified memory\n", address);
	for (idx = 0; idx < args->nattr; idx++) {
		const uint32_t value = args->attrs[idx].value;
		const uint32_t type = args->attrs[idx].type;

		/* The two special locations are used as they are; anything
		 * else is treated as a GPU id and translated to a node id.
		 */
		if (value == KFD_IOCTL_SVM_LOCATION_SYSMEM ||
		    value == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
			node_id = value;
		else
			hsakmt_gpuid_to_nodeid(value, &node_id);

		switch (type) {
		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
			pr_err("Preferred location for address 0x%lx is Node id %d\n",
				address, node_id);
			break;
		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
			pr_err("Prefetch location for address 0x%lx is Node id %d\n",
				address, node_id);
			break;
		case KFD_IOCTL_SVM_ATTR_ACCESS:
			pr_err("Node id %d has access to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
			pr_err("Node id %d has access in place to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
			pr_err("Node id %d has no access to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
			/* For this attribute the value is a flag mask. */
			if (value & KFD_IOCTL_SVM_FLAG_COHERENT)
				pr_err("Fine grained coherency between devices\n");
			if (value & KFD_IOCTL_SVM_FLAG_GPU_RO)
				pr_err("Read only\n");
			if (value & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
				pr_err("GPU exec allowed\n");
			if (value & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
				pr_err("GPU always mapped\n");
			if (value & KFD_IOCTL_SVM_FLAG_EXT_COHERENT)
				pr_err("Extended-scope fine grained coherency between devices\n");
			break;
		default:
			pr_debug("get invalid attr type 0x%x\n", type);
			return HSAKMT_STATUS_ERROR;
		}
	}

	return HSAKMT_STATUS_SUCCESS;
}
/* Analyze memory exception data and print diagnostic messages about the
 * faulting address: the kind of failure, and what is known about the buffer
 * the address belongs to.
 */
static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
						memory_exception_data)
{
	const uint64_t fault_va = memory_exception_data->va;
	HsaPointerInfo ptr_info;
	uint32_t fault_node = 0;
	unsigned int n;

	hsakmt_gpuid_to_nodeid(memory_exception_data->gpu_id, &fault_node);
	pr_err("Memory exception on virtual address 0x%lx, ", fault_va);
	pr_err("node id %d : ", fault_node);

	/* Only the first matching failure reason is reported. */
	if (memory_exception_data->failure.NotPresent)
		pr_err("Page not present\n");
	else if (memory_exception_data->failure.ReadOnly)
		pr_err("Writing to readonly page\n");
	else if (memory_exception_data->failure.NoExecute)
		pr_err("Execute to none-executable page\n");

	/* Addresses unknown to the memory manager are looked up through the
	 * SVM API before giving up.
	 */
	if (hsakmt_fmm_get_mem_info((const void *)fault_va, &ptr_info) !=
	    HSAKMT_STATUS_SUCCESS) {
		if (get_mem_info_svm_api(fault_va, memory_exception_data->gpu_id) !=
		    HSAKMT_STATUS_SUCCESS)
			pr_err("Address does not belong to a known buffer\n");
		return;
	}

	pr_err("GPU address 0x%lx, node id %d, size in byte 0x%lx\n",
			ptr_info.GPUAddress, ptr_info.Node, ptr_info.SizeInBytes);

	switch (ptr_info.Type) {
	case HSA_POINTER_REGISTERED_SHARED:
		pr_err("Memory is registered shared buffer (IPC)\n");
		break;
	case HSA_POINTER_REGISTERED_GRAPHICS:
		pr_err("Memory is registered graphics buffer\n");
		break;
	case HSA_POINTER_REGISTERED_USER:
		pr_err("Memory is registered user pointer\n");
		pr_err("CPU address of the memory is %p\n", ptr_info.CPUAddress);
		break;
	case HSA_POINTER_ALLOCATED:
		pr_err("Memory is allocated using hsaKmtAllocMemory\n");
		pr_err("CPU address of the memory is %p\n", ptr_info.CPUAddress);
		break;
	case HSA_POINTER_RESERVED_ADDR:
		pr_err("Memory is allocated by OnlyAddress mode\n");
		break;
	default:
		pr_err("Invalid memory type %d\n", ptr_info.Type);
		break;
	}

	if (ptr_info.RegisteredNodes) {
		pr_err("Memory is registered to node id: ");
		for (n = 0; n < ptr_info.NRegisteredNodes; n++)
			pr_err("%d ", ptr_info.RegisteredNodes[n]);
		pr_err("\n");
	}

	if (ptr_info.MappedNodes) {
		pr_err("Memory is mapped to node id: ");
		for (n = 0; n < ptr_info.NMappedNodes; n++)
			pr_err("%d ", ptr_info.MappedNodes[n]);
		pr_err("\n");
	}
}

/* Wait on several events without event age tracking; see
 * hsaKmtWaitOnMultipleEvents_Ext() for the return values.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds)
{
	uint64_t *no_event_age = NULL;

	return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll,
					      Milliseconds, no_event_age);
}

/* Wait on a set of events.
 *
 * Events:       array of NumEvents event handles.
 * NumEvents:    number of entries in Events (and in event_age, if given).
 * WaitOnAll:    passed to the kernel as wait_for_all.
 * Milliseconds: passed to the kernel as the timeout.
 * event_age:    optional in/out array. For signal events, entry i is passed
 *               to the kernel as the last known event age and is updated
 *               with the value the kernel hands back.
 *
 * After a successful wait, the payload of memory and hardware exception
 * events that carry a GPU id is copied into the corresponding HsaEvent.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE when Events or one of its elements is
 * NULL, HSAKMT_STATUS_NO_MEMORY when the request buffer cannot be allocated,
 * HSAKMT_STATUS_ERROR when the ioctl returns -1,
 * HSAKMT_STATUS_WAIT_TIMEOUT when the wait timed out, the status of
 * hsakmt_gpuid_to_nodeid() when a reported GPU id cannot be translated, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds,
						   uint64_t *event_age)
{
	HSAKMT_STATUS result;

	CHECK_KFD_OPEN();

	if (!Events)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* Every element is dereferenced below; reject NULL handles before
	 * anything is allocated.
	 */
	for (HSAuint32 i = 0; i < NumEvents; i++) {
		if (!Events[i])
			return HSAKMT_STATUS_INVALID_HANDLE;
	}

	struct kfd_event_data *event_data =
		calloc(NumEvents, sizeof(struct kfd_event_data));
	if (!event_data)
		return HSAKMT_STATUS_NO_MEMORY;

	for (HSAuint32 i = 0; i < NumEvents; i++) {
		event_data[i].event_id = Events[i]->EventId;
		event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
		if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
			event_data[i].signal_event_data.last_event_age = event_age[i];
	}

	struct kfd_ioctl_wait_events_args args = {0};

	args.wait_for_all = WaitOnAll;
	args.timeout = Milliseconds;
	args.num_events = NumEvents;
	args.events_ptr = (uint64_t)(uintptr_t)event_data;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
		result = HSAKMT_STATUS_ERROR;
	else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT)
		result = HSAKMT_STATUS_WAIT_TIMEOUT;
	else {
		result = HSAKMT_STATUS_SUCCESS;
		for (HSAuint32 i = 0; i < NumEvents; i++) {
			HsaEvent *ev = Events[i];
			struct kfd_event_data *kd = &event_data[i];

			if (ev->EventData.EventType == HSA_EVENTTYPE_MEMORY &&
			    kd->memory_exception_data.gpu_id) {
				ev->EventData.EventData.MemoryAccessFault.VirtualAddress =
					kd->memory_exception_data.va;
				result = hsakmt_gpuid_to_nodeid(kd->memory_exception_data.gpu_id,
						&ev->EventData.EventData.MemoryAccessFault.NodeId);
				if (result != HSAKMT_STATUS_SUCCESS)
					goto out;
				ev->EventData.EventData.MemoryAccessFault.Failure.NotPresent =
					kd->memory_exception_data.failure.NotPresent;
				ev->EventData.EventData.MemoryAccessFault.Failure.ReadOnly =
					kd->memory_exception_data.failure.ReadOnly;
				ev->EventData.EventData.MemoryAccessFault.Failure.NoExecute =
					kd->memory_exception_data.failure.NoExecute;
				ev->EventData.EventData.MemoryAccessFault.Failure.Imprecise =
					kd->memory_exception_data.failure.imprecise;
				ev->EventData.EventData.MemoryAccessFault.Failure.ErrorType =
					kd->memory_exception_data.ErrorType;
				/* Error types 1 and 2 are reported as ECC failures. */
				ev->EventData.EventData.MemoryAccessFault.Failure.ECC =
					((kd->memory_exception_data.ErrorType == 1) ||
					 (kd->memory_exception_data.ErrorType == 2)) ? 1 : 0;
				ev->EventData.EventData.MemoryAccessFault.Flags =
					HSA_EVENTID_MEMORY_FATAL_PROCESS;
				analysis_memory_exception(&kd->memory_exception_data);
			} else if (ev->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
				   kd->hw_exception_data.gpu_id) {
				result = hsakmt_gpuid_to_nodeid(kd->hw_exception_data.gpu_id,
						&ev->EventData.EventData.HwException.NodeId);
				if (result != HSAKMT_STATUS_SUCCESS)
					goto out;

				ev->EventData.EventData.HwException.ResetType =
					kd->hw_exception_data.reset_type;
				ev->EventData.EventData.HwException.ResetCause =
					kd->hw_exception_data.reset_cause;
				ev->EventData.EventData.HwException.MemoryLost =
					kd->hw_exception_data.memory_lost;
			}
		}
	}
out:

	/* Event ages are handed back whatever the outcome of the wait. */
	for (HSAuint32 i = 0; i < NumEvents; i++) {
		if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
			event_age[i] = event_data[i].signal_event_data.last_event_age;
	}

	free(event_data);

	return result;
}

/* Open an SMI event file descriptor for a node.
 *
 * NodeId: node whose GPU the descriptor is requested for.
 * fd:     out: the anonymous file descriptor returned by the kernel.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when fd is NULL, the status of
 * hsakmt_validate_nodeid() when NodeId cannot be resolved,
 * HSAKMT_STATUS_ERROR when the ioctl fails, and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
{
	/* Zeroed so that no uninitialised stack bytes reach the kernel. */
	struct kfd_ioctl_smi_events_args args = {0};
	HSAKMT_STATUS result;
	uint32_t gpuid;

	CHECK_KFD_OPEN();

	if (!fd)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	pr_debug("[%s] node %d\n", __func__, NodeId);

	result = hsakmt_validate_nodeid(NodeId, &gpuid);
	if (result != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return result;
	}

	args.gpuid = gpuid;
	/* The ioctl return value is not an HSAKMT_STATUS; test it directly. */
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args)) {
		pr_debug("open SMI event fd failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	*fd = args.anon_fd;
	return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/src/fmm.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define _GNU_SOURCE
#include "libhsakmt.h"
#include "fmm.h"
#include "hsakmt/hsakmtmodel.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <errno.h>
#include <assert.h>

#include <numa.h>
#include <numaif.h>
#include "rbtree.h"
#include <amdgpu.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include "hsakmt/linux/udmabuf.h"

#ifndef MPOL_F_STATIC_NODES
/* Bug in numaif.h, this should be defined in there. Definition copied
 * from linux/mempolicy.h.
 */
#define MPOL_F_STATIC_NODES     (1 << 15)
#endif

/* gpu_id value that marks a gpu_mem entry as not holding a valid GPU */
#define NON_VALID_GPU_ID 0

/* Static initializer for a manageable_aperture_t spanning
 * [base_value, limit_value]: no alignment set, one guard page, no
 * allocated ranges, not CPU accessible, managed by reserved_aperture_ops.
 * Members that are not named here (tree, user_tree) are left to the
 * implicit zero-initialization of the initializer list.
 */
#define INIT_MANAGEABLE_APERTURE(base_value, limit_value) {	\
	.base = (void *) base_value,				\
	.limit = (void *) limit_value,				\
	.align = 0,						\
	.guard_pages = 1,					\
	.vm_ranges = NULL,					\
	.fmm_mutex = PTHREAD_MUTEX_INITIALIZER,			\
	.is_cpu_accessible = false,				\
	.ops = &reserved_aperture_ops				\
	}

/* Given a pointer to `member` embedded in a `type`, recover the pointer
 * to the enclosing `type`. Relies on the GNU statement-expression
 * extension.
 */
#define container_of(ptr, type, member) ({			\
		char *__mptr = (void *)(ptr);			\
		((type *)(__mptr - offsetof(type, member))); })

/* rbtree flavour of container_of */
#define rb_entry(ptr, type, member)				\
		container_of(ptr, type, member)

/* Map an rbtree node back to its vm_object_t. The node is taken to be
 * the `node` member when is_userptr is 0 and the `user_node` member
 * otherwise.
 */
#define vm_object_entry(n, is_userptr) ({			\
		(is_userptr) == 0 ?				\
		rb_entry(n, vm_object_t, node) :		\
		rb_entry(n, vm_object_t, user_node); })

/* Select the aperture's tree that matches is_userptr: user_tree when
 * non-zero, tree when zero.
 */
#define vm_object_tree(app, is_userptr)				\
		((is_userptr) ? &(app)->user_tree : &(app)->tree)

/* Address constants built from bit 47: START_NON_CANONICAL_ADDR is 2^47
 * (also used as the base of mem_handle_aperture) and
 * END_NON_CANONICAL_ADDR is ~0UL - 2^47.
 * NOTE(review): presumably these bracket the non-canonical hole of a
 * 48-bit virtual address space - confirm.
 */
#define START_NON_CANONICAL_ADDR (1ULL << 47)
#define END_NON_CANONICAL_ADDR (~0UL - (1UL << 47))

/* Book-keeping record for one allocation tracked in an aperture.
 *
 * The record ends in a flexible array of KFD handles, so it has to be
 * allocated with room for at least handle_num entries (see
 * vm_create_and_init_object).
 */
struct vm_object {
	void *start; /* start address of the object; part of the key of `node` */
	void *userptr; /* non-NULL when the object is also linked into user_tree */
	uint64_t userptr_size; /* size that goes with userptr */
	uint64_t size; /* size allocated on GPU. When the user requests a random
			* size, Thunk aligns it to page size and allocates this
			* aligned size on GPU
			*/
	uint32_t node_id;
	rbtree_node_t node; /* link in manageable_aperture.tree, keyed by (start, size) */
	rbtree_node_t user_node; /* link in manageable_aperture.user_tree */

	HsaMemFlags mflags; /* memory allocation flags */
	/* Registered nodes to map on SVM mGPU */
	uint32_t *registered_device_id_array;
	uint32_t registered_device_id_array_size;
	uint32_t *registered_node_id_array;
	uint32_t registration_count; /* the same memory region can be registered multiple times */
	/* Nodes that mapped already */
	uint32_t *mapped_device_id_array;
	uint32_t mapped_device_id_array_size;
	uint32_t *mapped_node_id_array;
	uint32_t mapping_count;
	/* Metadata of imported graphics buffers (freed with the object) */
	void *metadata;
	/* User data associated with the memory */
	void *user_data;
	/* Flag to indicate imported KFD buffer */
	bool is_imported_kfd_bo;
#ifdef SANITIZER_AMDGPU
	/* Extra mmap parameters, only present in SANITIZER_AMDGPU builds */
	int mmap_flags;
	int mmap_fd;
	off_t mmap_offset;
#endif
	uint32_t handle_num; /* number of handles */
	uint64_t handles[]; /* kfd handles array */
};
typedef struct vm_object vm_object_t;

/* One allocated address range of a reserved aperture, kept as a node of
 * the doubly linked list rooted at manageable_aperture.vm_ranges.
 * Both bounds are inclusive: the range size is end - start + 1.
 */
struct vm_area {
	void *start; /* first address of the range */
	void *end; /* last address of the range (inclusive) */
	struct vm_area *next; /* following range, NULL for the last one */
	struct vm_area *prev; /* preceding range, NULL for the list head */
};
typedef struct vm_area vm_area_t;

/* Memory manager for an aperture */
typedef struct manageable_aperture manageable_aperture_t;

/* Aperture management function pointers to allow different management
 * schemes.
 *
 * allocate_area_aligned: reserve `size` bytes of address space in `aper`,
 *	at `addr` when it is non-NULL, honouring `align`; the
 *	implementations in this file return NULL on failure.
 * release_area: give back the range [addr, addr + size) obtained from
 *	allocate_area_aligned.
 */
typedef struct {
	void *(*allocate_area_aligned)(manageable_aperture_t *aper, void *addr,
				       uint64_t size, uint64_t align);
	void (*release_area)(manageable_aperture_t *aper,
			     void *addr, uint64_t size);
} manageable_aperture_ops_t;

/* Reserved aperture type managed by its own address allocator */
static void *reserved_aperture_allocate_aligned(manageable_aperture_t *aper,
						void *addr,
						uint64_t size, uint64_t align);
static void reserved_aperture_release(manageable_aperture_t *aper,
				      void *addr, uint64_t size);

static int bind_mem_to_numa(uint32_t node_id, void *mem,
			    uint64_t SizeInBytes, HsaMemFlags mflags);

/* Ops table for apertures whose address space is handed out by the
 * vm_area list allocator of this file (also the default installed by
 * INIT_MANAGEABLE_APERTURE). Initializer order follows the member order
 * of manageable_aperture_ops_t: allocate first, release second.
 */
static const manageable_aperture_ops_t reserved_aperture_ops = {
	reserved_aperture_allocate_aligned,
	reserved_aperture_release
};

/* Unreserved aperture type using mmap to allocate virtual address space */
static void *mmap_aperture_allocate_aligned(manageable_aperture_t *aper,
					    void *addr,
					    uint64_t size, uint64_t align);
static void mmap_aperture_release(manageable_aperture_t *aper,
				  void *addr, uint64_t size);
/* Ops table for apertures that obtain and return address space through
 * mmap/munmap. Initializer order follows the member order of
 * manageable_aperture_ops_t: allocate first, release second.
 */
static const manageable_aperture_ops_t mmap_aperture_ops = {
	mmap_aperture_allocate_aligned,
	mmap_aperture_release
};

/* An address aperture together with the state needed to allocate from it */
struct manageable_aperture {
	void *base; /* first address of the aperture */
	void *limit; /* last address of the aperture (inclusive) */
	uint64_t align; /* minimum alignment applied to allocations */
	uint32_t guard_pages; /* pages added to each area by vm_align_area_size */
	vm_area_t *vm_ranges; /* head of the allocated-range list (reserved ops) */
	rbtree_t tree; /* vm_objects keyed by (start, size) */
	rbtree_t user_tree; /* vm_objects that carry a userptr, linked via user_node */
	pthread_mutex_t fmm_mutex; /* taken around object/area bookkeeping of this aperture */
	bool is_cpu_accessible; /* when set, releasing an area also resets its CPU mapping */
	const manageable_aperture_ops_t *ops; /* address-space management scheme */
};

/* Plain address window with no allocator state attached */
typedef struct {
	void *base; /* start of the window */
	void *limit; /* end of the window */
} aperture_t;

/* Per-GPU memory state: identifiers plus the apertures that are specific
 * to one GPU. Instances live in the gpu_mem array.
 */
typedef struct {
	uint32_t gpu_id; /* NON_VALID_GPU_ID marks an entry without a valid GPU */
	uint32_t device_id;
	uint32_t node_id;
	uint64_t local_mem_size;
	HSA_ENGINE_ID EngineId;
	aperture_t lds_aperture;
	aperture_t scratch_aperture;
	aperture_t mmio_aperture;
	manageable_aperture_t scratch_physical; /* For dGPU, scratch physical is allocated from
						 * dgpu_aperture. When requested by RT, each
						 * GPU will get a different range
						 */
	manageable_aperture_t gpuvm_aperture;   /* used for GPUVM on APU, outside the canonical address range */
	int drm_render_fd;
	uint32_t usable_peer_id_num; /* presumably the entry count of usable_peer_id_array - confirm */
	uint32_t *usable_peer_id_array;
	int drm_render_minor;
} gpu_mem_t;

/* Index into svm_t.apertures; SVM_APERTURE_NUM is the array size */
enum svm_aperture_type {
	SVM_DEFAULT = 0,
	SVM_COHERENT,
	SVM_APERTURE_NUM
};

/* The main structure for dGPU Shared Virtual Memory Management */
typedef struct {
	/* Two apertures can have different MTypes (for coherency) */
	manageable_aperture_t apertures[SVM_APERTURE_NUM];

	/* Pointers to apertures, may point to the same aperture on
	 * GFXv9 and later, where MType is not based on apertures
	 */
	manageable_aperture_t *dgpu_aperture;
	manageable_aperture_t *dgpu_alt_aperture;

	/* whether to use userptr for paged memory */
	bool userptr_for_paged_mem;

	/* whether to check userptrs on registration */
	bool check_userptr;

	/* whether SVM address space is reserved
	 * NOTE(review): exact semantics are not visible in this part of
	 * the file - confirm against the code that sets it.
	 */
	bool reserve_svm;

	/* whether all memory is coherent (GPU cache disabled) */
	bool disable_cache;

	/* specifies the alignment size as PAGE_SIZE * 2^alignment_order */
	uint32_t alignment_order;
} svm_t;

/* The other apertures are specific to each GPU. gpu_mem_t manages GPU
 * specific memory apertures.
 *
 * gpu_mem is an array of gpu_mem_count entries.
 */
static gpu_mem_t *gpu_mem;
static unsigned int gpu_mem_count;
/* The SVM ioctl helpers refuse to run while this is NULL */
static gpu_mem_t *g_first_gpu_mem;

static void *dgpu_shared_aperture_base;
static void *dgpu_shared_aperture_limit;

/* Global SVM state. Members not named in the initializer (reserve_svm,
 * alignment_order) start out zero.
 */
static svm_t svm = {
	.apertures = {INIT_MANAGEABLE_APERTURE(0, 0),
		      INIT_MANAGEABLE_APERTURE(0, 0)},
	.dgpu_aperture = NULL,
	.dgpu_alt_aperture = NULL,
	.userptr_for_paged_mem = false,
	.check_userptr = false,
	.disable_cache = false,
};

/* On APU, memory allocated in system memory that the GPU does not access
 * through the GPU driver is not managed by GPUVM. cpuvm_aperture keeps
 * track of this part of memory.
 */
static manageable_aperture_t cpuvm_aperture = INIT_MANAGEABLE_APERTURE(0, 0);

/* mem_handle_aperture is used to generate memory handles for
 * allocations that don't have a valid virtual address. It starts at
 * START_NON_CANONICAL_ADDR and is 2^47 bytes (47 bits) in size.
 */
static manageable_aperture_t mem_handle_aperture = INIT_MANAGEABLE_APERTURE(START_NON_CANONICAL_ADDR, (START_NON_CANONICAL_ADDR + (1ULL << 47)));

/* GPU node array for default mappings */
static uint32_t all_gpu_id_array_size;
static uint32_t *all_gpu_id_array;

/* IPC structures and helper functions */

/* Identifies which aperture an address belongs to; fmm_get_aperture
 * turns a value of this enum (plus an index) back into an aperture.
 */
typedef enum _HSA_APERTURE {
	HSA_APERTURE_UNSUPPORTED = 0,	// no aperture (fmm_get_aperture yields NULL)
	HSA_APERTURE_DGPU,		// svm.dgpu_aperture
	HSA_APERTURE_DGPU_ALT,		// svm.dgpu_alt_aperture
	HSA_APERTURE_GPUVM,		// gpu_mem[idx].gpuvm_aperture
	HSA_APERTURE_CPUVM,		// cpuvm_aperture
	HSA_APERTURE_MEMHANDLE		// mem_handle_aperture
} HSA_APERTURE;

/* Aperture identity: the type, plus the gpu_mem index that is only
 * meaningful for HSA_APERTURE_GPUVM.
 */
typedef struct _HsaApertureInfo {
	HSA_APERTURE	type;		// Aperture type
	HSAuint32	idx;		// Aperture index
} HsaApertureInfo;

/* Internal layout of a shared memory handle. The helpers in this file
 * cast HsaSharedMemoryHandle pointers to and from this struct.
 */
typedef struct _HsaSharedMemoryStruct {
	HSAuint32	ShareHandle[4];
	HsaApertureInfo	ApeInfo;	// aperture the shared buffer belongs to
	HSAuint32	SizeInPages;
	HSAuint32	ExportGpuId;
} HsaSharedMemoryStruct;

/* Reinterpret a read-only public shared memory handle as the internal
 * HsaSharedMemoryStruct layout.
 */
static inline const HsaSharedMemoryStruct *to_const_hsa_shared_memory_struct(
			const HsaSharedMemoryHandle *SharedMemoryHandle)
{
	const HsaSharedMemoryStruct *shared =
		(const HsaSharedMemoryStruct *)SharedMemoryHandle;

	return shared;
}

/* Reinterpret a writable public shared memory handle as the internal
 * HsaSharedMemoryStruct layout.
 */
static inline HsaSharedMemoryStruct *to_hsa_shared_memory_struct(
			HsaSharedMemoryHandle *SharedMemoryHandle)
{
	HsaSharedMemoryStruct *shared =
		(HsaSharedMemoryStruct *)SharedMemoryHandle;

	return shared;
}

/* Reinterpret the internal HsaSharedMemoryStruct layout as the public
 * shared memory handle. Marked unused because not every build calls it.
 */
__attribute__((unused))
static inline HsaSharedMemoryHandle *to_hsa_shared_memory_handle(
			HsaSharedMemoryStruct *SharedMemoryStruct)
{
	HsaSharedMemoryHandle *handle =
		(HsaSharedMemoryHandle *)SharedMemoryStruct;

	return handle;
}

static int __fmm_release(vm_object_t *object, manageable_aperture_t *aperture);
static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
				       manageable_aperture_t *aperture,
				       void *address);
static void print_device_id_array(uint32_t *device_id_array, uint32_t device_id_array_size);

/* Allocate a vm_area_t describing [start, end] and leave it unlinked
 * (both list pointers NULL). Returns NULL when malloc fails.
 */
static vm_area_t *vm_create_and_init_area(void *start, void *end)
{
	vm_area_t *new_area = (vm_area_t *) malloc(sizeof(vm_area_t));

	if (!new_area)
		return NULL;

	new_area->start = start;
	new_area->end = end;
	new_area->prev = NULL;
	new_area->next = NULL;

	return new_area;
}

/* One huge page smaller than 512GB system buffer limit,
 * because 512GB allocation will cause TTM failure.
 */
#define BIGGEST_SINGLE_BUF_SIZE ((1ULL << 39) - GPU_HUGE_PAGE_SIZE)

/* Allocate and initialize a vm_object_t for [start, start + size).
 *
 * @start:  start address of the object, also used in the rbtree key
 * @size:   object size in bytes
 * @handle: KFD handle stored as handles[0]
 * @mflags: memory flags recorded in the object
 *
 * The trailing handles[] array gets one slot per BIGGEST_SINGLE_BUF_SIZE
 * chunk of @size so that callers which split a big buffer can append
 * further handles. At least one slot is always allocated because
 * handles[0] is written unconditionally, even for a zero @size.
 *
 * The object is not inserted into any tree. Returns NULL when malloc
 * fails.
 */
static vm_object_t *vm_create_and_init_object(void *start, uint64_t size,
					      uint64_t handle, HsaMemFlags mflags)
{
	vm_object_t *object;
	uint64_t handle_array_size = (size + BIGGEST_SINGLE_BUF_SIZE - 1) /
				     BIGGEST_SINGLE_BUF_SIZE;

	/* size == 0 would yield zero slots and make the handles[0] store
	 * below write past the end of the allocation.
	 */
	if (handle_array_size == 0)
		handle_array_size = 1;

	object = (vm_object_t *) malloc(sizeof(vm_object_t) +
		 handle_array_size * sizeof(uint64_t));

	if (object) {
		object->start = start;
		object->userptr = NULL;
		object->userptr_size = 0;
		object->size = size;
		object->handles[0] = handle;
		object->handle_num = 1;
		object->registered_device_id_array_size = 0;
		object->mapped_device_id_array_size = 0;
		object->registered_device_id_array = NULL;
		object->mapped_device_id_array = NULL;
		object->registered_node_id_array = NULL;
		object->mapped_node_id_array = NULL;
		object->registration_count = 0;
		object->mapping_count = 0;
		object->mflags = mflags;
		object->metadata = NULL;
		object->user_data = NULL;
		object->is_imported_kfd_bo = false;
		object->node.key = rbtree_key((unsigned long)start, size);
		object->user_node.key = rbtree_key(0, 0);
#ifdef SANITIZER_AMDGPU
		/* Start from defined values instead of malloc garbage */
		object->mmap_flags = 0;
		object->mmap_fd = 0;
		object->mmap_offset = 0;
#endif
	}

	return object;
}


/* Unlink @area from the range list of @app and free it. */
static void vm_remove_area(manageable_aperture_t *app, vm_area_t *area)
{
	vm_area_t *successor = area->next;
	vm_area_t *predecessor = area->prev;

	if (predecessor)
		predecessor->next = successor;
	else
		app->vm_ranges = successor; /* area was the list head */

	if (successor) /* area was not the tail */
		successor->prev = predecessor;

	free(area);
}

/* Drop @object from the trees of @app and free it together with every
 * array it owns.
 */
static void vm_remove_object(manageable_aperture_t *app, vm_object_t *object)
{
	/* Owned arrays; free() accepts NULL, so unset ones need no guard */
	free(object->registered_device_id_array);
	free(object->mapped_device_id_array);
	free(object->metadata);
	free(object->registered_node_id_array);
	free(object->mapped_node_id_array);

	hsakmt_rbtree_delete(&app->tree, &object->node);

	/* Only objects with a userptr are linked into the user tree */
	if (object->userptr != NULL)
		hsakmt_rbtree_delete(&app->user_tree, &object->user_node);

	free(object);
}

/* Link @new_area into the range list directly behind @after_this. */
static void vm_add_area_after(vm_area_t *after_this, vm_area_t *new_area)
{
	vm_area_t *old_successor = after_this->next;

	/* Forward links first, then the backward links */
	after_this->next = new_area;
	new_area->next = old_successor;
	new_area->prev = after_this;

	if (old_successor != NULL)
		old_successor->prev = new_area;
}

/* Carve [address, address + MemorySizeInBytes) out of the middle of
 * @area.
 *
 * @app:               aperture owning @area (not used by the split itself)
 * @area:              range that currently contains the block to remove
 * @address:           first address of the block to remove
 * @MemorySizeInBytes: size of the block to remove
 *
 * Afterwards @area covers [area->start, address - 1] and a newly
 * allocated area covers [address + MemorySizeInBytes, old area->end].
 * If the new area cannot be allocated an error is logged and @area is
 * left unchanged.
 */
static void vm_split_area(manageable_aperture_t *app, vm_area_t *area,
				void *address, uint64_t MemorySizeInBytes)
{
	/* Upper remainder: everything behind the removed block */
	vm_area_t *new_area = vm_create_and_init_area(
				VOID_PTR_ADD(address, MemorySizeInBytes),
				area->end);

	if (new_area == NULL) {
		/* Newline-terminated like every other log message here */
		pr_err("[%s] Failed to create new area during split.\n", __func__);
		return;
	}
	/* Shrink the existing area to the lower remainder */
	area->end = VOID_PTR_SUB(address, 1);

	vm_add_area_after(area, new_area);
}

/* Exact-match lookup of an object by its start address.
 *
 * @app:        aperture to search
 * @address:    start address (is_userptr == 0) or userptr (otherwise)
 * @size:       expected size, or 0 to accept any size as long as only
 *              one node starts at @address
 * @is_userptr: selects user_tree/userptr fields instead of tree/start
 *
 * Returns the matching object or NULL.
 */
static vm_object_t *vm_find_object_by_address_userptr(manageable_aperture_t *app,
					const void *address, uint64_t size, int is_userptr)
{
	rbtree_t *tree = vm_object_tree(app, is_userptr);
	rbtree_key_t key = rbtree_key((unsigned long)address, size);
	rbtree_node_t *hit, *last_at_address;
	vm_object_t *candidate;
	void *candidate_start;
	uint64_t candidate_size;

	/* As noted by the original author: a RIGHT nearest lookup yields a
	 * node whose address and size are both >= the key. With nodes
	 * (0x100, 16) and (0x110, 8) and key (0x100, 0), node (0x100, 16)
	 * is returned.
	 */
	hit = rbtree_lookup_nearest(tree, &key, LKP_ALL, RIGHT);
	if (!hit)
		return NULL;

	candidate = vm_object_entry(hit, is_userptr);
	if (is_userptr == 0) {
		candidate_start = candidate->start;
		candidate_size = candidate->size;
	} else {
		candidate_start = candidate->userptr;
		candidate_size = candidate->userptr_size;
	}

	if (candidate_start != address)
		return NULL;

	/* Explicit size requested: it has to match exactly */
	if (size != 0)
		return (candidate_size == size) ? candidate : NULL;

	/* Size 0: accept only if no other node starts at this address,
	 * i.e. the LEFT nearest lookup with the largest size lands on the
	 * very same node.
	 */
	key = rbtree_key((unsigned long)address, (unsigned long)-1);
	last_at_address = rbtree_lookup_nearest(tree, &key, LKP_ALL, LEFT);

	return (last_at_address == hit) ? candidate : NULL;
}


/* Find the object whose range contains @address.
 *
 * @app:        aperture to search
 * @address:    any address inside the wanted object
 * @is_userptr: 0 to search `tree` using start/size, non-zero to search
 *              `user_tree` using userptr/userptr_size
 *
 * The tree is walked from a right-hand starting node (rn) towards a
 * left-hand stop node (ln) until an object containing @address is found.
 * Returns that object or NULL.
 */
static vm_object_t *vm_find_object_by_address_userptr_range(manageable_aperture_t *app,
						    const void *address, int is_userptr)
{
	vm_object_t *cur = NULL;
	rbtree_t *tree = vm_object_tree(app, is_userptr);
	rbtree_key_t key = rbtree_key((unsigned long)address, 0);
	rbtree_node_t *rn = rbtree_lookup_nearest(tree, &key, LKP_ALL, RIGHT);
	rbtree_node_t *ln;
	void *start;
	uint64_t size;

	/* All nodes might sit on the left side of *address*, in which case
	 * rn is NULL. Pick the rightmost node as rn then.
	 */
	if (!rn)
		rn = rbtree_min_max(tree, RIGHT);

	if (is_userptr) {
		/* userptrs might overlap. The walk has to go through the
		 * whole tree from right to left, as only nodes to the left
		 * can contain the *address*.
		 */
		ln = rbtree_min_max(tree, LEFT);
	} else {
		/* If key->size is -1, it matches the node with start <= address.
		 * If key->size is 0, it matches the node with start < address.
		 */
		key = rbtree_key((unsigned long)address, -1);
		ln = rbtree_lookup_nearest(tree, &key, LKP_ALL, LEFT);
	}
	/* No left-hand stop node: nothing can contain the address */
	if (!ln)
		return NULL;

	while (rn) {
		cur = vm_object_entry(rn, is_userptr);
		if (is_userptr == 0) {
			start = cur->start;
			size = cur->size;
		} else {
			start = cur->userptr;
			size = cur->userptr_size;
		}

		/* Hit: address lies in [start, start + size) */
		if (address >= start &&
				(uint64_t)address < ((uint64_t)start + size))
			break;

		cur = NULL;

		/* The stop node was just examined; give up */
		if (ln == rn)
			break;

		rn = hsakmt_rbtree_prev(tree, rn);
	}

	return cur; /* NULL if not found */
}

/* Exact-match lookup by object start address (searches `tree`). */
static vm_object_t *vm_find_object_by_address(manageable_aperture_t *app,
					const void *address, uint64_t size)
{
	const int lookup_userptr = 0;

	return vm_find_object_by_address_userptr(app, address, size,
						 lookup_userptr);
}

/* Containing-range lookup by object address (searches `tree`). */
static vm_object_t *vm_find_object_by_address_range(manageable_aperture_t *app,
						    const void *address)
{
	const int lookup_userptr = 0;

	return vm_find_object_by_address_userptr_range(app, address,
						       lookup_userptr);
}

/* Exact-match lookup by userptr (searches `user_tree`). */
static vm_object_t *vm_find_object_by_userptr(manageable_aperture_t *app,
					const void *address, HSAuint64 size)
{
	const int lookup_userptr = 1;

	return vm_find_object_by_address_userptr(app, address, size,
						 lookup_userptr);
}

/* Containing-range lookup by userptr (searches `user_tree`). */
static vm_object_t *vm_find_object_by_userptr_range(manageable_aperture_t *app,
						const void *address)
{
	const int lookup_userptr = 1;

	return vm_find_object_by_address_userptr_range(app, address,
						       lookup_userptr);
}

/* Return the allocated range of @app that contains @address (bounds
 * inclusive), or NULL when no range does.
 */
static vm_area_t *vm_find(manageable_aperture_t *app, void *address)
{
	vm_area_t *range;

	for (range = app->vm_ranges; range != NULL; range = range->next) {
		if (range->start <= address && range->end >= address)
			return range;
	}

	return NULL;
}

/* An aperture is valid when both bounds are set and the base lies
 * strictly below the limit.
 */
static bool aperture_is_valid(void *app_base, void *app_limit)
{
	return app_base != NULL && app_limit != NULL && app_base < app_limit;
}

/* Pad the size of a VM area with the aperture's guard pages.
 *
 * The padding leaves guard_pages pages behind every object so that
 * out-of-bounds accesses run into a VM fault.
 */
static uint64_t vm_align_area_size(manageable_aperture_t *app, uint64_t size)
{
	const uint64_t guard_bytes = (uint64_t)app->guard_pages * PAGE_SIZE;

	return size + guard_bytes;
}

/*
 * Return [address, address + size + guard pages) to a reserved aperture.
 * The caller must hold fmm_mutex.
 *
 * The containing range is removed, shrunk at either end, or split,
 * depending on where the block sits. For CPU accessible apertures the
 * NUMA policy is reset and the CPU mapping is replaced by a PROT_NONE
 * reservation so the address range stays claimed.
 */
static void reserved_aperture_release(manageable_aperture_t *app,
				      void *address,
				      uint64_t MemorySizeInBytes)
{
	const int reserve_flags =
		MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED;
	vm_area_t *range;
	uint64_t range_size;
	void *remapped;

	MemorySizeInBytes = vm_align_area_size(app, MemorySizeInBytes);

	range = vm_find(app, address);
	if (range == NULL)
		return;

	range_size = VOID_PTRS_SUB(range->end, range->start) + 1;

	if (range_size == MemorySizeInBytes) {
		/* The block is the whole range */
		vm_remove_area(app, range);
	} else if (range_size > MemorySizeInBytes) {
		if (range->start == address) {
			/* Block sits at the front: move the start up */
			range->start =
				VOID_PTR_ADD(range->start, MemorySizeInBytes);
		} else if (VOID_PTRS_SUB(range->end, address) + 1 ==
				MemorySizeInBytes) {
			/* Block sits at the back: pull the end down */
			range->end = VOID_PTR_SUB(range->end, MemorySizeInBytes);
		} else {
			/* Block sits in the middle */
			vm_split_area(app, range, address, MemorySizeInBytes);
		}
	}

	if (!app->is_cpu_accessible)
		return;

	/* Reset NUMA policy */
	mbind(address, MemorySizeInBytes, MPOL_DEFAULT, NULL, 0, 0);

	/* Remove any CPU mapping, but keep the address range reserved */
	remapped = mmap(address, MemorySizeInBytes, PROT_NONE, reserve_flags,
			-1, 0);
	if (remapped != MAP_FAILED || errno != ENOMEM)
		return;

	/* When the mmap count reaches max_map_count, any mmap will fail.
	 * Reduce the count with munmap, then map the range as NORESERVE
	 * again right away.
	 */
	if (munmap(address, MemorySizeInBytes) != 0) {
		pr_err("Failed to unmap memory\n");
		return;
	}

	remapped = mmap(address, MemorySizeInBytes, PROT_NONE, reserve_flags,
			-1, 0);
	if (remapped == MAP_FAILED)
		pr_err("Failed to remap memory after unmap\n");
}

/*
 * Reserve MemorySizeInBytes (plus guard pages) of address space in a
 * reserved aperture by searching the vm_ranges list for a hole.
 *
 * @app:               aperture to allocate from
 * @address:           fixed address to use, or NULL to let the
 *                     allocator pick one
 * @MemorySizeInBytes: requested size, before guard pages are added
 * @align:             requested alignment; raised to app->align if
 *                     smaller
 *
 * returns allocated address or NULL. Assumes, that fmm_mutex is locked
 * on entry.
 *
 * NOTE(review): if both @align and app->align are 0 the doubling loop
 * below cannot make progress - confirm that callers never pass that
 * combination.
 */
static void *reserved_aperture_allocate_aligned(manageable_aperture_t *app,
						void *address,
						uint64_t MemorySizeInBytes,
						uint64_t align)
{
	uint64_t offset = 0, orig_align = align;
	vm_area_t *cur, *next;
	void *start;

	if (align < app->align)
		align = app->align;

	/* Align big buffers to the next power-of-2 up to huge page
	 * size for flexible fragment size TLB optimizations
	 */
	while (align < GPU_HUGE_PAGE_SIZE && MemorySizeInBytes >= (align << 1))
		align <<= 1;

	/* If no specific alignment was requested, align the end of
	 * buffers instead of the start. For fragment optimizations,
	 * aligning the start or the end achieves the same effective
	 * optimization. End alignment to the TLB cache line size is
	 * needed as a workaround for TLB issues on some older GPUs.
	 */
	if (orig_align <= (uint64_t)PAGE_SIZE)
		offset = align - (MemorySizeInBytes & (align - 1));

	/* From here on the size includes the guard pages */
	MemorySizeInBytes = vm_align_area_size(app, MemorySizeInBytes);

	/* Find a big enough "hole" in the address space.
	 * cur trails next by one list element; the candidate start is
	 * either the fixed address or the aligned end of cur.
	 */
	cur = NULL;
	next = app->vm_ranges;
	start = address ? address :
		(void *)(ALIGN_UP((uint64_t)app->base, align) + offset);
	while (next) {
		/* Hole between start and the next range is large enough */
		if (next->start > start &&
		    VOID_PTRS_SUB(next->start, start) >= MemorySizeInBytes)
			break;

		cur = next;
		next = next->next;
		if (!address)
			start = (void *)(ALIGN_UP((uint64_t)cur->end + 1, align) + offset);
	}
	if (!next && VOID_PTRS_SUB(app->limit, start) + 1 < MemorySizeInBytes)
		/* No hole found and not enough space after the last area */
		return NULL;

	if (cur && address && address < (void *)ALIGN_UP((uint64_t)cur->end + 1, align))
		/* Required address is not free or overlaps */
		return NULL;

	if (cur && VOID_PTR_ADD(cur->end, 1) == start) {
		/* extend existing area */
		cur->end = VOID_PTR_ADD(start, MemorySizeInBytes-1);
	} else {
		vm_area_t *new_area;
		/* create a new area between cur and next */
		new_area = vm_create_and_init_area(start,
				VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
		if (!new_area)
			return NULL;
		new_area->next = next;
		new_area->prev = cur;
		if (cur)
			cur->next = new_area;
		else
			app->vm_ranges = new_area;
		if (next)
			next->prev = new_area;
	}

	return start;
}

/* Obtain an aligned block of address space from mmap.
 *
 * @prot:       protection for the final mapping; PROT_NONE returns the
 *              bare address reservation
 * @flags:      mmap flags for the mapping
 * @size:       size of the block in bytes
 * @align:      required alignment of the returned address
 * @guard_size: bytes of padding requested on each side of the block
 * @aper_base:  lowest acceptable address
 * @aper_limit: highest acceptable address (inclusive)
 * @fd:         file descriptor for the final mapping when @prot is not
 *              PROT_NONE
 *
 * A PROT_NONE reservation large enough for block, guards and alignment
 * slack is mapped first, then everything outside the aligned block is
 * unmapped again. Unless @prot is PROT_NONE the block is finally
 * remapped in place with @prot and @fd.
 *
 * Returns the aligned address, or NULL on failure. On failure no part
 * of the reservation is left mapped.
 */
void *hsakmt_mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
			    uint64_t guard_size, void *aper_base, void *aper_limit, int fd)
{
	void *addr, *aligned_addr, *aligned_end, *mapping_end;
	uint64_t aligned_padded_size;

	aligned_padded_size = size + guard_size * 2 + (align - PAGE_SIZE);

	/* Map memory PROT_NONE to alloc address space only */
	addr = mmap(0, aligned_padded_size, PROT_NONE, flags | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		pr_err("mmap failed: %s\n", strerror(errno));
		return NULL;
	}

	/* Adjust for alignment and guard pages */
	aligned_addr = (void *)ALIGN_UP((uint64_t)addr + guard_size, align);
	if (aligned_addr < aper_base ||
	    VOID_PTR_ADD(aligned_addr, size - 1) > aper_limit) {
		pr_err("mmap returned %p, out of range %p-%p\n", aligned_addr,
		       aper_base, aper_limit);
		munmap(addr, aligned_padded_size);
		return NULL;
	}

	/* Unmap padding and guard pages */
	if (aligned_addr > addr)
		munmap(addr, VOID_PTRS_SUB(aligned_addr, addr));

	aligned_end = VOID_PTR_ADD(aligned_addr, size);
	mapping_end = VOID_PTR_ADD(addr, aligned_padded_size);
	if (mapping_end > aligned_end)
		munmap(aligned_end, VOID_PTRS_SUB(mapping_end, aligned_end));

	if (prot == PROT_NONE)
		return aligned_addr;

	/*  MAP_FIXED to the aligned address with required prot */
	addr = mmap(aligned_addr, size, prot, flags | MAP_FIXED, fd, 0);
	if (addr == MAP_FAILED) {
		/* Log first: munmap below may overwrite errno */
		pr_err("mmap failed: %s\n", strerror(errno));
		/* The caller only sees NULL and cannot release the
		 * reservation, so drop it here to avoid leaking the range.
		 */
		munmap(aligned_addr, size);
		return NULL;
	}

	return addr;
}

/* allocate_area_aligned implementation for mmap-managed apertures.
 *
 * @aper:    aperture to allocate from; must be CPU accessible
 * @address: fixed address to reserve, or NULL to let mmap choose
 * @size:    size of the reservation in bytes
 * @align:   minimum alignment; grown for big buffers up to
 *           PAGE_SIZE << svm.alignment_order (ignored for a fixed
 *           @address)
 *
 * Returns the reserved address (mapped PROT_NONE) or NULL on failure.
 */
static void *mmap_aperture_allocate_aligned(manageable_aperture_t *aper,
					    void *address,
					    uint64_t size, uint64_t align)
{
	uint64_t alignment_size = PAGE_SIZE << svm.alignment_order;
	uint64_t guard_size;

	if (!aper->is_cpu_accessible) {
		pr_err("MMap Aperture must be CPU accessible\n");
		return NULL;
	}

	if (address) {
		void *addr;
		int map_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;

#ifdef MAP_FIXED_NOREPLACE
		map_flags |= MAP_FIXED_NOREPLACE;
#endif
		addr = mmap(address, size, PROT_NONE, map_flags, -1, 0);
		if (addr == MAP_FAILED) {
			pr_err("mmap failed: %s\n", strerror(errno));
			return NULL;
		}

		/* Always verify the placement. Without MAP_FIXED_NOREPLACE
		 * the address is only a hint, and kernels that do not know
		 * the flag ignore it and behave the same way, so a
		 * different address can come back in both builds.
		 */
		if (address != addr) {
			pr_err("mmap failed to return addr asked\n");
			munmap(addr, size);
			return NULL;
		}

		return addr;
	}

	/* Align big buffers to the next power-of-2. By default, the max alignment
	 * size is set to 2MB. This can be modified by the env variable
	 * HSA_MAX_VA_ALIGN. This variable sets the order of the alignment size as
	 * PAGE_SIZE * 2^HSA_MAX_VA_ALIGN. Setting HSA_MAX_VA_ALIGN = 18 (1GB),
	 * improves the time for memory allocation and mapping. But it might lose
	 * performance when GFX access it, specially for big allocations (>3GB).
	 */
	while (align < alignment_size && size >= (align << 1))
		align <<= 1;

	/* Add padding to guarantee proper alignment and leave guard
	 * pages on both sides
	 */
	guard_size = (uint64_t)aper->guard_pages * PAGE_SIZE;

	return hsakmt_mmap_allocate_aligned(PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE,
				     size, align, guard_size, aper->base, aper->limit, -1);
}

/* release_area implementation for mmap-managed apertures: reset the
 * NUMA policy of [addr, addr + size) and unmap it. Only valid for CPU
 * accessible apertures; anything else is logged and ignored.
 */
static void mmap_aperture_release(manageable_aperture_t *aper,
				  void *addr, uint64_t size)
{
	if (aper->is_cpu_accessible) {
		/* Reset NUMA policy before the range goes away */
		mbind(addr, size, MPOL_DEFAULT, NULL, 0, 0);

		/* Unmap memory */
		munmap(addr, size);
		return;
	}

	pr_err("MMap Aperture must be CPU accessible\n");
}

/* Wrapper functions to call aperture-specific VA management functions */

/* Allocate address space through the aperture's ops. An @align of 0
 * selects the aperture's own default alignment.
 */
static void *aperture_allocate_area_aligned(manageable_aperture_t *app,
					    void *address,
					    uint64_t MemorySizeInBytes,
					    uint64_t align)
{
	uint64_t effective_align = align;

	if (effective_align == 0)
		effective_align = app->align;

	return app->ops->allocate_area_aligned(app, address, MemorySizeInBytes,
					       effective_align);
}
/* Allocate address space through the aperture's ops using the
 * aperture's default alignment.
 */
static void *aperture_allocate_area(manageable_aperture_t *app, void *address,
				    uint64_t MemorySizeInBytes)
{
	const uint64_t default_align = app->align;

	return app->ops->allocate_area_aligned(app, address, MemorySizeInBytes,
					       default_align);
}
/* Release address space through the aperture's ops. */
static void aperture_release_area(manageable_aperture_t *app, void *address,
				  uint64_t MemorySizeInBytes)
{
	const manageable_aperture_ops_t *ops = app->ops;

	ops->release_area(app, address, MemorySizeInBytes);
}

/* Create a vm_object for [new_address, new_address + MemorySizeInBytes)
 * and insert it into the object tree of @app.
 *
 * Returns the new object, or NULL if it could not be allocated.
 * Assumes that fmm_mutex is locked on entry.
 */
static vm_object_t *aperture_allocate_object(manageable_aperture_t *app,
					     void *new_address,
					     uint64_t handle,
					     uint64_t MemorySizeInBytes,
					     HsaMemFlags mflags)
{
	vm_object_t *obj = vm_create_and_init_object(new_address,
						     MemorySizeInBytes,
						     handle, mflags);

	if (obj != NULL)
		hsakmt_rbtree_insert(&app->tree, &obj->node);

	return obj;
}

/* Return the index of the gpu_mem entry with the given gpu_id, or -1
 * when there is none.
 */
static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id)
{
	uint32_t idx = 0;

	while (idx < gpu_mem_count) {
		if (gpu_mem[idx].gpu_id == gpu_id)
			return idx;
		idx++;
	}

	return -1;
}

/* Return the index of the gpu_mem entry with the given node_id, or -1
 * when there is none.
 */
static int32_t gpu_mem_find_by_node_id(uint32_t node_id)
{
	uint32_t idx = 0;

	while (idx < gpu_mem_count) {
		if (gpu_mem[idx].node_id == node_id)
			return idx;
		idx++;
	}

	return -1;
}

/* Translate an aperture description into the aperture it names.
 *
 * @info: aperture type, plus the gpu_mem index for HSA_APERTURE_GPUVM
 *
 * Returns the aperture, or NULL for an unsupported type or a GPUVM index
 * that does not name an entry of gpu_mem.
 */
static manageable_aperture_t *fmm_get_aperture(HsaApertureInfo info)
{
	switch (info.type) {
	case HSA_APERTURE_DGPU:
		return svm.dgpu_aperture;
	case HSA_APERTURE_DGPU_ALT:
		return svm.dgpu_alt_aperture;
	case HSA_APERTURE_GPUVM:
		/* info may originate from a shared memory handle, so the
		 * index must not be trusted to be in range.
		 */
		if (info.idx >= gpu_mem_count)
			return NULL;
		return &gpu_mem[info.idx].gpuvm_aperture;
	case HSA_APERTURE_CPUVM:
		return &cpuvm_aperture;
	case HSA_APERTURE_MEMHANDLE:
		return &mem_handle_aperture;
	default:
		return NULL;
	}
}

static gpu_mem_t *fmm_is_scratch_aperture(const void *address)
{
	uint32_t i;

	for (i = 0; i < gpu_mem_count; i++) {
		if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
			continue;

		if ((address >= gpu_mem[i].scratch_physical.base) &&
			(address <= gpu_mem[i].scratch_physical.limit))
			return &gpu_mem[i];

	}
	return NULL;
}

/* Determine which aperture @address belongs to.
 *
 * @address: address to classify
 * @info:    optional out parameter receiving the aperture type and, for
 *           GPUVM apertures, the gpu_mem index
 *
 * Lookup order: mem_handle_aperture first; then, on dGPU, the SVM
 * apertures (with per-GPU scratch apertures carved out of
 * dgpu_aperture); on APU, dgpu_aperture, the per-GPU GPUVM apertures and
 * finally cpuvm_aperture as the fallback.
 *
 * Note that a scratch aperture hit leaves info->type at
 * HSA_APERTURE_UNSUPPORTED.
 */
static manageable_aperture_t *fmm_find_aperture(const void *address,
						HsaApertureInfo *info)
{
	manageable_aperture_t *aperture = NULL;
	uint32_t i;
	HsaApertureInfo _info = { .type = HSA_APERTURE_UNSUPPORTED, .idx = 0};
	gpu_mem_t *gpu_mem_ptr = NULL;

	if ((address >= mem_handle_aperture.base) &&
		(address <= mem_handle_aperture.limit)){

		aperture = &mem_handle_aperture;
		_info.type = HSA_APERTURE_MEMHANDLE;

	} else if (hsakmt_is_dgpu) {
		if (address >= svm.dgpu_aperture->base &&
			address <= svm.dgpu_aperture->limit) {

			/* Scratch backing memory lives inside dgpu_aperture
			 * but is managed by its own per-GPU aperture.
			 */
			gpu_mem_ptr = fmm_is_scratch_aperture(address);
			if (gpu_mem_ptr) {
				aperture = &gpu_mem_ptr->scratch_physical;
			} else {
				aperture = svm.dgpu_aperture;
				_info.type = HSA_APERTURE_DGPU;
			}
		} else if (address >= svm.dgpu_alt_aperture->base &&
			address <= svm.dgpu_alt_aperture->limit) {
			aperture = svm.dgpu_alt_aperture;
			_info.type = HSA_APERTURE_DGPU_ALT;
		} else {
			/* Not in SVM, it can be system memory registered by userptr */
			aperture = svm.dgpu_aperture;
			_info.type = HSA_APERTURE_DGPU;
		}
	} else { /* APU */
		if (address >= svm.dgpu_aperture->base && address <= svm.dgpu_aperture->limit) {
			aperture = svm.dgpu_aperture;
			_info.type = HSA_APERTURE_DGPU;
		} else {
			/* gpuvm_aperture: the loop does not stop at the first
			 * hit, so the last matching GPU wins.
			 */
			for (i = 0; i < gpu_mem_count; i++) {
				if ((address >= gpu_mem[i].gpuvm_aperture.base) &&
					(address <= gpu_mem[i].gpuvm_aperture.limit)) {
					aperture = &gpu_mem[i].gpuvm_aperture;
					_info.type = HSA_APERTURE_GPUVM;
					_info.idx = i;
				}
			}
		}
		if (!aperture) {
			/* Not in GPUVM */
			aperture = &cpuvm_aperture;
			_info.type = HSA_APERTURE_CPUVM;
		}
	}

	if (info)
		*info = _info;

	return aperture;
}

/* Derive HsaMemFlags from KFD allocation ioctl flags.
 *
 * ReadOnly and CoarseGrain are set when the WRITABLE and COHERENT ioctl
 * flags are absent; ExtendedCoherent and HostAccess are set when the
 * EXT_COHERENT and PUBLIC ioctl flags are present. All other flags stay
 * zero.
 */
static HsaMemFlags fmm_translate_ioc_to_hsa_flags(uint32_t ioc_flags)
{
	HsaMemFlags mflags = {0};
	const bool writable = (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) != 0;
	const bool coherent = (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) != 0;
	const bool ext_coherent = (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) != 0;
	const bool host_access = (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) != 0;

	if (!writable)
		mflags.ui32.ReadOnly = 1;
	if (!coherent)
		mflags.ui32.CoarseGrain = 1;
	if (ext_coherent)
		mflags.ui32.ExtendedCoherent = 1;
	if (host_access)
		mflags.ui32.HostAccess = 1;

	return mflags;
}

/* Register a memory range with the SVM API and set its coherence flags.
 *
 * @address:      start of the range; rounded down to a page boundary
 * @size:         size in bytes; the end is rounded up to a page boundary
 * @coarse_grain: true clears HSA_SVM_FLAG_COHERENT, false sets it
 * @ext_coherent: true sets HSA_SVM_FLAG_EXT_COHERENT, false clears it
 *
 * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR when no GPU
 * memory state has been set up or the ioctl fails.
 */
static HSAKMT_STATUS fmm_register_mem_svm_api(void *address,
					      uint64_t size,
					      bool coarse_grain,
					      bool ext_coherent)
{
	struct kfd_ioctl_svm_args *args;
	size_t s_attr;
	HSAuint32 page_offset = (HSAuint64)address & (PAGE_SIZE-1);
	HSAuint64 aligned_addr = (HSAuint64)address - page_offset;
	HSAuint64 aligned_size = PAGE_ALIGN_UP(page_offset + size);

	if (!g_first_gpu_mem)
		return HSAKMT_STATUS_ERROR;

	/* Two attributes follow the fixed part of the argument struct */
	s_attr = 2 * sizeof(struct kfd_ioctl_svm_attribute);
	args = alloca(sizeof(*args) + s_attr);
	args->start_addr = aligned_addr;
	args->size = aligned_size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = 2;
	args->attrs[0].type = coarse_grain ?
			      HSA_SVM_ATTR_CLR_FLAGS : HSA_SVM_ATTR_SET_FLAGS;
	args->attrs[0].value = HSA_SVM_FLAG_COHERENT;
	args->attrs[1].type = ext_coherent ? HSA_SVM_ATTR_SET_FLAGS : HSA_SVM_ATTR_CLR_FLAGS ;
	args->attrs[1].value = HSA_SVM_FLAG_EXT_COHERENT;
	/* The size is an unsigned 64-bit quantity: print it with PRIu64
	 * instead of the signed, width-dependent %ld.
	 */
	pr_debug("Registering to SVM %p size: %" PRIu64 "\n", (void*)aligned_addr,
		 (uint64_t)aligned_size);
	/* Driver does one copy_from_user, with extra attrs size */
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args)) {
		pr_debug("op set range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

/* Grant in-place access to a memory range for a set of nodes through the
 * SVM API.
 *
 * @address:          start of the range
 * @size:             size of the range in bytes
 * @nodes_to_map:     values for the HSA_SVM_ATTR_ACCESS_IN_PLACE
 *                    attributes, one per node
 * @nodes_array_size: number of entries in @nodes_to_map
 *
 * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR when no GPU
 * memory state has been set up or the ioctl fails.
 */
static HSAKMT_STATUS fmm_map_mem_svm_api(void *address,
					      uint64_t size,
					      uint32_t *nodes_to_map,
					      uint32_t nodes_array_size)
{
	struct kfd_ioctl_svm_args *args;
	size_t attrs_bytes;
	uint32_t idx;

	if (!g_first_gpu_mem)
		return HSAKMT_STATUS_ERROR;

	/* One attribute per node, appended to the fixed argument struct */
	attrs_bytes = sizeof(struct kfd_ioctl_svm_attribute) * nodes_array_size;
	args = alloca(sizeof(*args) + attrs_bytes);

	args->start_addr = (uint64_t)address;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = nodes_array_size;

	for (idx = 0; idx < nodes_array_size; idx++) {
		args->attrs[idx].type = HSA_SVM_ATTR_ACCESS_IN_PLACE;
		args->attrs[idx].value = nodes_to_map[idx];
	}

	/* Driver does one copy_from_user, with extra attrs size */
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (attrs_bytes << _IOC_SIZESHIFT), args)) {
		pr_debug("op set range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

/* Allocate memory from KFD for [mem, mem + MemorySizeInBytes) and return
 * the vm_object created for it in @aperture. Return NULL on any failure.
 *
 * @gpu_id:            GPU to allocate on
 * @mem:               address of the allocation; must not be NULL
 * @MemorySizeInBytes: total size of the allocation
 * @aperture:          aperture that tracks the new object
 * @mmap_offset:       in/out. For userptr allocations it is required and
 *                     its value is passed to the driver as mmap_offset
 *                     of the first chunk. When non-NULL it receives the
 *                     mmap_offset reported for the first chunk.
 * @ioc_flags:         KFD_IOC_ALLOC_MEM_FLAGS_* for the allocation
 *
 * Userptr allocations larger than BIGGEST_SINGLE_BUF_SIZE are split into
 * several KFD buffers whose handles are all stored in the one object. On
 * failure every buffer allocated so far is freed again.
 */
static vm_object_t *fmm_allocate_memory_object(uint32_t gpu_id, void *mem,
						uint64_t MemorySizeInBytes,
						manageable_aperture_t *aperture,
						uint64_t *mmap_offset,
						uint32_t ioc_flags)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args args = {0};
	struct kfd_ioctl_free_memory_of_gpu_args free_args = {0};
	vm_object_t *vm_obj = NULL;
	HsaMemFlags mflags;
	uint64_t offset = 0, total_size, size;

	if (!mem)
		return NULL;

	/* The userptr path reads *mmap_offset below; reject a missing
	 * pointer instead of dereferencing NULL.
	 */
	if ((ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) && !mmap_offset)
		return NULL;

	/* Allocate memory from amdkfd */
	args.gpu_id = gpu_id;

	args.flags = ioc_flags |
		KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
	args.va_addr = (uint64_t)mem;
	/* On APU, VRAM addresses are passed relative to the aperture base */
	if (!hsakmt_is_dgpu &&
	    (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
		args.va_addr = VOID_PTRS_SUB(mem, aperture->base);

	/* if allocate vram-only, use an invalid VA */
	if (aperture == &mem_handle_aperture)
		args.va_addr = 0;

	total_size = 0;
	/* Split to multiple buffers, if size is too big */
	if (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		size = MemorySizeInBytes < BIGGEST_SINGLE_BUF_SIZE ?
			MemorySizeInBytes : BIGGEST_SINGLE_BUF_SIZE;
		offset = *mmap_offset;
		args.mmap_offset = *mmap_offset;
	} else {
		size = MemorySizeInBytes;
	}

	mflags = fmm_translate_ioc_to_hsa_flags(ioc_flags);

	do {
		args.size = size;

		if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &args))
			goto err_hsakmt_ioctl_failed;

		/* Allocate object on the first chunk, append the handle on
		 * every following one
		 */
		if (!vm_obj) {
			pthread_mutex_lock(&aperture->fmm_mutex);
			vm_obj = aperture_allocate_object(aperture, mem, args.handle,
					MemorySizeInBytes, mflags);

			pthread_mutex_unlock(&aperture->fmm_mutex);
			if (!vm_obj)
				goto err_object_allocation_failed;

			if (mmap_offset)
				*mmap_offset = args.mmap_offset;
		} else {
			vm_obj->handles[vm_obj->handle_num++] = args.handle;
		}

		/* Advance to the next chunk */
		args.va_addr += size;
		offset += size;

		if (ioc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
			args.mmap_offset = offset;

		total_size += size;
		if (total_size + BIGGEST_SINGLE_BUF_SIZE > MemorySizeInBytes)
			size = MemorySizeInBytes - total_size;
		else
			size = BIGGEST_SINGLE_BUF_SIZE;
	} while (total_size < MemorySizeInBytes);

	return vm_obj;

err_object_allocation_failed:
	/* The buffer exists but no object tracks it: free it directly */
	free_args.handle = args.handle;
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args)) {
		pr_err("Failed to free GPU memory with handle: 0x%llx\n", free_args.handle);
	}
err_hsakmt_ioctl_failed:
	if (vm_obj) {
		/* Free the buffers of all earlier chunks, newest first */
		do {
			free_args.handle = vm_obj->handles[--vm_obj->handle_num];
			if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args))
				pr_err("Failed to free GPU memory with handle: 0x%llx\n", free_args.handle);
		} while (vm_obj->handle_num);
		pthread_mutex_lock(&aperture->fmm_mutex);
		vm_remove_object(aperture, vm_obj);
		pthread_mutex_unlock(&aperture->fmm_mutex);
	}
	return NULL;
}

#ifdef DEBUG_PRINT_APERTURE
/* Debug helper: log the base and limit addresses of a plain aperture.
 * Only compiled when DEBUG_PRINT_APERTURE is defined.
 */
static void aperture_print(aperture_t *app)
{
	pr_info("\t Base: %p\n", app->base);
	pr_info("\t Limit: %p\n", app->limit);
}

/* Debug helper: log a manageable aperture's bounds, followed by every
 * entry of its vm_ranges list and every object stored in its tree.
 */
static void manageable_aperture_print(manageable_aperture_t *app)
{
	vm_area_t *area;
	rbtree_node_t *node;

	pr_info("\t Base: %p\n", app->base);
	pr_info("\t Limit: %p\n", app->limit);

	pr_info("\t Ranges:\n");
	for (area = app->vm_ranges; area; area = area->next)
		pr_info("\t\t Range [%p - %p]\n", area->start, area->end);

	pr_info("\t Objects:\n");
	for (node = rbtree_node_any(&app->tree, LEFT); node;
	     node = hsakmt_rbtree_next(&app->tree, node)) {
		vm_object_t *entry = vm_object_entry(node, 0);

		pr_info("\t\t Object [%p - %" PRIu64 "]\n",
				entry->start, entry->size);
	}
}

void hsakmt_fmm_print(uint32_t gpu_id)
{
	int32_t gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);

	if (gpu_mem_id >= 0) { /* Found */
		pr_info("LDS aperture:\n");
		aperture_print(&gpu_mem[gpu_mem_id].lds_aperture);
		pr_info("GPUVM aperture:\n");
		manageable_aperture_print(&gpu_mem[gpu_mem_id].gpuvm_aperture);
		pr_info("Scratch aperture:\n");
		aperture_print(&gpu_mem[gpu_mem_id].scratch_aperture);
		pr_info("Scratch backing memory:\n");
		manageable_aperture_print(&gpu_mem[gpu_mem_id].scratch_physical);
	}

	pr_info("dGPU aperture:\n");
	manageable_aperture_print(svm.dgpu_aperture);
	pr_info("dGPU alt aperture:\n");
	if (svm.dgpu_aperture == svm.dgpu_alt_aperture)
		pr_info("\t Alias of dGPU aperture\n");
	else
		manageable_aperture_print(svm.dgpu_alt_aperture);
}
#else
/* No-op variant used when DEBUG_PRINT_APERTURE is not defined. */
void hsakmt_fmm_print(uint32_t gpu_id)
{
}
#endif

/* vm_find_object - Find a VM object in any aperture
 *
 * @addr: VM address of the object
 * @size: size of the object, 0 means "don't care",
 *        UINT64_MAX means addr can match any address within the object
 * @out_aper: Aperture where the object was found
 *
 * Returns a pointer to the object if found, NULL otherwise. If an
 * object is found, this function returns with the
 * (*out_aper)->fmm_mutex locked.
 *
 * The aperture is chosen in this order: a per-GPU GPUVM aperture that
 * contains @addr, the memory-handle aperture, the dGPU SVM apertures
 * (addresses outside both are looked up as userptrs in the default dGPU
 * aperture) and finally, when hsakmt_is_dgpu is false, the CPUVM
 * aperture. *out_aper is only written when an object is found; on
 * failure any mutex taken here is released before returning.
 */
static vm_object_t *vm_find_object(const void *addr, uint64_t size,
				   manageable_aperture_t **out_aper)
{
	manageable_aperture_t *aper = NULL;
	bool range = (size == UINT64_MAX);
	bool userptr = false;
	vm_object_t *obj = NULL;
	uint32_t i;

	/* 1) Per-GPU GPUVM apertures of valid GPUs */
	for (i = 0; i < gpu_mem_count; i++)
		if (gpu_mem[i].gpu_id != NON_VALID_GPU_ID &&
		    addr >= gpu_mem[i].gpuvm_aperture.base &&
		    addr <= gpu_mem[i].gpuvm_aperture.limit) {
			aper = &gpu_mem[i].gpuvm_aperture;
			break;
		}

	/* 2) Memory-handle aperture */
	if (!aper) {
		if ((addr >= mem_handle_aperture.base) &&
			 (addr <= mem_handle_aperture.limit)){
			 aper = &mem_handle_aperture;
		}
	}

	/* 3) SVM apertures. Without an SVM aperture, skip straight to the
	 * CPUVM fallback with aper still NULL (no mutex held).
	 */
	if (!aper) {
		if (!svm.dgpu_aperture)
			goto no_svm;

		if ((addr >= svm.dgpu_aperture->base) &&
		    (addr <= svm.dgpu_aperture->limit))
			aper = svm.dgpu_aperture;
		else if ((addr >= svm.dgpu_alt_aperture->base) &&
			 (addr <= svm.dgpu_alt_aperture->limit))
			aper = svm.dgpu_alt_aperture;
		else {
			/* Outside every aperture: only a userptr lookup
			 * in the default dGPU aperture can match.
			 */
			aper = svm.dgpu_aperture;
			userptr = true;
		}
	}

	pthread_mutex_lock(&aper->fmm_mutex);
	if (range) {
		/* mmap_apertures can have userptrs in them. Try to
		 * look up addresses as userptrs first to sort out any
		 * ambiguity of multiple overlapping mappings at
		 * different GPU addresses.
		 */
		if (userptr || aper->ops == &mmap_aperture_ops)
			obj = vm_find_object_by_userptr_range(aper, addr);
		if (!obj && !userptr)
			obj = vm_find_object_by_address_range(aper, addr);
	} else {
		if (userptr || aper->ops == &mmap_aperture_ops)
			obj = vm_find_object_by_userptr(aper, addr, size);
		if (!obj && !userptr) {
			/* Look up by the page-aligned address; the offset
			 * within the page is validated separately below.
			 */
			long page_offset = (long)addr & (PAGE_SIZE-1);
			const void *page_addr = (const uint8_t *)addr - page_offset;

			obj = vm_find_object_by_address(aper, page_addr, 0);
			/* If we find a userptr here, it's a match on
			 * the aligned GPU address. Make sure that the
			 * page offset and size match too.
			 */
			if (obj && obj->userptr &&
			    (((long)obj->userptr & (PAGE_SIZE - 1)) != page_offset ||
			     (size && size != obj->userptr_size)))
				obj = NULL;
		}
	}

no_svm:
	if (!obj && !hsakmt_is_dgpu) {
		/* On APUs try finding it in the CPUVM aperture */
		if (aper)
			pthread_mutex_unlock(&aper->fmm_mutex);

		/* From here on aper (and the mutex held) is the CPUVM one */
		aper = &cpuvm_aperture;

		pthread_mutex_lock(&aper->fmm_mutex);
		if (range)
			obj = vm_find_object_by_address_range(aper, addr);
		else
			obj = vm_find_object_by_address(aper, addr, 0);
	}

	if (obj) {
		/* Success: return with aper->fmm_mutex still held */
		*out_aper = aper;
		return obj;
	}

	/* Not found: drop whichever mutex is currently held, if any */
	if (aper)
		pthread_mutex_unlock(&aper->fmm_mutex);
	return NULL;
}

/* Read one byte from every page of [addr, addr + size) and return the
 * 8-bit sum of the bytes read. The accesses go through volatile
 * pointers so they are actually performed.
 */
static HSAuint8 fmm_check_user_memory(const void *addr, HSAuint64 size)
{
	volatile const HSAuint8 *cursor = addr;
	volatile const HSAuint8 *limit = cursor + size;
	HSAuint8 checksum = 0;

	/* Access every page in the buffer to make sure the mapping is
	 * valid. If it's not, it will die with a segfault that's easy
	 * to debug.
	 */
	while (cursor < limit) {
		checksum += *cursor;
		cursor = (void *)PAGE_ALIGN_UP(cursor + 1);
	}

	return checksum;
}

static void fmm_release_scratch(uint32_t gpu_id)
{
	int32_t gpu_mem_id;
	uint64_t size;
	vm_object_t *obj;
	manageable_aperture_t *aperture;
	rbtree_node_t *n;

	gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
	if (gpu_mem_id < 0)
		return;

	aperture = &gpu_mem[gpu_mem_id].scratch_physical;

	size = VOID_PTRS_SUB(aperture->limit, aperture->base) + 1;

	if (hsakmt_is_dgpu) {
		/* unmap and remove all remaining objects */
		pthread_mutex_lock(&aperture->fmm_mutex);
		while ((n = rbtree_node_any(&aperture->tree, MID))) {
			obj = vm_object_entry(n, 0);

			void *obj_addr = obj->start;

			pthread_mutex_unlock(&aperture->fmm_mutex);

			_fmm_unmap_from_gpu_scratch(gpu_id, aperture, obj_addr);

			pthread_mutex_lock(&aperture->fmm_mutex);
		}
		pthread_mutex_unlock(&aperture->fmm_mutex);

		/* release address space */
		pthread_mutex_lock(&svm.dgpu_aperture->fmm_mutex);
		aperture_release_area(svm.dgpu_aperture,
				      gpu_mem[gpu_mem_id].scratch_physical.base,
				      size);
		pthread_mutex_unlock(&svm.dgpu_aperture->fmm_mutex);
	} else
		/* release address space */
		munmap(gpu_mem[gpu_mem_id].scratch_physical.base, size);

	/* invalidate scratch backing aperture */
	gpu_mem[gpu_mem_id].scratch_physical.base = NULL;
	gpu_mem[gpu_mem_id].scratch_physical.limit = NULL;
}

/* Translate the AQLQueueMemory, ReadOnly and ExecuteAccess bits of
 * @flags into the corresponding KFD_IOC_ALLOC_MEM_FLAGS_* bits.
 * Memory is writable unless ReadOnly is set; AQL queue memory is also
 * marked uncached.
 */
static uint32_t fmm_translate_hsa_to_ioc_flags(HsaMemFlags flags)
{
	uint32_t translated = flags.ui32.ReadOnly ?
				0 : KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;

	if (flags.ui32.ExecuteAccess)
		translated |= KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;

	if (flags.ui32.AQLQueueMemory) {
		translated |= KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM;
		translated |= KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
	}

	return translated;
}

#define SCRATCH_ALIGN 0x10000
/* hsakmt_fmm_allocate_scratch - Reserve the scratch backing address range
 *
 * @gpu_id: GPU the scratch memory belongs to
 * @address: requested address; must be NULL when hsakmt_is_dgpu is false
 * @MemorySizeInBytes: requested size, rounded up to SCRATCH_ALIGN
 *
 * Reserves a SCRATCH_ALIGN-aligned address range, records it as the GPU's
 * scratch_physical aperture and passes its base (shifted right by 16) to
 * the KFD via AMDKFD_IOC_SET_SCRATCH_BACKING_VA.
 *
 * Returns the base address, or NULL if the GPU is unknown, scratch was
 * already allocated for it, the address range could not be reserved or
 * the ioctl failed.
 */
void *hsakmt_fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes)
{
	manageable_aperture_t *aperture_phy;
	struct kfd_ioctl_set_scratch_backing_va_args args = {0};
	int32_t gpu_mem_id;
	void *mem = NULL;
	uint64_t aligned_size = ALIGN_UP(MemorySizeInBytes, SCRATCH_ALIGN);

	/* Retrieve gpu_mem id according to gpu_id */
	gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
	if (gpu_mem_id < 0)
		return NULL;

	aperture_phy = &gpu_mem[gpu_mem_id].scratch_physical;
	if (aperture_phy->base || aperture_phy->limit)
		/* Scratch was already allocated for this GPU */
		return NULL;

	/* Allocate address space for scratch backing, 64KB aligned */
	if (hsakmt_is_dgpu) {
		pthread_mutex_lock(&svm.dgpu_aperture->fmm_mutex);
		mem = aperture_allocate_area_aligned(
			svm.dgpu_aperture, address,
			aligned_size, SCRATCH_ALIGN);
		pthread_mutex_unlock(&svm.dgpu_aperture->fmm_mutex);
	} else {
		if (address)
			return NULL;

		mem = hsakmt_mmap_allocate_aligned(PROT_READ | PROT_WRITE,
					    MAP_PRIVATE | MAP_ANONYMOUS,
					    aligned_size, SCRATCH_ALIGN, 0,
					    0, (void *)LONG_MAX, -1);
	}

	/* Bail out before touching the aperture state if no address space
	 * was obtained. Otherwise base would stay NULL while limit becomes
	 * non-NULL, VA 0 would be handed to the KFD, and every later call
	 * for this GPU would be rejected as "already allocated".
	 */
	if (!mem)
		return NULL;

	/* Remember scratch backing aperture for later */
	aperture_phy->base = mem;
	aperture_phy->limit = VOID_PTR_ADD(mem, aligned_size-1);
	aperture_phy->is_cpu_accessible = true;

	/* Program SH_HIDDEN_PRIVATE_BASE */
	args.gpu_id = gpu_id;
	args.va_addr = ((uint64_t)mem) >> 16;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_SCRATCH_BACKING_VA, &args)) {
		/* Undo the reservation and reset the aperture */
		fmm_release_scratch(gpu_id);
		return NULL;
	}

	return mem;
}

/* Reserve address space in @aperture and back it with a memory object
 * created through fmm_allocate_memory_object().
 *
 * Returns the reserved address, or NULL when the aperture is not valid,
 * no address space is available or the memory object could not be
 * created (in which case the address space is released again). When
 * @vm_obj is non-NULL it receives the created object (NULL on object
 * allocation failure); it is left untouched on the two early exits.
 */
static void *__fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes,
		manageable_aperture_t *aperture, uint64_t *mmap_offset,
		uint32_t ioc_flags, uint64_t alignment, vm_object_t **vm_obj)
{
	vm_object_t *object;
	void *va;

	/* Check that aperture is properly initialized/supported */
	if (!aperture_is_valid(aperture->base, aperture->limit))
		return NULL;

	/* Allocate address space */
	pthread_mutex_lock(&aperture->fmm_mutex);
	va = aperture_allocate_area_aligned(aperture, address, MemorySizeInBytes, alignment);
	pthread_mutex_unlock(&aperture->fmm_mutex);
	if (va == NULL)
		return NULL;

	/* With the area reserved, allocate the memory in the device itself */
	object = fmm_allocate_memory_object(gpu_id, va,
			MemorySizeInBytes, aperture, mmap_offset, ioc_flags);
	if (object == NULL) {
		/* Device allocation failed: give the region back */
		pthread_mutex_lock(&aperture->fmm_mutex);
		aperture_release_area(aperture, va, MemorySizeInBytes);
		pthread_mutex_unlock(&aperture->fmm_mutex);

		/* A NULL return tells the caller that we failed */
		va = NULL;
	}

	if (vm_obj != NULL)
		*vm_obj = object;

	return va;
}

/* Map @size bytes of @fd at @mmap_offset over the fixed address @mem as
 * a shared mapping. The mapping is read/write when @host_access is set
 * and PROT_NONE otherwise. Returns the mmap() result, i.e. MAP_FAILED on
 * error.
 */
static void *fmm_map_to_cpu(void *mem, uint64_t size, bool host_access,
			    int fd, uint64_t mmap_offset) {
	int prot = PROT_NONE;
	void *mapped;

	if (host_access)
		prot = PROT_READ | PROT_WRITE;

	mapped = mmap(mem, size, prot, MAP_SHARED | MAP_FIXED, fd, mmap_offset);
	if (mapped == MAP_FAILED)
		return mapped;

	/* This madvise() call is needed to avoid additional references
	 * to mapped BOs in child processes that can prevent freeing
	 * memory in the parent process and lead to out-of-memory
	 * conditions.
	 */
	madvise(mem, size, MADV_DONTFORK);

	return mapped;
}

/* fmm_allocate_va - Reserve virtual address space without backing memory
 *
 * @gpu_id: unused here; kept for the callers' signature
 * @address: requested address, passed to the aperture allocator
 * @size: size of the reservation
 * @aperture: aperture to reserve from
 * @alignment: requested alignment, passed to the aperture allocator
 * @mflags: memory flags stored in the tracking object
 *
 * Reserves an area in @aperture and registers a VM object with handle 0
 * and node_id 0 for it, all under the aperture's fmm_mutex.
 *
 * Returns the reserved address, or NULL if the aperture is not valid, no
 * address space is available or the tracking object cannot be allocated.
 */
static void *fmm_allocate_va(uint32_t gpu_id, void *address, uint64_t size,
			manageable_aperture_t *aperture, uint64_t alignment, HsaMemFlags mflags)
{
	void *mem = NULL;
	vm_object_t *vm_obj = NULL;

	/* Check aperture is properly initialized/supported */
	if (!aperture_is_valid(aperture->base, aperture->limit))
		return NULL;

	/* Allocate address space */
	pthread_mutex_lock(&aperture->fmm_mutex);
	mem = aperture_allocate_area_aligned(aperture, address, size, alignment);

	if (mem) {
		/* Assign handle 0 to vm_obj since no memory allocated yet */
		vm_obj = aperture_allocate_object(aperture, mem, 0, size, mflags);
		if (vm_obj) {
			/* Set node_id to 0 for OnlyAddress */
			vm_obj->node_id = 0;
		} else {
			/* No tracking object: undo the reservation. The
			 * object must not be touched on this path.
			 */
			aperture_release_area(aperture, mem, size);
			mem = NULL;
		}
	}

	pthread_mutex_unlock(&aperture->fmm_mutex);

	return mem;
}

/* udmabuf_allocation - Allocate memory through the udmabuf driver
 *
 * @gpu_id: GPU id used for the KFD dmabuf import
 * @node_id: node id used to look up the directly linked NUMA node
 * @size: size of the allocation in bytes
 * @aperture: aperture whose bounds, alignment and guard pages are used
 * @alignment: requested alignment, 0 selects the aperture default
 * @mflags: memory flags stored in the VM object (NoSubstitute is forced)
 * @vm_obj: receives the created VM object, NULL on failure
 *
 * Creates a sealed memfd of @size bytes, maps it inside the aperture
 * bounds, binds it to a NUMA node, wraps it in a udmabuf, imports that
 * dmabuf into the KFD and registers a VM object for the result.
 *
 * Returns the mapped address, or NULL on any failure. On failure every
 * resource acquired so far is released again, including the KFD handle
 * when the import itself succeeded.
 */
static void* udmabuf_allocation(uint32_t gpu_id, uint32_t node_id, uint64_t size,
                               manageable_aperture_t *aperture, uint64_t alignment,
                               HsaMemFlags mflags, vm_object_t** vm_obj)
{
	struct kfd_ioctl_import_dmabuf_args importArgs = {0};
	struct kfd_ioctl_free_memory_of_gpu_args free_args = {0};
	int memfd, dmabuf_fd;
	long long node_size, free_size;
	struct udmabuf_create create;
	uint64_t alignment_size;
	uint32_t numa_node_id;
	uint64_t guard_size;
	void *mem;
	int ret;

	dmabuf_fd = -1;
	memfd = -1;

	*vm_obj = NULL;

	memfd = memfd_create("thunk_memfd", MFD_ALLOW_SEALING);
	if (memfd == -1) {
		pr_debug("running kernel does not support memfd\n");
		return NULL;
	}

	if (ftruncate(memfd, size) == -1) {
		pr_debug("ftruncate fail\n");
		goto error_release_memfd;
	}
	pr_debug("PID: %jd; fd: %d; /proc/%jd/fd/%d\n",
               (intmax_t) getpid(), memfd, (intmax_t) getpid(), memfd);

	/* Seal the size of the memfd */
	if (fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW) < 0) {
		pr_debug("fcntl fail %s\n", strerror(errno));
		goto error_release_memfd;
	}

	/* Grow the alignment by powers of two up to the SVM alignment, as
	 * long as the allocation is at least twice the current alignment.
	 */
	alignment_size = PAGE_SIZE << svm.alignment_order;
	alignment = alignment ? alignment : aperture->align;
	while (alignment < alignment_size && size >= (alignment << 1))
		alignment <<= 1;

	guard_size = (uint64_t)aperture->guard_pages * PAGE_SIZE;

	mem = hsakmt_mmap_allocate_aligned(PROT_WRITE | PROT_READ, MAP_NORESERVE | MAP_SHARED,
					  size, alignment, guard_size, aperture->base, aperture->limit, memfd);
	if (!mem)
		goto error_release_memfd;

	/* set madvise flags to HUGEPAGE if allocate more than 2MB */
	if (size >= (2 * 1024 * 1024))
		madvise(mem, size, MADV_HUGEPAGE);

	/* always bind to numa node */
	mflags.ui32.NoSubstitute = 1;
	/* Bind to NUMA node */
	/* node_id is gpu id, get closed numa id */
	numa_node_id = hsakmt_get_direct_link_cpu(node_id);
	if (bind_mem_to_numa(numa_node_id, mem, size, mflags))
		goto error_release_aperture;

	node_size = numa_node_size64(numa_node_id, &free_size);
	pr_debug("udmabuf_allocation: numa_node_id %d, node_size %lld, free_size %lld\n",
		numa_node_id, node_size, free_size);
	/* compare free size at numa_node_id with size */
	if ((uint64_t)free_size < size) {
		pr_debug("udmabuf_allocation: has no enough ram on numa_node_id %d, node_size %lld, free_size %lld\n",
			numa_node_id, node_size, free_size);
		goto error_release_aperture;
	}

	create.memfd = memfd;
	create.flags = UDMABUF_FLAGS_CLOEXEC;
	create.offset = 0;
	create.size = size;
	dmabuf_fd = ioctl(hsakmt_udmabuf_dev_fd, UDMABUF_CREATE, &create);

	if (dmabuf_fd < 0) {
		pr_debug("ioctl UDMABUF_CREATE failed\n");
		goto error_release_aperture;
	}

	importArgs.va_addr = (uint64_t)mem;
	importArgs.gpu_id = gpu_id;
	importArgs.dmabuf_fd = dmabuf_fd;

	ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, (void *)&importArgs);
	if (ret) {
		pr_debug("ioctl AMDKFD_IOC_IMPORT_DMABUF failed, ret 0x%x\n", ret);
		goto error_release_dmabuf;
	}

	/* Allocate object */
	pthread_mutex_lock(&aperture->fmm_mutex);
	*vm_obj = aperture_allocate_object(aperture, mem, importArgs.handle,
                                          size, mflags);
	pthread_mutex_unlock(&aperture->fmm_mutex);

	if (*vm_obj == NULL)
		goto error_release_handle;

	/* after import udmabuf into kfd driver close dmabuf_fd
	 * as kfd driver holds the dmabuf
	 */
	close(dmabuf_fd);
	close(memfd);

	return mem;

error_release_handle:
	/* The import succeeded, so the KFD owns a buffer handle that nothing
	 * tracks. Free it explicitly or it leaks.
	 */
	free_args.handle = importArgs.handle;
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args))
		pr_err("Failed to free GPU memory with handle: 0x%llx\n", free_args.handle);
error_release_dmabuf:
	close(dmabuf_fd);
error_release_aperture:
	aperture_release_area(aperture, mem, size);
error_release_memfd:
	close(memfd);

	return NULL;
}

/* hsakmt_fmm_allocate_device - Allocate device (VRAM) memory for a GPU
 *
 * Picks the aperture (SVM dGPU aperture, the per-GPU GPUVM aperture or
 * the memory-handle aperture for NoAddress requests), converts @mflags
 * into KFD allocation flags and allocates either through udmabuf (when
 * its preconditions hold) or through the KFD. KFD allocations that have
 * an address are additionally mapped into the CPU address space.
 * OnlyAddress requests only reserve address space via fmm_allocate_va().
 *
 * Returns the allocated address or NULL on failure.
 */
void *hsakmt_fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address,
			  uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags mflags)
{
	manageable_aperture_t *aper;
	vm_object_t *object = NULL;
	uint32_t kfd_flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM;
	uint64_t alloc_size = MemorySizeInBytes;
	uint64_t offset;
	void *va = NULL;
	int32_t idx;

	/* Retrieve gpu_mem id according to gpu_id */
	idx = gpu_mem_find_by_gpu_id(gpu_id);
	if (idx < 0)
		return NULL;

	if (mflags.ui32.HostAccess)
		kfd_flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;

	kfd_flags |= fmm_translate_hsa_to_ioc_flags(mflags);

	if (!hsakmt_topology_is_svm_needed(gpu_mem[idx].EngineId)) {
		aper = &gpu_mem[idx].gpuvm_aperture;
	} else {
		aper = svm.dgpu_aperture;
		/* AQL queue memory reserves twice the requested size */
		if (mflags.ui32.AQLQueueMemory)
			alloc_size = MemorySizeInBytes * 2;
	}

	/* special case for va allocation without vram alloc */
	if (mflags.ui32.OnlyAddress)
		return fmm_allocate_va(gpu_id, address, alloc_size, aper, alignment, mflags);

	/* special case for vram allocation without addr */
	if (mflags.ui32.NoAddress)
		aper = &mem_handle_aperture;

	/* Caching/coherency related allocation flags */
	if (svm.disable_cache || !mflags.ui32.CoarseGrain)
		kfd_flags |= KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;

	if (svm.disable_cache || mflags.ui32.Uncached)
		kfd_flags |= KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;

	if (mflags.ui32.ExtendedCoherent)
		kfd_flags |= KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT;

	if (mflags.ui32.Contiguous)
		kfd_flags |= KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT;

	/* Try udmabuf first when it is usable for this request */
	if (hsakmt_udmabuf_dev_fd > 0 && aper == svm.dgpu_aperture && !hsakmt_is_dgpu
		 && aper->ops == &mmap_aperture_ops) {
		va = udmabuf_allocation(gpu_id, node_id, alloc_size, aper, alignment,
					mflags, &object);
		pr_debug("udmabuf_allocation mem %p\n", va);
		if (!va)
			pr_debug("udmabuf_allocation allocation fail\n");
	}

	/* env HSA_USE_UDMABUF not set, or not apu, or cannot use udmabuf,
	 * fall back to use device driver to allocate memory
	 */
	if (!va) {
		va = __fmm_allocate_device(gpu_id, address, alloc_size, aper, &offset,
					   kfd_flags, alignment, &object);

		/* if alloc vram-only not mmap to cpu vm since no va */
		if (va && !mflags.ui32.NoAddress) {
			if (fmm_map_to_cpu(va, MemorySizeInBytes,
					   mflags.ui32.HostAccess,
					   gpu_mem[idx].drm_render_fd,
					   offset) == MAP_FAILED) {
				__fmm_release(object, aper);
				return NULL;
			}
#ifdef SANITIZER_AMDGPU
			if (object) {
				object->mmap_flags = mflags.ui32.HostAccess ? PROT_READ | PROT_WRITE : PROT_NONE;
				object->mmap_fd = gpu_mem[idx].drm_render_fd;
				object->mmap_offset = offset;
			}
#endif
		}
	}

	if (va && object) {
		pthread_mutex_lock(&aper->fmm_mutex);
		/* Store memory allocation flags, not ioc flags */
		object->mflags = mflags;
		hsakmt_gpuid_to_nodeid(gpu_id, &object->node_id);
		pthread_mutex_unlock(&aper->fmm_mutex);
	}

	return va;
}

/* hsakmt_fmm_allocate_doorbell - Allocate and map a doorbell page range
 *
 * Allocates a doorbell object in the alternate dGPU aperture, tags the
 * VM object with NonPaged/HostAccess flags and the GPU's node id, then
 * maps the doorbells from the KFD file descriptor at
 * @doorbell_mmap_offset over the reserved address.
 *
 * Returns the mapped address or NULL on failure.
 */
void *hsakmt_fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes,
			    uint64_t doorbell_mmap_offset)
{
	/* Use fine-grained aperture */
	manageable_aperture_t *aper = svm.dgpu_alt_aperture;
	const uint32_t doorbell_flags = KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
					KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
					KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
	vm_object_t *object = NULL;
	void *va;

	/* Retrieve gpu_mem id according to gpu_id */
	if (gpu_mem_find_by_gpu_id(gpu_id) < 0)
		return NULL;

	va = __fmm_allocate_device(gpu_id, NULL, MemorySizeInBytes, aper, NULL,
				   doorbell_flags, 0, &object);
	if (!va)
		return va;

	if (object) {
		HsaMemFlags stored;

		/* Cook up some flags for storing in the VM object */
		stored.Value = 0;
		stored.ui32.NonPaged = 1;
		stored.ui32.HostAccess = 1;

		pthread_mutex_lock(&aper->fmm_mutex);
		object->mflags = stored;
		hsakmt_gpuid_to_nodeid(gpu_id, &object->node_id);
		pthread_mutex_unlock(&aper->fmm_mutex);
	}

	if (mmap(va, MemorySizeInBytes, PROT_READ | PROT_WRITE,
		 MAP_SHARED | MAP_FIXED, hsakmt_kfd_fd,
		 doorbell_mmap_offset) == MAP_FAILED) {
		__fmm_release(object, aper);
		return NULL;
	}

	return va;
}

/* fmm_allocate_host_cpu - Allocate system memory tracked in cpuvm_aperture
 *
 * @address: must be NULL; a fixed address is not supported here
 * @MemorySizeInBytes: size of the anonymous mapping
 * @mflags: ExecuteAccess and ReadOnly select the mapping protection; the
 *          flags are also stored in the tracking object
 *
 * Returns the mapped address, or NULL if @address is set, mmap() fails or
 * the tracking object cannot be allocated.
 */
static void *fmm_allocate_host_cpu(void *address, uint64_t MemorySizeInBytes,
				HsaMemFlags mflags)
{
	void *mem = NULL;
	vm_object_t *vm_obj;
	int mmap_prot = PROT_READ;

	if (address)
		return NULL;

	if (mflags.ui32.ExecuteAccess)
		mmap_prot |= PROT_EXEC;

	if (!mflags.ui32.ReadOnly)
		mmap_prot |= PROT_WRITE;

	/* mmap will return a pointer with alignment equal to
	 * sysconf(_SC_PAGESIZE).
	 */
	mem = mmap(NULL, MemorySizeInBytes, mmap_prot,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	if (mem == MAP_FAILED)
		return NULL;

	pthread_mutex_lock(&cpuvm_aperture.fmm_mutex);
	vm_obj = aperture_allocate_object(&cpuvm_aperture, mem, 0,
				      MemorySizeInBytes, mflags);
	if (vm_obj)
		vm_obj->node_id = 0; /* APU systems only have one CPU node */
	pthread_mutex_unlock(&cpuvm_aperture.fmm_mutex);

	if (!vm_obj) {
		/* hsakmt_fmm_release() locates the mapping through its
		 * tracking object. Without one the mapping could never be
		 * released, so undo it and report failure.
		 */
		munmap(mem, MemorySizeInBytes);
		return NULL;
	}

	return mem;
}

/* bind_mem_to_numa - Apply a NUMA memory policy to a mapping
 *
 * @numa_node_id: NUMA node to bind to
 * @mem: start of the mapping
 * @SizeInBytes: length of the mapping
 * @mflags: NoNUMABind skips binding; NoSubstitute selects MPOL_BIND
 *          instead of MPOL_PREFERRED and turns skipped or failed binding
 *          into an error
 *
 * Returns 0 on success or when binding is skipped without NoSubstitute,
 * -ENOMEM if the node mask cannot be allocated, and -EFAULT when binding
 * was required (NoSubstitute) but was not possible. An mbind() failure
 * with EPERM is always treated as success.
 */
static int bind_mem_to_numa(uint32_t numa_node_id, void *mem,
			    uint64_t SizeInBytes, HsaMemFlags mflags)
{
	int mode = MPOL_F_STATIC_NODES;
	struct bitmask *node_mask;
	int mbind_errno;
	int num_node;
	long r;

	pr_debug("%s mem %p flags 0x%x size 0x%" PRIx64 " node_id %d\n", __func__,
		mem, mflags.Value, SizeInBytes, numa_node_id);

	if (mflags.ui32.NoNUMABind || numa_available() == -1) {
		/* but need bind to a numa node */
		if (mflags.ui32.NoSubstitute)
			return -EFAULT;
		else
			return 0;
	}

	num_node = numa_max_node() + 1;

	/* Ignore binding requests to invalid nodes IDs */
	if (numa_node_id >= (unsigned)num_node || numa_node_id == INVALID_NODEID || num_node <= 1) {
		pr_warn("numa_node_id is out range: numa_node_id %d, num_node %d\n", numa_node_id, num_node);
		if (mflags.ui32.NoSubstitute)
			return -EFAULT;
		else
			return 0;
	}

	node_mask = numa_bitmask_alloc(num_node);
	if (!node_mask)
		return -ENOMEM;

#ifdef __PPC64__
	numa_bitmask_setbit(node_mask, numa_node_id * 8);
#else
	numa_bitmask_setbit(node_mask, numa_node_id);
#endif

	mode |= mflags.ui32.NoSubstitute ? MPOL_BIND : MPOL_PREFERRED;
	r = mbind(mem, SizeInBytes, mode, node_mask->maskp, num_node + 1, 0);
	/* Capture errno before any other library call can overwrite it;
	 * freeing the mask and logging are not guaranteed to preserve it.
	 */
	mbind_errno = errno;
	numa_bitmask_free(node_mask);

	if (r) {
		/* If the application is running inside docker, still return
		 * ok because docker seccomp blocks mbind by default,
		 * otherwise application cannot allocate system memory.
		 */
		if (mbind_errno == EPERM) {
			pr_err_once("mbind is blocked by seccomp\n");

			return 0;
		}

		/* Ignore mbind failure if no memory available on node */
		if (!mflags.ui32.NoSubstitute)
			return 0;

		pr_warn_once("Failed to set NUMA policy for %p: %s\n", mem,
			     strerror(mbind_errno));

		return -EFAULT;
	}

	return 0;
}

/* fmm_allocate_host_gpu - Allocate GPU-accessible system memory
 *
 * @gpu_id: GPU to allocate through; 0 selects the first gpu_mem entry
 * @node_id: node id used for NUMA binding and stored in the VM object
 * @address: requested address, passed on to the aperture allocator
 * @MemorySizeInBytes: requested size (AQL queue memory reserves twice
 *                     this much address space)
 * @alignment: requested alignment
 * @mflags: memory flags selecting aperture, caching and allocation path
 *
 * Coarse-grained requests use svm.dgpu_aperture, all others
 * svm.dgpu_alt_aperture. Paged memory (when svm.userptr_for_paged_mem is
 * set) is created as anonymous pages wrapped in a userptr BO; otherwise
 * GTT memory is allocated from the KFD and, for HostAccess, mapped into
 * the CPU address space. OnlyAddress requests only reserve address space.
 *
 * Returns the allocated address or NULL on failure.
 */
static void *fmm_allocate_host_gpu(uint32_t gpu_id, uint32_t node_id, void *address,
				   uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags mflags)
{
	manageable_aperture_t *aperture;
	vm_object_t *vm_obj = NULL;
	int flags = MADV_DONTFORK;
	uint64_t mmap_offset;
	int32_t gpu_drm_fd;
	uint32_t ioc_flags;
	uint32_t preferred_gpu_id;
	int gpu_mem_id = 0; /* default to g_first_gpu_mem */
	uint64_t size;
	void *mem;

	/* set madvise flags to HUGEPAGE always for 2MB pages */
	if (MemorySizeInBytes >= (2 * 1024 * 1024))
		flags |= MADV_HUGEPAGE;


	if (!g_first_gpu_mem)
		return NULL;

	/* A non-zero gpu_id must refer to a known GPU */
	if (gpu_id) {
		gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
		if (gpu_mem_id < 0)
			return NULL;
	}

	preferred_gpu_id = gpu_mem[gpu_mem_id].gpu_id;
	gpu_drm_fd = gpu_mem[gpu_mem_id].drm_render_fd;

	size = MemorySizeInBytes;
	ioc_flags = 0;
	if (mflags.ui32.CoarseGrain)
		aperture = svm.dgpu_aperture;
	else
		aperture = svm.dgpu_alt_aperture; /* always coherent */

	if (!mflags.ui32.CoarseGrain || svm.disable_cache)
		ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;

	if (mflags.ui32.Uncached || svm.disable_cache)
		ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;

	ioc_flags |= fmm_translate_hsa_to_ioc_flags(mflags);

	/* AQL queue memory reserves twice the requested size; only
	 * MemorySizeInBytes is mapped on the CPU side below.
	 */
	if (mflags.ui32.AQLQueueMemory)
		size = MemorySizeInBytes * 2;

	/* special case for va allocation without real memory alloc */
	if (mflags.ui32.OnlyAddress)
		return fmm_allocate_va(gpu_id, address, size, aperture, alignment, mflags);

	/* Paged memory is allocated as a userptr mapping, non-paged
	 * memory is allocated from KFD
	 */
	if (!mflags.ui32.NonPaged && svm.userptr_for_paged_mem) {
		/* Allocate address space */
		pthread_mutex_lock(&aperture->fmm_mutex);
		mem = aperture_allocate_area_aligned(aperture, address, size, alignment);
		pthread_mutex_unlock(&aperture->fmm_mutex);
		if (!mem)
			return NULL;

		/* Map anonymous pages */
		if (mmap(mem, MemorySizeInBytes, PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0)
		    == MAP_FAILED)
			goto out_release_area;

		/* Bind to NUMA node */
		if (bind_mem_to_numa(node_id, mem, MemorySizeInBytes, mflags))
			goto out_release_area;

		/* Mappings in the DGPU aperture don't need to be copied on
		 * fork. This avoids MMU notifiers and evictions due to user
		 * memory mappings on fork.
		 */
		madvise(mem, MemorySizeInBytes, flags);

		/* Create userptr BO */
		/* For userptr BOs the CPU address is passed in mmap_offset */
		mmap_offset = (uint64_t)mem;
		ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_USERPTR;
		vm_obj = fmm_allocate_memory_object(preferred_gpu_id, mem, size,
						       aperture, &mmap_offset,
						       ioc_flags);
		if (!vm_obj)
			goto out_release_area;
	} else {
		ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_GTT;
		mem =  __fmm_allocate_device(preferred_gpu_id, address, size, aperture,
					     &mmap_offset, ioc_flags, alignment, &vm_obj);

		/* Only host-accessible GTT memory gets a CPU mapping */
		if (mem && mflags.ui32.HostAccess) {
			void *ret = fmm_map_to_cpu(mem, MemorySizeInBytes,
						   mflags.ui32.HostAccess,
						   gpu_drm_fd, mmap_offset);

			if (ret == MAP_FAILED) {
				__fmm_release(vm_obj, aperture);
				return NULL;
			}
		}
    }

#ifdef SANITIZER_AMDGPU
		/* Record the mmap parameters in the VM object for sanitizer builds */
		if (mem && vm_obj) {
			vm_obj->mmap_flags = mflags.ui32.HostAccess ? PROT_READ | PROT_WRITE : PROT_NONE;
			vm_obj->mmap_fd = gpu_drm_fd;
			vm_obj->mmap_offset = mmap_offset;
		}
#endif

	if (mem && vm_obj) {
		/* Store memory allocation flags, not ioc flags */
		pthread_mutex_lock(&aperture->fmm_mutex);
		vm_obj->mflags = mflags;
		vm_obj->node_id = node_id;
		pthread_mutex_unlock(&aperture->fmm_mutex);
	}

	return mem;

out_release_area:
	/* Release address space */
	/* Only reached from the userptr path, after the area was reserved */
	pthread_mutex_lock(&aperture->fmm_mutex);
	if (mem) {
		aperture_release_area(aperture, mem, size);
	}
	pthread_mutex_unlock(&aperture->fmm_mutex);

	return NULL;
}

/* Allocate host memory. dGPU systems go through fmm_allocate_host_gpu();
 * otherwise the memory comes from fmm_allocate_host_cpu(), which does not
 * support a caller-specified alignment.
 */
void *hsakmt_fmm_allocate_host(uint32_t gpu_id, uint32_t node_id, void *address,
			uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags mflags)
{
	if (!hsakmt_is_dgpu) {
		/* Alignment not supported on non-dgpu */
		if (alignment != 0) {
			pr_err("Non-default alignment not supported on non-dgpu\n");
			return NULL;
		}

		return fmm_allocate_host_cpu(address, MemorySizeInBytes, mflags);
	}

	return fmm_allocate_host_gpu(gpu_id, node_id, address, MemorySizeInBytes, alignment, mflags);
}

/* Free all KFD buffer handles of @object and, if that succeeded, release
 * its address range and remove it from @aperture.
 *
 * For userptr objects the registration count is dropped first; the
 * object is only freed once the count is no longer positive.
 *
 * Returns 0 on success (including the "still registered" case), -EINVAL
 * for a NULL object, or the negated errno of the last failing free ioctl.
 */
static int __fmm_release(vm_object_t *object, manageable_aperture_t *aperture)
{
	struct kfd_ioctl_free_memory_of_gpu_args free_args = {0};
	int status = 0;
	uint32_t idx;

	if (!object)
		return -EINVAL;

	pthread_mutex_lock(&aperture->fmm_mutex);

	if (object->userptr) {
		object->registration_count--;
		if (object->registration_count > 0)
			goto unlock;
	}

	/* If memory is user memory and it's still GPU mapped, munmap
	 * would cause an eviction. If the restore happens quickly
	 * enough, restore would also fail with an error message. So
	 * free the BO before unmapping the pages.
	 */
	for (idx = 0; idx < object->handle_num; idx++) {
		/* Handle 0 marks a slot without a KFD allocation */
		if (object->handles[idx] == 0)
			continue;

		free_args.handle = object->handles[idx];
		if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args))
			status = -errno;
	}

	/* Keep the object around if any handle could not be freed */
	if (status == 0) {
		aperture_release_area(aperture, object->start, object->size);
		vm_remove_object(aperture, object);
	}

unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return status;
}

/* Release the allocation that starts at @address.
 *
 * Scratch apertures are torn down via fmm_release_scratch(). Objects in
 * the CPUVM aperture are removed and unmapped directly; all other
 * objects go through __fmm_release(). An unknown address is reported as
 * success when the SVM API is supported, and as
 * HSAKMT_STATUS_MEMORY_NOT_REGISTERED otherwise.
 */
HSAKMT_STATUS hsakmt_fmm_release(void *address)
{
	manageable_aperture_t *aper = NULL;
	vm_object_t *found;
	gpu_mem_t *scratch_owner;
	uint64_t length;

	/* Special handling for scratch memory */
	scratch_owner = fmm_is_scratch_aperture(address);
	if (scratch_owner != NULL) {
		fmm_release_scratch(scratch_owner->gpu_id);
		return HSAKMT_STATUS_SUCCESS;
	}

	/* On success the aperture mutex is held when this returns */
	found = vm_find_object(address, 0, &aper);
	if (found == NULL) {
		if (hsakmt_is_svm_api_supported)
			return HSAKMT_STATUS_SUCCESS;
		return HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
	}

	if (aper != &cpuvm_aperture) {
		/* __fmm_release() takes the mutex itself */
		pthread_mutex_unlock(&aper->fmm_mutex);

		if (__fmm_release(found, aper) != 0)
			return HSAKMT_STATUS_ERROR;

		return HSAKMT_STATUS_SUCCESS;
	}

	/* APU system memory */
	length = found->size;
	vm_remove_object(&cpuvm_aperture, found);
	pthread_mutex_unlock(&aper->fmm_mutex);
	munmap(address, length);

	return HSAKMT_STATUS_SUCCESS;
}

static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_policy,
				 uintptr_t alt_base, uint64_t alt_size,
				 uint32_t misc_process_flags)
{
	struct kfd_ioctl_set_memory_policy_args args = {0};

	args.gpu_id = gpu_id;
	args.default_policy = default_policy;
	args.alternate_policy = alt_policy;
	args.alternate_aperture_base = alt_base;
	args.alternate_aperture_size = alt_size;
	args.misc_process_flag = misc_process_flags;

	return hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
}

/* Return the VM alignment for @device_id: the larger of PAGE_SIZE and
 * TONGA_PAGE_SIZE for the Tonga and Carrizo device ID ranges, PAGE_SIZE
 * for everything else.
 */
static uint32_t get_vm_alignment(uint32_t device_id)
{
	bool is_tonga = device_id >= 0x6920 && device_id <= 0x6939;
	bool is_carrizo = device_id >= 0x9870 && device_id <= 0x9877;
	int page_size = (is_tonga || is_carrizo) ? TONGA_PAGE_SIZE : 0;

	return MAX(PAGE_SIZE, page_size);
}

static HSAKMT_STATUS get_process_apertures(
	struct kfd_process_device_apertures *process_apertures,
	uint32_t *num_of_nodes)
{
	struct kfd_ioctl_get_process_apertures_new_args args_new = {0};
	struct kfd_ioctl_get_process_apertures_args args_old;

	args_new.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures;
	args_new.num_of_nodes = *num_of_nodes;
	if (!hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
		      (void *)&args_new)) {
		*num_of_nodes = args_new.num_of_nodes;
		return HSAKMT_STATUS_SUCCESS;
	}

	/* New IOCTL failed, try the old one in case we're running on
	 * a really old kernel */
	memset(&args_old, 0, sizeof(args_old));

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES,
		     (void *)&args_old))
		return HSAKMT_STATUS_ERROR;

	if (args_old.num_of_nodes < *num_of_nodes)
		*num_of_nodes = args_old.num_of_nodes;

	memcpy(process_apertures, args_old.process_apertures,
	       sizeof(*process_apertures) * *num_of_nodes);

	return HSAKMT_STATUS_SUCCESS;
}

/* The VMs from DRM render nodes are used by KFD for the lifetime of
 * the process. Therefore we have to keep using the same FDs for the
 * lifetime of the process, even when we close and reopen KFD. There
 * are up to 128 render nodes that we cache in this array.
 *
 * Both arrays below are indexed by (minor - DRM_FIRST_RENDER_NODE). A
 * drm_render_fds[] entry of 0 is treated as "not opened yet" by
 * hsakmt_open_drm_render_device().
 */
#define DRM_FIRST_RENDER_NODE 128
#define DRM_LAST_RENDER_NODE 255
static int drm_render_fds[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];

/* amdgpu device handle for each gpu that libdrm uses */
static struct amdgpu_device *amdgpu_handle[DRM_LAST_RENDER_NODE + 1 - DRM_FIRST_RENDER_NODE];

/* hsakmt_open_drm_render_device - Open (or reuse) a DRM render node
 *
 * @minor: DRM minor number, DRM_FIRST_RENDER_NODE..DRM_LAST_RENDER_NODE
 *
 * Opens /dev/dri/renderD<minor> once and caches the file descriptor for
 * later calls. If libdrm can be initialized on it and
 * amdgpu_device_get_fd is available, the descriptor libdrm uses replaces
 * the one opened here.
 *
 * Returns the cached file descriptor, hsakmt_kfd_fd when running a
 * model, -EINVAL for an out-of-range minor or the negated errno of a
 * failed open().
 */
int hsakmt_open_drm_render_device(int minor)
{
	char path[128];
	int index, fd;
	uint32_t major_drm, minor_drm;
	struct amdgpu_device **device_handle;

	/* Bypass amdgpu if we're running a model. Return hsakmt_kfd_fd, which is the
	 * backing for all our "GPU" memory. */
	if (hsakmt_use_model)
		return hsakmt_kfd_fd;

	if (minor < DRM_FIRST_RENDER_NODE || minor > DRM_LAST_RENDER_NODE) {
		pr_err("DRM render minor %d out of range [%d, %d]\n", minor,
		       DRM_FIRST_RENDER_NODE, DRM_LAST_RENDER_NODE);
		return -EINVAL;
	}
	index = minor - DRM_FIRST_RENDER_NODE;

	/* If the render node was already opened, keep using the same FD */
	if (drm_render_fds[index])
		return drm_render_fds[index];

	snprintf(path, sizeof(path), "/dev/dri/renderD%d", minor);
	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		/* Save errno: the logging below may overwrite it */
		int open_errno = errno;

		if (open_errno != ENOENT && open_errno != EPERM) {
			pr_err("Failed to open %s: %s\n", path, strerror(open_errno));
			if (open_errno == EACCES)
				pr_info("Check user is in \"video\" group\n");
		}
		return -open_errno;
	}
	drm_render_fds[index] = fd;

	device_handle = &amdgpu_handle[index];
	if (!amdgpu_device_initialize(fd, &major_drm, &minor_drm, device_handle)) {
		/* if amdgpu_device_get_fd available query render fd that libdrm uses,
		 * then close drm_render_fds above, replace it by fd libdrm uses.
		 */
		if (hsakmt_fn_amdgpu_device_get_fd) {
			fd = hsakmt_fn_amdgpu_device_get_fd(*device_handle);
			if (fd > 0) {
				close(drm_render_fds[index]);
				drm_render_fds[index] = fd;
			} else {
				/* Keep the descriptor opened above; it is
				 * still cached and is what later calls
				 * return.
				 * NOTE(review): assumes deinitializing the
				 * libdrm device leaves our own fd open --
				 * confirm against libdrm.
				 */
				pr_err("amdgpu_device_get_fd failed: %d\n", fd);
				amdgpu_device_deinitialize(*device_handle);
				*device_handle = 0;
			}
		}
	}

	/* Always hand out the cached descriptor so that the first call and
	 * all later calls agree, even when amdgpu_device_get_fd failed.
	 */
	return drm_render_fds[index];
}

static HSAKMT_STATUS acquire_vm(uint32_t gpu_id, int fd)
{
	struct kfd_ioctl_acquire_vm_args args;

	args.gpu_id = gpu_id;
	args.drm_fd = fd;
	pr_info("acquiring VM for %x using %d\n", gpu_id, fd);
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_ACQUIRE_VM, (void *)&args)) {
		pr_err("AMDKFD_IOC_ACQUIRE_VM failed\n");
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

/* Configure a single, unreserved (mmap-backed) SVM aperture spanning
 * [base, limit] and verify it works by allocating and releasing one page.
 *
 * Returns HSAKMT_STATUS_SUCCESS if the probe allocation worked. Returns
 * HSAKMT_STATUS_ERROR if @align exceeds the page size or the probe failed,
 * in which case the caller is expected to fall back to reserved apertures.
 */
static HSAKMT_STATUS init_mmap_apertures(HSAuint64 base, HSAuint64 limit,
					 HSAuint32 align, HSAuint32 guard_pages)
{
	manageable_aperture_t *dflt = &svm.apertures[SVM_DEFAULT];
	void *probe;

	if (align > (HSAuint32)PAGE_SIZE) {
		/* This should never happen. Alignment constraints
		 * only apply to old GPUs that don't support 48-bit
		 * virtual addresses.
		 */
		pr_info("Falling back to reserved SVM apertures due to alignment constraints.\n");
		return HSAKMT_STATUS_ERROR;
	}

	/* One aperture serves all SVM allocations */
	dflt->base  = (void *)base;
	dflt->limit = (void *)limit;
	dflt->align = align;
	dflt->guard_pages = guard_pages;
	dflt->is_cpu_accessible = true;
	dflt->ops = &mmap_aperture_ops;

	/* The coherent aperture is left empty */
	svm.apertures[SVM_COHERENT].limit = NULL;
	svm.apertures[SVM_COHERENT].base = NULL;

	/* Probe with a single page. Failure means we have to manage
	 * our own reserved address range instead.
	 */
	probe = aperture_allocate_area(dflt, NULL, PAGE_SIZE);
	if (!probe) {
		pr_info("Failed to allocate unreserved SVM address space.\n");
		pr_info("Falling back to reserved SVM apertures.\n");
		return HSAKMT_STATUS_ERROR;
	}

	aperture_release_area(dflt, probe, PAGE_SIZE);

	/* Both the default and the alternate aperture are the same one */
	svm.dgpu_alt_aperture = dflt;
	svm.dgpu_aperture = dflt;
	pr_info("Initialized unreserved SVM apertures: %p - %p\n",
		svm.apertures[SVM_DEFAULT].base,
		svm.apertures[SVM_DEFAULT].limit);

	return HSAKMT_STATUS_SUCCESS;
}

/* Reserve @len bytes of inaccessible (PROT_NONE), anonymous, private
 * address space, using @addr as the placement hint.
 *
 * Returns the start of the mapping, or NULL if @len is zero or mmap fails.
 */
static void *reserve_address(void *addr, unsigned long long int len)
{
	void *mapping;

	/* len is unsigned, so "not positive" can only mean zero */
	if (!len)
		return NULL;

	mapping = mmap(addr, len, PROT_NONE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

	return (mapping == MAP_FAILED) ? NULL : mapping;
}

/* Managed SVM aperture limits: only reserve up to 40 bits (1TB, what
 * GFX8 supports). Need to find at least 4GB of usable address space.
 */
/* Highest address (inclusive) considered for a reserved aperture: 2^40 - 1 */
#define SVM_RESERVATION_LIMIT ((1ULL << 40) - 1)
/* Smallest reservation size that is accepted: 4GB */
#define SVM_MIN_VM_SIZE (4ULL << 30)
/* True when address a lies below 2^47 */
#define IS_CANONICAL_ADDR(a) ((a) < (1ULL << 47))

/* Set up the SVM apertures shared by all GPUs that use canonical addresses.
 *
 * @base, @limit:  GPU-addressable range (limit is inclusive)
 * @align:         alignment stored in the apertures
 * @guard_pages:   number of guard pages stored in the apertures
 *
 * If the range reaches at least 2^47 - 1 and HSA_RESERVE_SVM mode is off,
 * unreserved mmap-backed apertures are tried first. Otherwise (or if that
 * fails) a block of address space is reserved with mmap and split into a
 * coherent aperture (first quarter) and a non-coherent aperture (rest).
 *
 * Returns HSAKMT_STATUS_SUCCESS on success (including when a previously
 * initialized reservation is reused), HSAKMT_STATUS_ERROR if no suitable
 * address range could be found.
 */
static HSAKMT_STATUS init_svm_apertures(HSAuint64 base, HSAuint64 limit,
					HSAuint32 align, HSAuint32 guard_pages)
{
	const HSAuint64 ADDR_INC = GPU_HUGE_PAGE_SIZE;
	HSAuint64 len, map_size, alt_base, alt_size;
	bool found = false;
	void *addr, *ret_addr = NULL;

	/* If we already have an SVM aperture initialized (from a
	 * parent process), keep using it
	 */
	if (dgpu_shared_aperture_limit)
		return HSAKMT_STATUS_SUCCESS;

	/* Align base and limit to huge page size */
	base = ALIGN_UP(base, GPU_HUGE_PAGE_SIZE);
	limit = ((limit + 1) & ~(HSAuint64)(GPU_HUGE_PAGE_SIZE - 1)) - 1;

	/* If the limit is greater or equal 47-bits of address space,
	 * it means we have GFXv9 or later GPUs only. We don't need
	 * apertures to determine the MTYPE and the virtual address
	 * space of the GPUs covers the full CPU address range (on
	 * x86_64) or at least mmap is unlikely to run out of
	 * addresses the GPUs can handle.
	 */
	if (limit >= (1ULL << 47) - 1 && !svm.reserve_svm) {
		HSAKMT_STATUS status = init_mmap_apertures(base, limit, align,
							   guard_pages);

		if (status == HSAKMT_STATUS_SUCCESS)
			return status;
		/* fall through: fall back to reserved address space */
	}

	/* Never reserve beyond SVM_RESERVATION_LIMIT */
	if (limit > SVM_RESERVATION_LIMIT)
		limit = SVM_RESERVATION_LIMIT;
	if (base >= limit) {
		pr_err("No SVM range compatible with all GPU and software constraints\n");
		return HSAKMT_STATUS_ERROR;
	}

	/* Try to reserve address space for SVM.
	 *
	 * Inner loop: try start addresses in huge-page increments up
	 * to half the VM size we're trying to reserve
	 *
	 * Outer loop: reduce size of the allocation by factor 2 at a
	 * time and print a warning for every reduction
	 */
	for (len = limit - base + 1; !found && len >= SVM_MIN_VM_SIZE;
	     len = (len + 1) >> 1) {
		for (addr = (void *)base; (HSAuint64)addr + ((len + 1) >> 1) - 1 <= limit;
		     addr = (void *)((HSAuint64)addr + ADDR_INC)) {
			/* Clamp the end of the request to the GPU limit */
			HSAuint64 top = MIN((HSAuint64)addr + len, limit+1);

			/* Round the request size down to whole pages */
			map_size = (top - (HSAuint64)addr) &
				~(HSAuint64)(PAGE_SIZE - 1);
			if (map_size < SVM_MIN_VM_SIZE)
				break;

			/* addr is only a hint; the mapping may land elsewhere */
			ret_addr = reserve_address(addr, map_size);
			if (!ret_addr)
				break;
			if ((HSAuint64)ret_addr + ((len + 1) >> 1) - 1 <= limit)
				/* At least half the returned address
				 * space is GPU addressable, we'll
				 * take it
				 */
				break;
			/* Not usable: give it back and try the next hint */
			munmap(ret_addr, map_size);
			ret_addr = NULL;
		}
		if (!ret_addr) {
			pr_warn("Failed to reserve %uGB for SVM ...\n",
				(unsigned int)(len >> 30));
			continue;
		}
		if ((HSAuint64)ret_addr + SVM_MIN_VM_SIZE - 1 > limit) {
			/* addressable size is less than the minimum */
			pr_warn("Got %uGB for SVM at %p with only %dGB usable ...\n",
				(unsigned int)(map_size >> 30), ret_addr,
				(int)((limit - (HSAint64)ret_addr) >> 30));
			munmap(ret_addr, map_size);
			ret_addr = NULL;
			continue;
		} else {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_err("Failed to reserve SVM address range. Giving up.\n");
		return HSAKMT_STATUS_ERROR;
	}

	/* From here on base/limit describe the range actually reserved */
	base = (HSAuint64)ret_addr;
	if (base + map_size - 1 > limit)
		/* trim the tail that's not GPU-addressable */
		munmap((void *)(limit + 1), base + map_size - 1 - limit);
	else
		limit = base + map_size - 1;

	/* init two apertures for non-coherent and coherent memory */
	svm.apertures[SVM_DEFAULT].base  = dgpu_shared_aperture_base  = ret_addr;
	svm.apertures[SVM_DEFAULT].limit = dgpu_shared_aperture_limit = (void *)limit;
	svm.apertures[SVM_DEFAULT].align = align;
	svm.apertures[SVM_DEFAULT].guard_pages = guard_pages;
	svm.apertures[SVM_DEFAULT].is_cpu_accessible = true;
	svm.apertures[SVM_DEFAULT].ops = &reserved_aperture_ops;

	/* Use the first 1/4 of the dGPU aperture as
	 * alternate aperture for coherent access.
	 * Base and size must be 64KB aligned.
	 */
	alt_base = (HSAuint64)svm.apertures[SVM_DEFAULT].base;
	alt_size = (VOID_PTRS_SUB(svm.apertures[SVM_DEFAULT].limit,
				  svm.apertures[SVM_DEFAULT].base) + 1) >> 2;
	alt_base = (alt_base + 0xffff) & ~0xffffULL;
	alt_size = (alt_size + 0xffff) & ~0xffffULL;
	svm.apertures[SVM_COHERENT].base = (void *)alt_base;
	svm.apertures[SVM_COHERENT].limit = (void *)(alt_base + alt_size - 1);
	svm.apertures[SVM_COHERENT].align = align;
	svm.apertures[SVM_COHERENT].guard_pages = guard_pages;
	svm.apertures[SVM_COHERENT].is_cpu_accessible = true;
	svm.apertures[SVM_COHERENT].ops = &reserved_aperture_ops;

	/* The non-coherent aperture starts right after the coherent one */
	svm.apertures[SVM_DEFAULT].base = VOID_PTR_ADD(svm.apertures[SVM_COHERENT].limit, 1);

	pr_info("SVM alt (coherent): %12p - %12p\n",
		svm.apertures[SVM_COHERENT].base, svm.apertures[SVM_COHERENT].limit);
	pr_info("SVM (non-coherent): %12p - %12p\n",
		svm.apertures[SVM_DEFAULT].base, svm.apertures[SVM_DEFAULT].limit);

	svm.dgpu_aperture = &svm.apertures[SVM_DEFAULT];
	svm.dgpu_alt_aperture = &svm.apertures[SVM_COHERENT];

	return HSAKMT_STATUS_SUCCESS;
}

/* Initialize the red-black trees of all apertures.
 *
 * The trees of the process-wide apertures (SVM, CPUVM, memory handle) are
 * initialized on the first call only; the per-GPU trees are initialized
 * on every call for all gpu_mem_count entries of gpu_mem[].
 */
static void fmm_init_rbtree(void)
{
	static int once;
	const bool first_call = (once++ == 0);
	uint32_t gpu;

	if (first_call) {
		rbtree_init(&svm.apertures[SVM_DEFAULT].tree);
		rbtree_init(&svm.apertures[SVM_DEFAULT].user_tree);
		rbtree_init(&svm.apertures[SVM_COHERENT].tree);
		rbtree_init(&svm.apertures[SVM_COHERENT].user_tree);
		rbtree_init(&cpuvm_aperture.tree);
		rbtree_init(&cpuvm_aperture.user_tree);
		rbtree_init(&mem_handle_aperture.tree);
		rbtree_init(&mem_handle_aperture.user_tree);
	}

	for (gpu = 0; gpu < gpu_mem_count; gpu++) {
		rbtree_init(&gpu_mem[gpu].scratch_physical.tree);
		rbtree_init(&gpu_mem[gpu].scratch_physical.user_tree);
		rbtree_init(&gpu_mem[gpu].gpuvm_aperture.tree);
		rbtree_init(&gpu_mem[gpu].gpuvm_aperture.user_tree);
	}
}

/* Allocate the remapped MMIO page of GPU @gpu_id in the alternate SVM
 * aperture and map it for CPU and GPU access.
 *
 * @node_id: node id recorded in the resulting VM object
 * @gpu_id:  GPU to allocate the MMIO page on
 * @mmap_fd: file descriptor used for the CPU mapping
 *
 * Returns the address of the page, or NULL on failure. When running a
 * model the page is handed to model_set_mmio_page() and no CPU/GPU
 * mapping is created here.
 */
static void *map_mmio(uint32_t node_id, uint32_t gpu_id, int mmap_fd)
{
	manageable_aperture_t *alt_aperture = svm.dgpu_alt_aperture;
	vm_object_t *mmio_obj = NULL;
	HsaMemFlags mem_flags;
	uint64_t offset;
	uint32_t alloc_flags;
	void *page;

	/* Allocate physical memory and vm object */
	alloc_flags = KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP |
		      KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
		      KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
	page = __fmm_allocate_device(gpu_id, NULL, PAGE_SIZE, alt_aperture,
				     &offset, alloc_flags, 0, &mmio_obj);
	if (!page || !mmio_obj)
		return NULL;

	mem_flags.Value = 0;
	mem_flags.ui32.NonPaged = 1;
	mem_flags.ui32.HostAccess = 1;

	/* Object fields are updated under the aperture lock */
	pthread_mutex_lock(&alt_aperture->fmm_mutex);
	mmio_obj->mflags = mem_flags;
	mmio_obj->node_id = node_id;
	pthread_mutex_unlock(&alt_aperture->fmm_mutex);

	if (hsakmt_use_model) {
		model_set_mmio_page(page);
		return page;
	}

	/* Map for CPU access first, then for GPU access. Either failure
	 * releases the object again.
	 */
	if (mmap(page, PAGE_SIZE, PROT_READ | PROT_WRITE,
		 MAP_SHARED | MAP_FIXED, mmap_fd, offset) == MAP_FAILED ||
	    hsakmt_fmm_map_to_gpu(page, PAGE_SIZE, NULL)) {
		__fmm_release(mmio_obj, alt_aperture);
		return NULL;
	}

	return page;
}

static void release_mmio(void)
{
	uint32_t gpu_mem_id;

	for (gpu_mem_id = 0; (uint32_t)gpu_mem_id < gpu_mem_count; gpu_mem_id++) {
		if (!gpu_mem[gpu_mem_id].mmio_aperture.base)
			continue;
		hsakmt_fmm_unmap_from_gpu(gpu_mem[gpu_mem_id].mmio_aperture.base);
		munmap(gpu_mem[gpu_mem_id].mmio_aperture.base, PAGE_SIZE);
		hsakmt_fmm_release(gpu_mem[gpu_mem_id].mmio_aperture.base);
	}
}

/* Look up the amdgpu device handle belonging to topology node @node_id.
 *
 * Returns HSAKMT_STATUS_INVALID_NODE_UNIT if the node has no gpu_mem[]
 * entry and HSAKMT_STATUS_INVALID_HANDLE if no amdgpu device was
 * initialized for its render node. On success *DeviceHandle receives the
 * handle (NULL when running a model).
 */
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(uint32_t node_id,
						HsaAMDGPUDeviceHandle *DeviceHandle)
{
	struct amdgpu_device *handle;
	int32_t slot;

	slot = gpu_mem_find_by_node_id(node_id);
	if (slot < 0)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* There is no amdgpu device behind a model */
	if (hsakmt_use_model) {
		*DeviceHandle = NULL;
		return HSAKMT_STATUS_SUCCESS;
	}

	/* Handles are stored per render minor, relative to the first one */
	handle = amdgpu_handle[gpu_mem[slot].drm_render_minor - DRM_FIRST_RENDER_NODE];
	if (!handle)
		return HSAKMT_STATUS_INVALID_HANDLE;

	*DeviceHandle = handle;
	return HSAKMT_STATUS_SUCCESS;
}

/* Return true if the ranges [start_1, limit_1] and [start_2, limit_2]
 * (limits inclusive) overlap, i.e. if either range starts inside the other.
 */
static bool two_apertures_overlap(void *start_1, void *limit_1, void *start_2, void *limit_2)
{
	/* Does the first range begin inside the second one? */
	if (start_1 >= start_2 && start_1 <= limit_2)
		return true;

	/* Otherwise the second range must begin inside the first one */
	return start_2 >= start_1 && start_2 <= limit_1;
}

/* Configure mem_handle_aperture and place it in a hole that does not
 * overlap the LDS, scratch or GPUVM aperture of any GPU.
 *
 * Starting from the current base/limit of mem_handle_aperture, the
 * aperture is moved up in 2^47 steps until a free hole is found or the
 * base reaches END_NON_CANONICAL_ADDR - 1.
 *
 * Returns true when a hole was found. Otherwise base and limit are
 * cleared to mark the aperture invalid and false is returned.
 */
static bool init_mem_handle_aperture(HSAuint32 align, HSAuint32 guard_pages)
{
	uint32_t gpu;

	/* init mem_handle_aperture for buffer handler management */
	mem_handle_aperture.align = align;
	mem_handle_aperture.guard_pages = guard_pages;
	mem_handle_aperture.is_cpu_accessible = false;
	mem_handle_aperture.ops = &reserved_aperture_ops;

	while (PORT_VPTR_TO_UINT64(mem_handle_aperture.base) < END_NON_CANONICAL_ADDR - 1) {
		bool collides = false;

		/* Stop at the first GPU aperture that gets in the way.
		 * Apertures with a NULL base are ignored.
		 */
		for (gpu = 0; gpu < gpu_mem_count && !collides; gpu++) {
			collides =
				(gpu_mem[gpu].lds_aperture.base &&
				 two_apertures_overlap(gpu_mem[gpu].lds_aperture.base,
						       gpu_mem[gpu].lds_aperture.limit,
						       mem_handle_aperture.base,
						       mem_handle_aperture.limit)) ||
				(gpu_mem[gpu].scratch_aperture.base &&
				 two_apertures_overlap(gpu_mem[gpu].scratch_aperture.base,
						       gpu_mem[gpu].scratch_aperture.limit,
						       mem_handle_aperture.base,
						       mem_handle_aperture.limit)) ||
				(gpu_mem[gpu].gpuvm_aperture.base &&
				 two_apertures_overlap(gpu_mem[gpu].gpuvm_aperture.base,
						       gpu_mem[gpu].gpuvm_aperture.limit,
						       mem_handle_aperture.base,
						       mem_handle_aperture.limit));
		}

		if (!collides) {
			pr_info("mem_handle_aperture start %p, mem_handle_aperture limit %p\n",
					mem_handle_aperture.base, mem_handle_aperture.limit);
			return true;
		}

		/* increase base by 1UL<<47 to check next hole */
		mem_handle_aperture.base =  VOID_PTR_ADD(mem_handle_aperture.base, (1UL << 47));
		mem_handle_aperture.limit = VOID_PTR_ADD(mem_handle_aperture.base, (1ULL << 47));
	}

	/* set invalid aperture if fail locating a hole for it */
	mem_handle_aperture.base =  0;
	mem_handle_aperture.limit = 0;

	return false;
}

/* Initialize the per-process memory manager state for @NumNodes topology
 * nodes.
 *
 * Steps, in order:
 *  1. Read the tuning environment variables (HSA_DISABLE_CACHE,
 *     HSA_USERPTR_FOR_PAGED_MEM, HSA_CHECK_USERPTR, HSA_RESERVE_SVM,
 *     HSA_SVM_GUARD_PAGES, HSA_HIGH_PRECISION_MODE, HSA_MAX_VA_ALIGN).
 *  2. Build gpu_mem[] from the topology, opening one DRM render node per
 *     GPU node.
 *  3. Query the per-device apertures from the kernel, record LDS, scratch
 *     and GPUVM apertures, build the peer/all-GPU ID arrays and acquire
 *     the VM of every GPU.
 *  4. Set up the shared SVM apertures and the matching memory policy.
 *  5. Initialize the aperture trees, the memory handle aperture and the
 *     remapped MMIO pages.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success. On failure the state built so
 * far is torn down through hsakmt_fmm_destroy_process_apertures() and an
 * error status is returned.
 */
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
{
	uint32_t i;
	int32_t gpu_mem_id = 0;
	struct kfd_process_device_apertures *process_apertures;
	uint32_t num_of_sysfs_nodes;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr, *reserveSvm;
	char *maxVaAlignStr, *mfmaHighPrecisionModeStr;
	unsigned int guardPages = 1;
	uint64_t svm_base = 0, svm_limit = 0;
	uint32_t svm_alignment = 0, mfma_high_precision_mode = 0;

	/* If HSA_DISABLE_CACHE is set to a non-0 value, disable caching */
	disableCache = getenv("HSA_DISABLE_CACHE");
	svm.disable_cache = (disableCache && strcmp(disableCache, "0"));

	/* If HSA_USERPTR_FOR_PAGED_MEM is not set or set to a non-0
	 * value, enable userptr for all paged memory allocations
	 */
	pagedUserptr = getenv("HSA_USERPTR_FOR_PAGED_MEM");
	svm.userptr_for_paged_mem = (!pagedUserptr || strcmp(pagedUserptr, "0"));

	if (hsakmt_use_model)
		svm.userptr_for_paged_mem = false;
	/* If HSA_CHECK_USERPTR is set to a non-0 value, check all userptrs
	 * when they are registered
	 */
	checkUserptr = getenv("HSA_CHECK_USERPTR");
	svm.check_userptr = (checkUserptr && strcmp(checkUserptr, "0"));

	/* If HSA_RESERVE_SVM is set to a non-0 value,
	 * enable packet capture and replay mode.
	 */
	reserveSvm = getenv("HSA_RESERVE_SVM");
	svm.reserve_svm = (reserveSvm && strcmp(reserveSvm, "0"));

	/* Specify number of guard pages for SVM apertures, default is 1 */
	guardPagesStr = getenv("HSA_SVM_GUARD_PAGES");
	if (!guardPagesStr || sscanf(guardPagesStr, "%u", &guardPages) != 1)
		guardPages = 1;

	mfmaHighPrecisionModeStr = getenv("HSA_HIGH_PRECISION_MODE");
	mfma_high_precision_mode = (mfmaHighPrecisionModeStr &&
				    strcmp(mfmaHighPrecisionModeStr, "0"));
	/* Sets the max VA alignment order size during mapping. By default the order
	 * size is set to 18(1G) for GFX950 to reduce TLB hits. If any non-gfx950
	 * ASIC is found in the system, set back to 9(2MB).
	 */
	maxVaAlignStr = getenv("HSA_MAX_VA_ALIGN");
	if (!maxVaAlignStr || sscanf(maxVaAlignStr, "%u", &svm.alignment_order) != 1) {
		svm.alignment_order = 18;

		for (i = 0; i < NumNodes; i++) {
			if (hsakmt_get_gfxv_by_node_id(i) != GFX_VERSION_GFX950) {
				svm.alignment_order = 9;
				break;
			}
		}
	}
	pr_info("SVM alignment default order is %d.", svm.alignment_order);

	gpu_mem_count = 0;
	g_first_gpu_mem = NULL;

	/* Trade off - NumNodes includes GPU nodes + CPU Node. So in
	 * systems with CPU node, slightly more memory is allocated than
	 * necessary
	 */
	gpu_mem = (gpu_mem_t *)calloc(NumNodes, sizeof(gpu_mem_t));
	if (!gpu_mem)
		return HSAKMT_STATUS_NO_MEMORY;

	/* Initialize gpu_mem[] from sysfs topology. Rest of the members are
	 * set to 0 by calloc. This is necessary because this function
	 * gets called before hsaKmtAcquireSystemProperties() is called.
	 */

	hsakmt_is_dgpu = false;

	for (i = 0; i < NumNodes; i++) {
		HsaNodeProperties props;

		ret = hsakmt_topology_get_node_props(i, &props);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto gpu_mem_init_failed;

		hsakmt_topology_setup_is_dgpu_param(&props);

		/* Skip non-GPU nodes */
		if (props.KFDGpuID) {
			int fd = hsakmt_open_drm_render_device(props.DrmRenderMinor);
			if (fd <= 0) {
				ret = HSAKMT_STATUS_ERROR;
				goto gpu_mem_init_failed;
			}

			gpu_mem[gpu_mem_count].drm_render_minor = props.DrmRenderMinor;
			/* Sized for the worst case: every node is a peer */
			gpu_mem[gpu_mem_count].usable_peer_id_array =
				calloc(NumNodes, sizeof(uint32_t));
			if (!gpu_mem[gpu_mem_count].usable_peer_id_array) {
				ret = HSAKMT_STATUS_NO_MEMORY;
				goto gpu_mem_init_failed;
			}
			/* A GPU can always use its own memory */
			gpu_mem[gpu_mem_count].usable_peer_id_array[0] = props.KFDGpuID;
			gpu_mem[gpu_mem_count].usable_peer_id_num = 1;

			gpu_mem[gpu_mem_count].EngineId.ui32.Major = props.EngineId.ui32.Major;
			gpu_mem[gpu_mem_count].EngineId.ui32.Minor = props.EngineId.ui32.Minor;
			gpu_mem[gpu_mem_count].EngineId.ui32.Stepping = props.EngineId.ui32.Stepping;

			gpu_mem[gpu_mem_count].drm_render_fd = fd;
			gpu_mem[gpu_mem_count].gpu_id = props.KFDGpuID;
			gpu_mem[gpu_mem_count].local_mem_size = props.LocalMemSize;
			gpu_mem[gpu_mem_count].device_id = props.DeviceId;
			gpu_mem[gpu_mem_count].node_id = i;
			hsakmt_is_svm_api_supported &= props.Capability.ui32.SVMAPISupported;

			gpu_mem[gpu_mem_count].scratch_physical.align = PAGE_SIZE;
			gpu_mem[gpu_mem_count].scratch_physical.ops = &reserved_aperture_ops;
			pthread_mutex_init(&gpu_mem[gpu_mem_count].scratch_physical.fmm_mutex, NULL);

			gpu_mem[gpu_mem_count].gpuvm_aperture.align =
				get_vm_alignment(props.DeviceId);
			gpu_mem[gpu_mem_count].gpuvm_aperture.guard_pages = guardPages;
			gpu_mem[gpu_mem_count].gpuvm_aperture.ops = &reserved_aperture_ops;
			pthread_mutex_init(&gpu_mem[gpu_mem_count].gpuvm_aperture.fmm_mutex, NULL);

			if (!g_first_gpu_mem)
				g_first_gpu_mem = &gpu_mem[gpu_mem_count];

			gpu_mem_count++;
		}
	}

	/* The ioctl will also return Number of Nodes if
	 * args.kfd_process_device_apertures_ptr is set to NULL. This is not
	 * required since Number of nodes is already known. Kernel will fill in
	 * the apertures in kfd_process_device_apertures_ptr
	 */
	num_of_sysfs_nodes = hsakmt_get_num_sysfs_nodes();
	if (num_of_sysfs_nodes < gpu_mem_count) {
		ret = HSAKMT_STATUS_ERROR;
		goto sysfs_parse_failed;
	}

	process_apertures = calloc(num_of_sysfs_nodes, sizeof(struct kfd_process_device_apertures));
	if (!process_apertures) {
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto sysfs_parse_failed;
	}

	/* GPU Resource management can disable some of the GPU nodes.
	 * The Kernel driver could be not aware of this.
	 * Get from Kernel driver information of all the nodes and then filter it.
	 */
	ret = get_process_apertures(process_apertures, &num_of_sysfs_nodes);
	if (ret != HSAKMT_STATUS_SUCCESS)
		goto get_aperture_ioctl_failed;

	all_gpu_id_array_size = 0;
	all_gpu_id_array = NULL;
	if (num_of_sysfs_nodes > 0) {
		all_gpu_id_array = malloc(sizeof(uint32_t) * gpu_mem_count);
		if (!all_gpu_id_array) {
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto get_aperture_ioctl_failed;
		}
	}

	for (i = 0 ; i < num_of_sysfs_nodes ; i++) {
		HsaNodeProperties nodeProps;
		HsaIoLinkProperties linkProps[NumNodes];
		uint32_t nodeId;
		uint32_t j;

		/* Map Kernel process device data node i <--> gpu_mem_id which
		 * indexes into gpu_mem[] based on gpu_id
		 */
		gpu_mem_id = gpu_mem_find_by_gpu_id(process_apertures[i].gpu_id);
		if (gpu_mem_id < 0)
			continue;

		/* all_gpu_id_array only has room for gpu_mem_count entries */
		if (all_gpu_id_array_size == gpu_mem_count) {
			ret = HSAKMT_STATUS_ERROR;
			goto aperture_init_failed;
		}
		all_gpu_id_array[all_gpu_id_array_size++] = process_apertures[i].gpu_id;

		/* Add this GPU to the usable_peer_id_arrays of all GPUs that
		 * this GPU has an IO link to. This GPU can map memory
		 * allocated on those GPUs.
		 */
		nodeId = gpu_mem[gpu_mem_id].node_id;
		ret = hsakmt_topology_get_node_props(nodeId, &nodeProps);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto aperture_init_failed;
		assert(nodeProps.NumIOLinks <= NumNodes);
		ret = hsakmt_topology_get_iolink_props(nodeId, nodeProps.NumIOLinks,
						linkProps);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto aperture_init_failed;
		for (j = 0; j < nodeProps.NumIOLinks; j++) {
			int32_t to_gpu_mem_id =
				gpu_mem_find_by_node_id(linkProps[j].NodeTo);
			uint32_t peer;

			/* Links to nodes without a gpu_mem[] entry are ignored */
			if (to_gpu_mem_id < 0)
				continue;

			assert(gpu_mem[to_gpu_mem_id].usable_peer_id_num < NumNodes);
			peer = gpu_mem[to_gpu_mem_id].usable_peer_id_num++;
			gpu_mem[to_gpu_mem_id].usable_peer_id_array[peer] =
				gpu_mem[gpu_mem_id].gpu_id;
		}

		gpu_mem[gpu_mem_id].lds_aperture.base =
			PORT_UINT64_TO_VPTR(process_apertures[i].lds_base);
		gpu_mem[gpu_mem_id].lds_aperture.limit =
			PORT_UINT64_TO_VPTR(process_apertures[i].lds_limit);

		gpu_mem[gpu_mem_id].scratch_aperture.base =
			PORT_UINT64_TO_VPTR(process_apertures[i].scratch_base);
		gpu_mem[gpu_mem_id].scratch_aperture.limit =
			PORT_UINT64_TO_VPTR(process_apertures[i].scratch_limit);

		if (IS_CANONICAL_ADDR(process_apertures[i].gpuvm_limit)) {
			uint64_t vm_alignment = get_vm_alignment(
				gpu_mem[gpu_mem_id].device_id);

			/* Set proper alignment for scratch backing aperture */
			gpu_mem[gpu_mem_id].scratch_physical.align = vm_alignment;

			/* Non-canonical per-ASIC GPUVM aperture does
			 * not exist on dGPUs in GPUVM64 address mode
			 */
			gpu_mem[gpu_mem_id].gpuvm_aperture.base = NULL;
			gpu_mem[gpu_mem_id].gpuvm_aperture.limit = NULL;

			/* Update SVM aperture limits and alignment: the shared
			 * range is the intersection of all GPU ranges and uses
			 * the strictest alignment.
			 */
			if (process_apertures[i].gpuvm_base > svm_base)
				svm_base = process_apertures[i].gpuvm_base;
			if (process_apertures[i].gpuvm_limit < svm_limit ||
			    svm_limit == 0)
				svm_limit = process_apertures[i].gpuvm_limit;
			if (vm_alignment > svm_alignment)
				svm_alignment = vm_alignment;
		} else {
			gpu_mem[gpu_mem_id].gpuvm_aperture.base =
				PORT_UINT64_TO_VPTR(process_apertures[i].gpuvm_base);
			gpu_mem[gpu_mem_id].gpuvm_aperture.limit =
				PORT_UINT64_TO_VPTR(process_apertures[i].gpuvm_limit);
			/* Reserve space at the start of the
			 * aperture. After subtracting the base, we
			 * don't want valid pointers to become NULL.
			 */
			aperture_allocate_area(
				&gpu_mem[gpu_mem_id].gpuvm_aperture,
				NULL,
				gpu_mem[gpu_mem_id].gpuvm_aperture.align);
		}

		/* Acquire the VM from the DRM render node for KFD use */
		ret = acquire_vm(gpu_mem[gpu_mem_id].gpu_id,
				 gpu_mem[gpu_mem_id].drm_render_fd);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto aperture_init_failed;
	}
	/* From here on all_gpu_id_array_size is a size in bytes */
	all_gpu_id_array_size *= sizeof(uint32_t);

	if (svm_limit) {
		/* At least one GPU uses GPUVM in canonical address
		 * space. Set up SVM apertures shared by all such GPUs
		 */
		ret = init_svm_apertures(svm_base, svm_limit, svm_alignment,
					 guardPages);
		if (ret != HSAKMT_STATUS_SUCCESS)
			goto init_svm_failed;

		for (i = 0 ; i < num_of_sysfs_nodes ; i++) {
			uintptr_t alt_base;
			uint64_t alt_size;
			uint32_t high_precision = 0;
			int err;

			if (!IS_CANONICAL_ADDR(process_apertures[i].gpuvm_limit))
				continue;

			/* i indexes the kernel's aperture list, which is not
			 * the topology node numbering. Resolve the node id
			 * through gpu_mem[] before asking for the GFX version;
			 * devices without a gpu_mem[] entry get the default
			 * (disabled) precision mode.
			 */
			gpu_mem_id = gpu_mem_find_by_gpu_id(process_apertures[i].gpu_id);
			if (gpu_mem_id >= 0 &&
			    hsakmt_get_gfxv_by_node_id(gpu_mem[gpu_mem_id].node_id) ==
			    GFX_VERSION_GFX950)
				high_precision = mfma_high_precision_mode;

			/* Set memory policy to match the SVM apertures */
			alt_base = (uintptr_t)svm.dgpu_alt_aperture->base;
			alt_size = VOID_PTRS_SUB(svm.dgpu_alt_aperture->limit,
				svm.dgpu_alt_aperture->base) + 1;
			err = fmm_set_memory_policy(process_apertures[i].gpu_id,
						    svm.disable_cache ?
						    KFD_IOC_CACHE_POLICY_COHERENT :
						    KFD_IOC_CACHE_POLICY_NONCOHERENT,
						    KFD_IOC_CACHE_POLICY_COHERENT,
						    alt_base, alt_size,
						    high_precision);
			if (err) {
				pr_err("Failed to set mem policy for GPU [0x%x]\n",
				       process_apertures[i].gpu_id);
				ret = HSAKMT_STATUS_ERROR;
				goto set_memory_policy_failed;
			}
		}
	}

	cpuvm_aperture.align = PAGE_SIZE;
	cpuvm_aperture.limit = (void *)0x7FFFFFFFFFFF; /* 2^47 - 1 */

	fmm_init_rbtree();

	/* Failure here is logged but not fatal */
	if (!init_mem_handle_aperture(PAGE_SIZE, guardPages))
		pr_err("Failed to init mem_handle_aperture\n");

	/* Map the remapped MMIO page of every GPU that needs SVM. Failure is
	 * logged but not fatal; the aperture then stays empty.
	 */
	for (gpu_mem_id = 0; (uint32_t)gpu_mem_id < gpu_mem_count; gpu_mem_id++) {
		if (!hsakmt_topology_is_svm_needed(gpu_mem[gpu_mem_id].EngineId))
			continue;
		gpu_mem[gpu_mem_id].mmio_aperture.base = map_mmio(
				gpu_mem[gpu_mem_id].node_id,
				gpu_mem[gpu_mem_id].gpu_id,
				hsakmt_kfd_fd);
		if (gpu_mem[gpu_mem_id].mmio_aperture.base)
			gpu_mem[gpu_mem_id].mmio_aperture.limit = (void *)
			((char *)gpu_mem[gpu_mem_id].mmio_aperture.base +
			 PAGE_SIZE - 1);
		else
			pr_err("Failed to map remapped mmio page on gpu_mem %d\n",
					gpu_mem_id);
	}

	free(process_apertures);
	return ret;

aperture_init_failed:
init_svm_failed:
set_memory_policy_failed:
	free(all_gpu_id_array);
	all_gpu_id_array = NULL;
get_aperture_ioctl_failed:
	free(process_apertures);
sysfs_parse_failed:
gpu_mem_init_failed:
	hsakmt_fmm_destroy_process_apertures();
	return ret;
}

/* Tear down the state created by hsakmt_fmm_init_process_apertures():
 * release the MMIO pages, then free the all-GPU ID array, the per-GPU peer
 * ID arrays and gpu_mem[] itself. All related globals are reset.
 */
void hsakmt_fmm_destroy_process_apertures(void)
{
	/* Must run first: it still walks gpu_mem[] */
	release_mmio();

	/* free(NULL) is a no-op, so no guard is needed */
	free(all_gpu_id_array);
	all_gpu_id_array = NULL;
	all_gpu_id_array_size = 0;

	if (gpu_mem) {
		uint32_t idx;

		for (idx = 0; idx < gpu_mem_count; idx++)
			free(gpu_mem[idx].usable_peer_id_array);

		free(gpu_mem);
		gpu_mem = NULL;
	}
	gpu_mem_count = 0;
}

/* Report base and limit of one aperture of GPU @gpu_id.
 *
 * @aperture_type:  which aperture to report (GPUVM, scratch, LDS, SVM, MMIO)
 * @gpu_id:         GPU whose aperture is queried
 * @aperture_base:  receives the base address on success
 * @aperture_limit: receives the limit address on success
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an unknown GPU,
 * HSAKMT_STATUS_ERROR for an unknown aperture type or an aperture that
 * aperture_is_valid() rejects, HSAKMT_STATUS_SUCCESS otherwise. The
 * outputs are only written on success.
 */
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSAuint32 gpu_id,
			HSAuint64 *aperture_base, HSAuint64 *aperture_limit)
{
	void *base, *limit;
	int32_t slot;

	slot = gpu_mem_find_by_gpu_id(gpu_id);
	if (slot < 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Pick the requested range first, validate it once afterwards */
	switch (aperture_type) {
	case FMM_GPUVM:
		base = gpu_mem[slot].gpuvm_aperture.base;
		limit = gpu_mem[slot].gpuvm_aperture.limit;
		break;

	case FMM_SCRATCH:
		base = gpu_mem[slot].scratch_aperture.base;
		limit = gpu_mem[slot].scratch_aperture.limit;
		break;

	case FMM_LDS:
		base = gpu_mem[slot].lds_aperture.base;
		limit = gpu_mem[slot].lds_aperture.limit;
		break;

	case FMM_SVM:
		/* Report single SVM aperture, starting at base of
		 * fine-grained, ending at limit of coarse-grained
		 */
		/* NOTE(review): assumes both SVM aperture pointers have been
		 * set up by the time this is called - confirm with callers.
		 */
		base = svm.dgpu_alt_aperture->base;
		limit = svm.dgpu_aperture->limit;
		break;

	case FMM_MMIO:
		base = gpu_mem[slot].mmio_aperture.base;
		limit = gpu_mem[slot].mmio_aperture.limit;
		break;

	default:
		return HSAKMT_STATUS_ERROR;
	}

	if (!aperture_is_valid(base, limit))
		return HSAKMT_STATUS_ERROR;

	*aperture_base = PORT_VPTR_TO_UINT64(base);
	*aperture_limit = PORT_VPTR_TO_UINT64(limit);
	return HSAKMT_STATUS_SUCCESS;
}

/* Return true if @id occurs in @ids_array.
 *
 * @ids_array_size is the size of the array in bytes, not in elements.
 */
static bool id_in_array(uint32_t id, uint32_t *ids_array,
		uint32_t ids_array_size)
{
	uint32_t remaining = ids_array_size / sizeof(uint32_t);
	const uint32_t *cursor = ids_array;

	while (remaining--) {
		if (*cursor++ == id)
			return true;
	}
	return false;
}

/* Helper function to remove ids_array from
 * obj->mapped_device_id_array.
 *
 * @ids_array_size is a size in bytes. The surviving IDs are compacted to
 * the front of the mapped array. If none survive - which is always the
 * case when @ids_array is the mapped array itself - the mapped array is
 * freed and reset to NULL.
 */
static void remove_device_ids_from_mapped_array(vm_object_t *obj,
		uint32_t *ids_array, uint32_t ids_array_size)
{
	uint32_t kept = 0;

	/* Removing the array from itself leaves nothing; skip the scan */
	if (obj->mapped_device_id_array != ids_array) {
		const uint32_t count =
			obj->mapped_device_id_array_size / sizeof(uint32_t);
		uint32_t idx;

		for (idx = 0; idx < count; idx++) {
			const uint32_t id = obj->mapped_device_id_array[idx];

			if (id_in_array(id, ids_array, ids_array_size))
				continue;
			obj->mapped_device_id_array[kept++] = id;
		}
	}

	obj->mapped_device_id_array_size = kept * sizeof(uint32_t);
	if (kept == 0) {
		/* free(NULL) is a no-op */
		free(obj->mapped_device_id_array);
		obj->mapped_device_id_array = NULL;
	}
}

/* Helper function to add ids_array to
 * obj->mapped_device_id_array.
 *
 * @ids_array_size is a size in bytes. IDs that are already present are
 * removed first, so every ID ends up in the mapped array exactly once.
 *
 * If growing the array fails, an error is logged and the mapped array
 * keeps its previous (deduplicated) contents and size.
 */
static void add_device_ids_to_mapped_array(vm_object_t *obj,
		uint32_t *ids_array, uint32_t ids_array_size)
{
	uint32_t new_array_size;
	uint32_t *new_array;

	/* Adding (a prefix of) the mapped array to itself changes nothing.
	 * It must not reach the code below: the dedup step would free the
	 * array and the copy would then read freed memory.
	 */
	if (ids_array && ids_array == obj->mapped_device_id_array)
		return;

	/* Remove any potential duplicated ids */
	remove_device_ids_from_mapped_array(obj, ids_array, ids_array_size);

	/* Nothing to append */
	if (!ids_array_size)
		return;

	new_array_size = obj->mapped_device_id_array_size
		+ ids_array_size;

	/* Keep the old pointer until realloc has succeeded, otherwise a
	 * failure would leak the array and leave a stale size behind.
	 */
	new_array = (uint32_t *)realloc(
			obj->mapped_device_id_array, new_array_size);
	if (!new_array) {
		 pr_err("Failed to allocate memory for mapped device ID array.\n");
		 return;
	}
	obj->mapped_device_id_array = new_array;

	memcpy(&obj->mapped_device_id_array
			[obj->mapped_device_id_array_size/sizeof(uint32_t)],
			ids_array, ids_array_size);

	obj->mapped_device_id_array_size = new_array_size;
}


/* Map a memory object to GPUs.
 *
 * @aperture:         aperture the object lives in
 * @address:          address used to look the object up when @obj is NULL
 * @size:             not used by this function
 * @obj:              object to map, or NULL to look it up by @address. When
 *                    NULL, this function takes aperture->fmm_mutex itself;
 *                    otherwise it does not touch the lock.
 * @nodes_to_map:     GPU IDs to map to, or NULL
 * @nodes_array_size: size of @nodes_to_map in bytes
 *
 * If nodes_to_map is not NULL, map the nodes specified; otherwise map to
 * the object's registered devices, or - if there are none - to all usable
 * GPUs.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success, HSAKMT_STATUS_INVALID_HANDLE
 * if no object is found at @address, HSAKMT_STATUS_ERROR if the map ioctl
 * fails (handles mapped so far are unmapped again).
 */
static HSAKMT_STATUS _fmm_map_to_gpu(manageable_aperture_t *aperture,
			void *address, uint64_t size, vm_object_t *obj,
			uint32_t *nodes_to_map, uint32_t nodes_array_size)
{
	struct kfd_ioctl_map_memory_to_gpu_args args = {0};
	vm_object_t *object;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	int ret_ioctl;
	uint32_t i;

	if (!obj)
		pthread_mutex_lock(&aperture->fmm_mutex);

	object = obj;
	if (!object) {
		/* Find the object to retrieve the handle */
		object = vm_find_object_by_address(aperture, address, 0);
		if (!object) {
			ret = HSAKMT_STATUS_INVALID_HANDLE;
			goto err_object_not_found;
		}
	}

	/* For a memory region that is registered by user pointer, changing
	 * mapping nodes is not allowed, so we don't need to check the mapping
	 * nodes or map if it's already mapped. Just increase the reference.
	 */
	if (object->userptr && object->mapping_count) {
		++object->mapping_count;
		goto exit_ok;
	}

	if (nodes_to_map) {
	/* If specified, map the requested */
		args.device_ids_array_ptr = (uint64_t)nodes_to_map;
		args.n_devices = nodes_array_size / sizeof(uint32_t);
	} else if (object->registered_device_id_array_size > 0) {
	/* otherwise map all registered */
		args.device_ids_array_ptr =
			(uint64_t)object->registered_device_id_array;
		args.n_devices = object->registered_device_id_array_size /
			sizeof(uint32_t);
	} else {
	/* not specified, not registered: map all GPUs.
	 * Use the resolved object here: obj is NULL when the object was
	 * looked up by address above.
	 */
		int32_t gpu_mem_id = gpu_mem_find_by_node_id(object->node_id);

		if (!object->userptr && hsakmt_get_device_id_by_node_id(object->node_id) &&
		    gpu_mem_id >= 0) {
			args.device_ids_array_ptr = (uint64_t)
				gpu_mem[gpu_mem_id].usable_peer_id_array;
			args.n_devices =
				gpu_mem[gpu_mem_id].usable_peer_id_num;
		} else {
			args.device_ids_array_ptr = (uint64_t)all_gpu_id_array;
			args.n_devices = all_gpu_id_array_size / sizeof(uint32_t);
		}
	}

	/* An object may be backed by several handles; map each of them */
	for (i = 0; i < object->handle_num; i++) {
		args.n_success = 0;
		args.handle = object->handles[i];

		ret_ioctl = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args);
		if (ret_ioctl) {
			pr_err("GPU mapping failed (%d) for obj at %p, userptr %p, size %lu",
				ret_ioctl, object->start, object->userptr, object->size);
			ret = HSAKMT_STATUS_ERROR;
			goto err_map_failed;
		}
	}

	/* Record the devices the object is now mapped on */
	add_device_ids_to_mapped_array(object,
				(uint32_t *)args.device_ids_array_ptr,
				args.n_success * sizeof(uint32_t));
	print_device_id_array((uint32_t *)object->mapped_device_id_array,
			      object->mapped_device_id_array_size);

	object->mapping_count = 1;
	/* Mapping changed and lifecycle of object->mapped_node_id_array
	 * terminates here. Free it and allocate on next query
	 */
	if (object->mapped_node_id_array) {
		free(object->mapped_node_id_array);
		object->mapped_node_id_array = NULL;
	}

err_map_failed:
	/* On failure, undo the handles that were mapped before the error.
	 * On success ret is 0 and this loop does not run.
	 */
	while (ret && i--) {
		args.handle = object->handles[i];
		hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args);
	}
exit_ok:
err_object_not_found:
	if (!obj)
		pthread_mutex_unlock(&aperture->fmm_mutex);
	return ret;
}

/* Back a range of the scratch aperture of GPU @gpu_id with memory and map
 * it to that GPU.
 *
 * @gpu_id:   GPU owning the scratch aperture
 * @aperture: scratch backing aperture; [address, address + size) must lie
 *            within its base and limit
 * @address:  start of the range
 * @size:     size of the range in bytes
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an unknown GPU or a range
 * outside the aperture, HSAKMT_STATUS_INVALID_HANDLE if the allocation
 * fails, HSAKMT_STATUS_ERROR if the CPU mapping fails, or the result of
 * the GPU mapping. On APUs nothing is done and success is returned.
 */
static HSAKMT_STATUS _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *aperture,
				   void *address, uint64_t size)
{
	uint64_t cpu_map_offset = 0;
	bool debugger_attached;
	vm_object_t *scratch_obj;
	HSAKMT_STATUS status;
	uint32_t alloc_flags;
	int32_t slot;

	/* Retrieve gpu_mem id according to gpu_id */
	slot = gpu_mem_find_by_gpu_id(gpu_id);
	if (slot < 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Nothing to do on APU */
	if (!hsakmt_is_dgpu)
		return HSAKMT_STATUS_SUCCESS;

	/* sanity check the address */
	if (address < aperture->base ||
	    VOID_PTR_ADD(address, size - 1) > aperture->limit)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* GTT is used when the debugger status is set for the node,
	 * VRAM otherwise.
	 */
	debugger_attached = hsakmt_debug_get_reg_status(gpu_mem[slot].node_id);
	if (debugger_attached)
		alloc_flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT;
	else
		alloc_flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM;
	alloc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;

	/* allocate object within the scratch backing aperture */
	scratch_obj = fmm_allocate_memory_object(gpu_id, address, size,
						 aperture, &cpu_map_offset,
						 alloc_flags);
	if (!scratch_obj)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* Create a CPU mapping for the debugger */
	if (fmm_map_to_cpu(address, size, debugger_attached,
			   gpu_mem[slot].drm_render_fd,
			   cpu_map_offset) == MAP_FAILED) {
		__fmm_release(scratch_obj, aperture);
		return HSAKMT_STATUS_ERROR;
	}

	/* map to GPU; give the object back if that fails */
	status = _fmm_map_to_gpu(aperture, address, size, NULL, &gpu_id, sizeof(uint32_t));
	if (status != HSAKMT_STATUS_SUCCESS)
		__fmm_release(scratch_obj, aperture);

	return status;
}

/*
 * Map user-pointer memory to the GPU.
 *
 * addr/size:        user range to map; addr need not be page aligned
 * gpuvm_addr:       optional out parameter, receives the GPU VM address
 *                   adjusted by addr's offset within its page
 * object:           registered userptr object, or NULL to go through the SVM API
 * nodes_to_map:     GPU list used on the SVM path; NULL selects all GPUs
 * nodes_array_size: size of nodes_to_map in bytes
 *
 * With an object, the whole object is mapped through _fmm_map_to_gpu() on
 * svm.dgpu_aperture. Without one, the page-aligned range is mapped through
 * fmm_map_mem_svm_api(), which requires hsakmt_is_svm_api_supported.
 *
 * Returns HSAKMT_STATUS_ERROR when there is neither an object nor SVM API
 * support, otherwise the status of the mapping call.
 */
static HSAKMT_STATUS _fmm_map_to_gpu_userptr(void *addr, uint64_t size,
					     uint64_t *gpuvm_addr, vm_object_t *object,
					     uint32_t *nodes_to_map, uint32_t nodes_array_size)
{
	HSAuint32 page_offset = (HSAuint64)addr & (PAGE_SIZE-1);
	manageable_aperture_t *aperture = svm.dgpu_aperture;
	HSAKMT_STATUS status;
	void *svm_addr;

	if (!object && !hsakmt_is_svm_api_supported) {
		pr_err("Object is null and SVM API is not supported.\n");
		return HSAKMT_STATUS_ERROR;
	}

	if (object) {
		/* Registered userptr: map the whole object */
		svm_addr = object->start;
		status = _fmm_map_to_gpu(aperture, svm_addr, object->size, object, NULL, 0);
	} else {
		/* SVM path: operate on the page containing addr onwards */
		svm_addr = (void*)((HSAuint64)addr - page_offset);
		if (!nodes_to_map) {
			nodes_to_map = all_gpu_id_array;
			nodes_array_size = all_gpu_id_array_size;
		}
		pr_debug("%s Mapping Address %p size aligned: %ld offset: %x\n",
			__func__, svm_addr, PAGE_ALIGN_UP(page_offset + size), page_offset);
		status = fmm_map_mem_svm_api(svm_addr,
					     PAGE_ALIGN_UP(page_offset + size),
					     nodes_to_map,
					     nodes_array_size / sizeof(uint32_t));
	}

	/* Report the GPU VM address including the offset from the page start */
	if (gpuvm_addr && status == HSAKMT_STATUS_SUCCESS)
		*gpuvm_addr = (uint64_t)svm_addr + page_offset;

	return status;
}

/*
 * Map a memory range to the GPU(s).
 *
 * address/size:  range to map
 * gpuvm_address: optional out parameter; written by the userptr path and,
 *                for apertures that are not CPU accessible, with the offset
 *                of the object from the aperture base
 *
 * Dispatch order:
 *  - scratch apertures are handled by _fmm_map_to_gpu_scratch();
 *  - unknown addresses without SVM API support are prefetched on APUs and
 *    rejected on dGPUs;
 *  - VA-only objects (handles[0] == 0) and objects in mem_handle_aperture
 *    are rejected;
 *  - cpuvm_aperture memory is only prefetched;
 *  - userptr objects and SVM ranges go through _fmm_map_to_gpu_userptr();
 *  - everything else goes through _fmm_map_to_gpu().
 */
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address)
{
	manageable_aperture_t *aperture = NULL;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	vm_object_t *object;
	gpu_mem_t *scratch_owner;
	bool via_userptr;

	/* Special handling for scratch memory */
	scratch_owner = fmm_is_scratch_aperture(address);
	if (scratch_owner)
		return _fmm_map_to_gpu_scratch(scratch_owner->gpu_id,
					       &scratch_owner->scratch_physical,
					       address, size);

	object = vm_find_object(address, size, &aperture);
	if (!object && !hsakmt_is_svm_api_supported) {
		if (hsakmt_is_dgpu) {
			pr_err("Object not found at %p\n", address);
			return HSAKMT_STATUS_INVALID_PARAMETER;
		}
		/* Prefetch memory on APUs with dummy-reads */
		fmm_check_user_memory(address, size);
		return HSAKMT_STATUS_SUCCESS;
	}
	/* Successful vm_find_object returns with the aperture locked */

	/* Neither a VA-only allocation nor a buffer-only allocation (which
	 * should be mapped by the GEM API) can be mapped here
	 */
	if ((object && object->handles[0] == 0) ||
	    (aperture && aperture == &mem_handle_aperture)) {
		pthread_mutex_unlock(&aperture->fmm_mutex);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* No object means an SVM range; an object qualifies if it is a userptr */
	via_userptr = object ? (object->userptr != NULL)
			     : (hsakmt_is_svm_api_supported ? true : false);

	if (aperture && aperture == &cpuvm_aperture) {
		/* Prefetch memory on APUs with dummy-reads */
		fmm_check_user_memory(address, size);
		ret = HSAKMT_STATUS_SUCCESS;
	} else if (via_userptr) {
		ret = _fmm_map_to_gpu_userptr(address, size, gpuvm_address, object, NULL, 0);
	} else if (aperture) {
		ret = _fmm_map_to_gpu(aperture, address, size, object, NULL, 0);
		/* Update alternate GPUVM address only for
		 * CPU-invisible apertures on old APUs
		 */
		if (ret == HSAKMT_STATUS_SUCCESS && gpuvm_address &&
		    !aperture->is_cpu_accessible)
			*gpuvm_address = VOID_PTRS_SUB(object->start, aperture->base);
	}

	/* The aperture lock is only held when an object was found */
	if (object)
		pthread_mutex_unlock(&aperture->fmm_mutex);

	return ret;
}

static void print_device_id_array(uint32_t *device_id_array, uint32_t device_id_array_size)
{
#ifdef DEBUG_PRINT_APERTURE
	device_id_array_size /= sizeof(uint32_t);

	pr_info("device id array size %d\n", device_id_array_size);

	for (uint32_t i = 0 ; i < device_id_array_size; i++)
		pr_info("%d . 0x%x\n", (i+1), device_id_array[i]);
#endif
}

/*
 * Unmap a buffer from a set of GPUs.
 *
 * aperture:              aperture the buffer lives in
 * address:               address inside the buffer; only used to look the
 *                        object up when obj is NULL
 * device_ids_array:      GPUs to unmap from, or NULL to unmap from every GPU
 *                        the object is currently mapped on
 * device_ids_array_size: size of device_ids_array in bytes
 * obj:                   object to operate on. When non-NULL the caller is
 *                        expected to hold aperture->fmm_mutex already; when
 *                        NULL this function takes and drops the lock itself.
 *
 * A userptr object with more than one outstanding mapping only has its
 * mapping count decremented. Otherwise every handle of the object is passed
 * to AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU; on overall success the unmapped IDs
 * are removed from the object's mapped list, the cached mapped node array is
 * dropped and the mapping count is reset.
 *
 * Returns 0 on success, -1 if no object is found, or the result of the last
 * failing ioctl.
 */
static int _fmm_unmap_from_gpu(manageable_aperture_t *aperture, void *address,
		uint32_t *device_ids_array, uint32_t device_ids_array_size,
		vm_object_t *obj)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args args = {0};
	vm_object_t *object = obj;
	int ret = 0;
	uint32_t h;

	if (!obj)
		pthread_mutex_lock(&aperture->fmm_mutex);

	/* Find the object to retrieve the handle */
	if (!object) {
		HSAuint32 page_offset = (HSAint64)address & (PAGE_SIZE - 1);

		object = vm_find_object_by_address(aperture,
					VOID_PTR_SUB(address, page_offset), 0);
		if (!object) {
			ret = -1;
			goto out;
		}
	}

	/* Userptr still mapped by someone else: just drop one reference */
	if (object->userptr && object->mapping_count > 1) {
		--object->mapping_count;
		goto out;
	}

	if (device_ids_array && device_ids_array_size > 0) {
		/* Caller supplied an explicit GPU list */
		args.device_ids_array_ptr = (uint64_t)device_ids_array;
		args.n_devices = device_ids_array_size / sizeof(uint32_t);
	} else if (object->mapped_device_id_array_size > 0) {
		/* Default to every GPU the object is mapped on */
		args.device_ids_array_ptr = (uint64_t)object->mapped_device_id_array;
		args.n_devices = object->mapped_device_id_array_size /
			sizeof(uint32_t);
	} else {
		/*
		 * When unmap exits here it should return a failing error code,
		 * as the user tried to unmap an already unmapped buffer.
		 * Currently we return success, as KFDTEST and RT need to
		 * deploy the change on their side before the thunk can fail
		 * in this case.
		 */
		ret = 0;
		goto out;
	}

	print_device_id_array((void *)args.device_ids_array_ptr,
			      args.n_devices * sizeof(uint32_t));

	for (h = 0; h < object->handle_num; h++) {
		int ioctl_ret;

		args.handle = object->handles[h];
		args.n_success = 0;

		ioctl_ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args);
		/* Keep going, but remember that something failed */
		if (ioctl_ret)
			ret = ioctl_ret;
	}

	if (ret)
		goto out;

	remove_device_ids_from_mapped_array(object,
			(uint32_t *)args.device_ids_array_ptr,
			args.n_success * sizeof(uint32_t));

	/* The cached node ID list no longer matches the mapping */
	free(object->mapped_node_id_array);
	object->mapped_node_id_array = NULL;
	object->mapping_count = 0;

out:
	if (!obj)
		pthread_mutex_unlock(&aperture->fmm_mutex);
	return ret;
}

/*
 * Unmap a scratch range from the GPU and free its backing object.
 *
 * gpu_id:   GPU owning the scratch aperture
 * aperture: scratch backing aperture
 * address:  start address of the scratch object
 *
 * On APUs this is a no-op. On dGPUs the object is unmapped from every GPU it
 * is mapped on, its CPU range is replaced by an inaccessible anonymous
 * mapping so the address space stays reserved, and the object is released.
 * An object that is not mapped on any GPU is left untouched.
 *
 * Returns -1 for an unknown gpu_id, -EINVAL if no object exists at address,
 * the ioctl result if unmapping fails, otherwise the result of
 * __fmm_release().
 */
static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
				       manageable_aperture_t *aperture,
				       void *address)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args args = {0};
	vm_object_t *object;
	int ret;

	/* Only used to validate gpu_id */
	if (gpu_mem_find_by_gpu_id(gpu_id) < 0)
		return -1;

	/* Nothing to do on APU */
	if (!hsakmt_is_dgpu)
		return 0;

	pthread_mutex_lock(&aperture->fmm_mutex);

	/* Find the object to retrieve the handle and size */
	object = vm_find_object_by_address(aperture, address, 0);
	if (!object) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Not mapped on any GPU: nothing to undo */
	if (!object->mapped_device_id_array ||
	    object->mapped_device_id_array_size == 0) {
		ret = 0;
		goto unlock;
	}

	/* unmap from GPU */
	args.handle = object->handles[0];
	args.device_ids_array_ptr = (uint64_t)object->mapped_device_id_array;
	args.n_devices = object->mapped_device_id_array_size / sizeof(uint32_t);
	args.n_success = 0;
	ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args);

	/* unmap from CPU while keeping the address space reserved */
	mmap(address, object->size, PROT_NONE,
	     MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED,
	     -1, 0);

	/* Bookkeeping is updated for whatever the kernel did unmap */
	remove_device_ids_from_mapped_array(object,
			(uint32_t *)args.device_ids_array_ptr,
			args.n_success * sizeof(uint32_t));

	free(object->mapped_node_id_array);
	object->mapped_node_id_array = NULL;

	if (ret)
		goto unlock;

	pthread_mutex_unlock(&aperture->fmm_mutex);

	/* free object in scratch backing aperture */
	return __fmm_release(object, aperture);

unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return ret;
}

/*
 * Unmap the buffer at address from all GPUs it is mapped on.
 *
 * Scratch addresses are forwarded to _fmm_unmap_from_gpu_scratch(). For
 * addresses without a known object the call succeeds on APUs and when the
 * SVM API is supported, and fails with -EINVAL otherwise. Objects in
 * cpuvm_aperture need no GPU unmapping.
 *
 * Returns 0 on success or a negative/non-zero error code.
 */
int hsakmt_fmm_unmap_from_gpu(void *address)
{
	manageable_aperture_t *aperture;
	vm_object_t *object;
	gpu_mem_t *scratch_owner;
	int ret = 0;

	/* Special handling for scratch memory */
	scratch_owner = fmm_is_scratch_aperture(address);
	if (scratch_owner)
		return _fmm_unmap_from_gpu_scratch(scratch_owner->gpu_id,
						   &scratch_owner->scratch_physical,
						   address);

	object = vm_find_object(address, 0, &aperture);
	if (!object) {
		/* On APUs GPU unmapping of system memory is a no-op */
		if (!hsakmt_is_dgpu || hsakmt_is_svm_api_supported)
			return 0;
		return -EINVAL;
	}
	/* Successful vm_find_object returns with the aperture locked */

	/* On APUs GPU unmapping of system memory is a no-op, so only
	 * objects outside cpuvm_aperture need real work
	 */
	if (aperture != &cpuvm_aperture)
		ret = _fmm_unmap_from_gpu(aperture, address, NULL, 0, object);

	pthread_mutex_unlock(&aperture->fmm_mutex);

	return ret;
}

/*
 * Look up the first KFD handle of the object that starts at address.
 *
 * The per-GPU gpuvm apertures are searched first, then svm.dgpu_aperture and
 * svm.dgpu_alt_aperture. The lookup is done with the owning aperture locked.
 *
 * Returns true and stores the handle in *handle if an object was found and
 * handle is non-NULL; returns false otherwise.
 */
bool hsakmt_fmm_get_handle(void *address, uint64_t *handle)
{
	manageable_aperture_t *aperture = NULL;
	vm_object_t *object;
	bool found = false;
	uint32_t idx;

	/* Find the aperture the requested address belongs to */
	for (idx = 0; idx < gpu_mem_count && !aperture; idx++) {
		if (gpu_mem[idx].gpu_id == NON_VALID_GPU_ID)
			continue;

		if (address >= gpu_mem[idx].gpuvm_aperture.base &&
		    address <= gpu_mem[idx].gpuvm_aperture.limit)
			aperture = &gpu_mem[idx].gpuvm_aperture;
	}

	/* Not in any per-GPU aperture: try the two SVM apertures */
	if (!aperture) {
		if (address >= svm.dgpu_aperture->base &&
		    address <= svm.dgpu_aperture->limit)
			aperture = svm.dgpu_aperture;
		else if (address >= svm.dgpu_alt_aperture->base &&
			 address <= svm.dgpu_alt_aperture->limit)
			aperture = svm.dgpu_alt_aperture;
		else
			return false;
	}

	pthread_mutex_lock(&aperture->fmm_mutex);
	/* Find the object to retrieve the handle */
	object = vm_find_object_by_address(aperture, address, 0);
	if (handle && object) {
		*handle = object->handles[0];
		found = true;
	}
	pthread_mutex_unlock(&aperture->fmm_mutex);

	return found;
}

/*
 * Register a range of user memory as a userptr buffer object.
 *
 * addr/size:    user range; it is extended to page boundaries for the BO
 * obj_ret:      optional out parameter receiving the object that represents
 *               the registration
 * coarse_grain: when false the BO is allocated with the COHERENT flag
 * ext_coherent: when true the BO is allocated with the EXT_COHERENT flag
 *
 * The BO is created on the first GPU (g_first_gpu_mem) in svm.dgpu_aperture.
 * If another thread registered the same userptr in the meantime, the new BO
 * is released again and the existing object's registration count is bumped
 * instead.
 *
 * Returns HSAKMT_STATUS_ERROR when there is no GPU or the allocation fails,
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
static HSAKMT_STATUS fmm_register_user_memory(void *addr,
						HSAuint64 size,
						vm_object_t **obj_ret,
						bool coarse_grain,
						bool ext_coherent)
{
	manageable_aperture_t *aperture = svm.dgpu_aperture;
	HSAuint32 page_offset = (HSAuint64)addr & (PAGE_SIZE-1);
	HSAuint64 aligned_addr = (HSAuint64)addr - page_offset;
	HSAuint64 aligned_size = PAGE_ALIGN_UP(page_offset + size);
	vm_object_t *new_obj, *existing;
	HSAuint32 gpu_id;
	void *svm_addr;

	/* The userptr BO is created on the first GPU */
	if (!g_first_gpu_mem)
		return HSAKMT_STATUS_ERROR;

	gpu_id = g_first_gpu_mem->gpu_id;

	/* Optionally check that the CPU mapping is valid */
	if (svm.check_userptr)
		fmm_check_user_memory(addr, size);

	/* Allocate BO, userptr address is passed in mmap_offset */
	svm_addr = __fmm_allocate_device(gpu_id, NULL, aligned_size, aperture,
			 &aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
			 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
			 KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE |
			 (coarse_grain ? 0 : KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) |
			 (ext_coherent ? KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT : 0),
			 0,
			 &new_obj);
	if (!svm_addr || !new_obj)
		return HSAKMT_STATUS_ERROR;

	pthread_mutex_lock(&aperture->fmm_mutex);

	/* catch the race condition where some other thread added the userptr
	 * object already after the vm_find_object.
	 */
	existing = vm_find_object_by_userptr(aperture, addr, size);
	if (!existing) {
		/* First registration: publish the object in the userptr tree */
		new_obj->userptr = addr;
		hsakmt_gpuid_to_nodeid(gpu_id, &new_obj->node_id);
		new_obj->userptr_size = size;
		new_obj->registration_count = 1;
		new_obj->user_node.key = rbtree_key((unsigned long)addr, size);
		hsakmt_rbtree_insert(&aperture->user_tree, &new_obj->user_node);
	} else {
		++existing->registration_count;
	}

	pthread_mutex_unlock(&aperture->fmm_mutex);

	/* Lost the race: the BO allocated above is redundant */
	if (existing) {
		__fmm_release(new_obj, aperture);
		new_obj = existing;
	}

	if (obj_ret)
		*obj_ret = new_obj;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Register memory for GPU access, optionally restricted to a set of GPUs.
 *
 * address/size_in_bytes: range to register
 * gpu_id_array:          GPU IDs to register with; ownership passes to this
 *                        function on success (it is either stored in the
 *                        object or freed)
 * gpu_id_array_size:     size of gpu_id_array in bytes
 * coarse_grain/ext_coherent: coherence attributes, mutually exclusive
 *
 * Unknown addresses are registered as new user pointers on dGPUs (through
 * the SVM API when supported, falling back to a userptr BO) and are a no-op
 * on APUs. A known userptr object gets its registration count incremented;
 * any other known object is rejected with HSAKMT_STATUS_INVALID_HANDLE.
 *
 * Registering again with a different GPU list fails with
 * HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED.
 */
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
				  uint32_t *gpu_id_array,
				  uint32_t gpu_id_array_size,
				  bool coarse_grain,
				  bool ext_coherent)
{
	manageable_aperture_t *aperture = NULL;
	vm_object_t *object = NULL;
	HSAKMT_STATUS ret;

	if ((gpu_id_array_size > 0 && !gpu_id_array) ||
	    (coarse_grain && ext_coherent))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	object = vm_find_object(address, size_in_bytes, &aperture);
	if (object) {
		/* Successful vm_find_object returns with aperture locked */
		if (!object->userptr) {
			/* Not a userptr when we are expecting one */
			pthread_mutex_unlock(&aperture->fmm_mutex);
			return HSAKMT_STATUS_INVALID_HANDLE;
		}
		/* Update an existing userptr */
		++object->registration_count;
	} else {
		/* System memory registration on APUs is a no-op */
		if (!hsakmt_is_dgpu)
			return HSAKMT_STATUS_SUCCESS;

		/* Register a new user ptr */
		if (hsakmt_is_svm_api_supported) {
			ret = fmm_register_mem_svm_api(address,
							size_in_bytes,
							coarse_grain,
							ext_coherent);
			if (ret == HSAKMT_STATUS_SUCCESS)
				return ret;
			pr_debug("SVM failed, falling back to old registration\n");
		}

		ret = fmm_register_user_memory(address,
					       size_in_bytes,
					       &object,
					       coarse_grain,
					       ext_coherent);
		if (ret != HSAKMT_STATUS_SUCCESS)
			return ret;
		if (gpu_id_array_size == 0)
			return HSAKMT_STATUS_SUCCESS;

		/* Take the lock for the registered device ID array setup */
		aperture = svm.dgpu_aperture;
		pthread_mutex_lock(&aperture->fmm_mutex);
	}

	ret = HSAKMT_STATUS_SUCCESS;

	if (object->registered_device_id_array_size > 0) {
		/* Multiple registration is allowed, but not changing nodes */
		if (gpu_id_array_size == object->registered_device_id_array_size &&
		    !memcmp(object->registered_device_id_array,
			    gpu_id_array, gpu_id_array_size)) {
			/* Delete the new array, keep the existing one. */
			free(gpu_id_array);
		} else {
			pr_err("Cannot change nodes in a registered addr.\n");
			ret = HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED;
		}
		goto unlock;
	}

	if (gpu_id_array_size > 0) {
		object->registered_device_id_array = gpu_id_array;
		object->registered_device_id_array_size = gpu_id_array_size;
		/* Registration of object changed. Lifecycle of object->
		 * registered_node_id_array terminates here. Free old one
		 * and re-allocate on next query
		 */
		free(object->registered_node_id_array);
		object->registered_node_id_array = NULL;
	}

unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return ret;
}

#define GRAPHICS_METADATA_DEFAULT_SIZE 64
/*
 * Import a graphics resource (dmabuf) and register it with the thunk.
 *
 * GraphicsResourceHandle: dmabuf file descriptor of the resource
 * GraphicsResourceInfo:   out parameter, filled with address, size, metadata
 *                         and node of the imported buffer on success
 * gpu_id_array:           GPU IDs to register the buffer with; stored in the
 *                         object on success
 * gpu_id_array_size:      size of gpu_id_array in bytes
 * RegisterFlags:          requiresVAddr forces a virtual address even when no
 *                         GPU list is given
 *
 * The dmabuf info is queried first (retrying once with a bigger metadata
 * buffer if the default one is too small). A virtual address range is then
 * reserved in the matching aperture, the dmabuf is imported there and a
 * vm_object is created for it. The metadata buffer is owned by the object on
 * success and freed on failure.
 *
 * Locking: the aperture mutex is held from the address reservation until the
 * object is fully set up. On failure it stays held until the reserved range
 * has been given back, so that aperture_release_area() never runs unlocked.
 *
 * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER,
 * HSAKMT_STATUS_NO_MEMORY or HSAKMT_STATUS_ERROR.
 */
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
					   HsaGraphicsResourceInfo *GraphicsResourceInfo,
					   uint32_t *gpu_id_array,
					   uint32_t gpu_id_array_size,
					   HSA_REGISTER_MEM_FLAGS RegisterFlags)
{
	struct kfd_ioctl_get_dmabuf_info_args infoArgs = {0};
	struct kfd_ioctl_import_dmabuf_args importArgs = {0};
	struct kfd_ioctl_free_memory_of_gpu_args freeArgs = {0};
	manageable_aperture_t *aperture;
	HsaMemFlags mflags;
	vm_object_t *obj;
	void *metadata;
	void *mem = NULL, *aperture_base = NULL;
	int32_t gpu_mem_id;
	int r;
	HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
	static const uint64_t IMAGE_ALIGN = 256*1024;

	if (gpu_id_array_size > 0 && !gpu_id_array)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	infoArgs.dmabuf_fd = GraphicsResourceHandle;
	infoArgs.metadata_size = GRAPHICS_METADATA_DEFAULT_SIZE;
	metadata = calloc(infoArgs.metadata_size, 1);
	if (!metadata)
		return HSAKMT_STATUS_NO_MEMORY;
	infoArgs.metadata_ptr = (uint64_t)metadata;
	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_DMABUF_INFO, (void *)&infoArgs);
	if (r && infoArgs.metadata_size > GRAPHICS_METADATA_DEFAULT_SIZE) {
		/* Try again with bigger metadata */
		free(metadata);
		metadata = calloc(infoArgs.metadata_size, 1);
		if (!metadata)
			return HSAKMT_STATUS_NO_MEMORY;
		infoArgs.metadata_ptr = (uint64_t)metadata;
		r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_DMABUF_INFO, (void *)&infoArgs);
	}

	if (r)
		goto error_free_metadata;

	/* Choose aperture based on GPU and allocate virtual address */
	gpu_mem_id = gpu_mem_find_by_gpu_id(infoArgs.gpu_id);
	if (gpu_mem_id < 0)
		goto error_free_metadata;

	/* import DMA buffer without VA assigned */
	if (!gpu_id_array && gpu_id_array_size == 0 && !RegisterFlags.ui32.requiresVAddr) {
		aperture = &mem_handle_aperture;
	} else if (hsakmt_topology_is_svm_needed(gpu_mem[gpu_mem_id].EngineId)) {
		aperture = svm.dgpu_aperture;
	} else {
		aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
		aperture_base = aperture->base;
	}
	if (!aperture_is_valid(aperture->base, aperture->limit))
		goto error_free_metadata;
	pthread_mutex_lock(&aperture->fmm_mutex);
	mem = aperture_allocate_area_aligned(aperture, NULL, infoArgs.size,
					     IMAGE_ALIGN);
	if (!mem) {
		pthread_mutex_unlock(&aperture->fmm_mutex);
		goto error_free_metadata;
	}

	/* Import DMA buffer */
	if (aperture == &mem_handle_aperture)
		importArgs.va_addr = 0;
	else
		importArgs.va_addr = VOID_PTRS_SUB(mem, aperture_base);

	importArgs.gpu_id = infoArgs.gpu_id;
	importArgs.dmabuf_fd = GraphicsResourceHandle;
	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, (void *)&importArgs);
	if (r)
		goto error_release_aperture;	/* aperture still locked */

	/* Atomically update and register the object */
	mflags = fmm_translate_ioc_to_hsa_flags(infoArgs.flags);
	mflags.ui32.CoarseGrain = 1;
	obj = aperture_allocate_object(aperture, mem, importArgs.handle,
				       infoArgs.size, mflags);
	if (!obj)
		goto error_release_buffer;	/* aperture still locked */

	obj->metadata = metadata;
	obj->registered_device_id_array = gpu_id_array;
	obj->registered_device_id_array_size = gpu_id_array_size;
	hsakmt_gpuid_to_nodeid(infoArgs.gpu_id, &obj->node_id);
	pthread_mutex_unlock(&aperture->fmm_mutex);

	GraphicsResourceInfo->MemoryAddress = mem;
	GraphicsResourceInfo->SizeInBytes = infoArgs.size;
	GraphicsResourceInfo->Metadata = (void *)(unsigned long)infoArgs.metadata_ptr;
	GraphicsResourceInfo->MetadataSizeInBytes = infoArgs.metadata_size;
	hsakmt_gpuid_to_nodeid(infoArgs.gpu_id, &GraphicsResourceInfo->NodeId);

	return HSAKMT_STATUS_SUCCESS;

	/* Both labels below are entered with the aperture mutex held */
error_release_buffer:
	freeArgs.handle = importArgs.handle;
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &freeArgs) != 0) {
		/* Handle error if memory is not freed properly */
		pr_err("Failed to free GPU memory\n");
	}
error_release_aperture:
	/* Give the reserved range back while still serialized against
	 * other users of the aperture's allocator
	 */
	aperture_release_area(aperture, mem, infoArgs.size);
	pthread_mutex_unlock(&aperture->fmm_mutex);
error_free_metadata:
	free(metadata);

	return status;
}

/*
 * Export the buffer containing a memory range as a dmabuf file descriptor.
 *
 * MemoryAddress/MemorySizeInBytes: range that must lie entirely inside one
 *                                  buffer object
 * DMABufFd: out parameter, receives the exported file descriptor
 * Offset:   out parameter, receives the offset of MemoryAddress from the
 *           start of the exported buffer
 *
 * The object lookup and the copy of its handle happen under the aperture
 * mutex; the export ioctl itself runs unlocked.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER if the address is in no aperture,
 * no object contains it, or the range does not fit in the object;
 * HSAKMT_STATUS_ERROR if the ioctl fails.
 */
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(void *MemoryAddress,
				    HSAuint64 MemorySizeInBytes,
				    int *DMABufFd,
				    HSAuint64 *Offset)
{
	struct kfd_ioctl_export_dmabuf_args exportArgs = {0};
	manageable_aperture_t *aperture;
	HsaApertureInfo ApeInfo;
	vm_object_t *obj;
	HSAuint64 offset = 0;
	int r;

	aperture = fmm_find_aperture(MemoryAddress, &ApeInfo);
	if (!aperture)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	pthread_mutex_lock(&aperture->fmm_mutex);
	obj = vm_find_object_by_address_range(aperture, MemoryAddress);
	if (obj) {
		offset = VOID_PTRS_SUB(MemoryAddress, obj->start);
		/* Compare against the remaining space instead of computing
		 * offset + size, which could wrap around for a huge size and
		 * wrongly pass the bounds check
		 */
		if (offset <= obj->size &&
		    MemorySizeInBytes <= obj->size - offset) {
			exportArgs.handle = obj->handles[0];
			exportArgs.flags = O_CLOEXEC;
			exportArgs.dmabuf_fd = 0;
		} else {
			obj = NULL;
		}
	}
	pthread_mutex_unlock(&aperture->fmm_mutex);
	/* obj is only used as a found/not-found flag from here on */
	if (!obj)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_EXPORT_DMABUF, (void *)&exportArgs);
	if (r)
		return HSAKMT_STATUS_ERROR;

	*DMABufFd = exportArgs.dmabuf_fd;
	*Offset = offset;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Export a buffer as an IPC handle that another process can import.
 *
 * MemoryAddress:      start address of the buffer object to share
 * SizeInBytes:        size to record in the handle; must be small enough for
 *                     its page count to fit in 32 bits
 * SharedMemoryHandle: out parameter, filled with the share handle, aperture
 *                     info, size in pages and exporting GPU ID
 *
 * Everything needed from the object (node ID, handle, flags) is copied while
 * the aperture mutex is held, so the object is never dereferenced after the
 * lock has been dropped.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an oversized request, an
 * address outside any aperture or without an object; the status of
 * hsakmt_validate_nodeid() if the node is invalid; HSAKMT_STATUS_ERROR if no
 * GPU is available for system memory or the export ioctl fails.
 */
HSAKMT_STATUS hsakmt_fmm_share_memory(void *MemoryAddress,
				HSAuint64 SizeInBytes,
				HsaSharedMemoryHandle *SharedMemoryHandle)
{
	int r = 0;
	HSAuint32 gpu_id = 0;
	HSAuint32 node_id = 0;
	vm_object_t *obj = NULL;
	manageable_aperture_t *aperture = NULL;
	struct kfd_ioctl_ipc_export_handle_args exportArgs = {0};
	HsaApertureInfo ApeInfo;
	HsaSharedMemoryStruct *SharedMemoryStruct =
		to_hsa_shared_memory_struct(SharedMemoryHandle);

	if (SizeInBytes >= (1ULL << ((sizeof(HSAuint32) * 8) + PAGE_SHIFT)))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	aperture = fmm_find_aperture(MemoryAddress, &ApeInfo);
	if (!aperture)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	pthread_mutex_lock(&aperture->fmm_mutex);
	obj = vm_find_object_by_address(aperture, MemoryAddress, 0);
	if (obj) {
		/* Snapshot the object while it cannot be freed under us */
		node_id = obj->node_id;
		exportArgs.handle = obj->handles[0];
		exportArgs.flags = obj->mflags.Value;
	}
	pthread_mutex_unlock(&aperture->fmm_mutex);
	/* obj is only used as a found/not-found flag from here on */
	if (!obj)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	r = hsakmt_validate_nodeid(node_id, &gpu_id);
	if (r != HSAKMT_STATUS_SUCCESS)
		return r;
	if (!gpu_id && hsakmt_is_dgpu) {
		/* Sharing non paged system memory. Use first GPU which was
		 * used during allocation. See fmm_allocate_host_gpu()
		 */
		if (!g_first_gpu_mem)
			return HSAKMT_STATUS_ERROR;

		gpu_id = g_first_gpu_mem->gpu_id;
	}
	exportArgs.gpu_id = gpu_id;

	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_IPC_EXPORT_HANDLE, (void *)&exportArgs);
	if (r)
		return HSAKMT_STATUS_ERROR;

	memcpy(SharedMemoryStruct->ShareHandle, exportArgs.share_handle,
			sizeof(SharedMemoryStruct->ShareHandle));
	SharedMemoryStruct->ApeInfo = ApeInfo;
	SharedMemoryStruct->SizeInPages = (HSAuint32) (SizeInBytes >> PAGE_SHIFT);
	SharedMemoryStruct->ExportGpuId = gpu_id;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Import a buffer shared by another process through an IPC handle.
 *
 * SharedMemoryHandle: handle produced by hsakmt_fmm_share_memory()
 * SizeInBytes:        out parameter, size of the imported buffer
 * MemoryAddress:      out parameter, address the buffer was placed at
 * gpu_id_array:       GPU IDs to register the buffer with; stored in the
 *                     object on success
 * gpu_id_array_size:  size of gpu_id_array in bytes
 *
 * A virtual address range is reserved in the aperture named by the handle,
 * the buffer is imported at that address and a vm_object is created for it.
 * If the import reports an mmap offset, the buffer is also mapped for CPU
 * access through the render node of the exporting GPU.
 *
 * Every failure unwinds exactly what has been set up so far: the vm_object,
 * the imported KFD handle and the reserved address range, in that order.
 *
 * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER,
 * HSAKMT_STATUS_NO_MEMORY or HSAKMT_STATUS_ERROR.
 */
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle,
						HSAuint64 *SizeInBytes,
						void **MemoryAddress,
						uint32_t *gpu_id_array,
						uint32_t gpu_id_array_size)
{
	int r = 0;
	HSAKMT_STATUS err = HSAKMT_STATUS_ERROR;
	vm_object_t *obj = NULL;
	void *reservedMem = NULL;
	manageable_aperture_t *aperture;
	struct kfd_ioctl_ipc_import_handle_args importArgs = {0};
	struct kfd_ioctl_free_memory_of_gpu_args freeArgs = {0};
	const HsaSharedMemoryStruct *SharedMemoryStruct =
		to_const_hsa_shared_memory_struct(SharedMemoryHandle);
	HSAuint64 SizeInPages = SharedMemoryStruct->SizeInPages;
	HSAuint64 size = SizeInPages << PAGE_SHIFT;
	HsaMemFlags mflags;

	if (gpu_id_array_size > 0 && !gpu_id_array)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	memcpy(importArgs.share_handle, SharedMemoryStruct->ShareHandle,
			sizeof(importArgs.share_handle));
	importArgs.gpu_id = SharedMemoryStruct->ExportGpuId;

	aperture = fmm_get_aperture(SharedMemoryStruct->ApeInfo);
	if (!aperture)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	pthread_mutex_lock(&aperture->fmm_mutex);
	reservedMem = aperture_allocate_area(aperture, NULL, size);
	if (!reservedMem) {
		err = HSAKMT_STATUS_NO_MEMORY;
		goto err_unlock;
	}

	importArgs.va_addr = (uint64_t)reservedMem;
	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_IPC_IMPORT_HANDLE, (void *)&importArgs);
	if (r) {
		err = HSAKMT_STATUS_ERROR;
		goto err_release_area;
	}

	mflags.Value = importArgs.flags;
	obj = aperture_allocate_object(aperture, reservedMem, importArgs.handle,
			size, mflags);
	if (!obj) {
		err = HSAKMT_STATUS_NO_MEMORY;
		goto err_free_handle;
	}

	if (importArgs.mmap_offset) {
		int32_t gpu_mem_id = gpu_mem_find_by_gpu_id(importArgs.gpu_id);
		void *ret;

		if (gpu_mem_id < 0) {
			err = HSAKMT_STATUS_ERROR;
			goto err_remove_object;
		}
		obj->node_id = gpu_mem[gpu_mem_id].node_id;
		pthread_mutex_unlock(&aperture->fmm_mutex);

		ret = fmm_map_to_cpu(reservedMem, size,
				true, gpu_mem[gpu_mem_id].drm_render_fd,
				importArgs.mmap_offset);

		if (ret == MAP_FAILED) {
			/* The unwind labels expect the aperture to be locked */
			pthread_mutex_lock(&aperture->fmm_mutex);
			err = HSAKMT_STATUS_ERROR;
			goto err_remove_object;
		}
	} else {
		pthread_mutex_unlock(&aperture->fmm_mutex);
	}

	*MemoryAddress = reservedMem;
	*SizeInBytes = size;

	if (gpu_id_array_size > 0) {
		obj->registered_device_id_array = gpu_id_array;
		obj->registered_device_id_array_size = gpu_id_array_size;
	}
	obj->is_imported_kfd_bo = true;

	return HSAKMT_STATUS_SUCCESS;

	/* All labels below are entered with the aperture mutex held */
err_remove_object:
	vm_remove_object(aperture, obj);
err_free_handle:
	/* Drop the imported buffer so the handle is not leaked */
	freeArgs.handle = importArgs.handle;
	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &freeArgs) != 0) {
		pr_err("Failed to free GPU memory for handle %llu\n", freeArgs.handle);
	}
err_release_area:
	/* Give the reserved address range back to the aperture */
	aperture_release_area(aperture, reservedMem, size);
err_unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return err;
}

/*
 * Undo a previous registration of the memory at address.
 *
 * - Unknown addresses succeed on APUs and when the SVM API is supported,
 *   otherwise HSAKMT_STATUS_MEMORY_NOT_REGISTERED is returned.
 * - Objects in cpuvm_aperture need no work.
 * - Imported graphics buffers (objects with metadata), userptrs and imported
 *   KFD buffers are released via __fmm_release().
 * - For any other object the registered device and node ID arrays are freed
 *   and the registration count is reset; if nothing was registered,
 *   HSAKMT_STATUS_MEMORY_NOT_REGISTERED is returned.
 */
HSAKMT_STATUS hsakmt_fmm_deregister_memory(void *address)
{
	manageable_aperture_t *aperture;
	vm_object_t *object;
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	object = vm_find_object(address, 0, &aperture);
	if (!object) {
		/* On APUs we assume it's a random system memory address
		 * where registration and deregistration is a no-op
		 */
		if (!hsakmt_is_dgpu || hsakmt_is_svm_api_supported)
			return HSAKMT_STATUS_SUCCESS;
		return HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
	}
	/* Successful vm_find_object returns with aperture locked */

	/* API-allocated system memory on APUs, deregistration
	 * is a no-op
	 */
	if (aperture == &cpuvm_aperture)
		goto unlock;

	if (object->metadata || object->userptr || object->is_imported_kfd_bo) {
		/* An object with metadata is an imported graphics
		 * buffer. Deregistering imported graphics buffers or
		 * userptrs means releasing the BO.
		 */
		pthread_mutex_unlock(&aperture->fmm_mutex);
		__fmm_release(object, aperture);
		return HSAKMT_STATUS_SUCCESS;
	}

	if (!object->registered_device_id_array ||
	    !(object->registered_device_id_array_size > 0)) {
		status = HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
		goto unlock;
	}

	/* Drop the registration together with its cached node ID list */
	free(object->registered_device_id_array);
	object->registered_device_id_array = NULL;
	object->registered_device_id_array_size = 0;

	free(object->registered_node_id_array);
	object->registered_node_id_array = NULL;
	object->registration_count = 0;

unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return status;
}

/*
 * This function unmaps all nodes on current mapped nodes list that are not included on nodes_to_map
 * and maps nodes_to_map
 */

HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(void *address, uint64_t size,
		uint32_t *nodes_to_map, uint64_t num_of_nodes,
		uint64_t *gpuvm_address)
{
	manageable_aperture_t *aperture = NULL;
	vm_object_t *object;
	uint32_t i;
	uint32_t *registered_node_id_array, registered_node_id_array_size;
	HSAKMT_STATUS ret;
	int retcode = 0;

	if (!num_of_nodes || !nodes_to_map || !address)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	object = vm_find_object(address, size, &aperture);
	if (!object && !hsakmt_is_svm_api_supported)
		return HSAKMT_STATUS_ERROR;
	/* Successful vm_find_object returns with aperture locked */

	/* allocates VA only */
	if (object && object->handles[0] == 0) {
		pthread_mutex_unlock(&aperture->fmm_mutex);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* allocates buffer only, should be mapped by GEM API */
	if (aperture == &mem_handle_aperture) {
		pthread_mutex_unlock(&aperture->fmm_mutex);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* APU memory is not supported by this function */
	if (aperture &&
	   (aperture == &cpuvm_aperture || !aperture->is_cpu_accessible)) {
		pthread_mutex_unlock(&aperture->fmm_mutex);
		return HSAKMT_STATUS_ERROR;
	}

	if ((hsakmt_is_svm_api_supported && !object) || object->userptr) {
		retcode = _fmm_map_to_gpu_userptr(address, size, gpuvm_address,
				object, nodes_to_map, num_of_nodes * sizeof(uint32_t));
		if (object)
			pthread_mutex_unlock(&aperture->fmm_mutex);
		return retcode ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
	}

	/* Verify that all nodes to map are registered already */
	registered_node_id_array = all_gpu_id_array;
	registered_node_id_array_size = all_gpu_id_array_size;
	if (object->registered_device_id_array_size > 0 &&
			object->registered_device_id_array) {
		registered_node_id_array = object->registered_device_id_array;
		registered_node_id_array_size = object->registered_device_id_array_size;
	}
	for (i = 0 ; i < num_of_nodes; i++) {
		if (!id_in_array(nodes_to_map[i], registered_node_id_array,
					registered_node_id_array_size)) {
			pthread_mutex_unlock(&aperture->fmm_mutex);
			return HSAKMT_STATUS_ERROR;
		}
	}

	/* Unmap buffer from all nodes that have this buffer mapped that are not included on nodes_to_map array */
	if (object->mapped_device_id_array_size > 0) {
		uint32_t temp_node_id_array[object->mapped_device_id_array_size];
		uint32_t temp_node_id_array_size = 0;

		for (i = 0 ; i < object->mapped_device_id_array_size / sizeof(uint32_t); i++) {
			if (!id_in_array(object->mapped_device_id_array[i],
					nodes_to_map,
					num_of_nodes*sizeof(uint32_t)))
				temp_node_id_array[temp_node_id_array_size++] =
					object->mapped_device_id_array[i];
		}
		temp_node_id_array_size *= sizeof(uint32_t);

		if (temp_node_id_array_size) {
			ret = _fmm_unmap_from_gpu(aperture, address,
					temp_node_id_array,
					temp_node_id_array_size,
					object);
			if (ret != HSAKMT_STATUS_SUCCESS) {
				pthread_mutex_unlock(&aperture->fmm_mutex);
				return ret;
			}
		}
	}

	/* Remove already mapped nodes from nodes_to_map
	 * to generate the final map list
	 */
	uint32_t map_node_id_array[num_of_nodes];
	uint32_t map_node_id_array_size = 0;

	for (i = 0; i < num_of_nodes; i++) {
		if (!id_in_array(nodes_to_map[i],
				object->mapped_device_id_array,
				object->mapped_device_id_array_size))
			map_node_id_array[map_node_id_array_size++] =
				nodes_to_map[i];
	}

	if (map_node_id_array_size)
		retcode = _fmm_map_to_gpu(aperture, address, size, object,
				map_node_id_array,
				map_node_id_array_size * sizeof(uint32_t));

	pthread_mutex_unlock(&aperture->fmm_mutex);

	if (retcode != 0)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Fill in a HsaPointerInfo describing the buffer that contains address.
 *
 * info is zeroed first. If no object contains the address, info->Type is set
 * to HSA_POINTER_UNKNOWN and HSAKMT_STATUS_ERROR is returned.
 *
 * The registered and mapped node ID arrays handed out through info point
 * into per-object caches. They are built on first use from the respective
 * GPU ID arrays and stay owned by the object. If building a cache fails,
 * HSAKMT_STATUS_NO_MEMORY is returned with info only partially filled.
 */
HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
	manageable_aperture_t *aperture;
	vm_object_t *vm_obj;
	uint32_t n;

	memset(info, 0, sizeof(HsaPointerInfo));

	vm_obj = vm_find_object(address, UINT64_MAX, &aperture);
	if (!vm_obj) {
		info->Type = HSA_POINTER_UNKNOWN;
		return HSAKMT_STATUS_ERROR;
	}
	/* Successful vm_find_object returns with the aperture locked */

	/* Classify the pointer; the first matching property wins */
	if (vm_obj->is_imported_kfd_bo)
		info->Type = HSA_POINTER_REGISTERED_SHARED;
	else if (vm_obj->metadata)
		info->Type = HSA_POINTER_REGISTERED_GRAPHICS;
	else if (vm_obj->userptr)
		info->Type = HSA_POINTER_REGISTERED_USER;
	else if (vm_obj->handles[0] == 0)
		info->Type = HSA_POINTER_RESERVED_ADDR;
	else
		info->Type = HSA_POINTER_ALLOCATED;

	info->Node = vm_obj->node_id;
	info->GPUAddress = (HSAuint64)vm_obj->start;
	info->SizeInBytes = vm_obj->size;

	/* registered nodes */
	info->NRegisteredNodes =
		vm_obj->registered_device_id_array_size / sizeof(uint32_t);
	if (info->NRegisteredNodes && !vm_obj->registered_node_id_array) {
		uint32_t *node_ids =
			(uint32_t *)malloc(vm_obj->registered_device_id_array_size);

		vm_obj->registered_node_id_array = node_ids;
		if (!node_ids) {
			status = HSAKMT_STATUS_NO_MEMORY;
			goto unlock;
		}
		/* vm_obj->registered_node_id_array allocated here will be
		 * freed whenever the registration is changed (deregistration or
		 * register to new nodes) or the memory being freed
		 */
		for (n = 0; n < info->NRegisteredNodes; n++)
			hsakmt_gpuid_to_nodeid(vm_obj->registered_device_id_array[n],
				&node_ids[n]);
	}
	info->RegisteredNodes = vm_obj->registered_node_id_array;

	/* mapped nodes */
	info->NMappedNodes =
		vm_obj->mapped_device_id_array_size / sizeof(uint32_t);
	if (info->NMappedNodes && !vm_obj->mapped_node_id_array) {
		uint32_t *node_ids =
			(uint32_t *)malloc(vm_obj->mapped_device_id_array_size);

		vm_obj->mapped_node_id_array = node_ids;
		if (!node_ids) {
			status = HSAKMT_STATUS_NO_MEMORY;
			goto unlock;
		}
		/* vm_obj->mapped_node_id_array allocated here will be
		 * freed whenever the mapping is changed (unmapped or map
		 * to new nodes) or memory being freed
		 */
		for (n = 0; n < info->NMappedNodes; n++)
			hsakmt_gpuid_to_nodeid(vm_obj->mapped_device_id_array[n],
				&node_ids[n]);
	}
	info->MappedNodes = vm_obj->mapped_node_id_array;

	info->UserData = vm_obj->user_data;
	info->MemFlags = vm_obj->mflags;

	switch (info->Type) {
	case HSA_POINTER_REGISTERED_USER:
		/* Report the user's own range, not the page-aligned BO */
		info->CPUAddress = vm_obj->userptr;
		info->SizeInBytes = vm_obj->userptr_size;
		info->GPUAddress += ((HSAuint64)info->CPUAddress & (PAGE_SIZE - 1));
		break;
	case HSA_POINTER_ALLOCATED:
		info->CPUAddress = vm_obj->start;
		break;
	default:
		break;
	}

unlock:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return status;
}

#ifdef SANITIZER_AMDGPU
/* Replace the page at @address with a private anonymous read/write page.
 *
 * The replacement only happens when the vm object containing @address has a
 * non-zero mmap_fd; otherwise the call succeeds without touching the mapping.
 *
 * Returns HSAKMT_STATUS_ERROR when no vm object is found for @address or when
 * the mmap call fails, HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(void* address)
{
	manageable_aperture_t *owner;
	vm_object_t *obj;
	HSAKMT_STATUS status;

	obj = vm_find_object(address, UINT64_MAX, &owner);
	if (obj == NULL)
		return HSAKMT_STATUS_ERROR;

	/* A successful vm_find_object returns with the aperture locked, so
	 * every path below must release owner->fmm_mutex.
	 */
	status = HSAKMT_STATUS_SUCCESS;

	/* GPU-mapped memory: overlay the first page with normal system memory */
	if (obj->mmap_fd != 0) {
		const int prot = PROT_WRITE | PROT_READ;
		const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED;

		if (mmap(address, PAGE_SIZE, prot, flags, -1, 0) == MAP_FAILED)
			status = HSAKMT_STATUS_ERROR;
	}

	pthread_mutex_unlock(&owner->fmm_mutex);
	return status;
}

/* Map the page at @address back onto the file mapping recorded in its vm
 * object (mmap_fd / mmap_offset / mmap_flags).
 *
 * The remap only happens when the vm object has a non-zero mmap_fd. The file
 * offset is the object's mmap_offset plus the distance of @address from the
 * object's start.
 *
 * Returns HSAKMT_STATUS_ERROR when no vm object is found for @address or when
 * the mmap call fails, HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(void* address)
{
	manageable_aperture_t *owner;
	vm_object_t *obj;
	HSAKMT_STATUS status;

	obj = vm_find_object(address, UINT64_MAX, &owner);
	if (obj == NULL)
		return HSAKMT_STATUS_ERROR;

	/* A successful vm_find_object returns with the aperture locked, so
	 * every path below must release owner->fmm_mutex.
	 */
	status = HSAKMT_STATUS_SUCCESS;

	/* GPU-mapped memory: restore the original mapping for the first page */
	if (obj->mmap_fd != 0) {
		const off_t delta = (char*)address - (char*)obj->start;
		const off_t file_offset = obj->mmap_offset + delta;
		void *mapped;

		mapped = mmap(address, PAGE_SIZE, obj->mmap_flags,
			      MAP_SHARED | MAP_FIXED, obj->mmap_fd, file_offset);
		if (mapped == MAP_FAILED)
			status = HSAKMT_STATUS_ERROR;
	}

	pthread_mutex_unlock(&owner->fmm_mutex);
	return status;
}
#endif

/* Attach an opaque user pointer to the vm object found for @mem.
 *
 * Returns HSAKMT_STATUS_ERROR if vm_find_object finds no object for @mem,
 * HSAKMT_STATUS_SUCCESS once @usr_data has been stored.
 */
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data)
{
	manageable_aperture_t *owner;
	vm_object_t *obj = vm_find_object(mem, 0, &owner);

	if (obj) {
		obj->user_data = usr_data;

		/* The lookup succeeded, so the aperture mutex must be released */
		pthread_mutex_unlock(&owner->fmm_mutex);
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}

/* Re-initialize the aperture mutex, then remove every vm object in the
 * aperture's tree and every entry of its vm_ranges list.
 */
static void fmm_clear_aperture(manageable_aperture_t *app)
{
	rbtree_node_t *node;

	pthread_mutex_init(&app->fmm_mutex, NULL);

	/* Keep pulling an arbitrary node until the tree reports none left */
	for (node = rbtree_node_any(&app->tree, MID); node;
	     node = rbtree_node_any(&app->tree, MID))
		vm_remove_object(app, vm_object_entry(node, 0));

	for (;;) {
		void *following;

		if (!app->vm_ranges)
			break;

		/* Read the successor before the current head is removed */
		following = app->vm_ranges->next;
		vm_remove_area(app, app->vm_ranges);
		app->vm_ranges = following;
	}
}

/* This is a special funcion that should be called only from the child process
 * after a fork(). This will clear all vm_objects and mmaps duplicated from
 * the parent.
 */
void hsakmt_fmm_clear_all_mem(void)
{
	uint32_t i;
	void *map_addr;

	/* Close render node FDs. The child process needs to open new ones */
	for (i = 0; i <= DRM_LAST_RENDER_NODE - DRM_FIRST_RENDER_NODE; i++) {

		if (amdgpu_handle[i]) {
			amdgpu_device_deinitialize(amdgpu_handle[i]);
			amdgpu_handle[i] = NULL;
		} else if (drm_render_fds[i]) {
			close(drm_render_fds[i]);
		}
		drm_render_fds[i] = 0;
	}

	fmm_clear_aperture(&mem_handle_aperture);
	fmm_clear_aperture(&cpuvm_aperture);
	fmm_clear_aperture(&svm.apertures[SVM_DEFAULT]);
	fmm_clear_aperture(&svm.apertures[SVM_COHERENT]);

	if (dgpu_shared_aperture_limit) {
		/* Use the same dgpu range as the parent. If failed, then set
		 * hsakmt_is_dgpu_mem_init to false. Later on dgpu_mem_init will try
		 * to get a new range
		 */
		map_addr = mmap(dgpu_shared_aperture_base, (HSAuint64)(dgpu_shared_aperture_limit)-
			(HSAuint64)(dgpu_shared_aperture_base) + 1, PROT_NONE,
			MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED, -1, 0);

		if (map_addr == MAP_FAILED) {
			munmap(dgpu_shared_aperture_base,
				   (HSAuint64)(dgpu_shared_aperture_limit) -
				   (HSAuint64)(dgpu_shared_aperture_base) + 1);

			dgpu_shared_aperture_base = NULL;
			dgpu_shared_aperture_limit = NULL;
		}
	}

	/* Nothing is initialized. */
	if (!gpu_mem)
		return;

	for (i = 0; i < gpu_mem_count; i++) {
		fmm_clear_aperture(&gpu_mem[i].gpuvm_aperture);
		fmm_clear_aperture(&gpu_mem[i].scratch_physical);
	}

	hsakmt_fmm_destroy_process_apertures();
}


================================================
FILE: libhsakmt/src/fmm.h
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef FMM_H_
#define FMM_H_

#include "hsakmt/hsakmttypes.h"
#include <stddef.h>

/* Aperture kinds understood by the FMM layer.
 *
 * FMM_FIRST_APERTURE_TYPE aliases the first real entry (FMM_GPUVM) and
 * FMM_LAST_APERTURE_TYPE follows the last real entry (FMM_MMIO), so the two
 * can serve as loop/range bounds over the valid types.
 */
typedef enum {
	FMM_FIRST_APERTURE_TYPE = 0,
	FMM_GPUVM = FMM_FIRST_APERTURE_TYPE,
	FMM_LDS,
	FMM_SCRATCH,
	FMM_SVM,
	FMM_MMIO,
	FMM_LAST_APERTURE_TYPE
} aperture_type_e;

/* Describes one aperture: its kind, its size and where it starts.
 * NOTE(review): size is presumably in bytes - confirm against users.
 */
typedef struct {
	aperture_type_e app_type;
	uint64_t size;
	void *start_address;
} aperture_properties_t;

/* Process-wide aperture setup and teardown.
 * hsakmt_fmm_destroy_process_apertures() is also invoked by
 * hsakmt_fmm_clear_all_mem() in fmm.c.
 */
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(uint32_t node_id,  HsaAMDGPUDeviceHandle *DeviceHandle);
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes);
void hsakmt_fmm_destroy_process_apertures(void);

/* Memory interface */
void *hsakmt_fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes);
void *hsakmt_fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address,
			uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags flags);
void *hsakmt_fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset);
void *hsakmt_fmm_allocate_host(uint32_t gpu_id, uint32_t node_id, void *address, uint64_t MemorySizeInBytes,
			uint64_t alignment, HsaMemFlags flags);
void hsakmt_fmm_print(uint32_t node);
HSAKMT_STATUS hsakmt_fmm_release(void *address);
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
int hsakmt_fmm_unmap_from_gpu(void *address);
bool hsakmt_fmm_get_handle(void *address, uint64_t *handle);
/* Fills *info with the data recorded for the object containing address */
HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info);
/* Stores usr_data on the object found for mem; HSAKMT_STATUS_ERROR if none */
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data);
#ifdef SANITIZER_AMDGPU
/* Swap the page at address between an anonymous system-memory page
 * (replace) and the object's original file mapping (return).
 */
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(void* address);
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(void* address);
#endif

/* Topology interface */
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSAuint32 gpu_id,
		HSAuint64 *aperture_base, HSAuint64 *aperture_limit);

/* Registration, sharing and multi-node mapping of memory */
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
								  uint32_t *gpu_id_array,
								  uint32_t gpu_id_array_size,
								  bool coarse_grain,
								  bool ext_coherent);
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
					   HsaGraphicsResourceInfo *GraphicsResourceInfo,
					   uint32_t *gpu_id_array,
					   uint32_t gpu_id_array_size,
					   HSA_REGISTER_MEM_FLAGS RegisterFlags);
HSAKMT_STATUS hsakmt_fmm_deregister_memory(void *address);
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(void *MemoryAddress,
				    HSAuint64 MemorySizeInBytes,
				    int *DMABufFd,
				    HSAuint64 *Offset);
HSAKMT_STATUS hsakmt_fmm_share_memory(void *MemoryAddress,
			       HSAuint64 SizeInBytes,
			       HsaSharedMemoryHandle *SharedMemoryHandle);
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle,
					 HSAuint64 *SizeInBytes,
					 void **MemoryAddress,
					 uint32_t *gpu_id_array,
					 uint32_t gpu_id_array_size);
HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(void *address, uint64_t size,
		uint32_t *nodes_to_map, uint64_t num_of_nodes, uint64_t *gpuvm_address);

/* DRM render node and aligned-mmap helpers */
int hsakmt_open_drm_render_device(int minor);
void *hsakmt_mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
			    uint64_t guard_size, void *aper_base, void *aper_limit, int fd);

/* Function pointer for obtaining the fd of an amdgpu device handle.
 * NOTE(review): where it gets assigned is not visible here - confirm.
 */
extern int (*hsakmt_fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
#endif /* FMM_H_ */


================================================
FILE: libhsakmt/src/globals.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"

// HSAKMT global data (definitions of the library-wide state variables)

/* KFD file descriptor; starts at -1. In model mode model_init_env_vars()
 * stores the backing memory file descriptor here instead.
 */
int hsakmt_kfd_fd = -1;
/* NOTE(review): presumably the fd of the udmabuf device; starts at -1 */
int hsakmt_udmabuf_dev_fd = -1;
/* NOTE(review): presumably open/acquire reference counts - confirm usage */
unsigned long hsakmt_kfd_open_count;
unsigned long hsakmt_system_properties_count;
/* Library-wide mutex, statically initialized */
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
bool hsakmt_is_dgpu;

/* NOTE(review): presumably the system page size and its log2 - confirm */
int hsakmt_page_size;
int hsakmt_page_shift;

/* whether to check all dGPUs in the topology support SVM API */
bool hsakmt_is_svm_api_supported;
/* zfb is mainly used during emulation */
int hsakmt_zfb_support;


================================================
FILE: libhsakmt/src/hsakmtmodel.c
================================================
/*
 * Copyright © 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmtmodel.h"
#include "libhsakmt.h"
#include "hsakmt/hsakmttypes.h"
#include "hsakmt/hsakmtmodeliface.h"
#define _GNU_SOURCE
#define __USE_GNU
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dlfcn.h>
#include <sys/mman.h>
#include <fcntl.h>

/* True once model_init_env_vars() has found HSA_MODEL_TOPOLOGY in the
 * environment; hsakmt_ioctl() then routes ioctls to model_kfd_ioctl().
 */
bool hsakmt_use_model;
/* Value of HSA_MODEL_TOPOLOGY; used as the path prefix when reading
 * nodes/<n>/mem_banks/<m>/properties.
 */
char *hsakmt_model_topology;

/* Per-topology-node model state, allocated (zeroed) by model_init(). */
struct model_node
{
	/* Set for nodes whose KFDGpuID is non-zero */
	bool is_gpu;
	/* PROT_NONE reservation of MODEL_APERTURE_SIZE bytes made by model_init() */
	void *aperture;
	/* Handle returned by model_functions->create() */
	hsakmt_model_t *model;
	/* Offset of the node's 8192-byte doorbell region in the backing file */
	uint64_t doorbell_offset;
	/* Sum of size_in_bytes over the node's memory banks */
	uint64_t total_memory_size;
	/* Grows on ALLOC_MEMORY_OF_GPU, shrinks on FREE_MEMORY_OF_GPU */
	uint64_t allocated_memory_size;
};

/* One slot of the model's event table (indexed by event_id - 1). */
struct model_event
{
	/* Copied from the create-event ioctl arguments */
	uint32_t event_type;
	uint32_t auto_reset;
	/* Set to 1 by model_set_event(), cleared on reset / auto reset */
	uint64_t value;
};

/* Bookkeeping for one allocation; its address doubles as the ioctl handle. */
struct model_mem_data
{
	uint64_t va_addr;
	/* Offset of the allocation inside the backing file */
	uint64_t file_offset;
	uint64_t size;
	/* Bit n is set while the memory is mapped into node n's aperture */
	uint64_t mapped_nodes_bitmask;
	uint32_t flags;
	/* Owning node (gpu_id - 1) */
	uint32_t node_id;
};

/* A registered queue and the node whose model owns it. */
struct model_queue
{
	/* Returned by model_functions->register_queue(); NULL marks a free slot */
	hsakmt_model_queue_t *queue;
	uint32_t node_id;
};

#define MAX_MODEL_QUEUES 128
// Use a 256GB aperture for the model.
#define MODEL_APERTURE_SIZE (1llu << 38)
/* Recorded by model_set_mmio_page() */
static void *model_mmio_page;
/* Held for the whole duration of model_kfd_ioctl_locked() */
static pthread_mutex_t model_ioctl_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Number of slots in model_events / bits in model_event_bitmap */
static unsigned model_event_limit;
/* One bit per event slot; a set bit means the slot is allocated */
static uint64_t *model_event_bitmap;
static struct model_event *model_events;
/* Broadcast by model_set_event(); waited on by the WAIT_EVENTS ioctl */
static pthread_cond_t model_event_condvar;
/* dlopen() handle of the library named by HSA_MODEL_LIB */
static void *model_library;
static const struct hsakmt_model_functions *model_functions;
/* Current size of the backing file, grown by allocate_from_memfd() */
static uint64_t model_memfd_size;
static uint64_t model_num_nodes;
static struct model_node *model_nodes;
static struct model_queue model_queues[MAX_MODEL_QUEUES];

/* Report whether the library is running against a model instead of real
 * hardware.
 *
 * @enable: output; receives the current value of hsakmt_use_model.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when @enable is NULL (instead of
 * dereferencing it), HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtModelEnabled(bool* enable)
{
	if (!enable)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*enable = hsakmt_use_model;
	return HSAKMT_STATUS_SUCCESS;
}

/* Read the model-related environment variables and, when a model is
 * requested, prepare everything the rest of the library needs early:
 *
 *  - HSA_MODEL_TOPOLOGY: enables model mode when set.
 *  - HSA_MODEL_MEMFILE:  optional path of a backing file to create; when
 *                        unset, memfd_create() is used if available.
 *  - HSA_MODEL_LIB:      mandatory path of the model shared library.
 *
 * The backing file descriptor is stored in hsakmt_kfd_fd, the event
 * condition variable is initialized on CLOCK_MONOTONIC, and the model's
 * function table is loaded and version-checked. Every failure aborts.
 */
void model_init_env_vars(void)
{
	/* Check whether to use a model instead of real hardware */
	hsakmt_model_topology = getenv("HSA_MODEL_TOPOLOGY");
	if (hsakmt_model_topology)
		hsakmt_use_model = true;
	if (!hsakmt_use_model)
		return;

	/* Backing memory file is used to stand in for the kfd_fd,
	 * which is needed early, so create it already.
	 *
	 * For old systems without memfd_create, or if the user prefers,
	 * we create a regular backing file. Prefer to use memfd_create
	 * by default where possible.
	 */
	int fd = -1;
	const char *fname = getenv("HSA_MODEL_MEMFILE");
	if (fname)
	{
		fprintf(stderr, "model: use memory backing file given in HSA_MODEL_MEMFILE: %s\n", fname);

		fd = open(fname, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, S_IRUSR | S_IWUSR);
		if (fd < 0)
		{
			perror("model: failed to create backing file");
			abort();
		}

		/* The open descriptor keeps the file alive; drop the name now */
		unlink(fname);
	}

	if (fd < 0)
	{
#ifdef HAVE_MEMFD_CREATE
		fd = memfd_create("hsakmt_model", MFD_CLOEXEC);
		if (fd < 0)
		{
			fprintf(stderr, "model: Failed to create memfd\n");
			abort();
		}
#else
		fprintf(stderr, "model: built without memfd support\n"
						"model: set HSA_MODEL_MEMFILE to path of a backing file\n");
		abort();
#endif
	}
	assert(hsakmt_kfd_fd < 0);
	hsakmt_kfd_fd = fd;

	/* Timed waits compute their deadline with CLOCK_MONOTONIC */
	pthread_condattr_t condattr;
	pthread_condattr_init(&condattr);
	pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
	pthread_cond_init(&model_event_condvar, &condattr);
	pthread_condattr_destroy(&condattr);

	const char *libname = getenv("HSA_MODEL_LIB");
	if (!libname)
	{
		fprintf(stderr, "model: HSA_MODEL_LIB environment variable must be set to FFM .so\n");
		abort();
	}
	// model_library = dlmopen(LM_ID_NEWLM, libname, RTLD_NOW);
	model_library = dlopen(libname, RTLD_NOW | RTLD_LOCAL);
	if (!model_library)
	{
		fprintf(stderr, "model: failed to load %s: %s\n", libname, dlerror());
		abort();
	}
	get_hsakmt_model_functions_t getter = dlsym(model_library, "get_hsakmt_model_functions");
	if (!getter)
	{
		fprintf(stderr, "model: Failed to get hsakmt_model_functions\n");
		abort();
	}
	model_functions = getter();
	/* Do not trust the library blindly: the table is dereferenced below */
	if (!model_functions)
	{
		fprintf(stderr, "model: get_hsakmt_model_functions returned NULL\n");
		abort();
	}
	if (model_functions->version_major != HSAKMT_MODEL_INTERFACE_VERSION_MAJOR ||
		model_functions->version_minor < HSAKMT_MODEL_INTERFACE_VERSION_MINOR)
	{
		fprintf(stderr, "model: Model has interface version %u.%u, need version %u.%u\n",
				model_functions->version_major, model_functions->version_minor,
				HSAKMT_MODEL_INTERFACE_VERSION_MAJOR, HSAKMT_MODEL_INTERFACE_VERSION_MINOR);
		abort();
	}
}

/* Carve @size bytes out of the backing file behind hsakmt_kfd_fd.
 *
 * @size is rounded up to a multiple of 4096. @align must be a power of two
 * of at least 4096; 0 selects 4096. The file is grown with ftruncate() and
 * the process aborts if that fails.
 *
 * Returns the file offset at which the new region starts.
 */
static uint64_t allocate_from_memfd(uint64_t size, uint64_t align)
{
	uint64_t region_start;

	if (align == 0)
		align = 4096;
	assert(POWER_OF_2(align)); /* must be power of two */
	assert(align >= 4096);

	/* Whole pages only */
	size = (size + 4095) / 4096 * 4096;

	/* Bump-allocate: align the current end of file, then extend past it */
	region_start = (model_memfd_size + align - 1) & ~(align - 1);
	model_memfd_size = region_start + size;

	if (ftruncate(hsakmt_kfd_fd, model_memfd_size) < 0)
	{
		fprintf(stderr, "model: ftruncate on memfd failed\n");
		abort();
	}

	return region_start;
}
/* Read the "size_in_bytes" property of one memory bank from the model
 * topology directory.
 *
 * The file "<hsakmt_model_topology>/nodes/<node_id>/mem_banks/<mem_id>/properties"
 * is parsed as a sequence of "<name> <unsigned value>" pairs.
 *
 * Returns the value of size_in_bytes. Aborts if the path does not fit the
 * buffer, the file cannot be opened, or the property is missing.
 */
static uint64_t get_sysfs_mem_bank_size(unsigned node_id, unsigned mem_id)
{
	char prop_name[256];
	char path[256];
	int len = snprintf(path, sizeof(path), "%s/nodes/%u/mem_banks/%u/properties",
					   hsakmt_model_topology, node_id, mem_id);
	/* A truncated path would silently open the wrong file (or none) */
	if (len < 0 || (size_t)len >= sizeof(path))
	{
		fprintf(stderr, "model: Topology path too long for node %u mem bank %u\n",
				node_id, mem_id);
		abort();
	}
	FILE *f = fopen(path, "r");
	if (!f)
	{
		fprintf(stderr, "model: Failed to open %s\n", path);
		abort();
	}
	uint64_t prop_val;
	/* The field width (255) keeps the name within prop_name[256] including
	 * the terminating NUL; an unbounded %s could overflow the buffer. */
	while (fscanf(f, "%255s %" PRIu64 "\n", prop_name, &prop_val) == 2)
	{
		if (!strcmp(prop_name, "size_in_bytes"))
		{
			fclose(f);
			return prop_val;
		}
	}
	fclose(f);
	fprintf(stderr, "model: Missing size_in_bytes in %s\n", path);
	abort();
}

static void model_set_event(void *data, unsigned event_id)
{
	if (!event_id)
		return;

	if (event_id > model_event_limit)
	{
		fprintf(stderr, "model_set_event: event_id = %u out of bounds\n",
				event_id);
		abort();
	}

	unsigned slot = event_id - 1;

	if (!((model_event_bitmap[slot / 64] >> (slot % 64)) & 1))
	{
		fprintf(stderr, "model_set_event: event_id = %u is not allocated\n",
				event_id);
		abort();
	}

	struct model_event *event = &model_events[slot];
	if (event->event_type == HSA_EVENTTYPE_SIGNAL)
	{
		assert(model_events[slot].value <= 1);
		model_events[slot].value = 1;
	}
	else
	{
		fprintf(stderr, "model: Unimplemented event type\n");
		abort();
	}

	pthread_cond_broadcast(&model_event_condvar);
}

void model_init(void)
{
	if (!hsakmt_use_model)
		return;
	HSAKMT_STATUS result;
	HsaSystemProperties props;
	/* Read the topology to determine nodes. */
	result = hsakmt_topology_sysfs_get_system_props(&props);
	if (result != HSAKMT_STATUS_SUCCESS)
	{
		fprintf(stderr, "model: Failed to parse topology\n");
		abort();
	}
	model_nodes = calloc(props.NumNodes, sizeof(*model_nodes));
	if (!model_nodes)
		abort();
	model_num_nodes = props.NumNodes;
	for (unsigned node_id = 0; node_id < props.NumNodes; node_id++)
	{
		HsaNodeProperties node_props;
		result = hsakmt_topology_get_node_props(node_id, &node_props);
		if (result != HSAKMT_STATUS_SUCCESS)
		{
			fprintf(stderr, "model: Failed to get node %u properties\n", node_id);
			abort();
		}
		if (node_props.KFDGpuID == 0)
			continue;
		if (node_props.KFDGpuID != node_id + 1)
		{
			fprintf(stderr,
					"model: Node %u has KFD GPU ID %u, but should be %u."
					" Please change the gpu_id file.\n",
					node_id, node_props.KFDGpuID, node_id + 1);
			abort();
		}
		model_nodes[node_id].is_gpu = true;
		/* Reserve the VA space for the aperture, but don't fill it with pages. */
		model_nodes[node_id].aperture =
			mmap(NULL, MODEL_APERTURE_SIZE, PROT_NONE,
				 MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0);
		pr_debug("Modeling Creating Memory Aperture: %p\n", model_nodes[node_id].aperture);
		if (model_nodes[node_id].aperture == MAP_FAILED)
		{
			fprintf(stderr, "model: Failed to reserve aperture via mmap\n");
			abort();
		}
		/* Create the doorbell region */
		model_nodes[node_id].doorbell_offset = allocate_from_memfd(8192, 8192);
		for (unsigned mem_id = 0; mem_id < node_props.NumMemoryBanks; ++mem_id)
		{
			model_nodes[node_id].total_memory_size += get_sysfs_mem_bank_size(node_id, mem_id);
		}
		/* Create the model */
		// TODO: Move this into a separate thread
		model_nodes[node_id].model = model_functions->create();
		if (!model_nodes[node_id].model)
		{
			fprintf(stderr, "model: Failed to create model\n");
			abort();
		}
		model_functions->set_global_aperture(model_nodes[node_id].model,
											 model_nodes[node_id].aperture,
											 MODEL_APERTURE_SIZE);

		model_functions->set_set_event(model_nodes[node_id].model, model_set_event, NULL);
	}
}
/* Remember the MMIO page pointer for the model.
 * The page may be registered only once; a second call trips the assertion.
 */
void model_set_mmio_page(void *ptr)
{
	assert(!model_mmio_page);

	model_mmio_page = ptr;
}
/* Size and allocate the model's event tables.
 *
 * @ptr:         event page pointer; currently unused by the model.
 * @event_limit: number of events the caller advertises; must be a multiple
 *               of 64. Twice that many slots are allocated (see TODO).
 *
 * May only be called once. Aborts if the tables cannot be allocated, so that
 * later event ioctls never dereference a NULL table.
 */
void model_set_event_page(void *ptr, unsigned event_limit)
{
	// TODO: Fully understand what's happening with this page and the event limit.
	//       ROCR-Runtime allocates a pool of 4096 events, but also a handful or so
	//       of additional events, which blows through the event_limit of 4096
	//       that is passed here. And it seems that not using the page at all
	//       is supported?
	assert(!model_event_limit);
	assert(event_limit % 64 == 0);
	event_limit *= 2;

	/* One bitmap bit and one model_event per slot */
	model_event_bitmap = calloc(event_limit / 64, sizeof(*model_event_bitmap));
	model_events = calloc(event_limit, sizeof(*model_events));

	/* calloc may legitimately return NULL for a zero-sized request */
	if (event_limit && (!model_event_bitmap || !model_events))
	{
		fprintf(stderr, "model: Failed to allocate event tables\n");
		abort();
	}

	/* Publish the limit only once the tables behind it exist */
	model_event_limit = event_limit;
}
/* Model implementation of KFD ioctl. */

/* Emulate one KFD ioctl against the model.
 *
 * @request: AMDKFD ioctl request code.
 * @arg:     pointer to the request-specific kfd_ioctl_*_args structure.
 *
 * Must be called with model_ioctl_mutex held (model_kfd_ioctl() does this);
 * the WAIT_EVENTS path waits on model_event_condvar with that mutex.
 *
 * Returns 0 on success. AMDKFD_IOC_SET_XNACK_MODE returns -1 with errno set
 * to EPERM when asked to change the mode. Unimplemented requests and
 * inconsistent arguments abort the process.
 */
static int model_kfd_ioctl_locked(unsigned long request, void *arg)
{
	assert(_IOC_TYPE(request) == AMDKFD_IOCTL_BASE);
	if (_IOC_NR(request) == 0x20)
	{
		// This is AMDKFD_IOC_SVM. It is defined / used in an unusual way.
		struct kfd_ioctl_svm_args *args = arg;
		if (args->op == KFD_IOCTL_SVM_OP_SET_ATTR)
		{
			// todo?
			return 0;
		}
		fprintf(stderr, "model: Unimplemented SVM op\n");
		abort();
	}
	switch (request)
	{
	case AMDKFD_IOC_GET_VERSION:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_GET_VERSION\n");
		struct kfd_ioctl_get_version_args *args = arg;
		args->major_version = 1;
		args->minor_version = 14;
		return 0;
	}
	case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
		struct kfd_ioctl_get_process_apertures_new_args *args = arg;
		struct kfd_process_device_apertures *apertures =
			(void *)args->kfd_process_device_apertures_ptr;
		assert(args->num_of_nodes == model_num_nodes);
		for (unsigned node_id = 0; node_id < args->num_of_nodes; ++node_id)
		{
			memset(&apertures[node_id], 0, sizeof(apertures[node_id]));
			if (!model_nodes[node_id].is_gpu)
				continue;
			apertures[node_id].gpu_id = 1 + node_id;
			apertures[node_id].gpuvm_base = 0x4000llu;
			apertures[node_id].gpuvm_limit = MODEL_APERTURE_SIZE;
			apertures[node_id].lds_base = 0x4000000000000000llu; // 0x1000000000000?
			apertures[node_id].lds_limit = 0x40000000ffffffffllu;
			apertures[node_id].scratch_base = 0x5000000000000000llu; // 0x2000000000000?
			apertures[node_id].scratch_limit = 0x50000000ffffffffllu;
		}
		return 0;
	}
	case AMDKFD_IOC_SET_XNACK_MODE:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_SET_XNACK_MODE\n");
		// Don't support XNACK
		struct kfd_ioctl_set_xnack_mode_args *args = arg;
		if (args->xnack_enabled < 0)
		{
			// A negative value is a query: report XNACK as disabled.
			args->xnack_enabled = 0;
			return 0;
		}
		errno = EPERM;
		return -1;
	}
	case AMDKFD_IOC_GET_CLOCK_COUNTERS:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
		struct kfd_ioctl_get_clock_counters_args *args = arg;
		args->gpu_clock_counter = 0; // TODO
		args->cpu_clock_counter = 0;
		args->system_clock_counter = 0;
		args->system_clock_freq = 0;
		return 0;
	}
	case AMDKFD_IOC_ACQUIRE_VM:
		pr_debug("MODEL IOCTL: AMDKFD_IOC_ACQUIRE_VM\n");
		return 0;
	case AMDKFD_IOC_SET_MEMORY_POLICY:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_SET_MEMORY_POLICY\n");
		// todo?
		return 0;
	}
	case AMDKFD_IOC_AVAILABLE_MEMORY:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_AVAILABLE_MEMORY\n");
		static const uint64_t minimum_reported = 128 * 1024 * 1024;
		struct kfd_ioctl_get_available_memory_args *args = arg;
		unsigned node_id = args->gpu_id - 1;
		// Validate the index before forming a pointer into model_nodes.
		assert(node_id < model_num_nodes);
		struct model_node *node = &model_nodes[node_id];
		// Never report less than minimum_reported, even when over-allocated.
		if (node->allocated_memory_size + minimum_reported >= node->total_memory_size)
			args->available = minimum_reported;
		else
			args->available = node->total_memory_size - node->allocated_memory_size;
		return 0;
	}
	case AMDKFD_IOC_ALLOC_MEMORY_OF_GPU:
	{
		// Expect an SVM style allocation: The memory is allocated on the host
		// side e.g. via mmap(), and this IOCTL "only" registers the memory
		// with the GPU. This is a no-op for us because we aren't a GPU.
		struct kfd_ioctl_alloc_memory_of_gpu_args *args = arg;
		unsigned node_id = args->gpu_id - 1;
		assert(node_id < model_num_nodes);
		assert(model_nodes[node_id].is_gpu);
		if (args->va_addr == 0)
		{
			fprintf(stderr, "model: Expect only SVM allocations?\n");
			abort();
		}
		if (args->size % PAGE_SIZE != 0)
		{
			fprintf(stderr, "model: Allocation size not a multiple of page size\n");
			abort();
		}
		if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
		{
			fprintf(stderr, "model: userptr not supported\n");
			abort();
		}
		struct model_mem_data *mem_data = calloc(1, sizeof(*mem_data));
		if (!mem_data)
			abort();
		mem_data->va_addr = args->va_addr;
		mem_data->size = args->size;
		mem_data->flags = args->flags;
		mem_data->node_id = node_id;
		if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
		{
			// Doorbells reuse the region reserved for the node in model_init().
			assert(args->size == 8192);
			mem_data->file_offset = model_nodes[node_id].doorbell_offset;
		}
		else
		{
			mem_data->file_offset = allocate_from_memfd(args->size, 0);
		}
		// The bookkeeping pointer itself serves as the opaque handle.
		args->handle = (__u64)mem_data;
		args->mmap_offset = mem_data->file_offset;
		model_nodes[node_id].allocated_memory_size += args->size;
		pr_debug("MODEL IOCTL: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU: VA: %lx : Size: %lu, Flags: %x\n", mem_data->va_addr, mem_data->size, mem_data->flags);
		model_functions->alloced_memory(model_nodes[node_id].model, (uint64_t *)mem_data->va_addr, mem_data->size, mem_data->flags);
		return 0;
	}
	case AMDKFD_IOC_FREE_MEMORY_OF_GPU:
	{
		struct kfd_ioctl_free_memory_of_gpu_args *args = arg;
		struct model_mem_data *mem_data = (void *)args->handle;
		assert(!mem_data->mapped_nodes_bitmask);
		// Free the memory by punching a hole into the underlying memfd.
		//
		// Ideally, we'd also remember holes in the file and re-use them for
		// allocations to avoid the file size from growing indefinitely. It's
		// unclear whether the current implementation causes kernel data
		// structures to grow. But in practice, it almost certainly never
		// matters.
		int ret = fallocate(hsakmt_kfd_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
							mem_data->file_offset, mem_data->size);
		if (ret != 0)
		{
			perror("model: failed to punch hole in memfd");
			abort();
		}
		model_nodes[mem_data->node_id].allocated_memory_size -= mem_data->size;
		model_functions->freed_memory(model_nodes[mem_data->node_id].model, (uint64_t *)mem_data->va_addr, mem_data->size);
		pr_debug("MODEL IOCTL: AMDKFD_IOC_FREE_MEMORY_OF_GPU: VA: %lx : Size: %lu, Flags: %x\n", mem_data->va_addr, mem_data->size, mem_data->flags);
		free(mem_data);
		return 0;
	}
	case AMDKFD_IOC_MAP_MEMORY_TO_GPU:
	{
		struct kfd_ioctl_map_memory_to_gpu_args *args = arg;
		struct model_mem_data *mem_data = (void *)args->handle;
		while (args->n_success < args->n_devices)
		{
			uint32_t gpu_id = ((uint32_t *)args->device_ids_array_ptr)[args->n_success];
			uint32_t node_id = gpu_id - 1;
			assert(node_id < model_num_nodes);
			if (mem_data->mapped_nodes_bitmask & (1llu << node_id))
			{
				fprintf(stderr, "model: Already mapped\n");
				abort();
			}
			assert(model_nodes[node_id].aperture);
			unsigned prot = PROT_READ;
			if (mem_data->flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
				prot |= PROT_WRITE;
			// TODO: Mark *shader*-executable memory?

			pr_debug("MODEL IOCTL: AMDKFD_IOC_MAP_MEMORY_TO_GPU: VA: %lx : Size: %lu, Flags: %x\n", mem_data->va_addr, mem_data->size, mem_data->flags);
			// Place the backing-file pages at aperture + va_addr for this node.
			void *ret = mmap(VOID_PTR_ADD(model_nodes[node_id].aperture, mem_data->va_addr),
							 mem_data->size, prot,
							 MAP_SHARED | MAP_FIXED, hsakmt_kfd_fd, mem_data->file_offset);
			if (ret == MAP_FAILED)
			{
				fprintf(stderr, "model: mmap failed\n");
				abort();
			}
			mem_data->mapped_nodes_bitmask |= (1llu << node_id);
			args->n_success++;
		}
		return 0;
	}
	case AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
		struct kfd_ioctl_unmap_memory_from_gpu_args *args = arg;
		struct model_mem_data *mem_data = (void *)args->handle;
		while (args->n_success < args->n_devices)
		{
			uint32_t gpu_id = ((uint32_t *)args->device_ids_array_ptr)[args->n_success];
			uint32_t node_id = gpu_id - 1;
			assert(node_id < model_num_nodes);
			if (!(mem_data->mapped_nodes_bitmask & (1llu << node_id)))
			{
				fprintf(stderr, "model: Not mapped\n");
				abort();
			}
			assert(model_nodes[node_id].aperture);
			/* Overwrite the mapping with an empty mapping to keep
			 * it reserved. */
			void *ret = mmap(VOID_PTR_ADD(model_nodes[node_id].aperture, mem_data->va_addr),
							 mem_data->size, PROT_NONE,
							 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE, -1, 0);
			if (ret == MAP_FAILED)
			{
				perror("model: unmap failed");
				abort();
			}
			mem_data->mapped_nodes_bitmask &= ~(1llu << node_id);
			args->n_success++;
		}
		args->n_success = args->n_devices;
		return 0;
	}
	case AMDKFD_IOC_CREATE_EVENT:
	{
		struct kfd_ioctl_create_event_args *args = arg;
		pr_debug("MODEL IOCTL: AMDKFD_IOC_CREATE_EVENT: %u\n", args->event_type);
		// Find a free slot
		unsigned i;
		for (i = 0; i < model_event_limit; i += 64)
		{
			uint64_t bitmap = model_event_bitmap[i / 64];
			if (bitmap == ~(uint64_t)0)
				continue;
			// First clear bit of this word (ffsll is 1-based).
			i += ffsll(~bitmap) - 1;
			break;
		}
		if (i >= model_event_limit)
		{
			fprintf(stderr, "model: Ran out of event slots. Should be an application error.\n");
			abort();
		}
		// Allocate the signal
		model_event_bitmap[i / 64] |= (uint64_t)1 << (i % 64);
		model_events[i].event_type = args->event_type;
		model_events[i].auto_reset = args->auto_reset;
		model_events[i].value = 0;
		args->event_trigger_data = 0xbadf001; // ???
		args->event_id = 1 + i;
		args->event_slot_index = ~0;
		return 0;
	}
	case AMDKFD_IOC_WAIT_EVENTS:
	{
		struct kfd_ioctl_wait_events_args *args = arg;
		struct kfd_event_data *events = (void *)args->events_ptr;
		pr_debug("MODEL IOCTL: AMDKFD_IOC_WAIT_EVENTS: %u\n", args->num_events);
		// Reject IDs outside the event table up front; indexing model_events
		// with them would read (and later write) out of bounds.
		for (unsigned i = 0; i < args->num_events; ++i)
		{
			if (events[i].event_id == 0 || events[i].event_id > model_event_limit)
			{
				fprintf(stderr, "model: trying to wait on an event that doesn't exist.\n");
				abort();
			}
		}
		// A timeout of 0xffffffff means "wait forever".
		bool have_timeout = args->timeout != 0xffffffffu;
		bool hit_timeout = false;
		struct timespec timeout;
		if (have_timeout)
		{
			// Absolute deadline on the clock the condvar was created with.
			clock_gettime(CLOCK_MONOTONIC, &timeout);
			timeout.tv_sec += args->timeout / 1000;
			timeout.tv_nsec += (args->timeout % 1000) * 1000000;
			// tv_nsec must stay below one second: exactly 1000000000 is
			// already invalid and makes pthread_cond_timedwait fail.
			if (timeout.tv_nsec >= 1000000000)
			{
				timeout.tv_nsec -= 1000000000;
				timeout.tv_sec++;
			}
		}
		for (;;)
		{
			// wait_for_all: start "ready" and stop at the first unready event.
			// wait-for-any: start "not ready" and stop at the first ready one.
			bool final_ready = args->wait_for_all;
			for (unsigned i = 0; i < args->num_events; ++i)
			{
				unsigned slot = events[i].event_id - 1;
				struct model_event *event = &model_events[slot];
				bool this_ready = false;
				if (event->event_type == HSA_EVENTTYPE_SIGNAL)
				{
					uint64_t current_age = event->value;
					uint64_t target_age = events[i].signal_event_data.last_event_age;
					this_ready = current_age >= target_age;
				}
				else if (event->event_type == HSA_EVENTTYPE_HW_EXCEPTION ||
						 event->event_type == HSA_EVENTTYPE_NODECHANGE ||
						 event->event_type == HSA_EVENTTYPE_DEVICESTATECHANGE ||
						 event->event_type == HSA_EVENTTYPE_DEBUG_EVENT ||
						 event->event_type == HSA_EVENTTYPE_PROFILE_EVENT ||
						 event->event_type == HSA_EVENTTYPE_MEMORY)
				{
					// These never happen in the model
				}
				else
				{
					fprintf(stderr, "model: Unimplemented event type\n");
					abort();
				}
				if (final_ready != this_ready)
				{
					final_ready = this_ready;
					break;
				}
			}
			if (final_ready)
				break;
			if (have_timeout)
			{
				int ret = pthread_cond_timedwait(
					&model_event_condvar, &model_ioctl_mutex, &timeout);
				if (ret == ETIMEDOUT)
				{
					hit_timeout = true;
					break;
				}
			}
			else
			{
				pthread_cond_wait(&model_event_condvar, &model_ioctl_mutex);
			}
		}
		/* Record most recent event ages and perform auto reset. */
		for (unsigned i = 0; i < args->num_events; ++i)
		{
			unsigned slot = events[i].event_id - 1;
			struct model_event *event = &model_events[slot];
			if (event->event_type == HSA_EVENTTYPE_SIGNAL)
			{
				uint64_t last_age = event->value;
				if (event->auto_reset && last_age >= events[i].signal_event_data.last_event_age)
					event->value = 0;
				events[i].signal_event_data.last_event_age = last_age;
			}
		}
		args->wait_result = hit_timeout ? KFD_IOC_WAIT_RESULT_TIMEOUT
										: KFD_IOC_WAIT_RESULT_COMPLETE;
		return 0;
	}
	case AMDKFD_IOC_SET_EVENT:
	{
		struct kfd_ioctl_set_event_args *args = arg;
		model_set_event(NULL, args->event_id);
		return 0;
	}
	case AMDKFD_IOC_RESET_EVENT:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_RESET_EVENT\n");
		struct kfd_ioctl_reset_event_args *args = arg;
		unsigned slot = args->event_id - 1;
		// event_id 0 wraps to UINT_MAX here and is rejected as well.
		if (slot >= model_event_limit)
		{
			fprintf(stderr, "model: trying to reset an event that doesn't exist.\n");
			abort();
		}
		struct model_event *event = &model_events[slot];
		if (event->event_type == HSA_EVENTTYPE_SIGNAL)
		{
			model_events[slot].value = 0;
		}
		else
		{
			fprintf(stderr, "model: Unimplemented event type\n");
			abort();
		}
		return 0;
	}
	case AMDKFD_IOC_DESTROY_EVENT:
	{
		struct kfd_ioctl_destroy_event_args *args = arg;
		unsigned i = args->event_id - 1;
		if (i >= model_event_limit || !(model_event_bitmap[i / 64] & ((uint64_t)1 << (i % 64))))
		{
			fprintf(stderr, "model: trying to destroy an event that doesn't exist.\n");
			abort();
		}
		memset(&model_events[i], 0, sizeof(model_events[i]));
		model_event_bitmap[i / 64] &= ~((uint64_t)1 << (i % 64));
		return 0;
	}
	case AMDKFD_IOC_CREATE_QUEUE:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_CREATE_QUEUE\n");
		struct kfd_ioctl_create_queue_args *args = arg;
		unsigned node_id = args->gpu_id - 1;
		assert(node_id < model_num_nodes);
		assert(model_nodes[node_id].model);
		const bool supported_queue_type = args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL ||
										  args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA;
		if (!supported_queue_type)
		{
			fprintf(stderr, "model: Unsupported queue type\n");
			abort();
		}
		// The first slot without a queue pointer is free.
		unsigned queue_id = 0;
		while (queue_id < MAX_MODEL_QUEUES && model_queues[queue_id].queue)
			queue_id++;
		if (queue_id >= MAX_MODEL_QUEUES)
		{
			fprintf(stderr, "model: too many queues\n");
			abort();
		}
		struct hsakmt_model_queue_info info = {0};
		info.ring_base_address = args->ring_base_address;
		info.ring_size = args->ring_size;
		info.write_pointer_address = args->write_pointer_address;
		info.read_pointer_address = args->read_pointer_address;
		info.queue_type = args->queue_type;
		model_queues[queue_id].queue =
			model_functions->register_queue(model_nodes[node_id].model, &info);
		model_queues[queue_id].node_id = node_id;
		args->queue_id = queue_id;
		// Note that strictly speaking, this is the offset into the hsakmt_kfd_fd
		// file, not the DRM fd (but they are the same in our case).
		args->doorbell_offset = model_nodes[node_id].doorbell_offset + 8 * queue_id;
		return 0;
	}
	case AMDKFD_IOC_DESTROY_QUEUE:
	{
		struct kfd_ioctl_destroy_queue_args *args = arg;
		if (args->queue_id >= MAX_MODEL_QUEUES || !model_queues[args->queue_id].queue)
		{
			fprintf(stderr, "model: trying to destroy a queue that doesn't exist\n");
			abort();
		}
		struct model_queue *queue = &model_queues[args->queue_id];
		// Older model versions simply leak the queue.
		if (model_functions->version_minor >= 3)
			model_functions->destroy_queue(model_nodes[queue->node_id].model, queue->queue);
		queue->queue = NULL;
		return 0;
	}
	case AMDKFD_IOC_GET_TILE_CONFIG:
	{
		pr_debug("MODEL IOCTL: AMDKFD_IOC_GET_TILE_CONFIG\n");
		struct kfd_ioctl_get_tile_config_args *args = arg;
		args->gb_addr_config = 0x10000444;
		return 0;
	}
	case AMDKFD_IOC_SET_SCRATCH_BACKING_VA:
		pr_debug("MODEL IOCTL: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
		// no-op -- scratch allocations are communicated via amd_queue_s
		return 0;
	case AMDKFD_IOC_RUNTIME_ENABLE:
		pr_debug("MODEL IOCTL: AMDKFD_IOC_RUNTIME_ENABLE\n");
		fprintf(stderr, "model: Debugger runtime not implemented\n");
		fprintf(stderr, "Fix this by clearing bit 30 of the 'capability' field in $HSA_MODEL_TOPOLOGY/%%d/properties\n");
		abort();
	default:
		fprintf(stderr, "model: Unimplemented KFD ioctl\n");
		abort();
	}
}
/*
 * Entry point for emulated KFD ioctls.
 *
 * Every request is serialized through model_ioctl_mutex and then handed to
 * model_kfd_ioctl_locked(). A single coarse lock is used for correctness:
 * IOCTLs should be rare and uncontended compared to the cost of running the
 * model itself, and the bulk of model execution happens in a separate thread
 * *without* holding this mutex.
 *
 * Returns whatever model_kfd_ioctl_locked() returned.
 */
int model_kfd_ioctl(unsigned long request, void *arg)
{
	int status;

	pthread_mutex_lock(&model_ioctl_mutex);
	status = model_kfd_ioctl_locked(request, arg);
	pthread_mutex_unlock(&model_ioctl_mutex);

	return status;
}

================================================
FILE: libhsakmt/src/libhsakmt.c
================================================
#include <stdio.h>
#include <errno.h>
#include <sys/ioctl.h>

#include "libhsakmt.h"
#include "hsakmt/hsakmtmodel.h"

/*
 * ioctl() wrapper used for KFD requests.
 *
 * When hsakmt_use_model is set the request is routed to model_kfd_ioctl()
 * and fd is not used. Otherwise the real ioctl is issued and restarted for
 * as long as it fails with EINTR or EAGAIN.
 *
 * Returns the value of the underlying call (for the real ioctl: -1 with
 * errno set on failure).
 */
int hsakmt_ioctl(int fd, unsigned long request, void *arg)
{
	int rc;

	if (hsakmt_use_model)
		return model_kfd_ioctl(request, arg);

	for (;;) {
		rc = ioctl(fd, request, arg);
		if (rc != -1)
			break;
		/* Only interrupted / try-again failures are retried. */
		if (errno != EINTR && errno != EAGAIN)
			break;
	}

	if (rc == -1 && errno == EBADF) {
		/* In case pthread_atfork didn't catch it, this will
		 * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
		 */
		pr_err("KFD file descriptor not valid in this process\n");
		hsakmt_is_forked_child();
	}

	return rc;
}


================================================
FILE: libhsakmt/src/libhsakmt.h
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef LIBHSAKMT_H_INCLUDED
#define LIBHSAKMT_H_INCLUDED

#include "hsakmt/linux/kfd_ioctl.h"
#include "hsakmt/hsakmt.h"
#include <pthread.h>
#include <stdint.h>
#include <limits.h>

/*
 * Library-wide state shared between the libhsakmt source files. The
 * variables are only declared here; their definitions live elsewhere.
 */

/* File descriptor that the hsaKmt* entry points pass to hsakmt_ioctl(). */
extern int hsakmt_kfd_fd;
/* NOTE(review): presumably the fd of the udmabuf device -- confirm at the definition. */
extern int hsakmt_udmabuf_dev_fd;
/* Zero means "KFD not opened"; tested by CHECK_KFD_OPEN(). */
extern unsigned long hsakmt_kfd_open_count;
/* When true, CHECK_KFD_OPEN() makes API calls fail in this process. */
extern bool hsakmt_forked;
extern pthread_mutex_t hsakmt_mutex;
/* Selects the dGPU code paths, e.g. in the memory registration APIs. */
extern bool hsakmt_is_dgpu;
extern bool hsakmt_is_svm_api_supported;
/* Non-zero routes GPU allocations through the host allocator (see hsaKmtAllocMemoryAlign). */
extern int hsakmt_zfb_support;

/* KernelInterfaceMinorVersion of this struct is checked by CHECK_KFD_MINOR_VERSION(). */
extern HsaVersionInfo hsakmt_kfd_version_info;

/*
 * Redefine HSAKMTAPI so that functions tagged with it get default symbol
 * visibility when this library is built.
 */
#undef HSAKMTAPI
#define HSAKMTAPI __attribute__((visibility ("default")))

/*
 * SANITIZER_AMDGPU is defined only when building with clang and
 * AddressSanitizer enabled; it gates the ASAN header page APIs.
 */
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#define SANITIZER_AMDGPU 1
#endif
#endif

/*
 * Pointer -> 64-bit integer conversion that avoids the pointer-to-int-cast
 * warning by going through unsigned long first.
 */
#define PORT_VPTR_TO_UINT64(vptr) ((uint64_t)(unsigned long)(vptr))

/*
 * 64-bit integer -> pointer conversion that avoids the int-to-pointer-cast
 * warning. The value is narrowed to unsigned long before the cast.
 */
#define PORT_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v))

/*
 * Guard for API entry points: returns
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED from the *calling* function
 * when KFD has not been opened or the process is flagged as forked.
 * Because it contains a return statement it may only be used inside
 * functions that return HSAKMT_STATUS.
 */
#define CHECK_KFD_OPEN() \
	do { if (hsakmt_kfd_open_count == 0 || hsakmt_forked) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)

/*
 * Returns HSAKMT_STATUS_NOT_SUPPORTED from the calling function when the
 * kernel interface minor version recorded in hsakmt_kfd_version_info is
 * lower than the required one.
 */
#define CHECK_KFD_MINOR_VERSION(minor)					\
	do { if ((minor) > hsakmt_kfd_version_info.KernelInterfaceMinorVersion)\
		return HSAKMT_STATUS_NOT_SUPPORTED; } while (0)

/* Runtime page size / page shift; these back PAGE_SIZE and PAGE_SHIFT below. */
extern int hsakmt_page_size;
extern int hsakmt_page_shift;

/* Might be defined in limits.h on platforms where it is constant (used by musl) */
/* See also: https://pubs.opengroup.org/onlinepubs/7908799/xsh/limits.h.html */
/* When the platform does not provide them, fall back to the runtime values. */
#ifndef PAGE_SIZE
#define PAGE_SIZE hsakmt_page_size
#endif
#ifndef PAGE_SHIFT
#define PAGE_SHIFT hsakmt_page_shift
#endif

/* VI HW bug requires this virtual address alignment */
#define TONGA_PAGE_SIZE 0x8000

/* 64KB BigK fragment size for TLB efficiency */
#define GPU_BIGK_PAGE_SIZE (1 << 16)

/* 2MB huge page size for 4-level page tables on Vega10 and later GPUs */
#define GPU_HUGE_PAGE_SIZE (2 << 20)

/*
 * Returns HSAKMT_STATUS_INVALID_PARAMETER from the calling function unless
 * x (a pointer or an integer) is a multiple of PAGE_SIZE. The value passes
 * through PORT_VPTR_TO_UINT64(), i.e. through unsigned long, so bits above
 * the width of unsigned long are not part of the check.
 */
#define CHECK_PAGE_MULTIPLE(x) \
	do { if ((uint64_t)PORT_VPTR_TO_UINT64(x) % PAGE_SIZE) return HSAKMT_STATUS_INVALID_PARAMETER; } while(0)

/*
 * Round x up to the next multiple of align, computed in 64 bits. The mask
 * trick only gives the expected result when align is a power of two.
 */
#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
/* Same as ALIGN_UP, but the arithmetic is done in 32 bits. */
#define ALIGN_UP_32(x,align) (((uint32_t)(x) + (align) - 1) & ~(uint32_t)((align)-1))
/* Round x up to a multiple of PAGE_SIZE. */
#define PAGE_ALIGN_UP(x) ALIGN_UP(x,PAGE_SIZE)
/* 64-bit value with the n lowest bits set; n == 0 yields 0, n may be up to 64. */
#define BITMASK(n) ((n) ? (UINT64_MAX >> (sizeof(UINT64_MAX) * CHAR_BIT - (n))) : 0)
/*
 * Number of elements of a true array (not a pointer). The argument is
 * parenthesized so that expressions such as *ptr_to_array expand safely.
 */
#define ARRAY_LEN(array) (sizeof(array) / sizeof((array)[0]))

/* HSA Thunk logging usage */
/* Current verbosity threshold; messages with level <= this value are printed. */
extern int hsakmt_debug_level;
/* printf-style logging to stderr, filtered by hsakmt_debug_level. */
#define hsakmt_print(level, fmt, ...) \
	do { if (level <= hsakmt_debug_level) fprintf(stderr, fmt, ##__VA_ARGS__); } while (0)
/*
 * Log levels. NOTE(review): the numeric values look like syslog severities
 * (err=3, warning=4, info=6, debug=7) -- confirm before relying on that.
 */
#define HSAKMT_DEBUG_LEVEL_DEFAULT	-1
#define HSAKMT_DEBUG_LEVEL_ERR		3
#define HSAKMT_DEBUG_LEVEL_WARNING	4
#define HSAKMT_DEBUG_LEVEL_INFO		6
#define HSAKMT_DEBUG_LEVEL_DEBUG	7
/* Convenience wrappers, one per level. */
#define pr_err(fmt, ...) \
	hsakmt_print(HSAKMT_DEBUG_LEVEL_ERR, fmt, ##__VA_ARGS__)
#define pr_warn(fmt, ...) \
	hsakmt_print(HSAKMT_DEBUG_LEVEL_WARNING, fmt, ##__VA_ARGS__)
#define pr_info(fmt, ...) \
	hsakmt_print(HSAKMT_DEBUG_LEVEL_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) \
	hsakmt_print(HSAKMT_DEBUG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
/*
 * Like pr_err(), but each expansion site emits at most once. The guard is a
 * plain static bool inside a GNU statement expression; it is set before the
 * level filter is applied, so a message suppressed by the current debug
 * level still consumes the single shot.
 */
#define pr_err_once(fmt, ...)                   \
({                                              \
        static bool __print_once;               \
        if (!__print_once) {                    \
                __print_once = true;            \
                pr_err(fmt, ##__VA_ARGS__);     \
        }                                       \
})
/* Like pr_warn(), with the same once-per-expansion-site behaviour as pr_err_once(). */
#define pr_warn_once(fmt, ...)                  \
({                                              \
        static bool __print_once;               \
        if (!__print_once) {                    \
                __print_once = true;            \
                pr_warn(fmt, ##__VA_ARGS__);    \
        }                                       \
})

/* Expects gfxv (full) in decimal */
#define HSA_GET_GFX_VERSION_MAJOR(gfxv)   (((gfxv) / 10000) % 100)
#define HSA_GET_GFX_VERSION_MINOR(gfxv)   (((gfxv) / 100) % 100)
#define HSA_GET_GFX_VERSION_STEP(gfxv)    ((gfxv) % 100)

/* Expects HSA_ENGINE_ID.ui32, returns gfxv (full) in hex */
#define HSA_GET_GFX_VERSION_FULL(ui32) \
	(((ui32.Major) << 16) | ((ui32.Minor) << 8) | (ui32.Stepping))

/*
 * Full GFX versions in the hex layout produced by
 * HSA_GET_GFX_VERSION_FULL(): (major << 16) | (minor << 8) | stepping.
 *
 * Several enumerators intentionally share a value (FIJI, POLARIS10/11/12
 * and VEGAM are all 0x080003). Entries are mostly, but not strictly, in
 * ascending order: RENOIR (0x09000C) is listed after GFX950 (0x090500).
 */
enum full_gfx_versions {
	GFX_VERSION_KAVERI		= 0x070000,
	GFX_VERSION_HAWAII		= 0x070001,
	GFX_VERSION_CARRIZO		= 0x080001,
	GFX_VERSION_TONGA		= 0x080002,
	GFX_VERSION_FIJI		= 0x080003,
	GFX_VERSION_POLARIS10		= 0x080003,
	GFX_VERSION_POLARIS11		= 0x080003,
	GFX_VERSION_POLARIS12		= 0x080003,
	GFX_VERSION_VEGAM		= 0x080003,
	GFX_VERSION_VEGA10		= 0x090000,
	GFX_VERSION_RAVEN		= 0x090002,
	GFX_VERSION_VEGA12		= 0x090004,
	GFX_VERSION_VEGA20		= 0x090006,
	GFX_VERSION_ARCTURUS		= 0x090008,
	GFX_VERSION_ALDEBARAN		= 0x09000A,
	GFX_VERSION_AQUA_VANJARAM	= 0x090400,
	GFX_VERSION_GFX950		= 0x090500,
	GFX_VERSION_RENOIR		= 0x09000C,
	GFX_VERSION_NAVI10		= 0x0A0100,
	GFX_VERSION_NAVI12		= 0x0A0101,
	GFX_VERSION_NAVI14		= 0x0A0102,
	GFX_VERSION_CYAN_SKILLFISH	= 0x0A0103,
	GFX_VERSION_SIENNA_CICHLID	= 0x0A0300,
	GFX_VERSION_NAVY_FLOUNDER	= 0x0A0301,
	GFX_VERSION_DIMGREY_CAVEFISH	= 0x0A0302,
	GFX_VERSION_VANGOGH	 	= 0x0A0303,
	GFX_VERSION_BEIGE_GOBY	 	= 0x0A0304,
	GFX_VERSION_YELLOW_CARP	 	= 0x0A0305,
	GFX_VERSION_PLUM_BONITO		= 0x0B0000,
	GFX_VERSION_WHEAT_NAS		= 0x0B0001,
	GFX_VERSION_GFX1200		= 0x0C0000,
	GFX_VERSION_GFX1201		= 0x0C0001,
};

/*
 * Associates a device ID with its GFXIP version triple and device name.
 * NOTE(review): presumably one row of a lookup table defined elsewhere --
 * the table itself is not visible from this header.
 */
struct hsa_gfxip_table {
	uint16_t device_id;		// Device ID
	unsigned char major;		// GFXIP Major engine version
	unsigned char minor;		// GFXIP Minor engine version
	unsigned char stepping;		// GFXIP Stepping info
	const char *amd_name;		// CALName of the device
};

/*
 * Internal (non-exported) helpers shared between the libhsakmt source
 * files. Except where noted the implementations are not visible from this
 * header, so only what the declarations and their callers show is
 * described here.
 */
HSAKMT_STATUS hsakmt_init_kfd_version(void);

/* True for full GFX versions numerically >= GFX_VERSION_VEGA10. */
#define IS_SOC15(gfxv) ((gfxv) >= GFX_VERSION_VEGA10)

/*
 * Node ID / GPU ID helpers. hsakmt_validate_nodeid() is used by the API
 * entry points to turn a node ID into a gpu_id; any result other than
 * HSAKMT_STATUS_SUCCESS is treated as "invalid node".
 */
HSAKMT_STATUS hsakmt_validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id);
bool hsakmt_prefer_ats(HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id);
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node);
int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
/*
 * Array variant of hsakmt_validate_nodeid(). Callers free() *gpu_id_array
 * themselves when they do not hand it on. Note that NumberOfNodes is 32 bits
 * wide here while the public APIs pass a 64-bit count.
 */
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
		uint32_t NumberOfNodes, uint32_t *NodeArray);

/* Topology queries. */
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props);
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
				      HsaNodeProperties *NodeProperties);
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
					HSAuint32 NumIoLinks,
					HsaIoLinkProperties *IoLinkProperties);
void hsakmt_topology_setup_is_dgpu_param(HsaNodeProperties *props);
bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId);

/* Converts an HSA_PAGE_SIZE_* flag value to a size in bytes (memory.c). */
HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags);

/* Allocation helpers and per-subsystem init/teardown routines. */
void* hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
				       uint32_t gpu_id,
				       uint32_t NodeId, bool NonPaged,
				       bool DeviceLocal, bool Uncached);
void hsakmt_free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align);
HSAKMT_STATUS hsakmt_init_process_doorbells(unsigned int NumNodes);
void hsakmt_destroy_process_doorbells(void);
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes);
void hsakmt_destroy_device_debugging_memory(void);
bool hsakmt_debug_get_reg_status(uint32_t node_id);
HSAKMT_STATUS hsakmt_init_counter_props(unsigned int NumNodes);
void hsakmt_destroy_counter_props(void);
uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues);

/* ioctl() wrapper that retries on EINTR/EAGAIN (libhsakmt.c). */
extern int hsakmt_ioctl(int fd, unsigned long request, void *arg);

/*
 * Void pointer arithmetic (or remove -Wpointer-arith to allow void pointers
 * arithmetic).
 *
 * Every macro argument and every expansion is fully parenthesized so that
 * compound arguments such as "a - b" or "cond ? x : y" keep their meaning.
 */
/* ptr + n 32-bit words (i.e. n * 4 bytes) */
#define VOID_PTR_ADD32(ptr,n) ((void*)((uint32_t*)(ptr) + (n)))
/* ptr + n bytes */
#define VOID_PTR_ADD(ptr,n) ((void*)((uint8_t*)(ptr) + (n)))
/* ptr - n bytes */
#define VOID_PTR_SUB(ptr,n) ((void*)((uint8_t*)(ptr) - (n)))
/* byte distance ptr1 - ptr2, converted to uint64_t */
#define VOID_PTRS_SUB(ptr1,ptr2) ((uint64_t)((uint8_t*)(ptr1) - (uint8_t*)(ptr2)))

/*
 * MIN/MAX evaluate each argument exactly once (GNU statement expression plus
 * typeof). Both temporaries are declared with the type of the FIRST
 * argument, so b is converted to typeof(a) before the comparison.
 */
#define MIN(a, b) ({				\
	typeof(a) tmp1 = (a), tmp2 = (b);	\
	tmp1 < tmp2 ? tmp1 : tmp2; })

#define MAX(a, b) ({				\
	typeof(a) tmp1 = (a), tmp2 = (b);	\
	tmp1 > tmp2 ? tmp1 : tmp2; })

/*
 * 1 when x is a non-zero power of two, 0 otherwise. x is evaluated more
 * than once, so it must not have side effects. The argument is parenthesized
 * at every use: without that, an argument such as "v | 0" was torn apart by
 * operator precedence and gave the wrong answer.
 */
#define POWER_OF_2(x) (((x) && !((x) & ((x) - 1))) ? 1 : 0)

/*
 * State-reset helpers. NOTE(review): presumably used when cleaning up in a
 * forked child -- the callers are not visible from this header.
 */
void hsakmt_clear_events_page(void);
void hsakmt_fmm_clear_all_mem(void);
void hsakmt_clear_process_doorbells(void);
uint32_t hsakmt_get_num_sysfs_nodes(void);

/* Called by hsakmt_ioctl() when the KFD fd turns out to be invalid (EBADF). */
bool hsakmt_is_forked_child(void);

/* Calculate VGPR and SGPR register file size per CU */
uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv);
/* SGPR size per CU is a fixed constant (0x4000). */
#define SGPR_SIZE_PER_CU 0x4000
#endif


================================================
FILE: libhsakmt/src/libhsakmt.ver
================================================
/*
 * Symbol version script for libhsakmt. Only the hsaKmt* entry points listed
 * under "global" are exported; everything else is forced local.
 */
HSAKMT_1
{
global:
hsaKmtOpenKFD;
hsaKmtCloseKFD;
hsaKmtGetVersion;
hsaKmtAcquireSystemProperties;
hsaKmtReleaseSystemProperties;
hsaKmtGetNodeProperties;
hsaKmtGetNodeMemoryProperties;
hsaKmtGetNodeCacheProperties;
hsaKmtGetNodeIoLinkProperties;
hsaKmtCreateEvent;
hsaKmtDestroyEvent;
hsaKmtSetEvent;
hsaKmtResetEvent;
hsaKmtQueryEventState;
hsaKmtWaitOnEvent;
hsaKmtWaitOnMultipleEvents;
hsaKmtCreateQueue;
hsaKmtUpdateQueue;
hsaKmtDestroyQueue;
hsaKmtSetQueueCUMask;
hsaKmtSetMemoryPolicy;
hsaKmtAllocMemory;
hsaKmtAllocMemoryAlign;
hsaKmtFreeMemory;
hsaKmtAvailableMemory;
hsaKmtRegisterMemory;
hsaKmtRegisterMemoryToNodes;
hsaKmtRegisterMemoryWithFlags;
hsaKmtRegisterGraphicsHandleToNodes;
hsaKmtRegisterGraphicsHandleToNodesExt;
hsaKmtShareMemory;
hsaKmtRegisterSharedHandle;
hsaKmtRegisterSharedHandleToNodes;
hsaKmtProcessVMRead;
hsaKmtProcessVMWrite;
hsaKmtDeregisterMemory;
hsaKmtMapMemoryToGPU;
hsaKmtMapMemoryToGPUNodes;
hsaKmtUnmapMemoryToGPU;
hsaKmtDbgRegister;
hsaKmtDbgUnregister;
hsaKmtDbgWavefrontControl;
hsaKmtDbgAddressWatch;
hsaKmtDbgEnable;
hsaKmtDbgDisable;
hsaKmtDbgGetDeviceData;
hsaKmtDbgGetQueueData;
hsaKmtGetClockCounters;
hsaKmtPmcGetCounterProperties;
hsaKmtPmcRegisterTrace;
hsaKmtPmcUnregisterTrace;
hsaKmtPmcAcquireTraceAccess;
hsaKmtPmcReleaseTraceAccess;
hsaKmtPmcStartTrace;
hsaKmtPmcQueryTrace;
hsaKmtPmcStopTrace;
hsaKmtMapGraphicHandle;
hsaKmtUnmapGraphicHandle;
hsaKmtSetTrapHandler;
hsaKmtGetTileConfig;
hsaKmtQueryPointerInfo;
hsaKmtSetMemoryUserData;
hsaKmtGetQueueInfo;
hsaKmtAllocQueueGWS;
hsaKmtRuntimeEnable;
hsaKmtRuntimeDisable;
hsaKmtCheckRuntimeDebugSupport;
hsaKmtGetRuntimeCapabilities;
hsaKmtDebugTrapIoctl;
hsaKmtSPMAcquire;
hsaKmtSPMRelease;
hsaKmtSPMSetDestBuffer;
hsaKmtSVMSetAttr;
hsaKmtSVMGetAttr;
hsaKmtSetXNACKMode;
hsaKmtGetXNACKMode;
hsaKmtOpenSMI;
hsaKmtExportDMABufHandle;
hsaKmtWaitOnEvent_Ext;
hsaKmtWaitOnMultipleEvents_Ext;
hsaKmtReplaceAsanHeaderPage;
hsaKmtReturnAsanHeaderPage;
hsaKmtGetAMDGPUDeviceHandle;
hsaKmtPcSamplingQueryCapabilities;
hsaKmtPcSamplingCreate;
hsaKmtPcSamplingDestroy;
hsaKmtPcSamplingStart;
hsaKmtPcSamplingStop;
hsaKmtPcSamplingSupport;
local: *;
};


================================================
FILE: libhsakmt/src/memory.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "fmm.h"

/*
 * Set the default and alternate cache policy of a node (legacy API).
 *
 * Node                    node to configure; must be a valid node ID.
 * DefaultPolicy           HSA_CACHING_CACHED or HSA_CACHING_NONCACHED.
 * AlternatePolicy         HSA_CACHING_CACHED or HSA_CACHING_NONCACHED.
 * MemoryAddressAlternate  page-aligned base of the alternate aperture.
 * MemorySizeInBytes       page-multiple size of the alternate aperture.
 *
 * Returns HSAKMT_STATUS_NOT_IMPLEMENTED on anything but Kaveri,
 * HSAKMT_STATUS_INVALID_PARAMETER for bad policies or unaligned values,
 * HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
					      HSAuint32 DefaultPolicy,
					      HSAuint32 AlternatePolicy,
					      void *MemoryAddressAlternate,
					      HSAuint64 MemorySizeInBytes)
{
	struct kfd_ioctl_set_memory_policy_args ioctl_args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;
	int default_is_legal, alternate_is_legal;
	int rc;

	CHECK_KFD_OPEN();

	pr_debug("[%s] node %d; default %d; alternate %d\n",
		__func__, Node, DefaultPolicy, AlternatePolicy);

	status = hsakmt_validate_nodeid(Node, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	/* This is a legacy API useful on Kaveri only. On dGPU the alternate
	 * aperture is set up and used automatically for coherent allocations,
	 * so the application is not allowed to override it.
	 */
	if (hsakmt_get_gfxv_by_node_id(Node) != GFX_VERSION_KAVERI)
		return HSAKMT_STATUS_NOT_IMPLEMENTED;

	/* Any legal policy and alternate address location is accepted.
	 * You get CC everywhere anyway.
	 */
	default_is_legal = DefaultPolicy == HSA_CACHING_CACHED ||
			   DefaultPolicy == HSA_CACHING_NONCACHED;
	alternate_is_legal = AlternatePolicy == HSA_CACHING_CACHED ||
			     AlternatePolicy == HSA_CACHING_NONCACHED;
	if (!default_is_legal || !alternate_is_legal)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_PAGE_MULTIPLE(MemoryAddressAlternate);
	CHECK_PAGE_MULTIPLE(MemorySizeInBytes);

	ioctl_args.gpu_id = gpu_id;

	if (DefaultPolicy == HSA_CACHING_CACHED)
		ioctl_args.default_policy = KFD_IOC_CACHE_POLICY_COHERENT;
	else
		ioctl_args.default_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;

	if (AlternatePolicy == HSA_CACHING_CACHED)
		ioctl_args.alternate_policy = KFD_IOC_CACHE_POLICY_COHERENT;
	else
		ioctl_args.alternate_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;

	ioctl_args.alternate_aperture_base = (uintptr_t) MemoryAddressAlternate;
	ioctl_args.alternate_aperture_size = MemorySizeInBytes;

	rc = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &ioctl_args);
	if (rc == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Translate an HSA_PAGE_SIZE_* flag value into a page size in bytes.
 *
 * Unknown values trip an assert in debug builds; with asserts compiled out
 * the function falls back to 4 KiB.
 */
HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags)
{
	HSAuint32 bytes;

	switch (pageSizeFlags) {
	case HSA_PAGE_SIZE_4KB:
		bytes = 4*1024;
		break;
	case HSA_PAGE_SIZE_64KB:
		bytes = 64*1024;
		break;
	case HSA_PAGE_SIZE_2MB:
		bytes = 2*1024*1024;
		break;
	case HSA_PAGE_SIZE_1GB:
		bytes = 1024*1024*1024;
		break;
	default:
		assert(false);
		bytes = 4*1024;
		break;
	}

	return bytes;
}

/*
 * Allocate memory without an explicit alignment request.
 *
 * Thin wrapper around hsaKmtAllocMemoryAlign() that passes an alignment of
 * 0, which makes that function skip its alignment validation. All other
 * arguments and the return value are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
					  HSAuint64 SizeInBytes,
					  HsaMemFlags MemFlags,
					  void **MemoryAddress)
{
	const HSAuint64 no_alignment = 0;

	return hsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, no_alignment,
				      MemFlags, MemoryAddress);
}

/*
 * Allocate memory on behalf of PreferredNode, optionally aligned.
 *
 * Depending on MemFlags and the node, the request is served by one of three
 * allocators: scratch (MemFlags.ui32.Scratch), host/system memory, or
 * device memory.
 *
 * PreferredNode  node the allocation is made for; must pass
 *                hsakmt_validate_nodeid().
 * SizeInBytes    non-zero multiple of the page size selected by
 *                MemFlags.ui32.PageSize.
 * Alignment      0 (no request), or a power of two that is at least that
 *                page size. Not supported together with Scratch.
 * MemFlags       allocation flags.
 * MemoryAddress  out: the allocated address. With FixedAddress set it is
 *                also an input and must be non-NULL; otherwise it is reset
 *                to NULL before allocating.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success. Parameter problems yield
 * HSAKMT_STATUS_INVALID_PARAMETER (or NOT_IMPLEMENTED for aligned scratch,
 * NOT_SUPPORTED for Contiguous on old kernels); allocator failures yield
 * HSAKMT_STATUS_NO_MEMORY (scratch, device) or HSAKMT_STATUS_ERROR (host).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
					  HSAuint64 SizeInBytes,
					  HSAuint64 Alignment,
					  HsaMemFlags MemFlags,
					  void **MemoryAddress)
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	HSAuint64 page_size;

	CHECK_KFD_OPEN();

	if (MemFlags.ui32.Contiguous)
		CHECK_KFD_MINOR_VERSION(16);

	pr_debug("[%s] node %d\n", __func__, PreferredNode);

	result = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
		return result;
	}

	page_size = hsakmt_PageSizeFromFlags(MemFlags.ui32.PageSize);

	if (Alignment && (Alignment < page_size || !POWER_OF_2(Alignment)))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (!MemoryAddress || !SizeInBytes || (SizeInBytes & (page_size-1)))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (MemFlags.ui32.FixedAddress) {
		if (*MemoryAddress == NULL)
			return HSAKMT_STATUS_INVALID_PARAMETER;
	} else
		*MemoryAddress = NULL;

	if ((MemFlags.ui32.CoarseGrain && MemFlags.ui32.ExtendedCoherent) ||
	    (MemFlags.ui32.ExtendedCoherent && MemFlags.ui32.Uncached))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (MemFlags.ui32.Scratch) {
		if (Alignment) {
			// Scratch memory currently forced to SCRATCH_ALIGN
			pr_err("[%s] Alignment not supported for scratch memory: %d\n", __func__, PreferredNode);
			return HSAKMT_STATUS_NOT_IMPLEMENTED;
		}

		*MemoryAddress = hsakmt_fmm_allocate_scratch(gpu_id, *MemoryAddress, SizeInBytes);

		if (!(*MemoryAddress)) {
			pr_err("[%s] failed to allocate %lu bytes from scratch\n",
				__func__, SizeInBytes);
			return HSAKMT_STATUS_NO_MEMORY;
		}

		pr_debug("[%s] node %d address %p size %lu from scratch\n", __func__, PreferredNode, *MemoryAddress, SizeInBytes);
		return HSAKMT_STATUS_SUCCESS;
	}

	/* Sanity check: an address-only and a memory-only allocation cannot
	 * be requested at the same time. This has to run before the host
	 * path below, because every OnlyAddress request takes that path and
	 * would otherwise never reach the check.
	 */
	if (MemFlags.ui32.OnlyAddress && MemFlags.ui32.NoAddress) {
		pr_err("[%s] allocate addr-only and memory-only at same time\n",
			__func__);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* GPU allocated system memory */
	if (!gpu_id || !MemFlags.ui32.NonPaged || hsakmt_zfb_support || MemFlags.ui32.GTTAccess
		|| MemFlags.ui32.OnlyAddress) {
		/* Backwards compatibility hack: Allocate system memory if app
		 * asks for paged memory from a GPU node.
		 */

		/* If allocate VRAM under ZFB mode */
		if (hsakmt_zfb_support && gpu_id && MemFlags.ui32.NonPaged == 1)
			MemFlags.ui32.CoarseGrain = 1;

		*MemoryAddress = hsakmt_fmm_allocate_host(gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
						   *MemoryAddress, SizeInBytes, Alignment, MemFlags);

		if (!(*MemoryAddress)) {
			pr_err("[%s] failed to allocate %lu bytes from host\n",
				__func__, SizeInBytes);
			return HSAKMT_STATUS_ERROR;
		}

		pr_debug("[%s] node %d address %p size %lu from host\n", __func__, PreferredNode, *MemoryAddress, SizeInBytes);
		return HSAKMT_STATUS_SUCCESS;
	}

	/* GPU allocated VRAM */
	*MemoryAddress = hsakmt_fmm_allocate_device(gpu_id, PreferredNode, *MemoryAddress,
					     SizeInBytes, Alignment, MemFlags);

	if (!(*MemoryAddress)) {
		pr_err("[%s] failed to allocate %lu bytes from device\n",
			__func__, SizeInBytes);
		return HSAKMT_STATUS_NO_MEMORY;
	}

	pr_debug("[%s] node %d address %p size %lu from device\n", __func__, PreferredNode, *MemoryAddress, SizeInBytes);
	return HSAKMT_STATUS_SUCCESS;

}

/*
 * Release memory previously obtained from the allocation APIs.
 *
 * MemoryAddress  address to release; NULL is rejected with
 *                HSAKMT_STATUS_ERROR.
 * SizeInBytes    not used by this function.
 *
 * Returns the status of hsakmt_fmm_release().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
					 HSAuint64 SizeInBytes)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress)
		return hsakmt_fmm_release(MemoryAddress);

	pr_err("FIXME: freeing NULL pointer\n");
	return HSAKMT_STATUS_ERROR;
}

/*
 * Query the amount of memory currently available on a node.
 *
 * Node            node to query; must be a valid node ID.
 * AvailableBytes  out: receives the value reported by the
 *                 AMDKFD_IOC_AVAILABLE_MEMORY ioctl. Must not be NULL.
 *
 * Requires kernel interface minor version 9 or newer
 * (HSAKMT_STATUS_NOT_SUPPORTED otherwise). Returns
 * HSAKMT_STATUS_INVALID_PARAMETER for a NULL AvailableBytes,
 * HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS on
 * success.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
					      HSAuint64 *AvailableBytes)
{
	struct kfd_ioctl_get_available_memory_args args = {};
	HSAKMT_STATUS result;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(9);

	pr_debug("[%s] node %d\n", __func__, Node);

	/* The result is written through this pointer, so reject NULL up
	 * front instead of crashing after the ioctl has already been issued.
	 */
	if (!AvailableBytes)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = hsakmt_validate_nodeid(Node, &args.gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, Node);
		return result;
	}

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
		return HSAKMT_STATUS_ERROR;

	*AvailableBytes = args.available;
	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Register a range of memory with the library.
 *
 * When hsakmt_is_dgpu is not set this is a no-op that reports success;
 * otherwise the range is registered through hsakmt_fmm_register_memory()
 * without a node list, and that function's status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p size %lu\n", __func__, MemoryAddress, MemorySizeInBytes);

	if (hsakmt_is_dgpu)
		return hsakmt_fmm_register_memory(MemoryAddress,
						  MemorySizeInBytes,
						  NULL, 0, true, false);

	/* TODO: support mixed APU and dGPU configurations */
	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Register a range of memory for use by a specific set of nodes.
 *
 * NodeArray / NumberOfNodes are validated and translated into a GPU ID
 * array, which is then passed (with its size in bytes) to
 * hsakmt_fmm_register_memory(). The array is freed here only when that
 * registration fails.
 *
 * Returns HSAKMT_STATUS_NOT_SUPPORTED when hsakmt_is_dgpu is not set,
 * otherwise the status of the node validation or of the registration.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
						    HSAuint64 MemorySizeInBytes,
						    HSAuint64 NumberOfNodes,
						    HSAuint32 *NodeArray)
{
	CHECK_KFD_OPEN();
	uint32_t *gpu_ids;
	HSAKMT_STATUS status;

	pr_debug("[%s] address %p size %lu number of nodes %lu\n",
		__func__, MemoryAddress, MemorySizeInBytes, NumberOfNodes);

	/* TODO: support mixed APU and dGPU configurations */
	if (!hsakmt_is_dgpu)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	status = hsakmt_validate_nodeid_array(&gpu_ids, NumberOfNodes, NodeArray);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	status = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
					    gpu_ids,
					    NumberOfNodes*sizeof(uint32_t),
					    true, false);
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}

/*
 * Register a range of ordinary paged host memory, taking the coarse-grain
 * and extended-coherent attributes from MemFlags.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when ExtendedCoherent and
 * CoarseGrain are both set, HSAKMT_STATUS_NOT_SUPPORTED when the flags do
 * not describe host-accessible paged memory or when hsakmt_is_dgpu is not
 * set, and otherwise the status of hsakmt_fmm_register_memory().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
						    HSAuint64 MemorySizeInBytes,
						    HsaMemFlags MemFlags)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p size %lu\n",
		__func__, MemoryAddress, MemorySizeInBytes);

	if (MemFlags.ui32.ExtendedCoherent && MemFlags.ui32.CoarseGrain)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	// Registered memory should be ordinary paged host memory.
	if (MemFlags.ui32.HostAccess != 1)
		return HSAKMT_STATUS_NOT_SUPPORTED;
	if (MemFlags.ui32.NonPaged == 1)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	/* TODO: support mixed APU and dGPU configurations */
	if (!hsakmt_is_dgpu)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	return hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
					  NULL, 0,
					  MemFlags.ui32.CoarseGrain,
					  MemFlags.ui32.ExtendedCoherent);
}

/*
 * Register a graphics resource handle for a set of nodes using default
 * registration flags (Value == 0).
 *
 * Thin wrapper around hsaKmtRegisterGraphicsHandleToNodesExt(); all other
 * arguments and the return value are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
							    HsaGraphicsResourceInfo *GraphicsResourceInfo,
							    HSAuint64 NumberOfNodes,
							    HSAuint32 *NodeArray)
{
	HSA_REGISTER_MEM_FLAGS default_flags;

	default_flags.Value = 0;

	return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle,
						     GraphicsResourceInfo,
						     NumberOfNodes,
						     NodeArray,
						     default_flags);
}

/*
 * Register a graphics resource handle for a set of nodes with explicit
 * registration flags.
 *
 * The node list is validated only when NodeArray is non-NULL or
 * NumberOfNodes is non-zero; otherwise a NULL GPU ID array is passed on.
 * The GPU ID array is freed here only when the registration fails.
 *
 * Returns the status of the node validation or of
 * hsakmt_fmm_register_graphics_handle().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle,
							       HsaGraphicsResourceInfo *GraphicsResourceInfo,
							       HSAuint64 NumberOfNodes,
							       HSAuint32 *NodeArray,
							       HSA_REGISTER_MEM_FLAGS RegisterFlags)
{
	CHECK_KFD_OPEN();
	uint32_t *gpu_ids = NULL;
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
	int have_node_list;

	pr_debug("[%s] number of nodes %lu\n", __func__, NumberOfNodes);

	have_node_list = (NodeArray != NULL || NumberOfNodes != 0);
	if (have_node_list)
		status = hsakmt_validate_nodeid_array(&gpu_ids,
						      NumberOfNodes, NodeArray);

	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	status = hsakmt_fmm_register_graphics_handle(GraphicsResourceHandle,
						     GraphicsResourceInfo,
						     gpu_ids,
						     NumberOfNodes * sizeof(uint32_t),
						     RegisterFlags);
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}

/*
 * Export a memory range as a DMA-buf file descriptor.
 *
 * Requires kernel interface minor version 12 or newer. The actual work is
 * done by hsakmt_fmm_export_dma_buf_fd(), whose status is returned;
 * DMABufFd and Offset are handed to it as output locations.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
						 HSAuint64 MemorySizeInBytes,
						 int *DMABufFd,
						 HSAuint64 *Offset)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(12);

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	status = hsakmt_fmm_export_dma_buf_fd(MemoryAddress, MemorySizeInBytes,
					      DMABufFd, Offset);
	return status;
}

/*
 * Create a shareable handle for a memory range.
 *
 * SharedMemoryHandle must not be NULL (HSAKMT_STATUS_INVALID_PARAMETER
 * otherwise). Returns the status of hsakmt_fmm_share_memory().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
					  HSAuint64 SizeInBytes,
					  HsaSharedMemoryHandle *SharedMemoryHandle)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (SharedMemoryHandle == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	return hsakmt_fmm_share_memory(MemoryAddress, SizeInBytes,
				       SharedMemoryHandle);
}

/*
 * Register a shared memory handle without restricting it to a node list.
 *
 * Forwards to hsaKmtRegisterSharedHandleToNodes() with zero nodes and a
 * NULL node array, and returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle,
						   void **MemoryAddress,
						   HSAuint64 *SizeInBytes)
{
	const HSAuint64 no_nodes = 0;

	CHECK_KFD_OPEN();

	pr_debug("[%s] handle %p\n", __func__, SharedMemoryHandle);

	return hsaKmtRegisterSharedHandleToNodes(SharedMemoryHandle,
						 MemoryAddress,
						 SizeInBytes,
						 no_nodes,
						 NULL);
}

/*
 * Register a shared memory handle, optionally for a specific set of nodes.
 *
 * SharedMemoryHandle must not be NULL. When NodeArray is non-NULL it is
 * validated and translated to a GPU ID array first. That array is freed
 * here on any failure; on success it is left with
 * hsakmt_fmm_register_shared_memory().
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle, otherwise the
 * status of the node validation or of the registration.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
							  void **MemoryAddress,
							  HSAuint64 *SizeInBytes,
							  HSAuint64 NumberOfNodes,
							  HSAuint32 *NodeArray)
{
	CHECK_KFD_OPEN();

	uint32_t *gpu_ids = NULL;
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pr_debug("[%s] handle %p number of nodes %lu\n",
		__func__, SharedMemoryHandle, NumberOfNodes);

	if (!SharedMemoryHandle)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (NodeArray)
		status = hsakmt_validate_nodeid_array(&gpu_ids, NumberOfNodes, NodeArray);

	if (status == HSAKMT_STATUS_SUCCESS)
		status = hsakmt_fmm_register_shared_memory(SharedMemoryHandle,
							   SizeInBytes,
							   MemoryAddress,
							   gpu_ids,
							   NumberOfNodes*sizeof(uint32_t));

	/* free(NULL) is a no-op, so no explicit NULL test is needed. */
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}

/*
 * Deprecated API, kept only so that the symbol still exists.
 *
 * Logs an error and returns HSAKMT_STATUS_NOT_IMPLEMENTED; none of the
 * arguments are used.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMRead(HSAuint32 Pid,
					    HsaMemoryRange *LocalMemoryArray,
					    HSAuint64 LocalMemoryArrayCount,
					    HsaMemoryRange *RemoteMemoryArray,
					    HSAuint64 RemoteMemoryArrayCount,
					    HSAuint64 *SizeCopied)
{
	const HSAKMT_STATUS status = HSAKMT_STATUS_NOT_IMPLEMENTED;

	pr_err("[%s] Deprecated\n", __func__);

	return status;
}

/*
 * Deprecated API, kept only so that the symbol still exists.
 *
 * Logs an error and returns HSAKMT_STATUS_NOT_IMPLEMENTED; none of the
 * arguments are used.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid,
					     HsaMemoryRange *LocalMemoryArray,
					     HSAuint64 LocalMemoryArrayCount,
					     HsaMemoryRange *RemoteMemoryArray,
					     HSAuint64 RemoteMemoryArrayCount,
					     HSAuint64 *SizeCopied)
{
	const HSAKMT_STATUS status = HSAKMT_STATUS_NOT_IMPLEMENTED;

	pr_err("[%s] Deprecated\n", __func__);

	return status;
}


/*
 * Undo a previous memory registration for MemoryAddress.
 *
 * Returns the status of hsakmt_fmm_deregister_memory().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	status = hsakmt_fmm_deregister_memory(MemoryAddress);
	return status;
}

/*
 * Map a memory range to the GPU.
 *
 * MemoryAddress      start of the range; NULL yields HSAKMT_STATUS_ERROR.
 * MemorySizeInBytes  size of the range.
 * AlternateVAGPU     optional out parameter; when non-NULL it is cleared to
 *                    0 before being handed to hsakmt_fmm_map_to_gpu().
 *
 * Returns the status of hsakmt_fmm_map_to_gpu().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes,
					     HSAuint64 *AlternateVAGPU)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress == NULL) {
		pr_err("FIXME: mapping NULL pointer\n");
		return HSAKMT_STATUS_ERROR;
	}

	if (AlternateVAGPU != NULL)
		*AlternateVAGPU = 0;

	return hsakmt_fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes,
				     AlternateVAGPU);
}

/*
 * Map a memory range to a specific set of GPU nodes.
 *
 * MemoryAddress  start of the range; NULL yields HSAKMT_STATUS_ERROR.
 * MemMapFlags    not used by this function.
 * NodeArray / NumberOfNodes  nodes to map to; validated and translated to
 *                GPU IDs before mapping.
 *
 * When hsakmt_is_dgpu is not set and exactly one node is given, the call is
 * forwarded to hsaKmtMapMemoryToGPU(). Otherwise returns the status of the
 * node validation or of hsakmt_fmm_map_to_gpu_nodes(). The temporary GPU ID
 * array is always released before returning.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
						  HSAuint64 MemorySizeInBytes,
						  HSAuint64 *AlternateVAGPU,
						  HsaMemMapFlags MemMapFlags,
						  HSAuint64 NumberOfNodes,
						  HSAuint32 *NodeArray)
{
	uint32_t *gpu_ids;
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p number of nodes %lu\n",
		__func__, MemoryAddress, NumberOfNodes);

	if (MemoryAddress == NULL) {
		pr_err("FIXME: mapping NULL pointer\n");
		return HSAKMT_STATUS_ERROR;
	}

	if (NumberOfNodes == 1 && !hsakmt_is_dgpu)
		return hsaKmtMapMemoryToGPU(MemoryAddress,
					    MemorySizeInBytes,
					    AlternateVAGPU);

	status = hsakmt_validate_nodeid_array(&gpu_ids, NumberOfNodes, NodeArray);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	status = hsakmt_fmm_map_to_gpu_nodes(MemoryAddress, MemorySizeInBytes,
					     gpu_ids, NumberOfNodes,
					     AlternateVAGPU);

	/* free(NULL) is a no-op, so no explicit NULL test is needed. */
	free(gpu_ids);

	return status;
}

/*
 * Unmap a memory range from the GPU.
 *
 * A NULL MemoryAddress is logged but reported as success (workaround for a
 * runtime bug). Otherwise returns HSAKMT_STATUS_SUCCESS when
 * hsakmt_fmm_unmap_from_gpu() returns 0 and HSAKMT_STATUS_ERROR when it
 * returns anything else.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress == NULL) {
		/* Workaround for runtime bug */
		pr_err("FIXME: Unmapping NULL pointer\n");
		return HSAKMT_STATUS_SUCCESS;
	}

	return hsakmt_fmm_unmap_from_gpu(MemoryAddress) ?
		HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

/*
 * Unsupported legacy API; always returns HSAKMT_STATUS_NOT_IMPLEMENTED and
 * ignores all of its arguments.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId,
					       HSAuint64 GraphicDeviceHandle,
					       HSAuint64 GraphicResourceHandle,
					       HSAuint64 GraphicResourceOffset,
					       HSAuint64 GraphicResourceSize,
					       HSAuint64 *FlatMemoryAddress)
{
	/* This API was only ever implemented in KFD for Kaveri and was never
	 * upstreamed. There are no open-source users of this interface, and
	 * it has been superseded by RegisterGraphicsHandleToNodes.
	 */
	const HSAKMT_STATUS status = HSAKMT_STATUS_NOT_IMPLEMENTED;

	return status;
}

/*
 * Unmap a graphics resource identified by its flat address.
 *
 * Converts FlatMemoryAddress to a pointer and forwards to
 * hsaKmtUnmapMemoryToGPU(); NodeId and SizeInBytes are not used.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
						 HSAuint64 FlatMemoryAddress,
						 HSAuint64 SizeInBytes)
{
	void *flat_address;

	CHECK_KFD_OPEN();

	flat_address = PORT_UINT64_TO_VPTR(FlatMemoryAddress);
	return hsaKmtUnmapMemoryToGPU(flat_address);
}

/*
 * Query the tiling configuration of a node.
 *
 * NodeId  node to query; must be a valid node ID.
 * config  in/out: on entry TileConfig / MacroTileConfig point to caller
 *         provided arrays and NumTileConfigs / NumMacroTileConfigs hold
 *         their capacities; on success the counts, GbAddrConfig, NumBanks
 *         and NumRanks are updated from the ioctl result. Must not be NULL.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL config, the node
 * validation status for a bad node, HSAKMT_STATUS_ERROR when the ioctl
 * fails and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config)
{
	struct kfd_ioctl_get_tile_config_args args = {0};
	uint32_t gpu_id;
	HSAKMT_STATUS result;

	CHECK_KFD_OPEN();

	pr_debug("[%s] node %d\n", __func__, NodeId);

	/* config is both read and written below; reject NULL instead of
	 * crashing.
	 */
	if (!config)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	/* Avoid Valgrind warnings about uninitialized data. Valgrind doesn't
	 * know that KFD writes this. Skip arrays the caller did not provide:
	 * memset() must not be handed a NULL pointer.
	 */
	if (config->TileConfig)
		memset(config->TileConfig, 0, sizeof(*config->TileConfig) * config->NumTileConfigs);
	if (config->MacroTileConfig)
		memset(config->MacroTileConfig, 0, sizeof(*config->MacroTileConfig) * config->NumMacroTileConfigs);

	args.gpu_id = gpu_id;
	args.tile_config_ptr = (uint64_t)config->TileConfig;
	args.macro_tile_config_ptr = (uint64_t)config->MacroTileConfig;
	args.num_tile_configs = config->NumTileConfigs;
	args.num_macro_tile_configs = config->NumMacroTileConfigs;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
		return HSAKMT_STATUS_ERROR;

	config->NumTileConfigs = args.num_tile_configs;
	config->NumMacroTileConfigs = args.num_macro_tile_configs;

	config->GbAddrConfig = args.gb_addr_config;

	config->NumBanks = args.num_banks;
	config->NumRanks = args.num_ranks;

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * Look up information about the allocation that Pointer belongs to.
 *
 * PointerInfo must not be NULL (HSAKMT_STATUS_INVALID_PARAMETER otherwise).
 * Returns the status of hsakmt_fmm_get_mem_info().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
					       HsaPointerInfo *PointerInfo)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] pointer %p\n", __func__, Pointer);

	if (PointerInfo == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	return hsakmt_fmm_get_mem_info(Pointer, PointerInfo);
}

/*
 * Attach a caller-defined UserData pointer to the memory that Pointer
 * refers to.
 *
 * Returns the status of hsakmt_fmm_set_mem_user_data().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
						void *UserData)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	pr_debug("[%s] pointer %p\n", __func__, Pointer);

	status = hsakmt_fmm_set_mem_user_data(Pointer, UserData);
	return status;
}

/*
 * Replace the ASAN header page at addr.
 *
 * Only functional when the library is built with SANITIZER_AMDGPU; in that
 * case the status of hsakmt_fmm_replace_asan_header_page() is returned.
 * Without it the function always returns HSAKMT_STATUS_NOT_SUPPORTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
{
#ifndef SANITIZER_AMDGPU
	return HSAKMT_STATUS_NOT_SUPPORTED;
#else
	pr_debug("[%s] address %p\n", __func__, addr);
	CHECK_KFD_OPEN();

	return hsakmt_fmm_replace_asan_header_page(addr);
#endif
}

/* Forward addr to hsakmt_fmm_return_asan_header_page() when the library is
 * built with SANITIZER_AMDGPU; builds without it always report
 * HSAKMT_STATUS_NOT_SUPPORTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
{
#ifndef SANITIZER_AMDGPU
	return HSAKMT_STATUS_NOT_SUPPORTED;
#else
	HSAKMT_STATUS status;

	pr_debug("[%s] address %p\n", __func__, addr);
	CHECK_KFD_OPEN();

	status = hsakmt_fmm_return_asan_header_page(addr);
	return status;
#endif
}

/* Obtain the amdgpu device handle of node NodeId by delegating to
 * hsakmt_fmm_get_amdgpu_device_handle(); its status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
						HsaAMDGPUDeviceHandle *DeviceHandle)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	status = hsakmt_fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);

	return status;
}


================================================
FILE: libhsakmt/src/openclose.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/* glibc macro that enables access some nonstandard GNU/Linux extensions
 * such as RTLD_DEFAULT used by dlsym
 */
#define _GNU_SOURCE

#include "libhsakmt.h"
#include "hsakmt/hsakmtmodel.h"

#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <strings.h>
#include "fmm.h"
#include <dlfcn.h>
#include <string.h>

/* Address of amdgpu_device_get_fd, looked up with dlsym() in hsaKmtOpenKFD();
 * stays NULL when the symbol cannot be resolved.
 */
int (*hsakmt_fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);

/* Device nodes opened by hsaKmtOpenKFD() */
static const char kfd_device_name[] = "/dev/kfd";
static const char kfd_udmabuf_device_name[] = "/dev/udmabuf";
/* PID recorded by hsakmt_is_forked_child(); -1 means "not recorded yet" */
static pid_t parent_pid = -1;
/* Message verbosity, set from HSAKMT_DEBUG_LEVEL in init_vars_from_env() */
int hsakmt_debug_level;
/* Set once a fork has been detected; cleared again by clear_after_fork() */
bool hsakmt_forked;

/* Tell whether the calling process is a fork of the process that last
 * called this function.
 *
 * The PID seen on the first call is remembered; a later call from a process
 * with a different PID marks the library as forked. pthread_atfork alone is
 * not sufficient because a process can fork without going through libc's
 * fork() (clone, or a raw system call).
 *
 * Returns true when a fork has been detected, false otherwise.
 */
bool hsakmt_is_forked_child(void)
{
	pid_t self;

	/* A fork that was already noticed stays noticed */
	if (hsakmt_forked)
		return true;

	self = getpid();

	if (parent_pid == -1)
		/* First call: remember this PID as the baseline */
		parent_pid = self;
	else if (parent_pid != self)
		hsakmt_forked = true;

	return hsakmt_forked;
}

/* Callbacks from pthread_atfork */

/* "prepare" callback: take hsakmt_mutex before the fork so that the lock is
 * held across it; parent_fork_handler() releases it again in the parent.
 */
static void prepare_fork_handler(void)
{
	pthread_mutex_lock(&hsakmt_mutex);
}
/* "parent" callback: release the hsakmt_mutex taken by
 * prepare_fork_handler() once the fork has completed in the parent.
 */
static void parent_fork_handler(void)
{
	pthread_mutex_unlock(&hsakmt_mutex);
}
/* "child" callback: give the child a freshly initialized hsakmt_mutex
 * (instead of unlocking the inherited one) and flag the process as forked so
 * that hsakmt_is_forked_child() reports it.
 */
static void child_fork_handler(void)
{
	pthread_mutex_init(&hsakmt_mutex, NULL);
	hsakmt_forked = true;
}

/* Call this from the child process after fork. It clears all state that was
 * duplicated from the parent process and is not valid in the child.
 * The topology information duplicated from the parent remains valid in the
 * child process, so it is not cleared.
 */
static void clear_after_fork(void)
{
	hsakmt_clear_process_doorbells();
	hsakmt_clear_events_page();
	hsakmt_fmm_clear_all_mem();
	hsakmt_destroy_device_debugging_memory();
	/* Negative means "not open" (see the hsakmt_kfd_fd < 0 test in
	 * hsaKmtOpenKFD), so test for >= 0: a plain truth test would call
	 * close(-1) and would skip a descriptor that happens to be 0.
	 */
	if (hsakmt_kfd_fd >= 0) {
		close(hsakmt_kfd_fd);
		hsakmt_kfd_fd = -1;
	}
	if (hsakmt_udmabuf_dev_fd > 0) {
		close(hsakmt_udmabuf_dev_fd);
		hsakmt_udmabuf_dev_fd = -1;
	}
	/* References counted so far belong to the parent */
	hsakmt_kfd_open_count = 0;
	/* Restart fork detection from scratch in this process */
	parent_pid = -1;
	hsakmt_forked = false;
}

/* Cache the system page size and the matching shift in hsakmt_page_size and
 * hsakmt_page_shift.
 */
static inline void init_page_size(void)
{
	hsakmt_page_size = sysconf(_SC_PAGESIZE);
	/* ffs() is the 1-based index of the lowest set bit, so subtracting one
	 * gives log2 of a power-of-two page size.
	 */
	hsakmt_page_shift = ffs(hsakmt_page_size) - 1;
}

/* Initialize library settings from the process environment.
 *
 * HSAKMT_DEBUG_LEVEL - message verbosity. Libraries normally stay silent;
 *                      a value in [HSAKMT_DEBUG_LEVEL_ERR,
 *                      HSAKMT_DEBUG_LEVEL_DEBUG] replaces the default
 *                      HSAKMT_DEBUG_LEVEL_DEFAULT, anything else is ignored.
 * HSA_ZFB            - when set, its integer value is stored in
 *                      hsakmt_zfb_support (zero frame buffer support).
 *
 * Always returns HSAKMT_STATUS_SUCCESS.
 */
static HSAKMT_STATUS init_vars_from_env(void)
{
	const char *debug_env = getenv("HSAKMT_DEBUG_LEVEL");
	const char *zfb_env;

	hsakmt_debug_level = HSAKMT_DEBUG_LEVEL_DEFAULT;
	if (debug_env) {
		int requested = atoi(debug_env);

		/* Out-of-range requests keep the default level */
		if (!(requested < HSAKMT_DEBUG_LEVEL_ERR ||
		      requested > HSAKMT_DEBUG_LEVEL_DEBUG))
			hsakmt_debug_level = requested;
	}

	/* Check whether to support Zero frame buffer */
	zfb_env = getenv("HSA_ZFB");
	if (zfb_env)
		hsakmt_zfb_support = atoi(zfb_env);

	return HSAKMT_STATUS_SUCCESS;
}

/* Open the connection to KFD, or take another reference on an open one.
 *
 * The first successful call in a process (or the first one after a fork, or
 * after the reference count dropped back to zero) reads the environment,
 * opens /dev/kfd unless the model is in use, optionally opens /dev/udmabuf
 * (HSA_USE_UDMABUF), reads the system topology and sets up the debugging
 * and counter bookkeeping. Subsequent calls only bump hsakmt_kfd_open_count.
 *
 * Returns HSAKMT_STATUS_SUCCESS on the initializing call,
 * HSAKMT_STATUS_KERNEL_ALREADY_OPENED when only the reference count was
 * incremented, HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when /dev/kfd
 * cannot be opened, or the failing status of one of the init helpers.
 * All work is done while holding hsakmt_mutex.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
{
	HSAKMT_STATUS result;
	int fd = -1;
	/* Zeroed so that NumNodes is well defined even when the sysfs
	 * topology query below is skipped (hsakmt_use_model).
	 */
	HsaSystemProperties sys_props = {0};
	char *error;
	char *useSvmStr;
	char *useUdmaBuf;

	pthread_mutex_lock(&hsakmt_mutex);

	/* If the process has forked, the child process must re-initialize
	 * its connection to KFD. Any references tracked by hsakmt_kfd_open_count
	 * belong to the parent
	 */
	if (hsakmt_is_forked_child())
		clear_after_fork();

	if (hsakmt_kfd_open_count == 0) {
		static bool atfork_installed = false;

		/* Discard any error left over from an earlier dl* call so the
		 * dlerror() check below only reflects this dlsym().
		 */
		dlerror();
		hsakmt_fn_amdgpu_device_get_fd = dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
		if ((error = dlerror()) != NULL)
			pr_err("amdgpu_device_get_fd is not available: %s\n", error);
		else
			pr_info("amdgpu_device_get_fd is available %p\n", hsakmt_fn_amdgpu_device_get_fd);

		result = init_vars_from_env();
		if (result != HSAKMT_STATUS_SUCCESS)
			goto open_failed;

		// Check if we are using the hsakmtmodel and setup initial state
		model_init_env_vars();

		if (hsakmt_kfd_fd < 0 && !hsakmt_use_model) {
			fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);

			if (fd == -1) {
				result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
				goto open_failed;
			}

			hsakmt_kfd_fd = fd;
		}

		init_page_size();

		result = hsakmt_init_kfd_version();
		if (result != HSAKMT_STATUS_SUCCESS)
			goto kfd_version_failed;

		/* check if udmabuf is enabled by env HSA_USE_UDMABUF */
		useUdmaBuf = getenv("HSA_USE_UDMABUF");
		if (useUdmaBuf && atoi(useUdmaBuf)) {
			/* open udmabuf device */
			hsakmt_udmabuf_dev_fd = open(kfd_udmabuf_device_name, 0);
			if (hsakmt_udmabuf_dev_fd < 0)
				pr_debug("running kernel does not support udmabuf\n");
			else
				pr_debug("udmabuf is enabled\n");
		} else
			pr_debug("udmabuf is not enabled\n");

		/* The SVM API is considered supported unless HSA_USE_SVM=0 */
		useSvmStr = getenv("HSA_USE_SVM");
		hsakmt_is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0"));
		if (!hsakmt_use_model)
			result = hsakmt_topology_sysfs_get_system_props(&sys_props);

		if (result != HSAKMT_STATUS_SUCCESS)
			goto topology_sysfs_failed;

		hsakmt_kfd_open_count = 1;

		if (hsakmt_init_device_debugging_memory(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
			pr_warn("Insufficient Memory. Debugging unavailable\n");

		/* Failure here only disables profiling; it is reported by the
		 * callee and does not fail the open.
		 */
		hsakmt_init_counter_props(sys_props.NumNodes);

		if (!atfork_installed) {
			/* Atfork handlers cannot be uninstalled and
			 * must be installed only once. Otherwise
			 * prepare will deadlock when trying to take
			 * the same lock multiple times.
			 */
			pthread_atfork(prepare_fork_handler,
				       parent_fork_handler,
				       child_fork_handler);
			atfork_installed = true;
		}
	} else {
		hsakmt_kfd_open_count++;
		result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
	}

	pthread_mutex_unlock(&hsakmt_mutex);
	return result;
topology_sysfs_failed:
kfd_version_failed:
	/* fd is only >= 0 when this call opened /dev/kfd itself */
	if (fd >= 0) {
		close(fd);
		/* Do not leave the global referring to a closed descriptor,
		 * otherwise the next open attempt would skip open() and use it.
		 */
		hsakmt_kfd_fd = -1;
	}
open_failed:
	pthread_mutex_unlock(&hsakmt_mutex);

	return result;
}

/* Drop one reference taken by hsaKmtOpenKFD().
 *
 * When the last reference goes away the counter properties and the device
 * debugging memory are destroyed. Returns HSAKMT_STATUS_SUCCESS when a
 * reference was released and HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when
 * there was none to release. Runs under hsakmt_mutex.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;

	pthread_mutex_lock(&hsakmt_mutex);

	if (hsakmt_kfd_open_count > 0) {
		hsakmt_kfd_open_count--;
		if (hsakmt_kfd_open_count == 0) {
			/* Last user is gone: tear down per-open bookkeeping */
			hsakmt_destroy_counter_props();
			hsakmt_destroy_device_debugging_memory();
		}
		status = HSAKMT_STATUS_SUCCESS;
	}

	pthread_mutex_unlock(&hsakmt_mutex);

	return status;
}


================================================
FILE: libhsakmt/src/pc_sampling.c
================================================
/*
 * Copyright © 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>

#define INVALID_TRACE_ID 0x0

/* Report whether PC sampling can be used.
 *
 * Returns HSAKMT_STATUS_SUCCESS when execution falls through both guard
 * macros. NOTE(review): CHECK_KFD_OPEN() and CHECK_KFD_MINOR_VERSION(16) are
 * defined outside this file; presumably they return an error status when KFD
 * is not open or its interface minor version is below 16 -- confirm against
 * their definitions.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingSupport(void)
{
    CHECK_KFD_OPEN();
    CHECK_KFD_MINOR_VERSION(16);

    return HSAKMT_STATUS_SUCCESS;
}

/* Ask KFD which PC sampling configurations node NodeId supports
 * (KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES).
 *
 * sample_info    - caller buffer handed to the ioctl as sample_info_ptr.
 * sample_info_sz - passed to the ioctl as num_sample_info.
 * size           - required; receives num_sample_info as left by the ioctl,
 *                  whether or not the ioctl succeeded.
 *
 * errno of a failed ioctl is mapped as: ENOSPC -> BUFFER_TOO_SMALL,
 * EINVAL -> INVALID_PARAMETER, EOPNOTSUPP -> NOT_SUPPORTED,
 * EBUSY -> UNAVAILABLE, anything else -> ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void *sample_info,
                            HSAuint32 sample_info_sz, HSAuint32 *size)
{
    struct kfd_ioctl_pc_sample_args args = {0};
    uint32_t gpu_id;
    HSAKMT_STATUS status;
    int rc;

    if (!size)
        return HSAKMT_STATUS_INVALID_PARAMETER;

    CHECK_KFD_OPEN();
    CHECK_KFD_MINOR_VERSION(16);

    status = hsakmt_validate_nodeid(NodeId, &gpu_id);
    if (status != HSAKMT_STATUS_SUCCESS) {
        pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
        return status;
    }
    /* The user-facing and kernel-facing structs must be interchangeable */
    assert(sizeof(HsaPcSamplingInfo) == sizeof(struct kfd_pc_sample_info));

    args.flags = 0;
    args.num_sample_info = sample_info_sz;
    args.sample_info_ptr = (uint64_t)sample_info;
    args.gpu_id = gpu_id;
    args.op = KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES;

    rc = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);

    /* Reported back even on failure */
    *size = args.num_sample_info;

    if (!rc)
        return HSAKMT_STATUS_SUCCESS;

    if (errno == ENOSPC)
        return HSAKMT_STATUS_BUFFER_TOO_SMALL;
    if (errno == EINVAL)
        return HSAKMT_STATUS_INVALID_PARAMETER;
    if (errno == EOPNOTSUPP)
        return HSAKMT_STATUS_NOT_SUPPORTED;
    if (errno == EBUSY)
        return HSAKMT_STATUS_UNAVAILABLE;

    return HSAKMT_STATUS_ERROR;
}

/* Create a PC sampling session on node NodeId (KFD_IOCTL_PCS_OP_CREATE).
 *
 * sample_info - required; the single configuration to create.
 * traceId     - required; set to INVALID_TRACE_ID once KFD is known to be
 *               open and replaced by the id from the ioctl on success.
 *
 * errno of a failed ioctl is mapped as: EINVAL -> INVALID_PARAMETER,
 * ENOMEM -> NO_MEMORY, EBUSY -> UNAVAILABLE, anything else -> ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingInfo *sample_info,
						HsaPcSamplingTraceId *traceId)
{
    struct kfd_ioctl_pc_sample_args args = {0};
    uint32_t gpu_id;
    HSAKMT_STATUS status;

    if (!sample_info || !traceId)
        return HSAKMT_STATUS_INVALID_PARAMETER;

    CHECK_KFD_OPEN();

    *traceId = INVALID_TRACE_ID;
    status = hsakmt_validate_nodeid(NodeId, &gpu_id);
    if (status != HSAKMT_STATUS_SUCCESS) {
        pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
        return status;
    }

    args.trace_id = INVALID_TRACE_ID;
    args.num_sample_info = 1;
    args.sample_info_ptr = (uint64_t)sample_info;
    args.gpu_id = gpu_id;
    args.op = KFD_IOCTL_PCS_OP_CREATE;

    if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args)) {
        if (errno == EINVAL)
            return HSAKMT_STATUS_INVALID_PARAMETER;
        if (errno == ENOMEM)
            return HSAKMT_STATUS_NO_MEMORY;
        if (errno == EBUSY)
            return HSAKMT_STATUS_UNAVAILABLE;
        return HSAKMT_STATUS_ERROR;
    }

    /* Hand the id assigned by the ioctl back to the caller */
    *traceId = args.trace_id;
    return HSAKMT_STATUS_SUCCESS;
}

/* Destroy the PC sampling session traceId on node NodeId
 * (KFD_IOCTL_PCS_OP_DESTROY).
 *
 * The session is stopped first; the outcome of that stop is deliberately
 * ignored. Returns INVALID_HANDLE for INVALID_TRACE_ID, the node validation
 * status for a bad node, INVALID_PARAMETER when the ioctl fails with EINVAL
 * and ERROR for any other ioctl failure.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
    struct kfd_ioctl_pc_sample_args args = {0};
    uint32_t gpu_id;
    HSAKMT_STATUS status;

    if (traceId == INVALID_TRACE_ID)
        return HSAKMT_STATUS_INVALID_HANDLE;

    CHECK_KFD_OPEN();

    status = hsakmt_validate_nodeid(NodeId, &gpu_id);
    if (status != HSAKMT_STATUS_SUCCESS) {
        pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
        return status;
    }

    /* Best effort: make sure sampling is not running any more */
    hsaKmtPcSamplingStop(NodeId, traceId);

    args.trace_id = traceId;
    args.gpu_id = gpu_id;
    args.op = KFD_IOCTL_PCS_OP_DESTROY;

    if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args)) {
        switch (errno) {
        case EINVAL:
            return HSAKMT_STATUS_INVALID_PARAMETER;
        default:
            return HSAKMT_STATUS_ERROR;
        }
    }

    return HSAKMT_STATUS_SUCCESS;
}

/* Start the PC sampling session traceId on node NodeId
 * (KFD_IOCTL_PCS_OP_START).
 *
 * Returns INVALID_HANDLE for INVALID_TRACE_ID and the node validation status
 * for a bad node. errno of a failed ioctl is mapped as:
 * EINVAL -> INVALID_PARAMETER, ENOMEM -> OUT_OF_RESOURCES,
 * EBUSY -> UNAVAILABLE, EALREADY -> KERNEL_ALREADY_OPENED,
 * anything else -> ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
    struct kfd_ioctl_pc_sample_args args = {0};
    uint32_t gpu_id;
    HSAKMT_STATUS status;

    if (traceId == INVALID_TRACE_ID)
        return HSAKMT_STATUS_INVALID_HANDLE;

    CHECK_KFD_OPEN();

    status = hsakmt_validate_nodeid(NodeId, &gpu_id);
    if (status != HSAKMT_STATUS_SUCCESS) {
        pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
        return status;
    }

    args.trace_id = traceId;
    args.gpu_id = gpu_id;
    args.op = KFD_IOCTL_PCS_OP_START;

    if (!hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args))
        return HSAKMT_STATUS_SUCCESS;

    if (errno == EINVAL)
        return HSAKMT_STATUS_INVALID_PARAMETER;
    if (errno == ENOMEM)
        return HSAKMT_STATUS_OUT_OF_RESOURCES;
    if (errno == EBUSY)
        return HSAKMT_STATUS_UNAVAILABLE;
    if (errno == EALREADY)
        return HSAKMT_STATUS_KERNEL_ALREADY_OPENED;

    return HSAKMT_STATUS_ERROR;
}

/* Stop the PC sampling session traceId on node NodeId
 * (KFD_IOCTL_PCS_OP_STOP).
 *
 * Returns INVALID_HANDLE for INVALID_TRACE_ID and the node validation status
 * for a bad node. errno of a failed ioctl is mapped as:
 * EINVAL -> INVALID_PARAMETER, EALREADY -> KERNEL_ALREADY_OPENED,
 * anything else -> ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
    struct kfd_ioctl_pc_sample_args args = {0};
    uint32_t gpu_id;
    HSAKMT_STATUS status;

    if (traceId == INVALID_TRACE_ID)
        return HSAKMT_STATUS_INVALID_HANDLE;

    CHECK_KFD_OPEN();

    status = hsakmt_validate_nodeid(NodeId, &gpu_id);
    if (status != HSAKMT_STATUS_SUCCESS) {
        pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
        return status;
    }

    args.trace_id = traceId;
    args.gpu_id = gpu_id;
    args.op = KFD_IOCTL_PCS_OP_STOP;

    if (!hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args))
        return HSAKMT_STATUS_SUCCESS;

    if (errno == EINVAL)
        return HSAKMT_STATUS_INVALID_PARAMETER;
    if (errno == EALREADY)
        return HSAKMT_STATUS_KERNEL_ALREADY_OPENED;

    return HSAKMT_STATUS_ERROR;
}


================================================
FILE: libhsakmt/src/perfctr.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include "libhsakmt.h"
#include "pmc_table.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <unistd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <semaphore.h>

/* Divisor used to turn a counter width in bits into bytes */
#define BITS_PER_BYTE		CHAR_BIT

/* Tag stored in perf_trace.magic4cc; every API taking a trace ID checks it
 * before trusting the handle.
 */
#define HSA_PERF_MAGIC4CC	0x54415348

/* Run state of a perf_trace: a trace is created STOPPED and marked STARTED
 * by hsaKmtPmcStartTrace().
 */
enum perf_trace_state {
	PERF_TRACE_STATE__STOPPED = 0,
	PERF_TRACE_STATE__STARTED
};

/* Counters of one hardware block that take part in a trace. Both arrays have
 * num_counters entries and live in the same allocation as the owning
 * perf_trace (see hsaKmtPmcRegisterTrace).
 */
struct perf_trace_block {
	enum perf_block_id block_id;	/* which hardware block */
	uint32_t num_counters;		/* entries in the two arrays below */
	uint64_t *counter_id;		/* requested counter IDs */
	int *perf_event_fd;		/* one descriptor per counter; negative values are treated as "not open" */
};

/* Object behind an HSATraceId: the trace ID handed to callers is the address
 * of one of these.
 */
struct perf_trace {
	uint32_t magic4cc;		/* HSA_PERF_MAGIC4CC for a valid handle */
	uint32_t gpu_id;		/* GPU the trace was registered for */
	enum perf_trace_state state;
	uint32_t num_blocks;		/* entries in blocks[] */
	void *buf;			/* caller's buffer, set by hsaKmtPmcStartTrace */
	uint64_t buf_size;		/* size of buf in bytes */
	struct perf_trace_block blocks[0];	/* trailing array, num_blocks long */
};

/* Three 64-bit values read in one go from a perf event descriptor by
 * query_trace(), which only consumes val.
 * NOTE(review): ena/run presumably hold the enabled and running times of the
 * perf read format -- confirm against how the descriptors are opened.
 */
struct perf_counts_values {
	union {
		struct {
			uint64_t val;
			uint64_t ena;
			uint64_t run;
		};
		uint64_t values[3];
	};
};

/* Per-node cache of counter properties, indexed by node ID. The table is
 * allocated by hsakmt_init_counter_props(); entries are filled on demand by
 * hsaKmtPmcGetCounterProperties().
 */
static HsaCounterProperties **counter_props;
/* Number of entries in counter_props */
static unsigned int counter_props_count;

/* Read exactly n bytes from fd into buf, retrying short reads and reads
 * interrupted by a signal.
 *
 * Returns n on success, the number of bytes obtained so far if end of file
 * is reached first, or -errno if read() fails for a reason other than EINTR.
 */
static ssize_t readn(int fd, void *buf, size_t n)
{
	size_t remaining;
	ssize_t got;

	for (remaining = n; remaining > 0; ) {
		got = read(fd, buf, remaining);
		if (got == 0) /* end of file */
			return (n - remaining);
		if (got < 0) {
			if (errno != EINTR)
				return -errno;
			/* interrupted before any data arrived: try again */
			continue;
		}
		remaining -= got;
		buf = VOID_PTR_ADD(buf, got);
	}

	return n;
}

HSAKMT_STATUS hsakmt_init_counter_props(unsigned int NumNodes)
{
	counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties *));
	if (!counter_props) {
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	counter_props_count = NumNodes;

	return HSAKMT_STATUS_SUCCESS;
}

/* Free every cached per-node counter property blob and the table itself.
 *
 * The table pointer and its length are reset afterwards so that later
 * callers see "no table" (counter_props == NULL) instead of a dangling
 * pointer, and so that calling this function twice is harmless.
 */
void hsakmt_destroy_counter_props(void)
{
	unsigned int i;

	if (!counter_props)
		return;

	for (i = 0; i < counter_props_count; i++)
		if (counter_props[i]) {
			free(counter_props[i]);
			counter_props[i] = NULL;
		}

	free(counter_props);
	counter_props = NULL;
	counter_props_count = 0;
}

/* Translate an internal performance block ID into the matching
 * HSA_PROFILEBLOCK_AMD_* UUID.
 *
 * block_id - one of the PERFCOUNTER_BLOCKID__* values handled below.
 * uuid     - receives the UUID; left untouched for an unknown block_id.
 *
 * Returns 0 on success and -1 when block_id has no mapping.
 */
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	int rc = 0;

	switch (block_id) {
	case PERFCOUNTER_BLOCKID__CB:
		*uuid = HSA_PROFILEBLOCK_AMD_CB;
		break;
	case PERFCOUNTER_BLOCKID__CPF:
		*uuid = HSA_PROFILEBLOCK_AMD_CPF;
		break;
	case PERFCOUNTER_BLOCKID__CPG:
		*uuid = HSA_PROFILEBLOCK_AMD_CPG;
		break;
	case PERFCOUNTER_BLOCKID__DB:
		*uuid = HSA_PROFILEBLOCK_AMD_DB;
		break;
	case PERFCOUNTER_BLOCKID__GDS:
		*uuid = HSA_PROFILEBLOCK_AMD_GDS;
		break;
	case PERFCOUNTER_BLOCKID__GRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBM;
		break;
	case PERFCOUNTER_BLOCKID__GRBMSE:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBMSE;
		break;
	case PERFCOUNTER_BLOCKID__IA:
		*uuid = HSA_PROFILEBLOCK_AMD_IA;
		break;
	case PERFCOUNTER_BLOCKID__MC:
		*uuid = HSA_PROFILEBLOCK_AMD_MC;
		break;
	case PERFCOUNTER_BLOCKID__PASC:
		*uuid = HSA_PROFILEBLOCK_AMD_PASC;
		break;
	case PERFCOUNTER_BLOCKID__PASU:
		*uuid = HSA_PROFILEBLOCK_AMD_PASU;
		break;
	case PERFCOUNTER_BLOCKID__SPI:
		*uuid = HSA_PROFILEBLOCK_AMD_SPI;
		break;
	case PERFCOUNTER_BLOCKID__SRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_SRBM;
		break;
	case PERFCOUNTER_BLOCKID__SQ:
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		break;
	case PERFCOUNTER_BLOCKID__SX:
		*uuid = HSA_PROFILEBLOCK_AMD_SX;
		break;
	case PERFCOUNTER_BLOCKID__TA:
		*uuid = HSA_PROFILEBLOCK_AMD_TA;
		break;
	case PERFCOUNTER_BLOCKID__TCA:
		*uuid = HSA_PROFILEBLOCK_AMD_TCA;
		break;
	case PERFCOUNTER_BLOCKID__TCC:
		*uuid = HSA_PROFILEBLOCK_AMD_TCC;
		break;
	case PERFCOUNTER_BLOCKID__TCP:
		*uuid = HSA_PROFILEBLOCK_AMD_TCP;
		break;
	case PERFCOUNTER_BLOCKID__TCS:
		*uuid = HSA_PROFILEBLOCK_AMD_TCS;
		break;
	case PERFCOUNTER_BLOCKID__TD:
		*uuid = HSA_PROFILEBLOCK_AMD_TD;
		break;
	case PERFCOUNTER_BLOCKID__VGT:
		*uuid = HSA_PROFILEBLOCK_AMD_VGT;
		break;
	case PERFCOUNTER_BLOCKID__WD:
		*uuid = HSA_PROFILEBLOCK_AMD_WD;
		break;
	default:
		/* If we reach this point, it's a bug */
		rc = -1;
		break;
	}

	return rc;
}

/* Look up how many counters of block block_id may be used concurrently on
 * node node_id, according to the cached counter properties.
 *
 * The cached blob is a packed sequence of NumBlocks variable-length
 * HsaCounterBlockProperties records, each followed directly by the next one
 * after its last counter, so the walk advances past Counters[NumCounters].
 *
 * Returns the block's NumConcurrent, or 0 when the node has no cached
 * properties (hsaKmtPmcGetCounterProperties was not called for it), the
 * node index is out of range, or the block is not present.
 */
static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
						HSAuint32 block_id)
{
	uint32_t i;
	HsaCounterProperties *props;
	HsaCounterBlockProperties *block;

	if (!counter_props || node_id >= counter_props_count)
		return 0;

	props = counter_props[node_id];
	if (!props)
		return 0;

	block = &props->Blocks[0];
	/* Only NumBlocks records were written into the blob; walking any
	 * further would read past the end of the allocation.
	 */
	for (i = 0; i < props->NumBlocks; i++) {
		/* A record without counters has no Counters[0] to inspect */
		if (block->NumCounters &&
		    block->Counters[0].BlockIndex == block_id)
			return block->NumConcurrent;
		block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
	}

	return 0;
}

/* Issue the argument-less ioctl cmd on every perf event descriptor of block,
 * in order, stopping at the first problem.
 *
 * Returns HSAKMT_STATUS_UNAVAILABLE when a descriptor is negative,
 * HSAKMT_STATUS_ERROR when an ioctl fails, HSAKMT_STATUS_SUCCESS when all
 * descriptors were processed.
 */
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
				      uint32_t cmd)
{
	uint32_t idx = 0;

	while (idx < block->num_counters) {
		int event_fd = block->perf_event_fd[idx++];

		if (event_fd < 0)
			return HSAKMT_STATUS_UNAVAILABLE;
		if (ioctl(event_fd, cmd, NULL) != 0)
			return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
{
	struct perf_counts_values content;

	if (fd < 0)
		return HSAKMT_STATUS_ERROR;
	if (readn(fd, &content, sizeof(content)) != sizeof(content))
		return HSAKMT_STATUS_ERROR;

	*buf = content.val;
	return HSAKMT_STATUS_SUCCESS;
}

/* Return the performance counter properties of node NodeId.
 *
 * The description is built once per node and cached in counter_props[NodeId];
 * later calls return the cached pointer. The caller receives a pointer into
 * that cache through *CounterProperties.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the cache table does not exist or the
 * blob cannot be allocated, HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * CounterProperties, HSAKMT_STATUS_INVALID_NODE_UNIT for a rejected node, or
 * the failing status of hsakmt_get_block_properties().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
						      HsaCounterProperties **CounterProperties)
{
	HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
	uint32_t gpu_id, i, block_id;
	uint32_t counter_props_size = 0;
	uint32_t total_counters = 0;
	uint32_t total_concurrent = 0;
	struct perf_counter_block block = {0};
	uint32_t total_blocks = 0;
	HsaCounterBlockProperties *block_prop;

	if (!counter_props)
		return HSAKMT_STATUS_NO_MEMORY;

	if (!CounterProperties)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* Already built for this node: hand out the cached copy */
	if (counter_props[NodeId]) {
		*CounterProperties = counter_props[NodeId];
		return HSAKMT_STATUS_SUCCESS;
	}

	/* First pass: count blocks, counters and slots to size the blob */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		rc = hsakmt_get_block_properties(NodeId, i, &block);
		if (rc != HSAKMT_STATUS_SUCCESS)
			return rc;
		total_concurrent += block.num_of_slots;
		total_counters += block.num_of_counters;
		/* If num_of_slots=0, this block doesn't exist */
		if (block.num_of_slots)
			total_blocks++;
	}

	/* NOTE(review): the "- 1" and "- total_blocks" terms presumably
	 * account for HsaCounterProperties already embedding one block and
	 * each HsaCounterBlockProperties already embedding one counter --
	 * confirm against the struct definitions.
	 */
	counter_props_size = sizeof(HsaCounterProperties) +
			sizeof(HsaCounterBlockProperties) * (total_blocks - 1) +
			sizeof(HsaCounter) * (total_counters - total_blocks);

	counter_props[NodeId] = malloc(counter_props_size);
	if (!counter_props[NodeId])
		return HSAKMT_STATUS_NO_MEMORY;

	counter_props[NodeId]->NumBlocks = total_blocks;
	counter_props[NodeId]->NumConcurrent = total_concurrent;

	/* Second pass: write one packed record per existing block */
	block_prop = &counter_props[NodeId]->Blocks[0];
	for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
		rc = hsakmt_get_block_properties(NodeId, block_id, &block);
		if (rc != HSAKMT_STATUS_SUCCESS) {
			/* Do not leave a half-built blob in the cache */
			free(counter_props[NodeId]);
			counter_props[NodeId] = NULL;
			return rc;
		}

		if (!block.num_of_slots) /* not a valid block */
			continue;

		blockid2uuid(block_id, &block_prop->BlockId);
		block_prop->NumCounters = block.num_of_counters;
		block_prop->NumConcurrent = block.num_of_slots;
		for (i = 0; i < block.num_of_counters; i++) {
			block_prop->Counters[i].BlockIndex = block_id;
			block_prop->Counters[i].CounterId = block.counter_ids[i];
			block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits;
			block_prop->Counters[i].CounterMask = block.counter_mask;
			block_prop->Counters[i].Flags.ui32.Global = 1;
			block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
		}

		/* The next record starts right after this block's last counter */
		block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
	}

	*CounterProperties = counter_props[NodeId];

	return HSAKMT_STATUS_SUCCESS;
}

/* Registers a set of (HW) counters to be used for tracing/profiling.
 *
 * NodeId           - node the counters belong to.
 * NumberOfCounters - entries in Counters; must be 1..MAX_COUNTERS.
 * Counters         - counters to trace. Entries whose Type is above
 *                    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING are skipped.
 * TraceRoot        - receives the trace ID (the address of the trace object
 *                    allocated here), the minimum buffer size and the number
 *                    of passes.
 *
 * The trace object stays allocated until hsaKmtPmcUnregisterTrace() frees
 * it; the caller owns it through TraceRoot->TraceId.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY on allocation failure, when profiling is
 * unavailable or when NumberOfCounters exceeds MAX_COUNTERS,
 * HSAKMT_STATUS_INVALID_NODE_UNIT for a rejected node,
 * HSAKMT_STATUS_INVALID_PARAMETER for bad arguments, unknown blocks or too
 * many counters for a block, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
					       HSAuint32 NumberOfCounters,
					       HsaCounter *Counters,
					       HsaPmcTraceRoot *TraceRoot)
{
	uint32_t gpu_id, i, j;
	uint64_t min_buf_size = 0;
	struct perf_trace *trace = NULL;
	uint32_t concurrent_limit;
	const uint32_t MAX_COUNTERS = 512;

	/* Declare performance counter ID 2D array as a contiguous block */
	uint64_t *counter_id = malloc(
			PERFCOUNTER_BLOCKID__MAX * MAX_COUNTERS * sizeof(uint64_t));
	uint32_t num_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
	uint32_t block, num_blocks = 0, total_counters = 0;
	uint64_t *counter_id_ptr;
	int *fd_ptr;

	pr_debug("[%s] Number of counters %d\n", __func__, NumberOfCounters);

	if (counter_id == NULL) {
		pr_err("Failed to allocate memory for counter_id. Requested %zu bytes.\n",
				PERFCOUNTER_BLOCKID__MAX * MAX_COUNTERS * sizeof(uint64_t));
		return HSAKMT_STATUS_NO_MEMORY;
	}

	if (!counter_props) {
		pr_err("Profiling is not available, counter_props is NULL.\n");
		goto no_memory_exit;
	}

	if (!Counters || !TraceRoot || NumberOfCounters == 0)
		goto invalid_parameter_exit;

	if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
		free(counter_id);
		return HSAKMT_STATUS_INVALID_NODE_UNIT;
	}

	if (NumberOfCounters > MAX_COUNTERS) {
		pr_err("MAX_COUNTERS is too small for %d.\n", NumberOfCounters);
		goto no_memory_exit;
	}

	/* Calculating the minimum buffer size */
	for (i = 0; i < NumberOfCounters; i++) {
		if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
			goto invalid_parameter_exit;
		/* Only privileged counters need to register */
		if (Counters[i].Type > HSA_PROFILE_TYPE_PRIVILEGED_STREAMING)
			continue;
		min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
		/* j: the first blank entry in the block to record counter_id */
		j = num_counters[Counters[i].BlockIndex];
		/* Make sure counter_id stays within bounds */
		if (j >= MAX_COUNTERS) {
			pr_err("Counter ID exceeded MAX_COUNTERS for block %d.\n",
					Counters[i].BlockIndex);
			goto invalid_parameter_exit;
		}
		/* Initialize counter_id */
		counter_id[Counters[i].BlockIndex * MAX_COUNTERS + j] = Counters[i].CounterId;
		num_counters[Counters[i].BlockIndex]++;
		total_counters++;
	}

	/* Verify that the number of counters per block is not larger than the
	 * number of slots.
	 */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		if (!num_counters[i])
			continue;
		concurrent_limit = get_block_concurrent_limit(NodeId, i);
		if (!concurrent_limit) {
			pr_err("Invalid block ID: %d\n", i);
			goto invalid_parameter_exit;
		}
		if (num_counters[i] > concurrent_limit) {
			pr_err("Counters exceed the limit.\n");
			goto invalid_parameter_exit;
		}
		num_blocks++;
	}

	if (!num_blocks)
		goto invalid_parameter_exit;

	/* Now we have sorted blocks/counters information in
	 * num_counters[block_id] and counter_id[block_id][]. Allocate trace
	 * and record the information.
	 */
	trace = (struct perf_trace *)calloc(sizeof(struct perf_trace)
			+ sizeof(struct perf_trace_block) * num_blocks
			+ sizeof(uint64_t) * total_counters
			+ sizeof(int) * total_counters,
			1);
	if (!trace) {
		pr_err("Failed to allocate memory for trace. Requested %zu bytes.\n",
				sizeof(struct perf_trace)
				+ sizeof(struct perf_trace_block) * num_blocks
				+ sizeof(uint64_t) * total_counters
				+ sizeof(int) * total_counters);
		goto no_memory_exit;
	}

	/* Allocated area is partitioned as:
	 * +---------------------------------+ trace
	 * |    perf_trace                   |
	 * |---------------------------------| trace->blocks[0]
	 * | perf_trace_block 0              |
	 * | ....                            |
	 * | perf_trace_block N-1            | trace->blocks[N-1]
	 * |---------------------------------| <-- counter_id_ptr starts here
	 * | block 0's counter IDs(uint64_t) |
	 * | ......                          |
	 * | block N-1's counter IDs         |
	 * |---------------------------------| <-- perf_event_fd starts here
	 * | block 0's perf_event_fds(int)   |
	 * | ......                          |
	 * | block N-1's perf_event_fds      |
	 * +---------------------------------+
	 */
	block = 0;
	counter_id_ptr = (uint64_t *)((char *)
			trace + sizeof(struct perf_trace)
			+ sizeof(struct perf_trace_block) * num_blocks);
	fd_ptr = (int *)(counter_id_ptr + total_counters);
	/* Fill in each block's information to the TraceId */
	for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
		if (!num_counters[i]) /* not a block to trace */
			continue;
		/* Following perf_trace + perf_trace_block x N are those
		 * counter_id arrays. Assign the counter_id array belonging to
		 * this block.
		 */
		trace->blocks[block].counter_id = counter_id_ptr;
		trace->blocks[block].perf_event_fd = fd_ptr;
		for (j = 0; j < num_counters[i]; j++) {
			/* Fill in counter IDs to the counter_id array. */
			trace->blocks[block].counter_id[j] = counter_id[i * MAX_COUNTERS + j];
			/* No perf event is open yet. calloc() left 0 here,
			 * which is a valid descriptor (stdin); mark the slot
			 * as closed so the "fd < 0" checks elsewhere work.
			 */
			trace->blocks[block].perf_event_fd[j] = -1;
		}
		/* how many counters to trace */
		trace->blocks[block].num_counters = num_counters[i];
		/* block index in "enum perf_block_id" */
		trace->blocks[block].block_id = i;
		block++; /* move to next */
		counter_id_ptr += num_counters[i];
		fd_ptr += num_counters[i];
	}

	trace->magic4cc = HSA_PERF_MAGIC4CC;
	trace->gpu_id = gpu_id;
	trace->state = PERF_TRACE_STATE__STOPPED;
	trace->num_blocks = num_blocks;

	TraceRoot->NumberOfPasses = 1;
	TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
	TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);

	/* trace is now owned by the caller through TraceRoot->TraceId and is
	 * released by hsaKmtPmcUnregisterTrace(); freeing it here would hand
	 * out a dangling handle. Only the scratch array goes away.
	 */
	free(counter_id);
	return HSAKMT_STATUS_SUCCESS;

	no_memory_exit:
		free(counter_id);
		return HSAKMT_STATUS_NO_MEMORY;

	invalid_parameter_exit:
		free(counter_id);
		return HSAKMT_STATUS_INVALID_PARAMETER;
}

/* Unregisters a set of (HW) counters used for tracing/profiling and frees
 * the trace object behind TraceId.
 *
 * A trace that is still started is stopped first; if stopping fails its
 * status is returned and the trace is kept. Returns INVALID_PARAMETER for a
 * zero TraceId, INVALID_NODE_UNIT for a rejected node or a trace registered
 * for a different GPU, and INVALID_HANDLE when the magic tag does not match.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						 HSATraceId TraceId)
{
	struct perf_trace *handle;
	uint32_t node_gpu_id;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (hsakmt_validate_nodeid(NodeId, &node_gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (handle->gpu_id != node_gpu_id)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* A running trace has to be stopped before it can go away */
	if (handle->state == PERF_TRACE_STATE__STARTED) {
		HSAKMT_STATUS stop_status = hsaKmtPmcStopTrace(TraceId);

		if (stop_status != HSAKMT_STATUS_SUCCESS)
			return stop_status;
	}

	free(handle);

	return HSAKMT_STATUS_SUCCESS;
}

/* Validate the trace handle and the node; no other work is done here.
 *
 * Returns INVALID_PARAMETER for a zero TraceId, INVALID_HANDLE when the
 * magic tag does not match, INVALID_NODE_UNIT for a rejected node and
 * SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	struct perf_trace *handle;
	uint32_t node_gpu_id;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (hsakmt_validate_nodeid(NodeId, &node_gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	return HSAKMT_STATUS_SUCCESS;
}

/* Validate the trace handle; NodeId is not used and nothing is released.
 *
 * Returns INVALID_PARAMETER for a zero TraceId, INVALID_HANDLE when the
 * magic tag does not match and SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	struct perf_trace *handle;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	return (handle->magic4cc == HSA_PERF_MAGIC4CC)
			? HSAKMT_STATUS_SUCCESS
			: HSAKMT_STATUS_INVALID_HANDLE;
}


/* Starts tracing operation on a previously established set of performance
 * counters.
 *
 * Every block of the trace is enabled in turn. If enabling a block fails,
 * the blocks enabled before it are disabled again (last to first) and the
 * failing status is returned. On success the trace is marked started and
 * remembers TraceBuffer / TraceBufferSizeBytes.
 *
 * Returns INVALID_PARAMETER for a zero TraceId, NULL buffer or zero size,
 * and INVALID_HANDLE when the magic tag does not match.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(HSATraceId TraceId,
					    void *TraceBuffer,
					    HSAuint64 TraceBufferSizeBytes)
{
	struct perf_trace *handle =
			(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	uint32_t enabled;
	HSAKMT_STATUS status;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId || TraceBuffer == NULL || !TraceBufferSizeBytes)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (enabled = 0; enabled < handle->num_blocks; enabled++) {
		status = perf_trace_ioctl(&handle->blocks[enabled],
					  PERF_EVENT_IOC_ENABLE);
		if (status == HSAKMT_STATUS_SUCCESS)
			continue;

		/* Disable enabled blocks before returning the failure. */
		while (enabled-- > 0)
			perf_trace_ioctl(&handle->blocks[enabled],
					 PERF_EVENT_IOC_DISABLE);
		return status;
	}

	handle->buf_size = TraceBufferSizeBytes;
	handle->buf = TraceBuffer;
	handle->state = PERF_TRACE_STATE__STARTED;

	return HSAKMT_STATUS_SUCCESS;
}


/*
 * hsaKmtPmcQueryTrace - force an update of all the counters that a
 * previously started trace operation has registered.
 *
 * Walks every block of the trace and, per block, every counter, handing
 * query_trace() the counter's perf event fd together with the next
 * uint64_t slot of the buffer recorded in the trace. Returns
 * HSAKMT_STATUS_NO_MEMORY as soon as the next slot would not fit within
 * the recorded buffer size; slots filled before that point are kept. A
 * query_trace() failure is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(HSATraceId TraceId)
{
	struct perf_trace *handle;
	HSAKMT_STATUS status;
	uint64_t bytes_used = 0;
	uint64_t *slot;
	uint32_t b, c;

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	slot = (uint64_t *)handle->buf;
	pr_debug("[%s] Trace buffer(%p): ", __func__, slot);

	for (b = 0; b < handle->num_blocks; b++) {
		for (c = 0; c < handle->blocks[b].num_counters; c++, slot++) {
			/* Make sure the slot fits before anything is written */
			bytes_used += sizeof(uint64_t);
			if (bytes_used > handle->buf_size)
				return HSAKMT_STATUS_NO_MEMORY;

			status = query_trace(handle->blocks[b].perf_event_fd[c],
					slot);
			if (status != HSAKMT_STATUS_SUCCESS)
				return status;

			pr_debug("%lu_", *slot);
		}
	}
	pr_debug("\n");

	return HSAKMT_STATUS_SUCCESS;
}


/*
 * hsaKmtPmcStopTrace - stop tracing on a previously established set of
 * performance counters.
 *
 * @TraceId: trace handle; must be non-zero and carry the expected magic
 *           value
 *
 * Issues PERF_EVENT_IOC_DISABLE on every block of the trace. A failure on
 * one block does not prevent the remaining blocks from being disabled;
 * the first error seen is returned once all blocks have been tried. The
 * trace is marked STOPPED only when every block was disabled, so a failed
 * stop can be retried.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER or HSAKMT_STATUS_INVALID_HANDLE
 * for bad arguments, the first perf_trace_ioctl() error, or
 * HSAKMT_STATUS_SUCCESS.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
{
	struct perf_trace *trace =
			(struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	uint32_t i;
	HSAKMT_STATUS ret;
	HSAKMT_STATUS first_err = HSAKMT_STATUS_SUCCESS;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (TraceId == 0)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (trace->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (i = 0; i < trace->num_blocks; i++) {
		ret = perf_trace_ioctl(&trace->blocks[i],
					PERF_EVENT_IOC_DISABLE);
		/* Keep going so later blocks are not left counting */
		if (ret != HSAKMT_STATUS_SUCCESS &&
		    first_err == HSAKMT_STATUS_SUCCESS)
			first_err = ret;
	}

	if (first_err != HSAKMT_STATUS_SUCCESS)
		return first_err;

	trace->state = PERF_TRACE_STATE__STOPPED;

	return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/src/pmc_table.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "libhsakmt.h"
#include "pmc_table.h"

/****** CB ******/
/* CB counter IDs for gfx7: contiguous values 0 through 225 (226 entries). */
static uint32_t gfx7_cb_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225
};

/* CB counter IDs for gfx8: contiguous values 0 through 395 (396 entries). */
static uint32_t gfx8_cb_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395
};

/* CB counter IDs for gfx9: contiguous values 0 through 437 (438 entries). */
static uint32_t gfx9_cb_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436,
437
};

/* CB counter IDs for gfx10: contiguous values 0 through 460 (461 entries). */
static uint32_t gfx10_cb_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436,
437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452,
453, 454, 455, 456, 457, 458, 459, 460
};

/****** CPF ******/
/* CPF counter IDs for gfx7: contiguous values 0 through 16 (17 entries). */
static uint32_t gfx7_cpf_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
};

/* CPF counter IDs for gfx8: contiguous values 0 through 19 (20 entries). */
static uint32_t gfx8_cpf_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
};

/* CPF counter IDs for gfx9: contiguous values 0 through 31 (32 entries). */
static uint32_t gfx9_cpf_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};

/* CPF counter IDs for gfx10: contiguous values 0 through 39 (40 entries). */
static uint32_t gfx10_cpf_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39
};

/****** CPG ******/
/* CPG counter IDs for gfx7: contiguous values 0 through 45 (46 entries). */
static uint32_t gfx7_cpg_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45
};

/* CPG counter IDs for gfx8: contiguous values 0 through 48 (49 entries). */
static uint32_t gfx8_cpg_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48
};

/* CPG counter IDs for gfx9: contiguous values 0 through 58 (59 entries). */
static uint32_t gfx9_cpg_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
};

/* CPG counter IDs for gfx10: contiguous values 0 through 81 (82 entries). */
static uint32_t gfx10_cpg_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81
};

/****** DB ******/
/* DB counter IDs for gfx7: contiguous values 0 through 256 (257 entries). */
static uint32_t gfx7_db_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256
};
/* gfx8_db_counter_ids are the same as gfx7_db_counter_ids */

/* DB counter IDs for gfx9: contiguous values 0 through 327 (328 entries). */
static uint32_t gfx9_db_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327
};

/* DB counter IDs for gfx10: contiguous values 0 through 369 (370 entries). */
static uint32_t gfx10_db_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369
};

/****** GDS ******/
/* GDS counter IDs for gfx7: contiguous values 0 through 120 (121 entries). */
static uint32_t gfx7_gds_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120
};
/* gfx8_gds_counter_ids are the same as gfx7_gds_counter_ids */
/* gfx9_gds_counter_ids are the same as gfx7_gds_counter_ids */

/* GDS counter IDs for gfx10: contiguous values 0 through 122 (123 entries). */
static uint32_t gfx10_gds_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122
};

/****** GRBM ******/
/* GRBM counter IDs for gfx7: contiguous values 0 through 33 (34 entries). */
static uint32_t gfx7_grbm_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
};
/* gfx8_grbm_counter_ids are the same as gfx7_grbm_counter_ids */

/* GRBM counter IDs for gfx9: contiguous values 0 through 37 (38 entries). */
static uint32_t gfx9_grbm_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37
};

/* GRBM counter IDs for gfx10: contiguous values 0 through 46 (47 entries). */
static uint32_t gfx10_grbm_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46
};

/****** GRBMSE ******/
/* GRBMSE counter IDs for gfx7: contiguous values 0 through 14 (15 entries). */
static uint32_t gfx7_grbmse_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
};
/* gfx8_grbmse_counter_ids are the same as gfx7_grbmse_counter_ids */

/* GRBMSE counter IDs for gfx9: contiguous values 0 through 15 (16 entries). */
static uint32_t gfx9_grbmse_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

/* GRBMSE counter IDs for gfx10: contiguous values 0 through 18 (19 entries). */
static uint32_t gfx10_grbmse_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
};

/****** IA ******/
/* IA counter IDs for gfx7: contiguous values 0 through 17 (18 entries). */
static uint32_t gfx7_ia_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
};

/* IA counter IDs for gfx8: contiguous values 0 through 23 (24 entries). */
static uint32_t gfx8_ia_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23
};

/* IA counter IDs for gfx9: contiguous values 0 through 31 (32 entries). */
static uint32_t gfx9_ia_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};
/* gfx10 doesn't have IA */

/****** PASC ******/
/* PASC counter IDs for gfx7: contiguous values 0 through 394 (395 entries). */
static uint32_t gfx7_pasc_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394
};

/* PASC counter IDs for gfx8: contiguous values 0 through 396 (397 entries). */
static uint32_t gfx8_pasc_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395, 396
};

/* PASC counter IDs for gfx9: contiguous values 0 through 490 (491 entries). */
static uint32_t gfx9_pasc_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436,
437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452,
453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468,
469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484,
485, 486, 487, 488, 489, 490
};

/* PASC counter IDs for gfx10: contiguous values 0 through 551 (552 entries). */
static uint32_t gfx10_pasc_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292,
293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,
309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340,
341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372,
373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388,
389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436,
437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452,
453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468,
469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484,
485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500,
501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516,
517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532,
533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548,
549, 550, 551
};

/****** PASU ******/
/* PASU counter IDs for gfx7: contiguous values 0 through 152 (153 entries). */
static uint32_t gfx7_pasu_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152
};
/* gfx8_pasu_counter_ids are the same as gfx7_pasu_counter_ids */

/* PASU counter IDs for gfx9: contiguous values 0 through 291 (292 entries). */
static uint32_t gfx9_pasu_counter_ids[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244,
245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260,
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291
};

/* gfx10 PASU counter IDs: the contiguous range 0..265. */
static uint32_t gfx10_pasu_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265
};

/****** SPI ******/
/* gfx7 SPI counter IDs: the contiguous range 0..185. */
static uint32_t gfx7_spi_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185
};

/* gfx8 SPI counter IDs: the contiguous range 0..196. */
static uint32_t gfx8_spi_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196
};

/* gfx9 SPI counter IDs: the contiguous range 0..195. */
static uint32_t gfx9_spi_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195
};

/* gfx10 SPI counter IDs: the contiguous range 0..328. */
static uint32_t gfx10_spi_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
	270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
	280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
	290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
	300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
	310, 311, 312, 313, 314, 315, 316, 317, 318, 319,
	320, 321, 322, 323, 324, 325, 326, 327, 328
};

/****** SQ ******/
/*
 * gfx7 SQ counter IDs: 0..162 and 168..250.
 * IDs 163..167 are unused and therefore not listed.
 */
static uint32_t gfx7_sq_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162,
	/* gap: 163..167 */
	168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250
};

/*
 * gfx8 SQ counter IDs: 1..165, 167..291 and 298.
 * IDs 166 and 292..297 are unused and therefore not listed; ID 0 is not
 * listed either.
 */
static uint32_t gfx8_sq_counter_ids[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165,
	/* gap: 166 */
	167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
	270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
	280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
	290, 291,
	/* gap: 292..297 */
	298
};

/* Polaris 10/11/12 share the same SQ counter IDs, which differ from other gfx8 parts. */
/* Unused counters - 167 and 275 are *_DUMMY_LAST */
/*
 * SQ counter ID list used for the gfx8 "pl" variant: 1..165, 168..274 and
 * 276..295. IDs 0, 166, 167 and 275 do not appear in the list.
 */
static uint32_t gfx8_pl_sq_counter_ids[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165,
	/* gap: 166..167 */
	168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
	270, 271, 272, 273, 274,
	/* gap: 275 */
	276, 277, 278, 279,
	280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
	290, 291, 292, 293, 294, 295
};

/*
 * gfx9 SQ counter IDs: 1..183 and 255..372.
 * ID 0 and IDs 184..254 do not appear in the list.
 */
static uint32_t gfx9_sq_counter_ids[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183,
	/* gap: 184..254 */
	255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
	270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
	280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
	290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
	300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
	310, 311, 312, 313, 314, 315, 316, 317, 318, 319,
	320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
	330, 331, 332, 333, 334, 335, 336, 337, 338, 339,
	340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
	350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
	360, 361, 362, 363, 364, 365, 366, 367, 368, 369,
	370, 371, 372
};

/* gfx10 SQ counter IDs: the contiguous range 0..511. */
static uint32_t gfx10_sq_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
	260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
	270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
	280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
	290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
	300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
	310, 311, 312, 313, 314, 315, 316, 317, 318, 319,
	320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
	330, 331, 332, 333, 334, 335, 336, 337, 338, 339,
	340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
	350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
	360, 361, 362, 363, 364, 365, 366, 367, 368, 369,
	370, 371, 372, 373, 374, 375, 376, 377, 378, 379,
	380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
	390, 391, 392, 393, 394, 395, 396, 397, 398, 399,
	400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
	410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
	420, 421, 422, 423, 424, 425, 426, 427, 428, 429,
	430, 431, 432, 433, 434, 435, 436, 437, 438, 439,
	440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
	450, 451, 452, 453, 454, 455, 456, 457, 458, 459,
	460, 461, 462, 463, 464, 465, 466, 467, 468, 469,
	470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
	480, 481, 482, 483, 484, 485, 486, 487, 488, 489,
	490, 491, 492, 493, 494, 495, 496, 497, 498, 499,
	500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
	510, 511
};

/****** SRBM ******/
/* gfx7 SRBM counter IDs: the contiguous range 0..18. */
static uint32_t gfx7_srbm_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18
};

/* gfx8 SRBM counter IDs: the contiguous range 0..27. */
static uint32_t gfx8_srbm_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27
};
/* gfx9 doesn't have SRBM */
/* gfx10 doesn't have SRBM */

/****** SX ******/
/* gfx7 SX counter IDs: the contiguous range 0..33. */
static uint32_t gfx7_sx_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33
};
/* gfx8_sx_counter_ids are the same as gfx7_sx_counter_ids */

/* gfx9 SX counter IDs: the contiguous range 0..207. */
static uint32_t gfx9_sx_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207
};

/* gfx10 SX counter IDs: the contiguous range 0..224. */
static uint32_t gfx10_sx_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224
};

/****** TA ******/
/* gfx7 TA counter IDs: the contiguous range 0..110. */
static uint32_t gfx7_ta_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110
};

/* gfx8 TA counter IDs: the contiguous range 0..118. */
static uint32_t gfx8_ta_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118
};
/* gfx9_ta_counter_ids is same as gfx8_ta_counter_ids */

/* gfx10 TA counter IDs: the contiguous range 0..225. */
static uint32_t gfx10_ta_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225
};

/****** TCA ******/
/* gfx7 TCA counter IDs: the contiguous range 0..38. */
static uint32_t gfx7_tca_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38
};

/* gfx8 TCA counter IDs: the contiguous range 1..34 (ID 0 is not listed). */
static uint32_t gfx8_tca_counter_ids[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34
};
/* gfx9_tca_counter_ids is same as gfx8_tca_counter_ids */
/* gfx10 doesn't have TCA */

/****** TCC ******/
/*
 * gfx7 TCC counter IDs: 0..44 and 64..159 (IDs 45..63 are not listed).
 *
 * Every ID appears exactly once. The table's element count is what
 * hawaii_blocks reports as num_of_counters for the TCC block, so a repeated
 * entry would both inflate that count and advertise the same counter twice.
 */
static uint32_t gfx7_tcc_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44,
	/* gap: 45..63 */
	64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159
};

/* gfx8 TCC counter IDs: 0..107 and 128..255 (IDs 108..127 are not listed). */
static uint32_t gfx8_tcc_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107,
	/* gap: 108..127 */
	128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
	210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
	220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
	250, 251, 252, 253, 254, 255
};

/*
 * TCC counter ID list for the gfx8 "cz" variant: 0..107 and 128..191
 * (IDs 108..127 are not listed).
 */
static uint32_t gfx8_cz_tcc_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107,
	/* gap: 108..127 */
	128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
	190, 191
};
/* gfx9_tcc_counter_ids is same as gfx8_tcc_counter_ids */
/* gfx10 doesn't have TCC */

/****** TCP ******/
/* gfx7 TCP counter IDs: the contiguous range 0..153. */
static uint32_t gfx7_tcp_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153
};

/* gfx8 TCP counter IDs: the contiguous range 0..182. */
static uint32_t gfx8_tcp_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
	170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
	180, 181, 182
};

/* gfx9 TCP counter IDs: the contiguous range 0..84. */
static uint32_t gfx9_tcp_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84
};

/* gfx10 TCP counter IDs: the contiguous range 0..76. */
static uint32_t gfx10_tcp_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76
};

/****** TCS ******/
/* gfx7 TCS counter IDs: 0..20 and 64..127 (IDs 21..63 are not listed). */
static uint32_t gfx7_tcs_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20,
	/* gap: 21..63 */
	64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127
};
/* gfx8 doesn't have TCS */
/* gfx9 doesn't have TCS */
/* gfx10 doesn't have TCS */

/****** TD ******/
/* gfx7 TD counter IDs: the contiguous range 0..54. */
static uint32_t gfx7_td_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54
};
/* gfx8_td_counter_ids are the same as gfx7_td_counter_ids */

/* gfx9 TD counter IDs: the contiguous range 0..56. */
static uint32_t gfx9_td_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56
};

/* gfx10 TD counter IDs: the contiguous range 0..60. */
static uint32_t gfx10_td_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60
};

/****** VGT ******/
/* gfx7 VGT counter IDs: the contiguous range 0..139. */
static uint32_t gfx7_vgt_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139
};

/* gfx8 VGT counter IDs: the contiguous range 0..145. */
static uint32_t gfx8_vgt_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145
};

/* VGT counter ID list for the gfx8 "pl" variant: the contiguous range 0..146. */
static uint32_t gfx8_pl_vgt_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146
};

/* gfx9 VGT counter IDs: the contiguous range 0..147. */
static uint32_t gfx9_vgt_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
	100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
	110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
	130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
	140, 141, 142, 143, 144, 145, 146, 147
};
/* gfx10 doesn't have VGT */

/****** WD ******/
/* gfx7 WD counter IDs: the contiguous range 0..9. */
static uint32_t gfx7_wd_counter_ids[] = {
	0, 1, 2, 3, 4,
	5, 6, 7, 8, 9
};

/* gfx8 WD counter IDs: the contiguous range 0..36. */
static uint32_t gfx8_wd_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36
};

/* gfx9 WD counter IDs: the contiguous range 0..57. */
static uint32_t gfx9_wd_counter_ids[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57
};
/* gfx10 doesn't have WD */

/*
 * Perf counter block table named for Kaveri, indexed by PERFCOUNTER_BLOCKID__*.
 * Only the SQ entry is initialized; all other entries are left
 * zero-initialized by the designated-initializer rules.
 */
static struct perf_counter_block kaveri_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	/* SQ: IDs come from the gfx7 SQ table, count derived from its size. */
	[PERFCOUNTER_BLOCKID__SQ] = {
		.num_of_slots = 8,
		.num_of_counters = sizeof(gfx7_sq_counter_ids) /
				   sizeof(gfx7_sq_counter_ids[0]),
		.counter_ids = gfx7_sq_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
};

/*
 * Perf counter block table named for Hawaii, indexed by PERFCOUNTER_BLOCKID__*.
 *
 * Every initialized entry points at the matching gfx7_*_counter_ids table,
 * derives num_of_counters from that table's element count, and sets
 * counter_size_in_bits to 64 with counter_mask = BITMASK(64). Entries that
 * are not named here stay zero-initialized.
 */
static struct perf_counter_block hawaii_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB] = {
		.num_of_slots = 7,
		.num_of_counters = sizeof(gfx7_cb_counter_ids) /
				   sizeof(gfx7_cb_counter_ids[0]),
		.counter_ids = gfx7_cb_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__CPF] = {
		.num_of_slots = 5,
		.num_of_counters = sizeof(gfx7_cpf_counter_ids) /
				   sizeof(gfx7_cpf_counter_ids[0]),
		.counter_ids = gfx7_cpf_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__CPG] = {
		.num_of_slots = 5,
		.num_of_counters = sizeof(gfx7_cpg_counter_ids) /
				   sizeof(gfx7_cpg_counter_ids[0]),
		.counter_ids = gfx7_cpg_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__DB] = {
		.num_of_slots = 12,
		.num_of_counters = sizeof(gfx7_db_counter_ids) /
				   sizeof(gfx7_db_counter_ids[0]),
		.counter_ids = gfx7_db_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__GDS] = {
		.num_of_slots = 4,
		.num_of_counters = sizeof(gfx7_gds_counter_ids) /
				   sizeof(gfx7_gds_counter_ids[0]),
		.counter_ids = gfx7_gds_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__GRBM] = {
		.num_of_slots = 2,
		.num_of_counters = sizeof(gfx7_grbm_counter_ids) /
				   sizeof(gfx7_grbm_counter_ids[0]),
		.counter_ids = gfx7_grbm_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__GRBMSE] = {
		.num_of_slots = 1,
		.num_of_counters = sizeof(gfx7_grbmse_counter_ids) /
				   sizeof(gfx7_grbmse_counter_ids[0]),
		.counter_ids = gfx7_grbmse_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__IA] = {
		.num_of_slots = 7,
		.num_of_counters = sizeof(gfx7_ia_counter_ids) /
				   sizeof(gfx7_ia_counter_ids[0]),
		.counter_ids = gfx7_ia_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__PASC] = {
		.num_of_slots = 11,
		.num_of_counters = sizeof(gfx7_pasc_counter_ids) /
				   sizeof(gfx7_pasc_counter_ids[0]),
		.counter_ids = gfx7_pasc_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__PASU] = {
		.num_of_slots = 10,
		.num_of_counters = sizeof(gfx7_pasu_counter_ids) /
				   sizeof(gfx7_pasu_counter_ids[0]),
		.counter_ids = gfx7_pasu_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__SPI] = {
		.num_of_slots = 4,
		.num_of_counters = sizeof(gfx7_spi_counter_ids) /
				   sizeof(gfx7_spi_counter_ids[0]),
		.counter_ids = gfx7_spi_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__SRBM] = {
		.num_of_slots = 2,
		.num_of_counters = sizeof(gfx7_srbm_counter_ids) /
				   sizeof(gfx7_srbm_counter_ids[0]),
		.counter_ids = gfx7_srbm_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__SQ] = {
		.num_of_slots = 8,
		.num_of_counters = sizeof(gfx7_sq_counter_ids) /
				   sizeof(gfx7_sq_counter_ids[0]),
		.counter_ids = gfx7_sq_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__SX] = {
		.num_of_slots = 4,
		.num_of_counters = sizeof(gfx7_sx_counter_ids) /
				   sizeof(gfx7_sx_counter_ids[0]),
		.counter_ids = gfx7_sx_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TA] = {
		.num_of_slots = 6,
		.num_of_counters = sizeof(gfx7_ta_counter_ids) /
				   sizeof(gfx7_ta_counter_ids[0]),
		.counter_ids = gfx7_ta_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TCA] = {
		.num_of_slots = 10, /* same as CZ */
		.num_of_counters = sizeof(gfx7_tca_counter_ids) /
				   sizeof(gfx7_tca_counter_ids[0]),
		.counter_ids = gfx7_tca_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TCC] = {
		.num_of_slots = 10,
		.num_of_counters = sizeof(gfx7_tcc_counter_ids) /
				   sizeof(gfx7_tcc_counter_ids[0]),
		.counter_ids = gfx7_tcc_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TCP] = {
		.num_of_slots = 10,
		.num_of_counters = sizeof(gfx7_tcp_counter_ids) /
				   sizeof(gfx7_tcp_counter_ids[0]),
		.counter_ids = gfx7_tcp_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TCS] = {
		.num_of_slots = 7,
		.num_of_counters = sizeof(gfx7_tcs_counter_ids) /
				   sizeof(gfx7_tcs_counter_ids[0]),
		.counter_ids = gfx7_tcs_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__TD] = {
		.num_of_slots = 6,
		.num_of_counters = sizeof(gfx7_td_counter_ids) /
				   sizeof(gfx7_td_counter_ids[0]),
		.counter_ids = gfx7_td_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__VGT] = {
		.num_of_slots = 10,
		.num_of_counters = sizeof(gfx7_vgt_counter_ids) /
				   sizeof(gfx7_vgt_counter_ids[0]),
		.counter_ids = gfx7_vgt_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
	[PERFCOUNTER_BLOCKID__WD] = {
		.num_of_slots = 4,
		.num_of_counters = sizeof(gfx7_wd_counter_ids) /
				   sizeof(gfx7_wd_counter_ids[0]),
		.counter_ids = gfx7_wd_counter_ids,
		.counter_size_in_bits = 64,
		.counter_mask = BITMASK(64),
	},
};

/*
 * Carrizo performance-counter blocks, indexed by enum perf_block_id.
 * Block IDs without an entry are left zero-initialized.
 *
 * PMC_BLOCK_DESC() expands to one table entry: the slot count, the counter ID
 * array together with its element count, and the 64-bit counter width/mask
 * that every entry in this table uses.
 */
#define PMC_BLOCK_DESC(slots, ids) {					\
		.num_of_slots = (slots),				\
		.num_of_counters = sizeof(ids) / sizeof(*(ids)),	\
		.counter_ids = (ids),					\
		.counter_size_in_bits = 64,				\
		.counter_mask = BITMASK(64)				\
	}

static struct perf_counter_block carrizo_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB]     = PMC_BLOCK_DESC(7, gfx8_cb_counter_ids),
	[PERFCOUNTER_BLOCKID__CPF]    = PMC_BLOCK_DESC(5, gfx8_cpf_counter_ids),
	[PERFCOUNTER_BLOCKID__CPG]    = PMC_BLOCK_DESC(5, gfx8_cpg_counter_ids),
	[PERFCOUNTER_BLOCKID__DB]     = PMC_BLOCK_DESC(12, gfx7_db_counter_ids),
	[PERFCOUNTER_BLOCKID__GDS]    = PMC_BLOCK_DESC(4, gfx7_gds_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBM]   = PMC_BLOCK_DESC(2, gfx7_grbm_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBMSE] = PMC_BLOCK_DESC(1, gfx7_grbmse_counter_ids),
	[PERFCOUNTER_BLOCKID__IA]     = PMC_BLOCK_DESC(7, gfx8_ia_counter_ids),
	[PERFCOUNTER_BLOCKID__PASC]   = PMC_BLOCK_DESC(11, gfx8_pasc_counter_ids),
	[PERFCOUNTER_BLOCKID__PASU]   = PMC_BLOCK_DESC(10, gfx7_pasu_counter_ids),
	[PERFCOUNTER_BLOCKID__SPI]    = PMC_BLOCK_DESC(4, gfx8_spi_counter_ids),
	[PERFCOUNTER_BLOCKID__SRBM]   = PMC_BLOCK_DESC(2, gfx8_srbm_counter_ids),
	[PERFCOUNTER_BLOCKID__SQ]     = PMC_BLOCK_DESC(8, gfx8_sq_counter_ids),
	[PERFCOUNTER_BLOCKID__SX]     = PMC_BLOCK_DESC(4, gfx7_sx_counter_ids),
	[PERFCOUNTER_BLOCKID__TA]     = PMC_BLOCK_DESC(6, gfx8_ta_counter_ids),
	/* PMC0: PERF_SEL~PERF_SEL3, PMC1: PERF_SEL~PERF_SEL3,
	 * PMC2: PERF_SEL, PMC3: PERF_SEL. So 10 PERF_SELs in total
	 */
	[PERFCOUNTER_BLOCKID__TCA]    = PMC_BLOCK_DESC(10, gfx8_tca_counter_ids),
	[PERFCOUNTER_BLOCKID__TCC]    = PMC_BLOCK_DESC(10, gfx8_cz_tcc_counter_ids),
	[PERFCOUNTER_BLOCKID__TCP]    = PMC_BLOCK_DESC(10, gfx8_tcp_counter_ids),
	[PERFCOUNTER_BLOCKID__TD]     = PMC_BLOCK_DESC(6, gfx7_td_counter_ids),
	[PERFCOUNTER_BLOCKID__VGT]    = PMC_BLOCK_DESC(10, gfx8_vgt_counter_ids),
	[PERFCOUNTER_BLOCKID__WD]     = PMC_BLOCK_DESC(4, gfx8_wd_counter_ids),
};

#undef PMC_BLOCK_DESC

/*
 * Fiji performance-counter blocks, indexed by enum perf_block_id.
 * Block IDs without an entry are left zero-initialized.
 *
 * PMC_BLOCK_DESC() expands to one table entry: the slot count, the counter ID
 * array together with its element count, and the 64-bit counter width/mask
 * that every entry in this table uses.
 */
#define PMC_BLOCK_DESC(slots, ids) {					\
		.num_of_slots = (slots),				\
		.num_of_counters = sizeof(ids) / sizeof(*(ids)),	\
		.counter_ids = (ids),					\
		.counter_size_in_bits = 64,				\
		.counter_mask = BITMASK(64)				\
	}

static struct perf_counter_block fiji_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB]     = PMC_BLOCK_DESC(7, gfx8_cb_counter_ids),
	[PERFCOUNTER_BLOCKID__CPF]    = PMC_BLOCK_DESC(5, gfx8_cpf_counter_ids),
	[PERFCOUNTER_BLOCKID__CPG]    = PMC_BLOCK_DESC(5, gfx8_cpg_counter_ids),
	[PERFCOUNTER_BLOCKID__DB]     = PMC_BLOCK_DESC(12, gfx7_db_counter_ids),
	[PERFCOUNTER_BLOCKID__GDS]    = PMC_BLOCK_DESC(4, gfx7_gds_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBM]   = PMC_BLOCK_DESC(2, gfx7_grbm_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBMSE] = PMC_BLOCK_DESC(1, gfx7_grbmse_counter_ids),
	[PERFCOUNTER_BLOCKID__IA]     = PMC_BLOCK_DESC(7, gfx8_ia_counter_ids),
	[PERFCOUNTER_BLOCKID__PASC]   = PMC_BLOCK_DESC(11, gfx8_pasc_counter_ids),
	[PERFCOUNTER_BLOCKID__PASU]   = PMC_BLOCK_DESC(10, gfx7_pasu_counter_ids),
	[PERFCOUNTER_BLOCKID__SPI]    = PMC_BLOCK_DESC(4, gfx8_spi_counter_ids),
	[PERFCOUNTER_BLOCKID__SRBM]   = PMC_BLOCK_DESC(2, gfx8_srbm_counter_ids),
	[PERFCOUNTER_BLOCKID__SQ]     = PMC_BLOCK_DESC(8, gfx8_sq_counter_ids),
	[PERFCOUNTER_BLOCKID__SX]     = PMC_BLOCK_DESC(4, gfx7_sx_counter_ids),
	[PERFCOUNTER_BLOCKID__TA]     = PMC_BLOCK_DESC(6, gfx8_ta_counter_ids),
	/* slot count same as CZ */
	[PERFCOUNTER_BLOCKID__TCA]    = PMC_BLOCK_DESC(10, gfx8_tca_counter_ids),
	[PERFCOUNTER_BLOCKID__TCC]    = PMC_BLOCK_DESC(10, gfx8_tcc_counter_ids),
	[PERFCOUNTER_BLOCKID__TCP]    = PMC_BLOCK_DESC(10, gfx8_tcp_counter_ids),
	[PERFCOUNTER_BLOCKID__TD]     = PMC_BLOCK_DESC(6, gfx7_td_counter_ids),
	[PERFCOUNTER_BLOCKID__VGT]    = PMC_BLOCK_DESC(10, gfx8_vgt_counter_ids),
	[PERFCOUNTER_BLOCKID__WD]     = PMC_BLOCK_DESC(4, gfx8_wd_counter_ids),
};

#undef PMC_BLOCK_DESC

/*
 * Polaris performance-counter blocks, indexed by enum perf_block_id.
 * Block IDs without an entry are left zero-initialized.
 *
 * PMC_BLOCK_DESC() expands to one table entry: the slot count, the counter ID
 * array together with its element count, and the 64-bit counter width/mask
 * that every entry in this table uses.
 */
#define PMC_BLOCK_DESC(slots, ids) {					\
		.num_of_slots = (slots),				\
		.num_of_counters = sizeof(ids) / sizeof(*(ids)),	\
		.counter_ids = (ids),					\
		.counter_size_in_bits = 64,				\
		.counter_mask = BITMASK(64)				\
	}

static struct perf_counter_block polaris_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB]     = PMC_BLOCK_DESC(7, gfx8_cb_counter_ids),
	[PERFCOUNTER_BLOCKID__CPF]    = PMC_BLOCK_DESC(5, gfx8_cpf_counter_ids),
	[PERFCOUNTER_BLOCKID__CPG]    = PMC_BLOCK_DESC(5, gfx8_cpg_counter_ids),
	[PERFCOUNTER_BLOCKID__DB]     = PMC_BLOCK_DESC(12, gfx7_db_counter_ids),
	[PERFCOUNTER_BLOCKID__GDS]    = PMC_BLOCK_DESC(4, gfx7_gds_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBM]   = PMC_BLOCK_DESC(2, gfx7_grbm_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBMSE] = PMC_BLOCK_DESC(1, gfx7_grbmse_counter_ids),
	[PERFCOUNTER_BLOCKID__IA]     = PMC_BLOCK_DESC(7, gfx8_ia_counter_ids),
	[PERFCOUNTER_BLOCKID__PASC]   = PMC_BLOCK_DESC(11, gfx8_pasc_counter_ids),
	[PERFCOUNTER_BLOCKID__PASU]   = PMC_BLOCK_DESC(10, gfx7_pasu_counter_ids),
	[PERFCOUNTER_BLOCKID__SPI]    = PMC_BLOCK_DESC(4, gfx8_spi_counter_ids),
	[PERFCOUNTER_BLOCKID__SQ]     = PMC_BLOCK_DESC(8, gfx8_pl_sq_counter_ids),
	[PERFCOUNTER_BLOCKID__SRBM]   = PMC_BLOCK_DESC(2, gfx8_srbm_counter_ids),
	[PERFCOUNTER_BLOCKID__SX]     = PMC_BLOCK_DESC(4, gfx7_sx_counter_ids),
	[PERFCOUNTER_BLOCKID__TA]     = PMC_BLOCK_DESC(6, gfx8_ta_counter_ids),
	/* slot count same as CZ */
	[PERFCOUNTER_BLOCKID__TCA]    = PMC_BLOCK_DESC(10, gfx8_tca_counter_ids),
	[PERFCOUNTER_BLOCKID__TCC]    = PMC_BLOCK_DESC(10, gfx8_tcc_counter_ids),
	[PERFCOUNTER_BLOCKID__TCP]    = PMC_BLOCK_DESC(10, gfx8_tcp_counter_ids),
	[PERFCOUNTER_BLOCKID__TD]     = PMC_BLOCK_DESC(6, gfx7_td_counter_ids),
	[PERFCOUNTER_BLOCKID__VGT]    = PMC_BLOCK_DESC(10, gfx8_pl_vgt_counter_ids),
	[PERFCOUNTER_BLOCKID__WD]     = PMC_BLOCK_DESC(4, gfx8_wd_counter_ids),
};

#undef PMC_BLOCK_DESC

/*
 * Vega performance-counter blocks, indexed by enum perf_block_id.
 * Block IDs without an entry are left zero-initialized.
 *
 * PMC_BLOCK_DESC() expands to one table entry: the slot count, the counter ID
 * array together with its element count, and the 64-bit counter width/mask
 * that every entry in this table uses.
 */
#define PMC_BLOCK_DESC(slots, ids) {					\
		.num_of_slots = (slots),				\
		.num_of_counters = sizeof(ids) / sizeof(*(ids)),	\
		.counter_ids = (ids),					\
		.counter_size_in_bits = 64,				\
		.counter_mask = BITMASK(64)				\
	}

static struct perf_counter_block vega_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB]     = PMC_BLOCK_DESC(7, gfx9_cb_counter_ids),
	[PERFCOUNTER_BLOCKID__CPF]    = PMC_BLOCK_DESC(5, gfx9_cpf_counter_ids),
	[PERFCOUNTER_BLOCKID__CPG]    = PMC_BLOCK_DESC(5, gfx9_cpg_counter_ids),
	[PERFCOUNTER_BLOCKID__DB]     = PMC_BLOCK_DESC(12, gfx9_db_counter_ids),
	[PERFCOUNTER_BLOCKID__GDS]    = PMC_BLOCK_DESC(4, gfx7_gds_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBM]   = PMC_BLOCK_DESC(2, gfx9_grbm_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBMSE] = PMC_BLOCK_DESC(1, gfx9_grbmse_counter_ids),
	[PERFCOUNTER_BLOCKID__IA]     = PMC_BLOCK_DESC(7, gfx9_ia_counter_ids),
	[PERFCOUNTER_BLOCKID__PASC]   = PMC_BLOCK_DESC(11, gfx9_pasc_counter_ids),
	[PERFCOUNTER_BLOCKID__PASU]   = PMC_BLOCK_DESC(10, gfx9_pasu_counter_ids),
	[PERFCOUNTER_BLOCKID__SPI]    = PMC_BLOCK_DESC(18, gfx9_spi_counter_ids),
	[PERFCOUNTER_BLOCKID__SQ]     = PMC_BLOCK_DESC(16, gfx9_sq_counter_ids),
	[PERFCOUNTER_BLOCKID__SX]     = PMC_BLOCK_DESC(4, gfx9_sx_counter_ids),
	[PERFCOUNTER_BLOCKID__TA]     = PMC_BLOCK_DESC(6, gfx8_ta_counter_ids),
	/* Slot count same as Fiji.
	 * Greenland has the same TCA counter IDs with Fiji
	 */
	[PERFCOUNTER_BLOCKID__TCA]    = PMC_BLOCK_DESC(10, gfx8_tca_counter_ids),
	[PERFCOUNTER_BLOCKID__TCC]    = PMC_BLOCK_DESC(10, gfx8_tcc_counter_ids),
	[PERFCOUNTER_BLOCKID__TCP]    = PMC_BLOCK_DESC(10, gfx9_tcp_counter_ids),
	[PERFCOUNTER_BLOCKID__TD]     = PMC_BLOCK_DESC(6, gfx9_td_counter_ids),
	[PERFCOUNTER_BLOCKID__VGT]    = PMC_BLOCK_DESC(10, gfx9_vgt_counter_ids),
	[PERFCOUNTER_BLOCKID__WD]     = PMC_BLOCK_DESC(4, gfx9_wd_counter_ids),
};

#undef PMC_BLOCK_DESC

/*
 * Navi performance-counter blocks, indexed by enum perf_block_id.
 * Block IDs without an entry are left zero-initialized.
 *
 * PMC_BLOCK_DESC() expands to one table entry: the slot count, the counter ID
 * array together with its element count, and the 64-bit counter width/mask
 * that every entry in this table uses.
 */
#define PMC_BLOCK_DESC(slots, ids) {					\
		.num_of_slots = (slots),				\
		.num_of_counters = sizeof(ids) / sizeof(*(ids)),	\
		.counter_ids = (ids),					\
		.counter_size_in_bits = 64,				\
		.counter_mask = BITMASK(64)				\
	}

static struct perf_counter_block navi_blocks[PERFCOUNTER_BLOCKID__MAX] = {
	[PERFCOUNTER_BLOCKID__CB]     = PMC_BLOCK_DESC(7, gfx10_cb_counter_ids),
	[PERFCOUNTER_BLOCKID__CPF]    = PMC_BLOCK_DESC(6, gfx10_cpf_counter_ids),
	[PERFCOUNTER_BLOCKID__CPG]    = PMC_BLOCK_DESC(6, gfx10_cpg_counter_ids),
	[PERFCOUNTER_BLOCKID__DB]     = PMC_BLOCK_DESC(12, gfx10_db_counter_ids),
	[PERFCOUNTER_BLOCKID__GDS]    = PMC_BLOCK_DESC(10, gfx10_gds_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBM]   = PMC_BLOCK_DESC(2, gfx10_grbm_counter_ids),
	[PERFCOUNTER_BLOCKID__GRBMSE] = PMC_BLOCK_DESC(1, gfx10_grbmse_counter_ids),
	[PERFCOUNTER_BLOCKID__PASC]   = PMC_BLOCK_DESC(11, gfx10_pasc_counter_ids),
	[PERFCOUNTER_BLOCKID__PASU]   = PMC_BLOCK_DESC(16, gfx10_pasu_counter_ids),
	[PERFCOUNTER_BLOCKID__SPI]    = PMC_BLOCK_DESC(18, gfx10_spi_counter_ids),
	[PERFCOUNTER_BLOCKID__SQ]     = PMC_BLOCK_DESC(16, gfx10_sq_counter_ids),
	[PERFCOUNTER_BLOCKID__SX]     = PMC_BLOCK_DESC(4, gfx10_sx_counter_ids),
	[PERFCOUNTER_BLOCKID__TA]     = PMC_BLOCK_DESC(5, gfx10_ta_counter_ids),
	[PERFCOUNTER_BLOCKID__TCP]    = PMC_BLOCK_DESC(10, gfx10_tcp_counter_ids),
	[PERFCOUNTER_BLOCKID__TD]     = PMC_BLOCK_DESC(5, gfx10_td_counter_ids),
};

#undef PMC_BLOCK_DESC

/*
 * Copy the performance-counter description of one hardware block into *block.
 *
 * The per-ASIC table is chosen from the node's GFX version (its major part is
 * gfxv >> 16) and, for the major-8 parts that are neither Tonga nor Carrizo,
 * from the device ID.
 *
 * @node_id:  node whose GFX version and device ID are looked up
 * @block_id: block to describe; must lie in
 *            [PERFCOUNTER_BLOCKID__FIRST, PERFCOUNTER_BLOCKID__MAX)
 * @block:    receives a copy of the table entry; left untouched on failure
 *
 * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_INVALID_PARAMETER when
 * block_id is out of range, the node is Tonga, or the major GFX version is
 * not one of 7, 8, 9 or 10.
 */
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
				   enum perf_block_id block_id,
				   struct perf_counter_block *block)
{
	const struct perf_counter_block *table;
	uint32_t gfxv = hsakmt_get_gfxv_by_node_id(node_id);
	uint16_t dev_id = hsakmt_get_device_id_by_node_id(node_id);
	uint32_t major = gfxv >> 16;

	if (block_id < PERFCOUNTER_BLOCKID__FIRST ||
			block_id >= PERFCOUNTER_BLOCKID__MAX)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (major == 7) {
		table = (gfxv == GFX_VERSION_KAVERI) ? kaveri_blocks
						     : hawaii_blocks;
	} else if (major == 8) {
		if (gfxv == GFX_VERSION_TONGA)
			return HSAKMT_STATUS_INVALID_PARAMETER;

		if (gfxv == GFX_VERSION_CARRIZO)
			table = carrizo_blocks;
		/*
		 * Fiji/Polaris/VegaM cards are of the same GFXIP Engine
		 * Version (8.0.3). Only way to differentiate b/t Fiji and
		 * Polaris/VegaM is via DID.
		 */
		else if (dev_id == 0x7300 || dev_id == 0x730F)
			table = fiji_blocks;
		else
			table = polaris_blocks;
	} else if (major == 9) {
		table = vega_blocks;
	} else if (major == 10) {
		table = navi_blocks;
	} else {
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	*block = table[block_id];

	return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/src/pmc_table.h
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef PMC_TABLE_H
#define PMC_TABLE_H

#include "libhsakmt.h"

/*
 * Identifiers of the hardware blocks that can expose performance counters.
 *
 * The values are contiguous starting at PERFCOUNTER_BLOCKID__FIRST (0);
 * PERFCOUNTER_BLOCKID__MAX is one past the last valid ID, so it serves both
 * as an exclusive upper bound for validation and as an array dimension.
 */
enum perf_block_id {
	PERFCOUNTER_BLOCKID__FIRST = 0,
	/* non-privileged */
	PERFCOUNTER_BLOCKID__CB = PERFCOUNTER_BLOCKID__FIRST,
	PERFCOUNTER_BLOCKID__CPC,
	PERFCOUNTER_BLOCKID__CPF,
	PERFCOUNTER_BLOCKID__CPG,
	PERFCOUNTER_BLOCKID__DB,
	PERFCOUNTER_BLOCKID__GDS,
	PERFCOUNTER_BLOCKID__GRBM,
	PERFCOUNTER_BLOCKID__GRBMSE,
	PERFCOUNTER_BLOCKID__IA,
	PERFCOUNTER_BLOCKID__MC,
	PERFCOUNTER_BLOCKID__PASC,
	PERFCOUNTER_BLOCKID__PASU,
	PERFCOUNTER_BLOCKID__SPI,
	PERFCOUNTER_BLOCKID__SRBM,
	PERFCOUNTER_BLOCKID__SQ,
	PERFCOUNTER_BLOCKID__SX,
	PERFCOUNTER_BLOCKID__TA,
	PERFCOUNTER_BLOCKID__TCA,
	PERFCOUNTER_BLOCKID__TCC,
	PERFCOUNTER_BLOCKID__TCP,
	PERFCOUNTER_BLOCKID__TCS,
	PERFCOUNTER_BLOCKID__TD,
	PERFCOUNTER_BLOCKID__VGT,
	PERFCOUNTER_BLOCKID__WD,
	/* privileged (no privileged block IDs are currently listed) */
	PERFCOUNTER_BLOCKID__MAX
};

/*
 * Description of the performance counters offered by one hardware block,
 * as returned by hsakmt_get_block_properties().
 */
struct perf_counter_block {
	/* Number of counter slots in the block.
	 * NOTE(review): presumably how many counters can be selected at the
	 * same time (e.g. the number of PERF_SEL selects) — confirm.
	 */
	uint32_t    num_of_slots;
	/* Number of entries in counter_ids */
	uint32_t    num_of_counters;
	/* Array of num_of_counters counter IDs; not owned by this struct */
	uint32_t    *counter_ids;
	/* Width of a counter value, in bits */
	uint32_t    counter_size_in_bits;
	/* Bit mask for a counter value.
	 * NOTE(review): presumably covers counter_size_in_bits bits — confirm.
	 */
	uint64_t    counter_mask;
};

/*
 * Fill *block with the performance-counter description of block_id on the
 * node identified by node_id.
 *
 * Returns an HSAKMT_STATUS code; block_id must be below
 * PERFCOUNTER_BLOCKID__MAX.
 */
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
				   enum perf_block_id block_id,
				   struct perf_counter_block *block);

#endif // PMC_TABLE_H


================================================
FILE: libhsakmt/src/queues.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "fmm.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <math.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>

/* 1024 doorbells, 4 or 8 bytes each doorbell depending on ASIC generation */
#define DOORBELL_SIZE(gfxv)	(((gfxv) >= 0x90000) ? 8 : 4)
/* Size of a doorbell page holding 1024 doorbells of ds bytes each */
#define DOORBELLS_PAGE_SIZE(ds)	(1024 * (ds))

/*
 * Per-CU work-group context size: VGPR + SGPR + LDS + HW register storage.
 * `node` is any expression yielding an object with an LDSSizeInKB member;
 * it is parenthesized so that arguments such as `*ptr` or `nodes[i]` expand
 * with the intended precedence.
 */
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, node) 		\
	(hsakmt_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +	\
	 ((node).LDSSizeInKB << 10) + HWREG_SIZE_PER_CU)

/* Control stack bytes per wave: 12 from GFX_VERSION_NAVI10 onwards, else 8 */
#define CNTL_STACK_BYTES_PER_WAVE(gfxv)	\
	((gfxv) >= GFX_VERSION_NAVI10 ? 12 : 8)

#define HWREG_SIZE_PER_CU	0x1000
#define DEBUGGER_BYTES_ALIGN	64
#define DEBUGGER_BYTES_PER_WAVE	32

/*
 * Bookkeeping for a single queue.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * unless stated otherwise — confirm against the queue creation code.
 */
struct queue {
	uint32_t queue_id;
	uint64_t wptr;
	uint64_t rptr;
	void *eop_buffer;			/* presumably sized by eop_buffer_size */
	void *ctx_save_restore;			/* presumably sized by ctx_save_restore_size */
	uint32_t ctx_save_restore_size;
	uint32_t ctl_stack_size;
	uint32_t debug_memory_size;
	uint32_t eop_buffer_size;
	uint32_t total_mem_alloc_size;
	uint32_t gfxv;				/* presumably the GFX version of the owning node */
	bool use_ats;
	bool unified_ctx_save_restore;
	/* This queue structure is allocated from GPU with page aligned size
	 * but only small bytes are used. We use the extra space in the end for
	 * cu_mask bits array.
	 */
	uint32_t cu_mask_count; /* in bits */
	uint32_t cu_mask[0];	/* zero-length trailing array; storage follows the struct */
};

/*
 * Per-node doorbell mapping state for this process.
 */
struct process_doorbells {
	/* true: mapping was obtained through the fmm (GPUVM) path and is
	 * torn down with hsakmt_fmm_unmap_from_gpu()/hsakmt_fmm_release();
	 * false: mapping is a plain mmap() and is torn down with munmap().
	 */
	bool use_gpuvm;
	/* Size of the doorbell mapping in bytes; 0 means the teardown paths
	 * skip this node.
	 */
	uint32_t size;
	/* Address of the doorbell mapping, NULL after initialization */
	void *mapping;
	/* Initialized by hsakmt_init_process_doorbells().
	 * NOTE(review): presumably serializes doorbell mapping for the node — confirm.
	 */
	pthread_mutex_t mutex;
};

/* Number of entries in doorbells[] */
static unsigned int num_doorbells;
/* Array indexed by topology NodeId; NULL until
 * hsakmt_init_process_doorbells() succeeds and again after teardown.
 */
static struct process_doorbells *doorbells;

/*
 * Return the VGPR size per CU for the given GFX version.
 *
 * The result is 0x80000 or 0x60000 for the ASICs listed below and 0x40000
 * for everything else.
 * NOTE(review): presumably in bytes, since WG_CONTEXT_DATA_SIZE_PER_CU adds
 * it to (LDSSizeInKB << 10) — confirm.
 */
uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv)
{
	/* AQUA_VANJARAM is compared with the low 8 bits of gfxv masked off */
	if (gfxv == GFX_VERSION_GFX950 ||
	    (gfxv & ~(0xff)) == GFX_VERSION_AQUA_VANJARAM ||
	    gfxv == GFX_VERSION_ALDEBARAN ||
	    gfxv == GFX_VERSION_ARCTURUS)
		return 0x80000;

	if (gfxv == GFX_VERSION_PLUM_BONITO ||
	    gfxv == GFX_VERSION_WHEAT_NAS ||
	    gfxv == GFX_VERSION_GFX1200 ||
	    gfxv == GFX_VERSION_GFX1201)
		return 0x60000;

	return 0x40000;
}

HSAKMT_STATUS hsakmt_init_process_doorbells(unsigned int NumNodes)
{
	unsigned int i;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	/* doorbells[] is accessed using Topology NodeId. This means doorbells[0],
	 * which corresponds to CPU only Node, might not be used
	 */
	doorbells = malloc(NumNodes * sizeof(struct process_doorbells));
	if (!doorbells)
		return HSAKMT_STATUS_NO_MEMORY;

	for (i = 0; i < NumNodes; i++) {
		doorbells[i].use_gpuvm = false;
		doorbells[i].size = 0;
		doorbells[i].mapping = NULL;
		pthread_mutex_init(&doorbells[i].mutex, NULL);
	}

	num_doorbells = NumNodes;

	return ret;
}

/* Fill in how the doorbell page of node_id should be mapped: whether the
 * GPUVM path is used and how many bytes to map (never less than PAGE_SIZE).
 */
static void get_doorbell_map_info(uint32_t node_id,
				  struct process_doorbells *doorbell)
{
	const uint32_t gfx_version = hsakmt_get_gfxv_by_node_id(node_id);
	uint32_t bytes = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(gfx_version));

	/*
	 * GPUVM doorbell on Tonga requires a workaround for VM TLB ACTIVE bit
	 * lookup bug. Remove ASIC check when this is implemented in amdgpu.
	 */
	doorbell->use_gpuvm = (hsakmt_is_dgpu && gfx_version != GFX_VERSION_TONGA);

	if (bytes < (uint32_t) PAGE_SIZE)
		bytes = PAGE_SIZE;
	doorbell->size = bytes;
}

void hsakmt_destroy_process_doorbells(void)
{
	unsigned int i;

	if (!doorbells)
		return;

	for (i = 0; i < num_doorbells; i++) {
		if (!doorbells[i].size)
			continue;

		if (doorbells[i].use_gpuvm) {
			hsakmt_fmm_unmap_from_gpu(doorbells[i].mapping);
			hsakmt_fmm_release(doorbells[i].mapping);
		} else
			munmap(doorbells[i].mapping, doorbells[i].size);
	}

	free(doorbells);
	doorbells = NULL;
	num_doorbells = 0;
}

/* This is a special funcion that should be called only from the child process
 * after a fork(). This will clear doorbells duplicated from the parent.
 */
void hsakmt_clear_process_doorbells(void)
{
	unsigned int i;

	if (!doorbells)
		return;

	for (i = 0; i < num_doorbells; i++) {
		if (!doorbells[i].size)
			continue;

		if (!doorbells[i].use_gpuvm)
			munmap(doorbells[i].mapping, doorbells[i].size);
	}

	free(doorbells);
	doorbells = NULL;
	num_doorbells = 0;
}

/* Map the doorbell page of NodeId by mmap()ing the KFD file descriptor at
 * doorbell_mmap_offset. On success the address is stored in
 * doorbells[NodeId].mapping. gpu_id is not used on this path.
 */
static HSAKMT_STATUS map_doorbell_apu(HSAuint32 NodeId, HSAuint32 gpu_id,
				      HSAuint64 doorbell_mmap_offset)
{
	struct process_doorbells *db = &doorbells[NodeId];
	void *cpu_addr = mmap(0, db->size, PROT_READ|PROT_WRITE,
			      MAP_SHARED, hsakmt_kfd_fd, doorbell_mmap_offset);

	if (cpu_addr != MAP_FAILED) {
		db->mapping = cpu_addr;
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}

/* Map the doorbell page of NodeId through the fmm doorbell allocator and make
 * it GPU-accessible. On success the address is stored in
 * doorbells[NodeId].mapping; on failure nothing stays allocated.
 */
static HSAKMT_STATUS map_doorbell_dgpu(HSAuint32 NodeId, HSAuint32 gpu_id,
				       HSAuint64 doorbell_mmap_offset)
{
	struct process_doorbells *db = &doorbells[NodeId];
	void *db_addr = hsakmt_fmm_allocate_doorbell(gpu_id, db->size,
						     doorbell_mmap_offset);

	if (db_addr == NULL)
		return HSAKMT_STATUS_ERROR;

	/* map for GPU access */
	if (hsakmt_fmm_map_to_gpu(db_addr, db->size, NULL) != 0) {
		hsakmt_fmm_release(db_addr);
		return HSAKMT_STATUS_ERROR;
	}

	db->mapping = db_addr;

	return HSAKMT_STATUS_SUCCESS;
}

/* Lazily map the doorbell page of NodeId, once per node.
 *
 * A non-zero size means the mapping already exists and the call succeeds
 * immediately. Otherwise the GPUVM path is tried first when selected, with a
 * fallback to the plain mmap path. On failure size is reset to 0 so that a
 * later call retries. The whole sequence runs under the node's mutex.
 */
static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
				  HSAuint64 doorbell_mmap_offset)
{
	struct process_doorbells *db = &doorbells[NodeId];
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pthread_mutex_lock(&db->mutex);

	if (!db->size) {
		get_doorbell_map_info(NodeId, db);

		if (db->use_gpuvm)
			status = map_doorbell_dgpu(NodeId, gpu_id,
						   doorbell_mmap_offset);

		if (!db->use_gpuvm || status != HSAKMT_STATUS_SUCCESS) {
			/* Either GPUVM was not selected, or KFD doesn't
			 * support doorbells in GPUVM: use the old method.
			 */
			db->use_gpuvm = false;
			status = map_doorbell_apu(NodeId, gpu_id,
						  doorbell_mmap_offset);
		}

		if (status != HSAKMT_STATUS_SUCCESS)
			db->size = 0;
	}

	pthread_mutex_unlock(&db->mutex);

	return status;
}

/* Allocate size bytes of readable, writable and executable anonymous memory.
 * Returns NULL when the mapping cannot be created.
 */
static void *allocate_exec_aligned_memory_cpu(uint32_t size)
{
	/* mmap returns memory aligned to sysconf(_SC_PAGESIZE), and
	 * MAP_ANONYMOUS guarantees it is zero-initialized.
	 */
	void *region = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
			    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	return (region == MAP_FAILED) ? NULL : region;
}

/* Compute the control stack, debug memory and context save/restore sizes of
 * queue q from the properties of node nodeid and store them in q.
 *
 * Returns true when the queue needs a context save/restore area (sizes have
 * been filled in), false when it does not: gfx version older than CARRIZO,
 * node properties unavailable, or the node reports no compute cores / SIMDs.
 */
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
{
	HsaNodeProperties node;
	uint32_t stack_bytes, wg_bytes, num_cus, num_waves;

	if (q->gfxv < GFX_VERSION_CARRIZO)
		return false;
	if (hsaKmtGetNodeProperties(nodeid, &node))
		return false;
	if (!node.NumFComputeCores || !node.NumSIMDPerCU)
		return false;

	/* CUs per XCC */
	num_cus = node.NumFComputeCores / node.NumSIMDPerCU / node.NumXcc;

	if (q->gfxv < GFX_VERSION_NAVI10)
		num_waves = MIN(num_cus * 40, node.NumShaderBanks / node.NumArrays * 512);
	else
		num_waves = num_cus * 32;

	stack_bytes = num_waves * CNTL_STACK_BYTES_PER_WAVE(q->gfxv) + 8;
	wg_bytes = num_cus * WG_CONTEXT_DATA_SIZE_PER_CU(q->gfxv, node);

	q->ctl_stack_size = PAGE_ALIGN_UP(sizeof(HsaUserContextSaveAreaHeader)
				+ stack_bytes);
	if ((q->gfxv & 0x3f0000) == 0xA0000) {
		/* HW design limits control stack size to 0x7000.
		 * This is insufficient for theoretical PM4 cases
		 * but sufficient for AQL, limited by SPI events.
		 */
		q->ctl_stack_size = MIN(q->ctl_stack_size, 0x7000);
	}

	q->debug_memory_size =
		ALIGN_UP(num_waves * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);

	q->ctx_save_restore_size = q->ctl_stack_size
				+ PAGE_ALIGN_UP(wg_bytes);

	return true;
}

/* Allocate executable memory and map it for GPU access.
 *
 * size:        requested size in bytes; rounded up to a multiple of align
 * align:       alignment the size is rounded up to
 * gpu_id:      KFD GPU id the allocation is made for
 * NodeId:      topology node; when non-zero the memory is mapped to that
 *              node only, otherwise hsaKmtMapMemoryToGPU is used
 * nonPaged:    forwarded as the NonPaged flag; when false, system memory is
 *              bound to the CPU node closest to NodeId if one is found
 * DeviceLocal: allocate device memory (unless hsakmt_zfb_support is set)
 *              instead of host memory; also selects CoarseGrain and
 *              disables HostAccess
 * Uncached:    forwarded as the Uncached flag
 *
 * Returns the allocation, or NULL when allocating or mapping fails (the
 * allocation is freed again in the latter case).
 */
void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t gpu_id,
				       uint32_t NodeId, bool nonPaged,
				       bool DeviceLocal,
				       bool Uncached)
{
	void *mem = NULL;
	HSAuint64 gpu_va;
	HsaMemFlags flags;
	HSAuint32 cpu_id = 0;
	HSAKMT_STATUS result;

	flags.Value = 0;
	flags.ui32.HostAccess = !DeviceLocal;
	flags.ui32.ExecuteAccess = 1;
	flags.ui32.NonPaged = nonPaged;
	flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
	flags.ui32.CoarseGrain = DeviceLocal;
	flags.ui32.Uncached = Uncached;

	size = ALIGN_UP(size, align);

	if (DeviceLocal && !hsakmt_zfb_support)
		mem = hsakmt_fmm_allocate_device(gpu_id, NodeId, mem, size, 0, flags);
	else {
		/* VRAM under ZFB mode should be supported here without any
		 * additional code
		 */
		/* Get the closest cpu_id to GPU NodeId for system memory allocation
		 * nonPaged=0 system memory allocation uses GTT path
		 */
		if (!nonPaged) {
			cpu_id = hsakmt_get_direct_link_cpu(NodeId);
			if (cpu_id == INVALID_NODEID) {
				flags.ui32.NoNUMABind = 1;
				cpu_id = 0;
			}
		}
		mem = hsakmt_fmm_allocate_host(gpu_id, cpu_id, mem, size, 0, flags);
	}

	if (!mem) {
		/* size is unsigned: print it with %u so that large values are
		 * not reported as negative numbers.
		 */
		pr_err("Alloc %s memory failed size %u\n",
		       DeviceLocal ? "VRAM" : "GTT", size);
		return NULL;
	}

	if (NodeId != 0) {
		uint32_t nodes_array[1] = {NodeId};
		HsaMemMapFlags map_flags = {0};

		result = hsaKmtMapMemoryToGPUNodes(mem, size, &gpu_va, map_flags, 1, nodes_array);
	} else {
		result = hsaKmtMapMemoryToGPU(mem, size, &gpu_va);
	}

	if (result != HSAKMT_STATUS_SUCCESS) {
		hsaKmtFreeMemory(mem, size);
		return NULL;
	}

	return mem;
}

/* Release memory obtained from hsakmt_allocate_exec_aligned_memory_gpu().
 * size and align must match the allocation so the same rounded size is
 * freed. The memory is only freed when unmapping from the GPU succeeds.
 */
void hsakmt_free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align)
{
	const uint32_t rounded = ALIGN_UP(size, align);

	if (hsaKmtUnmapMemoryToGPU(addr) != HSAKMT_STATUS_SUCCESS)
		return;

	hsaKmtFreeMemory(addr, rounded);
}

/*
 * Allocate page-aligned executable memory. With use_ats set the memory is an
 * anonymous CPU mapping and the remaining arguments are ignored; otherwise
 * they are forwarded to the GPU allocator with PAGE_SIZE alignment.
 */
static void *allocate_exec_aligned_memory(uint32_t size,
					  bool use_ats,
					  uint32_t gpu_id,
					  uint32_t NodeId,
					  bool nonPaged,
					  bool DeviceLocal,
					  bool Uncached)
{
	if (use_ats)
		return allocate_exec_aligned_memory_cpu(size);

	return hsakmt_allocate_exec_aligned_memory_gpu(size, PAGE_SIZE, gpu_id,
						       NodeId, nonPaged,
						       DeviceLocal, Uncached);
}

/* Counterpart of allocate_exec_aligned_memory(): munmap for ATS (CPU)
 * allocations, GPU unmap + free otherwise. align is only used on the GPU path.
 */
static void free_exec_aligned_memory(void *addr, uint32_t size, uint32_t align,
				     bool use_ats)
{
	if (use_ats) {
		munmap(addr, size);
		return;
	}

	hsakmt_free_exec_aligned_memory_gpu(addr, size, align);
}

/* Set the SVM attributes of [mem, mem + size) through hsaKmtSVMSetAttr.
 *
 * gpuNode:       node given the ACCESS attribute
 * prefetchNode:  value of the PREFETCH_LOC attribute
 * preferredNode: value of the PREFERRED_LOC attribute
 * alwaysMapped:  additionally request HSA_SVM_FLAG_GPU_ALWAYS_MAPPED; this
 *                is gated by CHECK_KFD_MINOR_VERSION(11)
 *
 * Host access and GPU execute flags are always set, every other flag is
 * cleared, and the granularity attribute is set to 0xFF.
 */
static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
				uint32_t gpuNode, uint32_t prefetchNode,
				uint32_t preferredNode, bool alwaysMapped)
{
	HSAuint32 flags = HSA_SVM_FLAG_HOST_ACCESS | HSA_SVM_FLAG_GPU_EXEC;

	if (alwaysMapped) {
		CHECK_KFD_MINOR_VERSION(11);
		flags |= HSA_SVM_FLAG_GPU_ALWAYS_MAPPED;
	}

	HSA_SVM_ATTRIBUTE attrs[6];

	attrs[0].type = HSA_SVM_ATTR_PREFETCH_LOC;
	attrs[0].value = prefetchNode;

	attrs[1].type = HSA_SVM_ATTR_PREFERRED_LOC;
	attrs[1].value = preferredNode;

	/* clear everything that is not explicitly requested ... */
	attrs[2].type = HSA_SVM_ATTR_CLR_FLAGS;
	attrs[2].value = ~flags;

	/* ... then set the requested flags */
	attrs[3].type = HSA_SVM_ATTR_SET_FLAGS;
	attrs[3].value = flags;

	attrs[4].type = HSA_SVM_ATTR_ACCESS;
	attrs[4].value = gpuNode;

	attrs[5].type = HSA_SVM_ATTR_GRANULARITY;
	attrs[5].value = 0xFF;

	return hsaKmtSVMSetAttr(mem, size, 6, attrs);
}

/* Release everything owned by q: the EOP buffer, the context save/restore
 * area (munmap when it is a unified SVM range, allocator free otherwise) and
 * finally the queue structure itself.
 */
static void free_queue(struct queue *q)
{
	/* Cache the flag: q itself is released by the last call below */
	const bool ats = q->use_ats;

	if (q->eop_buffer != NULL)
		free_exec_aligned_memory(q->eop_buffer, q->eop_buffer_size,
					 PAGE_SIZE, ats);

	if (q->unified_ctx_save_restore) {
		munmap(q->ctx_save_restore, q->total_mem_alloc_size);
	} else if (q->ctx_save_restore != NULL) {
		free_exec_aligned_memory(q->ctx_save_restore,
					 q->total_mem_alloc_size,
					 PAGE_SIZE, ats);
	}

	free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, ats);
}

/* Write one HsaUserContextSaveAreaHeader per XCC into the save area at addr.
 *
 * The header of XCC i lives at addr + i * ctx_save_restore_size. Each header
 * records the error event id (0 when Event is NULL), the error payload
 * pointer, DebugOffset = (NumXcc - i) * ctx_save_restore_size and
 * DebugSize = debug_memory_size * NumXcc.
 */
static inline void fill_cwsr_header(struct queue *q, void *addr,
		HsaEvent *Event, volatile HSAint64 *ErrPayload, HSAuint32 NumXcc)
{
	const uintptr_t base = (uintptr_t)addr;
	uint32_t xcc;

	for (xcc = 0; xcc < NumXcc; xcc++) {
		HsaUserContextSaveAreaHeader *hdr = (HsaUserContextSaveAreaHeader *)
			(base + (xcc * q->ctx_save_restore_size));

		if (Event)
			hdr->ErrorEventId = Event->EventId;
		else
			hdr->ErrorEventId = 0;

		hdr->ErrorReason = ErrPayload;
		hdr->DebugOffset = (NumXcc - xcc) * q->ctx_save_restore_size;
		hdr->DebugSize = q->debug_memory_size * NumXcc;
	}
}

/* Allocate the per-queue GPU buffers a compute queue needs and describe them
 * in the create-queue ioctl arguments.
 *
 * q:          queue being created; receives the EOP buffer and the context
 *             save/restore (CWSR) area
 * args:       ioctl arguments; EOP and CWSR addresses/sizes are filled in
 * gpu_id:     KFD GPU id used for the allocations
 * NodeId:     topology node the queue is created on
 * Event:      optional error event recorded in the CWSR header(s)
 * ErrPayload: error payload pointer recorded in the CWSR header(s)
 *
 * SDMA queues of every flavour (SDMA, SDMA_XGMI and SDMA_BY_ENG_ID) need
 * neither buffer, so nothing is allocated for them.
 *
 * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_NO_MEMORY when an allocation
 * fails, or HSAKMT_STATUS_ERROR when the node properties cannot be read.
 * Buffers already attached to q when an error is returned are left in place
 * for the caller to release (hsaKmtCreateQueueExt does so via free_queue()).
 */
static int handle_concrete_asic(struct queue *q,
				struct kfd_ioctl_create_queue_args *args,
				uint32_t gpu_id,
				uint32_t NodeId,
				HsaEvent *Event,
				volatile HSAint64 *ErrPayload)
{
	bool ret;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA ||
	    args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI ||
	    args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
		return HSAKMT_STATUS_SUCCESS;

	if (q->eop_buffer_size > 0) {
		pr_info("Allocating VRAM for EOP\n");
		q->eop_buffer = allocate_exec_aligned_memory(q->eop_buffer_size,
				q->use_ats, gpu_id,
				NodeId, true, true, /* Unused for VRAM */false);
		if (!q->eop_buffer)
			return HSAKMT_STATUS_NO_MEMORY;

		args->eop_buffer_address = (uintptr_t)q->eop_buffer;
		args->eop_buffer_size = q->eop_buffer_size;
	}

	ret = update_ctx_save_restore_size(NodeId, q);

	if (ret) {
		HsaNodeProperties node;

		if (hsaKmtGetNodeProperties(NodeId, &node))
			return HSAKMT_STATUS_ERROR;

		args->ctx_save_restore_size = q->ctx_save_restore_size;
		args->ctl_stack_size = q->ctl_stack_size;

		/* Total memory to be allocated is =
		 * (Control Stack size + WG size +
		 *  Debug memory area size) * num_xcc
		 */
		q->total_mem_alloc_size = (q->ctx_save_restore_size +
					q->debug_memory_size) * node.NumXcc;

		/* Allocate unified memory for context save restore
		 * area on dGPU.
		 */
		if (!q->use_ats && hsakmt_is_svm_api_supported) {
			uint32_t size = PAGE_ALIGN_UP(q->total_mem_alloc_size);

			pr_info("Allocating GTT for CWSR\n");
			void *addr = hsakmt_mmap_allocate_aligned(PROT_READ | PROT_WRITE,
						     MAP_ANONYMOUS | MAP_PRIVATE,
						     size, GPU_HUGE_PAGE_SIZE, 0,
						     0, (void *)LONG_MAX, -1);
			if (!addr) {
				pr_err("mmap failed to alloc ctx area size 0x%x: %s\n",
					size, strerror(errno));
			} else {
				/*
				 * To avoid fork child process COW MMU notifier
				 * callback evict parent process queues.
				 */
				if (madvise(addr, size, MADV_DONTFORK))
					pr_err("madvise failed -%d\n", errno);

				fill_cwsr_header(q, addr, Event, ErrPayload, node.NumXcc);

				HSAKMT_STATUS r = register_svm_range(addr, size,
						NodeId, NodeId, 0, true);

				if (r == HSAKMT_STATUS_SUCCESS) {
					q->ctx_save_restore = addr;
					q->unified_ctx_save_restore = true;
				} else {
					/* Fall through to the regular
					 * allocation below.
					 */
					munmap(addr, size);
				}
			}
		}

		/* Regular allocation: used when the unified path is not
		 * available or did not succeed.
		 */
		if (!q->unified_ctx_save_restore) {
			q->ctx_save_restore = allocate_exec_aligned_memory(
							q->total_mem_alloc_size,
							q->use_ats, gpu_id, NodeId,
							false, false, false);

			if (!q->ctx_save_restore)
				return HSAKMT_STATUS_NO_MEMORY;

			fill_cwsr_header(q, q->ctx_save_restore, Event, ErrPayload, node.NumXcc);
		}

		args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
	}

	return HSAKMT_STATUS_SUCCESS;
}

/* A map to translate thunk queue priority (-3 to +3)
 * to KFD queue priority (0 to 15)
 * Indexed by thunk_queue_priority+3
 *
 * Callers must range-check the priority against
 * HSA_QUEUE_PRIORITY_MINIMUM/MAXIMUM before indexing.
 */
static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15};

/* Create a queue without selecting an SDMA engine.
 *
 * Forwards to hsaKmtCreateQueueExt() with SdmaEngineId 0.
 * HSA_QUEUE_SDMA_BY_ENG_ID is rejected with HSAKMT_STATUS_ERROR because this
 * entry point has no way to pass an engine id.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
					  HSA_QUEUE_TYPE Type,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSizeInBytes,
					  HsaEvent *Event,
					  HsaQueueResource *QueueResource)
{
	if (Type != HSA_QUEUE_SDMA_BY_ENG_ID)
		return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage,
					    Priority, 0, QueueAddress,
					    QueueSizeInBytes, Event,
					    QueueResource);

	return HSAKMT_STATUS_ERROR;
}

/* Create a KFD queue on a node and report how to drive it.
 *
 * NodeId:           topology node the queue is created on
 * Type:             queue type; unknown types are rejected
 * QueuePercentage:  forwarded to KFD
 * Priority:         thunk priority, HSA_QUEUE_PRIORITY_MINIMUM..MAXIMUM
 * SdmaEngineId:     forwarded to KFD as the SDMA engine id
 * QueueAddress:     ring buffer base address
 * QueueSizeInBytes: ring buffer size
 * Event:            optional error event stored in the CWSR header
 * QueueResource:    in: ErrorReason and, for AQL queues, the read/write
 *                   pointer addresses; out: QueueId, Queue_DoorBell and,
 *                   for non-AQL queues, the read/write pointer addresses
 *
 * Returns HSAKMT_STATUS_SUCCESS on success. On any failure every resource
 * acquired so far (queue structure, EOP/CWSR buffers, KFD queue) is released
 * before returning.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
					     HSA_QUEUE_TYPE Type,
					     HSAuint32 QueuePercentage,
					     HSA_QUEUE_PRIORITY Priority,
					     HSAuint32 SdmaEngineId,
					     void *QueueAddress,
					     HSAuint64 QueueSizeInBytes,
					     HsaEvent *Event,
					     HsaQueueResource *QueueResource)
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	uint64_t doorbell_mmap_offset;
	unsigned int doorbell_offset;
	int err;
	HsaNodeProperties props;
	uint32_t cu_num, i;

	CHECK_KFD_OPEN();

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
		Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	struct queue *q = allocate_exec_aligned_memory(sizeof(*q),
			false, gpu_id, NodeId, true, false, true);
	if (!q)
		return HSAKMT_STATUS_NO_MEMORY;

	memset(q, 0, sizeof(*q));

	q->gfxv = hsakmt_get_gfxv_by_node_id(NodeId);
	q->use_ats = false;

	if (q->gfxv == GFX_VERSION_TONGA)
		q->eop_buffer_size = TONGA_PAGE_SIZE;
	else if ((q->gfxv & ~(0xff)) == GFX_VERSION_AQUA_VANJARAM)
		q->eop_buffer_size = ((Type == HSA_QUEUE_COMPUTE) ? 4096 : 0);
	else if (q->gfxv >= 0x80000)
		q->eop_buffer_size = 4096;

	/* By default, CUs are all turned on. Initialize cu_mask to '1
	 * for all CU bits. A node that reports no SIMDs per CU gets an empty
	 * mask instead of dividing by zero.
	 */
	if (hsaKmtGetNodeProperties(NodeId, &props) || !props.NumSIMDPerCU)
		q->cu_mask_count = 0;
	else {
		cu_num = props.NumFComputeCores / props.NumSIMDPerCU;
		/* cu_mask_count counts bits. It must be multiple of 32 */
		q->cu_mask_count = ALIGN_UP_32(cu_num, 32);
		/* unsigned constant: shifting into bit 31 of a signed int
		 * is undefined
		 */
		for (i = 0; i < cu_num; i++)
			q->cu_mask[i/32] |= (1U << (i % 32));
	}

	struct kfd_ioctl_create_queue_args args = {0};

	args.gpu_id = gpu_id;

	switch (Type) {
	case HSA_QUEUE_COMPUTE:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE;
		break;
	case HSA_QUEUE_SDMA:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA;
		break;
	case HSA_QUEUE_SDMA_XGMI:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
		break;
	case HSA_QUEUE_SDMA_BY_ENG_ID:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID;
		break;
	case HSA_QUEUE_COMPUTE_AQL:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
		break;
	default:
		/* q is already allocated at this point; don't leak it */
		free_queue(q);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* Non-AQL queues use the read/write pointers embedded in q */
	if (Type != HSA_QUEUE_COMPUTE_AQL) {
		QueueResource->QueueRptrValue = (uintptr_t)&q->rptr;
		QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
	}

	err = handle_concrete_asic(q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
	if (err != HSAKMT_STATUS_SUCCESS) {
		free_queue(q);
		return err;
	}

	args.read_pointer_address = QueueResource->QueueRptrValue;
	args.write_pointer_address = QueueResource->QueueWptrValue;
	args.ring_base_address = (uintptr_t)QueueAddress;
	args.ring_size = QueueSizeInBytes;
	args.queue_percentage = QueuePercentage;
	args.queue_priority = priority_map[Priority+3];
	args.sdma_engine_id = SdmaEngineId;

	err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);

	if (err == -1) {
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	q->queue_id = args.queue_id;

	if (IS_SOC15(q->gfxv)) {
		HSAuint64 mask = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(q->gfxv)) - 1;

		/* On SOC15 chips, the doorbell offset within the
		 * doorbell page is included in the doorbell offset
		 * returned by KFD. This allows CP queue doorbells to be
		 * allocated dynamically (while SDMA queue doorbells fixed)
		 * rather than based on the its process queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset & ~mask;
		doorbell_offset = args.doorbell_offset & mask;
	} else {
		/* On older chips, the doorbell offset within the
		 * doorbell page is based on the queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset;
		doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
	}

	err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
	if (err != HSAKMT_STATUS_SUCCESS) {
		/* hsaKmtDestroyQueue() takes the queue handle (the struct
		 * queue pointer), not the KFD queue id.
		 */
		hsaKmtDestroyQueue(PORT_VPTR_TO_UINT64(q));
		return HSAKMT_STATUS_ERROR;
	}

	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
	QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
						     doorbell_offset);

	return HSAKMT_STATUS_SUCCESS;
}

/* Update ring buffer, percentage and priority of an existing queue through
 * AMDKFD_IOC_UPDATE_QUEUE.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an out-of-range priority or a
 * NULL queue handle, HSAKMT_STATUS_ERROR when the ioctl fails. Event is not
 * used.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSize,
					  HsaEvent *Event)
{
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	struct kfd_ioctl_update_queue_args update = {0};

	CHECK_KFD_OPEN();

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
	    Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (q == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	update.queue_id = (HSAuint32)q->queue_id;
	update.ring_base_address = (uintptr_t)QueueAddress;
	update.ring_size = QueueSize;
	update.queue_percentage = QueuePercentage;
	update.queue_priority = priority_map[Priority+3];

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &update) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/* Destroy the KFD queue behind QueueId and release its thunk-side resources.
 *
 * QueueId is the handle returned in HsaQueueResource.QueueId. When the
 * destroy ioctl fails nothing is freed and HSAKMT_STATUS_ERROR is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
{
	CHECK_KFD_OPEN();

	struct kfd_ioctl_destroy_queue_args destroy = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);

	if (q == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	destroy.queue_id = q->queue_id;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &destroy) == -1) {
		pr_err("Failed to destroy queue: %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	free_queue(q);

	return HSAKMT_STATUS_SUCCESS;
}

/* Set the CU mask of a queue and remember it in the queue structure.
 *
 * QueueId:     queue handle
 * CUMaskCount: number of mask bits; must be a non-zero multiple of 32
 * QueueCUMask: mask words, CUMaskCount / 32 of them
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle or mask, a bad
 * bit count, or a mask too large to be cached behind the queue structure;
 * HSAKMT_STATUS_ERROR when the ioctl fails.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
					     HSAuint32 CUMaskCount,
					     HSAuint32 *QueueCUMask)
{
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	struct kfd_ioctl_set_cu_mask_args args = {0};

	CHECK_KFD_OPEN();

	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (CUMaskCount == 0 || !QueueCUMask || ((CUMaskCount % 32) != 0))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* The queue structure is allocated with a page-aligned size and
	 * cu_mask[] lives in the space behind it, so the copy below must fit
	 * in what is left of that page.
	 */
	if (CUMaskCount / 8 > (size_t)PAGE_SIZE - sizeof(*q))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.queue_id = q->queue_id;
	args.num_cu_mask = CUMaskCount;
	args.cu_mask_ptr = (uintptr_t)QueueCUMask;

	int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);

	if (err == -1)
		return HSAKMT_STATUS_ERROR;

	/* CUMaskCount is in bits, memcpy wants bytes */
	memcpy(q->cu_mask, QueueCUMask, CUMaskCount / 8);
	q->cu_mask_count = CUMaskCount;

	return HSAKMT_STATUS_SUCCESS;
}

/* Query the wave state of a queue and describe its save area.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when QueueInfo or the queue handle
 * is NULL, and HSAKMT_STATUS_ERROR when the queue has no context
 * save/restore area or the AMDKFD_IOC_GET_QUEUE_WAVE_STATE ioctl fails.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfo(
	HSA_QUEUEID QueueId,
	HsaQueueInfo *QueueInfo
)
{
	struct kfd_ioctl_get_queue_wave_state_args wave_state = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	uint64_t stack_end;

	CHECK_KFD_OPEN();

	if (!QueueInfo || !q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (!q->ctx_save_restore)
		return HSAKMT_STATUS_ERROR;

	wave_state.queue_id = q->queue_id;
	wave_state.ctl_stack_address = (uintptr_t)q->ctx_save_restore;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &wave_state) < 0)
		return HSAKMT_STATUS_ERROR;

	/* The control stack occupies the first ctl_stack_size bytes of the
	 * area; its used part ends at stack_end, where the user context save
	 * area begins.
	 */
	stack_end = wave_state.ctl_stack_address + q->ctl_stack_size;

	QueueInfo->ControlStackTop = (void *)(stack_end - wave_state.ctl_stack_used_size);
	QueueInfo->UserContextSaveArea = (void *)stack_end;
	QueueInfo->SaveAreaSizeInBytes = wave_state.save_area_used_size;
	QueueInfo->ControlStackUsedInBytes = wave_state.ctl_stack_used_size;
	QueueInfo->NumCUAssigned = q->cu_mask_count;
	QueueInfo->CUMaskInfo = q->cu_mask;
	QueueInfo->QueueDetailError = 0;
	QueueInfo->QueueTypeExtended = 0;
	QueueInfo->SaveAreaHeader = q->ctx_save_restore;

	return HSAKMT_STATUS_SUCCESS;
}

/* Register the trap handler (TBA) and trap buffer (TMA) addresses for a node
 * through AMDKFD_IOC_SET_TRAP_HANDLER. The two size arguments are accepted
 * but not passed on to the ioctl.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
					     void *TrapHandlerBaseAddress,
					     HSAuint64 TrapHandlerSizeInBytes,
					     void *TrapBufferBaseAddress,
					     HSAuint64 TrapBufferSizeInBytes)
{
	struct kfd_ioctl_set_trap_handler_args trap = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	status = hsakmt_validate_nodeid(Node, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	trap.gpu_id = gpu_id;
	trap.tba_addr = (uintptr_t)TrapHandlerBaseAddress;
	trap.tma_addr = (uintptr_t)TrapBufferBaseAddress;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &trap) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}

/* Translate an array of queue handles into a malloc'ed array of KFD queue
 * ids. Returns NULL when the input is empty, allocation fails, or any handle
 * is NULL. The caller owns (and must free) the returned array.
 */
uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues)
{
	uint32_t *ids;
	unsigned int n;

	if (!NumQueues || !Queues)
		return NULL;

	ids = malloc(NumQueues * sizeof(uint32_t));
	if (ids == NULL)
		return NULL;

	for (n = 0; n < NumQueues; n++) {
		struct queue *entry = PORT_UINT64_TO_VPTR(Queues[n]);

		if (entry != NULL) {
			ids[n] = entry->queue_id;
			continue;
		}

		/* One bad handle invalidates the whole request */
		free(ids);
		return NULL;
	}

	return ids;
}

/* Allocate nGWS global wave sync entries for a queue.
 *
 * QueueId:  queue handle
 * nGWS:     number of GWS entries requested
 * firstGWS: optional; receives the first allocated GWS on success
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle. Otherwise the
 * ioctl's errno is mapped: EINVAL -> INVALID_PARAMETER, EBUSY ->
 * OUT_OF_RESOURCES, ENODEV -> NOT_SUPPORTED, anything else -> ERROR.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocQueueGWS(
                HSA_QUEUEID        QueueId,
                HSAuint32          nGWS,
                HSAuint32          *firstGWS)
{
	struct kfd_ioctl_alloc_queue_gws_args args = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);

	CHECK_KFD_OPEN();

	/* Same handle validation as hsaKmtUpdateQueue/hsaKmtDestroyQueue */
	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.queue_id = (HSAuint32)q->queue_id;
	args.num_gws = nGWS;

	int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);

	if (!err && firstGWS)
		*firstGWS = args.first_gws;

	if (!err)
		return HSAKMT_STATUS_SUCCESS;
	else if (errno == EINVAL)
		return HSAKMT_STATUS_INVALID_PARAMETER;
	else if (errno == EBUSY)
		return HSAKMT_STATUS_OUT_OF_RESOURCES;
	else if (errno == ENODEV)
		return HSAKMT_STATUS_NOT_SUPPORTED;
	else
		return HSAKMT_STATUS_ERROR;
}


================================================
FILE: libhsakmt/src/rbtree.c
================================================
/*
 * Copyright (C) 2002-2018 Igor Sysoev
 * Copyright (C) 2011-2018 Nginx, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "rbtree.h"

/* Forward declarations: the rotation helpers are defined after the
 * insert/delete routines that use them.
 */
static inline void rbtree_left_rotate(rbtree_node_t **root,
		rbtree_node_t *sentinel, rbtree_node_t *node);
static inline void rbtree_right_rotate(rbtree_node_t **root,
		rbtree_node_t *sentinel, rbtree_node_t *node);

/* Plain binary-search-tree insertion of node below temp.
 *
 * Descends left when node's key compares less than the current node's key
 * and right otherwise, until a sentinel link is found. node is hooked in at
 * that link as a red leaf whose children are the sentinel.
 */
static void
hsakmt_rbtree_insert_value(rbtree_node_t *temp, rbtree_node_t *node,
		rbtree_node_t *sentinel)
{
	rbtree_node_t  *parent = temp;
	rbtree_node_t  **slot;

	for ( ;; ) {
		if (rbtree_key_compare(LKP_ALL, &node->key, &parent->key) < 0)
			slot = &parent->left;
		else
			slot = &parent->right;

		if (*slot == sentinel)
			break;

		parent = *slot;
	}

	*slot = node;
	node->parent = parent;
	node->left = sentinel;
	node->right = sentinel;
	rbt_red(node);
}


/* Insert node into tree and restore the red-black properties.
 *
 * node->key must be set by the caller; parent, children and color are
 * initialized here.
 */
void
hsakmt_rbtree_insert(rbtree_t *tree, rbtree_node_t *node)
{
	rbtree_node_t  **root, *temp, *sentinel;

	/* a binary tree insert */

	root = &tree->root;
	sentinel = &tree->sentinel;

	/* empty tree: node becomes the (black) root */
	if (*root == sentinel) {
		node->parent = NULL;
		node->left = sentinel;
		node->right = sentinel;
		rbt_black(node);
		*root = node;

		return;
	}

	hsakmt_rbtree_insert_value(*root, node, sentinel);

	/* re-balance tree */

	/* node is red here; loop while its parent is red as well */
	while (node != *root && rbt_is_red(node->parent)) {

		if (node->parent == node->parent->parent->left) {
			/* temp is the uncle */
			temp = node->parent->parent->right;

			if (rbt_is_red(temp)) {
				/* red uncle: recolor and continue from the grandparent */
				rbt_black(node->parent);
				rbt_black(temp);
				rbt_red(node->parent->parent);
				node = node->parent->parent;

			} else {
				/* black uncle: rotate node into the outer position first if needed */
				if (node == node->parent->right) {
					node = node->parent;
					rbtree_left_rotate(root, sentinel, node);
				}

				rbt_black(node->parent);
				rbt_red(node->parent->parent);
				rbtree_right_rotate(root, sentinel, node->parent->parent);
			}

		} else {
			/* mirror image of the branch above */
			temp = node->parent->parent->left;

			if (rbt_is_red(temp)) {
				rbt_black(node->parent);
				rbt_black(temp);
				rbt_red(node->parent->parent);
				node = node->parent->parent;

			} else {
				if (node == node->parent->left) {
					node = node->parent;
					rbtree_right_rotate(root, sentinel, node);
				}

				rbt_black(node->parent);
				rbt_red(node->parent->parent);
				rbtree_left_rotate(root, sentinel, node->parent->parent);
			}
		}
	}

	/* the root is always black */
	rbt_black(*root);
}


/* Unlink node from tree and restore the red-black properties.
 *
 * The node's memory is not touched beyond its links; releasing it is left to
 * the caller.
 */
void
hsakmt_rbtree_delete(rbtree_t *tree, rbtree_node_t *node)
{
	unsigned int red;
	rbtree_node_t  **root, *sentinel, *subst, *temp, *w;

	/* a binary tree delete */

	root = &tree->root;
	sentinel = &tree->sentinel;

	/* subst: the node that is physically unlinked from its position;
	 * temp: the child that takes subst's place (may be the sentinel)
	 */
	if (node->left == sentinel) {
		temp = node->right;
		subst = node;

	} else if (node->right == sentinel) {
		temp = node->left;
		subst = node;

	} else {
		/* two children: use the in-order successor */
		subst = rbtree_min(node->right, sentinel);

		if (subst->left != sentinel) {
			temp = subst->left;
		} else {
			temp = subst->right;
		}
	}

	if (subst == *root) {
		*root = temp;
		rbt_black(temp);

		return;
	}

	/* remember the color being removed; only a black removal needs a fixup */
	red = rbt_is_red(subst);

	if (subst == subst->parent->left) {
		subst->parent->left = temp;

	} else {
		subst->parent->right = temp;
	}

	if (subst == node) {

		/* note: temp may be the sentinel, whose parent field is used by the fixup */
		temp->parent = subst->parent;

	} else {

		if (subst->parent == node) {
			temp->parent = subst;

		} else {
			temp->parent = subst->parent;
		}

		/* subst takes over node's position, links and color */
		subst->left = node->left;
		subst->right = node->right;
		subst->parent = node->parent;
		rbt_copy_color(subst, node);

		if (node == *root) {
			*root = subst;

		} else {
			if (node == node->parent->left) {
				node->parent->left = subst;
			} else {
				node->parent->right = subst;
			}
		}

		if (subst->left != sentinel) {
			subst->left->parent = subst;
		}

		if (subst->right != sentinel) {
			subst->right->parent = subst;
		}
	}

	if (red) {
		return;
	}

	/* a delete fixup */

	while (temp != *root && rbt_is_black(temp)) {

		if (temp == temp->parent->left) {
			/* w is the sibling of temp */
			w = temp->parent->right;

			if (rbt_is_red(w)) {
				rbt_black(w);
				rbt_red(temp->parent);
				rbtree_left_rotate(root, sentinel, temp->parent);
				w = temp->parent->right;
			}

			if (rbt_is_black(w->left) && rbt_is_black(w->right)) {
				/* both nephews black: recolor and move up */
				rbt_red(w);
				temp = temp->parent;

			} else {
				if (rbt_is_black(w->right)) {
					rbt_black(w->left);
					rbt_red(w);
					rbtree_right_rotate(root, sentinel, w);
					w = temp->parent->right;
				}

				rbt_copy_color(w, temp->parent);
				rbt_black(temp->parent);
				rbt_black(w->right);
				rbtree_left_rotate(root, sentinel, temp->parent);
				/* done: terminate the loop */
				temp = *root;
			}

		} else {
			/* mirror image of the branch above */
			w = temp->parent->left;

			if (rbt_is_red(w)) {
				rbt_black(w);
				rbt_red(temp->parent);
				rbtree_right_rotate(root, sentinel, temp->parent);
				w = temp->parent->left;
			}

			if (rbt_is_black(w->left) && rbt_is_black(w->right)) {
				rbt_red(w);
				temp = temp->parent;

			} else {
				if (rbt_is_black(w->left)) {
					rbt_black(w->right);
					rbt_red(w);
					rbtree_left_rotate(root, sentinel, w);
					w = temp->parent->left;
				}

				rbt_copy_color(w, temp->parent);
				rbt_black(temp->parent);
				rbt_black(w->left);
				rbtree_right_rotate(root, sentinel, temp->parent);
				temp = *root;
			}
		}
	}

	rbt_black(temp);
}


/* Rotate left around node: node's right child takes node's place and node
 * becomes that child's left child. *root is updated when node was the root.
 */
static inline void
rbtree_left_rotate(rbtree_node_t **root, rbtree_node_t *sentinel,
		rbtree_node_t *node)
{
	rbtree_node_t  *temp;

	temp = node->right;
	/* temp's left subtree moves under node */
	node->right = temp->left;

	if (temp->left != sentinel) {
		temp->left->parent = node;
	}

	temp->parent = node->parent;

	/* hook temp into node's former position */
	if (node == *root) {
		*root = temp;

	} else if (node == node->parent->left) {
		node->parent->left = temp;

	} else {
		node->parent->right = temp;
	}

	temp->left = node;
	node->parent = temp;
}


/* Rotate right around node: node's left child takes node's place and node
 * becomes that child's right child. *root is updated when node was the root.
 */
static inline void
rbtree_right_rotate(rbtree_node_t **root, rbtree_node_t *sentinel,
		rbtree_node_t *node)
{
	rbtree_node_t  *temp;

	temp = node->left;
	/* temp's right subtree moves under node */
	node->left = temp->right;

	if (temp->right != sentinel) {
		temp->right->parent = node;
	}

	temp->parent = node->parent;

	/* hook temp into node's former position */
	if (node == *root) {
		*root = temp;

	} else if (node == node->parent->right) {
		node->parent->right = temp;

	} else {
		node->parent->left = temp;
	}

	temp->right = node;
	node->parent = temp;
}


/* Return the in-order successor of node, or NULL when node is the last one.
 *
 * With a right subtree the successor is that subtree's minimum. Otherwise it
 * is the first ancestor reached from its left child.
 */
rbtree_node_t *
hsakmt_rbtree_next(rbtree_t *tree, rbtree_node_t *node)
{
	rbtree_node_t  *sentinel = &tree->sentinel;
	rbtree_node_t  *top, *up;

	if (node->right != sentinel) {
		return rbtree_min(node->right, sentinel);
	}

	top = tree->root;

	while (node != top) {
		up = node->parent;

		if (up->left == node) {
			return up;
		}

		node = up;
	}

	return NULL;
}

/* Return the in-order predecessor of node, or NULL when node is the first.
 *
 * With a left subtree the predecessor is that subtree's maximum. Otherwise it
 * is the first ancestor reached from its right child.
 */
rbtree_node_t *
hsakmt_rbtree_prev(rbtree_t *tree, rbtree_node_t *node)
{
	rbtree_node_t  *sentinel = &tree->sentinel;
	rbtree_node_t  *top, *up;

	if (node->left != sentinel) {
		return rbtree_max(node->left, sentinel);
	}

	top = tree->root;

	while (node != top) {
		up = node->parent;

		if (up->right == node) {
			return up;
		}

		node = up;
	}

	return NULL;
}


================================================
FILE: libhsakmt/src/rbtree.h
================================================
/*
 * Copyright (C) 2002-2018 Igor Sysoev
 * Copyright (C) 2011-2018 Nginx, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef _RBTREE_H_
#define _RBTREE_H_

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <errno.h>
#include "rbtree_amd.h"

/* Forward typedef so the node structure can point at nodes of its own type. */
typedef struct rbtree_node_s rbtree_node_t;

/*
 * One node of the red-black tree.
 * An absent child is represented by the address of the owning tree's
 * sentinel node rather than by NULL (see rbtree_min()).
 */
struct rbtree_node_s {
	rbtree_key_t    key;		/* ordering key, declared in rbtree_amd.h */
	rbtree_node_t   *left;		/* left child, or the tree sentinel */
	rbtree_node_t   *right;		/* right child, or the tree sentinel */
	rbtree_node_t   *parent;	/* parent node */
	unsigned char   color;		/* 1 = red, 0 = black (see rbt_red/rbt_black) */
	unsigned char   data;		/* NOTE(review): not referenced in this header - confirm purpose */
};

typedef struct rbtree_s rbtree_t;

/*
 * A red-black tree.
 * The sentinel is embedded in the tree object; an empty tree has
 * root == &sentinel (see rbtree_init()).
 */
struct rbtree_s {
	rbtree_node_t   *root;		/* topmost node, or &sentinel when empty */
	rbtree_node_t   sentinel;	/* shared "no node" marker, always black */
};

#define rbtree_init(tree)				\
	rbtree_sentinel_init(&(tree)->sentinel);	\
	(tree)->root = &(tree)->sentinel;

/*
 * Out-of-line tree operations (definitions live outside this header).
 * hsakmt_rbtree_prev() returns the in-order predecessor of @node, or NULL
 * when there is none.
 * NOTE(review): insert/delete/next are presumably the usual red-black
 * insert, remove and in-order successor operations - their bodies are not
 * visible from this header, confirm against the implementation.
 */
void hsakmt_rbtree_insert(rbtree_t *tree, rbtree_node_t *node);
void hsakmt_rbtree_delete(rbtree_t *tree, rbtree_node_t *node);
rbtree_node_t *hsakmt_rbtree_prev(rbtree_t *tree,
		rbtree_node_t *node);
rbtree_node_t *hsakmt_rbtree_next(rbtree_t *tree,
		rbtree_node_t *node);

#define rbt_red(node)			((node)->color = 1)
#define rbt_black(node)			((node)->color = 0)
#define rbt_is_red(node)		((node)->color)
#define rbt_is_black(node)		(!rbt_is_red(node))
#define rbt_copy_color(n1, n2)		(n1->color = n2->color)

/* a sentinel must be black */

#define rbtree_sentinel_init(node)	rbt_black(node)

/*
 * rbtree_min - leftmost node of the subtree rooted at @node.
 *	@node - subtree root; must be a real node, not the sentinel
 *	@sentinel - the tree's sentinel, which terminates the descent
 *	Return - the node reached by following left links until the next
 *		 left link is the sentinel
 */
static inline rbtree_node_t *
rbtree_min(rbtree_node_t *node, rbtree_node_t *sentinel)
{
	rbtree_node_t *leftmost;

	for (leftmost = node; leftmost->left != sentinel;
	     leftmost = leftmost->left)
		;

	return leftmost;
}

#include "rbtree_amd.h"

#endif


================================================
FILE: libhsakmt/src/rbtree_amd.h
================================================
/*
 * Copyright © 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _RBTREE_AMD_H_
#define _RBTREE_AMD_H_

/*
 * Tree key: an (addr, size) pair.
 * ADDR_BIT and SIZE_BIT are bit positions; the masks built from them below
 * tell rbtree_key_compare() which of the two fields take part in a
 * comparison.
 */
typedef struct rbtree_key_s rbtree_key_t;
struct rbtree_key_s {
#define ADDR_BIT 0
#define SIZE_BIT 1
	unsigned long addr;
	unsigned long size;
};
/* BIT(x): integer with only bit number x set */
#define BIT(x) (1<<(x))
/*
 * Comparison selectors:
 *   LKP_ADDR      - compare addr only
 *   LKP_ADDR_SIZE - compare addr, then size
 *   LKP_ALL       - same value as LKP_ADDR_SIZE
 */
#define LKP_ALL (BIT(ADDR_BIT) | BIT(SIZE_BIT))
#define LKP_ADDR (BIT(ADDR_BIT))
#define LKP_ADDR_SIZE (BIT(ADDR_BIT) | BIT(SIZE_BIT))

static inline rbtree_key_t
rbtree_key(unsigned long addr, unsigned long size)
{
	return (rbtree_key_t){addr, size};
}

/*
 * rbtree_key_compare - three-way comparison of two keys.
 *	@type - bit mask choosing the fields to compare: bit ADDR_BIT enables
 *		addr, bit SIZE_BIT enables size
 *	@key1, @key2 - keys to compare
 *	Return - 1 if key1 > key2, -1 if key1 < key2, 0 if all selected
 *		 fields are equal (also when no field is selected)
 *
 * addr is checked first; size only decides when addr is equal or not
 * selected.
 */
static inline int
rbtree_key_compare(unsigned int type, rbtree_key_t *key1, rbtree_key_t *key2)
{
	const int use_addr = (type & (1 << ADDR_BIT)) != 0;
	const int use_size = (type & (1 << SIZE_BIT)) != 0;

	if (use_addr && key1->addr != key2->addr)
		return (key1->addr < key2->addr) ? -1 : 1;

	if (use_size && key1->size != key2->size)
		return (key1->size < key2->size) ? -1 : 1;

	return 0;
}
#endif /*_RBTREE_AMD_H_*/

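/*
 * This file is meant to be included twice: the first inclusion only defines
 * RBTREE_HELPER, the second one (RBTREE_HELPER already defined) compiles the
 * helper functions below.
 */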
#ifndef RBTREE_HELPER
#define RBTREE_HELPER
#else
#ifndef _RBTREE_AMD_H_HELPER_
#define _RBTREE_AMD_H_HELPER_
/*
 * rbtree_max - rightmost node of the subtree rooted at @node.
 *	@node - subtree root; must be a real node, not the sentinel
 *	@sentinel - the tree's sentinel, which terminates the descent
 *	Return - the node reached by following right links until the next
 *		 right link is the sentinel
 */
static inline rbtree_node_t *
rbtree_max(rbtree_node_t *node, rbtree_node_t *sentinel)
{
	rbtree_node_t *rightmost;

	for (rightmost = node; rightmost->right != sentinel;
	     rightmost = rightmost->right)
		;

	return rightmost;
}

/* Position selectors shared by the helpers in this file. */
#define LEFT 0
#define RIGHT 1
#define MID 2
/*
 * rbtree_min_max - pick the smallest or largest node of a tree.
 *	@tree - tree to inspect
 *	@lr - LEFT for the leftmost node, RIGHT for the rightmost node;
 *	      any other value yields the root
 *	Return - the selected node, or NULL when the tree is empty
 */
static inline rbtree_node_t *
rbtree_min_max(rbtree_t *tree, int lr)
{
	rbtree_node_t *nil = &tree->sentinel;
	rbtree_node_t *top = tree->root;

	if (top == nil)
		return NULL;

	switch (lr) {
	case LEFT:
		return rbtree_min(top, nil);
	case RIGHT:
		return rbtree_max(top, nil);
	default:
		return top;
	}
}

/*
 * rbtree_node_any - fetch some node of the tree by position.
 *	@tree - tree to inspect
 *	@lmr - MID for the root; otherwise forwarded to rbtree_min_max()
 *	       (LEFT = leftmost node, RIGHT = rightmost node)
 *	Return - the selected node, or NULL when the tree is empty
 */
static inline rbtree_node_t *
rbtree_node_any(rbtree_t *tree, int lmr)
{
	if (tree->root == &tree->sentinel)
		return NULL;

	return (lmr == MID) ? tree->root : rbtree_min_max(tree, lmr);
}

/*
 * rbtree_lookup_nearest - find @key, or its nearest neighbour on one side.
 *	@rbtree - tree to search
 *	@key - key to look for
 *	@type - field selector handed to rbtree_key_compare()
 *	@lr - fallback when no node compares equal:
 *	      RIGHT - return the last node visited that is greater than @key
 *	      LEFT  - return the last node visited that is smaller than @key
 *	      other - no fallback
 *	Return - the node comparing equal to @key if one is found on the
 *		 search path, else the fallback node, else NULL
 */
static inline rbtree_node_t *
rbtree_lookup_nearest(rbtree_t *rbtree, rbtree_key_t *key,
		unsigned int type, int lr)
{
	rbtree_node_t *nil = &rbtree->sentinel;
	rbtree_node_t *cur = rbtree->root;
	rbtree_node_t *best = NULL;
	int cmp;

	while (cur != nil) {
		cmp = rbtree_key_compare(type, key, &cur->key);

		if (cmp == 0)
			return cur;

		if (cmp < 0) {
			/* @key is smaller: cur is a candidate right neighbour */
			if (lr == RIGHT)
				best = cur;
			cur = cur->left;
		} else {
			/* @key is larger: cur is a candidate left neighbour */
			if (lr == LEFT)
				best = cur;
			cur = cur->right;
		}
	}

	return best;
}

/*
 * rbtree_lookup - exact-match lookup.
 *	@rbtree - tree to search
 *	@key - key to look for
 *	@type - field selector handed to rbtree_key_compare()
 *	Return - the matching node, or NULL when there is none
 */
static inline rbtree_node_t *
rbtree_lookup(rbtree_t *rbtree, rbtree_key_t *key,
		unsigned int type)
{
	/* neither LEFT nor RIGHT, so no nearest-neighbour fallback is used */
	const int exact_only = -1;

	return rbtree_lookup_nearest(rbtree, key, type, exact_only);
}
#endif /*_RBTREE_AMD_H_HELPER_*/

#endif /*RBTREE_HELPER*/


================================================
FILE: libhsakmt/src/spm.c
================================================
/*
 * Copyright © 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>


/*
 * hsaKmtSPMAcquire - issue the SPM "acquire" operation for a node.
 *	@PreferredNode - node ID, translated to a gpu_id through
 *			 hsakmt_validate_nodeid()
 *	Return - the validation status when the node ID is rejected,
 *		 otherwise the value returned by hsakmt_ioctl() unchanged
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
{
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id;
	int status;

	status = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
		return status;
	}

	args.gpu_id = gpu_id;
	args.op = KFD_IOCTL_SPM_OP_ACQUIRE;

	status = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	return status;
}

/*
 * hsaKmtSPMSetDestBuffer - issue the SPM "set destination buffer" operation.
 *	@PreferredNode [IN ] node ID, translated to a gpu_id through
 *			     hsakmt_validate_nodeid()
 *	@SizeInBytes [IN ] passed to the driver as args.buf_size
 *	@timeout [IN/OUT] value passed to the driver as args.timeout; on
 *			  return it holds whatever the driver left there
 *	@SizeCopied [OUT] receives args.bytes_copied
 *	@DestMemoryAddress [IN ] passed to the driver as args.dest_buf
 *	@isSPMDataLoss [OUT] receives args.has_data_loss
 *	Return - the validation status when the node ID is rejected,
 *		 HSAKMT_STATUS_INVALID_PARAMETER when @timeout, @SizeCopied
 *		 or @isSPMDataLoss is NULL, otherwise the value returned by
 *		 hsakmt_ioctl() unchanged
 *
 * The three output values are written back whether or not the ioctl
 * succeeded.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
						HSAuint32 SizeInBytes,
						HSAuint32 * timeout,
						HSAuint32 * SizeCopied,
						void *DestMemoryAddress,
						bool *isSPMDataLoss)
{
	int ret;
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id = 0;

	ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		return ret;
	}

	/*
	 * All three pointers are dereferenced unconditionally below; reject
	 * NULL instead of crashing. Checked after the node ID so that an
	 * invalid node keeps reporting the same status as before.
	 */
	if (!timeout || !SizeCopied || !isSPMDataLoss)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.timeout    = *timeout;
	args.dest_buf    = (uint64_t)DestMemoryAddress;
	args.buf_size   = SizeInBytes;
	args.op         = KFD_IOCTL_SPM_OP_SET_DEST_BUF;
	args.gpu_id     = gpu_id;

	ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	*SizeCopied = args.bytes_copied;
	*isSPMDataLoss = args.has_data_loss;
	*timeout = args.timeout;

	return ret;
}

/*
 * hsaKmtSPMRelease - issue the SPM "release" operation for a node.
 *	@PreferredNode - node ID, translated to a gpu_id through
 *			 hsakmt_validate_nodeid()
 *	Return - the validation status when the node ID is rejected,
 *		 otherwise the value returned by hsakmt_ioctl() unchanged
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
{
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id;
	int status;

	status = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
		return status;
	}

	args.gpu_id = gpu_id;
	args.op = KFD_IOCTL_SPM_OP_RELEASE;

	status = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	return status;
}


================================================
FILE: libhsakmt/src/svm.c
================================================
/*
 * Copyright © 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "libhsakmt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <errno.h>

/* Helper functions for calling KFD SVM ioctl */

/*
 * hsaKmtSVMSetAttr - set SVM attributes on an address range.
 *	@start_addr [IN ] start of the range; non-NULL and PAGE_SIZE aligned
 *	@size [IN ] length of the range; non-zero multiple of PAGE_SIZE
 *	@nattr [IN ] number of elements in @attrs
 *	@attrs [IN ] attributes to set; not modified
 *	Return - HSAKMT_STATUS_SUCCESS on success,
 *		 HSAKMT_STATUS_INVALID_PARAMETER for a bad range, a NULL
 *		 @attrs with a non-zero @nattr, or an attribute list too
 *		 large to encode in the ioctl number,
 *		 the hsakmt_validate_nodeid() status for a bad node ID,
 *		 HSAKMT_STATUS_INVALID_NODE_UNIT when an access attribute
 *		 names the CPU node,
 *		 HSAKMT_STATUS_ERROR when the ioctl fails
 *
 * The attributes are copied into a stack buffer and, for the attribute
 * types that carry a node ID, the node ID is replaced by the matching
 * gpu_id before the buffer is handed to the driver.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
		 HSA_SVM_ATTRIBUTE *attrs)
{
	struct kfd_ioctl_svm_args *args;
	HSAuint64 s_attr;
	HSAKMT_STATUS r;
	HSAuint32 i;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	pr_debug("%s: address 0x%p size 0x%lx\n", __func__, start_addr, size);

	if (!start_addr || !size)
		return HSAKMT_STATUS_INVALID_PARAMETER;
	if ((uint64_t)start_addr & (PAGE_SIZE - 1))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	if (size & (PAGE_SIZE - 1))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	/* attrs is read by memcpy and by the loop below */
	if (nattr && !attrs)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	s_attr = sizeof(*attrs) * nattr;
	/*
	 * The total argument size is encoded in the size field of the ioctl
	 * number and the buffer lives on the stack: refuse anything that
	 * does not fit the field rather than corrupting the request code or
	 * overrunning the stack.
	 */
	if (s_attr > _IOC_SIZEMASK - sizeof(*args))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	args = alloca(sizeof(*args) + s_attr);

	args->start_addr = (uint64_t)start_addr;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = nattr;
	memcpy(args->attrs, attrs, s_attr);

	for (i = 0; i < nattr; i++) {
		/* only these attribute types carry a node ID in .value */
		if (attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFETCH_LOC &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
		    continue;

		/* "no preferred location" has a dedicated driver value */
		if (attrs[i].type == KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
		    attrs[i].value == INVALID_NODEID) {
			args->attrs[i].value = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
			continue;
		}

		/* translate node ID -> gpu_id in the copy sent to the driver */
		r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
		if (r != HSAKMT_STATUS_SUCCESS) {
			pr_debug("invalid node ID: %d\n", attrs[i].value);
			return r;
		} else if (!args->attrs[i].value &&
			   (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS ||
			    attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE ||
			    attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS)) {
			/* a gpu_id of 0 is treated as the CPU node here */
			pr_debug("CPU node invalid for access attribute\n");
			return HSAKMT_STATUS_INVALID_NODE_UNIT;
		}
	}

	/* Driver does one copy_from_user, with extra attrs size */
	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
	if (r) {
		pr_debug("op set range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtSVMGetAttr - query SVM attributes of an address range.
 *	@start_addr [IN ] start of the range; non-NULL and PAGE_SIZE aligned
 *	@size [IN ] length of the range; non-zero multiple of PAGE_SIZE
 *	@nattr [IN ] number of elements in @attrs
 *	@attrs [IN/OUT] on entry the attribute types to query (access
 *			attributes also carry the node ID they refer to);
 *			on success overwritten with the driver's answer,
 *			with location values converted back to node IDs
 *	Return - HSAKMT_STATUS_SUCCESS on success,
 *		 HSAKMT_STATUS_INVALID_PARAMETER for a bad range, a NULL
 *		 @attrs with a non-zero @nattr, or an attribute list too
 *		 large to encode in the ioctl number,
 *		 the hsakmt_validate_nodeid() / hsakmt_gpuid_to_nodeid()
 *		 status when an ID cannot be translated,
 *		 HSAKMT_STATUS_INVALID_NODE_UNIT when an access attribute
 *		 names the CPU node,
 *		 HSAKMT_STATUS_ERROR when the ioctl fails
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
		 HSA_SVM_ATTRIBUTE *attrs)
{
	struct kfd_ioctl_svm_args *args;
	HSAuint64 s_attr;
	HSAKMT_STATUS r;
	HSAuint32 i;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	pr_debug("%s: address 0x%p size 0x%lx\n", __func__, start_addr, size);

	if (!start_addr || !size)
		return HSAKMT_STATUS_INVALID_PARAMETER;
	if ((uint64_t)start_addr & (PAGE_SIZE - 1))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	if (size & (PAGE_SIZE - 1))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	/* attrs is read and written by memcpy and by the loops below */
	if (nattr && !attrs)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	s_attr = sizeof(*attrs) * nattr;
	/*
	 * The total argument size is encoded in the size field of the ioctl
	 * number and the buffer lives on the stack: refuse anything that
	 * does not fit the field rather than corrupting the request code or
	 * overrunning the stack.
	 */
	if (s_attr > _IOC_SIZEMASK - sizeof(*args))
		return HSAKMT_STATUS_INVALID_PARAMETER;
	args = alloca(sizeof(*args) + s_attr);

	args->start_addr = (uint64_t)start_addr;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
	args->nattr = nattr;
	memcpy(args->attrs, attrs, s_attr);

	/* access queries name a node: translate node ID -> gpu_id */
	for (i = 0; i < nattr; i++) {
		if (attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
		    continue;

		r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
		if (r != HSAKMT_STATUS_SUCCESS) {
			pr_debug("invalid node ID: %d\n", attrs[i].value);
			return r;
		} else if (!args->attrs[i].value) {
			/* a gpu_id of 0 is treated as the CPU node here */
			pr_debug("CPU node invalid for access attribute\n");
			return HSAKMT_STATUS_INVALID_NODE_UNIT;
		}
	}

	/* Driver does one copy_from_user, with extra attrs size */
	r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
	if (r) {
		pr_debug("op get range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	memcpy(attrs, args->attrs, s_attr);

	/* convert the driver's location values back to node IDs */
	for (i = 0; i < nattr; i++) {
		if (attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFETCH_LOC &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
		    attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
			continue;

		switch (attrs[i].value) {
		case KFD_IOCTL_SVM_LOCATION_SYSMEM:
			attrs[i].value = 0;
			break;
		case KFD_IOCTL_SVM_LOCATION_UNDEFINED:
			attrs[i].value = INVALID_NODEID;
			break;
		default:
			r = hsakmt_gpuid_to_nodeid(attrs[i].value, &attrs[i].value);
			if (r != HSAKMT_STATUS_SUCCESS) {
				pr_debug("invalid GPU ID: %d\n",
					 attrs[i].value);
				return r;
			}
		}
	}

	return HSAKMT_STATUS_SUCCESS;
}

/*
 * hsaKmtSetGetXNACKMode - common worker for the XNACK set and get calls.
 *	@enable [IN/OUT] value sent to the driver as xnack_enabled; on
 *			 success replaced by the value the driver returns.
 *			 hsaKmtGetXNACKMode() passes -1 here.
 *	Return - HSAKMT_STATUS_SUCCESS on success,
 *		 HSAKMT_STATUS_NOT_SUPPORTED when the ioctl fails with EPERM,
 *		 HSAKMT_STATUS_ERROR for any other ioctl failure
 */
static HSAKMT_STATUS
hsaKmtSetGetXNACKMode(HSAint32 * enable)
{
	struct kfd_ioctl_set_xnack_mode_args args;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	args.xnack_enabled = *enable;

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &args) == 0) {
		*enable = args.xnack_enabled;
		return HSAKMT_STATUS_SUCCESS;
	}

	switch (errno) {
	case EPERM:
		pr_debug("set mode not supported %s\n",
			 strerror(errno));
		return HSAKMT_STATUS_NOT_SUPPORTED;
	case EBUSY:
		pr_debug("hsakmt_ioctl queues not empty %s\n",
			 strerror(errno));
		break;
	default:
		break;
	}

	return HSAKMT_STATUS_ERROR;
}

/*
 * hsaKmtSetXNACKMode - request an XNACK mode.
 *	@enable - mode value forwarded to hsaKmtSetGetXNACKMode()
 *	Return - status of hsaKmtSetGetXNACKMode(); the value the driver
 *		 writes back is discarded
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSetXNACKMode(HSAint32 enable)
{
	HSAint32 requested = enable;

	return hsaKmtSetGetXNACKMode(&requested);
}

/*
 * hsaKmtGetXNACKMode - query the current XNACK mode.
 *	@enable [OUT] receives the mode reported by the driver; it is set
 *		      to -1 before the call and stays -1 when the call fails
 *	Return - HSAKMT_STATUS_INVALID_PARAMETER when @enable is NULL,
 *		 otherwise the status of hsaKmtSetGetXNACKMode()
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtGetXNACKMode(HSAint32 * enable)
{
	/* @enable is written unconditionally below: reject NULL up front */
	if (!enable)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* -1 is the value this library sends to ask for the current mode */
	*enable = -1;
	return hsaKmtSetGetXNACKMode(enable);
}


================================================
FILE: libhsakmt/src/time.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"

/*
 * hsaKmtGetClockCounters - read the clock counters reported for a node.
 *	@NodeId [IN ] node ID, translated to a gpu_id through
 *		      hsakmt_validate_nodeid()
 *	@Counters [OUT] filled with the GPU, CPU and system clock counters
 *			and the system clock frequency; left untouched on
 *			failure
 *	Return - HSAKMT_STATUS_SUCCESS on success, the validation status for
 *		 a bad node ID, HSAKMT_STATUS_ERROR when the ioctl fails
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
					       HsaClockCounters *Counters)
{
	struct kfd_ioctl_get_clock_counters_args args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;
	int rc;

	CHECK_KFD_OPEN();

	status = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	args.gpu_id = gpu_id;

	rc = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
	if (rc < 0)
		return HSAKMT_STATUS_ERROR;

	Counters->GPUClockCounter = args.gpu_clock_counter;
	Counters->CPUClockCounter = args.cpu_clock_counter;
	Counters->SystemClockCounter = args.system_clock_counter;
	Counters->SystemClockFrequencyHz = args.system_clock_freq;

	/* status still holds HSAKMT_STATUS_SUCCESS from the validation */
	return status;
}


================================================
FILE: libhsakmt/src/topology.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 * Copyright 2016-2018 Raptor Engineering, LLC. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <malloc.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <limits.h>

#include <errno.h>
#include <sys/sysinfo.h>
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

#include "libhsakmt.h"
#include "hsakmt/hsakmtmodel.h"
#include "fmm.h"

/* Number of memory banks added by thunk on top of topology
 * This only includes static heaps like LDS, scratch and SVM,
 * not for MMIO_REMAP heap. MMIO_REMAP memory bank is reported
 * dynamically based on whether mmio aperture was mapped
 * successfully on this node.
 */
#define NUM_OF_IGPU_HEAPS 3
#define NUM_OF_DGPU_HEAPS 3
/* SYSFS related */
/* Default topology root; get_topology_dir() returns it unless the model is in use. */
#define KFD_SYSFS_PATH "/sys/devices/virtual/kfd/kfd/topology"
/* printf-style templates; the %s is presumably the topology root - confirm at the call sites */
#define KFD_SYSFS_PATH_GENERATION_ID "%s/generation_id"
#define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "%s/system_properties"
#define KFD_SYSFS_PATH_NODES "%s/nodes"

/* get_topology_dir - root directory of the topology tree
 *	Return - hsakmt_model_topology when hsakmt_use_model is set,
 *		 KFD_SYSFS_PATH otherwise
 */
static const char *get_topology_dir(void)
{
	return hsakmt_use_model ? hsakmt_model_topology : KFD_SYSFS_PATH;
}

/* Properties collected for one node, together with its per-node arrays.
 * The three arrays are heap allocations released by free_properties().
 */
typedef struct {
	HsaNodeProperties node;
	HsaMemoryProperties *mem;     /* node->NumBanks elements */
	HsaCacheProperties *cache;    /* NOTE(review): element count not visible here - confirm */
	HsaIoLinkProperties *link;    /* NOTE(review): element count not visible here - confirm */
} node_props_t;

/* File-scope topology state.
 * NOTE(review): presumably filled by topology_take_snapshot() and released by
 * topology_drop_snapshot() - both are only declared in this part of the file.
 */
static HsaSystemProperties *g_system;
static node_props_t *g_props;

/* This array caches sysfs based node IDs of CPU nodes + all supported GPU nodes.
 * It will be used to map user-node IDs to sysfs-node IDs.
 */
static uint32_t *map_user_to_sysfs_node_id;
static uint32_t map_user_to_sysfs_node_id_size;
static uint32_t num_sysfs_nodes;

/* Detected CPU vendor, one of SUPPORTED_PROCESSOR_VENDORS; starts at -1. */
static int processor_vendor = -1;
/* Supported System Vendors */
enum SUPPORTED_PROCESSOR_VENDORS {
	GENUINE_INTEL = 0,
	AUTHENTIC_AMD,
	IBM_POWER
};
/* Adding newline to make the search easier */
/* NOTE(review): entries appear to be indexed by SUPPORTED_PROCESSOR_VENDORS - confirm at the lookup site */
static const char *supported_processor_vendor_name[] = {
	"GenuineIntel\n",
	"AuthenticAMD\n",
	"\n"			// POWER requires a different search method
};

/* Forward declarations; the definitions come later in this file. */
static HSAKMT_STATUS topology_take_snapshot(void);
static void topology_drop_snapshot(void);

/* Static table of known GPUs, grouped by ASIC family.
 * NOTE(review): struct hsa_gfxip_table is declared outside this file; the
 * five columns look like { device ID, GFX IP major, minor, stepping, ASIC
 * name } - confirm against the structure declaration.
 */
static const struct hsa_gfxip_table gfxip_lookup_table[] = {
	/* Kaveri Family */
	{ 0x1304, 7, 0, 0, "Spectre" },
	{ 0x1305, 7, 0, 0, "Spectre" },
	{ 0x1306, 7, 0, 0, "Spectre" },
	{ 0x1307, 7, 0, 0, "Spectre" },
	{ 0x1309, 7, 0, 0, "Spectre" },
	{ 0x130A, 7, 0, 0, "Spectre" },
	{ 0x130B, 7, 0, 0, "Spectre" },
	{ 0x130C, 7, 0, 0, "Spectre" },
	{ 0x130D, 7, 0, 0, "Spectre" },
	{ 0x130E, 7, 0, 0, "Spectre" },
	{ 0x130F, 7, 0, 0, "Spectre" },
	{ 0x1310, 7, 0, 0, "Spectre" },
	{ 0x1311, 7, 0, 0, "Spectre" },
	{ 0x1312, 7, 0, 0, "Spooky" },
	{ 0x1313, 7, 0, 0, "Spectre" },
	{ 0x1315, 7, 0, 0, "Spectre" },
	{ 0x1316, 7, 0, 0, "Spooky" },
	{ 0x1317, 7, 0, 0, "Spooky" },
	{ 0x1318, 7, 0, 0, "Spectre" },
	{ 0x131B, 7, 0, 0, "Spectre" },
	{ 0x131C, 7, 0, 0, "Spectre" },
	{ 0x131D, 7, 0, 0, "Spectre" },
	/* Hawaii Family */
	{ 0x67A0, 7, 0, 1, "Hawaii" },
	{ 0x67A1, 7, 0, 1, "Hawaii" },
	{ 0x67A2, 7, 0, 1, "Hawaii" },
	{ 0x67A8, 7, 0, 1, "Hawaii" },
	{ 0x67A9, 7, 0, 1, "Hawaii" },
	{ 0x67AA, 7, 0, 1, "Hawaii" },
	{ 0x67B0, 7, 0, 1, "Hawaii" },
	{ 0x67B1, 7, 0, 1, "Hawaii" },
	{ 0x67B8, 7, 0, 1, "Hawaii" },
	{ 0x67B9, 7, 0, 1, "Hawaii" },
	{ 0x67BA, 7, 0, 1, "Hawaii" },
	{ 0x67BE, 7, 0, 1, "Hawaii" },
	/* Carrizo Family */
	{ 0x9870, 8, 0, 1, "Carrizo" },
	{ 0x9874, 8, 0, 1, "Carrizo" },
	{ 0x9875, 8, 0, 1, "Carrizo" },
	{ 0x9876, 8, 0, 1, "Carrizo" },
	{ 0x9877, 8, 0, 1, "Carrizo" },
	/* Tonga Family */
	{ 0x6920, 8, 0, 2, "Tonga" },
	{ 0x6921, 8, 0, 2, "Tonga" },
	{ 0x6928, 8, 0, 2, "Tonga" },
	{ 0x6929, 8, 0, 2, "Tonga" },
	{ 0x692B, 8, 0, 2, "Tonga" },
	{ 0x692F, 8, 0, 2, "Tonga" },
	{ 0x6930, 8, 0, 2, "Tonga" },
	{ 0x6938, 8, 0, 2, "Tonga" },
	{ 0x6939, 8, 0, 2, "Tonga" },
	/* Fiji */
	{ 0x7300, 8, 0, 3, "Fiji" },
	{ 0x730F, 8, 0, 3, "Fiji" },
	/* Polaris10 */
	{ 0x67C0, 8, 0, 3, "Polaris10" },
	{ 0x67C1, 8, 0, 3, "Polaris10" },
	{ 0x67C2, 8, 0, 3, "Polaris10" },
	{ 0x67C4, 8, 0, 3, "Polaris10" },
	{ 0x67C7, 8, 0, 3, "Polaris10" },
	{ 0x67C8, 8, 0, 3, "Polaris10" },
	{ 0x67C9, 8, 0, 3, "Polaris10" },
	{ 0x67CA, 8, 0, 3, "Polaris10" },
	{ 0x67CC, 8, 0, 3, "Polaris10" },
	{ 0x67CF, 8, 0, 3, "Polaris10" },
	{ 0x67D0, 8, 0, 3, "Polaris10" },
	{ 0x67DF, 8, 0, 3, "Polaris10" },
	{ 0x6FDF, 8, 0, 3, "Polaris10" },
	/* Polaris11 */
	{ 0x67E0, 8, 0, 3, "Polaris11" },
	{ 0x67E1, 8, 0, 3, "Polaris11" },
	{ 0x67E3, 8, 0, 3, "Polaris11" },
	{ 0x67E7, 8, 0, 3, "Polaris11" },
	{ 0x67E8, 8, 0, 3, "Polaris11" },
	{ 0x67E9, 8, 0, 3, "Polaris11" },
	{ 0x67EB, 8, 0, 3, "Polaris11" },
	{ 0x67EF, 8, 0, 3, "Polaris11" },
	{ 0x67FF, 8, 0, 3, "Polaris11" },
	/* Polaris12 */
	{ 0x6980, 8, 0, 3, "Polaris12" },
	{ 0x6981, 8, 0, 3, "Polaris12" },
	{ 0x6985, 8, 0, 3, "Polaris12" },
	{ 0x6986, 8, 0, 3, "Polaris12" },
	{ 0x6987, 8, 0, 3, "Polaris12" },
	{ 0x6995, 8, 0, 3, "Polaris12" },
	{ 0x6997, 8, 0, 3, "Polaris12" },
	{ 0x699F, 8, 0, 3, "Polaris12" },
	/* VegaM */
	{ 0x694C, 8, 0, 3, "VegaM" },
	{ 0x694E, 8, 0, 3, "VegaM" },
	{ 0x694F, 8, 0, 3, "VegaM" },
	/* Vega10 */
	{ 0x6860, 9, 0, 0, "Vega10" },
	{ 0x6861, 9, 0, 0, "Vega10" },
	{ 0x6862, 9, 0, 0, "Vega10" },
	{ 0x6863, 9, 0, 0, "Vega10" },
	{ 0x6864, 9, 0, 0, "Vega10" },
	{ 0x6867, 9, 0, 0, "Vega10" },
	{ 0x6868, 9, 0, 0, "Vega10" },
	{ 0x6869, 9, 0, 0, "Vega10" },
	{ 0x686A, 9, 0, 0, "Vega10" },
	{ 0x686B, 9, 0, 0, "Vega10" },
	{ 0x686C, 9, 0, 0, "Vega10" },
	{ 0x686D, 9, 0, 0, "Vega10" },
	{ 0x686E, 9, 0, 0, "Vega10" },
	{ 0x687F, 9, 0, 0, "Vega10" },
	/* Vega12 */
	{ 0x69A0, 9, 0, 4, "Vega12" },
	{ 0x69A1, 9, 0, 4, "Vega12" },
	{ 0x69A2, 9, 0, 4, "Vega12" },
	{ 0x69A3, 9, 0, 4, "Vega12" },
	{ 0x69Af, 9, 0, 4, "Vega12" },
	/* Raven */
	{ 0x15DD, 9, 0, 2, "Raven" },
	{ 0x15D8, 9, 0, 2, "Raven" },
	/* Vega20 */
	{ 0x66A0, 9, 0, 6, "Vega20" },
	{ 0x66A1, 9, 0, 6, "Vega20" },
	{ 0x66A2, 9, 0, 6, "Vega20" },
	{ 0x66A3, 9, 0, 6, "Vega20" },
	{ 0x66A4, 9, 0, 6, "Vega20" },
	{ 0x66A7, 9, 0, 6, "Vega20" },
	{ 0x66AF, 9, 0, 6, "Vega20" },
	/* Arcturus */
	{ 0x7388, 9, 0, 8, "Arcturus" },
	{ 0x738C, 9, 0, 8, "Arcturus" },
	{ 0x738E, 9, 0, 8, "Arcturus" },
	{ 0x7390, 9, 0, 8, "Arcturus" },
	/* Aldebaran */
	{ 0x7408, 9, 0, 10, "Aldebaran" },
	{ 0x740C, 9, 0, 10, "Aldebaran" },
	{ 0x740F, 9, 0, 10, "Aldebaran" },
	{ 0x7410, 9, 0, 10, "Aldebaran" },
	/* Renoir */
	{ 0x15E7, 9, 0, 12, "Renoir" },
	{ 0x1636, 9, 0, 12, "Renoir" },
	{ 0x1638, 9, 0, 12, "Renoir" },
	{ 0x164C, 9, 0, 12, "Renoir" },
	/* Navi10 */
	{ 0x7310, 10, 1, 0, "Navi10" },
	{ 0x7312, 10, 1, 0, "Navi10" },
	{ 0x7318, 10, 1, 0, "Navi10" },
	{ 0x731A, 10, 1, 0, "Navi10" },
	{ 0x731E, 10, 1, 0, "Navi10" },
	{ 0x731F, 10, 1, 0, "Navi10" },
	/* cyan_skillfish */
	{ 0x13F9, 10, 1, 3, "cyan_skillfish" },
	{ 0x13FA, 10, 1, 3, "cyan_skillfish" },
	{ 0x13FB, 10, 1, 3, "cyan_skillfish" },
	{ 0x13FC, 10, 1, 3, "cyan_skillfish" },
	{ 0x13FE, 10, 1, 3, "cyan_skillfish" },
	{ 0x143F, 10, 1, 3, "cyan_skillfish" },
	/* Navi14 */
	{ 0x7340, 10, 1, 2, "Navi14" },
	{ 0x7341, 10, 1, 2, "Navi14" },
	{ 0x7347, 10, 1, 2, "Navi14" },
	/* Navi12 */
	{ 0x7360, 10, 1, 1, "Navi12" },
	{ 0x7362, 10, 1, 1, "Navi12" },
	/* SIENNA_CICHLID */
	{ 0x73A0, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A1, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A2, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A3, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A5, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A8, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73A9, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73AC, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73AD, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73AB, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73AE, 10, 3, 0, "SIENNA_CICHLID" },
	{ 0x73BF, 10, 3, 0, "SIENNA_CICHLID" },
	/* NAVY_FLOUNDER */
	{ 0x73C0, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73C1, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73C3, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DA, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DB, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DC, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DD, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DE, 10, 3, 1, "NAVY_FLOUNDER" },
	{ 0x73DF, 10, 3, 1, "NAVY_FLOUNDER" },
	/* DIMGREY_CAVEFISH */
	{ 0x73E0, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73E1, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73E2, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73E8, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73E9, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73EA, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73EB, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73EC, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73ED, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73EF, 10, 3, 2, "DIMGREY_CAVEFISH" },
	{ 0x73FF, 10, 3, 2, "DIMGREY_CAVEFISH" },
	/* VanGogh */
	{ 0x163F, 10, 3, 3, "VanGogh" },
	/* BEIGE_GOBY */
	{ 0x7420, 10, 3, 4, "BEIGE_GOBY" },
	{ 0x7421, 10, 3, 4, "BEIGE_GOBY" },
	{ 0x7422, 10, 3, 4, "BEIGE_GOBY" },
	{ 0x7423, 10, 3, 4, "BEIGE_GOBY" },
	{ 0x743F, 10, 3, 4, "BEIGE_GOBY" },
	/* Yellow_Carp */
	{ 0x164D, 10, 3, 5, "YELLOW_CARP" },
	{ 0x1681, 10, 3, 5, "YELLOW_CARP" },
};

/* information from /proc/cpuinfo
 * One entry per processor. cpumap_to_cpu_ci() indexes an array of these by
 * processor number and uses apicid as the index into a cache's SiblingMap.
 */
struct proc_cpuinfo {
	uint32_t proc_num; /* processor */
	uint32_t apicid; /* apicid */
	char model_name[HSA_PUBLIC_NAME_SIZE]; /* model name */
};

/* CPU cache table for all CPUs on the system. Each entry has the relative CPU
 * info and caches connected to that CPU.
 * get_cpu_cache_info() reads num_caches and cache_prop from an entry.
 */
typedef struct cpu_cacheinfo {
	uint32_t len; /* length of the table = number of online procs */
	int32_t proc_num; /* this cpu's processor number */
	uint32_t num_caches; /* number of caches reported by this cpu */
	HsaCacheProperties *cache_prop; /* a list of cache properties */
} cpu_cacheinfo_t;

/* free_properties - release an array of node properties
 *	@props - array to release; NULL is accepted and ignored
 *	@size - number of elements in @props
 *
 *	Frees the mem, cache and link arrays of every element and then the
 *	array itself.
 */
static void free_properties(node_props_t *props, int size)
{
	int idx;

	if (!props)
		return;

	for (idx = 0; idx < size; idx++) {
		node_props_t *entry = &props[idx];

		free(entry->mem);
		free(entry->cache);
		free(entry->link);
	}

	free(props);
}

/* num_subdirs - count the entries of a directory
 *	@dirpath - directory to scan
 *	@prefix - only entries whose name starts with this string are
 *		counted. Use blank string, "", to count all.
 *	Return - number of matching entries, "." and ".." excluded;
 *		0 when @dirpath cannot be opened
 *
 *	The entry type is not checked: every matching name is counted,
 *	whether or not it is a directory.
 */
static int num_subdirs(char *dirpath, char *prefix)
{
	const size_t want_len = strlen(prefix);
	struct dirent *entry;
	int matches = 0;
	DIR *handle = opendir(dirpath);

	if (!handle)
		return 0;

	while ((entry = readdir(handle)) != NULL) {
		const char *name = entry->d_name;

		if (!strcmp(name, ".") || !strcmp(name, ".."))
			continue;

		if (want_len == 0 || strncmp(name, prefix, want_len) == 0)
			matches++;
	}

	closedir(handle);
	return matches;
}

/* fscanf_dec - read a file whose content is a decimal number
 *      @file [IN ] file to read
 *      @num [OUT] number in the file
 *      Return - HSAKMT_STATUS_SUCCESS when a number was parsed,
 *               HSAKMT_STATUS_INVALID_PARAMETER when the file cannot be
 *               opened, HSAKMT_STATUS_ERROR when parsing fails
 */
static HSAKMT_STATUS fscanf_dec(char *file, uint32_t *num)
{
	HSAKMT_STATUS status;
	FILE *stream = fopen(file, "r");

	if (stream == NULL) {
		pr_err("Failed to open %s\n", file);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	if (fscanf(stream, "%u", num) == 1) {
		status = HSAKMT_STATUS_SUCCESS;
	} else {
		pr_err("Failed to parse %s as a decimal.\n", file);
		status = HSAKMT_STATUS_ERROR;
	}

	fclose(stream);
	return status;
}

/* fscanf_str - read the first line of a file as a string
 *      @file [IN ] file to read
 *      @str [OUT] receives the line, cut at the first CR or LF
 *      @str_size [IN ] capacity of @str in bytes
 *      Return - HSAKMT_STATUS_SUCCESS when a line was read,
 *               HSAKMT_STATUS_INVALID_PARAMETER when the file cannot be
 *               opened, HSAKMT_STATUS_ERROR when nothing could be read
 */
static HSAKMT_STATUS fscanf_str(const char *file, char *str, size_t str_size)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
	FILE *stream = fopen(file, "r");

	if (stream == NULL) {
		pr_err("Failed to open %s\n", file);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	if (fgets(str, (int)str_size, stream) != NULL) {
		/* fgets keeps the line terminator: drop it */
		str[strcspn(str, "\r\n")] = '\0';
		status = HSAKMT_STATUS_SUCCESS;
	} else {
		pr_err("Failed to read from %s.\n", file);
	}

	fclose(stream);
	return status;
}

/* fscanf_size - read a file whose content represents size as a string
 *      @file [IN ] file to read
 *      @bytes [OUT] sizes in bytes
 *      Return - HSAKMT_STATUS_SUCCESS when the size was parsed,
 *               HSAKMT_STATUS_INVALID_PARAMETER when the file cannot be
 *               opened, HSAKMT_STATUS_ERROR when no number is found or the
 *               unit character is not K, M or G
 *
 *      The content is a decimal number optionally followed by one unit
 *      character: K (<< 10), M (<< 20) or G (<< 30). A number followed only
 *      by whitespace (typically the trailing newline) is a plain byte count.
 */
static HSAKMT_STATUS fscanf_size(char *file, uint32_t *bytes)
{
	FILE *fd;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	char unit;
	int n;

	fd = fopen(file, "r");
	if (!fd) {
		pr_err("Failed to open %s\n", file);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	n = fscanf(fd, "%u%c", bytes, &unit);
	if (n < 1) {
		pr_err("Failed to parse %s\n", file);
		ret = HSAKMT_STATUS_ERROR;
	} else if (n == 2 && !isspace((unsigned char)unit)) {
		/* %c does not skip whitespace, so a unit-less number followed
		 * by a newline arrives here with unit == '\n'; the isspace()
		 * test above keeps that case from being reported as an error.
		 */
		switch (unit) {
		case 'K':
			*bytes <<= 10; break;
		case 'M':
			*bytes <<= 20; break;
		case 'G':
			*bytes <<= 30; break;
		default:
			ret = HSAKMT_STATUS_ERROR; break;
		}
	}

	fclose(fd);
	return ret;
}

/* cpumap_to_cpu_ci - translate shared_cpu_map string + cpuinfo->apicid into
 *		      SiblingMap in cache
 *	@shared_cpu_map [IN ] shared_cpu_map string; modified in place by
 *			      strtok()
 *	@cpuinfo [IN ] cpuinfo to get apicid, indexed by processor number
 *	@this_cache [OUT] CPU cache to fill in SiblingMap
 *
 *	For every bit set in the map, the apicid of the matching processor is
 *	looked up and SiblingMap[apicid] is set to 1. Processors whose apicid
 *	does not fit in SiblingMap are skipped with a warning.
 */
static void cpumap_to_cpu_ci(char *shared_cpu_map,
			     struct proc_cpuinfo *cpuinfo,
			     HsaCacheProperties *this_cache)
{
	int num_hexs, bit;
	uint32_t proc, apicid, mask;
	char *ch_ptr;

	/* shared_cpu_map is shown as ...X3,X2,X1 Each X is a hex without 0x
	 * and it's up to 8 characters(32 bits). For the first 32 CPUs(actually
	 * procs), it's presented in X1. The next 32 is in X2, and so on.
	 */
	num_hexs = (strlen(shared_cpu_map) + 8) / 9; /* 8 characters + "," */
	ch_ptr = strtok(shared_cpu_map, ",");
	/* Stop as soon as strtok() runs out of tokens: num_hexs is only an
	 * estimate derived from the string length, and a NULL token must not
	 * reach strtoul().
	 */
	while (ch_ptr && num_hexs-- > 0) {
		/* strtoul keeps all 32 bits even where long is 32 bits wide */
		mask = (uint32_t)strtoul(ch_ptr, NULL, 16); /* each X */
		for (bit = 0; bit < 32; bit++) {
			/* unsigned constant: shifting a signed 1 by 31 is undefined */
			if (!((1u << bit) & mask))
				continue;
			/* num_hexs was already decremented, so the last
			 * (rightmost) word maps to processors 0..31
			 */
			proc = num_hexs * 32 + bit;
			apicid = cpuinfo[proc].apicid;
			if (apicid >= HSA_CPU_SIBLINGS) {
				pr_warn("SiblingMap buffer %d is too small\n",
					HSA_CPU_SIBLINGS);
				continue;
			}
			this_cache->SiblingMap[apicid] = 1;
		}
		ch_ptr = strtok(NULL, ",");
	}
}

/* get_cpu_cache_info - get specified CPU's cache information from sysfs
 *     @prefix [IN] sysfs path for target cpu cache,
 *                  /sys/devices/system/node/nodeX/cpuY/cache
 *     @cpuinfo [IN] /proc/cpuinfo data to get apicid
 *     @cpu_ci: CPU specified. This parameter is an input and also an output.
 *             [IN] cpu_ci->num_caches: number of index dirs
 *             [OUT] cpu_ci->cache_info: to store cache info collected
 *             [OUT] cpu_ci->num_caches: reduces when shared with other cpu(s)
 *                   or when the sharing information cannot be read
 * Return: number of cache reported from this cpu
 */
static int get_cpu_cache_info(const char *prefix, struct proc_cpuinfo *cpuinfo,
			      cpu_cacheinfo_t *cpu_ci)
{
	int idx, num_idx, n;
	HsaCacheProperties *this_cache;
	char path[256], str[256];
	bool is_power9 = false;

	if (processor_vendor == IBM_POWER) {
		if (strcmp(cpuinfo[0].model_name, "POWER9") == 0) {
			is_power9 = true;
		}
	}

	this_cache = cpu_ci->cache_prop;
	num_idx = cpu_ci->num_caches;
	for (idx = 0; idx < num_idx; idx++) {
		/* If this cache is shared by multiple CPUs, we only need
		 * to list it in the first CPU.
		 */
		if (is_power9) {
			// POWER9 has SMT4
			if (cpu_ci->proc_num & 0x3) {
				/* proc is not 0,4,8,etc.  Skip and reduce the cache count. */
				--cpu_ci->num_caches;
				continue;
			}
		} else {
			snprintf(path, sizeof(path), "%s/index%d/shared_cpu_list", prefix, idx);
			/* shared_cpu_list is shown as n1,n2... or n1-n2,n3-n4...
			 * For both cases, this cache is listed to proc n1 only.
			 * If the list cannot be read the owner is unknown, so the
			 * cache is skipped instead of comparing against garbage.
			 */
			n = -1;
			if (fscanf_dec(path, (uint32_t *)&n) != HSAKMT_STATUS_SUCCESS ||
			    cpu_ci->proc_num != n) {
				/* proc is not n1. Skip and reduce the cache count. */
				--cpu_ci->num_caches;
				continue;
			}
			this_cache->ProcessorIdLow = cpuinfo[cpu_ci->proc_num].apicid;
		}

		/* The remaining attributes are best effort: a field whose file
		 * cannot be read is simply left as it was.
		 */

		/* CacheLevel */
		snprintf(path, sizeof(path), "%s/index%d/level", prefix, idx);
		fscanf_dec(path, &this_cache->CacheLevel);
		/* CacheType */
		snprintf(path, sizeof(path), "%s/index%d/type", prefix, idx);

		memset(str, 0, sizeof(str));
		fscanf_str(path, str, sizeof(str));
		if (!strcmp(str, "Data"))
			this_cache->CacheType.ui32.Data = 1;
		if (!strcmp(str, "Instruction"))
			this_cache->CacheType.ui32.Instruction = 1;
		if (!strcmp(str, "Unified")) {
			this_cache->CacheType.ui32.Data = 1;
			this_cache->CacheType.ui32.Instruction = 1;
		}
		this_cache->CacheType.ui32.CPU = 1;
		/* CacheSize */
		snprintf(path, sizeof(path), "%s/index%d/size", prefix, idx);
		fscanf_size(path, &this_cache->CacheSize);
		/* CacheLineSize */
		snprintf(path, sizeof(path), "%s/index%d/coherency_line_size", prefix, idx);
		fscanf_dec(path, &this_cache->CacheLineSize);
		/* CacheAssociativity */
		snprintf(path, sizeof(path), "%s/index%d/ways_of_associativity", prefix, idx);
		fscanf_dec(path, &this_cache->CacheAssociativity);
		/* CacheLinesPerTag */
		snprintf(path, sizeof(path), "%s/index%d/physical_line_partition", prefix, idx);
		fscanf_dec(path, &this_cache->CacheLinesPerTag);
		/* CacheSiblings: only decode the map when it was really read,
		 * otherwise str would still hold the cache type text and be
		 * misinterpreted as a hex CPU mask.
		 */
		snprintf(path, sizeof(path), "%s/index%d/shared_cpu_map", prefix, idx);
		memset(str, 0, sizeof(str));
		if (fscanf_str(path, str, sizeof(str)) == HSAKMT_STATUS_SUCCESS)
			cpumap_to_cpu_ci(str, cpuinfo, this_cache);

		++this_cache;
	}

	return cpu_ci->num_caches;
}

/* topology_sysfs_get_generation - read the topology generation id from sysfs
 *	@gen [OUT] generation id parsed from the file
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the file cannot
 *	   be opened or does not start with an unsigned decimal
 */
static HSAKMT_STATUS topology_sysfs_get_generation(uint32_t *gen)
{
	FILE *fd;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	char path[256];

	assert(gen);
	snprintf(path, sizeof(path), KFD_SYSFS_PATH_GENERATION_ID, get_topology_dir());

	fd = fopen(path, "r");
	if (!fd)
		return HSAKMT_STATUS_ERROR;

	/* @gen is a 32-bit unsigned, so the conversion is plain "%u"
	 * (in "%ul" the 'l' is a literal to match, not a length modifier).
	 */
	if (fscanf(fd, "%u", gen) != 1)
		ret = HSAKMT_STATUS_ERROR;

	fclose(fd);
	return ret;
}

/* topology_sysfs_map_node_id - translate a user node id into a sysfs node id
 *	@node_id [IN] index into map_user_to_sysfs_node_id
 *	@sys_node_id [OUT] sysfs node id stored at that index
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_NOT_SUPPORTED when the map
 *	   is not allocated or @node_id is outside of it
 */
static HSAKMT_STATUS topology_sysfs_map_node_id(uint32_t node_id, uint32_t *sys_node_id)
{
	if (map_user_to_sysfs_node_id != NULL &&
	    node_id < map_user_to_sysfs_node_id_size) {
		*sys_node_id = map_user_to_sysfs_node_id[node_id];
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_NOT_SUPPORTED;
}

/* topology_sysfs_get_gpu_id - read the gpu_id file of a sysfs node
 *	@sysfs_node_id [IN] sysfs node number (not the user node id)
 *	@gpu_id [OUT] value parsed from the gpu_id file
 * Return: HSAKMT_STATUS_SUCCESS on success,
 *	   HSAKMT_STATUS_NOT_SUPPORTED if reading the value failed with EPERM,
 *	   HSAKMT_STATUS_ERROR for any other failure
 */
static HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t sysfs_node_id, uint32_t *gpu_id)
{
	FILE *fd;
	char path[256];
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	assert(gpu_id);
	snprintf(path, sizeof(path), KFD_SYSFS_PATH_NODES "/%d/gpu_id", get_topology_dir(), sysfs_node_id);
	fd = fopen(path, "r");
	if (!fd)
		return HSAKMT_STATUS_ERROR;

	/* Clear errno first so that a stale EPERM left by an earlier call
	 * cannot turn an ordinary parse failure into "not supported".
	 */
	errno = 0;
	/* @gpu_id is a 32-bit unsigned: "%u" (the 'l' in "%ul" was a literal) */
	if (fscanf(fd, "%u", gpu_id) != 1)
		ret = (errno == EPERM) ? HSAKMT_STATUS_NOT_SUPPORTED :
					 HSAKMT_STATUS_ERROR;
	fclose(fd);

	return ret;
}

/* Decide whether the node with sysfs id @sysfs_node_id is supported and
 * store the verdict in @is_node_supported. The topology_* helpers cannot be
 * used here because they work on user node ids, not sysfs node ids.
 *
 * A node whose gpu_id is 0 is reported as supported right away. Otherwise
 * the node is supported only if the DRM render device named by its
 * drm_render_minor property can be opened.
 * A sysfs node is not supported
 *	- if corresponding drm render node is not available.
 *	- if node information is not accessible (EPERM)
 * Both cases still return HSAKMT_STATUS_SUCCESS with the flag left false.
 */
static HSAKMT_STATUS topology_sysfs_check_node_supported(uint32_t sysfs_node_id, bool *is_node_supported)
{
	HSAKMT_STATUS status;
	uint32_t kfd_gpu_id;
	uint32_t render_minor = 0;
	uint32_t consumed = 0;
	unsigned long long value;
	char key[256];
	char props_path[256];
	char *buf, *cursor;
	FILE *props_file;
	int nread;
	int open_result;

	*is_node_supported = false;

	/* gpu_id tells whether the node is accessible at all */
	status = topology_sysfs_get_gpu_id(sysfs_node_id, &kfd_gpu_id);
	if (status == HSAKMT_STATUS_NOT_SUPPORTED)
		return HSAKMT_STATUS_SUCCESS;
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	if (kfd_gpu_id == 0) {
		*is_node_supported = true;
		return HSAKMT_STATUS_SUCCESS;
	}

	buf = malloc(PAGE_SIZE);
	if (buf == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	/* Load the node's properties file into the buffer */
	snprintf(props_path, 256, KFD_SYSFS_PATH_NODES "/%d/properties", get_topology_dir(), sysfs_node_id);
	props_file = fopen(props_path, "r");
	if (props_file == NULL) {
		free(buf);
		return HSAKMT_STATUS_ERROR;
	}

	nread = fread(buf, 1, PAGE_SIZE, props_file);
	if (nread <= 0) {
		status = HSAKMT_STATUS_ERROR;
		goto cleanup;
	}

	/* The buffer is parsed as a string, so it has to be terminated */
	if (nread >= PAGE_SIZE)
		nread = PAGE_SIZE - 1;
	buf[nread] = 0;

	/* Walk the "name value" pairs until drm_render_minor shows up */
	for (cursor = buf;
	     sscanf(cursor, "%s %llu\n%n", key, &value, &consumed) == 2;
	     cursor += consumed) {
		if (strcmp(key, "drm_render_minor") == 0) {
			render_minor = (int32_t)value;
			break;
		}
	}
	if (render_minor == 0) {
		status = HSAKMT_STATUS_ERROR;
		goto cleanup;
	}

	/* A positive result marks the node usable; -ENOENT and -EPERM mean
	 * "not supported"; every other result is reported as an error.
	 */
	open_result = hsakmt_open_drm_render_device(render_minor);
	if (open_result > 0)
		*is_node_supported = true;
	else if (open_result != -ENOENT && open_result != -EPERM)
		status = HSAKMT_STATUS_ERROR;

cleanup:
	free(buf);
	fclose(props_file);
	return status;
}

/* hsakmt_topology_sysfs_get_system_props - read the system properties file
 * and rebuild the user-node-id to sysfs-node-id map.
 *	@props [OUT] PlatformOem, PlatformId, PlatformRev and NumNodes are
 *		     filled in; NumNodes counts only the supported nodes
 * Side effects: updates num_sysfs_nodes, map_user_to_sysfs_node_id and
 * map_user_to_sysfs_node_id_size. The map is freed and reset to NULL when
 * checking a node fails.
 * Return: HSAKMT_STATUS_SUCCESS or an error status
 */
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
	uint32_t supported_cnt = 0;
	uint32_t consumed = 0;
	uint32_t sysfs_id;
	bool supported = true;
	unsigned long long value;
	char key[256];
	char sysfs_path[256];
	char *buf, *cursor;
	FILE *props_file;
	int nread;

	assert(props);
	snprintf(sysfs_path, sizeof(sysfs_path), KFD_SYSFS_PATH_SYSTEM_PROPERTIES, get_topology_dir());
	props_file = fopen(sysfs_path, "r");
	if (props_file == NULL)
		return HSAKMT_STATUS_ERROR;

	buf = malloc(PAGE_SIZE);
	if (buf == NULL) {
		status = HSAKMT_STATUS_NO_MEMORY;
		goto close_file;
	}

	nread = fread(buf, 1, PAGE_SIZE, props_file);
	if (nread <= 0) {
		status = HSAKMT_STATUS_ERROR;
		goto release_buf;
	}

	/* The buffer is parsed as a string, so it has to be terminated */
	if (nread >= PAGE_SIZE)
		nread = PAGE_SIZE - 1;
	buf[nread] = 0;

	/* Pick the platform fields out of the "name value" pairs */
	for (cursor = buf;
	     sscanf(cursor, "%s %llu\n%n", key, &value, &consumed) == 2;
	     cursor += consumed) {
		if (!strcmp(key, "platform_oem"))
			props->PlatformOem = (uint32_t)value;
		else if (!strcmp(key, "platform_id"))
			props->PlatformId = (uint32_t)value;
		else if (!strcmp(key, "platform_rev"))
			props->PlatformRev = (uint32_t)value;
	}

	/*
	 * Count the sysfs nodes. This relies on the nodes folder holding
	 * nothing but one sub-folder per node number.
	 */
	snprintf(sysfs_path, sizeof(sysfs_path), KFD_SYSFS_PATH_NODES, get_topology_dir());
	num_sysfs_nodes = num_subdirs(sysfs_path, "");

	/* (Re)allocate the map when there is none yet or it is too small.
	 * num_sysfs_nodes covers CPU and GPU nodes alike, so the map may be
	 * slightly larger than what ends up being used.
	 */
	if (map_user_to_sysfs_node_id == NULL ||
	    num_sysfs_nodes > map_user_to_sysfs_node_id_size) {
		free(map_user_to_sysfs_node_id);
		map_user_to_sysfs_node_id = calloc(num_sysfs_nodes, sizeof(uint32_t));
		if (map_user_to_sysfs_node_id == NULL) {
			status = HSAKMT_STATUS_NO_MEMORY;
			goto release_buf;
		}
		map_user_to_sysfs_node_id_size = num_sysfs_nodes;
	}

	/* Supported nodes receive consecutive user ids in sysfs order */
	sysfs_id = 0;
	while (sysfs_id < num_sysfs_nodes) {
		status = topology_sysfs_check_node_supported(sysfs_id, &supported);
		if (status != HSAKMT_STATUS_SUCCESS)
			goto drop_map;
		if (supported)
			map_user_to_sysfs_node_id[supported_cnt++] = sysfs_id;
		sysfs_id++;
	}
	props->NumNodes = supported_cnt;
	goto release_buf;

drop_map:
	free(map_user_to_sysfs_node_id);
	map_user_to_sysfs_node_id = NULL;
release_buf:
	free(buf);
close_file:
	fclose(props_file);
	return status;
}

static const struct hsa_gfxip_table *find_hsa_gfxip_device(uint16_t device_id, uint8_t gfxv_major)
{
	if (gfxv_major > 10)
		return NULL;

	uint32_t i, table_size;

	table_size = sizeof(gfxip_lookup_table)/sizeof(struct hsa_gfxip_table);
	for (i = 0; i < table_size; i++) {
		if (gfxip_lookup_table[i].device_id == device_id)
			return &gfxip_lookup_table[i];
	}
	return NULL;
}

/* Set hsakmt_is_dgpu once a node with GPU compute cores but no CPU cores is
 * seen; such a node makes the whole system count as dGPU. The flag is never
 * cleared here.
 */
void hsakmt_topology_setup_is_dgpu_param(HsaNodeProperties *props)
{
	if (props->NumCPUCores)
		return;

	if (props->NumFComputeCores)
		hsakmt_is_dgpu = true;
}

/* SVM is needed on every dGPU system, and otherwise whenever the engine's
 * full gfx version is GFX_VERSION_VEGA10 or newer.
 */
bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId)
{
	return hsakmt_is_dgpu ||
	       HSA_GET_GFX_VERSION_FULL(EngineId.ui32) >= GFX_VERSION_VEGA10;
}

/* topology_get_cpu_model_name - fill the node's names from /proc/cpuinfo data
 *	@props [IN/OUT] node properties; CComputeIdLo selects the processor,
 *			MarketingName is always written for the match and
 *			AMDName only when DeviceId is 0 (CPU-only node)
 *	@cpuinfo [IN] array of parsed /proc/cpuinfo entries
 *	@num_procs [IN] number of entries in @cpuinfo
 * Return: HSAKMT_STATUS_SUCCESS if a processor with a matching apicid exists,
 *	   HSAKMT_STATUS_INVALID_PARAMETER if @props is NULL,
 *	   HSAKMT_STATUS_ERROR if no processor matches
 */
static HSAKMT_STATUS topology_get_cpu_model_name(HsaNodeProperties *props,
				struct proc_cpuinfo *cpuinfo, int num_procs)
{
	int i, j;

	if (!props) {
		pr_err("Invalid props to get cpu model name\n");
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	for (i = 0; i < num_procs; i++, cpuinfo++) {
		if (props->CComputeIdLo != cpuinfo->apicid)
			continue;

		if (!props->DeviceId) { /* CPU-only node */
			/* strncpy does not terminate the destination when the
			 * source fills it completely, so reserve the last byte
			 * and terminate explicitly.
			 */
			strncpy((char *)props->AMDName, cpuinfo->model_name,
				sizeof(props->AMDName) - 1);
			props->AMDName[sizeof(props->AMDName) - 1] = '\0';
		}
		/* Copy the name one character per MarketingName element (the
		 * original comment calls this a UTF8 to UTF16 conversion).
		 * The bound is tested before the source is read.
		 */
		for (j = 0; j < HSA_PUBLIC_NAME_SIZE - 1 && cpuinfo->model_name[j] != '\0'; j++)
			props->MarketingName[j] = cpuinfo->model_name[j];
		props->MarketingName[j] = '\0';
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}

/* topology_search_processor_vendor - map a vendor string to its vendor index
 *	@processor_name [IN] vendor text exactly as it follows ": " in
 *			     /proc/cpuinfo (the POWER9 literal below shows it
 *			     still carries the trailing newline)
 * Return: index into supported_processor_vendor_name, IBM_POWER for the
 *	   POWER9 cpu line, or -1 if the vendor is unknown
 */
static int topology_search_processor_vendor(const char *processor_name)
{
	unsigned int i;

	for (i = 0; i < ARRAY_LEN(supported_processor_vendor_name); i++) {
		if (!strcmp(processor_name, supported_processor_vendor_name[i]))
			return i;
	}

	/* The POWER9 check does not depend on the table entry, so it is done
	 * once here instead of on every loop iteration.
	 */
	if (!strcmp(processor_name, "POWER9, altivec supported\n"))
		return IBM_POWER;

	return -1;
}

/* cpuinfo_line_value - locate the value part of a "token : value" line
 *	@line [IN] one line read from /proc/cpuinfo
 * Return: pointer just past the ':' and the single blank that follows it,
 *	   or NULL when the line contains no ':'
 */
static char *cpuinfo_line_value(char *line)
{
	char *sep = strchr(line, ':');

	if (!sep)
		return NULL;
	sep++;
	if (*sep == ' ')
		sep++;
	return sep;
}

/* cpuinfo_copy_model_name - store a model name taken from a cpuinfo line
 *	@dst [OUT] destination; assumed to hold HSA_PUBLIC_NAME_SIZE chars
 *	@src [IN ] value text whose last character (the newline, or the
 *		   separator the caller cut the text at) is dropped
 * At most HSA_PUBLIC_NAME_SIZE - 1 characters are copied and the result is
 * always NUL terminated, also for an empty @src.
 */
static void cpuinfo_copy_model_name(char *dst, const char *src)
{
	size_t len = strlen(src);

	if (len > HSA_PUBLIC_NAME_SIZE)
		len = HSA_PUBLIC_NAME_SIZE;
	if (len)
		len--;
	memcpy(dst, src, len);
	dst[len] = '\0';
}

/* topology_parse_cpuinfo - Parse /proc/cpuinfo and fill up required
 *			topology information
 * cpuinfo [OUT]: output buffer to hold cpu information
 * num_procs: number of processors the output buffer can hold
 *
 * Also sets the file-level processor_vendor on first detection and falls
 * back to GENUINE_INTEL when no known vendor is found. Lines that match a
 * token but carry no ':' separator are skipped.
 * Return: HSAKMT_STATUS_SUCCESS on success,
 *	   HSAKMT_STATUS_INVALID_PARAMETER if @cpuinfo is NULL,
 *	   HSAKMT_STATUS_ERROR if /proc/cpuinfo cannot be opened,
 *	   HSAKMT_STATUS_NO_MEMORY if a processor number does not fit
 */
static HSAKMT_STATUS topology_parse_cpuinfo(struct proc_cpuinfo *cpuinfo,
					    uint32_t num_procs)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	FILE *fd;
	char read_buf[256];
	char *p;
#ifdef __PPC64__
	char *p2;
#endif
	uint32_t proc = 0;
	const char *proc_cpuinfo_path = "/proc/cpuinfo";

	if (!cpuinfo) {
		pr_err("CPU information will be missing\n");
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	fd = fopen(proc_cpuinfo_path, "r");
	if (!fd) {
		pr_err("Failed to open [%s]. Unable to get CPU information",
			proc_cpuinfo_path);
		return HSAKMT_STATUS_ERROR;
	}

#ifdef __PPC64__
	/* Each line in /proc/cpuinfo that read_buf is constructed, the format
	 * is like this:
	 * "token       : value\n"
	 * where token is our target like vendor_id, model name, apicid ...
	 * and value is the answer
	 */
	while (fgets(read_buf, sizeof(read_buf), fd)) {
		/* processor number */
		if (!strncmp("processor\t", read_buf, sizeof("processor\t") - 1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			proc = atoi(p);
			if (proc >= num_procs) {
				pr_warn("cpuinfo contains processor %u larger than %u\n",
					proc, num_procs);
				ret = HSAKMT_STATUS_NO_MEMORY;
				goto exit;
			}
			continue;
		}

		/* vendor name / model name */
		if (!strncmp("cpu\t", read_buf, sizeof("cpu\t") - 1) &&
			(processor_vendor == -1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			processor_vendor = topology_search_processor_vendor(p);

			/* Keep the text up to and including the first comma;
			 * the copy below then drops that last character.
			 */
			p2 = strchr(p, ',');
			if (p2 != NULL) {
				p2++;
				*p2 = 0;
			}
			cpuinfo_copy_model_name(cpuinfo[proc].model_name, p);
			continue;
		}
	}
#else
	/* Each line in /proc/cpuinfo that read_buf is constructed, the format
	 * is like this:
	 * "token       : value\n"
	 * where token is our target like vendor_id, model name, apicid ...
	 * and value is the answer
	 */
	while (fgets(read_buf, sizeof(read_buf), fd)) {
		/* processor number */
		if (!strncmp("processor", read_buf, sizeof("processor") - 1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			proc = atoi(p);
			if (proc >= num_procs) {
				pr_warn("cpuinfo contains processor %u larger than %u\n",
					proc, num_procs);
				ret = HSAKMT_STATUS_NO_MEMORY;
				goto exit;
			}
			continue;
		}

		/* vendor name */
		if (!strncmp("vendor_id", read_buf, sizeof("vendor_id") - 1) &&
			(processor_vendor == -1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			processor_vendor = topology_search_processor_vendor(p);
			continue;
		}

		/* model name */
		if (!strncmp("model name", read_buf, sizeof("model name") - 1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			cpuinfo_copy_model_name(cpuinfo[proc].model_name, p);
			continue;
		}

		/* apicid */
		if (!strncmp("apicid", read_buf, sizeof("apicid") - 1)) {
			p = cpuinfo_line_value(read_buf);
			if (!p)
				continue;
			cpuinfo[proc].apicid = atoi(p);
		}
	}
#endif

	if (processor_vendor < 0) {
		pr_err("Failed to get Processor Vendor. Setting to %s",
			supported_processor_vendor_name[GENUINE_INTEL]);
		processor_vendor = GENUINE_INTEL;
	}

exit:
	fclose(fd);
	return ret;
}

/* topology_get_node_props_from_drm - query the amdgpu device behind the
 * node's DRM render minor.
 *	@props [IN/OUT] DrmRenderMinor selects the device; MarketingName is
 *			written when a marketing name is available, FamilyID
 *			and Integrated when the gpu info query succeeds
 * Return: 0 on success, -1 if @props is NULL or any open/initialize/query
 *	   step fails
 */
static int topology_get_node_props_from_drm(HsaNodeProperties *props)
{
	amdgpu_device_handle dev;
	struct amdgpu_gpu_info info;
	uint32_t drm_major;
	uint32_t drm_minor;
	const char *marketing;
	int render_fd;
	int status = -1;
	int n;

	if (!props)
		return -1;

	render_fd = drmOpenRender(props->DrmRenderMinor);
	if (render_fd < 0)
		return -1;

	if (amdgpu_device_initialize(render_fd,
		&drm_major, &drm_minor, &dev) >= 0) {
		/* The name is copied even when the gpu info query below fails */
		marketing = amdgpu_get_marketing_name(dev);
		if (marketing) {
			n = 0;
			while (marketing[n] != 0 && n < HSA_PUBLIC_NAME_SIZE - 1) {
				props->MarketingName[n] = marketing[n];
				n++;
			}
			props->MarketingName[n] = '\0';
		}

		if (amdgpu_query_gpu_info(dev, &info) == 0) {
			props->FamilyID = info.family_id;
			props->Integrated = !!(info.ids_flags & AMDGPU_IDS_FLAGS_FUSION);
			status = 0;
		}

		amdgpu_device_deinitialize(dev);
	}

	drmClose(render_fd);
	return status;
}

/* topology_sysfs_get_node_props - read one node's properties from sysfs
 *	@node_id [IN] user node id; translated to the sysfs node id first
 *	@props [OUT] node properties filled from the gpu_id and properties
 *		     files, plus derived fields for nodes with GPU cores
 *	@p2p_links [OUT] optional; set to true when the properties file has
 *			 a p2p_links_count entry (never written otherwise)
 *	@num_p2pLinks [OUT] optional; receives the p2p_links_count value
 * Return: HSAKMT_STATUS_SUCCESS, the status of the id mapping / gpu_id read
 *	   when those fail, HSAKMT_STATUS_NO_MEMORY, or HSAKMT_STATUS_ERROR
 *	   (unreadable properties file or invalid HSA_OVERRIDE_GFX_VERSION)
 */
static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
						   HsaNodeProperties *props,
						   bool *p2p_links,
						   uint32_t *num_p2pLinks)
{
	FILE *fd;
	char *read_buf, *p, *envvar, dummy = '\0';
	char prop_name[256];
	char path[256];
	char per_node_override[32];
	unsigned long long prop_val = 0;
	uint32_t prog, major = 0, minor = 0, step = 0;
	int read_size;
	const struct hsa_gfxip_table *hsa_gfxip;
	uint32_t sys_node_id;
	uint32_t gfxv = 0;
	uint8_t gfxv_major, gfxv_minor, gfxv_stepping;
	uint32_t simd_arrays_count = 0;

	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;

	assert(props);
	ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	/* Retrieve the GPU ID */
	ret = topology_sysfs_get_gpu_id(sys_node_id, &props->KFDGpuID);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	read_buf = malloc(PAGE_SIZE);
	if (!read_buf)
		return HSAKMT_STATUS_NO_MEMORY;

	/* Retrieve the node properties */
	snprintf(path, 256, KFD_SYSFS_PATH_NODES "/%d/properties", get_topology_dir(), sys_node_id);
	fd = fopen(path, "r");
	if (!fd) {
		free(read_buf);
		return HSAKMT_STATUS_ERROR;
	}

	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
	if (read_size <= 0) {
		ret = HSAKMT_STATUS_ERROR;
		goto out;
	}

	/* Since we're using the buffer as a string, we make sure the string terminates */
	if (read_size >= PAGE_SIZE)
		read_size = PAGE_SIZE - 1;
	read_buf[read_size] = 0;

	/* Read the node properties */
	/* Each iteration consumes one "name value" pair; %n stores how many
	 * characters were consumed so that p can be advanced by that amount.
	 * Parsing stops at the first entry that is not a name followed by an
	 * unsigned number. Unknown names are ignored.
	 */
	prog = 0;
	p = read_buf;
	while (sscanf(p += prog, "%s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
		if (strcmp(prop_name, "cpu_cores_count") == 0)
			props->NumCPUCores = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_count") == 0)
			props->NumFComputeCores = (uint32_t)prop_val;
		else if (strcmp(prop_name, "mem_banks_count") == 0)
			props->NumMemoryBanks = (uint32_t)prop_val;
		else if (strcmp(prop_name, "caches_count") == 0)
			props->NumCaches = (uint32_t)prop_val;
		else if (strcmp(prop_name, "io_links_count") == 0)
			props->NumIOLinks = (uint32_t)prop_val;
		else if (strcmp(prop_name, "p2p_links_count") == 0) {
			/* io_links_count assigns NumIOLinks while this entry adds
			 * to it, so the total depends on io_links_count being
			 * parsed before p2p_links_count.
			 */
			props->NumIOLinks += (uint32_t)prop_val;
			if (num_p2pLinks)
				*num_p2pLinks = (uint32_t)prop_val;
			if (p2p_links)
				*p2p_links = true;
		} else if (strcmp(prop_name, "cpu_core_id_base") == 0)
			props->CComputeIdLo = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_id_base") == 0)
			props->FComputeIdLo = (uint32_t)prop_val;
		else if (strcmp(prop_name, "capability") == 0)
			props->Capability.Value = (uint32_t)prop_val;
		else if (strcmp(prop_name, "capability2") == 0)
			props->Capability2.Value = (uint32_t)prop_val;
		else if (strcmp(prop_name, "debug_prop") == 0)
			props->DebugProperties.Value = (uint64_t)prop_val;
		else if (strcmp(prop_name, "max_waves_per_simd") == 0)
			props->MaxWavesPerSIMD = (uint32_t)prop_val;
		else if (strcmp(prop_name, "lds_size_in_kb") == 0)
			props->LDSSizeInKB = (uint32_t)prop_val;
		else if (strcmp(prop_name, "gds_size_in_kb") == 0)
			props->GDSSizeInKB = (uint32_t)prop_val;
		else if (strcmp(prop_name, "wave_front_size") == 0)
			props->WaveFrontSize = (uint32_t)prop_val;
		else if (strcmp(prop_name, "array_count") == 0)
			simd_arrays_count = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_arrays_per_engine") == 0)
			props->NumArrays = (uint32_t)prop_val;
		else if (strcmp(prop_name, "cu_per_simd_array") == 0)
			props->NumCUPerArray = (uint32_t)prop_val;
		else if (strcmp(prop_name, "simd_per_cu") == 0)
			props->NumSIMDPerCU = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_slots_scratch_cu") == 0)
			props->MaxSlotsScratchCU = (uint32_t)prop_val;
		else if (strcmp(prop_name, "fw_version") == 0)
			/* only the low 10 bits of the reported value are kept */
			props->EngineId.Value = (uint32_t)prop_val & 0x3ff;
		else if (strcmp(prop_name, "vendor_id") == 0)
			props->VendorId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "device_id") == 0)
			props->DeviceId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "location_id") == 0)
			props->LocationId = (uint32_t)prop_val;
		else if (strcmp(prop_name, "domain") == 0)
			props->Domain = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_engine_clk_fcompute") == 0)
			props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_engine_clk_ccompute") == 0)
			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
		else if (strcmp(prop_name, "local_mem_size") == 0)
			props->LocalMemSize = prop_val;
		else if (strcmp(prop_name, "drm_render_minor") == 0)
			props->DrmRenderMinor = (int32_t)prop_val;
		else if (strcmp(prop_name, "sdma_fw_version") == 0)
			/* only the low 10 bits of the reported value are kept */
			props->uCodeEngineVersions.Value = (uint32_t)prop_val & 0x3ff;
		else if (strcmp(prop_name, "hive_id") == 0)
			props->HiveID = prop_val;
		else if (strcmp(prop_name, "unique_id") == 0)
			props->UniqueID = prop_val;
		else if (strcmp(prop_name, "num_sdma_engines") == 0)
			props->NumSdmaEngines = prop_val;
		else if (strcmp(prop_name, "num_sdma_xgmi_engines") == 0)
			props->NumSdmaXgmiEngines = prop_val;
		else if (strcmp(prop_name, "num_gws") == 0)
			props->NumGws = prop_val;
		else if (strcmp(prop_name, "num_sdma_queues_per_engine") == 0)
			props->NumSdmaQueuesPerEngine = prop_val;
		else if (strcmp(prop_name, "num_cp_queues") == 0)
			props->NumCpQueues = prop_val;
		else if (strcmp(prop_name, "num_xcc") == 0)
			props->NumXcc = prop_val;
		else if (strcmp(prop_name, "family_id") == 0)
			props->FamilyID = prop_val;
		else if (strcmp(prop_name, "gfx_target_version") == 0)
			/* kept locally; split into major/minor/stepping below */
			gfxv = (uint32_t)prop_val;
	}

	/* Mask the capability bit when the SVM API is flagged as unsupported */
	if (!hsakmt_is_svm_api_supported)
		props->Capability.ui32.SVMAPISupported = 0;

	/* Bail out early, if a CPU node */
	/* Note: this also skips the NumXcc defaulting at the end */
	if (!props->NumFComputeCores)
		goto out;

	/* Guarded against division by zero when simd_arrays_per_engine is 0 */
	if (props->NumArrays != 0)
		props->NumShaderBanks = simd_arrays_count/props->NumArrays;

	gfxv_major = HSA_GET_GFX_VERSION_MAJOR(gfxv);
	gfxv_minor = HSA_GET_GFX_VERSION_MINOR(gfxv);
	gfxv_stepping = HSA_GET_GFX_VERSION_STEP(gfxv);

	/* The engine version comes from the device-id table when the lookup
	 * succeeds, otherwise from the gfx_target_version property.
	 */
	hsa_gfxip = find_hsa_gfxip_device(props->DeviceId, gfxv_major);
	if (hsa_gfxip || gfxv) {
		/* The per-node variable HSA_OVERRIDE_GFX_VERSION_<node_id> takes
		 * precedence over the global HSA_OVERRIDE_GFX_VERSION.
		 */
		snprintf(per_node_override, sizeof(per_node_override), "HSA_OVERRIDE_GFX_VERSION_%d", node_id);
		if ((envvar = getenv(per_node_override)) || (envvar = getenv("HSA_OVERRIDE_GFX_VERSION"))) {
			/* HSA_OVERRIDE_GFX_VERSION=major.minor.stepping */
			/* The trailing %c makes sscanf return 4 when anything
			 * follows the stepping, so such values are rejected too.
			 */
			if ((sscanf(envvar, "%u.%u.%u%c",
					&major, &minor, &step, &dummy) != 3) ||
				(major > 63 || minor > 255 || step > 255)) {
				pr_err("HSA_OVERRIDE_GFX_VERSION %s is invalid\n",
					envvar);
				ret = HSAKMT_STATUS_ERROR;
				goto out;
			}
			/* The override is stored separately; EngineId below still
			 * holds the detected version.
			 */
			props->OverrideEngineId.ui32.Major = major & 0x3f;
			props->OverrideEngineId.ui32.Minor = minor & 0xff;
			props->OverrideEngineId.ui32.Stepping = step & 0xff;
		}

		if (hsa_gfxip) {
			props->EngineId.ui32.Major = hsa_gfxip->major & 0x3f;
			props->EngineId.ui32.Minor = hsa_gfxip->minor & 0xff;
			props->EngineId.ui32.Stepping = hsa_gfxip->stepping & 0xff;
		} else {
			props->EngineId.ui32.Major = gfxv_major & 0x3f;
			props->EngineId.ui32.Minor = gfxv_minor & 0xff;
			props->EngineId.ui32.Stepping = gfxv_stepping & 0xff;
		}

		/* Set the CAL name of the node. If DID-based hsa_gfxip lookup was
		 * successful, use that name. Otherwise, set to GFX<GFX_VERSION>.
		 */
		/* NOTE(review): strncpy with size-1 leaves the last byte of
		 * AMDName untouched, so termination presumably relies on the
		 * caller passing zeroed props - confirm.
		 */
		if (hsa_gfxip && hsa_gfxip->amd_name)
			strncpy((char *)props->AMDName, hsa_gfxip->amd_name,
					sizeof(props->AMDName)-1);
		else
			snprintf((char *)props->AMDName, sizeof(props->AMDName)-1, "GFX%06x",
					HSA_GET_GFX_VERSION_FULL(props->EngineId.ui32));

		/* Is dGPU Node, not APU
		 * Retrieve the marketing name of the node.
		 */
		/* A failure here is only logged; ret stays unchanged */
		if (topology_get_node_props_from_drm(props))
			pr_info("failed to get marketing name for device ID 0x%x\n", props->DeviceId);

		/* Get VGPR/SGPR size in byte per CU */
		props->SGPRSizePerCU = SGPR_SIZE_PER_CU;
		props->VGPRSizePerCU = hsakmt_get_vgpr_size_per_cu(HSA_GET_GFX_VERSION_FULL(props->EngineId.ui32));

	} else if (props->DeviceId)
		/* still return success */
		pr_err("device ID 0x%x is not supported in libhsakmt\n",
				props->DeviceId);

	/* NumFComputeCores is always non-zero here because CPU nodes left
	 * through the early "goto out" above.
	 */
	if (props->NumFComputeCores)
		assert(props->EngineId.ui32.Major && "HSA_OVERRIDE_GFX_VERSION may be needed");

	/* On Older kernels, num_xcc may not be present in system properties.
	 * Set it to 1 if system properties do not report num_xcc.
	 */
	if (!props->NumXcc)
		props->NumXcc = 1;

out:
	free(read_buf);
	fclose(fd);
	return ret;
}

/* topology_sysfs_get_mem_props - read one memory bank's properties from sysfs
 *	@node_id [IN] user node id; translated to the sysfs node id first
 *	@mem_id [IN] memory bank index under that node
 *	@props [OUT] HeapType, SizeInBytes, Flags, Width and MemoryClockMax
 *		     are set for the entries present in the file
 * Return: HSAKMT_STATUS_SUCCESS, the status of the id mapping when it fails,
 *	   HSAKMT_STATUS_NO_MEMORY, or HSAKMT_STATUS_ERROR if the file cannot
 *	   be opened or read
 */
static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
						  uint32_t mem_id,
						  HsaMemoryProperties *props)
{
	HSAKMT_STATUS status;
	uint32_t sysfs_id;
	uint32_t consumed = 0;
	unsigned long long value;
	char key[256];
	char bank_path[256];
	char *buf, *cursor;
	FILE *bank_file;
	int nread;

	assert(props);
	status = topology_sysfs_map_node_id(node_id, &sysfs_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	snprintf(bank_path, 256, KFD_SYSFS_PATH_NODES "/%d/mem_banks/%d/properties", get_topology_dir(), sysfs_id, mem_id);
	bank_file = fopen(bank_path, "r");
	if (bank_file == NULL)
		return HSAKMT_STATUS_ERROR;

	buf = malloc(PAGE_SIZE);
	if (buf == NULL) {
		status = HSAKMT_STATUS_NO_MEMORY;
		goto close_file;
	}

	nread = fread(buf, 1, PAGE_SIZE, bank_file);
	if (nread <= 0) {
		status = HSAKMT_STATUS_ERROR;
		goto release_buf;
	}

	/* The buffer is parsed as a string, so it has to be terminated */
	if (nread >= PAGE_SIZE)
		nread = PAGE_SIZE - 1;
	buf[nread] = 0;

	/* Walk the "name value" pairs; unknown names are ignored */
	for (cursor = buf;
	     sscanf(cursor, "%s %llu\n%n", key, &value, &consumed) == 2;
	     cursor += consumed) {
		if (!strcmp(key, "heap_type"))
			props->HeapType = (uint32_t)value;
		else if (!strcmp(key, "size_in_bytes"))
			props->SizeInBytes = (uint64_t)value;
		else if (!strcmp(key, "flags"))
			props->Flags.MemoryProperty = (uint32_t)value;
		else if (!strcmp(key, "width"))
			props->Width = (uint32_t)value;
		else if (!strcmp(key, "mem_clk_max"))
			props->MemoryClockMax = (uint32_t)value;
	}

release_buf:
	free(buf);
close_file:
	fclose(bank_file);
	return status;
}

/* topology_destroy_temp_cpu_cache_list -
 *	Release a list built by topology_create_temp_cpu_cache_list(): the
 *	cache_prop buffer of every entry, then the list itself. The number
 *	of entries is taken from the len field of the first element.
 *	A NULL list is accepted and ignored.
 */
static void topology_destroy_temp_cpu_cache_list(
					cpu_cacheinfo_t *temp_cpu_ci_list)
{
	uint32_t idx;

	if (!temp_cpu_ci_list)
		return;

	for (idx = 0; idx < temp_cpu_ci_list->len; idx++)
		free(temp_cpu_ci_list[idx].cache_prop);

	free(temp_cpu_ci_list);
}

/* topology_create_temp_cpu_cache_list - Create a temporary cpu-cache list to
 *		store cpu cache information. This list will be used to copy
 *		HsaCacheProperties in the CPU node. Two buffers are allocated
 *		inside this function: cpu_ci list and cache_prop under each
 *		cpu_ci. Must call topology_destroy_temp_cpu_cache_list to free
 *		the memory after the information is copied.
 *	@node [IN] CPU node number
 *	@cpuinfo [IN] /proc/cpuinfo data
 *	@temp_cpu_ci_list [OUT] cpu-cache-info list with data filled; set to
 *				NULL whenever the list could not be built
 * Return: total number of caches under this CPU node. On any failure the
 *	   partially built list is released and 0 is returned, so a non-zero
 *	   result always comes with a valid list.
 */
static int topology_create_temp_cpu_cache_list(int node,
	struct proc_cpuinfo *cpuinfo, cpu_cacheinfo_t **temp_cpu_ci_list)
{
	/* Get max path size from /sys/devices/system/node/node%d/%s/cache
	 * below, which will max out according to the largest filename,
	 * which can be present twice in the string above. 29 is for the prefix
	 * and the +6 is for the cache suffix
	 */
#ifndef MAXNAMLEN
/* MAXNAMLEN is the BSD name for NAME_MAX. glibc aliases this as NAME_MAX, but not musl */
#define MAXNAMLEN NAME_MAX
#endif
	const uint32_t MAXPATHSIZE = 29 + MAXNAMLEN + (MAXNAMLEN + 6);
	cpu_cacheinfo_t *p_temp_cpu_ci_list = NULL; /* a list of cpu_ci */
	char path[MAXPATHSIZE], node_dir[MAXPATHSIZE];
	int max_cpus;
	int num_cpus = 0; /* entries of the list filled so far */
	cpu_cacheinfo_t *this_cpu; /* one cpu_ci in cpu_ci_list */
	int cache_cnt = 0;
	DIR *dirp = NULL;
	struct dirent *dir;
	char *p;

	if (!temp_cpu_ci_list) {
		pr_err("Invalid temp_cpu_ci_list\n");
		goto exit;
	}
	*temp_cpu_ci_list = NULL;

	/* Get info from /sys/devices/system/node/nodeX/cpuY/cache */
	int node_real = node;
	if (processor_vendor == IBM_POWER) {
		if (!strcmp(cpuinfo[0].model_name, "POWER9")) {
			node_real = node * 8;
		}
	}
	snprintf(node_dir, MAXPATHSIZE, "/sys/devices/system/node/node%d", node_real);
	/* Other than cpuY folders, this dir also has cpulist and cpumap */
	max_cpus = num_subdirs(node_dir, "cpu");
	if (max_cpus <= 0) {
		/* If CONFIG_NUMA is not enabled in the kernel,
		 * /sys/devices/system/node doesn't exist.
		 */
		if (node) { /* CPU node must be 0 or something is wrong */
			pr_err("Fail to get cpu* dirs under %s.", node_dir);
			goto exit;
		}
		/* Fall back to use /sys/devices/system/cpu */
		snprintf(node_dir, MAXPATHSIZE, "/sys/devices/system/cpu");
		max_cpus = num_subdirs(node_dir, "cpu");
		if (max_cpus <= 0) {
			pr_err("Fail to get cpu* dirs under %s\n", node_dir);
			goto exit;
		}
	}

	p_temp_cpu_ci_list = calloc(max_cpus, sizeof(cpu_cacheinfo_t));
	if (!p_temp_cpu_ci_list) {
		pr_err("Fail to allocate p_temp_cpu_ci_list\n");
		goto exit;
	}
	p_temp_cpu_ci_list->len = 0;

	this_cpu = p_temp_cpu_ci_list;
	dirp = opendir(node_dir);
	if (!dirp) {
		/* readdir() must not be called with a NULL stream */
		pr_err("Fail to open %s\n", node_dir);
		goto err_free_list;
	}
	while ((dir = readdir(dirp)) != 0) {
		if (strncmp(dir->d_name, "cpu", 3))
			continue;
		if (!isdigit(dir->d_name[3])) /* ignore files like cpulist */
			continue;
		/* The list was sized from an earlier scan of the directory;
		 * never write past it if more cpus show up in this scan.
		 */
		if (num_cpus >= max_cpus) {
			pr_warn("More cpu dirs than expected under %s\n", node_dir);
			break;
		}
		snprintf(path, MAXPATHSIZE, "%s/%s/cache", node_dir, dir->d_name);
		this_cpu->num_caches = num_subdirs(path, "index");
		this_cpu->cache_prop = calloc(this_cpu->num_caches,
					sizeof(HsaCacheProperties));
		if (!this_cpu->cache_prop) {
			pr_err("Fail to allocate cache_info\n");
			goto err_free_list;
		}
		p = &dir->d_name[3];
		this_cpu->proc_num = atoi(p);
		cache_cnt += get_cpu_cache_info(path, cpuinfo, this_cpu);
		++p_temp_cpu_ci_list->len;
		++num_cpus;
		++this_cpu;
	}
	*temp_cpu_ci_list = p_temp_cpu_ci_list;
	closedir(dirp);
	return cache_cnt;

err_free_list:
	/* Entries counted in len own a cache_prop buffer; the entry that
	 * failed is not counted and holds no buffer.
	 */
	topology_destroy_temp_cpu_cache_list(p_temp_cpu_ci_list);
	cache_cnt = 0;
exit:
	if (dirp)
		closedir(dirp);
	return cache_cnt;
}

/* topology_get_cpu_cache_props - Read CPU cache information from sysfs
 *	@node [IN] CPU node number
 *	@cpuinfo [IN] /proc/cpuinfo data
 *	@tbl [OUT] the node table to fill up: node.NumCaches and the cache
 *		   array. NumCaches is 0 whenever no cache array is attached.
 * Return: HSAKMT_STATUS_SUCCESS in success (including the case where no
 *	   cache information is available) or error number in failure
 */
static HSAKMT_STATUS topology_get_cpu_cache_props(int node,
			struct proc_cpuinfo *cpuinfo, node_props_t *tbl)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	cpu_cacheinfo_t *cpu_ci_list = NULL;
	uint32_t n, cache_cnt, i;
	cpu_cacheinfo_t *cpu_ci;
	HsaCacheProperties *this_cache;

	tbl->node.NumCaches = topology_create_temp_cpu_cache_list(
					node, cpuinfo, &cpu_ci_list);
	if (!tbl->node.NumCaches || !cpu_ci_list) {
		/* For "Intel Meteor lake Mobile", the cache info is not in sysfs,
		 * That means /sys/devices/system/node/node%d/%s/cache does not
		 * exist. This is not treated as an error here.
		 * A missing list is handled the same way, so that the list is
		 * never dereferenced when it is NULL.
		 */
		tbl->node.NumCaches = 0;
		pr_debug("CPU cache info is not available for node %d \n", node);
		goto exit;
	}

	tbl->cache = calloc(tbl->node.NumCaches, sizeof(HsaCacheProperties));
	if (!tbl->cache) {
		/* Do not advertise caches that have no backing array */
		tbl->node.NumCaches = 0;
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto exit;
	}

	/* Now fill in the information to cache properties. */
	cache_cnt = 0;
	cpu_ci = cpu_ci_list;
	for (n = 0; n < cpu_ci_list->len; n++, cpu_ci++) {
		this_cache = cpu_ci->cache_prop;
		for (i = 0; i < cpu_ci->num_caches; i++, this_cache++) {
			memcpy(&tbl->cache[cache_cnt++],
			       this_cache,
			       sizeof(HsaCacheProperties));
			/* Stop once the destination array is full */
			if (cache_cnt >= tbl->node.NumCaches)
				goto exit;
		}
	}

exit:
	topology_destroy_temp_cpu_cache_list(cpu_ci_list);

	return ret;
}

/* topology_sysfs_get_cache_props - Parse one KFD sysfs cache properties file
 *	@node_id [IN] user node id; translated to the sysfs node id first
 *	@cache_id [IN] index of the cache below the node's "caches" directory
 *	@props [OUT] filled from the "name value" lines and the sibling_map line
 * Return: HSAKMT_STATUS_SUCCESS on success, the node-id mapping error,
 * HSAKMT_STATUS_ERROR if the file cannot be opened or read, or
 * HSAKMT_STATUS_NO_MEMORY if the read buffer cannot be allocated.
 */
static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
						    uint32_t cache_id,
						    HsaCacheProperties *props)
{
	FILE *fd;
	char *read_buf, *p;
	char prop_name[256];
	char path[256];
	unsigned long long prop_val;
	uint32_t i;
	int prog;	/* %n stores through an int pointer */
	int read_size;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	uint32_t sys_node_id;

	assert(props);
	ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	snprintf(path, 256, KFD_SYSFS_PATH_NODES "/%d/caches/%d/properties", get_topology_dir(), sys_node_id, cache_id);
	fd = fopen(path, "r");
	if (!fd)
		return HSAKMT_STATUS_ERROR;
	read_buf = malloc(PAGE_SIZE);
	if (!read_buf) {
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto err1;
	}

	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
	if (read_size <= 0) {
		ret = HSAKMT_STATUS_ERROR;
		goto err2;
	}

	/* Since we're using the buffer as a string, we make sure the string terminates */
	if (read_size >= PAGE_SIZE)
		read_size = PAGE_SIZE - 1;
	read_buf[read_size] = 0;

	/* Walk the "name value" lines. The field width keeps an over-long
	 * token from overflowing prop_name.
	 */
	prog = 0;
	p = read_buf;
	while (sscanf(p += prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
		if (strcmp(prop_name, "processor_id_low") == 0)
			props->ProcessorIdLow = (uint32_t)prop_val;
		else if (strcmp(prop_name, "level") == 0)
			props->CacheLevel = (uint32_t)prop_val;
		else if (strcmp(prop_name, "size") == 0)
			props->CacheSize = (uint32_t)prop_val;
		else if (strcmp(prop_name, "cache_line_size") == 0)
			props->CacheLineSize = (uint32_t)prop_val;
		else if (strcmp(prop_name, "cache_lines_per_tag") == 0)
			props->CacheLinesPerTag = (uint32_t)prop_val;
		else if (strcmp(prop_name, "association") == 0)
			props->CacheAssociativity = (uint32_t)prop_val;
		else if (strcmp(prop_name, "latency") == 0)
			props->CacheLatency = (uint32_t)prop_val;
		else if (strcmp(prop_name, "type") == 0)
			props->CacheType.Value = (uint32_t)prop_val;
		else if (strcmp(prop_name, "sibling_map") == 0)
			break;	/* p still points at the start of this line */
	}

	/* sibling_map is a comma separated list rather than a single value,
	 * so it is parsed separately, one element per iteration.
	 */
	prog = 0;
	if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) {
		for (i = 0; i < HSA_CPU_SIBLINGS; i++) {
			p += prog;
			/* Reset so a missing separator after the last value is
			 * detected instead of re-using the previous offset.
			 */
			prog = 0;
			if (sscanf(p, "%u%*[,\n]%n", &props->SiblingMap[i], &prog) != 1 ||
			    !prog)
				break;
		}
	}

err2:
	free(read_buf);
err1:
	fclose(fd);
	return ret;
}

/* Translate a sysfs node id into the user-visible node id by searching
 * map_user_to_sysfs_node_id for the first entry equal to @sys_node_id.
 * Return: HSAKMT_STATUS_SUCCESS with *user_node_id set to the matching
 * index, or HSAKMT_STATUS_INVALID_NODE_UNIT when no entry matches.
 */
static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(uint32_t sys_node_id, uint32_t *user_node_id)
{
	uint32_t idx = 0;

	while (idx < map_user_to_sysfs_node_id_size) {
		if (map_user_to_sysfs_node_id[idx] == sys_node_id) {
			*user_node_id = idx;
			return HSAKMT_STATUS_SUCCESS;
		}
		idx++;
	}

	return HSAKMT_STATUS_INVALID_NODE_UNIT;
}


/* For a given node @node_id the function gets the @iolink_id information, i.e. it parses the sysfs entry
 * ./nodes/@node_id/io_links/@iolink_id/properties (or ./p2p_links/ when @p2pLink is true).
 * @node_id has to be a valid accessible node.
 *
 * If node_to specified by the @iolink_id is not accessible the function clears @props and returns
 * HSAKMT_STATUS_NOT_SUPPORTED.
 * If node_to is accessible, then node_to is mapped from sysfs_node to user_node and returns HSAKMT_STATUS_SUCCESS.
 * Other failures: the node-id mapping error, HSAKMT_STATUS_ERROR (open/read failure),
 * HSAKMT_STATUS_NO_MEMORY, or HSAKMT_STATUS_INVALID_NODE_UNIT when node_from does not match @node_id.
 */
static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
						     uint32_t iolink_id,
						     HsaIoLinkProperties *props, bool p2pLink)
{
	FILE *fd;
	char *read_buf, *p;
	char prop_name[256];
	char path[256];
	unsigned long long prop_val;
	int prog;	/* %n stores through an int pointer */
	int read_size;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	uint32_t sys_node_id;

	assert(props);
	ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	snprintf(path, 256, KFD_SYSFS_PATH_NODES "/%d/%s/%d/properties", get_topology_dir(), sys_node_id, p2pLink ? "p2p_links" : "io_links", iolink_id);

	fd = fopen(path, "r");
	if (!fd)
		return HSAKMT_STATUS_ERROR;
	read_buf = malloc(PAGE_SIZE);
	if (!read_buf) {
		ret = HSAKMT_STATUS_NO_MEMORY;
		goto err1;
	}

	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
	if (read_size <= 0) {
		/* EPERM on the read is reported as "not supported" rather
		 * than as a hard error.
		 */
		ret = (errno == EPERM) ? HSAKMT_STATUS_NOT_SUPPORTED :
					 HSAKMT_STATUS_ERROR;
		goto err2;
	}

	/* Since we're using the buffer as a string, we make sure the string terminates */
	if (read_size >= PAGE_SIZE)
		read_size = PAGE_SIZE - 1;
	read_buf[read_size] = 0;

	/* Walk the "name value" lines. The field width keeps an over-long
	 * token from overflowing prop_name.
	 */
	prog = 0;
	p = read_buf;
	while (sscanf(p += prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
		if (strcmp(prop_name, "type") == 0)
			props->IoLinkType = (uint32_t)prop_val;
		else if (strcmp(prop_name, "version_major") == 0)
			props->VersionMajor = (uint32_t)prop_val;
		else if (strcmp(prop_name, "version_minor") == 0)
			props->VersionMinor = (uint32_t)prop_val;
		else if (strcmp(prop_name, "node_from") == 0) {
			/* The link must originate from the node we asked for */
			if (sys_node_id != (uint32_t)prop_val) {
				ret = HSAKMT_STATUS_INVALID_NODE_UNIT;
				goto err2;
			}
			props->NodeFrom = node_id;
		} else if (strcmp(prop_name, "node_to") == 0) {
			/* Defaults to "not supported" so a failing check that
			 * never writes the flag is not read uninitialised.
			 */
			bool is_node_supported = false;
			uint32_t sysfs_node_id;

			sysfs_node_id = (uint32_t)prop_val;
			ret = topology_sysfs_check_node_supported(sysfs_node_id, &is_node_supported);
			if (!is_node_supported) {
				ret = HSAKMT_STATUS_NOT_SUPPORTED;
				memset(props, 0, sizeof(*props));
				goto err2;
			}
			ret = topology_map_sysfs_to_user_node_id(sysfs_node_id, &props->NodeTo);
			if (ret != HSAKMT_STATUS_SUCCESS)
				goto err2;
		} else if (strcmp(prop_name, "weight") == 0)
			props->Weight = (uint32_t)prop_val;
		else if (strcmp(prop_name, "min_latency") == 0)
			props->MinimumLatency = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_latency") == 0)
			props->MaximumLatency = (uint32_t)prop_val;
		else if (strcmp(prop_name, "min_bandwidth") == 0)
			props->MinimumBandwidth = (uint32_t)prop_val;
		else if (strcmp(prop_name, "max_bandwidth") == 0)
			props->MaximumBandwidth = (uint32_t)prop_val;
		else if (strcmp(prop_name, "recommended_transfer_size") == 0)
			props->RecTransferSize = (uint32_t)prop_val;
		else if (strcmp(prop_name, "recommended_sdma_engine_id_mask") == 0)
			props->RecSdmaEngIdMask = (uint32_t)prop_val;
		else if (strcmp(prop_name, "flags") == 0)
			props->Flags.LinkProperty = (uint32_t)prop_val;
	}


err2:
	free(read_buf);
err1:
	fclose(fd);
	return ret;
}

/* topology_get_free_io_link_slot_for_node - Return the first unused io_link
 * entry of @node_id, i.e. the element right after the NumIOLinks entries
 * already in use.
 * Return: pointer to the free slot, or NULL (after logging) when the node id
 * is out of range, the node has no link array, or NumNodes - 1 links are
 * already in use.
 */
static HsaIoLinkProperties *topology_get_free_io_link_slot_for_node(uint32_t node_id,
								    const HsaSystemProperties *sys_props,
								    node_props_t *node_props)
{
	node_props_t *entry;
	uint32_t used;

	if (node_id >= sys_props->NumNodes) {
		pr_err("Invalid node [%d]\n", node_id);
		return NULL;
	}

	entry = &node_props[node_id];
	if (entry->link == NULL) {
		pr_err("No io_link reported for Node [%d]\n", node_id);
		return NULL;
	}

	used = entry->node.NumIOLinks;
	if (used >= sys_props->NumNodes - 1) {
		pr_err("No more space for io_link for Node [%d]\n", node_id);
		return NULL;
	}

	return entry->link + used;
}

/* topology_add_io_link_for_node - Append an io_link @node_from -> @node_to
 * with the given type and weight to @node_from's link array and bump its
 * NumIOLinks. Only these four fields of the new entry are written.
 * TODO: Add other members of HsaIoLinkProperties
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_NO_MEMORY when no free
 * slot is available for @node_from.
 */
static HSAKMT_STATUS topology_add_io_link_for_node(uint32_t node_from,
						   const HsaSystemProperties *sys_props,
						   node_props_t *node_props,
						   HSA_IOLINKTYPE IoLinkType,
						   uint32_t node_to,
						   uint32_t Weight)
{
	HsaIoLinkProperties *slot =
		topology_get_free_io_link_slot_for_node(node_from, sys_props,
							node_props);

	if (slot == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	slot->Weight = Weight;
	slot->NodeTo = node_to;
	slot->NodeFrom = node_from;
	slot->IoLinkType = IoLinkType;

	node_props[node_from].node.NumIOLinks += 1;

	return HSAKMT_STATUS_SUCCESS;
}

/* Find the CPU that this GPU (gpu_node) directly connects to.
 * A link qualifies when it is PCIe or XGMI, its weight is at most 20
 * (>20 is GPU->CPU->GPU) and its target node has no KFDGpuID.
 * Return: the CPU node id, or -1 if @gpu_node is not a GPU, has no links,
 * or none of its links qualifies.
 */
static int32_t gpu_get_direct_link_cpu(uint32_t gpu_node, node_props_t *node_props)
{
	node_props_t *gpu = &node_props[gpu_node];
	HsaIoLinkProperties *links = gpu->link;
	uint32_t idx;

	if (!gpu->node.KFDGpuID)
		return -1;
	if (!links || gpu->node.NumIOLinks == 0)
		return -1;

	for (idx = 0; idx < gpu->node.NumIOLinks; idx++) {
		HsaIoLinkProperties *cur = &links[idx];

		if (cur->IoLinkType != HSA_IOLINKTYPE_PCIEXPRESS &&
		    cur->IoLinkType != HSA_IOLINK_TYPE_XGMI)
			continue;
		if (cur->Weight > 20)
			continue;
		if (node_props[cur->NodeTo].node.KFDGpuID)
			continue;

		return cur->NodeTo;
	}

	return -1;
}

/* Look up the direct node1->node2 IO link among node1's existing link
 * entries (those created from the kernel topology).
 *	@weight [OUT] optional, receives the link weight
 *	@type [OUT] optional, receives the link type
 * Return: HSAKMT_STATUS_SUCCESS when a link to @node2 exists,
 * HSAKMT_STATUS_INVALID_NODE_UNIT when node1 has no link array, and
 * HSAKMT_STATUS_INVALID_PARAMETER when no link targets @node2.
 */
static HSAKMT_STATUS get_direct_iolink_info(uint32_t node1, uint32_t node2,
					    node_props_t *node_props, HSAuint32 *weight,
					    HSA_IOLINKTYPE *type)
{
	HsaIoLinkProperties *links = node_props[node1].link;
	uint32_t idx;

	if (links == NULL)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	for (idx = 0; idx < node_props[node1].node.NumIOLinks; idx++) {
		if (links[idx].NodeTo != node2)
			continue;

		if (weight != NULL)
			*weight = links[idx].Weight;
		if (type != NULL)
			*type = links[idx].IoLinkType;
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_INVALID_PARAMETER;
}

/* get_indirect_iolink_info - Compute the node1->node2 link that is routed
 * through the CPU(s) the GPU endpoint(s) are directly attached to.
 *	@node1 [IN] source node
 *	@node2 [IN] destination node
 *	@node_props [IN] per-node properties including the direct io_links
 *	@weight [OUT] sum of the hop weights; stays 0 unless the call succeeds
 *	@type [OUT] link type; reset to HSA_IOLINKTYPE_UNDEFINED on entry and
 *		    overwritten by one of the hop lookups below
 * Return: HSAKMT_STATUS_SUCCESS when an indirect route was found, otherwise
 * an error status. Callers in this file test *weight instead of the status.
 */
static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2,
					      node_props_t *node_props, HSAuint32 *weight,
					      HSA_IOLINKTYPE *type)
{
	int32_t dir_cpu1 = -1, dir_cpu2 = -1;
	HSAuint32 weight1 = 0, weight2 = 0, weight3 = 0;
	HSAKMT_STATUS ret;
	uint32_t i;

	/* Defaults reported on every failure path */
	*weight = 0;
	*type = HSA_IOLINKTYPE_UNDEFINED;

	if (node1 == node2)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* CPU->CPU is not an indirect link */
	if (!node_props[node1].node.KFDGpuID && !node_props[node2].node.KFDGpuID)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* Two nodes sharing a non-zero HiveID are rejected here, no indirect
	 * link is derived for them.
	 */
	if (node_props[node1].node.HiveID &&
	    node_props[node2].node.HiveID &&
	    node_props[node1].node.HiveID == node_props[node2].node.HiveID)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* For each GPU endpoint find the CPU it is directly attached to;
	 * the value stays -1 for CPU endpoints and for GPUs without one.
	 */
	if (node_props[node1].node.KFDGpuID)
		dir_cpu1 = gpu_get_direct_link_cpu(node1, node_props);
	if (node_props[node2].node.KFDGpuID)
		dir_cpu2 = gpu_get_direct_link_cpu(node2, node_props);

	if (dir_cpu1 < 0 && dir_cpu2 < 0)
		return HSAKMT_STATUS_ERROR;

	/* if the node2(dst) is GPU , it need to be large bar for host access*/
	if (node_props[node2].node.KFDGpuID) {
		/* Look for a public frame buffer heap on the destination */
		for (i = 0; i < node_props[node2].node.NumMemoryBanks; ++i)
			if (node_props[node2].mem[i].HeapType ==
				HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)
				break;
		if (i >=  node_props[node2].node.NumMemoryBanks)
			return HSAKMT_STATUS_ERROR;
	}
	/* Possible topology:
	 *   GPU --(weight1) -- CPU -- (weight2) -- GPU
	 *   GPU --(weight1) -- CPU -- (weight2) -- CPU -- (weight3) -- GPU
	 *   GPU --(weight1) -- CPU -- (weight2) -- CPU
	 *   CPU -- (weight2) -- CPU -- (weight3) -- GPU
	 */
	if (dir_cpu1 >= 0) { /* GPU->CPU ... */
		if (dir_cpu2 >= 0) {
			if (dir_cpu1 == dir_cpu2) /* GPU->CPU->GPU*/ {
				ret = get_direct_iolink_info(node1, dir_cpu1,
						node_props, &weight1, NULL);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				/* The reported type is that of the CPU->node2 hop */
				ret = get_direct_iolink_info(dir_cpu1, node2,
						node_props, &weight2, type);
			} else /* GPU->CPU->CPU->GPU*/ {
				ret = get_direct_iolink_info(node1, dir_cpu1,
						node_props, &weight1, NULL);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				/* The reported type is that of the CPU->CPU hop */
				ret = get_direct_iolink_info(dir_cpu1, dir_cpu2,
						node_props, &weight2, type);
				if (ret != HSAKMT_STATUS_SUCCESS)
					return ret;
				/* On QPI interconnection, GPUs can't access
				 * each other if they are attached to different
				 * CPU sockets. CPU<->CPU weight larger than 20
				 * means the two CPUs are in different sockets.
				 */
				if (*type == HSA_IOLINK_TYPE_QPI_1_1
					&& weight2 > 20)
					return HSAKMT_STATUS_NOT_SUPPORTED;
				ret = get_direct_iolink_info(dir_cpu2, node2,
						node_props, &weight3, NULL);
			}
		} else /* GPU->CPU->CPU */ {
			ret = get_direct_iolink_info(node1, dir_cpu1, node_props,
							&weight1, NULL);
			if (ret != HSAKMT_STATUS_SUCCESS)
				return ret;
			/* The reported type is that of the dir_cpu1->node2 hop */
			ret = get_direct_iolink_info(dir_cpu1, node2, node_props,
							&weight2, type);
		}
	} else { /* CPU->CPU->GPU */
		/* dir_cpu2 is >= 0 here, the both-negative case returned above */
		ret = get_direct_iolink_info(node1, dir_cpu2, node_props, &weight2,
					type);
		if (ret != HSAKMT_STATUS_SUCCESS)
			return ret;
		ret = get_direct_iolink_info(dir_cpu2, node2, node_props, &weight3,
						NULL);
	}

	/* Status of the last hop lookup of whichever branch ran */
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	/* Unused hops contribute 0 because the weights are zero-initialised */
	*weight = weight1 + weight2 + weight3;
	return HSAKMT_STATUS_SUCCESS;
}

/* topology_create_indirect_gpu_links - For every unordered pair of nodes
 * try to derive an indirect io_link in both directions and, when one exists
 * (non-zero weight), append it to the source node's link array.
 *	@sys_props [IN] system properties, supplies the node count
 *	@node_props [IN/OUT] per-node properties whose link arrays are extended
 * Failures to add a link are logged and otherwise ignored.
 */
static void topology_create_indirect_gpu_links(const HsaSystemProperties *sys_props,
					       node_props_t *node_props)
{
	uint32_t i, j;
	HSAuint32 weight;
	HSA_IOLINKTYPE type;

	/* With fewer than two nodes there is no pair to connect. Returning
	 * early also keeps "NumNodes - 1" from wrapping around when NumNodes
	 * is 0, in which case the caller may pass a NULL node_props.
	 */
	if (sys_props->NumNodes < 2)
		return;

	for (i = 0; i < sys_props->NumNodes - 1; i++) {
		for (j = i + 1; j < sys_props->NumNodes; j++) {
			/* weight is zeroed by the lookup and only becomes
			 * non-zero when an indirect route i->j exists.
			 */
			get_indirect_iolink_info(i, j, node_props, &weight, &type);
			if (weight &&
			    topology_add_io_link_for_node(i, sys_props, node_props,
				type, j, weight) != HSAKMT_STATUS_SUCCESS)
				pr_err("Fail to add IO link %d->%d\n", i, j);

			/* The reverse direction is evaluated independently */
			get_indirect_iolink_info(j, i, node_props, &weight, &type);
			if (weight &&
			    topology_add_io_link_for_node(j, sys_props, node_props,
				type, i, weight) != HSAKMT_STATUS_SUCCESS)
				pr_err("Fail to add IO link %d->%d\n", j, i);
		}
	}
}

/* topology_take_snapshot - Read the whole topology (system, node, memory,
 * cache and io_link properties) from sysfs into a temporary table and, if
 * the topology generation did not change while reading, publish it through
 * g_system / g_props.
 * Return: HSAKMT_STATUS_SUCCESS on success; on failure the temporary table
 * is released and the error status is returned.
 * NOTE(review): presumably called with hsakmt_mutex held, as the caller in
 * this file does - confirm for other callers.
 */
HSAKMT_STATUS topology_take_snapshot(void)
{
	uint32_t gen_start, gen_end, i, mem_id, cache_id;
	HsaSystemProperties sys_props;
	node_props_t *temp_props = 0;
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	struct proc_cpuinfo *cpuinfo;
	const uint32_t num_procs = get_nprocs();
	uint32_t num_ioLinks;
	bool p2p_links = false;
	uint32_t num_p2pLinks = 0;

	/* One cpuinfo entry per processor reported by get_nprocs() */
	cpuinfo = calloc(num_procs, sizeof(struct proc_cpuinfo));
	if (!cpuinfo) {
		pr_err("Fail to allocate memory for CPU info\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}
	topology_parse_cpuinfo(cpuinfo, num_procs);

retry:
	/* The generation is sampled before and after reading; a mismatch
	 * below discards everything and jumps back here.
	 */
	ret = topology_sysfs_get_generation(&gen_start);
	if (ret != HSAKMT_STATUS_SUCCESS)
		goto err;
	ret = hsakmt_topology_sysfs_get_system_props(&sys_props);
	if (ret != HSAKMT_STATUS_SUCCESS)
		goto err;
	if (sys_props.NumNodes > 0) {
		temp_props = calloc(sys_props.NumNodes * sizeof(node_props_t), 1);
		if (!temp_props) {
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto err;
		}
		for (i = 0; i < sys_props.NumNodes; i++) {
			ret = topology_sysfs_get_node_props(i,
					&temp_props[i].node,
					&p2p_links, &num_p2pLinks);
			if (ret != HSAKMT_STATUS_SUCCESS) {
				/* Node i owns nothing yet, so only i entries are passed */
				free_properties(temp_props, i);
				goto err;
			}

			if (temp_props[i].node.NumCPUCores)
				topology_get_cpu_model_name(&temp_props[i].node,
							cpuinfo, num_procs);

			/* Memory banks reported for this node */
			if (temp_props[i].node.NumMemoryBanks) {
				temp_props[i].mem = calloc(temp_props[i].node.NumMemoryBanks * sizeof(HsaMemoryProperties), 1);
				if (!temp_props[i].mem) {
					ret = HSAKMT_STATUS_NO_MEMORY;
					free_properties(temp_props, i + 1);
					goto err;
				}
				for (mem_id = 0; mem_id < temp_props[i].node.NumMemoryBanks; mem_id++) {
					ret = topology_sysfs_get_mem_props(i, mem_id, &temp_props[i].mem[mem_id]);
					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_properties(temp_props, i + 1);
						goto err;
					}
				}
			}

			/* Caches: taken from the KFD sysfs entries when the node
			 * reports any, otherwise from the CPU cache information
			 * for nodes without a KFDGpuID.
			 */
			if (temp_props[i].node.NumCaches) {
				temp_props[i].cache = calloc(temp_props[i].node.NumCaches * sizeof(HsaCacheProperties), 1);
				if (!temp_props[i].cache) {
					ret = HSAKMT_STATUS_NO_MEMORY;
					free_properties(temp_props, i + 1);
					goto err;
				}
				for (cache_id = 0; cache_id < temp_props[i].node.NumCaches; cache_id++) {
					ret = topology_sysfs_get_cache_props(i, cache_id, &temp_props[i].cache[cache_id]);
					if (ret != HSAKMT_STATUS_SUCCESS) {
						free_properties(temp_props, i + 1);
						goto err;
					}
				}
			} else if (!temp_props[i].node.KFDGpuID) { /* a CPU node */
				ret = topology_get_cpu_cache_props(
						i, cpuinfo, &temp_props[i]);
				if (ret != HSAKMT_STATUS_SUCCESS) {
					free_properties(temp_props, i + 1);
					goto err;
				}
			}

			/* To simplify, allocate maximum needed memory for io_links for each node. This
			 * removes the need for realloc when indirect and QPI links are added later
			 */
			temp_props[i].link = calloc(sys_props.NumNodes - 1, sizeof(HsaIoLinkProperties));
			if (!temp_props[i].link) {
				ret = HSAKMT_STATUS_NO_MEMORY;
				free_properties(temp_props, i + 1);
				goto err;
			}
			/* NumIOLinks as read from sysfs minus the p2p links gives the plain io_links */
			num_ioLinks = temp_props[i].node.NumIOLinks - num_p2pLinks;
			/* link_id indexes the compacted link array shared by both loops below */
			uint32_t link_id = 0;

			if (num_ioLinks) {
				uint32_t sys_link_id = 0;

				/* Parse all the sysfs specified io links. Skip the ones where the
				 * remote node (node_to) is not accessible
				 */
				while (sys_link_id < num_ioLinks &&
					link_id < sys_props.NumNodes - 1) {
					ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
								&temp_props[i].link[link_id], false);
					if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
						/* Slot link_id is reused for the next sysfs link */
						continue;
					} else if (ret != HSAKMT_STATUS_SUCCESS) {
						free_properties(temp_props, i + 1);
						goto err;
					}
					link_id++;
				}
				/* sysfs specifies all the io links. Limit the number to valid ones */
				temp_props[i].node.NumIOLinks = link_id;
			}

			if (num_p2pLinks) {
				uint32_t sys_link_id = 0;

				/* Parse all the sysfs specified p2p links. They are
				 * appended after the io links collected above.
				 */
				while (sys_link_id < num_p2pLinks &&
					link_id < sys_props.NumNodes - 1) {
					ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
								&temp_props[i].link[link_id], true);
					if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
						continue;
					} else if (ret != HSAKMT_STATUS_SUCCESS) {
						free_properties(temp_props, i + 1);
						goto err;
					}
					link_id++;
				}
				temp_props[i].node.NumIOLinks = link_id;
			}
		}
	}

	if (!p2p_links) {
		/* All direct IO links are created in the kernel. Here we need to
		 * connect GPU<->GPU or GPU<->CPU indirect IO links.
		 */
		topology_create_indirect_gpu_links(&sys_props, temp_props);
	}

	ret = topology_sysfs_get_generation(&gen_end);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		free_properties(temp_props, sys_props.NumNodes);
		goto err;
	}

	/* The topology changed while it was being read: start over */
	if (gen_start != gen_end) {
		free_properties(temp_props, sys_props.NumNodes);
		temp_props = 0;
		goto retry;
	}

	/* g_system is allocated once and reused if it already exists */
	if (!g_system) {
		g_system = malloc(sizeof(HsaSystemProperties));
		if (!g_system) {
			free_properties(temp_props, sys_props.NumNodes);
			ret = HSAKMT_STATUS_NO_MEMORY;
			goto err;
		}
	}

	/* Publish the snapshot */
	*g_system = sys_props;
	/* NOTE(review): an existing g_props is released with plain free(), not
	 * free_properties(); confirm its per-node buffers cannot be live here.
	 */
	if (g_props)
		free(g_props);
	g_props = temp_props;
err:
	/* Shared exit for success and failure; ret carries the outcome */
	free(cpuinfo);
	return ret;
}

/* Drop the Snapshot of the HSA topology information. Assume lock is held.
 *
 * Releases g_props, g_system and the user-to-sysfs node id map and resets
 * the corresponding globals. A warning is logged when only one of g_system
 * and g_props is set.
 */
void topology_drop_snapshot(void)
{
	if (!!g_system != !!g_props)
		pr_warn("Probably inconsistency?\n");

	if (g_props) {
		/* Remove state. In the inconsistent case g_system is NULL, so
		 * the node count is unknown; pass 0 instead of dereferencing
		 * it (presumably free_properties still releases the array
		 * itself - confirm).
		 */
		free_properties(g_props, g_system ? g_system->NumNodes : 0);
		g_props = NULL;
	}

	free(g_system);
	g_system = NULL;

	if (map_user_to_sysfs_node_id) {
		free(map_user_to_sysfs_node_id);
		map_user_to_sysfs_node_id = NULL;
		map_user_to_sysfs_node_id_size = 0;
	}
}

/* hsakmt_validate_nodeid - Check @nodeid against the current snapshot.
 *	@nodeid [IN] user node id
 *	@gpu_id [OUT] optional, receives the node's KFDGpuID
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_INVALID_NODE_UNIT when no
 * snapshot exists or @nodeid is out of range.
 */
HSAKMT_STATUS hsakmt_validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
{
	if (!g_props || !g_system)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (nodeid >= g_system->NumNodes)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (gpu_id != NULL)
		*gpu_id = g_props[nodeid].node.KFDGpuID;

	return HSAKMT_STATUS_SUCCESS;
}

/* hsakmt_gpuid_to_nodeid - Find the node whose KFDGpuID equals @gpu_id.
 *	@gpu_id [IN] KFD GPU id to look up
 *	@node_id [OUT] receives the index of the first matching node
 * Return: HSAKMT_STATUS_SUCCESS on a match, or
 * HSAKMT_STATUS_INVALID_NODE_UNIT when there is no snapshot or no node
 * carries @gpu_id.
 */
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
{
	uint32_t node_idx;

	/* Same snapshot guard as the other lookups in this file; without it
	 * a call before the snapshot is taken dereferences NULL.
	 */
	if (!g_props || !g_system)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	for (node_idx = 0; node_idx < g_system->NumNodes; node_idx++) {
		if (g_props[node_idx].node.KFDGpuID == gpu_id) {
			*node_id = node_idx;
			return HSAKMT_STATUS_SUCCESS;
		}
	}

	return HSAKMT_STATUS_INVALID_NODE_UNIT;
}

/* hsaKmtAcquireSystemProperties - Return the system properties, taking the
 * topology snapshot first if none exists yet.
 *	@SystemProperties [OUT] receives a copy of the snapshot's properties
 * When a new snapshot is taken, the process apertures and doorbells are
 * initialised as well; if either step fails, what was set up so far is torn
 * down again and the error is returned.
 * Return: HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * argument, or the status of the failing step.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	CHECK_KFD_OPEN();

	if (SystemProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	pthread_mutex_lock(&hsakmt_mutex);

	/* An existing snapshot is reused; re-initialising would leak memory */
	if (!g_system) {
		status = topology_take_snapshot();
		if (status != HSAKMT_STATUS_SUCCESS)
			goto unlock;

		assert(g_system);

		if (hsakmt_use_model)
			model_init();

		status = hsakmt_fmm_init_process_apertures(g_system->NumNodes);
		if (status != HSAKMT_STATUS_SUCCESS) {
			topology_drop_snapshot();
			goto unlock;
		}

		status = hsakmt_init_process_doorbells(g_system->NumNodes);
		if (status != HSAKMT_STATUS_SUCCESS) {
			hsakmt_fmm_destroy_process_apertures();
			topology_drop_snapshot();
			goto unlock;
		}
	}

	*SystemProperties = *g_system;

unlock:
	pthread_mutex_unlock(&hsakmt_mutex);
	return status;
}

/* hsaKmtReleaseSystemProperties - Under hsakmt_mutex, destroy the process
 * doorbells and apertures and drop the topology snapshot, in that order.
 * Return: always HSAKMT_STATUS_SUCCESS.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
{
	const HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pthread_mutex_lock(&hsakmt_mutex);
	{
		hsakmt_destroy_process_doorbells();
		hsakmt_fmm_destroy_process_apertures();
		topology_drop_snapshot();
	}
	pthread_mutex_unlock(&hsakmt_mutex);

	return status;
}

/* hsakmt_topology_get_node_props - Copy the snapshot's node properties of
 * @NodeId into @NodeProperties.
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR when no snapshot
 * exists or @NodeId is out of range.
 */
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
				      HsaNodeProperties *NodeProperties)
{
	const node_props_t *entry;

	if (!g_system || !g_props)
		return HSAKMT_STATUS_ERROR;
	if (NodeId >= g_system->NumNodes)
		return HSAKMT_STATUS_ERROR;

	entry = &g_props[NodeId];
	*NodeProperties = entry->node;

	return HSAKMT_STATUS_SUCCESS;
}

/* hsaKmtGetNodeProperties - Return the properties of @NodeId from the
 * snapshot. For nodes with a GPU id, NumMemoryBanks is increased by the
 * dGPU or iGPU heap count, plus one more when an MMIO aperture is available.
 * Return: HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * argument, or the status of the failing lookup.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
						HsaNodeProperties *NodeProperties)
{
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	if (NodeProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();
	pthread_mutex_lock(&hsakmt_mutex);

	status = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (status == HSAKMT_STATUS_SUCCESS)
		status = hsakmt_topology_get_node_props(NodeId, NodeProperties);

	/* For CPU only node don't add any additional GPU memory banks. */
	if (status == HSAKMT_STATUS_SUCCESS && gpu_id) {
		uint64_t mmio_base, mmio_limit;

		if (hsakmt_is_dgpu)
			NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
		else
			NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;

		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id,
				&mmio_base, &mmio_limit) == HSAKMT_STATUS_SUCCESS)
			NodeProperties->NumMemoryBanks += 1;
	}

	pthread_mutex_unlock(&hsakmt_mutex);
	return status;
}

/* hsaKmtGetNodeMemoryProperties - Fill @MemoryProperties with up to
 * @NumBanks entries for @NodeId.
 * The array is zeroed first, then the banks stored in the snapshot are
 * copied. For nodes with a GPU id the aperture based heaps follow in this
 * order, each only while room is left and the aperture lookup succeeds:
 * LDS, local memory (Kaveri only), scratch, SVM (when needed for the
 * engine) and MMIO remap.
 * Return: HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * array, or the node validation error.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
						      HSAuint32 NumBanks,
						      HsaMemoryProperties *MemoryProperties)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
	uint32_t bank, sysfs_banks, gpu_id;
	HSAuint64 aperture_limit;
	HsaMemoryProperties *slot;

	if (MemoryProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();
	pthread_mutex_lock(&hsakmt_mutex);

	status = hsakmt_validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		goto unlock;

	memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties));

	/* Banks recorded in the snapshot, limited to the caller's array size */
	sysfs_banks = MIN(g_props[NodeId].node.NumMemoryBanks, NumBanks);
	for (bank = 0; bank < sysfs_banks; bank++) {
		assert(g_props[NodeId].mem);
		MemoryProperties[bank] = g_props[NodeId].mem[bank];
	}

	/* The following memory banks does not apply to CPU only node */
	if (gpu_id == 0)
		goto unlock;

	/* Add LDS */
	if (bank < NumBanks) {
		slot = &MemoryProperties[bank];
		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
				&slot->VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
			slot->HeapType = HSA_HEAPTYPE_GPU_LDS;
			slot->SizeInBytes = g_props[NodeId].node.LDSSizeInKB * 1024;
			bank++;
		}
	}

	/* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE.
	 * For dGPU the topology node contains Local Memory and it is added by
	 * the for loop above
	 */
	if (hsakmt_get_gfxv_by_node_id(NodeId) == GFX_VERSION_KAVERI &&
	    bank < NumBanks &&
	    g_props[NodeId].node.LocalMemSize > 0) {
		slot = &MemoryProperties[bank];
		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
				&slot->VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
			slot->HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
			slot->SizeInBytes = g_props[NodeId].node.LocalMemSize;
			bank++;
		}
	}

	/* Add SCRATCH */
	if (bank < NumBanks) {
		slot = &MemoryProperties[bank];
		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id,
				&slot->VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
			slot->HeapType = HSA_HEAPTYPE_GPU_SCRATCH;
			slot->SizeInBytes = (aperture_limit - slot->VirtualBaseAddress) + 1;
			bank++;
		}
	}

	/* Add SVM aperture */
	if (hsakmt_topology_is_svm_needed(g_props[NodeId].node.EngineId) &&
	    bank < NumBanks) {
		slot = &MemoryProperties[bank];
		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_SVM, gpu_id,
				&slot->VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
			slot->HeapType = HSA_HEAPTYPE_DEVICE_SVM;
			slot->SizeInBytes = (aperture_limit - slot->VirtualBaseAddress) + 1;
			bank++;
		}
	}

	/* Add mmio aperture */
	if (bank < NumBanks) {
		slot = &MemoryProperties[bank];
		if (hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id,
				&slot->VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
			slot->HeapType = HSA_HEAPTYPE_MMIO_REMAP;
			slot->SizeInBytes = (aperture_limit - slot->VirtualBaseAddress) + 1;
			bank++;
		}
	}

unlock:
	pthread_mutex_unlock(&hsakmt_mutex);
	return status;
}

/* hsaKmtGetNodeCacheProperties - Copy the first @NumCaches cache entries of
 * @NodeId from the snapshot into @CacheProperties.
 * @ProcessorId is not used by this implementation.
 * Return: HSAKMT_STATUS_SUCCESS; HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * array or when @NumCaches exceeds the node's cache count;
 * HSAKMT_STATUS_INVALID_NODE_UNIT when there is no snapshot or @NodeId is
 * out of range.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
						     HSAuint32 ProcessorId,
						     HSAuint32 NumCaches,
						     HsaCacheProperties *CacheProperties)
{
	HSAKMT_STATUS status;
	uint32_t idx, copy_cnt;

	if (CacheProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();
	pthread_mutex_lock(&hsakmt_mutex);

	/* KFD ADD page 18, snapshot protocol violation */
	if (!g_system || NodeId >= g_system->NumNodes) {
		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
	} else if (NumCaches > g_props[NodeId].node.NumCaches) {
		status = HSAKMT_STATUS_INVALID_PARAMETER;
	} else {
		copy_cnt = MIN(g_props[NodeId].node.NumCaches, NumCaches);
		for (idx = 0; idx < copy_cnt; idx++) {
			assert(g_props[NodeId].cache);
			CacheProperties[idx] = g_props[NodeId].cache[idx];
		}
		status = HSAKMT_STATUS_SUCCESS;
	}

	pthread_mutex_unlock(&hsakmt_mutex);
	return status;
}

/* hsakmt_topology_get_iolink_props - Copy @NumIoLinks io_link entries of
 * @NodeId from the snapshot into @IoLinkProperties.
 * @NumIoLinks is not checked against the node's link count here; the caller
 * is expected to pass a valid count.
 * Return: HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR when no snapshot
 * exists or @NodeId is out of range.
 */
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
					HSAuint32 NumIoLinks,
					HsaIoLinkProperties *IoLinkProperties)
{
	if (!g_system || !g_props)
		return HSAKMT_STATUS_ERROR;
	if (NodeId >= g_system->NumNodes)
		return HSAKMT_STATUS_ERROR;

	memcpy(IoLinkProperties, g_props[NodeId].link,
	       NumIoLinks * sizeof(*IoLinkProperties));

	return HSAKMT_STATUS_SUCCESS;
}

/* hsaKmtGetNodeIoLinkProperties - Copy the first @NumIoLinks io_link entries
 * of @NodeId from the snapshot into @IoLinkProperties.
 * Return: HSAKMT_STATUS_SUCCESS; HSAKMT_STATUS_INVALID_PARAMETER for a NULL
 * array or when @NumIoLinks exceeds the node's link count;
 * HSAKMT_STATUS_INVALID_NODE_UNIT when there is no snapshot or @NodeId is
 * out of range.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
						      HSAuint32 NumIoLinks,
						      HsaIoLinkProperties *IoLinkProperties)
{
	HSAKMT_STATUS status;

	if (IoLinkProperties == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();

	pthread_mutex_lock(&hsakmt_mutex);

	/* KFD ADD page 18, snapshot protocol violation */
	if (!g_system || NodeId >= g_system->NumNodes) {
		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
	} else if (NumIoLinks > g_props[NodeId].node.NumIOLinks) {
		status = HSAKMT_STATUS_INVALID_PARAMETER;
	} else {
		assert(g_props[NodeId].link);
		status = hsakmt_topology_get_iolink_props(NodeId, NumIoLinks,
							  IoLinkProperties);
	}

	pthread_mutex_unlock(&hsakmt_mutex);
	return status;
}

/* Return the full GFX version derived from the EngineId of @node_id.
 * No validation is done here: the snapshot must exist and @node_id must be
 * in range.
 */
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id)
{
	const HsaNodeProperties *node = &g_props[node_id].node;

	return HSA_GET_GFX_VERSION_FULL(node->EngineId.ui32);
}

/* Return the DeviceId of @node_id, or 0 when there is no snapshot or the
 * node id is out of range.
 */
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id)
{
	if (!g_props || !g_system)
		return 0;
	if (node_id >= g_system->NumNodes)
		return 0;

	return g_props[node_id].node.DeviceId;
}

/* Return true when @node_id reports HSAMMUPresent and has both CPU cores
 * and compute cores. No validation is done here: the snapshot must exist
 * and @node_id must be in range.
 */
bool hsakmt_prefer_ats(HSAuint32 node_id)
{
	const HsaNodeProperties *node = &g_props[node_id].node;

	if (!node->Capability.ui32.HSAMMUPresent)
		return false;
	if (!node->NumCPUCores)
		return false;

	return node->NumFComputeCores ? true : false;
}

/* Return the DeviceId of the first node whose KFDGpuID equals @gpu_id, or 0
 * when there is no snapshot or no node matches.
 */
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id)
{
	unsigned int idx = 0;

	if (!g_props || !g_system)
		return 0;

	while (idx < g_system->NumNodes) {
		if (g_props[idx].node.KFDGpuID == gpu_id)
			return g_props[idx].node.DeviceId;
		idx++;
	}

	return 0;
}

/* Return the CPU node that @gpu_node is directly linked to, provided that
 * CPU node's memory banks add up to a non-zero size.
 * Return: the CPU node id, or INVALID_NODEID when no direct CPU is found or
 * the CPU node reports no memory.
 */
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node)
{
	HSAuint64 total_bytes = 0;
	HSAuint32 bank;
	int32_t cpu_id = gpu_get_direct_link_cpu(gpu_node, g_props);

	if (cpu_id == -1)
		return INVALID_NODEID;

	assert(g_props[cpu_id].mem);

	for (bank = 0; bank < g_props[cpu_id].node.NumMemoryBanks; bank++)
		total_bytes += g_props[cpu_id].mem[bank].SizeInBytes;

	if (!total_bytes)
		return INVALID_NODEID;

	return (uint32_t)cpu_id;
}


/* hsakmt_validate_nodeid_array - Validate @NumberOfNodes node ids and
 * translate them to gpu ids.
 *	@gpu_id_array [OUT] receives a malloc'ed array of @NumberOfNodes gpu ids;
 *			    owned by the caller on success, NULL on a
 *			    validation failure
 *	@NumberOfNodes [IN] number of entries in @NodeArray, must be non-zero
 *	@NodeArray [IN] node ids to validate
 * Return: HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER for bad
 * arguments, HSAKMT_STATUS_NO_MEMORY, or the status of the first node id
 * that fails validation.
 */
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
		uint32_t NumberOfNodes, uint32_t *NodeArray)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	unsigned int i;

	if (NumberOfNodes == 0 || !NodeArray || !gpu_id_array)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Translate Node IDs to gpu_ids */
	*gpu_id_array = malloc(NumberOfNodes * sizeof(uint32_t));
	if (!(*gpu_id_array))
		return HSAKMT_STATUS_NO_MEMORY;
	for (i = 0; i < NumberOfNodes; i++) {
		ret = hsakmt_validate_nodeid(NodeArray[i], *gpu_id_array + i);
		if (ret != HSAKMT_STATUS_SUCCESS) {
			free(*gpu_id_array);
			/* Do not hand a dangling pointer back to the caller */
			*gpu_id_array = NULL;
			break;
		}
	}

	return ret;
}

/* Return the value of num_sysfs_nodes.
 *
 * Deliberately not declared "inline": in C99 and later a plain inline
 * definition in a .c file does not by itself provide an external definition,
 * which can leave callers in other translation units without a symbol.
 */
uint32_t hsakmt_get_num_sysfs_nodes(void)
{
	return num_sysfs_nodes;
}


================================================
FILE: libhsakmt/src/version.c
================================================
/*
 * Copyright © 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including
 * the next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "libhsakmt.h"
#include <stdlib.h>
#include <string.h>
#include "hsakmt/linux/kfd_ioctl.h"

/*
 * Kernel interface version: written by hsakmt_init_kfd_version() and
 * copied out to callers by hsaKmtGetVersion().
 */
HsaVersionInfo hsakmt_kfd_version_info;

/*
 * Copy the kernel interface version recorded in hsakmt_kfd_version_info
 * into *VersionInfo.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when VersionInfo is NULL and
 * HSAKMT_STATUS_SUCCESS otherwise. CHECK_KFD_OPEN() (defined elsewhere)
 * runs first and may return early on its own.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetVersion(HsaVersionInfo *VersionInfo)
{
	CHECK_KFD_OPEN();

	if (!VersionInfo)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*VersionInfo = hsakmt_kfd_version_info;

	return HSAKMT_STATUS_SUCCESS;
}

HSAKMT_STATUS hsakmt_init_kfd_version(void)
{
	struct kfd_ioctl_get_version_args args = {0};

	if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
		return HSAKMT_STATUS_ERROR;

	hsakmt_kfd_version_info.KernelInterfaceMajorVersion = args.major_version;
	hsakmt_kfd_version_info.KernelInterfaceMinorVersion = args.minor_version;

	if (args.major_version != 1)
		return HSAKMT_STATUS_DRIVER_MISMATCH;

	return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/src/virtio/CMakeLists.txt
================================================

# Copyright 2025 Advanced Micro Devices, Inc.

# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

## Minimum CMake version this script declares support for.
cmake_minimum_required ( VERSION 3.7 )

## Always emit full compiler/linker command lines in the generated build.
set (CMAKE_VERBOSE_MAKEFILE ON)

## Name of the virtio backend library; the target uses the same name.
set ( HSAKMT_VIRTIO "hsakmt_virtio" )
set ( HSAKMT_VIRTIO_TARGET "${HSAKMT_VIRTIO}" )

project ( ${HSAKMT_VIRTIO_TARGET} VERSION 1.0)

## Compiler flags: warnings, C dialect and default symbol visibility.
set ( HSAKMT_VIRTIO_C_FLAGS
      -fPIC
      -W
      -Wall
      -Wextra
      -Wno-unused-parameter
      -Wformat-security
      -Wswitch-default
      -Wundef
      -Wshadow
      -Wpointer-arith
      -Wbad-function-cast
      -Wcast-qual
      -Wstrict-prototypes
      -Wmissing-prototypes
      -Wmissing-declarations
      -Wredundant-decls
      -Wunreachable-code
      -std=gnu99
      -fvisibility=hidden )

## -Wlogical-op is only added when the C compiler is GCC.
if ( CMAKE_COMPILER_IS_GNUCC )
    list ( APPEND HSAKMT_VIRTIO_C_FLAGS -Wlogical-op )
endif ()

## Linker version script and the link flags that reference it.
set ( HSAKMT_VIRTIO_LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/libhsakmt_virtio.ver" )

set ( HSAKMT_VIRTIO_LINK_FLAGS "-Wl,--enable-new-dtags -Wl,--version-script=${HSAKMT_VIRTIO_LINKER_SCRIPT} -Wl,-z,nodelete")

## Optimise Release builds; every other build type gets debug info.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    list ( APPEND HSAKMT_VIRTIO_C_FLAGS -O2 )
else ()
    list ( APPEND HSAKMT_VIRTIO_C_FLAGS -g )
endif ()

## Sources of the virtio backend; rbtree.c is shared with the parent library.
set ( HSAKMT_VIRTIO_SRC "virtio_gpu.c"
                        "hsakmt_virtio_vm.c"
                        "hsakmt_virtio_device.c"
                        "hsakmt_virtio_memory.c"
                        "hsakmt_virtio_amdgpu.c"
                        "hsakmt_virtio_events.c"
                        "hsakmt_virtio_queues.c"
                        "hsakmt_virtio_topology.c"
                        "hsakmt_virtio_openclose.c"
                        "../rbtree.c" )

## The sources are attached here; a separate target_sources() call with the
## same list would only register every file a second time.
add_library ( ${HSAKMT_VIRTIO_TARGET} STATIC ${HSAKMT_VIRTIO_SRC} )

target_compile_options ( ${HSAKMT_VIRTIO_TARGET} PRIVATE ${HSAKMT_VIRTIO_C_FLAGS} )

target_include_directories ( ${HSAKMT_VIRTIO_TARGET}
    PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/virtio
    ${CMAKE_CURRENT_SOURCE_DIR}/../
    ${CMAKE_CURRENT_SOURCE_DIR}/../../include
    ${CMAKE_CURRENT_SOURCE_DIR}/include/linux )

## NOTE(review): the target is a STATIC library; CMake documents LINK_FLAGS as
## applying to shared/module libraries and executables, so these flags are
## presumably not used when this archive is created. Confirm the intent.
set_property(TARGET ${HSAKMT_VIRTIO_TARGET} PROPERTY LINK_FLAGS ${HSAKMT_VIRTIO_LINK_FLAGS})

## pkg_check_modules() below is provided by PkgConfig, so fail early with a
## clear message when it is missing.
find_package ( PkgConfig REQUIRED )

## If DRM_DIR is set, libdrm is searched for under that prefix first.
## This reads the CMake variable DRM_DIR (e.g. -DDRM_DIR=...), not the
## environment; a parent script may populate it from the environment.
## set() is used instead of list(PREPEND), which requires CMake >= 3.15
## while this script declares 3.7 as its minimum.
set ( CMAKE_PREFIX_PATH "${DRM_DIR}" ${CMAKE_PREFIX_PATH} )

pkg_check_modules ( DRM REQUIRED IMPORTED_TARGET libdrm )
pkg_check_modules ( DRM_AMDGPU REQUIRED IMPORTED_TARGET libdrm_amdgpu )
target_include_directories ( ${HSAKMT_VIRTIO_TARGET} PRIVATE ${DRM_AMDGPU_INCLUDE_DIRS} )
target_include_directories ( ${HSAKMT_VIRTIO_TARGET} PRIVATE ${DRM_INCLUDE_DIRS} )

target_link_libraries ( ${HSAKMT_VIRTIO_TARGET}
    PRIVATE ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} pthread rt c ${CMAKE_DL_LIBS} )


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_amdgpu.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

/*
 * Send a VHSAKMT_CCMD_QUERY_GPU_INFO request and copy the returned
 * struct amdgpu_gpu_info into @out.
 *
 * @handle: not used by this implementation.
 * @out:    destination buffer; must hold sizeof(struct amdgpu_gpu_info) bytes.
 *
 * Returns 0 on success, -EINVAL if @out is NULL, -ENOMEM if no response
 * buffer could be allocated, or the non-zero result of
 * vhsakmt_execbuf_cpu(). CHECK_VIRTIO_KFD_OPEN() returns
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when no device is open.
 */
int vamdgpu_query_gpu_info(amdgpu_device_handle handle, void* out) {
  CHECK_VIRTIO_KFD_OPEN();

  /* Reject a NULL destination instead of handing it to memcpy(). */
  if (!out) return -EINVAL;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GPU_INFO,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  int ret = vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  /* Only copy the payload out when the command was executed. */
  if (!ret) memcpy(out, &rsp->gpu_info, sizeof(struct amdgpu_gpu_info));

  return ret;
}

/*
 * Stub: NodeId is ignored and *DeviceHandle is never written.
 * Returns HSAKMT_STATUS_SUCCESS once a virtio device is open.
 * NOTE(review): callers receive success without a valid handle — confirm
 * this is intended for the virtio backend.
 */
HSAKMT_STATUS vhsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId, HsaAMDGPUDeviceHandle* DeviceHandle) {
  CHECK_VIRTIO_KFD_OPEN();

  return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_device.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt_virtio_device.h"

/*
 * Forward @req to virtio_gpu_exec_cmd() on the device's virtio-gpu handle,
 * always passing true as the last argument. @from is not used.
 * Returns whatever virtio_gpu_exec_cmd() returns.
 */
int vhsakmt_execbuf_cpu(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, const char* from) {
  struct virtio_gpu_device* vgdev = dev->vgdev;

  return virtio_gpu_exec_cmd(vgdev, req, true);
}

/*
 * Obtain a response buffer of @sz bytes for @req from
 * virtio_gpu_alloc_rsp(); the result is passed through unchanged
 * (callers in this library treat NULL as failure).
 */
void* vhsakmt_alloc_rsp(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, uint32_t sz) {
  struct virtio_gpu_device* vgdev = dev->vgdev;

  return virtio_gpu_alloc_rsp(vgdev, req, sz);
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_device.h
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef HSAKMT_VIRTIO_DEVICE_H
#define HSAKMT_VIRTIO_DEVICE_H

#include "hsakmt_virtio_proto.h"
#include "rbtree.h"
#include "virtio_gpu.h"
#include <stdatomic.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Atomically add/subtract 1 and evaluate to the NEW value:
 * atomic_fetch_add/atomic_fetch_sub yield the previous value, hence the
 * trailing +1 / -1.
 */
#define vhsakmt_atomic_inc_return(ptr) (atomic_fetch_add((ptr), 1) + 1)
#define vhsakmt_atomic_dec_return(ptr) (atomic_fetch_sub((ptr), 1) - 1)

/* Pointer <-> 64-bit integer conversions, routed through unsigned long. */
#define VHSA_VPTR_TO_UINT64(vptr) ((uint64_t)(unsigned long)(vptr))
#define VHSA_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v))

/* Verbosity threshold consulted by vhsakmt_print(). */
extern int vhsakmt_debug_level;

/*
 * Print a printf-style message to stderr when level <= vhsakmt_debug_level.
 * The level argument is parenthesized so that any expression can be passed.
 */
#define vhsakmt_print(level, fmt, ...)                                                             \
  do {                                                                                             \
    if ((level) <= vhsakmt_debug_level) fprintf(stderr, fmt, ##__VA_ARGS__);                       \
  } while (0)

/* Message levels; a larger number is more verbose. */
#define VHSAKMT_DEBUG_LEVEL_DEFAULT (-1)
#define VHSAKMT_DEBUG_LEVEL_ERR 3
#define VHSAKMT_DEBUG_LEVEL_WARNING 4
#define VHSAKMT_DEBUG_LEVEL_INFO 6
#define VHSAKMT_DEBUG_LEVEL_DEBUG 7

/* Convenience wrappers, one per message level. */
#define vhsa_err(fmt, ...) vhsakmt_print(VHSAKMT_DEBUG_LEVEL_ERR, fmt, ##__VA_ARGS__)
#define vhsa_warn(fmt, ...) vhsakmt_print(VHSAKMT_DEBUG_LEVEL_WARNING, fmt, ##__VA_ARGS__)
#define vhsa_info(fmt, ...) vhsakmt_print(VHSAKMT_DEBUG_LEVEL_INFO, fmt, ##__VA_ARGS__)
#define vhsa_debug(fmt, ...) vhsakmt_print(VHSAKMT_DEBUG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)

/* Forward declarations; both structs are defined further down. */
struct vhsakmt_device;
struct vhsakmt_bo;

/* Handle types used throughout the virtio backend. */
typedef struct vhsakmt_device* vhsakmt_device_handle;
typedef struct vhsakmt_bo* vhsakmt_bo_handle;
/* A BO's node inside the per-device rbtree. */
typedef rbtree_node_t* bo_entry;

/* dev_list is what CHECK_VIRTIO_KFD_OPEN() tests against NULL. */
extern pthread_mutex_t dev_mutex;
extern vhsakmt_device_handle dev_list;

/*
 * Bit flags stored in vhsakmt_bo::bo_type. Each value is parenthesized so
 * the macros evaluate correctly inside any expression (e.g. next to '+',
 * '*' or '~', which bind tighter than '<<').
 */
#define VHSA_BO_KFD_MEM (1 << 0) /* allocated from KFD (hsaKmtAllocMemory) */
#define VHSA_BO_USERPTR (1 << 1)
#define VHSA_BO_QUEUE_BUFFER (1 << 2)   /* allocated from KFD, but used for queue CMD submit */
#define VHSA_BO_QUEUE_DOORBELL (1 << 3) /* doorbell memory */
#define VHSA_BO_QUEUE_RW_PTR (1 << 4)   /* queue read write ptr, from host map to guest */
/* allocated from KFD, but used for AQL queue read write ptr */
#define VHSA_BO_QUEUE_AQL_RW_PTR (1 << 5)
#define VHSA_BO_CLGL (1 << 6) /* CLGL memory, imported from mesa GL */
/* allocated from KFD, but is scratch memory, does not need map and unmap in ioctl */
#define VHSA_BO_SCRATCH (1 << 7)
#define VHSA_BO_QUEUE (1 << 8)
#define VHSA_BO_EVENT (1 << 9)
#define VHSA_BO_SCRATCH_MAP (1 << 10)

#define VHSA_SDMA_NONE UINT32_MAX

/*
 * Guard for API entry points: makes the ENCLOSING function return
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when dev_list is NULL.
 * Note the hidden return statement inside the macro.
 */
#define CHECK_VIRTIO_KFD_OPEN()                                                                    \
  do {                                                                                             \
    if (dev_list == NULL) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;                       \
  } while (0)

/* Per-node state kept by the virtio backend. */
struct vhsakmt_node {
  HsaNodeProperties node_props;
  /* presumably managed via vhsakmt_set_node_doorbell()/vhsakmt_node_doorbell() — TODO confirm */
  void* doorbell_base;
  /* presumably recorded by vhsakmt_set_scratch_area() — TODO confirm */
  uint64_t scratch_start;
  uint64_t scratch_size;
};

/* State of one opened virtio device. */
struct vhsakmt_device {
  /* virtio-gpu handle passed to the virtio_gpu_* helpers */
  struct virtio_gpu_device* vgdev;
  int refcount;
  /* held while inserting into, deleting from or searching bo_rbt */
  pthread_mutex_t bo_handles_mutex;
  /* tree of BOs, keyed by the address/size set in vhsakmt_insert_bo() */
  rbtree_t bo_rbt;

  struct vhsakmt_bo* shmem_bo;

  uint32_t reqbuf_max;
  /* bumped with vhsakmt_atomic_inc_return() for every new blob */
  uint32_t next_blob_id;

  /* presumably set by vhsakmt_set_vm_area() — TODO confirm */
  uint64_t vm_start;
  uint64_t vm_size;

  pthread_mutex_t vhsakmt_mutex;
  struct vhsakmt_node* vhsakmt_nodes;
  HsaSystemProperties* sys_props;
};

/* One buffer object tracked by the virtio backend. */
struct vhsakmt_bo {
  /* Must stay the first member: vhsakmt_entry_to_bo_handle() casts a
   * node pointer straight back to the BO. */
  rbtree_node_t rbtn;
  struct vhsakmt_device* dev;

  /* updated with the atomic helpers; the BO is torn down at <= 0 */
  int refcount;
  /* size_t rather than unsigned: the creators assign size_t / 64-bit byte
   * counts, which would be truncated for sizes of 4 GiB and above */
  size_t size;
  /* guest CPU mapping, NULL while unmapped */
  void* cpu_addr;
  /* host-side address/handle supplied when the BO was created */
  void* host_addr;
  HsaMemFlags flags;
  /* bitwise OR of VHSA_BO_* flags */
  uint32_t bo_type;
  uint32_t blob_id;
  /* serialises vhsakmt_bo_cpu_map()/vhsakmt_bo_cpu_unmap() */
  pthread_mutex_t map_mutex;

  union {
    struct {
      uint32_t handle;
      uint32_t res_id;
      uint64_t offset;
      uint64_t alloc_size;
      int map_count;
    } real;
  };

  /* set for event BOs; freed together with the BO */
  vHsaEvent* event;
  uint64_t queue_id;
  vhsakmt_bo_handle rw_bo;
  /* freed together with the BO when non-NULL */
  void* gl_meta_data;
};

/* hsakmt_virtio_memory.c: BO bookkeeping, mapping and blob creation */
vhsakmt_bo_handle vhsakmt_entry_to_bo_handle(bo_entry e);
bo_entry vhsakmt_bo_handle_to_entry(vhsakmt_bo_handle bo);

void vhsakmt_insert_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo, void* addr, uint64_t size);
void vhsakmt_remove_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo);
vhsakmt_bo_handle vhsakmt_find_bo_by_addr(vhsakmt_device_handle dev, void* addr);
void* vhsakmt_gpu_va(vhsakmt_device_handle dev, void* va);

int vhsakmt_bo_cpu_unmap(vhsakmt_bo_handle bo);
int vhsakmt_bo_cpu_map(vhsakmt_bo_handle bo_handle, void** cpu, void* fixed_cpu);
int vhsakmt_create_mappable_blob_bo(vhsakmt_device_handle dev, size_t size, uint32_t blob_id,
                                    uint32_t bo_type, void* va_handle,
                                    vhsakmt_bo_handle* bo_handle);
int vhsakmt_bo_free(vhsakmt_device_handle dev, vhsakmt_bo_handle bo);
int vhsakmt_init_host_blob(vhsakmt_device_handle dev, size_t size, uint32_t blob_type,
                           uint32_t blob_flag, uint32_t blob_id, uint32_t bo_type, void* va_handle,
                           vhsakmt_bo_handle* bo_handle);

/* hsakmt_virtio_openclose.c */
vhsakmt_device_handle vhsakmt_dev(void);

/* hsakmt_virtio_vm.c */
void* vhsakmt_vm_start(void);
int vhsakmt_reserve_va(uint64_t start, uint64_t size);
void vhsakmt_dereserve_va(uint64_t start, uint64_t size);
void vhsakmt_set_scratch_area(vhsakmt_device_handle dev, uint32_t node, uint64_t start,
                              uint64_t size);
void vhsakmt_set_vm_area(vhsakmt_device_handle dev, uint64_t start, uint64_t size);
int vhsakmt_set_node_doorbell(vhsakmt_device_handle dev, uint32_t node, void* doorbell);
void* vhsakmt_node_doorbell(vhsakmt_device_handle dev, uint32_t node);
bool vhsakmt_is_scratch_mem(vhsakmt_device_handle dev, void* addr);
bool vhsakmt_is_userptr(vhsakmt_device_handle dev, void* addr);

/* hsakmt_virtio_device.c: thin wrappers over the virtio_gpu_* transport */
int vhsakmt_execbuf_cpu(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, const char* from);
void* vhsakmt_alloc_rsp(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, uint32_t sz);

/* hsakmt_virtio_events.c */
void* vhsakmt_event_host_handle(HsaEvent* h);

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_events.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

/*
 * Threshold used by vhsakmt_print(). It has no initializer and therefore
 * starts at 0, which is below every VHSAKMT_DEBUG_LEVEL_* message level
 * (3..7), so nothing is printed until it is raised.
 */
int vhsakmt_debug_level;

/* Return the event_handle field of the vHsaEvent behind @h, as a pointer. */
void* vhsakmt_event_host_handle(HsaEvent* h) {
  vHsaEvent* vevent = (vHsaEvent*)h;

  return (void*)vevent->event_handle;
}

/* Return the res_id field of the vHsaEvent behind @h. */
static inline int32_t vhsakmt_event_res_id(HsaEvent* h) {
  vHsaEvent* vevent = (vHsaEvent*)h;

  return vevent->res_id;
}

/* Return the BO recorded in the bo_handle field of the vHsaEvent behind @h. */
static inline vhsakmt_bo_handle vhsakmt_event_bo_handle(HsaEvent* h) {
  vHsaEvent* vevent = (vHsaEvent*)h;

  return (vhsakmt_bo_handle)vevent->bo_handle;
}

/*
 * Create the host blob BO backing @vevent_handle and link the two:
 * the BO points at the event, the event stores the BO pointer and its
 * resource id, and the BO is inserted into the device tree keyed by the
 * event's address and @size.
 *
 * Returns 0 on success or the error from vhsakmt_init_host_blob().
 */
static int vhsakmt_create_event_blob_bo(vhsakmt_device_handle dev, size_t size, uint32_t blob_id,
                                        vHsaEvent* vevent_handle, vhsakmt_bo_handle* bo_handle) {
  vhsakmt_bo_handle event_bo;
  int r = vhsakmt_init_host_blob(dev, size, VIRTGPU_BLOB_MEM_HOST3D, 0, blob_id, VHSA_BO_EVENT,
                                 (void*)vevent_handle->event_handle, bo_handle);

  if (r != 0) return r;

  event_bo = *bo_handle;
  event_bo->event = vevent_handle;
  vevent_handle->bo_handle = (uint64_t)event_bo;
  vevent_handle->res_id = event_bo->real.res_id;
  vhsakmt_insert_bo(dev, event_bo, vevent_handle, size);

  return 0;
}

/*
 * Create an event on the host and a guest-side vHsaEvent mirroring it.
 *
 * @EventDesc:   descriptor forwarded to the host; must not be NULL.
 * @ManualReset: forwarded to the host.
 * @IsSignaled:  forwarded to the host.
 * @Event:       out; receives the new event on success; must not be NULL.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success,
 * HSAKMT_STATUS_INVALID_PARAMETER for NULL arguments, HSAKMT_STATUS_ERROR
 * if the command could not be executed, the host's status when it is
 * non-zero, or -ENOMEM (kept from the existing code) for allocation and
 * blob-creation failures.
 *
 * NOTE(review): when a step after the host call fails, nothing visible
 * here releases the event on the host side — confirm.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateEvent(HsaEventDescriptor* EventDesc, _Bool ManualReset,
                                           _Bool IsSignaled, HsaEvent** Event) {
  CHECK_VIRTIO_KFD_OPEN();

  /* Both pointers are dereferenced below. */
  if (!EventDesc || !Event) return HSAKMT_STATUS_INVALID_PARAMETER;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_event_rsp* rsp;
  vhsakmt_bo_handle event_bo;
  vHsaEvent* e;
  int r;
  struct vhsakmt_ccmd_event_req req = {
      .hdr = VHSAKMT_CCMD(EVENT, sizeof(struct vhsakmt_ccmd_event_req)),
      .type = VHSAKMT_CCMD_EVENT_CREATE,
      .create_args.EventDesc = *EventDesc,
      .create_args.ManualReset = ManualReset,
      .create_args.IsSignaled = IsSignaled,
      .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id),
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_event_rsp));
  if (!rsp) return -ENOMEM;

  /* Do not interpret the response if the command never ran. */
  r = vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);
  if (r) return HSAKMT_STATUS_ERROR;
  if (rsp->ret) return rsp->ret;

  e = calloc(1, sizeof(vHsaEvent));
  if (!e) return -ENOMEM;

  memcpy(e, &rsp->vevent, sizeof(vHsaEvent));

  r = vhsakmt_create_event_blob_bo(dev, sizeof(vHsaEvent), req.blob_id, e, &event_bo);
  if (r) {
    free(e);
    return -ENOMEM;
  }

  *Event = (HsaEvent*)e;

  vhsa_debug(
      "%s: event addr: %p, hw123: %lx, %lx, %x, type: %d, id: %x, host handle: 0x%lx, res id: %d\n",
      __FUNCTION__, e, e->event.EventData.HWData1, e->event.EventData.HWData2,
      e->event.EventData.HWData3, e->event.EventData.EventType, e->event.EventId, e->event_handle,
      event_bo->real.res_id);

  return HSAKMT_STATUS_SUCCESS;
}

/*
 * Release the BO attached to @Event through vhsakmt_bo_free().
 * A NULL event, or an event with no BO recorded, is treated as success.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyEvent(HsaEvent* Event) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_bo* event_bo = (Event != NULL) ? vhsakmt_event_bo_handle(Event) : NULL;

  if (event_bo == NULL) return HSAKMT_STATUS_SUCCESS;

  return vhsakmt_bo_free(dev, event_bo);
}

/*
 * Ask the host to set @Event (VHSAKMT_CCMD_EVENT_SET).
 *
 * Returns the status reported by the host, HSAKMT_STATUS_INVALID_PARAMETER
 * for a NULL event, HSAKMT_STATUS_ERROR if the command could not be
 * executed, or -ENOMEM (kept from the existing code) when no response
 * buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtSetEvent(HsaEvent* Event) {
  CHECK_VIRTIO_KFD_OPEN();

  /* The request initializer below dereferences the event. */
  if (!Event) return HSAKMT_STATUS_INVALID_PARAMETER;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_event_rsp* rsp;
  struct vhsakmt_ccmd_event_req req = {
      .hdr = VHSAKMT_CCMD(EVENT, sizeof(struct vhsakmt_ccmd_event_req)),
      .type = VHSAKMT_CCMD_EVENT_SET,
      .event_hanele = vhsakmt_event_host_handle(Event),
      .res_id = vhsakmt_event_res_id(Event),
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_event_rsp));
  if (!rsp) return -ENOMEM;

  /* Do not interpret the response if the command never ran. */
  if (vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__)) return HSAKMT_STATUS_ERROR;

  return rsp->ret;
}

/*
 * Ask the host to reset @Event (VHSAKMT_CCMD_EVENT_RESET).
 *
 * Returns the status reported by the host, HSAKMT_STATUS_INVALID_PARAMETER
 * for a NULL event, HSAKMT_STATUS_ERROR if the command could not be
 * executed, or -ENOMEM (kept from the existing code) when no response
 * buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtResetEvent(HsaEvent* Event) {
  CHECK_VIRTIO_KFD_OPEN();

  /* The request initializer below dereferences the event. */
  if (!Event) return HSAKMT_STATUS_INVALID_PARAMETER;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_event_rsp* rsp;
  struct vhsakmt_ccmd_event_req req = {
      .hdr = VHSAKMT_CCMD(EVENT, sizeof(struct vhsakmt_ccmd_event_req)),
      .type = VHSAKMT_CCMD_EVENT_RESET,
      .event_hanele = vhsakmt_event_host_handle(Event),
      .res_id = vhsakmt_event_res_id(Event),
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_event_rsp));
  if (!rsp) return -ENOMEM;

  /* Do not interpret the response if the command never ran. */
  if (vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__)) return HSAKMT_STATUS_ERROR;

  return rsp->ret;
}

/*
 * Ask the host for the state of @Event (VHSAKMT_CCMD_EVENT_QUERY_STATE).
 *
 * Returns the status reported by the host, HSAKMT_STATUS_INVALID_PARAMETER
 * for a NULL event, HSAKMT_STATUS_ERROR if the command could not be
 * executed, or -ENOMEM (kept from the existing code) when no response
 * buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtQueryEventState(HsaEvent* Event) {
  CHECK_VIRTIO_KFD_OPEN();

  /* The request initializer below dereferences the event. */
  if (!Event) return HSAKMT_STATUS_INVALID_PARAMETER;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_event_rsp* rsp;
  struct vhsakmt_ccmd_event_req req = {
      .hdr = VHSAKMT_CCMD(EVENT, sizeof(struct vhsakmt_ccmd_event_req)),
      .type = VHSAKMT_CCMD_EVENT_QUERY_STATE,
      .event_hanele = vhsakmt_event_host_handle(Event),
      .res_id = vhsakmt_event_res_id(Event),
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_event_rsp));
  if (!rsp) return -ENOMEM;

  /* Do not interpret the response if the command never ran. */
  if (vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__)) return HSAKMT_STATUS_ERROR;

  return rsp->ret;
}

/*
 * Stub: waiting is not implemented here. All arguments are ignored and
 * HSAKMT_STATUS_ERROR is returned unconditionally, so every wrapper that
 * forwards to this function fails as well.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnMultipleEvents(HsaEvent* Events[], HSAuint32 NumEvents,
                                                    bool WaitOnAll, HSAuint32 Milliseconds) {
  return HSAKMT_STATUS_ERROR;
}

/* Wait on a single event by forwarding a one-element array to vhsaKmtWaitOnMultipleEvents(). */
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnEvent(HsaEvent* Event, HSAuint32 Milliseconds) {
  HsaEvent** single_event = &Event;
  const HSAuint32 event_count = 1;

  return vhsaKmtWaitOnMultipleEvents(single_event, event_count, true, Milliseconds);
}

/*
 * Single-event wait taking an extra @event_age argument.
 * @event_age is neither read nor written; the call is forwarded to
 * vhsaKmtWaitOnMultipleEvents() with a one-element array.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnEvent_Ext(HsaEvent* Event, HSAuint32 Milliseconds,
                                               uint64_t* event_age) {
  HsaEvent** single_event = &Event;
  const HSAuint32 event_count = 1;

  return vhsaKmtWaitOnMultipleEvents(single_event, event_count, true, Milliseconds);
}

/*
 * Multi-event wait taking an extra @event_age argument.
 * @event_age is neither read nor written; all other arguments are passed
 * through to vhsaKmtWaitOnMultipleEvents().
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtWaitOnMultipleEvents_Ext(HsaEvent* Events[], HSAuint32 NumEvents,
                                                        bool WaitOnAll, HSAuint32 Milliseconds,
                                                        uint64_t* event_age) {
  HSAKMT_STATUS status = vhsaKmtWaitOnMultipleEvents(Events, NumEvents, WaitOnAll, Milliseconds);

  return status;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_memory.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

/* 0x50 = 80; presumably the byte limit for a BO's gl_meta_data — TODO confirm against its users. */
#define VHSA_GL_METADATA_MAX_SIZE (0x50)

/* Reinterpret a tree node as its BO; valid because rbtn is the first member of struct vhsakmt_bo. */
vhsakmt_bo_handle vhsakmt_entry_to_bo_handle(bo_entry e) {
  vhsakmt_bo_handle owner = (vhsakmt_bo_handle)e;

  return owner;
}
/* Return the tree node embedded in @bo. */
bo_entry vhsakmt_bo_handle_to_entry(vhsakmt_bo_handle bo) {
  bo_entry node = &bo->rbtn;

  return node;
}
/* A BO counts as plain memory when it has neither a queue id nor an event attached. */
static inline bool vhsakmt_is_mem_bo(vhsakmt_bo_handle bo) {
  if (bo->queue_id) return false;
  if (bo->event) return false;
  return true;
}

/* Memory is CPU-mappable unless the Scratch flag is set. */
static bool vhsakmt_mappable(HsaMemFlags flags) {
  if (flags.ui32.Scratch) return false;
  return true;
}

/* Apply vhsakmt_mappable() to the flags stored in @bo. */
static bool vhsakmt_bo_mappable(vhsakmt_bo_handle bo) {
  const bool mappable = vhsakmt_mappable(bo->flags);

  return mappable;
}

/*
 * Key @bo by (@addr, @size) and insert it into the device's BO tree
 * while holding bo_handles_mutex.
 */
void vhsakmt_insert_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo, void* addr, uint64_t size) {
  rbtree_node_t* node = &bo->rbtn;

  node->key.addr = (unsigned long)addr;
  node->key.size = (unsigned long)size;

  pthread_mutex_lock(&dev->bo_handles_mutex);
  hsakmt_rbtree_insert(&dev->bo_rbt, node);
  pthread_mutex_unlock(&dev->bo_handles_mutex);
}

/* Delete @entry from the device's BO tree under bo_handles_mutex; a NULL entry is ignored. */
static void vhsakmt_remove_entry(vhsakmt_device_handle dev, bo_entry entry) {
  if (entry != NULL) {
    pthread_mutex_lock(&dev->bo_handles_mutex);
    hsakmt_rbtree_delete(&dev->bo_rbt, entry);
    pthread_mutex_unlock(&dev->bo_handles_mutex);
  }
}

/*
 * Remove @bo from the device's BO tree. A BO whose key is still
 * all-zero (addr == 0 and size == 0) is left alone.
 */
void vhsakmt_remove_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) {
  bo_entry node = vhsakmt_bo_handle_to_entry(bo);
  const bool key_is_zero = (node->key.addr == 0) && (node->key.size == 0);

  if (!key_is_zero) vhsakmt_remove_entry(dev, node);
}

/*
 * Look up the tree node nearest to @addr (LKP_ADDR, RIGHT) and accept it
 * only when the owning BO's cpu_addr equals @addr exactly.
 * The tree is locked for the lookup only; the comparison happens after
 * the lock has been released. Returns NULL when nothing matches.
 */
static bo_entry vhsakmt_rbt_search(vhsakmt_device_handle dev, void* addr) {
  rbtree_key_t lookup_key = rbtree_key((uint64_t)addr, 0);
  bo_entry nearest;

  pthread_mutex_lock(&dev->bo_handles_mutex);
  nearest = rbtree_lookup_nearest(&dev->bo_rbt, &lookup_key, LKP_ADDR, RIGHT);
  pthread_mutex_unlock(&dev->bo_handles_mutex);

  if (nearest == NULL) return NULL;

  return (vhsakmt_entry_to_bo_handle(nearest)->cpu_addr == addr) ? nearest : NULL;
}

/* Find the tree node whose BO is mapped at @addr; thin wrapper over vhsakmt_rbt_search(). */
static bo_entry vhsakmt_find_entry_by_addr(vhsakmt_device_handle dev, void* addr) {
  bo_entry found = vhsakmt_rbt_search(dev, addr);

  return found;
}

/*
 * Return the memory BO mapped at @addr, or NULL when there is none or
 * when the BO found belongs to a queue or an event.
 */
vhsakmt_bo_handle vhsakmt_find_bo_by_addr(vhsakmt_device_handle dev, void* addr) {
  bo_entry node = vhsakmt_find_entry_by_addr(dev, addr);

  if (node == NULL) return NULL;

  vhsakmt_bo_handle candidate = vhsakmt_entry_to_bo_handle(node);

  return vhsakmt_is_mem_bo(candidate) ? candidate : NULL;
}

/*
 * Translate @va for the GPU: addresses that vhsakmt_is_userptr() does not
 * classify as userptr are returned unchanged; for userptr addresses the
 * host_addr of the matching BO is returned, or NULL if no BO is found.
 */
void* vhsakmt_gpu_va(vhsakmt_device_handle dev, void* va) {
  bo_entry node;

  if (!vhsakmt_is_userptr(dev, va)) return va;

  node = vhsakmt_find_entry_by_addr(dev, va);

  return (node != NULL) ? vhsakmt_entry_to_bo_handle(node)->host_addr : NULL;
}

/*
 * Map @bo into the CPU address space.
 *
 * @bo:        buffer object to map.
 * @cpu:       out; receives the mapping address. Also filled in when the
 *             BO was already mapped. May alias &bo->cpu_addr.
 * @fixed_cpu: forwarded to virtio_gpu_map_handle() as the requested address.
 *
 * Non-mappable (scratch) BOs are a no-op: 0 is returned and *cpu is left
 * untouched. Returns 0 on success, the error from virtio_gpu_map_handle(),
 * or -ENOMEM if the mapping came back as MAP_FAILED. A failed mapping is
 * never recorded in the BO and never counted in map_count.
 */
int vhsakmt_bo_cpu_map(vhsakmt_bo_handle bo, void** cpu, void* fixed_cpu) {
  int r = 0;

  if (!vhsakmt_bo_mappable(bo)) return 0;

  pthread_mutex_lock(&bo->map_mutex);

  if (bo->cpu_addr) {
    /* Already mapped: report the existing mapping to the caller. */
    *cpu = bo->cpu_addr;
  } else {
    void* mapped = NULL;

    r = virtio_gpu_map_handle(bo->dev->vgdev, bo->real.handle, bo->size, &mapped, fixed_cpu);
    if (!r && mapped == MAP_FAILED) r = -ENOMEM;

    if (!r) {
      bo->cpu_addr = mapped;
      *cpu = mapped;
      atomic_fetch_add(&bo->real.map_count, 1);
    }
  }

  pthread_mutex_unlock(&bo->map_mutex);

  return r;
}

/*
 * Drop one CPU mapping reference from @bo. When the count reaches zero
 * and the BO has VHSA_BO_KFD_MEM set, the mapping is removed, the address
 * range is handed to vhsakmt_reserve_va() and cpu_addr is cleared.
 * Non-mappable or unmapped BOs are a no-op. Always returns 0.
 */
int vhsakmt_bo_cpu_unmap(vhsakmt_bo_handle bo) {
  if (!vhsakmt_bo_mappable(bo)) return 0;

  pthread_mutex_lock(&bo->map_mutex);

  if (bo->cpu_addr && bo->real.map_count != 0) {
    const int remaining = vhsakmt_atomic_dec_return(&bo->real.map_count);

    if (remaining <= 0 && (bo->bo_type & VHSA_BO_KFD_MEM)) {
      virtio_gpu_unmap(bo->cpu_addr, bo->size);
      vhsakmt_reserve_va(VHSA_VPTR_TO_UINT64(bo->cpu_addr), bo->size);
      bo->cpu_addr = NULL;
    }
  }

  pthread_mutex_unlock(&bo->map_mutex);

  return 0;
}

/*
 * Destroy the virtio-gpu handle owned by @bo and free the BO structure.
 * The BO is freed whatever virtio_gpu_destroy_handle() returns; that
 * return value is passed back to the caller.
 */
static int vhsakmt_destroy_handle(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) {
  const uint32_t gem_handle = bo->real.handle;
  int r;

  r = virtio_gpu_destroy_handle(dev->vgdev, gem_handle);
  free(bo);

  return r;
}

/*
 * Create a virtio-gpu blob resource and wrap it in a new vhsakmt_bo.
 *
 * @blob_type/@blob_flag/@blob_id/@size fill the create-blob request,
 * @bo_type becomes the BO's type flags and @va_handle its host_addr.
 * The BO starts with refcount 1 and map_count 0 and is NOT inserted into
 * the device tree.
 *
 * Returns 0 with *bo_handle set, -EINVAL if the blob could not be
 * created, or -ENOMEM (after destroying the new handle) if the BO
 * structure could not be allocated.
 */
int vhsakmt_init_host_blob(vhsakmt_device_handle dev, size_t size, uint32_t blob_type,
                           uint32_t blob_flag, uint32_t blob_id, uint32_t bo_type, void* va_handle,
                           vhsakmt_bo_handle* bo_handle) {
  struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = blob_type,
      .size = size,
      .blob_id = blob_id,
      .blob_flags = blob_flag,
  };
  vhsakmt_bo_handle new_bo;

  if (virtio_gpu_create_blob(dev->vgdev, &args)) return -EINVAL;

  new_bo = calloc(1, sizeof(struct vhsakmt_bo));
  if (new_bo == NULL) {
    virtio_gpu_destroy_handle(dev->vgdev, args.bo_handle);
    return -ENOMEM;
  }

  new_bo->dev = dev;
  new_bo->size = size;
  new_bo->real.alloc_size = size;
  new_bo->bo_type = bo_type;
  new_bo->host_addr = va_handle;
  pthread_mutex_init(&new_bo->map_mutex, NULL);
  atomic_store(&new_bo->real.map_count, 0);
  atomic_store(&new_bo->refcount, 1);
  new_bo->real.handle = args.bo_handle;

  /* Look up the resource id that belongs to the new handle. */
  virtio_gpu_res_id(dev->vgdev, new_bo->real.handle, &new_bo->real.res_id);

  *bo_handle = new_bo;

  return 0;
}

/*
 * Create a userptr blob (VIRTGPU_BLOB_MEM_HOST3D_GUEST with
 * VIRTGPU_BLOB_FLAG_USE_USERPTR) over [@addr, @addr + @size) and wrap it
 * in a new vhsakmt_bo of type VHSA_BO_USERPTR whose cpu_addr is @addr.
 *
 * On success *bo_handle receives the BO, *offset the offset reported in
 * the create-blob arguments, and the non-negative result of
 * virtio_gpu_create_blob() is returned. Returns a negative value if blob
 * creation fails, or -ENOMEM (after destroying the handle) if the BO
 * structure cannot be allocated.
 */
static int vhsakmt_init_userptr_blob(vhsakmt_device_handle dev, void* addr, size_t size,
                                     vhsakmt_bo_handle* bo_handle, uint64_t* offset) {
  struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = VIRTGPU_BLOB_MEM_HOST3D_GUEST,
      .blob_flags = VIRTGPU_BLOB_FLAG_USE_USERPTR,
      .size = size,
      .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id),
      .blob_userptr = (uint64_t)addr,
  };
  vhsakmt_bo_handle new_bo;
  int r;

  r = virtio_gpu_create_blob(dev->vgdev, &args);
  if (r < 0) return r;

  new_bo = calloc(1, sizeof(struct vhsakmt_bo));
  if (new_bo == NULL) {
    virtio_gpu_destroy_handle(dev->vgdev, args.bo_handle);
    return -ENOMEM;
  }

  new_bo->dev = dev;
  new_bo->size = size;
  new_bo->real.alloc_size = size;
  new_bo->bo_type = VHSA_BO_USERPTR;
  new_bo->cpu_addr = addr;
  pthread_mutex_init(&new_bo->map_mutex, NULL);
  atomic_store(&new_bo->real.map_count, 0);
  atomic_store(&new_bo->refcount, 1);
  new_bo->real.handle = args.bo_handle;

  /* Look up the resource id that belongs to the new handle. */
  virtio_gpu_res_id(dev->vgdev, new_bo->real.handle, &new_bo->real.res_id);

  *bo_handle = new_bo;
  *offset = args.offset;

  return r;
}

/*
 * Create a mappable host blob BO, map it for the CPU (preferably at
 * @va_handle) and insert it into the device tree keyed by the resulting
 * CPU address.
 *
 * Returns 0 with *bo_handle set, the error from vhsakmt_init_host_blob(),
 * or -EINVAL if the mapping fails. On a mapping failure the blob handle,
 * the BO's mutex and the BO itself are released and *bo_handle is set to
 * NULL.
 */
int vhsakmt_create_mappable_blob_bo(vhsakmt_device_handle dev, size_t size, uint32_t blob_id,
                                    uint32_t bo_type, void* va_handle,
                                    vhsakmt_bo_handle* bo_handle) {
  vhsakmt_bo_handle bo;
  int r;

  r = vhsakmt_init_host_blob(dev, size, VIRTGPU_BLOB_MEM_HOST3D, VIRTGPU_BLOB_FLAG_USE_MAPPABLE,
                             blob_id, bo_type, va_handle, bo_handle);
  if (r) return r;

  bo = *bo_handle;

  r = vhsakmt_bo_cpu_map(bo, &bo->cpu_addr, va_handle);
  if (r) {
    /* A bare free() would leak the blob handle created above. */
    pthread_mutex_destroy(&bo->map_mutex);
    vhsakmt_destroy_handle(dev, bo);
    *bo_handle = NULL;
    return -EINVAL;
  }

  if (va_handle && (va_handle != bo->cpu_addr))
    vhsa_warn("%s: target map: %p != real map: %p\n", __FUNCTION__, va_handle, bo->cpu_addr);

  vhsakmt_insert_bo(dev, bo, bo->cpu_addr, bo->size);
  return r;
}

/*
 * Allocate memory on the host and expose it to the guest as a blob BO.
 *
 * @PreferredNode: forwarded to the host; also used for the scratch area.
 * @SizeInBytes:   allocation size.
 * @MemFlags:      allocation flags; Scratch memory is not CPU-mapped and
 *                 not inserted into the BO tree.
 * @MemoryAddress: out; receives the resulting address; must not be NULL.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success,
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when no device is open,
 * HSAKMT_STATUS_INVALID_PARAMETER for a NULL @MemoryAddress,
 * HSAKMT_STATUS_ERROR if the command could not be executed, the host's
 * status when it is non-zero, or a negative errno value (kept from the
 * existing code) for blob or mapping failures.
 *
 * NOTE(review): when a step after the host allocation fails, nothing
 * visible here releases the host-side memory — confirm.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes,
                                           HsaMemFlags MemFlags, void** MemoryAddress) {
  /* Same guard as the other entry points; dev is dereferenced below. */
  CHECK_VIRTIO_KFD_OPEN();

  if (!MemoryAddress) return HSAKMT_STATUS_INVALID_PARAMETER;

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_memory_rsp* rsp;
  vhsakmt_bo_handle bo;
  int r;
  struct vhsakmt_ccmd_memory_req req = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_ALLOC,
      .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id),
      .alloc_args =
          {
              .PreferredNode = PreferredNode,
              .SizeInBytes = SizeInBytes,
              .MemFlags = MemFlags,
          },
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (!rsp) return -ENOMEM;

  /* Do not interpret the response if the command never ran. */
  r = vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);
  if (r) return HSAKMT_STATUS_ERROR;
  if (rsp->ret) return rsp->ret;

  if (!rsp->memory_handle) return -ENOMEM;

  r = vhsakmt_init_host_blob(dev, SizeInBytes, VIRTGPU_BLOB_MEM_HOST3D,
                             vhsakmt_mappable(MemFlags) ? VIRTGPU_BLOB_FLAG_USE_MAPPABLE : 0,
                             req.blob_id, VHSA_BO_KFD_MEM, (void*)rsp->memory_handle, &bo);
  if (r) return r;

  if (!vhsakmt_mappable(MemFlags)) {
    /* Not CPU-mappable: the host address is used directly. */
    bo->cpu_addr = bo->host_addr;
    if (MemFlags.ui32.Scratch) {
      vhsakmt_set_scratch_area(dev, PreferredNode, (uint64_t)bo->cpu_addr, SizeInBytes);
      bo->bo_type |= VHSA_BO_SCRATCH;
    }
  } else {
    r = vhsakmt_bo_cpu_map(bo, &bo->cpu_addr, bo->host_addr);
    if (r) {
      /* A bare free() would leak the blob handle created above. */
      pthread_mutex_destroy(&bo->map_mutex);
      vhsakmt_destroy_handle(dev, bo);
      return -ENOMEM;
    }
  }

  if (!MemFlags.ui32.Scratch) vhsakmt_insert_bo(dev, bo, bo->cpu_addr, bo->size);

  *MemoryAddress = bo->cpu_addr;

  vhsa_debug("alloc mem addr: %p, host addr: %p, size: %lx, res-id: %d, handble: %d\n",
             *MemoryAddress, bo->host_addr, SizeInBytes, bo->real.res_id, bo->real.handle);

  return HSAKMT_STATUS_SUCCESS;
}

/*
 * Drop one reference on a BO and tear it down when that was the last one.
 *
 * Returns 0 while other references remain or when the BO is a queue doorbell (those are
 * released together with their queue), -EINVAL when the BO's lookup key is all zero, and
 * otherwise the result of vhsakmt_destroy_handle().
 */
int vhsakmt_bo_free(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) {
  bo_entry node;

  if (vhsakmt_atomic_dec_return(&bo->refcount) > 0) return 0;

  node = vhsakmt_bo_handle_to_entry(bo);
  if (!node->key.addr && !node->key.size) return -EINVAL;

  /* do not free BOs of queue, let them be freed with queue */
  if (bo->bo_type & VHSA_BO_QUEUE_DOORBELL) {
    vhsa_err("%s: Try to free VHSA_BO_QUEUE_DOORBELL memory: %p\n", __FUNCTION__, bo->cpu_addr);
    return 0;
  }

  vhsakmt_remove_bo(dev, bo);

  if (bo->cpu_addr) vhsakmt_bo_cpu_unmap(bo);

  /* free(NULL) is a no-op, so the optional attachments need no guard. */
  free(bo->event);
  free(bo->gl_meta_data);

  pthread_mutex_destroy(&bo->map_mutex);

  return vhsakmt_destroy_handle(dev, bo);
}

/*
 * Forget a userptr BO on the guest side: remove it from the device's BO lookup with
 * vhsakmt_remove_bo() and release the vhsakmt_bo allocation with free(). This function
 * itself does not call vhsakmt_destroy_handle(); the caller must not touch bo afterwards.
 */
static void vhsakmt_remove_userptr_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) {
  vhsakmt_remove_bo(dev, bo);
  free(bo);
}

/*
 * Release the BO that backs MemoryAddress.
 *
 * Addresses with no tracked BO are treated as already freed and report success; otherwise
 * the result of vhsakmt_bo_free() is returned. SizeInBytes is only used for logging.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtFreeMemory(void* MemoryAddress, HSAuint64 SizeInBytes) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  vhsakmt_bo_handle victim = vhsakmt_find_bo_by_addr(dev, MemoryAddress);

  if (victim) {
    vhsa_debug("%s: addr: %p, size: %lx, res_id: %d\n", __FUNCTION__, MemoryAddress, SizeInBytes,
               victim->real.res_id);
    return vhsakmt_bo_free(dev, victim);
  }

  return HSAKMT_STATUS_SUCCESS;
}

/*
 * Map a memory range to the given set of GPU nodes on the host.
 *
 * The request is heap-allocated because the node ids travel as a variable-length payload
 * after the fixed header; its length is rounded up to a multiple of 8. If MemoryAddress
 * belongs to a tracked BO, the BO's host address is sent instead, and a userptr BO is
 * dropped from guest tracking before the command is issued. The host's alternate VA is
 * written to *AlternateVAGPU. Returns the host status, or -ENOMEM on allocation failure.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint64 MemorySizeInBytes,
                                                   HSAuint64* AlternateVAGPU,
                                                   HsaMemMapFlags MemMapFlags,
                                                   HSAuint64 NumberOfNodes, HSAuint32* NodeArray) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  size_t cmd_len =
      VHSA_ALIGN_UP(sizeof(struct vhsakmt_ccmd_memory_req) + NumberOfNodes * sizeof(*NodeArray), 8);
  struct vhsakmt_ccmd_memory_req* cmd = (void*)calloc(1, cmd_len);
  struct vhsakmt_ccmd_memory_rsp* reply;
  vhsakmt_bo_handle tracked;

  if (!cmd) return -ENOMEM;

  cmd->hdr = VHSAKMT_CCMD(MEMORY, cmd_len);
  cmd->type = VHSAKMT_CCMD_MEMORY_MAP_TO_GPU_NODES;
  cmd->map_to_GPU_nodes_args.MemorySizeInBytes = MemorySizeInBytes;
  cmd->map_to_GPU_nodes_args.MemMapFlags = MemMapFlags;
  cmd->map_to_GPU_nodes_args.NumberOfNodes = NumberOfNodes;

  tracked = vhsakmt_find_bo_by_addr(dev, MemoryAddress);
  if (!tracked) {
    cmd->map_to_GPU_nodes_args.MemoryAddress = (uint64_t)MemoryAddress;
  } else {
    /* Read the host address before a userptr BO is possibly freed below. */
    cmd->map_to_GPU_nodes_args.MemoryAddress = (uint64_t)tracked->host_addr;
    if (tracked->bo_type & VHSA_BO_USERPTR) vhsakmt_remove_userptr_bo(dev, tracked);
  }

  memcpy(cmd->payload, NodeArray, NumberOfNodes * sizeof(*NodeArray));

  reply = vhsakmt_alloc_rsp(dev, &cmd->hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (!reply) {
    free(cmd);
    return -ENOMEM;
  }

  vhsakmt_execbuf_cpu(dev, &cmd->hdr, __FUNCTION__);

  *AlternateVAGPU = reply->alternate_vagpu;

  vhsa_debug("%s: gva: %p, hva: 0x%lx, size: %lx, AlternateVAGPU: %lx, ret: %d\n", __FUNCTION__,
             MemoryAddress, cmd->map_to_GPU_nodes_args.MemoryAddress, MemorySizeInBytes,
             *AlternateVAGPU, reply->ret);

  free(cmd);
  return reply->ret;
}

/*
 * Ask the host to unmap the memory behind MemoryAddress from the GPU.
 *
 * Only addresses that resolve to a tracked BO are forwarded (using the BO's host address);
 * anything else is reported as success without contacting the host. Returns the host
 * status, or -ENOMEM when no response buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtUnmapMemoryToGPU(void* MemoryAddress) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_memory_rsp* reply;
  vhsakmt_bo_handle tracked = vhsakmt_find_bo_by_addr(dev, MemoryAddress);

  if (tracked == NULL) return HSAKMT_STATUS_SUCCESS;

  struct vhsakmt_ccmd_memory_req cmd = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_UNMAP_TO_GPU,
      .MemoryAddress = (uint64_t)tracked->host_addr,
  };

  reply = vhsakmt_alloc_rsp(dev, &cmd.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &cmd.hdr, __FUNCTION__);

  vhsa_debug("%s: gva: %p, hva: 0x%lx\n", __FUNCTION__, MemoryAddress, cmd.MemoryAddress);

  return reply->ret;
}

/*
 * Query the host for the memory available on a node.
 *
 * The value from the response is stored in *AvailableBytes regardless of the host status,
 * which is then returned. -ENOMEM is returned when no response buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtAvailableMemory(HSAuint32 Node, HSAuint64* AvailableBytes) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_memory_req query = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_AVAIL_MEM,
      .Node = Node,
  };
  struct vhsakmt_ccmd_memory_rsp* reply =
      vhsakmt_alloc_rsp(dev, &query.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &query.hdr, __FUNCTION__);

  *AvailableBytes = reply->available_bytes;
  return reply->ret;
}

/*
 * Map a piece of scratch memory to the GPU and create a BO for it.
 *
 * Issues VHSAKMT_CCMD_MEMORY_MAP_MEM_TO_GPU with need_create_bo set and a fresh blob id,
 * then creates a VHSA_BO_SCRATCH_MAP host blob for that id. The new BO records
 * MemoryAddress as its CPU address and the returned memory handle as its host address.
 * The BO is not inserted into any lookup structure here (see the TODO below).
 *
 * Returns the host status, the vhsakmt_init_host_blob() error, or -ENOMEM when no response
 * buffer is available. *AlternateVAGPU is written only on success.
 */
static int vhsakmt_create_scratch_map_memory(vhsakmt_device_handle dev, void* MemoryAddress,
                                             HSAuint64 MemorySizeInBytes,
                                             HSAuint64* AlternateVAGPU) {
  struct vhsakmt_ccmd_memory_rsp* reply;
  vhsakmt_bo_handle scratch_bo;
  int err;
  struct vhsakmt_ccmd_memory_req cmd = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_MAP_MEM_TO_GPU,
      .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id),
      .map_to_GPU_args =
          {
              .MemoryAddress = (uint64_t)MemoryAddress,
              .MemorySizeInBytes = MemorySizeInBytes,
              .need_create_bo = true,
          },
  };

  reply = vhsakmt_alloc_rsp(dev, &cmd.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &cmd.hdr, __FUNCTION__);
  if (reply->ret != 0) return reply->ret;

  err = vhsakmt_init_host_blob(dev, MemorySizeInBytes, VIRTGPU_BLOB_MEM_HOST3D, 0, cmd.blob_id,
                               VHSA_BO_SCRATCH_MAP, NULL, &scratch_bo);
  if (err != 0) return err;

  // TODO: insert scratch bo into rbtree, or insert it in dev nodes.

  scratch_bo->host_addr = (void*)reply->memory_handle;
  scratch_bo->cpu_addr = MemoryAddress;
  *AlternateVAGPU = reply->alternate_vagpu;

  vhsa_debug(
      "%s: create scratch memory, gva: %p, memory_handle: 0x%p, alternate_vagpu: %p, size: %lx\n",
      __FUNCTION__, MemoryAddress, (void*)reply->memory_handle, (void*)reply->alternate_vagpu,
      MemorySizeInBytes);

  return reply->ret;
}

/*
 * Map a memory range to the GPU on the host.
 *
 * Untracked addresses inside a scratch area are delegated to
 * vhsakmt_create_scratch_map_memory(). Otherwise a VHSAKMT_CCMD_MEMORY_MAP_MEM_TO_GPU
 * request is sent with the BO's host address (or MemoryAddress itself when no BO is
 * tracked); a userptr BO is dropped from guest tracking before the command is issued.
 * The host's alternate VA is written to *AlternateVAGPU. Returns the host status, or
 * -ENOMEM when no response buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPU(void* MemoryAddress, HSAuint64 MemorySizeInBytes,
                                              HSAuint64* AlternateVAGPU) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_memory_rsp* reply;
  vhsakmt_bo_handle tracked = vhsakmt_find_bo_by_addr(dev, MemoryAddress);
  uint64_t target_va;

  if (tracked) {
    target_va = (uint64_t)tracked->host_addr;
  } else {
    if (vhsakmt_is_scratch_mem(dev, MemoryAddress))
      return vhsakmt_create_scratch_map_memory(dev, MemoryAddress, MemorySizeInBytes,
                                               AlternateVAGPU);
    target_va = (uint64_t)MemoryAddress;
  }

  struct vhsakmt_ccmd_memory_req cmd = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_MAP_MEM_TO_GPU,
      .map_to_GPU_args =
          {
              .MemoryAddress = target_va,
              .MemorySizeInBytes = MemorySizeInBytes,
          },
  };

  /* The host address was captured above, so the userptr BO can go now. */
  if (tracked && (tracked->bo_type & VHSA_BO_USERPTR)) vhsakmt_remove_userptr_bo(dev, tracked);

  reply = vhsakmt_alloc_rsp(dev, &cmd.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (!reply) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &cmd.hdr, __FUNCTION__);

  vhsa_debug("%s: gva: %p, hva: 0x%lx, size: %lx\n", __FUNCTION__, MemoryAddress, cmd.MemoryAddress,
             MemorySizeInBytes);

  *AlternateVAGPU = reply->alternate_vagpu;

  return reply->ret;
}

/*
 * Ask the host to map the userptr resource identified by res_id.
 *
 * The handle field of the response is zeroed before the command runs, so a value of 0 in
 * *userptr_handle afterwards means the host did not supply one. addr and size are accepted
 * for the caller's convenience but are not part of the request. Returns the host status,
 * or -ENOMEM when no response buffer is available (in which case *userptr_handle is left
 * untouched).
 */
static int vhsakmt_map_userptr(vhsakmt_device_handle dev, void* addr, size_t size, uint32_t res_id,
                               uint64_t* userptr_handle) {
  struct vhsakmt_ccmd_memory_rsp* reply;
  struct vhsakmt_ccmd_memory_req cmd = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_MAP_USERPTR,
      .res_id = res_id,
  };

  reply = vhsakmt_alloc_rsp(dev, &cmd.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (reply == NULL) return -ENOMEM;

  reply->map_userptr_rsp.userptr_handle = 0;
  vhsakmt_execbuf_cpu(dev, &cmd.hdr, __FUNCTION__);
  *userptr_handle = reply->map_userptr_rsp.userptr_handle;

  return reply->ret;
}

/*
 * Register a guest user pointer with the host and return the matching host address.
 *
 * A userptr blob is created for [addr, addr + size), the host is asked to map it, and the
 * resulting BO is inserted into the device's BO lookup. The returned host address is the
 * host's userptr handle plus the in-page offset of addr (plus userptr_offset when
 * vhsakmt_init_userptr_blob() reports, via a positive return, that the pointer was already
 * registered).
 *
 * Returns NULL when the blob cannot be created or the host supplies no handle.
 */
static void* vhsakmt_map_to_gpu(void* addr, size_t size) {
  vhsakmt_device_handle dev = vhsakmt_dev();
  size_t offset = (uint64_t)addr % getpagesize();
  size_t map_size = (VHSA_ALIGN_UP(size + offset, getpagesize()) / getpagesize()) * getpagesize();
  uint64_t userptr_offset, userptr_handle = 0;
  vhsakmt_bo_handle userptr;
  int r, map_ret;

  vhsa_debug("%s: addr: %p, size: 0x%lx, size + offset: 0x%lx, map_size: 0x%lx\n", __FUNCTION__,
             addr, size, size + offset, map_size);

  r = vhsakmt_init_userptr_blob(dev, addr, size, &userptr, &userptr_offset);
  if (r < 0) {
    vhsa_debug("%s: userptr create failed at address: %p, ret = %d\n", __FUNCTION__, addr, r);
    return NULL;
  }

  /*
   * Keep the mapping status separate from r: r still decides below whether the pointer was
   * already registered, and the failure log must report the status of this call rather
   * than the (successful) blob creation.
   */
  map_ret = vhsakmt_map_userptr(dev, addr, size, userptr->real.res_id, &userptr_handle);
  if (!userptr_handle) {
    vhsa_debug("%s: map userptr failed at address: %p, ret = %d\n", __FUNCTION__, addr, map_ret);
    /* NOTE(review): confirm vhsakmt_destroy_handle() does not already release the BO that
     * vhsakmt_remove_userptr_bo() frees next. */
    vhsakmt_destroy_handle(dev, userptr);
    vhsakmt_remove_userptr_bo(dev, userptr);
    return NULL;
  }
  userptr->host_addr = VHSA_UINT64_TO_VPTR(VHSA_VPTR_TO_UINT64(userptr_handle) + offset);

  if (r > 0) {
    vhsa_debug("%s: userptr: %p already registered, offset: %lx\n", __FUNCTION__, addr,
               userptr_offset);
    userptr->host_addr =
        VHSA_UINT64_TO_VPTR(VHSA_VPTR_TO_UINT64(userptr->host_addr) + userptr_offset);
  }
  vhsakmt_insert_bo(dev, userptr, userptr->cpu_addr, userptr->size);

  vhsa_debug("%s: real gva: %p, gva: %p, hva: %p, size: %lx, offset: %" PRIu64
             ", map_size: 0x%lx\n",
             __FUNCTION__, addr, userptr->cpu_addr, userptr->host_addr, size, offset, map_size);
  return userptr->host_addr;
}

/*
 * Register a user pointer with the host.
 *
 * Addresses that vhsakmt_is_userptr() does not classify as user pointers need no
 * registration and report success immediately. Otherwise the range is mapped through
 * vhsakmt_map_to_gpu() and the resulting host address is registered with
 * VHSAKMT_CCMD_MEMORY_REG_MEM_WITH_FLAG.
 *
 * Returns the host status, HSAKMT_STATUS_ERROR when the mapping fails, or -ENOMEM when no
 * response buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryWithFlags(void* MemoryAddress,
                                                       HSAuint64 MemorySizeInBytes,
                                                       HsaMemFlags MemFlags) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_memory_rsp* reply;
  void* host_va;

  /* no need to register memory from lihsakmt / not a userptr */
  if (!vhsakmt_is_userptr(dev, MemoryAddress)) return HSAKMT_STATUS_SUCCESS;

  host_va = vhsakmt_map_to_gpu(MemoryAddress, MemorySizeInBytes);
  if (host_va == NULL) {
    vhsa_debug("%s: register memory failed, gva: %p, size: %lx\n", __FUNCTION__, MemoryAddress,
               MemorySizeInBytes);
    return HSAKMT_STATUS_ERROR;
  }

  struct vhsakmt_ccmd_memory_req cmd = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_REG_MEM_WITH_FLAG,
      .reg_mem_with_flag =
          {
              .MemoryAddress = (uint64_t)host_va,
              .MemorySizeInBytes = MemorySizeInBytes,
              .MemFlags = MemFlags,
          },
  };

  reply = vhsakmt_alloc_rsp(dev, &cmd.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &cmd.hdr, __FUNCTION__);
  return reply->ret;
}

/*
 * Deregister a CL/GL interop BO on the host and release it in the guest.
 *
 * The BO is released through vhsakmt_bo_free() even when the host reports a failure.
 * Returns the host status, or -ENOMEM when no response buffer is available (in which case
 * the BO is left untouched).
 */
static int vhsakmt_remove_clgl_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) {
  struct vhsakmt_ccmd_memory_rsp* rsp;
  /* Captured up front: bo must not be dereferenced once vhsakmt_bo_free() has run. */
  void* gva = bo->cpu_addr;
  int ret;
  struct vhsakmt_ccmd_memory_req req = {
      .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)),
      .type = VHSAKMT_CCMD_MEMORY_DEREG_MEM,
      .res_id = bo->real.res_id,
      .MemoryAddress = (uint64_t)bo->cpu_addr,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp));
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);
  ret = rsp->ret;
  if (ret) vhsa_err("%s: deregister failed clgl memory gva: %p\n", __FUNCTION__, gva);

  vhsakmt_bo_free(dev, bo);

  /* Log from the saved copies; reading bo->cpu_addr here would be a use-after-free. */
  vhsa_debug("%s: deregister clgl memory gva: %p, ret: %d\n", __FUNCTION__, gva, ret);
  return ret;
}

/*
 * Deregister previously registered memory.
 *
 * Untracked addresses report success. CL/GL interop BOs are handed to
 * vhsakmt_remove_clgl_bo(), whose status is returned; any other BO is removed from the
 * device's BO lookup and freed locally, returning 0.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtDeregisterMemory(void* MemoryAddress) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  vhsakmt_bo_handle tracked = vhsakmt_find_bo_by_addr(dev, MemoryAddress);

  if (tracked == NULL) return HSAKMT_STATUS_SUCCESS;

  vhsa_debug("%s: remove userptr %p size: 0x%lx, res id: %d\n", __FUNCTION__, MemoryAddress,
             (size_t)tracked->size, tracked->real.res_id);

  if (tracked->bo_type & VHSA_BO_CLGL) return vhsakmt_remove_clgl_bo(dev, tracked);

  vhsakmt_remove_bo(dev, tracked);
  free(tracked);
  return 0;
}

/*
 * Query the host for information about a pointer.
 *
 * Pointer is first translated with vhsakmt_gpu_va(); the host's HsaPointerInfo is copied
 * into *PointerInfo. When the response reports mapped nodes, at most
 * QUERY_PTR_INFO_MAX_MAPPED_NODES ids are copied from the response payload into a freshly
 * calloc'ed array stored in PointerInfo->MappedNodes.
 *
 * Returns the host status, -HSAKMT_STATUS_ERROR when the pointer cannot be translated,
 * -ENOMEM when no response buffer is available, or -HSAKMT_STATUS_NO_MEMORY when the node
 * array cannot be allocated.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtQueryPointerInfo(const void* Pointer, HsaPointerInfo* PointerInfo) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* reply;
  void* device_va = vhsakmt_gpu_va(dev, VHSA_UINT64_TO_VPTR(Pointer));

  if (device_va == NULL) return -HSAKMT_STATUS_ERROR;

  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_POINTER_INFO,
      .pointer = VHSA_VPTR_TO_UINT64(device_va),
  };

  /* Reserve room behind the fixed response for the largest node list we accept. */
  reply = vhsakmt_alloc_rsp(dev, &query.hdr,
                            sizeof(struct vhsakmt_ccmd_query_info_rsp) +
                                QUERY_PTR_INFO_MAX_MAPPED_NODES * sizeof(uint32_t));
  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &query.hdr, __FUNCTION__);

  memcpy(PointerInfo, &reply->ptr_info, sizeof(HsaPointerInfo));

  /* Nothing to copy unless the response carries both a count and a node pointer. */
  if (!PointerInfo->NMappedNodes || !PointerInfo->MappedNodes) return reply->ret;

  if (PointerInfo->NMappedNodes > QUERY_PTR_INFO_MAX_MAPPED_NODES) {
    PointerInfo->NMappedNodes = QUERY_PTR_INFO_MAX_MAPPED_NODES;
    vhsa_debug(
        "%s: query pointer: %p info mapped nodes greater than QUERY_PTR_INFO_MAX_MAPPED_NODES\n",
        __FUNCTION__, Pointer);
  }

  PointerInfo->MappedNodes = calloc(PointerInfo->NMappedNodes, sizeof(uint32_t));
  if (PointerInfo->MappedNodes == NULL) {
    PointerInfo->NMappedNodes = 0;
    return -HSAKMT_STATUS_NO_MEMORY;
  }

  memcpy(VHSA_UINT64_TO_VPTR(PointerInfo->MappedNodes), reply->payload,
         PointerInfo->NMappedNodes * sizeof(uint32_t));

  return reply->ret;
}

/*
 * Fetch the GPU tile configuration of a node from the host.
 *
 * On entry config->TileConfig / config->MacroTileConfig point at caller-owned arrays and
 * config->NumTileConfigs / config->NumMacroTileConfigs give their capacities. The scalar
 * fields of *config are replaced by the host's response, while the two array pointers stay
 * the caller's. Table entries are copied from the response payload (tile entries first,
 * macro tile entries directly after them), limited to the capacities passed in.
 *
 * Returns the host status, or -ENOMEM when no response buffer is available.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig* config) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  uint8_t* config_cpy_addr = NULL;
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  /* Remember the caller's buffers and capacities: the response struct copied below
   * overwrites every field of *config, including the two pointers. */
  HSAuint32* tile_buf = config->TileConfig;
  HSAuint32* macro_buf = config->MacroTileConfig;
  size_t tile_cap = config->NumTileConfigs;
  size_t macro_cap = config->NumMacroTileConfigs;
  size_t tile_cnt, macro_cnt;
  unsigned req_len = sizeof(struct vhsakmt_ccmd_query_info_req);
  unsigned rsp_len = sizeof(struct vhsakmt_ccmd_query_info_rsp) +
      config->NumTileConfigs * sizeof(HSAuint32) + config->NumMacroTileConfigs * sizeof(HSAuint32);

  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, req_len),
      .type = VHSAKMT_CCMD_QUERY_TILE_CONFIG,
      .tile_config_args.NodeId = NodeId,
      .tile_config_args.config = *config,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, rsp_len);
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  memcpy(config, &rsp->tile_config_rsp, sizeof(HsaGpuTileConfig));

  /* Pointers found in the response are not guest buffers; put the caller's back. */
  config->TileConfig = tile_buf;
  config->MacroTileConfig = macro_buf;

  /*
   * Never copy more entries than the caller provided room for. The response payload was
   * sized from those same capacities, so this also keeps the reads inside the response.
   */
  tile_cnt = config->NumTileConfigs < tile_cap ? config->NumTileConfigs : tile_cap;
  macro_cnt = config->NumMacroTileConfigs < macro_cap ? config->NumMacroTileConfigs : macro_cap;

  config_cpy_addr = ((uint8_t*)rsp->payload);
  if (tile_buf && tile_cnt) memcpy(tile_buf, config_cpy_addr, tile_cnt * sizeof(HSAuint32));
  config_cpy_addr += tile_cnt * sizeof(HSAuint32);
  if (macro_buf && macro_cnt) memcpy(macro_buf, config_cpy_addr, macro_cnt * sizeof(HSAuint32));

  return rsp->ret;
}

/*
 * Create the guest-side bookkeeping BO for a registered graphics (CL/GL interop) resource
 * and insert it into the device's BO lookup, keyed by addr.
 *
 * The BO starts with one reference and no mappings, is tagged VHSA_BO_CLGL, uses addr as
 * its host address and stores meta_data (when given) as its GL metadata pointer.
 *
 * Returns 0 on success or -ENOMEM when the BO cannot be allocated.
 */
static int vhsakmt_create_clgl_bo(vhsakmt_device_handle dev, void* addr, size_t size,
                                  uint32_t res_id, uint32_t bo_handle, void* meta_data) {
  vhsakmt_bo_handle clgl = calloc(1, sizeof(struct vhsakmt_bo));

  if (clgl == NULL) return -ENOMEM;

  atomic_store(&clgl->real.map_count, 0);
  atomic_store(&clgl->refcount, 1);
  clgl->dev = dev;
  clgl->size = size;
  clgl->host_addr = addr;

#ifdef CLGL_EXPORT_RESID
  clgl->real.res_id = GraphicsResourceHandle;
#else
  clgl->real.res_id = res_id;
#endif

  /* GL bo handle from GL context*/
  clgl->real.handle = bo_handle;
  clgl->bo_type |= VHSA_BO_CLGL;
  if (meta_data != NULL) clgl->gl_meta_data = meta_data;

  vhsakmt_insert_bo(dev, clgl, addr, clgl->size);

  return 0;
}

/*
 * Turn an exported graphics handle into a local GEM handle and its virtio-gpu resource id.
 *
 * gfx_handle is passed to drmPrimeFDToHandle() on the device's DRM fd; the resulting handle
 * is stored in *bo_handle and then looked up with virtio_gpu_res_id() to fill *res_id.
 * Returns 0 on success or the drmPrimeFDToHandle() error.
 */
static int vhsakmt_gfxhandle_to_resid(vhsakmt_device_handle dev, uint32_t gfx_handle,
                                      uint32_t* res_id, uint32_t* bo_handle) {
  int err = drmPrimeFDToHandle(dev->vgdev->fd, gfx_handle, bo_handle);

  if (err != 0) {
    vhsa_err("%s: drmPrimeFDToHandle failed for handle: %u\n", __FUNCTION__, gfx_handle);
    return err;
  }

  virtio_gpu_res_id(dev->vgdev, *bo_handle, res_id);

  vhsa_debug("%s: register praphics handle: handle: %d, bo_handle: %d, res_id: %d\n", __FUNCTION__,
             gfx_handle, *bo_handle, *res_id);

  return 0;
}

/*
 * Register an exported graphics resource with the host for use on the given nodes.
 *
 * The node ids travel as a variable-length payload after the fixed request; the request
 * length is rounded up to a multiple of 8, as in vhsaKmtMapMemoryToGPUNodes(). On success
 * *GraphicsResourceInfo is filled from the response, any metadata is copied into a freshly
 * calloc'ed buffer stored in GraphicsResourceInfo->Metadata, and a CL/GL BO is created for
 * the resource.
 *
 * GraphicsResourceHandle is closed once the host command has succeeded, and also when the
 * response buffer cannot be allocated; it is left open when handle translation or the host
 * command itself fails.
 *
 * Returns the host status, the handle-translation error, or -ENOMEM on allocation failure.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes(
    HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo,
    HSAuint64 NumberOfNodes, HSAuint32* NodeArray) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  uint32_t bo_handle = 0, res_id = 0;
  uint64_t meta_data_size = VHSA_GL_METADATA_MAX_SIZE;
  /* Size of the node id list in bytes: element size, not the size of the pointer. */
  size_t nodes_len = NumberOfNodes * sizeof(*NodeArray);
  size_t req_len = VHSA_ALIGN_UP(sizeof(struct vhsakmt_ccmd_gl_inter_req) + nodes_len, 8);
  struct vhsakmt_ccmd_gl_inter_req* req;
  struct vhsakmt_ccmd_gl_inter_rsp* rsp;
  int r = 0;

  req = calloc(1, req_len);
  if (!req) return -ENOMEM;

  req->hdr = VHSAKMT_CCMD(GL_INTER, req_len);
  req->type = VHSAKMT_CCMD_GL_REG_GHD_TO_NODES;
  req->reg_ghd_to_nodes.NumberOfNodes = NumberOfNodes;
  req->reg_ghd_to_nodes.res_handle = GraphicsResourceHandle;

#ifdef CLGL_EXPORT_RESID
  req->reg_ghd_to_nodes.GraphicsResourceHandle = GraphicsResourceHandle;
#else
  r = vhsakmt_gfxhandle_to_resid(dev, GraphicsResourceHandle, &res_id, &bo_handle);
  if (r) goto free_req;

  req->reg_ghd_to_nodes.GraphicsResourceHandle = bo_handle;
  req->reg_ghd_to_nodes.res_handle = res_id;
#endif

  if (nodes_len) memcpy(req->payload, NodeArray, nodes_len);

  rsp =
      vhsakmt_alloc_rsp(dev, &req->hdr, sizeof(struct vhsakmt_ccmd_gl_inter_rsp) + meta_data_size);
  if (!rsp) {
    r = -ENOMEM;
    goto free_out;
  }

  vhsakmt_execbuf_cpu(dev, &req->hdr, __FUNCTION__);
  if (rsp->ret) {
    /* The FD stays open on a host failure; only the request needs releasing. */
    r = rsp->ret;
    goto free_req;
  }

  memcpy(GraphicsResourceInfo, &rsp->info, sizeof(HsaGraphicsResourceInfo));
  if (rsp->info.MetadataSizeInBytes) {
    GraphicsResourceInfo->Metadata = calloc(1, GraphicsResourceInfo->MetadataSizeInBytes);
    if (!GraphicsResourceInfo->Metadata) {
      r = -ENOMEM;
      goto free_out;
    }

    memcpy(VHSA_UINT64_TO_VPTR(GraphicsResourceInfo->Metadata), rsp->payload,
           GraphicsResourceInfo->MetadataSizeInBytes);
  } else
    GraphicsResourceInfo->Metadata = NULL;

  vhsa_debug("%s: register graphics handle: handle: %ld hva: %p, size: %lx\n", __FUNCTION__,
             GraphicsResourceHandle, GraphicsResourceInfo->MemoryAddress,
             GraphicsResourceInfo->SizeInBytes);

  r = vhsakmt_create_clgl_bo(dev, GraphicsResourceInfo->MemoryAddress,
                             GraphicsResourceInfo->SizeInBytes, res_id, bo_handle,
                             VHSA_UINT64_TO_VPTR(GraphicsResourceInfo->Metadata));
  if (r) goto free_out;

  r = rsp->ret;

free_out:
  /* close exported FD after register or close it when deregister. Close after register here. */
  close(GraphicsResourceHandle);
free_req:
  free(req);
  return r;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_openclose.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

#include <unistd.h>

/* Held by vhsakmt_device_init() and vhsaKmtCloseKFD() while they create or tear down the
 * device. */
pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;
/* The virtio device handle; NULL until vhsakmt_device_init() stores a device here. */
vhsakmt_device_handle dev_list = NULL;

/* Return the handle currently stored in dev_list (no locking is done here). */
vhsakmt_device_handle vhsakmt_dev(void) { return dev_list; }

/*
 * Perform the open-KFD handshake with the host.
 *
 * The guest's current VM start address (from vhsakmt_vm_start()) is sent to the host; the
 * VM area reported back is recorded on the device with vhsakmt_set_vm_area() and then
 * reserved with vhsakmt_reserve_va().
 *
 * Returns the host status on success, -HSAKMT_STATUS_NO_MEMORY when the start address or
 * the response buffer is unavailable or the area cannot be reserved, and
 * -HSAKMT_STATUS_ERROR when the host reports no usable VM area.
 */
static HSAKMT_STATUS vhsakmt_openKFD_cmd(vhsakmt_device_handle dev) {
  struct vhsakmt_ccmd_query_info_rsp* reply;
  void* heap_base = vhsakmt_vm_start();

  if (heap_base == NULL) return -HSAKMT_STATUS_NO_MEMORY;

  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_OPEN_KFD,
      .open_kfd_args =
          {
              .cur_vm_start = VHSA_VPTR_TO_UINT64(heap_base),
          },
  };

  if (query.open_kfd_args.cur_vm_start == 0) {
    vhsa_err("%s: failed to get current heap start address\n", __FUNCTION__);
    return -HSAKMT_STATUS_ERROR;
  }

  reply = vhsakmt_alloc_rsp(dev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (reply == NULL) return -HSAKMT_STATUS_NO_MEMORY;

  vhsakmt_execbuf_cpu(dev, &query.hdr, __FUNCTION__);

  /* A zero start or a zero size means the host gave us nothing usable. */
  if (!(reply->open_kfd_rsp.vm_start && reply->open_kfd_rsp.vm_size)) {
    vhsa_err("%s: failed to get KFD VM area\n", __FUNCTION__);
    return -HSAKMT_STATUS_ERROR;
  }

  vhsakmt_set_vm_area(dev, reply->open_kfd_rsp.vm_start, reply->open_kfd_rsp.vm_size);

  if (vhsakmt_reserve_va(dev->vm_start, dev->vm_size) != 0) {
    vhsa_err("%s: failed to reserve VM area: [%lx-%lx]-0x%lx\n", __FUNCTION__, dev->vm_start,
             dev->vm_start + dev->vm_size, dev->vm_size);
    return -HSAKMT_STATUS_NO_MEMORY;
  }

  vhsa_debug("%s: kfd vm range: [%lx-%lx]-0x%lx\n", __FUNCTION__, dev->vm_start,
             dev->vm_start + dev->vm_size, dev->vm_size);
  return reply->ret;
}

/*
 * Create the virtio device on first use and publish it in dev_list.
 *
 * The whole function runs under dev_mutex, including the "already created?" check, so two
 * threads calling it concurrently cannot both build a device. An existing device is
 * returned as is.
 *
 * Returns the device handle, or NULL when the virtio GPU node cannot be opened, the device
 * cannot be allocated, or virtio_gpu_init() fails.
 */
static vhsakmt_device_handle vhsakmt_device_init(void) {
  int fd;
  vhsakmt_device_handle dev;

  pthread_mutex_lock(&dev_mutex);

  dev = dev_list;
  if (dev) goto out;

  fd = virtio_gpu_kfd_open();
  if (fd < 0) goto out;

  dev = calloc(1, sizeof(struct vhsakmt_device));
  if (!dev) {
    /* Nothing has taken over the descriptor yet, so it must be closed here. */
    close(fd);
    goto out;
  }

  dev->vgdev = virtio_gpu_init(fd, 0);
  if (!dev->vgdev) {
    /* NOTE(review): fd is left alone on this path because it is not visible here whether
     * virtio_gpu_init() closes it on failure -- confirm and close it if it does not. */
    free(dev);
    dev = NULL;
    goto out;
  }

  rbtree_init(&dev->bo_rbt);
  atomic_store(&dev->next_blob_id, 1);
  atomic_store(&dev->refcount, 1);
  pthread_mutex_init(&dev->bo_handles_mutex, NULL);
  pthread_mutex_init(&dev->vhsakmt_mutex, NULL);
  dev_list = dev;

out:
  pthread_mutex_unlock(&dev_mutex);
  return dev;
}

/*
 * Open the virtio KFD backend.
 *
 * The debug level is taken from the VHSAKMT_DEBUG_LEVEL environment variable when it is
 * set. The device is then created if needed and the open-KFD handshake is run against it.
 * Returns HSAKMT_STATUS_ERROR when no device could be obtained, otherwise the handshake
 * result.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenKFD(void) {
  const char* level = getenv("VHSAKMT_DEBUG_LEVEL");

  if (level != NULL) vhsakmt_debug_level = atoi(level);

  if (vhsakmt_device_init() == NULL) return HSAKMT_STATUS_ERROR;

  return vhsakmt_openKFD_cmd(vhsakmt_dev());
}

/*
 * Release what the device holds: the BO-handles mutex, the reserved VM range, the cached
 * system-property and node tables, and finally the virtio GPU connection. The device
 * structure itself is not freed here.
 */
static void vhsakmt_device_destroy(struct vhsakmt_device* dev) {
  pthread_mutex_destroy(&dev->bo_handles_mutex);
  vhsakmt_dereserve_va(dev->vm_start, dev->vm_size);

  /* free(NULL) is a no-op, so tables that were never filled in need no guard. */
  free(dev->sys_props);
  free(dev->vhsakmt_nodes);

  virtio_gpu_close(dev->vgdev);
}

/*
 * Drop one reference on the virtio device and tear it down when it was the last one.
 *
 * The device pointer is read under dev_mutex. Closing when no device exists returns
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED instead of dereferencing NULL. After the last
 * reference is gone the device is destroyed, dev_list is cleared so vhsakmt_dev() no longer
 * hands out the torn-down device (and a later open creates a fresh one), and the device
 * structure is freed.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtCloseKFD(void) {
  HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
  vhsakmt_device_handle dev;

  pthread_mutex_lock(&dev_mutex);
  dev = dev_list;
  if (!dev) {
    status = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
  } else if (vhsakmt_atomic_dec_return(&dev->refcount) <= 0) {
    vhsakmt_device_destroy(dev);
    dev_list = NULL;
    free(dev);
  }
  pthread_mutex_unlock(&dev_mutex);

  return status;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_proto.h
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef VHSAKMT_VIRTIO_PROTO_H
#define VHSAKMT_VIRTIO_PROTO_H

#include "hsakmt/linux/kfd_ioctl.h"
#include "hsakmt/hsakmt.h"

#include <drm/amdgpu_drm.h>
#include <libdrm/amdgpu.h>
#include <stdint.h>

#include "virtio_gpu.h"

/* With GCC-compatible compilers, turn implicit struct padding (-Wpadded) into an error for
 * the declarations that follow. */
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wpadded"
#endif

/* defined in other header file in virglrenderer */
/* Generates to_<child>(), which casts a struct parent pointer to a struct child pointer. */
#define VHSAKMT_DEFINE_CAST(parent, child)                                                         \
  static inline struct child* to_##child(struct parent* x) { return (struct child*)x; }

/* On C11 and later, assert that struct t has a size that is a multiple of 8 and an
 * alignment of at most 8; expands to nothing on older standards. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define VHSAKMT_STATIC_ASSERT_SIZE(t)                                                              \
  static_assert(sizeof(struct t) % 8 == 0, "sizeof(struct " #t ") not multiple of 8");             \
  static_assert(_Alignof(struct t) <= 8, "alignof(struct " #t ") too large");
#else
#define VHSAKMT_STATIC_ASSERT_SIZE(t)
#endif

/* Top-level command ids placed in a request header by VHSAKMT_CCMD(); numbering starts
 * at 1. */
enum vhsakmt_ccmd {
  VHSAKMT_CCMD_NOP = 1, /* No payload, can be used to sync with host */
  VHSAKMT_CCMD_QUERY_INFO,
  VHSAKMT_CCMD_EVENT,
  VHSAKMT_CCMD_MEMORY,
  VHSAKMT_CCMD_QUEUE,
  VHSAKMT_CCMD_GL_INTER,
};

/* An HsaEvent together with the handles and resource id that accompany it in this
 * protocol; pad keeps the size a multiple of 8. */
typedef struct _vHsaEvent {
  HsaEvent event;
  uint64_t event_handle;
  uint64_t bo_handle;
  uint32_t res_id;
  uint32_t pad;
} vHsaEvent;
VHSAKMT_STATIC_ASSERT_SIZE(_vHsaEvent)

/* Header followed by a variable-length array of events; trigered_events_num presumably
 * gives the number of valid entries in trigered_events -- TODO confirm against users. */
struct vhsakmt_event_shmem {
  uint32_t trigered_events_num;
  uint32_t pad;
  HsaEvent trigered_events[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_event_shmem)

/* Builds a request header (struct vhsakmt_ccmd_req) for command VHSAKMT_CCMD_<_cmd> with
 * its len field set to _len. */
#define VHSAKMT_CCMD(_cmd, _len)                                                                   \
  ((struct vhsakmt_ccmd_req){                                                                      \
      .cmd = VHSAKMT_CCMD_##_cmd,                                                                  \
      .len = (_len),                                                                               \
  })

/* Request for VHSAKMT_CCMD_NOP: the header only. */
struct vhsakmt_ccmd_nop_req {
  struct vhsakmt_ccmd_req hdr;
};

/*
 * VHSAKMT_CCMD_QUERY_INFO
 *
 * Sub-command selector carried in the type field of struct vhsakmt_ccmd_query_info_req.
 */
enum vhsakmt_ccmd_query_type {
  VHSAKMT_CCMD_QUERY_GPU_INFO = 0,
  VHSAKMT_CCMD_QUERY_OPEN_KFD,
  VHSAKMT_CCMD_QUERY_GET_VER,
  VHSAKMT_CCMD_QUERY_REL_SYS_PROP,
  VHSAKMT_CCMD_QUERY_GET_SYS_PROP,
  VHSAKMT_CCMD_QUERY_GET_NODE_PROP,
  VHSAKMT_CCMD_QUERY_GET_XNACK_MODE,
  VHSAKMT_CCMD_QUERY_RUN_TIME_ENABLE,
  VHSAKMT_CCMD_QUERY_RUN_TIME_DISABLE,
  VHSAKMT_CCMD_QUERY_GET_NOD_MEM_PROP,
  VHSAKMT_CCMD_QUERY_GET_NOD_CACHE_PROP,
  VHSAKMT_CCMD_QUERY_GET_NOD_IO_LINK_PROP,
  VHSAKMT_CCMD_QUERY_GET_CLOCK_COUNTERS,
  VHSAKMT_CCMD_QUERY_POINTER_INFO,
  VHSAKMT_CCMD_QUERY_TILE_CONFIG,
  VHSAKMT_CCMD_QUERY_NANO_TIME,
  VHSAKMT_CCMD_QUERY_GET_RUNTIME_CAPS,
};

/* Upper bound on the mapped-node ids carried in a pointer-info response payload. */
#define QUERY_PTR_INFO_MAX_MAPPED_NODES 3

/* Arguments for VHSAKMT_CCMD_QUERY_RUN_TIME_ENABLE. */
typedef struct _query_req_run_time_enable_args {
  /* void*     rDebug, bypassed by payload */
  uint8_t pad[3];
  uint8_t setupTtmp;
  uint32_t __pad;
} query_req_run_time_enable_args;
VHSAKMT_STATIC_ASSERT_SIZE(_query_req_run_time_enable_args)

/* Arguments for the node memory-properties query. */
typedef struct _query_req_node_mem_prop_args {
  uint32_t NodeId;
  uint32_t NumBanks;
} query_req_node_mem_prop_args;
VHSAKMT_STATIC_ASSERT_SIZE(_query_req_node_mem_prop_args)

/* Arguments for the node cache-properties query. */
typedef struct _query_req_node_cache_prop_args {
  uint32_t NodeId;
  uint32_t ProcessorId;
  uint32_t NumCaches;
  uint32_t pad;
} query_req_node_cache_prop_args;
VHSAKMT_STATIC_ASSERT_SIZE(_query_req_node_cache_prop_args)

/* Arguments for the node IO-link-properties query. */
typedef struct _query_req_node_io_link_args {
  uint32_t NodeId;
  uint32_t NumIoLinks;
} query_req_node_io_link_args;
VHSAKMT_STATIC_ASSERT_SIZE(_query_req_node_io_link_args)

/* Arguments for VHSAKMT_CCMD_QUERY_TILE_CONFIG: the caller's tile config and the node. */
typedef struct _query_tile_config {
  HsaGpuTileConfig config;
  uint32_t NodeId;
  uint32_t pad;
} query_tile_config;
VHSAKMT_STATIC_ASSERT_SIZE(_query_tile_config)

/* Arguments for VHSAKMT_CCMD_QUERY_OPEN_KFD: the guest's current VM start address. */
typedef struct _query_open_kfd_args {
  uint64_t cur_vm_start;
} query_open_kfd_args;
VHSAKMT_STATIC_ASSERT_SIZE(_query_open_kfd_args)

/* Response to VHSAKMT_CCMD_QUERY_OPEN_KFD: start and size of the KFD VM area. */
typedef struct _query_open_kfd_rsp {
  uint64_t vm_start;
  uint64_t vm_size;
} query_open_kfd_rsp;
VHSAKMT_STATIC_ASSERT_SIZE(_query_open_kfd_rsp)

/* Response to VHSAKMT_CCMD_QUERY_NANO_TIME. */
typedef struct _query_nano_time_rsp {
  uint64_t nano_time;
} query_nano_time_rsp;
VHSAKMT_STATIC_ASSERT_SIZE(_query_nano_time_rsp)

/*
 * Request for VHSAKMT_CCMD_QUERY_INFO. type holds an enum vhsakmt_ccmd_query_type value;
 * the anonymous union carries the arguments of the selected query, and payload is a
 * variable-length tail following the fixed part.
 */
struct vhsakmt_ccmd_query_info_req {
  struct vhsakmt_ccmd_req hdr;
  struct drm_amdgpu_info info;
  uint32_t type;
  uint32_t pad;
  union {
    uint64_t pointer;
    uint32_t NodeID; /* some query API just need node ID */
    query_req_run_time_enable_args run_time_enable_args;
    query_req_node_mem_prop_args node_mem_prop_args;
    query_req_node_cache_prop_args node_cache_prop_args;
    query_req_node_io_link_args node_io_link_args;
    query_tile_config tile_config_args;
    query_open_kfd_args open_kfd_args;
  };

  uint8_t payload[];
};
VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_query_info_req)
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_query_info_req)
/* Per-query maxima; presumably limits on payload entry counts -- TODO confirm against the
 * code that uses them. */
#define VHSAKMT_CCMD_QUERY_MAX_TILE_CONFIG 128
#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_MEM_PROP 128
#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_CACHE_PROP 128
#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_IO_LINK_PROP 128

/*
 * Response for VHSAKMT_CCMD_QUERY_INFO. ret is the status of the query; the anonymous
 * union holds the result of whichever query was issued, and payload is a variable-length
 * tail following the fixed part.
 */
struct vhsakmt_ccmd_query_info_rsp {
  struct vhsakmt_ccmd_rsp hdr;
  int32_t ret;
  union {
    query_open_kfd_rsp open_kfd_rsp;
    query_nano_time_rsp nano_time_rsp;
    HsaGpuTileConfig tile_config_rsp;
    HsaPointerInfo ptr_info;
    struct amdgpu_gpu_info gpu_info;
    HsaVersionInfo kfd_version;
    HsaSystemProperties sys_props;
    HsaNodeProperties node_props;
    int32_t xnack_mode;
    HsaClockCounters clock_counters;
    uint32_t caps;
    uint64_t pad[9];
  };
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_query_info_rsp)

/*
 * VHSAKMT_CCMD_EVENT
 *
 * Sub-command selector carried in the type field of struct vhsakmt_ccmd_event_req.
 */
enum vhsakmt_ccmd_event_type {
  VHSAKMT_CCMD_EVENT_CREATE,
  VHSAKMT_CCMD_EVENT_DESTROY,
  VHSAKMT_CCMD_EVENT_SET,
  VHSAKMT_CCMD_EVENT_RESET,
  VHSAKMT_CCMD_EVENT_QUERY_STATE,
  VHSAKMT_CCMD_EVENT_WAIT_ON_MULTI_EVENTS,

  VHSAKMT_CCMD_EVENT_SET_TRAP,

};
/* Arguments for event creation. */
typedef struct _event_req_create_args {
  HsaEventDescriptor EventDesc;
  uint8_t ManualReset;
  uint8_t IsSignaled;
  uint8_t pad[6];
} event_req_create_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_req_create_args)

/* Arguments for waiting on a single event with a timeout in milliseconds. */
typedef struct _event_req_wait_args {
  HsaEvent Event;
  uint32_t Milliseconds;
  uint32_t pad;
} event_req_wait_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_req_wait_args)

/* Like event_req_wait_args, with an additional event age value. */
typedef struct _event_req_wait_ext_args {
  HsaEvent Event;
  uint64_t event_age;
  uint32_t Milliseconds;
  uint32_t pad;
} event_req_wait_ext_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_req_wait_ext_args)

/* Arguments for waiting on several events. */
typedef struct _event_req_wait_on_multi_args {
  /* HsaEvent* Events[], in payload */
  uint32_t NumEvents;
  uint32_t Milliseconds;
  uint8_t WaitOnAll;
  uint8_t pad[7];
} event_req_wait_on_multi_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_req_wait_on_multi_args)

/* Like event_req_wait_on_multi_args, with an additional event age value. */
typedef struct _event_req_wait_on_multi_ext_args {
  /* HsaEvent* Events[], in payload */
  uint32_t NumEvents;
  uint32_t Milliseconds;
  uint64_t event_age;
  uint8_t WaitOnAll;
  uint8_t pad[7];
} event_req_wait_on_multi_ext_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_req_wait_on_multi_ext_args)

/* Arguments for VHSAKMT_CCMD_EVENT_SET_TRAP: trap handler and trap buffer ranges for a
 * node. */
typedef struct _event_set_trap_handler_args {
  uint64_t TrapHandlerBaseAddress;
  uint64_t TrapHandlerSizeInBytes;
  uint64_t TrapBufferBaseAddress;
  uint64_t TrapBufferSizeInBytes;
  uint32_t NodeId;
  uint32_t pad;
} event_set_trap_handler_args;
VHSAKMT_STATIC_ASSERT_SIZE(_event_set_trap_handler_args)

/*
 * Request for the EVENT command family. `type` holds a
 * vhsakmt_ccmd_event_type value and determines which union member is
 * meaningful. Variable-length data follows the fixed part in `payload`.
 */
struct vhsakmt_ccmd_event_req {
  struct vhsakmt_ccmd_req hdr;
  union {
    HsaEvent Event; /* For set, reset, query. */
    HsaEvent* event_hanele; /* NOTE(review): name looks like a typo of "event_handle"; it is part of the interface, so left as is */
    event_req_wait_args wait_args;
    event_req_create_args create_args;
    event_req_wait_ext_args wait_ext_args;
    event_req_wait_on_multi_args wait_on_multi_args;
    event_req_wait_on_multi_ext_args wait_on_multi_ext_args;
    event_set_trap_handler_args set_trap_handler_args;
  };
  uint32_t type; /* enum vhsakmt_ccmd_event_type */
  uint32_t sync_shmem_res_id;
  uint64_t blob_id;
  uint32_t res_id;
  uint32_t pad; /* explicit padding */
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_event_req)
VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_event_req)

/*
 * Response for the EVENT command family. Callers in this library return
 * `ret` as the HSAKMT_STATUS of the operation.
 */
struct vhsakmt_ccmd_event_rsp {
  struct vhsakmt_ccmd_rsp hdr;
  int32_t ret;
  vHsaEvent vevent;
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_event_rsp)

/*
 * VHSAKMT_CCMD_MEMORY
 */
/*
 * Sub-commands of the MEMORY command family.
 * NOTE(review): presumably stored in vhsakmt_ccmd_memory_req.type - confirm
 * against the senders.
 */
enum vhsakmt_ccmd_memory_type {
  VHSAKMT_CCMD_MEMORY_ALLOC,
  VHSAKMT_CCMD_MEMORY_MAP_TO_GPU_NODES,
  VHSAKMT_CCMD_MEMORY_FREE,
  VHSAKMT_CCMD_MEMORY_UNMAP_TO_GPU,
  VHSAKMT_CCMD_MEMORY_AVAIL_MEM,
  VHSAKMT_CCMD_MEMORY_MAP_MEM_TO_GPU,
  VHSAKMT_CCMD_MEMORY_REG_MEM_WITH_FLAG,
  VHSAKMT_CCMD_MEMORY_DEREG_MEM,
  VHSAKMT_CCMD_MEMORY_MAP_USERPTR,
};

/*
 * Memory allocation arguments; member `alloc_args` of the
 * vhsakmt_ccmd_memory_req union. The address is carried as a 64-bit integer.
 */
typedef struct _memory_req_alloc_args {
  uint32_t PreferredNode;
  HsaMemFlags MemFlags;
  uint64_t SizeInBytes;
  uint64_t MemoryAddress;
} memory_req_alloc_args;
VHSAKMT_STATIC_ASSERT_SIZE(_memory_req_alloc_args)

/*
 * Memory free arguments; member `free_args` of the vhsakmt_ccmd_memory_req
 * union.
 */
typedef struct _memory_req_free_args {
  uint64_t MemoryAddress;
  uint64_t SizeInBytes;
} memory_req_free_args;
VHSAKMT_STATIC_ASSERT_SIZE(_memory_req_free_args)

/*
 * Arguments for mapping memory to a set of GPU nodes; member
 * `map_to_GPU_nodes_args` of the vhsakmt_ccmd_memory_req union.
 */
typedef struct _memory_req_map_to_GPU_nodes_args {
  uint64_t MemoryAddress;
  uint64_t MemorySizeInBytes;
  uint64_t AlternateVAGPU;
  HsaMemMapFlags MemMapFlags;
  uint32_t pad; /* explicit padding */
  uint64_t NumberOfNodes;
  /* NOTE(review): a raw pointer inside a request struct; presumably the node
   * list itself is sent in the request payload - confirm against the sender. */
  uint32_t* NodeArray;
} memory_req_map_to_GPU_nodes_args;
VHSAKMT_STATIC_ASSERT_SIZE(_memory_req_map_to_GPU_nodes_args)

/*
 * Arguments for mapping memory to the GPU; member `map_to_GPU_args` of the
 * vhsakmt_ccmd_memory_req union.
 */
typedef struct _memory_map_mem_to_gpu_args {
  uint64_t MemoryAddress;
  uint64_t MemorySizeInBytes;
  uint8_t need_create_bo; /* one-byte flag */
  uint8_t pad[7];         /* explicit padding after the one-byte flag */
} memory_map_mem_to_gpu_args;
VHSAKMT_STATIC_ASSERT_SIZE(_memory_map_mem_to_gpu_args)

/*
 * Arguments for registering memory with flags; member `reg_mem_with_flag`
 * of the vhsakmt_ccmd_memory_req union.
 */
typedef struct _memory_reg_mem_with_flag {
  uint64_t MemoryAddress;
  uint64_t MemorySizeInBytes;
  HsaMemFlags MemFlags;
  uint32_t pad; /* explicit padding */
} memory_reg_mem_with_flag;
VHSAKMT_STATIC_ASSERT_SIZE(_memory_reg_mem_with_flag)

/*
 * Request for the MEMORY command family. The anonymous union holds the
 * arguments of one sub-command; variable-length data follows in `payload`.
 */
struct vhsakmt_ccmd_memory_req {
  struct vhsakmt_ccmd_req hdr;
  union {
    uint64_t MemoryAddress;
    uint32_t Node;
    memory_req_alloc_args alloc_args;
    memory_req_map_to_GPU_nodes_args map_to_GPU_nodes_args;
    memory_req_free_args free_args;
    memory_map_mem_to_gpu_args map_to_GPU_args;
    memory_reg_mem_with_flag reg_mem_with_flag;
  };
  uint64_t blob_id;
  uint32_t type; /* NOTE(review): presumably an enum vhsakmt_ccmd_memory_type value - confirm */
  uint32_t res_id;
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_memory_req)
VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_memory_req)

/*
 * Userptr-mapping result; member `map_userptr_rsp` of the
 * vhsakmt_ccmd_memory_rsp union.
 */
typedef struct _vhsakmt_ccmd_memory_map_userptr_rsp {
  uint64_t userptr_handle;
  uint32_t npfns;
  uint32_t pad; /* explicit padding */
} vhsakmt_ccmd_memory_map_userptr_rsp;
VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_map_userptr_rsp)

/*
 * Response for the MEMORY command family: a status in `ret` plus one
 * result value selected by the sub-command that was sent.
 */
struct vhsakmt_ccmd_memory_rsp {
  struct vhsakmt_ccmd_rsp hdr;
  int32_t ret;
  union {
    vhsakmt_ccmd_memory_map_userptr_rsp map_userptr_rsp;
    uint64_t memory_handle;
    uint64_t alternate_vagpu;
    uint64_t available_bytes;
  };
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_memory_rsp)

/*
 * VHSAKMT_CCMD_QUEUE
 */
/* Sub-command selector stored in vhsakmt_ccmd_queue_req.type. */
enum vhsakmt_ccmd_queue_type {
  VHSAKMT_CCMD_QUEUE_CREATE,
  VHSAKMT_CCMD_QUEUE_DESTROY,
};

/*
 * Queue-creation result returned in vhsakmt_ccmd_queue_rsp: the
 * HsaQueueResource plus host-side values the guest needs to build its
 * mappings.
 */
typedef struct _vHsaQueueResource {
  HsaQueueResource r;            /* r.QueueId is recorded on the guest queue BO */
  uint64_t host_doorbell;        /* handed back to the caller as Queue_DoorBell_aql */
  uint64_t host_doorbell_offset; /* subtracted from host_doorbell to obtain the doorbell mapping base */
  uint64_t host_write_offset;    /* added to the guest R/W BO address to form the write pointer */
  uint64_t host_read_offset;     /* added to the guest R/W BO address to form the read pointer */
  uint64_t host_rw_handle;       /* stored as host_addr of the guest R/W pointer BO */
  uint64_t queue_handle;
} vHsaQueueResource;
VHSAKMT_STATIC_ASSERT_SIZE(_vHsaQueueResource)

/*
 * Queue-creation arguments, sent with VHSAKMT_CCMD_QUEUE_CREATE as member
 * `create_queue_args` of the vhsakmt_ccmd_queue_req union.
 */
typedef struct _queue_req_create {
  uint32_t NodeId;
  HSA_QUEUE_TYPE Type;
  uint32_t QueuePercentage;
  uint32_t pad; /* explicit padding */
  HSA_QUEUE_PRIORITY Priority;
  uint32_t pad1; /* explicit padding */
  uint32_t SdmaEngineId;
  uint64_t QueueAddress;
  uint64_t QueueSizeInBytes;
  HsaEvent* Event; /* the sender stores the event's host handle here, or 0 when there is no event */
  HsaQueueResource* QueueResource; /* left zero by the sender in this library */
  uint64_t* Queue_write_ptr_aql;
  uint64_t* Queue_read_ptr_aql;
} queue_req_create;
VHSAKMT_STATIC_ASSERT_SIZE(_queue_req_create)

/*
 * Request for the QUEUE command family. `type` holds a
 * vhsakmt_ccmd_queue_type value.
 */
struct vhsakmt_ccmd_queue_req {
  struct vhsakmt_ccmd_req hdr;
  union {
    HSA_QUEUEID QueueId;
    queue_req_create create_queue_args;
  };
  uint64_t blob_id;          /* For queue create, queue resource */
  uint64_t rw_ptr_blob_id;   /* For queue create, r/w ptr memory mapping */
  uint64_t doorbell_blob_id; /* For queue create, doorbell ptr memory mapping */
  uint32_t res_id;           /* For AQL queue create, resource id of the BO holding the r/w pointers */
  uint32_t type;             /* enum vhsakmt_ccmd_queue_type */
  uint32_t queue_mem_res_id; /* For queue create, resource id of the BO containing the queue memory */
  uint32_t pad;              /* explicit padding */
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_queue_req)
VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_queue_req)

/*
 * Response for the QUEUE command family. A non-zero `ret` is treated as
 * failure by the queue-creation path.
 */
struct vhsakmt_ccmd_queue_rsp {
  struct vhsakmt_ccmd_rsp hdr;
  int32_t ret;
  vHsaQueueResource vqueue_res;
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_queue_rsp)

/*
 * VHSAKMT_CCMD_GL_INTER
 */
/*
 * Sub-commands of the GL_INTER command family.
 * NOTE(review): presumably stored in vhsakmt_ccmd_gl_inter_req.type - confirm
 * against the sender.
 */
enum vhsakmt_ccmd_gl_inter_type {
  VHSAKMT_CCMD_GL_REG_GHD_TO_NODES,
};

/*
 * Arguments for registering a graphics resource handle to a set of nodes;
 * member `reg_ghd_to_nodes` of the vhsakmt_ccmd_gl_inter_req union.
 */
typedef struct _gl_inter_req_reg_ghd_to_nodes {
  uint64_t GraphicsResourceHandle;
  uint64_t NumberOfNodes;  // NodeArray in payload
  uint32_t res_handle;
  uint32_t pad; /* explicit padding */
} gl_inter_req_reg_ghd_to_nodes;
VHSAKMT_STATIC_ASSERT_SIZE(_gl_inter_req_reg_ghd_to_nodes)

/*
 * Request for the GL_INTER command family. The union currently has a single
 * member; variable-length data follows in `payload`.
 */
struct vhsakmt_ccmd_gl_inter_req {
  struct vhsakmt_ccmd_req hdr;
  union {
    gl_inter_req_reg_ghd_to_nodes reg_ghd_to_nodes;
  };
  uint32_t type;
  uint32_t pad; /* explicit padding */
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_gl_inter_req)
VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_gl_inter_req)

/*
 * Response for the GL_INTER command family: a status in `ret` plus the
 * graphics resource information.
 */
struct vhsakmt_ccmd_gl_inter_rsp {
  struct vhsakmt_ccmd_rsp hdr;
  int32_t ret;
  union {
    HsaGraphicsResourceInfo info;
  };
  uint8_t payload[];
};
VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_gl_inter_rsp)

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif

#endif


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_queues.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

/* Size in bytes of the doorbell mapping created for a node (0x2000 = 8 KiB). */
static inline uint64_t vhsakmt_doorbell_page_size(void) {
  const uint64_t doorbell_bytes = 0x2000;
  return doorbell_bytes;
}
/* Size in bytes of the queue read/write pointer mapping: one system page. */
static inline uint64_t vhsakmt_queue_page_size(void) {
  const int page_bytes = getpagesize();
  return page_bytes;
}

/*
 * Register a trap handler and trap buffer for NodeId.
 *
 * The request is sent as an EVENT command of type
 * VHSAKMT_CCMD_EVENT_SET_TRAP; both addresses are passed as 64-bit integers.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtSetTrapHandler(HSAuint32 NodeId, void* TrapHandlerBaseAddress,
                                              HSAuint64 TrapHandlerSizeInBytes,
                                              void* TrapBufferBaseAddress,
                                              HSAuint64 TrapBufferSizeInBytes) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_event_req trap_req = {
      .hdr = VHSAKMT_CCMD(EVENT, sizeof(struct vhsakmt_ccmd_event_req)),
      .type = VHSAKMT_CCMD_EVENT_SET_TRAP,
      .set_trap_handler_args.NodeId = NodeId,
      .set_trap_handler_args.TrapHandlerBaseAddress = (uint64_t)TrapHandlerBaseAddress,
      .set_trap_handler_args.TrapHandlerSizeInBytes = TrapHandlerSizeInBytes,
      .set_trap_handler_args.TrapBufferBaseAddress = (uint64_t)TrapBufferBaseAddress,
      .set_trap_handler_args.TrapBufferSizeInBytes = TrapBufferSizeInBytes,
  };
  struct vhsakmt_ccmd_event_rsp* reply =
      vhsakmt_alloc_rsp(vdev, &trap_req.hdr, sizeof(struct vhsakmt_ccmd_event_rsp));

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &trap_req.hdr, __FUNCTION__);

  return reply->ret;
}

/*
 * Look up the BO that contains an AQL read/write pointer.
 *
 * aql_ptr is rounded down to a page boundary before the lookup. On success
 * the BO is tagged VHSA_BO_QUEUE_AQL_RW_PTR and its resource id is stored in
 * *aql_bo_res_id.
 *
 * Returns 0 on success, -EINVAL when no BO is found at that address.
 */
static int vhsakmt_find_aql_rw_bo(vhsakmt_device_handle dev, uint64_t aql_ptr,
                                  uint32_t* aql_bo_res_id) {
  uint64_t page_base = VHSA_ALIGN_DOWN(aql_ptr, getpagesize());
  vhsakmt_bo_handle rw_bo = vhsakmt_find_bo_by_addr(dev, (void*)page_base);

  if (rw_bo) {
    rw_bo->bo_type |= VHSA_BO_QUEUE_AQL_RW_PTR;
    *aql_bo_res_id = rw_bo->real.res_id;
    return 0;
  }

  return -EINVAL;
}

/*
 * Create a mappable doorbell BO (type VHSA_BO_QUEUE_DOORBELL) for blob_id and
 * record its CPU address as the doorbell base of `node`.
 *
 * Returns 0 on success, otherwise the error from BO creation or from
 * vhsakmt_set_node_doorbell(). *bo_handle receives the created BO.
 */
static int vhsakmt_create_doorbell_blob_bo(vhsakmt_device_handle dev, uint32_t node, size_t size,
                                           uint32_t blob_id, uint64_t host_handle,
                                           vhsakmt_bo_handle* bo_handle) {
  int status = vhsakmt_create_mappable_blob_bo(dev, size, blob_id, VHSA_BO_QUEUE_DOORBELL,
                                               (void*)host_handle, bo_handle);
  if (status != 0) return status;

  return vhsakmt_set_node_doorbell(dev, node, (*bo_handle)->cpu_addr);
}

/*
 * Create the mappable BO (type VHSA_BO_QUEUE_RW_PTR) that backs a queue's
 * read/write pointers and remember the host handle in its host_addr field.
 *
 * Returns 0 on success or the error from BO creation.
 */
static int vhsakmt_create_queue_rw_blob_bo(vhsakmt_device_handle dev, size_t size, uint32_t blob_id,
                                           uint64_t host_handle, vhsakmt_bo_handle* bo_handle) {
  int status =
      vhsakmt_create_mappable_blob_bo(dev, size, blob_id, VHSA_BO_QUEUE_RW_PTR, NULL, bo_handle);

  if (status == 0) (*bo_handle)->host_addr = (void*)host_handle;

  return status;
}

/*
 * Create the guest BO (type VHSA_BO_QUEUE) that represents a queue.
 *
 * The BO is inserted into the device's BO bookkeeping, keyed by its own
 * handle, and annotated with the host queue id and the optional R/W pointer
 * BO so both can be found again when the queue is destroyed.
 *
 * Returns 0 on success or the error from vhsakmt_init_host_blob().
 */
static int vhsakmt_create_queue_blob_bo(vhsakmt_device_handle dev, size_t size, uint32_t blob_id,
                                        uint64_t queue_id, vhsakmt_bo_handle rw_bo_handle,
                                        vhsakmt_bo_handle* bo_handle) {
  int status = vhsakmt_init_host_blob(dev, size, VIRTGPU_BLOB_MEM_HOST3D, 0, blob_id,
                                      VHSA_BO_QUEUE, NULL, bo_handle);
  if (status != 0) return status;

  vhsakmt_bo_handle queue_bo = *bo_handle;

  vhsakmt_insert_bo(dev, queue_bo, queue_bo, queue_bo->size);
  queue_bo->queue_id = queue_id;
  queue_bo->rw_bo = rw_bo_handle;

  return status;
}

/*
 * Create a queue on NodeId through the host.
 *
 * Steps:
 *   1. Build a VHSAKMT_CCMD_QUEUE_CREATE request, allocating blob ids for the
 *      queue BO, for the doorbell (only when the node has no doorbell base
 *      recorded yet) and, for non-AQL queues, for the R/W pointer mapping.
 *   2. Resolve the guest BOs that hold the AQL R/W pointers (AQL queues only)
 *      and the queue memory, and pass their resource ids to the host.
 *   3. Execute the request and, on success, create the guest-side doorbell,
 *      R/W pointer and queue BOs from the values in the response.
 *
 * QueueResource is read and written unconditionally and must not be NULL.
 * On success QueueResource->QueueId holds the guest queue BO handle (not the
 * host queue id), which is what vhsaKmtDestroyQueue() expects.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when a required BO cannot be found,
 * -ENOMEM when no response buffer could be allocated, the host `ret` when the
 * host rejects the request, or the negative error of a failed BO creation.
 *
 * NOTE(review): when a later step fails, BOs created by earlier steps are not
 * released here - confirm whether they are cleaned up elsewhere.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateQueueExt(HSAuint32 NodeId, HSA_QUEUE_TYPE Type,
                                              HSAuint32 QueuePercentage,
                                              HSA_QUEUE_PRIORITY Priority, HSAuint32 SdmaEngineId,
                                              void* QueueAddress, HSAuint64 QueueSizeInBytes,
                                              HsaEvent* Event, HsaQueueResource* QueueResource) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  vhsakmt_bo_handle rw_bo_handle = NULL, doorbell_bo, queue_bo, queue_mem_bo;
  struct vhsakmt_ccmd_queue_rsp* rsp;
  struct vhsakmt_ccmd_queue_req req = {
      .hdr = VHSAKMT_CCMD(QUEUE, sizeof(struct vhsakmt_ccmd_queue_req)),
      .type = VHSAKMT_CCMD_QUEUE_CREATE,
      .create_queue_args =
          {
              .NodeId = NodeId,
              .Type = Type,
              .QueuePercentage = QueuePercentage,
              .Priority = Priority,
              .SdmaEngineId = SdmaEngineId,
              .QueueAddress = (uint64_t)QueueAddress,
              .QueueSizeInBytes = QueueSizeInBytes,
              .Event = Event ? vhsakmt_event_host_handle(Event) : 0,
              .Queue_write_ptr_aql = QueueResource->Queue_write_ptr_aql,
              .Queue_read_ptr_aql = QueueResource->Queue_read_ptr_aql,
          },
      .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id), /* For queue resource */
      /* A doorbell blob id is only allocated while the node has no doorbell base recorded;
       * 0 means "reuse the existing doorbell mapping". */
      .doorbell_blob_id = vhsakmt_node_doorbell(dev, NodeId)
          ? 0
          : vhsakmt_atomic_inc_return(&dev->next_blob_id), /* For queue doorbell memory map */
  };
  int r;

  /* The queue R/W pointer memory is allocated on the host and then mapped into the guest;
   * the pointer addresses are not page aligned, so the lookup below aligns them down. */
  if (Type == HSA_QUEUE_COMPUTE_AQL) {
    r = vhsakmt_find_aql_rw_bo(dev, QueueResource->QueueWptrValue, &req.res_id);
    if (r) {
      vhsa_debug("%s: can not find the AQL queue R/W BO: %p\n", __FUNCTION__,
                 QueueResource->Queue_write_ptr_aql);
      return HSAKMT_STATUS_NO_MEMORY;
    }

    vhsa_debug("%s: create AQL queue, read ptr: %p, write ptr: %p, res id: %d\n", __FUNCTION__,
               QueueResource->Queue_read_ptr_aql, QueueResource->Queue_write_ptr_aql, req.res_id);
  } else
    /* A queue that is not a compute AQL queue uses its own R/W pointer mapping. */
    req.rw_ptr_blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id);

  /* The queue ring memory must already be known to the device as a BO. */
  queue_mem_bo = vhsakmt_find_bo_by_addr(dev, QueueAddress);
  if (!queue_mem_bo) {
    vhsa_err("%s: can not find the queue memory BO: %p\n", __FUNCTION__, QueueAddress);
    return HSAKMT_STATUS_NO_MEMORY;
  }
  /* NOTE(review): the queue memory BO is tagged with the AQL R/W pointer flag - confirm this
   * is the intended BO type for queue memory. */
  queue_mem_bo->bo_type |= VHSA_BO_QUEUE_AQL_RW_PTR;
  req.queue_mem_res_id = queue_mem_bo->real.res_id;

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_queue_rsp));
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);
  if (rsp->ret) {
    vhsa_err("%s: queue create failed, ret: %d", __FUNCTION__, rsp->ret);
    return rsp->ret;
  }

  /* Map doorbell */
  if (req.doorbell_blob_id) {
    /* The doorbell BO is created at the doorbell base: reported address minus its offset. */
    r = vhsakmt_create_doorbell_blob_bo(
        dev, NodeId, vhsakmt_doorbell_page_size(), req.doorbell_blob_id,
        rsp->vqueue_res.host_doorbell - rsp->vqueue_res.host_doorbell_offset, &doorbell_bo);
    if (r) {
      vhsa_err("%s: doorbell create failed, doorbell: %lx\n", __FUNCTION__,
               rsp->vqueue_res.host_doorbell);
      return r;
    }
    vhsa_debug("%s: create doorbell: %p, size: 0x%x\n", __FUNCTION__, doorbell_bo->cpu_addr,
               doorbell_bo->size);
  }

  QueueResource->Queue_DoorBell_aql = (void*)rsp->vqueue_res.host_doorbell;
  vhsa_debug("%s: queue create, Doorbell: %p\n", __FUNCTION__, QueueResource->Queue_DoorBell_aql);

  /* Map R/W pointer.
   * A queue that is not a compute AQL queue does not use the caller-supplied R/W pointer
   * addresses; a dedicated page-sized BO is mapped instead and the pointers handed back
   * to the caller are that BO's CPU address plus the offsets from the response.
   */
  if (Type != HSA_QUEUE_COMPUTE_AQL) {
    r = vhsakmt_create_queue_rw_blob_bo(dev, vhsakmt_queue_page_size(), req.rw_ptr_blob_id,
                                        rsp->vqueue_res.host_rw_handle, &rw_bo_handle);
    if (r) {
      vhsa_debug("%s: queue rw ptr create failed, host addr: %p\n", __FUNCTION__,
                 (void*)rsp->vqueue_res.host_rw_handle);
      return r;
    }

    QueueResource->Queue_write_ptr_aql = VHSA_UINT64_TO_VPTR(
        VHSA_VPTR_TO_UINT64(rw_bo_handle->cpu_addr) + rsp->vqueue_res.host_write_offset);
    QueueResource->Queue_read_ptr_aql = VHSA_UINT64_TO_VPTR(
        VHSA_VPTR_TO_UINT64(rw_bo_handle->cpu_addr) + rsp->vqueue_res.host_read_offset);

    vhsa_debug("%s: queue create: write ptr gva: %p, read ptr gva: %p, base hva: %lx\n",
               __FUNCTION__, QueueResource->Queue_write_ptr_aql, QueueResource->Queue_read_ptr_aql,
               rsp->vqueue_res.host_rw_handle);
  }

  /* rw_bo_handle stays NULL for AQL queues, so the queue BO then has no R/W BO attached. */
  r = vhsakmt_create_queue_blob_bo(dev, QueueSizeInBytes, req.blob_id, rsp->vqueue_res.r.QueueId,
                                   rw_bo_handle, &queue_bo);
  if (r) {
    vhsa_err("%s: queue create failed, queue ID: 0x%lx\n", __FUNCTION__, rsp->vqueue_res.r.QueueId);
    return r;
  }
  /* The caller-visible queue id is the guest BO handle, not the host queue id. */
  QueueResource->QueueId = (uint64_t)queue_bo;
  return rsp->ret;
}

/*
 * Create a queue without selecting an SDMA engine.
 *
 * Thin wrapper around vhsaKmtCreateQueueExt() that passes VHSA_SDMA_NONE as
 * the engine id; all other arguments and the return value are forwarded
 * unchanged.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateQueue(HSAuint32 NodeId, HSA_QUEUE_TYPE Type,
                                           HSAuint32 QueuePercentage, HSA_QUEUE_PRIORITY Priority,
                                           void* QueueAddress, HSAuint64 QueueSizeInBytes,
                                           HsaEvent* Event, HsaQueueResource* QueueResource) {
  HSAKMT_STATUS status =
      vhsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, VHSA_SDMA_NONE, QueueAddress,
                            QueueSizeInBytes, Event, QueueResource);

  return status;
}

/*
 * Destroy a queue created by vhsaKmtCreateQueueExt().
 *
 * QueueId is the guest queue BO handle that queue creation stored in
 * QueueResource->QueueId, not the host queue id. Freeing that BO releases the
 * queue; the R/W pointer BO attached to it (non-AQL queues) is freed as well.
 *
 * Returns the result of freeing the queue BO. The result of freeing the R/W
 * pointer BO is not reported.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyQueue(HSA_QUEUEID QueueId) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  int r;

  /* queue ID: vhsakmt_bo_handle -> real queue ID */
  vhsakmt_bo_handle bo = (vhsakmt_bo_handle)QueueId;

  /* Read everything needed from the queue BO before it is freed: the BO must
   * not be touched afterwards, not even for logging. */
  vhsakmt_bo_handle rw_bo = bo->rw_bo;
  const uint32_t res_id = bo->real.res_id;
  const uint64_t queue_id = bo->queue_id;

  r = vhsakmt_bo_free(dev, bo);
  if (rw_bo) vhsakmt_bo_free(dev, rw_bo);

  vhsa_debug("%s: queue res id: %d, queue ID: %" PRIu64 ", ret = %d\n", __FUNCTION__,
             res_id, queue_id, r);

  /* Keep the locals referenced even if vhsa_debug() compiles to nothing. */
  (void)res_id;
  (void)queue_id;

  return r;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_topology.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "hsakmt/hsakmt_virtio.h"
#include "hsakmt_virtio_device.h"

/*
 * Cache a copy of the system properties on the device.
 *
 * The first snapshot wins: when a cached copy already exists it is left
 * untouched and 0 is returned. The cache is created under vhsakmt_mutex.
 *
 * Returns 0 on success, -ENOMEM when the cache cannot be allocated.
 */
static int vhsakmt_set_sys_props(vhsakmt_device_handle dev, HsaSystemProperties* sys_props) {
  int status = 0;

  pthread_mutex_lock(&dev->vhsakmt_mutex);

  if (dev->sys_props == NULL) {
    dev->sys_props = calloc(1, sizeof(HsaSystemProperties));
    if (dev->sys_props != NULL)
      memcpy(dev->sys_props, sys_props, sizeof(HsaSystemProperties));
    else
      status = -ENOMEM;
  }

  pthread_mutex_unlock(&dev->vhsakmt_mutex);
  return status;
}

/*
 * Cache the properties of one node on the device.
 *
 * Requires the system properties to be cached already, because NumNodes both
 * bounds `node` and sizes the per-node array, which is allocated lazily on
 * first use. An existing entry for `node` is overwritten.
 *
 * Returns 0 on success, -EINVAL when the system properties are missing or
 * `node` is out of range, -ENOMEM when the per-node array cannot be
 * allocated.
 */
static int vhsakmt_set_node_props(vhsakmt_device_handle dev, uint32_t node,
                                  HsaNodeProperties* node_props) {
  int status = 0;

  if (!dev->sys_props || node >= dev->sys_props->NumNodes) return -EINVAL;

  pthread_mutex_lock(&dev->vhsakmt_mutex);

  if (dev->vhsakmt_nodes == NULL)
    dev->vhsakmt_nodes = calloc(dev->sys_props->NumNodes, sizeof(struct vhsakmt_node));

  if (dev->vhsakmt_nodes != NULL)
    memcpy(&dev->vhsakmt_nodes[node].node_props, node_props, sizeof(HsaNodeProperties));
  else
    status = -ENOMEM;

  pthread_mutex_unlock(&dev->vhsakmt_mutex);
  return status;
}

/*
 * Query the KFD version and copy it into *v.
 *
 * *v is overwritten from the response regardless of the returned status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetVersion(HsaVersionInfo* v) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_VER,
  };
  struct vhsakmt_ccmd_query_info_rsp* reply =
      vhsakmt_alloc_rsp(vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  memcpy(v, &reply->kfd_version, sizeof(*v));

  return reply->ret;
}

/*
 * Query the system properties and cache a copy on the device.
 *
 * On success *SystemProperties is filled from the response and the same
 * snapshot is cached on the device (the first cached snapshot is kept). When
 * the response reports a failure, nothing is copied or cached: caching a
 * snapshot from a failed query would pin it until the properties are
 * released, since an existing cache is never overwritten.
 *
 * Returns -ENOMEM when no response buffer or cache could be allocated,
 * otherwise the `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtAcquireSystemProperties(HsaSystemProperties* SystemProperties) {
  CHECK_VIRTIO_KFD_OPEN();

  int r;
  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_SYS_PROP,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  /* A non-zero ret means the query failed; do not expose or cache its data. */
  if (rsp->ret) return rsp->ret;

  memcpy(SystemProperties, &rsp->sys_props, sizeof(HsaSystemProperties));

  r = vhsakmt_set_sys_props(dev, SystemProperties);
  if (r) return r;

  return rsp->ret;
}

/*
 * Release the system properties and drop the copy cached on the device.
 *
 * The cached copy is dropped regardless of the status in the response.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtReleaseSystemProperties(void) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_REL_SYS_PROP,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  /* The cache is created under vhsakmt_mutex, so drop it under the same lock
   * to avoid racing with a concurrent acquire. free(NULL) is a no-op. */
  pthread_mutex_lock(&dev->vhsakmt_mutex);
  free(dev->sys_props);
  dev->sys_props = NULL;
  pthread_mutex_unlock(&dev->vhsakmt_mutex);

  return rsp->ret;
}

/*
 * Query the properties of NodeId and cache a copy on the device.
 *
 * On success *NodeProperties is filled from the response and cached in the
 * per-node table. When the response reports a failure, nothing is copied or
 * cached.
 *
 * Returns -ENOMEM when no response buffer could be allocated, the `ret` field
 * of the response when the query fails, the error from caching (-EINVAL when
 * the system properties have not been acquired or NodeId is out of range,
 * -ENOMEM on allocation failure), and otherwise the `ret` field of the
 * response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeProperties(HSAuint32 NodeId,
                                                 HsaNodeProperties* NodeProperties) {
  CHECK_VIRTIO_KFD_OPEN();

  int r;
  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .NodeID = NodeId,
      .type = VHSAKMT_CCMD_QUERY_GET_NODE_PROP,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  /* A non-zero ret means the query failed; do not expose or cache its data. */
  if (rsp->ret) return rsp->ret;

  memcpy(NodeProperties, &rsp->node_props, sizeof(HsaNodeProperties));

  r = vhsakmt_set_node_props(dev, NodeId, NodeProperties);
  if (r) return r;

  return rsp->ret;
}

/*
 * Query the XNACK mode and store it in *enable.
 *
 * *enable is overwritten from the response regardless of the returned status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetXNACKMode(HSAint32* enable) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_XNACK_MODE,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  /* rsp was checked above and is not changed by executing the request, so no
   * second NULL check is needed afterwards. */
  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  memcpy(enable, &rsp->xnack_mode, sizeof(HSAint32));

  return rsp->ret;
}

/*
 * Send a runtime-enable request.
 *
 * Only setupTtmp is forwarded in the request; rDebug is not used by this
 * implementation.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtRuntimeEnable(void* rDebug, bool setupTtmp) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .run_time_enable_args.setupTtmp = setupTtmp,
      .type = VHSAKMT_CCMD_QUERY_RUN_TIME_ENABLE,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  /* rsp was checked above and is not changed by executing the request, so no
   * second NULL check is needed afterwards. */
  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  return rsp->ret;
}

/*
 * Send a runtime-disable request.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtRuntimeDisable(void) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle dev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_rsp* rsp;
  struct vhsakmt_ccmd_query_info_req req = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_RUN_TIME_DISABLE,
  };

  rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));
  if (!rsp) return -ENOMEM;

  /* rsp was checked above and is not changed by executing the request, so no
   * second NULL check is needed afterwards. */
  vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__);

  return rsp->ret;
}

/*
 * Query the memory bank properties of NodeId.
 *
 * The response buffer is sized for NumBanks entries after the fixed part, and
 * that many entries are copied from the response payload into
 * MemoryProperties regardless of the returned status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAuint32 NumBanks,
                                                       HsaMemoryProperties* MemoryProperties) {
  CHECK_VIRTIO_KFD_OPEN();

  const size_t banks_bytes = NumBanks * sizeof(HsaMemoryProperties);
  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_NOD_MEM_PROP,
      .node_mem_prop_args =
          {
              .NodeId = NodeId,
              .NumBanks = NumBanks,
          },
  };
  struct vhsakmt_ccmd_query_info_rsp* reply = vhsakmt_alloc_rsp(
      vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp) + banks_bytes);

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  memcpy(MemoryProperties, reply->payload, banks_bytes);

  return reply->ret;
}

/*
 * Query the cache properties of a processor on NodeId.
 *
 * The response buffer is sized for NumCaches entries after the fixed part,
 * and that many entries are copied from the response payload into
 * CacheProperties regardless of the returned status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeCacheProperties(HSAuint32 NodeId, HSAuint32 ProcessorId,
                                                      HSAuint32 NumCaches,
                                                      HsaCacheProperties* CacheProperties) {
  CHECK_VIRTIO_KFD_OPEN();

  const size_t caches_bytes = NumCaches * sizeof(HsaCacheProperties);
  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_NOD_CACHE_PROP,
      .node_cache_prop_args =
          {
              .NodeId = NodeId,
              .ProcessorId = ProcessorId,
              .NumCaches = NumCaches,
          },
  };
  struct vhsakmt_ccmd_query_info_rsp* reply = vhsakmt_alloc_rsp(
      vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp) + caches_bytes);

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  memcpy(CacheProperties, reply->payload, caches_bytes);

  return reply->ret;
}

/*
 * Query the IO link properties of NodeId.
 *
 * The response buffer is sized for NumIoLinks entries after the fixed part,
 * and that many entries are copied from the response payload into
 * IoLinkProperties regardless of the returned status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId, HSAuint32 NumIoLinks,
                                                       HsaIoLinkProperties* IoLinkProperties) {
  CHECK_VIRTIO_KFD_OPEN();

  const size_t links_bytes = NumIoLinks * sizeof(HsaIoLinkProperties);
  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_NOD_IO_LINK_PROP,
      .node_io_link_args =
          {
              .NodeId = NodeId,
              .NumIoLinks = NumIoLinks,
          },
  };
  struct vhsakmt_ccmd_query_info_rsp* reply = vhsakmt_alloc_rsp(
      vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp) + links_bytes);

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  memcpy(IoLinkProperties, reply->payload, links_bytes);

  return reply->ret;
}

/*
 * Query the clock counters of NodeId and copy them into *Counters.
 *
 * *Counters is overwritten from the response regardless of the returned
 * status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetClockCounters(HSAuint32 NodeId, HsaClockCounters* Counters) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_CLOCK_COUNTERS,
      .NodeID = NodeId,
  };
  struct vhsakmt_ccmd_query_info_rsp* reply =
      vhsakmt_alloc_rsp(vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  memcpy(Counters, &reply->clock_counters, sizeof(*Counters));

  return reply->ret;
}

/*
 * Query the runtime capability mask and store it in *caps_mask.
 *
 * *caps_mask is overwritten from the response regardless of the returned
 * status.
 *
 * Returns -ENOMEM when no response buffer could be allocated, otherwise the
 * `ret` field of the response.
 */
HSAKMT_STATUS HSAKMTAPI vhsaKmtGetRuntimeCapabilities(HSAuint32* caps_mask) {
  CHECK_VIRTIO_KFD_OPEN();

  vhsakmt_device_handle vdev = vhsakmt_dev();
  struct vhsakmt_ccmd_query_info_req query = {
      .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)),
      .type = VHSAKMT_CCMD_QUERY_GET_RUNTIME_CAPS,
  };
  struct vhsakmt_ccmd_query_info_rsp* reply =
      vhsakmt_alloc_rsp(vdev, &query.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp));

  if (reply == NULL) return -ENOMEM;

  vhsakmt_execbuf_cpu(vdev, &query.hdr, __FUNCTION__);
  *caps_mask = reply->caps;

  return reply->ret;
}


================================================
FILE: libhsakmt/src/virtio/hsakmt_virtio_vm.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <sys/mman.h>
#include <unistd.h>

#include "hsakmt_virtio_device.h"

void* vhsakmt_vm_start(void) {
  void* vm_start = malloc(getpagesize());
  if (!vm_start) return NULL;

  free(vm_start);
  return vm_start;
}

/*
 * Reserve the virtual address range [start, start + size).
 *
 * The range is mapped anonymous, private and inaccessible (PROT_NONE) at
 * exactly `start` using MAP_FIXED, and is marked MADV_DONTFORK. The result of
 * madvise() is not checked.
 *
 * Returns 0 on success, -ENOMEM when the mapping fails or is not placed at
 * `start`.
 */
int vhsakmt_reserve_va(uint64_t start, uint64_t size) {
  void* const wanted = (void*)start;
  void* mapped =
      mmap(wanted, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

  if (mapped == MAP_FAILED || mapped != wanted) return -ENOMEM;

  madvise(mapped, size, MADV_DONTFORK);

  return 0;
}

void vhsakmt_dereserve_va(uint64_t start, uint64_t size) { munmap((void*)start, size); }

/*
 * Record the scratch area of `node`.
 *
 * Does nothing when the node table or the system properties are missing, or
 * when `node` is out of range. The first recorded area wins: once both start
 * and size are non-zero they are not overwritten.
 */
void vhsakmt_set_scratch_area(vhsakmt_device_handle dev, uint32_t node, uint64_t start,
                              uint64_t size) {
  if (!dev->vhsakmt_nodes || !dev->sys_props || node >= dev->sys_props->NumNodes) return;

  pthread_mutex_lock(&dev->vhsakmt_mutex);

  if (!(dev->vhsakmt_nodes[node].scratch_start && dev->vhsakmt_nodes[node].scratch_size)) {
    dev->vhsakmt_nodes[node].scratch_start = start;
    dev->vhsakmt_nodes[node].scratch_size = size;
  }

  pthread_mutex_unlock(&dev->vhsakmt_mutex);
}

/*
 * Tell whether `addr` lies in the recorded scratch area of any node.
 *
 * The range check is inclusive at both ends (start <= addr <= start + size).
 * Returns false when the node table or the system properties are missing.
 */
bool vhsakmt_is_scratch_mem(vhsakmt_device_handle dev, void* addr) {
  uint32_t node;

  if (!dev->vhsakmt_nodes || !dev->sys_props) return false;

  for (node = 0; node < dev->sys_props->NumNodes; node++) {
    if ((uint64_t)addr < dev->vhsakmt_nodes[node].scratch_start) continue;
    if ((uint64_t)addr >
        dev->vhsakmt_nodes[node].scratch_start + dev->vhsakmt_nodes[node].scratch_size)
      continue;

    return true;
  }

  return false;
}

/*
 * Record the device's VM area.
 *
 * The first recorded area wins: once both start and size are non-zero they
 * are not overwritten.
 */
void vhsakmt_set_vm_area(vhsakmt_device_handle dev, uint64_t start, uint64_t size) {
  pthread_mutex_lock(&dev->vhsakmt_mutex);

  if (!(dev->vm_start && dev->vm_size)) {
    dev->vm_start = start;
    dev->vm_size = size;
  }

  pthread_mutex_unlock(&dev->vhsakmt_mutex);
}

/*
 * Tell whether `addr` is a user pointer, i.e. lies outside the recorded VM
 * area. The area is treated as inclusive at both ends
 * (vm_start <= addr <= vm_start + vm_size counts as inside).
 */
bool vhsakmt_is_userptr(vhsakmt_device_handle dev, void* addr) {
  if ((uint64_t)addr < dev->vm_start) return true;

  return (uint64_t)addr > dev->vm_start + dev->vm_size;
}

/*
 * Store `doorbell` as the doorbell base of node `node`.
 *
 * Returns -EINVAL when the device has no node table or system properties, or
 * when `node` is not below sys_props->NumNodes; returns 0 once the value has
 * been stored. The store is done under dev->vhsakmt_mutex and overwrites any
 * previous value.
 */
int vhsakmt_set_node_doorbell(vhsakmt_device_handle dev, uint32_t node, void* doorbell) {
  const bool tables_ready = dev->vhsakmt_nodes && dev->sys_props;

  /* Short-circuit keeps sys_props from being dereferenced when it is missing. */
  if (!tables_ready || node >= dev->sys_props->NumNodes) return -EINVAL;

  pthread_mutex_lock(&dev->vhsakmt_mutex);
  dev->vhsakmt_nodes[node].doorbell_base = doorbell;
  pthread_mutex_unlock(&dev->vhsakmt_mutex);

  return 0;
}

/*
 * Look up the doorbell base stored for node `node`.
 *
 * Returns NULL when the device has no node table or system properties, or
 * when `node` is not below sys_props->NumNodes. The value is read without
 * taking dev->vhsakmt_mutex.
 */
void* vhsakmt_node_doorbell(vhsakmt_device_handle dev, uint32_t node) {
  const bool tables_ready = dev->vhsakmt_nodes && dev->sys_props;

  if (tables_ready && node < dev->sys_props->NumNodes)
    return dev->vhsakmt_nodes[node].doorbell_base;

  return NULL;
}


================================================
FILE: libhsakmt/src/virtio/include/linux/virtgpu_drm.h
================================================
/*
 * Copyright 2013 Red Hat
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef VIRTGPU_DRM_H
#define VIRTGPU_DRM_H

#include "drm.h"

#if defined(__cplusplus)
extern "C" {
#endif

/* Please note that modifications to all structs defined here are
 * subject to backwards-compatibility constraints.
 *
 * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
 * compatibility Keep fields aligned to their size
 */

/*
 * virtio-gpu ioctl command indices. Each one is added to DRM_COMMAND_BASE by
 * the matching DRM_IOCTL_VIRTGPU_* macro at the end of this header.
 */
#define DRM_VIRTGPU_MAP         0x01
#define DRM_VIRTGPU_EXECBUFFER  0x02
#define DRM_VIRTGPU_GETPARAM    0x03
#define DRM_VIRTGPU_RESOURCE_CREATE 0x04
#define DRM_VIRTGPU_RESOURCE_INFO     0x05
#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06
#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07
#define DRM_VIRTGPU_WAIT     0x08
#define DRM_VIRTGPU_GET_CAPS  0x09
#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a
#define DRM_VIRTGPU_CONTEXT_INIT 0x0b

/*
 * Bits for drm_virtgpu_execbuffer.flags; VIRTGPU_EXECBUF_FLAGS is the OR of
 * all bits defined here.
 */
#define VIRTGPU_EXECBUF_FENCE_FD_IN	0x01
#define VIRTGPU_EXECBUF_FENCE_FD_OUT	0x02
#define VIRTGPU_EXECBUF_RING_IDX	0x04
#define VIRTGPU_EXECBUF_FLAGS  (\
		VIRTGPU_EXECBUF_FENCE_FD_IN |\
		VIRTGPU_EXECBUF_FENCE_FD_OUT |\
		VIRTGPU_EXECBUF_RING_IDX |\
		0)

/* Argument block of DRM_IOCTL_VIRTGPU_MAP. */
struct drm_virtgpu_map {
	__u64 offset; /* use for mmap system call */
	__u32 handle;
	__u32 pad;
};

/*
 * Bits for drm_virtgpu_execbuffer_syncobj.flags; VIRTGPU_EXECBUF_SYNCOBJ_FLAGS
 * is the OR of all bits defined here.
 */
#define VIRTGPU_EXECBUF_SYNCOBJ_RESET		0x01
#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \
		VIRTGPU_EXECBUF_SYNCOBJ_RESET | \
		0)
/*
 * One sync-object entry. struct drm_virtgpu_execbuffer carries the size of
 * this struct in its syncobj_stride field; presumably its in_syncobjs and
 * out_syncobjs fields point at arrays of these (confirm against the kernel
 * UAPI documentation).
 */
struct drm_virtgpu_execbuffer_syncobj {
	__u32 handle;
	__u32 flags;
	__u64 point;
};

/*
 * Argument block of DRM_IOCTL_VIRTGPU_EXECBUFFER.
 *
 * Pointer-like members (command, bo_handles, in_syncobjs, out_syncobjs) are
 * carried as __u64 per the compatibility note at the top of this header.
 */
/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */
struct drm_virtgpu_execbuffer {
	__u32 flags;
	__u32 size;
	__u64 command; /* void* */
	__u64 bo_handles;
	__u32 num_bo_handles;
	__s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */
	__u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */
	__u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */
	__u32 num_in_syncobjs;
	__u32 num_out_syncobjs;
	__u64 in_syncobjs;
	__u64 out_syncobjs;
};

/* Parameter ids; presumably the values accepted in drm_virtgpu_getparam.param (confirm). */
#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */
#define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */
#define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing  */
#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */
#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */
#define VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME 8 /* Ability to set debug name from userspace */

/* Argument block of DRM_IOCTL_VIRTGPU_GETPARAM. */
struct drm_virtgpu_getparam {
	__u64 param;
	__u64 value;
};

/* Argument block of DRM_IOCTL_VIRTGPU_RESOURCE_CREATE. */
/* NO_BO flags? NO resource flag? */
/* resource flag for y_0_top */
struct drm_virtgpu_resource_create {
	__u32 target;
	__u32 format;
	__u32 bind;
	__u32 width;
	__u32 height;
	__u32 depth;
	__u32 array_size;
	__u32 last_level;
	__u32 nr_samples;
	__u32 flags;
	__u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
	__u32 res_handle;  /* returned by kernel */
	__u32 size;        /* validate transfer in the host */
	__u32 stride;      /* validate transfer in the host */
};

/* Argument block of DRM_IOCTL_VIRTGPU_RESOURCE_INFO. */
struct drm_virtgpu_resource_info {
	__u32 bo_handle;
	__u32 res_handle;
	__u32 size;
	__u32 blob_mem;
};

/*
 * Box embedded in the transfer_to_host / transfer_from_host argument blocks.
 * Presumably x/y/z are the origin and w/h/d the extent (confirm against the
 * kernel UAPI documentation).
 */
struct drm_virtgpu_3d_box {
	__u32 x;
	__u32 y;
	__u32 z;
	__u32 w;
	__u32 h;
	__u32 d;
};

/* Argument block of DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST. */
struct drm_virtgpu_3d_transfer_to_host {
	__u32 bo_handle;
	struct drm_virtgpu_3d_box box;
	__u32 level;
	__u32 offset;
	__u32 stride;
	__u32 layer_stride;
};

/*
 * Argument block of DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST; same layout as
 * struct drm_virtgpu_3d_transfer_to_host.
 */
struct drm_virtgpu_3d_transfer_from_host {
	__u32 bo_handle;
	struct drm_virtgpu_3d_box box;
	__u32 level;
	__u32 offset;
	__u32 stride;
	__u32 layer_stride;
};

/* Argument block of DRM_IOCTL_VIRTGPU_WAIT; VIRTGPU_WAIT_NOWAIT is presumably a value for `flags` (confirm). */
#define VIRTGPU_WAIT_NOWAIT 1 /* like it */
struct drm_virtgpu_3d_wait {
	__u32 handle; /* 0 is an invalid handle */
	__u32 flags;
};

/* Capability-set ids; presumably the values accepted in drm_virtgpu_get_caps.cap_set_id (confirm). */
#define VIRTGPU_DRM_CAPSET_VIRGL 1
#define VIRTGPU_DRM_CAPSET_VIRGL2 2
#define VIRTGPU_DRM_CAPSET_GFXSTREAM_VULKAN 3
#define VIRTGPU_DRM_CAPSET_VENUS 4
#define VIRTGPU_DRM_CAPSET_CROSS_DOMAIN 5
#define VIRTGPU_DRM_CAPSET_DRM 6
/*
 * Argument block of DRM_IOCTL_VIRTGPU_GET_CAPS. `addr` is a __u64 rather than
 * a pointer, per the compatibility note at the top of this header.
 */
struct drm_virtgpu_get_caps {
	__u32 cap_set_id;
	__u32 cap_set_ver;
	__u64 addr;
	__u32 size;
	__u32 pad;
};

/*
 * Argument block of DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB.
 *
 * The VIRTGPU_BLOB_MEM_* values are for `blob_mem` (see the "zero is invalid
 * blob_mem" note below); the VIRTGPU_BLOB_FLAG_* bits are presumably for
 * `blob_flags`.
 *
 * NOTE(review): blob_userptr and offset are indented with spaces while the
 * rest of this header uses tabs, which suggests they (and possibly
 * VIRTGPU_BLOB_FLAG_USE_USERPTR) were added locally. Confirm that the struct
 * layout matches the kernel this library is meant to run against.
 */
struct drm_virtgpu_resource_create_blob {
#define VIRTGPU_BLOB_MEM_GUEST             0x0001
#define VIRTGPU_BLOB_MEM_HOST3D            0x0002
#define VIRTGPU_BLOB_MEM_HOST3D_GUEST      0x0003

#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE     0x0001
#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE    0x0002
#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004
#define VIRTGPU_BLOB_FLAG_USE_USERPTR      0x0008
	/* zero is invalid blob_mem */
	__u32 blob_mem;
	__u32 blob_flags;
	__u32 bo_handle;
	__u32 res_handle;
	__u64 size;

	/*
	 * for 3D contexts with VIRTGPU_BLOB_MEM_HOST3D_GUEST and
	 * VIRTGPU_BLOB_MEM_HOST3D otherwise, must be zero.
	 */
	__u32 pad;
	__u32 cmd_size;
	__u64 cmd;
	__u64 blob_id;
  __u64 blob_userptr;
  __s64 offset;
};

/* Parameter ids; presumably the values accepted in drm_virtgpu_context_set_param.param (confirm). */
#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID       0x0001
#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS       0x0002
#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003
#define VIRTGPU_CONTEXT_PARAM_DEBUG_NAME      0x0004
/* One (param, value) pair; drm_virtgpu_context_init.ctx_set_params refers to an array of these. */
struct drm_virtgpu_context_set_param {
	__u64 param;
	__u64 value;
};

/* Argument block of DRM_IOCTL_VIRTGPU_CONTEXT_INIT. */
struct drm_virtgpu_context_init {
	__u32 num_params;
	__u32 pad;

	/* pointer to drm_virtgpu_context_set_param array */
	__u64 ctx_set_params;
};

/*
 * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in
 * effect.  The event size is sizeof(drm_event), since there is no additional
 * payload.
 */
#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000

/*
 * Full ioctl request codes. Every request is built with DRM_IOWR from
 * DRM_COMMAND_BASE plus the DRM_VIRTGPU_* command index and is paired with the
 * struct named in the macro as its argument type.
 */
#define DRM_IOCTL_VIRTGPU_MAP \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)

#define DRM_IOCTL_VIRTGPU_EXECBUFFER \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\
		struct drm_virtgpu_execbuffer)

#define DRM_IOCTL_VIRTGPU_GETPARAM \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM,\
		struct drm_virtgpu_getparam)

#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE			\
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE,	\
		struct drm_virtgpu_resource_create)

#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \
		 struct drm_virtgpu_resource_info)

#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST,	\
		struct drm_virtgpu_3d_transfer_from_host)

#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST,	\
		struct drm_virtgpu_3d_transfer_to_host)

#define DRM_IOCTL_VIRTGPU_WAIT				\
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT,	\
		struct drm_virtgpu_3d_wait)

#define DRM_IOCTL_VIRTGPU_GET_CAPS \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \
	struct drm_virtgpu_get_caps)

#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB				\
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB,	\
		struct drm_virtgpu_resource_create_blob)

#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT					\
	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT,		\
		struct drm_virtgpu_context_init)

#if defined(__cplusplus)
}
#endif

#endif


================================================
FILE: libhsakmt/src/virtio/libhsakmt_virtio.ver
================================================
/*
 * Linker version script: the vhsaKmt* entry points listed under "global" and
 * vamdgpu_query_gpu_info are exported; every other symbol is made local.
 */
{
global:
vhsaKmtOpenKFD;
vhsaKmtCloseKFD;
vhsaKmtAllocMemory;
vhsaKmtFreeMemory;
vhsaKmtMapMemoryToGPUNodes;
vhsaKmtUnmapMemoryToGPU;
vhsaKmtAvailableMemory;
vhsaKmtMapMemoryToGPU;
vhsaKmtRegisterMemoryWithFlags;
vhsaKmtDeregisterMemory;
vhsaKmtGetVersion;
vhsaKmtAcquireSystemProperties;
vhsaKmtReleaseSystemProperties;
vhsaKmtGetNodeProperties;
vhsaKmtGetXNACKMode;
vhsaKmtRuntimeEnable;
vhsaKmtRuntimeDisable;
vhsaKmtGetNodeMemoryProperties;
vhsaKmtGetNodeCacheProperties;
vhsaKmtGetNodeIoLinkProperties;
vhsaKmtGetClockCounters;
vhsaKmtGetAMDGPUDeviceHandle;
vhsaKmtQueryPointerInfo;
vhsaKmtGetTileConfig;
vhsaKmtCreateEvent;
vhsaKmtDestroyEvent;
vhsaKmtSetEvent;
vhsaKmtResetEvent;
vhsaKmtQueryEventState;
vhsaKmtWaitOnMultipleEvents;
vhsaKmtWaitOnEvent;
vhsaKmtWaitOnEvent_Ext;
vhsaKmtWaitOnMultipleEvents_Ext;
vhsaKmtSetTrapHandler;
vhsaKmtCreateQueueExt;
vhsaKmtCreateQueue;
vhsaKmtDestroyQueue;
vhsaKmtRegisterGraphicsHandleToNodes;
vhsaKmtGetRuntimeCapabilities;
vamdgpu_query_gpu_info;
local: *;
};


================================================
FILE: libhsakmt/src/virtio/virtio_gpu.c
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <errno.h>
#include <libsync.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <fcntl.h>

#include "virtio_gpu.h"

/*
 * Size in bytes (25 * 4 KiB) used by virtio_gpu_init() for both the request
 * staging buffer and the shared-memory blob.
 */
#define SHMEM_SZ (25 * 0x1000)

static int set_context(int fd) {
  struct drm_virtgpu_context_set_param params[] = {
      {VIRTGPU_CONTEXT_PARAM_CAPSET_ID, VIRGL_RENDERER_CAPSET_HSAKMT},
      {VIRTGPU_CONTEXT_PARAM_NUM_RINGS, 64},
  };
  struct drm_virtgpu_context_init args = {
      .num_params = ARRAY_SIZE(params),
      .ctx_set_params = (uintptr_t)(params),
  };

  return virtio_gpu_ioctl(fd, VIRTGPU_CONTEXT_INIT, &args);
}

/*
 * Map the buffer object `handle` into the CPU address space.
 *
 * The mmap offset is obtained with the VIRTGPU_MAP ioctl and `size` bytes are
 * then mapped read/write and shared from the device fd. When `fixed_map` is
 * non-NULL the mapping is placed exactly there (MAP_FIXED); otherwise the
 * kernel picks the address.
 *
 * On success the mapping address is stored in *addr and 0 is returned. On
 * failure *addr is left untouched (it is never set to MAP_FAILED) and the
 * ioctl error, or -EINVAL when mmap fails, is returned.
 */
int virtio_gpu_map_handle(struct virtio_gpu_device* vgdev, uint32_t handle, uint64_t size,
                          void** addr, void* fixed_map) {
  struct drm_virtgpu_map args = {
      .handle = handle,
  };
  int map_flags = MAP_SHARED;
  void* mapping;
  int r;

  r = virtio_gpu_ioctl(vgdev->fd, VIRTGPU_MAP, &args);
  if (r) return r;

  if (fixed_map) map_flags |= MAP_FIXED;

  mapping = mmap(fixed_map, size, PROT_READ | PROT_WRITE, map_flags, vgdev->fd, args.offset);
  if (mapping == MAP_FAILED) return -EINVAL;

  /* Publish the address only once the mapping is known to be valid. */
  *addr = mapping;

  return 0;
}

void virtio_gpu_unmap(void* addr, uint64_t size) { munmap(addr, size); }

static void virtio_gpu_bo_close(struct virtio_gpu_device* vgdev, uint32_t handle) {
  struct drm_gem_close args = {
      .handle = handle,
  };

  virtio_gpu_ioctl(vgdev->fd, GEM_CLOSE, &args);
}

static int virtio_gpu_shmem_init(struct virtio_gpu_device* vgdev, size_t size) {
  struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = VIRTGPU_BLOB_MEM_HOST3D,
      .blob_flags = VIRTGPU_BLOB_FLAG_USE_MAPPABLE,
      .size = size,
      .blob_id = 0,
  };

  int r = virtio_gpu_ioctl(vgdev->fd, VIRTGPU_RESOURCE_CREATE_BLOB, &args);
  if (r) return r;

  r = virtio_gpu_map_handle(vgdev, args.bo_handle, size, (void**)&vgdev->shmem, NULL);
  if (r) {
    virtio_gpu_bo_close(vgdev, args.bo_handle);
    return r;
  }

  vgdev->shmem_handle = args.bo_handle;

  uint32_t offset = vgdev->shmem->base.rsp_mem_offset;
  vgdev->rsp_mem_len = size - offset;
  vgdev->rsp_mem = &((uint8_t*)vgdev->shmem)[offset];

  return 0;
}

/*
 * Create the per-device state for the virtio-gpu fd `fd`.
 *
 * Steps: initialise the context on the fd, allocate the device structure and
 * its SHMEM_SZ-byte request staging buffer, create and map the shared-memory
 * blob, then initialise the response and execbuffer mutexes.
 *
 * `context_id` is currently unused. The fd is stored but not closed on
 * failure; it is closed by virtio_gpu_close().
 *
 * Returns the new device, or NULL on any failure. Everything allocated before
 * the failing step is released again (including the request buffer when the
 * shared-memory setup fails).
 */
struct virtio_gpu_device* virtio_gpu_init(int fd, uint32_t context_id) {
  struct virtio_gpu_device* vgdev;
  int r;

  (void)context_id;

  r = set_context(fd);
  if (r) return NULL;

  vgdev = calloc(1, sizeof(*vgdev));
  if (!vgdev) return NULL;

  vgdev->fd = fd;

  vgdev->reqbuf = calloc(1, SHMEM_SZ);
  if (!vgdev->reqbuf) goto err_free_dev;

  r = virtio_gpu_shmem_init(vgdev, SHMEM_SZ);
  if (r) goto err_free_reqbuf;

  pthread_mutex_init(&vgdev->rsp_lock, NULL);
  pthread_mutex_init(&vgdev->eb_lock, NULL);

  return vgdev;

err_free_reqbuf:
  free(vgdev->reqbuf);
err_free_dev:
  free(vgdev);
  return NULL;
}

/*
 * Tear down a device created by virtio_gpu_init(): unmap and close the
 * shared-memory blob, destroy both mutexes, close the device fd and free the
 * request buffer and the device structure itself.
 *
 * `vgdev` must not be used after this call.
 */
void virtio_gpu_close(struct virtio_gpu_device* vgdev) {
  /* Shared-memory blob: drop the CPU mapping first, then the GEM handle. */
  munmap(vgdev->shmem, SHMEM_SZ);
  virtio_gpu_bo_close(vgdev, vgdev->shmem_handle);

  pthread_mutex_destroy(&vgdev->rsp_lock);
  pthread_mutex_destroy(&vgdev->eb_lock);

  close(vgdev->fd);

  free(vgdev->reqbuf);
  free(vgdev);
}

/*
 * Carve a response slot of `size` bytes (rounded up to a multiple of 8) out of
 * the shared response area and attach it to `req`.
 *
 * Slots are handed out sequentially; when the next slot would reach or pass
 * the end of the area the allocation restarts at offset 0. Nothing here checks
 * whether an earlier response in the reused region is still in use. The
 * offset bookkeeping is done under rsp_lock.
 *
 * The slot offset is written to req->rsp_off and the rounded size to the
 * slot's `len` field. Returns a pointer to the slot.
 */
void* virtio_gpu_alloc_rsp(struct virtio_gpu_device* vgdev, struct virtio_gpu_ccmd_req* req,
                           uint32_t size) {
  struct virtio_gpu_ccmd_rsp* rsp;
  uint32_t slot;

  size = VHSA_ALIGN_UP(size, 8);

  pthread_mutex_lock(&vgdev->rsp_lock);

  if (vgdev->next_rsp_off + size >= vgdev->rsp_mem_len) vgdev->next_rsp_off = 0;

  slot = vgdev->next_rsp_off;
  vgdev->next_rsp_off = slot + size;

  pthread_mutex_unlock(&vgdev->rsp_lock);

  req->rsp_off = slot;

  rsp = (struct virtio_gpu_ccmd_rsp*)(vgdev->rsp_mem + slot);
  rsp->len = size;

  return rsp;
}

/*
 * Submit a command buffer with the VIRTGPU_EXECBUFFER ioctl. The name
 * indicates that the caller is expected to hold eb_lock.
 *
 * cmd / cmd_size            command stream and its length in bytes
 * handles / num_handles     buffer-object handles passed with the submission
 * fence_fd                  optional in/out fence fd; may be NULL
 * ring_idx                  ring to submit on (VIRTGPU_EXECBUF_RING_IDX is
 *                           always set)
 * in_syncobjs/out_syncobjs  sync-object arrays with their element counts
 * in_fence / out_fence      request VIRTGPU_EXECBUF_FENCE_FD_IN / _OUT
 *
 * `fence_fd` may be NULL (virtio_gpu_flush_locked() passes its own `fence`
 * argument straight through, and that can be NULL). In that case no fence fd
 * is passed in and none is requested, so nothing is dereferenced and no
 * returned fd can leak.
 *
 * Returns 0 on success or the ioctl error. On success with an out-fence
 * requested, *fence_fd receives the fence fd returned by the kernel.
 */
static int virtio_gpu_execbuffer_locked(struct virtio_gpu_device* vgdev, void* cmd,
                                        uint32_t cmd_size, uint32_t* handles, uint32_t num_handles,
                                        int* fence_fd, int ring_idx, uint32_t num_in_syncobjs,
                                        uint32_t num_out_syncobjs,
                                        struct drm_virtgpu_execbuffer_syncobj* in_syncobjs,
                                        struct drm_virtgpu_execbuffer_syncobj* out_syncobjs,
                                        bool in_fence, bool out_fence) {
  /* Fence flags only make sense when there is somewhere to read/write the fd. */
  const bool use_in_fence = in_fence && fence_fd != NULL;
  const bool use_out_fence = out_fence && fence_fd != NULL;

  struct drm_virtgpu_execbuffer eb = {
      .flags = (use_out_fence ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0) |
          (use_in_fence ? VIRTGPU_EXECBUF_FENCE_FD_IN : 0) | VIRTGPU_EXECBUF_RING_IDX,
      .size = cmd_size,
      .command = (uintptr_t)cmd,
      .bo_handles = (uintptr_t)handles,
      .num_bo_handles = num_handles,
      .fence_fd = fence_fd ? *fence_fd : -1,
      .ring_idx = ring_idx,
      .syncobj_stride = sizeof(struct drm_virtgpu_execbuffer_syncobj),
      .num_in_syncobjs = num_in_syncobjs,
      .num_out_syncobjs = num_out_syncobjs,
      .in_syncobjs = (uintptr_t)in_syncobjs,
      .out_syncobjs = (uintptr_t)out_syncobjs,
  };
  int r = virtio_gpu_ioctl(vgdev->fd, VIRTGPU_EXECBUFFER, &eb);
  if (r) return r;

  if (use_out_fence) *fence_fd = eb.fence_fd;

  return 0;
}

/*
 * Submit everything currently staged in the request buffer as one execbuffer
 * on ring 0 and reset the staging counters.
 *
 * An out-fence is requested exactly when `fence` is non-NULL. With nothing
 * staged the function returns 0 without submitting and without touching
 * *fence. On a submission error the staged data is kept and the error is
 * returned.
 */
static int virtio_gpu_flush_locked(struct virtio_gpu_device* vgdev, int* fence) {
  const bool want_out_fence = (fence != NULL);
  int rc;

  if (vgdev->reqbuf_len == 0) return 0;

  rc = virtio_gpu_execbuffer_locked(vgdev, vgdev->reqbuf, vgdev->reqbuf_len, NULL, 0, fence, 0, 0,
                                    0, NULL, NULL, false, want_out_fence);
  if (rc != 0) return rc;

  vgdev->reqbuf_cnt = 0;
  vgdev->reqbuf_len = 0;

  return 0;
}

static int virtio_gpu_add_cmd(struct virtio_gpu_device* vgdev, struct virtio_gpu_ccmd_req* req) {
  req->seqno = ++vgdev->next_seqno;
  int r;

  if (vgdev->reqbuf_len + req->len > sizeof(vgdev->reqbuf)) {
    r = virtio_gpu_flush_locked(vgdev, NULL);
    if (r) return r;
  }

  memcpy(&vgdev->reqbuf[vgdev->reqbuf_len], req, req->len);
  vgdev->reqbuf_len += req->len;
  vgdev->reqbuf_cnt++;

  return 0;
}

/*
 * Wrap-around aware ordering of 32-bit sequence numbers: true when `a` comes
 * before `b`, judged by the sign of the 32-bit difference a - b.
 */
static inline bool fence_before(uint32_t a, uint32_t b) {
  const int32_t delta = (int32_t)(a - b);

  return delta < 0;
}

/*
 * Busy-wait, yielding the CPU between polls, until the sequence number in the
 * shared-memory header is no longer before req->seqno.
 */
static void virtio_gpu_seqno_sync(struct virtio_gpu_device* vgdev,
                                  struct virtio_gpu_ccmd_req* req) {
  for (;;) {
    if (!fence_before(vgdev->shmem->base.seqno, req->seqno)) break;

    sched_yield();
  }
}

/*
 * Queue `req` and, for a synchronous call, submit it and wait for completion.
 *
 * Under eb_lock the request is appended to the staging buffer; when `sync` is
 * true the buffer is then flushed with an out-fence. After the lock is
 * released a synchronous caller waits on that fence (no timeout), closes it,
 * and finally polls the shared-memory sequence number until it has reached
 * req->seqno.
 *
 * With `sync` false the request is only staged; submission happens on a later
 * flush. Returns 0 on success or the error from staging/flushing.
 */
int virtio_gpu_exec_cmd(struct virtio_gpu_device* vgdev, struct virtio_gpu_ccmd_req* req,
                        bool sync) {
  int fence;
  int r;

  pthread_mutex_lock(&vgdev->eb_lock);

  r = virtio_gpu_add_cmd(vgdev, req);
  if (r == 0 && sync) r = virtio_gpu_flush_locked(vgdev, &fence);

  pthread_mutex_unlock(&vgdev->eb_lock);

  if (r != 0 || !sync) return r;

  sync_wait(fence, -1);
  close(fence);
  virtio_gpu_seqno_sync(vgdev, req);

  return 0;
}

/*
 * Issue the VIRTGPU_RESOURCE_CREATE_BLOB ioctl with the caller-filled `args`
 * and return the ioctl result (0 on success).
 */
int virtio_gpu_create_blob(struct virtio_gpu_device* vgdev,
                           struct drm_virtgpu_resource_create_blob* args) {
  const int rc = virtio_gpu_ioctl(vgdev->fd, VIRTGPU_RESOURCE_CREATE_BLOB, args);

  return rc;
}

int virtio_gpu_destroy_handle(struct virtio_gpu_device* vgdev, uint32_t bo_handle) {
  struct drm_gem_close args = {
      .handle = bo_handle,
  };

  return virtio_gpu_ioctl(vgdev->fd, GEM_CLOSE, &args);
}

int virtio_gpu_res_id(struct virtio_gpu_device* vgdev, uint32_t handle, uint32_t* res_id) {
  struct drm_virtgpu_resource_info args = {
      .bo_handle = handle,
  };
  int r = virtio_gpu_ioctl(vgdev->fd, VIRTGPU_RESOURCE_INFO, &args);
  if (r) return r;

  *res_id = args.res_handle;
  return 0;
}

static int virtio_gpu_get_capset(int fd, struct virgl_renderer_capset_hsakmt* caps) {
  struct drm_virtgpu_get_caps args = {
      .cap_set_id = VIRGL_RENDERER_CAPSET_HSAKMT,
      .cap_set_ver = 0,
      .addr = (uintptr_t)caps,
      .size = sizeof(*caps),
  };

  memset(caps, 0, sizeof(*caps));

  return virtio_gpu_ioctl(fd, VIRTGPU_GET_CAPS, &args);
}

/*
 * Find and open the first DRM render node whose HSAKMT capset reports the
 * AMDGPU context type.
 *
 * At most VHSA_MAX_DEVICES devices are enumerated. Devices that expose no
 * render node are skipped instead of passing their (possibly NULL) node path
 * to open(). Every fd that is opened but does not match is closed again.
 *
 * Returns the open fd (O_RDWR | O_CLOEXEC) of the matching device, or -1 when
 * enumeration fails or no device matches.
 */
int virtio_gpu_kfd_open(void) {
  drmDevicePtr devices[VHSA_MAX_DEVICES];
  int num_devices;
  int fd = -1;
  int i;

  num_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
  if (num_devices <= 0) return -1;

  for (i = 0; i < num_devices; i++) {
    struct virgl_renderer_capset_hsakmt caps;

    /* nodes[] entries are only meaningful for node types flagged as available. */
    if (!(devices[i]->available_nodes & (1 << DRM_NODE_RENDER))) continue;

    fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
    if (fd < 0) {
      fd = -1;
      continue;
    }

    if (!virtio_gpu_get_capset(fd, &caps) && caps.context_type == VIRTGPU_DRM_CONTEXT_AMDGPU)
      break;

    close(fd);
    fd = -1;
  }

  drmFreeDevices(devices, num_devices);
  return fd;
}


================================================
FILE: libhsakmt/src/virtio/virtio_gpu.h
================================================
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef VIRTIO_GPU_H
#define VIRTIO_GPU_H

#include <pthread.h>
#include <stdint.h>
#include <xf86drm.h>

#include "virtgpu_drm.h"

/* Capability-set id used for the context-init CAPSET_ID parameter and for GET_CAPS queries. */
#define VIRGL_RENDERER_CAPSET_HSAKMT 8
/* Value that virgl_renderer_capset_hsakmt.context_type must have for a device to be accepted. */
#define VIRTGPU_DRM_CONTEXT_AMDGPU 1
/* Upper bound on the number of DRM devices enumerated when looking for a device. */
#define VHSA_MAX_DEVICES 10

/* Element count of a true array (not a pointer); only defined if not already provided. */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

/*
 * Round x up / down to a multiple of `align`. Both use bit masking, so the
 * result is only a multiple of `align` when `align` is a power of two. The
 * result has type uint64_t.
 */
#define VHSA_ALIGN_UP(x, align) (((uint64_t)(x) + (align)-1) & ~(uint64_t)((align)-1))
#define VHSA_ALIGN_DOWN(x, align) ((uint64_t)(x) & ~(uint64_t)((align)-1))

/*
 * Call drmIoctl() with request DRM_IOCTL_<name> and evaluate to its int
 * result. `name` is the request name without the DRM_IOCTL_ prefix, e.g.
 * VIRTGPU_MAP or GEM_CLOSE.
 *
 * Implemented as a statement expression ( ({ ... }) ), a compiler extension.
 * The macro declares a local named `ret`, so an `args` expression that itself
 * refers to a variable called `ret` would be captured by that local.
 */
#define virtio_gpu_ioctl(fd, name, args)                                                           \
  ({                                                                                               \
    int ret = drmIoctl((fd), DRM_IOCTL_##name, (args));                                            \
    ret;                                                                                           \
  })

/* Layout of the capability set returned for VIRGL_RENDERER_CAPSET_HSAKMT. */
struct virgl_renderer_capset_hsakmt {
  uint32_t wire_format_version;
  /* Underlying drm device version: */
  uint32_t version_major;
  uint32_t version_minor;
  uint32_t version_patchlevel;
  uint32_t context_type; /* compared against VIRTGPU_DRM_CONTEXT_AMDGPU when selecting a device */
  uint32_t pad;
};

/* Header at the start of the shared-memory blob. */
struct virtio_gpu_shmem_base {
  uint32_t seqno;          /* polled by the guest; compared against a request's seqno */
  uint32_t rsp_mem_offset; /* byte offset from the blob start to the response area */
};

/* Common header of every request placed in the request staging buffer. */
struct virtio_gpu_ccmd_req {
  uint32_t cmd;
  uint32_t len;     /* number of bytes copied into the staging buffer for this request */
  uint32_t seqno;   /* assigned when the request is staged */
  uint32_t rsp_off; /* offset of the response slot, set by virtio_gpu_alloc_rsp() */
};

/* Common header of every response slot. */
struct virtio_gpu_ccmd_rsp {
  uint32_t len; /* slot size in bytes, written by virtio_gpu_alloc_rsp() */
};

/*
 * Layout of the start of the shared-memory blob: the common header followed
 * by two counters. NOTE(review): who writes async_error / global_faults is not
 * visible in this header; confirm against the host-side implementation.
 */
struct virtio_gpu_shmem {
  struct virtio_gpu_shmem_base base;
  uint32_t async_error;
  uint32_t global_faults;
};

/* vhsakmt_* spellings of the struct tags above; they expand to the virtio_gpu_* names. */
#define vhsakmt_shmem virtio_gpu_shmem
#define vhsakmt_ccmd_req virtio_gpu_ccmd_req
#define vhsakmt_ccmd_rsp virtio_gpu_ccmd_rsp

/* Per-device state created by virtio_gpu_init() and released by virtio_gpu_close(). */
struct virtio_gpu_device {
  int fd; /* DRM device fd; closed by virtio_gpu_close() */

  struct virtio_gpu_shmem* shmem; /* CPU mapping of the shared-memory blob */
  uint32_t shmem_handle;          /* GEM handle of that blob */

  uint8_t* rsp_mem;         /* start of the response area inside the blob */
  uint32_t rsp_mem_len;     /* size of the response area in bytes */
  uint32_t next_rsp_off;    /* offset of the next response slot to hand out */
  pthread_mutex_t rsp_lock; /* guards next_rsp_off */
  pthread_mutex_t eb_lock;  /* held while staging and flushing requests */

  uint32_t next_seqno; /* last sequence number assigned to a request */
  uint32_t reqbuf_len; /* bytes currently staged in reqbuf */
  uint32_t reqbuf_cnt; /* number of requests currently staged */
  uint8_t* reqbuf;     /* heap-allocated request staging buffer */
};

/*
 * NOTE(review): the prototypes below use `bool`, but this header does not
 * include <stdbool.h> itself; confirm every includer pulls it in first.
 */

/* Create the device state for `fd`; returns NULL on failure. */
struct virtio_gpu_device* virtio_gpu_init(int fd, uint32_t context_id);
/* Release everything owned by `vgdev`, including the fd, and free it. */
void virtio_gpu_close(struct virtio_gpu_device* vgdev);
/* Stage `req`; when `sync` is true also submit it and wait for completion. */
int virtio_gpu_exec_cmd(struct virtio_gpu_device* vgdev, struct virtio_gpu_ccmd_req* req,
                        bool sync);
/* Reserve a response slot of `size` bytes for `req` and return a pointer to it. */
void* virtio_gpu_alloc_rsp(struct virtio_gpu_device* vgdev, struct virtio_gpu_ccmd_req* req,
                           uint32_t size);
/* Map buffer object `handle` (optionally at `fixed_map`) and store the address in *addr. */
int virtio_gpu_map_handle(struct virtio_gpu_device* vgdev, uint32_t handle, uint64_t size,
                          void** addr, void* fixed_map);
/* Unmap `size` bytes at `addr`. */
void virtio_gpu_unmap(void* addr, uint64_t size);
/* Issue the RESOURCE_CREATE_BLOB ioctl with `args`. */
int virtio_gpu_create_blob(struct virtio_gpu_device* vgdev,
                           struct drm_virtgpu_resource_create_blob* args);
/* Close GEM handle `bo_handle`. */
int virtio_gpu_destroy_handle(struct virtio_gpu_device* vgdev, uint32_t bo_handle);
/* Look up the resource id of buffer object `handle`. */
int virtio_gpu_res_id(struct virtio_gpu_device* vgdev, uint32_t handle, uint32_t* res_id);
/* Open the first matching render node; returns its fd or -1. */
int virtio_gpu_kfd_open(void);

#endif /* VIRTIO_GPU_H */


================================================
FILE: libhsakmt/tests/kfdtest/.gitignore
================================================


================================================
FILE: libhsakmt/tests/kfdtest/CMakeLists.txt
================================================
#
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
#

# If environment variable DRM_DIR or LIBHSAKMT_PATH is set, the script
# will pick up the corresponding libraries from those paths.

cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

project(KFDTest)

# For DEB/RPM generation
set ( CPACK_PACKAGE_NAME "kfdtest" )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION "This package includes kfdtest, the list of excluded tests for each ASIC, and a convenience script to run the test suite" )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "Test suite for ROCK/KFD" )

# Make proper version for appending
# Default Value is 99999, setting it first
set(ROCM_VERSION_FOR_PACKAGE "99999")
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
endif()

set ( CPACK_PACKAGE_VERSION_MAJOR "1" )
set ( CPACK_PACKAGE_VERSION_MINOR "0" )
set ( CPACK_PACKAGE_VERSION_PATCH "0" )
set ( CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/ROCm/ROCR-Runtime/" )
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT")

## Debian package values
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
if( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
  set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
endif()
## RPM package variables
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
if( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
  set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
endif()

## Note: rpm --eval %{?dist} will evaluate to NULL in Debian
## So Debian distros won't append dist tag to CPACK_RPM_PACKAGE_RELEASE.
## Also for debian package name , the dist tag is added from build env
execute_process( COMMAND rpm --eval %{?dist}
                 RESULT_VARIABLE PROC_RESULT
                 OUTPUT_VARIABLE EVAL_RESULT
                 OUTPUT_STRIP_TRAILING_WHITESPACE )
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
## Add distribution tag to rpm package name
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
  string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
endif()

set(PACKAGE_VERSION_STR "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}")
set(CPACK_PACKAGE_VERSION "${PACKAGE_VERSION_STR}")

## Define default variable and variables for the optional build target hsakmt-dev
set ( SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE STRING "Location of hsakmt source code." )
set ( CMAKE_INSTALL_PREFIX "/opt/rocm"  CACHE STRING "Default installation directory." )
set ( CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}"  CACHE STRING "Default packaging prefix." )
set ( CPACK_GENERATOR "DEB;RPM"  CACHE STRING "Default packaging generators." )

# Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/ROCm/ROCR-Runtime/" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core" )

# RPM package specific variables
set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core")

#set ( CMAKE_VERBOSE_MAKEFILE on )

find_package(PkgConfig)

# Put DRM_DIR at the front of the prefix search path. This is written with
# set() instead of list(PREPEND ...): list(PREPEND) only exists since
# CMake 3.15, while this file declares 3.5 as its minimum version.
set (CMAKE_PREFIX_PATH "${DRM_DIR}" ${CMAKE_PREFIX_PATH})
# The module name passed to pkg_check_modules() is determined by the
# name of file *.pc
pkg_check_modules(DRM REQUIRED libdrm)
pkg_check_modules(DRM_AMDGPU REQUIRED libdrm_amdgpu)
include_directories(${DRM_AMDGPU_INCLUDE_DIRS})

# Locate libhsakmt. Order of precedence:
#   1. the LIBHSAKMT_PATH environment variable,
#   2. pkg-config, searched under ROCM_INSTALL_PATH (or /opt/rocm when unset),
#   3. the OUT_DIR environment variable as a last resort.
if( DEFINED ENV{LIBHSAKMT_PATH} )
    set ( LIBHSAKMT_PATH $ENV{LIBHSAKMT_PATH} )
    message ( "LIBHSAKMT_PATH environment variable is set" )
else()
    # Test the variable by name. if( ${ROCM_INSTALL_PATH} ) would expand to the
    # path text and treat that text as a variable name, which is never defined,
    # so a user-supplied ROCM_INSTALL_PATH would always be ignored.
    if ( ROCM_INSTALL_PATH )
       set ( ENV{PKG_CONFIG_PATH} ${ROCM_INSTALL_PATH}/share/pkgconfig )
    else()
       set ( ENV{PKG_CONFIG_PATH} /opt/rocm/share/pkgconfig )
    endif()

    pkg_check_modules(HSAKMT libhsakmt)

    if( NOT HSAKMT_FOUND )
       set ( LIBHSAKMT_PATH $ENV{OUT_DIR} )
    endif()
endif()

if( DEFINED LIBHSAKMT_PATH )
    set ( HSAKMT_LIBRARY_DIRS ${LIBHSAKMT_PATH} )
    set ( HSAKMT_LIBRARIES hsakmt )
endif()

message ( "Find libhsakmt at ${HSAKMT_LIBRARY_DIRS}" )

if ( POLICY CMP0074 )
    cmake_policy( SET CMP0074 NEW )
endif()

# Look for an LLVMConfig.cmake under $OUT_DIR/llvm/lib/cmake/llvm only (no
# default search paths). When found, it is used as LLVM_DIR for the
# find_package( LLVM ) call that follows; otherwise LLVM_DIR is left as is.
# NOTE(review): the NO_CACHE option of find_path() needs a newer CMake than
# the 3.5 declared by cmake_minimum_required() - confirm the intended minimum.
find_path( LIGHTNING_CMAKE_DIR NAMES LLVMConfig.cmake
    PATHS $ENV{OUT_DIR}/llvm/lib/cmake/llvm NO_CACHE NO_DEFAULT_PATH)

if ( DEFINED LIGHTNING_CMAKE_DIR AND EXISTS ${LIGHTNING_CMAKE_DIR} )
    set ( LLVM_DIR ${LIGHTNING_CMAKE_DIR} )
else()
    message( STATUS "Couldn't find Lightning build in compute directory. "
        "Searching LLVM_DIR then defaulting to system LLVM install if still not found..." )
endif()

find_package( LLVM REQUIRED CONFIG )

if( ${LLVM_PACKAGE_VERSION} VERSION_LESS "7.0" )
    message( FATAL_ERROR "Requires LLVM 7.0 or greater "
        "(found ${LLVM_PACKAGE_VERSION})" )
elseif( ${LLVM_PACKAGE_VERSION} VERSION_LESS "14.0" )
    message( WARNING "Not using latest LLVM version. "
        "Some ASIC targets may not work!" )
endif()

message( STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}" )
message( STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}" )

include_directories(${LLVM_INCLUDE_DIRS})
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})

if (LLVM_LINK_LLVM_DYLIB)
  set(llvm_libs LLVM)
else()
  llvm_map_components_to_libnames(llvm_libs AMDGPUAsmParser Core Support)
endif()

include_directories(${PROJECT_SOURCE_DIR}/gtest-1.6.0)
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/../../include)
include_directories(${PROJECT_SOURCE_DIR}/../../libhsakmt/include)

include_directories(${DRM_INCLUDE_DIRS})

set (SRC_FILES gtest-1.6.0/gtest-all.cpp

  src/AqlQueue.cpp
  src/BasePacket.cpp
  src/BaseDebug.cpp
  src/BaseQueue.cpp
  src/Dispatch.cpp
  src/GoogleTestExtension.cpp
  src/IndirectBuffer.cpp
  src/Assemble.cpp
  src/ShaderStore.cpp
  src/LinuxOSWrapper.cpp
  src/PM4Packet.cpp
  src/PM4Queue.cpp
  src/RDMAUtil.cpp
  src/SDMAPacket.cpp
  src/SDMAQueue.cpp
  src/KFDBaseComponentTest.cpp
  src/KFDMultiProcessTest.cpp
  src/KFDTestMain.cpp
  src/KFDTestUtil.cpp
  src/KFDTestUtilQueue.cpp

  src/KFDOpenCloseKFDTest.cpp
  src/KFDTopologyTest.cpp
  src/KFDMemoryTest.cpp
  src/KFDLocalMemoryTest.cpp
  src/KFDEventTest.cpp
  src/KFDQMTest.cpp
  src/KFDCWSRTest.cpp
  src/KFDExceptionTest.cpp
  src/KFDGraphicsInterop.cpp
  src/KFDPerfCounters.cpp
  src/KFDDBGTest.cpp
  src/KFDGWSTest.cpp
  src/KFDIPCTest.cpp
  src/KFDASMTest.cpp

  src/KFDEvictTest.cpp
  src/KFDHWSTest.cpp
  src/KFDPerformanceTest.cpp
  src/KFDPMTest.cpp
  src/KFDSVMRangeTest.cpp
  src/KFDSVMEvictTest.cpp
  src/KFDRASTest.cpp
  src/KFDPCSamplingTest.cpp
  src/KFDNegativeTest.cpp
  src/RDMATest.cpp)

message( STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR} )
#message( STATUS "SRC_FILES: ")
#foreach(file ${SRC_FILES})
#  message(STATUS "${file}")
#endforeach()

#add_definitions(-Wall -std=c++11)

# Select the C++ dialect and RUNPATH behaviour for compilers newer than 4.8.0.
# The comparison must be VERSION_GREATER: a plain string comparison
# (STRGREATER) orders "10.2.0" before "4.8.0", which silently dropped these
# flags for every compiler with a two-digit major version.
if ( "${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "4.8.0" )
    ## Add --enable-new-dtags to generate DT_RUNPATH
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17 -Wl,--enable-new-dtags" )
endif()

# Release builds are optimized; every other build type gets debug info.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2" )
else ()
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g" )
endif ()

## Address Sanitize Flag
# AddressSanitizer has to be enabled for both compilation and linking.
# The linker flag is appended rather than assigned so that linker flags
# supplied by the user or the toolchain are preserved.
if ( ${ADDRESS_SANITIZER} )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address" )
    set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address" )
endif ()

# link_directories() has to be put before add_executable()
# The modules found by pkg_check_modules() in the default pkg config
# path do not need to use link_directories() here.
link_directories(${HSAKMT_LIBRARY_DIRS})

# The single test binary, built from every source listed in SRC_FILES.
add_executable(kfdtest ${SRC_FILES})

# Link against libhsakmt, the libdrm / libdrm_amdgpu linker flags, the LLVM
# libraries held in llvm_libs, and the listed system libraries.
target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} ${llvm_libs} pthread m stdc++ rt numa)

# Copy the exclusion list and the runner script verbatim (COPYONLY: no
# variable substitution) into the build directory.
configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY )
configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY )

# Install the test binary and runner script as executables under bin, and
# the exclusion list as a plain data file under share/kfdtest.
install( PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/kfdtest ${CMAKE_CURRENT_BINARY_DIR}/run_kfdtest.sh
	DESTINATION bin )
install( FILES ${CMAKE_CURRENT_BINARY_DIR}/kfdtest.exclude
	DESTINATION share/kfdtest )
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake
# The input variables are quoted so that string(REGEX REPLACE) always receives
# exactly one input argument: an unquoted empty variable makes the command
# fail for lack of arguments, and a value containing ';' would be split and
# re-joined without its separators.
if(NOT ROCM_DEP_ROCMCORE)
    string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES}")
    string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_PACKAGE_DEPENDS}")
endif()
# CPack must be included after the CPACK_* variables have their final values.
include ( CPack )


================================================
FILE: libhsakmt/tests/kfdtest/LICENSE.kfdtest
================================================
KFDTest - KFD unit tests LICENSE
Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.

MIT LICENSE:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


================================================
FILE: libhsakmt/tests/kfdtest/README.txt
================================================
1. Note on building kfdtest

To build this kfdtest application, the following libraries should be already
installed on the building machine:
libdrm libdrm_amdgpu libhsakmt

If libhsakmt is not installed, but the headers and libraries are present
locally, you can point the build at the directory that contains libhsakmt.a:
export LIBHSAKMT_PATH=/path/to/libhsakmt/dir
With that, CMake/make will look for the library at $LIBHSAKMT_PATH/libhsakmt.a
Note that this assumes that you will be building kfdtest from the same thunk found in ../..

2. How to run kfdtest

Just run "./run_kfdtest.sh" from the build output folder. You may need
to add the directory containing the libhsakmt library to the library search
path:
export LD_LIBRARY_PATH=/path/to/libhsakmt/dir

Note: you can use "run_kfdtest.sh -h" to see more options.


================================================
FILE: libhsakmt/tests/kfdtest/gtest-1.6.0/gtest/gtest.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for Google Test.  It should be
// included by any test program that uses Google Test.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
// program!
//
// Acknowledgment: Google Test borrowed the idea of automatic test
// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
// easyUnit framework.

#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_H_

#include <stdint.h>
#include <limits>
#include <vector>

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares functions and macros used internally by
// Google Test.  They are subject to change without notice.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan)
//
// Low-level types and utilities for porting Google Test to various
// platforms.  They are subject to change without notice.  DO NOT USE
// THEM IN USER CODE.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_

// The user can define the following macros in the build script to
// control Google Test's behavior.  If the user doesn't define a macro
// in this list, Google Test will define it.
//
//   GTEST_HAS_CLONE          - Define it to 1/0 to indicate that clone(2)
//                              is/isn't available.
//   GTEST_HAS_EXCEPTIONS     - Define it to 1/0 to indicate that exceptions
//                              are enabled.
//   GTEST_HAS_GLOBAL_STRING  - Define it to 1/0 to indicate that ::string
//                              is/isn't available (some systems define
//                              ::string, which is different to std::string).
//   GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring
//                              is/isn't available (some systems define
//                              ::wstring, which is different to std::wstring).
//   GTEST_HAS_POSIX_RE       - Define it to 1/0 to indicate that POSIX regular
//                              expressions are/aren't available.
//   GTEST_HAS_PTHREAD        - Define it to 1/0 to indicate that <pthread.h>
//                              is/isn't available.
//   GTEST_HAS_RTTI           - Define it to 1/0 to indicate that RTTI is/isn't
//                              enabled.
//   GTEST_HAS_STD_WSTRING    - Define it to 1/0 to indicate that
//                              std::wstring does/doesn't work (Google Test can
//                              be used where std::wstring is unavailable).
//   GTEST_HAS_TR1_TUPLE      - Define it to 1/0 to indicate tr1::tuple
//                              is/isn't available.
//   GTEST_HAS_SEH            - Define it to 1/0 to indicate whether the
//                              compiler supports Microsoft's "Structured
//                              Exception Handling".
//   GTEST_HAS_STREAM_REDIRECTION
//                            - Define it to 1/0 to indicate whether the
//                              platform supports I/O stream redirection using
//                              dup() and dup2().
//   GTEST_USE_OWN_TR1_TUPLE  - Define it to 1/0 to indicate whether Google
//                              Test's own tr1 tuple implementation should be
//                              used.  Unused when the user sets
//                              GTEST_HAS_TR1_TUPLE to 0.
//   GTEST_LINKED_AS_SHARED_LIBRARY
//                            - Define to 1 when compiling tests that use
//                              Google Test as a shared library (known as
//                              DLL on Windows).
//   GTEST_CREATE_SHARED_LIBRARY
//                            - Define to 1 when compiling Google Test itself
//                              as a shared library.

// This header defines the following utilities:
//
// Macros indicating the current platform (defined to 1 if compiled on
// the given platform; otherwise undefined):
//   GTEST_OS_AIX      - IBM AIX
//   GTEST_OS_CYGWIN   - Cygwin
//   GTEST_OS_HPUX     - HP-UX
//   GTEST_OS_LINUX    - Linux
//     GTEST_OS_LINUX_ANDROID - Google Android
//   GTEST_OS_MAC      - Mac OS X
//   GTEST_OS_NACL     - Google Native Client (NaCl)
//   GTEST_OS_SOLARIS  - Sun Solaris
//   GTEST_OS_SYMBIAN  - Symbian
//   GTEST_OS_WINDOWS  - Windows (Desktop, MinGW, or Mobile)
//     GTEST_OS_WINDOWS_DESKTOP  - Windows Desktop
//     GTEST_OS_WINDOWS_MINGW    - MinGW
//     GTEST_OS_WINDOWS_MOBILE   - Windows Mobile
//   GTEST_OS_ZOS      - z/OS
//
// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
// most stable support.  Since core members of the Google Test project
// don't have access to other platforms, support for them may be less
// stable.  If you notice any problems on your platform, please notify
// googletestframework@googlegroups.com (patches for fixing them are
// even more welcome!).
//
// Note that it is possible that none of the GTEST_OS_* macros are defined.
//
// Macros indicating available Google Test features (defined to 1 if
// the corresponding feature is supported; otherwise undefined):
//   GTEST_HAS_COMBINE      - the Combine() function (for value-parameterized
//                            tests)
//   GTEST_HAS_DEATH_TEST   - death tests
//   GTEST_HAS_PARAM_TEST   - value-parameterized tests
//   GTEST_HAS_TYPED_TEST   - typed tests
//   GTEST_HAS_TYPED_TEST_P - type-parameterized tests
//   GTEST_USES_POSIX_RE    - enhanced POSIX regex is used. Do not confuse with
//                            GTEST_HAS_POSIX_RE (see above) which users can
//                            define themselves.
//   GTEST_USES_SIMPLE_RE   - our own simple regex is used;
//                            the above two are mutually exclusive.
//   GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
//
// Macros for basic C++ coding:
//   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
//   GTEST_ATTRIBUTE_UNUSED_  - declares that a class' instances or a
//                              variable don't have to be used.
//   GTEST_DISALLOW_ASSIGN_   - disables operator=.
//   GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
//   GTEST_MUST_USE_RESULT_   - declares that a function's result must be used.
//
// Synchronization:
//   Mutex, MutexLock, ThreadLocal, GetThreadCount()
//                  - synchronization primitives.
//   GTEST_IS_THREADSAFE - defined to 1 to indicate that the above
//                         synchronization primitives have real implementations
//                         and Google Test is thread-safe; or 0 otherwise.
//
// Template meta programming:
//   is_pointer     - as in TR1; needed on Symbian and IBM XL C/C++ only.
//   IteratorTraits - partial implementation of std::iterator_traits, which
//                    is not available in libCstd when compiled with Sun C++.
//
// Smart pointers:
//   scoped_ptr     - as in TR2.
//
// Regular expressions:
//   RE             - a simple regular expression class using the POSIX
//                    Extended Regular Expression syntax on UNIX-like
//                    platforms, or a reduced regular expression syntax on
//                    other platforms, including Windows.
//
// Logging:
//   GTEST_LOG_()   - logs messages at the specified severity level.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.
//
// Stdout and stderr capturing:
//   CaptureStdout()     - starts capturing stdout.
//   GetCapturedStdout() - stops capturing stdout and returns the captured
//                         string.
//   CaptureStderr()     - starts capturing stderr.
//   GetCapturedStderr() - stops capturing stderr and returns the captured
//                         string.
//
// Integer types:
//   TypeWithSize   - maps an integer to an int type.
//   Int32, UInt32, Int64, UInt64, TimeInMillis
//                  - integers of known sizes.
//   BiggestInt     - the biggest signed integer type.
//
// Command-line utilities:
//   GTEST_FLAG()       - references a flag.
//   GTEST_DECLARE_*()  - declares a flag.
//   GTEST_DEFINE_*()   - defines a flag.
//   GetArgvs()         - returns the command line as a vector of strings.
//
// Environment variable utilities:
//   GetEnv()             - gets the value of an environment variable.
//   BoolFromGTestEnv()   - parses a bool environment variable.
//   Int32FromGTestEnv()  - parses an Int32 environment variable.
//   StringFromGTestEnv() - parses a string environment variable.

#include <ctype.h>   // for isspace, etc
#include <stddef.h>  // for ptrdiff_t
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef _WIN32_WCE
# include <sys/types.h>
# include <sys/stat.h>
#endif  // !_WIN32_WCE

#include <iostream>  // NOLINT
#include <sstream>  // NOLINT
#include <string>  // NOLINT

// String constants describing Google Test itself: the developer contact
// address, the flag-name prefixes (underscore, dash and upper-case
// variants), the product name and the project URL.
// NOTE(review): GTEST_DEV_EMAIL_ contains a doubled '@'. It is left
// untouched because it is part of a string literal -- confirm whether it is
// intentional.
#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
#define GTEST_FLAG_PREFIX_ "gtest_"
#define GTEST_FLAG_PREFIX_DASH_ "gtest-"
#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
#define GTEST_NAME_ "Google Test"
#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/"

// Encodes the gcc version as one integer so that it can be compared in
// #if expressions; for example, gcc 4.3.2 becomes 40302.  The macro is
// left undefined when the compiler does not define __GNUC__.
#if defined(__GNUC__)
# define GTEST_GCC_VER_ \
    ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__))
#endif  // defined(__GNUC__)

// Determines the platform on which Google Test is compiled.  The tests are
// tried in order and the first match wins, so at most one top-level
// GTEST_OS_* macro ends up defined; Windows and Linux additionally define
// a sub-platform macro.
#if defined(__CYGWIN__)
# define GTEST_OS_CYGWIN 1
#elif defined(__SYMBIAN32__)
# define GTEST_OS_SYMBIAN 1
#elif defined(_WIN32)
# define GTEST_OS_WINDOWS 1
# if defined(_WIN32_WCE)
#  define GTEST_OS_WINDOWS_MOBILE 1
# elif defined(__MINGW__) || defined(__MINGW32__)
#  define GTEST_OS_WINDOWS_MINGW 1
# else
#  define GTEST_OS_WINDOWS_DESKTOP 1
# endif  // defined(_WIN32_WCE)
#elif defined(__APPLE__)
# define GTEST_OS_MAC 1
#elif defined(__linux__)
# define GTEST_OS_LINUX 1
# if defined(ANDROID)
#  define GTEST_OS_LINUX_ANDROID 1
# endif  // defined(ANDROID)
#elif defined(__MVS__)
# define GTEST_OS_ZOS 1
#elif defined(__sun) && defined(__SVR4)
# define GTEST_OS_SOLARIS 1
#elif defined(_AIX)
# define GTEST_OS_AIX 1
#elif defined(__hpux)
# define GTEST_OS_HPUX 1
#elif defined(__native_client__)
# define GTEST_OS_NACL 1
#endif  // platform detection

// Brings in definitions for functions used in the testing::internal::posix
// namespace (read, write, close, chdir, isatty, stat). We do not currently
// use them on Windows Mobile.
#if !GTEST_OS_WINDOWS
// This assumes that non-Windows OSes provide unistd.h. For OSes where this
// is not the case, we need to include headers that provide the functions
// mentioned above.
# include <unistd.h>
# if !GTEST_OS_NACL
// TODO(vladl@google.com): Remove this condition when Native Client SDK adds
// strings.h (tracked in
// http://code.google.com/p/nativeclient/issues/detail?id=1175).
#  include <strings.h>  // Native Client doesn't provide strings.h.
# endif
#elif !GTEST_OS_WINDOWS_MOBILE
# include <direct.h>
# include <io.h>
#endif

#if defined(_MSC_VER)
# include <windows.h>
#endif

// GTEST_HAS_POSIX_RE is true iff Google Test can use POSIX regular
// expressions.  Unless the user overrides it, they are assumed to be
// available everywhere except on Windows.
#if !defined(GTEST_HAS_POSIX_RE)
# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
#endif  // !defined(GTEST_HAS_POSIX_RE)

#if GTEST_HAS_POSIX_RE

// On some platforms <regex.h> does not compile unless size_t is already
// defined.  Including it at this point is safe because <stdlib.h> has been
// included earlier and is guaranteed to define size_t through <stddef.h>.
# include <regex.h>  // NOLINT

# define GTEST_USES_POSIX_RE 1

#else

// <regex.h> is not available on Windows and may be missing on other
// platforms as well, so fall back to Google Test's own simple regex
// implementation.
# define GTEST_USES_SIMPLE_RE 1

#endif  // GTEST_HAS_POSIX_RE

#if !defined(GTEST_HAS_EXCEPTIONS)
// The user gave no setting, so detect whether exceptions are enabled.
# if defined(_MSC_VER) || defined(__BORLANDC__)
// The STL implementations of MSVC and C++Builder use the _HAS_EXCEPTIONS
// macro to enable exceptions, so it is mirrored here.  When it is not set,
// exceptions are assumed to be enabled.
#  if !defined(_HAS_EXCEPTIONS)
#   define _HAS_EXCEPTIONS 1
#  endif  // !defined(_HAS_EXCEPTIONS)
#  define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
# elif (defined(__GNUC__) && __EXCEPTIONS) || \
       defined(__SUNPRO_CC) || \
       (defined(__IBMCPP__) && __EXCEPTIONS) || \
       defined(__HP_aCC)
// Exceptions are treated as enabled in each of these cases:
//  - gcc and xlC define __EXCEPTIONS to 1 iff exceptions are enabled;
//  - Sun Pro CC supports exceptions but offers no compile-time way of
//    detecting whether they are on, so they are assumed to be enabled
//    unless the user says otherwise;
//  - HP aCC has exception handling in effect by default; it has to be
//    turned off with the +noeh compiler option if desired.
#  define GTEST_HAS_EXCEPTIONS 1
# else
// For any other compiler, conservatively assume exceptions are disabled.
#  define GTEST_HAS_EXCEPTIONS 0
# endif  // compiler-specific exception detection
#endif  // !defined(GTEST_HAS_EXCEPTIONS)

#ifdef GTEST_HAS_STD_STRING
# if !GTEST_HAS_STD_STRING
// The user explicitly declared ::std::string unavailable, which Google
// Test cannot work without.
#  error "Google Test cannot be used where ::std::string isn't available."
# endif  // !GTEST_HAS_STD_STRING
#else
// Google Test itself no longer consults this macro; it is still defined
// for the benefit of clients that may depend on it.
# define GTEST_HAS_STD_STRING 1
#endif  // GTEST_HAS_STD_STRING

#if !defined(GTEST_HAS_GLOBAL_STRING)
// No user setting for the availability of ::string; default to "not
// available".
# define GTEST_HAS_GLOBAL_STRING 0
#endif  // !defined(GTEST_HAS_GLOBAL_STRING)

#if !defined(GTEST_HAS_STD_WSTRING)
// No user setting for the availability of ::std::wstring, so derive it
// from the platform.
// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
//   is available.

// ::std::wstring is assumed to work everywhere except on Cygwin (1.7 and
// below lack it), Solaris (its libc++ lacks it) and Android (no support at
// least as recent as Froyo, 2.2).
# define GTEST_HAS_STD_WSTRING \
    (!GTEST_OS_LINUX_ANDROID && !GTEST_OS_CYGWIN && !GTEST_OS_SOLARIS)

#endif  // !defined(GTEST_HAS_STD_WSTRING)

#if !defined(GTEST_HAS_GLOBAL_WSTRING)
// No user setting for the availability of ::wstring: it is taken to exist
// exactly when both ::string and ::std::wstring do.
# define GTEST_HAS_GLOBAL_WSTRING \
    (GTEST_HAS_GLOBAL_STRING && GTEST_HAS_STD_WSTRING)
#endif  // !defined(GTEST_HAS_GLOBAL_WSTRING)

// Determines whether RTTI is available.
#if !defined(GTEST_HAS_RTTI)
// No user override was supplied, so probe the compiler.

# if defined(_MSC_VER)

// MSVC defines _CPPRTTI iff RTTI is enabled.
#  if defined(_CPPRTTI)
#   define GTEST_HAS_RTTI 1
#  else
#   define GTEST_HAS_RTTI 0
#  endif  // defined(_CPPRTTI)

# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)

// gcc 4.3.2 and later define __GXX_RTTI iff RTTI is enabled.
#  if defined(__GXX_RTTI)
#   define GTEST_HAS_RTTI 1
#  else
#   define GTEST_HAS_RTTI 0
#  endif  // defined(__GXX_RTTI)

# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)

// IBM Visual Age 9.0 and later define __RTTI_ALL__ to 1 if both the typeid
// and dynamic_cast features are present.
#  if defined(__RTTI_ALL__)
#   define GTEST_HAS_RTTI 1
#  else
#   define GTEST_HAS_RTTI 0
#  endif  // defined(__RTTI_ALL__)

# else

// Every other compiler is assumed to have RTTI enabled.
#  define GTEST_HAS_RTTI 1

# endif  // defined(_MSC_VER)

#endif  // !defined(GTEST_HAS_RTTI)

// This header is responsible for pulling in <typeinfo> whenever RTTI is
// enabled.
#if GTEST_HAS_RTTI
# include <typeinfo>
#endif  // GTEST_HAS_RTTI

// Determines whether Google Test can use the pthreads library.
#if !defined(GTEST_HAS_PTHREAD)
// Without an explicit setting from the user, pthreads support is assumed
// to be available on Linux, Mac and HP-UX.
//
// Add -DGTEST_HAS_PTHREAD=0 to the compiler flags to disable threading
// support in Google Test.
# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX)
#endif  // !defined(GTEST_HAS_PTHREAD)

#if GTEST_HAS_PTHREAD
// gtest-port.h guarantees that <pthread.h> is included whenever
// GTEST_HAS_PTHREAD is true.
# include <pthread.h>  // NOLINT

// Provides timespec and nanosleep, which are used further down.
# include <time.h>  // NOLINT
#endif  // GTEST_HAS_PTHREAD

// Determines whether Google Test can use tr1/tuple.  You can define
// this macro to 0 to prevent Google Test from using tuple (any
// feature depending on tuple will be disabled in this mode).
#ifndef GTEST_HAS_TR1_TUPLE
// The user didn't set it, so default to 0: tr1/tuple is not used unless
// the build explicitly defines GTEST_HAS_TR1_TUPLE to 1.
# define GTEST_HAS_TR1_TUPLE 0
#endif  // GTEST_HAS_TR1_TUPLE

// Determines whether Google Test's own tr1 tuple implementation
// should be used.
#if !defined(GTEST_USE_OWN_TR1_TUPLE)
// No user setting, so decide from the compiler.

// Google Test's own TR1 tuple is used whenever the compiler is not known
// to ship one.  At this time, GCC 4.0.0+ and MSVC 2010 are the only
// mainstream compilers that come with a TR1 tuple implementation.
// NVIDIA's CUDA NVCC compiler pretends to be GCC by defining __GNUC__ and
// friends, but cannot compile GCC's tuple implementation, so it is treated
// as not having one.  MSVC 2008 (9.0) provides TR1 tuple only in a 323 MB
// Feature Pack download, which the user cannot be assumed to have.
# if (!defined(__GNUC__) || defined(__CUDACC__) || (GTEST_GCC_VER_ < 40000)) \
    && (_MSC_VER < 1600)
#  define GTEST_USE_OWN_TR1_TUPLE 1
# else
#  define GTEST_USE_OWN_TR1_TUPLE 0
# endif

#endif  // !defined(GTEST_USE_OWN_TR1_TUPLE)

// To avoid conditional compilation everywhere, we make it
// gtest-port.h's responsibility to #include the header implementing
// tr1/tuple.
#if GTEST_HAS_TR1_TUPLE

# if GTEST_USE_OWN_TR1_TUPLE
// This file was GENERATED by a script.  DO NOT EDIT BY HAND!!!

// Copyright 2009 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Implements a subset of TR1 tuple needed by Google Test and Google Mock.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_

#include <utility>  // For ::std::pair.

// Normally the tuple template is declared a friend and the members that
// follow the macro are private.  The compiler used in Symbian has a bug
// that rejects that friend declaration (it complains that tuple is
// redefined), and Sun Studio versions < 12 share the bug; on those
// compilers the members are simply made public instead.
#if !defined(__SYMBIAN32__) && !(defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
    template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
   private:
#else
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
#endif

// GTEST_n_TUPLE_(T) is the type of an n-tuple.
// T is a name prefix: T##k pastes it together with the index k, so
// GTEST_2_TUPLE_(T) names tuple<T0, T1, void, ...>.  The slots an n-tuple
// does not use are filled with void, so every arity from 1 to 10 maps onto
// the same 10-parameter tuple template.
#define GTEST_0_TUPLE_(T) tuple<>
#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \
    void, void, void>
#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \
    void, void, void>
#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \
    void, void, void>
#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \
    void, void, void>
#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
    void, void, void>
#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
    void, void, void>
#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    void, void, void>
#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, void, void>
#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, void>
#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, T##9>

// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
// T is a name prefix: T##k pastes it together with the index k, so
// GTEST_3_TYPENAMES_(T) expands to
// "typename T0, typename T1, typename T2".  The zero-arity form expands to
// nothing.
#define GTEST_0_TYPENAMES_(T)
#define GTEST_1_TYPENAMES_(T) typename T##0
#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3
#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4
#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5
#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6
#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, \
    typename T##7, typename T##8
#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, \
    typename T##7, typename T##8, typename T##9

// In theory, defining stuff in the ::std namespace is undefined
// behavior.  We can do this as we are playing the role of a standard
// library vendor.
namespace std {
namespace tr1 {

// Declaration of the tuple class template.  It accepts up to ten element
// types; every parameter defaults to void, so tuple<A, B> names the same
// type as tuple<A, B, void, void, void, void, void, void, void, void>.
template <typename T0 = void,
          typename T1 = void,
          typename T2 = void,
          typename T3 = void,
          typename T4 = void,
          typename T5 = void,
          typename T6 = void,
          typename T7 = void,
          typename T8 = void,
          typename T9 = void>
class tuple;

// Anything in namespace gtest_internal is Google Test's INTERNAL
// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
namespace gtest_internal {

// ByRef<T>::type leaves a reference type T& unchanged and turns any other
// T into const T&.
template <typename T>
struct ByRef {
  typedef const T& type;  // NOLINT
};

template <typename T>
struct ByRef<T&> {
  typedef T& type;  // NOLINT
};

// Shorthand for ByRef<T>::type, for use where T is a dependent type.
#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type

// AddRef<T>::type leaves a reference type T& unchanged and turns any other
// T into T&.  This matches tr1::add_reference<T>::type.
template <typename T>
struct AddRef {
  typedef T& type;  // NOLINT
};

template <typename T>
struct AddRef<T&> {
  typedef T& type;  // NOLINT
};

// Shorthand for AddRef<T>::type, for use where T is a dependent type.
#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type

// A helper for implementing get<k>().
template <int k> class Get;

// A helper for implementing tuple_element<k, T>.  kIndexValid is true
// iff k < the number of fields in tuple type T.
template <bool kIndexValid, int kIndex, class Tuple>
struct TupleElement;

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 0, GTEST_10_TUPLE_(T)> { typedef T0 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 1, GTEST_10_TUPLE_(T)> { typedef T1 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 2, GTEST_10_TUPLE_(T)> { typedef T2 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 3, GTEST_10_TUPLE_(T)> { typedef T3 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 4, GTEST_10_TUPLE_(T)> { typedef T4 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 5, GTEST_10_TUPLE_(T)> { typedef T5 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 6, GTEST_10_TUPLE_(T)> { typedef T6 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 7, GTEST_10_TUPLE_(T)> { typedef T7 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 8, GTEST_10_TUPLE_(T)> { typedef T8 type; };

template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 9, GTEST_10_TUPLE_(T)> { typedef T9 type; };

}  // namespace gtest_internal

// The empty tuple.  It has no fields, so construction, copying and
// assignment have nothing to do.
template <>
class tuple<> {
 public:
  tuple() {}

  tuple(const tuple&) {}

  tuple& operator=(const tuple&) {
    return *this;
  }
};

template <GTEST_1_TYPENAMES_(T)>
class GTEST_1_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {}

  tuple(const tuple& t) : f0_(t.f0_) {}

  template <GTEST_1_TYPENAMES_(U)>
  tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_1_TYPENAMES_(U)>
  tuple& operator=(const GTEST_1_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_1_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) {
    f0_ = t.f0_;
    return *this;
  }

  T0 f0_;
};

template <GTEST_2_TYPENAMES_(T)>
class GTEST_2_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0),
      f1_(f1) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {}

  template <GTEST_2_TYPENAMES_(U)>
  tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {}
  template <typename U0, typename U1>
  tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_2_TYPENAMES_(U)>
  tuple& operator=(const GTEST_2_TUPLE_(U)& t) {
    return CopyFrom(t);
  }
  template <typename U0, typename U1>
  tuple& operator=(const ::std::pair<U0, U1>& p) {
    f0_ = p.first;
    f1_ = p.second;
    return *this;
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_2_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
};

template <GTEST_3_TYPENAMES_(T)>
class GTEST_3_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}

  template <GTEST_3_TYPENAMES_(U)>
  tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_3_TYPENAMES_(U)>
  tuple& operator=(const GTEST_3_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_3_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
};

template <GTEST_4_TYPENAMES_(T)>
class GTEST_4_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2),
      f3_(f3) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {}

  template <GTEST_4_TYPENAMES_(U)>
  tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_4_TYPENAMES_(U)>
  tuple& operator=(const GTEST_4_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_4_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
};

template <GTEST_5_TYPENAMES_(T)>
class GTEST_5_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3,
      GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_) {}

  template <GTEST_5_TYPENAMES_(U)>
  tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_5_TYPENAMES_(U)>
  tuple& operator=(const GTEST_5_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_5_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
};

template <GTEST_6_TYPENAMES_(T)>
class GTEST_6_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
      GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
      f5_(f5) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_), f5_(t.f5_) {}

  template <GTEST_6_TYPENAMES_(U)>
  tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_6_TYPENAMES_(U)>
  tuple& operator=(const GTEST_6_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_6_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
};

template <GTEST_7_TYPENAMES_(T)>
class GTEST_7_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2),
      f3_(f3), f4_(f4), f5_(f5), f6_(f6) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}

  template <GTEST_7_TYPENAMES_(U)>
  tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_7_TYPENAMES_(U)>
  tuple& operator=(const GTEST_7_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_7_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
};

template <GTEST_8_TYPENAMES_(T)>
class GTEST_8_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6,
      GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
      f5_(f5), f6_(f6), f7_(f7) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}

  template <GTEST_8_TYPENAMES_(U)>
  tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_8_TYPENAMES_(U)>
  tuple& operator=(const GTEST_8_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_8_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
};

template <GTEST_9_TYPENAMES_(T)>
class GTEST_9_TUPLE_(T) {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
      GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
      f5_(f5), f6_(f6), f7_(f7), f8_(f8) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}

  template <GTEST_9_TYPENAMES_(U)>
  tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_9_TYPENAMES_(U)>
  tuple& operator=(const GTEST_9_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_9_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    f8_ = t.f8_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
  T8 f8_;
};

template <GTEST_10_TYPENAMES_(T)>
class tuple {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(),
      f9_() {}

  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
      GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
      GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
      GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2),
      f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {}

  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
      f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {}

  template <GTEST_10_TYPENAMES_(U)>
  tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
      f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_),
      f9_(t.f9_) {}

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_10_TYPENAMES_(U)>
  tuple& operator=(const GTEST_10_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_10_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    f8_ = t.f8_;
    f9_ = t.f9_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
  T8 f8_;
  T9 f9_;
};

// 6.1.3.2 Tuple creation functions.
//
// make_tuple(v0, ..., v(n-1)) returns an n-tuple whose element types are
// the deduced argument types and whose fields are copies of the arguments.
//
// Known limitations: passing an std::tr1::reference_wrapper<T> to
// make_tuple() is not supported, and tie() is not implemented.

inline tuple<> make_tuple() {
  return tuple<>();
}

template <GTEST_1_TYPENAMES_(T)>
inline GTEST_1_TUPLE_(T)
make_tuple(const T0& v0) {
  return GTEST_1_TUPLE_(T)(v0);
}

template <GTEST_2_TYPENAMES_(T)>
inline GTEST_2_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1) {
  return GTEST_2_TUPLE_(T)(v0, v1);
}

template <GTEST_3_TYPENAMES_(T)>
inline GTEST_3_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2) {
  return GTEST_3_TUPLE_(T)(v0, v1, v2);
}

template <GTEST_4_TYPENAMES_(T)>
inline GTEST_4_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3) {
  return GTEST_4_TUPLE_(T)(v0, v1, v2, v3);
}

template <GTEST_5_TYPENAMES_(T)>
inline GTEST_5_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4) {
  return GTEST_5_TUPLE_(T)(v0, v1, v2, v3, v4);
}

template <GTEST_6_TYPENAMES_(T)>
inline GTEST_6_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4, const T5& v5) {
  return GTEST_6_TUPLE_(T)(v0, v1, v2, v3, v4, v5);
}

template <GTEST_7_TYPENAMES_(T)>
inline GTEST_7_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4, const T5& v5, const T6& v6) {
  return GTEST_7_TUPLE_(T)(v0, v1, v2, v3, v4, v5, v6);
}

template <GTEST_8_TYPENAMES_(T)>
inline GTEST_8_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4, const T5& v5, const T6& v6, const T7& v7) {
  return GTEST_8_TUPLE_(T)(v0, v1, v2, v3, v4, v5, v6, v7);
}

template <GTEST_9_TYPENAMES_(T)>
inline GTEST_9_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4, const T5& v5, const T6& v6, const T7& v7,
           const T8& v8) {
  return GTEST_9_TUPLE_(T)(v0, v1, v2, v3, v4, v5, v6, v7, v8);
}

template <GTEST_10_TYPENAMES_(T)>
inline GTEST_10_TUPLE_(T)
make_tuple(const T0& v0, const T1& v1, const T2& v2, const T3& v3,
           const T4& v4, const T5& v5, const T6& v6, const T7& v7,
           const T8& v8, const T9& v9) {
  return GTEST_10_TUPLE_(T)(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9);
}

// 6.1.3.3 Tuple helper classes.

template <typename Tuple> struct tuple_size;

template <GTEST_0_TYPENAMES_(T)>
struct tuple_size<GTEST_0_TUPLE_(T)> { static const int value = 0; };

template <GTEST_1_TYPENAMES_(T)>
struct tuple_size<GTEST_1_TUPLE_(T)> { static const int value = 1; };

template <GTEST_2_TYPENAMES_(T)>
struct tuple_size<GTEST_2_TUPLE_(T)> { static const int value = 2; };

template <GTEST_3_TYPENAMES_(T)>
struct tuple_size<GTEST_3_TUPLE_(T)> { static const int value = 3; };

template <GTEST_4_TYPENAMES_(T)>
struct tuple_size<GTEST_4_TUPLE_(T)> { static const int value = 4; };

template <GTEST_5_TYPENAMES_(T)>
struct tuple_size<GTEST_5_TUPLE_(T)> { static const int value = 5; };

template <GTEST_6_TYPENAMES_(T)>
struct tuple_size<GTEST_6_TUPLE_(T)> { static const int value = 6; };

template <GTEST_7_TYPENAMES_(T)>
struct tuple_size<GTEST_7_TUPLE_(T)> { static const int value = 7; };

template <GTEST_8_TYPENAMES_(T)>
struct tuple_size<GTEST_8_TUPLE_(T)> { static const int value = 8; };

template <GTEST_9_TYPENAMES_(T)>
struct tuple_size<GTEST_9_TUPLE_(T)> { static const int value = 9; };

template <GTEST_10_TYPENAMES_(T)>
struct tuple_size<GTEST_10_TUPLE_(T)> { static const int value = 10; };

// tuple_element<k, Tuple>::type is the type of field k of Tuple.  The
// lookup is delegated to gtest_internal::TupleElement, whose first
// argument states whether k is below tuple_size<Tuple>::value.
template <int k, class Tuple>
struct tuple_element {
  typedef typename gtest_internal::TupleElement<
      (k < tuple_size<Tuple>::value), k, Tuple>::type type;
};

// Shorthand for tuple_element<k, Tuple>::type in dependent contexts.
#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type

// 6.1.3.4 Element access.

namespace gtest_internal {

// Get<k> gives access to field k (member fk_) of a tuple.  Field() takes a
// non-const tuple and returns AddRef of the element type; ConstField()
// takes a const tuple and returns ByRef of the element type.

// Accessors for field 0.
template <>
class Get<0> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f0_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f0_;
  }
};

// Accessors for field 1.
template <>
class Get<1> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f1_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f1_;
  }
};

// Accessors for field 2.
template <>
class Get<2> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f2_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f2_;
  }
};

// Accessors for field 3.
template <>
class Get<3> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f3_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f3_;
  }
};

// Accessors for field 4.
template <>
class Get<4> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f4_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f4_;
  }
};

// Accessors for field 5.
template <>
class Get<5> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f5_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f5_;
  }
};

// Accessors for field 6.
template <>
class Get<6> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f6_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f6_;
  }
};

// Accessors for field 7.
template <>
class Get<7> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f7_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f7_;
  }
};

// Accessors for field 8.
template <>
class Get<8> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f8_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f8_;
  }
};

// Accessors for field 9.
template <>
class Get<9> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
  Field(Tuple& tup) {  // NOLINT
    return tup.f9_;
  }

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
  ConstField(const Tuple& tup) {
    return tup.f9_;
  }
};

}  // namespace gtest_internal

// get<k>(tup) on a non-const tuple: forwards to Get<k>::Field and returns
// AddRef of the k-th element type.
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
get(GTEST_10_TUPLE_(T)& tup) {
  return gtest_internal::Get<k>::Field(tup);
}

// get<k>(tup) on a const tuple: forwards to Get<k>::ConstField and returns
// ByRef of the k-th element type.
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
get(const GTEST_10_TUPLE_(T)& tup) {
  return gtest_internal::Get<k>::ConstField(tup);
}

// 6.1.3.5 Relational operators

// We only implement == and !=, as we don't have a need for the rest yet.

namespace gtest_internal {

// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
// first k fields of t1 equal the first k fields of t2.
// SameSizeTuplePrefixComparator<k1, k2> is only declared for k1 != k2, so
// using it with tuples of different sizes is a compile error.
template <int kSize1, int kSize2>
struct SameSizeTuplePrefixComparator;

// Base case: two empty prefixes always compare equal.
template <>
struct SameSizeTuplePrefixComparator<0, 0> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1&, const Tuple2&) {
    return true;
  }
};

// Recursive case: the first k - 1 fields are compared first, and field
// k - 1 is compared only if they all matched.
template <int k>
struct SameSizeTuplePrefixComparator<k, k> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1& lhs, const Tuple2& rhs) {
    typedef SameSizeTuplePrefixComparator<k - 1, k - 1> ShorterPrefix;
    return ShorterPrefix::Eq(lhs, rhs) &&
        ::std::tr1::get<k - 1>(lhs) == ::std::tr1::get<k - 1>(rhs);
  }
};

}  // namespace gtest_internal

// Two tuples are equal when all corresponding fields compare equal with
// ==.  The comparator is only defined for equal sizes, so comparing tuples
// of different sizes does not compile.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator==(const GTEST_10_TUPLE_(T)& lhs,
                       const GTEST_10_TUPLE_(U)& rhs) {
  typedef gtest_internal::SameSizeTuplePrefixComparator<
      tuple_size<GTEST_10_TUPLE_(T)>::value,
      tuple_size<GTEST_10_TUPLE_(U)>::value> Comparator;
  return Comparator::Eq(lhs, rhs);
}

// Inequality is the negation of operator==.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator!=(const GTEST_10_TUPLE_(T)& lhs,
                       const GTEST_10_TUPLE_(U)& rhs) {
  return !(lhs == rhs);
}

// 6.1.4 Pairs.
// Unimplemented.

}  // namespace tr1
}  // namespace std

// The helper macros used by the tuple implementation above are undefined
// here, so they are no longer visible to code that follows.

// Tuple type spellings.
#undef GTEST_0_TUPLE_
#undef GTEST_1_TUPLE_
#undef GTEST_2_TUPLE_
#undef GTEST_3_TUPLE_
#undef GTEST_4_TUPLE_
#undef GTEST_5_TUPLE_
#undef GTEST_6_TUPLE_
#undef GTEST_7_TUPLE_
#undef GTEST_8_TUPLE_
#undef GTEST_9_TUPLE_
#undef GTEST_10_TUPLE_

// Template parameter lists.
#undef GTEST_0_TYPENAMES_
#undef GTEST_1_TYPENAMES_
#undef GTEST_2_TYPENAMES_
#undef GTEST_3_TYPENAMES_
#undef GTEST_4_TYPENAMES_
#undef GTEST_5_TYPENAMES_
#undef GTEST_6_TYPENAMES_
#undef GTEST_7_TYPENAMES_
#undef GTEST_8_TYPENAMES_
#undef GTEST_9_TYPENAMES_
#undef GTEST_10_TYPENAMES_

// Friend declaration and type-trait shorthands.
#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
#undef GTEST_BY_REF_
#undef GTEST_ADD_REF_
#undef GTEST_TUPLE_ELEMENT_

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
# elif GTEST_OS_SYMBIAN

// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
// use STLport's tuple implementation, which unfortunately doesn't
// work as the copy of STLport distributed with Symbian is incomplete.
// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
// use its own tuple implementation.
#  ifdef BOOST_HAS_TR1_TUPLE
#   undef BOOST_HAS_TR1_TUPLE
#  endif  // BOOST_HAS_TR1_TUPLE

// This prevents <boost/tr1/detail/config.hpp>, which defines
// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
#  define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
#  include <tuple>

# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header.  This does
// not conform to the TR1 spec, which requires the header to be <tuple>.

#  if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
// which is #included by <tr1/tuple>, to not compile when RTTI is
// disabled.  _TR1_FUNCTIONAL is the header guard for
// <tr1/functional>.  Hence the following #define is a hack to prevent
// <tr1/functional> from being included.
#   define _TR1_FUNCTIONAL 1
#   include <tr1/tuple>
#   undef _TR1_FUNCTIONAL  // Allows the user to #include
                        // <tr1/functional> if he chooses to.
#  else
#   include <tr1/tuple>  // NOLINT
#  endif  // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302

# else
// If the compiler is not GCC 4.0+, we assume the user is using a
// spec-conforming TR1 implementation.
#  include <tuple>  // NOLINT
# endif  // GTEST_USE_OWN_TR1_TUPLE

#endif  // GTEST_HAS_TR1_TUPLE

// Determines whether clone(2) is supported.
// Usually it will only be available on Linux, excluding
// Linux on the Itanium architecture.
// Also see http://linux.die.net/man/2/clone.
// A value already defined by the user is kept; otherwise GTEST_HAS_CLONE
// is set to 1 or 0 below, so it is always defined after this block.
#ifndef GTEST_HAS_CLONE
// The user didn't tell us, so we need to figure it out.

# if GTEST_OS_LINUX && !defined(__ia64__)
#  define GTEST_HAS_CLONE 1
# else
#  define GTEST_HAS_CLONE 0
# endif  // GTEST_OS_LINUX && !defined(__ia64__)

#endif  // GTEST_HAS_CLONE

// Determines whether to support stream redirection. This is used to test
// output correctness and to implement death tests.
// As above, a user-supplied definition takes precedence.
#ifndef GTEST_HAS_STREAM_REDIRECTION
// By default, we assume that stream redirection is supported on all
// platforms except known mobile ones.
# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
#  define GTEST_HAS_STREAM_REDIRECTION 0
# else
#  define GTEST_HAS_STREAM_REDIRECTION 1
# endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
#endif  // GTEST_HAS_STREAM_REDIRECTION

// Determines whether to support death tests.
// Google Test does not support death tests for VC 7.1 and earlier as
// abort() in a VC 7.1 application compiled as GUI in debug config
// pops up a dialog window that cannot be suppressed programmatically.
// On platforms not listed below, GTEST_HAS_DEATH_TEST is left undefined
// rather than being defined to 0.
#if (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX)
# define GTEST_HAS_DEATH_TEST 1
# include <vector>  // NOLINT
#endif

// We don't support MSVC 7.1 with exceptions disabled now.  Therefore
// all the compilers we care about are adequate for supporting
// value-parameterized tests.
#define GTEST_HAS_PARAM_TEST 1

// Determines whether to support type-driven tests.

// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
// Sun Pro CC, IBM Visual Age, and HP aCC support.
// Both macros are left undefined for any other compiler.
#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
    defined(__IBMCPP__) || defined(__HP_aCC)
# define GTEST_HAS_TYPED_TEST 1
# define GTEST_HAS_TYPED_TEST_P 1
#endif

// Determines whether to support Combine(). This only makes sense when
// value-parameterized tests are enabled.  The implementation doesn't
// work on Sun Studio since it doesn't understand templated conversion
// operators.
// GTEST_HAS_COMBINE is left undefined when the condition does not hold.
#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
# define GTEST_HAS_COMBINE 1
#endif

// Determines whether the system compiler uses UTF-16 for encoding wide strings.
// The macro expands to a parenthesized expression over the OS macros, which
// is evaluated wherever the macro is used.
#define GTEST_WIDE_STRING_USES_UTF16_ \
    (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)

// Determines whether test results can be streamed to a socket.
// Defined only on Linux; left undefined elsewhere.
#if GTEST_OS_LINUX
# define GTEST_CAN_STREAM_RESULTS_ 1
#endif

// Defines some utility macros.

// The GNU compiler emits a warning if nested "if" statements are followed by
// an "else" statement and braces are not used to explicitly disambiguate the
// "else" binding.  This leads to problems with code like:
//
//   if (gate)
//     ASSERT_*(condition) << "Some message";
//
// The "switch (0) case 0:" idiom is used to suppress this.
// With the Intel compiler the macro expands to nothing.
#ifdef __INTEL_COMPILER
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
#else
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
#endif

// Use this annotation at the end of a struct/class definition to
// prevent the compiler from optimizing away instances that are never
// used.  This is useful when all interesting logic happens inside the
// c'tor and / or d'tor.  Example:
//
//   struct Foo {
//     Foo() { ... }
//   } GTEST_ATTRIBUTE_UNUSED_;
//
// Also use it after a variable or parameter declaration to tell the
// compiler the variable/parameter does not have to be used.
// The macro is always defined; it is empty unless __GNUC__ is defined and
// COMPILER_ICC is not.
#if defined(__GNUC__) && !defined(COMPILER_ICC)
# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
#else
# define GTEST_ATTRIBUTE_UNUSED_
#endif

// A macro to disallow operator=
// This should be used in the private: declarations for a class.
// It expands to a declaration without a trailing semicolon (the user
// writes the ';') and provides no definition of the operator.
#define GTEST_DISALLOW_ASSIGN_(type)\
  void operator=(type const &)

// A macro to disallow copy constructor and operator=
// This should be used in the private: declarations for a class.
// It declares the copy constructor and then reuses GTEST_DISALLOW_ASSIGN_,
// so again the user writes the final ';'.
#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
  type(type const &);\
  GTEST_DISALLOW_ASSIGN_(type)

// Tell the compiler to warn about unused return values for functions declared
// with this macro.  The macro should be used on function declarations
// following the argument list:
//
//   Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
//
// The macro is empty unless the compiler defines __GNUC__, GTEST_GCC_VER_
// is at least 30400, and COMPILER_ICC is not defined.
#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
#else
# define GTEST_MUST_USE_RESULT_
#endif  // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC

// Determine whether the compiler supports Microsoft's Structured Exception
// Handling.  This is supported by several Windows compilers but generally
// does not exist on any other system.
// A value already defined by the user is kept; otherwise GTEST_HAS_SEH is
// set to 1 or 0 below.
#ifndef GTEST_HAS_SEH
// The user didn't tell us, so we need to figure it out.

# if defined(_MSC_VER) || defined(__BORLANDC__)
// These two compilers are known to support SEH.
#  define GTEST_HAS_SEH 1
# else
// Assume no SEH.
#  define GTEST_HAS_SEH 0
# endif

#endif  // GTEST_HAS_SEH

// GTEST_API_ annotates declarations that cross a shared-library boundary.
// Under MSVC it becomes dllimport when GTEST_LINKED_AS_SHARED_LIBRARY is
// set and dllexport when GTEST_CREATE_SHARED_LIBRARY is set; the import
// case is tested first.
#ifdef _MSC_VER

# if GTEST_LINKED_AS_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllimport)
# elif GTEST_CREATE_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllexport)
# endif

#endif  // _MSC_VER

// In every other case GTEST_API_ expands to nothing.
#ifndef GTEST_API_
# define GTEST_API_
#endif

#ifdef __GNUC__
// Ask the compiler to never inline a given function.
# define GTEST_NO_INLINE_ __attribute__((noinline))
#else
// No equivalent is used for other compilers; the macro is empty.
# define GTEST_NO_INLINE_
#endif

namespace testing {

class Message;

namespace internal {

class String;

// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
//   GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
//                         content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.

// Helper type named by GTEST_COMPILE_ASSERT_.  It is an empty struct; using
// the asserted expression as its template argument forces that expression
// to be a compile-time constant.
template <bool>
struct CompileAssert {
};

// Expands to a typedef called msg for an array of CompileAssert whose size
// is 1 when expr is true and -1 (ill-formed) when it is false.
//
// The typedef is never referenced again, so it is tagged with
// GTEST_ATTRIBUTE_UNUSED_.  Without the tag, using the macro at function
// scope triggers GCC's -Wunused-local-typedefs warning, which breaks builds
// that treat warnings as errors.  The tag expands to nothing on compilers
// that do not support the attribute.
#define GTEST_COMPILE_ASSERT_(expr, msg) \
  typedef ::testing::internal::CompileAssert<(bool(expr))> \
      msg[bool(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_

// Implementation details of GTEST_COMPILE_ASSERT_:
//
// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
//   elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
//    #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
//   does not work, as gcc supports variable-length arrays whose sizes
//   are determined at run-time (this is gcc's extension and not part
//   of the C++ standard).  As a result, gcc fails to reject the
//   following code with the simple definition:
//
//     int foo;
//     GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
//                                      // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
//   expr is a compile-time constant.  (Template arguments must be
//   determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
//
//     CompileAssert<bool(expr)>
//
//   instead, these compilers will refuse to compile
//
//     GTEST_COMPILE_ASSERT_(5 > 0, some_message);
//
//   (They seem to think the ">" in "5 > 0" marks the end of the
//   template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
//     ((expr) ? 1 : -1).
//
//   This is to avoid running into a bug in MS VC 7.1, which
//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.

// Helper behind StaticAssertTypeEq (defined in gtest.h).
//
// The primary template is only declared, never defined, so naming it with
// two different types produces an incomplete type.
template <typename Lhs, typename Rhs>
struct StaticAssertTypeEqHelper;

// Partial specialization selected when both arguments are the same type;
// this is the only complete form of the helper.
template <typename Same>
struct StaticAssertTypeEqHelper<Same, Same> {
};

#if GTEST_HAS_GLOBAL_STRING
typedef ::string string;
#else
typedef ::std::string string;
#endif  // GTEST_HAS_GLOBAL_STRING

#if GTEST_HAS_GLOBAL_WSTRING
typedef ::wstring wstring;
#elif GTEST_HAS_STD_WSTRING
typedef ::std::wstring wstring;
#endif  // GTEST_HAS_GLOBAL_WSTRING

// A helper for suppressing warnings on constant condition.  It just
// returns 'condition'.
GTEST_API_ bool IsTrue(bool condition);

// Defines scoped_ptr.

// A deliberately minimal scoped_ptr: it offers only the operations that
// Google Test itself needs.  The wrapped object is deleted when the
// scoped_ptr is destroyed or reset to a different pointer.
template <typename T>
class scoped_ptr {
 public:
  typedef T element_type;

  // Takes ownership of p (which may be NULL).
  explicit scoped_ptr(T* p = NULL) : ptr_(p) {}

  // Deletes the owned object, if any.
  ~scoped_ptr() {
    reset();
  }

  T& operator*() const {
    return *ptr_;
  }
  T* operator->() const {
    return ptr_;
  }
  T* get() const {
    return ptr_;
  }

  // Gives up ownership: returns the held pointer and leaves this object
  // holding NULL.  Nothing is deleted.
  T* release() {
    T* const released = ptr_;
    ptr_ = NULL;
    return released;
  }

  // Replaces the held pointer with p, deleting the previous object.
  // Resetting to the pointer already held is a no-op.
  void reset(T* p = NULL) {
    if (p == ptr_) {
      return;
    }
    // sizeof(T) only compiles for a complete type, which keeps us from
    // deleting through a pointer to an incomplete type.
    if (IsTrue(sizeof(T) > 0)) {
      delete ptr_;
    }
    ptr_ = p;
  }

 private:
  T* ptr_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
};

// Defines RE.

// A simple C++ wrapper for <regex.h>.  It uses the POSIX Extended
// Regular Expression syntax.
class GTEST_API_ RE {
 public:
  // A copy constructor is required by the Standard to initialize object
  // references from r-values.  The copy is built by calling Init() with
  // the other object's pattern string.
  RE(const RE& other) { Init(other.pattern()); }

  // Constructs an RE from a string.  Intentionally not 'explicit' (hence
  // the NOLINT) so that a string can be passed where an RE is expected.
  RE(const ::std::string& regex) { Init(regex.c_str()); }  // NOLINT

#if GTEST_HAS_GLOBAL_STRING

  // Same as above, for the global ::string type.
  RE(const ::string& regex) { Init(regex.c_str()); }  // NOLINT

#endif  // GTEST_HAS_GLOBAL_STRING

  // Constructs an RE from a C string; also implicit.
  RE(const char* regex) { Init(regex); }  // NOLINT
  // Defined out of line.
  ~RE();

  // Returns the string representation of the regex.
  const char* pattern() const { return pattern_; }

  // FullMatch(str, re) returns true iff regular expression re matches
  // the entire str.
  // PartialMatch(str, re) returns true iff regular expression re
  // matches a substring of str (including str itself).
  //
  // TODO(wan@google.com): make FullMatch() and PartialMatch() work
  // when str contains NUL characters.
  //
  // The string overloads below simply forward str.c_str() to the
  // const char* overloads.
  static bool FullMatch(const ::std::string& str, const RE& re) {
    return FullMatch(str.c_str(), re);
  }
  static bool PartialMatch(const ::std::string& str, const RE& re) {
    return PartialMatch(str.c_str(), re);
  }

#if GTEST_HAS_GLOBAL_STRING

  static bool FullMatch(const ::string& str, const RE& re) {
    return FullMatch(str.c_str(), re);
  }
  static bool PartialMatch(const ::string& str, const RE& re) {
    return PartialMatch(str.c_str(), re);
  }

#endif  // GTEST_HAS_GLOBAL_STRING

  // The matching itself is implemented out of line.
  static bool FullMatch(const char* str, const RE& re);
  static bool PartialMatch(const char* str, const RE& re);

 private:
  // Shared by every constructor; defined out of line.
  void Init(const char* regex);

  // We use a const char* instead of a string, as Google Test may be used
  // where string is not available.  We also do not use Google Test's own
  // String type here, in order to simplify dependencies between the
  // files.
  const char* pattern_;
  // NOTE(review): presumably records whether Init() accepted the pattern;
  // confirm against the out-of-line definition.
  bool is_valid_;

#if GTEST_USES_POSIX_RE

  regex_t full_regex_;     // For FullMatch().
  regex_t partial_regex_;  // For PartialMatch().

#else  // GTEST_USES_SIMPLE_RE

  const char* full_pattern_;  // For FullMatch();

#endif

  GTEST_DISALLOW_ASSIGN_(RE);
};

// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);

// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
                                                               int line);

// Defines logging utilities:
//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
//                          message itself is streamed into the macro.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.

// Severity levels accepted by GTEST_LOG_.  The macro pastes its argument
// onto the GTEST_ prefix, so GTEST_LOG_(FATAL) selects GTEST_FATAL, etc.
enum GTestLogSeverity {
  GTEST_INFO,
  GTEST_WARNING,
  GTEST_ERROR,
  GTEST_FATAL  // Per the ~GTestLog() comment, aborts the program.
};

// Formats log entry severity, provides a stream object for streaming the
// log message, and terminates the message with a newline when going out of
// scope.
class GTEST_API_ GTestLog {
 public:
  // Takes the severity of the entry and the source location it comes from
  // (GTEST_LOG_ passes __FILE__ and __LINE__).  Defined out of line.
  GTestLog(GTestLogSeverity severity, const char* file, int line);

  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
  ~GTestLog();

  // Stream that receives the message text; always ::std::cerr.
  ::std::ostream& GetStream() { return ::std::cerr; }

 private:
  // Severity given at construction.
  const GTestLogSeverity severity_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
};

#define GTEST_LOG_(severity) \
    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
                                  __FILE__, __LINE__).GetStream()

// Does nothing in this implementation.
inline void LogToStderr() {
}

// Flushes all open output streams by passing NULL to fflush().
inline void FlushInfoLog() {
  fflush(NULL);
}

// INTERNAL IMPLEMENTATION - DO NOT USE.
//
// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
// is not satisfied.
//  Synopsis:
//    GTEST_CHECK_(boolean_condition);
//     or
//    GTEST_CHECK_(boolean_condition) << "Additional message";
//
//    This checks the condition and if the condition is not satisfied
//    it prints message about the condition violation, including the
//    condition itself, plus additional message streamed into it, if any,
//    and then it aborts the program. It aborts the program irrespective of
//    whether it is built in the debug mode or not.
#define GTEST_CHECK_(condition) \
    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
    if (::testing::internal::IsTrue(condition)) \
      ; \
    else \
      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "

// An all-mode assert to verify that the given POSIX-style function
// call returns 0 (indicating success).  On a non-zero result it logs the
// stringified call followed by " failed with error <code>" at FATAL
// severity; the leading space keeps the call text and the message from
// running together.
//
// Known limitation: this doesn't expand to a balanced 'if' statement, so
// enclose the macro in {} if you need to use it as the only statement in
// an 'if' branch.
#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
  if (const int gtest_error = (posix_call)) \
    GTEST_LOG_(FATAL) << #posix_call << " failed with error " \
                      << gtest_error

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Use ImplicitCast_ as a safe version of static_cast for upcasting in
// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
// const Foo*).  When you use ImplicitCast_, the compiler checks that
// the cast is safe.  Such explicit ImplicitCast_s are necessary in
// surprisingly many situations where C++ demands an exact type match
// instead of an argument type convertible to a target type.
//
// The syntax for using ImplicitCast_ is the same as for static_cast:
//
//   ImplicitCast_<ToType>(expr)
//
// ImplicitCast_ would have been part of the C++ standard library,
// but the proposal was submitted too late.  It will probably make
// its way into the language in the future.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., implicit_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
// Returns its argument unchanged.  Because To must be spelled out by the
// caller, the argument undergoes an ordinary implicit conversion to To at
// the call site, and the call fails to compile if none exists.
template <typename To>
inline To ImplicitCast_(To x) {
  return x;
}

// When you upcast (that is, cast a pointer from type Foo to type
// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
// always succeed.  When you downcast (that is, cast a pointer from
// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
// how do you know the pointer is really of type SubclassOfFoo?  It
// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
// when you downcast, you should use this macro.  In debug mode, we
// use dynamic_cast<> to double-check the downcast is legal (we die
// if it's not).  In normal mode, we do the efficient static_cast<>
// instead.  Thus, it's important to test in debug mode to make sure
// the cast is legal!
//    This is the only place in the code we should use dynamic_cast<>.
// In particular, you SHOULDN'T be using dynamic_cast<> in order to
// do RTTI (eg code like this:
//    if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
//    if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
// You should design the code some other way not to need this.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., down_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
// Casts f to the pointer type To with static_cast.  When GTEST_HAS_RTTI is
// set, first checks with dynamic_cast that a non-NULL f really points to
// an object of that type.
template<typename To, typename From>  // use like this: DownCast_<T*>(foo);
inline To DownCast_(From* f) {  // so we only accept pointers
  // Ensures that To is a sub-type of From *.  This test is here only
  // for compile-time type checking, and has no overhead in an
  // optimized build at run-time, as it will be optimized away
  // completely.  The branch is never taken; it only has to compile.
  if (false) {
    const To to = NULL;
    ::testing::internal::ImplicitCast_<From*>(to);
  }

#if GTEST_HAS_RTTI
  // This check is compiled in whenever GTEST_HAS_RTTI is set; no
  // debug-mode macro is tested here.  A NULL f is accepted as is.
  GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
#endif
  return static_cast<To>(f);
}

// Converts a Base pointer to a Derived pointer.
//
// Derived must be a subclass of Base, and *base must be exactly a Derived
// object, not an instance of some further subclass.  With RTTI available
// this is enforced at run time by comparing typeid()s; without RTTI the
// function is a plain static_cast.
template <class Derived, class Base>
Derived* CheckedDowncastToActualType(Base* base) {
#if !GTEST_HAS_RTTI
  return static_cast<Derived*>(base);  // Poor man's downcast.
#else
  GTEST_CHECK_(typeid(*base) == typeid(Derived));
  Derived* const derived = dynamic_cast<Derived*>(base);  // NOLINT
  return derived;
#endif
}

#if GTEST_HAS_STREAM_REDIRECTION

// Defines the stderr capturer:
//   CaptureStdout     - starts capturing stdout.
//   GetCapturedStdout - stops capturing stdout and returns the captured string.
//   CaptureStderr     - starts capturing stderr.
//   GetCapturedStderr - stops capturing stderr and returns the captured string.
//
GTEST_API_ void CaptureStdout();
GTEST_API_ String GetCapturedStdout();
GTEST_API_ void CaptureStderr();
GTEST_API_ String GetCapturedStderr();

#endif  // GTEST_HAS_STREAM_REDIRECTION


#if GTEST_HAS_DEATH_TEST

// A copy of all command line arguments.  Set by InitGoogleTest().
extern ::std::vector<String> g_argvs;

// GTEST_HAS_DEATH_TEST implies we have ::std::string.
const ::std::vector<String>& GetArgvs();

#endif  // GTEST_HAS_DEATH_TEST

// Defines synchronization primitives.

#if GTEST_HAS_PTHREAD

// Sleeps for (roughly) n milli-seconds.  This function is only for
// testing Google Test's own constructs.  Don't use it in user tests,
// either directly or indirectly.
//
// The request is split into whole seconds and the remaining nanoseconds:
// nanosleep() only accepts tv_nsec values below one second, so putting
// the whole duration into tv_nsec would make calls with n >= 1000 fail
// without sleeping (and could overflow a 32-bit long for large n).
inline void SleepMilliseconds(int n) {
  timespec time = timespec();  // Zero any extra platform-specific fields.
  time.tv_sec = n / 1000;
  time.tv_nsec = (n % 1000) * 1000L * 1000L;
  nanosleep(&time, NULL);
}

// Lets a controller thread hold back newly created threads until it says
// go.  A Notification must be created and destroyed in the controller
// thread.
//
// This class is only for testing Google Test's own constructs. Do not
// use it in user tests, either directly or indirectly.
class Notification {
 public:
  Notification() : notified_(false) {}

  // Signals every thread created with this notification to start. Must
  // be called from the controller thread.
  void Notify() {
    notified_ = true;
  }

  // Polls the flag every 10 ms and returns once the controller thread has
  // called Notify(). Must be called from a test thread.
  void WaitForNotification() {
    for (;;) {
      if (notified_) {
        return;
      }
      SleepMilliseconds(10);
    }
  }

 private:
  volatile bool notified_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
};

// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
// Consequently, it cannot select a correct instantiation of ThreadWithParam
// in order to call its Run(). Introducing ThreadWithParamBase as a
// non-templated base class for ThreadWithParam allows us to bypass this
// problem.
class ThreadWithParamBase {
 public:
  // Virtual so that derived objects can be destroyed through this base.
  virtual ~ThreadWithParamBase() {}
  // Body of the thread; ThreadFuncWithCLinkage() calls this on the new
  // thread.
  virtual void Run() = 0;
};

// pthread_create() accepts a pointer to a function type with the C linkage.
// According to the Standard (7.5/1), function types with different linkages
// are different even if they are otherwise identical.  Some compilers (for
// example, SunStudio) treat them as different types.  Since class methods
// cannot be defined with C-linkage we need to define a free C-function to
// pass into pthread_create().
extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
  static_cast<ThreadWithParamBase*>(thread)->Run();
  return NULL;
}

// Helper class for testing Google Test's multi-threading constructs.
// To use it, write:
//
//   void ThreadFunc(int param) { /* Do things with param */ }
//   Notification thread_can_start;
//   ...
//   // The thread_can_start parameter is optional; you can supply NULL.
//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
//   thread_can_start.Notify();
//
// These classes are only for testing Google Test's own constructs. Do
// not use them in user tests, either directly or indirectly.
template <typename T>
class ThreadWithParam : public ThreadWithParamBase {
 public:
  // Signature of the user callback executed on the new thread.
  typedef void (*UserThreadFunc)(T);

  // Records the callback and its argument, then starts the thread right
  // away.  If thread_can_start is non-NULL, the new thread waits on it
  // before calling func.
  ThreadWithParam(
      UserThreadFunc func, T param, Notification* thread_can_start)
      : func_(func),
        param_(param),
        thread_can_start_(thread_can_start),
        finished_(false) {
    // Creating the thread is the last step on purpose: every field other
    // than thread_ must already be initialized when Run() starts.
    ThreadWithParamBase* const base = this;
    GTEST_CHECK_POSIX_SUCCESS_(
        pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
  }

  // Waits for the thread unless Join() has already done so.
  ~ThreadWithParam() {
    Join();
  }

  // Joins the thread the first time it is called; later calls do nothing.
  void Join() {
    if (finished_) {
      return;
    }
    GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
    finished_ = true;
  }

  // Thread body: optionally waits for the start notification, then runs
  // the user function with the stored parameter.
  virtual void Run() {
    if (thread_can_start_ != NULL) {
      thread_can_start_->WaitForNotification();
    }
    func_(param_);
  }

 private:
  const UserThreadFunc func_;  // User-supplied thread function.
  const T param_;  // User-supplied parameter to the thread function.
  // When non-NULL, used to block execution until the controller thread
  // notifies.
  Notification* const thread_can_start_;
  bool finished_;  // true iff we know that the thread function has finished.
  pthread_t thread_;  // The native thread object.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
};

// MutexBase and Mutex implement mutex on pthreads-based platforms. They
// are used in conjunction with class MutexLock:
//
//   Mutex mutex;
//   ...
//   MutexLock lock(&mutex);  // Acquires the mutex and releases it at the end
//                            // of the current scope.
//
// MutexBase implements behavior for both statically and dynamically
// allocated mutexes.  Do not use MutexBase directly.  Instead, write
// the following to define a static mutex:
//
//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
//
// You can forward declare a static mutex like this:
//
//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
//
// To create a dynamic mutex, just define an object of type Mutex.
class MutexBase {
 public:
  // Acquires this mutex.  owner_ is written only after the lock has been
  // obtained, i.e. while this thread holds the mutex.
  void Lock() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
    owner_ = pthread_self();
  }

  // Releases this mutex.  owner_ is cleared before the underlying mutex is
  // unlocked, so the write happens while the mutex is still held.
  void Unlock() {
    // We don't protect writing to owner_ here, as it's the caller's
    // responsibility to ensure that the current thread holds the
    // mutex when this is called.
    owner_ = 0;
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
  }

  // Does nothing if the current thread holds the mutex. Otherwise, crashes
  // with high probability.
  // NOTE(review): compares pthread_t values with ==, which requires
  // pthread_t to be a type that supports it; POSIX offers pthread_equal()
  // for this comparison.
  void AssertHeld() const {
    GTEST_CHECK_(owner_ == pthread_self())
        << "The current thread is not holding the mutex @" << this;
  }

  // A static mutex may be used before main() is entered.  It may even
  // be used before the dynamic initialization stage.  Therefore we
  // must be able to initialize a static mutex object at link time.
  // This means MutexBase has to be a POD and its member variables
  // have to be public.
  //
  // GTEST_DEFINE_STATIC_MUTEX_ initializes these two members positionally,
  // so their order must not change.
 public:
  pthread_mutex_t mutex_;  // The underlying pthread mutex.
  pthread_t owner_;  // The thread holding the mutex; 0 means no one holds it.
};

// Forward-declares a static mutex.
# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
    extern ::testing::internal::MutexBase mutex

// Defines and statically (i.e. at link time) initializes a static mutex.
# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
    ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, 0 }

// Mutex is the flavor of MutexBase for mutexes created at run time: its
// constructor and destructor initialize and destroy the underlying
// pthread mutex.  Apart from that it exposes the same API as MutexBase.
class Mutex : public MutexBase {
 public:
  // Initializes the pthread mutex with default attributes and marks the
  // mutex as not held.
  Mutex() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
    owner_ = 0;
  }

  // Destroys the pthread mutex.
  ~Mutex() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
};

// We cannot name this class MutexLock as the ctor declaration would
// conflict with a macro named MutexLock, which is defined on some
// platforms.  Hence the typedef trick below.
class GTestMutexLock {
 public:
  // Locks *mutex; it stays locked for the lifetime of this object.
  explicit GTestMutexLock(MutexBase* mutex) : mutex_(mutex) {
    mutex_->Lock();
  }

  // Unlocks the mutex locked by the constructor.
  ~GTestMutexLock() {
    mutex_->Unlock();
  }

 private:
  MutexBase* const mutex_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
};

typedef GTestMutexLock MutexLock;

// Helpers for ThreadLocal.

// pthread_key_create() requires DeleteThreadLocalValue() to have
// C-linkage.  Therefore it cannot be templatized to access
// ThreadLocal<T>.  Hence the need for class
// ThreadLocalValueHolderBase.
class ThreadLocalValueHolderBase {
 public:
  // Virtual so that DeleteThreadLocalValue() can delete any holder through
  // a pointer to this base.
  virtual ~ThreadLocalValueHolderBase() {}
};

// Called by pthread to delete thread-local data stored by
// pthread_setspecific().
extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
}

// Implements thread-local storage on pthreads-based systems.
//
//   // Thread 1
//   ThreadLocal<int> tl(100);  // 100 is the default value for each thread.
//
//   // Thread 2
//   tl.set(150);  // Changes the value for thread 2 only.
//   EXPECT_EQ(150, tl.get());
//
//   // Thread 1
//   EXPECT_EQ(100, tl.get());  // In thread 1, tl has the original value.
//   tl.set(200);
//   EXPECT_EQ(200, tl.get());
//
// The template type argument T must have a public copy constructor.
// In addition, the default ThreadLocal constructor requires T to have
// a public default constructor.
//
// An object managed for a thread by a ThreadLocal instance is deleted
// when the thread exits.  Or, if the ThreadLocal instance dies in
// that thread, when the ThreadLocal dies.  It's the user's
// responsibility to ensure that all other threads using a ThreadLocal
// have exited when it dies, or the per-thread objects for those
// threads will not be deleted.
//
// Google Test only uses global ThreadLocal objects.  That means they
// will die after main() has returned.  Therefore, no per-thread
// object managed by Google Test will be leaked as long as all threads
// using Google Test have exited when main() returns.
template <typename T>
class ThreadLocal {
 public:
  // Every thread starts out with a default-constructed T.
  ThreadLocal() : key_(CreateKey()), default_() {}

  // Every thread starts out with a copy of value.
  explicit ThreadLocal(const T& value)
      : key_(CreateKey()), default_(value) {}

  ~ThreadLocal() {
    // Only the calling thread's object can be destroyed from here.
    void* const current = pthread_getspecific(key_);
    DeleteThreadLocalValue(current);

    // Deleting the key frees the key itself; objects still held on behalf
    // of other threads are *not* deleted by this call.
    GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
  }

  // Accessors for the calling thread's value, which is created from the
  // default on first use.
  T* pointer() {
    return GetOrCreateValue();
  }
  const T* pointer() const {
    return GetOrCreateValue();
  }
  const T& get() const {
    return *pointer();
  }
  void set(const T& value) {
    *pointer() = value;
  }

 private:
  // Wraps one T so that it can be deleted through
  // ThreadLocalValueHolderBase.
  class ValueHolder : public ThreadLocalValueHolderBase {
   public:
    explicit ValueHolder(const T& value) : value_(value) {}

    T* pointer() {
      return &value_;
    }

   private:
    T value_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
  };

  // Allocates the pthread key.  DeleteThreadLocalValue() is registered as
  // its destructor, so a thread's object is deleted when that thread
  // exits.
  static pthread_key_t CreateKey() {
    pthread_key_t key;
    GTEST_CHECK_POSIX_SUCCESS_(
        pthread_key_create(&key, &DeleteThreadLocalValue));
    return key;
  }

  // Returns the calling thread's value, first creating it as a copy of
  // default_ if this thread has none yet.
  T* GetOrCreateValue() const {
    void* const stored = pthread_getspecific(key_);
    if (stored == NULL) {
      ValueHolder* const new_holder = new ValueHolder(default_);
      ThreadLocalValueHolderBase* const holder_base = new_holder;
      GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
      return new_holder->pointer();
    }

    ThreadLocalValueHolderBase* const holder =
        static_cast<ThreadLocalValueHolderBase*>(stored);
    return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
  }

  // A key pthreads uses for looking up per-thread values.
  const pthread_key_t key_;
  const T default_;  // The default value for each thread.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
};

# define GTEST_IS_THREADSAFE 1

#else  // GTEST_HAS_PTHREAD

// Fallback implementation of the synchronization primitives (mutex, lock,
// and thread-local variable) for builds without pthreads.  It is written
// against the Win32 API (CRITICAL_SECTION and the Tls* functions).
// GTEST_IS_THREADSAFE is still defined as 0 below, so using Google Test in
// multiple threads is not supported on such platforms.

// Mutex implemented with a Win32 CRITICAL_SECTION.  owner_ records the id
// of the thread that currently holds the lock (0 when nobody does) and is
// only used by AssertHeld().
class Mutex {
 public:
  Mutex() : owner_(0), handle_() {
    ::InitializeCriticalSection(&handle_);
  }

  ~Mutex() {
    ::DeleteCriticalSection(&handle_);
  }

  // Acquires this mutex and records the calling thread as its owner.
  void Lock() {
    ::EnterCriticalSection(&handle_);
    owner_ = ::GetCurrentThreadId();
  }

  // Releases this mutex.
  void Unlock() {
    // owner_ must be cleared while the critical section is still held.
    // If it were cleared after LeaveCriticalSection(), another thread
    // could acquire the lock and store its id in between, and this write
    // would then wipe out the new owner and make its AssertHeld() fail.
    owner_ = 0;
    ::LeaveCriticalSection(&handle_);
  }

  // Does nothing if the current thread holds the mutex. Otherwise, crashes
  // with high probability.
  void AssertHeld() const {
    GTEST_CHECK_(owner_ == ::GetCurrentThreadId())
       << "The current thread is not holding the mutex @" << this;
  }

 private:
  DWORD              owner_;
  CRITICAL_SECTION   handle_;
};

// Declares (and, since Mutex has a default constructor, also defines) a
// 'static' Mutex object.  NOTE(review): when expanded at namespace scope
// 'static' gives the object internal linkage, so every translation unit
// would get its own mutex - confirm that this is intended.
# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
  static ::testing::internal::Mutex mutex

// Expands to nothing in this branch.
# define GTEST_DEFINE_STATIC_MUTEX_(mutex)

// Scoped lock: locks the given Mutex on construction and unlocks it on
// destruction.
class GTestMutexLock {
 public:
  explicit GTestMutexLock(Mutex* inMutex)
      : mutex_(inMutex) {
    mutex_->Lock();
  }

  ~GTestMutexLock() { mutex_->Unlock(); }

 private:
  Mutex* mutex_;  // The mutex held by this lock.
};

typedef GTestMutexLock MutexLock;

// Non-template base of ThreadLocal<T>::ValueHolder, so that a holder can
// be deleted without knowing T.
class ThreadLocalValueHolderBase {
 public:
  // Virtual so that DeleteThreadLocalValue() can delete any holder through
  // a pointer to this base.
  virtual ~ThreadLocalValueHolderBase() {}
};

extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
}

// Implements thread-local storage on windows system.
template <typename T>
class ThreadLocal {
 public:
  ThreadLocal() : key_(CreateKey()),
                  default_() {}
  explicit ThreadLocal(const T& value) : key_(CreateKey()),
                                         default_(value) {}

  ~ThreadLocal() {
    // Destroys the managed object for the current thread, if any.
    DeleteThreadLocalValue(TlsGetValue(key_));

    // Releases resources associated with the key.  This will *not*
    // delete managed objects for other threads.
    GTEST_CHECK_(TlsFree(key_) > 0);
  }

  T* pointer() { return GetOrCreateValue(); }
  const T* pointer() const { return GetOrCreateValue(); }
  const T& get() const { return *pointer(); }
  void set(const T& value) { *pointer() = value; }

 private:
  // Holds a value of type T.
  class ValueHolder : public ThreadLocalValueHolderBase {
   public:
    explicit ValueHolder(const T& value) : value_(value) {}

    T* pointer() { return &value_; }

   private:
    T value_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
  };

  static DWORD CreateKey() {
    DWORD key;
    // When a thread exits, DeleteThreadLocalValue() will be called on
    // the object managed for that thread.
    GTEST_CHECK_((key = TlsAlloc()) != TLS_OUT_OF_INDEXES);
    return key;
  }

  T* GetOrCreateValue() const {
    ThreadLocalValueHolderBase* const holder =
        static_cast<ThreadLocalValueHolderBase*>(TlsGetValue(key_));
    if (holder != NULL) {
      return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
    }

    ValueHolder* const new_holder = new ValueHolder(default_);
    ThreadLocalValueHolderBase* const holder_base = new_holder;
    GTEST_CHECK_(TlsSetValue(key_, holder_base) != 0);
    return new_holder->pointer();
  }

  // A key pthreads uses for looking up per-thread values.
  const DWORD key_;
  const T default_;  // The default value for each thread.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
};

// The synchronization primitives above are backed by Win32 critical
// sections and TLS slots, but this configuration is still reported as
// not thread-safe.
# define GTEST_IS_THREADSAFE 0

#endif  // GTEST_HAS_PTHREAD

// Returns the number of threads running in the process, or 0 to indicate that
// we cannot detect it.
GTEST_API_ size_t GetThreadCount();

// Passing non-POD classes through ellipsis (...) crashes the ARM
// compiler and generates a warning in Sun Studio.  The Nokia Symbian
// and the IBM XL C/C++ compiler try to instantiate a copy constructor
// for objects passed through ellipsis (...), failing for uncopyable
// objects.  We define this to ensure that only POD is passed through
// ellipsis on these systems.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC)
// We lose support for NULL detection where the compiler doesn't like
// passing non-POD classes through ellipsis (...).
# define GTEST_ELLIPSIS_NEEDS_POD_ 1
#else
# define GTEST_CAN_COMPARE_NULL 1
#endif

// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
// const T& and const T* in a function template.  These compilers
// _can_ decide between class template specializations for T and T*,
// so a tr1::type_traits-like is_pointer works.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
# define GTEST_NEEDS_IS_POINTER_ 1
#endif

// Compile-time boolean constant: bool_constant<B>::value is B and
// bool_constant<B>::type names the instantiation itself.
template <bool kValue>
struct bool_constant {
  static const bool value = kValue;
  typedef bool_constant<kValue> type;
};
// Out-of-class definition of the static member declared above.
template <bool kValue>
const bool bool_constant<kValue>::value;

typedef bool_constant<true> true_type;
typedef bool_constant<false> false_type;

// is_pointer<T>::value is true exactly when T has the form U*.

// General case: not a pointer.
template <typename T>
struct is_pointer : false_type {
};

// Partial specialization matching any pointer type.
template <typename Pointee>
struct is_pointer<Pointee*> : true_type {
};

// IteratorTraits<It>::value_type is the element type an iterator refers
// to.

// Class-type iterators report it through their nested value_type.
template <typename Iterator>
struct IteratorTraits {
  typedef typename Iterator::value_type value_type;
};

// A raw pointer iterates over its pointee type.
template <typename Element>
struct IteratorTraits<Element*> {
  typedef Element value_type;
};

// For a pointer to const, the const qualifier is dropped.
template <typename Element>
struct IteratorTraits<const Element*> {
  typedef Element value_type;
};

#if GTEST_OS_WINDOWS
# define GTEST_PATH_SEP_ "\\"
# define GTEST_HAS_ALT_PATH_SEP_ 1
// The biggest signed integer type the compiler supports.
typedef __int64 BiggestInt;
#else
# define GTEST_PATH_SEP_ "/"
# define GTEST_HAS_ALT_PATH_SEP_ 0
typedef long long BiggestInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS

// Utilities for char.

// isspace(int ch) and friends accept an unsigned char or EOF.  char
// may be signed, depending on the compiler (or compiler flags).
// Therefore we need to cast a char to unsigned char before calling
// isspace(), etc.

// Each predicate converts ch to unsigned char before calling the matching
// <ctype.h> function, so a negative char value is never passed to it, and
// turns the int result into a bool.
inline bool IsAlpha(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isalpha(uch) != 0;
}
inline bool IsAlNum(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isalnum(uch) != 0;
}
inline bool IsDigit(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isdigit(uch) != 0;
}
inline bool IsLower(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return islower(uch) != 0;
}
inline bool IsSpace(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isspace(uch) != 0;
}
inline bool IsUpper(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isupper(uch) != 0;
}
inline bool IsXDigit(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isxdigit(uch) != 0;
}

// Case conversions: ch is passed to tolower()/toupper() as an unsigned
// char and the int result is narrowed back to char.
inline char ToLower(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return static_cast<char>(tolower(uch));
}
inline char ToUpper(char ch) {
  const unsigned char uch = static_cast<unsigned char>(ch);
  return static_cast<char>(toupper(uch));
}

// The testing::internal::posix namespace holds wrappers for common
// POSIX functions.  These wrappers hide the differences between
// Windows/MSVC and POSIX systems.  Since some compilers define these
// standard functions as macros, the wrapper cannot have the same name
// as the wrapped function.

namespace posix {

// Functions with a different name on Windows.

#if GTEST_OS_WINDOWS

// Windows flavor of the wrappers.  Each one forwards to the corresponding
// C runtime function under the name that toolchain provides.
typedef struct _stat StatStruct;

# ifdef __BORLANDC__
// Borland builds call the runtime functions without a leading underscore.
inline int IsATTY(int fd) { return isatty(fd); }
inline int StrCaseCmp(const char* s1, const char* s2) {
  return stricmp(s1, s2);
}
inline char* StrDup(const char* src) { return strdup(src); }
# else  // !__BORLANDC__
#  if GTEST_OS_WINDOWS_MOBILE
// On Windows Mobile no descriptor is ever reported as a terminal.
inline int IsATTY(int /* fd */) { return 0; }
#  else
inline int IsATTY(int fd) { return _isatty(fd); }
#  endif  // GTEST_OS_WINDOWS_MOBILE
inline int StrCaseCmp(const char* s1, const char* s2) {
  return _stricmp(s1, s2);
}
inline char* StrDup(const char* src) { return _strdup(src); }
# endif  // __BORLANDC__

# if GTEST_OS_WINDOWS_MOBILE
// The result of _fileno() is reinterpret_cast to int in this configuration.
inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
// time and thus not defined there.
# else
inline int FileNo(FILE* file) { return _fileno(file); }
inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
inline int RmDir(const char* dir) { return _rmdir(dir); }
// True iff the _S_IFDIR bit is set in st.st_mode.
inline bool IsDir(const StatStruct& st) {
  return (_S_IFDIR & st.st_mode) != 0;
}
# endif  // GTEST_OS_WINDOWS_MOBILE

#else

// POSIX flavor of the wrappers: each forwards directly to the libc
// function it is named after.
typedef struct stat StatStruct;

inline int FileNo(FILE* file) {
  return fileno(file);
}
inline int IsATTY(int fd) {
  return isatty(fd);
}
inline int Stat(const char* path, StatStruct* buf) {
  return stat(path, buf);
}
// Case-insensitive string comparison via strcasecmp().
inline int StrCaseCmp(const char* s1, const char* s2) {
  const int order = strcasecmp(s1, s2);
  return order;
}
inline char* StrDup(const char* src) {
  return strdup(src);
}
inline int RmDir(const char* dir) {
  return rmdir(dir);
}
// True iff st describes a directory.
inline bool IsDir(const StatStruct& st) {
  return S_ISDIR(st.st_mode) ? true : false;
}

#endif  // GTEST_OS_WINDOWS

// Functions deprecated by MSVC 8.0.

#ifdef _MSC_VER
// Temporarily disable warning 4996 (deprecated function).
# pragma warning(push)
# pragma warning(disable:4996)
#endif

// Thin wrapper around strncpy(): copies at most n characters of src into
// dest and returns dest (as a pointer to const).
inline const char* StrNCpy(char* dest, const char* src, size_t n) {
  char* const result = strncpy(dest, src, n);
  return result;
}

// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
// StrError() aren't needed on Windows CE at this time and thus not
// defined there.

#if !GTEST_OS_WINDOWS_MOBILE
// Changes the working directory to dir via chdir(); returns chdir()'s result.
inline int ChDir(const char* dir) {
  const int status = chdir(dir);
  return status;
}
#endif
// Opens path in the given mode via fopen(); returns the stream fopen()
// produced (which is NULL when fopen() fails).
inline FILE* FOpen(const char* path, const char* mode) {
  FILE* const stream = fopen(path, mode);
  return stream;
}
#if !GTEST_OS_WINDOWS_MOBILE
// Re-associates stream with path in the given mode via freopen(); returns
// freopen()'s result.
inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
  FILE* const reopened = freopen(path, mode, stream);
  return reopened;
}

// Wraps the descriptor fd in a stdio stream via fdopen(); returns fdopen()'s
// result.
inline FILE* FDOpen(int fd, const char* mode) {
  FILE* const stream = fdopen(fd, mode);
  return stream;
}
#endif
// Closes the stream fp via fclose(); returns fclose()'s result.
inline int FClose(FILE* fp) {
  const int status = fclose(fp);
  return status;
}
#if !GTEST_OS_WINDOWS_MOBILE
// Reads up to count bytes from fd into buf via read().  The value read()
// returns is narrowed to int before being handed back.
inline int Read(int fd, void* buf, unsigned int count) {
  const int bytes_read = static_cast<int>(read(fd, buf, count));
  return bytes_read;
}

// Writes up to count bytes from buf to fd via write().  The value write()
// returns is narrowed to int before being handed back.
inline int Write(int fd, const void* buf, unsigned int count) {
  const int bytes_written = static_cast<int>(write(fd, buf, count));
  return bytes_written;
}

// Closes the descriptor fd via close(); returns close()'s result.
inline int Close(int fd) {
  const int status = close(fd);
  return status;
}

// Returns the message strerror() associates with errnum.
inline const char* StrError(int errnum) {
  const char* const message = strerror(errnum);
  return message;
}
#endif
// Looks up the environment variable `name`.
//
// - Windows CE: always NULL, since there are no environment variables.
// - Borland / SunOS 5.8 / SunOS 5.9: a variable that is set to the empty
//   string is reported as NULL, because programmatically cleared variables
//   end up empty rather than unset on those platforms.
// - Everywhere else: exactly what getenv() returns.
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE
  return NULL;
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
  const char* const value = getenv(name);
  if (value == NULL || value[0] == '\0') {
    return NULL;
  }
  return value;
#else
  const char* const value = getenv(name);
  return value;
#endif
}

#ifdef _MSC_VER
# pragma warning(pop)  // Restores the warning state.
#endif

#if GTEST_OS_WINDOWS_MOBILE
// Windows CE has no C library. The abort() function is used in
// several places in Google Test. This implementation provides a reasonable
// imitation of standard behaviour.
void Abort();
#else
// Terminates the program by calling abort().
inline void Abort() {
  abort();
}
#endif  // GTEST_OS_WINDOWS_MOBILE

}  // namespace posix

// Largest value a BiggestInt can hold: every bit set except the top one.
// The expression is valid whether BiggestInt uses one's complement or
// two's complement.
//
// numeric_limits from the STL is deliberately not used: __int64 and
// long long are not part of standard C++, so numeric_limits need not be
// defined for them.
const BiggestInt kMaxBiggestInt =
    ~(static_cast<BiggestInt>(1) << (sizeof(BiggestInt) * 8 - 1));

// This template class serves as a compile-time function from size to
// type.  It maps a size in bytes to a primitive type with that
// size. e.g.
//
//   TypeWithSize<4>::UInt
//
// is typedef-ed to be unsigned int (unsigned integer made up of 4
// bytes).
//
// Such functionality should belong to STL, but I cannot find it
// there.
//
// Google Test uses this class in the implementation of floating-point
// comparison.
//
// For now it only handles UInt (unsigned int) as that's all Google Test
// needs.  Other types can be easily added in the future if need
// arises.
// Primary template, picked for every byte count that has no explicit
// specialization below.
template <size_t kSizeInBytes>
class TypeWithSize {
 public:
  // UInt is void here on purpose: it keeps users from working with
  // TypeWithSize<N> for an N that is not supported.
  typedef void UInt;
};

// 4-byte integers.
template <>
class TypeWithSize<4> {
 public:
  // int / unsigned int occupy 4 bytes under both gcc and MSVC.
  //
  // uint32, uint64 and friends cannot be used here because
  // base/basictypes.h doesn't compile on Windows.
  typedef int Int;
  typedef unsigned int UInt;
};

// 8-byte integers.
template <>
class TypeWithSize<8> {
 public:

#if GTEST_OS_WINDOWS
  // Windows spelling of the 64-bit integer types.
  typedef __int64 Int;
  typedef unsigned __int64 UInt;
#else
  typedef long long Int;  // NOLINT
  typedef unsigned long long UInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS
};

// Fixed-width integer aliases built on top of TypeWithSize.
typedef TypeWithSize<4>::Int Int32;
typedef TypeWithSize<4>::UInt UInt32;
typedef TypeWithSize<8>::Int Int64;
typedef TypeWithSize<8>::UInt UInt64;
// A time span or time stamp expressed in milliseconds.
typedef TypeWithSize<8>::Int TimeInMillis;

// Utilities for command line flags and environment variables.

// Macro for referencing flags.
// GTEST_FLAG(foo) expands to the identifier FLAGS_gtest_foo, i.e. the name
// of the variable that backs the flag.
#define GTEST_FLAG(name) FLAGS_gtest_##name

// Macros for declaring flags.
// Each one expands to an `extern` declaration (decorated with GTEST_API_) of
// the flag variable named by GTEST_FLAG(name); no storage is defined and no
// trailing semicolon is supplied.
#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
#define GTEST_DECLARE_int32_(name) \
    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
#define GTEST_DECLARE_string_(name) \
    GTEST_API_ extern ::testing::internal::String GTEST_FLAG(name)

// Macros for defining flags.
// Each one expands to the definition of the flag variable, initialized with
// (default_val).  The `doc` argument does not appear in the expansion; it
// only documents the flag at the point of definition.
#define GTEST_DEFINE_bool_(name, default_val, doc) \
    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_int32_(name, default_val, doc) \
    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_string_(name, default_val, doc) \
    GTEST_API_ ::testing::internal::String GTEST_FLAG(name) = (default_val)

// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
// to *value and returns true; otherwise leaves *value unchanged and returns
// false.
// TODO(chandlerc): Find a better way to refactor flag and environment parsing
// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
// function.
bool ParseInt32(const Message& src_text, const char* str, Int32* value);

// Parses a bool/Int32/string from the environment variable
// corresponding to the given Google Test flag.
bool BoolFromGTestEnv(const char* flag, bool default_val);
GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
const char* StringFromGTestEnv(const char* flag, const char* default_val);

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_

#if GTEST_OS_LINUX
# include <stdlib.h>
# include <sys/types.h>
# include <sys/wait.h>
# include <unistd.h>
#endif  // GTEST_OS_LINUX

#include <ctype.h>
#include <string.h>
#include <iomanip>
#include <limits>
#include <set>

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares the String class and functions used internally by
// Google Test.  They are subject to change without notice.  They should not be
// used by code external to Google Test.
//
// This header file is #included by <gtest/internal/gtest-internal.h>.
// It should not be #included by other files.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_

#ifdef __BORLANDC__
// string.h is not guaranteed to provide strcpy on C++ Builder.
# include <mem.h>
#endif

#include <string.h>

#include <string>

namespace testing {
namespace internal {

// String - a UTF-8 string class.
//
// For historic reasons, we don't use std::string.
//
// TODO(wan@google.com): replace this class with std::string or
// implement it in terms of the latter.
//
// Note that String can represent both NULL and the empty string,
// while std::string cannot represent NULL.
//
// NULL and the empty string are considered different.  NULL is less
// than anything (including the empty string) except itself.
//
// This class only provides minimum functionality necessary for
// implementing Google Test.  We do not intend to implement a full-fledged
// string class here.
//
// Since the purpose of this class is to provide a substitute for
// std::string on platforms where it cannot be used, we define a copy
// constructor and assignment operators such that we don't need
// conditional compilation in a lot of places.
//
// In order to make the representation efficient, the d'tor of String
// is not virtual.  Therefore DO NOT INHERIT FROM String.
class GTEST_API_ String {
 public:
  // Static utility methods

  // Returns the input enclosed in double quotes if it's not NULL;
  // otherwise returns "(null)".  For example, "\"Hello\"" is returned
  // for input "Hello".
  //
  // This is useful for printing a C string in the syntax of a literal.
  //
  // Known issue: escape sequences are not handled yet.
  static String ShowCStringQuoted(const char* c_str);

  // Clones a 0-terminated C string, allocating memory using new.  The
  // caller is responsible for deleting the return value using
  // delete[].  Returns the cloned string, or NULL if the input is
  // NULL.
  //
  // This is different from strdup() in string.h, which allocates
  // memory using malloc().
  static const char* CloneCString(const char* c_str);

#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
  // able to pass strings to Win32 APIs on CE we need to convert them
  // to 'Unicode', UTF-16.

  // Creates a UTF-16 wide string from the given ANSI string, allocating
  // memory using new. The caller is responsible for deleting the return
  // value using delete[]. Returns the wide string, or NULL if the
  // input is NULL.
  //
  // The wide string is created using the ANSI codepage (CP_ACP) to
  // match the behaviour of the ANSI versions of Win32 calls and the
  // C runtime.
  static LPCWSTR AnsiToUtf16(const char* c_str);

  // Creates an ANSI string from the given wide string, allocating
  // memory using new. The caller is responsible for deleting the return
  // value using delete[]. Returns the ANSI string, or NULL if the
  // input is NULL.
  //
  // The returned string is created using the ANSI codepage (CP_ACP) to
  // match the behaviour of the ANSI versions of Win32 calls and the
  // C runtime.
  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
#endif

  // Compares two C strings.  Returns true iff they have the same content.
  //
  // Unlike strcmp(), this function can handle NULL argument(s).  A
  // NULL C string is considered different to any non-NULL C string,
  // including the empty string.
  static bool CStringEquals(const char* lhs, const char* rhs);

  // Converts a wide C string to a String using the UTF-8 encoding.
  // NULL will be converted to "(null)".  If an error occurred during
  // the conversion, "(failed to convert from wide string)" is
  // returned.
  static String ShowWideCString(const wchar_t* wide_c_str);

  // Similar to ShowWideCString(), except that this function encloses
  // the converted string in double quotes.
  static String ShowWideCStringQuoted(const wchar_t* wide_c_str);

  // Compares two wide C strings.  Returns true iff they have the same
  // content.
  //
  // Unlike wcscmp(), this function can handle NULL argument(s).  A
  // NULL C string is considered different to any non-NULL C string,
  // including the empty string.
  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);

  // Compares two C strings, ignoring case.  Returns true iff they
  // have the same content.
  //
  // Unlike strcasecmp(), this function can handle NULL argument(s).
  // A NULL C string is considered different to any non-NULL C string,
  // including the empty string.
  static bool CaseInsensitiveCStringEquals(const char* lhs,
                                           const char* rhs);

  // Compares two wide C strings, ignoring case.  Returns true iff they
  // have the same content.
  //
  // Unlike wcscasecmp(), this function can handle NULL argument(s).
  // A NULL C string is considered different to any non-NULL wide C string,
  // including the empty string.
  // NB: The implementations on different platforms slightly differ.
  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
  // environment variable. On GNU platform this method uses wcscasecmp
  // which compares according to LC_CTYPE category of the current locale.
  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
  // current locale.
  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
                                               const wchar_t* rhs);

  // Formats a list of arguments to a String, using the same format
  // spec string as for printf.
  //
  // We do not use the StringPrintf class as it is not universally
  // available.
  //
  // The result is limited to 4096 characters (including the trailing
  // 0).  If 4096 characters are not enough to format the input,
  // "<buffer exceeded>" is returned.
  static String Format(const char* format, ...);

  // C'tors

  // The default c'tor constructs a NULL string.
  String() : c_str_(NULL), length_(0) {}

  // Constructs a String by cloning a 0-terminated C string.  A NULL
  // argument yields a NULL String (not an empty one).
  String(const char* a_c_str) {  // NOLINT
    if (a_c_str == NULL) {
      c_str_ = NULL;
      length_ = 0;
    } else {
      ConstructNonNull(a_c_str, strlen(a_c_str));
    }
  }

  // Constructs a String by copying a given number of chars from a
  // buffer.  E.g. String("hello", 3) creates the string "hel",
  // String("a\0bcd", 4) creates "a\0bc", String(NULL, 0) creates "",
  // and String(NULL, 1) results in access violation.
  String(const char* buffer, size_t a_length) {
    ConstructNonNull(buffer, a_length);
  }

  // The copy c'tor creates a new copy of the string.  The two
  // String objects do not share content.
  String(const String& str) : c_str_(NULL), length_(0) { *this = str; }

  // D'tor.  String is intended to be a final class, so the d'tor
  // doesn't need to be virtual.
  ~String() { delete[] c_str_; }

  // Allows a String to be implicitly converted to an ::std::string or
  // ::string, and vice versa.  Converting a String containing a NULL
  // pointer to ::std::string or ::string is undefined behavior.
  // Converting a ::std::string or ::string copies all str.length()
  // characters, embedded NUL characters included; code that reads the
  // result through c_str() as a plain C string will only see the
  // prefix up to the first NUL character.
  String(const ::std::string& str) {
    ConstructNonNull(str.c_str(), str.length());
  }

  operator ::std::string() const { return ::std::string(c_str(), length()); }

#if GTEST_HAS_GLOBAL_STRING
  String(const ::string& str) {
    ConstructNonNull(str.c_str(), str.length());
  }

  operator ::string() const { return ::string(c_str(), length()); }
#endif  // GTEST_HAS_GLOBAL_STRING

  // Returns true iff this is an empty string (i.e. "").  A NULL String
  // is not considered empty.
  bool empty() const { return (c_str() != NULL) && (length() == 0); }

  // Compares this with another String.
  // Returns < 0 if this is less than rhs, 0 if this is equal to rhs, or > 0
  // if this is greater than rhs.
  int Compare(const String& rhs) const;

  // Returns true iff this String equals the given C string.  A NULL
  // string and a non-NULL string are considered not equal.
  bool operator==(const char* a_c_str) const { return Compare(a_c_str) == 0; }

  // Returns true iff this String is less than the given String.  A
  // NULL string is considered less than "".
  bool operator<(const String& rhs) const { return Compare(rhs) < 0; }

  // Returns true iff this String doesn't equal the given C string.  A NULL
  // string and a non-NULL string are considered not equal.
  bool operator!=(const char* a_c_str) const { return !(*this == a_c_str); }

  // Returns true iff this String ends with the given suffix.  *Any*
  // String is considered to end with a NULL or empty suffix.
  bool EndsWith(const char* suffix) const;

  // Returns true iff this String ends with the given suffix, not considering
  // case. Any String is considered to end with a NULL or empty suffix.
  bool EndsWithCaseInsensitive(const char* suffix) const;

  // Returns the length of the encapsulated string, or 0 if the
  // string is NULL.
  size_t length() const { return length_; }

  // Gets the 0-terminated C string this String object represents.
  // The String object still owns the string.  Therefore the caller
  // should NOT delete the return value.
  const char* c_str() const { return c_str_; }

  // Assigns a C string to this object.  Self-assignment works.
  const String& operator=(const char* a_c_str) {
    return *this = String(a_c_str);
  }

  // Assigns a String object to this object.  Self-assignment works.
  //
  // The replacement buffer is allocated before the current one is
  // released, so if the allocation throws, *this still owns its
  // original (valid) content instead of a dangling pointer.
  const String& operator=(const String& rhs) {
    if (this != &rhs) {
      const char* const previous = c_str_;
      if (rhs.c_str() == NULL) {
        c_str_ = NULL;
        length_ = 0;
      } else {
        // ConstructNonNull() updates the members only after new[] has
        // succeeded, so `previous` stays owned by *this on failure.
        ConstructNonNull(rhs.c_str(), rhs.length());
      }
      delete[] previous;
    }

    return *this;
  }

 private:
  // Constructs a non-NULL String from the given content.  c_str_ is
  // overwritten without being freed, so this may only be called when
  // c_str_ holds no buffer that is owned solely through it (i.e. it is
  // uninitialized, NULL, or the caller has saved the old pointer and
  // deletes it afterwards).
  // ConstructNonNull(NULL, 0) results in an empty string ("").
  // ConstructNonNull(NULL, non_zero) is undefined behavior.
  void ConstructNonNull(const char* buffer, size_t a_length) {
    char* const str = new char[a_length + 1];
    // memcpy() must not be handed a NULL source even for a zero-byte
    // copy, so skip it when there is nothing to copy.
    if (a_length != 0) {
      memcpy(str, buffer, a_length);
    }
    str[a_length] = '\0';
    c_str_ = str;
    length_ = a_length;
  }

  const char* c_str_;
  size_t length_;
};  // class String

// Writes a String to an ostream.  A NULL String is written as "(null)";
// otherwise every character is written as-is, except that each '\0'
// character is written as the two characters "\\0".
inline ::std::ostream& operator<<(::std::ostream& os, const String& str) {
  const char* const chars = str.c_str();
  if (chars == NULL) {
    os << "(null)";
    return os;
  }

  const size_t count = str.length();
  for (size_t pos = 0; pos < count; ++pos) {
    const char ch = chars[pos];
    if (ch != '\0') {
      os << ch;
    } else {
      os << "\\0";
    }
  }
  return os;
}

// Gets the content of the stringstream's buffer as a String.  Each '\0'
// character in the buffer is replaced with "\\0".
GTEST_API_ String StringStreamToString(::std::stringstream* stream);

// Converts a streamable value to a String.  A NULL pointer is
// converted to "(null)".  When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".

// Declared here but defined in gtest.h, so that it has access
// to the definition of the Message class, required by the ARM
// compiler.
template <typename T>
String StreamableToString(const T& streamable);

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: keith.ray@gmail.com (Keith Ray)
//
// Google Test filepath utilities
//
// This header file declares classes and functions used internally by
// Google Test.  They are subject to change without notice.
//
// This file is #included in <gtest/internal/gtest-internal.h>.
// Do not include this header file separately!

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_


namespace testing {
namespace internal {

// FilePath - a class for file and directory pathname manipulation which
// handles platform-specific conventions (like the pathname separator).
// Used for helper functions for naming files in a directory for xml output.
// Except for Set methods, all methods are const or static, which provides an
// "immutable value object" -- useful for peace of mind.
// A FilePath with a value ending in a path separator ("like/this/") represents
// a directory, otherwise it is assumed to represent a file. In either case,
// it may or may not represent an actual file or directory in the file system.
// Names are NOT checked for syntax correctness -- no checking for illegal
// characters, malformed paths, etc.

class GTEST_API_ FilePath {
 public:
  // Creates a FilePath holding the empty pathname "".
  FilePath() : pathname_("") {}

  // Copies rhs's pathname verbatim; no normalization is performed.
  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) {}

  // Wraps the given C-string pathname and then normalizes it.
  explicit FilePath(const char* pathname) : pathname_(pathname) {
    Normalize();
  }

  // Wraps the given String pathname and then normalizes it.
  explicit FilePath(const String& pathname) : pathname_(pathname) {
    Normalize();
  }

  // Assignment takes over rhs's pathname, exactly like Set().
  FilePath& operator=(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
    return *this;
  }

  // Replaces this object's pathname with rhs's pathname.
  void Set(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
  }

  // Returns a copy of the stored pathname.
  String ToString() const {
    return pathname_;
  }

  // Returns the stored pathname as a C string owned by this object.
  const char* c_str() const {
    return pathname_.c_str();
  }

  // Returns the current working directory, or "" if unsuccessful.
  static FilePath GetCurrentDir();

  // Builds a file name from its parts.  With directory = "dir",
  // base_name = "test", number = 0 and extension = "xml" the result is
  // "dir/test.xml".  A number greater than zero (e.g., 12) is appended to
  // the base name: "dir/test_12.xml".
  // On Windows, \ is used as the separator rather than /.
  static FilePath MakeFileName(const FilePath& directory,
                               const FilePath& base_name,
                               int number,
                               const char* extension);

  // Joins a directory and a relative path.  With directory = "dir" and
  // relative_path = "test.xml" the result is "dir/test.xml".
  // On Windows, \ is used as the separator rather than /.
  static FilePath ConcatPaths(const FilePath& directory,
                              const FilePath& relative_path);

  // Returns a pathname for a file that does not currently exist: either
  // directory/base_name.extension, or
  // directory/base_name_<number>.extension when
  // directory/base_name.extension already exists.  The number is
  // incremented until a pathname that does not exist yet is found.
  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
  // Beware of a race condition: two or more processes calling this function
  // at the same time could both pick the same filename.
  static FilePath GenerateUniqueFileName(const FilePath& directory,
                                         const FilePath& base_name,
                                         const char* extension);

  // Returns true iff the path is NULL or "".
  bool IsEmpty() const {
    const char* const path = c_str();
    return path == NULL || path[0] == '\0';
  }

  // Returns the name without its trailing separator character if it has one;
  // otherwise returns the name unmodified.
  // The separator is \ on Windows and / on other platforms.
  FilePath RemoveTrailingPathSeparator() const;

  // Returns a copy of the FilePath with the directory part removed.
  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
  // FilePath("file"). Without a directory part ("just_a_file") the FilePath
  // is returned unmodified. Without a file part ("just_a_dir/") an empty
  // FilePath ("") is returned.
  // The path separator is '\' on Windows and '/' otherwise.
  FilePath RemoveDirectoryName() const;

  // Returns the directory path with the filename removed.
  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
  // The path separator is '\' on Windows and '/' otherwise.
  FilePath RemoveFileName() const;

  // Returns a copy of the FilePath with the case-insensitive extension removed.
  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
  // FilePath("dir/file"). If a case-insensitive extension is not
  // found, returns a copy of the original FilePath.
  FilePath RemoveExtension(const char* extension) const;

  // Creates directories so that path exists. Returns true if successful or if
  // the directories already exist; returns false if unable to create
  // directories for any reason. Also returns false if the FilePath does
  // not represent a directory (that is, it doesn't end with a path separator).
  bool CreateDirectoriesRecursively() const;

  // Creates the directory so that path exists. Returns true if successful or
  // if the directory already exists; returns false if unable to create the
  // directory for any reason, including if the parent directory does not
  // exist. Not named "CreateDirectory" because that's a macro on Windows.
  bool CreateFolder() const;

  // Returns true if FilePath describes something in the file-system,
  // either a file, directory, or whatever, and that something exists.
  bool FileOrDirectoryExists() const;

  // Returns true if pathname describes a directory in the file-system
  // that exists.
  bool DirectoryExists() const;

  // Returns true if FilePath ends with a path separator, which indicates that
  // it is intended to represent a directory. Returns false otherwise.
  // This does NOT check that a directory (or file) actually exists.
  bool IsDirectory() const;

  // Returns true if pathname describes a root directory. (Windows has one
  // root directory per disk drive.)
  bool IsRootDirectory() const;

  // Returns true if pathname describes an absolute path.
  bool IsAbsolutePath() const;

 private:
  // Collapses runs of consecutive separators into a single separator.
  // For example, "bar///foo" becomes "bar/foo". Other redundancies that a
  // pathname may contain, such as "." or "..", are left alone.
  //
  // A pathname with multiple consecutive separators may occur either through
  // user error or as a result of some scripts or APIs that generate a pathname
  // with a trailing separator. On other platforms the same API or script
  // may NOT generate a pathname with a trailing "/". Then elsewhere that
  // pathname may have another "/" and pathname components added to it,
  // without checking for the separator already being there.
  // The script language and operating system may allow paths like "foo//bar"
  // but some of the functions in FilePath will not handle that correctly. In
  // particular, RemoveTrailingPathSeparator() only removes one separator, and
  // it is called in CreateDirectoriesRecursively() assuming that it will change
  // a pathname from directory syntax (trailing separator) to filename syntax.
  //
  // On Windows this method also replaces the alternate path separator '/' with
  // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
  // "bar\\foo".

  void Normalize();

  // Returns a pointer to the last occurrence of a valid path separator in
  // the FilePath. On Windows, for example, both '/' and '\' are valid path
  // separators. Returns NULL if no path separator was found.
  const char* FindLastPathSeparator() const;

  String pathname_;
};  // class FilePath

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
// This file was GENERATED by command:
//     pump.py gtest-type-util.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Type utilities needed for implementing typed and type-parameterized
// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently we support at most 50 types in a list, and at most 50
// type-parameterized tests in one type-parameterized test case.
// Please contact googletestframework@googlegroups.com if you need
// more.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_


// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
// libstdc++ (which is where cxxabi.h comes from).
# ifdef __GLIBCXX__
#  include <cxxabi.h>
# elif defined(__HP_aCC)
#  include <acxx_demangle.h>
# endif  // __GLIBCXX__

namespace testing {
namespace internal {

// GetTypeName<T>() returns a human-readable name of type T.
//
// With RTTI enabled the name comes from typeid(T).name(); under libstdc++
// or HP aCC that name is mangled, so it is run through __cxa_demangle() and
// the mangled form is kept only if demangling fails.  Without RTTI the
// placeholder "<type>" is returned.
//
// NB: This function is also used in Google Mock, so don't move it inside of
// the typed-test-only section below.
template <typename T>
String GetTypeName() {
# if GTEST_HAS_RTTI

  const char* const mangled = typeid(T).name();
#  if defined(__GLIBCXX__) || defined(__HP_aCC)
#   ifdef __GLIBCXX__
  using abi::__cxa_demangle;
#   endif // __GLIBCXX__
  int demangle_status = 0;
  char* const demangled = __cxa_demangle(mangled, 0, 0, &demangle_status);
  // Copy the chosen name into the result before the demangled buffer is
  // released.
  const String result(demangle_status == 0 ? demangled : mangled);
  free(demangled);
  return result;
#  else
  return mangled;
#  endif  // __GLIBCXX__ || __HP_aCC

# else

  return "<type>";

# endif  // GTEST_HAS_RTTI
}

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// AssertTypeEq<T1, T2>::type exists only when T1 and T2 name the same
// type, which makes it usable as a compile-time assertion that two
// types are equal.

// Primary template: declared but never defined, so naming ::type for
// two different types fails to compile.
template <class Lhs, class Rhs>
struct AssertTypeEq;

// Specialization selected when both arguments are the same type.
template <class Same>
struct AssertTypeEq<Same, Same> {
  typedef bool type;
};

// Placeholder type that serves as the default for every argument of
// the Types class template.  It lets us emulate variadic templates
// (e.g. Types<int>, Types<int, double>, and so on), which C++ does not
// support directly.
struct None {
};

// The following family of structs and struct templates is used to
// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
// represents a type list with N types (T1, T2, ..., and TN) in it.
// Except for Types0, every struct in the family has two member types:
// Head for the first type in the list, and Tail for the rest of the
// list.

// Type list with no types in it; it declares neither Head nor Tail.
struct Types0 {
};

// Type lists of length 1, 2, 3, and so on.

// Type list with 1 type; Tail is the empty list.
template <class T1>
struct Types1 {
  typedef Types0 Tail;
  typedef T1 Head;
};
// Type list with 2 types; Tail is the 1-type list holding T2.
template <class T1, class T2>
struct Types2 {
  typedef Types1<T2> Tail;
  typedef T1 Head;
};

// Type list with 3 types; Tail is the 2-type list T2, T3.
template <class T1, class T2, class T3>
struct Types3 {
  typedef Types2<T2, T3> Tail;
  typedef T1 Head;
};

// Type list with 4 types; Tail is the 3-type list T2, ..., T4.
template <class T1, class T2, class T3, class T4>
struct Types4 {
  typedef Types3<T2, T3, T4> Tail;
  typedef T1 Head;
};

// Type list with 5 types; Tail is the 4-type list T2, ..., T5.
template <class T1, class T2, class T3, class T4, class T5>
struct Types5 {
  typedef Types4<T2, T3, T4, T5> Tail;
  typedef T1 Head;
};

// Type list with 6 types; Tail is the 5-type list T2, ..., T6.
template <class T1, class T2, class T3, class T4, class T5,
    class T6>
struct Types6 {
  typedef Types5<T2, T3, T4, T5, T6> Tail;
  typedef T1 Head;
};

// Type list with 7 types; Tail is the 6-type list T2, ..., T7.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7>
struct Types7 {
  typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
  typedef T1 Head;
};

// Type list with 8 types; Tail is the 7-type list T2, ..., T8.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8>
struct Types8 {
  typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
  typedef T1 Head;
};

// Type list with 9 types; Tail is the 8-type list T2, ..., T9.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9>
struct Types9 {
  typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
  typedef T1 Head;
};

// Type list with 10 types; Tail is the 9-type list T2, ..., T10.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10>
struct Types10 {
  typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
  typedef T1 Head;
};

// Type list with 11 types; Tail is the 10-type list T2, ..., T11.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11>
struct Types11 {
  typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
  typedef T1 Head;
};

// Type list with 12 types; Tail is the 11-type list T2, ..., T12.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12>
struct Types12 {
  typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
  typedef T1 Head;
};

// Type list with 13 types; Tail is the 12-type list T2, ..., T13.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13>
struct Types13 {
  typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
  typedef T1 Head;
};

// Type list with 14 types; Tail is the 13-type list T2, ..., T14.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14>
struct Types14 {
  typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
  typedef T1 Head;
};

// Type list with 15 types; Tail is the 14-type list T2, ..., T15.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15>
struct Types15 {
  typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15> Tail;
  typedef T1 Head;
};

// Type list with 16 types; Tail is the 15-type list T2, ..., T16.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16>
struct Types16 {
  typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16> Tail;
  typedef T1 Head;
};

// Type list with 17 types; Tail is the 16-type list T2, ..., T17.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17>
struct Types17 {
  typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17> Tail;
  typedef T1 Head;
};

// Type list with 18 types; Tail is the 17-type list T2, ..., T18.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18>
struct Types18 {
  typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18> Tail;
  typedef T1 Head;
};

// Type list with 19 types; Tail is the 18-type list T2, ..., T19.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19>
struct Types19 {
  typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19> Tail;
  typedef T1 Head;
};

// Type list with 20 types; Tail is the 19-type list T2, ..., T20.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20>
struct Types20 {
  typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20> Tail;
  typedef T1 Head;
};

// Type list with 21 types; Tail is the 20-type list T2, ..., T21.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21>
struct Types21 {
  typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21> Tail;
  typedef T1 Head;
};

// Type list with 22 types; Tail is the 21-type list T2, ..., T22.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22>
struct Types22 {
  typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22> Tail;
  typedef T1 Head;
};

// Type list with 23 types; Tail is the 22-type list T2, ..., T23.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23>
struct Types23 {
  typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23> Tail;
  typedef T1 Head;
};

// Type list with 24 types; Tail is the 23-type list T2, ..., T24.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24>
struct Types24 {
  typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
  typedef T1 Head;
};

// Type list with 25 types; Tail is the 24-type list T2, ..., T25.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25>
struct Types25 {
  typedef Types24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
  typedef T1 Head;
};

// Type list with 26 types; Tail is the 25-type list T2, ..., T26.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26>
struct Types26 {
  typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
  typedef T1 Head;
};

// Type list with 27 types; Tail is the 26-type list T2, ..., T27.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27>
struct Types27 {
  typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
  typedef T1 Head;
};

// Type list with 28 types; Tail is the 27-type list T2, ..., T28.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28>
struct Types28 {
  typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
  typedef T1 Head;
};

// Type list with 29 types; Tail is the 28-type list T2, ..., T29.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29>
struct Types29 {
  typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29> Tail;
  typedef T1 Head;
};

// Type list with 30 types; Tail is the 29-type list T2, ..., T30.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30>
struct Types30 {
  typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30> Tail;
  typedef T1 Head;
};

// Type list with 31 types; Tail is the 30-type list T2, ..., T31.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31>
struct Types31 {
  typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31> Tail;
  typedef T1 Head;
};

// Type list with 32 types; Tail is the 31-type list T2, ..., T32.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32>
struct Types32 {
  typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32> Tail;
  typedef T1 Head;
};

// Type list with 33 types; Tail is the 32-type list T2, ..., T33.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33>
struct Types33 {
  typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33> Tail;
  typedef T1 Head;
};

// Type list with 34 types; Tail is the 33-type list T2, ..., T34.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34>
struct Types34 {
  typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34> Tail;
  typedef T1 Head;
};

// Type list with 35 types; Tail is the 34-type list T2, ..., T35.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35>
struct Types35 {
  typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35> Tail;
  typedef T1 Head;
};

// Type list with 36 types; Tail is the 35-type list T2, ..., T36.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36>
struct Types36 {
  typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36> Tail;
  typedef T1 Head;
};

// Type list with 37 types; Tail is the 36-type list T2, ..., T37.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37>
struct Types37 {
  typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37> Tail;
  typedef T1 Head;
};

// Type list with 38 types; Tail is the 37-type list T2, ..., T38.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38>
struct Types38 {
  typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
  typedef T1 Head;
};

// Type list with 39 types; Tail is the 38-type list T2, ..., T39.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39>
struct Types39 {
  typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
  typedef T1 Head;
};

// Type list with 40 types; Tail is the 39-type list T2, ..., T40.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40>
struct Types40 {
  typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
  typedef T1 Head;
};

// Type list with 41 types; Tail is the 40-type list T2, ..., T41.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41>
struct Types41 {
  typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
  typedef T1 Head;
};

// Type list with 42 types; Tail is the 41-type list T2, ..., T42.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42>
struct Types42 {
  typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail;
  typedef T1 Head;
};

// Type list with 43 types; Tail is the 42-type list T2, ..., T43.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43>
struct Types43 {
  typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43> Tail;
  typedef T1 Head;
};

// Type list with 44 types; Tail is the 43-type list T2, ..., T44.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44>
struct Types44 {
  typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44> Tail;
  typedef T1 Head;
};

// Type list with 45 types; Tail is the 44-type list T2, ..., T45.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45>
struct Types45 {
  typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45> Tail;
  typedef T1 Head;
};

// Type list with 46 types; Tail is the 45-type list T2, ..., T46.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45,
    class T46>
struct Types46 {
  typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45, T46> Tail;
  typedef T1 Head;
};

// Type list with 47 types; Tail is the 46-type list T2, ..., T47.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45,
    class T46, class T47>
struct Types47 {
  typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45, T46, T47> Tail;
  typedef T1 Head;
};

// Type list with 48 types; Tail is the 47-type list T2, ..., T48.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45,
    class T46, class T47, class T48>
struct Types48 {
  typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45, T46, T47, T48> Tail;
  typedef T1 Head;
};

// Type list with 49 types; Tail is the 48-type list T2, ..., T49.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45,
    class T46, class T47, class T48, class T49>
struct Types49 {
  typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45, T46, T47, T48, T49> Tail;
  typedef T1 Head;
};

// Type list with 50 types; Tail is the 49-type list T2, ..., T50.
template <class T1, class T2, class T3, class T4, class T5,
    class T6, class T7, class T8, class T9, class T10,
    class T11, class T12, class T13, class T14, class T15,
    class T16, class T17, class T18, class T19, class T20,
    class T21, class T22, class T23, class T24, class T25,
    class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35,
    class T36, class T37, class T38, class T39, class T40,
    class T41, class T42, class T43, class T44, class T45,
    class T46, class T47, class T48, class T49, class T50>
struct Types50 {
  typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
      T44, T45, T46, T47, T48, T49, T50> Tail;
  typedef T1 Head;
};


}  // namespace internal

// Users should not have to spell out TypesN<...> themselves, since
// that would force them to count the length of the list.  Types<...>
// is far nicer to write, but on its own it leads to dreadful compiler
// diagnostics: gcc prints every template argument, defaulted or not,
// so Types<int> shows up as Types<int, None, None, ..., None> in the
// error text.
//
// To get the best of both approaches, the user writes
// Types<T1, ..., TN> and Google Test maps it to TypesN<T1, ..., TN>
// internally, which keeps the error messages readable.  The 'type'
// member of the Types template performs that mapping.
template <class T1 = internal::None, class T2 = internal::None,
    class T3 = internal::None, class T4 = internal::None,
    class T5 = internal::None, class T6 = internal::None,
    class T7 = internal::None, class T8 = internal::None,
    class T9 = internal::None, class T10 = internal::None,
    class T11 = internal::None, class T12 = internal::None,
    class T13 = internal::None, class T14 = internal::None,
    class T15 = internal::None, class T16 = internal::None,
    class T17 = internal::None, class T18 = internal::None,
    class T19 = internal::None, class T20 = internal::None,
    class T21 = internal::None, class T22 = internal::None,
    class T23 = internal::None, class T24 = internal::None,
    class T25 = internal::None, class T26 = internal::None,
    class T27 = internal::None, class T28 = internal::None,
    class T29 = internal::None, class T30 = internal::None,
    class T31 = internal::None, class T32 = internal::None,
    class T33 = internal::None, class T34 = internal::None,
    class T35 = internal::None, class T36 = internal::None,
    class T37 = internal::None, class T38 = internal::None,
    class T39 = internal::None, class T40 = internal::None,
    class T41 = internal::None, class T42 = internal::None,
    class T43 = internal::None, class T44 = internal::None,
    class T45 = internal::None, class T46 = internal::None,
    class T47 = internal::None, class T48 = internal::None,
    class T49 = internal::None, class T50 = internal::None>
struct Types {
  // General case: all 50 arguments are forwarded to Types50.
  typedef internal::Types50<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
};

// Every argument left at its default (None): Types<> maps to the
// empty type list, Types0.
template <>
struct Types<
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types0 type;
};
// Matches when T2..T50 are all internal::None; 'type' is then
// internal::Types1 over the single leading type.
template <typename T1>
struct Types<T1, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types1<T1> type;
};
// Matches when T3..T50 are all internal::None; 'type' is then
// internal::Types2 over the 2 leading types.
template <typename T1, typename T2>
struct Types<T1, T2, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types2<T1, T2> type;
};
// Matches when T4..T50 are all internal::None; 'type' is then
// internal::Types3 over the 3 leading types.
template <typename T1, typename T2, typename T3>
struct Types<T1, T2, T3, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types3<T1, T2, T3> type;
};
// Matches when T5..T50 are all internal::None; 'type' is then
// internal::Types4 over the 4 leading types.
template <typename T1, typename T2, typename T3, typename T4>
struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types4<T1, T2, T3, T4> type;
};
// Matches when T6..T50 are all internal::None; 'type' is then
// internal::Types5 over the 5 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
struct Types<T1, T2, T3, T4, T5, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types5<T1, T2, T3, T4, T5> type;
};
// Matches when T7..T50 are all internal::None; 'type' is then
// internal::Types6 over the 6 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6>
struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
};
// Matches when T8..T50 are all internal::None; 'type' is then
// internal::Types7 over the 7 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7>
struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
};
// Matches when T9..T50 are all internal::None; 'type' is then
// internal::Types8 over the 8 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
};
// Matches when T10..T50 are all internal::None; 'type' is then
// internal::Types9 over the 9 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
};
// Matches when T11..T50 are all internal::None; 'type' is then
// internal::Types10 over the 10 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
};
// Matches when T12..T50 are all internal::None; 'type' is then
// internal::Types11 over the 11 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
};
// Matches when T13..T50 are all internal::None; 'type' is then
// internal::Types12 over the 12 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12> type;
};
// Matches when T14..T50 are all internal::None; 'type' is then
// internal::Types13 over the 13 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13> type;
};
// Matches when T15..T50 are all internal::None; 'type' is then
// internal::Types14 over the 14 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14> type;
};
// Matches when T16..T50 are all internal::None; 'type' is then
// internal::Types15 over the 15 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15> type;
};
// Matches when T17..T50 are all internal::None; 'type' is then
// internal::Types16 over the 16 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16> type;
};
// Matches when T18..T50 are all internal::None; 'type' is then
// internal::Types17 over the 17 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17> type;
};
// Matches when T19..T50 are all internal::None; 'type' is then
// internal::Types18 over the 18 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18> type;
};
// Matches when T20..T50 are all internal::None; 'type' is then
// internal::Types19 over the 19 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19> type;
};
// Matches when T21..T50 are all internal::None; 'type' is then
// internal::Types20 over the 20 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20> type;
};
// Matches when T22..T50 are all internal::None; 'type' is then
// internal::Types21 over the 21 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
};
// Matches when T23..T50 are all internal::None; 'type' is then
// internal::Types22 over the 22 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
};
// Matches when T24..T50 are all internal::None; 'type' is then
// internal::Types23 over the 23 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
};
// Matches when T25..T50 are all internal::None; 'type' is then
// internal::Types24 over the 24 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
};
// Matches when T26..T50 are all internal::None; 'type' is then
// internal::Types25 over the 25 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
};
// Matches when T27..T50 are all internal::None; 'type' is then
// internal::Types26 over the 26 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
      T26> type;
};
// Matches when T28..T50 are all internal::None; 'type' is then
// internal::Types27 over the 27 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27> type;
};
// Matches when T29..T50 are all internal::None; 'type' is then
// internal::Types28 over the 28 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27, T28> type;
};
// Matches when T30..T50 are all internal::None; 'type' is then
// internal::Types29 over the 29 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27, T28, T29> type;
};
// Matches when T31..T50 are all internal::None; 'type' is then
// internal::Types30 over the 30 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27, T28, T29, T30> type;
};
// Matches when T32..T50 are all internal::None; 'type' is then
// internal::Types31 over the 31 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27, T28, T29, T30, T31> type;
};
// Partial specialization of Types for 32 leading types: it matches when the
// other 18 arguments are internal::None.  The member typedef `type` names
// internal::Types32<T1, ..., T32>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types32<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32> type;
};
// Partial specialization of Types for 33 leading types: it matches when the
// other 17 arguments are internal::None.  The member typedef `type` names
// internal::Types33<T1, ..., T33>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types33<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33> type;
};
// Partial specialization of Types for 34 leading types: it matches when the
// other 16 arguments are internal::None.  The member typedef `type` names
// internal::Types34<T1, ..., T34>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types34<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34> type;
};
// Partial specialization of Types for 35 leading types: it matches when the
// other 15 arguments are internal::None.  The member typedef `type` names
// internal::Types35<T1, ..., T35>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types35<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35> type;
};
// Partial specialization of Types for 36 leading types: it matches when the
// other 14 arguments are internal::None.  The member typedef `type` names
// internal::Types36<T1, ..., T36>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types36<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36> type;
};
// Partial specialization of Types for 37 leading types: it matches when the
// other 13 arguments are internal::None.  The member typedef `type` names
// internal::Types37<T1, ..., T37>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types37<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37> type;
};
// Partial specialization of Types for 38 leading types: it matches when the
// other 12 arguments are internal::None.  The member typedef `type` names
// internal::Types38<T1, ..., T38>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types38<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38> type;
};
// Partial specialization of Types for 39 leading types: it matches when the
// other 11 arguments are internal::None.  The member typedef `type` names
// internal::Types39<T1, ..., T39>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types39<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
};
// Partial specialization of Types for 40 leading types: it matches when the
// other 10 arguments are internal::None.  The member typedef `type` names
// internal::Types40<T1, ..., T40>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types40<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
};
// Partial specialization of Types for 41 leading types: it matches when the
// other 9 arguments are internal::None.  The member typedef `type` names
// internal::Types41<T1, ..., T41>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types41<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41> type;
};
// Partial specialization of Types for 42 leading types: it matches when the
// other 8 arguments are internal::None.  The member typedef `type` names
// internal::Types42<T1, ..., T42>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types42<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42> type;
};
// Partial specialization of Types for 43 leading types: it matches when the
// other 7 arguments are internal::None.  The member typedef `type` names
// internal::Types43<T1, ..., T43>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types43<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43> type;
};
// Partial specialization of Types for 44 leading types: it matches when the
// other 6 arguments are internal::None.  The member typedef `type` names
// internal::Types44<T1, ..., T44>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types44<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44> type;
};
// Partial specialization of Types for 45 leading types: it matches when the
// other 5 arguments are internal::None.  The member typedef `type` names
// internal::Types45<T1, ..., T45>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44, class T45>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types45<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45> type;
};
// Partial specialization of Types for 46 leading types: it matches when the
// other 4 arguments are internal::None.  The member typedef `type` names
// internal::Types46<T1, ..., T46>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44, class T45, class T46>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types46<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46> type;
};
// Partial specialization of Types for 47 leading types: it matches when the
// other 3 arguments are internal::None.  The member typedef `type` names
// internal::Types47<T1, ..., T47>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44, class T45, class T46, class T47>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47,
    internal::None, internal::None, internal::None> {
  typedef internal::Types47<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47> type;
};
// Partial specialization of Types for 48 leading types: it matches when the
// other 2 arguments are internal::None.  The member typedef `type` names
// internal::Types48<T1, ..., T48>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44, class T45, class T46, class T47, class T48>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48,
    internal::None, internal::None> {
  typedef internal::Types48<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48> type;
};
// Partial specialization of Types for 49 leading types: it matches when the
// one remaining argument is internal::None.  The member typedef `type` names
// internal::Types49<T1, ..., T49>.
template <
    class T1, class T2, class T3, class T4, class T5, class T6,
    class T7, class T8, class T9, class T10, class T11, class T12,
    class T13, class T14, class T15, class T16, class T17, class T18,
    class T19, class T20, class T21, class T22, class T23, class T24,
    class T25, class T26, class T27, class T28, class T29, class T30,
    class T31, class T32, class T33, class T34, class T35, class T36,
    class T37, class T38, class T39, class T40, class T41, class T42,
    class T43, class T44, class T45, class T46, class T47, class T48,
    class T49>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49,
    internal::None> {
  typedef internal::Types49<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
};

namespace internal {

// Spells the kind of a template template parameter that accepts a class
// template with one type parameter.  It is written in front of the
// parameter name, e.g. "template <GTEST_TEMPLATE_ Tmpl> struct TemplateSel".
# define GTEST_TEMPLATE_ template <typename T> class

// TemplateSel<Tmpl> is a "selector": an ordinary type that stands for the
// class template Tmpl, which must take exactly one type parameter.  The
// selected template is instantiated through the nested Bind member:
// TemplateSel<Tmpl>::Bind<T>::type is the type Tmpl<T>.
//
// The indirection simulates a typedef for class templates, which C++98
// does not support directly.
template <GTEST_TEMPLATE_ Tmpl>
struct TemplateSel {
  template <typename Arg>
  struct Bind {
    typedef Tmpl<Arg> type;
  };
};

// Expands to the nested type TmplSel::Bind<T>::type for a selector type
// TmplSel and a type T; with TmplSel = TemplateSel<Tmpl> that is Tmpl<T>.
// The "template" keyword disambiguates the member template Bind when it is
// named through TmplSel.  NOTE(review): the expansion carries no leading
// "typename"; callers presumably add it where TmplSel is dependent.
# define GTEST_BIND_(TmplSel, T) \
  TmplSel::template Bind<T>::type

// NoneT is the unique placeholder class template used as the default value
// for the arguments of class template Templates.  Defaulting the unused
// arguments to NoneT lets Templates be written with a varying number of
// arguments, which simulates variadic templates (not available in C++98).
template <typename>
struct NoneT {};

// The following family of structs and struct templates is used to
// represent template lists.  In particular, TemplatesN<T1, T2, ...,
// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
// for Templates0, every struct in the family has two member types:
// Head for the selector of the first template in the list, and Tail
// for the rest of the list.

// The template list with no elements; it terminates every Tail chain and,
// being empty, declares neither Head nor Tail.
struct Templates0 {
};

// Template lists of length 1, 2, 3, and so on.

// Template list of length 1: Head is the selector for T1 and Tail is the
// empty list.
template <GTEST_TEMPLATE_ T1>
struct Templates1 {
  typedef TemplateSel<T1> Head;
  typedef Templates0 Tail;
};
// Template list of length 2: Head is the selector for T1 and Tail is the
// one-element list holding T2.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
struct Templates2 {
  typedef TemplateSel<T1> Head;
  typedef Templates1<T2> Tail;
};

// Template list of length 3: Head is the selector for T1 and Tail is the
// list of T2 and T3.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
struct Templates3 {
  typedef TemplateSel<T1> Head;
  typedef Templates2<T2, T3> Tail;
};

// Template list of length 4: Head is the selector for T1 and Tail is the
// list of T2 through T4.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4>
struct Templates4 {
  typedef TemplateSel<T1> Head;
  typedef Templates3<T2, T3, T4> Tail;
};

// Template list of length 5: Head is the selector for T1 and Tail is the
// list of T2 through T5.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
struct Templates5 {
  typedef TemplateSel<T1> Head;
  typedef Templates4<T2, T3, T4, T5> Tail;
};

// Template list of length 6: Head is the selector for T1 and Tail is the
// list of T2 through T6.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
struct Templates6 {
  typedef TemplateSel<T1> Head;
  typedef Templates5<T2, T3, T4, T5, T6> Tail;
};

// Template list of length 7: Head is the selector for T1 and Tail is the
// list of T2 through T7.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7>
struct Templates7 {
  typedef TemplateSel<T1> Head;
  typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
};

// Template list of length 8: Head is the selector for T1 and Tail is the
// list of T2 through T8.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
struct Templates8 {
  typedef TemplateSel<T1> Head;
  typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
};

// Template list of length 9: Head is the selector for T1 and Tail is the
// list of T2 through T9.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
struct Templates9 {
  typedef TemplateSel<T1> Head;
  typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
};

// Template list of length 10: Head is the selector for T1 and Tail is the
// list of T2 through T10.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10>
struct Templates10 {
  typedef TemplateSel<T1> Head;
  typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
};

// Template list of length 11: Head is the selector for T1 and Tail is the
// list of T2 through T11.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
struct Templates11 {
  typedef TemplateSel<T1> Head;
  typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
};

// Template list of length 12: Head is the selector for T1 and Tail is the
// list of T2 through T12.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
struct Templates12 {
  typedef TemplateSel<T1> Head;
  typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
};

// Template list of length 13: Head is the selector for T1 and Tail is the
// list of T2 through T13.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13>
struct Templates13 {
  typedef TemplateSel<T1> Head;
  typedef Templates12<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
};

// Template list of length 14: Head is the selector for T1 and Tail is the
// list of T2 through T14.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
struct Templates14 {
  typedef TemplateSel<T1> Head;
  typedef Templates13<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
};

// Template list of length 15: Head is the selector for T1 and Tail is the
// list of T2 through T15.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
struct Templates15 {
  typedef TemplateSel<T1> Head;
  typedef Templates14<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15> Tail;
};

// Template list of length 16: Head is the selector for T1 and Tail is the
// list of T2 through T16.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16>
struct Templates16 {
  typedef TemplateSel<T1> Head;
  typedef Templates15<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16> Tail;
};

// Template list of length 17: Head is the selector for T1 and Tail is the
// list of T2 through T17.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
struct Templates17 {
  typedef TemplateSel<T1> Head;
  typedef Templates16<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17> Tail;
};

// Template list of length 18: Head is the selector for T1 and Tail is the
// list of T2 through T18.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
struct Templates18 {
  typedef TemplateSel<T1> Head;
  typedef Templates17<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18> Tail;
};

// Template list of length 19: Head is the selector for T1 and Tail is the
// list of T2 through T19.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19>
struct Templates19 {
  typedef TemplateSel<T1> Head;
  typedef Templates18<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19> Tail;
};

// Template list of length 20: Head is the selector for T1 and Tail is the
// list of T2 through T20.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
struct Templates20 {
  typedef TemplateSel<T1> Head;
  typedef Templates19<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20> Tail;
};

// Template list of length 21: Head is the selector for T1 and Tail is the
// list of T2 through T21.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
struct Templates21 {
  typedef TemplateSel<T1> Head;
  typedef Templates20<
      T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21> Tail;
};

// Compile-time list of 22 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates21 list holding the other 21.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22>
struct Templates22 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22>;
};

// Compile-time list of 23 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates22 list holding the other 22.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
struct Templates23 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23>;
};

// Compile-time list of 24 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates23 list holding the other 23.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
struct Templates24 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24>;
};

// Compile-time list of 25 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates24 list holding the other 24.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25>
struct Templates25 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25>;
};

// Compile-time list of 26 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates25 list holding the other 25.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
struct Templates26 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26>;
};

// Compile-time list of 27 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates26 list holding the other 26.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
struct Templates27 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27>;
};

// Compile-time list of 28 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates27 list holding the other 27.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28>
struct Templates28 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28>;
};

// Compile-time list of 29 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates28 list holding the other 28.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
struct Templates29 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29>;
};

// Compile-time list of 30 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates29 list holding the other 29.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
struct Templates30 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30>;
};

// Compile-time list of 31 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates30 list holding the other 30.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31>
struct Templates31 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31>;
};

// Compile-time list of 32 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates31 list holding the other 31.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
struct Templates32 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32>;
};

// Compile-time list of 33 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates32 list holding the other 32.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
struct Templates33 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33>;
};

// Compile-time list of 34 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates33 list holding the other 33.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34>
struct Templates34 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34>;
};

// Compile-time list of 35 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates34 list holding the other 34.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
struct Templates35 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34, T35>;
};

// Compile-time list of 36 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates35 list holding the other 35.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
struct Templates36 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34, T35, T36>;
};

// Compile-time list of 37 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates36 list holding the other 36.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37>
struct Templates37 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34, T35, T36, T37>;
};

// Compile-time list of 38 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates37 list holding the other 37.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
struct Templates38 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38>;
};

// Compile-time list of 39 class templates.  Head wraps the first entry in
// TemplateSel; Tail is the Templates38 list holding the other 38.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
struct Templates39 {
  using Head = TemplateSel<T1>;
  using Tail =
      Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
                  T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
                  T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39>;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40>
struct Templates40 {
  typedef TemplateSel<T1> Head;
  typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
struct Templates41 {
  typedef TemplateSel<T1> Head;
  typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
struct Templates42 {
  typedef TemplateSel<T1> Head;
  typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43>
struct Templates43 {
  typedef TemplateSel<T1> Head;
  typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
struct Templates44 {
  typedef TemplateSel<T1> Head;
  typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
struct Templates45 {
  typedef TemplateSel<T1> Head;
  typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46>
struct Templates46 {
  typedef TemplateSel<T1> Head;
  typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45, T46> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
struct Templates47 {
  typedef TemplateSel<T1> Head;
  typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45, T46, T47> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
struct Templates48 {
  typedef TemplateSel<T1> Head;
  typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45, T46, T47, T48> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
    GTEST_TEMPLATE_ T49>
struct Templates49 {
  typedef TemplateSel<T1> Head;
  typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45, T46, T47, T48, T49> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
    GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
struct Templates50 {
  typedef TemplateSel<T1> Head;
  typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
      T43, T44, T45, T46, T47, T48, T49, T50> Tail;
};


// We don't want to require the users to write TemplatesN<...> directly,
// as that would require them to count the length.  Templates<...> is much
// easier to write, but generates horrible messages when there is a
// compiler error, as gcc insists on printing out each template
// argument, even if it has the default value (this means Templates<list>
// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
// errors).
//
// Our solution is to combine the best part of the two approaches: a
// user would write Templates<T1, ..., TN>, and Google Test will translate
// that to TemplatesN<T1, ..., TN> internally to make error messages
// readable.  The translation is done by the 'type' member of the
// Templates template.
// Primary template.  Every parameter defaults to NoneT.  This definition is
// used only when no specialization of Templates matches the argument list
// (e.g. when all 50 parameters are supplied); its 'type' member then forwards
// every argument to Templates50.
template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
    GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
    GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
    GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
    GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
    GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
    GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
    GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
    GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
    GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
    GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
    GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
    GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
    GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
    GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
    GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
    GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
    GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
    GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
    GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
    GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
    GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
    GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
    GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
    GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
struct Templates {
  typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
};

// Full specialization for the empty list: every slot is NoneT, so 'type'
// is Templates0.
template <>
struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT> {
  typedef Templates0 type;
};
// Partial specialization for a list of 1 template (T1); all remaining slots
// are NoneT, so 'type' is Templates1<T1>.
template <GTEST_TEMPLATE_ T1>
struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT> {
  typedef Templates1<T1> type;
};
// Partial specialization for a list of 2 templates (T1..T2); all remaining
// slots are NoneT, so 'type' is Templates2<T1, T2>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT> {
  typedef Templates2<T1, T2> type;
};
// Partial specialization for a list of 3 templates (T1..T3); all remaining
// slots are NoneT, so 'type' is Templates3<T1, T2, T3>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates3<T1, T2, T3> type;
};
// Partial specialization for a list of 4 templates (T1..T4); all remaining
// slots are NoneT, so 'type' is Templates4<T1, ..., T4>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4>
struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates4<T1, T2, T3, T4> type;
};
// Partial specialization for a list of 5 templates (T1..T5); all remaining
// slots are NoneT, so 'type' is Templates5<T1, ..., T5>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates5<T1, T2, T3, T4, T5> type;
};
// Partial specialization for a list of 6 templates (T1..T6); all remaining
// slots are NoneT, so 'type' is Templates6<T1, ..., T6>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates6<T1, T2, T3, T4, T5, T6> type;
};
// Partial specialization for a list of 7 templates (T1..T7); all remaining
// slots are NoneT, so 'type' is Templates7<T1, ..., T7>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7>
struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type;
};
// Partial specialization for a list of 8 templates (T1..T8); all remaining
// slots are NoneT, so 'type' is Templates8<T1, ..., T8>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
};
// Partial specialization for a list of 9 templates (T1..T9); all remaining
// slots are NoneT, so 'type' is Templates9<T1, ..., T9>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
};
// Partial specialization for a list of 10 templates (T1..T10); all remaining
// slots are NoneT, so 'type' is Templates10<T1, ..., T10>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
};
// Partial specialization for a list of 11 templates (T1..T11); all remaining
// slots are NoneT, so 'type' is Templates11<T1, ..., T11>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
};
// Partial specialization for a list of 12 templates (T1..T12); all remaining
// slots are NoneT, so 'type' is Templates12<T1, ..., T12>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type;
};
// Partial specialization for a list of 13 templates (T1..T13); all remaining
// slots are NoneT, so 'type' is Templates13<T1, ..., T13>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13> type;
};
// Partial specialization for a list of 14 templates (T1..T14); all remaining
// slots are NoneT, so 'type' is Templates14<T1, ..., T14>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14> type;
};
// Partial specialization for a list of 15 templates (T1..T15); all remaining
// slots are NoneT, so 'type' is Templates15<T1, ..., T15>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15> type;
};
// Partial specialization for a list of 16 templates (T1..T16); all remaining
// slots are NoneT, so 'type' is Templates16<T1, ..., T16>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16> type;
};
// Partial specialization for a list of 17 templates (T1..T17); all remaining
// slots are NoneT, so 'type' is Templates17<T1, ..., T17>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17> type;
};
// Partial specialization for a list of 18 templates (T1..T18); all remaining
// slots are NoneT, so 'type' is Templates18<T1, ..., T18>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18> type;
};
// Partial specialization for a list of 19 templates (T1..T19); all remaining
// slots are NoneT, so 'type' is Templates19<T1, ..., T19>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19> type;
};
// Partial specialization for a list of 20 templates (T1..T20); all remaining
// slots are NoneT, so 'type' is Templates20<T1, ..., T20>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20> type;
};
// Partial specialization for a list of 21 templates (T1..T21); all remaining
// slots are NoneT, so 'type' is Templates21<T1, ..., T21>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21> type;
};
// Partial specialization for a list of 22 templates (T1..T22); all remaining
// slots are NoneT, so 'type' is Templates22<T1, ..., T22>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT> {
  typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
};
// Partial specialization for a list of 23 templates (T1..T23); all remaining
// slots are NoneT, so 'type' is Templates23<T1, ..., T23>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT> {
  typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
};
// Partial specialization for a list of 24 templates (T1..T24); all remaining
// slots are NoneT, so 'type' is Templates24<T1, ..., T24>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT> {
  typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
};
// Partial specialization for a list of 25 templates (T1..T25); all remaining
// slots are NoneT, so 'type' is Templates25<T1, ..., T25>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT> {
  typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
};
// Partial specialization for a list of 26 templates (T1..T26); all remaining
// slots are NoneT, so 'type' is Templates26<T1, ..., T26>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT> {
  typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
};
// Partial specialization for a list of 27 templates (T1..T27); all remaining
// slots are NoneT, so 'type' is Templates27<T1, ..., T27>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT> {
  typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27> type;
};
// Partial specialization for a list of 28 templates (T1..T28); all remaining
// slots are NoneT, so 'type' is Templates28<T1, ..., T28>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT> {
  typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28> type;
};
// Partial specialization for a list of 29 templates (T1..T29); all remaining
// slots are NoneT, so 'type' is Templates29<T1, ..., T29>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT> {
  typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29> type;
};
// Partial specialization for a list of 30 templates (T1..T30); all remaining
// slots are NoneT, so 'type' is Templates30<T1, ..., T30>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30> type;
};
// Partial specialization for a list of 31 templates (T1..T31); all remaining
// slots are NoneT, so 'type' is Templates31<T1, ..., T31>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31> type;
};
// Partial specialization for a list of 32 templates (T1..T32); all remaining
// slots are NoneT, so 'type' is Templates32<T1, ..., T32>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32> type;
};
// Partial specialization for a list of 33 templates (T1..T33); all remaining
// slots are NoneT, so 'type' is Templates33<T1, ..., T33>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33> type;
};
// Partial specialization for a list of 34 templates (T1..T34); all remaining
// slots are NoneT, so 'type' is Templates34<T1, ..., T34>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34> type;
};
// Partial specialization for a list of 35 templates (T1..T35); all remaining
// slots are NoneT, so 'type' is Templates35<T1, ..., T35>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35> type;
};
// Partial specialization of Templates for argument lists made of 36
// template arguments (T1-T36) followed by NoneT in each of the remaining
// 14 slots.  Its nested `type` is Templates36<T1, ..., T36>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
};
// Partial specialization of Templates for argument lists made of 37
// template arguments (T1-T37) followed by NoneT in each of the remaining
// 13 slots.  Its nested `type` is Templates37<T1, ..., T37>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
};
// Partial specialization of Templates for argument lists made of 38
// template arguments (T1-T38) followed by NoneT in each of the remaining
// 12 slots.  Its nested `type` is Templates38<T1, ..., T38>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
};
// Partial specialization of Templates for argument lists made of 39
// template arguments (T1-T39) followed by NoneT in each of the remaining
// 11 slots.  Its nested `type` is Templates39<T1, ..., T39>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
};
// Partial specialization of Templates for argument lists made of 40
// template arguments (T1-T40) followed by NoneT in each of the remaining
// 10 slots.  Its nested `type` is Templates40<T1, ..., T40>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
};
// Partial specialization of Templates for argument lists made of 41
// template arguments (T1-T41) followed by NoneT in each of the remaining
// 9 slots.  Its nested `type` is Templates41<T1, ..., T41>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41> type;
};
// Partial specialization of Templates for argument lists made of 42
// template arguments (T1-T42) followed by NoneT in each of the remaining
// 8 slots.  Its nested `type` is Templates42<T1, ..., T42>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42> type;
};
// Partial specialization of Templates for argument lists made of 43
// template arguments (T1-T43) followed by NoneT in each of the remaining
// 7 slots.  Its nested `type` is Templates43<T1, ..., T43>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43> type;
};
// Partial specialization of Templates for argument lists made of 44
// template arguments (T1-T44) followed by NoneT in each of the remaining
// 6 slots.  Its nested `type` is Templates44<T1, ..., T44>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44> type;
};
// Partial specialization of Templates for argument lists made of 45
// template arguments (T1-T45) followed by NoneT in each of the remaining
// 5 slots.  Its nested `type` is Templates45<T1, ..., T45>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45> type;
};
// Partial specialization of Templates for argument lists made of 46
// template arguments (T1-T46) followed by NoneT in each of the remaining
// 4 slots.  Its nested `type` is Templates46<T1, ..., T46>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    T45, T46, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45, T46> type;
};
// Partial specialization of Templates for argument lists made of 47
// template arguments (T1-T47) followed by NoneT in each of the remaining
// 3 slots.  Its nested `type` is Templates47<T1, ..., T47>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    T45, T46, T47, NoneT, NoneT, NoneT> {
  typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45, T46, T47> type;
};
// Partial specialization of Templates for argument lists made of 48
// template arguments (T1-T48) followed by NoneT in each of the remaining
// 2 slots.  Its nested `type` is Templates48<T1, ..., T48>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    T45, T46, T47, T48, NoneT, NoneT> {
  typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45, T46, T47, T48> type;
};
// Partial specialization of Templates for argument lists made of 49
// template arguments (T1-T49) followed by NoneT in the single remaining
// slot.  Its nested `type` is Templates49<T1, ..., T49>.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
    GTEST_TEMPLATE_ T49>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
    T45, T46, T47, T48, T49, NoneT> {
  typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
      T42, T43, T44, T45, T46, T47, T48, T49> type;
};

// The TypeList template makes it possible to use either a single type
// or a Types<...> list in TYPED_TEST_CASE() and
// INSTANTIATE_TYPED_TEST_CASE_P().

// Primary template: a single type T is wrapped as the one-element list
// Types1<T>, exposed through the nested `type` typedef.
template <typename T>
struct TypeList { typedef Types1<T> type; };

// Specialization for an argument that is itself a Types<T1, ..., T50>
// instantiation: the nested `type` is simply that instantiation's own
// nested `type`, so the list is passed through rather than wrapped again.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48, typename T49, typename T50>
struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
    T44, T45, T46, T47, T48, T49, T50> > {
  typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_

// Pastes two tokens together, expanding macro arguments first.
//
// An argument that is a direct operand of ## is pasted without being
// macro-expanded, so writing
//
//   foo ## __LINE__
//
// yields the single token foo__LINE__ rather than foo followed by the
// current line number.  Routing the paste through a second macro level
// lets the arguments be expanded before they reach ##.  For more
// details, see
// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
#define GTEST_CONCAT_TOKEN_IMPL_(lhs, rhs) lhs ## rhs
#define GTEST_CONCAT_TOKEN_(lhs, rhs) GTEST_CONCAT_TOKEN_IMPL_(lhs, rhs)

// Google Test defines the testing::Message class to allow construction of
// test messages via the << operator.  The idea is that anything
// streamable to std::ostream can be streamed to a testing::Message.
// This allows users to use their own types in Google Test assertions by
// overloading the << operator.
//
// util/gtl/stl_logging-inl.h overloads << for STL containers.  These
// overloads cannot be defined in the std namespace, as that will be
// undefined behavior.  Therefore, they are defined in the global
// namespace instead.
//
// C++'s symbol lookup rule (i.e. Koenig lookup) says that these
// overloads are visible in either the std namespace or the global
// namespace, but not other namespaces, including the testing
// namespace which Google Test's Message class is in.
//
// To allow STL containers (and other types that have a << operator
// defined in the global namespace) to be used in Google Test assertions,
// testing::Message must access the custom << operator from the global
// namespace.  Hence this helper function.
//
// Note: Jeffrey Yasskin suggested an alternative fix by "using
// ::operator<<;" in the definition of Message's operator<<.  That fix
// doesn't require a helper function, but unfortunately doesn't
// compile with MSVC.
// Writes val to the stream that os points at, using the operator<<
// overloads that are visible from the global namespace.
template <typename T>
inline void GTestStreamToHelper(std::ostream* os, const T& val) {
  std::ostream& sink = *os;
  sink << val;
}

// Forward declarations only: ProtocolMessage (global namespace) and
// proto2::Message are introduced here as incomplete types.
// NOTE(review): presumably so later code can name these types without
// including their headers -- confirm against the uses further down.
class ProtocolMessage;
namespace proto2 { class Message; }

namespace testing {

// Forward declarations.

class AssertionResult;                 // Result of an assertion.
class Message;                         // Represents a failure message.
class Test;                            // Represents a test.
class TestInfo;                        // Information about a test.
class TestPartResult;                  // Result of a test part.
class UnitTest;                        // A collection of test cases.

// Declaration only: takes a value of any type by const reference and
// returns a ::std::string.  It is declared this early so that code below
// (e.g. the char-pointer FormatForComparisonFailureMessage overloads) can
// call it.
template <typename T>
::std::string PrintToString(const T& value);

namespace internal {

// Forward declarations of internal types; only the names are introduced
// here.  ScopedTrace is defined further down in this namespace.
struct TraceInfo;                      // Information about a trace point.
class ScopedTrace;                     // Implements scoped trace.
class TestInfoImpl;                    // Opaque implementation of TestInfo
class UnitTestImpl;                    // Opaque implementation of UnitTest

// How many times InitGoogleTest() has been called.
// Declaration only (extern).  Note that, unlike kStackTraceMarker below,
// it carries no GTEST_API_ annotation.
extern int g_init_gtest_count;

// The text used in failure messages to indicate the start of the
// stack trace.
// Declared as an array of unknown bound; this declaration does not supply
// the contents.
GTEST_API_ extern const char kStackTraceMarker[];

// A secret type that Google Test users don't know about.  It has no
// definition on purpose.  Therefore it's impossible to create a
// Secret object, which is what we want.
// It serves as the pointee type of the IsNullLiteralHelper(Secret*)
// overload declared below.
class Secret;

// Two overloaded helpers for checking at compile time whether an
// expression is a null pointer literal (i.e. NULL or any 0-valued
// compile-time integral constant).  Their return values have
// different sizes, so we can use sizeof() to test which version is
// picked by the compiler.  These helpers have no implementations, as
// we only need their signatures.
//
// Given IsNullLiteralHelper(x), the compiler will pick the first
// version if x can be implicitly converted to Secret*, and pick the
// second version otherwise.  Since Secret is a secret and incomplete
// type, the only expression a user can write that has type Secret* is
// a null pointer literal.  Therefore, we know that x is a null
// pointer literal if and only if the first version is picked by the
// compiler.

// First version: returns char, so sizeof of a call expression is 1.
char IsNullLiteralHelper(Secret* p);
// Second version (ellipsis catch-all): returns a reference to char[2],
// so sizeof of a call expression is 2.
char (&IsNullLiteralHelper(...))[2];  // NOLINT

// A compile-time bool constant that is true if and only if x is a
// null pointer literal (i.e. NULL or any 0-valued compile-time
// integral constant).
#ifdef GTEST_ELLIPSIS_NEEDS_POD_
// We lose support for NULL detection where the compiler doesn't like
// passing non-POD classes through ellipsis (...).
// In this configuration the macro is always false and x is not evaluated
// or even named in the expansion.
# define GTEST_IS_NULL_LITERAL_(x) false
#else
// sizeof(...) == 1 means overload resolution chose the
// IsNullLiteralHelper(Secret*) version, whose return type is char.  The
// call sits inside sizeof, so it is never actually executed.
# define GTEST_IS_NULL_LITERAL_(x) \
    (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
#endif  // GTEST_ELLIPSIS_NEEDS_POD_

// Appends the user-supplied message to the Google-Test-generated message.
// Declaration only.  gtest_msg is the Google-Test-generated text and
// user_msg the user-supplied Message; the combined text is returned as a
// String by value.
GTEST_API_ String AppendUserMessage(const String& gtest_msg,
                                    const Message& user_msg);

// A helper class for creating scoped traces in user programs.
// All of its work happens in the constructor and destructor, which are
// only declared here.
class GTEST_API_ ScopedTrace {
 public:
  // The c'tor pushes the given source file location and message onto
  // a trace stack maintained by Google Test.
  ScopedTrace(const char* file, int line, const Message& message);

  // The d'tor pops the info pushed by the c'tor.
  //
  // Note that the d'tor is not virtual in order to be efficient.
  // Don't inherit from ScopedTrace!
  ~ScopedTrace();

 private:
  // NOTE(review): presumably makes the copy constructor and assignment
  // operator inaccessible; the macro is defined elsewhere -- confirm.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
} GTEST_ATTRIBUTE_UNUSED_;  // A ScopedTrace object does its job in its
                            // c'tor and d'tor.  Therefore it doesn't
                            // need to be used otherwise.

// Converts a streamable value to a String.  A NULL pointer is
// converted to "(null)".  When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
//
// Only the declaration appears here; the definition lives in gtest.h,
// so that it has access to the definition of the Message class, as
// required by the ARM compiler.
template <typename T>
String StreamableToString(const T& streamable);

// The Symbian compiler has a bug that prevents it from selecting the
// correct overload of FormatForComparisonFailureMessage (see below)
// unless we pass the first argument by reference.  If we do that,
// however, Visual Age C++ 10.1 generates a compiler error.  Therefore
// we only apply the work-around for Symbian.
#if defined(__SYMBIAN32__)
// Symbian: the pointer parameter is declared as `T* const&`.
# define GTEST_CREF_WORKAROUND_ const&
#else
// Everywhere else the macro expands to nothing, so the pointer parameter
// is taken by value.
# define GTEST_CREF_WORKAROUND_
#endif

// When this operand is a const char* or char*, if the other operand
// is a ::std::string or ::string, we print this operand as a C string
// rather than a pointer (we do the same for wide strings); otherwise
// we print it as a pointer to be safe.

// This internal macro is used to avoid duplicated code.
//
//   operand2_type:    the string class of the other operand; its
//                     value_type fixes the character type of the pointer.
//   operand1_printer: the function applied to the character pointer to
//                     produce the returned String.
//
// Each expansion defines two inline FormatForComparisonFailureMessage
// overloads: one for a pointer to non-const value_type and one for a
// pointer to const value_type.  The second parameter is unnamed; it is
// used for overload selection only.
#define GTEST_FORMAT_IMPL_(operand2_type, operand1_printer)\
inline String FormatForComparisonFailureMessage(\
    operand2_type::value_type* GTEST_CREF_WORKAROUND_ str, \
    const operand2_type& /*operand2*/) {\
  return operand1_printer(str);\
}\
inline String FormatForComparisonFailureMessage(\
    const operand2_type::value_type* GTEST_CREF_WORKAROUND_ str, \
    const operand2_type& /*operand2*/) {\
  return operand1_printer(str);\
}

// Narrow pointer compared against a ::std::string.
GTEST_FORMAT_IMPL_(::std::string, String::ShowCStringQuoted)
#if GTEST_HAS_STD_WSTRING
// Wide pointer compared against a ::std::wstring.
GTEST_FORMAT_IMPL_(::std::wstring, String::ShowWideCStringQuoted)
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_STRING
// Narrow pointer compared against a ::string.
GTEST_FORMAT_IMPL_(::string, String::ShowCStringQuoted)
#endif  // GTEST_HAS_GLOBAL_STRING
#if GTEST_HAS_GLOBAL_WSTRING
// Wide pointer compared against a ::wstring.
GTEST_FORMAT_IMPL_(::wstring, String::ShowWideCStringQuoted)
#endif  // GTEST_HAS_GLOBAL_WSTRING

// The macro is only needed for the expansions above.
#undef GTEST_FORMAT_IMPL_

// The next four overloads handle the case where the operand being
// printed is a char/wchar_t pointer and the other operand is not a
// string/wstring object.  In such cases, we just print the operand as
// a pointer to be safe.
//
// Each expansion defines one function template whose second parameter
// accepts any type T and is left unnamed.  The pointer is cast to
// const void* and handed to PrintToString, so the characters it points at
// are not read by this code.
#define GTEST_FORMAT_CHAR_PTR_IMPL_(CharType)                       \
  template <typename T>                                             \
  String FormatForComparisonFailureMessage(CharType* GTEST_CREF_WORKAROUND_ p, \
                                           const T&) { \
    return PrintToString(static_cast<const void*>(p));              \
  }

GTEST_FORMAT_CHAR_PTR_IMPL_(char)
GTEST_FORMAT_CHAR_PTR_IMPL_(const char)
GTEST_FORMAT_CHAR_PTR_IMPL_(wchar_t)
GTEST_FORMAT_CHAR_PTR_IMPL_(const wchar_t)

// The macro is only needed for the four expansions above.
#undef GTEST_FORMAT_CHAR_PTR_IMPL_

// Constructs and returns the message for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
//
// The first four parameters are the expressions used in the assertion
// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
// where foo is 5 and bar is 6, we have:
//
//   expected_expression: "foo"
//   actual_expression:   "bar"
//   expected_value:      "5"
//   actual_value:        "6"
//
// The ignoring_case parameter is true iff the assertion is a
// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
// be inserted into the message.
//
// Declaration only.  Per the return type, the message comes back inside
// an AssertionResult rather than as a bare string.
GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
                                     const char* actual_expression,
                                     const String& expected_value,
                                     const String& actual_value,
                                     bool ignoring_case);

// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
GTEST_API_ String GetBoolAssertionFailureMessage(
    const AssertionResult& assertion_result,
    const char* expression_text,
    const char* actual_predicate_value,
    const char* expected_predicate_value);

// This template class represents an IEEE floating-point number
// (either single-precision or double-precision, depending on the
// template parameters).
//
// The purpose of this class is to do more sophisticated number
// comparison.  (Due to round-off error, etc, it's very unlikely that
// two floating-points will be equal exactly.  Hence a naive
// comparison by the == operation often doesn't work.)
//
// Format of IEEE floating-point:
//
//   The most-significant bit being the leftmost, an IEEE
//   floating-point looks like
//
//     sign_bit exponent_bits fraction_bits
//
//   Here, sign_bit is a single bit that designates the sign of the
//   number.
//
//   For float, there are 8 exponent bits and 23 fraction bits.
//
//   For double, there are 11 exponent bits and 52 fraction bits.
//
//   More details can be found at
//   http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
//
// Template parameter:
//
//   RawType: the raw floating-point type (either float or double)
template <typename RawType>
class FloatingPoint {
 public:
  // Defines the unsigned integer type that has the same size as the
  // floating point number.
  typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;

  // Constants.

  // # of bits in a number.
  static const size_t kBitCount = 8*sizeof(RawType);

  // # of fraction bits in a number.
  static const size_t kFractionBitCount =
    std::numeric_limits<RawType>::digits - 1;

  // # of exponent bits in a number.
  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;

  // The mask for the sign bit.
  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);

  // The mask for the fraction bits.
  static const Bits kFractionBitMask =
    ~static_cast<Bits>(0) >> (kExponentBitCount + 1);

  // The mask for the exponent bits.
  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);

  // How many ULP's (Units in the Last Place) we want to tolerate when
  // comparing two numbers.  The larger the value, the more error we
  // allow.  A 0 value means that two numbers must be exactly the same
  // to be considered equal.
  //
  // The maximum error of a single floating-point operation is 0.5
  // units in the last place.  On Intel CPU's, all floating-point
  // calculations are done with 80-bit precision, while double has 64
  // bits.  Therefore, 4 should be enough for ordinary use.
  //
  // See the following article for more details on ULP:
  // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm.
  static const size_t kMaxUlps = 4;

  // Constructs a FloatingPoint from a raw floating-point number.
  //
  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
  // around may change its bits, although the new value is guaranteed
  // to be also a NAN.  Therefore, don't expect this constructor to
  // preserve the bits in x when x is a NAN.
  explicit FloatingPoint(const RawType& x) { u_.value_ = x; }

  // Static methods

  // Reinterprets a bit pattern as a floating-point number.
  //
  // This function is needed to test the AlmostEquals() method.
  static RawType ReinterpretBits(const Bits bits) {
    FloatingPoint fp(0);
    fp.u_.bits_ = bits;
    return fp.u_.value_;
  }

  // Returns the floating-point number that represent positive infinity.
  static RawType Infinity() {
    return ReinterpretBits(kExponentBitMask);
  }

  // Non-static methods

  // Returns the bits that represents this number.
  const Bits &bits() const { return u_.bits_; }

  // Returns the exponent bits of this number.
  Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }

  // Returns the fraction bits of this number.
  Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }

  // Returns the sign bit of this number.
  Bits sign_bit() const { return kSignBitMask & u_.bits_; }

  // Returns true iff this is NAN (not a number).
  bool is_nan() const {
    // It's a NAN if the exponent bits are all ones and the fraction
    // bits are not entirely zeros.
    return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
  }

  // Returns true iff this number is at most kMaxUlps ULP's away from
  // rhs.  In particular, this function:
  //
  //   - returns false if either number is (or both are) NAN.
  //   - treats really large numbers as almost equal to infinity.
  //   - thinks +0.0 and -0.0 are 0 DLP's apart.
  bool AlmostEquals(const FloatingPoint& rhs) const {
    // The IEEE standard says that any comparison operation involving
    // a NAN must return false.
    if (is_nan() || rhs.is_nan()) return false;

    return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
        <= kMaxUlps;
  }

 private:
  // The data type used to store the actual floating-point number.
  union FloatingPointUnion {
    RawType value_;  // The raw floating-point number.
    Bits bits_;      // The bits that represent the number.
  };

  // Converts an integer from the sign-and-magnitude representation to
  // the biased representation.  More precisely, let N be 2 to the
  // power of (kBitCount - 1), an integer x is represented by the
  // unsigned number x + N.
  //
  // For instance,
  //
  //   -N + 1 (the most negative number representable using
  //          sign-and-magnitude) is represented by 1;
  //   0      is represented by N; and
  //   N - 1  (the biggest number representable using
  //          sign-and-magnitude) is represented by 2N - 1.
  //
  // Read http://en.wikipedia.org/wiki/Signed_number_representations
  // for more details on signed number representations.
  static Bits SignAndMagnitudeToBiased(const Bits &sam) {
    if (kSignBitMask & sam) {
      // sam represents a negative number.
      return ~sam + 1;
    } else {
      // sam represents a positive number.
      return kSignBitMask | sam;
    }
  }

  // Given two numbers in the sign-and-magnitude representation,
  // returns the distance between them as an unsigned number.
  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
                                                     const Bits &sam2) {
    const Bits biased1 = SignAndMagnitudeToBiased(sam1);
    const Bits biased2 = SignAndMagnitudeToBiased(sam2);
    return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
  }

  FloatingPointUnion u_;
};

// Typedefs the instances of the FloatingPoint template class that we
// care to use.
typedef FloatingPoint<float> Float;
typedef FloatingPoint<double> Double;

// In order to catch the mistake of putting tests that use different
// test fixture classes in the same test case, we need to assign
// unique IDs to fixture classes and compare them.  The TypeId type is
// used to hold such IDs.  The user should treat TypeId as an opaque
// type: the only operation allowed on TypeId values is to compare
// them for equality using the == operator.
typedef const void* TypeId;

// Supplies one static variable per type T.  Only the variable's address
// is ever used; its value is irrelevant.
template <typename T>
class TypeIdHelper {
 public:
  // Deliberately non-const: an overly eager compiler (e.g. MSVC 7.1 &
  // 8.0) may otherwise merge TypeIdHelper<T>::dummy_ across different
  // Ts as an "optimization", which would break ID uniqueness.
  static bool dummy_;
};

// Storage for the per-type marker variable.
template <typename T>
bool TypeIdHelper<T>::dummy_ = false;

// GetTypeId<T>() returns the ID of type T.  Distinct types yield
// distinct IDs, and repeated calls with the same type yield the same ID.
template <typename T>
TypeId GetTypeId() {
  // Each instantiation of TypeIdHelper<T> must get its own dummy_
  // object, so the address of that object identifies T uniquely.
  bool* const unique_address = &TypeIdHelper<T>::dummy_;
  return unique_address;
}

// Returns the type ID of ::testing::Test.  Always call this instead
// of GetTypeId< ::testing::Test>() to get the type ID of
// ::testing::Test, as the latter may give the wrong result due to a
// suspected linker bug when compiling Google Test as a Mac OS X
// framework.
GTEST_API_ TypeId GetTestTypeId();

// Defines the abstract factory interface that creates instances
// of a Test object.
// Abstract factory: each concrete subclass knows how to create one kind
// of Test object.  Instances are neither copyable nor assignable.
class TestFactoryBase {
 public:
  // Virtual so that factories can be destroyed through a base pointer.
  virtual ~TestFactoryBase() {}

  // Creates a test instance to run. The instance is both created and destroyed
  // within TestInfoImpl::Run()
  virtual Test* CreateTest() = 0;

 protected:
  // Protected: only derived factories may construct the base.
  TestFactoryBase() {}

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
};

// This class provides an implementation of the TestFactoryBase interface.
// It is used in the TEST and TEST_F macros.
// Concrete factory for TestClass.
template <class TestClass>
class TestFactoryImpl : public TestFactoryBase {
 public:
  // Returns a newly heap-allocated, default-constructed TestClass.
  // The object is handed back as a raw pointer, so the caller is the
  // one that must delete it.
  virtual Test* CreateTest() { return new TestClass; }
};

#if GTEST_OS_WINDOWS

// Predicate-formatters for implementing the HRESULT checking macros
// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
// We pass a long instead of HRESULT to avoid causing an
// include dependency for the HRESULT type.
GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
                                            long hr);  // NOLINT
GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
                                            long hr);  // NOLINT

#endif  // GTEST_OS_WINDOWS

// Types of SetUpTestCase() and TearDownTestCase() functions.
typedef void (*SetUpTestCaseFunc)();
typedef void (*TearDownTestCaseFunc)();

// Creates a new TestInfo object and registers it with Google Test;
// returns the created object.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param:       the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
//   value_param:      text representation of the test's value parameter,
//                     or NULL if this is not a value-parameterized test.
//   fixture_class_id: ID of the test fixture class
//   set_up_tc:        pointer to the function that sets up the test case
//   tear_down_tc:     pointer to the function that tears down the test case
//   factory:          pointer to the factory that creates a test object.
//                     The newly created TestInfo instance will assume
//                     ownership of the factory object.
GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
    const char* test_case_name, const char* name,
    const char* type_param,
    const char* value_param,
    TypeId fixture_class_id,
    SetUpTestCaseFunc set_up_tc,
    TearDownTestCaseFunc tear_down_tc,
    TestFactoryBase* factory);

// If *pstr starts with the given prefix, modifies *pstr to be right
// past the prefix and returns true; otherwise leaves *pstr unchanged
// and returns false.  None of pstr, *pstr, and prefix can be NULL.
GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// State of the definition of a type-parameterized test case.
// Tracks which tests have been defined for one type-parameterized test
// case and whether that test case has been registered yet.
class GTEST_API_ TypedTestCasePState {
 public:
  // A fresh state starts out unregistered with no test names recorded.
  TypedTestCasePState() : registered_(false) {}

  // Adds the given test name to defined_test_names_ and return true
  // if the test case hasn't been registered; otherwise aborts the
  // program.
  //
  // file/line identify the test's definition and appear only in the
  // diagnostic; case_name is the name of the test case.
  bool AddTestName(const char* file, int line, const char* case_name,
                   const char* test_name) {
    if (registered_) {
      // Defining a test after registration is a usage error: report
      // where it happened, flush so the message is not lost, and abort.
      fprintf(stderr, "%s Test %s must be defined before "
              "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
              FormatFileLocation(file, line).c_str(), test_name, case_name);
      fflush(stderr);
      posix::Abort();
    }
    // Only the pointer is stored; the characters are not copied.
    defined_test_names_.insert(test_name);
    return true;
  }

  // Verifies that registered_tests match the test names in
  // defined_test_names_; returns registered_tests if successful, or
  // aborts the program otherwise.
  const char* VerifyRegisteredTestNames(
      const char* file, int line, const char* registered_tests);

 private:
  // Checked by AddTestName(); this class's inline code never sets it to
  // true (presumably VerifyRegisteredTestNames() does -- its definition
  // is not in this part of the file).
  bool registered_;
  // NOTE: keyed on const char* with the default comparator, so entries
  // are ordered and de-duplicated by pointer value, not string contents.
  ::std::set<const char*> defined_test_names_;
};

// Skips to the first non-space char after the first comma in 'str';
// returns NULL if no comma is found in 'str'.
inline const char* SkipComma(const char* str) {
  const char* cursor = strchr(str, ',');
  if (cursor != NULL) {
    // Step past the comma itself, then past any whitespace after it.
    do {
      ++cursor;
    } while (IsSpace(*cursor));
  }
  // Either NULL (no comma) or the first non-space char after the comma.
  return cursor;
}

// Returns the prefix of 'str' before the first comma in it; returns
// the entire string if it contains no comma.
inline String GetPrefixUntilComma(const char* str) {
  const char* const first_comma = strchr(str, ',');
  if (first_comma == NULL) {
    // No comma at all: the prefix is the whole string.
    return String(str);
  }
  // Keep only the characters that precede the comma.
  return String(str, first_comma - str);
}

// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
// registers a list of type-parameterized tests with Google Test.  The
// return value is insignificant - we just need to return something
// such that we can call this function in a namespace scope.
//
// Implementation note: The GTEST_TEMPLATE_ macro declares a template
// template parameter.  It's defined in gtest-type-util.h.
template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
class TypeParameterizedTest {
 public:
  // Registers the test selected by TestSel for the head of 'Types',
  // then recurses on the tail.  'index' is the position of the head
  // type within the list originally passed to
  // INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, Types); valid
  // values are [0, N - 1] where N is the length of that list.
  static bool Register(const char* prefix, const char* case_name,
                       const char* test_names, int index) {
    typedef typename Types::Head Type;
    typedef typename Types::Tail RemainingTypes;
    typedef Fixture<Type> FixtureClass;
    typedef typename GTEST_BIND_(TestSel, Type) TestClass;

    // The registered test case name is "<prefix>/<case_name>/<index>",
    // with the leading "<prefix>/" dropped when prefix is empty.
    const char* const separator = (prefix[0] == '\0') ? "" : "/";
    const String full_case_name =
        String::Format("%s%s%s/%d", prefix, separator, case_name, index);
    // The test's own name is the first entry of the comma-separated list.
    const String test_name = GetPrefixUntilComma(test_names);

    // Register the test for the head type.
    MakeAndRegisterTestInfo(
        full_case_name.c_str(),
        test_name.c_str(),
        GetTypeName<Type>().c_str(),
        NULL,  // No value parameter.
        GetTypeId<FixtureClass>(),
        TestClass::SetUpTestCase,
        TestClass::TearDownTestCase,
        new TestFactoryImpl<TestClass>);

    // Compile-time recursion over the remaining types.
    return TypeParameterizedTest<Fixture, TestSel, RemainingTypes>
        ::Register(prefix, case_name, test_names, index + 1);
  }
};

// End of the compile-time recursion: an empty type list registers
// nothing.
template <GTEST_TEMPLATE_ Fixture, class TestSel>
class TypeParameterizedTest<Fixture, TestSel, Types0> {
 public:
  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
                       const char* /*test_names*/, int /*index*/) {
    return true;
  }
};

// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
// registers *all combinations* of 'Tests' and 'Types' with Google
// Test.  The return value is insignificant - we just need to return
// something such that we can call this function in a namespace scope.
template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
class TypeParameterizedTestCase {
 public:
  // Registers the head of 'Tests' for every type in 'Types', then
  // recurses on the remaining tests.  'test_names' is the
  // comma-separated list of test names, whose first entry names the
  // head test.
  static bool Register(const char* prefix, const char* case_name,
                       const char* test_names) {
    typedef typename Tests::Head FirstTest;
    typedef typename Tests::Tail RemainingTests;

    // Head test, across all types, starting at type index 0.
    TypeParameterizedTest<Fixture, FirstTest, Types>::Register(
        prefix, case_name, test_names, 0);

    // Drop the head test's name so the tail starts with its own name.
    const char* const remaining_names = SkipComma(test_names);

    // Compile-time recursion over the remaining tests.
    return TypeParameterizedTestCase<Fixture, RemainingTests, Types>
        ::Register(prefix, case_name, remaining_names);
  }
};

// End of the compile-time recursion: an empty test list registers
// nothing.
template <GTEST_TEMPLATE_ Fixture, typename Types>
class TypeParameterizedTestCase<Fixture, Templates0, Types> {
 public:
  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
                       const char* /*test_names*/) {
    return true;
  }
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// Returns the current OS stack trace as a String.
//
// The maximum number of stack frames to be included is specified by
// the gtest_stack_trace_depth flag.  The skip_count parameter
// specifies the number of top frames to be skipped, which doesn't
// count against the number of frames to be included.
//
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
GTEST_API_ String GetCurrentOsStackTraceExceptTop(UnitTest* unit_test,
                                                  int skip_count);

// Helpers for suppressing warnings on unreachable code or constant
// condition.

// Always returns true.
GTEST_API_ bool AlwaysTrue();

// Always returns false.
inline bool AlwaysFalse() { return !AlwaysTrue(); }

// Helper for suppressing false warning from Clang on a const char*
// variable declared in a conditional expression always being NULL in
// the else branch.
// Thin wrapper around a const char* that always tests as true.
struct GTEST_API_ ConstCharPtr {
  // Intentionally not explicit: GTEST_TEST_THROW_ declares one with
  // copy-initialization from a string literal inside an if-condition.
  ConstCharPtr(const char* str) : value(str) {}
  // Always true, whatever 'value' holds, so an if-statement that
  // declares a ConstCharPtr in its condition always takes its first
  // branch.
  operator bool() const { return true; }
  // The wrapped pointer; public and mutable so that users can
  // re-point it.
  const char* value;
};

// A simple Linear Congruential Generator for generating random
// numbers with a uniform distribution.  Unlike rand() and srand(), it
// doesn't use global state (and therefore can't interfere with user
// code).  Unlike rand_r(), it's portable.  An LCG isn't very random,
// but it's good enough for our purposes.
// Pseudo-random number generator whose entire state is one UInt32.
// Instances are neither copyable nor assignable.
class GTEST_API_ Random {
 public:
  // Upper limit (2^31) on the 'range' argument of Generate().
  static const UInt32 kMaxRange = 1u << 31;

  // Starts the generator from the given seed.
  explicit Random(UInt32 seed) : state_(seed) {}

  // Restarts the generator from the given seed, discarding the
  // current state.
  void Reseed(UInt32 seed) { state_ = seed; }

  // Generates a random number from [0, range).  Crashes if 'range' is
  // 0 or greater than kMaxRange.
  UInt32 Generate(UInt32 range);

 private:
  // Current generator state; set by the constructor and Reseed().
  UInt32 state_;
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
};

// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
// compiler error iff T1 and T2 are different types.
// Primary template: declared but never defined, so instantiating it
// with two different types fails to compile.
template <typename First, typename Second>
struct CompileAssertTypesEqual;

// The only usable form: both arguments name the same type.
template <typename Same>
struct CompileAssertTypesEqual<Same, Same> {};

// Removes the reference from a type if it is a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::remove_reference, which is not widely available yet.
// General case: T is not a reference, so it is passed through as is.
template <typename T>
struct RemoveReference {
  typedef T type;  // NOLINT
};
// Reference case: strip the '&' and expose the referred-to type.
template <typename Referent>
struct RemoveReference<Referent&> {
  typedef Referent type;  // NOLINT
};

// Convenience wrapper around RemoveReference for use where the argument
// T depends on template parameters (it supplies the 'typename').
#define GTEST_REMOVE_REFERENCE_(T) \
    typename ::testing::internal::RemoveReference<T>::type

// Removes const from a type if it is a const type, otherwise leaves
// it unchanged.  This is the same as tr1::remove_const, which is not
// widely available yet.
// General case: T has no top-level const, so it is passed through as is.
template <typename T>
struct RemoveConst {
  typedef T type;  // NOLINT
};
// Const case: strip the top-level const.
template <typename Unqualified>
struct RemoveConst<const Unqualified> {
  typedef Unqualified type;  // NOLINT
};

// MSVC 8.0, Sun C++, and IBM XL C++ fail to strip the const from array
// types such as 'const int[3]' and 'const char[3][4]' with the
// definitions above.  This extra specialization handles arrays on those
// compilers; it causes trouble with GCC, hence the conditional
// compilation.
#if defined(_MSC_VER) || defined(__SUNPRO_CC) || defined(__IBMCPP__)
template <typename ElementType, size_t Extent>
struct RemoveConst<const ElementType[Extent]> {
  typedef typename RemoveConst<ElementType>::type type[Extent];
};
#endif

// Convenience wrapper around RemoveConst for use where the argument
// T depends on template parameters (it supplies the 'typename').
#define GTEST_REMOVE_CONST_(T) \
    typename ::testing::internal::RemoveConst<T>::type

// Maps const U&, U&, const U, and U all to U: the reference is removed
// first, then the const.
#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))

// Adds reference to a type if it is not a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::add_reference, which is not widely available yet.
// General case: T is not a reference, so one is added.
template <typename T>
struct AddReference {
  typedef T& type;  // NOLINT
};
// Reference case: T is already a reference and stays one.
template <typename Referent>
struct AddReference<Referent&> {
  typedef Referent& type;  // NOLINT
};

// Convenience wrapper around AddReference for use where the argument T
// depends on template parameters (it supplies the 'typename').
#define GTEST_ADD_REFERENCE_(T) \
    typename ::testing::internal::AddReference<T>::type

// Produces a reference-to-const for T, whatever T's own qualification:
//
//   char         ==> const char&
//   const char   ==> const char&
//   char&        ==> const char&
//   const char&  ==> const char&
//
// The argument T must depend on some template parameters.
#define GTEST_REFERENCE_TO_CONST_(T) \
    GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))

// ImplicitlyConvertible<From, To>::value is a compile-time bool
// constant that's true iff type From can be implicitly converted to
// type To.
template <typename From, typename To>
class ImplicitlyConvertible {
 private:
  // We need the following helper functions only for their types.
  // They have no implementations.

  // MakeFrom() is an expression whose type is From.  We cannot simply
  // use From(), as the type From may not have a public default
  // constructor.
  static From MakeFrom();

  // These two functions are overloaded.  Given an expression
  // Helper(x), the compiler will pick the first version if x can be
  // implicitly converted to type To; otherwise it will pick the
  // second version.
  //
  // The first version returns a value of size 1, and the second
  // version returns a value of size 2.  Therefore, by checking the
  // size of Helper(x), which can be done at compile time, we can tell
  // which version of Helper() is used, and hence whether x can be
  // implicitly converted to type To.
  static char Helper(To);
  static char (&Helper(...))[2];  // NOLINT

  // We have to put the 'public' section after the 'private' section,
  // or MSVC refuses to compile the code.
 public:
  // MSVC warns about implicitly converting from double to int for
  // possible loss of data, so we need to temporarily disable the
  // warning.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4244)  // Temporarily disables warning 4244.

  // Same sizeof-based test as the generic branch below, evaluated
  // with warning 4244 silenced.
  static const bool value =
      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
# pragma warning(pop)           // Restores the warning state.
#elif defined(__BORLANDC__)
  // C++Builder cannot use member overload resolution during template
  // instantiation.  The simplest workaround is to use its C++0x type traits
  // functions (C++Builder 2009 and above only).
  static const bool value = __is_convertible(From, To);
#else
  // True iff overload resolution picked Helper(To), i.e. the call's
  // result has size 1.
  static const bool value =
      sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
#endif  // _MSC_VER
};
// Out-of-class definition of the static member; the initializer stays
// on the in-class declaration above.
template <typename From, typename To>
const bool ImplicitlyConvertible<From, To>::value;

// IsAProtocolMessage<T>::value is a compile-time bool constant that's
// true iff T is type ProtocolMessage, proto2::Message, or a subclass
// of those.
// The check is done on pointers: 'const T*' must convert implicitly to
// a pointer to one of the two message base classes.
template <typename T>
struct IsAProtocolMessage
    : public bool_constant<
  ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
  ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
};

// When the compiler sees expression IsContainerTest<C>(0), if C is an
// STL-style container class, the first overload of IsContainerTest
// will be viable (since both C::iterator* and C::const_iterator* are
// valid types and NULL can be implicitly converted to them).  It will
// be picked over the second overload as 'int' is a perfect match for
// the type of argument 0.  If C::iterator or C::const_iterator is not
// a valid type, the first overload is not viable, and the second
// overload will be picked.  Therefore, we can determine whether C is
// a container class by checking the type of IsContainerTest<C>(0).
// The value of the expression is insignificant.
//
// Note that we look for both C::iterator and C::const_iterator.  The
// reason is that C++ injects the name of a class as a member of the
// class itself (e.g. you can refer to class iterator as either
// 'iterator' or 'iterator::iterator').  If we look for C::iterator
// only, for example, we would mistakenly think that a class named
// iterator is an STL container.
//
// Also note that the simpler approach of overloading
// IsContainerTest(typename C::const_iterator*) and
// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
// Return type of the overload chosen for container classes.
typedef int IsContainer;
// Viable only when both C::iterator and C::const_iterator name types.
// Its first parameter matches the literal 0 exactly, so it wins over
// the 'long' overload whenever it is viable.
template <class C>
IsContainer IsContainerTest(
    int /* exact_match */,
    typename C::iterator* /* iter */ = NULL,
    typename C::const_iterator* /* const_iter */ = NULL) {
  const IsContainer kResult = 0;
  return kResult;
}

// Return type of the fallback overload chosen for everything else.
typedef char IsNotContainer;
// Always viable; reaching it from the literal 0 needs an int-to-long
// conversion, which makes it the second choice.
template <class C>
IsNotContainer IsContainerTest(long /* fallback */) {
  const IsNotContainer kResult = '\0';
  return kResult;
}

// EnableIf<condition>::type is void when 'Cond' is true, and
// undefined when 'Cond' is false.  To use SFINAE to make a function
// overload only apply when a particular expression is true, add
// "typename EnableIf<expression>::type* = 0" as the last parameter.
// Primary template: declared only, so EnableIf<false>::type does not
// exist.
template <bool Condition>
struct EnableIf;
// The 'true' case is the only one that provides the nested type.
template <>
struct EnableIf<true> {
  typedef void type;  // NOLINT
};

// Utilities for native arrays.

// ArrayEq() compares two k-dimensional native arrays using the
// elements' operator==, where k can be any integer >= 0.  When k is
// 0, ArrayEq() degenerates into comparing a single pair of values.

// Forward declaration of the pointer-and-size helper, so that the
// array overload below can call it.
template <typename T, typename U>
bool ArrayEq(const T* lhs, size_t size, const U* rhs);

// k == 0: plain values are compared with operator==.
template <typename T, typename U>
inline bool ArrayEq(const T& lhs, const U& rhs) {
  return lhs == rhs;
}

// k >= 1: two native arrays of equal extent N are compared element by
// element through the helper.
template <typename T, typename U, size_t N>
inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
  return internal::ArrayEq(lhs, N, rhs);
}

// Pointer-and-size helper.  Keeping the loop here rather than in the
// array overload avoids one copy of the loop per distinct array size.
// Elements may be arrays themselves, so each pair is compared through
// ArrayEq() again.
template <typename T, typename U>
bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
  size_t index = 0;
  // Advance while the elements agree; stop at the first mismatch.
  while (index != size && internal::ArrayEq(lhs[index], rhs[index])) {
    index++;
  }
  // Reaching 'size' means every pair compared equal.
  return index == size;
}

// Finds the first element in the iterator range [begin, end) that
// equals elem.  Element may be a native array type itself.
template <typename Iter, typename Element>
Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
  Iter cursor = begin;
  while (cursor != end) {
    // ArrayEq() rather than operator== so array-typed elements work.
    if (internal::ArrayEq(*cursor, elem)) {
      return cursor;
    }
    ++cursor;
  }
  // Nothing in the range equals elem.
  return end;
}

// CopyArray() copies a k-dimensional native array using the elements'
// operator=, where k can be any integer >= 0.  When k is 0,
// CopyArray() degenerates into copying a single value.

// Forward declaration of the pointer-and-size helper, so that the
// array overload below can call it.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to);

// k == 0: a plain value is copied with operator=.
template <typename T, typename U>
inline void CopyArray(const T& from, U* to) {
  *to = from;
}

// k >= 1: a native array of extent N is copied element by element
// through the helper.
template <typename T, typename U, size_t N>
inline void CopyArray(const T(&from)[N], U(*to)[N]) {
  internal::CopyArray(from, N, *to);
}

// Pointer-and-size helper.  Keeping the loop here rather than in the
// array overload avoids one copy of the loop per distinct array size.
// Elements may be arrays themselves, so each one is copied through
// CopyArray() again.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to) {
  const T* source = from;
  U* destination = to;
  for (size_t remaining = size; remaining != 0; --remaining) {
    internal::CopyArray(*source, destination);
    ++source;
    ++destination;
  }
}

// The relation between a NativeArray object (see below) and the
// native array it represents.
// Selects whether a NativeArray aliases the caller's array or owns a
// private copy of it.
enum RelationToSource {
  kReference,  // The NativeArray references the native array.
  kCopy        // The NativeArray makes a copy of the native array and
               // owns the copy.
};

// Adapts a native array to a read-only STL-style container.  Instead
// of the complete STL container concept, this adaptor only implements
// members useful for Google Mock's container matchers.  New members
// should be added as needed.  To simplify the implementation, we only
// support Element being a raw type (i.e. having no top-level const or
// reference modifier).  It's the client's responsibility to satisfy
// this requirement.  Element can be an array type itself (hence
// multi-dimensional arrays are supported).
template <typename Element>
class NativeArray {
 public:
  // Member typedefs in the style of an STL container.
  typedef Element value_type;
  typedef Element* iterator;
  typedef const Element* const_iterator;

  // Adapts the 'count' elements starting at 'array'.  With kReference
  // the elements are aliased; with kCopy they are duplicated into
  // storage owned by this object.
  NativeArray(const Element* array, size_t count, RelationToSource relation) {
    Init(array, count, relation);
  }

  // Copy constructor.  Re-runs Init() with rhs's pointer, size and
  // relation, so a copy of an owning array gets its own duplicate of
  // the elements while a copy of a referencing array aliases the same
  // elements.
  NativeArray(const NativeArray& rhs) {
    Init(rhs.array_, rhs.size_, rhs.relation_to_source_);
  }

  ~NativeArray() {
    // Compile-time check that Element has no const or reference
    // modifier.
    static_cast<void>(StaticAssertTypeEqHelper<Element,
        GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>());
    // Only an owning array has storage of its own to release.
    if (relation_to_source_ == kCopy) {
      delete[] array_;
    }
  }

  // STL-style container methods.
  size_t size() const { return size_; }
  const_iterator begin() const { return array_; }
  const_iterator end() const { return array_ + size_; }
  // Two arrays are equal when they have the same length and all
  // corresponding elements compare equal via ArrayEq().
  bool operator==(const NativeArray& rhs) const {
    if (size() != rhs.size()) {
      return false;
    }
    return ArrayEq(begin(), size(), rhs.begin());
  }

 private:
  // Sets up all three members.  For kCopy, allocates a fresh array of
  // a_size elements and copies the input into it; for kReference, just
  // records the input pointer.
  void Init(const Element* array, size_t a_size, RelationToSource relation) {
    const Element* elements = array;
    if (relation != kReference) {
      Element* const owned = new Element[a_size];
      CopyArray(array, a_size, owned);
      elements = owned;
    }
    array_ = elements;
    size_ = a_size;
    relation_to_source_ = relation;
  }

  const Element* array_;
  size_t size_;
  RelationToSource relation_to_source_;

  GTEST_DISALLOW_ASSIGN_(NativeArray);
};

}  // namespace internal
}  // namespace testing

// Builds an AssertHelper for a result of kind 'result_type' at the given
// file/line with the given message, and assigns a fresh ::testing::Message
// to it.  The expansion has no trailing semicolon.
#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
  ::testing::internal::AssertHelper(result_type, file, line, message) \
    = ::testing::Message()

// Same as GTEST_MESSAGE_AT_, using the location where the macro is expanded.
#define GTEST_MESSAGE_(message, result_type) \
  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)

// Reports a fatal failure.  The expansion is a 'return' statement, so it
// also leaves the enclosing function.
#define GTEST_FATAL_FAILURE_(message) \
  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)

// Reports a non-fatal failure; execution continues after the statement.
#define GTEST_NONFATAL_FAILURE_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)

// Reports a success result.
#define GTEST_SUCCESS_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)

// Suppresses MSVC warning 4702 (unreachable code) for the code that
// follows 'statement' when 'statement' always returns or throws (or
// fails to return or throw only in some situations).
// Runs 'statement' inside an 'if' whose condition is the result of a call
// to AlwaysTrue() rather than a literal constant.
#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
  if (::testing::internal::AlwaysTrue()) { statement; }

// Implements the "statement throws expected_exception" assertions.
//
// Parameters:
//   statement          - the code to execute inside a try block.
//   expected_exception - the exception type that must be thrown; it is
//                        caught by const reference.
//   fail               - a macro taking the failure text; it is invoked
//                        as the very last expression of the expansion.
//
// If statement throws some other type, or throws nothing, gtest_msg.value
// is set to the matching explanation and control jumps (goto) to the
// label placed in the else branch, where fail(gtest_msg.value) is
// evaluated.  The label name embeds __LINE__ so that each expansion on a
// different line gets a distinct label.
// NOTE(review): the if-condition declares gtest_msg as a ConstCharPtr;
// that type is defined elsewhere and is presumably convertible to true
// so that the first branch always runs -- confirm.
#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::ConstCharPtr gtest_msg = "") { \
    bool gtest_caught_expected = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (expected_exception const&) { \
      gtest_caught_expected = true; \
    } \
    catch (...) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws a different type."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
    if (!gtest_caught_expected) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws nothing."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
      fail(gtest_msg.value)

// Implements the "statement does not throw" assertions.
//
// statement is executed inside a try block.  Any exception is caught by
// catch (...) and control jumps to the label in the else branch, where
// fail() is invoked with a fixed explanatory text.  When nothing is
// thrown the else branch is never entered and no failure is reported.
// fail is the last expression of the expansion.
#define GTEST_TEST_NO_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
      fail("Expected: " #statement " doesn't throw an exception.\n" \
           "  Actual: it throws.")

// Implements the "statement throws something" assertions.
//
// statement is executed inside a try block; catch (...) records that an
// exception of any type was thrown.  If none was thrown, control jumps
// to the label in the else branch, where fail() is invoked with a fixed
// explanatory text.  fail is the last expression of the expansion.
#define GTEST_TEST_ANY_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    bool gtest_caught_any = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      gtest_caught_any = true; \
    } \
    if (!gtest_caught_any) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
      fail("Expected: " #statement " throws an exception.\n" \
           "  Actual: it doesn't.")


// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
// either a boolean expression or an AssertionResult. text is a textual
// representation of expression as it was passed into the EXPECT_TRUE.
//
// The expression is evaluated exactly once, into the AssertionResult
// gtest_ar_ declared in the if-condition.  When that condition holds the
// expansion does nothing; otherwise fail() receives the text produced by
// GetBoolAssertionFailureMessage() from gtest_ar_, text and the
// stringified actual / expected arguments.
#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar_ = \
      ::testing::AssertionResult(expression)) \
    ; \
  else \
    fail(::testing::internal::GetBoolAssertionFailureMessage(\
        gtest_ar_, text, #actual, #expected).c_str())

// Implements the "statement generates no new fatal failure" assertions.
//
// A HasNewFatalFailureHelper is created before statement runs and is
// queried afterwards through has_new_fatal_failure().  If it reports a
// new fatal failure, control jumps to the label in the else branch,
// where fail() is invoked with a fixed explanatory text.  fail is the
// last expression of the expansion.
#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
      fail("Expected: " #statement " doesn't generate new fatal " \
           "failures in the current thread.\n" \
           "  Actual: it does.")

// Expands to the name of the class that implements the given test.
// The two arguments are token-pasted, e.g.
// GTEST_TEST_CLASS_NAME_(Foo, Bar) expands to Foo_Bar_Test.
#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
  test_case_name##_##test_name##_Test

// Helper macro for defining tests.
//
// Parameters:
//   test_case_name, test_name - identifiers; they form the generated
//                               class name and, stringified, the names
//                               passed to MakeAndRegisterTestInfo().
//   parent_class              - base class of the generated test class;
//                               its SetUpTestCase and TearDownTestCase
//                               are passed to the registration call.
//   parent_id                 - passed through to
//                               MakeAndRegisterTestInfo().
//
// The expansion (1) declares a non-copyable class derived from
// parent_class with a private virtual TestBody(), (2) defines the
// class's static test_info_ member, whose initializer calls
// MakeAndRegisterTestInfo() with a new TestFactoryImpl for the class,
// and (3) ends with the out-of-class signature of TestBody(), so the
// braces written after the macro invocation become the body of the test.
#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
 public:\
  GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
 private:\
  virtual void TestBody();\
  static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
  GTEST_DISALLOW_COPY_AND_ASSIGN_(\
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
};\
\
::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
  ::test_info_ =\
    ::testing::internal::MakeAndRegisterTestInfo(\
        #test_case_name, #test_name, NULL, NULL, \
        (parent_id), \
        parent_class::SetUpTestCase, \
        parent_class::TearDownTestCase, \
        new ::testing::internal::TestFactoryImpl<\
            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for death tests.  It is
// #included by gtest.h so a user doesn't need to include this
// directly.

#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_

// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines internal utilities needed for implementing
// death tests.  They are subject to change without notice.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_


#include <stdio.h>

namespace testing {
namespace internal {

GTEST_DECLARE_string_(internal_run_death_test);

// Names of the flags (needed for parsing Google Test flags).
// The strings are stored without the "gtest_" prefix that appears in
// the command-line spelling (e.g. --gtest_death_test_style).
const char kDeathTestStyleFlag[] = "death_test_style";
const char kDeathTestUseFork[] = "death_test_use_fork";
const char kInternalRunDeathTestFlag[] = "internal_run_death_test";

#if GTEST_HAS_DEATH_TEST

// DeathTest is a class that hides much of the complexity of the
// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
// returns a concrete class that depends on the prevailing death test
// style, as defined by the --gtest_death_test_style and/or
// --gtest_internal_run_death_test flags.

// In describing the results of death tests, these terms are used with
// the corresponding definitions:
//
// exit status:  The integer exit information in the format specified
//               by wait(2)
// exit code:    The integer code passed to exit(3), _exit(2), or
//               returned from main()
// Abstract interface driven by the GTEST_DEATH_TEST_ macro: the macro
// calls Create(), then AssumeRole(), and depending on the role either
// Wait() + Passed() or runs the statement and calls Abort().
class GTEST_API_ DeathTest {
 public:
  // Create returns false if there was an error determining the
  // appropriate action to take for the current death test; for example,
  // if the gtest_death_test_style flag is set to an invalid value.
  // The LastMessage method will return a more detailed message in that
  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
  // argument is set.  If the death test should be skipped, the pointer
  // is set to NULL; otherwise, it is set to the address of a new concrete
  // DeathTest object that controls the execution of the current test.
  static bool Create(const char* statement, const RE* regex,
                     const char* file, int line, DeathTest** test);
  DeathTest();
  // Virtual so that concrete death tests can be deleted through a
  // DeathTest pointer (GTEST_DEATH_TEST_ holds one in a scoped_ptr).
  virtual ~DeathTest() { }

  // A helper class that aborts a death test when it's deleted.
  // The destructor unconditionally calls
  // Abort(TEST_ENCOUNTERED_RETURN_STATEMENT) on the wrapped test; the
  // pointer is not owned and is never checked for NULL.
  class ReturnSentinel {
   public:
    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
   private:
    DeathTest* const test_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
  } GTEST_ATTRIBUTE_UNUSED_;

  // An enumeration of possible roles that may be taken when a death
  // test is encountered.  EXECUTE means that the death test logic should
  // be executed immediately.  OVERSEE means that the program should prepare
  // the appropriate environment for a child process to execute the death
  // test, then wait for it to complete.
  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };

  // An enumeration of the three reasons that a test might be aborted.
  enum AbortReason {
    // Passed by ~ReturnSentinel().
    TEST_ENCOUNTERED_RETURN_STATEMENT,
    // Passed by GTEST_EXECUTE_DEATH_TEST_STATEMENT_ when the statement
    // lets an exception escape.
    TEST_THREW_EXCEPTION,
    // Passed by GTEST_DEATH_TEST_ after the statement has run to
    // completion.
    TEST_DID_NOT_DIE
  };

  // Assumes one of the above roles.
  virtual TestRole AssumeRole() = 0;

  // Waits for the death test to finish and returns its status.
  virtual int Wait() = 0;

  // Returns true if the death test passed; that is, the test process
  // exited during the test, its exit status matches a user-supplied
  // predicate, and its stderr output matches a user-supplied regular
  // expression.
  // The user-supplied predicate may be a macro expression rather
  // than a function pointer or functor, or else Wait and Passed could
  // be combined.
  virtual bool Passed(bool exit_status_ok) = 0;

  // Signals that the death test did not die as expected.
  virtual void Abort(AbortReason reason) = 0;

  // Returns a human-readable outcome message regarding the outcome of
  // the last death test.
  static const char* LastMessage();

  // NOTE(review): presumably replaces last_death_test_message_, the text
  // reported by LastMessage(); the definition is not in this header --
  // confirm.
  static void set_last_death_test_message(const String& message);

 private:
  // A string containing a description of the outcome of the last death test.
  static String last_death_test_message_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
};

// Factory interface for death tests.  May be mocked out for testing.
class DeathTestFactory {
 public:
  virtual ~DeathTestFactory() { }
  // Takes the same parameters as the static DeathTest::Create().
  // NOTE(review): presumably follows the same contract (false on error,
  // *test set to NULL when the death test is to be skipped) -- confirm
  // against the implementation.
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test) = 0;
};

// A concrete DeathTestFactory implementation for normal use.
// Only the declaration is in this header; Create() is defined elsewhere.
class DefaultDeathTestFactory : public DeathTestFactory {
 public:
  // Overrides DeathTestFactory::Create().
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test);
};

// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
GTEST_API_ bool ExitedUnsuccessfully(int exit_status);

// Traps C++ exceptions escaping statement and reports them as test
// failures. Note that trapping SEH exceptions is not implemented here.
//
// Parameters:
//   statement  - the code under test.
//   death_test - pointer to the DeathTest on which Abort() is called.
//
// With GTEST_HAS_EXCEPTIONS, an exception derived from std::exception
// has its what() text printed to stderr (prefixed by the file location
// of the macro expansion) and stderr is flushed before
// Abort(TEST_THREW_EXCEPTION) is called; any other exception calls
// Abort(TEST_THREW_EXCEPTION) without printing.  Without
// GTEST_HAS_EXCEPTIONS the macro merely executes statement.
# if GTEST_HAS_EXCEPTIONS
#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
  try { \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
  } catch (const ::std::exception& gtest_exception) { \
    fprintf(\
        stderr, \
        "\n%s: Caught std::exception-derived exception escaping the " \
        "death test statement. Exception message: %s\n", \
        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
        gtest_exception.what()); \
    fflush(stderr); \
    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
  } catch (...) { \
    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
  }

# else
#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)

# endif

// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
// ASSERT_EXIT*, and EXPECT_EXIT*.
# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    const ::testing::internal::RE& gtest_regex = (regex); \
    ::testing::internal::DeathTest* gtest_dt; \
    if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
        __FILE__, __LINE__, &gtest_dt)) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
    } \
    if (gtest_dt != NULL) { \
      ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
          gtest_dt_ptr(gtest_dt); \
      switch (gtest_dt->AssumeRole()) { \
        case ::testing::internal::DeathTest::OVERSEE_TEST: \
          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
          } \
          break; \
        case ::testing::internal::DeathTest::EXECUTE_TEST: { \
          ::testing::internal::DeathTest::ReturnSentinel \
              gtest_sentinel(gtest_dt); \
          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
          break; \
        } \
        default: \
          break; \
      } \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
      fail(::testing::internal::DeathTest::LastMessage())
// The symbol "fail" here expands to something into which a message
// can be streamed.

// Holds the parsed contents of the --gtest_internal_run_death_test
// flag, as it existed when RUN_ALL_TESTS was called.  A non-negative
// write_fd is closed with posix::Close() when the object is destroyed.
class InternalRunDeathTestFlag {
 public:
  InternalRunDeathTestFlag(const String& a_file,
                           int a_line,
                           int an_index,
                           int a_write_fd)
      : file_(a_file),
        line_(a_line),
        index_(an_index),
        write_fd_(a_write_fd) {
  }

  ~InternalRunDeathTestFlag() {
    const bool has_descriptor = write_fd_ >= 0;
    if (has_descriptor) {
      posix::Close(write_fd_);
    }
  }

  // Read-only accessors for the parsed fields.
  String file() const {
    return file_;
  }
  int line() const {
    return line_;
  }
  int index() const {
    return index_;
  }
  int write_fd() const {
    return write_fd_;
  }

 private:
  String file_;
  int line_;
  int index_;
  int write_fd_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
};

// Returns a newly created InternalRunDeathTestFlag object with fields
// initialized from the GTEST_FLAG(internal_run_death_test) flag if
// the flag is specified; otherwise returns NULL.
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();

#else  // GTEST_HAS_DEATH_TEST

// This macro is used for implementing macros such as
// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
// death tests are not supported. Those macros must compile on such systems
// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
// systems that support death tests. This allows one to write such a macro
// on a system that does not support death tests and be sure that it will
// compile on a death-test supporting system.
//
// Parameters:
//   statement -  A statement that a macro such as EXPECT_DEATH would test
//                for program termination. This macro has to make sure this
//                statement is compiled but not executed, to ensure that
//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
//                parameter iff EXPECT_DEATH compiles with it.
//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
//                the output of statement.  This parameter has to be
//                compiled but not evaluated by this macro, to ensure that
//                this macro only accepts expressions that a macro such as
//                EXPECT_DEATH would accept.
//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
//                compile inside functions where ASSERT_DEATH doesn't
//                compile.
//
//  The branch that has an always false condition is used to ensure that
//  statement and regex are compiled (and thus syntactically correct) but
//  never executed. The unreachable code macro protects the terminator
//  statement from generating an 'unreachable code' warning in case
//  statement unconditionally returns or throws. The Message constructor at
//  the end allows the syntax of streaming additional messages into the
//  macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
// Expansion summary: the AlwaysTrue() branch logs a warning naming the
// statement; the AlwaysFalse() branch holds the regex, statement and
// terminator so they are compiled but never run; the final else is a
// bare ::testing::Message() that absorbs any "<< ..." the caller appends.
# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
    if (::testing::internal::AlwaysTrue()) { \
      GTEST_LOG_(WARNING) \
          << "Death tests are not supported on this platform.\n" \
          << "Statement '" #statement "' cannot be verified."; \
    } else if (::testing::internal::AlwaysFalse()) { \
      ::testing::internal::RE::PartialMatch(".*", (regex)); \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
      terminator; \
    } else \
      ::testing::Message()

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_

namespace testing {

// This flag controls the style of death tests.  Valid values are "threadsafe",
// meaning that the death test child process will re-execute the test binary
// from the start, running only a single death test, or "fast",
// meaning that the child process will execute the test logic immediately
// after forking.
GTEST_DECLARE_string_(death_test_style);

#if GTEST_HAS_DEATH_TEST

// The following macros are useful for writing death tests.

// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
// executed:
//
//   1. It generates a warning if there is more than one active
//   thread.  This is because it's safe to fork() or clone() only
//   when there is a single thread.
//
//   2. The parent process clone()s a sub-process and runs the death
//   test in it; the sub-process exits with code 0 at the end of the
//   death test, if it hasn't exited already.
//
//   3. The parent process waits for the sub-process to terminate.
//
//   4. The parent process checks the exit code and error message of
//   the sub-process.
//
// Examples:
//
//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
//   for (int i = 0; i < 5; i++) {
//     EXPECT_DEATH(server.ProcessRequest(i),
//                  "Invalid request .* in ProcessRequest()")
//         << "Failed to die on request " << i;
//   }
//
//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
//
//   bool KilledBySIGHUP(int exit_code) {
//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
//   }
//
//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
//
// On the regular expressions used in death tests:
//
//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
//   which uses the POSIX extended regex syntax.
//
//   On other platforms (e.g. Windows), we only support a simple regex
//   syntax implemented as part of Google Test.  This limited
//   implementation should be enough most of the time when writing
//   death tests; though it lacks many features you can find in PCRE
//   or POSIX extended regex syntax.  For example, we don't support
//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
//   repetition count ("x{5,7}"), among others.
//
//   Below is the syntax that we do support.  We chose it to be a
//   subset of both PCRE and POSIX extended regex, so it's easy to
//   learn wherever you come from.  In the following: 'A' denotes a
//   literal character, period (.), or a single \\ escape sequence;
//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
//   natural numbers.
//
//     c     matches any literal character c
//     \\d   matches any decimal digit
//     \\D   matches any character that's not a decimal digit
//     \\f   matches \f
//     \\n   matches \n
//     \\r   matches \r
//     \\s   matches any ASCII whitespace, including \n
//     \\S   matches any character that's not a whitespace
//     \\t   matches \t
//     \\v   matches \v
//     \\w   matches any letter, _, or decimal digit
//     \\W   matches any character that \\w doesn't match
//     \\c   matches any literal character c, which must be a punctuation
//     .     matches any single character except \n
//     A?    matches 0 or 1 occurrences of A
//     A*    matches 0 or many occurrences of A
//     A+    matches 1 or many occurrences of A
//     ^     matches the beginning of a string (not that of each line)
//     $     matches the end of a string (not that of each line)
//     xy    matches x followed by y
//
//   If you accidentally use PCRE or POSIX extended regex features
//   not implemented by us, you will get a run-time failure.  In that
//   case, please try to rewrite your regular expression within the
//   above syntax.
//
//   This implementation is *not* meant to be as highly tuned or robust
//   as a compiled regex library, but should perform well enough for a
//   death test, which already incurs significant overhead by launching
//   a child process.
//
// Known caveats:
//
//   A "threadsafe" style death test obtains the path to the test
//   program from argv[0] and re-executes it in the sub-process.  For
//   simplicity, the current implementation doesn't search the PATH
//   when launching the sub-process.  This means that the user must
//   invoke the test program via a path that contains at least one
//   path separator (e.g. path/to/foo_test and
//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
//   is rarely a problem as people usually don't put the test binary
//   directory in PATH.
//
// TODO(wan@google.com): make thread-safe death tests search the PATH.

// Asserts that a given statement causes the program to exit, with an
// integer exit status that satisfies predicate, and emitting error output
// that matches regex.
// Failures are reported through GTEST_FATAL_FAILURE_, whose expansion
// begins with "return".
# define ASSERT_EXIT(statement, predicate, regex) \
    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)

// Like ASSERT_EXIT, but continues on to successive tests in the
// test case, if any:
// Failures are reported through GTEST_NONFATAL_FAILURE_.
# define EXPECT_EXIT(statement, predicate, regex) \
    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)

// Asserts that a given statement causes the program to exit, either by
// explicitly exiting with a nonzero exit code or being killed by a
// signal, and emitting error output that matches regex.
// Implemented as ASSERT_EXIT with the predicate
// ::testing::internal::ExitedUnsuccessfully.
# define ASSERT_DEATH(statement, regex) \
    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Like ASSERT_DEATH, but continues on to successive tests in the
// test case, if any:
// Implemented as EXPECT_EXIT with the predicate
// ::testing::internal::ExitedUnsuccessfully.
# define EXPECT_DEATH(statement, regex) \
    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:

// Tests that an exit code describes a normal exit with a given exit code.
// Usable as the predicate argument of {ASSERT,EXPECT}_EXIT, e.g.
// ::testing::ExitedWithCode(0).
class GTEST_API_ ExitedWithCode {
 public:
  // exit_code is the code the process is expected to exit with.
  explicit ExitedWithCode(int exit_code);
  // Applied to an exit status; the definition is not in this header.
  bool operator()(int exit_status) const;
 private:
  // No implementation - assignment is unsupported.
  void operator=(const ExitedWithCode& other);

  // The expected exit code, fixed at construction.
  const int exit_code_;
};

# if !GTEST_OS_WINDOWS
// Tests that an exit code describes an exit due to termination by a
// given signal.
// Only declared when GTEST_OS_WINDOWS is not set.  Usable as the
// predicate argument of {ASSERT,EXPECT}_EXIT.
class GTEST_API_ KilledBySignal {
 public:
  // signum is the signal the process is expected to be terminated by.
  explicit KilledBySignal(int signum);
  // Applied to an exit status; the definition is not in this header.
  bool operator()(int exit_status) const;
 private:
  // The expected signal number, fixed at construction.
  const int signum_;
};
# endif  // !GTEST_OS_WINDOWS

// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
// The death testing framework causes this to have interesting semantics,
// since the side effects of the call are only visible in opt mode, and not
// in debug mode.
//
// In practice, this can be used to test functions that utilize the
// LOG(DFATAL) macro using the following style:
//
// int DieInDebugOr12(int* sideeffect) {
//   if (sideeffect) {
//     *sideeffect = 12;
//   }
//   LOG(DFATAL) << "death";
//   return 12;
// }
//
// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
//   int sideeffect = 0;
//   // Only asserts in dbg.
//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
//
// #ifdef NDEBUG
//   // opt-mode has sideeffect visible.
//   EXPECT_EQ(12, sideeffect);
// #else
//   // dbg-mode no visible sideeffect.
//   EXPECT_EQ(0, sideeffect);
// #endif
// }
//
// This will assert that DieInDebugOr12() crashes in debug
// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
// appropriate fallback value (12 in this case) in opt mode. If you
// need to test that a function has appropriate side-effects in opt
// mode, include assertions against the side-effects.  A general
// pattern for this is:
//
// EXPECT_DEBUG_DEATH({
//   // Side-effects here will have an effect after this statement in
//   // opt mode, but none in debug mode.
//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
// }, "death");
//
// With NDEBUG defined, both macros just execute statement once (the
// do/while condition is AlwaysFalse()) and regex is not used at all.
// Without NDEBUG they forward to EXPECT_DEATH / ASSERT_DEATH.
# ifdef NDEBUG

#  define EXPECT_DEBUG_DEATH(statement, regex) \
  do { statement; } while (::testing::internal::AlwaysFalse())

#  define ASSERT_DEBUG_DEATH(statement, regex) \
  do { statement; } while (::testing::internal::AlwaysFalse())

# else

#  define EXPECT_DEBUG_DEATH(statement, regex) \
  EXPECT_DEATH(statement, regex)

#  define ASSERT_DEBUG_DEATH(statement, regex) \
  ASSERT_DEATH(statement, regex)

# endif  // NDEBUG for EXPECT_DEBUG_DEATH
#endif  // GTEST_HAS_DEATH_TEST

// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
// death tests are supported; otherwise they just issue a warning.  This is
// useful when you are combining death test assertions with normal test
// assertions in one test.
//
// In the unsupported case both forward to GTEST_UNSUPPORTED_DEATH_TEST_;
// the EXPECT form passes an empty terminator and the ASSERT form passes
// "return".
#if GTEST_HAS_DEATH_TEST
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
    EXPECT_DEATH(statement, regex)
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
    ASSERT_DEATH(statement, regex)
#else
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
#endif

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the Message class.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
// program!

#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_

#include <limits>


namespace testing {

// The Message class works like an ostream repeater.
//
// Typical usage:
//
//   1. You stream a bunch of values to a Message object.
//      It will remember the text in a stringstream.
//   2. Then you stream the Message object to an ostream.
//      This causes the text in the Message to be streamed
//      to the ostream.
//
// For example;
//
//   testing::Message foo;
//   foo << 1 << " != " << 2;
//   std::cout << foo;
//
// will print "1 != 2".
//
// Message is not intended to be inherited from.  In particular, its
// destructor is not virtual.
//
// Note that stringstream behaves differently in gcc and in MSVC.  You
// can stream a NULL char pointer to it in the former, but not in the
// latter (it causes an access violation if you do).  The Message
// class hides this difference by treating a NULL char pointer as
// "(null)".
class GTEST_API_ Message {
 private:
  // The type of basic IO manipulators (endl, ends, and flush) for
  // narrow streams.
  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);

 public:
  // Constructs an empty Message.
  // We allocate the stringstream separately because otherwise each use of
  // ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
  // stack frame leading to huge stack frames in some cases; gcc does not reuse
  // the stack space.
  Message() : ss_(new ::std::stringstream) {
    // By default, we want there to be enough precision when printing
    // a double to a Message.
    *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  }

  // Copy constructor.  The new Message gets its own stringstream that
  // is seeded with the text of msg as returned by GetString().  Note
  // that the precision configured by the default constructor is not
  // re-applied to the new stream here.
  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
    *ss_ << msg.GetString();
  }

  // Constructs a Message from a C-string.
  //
  // Streaming a NULL char pointer to an ostream is undefined behavior,
  // so, consistently with the pointer overload of operator<< below, a
  // NULL str is rendered as "(null)" instead of being streamed.
  explicit Message(const char* str) : ss_(new ::std::stringstream) {
    if (str == NULL) {
      *ss_ << "(null)";
    } else {
      *ss_ << str;
    }
  }

#if GTEST_OS_SYMBIAN
  // Streams a value (either a pointer or not) to this object.
  // Dispatches to one of the private StreamHelper() overloads based on
  // whether T is a pointer type.
  template <typename T>
  inline Message& operator <<(const T& value) {
    StreamHelper(typename internal::is_pointer<T>::type(), value);
    return *this;
  }
#else
  // Streams a non-pointer value to this object.
  template <typename T>
  inline Message& operator <<(const T& val) {
    ::GTestStreamToHelper(ss_.get(), val);
    return *this;
  }

  // Streams a pointer value to this object.
  //
  // This function is an overload of the previous one.  When you
  // stream a pointer to a Message, this definition will be used as it
  // is more specialized.  (The C++ Standard, section
  // [temp.func.order].)  If you stream a non-pointer, then the
  // previous definition will be used.
  //
  // The reason for this overload is that streaming a NULL pointer to
  // ostream is undefined behavior.  Depending on the compiler, you
  // may get "0", "(nil)", "(null)", or an access violation.  To
  // ensure consistent result across compilers, we always treat NULL
  // as "(null)".
  template <typename T>
  inline Message& operator <<(T* const& pointer) {  // NOLINT
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      ::GTestStreamToHelper(ss_.get(), pointer);
    }
    return *this;
  }
#endif  // GTEST_OS_SYMBIAN

  // Since the basic IO manipulators are overloaded for both narrow
  // and wide streams, we have to provide this specialized definition
  // of operator <<, even though its body is the same as the
  // templatized version above.  Without this definition, streaming
  // endl or other basic IO manipulators to Message will confuse the
  // compiler.
  Message& operator <<(BasicNarrowIoManip val) {
    *ss_ << val;
    return *this;
  }

  // Instead of 1/0, we want to see true/false for bool values.
  Message& operator <<(bool b) {
    return *this << (b ? "true" : "false");
  }

  // These two overloads allow streaming a wide C string to a Message
  // using the UTF-8 encoding.  Both forward the pointer to
  // internal::String::ShowWideCString() and stream its result.
  Message& operator <<(const wchar_t* wide_c_str) {
    return *this << internal::String::ShowWideCString(wide_c_str);
  }
  Message& operator <<(wchar_t* wide_c_str) {
    return *this << internal::String::ShowWideCString(wide_c_str);
  }

#if GTEST_HAS_STD_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::std::wstring& wstr);
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::wstring& wstr);
#endif  // GTEST_HAS_GLOBAL_WSTRING

  // Gets the text streamed to this object so far as a String.
  // Each '\0' character in the buffer is replaced with "\\0".
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  internal::String GetString() const {
    return internal::StringStreamToString(ss_.get());
  }

 private:

#if GTEST_OS_SYMBIAN
  // These are needed as the Nokia Symbian Compiler cannot decide between
  // const T& and const T* in a function template. The Nokia compiler _can_
  // decide between class template specializations for T and T*, so a
  // tr1::type_traits-like is_pointer works, and we can overload on that.
  template <typename T>
  inline void StreamHelper(internal::true_type /*dummy*/, T* pointer) {
    if (pointer == NULL) {
      *ss_ << "(null)";
    } else {
      ::GTestStreamToHelper(ss_.get(), pointer);
    }
  }
  template <typename T>
  inline void StreamHelper(internal::false_type /*dummy*/, const T& value) {
    ::GTestStreamToHelper(ss_.get(), value);
  }
#endif  // GTEST_OS_SYMBIAN

  // We'll hold the text streamed to this object here.
  const internal::scoped_ptr< ::std::stringstream> ss_;

  // We declare (but don't implement) this to prevent the compiler
  // from implementing the assignment operator.
  void operator=(const Message&);
};

// Writes the text accumulated in a Message (as returned by
// GetString()) to the given ostream and returns that ostream.
inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
  os << sb.GetString();
  return os;
}

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
// This file was GENERATED by command:
//     pump.py gtest-param-test.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: vladl@google.com (Vlad Losev)
//
// Macros and functions for implementing parameterized tests
// in Google C++ Testing Framework (Google Test)
//
// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_


// Value-parameterized tests allow you to test your code with different
// parameters without writing multiple copies of the same test.
//
// Here is how you use value-parameterized tests:

#if 0

// To write value-parameterized tests, first you should define a fixture
// class. It is usually derived from testing::TestWithParam<T> (see below for
// another inheritance scheme that's sometimes useful in more complicated
// class hierarchies), where T is the type of your parameter values.
// TestWithParam<T> is itself derived from testing::Test. T can be any
// copyable type. If it's a raw pointer, you are responsible for managing the
// lifespan of the pointed values.

class FooTest : public ::testing::TestWithParam<const char*> {
  // You can implement all the usual class fixture members here.
};

// Then, use the TEST_P macro to define as many parameterized tests
// for this fixture as you want. The _P suffix is for "parameterized"
// or "pattern", whichever you prefer to think.

TEST_P(FooTest, DoesBlah) {
  // Inside a test, access the test parameter with the GetParam() method
  // of the TestWithParam<T> class:
  EXPECT_TRUE(foo.Blah(GetParam()));
  ...
}

TEST_P(FooTest, HasBlahBlah) {
  ...
}

// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
// case with any set of parameters you want. Google Test defines a number
// of functions for generating test parameters. They return what we call
// (surprise!) parameter generators. Here is a  summary of them, which
// are all in the testing namespace:
//
//
//  Range(begin, end [, step]) - Yields values {begin, begin+step,
//                               begin+step+step, ...}. The values do not
//                               include end. step defaults to 1.
//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
//  ValuesIn(container)        - Yields values from a C-style array, an STL
//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
//  Bool()                     - Yields sequence {false, true}.
//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
//                               for the math savvy) of the values generated
//                               by the N generators.
//
// For more details, see comments at the definitions of these functions below
// in this file.
//
// The following statement will instantiate tests from the FooTest test case
// each with parameter values "meeny", "miny", and "moe".

INSTANTIATE_TEST_CASE_P(InstantiationName,
                        FooTest,
                        Values("meeny", "miny", "moe"));

// To distinguish different instances of the pattern, (yes, you
// can instantiate it more than once) the first argument to the
// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
// actual test case name. Remember to pick unique prefixes for different
// instantiations. The tests from the instantiation above will have
// these names:
//
//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
//
// You can use these names in --gtest_filter.
//
// This statement will instantiate all tests from FooTest again, each
// with parameter values "cat" and "dog":

const char* pets[] = {"cat", "dog"};
INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));

// The tests from the instantiation above will have these names:
//
//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
//
// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
// in the given test case, whether their definitions come before or
// AFTER the INSTANTIATE_TEST_CASE_P statement.
//
// Please also note that generator expressions (including parameters to the
// generators) are evaluated in InitGoogleTest(), after main() has started.
// This allows the user on one hand, to adjust generator parameters in order
// to dynamically determine a set of tests to run and on the other hand,
// give the user a chance to inspect the generated tests with Google Test
// reflection API before RUN_ALL_TESTS() is executed.
//
// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
// for more examples.
//
// In the future, we plan to publish the API for defining new parameter
// generators. But for now this interface remains part of the internal
// implementation and is subject to change.
//
//
// A parameterized test fixture must be derived from testing::Test and from
// testing::WithParamInterface<T>, where T is the type of the parameter
// values. Inheriting from TestWithParam<T> satisfies that requirement because
// TestWithParam<T> inherits from both Test and WithParamInterface. In more
// complicated hierarchies, however, it is occasionally useful to inherit
// separately from Test and WithParamInterface. For example:

class BaseTest : public ::testing::Test {
  // You can inherit all the usual members for a non-parameterized test
  // fixture here.
};

class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
  // The usual test fixture members go here too.
};

TEST_F(BaseTest, HasFoo) {
  // This is an ordinary non-parameterized test.
}

TEST_P(DerivedTest, DoesBlah) {
  // GetParam works just the same here as if you inherit from TestWithParam.
  EXPECT_TRUE(foo.Blah(GetParam()));
}

#endif  // 0


#if !GTEST_OS_SYMBIAN
# include <utility>
#endif

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)

// Type and function utilities for implementing parameterized tests.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_

#include <iterator>
#include <utility>
#include <vector>

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
// Copyright 2003 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Dan Egnor (egnor@google.com)
//
// A "smart" pointer type with reference tracking.  Every pointer to a
// particular object is kept on a circular linked list.  When the last pointer
// to an object is destroyed or reassigned, the object is deleted.
//
// Used properly, this deletes the object when the last reference goes away.
// There are several caveats:
// - Like all reference counting schemes, cycles lead to leaks.
// - Each smart pointer is actually two pointers (8 bytes instead of 4).
// - Every time a pointer is assigned, the entire list of pointers to that
//   object is traversed.  This class is therefore NOT SUITABLE when there
//   will often be more than two or three pointers to a particular object.
// - References are only tracked as long as linked_ptr<> objects are copied.
//   If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
//   will happen (double deletion).
//
// A good use of this class is storing object references in STL containers.
// You can safely put linked_ptr<> in a vector<>.
// Other uses may not be as good.
//
// Note: If you use an incomplete type with linked_ptr<>, the class
// *containing* linked_ptr<> must have a constructor and destructor (even
// if they do nothing!).
//
// Bill Gibbons suggested we use something like this.
//
// Thread Safety:
//   Unlike other linked_ptr implementations, in this implementation
//   a linked_ptr object is thread-safe in the sense that:
//     - it's safe to copy linked_ptr objects concurrently,
//     - it's safe to copy *from* a linked_ptr and read its underlying
//       raw pointer (e.g. via get()) concurrently, and
//     - it's safe to write to two linked_ptrs that point to the same
//       shared object concurrently.
// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
// confusion with normal linked_ptr.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_

#include <stdlib.h>
#include <assert.h>


namespace testing {
namespace internal {

// Protects copying of all linked_ptr objects.
GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);

// This is used internally by all instances of linked_ptr<>.  It needs to be
// a non-template class because different types of linked_ptr<> can refer to
// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
// So, it needs to be possible for different types of linked_ptr to participate
// in the same circular linked list, so we need a single class type here.
//
// DO NOT USE THIS CLASS DIRECTLY YOURSELF.  Use linked_ptr<T>.
class linked_ptr_internal {
 public:
  // Starts a fresh, one-element circle: this node becomes its own
  // successor.  No lock is taken here.
  void join_new() {
    next_ = this;
  }

  // Locking note: join() and depart() rewrite the next_ link of
  // *another* node in the same circle, so two such operations must
  // never run concurrently.  Because linked_ptr objects of different
  // types (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
  // linked_ptr<Derived2>) can share one circle, a single global mutex
  // guards every circle.  That can cause serious contention in
  // production code, but is acceptable in a testing framework.

  // Inserts this node into the circle that ptr belongs to, placing it
  // immediately before ptr.
  // L < g_linked_ptr_mutex
  void join(linked_ptr_internal const* ptr) {
    MutexLock lock(&g_linked_ptr_mutex);

    // Walk the circle until we reach the node whose successor is ptr.
    linked_ptr_internal const* tail = ptr;
    for (; tail->next_ != ptr; tail = tail->next_) {}
    tail->next_ = this;
    next_ = ptr;
  }

  // Removes this node from its circle.  Returns true if this node was
  // the only member of the circle, false otherwise.  After this call
  // the node may join() another circle.
  // L < g_linked_ptr_mutex
  bool depart() {
    MutexLock lock(&g_linked_ptr_mutex);

    const bool was_alone = (next_ == this);
    if (!was_alone) {
      // Find our predecessor and make it skip over us.
      linked_ptr_internal const* pred = next_;
      while (pred->next_ != this) pred = pred->next_;
      pred->next_ = next_;
    }
    return was_alone;
  }

 private:
  // Successor in the circular list.  Mutable because join() and
  // depart() update it through pointers to const nodes.
  mutable linked_ptr_internal const* next_;
};

template <typename T>
class linked_ptr {
 public:
  typedef T element_type;

  // Adopts a raw pointer and starts a new circle of owners for it.
  // This should happen as soon as possible after the object is created.
  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }

  // Leaves the circle; the object is deleted if this was the last owner.
  ~linked_ptr() { depart(); }

  // Copy construction: share the object held by ptr by joining its
  // circle of owners.
  template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
  linked_ptr(linked_ptr const& ptr) {  // NOLINT
    assert(&ptr != this);
    copy(&ptr);
  }

  // Assignment: give up the current object first, then share the one
  // held by ptr.
  template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
    depart();
    copy(&ptr);
    return *this;
  }

  linked_ptr& operator=(linked_ptr const& ptr) {
    // Self-assignment is a no-op.
    if (&ptr == this) return *this;
    depart();
    copy(&ptr);
    return *this;
  }

  // Smart pointer members.

  // Gives up the current object and takes ownership of ptr instead.
  void reset(T* ptr = NULL) {
    depart();
    capture(ptr);
  }
  T* get() const { return value_; }
  T* operator->() const { return get(); }
  T& operator*() const { return *get(); }

  // Comparisons look only at the raw pointer value.
  bool operator==(T* p) const { return get() == p; }
  bool operator!=(T* p) const { return get() != p; }
  template <typename U>
  bool operator==(linked_ptr<U> const& ptr) const {
    return get() == ptr.get();
  }
  template <typename U>
  bool operator!=(linked_ptr<U> const& ptr) const {
    return get() != ptr.get();
  }

 private:
  // Other instantiations need access to link_ in copy().
  template <typename U>
  friend class linked_ptr;

  T* value_;
  linked_ptr_internal link_;

  // Leaves the current circle and deletes the object if no other
  // owner remains.
  void depart() {
    const bool last_owner = link_.depart();
    if (last_owner) delete value_;
  }

  // Becomes the sole owner of ptr.
  void capture(T* ptr) {
    value_ = ptr;
    link_.join_new();
  }

  // Shares the object held by *ptr.  A null pointer is not tracked in
  // the source's circle; it gets a circle of its own instead.
  template <typename U> void copy(linked_ptr<U> const* ptr) {
    value_ = ptr->get();
    if (!value_) {
      link_.join_new();
      return;
    }
    link_.join(&ptr->link_);
  }
};

// Raw-pointer-on-the-left equality: true when x holds exactly ptr.
template<typename T> inline
bool operator==(T* ptr, const linked_ptr<T>& x) {
  T* const held = x.get();
  return ptr == held;
}

// Raw-pointer-on-the-left inequality: true when x holds something
// other than ptr.
template<typename T> inline
bool operator!=(T* ptr, const linked_ptr<T>& x) {
  T* const held = x.get();
  return ptr != held;
}

// Wraps a raw T* in a linked_ptr<T>, letting the compiler deduce T.
// make_linked_ptr(new FooBarBaz<type>(arg)) is shorthand for
// linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg)).
template <typename T>
linked_ptr<T> make_linked_ptr(T* ptr) {
  typedef linked_ptr<T> Owner;
  return Owner(ptr);
}

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// A user can teach this function how to print a class type T by
// defining either operator<<() or PrintTo() in the namespace that
// defines T.  More specifically, the FIRST defined function in the
// following list will be used (assuming T is defined in namespace
// foo):
//
//   1. foo::PrintTo(const T&, ostream*)
//   2. operator<<(ostream&, const T&) defined in either foo or the
//      global namespace.
//
// If none of the above is defined, it will print the debug string of
// the value if it is a protocol buffer, or print the raw bytes in the
// value otherwise.
//
// To aid debugging: when T is a reference type, the address of the
// value is also printed; when T is a (const) char pointer, both the
// pointer value and the NUL-terminated string it points to are
// printed.
//
// We also provide some convenient wrappers:
//
//   // Prints a value to a string.  For a (const or not) char
//   // pointer, the NUL-terminated string (but not the pointer) is
//   // printed.
//   std::string ::testing::PrintToString(const T& value);
//
//   // Prints a value tersely: for a reference type, the referenced
//   // value (but not the address) is printed; for a (const or not) char
//   // pointer, the NUL-terminated string (but not the pointer) is
//   // printed.
//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
//
//   // Prints value using the type inferred by the compiler.  The difference
//   // from UniversalTersePrint() is that this function prints both the
//   // pointer and the NUL-terminated string for a (const or not) char pointer.
//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
//
//   // Prints the fields of a tuple tersely to a string vector, one
//   // element for each field. Tuple support must be enabled in
//   // gtest-port.h.
//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
//       const Tuple& value);
//
// Known limitation:
//
// The print primitives print the elements of an STL-style container
// using the compiler-inferred type of *iter where iter is a
// const_iterator of the container.  When const_iterator is an input
// iterator but not a forward iterator, this inferred type may not
// match value_type, and the print output may be incorrect.  In
// practice, this is rarely a problem as for most containers
// const_iterator is a forward iterator.  We'll fix this if there's an
// actual need for it.  Note that this fix cannot rely on value_type
// being defined as many user-defined container types don't have
// value_type.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_

#include <ostream>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace testing {

// Definitions in the 'internal' and 'internal2' name spaces are
// subject to change without notice.  DO NOT USE THEM IN USER CODE!
namespace internal2 {

// Prints the given number of bytes in the given object to the given
// ostream.
GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
                                     size_t count,
                                     ::std::ostream* os);

// For selecting which printer to use when a given type has neither <<
// nor PrintTo().  A value of this enum is the second template
// argument of TypeWithoutFormatter below.
enum TypeKind {
  kProtobuf,              // a protobuf type
  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
                          // (e.g. a named or unnamed enum type)
  kOtherType              // anything else
};

// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
// by the universal printer to print a value of type T when neither
// operator<< nor PrintTo() is defined for T, where kTypeKind is the
// "kind" of T as defined by enum TypeKind.
template <typename T, TypeKind kTypeKind>
class TypeWithoutFormatter {
 public:
  // Fallback used when kTypeKind is kOtherType: hands the object's
  // raw bytes to PrintBytesInObjectTo().
  static void PrintValue(const T& value, ::std::ostream* os) {
    const unsigned char* const raw_bytes =
        reinterpret_cast<const unsigned char*>(&value);
    PrintBytesInObjectTo(raw_bytes, sizeof(T), os);
  }
};

// Maximum length, in characters, of a protobuf's ShortDebugString()
// output that is still printed as a one-liner.  When the short form
// exceeds this many characters we print the protobuf using
// DebugString() instead, for better readability.
const size_t kProtobufOneLinerMaxLength = 50;

template <typename T>
class TypeWithoutFormatter<T, kProtobuf> {
 public:
  static void PrintValue(const T& value, ::std::ostream* os) {
    const ::testing::internal::string short_str = value.ShortDebugString();
    const ::testing::internal::string pretty_str =
        short_str.length() <= kProtobufOneLinerMaxLength ?
        short_str : ("\n" + value.DebugString());
    *os << ("<" + pretty_str + ">");
  }
};

template <typename T>
class TypeWithoutFormatter<T, kConvertibleToInteger> {
 public:
  // Since T has no << operator or PrintTo() but can be implicitly
  // converted to BiggestInt, we print it as a BiggestInt.
  //
  // Most likely T is an enum type (either named or unnamed), in which
  // case printing it as an integer is the desired behavior.  In case
  // T is not an enum, printing it as an integer is the best we can do
  // given that it has no user-defined printer.
  static void PrintValue(const T& value, ::std::ostream* os) {
    const internal::BiggestInt kBigInt = value;
    *os << kBigInt;
  }
};

// Prints the given value to the given ostream.  If the value is a
// protocol message, its debug string is printed; if it's an enum or
// of a type implicitly convertible to BiggestInt, it's printed as an
// integer; otherwise the bytes in the value are printed.  This is
// what UniversalPrinter<T>::Print() does when it knows nothing about
// type T and T has neither << operator nor PrintTo().
//
// A user can override this behavior for a class type Foo by defining
// a << operator in the namespace where Foo is defined.
//
// We put this operator in namespace 'internal2' instead of 'internal'
// to simplify the implementation, as much code in 'internal' needs to
// use << in STL, which would conflict with our own << were it defined
// in 'internal'.
//
// Note that this operator<< takes a generic std::basic_ostream<Char,
// CharTraits> type instead of the more restricted std::ostream.  If
// we define it to take an std::ostream instead, we'll get an
// "ambiguous overloads" compiler error when trying to print a type
// Foo that supports streaming to std::basic_ostream<Char,
// CharTraits>, as the compiler cannot tell whether
// operator<<(std::ostream&, const T&) or
// operator<<(std::basic_ostream<Char, CharTraits>&, const Foo&) is more
// specific.
template <typename Char, typename CharTraits, typename T>
::std::basic_ostream<Char, CharTraits>& operator<<(
    ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
  TypeWithoutFormatter<T,
      (internal::IsAProtocolMessage<T>::value ? kProtobuf :
       internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
       kConvertibleToInteger : kOtherType)>::PrintValue(x, &os);
  return os;
}

}  // namespace internal2
}  // namespace testing

// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
// magic needed for implementing UniversalPrinter won't work.
namespace testing_internal {

// Used to print a value that is not an STL-style container when the
// user doesn't define PrintTo() for it.
//
// 'value' is streamed to '*os' with whichever << operator overload
// resolution selects among the candidates described below.
template <typename T>
void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
  // With the following statement, during unqualified name lookup,
  // testing::internal2::operator<< appears as if it was declared in
  // the nearest enclosing namespace that contains both
  // ::testing_internal and ::testing::internal2, i.e. the global
  // namespace.  For more details, refer to the C++ Standard section
  // 7.3.4-1 [namespace.udir].  This allows us to fall back onto
  // testing::internal2::operator<< in case T doesn't come with a <<
  // operator.
  //
  // We cannot write 'using ::testing::internal2::operator<<;', which
  // gcc 3.3 fails to compile due to a compiler bug.
  using namespace ::testing::internal2;  // NOLINT

  // Assuming T is defined in namespace foo, in the next statement,
  // the compiler will consider all of:
  //
  //   1. foo::operator<< (thanks to Koenig look-up),
  //   2. ::operator<< (as the current namespace is enclosed in ::),
  //   3. testing::internal2::operator<< (thanks to the using statement above).
  //
  // The operator<< whose type matches T best will be picked.
  //
  // We deliberately allow #2 to be a candidate, as sometimes it's
  // impossible to define #1 (e.g. when foo is ::std, defining
  // anything in it is undefined behavior unless you are a compiler
  // vendor.).
  *os << value;
}

}  // namespace testing_internal

namespace testing {
namespace internal {

// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
// value to the given ostream.  The caller must ensure that
// 'ostream_ptr' is not NULL, or the behavior is undefined.
//
// We define UniversalPrinter as a class template (as opposed to a
// function template), as we need to partially specialize it for
// reference types, which cannot be done with function templates.
template <typename T>
class UniversalPrinter;

template <typename T>
void UniversalPrint(const T& value, ::std::ostream* os);

// Default printer for an STL-style container for which the user has
// not defined a PrintTo().  The elements are written between curly
// braces, separated by commas; once kMaxCount elements have been
// printed the rest are replaced by " ...".
template <typename C>
void DefaultPrintTo(IsContainer /* dummy */,
                    false_type /* is not a pointer */,
                    const C& container, ::std::ostream* os) {
  const size_t kMaxCount = 32;  // Print at most this many elements.
  size_t printed = 0;
  *os << '{';
  typename C::const_iterator it = container.begin();
  while (it != container.end()) {
    if (printed > 0) {
      *os << ',';
      if (printed == kMaxCount) {
        // The limit has been reached; elide the remaining elements.
        *os << " ...";
        break;
      }
    }
    *os << ' ';
    // PrintTo(*it, os) cannot be used here because PrintTo() doesn't
    // handle *it being a native array.
    internal::UniversalPrint(*it, os);
    ++it;
    ++printed;
  }

  // A non-empty container gets a space before the closing brace.
  if (printed > 0) {
    *os << ' ';
  }
  *os << '}';
}

// Default printer for a pointer that is neither a char pointer nor a
// member pointer, when the user doesn't define PrintTo() for it.
// (Member variable pointers and member function pointers don't really
// point to a location in the address space and have an
// implementation-defined representation, so they are printed as raw
// bytes instead.)
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
                    true_type /* is a pointer */,
                    T* p, ::std::ostream* os) {
  if (p == NULL) {
    *os << "NULL";
    return;
  }

  // C++ doesn't allow casting from a function pointer to any object
  // pointer, so object pointers and function pointers are handled
  // separately.
  //
  // IsTrue() silences warnings: "Condition is always true",
  // "unreachable code".
  if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
    // T is not a function type.  Stream p with <<, relying on ADL to
    // pick up a user-defined << for the pointer type, if any.
    *os << p;
    return;
  }

  // T is a function type, so '*os << p' would just print p as bool.
  // We want to print p as a const void*, but earlier versions of gcc
  // (e.g. 3.4.5) cannot compile a direct reinterpret_cast to
  // const void* when p is a function pointer.  Going through
  // uintptr_t first solves the problem.
  *os << reinterpret_cast<const void*>(
      reinterpret_cast<uintptr_t>(p));
}

// Used to print a non-container, non-pointer value when the user
// doesn't define PrintTo() for it.  The two unnamed tag parameters
// only take part in overload selection; the value is handed over to
// ::testing_internal::DefaultPrintNonContainerTo().
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
                    false_type /* is not a pointer */,
                    const T& value, ::std::ostream* os) {
  ::testing_internal::DefaultPrintNonContainerTo(value, os);
}

// Prints the given value using the << operator if it has one;
// otherwise prints the bytes in it.  This is what
// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
// or overloaded for type T.
//
// A user can override this behavior for a class type Foo by defining
// an overload of PrintTo() in the namespace where Foo is defined.  We
// give the user this option as sometimes defining a << operator for
// Foo is not desirable (e.g. the coding style may prevent doing it,
// or there is already a << operator but it doesn't do what the user
// wants).
template <typename T>
void PrintTo(const T& value, ::std::ostream* os) {
  // DefaultPrintTo() is overloaded.  The types of its first two
  // arguments determine which version will be picked.  If T is an
  // STL-style container, the version for containers will be called; if
  // T is a pointer, the pointer version will be called; otherwise the
  // generic version will be called.
  //
  // Note that we check for container types here, before we check
  // for protocol message types in our operator<<.  The rationale is:
  //
  // For protocol messages, we want to give people a chance to
  // override Google Mock's format by defining a PrintTo() or
  // operator<<.  For STL containers, other formats can be
  // incompatible with Google Mock's format for the container
  // elements; therefore we check for container types here to ensure
  // that our format is used.
  //
  // The second argument of DefaultPrintTo() is needed to bypass a bug
  // in Symbian's C++ compiler that prevents it from picking the right
  // overload between:
  //
  //   PrintTo(const T& x, ...);
  //   PrintTo(T* x, ...);
  DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
}

// The following list of PrintTo() overloads tells
// UniversalPrinter<T>::Print() how to print standard types (built-in
// types, strings, plain arrays, and pointers).

// Overloads for various char types.
GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
inline void PrintTo(char c, ::std::ostream* os) {
  // A plain char is always printed through the unsigned char overload,
  // so that the output doesn't depend on whether the compiler treats
  // char as a signed type.
  const unsigned char as_unsigned = static_cast<unsigned char>(c);
  PrintTo(as_unsigned, os);
}

// Overloads for other simple built-in types.
inline void PrintTo(bool x, ::std::ostream* os) {
  // Booleans are spelled out instead of being printed as 1/0.
  if (x) {
    *os << "true";
  } else {
    *os << "false";
  }
}

// Overload for wchar_t type.
// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its decimal code (except for L'\0').
// The L'\0' char is printed as "L'\\0'". The decimal code is printed
// as signed integer when wchar_t is implemented by the compiler
// as a signed type and is printed as an unsigned integer when wchar_t
// is implemented as an unsigned type.
GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);

// Overloads for C strings.
GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
inline void PrintTo(char* s, ::std::ostream* os) {
  // Adds const to the pointer so that the call resolves to the
  // const char* overload.
  PrintTo(ImplicitCast_<const char*>(s), os);
}

// signed/unsigned char is often used for representing binary data, so
// we print pointers to it as void* to be safe.  Each of the four
// overloads below converts its argument to const void* and prints
// that instead.
inline void PrintTo(const signed char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(signed char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(unsigned char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}

// MSVC can be configured to define wchar_t as a typedef of unsigned
// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
// type.  When wchar_t is a typedef, defining an overload for const
// wchar_t* would cause unsigned short* be printed as a wide string,
// possibly causing invalid memory accesses.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Overloads for wide C strings
GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
inline void PrintTo(wchar_t* s, ::std::ostream* os) {
  // Adds const to the pointer so that the call resolves to the
  // const wchar_t* overload.
  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
}
#endif

// Overload for C arrays.  Multi-dimensional arrays are printed
// properly.

// Prints the given number of elements in an array, without printing
// the curly braces.
//
//   a      points to the first element to print.
//   count  is the number of elements to print; nothing is printed (and
//          'a' is never read) when it is 0.
//   os     receives the output; consecutive elements are separated
//          by ", ".
template <typename T>
void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
  for (size_t i = 0; i != count; i++) {
    // The separator goes between elements only, never before the first.
    if (i != 0) {
      *os << ", ";
    }
    UniversalPrint(a[i], os);
  }
}

// Overloads for ::string and ::std::string.
#if GTEST_HAS_GLOBAL_STRING
GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os);
inline void PrintTo(const ::string& s, ::std::ostream* os) {
  // Forwards to PrintStringTo(), which does the actual printing.
  PrintStringTo(s, os);
}
#endif  // GTEST_HAS_GLOBAL_STRING

GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
  // Forwards to PrintStringTo(), which does the actual printing.
  PrintStringTo(s, os);
}

// Overloads for ::wstring and ::std::wstring.
#if GTEST_HAS_GLOBAL_WSTRING
GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os);
inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
  // Forwards to PrintWideStringTo(), which does the actual printing.
  PrintWideStringTo(s, os);
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_WSTRING
GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
  // Forwards to PrintWideStringTo(), which does the actual printing.
  PrintWideStringTo(s, os);
}
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_TR1_TUPLE
// Overload for ::std::tr1::tuple.  Needed for printing function arguments,
// which are packed as tuples.

// Helper function for printing a tuple.  T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os);

// Overloaded PrintTo() for tuples of various arities.  We support
// tuples of up-to 10 fields.  The following implementation works
// regardless of whether tr1::tuple is implemented using the
// non-standard variadic template feature or not.
//
// Every overload simply forwards to PrintTupleTo(); one overload per
// arity is spelled out so that the set does not depend on variadic
// templates.

// Arity 0.
inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 1.
template <typename T1>
void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 2.
template <typename T1, typename T2>
void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 3.
template <typename T1, typename T2, typename T3>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 4.
template <typename T1, typename T2, typename T3, typename T4>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 5.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 6.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 7.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 8.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 9.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 10.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10>
void PrintTo(
    const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
    ::std::ostream* os) {
  PrintTupleTo(t, os);
}
#endif  // GTEST_HAS_TR1_TUPLE

// Overload for std::pair.
template <typename T1, typename T2>
void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
  *os << '(';
  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
  // a reference type.  The same for printing value.second.
  UniversalPrinter<T1>::Print(value.first, os);
  *os << ", ";
  UniversalPrinter<T2>::Print(value.second, os);
  *os << ')';
}

// Implements printing a non-reference type T by letting the compiler
// pick the right overload of PrintTo() for T.  This is the primary
// template; array and reference types have their own partial
// specializations.
template <typename T>
class UniversalPrinter {
 public:
  // MSVC warns about adding const to a function type, so we want to
  // disable the warning.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4180)  // Temporarily disables warning 4180.
#endif  // _MSC_VER

  // Prints 'value' to '*os'.
  //
  // Note: we deliberately don't call this PrintTo(), as that name
  // conflicts with ::testing::internal::PrintTo in the body of the
  // function.
  static void Print(const T& value, ::std::ostream* os) {
    // By default, ::testing::internal::PrintTo() is used for printing
    // the value.
    //
    // Thanks to Koenig look-up, if T is a class and has its own
    // PrintTo() function defined in its namespace, that function will
    // be visible here.  Since it is more specific than the generic ones
    // in ::testing::internal, it will be picked by the compiler in the
    // following statement - exactly what we want.
    PrintTo(value, os);
  }

#ifdef _MSC_VER
# pragma warning(pop)           // Restores the warning state.
#endif  // _MSC_VER
};

// UniversalPrintArray(begin, len, os) prints an array of 'len'
// elements, starting at address 'begin'.  An empty array is printed
// as "{}"; a long array is abbreviated to its leading and trailing
// elements.
template <typename T>
void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
  if (len == 0) {
    *os << "{}";
    return;
  }

  // Arrays with more than kThreshold elements are shortened to their
  // first and last kChunkSize elements.
  // TODO(wan@google.com): let the user control the threshold using a flag.
  const size_t kThreshold = 18;
  const size_t kChunkSize = 8;

  *os << "{ ";
  if (len > kThreshold) {
    PrintRawArrayTo(begin, kChunkSize, os);
    *os << ", ..., ";
    const T* const tail = begin + len - kChunkSize;
    PrintRawArrayTo(tail, kChunkSize, os);
  } else {
    PrintRawArrayTo(begin, len, os);
  }
  *os << " }";
}
// This overload prints a (const) char array compactly.
GTEST_API_ void UniversalPrintArray(const char* begin,
                                    size_t len,
                                    ::std::ostream* os);

// Implements printing an array type T[N].
template <typename T, size_t N>
class UniversalPrinter<T[N]> {
 public:
  // Prints the given array, omitting some elements when there are too
  // many.  The work is done by UniversalPrintArray(), which is given
  // the array together with its compile-time length N.
  static void Print(const T (&a)[N], ::std::ostream* os) {
    UniversalPrintArray(a, N, os);
  }
};

// Implements printing a reference type T&: the address of the
// referenced object is printed first, followed by its value.
template <typename T>
class UniversalPrinter<T&> {
 public:
  // MSVC warns about adding const to a function type, so we want to
  // disable the warning.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4180)  // Temporarily disables warning 4180.
#endif  // _MSC_VER

  static void Print(const T& value, ::std::ostream* os) {
    // reinterpret_cast is used to obtain the address because
    // static_cast doesn't compile when T is a function type.
    const void* const address = reinterpret_cast<const void*>(&value);
    *os << "@" << address << " ";

    // The value itself follows the address.
    UniversalPrint(value, os);
  }

#ifdef _MSC_VER
# pragma warning(pop)           // Restores the warning state.
#endif  // _MSC_VER
};

// Prints a value tersely: for a reference type, the referenced value
// (but not the address) is printed; for a (const) char pointer, the
// NUL-terminated string (but not the pointer) is printed.
template <typename T>
void UniversalTersePrint(const T& value, ::std::ostream* os) {
  UniversalPrint(value, os);
}
// A C string is printed as a string object; a null pointer is printed
// as "NULL".
inline void UniversalTersePrint(const char* str, ::std::ostream* os) {
  if (str != NULL) {
    UniversalPrint(string(str), os);
    return;
  }
  *os << "NULL";
}
// A mutable C string is printed exactly like a const one.
inline void UniversalTersePrint(char* str, ::std::ostream* os) {
  const char* const const_str = static_cast<const char*>(str);
  UniversalTersePrint(const_str, os);
}

// Prints a value using the type inferred by the compiler.  The
// difference between this and UniversalTersePrint() is that for a
// (const) char pointer, this prints both the pointer and the
// NUL-terminated string.
//
// T is deduced from the argument, so the call always goes to
// UniversalPrinter<T> for the deduced (non-reference) type.
template <typename T>
void UniversalPrint(const T& value, ::std::ostream* os) {
  UniversalPrinter<T>::Print(value, os);
}

#if GTEST_HAS_TR1_TUPLE
typedef ::std::vector<string> Strings;

// This helper template allows PrintTo() for tuples and
// UniversalTersePrintTupleFieldsToStrings() to be defined by
// induction on the number of tuple fields.  The idea is that
// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
// fields in tuple t, and can be defined in terms of
// TuplePrefixPrinter<N - 1>.

// The inductive case.
template <size_t N>
struct TuplePrefixPrinter {
  // Prints the first N fields of a tuple.
  template <typename Tuple>
  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
    TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
    *os << ", ";
    UniversalPrinter<typename ::std::tr1::tuple_element<N - 1, Tuple>::type>
        ::Print(::std::tr1::get<N - 1>(t), os);
  }

  // Tersely prints the first N fields of a tuple to a string vector,
  // one element for each field.
  template <typename Tuple>
  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
    TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
    ::std::stringstream ss;
    UniversalTersePrint(::std::tr1::get<N - 1>(t), &ss);
    strings->push_back(ss.str());
  }
};

// Base cases.
//
// N == 0: there are no fields to process, so both functions do
// nothing.
template <>
struct TuplePrefixPrinter<0> {
  // Prints nothing.
  template <typename Tuple>
  static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}

  // Appends nothing.
  template <typename Tuple>
  static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
};
// We have to specialize the entire TuplePrefixPrinter<> class
// template here, even though the definition of
// TersePrintPrefixToStrings() is the same as the generic version, as
// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
// support specializing a method template of a class template.
template <>
struct TuplePrefixPrinter<1> {
  template <typename Tuple>
  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
    UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
        Print(::std::tr1::get<0>(t), os);
  }

  template <typename Tuple>
  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
    ::std::stringstream ss;
    UniversalTersePrint(::std::tr1::get<0>(t), &ss);
    strings->push_back(ss.str());
  }
};

// Helper function for printing a tuple.  T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os) {
  *os << "(";
  TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
      PrintPrefixTo(t, os);
  *os << ")";
}

// Prints the fields of a tuple tersely to a string vector, one
// element for each field.  See the comment before
// UniversalTersePrint() for how we define "tersely".
template <typename Tuple>
Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
  // The printer covering every field of Tuple.
  typedef TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value> AllFields;

  Strings fields;
  AllFields::TersePrintPrefixToStrings(value, &fields);
  return fields;
}
#endif  // GTEST_HAS_TR1_TUPLE

}  // namespace internal

// Returns the terse printed form of 'value' (see
// internal::UniversalTersePrint()) as a string.
template <typename T>
::std::string PrintToString(const T& value) {
  ::std::stringstream buffer;
  internal::UniversalTersePrint(value, &buffer);
  return buffer.str();
}

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_

#if GTEST_HAS_PARAM_TEST

namespace testing {
namespace internal {

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Outputs a message explaining invalid registration of different
// fixture class for the same test case. This may happen when
// TEST_P macro is used to define two tests with the same name
// but in different namespaces.
GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
                                          const char* file, int line);

template <typename> class ParamGeneratorInterface;
template <typename> class ParamGenerator;

// Interface for iterating over elements provided by an implementation
// of ParamGeneratorInterface<T>.
template <typename T>
class ParamIteratorInterface {
 public:
  virtual ~ParamIteratorInterface() {}
  // A pointer to the base generator instance.
  // Used only for the purposes of iterator comparison
  // to make sure that two iterators belong to the same generator.
  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
  // Advances iterator to point to the next element
  // provided by the generator. The caller is responsible
  // for not calling Advance() on an iterator equal to
  // BaseGenerator()->End().
  virtual void Advance() = 0;
  // Clones the iterator object. Used for implementing copy semantics
  // of ParamIterator<T>.
  virtual ParamIteratorInterface* Clone() const = 0;
  // Dereferences the current iterator and provides (read-only) access
  // to the pointed value. It is the caller's responsibility not to call
  // Current() on an iterator equal to BaseGenerator()->End().
  // Used for implementing ParamIterator<T>::operator*() and
  // ParamIterator<T>::operator->().
  virtual const T* Current() const = 0;
  // Determines whether the given iterator and other point to the same
  // element in the sequence generated by the generator.
  // Used for implementing ParamIterator<T>::operator==().
  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
};

// Class iterating over elements provided by an implementation of
// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
// and implements the const forward iterator concept.
template <typename T>
class ParamIterator {
 public:
  typedef T value_type;
  typedef const T& reference;
  typedef ptrdiff_t difference_type;

  // ParamIterator assumes ownership of the impl_ pointer.
  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
  ParamIterator& operator=(const ParamIterator& other) {
    if (this != &other)
      impl_.reset(other.impl_->Clone());
    return *this;
  }

  const T& operator*() const { return *impl_->Current(); }
  const T* operator->() const { return impl_->Current(); }
  // Prefix version of operator++.
  ParamIterator& operator++() {
    impl_->Advance();
    return *this;
  }
  // Postfix version of operator++.
  ParamIterator operator++(int /*unused*/) {
    ParamIteratorInterface<T>* clone = impl_->Clone();
    impl_->Advance();
    return ParamIterator(clone);
  }
  bool operator==(const ParamIterator& other) const {
    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
  }
  bool operator!=(const ParamIterator& other) const {
    return !(*this == other);
  }

 private:
  friend class ParamGenerator<T>;
  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
  scoped_ptr<ParamIteratorInterface<T> > impl_;
};

// ParamGeneratorInterface<T> is the binary interface to access generators
// defined in other translation units.
template <typename T>
class ParamGeneratorInterface {
 public:
  // The type of the values produced by the generator.
  typedef T ParamType;

  virtual ~ParamGeneratorInterface() {}

  // Generator interface definition
  //
  // Begin() and End() return iterators for the start and the end of
  // the generated sequence.  The implementations visible in this file
  // allocate a new iterator with 'new' on every call, and
  // ParamGenerator<T> wraps the returned pointer in a ParamIterator<T>,
  // which takes ownership of it.
  virtual ParamIteratorInterface<T>* Begin() const = 0;
  virtual ParamIteratorInterface<T>* End() const = 0;
};

// Wraps ParamGeneratorInterface<T> and provides general generator syntax
// compatible with the STL Container concept.
// This class implements copy initialization semantics and the contained
// ParamGeneratorInterface<T> instance is shared among all copies
// of the original object. This is possible because that instance is immutable.
template<typename T>
class ParamGenerator {
 public:
  typedef ParamIterator<T> iterator;

  // Stores 'impl' in a linked_ptr, which is then copied by the copy
  // constructor and the assignment operator below.
  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}

  ParamGenerator& operator=(const ParamGenerator& other) {
    impl_ = other.impl_;
    return *this;
  }

  // Each call wraps a fresh iterator obtained from the underlying
  // generator's Begin() / End().
  iterator begin() const { return iterator(impl_->Begin()); }
  iterator end() const { return iterator(impl_->End()); }

 private:
  linked_ptr<const ParamGeneratorInterface<T> > impl_;
};

// Generates values from a range of two comparable values. Can be used to
// generate sequences of user-defined types that implement operator+() and
// operator<().
// This class is used in the Range() function.
template <typename T, typename IncrementT>
class RangeGenerator : public ParamGeneratorInterface<T> {
 public:
  // The sequence starts at 'begin' and grows by 'step' for as long as
  // the value stays less than 'end'.
  RangeGenerator(T begin, T end, IncrementT step)
      : begin_(begin), end_(end),
        step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
  virtual ~RangeGenerator() {}

  // Both functions return a newly allocated iterator.
  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, begin_, 0, step_);
  }
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, end_, end_index_, step_);
  }

 private:
  // Iterator over the generated range.  It carries both the current
  // value and its 0-based index; only the index is used for comparison.
  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
             IncrementT step)
        : base_(base), value_(value), index_(index), step_(step) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }
    virtual void Advance() {
      value_ = value_ + step_;
      ++index_;
    }
    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }
    virtual const T* Current() const { return &value_; }
    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // The downcast below is valid because iterators sharing a base
      // generator are guaranteed to be of the same type.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
          CheckedDowncastToActualType<const Iterator>(&other);
      return index_ == that->index_;
    }

   private:
    // Copy construction is reserved for Clone().
    Iterator(const Iterator& other)
        : ParamIteratorInterface<T>(),
          base_(other.base_), value_(other.value_), index_(other.index_),
          step_(other.step_) {}

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<T>* const base_;
    T value_;
    int index_;
    const IncrementT step_;
  };  // class RangeGenerator::Iterator

  // Counts the elements of the sequence by walking it from 'begin' in
  // increments of 'step' until the value is no longer less than 'end'.
  static int CalculateEndIndex(const T& begin,
                               const T& end,
                               const IncrementT& step) {
    int element_count = 0;
    T current = begin;
    while (current < end) {
      ++element_count;
      current = current + step;
    }
    return element_count;
  }

  // No implementation - assignment is unsupported.
  void operator=(const RangeGenerator& other);

  const T begin_;
  const T end_;
  const IncrementT step_;
  // The index for the end() iterator. All the elements in the generated
  // sequence are indexed (0-based) to aid iterator comparison.
  const int end_index_;
};  // class RangeGenerator


// Generates values from a pair of STL-style iterators. Used in the
// ValuesIn() function. The elements are copied from the source range
// since the source can be located on the stack, and the generator
// is likely to persist beyond that stack frame.
template <typename T>
class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
 public:
  // Copies the elements of [begin, end) into the generator's own
  // container.
  template <typename ForwardIterator>
  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
      : container_(begin, end) {}
  virtual ~ValuesInIteratorRangeGenerator() {}

  // Both functions return a newly allocated iterator.
  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, container_.begin());
  }
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, container_.end());
  }

 private:
  typedef typename ::std::vector<T> ContainerType;

  // Iterator over the copied elements.
  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base,
             typename ContainerType::const_iterator iterator)
        : base_(base), iterator_(iterator) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next element and drops the cached value, which
    // belongs to the previous position.
    virtual void Advance() {
      ++iterator_;
      value_.reset();
    }
    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }
    // We need to use cached value referenced by iterator_ because *iterator_
    // can return a temporary object (and of type other than T), so just
    // having "return &*iterator_;" doesn't work.
    // value_ is updated here and not in Advance() because Advance()
    // can advance iterator_ beyond the end of the range, and we cannot
    // detect that fact. The client code, on the other hand, is
    // responsible for not calling Current() on an out-of-range iterator.
    virtual const T* Current() const {
      if (value_.get() != NULL)
        return value_.get();
      value_.reset(new T(*iterator_));
      return value_.get();
    }
    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // The downcast below is valid because iterators sharing a base
      // generator are guaranteed to be of the same type.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
          CheckedDowncastToActualType<const Iterator>(&other);
      return iterator_ == that->iterator_;
    }

   private:
    // Copy construction is reserved for Clone().  The cached value is
    // not copied; the copy re-creates it on its first Current() call.
    Iterator(const Iterator& other)
          // The explicit constructor call suppresses a false warning
          // emitted by gcc when supplied with the -Wextra option.
        : ParamIteratorInterface<T>(),
          base_(other.base_),
          iterator_(other.iterator_) {}

    const ParamGeneratorInterface<T>* const base_;
    typename ContainerType::const_iterator iterator_;
    // A cached value of *iterator_. We keep it here to allow access by
    // pointer in the wrapping iterator's operator->().
    // value_ needs to be mutable to be accessed in Current().
    // Use of scoped_ptr helps manage cached value's lifetime,
    // which is bound by the lifespan of the iterator itself.
    mutable scoped_ptr<const T> value_;
  };  // class ValuesInIteratorRangeGenerator::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const ValuesInIteratorRangeGenerator& other);

  const ContainerType container_;
};  // class ValuesInIteratorRangeGenerator

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// A test factory bound to one parameter value. It keeps its own copy of the
// value and, each time a test object is requested, publishes the address of
// that copy through TestClass::SetParam() before constructing the test.
template <class TestClass>
class ParameterizedTestFactory : public TestFactoryBase {
 public:
  typedef typename TestClass::ParamType ParamType;

  // Stores a copy of |parameter| for use by every test this factory creates.
  explicit ParameterizedTestFactory(ParamType parameter)
      : parameter_(parameter) {}

  // Creates a new TestClass instance. The parameter is installed first so
  // that it is already set when the TestClass constructor runs.
  virtual Test* CreateTest() {
    const ParamType* const bound_parameter = &parameter_;
    TestClass::SetParam(bound_parameter);
    Test* const created_test = new TestClass();
    return created_test;
  }

 private:
  // The parameter value handed to each created test.
  const ParamType parameter_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// TestMetaFactoryBase is a base class for meta-factories that create
// test factories for passing into the MakeAndRegisterTestInfo function.
// It is templated on the parameter type only, so callers can create
// factories without naming the concrete test class.
template <class ParamType>
class TestMetaFactoryBase {
 public:
  // Virtual so that derived meta-factories can be deleted through a
  // pointer to this base.
  virtual ~TestMetaFactoryBase() {}

  // Returns a new test factory bound to |parameter|.
  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// TestMetaFactory produces test factories to hand to the
// MakeAndRegisterTestInfo function. MakeAndRegisterTestInfo takes ownership
// of the factory pointer it is given, so one factory object cannot be
// passed to it twice. ParameterizedTestCaseInfo, however, registers every
// Test/Parameter value combination, and therefore needs something that can
// mint a fresh factory per combination: this class.
template <class TestCase>
class TestMetaFactory
    : public TestMetaFactoryBase<typename TestCase::ParamType> {
 public:
  typedef typename TestCase::ParamType ParamType;

  TestMetaFactory() {}

  // Allocates a new ParameterizedTestFactory bound to |parameter| and
  // returns it as a raw pointer.
  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
    TestFactoryBase* const factory =
        new ParameterizedTestFactory<TestCase>(parameter);
    return factory;
  }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfoBase is a generic interface
// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
// accumulates test information provided by TEST_P macro invocations
// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
// and uses that information to register all resulting test instances
// in the RegisterTests method. The ParameterizedTestCaseRegistry class holds
// a collection of pointers to the ParameterizedTestCaseInfo objects
// and calls RegisterTests() on each of them when asked.
class ParameterizedTestCaseInfoBase {
 public:
  virtual ~ParameterizedTestCaseInfoBase() {}

  // Base part of test case name for display purposes.
  virtual const string& GetTestCaseName() const = 0;
  // Test case id to verify identity.
  virtual TypeId GetTestCaseTypeId() const = 0;
  // UnitTest class invokes this method to register tests in this
  // test case right before running them in RUN_ALL_TESTS macro.
  // This method should not be called more than once on any single
  // instance of a ParameterizedTestCaseInfoBase derived class.
  virtual void RegisterTests() = 0;

 protected:
  // Only derived classes may construct this interface.
  ParameterizedTestCaseInfoBase() {}

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
// macro invocations for a particular test case and generators
// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
// test case. It registers tests with all values generated by all
// generators when asked.
template <class TestCase>
class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
 public:
  // ParamType and GeneratorCreationFunc are private types but are required
  // for declarations of public methods AddTestPattern() and
  // AddTestCaseInstantiation().
  typedef typename TestCase::ParamType ParamType;
  // A function that returns an instance of appropriate generator type.
  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();

  explicit ParameterizedTestCaseInfo(const char* name)
      : test_case_name_(name) {}

  // Test case base name for display purposes.
  virtual const string& GetTestCaseName() const { return test_case_name_; }
  // Test case id to verify identity.
  virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
  // TEST_P macro uses AddTestPattern() to record information
  // about a single test in a LocalTestInfo structure.
  // test_case_name is the base name of the test case (without invocation
  // prefix). test_base_name is the name of an individual test without
  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
  // test case base name and DoBar is test base name.
  void AddTestPattern(const char* test_case_name,
                      const char* test_base_name,
                      TestMetaFactoryBase<ParamType>* meta_factory) {
    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
                                                       test_base_name,
                                                       meta_factory)));
  }
  // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
  // about a generator.
  int AddTestCaseInstantiation(const string& instantiation_name,
                               GeneratorCreationFunc* func,
                               const char* /* file */,
                               int /* line */) {
    instantiations_.push_back(::std::make_pair(instantiation_name, func));
    return 0;  // Return value used only to run this method in namespace scope.
  }
  // UnitTest class invokes this method to register tests in this test case
  // test cases right before running tests in RUN_ALL_TESTS macro.
  // This method should not be called more then once on any single
  // instance of a ParameterizedTestCaseInfoBase derived class.
  // UnitTest has a guard to prevent from calling this method more then once.
  virtual void RegisterTests() {
    for (typename TestInfoContainer::iterator test_it = tests_.begin();
         test_it != tests_.end(); ++test_it) {
      linked_ptr<TestInfo> test_info = *test_it;
      for (typename InstantiationContainer::iterator gen_it =
               instantiations_.begin(); gen_it != instantiations_.end();
               ++gen_it) {
        const string& instantiation_name = gen_it->first;
        ParamGenerator<ParamType> generator((*gen_it->second)());

        Message test_case_name_stream;
        if ( !instantiation_name.empty() )
          test_case_name_stream << instantiation_name << "/";
        test_case_name_stream << test_info->test_case_base_name;

        int i = 0;
        for (typename ParamGenerator<ParamType>::iterator param_it =
                 generator.begin();
             param_it != generator.end(); ++param_it, ++i) {
          Message test_name_stream;
          test_name_stream << test_info->test_base_name << "/" << i;
          MakeAndRegisterTestInfo(
              test_case_name_stream.GetString().c_str(),
              test_name_stream.GetString().c_str(),
              NULL,  // No type parameter.
              PrintToString(*param_it).c_str(),
              GetTestCaseTypeId(),
              TestCase::SetUpTestCase,
              TestCase::TearDownTestCase,
              test_info->test_meta_factory->CreateTestFactory(*param_it));
        }  // for param_it
      }  // for gen_it
    }  // for test_it
  }  // RegisterTests

 private:
  // LocalTestInfo structure keeps information about a single test registered
  // with TEST_P macro.
  struct TestInfo {
    TestInfo(const char* a_test_case_base_name,
             const char* a_test_base_name,
             TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
        test_case_base_name(a_test_case_base_name),
        test_base_name(a_test_base_name),
        test_meta_factory(a_test_meta_factory) {}

    const string test_case_base_name;
    const string test_base_name;
    const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
  };
  typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
  // Keeps pairs of <Instantiation name, Sequence generator creation function>
  // received from INSTANTIATE_TEST_CASE_P macros.
  typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
      InstantiationContainer;

  const string test_case_name_;
  TestInfoContainer tests_;
  InstantiationContainer instantiations_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
};  // class ParameterizedTestCaseInfo

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
// macros use it to locate their corresponding ParameterizedTestCaseInfo
// descriptors.
class ParameterizedTestCaseRegistry {
 public:
  ParameterizedTestCaseRegistry() {}
  ~ParameterizedTestCaseRegistry() {
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      delete *it;
    }
  }

  // Looks up or creates and returns a structure containing information about
  // tests and instantiations of a particular test case.
  template <class TestCase>
  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
      const char* test_case_name,
      const char* file,
      int line) {
    ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      if ((*it)->GetTestCaseName() == test_case_name) {
        if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
          // Complain about incorrect usage of Google Test facilities
          // and terminate the program since we cannot guaranty correct
          // test case setup and tear-down in this case.
          ReportInvalidTestCaseType(test_case_name,  file, line);
          posix::Abort();
        } else {
          // At this point we are sure that the object we found is of the same
          // type we are looking for, so we downcast it to that type
          // without further checks.
          typed_test_info = CheckedDowncastToActualType<
              ParameterizedTestCaseInfo<TestCase> >(*it);
        }
        break;
      }
    }
    if (typed_test_info == NULL) {
      typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
      test_case_infos_.push_back(typed_test_info);
    }
    return typed_test_info;
  }
  void RegisterTests() {
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      (*it)->RegisterTests();
    }
  }

 private:
  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;

  TestCaseInfoContainer test_case_infos_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
};

}  // namespace internal
}  // namespace testing

#endif  //  GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
// This file was GENERATED by command:
//     pump.py gtest-param-util-generated.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)

// Type and function utilities for implementing parameterized tests.
// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently Google Test supports at most 50 arguments in Values,
// and at most 10 arguments in Combine. Please contact
// googletestframework@googlegroups.com if you need more.
// Please note that the number of arguments to Combine is limited
// by the maximum arity of the implementation of tr1::tuple which is
// currently set at 10.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.

#if GTEST_HAS_PARAM_TEST

namespace testing {

// Forward declarations of ValuesIn(), which is implemented in
// include/gtest/gtest-param-test.h. They are needed here because the
// ValueArray classes below call ValuesIn() from their conversion operators.

// Overload taking an iterator range [begin, end). The generator's element
// type is the iterator's value_type.
template <typename ForwardIterator>
internal::ParamGenerator<
  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
ValuesIn(ForwardIterator begin, ForwardIterator end);

// Overload taking a C array of N elements of type T.
template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);

// Overload taking a container; the element type is Container::value_type.
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
    const Container& container);

namespace internal {

// Used in the Values() function to provide polymorphic capabilities.
//
// Holds a single value of type T1 and converts it, on demand, into a
// ParamGenerator<T> for any T the value can be static_cast to.
template <typename T1>
class ValueArray1 {
 public:
  // Stores a copy of |v1|.
  explicit ValueArray1(T1 v1) : v1_(v1) {}

  // Builds a one-element generator of T. The value is first converted into
  // a T array: a pointer range over v1_ would yield a generator of T1,
  // which is only usable when T and T1 are the same type.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {static_cast<T>(v1_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray1& other);

  const T1 v1_;
};

// Holds the 2 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2>
class ValueArray2 {
 public:
  // Stores a copy of every argument.
  ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray2& other);

  const T1 v1_;
  const T2 v2_;
};

// Holds the 3 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3>
class ValueArray3 {
 public:
  // Stores a copy of every argument.
  ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray3& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
};

// Holds the 4 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4>
class ValueArray4 {
 public:
  // Stores a copy of every argument.
  ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3),
      v4_(v4) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray4& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
};

// Holds the 5 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
class ValueArray5 {
 public:
  // Stores a copy of every argument.
  ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3),
      v4_(v4), v5_(v5) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray5& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
};

// Holds the 6 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6>
class ValueArray6 {
 public:
  // Stores a copy of every argument.
  ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2),
      v3_(v3), v4_(v4), v5_(v5), v6_(v6) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray6& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
};

// Holds the 7 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7>
class ValueArray7 {
 public:
  // Stores a copy of every argument.
  ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1),
      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray7& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
};

// Holds the 8 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8>
class ValueArray8 {
 public:
  // Stores a copy of every argument.
  ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
      T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray8& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
};

// Holds the 9 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9>
class ValueArray9 {
 public:
  // Stores a copy of every argument.
  ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
      T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray9& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
};

// Holds the 10 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10>
class ValueArray10 {
 public:
  // Stores a copy of every argument.
  ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9), v10_(v10) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray10& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
};

// Holds the 11 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11>
class ValueArray11 {
 public:
  // Stores a copy of every argument.
  ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray11& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
};

// Holds the 12 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12>
class ValueArray12 {
 public:
  // Stores a copy of every argument.
  ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray12& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
};

// Holds the 13 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13>
class ValueArray13 {
 public:
  // Stores a copy of every argument.
  ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
      v12_(v12), v13_(v13) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray13& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
};

// Holds the 14 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14>
class ValueArray14 {
 public:
  // Stores a copy of every argument.
  ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3),
      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray14& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
};

// Holds the 15 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15>
class ValueArray15 {
 public:
  // Stores a copy of every argument.
  ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2),
      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray15& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
};

// Holds the 16 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16>
class ValueArray16 {
 public:
  // Stores a copy of every argument.
  ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1),
      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
      v16_(v16) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray16& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
};

// Holds the 17 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17>
class ValueArray17 {
 public:
  // Stores a copy of every argument.
  ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
      T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
      v15_(v15), v16_(v16), v17_(v17) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray17& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
};

// Holds the 18 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18>
class ValueArray18 {
 public:
  // Stores a copy of every argument.
  ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
      v15_(v15), v16_(v16), v17_(v17), v18_(v18) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray18& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
};

// Holds the 19 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19>
class ValueArray19 {
 public:
  // Stores a copy of every argument.
  ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray19& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
};

// Holds the 20 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20>
class ValueArray20 {
 public:
  // Stores a copy of every argument.
  ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
      v19_(v19), v20_(v20) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray20& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
};

// Holds the 21 values (of possibly different types) given to Values() and
// converts them, on demand, into a ParamGenerator<T>.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21>
class ValueArray21 {
 public:
  // Stores a copy of every argument.
  ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
      v18_(v18), v19_(v19), v20_(v20), v21_(v21) {}

  // Builds a generator over the stored values converted to T. Each value
  // is converted with an explicit static_cast<T>: brace-initializing the
  // array directly from members of another type (e.g. int members with
  // T = double) is a narrowing conversion, which C++11 rejects.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray21& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
};

// Holds 22 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22>
class ValueArray22 {
 public:
  ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray22& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
};

// Holds 23 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23>
class ValueArray23 {
 public:
  ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray23& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
};

// Holds 24 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24>
class ValueArray24 {
 public:
  ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray24& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
};

// Holds 25 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25>
class ValueArray25 {
 public:
  ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray25& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
};

// Holds 26 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26>
class ValueArray26 {
 public:
  ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray26& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
};

// Holds 27 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27>
class ValueArray27 {
 public:
  ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray27& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
};

// Holds 28 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28>
class ValueArray28 {
 public:
  ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray28& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
};

// Holds 29 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29>
class ValueArray29 {
 public:
  ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray29& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
};

// Holds 30 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30>
class ValueArray30 {
 public:
  ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray30& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
};

// Holds 31 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31>
class ValueArray31 {
 public:
  ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray31& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
};

// Holds 32 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32>
class ValueArray32 {
 public:
  ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray32& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
};

// Holds 33 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33>
class ValueArray33 {
 public:
  ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_, v33_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray33& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
};

// Holds 34 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34>
class ValueArray34 {
 public:
  ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_, v33_, v34_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray34& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
};

// Holds 35 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35>
class ValueArray35 {
 public:
  ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_, v33_, v34_, v35_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray35& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
};

// Holds 36 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36>
class ValueArray36 {
 public:
  ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
               T36 v36)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
        v36_(v36) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_, v33_, v34_, v35_, v36_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray36& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
};

// Holds 37 values, each with its own type, as const copies.  Converting an
// instance to ParamGenerator<T> copies every stored value into a local array
// of T and returns the result of ValuesIn() on that array.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37>
class ValueArray37 {
 public:
  ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
               T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
               T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
               T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
               T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
               T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
               T36 v36, T37 v37)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
        v36_(v36), v37_(v37) {}

  // Each stored value is implicitly converted to T when the array is built.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_, v9_, v10_,
        v11_, v12_, v13_, v14_, v15_, v16_, v17_, v18_, v19_, v20_,
        v21_, v22_, v23_, v24_, v25_, v26_, v27_, v28_, v29_, v30_,
        v31_, v32_, v33_, v34_, v35_, v36_, v37_};
    return ValuesIn(values);
  }

 private:
  // Declared private and left undefined: instances cannot be assigned.
  void operator=(const ValueArray37& rhs);

  // Stored values, declared in the order the constructor initializes them.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
};

// Holds 38 values of independently chosen types (T1..T38) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38>
class ValueArray38 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3),
      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
      v35_(v35), v36_(v36), v37_(v37), v38_(v38) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray38& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
};

// Holds 39 values of independently chosen types (T1..T39) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39>
class ValueArray39 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2),
      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray39& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
};

// Holds 40 values of independently chosen types (T1..T40) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40>
class ValueArray40 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1),
      v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
      v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
      v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
      v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
      v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
      v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
      v40_(v40) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray40& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
};

// Holds 41 values of independently chosen types (T1..T41) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41>
class ValueArray41 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
      T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
      v39_(v39), v40_(v40), v41_(v41) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray41& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
};

// Holds 42 values of independently chosen types (T1..T42) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42>
class ValueArray42 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
      v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
      v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
      v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
      v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
      v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
      v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray42& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
};

// Holds 43 values of independently chosen types (T1..T43) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43>
class ValueArray43 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
      v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
      v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
      v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
      v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
      v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37),
      v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray43& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
};

// Holds 44 values of independently chosen types (T1..T44) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44>
class ValueArray44 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
      v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
      v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
      v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
      v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
      v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
      v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
      v43_(v43), v44_(v44) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray44& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
};

// Holds 45 values of independently chosen types (T1..T45) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45>
class ValueArray45 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
      v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
      v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
      v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
      v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
      v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
      v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41),
      v42_(v42), v43_(v43), v44_(v44), v45_(v45) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray45& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
};

// Holds 46 values of independently chosen types (T1..T46) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46>
class ValueArray46 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3),
      v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
      v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray46& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
};

// Holds 47 values of independently chosen types (T1..T47) and converts, on
// demand, to a ParamGenerator<T> over those values.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47>
class ValueArray47 {
 public:
  // Stores a copy of every argument, in declaration order.
  ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
      T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
      T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
      T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
      T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
      T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2),
      v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
      v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
      v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
      v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
      v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
      v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
      v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46),
      v47_(v47) {}

  // Converts the stored values to a ParamGenerator<T> by copying each one
  // into a local array of T and handing that array to ValuesIn().
  // Every element goes through static_cast<T> so that stored types which
  // differ from T (e.g. int values feeding a floating-point T) do not form
  // narrowing conversions inside the braced initializer, which C++11 and
  // later reject.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_), static_cast<T>(v47_)
    };
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray47& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
};

// Bundles 48 values of possibly different types. An instance converts
// implicitly to ParamGenerator<T> for any T that every stored value can
// initialize.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48>
class ValueArray48 {
 public:
  // Copies every argument into the member carrying the same index.
  ValueArray48(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48) {}

  // Builds an array of T from the stored values, in argument order, and
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_,
        v9_, v10_, v11_, v12_, v13_, v14_, v15_, v16_,
        v17_, v18_, v19_, v20_, v21_, v22_, v23_, v24_,
        v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_,
        v33_, v34_, v35_, v36_, v37_, v38_, v39_, v40_,
        v41_, v42_, v43_, v44_, v45_, v46_, v47_, v48_
    };
    return ValuesIn(values);
  }

 private:
  // The stored values; all const, fixed at construction.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;

  // Declared but never defined: assignment is unsupported.
  void operator=(const ValueArray48& other);
};

// Bundles 49 values of possibly different types. An instance converts
// implicitly to ParamGenerator<T> for any T that every stored value can
// initialize.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48,
          typename T49>
class ValueArray49 {
 public:
  // Copies every argument into the member carrying the same index.
  ValueArray49(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
      T49 v49)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48),
        v49_(v49) {}

  // Builds an array of T from the stored values, in argument order, and
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_,
        v9_, v10_, v11_, v12_, v13_, v14_, v15_, v16_,
        v17_, v18_, v19_, v20_, v21_, v22_, v23_, v24_,
        v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_,
        v33_, v34_, v35_, v36_, v37_, v38_, v39_, v40_,
        v41_, v42_, v43_, v44_, v45_, v46_, v47_, v48_,
        v49_
    };
    return ValuesIn(values);
  }

 private:
  // The stored values; all const, fixed at construction.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;
  const T49 v49_;

  // Declared but never defined: assignment is unsupported.
  void operator=(const ValueArray49& other);
};

// Bundles 50 values of possibly different types. An instance converts
// implicitly to ParamGenerator<T> for any T that every stored value can
// initialize.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48,
          typename T49, typename T50>
class ValueArray50 {
 public:
  // Copies every argument into the member carrying the same index.
  ValueArray50(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
      T49 v49, T50 v50)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48),
        v49_(v49), v50_(v50) {}

  // Builds an array of T from the stored values, in argument order, and
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        v1_, v2_, v3_, v4_, v5_, v6_, v7_, v8_,
        v9_, v10_, v11_, v12_, v13_, v14_, v15_, v16_,
        v17_, v18_, v19_, v20_, v21_, v22_, v23_, v24_,
        v25_, v26_, v27_, v28_, v29_, v30_, v31_, v32_,
        v33_, v34_, v35_, v36_, v37_, v38_, v39_, v40_,
        v41_, v42_, v43_, v44_, v45_, v46_, v47_, v48_,
        v49_, v50_
    };
    return ValuesIn(values);
  }

 private:
  // The stored values; all const, fixed at construction.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;
  const T49 v49_;
  const T50 v50_;

  // Declared but never defined: assignment is unsupported.
  void operator=(const ValueArray50& other);
};

# if GTEST_HAS_COMBINE
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Parameter generator whose values are the tuples of the Cartesian product
// of the values produced by two argument generators. The second generator
// varies fastest.
//
template <typename T1, typename T2>
class CartesianProductGenerator2
    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2> > {
 public:
  typedef ::std::tr1::tuple<T1, T2> ParamType;

  // Keeps a copy of each argument generator.
  CartesianProductGenerator2(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2)
      : g1_(g1),
        g2_(g2) {}
  virtual ~CartesianProductGenerator2() {}

  // Returns a newly allocated iterator with every component at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin());
  }
  // Returns a newly allocated iterator with every component at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end());
  }

 private:
  // Walks the product. Holds a (begin, end, current) iterator triple per
  // component generator plus a cached tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next tuple. Must not be called on a past-the-end
    // iterator, so no component may already be at its end either.
    virtual void Advance() {
      assert(!AtEnd());
      ++current2_;
      // Carry: when component 2 wraps around, step component 1.
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }
      ComputeCurrentValue();
    }

    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }

    virtual const ParamType* Current() const { return &current_value_; }

    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees that |other| has the same
      // dynamic type, which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal regardless of how their
      // components got there, hence the AtEnd() consultation.
      if (AtEnd() && rhs->AtEnd())
        return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_;
    }

   private:
    // Used by Clone(); copies the iterator state and rebuilds the cache.
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_),
          end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_),
          end2_(other.end2_),
          current2_(other.current2_) {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple; leaves it untouched when past the end.
    void ComputeCurrentValue() {
      if (AtEnd())
        return;
      current_value_ = ParamType(*current1_, *current2_);
    }

    // The iterator is past the end as soon as any one of the component
    // iterators has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_)
        return true;
      return current2_ == end2_;
    }

    // Declared but never defined: assignment is unsupported.
    void operator=(const Iterator& other);

    // Generator this iterator was created from.
    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ / end[i]_ delimit the i-th range being traversed and
    // current[i]_ is the position inside it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    // Tuple built from the current component positions.
    ParamType current_value_;
  };  // class CartesianProductGenerator2::Iterator

  // Declared but never defined: assignment is unsupported.
  void operator=(const CartesianProductGenerator2& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
};  // class CartesianProductGenerator2


// Parameter generator whose values are the tuples of the Cartesian product
// of the values produced by three argument generators. The last generator
// varies fastest.
template <typename T1, typename T2, typename T3>
class CartesianProductGenerator3
    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3> ParamType;

  // Keeps a copy of each argument generator.
  CartesianProductGenerator3(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3)
      : g1_(g1),
        g2_(g2),
        g3_(g3) {}
  virtual ~CartesianProductGenerator3() {}

  // Returns a newly allocated iterator with every component at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin());
  }
  // Returns a newly allocated iterator with every component at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end());
  }

 private:
  // Walks the product. Holds a (begin, end, current) iterator triple per
  // component generator plus a cached tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next tuple. Must not be called on a past-the-end
    // iterator, so no component may already be at its end either.
    virtual void Advance() {
      assert(!AtEnd());
      ++current3_;
      // Carry from the fastest component towards the slowest one.
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }
      ComputeCurrentValue();
    }

    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }

    virtual const ParamType* Current() const { return &current_value_; }

    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees that |other| has the same
      // dynamic type, which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal regardless of how their
      // components got there, hence the AtEnd() consultation.
      if (AtEnd() && rhs->AtEnd())
        return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_;
    }

   private:
    // Used by Clone(); copies the iterator state and rebuilds the cache.
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_),
          end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_),
          end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_),
          end3_(other.end3_),
          current3_(other.current3_) {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple; leaves it untouched when past the end.
    void ComputeCurrentValue() {
      if (AtEnd())
        return;
      current_value_ = ParamType(*current1_, *current2_, *current3_);
    }

    // The iterator is past the end as soon as any one of the component
    // iterators has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_)
        return true;
      if (current2_ == end2_)
        return true;
      return current3_ == end3_;
    }

    // Declared but never defined: assignment is unsupported.
    void operator=(const Iterator& other);

    // Generator this iterator was created from.
    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ / end[i]_ delimit the i-th range being traversed and
    // current[i]_ is the position inside it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    // Tuple built from the current component positions.
    ParamType current_value_;
  };  // class CartesianProductGenerator3::Iterator

  // Declared but never defined: assignment is unsupported.
  void operator=(const CartesianProductGenerator3& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
};  // class CartesianProductGenerator3


// Parameter generator whose values are the tuples of the Cartesian product
// of the values produced by four argument generators. The last generator
// varies fastest.
template <typename T1, typename T2, typename T3, typename T4>
class CartesianProductGenerator4
    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4> ParamType;

  // Keeps a copy of each argument generator.
  CartesianProductGenerator4(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4) {}
  virtual ~CartesianProductGenerator4() {}

  // Returns a newly allocated iterator with every component at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin());
  }
  // Returns a newly allocated iterator with every component at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end());
  }

 private:
  // Walks the product. Holds a (begin, end, current) iterator triple per
  // component generator plus a cached tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next tuple. Must not be called on a past-the-end
    // iterator, so no component may already be at its end either.
    virtual void Advance() {
      assert(!AtEnd());
      ++current4_;
      // Carry from the fastest component towards the slowest one.
      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }
      ComputeCurrentValue();
    }

    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }

    virtual const ParamType* Current() const { return &current_value_; }

    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees that |other| has the same
      // dynamic type, which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal regardless of how their
      // components got there, hence the AtEnd() consultation.
      if (AtEnd() && rhs->AtEnd())
        return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_;
    }

   private:
    // Used by Clone(); copies the iterator state and rebuilds the cache.
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_),
          end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_),
          end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_),
          end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_),
          end4_(other.end4_),
          current4_(other.current4_) {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple; leaves it untouched when past the end.
    void ComputeCurrentValue() {
      if (AtEnd())
        return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_);
    }

    // The iterator is past the end as soon as any one of the component
    // iterators has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_)
        return true;
      if (current2_ == end2_)
        return true;
      if (current3_ == end3_)
        return true;
      return current4_ == end4_;
    }

    // Declared but never defined: assignment is unsupported.
    void operator=(const Iterator& other);

    // Generator this iterator was created from.
    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ / end[i]_ delimit the i-th range being traversed and
    // current[i]_ is the position inside it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    // Tuple built from the current component positions.
    ParamType current_value_;
  };  // class CartesianProductGenerator4::Iterator

  // Declared but never defined: assignment is unsupported.
  void operator=(const CartesianProductGenerator4& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
};  // class CartesianProductGenerator4


// Parameter generator whose values are the tuples of the Cartesian product
// of the values produced by five argument generators. The last generator
// varies fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
class CartesianProductGenerator5
    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> ParamType;

  // Keeps a copy of each argument generator.
  CartesianProductGenerator5(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4),
        g5_(g5) {}
  virtual ~CartesianProductGenerator5() {}

  // Returns a newly allocated iterator with every component at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin());
  }
  // Returns a newly allocated iterator with every component at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end());
  }

 private:
  // Walks the product. Holds a (begin, end, current) iterator triple per
  // component generator plus a cached tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4),
          begin5_(g5.begin()),
          end5_(g5.end()),
          current5_(current5) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next tuple. Must not be called on a past-the-end
    // iterator, so no component may already be at its end either.
    virtual void Advance() {
      assert(!AtEnd());
      ++current5_;
      // Carry from the fastest component towards the slowest one.
      if (current5_ == end5_) {
        current5_ = begin5_;
        ++current4_;
      }
      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }
      ComputeCurrentValue();
    }

    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }

    virtual const ParamType* Current() const { return &current_value_; }

    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees that |other| has the same
      // dynamic type, which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal regardless of how their
      // components got there, hence the AtEnd() consultation.
      if (AtEnd() && rhs->AtEnd())
        return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_;
    }

   private:
    // Used by Clone(); copies the iterator state and rebuilds the cache.
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_),
          end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_),
          end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_),
          end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_),
          end4_(other.end4_),
          current4_(other.current4_),
          begin5_(other.begin5_),
          end5_(other.end5_),
          current5_(other.current5_) {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple; leaves it untouched when past the end.
    void ComputeCurrentValue() {
      if (AtEnd())
        return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_);
    }

    // The iterator is past the end as soon as any one of the component
    // iterators has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_)
        return true;
      if (current2_ == end2_)
        return true;
      if (current3_ == end3_)
        return true;
      if (current4_ == end4_)
        return true;
      return current5_ == end5_;
    }

    // Declared but never defined: assignment is unsupported.
    void operator=(const Iterator& other);

    // Generator this iterator was created from.
    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ / end[i]_ delimit the i-th range being traversed and
    // current[i]_ is the position inside it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    // Tuple built from the current component positions.
    ParamType current_value_;
  };  // class CartesianProductGenerator5::Iterator

  // Declared but never defined: assignment is unsupported.
  void operator=(const CartesianProductGenerator5& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
};  // class CartesianProductGenerator5


// Generator yielding every combination of values drawn from six component
// generators.  Each combination is delivered as a 6-element tuple; during
// iteration the right-most component changes fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6>
class CartesianProductGenerator6
    : public ParamGeneratorInterface<
          ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> ParamType;

  // Keeps its own copy of each component generator.
  CartesianProductGenerator6(
      const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2,
      const ParamGenerator<T3>& g3,
      const ParamGenerator<T4>& g4,
      const ParamGenerator<T5>& g5,
      const ParamGenerator<T6>& g6)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4),
        g5_(g5),
        g6_(g6) {}
  virtual ~CartesianProductGenerator6() {}

  // Allocates an iterator whose components all sit at the start of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin());
  }
  // Allocates an iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end());
  }

 private:
  // Walks the product space.  For every component it remembers the range
  // bounds plus a cursor, and caches the tuple the cursors currently select.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each i, gi supplies the bounds of the i-th range and currenti is
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3,
      const ParamGenerator<T4>& g4,
      const typename ParamGenerator<T4>::iterator& current4,
      const ParamGenerator<T5>& g5,
      const typename ParamGenerator<T5>::iterator& current5,
      const ParamGenerator<T6>& g6,
      const typename ParamGenerator<T6>::iterator& current6)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4),
          begin5_(g5.begin()),
          end5_(g5.end()),
          current5_(current5),
          begin6_(g6.begin()),
          end6_(g6.end()),
          current6_(current6) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next combination.  Callers must not invoke this on an
    // iterator that is already past the end, hence none of the component
    // cursors may be at the end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-moving component, then ripple any overflow towards
      // the first component.  The first component is never rewound: once it
      // reaches its end the whole iterator is past the end.
      ++current6_;
      CarryIfExhausted(&current6_, begin6_, end6_, &current5_);
      CarryIfExhausted(&current5_, begin5_, end5_, &current4_);
      CarryIfExhausted(&current4_, begin4_, end4_, &current3_);
      CarryIfExhausted(&current3_, begin3_, end3_, &current2_);
      CarryIfExhausted(&current2_, begin2_, end2_, &current1_);
      ComputeCurrentValue();
    }
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    virtual const ParamType* Current() const { return &current_value_; }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means |other| has this very Iterator type,
      // which is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // ran out, so AtEnd() is consulted before the cursors are compared.
      if (AtEnd() && rhs->AtEnd()) return true;
      if (!(current1_ == rhs->current1_)) return false;
      if (!(current2_ == rhs->current2_)) return false;
      if (!(current3_ == rhs->current3_)) return false;
      if (!(current4_ == rhs->current4_)) return false;
      if (!(current5_ == rhs->current5_)) return false;
      return current6_ == rhs->current6_;
    }

   private:
    // Copying is reserved for Clone().
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_), end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_), end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_), end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_), end4_(other.end4_),
          current4_(other.current4_),
          begin5_(other.begin5_), end5_(other.end5_),
          current5_(other.current5_),
          begin6_(other.begin6_), end6_(other.end6_),
          current6_(other.current6_) {
      ComputeCurrentValue();
    }

    // When *lower has run off the end of its range, rewinds it to
    // lower_begin and steps *upper forward by one position.
    template <typename Lower, typename Upper>
    static void CarryIfExhausted(Lower* lower, const Lower& lower_begin,
                                 const Lower& lower_end, Upper* upper) {
      if (*lower == lower_end) {
        *lower = lower_begin;
        ++*upper;
      }
    }

    // Refreshes the cached tuple from the cursors; leaves it untouched when
    // the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_);
    }
    // The iterator counts as past the end as soon as any single component
    // cursor has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      return current6_ == end6_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // For the i-th component: begin[i]_/end[i]_ bound the range being
    // traversed and current[i]_ is the cursor moving through it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    ParamType current_value_;
  };  // class CartesianProductGenerator6::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator6& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
};  // class CartesianProductGenerator6


// Generator yielding every combination of values drawn from seven component
// generators.  Each combination is delivered as a 7-element tuple; during
// iteration the right-most component changes fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7>
class CartesianProductGenerator7
    : public ParamGeneratorInterface<
          ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType;

  // Keeps its own copy of each component generator.
  CartesianProductGenerator7(
      const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2,
      const ParamGenerator<T3>& g3,
      const ParamGenerator<T4>& g4,
      const ParamGenerator<T5>& g5,
      const ParamGenerator<T6>& g6,
      const ParamGenerator<T7>& g7)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4),
        g5_(g5),
        g6_(g6),
        g7_(g7) {}
  virtual ~CartesianProductGenerator7() {}

  // Allocates an iterator whose components all sit at the start of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin());
  }
  // Allocates an iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end());
  }

 private:
  // Walks the product space.  For every component it remembers the range
  // bounds plus a cursor, and caches the tuple the cursors currently select.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each i, gi supplies the bounds of the i-th range and currenti is
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3,
      const ParamGenerator<T4>& g4,
      const typename ParamGenerator<T4>::iterator& current4,
      const ParamGenerator<T5>& g5,
      const typename ParamGenerator<T5>::iterator& current5,
      const ParamGenerator<T6>& g6,
      const typename ParamGenerator<T6>::iterator& current6,
      const ParamGenerator<T7>& g7,
      const typename ParamGenerator<T7>::iterator& current7)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4),
          begin5_(g5.begin()),
          end5_(g5.end()),
          current5_(current5),
          begin6_(g6.begin()),
          end6_(g6.end()),
          current6_(current6),
          begin7_(g7.begin()),
          end7_(g7.end()),
          current7_(current7) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next combination.  Callers must not invoke this on an
    // iterator that is already past the end, hence none of the component
    // cursors may be at the end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-moving component, then ripple any overflow towards
      // the first component.  The first component is never rewound: once it
      // reaches its end the whole iterator is past the end.
      ++current7_;
      CarryIfExhausted(&current7_, begin7_, end7_, &current6_);
      CarryIfExhausted(&current6_, begin6_, end6_, &current5_);
      CarryIfExhausted(&current5_, begin5_, end5_, &current4_);
      CarryIfExhausted(&current4_, begin4_, end4_, &current3_);
      CarryIfExhausted(&current3_, begin3_, end3_, &current2_);
      CarryIfExhausted(&current2_, begin2_, end2_, &current1_);
      ComputeCurrentValue();
    }
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    virtual const ParamType* Current() const { return &current_value_; }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means |other| has this very Iterator type,
      // which is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // ran out, so AtEnd() is consulted before the cursors are compared.
      if (AtEnd() && rhs->AtEnd()) return true;
      if (!(current1_ == rhs->current1_)) return false;
      if (!(current2_ == rhs->current2_)) return false;
      if (!(current3_ == rhs->current3_)) return false;
      if (!(current4_ == rhs->current4_)) return false;
      if (!(current5_ == rhs->current5_)) return false;
      if (!(current6_ == rhs->current6_)) return false;
      return current7_ == rhs->current7_;
    }

   private:
    // Copying is reserved for Clone().
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_), end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_), end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_), end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_), end4_(other.end4_),
          current4_(other.current4_),
          begin5_(other.begin5_), end5_(other.end5_),
          current5_(other.current5_),
          begin6_(other.begin6_), end6_(other.end6_),
          current6_(other.current6_),
          begin7_(other.begin7_), end7_(other.end7_),
          current7_(other.current7_) {
      ComputeCurrentValue();
    }

    // When *lower has run off the end of its range, rewinds it to
    // lower_begin and steps *upper forward by one position.
    template <typename Lower, typename Upper>
    static void CarryIfExhausted(Lower* lower, const Lower& lower_begin,
                                 const Lower& lower_end, Upper* upper) {
      if (*lower == lower_end) {
        *lower = lower_begin;
        ++*upper;
      }
    }

    // Refreshes the cached tuple from the cursors; leaves it untouched when
    // the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_);
    }
    // The iterator counts as past the end as soon as any single component
    // cursor has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      return current7_ == end7_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // For the i-th component: begin[i]_/end[i]_ bound the range being
    // traversed and current[i]_ is the cursor moving through it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    const typename ParamGenerator<T7>::iterator begin7_;
    const typename ParamGenerator<T7>::iterator end7_;
    typename ParamGenerator<T7>::iterator current7_;
    ParamType current_value_;
  };  // class CartesianProductGenerator7::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator7& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
};  // class CartesianProductGenerator7


// Generator yielding every combination of values drawn from eight component
// generators.  Each combination is delivered as an 8-element tuple; during
// iteration the right-most component changes fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8>
class CartesianProductGenerator8
    : public ParamGeneratorInterface<
          ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType;

  // Keeps its own copy of each component generator.
  CartesianProductGenerator8(
      const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2,
      const ParamGenerator<T3>& g3,
      const ParamGenerator<T4>& g4,
      const ParamGenerator<T5>& g5,
      const ParamGenerator<T6>& g6,
      const ParamGenerator<T7>& g7,
      const ParamGenerator<T8>& g8)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4),
        g5_(g5),
        g6_(g6),
        g7_(g7),
        g8_(g8) {}
  virtual ~CartesianProductGenerator8() {}

  // Allocates an iterator whose components all sit at the start of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin(),
                        g8_, g8_.begin());
  }
  // Allocates an iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end(),
                        g8_, g8_.end());
  }

 private:
  // Walks the product space.  For every component it remembers the range
  // bounds plus a cursor, and caches the tuple the cursors currently select.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each i, gi supplies the bounds of the i-th range and currenti is
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3,
      const ParamGenerator<T4>& g4,
      const typename ParamGenerator<T4>::iterator& current4,
      const ParamGenerator<T5>& g5,
      const typename ParamGenerator<T5>::iterator& current5,
      const ParamGenerator<T6>& g6,
      const typename ParamGenerator<T6>::iterator& current6,
      const ParamGenerator<T7>& g7,
      const typename ParamGenerator<T7>::iterator& current7,
      const ParamGenerator<T8>& g8,
      const typename ParamGenerator<T8>::iterator& current8)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4),
          begin5_(g5.begin()),
          end5_(g5.end()),
          current5_(current5),
          begin6_(g6.begin()),
          end6_(g6.end()),
          current6_(current6),
          begin7_(g7.begin()),
          end7_(g7.end()),
          current7_(current7),
          begin8_(g8.begin()),
          end8_(g8.end()),
          current8_(current8) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next combination.  Callers must not invoke this on an
    // iterator that is already past the end, hence none of the component
    // cursors may be at the end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-moving component, then ripple any overflow towards
      // the first component.  The first component is never rewound: once it
      // reaches its end the whole iterator is past the end.
      ++current8_;
      CarryIfExhausted(&current8_, begin8_, end8_, &current7_);
      CarryIfExhausted(&current7_, begin7_, end7_, &current6_);
      CarryIfExhausted(&current6_, begin6_, end6_, &current5_);
      CarryIfExhausted(&current5_, begin5_, end5_, &current4_);
      CarryIfExhausted(&current4_, begin4_, end4_, &current3_);
      CarryIfExhausted(&current3_, begin3_, end3_, &current2_);
      CarryIfExhausted(&current2_, begin2_, end2_, &current1_);
      ComputeCurrentValue();
    }
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    virtual const ParamType* Current() const { return &current_value_; }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means |other| has this very Iterator type,
      // which is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // ran out, so AtEnd() is consulted before the cursors are compared.
      if (AtEnd() && rhs->AtEnd()) return true;
      if (!(current1_ == rhs->current1_)) return false;
      if (!(current2_ == rhs->current2_)) return false;
      if (!(current3_ == rhs->current3_)) return false;
      if (!(current4_ == rhs->current4_)) return false;
      if (!(current5_ == rhs->current5_)) return false;
      if (!(current6_ == rhs->current6_)) return false;
      if (!(current7_ == rhs->current7_)) return false;
      return current8_ == rhs->current8_;
    }

   private:
    // Copying is reserved for Clone().
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_), end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_), end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_), end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_), end4_(other.end4_),
          current4_(other.current4_),
          begin5_(other.begin5_), end5_(other.end5_),
          current5_(other.current5_),
          begin6_(other.begin6_), end6_(other.end6_),
          current6_(other.current6_),
          begin7_(other.begin7_), end7_(other.end7_),
          current7_(other.current7_),
          begin8_(other.begin8_), end8_(other.end8_),
          current8_(other.current8_) {
      ComputeCurrentValue();
    }

    // When *lower has run off the end of its range, rewinds it to
    // lower_begin and steps *upper forward by one position.
    template <typename Lower, typename Upper>
    static void CarryIfExhausted(Lower* lower, const Lower& lower_begin,
                                 const Lower& lower_end, Upper* upper) {
      if (*lower == lower_end) {
        *lower = lower_begin;
        ++*upper;
      }
    }

    // Refreshes the cached tuple from the cursors; leaves it untouched when
    // the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_, *current8_);
    }
    // The iterator counts as past the end as soon as any single component
    // cursor has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      if (current7_ == end7_) return true;
      return current8_ == end8_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // For the i-th component: begin[i]_/end[i]_ bound the range being
    // traversed and current[i]_ is the cursor moving through it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    const typename ParamGenerator<T7>::iterator begin7_;
    const typename ParamGenerator<T7>::iterator end7_;
    typename ParamGenerator<T7>::iterator current7_;
    const typename ParamGenerator<T8>::iterator begin8_;
    const typename ParamGenerator<T8>::iterator end8_;
    typename ParamGenerator<T8>::iterator current8_;
    ParamType current_value_;
  };  // class CartesianProductGenerator8::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator8& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
};  // class CartesianProductGenerator8


// Generator yielding every combination of values drawn from nine component
// generators.  Each combination is delivered as a 9-element tuple; during
// iteration the right-most component changes fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9>
class CartesianProductGenerator9
    : public ParamGeneratorInterface<
          ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType;

  // Keeps its own copy of each component generator.
  CartesianProductGenerator9(
      const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2,
      const ParamGenerator<T3>& g3,
      const ParamGenerator<T4>& g4,
      const ParamGenerator<T5>& g5,
      const ParamGenerator<T6>& g6,
      const ParamGenerator<T7>& g7,
      const ParamGenerator<T8>& g8,
      const ParamGenerator<T9>& g9)
      : g1_(g1),
        g2_(g2),
        g3_(g3),
        g4_(g4),
        g5_(g5),
        g6_(g6),
        g7_(g7),
        g8_(g8),
        g9_(g9) {}
  virtual ~CartesianProductGenerator9() {}

  // Allocates an iterator whose components all sit at the start of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin(),
                        g8_, g8_.begin(),
                        g9_, g9_.begin());
  }
  // Allocates an iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end(),
                        g8_, g8_.end(),
                        g9_, g9_.end());
  }

 private:
  // Walks the product space.  For every component it remembers the range
  // bounds plus a cursor, and caches the tuple the cursors currently select.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each i, gi supplies the bounds of the i-th range and currenti is
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3,
      const ParamGenerator<T4>& g4,
      const typename ParamGenerator<T4>::iterator& current4,
      const ParamGenerator<T5>& g5,
      const typename ParamGenerator<T5>::iterator& current5,
      const ParamGenerator<T6>& g6,
      const typename ParamGenerator<T6>::iterator& current6,
      const ParamGenerator<T7>& g7,
      const typename ParamGenerator<T7>::iterator& current7,
      const ParamGenerator<T8>& g8,
      const typename ParamGenerator<T8>::iterator& current8,
      const ParamGenerator<T9>& g9,
      const typename ParamGenerator<T9>::iterator& current9)
        : base_(base),
          begin1_(g1.begin()),
          end1_(g1.end()),
          current1_(current1),
          begin2_(g2.begin()),
          end2_(g2.end()),
          current2_(current2),
          begin3_(g3.begin()),
          end3_(g3.end()),
          current3_(current3),
          begin4_(g4.begin()),
          end4_(g4.end()),
          current4_(current4),
          begin5_(g5.begin()),
          end5_(g5.end()),
          current5_(current5),
          begin6_(g6.begin()),
          end6_(g6.end()),
          current6_(current6),
          begin7_(g7.begin()),
          end7_(g7.end()),
          current7_(current7),
          begin8_(g8.begin()),
          end8_(g8.end()),
          current8_(current8),
          begin9_(g9.begin()),
          end9_(g9.end()),
          current9_(current9) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next combination.  Callers must not invoke this on an
    // iterator that is already past the end, hence none of the component
    // cursors may be at the end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-moving component, then ripple any overflow towards
      // the first component.  The first component is never rewound: once it
      // reaches its end the whole iterator is past the end.
      ++current9_;
      CarryIfExhausted(&current9_, begin9_, end9_, &current8_);
      CarryIfExhausted(&current8_, begin8_, end8_, &current7_);
      CarryIfExhausted(&current7_, begin7_, end7_, &current6_);
      CarryIfExhausted(&current6_, begin6_, end6_, &current5_);
      CarryIfExhausted(&current5_, begin5_, end5_, &current4_);
      CarryIfExhausted(&current4_, begin4_, end4_, &current3_);
      CarryIfExhausted(&current3_, begin3_, end3_, &current2_);
      CarryIfExhausted(&current2_, begin2_, end2_, &current1_);
      ComputeCurrentValue();
    }
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    virtual const ParamType* Current() const { return &current_value_; }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means |other| has this very Iterator type,
      // which is what makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* rhs =
          CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // ran out, so AtEnd() is consulted before the cursors are compared.
      if (AtEnd() && rhs->AtEnd()) return true;
      if (!(current1_ == rhs->current1_)) return false;
      if (!(current2_ == rhs->current2_)) return false;
      if (!(current3_ == rhs->current3_)) return false;
      if (!(current4_ == rhs->current4_)) return false;
      if (!(current5_ == rhs->current5_)) return false;
      if (!(current6_ == rhs->current6_)) return false;
      if (!(current7_ == rhs->current7_)) return false;
      if (!(current8_ == rhs->current8_)) return false;
      return current9_ == rhs->current9_;
    }

   private:
    // Copying is reserved for Clone().
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_), end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_), end2_(other.end2_),
          current2_(other.current2_),
          begin3_(other.begin3_), end3_(other.end3_),
          current3_(other.current3_),
          begin4_(other.begin4_), end4_(other.end4_),
          current4_(other.current4_),
          begin5_(other.begin5_), end5_(other.end5_),
          current5_(other.current5_),
          begin6_(other.begin6_), end6_(other.end6_),
          current6_(other.current6_),
          begin7_(other.begin7_), end7_(other.end7_),
          current7_(other.current7_),
          begin8_(other.begin8_), end8_(other.end8_),
          current8_(other.current8_),
          begin9_(other.begin9_), end9_(other.end9_),
          current9_(other.current9_) {
      ComputeCurrentValue();
    }

    // When *lower has run off the end of its range, rewinds it to
    // lower_begin and steps *upper forward by one position.
    template <typename Lower, typename Upper>
    static void CarryIfExhausted(Lower* lower, const Lower& lower_begin,
                                 const Lower& lower_end, Upper* upper) {
      if (*lower == lower_end) {
        *lower = lower_begin;
        ++*upper;
      }
    }

    // Refreshes the cached tuple from the cursors; leaves it untouched when
    // the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_, *current8_, *current9_);
    }
    // The iterator counts as past the end as soon as any single component
    // cursor has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      if (current7_ == end7_) return true;
      if (current8_ == end8_) return true;
      return current9_ == end9_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // For the i-th component: begin[i]_/end[i]_ bound the range being
    // traversed and current[i]_ is the cursor moving through it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    const typename ParamGenerator<T7>::iterator begin7_;
    const typename ParamGenerator<T7>::iterator end7_;
    typename ParamGenerator<T7>::iterator current7_;
    const typename ParamGenerator<T8>::iterator begin8_;
    const typename ParamGenerator<T8>::iterator end8_;
    typename ParamGenerator<T8>::iterator current8_;
    const typename ParamGenerator<T9>::iterator begin9_;
    const typename ParamGenerator<T9>::iterator end9_;
    typename ParamGenerator<T9>::iterator current9_;
    ParamType current_value_;
  };  // class CartesianProductGenerator9::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator9& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
  const ParamGenerator<T9> g9_;
};  // class CartesianProductGenerator9


// Generator whose elements are 10-tuples formed from ten component
// ParamGenerators (g1..g10). The nested Iterator walks the component ranges
// with the last component (g10) varying fastest and the first (g1) slowest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10>
class CartesianProductGenerator10
    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
        T7, T8, T9, T10> > {
 public:
  // Tuple type yielded by this generator's iterators.
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;

  // Stores a copy of each of the ten component generators.
  CartesianProductGenerator10(const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
      const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9,
      const ParamGenerator<T10>& g10)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
          g9_(g9), g10_(g10) {}
  virtual ~CartesianProductGenerator10() {}

  // Returns a newly allocated iterator with every component positioned at the
  // beginning of its range.
  // NOTE(review): the caller presumably takes ownership of the returned
  // pointer - confirm against ParamGenerator/ParamIterator.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
        g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin());
  }
  // Returns a newly allocated iterator with every component positioned at the
  // end of its range; such an iterator reports AtEnd().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
        g8_.end(), g9_, g9_.end(), g10_, g10_.end());
  }

 private:
  // Iterator over the product. It keeps, for each of the ten components, the
  // range bounds (begin/end) and a current position, plus a cached tuple of
  // the values the current positions refer to.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // base is the generator that created this iterator. For each i, gi
    // supplies the i-th range bounds and currenti the starting position
    // within that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3,
      const ParamGenerator<T4>& g4,
      const typename ParamGenerator<T4>::iterator& current4,
      const ParamGenerator<T5>& g5,
      const typename ParamGenerator<T5>::iterator& current5,
      const ParamGenerator<T6>& g6,
      const typename ParamGenerator<T6>::iterator& current6,
      const ParamGenerator<T7>& g7,
      const typename ParamGenerator<T7>::iterator& current7,
      const ParamGenerator<T8>& g8,
      const typename ParamGenerator<T8>::iterator& current8,
      const ParamGenerator<T9>& g9,
      const typename ParamGenerator<T9>::iterator& current9,
      const ParamGenerator<T10>& g10,
      const typename ParamGenerator<T10>::iterator& current10)
        : base_(base),
          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
          begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
          begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
          begin9_(g9.begin()), end9_(g9.end()), current9_(current9),
          begin10_(g10.begin()), end10_(g10.end()), current10_(current10)    {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // Returns the generator this iterator was created from.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Advance should not be called on beyond-of-range iterators
    // so no component iterators must be beyond end of range, either.
    //
    // Steps the 10th component; whenever a component reaches the end of its
    // range it is reset to its beginning and the preceding component is
    // stepped instead. The first component is never reset, so once it reaches
    // end1_ the iterator as a whole reports AtEnd().
    virtual void Advance() {
      assert(!AtEnd());
      ++current10_;
      if (current10_ == end10_) {
        current10_ = begin10_;
        ++current9_;
      }
      if (current9_ == end9_) {
        current9_ = begin9_;
        ++current8_;
      }
      if (current8_ == end8_) {
        current8_ = begin8_;
        ++current7_;
      }
      if (current7_ == end7_) {
        current7_ = begin7_;
        ++current6_;
      }
      if (current6_ == end6_) {
        current6_ = begin6_;
        ++current5_;
      }
      if (current5_ == end5_) {
        current5_ = begin5_;
        ++current4_;
      }
      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }
      ComputeCurrentValue();
    }
    // Returns a newly allocated copy of this iterator (via the private copy
    // constructor).
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns a pointer to the cached tuple. The cache is only refreshed
    // while the iterator is not at the end (see ComputeCurrentValue()).
    virtual const ParamType* Current() const { return &current_value_; }
    // Compares two iterators created by the same generator.
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Having the same base generator guarantees that the other
      // iterator is of the same type and we can downcast.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* typed_other =
          CheckedDowncastToActualType<const Iterator>(&other);
      // We must report iterators equal if they both point beyond their
      // respective ranges. That can happen in a variety of fashions,
      // so we have to consult AtEnd().
      return (AtEnd() && typed_other->AtEnd()) ||
         (
          current1_ == typed_other->current1_ &&
          current2_ == typed_other->current2_ &&
          current3_ == typed_other->current3_ &&
          current4_ == typed_other->current4_ &&
          current5_ == typed_other->current5_ &&
          current6_ == typed_other->current6_ &&
          current7_ == typed_other->current7_ &&
          current8_ == typed_other->current8_ &&
          current9_ == typed_other->current9_ &&
          current10_ == typed_other->current10_);
    }

   private:
    // Copy constructor used by Clone(). It copies the range bounds and
    // positions, then recomputes the cached tuple instead of copying it.
    Iterator(const Iterator& other)
        : base_(other.base_),
        begin1_(other.begin1_),
        end1_(other.end1_),
        current1_(other.current1_),
        begin2_(other.begin2_),
        end2_(other.end2_),
        current2_(other.current2_),
        begin3_(other.begin3_),
        end3_(other.end3_),
        current3_(other.current3_),
        begin4_(other.begin4_),
        end4_(other.end4_),
        current4_(other.current4_),
        begin5_(other.begin5_),
        end5_(other.end5_),
        current5_(other.current5_),
        begin6_(other.begin6_),
        end6_(other.end6_),
        current6_(other.current6_),
        begin7_(other.begin7_),
        end7_(other.end7_),
        current7_(other.current7_),
        begin8_(other.begin8_),
        end8_(other.end8_),
        current8_(other.current8_),
        begin9_(other.begin9_),
        end9_(other.end9_),
        current9_(other.current9_),
        begin10_(other.begin10_),
        end10_(other.end10_),
        current10_(other.current10_) {
      ComputeCurrentValue();
    }

    // Refreshes current_value_ from the ten current positions. Skipped when
    // AtEnd(), presumably because an end position must not be dereferenced;
    // in that case current_value_ keeps whatever it held before.
    void ComputeCurrentValue() {
      if (!AtEnd())
        current_value_ = ParamType(*current1_, *current2_, *current3_,
            *current4_, *current5_, *current6_, *current7_, *current8_,
            *current9_, *current10_);
    }
    bool AtEnd() const {
      // We must report iterator past the end of the range when either of the
      // component iterators has reached the end of its range.
      return
          current1_ == end1_ ||
          current2_ == end2_ ||
          current3_ == end3_ ||
          current4_ == end4_ ||
          current5_ == end5_ ||
          current6_ == end6_ ||
          current7_ == end7_ ||
          current8_ == end8_ ||
          current9_ == end9_ ||
          current10_ == end10_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    // Generator that created this iterator; compared in Equals().
    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
    // current[i]_ is the actual traversing iterator.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    const typename ParamGenerator<T7>::iterator begin7_;
    const typename ParamGenerator<T7>::iterator end7_;
    typename ParamGenerator<T7>::iterator current7_;
    const typename ParamGenerator<T8>::iterator begin8_;
    const typename ParamGenerator<T8>::iterator end8_;
    typename ParamGenerator<T8>::iterator current8_;
    const typename ParamGenerator<T9>::iterator begin9_;
    const typename ParamGenerator<T9>::iterator end9_;
    typename ParamGenerator<T9>::iterator current9_;
    const typename ParamGenerator<T10>::iterator begin10_;
    const typename ParamGenerator<T10>::iterator end10_;
    typename ParamGenerator<T10>::iterator current10_;
    // Cached tuple of the values at the current positions; see
    // ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator10::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator10& other);

  // Copies of the ten component generators, in tuple order.
  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
  const ParamGenerator<T9> g9_;
  const ParamGenerator<T10> g10_;
};  // class CartesianProductGenerator10


// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Helper classes providing Combine() with polymorphic features. They allow
// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
// convertible to U.
//
template <class Generator1, class Generator2>
class CartesianProductHolder2 {
 public:
  // Keeps a copy of each of the two generator-like arguments.
  CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
      : g1_(g1), g2_(g2) {}

  // Converts the holder to a generator of 2-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator2 wrapped in the returned generator.
  template <typename T1, typename T2>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2> > TupleGenerator;
    typedef CartesianProductGenerator2<T1, T2> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder2& other);

  const Generator1 g1_;
  const Generator2 g2_;
};  // class CartesianProductHolder2

template <class Generator1, class Generator2, class Generator3>
class CartesianProductHolder3 {
 public:
  // Keeps a copy of each of the three generator-like arguments.
  CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3)
      : g1_(g1), g2_(g2), g3_(g3) {}

  // Converts the holder to a generator of 3-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator3 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3> > TupleGenerator;
    typedef CartesianProductGenerator3<T1, T2, T3> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder3& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
};  // class CartesianProductHolder3

template <class Generator1, class Generator2, class Generator3,
    class Generator4>
class CartesianProductHolder4 {
 public:
  // Keeps a copy of each of the four generator-like arguments.
  CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}

  // Converts the holder to a generator of 4-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator4 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> > TupleGenerator;
    typedef CartesianProductGenerator4<T1, T2, T3, T4> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder4& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
};  // class CartesianProductHolder4

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5>
class CartesianProductHolder5 {
 public:
  // Keeps a copy of each of the five generator-like arguments.
  CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}

  // Converts the holder to a generator of 5-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator5 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >
        TupleGenerator;
    typedef CartesianProductGenerator5<T1, T2, T3, T4, T5> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder5& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
};  // class CartesianProductHolder5

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5, class Generator6>
class CartesianProductHolder6 {
 public:
  // Keeps a copy of each of the six generator-like arguments.
  CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}

  // Converts the holder to a generator of 6-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator6 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
      typename T6>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >
        TupleGenerator;
    typedef CartesianProductGenerator6<T1, T2, T3, T4, T5, T6> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder6& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
};  // class CartesianProductHolder6

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5, class Generator6, class Generator7>
class CartesianProductHolder7 {
 public:
  // Keeps a copy of each of the seven generator-like arguments.
  CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}

  // Converts the holder to a generator of 7-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator7 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
      typename T6, typename T7>
  operator ParamGenerator<
      ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >
        TupleGenerator;
    typedef CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder7& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
};  // class CartesianProductHolder7

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5, class Generator6, class Generator7,
    class Generator8>
class CartesianProductHolder8 {
 public:
  // Keeps a copy of each of the eight generator-like arguments.
  CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7, const Generator8& g8)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
        g8_(g8) {}

  // Converts the holder to a generator of 8-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator8 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
      typename T6, typename T7, typename T8>
  operator ParamGenerator<
      ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7,
        T8> > TupleGenerator;
    typedef CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>
        ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder8& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
};  // class CartesianProductHolder8

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5, class Generator6, class Generator7,
    class Generator8, class Generator9>
class CartesianProductHolder9 {
 public:
  // Keeps a copy of each of the nine generator-like arguments.
  CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7, const Generator8& g8,
                          const Generator9& g9)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
        g9_(g9) {}

  // Converts the holder to a generator of 9-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator9 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
      typename T6, typename T7, typename T8, typename T9>
  operator ParamGenerator<
      ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
        T9> > TupleGenerator;
    typedef CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>
        ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_),
        static_cast<ParamGenerator<T9> >(g9_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder9& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
  const Generator9 g9_;
};  // class CartesianProductHolder9

template <class Generator1, class Generator2, class Generator3,
    class Generator4, class Generator5, class Generator6, class Generator7,
    class Generator8, class Generator9, class Generator10>
class CartesianProductHolder10 {
 public:
  // Keeps a copy of each of the ten generator-like arguments.
  CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
                           const Generator3& g3, const Generator4& g4,
                           const Generator5& g5, const Generator6& g6,
                           const Generator7& g7, const Generator8& g8,
                           const Generator9& g9, const Generator10& g10)
      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
        g9_(g9), g10_(g10) {}

  // Converts the holder to a generator of 10-tuples: each stored object is
  // static_cast to ParamGenerator<Ti> and the results are handed to a newly
  // allocated CartesianProductGenerator10 wrapped in the returned generator.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
      typename T6, typename T7, typename T8, typename T9, typename T10>
  operator ParamGenerator<
      ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> >() const {
    typedef ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
        T9, T10> > TupleGenerator;
    typedef CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
        T10> ProductImpl;
    return TupleGenerator(new ProductImpl(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_),
        static_cast<ParamGenerator<T9> >(g9_),
        static_cast<ParamGenerator<T10> >(g10_)));
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder10& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
  const Generator9 g9_;
  const Generator10 g10_;
};  // class CartesianProductHolder10

# endif  // GTEST_HAS_COMBINE

}  // namespace internal
}  // namespace testing

#endif  //  GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_

#if GTEST_HAS_PARAM_TEST

namespace testing {

// Functions producing parameter generators.
//
// Google Test uses these generators to produce parameters for value-
// parameterized tests. When a parameterized test case is instantiated
// with a particular generator, Google Test creates and runs tests
// for each element in the sequence produced by the generator.
//
// In the following sample, tests from test case FooTest are instantiated
// each three times with parameter values 3, 5, and 8:
//
// class FooTest : public TestWithParam<int> { ... };
//
// TEST_P(FooTest, TestThis) {
// }
// TEST_P(FooTest, TestThat) {
// }
// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
//

// Range() returns generators providing sequences of values in a range.
//
// Synopsis:
// Range(start, end)
//   - returns a generator producing a sequence of values {start, start+1,
//     start+2, ..., }.
// Range(start, end, step)
//   - returns a generator producing a sequence of values {start, start+step,
//     start+step+step, ..., }.
// Notes:
//   * The generated sequences never include end. For example, Range(1, 5)
//     returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
//     returns a generator producing {1, 3, 5, 7}.
//   * start and end must have the same type. That type may be any integral or
//     floating-point type or a user defined type satisfying these conditions:
//     * It must be assignable (have operator=() defined).
//     * It must have operator+() (operator+(int-compatible type) for
//       two-operand version).
//     * It must have operator<() defined.
//     Elements in the resulting sequences will also have that type.
//   * Condition start < end must be satisfied in order for resulting sequences
//     to contain any elements.
//
template <typename T, typename IncrementT>
internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
  // Wrap a freshly allocated RangeGenerator built from (start, end, step).
  typedef internal::RangeGenerator<T, IncrementT> RangeImpl;
  return internal::ParamGenerator<T>(new RangeImpl(start, end, step));
}

template <typename T>
internal::ParamGenerator<T> Range(T start, T end) {
  // Two-argument form: forward to the three-argument overload with an int
  // step of 1.
  const int kUnitStep = 1;
  return Range(start, end, kUnitStep);
}

// ValuesIn() function allows generation of tests with parameters coming from
// a container.
//
// Synopsis:
// ValuesIn(const T (&array)[N])
//   - returns a generator producing sequences with elements from
//     a C-style array.
// ValuesIn(const Container& container)
//   - returns a generator producing sequences with elements from
//     an STL-style container.
// ValuesIn(Iterator begin, Iterator end)
//   - returns a generator producing sequences with elements from
//     a range [begin, end) defined by a pair of STL-style iterators. These
//     iterators can also be plain C pointers.
//
// Please note that ValuesIn copies the values from the containers
// passed in and keeps them to generate tests in RUN_ALL_TESTS().
//
// Examples:
//
// This instantiates tests from test case StringTest
// each with C-string values of "foo", "bar", and "baz":
//
// const char* strings[] = {"foo", "bar", "baz"};
// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings));
//
// This instantiates tests from test case StlStringTest
// each with STL strings with values "a" and "b":
//
// ::std::vector< ::std::string> GetParameterStrings() {
//   ::std::vector< ::std::string> v;
//   v.push_back("a");
//   v.push_back("b");
//   return v;
// }
//
// INSTANTIATE_TEST_CASE_P(CharSequence,
//                         StlStringTest,
//                         ValuesIn(GetParameterStrings()));
//
//
// This will also instantiate tests from CharTest
// each with parameter values 'a' and 'b':
//
// ::std::list<char> GetParameterChars() {
//   ::std::list<char> list;
//   list.push_back('a');
//   list.push_back('b');
//   return list;
// }
// ::std::list<char> l = GetParameterChars();
// INSTANTIATE_TEST_CASE_P(CharSequence2,
//                         CharTest,
//                         ValuesIn(l.begin(), l.end()));
//
template <typename ForwardIterator>
internal::ParamGenerator<
  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
ValuesIn(ForwardIterator begin, ForwardIterator end) {
  // The element type is taken from IteratorTraits; the range [begin, end) is
  // passed to a newly allocated ValuesInIteratorRangeGenerator.
  typedef typename ::testing::internal::IteratorTraits<
      ForwardIterator>::value_type ElementType;
  typedef internal::ValuesInIteratorRangeGenerator<ElementType> RangeImpl;
  return internal::ParamGenerator<ElementType>(new RangeImpl(begin, end));
}

template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
  // Treat the C array as the pointer range [array, array + N) and forward to
  // the iterator-pair overload.
  const T* const first = array;
  const T* const past_last = array + N;
  return ValuesIn(first, past_last);
}

// Container overload: forwards the range [container.begin(), container.end())
// to the iterator-pair ValuesIn(). The generator's element type is
// Container::value_type.
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
    const Container& container) {
  return ValuesIn(container.begin(), container.end());
}

// Values() allows generating tests from explicitly specified list of
// parameters.
//
// Synopsis:
// Values(T v1, T v2, ..., T vN)
//   - returns a generator producing sequences with elements v1, v2, ..., vN.
//
// For example, this instantiates tests from test case BarTest each
// with values "one", "two", and "three":
//
// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
//
// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
// The exact type of values will depend on the type of parameter in BazTest.
//
// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
//
// Currently, Values() supports from 1 to 50 parameters.
//
template <typename T1>
internal::ValueArray1<T1> Values(T1 v1) {
  // Pack the single argument into a ValueArray1.
  typedef internal::ValueArray1<T1> ResultArray;
  return ResultArray(v1);
}

template <typename T1, typename T2>
internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) {
  // Pack the two arguments, in order, into a ValueArray2.
  typedef internal::ValueArray2<T1, T2> ResultArray;
  return ResultArray(v1, v2);
}

template <typename T1, typename T2, typename T3>
internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) {
  // Pack the three arguments, in order, into a ValueArray3.
  typedef internal::ValueArray3<T1, T2, T3> ResultArray;
  return ResultArray(v1, v2, v3);
}

template <typename T1, typename T2, typename T3, typename T4>
internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) {
  // Pack the four arguments, in order, into a ValueArray4.
  typedef internal::ValueArray4<T1, T2, T3, T4> ResultArray;
  return ResultArray(v1, v2, v3, v4);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5>
internal::ValueArray5<T1, T2, T3, T4, T5> Values(T1 v1, T2 v2, T3 v3, T4 v4,
    T5 v5) {
  // Pack the five arguments, in order, into a ValueArray5.
  typedef internal::ValueArray5<T1, T2, T3, T4, T5> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6>
internal::ValueArray6<T1, T2, T3, T4, T5, T6> Values(T1 v1, T2 v2, T3 v3,
    T4 v4, T5 v5, T6 v6) {
  // Pack the six arguments, in order, into a ValueArray6.
  typedef internal::ValueArray6<T1, T2, T3, T4, T5, T6> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7>
internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> Values(T1 v1, T2 v2, T3 v3,
    T4 v4, T5 v5, T6 v6, T7 v7) {
  // Pack the seven arguments, in order, into a ValueArray7.
  typedef internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8>
internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> Values(T1 v1, T2 v2,
    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) {
  // Pack the eight arguments, in order, into a ValueArray8.
  typedef internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9>
internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9> Values(T1 v1, T2 v2,
    T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) {
  // Pack the nine arguments, in order, into a ValueArray9.
  typedef internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>
      ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8, v9);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10>
internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> Values(T1 v1,
    T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) {
  // Pack the ten arguments, in order, into a ValueArray10.
  typedef internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>
      ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11>
internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
    T10 v10, T11 v11) {
  // Pack the eleven arguments, in order, into a ValueArray11.
  typedef internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12>
internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
    T12> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
    T10 v10, T11 v11, T12 v12) {
  // Pack the twelve arguments, in order, into a ValueArray12.
  typedef internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13>
internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
    T13> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
    T10 v10, T11 v11, T12 v12, T13 v13) {
  // Pack the thirteen arguments, in order, into a ValueArray13.
  typedef internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13> ResultArray;
  return ResultArray(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13);
}

// Values() overload for exactly 14 arguments. Constructs an
// internal::ValueArray14 from v1..v14 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14>
internal::ValueArray14<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14) {
  typedef internal::ValueArray14<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14);
}

// Values() overload for exactly 15 arguments. Constructs an
// internal::ValueArray15 from v1..v15 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15>
internal::ValueArray15<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15) {
  typedef internal::ValueArray15<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15);
}

// Values() overload for exactly 16 arguments. Constructs an
// internal::ValueArray16 from v1..v16 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16>
internal::ValueArray16<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16) {
  typedef internal::ValueArray16<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16);
}

// Values() overload for exactly 17 arguments. Constructs an
// internal::ValueArray17 from v1..v17 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17>
internal::ValueArray17<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17) {
  typedef internal::ValueArray17<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17);
}

// Values() overload for exactly 18 arguments. Constructs an
// internal::ValueArray18 from v1..v18 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18>
internal::ValueArray18<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18) {
  typedef internal::ValueArray18<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18);
}

// Values() overload for exactly 19 arguments. Constructs an
// internal::ValueArray19 from v1..v19 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19>
internal::ValueArray19<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19) {
  typedef internal::ValueArray19<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19);
}

// Values() overload for exactly 20 arguments. Constructs an
// internal::ValueArray20 from v1..v20 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20>
internal::ValueArray20<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20) {
  typedef internal::ValueArray20<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20);
}

// Values() overload for exactly 21 arguments. Constructs an
// internal::ValueArray21 from v1..v21 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21>
internal::ValueArray21<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21) {
  typedef internal::ValueArray21<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21);
}

// Values() overload for exactly 22 arguments. Constructs an
// internal::ValueArray22 from v1..v22 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22>
internal::ValueArray22<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22) {
  typedef internal::ValueArray22<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22);
}

// Values() overload for exactly 23 arguments. Constructs an
// internal::ValueArray23 from v1..v23 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23>
internal::ValueArray23<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) {
  typedef internal::ValueArray23<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23);
}

// Values() overload for exactly 24 arguments. Constructs an
// internal::ValueArray24 from v1..v24 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24>
internal::ValueArray24<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) {
  typedef internal::ValueArray24<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24);
}

// Values() overload for exactly 25 arguments. Constructs an
// internal::ValueArray25 from v1..v25 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25>
internal::ValueArray25<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25) {
  typedef internal::ValueArray25<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25);
}

// Values() overload for exactly 26 arguments. Constructs an
// internal::ValueArray26 from v1..v26 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26>
internal::ValueArray26<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26) {
  typedef internal::ValueArray26<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26);
}

// Values() overload for exactly 27 arguments. Constructs an
// internal::ValueArray27 from v1..v27 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27>
internal::ValueArray27<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27) {
  typedef internal::ValueArray27<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27);
}

// Values() overload for exactly 28 arguments. Constructs an
// internal::ValueArray28 from v1..v28 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28>
internal::ValueArray28<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28) {
  typedef internal::ValueArray28<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28);
}

// Values() overload for exactly 29 arguments. Constructs an
// internal::ValueArray29 from v1..v29 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29>
internal::ValueArray29<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29) {
  typedef internal::ValueArray29<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29);
}

// Values() overload for exactly 30 arguments. Constructs an
// internal::ValueArray30 from v1..v30 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30>
internal::ValueArray30<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) {
  typedef internal::ValueArray30<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30);
}

// Values() overload for exactly 31 arguments. Constructs an
// internal::ValueArray31 from v1..v31 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31>
internal::ValueArray31<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31) {
  typedef internal::ValueArray31<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31);
}

// Values() overload for exactly 32 arguments. Constructs an
// internal::ValueArray32 from v1..v32 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32>
internal::ValueArray32<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32) {
  typedef internal::ValueArray32<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32);
}

// Values() overload for exactly 33 arguments. Constructs an
// internal::ValueArray33 from v1..v33 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33>
internal::ValueArray33<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33) {
  typedef internal::ValueArray33<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33);
}

// Values() overload for exactly 34 arguments. Constructs an
// internal::ValueArray34 from v1..v34 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34>
internal::ValueArray34<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34) {
  typedef internal::ValueArray34<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34);
}

// Values() overload for exactly 35 arguments. Constructs an
// internal::ValueArray35 from v1..v35 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35>
internal::ValueArray35<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) {
  typedef internal::ValueArray35<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35);
}

// Values() overload for exactly 36 arguments. Constructs an
// internal::ValueArray36 from v1..v36 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36>
internal::ValueArray36<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) {
  typedef internal::ValueArray36<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36);
}

// Values() overload for exactly 37 arguments. Constructs an
// internal::ValueArray37 from v1..v37 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36,
          class T37>
internal::ValueArray37<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
       T37 v37) {
  typedef internal::ValueArray37<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37);
}

// Values() overload for exactly 38 arguments. Constructs an
// internal::ValueArray38 from v1..v38 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36,
          class T37, class T38>
internal::ValueArray38<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
       T37 v37, T38 v38) {
  typedef internal::ValueArray38<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38);
}

// Values() overload for exactly 39 arguments. Constructs an
// internal::ValueArray39 from v1..v39 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36,
          class T37, class T38, class T39>
internal::ValueArray39<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
       T37 v37, T38 v38, T39 v39) {
  typedef internal::ValueArray39<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39);
}

// Values() overload for exactly 40 arguments. Constructs an
// internal::ValueArray40 from v1..v40 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36,
          class T37, class T38, class T39, class T40>
internal::ValueArray40<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
       T37 v37, T38 v38, T39 v39, T40 v40) {
  typedef internal::ValueArray40<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40);
}

// Values() overload for exactly 41 arguments. Constructs an
// internal::ValueArray41 from v1..v41 (argument order preserved) and returns
// it by value.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24,
          class T25, class T26, class T27, class T28, class T29, class T30,
          class T31, class T32, class T33, class T34, class T35, class T36,
          class T37, class T38, class T39, class T40, class T41>
internal::ValueArray41<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
       T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
       T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
       T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
       T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
       T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) {
  typedef internal::ValueArray41<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41>
      PackedValues;
  return PackedValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41);
}

// Values() overload for 42 arguments.  Returns the internal::ValueArray42
// instantiation matching the argument types, constructed from v1..v42 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42>
internal::ValueArray42<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42) {
  typedef internal::ValueArray42<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42);
}

// Values() overload for 43 arguments.  Returns the internal::ValueArray43
// instantiation matching the argument types, constructed from v1..v43 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43>
internal::ValueArray43<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43) {
  typedef internal::ValueArray43<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43);
}

// Values() overload for 44 arguments.  Returns the internal::ValueArray44
// instantiation matching the argument types, constructed from v1..v44 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44>
internal::ValueArray44<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44) {
  typedef internal::ValueArray44<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44);
}

// Values() overload for 45 arguments.  Returns the internal::ValueArray45
// instantiation matching the argument types, constructed from v1..v45 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45>
internal::ValueArray45<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) {
  typedef internal::ValueArray45<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45);
}

// Values() overload for 46 arguments.  Returns the internal::ValueArray46
// instantiation matching the argument types, constructed from v1..v46 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46>
internal::ValueArray46<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
    T46 v46) {
  typedef internal::ValueArray46<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46);
}

// Values() overload for 47 arguments.  Returns the internal::ValueArray47
// instantiation matching the argument types, constructed from v1..v47 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47>
internal::ValueArray47<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
    T46 v46, T47 v47) {
  typedef internal::ValueArray47<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47);
}

// Values() overload for 48 arguments.  Returns the internal::ValueArray48
// instantiation matching the argument types, constructed from v1..v48 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48>
internal::ValueArray48<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
    T46 v46, T47 v47, T48 v48) {
  typedef internal::ValueArray48<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48);
}

// Values() overload for 49 arguments.  Returns the internal::ValueArray49
// instantiation matching the argument types, constructed from v1..v49 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48, typename T49>
internal::ValueArray49<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
    T46 v46, T47 v47, T48 v48, T49 v49) {
  typedef internal::ValueArray49<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48, v49);
}

// Values() overload for 50 arguments.  Returns the internal::ValueArray50
// instantiation matching the argument types, constructed from v1..v50 in
// order.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48, typename T49, typename T50>
internal::ValueArray50<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
    T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
    T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
    T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) {
  typedef internal::ValueArray50<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48, v49, v50);
}

// Bool() yields a parameter generator over the two Boolean values.
//
// Synopsis:
// Bool()
//   - returns a generator whose sequence is {false, true}, in that order.
//
// Use it for code whose behavior hinges on a Boolean flag.  To cover every
// combination of several flags, pass several Bool() generators to Combine().
//
// Example: every test in the test case FlagDependentTest below is
// instantiated twice, once with the parameter false and once with true.
//
// class FlagDependentTest : public testing::TestWithParam<bool> {
//   virtual void SetUp() {
//     external_flag = GetParam();
//   }
// }
// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
//
inline internal::ParamGenerator<bool> Bool() {
  // Values(false, true) is converted to ParamGenerator<bool> here, exactly as
  // it would be when returned directly.
  internal::ParamGenerator<bool> both_flags = Values(false, true);
  return both_flags;
}

# if GTEST_HAS_COMBINE
// Combine() allows the user to combine two or more sequences to produce
// values of a Cartesian product of those sequences' elements.
//
// Synopsis:
// Combine(gen1, gen2, ..., genN)
//   - returns a generator producing sequences with elements coming from
//     the Cartesian product of elements from the sequences generated by
//     gen1, gen2, ..., genN. The sequence elements will have a type of
//     tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
//     of elements from sequences produced by gen1, gen2, ..., genN.
//
// Combine can have up to 10 arguments. This number is currently limited
// by the maximum number of elements in the tuple implementation used by Google
// Test.
//
// Example:
//
// This will instantiate tests in test case AnimalTest each one with
// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
// tuple("dog", BLACK), and tuple("dog", WHITE):
//
// enum Color { BLACK, GRAY, WHITE };
// class AnimalTest
//     : public testing::TestWithParam<tuple<const char*, Color> > {...};
//
// TEST_P(AnimalTest, AnimalLooksNice) {...}
//
// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
//                         Combine(Values("cat", "dog"),
//                                 Values(BLACK, WHITE)));
//
// This will instantiate tests in FlagDependentTest with all variations of two
// Boolean flags:
//
// class FlagDependentTest
//     : public testing::TestWithParam<tuple<bool, bool> > {
//   virtual void SetUp() {
//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
//     tie(external_flag_1, external_flag_2) = GetParam();
//   }
// };
//
// TEST_P(FlagDependentTest, TestFeature1) {
//   // Test your code using external_flag_1 and external_flag_2 here.
// }
// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
//                         Combine(Bool(), Bool()));
//
// Combine() overload for two generators; returns the
// internal::CartesianProductHolder2 built from g1 and g2.
template <typename Generator1, typename Generator2>
internal::CartesianProductHolder2<Generator1, Generator2> Combine(
    const Generator1& g1, const Generator2& g2) {
  typedef internal::CartesianProductHolder2<Generator1, Generator2> Holder;
  return Holder(g1, g2);
}

// Combine() overload for three generators; returns the
// internal::CartesianProductHolder3 built from g1..g3.
template <typename Generator1, typename Generator2, typename Generator3>
internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine(
    const Generator1& g1, const Generator2& g2, const Generator3& g3) {
  typedef internal::CartesianProductHolder3<
      Generator1, Generator2, Generator3> Holder;
  return Holder(g1, g2, g3);
}

// Combine() overload for four generators; returns the
// internal::CartesianProductHolder4 built from g1..g4.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4>
internal::CartesianProductHolder4<
    Generator1, Generator2, Generator3, Generator4>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4) {
  typedef internal::CartesianProductHolder4<
      Generator1, Generator2, Generator3, Generator4> Holder;
  return Holder(g1, g2, g3, g4);
}

// Combine() overload for five generators; returns the
// internal::CartesianProductHolder5 built from g1..g5.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5>
internal::CartesianProductHolder5<
    Generator1, Generator2, Generator3, Generator4, Generator5>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5) {
  typedef internal::CartesianProductHolder5<
      Generator1, Generator2, Generator3, Generator4, Generator5> Holder;
  return Holder(g1, g2, g3, g4, g5);
}

// Combine() overload for six generators; returns the
// internal::CartesianProductHolder6 built from g1..g6.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6>
internal::CartesianProductHolder6<
    Generator1, Generator2, Generator3,
    Generator4, Generator5, Generator6>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6) {
  typedef internal::CartesianProductHolder6<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6> Holder;
  return Holder(g1, g2, g3, g4, g5, g6);
}

// Combine() overload for seven generators; returns the
// internal::CartesianProductHolder7 built from g1..g7.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7>
internal::CartesianProductHolder7<
    Generator1, Generator2, Generator3,
    Generator4, Generator5, Generator6,
    Generator7>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7) {
  typedef internal::CartesianProductHolder7<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6,
      Generator7> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7);
}

// Combine() overload for eight generators; returns the
// internal::CartesianProductHolder8 built from g1..g8.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8>
internal::CartesianProductHolder8<
    Generator1, Generator2, Generator3,
    Generator4, Generator5, Generator6,
    Generator7, Generator8>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8) {
  typedef internal::CartesianProductHolder8<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6,
      Generator7, Generator8> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8);
}

// Combine() overload for nine generators; returns the
// internal::CartesianProductHolder9 built from g1..g9.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8, typename Generator9>
internal::CartesianProductHolder9<
    Generator1, Generator2, Generator3,
    Generator4, Generator5, Generator6,
    Generator7, Generator8, Generator9>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8, const Generator9& g9) {
  typedef internal::CartesianProductHolder9<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6,
      Generator7, Generator8, Generator9> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8, g9);
}

// Combine() overload for ten generators; returns the
// internal::CartesianProductHolder10 built from g1..g10.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8, typename Generator9,
          typename Generator10>
internal::CartesianProductHolder10<
    Generator1, Generator2, Generator3,
    Generator4, Generator5, Generator6,
    Generator7, Generator8, Generator9,
    Generator10>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8, const Generator9& g9,
        const Generator10& g10) {
  typedef internal::CartesianProductHolder10<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6,
      Generator7, Generator8, Generator9,
      Generator10> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
}
# endif  // GTEST_HAS_COMBINE


// TEST_P(test_case_name, test_name) defines one parameterized test.
//
// The macro expands to:
//   - a class, named via GTEST_TEST_CLASS_NAME_(test_case_name, test_name),
//     that publicly derives from the fixture test_case_name, declares
//     TestBody(), and is made non-copyable with
//     GTEST_DISALLOW_COPY_AND_ASSIGN_;
//   - a private static AddToRegistry() that looks up the pattern holder for
//     the fixture in the UnitTest's parameterized test registry (passing the
//     fixture name, __FILE__ and __LINE__) and calls AddTestPattern() with the
//     stringified fixture name, the stringified test name and a newly
//     allocated TestMetaFactory for the generated class;
//   - a static int member, gtest_registering_dummy_, whose initializer calls
//     AddToRegistry(), so the registration runs when that static is
//     initialized;
//   - the header of the out-of-class TestBody() definition.  The macro stops
//     there, so the braces that follow the macro invocation become the test
//     body.
# define TEST_P(test_case_name, test_name) \
  class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
      : public test_case_name { \
   public: \
    GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
    virtual void TestBody(); \
   private: \
    static int AddToRegistry() { \
      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
          GetTestCasePatternHolder<test_case_name>(\
              #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
                  #test_case_name, \
                  #test_name, \
                  new ::testing::internal::TestMetaFactory< \
                      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
      return 0; \
    } \
    static int gtest_registering_dummy_; \
    GTEST_DISALLOW_COPY_AND_ASSIGN_(\
        GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
  }; \
  int GTEST_TEST_CLASS_NAME_(test_case_name, \
                             test_name)::gtest_registering_dummy_ = \
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
  void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

// INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) instantiates the
// parameterized tests of the fixture test_case_name with the parameters
// produced by generator.
//
// The macro expands to two namespace-scope definitions whose names are built
// by pasting prefix and test_case_name together:
//   - a function gtest_<prefix><test_case_name>_EvalGenerator_() that returns
//     generator as a ParamGenerator<test_case_name::ParamType>;
//   - an int variable gtest_<prefix><test_case_name>_dummy_ whose initializer
//     looks up the pattern holder for the fixture in the UnitTest's
//     parameterized test registry and calls AddTestCaseInstantiation() with
//     the stringified prefix, a pointer to the function above, __FILE__ and
//     __LINE__.
//
// The expansion does not end with a semicolon; the one written after the
// macro invocation terminates the variable definition.
# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
  ::testing::internal::ParamGenerator<test_case_name::ParamType> \
      gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
  int gtest_##prefix##test_case_name##_dummy_ = \
      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
          GetTestCasePatternHolder<test_case_name>(\
              #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
                  #prefix, \
                  &gtest_##prefix##test_case_name##_EvalGenerator_, \
                  __FILE__, __LINE__)

}  // namespace testing

#endif  // GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Google C++ Testing Framework definitions useful in production code.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_

// FRIEND_TEST(test_case_name, test_name) lets a test reach the private and
// protected members of a class.  Put it inside the class under test to
// declare that test as a friend; it expands to a friend declaration for the
// class named test_case_name_test_name_Test.  For example:
//
// class MyClass {
//  private:
//   void MyMethod();
//   FRIEND_TEST(MyClassTest, MyMethod);
// };
//
// class MyClassTest : public testing::Test {
//   // ...
// };
//
// TEST_F(MyClassTest, MyMethod) {
//   // Can call MyClass::MyMethod() here.
// }

#define FRIEND_TEST(test_case_name, test_name) \
  friend class test_case_name##_##test_name##_Test

#endif  // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//

#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_

#include <iosfwd>
#include <vector>

namespace testing {

// A copyable object representing the result of a test part (i.e. an
// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
//
// Don't inherit from TestPartResult as its destructor is not virtual.
class GTEST_API_ TestPartResult {
 public:
  // Outcome categories for a single test part (an assertion, or an
  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
  enum Type {
    kSuccess,          // The part succeeded.
    kNonFatalFailure,  // The part failed, but the test may keep running.
    kFatalFailure      // The part failed and the test should be terminated.
  };

  // The one and only constructor: TestPartResult intentionally has no
  // default constructor, so every instance carries an outcome, a source
  // location and a message.  The summary is derived from a_message by
  // ExtractSummary().
  TestPartResult(Type a_type, const char* a_file_name, int a_line_number,
                 const char* a_message)
      : type_(a_type),
        file_name_(a_file_name),
        line_number_(a_line_number),
        summary_(ExtractSummary(a_message)),
        message_(a_message) {}

  // Outcome of this test part.
  Type type() const { return type_; }

  // Name of the source file where the test part took place, or NULL if
  // it is unknown.
  const char* file_name() const { return file_name_.c_str(); }

  // Line in the source file where the test part took place, or -1 if it
  // is unknown.
  int line_number() const { return line_number_; }

  // Summary of the failure message.
  const char* summary() const { return summary_.c_str(); }

  // Full message associated with the test part.
  const char* message() const { return message_.c_str(); }

  // True iff the test part passed.
  bool passed() const { return kSuccess == type_; }

  // True iff the test part failed (fatally or not).
  bool failed() const { return !passed(); }

  // True iff the test part failed without terminating the test.
  bool nonfatally_failed() const { return kNonFatalFailure == type_; }

  // True iff the test part failed fatally.
  bool fatally_failed() const { return kFatalFailure == type_; }

 private:
  Type type_;

  // Produces the summary of a failure message by omitting the stack
  // trace in it.
  static internal::String ExtractSummary(const char* message);

  // Source file name of the test part; NULL if the file is unknown.
  internal::String file_name_;
  // Source line of the test part; -1 if the line number is unknown.
  int line_number_;
  // The test failure summary.
  internal::String summary_;
  // The test failure message.
  internal::String message_;
};

// Prints a TestPartResult object to the given stream and returns that
// stream.  Only declared here; the definition lives elsewhere.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result);

// An array of TestPartResult objects.
//
// Don't inherit from TestPartResultArray as its destructor is not
// virtual.
class GTEST_API_ TestPartResultArray {
 public:
  // Creates an empty array.
  TestPartResultArray() {}

  // Appends the given TestPartResult to the array.
  void Append(const TestPartResult& result);

  // Returns the TestPartResult at the given index (0-based).
  const TestPartResult& GetTestPartResult(int index) const;

  // Returns the number of TestPartResult objects in the array.
  int size() const;

 private:
  // Backing storage for the recorded results.
  std::vector<TestPartResult> array_;

  // The array is neither copyable nor assignable.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
};

// This interface knows how to report a test part result.
class TestPartResultReporterInterface {
 public:
  // Virtual so that implementations can be destroyed through a pointer
  // to this interface.
  virtual ~TestPartResultReporterInterface() {}

  // Reports the given test part result.  Must be implemented by every
  // concrete reporter.
  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
};

namespace internal {

// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
// statement generates new fatal failures. To do so it registers itself as the
// current test part result reporter. Besides checking if fatal failures were
// reported, it only delegates the reporting to the former result reporter.
// The original result reporter is restored in the destructor.
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
class GTEST_API_ HasNewFatalFailureHelper
    : public TestPartResultReporterInterface {
 public:
  HasNewFatalFailureHelper();
  virtual ~HasNewFatalFailureHelper();
  // TestPartResultReporterInterface implementation.
  virtual void ReportTestPartResult(const TestPartResult& result);
  // Returns the recorded "new fatal failure" flag.
  bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
 private:
  // Flag exposed through has_new_fatal_failure().
  bool has_new_fatal_failure_;
  // The reporter that was current before this helper took over (see the
  // class comment); not owned as far as this declaration shows.
  TestPartResultReporterInterface* original_reporter_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
};

}  // namespace internal

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_

// This header implements typed tests and type-parameterized tests.

// Typed (aka type-driven) tests repeat the same test for types in a
// list.  You must know which types you want to test with when writing
// typed tests. Here's how you do it:

#if 0

// First, define a fixture class template.  It should be parameterized
// by a type.  Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
 public:
  ...
  typedef std::list<T> List;
  static T shared_;
  T value_;
};

// Next, associate a list of types with the test case, which will be
// repeated for each type in the list.  The typedef is necessary for
// the macro to parse correctly.
typedef testing::Types<char, int, unsigned int> MyTypes;
TYPED_TEST_CASE(FooTest, MyTypes);

// If the type list contains only one type, you can write that type
// directly without Types<...>:
//   TYPED_TEST_CASE(FooTest, int);

// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
// tests for this test case as you want.
TYPED_TEST(FooTest, DoesBlah) {
  // Inside a test, refer to TypeParam to get the type parameter.
  // Since we are inside a derived class template, C++ requires us to
  // visit the members of FooTest via 'this'.
  TypeParam n = this->value_;

  // To visit static members of the fixture, add the TestFixture::
  // prefix.
  n += TestFixture::shared_;

  // To refer to typedefs in the fixture, add the "typename
  // TestFixture::" prefix.
  typename TestFixture::List values;
  values.push_back(n);
  ...
}

TYPED_TEST(FooTest, HasPropertyA) { ... }

#endif  // 0

// Type-parameterized tests are abstract test patterns parameterized
// by a type.  Compared with typed tests, type-parameterized tests
// allow you to define the test pattern without knowing what the type
// parameters are.  The defined pattern can be instantiated with
// different types any number of times, in any number of translation
// units.
//
// If you are designing an interface or concept, you can define a
// suite of type-parameterized tests to verify properties that any
// valid implementation of the interface/concept should have.  Then,
// each implementation can easily instantiate the test suite to verify
// that it conforms to the requirements, without having to write
// similar tests repeatedly.  Here's an example:

#if 0

// First, define a fixture class template.  It should be parameterized
// by a type.  Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
  ...
};

// Next, declare that you will define a type-parameterized test case
// (the _P suffix is for "parameterized" or "pattern", whichever you
// prefer):
TYPED_TEST_CASE_P(FooTest);

// Then, use TYPED_TEST_P() to define as many type-parameterized tests
// for this type-parameterized test case as you want.
TYPED_TEST_P(FooTest, DoesBlah) {
  // Inside a test, refer to TypeParam to get the type parameter.
  TypeParam n = 0;
  ...
}

TYPED_TEST_P(FooTest, HasPropertyA) { ... }

// Now the tricky part: you need to register all test patterns before
// you can instantiate them.  The first argument of the macro is the
// test case name; the rest are the names of the tests in this test
// case.
REGISTER_TYPED_TEST_CASE_P(FooTest,
                           DoesBlah, HasPropertyA);

// Finally, you are free to instantiate the pattern with the types you
// want.  If you put the above code in a header file, you can #include
// it in multiple C++ source files and instantiate it multiple times.
//
// To distinguish different instances of the pattern, the first
// argument to the INSTANTIATE_* macro is a prefix that will be added
// to the actual test case name.  Remember to pick unique prefixes for
// different instances.
typedef testing::Types<char, int, unsigned int> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);

// If the type list contains only one type, you can write that type
// directly without Types<...>:
//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);

#endif  // 0


// Implements typed tests.

#if GTEST_HAS_TYPED_TEST

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the typedef for the type parameters of the
// given test case, i.e. gtest_type_params_<TestCaseName>_.  The typedef
// itself is introduced by TYPED_TEST_CASE and consumed by TYPED_TEST.
# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_

// Associates the type list 'Types' with the typed test case 'CaseName' by
// defining the typedef named by GTEST_TYPE_PARAMS_(CaseName).
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define TYPED_TEST_CASE(CaseName, Types) \
  typedef ::testing::internal::TypeList< Types >::type \
      GTEST_TYPE_PARAMS_(CaseName)

// Defines the typed test 'TestName' in test case 'CaseName'.
//
// The expansion has three parts:
//   1. A class template derived from CaseName<T> that exposes the
//      typedefs TestFixture (= CaseName<T>) and TypeParam (= T) and
//      declares TestBody().
//   2. A bool variable whose initializer calls
//      TypeParameterizedTest<...>::Register() with the type list bound
//      by TYPED_TEST_CASE(CaseName, ...).
//   3. The header of the out-of-class TestBody() definition; the braces
//      the user writes after the macro become its body.
# define TYPED_TEST(CaseName, TestName) \
  template <typename gtest_TypeParam_> \
  class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
      : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTest< \
          CaseName, \
          ::testing::internal::TemplateSel< \
              GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
          GTEST_TYPE_PARAMS_(CaseName)>::Register(\
              "", #CaseName, #TestName, 0); \
  template <typename gtest_TypeParam_> \
  void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()

#endif  // GTEST_HAS_TYPED_TEST

// Implements type-parameterized tests.

#if GTEST_HAS_TYPED_TEST_P

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the namespace name that the type-parameterized tests for
// the given type-parameterized test case are defined in
// (gtest_case_<TestCaseName>_).  The exact name of the namespace is
// subject to change without notice.
# define GTEST_CASE_NAMESPACE_(TestCaseName) \
  gtest_case_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the variable used to remember the names of
// the defined tests in the given test case.  The variable itself is
// defined by TYPED_TEST_CASE_P.
# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
  gtest_typed_test_case_p_state_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
//
// Expands to the name of the variable used to remember the names of
// the registered tests in the given test case.  The variable itself is
// defined by REGISTER_TYPED_TEST_CASE_P.
# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
  gtest_registered_test_names_##TestCaseName##_

// Declares the type-parameterized test case 'CaseName' by defining its
// TypedTestCasePState bookkeeping variable.
//
// The variables defined in the type-parameterized test macros are
// static as typically these macros are used in a .h file that can be
// #included in multiple translation units linked together.
# define TYPED_TEST_CASE_P(CaseName) \
  static ::testing::internal::TypedTestCasePState \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)

// Defines the type-parameterized test 'TestName' in test case 'CaseName'.
//
// Inside the namespace GTEST_CASE_NAMESPACE_(CaseName) the expansion
// declares a class template TestName derived from CaseName<T> (with the
// TestFixture and TypeParam typedefs) and records the test's name, file
// and line with the case's state object via AddTestName().  It ends with
// the header of the out-of-class TestBody() definition, so the braces the
// user writes after the macro become the test body.
# define TYPED_TEST_P(CaseName, TestName) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  template <typename gtest_TypeParam_> \
  class TestName : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
          __FILE__, __LINE__, #CaseName, #TestName); \
  } \
  template <typename gtest_TypeParam_> \
  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()

// Registers the tests of the type-parameterized test case 'CaseName'.
// The variadic arguments are the names of the tests defined with
// TYPED_TEST_P.
//
// The expansion typedefs the list of test class templates as
// gtest_AllTests_ inside the case's namespace, and stores the result of
// VerifyRegisteredTestNames() (called with the stringified test names) in
// the variable named by GTEST_REGISTERED_TEST_NAMES_(CaseName).
# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
  } \
  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
          __FILE__, __LINE__, #__VA_ARGS__)

// Instantiates the type-parameterized test case 'CaseName' for the types
// in 'Types'; 'Prefix' is passed to Register() as a string and is also
// pasted into the name of the bool variable that triggers registration.
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
  bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTestCase<CaseName, \
          GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
          ::testing::internal::TypeList< Types >::type>::Register(\
              #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))

#endif  // GTEST_HAS_TYPED_TEST_P

#endif  // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_

// Depending on the platform, different string classes are available.
// On Linux, in addition to ::std::string, Google also makes use of
// class ::string, which has the same interface as ::std::string, but
// has a different implementation.
//
// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
// ::string is available AND is a distinct type to ::std::string, or
// define it to 0 to indicate otherwise.
//
// If the user's ::std::string and ::string are the same class due to
// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0.
//
// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined
// heuristically.

namespace testing {

// Declares the flags.

// This flag temporarily enables the disabled tests.
GTEST_DECLARE_bool_(also_run_disabled_tests);

// This flag brings the debugger on an assertion failure.
GTEST_DECLARE_bool_(break_on_failure);

// This flag controls whether Google Test catches all test-thrown exceptions
// and logs them as failures.
GTEST_DECLARE_bool_(catch_exceptions);

// This flag enables using colors in terminal output. Available values are
// "yes" to enable colors, "no" (disable colors), or "auto" (the default)
// to let Google Test decide.
GTEST_DECLARE_string_(color);

// This flag sets up the filter to select by name using a glob pattern
// the tests to run. If the filter is not given all tests are executed.
GTEST_DECLARE_string_(filter);

// This flag causes Google Test to list tests. None of the tests listed
// are actually run if the flag is provided.
GTEST_DECLARE_bool_(list_tests);

// This flag controls whether Google Test emits a detailed XML report to a file
// in addition to its normal textual output.
GTEST_DECLARE_string_(output);

// This flag controls whether Google Test prints the elapsed time for each
// test.
GTEST_DECLARE_bool_(print_time);

// This flag specifies the random number seed.
GTEST_DECLARE_int32_(random_seed);

// This flag sets how many times the tests are repeated. The default value
// is 1. If the value is -1 the tests are repeated forever.
GTEST_DECLARE_int32_(repeat);

// This flag controls whether Google Test includes Google Test internal
// stack frames in failure stack traces.
GTEST_DECLARE_bool_(show_internal_stack_frames);

// When this flag is specified, tests' order is randomized on every iteration.
GTEST_DECLARE_bool_(shuffle);

// This flag specifies the maximum number of stack frames to be
// printed in a failure message.
GTEST_DECLARE_int32_(stack_trace_depth);

// When this flag is specified, a failed assertion will throw an
// exception if exceptions are enabled, or exit the program with a
// non-zero code otherwise.
GTEST_DECLARE_bool_(throw_on_failure);

// When this flag is set with a "host:port" string, on supported
// platforms test results are streamed to the specified port on
// the specified host machine.
GTEST_DECLARE_string_(stream_result_to);

// The upper limit for valid stack trace depths.
const int kMaxStackTraceDepth = 100;

namespace internal {

// Forward declarations of internal types used by declarations later in
// this header (for example, several are named as friends of TestResult).
class AssertHelper;
class DefaultGlobalTestPartResultReporter;
class ExecDeathTest;
class NoExecDeathTest;
class FinalSuccessChecker;
class GTestFlagSaver;
class TestResultAccessor;
class TestEventListenersAccessor;
class TestEventRepeater;
class WindowsDeathTest;
// The elaborated type specifier both forward-declares class UnitTestImpl
// and declares a function returning a pointer to it.
class UnitTestImpl* GetUnitTestImpl();
// Declared here, defined elsewhere.  Judging by its name and parameters
// it reports a result of the given type and message when no source
// location is available.
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                    const String& message);

// Renders any streamable value as a String by streaming it into a
// Message and taking the Message's text.  A NULL pointer is rendered as
// "(null)".  When the input value is a ::string, ::std::string,
// ::wstring, or ::std::wstring object, each NUL character in it is
// replaced with "\\0".
//
// The declaration lives in gtest-internal.h; the definition is placed
// here so that the complete Message class is visible, which the ARM
// compiler requires.
template <typename T>
String StreamableToString(const T& streamable) {
  Message formatted;
  formatted << streamable;
  return formatted.GetString();
}

}  // namespace internal

// The friend relationship of some of these classes is cyclic.
// If we don't forward declare them, the compiler might confuse the classes
// in friendship clauses with same-named classes in scope.
class Test;
class TestCase;
class TestInfo;
class UnitTest;

// A class for indicating whether an assertion was successful.  When
// the assertion wasn't successful, the AssertionResult object
// remembers a non-empty message that describes how it failed.
//
// To create an instance of this class, use one of the factory functions
// (AssertionSuccess() and AssertionFailure()).
//
// This class is useful for two purposes:
//   1. Defining predicate functions to be used with Boolean test assertions
//      EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
//   2. Defining predicate-format functions to be
//      used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
//
// For example, if you define IsEven predicate:
//
//   testing::AssertionResult IsEven(int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess();
//     else
//       return testing::AssertionFailure() << n << " is odd";
//   }
//
// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
// will print the message
//
//   Value of: IsEven(Fib(5))
//     Actual: false (5 is odd)
//   Expected: true
//
// instead of a more opaque
//
//   Value of: IsEven(Fib(5))
//     Actual: false
//   Expected: true
//
// in case IsEven is a simple Boolean predicate.
//
// If you expect your predicate to be reused and want to support informative
// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
// about half as often as positive ones in our tests), supply messages for
// both success and failure cases:
//
//   testing::AssertionResult IsEven(int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess() << n << " is even";
//     else
//       return testing::AssertionFailure() << n << " is odd";
//   }
//
// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
//
//   Value of: IsEven(Fib(6))
//     Actual: true (8 is even)
//   Expected: false
//
// NB: Predicates that support negative Boolean assertions have reduced
// performance in positive ones so be careful not to use them in tests
// that have lots (tens of thousands) of positive Boolean assertions.
//
// To use this class with EXPECT_PRED_FORMAT assertions such as:
//
//   // Verifies that Foo() returns an even number.
//   EXPECT_PRED_FORMAT1(IsEven, Foo());
//
// you need to define:
//
//   testing::AssertionResult IsEven(const char* expr, int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess();
//     else
//       return testing::AssertionFailure()
//         << "Expected: " << expr << " is even\n  Actual: it's " << n;
//   }
//
// If Foo() returns 5, you will see the following message:
//
//   Expected: Foo() is even
//     Actual: it's 5
//
class GTEST_API_ AssertionResult {
 public:
  // Copy constructor; needed by EXPECT_TRUE/FALSE(assertion_result).
  AssertionResult(const AssertionResult& other);
  // Wraps a plain Boolean outcome; needed by
  // EXPECT_TRUE/FALSE(bool_expression).
  explicit AssertionResult(bool success) : success_(success) {}

  // Converts to true iff the assertion succeeded.
  operator bool() const { return success_; }  // NOLINT

  // Yields the negated assertion result. Used with EXPECT/ASSERT_FALSE.
  AssertionResult operator!() const;

  // Text that has been streamed into this AssertionResult so far. Test
  // assertions use it when they fail (i.e., the predicate's outcome doesn't
  // match the assertion's expectation). An empty string is returned when
  // nothing has been streamed into the object.
  const char* message() const {
    if (message_.get() == NULL)
      return "";
    return message_->c_str();
  }
  // TODO(vladl@google.com): Remove this after making sure no clients use it.
  // Deprecated; please use message() instead.
  const char* failure_message() const { return message(); }

  // Appends a custom failure message fragment to this object.
  template <typename T>
  AssertionResult& operator<<(const T& value) {
    Message streamed;
    streamed << value;
    AppendMessage(streamed);
    return *this;
  }

  // Overload that accepts basic output manipulators such as endl or
  // flush.
  AssertionResult& operator<<(
      ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
    Message streamed;
    streamed << basic_manipulator;
    AppendMessage(streamed);
    return *this;
  }

 private:
  // Appends the text of a_message to message_, creating the string on
  // first use.
  void AppendMessage(const Message& a_message) {
    ::std::string* text = message_.get();
    if (text == NULL) {
      text = new ::std::string;
      message_.reset(text);
    }
    text->append(a_message.GetString().c_str());
  }

  // Outcome of the assertion predicate.
  bool success_;
  // Description of the condition, used when the expectation construct is
  // not satisfied with the predicate's outcome.  Held through a pointer
  // to avoid taking too much stack frame space with test assertions.
  internal::scoped_ptr< ::std::string> message_;

  GTEST_DISALLOW_ASSIGN_(AssertionResult);
};

// Makes a successful assertion result.  A message may be streamed into
// the returned object with operator<<.
GTEST_API_ AssertionResult AssertionSuccess();

// Makes a failed assertion result.  A message may be streamed into the
// returned object with operator<<.
GTEST_API_ AssertionResult AssertionFailure();

// Makes a failed assertion result with the given failure message.
// Deprecated; use AssertionFailure() << msg.
GTEST_API_ AssertionResult AssertionFailure(const Message& msg);

// The abstract class that all tests inherit from.
//
// In Google Test, a unit test program contains one or many TestCases, and
// each TestCase contains one or many Tests.
//
// When you define a test using the TEST macro, you don't need to
// explicitly derive from Test - the TEST macro automatically does
// this for you.
//
// The only time you derive from Test is when defining a test fixture
// to be used in a TEST_F.  For example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { ... }
//     virtual void TearDown() { ... }
//     ...
//   };
//
//   TEST_F(FooTest, Bar) { ... }
//   TEST_F(FooTest, Baz) { ... }
//
// Test is not copyable.
class GTEST_API_ Test {
 public:
  // TestInfo needs access to the private members below (e.g. Run() and
  // DeleteSelf_()).
  friend class TestInfo;

  // Defines types for pointers to functions that set up and tear down
  // a test case.
  typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
  typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;

  // The d'tor is virtual as we intend to inherit from Test.
  virtual ~Test();

  // Sets up the stuff shared by all tests in this test case.
  //
  // Google Test will call Foo::SetUpTestCase() before running the first
  // test in test case Foo.  Hence a sub-class can define its own
  // SetUpTestCase() method to shadow the one defined in the super
  // class.
  static void SetUpTestCase() {}

  // Tears down the stuff shared by all tests in this test case.
  //
  // Google Test will call Foo::TearDownTestCase() after running the last
  // test in test case Foo.  Hence a sub-class can define its own
  // TearDownTestCase() method to shadow the one defined in the super
  // class.
  static void TearDownTestCase() {}

  // Returns true iff the current test has a fatal failure.
  static bool HasFatalFailure();

  // Returns true iff the current test has a non-fatal failure.
  static bool HasNonfatalFailure();

  // Returns true iff the current test has a (either fatal or
  // non-fatal) failure.
  static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }

  // Logs a property for the current test.  Only the last value for a given
  // key is remembered.
  // These are public static so they can be called from utility functions
  // that are not members of the test fixture.
  // The arguments are const char* instead of strings, as Google Test is
  // used on platforms where string doesn't compile.
  //
  // Note that a driving consideration for these RecordProperty methods
  // was to produce xml output suited to the Greenspan charting utility,
  // which at present will only chart values that fit in a 32-bit int. It
  // is the user's responsibility to restrict their values to 32-bit ints
  // if they intend them to be used with Greenspan.
  static void RecordProperty(const char* key, const char* value);
  static void RecordProperty(const char* key, int value);

 protected:
  // Creates a Test object.
  Test();

  // Sets up the test fixture.
  virtual void SetUp();

  // Tears down the test fixture.
  virtual void TearDown();

 private:
  // Returns true iff the current test has the same fixture class as
  // the first test in the current test case.
  static bool HasSameFixtureClass();

  // Runs the test after the test fixture has been set up.
  //
  // A sub-class must implement this to define the test logic.
  //
  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
  // Instead, use the TEST or TEST_F macro.
  virtual void TestBody() = 0;

  // Sets up, executes, and tears down the test.
  void Run();

  // Deletes self.  We deliberately pick an unusual name for this
  // internal method to avoid clashing with names used in user TESTs.
  void DeleteSelf_() { delete this; }

  // Uses a GTestFlagSaver to save and restore all Google Test flags.
  const internal::GTestFlagSaver* const gtest_flag_saver_;

  // Often a user mis-spells SetUp() as Setup() and spends a long time
  // wondering why it is never called by Google Test.  The declaration of
  // the following method is solely for catching such an error at
  // compile time:
  //
  //   - The return type is deliberately chosen to be not void, so it
  //   will be a conflict if a user declares void Setup() in his test
  //   fixture.
  //
  //   - This method is private, so it will be another compiler error
  //   if a user calls it from his test fixture.
  //
  // DO NOT OVERRIDE THIS FUNCTION.
  //
  // If you see an error about overriding the following function or
  // about it being private, you have mis-spelled SetUp() as Setup().
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }

  // We disallow copying Tests.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
};

// Makes internal::TimeInMillis available as testing::TimeInMillis; used
// below for elapsed times.
typedef internal::TimeInMillis TimeInMillis;

// A copyable object representing a user specified test property which can be
// output as a key/value string pair.
//
// Don't inherit from TestProperty as its destructor is not virtual.
class TestProperty {
 public:
  // The only constructor: a TestProperty cannot be default-constructed,
  // so a key and a value must always be supplied.
  TestProperty(const char* a_key, const char* a_value)
      : key_(a_key), value_(a_value) {}

  // The key supplied by the user.
  const char* key() const { return key_.c_str(); }

  // The value supplied by the user (or the latest one set via SetValue()).
  const char* value() const { return value_.c_str(); }

  // Replaces the value given at construction time with new_value.
  void SetValue(const char* new_value) { value_ = new_value; }

 private:
  // User-supplied key.
  internal::String key_;
  // User-supplied value.
  internal::String value_;
};

// The result of a single Test.  This includes a list of
// TestPartResults, a list of TestProperties, a count of how many
// death tests there are in the Test, and how much time it took to run
// the Test.
//
// TestResult is not copyable.
class GTEST_API_ TestResult {
 public:
  // Creates an empty TestResult.
  TestResult();

  // D'tor.  Do not inherit from TestResult.
  ~TestResult();

  // Gets the number of all test parts.  This is the sum of the number
  // of successful test parts and the number of failed test parts.
  int total_part_count() const;

  // Returns the number of the test properties.
  int test_property_count() const;

  // Returns true iff the test passed (i.e. no test part failed).
  bool Passed() const { return !Failed(); }

  // Returns true iff the test failed.
  bool Failed() const;

  // Returns true iff the test fatally failed.
  bool HasFatalFailure() const;

  // Returns true iff the test has a non-fatal failure.
  bool HasNonfatalFailure() const;

  // Returns the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const { return elapsed_time_; }

  // Returns the i-th test part result among all the results. i can range
  // from 0 to total_part_count() - 1. If i is not in that range, aborts
  // the program.
  const TestPartResult& GetTestPartResult(int i) const;

  // Returns the i-th test property. i can range from 0 to
  // test_property_count() - 1. If i is not in that range, aborts the
  // program.
  const TestProperty& GetTestProperty(int i) const;

 private:
  // These classes need access to the private accessors and mutators
  // below.
  friend class TestInfo;
  friend class UnitTest;
  friend class internal::DefaultGlobalTestPartResultReporter;
  friend class internal::ExecDeathTest;
  friend class internal::TestResultAccessor;
  friend class internal::UnitTestImpl;
  friend class internal::WindowsDeathTest;

  // Gets the vector of TestPartResults.
  const std::vector<TestPartResult>& test_part_results() const {
    return test_part_results_;
  }

  // Gets the vector of TestProperties.
  const std::vector<TestProperty>& test_properties() const {
    return test_properties_;
  }

  // Sets the elapsed time.
  void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }

  // Adds a test property to the list. The property is validated and may add
  // a non-fatal failure if invalid (e.g., if it conflicts with reserved
  // key names). If a property is already recorded for the same key, the
  // value will be updated, rather than storing multiple values for the same
  // key.
  void RecordProperty(const TestProperty& test_property);

  // Adds a failure if the key is a reserved attribute of Google Test
  // testcase tags.  Returns true if the property is valid.
  // TODO(russr): Validate attribute names are legal and human readable.
  static bool ValidateTestProperty(const TestProperty& test_property);

  // Adds a test part result to the list.
  void AddTestPartResult(const TestPartResult& test_part_result);

  // Returns the death test count.
  int death_test_count() const { return death_test_count_; }

  // Increments the death test count, returning the new count.
  int increment_death_test_count() { return ++death_test_count_; }

  // Clears the test part results.
  void ClearTestPartResults();

  // Clears the object.
  void Clear();

  // Protects mutable state of the property vector and of owned
  // properties, whose values may be updated.
  // NOTE(review): the identifier is misspelled ("properites"); it is
  // presumably referenced by the out-of-line implementation, so it is
  // left unchanged here.
  internal::Mutex test_properites_mutex_;

  // The vector of TestPartResults
  std::vector<TestPartResult> test_part_results_;
  // The vector of TestProperties
  std::vector<TestProperty> test_properties_;
  // Running count of death tests.
  int death_test_count_;
  // The elapsed time, in milliseconds.
  TimeInMillis elapsed_time_;

  // We disallow copying TestResult.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
};  // class TestResult

// A TestInfo object holds what Google Test knows about a single test:
//
//   - the name of the test case the test belongs to
//   - the name of the test itself
//   - whether the test should be run
//   - the factory that creates the test object
//   - the result of the test
//
// The constructor of TestInfo registers itself with the UnitTest
// singleton such that the RUN_ALL_TESTS() macro knows which tests to
// run.
class GTEST_API_ TestInfo {
 public:
  // Destructs a TestInfo object.  The destructor is intentionally not
  // virtual, so TestInfo must not be used as a base class.
  ~TestInfo();

  // Name of the test case this test belongs to.
  const char* test_case_name() const { return test_case_name_.c_str(); }

  // Name of this test.
  const char* name() const { return name_.c_str(); }

  // Name of the parameter type.  NULL is returned when this is neither a
  // typed nor a type-parameterized test.
  const char* type_param() const {
    const ::std::string* const param = type_param_.get();
    return param != NULL ? param->c_str() : NULL;
  }

  // Text representation of the value parameter.  NULL is returned when
  // this is not a value-parameterized test.
  const char* value_param() const {
    const ::std::string* const param = value_param_.get();
    return param != NULL ? param->c_str() : NULL;
  }

  // Tells whether this test should run.  That is the case when the test is
  // not disabled (or it is disabled but the also_run_disabled_tests flag
  // has been specified) and its full name matches the user-specified
  // filter.
  //
  // Google Test lets the user filter tests by their full names.  The full
  // name of a test Bar in test case Foo is "Foo.Bar", and only tests that
  // match the filter are run.
  //
  // A filter is a colon-separated list of glob (not regex) patterns,
  // optionally followed by a '-' and a colon-separated list of negative
  // patterns (tests to exclude).  A test is run if it matches one of the
  // positive patterns and none of the negative patterns.
  //
  // Example: *A*:Foo.* is a filter that matches any string that contains
  // the character 'A' or starts with "Foo.".
  bool should_run() const { return should_run_; }

  // Read-only access to the result of the test.
  const TestResult* result() const { return &result_; }

 private:

#if GTEST_HAS_DEATH_TEST
  friend class internal::DefaultDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST
  friend class Test;
  friend class TestCase;
  friend class internal::UnitTestImpl;
  friend TestInfo* internal::MakeAndRegisterTestInfo(
      const char* test_case_name, const char* name,
      const char* type_param,
      const char* value_param,
      internal::TypeId fixture_class_id,
      Test::SetUpTestCaseFunc set_up_tc,
      Test::TearDownTestCaseFunc tear_down_tc,
      internal::TestFactoryBase* factory);

  // Constructs a TestInfo object.  Ownership of the factory object passes
  // to the newly constructed instance.
  TestInfo(const char* test_case_name, const char* name,
           const char* a_type_param,
           const char* a_value_param,
           internal::TypeId fixture_class_id,
           internal::TestFactoryBase* factory);

  // Counts one more death test encountered in this test so far; the call
  // is forwarded to the owned TestResult.
  int increment_death_test_count() {
    return result_.increment_death_test_count();
  }

  // Creates the test object, runs it, records its result, and then
  // deletes it.
  void Run();

  // Resets the result stored in the given TestInfo.
  static void ClearTestResult(TestInfo* test_info) {
    TestResult& stored_result = test_info->result_;
    stored_result.Clear();
  }

  // Immutable properties of the test.
  const std::string test_case_name_;     // Test case name
  const std::string name_;               // Test name
  // Parameter type name; holds NULL for tests that are neither typed nor
  // type-parameterized.
  const internal::scoped_ptr<const ::std::string> type_param_;
  // Textual form of the value parameter; holds NULL for tests that are not
  // value-parameterized.
  const internal::scoped_ptr<const ::std::string> value_param_;
  const internal::TypeId fixture_class_id_;   // ID of the test fixture class
  bool should_run_;                 // True iff this test should run
  bool is_disabled_;                // True iff this test is disabled
  bool matches_filter_;             // True if this test matches the
                                    // user-specified filter.
  internal::TestFactoryBase* const factory_;  // The factory that creates
                                              // the test object

  // Mutable state: has to be reset before the test is run for the second
  // time.
  TestResult result_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
};

// A test case, which consists of a vector of TestInfos.
//
// TestCase is not copyable.
class GTEST_API_ TestCase {
 public:
  // Creates a TestCase with the given name.
  //
  // TestCase does NOT have a default constructor.  Always use this
  // constructor to create a TestCase object.
  //
  // Arguments:
  //
  //   name:         name of the test case
  //   a_type_param: the name of the test's type parameter, or NULL if
  //                 this is not a type-parameterized test.
  //   set_up_tc:    pointer to the function that sets up the test case
  //   tear_down_tc: pointer to the function that tears down the test case
  TestCase(const char* name, const char* a_type_param,
           Test::SetUpTestCaseFunc set_up_tc,
           Test::TearDownTestCaseFunc tear_down_tc);

  // Destructor of TestCase.
  virtual ~TestCase();

  // Gets the name of the TestCase.
  const char* name() const { return name_.c_str(); }

  // Returns the name of the parameter type, or NULL if this is not a
  // type-parameterized test case.
  const char* type_param() const {
    if (type_param_.get() != NULL)
      return type_param_->c_str();
    return NULL;
  }

  // Returns true if any test in this test case should run.
  bool should_run() const { return should_run_; }

  // Gets the number of successful tests in this test case.
  int successful_test_count() const;

  // Gets the number of failed tests in this test case.
  int failed_test_count() const;

  // Gets the number of disabled tests in this test case.
  int disabled_test_count() const;

  // Get the number of tests in this test case that should run.
  int test_to_run_count() const;

  // Gets the number of all tests in this test case.
  int total_test_count() const;

  // Returns true iff the test case passed.
  bool Passed() const { return !Failed(); }

  // Returns true iff the test case failed.
  bool Failed() const { return failed_test_count() > 0; }

  // Returns the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const { return elapsed_time_; }

  // Returns the i-th test among all the tests. i can range from 0 to
  // total_test_count() - 1. If i is not in that range, returns NULL.
  const TestInfo* GetTestInfo(int i) const;

 private:
  friend class Test;
  friend class internal::UnitTestImpl;

  // Gets the (mutable) vector of TestInfos in this TestCase.
  std::vector<TestInfo*>& test_info_list() { return test_info_list_; }

  // Gets the (immutable) vector of TestInfos in this TestCase.
  const std::vector<TestInfo*>& test_info_list() const {
    return test_info_list_;
  }

  // Returns the i-th test among all the tests. i can range from 0 to
  // total_test_count() - 1. If i is not in that range, returns NULL.
  TestInfo* GetMutableTestInfo(int i);

  // Sets the should_run member.
  void set_should_run(bool should) { should_run_ = should; }

  // Adds a TestInfo to this test case.  Will delete the TestInfo upon
  // destruction of the TestCase object.
  void AddTestInfo(TestInfo * test_info);

  // Clears the results of all tests in this test case.
  void ClearResult();

  // Clears the results of all tests in the given test case.
  static void ClearTestCaseResult(TestCase* test_case) {
    test_case->ClearResult();
  }

  // Runs every test in this TestCase.
  void Run();

  // Runs SetUpTestCase() for this TestCase.  This wrapper is needed
  // for catching exceptions thrown from SetUpTestCase().
  void RunSetUpTestCase() { (*set_up_tc_)(); }

  // Runs TearDownTestCase() for this TestCase.  This wrapper is
  // needed for catching exceptions thrown from TearDownTestCase().
  void RunTearDownTestCase() { (*tear_down_tc_)(); }

  // Returns true iff test passed.
  static bool TestPassed(const TestInfo* test_info) {
    return test_info->should_run() && test_info->result()->Passed();
  }

  // Returns true iff test failed.
  static bool TestFailed(const TestInfo* test_info) {
    return test_info->should_run() && test_info->result()->Failed();
  }

  // Returns true iff test is disabled.
  static bool TestDisabled(const TestInfo* test_info) {
    return test_info->is_disabled_;
  }

  // Returns true if the given test should run.
  static bool ShouldRunTest(const TestInfo* test_info) {
    return test_info->should_run();
  }

  // Shuffles the tests in this test case.
  void ShuffleTests(internal::Random* random);

  // Restores the test order to before the first shuffle.
  void UnshuffleTests();

  // Name of the test case.
  internal::String name_;
  // Name of the parameter type, or NULL if this is not a typed or a
  // type-parameterized test.
  const internal::scoped_ptr<const ::std::string> type_param_;
  // The vector of TestInfos in their original order.  It owns the
  // elements in the vector.
  std::vector<TestInfo*> test_info_list_;
  // Provides a level of indirection for the test list to allow easy
  // shuffling and restoring the test order.  The i-th element in this
  // vector is the index of the i-th test in the shuffled test list.
  std::vector<int> test_indices_;
  // Pointer to the function that sets up the test case.
  Test::SetUpTestCaseFunc set_up_tc_;
  // Pointer to the function that tears down the test case.
  Test::TearDownTestCaseFunc tear_down_tc_;
  // True iff any test in this test case should run.
  bool should_run_;
  // Elapsed time, in milliseconds.
  TimeInMillis elapsed_time_;

  // We disallow copying TestCases.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
};

// An Environment object knows how to set up and tear down an
// environment.  Users subclass it to define their own environment(s).
//
// Set-up and tear-down happen in the virtual methods SetUp() and
// TearDown() rather than in the constructor and the destructor, because:
//
//   1. You cannot safely throw from a destructor.  This is a problem
//      as in some cases Google Test is used where exceptions are enabled, and
//      we may want to implement ASSERT_* using exceptions where they are
//      available.
//   2. You cannot use ASSERT_* directly in a constructor or
//      destructor.
class Environment {
 public:
  // Virtual because Environment is meant to be subclassed.
  virtual ~Environment() {}

  // Override this to define how to set up the environment.  The default
  // implementation does nothing.
  virtual void SetUp() {}

  // Override this to define how to tear down the environment.  The default
  // implementation does nothing.
  virtual void TearDown() {}
 private:
  // Guard against a common typo: if you see an error about overriding the
  // following function or about it being private, you have mis-spelled
  // SetUp() as Setup().
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() {
    Setup_should_be_spelled_SetUp* const nothing = NULL;
    return nothing;
  }
};

// The interface for tracing execution of tests.  Every event method is
// pure virtual, so a concrete listener has to implement all of them.  The
// methods are declared in the order the corresponding events are fired.
class TestEventListener {
 public:
  // Virtual so that a listener can be destroyed through a pointer to this
  // interface.
  virtual ~TestEventListener() {}

  // Fired before any test activity starts.
  virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;

  // Fired before each iteration of tests starts.  There may be more than
  // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
  // index, starting from 0.
  virtual void OnTestIterationStart(const UnitTest& unit_test,
                                    int iteration) = 0;

  // Fired before environment set-up for each iteration of tests starts.
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;

  // Fired after environment set-up for each iteration of tests ends.
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;

  // Fired before the test case starts.
  virtual void OnTestCaseStart(const TestCase& test_case) = 0;

  // Fired before the test starts.
  virtual void OnTestStart(const TestInfo& test_info) = 0;

  // Fired after a failed assertion or a SUCCEED() invocation.
  virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;

  // Fired after the test ends.
  virtual void OnTestEnd(const TestInfo& test_info) = 0;

  // Fired after the test case ends.
  virtual void OnTestCaseEnd(const TestCase& test_case) = 0;

  // Fired before environment tear-down for each iteration of tests starts.
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;

  // Fired after environment tear-down for each iteration of tests ends.
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;

  // Fired after each iteration of tests finishes.  iteration is the same
  // kind of index that is passed to OnTestIterationStart().
  virtual void OnTestIterationEnd(const UnitTest& unit_test,
                                  int iteration) = 0;

  // Fired after all test activities have ended.
  virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
};

// A convenience base class for users who need to override just one or two
// listener methods.  Every TestEventListener method is implemented here
// with an empty body.  The trade-off: if the signature of a method changes
// in TestEventListener, an override written against the old signature will
// not be caught during the build.  For comments about each method please
// see the definition of TestEventListener above.
class EmptyTestEventListener : public TestEventListener {
 public:
  // Parameter names are commented out because the empty bodies do not use
  // them.
  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
                                    int /*iteration*/) {}
  virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
  virtual void OnTestStart(const TestInfo& /*test_info*/) {}
  virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
  virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
  virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
  virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
                                  int /*iteration*/) {}
  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
};

// TestEventListeners lets users add listeners to track events in Google Test.
// It also keeps track of the two standard listeners (console result printer
// and XML generator) so that users can look them up and release them.
class GTEST_API_ TestEventListeners {
 public:
  TestEventListeners();
  ~TestEventListeners();

  // Appends an event listener to the end of the list. Google Test assumes
  // the ownership of the listener (i.e. it will delete the listener when
  // the test program finishes).
  void Append(TestEventListener* listener);

  // Removes the given event listener from the list and returns it.  It then
  // becomes the caller's responsibility to delete the listener. Returns
  // NULL if the listener is not found in the list.
  TestEventListener* Release(TestEventListener* listener);

  // Returns the standard listener responsible for the default console
  // output.  Can be removed from the listeners list to shut down default
  // console output.  Note that removing this object from the listener list
  // with Release transfers its ownership to the caller and makes this
  // function return NULL the next time.
  TestEventListener* default_result_printer() const {
    return default_result_printer_;
  }

  // Returns the standard listener responsible for the default XML output
  // controlled by the --gtest_output=xml flag.  Can be removed from the
  // listeners list by users who want to shut down the default XML output
  // controlled by this flag and substitute it with custom one.  Note that
  // removing this object from the listener list with Release transfers its
  // ownership to the caller and makes this function return NULL the next
  // time.
  TestEventListener* default_xml_generator() const {
    return default_xml_generator_;
  }

 private:
  // These classes are friends because they use the private members
  // declared below.
  friend class TestCase;
  friend class TestInfo;
  friend class internal::DefaultGlobalTestPartResultReporter;
  friend class internal::NoExecDeathTest;
  friend class internal::TestEventListenersAccessor;
  friend class internal::UnitTestImpl;

  // Returns repeater that broadcasts the TestEventListener events to all
  // subscribers.
  TestEventListener* repeater();

  // Sets the default_result_printer attribute to the provided listener.
  // The listener is also added to the listener list and previous
  // default_result_printer is removed from it and deleted. The listener can
  // also be NULL in which case it will not be added to the list. Does
  // nothing if the previous and the current listener objects are the same.
  void SetDefaultResultPrinter(TestEventListener* listener);

  // Sets the default_xml_generator attribute to the provided listener.  The
  // listener is also added to the listener list and previous
  // default_xml_generator is removed from it and deleted. The listener can
  // also be NULL in which case it will not be added to the list. Does
  // nothing if the previous and the current listener objects are the same.
  void SetDefaultXmlGenerator(TestEventListener* listener);

  // Controls whether events will be forwarded by the repeater to the
  // listeners in the list: the first function queries the setting, the
  // second one turns forwarding off.
  bool EventForwardingEnabled() const;
  void SuppressEventForwarding();

  // The actual list of listeners.
  internal::TestEventRepeater* repeater_;
  // Listener responsible for the standard result output.
  TestEventListener* default_result_printer_;
  // Listener responsible for the creation of the XML output file.
  TestEventListener* default_xml_generator_;

  // We disallow copying TestEventListeners.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
};

// A UnitTest consists of a vector of TestCases.
//
// This is a singleton class.  The only instance of UnitTest is
// created when UnitTest::GetInstance() is first called.  This
// instance is never deleted.
//
// UnitTest is not copyable.
//
// This class is thread-safe as long as the methods are called
// according to their specification.
class GTEST_API_ UnitTest {
 public:
  // Gets the singleton UnitTest object.  The first time this method
  // is called, a UnitTest object is constructed and returned.
  // Consecutive calls will return the same object.
  static UnitTest* GetInstance();

  // Runs all tests in this UnitTest object and prints the result.
  // Returns 0 if successful, or 1 otherwise.
  //
  // This method can only be called from the main thread.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  int Run() GTEST_MUST_USE_RESULT_;

  // Returns the working directory when the first TEST() or TEST_F()
  // was executed.  The UnitTest object owns the string.
  const char* original_working_dir() const;

  // Returns the TestCase object for the test that's currently running,
  // or NULL if no test is running.
  const TestCase* current_test_case() const;

  // Returns the TestInfo object for the test that's currently running,
  // or NULL if no test is running.
  const TestInfo* current_test_info() const;

  // Returns the random seed used at the start of the current test run.
  int random_seed() const;

#if GTEST_HAS_PARAM_TEST
  // Returns the ParameterizedTestCaseRegistry object used to keep track of
  // value-parameterized tests and instantiate and register them.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  internal::ParameterizedTestCaseRegistry& parameterized_test_registry();
#endif  // GTEST_HAS_PARAM_TEST

  // Gets the number of successful test cases.
  int successful_test_case_count() const;

  // Gets the number of failed test cases.
  int failed_test_case_count() const;

  // Gets the number of all test cases.
  int total_test_case_count() const;

  // Gets the number of all test cases that contain at least one test
  // that should run.
  int test_case_to_run_count() const;

  // Gets the number of successful tests.
  int successful_test_count() const;

  // Gets the number of failed tests.
  int failed_test_count() const;

  // Gets the number of disabled tests.
  int disabled_test_count() const;

  // Gets the number of all tests.
  int total_test_count() const;

  // Gets the number of tests that should run.
  int test_to_run_count() const;

  // Gets the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const;

  // Returns true iff the unit test passed (i.e. all test cases passed).
  bool Passed() const;

  // Returns true iff the unit test failed (i.e. some test case failed
  // or something outside of all tests failed).
  bool Failed() const;

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  const TestCase* GetTestCase(int i) const;

  // Returns the list of event listeners that can be used to track events
  // inside Google Test.
  TestEventListeners& listeners();

 private:
  // Registers and returns a global test environment.  When a test
  // program is run, all global test environments will be set-up in
  // the order they were registered.  After all tests in the program
  // have finished, all global test environments will be torn-down in
  // the *reverse* order they were registered.
  //
  // The UnitTest object takes ownership of the given environment.
  //
  // This method can only be called from the main thread.
  Environment* AddEnvironment(Environment* env);

  // Adds a TestPartResult to the current TestResult object.  All
  // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
  // eventually call this to report their results.  The user code
  // should use the assertion macros instead of calling this directly.
  void AddTestPartResult(TestPartResult::Type result_type,
                         const char* file_name,
                         int line_number,
                         const internal::String& message,
                         const internal::String& os_stack_trace);

  // Adds a TestProperty to the current TestResult object. If the result already
  // contains a property with the same key, the value will be updated.
  void RecordPropertyForCurrentTest(const char* key, const char* value);

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  // This is the non-const counterpart of GetTestCase().
  TestCase* GetMutableTestCase(int i);

  // Accessors for the implementation object (non-const and const flavors).
  internal::UnitTestImpl* impl() { return impl_; }
  const internal::UnitTestImpl* impl() const { return impl_; }

  // These classes and functions are friends as they need to access private
  // members of UnitTest.
  friend class Test;
  friend class internal::AssertHelper;
  friend class internal::ScopedTrace;
  friend Environment* AddGlobalTestEnvironment(Environment* env);
  friend internal::UnitTestImpl* internal::GetUnitTestImpl();
  friend void internal::ReportFailureInUnknownLocation(
      TestPartResult::Type result_type,
      const internal::String& message);

  // Creates an empty UnitTest.  Private: instances are obtained through
  // GetInstance().
  UnitTest();

  // D'tor
  virtual ~UnitTest();

  // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
  // Google Test trace stack.
  void PushGTestTrace(const internal::TraceInfo& trace);

  // Pops a trace from the per-thread Google Test trace stack.
  void PopGTestTrace();

  // Protects mutable state in *impl_.  This is mutable as some const
  // methods need to lock it too.
  mutable internal::Mutex mutex_;

  // Opaque implementation object.  This field is never changed once
  // the object is constructed.  We don't mark it as const here, as
  // doing so will cause a warning in the constructor of UnitTest.
  // Mutable state in *impl_ is protected by mutex_.
  internal::UnitTestImpl* impl_;

  // We disallow copying UnitTest.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
};

// Convenience wrapper that registers an environment for the test program
// with the UnitTest singleton and returns whatever
// UnitTest::AddEnvironment() returns.
//
// Call this before RUN_ALL_TESTS() is called, probably in main().  If you
// use gtest_main, the call has to happen before main() starts for it to
// take effect.  One way to do that is a global variable such as:
//
//   testing::Environment* const foo_env =
//       testing::AddGlobalTestEnvironment(new FooEnvironment);
//
// However, we strongly recommend you to write your own main() and
// call AddGlobalTestEnvironment() there, as relying on initialization
// of global variables makes the code harder to read and may cause
// problems when you register multiple environments from different
// translation units and the environments have dependencies among them
// (remember that the compiler doesn't guarantee the order in which
// global variables from different translation units are initialized).
inline Environment* AddGlobalTestEnvironment(Environment* env) {
  UnitTest* const unit_test = UnitTest::GetInstance();
  return unit_test->AddEnvironment(env);
}

// Initializes Google Test.  This must be called before calling
// RUN_ALL_TESTS().  In particular, it parses a command line for the
// flags that Google Test recognizes.  Whenever a Google Test flag is
// seen, it is removed from argv, and *argc is decremented.
//
// No value is returned.  Instead, the Google Test flag variables are
// updated.
//
// Calling the function for the second time has no user-visible effect.
GTEST_API_ void InitGoogleTest(int* argc, char** argv);

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);

namespace internal {

// Formats an operand of a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT,
// etc.) for use in a failure message.  The type (but not value) of the
// other operand may affect the format.  This allows us to print a char*
// as a raw pointer when it is compared against another char*, and print
// it as a C string when it is compared against an std::string object,
// for example.
//
// This default implementation ignores the type of the other operand and
// simply prints the value.  Some specialized versions are used to handle
// formatting wide or narrow C strings.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename T1, typename T2>
String FormatForComparisonFailureMessage(const T1& value,
                                         const T2& /* other_operand */) {
  // The namespace is spelled out in full because C++Builder compiles this
  // incorrectly otherwise.
  const String printed = ::testing::PrintToString(value);
  return printed;
}

// The helper function for {ASSERT|EXPECT}_EQ.
//
// Compares expected and actual with operator==.  Returns
// AssertionSuccess() when they compare equal; otherwise returns the
// failure built by EqFailure() from the two expression texts and the
// formatted operand values.
template <typename T1, typename T2>
AssertionResult CmpHelperEQ(const char* expected_expression,
                            const char* actual_expression,
                            const T1& expected,
                            const T2& actual) {
  // The pragmas below bracket only the comparison itself, so the
  // signed/unsigned warning is silenced for that expression alone.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4389)  // Temporarily disables warning on
                               // signed/unsigned mismatch.
#endif

  if (expected == actual) {
    return AssertionSuccess();
  }

#ifdef _MSC_VER
# pragma warning(pop)          // Restores the warning state.
#endif

  // Each operand is formatted with the other operand passed along, since
  // the other operand's type may affect the format.
  return EqFailure(expected_expression,
                   actual_expression,
                   FormatForComparisonFailureMessage(expected, actual),
                   FormatForComparisonFailureMessage(actual, expected),
                   false);
}

// With this overloaded version, we allow anonymous enums to be used
// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
// can be implicitly converted to BiggestInt.
GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression,
                                       const char* actual_expression,
                                       BiggestInt expected,
                                       BiggestInt actual);

// The helper class for {ASSERT|EXPECT}_EQ.  The template argument
// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
// is a null pointer literal.  The following default implementation is
// for lhs_is_null_literal being false.  Both Compare() overloads simply
// forward their arguments to CmpHelperEQ().
template <bool lhs_is_null_literal>
class EqHelper {
 public:
  // This templatized version is for the general case.
  template <typename T1, typename T2>
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 const T1& expected,
                                 const T2& actual) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // With this overloaded version, we allow anonymous enums to be used
  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
  // enums can be implicitly converted to BiggestInt.
  //
  // Even though its body looks the same as the above version, we
  // cannot merge the two, as it will make anonymous enums unhappy.
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 BiggestInt expected,
                                 BiggestInt actual) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }
};

// This specialization is used when the first argument to ASSERT_EQ()
// is a null pointer literal, like NULL, false, or 0.  It provides two
// Compare() overloads and relies on overload resolution to pick the
// right one depending on whether the second argument is a pointer.
template <>
class EqHelper<true> {
 public:
  // We define two overloaded versions of Compare().  The first
  // version will be picked when the second argument to ASSERT_EQ() is
  // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
  // EXPECT_EQ(false, a_bool).
  template <typename T1, typename T2>
  static AssertionResult Compare(
      const char* expected_expression,
      const char* actual_expression,
      const T1& expected,
      const T2& actual,
      // The following defaulted parameter removes this overload from
      // consideration when T2 is a pointer type.  We need this because
      // ASSERT_EQ(NULL, my_ptr) expands to Compare("", "", NULL, my_ptr),
      // which requires a conversion to match the Secret* in the other
      // overload, which would otherwise make this template match better.
      typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // This version will be picked when the second argument to ASSERT_EQ() is a
  // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
  template <typename T>
  static AssertionResult Compare(
      const char* expected_expression,
      const char* actual_expression,
      // We used to have a second template parameter instead of Secret*.  That
      // template parameter would deduce to 'long', making this a better match
      // than the first overload even without the first overload's EnableIf.
      // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
      // non-pointer argument" (even a deduced integral argument), so the old
      // implementation caused warnings in user code.
      Secret* /* expected (NULL) */,
      T* actual) {
    // We already know that 'expected' is a null pointer, so a null T* is
    // passed on in its place; both operands then have the same pointer type.
    return CmpHelperEQ(expected_expression, actual_expression,
                       static_cast<T*>(NULL), actual);
  }
};

// A macro that stamps out the helper functions behind ASSERT_?? and
// EXPECT_??.  It exists only to avoid copy-and-paste of near-identical
// code.
//
// Each expansion defines a templatized CmpHelper<op_name>() that returns
// AssertionSuccess() when "val1 op val2" holds and a descriptive
// AssertionFailure() otherwise.  It also declares (without a trailing
// semicolon) an overloaded version for BiggestInt in order to reduce code
// bloat and allow anonymous enums to be used with {ASSERT|EXPECT}_?? when
// compiled with gcc 4.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
template <typename T1, typename T2>\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   const T1& val1, const T2& val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  }\
  return AssertionFailure() \
      << "Expected: (" << expr1 << ") " #op " (" << expr2\
      << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
      << " vs " << FormatForComparisonFailureMessage(val2, val1);\
}\
GTEST_API_ AssertionResult CmpHelper##op_name(\
    const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)

// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.

// Implements the helper function for {ASSERT|EXPECT}_NE
GTEST_IMPL_CMP_HELPER_(NE, !=);
// Implements the helper function for {ASSERT|EXPECT}_LE
GTEST_IMPL_CMP_HELPER_(LE, <=);
// Implements the helper function for {ASSERT|EXPECT}_LT
GTEST_IMPL_CMP_HELPER_(LT, < );
// Implements the helper function for {ASSERT|EXPECT}_GE
GTEST_IMPL_CMP_HELPER_(GE, >=);
// Implements the helper function for {ASSERT|EXPECT}_GT
GTEST_IMPL_CMP_HELPER_(GT, > );

#undef GTEST_IMPL_CMP_HELPER_

// The helper function for {ASSERT|EXPECT}_STREQ.
//
// Declaration only; the definition lives outside this header section.
//   expected_expression, actual_expression: source text of the two
//       arguments as written at the assertion site.
//   expected, actual: the C strings under comparison.
// Returns an AssertionResult describing the outcome.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
                                          const char* actual_expression,
                                          const char* expected,
                                          const char* actual);

// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
//
// Declaration only; same parameter layout as CmpHelperSTREQ: two expression
// texts followed by the two C strings.  Presumably the comparison ignores
// case, as the name suggests -- confirm against the definition.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
                                              const char* actual_expression,
                                              const char* expected,
                                              const char* actual);

// The helper function for {ASSERT|EXPECT}_STRNE.
//
// Declaration only.
//   s1_expression, s2_expression: source text of the two arguments as
//       written at the assertion site.
//   s1, s2: the C strings under comparison.
// Returns an AssertionResult describing the outcome.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
                                          const char* s2_expression,
                                          const char* s1,
                                          const char* s2);

// The helper function for {ASSERT|EXPECT}_STRCASENE.
//
// Declaration only; same parameter layout as CmpHelperSTRNE: two expression
// texts followed by the two C strings.  Presumably the comparison ignores
// case, as the name suggests -- confirm against the definition.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                              const char* s2_expression,
                                              const char* s1,
                                              const char* s2);


// Helper function for *_STREQ on wide strings.
//
// Overload of CmpHelperSTREQ whose operands are wchar_t strings; the two
// expression texts remain narrow strings.  Declaration only.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
                                          const char* actual_expression,
                                          const wchar_t* expected,
                                          const wchar_t* actual);

// Helper function for *_STRNE on wide strings.
//
// Overload of CmpHelperSTRNE whose operands are wchar_t strings; the two
// expression texts remain narrow strings.  Declaration only.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
                                          const char* s2_expression,
                                          const wchar_t* s1,
                                          const wchar_t* s2);

}  // namespace internal

// IsSubstring() and IsNotSubstring() are intended to be used as the
// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
// themselves.  They check whether needle is a substring of haystack
// (NULL is considered a substring of itself only), and return an
// appropriate error message when they fail.
//
// The {needle,haystack}_expr arguments are the stringified
// expressions that generated the two real arguments.
//
// All overloads share the same four-argument layout -- two expression
// texts followed by the two values -- and differ only in the string type
// of needle and haystack.  These are declarations only.

// Narrow and wide C-string overloads.
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack);
// ::std::string overloads.
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack);

// ::std::wstring overloads of IsSubstring() / IsNotSubstring().  They are
// declared only when GTEST_HAS_STD_WSTRING is non-zero.
#if GTEST_HAS_STD_WSTRING
GTEST_API_ AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack);
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

// Builds the result of a floating-point equality assertion.
//
// Template parameter:
//
//   RawType: the raw floating-point type (either float or double)
//
// Both operands are wrapped in FloatingPoint<RawType> and compared with
// AlmostEquals().  When they do not match, each value is rendered with
// numeric_limits<RawType>::digits10 + 2 significant digits and the two
// renderings, together with the expression texts, are handed to EqFailure().
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename RawType>
AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression,
                                         const char* actual_expression,
                                         RawType expected,
                                         RawType actual) {
  const FloatingPoint<RawType> expected_fp(expected);
  const FloatingPoint<RawType> actual_fp(actual);
  if (expected_fp.AlmostEquals(actual_fp)) {
    return AssertionSuccess();
  }

  // Two digits beyond digits10 are printed for both operands.
  const int precision = std::numeric_limits<RawType>::digits10 + 2;

  ::std::stringstream expected_text;
  expected_text << std::setprecision(precision) << expected;

  ::std::stringstream actual_text;
  actual_text << std::setprecision(precision) << actual;

  return EqFailure(expected_expression, actual_expression,
                   StringStreamToString(&expected_text),
                   StringStreamToString(&actual_text),
                   false);
}

// Helper function for implementing ASSERT_NEAR.
//
// Declaration only.
//   expr1, expr2, abs_error_expr: source text of the three arguments as
//       written at the assertion site.
//   val1, val2: the two values being compared.
//   abs_error: the tolerance; presumably the largest accepted absolute
//       difference between val1 and val2 -- confirm against the definition.
// Returns an AssertionResult describing the outcome.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
                                                const char* expr2,
                                                const char* abs_error_expr,
                                                double val1,
                                                double val2,
                                                double abs_error);

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
// A class that enables one to stream messages to assertion macros
//
// An instance records the kind of test-part result, the source location and
// the base message text; operator= then accepts the Message a user streamed
// onto the assertion.  Copying and assignment between AssertHelper objects
// are disabled.
class GTEST_API_ AssertHelper {
 public:
  // Constructor.
  //   type:    kind of test-part result being reported.
  //   file:    source file the assertion is attributed to.
  //   line:    line number within that file.
  //   message: base text of the report.
  AssertHelper(TestPartResult::Type type,
               const char* file,
               int line,
               const char* message);
  ~AssertHelper();

  // Message assignment is a semantic trick to enable assertion
  // streaming; see the GTEST_MESSAGE_ macro below.
  // It returns void and is const, so it can be applied to a temporary.
  void operator=(const Message& message) const;

 private:
  // We put our data in a struct so that the size of the AssertHelper class can
  // be as small as possible.  This is important because gcc is incapable of
  // re-using stack space even for temporary variables, so every EXPECT_EQ
  // reserves stack space for another AssertHelper.
  struct AssertHelperData {
    // Stores the four constructor arguments in the const members below.
    AssertHelperData(TestPartResult::Type t,
                     const char* srcfile,
                     int line_num,
                     const char* msg)
        : type(t), file(srcfile), line(line_num), message(msg) { }

    TestPartResult::Type const type;  // Kind of result being reported.
    const char*        const file;    // Source file name; pointer is stored as given.
    int                const line;    // Source line number.
    String             const message; // Base text, held as a String.

   private:
    GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
  };

  // The only data member, so the helper itself is one pointer in size.
  // NOTE(review): presumably allocated in the constructor and released in
  // the destructor; both are defined elsewhere -- confirm.
  AssertHelperData* const data_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
};

}  // namespace internal

#if GTEST_HAS_PARAM_TEST
// The pure interface class that all value-parameterized tests inherit from.
// A value-parameterized class must inherit from both ::testing::Test and
// ::testing::WithParamInterface. In most cases that just means inheriting
// from ::testing::TestWithParam, but more complicated test hierarchies
// may need to inherit from Test and WithParamInterface at different levels.
//
// This interface has support for accessing the test parameter value via
// the GetParam() method.
//
// Use it with one of the parameter generator defining functions, like Range(),
// Values(), ValuesIn(), Bool(), and Combine().
//
// class FooTest : public ::testing::TestWithParam<int> {
//  protected:
//   FooTest() {
//     // Can use GetParam() here.
//   }
//   virtual ~FooTest() {
//     // Can use GetParam() here.
//   }
//   virtual void SetUp() {
//     // Can use GetParam() here.
//   }
//   virtual void TearDown() {
//     // Can use GetParam() here.
//   }
// };
// TEST_P(FooTest, DoesBar) {
//   // Can use GetParam() method here.
//   Foo foo;
//   ASSERT_TRUE(foo.DoesBar(GetParam()));
// }
// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));

// Interface that gives a value-parameterized test access to its parameter.
//
// Template parameter:
//
//   T: the parameter type; exposed to users as ParamType.
//
// The current parameter is kept in a static pointer (one per instantiation
// of this template) that starts out NULL and is installed through the
// private SetParam(), which only the friend
// internal::ParameterizedTestFactory can call.
template <typename T>
class WithParamInterface {
 public:
  typedef T ParamType;
  virtual ~WithParamInterface() {}

  // The current parameter value. Is also available in the test fixture's
  // constructor. This member function is non-static, even though it only
  // references static data, to reduce the opportunity for incorrect uses
  // like writing 'WithParamInterface<bool>::GetParam()' for a test that
  // uses a fixture whose parameter type is int.
  const ParamType& GetParam() const {
    // parameter_ is NULL until SetParam() has been called, which happens
    // only for tests created by the parameterized test factory.  Report a
    // readable diagnostic instead of dereferencing a null pointer when the
    // fixture is used from a non-parameterized test.
    // NOTE(review): relies on GTEST_CHECK_ being defined earlier in this
    // header (it is part of Google Test's port layer) -- confirm.
    GTEST_CHECK_(parameter_ != NULL)
        << "GetParam() can only be called inside a value-parameterized test "
        << "-- did you intend to write TEST_P instead of TEST_F?";
    return *parameter_;
  }

 private:
  // Sets parameter value. The caller is responsible for making sure the value
  // remains alive and unchanged throughout the current test.
  static void SetParam(const ParamType* parameter) {
    parameter_ = parameter;
  }

  // Static value used for accessing parameter during a test lifetime.
  static const ParamType* parameter_;

  // TestClass must be a subclass of WithParamInterface<T> and Test.
  template <class TestClass> friend class internal::ParameterizedTestFactory;
};

// Out-of-class definition of the static member; no parameter is installed
// until SetParam() runs.
template <typename T>
const T* WithParamInterface<T>::parameter_ = NULL;

// Most value-parameterized classes can ignore the existence of
// WithParamInterface, and can just inherit from ::testing::TestWithParam.
//
// Convenience fixture base: it adds no members of its own and simply
// combines Test with WithParamInterface<T>, so GetParam() and ParamType are
// inherited from the latter.

template <typename T>
class TestWithParam : public Test, public WithParamInterface<T> {
};

#endif  // GTEST_HAS_PARAM_TEST

// Macros for indicating success/failure in test code.

// ADD_FAILURE unconditionally adds a failure to the current test.
// SUCCEED generates a success - it doesn't automatically make the
// current test successful, as a test is only successful when it has
// no failure.
//
// EXPECT_* verifies that a certain condition is satisfied.  If not,
// it behaves like ADD_FAILURE.  In particular:
//
//   EXPECT_TRUE  verifies that a Boolean condition is true.
//   EXPECT_FALSE verifies that a Boolean condition is false.
//
// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
// that they will also abort the current function on failure.  People
// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
// writing data-driven tests often find themselves using ADD_FAILURE
// and EXPECT_* more.
//
// Examples:
//
//   EXPECT_TRUE(server.StatusIsOK());
//   ASSERT_FALSE(server.HasPendingRequest(port))
//       << "There are still pending requests " << "on port " << port;

// Records a nonfatal failure carrying the generic text "Failed".
#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")

// Records a nonfatal failure with the generic text "Failed", attributed to
// the given source file and line.
#define ADD_FAILURE_AT(file, line)         \
  GTEST_MESSAGE_AT_(                       \
      file, line, "Failed",                \
      ::testing::TestPartResult::kNonFatalFailure)

// Records a fatal failure carrying the generic text "Failed".
#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")

// FAIL() is a generic name that clashes with some other libraries; define
// GTEST_DONT_DEFINE_FAIL to 1 to suppress it and use GTEST_FAIL() instead.
#if !GTEST_DONT_DEFINE_FAIL
#define FAIL() GTEST_FAIL()
#endif  // !GTEST_DONT_DEFINE_FAIL

// Records a success carrying the generic text "Succeeded".
#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")

// SUCCEED() is a generic name that clashes with some other libraries; define
// GTEST_DONT_DEFINE_SUCCEED to 1 to suppress it and use GTEST_SUCCEED()
// instead.
#if !GTEST_DONT_DEFINE_SUCCEED
#define SUCCEED() GTEST_SUCCEED()
#endif  // !GTEST_DONT_DEFINE_SUCCEED

// Macros for testing exceptions.
//
//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
//         Tests that the statement throws the expected exception.
//    * {ASSERT|EXPECT}_NO_THROW(statement):
//         Tests that the statement doesn't throw any exception.
//    * {ASSERT|EXPECT}_ANY_THROW(statement):
//         Tests that the statement throws an exception.
//
// The EXPECT_ and ASSERT_ forms of each check share one implementation macro
// and differ only in the failure macro they forward to it
// (GTEST_NONFATAL_FAILURE_ versus GTEST_FATAL_FAILURE_).

// statement must throw expected_exception.
#define EXPECT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
#define ASSERT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)

// statement must not throw.
#define EXPECT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define ASSERT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)

// statement must throw something.
#define EXPECT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define ASSERT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)

// Boolean assertions.  The condition may be a Boolean expression or an
// AssertionResult; see the comments on that class for how to use an
// AssertionResult with these macros.
//
// The *_FALSE forms negate the condition before forwarding it, while the
// stringified text passed alongside is always the condition as written.

// condition must hold.
#define EXPECT_TRUE(condition)                      \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, \
                      true, GTEST_NONFATAL_FAILURE_)
#define ASSERT_TRUE(condition)                      \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, \
                      true, GTEST_FATAL_FAILURE_)

// condition must not hold.
#define EXPECT_FALSE(condition)                       \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, \
                      false, GTEST_NONFATAL_FAILURE_)
#define ASSERT_FALSE(condition)                       \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, \
                      false, GTEST_FATAL_FAILURE_)

// Includes the auto-generated header that implements a family of
// generic predicate assertion macros.
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is AUTOMATICALLY GENERATED on 09/24/2010 by command
// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
//
// Implements a family of generic predicate assertion macros.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

// Makes sure this header is not included before gtest.h.
#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
# error Do not include gtest_pred_impl.h directly.  Include gtest.h instead.
#endif  // GTEST_INCLUDE_GTEST_GTEST_H_

// This header implements a family of generic predicate assertion
// macros:
//
//   ASSERT_PRED_FORMAT1(pred_format, v1)
//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
//   ...
//
// where pred_format is a function or functor that takes n (in the
// case of ASSERT_PRED_FORMATn) values and their source expression
// text, and returns a testing::AssertionResult.  See the definition
// of ASSERT_EQ in gtest.h for an example.
//
// If you don't care about formatting, you can use the more
// restrictive version:
//
//   ASSERT_PRED1(pred, v1)
//   ASSERT_PRED2(pred, v1, v2)
//   ...
//
// where pred is an n-ary function or functor that returns bool,
// and the values v1, v2, ..., must support the << operator for
// streaming to std::ostream.
//
// We also define the EXPECT_* variations.
//
// For now we only support predicates whose arity is at most 5.
// Please email googletestframework@googlegroups.com if you need
// support for higher arities.

// GTEST_ASSERT_ is the basic statement to which all of the assertions
// in this file reduce.  Don't use this in your code.
//
//   expression: evaluated once; its value initializes a const
//               ::testing::AssertionResult that also serves as the 'if'
//               condition.
//   on_failure: function-like macro invoked with the result's
//               failure_message() when the condition is false.
//
// The success branch is an empty statement and on_failure(...) is the last
// thing in the expansion, so whatever the caller writes after the macro is
// appended to the on_failure expansion.  GTEST_AMBIGUOUS_ELSE_BLOCKER_ is
// defined elsewhere; presumably it keeps this 'else' from binding to an
// enclosing 'if' -- confirm.

#define GTEST_ASSERT_(expression, on_failure) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar = (expression)) \
    ; \
  else \
    on_failure(gtest_ar.failure_message())

// Variant of GTEST_ASSERT_ that accepts an extra 'message' argument.  Don't
// use this in your code.
//
// The expansion has the same if/else shape as GTEST_ASSERT_, except that the
// AssertionResult is not const and the failure message is wrapped in an
// extra pair of parentheses.
//
// NOTE(review): 'message' does not appear in the expansion, so it has no
// effect here; GTEST_PRED_FORMAT2_MESSAGE instead folds the message into the
// expression texts it passes to the predicate-formatter.  Confirm that
// dropping it at this level is intentional.
#define GTEST_ASSERT_MESSAGE(expression, on_failure, message) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::AssertionResult gtest_ar = (expression)) \
    ; \
  else \
    on_failure((gtest_ar.failure_message()))


// Implementation detail of {EXPECT|ASSERT}_PRED1; not for direct use.
//
//   pred_text: source text of the predicate.
//   e1:        source text of the argument.
//   pred:      unary callable whose result is tested as a condition.
//   v1:        argument value; streamed into the failure message.
//
// Returns success when pred(v1) holds, otherwise a failure that names the
// predicate call and shows what the argument evaluated to.
template <typename Pred,
          typename T1>
AssertionResult AssertPred1Helper(const char* pred_text,
                                  const char* e1,
                                  Pred pred,
                                  const T1& v1) {
  if (pred(v1)) {
    return AssertionSuccess();
  }

  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  return failure;
}

// Internal building block of {EXPECT|ASSERT}_PRED_FORMAT1: calls the
// predicate-formatter with the argument's source text followed by its value.
// Don't use this in your code.
#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure) \
  GTEST_ASSERT_(pred_format(#v1, v1), on_failure)

// Internal building block of {EXPECT|ASSERT}_PRED1: routes a plain unary
// predicate through AssertPred1Helper.  Don't use this in your code.
#define GTEST_PRED1_(pred, v1, on_failure) \
  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, #v1, pred, v1), on_failure)

// Unary predicate assertion macros (nonfatal EXPECT_ forms first, then the
// fatal ASSERT_ forms).
#define EXPECT_PRED_FORMAT1(pred_format, v1) \
  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED1(pred, v1) GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT1(pred_format, v1) \
  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED1(pred, v1) GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)


// Implementation detail of {EXPECT|ASSERT}_PRED2; not for direct use.
//
//   pred_text: source text of the predicate.
//   e1, e2:    source text of the two arguments.
//   pred:      binary callable whose result is tested as a condition.
//   v1, v2:    argument values; streamed into the failure message.
//
// Returns success when pred(v1, v2) holds, otherwise a failure that names
// the predicate call and shows what each argument evaluated to.
template <typename Pred,
          typename T1,
          typename T2>
AssertionResult AssertPred2Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2) {
  if (pred(v1, v2)) {
    return AssertionSuccess();
  }

  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  return failure;
}

// Internal building block of {EXPECT|ASSERT}_PRED_FORMAT2: calls the
// predicate-formatter with both source texts followed by both values.
// Don't use this in your code.
#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure) \
  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), on_failure)

// Variant that also takes a message.  The stringified message is placed
// directly in front of each stringified argument, so the two adjacent
// string literals form a single expression text with no separator.
// NOTE(review): a message that is itself a string literal keeps its quotes
// when stringified -- confirm this is the intended output.
#define GTEST_PRED_FORMAT2_MESSAGE(pred_format, v1, v2, message, on_failure) \
  GTEST_ASSERT_MESSAGE(pred_format(#message#v1, #message#v2, v1, v2), \
                       on_failure, message)

// Internal building block of {EXPECT|ASSERT}_PRED2: routes a plain binary
// predicate through AssertPred2Helper.  Don't use this in your code.
#define GTEST_PRED2_(pred, v1, v2, on_failure) \
  GTEST_ASSERT_( \
      ::testing::AssertPred2Helper(#pred, #v1, #v2, pred, v1, v2), on_failure)

// Binary predicate assertion macros: nonfatal forms.
#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED_FORMAT2_MESSAGE(pred_format, v1, v2, message) \
  GTEST_PRED_FORMAT2_MESSAGE(pred_format, v1, v2, message, \
                             GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED2(pred, v1, v2) \
  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)

// Binary predicate assertion macros: fatal forms.
#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED_FORMAT2_MESSAGE(pred_format, v1, v2, message) \
  GTEST_PRED_FORMAT2_MESSAGE(pred_format, v1, v2, message, \
                             GTEST_FATAL_FAILURE_)
#define ASSERT_PRED2(pred, v1, v2) \
  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)


// Implementation detail of {EXPECT|ASSERT}_PRED3; not for direct use.
//
//   pred_text: source text of the predicate.
//   e1..e3:    source text of the three arguments.
//   pred:      ternary callable whose result is tested as a condition.
//   v1..v3:    argument values; streamed into the failure message.
//
// Returns success when pred(v1, v2, v3) holds, otherwise a failure that
// names the predicate call and shows what each argument evaluated to.
template <typename Pred,
          typename T1,
          typename T2,
          typename T3>
AssertionResult AssertPred3Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3) {
  if (pred(v1, v2, v3)) {
    return AssertionSuccess();
  }

  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  return failure;
}

// Internal building block of {EXPECT|ASSERT}_PRED_FORMAT3: calls the
// predicate-formatter with the three source texts followed by the three
// values.  Don't use this in your code.
#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure) \
  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), on_failure)

// Internal building block of {EXPECT|ASSERT}_PRED3: routes a plain ternary
// predicate through AssertPred3Helper.  Don't use this in your code.
#define GTEST_PRED3_(pred, v1, v2, v3, on_failure) \
  GTEST_ASSERT_( \
      ::testing::AssertPred3Helper(#pred, #v1, #v2, #v3, pred, v1, v2, v3), \
      on_failure)

// Ternary predicate assertion macros (nonfatal EXPECT_ forms first, then the
// fatal ASSERT_ forms).
#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED3(pred, v1, v2, v3) \
  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED3(pred, v1, v2, v3) \
  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)


// Implementation detail of {EXPECT|ASSERT}_PRED4; not for direct use.
//
//   pred_text: source text of the predicate.
//   e1..e4:    source text of the four arguments.
//   pred:      4-ary callable whose result is tested as a condition.
//   v1..v4:    argument values; streamed into the failure message.
//
// Returns success when pred(v1, v2, v3, v4) holds, otherwise a failure that
// names the predicate call and shows what each argument evaluated to.
template <typename Pred,
          typename T1,
          typename T2,
          typename T3,
          typename T4>
AssertionResult AssertPred4Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4) {
  if (pred(v1, v2, v3, v4)) {
    return AssertionSuccess();
  }

  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  return failure;
}

// Internal building block of {EXPECT|ASSERT}_PRED_FORMAT4: calls the
// predicate-formatter with the four source texts followed by the four
// values.  Don't use this in your code.
#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure) \
  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), on_failure)

// Internal building block of {EXPECT|ASSERT}_PRED4: routes a plain 4-ary
// predicate through AssertPred4Helper.  Don't use this in your code.
#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure) \
  GTEST_ASSERT_( \
      ::testing::AssertPred4Helper(#pred, #v1, #v2, #v3, #v4, \
                                   pred, v1, v2, v3, v4), \
      on_failure)

// 4-ary predicate assertion macros (nonfatal EXPECT_ forms first, then the
// fatal ASSERT_ forms).
#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)


// Implementation detail of {EXPECT|ASSERT}_PRED5; not for direct use.
//
//   pred_text: source text of the predicate.
//   e1..e5:    source text of the five arguments.
//   pred:      5-ary callable whose result is tested as a condition.
//   v1..v5:    argument values; streamed into the failure message.
//
// Returns success when pred(v1, v2, v3, v4, v5) holds, otherwise a failure
// that names the predicate call and shows what each argument evaluated to.
template <typename Pred,
          typename T1,
          typename T2,
          typename T3,
          typename T4,
          typename T5>
AssertionResult AssertPred5Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  const char* e5,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4,
                                  const T5& v5) {
  if (pred(v1, v2, v3, v4, v5)) {
    return AssertionSuccess();
  }

  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
          << ", " << e5 << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  failure << "\n" << e5 << " evaluates to " << v5;
  return failure;
}

// Internal building block of {EXPECT|ASSERT}_PRED_FORMAT5: calls the
// predicate-formatter with the five source texts followed by the five
// values.  Don't use this in your code.
#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure) \
  GTEST_ASSERT_( \
      pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), on_failure)

// Internal building block of {EXPECT|ASSERT}_PRED5: routes a plain 5-ary
// predicate through AssertPred5Helper.  Don't use this in your code.
#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure) \
  GTEST_ASSERT_( \
      ::testing::AssertPred5Helper(#pred, #v1, #v2, #v3, #v4, #v5, \
                                   pred, v1, v2, v3, v4, v5), \
      on_failure)

// 5-ary predicate assertion macros (nonfatal EXPECT_ forms first, then the
// fatal ASSERT_ forms).
#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)


#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

// Macros for testing equalities and inequalities.
//
//    * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual
//    * {ASSERT|EXPECT}_NE(v1, v2):           Tests that v1 != v2
//    * {ASSERT|EXPECT}_LT(v1, v2):           Tests that v1 < v2
//    * {ASSERT|EXPECT}_LE(v1, v2):           Tests that v1 <= v2
//    * {ASSERT|EXPECT}_GT(v1, v2):           Tests that v1 > v2
//    * {ASSERT|EXPECT}_GE(v1, v2):           Tests that v1 >= v2
//
// When they are not, Google Test prints both the tested expressions and
// their actual values.  The values must be compatible built-in types,
// or you will get a compiler error.  By "compatible" we mean that the
// values can be compared by the respective operator.
//
// Note:
//
//   1. It is possible to make a user-defined type work with
//   {ASSERT|EXPECT}_??(), but that requires overloading the
//   comparison operators and is thus discouraged by the Google C++
//   Usage Guide.  Therefore, you are advised to use the
//   {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
//   equal.
//
//   2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
//   pointers (in particular, C strings).  Therefore, if you use it
//   with two C strings, you are testing how their locations in memory
//   are related, not how their content is related.  To compare two C
//   strings by content, use {ASSERT|EXPECT}_STR*().
//
//   3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to
//   {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you
//   what the actual value is when it fails, and similarly for the
//   other comparisons.
//
//   4. Do not depend on the order in which {ASSERT|EXPECT}_??()
//   evaluate their arguments, which is undefined.
//
//   5. These macros evaluate their arguments exactly once.
//
// Examples:
//
//   EXPECT_NE(5, Foo());
//   EXPECT_EQ(NULL, a_pointer);
//   ASSERT_LT(i, array_size);
//   ASSERT_GT(records.size(), 0) << "There is no record left.";

// Non-fatal comparison assertions.  Every macro forwards its two operands,
// unchanged and in order, to EXPECT_PRED_FORMAT2 together with the helper
// that performs the comparison.

// Equality.  The EqHelper specialization is chosen by applying
// GTEST_IS_NULL_LITERAL_ to the first ("expected") argument.
#define EXPECT_EQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)
// Inequality.  Neither operand is an "expected" value, so the parameters are
// named val1/val2 like the other relational macros and GTEST_ASSERT_NE.
#define EXPECT_NE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
// val1 <= val2
#define EXPECT_LE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
// val1 < val2
#define EXPECT_LT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
// val1 >= val2
#define EXPECT_GE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
// val1 > val2
#define EXPECT_GT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Fatal comparison assertions under GTEST_-prefixed names.  The unprefixed
// ASSERT_* spellings are defined separately so that they can be omitted.
// Each macro forwards its operands, in order, to ASSERT_PRED_FORMAT2.

// Equality.  The EqHelper specialization is chosen by applying
// GTEST_IS_NULL_LITERAL_ to the first ("expected") argument.
#define GTEST_ASSERT_EQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)

// Same comparison as GTEST_ASSERT_EQ, but forwards an extra 'message'
// argument to ASSERT_PRED_FORMAT2_MESSAGE.
// NOTE(review): ASSERT_PRED_FORMAT2_MESSAGE is defined outside this section;
// how 'message' is reported depends on that macro - confirm there.
#define GTEST_ASSERT_EQ_MESSAGE(expected, actual, message) \
  ASSERT_PRED_FORMAT2_MESSAGE(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual, message)

// val1 != val2
#define GTEST_ASSERT_NE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
// val1 <= val2
#define GTEST_ASSERT_LE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
// val1 < val2
#define GTEST_ASSERT_LT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
// val1 >= val2
#define GTEST_ASSERT_GE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
// val1 > val2
#define GTEST_ASSERT_GT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
// ASSERT_XY(), which clashes with some users' own code.  Each unprefixed
// macro below is a plain alias for its GTEST_ASSERT_XY counterpart, which
// remains available regardless of these switches.

// Note: GTEST_DONT_DEFINE_ASSERT_EQ also controls ASSERT_EQ_MESSAGE.
#if !GTEST_DONT_DEFINE_ASSERT_EQ
# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
# define ASSERT_EQ_MESSAGE(val1, val2, message) GTEST_ASSERT_EQ_MESSAGE(val1, val2, message)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_NE
# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LE
# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LT
# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GE
# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GT
# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
#endif

// C String Comparisons.  All tests treat NULL and any non-NULL string
// as different.  Two NULLs are equal.
//
//    * {ASSERT|EXPECT}_STREQ(s1, s2):     Tests that s1 == s2
//    * {ASSERT|EXPECT}_STRNE(s1, s2):     Tests that s1 != s2
//    * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
//    * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
//
// For wide or narrow string objects, you can use the
// {ASSERT|EXPECT}_??() macros.
//
// Don't depend on the order in which the arguments are evaluated,
// which is undefined.
//
// These macros evaluate their arguments exactly once.

// Non-fatal C-string comparisons.  Each macro forwards both operands, in
// order, to EXPECT_PRED_FORMAT2 with the matching CmpHelperSTR* helper.
#define EXPECT_STREQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define EXPECT_STRNE(s1, s2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define EXPECT_STRCASEEQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define EXPECT_STRCASENE(s1, s2)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Fatal counterparts of the macros above; identical except that they go
// through ASSERT_PRED_FORMAT2.
#define ASSERT_STREQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define ASSERT_STRNE(s1, s2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define ASSERT_STRCASEEQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define ASSERT_STRCASENE(s1, s2)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Macros for comparing floating-point numbers.
//
//    * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual):
//         Tests that two float values are almost equal.
//    * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual):
//         Tests that two double values are almost equal.
//    * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
//         Tests that v1 and v2 are within the given distance to each other.
//
// Google Test uses ULP-based comparison to automatically pick a default
// error bound that is appropriate for the operands.  See the
// FloatingPoint template class in gtest-internal.h if you are
// interested in the implementation details.

// Non-fatal "almost equal" check using the float instantiation of
// CmpHelperFloatingPointEQ.
#define EXPECT_FLOAT_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

// Non-fatal "almost equal" check using the double instantiation of
// CmpHelperFloatingPointEQ.
#define EXPECT_DOUBLE_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

// Fatal counterpart of EXPECT_FLOAT_EQ.
#define ASSERT_FLOAT_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

// Fatal counterpart of EXPECT_DOUBLE_EQ.
#define ASSERT_DOUBLE_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

// Non-fatal check that val1 and val2 are within abs_error of each other.
// Unlike the *_EQ macros above, the tolerance is supplied by the caller and
// the three arguments are handed to DoubleNearPredFormat.
#define EXPECT_NEAR(val1, val2, abs_error)\
  EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

// Fatal counterpart of EXPECT_NEAR.
#define ASSERT_NEAR(val1, val2, abs_error)\
  ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

// These predicate format functions work on floating-point values, and
// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
//
//   EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);

// Asserts that val1 is less than, or almost equal to, val2.  Fails
// otherwise.  In particular, it fails if either val1 or val2 is NaN.
//
// Both functions have the predicate-format signature expected by
// {ASSERT|EXPECT}_PRED_FORMAT2: two C strings followed by the two values.
// NOTE(review): expr1/expr2 are presumably the source text of the two
// argument expressions, used in the failure message - confirm against the
// definitions, which are not part of this section.
GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
                                   float val1, float val2);
GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
                                    double val1, double val2);


#if GTEST_OS_WINDOWS

// Macros that test for HRESULT failure and success, these are only useful
// on Windows, and rely on Windows SDK macros and APIs to compile.
//
//    * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
//
// When expr unexpectedly fails or succeeds, Google Test prints the
// expected result and the actual result with both a human-readable
// string representation of the error, if available, as well as the
// hex result code.
//
// Each macro wraps expr in parentheses and passes it to the unary
// *_PRED_FORMAT1 macro with IsHRESULTSuccess or IsHRESULTFailure.
# define EXPECT_HRESULT_SUCCEEDED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define ASSERT_HRESULT_SUCCEEDED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define EXPECT_HRESULT_FAILED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

# define ASSERT_HRESULT_FAILED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

#endif  // GTEST_OS_WINDOWS

// Macros that execute statement and check that it doesn't generate new fatal
// failures in the current thread.
//
//   * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
//
// Examples:
//
//   EXPECT_NO_FATAL_FAILURE(Process());
//   ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
//
// Fatal form: hands 'statement' to GTEST_TEST_NO_FATAL_FAILURE_ with
// GTEST_FATAL_FAILURE_ as the failure handler.
#define ASSERT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
// Non-fatal form: same check, reported through GTEST_NONFATAL_FAILURE_.
#define EXPECT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)

// Causes a trace (including the source file path, the current line
// number, and the given message) to be included in every test failure
// message generated by code in the current scope.  The effect is
// undone when the control leaves the current scope.
//
// The message argument can be anything streamable to std::ostream.
//
// In the implementation, we include the current line number as part
// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
// to appear in the same block - as long as they are on different
// lines.
// Declares a ::testing::internal::ScopedTrace object named
// gtest_trace_<line> (GTEST_CONCAT_TOKEN_ pastes __LINE__ onto the prefix)
// and constructs it from __FILE__, __LINE__ and 'message' streamed into a
// ::testing::Message.  Because the name depends only on the line number, two
// uses on the same line of one block would declare the same variable twice.
#define SCOPED_TRACE(message) \
  ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
    __FILE__, __LINE__, ::testing::Message() << (message))

// Compile-time assertion for type equality.
// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
// the same type.  The value it returns is not interesting.
//
// Instead of making StaticAssertTypeEq a class template, we make it a
// function template that invokes a helper class template.  This
// prevents a user from misusing StaticAssertTypeEq<T1, T2> by
// defining objects of that type.
//
// CAVEAT:
//
// When used inside a method of a class template,
// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
// instantiated.  For example, given:
//
//   template <typename T> class Foo {
//    public:
//     void Bar() { testing::StaticAssertTypeEq<int, T>(); }
//   };
//
// the code:
//
//   void Test1() { Foo<bool> foo; }
//
// will NOT generate a compiler error, as Foo<bool>::Bar() is never
// actually instantiated.  Instead, you need:
//
//   void Test2() { Foo<bool> foo; foo.Bar(); }
//
// to cause a compiler error.
template <typename T1, typename T2>
bool StaticAssertTypeEq() {
  (void)internal::StaticAssertTypeEqHelper<T1, T2>();
  return true;
}

// Defines a test.
//
// The first parameter is the name of the test case, and the second
// parameter is the name of the test within the test case.
//
// The convention is to end the test case name with "Test".  For
// example, a test case for the Foo class can be named FooTest.
//
// The user should put his test code between braces after using this
// macro.  Example:
//
//   TEST(FooTest, InitializesCorrectly) {
//     Foo foo;
//     EXPECT_TRUE(foo.StatusIsOK());
//   }

// Note that we call GetTestTypeId() instead of GetTypeId<
// ::testing::Test>() here to get the type ID of testing::Test.  This
// is to work around a suspected linker bug when using Google Test as
// a framework on Mac OS X.  The bug causes GetTypeId<
// ::testing::Test>() to return different values depending on whether
// the call is from the Google Test framework itself or from user test
// code.  GetTestTypeId() is guaranteed to always return the same
// value, as it always calls GetTypeId<>() from the Google Test
// framework.
// Expands to GTEST_TEST_ with ::testing::Test as the third argument and the
// result of ::testing::internal::GetTestTypeId() as the fourth.
#define GTEST_TEST(test_case_name, test_name)\
  GTEST_TEST_(test_case_name, test_name, \
              ::testing::Test, ::testing::internal::GetTestTypeId())

// Define this macro to 1 to omit the definition of TEST(), which
// is a generic name and clashes with some other libraries.  GTEST_TEST()
// stays available either way; TEST() is only an alias for it.
#if !GTEST_DONT_DEFINE_TEST
# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
#endif

// Defines a test that uses a test fixture.
//
// The first parameter is the name of the test fixture class, which
// also doubles as the test case name.  The second parameter is the
// name of the test within the test case.
//
// A test fixture class must be declared earlier.  The user should put
// his test code between braces after using this macro.  Example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { b_.AddElement(3); }
//
//     Foo a_;
//     Foo b_;
//   };
//
//   TEST_F(FooTest, InitializesCorrectly) {
//     EXPECT_TRUE(a_.StatusIsOK());
//   }
//
//   TEST_F(FooTest, ReturnsElementCountCorrectly) {
//     EXPECT_EQ(0, a_.size());
//     EXPECT_EQ(1, b_.size());
//   }

// Expands to GTEST_TEST_ with test_fixture used both as the first argument
// (the test case name) and as the third, and with
// ::testing::internal::GetTypeId<test_fixture>() as the fourth.
#define TEST_F(test_fixture, test_name)\
  GTEST_TEST_(test_fixture, test_name, test_fixture, \
              ::testing::internal::GetTypeId<test_fixture>())

// Use this macro in main() to run all tests.  It returns 0 if all
// tests are successful, or 1 otherwise.
//
// RUN_ALL_TESTS() should be invoked after the command line has been
// parsed by InitGoogleTest().

// Expands to a parenthesized call of Run() on the object returned by
// ::testing::UnitTest::GetInstance(), so the macro can be used as an
// expression (e.g. in a return statement).
#define RUN_ALL_TESTS()\
  (::testing::UnitTest::GetInstance()->Run())

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_H_


================================================
FILE: libhsakmt/tests/kfdtest/gtest-1.6.0/gtest-all.cpp
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// Google C++ Testing Framework (Google Test)
//
// Sometimes it's desirable to build Google Test by compiling a single file.
// This file serves this purpose.

// This line ensures that gtest.h can be compiled on its own, even
// when it's fused.
#include "gtest/gtest.h"

// The following lines pull in the real gtest *.cc files.
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)

// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Utilities for testing Google Test itself and code that uses Google Test
// (e.g. frameworks built on top of Google Test).

#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_


namespace testing {

// This helper class can be used to mock out Google Test failure reporting
// so that we can test Google Test or code that builds on Google Test.
//
// An object of this class appends a TestPartResult object to the
// TestPartResultArray object given in the constructor whenever a Google Test
// failure is reported. It can either intercept only failures that are
// generated in the same thread that created this object or it can intercept
// all generated failures. The scope of this mock object can be controlled with
// the second argument to the two arguments constructor.
// Scoped replacement for the test part result reporter.  It implements
// TestPartResultReporterInterface and is non-copyable
// (GTEST_DISALLOW_COPY_AND_ASSIGN_).
class GTEST_API_ ScopedFakeTestPartResultReporter
    : public TestPartResultReporterInterface {
 public:
  // The two possible mocking modes of this object.
  enum InterceptMode {
    INTERCEPT_ONLY_CURRENT_THREAD,  // Intercepts only thread local failures.
    INTERCEPT_ALL_THREADS           // Intercepts all failures.
  };

  // The c'tor sets this object as the test part result reporter used
  // by Google Test.  The 'result' parameter specifies where to report the
  // results. This reporter will only catch failures generated in the current
  // thread. DEPRECATED
  explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);

  // Same as above, but you can choose the interception scope of this object.
  ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
                                   TestPartResultArray* result);

  // The d'tor restores the previous test part result reporter.
  virtual ~ScopedFakeTestPartResultReporter();

  // Appends the TestPartResult object to the TestPartResultArray
  // received in the constructor.
  //
  // This method is from the TestPartResultReporterInterface
  // interface.
  virtual void ReportTestPartResult(const TestPartResult& result);
 private:
  // NOTE(review): presumably the set-up code shared by both constructors;
  // the definition is not part of this section - confirm there.
  void Init();

  // Interception scope; const, so it cannot change after construction.
  const InterceptMode intercept_mode_;
  // NOTE(review): presumably the reporter that was active before this
  // object installed itself, kept so the d'tor can restore it - confirm
  // against the definitions.
  TestPartResultReporterInterface* old_reporter_;
  // Destination array received in the constructor.  The pointer itself is
  // const; this class declares no code that would delete it.
  TestPartResultArray* const result_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
};

namespace internal {

// A helper class for implementing EXPECT_FATAL_FAILURE() and
// EXPECT_NONFATAL_FAILURE().  Its destructor verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring.  If that's not the case, a
// non-fatal failure will be generated.
// Non-copyable checker whose work happens in its destructor; the
// constructor only records what to look for.
class GTEST_API_ SingleFailureChecker {
 public:
  // The constructor remembers the arguments.
  //   results - array that will be inspected (held by pointer, not copied).
  //   type    - failure type the single recorded result must have.
  //   substr  - text the failure message must contain (stored by value).
  SingleFailureChecker(const TestPartResultArray* results,
                       TestPartResult::Type type,
                       const string& substr);
  // Performs the verification of the remembered array.
  ~SingleFailureChecker();
 private:
  const TestPartResultArray* const results_;  // Array to verify.
  const TestPartResult::Type type_;           // Required failure type.
  const string substr_;                       // Required message substring.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
};

}  // namespace internal

}  // namespace testing

// A set of macros for testing Google Test assertions or code that's expected
// to generate Google Test fatal failures.  It verifies that the given
// statement will cause exactly one fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
//   - 'statement' cannot reference local non-static variables or
//     non-static members of the current object.
//   - 'statement' cannot return a value.
//   - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works.  The AcceptsMacroThatExpandsToUnprotectedComma test in
// gtest_unittest.cc will fail to compile if we do that.
// Current-thread variant: the fake reporter is created with
// INTERCEPT_ONLY_CURRENT_THREAD.
//
// 'statement' is placed in a static member function of a local class, which
// is why it cannot use local non-static variables or non-static members of
// the current object.  gtest_checker is declared before the inner block
// that holds gtest_reporter, so the reporter is destroyed first and the
// checker's destructor runs afterwards, when the do-block is left.
#define EXPECT_FATAL_FAILURE(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

// All-threads variant: identical to EXPECT_FATAL_FAILURE except that the
// fake reporter is created with INTERCEPT_ALL_THREADS.  The same
// restriction applies: 'statement' runs inside a static member function of
// a local class.
#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ALL_THREADS, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

// A macro for testing Google Test assertions or code that's expected to
// generate Google Test non-fatal failures.  It asserts that the given
// statement will cause exactly one non-fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// 'statement' is allowed to reference local variables and members of
// the current object.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
//   - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works.  If we do that, the code won't compile when the user gives
// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
// expands to code containing an unprotected comma.  The
// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
// catches that.
//
// For the same reason, we have to write
//   if (::testing::internal::AlwaysTrue()) { statement; }
// instead of
//   GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
// to avoid an MSVC warning on unreachable code.
// Current-thread variant: the fake reporter is created with
// INTERCEPT_ONLY_CURRENT_THREAD and the checker looks for kNonFatalFailure.
//
// 'statement' is expanded directly inside the enclosing function (guarded by
// AlwaysTrue()), so it may use local variables and members.  gtest_checker
// is declared before the inner block that holds gtest_reporter, so the
// reporter is destroyed first and the checker's destructor runs afterwards.
#define EXPECT_NONFATAL_FAILURE(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

// All-threads variant: identical to EXPECT_NONFATAL_FAILURE except that the
// fake reporter is created with INTERCEPT_ALL_THREADS.
#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS,\
          &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

#endif  // GTEST_INCLUDE_GTEST_GTEST_SPI_H_

#include <ctype.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <ostream>  // NOLINT
#include <sstream>
#include <vector>

#if GTEST_OS_LINUX

// TODO(kenton@google.com): Use autoconf to detect availability of
// gettimeofday().
# define GTEST_HAS_GETTIMEOFDAY_ 1

# include <fcntl.h>  // NOLINT
# include <limits.h>  // NOLINT
# include <sched.h>  // NOLINT
// Declares strcasecmp().  This header is not available on Windows.
# include <strings.h>  // NOLINT
# include <sys/mman.h>  // NOLINT
# include <sys/time.h>  // NOLINT
# include <unistd.h>  // NOLINT
# include <string>

#elif GTEST_OS_SYMBIAN
# define GTEST_HAS_GETTIMEOFDAY_ 1
# include <sys/time.h>  // NOLINT

#elif GTEST_OS_ZOS
# define GTEST_HAS_GETTIMEOFDAY_ 1
# include <sys/time.h>  // NOLINT

// On z/OS we additionally need strings.h for strcasecmp.
# include <strings.h>  // NOLINT

#elif GTEST_OS_WINDOWS_MOBILE  // We are on Windows CE.

# include <windows.h>  // NOLINT

#elif GTEST_OS_WINDOWS  // We are on Windows proper.

# include <io.h>  // NOLINT
# include <sys/timeb.h>  // NOLINT
# include <sys/types.h>  // NOLINT
# include <sys/stat.h>  // NOLINT

# if GTEST_OS_WINDOWS_MINGW
// MinGW has gettimeofday() but not _ftime64().
// TODO(kenton@google.com): Use autoconf to detect availability of
//   gettimeofday().
// TODO(kenton@google.com): There are other ways to get the time on
//   Windows, like GetTickCount() or GetSystemTimeAsFileTime().  MinGW
//   supports these.  consider using them instead.
#  define GTEST_HAS_GETTIMEOFDAY_ 1
#  include <sys/time.h>  // NOLINT
# endif  // GTEST_OS_WINDOWS_MINGW

// cpplint thinks that the header is already included, so we want to
// silence it.
# include <windows.h>  // NOLINT

#else

// Assume other platforms have gettimeofday().
// TODO(kenton@google.com): Use autoconf to detect availability of
//   gettimeofday().
# define GTEST_HAS_GETTIMEOFDAY_ 1

// cpplint thinks that the header is already included, so we want to
// silence it.
# include <sys/time.h>  // NOLINT
# include <unistd.h>  // NOLINT

#endif  // GTEST_OS_LINUX

#if GTEST_HAS_EXCEPTIONS
# include <stdexcept>
#endif

#if GTEST_CAN_STREAM_RESULTS_
# include <arpa/inet.h>  // NOLINT
# include <netdb.h>  // NOLINT
#endif

// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error (that header checks this
// macro and stops with #error when it is not set).  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// their code.
#define GTEST_IMPLEMENTATION_ 1
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Utility functions and classes used by the Google C++ testing framework.
//
// Author: wan@google.com (Zhanyong Wan)
//
// This file contains purely Google Test's internal implementation.  Please
// DO NOT #INCLUDE IT IN A USER PROGRAM.

#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
#define GTEST_SRC_GTEST_INTERNAL_INL_H_

// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is
// part of Google Test's implementation; otherwise it's undefined.
// Refuse to compile unless GTEST_IMPLEMENTATION_ is non-zero.  An undefined
// macro evaluates to 0 in #if, so the check also fires when it was never
// defined.
#if !GTEST_IMPLEMENTATION_
// A user is trying to include this from their code - just say no.
# error "gtest-internal-inl.h is part of Google Test's internal implementation."
# error "It must not be included except by Google Test itself."
#endif  // GTEST_IMPLEMENTATION_

#ifndef _WIN32_WCE
# include <errno.h>
#endif  // !_WIN32_WCE
#include <stddef.h>
#include <stdlib.h>  // For strtoll/_strtoul64/malloc/free.
#include <string.h>  // For memmove.

#include <algorithm>
#include <string>
#include <vector>


#if GTEST_OS_WINDOWS
# include <windows.h>  // NOLINT
#endif  // GTEST_OS_WINDOWS


namespace testing {

// Declares the death_test_use_fork flag (declaration only).
//
// We don't want the users to modify this flag in the code, but want
// Google Test's own unit tests to be able to access it. Therefore we
// declare it here as opposed to in gtest.h.
GTEST_DECLARE_bool_(death_test_use_fork);

namespace internal {

// The value of GetTestTypeId() as seen from within the Google Test
// library.  This is solely for testing GetTestTypeId().
GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;

// Names of the flags (needed for parsing Google Test flags).
//
// Each constant holds the bare flag name as a NUL-terminated character
// array.
// NOTE(review): the names carry no "gtest_" prefix here; presumably the
// flag parser prepends it - confirm against the parsing code.
const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
const char kBreakOnFailureFlag[] = "break_on_failure";
const char kCatchExceptionsFlag[] = "catch_exceptions";
const char kColorFlag[] = "color";
const char kFilterFlag[] = "filter";
const char kListTestsFlag[] = "list_tests";
const char kOutputFlag[] = "output";
const char kPrintTimeFlag[] = "print_time";
const char kRandomSeedFlag[] = "random_seed";
const char kRepeatFlag[] = "repeat";
const char kShuffleFlag[] = "shuffle";
const char kStackTraceDepthFlag[] = "stack_trace_depth";
const char kStreamResultToFlag[] = "stream_result_to";
const char kThrowOnFailureFlag[] = "throw_on_failure";

// A valid random seed must be in [1, kMaxRandomSeed].  The seed helpers
// below (GetRandomSeedFromFlag() and GetNextRandomSeed()) both normalize
// or validate against this bound.
const int kMaxRandomSeed = 99999;

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.
GTEST_API_ extern bool g_help_flag;

// Returns the current time in milliseconds.
GTEST_API_ TimeInMillis GetTimeInMillis();

// Returns true iff Google Test should use colors in the output.
GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);

// Formats the given time in milliseconds as seconds.
GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);

// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
GTEST_API_ bool ParseInt32Flag(
    const char* str, const char* flag, Int32* value);

// Maps the value of the --gtest_random_seed flag to a seed in the range
// [1, kMaxRandomSeed].  A flag value of 0 selects a seed derived from the
// current time in milliseconds; any other value is used as given.
inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
  unsigned int raw_seed;
  if (random_seed_flag == 0) {
    raw_seed = static_cast<unsigned int>(GetTimeInMillis());
  } else {
    raw_seed = static_cast<unsigned int>(random_seed_flag);
  }

  // Fold the raw value into [0, kMaxRandomSeed) and shift it up by one, so
  // the resulting seed is a short number that is easy to type.
  const unsigned int modulus = static_cast<unsigned int>(kMaxRandomSeed);
  const unsigned int folded = (raw_seed - 1U) % modulus;
  return static_cast<int>(folded) + 1;
}

// Returns the valid random seed that follows 'seed', wrapping around from
// kMaxRandomSeed back to 1.  'seed' itself must already be valid; the
// check below reports an invalid seed.
inline int GetNextRandomSeed(int seed) {
  GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed)
      << "Invalid random seed " << seed << " - must be in [1, "
      << kMaxRandomSeed << "].";
  const int candidate = seed + 1;
  if (candidate > kMaxRandomSeed) {
    return 1;
  }
  return candidate;
}

// This class saves the values of all Google Test flags in its c'tor, and
// restores them in its d'tor.
class GTestFlagSaver {
 public:
  // The c'tor.
  GTestFlagSaver() {
    also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests);
    break_on_failure_ = GTEST_FLAG(break_on_failure);
    catch_exceptions_ = GTEST_FLAG(catch_exceptions);
    color_ = GTEST_FLAG(color);
    death_test_style_ = GTEST_FLAG(death_test_style);
    death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
    filter_ = GTEST_FLAG(filter);
    internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
    list_tests_ = GTEST_FLAG(list_tests);
    output_ = GTEST_FLAG(output);
    print_time_ = GTEST_FLAG(print_time);
    random_seed_ = GTEST_FLAG(random_seed);
    repeat_ = GTEST_FLAG(repeat);
    shuffle_ = GTEST_FLAG(shuffle);
    stack_trace_depth_ = GTEST_FLAG(stack_trace_depth);
    stream_result_to_ = GTEST_FLAG(stream_result_to);
    throw_on_failure_ = GTEST_FLAG(throw_on_failure);
  }

  // The d'tor is not virtual.  DO NOT INHERIT FROM THIS CLASS.
  ~GTestFlagSaver() {
    GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_;
    GTEST_FLAG(break_on_failure) = break_on_failure_;
    GTEST_FLAG(catch_exceptions) = catch_exceptions_;
    GTEST_FLAG(color) = color_;
    GTEST_FLAG(death_test_style) = death_test_style_;
    GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
    GTEST_FLAG(filter) = filter_;
    GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
    GTEST_FLAG(list_tests) = list_tests_;
    GTEST_FLAG(output) = output_;
    GTEST_FLAG(print_time) = print_time_;
    GTEST_FLAG(random_seed) = random_seed_;
    GTEST_FLAG(repeat) = repeat_;
    GTEST_FLAG(shuffle) = shuffle_;
    GTEST_FLAG(stack_trace_depth) = stack_trace_depth_;
    GTEST_FLAG(stream_result_to) = stream_result_to_;
    GTEST_FLAG(throw_on_failure) = throw_on_failure_;
  }
 private:
  // Fields for saving the original values of flags.
  bool also_run_disabled_tests_;
  bool break_on_failure_;
  bool catch_exceptions_;
  String color_;
  String death_test_style_;
  bool death_test_use_fork_;
  String filter_;
  String internal_run_death_test_;
  bool list_tests_;
  String output_;
  bool print_time_;
  bool pretty_;
  internal::Int32 random_seed_;
  internal::Int32 repeat_;
  bool shuffle_;
  internal::Int32 stack_trace_depth_;
  String stream_result_to_;
  bool throw_on_failure_;
} GTEST_ATTRIBUTE_UNUSED_;

// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
// as '(Invalid Unicode 0xXXXXXXXX)'.
GTEST_API_ char* CodePointToUtf8(UInt32 code_point, char* str);

// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from the Basic
// Multilingual Plane.
GTEST_API_ String WideStringToUtf8(const wchar_t* str, int num_chars);

// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
// if the variable is present. If a file already exists at this location, this
// function will write over it. If the variable is present, but the file cannot
// be created, prints an error and exits.
void WriteToShardStatusFileIfNeeded();

// Checks whether sharding is enabled by examining the relevant
// environment variable values. If the variables are present,
// but inconsistent (e.g., shard_index >= total_shards), prints
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
GTEST_API_ bool ShouldShard(const char* total_shards_str,
                            const char* shard_index_str,
                            bool in_subprocess_for_death_test);

// Parses the environment variable var as an Int32. If it is unset,
// returns default_val. If it is not an Int32, prints an error and
// aborts.
GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);

// Given the total number of shards, the shard index, and the test id,
// returns true iff the test should be run on this shard. The test id is
// some arbitrary but unique non-negative integer assigned to each test
// method. Assumes that 0 <= shard_index < total_shards.
GTEST_API_ bool ShouldRunTestOnShard(
    int total_shards, int shard_index, int test_id);

// STL container utilities.

// Counts the elements of the container for which the predicate yields
// true, and returns that count.
template <class Container, typename Predicate>
inline int CountIf(const Container& c, Predicate predicate) {
  // std::count_if() is deliberately avoided: its signature in libCstd on
  // Solaris is non-standard, so the container is walked by hand.
  int matches = 0;
  typename Container::const_iterator cursor = c.begin();
  while (cursor != c.end()) {
    if (predicate(*cursor)) {
      matches += 1;
    }
    ++cursor;
  }
  return matches;
}

// Invokes the given function/functor once on every element of the
// container, visiting the elements from begin() to end().
template <class Container, typename Functor>
void ForEach(const Container& c, Functor functor) {
  for (typename Container::const_iterator it = c.begin(); it != c.end();
       ++it) {
    functor(*it);
  }
}

// Looks up element i of the vector.  When i lies outside [0, v.size()),
// default_value is returned instead.
template <typename E>
inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
  const bool in_range = i >= 0 && i < static_cast<int>(v.size());
  if (!in_range) {
    return default_value;
  }
  return v[i];
}

// Shuffles, in place, the elements of the vector whose indices lie in the
// STL-style half-open range [begin, end).  Passing 'end' == size() shuffles
// through the last element.  Both bounds are validated before any element
// is moved.
template <typename E>
void ShuffleRange(internal::Random* random, int begin, int end,
                  std::vector<E>* v) {
  const int size = static_cast<int>(v->size());
  GTEST_CHECK_(0 <= begin && begin <= size)
      << "Invalid shuffle range start " << begin << ": must be in range [0, "
      << size << "].";
  GTEST_CHECK_(begin <= end && end <= size)
      << "Invalid shuffle range finish " << end << ": must be in range ["
      << begin << ", " << size << "].";

  // Fisher-Yates shuffle, from
  // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
  //
  // Each round picks a random slot among the first 'remaining' elements of
  // the range and swaps it into the last of those slots, then shrinks the
  // unshuffled prefix by one.
  int remaining = end - begin;
  while (remaining >= 2) {
    const int picked = begin + random->Generate(remaining);
    const int tail = begin + remaining - 1;
    std::swap((*v)[picked], (*v)[tail]);
    --remaining;
  }
}

// Shuffles the whole vector in place by delegating to ShuffleRange() with
// the range [0, v->size()).
template <typename E>
inline void Shuffle(internal::Random* random, std::vector<E>* v) {
  const int element_count = static_cast<int>(v->size());
  ShuffleRange(random, 0, element_count, v);
}

// A function for deleting an object.  Handy for being used as a
// functor.
//
// Destroys the object pointed to by x with a plain (non-array) delete
// expression.
template <typename T>
static void Delete(T* x) {
  delete x;
}

// A predicate that checks the key of a TestProperty against a known key.
//
// TestPropertyKeyIs is copyable.
class TestPropertyKeyIs {
 public:
  // Constructor.
  //
  // TestPropertyKeyIs has NO default constructor.
  explicit TestPropertyKeyIs(const char* key)
      : key_(key) {}

  // Returns true iff the test name of test property matches on key_.
  bool operator()(const TestProperty& test_property) const {
    return String(test_property.key()).Compare(key_) == 0;
  }

 private:
  String key_;
};

// Class UnitTestOptions.
//
// This class contains functions for processing options the user
// specifies when running the tests.  It has only static members.
//
// In most cases, the user can specify an option using either an
// environment variable or a command line flag.  E.g. you can set the
// test filter using either GTEST_FILTER or --gtest_filter.  If both
// the variable and the flag are present, the latter overrides the
// former.
//
// All members are only declared here; their definitions live outside
// this section of the file.
class GTEST_API_ UnitTestOptions {
 public:
  // Functions for processing the gtest_output flag.

  // Returns the output format, or "" for normal printed output.
  static String GetOutputFormat();

  // Returns the absolute path of the requested output file, or the
  // default (test_detail.xml in the original working directory) if
  // none was explicitly specified.
  static String GetAbsolutePathToOutputFile();

  // Functions for processing the gtest_filter flag.

  // Returns true iff the wildcard pattern matches the string.  The
  // first ':' or '\0' character in pattern marks the end of it.
  //
  // This recursive algorithm isn't very efficient, but is clear and
  // works well enough for matching test names, which are short.
  static bool PatternMatchesString(const char *pattern, const char *str);

  // Returns true iff the user-specified filter matches the test case
  // name and the test name.
  static bool FilterMatchesTest(const String &test_case_name,
                                const String &test_name);

#if GTEST_OS_WINDOWS
  // Function for supporting the gtest_catch_exceptions flag.

  // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
  // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
  // This function is useful as an __except condition.
  static int GTestShouldProcessSEH(DWORD exception_code);
#endif  // GTEST_OS_WINDOWS

  // Returns true if "name" matches the ':' separated list of glob-style
  // filters in "filter".
  static bool MatchesFilter(const String& name, const char* filter);
};

// Returns the current application's name, removing directory path if that
// is present.  Used by UnitTestOptions::GetAbsolutePathToOutputFile.
GTEST_API_ FilePath GetCurrentExecutableName();

// The role interface for getting the OS stack trace as a string.
//
// Both operations are pure virtual, so a concrete getter must implement
// CurrentStackTrace() and UponLeavingGTest().
class OsStackTraceGetterInterface {
 public:
  OsStackTraceGetterInterface() {}
  // Virtual so that a getter can be destroyed through a pointer to this
  // interface.
  virtual ~OsStackTraceGetterInterface() {}

  // Returns the current OS stack trace as a String.  Parameters:
  //
  //   max_depth  - the maximum number of stack frames to be included
  //                in the trace.
  //   skip_count - the number of top frames to be skipped; doesn't count
  //                against max_depth.
  virtual String CurrentStackTrace(int max_depth, int skip_count) = 0;

  // UponLeavingGTest() should be called immediately before Google Test calls
  // user code. It saves some information about the current stack that
  // CurrentStackTrace() will use to find and hide Google Test stack frames.
  virtual void UponLeavingGTest() = 0;

 private:
  // Instances are not meant to be copied or assigned.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
};

// A working implementation of the OsStackTraceGetterInterface interface.
//
// The two overridden methods are only declared here; their definitions
// live outside this section of the file.
class OsStackTraceGetter : public OsStackTraceGetterInterface {
 public:
  // Starts out with no caller frame recorded.
  OsStackTraceGetter() : caller_frame_(NULL) {}
  virtual String CurrentStackTrace(int max_depth, int skip_count);
  virtual void UponLeavingGTest();

  // This string is inserted in place of stack frames that are part of
  // Google Test's implementation.
  static const char* const kElidedFramesMarker;

 private:
  Mutex mutex_;  // protects all internal state

  // We save the stack frame below the frame that calls user code.
  // We do this because the address of the frame immediately below
  // the user code changes between the call to UponLeavingGTest()
  // and any calls to CurrentStackTrace() from within the user code.
  void* caller_frame_;

  // Instances are not meant to be copied or assigned.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
};

// Information about a Google Test trace point.
//
// NOTE(review): file/line presumably identify the source location of the
// trace point and message its user-supplied text - confirm against the
// code that fills in this struct.
struct TraceInfo {
  const char* file;
  int line;
  String message;
};

// This is the default global test part result reporter used in UnitTestImpl.
// This class should only be used by UnitTestImpl.
//
// The constructor and ReportTestPartResult() are only declared here; they
// are defined outside this section of the file.
class DefaultGlobalTestPartResultReporter
  : public TestPartResultReporterInterface {
 public:
  explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
  // Implements the TestPartResultReporterInterface. Reports the test part
  // result in the current test.
  virtual void ReportTestPartResult(const TestPartResult& result);

 private:
  // The pointer itself is const, so it cannot be re-seated after
  // construction.
  UnitTestImpl* const unit_test_;

  // Instances are not meant to be copied or assigned.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter);
};

// This is the default per thread test part result reporter used in
// UnitTestImpl. This class should only be used by UnitTestImpl.
//
// The constructor and ReportTestPartResult() are only declared here; they
// are defined outside this section of the file.
class DefaultPerThreadTestPartResultReporter
    : public TestPartResultReporterInterface {
 public:
  explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
  // Implements the TestPartResultReporterInterface. The implementation just
  // delegates to the current global test part result reporter of *unit_test_.
  virtual void ReportTestPartResult(const TestPartResult& result);

 private:
  // The pointer itself is const, so it cannot be re-seated after
  // construction.
  UnitTestImpl* const unit_test_;

  // Instances are not meant to be copied or assigned.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter);
};

// The private implementation of the UnitTest class.  We don't protect
// the methods under a mutex, as this class is not accessible by a
// user and the UnitTest class that delegates work to this class does
// proper locking.
class GTEST_API_ UnitTestImpl {
 public:
  explicit UnitTestImpl(UnitTest* parent);
  virtual ~UnitTestImpl();

  // There are two different ways to register your own TestPartResultReporter.
  // You can register your own reporter to listen either only for test results
  // from the current thread or for results from all threads.
  // By default, each per-thread test result reporter just passes a new
  // TestPartResult to the global test result reporter, which registers the
  // test part result for the currently running test.

  // Returns the global test part result reporter.
  TestPartResultReporterInterface* GetGlobalTestPartResultReporter();

  // Sets the global test part result reporter.
  void SetGlobalTestPartResultReporter(
      TestPartResultReporterInterface* reporter);

  // Returns the test part result reporter for the current thread.
  TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();

  // Sets the test part result reporter for the current thread.
  void SetTestPartResultReporterForCurrentThread(
      TestPartResultReporterInterface* reporter);

  // Gets the number of successful test cases.
  int successful_test_case_count() const;

  // Gets the number of failed test cases.
  int failed_test_case_count() const;

  // Gets the number of all test cases.
  int total_test_case_count() const;

  // Gets the number of all test cases that contain at least one test
  // that should run.
  int test_case_to_run_count() const;

  // Gets the number of successful tests.
  int successful_test_count() const;

  // Gets the number of failed tests.
  int failed_test_count() const;

  // Gets the number of disabled tests.
  int disabled_test_count() const;

  // Gets the number of all tests.
  int total_test_count() const;

  // Gets the number of tests that should run.
  int test_to_run_count() const;

  // Gets the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const { return elapsed_time_; }

  // Returns true iff the unit test passed (i.e. all test cases passed).
  bool Passed() const { return !Failed(); }

  // Tells whether the unit test failed: true iff at least one test case
  // failed, or the ad hoc test result (assertions made outside of any
  // test) reports a failure.
  bool Failed() const {
    if (failed_test_case_count() > 0) {
      return true;
    }
    return ad_hoc_test_result()->Failed();
  }

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  //
  // The position i is first translated through test_case_indices_, and the
  // translated index (not i itself) selects the element of test_cases_.
  // This keeps the const accessor in agreement with GetMutableTestCase(),
  // which resolves i the same way.
  const TestCase* GetTestCase(int i) const {
    const int index = GetElementOr(test_case_indices_, i, -1);
    return index < 0 ? NULL : test_cases_[index];
  }

  // Mutable counterpart of the lookup by position: returns the i-th test
  // case, where i is translated through test_case_indices_.  Valid values
  // of i are 0 to total_test_case_count() - 1; any other value yields NULL.
  TestCase* GetMutableTestCase(int i) {
    const int resolved = GetElementOr(test_case_indices_, i, -1);
    if (resolved < 0) {
      return NULL;
    }
    return test_cases_[resolved];
  }

  // Provides access to the event listener list.
  TestEventListeners* listeners() { return &listeners_; }

  // Returns the TestResult for the test that's currently running, or
  // the TestResult for the ad hoc test if no test is running.
  TestResult* current_test_result();

  // Returns the TestResult for the ad hoc test.
  const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }

  // Sets the OS stack trace getter.
  //
  // Does nothing if the input and the current OS stack trace getter
  // are the same; otherwise, deletes the old getter and makes the
  // input the current getter.
  void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);

  // Returns the current OS stack trace getter if it is not NULL;
  // otherwise, creates an OsStackTraceGetter, makes it the current
  // getter, and returns it.
  OsStackTraceGetterInterface* os_stack_trace_getter();

  // Returns the current OS stack trace as a String.
  //
  // The maximum number of stack frames to be included is specified by
  // the gtest_stack_trace_depth flag.  The skip_count parameter
  // specifies the number of top frames to be skipped, which doesn't
  // count against the number of frames to be included.
  //
  // For example, if Foo() calls Bar(), which in turn calls
  // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
  // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
  String CurrentOsStackTraceExceptTop(int skip_count);

  // Finds and returns a TestCase with the given name.  If one doesn't
  // exist, creates one and returns it.
  //
  // Arguments:
  //
  //   test_case_name: name of the test case
  //   type_param:     the name of the test's type parameter, or NULL if
  //                   this is not a typed or a type-parameterized test.
  //   set_up_tc:      pointer to the function that sets up the test case
  //   tear_down_tc:   pointer to the function that tears down the test case
  TestCase* GetTestCase(const char* test_case_name,
                        const char* type_param,
                        Test::SetUpTestCaseFunc set_up_tc,
                        Test::TearDownTestCaseFunc tear_down_tc);

  // Registers a TestInfo with the unit test by handing it to the TestCase
  // it belongs to (looked up, or created, via GetTestCase()).
  //
  // Arguments:
  //
  //   set_up_tc:    pointer to the function that sets up the test case
  //   tear_down_tc: pointer to the function that tears down the test case
  //   test_info:    the TestInfo object
  void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc,
                   TestInfo* test_info) {
    // Thread-safe death tests need the working directory the test program
    // was originally started in.  RUN_ALL_TESTS() is too late to record
    // it, because the user may have changed the current directory before
    // calling it.  AddTestInfo() is called to register a TEST or TEST_F
    // before main() is reached, so the directory is captured here, the
    // first time through.
    const bool dir_already_recorded = !original_working_dir_.IsEmpty();
    if (!dir_already_recorded) {
      original_working_dir_.Set(FilePath::GetCurrentDir());
      GTEST_CHECK_(!original_working_dir_.IsEmpty())
          << "Failed to get the current working directory.";
    }

    TestCase* const owning_case = GetTestCase(test_info->test_case_name(),
                                              test_info->type_param(),
                                              set_up_tc,
                                              tear_down_tc);
    owning_case->AddTestInfo(test_info);
  }

#if GTEST_HAS_PARAM_TEST
  // Returns ParameterizedTestCaseRegistry object used to keep track of
  // value-parameterized tests and instantiate and register them.
  internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
    return parameterized_test_registry_;
  }
#endif  // GTEST_HAS_PARAM_TEST

  // Sets the TestCase object for the test that's currently running.
  void set_current_test_case(TestCase* a_current_test_case) {
    current_test_case_ = a_current_test_case;
  }

  // Sets the TestInfo object for the test that's currently running.  If
  // current_test_info is NULL, the assertion results will be stored in
  // ad_hoc_test_result_.
  void set_current_test_info(TestInfo* a_current_test_info) {
    current_test_info_ = a_current_test_info;
  }

  // Registers all parameterized tests defined using TEST_P and
  // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
  // combination. This method can be called more than once; it has guards
  // protecting from registering the tests more than once.  If
  // value-parameterized tests are disabled, RegisterParameterizedTests is
  // present but does nothing.
  void RegisterParameterizedTests();

  // Runs all tests in this UnitTest object, prints the result, and
  // returns true if all tests are successful.  If any exception is
  // thrown during a test, this test is considered to be failed, but
  // the rest of the tests will still be run.
  bool RunAllTests();

  // Clears the results of all tests, except the ad hoc tests.
  void ClearNonAdHocTestResult() {
    ForEach(test_cases_, TestCase::ClearTestCaseResult);
  }

  // Clears the results of ad-hoc test assertions.
  void ClearAdHocTestResult() {
    ad_hoc_test_result_.Clear();
  }

  enum ReactionToSharding {
    HONOR_SHARDING_PROTOCOL,
    IGNORE_SHARDING_PROTOCOL
  };

  // Matches the full name of each test against the user-specified
  // filter to decide whether the test should run, then records the
  // result in each TestCase and TestInfo object.
  // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
  // based on sharding variables in the environment.
  // Returns the number of tests that should run.
  int FilterTests(ReactionToSharding shard_tests);

  // Prints the names of the tests matching the user-specified filter flag.
  void ListTestsMatchingFilter();

  const TestCase* current_test_case() const { return current_test_case_; }
  TestInfo* current_test_info() { return current_test_info_; }
  const TestInfo* current_test_info() const { return current_test_info_; }

  // Returns the vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*>& environments() { return environments_; }

  // Getters for the per-thread Google Test trace stack.
  std::vector<TraceInfo>& gtest_trace_stack() {
    return *(gtest_trace_stack_.pointer());
  }
  const std::vector<TraceInfo>& gtest_trace_stack() const {
    return gtest_trace_stack_.get();
  }

#if GTEST_HAS_DEATH_TEST
  void InitDeathTestSubprocessControlInfo() {
    internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
  }
  // Returns a pointer to the parsed --gtest_internal_run_death_test
  // flag, or NULL if that flag was not specified.
  // This information is useful only in a death test child process.
  // Must not be called before a call to InitGoogleTest.
  const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
    return internal_run_death_test_flag_.get();
  }

  // Returns a pointer to the current death test factory.
  internal::DeathTestFactory* death_test_factory() {
    return death_test_factory_.get();
  }

  void SuppressTestEventsIfInSubprocess();

  friend class ReplaceDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST

  // Initializes the event listener performing XML output as specified by
  // UnitTestOptions. Must not be called before InitGoogleTest.
  void ConfigureXmlOutput();

#if GTEST_CAN_STREAM_RESULTS_
  // Initializes the event listener for streaming test results to a socket.
  // Must not be called before InitGoogleTest.
  void ConfigureStreamingOutput();
#endif

  // Performs initialization dependent upon flag values obtained in
  // ParseGoogleTestFlagsOnly.  Is called from InitGoogleTest after the call to
  // ParseGoogleTestFlagsOnly.  In case a user neglects to call InitGoogleTest
  // this function is also called from RunAllTests.  Since this function can be
  // called more than once, it has to be idempotent.
  void PostFlagParsingInit();

  // Gets the random seed used at the start of the current test iteration.
  int random_seed() const { return random_seed_; }

  // Gets the random number generator.
  internal::Random* random() { return &random_; }

  // Shuffles all test cases, and the tests within each test case,
  // making sure that death tests are still run first.
  void ShuffleTests();

  // Restores the test cases and tests to their order before the first shuffle.
  void UnshuffleTests();

  // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
  // UnitTest::Run() starts.
  bool catch_exceptions() const { return catch_exceptions_; }

 private:
  friend class ::testing::UnitTest;

  // Used by UnitTest::Run() to capture the state of
  // GTEST_FLAG(catch_exceptions) at the moment it starts.
  void set_catch_exceptions(bool value) { catch_exceptions_ = value; }

  // The UnitTest object that owns this implementation object.
  UnitTest* const parent_;

  // The working directory when the first TEST() or TEST_F() was
  // executed.
  internal::FilePath original_working_dir_;

  // The default test part result reporters.
  DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
  DefaultPerThreadTestPartResultReporter
      default_per_thread_test_part_result_reporter_;

  // Points to (but doesn't own) the global test part result reporter.
  TestPartResultReporterInterface* global_test_part_result_repoter_;

  // Protects read and write access to global_test_part_result_reporter_.
  internal::Mutex global_test_part_result_reporter_mutex_;

  // Points to (but doesn't own) the per-thread test part result reporter.
  internal::ThreadLocal<TestPartResultReporterInterface*>
      per_thread_test_part_result_reporter_;

  // The vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*> environments_;

  // The vector of TestCases in their original order.  It owns the
  // elements in the vector.
  std::vector<TestCase*> test_cases_;

  // Provides a level of indirection for the test case list to allow
  // easy shuffling and restoring the test case order.  The i-th
  // element of this vector is the index of the i-th test case in the
  // shuffled order.
  std::vector<int> test_case_indices_;

#if GTEST_HAS_PARAM_TEST
  // ParameterizedTestRegistry object used to register value-parameterized
  // tests.
  internal::ParameterizedTestCaseRegistry parameterized_test_registry_;

  // Indicates whether RegisterParameterizedTests() has been called already.
  bool parameterized_tests_registered_;
#endif  // GTEST_HAS_PARAM_TEST

  // Index of the last death test case registered.  Initially -1.
  int last_death_test_case_;

  // This points to the TestCase for the currently running test.  It
  // changes as Google Test goes through one test case after another.
  // When no test is running, this is set to NULL and Google Test
  // stores assertion results in ad_hoc_test_result_.  Initially NULL.
  TestCase* current_test_case_;

  // This points to the TestInfo for the currently running test.  It
  // changes as Google Test goes through one test after another.  When
  // no test is running, this is set to NULL and Google Test stores
  // assertion results in ad_hoc_test_result_.  Initially NULL.
  TestInfo* current_test_info_;

  // Normally, a user only writes assertions inside a TEST or TEST_F,
  // or inside a function called by a TEST or TEST_F.  Since Google
  // Test keeps track of which test is current running, it can
  // associate such an assertion with the test it belongs to.
  //
  // If an assertion is encountered when no TEST or TEST_F is running,
  // Google Test attributes the assertion result to an imaginary "ad hoc"
  // test, and records the result in ad_hoc_test_result_.
  TestResult ad_hoc_test_result_;

  // The list of event listeners that can be used to track events inside
  // Google Test.
  TestEventListeners listeners_;

  // The OS stack trace getter.  Will be deleted when the UnitTest
  // object is destructed.  By default, an OsStackTraceGetter is used,
  // but the user can set this field to use a custom getter if that is
  // desired.
  OsStackTraceGetterInterface* os_stack_trace_getter_;

  // True iff PostFlagParsingInit() has been called.
  bool post_flag_parse_init_performed_;

  // The random number seed used at the beginning of the test run.
  int random_seed_;

  // Our random number generator.
  internal::Random random_;

  // How long the test took to run, in milliseconds.
  TimeInMillis elapsed_time_;

#if GTEST_HAS_DEATH_TEST
  // The decomposed components of the gtest_internal_run_death_test flag,
  // parsed when RUN_ALL_TESTS is called.
  internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
  internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
#endif  // GTEST_HAS_DEATH_TEST

  // A per-thread stack of traces created by the SCOPED_TRACE() macro.
  internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;

  // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
  // starts.
  bool catch_exceptions_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
};  // class UnitTestImpl

// Shortcut for reaching the implementation object behind the global
// UnitTest instance.
inline UnitTestImpl* GetUnitTestImpl() {
  UnitTest* const unit_test = UnitTest::GetInstance();
  return unit_test->impl();
}

#if GTEST_USES_SIMPLE_RE

// Internal helper functions for implementing the simple regular
// expression matcher.
GTEST_API_ bool IsInSet(char ch, const char* str);
GTEST_API_ bool IsAsciiDigit(char ch);
GTEST_API_ bool IsAsciiPunct(char ch);
GTEST_API_ bool IsRepeat(char ch);
GTEST_API_ bool IsAsciiWhiteSpace(char ch);
GTEST_API_ bool IsAsciiWordChar(char ch);
GTEST_API_ bool IsValidEscape(char ch);
GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
GTEST_API_ bool ValidateRegex(const char* regex);
GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
GTEST_API_ bool MatchRepetitionAndRegexAtHead(
    bool escaped, char ch, char repeat, const char* regex, const char* str);
GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);

#endif  // GTEST_USES_SIMPLE_RE

// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);

#if GTEST_HAS_DEATH_TEST

// Returns the message describing the last system error, regardless of the
// platform.
GTEST_API_ String GetLastErrnoDescription();

# if GTEST_OS_WINDOWS
// Provides leak-safe Windows kernel handle ownership.
class AutoHandle {
 public:
  AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
  explicit AutoHandle(HANDLE handle) : handle_(handle) {}

  ~AutoHandle() { Reset(); }

  HANDLE Get() const { return handle_; }
  void Reset() { Reset(INVALID_HANDLE_VALUE); }
  void Reset(HANDLE handle) {
    if (handle != handle_) {
      if (handle_ != INVALID_HANDLE_VALUE)
        ::CloseHandle(handle_);
      handle_ = handle;
    }
  }

 private:
  HANDLE handle_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
};
# endif  // GTEST_OS_WINDOWS

// Tries to read str as an unsigned decimal number (zero is accepted) and
// store it in *number.  Returns true on success; on failure returns false
// and leaves *number untouched.  The whole string must be consumed and the
// value must survive the conversion to Integer and back.
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
// it here.
//
// NOTE(review): for a signed Integer, a parsed value such as 2^64 - 1
// appears to pass the round-trip check (as -1 on two's-complement
// targets) -- confirm whether callers rely on a non-negative result.
template <typename Integer>
bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
  // strtoXXX tolerates leading whitespace and an optional sign, neither of
  // which is wanted here, so insist on a leading digit up front.
  const bool starts_with_digit = !str.empty() && IsDigit(str[0]);
  if (!starts_with_digit) {
    return false;
  }
  errno = 0;

  char* parse_end;
  // BiggestConvertible is the largest integer type that system-provided
  // string-to-number conversion routines can return.

# if GTEST_OS_WINDOWS && !defined(__GNUC__)

  // MSVC and C++ Builder define __int64 instead of the standard long long.
  typedef unsigned __int64 BiggestConvertible;
  const BiggestConvertible parsed = _strtoui64(str.c_str(), &parse_end, 10);

# else

  typedef unsigned long long BiggestConvertible;  // NOLINT
  const BiggestConvertible parsed = strtoull(str.c_str(), &parse_end, 10);

# endif  // GTEST_OS_WINDOWS && !defined(__GNUC__)

  // Success requires that every character was consumed and that the
  // conversion routine did not report an error through errno.
  const bool consumed_everything = *parse_end == '\0';
  const bool parse_success = consumed_everything && errno == 0;

  // TODO(vladl@google.com): Convert this to compile time assertion when it is
  // available.
  GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));

  // Narrow to the caller's type and make sure nothing was lost on the way.
  const Integer result = static_cast<Integer>(parsed);
  if (!parse_success || static_cast<BiggestConvertible>(result) != parsed) {
    return false;
  }
  *number = result;
  return true;
}
#endif  // GTEST_HAS_DEATH_TEST

// TestResult contains some private methods that should be hidden from
// Google Test users but are required for testing. This class allows our
// tests to access them.
//
// This class is supplied only for the purpose of testing Google Test's own
// constructs. Do not use it in user tests, either directly or indirectly.
class TestResultAccessor {
 public:
  static void RecordProperty(TestResult* test_result,
                             const TestProperty& property) {
    test_result->RecordProperty(property);
  }

  static void ClearTestPartResults(TestResult* test_result) {
    test_result->ClearTestPartResults();
  }

  static const std::vector<testing::TestPartResult>& test_part_results(
      const TestResult& test_result) {
    return test_result.test_part_results();
  }
};

}  // namespace internal
}  // namespace testing

#endif  // GTEST_SRC_GTEST_INTERNAL_INL_H_
#undef GTEST_IMPLEMENTATION_

#if GTEST_OS_WINDOWS
# define vsnprintf _vsnprintf
#endif  // GTEST_OS_WINDOWS

namespace testing {

using internal::CountIf;
using internal::ForEach;
using internal::GetElementOr;
using internal::Shuffle;

// Constants.

// A test whose test case name or test name matches this filter is
// disabled and not run.  The value is a colon-separated list of glob
// patterns.
static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";

// A test case whose name matches this filter is considered a death
// test case and will be run before test cases whose name doesn't
// match this filter.  The value is a colon-separated list of glob
// patterns.
static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";

// A test filter that matches everything.
static const char kUniversalFilter[] = "*";

// The default output file for XML output, used when the output flag names
// a format but no file.
static const char kDefaultOutputFile[] = "test_detail.xml";

// The environment variable name for the test shard index.
static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
// The environment variable name for the total number of test shards.
static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
// The environment variable name for the test shard status file.
static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";

namespace internal {

// The text used in failure messages to indicate the start of the
// stack trace.  Note the leading and trailing newlines.
const char kStackTraceMarker[] = "\nStack trace:\n";

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.  It starts out false.
bool g_help_flag = false;

}  // namespace internal

// Definitions of the Google Test flags.  Each GTEST_DEFINE_<type>_
// invocation below passes, in order: the flag's name (the identifier used
// with GTEST_FLAG(name)), an initial-value expression, and the help text.
// NOTE(review): most initial values go through the internal::*FromGTestEnv
// helpers, which presumably let an environment variable override the
// literal fallback given as their second argument -- confirm against the
// helpers' definitions.
GTEST_DEFINE_bool_(
    also_run_disabled_tests,
    internal::BoolFromGTestEnv("also_run_disabled_tests", false),
    "Run disabled tests too, in addition to the tests normally being run.");

GTEST_DEFINE_bool_(
    break_on_failure,
    internal::BoolFromGTestEnv("break_on_failure", false),
    "True iff a failed assertion should be a debugger break-point.");

GTEST_DEFINE_bool_(
    catch_exceptions,
    internal::BoolFromGTestEnv("catch_exceptions", true),
    "True iff " GTEST_NAME_
    " should catch exceptions and treat them as test failures.");

GTEST_DEFINE_string_(
    color,
    internal::StringFromGTestEnv("color", "auto"),
    "Whether to use colors in the output.  Valid values: yes, no, "
    "and auto.  'auto' means to use colors if the output is "
    "being sent to a terminal and the TERM environment variable "
    "is set to xterm, xterm-color, xterm-256color, linux or cygwin.");

GTEST_DEFINE_string_(
    filter,
    internal::StringFromGTestEnv("filter", kUniversalFilter),
    "A colon-separated list of glob (not regex) patterns "
    "for filtering the tests to run, optionally followed by a "
    "'-' and a : separated list of negative patterns (tests to "
    "exclude).  A test is run if it matches one of the positive "
    "patterns and does not match any of the negative patterns.");

// Unlike most flags here, list_tests has a plain literal initial value.
GTEST_DEFINE_bool_(list_tests, false,
                   "List all tests without running them.");

GTEST_DEFINE_string_(
    output,
    internal::StringFromGTestEnv("output", ""),
    "A format (currently must be \"xml\"), optionally followed "
    "by a colon and an output file name or directory. A directory "
    "is indicated by a trailing pathname separator. "
    "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
    "If a directory is specified, output files will be created "
    "within that directory, with file-names based on the test "
    "executable's name and, if necessary, made unique by adding "
    "digits.");

GTEST_DEFINE_bool_(
    print_time,
    internal::BoolFromGTestEnv("print_time", true),
    "True iff " GTEST_NAME_
    " should display elapsed time in text output.");

GTEST_DEFINE_int32_(
    random_seed,
    internal::Int32FromGTestEnv("random_seed", 0),
    "Random number seed to use when shuffling test orders.  Must be in range "
    "[1, 99999], or 0 to use a seed based on the current time.");

GTEST_DEFINE_int32_(
    repeat,
    internal::Int32FromGTestEnv("repeat", 1),
    "How many times to repeat each test.  Specify a negative number "
    "for repeating forever.  Useful for shaking out flaky tests.");

// Like list_tests, this flag has a plain literal initial value.
GTEST_DEFINE_bool_(
    show_internal_stack_frames, false,
    "True iff " GTEST_NAME_ " should include internal stack frames when "
    "printing test failure stack traces.");

GTEST_DEFINE_bool_(
    shuffle,
    internal::BoolFromGTestEnv("shuffle", false),
    "True iff " GTEST_NAME_
    " should randomize tests' order on every run.");

GTEST_DEFINE_int32_(
    stack_trace_depth,
    internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
    "The maximum number of stack frames to print when an "
    "assertion fails.  The valid range is 0 through 100, inclusive.");

GTEST_DEFINE_string_(
    stream_result_to,
    internal::StringFromGTestEnv("stream_result_to", ""),
    "This flag specifies the host name and the port number on which to stream "
    "test results. Example: \"localhost:555\". The flag is effective only on "
    "Linux.");

GTEST_DEFINE_bool_(
    throw_on_failure,
    internal::BoolFromGTestEnv("throw_on_failure", false),
    "When this flag is specified, a failed assertion will throw an exception "
    "if exceptions are enabled or exit the program with a non-zero code "
    "otherwise.");

namespace internal {

// Produces a pseudo-random number in [0, range) with a Linear Congruential
// Generator (LCG).  The generator state is advanced first; the call then
// aborts via GTEST_CHECK_ if 'range' is 0 or exceeds kMaxRange.
UInt32 Random::Generate(UInt32 range) {
  // These constants are the same as are used in glibc's rand(3).
  const UInt32 kMultiplier = 1103515245U;
  const UInt32 kIncrement = 12345U;
  state_ = (kMultiplier * state_ + kIncrement) % kMaxRange;

  GTEST_CHECK_(range > 0)
      << "Cannot generate a number in the range [0, 0).";
  GTEST_CHECK_(range <= kMaxRange)
      << "Generation of a number in [0, " << range << ") was requested, "
      << "but this can only generate numbers in [0, " << kMaxRange << ").";

  // Reducing with a modulus biases the result slightly towards small
  // values; that is accepted here for simplicity, since an LCG is not a
  // high-quality generator anyway.
  const UInt32 value = state_ % range;
  return value;
}

// g_init_gtest_count records how many times InitGoogleTest() has been
// called; a user must call testing::InitGoogleTest() to initialize Google
// Test.  The counter is not protected by a mutex as it is only accessed
// in the main thread.
int g_init_gtest_count = 0;

// Returns true iff the user has initialized Google Test, i.e. the counter
// above is non-zero.  Useful for catching the user mistake of calling
// RUN_ALL_TESTS() without initializing Google Test first.
static bool GTestIsInitialized() {
  const bool initialized = (g_init_gtest_count != 0);
  return initialized;
}

// Calls the given int-returning const method on every TestCase in
// case_list, in order, and returns the total of the results (0 for an
// empty list).
static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
                               int (TestCase::*method)() const) {
  typedef std::vector<TestCase*>::const_iterator CaseIterator;

  int total = 0;
  for (CaseIterator it = case_list.begin(); it != case_list.end(); ++it) {
    const TestCase* const test_case = *it;
    total += (test_case->*method)();
  }
  return total;
}

// Returns true iff the test case should run and reports Passed().
static bool TestCasePassed(const TestCase* test_case) {
  if (!test_case->should_run()) {
    return false;
  }
  return test_case->Passed();
}

// Returns true iff the test case should run and reports Failed().
static bool TestCaseFailed(const TestCase* test_case) {
  if (!test_case->should_run()) {
    return false;
  }
  return test_case->Failed();
}

// Predicate form of TestCase::should_run(): true iff test_case contains
// at least one test that should run.
static bool ShouldRunTestCase(const TestCase* test_case) {
  const bool has_runnable_test = test_case->should_run();
  return has_runnable_test;
}

// AssertHelper constructor.  Allocates an AssertHelperData that holds the
// result type, the source file and line, and the message of the assertion.
AssertHelper::AssertHelper(TestPartResult::Type type,
                           const char* file,
                           int line,
                           const char* message)
    : data_(new AssertHelperData(type, file, line, message)) {
}

// AssertHelper destructor.  Frees the AssertHelperData allocated by the
// constructor.
AssertHelper::~AssertHelper() {
  delete data_;
}

// Message assignment, for assertion streaming support.  Reports a test
// part result built from the stored assertion data, with the streamed
// user message appended to the stored message.
void AssertHelper::operator=(const Message& message) const {
  UnitTest* const unit_test = UnitTest::GetInstance();

  // Skips the stack frame for this function itself.
  const String stack_trace =
      unit_test->impl()->CurrentOsStackTraceExceptTop(1);

  unit_test->AddTestPartResult(data_->type, data_->file, data_->line,
                               AppendUserMessage(data_->message, message),
                               stack_trace);  // NOLINT
}

// Mutex for linked pointers, defined here as a static mutex.
GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);

// Application pathname gotten in InitGoogleTest.  Read by
// GetCurrentExecutableName() below.
String g_executable_path;

// Returns the name of the running executable, taken from
// g_executable_path with any directory part removed.  On Windows the
// "exe" extension is removed as well.
FilePath GetCurrentExecutableName() {
#if GTEST_OS_WINDOWS
  const FilePath executable(FilePath(g_executable_path).RemoveExtension("exe"));
#else
  const FilePath executable(g_executable_path);
#endif  // GTEST_OS_WINDOWS

  return executable.RemoveDirectoryName();
}

// Functions for processing the gtest_output flag.

// Returns the output format named by the output flag, or "" for normal
// printed output.  The format is the part of the flag before the first
// ':'; without a ':' the whole flag value is the format.
String UnitTestOptions::GetOutputFormat() {
  const char* const flag_text = GTEST_FLAG(output).c_str();
  if (flag_text == NULL) {
    return String("");
  }

  const char* const separator = strchr(flag_text, ':');
  if (separator != NULL) {
    return String(flag_text, separator - flag_text);
  }
  return String(flag_text);
}

// Returns the path of the requested output file.  When the output flag
// names no file (no ':' in it), the default file name inside the original
// working directory is used.  A relative name is resolved against the
// original working directory, and when the name denotes a directory a
// unique file name is generated inside it from the executable's name and
// the output format.
String UnitTestOptions::GetAbsolutePathToOutputFile() {
  const char* const flag_text = GTEST_FLAG(output).c_str();
  if (flag_text == NULL) {
    return String("");
  }

  const char* const separator = strchr(flag_text, ':');
  if (separator == NULL) {
    const internal::FilePath default_path = internal::FilePath::ConcatPaths(
        internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
        internal::FilePath(kDefaultOutputFile));
    return String(default_path.ToString());
  }

  // Everything after the first ':' is the requested file or directory.
  const char* const requested = separator + 1;
  internal::FilePath output_name(requested);
  if (!output_name.IsAbsolutePath()) {
    // TODO(wan@google.com): on Windows \some\path is not an absolute
    // path (as its meaning depends on the current drive), yet the
    // following logic for turning it into an absolute path is wrong.
    // Fix it.
    output_name = internal::FilePath::ConcatPaths(
        internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
        internal::FilePath(requested));
  }

  if (output_name.IsDirectory()) {
    const internal::FilePath unique_file(
        internal::FilePath::GenerateUniqueFileName(
            output_name, internal::GetCurrentExecutableName(),
            GetOutputFormat().c_str()));
    return unique_file.ToString();
  }
  return output_name.ToString();
}

// Returns true iff the wildcard pattern matches the whole string.  The
// pattern ends at its first ':' or '\0'.  '?' matches exactly one
// character, '*' matches any (possibly empty) run of characters, and
// every other character matches only itself.
//
// The matcher is a plain recursion: not very efficient, but clear and
// good enough for test names, which are short.
bool UnitTestOptions::PatternMatchesString(const char *pattern,
                                           const char *str) {
  const char head = *pattern;

  // End of the pattern: the string has to be exhausted as well.
  if (head == '\0' || head == ':') {
    return *str == '\0';
  }

  // '?' consumes exactly one character.
  if (head == '?') {
    if (*str == '\0') {
      return false;
    }
    return PatternMatchesString(pattern + 1, str + 1);
  }

  // '*' either swallows one more character or matches nothing at all.
  if (head == '*') {
    if (*str != '\0' && PatternMatchesString(pattern, str + 1)) {
      return true;
    }
    return PatternMatchesString(pattern + 1, str);
  }

  // Any other character has to match literally.
  if (head != *str) {
    return false;
  }
  return PatternMatchesString(pattern + 1, str + 1);
}

// Returns true iff 'name' matches at least one of the ':'-separated
// patterns in 'filter'.  Patterns are tried from left to right.
bool UnitTestOptions::MatchesFilter(const String& name, const char* filter) {
  const char* pattern = filter;
  while (!PatternMatchesString(pattern, name.c_str())) {
    // The current pattern failed; look for the separator that starts the
    // next one.
    const char* const separator = strchr(pattern, ':');
    if (separator == NULL) {
      // No pattern is left to try.
      return false;
    }

    // Step over the pattern separator (the ':' character).
    pattern = separator + 1;
  }
  return true;
}

// TODO(keithray): move String function implementations to gtest-string.cc.

// Returns true iff the filter flag selects the test named
// "<test_case_name>.<test_name>".  The flag is split at its first '-':
// the part before it is the positive filter, the part after it the
// negative filter.  A test is selected when it matches the positive
// filter and does not match the negative one.
bool UnitTestOptions::FilterMatchesTest(const String &test_case_name,
                                        const String &test_name) {
  const String full_name = String::Format("%s.%s",
                                          test_case_name.c_str(),
                                          test_name.c_str());

  const char* const filter_text = GTEST_FLAG(filter).c_str();
  const char* const dash = strchr(filter_text, '-');

  String positive;
  String negative;
  if (dash != NULL) {
    positive = String(filter_text, dash - filter_text);  // Up to the dash.
    negative = String(dash + 1);                         // After the dash.
    if (positive.empty()) {
      // Treat '-test1' as the same as '*-test1'
      positive = kUniversalFilter;
    }
  } else {
    // No dash: the whole flag is a positive filter.
    positive = filter_text;
    negative = String("");
  }

  // A filter is a colon-separated list of patterns.  It matches a
  // test if any pattern in it matches the test.
  if (!MatchesFilter(full_name, positive.c_str())) {
    return false;
  }
  return !MatchesFilter(full_name, negative.c_str());
}

#if GTEST_HAS_SEH
// Decides whether Google Test should handle the given SEH exception.
// Returns EXCEPTION_EXECUTE_HANDLER if so and EXCEPTION_CONTINUE_SEARCH
// otherwise, which makes the function usable as an __except condition.
int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
  // SEH exception code for C++ exceptions.
  // (see http://support.microsoft.com/kb/185294 for more information).
  const DWORD kCxxExceptionCode = 0xe06d7363;

  // The user does not want exceptions to be caught.
  if (!GTEST_FLAG(catch_exceptions)) {
    return EXCEPTION_CONTINUE_SEARCH;
  }

  // Breakpoint exceptions are left alone.
  if (exception_code == EXCEPTION_BREAKPOINT) {
    return EXCEPTION_CONTINUE_SEARCH;
  }

  // C++ exceptions (VC++ implements them via SEH, apparently) are left
  // alone too.
  if (exception_code == kCxxExceptionCode) {
    return EXCEPTION_CONTINUE_SEARCH;
  }

  return EXCEPTION_EXECUTE_HANDLER;
}
#endif  // GTEST_HAS_SEH

}  // namespace internal

// The c'tor sets this object as the test part result reporter used by
// Google Test.  The 'result' parameter specifies where to report the
// results. Intercepts only failures from the current thread: the
// intercept mode is fixed to INTERCEPT_ONLY_CURRENT_THREAD before Init()
// installs the reporter.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
    TestPartResultArray* result)
    : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD),
      result_(result) {
  Init();
}

// The c'tor sets this object as the test part result reporter used by
// Google Test.  The 'result' parameter specifies where to report the
// results.  'intercept_mode' selects whether Init() installs this object
// as the global reporter or as the current thread's reporter.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
    InterceptMode intercept_mode, TestPartResultArray* result)
    : intercept_mode_(intercept_mode),
      result_(result) {
  Init();
}

void ScopedFakeTestPartResultReporter::Init() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
    old_reporter_ = impl->GetGlobalTestPartResultReporter();
    impl->SetGlobalTestPartResultReporter(this);
  } else {
    old_reporter_ = impl->GetTestPartResultReporterForCurrentThread();
    impl->SetTestPartResultReporterForCurrentThread(this);
  }
}

// The d'tor puts back the reporter saved in old_reporter_, in the same
// slot (global or current thread) that intercept_mode_ selects.
ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
  internal::UnitTestImpl* const unit_test_impl = internal::GetUnitTestImpl();

  if (intercept_mode_ != INTERCEPT_ALL_THREADS) {
    unit_test_impl->SetTestPartResultReporterForCurrentThread(old_reporter_);
    return;
  }
  unit_test_impl->SetGlobalTestPartResultReporter(old_reporter_);
}

// Increments the test part result count and remembers the result.
// This method is from the TestPartResultReporterInterface interface.
void ScopedFakeTestPartResultReporter::ReportTestPartResult(
    const TestPartResult& result) {
  result_->Append(result);
}

namespace internal {

// Returns the type ID of ::testing::Test.  Always call this instead of
// GetTypeId< ::testing::Test>().  It works around a suspected linker bug
// seen when Google Test is used as a framework on Mac OS X, where
// GetTypeId< ::testing::Test>() yields different values depending on
// whether the call comes from the framework or from user test code.
// Because this function always instantiates GetTypeId<>() from within
// gtest.cc, it returns one consistent value.
TypeId GetTestTypeId() {
  const TypeId test_type_id = GetTypeId<Test>();
  return test_type_id;
}

// The value of GetTestTypeId() as seen from within the Google Test
// library.  This is solely for testing GetTestTypeId().  It is initialized
// by calling GetTestTypeId() from this translation unit.
extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();

// Predicate-formatter that succeeds iff 'results' holds exactly one test
// part result, that result has the given type, and its message contains
// 'substr'.  The three expression-text parameters are unused.  On failure
// the returned AssertionResult describes what was expected and what was
// actually found.
AssertionResult HasOneFailure(const char* /* results_expr */,
                              const char* /* type_expr */,
                              const char* /* substr_expr */,
                              const TestPartResultArray& results,
                              TestPartResult::Type type,
                              const string& substr) {
  const bool want_fatal = (type == TestPartResult::kFatalFailure);
  const String expected(want_fatal ?
                        "1 fatal failure" :
                        "1 non-fatal failure");

  // Wrong number of results: list every one of them.
  const int failure_count = results.size();
  if (failure_count != 1) {
    Message msg;
    msg << "Expected: " << expected << "\n"
        << "  Actual: " << failure_count << " failures";
    for (int index = 0; index != failure_count; ++index) {
      msg << "\n" << results.GetTestPartResult(index);
    }
    return AssertionFailure() << msg;
  }

  // Exactly one result: it must be of the expected type ...
  const TestPartResult& only = results.GetTestPartResult(0);
  if (only.type() != type) {
    return AssertionFailure() << "Expected: " << expected << "\n"
                              << "  Actual:\n"
                              << only;
  }

  // ... and its message must mention the expected substring.
  const bool mentions_substr =
      strstr(only.message(), substr.c_str()) != NULL;
  if (!mentions_substr) {
    return AssertionFailure() << "Expected: " << expected << " containing \""
                              << substr << "\"\n"
                              << "  Actual:\n"
                              << only;
  }

  return AssertionSuccess();
}

// The constructor of SingleFailureChecker remembers where to look up
// test part results, what type of failure we expect, and what
// substring the failure message should contain.  Nothing is checked
// here; the check happens in the destructor.
SingleFailureChecker:: SingleFailureChecker(
    const TestPartResultArray* results,
    TestPartResult::Type type,
    const string& substr)
    : results_(results),
      type_(type),
      substr_(substr) {}

// The destructor of SingleFailureChecker verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring.  If that's not the case, a
// non-fatal failure will be generated.  The verification itself is done
// by HasOneFailure().
SingleFailureChecker::~SingleFailureChecker() {
  EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
}

// Remembers the UnitTestImpl that reported results are forwarded to.
DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
    UnitTestImpl* unit_test) : unit_test_(unit_test) {}

void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
    const TestPartResult& result) {
  unit_test_->current_test_result()->AddTestPartResult(result);
  unit_test_->listeners()->repeater()->OnTestPartResult(result);
}

// Remembers the UnitTestImpl whose global reporter receives the results.
DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
    UnitTestImpl* unit_test) : unit_test_(unit_test) {}

void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
    const TestPartResult& result) {
  unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
}

// Returns the global test part result reporter.  The pointer is read
// while a MutexLock on global_test_part_result_reporter_mutex_ is alive.
TestPartResultReporterInterface*
UnitTestImpl::GetGlobalTestPartResultReporter() {
  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
  TestPartResultReporterInterface* const reporter =
      global_test_part_result_repoter_;
  return reporter;
}

// Sets the global test part result reporter.  The pointer is stored
// while a MutexLock on global_test_part_result_reporter_mutex_ is alive;
// the previous reporter is neither returned nor deleted here.
void UnitTestImpl::SetGlobalTestPartResultReporter(
    TestPartResultReporterInterface* reporter) {
  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
  global_test_part_result_repoter_ = reporter;
}

// Returns the test part result reporter stored for the calling thread in
// per_thread_test_part_result_reporter_.
TestPartResultReporterInterface*
UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
  TestPartResultReporterInterface* const reporter =
      per_thread_test_part_result_reporter_.get();
  return reporter;
}

// Sets the test part result reporter for the current thread by storing
// 'reporter' in per_thread_test_part_result_reporter_.
void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
    TestPartResultReporterInterface* reporter) {
  per_thread_test_part_result_reporter_.set(reporter);
}

// Counts the test cases for which TestCasePassed() holds.
int UnitTestImpl::successful_test_case_count() const {
  const int passed_cases = CountIf(test_cases_, TestCasePassed);
  return passed_cases;
}

// Counts the test cases for which TestCaseFailed() holds.
int UnitTestImpl::failed_test_case_count() const {
  const int failed_cases = CountIf(test_cases_, TestCaseFailed);
  return failed_cases;
}

// Gets the number of all test cases.
int UnitTestImpl::total_test_case_count() const {
  return static_cast<int>(test_cases_.size());
}

// Counts the test cases that contain at least one test that should run,
// as decided by ShouldRunTestCase().
int UnitTestImpl::test_case_to_run_count() const {
  const int runnable_cases = CountIf(test_cases_, ShouldRunTestCase);
  return runnable_cases;
}

// Adds up TestCase::successful_test_count() over all test cases.
int UnitTestImpl::successful_test_count() const {
  const int passed_tests =
      SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
  return passed_tests;
}

// Adds up TestCase::failed_test_count() over all test cases.
int UnitTestImpl::failed_test_count() const {
  const int failed_tests =
      SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
  return failed_tests;
}

// Adds up TestCase::disabled_test_count() over all test cases.
int UnitTestImpl::disabled_test_count() const {
  const int disabled_tests =
      SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
  return disabled_tests;
}

// Adds up TestCase::total_test_count() over all test cases.
int UnitTestImpl::total_test_count() const {
  const int all_tests =
      SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
  return all_tests;
}

// Adds up TestCase::test_to_run_count() over all test cases.
int UnitTestImpl::test_to_run_count() const {
  const int runnable_tests =
      SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
  return runnable_tests;
}

// Returns the current OS stack trace as a String.
//
// This implementation does not collect a trace: it ignores skip_count
// and always returns the empty String.
//
// Intended contract: the maximum number of stack frames to include is
// given by the gtest_stack_trace_depth flag, and skip_count is the number
// of top frames to leave out, which doesn't count against that maximum.
// For example, if Foo() calls Bar(), which in turn calls
// CurrentOsStackTraceExceptTop(1), Foo() would be included in the trace
// but Bar() and CurrentOsStackTraceExceptTop() would not.
String UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
  // The parameter is deliberately unused.
  static_cast<void>(skip_count);
  return String("");
}

// Returns the current time in milliseconds.  One of three
// platform-specific implementations is selected by the preprocessor; if
// none applies, compilation stops with an #error.
TimeInMillis GetTimeInMillis() {
#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
  // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
  // http://analogous.blogspot.com/2005/04/epoch.html
  const TimeInMillis kJavaEpochToWinFileTimeDelta =
    static_cast<TimeInMillis>(116444736UL) * 100000UL;
  // Divisor that turns the FILETIME value below into milliseconds.
  const DWORD kTenthMicrosInMilliSecond = 10000;

  SYSTEMTIME now_systime;
  FILETIME now_filetime;
  ULARGE_INTEGER now_int64;
  // TODO(kenton@google.com): Shouldn't this just use
  //   GetSystemTimeAsFileTime()?
  GetSystemTime(&now_systime);
  if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
    // Assemble the two FILETIME halves into one 64-bit value, scale it to
    // milliseconds and shift the origin from 1601 to 1970.
    now_int64.LowPart = now_filetime.dwLowDateTime;
    now_int64.HighPart = now_filetime.dwHighDateTime;
    now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
      kJavaEpochToWinFileTimeDelta;
    return now_int64.QuadPart;
  }
  // The conversion to FILETIME failed; 0 is returned in that case.
  return 0;
#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
  __timeb64 now;

# ifdef _MSC_VER

  // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
  // (deprecated function) there.
  // TODO(kenton@google.com): Use GetTickCount()?  Or use
  //   SystemTimeToFileTime()
#  pragma warning(push)          // Saves the current warning state.
#  pragma warning(disable:4996)  // Temporarily disables warning 4996.
  _ftime64(&now);
#  pragma warning(pop)           // Restores the warning state.
# else

  _ftime64(&now);

# endif  // _MSC_VER

  // Whole seconds scaled to milliseconds, plus the millisecond remainder.
  return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
#elif GTEST_HAS_GETTIMEOFDAY_
  struct timeval now;
  // The return value of gettimeofday() is not checked.
  gettimeofday(&now, NULL);
  // Seconds scaled up to milliseconds plus microseconds scaled down.
  return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
#else
# error "Don't know how to get the current time on your system."
#endif
}

// Utilities

// class String

// Formats a C string the way it would appear as a literal: the input
// wrapped in double quotes, e.g. "\"Hello\"" for input "Hello".  A NULL
// input yields "(null)" (without quotes).
//
// Known issue: escape sequences are not handled yet.
String String::ShowCStringQuoted(const char* c_str) {
  if (c_str == NULL) {
    return String("(null)");
  }
  return String::Format("\"%s\"", c_str);
}

// Returns a new[]-allocated buffer of length+1 chars holding a copy of at
// most 'length' characters of str, followed by a terminating null byte.
// Returns NULL, without allocating, when str is NULL.
static char* CloneString(const char* str, size_t length) {
  if (str == NULL) {
    return NULL;
  }

  char* const copy = new char[length + 1];
  posix::StrNCpy(copy, str, length);
  copy[length] = '\0';
  return copy;
}

// Duplicates a 0-terminated C string into memory obtained with new[]; the
// caller is responsible for delete[]-ing the result.  A NULL input gives
// a NULL result.
const char * String::CloneCString(const char* c_str) {
  if (c_str == NULL) {
    return NULL;
  }
  return CloneString(c_str, strlen(c_str));
}

#if GTEST_OS_WINDOWS_MOBILE
// Converts an ANSI string to a newly allocated, 0-terminated UTF-16 wide
// string.  The result is allocated with new[] and must be released by the
// caller with delete[].  Returns NULL if the input is NULL.
LPCWSTR String::AnsiToUtf16(const char* ansi) {
  if (ansi == NULL) {
    return NULL;
  }

  const int ansi_length = strlen(ansi);

  // First pass: no output buffer, only ask how many wide chars are needed.
  const int wide_length =
      MultiByteToWideChar(CP_ACP, 0, ansi, ansi_length,
                          NULL, 0);

  // Second pass: convert into a buffer with room for the terminator.
  WCHAR* const wide = new WCHAR[wide_length + 1];
  MultiByteToWideChar(CP_ACP, 0, ansi, ansi_length,
                      wide, wide_length);
  wide[wide_length] = 0;
  return wide;
}

// Converts the given wide string into a newly allocated ANSI string.
// The result is allocated with new[] and must be released by the caller
// with delete[].  Returns NULL if the input is NULL.
const char* String::Utf16ToAnsi(LPCWSTR utf16_str)  {
  if (utf16_str == NULL) return NULL;

  // The first call is given no output buffer; its return value is used as
  // the number of chars to allocate.
  const int narrow_len = WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
                                             NULL, 0, NULL, NULL);
  char* const narrow = new char[narrow_len + 1];
  WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
                      narrow, narrow_len, NULL, NULL);
  narrow[narrow_len] = 0;
  return narrow;
}

#endif  // GTEST_OS_WINDOWS_MOBILE

// Tells whether two C strings have the same content.
//
// Unlike strcmp(), NULL arguments are accepted: NULL equals only NULL and
// differs from every non-NULL string, including the empty string.
bool String::CStringEquals(const char * lhs, const char * rhs) {
  if (lhs == NULL || rhs == NULL) {
    // With at least one NULL operand the strings match only if both are NULL.
    return lhs == rhs;
  }
  return strcmp(lhs, rhs) == 0;
}

#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

// Streams length wide characters starting at wstr into *msg.  Each run of
// non-NUL characters is converted with WideStringToUtf8(); every embedded
// L'\0' is streamed on its own as a '\0' char.
static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
                                     Message* msg) {
  // TODO(wan): consider allowing a testing::String object to
  // contain '\0'.  This will make it behave more like std::string,
  // and will allow ToUtf8String() to return the correct encoding
  // for '\0' s.t. we can get rid of the conditional here (and in
  // several other places).
  size_t pos = 0;
  while (pos != length) {  // NOLINT
    if (wstr[pos] == L'\0') {
      *msg << '\0';
      ++pos;
      continue;
    }

    *msg << WideStringToUtf8(wstr + pos, static_cast<int>(length - pos));
    // Advances past the run of non-NUL characters that starts at pos.
    do {
      ++pos;
    } while (pos != length && wstr[pos] != L'\0');
  }
}

#endif  // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

}  // namespace internal

#if GTEST_HAS_STD_WSTRING
// Streams a ::std::wstring into this Message: its characters are handed to
// internal::StreamWideCharsToMessage(), which performs the conversion to a
// narrow UTF-8 string.
Message& Message::operator <<(const ::std::wstring& wstr) {
  const wchar_t* const chars = wstr.c_str();
  const size_t count = wstr.length();
  internal::StreamWideCharsToMessage(chars, count, this);
  return *this;
}
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
// Streams a ::wstring into this Message: its characters are handed to
// internal::StreamWideCharsToMessage(), which performs the conversion to a
// narrow UTF-8 string.
Message& Message::operator <<(const ::wstring& wstr) {
  const wchar_t* const chars = wstr.c_str();
  const size_t count = wstr.length();
  internal::StreamWideCharsToMessage(chars, count, this);
  return *this;
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

// AssertionResult copy constructor.
// Used in EXPECT_TRUE/FALSE(assertion_result).
//
// Copies the success flag.  When other holds a message, the new object
// receives its own heap-allocated copy of that string; otherwise its
// message pointer is NULL.
AssertionResult::AssertionResult(const AssertionResult& other)
    : success_(other.success_),
      message_(other.message_.get() == NULL ?
               static_cast< ::std::string*>(NULL) :
               new ::std::string(*other.message_)) {
}

// Produces the negation of this assertion result: the success flag is
// flipped and any existing message is carried over.  Used with
// EXPECT/ASSERT_FALSE.
AssertionResult AssertionResult::operator!() const {
  AssertionResult inverted(!success_);
  const ::std::string* const text = message_.get();
  if (text != NULL) {
    inverted << *text;
  }
  return inverted;
}

// Creates an assertion result that represents success.
AssertionResult AssertionSuccess() {
  const bool kSucceeded = true;
  return AssertionResult(kSucceeded);
}

// Creates an assertion result that represents failure, with no message.
AssertionResult AssertionFailure() {
  const bool kSucceeded = false;
  return AssertionResult(kSucceeded);
}

// Creates a failed assertion result carrying the given failure message.
// Deprecated; use AssertionFailure() << message.
AssertionResult AssertionFailure(const Message& message) {
  AssertionResult failure = AssertionFailure();
  failure << message;
  return failure;
}

namespace internal {

// Builds the failed AssertionResult for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc).
//
// The first four parameters are the expressions used in the assertion
// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
// where foo is 5 and bar is 6, we have:
//
//   expected_expression: "foo"
//   actual_expression:   "bar"
//   expected_value:      "5"
//   actual_value:        "6"
//
// A value line is emitted only when the value text differs from the
// expression text.  The ignoring_case parameter is true iff the assertion
// is a *_STRCASEEQ*; when it's true, the string " (ignoring case)" is
// inserted after the expected expression.
AssertionResult EqFailure(const char* expected_expression,
                          const char* actual_expression,
                          const String& expected_value,
                          const String& actual_value,
                          bool ignoring_case) {
  Message text;

  // The actual side.
  text << "Value of: " << actual_expression;
  const bool show_actual_value = actual_value != actual_expression;
  if (show_actual_value) {
    text << "\n  Actual: " << actual_value;
  }

  // The expected side.
  text << "\nExpected: " << expected_expression;
  if (ignoring_case) {
    text << " (ignoring case)";
  }
  const bool show_expected_value = expected_value != expected_expression;
  if (show_expected_value) {
    text << "\nWhich is: " << expected_value;
  }

  return AssertionFailure() << text;
}

// Builds the failure message for Boolean assertions such as EXPECT_TRUE.
// The message of assertion_result, when non-empty, is appended in
// parentheses after the actual predicate value.
String GetBoolAssertionFailureMessage(const AssertionResult& assertion_result,
                                      const char* expression_text,
                                      const char* actual_predicate_value,
                                      const char* expected_predicate_value) {
  const char* const detail = assertion_result.message();

  Message report;
  report << "Value of: " << expression_text
         << "\n  Actual: " << actual_predicate_value;
  if (*detail != '\0') {
    report << " (" << detail << ")";
  }
  report << "\nExpected: " << expected_predicate_value;
  return report.GetString();
}

// Helper function for implementing ASSERT_NEAR.  Succeeds when the
// absolute difference between val1 and val2 is at most abs_error.
// Otherwise the failure message reports that difference together with the
// expression text and value of each of the three operands.
AssertionResult DoubleNearPredFormat(const char* expr1,
                                     const char* expr2,
                                     const char* abs_error_expr,
                                     double val1,
                                     double val2,
                                     double abs_error) {
  const double delta = fabs(val1 - val2);
  const bool within_tolerance = delta <= abs_error;
  if (within_tolerance) {
    return AssertionSuccess();
  }

  // TODO(wan): do not print the value of an expression if it's
  // already a literal.
  AssertionResult failure = AssertionFailure();
  failure << "The difference between " << expr1 << " and " << expr2
          << " is " << delta << ", which exceeds " << abs_error_expr
          << ", where\n"
          << expr1 << " evaluates to " << val1 << ",\n"
          << expr2 << " evaluates to " << val2 << ", and\n"
          << abs_error_expr << " evaluates to " << abs_error << ".";
  return failure;
}


// Helper template for implementing FloatLE() and DoubleLE().  Succeeds
// when val1 < val2 or when the two values are almost equal according to
// FloatingPoint<RawType>::AlmostEquals().  On failure both values are
// printed with digits10 + 2 significant digits.
template <typename RawType>
AssertionResult FloatingPointLE(const char* expr1,
                                const char* expr2,
                                RawType val1,
                                RawType val2) {
  bool passed = val1 < val2;
  if (!passed) {
    const FloatingPoint<RawType> lhs(val1), rhs(val2);
    passed = lhs.AlmostEquals(rhs);
  }
  if (passed) {
    return AssertionSuccess();
  }

  // Note that the above two checks will both fail if either val1 or
  // val2 is NaN, as the IEEE floating-point standard requires that
  // any predicate involving a NaN must return false.

  const int kPrecision = std::numeric_limits<RawType>::digits10 + 2;

  ::std::stringstream first_text;
  first_text << std::setprecision(kPrecision) << val1;

  ::std::stringstream second_text;
  second_text << std::setprecision(kPrecision) << val2;

  return AssertionFailure()
      << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
      << "  Actual: " << StringStreamToString(&first_text) << " vs "
      << StringStreamToString(&second_text);
}

}  // namespace internal

// Checks that val1 is less than, or almost equal to, val2, delegating to
// internal::FloatingPointLE<float>().  Fails otherwise; in particular it
// fails if either val1 or val2 is NaN.
AssertionResult FloatLE(const char* expr1, const char* expr2,
                        float val1, float val2) {
  const AssertionResult outcome =
      internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
  return outcome;
}

// Checks that val1 is less than, or almost equal to, val2, delegating to
// internal::FloatingPointLE<double>().  Fails otherwise; in particular it
// fails if either val1 or val2 is NaN.
AssertionResult DoubleLE(const char* expr1, const char* expr2,
                         double val1, double val2) {
  const AssertionResult outcome =
      internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
  return outcome;
}

namespace internal {

// The helper function for {ASSERT|EXPECT}_EQ with int or enum
// arguments.  Returns success when the two values compare equal and an
// EqFailure() result describing both operands otherwise.
AssertionResult CmpHelperEQ(const char* expected_expression,
                            const char* actual_expression,
                            BiggestInt expected,
                            BiggestInt actual) {
  if (expected != actual) {
    return EqFailure(expected_expression,
                     actual_expression,
                     FormatForComparisonFailureMessage(expected, actual),
                     FormatForComparisonFailureMessage(actual, expected),
                     false);
  }

  return AssertionSuccess();
}

// Generates CmpHelper<op_name>(), the helper behind ASSERT_?? and
// EXPECT_?? with integer or enum arguments.  The generated function
// succeeds when "val1 op val2" holds and otherwise returns a failure that
// shows both expressions, the operator and both values.  The macro exists
// only to avoid copy-and-paste of similar code and is undefined again
// right after its last use.
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   BiggestInt val1, BiggestInt val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  }\
  return AssertionFailure() \
      << "Expected: (" << expr1 << ") " #op " (" << expr2\
      << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
      << " vs " << FormatForComparisonFailureMessage(val2, val1);\
}

// Helper for {ASSERT|EXPECT}_NE with int or enum arguments.
GTEST_IMPL_CMP_HELPER_(NE, !=)
// Helper for {ASSERT|EXPECT}_LE with int or enum arguments.
GTEST_IMPL_CMP_HELPER_(LE, <=)
// Helper for {ASSERT|EXPECT}_LT with int or enum arguments.
GTEST_IMPL_CMP_HELPER_(LT, < )
// Helper for {ASSERT|EXPECT}_GE with int or enum arguments.
GTEST_IMPL_CMP_HELPER_(GE, >=)
// Helper for {ASSERT|EXPECT}_GT with int or enum arguments.
GTEST_IMPL_CMP_HELPER_(GT, > )

#undef GTEST_IMPL_CMP_HELPER_

// The helper function for {ASSERT|EXPECT}_STREQ.  Compares the two C
// strings with String::CStringEquals() and, on mismatch, reports both
// values in quoted form.
AssertionResult CmpHelperSTREQ(const char* expected_expression,
                               const char* actual_expression,
                               const char* expected,
                               const char* actual) {
  if (!String::CStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     String::ShowCStringQuoted(expected),
                     String::ShowCStringQuoted(actual),
                     false);
  }

  return AssertionSuccess();
}

// The helper function for {ASSERT|EXPECT}_STRCASEEQ.  Compares the two C
// strings with String::CaseInsensitiveCStringEquals() and, on mismatch,
// reports both values in quoted form, marked as "(ignoring case)".
AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
                                   const char* actual_expression,
                                   const char* expected,
                                   const char* actual) {
  if (!String::CaseInsensitiveCStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     String::ShowCStringQuoted(expected),
                     String::ShowCStringQuoted(actual),
                     true);
  }

  return AssertionSuccess();
}

// The helper function for {ASSERT|EXPECT}_STRNE.  Fails, showing both
// expressions and both values, when String::CStringEquals() reports the
// two C strings as equal.
AssertionResult CmpHelperSTRNE(const char* s1_expression,
                               const char* s2_expression,
                               const char* s1,
                               const char* s2) {
  if (String::CStringEquals(s1, s2)) {
    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
                              << s2_expression << "), actual: \""
                              << s1 << "\" vs \"" << s2 << "\"";
  }

  return AssertionSuccess();
}

// The helper function for {ASSERT|EXPECT}_STRCASENE.  Fails, showing both
// expressions and both values, when
// String::CaseInsensitiveCStringEquals() reports the two C strings as
// equal.
AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                   const char* s2_expression,
                                   const char* s1,
                                   const char* s2) {
  if (String::CaseInsensitiveCStringEquals(s1, s2)) {
    return AssertionFailure()
        << "Expected: (" << s1_expression << ") != ("
        << s2_expression << ") (ignoring case), actual: \""
        << s1 << "\" vs \"" << s2 << "\"";
  }

  return AssertionSuccess();
}

}  // namespace internal

namespace {

// Helper functions for implementing IsSubString() and IsNotSubstring().

// This group of overloaded functions return true iff needle is a
// substring of haystack.  NULL is considered a substring of itself
// only.

// Narrow C string overload; the search itself is done by strstr().
bool IsSubstringPred(const char* needle, const char* haystack) {
  const bool both_present = needle != NULL && haystack != NULL;
  if (!both_present) {
    // With a NULL operand the answer is "yes" only when both are NULL.
    return needle == haystack;
  }

  return strstr(haystack, needle) != NULL;
}

// Wide C string overload; the search itself is done by wcsstr().  NULL is
// a substring of NULL only.
bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
  const bool both_present = needle != NULL && haystack != NULL;
  if (!both_present) {
    return needle == haystack;
  }

  return wcsstr(haystack, needle) != NULL;
}

// String-object overload, based on the find() member of the string class.
// StringType here can be either ::std::string or ::std::wstring.
template <typename StringType>
bool IsSubstringPred(const StringType& needle,
                     const StringType& haystack) {
  const typename StringType::size_type pos = haystack.find(needle);
  return pos != StringType::npos;
}

// Shared implementation of IsSubstring() and IsNotSubstring(); the
// expected_to_be_substring parameter selects which of the two is checked.
// On failure the message shows the needle and haystack, with an L prefix
// on the opening quote when the character type is wider than one byte.
// StringType here can be const char*, const wchar_t*, ::std::string,
// or ::std::wstring.
template <typename StringType>
AssertionResult IsSubstringImpl(
    bool expected_to_be_substring,
    const char* needle_expr, const char* haystack_expr,
    const StringType& needle, const StringType& haystack) {
  const bool found = IsSubstringPred(needle, haystack);
  if (found == expected_to_be_substring) {
    return AssertionSuccess();
  }

  const char* const open_quote = (sizeof(needle[0]) > 1) ? "L\"" : "\"";
  const char* const negation = expected_to_be_substring ? "" : "not ";
  return AssertionFailure()
      << "Value of: " << needle_expr << "\n"
      << "  Actual: " << open_quote << needle << "\"\n"
      << "Expected: " << negation
      << "a substring of " << haystack_expr << "\n"
      << "Which is: " << open_quote << haystack << "\"";
}

}  // namespace

// IsSubstring() and IsNotSubstring() check whether needle is a
// substring of haystack (NULL is considered a substring of itself
// only), and return an appropriate error message when they fail.

// Narrow C string overload: expects needle to occur in haystack.
AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Wide C string overload: expects needle to occur in haystack.
AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Narrow C string overload: expects needle NOT to occur in haystack.
AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const char* needle, const char* haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Wide C string overload: expects needle NOT to occur in haystack.
AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const wchar_t* needle, const wchar_t* haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::string overload: expects needle to occur in haystack.
AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::string overload: expects needle NOT to occur in haystack.
AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::string& needle, const ::std::string& haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

#if GTEST_HAS_STD_WSTRING
// ::std::wstring overload: expects needle to occur in haystack.
AssertionResult IsSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::wstring overload: expects needle NOT to occur in haystack.
AssertionResult IsNotSubstring(
    const char* needle_expr, const char* haystack_expr,
    const ::std::wstring& needle, const ::std::wstring& haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

#if GTEST_OS_WINDOWS

namespace {

// Builds the failed AssertionResult shared by the
// IsHRESULT{Success,Failure} predicates.
//
//   expr:     text of the expression that produced the HRESULT.
//   expected: verb describing the expectation; the callers in this file
//             pass "succeeds" or "fails".
//   hr:       the HRESULT value that did not meet the expectation.
//
// The message shows the HRESULT in hexadecimal followed by the system's
// description of it (left empty on Windows Mobile).
AssertionResult HRESULTFailureHelper(const char* expr,
                                     const char* expected,
                                     long hr) {  // NOLINT
# if GTEST_OS_WINDOWS_MOBILE

  // Windows CE doesn't support FormatMessage.
  const char error_text[] = "";

# else

  // Looks up the human-readable system message for the HRESULT code
  // and since we're not passing any params to FormatMessage, we don't
  // want inserts expanded.
  const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
                       FORMAT_MESSAGE_IGNORE_INSERTS;
  const DWORD kBufSize = 4096;  // String::Format can't exceed this length.
  // Gets the system's human readable message string for this HRESULT.
  char error_text[kBufSize] = { '\0' };
  DWORD message_length = ::FormatMessageA(kFlags,
                                          0,  // no source, we're asking system
                                          hr,  // the error
                                          0,  // no line width restrictions
                                          error_text,  // output buffer
                                          kBufSize,  // buf size
                                          NULL);  // no arguments for inserts
  // Trims trailing white space (FormatMessage leaves a trailing cr-lf)
  for (; message_length && IsSpace(error_text[message_length - 1]);
          --message_length) {
    error_text[message_length - 1] = '\0';
  }

# endif  // GTEST_OS_WINDOWS_MOBILE

  // The %X conversion consumes an unsigned int, so hr is converted
  // explicitly rather than being passed through the varargs as a long.
  const String error_hex(
      String::Format("0x%08X ", static_cast<unsigned int>(hr)));
  return ::testing::AssertionFailure()
      << "Expected: " << expr << " " << expected << ".\n"
      << "  Actual: " << error_hex << error_text << "\n";
}

}  // namespace

// Succeeds when SUCCEEDED(hr) holds; otherwise returns the failure built
// by HRESULTFailureHelper() with the expectation "succeeds".
AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
  if (!SUCCEEDED(hr)) {
    return HRESULTFailureHelper(expr, "succeeds", hr);
  }
  return AssertionSuccess();
}

// Succeeds when FAILED(hr) holds; otherwise returns the failure built by
// HRESULTFailureHelper() with the expectation "fails".
AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
  if (!FAILED(hr)) {
    return HRESULTFailureHelper(expr, "fails", hr);
  }
  return AssertionSuccess();
}

#endif  // GTEST_OS_WINDOWS

// Utility functions for encoding Unicode text (wide strings) in
// UTF-8.

// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
// like this:
//
// Code-point length   Encoding
//   0 -  7 bits       0xxxxxxx
//   8 - 11 bits       110xxxxx 10xxxxxx
//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

// Largest code point representable by a one-byte UTF-8 sequence
// (7 payload bits).
const UInt32 kMaxCodePoint1 = 0x7F;

// Largest code point representable by a two-byte UTF-8 sequence
// (5 + 6 = 11 payload bits).
const UInt32 kMaxCodePoint2 = 0x7FF;

// Largest code point representable by a three-byte UTF-8 sequence
// (4 + 2*6 = 16 payload bits).
const UInt32 kMaxCodePoint3 = 0xFFFF;

// Largest code point representable by a four-byte UTF-8 sequence
// (3 + 3*6 = 21 payload bits).
const UInt32 kMaxCodePoint4 = 0x1FFFFF;

// Removes the n lowest bits from the bit pattern *bits and returns them.
// As a side effect, *bits is shifted to the right by n bits.
inline UInt32 ChopLowBits(UInt32* bits, int n) {
  const UInt32 mask = (static_cast<UInt32>(1) << n) - 1;
  const UInt32 chopped = *bits & mask;
  *bits = *bits >> n;
  return chopped;
}

// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// A code_point greater than kMaxCodePoint4 (i.e. one that does not fit in
// 21 bits) is output as '(Invalid Unicode 0xXXXXXXXX)'.  Note that every
// value up to kMaxCodePoint4 is encoded, including those above U+10FFFF.
char* CodePointToUtf8(UInt32 code_point, char* str) {
  if (code_point > kMaxCodePoint4) {
    // The longest string String::Format can produce when invoked
    // with these parameters is 28 character long (not including
    // the terminating nul character). We are asking for 32 character
    // buffer just in case. This is also enough for strncpy to
    // null-terminate the destination string.
    posix::StrNCpy(
        str, String::Format("(Invalid Unicode 0x%X)", code_point).c_str(), 32);
    str[31] = '\0';  // Makes sure no change in the format to strncpy leaves
                     // the result unterminated.
    return str;
  }

  if (code_point <= kMaxCodePoint1) {
    str[0] = static_cast<char>(code_point);  // 0xxxxxxx
    str[1] = '\0';
    return str;
  }

  // Multi-byte sequence: picks the number of 10xxxxxx continuation bytes
  // and the marker bits of the leading byte.
  int continuation_bytes;
  UInt32 lead_marker;
  if (code_point <= kMaxCodePoint2) {
    continuation_bytes = 1;
    lead_marker = 0xC0;  // 110xxxxx
  } else if (code_point <= kMaxCodePoint3) {
    continuation_bytes = 2;
    lead_marker = 0xE0;  // 1110xxxx
  } else {
    continuation_bytes = 3;
    lead_marker = 0xF0;  // 11110xxx
  }

  str[continuation_bytes + 1] = '\0';
  // Fills the continuation bytes from last to first, consuming six bits of
  // the code point for each.
  for (int i = continuation_bytes; i > 0; --i) {
    str[i] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));
  }
  // Whatever bits remain go into the leading byte.
  str[0] = static_cast<char>(lead_marker | code_point);
  return str;
}

// The following two functions only make sense if the system
// uses UTF-16 for wide string encoding. All supported systems
// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.

// Tells whether first and second form a UTF-16 surrogate pair, in which
// case they should be merged into one Unicode code point with
// CreateCodePointFromUtf16SurrogatePair.  Always false when wchar_t is
// not two bytes wide.
inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    return false;
  }
  const bool first_matches = (first & 0xFC00) == 0xD800;
  const bool second_matches = (second & 0xFC00) == 0xDC00;
  return first_matches && second_matches;
}

// Combines a UTF-16 surrogate pair into one Unicode code point: the low
// ten bits of first and of second are joined and 0x10000 is added.
inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
                                                    wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    // This function should not be called when wchar_t is not two bytes
    // wide, but we provide a sensible default in case it is.
    return static_cast<UInt32>(first);
  }

  const UInt32 mask = (1 << 10) - 1;
  const UInt32 high_part = (first & mask) << 10;
  const UInt32 low_part = second & mask;
  return (high_part | low_part) + 0x10000;
}

// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// Conversion stops at the first L'\0' or after num_chars characters,
// whichever comes first.  Code points that CodePointToUtf8() cannot
// encode are output as '(Invalid Unicode 0xXXXXXXXX)'.  If the string is
// in UTF-16 encoding and contains invalid UTF-16 surrogate pairs, values
// in those pairs will be encoded as individual Unicode characters from
// the Basic Multilingual Plane.
String WideStringToUtf8(const wchar_t* str, int num_chars) {
  if (num_chars == -1)
    num_chars = static_cast<int>(wcslen(str));

  ::std::stringstream utf8;
  int pos = 0;
  while (pos < num_chars && str[pos] != L'\0') {
    UInt32 code_point;
    const int next = pos + 1;
    if (next < num_chars && IsUtf16SurrogatePair(str[pos], str[next])) {
      // A surrogate pair consumes two wide characters.
      code_point = CreateCodePointFromUtf16SurrogatePair(str[pos],
                                                         str[next]);
      pos += 2;
    } else {
      code_point = static_cast<UInt32>(str[pos]);
      pos += 1;
    }

    char encoded[32];  // CodePointToUtf8 requires a buffer this big.
    utf8 << CodePointToUtf8(code_point, encoded);
  }
  return StringStreamToString(&utf8);
}

// Converts a wide C string to a String in UTF-8 encoding, using
// internal::WideStringToUtf8() on the entire input.
// NULL will be converted to "(null)".
String String::ShowWideCString(const wchar_t * wide_c_str) {
  if (wide_c_str == NULL) {
    return String("(null)");
  }

  const String utf8 = internal::WideStringToUtf8(wide_c_str, -1);
  return String(utf8.c_str());
}

// Like ShowWideCString(), but the converted text is wrapped as a wide
// string literal: L"...".  NULL is still rendered as the unquoted text
// "(null)".
String String::ShowWideCStringQuoted(const wchar_t* wide_c_str) {
  if (wide_c_str == NULL) {
    return String("(null)");
  }

  const String unquoted = String::ShowWideCString(wide_c_str);
  return String::Format("L\"%s\"", unquoted.c_str());
}

// Tells whether two wide C strings have the same content.
//
// Unlike wcscmp(), NULL arguments are accepted: NULL equals only NULL and
// differs from every non-NULL string, including the empty string.
bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) {
  if (lhs == NULL || rhs == NULL) {
    // With at least one NULL operand the strings match only if both are NULL.
    return lhs == rhs;
  }
  return wcscmp(lhs, rhs) == 0;
}

// Helper function for *_STREQ on wide strings.  Compares with
// String::WideCStringEquals() and, on mismatch, reports both values as
// quoted wide string literals.
AssertionResult CmpHelperSTREQ(const char* expected_expression,
                               const char* actual_expression,
                               const wchar_t* expected,
                               const wchar_t* actual) {
  if (!String::WideCStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     String::ShowWideCStringQuoted(expected),
                     String::ShowWideCStringQuoted(actual),
                     false);
  }

  return AssertionSuccess();
}

// Helper function for *_STRNE on wide strings.  Fails, showing both
// expressions and both values as quoted wide string literals, when
// String::WideCStringEquals() reports the two strings as equal.
AssertionResult CmpHelperSTRNE(const char* s1_expression,
                               const char* s2_expression,
                               const wchar_t* s1,
                               const wchar_t* s2) {
  if (String::WideCStringEquals(s1, s2)) {
    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
                              << s2_expression << "), actual: "
                              << String::ShowWideCStringQuoted(s1)
                              << " vs " << String::ShowWideCStringQuoted(s2);
  }

  return AssertionSuccess();
}

// Tells whether two C strings have the same content when case is
// ignored; the comparison is done by posix::StrCaseCmp().
//
// Unlike strcasecmp(), NULL arguments are accepted: NULL equals only NULL
// and differs from every non-NULL string, including the empty string.
bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) {
  if (lhs == NULL || rhs == NULL) {
    // With at least one NULL operand the strings match only if both are NULL.
    return lhs == rhs;
  }
  return posix::StrCaseCmp(lhs, rhs) == 0;
}

  // Compares two wide C strings, ignoring case.  Returns true iff they
  // have the same content.
  //
  // Unlike wcscasecmp(), this function can handle NULL argument(s).
  // A NULL C string is considered different to any non-NULL wide C string,
  // including the empty string.
  // NB: The implementations on different platforms slightly differ.
  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
  // environment variable. On GNU platform this method uses wcscasecmp
  // which compares according to LC_CTYPE category of the current locale.
  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
  // current locale.
bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
                                              const wchar_t* rhs) {
  if (lhs == NULL || rhs == NULL) {
    // With at least one NULL operand the strings match only if both are NULL.
    return lhs == rhs;
  }

#if GTEST_OS_WINDOWS
  return _wcsicmp(lhs, rhs) == 0;
#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID
  return wcscasecmp(lhs, rhs) == 0;
#else
  // Android, Mac OS X and Cygwin don't define wcscasecmp.
  // Other unknown OSes may not define it either.
  //
  // Walks both strings in step, comparing the lower-cased characters.  The
  // walk ends at the first difference (not equal) or once both strings
  // have reached their terminator together (equal).
  for (;;) {
    const wint_t left = towlower(*lhs++);
    const wint_t right = towlower(*rhs++);
    if (left != right) return false;
    if (left == 0) return true;
  }
#endif  // OS selector
}

// Three-way comparison of this String with rhs.
// Returns -1 if this is less than rhs, 0 if both are equal, and 1 if this
// is greater than rhs.  A String whose c_str() is NULL is less than any
// String whose c_str() is not NULL, and equal to another such String.
// Otherwise the characters are compared one by one over the common
// length, and the shorter String is the smaller one on a tie.
int String::Compare(const String & rhs) const {
  const char* const mine = c_str();
  const char* const theirs = rhs.c_str();

  if (mine == NULL || theirs == NULL) {
    if (mine == theirs) return 0;
    return (mine == NULL) ? -1 : 1;  // NULL < anything except NULL
  }

  const size_t my_len = length();
  const size_t their_len = rhs.length();
  const size_t common_len = (my_len <= their_len) ? my_len : their_len;
  for (size_t i = 0; i != common_len; i++) {
    if (mine[i] != theirs[i]) {
      return (mine[i] < theirs[i]) ? -1 : 1;
    }
  }

  if (my_len == their_len) return 0;
  return (my_len < their_len) ? -1 : 1;
}

// Tells whether this String ends with the given suffix.  *Any* String is
// considered to end with a NULL or empty suffix; apart from that, a
// String whose c_str() is NULL ends with nothing.
bool String::EndsWith(const char* suffix) const {
  if (suffix == NULL || CStringEquals(suffix, "")) return true;

  const char* const text = c_str();
  if (text == NULL) return false;

  const size_t text_len = strlen(text);
  const size_t suffix_len = strlen(suffix);
  if (text_len < suffix_len) return false;

  // Compares the last suffix_len characters of the text with the suffix.
  return CStringEquals(text + (text_len - suffix_len), suffix);
}

// Tells whether this String ends with the given suffix, ignoring case.
// Any String is considered to end with a NULL or empty suffix; apart from
// that, a String whose c_str() is NULL ends with nothing.
bool String::EndsWithCaseInsensitive(const char* suffix) const {
  if (suffix == NULL || CStringEquals(suffix, "")) return true;

  const char* const text = c_str();
  if (text == NULL) return false;

  const size_t text_len = strlen(text);
  const size_t suffix_len = strlen(suffix);
  if (text_len < suffix_len) return false;

  // Compares the last suffix_len characters of the text with the suffix.
  return CaseInsensitiveCStringEquals(text + (text_len - suffix_len), suffix);
}

// Formats a list of arguments to a String, using the same format
// spec string as for printf.
//
// We do not use the StringPrintf class as it is not universally
// available.
//
// The result is limited to 4096 characters (including the trailing 0).
// If 4096 characters are not enough to format the input, or if
// there's an error, "<formatting error or buffer exceeded>" is
// returned.
String String::Format(const char * format, ...) {
  char buffer[4096];
  const int kBufferSize = sizeof(buffer)/sizeof(buffer[0]);

  va_list args;
  va_start(args, format);

  // MSVC 8 deprecates vsnprintf(), so we want to suppress warning
  // 4996 (deprecated function) around the call there.
#ifdef _MSC_VER  // We are using MSVC.
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4996)  // Temporarily disables warning 4996.
#endif  // _MSC_VER

  const int written = vsnprintf(buffer, kBufferSize, format, args);

#ifdef _MSC_VER  // We are using MSVC.
# pragma warning(pop)           // Restores the warning state.
#endif  // _MSC_VER

  va_end(args);

  // vsnprintf()'s behavior is not portable.  When the buffer is not
  // big enough, it returns a negative value in MSVC, and returns the
  // needed buffer size on Linux.  When there is an output error, it
  // always returns a negative value.  For simplicity, we lump the two
  // error cases together.
  const bool fits = written >= 0 && written < kBufferSize;
  if (!fits) {
    return String("<formatting error or buffer exceeded>");
  }
  return String(buffer, written);
}

// Copies the contents of a stringstream into a String, replacing every
// NUL byte with the two-character sequence "\\0" on the way.
String StringStreamToString(::std::stringstream* ss) {
  const ::std::string& contents = ss->str();

  // String has no push_back(), so the escaped text is accumulated in a
  // second stringstream first.
  ::std::stringstream escaped;
  for (::std::string::size_type pos = 0; pos < contents.length(); ++pos) {
    const char current = contents[pos];
    if (current != '\0') {
      escaped.put(current);
    } else {
      escaped << "\\0";  // Replaces NUL with "\\0";
    }
  }

  return String(escaped.str().c_str());
}

// Combines the Google-Test-generated message with the user-supplied one.
// When the user message is empty, gtest_msg is returned unchanged;
// otherwise the result is gtest_msg, a newline, and the user message.
String AppendUserMessage(const String& gtest_msg,
                         const Message& user_msg) {
  const String user_text = user_msg.GetString();
  if (!user_text.empty()) {
    Message combined;
    combined << gtest_msg << "\n" << user_text;
    return combined.GetString();
  }

  // Nothing to append.
  return gtest_msg;
}

}  // namespace internal

// class TestResult

// Constructs an empty TestResult: the death test count and the elapsed
// time both start at zero.
TestResult::TestResult() : death_test_count_(0), elapsed_time_(0) {}

// D'tor.  The body is empty.
TestResult::~TestResult() {
}

// Fetches the i-th test part result.  Valid indices run from 0 to
// total_part_count() - 1; any other index aborts the program.
const TestPartResult& TestResult::GetTestPartResult(int i) const {
  const bool in_range = (i >= 0) && (i < total_part_count());
  if (!in_range) {
    internal::posix::Abort();
  }
  return test_part_results_.at(i);
}

// Fetches the i-th test property.  Valid indices run from 0 to
// test_property_count() - 1; any other index aborts the program.
const TestProperty& TestResult::GetTestProperty(int i) const {
  const bool in_range = (i >= 0) && (i < test_property_count());
  if (!in_range) {
    internal::posix::Abort();
  }
  return test_properties_.at(i);
}

// Clears the test part results.  Only test_part_results_ is emptied; the
// other members are not touched by this call.
void TestResult::ClearTestPartResults() {
  test_part_results_.clear();
}

// Adds a test part result to the list.  A copy of test_part_result is
// appended at the end of test_part_results_.
void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
  test_part_results_.push_back(test_part_result);
}

// Adds a test property to the list. If a property with the same key as the
// supplied property is already represented, the value of this test_property
// replaces the old value for that key.
void TestResult::RecordProperty(const TestProperty& test_property) {
  if (!ValidateTestProperty(test_property)) {
    return;
  }
  internal::MutexLock lock(&test_properites_mutex_);
  const std::vector<TestProperty>::iterator property_with_matching_key =
      std::find_if(test_properties_.begin(), test_properties_.end(),
                   internal::TestPropertyKeyIs(test_property.key()));
  if (property_with_matching_key == test_properties_.end()) {
    test_properties_.push_back(test_property);
    return;
  }
  property_with_matching_key->SetValue(test_property.value());
}

// Checks that the property's key is not one of the attribute names reserved
// for Google Test testcase tags.  Returns true for an acceptable key;
// otherwise adds a failure and returns false.
bool TestResult::ValidateTestProperty(const TestProperty& test_property) {
  internal::String key(test_property.key());
  const bool is_reserved =
      key == "name" || key == "status" || key == "time" || key == "classname";
  if (!is_reserved) {
    return true;
  }

  ADD_FAILURE()
      << "Reserved key used in RecordProperty(): "
      << key
      << " ('name', 'status', 'time', and 'classname' are reserved by "
      << GTEST_NAME_ << ")";
  return false;
}

// Resets the object: both counters go back to zero and the stored part
// results and properties are discarded.
void TestResult::Clear() {
  death_test_count_ = 0;
  elapsed_time_ = 0;
  test_part_results_.clear();
  test_properties_.clear();
}

// Returns true iff the test failed.
bool TestResult::Failed() const {
  for (int i = 0; i < total_part_count(); ++i) {
    if (GetTestPartResult(i).failed())
      return true;
  }
  return false;
}

// Returns true iff the test part fatally failed.  Used as the predicate
// passed to CountIf() by TestResult::HasFatalFailure().
static bool TestPartFatallyFailed(const TestPartResult& result) {
  return result.fatally_failed();
}

// Returns true iff at least one recorded test part fatally failed.
bool TestResult::HasFatalFailure() const {
  return 0 < CountIf(test_part_results_, TestPartFatallyFailed);
}

// Returns true iff the test part non-fatally failed.  Used as the predicate
// passed to CountIf() by TestResult::HasNonfatalFailure().
static bool TestPartNonfatallyFailed(const TestPartResult& result) {
  return result.nonfatally_failed();
}

// Returns true iff at least one recorded test part failed non-fatally.
bool TestResult::HasNonfatalFailure() const {
  return 0 < CountIf(test_part_results_, TestPartNonfatallyFailed);
}

// Gets the number of all test parts.  This is the sum of the number
// of successful test parts and the number of failed test parts.
// The container's size is narrowed to int for the caller.
int TestResult::total_part_count() const {
  return static_cast<int>(test_part_results_.size());
}

// Returns the number of the test properties.  The container's size is
// narrowed to int for the caller.
int TestResult::test_property_count() const {
  return static_cast<int>(test_properties_.size());
}

// class Test

// Creates a Test object.
//
// The c'tor saves the values of all Google Test flags by allocating a
// GTestFlagSaver; the d'tor deletes it again.
Test::Test()
    : gtest_flag_saver_(new internal::GTestFlagSaver) {
}

// The d'tor restores the values of all Google Test flags.  It does so by
// deleting the GTestFlagSaver allocated in the c'tor.
Test::~Test() {
  delete gtest_flag_saver_;
}

// Sets up the test fixture.  This base implementation does nothing.
//
// A sub-class may override this.
void Test::SetUp() {
}

// Tears down the test fixture.  This base implementation does nothing.
//
// A sub-class may override this.
void Test::TearDown() {
}

// Allows user supplied key value pairs to be recorded for later output.
// The pair is forwarded to the UnitTest singleton, which records it for
// the current test.
void Test::RecordProperty(const char* key, const char* value) {
  UnitTest::GetInstance()->RecordPropertyForCurrentTest(key, value);
}

// Allows user supplied key value pairs to be recorded for later output.
void Test::RecordProperty(const char* key, int value) {
  Message value_message;
  value_message << value;
  RecordProperty(key, value_message.GetString().c_str());
}

namespace internal {

// Adds a test part result of the given type and message to the current
// test, without any source location or stack trace attached.
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                    const String& message) {
  // No info about the source file where the exception occurred.
  const char* const unknown_file = NULL;
  // We have no info on which line caused the exception.
  const int unknown_line = -1;

  // This function is a friend of UnitTest and as such has access to
  // AddTestPartResult.
  UnitTest::GetInstance()->AddTestPartResult(
      result_type, unknown_file, unknown_line, message,
      String());  // No stack trace, either.
}

}  // namespace internal

// Google Test requires all tests in the same test case to use the same test
// fixture class.  This function checks if the current test has the
// same fixture class as the first test in the current test case.  If
// yes, it returns true; otherwise it generates a Google Test failure and
// returns false.
bool Test::HasSameFixtureClass() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  const TestCase* const test_case = impl->current_test_case();

  // Info about the first test in the current test case.
  const TestInfo* const first_test_info = test_case->test_info_list()[0];
  const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
  const char* const first_test_name = first_test_info->name();

  // Info about the current test.
  const TestInfo* const this_test_info = impl->current_test_info();
  const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
  const char* const this_test_name = this_test_info->name();

  if (this_fixture_id != first_fixture_id) {
    // Is the first test defined using TEST?
    const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
    // Is this test defined using TEST?
    const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();

    if (first_is_TEST || this_is_TEST) {
      // The user mixed TEST and TEST_F in this test case - we'll tell
      // him/her how to fix it.

      // Gets the name of the TEST and the name of the TEST_F.  Note
      // that first_is_TEST and this_is_TEST cannot both be true, as
      // the fixture IDs are different for the two tests.
      const char* const TEST_name =
          first_is_TEST ? first_test_name : this_test_name;
      const char* const TEST_F_name =
          first_is_TEST ? this_test_name : first_test_name;

      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class, so mixing TEST_F and TEST in the same test case is\n"
          << "illegal.  In test case " << this_test_info->test_case_name()
          << ",\n"
          << "test " << TEST_F_name << " is defined using TEST_F but\n"
          << "test " << TEST_name << " is defined using TEST.  You probably\n"
          << "want to change the TEST to TEST_F or move it to another test\n"
          << "case.";
    } else {
      // The user defined two fixture classes with the same name in
      // two namespaces - we'll tell him/her how to fix it.
      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class.  However, in test case "
          << this_test_info->test_case_name() << ",\n"
          << "you defined test " << first_test_name
          << " and test " << this_test_name << "\n"
          << "using two different test fixture classes.  This can happen if\n"
          << "the two classes are from different namespaces or translation\n"
          << "units and have the same name.  You should probably rename one\n"
          << "of the classes to put the tests into different test cases.";
    }
    return false;
  }

  return true;
}

#if GTEST_HAS_SEH

// Builds the failure text for an SEH exception with the given code that was
// raised in `location`.  The text is returned as a heap-allocated String
// which the caller must delete.  A pointer is used because VC++ prohibits
// creation of objects with destructors on stack in functions using __try
// (see error C2712).
static internal::String* FormatSehExceptionMessage(DWORD exception_code,
                                                   const char* location) {
  Message text;
  text << "SEH exception with code 0x";
  // The code is printed in hexadecimal; the base is switched back to
  // decimal afterwards.
  text << std::setbase(16) << exception_code << std::setbase(10);
  text << " thrown in " << location << ".";

  return new internal::String(text.GetString());
}

#endif  // GTEST_HAS_SEH

#if GTEST_HAS_EXCEPTIONS

// Builds the failure text for a C++ exception thrown in `location`.
// `description` is the exception's description, or NULL when the exception
// is of an unknown kind.
static internal::String FormatCxxExceptionMessage(const char* description,
                                                  const char* location) {
  Message text;
  if (description == NULL) {
    text << "Unknown C++ exception";
  } else {
    text << "C++ exception with description \"" << description << "\"";
  }
  text << " thrown in " << location << ".";

  return text.GetString();
}

static internal::String PrintTestPartResultToString(
    const TestPartResult& test_part_result);

// A failed Google Test assertion will throw an exception of this type when
// GTEST_FLAG(throw_on_failure) is true (if exceptions are enabled).  We
// derive it from std::runtime_error, which is for errors presumably
// detectable only at run time.  Since std::runtime_error inherits from
// std::exception, many testing frameworks know how to extract and print the
// message inside it.
class GoogleTestFailureException : public ::std::runtime_error {
 public:
  // The exception's what() text is the failure rendered by
  // PrintTestPartResultToString().
  explicit GoogleTestFailureException(const TestPartResult& failure)
      : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
};
#endif  // GTEST_HAS_EXCEPTIONS

namespace internal {
// We put these helper functions in the internal namespace as IBM's xlC
// compiler rejects the code if they were declared static.

// Runs the given method and handles SEH exceptions it throws, when
// SEH is supported; returns the 0-value for type Result in case of an
// SEH exception.  (Microsoft compilers cannot handle SEH and C++
// exceptions in the same function.  Therefore, we provide a separate
// wrapper function for handling SEH exceptions.)
//
// Arguments:
//
//   object:   the object to invoke the method on
//   method:   pointer to the no-argument member function to run
//   location: text describing where the call happens; only used in the
//             failure message, and unused when SEH is not supported.
template <class T, typename Result>
Result HandleSehExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
#if GTEST_HAS_SEH
  __try {
    return (object->*method)();
  } __except (internal::UnitTestOptions::GTestShouldProcessSEH(  // NOLINT
      GetExceptionCode())) {
    // We create the exception message on the heap because VC++ prohibits
    // creation of objects with destructors on stack in functions using __try
    // (see error C2712).
    internal::String* exception_message = FormatSehExceptionMessage(
        GetExceptionCode(), location);
    // The SEH exception is reported as a fatal failure of the current test.
    internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
                                             *exception_message);
    delete exception_message;
    return static_cast<Result>(0);
  }
#else
  // Without SEH support the method is simply invoked; the cast silences
  // the unused-parameter warning.
  (void)location;
  return (object->*method)();
#endif  // GTEST_HAS_SEH
}

// Runs the given method and, when exception catching is enabled, catches
// and reports C++ and/or SEH-style exceptions if they are supported.
// Returns the method's result, or the 0-value for type Result when an
// exception was caught and reported as a fatal failure.  A
// GoogleTestFailureException is never reported; it is re-thrown.
template <class T, typename Result>
Result HandleExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
  // NOTE: The user code can affect the way in which Google Test handles
  // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
  // RUN_ALL_TESTS() starts. It is technically possible to check the flag
  // after the exception is caught and either report or re-throw the
  // exception based on the flag's value:
  //
  // try {
  //   // Perform the test method.
  // } catch (...) {
  //   if (GTEST_FLAG(catch_exceptions))
  //     // Report the exception as failure.
  //   else
  //     throw;  // Re-throws the original exception.
  // }
  //
  // However, the purpose of this flag is to allow the program to drop into
  // the debugger when the exception is thrown. On most platforms, once the
  // control enters the catch block, the exception origin information is
  // lost and the debugger will stop the program at the point of the
  // re-throw in this function -- instead of at the point of the original
  // throw statement in the code under test.  For this reason, we perform
  // the check early, sacrificing the ability to affect Google Test's
  // exception handling in the method where the exception is thrown.
  if (!internal::GetUnitTestImpl()->catch_exceptions()) {
    // Exception catching is off: call the method with no protection at all.
    return (object->*method)();
  }

#if GTEST_HAS_EXCEPTIONS
  try {
    return HandleSehExceptionsInMethodIfSupported(object, method, location);
  } catch (const GoogleTestFailureException&) {  // NOLINT
    // This exception doesn't originate in code under test. It makes no
    // sense to report it as a test failure.
    throw;
  } catch (const std::exception& e) {  // NOLINT
    internal::ReportFailureInUnknownLocation(
        TestPartResult::kFatalFailure,
        FormatCxxExceptionMessage(e.what(), location));
  } catch (...) {  // NOLINT
    internal::ReportFailureInUnknownLocation(
        TestPartResult::kFatalFailure,
        FormatCxxExceptionMessage(NULL, location));
  }
  // Only reached after an exception has been caught and reported.
  return static_cast<Result>(0);
#else
  return HandleSehExceptionsInMethodIfSupported(object, method, location);
#endif  // GTEST_HAS_EXCEPTIONS
}

}  // namespace internal

// Runs the test and updates the test result.
void Test::Run() {
  if (!HasSameFixtureClass()) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
  // We will run the test only if SetUp() was successful.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    internal::HandleExceptionsInMethodIfSupported(
        this, &Test::TestBody, "the test body");
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
      this, &Test::TearDown, "TearDown()");
}

// Returns true iff the current test has a fatal failure.
bool Test::HasFatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
}

// Returns true iff the current test has a non-fatal failure.
bool Test::HasNonfatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->
      HasNonfatalFailure();
}

// class TestInfo

// Constructs a TestInfo object. It assumes ownership of the test factory
// object (the destructor deletes it).
//
// a_type_param and a_value_param may be NULL; a non-NULL value is copied
// into a newly allocated std::string.  The should_run_, is_disabled_ and
// matches_filter_ flags all start out false.
// TODO(vladl@google.com): Make a_test_case_name and a_name const string&'s
// to signify they cannot be NULLs.
TestInfo::TestInfo(const char* a_test_case_name,
                   const char* a_name,
                   const char* a_type_param,
                   const char* a_value_param,
                   internal::TypeId fixture_class_id,
                   internal::TestFactoryBase* factory)
    : test_case_name_(a_test_case_name),
      name_(a_name),
      type_param_(a_type_param ? new std::string(a_type_param) : NULL),
      value_param_(a_value_param ? new std::string(a_value_param) : NULL),
      fixture_class_id_(fixture_class_id),
      should_run_(false),
      is_disabled_(false),
      matches_filter_(false),
      factory_(factory),
      result_() {}

// Destructs a TestInfo object.  The test factory handed to the constructor
// is deleted here.
TestInfo::~TestInfo() { delete factory_; }

namespace internal {

// Allocates a TestInfo describing one test, registers it with Google Test,
// and returns a pointer to it.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param:       the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
//   value_param:      text representation of the test's value parameter,
//                     or NULL if this is not a value-parameterized test.
//   fixture_class_id: ID of the test fixture class
//   set_up_tc:        pointer to the function that sets up the test case
//   tear_down_tc:     pointer to the function that tears down the test case
//   factory:          pointer to the factory that creates a test object.
//                     The newly created TestInfo instance will assume
//                     ownership of the factory object.
TestInfo* MakeAndRegisterTestInfo(
    const char* test_case_name, const char* name,
    const char* type_param,
    const char* value_param,
    TypeId fixture_class_id,
    SetUpTestCaseFunc set_up_tc,
    TearDownTestCaseFunc tear_down_tc,
    TestFactoryBase* factory) {
  TestInfo* const new_info = new TestInfo(
      test_case_name, name, type_param, value_param, fixture_class_id,
      factory);

  // Hand the new TestInfo to the UnitTestImpl together with the test case's
  // set-up and tear-down functions.
  GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, new_info);
  return new_info;
}

#if GTEST_HAS_PARAM_TEST
void ReportInvalidTestCaseType(const char* test_case_name,
                               const char* file, int line) {
  Message errors;
  errors
      << "Attempted redefinition of test case " << test_case_name << ".\n"
      << "All tests in the same test case must use the same test fixture\n"
      << "class.  However, in test case " << test_case_name << ", you tried\n"
      << "to define a test using a fixture class different from the one\n"
      << "used earlier. This can happen if the two fixture classes are\n"
      << "from different namespaces and have the same name. You should\n"
      << "probably rename one of the classes to put the tests into different\n"
      << "test cases.";

  fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
          errors.GetString().c_str());
}
#endif  // GTEST_HAS_PARAM_TEST

}  // namespace internal

namespace {

// A predicate that checks the test name of a TestInfo against a known
// value.
//
// This is used for implementation of the TestCase class only.  We put
// it in the anonymous namespace to prevent polluting the outer
// namespace.
//
// TestNameIs is copyable.
class TestNameIs {
 public:
  // Constructor.
  //
  // TestNameIs has NO default constructor.
  explicit TestNameIs(const char* name)
      : name_(name) {}

  // Returns true iff the test name of test_info matches name_.
  bool operator()(const TestInfo * test_info) const {
    return test_info && internal::String(test_info->name()).Compare(name_) == 0;
  }

 private:
  internal::String name_;
};

}  // namespace

namespace internal {

// Expands all parameterized tests registered with the TEST_P and
// INSTANTIATE_TEST_CASE_P macros into regular tests and registers those.
// The expansion happens only once per program run; later calls do nothing.
// Without GTEST_HAS_PARAM_TEST the function is a no-op.
void UnitTestImpl::RegisterParameterizedTests() {
#if GTEST_HAS_PARAM_TEST
  if (parameterized_tests_registered_) {
    return;
  }
  parameterized_test_registry_.RegisterTests();
  parameterized_tests_registered_ = true;
#endif
}

}  // namespace internal

// Creates the test object, runs it, records its result, and then
// deletes it.  Does nothing if this test is not selected to run.
//
// The registered event listeners are notified before the test starts and
// after it ends, and the elapsed time is stored in the test's result.
void TestInfo::Run() {
  if (!should_run_) return;

  // Tells UnitTest where to store test result.
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_info(this);

  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();

  // Notifies the unit test event listeners that a test is about to start.
  repeater->OnTestStart(*this);

  const TimeInMillis start = internal::GetTimeInMillis();

  impl->os_stack_trace_getter()->UponLeavingGTest();

  // Creates the test object.  The result is NULL when the fixture's
  // constructor threw and the exception was caught and reported, because
  // the wrapper then returns the 0-value of its result type.
  Test* const test = internal::HandleExceptionsInMethodIfSupported(
      factory_, &internal::TestFactoryBase::CreateTest,
      "the test fixture's constructor");

  // Runs the test only if the test object was created and its
  // constructor didn't generate a fatal failure.
  if ((test != NULL) && !Test::HasFatalFailure()) {
    // This doesn't throw as all user code that can throw are wrapped into
    // exception handling code.
    test->Run();
  }

  // Deletes the test object, if there is one.  Invoking a member function
  // through a NULL pointer is undefined behavior, so the deletion must be
  // skipped when the test object could not be created.
  if (test != NULL) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    internal::HandleExceptionsInMethodIfSupported(
        test, &Test::DeleteSelf_, "the test fixture's destructor");
  }

  result_.set_elapsed_time(internal::GetTimeInMillis() - start);

  // Notifies the unit test event listener that a test has just finished.
  repeater->OnTestEnd(*this);

  // Tells UnitTest to stop associating assertion results to this
  // test.
  impl->set_current_test_info(NULL);
}

// class TestCase

// Counts the tests in this test case for which TestPassed holds.
int TestCase::successful_test_count() const {
  const int passed = CountIf(test_info_list_, TestPassed);
  return passed;
}

// Counts the tests in this test case for which TestFailed holds.
int TestCase::failed_test_count() const {
  const int failed = CountIf(test_info_list_, TestFailed);
  return failed;
}

int TestCase::disabled_test_count() const {
  return CountIf(test_info_list_, TestDisabled);
}

// Counts the tests in this test case for which ShouldRunTest holds.
int TestCase::test_to_run_count() const {
  const int runnable = CountIf(test_info_list_, ShouldRunTest);
  return runnable;
}

// Gets the number of all tests in this test case.  The container's size is
// narrowed to int for the caller.
int TestCase::total_test_count() const {
  return static_cast<int>(test_info_list_.size());
}

// Creates a TestCase with the given name.  The new test case starts with
// should_run_ set to false and an elapsed time of zero.
//
// Arguments:
//
//   name:         name of the test case
//   a_type_param: the name of the test case's type parameter, or NULL if
//                 this is not a typed or a type-parameterized test case.
//                 A non-NULL value is copied into a new std::string.
//   set_up_tc:    pointer to the function that sets up the test case
//   tear_down_tc: pointer to the function that tears down the test case
TestCase::TestCase(const char* a_name, const char* a_type_param,
                   Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc)
    : name_(a_name),
      type_param_(a_type_param ? new std::string(a_type_param) : NULL),
      set_up_tc_(set_up_tc),
      tear_down_tc_(tear_down_tc),
      should_run_(false),
      elapsed_time_(0) {
}

// Destructor of TestCase.
TestCase::~TestCase() {
  // Deletes every TestInfo in the collection.
  ForEach(test_info_list_, internal::Delete<TestInfo>);
}

// Looks up the i-th test of this test case through test_indices_.  i can
// range from 0 to total_test_count() - 1; for any other i, NULL is returned.
const TestInfo* TestCase::GetTestInfo(int i) const {
  // -1 marks an index that is out of range.
  const int list_position = GetElementOr(test_indices_, i, -1);
  if (list_position < 0) {
    return NULL;
  }
  return test_info_list_[list_position];
}

// Mutable variant of GetTestInfo(): looks up the i-th test of this test
// case through test_indices_.  i can range from 0 to
// total_test_count() - 1; for any other i, NULL is returned.
TestInfo* TestCase::GetMutableTestInfo(int i) {
  // -1 marks an index that is out of range.
  const int list_position = GetElementOr(test_indices_, i, -1);
  if (list_position < 0) {
    return NULL;
  }
  return test_info_list_[list_position];
}

// Appends a test to this test case.  The TestCase deletes the test when it
// is itself destroyed.
void TestCase::AddTestInfo(TestInfo * test_info) {
  test_info_list_.push_back(test_info);

  // The new entry in test_indices_ is its own position in that list.
  const int next_index = static_cast<int>(test_indices_.size());
  test_indices_.push_back(next_index);
}

// Runs every test in this TestCase.
void TestCase::Run() {
  if (!should_run_) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_case(this);

  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();

  repeater->OnTestCaseStart(*this);
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
      this, &TestCase::RunSetUpTestCase, "SetUpTestCase()");

  const internal::TimeInMillis start = internal::GetTimeInMillis();
  for (int i = 0; i < total_test_count(); i++) {
    GetMutableTestInfo(i)->Run();
  }
  elapsed_time_ = internal::GetTimeInMillis() - start;

  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
      this, &TestCase::RunTearDownTestCase, "TearDownTestCase()");

  repeater->OnTestCaseEnd(*this);
  impl->set_current_test_case(NULL);
}

// Clears the results of all tests in this test case by applying
// TestInfo::ClearTestResult to every entry of test_info_list_.
void TestCase::ClearResult() {
  ForEach(test_info_list_, TestInfo::ClearTestResult);
}

// Shuffles the tests in this test case.  Only test_indices_ is permuted,
// using the supplied random number generator; test_info_list_ itself is
// not touched here.
void TestCase::ShuffleTests(internal::Random* random) {
  Shuffle(random, &test_indices_);
}

// Restores the test order to what it was before the first shuffle, by
// making every entry of test_indices_ equal to its own position.
void TestCase::UnshuffleTests() {
  const size_t index_count = test_indices_.size();
  for (size_t position = 0; position != index_count; ++position) {
    test_indices_[position] = static_cast<int>(position);
  }
}

// Renders a count followed by a noun, choosing the singular form when the
// count is exactly 1 and the plural form otherwise. e.g.
//
// FormatCountableNoun(1, "formula", "formuli") returns "1 formula".
// FormatCountableNoun(5, "book", "books") returns "5 books".
static internal::String FormatCountableNoun(int count,
                                            const char * singular_form,
                                            const char * plural_form) {
  const char* const noun = (count == 1) ? singular_form : plural_form;
  return internal::String::Format("%d %s", count, noun);
}

// Formats the count of tests, e.g. "1 test" or "3 tests".
static internal::String FormatTestCount(int test_count) {
  return FormatCountableNoun(test_count, "test", "tests");
}

// Formats the count of test cases, e.g. "1 test case" or "3 test cases".
static internal::String FormatTestCaseCount(int test_case_count) {
  return FormatCountableNoun(test_case_count, "test case", "test cases");
}

// Maps a TestPartResult::Type value to the text used when printing a
// result.  kNonFatalFailure and kFatalFailure share one string, as the user
// usually doesn't care about the difference between the two when viewing
// the test result.  That string is "error: " when compiling with MSVC and
// "Failure\n" otherwise.
static const char * TestPartResultTypeToString(TestPartResult::Type type) {
  if (type == TestPartResult::kSuccess) {
    return "Success";
  }

  if (type == TestPartResult::kNonFatalFailure ||
      type == TestPartResult::kFatalFailure) {
#ifdef _MSC_VER
    return "error: ";
#else
    return "Failure\n";
#endif
  }

  return "Unknown result type";
}

// Renders a TestPartResult as a String: the formatted file location, a
// space, the result type text, and then the result's message.
static internal::String PrintTestPartResultToString(
    const TestPartResult& test_part_result) {
  Message rendered;
  rendered << internal::FormatFileLocation(test_part_result.file_name(),
                                           test_part_result.line_number())
           << " " << TestPartResultTypeToString(test_part_result.type())
           << test_part_result.message();
  return rendered.GetString();
}

// Prints a TestPartResult.
static void PrintTestPartResult(const TestPartResult& test_part_result) {
  const internal::String& result =
      PrintTestPartResultToString(test_part_result);
  printf("%s\n", result.c_str());
  fflush(stdout);
  // If the test program runs in Visual Studio or a debugger, the
  // following statements add the test part result message to the Output
  // window such that the user can double-click on it to jump to the
  // corresponding source code location; otherwise they do nothing.
#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  // We don't call OutputDebugString*() on Windows Mobile, as printing
  // to stdout is done by OutputDebugString() there already - we don't
  // want the same message printed twice.
  ::OutputDebugStringA(result.c_str());
  ::OutputDebugStringA("\n");
#endif
}

// class PrettyUnitTestResultPrinter

namespace internal {

// Colors that can be requested from ColoredPrintf().
enum GTestColor {
  COLOR_DEFAULT,  // No color: ColoredPrintf() prints the text uncolored.
  COLOR_RED,
  COLOR_GREEN,
  COLOR_YELLOW
};

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Maps a GTestColor to the matching console character attribute.  Yellow
// is composed from red and green; any other color maps to 0.
WORD GetColorAttribute(GTestColor color) {
  if (color == COLOR_RED) {
    return FOREGROUND_RED;
  }
  if (color == COLOR_GREEN) {
    return FOREGROUND_GREEN;
  }
  if (color == COLOR_YELLOW) {
    return FOREGROUND_RED | FOREGROUND_GREEN;
  }
  return 0;
}

#else

// Maps a GTestColor to the digit used in its ANSI color escape sequence.
// COLOR_DEFAULT is an invalid input; it (and any other unlisted value)
// yields NULL.
const char* GetAnsiColorCode(GTestColor color) {
  if (color == COLOR_RED) {
    return "1";
  }
  if (color == COLOR_GREEN) {
    return "2";
  }
  if (color == COLOR_YELLOW) {
    return "3";
  }
  return NULL;
}

#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Decides whether Google Test should color its output, based on the value
// of the color flag and, for "auto", on whether stdout is a terminal.
bool ShouldUseColor(bool stdout_is_tty) {
  const char* const gtest_color = GTEST_FLAG(color).c_str();

  if (!String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
    // We take "yes", "true", "t", and "1" as meaning "yes".  If the
    // value is neither one of these nor "auto", we treat it as "no" to
    // be conservative.
    return String::CaseInsensitiveCStringEquals(gtest_color, "yes") ||
        String::CaseInsensitiveCStringEquals(gtest_color, "true") ||
        String::CaseInsensitiveCStringEquals(gtest_color, "t") ||
        String::CStringEquals(gtest_color, "1");
  }

#if GTEST_OS_WINDOWS
  // On Windows the TERM variable is usually not set, but the
  // console there does support colors.
  return stdout_is_tty;
#else
  // On non-Windows platforms, we rely on the TERM variable: color is used
  // only for the terminal types listed here.
  const char* const term = posix::GetEnv("TERM");
  const char* const color_terms[] = {
    "xterm", "xterm-color", "xterm-256color", "screen", "linux", "cygwin"
  };
  const size_t color_term_count = sizeof(color_terms) / sizeof(color_terms[0]);

  bool term_supports_color = false;
  for (size_t i = 0; i < color_term_count; ++i) {
    if (String::CStringEquals(term, color_terms[i])) {
      term_supports_color = true;
      break;
    }
  }
  return stdout_is_tty && term_supports_color;
#endif  // GTEST_OS_WINDOWS
}

// Helpers for printing colored strings to stdout. Note that on Windows, we
// cannot simply emit special characters and have the terminal change colors.
// This routine must actually emit the characters rather than return a string
// that would be colored when printed, as can be done on Linux.
//
// Arguments:
//
//   color: the color to print in; COLOR_DEFAULT prints without color
//   fmt:   printf-style format string, followed by its arguments
void ColoredPrintf(GTestColor color, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);

#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS
  // Color is never used on these platforms.
  const bool use_color = false;
#else
  // Whether color is in use is decided once, on the first call, and then
  // reused by every later call.
  static const bool in_color_mode =
      ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
  const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
#endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS
  // The '!= 0' comparison is necessary to satisfy MSVC 7.1.

  // Without color the text is printed as is.
  if (!use_color) {
    vprintf(fmt, args);
    va_end(args);
    return;
  }

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);

  // Gets the current text color.
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
  const WORD old_color_attrs = buffer_info.wAttributes;

  // We need to flush the stream buffers into the console before each
  // SetConsoleTextAttribute call lest it affect the text that is already
  // printed but has not yet reached the console.
  fflush(stdout);
  SetConsoleTextAttribute(stdout_handle,
                          GetColorAttribute(color) | FOREGROUND_INTENSITY);
  vprintf(fmt, args);

  fflush(stdout);
  // Restores the text color.
  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
  // Wraps the text in ANSI escape sequences: one selecting the color before
  // it and one resetting the terminal after it.
  printf("\033[0;3%sm", GetAnsiColorCode(color));
  vprintf(fmt, args);
  printf("\033[m");  // Resets the terminal to default.
#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  va_end(args);
}

// Prints ", where TypeParam = <type> and GetParam() = <value>" for the
// given test, mentioning only the parameters that are non-NULL.  Prints
// nothing when the test has neither a type nor a value parameter.
void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
  const char* const type_text = test_info.type_param();
  const char* const value_text = test_info.value_param();
  const bool has_type = type_text != NULL;
  const bool has_value = value_text != NULL;

  if (!has_type && !has_value) return;

  printf(", where ");
  if (has_type) printf("TypeParam = %s", type_text);
  if (has_type && has_value) printf(" and ");
  if (has_value) printf("GetParam() = %s", value_text);
}

// This class implements the TestEventListener interface.
//
// It prints test progress and results to stdout, using ColoredPrintf for
// the bracketed status tags.
//
// Class PrettyUnitTestResultPrinter is copyable.
class PrettyUnitTestResultPrinter : public TestEventListener {
 public:
  PrettyUnitTestResultPrinter() {}
  // Prints "<test_case>.<test>" to stdout, without a trailing newline.
  static void PrintTestName(const char * test_case, const char * test) {
    printf("%s.%s", test_case, test);
  }

  // The following methods override what's in the TestEventListener class.
  // Those with an empty inline body print nothing for that event.
  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestCaseStart(const TestCase& test_case);
  virtual void OnTestStart(const TestInfo& test_info);
  virtual void OnTestPartResult(const TestPartResult& result);
  virtual void OnTestEnd(const TestInfo& test_info);
  virtual void OnTestCaseEnd(const TestCase& test_case);
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}

 private:
  // Prints one "[  FAILED  ]" line per failed test in unit_test.
  static void PrintFailedTests(const UnitTest& unit_test);

  // Name of the most recently started (or ended) test case; used when
  // printing the names of individual tests.
  internal::String test_case_name_;
};

  // Fired before each iteration of tests starts.
void PrettyUnitTestResultPrinter::OnTestIterationStart(
    const UnitTest& unit_test, int iteration) {
  if (GTEST_FLAG(repeat) != 1)
    printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);

  const char* const filter = GTEST_FLAG(filter).c_str();

  // Prints the filter if it's not *.  This reminds the user that some
  // tests may be skipped.
  if (!internal::String::CStringEquals(filter, kUniversalFilter)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: %s filter = %s\n", GTEST_NAME_, filter);
  }

  if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
    const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
    ColoredPrintf(COLOR_YELLOW,
                  "Note: This is test shard %d of %s.\n",
                  static_cast<int>(shard_index) + 1,
                  internal::posix::GetEnv(kTestTotalShards));
  }

  if (GTEST_FLAG(shuffle)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: Randomizing tests' orders with a seed of %d .\n",
                  unit_test.random_seed());
  }

  ColoredPrintf(COLOR_GREEN,  "[==========] ");
  printf("Running %s from %s.\n",
         FormatTestCount(unit_test.test_to_run_count()).c_str(),
         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
  fflush(stdout);
}

// Announces that the global test environments are about to be set up.
void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
    const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  fputs("Global test environment set-up.\n", stdout);
  fflush(stdout);
}

void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
  test_case_name_ = test_case.name();
  const internal::String counts =
      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("%s from %s", counts.c_str(), test_case_name_.c_str());
  if (test_case.type_param() == NULL) {
    printf("\n");
  } else {
    printf(", where TypeParam = %s\n", test_case.type_param());
  }
  fflush(stdout);
}

// Prints the "[ RUN      ]" line for a test that is about to start.
void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
  ColoredPrintf(COLOR_GREEN, "[ RUN      ] ");
  // Same text as PrintTestName() followed by a newline.
  printf("%s.%s\n", test_case_name_.c_str(), test_info.name());
  fflush(stdout);
}

// Called for each test part result.  Successful parts produce no output;
// for any other result the assertion's message (e.g. expected this and
// got that) is printed and stdout is flushed.
void PrettyUnitTestResultPrinter::OnTestPartResult(
    const TestPartResult& result) {
  const bool succeeded = (result.type() == TestPartResult::kSuccess);
  if (!succeeded) {
    PrintTestPartResult(result);
    fflush(stdout);
  }
}

void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
  if (test_info.result()->Passed()) {
    ColoredPrintf(COLOR_GREEN, "[       OK ] ");
  } else {
    ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
  }
  PrintTestName(test_case_name_.c_str(), test_info.name());
  if (test_info.result()->Failed())
    PrintFullTestCommentIfPresent(test_info);

  if (GTEST_FLAG(print_time)) {
    printf(" (%s ms)\n", internal::StreamableToString(
           test_info.result()->elapsed_time()).c_str());
  } else {
    printf("\n");
  }
  fflush(stdout);
}

void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
  if (!GTEST_FLAG(print_time)) return;

  test_case_name_ = test_case.name();
  const internal::String counts =
      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("%s from %s (%s ms total)\n\n",
         counts.c_str(), test_case_name_.c_str(),
         internal::StreamableToString(test_case.elapsed_time()).c_str());
  fflush(stdout);
}

// Announces that the global test environments are about to be torn down.
void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
    const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  fputs("Global test environment tear-down\n", stdout);
  fflush(stdout);
}

// Internal helper for printing the list of failed tests.
void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
  const int failed_test_count = unit_test.failed_test_count();
  if (failed_test_count == 0) {
    return;
  }

  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
    const TestCase& test_case = *unit_test.GetTestCase(i);
    if (!test_case.should_run() || (test_case.failed_test_count() == 0)) {
      continue;
    }
    for (int j = 0; j < test_case.total_test_count(); ++j) {
      const TestInfo& test_info = *test_case.GetTestInfo(j);
      if (!test_info.should_run() || test_info.result()->Passed()) {
        continue;
      }
      ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
      printf("%s.%s", test_case.name(), test_info.name());
      PrintFullTestCommentIfPresent(test_info);
      printf("\n");
    }
  }
}

// Prints the end-of-iteration summary: how many tests ran (with the total
// time when the print_time flag is set), how many passed, the list of
// failed tests if the iteration did not pass, and a reminder about
// disabled tests.
void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
                                                     int /*iteration*/) {
  ColoredPrintf(COLOR_GREEN, "[==========] ");
  printf("%s from %s ran.",
         FormatTestCount(unit_test.test_to_run_count()).c_str(),
         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
  if (GTEST_FLAG(print_time)) {
    printf(" (%s ms total)",
           internal::StreamableToString(unit_test.elapsed_time()).c_str());
  }
  printf("\n");

  ColoredPrintf(COLOR_GREEN, "[  PASSED  ] ");
  printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());

  const int failed_count = unit_test.failed_test_count();
  if (!unit_test.Passed()) {
    ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
    printf("%s, listed below:\n", FormatTestCount(failed_count).c_str());
    PrintFailedTests(unit_test);
    printf("\n%2d FAILED %s\n",
           failed_count,
           failed_count == 1 ? "TEST" : "TESTS");
  }

  const int disabled_count = unit_test.disabled_test_count();
  if (disabled_count != 0 && !GTEST_FLAG(also_run_disabled_tests)) {
    if (failed_count == 0) {
      printf("\n");  // Add a spacer if no FAILURE banner is displayed.
    }
    ColoredPrintf(COLOR_YELLOW,
                  "  YOU HAVE %d DISABLED %s\n\n",
                  disabled_count,
                  disabled_count == 1 ? "TEST" : "TESTS");
  }

  // Ensure that Google Test output is printed before, e.g., heapchecker output.
  fflush(stdout);
}

// End PrettyUnitTestResultPrinter

// class TestEventRepeater
//
// This class forwards events to other event listeners.
//
// The repeater owns the listeners appended to it: its destructor deletes
// every listener still in the list, while Release() hands a listener back
// to the caller.
class TestEventRepeater : public TestEventListener {
 public:
  // Forwarding is enabled by default.
  TestEventRepeater() : forwarding_enabled_(true) {}
  virtual ~TestEventRepeater();
  // Adds listener to the end of the list of listeners.
  void Append(TestEventListener *listener);
  // Removes listener from the list and returns it, or returns NULL if it
  // is not in the list.
  TestEventListener* Release(TestEventListener* listener);

  // Controls whether events will be forwarded to listeners_. Set to false
  // in death test child processes.
  bool forwarding_enabled() const { return forwarding_enabled_; }
  void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }

  // TestEventListener interface.  Each method passes the event on to every
  // listener in listeners_ when forwarding is enabled.
  virtual void OnTestProgramStart(const UnitTest& unit_test);
  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
  virtual void OnTestCaseStart(const TestCase& test_case);
  virtual void OnTestStart(const TestInfo& test_info);
  virtual void OnTestPartResult(const TestPartResult& result);
  virtual void OnTestEnd(const TestInfo& test_info);
  virtual void OnTestCaseEnd(const TestCase& test_case);
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
  virtual void OnTestProgramEnd(const UnitTest& unit_test);

 private:
  // Controls whether events will be forwarded to listeners_. Set to false
  // in death test child processes.
  bool forwarding_enabled_;
  // The list of listeners that receive events.
  std::vector<TestEventListener*> listeners_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
};

// Deletes every listener that is still registered with the repeater.
TestEventRepeater::~TestEventRepeater() {
  for (size_t i = 0; i < listeners_.size(); ++i) {
    delete listeners_[i];
  }
}

// Adds the listener to the end of the list of listeners.
void TestEventRepeater::Append(TestEventListener *listener) {
  listeners_.insert(listeners_.end(), listener);
}

// Removes the first occurrence of the given listener from the list and
// returns it.  Returns NULL when the listener is not in the list.
// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
  typedef std::vector<TestEventListener*>::iterator ListenerIterator;

  for (ListenerIterator it = listeners_.begin(); it != listeners_.end();
       ++it) {
    if (*it == listener) {
      listeners_.erase(it);
      return listener;
    }
  }

  return NULL;
}

// Since most methods are very similar, use macros to reduce boilerplate.
// This defines a member that forwards the call to all listeners.
// Name is the TestEventListener method to define and Type is the type of
// its single (const reference) parameter.  Nothing is forwarded while
// forwarding_enabled_ is false.
#define GTEST_REPEATER_METHOD_(Name, Type) \
void TestEventRepeater::Name(const Type& parameter) { \
  if (forwarding_enabled_) { \
    for (size_t i = 0; i < listeners_.size(); i++) { \
      listeners_[i]->Name(parameter); \
    } \
  } \
}
// This defines a member that forwards the call to all listeners in reverse
// order.
#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
void TestEventRepeater::Name(const Type& parameter) { \
  if (forwarding_enabled_) { \
    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
      listeners_[i]->Name(parameter); \
    } \
  } \
}

// These events are forwarded in the order the listeners were appended.
GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
// These events are forwarded in the reverse order.
GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)

// The macros are only needed for the definitions above.
#undef GTEST_REPEATER_METHOD_
#undef GTEST_REVERSE_REPEATER_METHOD_

void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
                                             int iteration) {
  if (forwarding_enabled_) {
    for (size_t i = 0; i < listeners_.size(); i++) {
      listeners_[i]->OnTestIterationStart(unit_test, iteration);
    }
  }
}

// Forwards the iteration-end event to all listeners, from the last one in
// the list to the first, unless forwarding is disabled.
void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
                                           int iteration) {
  if (!forwarding_enabled_) return;

  // index runs from size - 1 down to 0.
  for (int index = static_cast<int>(listeners_.size()); index-- > 0;) {
    listeners_[index]->OnTestIterationEnd(unit_test, iteration);
  }
}

// End TestEventRepeater

// This class generates an XML output file.
//
// The report is written at the end of each test iteration to the file
// whose path is given to the constructor.
class XmlUnitTestResultPrinter : public EmptyTestEventListener {
 public:
  explicit XmlUnitTestResultPrinter(const char* output_file);

  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);

 private:
  // Is c a whitespace character that is normalized to a space character
  // when it appears in an XML attribute value?
  static bool IsNormalizableWhitespace(char c) {
    return c == 0x9 || c == 0xA || c == 0xD;
  }

  // May c appear in a well-formed XML document?
  //
  // The comparison is done on the unsigned value of the byte.  Where plain
  // char is signed, bytes 0x80-0xFF are negative, so comparing c itself
  // against 0x20 would reject every byte of a multi-byte UTF-8 sequence
  // and silently strip non-ASCII text from the report.
  static bool IsValidXmlCharacter(char c) {
    return IsNormalizableWhitespace(c) ||
        static_cast<unsigned char>(c) >= 0x20;
  }

  // Returns an XML-escaped copy of the input string str.  If
  // is_attribute is true, the text is meant to appear as an attribute
  // value, and normalizable whitespace is preserved by replacing it
  // with character references.
  static String EscapeXml(const char* str, bool is_attribute);

  // Returns the given string with all characters invalid in XML removed.
  static string RemoveInvalidXmlCharacters(const string& str);

  // Convenience wrapper around EscapeXml when str is an attribute value.
  static String EscapeXmlAttribute(const char* str) {
    return EscapeXml(str, true);
  }

  // Convenience wrapper around EscapeXml when str is not an attribute value.
  static String EscapeXmlText(const char* str) { return EscapeXml(str, false); }

  // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
  static void OutputXmlCDataSection(::std::ostream* stream, const char* data);

  // Streams an XML representation of a TestInfo object.
  static void OutputXmlTestInfo(::std::ostream* stream,
                                const char* test_case_name,
                                const TestInfo& test_info);

  // Prints an XML representation of a TestCase object
  static void PrintXmlTestCase(FILE* out, const TestCase& test_case);

  // Prints an XML summary of unit_test to output stream out.
  static void PrintXmlUnitTest(FILE* out, const UnitTest& unit_test);

  // Produces a string representing the test properties in a result as space
  // delimited XML attributes based on the property key="value" pairs.
  // When the String is not empty, it includes a space at the beginning,
  // to delimit this attribute from prior attributes.
  static String TestPropertiesAsXmlAttributes(const TestResult& result);

  // The output file.
  const String output_file_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter);
};

// Creates a new XmlUnitTestResultPrinter that will write to output_file.
// A NULL or empty path is a fatal error: a message is printed to stderr
// and the process exits with EXIT_FAILURE.
XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
    : output_file_(output_file) {
  const bool path_missing =
      output_file_.c_str() == NULL || output_file_.empty();
  if (!path_missing) return;

  fprintf(stderr, "XML output file may not be null\n");
  fflush(stderr);
  exit(EXIT_FAILURE);
}

// Called after the unit test ends.
void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
                                                  int /*iteration*/) {
  FILE* xmlout = NULL;
  FilePath output_file(output_file_);
  FilePath output_dir(output_file.RemoveFileName());

  if (output_dir.CreateDirectoriesRecursively()) {
    xmlout = posix::FOpen(output_file_.c_str(), "w");
  }
  if (xmlout == NULL) {
    // TODO(wan): report the reason of the failure.
    //
    // We don't do it for now as:
    //
    //   1. There is no urgent need for it.
    //   2. It's a bit involved to make the errno variable thread-safe on
    //      all three operating systems (Linux, Windows, and Mac OS).
    //   3. To interpret the meaning of errno in a thread-safe way,
    //      we need the strerror_r() function, which is not available on
    //      Windows.
    fprintf(stderr,
            "Unable to open file \"%s\"\n",
            output_file_.c_str());
    fflush(stderr);
    exit(EXIT_FAILURE);
  }
  PrintXmlUnitTest(xmlout, unit_test);
  fclose(xmlout);
}

// Returns an XML-escaped copy of the input string str.  A NULL str gives
// an empty result.
//
// '<', '>' and '&' are always replaced by entities.  Quotes (single and
// double) are replaced only when is_attribute is true, i.e. when the text
// is meant to appear as an attribute value; in that case normalizable
// whitespace is also preserved by replacing it with character references.
//
// Invalid XML characters in str, if any, are stripped from the output.
// It is expected that most, if not all, of the text processed by this
// module will consist of ordinary English text.
// If this module is ever modified to produce version 1.1 XML output,
// most invalid characters can be retained using character references.
// TODO(wan): It might be nice to have a minimally invasive, human-readable
// escaping scheme for invalid characters, rather than dropping them.
String XmlUnitTestResultPrinter::EscapeXml(const char* str, bool is_attribute) {
  Message escaped;
  if (str == NULL) return escaped.GetString();

  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    if (ch == '<') {
      escaped << "&lt;";
    } else if (ch == '>') {
      escaped << "&gt;";
    } else if (ch == '&') {
      escaped << "&amp;";
    } else if (ch == '\'') {
      if (is_attribute) {
        escaped << "&apos;";
      } else {
        escaped << '\'';
      }
    } else if (ch == '"') {
      if (is_attribute) {
        escaped << "&quot;";
      } else {
        escaped << '"';
      }
    } else if (IsValidXmlCharacter(ch)) {
      if (is_attribute && IsNormalizableWhitespace(ch)) {
        escaped << String::Format("&#x%02X;", static_cast<unsigned>(ch));
      } else {
        escaped << ch;
      }
    }
    // Any other character is invalid in XML and is dropped.
  }

  return escaped.GetString();
}

// Returns a copy of str that keeps only the characters accepted by
// IsValidXmlCharacter().  Invalid characters are simply dropped; an
// alternative would be to replace them with certain characters such
// as . or ?.
string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(const string& str) {
  string filtered;
  filtered.reserve(str.size());
  for (string::size_type i = 0; i < str.size(); ++i) {
    const char ch = str[i];
    if (IsValidXmlCharacter(ch)) {
      filtered += ch;
    }
  }

  return filtered;
}

// The following routines generate an XML representation of a UnitTest
// object.
//
// This is how Google Test concepts map to the DTD:
//
// <testsuites name="AllTests">        <-- corresponds to a UnitTest object
//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
//     <testcase name="test-name">     <-- corresponds to a TestInfo object
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//                                     <-- individual assertion failures
//     </testcase>
//   </testsuite>
// </testsuites>

// Converts a duration given in milliseconds to seconds and returns it as
// text, using the default formatting of a stringstream.
std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
  const double seconds = ms/1000.0;
  ::std::stringstream formatted;
  formatted << seconds;
  return formatted.str();
}

// Streams data as an XML CDATA section.  Since "]]>" would terminate the
// section early, every occurrence of it in data closes the current
// section, is emitted as "]]&gt;" outside of it, and is followed by a
// freshly opened section.
void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
                                                     const char* data) {
  static const char kSectionEnd[] = "]]>";

  const char* remaining = data;
  *stream << "<![CDATA[";
  for (const char* hit = strstr(remaining, kSectionEnd); hit != NULL;
       hit = strstr(remaining, kSectionEnd)) {
    // Emit everything up to the terminator, then the escaped terminator.
    stream->write(remaining, static_cast<std::streamsize>(hit - remaining));
    *stream << "]]>]]&gt;<![CDATA[";
    remaining = hit + strlen(kSectionEnd);
  }
  // No terminator left: the rest can be written as is.
  *stream << remaining;
  *stream << "]]>";
}

// Streams a <testcase> element describing test_info.  The element is
// self-closing when the test has no failed parts; otherwise it contains
// one <failure> child per failed part, whose body is a CDATA section with
// the failure location and message.
// TODO(wan): There is also value in printing properties with the plain printer.
void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
                                                 const char* test_case_name,
                                                 const TestInfo& test_info) {
  const TestResult& result = *test_info.result();

  // Opening tag and attributes.
  *stream << "    <testcase name=\""
          << EscapeXmlAttribute(test_info.name()).c_str() << "\"";

  if (test_info.value_param() != NULL) {
    *stream << " value_param=\"" << EscapeXmlAttribute(test_info.value_param())
            << "\"";
  }
  if (test_info.type_param() != NULL) {
    *stream << " type_param=\"" << EscapeXmlAttribute(test_info.type_param())
            << "\"";
  }

  const char* const run_status = test_info.should_run() ? "run" : "notrun";
  *stream << " status=\"" << run_status
          << "\" time=\""
          << FormatTimeInMillisAsSeconds(result.elapsed_time())
          << "\" classname=\"" << EscapeXmlAttribute(test_case_name).c_str()
          << "\"" << TestPropertiesAsXmlAttributes(result).c_str();

  // The opening tag is only closed with ">" once the first failure is seen.
  bool has_failure_children = false;
  for (int part_index = 0; part_index < result.total_part_count();
       ++part_index) {
    const TestPartResult& part = result.GetTestPartResult(part_index);
    if (!part.failed()) continue;

    if (!has_failure_children) {
      *stream << ">\n";
      has_failure_children = true;
    }
    *stream << "      <failure message=\""
            << EscapeXmlAttribute(part.summary()).c_str()
            << "\" type=\"\">";
    const string location = internal::FormatCompilerIndependentFileLocation(
        part.file_name(), part.line_number());
    const string message = location + "\n" + part.message();
    OutputXmlCDataSection(stream,
                          RemoveInvalidXmlCharacters(message).c_str());
    *stream << "</failure>\n";
  }

  if (has_failure_children) {
    *stream << "    </testcase>\n";
  } else {
    *stream << " />\n";
  }
}

// Writes a <testsuite> element for test_case to out: the opening tag with
// its counters and elapsed time, one <testcase> element per test in the
// case, and the closing tag.
void XmlUnitTestResultPrinter::PrintXmlTestCase(FILE* out,
                                                const TestCase& test_case) {
  fprintf(out,
          "  <testsuite name=\"%s\" tests=\"%d\" failures=\"%d\" "
          "disabled=\"%d\" "
          "errors=\"0\" time=\"%s\">\n",
          EscapeXmlAttribute(test_case.name()).c_str(),
          test_case.total_test_count(),
          test_case.failed_test_count(),
          test_case.disabled_test_count(),
          FormatTimeInMillisAsSeconds(test_case.elapsed_time()).c_str());

  for (int test_index = 0; test_index < test_case.total_test_count();
       ++test_index) {
    // Each test is rendered into its own stream before being written out.
    ::std::stringstream test_xml;
    OutputXmlTestInfo(&test_xml, test_case.name(),
                      *test_case.GetTestInfo(test_index));
    fprintf(out, "%s", StringStreamToString(&test_xml).c_str());
  }

  fprintf(out, "  </testsuite>\n");
}

// Prints an XML summary of unit_test to output stream out.
void XmlUnitTestResultPrinter::PrintXmlUnitTest(FILE* out,
                                                const UnitTest& unit_test) {
  fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  fprintf(out,
          "<testsuites tests=\"%d\" failures=\"%d\" disabled=\"%d\" "
          "errors=\"0\" time=\"%s\" ",
          unit_test.total_test_count(),
          unit_test.failed_test_count(),
          unit_test.disabled_test_count(),
          FormatTimeInMillisAsSeconds(unit_test.elapsed_time()).c_str());
  if (GTEST_FLAG(shuffle)) {
    fprintf(out, "random_seed=\"%d\" ", unit_test.random_seed());
  }
  fprintf(out, "name=\"AllTests\">\n");
  for (int i = 0; i < unit_test.total_test_case_count(); ++i)
    PrintXmlTestCase(out, *unit_test.GetTestCase(i));
  fprintf(out, "</testsuites>\n");
}

// Renders the test properties recorded in result as XML attributes of the
// form ` key="value"`.  Every attribute is preceded by a space, and the
// value (but not the key) is XML-escaped.  The result is empty when there
// are no properties.
String XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
    const TestResult& result) {
  Message rendered;
  for (int index = 0; index < result.test_property_count(); ++index) {
    const TestProperty& entry = result.GetTestProperty(index);
    rendered << " " << entry.key() << "=";
    rendered << "\"" << EscapeXmlAttribute(entry.value()) << "\"";
  }
  return rendered.GetString();
}

// End XmlUnitTestResultPrinter

#if GTEST_CAN_STREAM_RESULTS_

// Streams test results to the given port on the given host machine.
class StreamingListener : public EmptyTestEventListener {
 public:
  // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
  static string UrlEncode(const char* str);

  StreamingListener(const string& host, const string& port)
      : sockfd_(-1), host_name_(host), port_num_(port) {
    MakeConnection();
    Send("gtest_streaming_protocol_version=1.0\n");
  }

  virtual ~StreamingListener() {
    if (sockfd_ != -1)
      CloseConnection();
  }

  void OnTestProgramStart(const UnitTest& /* unit_test */) {
    Send("event=TestProgramStart\n");
  }

  void OnTestProgramEnd(const UnitTest& unit_test) {
    // Note that Google Test current only report elapsed time for each
    // test iteration, not for the entire test program.
    Send(String::Format("event=TestProgramEnd&passed=%d\n",
                        unit_test.Passed()));

    // Notify the streaming server to stop.
    CloseConnection();
  }

  void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
    Send(String::Format("event=TestIterationStart&iteration=%d\n",
                        iteration));
  }

  void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
    Send(String::Format("event=TestIterationEnd&passed=%d&elapsed_time=%sms\n",
                        unit_test.Passed(),
                        StreamableToString(unit_test.elapsed_time()).c_str()));
  }

  void OnTestCaseStart(const TestCase& test_case) {
    Send(String::Format("event=TestCaseStart&name=%s\n", test_case.name()));
  }

  void OnTestCaseEnd(const TestCase& test_case) {
    Send(String::Format("event=TestCaseEnd&passed=%d&elapsed_time=%sms\n",
                        test_case.Passed(),
                        StreamableToString(test_case.elapsed_time()).c_str()));
  }

  void OnTestStart(const TestInfo& test_info) {
    Send(String::Format("event=TestStart&name=%s\n", test_info.name()));
  }

  void OnTestEnd(const TestInfo& test_info) {
    Send(String::Format(
        "event=TestEnd&passed=%d&elapsed_time=%sms\n",
        (test_info.result())->Passed(),
        StreamableToString((test_info.result())->elapsed_time()).c_str()));
  }

  void OnTestPartResult(const TestPartResult& test_part_result) {
    const char* file_name = test_part_result.file_name();
    if (file_name == NULL)
      file_name = "";
    Send(String::Format("event=TestPartResult&file=%s&line=%d&message=",
                        UrlEncode(file_name).c_str(),
                        test_part_result.line_number()));
    Send(UrlEncode(test_part_result.message()) + "\n");
  }

 private:
  // Creates a client socket and connects to the server.
  void MakeConnection();

  // Closes the socket.
  void CloseConnection() {
    GTEST_CHECK_(sockfd_ != -1)
        << "CloseConnection() can be called only when there is a connection.";

    close(sockfd_);
    sockfd_ = -1;
  }

  // Sends a string to the socket.
  void Send(const string& message) {
    GTEST_CHECK_(sockfd_ != -1)
        << "Send() can be called only when there is a connection.";

    const int len = static_cast<int>(message.length());
    if (write(sockfd_, message.c_str(), len) != len) {
      GTEST_LOG_(WARNING)
          << "stream_result_to: failed to stream to "
          << host_name_ << ":" << port_num_;
    }
  }

  int sockfd_;   // socket file descriptor
  const string host_name_;
  const string port_num_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
};  // class StreamingListener

// Returns a copy of str in which every '=', '&', '%' and '\n' character
// is replaced by "%xx", where xx is its hexadecimal value.  For example,
// "=" becomes "%3d".  All other characters are copied unchanged.  The
// algorithm is O(strlen(str)) in both time and space -- important as the
// input str may contain an arbitrarily long test failure message and
// stack trace.
string StreamingListener::UrlEncode(const char* str) {
  string encoded;
  encoded.reserve(strlen(str) + 1);
  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    const bool needs_escape =
        ch == '%' || ch == '=' || ch == '&' || ch == '\n';
    if (needs_escape) {
      encoded.append(String::Format("%%%02x", static_cast<unsigned char>(ch)));
    } else {
      encoded.push_back(ch);
    }
  }
  return encoded;
}

// Creates a client socket and connects it to host_name_:port_num_.
//
// The host name is resolved with getaddrinfo() and the candidate addresses
// are tried in order until one accepts the connection.  On success sockfd_
// holds the connected socket; on failure it stays -1 and a warning is
// logged.  Must not be called while a connection already exists.
void StreamingListener::MakeConnection() {
  GTEST_CHECK_(sockfd_ == -1)
      << "MakeConnection() can't be called when there is already a connection.";

  addrinfo hints;
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;    // To allow both IPv4 and IPv6 addresses.
  hints.ai_socktype = SOCK_STREAM;
  addrinfo* servinfo = NULL;

  // Use the getaddrinfo() to get a linked list of IP addresses for
  // the given host name.
  const int error_num = getaddrinfo(
      host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
  if (error_num != 0) {
    GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
                        << gai_strerror(error_num);
  }

  // Loop through all the results and connect to the first we can.
  for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL;
       cur_addr = cur_addr->ai_next) {
    sockfd_ = socket(
        cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol);
    if (sockfd_ != -1) {
      // Connect the client socket to the server socket.
      if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) {
        close(sockfd_);
        sockfd_ = -1;
      }
    }
  }

  // servinfo is still NULL when getaddrinfo() failed.  Passing NULL to
  // freeaddrinfo() is not portable, so only free a list that was
  // actually returned.
  if (servinfo != NULL) {
    freeaddrinfo(servinfo);  // all done with this structure
  }

  if (sockfd_ == -1) {
    GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to "
                        << host_name_ << ":" << port_num_;
  }
}

// End of class StreamingListener
#endif  // GTEST_CAN_STREAM_RESULTS_

// Class ScopedTrace

// Pushes the given source file location and message onto a per-thread
// trace stack maintained by Google Test.
// L < UnitTest::mutex_
ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) {
  // Bundle the location and the rendered message into one record ...
  TraceInfo info;
  info.message = message.GetString();
  info.line = line;
  info.file = file;

  // ... and push it onto the trace stack held by the UnitTest singleton.
  UnitTest* const unit_test = UnitTest::GetInstance();
  unit_test->PushGTestTrace(info);
}

// Pops the info pushed by the c'tor.
// L < UnitTest::mutex_
ScopedTrace::~ScopedTrace() {
  // Removes the most recently pushed trace record.
  UnitTest* const unit_test = UnitTest::GetInstance();
  unit_test->PopGTestTrace();
}


// class OsStackTraceGetter

// Returns the current OS stack trace as a String.  Parameters:
//
//   max_depth  - the maximum number of stack frames to be included
//                in the trace.
//   skip_count - the number of top frames to be skipped; doesn't count
//                against max_depth.
//
// L < mutex_
// We use "L < mutex_" to denote that the function may acquire mutex_.
String OsStackTraceGetter::CurrentStackTrace(int /* max_depth */,
                                             int /* skip_count */) {
  // Both arguments are ignored here: the result is always an empty string.
  const String empty_trace("");
  return empty_trace;
}

// L < mutex_
void OsStackTraceGetter::UponLeavingGTest() {
  // Does nothing in this implementation.
}

// Text built around GTEST_NAME_ that stands for internal frames left out of
// a stack trace.
const char* const OsStackTraceGetter::kElidedFramesMarker =
    "... " GTEST_NAME_ " internal frames ...";

}  // namespace internal

// class TestEventListeners

// Creates the repeater that will hold the listener list.  Both default
// listener slots start out as NULL until the corresponding
// SetDefault...() method installs one.
TestEventListeners::TestEventListeners()
    : repeater_(new internal::TestEventRepeater()),
      default_result_printer_(NULL),
      default_xml_generator_(NULL) {
}

TestEventListeners::~TestEventListeners() {
  // The repeater was allocated in the constructor and is owned by this
  // object.
  delete repeater_;
}

// Appends the given event listener to the list of listeners that receive
// the events forwarded by the repeater.  The listener stays in the list
// until it is removed with Release(), at which point the caller becomes
// responsible for deleting it.
void TestEventListeners::Append(TestEventListener* listener) {
  // The repeater holds the actual listener list; registration is delegated
  // to it.
  internal::TestEventRepeater* const broadcaster = repeater_;
  broadcaster->Append(listener);
}

// Removes the given event listener from the list and returns it.  It then
// becomes the caller's responsibility to delete the listener. Returns
// NULL if the listener is not found in the list.
TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
  // Clear whichever default slot (if any) currently refers to this listener.
  if (default_result_printer_ == listener) {
    default_result_printer_ = NULL;
  } else if (default_xml_generator_ == listener) {
    default_xml_generator_ = NULL;
  }

  // The repeater removes the listener from its list and reports the outcome.
  TestEventListener* const released = repeater_->Release(listener);
  return released;
}

// Returns repeater that broadcasts the TestEventListener events to all
// subscribers.
TestEventListener* TestEventListeners::repeater() {
  // Exposes the repeater through the generic listener interface.
  TestEventListener* const broadcaster = repeater_;
  return broadcaster;
}

// Sets the default_result_printer attribute to the provided listener.
// The listener is also added to the listener list and previous
// default_result_printer is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
  // Installing the listener that is already the default is a no-op.
  if (default_result_printer_ == listener) {
    return;
  }

  // Take the previous default out of the list and destroy it.  Callers must
  // not pass a listener that is already in the list.
  TestEventListener* const previous = Release(default_result_printer_);
  delete previous;

  default_result_printer_ = listener;
  if (listener != NULL) {
    Append(listener);
  }
}

// Sets the default_xml_generator attribute to the provided listener.  The
// listener is also added to the listener list and previous
// default_xml_generator is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
  // Installing the listener that is already the default is a no-op.
  if (default_xml_generator_ == listener) {
    return;
  }

  // Take the previous default out of the list and destroy it.  Callers must
  // not pass a listener that is already in the list.
  TestEventListener* const previous = Release(default_xml_generator_);
  delete previous;

  default_xml_generator_ = listener;
  if (listener != NULL) {
    Append(listener);
  }
}

// Controls whether events will be forwarded by the repeater to the
// listeners in the list.
bool TestEventListeners::EventForwardingEnabled() const {
  // The repeater owns the forwarding flag; just report it.
  const bool enabled = repeater_->forwarding_enabled();
  return enabled;
}

void TestEventListeners::SuppressEventForwarding() {
  repeater_->set_forwarding_enabled(false);
}

// class UnitTest

// Gets the singleton UnitTest object.  The first time this method is
// called, a UnitTest object is constructed and returned.  Consecutive
// calls will return the same object.
//
// We don't protect this under mutex_ as a user is not supposed to
// call this before main() starts, from which point on the return
// value will never change.
UnitTest * UnitTest::GetInstance() {
  // Two implementations of the singleton are selected at compile time below.
  //
  // When compiled with MSVC 7.1 in optimized mode, destroying the
  // UnitTest object upon exiting the program messes up the exit code,
  // causing successful tests to appear failed.  We have to use a
  // different implementation in this case to bypass the compiler bug.
  // This implementation makes the compiler happy, at the cost of
  // leaking the UnitTest object.

  // CodeGear C++Builder insists on a public destructor for the
  // default implementation.  Use this implementation to keep good OO
  // design with private destructor.

#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
  // Heap-allocated instance that is never deleted.
  static UnitTest* const instance = new UnitTest;
  return instance;
#else
  // Function-local static instance.
  static UnitTest instance;
  return &instance;
#endif  // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
}

// Gets the number of successful test cases.
int UnitTest::successful_test_case_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->successful_test_case_count();
  return count;
}

// Gets the number of failed test cases.
int UnitTest::failed_test_case_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->failed_test_case_count();
  return count;
}

// Gets the number of all test cases.
int UnitTest::total_test_case_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->total_test_case_count();
  return count;
}

// Gets the number of all test cases that contain at least one test
// that should run.
int UnitTest::test_case_to_run_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->test_case_to_run_count();
  return count;
}

// Gets the number of successful tests.
int UnitTest::successful_test_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->successful_test_count();
  return count;
}

// Gets the number of failed tests.
int UnitTest::failed_test_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->failed_test_count();
  return count;
}

// Gets the number of disabled tests.
int UnitTest::disabled_test_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->disabled_test_count();
  return count;
}

// Gets the number of all tests.
int UnitTest::total_test_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->total_test_count();
  return count;
}

// Gets the number of tests that should run.
int UnitTest::test_to_run_count() const {
  // Forwarded to the implementation object.
  const int count = impl()->test_to_run_count();
  return count;
}

// Gets the elapsed time, in milliseconds.
internal::TimeInMillis UnitTest::elapsed_time() const {
  // Forwarded to the implementation object.
  const internal::TimeInMillis elapsed = impl()->elapsed_time();
  return elapsed;
}

// Returns true iff the unit test passed (i.e. all test cases passed).
bool UnitTest::Passed() const {
  // Forwarded to the implementation object.
  const bool passed = impl()->Passed();
  return passed;
}

// Returns true iff the unit test failed (i.e. some test case failed
// or something outside of all tests failed).
bool UnitTest::Failed() const {
  // Forwarded to the implementation object.
  const bool failed = impl()->Failed();
  return failed;
}

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
const TestCase* UnitTest::GetTestCase(int i) const {
  // The lookup is performed by the implementation object.
  const TestCase* const test_case = impl()->GetTestCase(i);
  return test_case;
}

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
TestCase* UnitTest::GetMutableTestCase(int i) {
  // The lookup is performed by the implementation object.
  TestCase* const test_case = impl()->GetMutableTestCase(i);
  return test_case;
}

// Returns the list of event listeners that can be used to track events
// inside Google Test.
TestEventListeners& UnitTest::listeners() {
  // The implementation object hands out a pointer; callers get a reference.
  TestEventListeners* const listener_list = impl()->listeners();
  return *listener_list;
}

// Registers and returns a global test environment.  When a test
// program is run, all global test environments will be set-up in the
// order they were registered.  After all tests in the program have
// finished, all global test environments will be torn-down in the
// *reverse* order they were registered.
//
// The UnitTest object takes ownership of the given environment.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
Environment* UnitTest::AddEnvironment(Environment* env) {
  // A NULL environment is not registered and NULL is returned; any other
  // environment is appended to the implementation's list and handed back.
  if (env != NULL) {
    impl_->environments().push_back(env);
  }
  return env;
}

// Adds a TestPartResult to the current TestResult object.  All Google Test
// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
// this to report their results.  The user code should use the
// assertion macros instead of calling this directly.
// L < mutex_
void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
                                 const char* file_name,
                                 int line_number,
                                 const internal::String& message,
                                 const internal::String& os_stack_trace) {
  Message full_message;
  full_message << message;

  internal::MutexLock lock(&mutex_);

  // Appends the entries of the trace stack, most recently pushed first.
  const int trace_depth = static_cast<int>(impl_->gtest_trace_stack().size());
  if (trace_depth > 0) {
    full_message << "\n" << GTEST_NAME_ << " trace:";

    for (int index = trace_depth - 1; index >= 0; --index) {
      const internal::TraceInfo& entry = impl_->gtest_trace_stack()[index];
      full_message << "\n"
                   << internal::FormatFileLocation(entry.file, entry.line)
                   << " " << entry.message;
    }
  }

  // Appends the OS stack trace after its marker, when one was supplied.
  const bool has_stack_trace =
      os_stack_trace.c_str() != NULL && !os_stack_trace.empty();
  if (has_stack_trace) {
    full_message << internal::kStackTraceMarker << os_stack_trace;
  }

  // Reports the result through the reporter for the current thread.
  const TestPartResult result(result_type, file_name, line_number,
                              full_message.GetString().c_str());
  impl_->GetTestPartResultReporterForCurrentThread()->
      ReportTestPartResult(result);

  // Successful results need no further handling.
  if (result_type == TestPartResult::kSuccess) {
    return;
  }

  // gtest_break_on_failure takes precedence over gtest_throw_on_failure.
  // This lets a user set the latter in code (perhaps to use Google Test
  // assertions with another testing framework) and pass the former on the
  // command line for debugging.
  if (GTEST_FLAG(break_on_failure)) {
#if GTEST_OS_WINDOWS
    // DebugBreak lets gtest still break into a debugger when both
    // --gtest_break_on_failure and --gtest_catch_exceptions are specified.
    DebugBreak();
#else
    // Write through a NULL volatile pointer so the compiler cannot remove
    // the access.  This is used instead of abort() or __builtin_trap() for
    // portability: Symbian doesn't implement abort() well, and some
    // debuggers don't correctly trap abort().
    *static_cast<volatile int*>(NULL) = 1;
#endif  // GTEST_OS_WINDOWS
  } else if (GTEST_FLAG(throw_on_failure)) {
#if GTEST_HAS_EXCEPTIONS
    throw GoogleTestFailureException(result);
#else
    // abort() is avoided because it generates a pop-up in debug mode that
    // cannot be suppressed in VC 7.1 or below.
    exit(1);
#endif
  }
}

// Creates and adds a property to the current TestResult. If a property matching
// the supplied key already exists, updates its value instead.
void UnitTest::RecordPropertyForCurrentTest(const char* key,
                                            const char* value) {
  const TestProperty test_property(key, value);
  impl_->current_test_result()->RecordProperty(test_property);
}

// Runs all tests in this UnitTest object and prints the result.
// Returns 0 if successful, or 1 otherwise.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
int UnitTest::Run() {
  // Captures the value of GTEST_FLAG(catch_exceptions).  This value will be
  // used for the duration of the program.
  impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));

  // The block below only exists on builds with structured exception handling
  // (GTEST_HAS_SEH); it redirects or silences the various crash/abort/assert
  // dialogs before the tests start.
#if GTEST_HAS_SEH
  const bool in_death_test_child_process =
      internal::GTEST_FLAG(internal_run_death_test).length() > 0;

  // Either the user wants Google Test to catch exceptions thrown by the
  // tests or this is executing in the context of death test child
  // process. In either case the user does not want to see pop-up dialogs
  // about crashes - they are expected.
  if (impl()->catch_exceptions() || in_death_test_child_process) {

# if !GTEST_OS_WINDOWS_MOBILE
    // SetErrorMode doesn't exist on CE.
    SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
                 SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
# endif  // !GTEST_OS_WINDOWS_MOBILE

# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
    // Death test children can be terminated with _abort().  On Windows,
    // _abort() can show a dialog with a warning message.  This forces the
    // abort message to go to stderr instead.
    _set_error_mode(_OUT_TO_STDERR);
# endif

# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
    // In the debug version, Visual Studio pops up a separate dialog
    // offering a choice to debug the aborted program. We need to suppress
    // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
    // executed. Google Test will notify the user of any unexpected
    // failure via stderr.
    //
    // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
    // Users of prior VC versions shall suffer the agony and pain of
    // clicking through the countless debug dialogs.
    // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
    // debug mode when compiled with VC 7.1 or lower.
    if (!GTEST_FLAG(break_on_failure))
      _set_abort_behavior(
          0x0,                                    // Clear the following flags:
          _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.
# endif

#if _MSC_VER >= 1310 && !GTEST_OS_WINDOWS_MOBILE
    // Suppress the "Debug Assertion Failed" dialog in the debug mode. (As far
    // as I know, these functions are available on Visual Studio .NET 2003 or
    // later.)
    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
    _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
#endif

  }
#endif  // GTEST_HAS_SEH

  // Runs UnitTestImpl::RunAllTests() through the exception-handling wrapper
  // and maps its boolean result to an exit code: true -> 0, false -> 1.
  return internal::HandleExceptionsInMethodIfSupported(
      impl(),
      &internal::UnitTestImpl::RunAllTests,
      "auxiliary test code (environments or event listeners)") ? 0 : 1;
}

// Returns the working directory when the first TEST() or TEST_F() was
// executed.
const char* UnitTest::original_working_dir() const {
  // The returned pointer refers to storage held by the implementation
  // object's original_working_dir_ member.
  const char* const dir = impl_->original_working_dir_.c_str();
  return dir;
}

// Returns the TestCase object for the test that's currently running,
// or NULL if no test is running.
// L < mutex_
const TestCase* UnitTest::current_test_case() const {
  // The read happens while mutex_ is held.
  internal::MutexLock lock(&mutex_);
  const TestCase* const running_case = impl_->current_test_case();
  return running_case;
}

// Returns the TestInfo object for the test that's currently running,
// or NULL if no test is running.
// L < mutex_
const TestInfo* UnitTest::current_test_info() const {
  // The read happens while mutex_ is held.
  internal::MutexLock lock(&mutex_);
  const TestInfo* const running_test = impl_->current_test_info();
  return running_test;
}

// Returns the random seed used at the start of the current test run.
int UnitTest::random_seed() const {
  // Forwarded to the implementation object.
  const int seed = impl_->random_seed();
  return seed;
}

#if GTEST_HAS_PARAM_TEST
// Returns ParameterizedTestCaseRegistry object used to keep track of
// value-parameterized tests and instantiate and register them.
// L < mutex_
internal::ParameterizedTestCaseRegistry&
    UnitTest::parameterized_test_registry() {
  // The registry lives in the implementation object; a reference to it is
  // passed through.
  internal::ParameterizedTestCaseRegistry& registry =
      impl_->parameterized_test_registry();
  return registry;
}
#endif  // GTEST_HAS_PARAM_TEST

// Creates an empty UnitTest.
UnitTest::UnitTest() {
  // The implementation object receives a back-pointer to this UnitTest.
  internal::UnitTestImpl* const implementation =
      new internal::UnitTestImpl(this);
  impl_ = implementation;
}

// Destructor of UnitTest.
UnitTest::~UnitTest() {
  // Releases the implementation object allocated in the constructor.
  internal::UnitTestImpl* const implementation = impl_;
  delete implementation;
}

// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
// Google Test trace stack.
// L < mutex_
void UnitTest::PushGTestTrace(const internal::TraceInfo& trace) {
  // Holds mutex_ while a copy of the trace is appended to the stack.
  internal::MutexLock lock(&mutex_);
  impl_->gtest_trace_stack().push_back(trace);
}

// Pops a trace from the per-thread Google Test trace stack.
// L < mutex_
void UnitTest::PopGTestTrace() {
  // Holds mutex_ while the most recently pushed trace is removed.  There is
  // no emptiness check here, so each call must be paired with an earlier
  // PushGTestTrace().
  internal::MutexLock lock(&mutex_);
  impl_->gtest_trace_stack().pop_back();
}

namespace internal {

// Builds the implementation object for the given parent UnitTest.  All
// state starts out empty/zero/NULL, the active test part result reporters
// point at the built-in default reporters, and a
// PrettyUnitTestResultPrinter is installed as the default result printer.
UnitTestImpl::UnitTestImpl(UnitTest* parent)
    : parent_(parent),
#ifdef _MSC_VER
# pragma warning(push)                    // Saves the current warning state.
# pragma warning(disable:4355)            // Temporarily disables warning 4355
                                         // (using this in initializer).
      default_global_test_part_result_reporter_(this),
      default_per_thread_test_part_result_reporter_(this),
# pragma warning(pop)                     // Restores the warning state again.
#else
      default_global_test_part_result_reporter_(this),
      default_per_thread_test_part_result_reporter_(this),
#endif  // _MSC_VER
      // The active reporters initially refer to the default reporters above.
      global_test_part_result_repoter_(
          &default_global_test_part_result_reporter_),
      per_thread_test_part_result_reporter_(
          &default_per_thread_test_part_result_reporter_),
#if GTEST_HAS_PARAM_TEST
      parameterized_test_registry_(),
      parameterized_tests_registered_(false),
#endif  // GTEST_HAS_PARAM_TEST
      // -1 means no death test case has been registered yet; GetTestCase()
      // pre-increments this to obtain the insertion position.
      last_death_test_case_(-1),
      current_test_case_(NULL),
      current_test_info_(NULL),
      ad_hoc_test_result_(),
      os_stack_trace_getter_(NULL),
      post_flag_parse_init_performed_(false),
      random_seed_(0),  // Will be overridden by the flag before first use.
      random_(0),  // Will be reseeded before first use.
      elapsed_time_(0),
#if GTEST_HAS_DEATH_TEST
      internal_run_death_test_flag_(NULL),
      death_test_factory_(new DefaultDeathTestFactory),
#endif
      // Will be overridden by the flag before first use.
      catch_exceptions_(false) {
  // Installs the default console printer.
  listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
}

// Releases the objects this UnitTestImpl deletes on destruction: the
// registered test cases, the environments and the stack trace getter.
UnitTestImpl::~UnitTestImpl() {
  // Deletes every TestCase.
  ForEach(test_cases_, internal::Delete<TestCase>);

  // Deletes every Environment.
  ForEach(environments_, internal::Delete<Environment>);

  // Deletes the stack trace getter; the pointer starts out NULL in the
  // constructor, in which case this is a no-op.
  delete os_stack_trace_getter_;
}

#if GTEST_HAS_DEATH_TEST
// Disables event forwarding if the control is currently in a death test
// subprocess. Must not be called before InitGoogleTest.
void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
  // A non-NULL internal_run_death_test flag marks a death test subprocess.
  const bool in_death_test_subprocess =
      internal_run_death_test_flag_.get() != NULL;
  if (in_death_test_subprocess) {
    listeners()->SuppressEventForwarding();
  }
}
#endif  // GTEST_HAS_DEATH_TEST

// Initializes event listeners performing XML output as specified by
// UnitTestOptions. Must not be called before InitGoogleTest.
void UnitTestImpl::ConfigureXmlOutput() {
  const String& output_format = UnitTestOptions::GetOutputFormat();

  // "xml" installs the XML generator that writes to the configured file.
  if (output_format == "xml") {
    listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
    return;
  }

  // Any other non-empty format is reported and then ignored.
  if (output_format != "") {
    printf("WARNING: unrecognized output format \"%s\" ignored.\n",
           output_format.c_str());
    fflush(stdout);
  }
}

#if GTEST_CAN_STREAM_RESULTS_
// Initializes event listeners for streaming test results in String form.
// Must not be called before InitGoogleTest.
void UnitTestImpl::ConfigureStreamingOutput() {
  const string& target = GTEST_FLAG(stream_result_to);
  if (target.empty()) {
    return;  // Streaming was not requested.
  }

  // The target must look like "host:port"; split at the first colon.
  const size_t colon = target.find(':');
  if (colon == string::npos) {
    printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
           target.c_str());
    fflush(stdout);
    return;
  }

  listeners()->Append(new StreamingListener(target.substr(0, colon),
                                            target.substr(colon + 1)));
}
#endif  // GTEST_CAN_STREAM_RESULTS_

// Performs initialization dependent upon flag values obtained in
// ParseGoogleTestFlagsOnly.  Is called from InitGoogleTest after the call to
// ParseGoogleTestFlagsOnly.  In case a user neglects to call InitGoogleTest
// this function is also called from RunAllTests.  Since this function can be
// called more than once, it has to be idempotent.
void UnitTestImpl::PostFlagParsingInit() {
  // Only the first call does any work; later calls return immediately.
  if (post_flag_parse_init_performed_) {
    return;
  }
  post_flag_parse_init_performed_ = true;

#if GTEST_HAS_DEATH_TEST
  InitDeathTestSubprocessControlInfo();
  SuppressTestEventsIfInSubprocess();
#endif  // GTEST_HAS_DEATH_TEST

  // Registering parameterized tests here makes them visible to the UnitTest
  // reflection API without running RUN_ALL_TESTS.
  RegisterParameterizedTests();

  // Setting up the XML listener here lets users shut down the default XML
  // output before invoking RUN_ALL_TESTS.
  ConfigureXmlOutput();

#if GTEST_CAN_STREAM_RESULTS_
  // Sets up the listener that streams test results to the specified server.
  ConfigureStreamingOutput();
#endif  // GTEST_CAN_STREAM_RESULTS_
}

// A predicate that checks the name of a TestCase against a known
// value.
//
// This is used for implementation of the UnitTest class only.  It lives
// in the internal namespace to prevent polluting the outer
// namespace.
//
// TestCaseNameIs is copyable.
class TestCaseNameIs {
 public:
  // Remembers the name that test cases will be compared against.
  explicit TestCaseNameIs(const String& name) : name_(name) {}

  // Yields true only for a non-NULL test case whose name equals the stored
  // name exactly.
  bool operator()(const TestCase* test_case) const {
    if (test_case == NULL) {
      return false;
    }
    return strcmp(test_case->name(), name_.c_str()) == 0;
  }

 private:
  String name_;  // The name to look for.
};

// Finds and returns a TestCase with the given name.  If one doesn't
// exist, creates one and returns it.  It's the CALLER'S
// RESPONSIBILITY to ensure that this function is only called WHEN THE
// TESTS ARE NOT SHUFFLED.
//
// Arguments:
//
//   test_case_name: name of the test case
//   type_param:     the name of the test case's type parameter, or NULL if
//                   this is not a typed or a type-parameterized test case.
//   set_up_tc:      pointer to the function that sets up the test case
//   tear_down_tc:   pointer to the function that tears down the test case
TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
                                    const char* type_param,
                                    Test::SetUpTestCaseFunc set_up_tc,
                                    Test::TearDownTestCaseFunc tear_down_tc) {
  typedef std::vector<TestCase*>::const_iterator TestCaseIterator;

  // Reuse an already registered test case with this name, if there is one.
  const TestCaseNameIs has_requested_name(test_case_name);
  const TestCaseIterator found = std::find_if(
      test_cases_.begin(), test_cases_.end(), has_requested_name);
  if (found != test_cases_.end()) {
    return *found;
  }

  // Otherwise create a fresh one.
  TestCase* const created =
      new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);

  const bool is_death_test_case = internal::UnitTestOptions::MatchesFilter(
      String(test_case_name), kDeathTestCaseFilter);
  if (!is_death_test_case) {
    // Ordinary test cases go to the end of the list.
    test_cases_.push_back(created);
  } else {
    // Death test cases are placed right after the last death test case
    // defined so far.  This only works while the test cases are unshuffled;
    // otherwise a death test could end up running after a non-death test.
    ++last_death_test_case_;
    test_cases_.insert(test_cases_.begin() + last_death_test_case_, created);
  }

  test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
  return created;
}

// Helpers for setting up / tearing down the given environment.  They
// are for use in the ForEach() function.
static void SetUpEnvironment(Environment* env) {
  // Adapter so that SetUp() can be applied with ForEach().
  env->SetUp();
}
static void TearDownEnvironment(Environment* env) {
  // Adapter so that TearDown() can be applied with std::for_each().
  env->TearDown();
}

// Runs all tests in this UnitTest object, prints the result, and
// returns true if all tests are successful.  If any exception is
// thrown during a test, the test is considered to be failed, but the
// rest of the tests will still be run.
//
// When parameterized tests are enabled, it expands and registers
// parameterized tests first in RegisterParameterizedTests().
// All other functions called from RunAllTests() may safely assume that
// parameterized tests are ready to be counted and run.
bool UnitTestImpl::RunAllTests() {
  // Makes sure InitGoogleTest() was called.
  if (!GTestIsInitialized()) {
    printf("%s",
           "\nThis test program did NOT call ::testing::InitGoogleTest "
           "before calling RUN_ALL_TESTS().  Please fix it.\n");
    return false;
  }

  // Do not run any test if the --help flag was specified.
  if (g_help_flag)
    return true;

  // Repeats the call to the post-flag parsing initialization in case the
  // user didn't call InitGoogleTest.
  PostFlagParsingInit();

  // Even if sharding is not on, test runners may want to use the
  // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
  // protocol.
  internal::WriteToShardStatusFileIfNeeded();

  // True iff we are in a subprocess for running a thread-safe-style
  // death test.
  bool in_subprocess_for_death_test = false;

#if GTEST_HAS_DEATH_TEST
  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
#endif  // GTEST_HAS_DEATH_TEST

  // Sharding is never applied inside a death test subprocess (see
  // ShouldShard()).
  const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
                                        in_subprocess_for_death_test);

  // Compares the full test names with the filter to decide which
  // tests to run.
  const bool has_tests_to_run = FilterTests(should_shard
                                              ? HONOR_SHARDING_PROTOCOL
                                              : IGNORE_SHARDING_PROTOCOL) > 0;

  // Lists the tests and exits if the --gtest_list_tests flag was specified.
  if (GTEST_FLAG(list_tests)) {
    // This must be called *after* FilterTests() has been called.
    ListTestsMatchingFilter();
    return true;
  }

  // The seed is only derived from the flag when shuffling is requested;
  // otherwise it is 0.
  random_seed_ = GTEST_FLAG(shuffle) ?
      GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;

  // True iff at least one test has failed.
  bool failed = false;

  TestEventListener* repeater = listeners()->repeater();

  repeater->OnTestProgramStart(*parent_);

  // How many times to repeat the tests?  We don't want to repeat them
  // when we are inside the subprocess of a death test.
  const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
  // Repeats forever if the repeat count is negative.
  const bool forever = repeat < 0;
  for (int i = 0; forever || i != repeat; i++) {
    // We want to preserve failures generated by ad-hoc test
    // assertions executed before RUN_ALL_TESTS().
    ClearNonAdHocTestResult();

    const TimeInMillis start = GetTimeInMillis();

    // Shuffles test cases and tests if requested.
    if (has_tests_to_run && GTEST_FLAG(shuffle)) {
      random()->Reseed(random_seed_);
      // This should be done before calling OnTestIterationStart(),
      // such that a test event listener can see the actual test order
      // in the event.
      ShuffleTests();
    }

    // Tells the unit test event listeners that the tests are about to start.
    repeater->OnTestIterationStart(*parent_, i);

    // Runs each test case if there is at least one test to run.
    if (has_tests_to_run) {
      // Sets up all environments beforehand.
      repeater->OnEnvironmentsSetUpStart(*parent_);
      ForEach(environments_, SetUpEnvironment);
      repeater->OnEnvironmentsSetUpEnd(*parent_);

      // Runs the tests only if there was no fatal failure during global
      // set-up.
      if (!Test::HasFatalFailure()) {
        for (int test_index = 0; test_index < total_test_case_count();
             test_index++) {
          GetMutableTestCase(test_index)->Run();
        }
      }

      // Tears down all environments in reverse order afterwards.
      repeater->OnEnvironmentsTearDownStart(*parent_);
      std::for_each(environments_.rbegin(), environments_.rend(),
                    TearDownEnvironment);
      repeater->OnEnvironmentsTearDownEnd(*parent_);
    }

    // Elapsed time covers shuffling, environment set-up/tear-down and the
    // tests of this iteration.
    elapsed_time_ = GetTimeInMillis() - start;

    // Tells the unit test event listener that the tests have just finished.
    repeater->OnTestIterationEnd(*parent_, i);

    // Remembers whether any iteration so far has failed.
    if (!Passed()) {
      failed = true;
    }

    // Restores the original test order after the iteration.  This
    // allows the user to quickly repro a failure that happens in the
    // N-th iteration without repeating the first (N - 1) iterations.
    // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
    // case the user somehow changes the value of the flag somewhere
    // (it's always safe to unshuffle the tests).
    UnshuffleTests();

    if (GTEST_FLAG(shuffle)) {
      // Picks a new random seed for each iteration.
      random_seed_ = GetNextRandomSeed(random_seed_);
    }
  }

  repeater->OnTestProgramEnd(*parent_);

  // Success means that no iteration reported a failure.
  return !failed;
}

// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
// if the variable is present. If a file already exists at this location, this
// function will write over it. If the variable is present, but the file cannot
// be created, prints an error and exits.
void WriteToShardStatusFileIfNeeded() {
  // Nothing to do unless the environment names a status file.
  const char* const status_path = posix::GetEnv(kTestShardStatusFile);
  if (status_path == NULL) {
    return;
  }

  // Opening the file for writing creates it (or truncates an existing one);
  // nothing is written to it.
  FILE* const status_file = posix::FOpen(status_path, "w");
  if (status_file != NULL) {
    fclose(status_file);
    return;
  }

  // The file could not be created: report the problem and terminate.
  ColoredPrintf(COLOR_RED,
                "Could not write to the test shard status file \"%s\" "
                "specified by the %s environment variable.\n",
                status_path, kTestShardStatusFile);
  fflush(stdout);
  exit(EXIT_FAILURE);
}

// Checks whether sharding is enabled by examining the relevant
// environment variable values. If the variables are present,
// but inconsistent (i.e., shard_index >= total_shards), prints
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
// Decides whether the tests should be sharded.
//
//   total_shards_env - name of the environment variable holding the total
//                      number of shards.
//   shard_index_env  - name of the environment variable holding this
//                      process's shard index.
//   in_subprocess_for_death_test - when true, sharding is always disabled.
//
// Returns true iff both variables are set, consistent, and the total number
// of shards is greater than one.  If exactly one variable is set, or the
// index is outside [0, total_shards), prints an error in red and exits with
// EXIT_FAILURE.
bool ShouldShard(const char* total_shards_env,
                 const char* shard_index_env,
                 bool in_subprocess_for_death_test) {
  if (in_subprocess_for_death_test) {
    return false;
  }

  // -1 stands for "variable not set".
  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);

  // Note: the messages below are passed to ColoredPrintf() as an argument to
  // an explicit "%s" format rather than as the format string itself, so that
  // their content is never interpreted as printf directives.
  if (total_shards == -1 && shard_index == -1) {
    return false;
  } else if (total_shards == -1 && shard_index != -1) {
    const Message msg = Message()
      << "Invalid environment variables: you have "
      << kTestShardIndex << " = " << shard_index
      << ", but have left " << kTestTotalShards << " unset.\n";
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  } else if (total_shards != -1 && shard_index == -1) {
    const Message msg = Message()
      << "Invalid environment variables: you have "
      << kTestTotalShards << " = " << total_shards
      << ", but have left " << kTestShardIndex << " unset.\n";
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  } else if (shard_index < 0 || shard_index >= total_shards) {
    const Message msg = Message()
      << "Invalid environment variables: we require 0 <= "
      << kTestShardIndex << " < " << kTestTotalShards
      << ", but you have " << kTestShardIndex << "=" << shard_index
      << ", " << kTestTotalShards << "=" << total_shards << ".\n";
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  }

  // A single shard is equivalent to not sharding at all.
  return total_shards > 1;
}

// Parses the environment variable var as an Int32. If it is unset,
// returns default_val. If it is not an Int32, prints an error
// and exits with EXIT_FAILURE.
Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
  // An unset variable yields the caller-supplied default.
  const char* const env_text = posix::GetEnv(var);
  if (env_text == NULL) {
    return default_val;
  }

  // A set variable must parse as an Int32; otherwise the process exits.
  Int32 parsed;
  const bool parsed_ok = ParseInt32(
      Message() << "The value of environment variable " << var,
      env_text, &parsed);
  if (!parsed_ok) {
    exit(EXIT_FAILURE);
  }
  return parsed;
}

// Given the total number of shards, the shard index, and the test id,
// returns true iff the test should be run on this shard. The test id is
// some arbitrary but unique non-negative integer assigned to each test
// method. Assumes that 0 <= shard_index < total_shards.
bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
  // Tests are dealt out round-robin: test i belongs to shard (i mod N).
  const int owning_shard = test_id % total_shards;
  return owning_shard == shard_index;
}

// Compares the name of each test with the user-specified filter to
// decide whether the test should be run, then records the result in
// each TestCase and TestInfo object.
// If shard_tests == true, further filters tests based on sharding
// variables in the environment - see
// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
// Returns the number of tests that should run.
int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
  // The sharding variables are only consulted when sharding is honored;
  // otherwise both values stay at -1 and are never used.
  const bool honor_sharding = (shard_tests == HONOR_SHARDING_PROTOCOL);
  const Int32 total_shards =
      honor_sharding ? Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
  const Int32 shard_index =
      honor_sharding ? Int32FromEnvOrDie(kTestShardIndex, -1) : -1;

  // runnable_count: tests that match the filter and are not disabled,
  //                 counted across all shards.
  // selected_count: tests that will actually run on this shard.
  int runnable_count = 0;
  int selected_count = 0;

  for (size_t case_idx = 0; case_idx < test_cases_.size(); ++case_idx) {
    TestCase* const current_case = test_cases_[case_idx];
    const String& case_name = current_case->name();
    current_case->set_should_run(false);

    for (size_t test_idx = 0;
         test_idx < current_case->test_info_list().size(); ++test_idx) {
      TestInfo* const current_test = current_case->test_info_list()[test_idx];
      const String test_name(current_test->name());

      // Disabled means the test case name or the test name matches
      // kDisableTestFilter.
      const bool disabled =
          internal::UnitTestOptions::MatchesFilter(case_name,
                                                   kDisableTestFilter) ||
          internal::UnitTestOptions::MatchesFilter(test_name,
                                                   kDisableTestFilter);
      current_test->is_disabled_ = disabled;

      const bool passes_filter =
          internal::UnitTestOptions::FilterMatchesTest(case_name, test_name);
      current_test->matches_filter_ = passes_filter;

      const bool runnable =
          passes_filter &&
          (GTEST_FLAG(also_run_disabled_tests) || !disabled);

      // The shard decision uses the index of this test among the runnable
      // ones, so it must be taken before the counter is advanced.
      bool selected = false;
      if (runnable) {
        selected = (shard_tests == IGNORE_SHARDING_PROTOCOL) ||
                   ShouldRunTestOnShard(total_shards, shard_index,
                                        runnable_count);
        ++runnable_count;
      }
      if (selected) {
        ++selected_count;
      }

      current_test->should_run_ = selected;
      current_case->set_should_run(current_case->should_run() || selected);
    }
  }
  return selected_count;
}

// Prints the names of the tests matching the user-specified filter flag.
void UnitTestImpl::ListTestsMatchingFilter() {
  for (size_t i = 0; i < test_cases_.size(); i++) {
    const TestCase* const test_case = test_cases_[i];
    bool printed_test_case_name = false;

    for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
      const TestInfo* const test_info =
          test_case->test_info_list()[j];
      if (test_info->matches_filter_) {
        if (!printed_test_case_name) {
          printed_test_case_name = true;
          printf("%s.\n", test_case->name());
        }
        printf("  %s\n", test_info->name());
      }
    }
  }
  fflush(stdout);
}

// Sets the OS stack trace getter.
//
// Does nothing if the input and the current OS stack trace getter are
// the same; otherwise, deletes the old getter and makes the input the
// current getter.
void UnitTestImpl::set_os_stack_trace_getter(
    OsStackTraceGetterInterface* getter) {
  // Installing the getter that is already current is a no-op; in
  // particular it must not be deleted.
  if (os_stack_trace_getter_ == getter) {
    return;
  }
  delete os_stack_trace_getter_;
  os_stack_trace_getter_ = getter;
}

// Returns the current OS stack trace getter if it is not NULL;
// otherwise, creates an OsStackTraceGetter, makes it the current
// getter, and returns it.
OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
  // Fast path: a getter has already been installed or created.
  if (os_stack_trace_getter_ != NULL) {
    return os_stack_trace_getter_;
  }

  // Otherwise create the default getter on first use.
  os_stack_trace_getter_ = new OsStackTraceGetter;
  return os_stack_trace_getter_;
}

// Returns the TestResult for the test that's currently running, or
// the TestResult for the ad hoc test if no test is running.
TestResult* UnitTestImpl::current_test_result() {
  // With a test in progress, results go to that test's TestResult.
  if (current_test_info_ != NULL) {
    return &(current_test_info_->result_);
  }
  // Outside of any test, results go to the ad hoc TestResult.
  return &ad_hoc_test_result_;
}

// Shuffles all test cases, and the tests within each test case,
// making sure that death tests are still run first.
void UnitTestImpl::ShuffleTests() {
  // Indices [0, first_non_death) hold the death test cases and
  // [first_non_death, case_count) hold all the others.  Each range is
  // shuffled on its own so death test cases stay in front.
  const int first_non_death = last_death_test_case_ + 1;
  const int case_count = static_cast<int>(test_cases_.size());

  ShuffleRange(random(), 0, first_non_death, &test_case_indices_);
  ShuffleRange(random(), first_non_death, case_count, &test_case_indices_);

  // Finally shuffle the tests within every test case.
  for (size_t case_idx = 0; case_idx < test_cases_.size(); ++case_idx) {
    test_cases_[case_idx]->ShuffleTests(random());
  }
}

// Restores the test cases and tests to their order before the first shuffle.
void UnitTestImpl::UnshuffleTests() {
  const size_t case_count = test_cases_.size();
  for (size_t case_idx = 0; case_idx != case_count; ++case_idx) {
    // Put the test case back at its original position ...
    test_case_indices_[case_idx] = static_cast<int>(case_idx);
    // ... and restore the original order of the tests inside it.
    test_cases_[case_idx]->UnshuffleTests();
  }
}

// Returns the current OS stack trace as a String.
//
// The maximum number of stack frames to be included is specified by
// the gtest_stack_trace_depth flag.  The skip_count parameter
// specifies the number of top frames to be skipped, which doesn't
// count against the number of frames to be included.
//
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
String GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/,
                                       int skip_count) {
  // One extra frame is skipped to hide this wrapper itself, on top of
  // the frames the caller asked to skip.
  const int frames_to_skip = skip_count + 1;
  return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(frames_to_skip);
}

// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to
// suppress unreachable code warnings.
namespace {
// Empty tag type with internal linkage.  It is only ever named as the
// type of the exception in AlwaysTrue()'s never-taken throw statement.
class ClassUniqueToAlwaysTrue {};
}

bool IsTrue(bool condition) {
  // Identity function: hands the argument back unchanged.
  return condition;
}

bool AlwaysTrue() {
#if GTEST_HAS_EXCEPTIONS
  // The guard is always false, so nothing is ever thrown; the throw
  // statement exists only so the compiler treats this function as
  // potentially throwing.
  if (IsTrue(false)) {
    throw ClassUniqueToAlwaysTrue();
  }
#endif  // GTEST_HAS_EXCEPTIONS
  return true;
}

// If *pstr starts with the given prefix, modifies *pstr to be right
// past the prefix and returns true; otherwise leaves *pstr unchanged
// and returns false.  None of pstr, *pstr, and prefix can be NULL.
bool SkipPrefix(const char* prefix, const char** pstr) {
  const size_t skip = strlen(prefix);
  const bool has_prefix = (strncmp(*pstr, prefix, skip) == 0);
  // Advance the caller's pointer only on a match.
  if (has_prefix) {
    *pstr += skip;
  }
  return has_prefix;
}

// Parses a string as a command line flag.  The string should have
// the format "--flag=value".  When def_optional is true, the "=value"
// part can be omitted.
//
// Returns the value of the flag, or NULL if the parsing failed.
const char* ParseFlagValue(const char* str,
                           const char* flag,
                           bool def_optional) {
  // str and flag must not be NULL.
  if (str == NULL || flag == NULL) return NULL;

  // The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
  const String flag_str = String::Format("--%s%s", GTEST_FLAG_PREFIX_, flag);
  const size_t flag_len = flag_str.length();
  if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;

  // Skips the flag name.
  const char* flag_end = str + flag_len;

  // When def_optional is true, it's OK to not have a "=value" part.
  if (def_optional && (flag_end[0] == '\0')) {
    return flag_end;
  }

  // If def_optional is true and there are more characters after the
  // flag name, or if def_optional is false, there must be a '=' after
  // the flag name.
  if (flag_end[0] != '=') return NULL;

  // Returns the string after "=".
  return flag_end + 1;
}

// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
// In the former case, the value is taken as true as long as it does
// not start with '0', 'f', or 'F'.
//
// In the latter case, the value is taken as true.
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
  // A bool flag may appear without "=value", hence def_optional == true.
  const char* const text = ParseFlagValue(str, flag, true);
  if (text == NULL) return false;

  // Only a leading '0', 'f' or 'F' means false; anything else,
  // including an empty value, means true.
  const char first = *text;
  *value = (first != '0' && first != 'f' && first != 'F');
  return true;
}

// Parses a string for an Int32 flag, in the form of
// "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
  // An Int32 flag always requires an explicit "=value" part.
  const char* const text = ParseFlagValue(str, flag, false);

  // The flag is accepted only if it is present and ParseInt32 succeeds;
  // ParseInt32 is not called at all when the flag is absent.
  return text != NULL &&
         ParseInt32(Message() << "The value of flag --" << flag,
                    text, value);
}

// Parses a string for a string flag, in the form of
// "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseStringFlag(const char* str, const char* flag, String* value) {
  // A string flag always requires an explicit "=value" part.
  const char* const text = ParseFlagValue(str, flag, false);
  const bool found = (text != NULL);

  // *value is overwritten only when the flag was actually present.
  if (found) {
    *value = text;
  }
  return found;
}

// Determines whether a string has a prefix that Google Test uses for its
// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_.
// If Google Test detects that a command line flag has its prefix but is not
// recognized, it will print its help message. Flags starting with
// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test
// internal flags and do not trigger the help message.
static bool HasGoogleTestFlagPrefix(const char* str) {
  // Step 1: the argument must start with a switch marker ("--", "-" or
  // "/"); the first marker that matches is consumed.
  const bool has_switch_marker =
      SkipPrefix("--", &str) || SkipPrefix("-", &str) || SkipPrefix("/", &str);
  if (!has_switch_marker) {
    return false;
  }

  // Step 2: flags starting with the prefix plus "internal_" are excluded.
  if (SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str)) {
    return false;
  }

  // Step 3: what remains must carry one of the two flag prefixes.
  return SkipPrefix(GTEST_FLAG_PREFIX_, &str) ||
         SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str);
}

// Prints a string containing code-encoded text.  The following escape
// sequences can be used in the string to control the text color:
//
//   @@    prints a single '@' character.
//   @R    changes the color to red.
//   @G    changes the color to green.
//   @Y    changes the color to yellow.
//   @D    changes to the default terminal text color.
//
// TODO(wan@google.com): Write tests for this once we add stdout
// capturing to Google Test.
static void PrintColorEncoded(const char* str) {
  GTestColor color = COLOR_DEFAULT;  // The current color.

  // Conceptually, we split the string into segments divided by escape
  // sequences.  Then we print one segment at a time.  At the end of
  // each iteration, the str pointer advances to the beginning of the
  // next segment.
  for (;;) {
    const char* p = strchr(str, '@');
    if (p == NULL) {
      ColoredPrintf(color, "%s", str);
      return;
    }

    ColoredPrintf(color, "%s", String(str, p - str).c_str());

    const char ch = p[1];
    str = p + 2;
    if (ch == '@') {
      ColoredPrintf(color, "@");
    } else if (ch == 'D') {
      color = COLOR_DEFAULT;
    } else if (ch == 'R') {
      color = COLOR_RED;
    } else if (ch == 'G') {
      color = COLOR_GREEN;
    } else if (ch == 'Y') {
      color = COLOR_YELLOW;
    } else {
      --str;
    }
  }
}

// The help text shown for --help and for unrecognized Google Test flags.
// It uses the color escapes understood by PrintColorEncoded(): @G, @Y,
// @R and @D switch the color, @@ prints a literal '@'.
static const char kColorEncodedHelpMessage[] =
"This program contains tests written using " GTEST_NAME_ ". You can use the\n"
"following command line flags to control its behavior:\n"
"\n"
"Test Selection:\n"
"  @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
"      List the names of all tests instead of running them. The name of\n"
"      TEST(Foo, Bar) is \"Foo.Bar\".\n"
"  @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSITIVE_PATTERNS"
    "[@G-@YNEGATIVE_PATTERNS]@D\n"
"      Run only the tests whose name matches one of the positive patterns but\n"
"      none of the negative patterns. '?' matches any single character; '*'\n"
"      matches any substring; ':' separates two patterns.\n"
"  @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
"      Run all disabled tests too.\n"
"\n"
"Test Execution:\n"
"  @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
"      Run the tests repeatedly; use a negative count to repeat forever.\n"
"  @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
"      Randomize tests' orders on every iteration.\n"
"  @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
"      Random number seed to use for shuffling test orders (between 1 and\n"
"      99999, or 0 to use a seed based on the current time).\n"
"\n"
"Test Output:\n"
"  @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
"      Enable/disable colored output. The default is @Gauto@D.\n"
"  @G--" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
"      Don't print the elapsed time of each test.\n"
"  @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
    GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
"      Generate an XML report in the given directory or with the given file\n"
"      name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
#if GTEST_CAN_STREAM_RESULTS_
"  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
"      Stream test results to the given server.\n"
#endif  // GTEST_CAN_STREAM_RESULTS_
"\n"
"Assertion Behavior:\n"
#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
"  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
"      Set the default death test style.\n"
#endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
"  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
"      Turn assertion failures into debugger break-points.\n"
"  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
"      Turn assertion failures into C++ exceptions.\n"
"  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
"      Do not report exceptions as test failures. Instead, allow them\n"
"      to crash the program or throw a pop-up (on Windows).\n"
"\n"
"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
    "the corresponding\n"
"environment variable of a flag (all letters in upper-case). For example, to\n"
"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
    "color=no@D or set\n"
"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
"\n"
"For more information, please read the " GTEST_NAME_ " documentation at\n"
"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
"(not one in your own code or tests), please report it to\n"
"@G<" GTEST_DEV_EMAIL_ ">@D.\n";

// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.  The type parameter CharType can be
// instantiated to either char or wchar_t.
template <typename CharType>
void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
  using internal::ParseBoolFlag;
  using internal::ParseInt32Flag;
  using internal::ParseStringFlag;

  // argv[0] is the program name, so scanning starts at index 1.  The
  // index only advances when the current argument is kept.
  int i = 1;
  while (i < *argc) {
    const String arg_string = StreamableToString(argv[i]);
    const char* const arg = arg_string.c_str();

    // Try every known flag in turn; the first parser that accepts the
    // argument stores the value and ends the chain.
    const bool is_gtest_flag =
        ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
                      &GTEST_FLAG(also_run_disabled_tests)) ||
        ParseBoolFlag(arg, kBreakOnFailureFlag,
                      &GTEST_FLAG(break_on_failure)) ||
        ParseBoolFlag(arg, kCatchExceptionsFlag,
                      &GTEST_FLAG(catch_exceptions)) ||
        ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
        ParseStringFlag(arg, kDeathTestStyleFlag,
                        &GTEST_FLAG(death_test_style)) ||
        ParseBoolFlag(arg, kDeathTestUseFork,
                      &GTEST_FLAG(death_test_use_fork)) ||
        ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
        ParseStringFlag(arg, kInternalRunDeathTestFlag,
                        &GTEST_FLAG(internal_run_death_test)) ||
        ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
        ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
        ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
        ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
        ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
        ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
        ParseInt32Flag(arg, kStackTraceDepthFlag,
                       &GTEST_FLAG(stack_trace_depth)) ||
        ParseStringFlag(arg, kStreamResultToFlag,
                        &GTEST_FLAG(stream_result_to)) ||
        ParseBoolFlag(arg, kThrowOnFailureFlag,
                      &GTEST_FLAG(throw_on_failure));

    if (is_gtest_flag) {
      // Remove the consumed argument by shifting the rest of argv left
      // by one.  argv has (*argc + 1) elements, the last one always
      // being NULL, and the loop moves that trailing NULL as well.
      for (int j = i; j != *argc; j++) {
        argv[j] = argv[j + 1];
      }
      (*argc)--;

      // The next argument now sits at index i, so i is left unchanged.
      continue;
    }

    // Help flags and unrecognized Google Test flags (excluding internal
    // ones) both request the help message.
    if (arg_string == "--help" || arg_string == "-h" ||
        arg_string == "-?" || arg_string == "/?" ||
        HasGoogleTestFlagPrefix(arg)) {
      g_help_flag = true;
    }
    ++i;
  }

  if (g_help_flag) {
    // We print the help here instead of in RUN_ALL_TESTS(), as the
    // latter may not be called at all if the user is using Google
    // Test with another testing framework.
    PrintColorEncoded(kColorEncodedHelpMessage);
  }
}

// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
  ParseGoogleTestFlagsOnlyImpl(argc, argv);
}
void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
  ParseGoogleTestFlagsOnlyImpl(argc, argv);
}

// The internal implementation of InitGoogleTest().
//
// The type parameter CharType can be instantiated to either char or
// wchar_t.
template <typename CharType>
void InitGoogleTestImpl(int* argc, CharType** argv) {
  // Every call is counted, but only the first one does any work; an
  // empty command line also leaves nothing to do.
  const bool first_call = (++g_init_gtest_count == 1);
  if (!first_call || *argc <= 0) return;

  internal::g_executable_path = internal::StreamableToString(argv[0]);

#if GTEST_HAS_DEATH_TEST

  // Snapshot the full command line before any flag is stripped from it.
  g_argvs.clear();
  for (int arg_idx = 0; arg_idx != *argc; ++arg_idx) {
    g_argvs.push_back(StreamableToString(argv[arg_idx]));
  }

#endif  // GTEST_HAS_DEATH_TEST

  ParseGoogleTestFlagsOnly(argc, argv);
  GetUnitTestImpl()->PostFlagParsingInit();
}

}  // namespace internal

// Initializes Google Test.  This must be called before calling
// RUN_ALL_TESTS().  In particular, it parses a command line for the
// flags that Google Test recognizes.  Whenever a Google Test flag is
// seen, it is removed from argv, and *argc is decremented.
//
// No value is returned.  Instead, the Google Test flag variables are
// updated.
//
// Calling the function for the second time has no user-visible effect.
void InitGoogleTest(int* argc, char** argv) {
  internal::InitGoogleTestImpl(argc, argv);
}

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
void InitGoogleTest(int* argc, wchar_t** argv) {
  internal::InitGoogleTestImpl(argc, argv);
}

}  // namespace testing
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
//
// This file implements death tests.


#if GTEST_HAS_DEATH_TEST

# if GTEST_OS_MAC
#  include <crt_externs.h>
# endif  // GTEST_OS_MAC

# include <errno.h>
# include <fcntl.h>
# include <limits.h>
# include <stdarg.h>

# if GTEST_OS_WINDOWS
#  include <windows.h>
# else
#  include <sys/mman.h>
#  include <sys/wait.h>
# endif  // GTEST_OS_WINDOWS

#endif  // GTEST_HAS_DEATH_TEST


// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#undef GTEST_IMPLEMENTATION_

namespace testing {

// Constants.

// The default death test style.  It is passed as the fallback value when
// the death_test_style flag is initialized from the environment.
static const char kDefaultDeathTestStyle[] = "fast";

// Defines the death_test_style string flag.  Its initial value comes from
// StringFromGTestEnv("death_test_style", ...), with kDefaultDeathTestStyle
// as the fallback.
GTEST_DEFINE_string_(
    death_test_style,
    internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
    "Indicates how to run a death test in a forked child process: "
    "\"threadsafe\" (child process re-executes the test binary "
    "from the beginning, running only the specific death test) or "
    "\"fast\" (child process runs the death test immediately "
    "after forking).");

// Defines the death_test_use_fork bool flag.  Its initial value comes from
// BoolFromGTestEnv("death_test_use_fork", ...), with false as the fallback.
GTEST_DEFINE_bool_(
    death_test_use_fork,
    internal::BoolFromGTestEnv("death_test_use_fork", false),
    "Instructs to use fork()/_exit() instead of clone() in death tests. "
    "Ignored and always uses fork() on POSIX systems where clone() is not "
    "implemented. Useful when running under valgrind or similar tools if "
    "those do not support clone(). Valgrind 3.3.1 will just fail if "
    "it sees an unsupported combination of clone() flags. "
    "It is not recommended to use this flag w/o valgrind though it will "
    "work in 99% of the cases. Once valgrind is fixed, this flag will "
    "most likely be removed.");

namespace internal {
// Defines the internal_run_death_test string flag, which defaults to the
// empty string.  Unlike the two flags above it is defined inside
// namespace internal.
GTEST_DEFINE_string_(
    internal_run_death_test, "",
    "Indicates the file, line number, temporal index of "
    "the single death test to run, and a file descriptor to "
    "which a success code may be sent, all separated by "
    "colons.  This flag is specified if and only if the current "
    "process is a sub-process launched for running a thread-safe "
    "death test.  FOR INTERNAL USE ONLY.");
}  // namespace internal

#if GTEST_HAS_DEATH_TEST

// ExitedWithCode constructor.
ExitedWithCode::ExitedWithCode(int exit_code)
    : exit_code_(exit_code) {}  // Remembers the exit code to match against.

// ExitedWithCode function-call operator.
bool ExitedWithCode::operator()(int exit_status) const {
# if GTEST_OS_WINDOWS

  // On Windows the status is compared with the expected code directly.
  return exit_status == exit_code_;

# else

  // Elsewhere the status is decoded with the wait(2) macros: the process
  // must have exited normally, and with the expected code.
  if (!WIFEXITED(exit_status)) {
    return false;
  }
  return WEXITSTATUS(exit_status) == exit_code_;

# endif  // GTEST_OS_WINDOWS
}

# if !GTEST_OS_WINDOWS
// KilledBySignal constructor.
KilledBySignal::KilledBySignal(int signum)
    : signum_(signum) {}  // Remembers the signal number to match against.

// KilledBySignal function-call operator.
bool KilledBySignal::operator()(int exit_status) const {
  // The process must have been terminated by a signal, and by the
  // expected one.
  if (!WIFSIGNALED(exit_status)) {
    return false;
  }
  return WTERMSIG(exit_status) == signum_;
}
# endif  // !GTEST_OS_WINDOWS

namespace internal {

// Utilities needed for death tests.

// Generates a textual description of a given exit code, in the format
// specified by wait(2).
static String ExitSummary(int exit_code) {
  Message summary;

# if GTEST_OS_WINDOWS

  // On Windows the value is reported as the exit status as-is.
  summary << "Exited with exit status " << exit_code;

# else

  // Elsewhere the value is decoded with the wait(2) macros.
  if (WIFEXITED(exit_code)) {
    const int reported_status = WEXITSTATUS(exit_code);
    summary << "Exited with exit status " << reported_status;
  } else if (WIFSIGNALED(exit_code)) {
    const int signal_number = WTERMSIG(exit_code);
    summary << "Terminated by signal " << signal_number;
  }
#  ifdef WCOREDUMP
  // Only platforms that provide WCOREDUMP can report a core dump.
  if (WCOREDUMP(exit_code)) {
    summary << " (core dumped)";
  }
#  endif
# endif  // GTEST_OS_WINDOWS

  return summary.GetString();
}

// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
bool ExitedUnsuccessfully(int exit_status) {
  return !ExitedWithCode(0)(exit_status);
}

# if !GTEST_OS_WINDOWS
// Generates a textual failure message when a death test finds more than
// one thread running, or cannot determine the number of threads, prior
// to executing the given statement.  It is the responsibility of the
// caller not to pass a thread_count of 1.
static String DeathTestThreadWarning(size_t thread_count) {
  Message warning;
  warning << "Death tests use fork(), which is unsafe particularly"
          << " in a threaded context. For this test, " << GTEST_NAME_ << " ";

  // A count of 0 is reported as "couldn't detect"; any other value is
  // reported as the number of threads.
  if (thread_count != 0) {
    warning << "detected " << thread_count << " threads.";
  } else {
    warning << "couldn't detect the number of threads.";
  }
  return warning.GetString();
}
# endif  // !GTEST_OS_WINDOWS

// Flag characters for reporting a death test that did not die.
// NOTE(review): the meanings below are read off the constant names and
// the DeathTestOutcome comment that follows; only kDeathTestInternalError
// is used in the visible code (written by DeathTestAbort).
static const char kDeathTestLived = 'L';          // statement ran to its end
static const char kDeathTestReturned = 'R';       // statement executed a return
static const char kDeathTestThrew = 'T';          // statement threw an exception
static const char kDeathTestInternalError = 'I';  // internal error; message follows

// An enumeration describing all of the possible ways that a death test can
// conclude.  DIED means that the process died while executing the test
// code; LIVED means that process lived beyond the end of the test code;
// RETURNED means that the test statement attempted to execute a return
// statement, which is not allowed; THREW means that the test statement
// returned control by throwing an exception.  IN_PROGRESS means the test
// has not yet concluded.
// TODO(vladl@google.com): Unify names and possibly values for
// AbortReason, DeathTestOutcome, and flag characters above.
enum DeathTestOutcome {
  IN_PROGRESS,  // The test has not yet concluded.
  DIED,         // The process died while executing the test code.
  LIVED,        // The process lived beyond the end of the test code.
  RETURNED,     // The statement attempted to execute a return statement.
  THREW         // The statement returned control by throwing an exception.
};

// Routine for aborting the program which is safe to call from an
// exec-style death test child process.
//
// When the internal_run_death_test flag is set (i.e. this is such a
// child process), the kDeathTestInternalError flag character followed by
// the message is written to the parent through the flag's write file
// descriptor, and the process terminates with _exit(1).  If that
// descriptor cannot be opened as a stream, the message is written to
// stderr instead so that it is not lost.
//
// Otherwise the message is printed to stderr and posix::Abort() is
// called.
void DeathTestAbort(const String& message) {
  // On a POSIX system, this function may be called from a threadsafe-style
  // death test child process, which operates on a very small stack.  Use
  // the heap for any additional non-minuscule memory requirements.
  const InternalRunDeathTestFlag* const flag =
      GetUnitTestImpl()->internal_run_death_test_flag();
  if (flag != NULL) {
    FILE* parent = posix::FDOpen(flag->write_fd(), "w");
    if (parent != NULL) {
      fputc(kDeathTestInternalError, parent);
      fprintf(parent, "%s", message.c_str());
      fflush(parent);
    } else {
      // Opening the stream failed; writing through a NULL FILE* would be
      // undefined behavior, so fall back to stderr.
      fprintf(stderr, "%s", message.c_str());
      fflush(stderr);
    }
    _exit(1);
  } else {
    fprintf(stderr, "%s", message.c_str());
    fflush(stderr);
    posix::Abort();
  }
}

// A replacement for CHECK that calls DeathTestAbort if the assertion
// fails.  The abort message names the file, the line and the text of the
// failed expression.  The expression is wrapped in IsTrue(), and the body
// is a do { ... } while (AlwaysFalse()) block so that the macro can be
// used like a single statement.
# define GTEST_DEATH_TEST_CHECK_(expression) \
  do { \
    if (!::testing::internal::IsTrue(expression)) { \
      DeathTestAbort(::testing::internal::String::Format( \
          "CHECK failed: File %s, line %d: %s", \
          __FILE__, __LINE__, #expression)); \
    } \
  } while (::testing::internal::AlwaysFalse())

// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
// evaluating any system call that fulfills two conditions: it must return
// -1 on failure, and set errno to EINTR when it is interrupted and
// should be tried again.  The macro expands to a loop that repeatedly
// evaluates the expression as long as it evaluates to -1 and sets
// errno to EINTR.  If the expression evaluates to -1 but errno is
// something other than EINTR, DeathTestAbort is called.
//
// Note that the expression may therefore be evaluated more than once,
// and that its result is stored in an int.
# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
  do { \
    int gtest_retval; \
    do { \
      gtest_retval = (expression); \
    } while (gtest_retval == -1 && errno == EINTR); \
    if (gtest_retval == -1) { \
      DeathTestAbort(::testing::internal::String::Format( \
          "CHECK failed: File %s, line %d: %s != -1", \
          __FILE__, __LINE__, #expression)); \
    } \
  } while (::testing::internal::AlwaysFalse())

// Returns the message describing the last system error in errno.
String GetLastErrnoDescription() {
    // errno == 0 means there is no error to describe.
    if (errno == 0) {
        return String("");
    }
    return String(posix::StrError(errno));
}

// This is called from a death test parent process to read a failure
// message from the death test child process and log it with the FATAL
// severity. On Windows, the message is read from a pipe handle. On other
// platforms, it is read from a file descriptor.
static void FailFromInternalError(int fd) {
  Message error;
  char buffer[256];
  int num_read;

  do {
    while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
      buffer[num_read] = '\0';
      error << buffer;
    }
  } while (num_read == -1 && errno == EINTR);

  if (num_read == 0) {
    GTEST_LOG_(FATAL) << error.GetString();
  } else {
    const int last_error = errno;
    GTEST_LOG_(FATAL) << "Error while reading death test internal: "
                      << GetLastErrnoDescription() << " [" << last_error << "]";
  }
}

// Death test constructor.  Aborts via DeathTestAbort() if no test is
// currently running.
DeathTest::DeathTest() {
  // A death test is only valid while some test is running.
  if (GetUnitTestImpl()->current_test_info() == NULL) {
    DeathTestAbort("Cannot run a death test outside of a TEST or "
                   "TEST_F construct");
  }
}

// Creates and returns a death test by dispatching to the current
// death test factory.
bool DeathTest::Create(const char* statement, const RE* regex,
                       const char* file, int line, DeathTest** test) {
  return GetUnitTestImpl()->death_test_factory()->Create(
      statement, regex, file, line, test);
}

const char* DeathTest::LastMessage() {
  return last_death_test_message_.c_str();
}

// Replaces the stored death test message (the text returned by
// LastMessage()) with a copy of `message`.
void DeathTest::set_last_death_test_message(const String& message) {
  last_death_test_message_ = message;
}

// Definition of the static member that holds the last death test message.
String DeathTest::last_death_test_message_;

// Provides cross platform implementation for some death functionality.
// Holds the state shared by the platform-specific death test classes: the
// statement and regex under test, the child's exit status and outcome, and
// the pipe descriptors used to exchange the status byte.
class DeathTestImpl : public DeathTest {
 protected:
  // Starts with no child spawned, status -1, outcome IN_PROGRESS, and both
  // pipe descriptors unset (-1).
  DeathTestImpl(const char* a_statement, const RE* a_regex)
      : statement_(a_statement),
        regex_(a_regex),
        spawned_(false),
        status_(-1),
        outcome_(IN_PROGRESS),
        read_fd_(-1),
        write_fd_(-1) {}

  // read_fd_ is expected to be closed and cleared by a derived class.
  ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }

  // Writes a status byte describing `reason` to write_fd_ and exits the
  // process; see the definition below.
  void Abort(AbortReason reason);
  // Evaluates the recorded outcome, status and captured stderr; see the
  // definition below.
  virtual bool Passed(bool status_ok);

  // Plain accessors for the private state declared below.
  const char* statement() const { return statement_; }
  const RE* regex() const { return regex_; }
  bool spawned() const { return spawned_; }
  void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
  int status() const { return status_; }
  void set_status(int a_status) { status_ = a_status; }
  DeathTestOutcome outcome() const { return outcome_; }
  void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }
  int read_fd() const { return read_fd_; }
  void set_read_fd(int fd) { read_fd_ = fd; }
  int write_fd() const { return write_fd_; }
  void set_write_fd(int fd) { write_fd_ = fd; }

  // Called in the parent process only. Reads the result code of the death
  // test child process via a pipe, interprets it to set the outcome_
  // member, and closes read_fd_.  Outputs diagnostics and terminates in
  // case of unexpected codes.
  void ReadAndInterpretStatusByte();

 private:
  // The textual content of the code this object is testing.  This class
  // doesn't own this string and should not attempt to delete it.
  const char* const statement_;
  // The regular expression which test output must match.  DeathTestImpl
  // doesn't own this object and should not attempt to delete it.
  const RE* const regex_;
  // True if the death test child process has been successfully spawned.
  bool spawned_;
  // The exit status of the child process.
  int status_;
  // How the death test concluded.
  DeathTestOutcome outcome_;
  // Descriptor to the read end of the pipe to the child process.  It is
  // always -1 in the child process.  The child keeps its write end of the
  // pipe in write_fd_.
  int read_fd_;
  // Descriptor to the child's write end of the pipe to the parent process.
  // It is always -1 in the parent process.  The parent keeps its end of the
  // pipe in read_fd_.
  int write_fd_;
};

// Parent-side only.  Reads at most one status byte from the pipe connected
// to the death test child process and records the matching outcome:
// end-of-file means the child died (DIED), while a byte identifies a
// return, a thrown exception, a survival, or an internal error.  Unexpected
// bytes and read failures are logged as FATAL.  Afterwards read_fd_ is
// closed and reset to -1.
void DeathTestImpl::ReadAndInterpretStatusByte() {
  char flag;
  int bytes_read;

  // The read blocks until either data arrives (the death test failed) or
  // the pipe is closed (it succeeded), so calling this before the child has
  // exited is fine.  A read interrupted by a signal is retried.
  for (;;) {
    bytes_read = posix::Read(read_fd(), &flag, 1);
    if (bytes_read != -1 || errno != EINTR) {
      break;
    }
  }

  if (bytes_read == 1) {
    if (flag == kDeathTestReturned) {
      set_outcome(RETURNED);
    } else if (flag == kDeathTestThrew) {
      set_outcome(THREW);
    } else if (flag == kDeathTestLived) {
      set_outcome(LIVED);
    } else if (flag == kDeathTestInternalError) {
      FailFromInternalError(read_fd());  // Does not return.
    } else {
      GTEST_LOG_(FATAL) << "Death test child process reported "
                        << "unexpected status byte ("
                        << static_cast<unsigned int>(flag) << ")";
    }
  } else if (bytes_read == 0) {
    // Pipe closed without any data: the child died as expected.
    set_outcome(DIED);
  } else {
    GTEST_LOG_(FATAL) << "Read from death test child process failed: "
                      << GetLastErrnoDescription();
  }

  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
  set_read_fd(-1);
}

// Reports that the death test statement failed to terminate the process.
// Must only be called in a death test child process.  Writes one status
// byte describing `reason` to the child's status file descriptor and then
// calls _exit(1).
void DeathTestImpl::Abort(AbortReason reason) {
  // Any data found in the pipe makes the parent treat the death test as a
  // failure, so a single flag byte is all that needs to be sent.
  char status_ch = kDeathTestReturned;
  if (reason == TEST_DID_NOT_DIE) {
    status_ch = kDeathTestLived;
  } else if (reason == TEST_THREW_EXCEPTION) {
    status_ch = kDeathTestThrew;
  }

  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));

  // The descriptor is deliberately left open.  On some platforms (i.e.,
  // when built as a Windows DLL) destructors of global objects still run
  // after _exit(); there write_fd_ gets closed indirectly from the
  // destructor of UnitTestImpl, so closing it here as well would be a double
  // close, which may assert in debug configurations.  There are no
  // in-process buffers to flush, and where destructors do not run the OS
  // closes the descriptor once the process terminates.
  _exit(1);  // Exits w/o any normal exit hooks (we were supposed to crash)
}

// Returns a copy of a death test's stderr output in which every line is
// prefixed with "[  DEATH   ] ", so that death test output is easy to tell
// apart from regular log lines.  The text after the last newline (possibly
// empty) is always emitted as a final prefixed line.
static ::std::string FormatDeathTestOutput(const ::std::string& output) {
  static const char kPrefix[] = "[  DEATH   ] ";

  ::std::string indented;
  ::std::string::size_type begin = 0;
  ::std::string::size_type newline = output.find('\n', begin);
  // Copy each newline-terminated line, including its '\n'.
  while (newline != ::std::string::npos) {
    indented += kPrefix;
    indented.append(output, begin, newline + 1 - begin);
    begin = newline + 1;
    newline = output.find('\n', begin);
  }
  // Whatever follows the last newline forms the final line.
  indented += kPrefix;
  indented.append(output, begin, ::std::string::npos);
  return indented;
}

// Assesses the success or failure of a death test, using both private
// members which have previously been set, and one argument:
//
// Private data members:
//   outcome:  An enumeration describing how the death test
//             concluded: DIED, LIVED, THREW, or RETURNED.  The death test
//             fails in the latter three cases.
//   status:   The exit status of the child process. On *nix, it is in the
//             in the format specified by wait(2). On Windows, this is the
//             value supplied to the ExitProcess() API or a numeric code
//             of the exception that terminated the program.
//   regex:    A regular expression object to be applied to
//             the test's captured standard error output; the death test
//             fails if it does not match.
//
// Argument:
//   status_ok: true if exit_status is acceptable in the context of
//              this particular death test, which fails if it is false
//
// Returns true iff all of the above conditions are met.  Otherwise, the
// first failing condition, in the order given above, is the one that is
// reported. Also sets the last death test message string.
bool DeathTestImpl::Passed(bool status_ok) {
  if (!spawned())
    return false;

  const String error_message = GetCapturedStderr();

  bool success = false;
  Message buffer;

  buffer << "Death test: " << statement() << "\n";
  switch (outcome()) {
    case LIVED:
      buffer << "    Result: failed to die.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;
    case THREW:
      buffer << "    Result: threw an exception.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;
    case RETURNED:
      buffer << "    Result: illegal return in test statement.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;
    case DIED:
      if (status_ok) {
        const bool matched = RE::PartialMatch(error_message.c_str(), *regex());
        if (matched) {
          success = true;
        } else {
          buffer << "    Result: died but not with expected error.\n"
                 << "  Expected: " << regex()->pattern() << "\n"
                 << "Actual msg:\n" << FormatDeathTestOutput(error_message);
        }
      } else {
        buffer << "    Result: died but not with expected exit code:\n"
               << "            " << ExitSummary(status()) << "\n"
               << "Actual msg:\n" << FormatDeathTestOutput(error_message);
      }
      break;
    case IN_PROGRESS:
    default:
      GTEST_LOG_(FATAL)
          << "DeathTest::Passed somehow called before conclusion of test";
  }

  DeathTest::set_last_death_test_message(buffer.GetString());
  return success;
}

# if GTEST_OS_WINDOWS
// WindowsDeathTest implements death tests on Windows. Due to the
// specifics of starting new processes on Windows, death tests there are
// always threadsafe, and Google Test considers the
// --gtest_death_test_style=fast setting to be equivalent to
// --gtest_death_test_style=threadsafe there.
//
// A few implementation notes:  Like the Linux version, the Windows
// implementation uses pipes for child-to-parent communication. But due to
// the specifics of pipes on Windows, some extra steps are required:
//
// 1. The parent creates a communication pipe and stores handles to both
//    ends of it.
// 2. The parent starts the child and provides it with the information
//    necessary to acquire the handle to the write end of the pipe.
// 3. The child acquires the write end of the pipe and signals the parent
//    using a Windows event.
// 4. Now the parent can release the write end of the pipe on its side. If
//    this is done before step 3, the object's reference count goes down to
//    0 and it is destroyed, preventing the child from acquiring it. The
//    parent now has to release it, or read operations on the read end of
//    the pipe will not return when the child terminates.
// 5. The parent reads the child's output (the outcome code and any error
//    messages) from the pipe, collects the child's stderr, and then
//    determines whether to fail the test.
//
// Note: to distinguish Win32 API calls from the local method and function
// calls, the former are explicitly resolved in the global namespace.
//
class WindowsDeathTest : public DeathTestImpl {
 public:
  // Records the statement and regex (in the base class) together with the
  // source location of the death test.
  WindowsDeathTest(const char* a_statement,
                   const RE* a_regex,
                   const char* file,
                   int line)
      : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {}

  // All of these virtual functions are inherited from DeathTest.
  virtual int Wait();
  virtual TestRole AssumeRole();

 private:
  // The name of the file in which the death test is located.
  const char* const file_;
  // The line number on which the death test is located.
  const int line_;
  // Handle to the write end of the pipe to the child process.
  AutoHandle write_handle_;
  // Child process handle.
  AutoHandle child_handle_;
  // Event the child process uses to signal the parent that it has
  // acquired the handle to the write end of the pipe. After seeing this
  // event the parent can release its own handles to make sure its
  // ReadFile() calls return when the child terminates.
  AutoHandle event_handle_;
};

// Waits for the child in a death test to exit, returning its exit
// status, or 0 if no child process exists.  As a side effect, sets the
// outcome data member.
int WindowsDeathTest::Wait() {
  if (!spawned())
    return 0;

  // Wait until the child either signals that it has acquired the write end
  // of the pipe or it dies.
  const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() };
  switch (::WaitForMultipleObjects(2,
                                   wait_handles,
                                   FALSE,  // Waits for any of the handles.
                                   INFINITE)) {
    case WAIT_OBJECT_0:
    case WAIT_OBJECT_0 + 1:
      break;
    default:
      GTEST_DEATH_TEST_CHECK_(false);  // Should not get here.
  }

  // The child has acquired the write end of the pipe or exited.
  // We release the handle on our side and continue.
  write_handle_.Reset();
  event_handle_.Reset();

  // Sets the outcome and closes read_fd_.
  ReadAndInterpretStatusByte();

  // Waits for the child process to exit if it hasn't already. This
  // returns immediately if the child has already exited, regardless of
  // whether previous calls to WaitForMultipleObjects synchronized on this
  // handle or not.
  GTEST_DEATH_TEST_CHECK_(
      WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(),
                                             INFINITE));
  DWORD status_code;
  GTEST_DEATH_TEST_CHECK_(
      ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE);
  child_handle_.Reset();
  // The DWORD exit code is stored as an int status.
  set_status(static_cast<int>(status_code));
  return status();
}

// The AssumeRole process for a Windows death test.  It creates a child
// process with the same executable as the current process to run the
// death test.  The child process is given the --gtest_filter and
// --gtest_internal_run_death_test flags such that it knows to run the
// current death test only.
//
// Returns EXECUTE_TEST when the internal run-death-test flag is present
// (i.e. in the re-executed child), and OVERSEE_TEST in the parent after the
// child has been created.
DeathTest::TestRole WindowsDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  if (flag != NULL) {
    // ParseInternalRunDeathTestFlag() has performed all the necessary
    // processing.
    set_write_fd(flag->write_fd());
    return EXECUTE_TEST;
  }

  // WindowsDeathTest uses an anonymous pipe to communicate results of
  // a death test.
  SECURITY_ATTRIBUTES handles_are_inheritable = {
    sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };
  HANDLE read_handle, write_handle;
  GTEST_DEATH_TEST_CHECK_(
      ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable,
                   0)  // Default buffer size.
      != FALSE);
  // Wrap the read handle in a C runtime file descriptor so that the shared
  // ReadAndInterpretStatusByte() code can read from it.
  set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
                                O_RDONLY));
  write_handle_.Reset(write_handle);
  event_handle_.Reset(::CreateEvent(
      &handles_are_inheritable,
      TRUE,    // Manual-reset event: stays signaled until explicitly reset.
      FALSE,   // The initial state is non-signalled.
      NULL));  // The event is unnamed.
  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
  const String filter_flag = String::Format("--%s%s=%s.%s",
                                            GTEST_FLAG_PREFIX_, kFilterFlag,
                                            info->test_case_name(),
                                            info->name());
  // Encodes file|line|index|parent pid|write handle|event handle.
  const String internal_flag = String::Format(
    "--%s%s=%s|%d|%d|%u|%Iu|%Iu",
      GTEST_FLAG_PREFIX_,
      kInternalRunDeathTestFlag,
      file_, line_,
      death_test_index,
      static_cast<unsigned int>(::GetCurrentProcessId()),
      // size_t has the same width as pointers on both 32-bit and 64-bit
      // Windows platforms.
      // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
      reinterpret_cast<size_t>(write_handle),
      reinterpret_cast<size_t>(event_handle_.Get()));

  // NOTE(review): GetModuleFileNameA is given a buffer size of _MAX_PATH, so
  // its result presumably never equals _MAX_PATH + 1 and this check looks
  // like it cannot fail -- confirm the intended failure/truncation test.
  char executable_path[_MAX_PATH + 1];  // NOLINT
  GTEST_DEATH_TEST_CHECK_(
      _MAX_PATH + 1 != ::GetModuleFileNameA(NULL,
                                            executable_path,
                                            _MAX_PATH));

  // The child's command line is the parent's command line followed by the
  // two extra flags.
  String command_line = String::Format("%s %s \"%s\"",
                                       ::GetCommandLineA(),
                                       filter_flag.c_str(),
                                       internal_flag.c_str());

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // Flush the log buffers since the log streams are shared with the child.
  FlushInfoLog();

  // The child process will share the standard handles with the parent.
  // NOTE(review): the structure is a STARTUPINFOA but is cleared with
  // sizeof(STARTUPINFO), and its cb member is left at 0 -- confirm both are
  // intended.
  STARTUPINFOA startup_info;
  memset(&startup_info, 0, sizeof(STARTUPINFO));
  startup_info.dwFlags = STARTF_USESTDHANDLES;
  startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
  startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
  startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);

  PROCESS_INFORMATION process_info;
  GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
      executable_path,
      const_cast<char*>(command_line.c_str()),
      NULL,   // Returned process handle is not inheritable.
      NULL,   // Returned thread handle is not inheritable.
      TRUE,   // Child inherits all inheritable handles (for write_handle_).
      0x0,    // Default creation flags.
      NULL,   // Inherit the parent's environment.
      UnitTest::GetInstance()->original_working_dir(),
      &startup_info,
      &process_info) != FALSE);
  child_handle_.Reset(process_info.hProcess);
  // Only the process handle is kept; the thread handle is closed right away.
  ::CloseHandle(process_info.hThread);
  set_spawned(true);
  return OVERSEE_TEST;
}
# else  // We are not on Windows.

// ForkingDeathTest provides implementations for most of the abstract
// methods of the DeathTest interface.  Only the AssumeRole method is
// left undefined.
class ForkingDeathTest : public DeathTestImpl {
 public:
  ForkingDeathTest(const char* statement, const RE* regex);

  // All of these virtual functions are inherited from DeathTest.
  virtual int Wait();

 protected:
  // Lets derived classes record the PID obtained when creating the child.
  void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }

 private:
  // PID of child process during death test; 0 in the child process itself.
  pid_t child_pid_;
};

// Constructs a ForkingDeathTest.  child_pid_ starts out as -1 until a
// derived class records the real PID through set_child_pid().
ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex)
    : DeathTestImpl(a_statement, a_regex),
      child_pid_(-1) {}

// Blocks until the death test child has exited and returns its exit status
// as reported by waitpid(); returns 0 if no child was spawned.  The outcome
// data member is set along the way by ReadAndInterpretStatusByte().
int ForkingDeathTest::Wait() {
  if (spawned()) {
    ReadAndInterpretStatusByte();

    int status_value;
    GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0));
    set_status(status_value);
    return status_value;
  }
  return 0;
}

// A concrete death test class that forks, then immediately runs the test
// in the child process.
class NoExecDeathTest : public ForkingDeathTest {
 public:
  // Forwards the statement and regex to ForkingDeathTest.
  NoExecDeathTest(const char* a_statement, const RE* a_regex) :
      ForkingDeathTest(a_statement, a_regex) { }
  // Forks the process; see the definition below.
  virtual TestRole AssumeRole();
};

// The AssumeRole process for a fork-and-run death test.  Creates a pipe for
// the status byte and forks.  The child keeps the write end and returns
// EXECUTE_TEST; the parent keeps the read end, marks the test as spawned
// and returns OVERSEE_TEST.  A warning is logged first if more than one
// thread is running.
DeathTest::TestRole NoExecDeathTest::AssumeRole() {
  const size_t num_threads = GetThreadCount();
  if (num_threads != 1) {
    GTEST_LOG_(WARNING) << DeathTestThreadWarning(num_threads);
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);

  DeathTest::set_last_death_test_message("");
  CaptureStderr();
  // fork() copies the log file buffers but shares the file descriptors.
  // Flushing all log files now keeps descriptors and buffers in the parent
  // in sync even when the child closes its descriptors.  This is done as
  // close to the fork as possible to narrow the race with other threads
  // that may be writing to the log file.
  FlushInfoLog();

  const pid_t child_pid = fork();
  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  set_child_pid(child_pid);

  if (child_pid != 0) {
    // Parent: keep only the read end of the pipe.
    GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
    set_read_fd(pipe_fd[0]);
    set_spawned(true);
    return OVERSEE_TEST;
  }

  // Child: keep only the write end of the pipe.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
  set_write_fd(pipe_fd[1]);
  // Send all logging to stderr in the child so that it never writes to the
  // log files concurrently with the parent.  The parent captures stderr and
  // appends the child's output to a log.
  LogToStderr();
  // Event forwarding to the listeners of the event listener API must be
  // shut down in death test subprocesses.
  GetUnitTestImpl()->listeners()->SuppressEventForwarding();
  return EXECUTE_TEST;
}

// A concrete death test class that forks and re-executes the main
// program from the beginning, with command-line flags set that cause
// only this specific death test to be run.
class ExecDeathTest : public ForkingDeathTest {
 public:
  // Records the source location in addition to the statement and regex
  // handled by ForkingDeathTest.
  ExecDeathTest(const char* a_statement, const RE* a_regex,
                const char* file, int line) :
      ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
  virtual TestRole AssumeRole();
 private:
  // The name of the file in which the death test is located.
  const char* const file_;
  // The line number on which the death test is located.
  const int line_;
};

// Utility class for accumulating command-line arguments.
//
// The arguments are kept as heap-allocated C strings (created with
// posix::StrDup and released with free() in the destructor) in a vector
// that always ends with a NULL entry, so Argv() can be handed directly to
// functions expecting a NULL-terminated argv array.
//
// The class owns the duplicated strings, so copying is disabled: a
// member-wise copy would free every string twice.
class Arguments {
 public:
  // Starts with just the terminating NULL entry.
  Arguments() {
    args_.push_back(NULL);
  }

  // Releases every duplicated string (free(NULL) on the terminator is a
  // no-op).
  ~Arguments() {
    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
         ++i) {
      free(*i);
    }
  }

  // Appends a copy of `argument`, keeping the NULL terminator last.
  void AddArgument(const char* argument) {
    args_.insert(args_.end() - 1, posix::StrDup(argument));
  }

  // Appends a copy of every string in `arguments`, in order.  Str must
  // provide c_str().
  template <typename Str>
  void AddArguments(const ::std::vector<Str>& arguments) {
    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
         i != arguments.end();
         ++i) {
      AddArgument(i->c_str());
    }
  }

  // Returns the NULL-terminated argument array.  The pointer is invalidated
  // by any later AddArgument()/AddArguments() call and by destruction.
  char* const* Argv() {
    return &args_[0];
  }

 private:
  // Declared but not defined: Arguments owns raw malloc'ed strings, so an
  // implicit copy would lead to a double free.
  Arguments(const Arguments&);
  Arguments& operator=(const Arguments&);

  std::vector<char*> args_;
};

// A struct that encompasses the arguments to the child process of a
// threadsafe-style death test process.  It is passed to
// ExecDeathTestChildMain() through a void* parameter.
struct ExecDeathTestArgs {
  char* const* argv;  // Command-line arguments for the child's call to exec
  int close_fd;       // File descriptor to close; the read end of a pipe
};

// GetEnviron() returns the process environment array that is passed on to
// execve() for the death test child.
#  if GTEST_OS_MAC
inline char** GetEnviron() {
  // When Google Test is built as a framework on MacOS X, the environ variable
  // is unavailable. Apple's documentation (man environ) recommends using
  // _NSGetEnviron() instead.
  return *_NSGetEnviron();
}
#  else
// Some POSIX platforms expect you to declare environ. extern "C" makes
// it reside in the global namespace.
extern "C" char** environ;
inline char** GetEnviron() { return environ; }
#  endif  // GTEST_OS_MAC

// Entry point of a threadsafe-style death test child process.  It runs in
// a clone()-ed process and therefore must avoid any potentially unsafe
// operations like malloc or libc functions.  It closes the pipe's read end,
// changes to the original working directory and replaces the process image
// with the test program via execve().  EXIT_FAILURE is returned only if one
// of those steps fails and DeathTestAbort() returns.
static int ExecDeathTestChildMain(void* child_arg) {
  ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));

  // The test program has to run in the same environment in which it was
  // originally invoked, so switch back to the original working directory
  // first.
  const char* const original_dir =
      UnitTest::GetInstance()->original_working_dir();

  // chdir() is safe to call here as it's a direct system call.
  if (chdir(original_dir) == 0) {
    // execve() is safe to call as it's a direct system call.  execvp()
    // cannot be used because it's a libc function and thus potentially
    // unsafe.  Since execve() doesn't search the PATH, the user must invoke
    // the test program via a valid path that contains at least one path
    // separator.
    execve(args->argv[0], args->argv, GetEnviron());
    // Only reached if execve() failed.
    DeathTestAbort(String::Format("execve(%s, ...) in %s failed: %s",
                                  args->argv[0],
                                  original_dir,
                                  GetLastErrnoDescription().c_str()));
  } else {
    DeathTestAbort(String::Format("chdir(\"%s\") failed: %s",
                                  original_dir,
                                  GetLastErrnoDescription().c_str()));
  }
  return EXIT_FAILURE;
}

// Two utility routines that together determine the direction the stack
// grows.
// This could be accomplished more elegantly by a single recursive
// function, but we want to guard against the unlikely possibility of
// a smart compiler optimizing the recursion away.
//
// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
// StackLowerThanAddress into StackGrowsDown, which then doesn't give
// correct answer.
bool StackLowerThanAddress(const void* ptr) GTEST_NO_INLINE_;
bool StackLowerThanAddress(const void* ptr) {
  // `dummy` lives in this function's own frame; the result is true when its
  // address is lower than the address passed in by the caller.
  int dummy;
  return &dummy < ptr;
}

// Returns true if a local variable in a callee (StackLowerThanAddress) has
// a lower address than a local variable in this function, i.e. the stack
// grows towards lower addresses.
bool StackGrowsDown() {
  int dummy;
  return StackLowerThanAddress(&dummy);
}

// A threadsafe implementation of fork(2) for threadsafe-style death tests
// that uses clone(2).  It dies with an error message if anything goes
// wrong.
//
// argv:     arguments for the child's execve() call.
// close_fd: descriptor the child closes before exec (the pipe's read end).
// Returns the PID of the created child process.
static pid_t ExecDeathTestFork(char* const* argv, int close_fd) {
  ExecDeathTestArgs args = { argv, close_fd };
  pid_t child_pid = -1;

#  if GTEST_HAS_CLONE
  const bool use_fork = GTEST_FLAG(death_test_use_fork);

  if (!use_fork) {
    // The growth direction is determined once and cached.
    static const bool stack_grows_down = StackGrowsDown();
    // The child gets a stack of a single page.
    const size_t stack_size = getpagesize();
    // MAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
    void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                             MAP_ANON | MAP_PRIVATE, -1, 0);
    GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
    // clone() is handed the end of the mapping from which the stack grows:
    // the highest address when the stack grows down, the lowest otherwise.
    void* const stack_top =
        static_cast<char*>(stack) + (stack_grows_down ? stack_size : 0);

    child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);

    // The parent unmaps its view of the child's stack; a failed clone() is
    // detected by the child_pid check at the end of the function.
    GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
  }
#  else
  const bool use_fork = true;
#  endif  // GTEST_HAS_CLONE

  // Plain fork() path: the child runs the same entry point and never
  // returns to the caller.
  if (use_fork && (child_pid = fork()) == 0) {
      ExecDeathTestChildMain(&args);
      _exit(0);
  }

  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  return child_pid;
}

// The AssumeRole process for a fork-and-exec death test.  In the parent it
// re-executes the main program from the beginning, adding the
// --gtest_filter and --gtest_internal_run_death_test flags so that only the
// current death test is re-run, and returns OVERSEE_TEST.  In the
// re-executed child (recognised by the internal flag being present) it just
// adopts the write descriptor from the flag and returns EXECUTE_TEST.
DeathTest::TestRole ExecDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  if (flag != NULL) {
    // Child side: the status pipe was set up by the parent.
    set_write_fd(flag->write_fd());
    return EXECUTE_TEST;
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
  // The write end must survive the exec in the child, so make sure its
  // close-on-exec flag is cleared.
  GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);

  // Build the child's command line: the original arguments followed by the
  // filter flag and the internal flag (file|line|index|write descriptor).
  const String filter_arg =
      String::Format("--%s%s=%s.%s",
                     GTEST_FLAG_PREFIX_, kFilterFlag,
                     info->test_case_name(), info->name());
  const String internal_arg =
      String::Format("--%s%s=%s|%d|%d|%d",
                     GTEST_FLAG_PREFIX_, kInternalRunDeathTestFlag,
                     file_, line_, death_test_index, pipe_fd[1]);
  Arguments child_args;
  child_args.AddArguments(GetArgvs());
  child_args.AddArgument(filter_arg.c_str());
  child_args.AddArgument(internal_arg.c_str());

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // See the comment in NoExecDeathTest::AssumeRole for why the next line
  // is necessary.
  FlushInfoLog();

  const pid_t child_pid = ExecDeathTestFork(child_args.Argv(), pipe_fd[0]);
  // The parent only reads from the pipe; drop its copy of the write end.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
  set_child_pid(child_pid);
  set_read_fd(pipe_fd[0]);
  set_spawned(true);
  return OVERSEE_TEST;
}

# endif  // !GTEST_OS_WINDOWS

// Creates a concrete DeathTest-derived class that depends on the
// --gtest_death_test_style flag, and sets the pointer pointed to
// by the "test" argument to its address.  If the test should be
// skipped, sets that pointer to NULL.  Returns true, unless the
// flag is set to an invalid value.  Also returns false (after recording a
// message) when the death test count exceeds the index carried by the
// internal run-death-test flag.
bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
                                     const char* file, int line,
                                     DeathTest** test) {
  UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  // Every call bumps the current test's death test counter; the returned
  // value is compared against the flag's index below.
  const int death_test_index = impl->current_test_info()
      ->increment_death_test_count();

  // A non-NULL flag identifies (by file, line and index) the single death
  // test this process is meant to execute.
  if (flag != NULL) {
    if (death_test_index > flag->index()) {
      DeathTest::set_last_death_test_message(String::Format(
          "Death test count (%d) somehow exceeded expected maximum (%d)",
          death_test_index, flag->index()));
      return false;
    }

    // Not the requested death test: tell the caller to skip it.
    if (!(flag->file() == file && flag->line() == line &&
          flag->index() == death_test_index)) {
      *test = NULL;
      return true;
    }
  }

# if GTEST_OS_WINDOWS

  // On Windows both styles map to the same implementation.
  if (GTEST_FLAG(death_test_style) == "threadsafe" ||
      GTEST_FLAG(death_test_style) == "fast") {
    *test = new WindowsDeathTest(statement, regex, file, line);
  }

# else

  if (GTEST_FLAG(death_test_style) == "threadsafe") {
    *test = new ExecDeathTest(statement, regex, file, line);
  } else if (GTEST_FLAG(death_test_style) == "fast") {
    *test = new NoExecDeathTest(statement, regex);
  }

# endif  // GTEST_OS_WINDOWS

  // This else completes whichever if/else-if chain the preprocessor kept
  // above.
  else {  // NOLINT - this is more readable than unbalanced brackets inside #if.
    DeathTest::set_last_death_test_message(String::Format(
        "Unknown death test style \"%s\" encountered",
        GTEST_FLAG(death_test_style).c_str()));
    return false;
  }

  return true;
}

// Breaks `str` into the fields separated by `delimiter` and stores them in
// `*dest`, replacing its previous contents.  Empty fields are kept, so an
// input with N delimiters always yields N + 1 fields.
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use it
// here.
static void SplitString(const ::std::string& str, char delimiter,
                        ::std::vector< ::std::string>* dest) {
  ::std::vector< ::std::string> fields;
  ::std::string::size_type field_start = 0;
  while (::testing::internal::AlwaysTrue()) {
    const ::std::string::size_type hit = str.find(delimiter, field_start);
    if (hit != ::std::string::npos) {
      fields.push_back(str.substr(field_start, hit - field_start));
      field_start = hit + 1;
      continue;
    }
    // No further delimiter: the remainder is the last field.
    fields.push_back(str.substr(field_start));
    break;
  }
  dest->swap(fields);
}

# if GTEST_OS_WINDOWS
// Recreates the pipe and event handles from the provided parameters,
// signals the event, and returns a file descriptor wrapped around the pipe
// handle. This function is called in the child process only.
//
// parent_process_id is the id of the process owning the handles;
// write_handle_as_size_t and event_handle_as_size_t are the parent's pipe
// write handle and event handle, transported as integers.  Every failure
// is reported through DeathTestAbort().
int GetStatusFileDescriptor(unsigned int parent_process_id,
                            size_t write_handle_as_size_t,
                            size_t event_handle_as_size_t) {
  AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
                                                   FALSE,  // Non-inheritable.
                                                   parent_process_id));
  // OpenProcess() reports failure by returning NULL (not
  // INVALID_HANDLE_VALUE), so both values are treated as "no handle".
  if (parent_process_handle.Get() == NULL ||
      parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
    DeathTestAbort(String::Format("Unable to open parent process %u",
                                  parent_process_id));
  }

  // TODO(vladl@google.com): Replace the following check with a
  // compile-time assertion when available.
  GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));

  const HANDLE write_handle =
      reinterpret_cast<HANDLE>(write_handle_as_size_t);
  HANDLE dup_write_handle;

  // The newly initialized handle is accessible only in the parent
  // process. To obtain one accessible within the child, we need to use
  // DuplicateHandle.
  if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
                         ::GetCurrentProcess(), &dup_write_handle,
                         0x0,    // Requested privileges ignored since
                                 // DUPLICATE_SAME_ACCESS is used.
                         FALSE,  // Request non-inheritable handler.
                         DUPLICATE_SAME_ACCESS)) {
    DeathTestAbort(String::Format(
        "Unable to duplicate the pipe handle %Iu from the parent process %u",
        write_handle_as_size_t, parent_process_id));
  }

  const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
  HANDLE dup_event_handle;

  // Same duplication dance for the event used to notify the parent.
  if (!::DuplicateHandle(parent_process_handle.Get(), event_handle,
                         ::GetCurrentProcess(), &dup_event_handle,
                         0x0,
                         FALSE,
                         DUPLICATE_SAME_ACCESS)) {
    DeathTestAbort(String::Format(
        "Unable to duplicate the event handle %Iu from the parent process %u",
        event_handle_as_size_t, parent_process_id));
  }

  // Wraps the duplicated pipe handle in a C runtime file descriptor.
  const int write_fd =
      ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);
  if (write_fd == -1) {
    DeathTestAbort(String::Format(
        "Unable to convert pipe handle %Iu to a file descriptor",
        write_handle_as_size_t));
  }

  // Signals the parent that the write end of the pipe has been acquired
  // so the parent can release its own write end.
  ::SetEvent(dup_event_handle);

  return write_fd;
}
# endif  // GTEST_OS_WINDOWS

// Builds an InternalRunDeathTestFlag from the value of
// GTEST_FLAG(internal_run_death_test).  Returns NULL when the flag is
// empty; otherwise the caller receives a newly allocated object.  The
// flag value is a '|'-separated list whose first field is passed through
// verbatim and whose remaining fields must be natural numbers (4 fields
// in total, or 6 on Windows); anything else is reported through
// DeathTestAbort().
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
  if (GTEST_FLAG(internal_run_death_test) == "") return NULL;

  // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
  // can use it here.
  ::std::vector< ::std::string> parts;
  SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &parts);

  int line_number = -1;
  int test_index = -1;
  int status_fd = -1;

# if GTEST_OS_WINDOWS

  unsigned int parent_process_id = 0;
  size_t write_handle_as_size_t = 0;
  size_t event_handle_as_size_t = 0;

  // The size check comes first so that no field is indexed out of range.
  const bool well_formed = parts.size() == 6
      && ParseNaturalNumber(parts[1], &line_number)
      && ParseNaturalNumber(parts[2], &test_index)
      && ParseNaturalNumber(parts[3], &parent_process_id)
      && ParseNaturalNumber(parts[4], &write_handle_as_size_t)
      && ParseNaturalNumber(parts[5], &event_handle_as_size_t);
  if (!well_formed) {
    DeathTestAbort(String::Format(
        "Bad --gtest_internal_run_death_test flag: %s",
        GTEST_FLAG(internal_run_death_test).c_str()));
  }
  // On Windows the flag carries handles rather than a descriptor, so the
  // descriptor has to be recreated in this process.
  status_fd = GetStatusFileDescriptor(parent_process_id,
                                      write_handle_as_size_t,
                                      event_handle_as_size_t);
# else

  // The size check comes first so that no field is indexed out of range.
  const bool well_formed = parts.size() == 4
      && ParseNaturalNumber(parts[1], &line_number)
      && ParseNaturalNumber(parts[2], &test_index)
      && ParseNaturalNumber(parts[3], &status_fd);
  if (!well_formed) {
    DeathTestAbort(String::Format(
        "Bad --gtest_internal_run_death_test flag: %s",
        GTEST_FLAG(internal_run_death_test).c_str()));
  }

# endif  // GTEST_OS_WINDOWS

  return new InternalRunDeathTestFlag(parts[0], line_number, test_index,
                                      status_fd);
}

}  // namespace internal

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace testing
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: keith.ray@gmail.com (Keith Ray)


#include <stdlib.h>

#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h>
#elif GTEST_OS_WINDOWS
# include <direct.h>
# include <io.h>
#elif GTEST_OS_SYMBIAN || GTEST_OS_NACL
// Symbian OpenC and NaCl have PATH_MAX in sys/syslimits.h
# include <sys/syslimits.h>
#else
# include <limits.h>
# include <climits>  // Some Linux distributions define PATH_MAX here.
#endif  // GTEST_OS_WINDOWS_MOBILE

#if GTEST_OS_WINDOWS
# define GTEST_PATH_MAX_ _MAX_PATH
#elif defined(PATH_MAX)
# define GTEST_PATH_MAX_ PATH_MAX
#elif defined(_XOPEN_PATH_MAX)
# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
#elif defined(_POSIX_PATH_MAX)
# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
#else  // conservatively set to 256
#define GTEST_PATH_MAX_ 256
#endif  // GTEST_OS_WINDOWS


namespace testing {
namespace internal {

// Platform-specific path constants: the preferred separator (as a
// character and as a string), the optional alternate separator, and the
// string used to denote the current directory.
#if GTEST_OS_WINDOWS
// On Windows, '\\' is the standard path separator, but many tools and the
// Windows API also accept '/' as an alternate path separator. Unless otherwise
// noted, a file path can contain either kind of path separators, or a mixture
// of them.
const char kPathSeparator = '\\';
const char kAlternatePathSeparator = '/';
const char kPathSeparatorString[] = "\\";
const char kAlternatePathSeparatorString[] = "/";
# if GTEST_OS_WINDOWS_MOBILE
// Windows CE doesn't have a current directory. You should not use
// the current directory in tests on Windows CE, but this at least
// provides a reasonable fallback.
const char kCurrentDirectoryString[] = "\\";
// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
const DWORD kInvalidFileAttributes = 0xffffffff;
# else
const char kCurrentDirectoryString[] = ".\\";
# endif  // GTEST_OS_WINDOWS_MOBILE
#else
// Everywhere else '/' is the only separator defined here.
const char kPathSeparator = '/';
const char kPathSeparatorString[] = "/";
const char kCurrentDirectoryString[] = "./";
#endif  // GTEST_OS_WINDOWS

// Tells whether c is accepted as a path separator: the primary separator
// always is, and the alternate one is too when GTEST_HAS_ALT_PATH_SEP_
// is enabled.
static bool IsPathSeparator(char c) {
  if (c == kPathSeparator) {
    return true;
  }
#if GTEST_HAS_ALT_PATH_SEP_
  if (c == kAlternatePathSeparator) {
    return true;
  }
#endif
  return false;
}

// Returns the current working directory as a FilePath, or an empty
// FilePath ("") if it cannot be obtained.
FilePath FilePath::GetCurrentDir() {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE doesn't have a current directory, so we just return
  // something reasonable.
  return FilePath(kCurrentDirectoryString);
#else
  char buffer[GTEST_PATH_MAX_ + 1] = { '\0' };
# if GTEST_OS_WINDOWS
  const char* const dir = _getcwd(buffer, sizeof(buffer));
# else
  const char* const dir = getcwd(buffer, sizeof(buffer));
# endif  // GTEST_OS_WINDOWS
  // A NULL result means the lookup failed.
  return FilePath(dir == NULL ? "" : buffer);
#endif  // GTEST_OS_WINDOWS_MOBILE
}

// Returns a copy of the FilePath with the case-insensitive extension removed.
// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
// FilePath("dir/file"). If a case-insensitive extension is not
// found, returns a copy of the original FilePath.
//
// extension is given without the leading dot and may be of any length.
FilePath FilePath::RemoveExtension(const char* extension) const {
  const String dot_extension(String::Format(".%s", extension));
  if (pathname_.EndsWithCaseInsensitive(dot_extension.c_str())) {
    // Strip exactly as many characters as "." + extension occupies; a
    // fixed count would only be right for three-letter extensions.
    return FilePath(String(pathname_.c_str(),
                           pathname_.length() - dot_extension.length()));
  }
  return *this;
}

// Returns a pointer to the last occurrence of a valid path separator in
// the FilePath, or NULL if the path contains none. When
// GTEST_HAS_ALT_PATH_SEP_ is enabled, the alternate separator is
// considered as well and whichever occurs later wins.
const char* FilePath::FindLastPathSeparator() const {
  const char* result = strrchr(c_str(), kPathSeparator);
#if GTEST_HAS_ALT_PATH_SEP_
  const char* const alt = strrchr(c_str(), kAlternatePathSeparator);
  if (alt != NULL) {
    // Only compare the two pointers when both are non-NULL; comparing
    // two pointers of which only one is NULL is undefined.
    if (result == NULL || alt > result) {
      result = alt;
    }
  }
#endif
  return result;
}

// Returns a copy of the FilePath holding only what follows the last path
// separator.
// Example: FilePath("path/to/file").RemoveDirectoryName() returns
// FilePath("file"). A path without any separator ("just_a_file") is
// returned unmodified, and a path ending in a separator ("just_a_dir/")
// yields an empty FilePath ("").
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveDirectoryName() const {
  const char* const separator = FindLastPathSeparator();
  if (separator == NULL) {
    return *this;
  }
  return FilePath(String(separator + 1));
}

// RemoveFileName returns everything up to and including the last path
// separator.
// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
// If the FilePath contains no separator at all ("a_file"), the result is
// the current-directory string: FilePath("./") or, on Windows,
// FilePath(".\\"). If the separator is the first character ("/a_file"),
// the result is just that leading separator. A path that already ends in
// a separator, like "just/a/dir/", comes back with the same text.
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveFileName() const {
  const char* const separator = FindLastPathSeparator();
  if (separator == NULL) {
    return FilePath(String(kCurrentDirectoryString));
  }
  // Keep the separator itself, hence the "+ 1".
  return FilePath(String(c_str(), separator + 1 - c_str()));
}

// Helper functions for naming files in a directory for xml output.

// Composes "<directory>/<base_name>.<extension>" when number is zero and
// "<directory>/<base_name>_<number>.<extension>" for any other number.
// E.g. directory = "dir", base_name = "test", extension = "xml" gives
// "dir/test.xml" for number 0 and "dir/test_12.xml" for number 12.
// On Windows platform, uses \ as the separator rather than /.
FilePath FilePath::MakeFileName(const FilePath& directory,
                                const FilePath& base_name,
                                int number,
                                const char* extension) {
  const String leaf = (number == 0)
      ? String::Format("%s.%s", base_name.c_str(), extension)
      : String::Format("%s_%d.%s", base_name.c_str(), number, extension);
  return ConcatPaths(directory, FilePath(leaf));
}

// Joins directory and relative_path with exactly one primary separator
// between them, e.g. "dir" + "test.xml" gives "dir/test.xml". A trailing
// separator on directory is dropped before joining, and an empty
// directory makes the function return relative_path as is.
// On Windows, uses \ as the separator rather than /.
FilePath FilePath::ConcatPaths(const FilePath& directory,
                               const FilePath& relative_path) {
  if (directory.IsEmpty()) {
    return relative_path;
  }
  const FilePath trimmed_dir(directory.RemoveTrailingPathSeparator());
  const String joined = String::Format("%s%c%s", trimmed_dir.c_str(),
                                       kPathSeparator,
                                       relative_path.c_str());
  return FilePath(joined);
}

// Returns true if pathname describes something findable in the file-system,
// either a file, directory, or whatever.
bool FilePath::FileOrDirectoryExists() const {
#if GTEST_OS_WINDOWS_MOBILE
  LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
  const DWORD attributes = GetFileAttributes(unicode);
  delete [] unicode;
  return attributes != kInvalidFileAttributes;
#else
  posix::StatStruct file_stat;
  return posix::Stat(pathname_.c_str(), &file_stat) == 0;
#endif  // GTEST_OS_WINDOWS_MOBILE
}

// Returns true if pathname describes a directory in the file-system
// that exists.
bool FilePath::DirectoryExists() const {
  bool result = false;
#if GTEST_OS_WINDOWS
  // Don't strip off trailing separator if path is a root directory on
  // Windows (like "C:\\").
  const FilePath& path(IsRootDirectory() ? *this :
                                           RemoveTrailingPathSeparator());
#else
  const FilePath& path(*this);
#endif

#if GTEST_OS_WINDOWS_MOBILE
  LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
  const DWORD attributes = GetFileAttributes(unicode);
  delete [] unicode;
  if ((attributes != kInvalidFileAttributes) &&
      (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
    result = true;
  }
#else
  posix::StatStruct file_stat;
  result = posix::Stat(path.c_str(), &file_stat) == 0 &&
      posix::IsDir(file_stat);
#endif  // GTEST_OS_WINDOWS_MOBILE

  return result;
}

// Returns true if the pathname denotes a root directory. On Windows that
// is a three-character absolute path (there is one root per disk drive);
// elsewhere it is a single path separator.
bool FilePath::IsRootDirectory() const {
#if GTEST_OS_WINDOWS
  // TODO(wan@google.com): on Windows a network share like
  // \\server\share can be a root directory, although it cannot be the
  // current directory.  Handle this properly.
  if (pathname_.length() != 3) {
    return false;
  }
  return IsAbsolutePath();
#else
  if (pathname_.length() != 1) {
    return false;
  }
  return IsPathSeparator(pathname_.c_str()[0]);
#endif
}

// Returns true if the pathname is absolute. On Windows that means an
// ASCII drive letter, a ':' and a path separator at the start; elsewhere
// it means the first character is a path separator.
bool FilePath::IsAbsolutePath() const {
  const char* const chars = pathname_.c_str();
#if GTEST_OS_WINDOWS
  if (pathname_.length() < 3) {
    return false;
  }
  const char drive = chars[0];
  const bool drive_is_letter =
      (drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z');
  return drive_is_letter && chars[1] == ':' && IsPathSeparator(chars[2]);
#else
  return IsPathSeparator(chars[0]);
#endif
}

// Returns a pathname for which FileOrDirectoryExists() is currently
// false. The first candidate is directory/base_name.extension; if that
// is taken, directory/base_name_<number>.extension is tried with number
// = 1, 2, ... until a free name is found.
// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
// There could be a race condition if two or more processes are calling this
// function at the same time -- they could both pick the same filename.
FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
                                          const FilePath& base_name,
                                          const char* extension) {
  FilePath candidate;
  for (int number = 0; ; ++number) {
    candidate.Set(MakeFileName(directory, base_name, number, extension));
    if (!candidate.FileOrDirectoryExists()) {
      break;
    }
  }
  return candidate;
}

// Returns true if the pathname is non-empty and its last character is a
// path separator, i.e. the path is written as a directory. This is a
// purely textual test: it does NOT check that a directory (or file)
// actually exists.
bool FilePath::IsDirectory() const {
  if (pathname_.empty()) {
    return false;
  }
  const char last_char = pathname_.c_str()[pathname_.length() - 1];
  return IsPathSeparator(last_char);
}

// Create directories so that path exists. Returns true if successful or if
// the directories already exist; returns false if unable to create directories
// for any reason.
bool FilePath::CreateDirectoriesRecursively() const {
  if (!this->IsDirectory()) {
    return false;
  }

  if (pathname_.length() == 0 || this->DirectoryExists()) {
    return true;
  }

  const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
  return parent.CreateDirectoriesRecursively() && this->CreateFolder();
}

// Creates the single directory named by this path (parents are not
// created). Returns true if the directory was created or already exists;
// returns false if it could not be created for any reason, including a
// missing parent directory. Not named "CreateDirectory" because that's a
// macro on Windows.
bool FilePath::CreateFolder() const {
#if GTEST_OS_WINDOWS_MOBILE
  FilePath removed_sep(this->RemoveTrailingPathSeparator());
  LPCWSTR wide_path = String::AnsiToUtf16(removed_sep.c_str());
  int status = CreateDirectory(wide_path, NULL) ? 0 : -1;
  delete [] wide_path;
#elif GTEST_OS_WINDOWS
  int status = _mkdir(pathname_.c_str());
#else
  int status = mkdir(pathname_.c_str(), 0777);
#endif  // GTEST_OS_WINDOWS_MOBILE

  if (status != -1) {
    return true;  // Created without error.
  }
  // The call failed, which is still fine if the directory is there.
  return this->DirectoryExists();
}

// Returns a copy of the path with its final character dropped when that
// character is a path separator (see IsDirectory()); otherwise returns
// the path unmodified.
// On Windows platform, uses \ as the separator, other platforms use /.
FilePath FilePath::RemoveTrailingPathSeparator() const {
  if (!IsDirectory()) {
    return *this;
  }
  return FilePath(String(pathname_.c_str(), pathname_.length() - 1));
}

// Removes any redundant separators that might be in the pathname.
// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
// redundancies that might be in a pathname involving "." or "..".
// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
void FilePath::Normalize() {
  if (pathname_.c_str() == NULL) {
    pathname_ = "";
    return;
  }
  const char* src = pathname_.c_str();
  char* const dest = new char[pathname_.length() + 1];
  char* dest_ptr = dest;
  memset(dest_ptr, 0, pathname_.length() + 1);

  while (*src != '\0') {
    *dest_ptr = *src;
    if (!IsPathSeparator(*src)) {
      src++;
    } else {
#if GTEST_HAS_ALT_PATH_SEP_
      if (*dest_ptr == kAlternatePathSeparator) {
        *dest_ptr = kPathSeparator;
      }
#endif
      while (IsPathSeparator(*src))
        src++;
    }
    dest_ptr++;
  }
  *dest_ptr = '\0';
  pathname_ = dest;
  delete[] dest;
}

}  // namespace internal
}  // namespace testing
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)


#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h>  // For TerminateProcess()
#elif GTEST_OS_WINDOWS
# include <io.h>
# include <sys/stat.h>
#else
# include <unistd.h>
#endif  // GTEST_OS_WINDOWS_MOBILE

#if GTEST_OS_MAC
# include <mach/mach_init.h>
# include <mach/task.h>
# include <mach/vm_map.h>
#endif  // GTEST_OS_MAC


// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#undef GTEST_IMPLEMENTATION_

namespace testing {
namespace internal {

// File descriptor numbers of the standard output and standard error
// streams.
#if defined(_MSC_VER) || defined(__BORLANDC__)
// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
// The descriptor numbers are therefore spelled out literally here.
const int kStdOutFileno = 1;
const int kStdErrFileno = 2;
#else
// Other toolchains take the values from the STDOUT_FILENO/STDERR_FILENO
// macros.
const int kStdOutFileno = STDOUT_FILENO;
const int kStdErrFileno = STDERR_FILENO;
#endif  // _MSC_VER

#if GTEST_OS_MAC

// Returns the number of threads running in the process, or 0 to indicate that
// we cannot detect it.
size_t GetThreadCount() {
  const task_t task = mach_task_self();
  mach_msg_type_number_t thread_count;
  thread_act_array_t thread_list;
  const kern_return_t status = task_threads(task, &thread_list, &thread_count);
  if (status == KERN_SUCCESS) {
    // task_threads allocates resources in thread_list and we need to free them
    // to avoid leaks.
    vm_deallocate(task,
                  reinterpret_cast<vm_address_t>(thread_list),
                  sizeof(thread_t) * thread_count);
    return static_cast<size_t>(thread_count);
  } else {
    return 0;
  }
}

#else

// Fallback used when no platform-specific implementation is available.
// There's no portable way to count the threads of a process, so this
// always returns 0, which callers must read as "count unknown".
size_t GetThreadCount() {
  const size_t kUnknownThreadCount = 0;
  return kUnknownThreadCount;
}

#endif  // GTEST_OS_MAC

#if GTEST_USES_POSIX_RE

// Implements RE.  Currently only needed for death tests.

// Releases the compiled regexes (only when they were compiled
// successfully) and the copy of the pattern string.
RE::~RE() {
  // regfree'ing an invalid regex might crash because the content
  // of the regex is undefined. Since the regex's are essentially
  // the same, one cannot be valid (or invalid) without the other
  // being so too.
  const bool compiled = is_valid_;
  if (compiled) {
    regfree(&partial_regex_);
    regfree(&full_regex_);
  }

  char* const owned_pattern = const_cast<char*>(pattern_);
  free(owned_pattern);
}

// Returns true iff re is valid and matches the whole of str. An invalid
// re never matches.
bool RE::FullMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }

  regmatch_t unused_match;
  const int status = regexec(&re.full_regex_, str, 1, &unused_match, 0);
  return status == 0;
}

// Returns true iff re is valid and matches some substring of str (str
// itself included). An invalid re never matches.
bool RE::PartialMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }

  regmatch_t unused_match;
  const int status = regexec(&re.partial_regex_, str, 1, &unused_match, 0);
  return status == 0;
}

// Initializes an RE from its string representation.
//
// Stores a copy of regex in pattern_ and compiles two POSIX Extended
// regexes from it: full_regex_ ("^(regex)$", used for full matches) and
// partial_regex_ (used for partial matches).  is_valid_ ends up true only
// if both compile; otherwise a non-fatal failure is reported and no
// compiled regex is left behind.
void RE::Init(const char* regex) {
  pattern_ = posix::StrDup(regex);

  // Reserves enough bytes to hold the regular expression used for a
  // full match.
  const size_t full_regex_len = strlen(regex) + 10;
  char* const full_pattern = new char[full_regex_len];

  snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
  is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
  // partial_regex_ is compiled only when full_regex_ compiled.  This is
  // fine because ~RE() never touches either regex unless is_valid_ is
  // true.
  //
  // Some implementation of POSIX regex (e.g. on at least some
  // versions of Cygwin) doesn't accept the empty string as a valid
  // regex.  We change it to an equivalent form "()" to be safe.
  if (is_valid_) {
    const char* const partial_regex = (*regex == '\0') ? "()" : regex;
    is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
    if (!is_valid_) {
      // full_regex_ did compile, but with is_valid_ now false the
      // destructor will not free it, so release it here to avoid a leak.
      regfree(&full_regex_);
    }
  }
  EXPECT_TRUE(is_valid_)
      << "Regular expression \"" << regex
      << "\" is not a valid POSIX Extended regular expression.";

  delete[] full_pattern;
}

#elif GTEST_USES_SIMPLE_RE

// Returns true iff ch is one of the characters of str. The terminating
// '\0' of str is not part of the set, so ch == '\0' always yields false.
bool IsInSet(char ch, const char* str) {
  if (ch == '\0') {
    return false;
  }
  return strchr(str, ch) != NULL;
}

// Returns true iff ch belongs to the given classification.  Unlike
// similar functions in <ctype.h>, these aren't affected by the
// current locale.
// True iff ch is one of the ASCII digits '0'..'9'.
bool IsAsciiDigit(char ch) {
  return ch >= '0' && ch <= '9';
}
// True iff ch is one of the ASCII punctuation characters listed below.
bool IsAsciiPunct(char ch) {
  const char* const kPunctuation = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  return IsInSet(ch, kPunctuation);
}
// True iff ch is one of the repetition meta characters '?', '*' or '+'.
bool IsRepeat(char ch) {
  return IsInSet(ch, "?*+");
}
// True iff ch is a space, form feed, newline, carriage return,
// horizontal tab or vertical tab.
bool IsAsciiWhiteSpace(char ch) {
  return IsInSet(ch, " \f\n\r\t\v");
}
// True iff ch is an ASCII letter, an ASCII digit or an underscore.
bool IsAsciiWordChar(char ch) {
  if (ch == '_') {
    return true;
  }
  if ('0' <= ch && ch <= '9') {
    return true;
  }
  if ('a' <= ch && ch <= 'z') {
    return true;
  }
  return 'A' <= ch && ch <= 'Z';
}

// Returns true iff "\\c" is a supported escape sequence, i.e. c is
// either an ASCII punctuation character or one of the class/control
// letters d, D, f, n, r, s, S, t, v, w, W.
bool IsValidEscape(char c) {
  if (IsAsciiPunct(c)) {
    return true;
  }
  return IsInSet(c, "dDfnrsStvwW");
}

// Returns true iff the regex atom described by (escaped, pattern_char)
// matches the character ch. When escaped is true the atom is
// "\\<pattern_char>"; otherwise it is pattern_char itself, with '.'
// matching any character except '\n'. The result is undefined if the
// atom is invalid.
bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
  if (!escaped) {
    return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
  }

  // "\\p" where p is pattern_char.
  switch (pattern_char) {
    case 'd': return IsAsciiDigit(ch);
    case 'D': return !IsAsciiDigit(ch);
    case 'f': return ch == '\f';
    case 'n': return ch == '\n';
    case 'r': return ch == '\r';
    case 's': return IsAsciiWhiteSpace(ch);
    case 'S': return !IsAsciiWhiteSpace(ch);
    case 't': return ch == '\t';
    case 'v': return ch == '\v';
    case 'w': return IsAsciiWordChar(ch);
    case 'W': return !IsAsciiWordChar(ch);
    default: break;
  }
  // Any other escape is an escaped punctuation character, which matches
  // only itself.
  return IsAsciiPunct(pattern_char) && pattern_char == ch;
}

// Helper for ValidateRegex(): builds the common prefix of a syntax error
// message, naming the offending index and quoting the whole regex.
String FormatRegexSyntaxError(const char* regex, int index) {
  Message prefix;
  prefix << "Syntax error at index " << index
         << " in simple regular expression \"" << regex << "\": ";
  return prefix.GetString();
}

// Checks that regex is a valid simple regular expression. Returns true
// if it is; otherwise generates a non-fatal failure for each problem
// found and returns false. Validation stops early only for a NULL regex
// or for a '\\' at the very end of the regex.
bool ValidateRegex(const char* regex) {
  if (regex == NULL) {
    // TODO(wan@google.com): fix the source file location in the
    // assertion failures to match where the regex is used in user
    // code.
    ADD_FAILURE() << "NULL is not a valid simple regular expression.";
    return false;
  }

  bool well_formed = true;

  // True iff ?, *, or + can follow the previous atom.
  bool repeat_allowed = false;
  for (int pos = 0; regex[pos]; pos++) {
    const char ch = regex[pos];

    if (ch != '\\') {  // Not an escape sequence.
      if (ch == '^' && pos > 0) {
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "'^' can only appear at the beginning.";
        well_formed = false;
      } else if (ch == '$' && regex[pos + 1] != '\0') {
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "'$' can only appear at the end.";
        well_formed = false;
      } else if (IsInSet(ch, "()[]{}|")) {
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "'" << ch << "' is unsupported.";
        well_formed = false;
      } else if (IsRepeat(ch) && !repeat_allowed) {
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "'" << ch << "' can only follow a repeatable token.";
        well_formed = false;
      }

      repeat_allowed = !IsInSet(ch, "^$?*+");
      continue;
    }

    // An escape sequence: step onto the character after the backslash.
    // Errors are reported at the index of the backslash itself.
    pos++;
    const char escaped = regex[pos];
    if (escaped == '\0') {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos - 1)
                    << "'\\' cannot appear at the end.";
      return false;
    }

    if (!IsValidEscape(escaped)) {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos - 1)
                    << "invalid escape sequence \"\\" << escaped << "\".";
      well_formed = false;
    }
    repeat_allowed = true;
  }

  return well_formed;
}

// Matches a repeated regex atom followed by a valid simple regular
// expression, both anchored at the start of str. The atom is c when
// escaped is false and \c otherwise; repeat is the repetition meta
// character that follows it (?, *, or +); regex is what remains after
// the repetition. The behavior is undefined if str contains too many
// characters to be indexable by size_t, in which case the test will
// probably time out anyway. We are fine with this limitation as
// std::string has it too.
bool MatchRepetitionAndRegexAtHead(
    bool escaped, char c, char repeat, const char* regex,
    const char* str) {
  const size_t min_count = (repeat == '+') ? 1 : 0;
  // We cannot call numeric_limits::max() as it conflicts with the
  // max() macro on Windows.
  size_t max_count = static_cast<size_t>(-1) - 1;
  if (repeat == '?') {
    max_count = 1;
  }

  // Invariant: the atom matches each of the first `matched` characters
  // of str.
  size_t matched = 0;
  while (matched <= max_count) {
    if (matched >= min_count && MatchRegexAtHead(regex, str + matched)) {
      // We have enough matches at the head, and the tail matches too.
      // Since we only care about *whether* the pattern matches str
      // (as opposed to *how* it matches), there is no need to find a
      // greedy match.
      return true;
    }
    const char next = str[matched];
    if (next == '\0' || !AtomMatchesChar(escaped, c, next)) {
      return false;
    }
    ++matched;
  }
  return false;
}

// Returns true iff regex matches a prefix of str. regex must be a valid
// simple regular expression and must not start with "^", or the result
// is undefined.
bool MatchRegexAtHead(const char* regex, const char* str) {
  // An empty regex matches a prefix of anything.
  if (*regex == '\0') {
    return true;
  }

  // "$" only matches the end of a string.  Note that regex being
  // valid guarantees that there's nothing after "$" in it.
  if (*regex == '$') {
    return *str == '\0';
  }

  // Is the first thing in regex an escape sequence?  If so, the atom is
  // the character after the backslash.
  const bool escaped = (*regex == '\\');
  const char* const atom = escaped ? regex + 1 : regex;

  if (!IsRepeat(atom[1])) {
    // regex isn't empty, isn't "$", and doesn't start with a
    // repetition.  We match the first atom of regex with the first
    // character of str and recurse.
    if (*str == '\0') {
      return false;
    }
    if (!AtomMatchesChar(escaped, *atom, *str)) {
      return false;
    }
    return MatchRegexAtHead(atom + 1, str + 1);
  }

  // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
  // here's an indirect recursion.  It terminates as the regex gets
  // shorter in each recursion.
  return MatchRepetitionAndRegexAtHead(
      escaped, atom[0], atom[1], atom + 2, str);
}

// Returns true iff regex matches any substring of str. Returns false if
// either argument is NULL. regex must be a valid simple regular
// expression, or the result is undefined.
//
// The algorithm is recursive, but the recursion depth doesn't exceed
// the regex length, so we won't need to worry about running out of
// stack space normally.  In rare cases the time complexity can be
// exponential with respect to the regex length + the string length,
// but usually it's much faster (often close to linear).
bool MatchRegexAnywhere(const char* regex, const char* str) {
  if (regex == NULL || str == NULL) {
    return false;
  }

  // An anchored regex can only match at the very start of str.
  if (*regex == '^') {
    return MatchRegexAtHead(regex + 1, str);
  }

  // A successful match can start anywhere in str, including at the
  // position of the terminating '\0'.
  for (const char* start = str; ; ++start) {
    if (MatchRegexAtHead(regex, start)) {
      return true;
    }
    if (*start == '\0') {
      return false;
    }
  }
}

// Implements the RE class.

// Releases the two pattern strings held by this RE.
RE::~RE() {
  char* const owned_pattern = const_cast<char*>(pattern_);
  char* const owned_full_pattern = const_cast<char*>(full_pattern_);
  free(owned_pattern);
  free(owned_full_pattern);
}

// Returns true iff re is valid and matches the whole of str. The check
// runs the anchored full_pattern_ through MatchRegexAnywhere().
bool RE::FullMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }
  return MatchRegexAnywhere(re.full_pattern_, str);
}

// Returns true iff regular expression re matches a substring of str
// (including str itself).  Always returns false when re is not a valid
// regex.  Uses pattern_, the unmodified copy of the user's regex.
bool RE::PartialMatch(const char* str, const RE& re) {
  return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
}

// Initializes an RE from its string representation.
void RE::Init(const char* regex) {
  pattern_ = NULL;
  full_pattern_ = NULL;
  if (regex != NULL)
    pattern_ = posix::StrDup(regex);

  is_valid_ = ValidateRegex(regex);
  if (is_valid_) {
    // Only a valid regex gets a full-match pattern; for an invalid one
    // full_pattern_ simply stays NULL.
    const size_t regex_len = strlen(regex);
    const bool needs_caret = (*regex != '^');
    const bool needs_dollar =
        (regex_len == 0 || regex[regex_len - 1] != '$');

    // Worst case the full-match pattern is the regex plus a leading
    // '^', a trailing '$' and the terminating '\0': three extra bytes.
    char* const storage = static_cast<char*>(malloc(regex_len + 3));
    full_pattern_ = storage;

    size_t pos = 0;
    if (needs_caret)
      storage[pos++] = '^';  // Makes sure full_pattern_ starts with '^'.

    // memcpy is used instead of snprintf or strncpy, which trigger a
    // warning when compiled with VC++ 8.0.
    memcpy(storage + pos, regex, regex_len);
    pos += regex_len;

    if (needs_dollar)
      storage[pos++] = '$';  // Makes sure full_pattern_ ends with '$'.

    storage[pos] = '\0';
  }
}

#endif  // GTEST_USES_POSIX_RE

const char kUnknownFile[] = "unknown file";

// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
  // A missing file name is reported as kUnknownFile.
  const char* file_name = kUnknownFile;
  if (file != NULL)
    file_name = file;

  // A negative line number means "no line": emit only "file:".
  if (line < 0) {
    const String without_line = String::Format("%s:", file_name);
    return without_line.c_str();
  }

  // MSVC reports locations as "file(line):"; other compilers use
  // "file:line:".
#ifdef _MSC_VER
  const String with_line = String::Format("%s(%d):", file_name, line);
#else
  const String with_line = String::Format("%s:%d:", file_name, line);
#endif  // _MSC_VER
  return with_line.c_str();
}

// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
// Note that FormatCompilerIndependentFileLocation() does NOT append colon
// to the file location it produces, unlike FormatFileLocation().
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
    const char* file, int line) {
  // A missing file name is reported as kUnknownFile.
  const char* file_name = kUnknownFile;
  if (file != NULL)
    file_name = file;

  if (line >= 0) {
    // Always "file:line" regardless of the compiler, with no trailing
    // colon.
    const String location = String::Format("%s:%d", file_name, line);
    return location.c_str();
  }

  // A negative line number means "no line": the name alone is returned.
  return file_name;
}


GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
    : severity_(severity) {
  // Pick the fixed-width tag for this severity; anything that is not
  // INFO, WARNING or ERROR is reported as FATAL.
  const char* marker;
  switch (severity) {
    case GTEST_INFO:
      marker = "[  INFO ]";
      break;
    case GTEST_WARNING:
      marker = "[WARNING]";
      break;
    case GTEST_ERROR:
      marker = "[ ERROR ]";
      break;
    default:
      marker = "[ FATAL ]";
      break;
  }

  // Start the message on a fresh line, prefixed by the tag and the
  // source location; the caller streams the message text afterwards.
  GetStream() << ::std::endl << marker << " ";
  GetStream() << FormatFileLocation(file, line).c_str() << ": ";
}

// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
GTestLog::~GTestLog() {
  // Terminate (and flush) the log line started by the constructor.
  GetStream() << ::std::endl;

  if (severity_ != GTEST_FATAL)
    return;

  // A fatal message ends the program once stderr has been flushed.
  fflush(stderr);
  posix::Abort();
}
// Disable Microsoft deprecation warnings for POSIX functions called from
// this class (creat, dup, dup2, and close)
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4996)
#endif  // _MSC_VER

#if GTEST_HAS_STREAM_REDIRECTION

// Object that captures an output stream (stdout/stderr).
class CapturedStream {
 public:
  // The ctor redirects the stream to a temporary file.
  CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {

# if GTEST_OS_WINDOWS
    char temp_dir_path[MAX_PATH + 1] = { '\0' };  // NOLINT
    char temp_file_path[MAX_PATH + 1] = { '\0' };  // NOLINT

    ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
    const UINT success = ::GetTempFileNameA(temp_dir_path,
                                            "gtest_redir",
                                            0,  // Generate unique file name.
                                            temp_file_path);
    GTEST_CHECK_(success != 0)
        << "Unable to create a temporary file in " << temp_dir_path;
    const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
    GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
                                    << temp_file_path;
    filename_ = temp_file_path;
# else
    // There's no guarantee that a test has write access to the
    // current directory, so we create the temporary file in the /tmp
    // directory instead.
    char name_template[] = "/tmp/captured_stream.XXXXXX";
    const int captured_fd = mkstemp(name_template);
    filename_ = name_template;
# endif  // GTEST_OS_WINDOWS
    fflush(NULL);
    dup2(captured_fd, fd_);
    close(captured_fd);
  }

  ~CapturedStream() {
    remove(filename_.c_str());
  }

  String GetCapturedString() {
    if (uncaptured_fd_ != -1) {
      // Restores the original stream.
      fflush(NULL);
      dup2(uncaptured_fd_, fd_);
      close(uncaptured_fd_);
      uncaptured_fd_ = -1;
    }

    FILE* const file = posix::FOpen(filename_.c_str(), "r");
    const String content = ReadEntireFile(file);
    posix::FClose(file);
    return content;
  }

 private:
  // Reads the entire content of a file as a String.
  static String ReadEntireFile(FILE* file);

  // Returns the size (in bytes) of a file.
  static size_t GetFileSize(FILE* file);

  const int fd_;  // A stream to capture.
  int uncaptured_fd_;
  // Name of the temporary file holding the stderr output.
  ::std::string filename_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
};

// Returns the size (in bytes) of a file.
size_t CapturedStream::GetFileSize(FILE* file) {
  // Move to the end of the file; the offset reported there is the size.
  // The file position is left at the end for the caller to reset.
  fseek(file, 0, SEEK_END);
  const long end_offset = ftell(file);  // NOLINT
  return static_cast<size_t>(end_offset);
}

// Reads the entire content of a file as a string.
String CapturedStream::ReadEntireFile(FILE* file) {
  const size_t total_size = GetFileSize(file);
  char* const contents = new char[total_size];

  // GetFileSize() leaves the position at the end of the file.
  fseek(file, 0, SEEK_SET);

  // Read repeatedly, since a single fread() may return fewer bytes than
  // requested.  Stop when a read returns nothing or when the size
  // measured up front has been reached.
  size_t offset = 0;  // # of bytes read so far
  for (;;) {
    const size_t chunk =
        fread(contents + offset, 1, total_size - offset, file);
    offset += chunk;
    if (chunk == 0 || offset >= total_size)
      break;
  }

  // Only the bytes actually read are copied into the result.
  const String result(contents, offset);
  delete[] contents;

  return result;
}

# ifdef _MSC_VER
#  pragma warning(pop)
# endif  // _MSC_VER

// The active capturers for stderr and stdout.  A pointer is NULL whenever
// the corresponding stream is not being captured: CaptureStream() sets
// it and GetCapturedStream() resets it to NULL.
static CapturedStream* g_captured_stderr = NULL;
static CapturedStream* g_captured_stdout = NULL;

// Starts capturing an output stream (stdout/stderr).
void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
  if (*stream != NULL) {
    GTEST_LOG_(FATAL) << "Only one " << stream_name
                      << " capturer can exist at a time.";
  }
  *stream = new CapturedStream(fd);
}

// Stops capturing the output stream and returns the captured string.
String GetCapturedStream(CapturedStream** captured_stream) {
  CapturedStream* const capturer = *captured_stream;

  // Restores the stream and fetches what was written while captured.
  const String content = capturer->GetCapturedString();

  // Destroy the capturer and clear the slot so that a new capture can
  // be started.
  delete capturer;
  *captured_stream = NULL;

  return content;
}

// Starts capturing stdout.  The capturer is stored in g_captured_stdout;
// it is a fatal error if a stdout capture is already in progress.
void CaptureStdout() {
  CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
}

// Starts capturing stderr.  The capturer is stored in g_captured_stderr;
// it is a fatal error if a stderr capture is already in progress.
void CaptureStderr() {
  CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
}

// Stops capturing stdout and returns the captured string.  Must be paired
// with a preceding CaptureStdout(): the capturer pointer is dereferenced
// without a NULL check.
String GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); }

// Stops capturing stderr and returns the captured string.  Must be paired
// with a preceding CaptureStderr(): the capturer pointer is dereferenced
// without a NULL check.
String GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); }

#endif  // GTEST_HAS_STREAM_REDIRECTION

#if GTEST_HAS_DEATH_TEST

// A copy of all command line arguments.  Set by InitGoogleTest().
// Only compiled when death tests are enabled (GTEST_HAS_DEATH_TEST).
::std::vector<String> g_argvs;

// Returns the command line as a vector of strings.  The result is a
// reference to g_argvs, not a copy.
const ::std::vector<String>& GetArgvs() { return g_argvs; }

#endif  // GTEST_HAS_DEATH_TEST

#if GTEST_OS_WINDOWS_MOBILE
namespace posix {
// Abort() for Windows Mobile builds (this definition is only compiled
// when GTEST_OS_WINDOWS_MOBILE is set): breaks into the debugger, then
// terminates the current process with exit code 1.
void Abort() {
  DebugBreak();
  TerminateProcess(GetCurrentProcess(), 1);
}
}  // namespace posix
#endif  // GTEST_OS_WINDOWS_MOBILE

// Returns the name of the environment variable corresponding to the
// given flag.  For example, FlagToEnvVar("foo") will return
// "GTEST_FOO" in the open-source version.
static String FlagToEnvVar(const char* flag) {
  // The environment variable is the prefixed flag name in upper case.
  const String prefixed_flag =
      (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();

  const char* const chars = prefixed_flag.c_str();
  const size_t char_count = prefixed_flag.length();

  Message upper_cased;
  for (size_t pos = 0; pos < char_count; ++pos) {
    upper_cased << ToUpper(chars[pos]);
  }

  return upper_cased.GetString();
}

// Parses 'str' for a 32-bit signed integer.  If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
  // Parses the environment variable as a decimal integer.
  char* end = NULL;
  const long long_value = strtol(str, &end, 10);  // NOLINT

  // Has strtol() consumed all characters in the string?
  if (*end != '\0') {
    // No - an invalid character was encountered.
    Message msg;
    msg << "WARNING: " << src_text
        << " is expected to be a 32-bit integer, but actually"
        << " has value \"" << str << "\".\n";
    printf("%s", msg.GetString().c_str());
    fflush(stdout);
    return false;
  }

  // Is the parsed value in the range of an Int32?
  const Int32 result = static_cast<Int32>(long_value);
  if (long_value == LONG_MAX || long_value == LONG_MIN ||
      // The parsed value overflows as a long.  (strtol() returns
      // LONG_MAX or LONG_MIN when the input overflows.)
      result != long_value
      // The parsed value overflows as an Int32.
      ) {
    Message msg;
    msg << "WARNING: " << src_text
        << " is expected to be a 32-bit integer, but actually"
        << " has value " << str << ", which overflows.\n";
    printf("%s", msg.GetString().c_str());
    fflush(stdout);
    return false;
  }

  *value = result;
  return true;
}

// Reads and returns the Boolean environment variable corresponding to
// the given flag; if it's not set, returns default_value.
//
// The value is considered true iff it's not "0".
bool BoolFromGTestEnv(const char* flag, bool default_value) {
  const String env_var = FlagToEnvVar(flag);
  const char* const raw_value = posix::GetEnv(env_var.c_str());

  // Unset variable: fall back to the caller's default.
  if (raw_value == NULL)
    return default_value;

  // Any value other than exactly "0" counts as true.
  return strcmp(raw_value, "0") != 0;
}

// Reads and returns a 32-bit integer stored in the environment
// variable corresponding to the given flag; if it isn't set or
// doesn't represent a valid 32-bit integer, returns default_value.
Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
  const String env_var = FlagToEnvVar(flag);
  const char* const raw_value = posix::GetEnv(env_var.c_str());

  // The environment variable is not set.
  if (raw_value == NULL)
    return default_value;

  Int32 parsed = default_value;
  const bool parsed_ok =
      ParseInt32(Message() << "Environment variable " << env_var,
                 raw_value, &parsed);
  if (parsed_ok)
    return parsed;

  // ParseInt32() has already printed a warning describing the bad
  // value; tell the user which value is used instead.
  printf("The default value %s is used.\n",
         (Message() << default_value).GetString().c_str());
  fflush(stdout);
  return default_value;
}

// Reads and returns the string environment variable corresponding to
// the given flag; if it's not set, returns default_value.
const char* StringFromGTestEnv(const char* flag, const char* default_value) {
  const String env_var = FlagToEnvVar(flag);
  const char* const value = posix::GetEnv(env_var.c_str());
  return value == NULL ? default_value : value;
}

}  // namespace internal
}  // namespace testing
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// It uses the << operator when possible, and prints the bytes in the
// object otherwise.  A user can override its behavior for a class
// type Foo by defining either operator<<(::std::ostream&, const Foo&)
// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
// defines Foo.

#include <ctype.h>
#include <stdio.h>
#include <ostream>  // NOLINT
#include <string>

namespace testing {

namespace {

using ::std::ostream;

#if GTEST_OS_WINDOWS_MOBILE  // Windows CE does not define _snprintf_s.
# define snprintf _snprintf
#elif _MSC_VER >= 1400  // VC 8.0 and later deprecate snprintf and _snprintf.
# define snprintf _snprintf_s
#elif _MSC_VER
# define snprintf _snprintf
#endif  // GTEST_OS_WINDOWS_MOBILE

// Prints a segment of bytes in the given object.
void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
                                size_t count, ostream* os) {
  // Room for two hex digits plus the terminating '\0', with slack.
  char hex[5] = "";
  for (size_t offset = 0; offset < count; ++offset) {
    const size_t pos = start + offset;
    if (offset > 0) {
      // Bytes are shown in groups of two for easy reading by a human:
      // a space precedes a byte at an even position in the object, a
      // dash precedes one at an odd position.
      const char separator = ((pos % 2) == 0) ? ' ' : '-';
      *os << separator;
    }
    snprintf(hex, sizeof(hex), "%02X", obj_bytes[pos]);
    *os << hex;
  }
}

// Prints the bytes in the given value to the given ostream.
void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
                              ostream* os) {
  // Objects of kThreshold bytes or more are abbreviated to their first
  // and (roughly) last kChunkSize bytes.
  // TODO(wan): let the user control the threshold using a flag.
  const size_t kThreshold = 132;
  const size_t kChunkSize = 64;

  // Tells the user how big the object is.
  *os << count << "-byte object <";

  if (count >= kThreshold) {
    PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
    *os << " ... ";
    // The tail starts on a 2-byte boundary (rounded up) so that the
    // byte pairing stays aligned with the object's layout.
    const size_t tail_start = ((count - kChunkSize + 1) / 2) * 2;
    PrintByteSegmentInObjectTo(obj_bytes, tail_start, count - tail_start, os);
  } else {
    // Small enough to print in full.
    PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
  }

  *os << ">";
}

}  // namespace

namespace internal2 {

// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
// given object.  The delegation simplifies the implementation, which
// uses the << operator and thus is easier done outside of the
// ::testing::internal namespace, which contains a << operator that
// sometimes conflicts with the one in STL.
//
// obj_bytes points at the first byte of the object, count is the number
// of bytes in it, and os is the stream the dump is written to.
void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
                          ostream* os) {
  PrintBytesInObjectToImpl(obj_bytes, count, os);
}

}  // namespace internal2

namespace internal {

// Depending on the value of a char (or wchar_t), we print it in one
// of three formats:
//   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
//   - as a special escape sequence (e.g. '\r', '\n').
enum CharFormat {
  kAsIs,          // Printed unchanged (a printable ASCII character).
  kHexEscape,     // Printed as a "\x.." escape sequence.
  kSpecialEscape  // Printed as a named escape such as "\n" or "\\".
};

// Returns true if c is a printable ASCII character.  We test the
// value of c directly instead of calling isprint(), which is buggy on
// Windows Mobile.
inline bool IsPrintableAscii(wchar_t c) {
  // Printable ASCII spans space (0x20) through tilde (0x7E), inclusive.
  const bool below_range = c < 0x20;
  const bool above_range = c > 0x7E;
  return !(below_range || above_range);
}

// Prints a wide or narrow char c as a character literal without the
// quotes, escaping it when necessary; returns how c was formatted.
// The template argument UnsignedChar is the unsigned version of Char,
// which is the type of c.
template <typename UnsignedChar, typename Char>
static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
  // First see whether c has a dedicated escape sequence.
  const char* special_escape = NULL;
  switch (static_cast<wchar_t>(c)) {
    case L'\0': special_escape = "\\0";  break;
    case L'\'': special_escape = "\\'";  break;
    case L'\\': special_escape = "\\\\"; break;
    case L'\a': special_escape = "\\a";  break;
    case L'\b': special_escape = "\\b";  break;
    case L'\f': special_escape = "\\f";  break;
    case L'\n': special_escape = "\\n";  break;
    case L'\r': special_escape = "\\r";  break;
    case L'\t': special_escape = "\\t";  break;
    case L'\v': special_escape = "\\v";  break;
    default:
      break;
  }
  if (special_escape != NULL) {
    *os << special_escape;
    return kSpecialEscape;
  }

  // Printable ASCII goes out unchanged.
  if (IsPrintableAscii(c)) {
    *os << static_cast<char>(c);
    return kAsIs;
  }

  // Everything else becomes a hex escape.  The value is converted to
  // the unsigned counterpart of Char before it is formatted.
  *os << String::Format("\\x%X", static_cast<UnsignedChar>(c));
  return kHexEscape;
}

// Prints a char c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.
static CharFormat PrintAsWideStringLiteralTo(wchar_t c, ostream* os) {
  // Inside a string literal the quoting rules are the reverse of those
  // for a character literal: a single quote needs no escaping...
  if (c == L'\'') {
    *os << "'";
    return kAsIs;
  }

  // ...whereas a double quote does.
  if (c == L'"') {
    *os << "\\\"";
    return kSpecialEscape;
  }

  // Every other character follows the character-literal rules.
  return PrintAsCharLiteralTo<wchar_t>(c, os);
}

// Prints a char c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.  Implemented on top of the
// wide-character version: c is converted to unsigned char first, so the
// value passed on is always in [0, UCHAR_MAX] even when char is signed.
static CharFormat PrintAsNarrowStringLiteralTo(char c, ostream* os) {
  return PrintAsWideStringLiteralTo(static_cast<unsigned char>(c), os);
}

// Prints a wide or narrow character c and its code.  '\0' is printed
// as "'\\0'", other unprintable characters are also properly escaped
// using the standard C++ escape sequence.  The template argument
// UnsignedChar is the unsigned version of Char, which is the type of c.
template <typename UnsignedChar, typename Char>
void PrintCharAndCodeTo(Char c, ostream* os) {
  // First, print c as a literal in the most readable form we can find.
  // Character types wider than one byte get the L prefix.
  const char* const opening_quote = (sizeof(c) > 1) ? "L'" : "'";
  *os << opening_quote;
  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
  *os << "'";

  // The code of 0 is obvious from its '\\0' literal, so nothing more is
  // printed for it.
  if (c == 0)
    return;

  // To aid user debugging, also print c's code in decimal.
  *os << " (" << String::Format("%d", c).c_str();

  // For more convenience, print the code again in hexadecimal, unless
  // that would add nothing: c was already shown as '\x##', or the code
  // is in [1, 9], where decimal and hexadecimal read the same.
  const bool hex_is_redundant =
      (format == kHexEscape) || (1 <= c && c <= 9);
  if (!hex_is_redundant) {
    *os << String::Format(", 0x%X",
                          static_cast<UnsignedChar>(c)).c_str();
  }
  *os << ")";
}

// Prints an unsigned char as a character literal followed by its code.
void PrintTo(unsigned char c, ::std::ostream* os) {
  PrintCharAndCodeTo<unsigned char>(c, os);
}
// Prints a signed char as a character literal followed by its code.
// unsigned char is passed as the UnsignedChar template argument, the
// type used when formatting the hexadecimal code.
void PrintTo(signed char c, ::std::ostream* os) {
  PrintCharAndCodeTo<unsigned char>(c, os);
}

// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its code.  L'\0' is printed as "L'\\0'".
// wchar_t itself is passed as the UnsignedChar template argument.
void PrintTo(wchar_t wc, ostream* os) {
  PrintCharAndCodeTo<wchar_t>(wc, os);
}

// Prints the given array of characters to the ostream.
// The array starts at *begin, the length is len, it may include '\0' characters
// and may not be null-terminated.
static void PrintCharsAsStringTo(const char* begin, size_t len, ostream* os) {
  *os << "\"";
  bool is_previous_hex = false;
  for (size_t index = 0; index < len; ++index) {
    const char cur = begin[index];
    if (is_previous_hex && IsXDigit(cur)) {
      // Previous character is of '\x..' form and this character can be
      // interpreted as another hexadecimal digit in its number. Break string to
      // disambiguate.
      *os << "\" \"";
    }
    is_previous_hex = PrintAsNarrowStringLiteralTo(cur, os) == kHexEscape;
  }
  *os << "\"";
}

// Prints a (const) char array of 'len' elements, starting at address 'begin'.
// The array is printed as a double-quoted string literal with all 'len'
// characters included, so embedded '\0' characters are shown as well.
void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
  PrintCharsAsStringTo(begin, len, os);
}

// Prints the given array of wide characters to the ostream.
// The array starts at *begin, the length is len, it may include L'\0'
// characters and may not be null-terminated.
static void PrintWideCharsAsStringTo(const wchar_t* begin, size_t len,
                                     ostream* os) {
  *os << "L\"";
  bool is_previous_hex = false;
  for (size_t index = 0; index < len; ++index) {
    const wchar_t cur = begin[index];
    if (is_previous_hex && isascii(cur) && IsXDigit(static_cast<char>(cur))) {
      // Previous character is of '\x..' form and this character can be
      // interpreted as another hexadecimal digit in its number. Break string to
      // disambiguate.
      *os << "\" L\"";
    }
    is_previous_hex = PrintAsWideStringLiteralTo(cur, os) == kHexEscape;
  }
  *os << "\"";
}

// Prints the given C string to the ostream.
void PrintTo(const char* s, ostream* os) {
  // A null pointer has no string to show.
  if (s == NULL) {
    *os << "NULL";
    return;
  }

  // Print the address first, then the text it points to as a quoted
  // and escaped string literal.
  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  const size_t length = strlen(s);
  PrintCharsAsStringTo(s, length, os);
}

// MSVC compiler can be configured to define wchar_t as a typedef
// of unsigned short. Defining an overload for const wchar_t* in that case
// would cause pointers to unsigned shorts be printed as wide strings,
// possibly accessing more memory than intended and causing invalid
// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
// wchar_t is implemented as a native type.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Prints the given wide C string to the ostream.
void PrintTo(const wchar_t* s, ostream* os) {
  // A null pointer has no string to show.
  if (s == NULL) {
    *os << "NULL";
    return;
  }

  // Print the address first, then the text it points to as a quoted
  // and escaped wide string literal.
  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  const size_t length = wcslen(s);
  PrintWideCharsAsStringTo(s, length, os);
}
#endif  // wchar_t is native

// Prints a ::string object.
#if GTEST_HAS_GLOBAL_STRING
// Prints s as a double-quoted, escaped string literal.  data()/size()
// are used rather than c_str(), so embedded '\0' characters are
// printed too.
void PrintStringTo(const ::string& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_GLOBAL_STRING

// Prints a ::std::string object as a double-quoted, escaped string
// literal.  data()/size() are used rather than c_str(), so embedded
// '\0' characters are printed too.
void PrintStringTo(const ::std::string& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}

// Prints a ::wstring object.
#if GTEST_HAS_GLOBAL_WSTRING
// Prints s as an L"..." escaped wide string literal.  data()/size() are
// used, so embedded L'\0' characters are printed too.
void PrintWideStringTo(const ::wstring& s, ostream* os) {
  PrintWideCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_WSTRING
// Prints a ::std::wstring object as an L"..." escaped wide string
// literal.  data()/size() are used, so embedded L'\0' characters are
// printed too.
void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
  PrintWideCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_STD_WSTRING

}  // namespace internal

}  // namespace testing
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// The Google C++ Testing Framework (Google Test)


// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#undef GTEST_IMPLEMENTATION_

namespace testing {

using internal::GetUnitTestImpl;

// Gets the summary of the failure message by omitting the stack trace
// in it.
internal::String TestPartResult::ExtractSummary(const char* message) {
  // Locate the marker that introduces the stack trace, if there is one.
  const char* const trace_start =
      strstr(message, internal::kStackTraceMarker);

  // No stack trace: the whole message is the summary.
  if (trace_start == NULL)
    return internal::String(message);

  // Otherwise the summary is everything in front of the marker.
  return internal::String(message, trace_start - message);
}

// Prints a TestPartResult object.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
  // Human-readable name of the result type; anything that is neither a
  // success nor a fatal failure is reported as a non-fatal failure.
  const char* outcome = "Non-fatal failure";
  if (result.type() == TestPartResult::kSuccess) {
    outcome = "Success";
  } else if (result.type() == TestPartResult::kFatalFailure) {
    outcome = "Fatal failure";
  }

  // Format: "<file>:<line>: <outcome>:\n<message>\n", flushed by endl.
  os << result.file_name() << ":" << result.line_number() << ": "
     << outcome << ":\n"
     << result.message() << std::endl;
  return os;
}

// Appends a TestPartResult to the array.  The result is copied into the
// array (push_back stores by value).
void TestPartResultArray::Append(const TestPartResult& result) {
  array_.push_back(result);
}

// Returns the TestPartResult at the given index (0-based).
const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
  // An index outside [0, size()) is a programming error: report it and
  // abort rather than read past the array.
  const bool in_range = (0 <= index) && (index < size());
  if (!in_range) {
    printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
    internal::posix::Abort();
  }

  return array_[index];
}

// Returns the number of TestPartResult objects in the array.  The
// container's size is narrowed to int to match the int index taken by
// GetTestPartResult().
int TestPartResultArray::size() const {
  return static_cast<int>(array_.size());
}

namespace internal {

// Installs this object as the current thread's test part result
// reporter, remembering the previously installed reporter so that
// results can be forwarded to it and so that the destructor can
// restore it.
HasNewFatalFailureHelper::HasNewFatalFailureHelper()
    : has_new_fatal_failure_(false),
      original_reporter_(GetUnitTestImpl()->
                         GetTestPartResultReporterForCurrentThread()) {
  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
}

// Reinstates the reporter that was active for the current thread when
// this helper was constructed.
HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
      original_reporter_);
}

void HasNewFatalFailureHelper::ReportTestPartResult(
    const TestPartResult& result) {
  // Record that a fatal failure has been seen; the flag is never
  // cleared once set.
  const bool is_fatal = result.fatally_failed();
  if (is_fatal) {
    has_new_fatal_failure_ = true;
  }

  // Every result, fatal or not, is passed on to the reporter that was
  // installed before this helper.
  original_reporter_->ReportTestPartResult(result);
}

}  // namespace internal

}  // namespace testing
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)


namespace testing {
namespace internal {

#if GTEST_HAS_TYPED_TEST_P

// Skips to the first non-space char in str. Returns an empty string if str
// contains only whitespace characters.
static const char* SkipSpaces(const char* str) {
  // Advance past leading whitespace.  The scan stops at the first
  // character IsSpace() rejects.
  const char* first_non_space = str;
  for (; IsSpace(*first_non_space); ++first_non_space) {
  }
  return first_non_space;
}

// Verifies that registered_tests match the test names in
// defined_test_names_; returns registered_tests if successful, or
// aborts the program otherwise.
const char* TypedTestCasePState::VerifyRegisteredTestNames(
    const char* file, int line, const char* registered_tests) {
  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
  registered_ = true;

  // Skip initial whitespace in registered_tests since some
  // preprocessors prefix stringizied literals with whitespace.
  registered_tests = SkipSpaces(registered_tests);

  Message errors;
  ::std::set<String> tests;
  for (const char* names = registered_tests; names != NULL;
       names = SkipComma(names)) {
    const String name = GetPrefixUntilComma(names);
    if (tests.count(name) != 0) {
      errors << "Test " << name << " is listed more than once.\n";
      continue;
    }

    bool found = false;
    for (DefinedTestIter it = defined_test_names_.begin();
         it != defined_test_names_.end();
         ++it) {
      if (name == *it) {
        found = true;
        break;
      }
    }

    if (found) {
      tests.insert(name);
    } else {
      errors << "No test named " << name
             << " can be found in this test case.\n";
    }
  }

  for (DefinedTestIter it = defined_test_names_.begin();
       it != defined_test_names_.end();
       ++it) {
    if (tests.count(*it) == 0) {
      errors << "You forgot to list test " << *it << ".\n";
    }
  }

  const String& errors_str = errors.GetString();
  if (errors_str != "") {
    fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
            errors_str.c_str());
    fflush(stderr);
    posix::Abort();
  }

  return registered_tests;
}

#endif  // GTEST_HAS_TYPED_TEST_P

}  // namespace internal
}  // namespace testing


================================================
FILE: libhsakmt/tests/kfdtest/include/amdp2ptest.h
================================================
/*
 * Copyright 2015-2024 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef AMDP2PTEST_H_
#define AMDP2PTEST_H_

#include <linux/ioctl.h>

/* ioctl "type" character passed as the first argument of every _IOW/_IOWR
 * request code defined later in this header. */
#define AMDP2PTEST_IOCTL_MAGIC 'A'


/* Name of the test device and the corresponding /dev node path. */
#define AMDP2PTEST_DEVICE_NAME "amdp2ptest"
#define AMDP2PTEST_DEVICE_PATH "/dev/amdp2ptest"

/*
 * Parameter block named by the AMD2P2PTEST_IOCTL_GET_PAGE_SIZE request code.
 *
 * NOTE(review): the fields are uint64_t, but this header only includes
 * <linux/ioctl.h>; includers presumably have to provide <stdint.h> (or an
 * equivalent definition) beforehand -- confirm.
 */
struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM {
	/* Input parameters */
	/* presumably the start address of the range to query -- confirm against the driver */
	uint64_t addr;
	/* presumably the size of that range -- confirm units against the driver */
	uint64_t length;

	/* Output parameters */
	/* presumably the page size reported for the range -- confirm against the driver */
	uint64_t page_size;
};

/*
 * Parameter block named by the AMD2P2PTEST_IOCTL_GET_PAGES request code.
 * All members are 64-bit, so the structure has no implicit padding between
 * fields.
 */
struct AMDRDMA_IOCTL_GET_PAGES_PARAM {
	/* Input parameters */
	/* presumably the start address of the range to operate on -- confirm against the driver */
	uint64_t addr;
	/* presumably the size of that range -- confirm units against the driver */
	uint64_t length;
	uint64_t is_local;	/* 1 if this is the pointer to local
				   allocation */

	/* Output parameters */
	/* NOTE(review): looks like a CPU-side pointer value returned by the driver -- confirm */
	uint64_t cpu_ptr;
};


/*
 * Parameter block named by the AMD2P2PTEST_IOCTL_PUT_PAGES request code.
 * It carries input fields only; that request code is built with _IOW.
 */
struct AMDRDMA_IOCTL_PUT_PAGES_PARAM {
	/* Input parameters */
	/* presumably the same addr/length pair given to the matching GET_PAGES call -- confirm */
	uint64_t addr;
	uint64_t length;
};


/*
 * Request codes for the amdp2ptest device. All three use
 * AMDP2PTEST_IOCTL_MAGIC as the type and the sequence numbers 1, 2 and 3.
 *
 * NOTE(review): the third macro argument is a pointer type
 * ("struct ... *"). _IOW/_IOWR encode sizeof() of that argument, so the size
 * field of each code is the size of a pointer rather than of the parameter
 * struct. The values presumably have to match the ones the amdp2ptest driver
 * was built with, so do not change this on one side only.
 *
 * NOTE(review): these macros are spelled with an AMD2P2PTEST_ prefix, unlike
 * the AMDP2PTEST_ prefix used by the other macros in this header.
 */

/* Read/write request (number 1) taking AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM. */
#define AMD2P2PTEST_IOCTL_GET_PAGE_SIZE	\
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 1, struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM *)

/* Read/write request (number 2) taking AMDRDMA_IOCTL_GET_PAGES_PARAM. */
#define AMD2P2PTEST_IOCTL_GET_PAGES \
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 2, struct AMDRDMA_IOCTL_GET_PAGES_PARAM *)

/* Write-only request (number 3) taking AMDRDMA_IOCTL_PUT_PAGES_PARAM. */
#define AMD2P2PTEST_IOCTL_PUT_PAGES	\
_IOW(AMDP2PTEST_IOCTL_MAGIC, 3, struct AMDRDMA_IOCTL_PUT_PAGES_PARAM *)


#endif  /* AMDP2PTEST_H_ */


================================================
FILE: libhsakmt/tests/kfdtest/include/asic_reg/gfx_7_2_d.h
================================================
/*
 * Copyright (C) 2014  Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef GFX_7_2_D_H
#define GFX_7_2_D_H

#define mmCB_BLEND_RED                                                          0xa105
#define mmCB_BLEND_GREEN                                                        0xa106
#define mmCB_BLEND_BLUE                                                         0xa107
#define mmCB_BLEND_ALPHA                                                        0xa108
#define mmCB_COLOR_CONTROL                                                      0xa202
#define mmCB_BLEND0_CONTROL                                                     0xa1e0
#define mmCB_BLEND1_CONTROL                                                     0xa1e1
#define mmCB_BLEND2_CONTROL                                                     0xa1e2
#define mmCB_BLEND3_CONTROL                                                     0xa1e3
#define mmCB_BLEND4_CONTROL                                                     0xa1e4
#define mmCB_BLEND5_CONTROL                                                     0xa1e5
#define mmCB_BLEND6_CONTROL                                                     0xa1e6
#define mmCB_BLEND7_CONTROL                                                     0xa1e7
#define mmCB_COLOR0_BASE                                                        0xa318
#define mmCB_COLOR1_BASE                                                        0xa327
#define mmCB_COLOR2_BASE                                                        0xa336
#define mmCB_COLOR3_BASE                                                        0xa345
#define mmCB_COLOR4_BASE                                                        0xa354
#define mmCB_COLOR5_BASE                                                        0xa363
#define mmCB_COLOR6_BASE                                                        0xa372
#define mmCB_COLOR7_BASE                                                        0xa381
#define mmCB_COLOR0_PITCH                                                       0xa319
#define mmCB_COLOR1_PITCH                                                       0xa328
#define mmCB_COLOR2_PITCH                                                       0xa337
#define mmCB_COLOR3_PITCH                                                       0xa346
#define mmCB_COLOR4_PITCH                                                       0xa355
#define mmCB_COLOR5_PITCH                                                       0xa364
#define mmCB_COLOR6_PITCH                                                       0xa373
#define mmCB_COLOR7_PITCH                                                       0xa382
#define mmCB_COLOR0_SLICE                                                       0xa31a
#define mmCB_COLOR1_SLICE                                                       0xa329
#define mmCB_COLOR2_SLICE                                                       0xa338
#define mmCB_COLOR3_SLICE                                                       0xa347
#define mmCB_COLOR4_SLICE                                                       0xa356
#define mmCB_COLOR5_SLICE                                                       0xa365
#define mmCB_COLOR6_SLICE                                                       0xa374
#define mmCB_COLOR7_SLICE                                                       0xa383
#define mmCB_COLOR0_VIEW                                                        0xa31b
#define mmCB_COLOR1_VIEW                                                        0xa32a
#define mmCB_COLOR2_VIEW                                                        0xa339
#define mmCB_COLOR3_VIEW                                                        0xa348
#define mmCB_COLOR4_VIEW                                                        0xa357
#define mmCB_COLOR5_VIEW                                                        0xa366
#define mmCB_COLOR6_VIEW                                                        0xa375
#define mmCB_COLOR7_VIEW                                                        0xa384
#define mmCB_COLOR0_INFO                                                        0xa31c
#define mmCB_COLOR1_INFO                                                        0xa32b
#define mmCB_COLOR2_INFO                                                        0xa33a
#define mmCB_COLOR3_INFO                                                        0xa349
#define mmCB_COLOR4_INFO                                                        0xa358
#define mmCB_COLOR5_INFO                                                        0xa367
#define mmCB_COLOR6_INFO                                                        0xa376
#define mmCB_COLOR7_INFO                                                        0xa385
#define mmCB_COLOR0_ATTRIB                                                      0xa31d
#define mmCB_COLOR1_ATTRIB                                                      0xa32c
#define mmCB_COLOR2_ATTRIB                                                      0xa33b
#define mmCB_COLOR3_ATTRIB                                                      0xa34a
#define mmCB_COLOR4_ATTRIB                                                      0xa359
#define mmCB_COLOR5_ATTRIB                                                      0xa368
#define mmCB_COLOR6_ATTRIB                                                      0xa377
#define mmCB_COLOR7_ATTRIB                                                      0xa386
#define mmCB_COLOR0_CMASK                                                       0xa31f
#define mmCB_COLOR1_CMASK                                                       0xa32e
#define mmCB_COLOR2_CMASK                                                       0xa33d
#define mmCB_COLOR3_CMASK                                                       0xa34c
#define mmCB_COLOR4_CMASK                                                       0xa35b
#define mmCB_COLOR5_CMASK                                                       0xa36a
#define mmCB_COLOR6_CMASK                                                       0xa379
#define mmCB_COLOR7_CMASK                                                       0xa388
#define mmCB_COLOR0_CMASK_SLICE                                                 0xa320
#define mmCB_COLOR1_CMASK_SLICE                                                 0xa32f
#define mmCB_COLOR2_CMASK_SLICE                                                 0xa33e
#define mmCB_COLOR3_CMASK_SLICE                                                 0xa34d
#define mmCB_COLOR4_CMASK_SLICE                                                 0xa35c
#define mmCB_COLOR5_CMASK_SLICE                                                 0xa36b
#define mmCB_COLOR6_CMASK_SLICE                                                 0xa37a
#define mmCB_COLOR7_CMASK_SLICE                                                 0xa389
#define mmCB_COLOR0_FMASK                                                       0xa321
#define mmCB_COLOR1_FMASK                                                       0xa330
#define mmCB_COLOR2_FMASK                                                       0xa33f
#define mmCB_COLOR3_FMASK                                                       0xa34e
#define mmCB_COLOR4_FMASK                                                       0xa35d
#define mmCB_COLOR5_FMASK                                                       0xa36c
#define mmCB_COLOR6_FMASK                                                       0xa37b
#define mmCB_COLOR7_FMASK                                                       0xa38a
#define mmCB_COLOR0_FMASK_SLICE                                                 0xa322
#define mmCB_COLOR1_FMASK_SLICE                                                 0xa331
#define mmCB_COLOR2_FMASK_SLICE                                                 0xa340
#define mmCB_COLOR3_FMASK_SLICE                                                 0xa34f
#define mmCB_COLOR4_FMASK_SLICE                                                 0xa35e
#define mmCB_COLOR5_FMASK_SLICE                                                 0xa36d
#define mmCB_COLOR6_FMASK_SLICE                                                 0xa37c
#define mmCB_COLOR7_FMASK_SLICE                                                 0xa38b
#define mmCB_COLOR0_CLEAR_WORD0                                                 0xa323
#define mmCB_COLOR1_CLEAR_WORD0                                                 0xa332
#define mmCB_COLOR2_CLEAR_WORD0                                                 0xa341
#define mmCB_COLOR3_CLEAR_WORD0                                                 0xa350
#define mmCB_COLOR4_CLEAR_WORD0                                                 0xa35f
#define mmCB_COLOR5_CLEAR_WORD0                                                 0xa36e
#define mmCB_COLOR6_CLEAR_WORD0                                                 0xa37d
#define mmCB_COLOR7_CLEAR_WORD0                                                 0xa38c
#define mmCB_COLOR0_CLEAR_WORD1                                                 0xa324
#define mmCB_COLOR1_CLEAR_WORD1                                                 0xa333
#define mmCB_COLOR2_CLEAR_WORD1                                                 0xa342
#define mmCB_COLOR3_CLEAR_WORD1                                                 0xa351
#define mmCB_COLOR4_CLEAR_WORD1                                                 0xa360
#define mmCB_COLOR5_CLEAR_WORD1                                                 0xa36f
#define mmCB_COLOR6_CLEAR_WORD1                                                 0xa37e
#define mmCB_COLOR7_CLEAR_WORD1                                                 0xa38d
#define mmCB_TARGET_MASK                                                        0xa08e
#define mmCB_SHADER_MASK                                                        0xa08f
#define mmCB_HW_CONTROL                                                         0x2684
#define mmCB_HW_CONTROL_1                                                       0x2685
#define mmCB_HW_CONTROL_2                                                       0x2686
#define mmCB_HW_CONTROL_3                                                       0x2683
#define mmCB_PERFCOUNTER_FILTER                                                 0xdc00
#define mmCB_PERFCOUNTER0_SELECT                                                0xdc01
#define mmCB_PERFCOUNTER0_SELECT1                                               0xdc02
#define mmCB_PERFCOUNTER1_SELECT                                                0xdc03
#define mmCB_PERFCOUNTER2_SELECT                                                0xdc04
#define mmCB_PERFCOUNTER3_SELECT                                                0xdc05
#define mmCB_PERFCOUNTER0_LO                                                    0xd406
#define mmCB_PERFCOUNTER1_LO                                                    0xd408
#define mmCB_PERFCOUNTER2_LO                                                    0xd40a
#define mmCB_PERFCOUNTER3_LO                                                    0xd40c
#define mmCB_PERFCOUNTER0_HI                                                    0xd407
#define mmCB_PERFCOUNTER1_HI                                                    0xd409
#define mmCB_PERFCOUNTER2_HI                                                    0xd40b
#define mmCB_PERFCOUNTER3_HI                                                    0xd40d
#define mmCB_CGTT_SCLK_CTRL                                                     0xf0a8
#define mmCB_DEBUG_BUS_1                                                        0x2699
#define mmCB_DEBUG_BUS_2                                                        0x269a
#define mmCB_DEBUG_BUS_3                                                        0x269b
#define mmCB_DEBUG_BUS_4                                                        0x269c
#define mmCB_DEBUG_BUS_5                                                        0x269d
#define mmCB_DEBUG_BUS_6                                                        0x269e
#define mmCB_DEBUG_BUS_7                                                        0x269f
#define mmCB_DEBUG_BUS_8                                                        0x26a0
#define mmCB_DEBUG_BUS_9                                                        0x26a1
#define mmCB_DEBUG_BUS_10                                                       0x26a2
#define mmCB_DEBUG_BUS_11                                                       0x26a3
#define mmCB_DEBUG_BUS_12                                                       0x26a4
#define mmCB_DEBUG_BUS_13                                                       0x26a5
#define mmCB_DEBUG_BUS_14                                                       0x26a6
#define mmCB_DEBUG_BUS_15                                                       0x26a7
#define mmCB_DEBUG_BUS_16                                                       0x26a8
#define mmCB_DEBUG_BUS_17                                                       0x26a9
#define mmCB_DEBUG_BUS_18                                                       0x26aa
#define mmCP_DFY_CNTL                                                           0x3020
#define mmCP_DFY_STAT                                                           0x3021
#define mmCP_DFY_ADDR_HI                                                        0x3022
#define mmCP_DFY_ADDR_LO                                                        0x3023
#define mmCP_DFY_DATA_0                                                         0x3024
#define mmCP_DFY_DATA_1                                                         0x3025
#define mmCP_DFY_DATA_2                                                         0x3026
#define mmCP_DFY_DATA_3                                                         0x3027
#define mmCP_DFY_DATA_4                                                         0x3028
#define mmCP_DFY_DATA_5                                                         0x3029
#define mmCP_DFY_DATA_6                                                         0x302a
#define mmCP_DFY_DATA_7                                                         0x302b
#define mmCP_DFY_DATA_8                                                         0x302c
#define mmCP_DFY_DATA_9                                                         0x302d
#define mmCP_DFY_DATA_10                                                        0x302e
#define mmCP_DFY_DATA_11                                                        0x302f
#define mmCP_DFY_DATA_12                                                        0x3030
#define mmCP_DFY_DATA_13                                                        0x3031
#define mmCP_DFY_DATA_14                                                        0x3032
#define mmCP_DFY_DATA_15                                                        0x3033
#define mmCP_RB0_BASE                                                           0x3040
#define mmCP_RB0_BASE_HI                                                        0x30b1
#define mmCP_RB_BASE                                                            0x3040
#define mmCP_RB1_BASE                                                           0x3060
#define mmCP_RB1_BASE_HI                                                        0x30b2
#define mmCP_RB2_BASE                                                           0x3065
#define mmCP_RB0_CNTL                                                           0x3041
#define mmCP_RB_CNTL                                                            0x3041
#define mmCP_RB1_CNTL                                                           0x3061
#define mmCP_RB2_CNTL                                                           0x3066
#define mmCP_RB_RPTR_WR                                                         0x3042
#define mmCP_RB0_RPTR_ADDR                                                      0x3043
#define mmCP_RB_RPTR_ADDR                                                       0x3043
#define mmCP_RB1_RPTR_ADDR                                                      0x3062
#define mmCP_RB2_RPTR_ADDR                                                      0x3067
#define mmCP_RB0_RPTR_ADDR_HI                                                   0x3044
#define mmCP_RB_RPTR_ADDR_HI                                                    0x3044
#define mmCP_RB1_RPTR_ADDR_HI                                                   0x3063
#define mmCP_RB2_RPTR_ADDR_HI                                                   0x3068
#define mmCP_RB0_WPTR                                                           0x3045
#define mmCP_RB_WPTR                                                            0x3045
#define mmCP_RB1_WPTR                                                           0x3064
#define mmCP_RB2_WPTR                                                           0x3069
#define mmCP_RB_WPTR_POLL_ADDR_LO                                               0x3046
#define mmCP_RB_WPTR_POLL_ADDR_HI                                               0x3047
#define mmGC_PRIV_MODE                                                          0x3048
#define mmCP_INT_CNTL                                                           0x3049
#define mmCP_INT_CNTL_RING0                                                     0x306a
#define mmCP_INT_CNTL_RING1                                                     0x306b
#define mmCP_INT_CNTL_RING2                                                     0x306c
#define mmCP_INT_STATUS                                                         0x304a
#define mmCP_INT_STATUS_RING0                                                   0x306d
#define mmCP_INT_STATUS_RING1                                                   0x306e
#define mmCP_INT_STATUS_RING2                                                   0x306f
#define mmCP_DEVICE_ID                                                          0x304b
#define mmCP_RING_PRIORITY_CNTS                                                 0x304c
#define mmCP_ME0_PIPE_PRIORITY_CNTS                                             0x304c
#define mmCP_RING0_PRIORITY                                                     0x304d
#define mmCP_ME0_PIPE0_PRIORITY                                                 0x304d
#define mmCP_RING1_PRIORITY                                                     0x304e
#define mmCP_ME0_PIPE1_PRIORITY                                                 0x304e
#define mmCP_RING2_PRIORITY                                                     0x304f
#define mmCP_ME0_PIPE2_PRIORITY                                                 0x304f
#define mmCP_ENDIAN_SWAP                                                        0x3050
#define mmCP_RB_VMID                                                            0x3051
#define mmCP_ME0_PIPE0_VMID                                                     0x3052
#define mmCP_ME0_PIPE1_VMID                                                     0x3053
#define mmCP_PFP_UCODE_ADDR                                                     0x3054
#define mmCP_PFP_UCODE_DATA                                                     0x3055
#define mmCP_ME_RAM_RADDR                                                       0x3056
#define mmCP_ME_RAM_WADDR                                                       0x3057
#define mmCP_ME_RAM_DATA                                                        0x3058
#define mmCGTT_CPC_CLK_CTRL                                                     0xf0b2
#define mmCGTT_CPF_CLK_CTRL                                                     0xf0b1
#define mmCGTT_CP_CLK_CTRL                                                      0xf0b0
#define mmCP_CE_UCODE_ADDR                                                      0x305a
#define mmCP_CE_UCODE_DATA                                                      0x305b
#define mmCP_MEC_ME1_UCODE_ADDR                                                 0x305c
#define mmCP_MEC_ME1_UCODE_DATA                                                 0x305d
#define mmCP_MEC_ME2_UCODE_ADDR                                                 0x305e
#define mmCP_MEC_ME2_UCODE_DATA                                                 0x305f
#define mmCP_PWR_CNTL                                                           0x3078
#define mmCP_MEM_SLP_CNTL                                                       0x3079
#define mmCP_ECC_FIRSTOCCURRENCE                                                0x307a
#define mmCP_ECC_FIRSTOCCURRENCE_RING0                                          0x307b
#define mmCP_ECC_FIRSTOCCURRENCE_RING1                                          0x307c
#define mmCP_ECC_FIRSTOCCURRENCE_RING2                                          0x307d
#define mmCP_CPF_DEBUG                                                          0x3080
#define mmCP_FETCHER_SOURCE                                                     0x3082
#define mmCP_PQ_WPTR_POLL_CNTL                                                  0x3083
#define mmCP_PQ_WPTR_POLL_CNTL1                                                 0x3084
#define mmCPC_INT_CNTL                                                          0x30b4
#define mmCP_ME1_PIPE0_INT_CNTL                                                 0x3085
#define mmCP_ME1_PIPE1_INT_CNTL                                                 0x3086
#define mmCP_ME1_PIPE2_INT_CNTL                                                 0x3087
#define mmCP_ME1_PIPE3_INT_CNTL                                                 0x3088
#define mmCP_ME2_PIPE0_INT_CNTL                                                 0x3089
#define mmCP_ME2_PIPE1_INT_CNTL                                                 0x308a
#define mmCP_ME2_PIPE2_INT_CNTL                                                 0x308b
#define mmCP_ME2_PIPE3_INT_CNTL                                                 0x308c
#define mmCPC_INT_STATUS                                                        0x30b5
#define mmCP_ME1_PIPE0_INT_STATUS                                               0x308d
#define mmCP_ME1_PIPE1_INT_STATUS                                               0x308e
#define mmCP_ME1_PIPE2_INT_STATUS                                               0x308f
#define mmCP_ME1_PIPE3_INT_STATUS                                               0x3090
#define mmCP_ME2_PIPE0_INT_STATUS                                               0x3091
#define mmCP_ME2_PIPE1_INT_STATUS                                               0x3092
#define mmCP_ME2_PIPE2_INT_STATUS                                               0x3093
#define mmCP_ME2_PIPE3_INT_STATUS                                               0x3094
#define mmCP_ME1_INT_STAT_DEBUG                                                 0x3095
#define mmCP_ME2_INT_STAT_DEBUG                                                 0x3096
#define mmCP_ME1_PIPE_PRIORITY_CNTS                                             0x3099
#define mmCP_ME1_PIPE0_PRIORITY                                                 0x309a
#define mmCP_ME1_PIPE1_PRIORITY                                                 0x309b
#define mmCP_ME1_PIPE2_PRIORITY                                                 0x309c
#define mmCP_ME1_PIPE3_PRIORITY                                                 0x309d
#define mmCP_ME2_PIPE_PRIORITY_CNTS                                             0x309e
#define mmCP_ME2_PIPE0_PRIORITY                                                 0x309f
#define mmCP_ME2_PIPE1_PRIORITY                                                 0x30a0
#define mmCP_ME2_PIPE2_PRIORITY                                                 0x30a1
#define mmCP_ME2_PIPE3_PRIORITY                                                 0x30a2
#define mmCP_CE_PRGRM_CNTR_START                                                0x30a3
#define mmCP_PFP_PRGRM_CNTR_START                                               0x30a4
#define mmCP_ME_PRGRM_CNTR_START                                                0x30a5
#define mmCP_MEC1_PRGRM_CNTR_START                                              0x30a6
#define mmCP_MEC2_PRGRM_CNTR_START                                              0x30a7
#define mmCP_CE_INTR_ROUTINE_START                                              0x30a8
#define mmCP_PFP_INTR_ROUTINE_START                                             0x30a9
#define mmCP_ME_INTR_ROUTINE_START                                              0x30aa
#define mmCP_MEC1_INTR_ROUTINE_START                                            0x30ab
#define mmCP_MEC2_INTR_ROUTINE_START                                            0x30ac
#define mmCP_CONTEXT_CNTL                                                       0x30ad
#define mmCP_MAX_CONTEXT                                                        0x30ae
#define mmCP_IQ_WAIT_TIME1                                                      0x30af
#define mmCP_IQ_WAIT_TIME2                                                      0x30b0
#define mmCP_VMID_RESET                                                         0x30b3
#define mmCP_VMID_PREEMPT                                                       0x30b6
#define mmCPC_INT_CNTX_ID                                                       0x30b7
#define mmCP_PQ_STATUS                                                          0x30b8
#define mmCP_CPC_STATUS                                                         0x2084
#define mmCP_CPC_BUSY_STAT                                                      0x2085
#define mmCP_CPC_STALLED_STAT1                                                  0x2086
#define mmCP_CPF_STATUS                                                         0x2087
#define mmCP_CPF_BUSY_STAT                                                      0x2088
#define mmCP_CPF_STALLED_STAT1                                                  0x2089
#define mmCP_CPC_MC_CNTL                                                        0x208a
#define mmCP_CPC_GRBM_FREE_COUNT                                                0x208b
#define mmCP_MEC_CNTL                                                           0x208d
#define mmCP_MEC_ME1_HEADER_DUMP                                                0x208e
#define mmCP_MEC_ME2_HEADER_DUMP                                                0x208f
#define mmCP_CPC_SCRATCH_INDEX                                                  0x2090
#define mmCP_CPC_SCRATCH_DATA                                                   0x2091
#define mmCPG_PERFCOUNTER1_SELECT                                               0xd800
#define mmCPG_PERFCOUNTER1_LO                                                   0xd000
#define mmCPG_PERFCOUNTER1_HI                                                   0xd001
#define mmCPG_PERFCOUNTER0_SELECT1                                              0xd801
#define mmCPG_PERFCOUNTER0_SELECT                                               0xd802
#define mmCPG_PERFCOUNTER0_LO                                                   0xd002
#define mmCPG_PERFCOUNTER0_HI                                                   0xd003
#define mmCPC_PERFCOUNTER1_SELECT                                               0xd803
#define mmCPC_PERFCOUNTER1_LO                                                   0xd004
#define mmCPC_PERFCOUNTER1_HI                                                   0xd005
#define mmCPC_PERFCOUNTER0_SELECT1                                              0xd804
#define mmCPC_PERFCOUNTER0_SELECT                                               0xd809
#define mmCPC_PERFCOUNTER0_LO                                                   0xd006
#define mmCPC_PERFCOUNTER0_HI                                                   0xd007
#define mmCPF_PERFCOUNTER1_SELECT                                               0xd805
#define mmCPF_PERFCOUNTER1_LO                                                   0xd008
#define mmCPF_PERFCOUNTER1_HI                                                   0xd009
#define mmCPF_PERFCOUNTER0_SELECT1                                              0xd806
#define mmCPF_PERFCOUNTER0_SELECT                                               0xd807
#define mmCPF_PERFCOUNTER0_LO                                                   0xd00a
#define mmCPF_PERFCOUNTER0_HI                                                   0xd00b
/*
 * Assorted mmCP_* entries: CPC_HALT_HYST_COUNT, the DRAW_OBJECT / DRAW_WINDOW
 * entries (0xd810-0xd815), PRT_LOD_STATS_CNTL0-2 (0x20ad-0x20af) and the
 * CE/DE count entries (0x20c0-0x20c4).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_CPC_HALT_HYST_COUNT                                                0x20a7
#define mmCP_DRAW_OBJECT                                                        0xd810
#define mmCP_DRAW_OBJECT_COUNTER                                                0xd811
#define mmCP_DRAW_WINDOW_MASK_HI                                                0xd812
#define mmCP_DRAW_WINDOW_HI                                                     0xd813
#define mmCP_DRAW_WINDOW_LO                                                     0xd814
#define mmCP_DRAW_WINDOW_CNTL                                                   0xd815
#define mmCP_PRT_LOD_STATS_CNTL0                                                0x20ad
#define mmCP_PRT_LOD_STATS_CNTL1                                                0x20ae
#define mmCP_PRT_LOD_STATS_CNTL2                                                0x20af
#define mmCP_CE_COMPARE_COUNT                                                   0x20c0
#define mmCP_CE_DE_COUNT                                                        0x20c1
#define mmCP_DE_CE_COUNT                                                        0x20c2
#define mmCP_DE_LAST_INVAL_COUNT                                                0x20c3
#define mmCP_DE_DE_COUNT                                                        0x20c4
/*
 * mmCP_* entries named EOP_*, STREAM_OUT_ADDR, NUM_PRIM_WRITTEN/NEEDED_COUNT0-3,
 * PIPE_STATS_ADDR and the *_COUNT LO/HI pairs (VGT_*, PA_*, SC_*).
 *
 * From mmCP_EOP_DONE_ADDR_LO to mmCP_VGT_CSINVOC_COUNT_HI the values run
 * contiguously from 0xc000 to 0xc031 as LO/HI pairs. The two *_CNTL entries
 * listed first (0xc0d5, 0xc0d6) and mmCP_STRMOUT_CNTL (0xc03f) sit outside
 * that run.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_EOP_DONE_EVENT_CNTL                                                0xc0d5
#define mmCP_EOP_DONE_DATA_CNTL                                                 0xc0d6
#define mmCP_EOP_DONE_ADDR_LO                                                   0xc000
#define mmCP_EOP_DONE_ADDR_HI                                                   0xc001
#define mmCP_EOP_DONE_DATA_LO                                                   0xc002
#define mmCP_EOP_DONE_DATA_HI                                                   0xc003
#define mmCP_EOP_LAST_FENCE_LO                                                  0xc004
#define mmCP_EOP_LAST_FENCE_HI                                                  0xc005
#define mmCP_STREAM_OUT_ADDR_LO                                                 0xc006
#define mmCP_STREAM_OUT_ADDR_HI                                                 0xc007
#define mmCP_NUM_PRIM_WRITTEN_COUNT0_LO                                         0xc008
#define mmCP_NUM_PRIM_WRITTEN_COUNT0_HI                                         0xc009
#define mmCP_NUM_PRIM_NEEDED_COUNT0_LO                                          0xc00a
#define mmCP_NUM_PRIM_NEEDED_COUNT0_HI                                          0xc00b
#define mmCP_NUM_PRIM_WRITTEN_COUNT1_LO                                         0xc00c
#define mmCP_NUM_PRIM_WRITTEN_COUNT1_HI                                         0xc00d
#define mmCP_NUM_PRIM_NEEDED_COUNT1_LO                                          0xc00e
#define mmCP_NUM_PRIM_NEEDED_COUNT1_HI                                          0xc00f
#define mmCP_NUM_PRIM_WRITTEN_COUNT2_LO                                         0xc010
#define mmCP_NUM_PRIM_WRITTEN_COUNT2_HI                                         0xc011
#define mmCP_NUM_PRIM_NEEDED_COUNT2_LO                                          0xc012
#define mmCP_NUM_PRIM_NEEDED_COUNT2_HI                                          0xc013
#define mmCP_NUM_PRIM_WRITTEN_COUNT3_LO                                         0xc014
#define mmCP_NUM_PRIM_WRITTEN_COUNT3_HI                                         0xc015
#define mmCP_NUM_PRIM_NEEDED_COUNT3_LO                                          0xc016
#define mmCP_NUM_PRIM_NEEDED_COUNT3_HI                                          0xc017
#define mmCP_PIPE_STATS_ADDR_LO                                                 0xc018
#define mmCP_PIPE_STATS_ADDR_HI                                                 0xc019
#define mmCP_VGT_IAVERT_COUNT_LO                                                0xc01a
#define mmCP_VGT_IAVERT_COUNT_HI                                                0xc01b
#define mmCP_VGT_IAPRIM_COUNT_LO                                                0xc01c
#define mmCP_VGT_IAPRIM_COUNT_HI                                                0xc01d
#define mmCP_VGT_GSPRIM_COUNT_LO                                                0xc01e
#define mmCP_VGT_GSPRIM_COUNT_HI                                                0xc01f
#define mmCP_VGT_VSINVOC_COUNT_LO                                               0xc020
#define mmCP_VGT_VSINVOC_COUNT_HI                                               0xc021
#define mmCP_VGT_GSINVOC_COUNT_LO                                               0xc022
#define mmCP_VGT_GSINVOC_COUNT_HI                                               0xc023
#define mmCP_VGT_HSINVOC_COUNT_LO                                               0xc024
#define mmCP_VGT_HSINVOC_COUNT_HI                                               0xc025
#define mmCP_VGT_DSINVOC_COUNT_LO                                               0xc026
#define mmCP_VGT_DSINVOC_COUNT_HI                                               0xc027
#define mmCP_PA_CINVOC_COUNT_LO                                                 0xc028
#define mmCP_PA_CINVOC_COUNT_HI                                                 0xc029
#define mmCP_PA_CPRIM_COUNT_LO                                                  0xc02a
#define mmCP_PA_CPRIM_COUNT_HI                                                  0xc02b
#define mmCP_SC_PSINVOC_COUNT0_LO                                               0xc02c
#define mmCP_SC_PSINVOC_COUNT0_HI                                               0xc02d
#define mmCP_SC_PSINVOC_COUNT1_LO                                               0xc02e
#define mmCP_SC_PSINVOC_COUNT1_HI                                               0xc02f
#define mmCP_VGT_CSINVOC_COUNT_LO                                               0xc030
#define mmCP_VGT_CSINVOC_COUNT_HI                                               0xc031
#define mmCP_STRMOUT_CNTL                                                       0xc03f
/*
 * mmSCRATCH_* entries: SCRATCH_REG0..7 occupy 0xc040-0xc047 contiguously;
 * SCRATCH_UMSK and SCRATCH_ADDR follow at 0xc050 and 0xc051.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmSCRATCH_REG0                                                          0xc040
#define mmSCRATCH_REG1                                                          0xc041
#define mmSCRATCH_REG2                                                          0xc042
#define mmSCRATCH_REG3                                                          0xc043
#define mmSCRATCH_REG4                                                          0xc044
#define mmSCRATCH_REG5                                                          0xc045
#define mmSCRATCH_REG6                                                          0xc046
#define mmSCRATCH_REG7                                                          0xc047
#define mmSCRATCH_UMSK                                                          0xc050
#define mmSCRATCH_ADDR                                                          0xc051
/*
 * mmCP_* ATOMIC_PREOP and APPEND entries, 0xc052-0xc062.
 *
 * The CP_PFP_* names (0xc052-0xc057) and CP_APPEND_* names (0xc058-0xc05c)
 * each have a unique value. From 0xc05d onward every value is defined twice:
 * once as mmCP_<NAME> and once as mmCP_ME_<NAME>. The two spellings expand to
 * the same number, so they are interchangeable aliases at the preprocessor
 * level.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_PFP_ATOMIC_PREOP_LO                                                0xc052
#define mmCP_PFP_ATOMIC_PREOP_HI                                                0xc053
#define mmCP_PFP_GDS_ATOMIC0_PREOP_LO                                           0xc054
#define mmCP_PFP_GDS_ATOMIC0_PREOP_HI                                           0xc055
#define mmCP_PFP_GDS_ATOMIC1_PREOP_LO                                           0xc056
#define mmCP_PFP_GDS_ATOMIC1_PREOP_HI                                           0xc057
#define mmCP_APPEND_ADDR_LO                                                     0xc058
#define mmCP_APPEND_ADDR_HI                                                     0xc059
#define mmCP_APPEND_DATA                                                        0xc05a
#define mmCP_APPEND_LAST_CS_FENCE                                               0xc05b
#define mmCP_APPEND_LAST_PS_FENCE                                               0xc05c
#define mmCP_ATOMIC_PREOP_LO                                                    0xc05d
#define mmCP_ME_ATOMIC_PREOP_LO                                                 0xc05d
#define mmCP_ATOMIC_PREOP_HI                                                    0xc05e
#define mmCP_ME_ATOMIC_PREOP_HI                                                 0xc05e
#define mmCP_GDS_ATOMIC0_PREOP_LO                                               0xc05f
#define mmCP_ME_GDS_ATOMIC0_PREOP_LO                                            0xc05f
#define mmCP_GDS_ATOMIC0_PREOP_HI                                               0xc060
#define mmCP_ME_GDS_ATOMIC0_PREOP_HI                                            0xc060
#define mmCP_GDS_ATOMIC1_PREOP_LO                                               0xc061
#define mmCP_ME_GDS_ATOMIC1_PREOP_LO                                            0xc061
#define mmCP_GDS_ATOMIC1_PREOP_HI                                               0xc062
#define mmCP_ME_GDS_ATOMIC1_PREOP_HI                                            0xc062
/*
 * mmCP_ME_MC_* (0xc069-0xc06e), semaphore / wait entries (0xc06f-0xc076) and
 * mmCP_COHER_* entries.
 *
 * Listing order is not value order in a few places:
 *  - mmCP_WAIT_REG_MEM_TIMEOUT (0xc074) is listed after the WAIT_SEM_ADDR
 *    pair (0xc075, 0xc076).
 *  - mmCP_COHER_SIZE_HI (0xc08c) and mmCP_COHER_BASE_HI (0xc079) are not
 *    adjacent to mmCP_COHER_SIZE (0xc07d) / mmCP_COHER_BASE (0xc07e).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_ME_MC_WADDR_LO                                                     0xc069
#define mmCP_ME_MC_WADDR_HI                                                     0xc06a
#define mmCP_ME_MC_WDATA_LO                                                     0xc06b
#define mmCP_ME_MC_WDATA_HI                                                     0xc06c
#define mmCP_ME_MC_RADDR_LO                                                     0xc06d
#define mmCP_ME_MC_RADDR_HI                                                     0xc06e
#define mmCP_SEM_WAIT_TIMER                                                     0xc06f
#define mmCP_SIG_SEM_ADDR_LO                                                    0xc070
#define mmCP_SIG_SEM_ADDR_HI                                                    0xc071
#define mmCP_WAIT_SEM_ADDR_LO                                                   0xc075
#define mmCP_WAIT_SEM_ADDR_HI                                                   0xc076
#define mmCP_WAIT_REG_MEM_TIMEOUT                                               0xc074
#define mmCP_COHER_START_DELAY                                                  0xc07b
#define mmCP_COHER_CNTL                                                         0xc07c
#define mmCP_COHER_SIZE                                                         0xc07d
#define mmCP_COHER_SIZE_HI                                                      0xc08c
#define mmCP_COHER_BASE                                                         0xc07e
#define mmCP_COHER_BASE_HI                                                      0xc079
#define mmCP_COHER_STATUS                                                       0xc07f
/*
 * mmCOHER_DEST_BASE_0..3 and mmCOHER_DEST_BASE_HI_0..3.
 * The HI entries are contiguous (0xa07a-0xa07d). The base entries are split:
 * indices 2 and 3 are at 0xa07e/0xa07f, indices 0 and 1 at 0xa092/0xa093.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCOHER_DEST_BASE_0                                                     0xa092
#define mmCOHER_DEST_BASE_1                                                     0xa093
#define mmCOHER_DEST_BASE_2                                                     0xa07e
#define mmCOHER_DEST_BASE_3                                                     0xa07f
#define mmCOHER_DEST_BASE_HI_0                                                  0xa07a
#define mmCOHER_DEST_BASE_HI_1                                                  0xa07b
#define mmCOHER_DEST_BASE_HI_2                                                  0xa07c
#define mmCOHER_DEST_BASE_HI_3                                                  0xa07d
/*
 * mmCP_DMA_* entries, with parallel ME and PFP sets (SRC_ADDR, SRC_ADDR_HI,
 * DST_ADDR, DST_ADDR_HI, CONTROL, COMMAND) followed by DMA_CNTL and
 * DMA_READ_TAGS.
 *
 * The two CONTROL entries (mmCP_DMA_ME_CONTROL 0xc078, mmCP_DMA_PFP_CONTROL
 * 0xc077) lie below the otherwise contiguous 0xc080-0xc08b range.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_DMA_ME_SRC_ADDR                                                    0xc080
#define mmCP_DMA_ME_SRC_ADDR_HI                                                 0xc081
#define mmCP_DMA_ME_DST_ADDR                                                    0xc082
#define mmCP_DMA_ME_DST_ADDR_HI                                                 0xc083
#define mmCP_DMA_ME_CONTROL                                                     0xc078
#define mmCP_DMA_ME_COMMAND                                                     0xc084
#define mmCP_DMA_PFP_SRC_ADDR                                                   0xc085
#define mmCP_DMA_PFP_SRC_ADDR_HI                                                0xc086
#define mmCP_DMA_PFP_DST_ADDR                                                   0xc087
#define mmCP_DMA_PFP_DST_ADDR_HI                                                0xc088
#define mmCP_DMA_PFP_CONTROL                                                    0xc077
#define mmCP_DMA_PFP_COMMAND                                                    0xc089
#define mmCP_DMA_CNTL                                                           0xc08a
#define mmCP_DMA_READ_TAGS                                                      0xc08b
/*
 * mmCP_* entries occupying 0xc08d-0xc09a contiguously: PFP IB/LOAD control,
 * SCRATCH_INDEX/DATA, RB/IB1/IB2 offsets, IB1/IB2 preamble begin/end, and the
 * CE IB1/IB2 offsets plus CE_COUNTER.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_PFP_IB_CONTROL                                                     0xc08d
#define mmCP_PFP_LOAD_CONTROL                                                   0xc08e
#define mmCP_SCRATCH_INDEX                                                      0xc08f
#define mmCP_SCRATCH_DATA                                                       0xc090
#define mmCP_RB_OFFSET                                                          0xc091
#define mmCP_IB1_OFFSET                                                         0xc092
#define mmCP_IB2_OFFSET                                                         0xc093
#define mmCP_IB1_PREAMBLE_BEGIN                                                 0xc094
#define mmCP_IB1_PREAMBLE_END                                                   0xc095
#define mmCP_IB2_PREAMBLE_BEGIN                                                 0xc096
#define mmCP_IB2_PREAMBLE_END                                                   0xc097
#define mmCP_CE_IB1_OFFSET                                                      0xc098
#define mmCP_CE_IB2_OFFSET                                                      0xc099
#define mmCP_CE_COUNTER                                                         0xc09a
/*
 * mmCP_* status-style entries in the 0x219c-0x21c2 range (STALLED_STAT*,
 * BUSY_STAT, STAT, *_HEADER_DUMP, MC_*, CSF_*, ME_CNTL, ...), followed by the
 * RB*_RPTR and RB_WPTR_* entries.
 *
 * Points worth knowing when searching by value:
 *  - mmCP_STALLED_STAT3 (0x219c) has a lower value than STAT1/STAT2.
 *  - mmCP_RB0_RPTR and mmCP_RB_RPTR are two names for the same value 0x21c0.
 *  - mmCP_RB1_RPTR (0x21bf) and mmCP_RB2_RPTR (0x21be) descend from RB0.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_STALLED_STAT1                                                      0x219d
#define mmCP_STALLED_STAT2                                                      0x219e
#define mmCP_STALLED_STAT3                                                      0x219c
#define mmCP_BUSY_STAT                                                          0x219f
#define mmCP_STAT                                                               0x21a0
#define mmCP_ME_HEADER_DUMP                                                     0x21a1
#define mmCP_PFP_HEADER_DUMP                                                    0x21a2
#define mmCP_GRBM_FREE_COUNT                                                    0x21a3
#define mmCP_CE_HEADER_DUMP                                                     0x21a4
#define mmCP_MC_PACK_DELAY_CNT                                                  0x21a7
#define mmCP_MC_TAG_CNTL                                                        0x21a8
#define mmCP_MC_TAG_DATA                                                        0x21a9
#define mmCP_CSF_STAT                                                           0x21b4
#define mmCP_CSF_CNTL                                                           0x21b5
#define mmCP_ME_CNTL                                                            0x21b6
#define mmCP_CNTX_STAT                                                          0x21b8
#define mmCP_ME_PREEMPTION                                                      0x21b9
#define mmCP_RB0_RPTR                                                           0x21c0
#define mmCP_RB_RPTR                                                            0x21c0
#define mmCP_RB1_RPTR                                                           0x21bf
#define mmCP_RB2_RPTR                                                           0x21be
#define mmCP_RB_WPTR_DELAY                                                      0x21c1
#define mmCP_RB_WPTR_POLL_CNTL                                                  0x21c2
/*
 * Six BASE_LO / BASE_HI / BUFSZ triples (CE_INIT, CE_IB1, CE_IB2, IB1, IB2,
 * ST) laid out contiguously from 0xc0c3 to 0xc0d4.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_CE_INIT_BASE_LO                                                    0xc0c3
#define mmCP_CE_INIT_BASE_HI                                                    0xc0c4
#define mmCP_CE_INIT_BUFSZ                                                      0xc0c5
#define mmCP_CE_IB1_BASE_LO                                                     0xc0c6
#define mmCP_CE_IB1_BASE_HI                                                     0xc0c7
#define mmCP_CE_IB1_BUFSZ                                                       0xc0c8
#define mmCP_CE_IB2_BASE_LO                                                     0xc0c9
#define mmCP_CE_IB2_BASE_HI                                                     0xc0ca
#define mmCP_CE_IB2_BUFSZ                                                       0xc0cb
#define mmCP_IB1_BASE_LO                                                        0xc0cc
#define mmCP_IB1_BASE_HI                                                        0xc0cd
#define mmCP_IB1_BUFSZ                                                          0xc0ce
#define mmCP_IB2_BASE_LO                                                        0xc0cf
#define mmCP_IB2_BASE_HI                                                        0xc0d0
#define mmCP_IB2_BUFSZ                                                          0xc0d1
#define mmCP_ST_BASE_LO                                                         0xc0d2
#define mmCP_ST_BASE_HI                                                         0xc0d3
#define mmCP_ST_BUFSZ                                                           0xc0d4
/*
 * mmCP_* queue entries: *_THRESHOLD(S), *_AVAIL, CMD_INDEX/CMD_DATA and
 * *_STAT names for the ROQ, STQ, MEQ and CEQ/CE_ROQ prefixes, plus
 * INT_STAT_DEBUG.
 *
 * mmCP_ROQ_THRESHOLDS (0x21bc) and mmCP_MEQ_STQ_THRESHOLD (0x21bd) precede
 * the contiguous 0x21d5-0x21ea run; mmCP_INT_STAT_DEBUG is at 0x21f7.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_ROQ_THRESHOLDS                                                     0x21bc
#define mmCP_MEQ_STQ_THRESHOLD                                                  0x21bd
#define mmCP_ROQ1_THRESHOLDS                                                    0x21d5
#define mmCP_ROQ2_THRESHOLDS                                                    0x21d6
#define mmCP_STQ_THRESHOLDS                                                     0x21d7
#define mmCP_QUEUE_THRESHOLDS                                                   0x21d8
#define mmCP_MEQ_THRESHOLDS                                                     0x21d9
#define mmCP_ROQ_AVAIL                                                          0x21da
#define mmCP_STQ_AVAIL                                                          0x21db
#define mmCP_ROQ2_AVAIL                                                         0x21dc
#define mmCP_MEQ_AVAIL                                                          0x21dd
#define mmCP_CMD_INDEX                                                          0x21de
#define mmCP_CMD_DATA                                                           0x21df
#define mmCP_ROQ_RB_STAT                                                        0x21e0
#define mmCP_ROQ_IB1_STAT                                                       0x21e1
#define mmCP_ROQ_IB2_STAT                                                       0x21e2
#define mmCP_STQ_STAT                                                           0x21e3
#define mmCP_STQ_WR_STAT                                                        0x21e4
#define mmCP_MEQ_STAT                                                           0x21e5
#define mmCP_CEQ1_AVAIL                                                         0x21e6
#define mmCP_CEQ2_AVAIL                                                         0x21e7
#define mmCP_CE_ROQ_RB_STAT                                                     0x21e8
#define mmCP_CE_ROQ_IB1_STAT                                                    0x21e9
#define mmCP_CE_ROQ_IB2_STAT                                                    0x21ea
#define mmCP_INT_STAT_DEBUG                                                     0x21f7
/*
 * mmCP_PERFMON_CNTL / mmCP_PERFMON_CNTX_CNTL and the RINGID / PIPEID / VMID
 * entries. mmCP_RINGID and mmCP_PIPEID are two names for the same value
 * (0xa0d9).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_PERFMON_CNTL                                                       0xd808
#define mmCP_PERFMON_CNTX_CNTL                                                  0xa0d8
#define mmCP_RINGID                                                             0xa0d9
#define mmCP_PIPEID                                                             0xa0d9
#define mmCP_VMID                                                               0xa0da
/*
 * mmCP_HPD_*, mmCP_MQD_* and mmCP_HQD_* entries. The 40 values are strictly
 * contiguous and in listing order, from mmCP_HPD_ROQ_OFFSETS (0x3240) to
 * mmCP_MQD_CONTROL (0x3267).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCP_HPD_ROQ_OFFSETS                                                    0x3240
#define mmCP_HPD_EOP_BASE_ADDR                                                  0x3241
#define mmCP_HPD_EOP_BASE_ADDR_HI                                               0x3242
#define mmCP_HPD_EOP_VMID                                                       0x3243
#define mmCP_HPD_EOP_CONTROL                                                    0x3244
#define mmCP_MQD_BASE_ADDR                                                      0x3245
#define mmCP_MQD_BASE_ADDR_HI                                                   0x3246
#define mmCP_HQD_ACTIVE                                                         0x3247
#define mmCP_HQD_VMID                                                           0x3248
#define mmCP_HQD_PERSISTENT_STATE                                               0x3249
#define mmCP_HQD_PIPE_PRIORITY                                                  0x324a
#define mmCP_HQD_QUEUE_PRIORITY                                                 0x324b
#define mmCP_HQD_QUANTUM                                                        0x324c
#define mmCP_HQD_PQ_BASE                                                        0x324d
#define mmCP_HQD_PQ_BASE_HI                                                     0x324e
#define mmCP_HQD_PQ_RPTR                                                        0x324f
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR                                            0x3250
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI                                         0x3251
#define mmCP_HQD_PQ_WPTR_POLL_ADDR                                              0x3252
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI                                           0x3253
#define mmCP_HQD_PQ_DOORBELL_CONTROL                                            0x3254
#define mmCP_HQD_PQ_WPTR                                                        0x3255
#define mmCP_HQD_PQ_CONTROL                                                     0x3256
#define mmCP_HQD_IB_BASE_ADDR                                                   0x3257
#define mmCP_HQD_IB_BASE_ADDR_HI                                                0x3258
#define mmCP_HQD_IB_RPTR                                                        0x3259
#define mmCP_HQD_IB_CONTROL                                                     0x325a
#define mmCP_HQD_IQ_TIMER                                                       0x325b
#define mmCP_HQD_IQ_RPTR                                                        0x325c
#define mmCP_HQD_DEQUEUE_REQUEST                                                0x325d
#define mmCP_HQD_DMA_OFFLOAD                                                    0x325e
#define mmCP_HQD_SEMA_CMD                                                       0x325f
#define mmCP_HQD_MSG_TYPE                                                       0x3260
#define mmCP_HQD_ATOMIC0_PREOP_LO                                               0x3261
#define mmCP_HQD_ATOMIC0_PREOP_HI                                               0x3262
#define mmCP_HQD_ATOMIC1_PREOP_LO                                               0x3263
#define mmCP_HQD_ATOMIC1_PREOP_HI                                               0x3264
#define mmCP_HQD_HQ_SCHEDULER0                                                  0x3265
#define mmCP_HQD_HQ_SCHEDULER1                                                  0x3266
#define mmCP_MQD_CONTROL                                                        0x3267
/*
 * mmDB_* entries in the 0xa0xx-0xa2xx range: Z/stencil base and info, depth
 * size/slice/view, render control/override, EQAA, shader control, depth
 * bounds, clear values, HTILE, stencil ref masks, SRESULTS compare state,
 * depth/stencil control and ALPHA_TO_MASK.
 *
 * The listing is grouped by name rather than sorted by value, so neighbouring
 * lines are frequently not neighbouring values.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmDB_Z_READ_BASE                                                        0xa012
#define mmDB_STENCIL_READ_BASE                                                  0xa013
#define mmDB_Z_WRITE_BASE                                                       0xa014
#define mmDB_STENCIL_WRITE_BASE                                                 0xa015
#define mmDB_DEPTH_INFO                                                         0xa00f
#define mmDB_Z_INFO                                                             0xa010
#define mmDB_STENCIL_INFO                                                       0xa011
#define mmDB_DEPTH_SIZE                                                         0xa016
#define mmDB_DEPTH_SLICE                                                        0xa017
#define mmDB_DEPTH_VIEW                                                         0xa002
#define mmDB_RENDER_CONTROL                                                     0xa000
#define mmDB_COUNT_CONTROL                                                      0xa001
#define mmDB_RENDER_OVERRIDE                                                    0xa003
#define mmDB_RENDER_OVERRIDE2                                                   0xa004
#define mmDB_EQAA                                                               0xa201
#define mmDB_SHADER_CONTROL                                                     0xa203
#define mmDB_DEPTH_BOUNDS_MIN                                                   0xa008
#define mmDB_DEPTH_BOUNDS_MAX                                                   0xa009
#define mmDB_STENCIL_CLEAR                                                      0xa00a
#define mmDB_DEPTH_CLEAR                                                        0xa00b
#define mmDB_HTILE_DATA_BASE                                                    0xa005
#define mmDB_HTILE_SURFACE                                                      0xa2af
#define mmDB_PRELOAD_CONTROL                                                    0xa2b2
#define mmDB_STENCILREFMASK                                                     0xa10c
#define mmDB_STENCILREFMASK_BF                                                  0xa10d
#define mmDB_SRESULTS_COMPARE_STATE0                                            0xa2b0
#define mmDB_SRESULTS_COMPARE_STATE1                                            0xa2b1
#define mmDB_DEPTH_CONTROL                                                      0xa200
#define mmDB_STENCIL_CONTROL                                                    0xa10b
#define mmDB_ALPHA_TO_MASK                                                      0xa2dc
/*
 * mmDB_PERFCOUNTER0..3 entries.
 *  - SELECT values use the even slots 0xdc40, 0xdc42, 0xdc44, 0xdc46.
 *  - SELECT1 is defined only for counters 0 and 1 (0xdc41, 0xdc43).
 *  - LO/HI pairs are interleaved per counter across 0xd440-0xd447
 *    (LO on the even value, HI on the following odd value).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmDB_PERFCOUNTER0_SELECT                                                0xdc40
#define mmDB_PERFCOUNTER1_SELECT                                                0xdc42
#define mmDB_PERFCOUNTER2_SELECT                                                0xdc44
#define mmDB_PERFCOUNTER3_SELECT                                                0xdc46
#define mmDB_PERFCOUNTER0_SELECT1                                               0xdc41
#define mmDB_PERFCOUNTER1_SELECT1                                               0xdc43
#define mmDB_PERFCOUNTER0_LO                                                    0xd440
#define mmDB_PERFCOUNTER1_LO                                                    0xd442
#define mmDB_PERFCOUNTER2_LO                                                    0xd444
#define mmDB_PERFCOUNTER3_LO                                                    0xd446
#define mmDB_PERFCOUNTER0_HI                                                    0xd441
#define mmDB_PERFCOUNTER1_HI                                                    0xd443
#define mmDB_PERFCOUNTER2_HI                                                    0xd445
#define mmDB_PERFCOUNTER3_HI                                                    0xd447
/*
 * mmDB_* entries in the 0x26xx range: DEBUG..DEBUG4 (0x260c-0x260f),
 * CREDIT_LIMIT through FIFO_DEPTH2 (0x2614-0x2619), RING_CONTROL (0x261b) and
 * READ_DEBUG_0..F (0x2620-0x262f, one value per hex digit suffix).
 *
 * Three entries listed in the middle fall outside that range:
 * mmDB_CGTT_CLK_CTRL_0 (0xf0a4) and the mmDB_ZPASS_COUNT_LOW/HI pair
 * (0xc3fe, 0xc3ff).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmDB_DEBUG                                                              0x260c
#define mmDB_DEBUG2                                                             0x260d
#define mmDB_DEBUG3                                                             0x260e
#define mmDB_DEBUG4                                                             0x260f
#define mmDB_CREDIT_LIMIT                                                       0x2614
#define mmDB_WATERMARKS                                                         0x2615
#define mmDB_SUBTILE_CONTROL                                                    0x2616
#define mmDB_FREE_CACHELINES                                                    0x2617
#define mmDB_FIFO_DEPTH1                                                        0x2618
#define mmDB_FIFO_DEPTH2                                                        0x2619
#define mmDB_CGTT_CLK_CTRL_0                                                    0xf0a4
#define mmDB_ZPASS_COUNT_LOW                                                    0xc3fe
#define mmDB_ZPASS_COUNT_HI                                                     0xc3ff
#define mmDB_RING_CONTROL                                                       0x261b
#define mmDB_READ_DEBUG_0                                                       0x2620
#define mmDB_READ_DEBUG_1                                                       0x2621
#define mmDB_READ_DEBUG_2                                                       0x2622
#define mmDB_READ_DEBUG_3                                                       0x2623
#define mmDB_READ_DEBUG_4                                                       0x2624
#define mmDB_READ_DEBUG_5                                                       0x2625
#define mmDB_READ_DEBUG_6                                                       0x2626
#define mmDB_READ_DEBUG_7                                                       0x2627
#define mmDB_READ_DEBUG_8                                                       0x2628
#define mmDB_READ_DEBUG_9                                                       0x2629
#define mmDB_READ_DEBUG_A                                                       0x262a
#define mmDB_READ_DEBUG_B                                                       0x262b
#define mmDB_READ_DEBUG_C                                                       0x262c
#define mmDB_READ_DEBUG_D                                                       0x262d
#define mmDB_READ_DEBUG_E                                                       0x262e
#define mmDB_READ_DEBUG_F                                                       0x262f
/*
 * mmDB_OCCLUSION_COUNT0..3 as LOW/HI pairs, contiguous over 0xc3c0-0xc3c7.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmDB_OCCLUSION_COUNT0_LOW                                               0xc3c0
#define mmDB_OCCLUSION_COUNT0_HI                                                0xc3c1
#define mmDB_OCCLUSION_COUNT1_LOW                                               0xc3c2
#define mmDB_OCCLUSION_COUNT1_HI                                                0xc3c3
#define mmDB_OCCLUSION_COUNT2_LOW                                               0xc3c4
#define mmDB_OCCLUSION_COUNT2_HI                                                0xc3c5
#define mmDB_OCCLUSION_COUNT3_LOW                                               0xc3c6
#define mmDB_OCCLUSION_COUNT3_HI                                                0xc3c7
/*
 * mmCC_RB_*, mmGC_USER_RB_* and mmGB_* configuration-style entries.
 * The CC_RB_* / GB_* values run 0x263c-0x2641; the GC_USER_RB_* counterparts
 * of RB_REDUNDANCY and RB_BACKEND_DISABLE are at 0x26de and 0x26df.
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmCC_RB_REDUNDANCY                                                      0x263c
#define mmCC_RB_BACKEND_DISABLE                                                 0x263d
#define mmGC_USER_RB_REDUNDANCY                                                 0x26de
#define mmGC_USER_RB_BACKEND_DISABLE                                            0x26df
#define mmGB_ADDR_CONFIG                                                        0x263e
#define mmGB_BACKEND_MAP                                                        0x263f
#define mmGB_GPU_ID                                                             0x2640
#define mmCC_RB_DAISY_CHAIN                                                     0x2641
/*
 * mmGB_TILE_MODE0..31: 32 entries at consecutive values, so
 * mmGB_TILE_MODE<n> == mmGB_TILE_MODE0 + n (0x2644 through 0x2663).
 * NOTE(review): presumably register offsets -- verify against the register
 * spec before editing.
 */
#define mmGB_TILE_MODE0                                                         0x2644
#define mmGB_TILE_MODE1                                                         0x2645
#define mmGB_TILE_MODE2                                                         0x2646
#define mmGB_TILE_MODE3                                                         0x2647
#define mmGB_TILE_MODE4                                                         0x2648
#define mmGB_TILE_MODE5                                                         0x2649
#define mmGB_TILE_MODE6                                                         0x264a
#define mmGB_TILE_MODE7                                                         0x264b
#define mmGB_TILE_MODE8                                                         0x264c
#define mmGB_TILE_MODE9                                                         0x264d
#define mmGB_TILE_MODE10                                                        0x264e
#define mmGB_TILE_MODE11                                                        0x264f
#define mmGB_TILE_MODE12                                                        0x2650
#define mmGB_TILE_MODE13                                                        0x2651
#define mmGB_TILE_MODE14                                                        0x2652
#define mmGB_TILE_MODE15                                                        0x2653
#define mmGB_TILE_MODE16                                                        0x2654
#define mmGB_TILE_MODE17                                                        0x2655
#define mmGB_TILE_MODE18                                                        0x2656
#define mmGB_TILE_MODE19                                                        0x2657
#define mmGB_TILE_MODE20                                                        0x2658
#define mmGB_TILE_MODE21                                                        0x2659
#define mmGB_TILE_MODE22                                                        0x265a
#define mmGB_TILE_MODE23                                                        0x265b
#define mmGB_TILE_MODE24                                                        0x265c
#define mmGB_TILE_MODE25                                                        0x265d
#define mmGB_TILE_MODE26                                                        0x265e
#define mmGB_TILE_MODE27                                                        0x265f
#define mmGB_TILE_MODE28                                                        0x2660
#define mmGB_TILE_MODE29                                                        0x2661
#define mmGB_TILE_MODE30                                                        0x2662
#define mmGB_TILE_MODE31                                                        0x2663
/*
 * GB_MACROTILE_MODE0 .. GB_MACROTILE_MODE15: 16 consecutive values,
 * mmGB_MACROTILE_MODE<n> == 0x2664 + n.
 */
#define mmGB_MACROTILE_MODE0                                                    0x2664
#define mmGB_MACROTILE_MODE1                                                    0x2665
#define mmGB_MACROTILE_MODE2                                                    0x2666
#define mmGB_MACROTILE_MODE3                                                    0x2667
#define mmGB_MACROTILE_MODE4                                                    0x2668
#define mmGB_MACROTILE_MODE5                                                    0x2669
#define mmGB_MACROTILE_MODE6                                                    0x266a
#define mmGB_MACROTILE_MODE7                                                    0x266b
#define mmGB_MACROTILE_MODE8                                                    0x266c
#define mmGB_MACROTILE_MODE9                                                    0x266d
#define mmGB_MACROTILE_MODE10                                                   0x266e
#define mmGB_MACROTILE_MODE11                                                   0x266f
#define mmGB_MACROTILE_MODE12                                                   0x2670
#define mmGB_MACROTILE_MODE13                                                   0x2671
#define mmGB_MACROTILE_MODE14                                                   0x2672
#define mmGB_MACROTILE_MODE15                                                   0x2673
/*
 * EDC-named macros (0x307e, 0x3098).
 * NOTE(review): "EDC" presumably refers to error detection/correction - confirm.
 */
#define mmGB_EDC_MODE                                                           0x307e
#define mmCC_GC_EDC_CONFIG                                                      0x3098
/*
 * RAS_* signature macros, spanning 0x3380 .. 0x339f.
 * Values are consecutive except for a gap: RAS_SX_SIGNATURE3 is 0x3385 and
 * the next entry, RAS_DB_SIGNATURE0, is 0x338b (0x3386..0x338a are not
 * defined in this group).
 */
#define mmRAS_SIGNATURE_CONTROL                                                 0x3380
#define mmRAS_SIGNATURE_MASK                                                    0x3381
#define mmRAS_SX_SIGNATURE0                                                     0x3382
#define mmRAS_SX_SIGNATURE1                                                     0x3383
#define mmRAS_SX_SIGNATURE2                                                     0x3384
#define mmRAS_SX_SIGNATURE3                                                     0x3385
#define mmRAS_DB_SIGNATURE0                                                     0x338b
#define mmRAS_PA_SIGNATURE0                                                     0x338c
#define mmRAS_VGT_SIGNATURE0                                                    0x338d
#define mmRAS_SQ_SIGNATURE0                                                     0x338e
#define mmRAS_SC_SIGNATURE0                                                     0x338f
#define mmRAS_SC_SIGNATURE1                                                     0x3390
#define mmRAS_SC_SIGNATURE2                                                     0x3391
#define mmRAS_SC_SIGNATURE3                                                     0x3392
#define mmRAS_SC_SIGNATURE4                                                     0x3393
#define mmRAS_SC_SIGNATURE5                                                     0x3394
#define mmRAS_SC_SIGNATURE6                                                     0x3395
#define mmRAS_SC_SIGNATURE7                                                     0x3396
#define mmRAS_IA_SIGNATURE0                                                     0x3397
#define mmRAS_IA_SIGNATURE1                                                     0x3398
#define mmRAS_SPI_SIGNATURE0                                                    0x3399
#define mmRAS_SPI_SIGNATURE1                                                    0x339a
#define mmRAS_TA_SIGNATURE0                                                     0x339b
#define mmRAS_TD_SIGNATURE0                                                     0x339c
#define mmRAS_CB_SIGNATURE0                                                     0x339d
#define mmRAS_BCI_SIGNATURE0                                                    0x339e
#define mmRAS_BCI_SIGNATURE1                                                    0x339f
/*
 * GRBM_* macros, plus DEBUG_INDEX / DEBUG_DATA which share the 0x20xx range.
 * The list is grouped by name, not sorted by value. Irregularities that are
 * present in the values as written:
 *   - GRBM_STATUS2 (0x2002) is numerically below GRBM_STATUS (0x2004).
 *   - GRBM_STATUS_SE2/SE3 (0x200e, 0x200f) are not adjacent to SE0/SE1
 *     (0x2005, 0x2006).
 *   - GRBM_PERFCOUNTER0_HI is 0xd041 and GRBM_PERFCOUNTER1_LO is 0xd043;
 *     0xd042 is not defined in this group.
 *   - GRBM_GFX_INDEX (0xc200) and GRBM_CAM_INDEX/DATA (0x3000, 0x3001) lie
 *     outside the 0x20xx range used by most of the group.
 * NOTE(review): these look intentional (hardware layout) rather than typos,
 * but that cannot be verified from this file - check the register spec
 * before "fixing" any of them.
 */
#define mmGRBM_CAM_INDEX                                                        0x3000
#define mmGRBM_CAM_DATA                                                         0x3001
#define mmGRBM_CNTL                                                             0x2000
#define mmGRBM_SKEW_CNTL                                                        0x2001
#define mmGRBM_PWR_CNTL                                                         0x2003
#define mmGRBM_STATUS                                                           0x2004
#define mmGRBM_STATUS2                                                          0x2002
#define mmGRBM_STATUS_SE0                                                       0x2005
#define mmGRBM_STATUS_SE1                                                       0x2006
#define mmGRBM_STATUS_SE2                                                       0x200e
#define mmGRBM_STATUS_SE3                                                       0x200f
#define mmGRBM_SOFT_RESET                                                       0x2008
#define mmGRBM_DEBUG_CNTL                                                       0x2009
#define mmGRBM_DEBUG_DATA                                                       0x200a
#define mmGRBM_GFX_INDEX                                                        0xc200
#define mmGRBM_GFX_CLKEN_CNTL                                                   0x200c
#define mmGRBM_WAIT_IDLE_CLOCKS                                                 0x200d
#define mmGRBM_DEBUG                                                            0x2014
#define mmGRBM_DEBUG_SNAPSHOT                                                   0x2015
#define mmGRBM_READ_ERROR                                                       0x2016
#define mmGRBM_READ_ERROR2                                                      0x2017
#define mmGRBM_INT_CNTL                                                         0x2018
#define mmGRBM_PERFCOUNTER0_SELECT                                              0xd840
#define mmGRBM_PERFCOUNTER1_SELECT                                              0xd841
#define mmGRBM_SE0_PERFCOUNTER_SELECT                                           0xd842
#define mmGRBM_SE1_PERFCOUNTER_SELECT                                           0xd843
#define mmGRBM_SE2_PERFCOUNTER_SELECT                                           0xd844
#define mmGRBM_SE3_PERFCOUNTER_SELECT                                           0xd845
#define mmGRBM_PERFCOUNTER0_LO                                                  0xd040
#define mmGRBM_PERFCOUNTER0_HI                                                  0xd041
#define mmGRBM_PERFCOUNTER1_LO                                                  0xd043
#define mmGRBM_PERFCOUNTER1_HI                                                  0xd044
#define mmGRBM_SE0_PERFCOUNTER_LO                                               0xd045
#define mmGRBM_SE0_PERFCOUNTER_HI                                               0xd046
#define mmGRBM_SE1_PERFCOUNTER_LO                                               0xd047
#define mmGRBM_SE1_PERFCOUNTER_HI                                               0xd048
#define mmGRBM_SE2_PERFCOUNTER_LO                                               0xd049
#define mmGRBM_SE2_PERFCOUNTER_HI                                               0xd04a
#define mmGRBM_SE3_PERFCOUNTER_LO                                               0xd04b
#define mmGRBM_SE3_PERFCOUNTER_HI                                               0xd04c
#define mmGRBM_SCRATCH_REG0                                                     0x2040
#define mmGRBM_SCRATCH_REG1                                                     0x2041
#define mmGRBM_SCRATCH_REG2                                                     0x2042
#define mmGRBM_SCRATCH_REG3                                                     0x2043
#define mmGRBM_SCRATCH_REG4                                                     0x2044
#define mmGRBM_SCRATCH_REG5                                                     0x2045
#define mmGRBM_SCRATCH_REG6                                                     0x2046
#define mmGRBM_SCRATCH_REG7                                                     0x2047
#define mmDEBUG_INDEX                                                           0x203c
#define mmDEBUG_DATA                                                            0x203d
#define mmGRBM_NOWHERE                                                          0x203f
/*
 * PA_CL_VPORT_* macros for 16 viewport indices (0..15).
 * Layout visible in the values: each index owns six consecutive values in
 * the order XSCALE, XOFFSET, YSCALE, YOFFSET, ZSCALE, ZOFFSET, and indices
 * are spaced 6 apart:
 *   mmPA_CL_VPORT_XSCALE_<n>  == 0xa10f + 6*n
 *   mmPA_CL_VPORT_XOFFSET_<n> == 0xa110 + 6*n
 *   mmPA_CL_VPORT_YSCALE_<n>  == 0xa111 + 6*n
 *   mmPA_CL_VPORT_YOFFSET_<n> == 0xa112 + 6*n
 *   mmPA_CL_VPORT_ZSCALE_<n>  == 0xa113 + 6*n
 *   mmPA_CL_VPORT_ZOFFSET_<n> == 0xa114 + 6*n
 * The six unsuffixed names carry the n == 0 values (0xa10f..0xa114).
 * The listing below is grouped by component, not by viewport index.
 */
#define mmPA_CL_VPORT_XSCALE                                                    0xa10f
#define mmPA_CL_VPORT_XOFFSET                                                   0xa110
#define mmPA_CL_VPORT_YSCALE                                                    0xa111
#define mmPA_CL_VPORT_YOFFSET                                                   0xa112
#define mmPA_CL_VPORT_ZSCALE                                                    0xa113
#define mmPA_CL_VPORT_ZOFFSET                                                   0xa114
#define mmPA_CL_VPORT_XSCALE_1                                                  0xa115
#define mmPA_CL_VPORT_XSCALE_2                                                  0xa11b
#define mmPA_CL_VPORT_XSCALE_3                                                  0xa121
#define mmPA_CL_VPORT_XSCALE_4                                                  0xa127
#define mmPA_CL_VPORT_XSCALE_5                                                  0xa12d
#define mmPA_CL_VPORT_XSCALE_6                                                  0xa133
#define mmPA_CL_VPORT_XSCALE_7                                                  0xa139
#define mmPA_CL_VPORT_XSCALE_8                                                  0xa13f
#define mmPA_CL_VPORT_XSCALE_9                                                  0xa145
#define mmPA_CL_VPORT_XSCALE_10                                                 0xa14b
#define mmPA_CL_VPORT_XSCALE_11                                                 0xa151
#define mmPA_CL_VPORT_XSCALE_12                                                 0xa157
#define mmPA_CL_VPORT_XSCALE_13                                                 0xa15d
#define mmPA_CL_VPORT_XSCALE_14                                                 0xa163
#define mmPA_CL_VPORT_XSCALE_15                                                 0xa169
#define mmPA_CL_VPORT_XOFFSET_1                                                 0xa116
#define mmPA_CL_VPORT_XOFFSET_2                                                 0xa11c
#define mmPA_CL_VPORT_XOFFSET_3                                                 0xa122
#define mmPA_CL_VPORT_XOFFSET_4                                                 0xa128
#define mmPA_CL_VPORT_XOFFSET_5                                                 0xa12e
#define mmPA_CL_VPORT_XOFFSET_6                                                 0xa134
#define mmPA_CL_VPORT_XOFFSET_7                                                 0xa13a
#define mmPA_CL_VPORT_XOFFSET_8                                                 0xa140
#define mmPA_CL_VPORT_XOFFSET_9                                                 0xa146
#define mmPA_CL_VPORT_XOFFSET_10                                                0xa14c
#define mmPA_CL_VPORT_XOFFSET_11                                                0xa152
#define mmPA_CL_VPORT_XOFFSET_12                                                0xa158
#define mmPA_CL_VPORT_XOFFSET_13                                                0xa15e
#define mmPA_CL_VPORT_XOFFSET_14                                                0xa164
#define mmPA_CL_VPORT_XOFFSET_15                                                0xa16a
#define mmPA_CL_VPORT_YSCALE_1                                                  0xa117
#define mmPA_CL_VPORT_YSCALE_2                                                  0xa11d
#define mmPA_CL_VPORT_YSCALE_3                                                  0xa123
#define mmPA_CL_VPORT_YSCALE_4                                                  0xa129
#define mmPA_CL_VPORT_YSCALE_5                                                  0xa12f
#define mmPA_CL_VPORT_YSCALE_6                                                  0xa135
#define mmPA_CL_VPORT_YSCALE_7                                                  0xa13b
#define mmPA_CL_VPORT_YSCALE_8                                                  0xa141
#define mmPA_CL_VPORT_YSCALE_9                                                  0xa147
#define mmPA_CL_VPORT_YSCALE_10                                                 0xa14d
#define mmPA_CL_VPORT_YSCALE_11                                                 0xa153
#define mmPA_CL_VPORT_YSCALE_12                                                 0xa159
#define mmPA_CL_VPORT_YSCALE_13                                                 0xa15f
#define mmPA_CL_VPORT_YSCALE_14                                                 0xa165
#define mmPA_CL_VPORT_YSCALE_15                                                 0xa16b
#define mmPA_CL_VPORT_YOFFSET_1                                                 0xa118
#define mmPA_CL_VPORT_YOFFSET_2                                                 0xa11e
#define mmPA_CL_VPORT_YOFFSET_3                                                 0xa124
#define mmPA_CL_VPORT_YOFFSET_4                                                 0xa12a
#define mmPA_CL_VPORT_YOFFSET_5                                                 0xa130
#define mmPA_CL_VPORT_YOFFSET_6                                                 0xa136
#define mmPA_CL_VPORT_YOFFSET_7                                                 0xa13c
#define mmPA_CL_VPORT_YOFFSET_8                                                 0xa142
#define mmPA_CL_VPORT_YOFFSET_9                                                 0xa148
#define mmPA_CL_VPORT_YOFFSET_10                                                0xa14e
#define mmPA_CL_VPORT_YOFFSET_11                                                0xa154
#define mmPA_CL_VPORT_YOFFSET_12                                                0xa15a
#define mmPA_CL_VPORT_YOFFSET_13                                                0xa160
#define mmPA_CL_VPORT_YOFFSET_14                                                0xa166
#define mmPA_CL_VPORT_YOFFSET_15                                                0xa16c
#define mmPA_CL_VPORT_ZSCALE_1                                                  0xa119
#define mmPA_CL_VPORT_ZSCALE_2                                                  0xa11f
#define mmPA_CL_VPORT_ZSCALE_3                                                  0xa125
#define mmPA_CL_VPORT_ZSCALE_4                                                  0xa12b
#define mmPA_CL_VPORT_ZSCALE_5                                                  0xa131
#define mmPA_CL_VPORT_ZSCALE_6                                                  0xa137
#define mmPA_CL_VPORT_ZSCALE_7                                                  0xa13d
#define mmPA_CL_VPORT_ZSCALE_8                                                  0xa143
#define mmPA_CL_VPORT_ZSCALE_9                                                  0xa149
#define mmPA_CL_VPORT_ZSCALE_10                                                 0xa14f
#define mmPA_CL_VPORT_ZSCALE_11                                                 0xa155
#define mmPA_CL_VPORT_ZSCALE_12                                                 0xa15b
#define mmPA_CL_VPORT_ZSCALE_13                                                 0xa161
#define mmPA_CL_VPORT_ZSCALE_14                                                 0xa167
#define mmPA_CL_VPORT_ZSCALE_15                                                 0xa16d
#define mmPA_CL_VPORT_ZOFFSET_1                                                 0xa11a
#define mmPA_CL_VPORT_ZOFFSET_2                                                 0xa120
#define mmPA_CL_VPORT_ZOFFSET_3                                                 0xa126
#define mmPA_CL_VPORT_ZOFFSET_4                                                 0xa12c
#define mmPA_CL_VPORT_ZOFFSET_5                                                 0xa132
#define mmPA_CL_VPORT_ZOFFSET_6                                                 0xa138
#define mmPA_CL_VPORT_ZOFFSET_7                                                 0xa13e
#define mmPA_CL_VPORT_ZOFFSET_8                                                 0xa144
#define mmPA_CL_VPORT_ZOFFSET_9                                                 0xa14a
#define mmPA_CL_VPORT_ZOFFSET_10                                                0xa150
#define mmPA_CL_VPORT_ZOFFSET_11                                                0xa156
#define mmPA_CL_VPORT_ZOFFSET_12                                                0xa15c
#define mmPA_CL_VPORT_ZOFFSET_13                                                0xa162
#define mmPA_CL_VPORT_ZOFFSET_14                                                0xa168
#define mmPA_CL_VPORT_ZOFFSET_15                                                0xa16e
/*
 * Remaining PA_CL_* macros: control, guard-band adjust (PA_CL_GB_*),
 * PA_CL_UCP_* and point parameters.
 * PA_CL_UCP_<p>_{X,Y,Z,W} for p = 0..5 are 24 consecutive values:
 *   mmPA_CL_UCP_<p>_X == 0xa16f + 4*p, followed by _Y, _Z, _W.
 * PA_CL_ENHANCE and PA_CL_RESET_DEBUG (0x2285, 0x2286) are in a different
 * numeric range from the rest of the group (0xa1xx / 0xa2xx).
 * NOTE(review): "UCP" presumably means user clip plane - confirm.
 */
#define mmPA_CL_VTE_CNTL                                                        0xa206
#define mmPA_CL_VS_OUT_CNTL                                                     0xa207
#define mmPA_CL_NANINF_CNTL                                                     0xa208
#define mmPA_CL_CLIP_CNTL                                                       0xa204
#define mmPA_CL_GB_VERT_CLIP_ADJ                                                0xa2fa
#define mmPA_CL_GB_VERT_DISC_ADJ                                                0xa2fb
#define mmPA_CL_GB_HORZ_CLIP_ADJ                                                0xa2fc
#define mmPA_CL_GB_HORZ_DISC_ADJ                                                0xa2fd
#define mmPA_CL_UCP_0_X                                                         0xa16f
#define mmPA_CL_UCP_0_Y                                                         0xa170
#define mmPA_CL_UCP_0_Z                                                         0xa171
#define mmPA_CL_UCP_0_W                                                         0xa172
#define mmPA_CL_UCP_1_X                                                         0xa173
#define mmPA_CL_UCP_1_Y                                                         0xa174
#define mmPA_CL_UCP_1_Z                                                         0xa175
#define mmPA_CL_UCP_1_W                                                         0xa176
#define mmPA_CL_UCP_2_X                                                         0xa177
#define mmPA_CL_UCP_2_Y                                                         0xa178
#define mmPA_CL_UCP_2_Z                                                         0xa179
#define mmPA_CL_UCP_2_W                                                         0xa17a
#define mmPA_CL_UCP_3_X                                                         0xa17b
#define mmPA_CL_UCP_3_Y                                                         0xa17c
#define mmPA_CL_UCP_3_Z                                                         0xa17d
#define mmPA_CL_UCP_3_W                                                         0xa17e
#define mmPA_CL_UCP_4_X                                                         0xa17f
#define mmPA_CL_UCP_4_Y                                                         0xa180
#define mmPA_CL_UCP_4_Z                                                         0xa181
#define mmPA_CL_UCP_4_W                                                         0xa182
#define mmPA_CL_UCP_5_X                                                         0xa183
#define mmPA_CL_UCP_5_Y                                                         0xa184
#define mmPA_CL_UCP_5_Z                                                         0xa185
#define mmPA_CL_UCP_5_W                                                         0xa186
#define mmPA_CL_POINT_X_RAD                                                     0xa1f5
#define mmPA_CL_POINT_Y_RAD                                                     0xa1f6
#define mmPA_CL_POINT_SIZE                                                      0xa1f7
#define mmPA_CL_POINT_CULL_RAD                                                  0xa1f8
#define mmPA_CL_ENHANCE                                                         0x2285
#define mmPA_CL_RESET_DEBUG                                                     0x2286
/*
 * PA_SU_* macros: vertex/point/line/polygon-offset settings followed by
 * performance counter entries.
 * Counter layout visible in the values:
 *   - select entries occupy 0xd900..0xd905; only counters 0 and 1 have a
 *     second SELECT1 entry.
 *   - result entries are LO/HI pairs: mmPA_SU_PERFCOUNTER<k>_LO == 0xd100 + 2*k
 *     and _HI == 0xd101 + 2*k, for k = 0..3.
 */
#define mmPA_SU_VTX_CNTL                                                        0xa2f9
#define mmPA_SU_POINT_SIZE                                                      0xa280
#define mmPA_SU_POINT_MINMAX                                                    0xa281
#define mmPA_SU_LINE_CNTL                                                       0xa282
#define mmPA_SU_LINE_STIPPLE_CNTL                                               0xa209
#define mmPA_SU_LINE_STIPPLE_SCALE                                              0xa20a
#define mmPA_SU_PRIM_FILTER_CNTL                                                0xa20b
#define mmPA_SU_SC_MODE_CNTL                                                    0xa205
#define mmPA_SU_POLY_OFFSET_DB_FMT_CNTL                                         0xa2de
#define mmPA_SU_POLY_OFFSET_CLAMP                                               0xa2df
#define mmPA_SU_POLY_OFFSET_FRONT_SCALE                                         0xa2e0
#define mmPA_SU_POLY_OFFSET_FRONT_OFFSET                                        0xa2e1
#define mmPA_SU_POLY_OFFSET_BACK_SCALE                                          0xa2e2
#define mmPA_SU_POLY_OFFSET_BACK_OFFSET                                         0xa2e3
#define mmPA_SU_HARDWARE_SCREEN_OFFSET                                          0xa08d
#define mmPA_SU_LINE_STIPPLE_VALUE                                              0xc280
#define mmPA_SU_PERFCOUNTER0_SELECT                                             0xd900
#define mmPA_SU_PERFCOUNTER0_SELECT1                                            0xd901
#define mmPA_SU_PERFCOUNTER1_SELECT                                             0xd902
#define mmPA_SU_PERFCOUNTER1_SELECT1                                            0xd903
#define mmPA_SU_PERFCOUNTER2_SELECT                                             0xd904
#define mmPA_SU_PERFCOUNTER3_SELECT                                             0xd905
#define mmPA_SU_PERFCOUNTER0_LO                                                 0xd100
#define mmPA_SU_PERFCOUNTER0_HI                                                 0xd101
#define mmPA_SU_PERFCOUNTER1_LO                                                 0xd102
#define mmPA_SU_PERFCOUNTER1_HI                                                 0xd103
#define mmPA_SU_PERFCOUNTER2_LO                                                 0xd104
#define mmPA_SU_PERFCOUNTER2_HI                                                 0xd105
#define mmPA_SU_PERFCOUNTER3_LO                                                 0xd106
#define mmPA_SU_PERFCOUNTER3_HI                                                 0xd107
/*
 * PA_SC_* macros: anti-aliasing (AA_*), centroid priority, clip rectangles,
 * line/mode/raster control, and generic/screen/window scissors.
 * Patterns visible in the values:
 *   - PA_SC_AA_SAMPLE_LOCS_PIXEL_<X0Y0|X1Y0|X0Y1|X1Y1>_<0..3> are 16
 *     consecutive values 0xa2fe..0xa30d, four per pixel position.
 *   - PA_SC_CLIPRECT_<r>_TL == 0xa084 + 2*r and _BR == 0xa085 + 2*r for
 *     r = 0..3.
 * NOTE(review): TL/BR presumably mean top-left / bottom-right - confirm.
 */
#define mmPA_SC_AA_CONFIG                                                       0xa2f8
#define mmPA_SC_AA_MASK_X0Y0_X1Y0                                               0xa30e
#define mmPA_SC_AA_MASK_X0Y1_X1Y1                                               0xa30f
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0                                     0xa2fe
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1                                     0xa2ff
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2                                     0xa300
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3                                     0xa301
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0                                     0xa302
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1                                     0xa303
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2                                     0xa304
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3                                     0xa305
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0                                     0xa306
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1                                     0xa307
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2                                     0xa308
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3                                     0xa309
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0                                     0xa30a
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1                                     0xa30b
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2                                     0xa30c
#define mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3                                     0xa30d
#define mmPA_SC_CENTROID_PRIORITY_0                                             0xa2f5
#define mmPA_SC_CENTROID_PRIORITY_1                                             0xa2f6
#define mmPA_SC_CLIPRECT_0_TL                                                   0xa084
#define mmPA_SC_CLIPRECT_0_BR                                                   0xa085
#define mmPA_SC_CLIPRECT_1_TL                                                   0xa086
#define mmPA_SC_CLIPRECT_1_BR                                                   0xa087
#define mmPA_SC_CLIPRECT_2_TL                                                   0xa088
#define mmPA_SC_CLIPRECT_2_BR                                                   0xa089
#define mmPA_SC_CLIPRECT_3_TL                                                   0xa08a
#define mmPA_SC_CLIPRECT_3_BR                                                   0xa08b
#define mmPA_SC_CLIPRECT_RULE                                                   0xa083
#define mmPA_SC_EDGERULE                                                        0xa08c
#define mmPA_SC_LINE_CNTL                                                       0xa2f7
#define mmPA_SC_LINE_STIPPLE                                                    0xa283
#define mmPA_SC_MODE_CNTL_0                                                     0xa292
#define mmPA_SC_MODE_CNTL_1                                                     0xa293
#define mmPA_SC_RASTER_CONFIG                                                   0xa0d4
#define mmPA_SC_RASTER_CONFIG_1                                                 0xa0d5
#define mmPA_SC_SCREEN_EXTENT_CONTROL                                           0xa0d6
#define mmPA_SC_GENERIC_SCISSOR_TL                                              0xa090
#define mmPA_SC_GENERIC_SCISSOR_BR                                              0xa091
#define mmPA_SC_SCREEN_SCISSOR_TL                                               0xa00c
#define mmPA_SC_SCREEN_SCISSOR_BR                                               0xa00d
#define mmPA_SC_WINDOW_OFFSET                                                   0xa080
#define mmPA_SC_WINDOW_SCISSOR_TL                                               0xa081
#define mmPA_SC_WINDOW_SCISSOR_BR                                               0xa082
/*
 * PA_SC_VPORT_* macros for 16 viewport indices (0..15).
 * Each pair is interleaved with a spacing of 2 per index:
 *   mmPA_SC_VPORT_SCISSOR_<n>_TL == 0xa094 + 2*n
 *   mmPA_SC_VPORT_SCISSOR_<n>_BR == 0xa095 + 2*n
 *   mmPA_SC_VPORT_ZMIN_<n>       == 0xa0b4 + 2*n
 *   mmPA_SC_VPORT_ZMAX_<n>       == 0xa0b5 + 2*n
 * The listing is grouped by component (all TL, then all BR, ...), so
 * numerically adjacent values are not adjacent in the source.
 */
#define mmPA_SC_VPORT_SCISSOR_0_TL                                              0xa094
#define mmPA_SC_VPORT_SCISSOR_1_TL                                              0xa096
#define mmPA_SC_VPORT_SCISSOR_2_TL                                              0xa098
#define mmPA_SC_VPORT_SCISSOR_3_TL                                              0xa09a
#define mmPA_SC_VPORT_SCISSOR_4_TL                                              0xa09c
#define mmPA_SC_VPORT_SCISSOR_5_TL                                              0xa09e
#define mmPA_SC_VPORT_SCISSOR_6_TL                                              0xa0a0
#define mmPA_SC_VPORT_SCISSOR_7_TL                                              0xa0a2
#define mmPA_SC_VPORT_SCISSOR_8_TL                                              0xa0a4
#define mmPA_SC_VPORT_SCISSOR_9_TL                                              0xa0a6
#define mmPA_SC_VPORT_SCISSOR_10_TL                                             0xa0a8
#define mmPA_SC_VPORT_SCISSOR_11_TL                                             0xa0aa
#define mmPA_SC_VPORT_SCISSOR_12_TL                                             0xa0ac
#define mmPA_SC_VPORT_SCISSOR_13_TL                                             0xa0ae
#define mmPA_SC_VPORT_SCISSOR_14_TL                                             0xa0b0
#define mmPA_SC_VPORT_SCISSOR_15_TL                                             0xa0b2
#define mmPA_SC_VPORT_SCISSOR_0_BR                                              0xa095
#define mmPA_SC_VPORT_SCISSOR_1_BR                                              0xa097
#define mmPA_SC_VPORT_SCISSOR_2_BR                                              0xa099
#define mmPA_SC_VPORT_SCISSOR_3_BR                                              0xa09b
#define mmPA_SC_VPORT_SCISSOR_4_BR                                              0xa09d
#define mmPA_SC_VPORT_SCISSOR_5_BR                                              0xa09f
#define mmPA_SC_VPORT_SCISSOR_6_BR                                              0xa0a1
#define mmPA_SC_VPORT_SCISSOR_7_BR                                              0xa0a3
#define mmPA_SC_VPORT_SCISSOR_8_BR                                              0xa0a5
#define mmPA_SC_VPORT_SCISSOR_9_BR                                              0xa0a7
#define mmPA_SC_VPORT_SCISSOR_10_BR                                             0xa0a9
#define mmPA_SC_VPORT_SCISSOR_11_BR                                             0xa0ab
#define mmPA_SC_VPORT_SCISSOR_12_BR                                             0xa0ad
#define mmPA_SC_VPORT_SCISSOR_13_BR                                             0xa0af
#define mmPA_SC_VPORT_SCISSOR_14_BR                                             0xa0b1
#define mmPA_SC_VPORT_SCISSOR_15_BR                                             0xa0b3
#define mmPA_SC_VPORT_ZMIN_0                                                    0xa0b4
#define mmPA_SC_VPORT_ZMIN_1                                                    0xa0b6
#define mmPA_SC_VPORT_ZMIN_2                                                    0xa0b8
#define mmPA_SC_VPORT_ZMIN_3                                                    0xa0ba
#define mmPA_SC_VPORT_ZMIN_4                                                    0xa0bc
#define mmPA_SC_VPORT_ZMIN_5                                                    0xa0be
#define mmPA_SC_VPORT_ZMIN_6                                                    0xa0c0
#define mmPA_SC_VPORT_ZMIN_7                                                    0xa0c2
#define mmPA_SC_VPORT_ZMIN_8                                                    0xa0c4
#define mmPA_SC_VPORT_ZMIN_9                                                    0xa0c6
#define mmPA_SC_VPORT_ZMIN_10                                                   0xa0c8
#define mmPA_SC_VPORT_ZMIN_11                                                   0xa0ca
#define mmPA_SC_VPORT_ZMIN_12                                                   0xa0cc
#define mmPA_SC_VPORT_ZMIN_13                                                   0xa0ce
#define mmPA_SC_VPORT_ZMIN_14                                                   0xa0d0
#define mmPA_SC_VPORT_ZMIN_15                                                   0xa0d2
#define mmPA_SC_VPORT_ZMAX_0                                                    0xa0b5
#define mmPA_SC_VPORT_ZMAX_1                                                    0xa0b7
#define mmPA_SC_VPORT_ZMAX_2                                                    0xa0b9
#define mmPA_SC_VPORT_ZMAX_3                                                    0xa0bb
#define mmPA_SC_VPORT_ZMAX_4                                                    0xa0bd
#define mmPA_SC_VPORT_ZMAX_5                                                    0xa0bf
#define mmPA_SC_VPORT_ZMAX_6                                                    0xa0c1
#define mmPA_SC_VPORT_ZMAX_7                                                    0xa0c3
#define mmPA_SC_VPORT_ZMAX_8                                                    0xa0c5
#define mmPA_SC_VPORT_ZMAX_9                                                    0xa0c7
#define mmPA_SC_VPORT_ZMAX_10                                                   0xa0c9
#define mmPA_SC_VPORT_ZMAX_11                                                   0xa0cb
#define mmPA_SC_VPORT_ZMAX_12                                                   0xa0cd
#define mmPA_SC_VPORT_ZMAX_13                                                   0xa0cf
#define mmPA_SC_VPORT_ZMAX_14                                                   0xa0d1
#define mmPA_SC_VPORT_ZMAX_15                                                   0xa0d3
/*
 * Miscellaneous PA_SC / PA_CL / PA_SU register offsets: FIFO sizing, screen
 * extents, performance counters, trap-screen registers, status and debug.
 * The entries are not sorted by offset; several offset ranges (0x22xx,
 * 0xc2xx, 0xd1xx, 0xd9xx, 0xf0xx) are mixed in this group.
 */
#define mmPA_SC_ENHANCE                                                         0x22fc
#define mmPA_SC_FIFO_SIZE                                                       0x22f3
#define mmPA_SC_IF_FIFO_SIZE                                                    0x22f5
#define mmPA_SC_FORCE_EOV_MAX_CNTS                                              0x22c9
#define mmPA_SC_LINE_STIPPLE_STATE                                              0xc281
/*
 * NOTE(review): EXTENT_MIN_0/MAX_0 are adjacent (0xc284/0xc285), but
 * EXTENT_MAX_1 is 0xc28b rather than 0xc287 (MIN_1 + 1). This may be the
 * real hardware layout or a generator slip - verify against the register
 * specification before relying on it.
 */
#define mmPA_SC_SCREEN_EXTENT_MIN_0                                             0xc284
#define mmPA_SC_SCREEN_EXTENT_MAX_0                                             0xc285
#define mmPA_SC_SCREEN_EXTENT_MIN_1                                             0xc286
#define mmPA_SC_SCREEN_EXTENT_MAX_1                                             0xc28b
/*
 * PA_SC performance counter select registers. Counter 0 has two select
 * registers (SELECT at 0xd940 and SELECT1 at 0xd941); counters 1..7 have
 * one each at 0xd942..0xd948.
 */
#define mmPA_SC_PERFCOUNTER0_SELECT                                             0xd940
#define mmPA_SC_PERFCOUNTER0_SELECT1                                            0xd941
#define mmPA_SC_PERFCOUNTER1_SELECT                                             0xd942
#define mmPA_SC_PERFCOUNTER2_SELECT                                             0xd943
#define mmPA_SC_PERFCOUNTER3_SELECT                                             0xd944
#define mmPA_SC_PERFCOUNTER4_SELECT                                             0xd945
#define mmPA_SC_PERFCOUNTER5_SELECT                                             0xd946
#define mmPA_SC_PERFCOUNTER6_SELECT                                             0xd947
#define mmPA_SC_PERFCOUNTER7_SELECT                                             0xd948
/*
 * PA_SC performance counter value registers, as LO/HI pairs:
 *   PERFCOUNTERn_LO = 0xd140 + 2*n,  PERFCOUNTERn_HI = 0xd141 + 2*n.
 */
#define mmPA_SC_PERFCOUNTER0_LO                                                 0xd140
#define mmPA_SC_PERFCOUNTER0_HI                                                 0xd141
#define mmPA_SC_PERFCOUNTER1_LO                                                 0xd142
#define mmPA_SC_PERFCOUNTER1_HI                                                 0xd143
#define mmPA_SC_PERFCOUNTER2_LO                                                 0xd144
#define mmPA_SC_PERFCOUNTER2_HI                                                 0xd145
#define mmPA_SC_PERFCOUNTER3_LO                                                 0xd146
#define mmPA_SC_PERFCOUNTER3_HI                                                 0xd147
#define mmPA_SC_PERFCOUNTER4_LO                                                 0xd148
#define mmPA_SC_PERFCOUNTER4_HI                                                 0xd149
#define mmPA_SC_PERFCOUNTER5_LO                                                 0xd14a
#define mmPA_SC_PERFCOUNTER5_HI                                                 0xd14b
#define mmPA_SC_PERFCOUNTER6_LO                                                 0xd14c
#define mmPA_SC_PERFCOUNTER6_HI                                                 0xd14d
#define mmPA_SC_PERFCOUNTER7_LO                                                 0xd14e
#define mmPA_SC_PERFCOUNTER7_HI                                                 0xd14f
/*
 * Trap-screen registers in three parallel sets of five
 * (HV_EN, H, V, OCCURRENCE, COUNT):
 *   P3D_  at 0xc2a0..0xc2a4
 *   HP3D_ at 0xc2a8..0xc2ac
 *   (no prefix) at 0xc2b0..0xc2b4
 * The matching *_HV_LOCK registers live separately at 0x22c0..0x22c2.
 */
#define mmPA_SC_P3D_TRAP_SCREEN_HV_EN                                           0xc2a0
#define mmPA_SC_P3D_TRAP_SCREEN_H                                               0xc2a1
#define mmPA_SC_P3D_TRAP_SCREEN_V                                               0xc2a2
#define mmPA_SC_P3D_TRAP_SCREEN_OCCURRENCE                                      0xc2a3
#define mmPA_SC_P3D_TRAP_SCREEN_COUNT                                           0xc2a4
#define mmPA_SC_HP3D_TRAP_SCREEN_HV_EN                                          0xc2a8
#define mmPA_SC_HP3D_TRAP_SCREEN_H                                              0xc2a9
#define mmPA_SC_HP3D_TRAP_SCREEN_V                                              0xc2aa
#define mmPA_SC_HP3D_TRAP_SCREEN_OCCURRENCE                                     0xc2ab
#define mmPA_SC_HP3D_TRAP_SCREEN_COUNT                                          0xc2ac
#define mmPA_SC_TRAP_SCREEN_HV_EN                                               0xc2b0
#define mmPA_SC_TRAP_SCREEN_H                                                   0xc2b1
#define mmPA_SC_TRAP_SCREEN_V                                                   0xc2b2
#define mmPA_SC_TRAP_SCREEN_OCCURRENCE                                          0xc2b3
#define mmPA_SC_TRAP_SCREEN_COUNT                                               0xc2b4
#define mmPA_SC_P3D_TRAP_SCREEN_HV_LOCK                                         0x22c0
#define mmPA_SC_HP3D_TRAP_SCREEN_HV_LOCK                                        0x22c1
#define mmPA_SC_TRAP_SCREEN_HV_LOCK                                             0x22c2
/* Status, FIFO depth and clock-control offsets for the PA/SC blocks. */
#define mmPA_CL_CNTL_STATUS                                                     0x2284
#define mmPA_SU_CNTL_STATUS                                                     0x2294
#define mmPA_SC_FIFO_DEPTH_CNTL                                                 0x2295
#define mmCGTT_PA_CLK_CTRL                                                      0xf088
#define mmCGTT_SC_CLK_CTRL                                                      0xf089
/*
 * Debug CNTL/DATA register pairs for PA_SU and PA_SC.
 * NOTE(review): presumably the "ix" debug indices defined elsewhere in this
 * header are written to a *_DEBUG_CNTL register and read back through the
 * matching *_DEBUG_DATA register - confirm against the register spec.
 */
#define mmPA_SU_DEBUG_CNTL                                                      0x2280
#define mmPA_SU_DEBUG_DATA                                                      0x2281
#define mmPA_SC_DEBUG_CNTL                                                      0x22f6
#define mmPA_SC_DEBUG_DATA                                                      0x22f7
/*
 * "ix"-prefixed debug register indices.
 * CLIPPER_DEBUG_REG00..19, SXIFCCG_DEBUG_REG0..3 and SETUP_DEBUG_REG0..5
 * form one contiguous index range 0x0..0x1d.
 * NOTE(review): the "ix" prefix presumably denotes an index into an
 * indirect (index/data) register space rather than a direct offset -
 * confirm against the hardware register specification.
 */
#define ixCLIPPER_DEBUG_REG00                                                   0x0
#define ixCLIPPER_DEBUG_REG01                                                   0x1
#define ixCLIPPER_DEBUG_REG02                                                   0x2
#define ixCLIPPER_DEBUG_REG03                                                   0x3
#define ixCLIPPER_DEBUG_REG04                                                   0x4
#define ixCLIPPER_DEBUG_REG05                                                   0x5
#define ixCLIPPER_DEBUG_REG06                                                   0x6
#define ixCLIPPER_DEBUG_REG07                                                   0x7
#define ixCLIPPER_DEBUG_REG08                                                   0x8
#define ixCLIPPER_DEBUG_REG09                                                   0x9
#define ixCLIPPER_DEBUG_REG10                                                   0xa
#define ixCLIPPER_DEBUG_REG11                                                   0xb
#define ixCLIPPER_DEBUG_REG12                                                   0xc
#define ixCLIPPER_DEBUG_REG13                                                   0xd
#define ixCLIPPER_DEBUG_REG14                                                   0xe
#define ixCLIPPER_DEBUG_REG15                                                   0xf
#define ixCLIPPER_DEBUG_REG16                                                   0x10
#define ixCLIPPER_DEBUG_REG17                                                   0x11
#define ixCLIPPER_DEBUG_REG18                                                   0x12
#define ixCLIPPER_DEBUG_REG19                                                   0x13
#define ixSXIFCCG_DEBUG_REG0                                                    0x14
#define ixSXIFCCG_DEBUG_REG1                                                    0x15
#define ixSXIFCCG_DEBUG_REG2                                                    0x16
#define ixSXIFCCG_DEBUG_REG3                                                    0x17
#define ixSETUP_DEBUG_REG0                                                      0x18
#define ixSETUP_DEBUG_REG1                                                      0x19
#define ixSETUP_DEBUG_REG2                                                      0x1a
#define ixSETUP_DEBUG_REG3                                                      0x1b
#define ixSETUP_DEBUG_REG4                                                      0x1c
#define ixSETUP_DEBUG_REG5                                                      0x1d
/*
 * These two indices restart at 0x0/0x1 and therefore collide numerically
 * with ixCLIPPER_DEBUG_REG00/01 above.
 * NOTE(review): presumably they belong to a different index space (the
 * PA_SC debug port rather than the PA_SU one) - verify before use.
 */
#define ixPA_SC_DEBUG_REG0                                                      0x0
#define ixPA_SC_DEBUG_REG1                                                      0x1
/*
 * COMPUTE_* register offsets, contiguous from 0x2e00 to 0x2e1f in listing
 * order. Note that COMPUTE_TMPRING_SIZE (0x2e18) sits between
 * STATIC_THREAD_MGMT_SE1 (0x2e17) and STATIC_THREAD_MGMT_SE2 (0x2e19), so
 * the four SE0..SE3 registers are not contiguous.
 */
#define mmCOMPUTE_DISPATCH_INITIATOR                                            0x2e00
#define mmCOMPUTE_DIM_X                                                         0x2e01
#define mmCOMPUTE_DIM_Y                                                         0x2e02
#define mmCOMPUTE_DIM_Z                                                         0x2e03
#define mmCOMPUTE_START_X                                                       0x2e04
#define mmCOMPUTE_START_Y                                                       0x2e05
#define mmCOMPUTE_START_Z                                                       0x2e06
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
#define mmCOMPUTE_NUM_THREAD_Y                                                  0x2e08
#define mmCOMPUTE_NUM_THREAD_Z                                                  0x2e09
#define mmCOMPUTE_PIPELINESTAT_ENABLE                                           0x2e0a
#define mmCOMPUTE_PERFCOUNT_ENABLE                                              0x2e0b
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_HI                                                        0x2e0d
#define mmCOMPUTE_TBA_LO                                                        0x2e0e
#define mmCOMPUTE_TBA_HI                                                        0x2e0f
#define mmCOMPUTE_TMA_LO                                                        0x2e10
#define mmCOMPUTE_TMA_HI                                                        0x2e11
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_PGM_RSRC2                                                     0x2e13
#define mmCOMPUTE_VMID                                                          0x2e14
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_STATIC_THREAD_MGMT_SE0                                        0x2e16
#define mmCOMPUTE_STATIC_THREAD_MGMT_SE1                                        0x2e17
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_STATIC_THREAD_MGMT_SE2                                        0x2e19
#define mmCOMPUTE_STATIC_THREAD_MGMT_SE3                                        0x2e1a
#define mmCOMPUTE_RESTART_X                                                     0x2e1b
#define mmCOMPUTE_RESTART_Y                                                     0x2e1c
#define mmCOMPUTE_RESTART_Z                                                     0x2e1d
#define mmCOMPUTE_THREAD_TRACE_ENABLE                                           0x2e1e
#define mmCOMPUTE_MISC_RESERVED                                                 0x2e1f
/* COMPUTE_USER_DATA_n, n = 0..15: offset = 0x2e40 + n. */
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_USER_DATA_2                                                   0x2e42
#define mmCOMPUTE_USER_DATA_3                                                   0x2e43
#define mmCOMPUTE_USER_DATA_4                                                   0x2e44
#define mmCOMPUTE_USER_DATA_5                                                   0x2e45
#define mmCOMPUTE_USER_DATA_6                                                   0x2e46
#define mmCOMPUTE_USER_DATA_7                                                   0x2e47
#define mmCOMPUTE_USER_DATA_8                                                   0x2e48
#define mmCOMPUTE_USER_DATA_9                                                   0x2e49
#define mmCOMPUTE_USER_DATA_10                                                  0x2e4a
#define mmCOMPUTE_USER_DATA_11                                                  0x2e4b
#define mmCOMPUTE_USER_DATA_12                                                  0x2e4c
#define mmCOMPUTE_USER_DATA_13                                                  0x2e4d
#define mmCOMPUTE_USER_DATA_14                                                  0x2e4e
#define mmCOMPUTE_USER_DATA_15                                                  0x2e4f
/*
 * CSPRIV_* offsets. These are small values (0x0, 0x1e, 0x1f) unlike the
 * other "mm" offsets in this group, and all four THREAD_TRACE_TG0..TG3
 * macros expand to the same value 0x1e.
 * NOTE(review): the shared value presumably means the four names alias one
 * location (or are distinguished by some other selector) - confirm against
 * the register specification; do not assume they are distinct registers.
 */
#define mmCSPRIV_CONNECT                                                        0x0
#define mmCSPRIV_THREAD_TRACE_TG0                                               0x1e
#define mmCSPRIV_THREAD_TRACE_TG1                                               0x1e
#define mmCSPRIV_THREAD_TRACE_TG2                                               0x1e
#define mmCSPRIV_THREAD_TRACE_TG3                                               0x1e
#define mmCSPRIV_THREAD_TRACE_EVENT                                             0x1f
/*
 * RLC_* register offsets. Most control registers fall in 0x30c0..0x313a;
 * the performance-monitor related ones are in 0xd48x and 0xdcxx. The list
 * is grouped by name, not strictly sorted by offset.
 */
#define mmRLC_CNTL                                                              0x30c0
#define mmRLC_DEBUG_SELECT                                                      0x30c1
#define mmRLC_DEBUG                                                             0x30c2
#define mmRLC_MC_CNTL                                                           0x30c3
#define mmRLC_STAT                                                              0x30c4
/* Same value (0x313a) as mmRLC_GPR_REG2 defined later in this group. */
#define mmRLC_SAFE_MODE                                                         0x313a
#define mmRLC_SOFT_RESET_GPU                                                    0x30c5
#define mmRLC_MEM_SLP_CNTL                                                      0x30c6
/* RLC performance counters: select registers, then LO/HI value registers. */
#define mmRLC_PERFMON_CNTL                                                      0xdcc0
#define mmRLC_PERFCOUNTER0_SELECT                                               0xdcc1
#define mmRLC_PERFCOUNTER1_SELECT                                               0xdcc2
#define mmRLC_PERFCOUNTER0_LO                                                   0xd480
#define mmRLC_PERFCOUNTER1_LO                                                   0xd482
#define mmRLC_PERFCOUNTER0_HI                                                   0xd481
#define mmRLC_PERFCOUNTER1_HI                                                   0xd483
#define mmCGTT_RLC_CLK_CTRL                                                     0xf0b8
/*
 * NOTE(review): RLC_LB_CNTR_MAX is 0x30d2 while its neighbours are 0x30d9
 * and 0x30db, leaving 0x30da unused in this list. This may be the actual
 * hardware layout or a transcription slip - verify against the register
 * specification before relying on it.
 */
#define mmRLC_LB_CNTL                                                           0x30d9
#define mmRLC_LB_CNTR_MAX                                                       0x30d2
#define mmRLC_LB_CNTR_INIT                                                      0x30db
#define mmRLC_LOAD_BALANCE_CNTR                                                 0x30dc
#define mmRLC_SAVE_AND_RESTORE_BASE                                             0x30dd
/*
 * RLC_JUMP_TABLE_RESTORE and RLC_DRIVER_CPDMA_STATUS expand to the same
 * value (0x30de).
 * NOTE(review): presumably two names for one register - confirm.
 */
#define mmRLC_JUMP_TABLE_RESTORE                                                0x30de
#define mmRLC_DRIVER_CPDMA_STATUS                                               0x30de
#define mmRLC_PG_DELAY_2                                                        0x30df
#define mmRLC_GPM_DEBUG_SELECT                                                  0x30e0
#define mmRLC_GPM_DEBUG                                                         0x30e1
#define mmRLC_GPM_UCODE_ADDR                                                    0x30e2
#define mmRLC_GPM_UCODE_DATA                                                    0x30e3
#define mmRLC_GPU_CLOCK_COUNT_LSB                                               0x30e4
#define mmRLC_GPU_CLOCK_COUNT_MSB                                               0x30e5
#define mmRLC_CAPTURE_GPU_CLOCK_COUNT                                           0x30e6
#define mmRLC_UCODE_CNTL                                                        0x30e7
/* Contiguous run 0x3100..0x3130 in listing order. */
#define mmRLC_GPM_STAT                                                          0x3100
#define mmRLC_GPU_CLOCK_32_RES_SEL                                              0x3101
#define mmRLC_GPU_CLOCK_32                                                      0x3102
#define mmRLC_PG_CNTL                                                           0x3103
#define mmRLC_GPM_THREAD_PRIORITY                                               0x3104
#define mmRLC_GPM_THREAD_ENABLE                                                 0x3105
#define mmRLC_GPM_VMID_THREAD0                                                  0x3106
#define mmRLC_GPM_VMID_THREAD1                                                  0x3107
#define mmRLC_CGTT_MGCG_OVERRIDE                                                0x3108
#define mmRLC_CGCG_CGLS_CTRL                                                    0x3109
#define mmRLC_CGCG_RAMP_CTRL                                                    0x310a
#define mmRLC_DYN_PG_STATUS                                                     0x310b
#define mmRLC_DYN_PG_REQUEST                                                    0x310c
#define mmRLC_PG_DELAY                                                          0x310d
#define mmRLC_CU_STATUS                                                         0x310e
#define mmRLC_LB_INIT_CU_MASK                                                   0x310f
#define mmRLC_LB_ALWAYS_ACTIVE_CU_MASK                                          0x3110
#define mmRLC_LB_PARAMS                                                         0x3111
#define mmRLC_THREAD1_DELAY                                                     0x3112
#define mmRLC_PG_ALWAYS_ON_CU_MASK                                              0x3113
#define mmRLC_MAX_PG_CU                                                         0x3114
#define mmRLC_AUTO_PG_CTRL                                                      0x3115
#define mmRLC_SMU_GRBM_REG_SAVE_CTRL                                            0x3116
#define mmRLC_SMU_PG_CTRL                                                       0x3117
#define mmRLC_SMU_PG_WAKE_UP_CTRL                                               0x3118
#define mmRLC_SERDES_RD_MASTER_INDEX                                            0x3119
#define mmRLC_SERDES_RD_DATA_0                                                  0x311a
#define mmRLC_SERDES_RD_DATA_1                                                  0x311b
#define mmRLC_SERDES_RD_DATA_2                                                  0x311c
#define mmRLC_SERDES_WR_CU_MASTER_MASK                                          0x311d
#define mmRLC_SERDES_WR_NONCU_MASTER_MASK                                       0x311e
#define mmRLC_SERDES_WR_CTRL                                                    0x311f
#define mmRLC_SERDES_WR_DATA                                                    0x3120
#define mmRLC_SERDES_CU_MASTER_BUSY                                             0x3121
#define mmRLC_SERDES_NONCU_MASTER_BUSY                                          0x3122
#define mmRLC_GPM_GENERAL_0                                                     0x3123
#define mmRLC_GPM_GENERAL_1                                                     0x3124
#define mmRLC_GPM_GENERAL_2                                                     0x3125
#define mmRLC_GPM_GENERAL_3                                                     0x3126
#define mmRLC_GPM_GENERAL_4                                                     0x3127
#define mmRLC_GPM_GENERAL_5                                                     0x3128
#define mmRLC_GPM_GENERAL_6                                                     0x3129
#define mmRLC_GPM_GENERAL_7                                                     0x312a
#define mmRLC_GPM_CU_PD_TIMEOUT                                                 0x312b
#define mmRLC_GPM_SCRATCH_ADDR                                                  0x312c
#define mmRLC_GPM_SCRATCH_DATA                                                  0x312d
#define mmRLC_STATIC_PG_STATUS                                                  0x312e
#define mmRLC_GPM_PERF_COUNT_0                                                  0x312f
#define mmRLC_GPM_PERF_COUNT_1                                                  0x3130
/*
 * RLC_GPR_REG1/REG2 are listed ahead of the 0x3131..0x3138 entries that
 * follow. RLC_GPR_REG2 has the same value (0x313a) as mmRLC_SAFE_MODE.
 * NOTE(review): presumably an alias of the same register - confirm.
 */
#define mmRLC_GPR_REG1                                                          0x3139
#define mmRLC_GPR_REG2                                                          0x313a
#define mmRLC_SPM_VMID                                                          0x3131
#define mmRLC_SPM_INT_CNTL                                                      0x3132
#define mmRLC_SPM_INT_STATUS                                                    0x3133
#define mmRLC_SPM_DEBUG_SELECT                                                  0x3134
#define mmRLC_SPM_DEBUG                                                         0x3135
#define mmRLC_GPM_LOG_ADDR                                                      0x3136
#define mmRLC_GPM_LOG_SIZE                                                      0x3137
#define mmRLC_GPM_LOG_CONT                                                      0x3138
/*
 * RLC_SPM_* performance-monitor registers, 0xdc80..0xdca2 in ascending
 * order. Offset 0xdc8f is not assigned to any name in this list (the
 * sequence goes from IA at 0xdc8e straight to SC at 0xdc90).
 */
#define mmRLC_SPM_PERFMON_CNTL                                                  0xdc80
#define mmRLC_SPM_PERFMON_RING_BASE_LO                                          0xdc81
#define mmRLC_SPM_PERFMON_RING_BASE_HI                                          0xdc82
#define mmRLC_SPM_PERFMON_RING_SIZE                                             0xdc83
#define mmRLC_SPM_PERFMON_SEGMENT_SIZE                                          0xdc84
#define mmRLC_SPM_SE_MUXSEL_ADDR                                                0xdc85
#define mmRLC_SPM_SE_MUXSEL_DATA                                                0xdc86
#define mmRLC_SPM_CPG_PERFMON_SAMPLE_DELAY                                      0xdc87
#define mmRLC_SPM_CPC_PERFMON_SAMPLE_DELAY                                      0xdc88
#define mmRLC_SPM_CPF_PERFMON_SAMPLE_DELAY                                      0xdc89
#define mmRLC_SPM_CB_PERFMON_SAMPLE_DELAY                                       0xdc8a
#define mmRLC_SPM_DB_PERFMON_SAMPLE_DELAY                                       0xdc8b
#define mmRLC_SPM_PA_PERFMON_SAMPLE_DELAY                                       0xdc8c
#define mmRLC_SPM_GDS_PERFMON_SAMPLE_DELAY                                      0xdc8d
#define mmRLC_SPM_IA_PERFMON_SAMPLE_DELAY                                       0xdc8e
#define mmRLC_SPM_SC_PERFMON_SAMPLE_DELAY                                       0xdc90
#define mmRLC_SPM_TCC_PERFMON_SAMPLE_DELAY                                      0xdc91
#define mmRLC_SPM_TCA_PERFMON_SAMPLE_DELAY                                      0xdc92
#define mmRLC_SPM_TCP_PERFMON_SAMPLE_DELAY                                      0xdc93
#define mmRLC_SPM_TA_PERFMON_SAMPLE_DELAY                                       0xdc94
#define mmRLC_SPM_TD_PERFMON_SAMPLE_DELAY                                       0xdc95
#define mmRLC_SPM_VGT_PERFMON_SAMPLE_DELAY                                      0xdc96
#define mmRLC_SPM_SPI_PERFMON_SAMPLE_DELAY                                      0xdc97
#define mmRLC_SPM_SQG_PERFMON_SAMPLE_DELAY                                      0xdc98
#define mmRLC_SPM_TCS_PERFMON_SAMPLE_DELAY                                      0xdc99
#define mmRLC_SPM_SX_PERFMON_SAMPLE_DELAY                                       0xdc9a
#define mmRLC_SPM_GLOBAL_MUXSEL_ADDR                                            0xdc9b
#define mmRLC_SPM_GLOBAL_MUXSEL_DATA                                            0xdc9c
#define mmRLC_SPM_RING_RDPTR                                                    0xdc9d
#define mmRLC_SPM_SEGMENT_THRESHOLD                                             0xdc9e
#define mmRLC_SPM_DBR0_PERFMON_SAMPLE_DELAY                                     0xdc9f
#define mmRLC_SPM_DBR1_PERFMON_SAMPLE_DELAY                                     0xdca0
#define mmRLC_SPM_CBR0_PERFMON_SAMPLE_DELAY                                     0xdca1
#define mmRLC_SPM_CBR1_PERFMON_SAMPLE_DELAY                                     0xdca2
#define mmSPI_PS_INPUT_CNTL_0                                                   0xa191
#define mmSPI_PS_INPUT_CNTL_1                                                   0xa192
#define mmSPI_PS_INPUT_CNTL_2                                                   0xa193
#define mmSPI_PS_INPUT_CNTL_3                                                   0xa194
#define mmSPI_PS_INPUT_CNTL_4                                                   0xa195
#define mmSPI_PS_INPUT_CNTL_5                                                   0xa196
#define mmSPI_PS_INPUT_CNTL_6                                                   0xa197
#define mmSPI_PS_INPUT_CNTL_7                                                   0xa198
#define mmSPI_PS_INPUT_CNTL_8                                                   0xa199
#define mmSPI_PS_INPUT_CNTL_9                                                   0xa19a
#define mmSPI_PS_INPUT_CNTL_10                                                  0xa19b
#define mmSPI_PS_INPUT_CNTL_11                                                  0xa19c
#define mmSPI_PS_INPUT_CNTL_12                                                  0xa19d
#define mmSPI_PS_INPUT_CNTL_13                                                  0xa19e
#define mmSPI_PS_INPUT_CNTL_14                                                  0xa19f
#define mmSPI_PS_INPUT_CNTL_15                                                  0xa1a0
#define mmSPI_PS_INPUT_CNTL_16                                                  0xa1a1
#define mmSPI_PS_INPUT_CNTL_17                                                  0xa1a2
#define mmSPI_PS_INPUT_CNTL_18                                                  0xa1a3
#define mmSPI_PS_INPUT_CNTL_19                                                  0xa1a4
#define mmSPI_PS_INPUT_CNTL_20                                                  0xa1a5
#define mmSPI_PS_INPUT_CNTL_21                                                  0xa1a6
#define mmSPI_PS_INPUT_CNTL_22                                                  0xa1a7
#define mmSPI_PS_INPUT_CNTL_23                                                  0xa1a8
#define mmSPI_PS_INPUT_CNTL_24                                                  0xa1a9
#define mmSPI_PS_INPUT_CNTL_25                                                  0xa1aa
#define mmSPI_PS_INPUT_CNTL_26                                                  0xa1ab
#define mmSPI_PS_INPUT_CNTL_27                                                  0xa1ac
#define mmSPI_PS_INPUT_CNTL_28                                                  0xa1ad
#define mmSPI_PS_INPUT_CNTL_29                                                  0xa1ae
#define mmSPI_PS_INPUT_CNTL_30                                                  0xa1af
#define mmSPI_PS_INPUT_CNTL_31                                                  0xa1b0
#define mmSPI_VS_OUT_CONFIG                                                     0xa1b1
#define mmSPI_PS_INPUT_ENA                                                      0xa1b3
#define mmSPI_PS_INPUT_ADDR                                                     0xa1b4
#define mmSPI_INTERP_CONTROL_0                                                  0xa1b5
#define mmSPI_PS_IN_CONTROL                                                     0xa1b6
#define mmSPI_BARYC_CNTL                                                        0xa1b8
#define mmSPI_TMPRING_SIZE                                                      0xa1ba
#define mmSPI_SHADER_POS_FORMAT                                                 0xa1c3
#define mmSPI_SHADER_Z_FORMAT                                                   0xa1c4
#define mmSPI_SHADER_COL_FORMAT                                                 0xa1c5
#define mmSPI_ARB_PRIORITY                                                      0x31c0
#define mmSPI_ARB_CYCLES_0                                                      0x31c1
#define mmSPI_ARB_CYCLES_1                                                      0x31c2
#define mmSPI_CDBG_SYS_GFX                                                      0x31c3
#define mmSPI_CDBG_SYS_HP3D                                                     0x31c4
#define mmSPI_CDBG_SYS_CS0                                                      0x31c5
#define mmSPI_CDBG_SYS_CS1                                                      0x31c6
#define mmSPI_WCL_PIPE_PERCENT_GFX                                              0x31c7
#define mmSPI_WCL_PIPE_PERCENT_HP3D                                             0x31c8
#define mmSPI_WCL_PIPE_PERCENT_CS0                                              0x31c9
#define mmSPI_WCL_PIPE_PERCENT_CS1                                              0x31ca
#define mmSPI_WCL_PIPE_PERCENT_CS2                                              0x31cb
#define mmSPI_WCL_PIPE_PERCENT_CS3                                              0x31cc
#define mmSPI_WCL_PIPE_PERCENT_CS4                                              0x31cd
/*
 * SPI_WCL_PIPE_PERCENT_CS*, SPI_GDBG_*, SPI reset and SPI_RESOURCE_RESERVE_*
 * register offsets in the 0x31ce..0x31f3 range.
 *
 * NOTE(review): the `mm` prefix presumably marks a memory-mapped register
 * offset; the addressing unit (dword vs. byte) is not shown here - confirm.
 *
 * Layout quirk: SPI_RESOURCE_RESERVE_CU_0..9 occupy 0x31dc..0x31e5 and
 * SPI_RESOURCE_RESERVE_EN_CU_0..9 occupy 0x31e6..0x31ef, but the _10/_11
 * entries of both families are placed afterwards (0x31f0..0x31f3), so the
 * offset cannot be derived as "base + CU index" across the whole family.
 */
#define mmSPI_WCL_PIPE_PERCENT_CS5                                              0x31ce
#define mmSPI_WCL_PIPE_PERCENT_CS6                                              0x31cf
#define mmSPI_WCL_PIPE_PERCENT_CS7                                              0x31d0
#define mmSPI_GDBG_WAVE_CNTL                                                    0x31d1
#define mmSPI_GDBG_TRAP_CONFIG                                                  0x31d2
#define mmSPI_GDBG_TRAP_MASK                                                    0x31d3
#define mmSPI_GDBG_TBA_LO                                                       0x31d4
#define mmSPI_GDBG_TBA_HI                                                       0x31d5
#define mmSPI_GDBG_TMA_LO                                                       0x31d6
#define mmSPI_GDBG_TMA_HI                                                       0x31d7
#define mmSPI_GDBG_TRAP_DATA0                                                   0x31d8
#define mmSPI_GDBG_TRAP_DATA1                                                   0x31d9
#define mmSPI_RESET_DEBUG                                                       0x31da
#define mmSPI_COMPUTE_QUEUE_RESET                                               0x31db
#define mmSPI_RESOURCE_RESERVE_CU_0                                             0x31dc
#define mmSPI_RESOURCE_RESERVE_CU_1                                             0x31dd
#define mmSPI_RESOURCE_RESERVE_CU_2                                             0x31de
#define mmSPI_RESOURCE_RESERVE_CU_3                                             0x31df
#define mmSPI_RESOURCE_RESERVE_CU_4                                             0x31e0
#define mmSPI_RESOURCE_RESERVE_CU_5                                             0x31e1
#define mmSPI_RESOURCE_RESERVE_CU_6                                             0x31e2
#define mmSPI_RESOURCE_RESERVE_CU_7                                             0x31e3
#define mmSPI_RESOURCE_RESERVE_CU_8                                             0x31e4
#define mmSPI_RESOURCE_RESERVE_CU_9                                             0x31e5
/* CU_10/CU_11 are not contiguous with CU_0..9; they follow the EN_CU_0..9 range. */
#define mmSPI_RESOURCE_RESERVE_CU_10                                            0x31f0
#define mmSPI_RESOURCE_RESERVE_CU_11                                            0x31f1
#define mmSPI_RESOURCE_RESERVE_EN_CU_0                                          0x31e6
#define mmSPI_RESOURCE_RESERVE_EN_CU_1                                          0x31e7
#define mmSPI_RESOURCE_RESERVE_EN_CU_2                                          0x31e8
#define mmSPI_RESOURCE_RESERVE_EN_CU_3                                          0x31e9
#define mmSPI_RESOURCE_RESERVE_EN_CU_4                                          0x31ea
#define mmSPI_RESOURCE_RESERVE_EN_CU_5                                          0x31eb
#define mmSPI_RESOURCE_RESERVE_EN_CU_6                                          0x31ec
#define mmSPI_RESOURCE_RESERVE_EN_CU_7                                          0x31ed
#define mmSPI_RESOURCE_RESERVE_EN_CU_8                                          0x31ee
#define mmSPI_RESOURCE_RESERVE_EN_CU_9                                          0x31ef
/* EN_CU_10/EN_CU_11 likewise sit after the CU_10/CU_11 pair. */
#define mmSPI_RESOURCE_RESERVE_EN_CU_10                                         0x31f2
#define mmSPI_RESOURCE_RESERVE_EN_CU_11                                         0x31f3
/*
 * SPI configuration/debug registers (0x243a..0x2450) and SPI performance
 * counter registers.
 *
 * The performance counter macros are split across two offset ranges:
 *   - 0xd980..0xd98a : SPI_PERFCOUNTERn_SELECT / _SELECT1 and
 *                      SPI_PERFCOUNTER_BINS
 *   - 0xd180..0xd18b : SPI_PERFCOUNTERn_HI / _LO pairs
 *
 * For each SPI counter the _HI macro has the lower (even) offset and the _LO
 * macro the next (odd) one, i.e. HI precedes LO. Only counters 0..3 have a
 * _SELECT1 macro in this list; counters 4 and 5 have _SELECT only.
 */
#define mmSPI_PS_MAX_WAVE_ID                                                    0x243a
#define mmSPI_CONFIG_CNTL                                                       0x2440
#define mmSPI_DEBUG_CNTL                                                        0x2441
#define mmSPI_DEBUG_READ                                                        0x2442
#define mmSPI_PERFCOUNTER0_SELECT                                               0xd980
#define mmSPI_PERFCOUNTER1_SELECT                                               0xd981
#define mmSPI_PERFCOUNTER2_SELECT                                               0xd982
#define mmSPI_PERFCOUNTER3_SELECT                                               0xd983
#define mmSPI_PERFCOUNTER0_SELECT1                                              0xd984
#define mmSPI_PERFCOUNTER1_SELECT1                                              0xd985
#define mmSPI_PERFCOUNTER2_SELECT1                                              0xd986
#define mmSPI_PERFCOUNTER3_SELECT1                                              0xd987
#define mmSPI_PERFCOUNTER4_SELECT                                               0xd988
#define mmSPI_PERFCOUNTER5_SELECT                                               0xd989
#define mmSPI_PERFCOUNTER_BINS                                                  0xd98a
/* Counter value macros: _HI at the even offset, _LO at the following odd offset. */
#define mmSPI_PERFCOUNTER0_HI                                                   0xd180
#define mmSPI_PERFCOUNTER0_LO                                                   0xd181
#define mmSPI_PERFCOUNTER1_HI                                                   0xd182
#define mmSPI_PERFCOUNTER1_LO                                                   0xd183
#define mmSPI_PERFCOUNTER2_HI                                                   0xd184
#define mmSPI_PERFCOUNTER2_LO                                                   0xd185
#define mmSPI_PERFCOUNTER3_HI                                                   0xd186
#define mmSPI_PERFCOUNTER3_LO                                                   0xd187
#define mmSPI_PERFCOUNTER4_HI                                                   0xd188
#define mmSPI_PERFCOUNTER4_LO                                                   0xd189
#define mmSPI_PERFCOUNTER5_HI                                                   0xd18a
#define mmSPI_PERFCOUNTER5_LO                                                   0xd18b
#define mmSPI_CONFIG_CNTL_1                                                     0x244f
#define mmSPI_DEBUG_BUSY                                                        0x2450
/*
 * CGTS_* and CGTT_* register offsets (0xf000..0xf082).
 *
 * NOTE(review): the CGTS/CGTT prefixes presumably refer to clock-gating
 * control blocks - confirm against the hardware register specification.
 *
 * Per-CU layout: CU0..CU15 each get five consecutive offsets starting at
 * 0xf008, in the order SP0, LDS_SQ, TA, SP1, TD_TCP, so CUn starts at
 * 0xf008 + 5 * n.
 *
 * Naming quirk: for CU0, CU4, CU8 and CU12 the third register is named
 * ..._TA_SQC_CTRL_REG; for all other CUs it is ..._TA_CTRL_REG. Code that
 * builds macro names by token pasting must account for this.
 */
#define mmCGTS_SM_CTRL_REG                                                      0xf000
#define mmCGTS_RD_CTRL_REG                                                      0xf001
#define mmCGTS_RD_REG                                                           0xf002
#define mmCGTS_TCC_DISABLE                                                      0xf003
#define mmCGTS_USER_TCC_DISABLE                                                 0xf004
#define mmCGTS_CU0_SP0_CTRL_REG                                                 0xf008
#define mmCGTS_CU0_LDS_SQ_CTRL_REG                                              0xf009
#define mmCGTS_CU0_TA_SQC_CTRL_REG                                              0xf00a
#define mmCGTS_CU0_SP1_CTRL_REG                                                 0xf00b
#define mmCGTS_CU0_TD_TCP_CTRL_REG                                              0xf00c
#define mmCGTS_CU1_SP0_CTRL_REG                                                 0xf00d
#define mmCGTS_CU1_LDS_SQ_CTRL_REG                                              0xf00e
#define mmCGTS_CU1_TA_CTRL_REG                                                  0xf00f
#define mmCGTS_CU1_SP1_CTRL_REG                                                 0xf010
#define mmCGTS_CU1_TD_TCP_CTRL_REG                                              0xf011
#define mmCGTS_CU2_SP0_CTRL_REG                                                 0xf012
#define mmCGTS_CU2_LDS_SQ_CTRL_REG                                              0xf013
#define mmCGTS_CU2_TA_CTRL_REG                                                  0xf014
#define mmCGTS_CU2_SP1_CTRL_REG                                                 0xf015
#define mmCGTS_CU2_TD_TCP_CTRL_REG                                              0xf016
#define mmCGTS_CU3_SP0_CTRL_REG                                                 0xf017
#define mmCGTS_CU3_LDS_SQ_CTRL_REG                                              0xf018
#define mmCGTS_CU3_TA_CTRL_REG                                                  0xf019
#define mmCGTS_CU3_SP1_CTRL_REG                                                 0xf01a
#define mmCGTS_CU3_TD_TCP_CTRL_REG                                              0xf01b
#define mmCGTS_CU4_SP0_CTRL_REG                                                 0xf01c
#define mmCGTS_CU4_LDS_SQ_CTRL_REG                                              0xf01d
#define mmCGTS_CU4_TA_SQC_CTRL_REG                                              0xf01e
#define mmCGTS_CU4_SP1_CTRL_REG                                                 0xf01f
#define mmCGTS_CU4_TD_TCP_CTRL_REG                                              0xf020
#define mmCGTS_CU5_SP0_CTRL_REG                                                 0xf021
#define mmCGTS_CU5_LDS_SQ_CTRL_REG                                              0xf022
#define mmCGTS_CU5_TA_CTRL_REG                                                  0xf023
#define mmCGTS_CU5_SP1_CTRL_REG                                                 0xf024
#define mmCGTS_CU5_TD_TCP_CTRL_REG                                              0xf025
#define mmCGTS_CU6_SP0_CTRL_REG                                                 0xf026
#define mmCGTS_CU6_LDS_SQ_CTRL_REG                                              0xf027
#define mmCGTS_CU6_TA_CTRL_REG                                                  0xf028
#define mmCGTS_CU6_SP1_CTRL_REG                                                 0xf029
#define mmCGTS_CU6_TD_TCP_CTRL_REG                                              0xf02a
#define mmCGTS_CU7_SP0_CTRL_REG                                                 0xf02b
#define mmCGTS_CU7_LDS_SQ_CTRL_REG                                              0xf02c
#define mmCGTS_CU7_TA_CTRL_REG                                                  0xf02d
#define mmCGTS_CU7_SP1_CTRL_REG                                                 0xf02e
#define mmCGTS_CU7_TD_TCP_CTRL_REG                                              0xf02f
#define mmCGTS_CU8_SP0_CTRL_REG                                                 0xf030
#define mmCGTS_CU8_LDS_SQ_CTRL_REG                                              0xf031
#define mmCGTS_CU8_TA_SQC_CTRL_REG                                              0xf032
#define mmCGTS_CU8_SP1_CTRL_REG                                                 0xf033
#define mmCGTS_CU8_TD_TCP_CTRL_REG                                              0xf034
#define mmCGTS_CU9_SP0_CTRL_REG                                                 0xf035
#define mmCGTS_CU9_LDS_SQ_CTRL_REG                                              0xf036
#define mmCGTS_CU9_TA_CTRL_REG                                                  0xf037
#define mmCGTS_CU9_SP1_CTRL_REG                                                 0xf038
#define mmCGTS_CU9_TD_TCP_CTRL_REG                                              0xf039
#define mmCGTS_CU10_SP0_CTRL_REG                                                0xf03a
#define mmCGTS_CU10_LDS_SQ_CTRL_REG                                             0xf03b
#define mmCGTS_CU10_TA_CTRL_REG                                                 0xf03c
#define mmCGTS_CU10_SP1_CTRL_REG                                                0xf03d
#define mmCGTS_CU10_TD_TCP_CTRL_REG                                             0xf03e
#define mmCGTS_CU11_SP0_CTRL_REG                                                0xf03f
#define mmCGTS_CU11_LDS_SQ_CTRL_REG                                             0xf040
#define mmCGTS_CU11_TA_CTRL_REG                                                 0xf041
#define mmCGTS_CU11_SP1_CTRL_REG                                                0xf042
#define mmCGTS_CU11_TD_TCP_CTRL_REG                                             0xf043
#define mmCGTS_CU12_SP0_CTRL_REG                                                0xf044
#define mmCGTS_CU12_LDS_SQ_CTRL_REG                                             0xf045
#define mmCGTS_CU12_TA_SQC_CTRL_REG                                             0xf046
#define mmCGTS_CU12_SP1_CTRL_REG                                                0xf047
#define mmCGTS_CU12_TD_TCP_CTRL_REG                                             0xf048
#define mmCGTS_CU13_SP0_CTRL_REG                                                0xf049
#define mmCGTS_CU13_LDS_SQ_CTRL_REG                                             0xf04a
#define mmCGTS_CU13_TA_CTRL_REG                                                 0xf04b
#define mmCGTS_CU13_SP1_CTRL_REG                                                0xf04c
#define mmCGTS_CU13_TD_TCP_CTRL_REG                                             0xf04d
#define mmCGTS_CU14_SP0_CTRL_REG                                                0xf04e
#define mmCGTS_CU14_LDS_SQ_CTRL_REG                                             0xf04f
#define mmCGTS_CU14_TA_CTRL_REG                                                 0xf050
#define mmCGTS_CU14_SP1_CTRL_REG                                                0xf051
#define mmCGTS_CU14_TD_TCP_CTRL_REG                                             0xf052
#define mmCGTS_CU15_SP0_CTRL_REG                                                0xf053
#define mmCGTS_CU15_LDS_SQ_CTRL_REG                                             0xf054
#define mmCGTS_CU15_TA_CTRL_REG                                                 0xf055
#define mmCGTS_CU15_SP1_CTRL_REG                                                0xf056
#define mmCGTS_CU15_TD_TCP_CTRL_REG                                             0xf057
/* CGTT_*_CLK_CTRL registers for the SPI, PC and BCI blocks. */
#define mmCGTT_SPI_CLK_CTRL                                                     0xf080
#define mmCGTT_PC_CLK_CTRL                                                      0xf081
#define mmCGTT_BCI_CLK_CTRL                                                     0xf082
/*
 * SPI wavefront-lifetime, load-balancing (SPI_LB_*), CSQ and trap-screen
 * register offsets (0x24aa..0x24f5).
 *
 * SPI_WF_LIFETIME_* is one contiguous run: CNTL at 0x24aa, LIMIT_0..9 at
 * 0x24ab..0x24b4, STATUS_0..20 at 0x24b5..0x24c9 and DEBUG at 0x24ca.
 * Note the differing counts: 10 LIMIT registers versus 21 STATUS registers.
 *
 * SPI_P0_TRAP_SCREEN_* and SPI_P1_TRAP_SCREEN_* are two parallel sets of
 * five registers each (PSBA_LO/HI, PSMA_LO/HI, GPR_MIN).
 */
#define mmSPI_WF_LIFETIME_CNTL                                                  0x24aa
#define mmSPI_WF_LIFETIME_LIMIT_0                                               0x24ab
#define mmSPI_WF_LIFETIME_LIMIT_1                                               0x24ac
#define mmSPI_WF_LIFETIME_LIMIT_2                                               0x24ad
#define mmSPI_WF_LIFETIME_LIMIT_3                                               0x24ae
#define mmSPI_WF_LIFETIME_LIMIT_4                                               0x24af
#define mmSPI_WF_LIFETIME_LIMIT_5                                               0x24b0
#define mmSPI_WF_LIFETIME_LIMIT_6                                               0x24b1
#define mmSPI_WF_LIFETIME_LIMIT_7                                               0x24b2
#define mmSPI_WF_LIFETIME_LIMIT_8                                               0x24b3
#define mmSPI_WF_LIFETIME_LIMIT_9                                               0x24b4
#define mmSPI_WF_LIFETIME_STATUS_0                                              0x24b5
#define mmSPI_WF_LIFETIME_STATUS_1                                              0x24b6
#define mmSPI_WF_LIFETIME_STATUS_2                                              0x24b7
#define mmSPI_WF_LIFETIME_STATUS_3                                              0x24b8
#define mmSPI_WF_LIFETIME_STATUS_4                                              0x24b9
#define mmSPI_WF_LIFETIME_STATUS_5                                              0x24ba
#define mmSPI_WF_LIFETIME_STATUS_6                                              0x24bb
#define mmSPI_WF_LIFETIME_STATUS_7                                              0x24bc
#define mmSPI_WF_LIFETIME_STATUS_8                                              0x24bd
#define mmSPI_WF_LIFETIME_STATUS_9                                              0x24be
#define mmSPI_WF_LIFETIME_STATUS_10                                             0x24bf
#define mmSPI_WF_LIFETIME_STATUS_11                                             0x24c0
#define mmSPI_WF_LIFETIME_STATUS_12                                             0x24c1
#define mmSPI_WF_LIFETIME_STATUS_13                                             0x24c2
#define mmSPI_WF_LIFETIME_STATUS_14                                             0x24c3
#define mmSPI_WF_LIFETIME_STATUS_15                                             0x24c4
#define mmSPI_WF_LIFETIME_STATUS_16                                             0x24c5
#define mmSPI_WF_LIFETIME_STATUS_17                                             0x24c6
#define mmSPI_WF_LIFETIME_STATUS_18                                             0x24c7
#define mmSPI_WF_LIFETIME_STATUS_19                                             0x24c8
#define mmSPI_WF_LIFETIME_STATUS_20                                             0x24c9
#define mmSPI_WF_LIFETIME_DEBUG                                                 0x24ca
#define mmSPI_SLAVE_DEBUG_BUSY                                                  0x24d3
#define mmSPI_LB_CTR_CTRL                                                       0x24d4
#define mmSPI_LB_CU_MASK                                                        0x24d5
#define mmSPI_LB_DATA_REG                                                       0x24d6
#define mmSPI_PG_ENABLE_STATIC_CU_MASK                                          0x24d7
#define mmSPI_GDS_CREDITS                                                       0x24d8
#define mmSPI_SX_EXPORT_BUFFER_SIZES                                            0x24d9
#define mmSPI_SX_SCOREBOARD_BUFFER_SIZES                                        0x24da
#define mmSPI_CSQ_WF_ACTIVE_STATUS                                              0x24db
#define mmSPI_CSQ_WF_ACTIVE_COUNT_0                                             0x24dc
#define mmSPI_CSQ_WF_ACTIVE_COUNT_1                                             0x24dd
#define mmSPI_CSQ_WF_ACTIVE_COUNT_2                                             0x24de
#define mmSPI_CSQ_WF_ACTIVE_COUNT_3                                             0x24df
#define mmSPI_CSQ_WF_ACTIVE_COUNT_4                                             0x24e0
#define mmSPI_CSQ_WF_ACTIVE_COUNT_5                                             0x24e1
#define mmSPI_CSQ_WF_ACTIVE_COUNT_6                                             0x24e2
#define mmSPI_CSQ_WF_ACTIVE_COUNT_7                                             0x24e3
/* BCI_DEBUG_READ is listed here, between the SPI CSQ and SPI trap-screen registers. */
#define mmBCI_DEBUG_READ                                                        0x24eb
#define mmSPI_P0_TRAP_SCREEN_PSBA_LO                                            0x24ec
#define mmSPI_P0_TRAP_SCREEN_PSBA_HI                                            0x24ed
#define mmSPI_P0_TRAP_SCREEN_PSMA_LO                                            0x24ee
#define mmSPI_P0_TRAP_SCREEN_PSMA_HI                                            0x24ef
#define mmSPI_P0_TRAP_SCREEN_GPR_MIN                                            0x24f0
#define mmSPI_P1_TRAP_SCREEN_PSBA_LO                                            0x24f1
#define mmSPI_P1_TRAP_SCREEN_PSBA_HI                                            0x24f2
#define mmSPI_P1_TRAP_SCREEN_PSMA_LO                                            0x24f3
#define mmSPI_P1_TRAP_SCREEN_PSMA_HI                                            0x24f4
#define mmSPI_P1_TRAP_SCREEN_GPR_MIN                                            0x24f5
/*
 * SPI_SHADER_*_PS register offsets (0x2c00..0x2c1b).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2c00..0x2c03, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2c08..0x2c0b, then USER_DATA_PS_0..15 contiguously at
 * 0x2c0c..0x2c1b.
 *
 * PGM_RSRC3_PS is listed after RSRC2 but has the lower offset 0x2c07, i.e.
 * it sits immediately before PGM_LO_PS rather than after PGM_RSRC2_PS.
 */
#define mmSPI_SHADER_TBA_LO_PS                                                  0x2c00
#define mmSPI_SHADER_TBA_HI_PS                                                  0x2c01
#define mmSPI_SHADER_TMA_LO_PS                                                  0x2c02
#define mmSPI_SHADER_TMA_HI_PS                                                  0x2c03
#define mmSPI_SHADER_PGM_LO_PS                                                  0x2c08
#define mmSPI_SHADER_PGM_HI_PS                                                  0x2c09
#define mmSPI_SHADER_PGM_RSRC1_PS                                               0x2c0a
#define mmSPI_SHADER_PGM_RSRC2_PS                                               0x2c0b
#define mmSPI_SHADER_PGM_RSRC3_PS                                               0x2c07
#define mmSPI_SHADER_USER_DATA_PS_0                                             0x2c0c
#define mmSPI_SHADER_USER_DATA_PS_1                                             0x2c0d
#define mmSPI_SHADER_USER_DATA_PS_2                                             0x2c0e
#define mmSPI_SHADER_USER_DATA_PS_3                                             0x2c0f
#define mmSPI_SHADER_USER_DATA_PS_4                                             0x2c10
#define mmSPI_SHADER_USER_DATA_PS_5                                             0x2c11
#define mmSPI_SHADER_USER_DATA_PS_6                                             0x2c12
#define mmSPI_SHADER_USER_DATA_PS_7                                             0x2c13
#define mmSPI_SHADER_USER_DATA_PS_8                                             0x2c14
#define mmSPI_SHADER_USER_DATA_PS_9                                             0x2c15
#define mmSPI_SHADER_USER_DATA_PS_10                                            0x2c16
#define mmSPI_SHADER_USER_DATA_PS_11                                            0x2c17
#define mmSPI_SHADER_USER_DATA_PS_12                                            0x2c18
#define mmSPI_SHADER_USER_DATA_PS_13                                            0x2c19
#define mmSPI_SHADER_USER_DATA_PS_14                                            0x2c1a
#define mmSPI_SHADER_USER_DATA_PS_15                                            0x2c1b
/*
 * SPI_SHADER_*_VS register offsets (0x2c40..0x2c5b, plus 0x2c7c/0x2c7d).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2c40..0x2c43, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2c48..0x2c4b, then USER_DATA_VS_0..15 contiguously at
 * 0x2c4c..0x2c5b.
 *
 * Unlike the PS/GS/ES/HS/LS sets, where PGM_RSRC3 is at "stage base + 7",
 * PGM_RSRC3_VS is at 0x2c46 (base + 6) and base + 7 (0x2c47) holds
 * SPI_SHADER_LATE_ALLOC_VS. Do not compute the RSRC3 offset generically
 * across stages.
 */
#define mmSPI_SHADER_TBA_LO_VS                                                  0x2c40
#define mmSPI_SHADER_TBA_HI_VS                                                  0x2c41
#define mmSPI_SHADER_TMA_LO_VS                                                  0x2c42
#define mmSPI_SHADER_TMA_HI_VS                                                  0x2c43
#define mmSPI_SHADER_PGM_LO_VS                                                  0x2c48
#define mmSPI_SHADER_PGM_HI_VS                                                  0x2c49
#define mmSPI_SHADER_PGM_RSRC1_VS                                               0x2c4a
#define mmSPI_SHADER_PGM_RSRC2_VS                                               0x2c4b
#define mmSPI_SHADER_PGM_RSRC3_VS                                               0x2c46
#define mmSPI_SHADER_LATE_ALLOC_VS                                              0x2c47
#define mmSPI_SHADER_USER_DATA_VS_0                                             0x2c4c
#define mmSPI_SHADER_USER_DATA_VS_1                                             0x2c4d
#define mmSPI_SHADER_USER_DATA_VS_2                                             0x2c4e
#define mmSPI_SHADER_USER_DATA_VS_3                                             0x2c4f
#define mmSPI_SHADER_USER_DATA_VS_4                                             0x2c50
#define mmSPI_SHADER_USER_DATA_VS_5                                             0x2c51
#define mmSPI_SHADER_USER_DATA_VS_6                                             0x2c52
#define mmSPI_SHADER_USER_DATA_VS_7                                             0x2c53
#define mmSPI_SHADER_USER_DATA_VS_8                                             0x2c54
#define mmSPI_SHADER_USER_DATA_VS_9                                             0x2c55
#define mmSPI_SHADER_USER_DATA_VS_10                                            0x2c56
#define mmSPI_SHADER_USER_DATA_VS_11                                            0x2c57
#define mmSPI_SHADER_USER_DATA_VS_12                                            0x2c58
#define mmSPI_SHADER_USER_DATA_VS_13                                            0x2c59
#define mmSPI_SHADER_USER_DATA_VS_14                                            0x2c5a
#define mmSPI_SHADER_USER_DATA_VS_15                                            0x2c5b
/* Additional PGM_RSRC2 variants for the VS range (ES_VS and LS_VS). */
#define mmSPI_SHADER_PGM_RSRC2_ES_VS                                            0x2c7c
#define mmSPI_SHADER_PGM_RSRC2_LS_VS                                            0x2c7d
/*
 * SPI_SHADER_*_GS register offsets (0x2c80..0x2c9b, plus 0x2cbc).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2c80..0x2c83, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2c88..0x2c8b, then USER_DATA_GS_0..15 contiguously at
 * 0x2c8c..0x2c9b.
 *
 * PGM_RSRC3_GS is listed after RSRC2 but has the lower offset 0x2c87,
 * immediately before PGM_LO_GS.
 */
#define mmSPI_SHADER_TBA_LO_GS                                                  0x2c80
#define mmSPI_SHADER_TBA_HI_GS                                                  0x2c81
#define mmSPI_SHADER_TMA_LO_GS                                                  0x2c82
#define mmSPI_SHADER_TMA_HI_GS                                                  0x2c83
#define mmSPI_SHADER_PGM_LO_GS                                                  0x2c88
#define mmSPI_SHADER_PGM_HI_GS                                                  0x2c89
#define mmSPI_SHADER_PGM_RSRC1_GS                                               0x2c8a
#define mmSPI_SHADER_PGM_RSRC2_GS                                               0x2c8b
#define mmSPI_SHADER_PGM_RSRC3_GS                                               0x2c87
#define mmSPI_SHADER_USER_DATA_GS_0                                             0x2c8c
#define mmSPI_SHADER_USER_DATA_GS_1                                             0x2c8d
#define mmSPI_SHADER_USER_DATA_GS_2                                             0x2c8e
#define mmSPI_SHADER_USER_DATA_GS_3                                             0x2c8f
#define mmSPI_SHADER_USER_DATA_GS_4                                             0x2c90
#define mmSPI_SHADER_USER_DATA_GS_5                                             0x2c91
#define mmSPI_SHADER_USER_DATA_GS_6                                             0x2c92
#define mmSPI_SHADER_USER_DATA_GS_7                                             0x2c93
#define mmSPI_SHADER_USER_DATA_GS_8                                             0x2c94
#define mmSPI_SHADER_USER_DATA_GS_9                                             0x2c95
#define mmSPI_SHADER_USER_DATA_GS_10                                            0x2c96
#define mmSPI_SHADER_USER_DATA_GS_11                                            0x2c97
#define mmSPI_SHADER_USER_DATA_GS_12                                            0x2c98
#define mmSPI_SHADER_USER_DATA_GS_13                                            0x2c99
#define mmSPI_SHADER_USER_DATA_GS_14                                            0x2c9a
#define mmSPI_SHADER_USER_DATA_GS_15                                            0x2c9b
/* Additional PGM_RSRC2 variant for the GS range (ES_GS). */
#define mmSPI_SHADER_PGM_RSRC2_ES_GS                                            0x2cbc
/*
 * SPI_SHADER_*_ES register offsets (0x2cc0..0x2cdb, plus 0x2cfd).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2cc0..0x2cc3, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2cc8..0x2ccb, then USER_DATA_ES_0..15 contiguously at
 * 0x2ccc..0x2cdb.
 *
 * PGM_RSRC3_ES is listed after RSRC2 but has the lower offset 0x2cc7,
 * immediately before PGM_LO_ES.
 */
#define mmSPI_SHADER_TBA_LO_ES                                                  0x2cc0
#define mmSPI_SHADER_TBA_HI_ES                                                  0x2cc1
#define mmSPI_SHADER_TMA_LO_ES                                                  0x2cc2
#define mmSPI_SHADER_TMA_HI_ES                                                  0x2cc3
#define mmSPI_SHADER_PGM_LO_ES                                                  0x2cc8
#define mmSPI_SHADER_PGM_HI_ES                                                  0x2cc9
#define mmSPI_SHADER_PGM_RSRC1_ES                                               0x2cca
#define mmSPI_SHADER_PGM_RSRC2_ES                                               0x2ccb
#define mmSPI_SHADER_PGM_RSRC3_ES                                               0x2cc7
#define mmSPI_SHADER_USER_DATA_ES_0                                             0x2ccc
#define mmSPI_SHADER_USER_DATA_ES_1                                             0x2ccd
#define mmSPI_SHADER_USER_DATA_ES_2                                             0x2cce
#define mmSPI_SHADER_USER_DATA_ES_3                                             0x2ccf
#define mmSPI_SHADER_USER_DATA_ES_4                                             0x2cd0
#define mmSPI_SHADER_USER_DATA_ES_5                                             0x2cd1
#define mmSPI_SHADER_USER_DATA_ES_6                                             0x2cd2
#define mmSPI_SHADER_USER_DATA_ES_7                                             0x2cd3
#define mmSPI_SHADER_USER_DATA_ES_8                                             0x2cd4
#define mmSPI_SHADER_USER_DATA_ES_9                                             0x2cd5
#define mmSPI_SHADER_USER_DATA_ES_10                                            0x2cd6
#define mmSPI_SHADER_USER_DATA_ES_11                                            0x2cd7
#define mmSPI_SHADER_USER_DATA_ES_12                                            0x2cd8
#define mmSPI_SHADER_USER_DATA_ES_13                                            0x2cd9
#define mmSPI_SHADER_USER_DATA_ES_14                                            0x2cda
#define mmSPI_SHADER_USER_DATA_ES_15                                            0x2cdb
/* Additional PGM_RSRC2 variant for the ES range (LS_ES). */
#define mmSPI_SHADER_PGM_RSRC2_LS_ES                                            0x2cfd
/*
 * SPI_SHADER_*_HS register offsets (0x2d00..0x2d1b, plus 0x2d3d).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2d00..0x2d03, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2d08..0x2d0b, then USER_DATA_HS_0..15 contiguously at
 * 0x2d0c..0x2d1b.
 *
 * PGM_RSRC3_HS is listed after RSRC2 but has the lower offset 0x2d07,
 * immediately before PGM_LO_HS.
 */
#define mmSPI_SHADER_TBA_LO_HS                                                  0x2d00
#define mmSPI_SHADER_TBA_HI_HS                                                  0x2d01
#define mmSPI_SHADER_TMA_LO_HS                                                  0x2d02
#define mmSPI_SHADER_TMA_HI_HS                                                  0x2d03
#define mmSPI_SHADER_PGM_LO_HS                                                  0x2d08
#define mmSPI_SHADER_PGM_HI_HS                                                  0x2d09
#define mmSPI_SHADER_PGM_RSRC1_HS                                               0x2d0a
#define mmSPI_SHADER_PGM_RSRC2_HS                                               0x2d0b
#define mmSPI_SHADER_PGM_RSRC3_HS                                               0x2d07
#define mmSPI_SHADER_USER_DATA_HS_0                                             0x2d0c
#define mmSPI_SHADER_USER_DATA_HS_1                                             0x2d0d
#define mmSPI_SHADER_USER_DATA_HS_2                                             0x2d0e
#define mmSPI_SHADER_USER_DATA_HS_3                                             0x2d0f
#define mmSPI_SHADER_USER_DATA_HS_4                                             0x2d10
#define mmSPI_SHADER_USER_DATA_HS_5                                             0x2d11
#define mmSPI_SHADER_USER_DATA_HS_6                                             0x2d12
#define mmSPI_SHADER_USER_DATA_HS_7                                             0x2d13
#define mmSPI_SHADER_USER_DATA_HS_8                                             0x2d14
#define mmSPI_SHADER_USER_DATA_HS_9                                             0x2d15
#define mmSPI_SHADER_USER_DATA_HS_10                                            0x2d16
#define mmSPI_SHADER_USER_DATA_HS_11                                            0x2d17
#define mmSPI_SHADER_USER_DATA_HS_12                                            0x2d18
#define mmSPI_SHADER_USER_DATA_HS_13                                            0x2d19
#define mmSPI_SHADER_USER_DATA_HS_14                                            0x2d1a
#define mmSPI_SHADER_USER_DATA_HS_15                                            0x2d1b
/* Additional PGM_RSRC2 variant for the HS range (LS_HS). */
#define mmSPI_SHADER_PGM_RSRC2_LS_HS                                            0x2d3d
/*
 * SPI_SHADER_*_LS register offsets (0x2d40..0x2d5b).
 *
 * Layout: TBA_LO/HI and TMA_LO/HI at 0x2d40..0x2d43, PGM_LO/HI and
 * PGM_RSRC1/2 at 0x2d48..0x2d4b, then USER_DATA_LS_0..15 contiguously at
 * 0x2d4c..0x2d5b.
 *
 * PGM_RSRC3_LS is listed after RSRC2 but has the lower offset 0x2d47,
 * immediately before PGM_LO_LS.
 */
#define mmSPI_SHADER_TBA_LO_LS                                                  0x2d40
#define mmSPI_SHADER_TBA_HI_LS                                                  0x2d41
#define mmSPI_SHADER_TMA_LO_LS                                                  0x2d42
#define mmSPI_SHADER_TMA_HI_LS                                                  0x2d43
#define mmSPI_SHADER_PGM_LO_LS                                                  0x2d48
#define mmSPI_SHADER_PGM_HI_LS                                                  0x2d49
#define mmSPI_SHADER_PGM_RSRC1_LS                                               0x2d4a
#define mmSPI_SHADER_PGM_RSRC2_LS                                               0x2d4b
#define mmSPI_SHADER_PGM_RSRC3_LS                                               0x2d47
#define mmSPI_SHADER_USER_DATA_LS_0                                             0x2d4c
#define mmSPI_SHADER_USER_DATA_LS_1                                             0x2d4d
#define mmSPI_SHADER_USER_DATA_LS_2                                             0x2d4e
#define mmSPI_SHADER_USER_DATA_LS_3                                             0x2d4f
#define mmSPI_SHADER_USER_DATA_LS_4                                             0x2d50
#define mmSPI_SHADER_USER_DATA_LS_5                                             0x2d51
#define mmSPI_SHADER_USER_DATA_LS_6                                             0x2d52
#define mmSPI_SHADER_USER_DATA_LS_7                                             0x2d53
#define mmSPI_SHADER_USER_DATA_LS_8                                             0x2d54
#define mmSPI_SHADER_USER_DATA_LS_9                                             0x2d55
#define mmSPI_SHADER_USER_DATA_LS_10                                            0x2d56
#define mmSPI_SHADER_USER_DATA_LS_11                                            0x2d57
#define mmSPI_SHADER_USER_DATA_LS_12                                            0x2d58
#define mmSPI_SHADER_USER_DATA_LS_13                                            0x2d59
#define mmSPI_SHADER_USER_DATA_LS_14                                            0x2d5a
#define mmSPI_SHADER_USER_DATA_LS_15                                            0x2d5b
#define mmSQ_CONFIG                                                             0x2300
#define mmSQC_CONFIG                                                            0x2301
#define mmSQC_CACHES                                                            0xc348
#define mmSQ_RANDOM_WAVE_PRI                                                    0x2303
#define mmSQ_REG_CREDITS                                                        0x2304
#define mmSQ_FIFO_SIZES                                                         0x2305
#define mmSQ_INTERRUPT_AUTO_MASK                                                0x2314
#define mmSQ_INTERRUPT_MSG_CTRL                                                 0x2315
#define mmSQ_PERFCOUNTER_CTRL                                                   0xd9e0
#define mmSQ_PERFCOUNTER_MASK                                                   0xd9e1
#define mmSQ_PERFCOUNTER_CTRL2                                                  0xd9e2
#define mmCC_SQC_BANK_DISABLE                                                   0x2307
#define mmUSER_SQC_BANK_DISABLE                                                 0x2308
/*
 * SQ performance counters 0-15.
 *
 * Layout visible in the values below:
 *   mmSQ_PERFCOUNTER<N>_LO     = 0xd1c0 + 2*N
 *   mmSQ_PERFCOUNTER<N>_HI     = 0xd1c0 + 2*N + 1
 *   mmSQ_PERFCOUNTER<N>_SELECT = 0xd9c0 + N
 *
 * The _LO and _HI macros are listed in separate runs even though their
 * offsets interleave. Presumably _LO/_HI are the low and high halves of one
 * wide counter value - confirm against the hardware register spec.
 */
#define mmSQ_PERFCOUNTER0_LO                                                    0xd1c0
#define mmSQ_PERFCOUNTER1_LO                                                    0xd1c2
#define mmSQ_PERFCOUNTER2_LO                                                    0xd1c4
#define mmSQ_PERFCOUNTER3_LO                                                    0xd1c6
#define mmSQ_PERFCOUNTER4_LO                                                    0xd1c8
#define mmSQ_PERFCOUNTER5_LO                                                    0xd1ca
#define mmSQ_PERFCOUNTER6_LO                                                    0xd1cc
#define mmSQ_PERFCOUNTER7_LO                                                    0xd1ce
#define mmSQ_PERFCOUNTER8_LO                                                    0xd1d0
#define mmSQ_PERFCOUNTER9_LO                                                    0xd1d2
#define mmSQ_PERFCOUNTER10_LO                                                   0xd1d4
#define mmSQ_PERFCOUNTER11_LO                                                   0xd1d6
#define mmSQ_PERFCOUNTER12_LO                                                   0xd1d8
#define mmSQ_PERFCOUNTER13_LO                                                   0xd1da
#define mmSQ_PERFCOUNTER14_LO                                                   0xd1dc
#define mmSQ_PERFCOUNTER15_LO                                                   0xd1de
#define mmSQ_PERFCOUNTER0_HI                                                    0xd1c1
#define mmSQ_PERFCOUNTER1_HI                                                    0xd1c3
#define mmSQ_PERFCOUNTER2_HI                                                    0xd1c5
#define mmSQ_PERFCOUNTER3_HI                                                    0xd1c7
#define mmSQ_PERFCOUNTER4_HI                                                    0xd1c9
#define mmSQ_PERFCOUNTER5_HI                                                    0xd1cb
#define mmSQ_PERFCOUNTER6_HI                                                    0xd1cd
#define mmSQ_PERFCOUNTER7_HI                                                    0xd1cf
#define mmSQ_PERFCOUNTER8_HI                                                    0xd1d1
#define mmSQ_PERFCOUNTER9_HI                                                    0xd1d3
#define mmSQ_PERFCOUNTER10_HI                                                   0xd1d5
#define mmSQ_PERFCOUNTER11_HI                                                   0xd1d7
#define mmSQ_PERFCOUNTER12_HI                                                   0xd1d9
#define mmSQ_PERFCOUNTER13_HI                                                   0xd1db
#define mmSQ_PERFCOUNTER14_HI                                                   0xd1dd
#define mmSQ_PERFCOUNTER15_HI                                                   0xd1df
#define mmSQ_PERFCOUNTER0_SELECT                                                0xd9c0
#define mmSQ_PERFCOUNTER1_SELECT                                                0xd9c1
#define mmSQ_PERFCOUNTER2_SELECT                                                0xd9c2
#define mmSQ_PERFCOUNTER3_SELECT                                                0xd9c3
#define mmSQ_PERFCOUNTER4_SELECT                                                0xd9c4
#define mmSQ_PERFCOUNTER5_SELECT                                                0xd9c5
#define mmSQ_PERFCOUNTER6_SELECT                                                0xd9c6
#define mmSQ_PERFCOUNTER7_SELECT                                                0xd9c7
#define mmSQ_PERFCOUNTER8_SELECT                                                0xd9c8
#define mmSQ_PERFCOUNTER9_SELECT                                                0xd9c9
#define mmSQ_PERFCOUNTER10_SELECT                                               0xd9ca
#define mmSQ_PERFCOUNTER11_SELECT                                               0xd9cb
#define mmSQ_PERFCOUNTER12_SELECT                                               0xd9cc
#define mmSQ_PERFCOUNTER13_SELECT                                               0xd9cd
#define mmSQ_PERFCOUNTER14_SELECT                                               0xd9ce
#define mmSQ_PERFCOUNTER15_SELECT                                               0xd9cf
/*
 * SQ clock-control and power-throttle register offsets; the seven entries
 * occupy the contiguous range 0xf08c-0xf092.
 */
#define mmCGTT_SQ_CLK_CTRL                                                      0xf08c
#define mmCGTT_SQG_CLK_CTRL                                                     0xf08d
#define mmSQ_ALU_CLK_CTRL                                                       0xf08e
#define mmSQ_TEX_CLK_CTRL                                                       0xf08f
#define mmSQ_LDS_CLK_CTRL                                                       0xf090
#define mmSQ_POWER_THROTTLE                                                     0xf091
#define mmSQ_POWER_THROTTLE2                                                    0xf092
/*
 * SQ time and thread-trace register offsets.
 *
 * Note that mmSQ_TIME_HI (0x237c) is at the lower offset and mmSQ_TIME_LO
 * (0x237d) at the higher one. The thread-trace control registers fall in
 * 0x2380-0x2392 (not listed in offset order), while the four
 * SQ_THREAD_TRACE_USERDATA_<n> entries are in a separate range, 0xc340-0xc343.
 */
#define mmSQ_TIME_HI                                                            0x237c
#define mmSQ_TIME_LO                                                            0x237d
#define mmSQ_THREAD_TRACE_BASE                                                  0x2380
#define mmSQ_THREAD_TRACE_BASE2                                                 0x2385
#define mmSQ_THREAD_TRACE_SIZE                                                  0x2381
#define mmSQ_THREAD_TRACE_MASK                                                  0x2382
#define mmSQ_THREAD_TRACE_USERDATA_0                                            0xc340
#define mmSQ_THREAD_TRACE_USERDATA_1                                            0xc341
#define mmSQ_THREAD_TRACE_USERDATA_2                                            0xc342
#define mmSQ_THREAD_TRACE_USERDATA_3                                            0xc343
#define mmSQ_THREAD_TRACE_MODE                                                  0x238e
#define mmSQ_THREAD_TRACE_CTRL                                                  0x238f
#define mmSQ_THREAD_TRACE_TOKEN_MASK                                            0x2383
#define mmSQ_THREAD_TRACE_TOKEN_MASK2                                           0x2386
#define mmSQ_THREAD_TRACE_PERF_MASK                                             0x2384
#define mmSQ_THREAD_TRACE_WPTR                                                  0x238c
#define mmSQ_THREAD_TRACE_STATUS                                                0x238d
#define mmSQ_THREAD_TRACE_CNTR                                                  0x2390
#define mmSQ_THREAD_TRACE_HIWATER                                               0x2392
/*
 * SQ_LB_* counter registers (0x2398-0x239c) followed by the SQC/SQ
 * SEC/DED count and info registers (0x23a0-0x23a3).
 * NOTE(review): "SEC"/"DED" presumably refer to single-/double-bit ECC error
 * counts - confirm against the register spec.
 */
#define mmSQ_LB_CTR_CTRL                                                        0x2398
#define mmSQ_LB_DATA_ALU_CYCLES                                                 0x2399
#define mmSQ_LB_DATA_TEX_CYCLES                                                 0x239a
#define mmSQ_LB_DATA_ALU_STALLS                                                 0x239b
#define mmSQ_LB_DATA_TEX_STALLS                                                 0x239c
#define mmSQC_SECDED_CNT                                                        0x23a0
#define mmSQ_SEC_CNT                                                            0x23a1
#define mmSQ_DED_CNT                                                            0x23a2
#define mmSQ_DED_INFO                                                           0x23a3
/*
 * SQ resource/sampler/scratch "WORD<n>" offsets, contiguous from 0x23c0 to
 * 0x23d1: BUF_RSRC words 0-3, IMG_RSRC words 0-7, IMG_SAMP words 0-3 and
 * FLAT_SCRATCH words 0-1.
 * NOTE(review): these look like descriptor-layout words rather than
 * individually programmed registers - confirm before using them for MMIO.
 */
#define mmSQ_BUF_RSRC_WORD0                                                     0x23c0
#define mmSQ_BUF_RSRC_WORD1                                                     0x23c1
#define mmSQ_BUF_RSRC_WORD2                                                     0x23c2
#define mmSQ_BUF_RSRC_WORD3                                                     0x23c3
#define mmSQ_IMG_RSRC_WORD0                                                     0x23c4
#define mmSQ_IMG_RSRC_WORD1                                                     0x23c5
#define mmSQ_IMG_RSRC_WORD2                                                     0x23c6
#define mmSQ_IMG_RSRC_WORD3                                                     0x23c7
#define mmSQ_IMG_RSRC_WORD4                                                     0x23c8
#define mmSQ_IMG_RSRC_WORD5                                                     0x23c9
#define mmSQ_IMG_RSRC_WORD6                                                     0x23ca
#define mmSQ_IMG_RSRC_WORD7                                                     0x23cb
#define mmSQ_IMG_SAMP_WORD0                                                     0x23cc
#define mmSQ_IMG_SAMP_WORD1                                                     0x23cd
#define mmSQ_IMG_SAMP_WORD2                                                     0x23ce
#define mmSQ_IMG_SAMP_WORD3                                                     0x23cf
#define mmSQ_FLAT_SCRATCH_WORD0                                                 0x23d0
#define mmSQ_FLAT_SCRATCH_WORD1                                                 0x23d1
/*
 * SQ index/data/command and timestamp register offsets, plus
 * mmSQ_HV_VMID_CTRL. Listing order does not follow offset order
 * (IND_INDEX 0x2378, IND_DATA 0x2379, IND_CMD 0x237a, CMD 0x237b).
 * NOTE(review): the IND_INDEX/IND_DATA pair is presumably how the
 * "ix"-prefixed SQ values elsewhere in this file are reached - confirm.
 */
#define mmSQ_IND_INDEX                                                          0x2378
#define mmSQ_IND_CMD                                                            0x237a
#define mmSQ_CMD                                                                0x237b
#define mmSQ_IND_DATA                                                           0x2379
#define mmSQ_REG_TIMESTAMP                                                      0x2374
#define mmSQ_CMD_TIMESTAMP                                                      0x2375
#define mmSQ_HV_VMID_CTRL                                                       0xf840
/*
 * "ix"-prefixed SQ_WAVE_* values. Unlike the "mm" entries around them these
 * are small numbers in two clusters: 0x11-0x1c (MODE, STATUS, TRAPSTS, HW_ID,
 * GPR/LDS alloc, IB_STS, PC, INST dwords, IB_DBG0) and 0x26c-0x27f (TBA, TMA,
 * TTMP0-TTMP11 at 0x270-0x27b, M0, EXEC).
 * NOTE(review): presumably these are indices into an indirect register space
 * rather than direct register offsets - confirm against the code that
 * consumes them.
 */
#define ixSQ_WAVE_INST_DW0                                                      0x1a
#define ixSQ_WAVE_INST_DW1                                                      0x1b
#define ixSQ_WAVE_PC_LO                                                         0x18
#define ixSQ_WAVE_PC_HI                                                         0x19
#define ixSQ_WAVE_IB_DBG0                                                       0x1c
#define ixSQ_WAVE_EXEC_LO                                                       0x27e
#define ixSQ_WAVE_EXEC_HI                                                       0x27f
#define ixSQ_WAVE_STATUS                                                        0x12
#define ixSQ_WAVE_MODE                                                          0x11
#define ixSQ_WAVE_TRAPSTS                                                       0x13
#define ixSQ_WAVE_HW_ID                                                         0x14
#define ixSQ_WAVE_GPR_ALLOC                                                     0x15
#define ixSQ_WAVE_LDS_ALLOC                                                     0x16
#define ixSQ_WAVE_IB_STS                                                        0x17
#define ixSQ_WAVE_M0                                                            0x27c
#define ixSQ_WAVE_TBA_LO                                                        0x26c
#define ixSQ_WAVE_TBA_HI                                                        0x26d
#define ixSQ_WAVE_TMA_LO                                                        0x26e
#define ixSQ_WAVE_TMA_HI                                                        0x26f
#define ixSQ_WAVE_TTMP0                                                         0x270
#define ixSQ_WAVE_TTMP1                                                         0x271
#define ixSQ_WAVE_TTMP2                                                         0x272
#define ixSQ_WAVE_TTMP3                                                         0x273
#define ixSQ_WAVE_TTMP4                                                         0x274
#define ixSQ_WAVE_TTMP5                                                         0x275
#define ixSQ_WAVE_TTMP6                                                         0x276
#define ixSQ_WAVE_TTMP7                                                         0x277
#define ixSQ_WAVE_TTMP8                                                         0x278
#define ixSQ_WAVE_TTMP9                                                         0x279
#define ixSQ_WAVE_TTMP10                                                        0x27a
#define ixSQ_WAVE_TTMP11                                                        0x27b
/*
 * SQ debug status/control, SH_MEM_* and SQC policy register offsets.
 * This run mixes "mm" entries (0x2309-0x2311) with two "ix" entries
 * (ixSQ_DEBUG_STS_LOCAL = 0x8, ixSQ_DEBUG_CTRL_LOCAL = 0x9); do not treat the
 * two kinds of value as interchangeable.
 */
#define mmSQ_DEBUG_STS_GLOBAL                                                   0x2309
#define mmSQ_DEBUG_STS_GLOBAL2                                                  0x2310
#define mmSQ_DEBUG_STS_GLOBAL3                                                  0x2311
#define ixSQ_DEBUG_STS_LOCAL                                                    0x8
#define ixSQ_DEBUG_CTRL_LOCAL                                                   0x9
#define mmSH_MEM_BASES                                                          0x230a
#define mmSH_MEM_APE1_BASE                                                      0x230b
#define mmSH_MEM_APE1_LIMIT                                                     0x230c
#define mmSH_MEM_CONFIG                                                         0x230d
#define mmSQC_POLICY                                                            0x230e
#define mmSQC_VOLATILE                                                          0x230f
/*
 * SQ_THREAD_TRACE_WORD_* and SQ_INTERRUPT_WORD_* names.
 *
 * Many names alias the same value: every SQ_THREAD_TRACE_WORD_* macro is
 * either 0x23b0 or 0x23b1, and all three ixSQ_INTERRUPT_WORD_* macros are
 * 0x20c0. Code must therefore never use these macros as distinct switch
 * labels or table keys.
 *
 * NOTE(review): mmSQ_THREAD_TRACE_WORD_REG_2_OF_2 and
 * mmSQ_THREAD_TRACE_WORD_REG_CS_2_OF_2 are 0x23b0, whereas every other
 * *_2_OF_2 entry here is 0x23b1. Values are left exactly as found; verify
 * against the authoritative register spec before relying on them.
 */
#define mmSQ_THREAD_TRACE_WORD_CMN                                              0x23b0
#define mmSQ_THREAD_TRACE_WORD_INST                                             0x23b0
#define mmSQ_THREAD_TRACE_WORD_INST_PC_1_OF_2                                   0x23b0
#define mmSQ_THREAD_TRACE_WORD_INST_PC_2_OF_2                                   0x23b1
#define mmSQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2                             0x23b0
#define mmSQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2                             0x23b1
#define mmSQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2                                 0x23b0
#define mmSQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2                                 0x23b1
#define mmSQ_THREAD_TRACE_WORD_WAVE                                             0x23b0
#define mmSQ_THREAD_TRACE_WORD_MISC                                             0x23b0
#define mmSQ_THREAD_TRACE_WORD_WAVE_START                                       0x23b0
#define mmSQ_THREAD_TRACE_WORD_REG_1_OF_2                                       0x23b0
#define mmSQ_THREAD_TRACE_WORD_REG_2_OF_2                                       0x23b0
#define mmSQ_THREAD_TRACE_WORD_REG_CS_1_OF_2                                    0x23b0
#define mmSQ_THREAD_TRACE_WORD_REG_CS_2_OF_2                                    0x23b0
#define mmSQ_THREAD_TRACE_WORD_EVENT                                            0x23b0
#define mmSQ_THREAD_TRACE_WORD_ISSUE                                            0x23b0
#define mmSQ_THREAD_TRACE_WORD_PERF_1_OF_2                                      0x23b0
#define mmSQ_THREAD_TRACE_WORD_PERF_2_OF_2                                      0x23b1
#define ixSQ_INTERRUPT_WORD_CMN                                                 0x20c0
#define ixSQ_INTERRUPT_WORD_AUTO                                                0x20c0
#define ixSQ_INTERRUPT_WORD_WAVE                                                0x20c0
/*
 * All 26 names below are defined to the same value, 0x237f.
 * NOTE(review): the names (SOP2, VOP1, MUBUF_0, DS_1, ...) look like shader
 * instruction-encoding formats that share a single slot, so the macros
 * likely exist only to anchor per-format field definitions - confirm.
 * Because the values are identical, these macros cannot be used to tell the
 * formats apart.
 */
#define mmSQ_SOP2                                                               0x237f
#define mmSQ_VOP1                                                               0x237f
#define mmSQ_MTBUF_1                                                            0x237f
#define mmSQ_EXP_1                                                              0x237f
#define mmSQ_MUBUF_1                                                            0x237f
#define mmSQ_INST                                                               0x237f
#define mmSQ_EXP_0                                                              0x237f
#define mmSQ_MUBUF_0                                                            0x237f
#define mmSQ_VOP3_0                                                             0x237f
#define mmSQ_VOP2                                                               0x237f
#define mmSQ_MTBUF_0                                                            0x237f
#define mmSQ_SOPP                                                               0x237f
#define mmSQ_FLAT_0                                                             0x237f
#define mmSQ_VOP3_0_SDST_ENC                                                    0x237f
#define mmSQ_MIMG_1                                                             0x237f
#define mmSQ_SMRD                                                               0x237f
#define mmSQ_SOP1                                                               0x237f
#define mmSQ_SOPC                                                               0x237f
#define mmSQ_FLAT_1                                                             0x237f
#define mmSQ_DS_1                                                               0x237f
#define mmSQ_VOP3_1                                                             0x237f
#define mmSQ_MIMG_0                                                             0x237f
#define mmSQ_SOPK                                                               0x237f
#define mmSQ_DS_0                                                               0x237f
#define mmSQ_VOPC                                                               0x237f
#define mmSQ_VINTRP                                                             0x237f
/*
 * SX block register offsets.
 *   - CGTT_SX_CLK_CTRL0..4:          0xf094-0xf098
 *   - SX_DEBUG_BUSY*, SX_DEBUG_1:    0x2414-0x2418
 *   - SX_PERFCOUNTER<N>_SELECT:      0xda40 + N (N = 0..3)
 *   - SX_PERFCOUNTER<N>_SELECT1:     0xda44 + N (only N = 0, 1 are defined)
 *   - SX_PERFCOUNTER<N>_LO / _HI:    0xd240 + 2*N and 0xd240 + 2*N + 1
 */
#define mmCGTT_SX_CLK_CTRL0                                                     0xf094
#define mmCGTT_SX_CLK_CTRL1                                                     0xf095
#define mmCGTT_SX_CLK_CTRL2                                                     0xf096
#define mmCGTT_SX_CLK_CTRL3                                                     0xf097
#define mmCGTT_SX_CLK_CTRL4                                                     0xf098
#define mmSX_DEBUG_BUSY                                                         0x2414
#define mmSX_DEBUG_BUSY_2                                                       0x2415
#define mmSX_DEBUG_BUSY_3                                                       0x2416
#define mmSX_DEBUG_BUSY_4                                                       0x2417
#define mmSX_DEBUG_1                                                            0x2418
#define mmSX_PERFCOUNTER0_SELECT                                                0xda40
#define mmSX_PERFCOUNTER1_SELECT                                                0xda41
#define mmSX_PERFCOUNTER2_SELECT                                                0xda42
#define mmSX_PERFCOUNTER3_SELECT                                                0xda43
#define mmSX_PERFCOUNTER0_SELECT1                                               0xda44
#define mmSX_PERFCOUNTER1_SELECT1                                               0xda45
#define mmSX_PERFCOUNTER0_LO                                                    0xd240
#define mmSX_PERFCOUNTER0_HI                                                    0xd241
#define mmSX_PERFCOUNTER1_LO                                                    0xd242
#define mmSX_PERFCOUNTER1_HI                                                    0xd243
#define mmSX_PERFCOUNTER2_LO                                                    0xd244
#define mmSX_PERFCOUNTER2_HI                                                    0xd245
#define mmSX_PERFCOUNTER3_LO                                                    0xd246
#define mmSX_PERFCOUNTER3_HI                                                    0xd247
/*
 * TCC block register offsets, plus the TCC/TCA/TCS CGTT_SCLK_CTRL trio
 * (0xf0ac-0xf0ae).
 *
 * Perf-counter layout visible in the values:
 *   - counters 0 and 1 each have SELECT and SELECT1, interleaved at
 *     0xdb80 (0 SELECT), 0xdb81 (0 SELECT1), 0xdb82 (1 SELECT),
 *     0xdb83 (1 SELECT1);
 *   - counters 2 and 3 have SELECT only, at 0xdb84 and 0xdb85;
 *   - TCC_PERFCOUNTER<N>_LO / _HI are at 0xd380 + 2*N and 0xd380 + 2*N + 1.
 */
#define mmTCC_CTRL                                                              0x2b80
#define mmTCC_EDC_COUNTER                                                       0x2b82
#define mmTCC_REDUNDANCY                                                        0x2b83
#define mmTCC_CGTT_SCLK_CTRL                                                    0xf0ac
#define mmTCA_CGTT_SCLK_CTRL                                                    0xf0ad
#define mmTCS_CGTT_SCLK_CTRL                                                    0xf0ae
#define mmTCC_PERFCOUNTER0_SELECT                                               0xdb80
#define mmTCC_PERFCOUNTER1_SELECT                                               0xdb82
#define mmTCC_PERFCOUNTER0_SELECT1                                              0xdb81
#define mmTCC_PERFCOUNTER1_SELECT1                                              0xdb83
#define mmTCC_PERFCOUNTER2_SELECT                                               0xdb84
#define mmTCC_PERFCOUNTER3_SELECT                                               0xdb85
#define mmTCC_PERFCOUNTER0_LO                                                   0xd380
#define mmTCC_PERFCOUNTER1_LO                                                   0xd382
#define mmTCC_PERFCOUNTER2_LO                                                   0xd384
#define mmTCC_PERFCOUNTER3_LO                                                   0xd386
#define mmTCC_PERFCOUNTER0_HI                                                   0xd381
#define mmTCC_PERFCOUNTER1_HI                                                   0xd383
#define mmTCC_PERFCOUNTER2_HI                                                   0xd385
#define mmTCC_PERFCOUNTER3_HI                                                   0xd387
/*
 * TCA block register offsets.
 *
 * Perf-counter layout visible in the values:
 *   - counters 0 and 1: SELECT / SELECT1 interleaved at 0xdb90-0xdb93;
 *   - counters 2 and 3: SELECT only, at 0xdb94 and 0xdb95;
 *   - TCA_PERFCOUNTER<N>_LO / _HI at 0xd390 + 2*N and 0xd390 + 2*N + 1.
 */
#define mmTCA_CTRL                                                              0x2bc0
#define mmTCA_PERFCOUNTER0_SELECT                                               0xdb90
#define mmTCA_PERFCOUNTER1_SELECT                                               0xdb92
#define mmTCA_PERFCOUNTER0_SELECT1                                              0xdb91
#define mmTCA_PERFCOUNTER1_SELECT1                                              0xdb93
#define mmTCA_PERFCOUNTER2_SELECT                                               0xdb94
#define mmTCA_PERFCOUNTER3_SELECT                                               0xdb95
#define mmTCA_PERFCOUNTER0_LO                                                   0xd390
#define mmTCA_PERFCOUNTER1_LO                                                   0xd392
#define mmTCA_PERFCOUNTER2_LO                                                   0xd394
#define mmTCA_PERFCOUNTER3_LO                                                   0xd396
#define mmTCA_PERFCOUNTER0_HI                                                   0xd391
#define mmTCA_PERFCOUNTER1_HI                                                   0xd393
#define mmTCA_PERFCOUNTER2_HI                                                   0xd395
#define mmTCA_PERFCOUNTER3_HI                                                   0xd397
/*
 * TCS block register offsets.
 *
 * Perf-counter layout visible in the values:
 *   - only counter 0 has a SELECT1 register (0xdba1);
 *   - SELECT registers are 0xdba0 (counter 0) then 0xdba2, 0xdba3, 0xdba4 for
 *     counters 1-3, i.e. they do NOT follow the 2*N stride used by LO/HI;
 *   - TCS_PERFCOUNTER<N>_LO / _HI at 0xd3a0 + 2*N and 0xd3a0 + 2*N + 1.
 */
#define mmTCS_CTRL                                                              0x2be0
#define mmTCS_PERFCOUNTER0_SELECT                                               0xdba0
#define mmTCS_PERFCOUNTER0_SELECT1                                              0xdba1
#define mmTCS_PERFCOUNTER1_SELECT                                               0xdba2
#define mmTCS_PERFCOUNTER2_SELECT                                               0xdba3
#define mmTCS_PERFCOUNTER3_SELECT                                               0xdba4
#define mmTCS_PERFCOUNTER0_LO                                                   0xd3a0
#define mmTCS_PERFCOUNTER1_LO                                                   0xd3a2
#define mmTCS_PERFCOUNTER2_LO                                                   0xd3a4
#define mmTCS_PERFCOUNTER3_LO                                                   0xd3a6
#define mmTCS_PERFCOUNTER0_HI                                                   0xd3a1
#define mmTCS_PERFCOUNTER1_HI                                                   0xd3a3
#define mmTCS_PERFCOUNTER2_HI                                                   0xd3a5
#define mmTCS_PERFCOUNTER3_HI                                                   0xd3a7
/*
 * TA and TD block register offsets.
 *
 * Both blocks expose two perf counters with the same shape:
 *   - <BLK>_PERFCOUNTER0_SELECT, <BLK>_PERFCOUNTER0_SELECT1,
 *     <BLK>_PERFCOUNTER1_SELECT at base, base + 1, base + 2
 *     (TD base 0xdb00, TA base 0xdac0); there is no PERFCOUNTER1_SELECT1;
 *   - <BLK>_PERFCOUNTER<N>_LO / _HI at base + 2*N and base + 2*N + 1
 *     (TD base 0xd300, TA base 0xd2c0).
 * The border-colour style base-address pairs (TA_BC_BASE_ADDR[_HI] at
 * 0xa020/0xa021 and TA_CS_BC_BASE_ADDR[_HI] at 0xc380/0xc381) live in
 * different ranges than the 0x25xx control/status registers.
 * NOTE(review): "BC" is assumed to mean border colour - confirm.
 */
#define mmTA_BC_BASE_ADDR                                                       0xa020
#define mmTA_BC_BASE_ADDR_HI                                                    0xa021
#define mmTD_CNTL                                                               0x2525
#define mmTD_STATUS                                                             0x2526
#define mmTD_DEBUG_INDEX                                                        0x2528
#define mmTD_DEBUG_DATA                                                         0x2529
#define mmTD_PERFCOUNTER0_SELECT                                                0xdb00
#define mmTD_PERFCOUNTER1_SELECT                                                0xdb02
#define mmTD_PERFCOUNTER0_SELECT1                                               0xdb01
#define mmTD_PERFCOUNTER0_LO                                                    0xd300
#define mmTD_PERFCOUNTER1_LO                                                    0xd302
#define mmTD_PERFCOUNTER0_HI                                                    0xd301
#define mmTD_PERFCOUNTER1_HI                                                    0xd303
#define mmTD_SCRATCH                                                            0x2533
#define mmTA_CNTL                                                               0x2541
#define mmTA_CNTL_AUX                                                           0x2542
#define mmTA_RESERVED_010C                                                      0x2543
#define mmTA_CS_BC_BASE_ADDR                                                    0xc380
#define mmTA_CS_BC_BASE_ADDR_HI                                                 0xc381
#define mmTA_STATUS                                                             0x2548
#define mmTA_DEBUG_INDEX                                                        0x254c
#define mmTA_DEBUG_DATA                                                         0x254d
#define mmTA_PERFCOUNTER0_SELECT                                                0xdac0
#define mmTA_PERFCOUNTER1_SELECT                                                0xdac2
#define mmTA_PERFCOUNTER0_SELECT1                                               0xdac1
#define mmTA_PERFCOUNTER0_LO                                                    0xd2c0
#define mmTA_PERFCOUNTER1_LO                                                    0xd2c2
#define mmTA_PERFCOUNTER0_HI                                                    0xd2c1
#define mmTA_PERFCOUNTER1_HI                                                    0xd2c3
#define mmTA_SCRATCH                                                            0x2564
/*
 * SH_*, TCP and TC_CFG register offsets.
 *   - SH_HIDDEN_PRIVATE_BASE_VMID / SH_STATIC_MEM_CONFIG: 0x2580-0x2581
 *   - TCP control/status:                                 0x2b00-0x2b06,
 *     plus TCP_BUFFER_ADDR_HASH_CNTL (0x2b16) and TCP_EDC_COUNTER (0x2b17)
 *   - TCP perf counters: counters 0 and 1 have SELECT / SELECT1 interleaved
 *     at 0xdb40-0xdb43, counters 2 and 3 have SELECT only (0xdb44, 0xdb45);
 *     TCP_PERFCOUNTER<N>_LO / _HI at 0xd340 + 2*N and 0xd340 + 2*N + 1
 *   - TC_CFG_L1 / L2 load, store, atomic and volatile policy registers:
 *     contiguous 0x2b1a-0x2b23
 */
#define mmSH_HIDDEN_PRIVATE_BASE_VMID                                           0x2580
#define mmSH_STATIC_MEM_CONFIG                                                  0x2581
#define mmTCP_INVALIDATE                                                        0x2b00
#define mmTCP_STATUS                                                            0x2b01
#define mmTCP_CNTL                                                              0x2b02
#define mmTCP_CHAN_STEER_LO                                                     0x2b03
#define mmTCP_CHAN_STEER_HI                                                     0x2b04
#define mmTCP_ADDR_CONFIG                                                       0x2b05
#define mmTCP_CREDIT                                                            0x2b06
#define mmTCP_PERFCOUNTER0_SELECT                                               0xdb40
#define mmTCP_PERFCOUNTER1_SELECT                                               0xdb42
#define mmTCP_PERFCOUNTER0_SELECT1                                              0xdb41
#define mmTCP_PERFCOUNTER1_SELECT1                                              0xdb43
#define mmTCP_PERFCOUNTER2_SELECT                                               0xdb44
#define mmTCP_PERFCOUNTER3_SELECT                                               0xdb45
#define mmTCP_PERFCOUNTER0_LO                                                   0xd340
#define mmTCP_PERFCOUNTER1_LO                                                   0xd342
#define mmTCP_PERFCOUNTER2_LO                                                   0xd344
#define mmTCP_PERFCOUNTER3_LO                                                   0xd346
#define mmTCP_PERFCOUNTER0_HI                                                   0xd341
#define mmTCP_PERFCOUNTER1_HI                                                   0xd343
#define mmTCP_PERFCOUNTER2_HI                                                   0xd345
#define mmTCP_PERFCOUNTER3_HI                                                   0xd347
#define mmTCP_BUFFER_ADDR_HASH_CNTL                                             0x2b16
#define mmTCP_EDC_COUNTER                                                       0x2b17
#define mmTC_CFG_L1_LOAD_POLICY0                                                0x2b1a
#define mmTC_CFG_L1_LOAD_POLICY1                                                0x2b1b
#define mmTC_CFG_L1_STORE_POLICY                                                0x2b1c
#define mmTC_CFG_L2_LOAD_POLICY0                                                0x2b1d
#define mmTC_CFG_L2_LOAD_POLICY1                                                0x2b1e
#define mmTC_CFG_L2_STORE_POLICY0                                               0x2b1f
#define mmTC_CFG_L2_STORE_POLICY1                                               0x2b20
#define mmTC_CFG_L2_ATOMIC_POLICY                                               0x2b21
#define mmTC_CFG_L1_VOLATILE                                                    0x2b22
#define mmTC_CFG_L2_VOLATILE                                                    0x2b23
/*
 * TCP watch registers, four slots (0-3). Each slot N occupies three
 * consecutive offsets starting at 0x32a0 + 3*N:
 *   +0  TCP_WATCH<N>_ADDR_H
 *   +1  TCP_WATCH<N>_ADDR_L
 *   +2  TCP_WATCH<N>_CNTL
 * Note that ADDR_H precedes ADDR_L in offset order. The macros below are
 * grouped by field rather than by slot.
 */
#define mmTCP_WATCH0_ADDR_H                                                     0x32a0
#define mmTCP_WATCH1_ADDR_H                                                     0x32a3
#define mmTCP_WATCH2_ADDR_H                                                     0x32a6
#define mmTCP_WATCH3_ADDR_H                                                     0x32a9
#define mmTCP_WATCH0_ADDR_L                                                     0x32a1
#define mmTCP_WATCH1_ADDR_L                                                     0x32a4
#define mmTCP_WATCH2_ADDR_L                                                     0x32a7
#define mmTCP_WATCH3_ADDR_L                                                     0x32aa
#define mmTCP_WATCH0_CNTL                                                       0x32a2
#define mmTCP_WATCH1_CNTL                                                       0x32a5
#define mmTCP_WATCH2_CNTL                                                       0x32a8
#define mmTCP_WATCH3_CNTL                                                       0x32ab
/*
 * TD/TA/TCP/TCI CGTT control register offsets (contiguous 0xf09c-0xf09f),
 * followed by the TCI status and control registers (0x2b61-0x2b63).
 */
#define mmTD_CGTT_CTRL                                                          0xf09c
#define mmTA_CGTT_CTRL                                                          0xf09d
#define mmCGTT_TCP_CLK_CTRL                                                     0xf09e
#define mmCGTT_TCI_CLK_CTRL                                                     0xf09f
#define mmTCI_STATUS                                                            0x2b61
#define mmTCI_CNTL_1                                                            0x2b62
#define mmTCI_CNTL_2                                                            0x2b63
/*
 * GDS register constants (mm* names).
 *
 * NOTE(review): "mm" presumably marks a memory-mapped register offset and
 * "GDS" the global data share block -- confirm against the hardware
 * register specification.
 *
 * The values below fall into three separate ranges:
 *   0x25c0-0x25c9  configuration, status, fault, error-count and debug
 *   0xf0a0         mmCGTT_GDS_CLK_CTRL
 *   0xc400-0xc421  read/write, atomic, GWS and OA registers (contiguous)
 */
#define mmGDS_CONFIG                                                            0x25c0
#define mmGDS_CNTL_STATUS                                                       0x25c1
#define mmGDS_ENHANCE2                                                          0x25c2
#define mmGDS_PROTECTION_FAULT                                                  0x25c3
#define mmGDS_VM_PROTECTION_FAULT                                               0x25c4
#define mmGDS_SECDED_CNT                                                        0x25c5
#define mmGDS_GRBM_SECDED_CNT                                                   0x25c6
#define mmGDS_OA_DED                                                            0x25c7
#define mmGDS_DEBUG_CNTL                                                        0x25c8
#define mmGDS_DEBUG_DATA                                                        0x25c9
#define mmCGTT_GDS_CLK_CTRL                                                     0xf0a0
/* 0xc400-0xc409: read/write address, data and burst registers. */
#define mmGDS_RD_ADDR                                                           0xc400
#define mmGDS_RD_DATA                                                           0xc401
#define mmGDS_RD_BURST_ADDR                                                     0xc402
#define mmGDS_RD_BURST_COUNT                                                    0xc403
#define mmGDS_RD_BURST_DATA                                                     0xc404
#define mmGDS_WR_ADDR                                                           0xc405
#define mmGDS_WR_DATA                                                           0xc406
#define mmGDS_WR_BURST_ADDR                                                     0xc407
#define mmGDS_WR_BURST_DATA                                                     0xc408
#define mmGDS_WRITE_COMPLETE                                                    0xc409
/* 0xc40a-0xc419: GDS_ATOM_* registers. */
#define mmGDS_ATOM_CNTL                                                         0xc40a
#define mmGDS_ATOM_COMPLETE                                                     0xc40b
#define mmGDS_ATOM_BASE                                                         0xc40c
#define mmGDS_ATOM_SIZE                                                         0xc40d
#define mmGDS_ATOM_OFFSET0                                                      0xc40e
#define mmGDS_ATOM_OFFSET1                                                      0xc40f
#define mmGDS_ATOM_DST                                                          0xc410
#define mmGDS_ATOM_OP                                                           0xc411
#define mmGDS_ATOM_SRC0                                                         0xc412
#define mmGDS_ATOM_SRC0_U                                                       0xc413
#define mmGDS_ATOM_SRC1                                                         0xc414
#define mmGDS_ATOM_SRC1_U                                                       0xc415
#define mmGDS_ATOM_READ0                                                        0xc416
#define mmGDS_ATOM_READ0_U                                                      0xc417
#define mmGDS_ATOM_READ1                                                        0xc418
#define mmGDS_ATOM_READ1_U                                                      0xc419
/* 0xc41a-0xc421: GDS_GWS_* and GDS_OA_* registers. */
#define mmGDS_GWS_RESOURCE_CNTL                                                 0xc41a
#define mmGDS_GWS_RESOURCE                                                      0xc41b
#define mmGDS_GWS_RESOURCE_CNT                                                  0xc41c
#define mmGDS_OA_CNTL                                                           0xc41d
#define mmGDS_OA_COUNTER                                                        0xc41e
#define mmGDS_OA_ADDRESS                                                        0xc41f
#define mmGDS_OA_INCDEC                                                         0xc420
#define mmGDS_OA_RING_SIZE                                                      0xc421
/*
 * GDS debug register indices 0x0-0x6 (ix* names).
 *
 * These are small index values, not offsets in the mm* ranges.
 * NOTE(review): presumably selected through mmGDS_DEBUG_CNTL and read back
 * through mmGDS_DEBUG_DATA -- confirm against the hardware documentation.
 */
#define ixGDS_DEBUG_REG0                                                        0x0
#define ixGDS_DEBUG_REG1                                                        0x1
#define ixGDS_DEBUG_REG2                                                        0x2
#define ixGDS_DEBUG_REG3                                                        0x3
#define ixGDS_DEBUG_REG4                                                        0x4
#define ixGDS_DEBUG_REG5                                                        0x5
#define ixGDS_DEBUG_REG6                                                        0x6
/*
 * GDS performance counter registers.
 *
 *   SELECT registers:  0xda80 + n          (n = 0..3)
 *   SELECT1 register:  0xda84              (defined for counter 0 only)
 *   LO/HI registers:   interleaved; counter n has LO at 0xd280 + 2n and
 *                      HI at 0xd281 + 2n
 */
#define mmGDS_PERFCOUNTER0_SELECT                                               0xda80
#define mmGDS_PERFCOUNTER1_SELECT                                               0xda81
#define mmGDS_PERFCOUNTER2_SELECT                                               0xda82
#define mmGDS_PERFCOUNTER3_SELECT                                               0xda83
#define mmGDS_PERFCOUNTER0_LO                                                   0xd280
#define mmGDS_PERFCOUNTER1_LO                                                   0xd282
#define mmGDS_PERFCOUNTER2_LO                                                   0xd284
#define mmGDS_PERFCOUNTER3_LO                                                   0xd286
#define mmGDS_PERFCOUNTER0_HI                                                   0xd281
#define mmGDS_PERFCOUNTER1_HI                                                   0xd283
#define mmGDS_PERFCOUNTER2_HI                                                   0xd285
#define mmGDS_PERFCOUNTER3_HI                                                   0xd287
#define mmGDS_PERFCOUNTER0_SELECT1                                              0xda84
/*
 * Per-VMID GDS registers (VMID 0..15) followed by GDS reset/misc registers.
 *
 * Layout of the values defined below:
 *   mmGDS_VMIDn_BASE  = 0x3300 + 2n   (even offsets)
 *   mmGDS_VMIDn_SIZE  = 0x3301 + 2n   (odd offsets, interleaved with BASE)
 *   mmGDS_GWS_VMIDn   = 0x3320 + n
 *   mmGDS_OA_VMIDn    = 0x3330 + n
 *   0x3344-0x334c     reset / max-wave-id / enhance / restore registers;
 *                     0x3340-0x3343 and 0x3347 have no define in this list
 */
#define mmGDS_VMID0_BASE                                                        0x3300
#define mmGDS_VMID1_BASE                                                        0x3302
#define mmGDS_VMID2_BASE                                                        0x3304
#define mmGDS_VMID3_BASE                                                        0x3306
#define mmGDS_VMID4_BASE                                                        0x3308
#define mmGDS_VMID5_BASE                                                        0x330a
#define mmGDS_VMID6_BASE                                                        0x330c
#define mmGDS_VMID7_BASE                                                        0x330e
#define mmGDS_VMID8_BASE                                                        0x3310
#define mmGDS_VMID9_BASE                                                        0x3312
#define mmGDS_VMID10_BASE                                                       0x3314
#define mmGDS_VMID11_BASE                                                       0x3316
#define mmGDS_VMID12_BASE                                                       0x3318
#define mmGDS_VMID13_BASE                                                       0x331a
#define mmGDS_VMID14_BASE                                                       0x331c
#define mmGDS_VMID15_BASE                                                       0x331e
#define mmGDS_VMID0_SIZE                                                        0x3301
#define mmGDS_VMID1_SIZE                                                        0x3303
#define mmGDS_VMID2_SIZE                                                        0x3305
#define mmGDS_VMID3_SIZE                                                        0x3307
#define mmGDS_VMID4_SIZE                                                        0x3309
#define mmGDS_VMID5_SIZE                                                        0x330b
#define mmGDS_VMID6_SIZE                                                        0x330d
#define mmGDS_VMID7_SIZE                                                        0x330f
#define mmGDS_VMID8_SIZE                                                        0x3311
#define mmGDS_VMID9_SIZE                                                        0x3313
#define mmGDS_VMID10_SIZE                                                       0x3315
#define mmGDS_VMID11_SIZE                                                       0x3317
#define mmGDS_VMID12_SIZE                                                       0x3319
#define mmGDS_VMID13_SIZE                                                       0x331b
#define mmGDS_VMID14_SIZE                                                       0x331d
#define mmGDS_VMID15_SIZE                                                       0x331f
#define mmGDS_GWS_VMID0                                                         0x3320
#define mmGDS_GWS_VMID1                                                         0x3321
#define mmGDS_GWS_VMID2                                                         0x3322
#define mmGDS_GWS_VMID3                                                         0x3323
#define mmGDS_GWS_VMID4                                                         0x3324
#define mmGDS_GWS_VMID5                                                         0x3325
#define mmGDS_GWS_VMID6                                                         0x3326
#define mmGDS_GWS_VMID7                                                         0x3327
#define mmGDS_GWS_VMID8                                                         0x3328
#define mmGDS_GWS_VMID9                                                         0x3329
#define mmGDS_GWS_VMID10                                                        0x332a
#define mmGDS_GWS_VMID11                                                        0x332b
#define mmGDS_GWS_VMID12                                                        0x332c
#define mmGDS_GWS_VMID13                                                        0x332d
#define mmGDS_GWS_VMID14                                                        0x332e
#define mmGDS_GWS_VMID15                                                        0x332f
#define mmGDS_OA_VMID0                                                          0x3330
#define mmGDS_OA_VMID1                                                          0x3331
#define mmGDS_OA_VMID2                                                          0x3332
#define mmGDS_OA_VMID3                                                          0x3333
#define mmGDS_OA_VMID4                                                          0x3334
#define mmGDS_OA_VMID5                                                          0x3335
#define mmGDS_OA_VMID6                                                          0x3336
#define mmGDS_OA_VMID7                                                          0x3337
#define mmGDS_OA_VMID8                                                          0x3338
#define mmGDS_OA_VMID9                                                          0x3339
#define mmGDS_OA_VMID10                                                         0x333a
#define mmGDS_OA_VMID11                                                         0x333b
#define mmGDS_OA_VMID12                                                         0x333c
#define mmGDS_OA_VMID13                                                         0x333d
#define mmGDS_OA_VMID14                                                         0x333e
#define mmGDS_OA_VMID15                                                         0x333f
#define mmGDS_GWS_RESET0                                                        0x3344
#define mmGDS_GWS_RESET1                                                        0x3345
#define mmGDS_GWS_RESOURCE_RESET                                                0x3346
#define mmGDS_COMPUTE_MAX_WAVE_ID                                               0x3348
#define mmGDS_OA_RESET_MASK                                                     0x3349
#define mmGDS_OA_RESET                                                          0x334a
#define mmGDS_ENHANCE                                                           0x334b
#define mmGDS_OA_CGPG_RESTORE                                                   0x334c
/*
 * Copy-state, VGT, IA and shader-array configuration register constants.
 *
 * The list is not sorted by value: entries from the 0x22xx, 0xa1xx-0xa3xx
 * and 0xc24x ranges are interleaved, so neighbouring names do not imply
 * neighbouring offsets.
 *
 * NOTE(review): "VGT" and "IA" are presumably the vertex grouper /
 * tessellator and input assembler blocks -- confirm against the hardware
 * register specification.
 */
#define mmCS_COPY_STATE                                                         0xa1f3
#define mmGFX_COPY_STATE                                                        0xa1f4
#define mmVGT_DRAW_INITIATOR                                                    0xa1fc
#define mmVGT_EVENT_INITIATOR                                                   0xa2a4
#define mmVGT_EVENT_ADDRESS_REG                                                 0xa1fe
#define mmVGT_DMA_BASE_HI                                                       0xa1f9
#define mmVGT_DMA_BASE                                                          0xa1fa
#define mmVGT_DMA_INDEX_TYPE                                                    0xa29f
#define mmVGT_DMA_NUM_INSTANCES                                                 0xa2a2
#define mmIA_ENHANCE                                                            0xa29c
#define mmVGT_DMA_SIZE                                                          0xa29d
#define mmVGT_DMA_MAX_SIZE                                                      0xa29e
#define mmVGT_DMA_PRIMITIVE_TYPE                                                0x2271
#define mmVGT_DMA_CONTROL                                                       0x2272
#define mmVGT_IMMED_DATA                                                        0xa1fd
#define mmVGT_INDEX_TYPE                                                        0xc243
#define mmVGT_NUM_INDICES                                                       0xc24c
#define mmVGT_NUM_INSTANCES                                                     0xc24d
#define mmVGT_PRIMITIVE_TYPE                                                    0xc242
#define mmVGT_PRIMITIVEID_EN                                                    0xa2a1
#define mmVGT_PRIMITIVEID_RESET                                                 0xa2a3
#define mmVGT_VTX_CNT_EN                                                        0xa2ae
#define mmVGT_REUSE_OFF                                                         0xa2ad
#define mmVGT_INSTANCE_STEP_RATE_0                                              0xa2a8
#define mmVGT_INSTANCE_STEP_RATE_1                                              0xa2a9
#define mmVGT_MAX_VTX_INDX                                                      0xa100
#define mmVGT_MIN_VTX_INDX                                                      0xa101
#define mmVGT_INDX_OFFSET                                                       0xa102
#define mmVGT_VERTEX_REUSE_BLOCK_CNTL                                           0xa316
#define mmVGT_OUT_DEALLOC_CNTL                                                  0xa317
#define mmVGT_MULTI_PRIM_IB_RESET_INDX                                          0xa103
#define mmVGT_MULTI_PRIM_IB_RESET_EN                                            0xa2a5
#define mmVGT_ENHANCE                                                           0xa294
/* 0xa284-0xa28f: output path, HOS and GROUP registers (contiguous). */
#define mmVGT_OUTPUT_PATH_CNTL                                                  0xa284
#define mmVGT_HOS_CNTL                                                          0xa285
#define mmVGT_HOS_MAX_TESS_LEVEL                                                0xa286
#define mmVGT_HOS_MIN_TESS_LEVEL                                                0xa287
#define mmVGT_HOS_REUSE_DEPTH                                                   0xa288
#define mmVGT_GROUP_PRIM_TYPE                                                   0xa289
#define mmVGT_GROUP_FIRST_DECR                                                  0xa28a
#define mmVGT_GROUP_DECR                                                        0xa28b
#define mmVGT_GROUP_VECT_0_CNTL                                                 0xa28c
#define mmVGT_GROUP_VECT_1_CNTL                                                 0xa28d
#define mmVGT_GROUP_VECT_0_FMT_CNTL                                             0xa28e
#define mmVGT_GROUP_VECT_1_FMT_CNTL                                             0xa28f
/* 0x222c-0x2230: eject, FIFO depth and last-copy-state registers (contiguous). */
#define mmVGT_VTX_VECT_EJECT_REG                                                0x222c
#define mmVGT_DMA_DATA_FIFO_DEPTH                                               0x222d
#define mmVGT_DMA_REQ_FIFO_DEPTH                                                0x222e
#define mmVGT_DRAW_INIT_FIFO_DEPTH                                              0x222f
#define mmVGT_LAST_COPY_STATE                                                   0x2230
#define mmCC_GC_SHADER_ARRAY_CONFIG                                             0x226f
#define mmGC_USER_SHADER_ARRAY_CONFIG                                           0x2270
/*
 * VGT GS, stream-out (STRMOUT), TF/HS and ring register constants, plus a
 * few IA registers.
 *
 * The list is not sorted by value: 0x22xx, 0xa2xx and 0xc24x-0xc250 entries
 * are interleaved.
 *
 * Stream-out per-buffer registers (n = 0..3) use a stride of 4:
 *   mmVGT_STRMOUT_BUFFER_SIZE_n   = 0xa2b4 + 4n
 *   mmVGT_STRMOUT_VTX_STRIDE_n    = 0xa2b5 + 4n
 *   mmVGT_STRMOUT_BUFFER_OFFSET_n = 0xa2b7 + 4n
 * (0xa2b6 + 4n has no define in this list.)
 *   mmVGT_STRMOUT_BUFFER_FILLED_SIZE_n = 0xc244 + n
 */
#define mmVGT_GS_MODE                                                           0xa290
#define mmVGT_GS_ONCHIP_CNTL                                                    0xa291
#define mmVGT_GS_OUT_PRIM_TYPE                                                  0xa29b
#define mmVGT_CACHE_INVALIDATION                                                0x2231
#define mmVGT_RESET_DEBUG                                                       0x2232
#define mmVGT_STRMOUT_DELAY                                                     0x2233
#define mmVGT_FIFO_DEPTHS                                                       0x2234
#define mmVGT_GS_PER_ES                                                         0xa295
#define mmVGT_ES_PER_GS                                                         0xa296
#define mmVGT_GS_PER_VS                                                         0xa297
#define mmVGT_GS_VERTEX_REUSE                                                   0x2235
#define mmVGT_MC_LAT_CNTL                                                       0x2236
#define mmIA_CNTL_STATUS                                                        0x2237
#define mmVGT_STRMOUT_CONFIG                                                    0xa2e5
#define mmVGT_STRMOUT_BUFFER_SIZE_0                                             0xa2b4
#define mmVGT_STRMOUT_BUFFER_SIZE_1                                             0xa2b8
#define mmVGT_STRMOUT_BUFFER_SIZE_2                                             0xa2bc
#define mmVGT_STRMOUT_BUFFER_SIZE_3                                             0xa2c0
#define mmVGT_STRMOUT_BUFFER_OFFSET_0                                           0xa2b7
#define mmVGT_STRMOUT_BUFFER_OFFSET_1                                           0xa2bb
#define mmVGT_STRMOUT_BUFFER_OFFSET_2                                           0xa2bf
#define mmVGT_STRMOUT_BUFFER_OFFSET_3                                           0xa2c3
#define mmVGT_STRMOUT_VTX_STRIDE_0                                              0xa2b5
#define mmVGT_STRMOUT_VTX_STRIDE_1                                              0xa2b9
#define mmVGT_STRMOUT_VTX_STRIDE_2                                              0xa2bd
#define mmVGT_STRMOUT_VTX_STRIDE_3                                              0xa2c1
#define mmVGT_STRMOUT_BUFFER_CONFIG                                             0xa2e6
#define mmVGT_STRMOUT_BUFFER_FILLED_SIZE_0                                      0xc244
#define mmVGT_STRMOUT_BUFFER_FILLED_SIZE_1                                      0xc245
#define mmVGT_STRMOUT_BUFFER_FILLED_SIZE_2                                      0xc246
#define mmVGT_STRMOUT_BUFFER_FILLED_SIZE_3                                      0xc247
#define mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET                                        0xa2ca
#define mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE                            0xa2cb
#define mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                                 0xa2cc
#define mmVGT_GS_MAX_VERT_OUT                                                   0xa2ce
#define mmIA_VMID_OVERRIDE                                                      0x2260
#define mmVGT_SHADER_STAGES_EN                                                  0xa2d5
#define mmVGT_DISPATCH_DRAW_INDEX                                               0xa2dd
#define mmVGT_LS_HS_CONFIG                                                      0xa2d6
#define mmVGT_DMA_LS_HS_CONFIG                                                  0x2273
#define mmVGT_TF_PARAM                                                          0xa2db
#define mmVGT_TF_RING_SIZE                                                      0xc24e
#define mmVGT_SYS_CONFIG                                                        0x2263
#define mmVGT_HS_OFFCHIP_PARAM                                                  0xc24f
#define mmVGT_TF_MEMORY_BASE                                                    0xc250
#define mmVGT_GS_INSTANCE_CNT                                                   0xa2e4
#define mmIA_MULTI_VGT_PARAM                                                    0xa2aa
#define mmVGT_VS_MAX_WAVE_ID                                                    0x2268
/* ESGS / GSVS ring size, offset and item-size registers. */
#define mmVGT_ESGS_RING_SIZE                                                    0xc240
#define mmVGT_GSVS_RING_SIZE                                                    0xc241
#define mmVGT_GSVS_RING_OFFSET_1                                                0xa298
#define mmVGT_GSVS_RING_OFFSET_2                                                0xa299
#define mmVGT_GSVS_RING_OFFSET_3                                                0xa29a
#define mmVGT_ESGS_RING_ITEMSIZE                                                0xa2ab
#define mmVGT_GSVS_RING_ITEMSIZE                                                0xa2ac
#define mmVGT_GS_VERT_ITEMSIZE                                                  0xa2d7
#define mmVGT_GS_VERT_ITEMSIZE_1                                                0xa2d8
#define mmVGT_GS_VERT_ITEMSIZE_2                                                0xa2d9
#define mmVGT_GS_VERT_ITEMSIZE_3                                                0xa2da
/*
 * WD status/enhance, GFX pipe, clock-control (CGTT_*), debug control/data
 * and primitive-configuration register constants.
 *
 * The debug registers come in CNTL/DATA pairs at consecutive offsets:
 *   VGT 0x2238/0x2239, IA 0x223a/0x223b, WD 0x223d/0x223e
 * (mmVGT_CNTL_STATUS sits between them at 0x223c).
 */
#define mmWD_CNTL_STATUS                                                        0x223f
#define mmWD_ENHANCE                                                            0xa2a0
#define mmGFX_PIPE_CONTROL                                                      0x226d
#define mmGFX_PIPE_PRIORITY                                                     0xf87f
#define mmCGTT_VGT_CLK_CTRL                                                     0xf084
#define mmCGTT_IA_CLK_CTRL                                                      0xf085
#define mmCGTT_WD_CLK_CTRL                                                      0xf086
#define mmVGT_DEBUG_CNTL                                                        0x2238
#define mmVGT_DEBUG_DATA                                                        0x2239
#define mmIA_DEBUG_CNTL                                                         0x223a
#define mmIA_DEBUG_DATA                                                         0x223b
#define mmVGT_CNTL_STATUS                                                       0x223c
#define mmWD_DEBUG_CNTL                                                         0x223d
#define mmWD_DEBUG_DATA                                                         0x223e
#define mmCC_GC_PRIM_CONFIG                                                     0x2240
#define mmGC_USER_PRIM_CONFIG                                                   0x2241
/*
 * WD, IA and VGT debug register indices (ix* names).
 *
 * Each block's indices start again at 0x0, so the three sets overlap
 * numerically.
 * NOTE(review): presumably each set is used with its own block's
 * *_DEBUG_CNTL / *_DEBUG_DATA register pair -- confirm against the
 * hardware documentation.
 *
 *   ixWD_DEBUG_REGn  = n   for n = 0..5
 *   ixIA_DEBUG_REGn  = n   for n = 0..9
 *   ixVGT_DEBUG_REGn is NOT always n; never derive the index from the
 *   register number. Entries that differ from n:
 *     REG2..REG7   -> 0x1e..0x23
 *     REG18        -> 0x7
 *     REG26        -> 0x24
 *     REG30..REG35 -> 0x25..0x2a
 *   All other VGT entries (REG0, REG1, REG8..REG17, REG19..REG25,
 *   REG27..REG29) equal their register number.
 */
#define ixWD_DEBUG_REG0                                                         0x0
#define ixWD_DEBUG_REG1                                                         0x1
#define ixWD_DEBUG_REG2                                                         0x2
#define ixWD_DEBUG_REG3                                                         0x3
#define ixWD_DEBUG_REG4                                                         0x4
#define ixWD_DEBUG_REG5                                                         0x5
#define ixIA_DEBUG_REG0                                                         0x0
#define ixIA_DEBUG_REG1                                                         0x1
#define ixIA_DEBUG_REG2                                                         0x2
#define ixIA_DEBUG_REG3                                                         0x3
#define ixIA_DEBUG_REG4                                                         0x4
#define ixIA_DEBUG_REG5                                                         0x5
#define ixIA_DEBUG_REG6                                                         0x6
#define ixIA_DEBUG_REG7                                                         0x7
#define ixIA_DEBUG_REG8                                                         0x8
#define ixIA_DEBUG_REG9                                                         0x9
#define ixVGT_DEBUG_REG0                                                        0x0
#define ixVGT_DEBUG_REG1                                                        0x1
#define ixVGT_DEBUG_REG2                                                        0x1e
#define ixVGT_DEBUG_REG3                                                        0x1f
#define ixVGT_DEBUG_REG4                                                        0x20
#define ixVGT_DEBUG_REG5                                                        0x21
#define ixVGT_DEBUG_REG6                                                        0x22
#define ixVGT_DEBUG_REG7                                                        0x23
#define ixVGT_DEBUG_REG8                                                        0x8
#define ixVGT_DEBUG_REG9                                                        0x9
#define ixVGT_DEBUG_REG10                                                       0xa
#define ixVGT_DEBUG_REG11                                                       0xb
#define ixVGT_DEBUG_REG12                                                       0xc
#define ixVGT_DEBUG_REG13                                                       0xd
#define ixVGT_DEBUG_REG14                                                       0xe
#define ixVGT_DEBUG_REG15                                                       0xf
#define ixVGT_DEBUG_REG16                                                       0x10
#define ixVGT_DEBUG_REG17                                                       0x11
#define ixVGT_DEBUG_REG18                                                       0x7
#define ixVGT_DEBUG_REG19                                                       0x13
#define ixVGT_DEBUG_REG20                                                       0x14
#define ixVGT_DEBUG_REG21                                                       0x15
#define ixVGT_DEBUG_REG22                                                       0x16
#define ixVGT_DEBUG_REG23                                                       0x17
#define ixVGT_DEBUG_REG24                                                       0x18
#define ixVGT_DEBUG_REG25                                                       0x19
#define ixVGT_DEBUG_REG26                                                       0x24
#define ixVGT_DEBUG_REG27                                                       0x1b
#define ixVGT_DEBUG_REG28                                                       0x1c
#define ixVGT_DEBUG_REG29                                                       0x1d
#define ixVGT_DEBUG_REG30                                                       0x25
#define ixVGT_DEBUG_REG31                                                       0x26
#define ixVGT_DEBUG_REG32                                                       0x27
#define ixVGT_DEBUG_REG33                                                       0x28
#define ixVGT_DEBUG_REG34                                                       0x29
#define ixVGT_DEBUG_REG35                                                       0x2a
/*
 * VGT, IA and WD performance counter registers.
 *
 * Each block has four counters (n = 0..3). SELECT registers are
 * consecutive; LO/HI registers are interleaved, with counter n's LO at
 * base + 2n and its HI at base + 2n + 1:
 *
 *   block  SELECT base  SELECT1            LO/HI base
 *   VGT    0xd88c       0xd890, 0xd891     0xd090
 *   IA     0xd884       0xd888             0xd088
 *   WD     0xd880       (none defined)     0xd080
 *
 * SELECT1 is defined for VGT counters 0 and 1 and for IA counter 0 only.
 * mmVGT_PERFCOUNTER_SEID_MASK is at 0xd894.
 */
#define mmVGT_PERFCOUNTER_SEID_MASK                                             0xd894
#define mmVGT_PERFCOUNTER0_SELECT                                               0xd88c
#define mmVGT_PERFCOUNTER1_SELECT                                               0xd88d
#define mmVGT_PERFCOUNTER2_SELECT                                               0xd88e
#define mmVGT_PERFCOUNTER3_SELECT                                               0xd88f
#define mmVGT_PERFCOUNTER0_SELECT1                                              0xd890
#define mmVGT_PERFCOUNTER1_SELECT1                                              0xd891
#define mmVGT_PERFCOUNTER0_LO                                                   0xd090
#define mmVGT_PERFCOUNTER1_LO                                                   0xd092
#define mmVGT_PERFCOUNTER2_LO                                                   0xd094
#define mmVGT_PERFCOUNTER3_LO                                                   0xd096
#define mmVGT_PERFCOUNTER0_HI                                                   0xd091
#define mmVGT_PERFCOUNTER1_HI                                                   0xd093
#define mmVGT_PERFCOUNTER2_HI                                                   0xd095
#define mmVGT_PERFCOUNTER3_HI                                                   0xd097
#define mmIA_PERFCOUNTER0_SELECT                                                0xd884
#define mmIA_PERFCOUNTER1_SELECT                                                0xd885
#define mmIA_PERFCOUNTER2_SELECT                                                0xd886
#define mmIA_PERFCOUNTER3_SELECT                                                0xd887
#define mmIA_PERFCOUNTER0_SELECT1                                               0xd888
#define mmIA_PERFCOUNTER0_LO                                                    0xd088
#define mmIA_PERFCOUNTER1_LO                                                    0xd08a
#define mmIA_PERFCOUNTER2_LO                                                    0xd08c
#define mmIA_PERFCOUNTER3_LO                                                    0xd08e
#define mmIA_PERFCOUNTER0_HI                                                    0xd089
#define mmIA_PERFCOUNTER1_HI                                                    0xd08b
#define mmIA_PERFCOUNTER2_HI                                                    0xd08d
#define mmIA_PERFCOUNTER3_HI                                                    0xd08f
#define mmWD_PERFCOUNTER0_SELECT                                                0xd880
#define mmWD_PERFCOUNTER1_SELECT                                                0xd881
#define mmWD_PERFCOUNTER2_SELECT                                                0xd882
#define mmWD_PERFCOUNTER3_SELECT                                                0xd883
#define mmWD_PERFCOUNTER0_LO                                                    0xd080
#define mmWD_PERFCOUNTER1_LO                                                    0xd082
#define mmWD_PERFCOUNTER2_LO                                                    0xd084
#define mmWD_PERFCOUNTER3_LO                                                    0xd086
#define mmWD_PERFCOUNTER0_HI                                                    0xd081
#define mmWD_PERFCOUNTER1_HI                                                    0xd083
#define mmWD_PERFCOUNTER2_HI                                                    0xd085
#define mmWD_PERFCOUNTER3_HI                                                    0xd087
/*
 * DIDT registers: one index/data register pair (mm*) plus the ix* indices
 * for four units (SQ, DB, TD, TCP).
 *
 * NOTE(review): the ix* values are presumably written to mmDIDT_IND_INDEX
 * and the selected register accessed through mmDIDT_IND_DATA -- confirm
 * against the hardware documentation.
 *
 * Each unit occupies a 0x20-wide index window:
 *   unit base:  SQ = 0x00, DB = 0x20, TD = 0x40, TCP = 0x60
 *   CTRL0..2    at base + 0x00 .. base + 0x02
 *   WEIGHT0_3, WEIGHT4_7, WEIGHT8_11
 *               at base + 0x10 .. base + 0x12
 */
#define mmDIDT_IND_INDEX                                                        0x3280
#define mmDIDT_IND_DATA                                                         0x3281
#define ixDIDT_SQ_CTRL0                                                         0x0
#define ixDIDT_SQ_CTRL1                                                         0x1
#define ixDIDT_SQ_CTRL2                                                         0x2
#define ixDIDT_SQ_WEIGHT0_3                                                     0x10
#define ixDIDT_SQ_WEIGHT4_7                                                     0x11
#define ixDIDT_SQ_WEIGHT8_11                                                    0x12
#define ixDIDT_DB_CTRL0                                                         0x20
#define ixDIDT_DB_CTRL1                                                         0x21
#define ixDIDT_DB_CTRL2                                                         0x22
#define ixDIDT_DB_WEIGHT0_3                                                     0x30
#define ixDIDT_DB_WEIGHT4_7                                                     0x31
#define ixDIDT_DB_WEIGHT8_11                                                    0x32
#define ixDIDT_TD_CTRL0                                                         0x40
#define ixDIDT_TD_CTRL1                                                         0x41
#define ixDIDT_TD_CTRL2                                                         0x42
#define ixDIDT_TD_WEIGHT0_3                                                     0x50
#define ixDIDT_TD_WEIGHT4_7                                                     0x51
#define ixDIDT_TD_WEIGHT8_11                                                    0x52
#define ixDIDT_TCP_CTRL0                                                        0x60
#define ixDIDT_TCP_CTRL1                                                        0x61
#define ixDIDT_TCP_CTRL2                                                        0x62
#define ixDIDT_TCP_WEIGHT0_3                                                    0x70
#define ixDIDT_TCP_WEIGHT4_7                                                    0x71
#define ixDIDT_TCP_WEIGHT8_11                                                   0x72

#endif /* GFX_7_2_D_H */


================================================
FILE: libhsakmt/tests/kfdtest/include/asic_reg/gfx_7_2_enum.h
================================================
/*
 * Copyright (C) 2014  Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef GFX_7_2_ENUM_H
#define GFX_7_2_ENUM_H

/*
 * SurfaceNumber - the eight NUMBER_* codes, numbered 0x0 through 0x7.
 * NOTE(review): presumably the numeric interpretation of surface data -
 * confirm against the register specification.
 */
typedef enum SurfaceNumber {
	NUMBER_UNORM = 0x0,
	NUMBER_SNORM = 0x1,
	NUMBER_USCALED = 0x2,
	NUMBER_SSCALED = 0x3,
	NUMBER_UINT = 0x4,
	NUMBER_SINT = 0x5,
	NUMBER_SRGB = 0x6,
	NUMBER_FLOAT = 0x7,
} SurfaceNumber;
/*
 * SurfaceSwap - the four SWAP_* codes: STD, ALT and their _REV
 * counterparts, numbered 0x0 through 0x3.
 */
typedef enum SurfaceSwap {
	SWAP_STD = 0x0,
	SWAP_ALT = 0x1,
	SWAP_STD_REV = 0x2,
	SWAP_ALT_REV = 0x3,
} SurfaceSwap;
/*
 * CBMode - CB_* mode codes, numbered 0x0 (CB_DISABLE) through
 * 0x5 (CB_FMASK_DECOMPRESS).
 */
typedef enum CBMode {
	CB_DISABLE = 0x0,
	CB_NORMAL = 0x1,
	CB_ELIMINATE_FAST_CLEAR = 0x2,
	CB_RESOLVE = 0x3,
	CB_DECOMPRESS = 0x4,
	CB_FMASK_DECOMPRESS = 0x5,
} CBMode;
/* RoundMode - two ROUND_* codes: BY_HALF (0x0) and TRUNCATE (0x1). */
typedef enum RoundMode {
	ROUND_BY_HALF = 0x0,
	ROUND_TRUNCATE = 0x1,
} RoundMode;
/*
 * SourceFormat - the four EXPORT_* codes, numbered 0x0 through 0x3.
 * The names encode a component count (4C/2C) and a bits-per-component
 * figure (32BPC/16BPC); the 2C variants carry a GR or AR suffix.
 */
typedef enum SourceFormat {
	EXPORT_4C_32BPC = 0x0,
	EXPORT_4C_16BPC = 0x1,
	EXPORT_2C_32BPC_GR = 0x2,
	EXPORT_2C_32BPC_AR = 0x3,
} SourceFormat;
/*
 * BlendOp - BLEND_* codes, numbered consecutively from 0x0 (BLEND_ZERO)
 * through 0x14 (BLEND_ONE_MINUS_CONSTANT_ALPHA).
 * NOTE(review): presumably blend-factor selectors - confirm against the
 * register specification.
 */
typedef enum BlendOp {
	BLEND_ZERO = 0x0,
	BLEND_ONE = 0x1,
	BLEND_SRC_COLOR = 0x2,
	BLEND_ONE_MINUS_SRC_COLOR = 0x3,
	BLEND_SRC_ALPHA = 0x4,
	BLEND_ONE_MINUS_SRC_ALPHA = 0x5,
	BLEND_DST_ALPHA = 0x6,
	BLEND_ONE_MINUS_DST_ALPHA = 0x7,
	BLEND_DST_COLOR = 0x8,
	BLEND_ONE_MINUS_DST_COLOR = 0x9,
	BLEND_SRC_ALPHA_SATURATE = 0xa,
	BLEND_BOTH_SRC_ALPHA = 0xb,
	BLEND_BOTH_INV_SRC_ALPHA = 0xc,
	BLEND_CONSTANT_COLOR = 0xd,
	BLEND_ONE_MINUS_CONSTANT_COLOR = 0xe,
	BLEND_SRC1_COLOR = 0xf,
	BLEND_INV_SRC1_COLOR = 0x10,
	BLEND_SRC1_ALPHA = 0x11,
	BLEND_INV_SRC1_ALPHA = 0x12,
	BLEND_CONSTANT_ALPHA = 0x13,
	BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
} BlendOp;
/*
 * CombFunc - COMB_* codes, numbered 0x0 through 0x4. The names spell out
 * the combination of DST and SRC (plus, minus, min, max).
 */
typedef enum CombFunc {
	COMB_DST_PLUS_SRC = 0x0,
	COMB_SRC_MINUS_DST = 0x1,
	COMB_MIN_DST_SRC = 0x2,
	COMB_MAX_DST_SRC = 0x3,
	COMB_DST_MINUS_SRC = 0x4,
} CombFunc;
/*
 * BlendOpt - FORCE_OPT_* codes, numbered 0x0 through 0x7: AUTO, DISABLE,
 * then three ENABLE_IF_SRC_*_0 codes followed by three ENABLE_IF_SRC_*_1
 * codes (A, RGB, ARGB in each group).
 */
typedef enum BlendOpt {
	FORCE_OPT_AUTO = 0x0,
	FORCE_OPT_DISABLE = 0x1,
	FORCE_OPT_ENABLE_IF_SRC_A_0 = 0x2,
	FORCE_OPT_ENABLE_IF_SRC_RGB_0 = 0x3,
	FORCE_OPT_ENABLE_IF_SRC_ARGB_0 = 0x4,
	FORCE_OPT_ENABLE_IF_SRC_A_1 = 0x5,
	FORCE_OPT_ENABLE_IF_SRC_RGB_1 = 0x6,
	FORCE_OPT_ENABLE_IF_SRC_ARGB_1 = 0x7,
} BlendOpt;
/*
 * CmaskCode - sixteen CMASK_CLRnn_Fx codes, numbered 0x0 through 0xf.
 *
 * The numbering follows the names: the two CLR digits, read as a binary
 * number, form the upper two bits and the F suffix (F0, F1, F2, FX as
 * 0..3) forms the lower two bits.
 */
typedef enum CmaskCode {
	/* CLR00 */
	CMASK_CLR00_F0 = 0x0,
	CMASK_CLR00_F1 = 0x1,
	CMASK_CLR00_F2 = 0x2,
	CMASK_CLR00_FX = 0x3,
	/* CLR01 */
	CMASK_CLR01_F0 = 0x4,
	CMASK_CLR01_F1 = 0x5,
	CMASK_CLR01_F2 = 0x6,
	CMASK_CLR01_FX = 0x7,
	/* CLR10 */
	CMASK_CLR10_F0 = 0x8,
	CMASK_CLR10_F1 = 0x9,
	CMASK_CLR10_F2 = 0xa,
	CMASK_CLR10_FX = 0xb,
	/* CLR11 */
	CMASK_CLR11_F0 = 0xc,
	CMASK_CLR11_F1 = 0xd,
	CMASK_CLR11_F2 = 0xe,
	CMASK_CLR11_FX = 0xf,
} CmaskCode;
/*
 * CBPerfSel - CB_PERF_SEL_* selector codes.
 *
 * Values are assigned consecutively from 0x0 (CB_PERF_SEL_NONE) through
 * 0xe1 (CB_PERF_SEL_FC_SEQUENCER_FMASK_COMPRESSION_DISABLE).
 * NOTE(review): presumably each code selects the event counted by a CB
 * performance counter - confirm against the register specification.
 *
 * Enumerators whose names overflow the alignment column are written with
 * "= " directly after the name; names and values are kept exactly as they
 * are because other code may reference them.
 */
typedef enum CBPerfSel {
	CB_PERF_SEL_NONE                                 = 0x0,
	CB_PERF_SEL_BUSY                                 = 0x1,
	CB_PERF_SEL_CORE_SCLK_VLD                        = 0x2,
	CB_PERF_SEL_REG_SCLK0_VLD                        = 0x3,
	CB_PERF_SEL_REG_SCLK1_VLD                        = 0x4,
	CB_PERF_SEL_DRAWN_QUAD                           = 0x5,
	CB_PERF_SEL_DRAWN_PIXEL                          = 0x6,
	CB_PERF_SEL_DRAWN_QUAD_FRAGMENT                  = 0x7,
	CB_PERF_SEL_DRAWN_TILE                           = 0x8,
	CB_PERF_SEL_DB_CB_TILE_VALID_READY               = 0x9,
	CB_PERF_SEL_DB_CB_TILE_VALID_READYB              = 0xa,
	CB_PERF_SEL_DB_CB_TILE_VALIDB_READY              = 0xb,
	CB_PERF_SEL_DB_CB_TILE_VALIDB_READYB             = 0xc,
	CB_PERF_SEL_CM_FC_TILE_VALID_READY               = 0xd,
	CB_PERF_SEL_CM_FC_TILE_VALID_READYB              = 0xe,
	CB_PERF_SEL_CM_FC_TILE_VALIDB_READY              = 0xf,
	CB_PERF_SEL_CM_FC_TILE_VALIDB_READYB             = 0x10,
	CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READY          = 0x11,
	CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READYB         = 0x12,
	CB_PERF_SEL_DB_CB_LQUAD_VALID_READY              = 0x13,
	CB_PERF_SEL_DB_CB_LQUAD_VALID_READYB             = 0x14,
	CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READY             = 0x15,
	CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READYB            = 0x16,
	CB_PERF_SEL_LQUAD_NO_TILE                        = 0x17,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_R          = 0x18,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_AR         = 0x19,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_GR         = 0x1a,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_ABGR       = 0x1b,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_FP16_ABGR     = 0x1c,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_SIGNED16_ABGR = 0x1d,
	CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_UNSIGNED16_ABGR= 0x1e,
	CB_PERF_SEL_QUAD_KILLED_BY_EXTRA_PIXEL_EXPORT    = 0x1f,
	CB_PERF_SEL_QUAD_KILLED_BY_COLOR_INVALID         = 0x20,
	CB_PERF_SEL_QUAD_KILLED_BY_NULL_TARGET_SHADER_MASK= 0x21,
	CB_PERF_SEL_QUAD_KILLED_BY_NULL_SAMPLE_MASK      = 0x22,
	CB_PERF_SEL_QUAD_KILLED_BY_DISCARD_PIXEL         = 0x23,
	CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READY            = 0x24,
	CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READYB           = 0x25,
	CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READY           = 0x26,
	CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READYB          = 0x27,
	CB_PERF_SEL_FOP_IN_VALID_READY                   = 0x28,
	CB_PERF_SEL_FOP_IN_VALID_READYB                  = 0x29,
	CB_PERF_SEL_FOP_IN_VALIDB_READY                  = 0x2a,
	CB_PERF_SEL_FOP_IN_VALIDB_READYB                 = 0x2b,
	CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READY           = 0x2c,
	CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READYB          = 0x2d,
	CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READY          = 0x2e,
	CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READYB         = 0x2f,
	CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READY            = 0x30,
	CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READYB           = 0x31,
	CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READY           = 0x32,
	CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READYB          = 0x33,
	CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READY            = 0x34,
	CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READYB           = 0x35,
	CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READY           = 0x36,
	CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READYB          = 0x37,
	CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READY        = 0x38,
	CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READYB       = 0x39,
	CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READY       = 0x3a,
	CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READYB      = 0x3b,
	CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READY         = 0x3c,
	CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READYB        = 0x3d,
	CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READY        = 0x3e,
	CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READYB       = 0x3f,
	CB_PERF_SEL_CC_BC_CS_FRAG_VALID                  = 0x40,
	CB_PERF_SEL_CM_CACHE_HIT                         = 0x41,
	CB_PERF_SEL_CM_CACHE_TAG_MISS                    = 0x42,
	CB_PERF_SEL_CM_CACHE_SECTOR_MISS                 = 0x43,
	CB_PERF_SEL_CM_CACHE_REEVICTION_STALL            = 0x44,
	CB_PERF_SEL_CM_CACHE_EVICT_NONZERO_INFLIGHT_STALL= 0x45,
	CB_PERF_SEL_CM_CACHE_REPLACE_PENDING_EVICT_STALL = 0x46,
	CB_PERF_SEL_CM_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL= 0x47,
	CB_PERF_SEL_CM_CACHE_READ_OUTPUT_STALL           = 0x48,
	CB_PERF_SEL_CM_CACHE_WRITE_OUTPUT_STALL          = 0x49,
	CB_PERF_SEL_CM_CACHE_ACK_OUTPUT_STALL            = 0x4a,
	CB_PERF_SEL_CM_CACHE_STALL                       = 0x4b,
	CB_PERF_SEL_CM_CACHE_FLUSH                       = 0x4c,
	CB_PERF_SEL_CM_CACHE_TAGS_FLUSHED                = 0x4d,
	CB_PERF_SEL_CM_CACHE_SECTORS_FLUSHED             = 0x4e,
	CB_PERF_SEL_CM_CACHE_DIRTY_SECTORS_FLUSHED       = 0x4f,
	CB_PERF_SEL_FC_CACHE_HIT                         = 0x50,
	CB_PERF_SEL_FC_CACHE_TAG_MISS                    = 0x51,
	CB_PERF_SEL_FC_CACHE_SECTOR_MISS                 = 0x52,
	CB_PERF_SEL_FC_CACHE_REEVICTION_STALL            = 0x53,
	CB_PERF_SEL_FC_CACHE_EVICT_NONZERO_INFLIGHT_STALL= 0x54,
	CB_PERF_SEL_FC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x55,
	CB_PERF_SEL_FC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL= 0x56,
	CB_PERF_SEL_FC_CACHE_READ_OUTPUT_STALL           = 0x57,
	CB_PERF_SEL_FC_CACHE_WRITE_OUTPUT_STALL          = 0x58,
	CB_PERF_SEL_FC_CACHE_ACK_OUTPUT_STALL            = 0x59,
	CB_PERF_SEL_FC_CACHE_STALL                       = 0x5a,
	CB_PERF_SEL_FC_CACHE_FLUSH                       = 0x5b,
	CB_PERF_SEL_FC_CACHE_TAGS_FLUSHED                = 0x5c,
	CB_PERF_SEL_FC_CACHE_SECTORS_FLUSHED             = 0x5d,
	CB_PERF_SEL_FC_CACHE_DIRTY_SECTORS_FLUSHED       = 0x5e,
	CB_PERF_SEL_CC_CACHE_HIT                         = 0x5f,
	CB_PERF_SEL_CC_CACHE_TAG_MISS                    = 0x60,
	CB_PERF_SEL_CC_CACHE_SECTOR_MISS                 = 0x61,
	CB_PERF_SEL_CC_CACHE_REEVICTION_STALL            = 0x62,
	CB_PERF_SEL_CC_CACHE_EVICT_NONZERO_INFLIGHT_STALL= 0x63,
	CB_PERF_SEL_CC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x64,
	CB_PERF_SEL_CC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL= 0x65,
	CB_PERF_SEL_CC_CACHE_READ_OUTPUT_STALL           = 0x66,
	CB_PERF_SEL_CC_CACHE_WRITE_OUTPUT_STALL          = 0x67,
	CB_PERF_SEL_CC_CACHE_ACK_OUTPUT_STALL            = 0x68,
	CB_PERF_SEL_CC_CACHE_STALL                       = 0x69,
	CB_PERF_SEL_CC_CACHE_FLUSH                       = 0x6a,
	CB_PERF_SEL_CC_CACHE_TAGS_FLUSHED                = 0x6b,
	CB_PERF_SEL_CC_CACHE_SECTORS_FLUSHED             = 0x6c,
	CB_PERF_SEL_CC_CACHE_DIRTY_SECTORS_FLUSHED       = 0x6d,
	CB_PERF_SEL_CC_CACHE_WA_TO_RMW_CONVERSION        = 0x6e,
	CB_PERF_SEL_CB_TAP_WRREQ_VALID_READY             = 0x6f,
	CB_PERF_SEL_CB_TAP_WRREQ_VALID_READYB            = 0x70,
	CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READY            = 0x71,
	CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READYB           = 0x72,
	CB_PERF_SEL_CM_MC_WRITE_REQUEST                  = 0x73,
	CB_PERF_SEL_FC_MC_WRITE_REQUEST                  = 0x74,
	CB_PERF_SEL_CC_MC_WRITE_REQUEST                  = 0x75,
	CB_PERF_SEL_CM_MC_WRITE_REQUESTS_IN_FLIGHT       = 0x76,
	CB_PERF_SEL_FC_MC_WRITE_REQUESTS_IN_FLIGHT       = 0x77,
	CB_PERF_SEL_CC_MC_WRITE_REQUESTS_IN_FLIGHT       = 0x78,
	CB_PERF_SEL_CB_TAP_RDREQ_VALID_READY             = 0x79,
	CB_PERF_SEL_CB_TAP_RDREQ_VALID_READYB            = 0x7a,
	CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READY            = 0x7b,
	CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READYB           = 0x7c,
	CB_PERF_SEL_CM_MC_READ_REQUEST                   = 0x7d,
	CB_PERF_SEL_FC_MC_READ_REQUEST                   = 0x7e,
	CB_PERF_SEL_CC_MC_READ_REQUEST                   = 0x7f,
	CB_PERF_SEL_CM_MC_READ_REQUESTS_IN_FLIGHT        = 0x80,
	CB_PERF_SEL_FC_MC_READ_REQUESTS_IN_FLIGHT        = 0x81,
	CB_PERF_SEL_CC_MC_READ_REQUESTS_IN_FLIGHT        = 0x82,
	CB_PERF_SEL_CM_TQ_FULL                           = 0x83,
	CB_PERF_SEL_CM_TQ_FIFO_TILE_RESIDENCY_STALL      = 0x84,
	CB_PERF_SEL_FC_QUAD_RDLAT_FIFO_FULL              = 0x85,
	CB_PERF_SEL_FC_TILE_RDLAT_FIFO_FULL              = 0x86,
	CB_PERF_SEL_FC_RDLAT_FIFO_QUAD_RESIDENCY_STALL   = 0x87,
	CB_PERF_SEL_FOP_FMASK_RAW_STALL                  = 0x88,
	CB_PERF_SEL_FOP_FMASK_BYPASS_STALL               = 0x89,
	CB_PERF_SEL_CC_SF_FULL                           = 0x8a,
	CB_PERF_SEL_CC_RB_FULL                           = 0x8b,
	CB_PERF_SEL_CC_EVENFIFO_QUAD_RESIDENCY_STALL     = 0x8c,
	CB_PERF_SEL_CC_ODDFIFO_QUAD_RESIDENCY_STALL      = 0x8d,
	CB_PERF_SEL_BLENDER_RAW_HAZARD_STALL             = 0x8e,
	CB_PERF_SEL_EVENT                                = 0x8f,
	CB_PERF_SEL_EVENT_CACHE_FLUSH_TS                 = 0x90,
	CB_PERF_SEL_EVENT_CONTEXT_DONE                   = 0x91,
	CB_PERF_SEL_EVENT_CACHE_FLUSH                    = 0x92,
	CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_TS_EVENT   = 0x93,
	CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_EVENT      = 0x94,
	CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_DATA_TS       = 0x95,
	CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_META          = 0x96,
	CB_PERF_SEL_CC_SURFACE_SYNC                      = 0x97,
	CB_PERF_SEL_CMASK_READ_DATA_0xC                  = 0x98,
	CB_PERF_SEL_CMASK_READ_DATA_0xD                  = 0x99,
	CB_PERF_SEL_CMASK_READ_DATA_0xE                  = 0x9a,
	CB_PERF_SEL_CMASK_READ_DATA_0xF                  = 0x9b,
	CB_PERF_SEL_CMASK_WRITE_DATA_0xC                 = 0x9c,
	CB_PERF_SEL_CMASK_WRITE_DATA_0xD                 = 0x9d,
	CB_PERF_SEL_CMASK_WRITE_DATA_0xE                 = 0x9e,
	CB_PERF_SEL_CMASK_WRITE_DATA_0xF                 = 0x9f,
	CB_PERF_SEL_TWO_PROBE_QUAD_FRAGMENT              = 0xa0,
	CB_PERF_SEL_EXPORT_32_ABGR_QUAD_FRAGMENT         = 0xa1,
	CB_PERF_SEL_DUAL_SOURCE_COLOR_QUAD_FRAGMENT      = 0xa2,
	CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_BEFORE_UPDATE    = 0xa3,
	CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_BEFORE_UPDATE   = 0xa4,
	CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_BEFORE_UPDATE   = 0xa5,
	CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_BEFORE_UPDATE   = 0xa6,
	CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_BEFORE_UPDATE   = 0xa7,
	CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_BEFORE_UPDATE   = 0xa8,
	CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_BEFORE_UPDATE   = 0xa9,
	CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_BEFORE_UPDATE   = 0xaa,
	CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_AFTER_UPDATE     = 0xab,
	CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_AFTER_UPDATE    = 0xac,
	CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_AFTER_UPDATE    = 0xad,
	CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_AFTER_UPDATE    = 0xae,
	CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_AFTER_UPDATE    = 0xaf,
	CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_AFTER_UPDATE    = 0xb0,
	CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_AFTER_UPDATE    = 0xb1,
	CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_AFTER_UPDATE    = 0xb2,
	CB_PERF_SEL_QUAD_ADDED_1_FRAGMENT                = 0xb3,
	CB_PERF_SEL_QUAD_ADDED_2_FRAGMENTS               = 0xb4,
	CB_PERF_SEL_QUAD_ADDED_3_FRAGMENTS               = 0xb5,
	CB_PERF_SEL_QUAD_ADDED_4_FRAGMENTS               = 0xb6,
	CB_PERF_SEL_QUAD_ADDED_5_FRAGMENTS               = 0xb7,
	CB_PERF_SEL_QUAD_ADDED_6_FRAGMENTS               = 0xb8,
	CB_PERF_SEL_QUAD_ADDED_7_FRAGMENTS               = 0xb9,
	CB_PERF_SEL_QUAD_REMOVED_1_FRAGMENT              = 0xba,
	CB_PERF_SEL_QUAD_REMOVED_2_FRAGMENTS             = 0xbb,
	CB_PERF_SEL_QUAD_REMOVED_3_FRAGMENTS             = 0xbc,
	CB_PERF_SEL_QUAD_REMOVED_4_FRAGMENTS             = 0xbd,
	CB_PERF_SEL_QUAD_REMOVED_5_FRAGMENTS             = 0xbe,
	CB_PERF_SEL_QUAD_REMOVED_6_FRAGMENTS             = 0xbf,
	CB_PERF_SEL_QUAD_REMOVED_7_FRAGMENTS             = 0xc0,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_0                = 0xc1,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_1                = 0xc2,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_2                = 0xc3,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_3                = 0xc4,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_4                = 0xc5,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_5                = 0xc6,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_6                = 0xc7,
	CB_PERF_SEL_QUAD_READS_FRAGMENT_7                = 0xc8,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_0               = 0xc9,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_1               = 0xca,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_2               = 0xcb,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_3               = 0xcc,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_4               = 0xcd,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_5               = 0xce,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_6               = 0xcf,
	CB_PERF_SEL_QUAD_WRITES_FRAGMENT_7               = 0xd0,
	CB_PERF_SEL_QUAD_BLEND_OPT_DONT_READ_DST         = 0xd1,
	CB_PERF_SEL_QUAD_BLEND_OPT_BLEND_BYPASS          = 0xd2,
	CB_PERF_SEL_QUAD_BLEND_OPT_DISCARD_PIXELS        = 0xd3,
	CB_PERF_SEL_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED= 0xd4,
	CB_PERF_SEL_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED= 0xd5,
	CB_PERF_SEL_QUAD_COULD_HAVE_BEEN_DISCARDED       = 0xd6,
	CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST      = 0xd7,
	CB_PERF_SEL_DRAWN_BUSY                           = 0xd8,
	CB_PERF_SEL_TILE_TO_CMR_REGION_BUSY              = 0xd9,
	CB_PERF_SEL_CMR_TO_FCR_REGION_BUSY               = 0xda,
	CB_PERF_SEL_FCR_TO_CCR_REGION_BUSY               = 0xdb,
	CB_PERF_SEL_CCR_TO_CCW_REGION_BUSY               = 0xdc,
	CB_PERF_SEL_FC_PF_SLOW_MODE_QUAD_EMPTY_HALF_DROPPED= 0xdd,
	CB_PERF_SEL_FC_SEQUENCER_CLEAR                   = 0xde,
	CB_PERF_SEL_FC_SEQUENCER_ELIMINATE_FAST_CLEAR    = 0xdf,
	CB_PERF_SEL_FC_SEQUENCER_FMASK_DECOMPRESS        = 0xe0,
	CB_PERF_SEL_FC_SEQUENCER_FMASK_COMPRESSION_DISABLE= 0xe1,
} CBPerfSel;
/*
 * CBPerfOpFilterSel - CB_PERF_OP_FILTER_SEL_* codes, numbered 0x0
 * (WRITE_ONLY) through 0x5 (ELIMINATE_FAST_CLEAR).
 */
typedef enum CBPerfOpFilterSel {
	CB_PERF_OP_FILTER_SEL_WRITE_ONLY = 0x0,
	CB_PERF_OP_FILTER_SEL_NEEDS_DESTINATION = 0x1,
	CB_PERF_OP_FILTER_SEL_RESOLVE = 0x2,
	CB_PERF_OP_FILTER_SEL_DECOMPRESS = 0x3,
	CB_PERF_OP_FILTER_SEL_FMASK_DECOMPRESS = 0x4,
	CB_PERF_OP_FILTER_SEL_ELIMINATE_FAST_CLEAR = 0x5,
} CBPerfOpFilterSel;
/* CBPerfClearFilterSel - two codes: NONCLEAR (0x0) and CLEAR (0x1). */
typedef enum CBPerfClearFilterSel {
	CB_PERF_CLEAR_FILTER_SEL_NONCLEAR = 0x0,
	CB_PERF_CLEAR_FILTER_SEL_CLEAR = 0x1,
} CBPerfClearFilterSel;
/* CP_RING_ID - RINGID0..RINGID3, each equal to its numeric suffix. */
typedef enum CP_RING_ID {
	RINGID0 = 0x0,
	RINGID1 = 0x1,
	RINGID2 = 0x2,
	RINGID3 = 0x3,
} CP_RING_ID;
/* CP_PIPE_ID - PIPE_ID0..PIPE_ID3, each equal to its numeric suffix. */
typedef enum CP_PIPE_ID {
	PIPE_ID0 = 0x0,
	PIPE_ID1 = 0x1,
	PIPE_ID2 = 0x2,
	PIPE_ID3 = 0x3,
} CP_PIPE_ID;
/* CP_ME_ID - ME_ID0..ME_ID3, each equal to its numeric suffix. */
typedef enum CP_ME_ID {
	ME_ID0 = 0x0,
	ME_ID1 = 0x1,
	ME_ID2 = 0x2,
	ME_ID3 = 0x3,
} CP_ME_ID;
/*
 * SPM_PERFMON_STATE - STRM_PERFMON_STATE_* codes, numbered 0x0 through
 * 0x5; 0x3 is named RESERVED_3.
 *
 * Note that the enumerators carry a STRM_ prefix although the type is
 * named SPM_; the names are kept as they are for compatibility.
 */
typedef enum SPM_PERFMON_STATE {
	STRM_PERFMON_STATE_DISABLE_AND_RESET = 0x0,
	STRM_PERFMON_STATE_START_COUNTING = 0x1,
	STRM_PERFMON_STATE_STOP_COUNTING = 0x2,
	STRM_PERFMON_STATE_RESERVED_3 = 0x3,
	STRM_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x4,
	STRM_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x5,
} SPM_PERFMON_STATE;
/*
 * CP_PERFMON_STATE - CP_PERFMON_STATE_* codes, numbered 0x0 through 0x5;
 * 0x3 is named RESERVED_3.
 */
typedef enum CP_PERFMON_STATE {
	CP_PERFMON_STATE_DISABLE_AND_RESET = 0x0,
	CP_PERFMON_STATE_START_COUNTING = 0x1,
	CP_PERFMON_STATE_STOP_COUNTING = 0x2,
	CP_PERFMON_STATE_RESERVED_3 = 0x3,
	CP_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x4,
	CP_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x5,
} CP_PERFMON_STATE;
/*
 * CP_PERFMON_ENABLE_MODE - CP_PERFMON_ENABLE_MODE_* codes, numbered 0x0
 * through 0x3; 0x1 is named RESERVED_1.
 */
typedef enum CP_PERFMON_ENABLE_MODE {
	CP_PERFMON_ENABLE_MODE_ALWAYS_COUNT = 0x0,
	CP_PERFMON_ENABLE_MODE_RESERVED_1 = 0x1,
	CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_TRUE = 0x2,
	CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_FALSE = 0x3,
} CP_PERFMON_ENABLE_MODE;
/*
 * CPG_PERFCOUNT_SEL - CPG_PERF_SEL_* selector codes, numbered
 * consecutively from 0x0 (ALWAYS_COUNT) through 0x2d
 * (TCIU_STALL_WAIT_ON_TAGS).
 * NOTE(review): presumably each code selects the event counted by a CPG
 * performance counter - confirm against the register specification.
 */
typedef enum CPG_PERFCOUNT_SEL {
	CPG_PERF_SEL_ALWAYS_COUNT = 0x0,
	CPG_PERF_SEL_RBIU_FIFO_FULL = 0x1,
	CPG_PERF_SEL_CSF_RTS_BUT_MIU_NOT_RTR = 0x2,
	CPG_PERF_SEL_CSF_ST_BASE_SIZE_FIFO_FULL = 0x3,
	CPG_PERF_SEL_CP_GRBM_DWORDS_SENT = 0x4,
	CPG_PERF_SEL_ME_PARSER_BUSY = 0x5,
	CPG_PERF_SEL_COUNT_TYPE0_PACKETS = 0x6,
	CPG_PERF_SEL_COUNT_TYPE3_PACKETS = 0x7,
	CPG_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x8,
	CPG_PERF_SEL_CP_GRBM_OUT_OF_CREDITS = 0x9,
	CPG_PERF_SEL_CP_PFP_GRBM_OUT_OF_CREDITS = 0xa,
	CPG_PERF_SEL_CP_GDS_GRBM_OUT_OF_CREDITS = 0xb,
	CPG_PERF_SEL_RCIU_STALLED_ON_ME_READ = 0xc,
	CPG_PERF_SEL_RCIU_STALLED_ON_DMA_READ = 0xd,
	CPG_PERF_SEL_SSU_STALLED_ON_ACTIVE_CNTX = 0xe,
	CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS = 0xf,
	CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE = 0x10,
	CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM = 0x11,
	CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY = 0x12,
	CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY = 0x13,
	CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY = 0x14,
	CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ = 0x15,
	CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP = 0x16,
	CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ = 0x17,
	CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX = 0x18,
	CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU = 0x19,
	CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS = 0x1a,
	CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH = 0x1b,
	CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER = 0x1c,
	CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER = 0x1d,
	CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS = 0x1e,
	CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY = 0x1f,
	CPG_PERF_SEL_DYNAMIC_CLK_VALID = 0x20,
	CPG_PERF_SEL_REGISTER_CLK_VALID = 0x21,
	CPG_PERF_SEL_MIU_WRITE_REQUEST_SENT = 0x22,
	CPG_PERF_SEL_MIU_READ_REQUEST_SENT = 0x23,
	CPG_PERF_SEL_CE_STALL_RAM_DUMP = 0x24,
	CPG_PERF_SEL_CE_STALL_RAM_WRITE = 0x25,
	CPG_PERF_SEL_CE_STALL_ON_INC_FIFO = 0x26,
	CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO = 0x27,
	CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU = 0x28,
	CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ = 0x29,
	CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG = 0x2a,
	CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER = 0x2b,
	CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x2c,
	CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS = 0x2d,
} CPG_PERFCOUNT_SEL;
/*
 * CPF_PERFCOUNT_SEL - CPF_PERF_SEL_* selector codes, numbered
 * consecutively from 0x0 (ALWAYS_COUNT) through 0x10
 * (MIU_READ_REQUEST_SEND).
 *
 * The spelling "FECTHINC" in the 0x7 enumerator is kept exactly as it is:
 * the identifier is part of the header's interface.
 */
typedef enum CPF_PERFCOUNT_SEL {
	CPF_PERF_SEL_ALWAYS_COUNT = 0x0,
	CPF_PERF_SEL_MIU_STALLED_WAITING_RDREQ_FREE = 0x1,
	CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_FREE = 0x2,
	CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_TAGS = 0x3,
	CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_RING = 0x4,
	CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB1 = 0x5,
	CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB2 = 0x6,
	CPF_PERF_SEL_CSF_BUSY_FOR_FECTHINC_STATE = 0x7,
	CPF_PERF_SEL_MIU_BUSY_FOR_OUTSTANDING_TAGS = 0x8,
	CPF_PERF_SEL_CSF_RTS_MIU_NOT_RTR = 0x9,
	CPF_PERF_SEL_CSF_STATE_FIFO_NOT_RTR = 0xa,
	CPF_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0xb,
	CPF_PERF_SEL_GRBM_DWORDS_SENT = 0xc,
	CPF_PERF_SEL_DYNAMIC_CLOCK_VALID = 0xd,
	CPF_PERF_SEL_REGISTER_CLOCK_VALID = 0xe,
	CPF_PERF_SEL_MIU_WRITE_REQUEST_SEND = 0xf,
	CPF_PERF_SEL_MIU_READ_REQUEST_SEND = 0x10,
} CPF_PERFCOUNT_SEL;
/*
 * CPC_PERFCOUNT_SEL - CPC_PERF_SEL_* selector codes, numbered
 * consecutively from 0x0 through 0x15.
 *
 * After six codes without an ME prefix (0x0-0x5) come eight ME1_* codes
 * (0x6-0xd) and the same eight codes again with an ME2_ prefix (0xe-0x15).
 */
typedef enum CPC_PERFCOUNT_SEL {
	/* Codes without an ME1/ME2 prefix. */
	CPC_PERF_SEL_ALWAYS_COUNT = 0x0,
	CPC_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x1,
	CPC_PERF_SEL_RCIU_STALL_PRIV_VIOLATION = 0x2,
	CPC_PERF_SEL_MIU_STALL_ON_RDREQ_FREE = 0x3,
	CPC_PERF_SEL_MIU_STALL_ON_WRREQ_FREE = 0x4,
	CPC_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x5,

	/* ME1_* codes. */
	CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY = 0x6,
	CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY_PERF = 0x7,
	CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READ = 0x8,
	CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_READ = 0x9,
	CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_WRITE = 0xa,
	CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ = 0xb,
	CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ_PERF = 0xc,
	CPC_PERF_SEL_ME1_BUSY_FOR_PACKET_DECODE = 0xd,

	/* ME2_* codes, in the same order as the ME1_* ones. */
	CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY = 0xe,
	CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY_PERF = 0xf,
	CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READ = 0x10,
	CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_READ = 0x11,
	CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_WRITE = 0x12,
	CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ = 0x13,
	CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ_PERF = 0x14,
	CPC_PERF_SEL_ME2_BUSY_FOR_PACKET_DECODE = 0x15,
} CPC_PERFCOUNT_SEL;
/*
 * CP_ALPHA_TAG_RAM_SEL - four *_TAG_RAM codes: CPG (0x0), CPC (0x1),
 * CPF (0x2) and RSV (0x3).
 */
typedef enum CP_ALPHA_TAG_RAM_SEL {
	CPG_TAG_RAM = 0x0,
	CPC_TAG_RAM = 0x1,
	CPF_TAG_RAM = 0x2,
	RSV_TAG_RAM = 0x3,
} CP_ALPHA_TAG_RAM_SEL;
/* SEM_* codes, 0x0 through 0x3. */
#define SEM_ECC_ERROR                             0x0
#define SEM_RESERVED                              0x1
#define SEM_FAILED                                0x2
#define SEM_PASSED                                0x3
/* IQ_* codes, 0x0 through 0x4. */
#define IQ_QUEUE_SLEEP                            0x0
#define IQ_OFFLOAD_RETRY                          0x1
#define IQ_SCH_WAVE_MSG                           0x2
#define IQ_SEM_REARM                              0x3
#define IQ_DEQUEUE_RETRY                          0x4
/* IQ_INTR_TYPE_* codes: PQ, IB and MQD. */
#define IQ_INTR_TYPE_PQ                           0x0
#define IQ_INTR_TYPE_IB                           0x1
#define IQ_INTR_TYPE_MQD                          0x2
/* NOTE(review): presumably the width of a VMID field in bits - confirm. */
#define VMID_SZ                                   0x4
/*
 * Inclusive START/END bounds of the named spaces.
 *
 * CONFIG_SPACE (0x2000-0x9fff) is covered by CONFIG_SPACE1 (0x2000-0x2bff),
 * PERSISTENT_SPACE (0x2c00-0x2fff) and CONFIG_SPACE2 (0x3000-0x9fff), in
 * that order and without gaps. CONTEXT_SPACE (0xa000-0xbfff) and
 * UCONFIG_SPACE (0xc000-0xffff) follow directly after it.
 * NOTE(review): the unit of these offsets (bytes or dwords) is not stated
 * here - confirm before using them for address arithmetic.
 */
#define CONFIG_SPACE_START                        0x2000
#define CONFIG_SPACE_END                          0x9fff
#define CONFIG_SPACE1_START                       0x2000
#define CONFIG_SPACE1_END                         0x2bff
#define CONFIG_SPACE2_START                       0x3000
#define CONFIG_SPACE2_END                         0x9fff
#define UCONFIG_SPACE_START                       0xc000
#define UCONFIG_SPACE_END                         0xffff
#define PERSISTENT_SPACE_START                    0x2c00
#define PERSISTENT_SPACE_END                      0x2fff
#define CONTEXT_SPACE_START                       0xa000
#define CONTEXT_SPACE_END                         0xbfff
/*
 * ForceControl - four encodings (0x00-0x03): off, enable, disable and a
 * reserved value, going by the enumerator names.
 */
typedef enum ForceControl {
	FORCE_OFF = 0x00,
	FORCE_ENABLE = 0x01,
	FORCE_DISABLE = 0x02,
	FORCE_RESERVED = 0x03,
} ForceControl;
/* ZSamplePosition - two encodings: center (0x00) and centroid (0x01). */
typedef enum ZSamplePosition {
	Z_SAMPLE_CENTER = 0x00,
	Z_SAMPLE_CENTROID = 0x01,
} ZSamplePosition;
/* ZOrder - four Z-ordering encodings, 0x00-0x03. */
typedef enum ZOrder {
	LATE_Z = 0x00,
	EARLY_Z_THEN_LATE_Z = 0x01,
	RE_Z = 0x02,
	EARLY_Z_THEN_RE_Z = 0x03,
} ZOrder;
/* ZpassControl - three encodings, 0x00-0x02: disable, samples, pixels. */
typedef enum ZpassControl {
	ZPASS_DISABLE = 0x00,
	ZPASS_SAMPLES = 0x01,
	ZPASS_PIXELS = 0x02,
} ZpassControl;
/* ZModeForce - four encodings, 0x00-0x03; 0x00 is NO_FORCE. */
typedef enum ZModeForce {
	NO_FORCE = 0x00,
	FORCE_EARLY_Z = 0x01,
	FORCE_LATE_Z = 0x02,
	FORCE_RE_Z = 0x03,
} ZModeForce;
/* ZLimitSumm - four encodings, 0x00-0x03: off, MINZ, MAXZ, both. */
typedef enum ZLimitSumm {
	FORCE_SUMM_OFF = 0x00,
	FORCE_SUMM_MINZ = 0x01,
	FORCE_SUMM_MAXZ = 0x02,
	FORCE_SUMM_BOTH = 0x03,
} ZLimitSumm;
/* CompareFrag - eight comparison encodings, 0x00 (NEVER) to 0x07 (ALWAYS). */
typedef enum CompareFrag {
	FRAG_NEVER = 0x00,
	FRAG_LESS = 0x01,
	FRAG_EQUAL = 0x02,
	FRAG_LEQUAL = 0x03,
	FRAG_GREATER = 0x04,
	FRAG_NOTEQUAL = 0x05,
	FRAG_GEQUAL = 0x06,
	FRAG_ALWAYS = 0x07,
} CompareFrag;
/* StencilOp - sixteen stencil operation encodings, 0x00-0x0f. */
typedef enum StencilOp {
	STENCIL_KEEP = 0x00,
	STENCIL_ZERO = 0x01,
	STENCIL_ONES = 0x02,
	STENCIL_REPLACE_TEST = 0x03,
	STENCIL_REPLACE_OP = 0x04,
	STENCIL_ADD_CLAMP = 0x05,
	STENCIL_SUB_CLAMP = 0x06,
	STENCIL_INVERT = 0x07,
	STENCIL_ADD_WRAP = 0x08,
	STENCIL_SUB_WRAP = 0x09,
	STENCIL_AND = 0x0a,
	STENCIL_OR = 0x0b,
	STENCIL_XOR = 0x0c,
	STENCIL_NAND = 0x0d,
	STENCIL_NOR = 0x0e,
	STENCIL_XNOR = 0x0f,
} StencilOp;
/* ConservativeZExport - four encodings, 0x00-0x03; 0x03 is reserved. */
typedef enum ConservativeZExport {
	EXPORT_ANY_Z = 0x00,
	EXPORT_LESS_THAN_Z = 0x01,
	EXPORT_GREATER_THAN_Z = 0x02,
	EXPORT_RESERVED = 0x03,
} ConservativeZExport;
/* DbPSLControl - four PSLC_* encodings, 0x00-0x03. */
typedef enum DbPSLControl {
	PSLC_AUTO = 0x00,
	PSLC_ON_HANG_ONLY = 0x01,
	PSLC_ASAP = 0x02,
	PSLC_COUNTDOWN = 0x03,
} DbPSLControl;
/*
 * PerfCounter_Vals - DB performance-counter select encodings, 0x0-0x100.
 *
 * Values are contiguous: every encoding from 0x0 through 0x100 has a name.
 *
 * Encoding 0xd2 carries two names. The historical spelling
 * DB_PEFF_SEL_prezl_tile_mem_stall ("PEFF") is kept so existing users keep
 * compiling; DB_PERF_SEL_prezl_tile_mem_stall is the spelling consistent with
 * every other enumerator and should be preferred in new code.
 */
typedef enum PerfCounter_Vals {
	DB_PERF_SEL_SC_DB_tile_sends                     = 0x0,
	DB_PERF_SEL_SC_DB_tile_busy                      = 0x1,
	DB_PERF_SEL_SC_DB_tile_stalls                    = 0x2,
	DB_PERF_SEL_SC_DB_tile_events                    = 0x3,
	DB_PERF_SEL_SC_DB_tile_tiles                     = 0x4,
	DB_PERF_SEL_SC_DB_tile_covered                   = 0x5,
	DB_PERF_SEL_hiz_tc_read_starved                  = 0x6,
	DB_PERF_SEL_hiz_tc_write_stall                   = 0x7,
	DB_PERF_SEL_hiz_qtiles_culled                    = 0x8,
	DB_PERF_SEL_his_qtiles_culled                    = 0x9,
	DB_PERF_SEL_DB_SC_tile_sends                     = 0xa,
	DB_PERF_SEL_DB_SC_tile_busy                      = 0xb,
	DB_PERF_SEL_DB_SC_tile_stalls                    = 0xc,
	DB_PERF_SEL_DB_SC_tile_df_stalls                 = 0xd,
	DB_PERF_SEL_DB_SC_tile_tiles                     = 0xe,
	DB_PERF_SEL_DB_SC_tile_culled                    = 0xf,
	DB_PERF_SEL_DB_SC_tile_hier_kill                 = 0x10,
	DB_PERF_SEL_DB_SC_tile_fast_ops                  = 0x11,
	DB_PERF_SEL_DB_SC_tile_no_ops                    = 0x12,
	DB_PERF_SEL_DB_SC_tile_tile_rate                 = 0x13,
	DB_PERF_SEL_DB_SC_tile_ssaa_kill                 = 0x14,
	DB_PERF_SEL_DB_SC_tile_fast_z_ops                = 0x15,
	DB_PERF_SEL_DB_SC_tile_fast_stencil_ops          = 0x16,
	DB_PERF_SEL_SC_DB_quad_sends                     = 0x17,
	DB_PERF_SEL_SC_DB_quad_busy                      = 0x18,
	DB_PERF_SEL_SC_DB_quad_squads                    = 0x19,
	DB_PERF_SEL_SC_DB_quad_tiles                     = 0x1a,
	DB_PERF_SEL_SC_DB_quad_pixels                    = 0x1b,
	DB_PERF_SEL_SC_DB_quad_killed_tiles              = 0x1c,
	DB_PERF_SEL_DB_SC_quad_sends                     = 0x1d,
	DB_PERF_SEL_DB_SC_quad_busy                      = 0x1e,
	DB_PERF_SEL_DB_SC_quad_stalls                    = 0x1f,
	DB_PERF_SEL_DB_SC_quad_tiles                     = 0x20,
	DB_PERF_SEL_DB_SC_quad_lit_quad                  = 0x21,
	DB_PERF_SEL_DB_CB_tile_sends                     = 0x22,
	DB_PERF_SEL_DB_CB_tile_busy                      = 0x23,
	DB_PERF_SEL_DB_CB_tile_stalls                    = 0x24,
	DB_PERF_SEL_SX_DB_quad_sends                     = 0x25,
	DB_PERF_SEL_SX_DB_quad_busy                      = 0x26,
	DB_PERF_SEL_SX_DB_quad_stalls                    = 0x27,
	DB_PERF_SEL_SX_DB_quad_quads                     = 0x28,
	DB_PERF_SEL_SX_DB_quad_pixels                    = 0x29,
	DB_PERF_SEL_SX_DB_quad_exports                   = 0x2a,
	DB_PERF_SEL_SH_quads_outstanding_sum             = 0x2b,
	DB_PERF_SEL_DB_CB_lquad_sends                    = 0x2c,
	DB_PERF_SEL_DB_CB_lquad_busy                     = 0x2d,
	DB_PERF_SEL_DB_CB_lquad_stalls                   = 0x2e,
	DB_PERF_SEL_DB_CB_lquad_quads                    = 0x2f,
	DB_PERF_SEL_tile_rd_sends                        = 0x30,
	DB_PERF_SEL_mi_tile_rd_outstanding_sum           = 0x31,
	DB_PERF_SEL_quad_rd_sends                        = 0x32,
	DB_PERF_SEL_quad_rd_busy                         = 0x33,
	DB_PERF_SEL_quad_rd_mi_stall                     = 0x34,
	DB_PERF_SEL_quad_rd_rw_collision                 = 0x35,
	DB_PERF_SEL_quad_rd_tag_stall                    = 0x36,
	DB_PERF_SEL_quad_rd_32byte_reqs                  = 0x37,
	DB_PERF_SEL_quad_rd_panic                        = 0x38,
	DB_PERF_SEL_mi_quad_rd_outstanding_sum           = 0x39,
	DB_PERF_SEL_quad_rdret_sends                     = 0x3a,
	DB_PERF_SEL_quad_rdret_busy                      = 0x3b,
	DB_PERF_SEL_tile_wr_sends                        = 0x3c,
	DB_PERF_SEL_tile_wr_acks                         = 0x3d,
	DB_PERF_SEL_mi_tile_wr_outstanding_sum           = 0x3e,
	DB_PERF_SEL_quad_wr_sends                        = 0x3f,
	DB_PERF_SEL_quad_wr_busy                         = 0x40,
	DB_PERF_SEL_quad_wr_mi_stall                     = 0x41,
	DB_PERF_SEL_quad_wr_coherency_stall              = 0x42,
	DB_PERF_SEL_quad_wr_acks                         = 0x43,
	DB_PERF_SEL_mi_quad_wr_outstanding_sum           = 0x44,
	DB_PERF_SEL_Tile_Cache_misses                    = 0x45,
	DB_PERF_SEL_Tile_Cache_hits                      = 0x46,
	DB_PERF_SEL_Tile_Cache_flushes                   = 0x47,
	DB_PERF_SEL_Tile_Cache_surface_stall             = 0x48,
	DB_PERF_SEL_Tile_Cache_starves                   = 0x49,
	DB_PERF_SEL_Tile_Cache_mem_return_starve         = 0x4a,
	DB_PERF_SEL_tcp_dispatcher_reads                 = 0x4b,
	DB_PERF_SEL_tcp_prefetcher_reads                 = 0x4c,
	DB_PERF_SEL_tcp_preloader_reads                  = 0x4d,
	DB_PERF_SEL_tcp_dispatcher_flushes               = 0x4e,
	DB_PERF_SEL_tcp_prefetcher_flushes               = 0x4f,
	DB_PERF_SEL_tcp_preloader_flushes                = 0x50,
	DB_PERF_SEL_Depth_Tile_Cache_sends               = 0x51,
	DB_PERF_SEL_Depth_Tile_Cache_busy                = 0x52,
	DB_PERF_SEL_Depth_Tile_Cache_starves             = 0x53,
	DB_PERF_SEL_Depth_Tile_Cache_dtile_locked        = 0x54,
	DB_PERF_SEL_Depth_Tile_Cache_alloc_stall         = 0x55,
	DB_PERF_SEL_Depth_Tile_Cache_misses              = 0x56,
	DB_PERF_SEL_Depth_Tile_Cache_hits                = 0x57,
	DB_PERF_SEL_Depth_Tile_Cache_flushes             = 0x58,
	DB_PERF_SEL_Depth_Tile_Cache_noop_tile           = 0x59,
	DB_PERF_SEL_Depth_Tile_Cache_detailed_noop       = 0x5a,
	DB_PERF_SEL_Depth_Tile_Cache_event               = 0x5b,
	DB_PERF_SEL_Depth_Tile_Cache_tile_frees          = 0x5c,
	DB_PERF_SEL_Depth_Tile_Cache_data_frees          = 0x5d,
	DB_PERF_SEL_Depth_Tile_Cache_mem_return_starve   = 0x5e,
	DB_PERF_SEL_Stencil_Cache_misses                 = 0x5f,
	DB_PERF_SEL_Stencil_Cache_hits                   = 0x60,
	DB_PERF_SEL_Stencil_Cache_flushes                = 0x61,
	DB_PERF_SEL_Stencil_Cache_starves                = 0x62,
	DB_PERF_SEL_Stencil_Cache_frees                  = 0x63,
	DB_PERF_SEL_Z_Cache_separate_Z_misses            = 0x64,
	DB_PERF_SEL_Z_Cache_separate_Z_hits              = 0x65,
	DB_PERF_SEL_Z_Cache_separate_Z_flushes           = 0x66,
	DB_PERF_SEL_Z_Cache_separate_Z_starves           = 0x67,
	DB_PERF_SEL_Z_Cache_pmask_misses                 = 0x68,
	DB_PERF_SEL_Z_Cache_pmask_hits                   = 0x69,
	DB_PERF_SEL_Z_Cache_pmask_flushes                = 0x6a,
	DB_PERF_SEL_Z_Cache_pmask_starves                = 0x6b,
	DB_PERF_SEL_Z_Cache_frees                        = 0x6c,
	DB_PERF_SEL_Plane_Cache_misses                   = 0x6d,
	DB_PERF_SEL_Plane_Cache_hits                     = 0x6e,
	DB_PERF_SEL_Plane_Cache_flushes                  = 0x6f,
	DB_PERF_SEL_Plane_Cache_starves                  = 0x70,
	DB_PERF_SEL_Plane_Cache_frees                    = 0x71,
	DB_PERF_SEL_flush_expanded_stencil               = 0x72,
	DB_PERF_SEL_flush_compressed_stencil             = 0x73,
	DB_PERF_SEL_flush_single_stencil                 = 0x74,
	DB_PERF_SEL_planes_flushed                       = 0x75,
	DB_PERF_SEL_flush_1plane                         = 0x76,
	DB_PERF_SEL_flush_2plane                         = 0x77,
	DB_PERF_SEL_flush_3plane                         = 0x78,
	DB_PERF_SEL_flush_4plane                         = 0x79,
	DB_PERF_SEL_flush_5plane                         = 0x7a,
	DB_PERF_SEL_flush_6plane                         = 0x7b,
	DB_PERF_SEL_flush_7plane                         = 0x7c,
	DB_PERF_SEL_flush_8plane                         = 0x7d,
	DB_PERF_SEL_flush_9plane                         = 0x7e,
	DB_PERF_SEL_flush_10plane                        = 0x7f,
	DB_PERF_SEL_flush_11plane                        = 0x80,
	DB_PERF_SEL_flush_12plane                        = 0x81,
	DB_PERF_SEL_flush_13plane                        = 0x82,
	DB_PERF_SEL_flush_14plane                        = 0x83,
	DB_PERF_SEL_flush_15plane                        = 0x84,
	DB_PERF_SEL_flush_16plane                        = 0x85,
	DB_PERF_SEL_flush_expanded_z                     = 0x86,
	DB_PERF_SEL_earlyZ_waiting_for_postZ_done        = 0x87,
	DB_PERF_SEL_reZ_waiting_for_postZ_done           = 0x88,
	DB_PERF_SEL_dk_tile_sends                        = 0x89,
	DB_PERF_SEL_dk_tile_busy                         = 0x8a,
	DB_PERF_SEL_dk_tile_quad_starves                 = 0x8b,
	DB_PERF_SEL_dk_tile_stalls                       = 0x8c,
	DB_PERF_SEL_dk_squad_sends                       = 0x8d,
	DB_PERF_SEL_dk_squad_busy                        = 0x8e,
	DB_PERF_SEL_dk_squad_stalls                      = 0x8f,
	DB_PERF_SEL_Op_Pipe_Busy                         = 0x90,
	DB_PERF_SEL_Op_Pipe_MC_Read_stall                = 0x91,
	DB_PERF_SEL_qc_busy                              = 0x92,
	DB_PERF_SEL_qc_xfc                               = 0x93,
	DB_PERF_SEL_qc_conflicts                         = 0x94,
	DB_PERF_SEL_qc_full_stall                        = 0x95,
	DB_PERF_SEL_qc_in_preZ_tile_stalls_postZ         = 0x96,
	DB_PERF_SEL_qc_in_postZ_tile_stalls_preZ         = 0x97,
	DB_PERF_SEL_tsc_insert_summarize_stall           = 0x98,
	DB_PERF_SEL_tl_busy                              = 0x99,
	DB_PERF_SEL_tl_dtc_read_starved                  = 0x9a,
	DB_PERF_SEL_tl_z_fetch_stall                     = 0x9b,
	DB_PERF_SEL_tl_stencil_stall                     = 0x9c,
	DB_PERF_SEL_tl_z_decompress_stall                = 0x9d,
	DB_PERF_SEL_tl_stencil_locked_stall              = 0x9e,
	DB_PERF_SEL_tl_events                            = 0x9f,
	DB_PERF_SEL_tl_summarize_squads                  = 0xa0,
	DB_PERF_SEL_tl_flush_expand_squads               = 0xa1,
	DB_PERF_SEL_tl_expand_squads                     = 0xa2,
	DB_PERF_SEL_tl_preZ_squads                       = 0xa3,
	DB_PERF_SEL_tl_postZ_squads                      = 0xa4,
	DB_PERF_SEL_tl_preZ_noop_squads                  = 0xa5,
	DB_PERF_SEL_tl_postZ_noop_squads                 = 0xa6,
	DB_PERF_SEL_tl_tile_ops                          = 0xa7,
	DB_PERF_SEL_tl_in_xfc                            = 0xa8,
	DB_PERF_SEL_tl_in_single_stencil_expand_stall    = 0xa9,
	DB_PERF_SEL_tl_in_fast_z_stall                   = 0xaa,
	DB_PERF_SEL_tl_out_xfc                           = 0xab,
	DB_PERF_SEL_tl_out_squads                        = 0xac,
	DB_PERF_SEL_zf_plane_multicycle                  = 0xad,
	DB_PERF_SEL_PostZ_Samples_passing_Z              = 0xae,
	DB_PERF_SEL_PostZ_Samples_failing_Z              = 0xaf,
	DB_PERF_SEL_PostZ_Samples_failing_S              = 0xb0,
	DB_PERF_SEL_PreZ_Samples_passing_Z               = 0xb1,
	DB_PERF_SEL_PreZ_Samples_failing_Z               = 0xb2,
	DB_PERF_SEL_PreZ_Samples_failing_S               = 0xb3,
	DB_PERF_SEL_ts_tc_update_stall                   = 0xb4,
	DB_PERF_SEL_sc_kick_start                        = 0xb5,
	DB_PERF_SEL_sc_kick_end                          = 0xb6,
	DB_PERF_SEL_clock_reg_active                     = 0xb7,
	DB_PERF_SEL_clock_main_active                    = 0xb8,
	DB_PERF_SEL_clock_mem_export_active              = 0xb9,
	DB_PERF_SEL_esr_ps_out_busy                      = 0xba,
	DB_PERF_SEL_esr_ps_lqf_busy                      = 0xbb,
	DB_PERF_SEL_esr_ps_lqf_stall                     = 0xbc,
	DB_PERF_SEL_etr_out_send                         = 0xbd,
	DB_PERF_SEL_etr_out_busy                         = 0xbe,
	DB_PERF_SEL_etr_out_ltile_probe_fifo_full_stall  = 0xbf,
	DB_PERF_SEL_etr_out_cb_tile_stall                = 0xc0,
	DB_PERF_SEL_etr_out_esr_stall                    = 0xc1,
	DB_PERF_SEL_esr_ps_sqq_busy                      = 0xc2,
	DB_PERF_SEL_esr_ps_sqq_stall                     = 0xc3,
	DB_PERF_SEL_esr_eot_fwd_busy                     = 0xc4,
	DB_PERF_SEL_esr_eot_fwd_holding_squad            = 0xc5,
	DB_PERF_SEL_esr_eot_fwd_forward                  = 0xc6,
	DB_PERF_SEL_esr_sqq_zi_busy                      = 0xc7,
	DB_PERF_SEL_esr_sqq_zi_stall                     = 0xc8,
	DB_PERF_SEL_postzl_sq_pt_busy                    = 0xc9,
	DB_PERF_SEL_postzl_sq_pt_stall                   = 0xca,
	DB_PERF_SEL_postzl_se_busy                       = 0xcb,
	DB_PERF_SEL_postzl_se_stall                      = 0xcc,
	DB_PERF_SEL_postzl_partial_launch                = 0xcd,
	DB_PERF_SEL_postzl_full_launch                   = 0xce,
	DB_PERF_SEL_postzl_partial_waiting               = 0xcf,
	DB_PERF_SEL_postzl_tile_mem_stall                = 0xd0,
	DB_PERF_SEL_postzl_tile_init_stall               = 0xd1,
	/* Misspelled legacy name ("PEFF"); retained for source compatibility. */
	DB_PEFF_SEL_prezl_tile_mem_stall                 = 0xd2,
	/* Correctly spelled alias for the same encoding. */
	DB_PERF_SEL_prezl_tile_mem_stall                 = 0xd2,
	DB_PERF_SEL_prezl_tile_init_stall                = 0xd3,
	DB_PERF_SEL_dtt_sm_clash_stall                   = 0xd4,
	DB_PERF_SEL_dtt_sm_slot_stall                    = 0xd5,
	DB_PERF_SEL_dtt_sm_miss_stall                    = 0xd6,
	DB_PERF_SEL_mi_rdreq_busy                        = 0xd7,
	DB_PERF_SEL_mi_rdreq_stall                       = 0xd8,
	DB_PERF_SEL_mi_wrreq_busy                        = 0xd9,
	DB_PERF_SEL_mi_wrreq_stall                       = 0xda,
	DB_PERF_SEL_recomp_tile_to_1zplane_no_fastop     = 0xdb,
	DB_PERF_SEL_dkg_tile_rate_tile                   = 0xdc,
	DB_PERF_SEL_prezl_src_in_sends                   = 0xdd,
	DB_PERF_SEL_prezl_src_in_stall                   = 0xde,
	DB_PERF_SEL_prezl_src_in_squads                  = 0xdf,
	DB_PERF_SEL_prezl_src_in_squads_unrolled         = 0xe0,
	DB_PERF_SEL_prezl_src_in_tile_rate               = 0xe1,
	DB_PERF_SEL_prezl_src_in_tile_rate_unrolled      = 0xe2,
	DB_PERF_SEL_prezl_src_out_stall                  = 0xe3,
	DB_PERF_SEL_postzl_src_in_sends                  = 0xe4,
	DB_PERF_SEL_postzl_src_in_stall                  = 0xe5,
	DB_PERF_SEL_postzl_src_in_squads                 = 0xe6,
	DB_PERF_SEL_postzl_src_in_squads_unrolled        = 0xe7,
	DB_PERF_SEL_postzl_src_in_tile_rate              = 0xe8,
	DB_PERF_SEL_postzl_src_in_tile_rate_unrolled     = 0xe9,
	DB_PERF_SEL_postzl_src_out_stall                 = 0xea,
	DB_PERF_SEL_esr_ps_src_in_sends                  = 0xeb,
	DB_PERF_SEL_esr_ps_src_in_stall                  = 0xec,
	DB_PERF_SEL_esr_ps_src_in_squads                 = 0xed,
	DB_PERF_SEL_esr_ps_src_in_squads_unrolled        = 0xee,
	DB_PERF_SEL_esr_ps_src_in_tile_rate              = 0xef,
	DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled     = 0xf0,
	DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled_to_pixel_rate= 0xf1,
	DB_PERF_SEL_esr_ps_src_out_stall                 = 0xf2,
	DB_PERF_SEL_depth_bounds_qtiles_culled           = 0xf3,
	DB_PERF_SEL_PreZ_Samples_failing_DB              = 0xf4,
	DB_PERF_SEL_PostZ_Samples_failing_DB             = 0xf5,
	DB_PERF_SEL_flush_compressed                     = 0xf6,
	DB_PERF_SEL_flush_plane_le4                      = 0xf7,
	DB_PERF_SEL_tiles_z_fully_summarized             = 0xf8,
	DB_PERF_SEL_tiles_stencil_fully_summarized       = 0xf9,
	DB_PERF_SEL_tiles_z_clear_on_expclear            = 0xfa,
	DB_PERF_SEL_tiles_s_clear_on_expclear            = 0xfb,
	DB_PERF_SEL_tiles_decomp_on_expclear             = 0xfc,
	DB_PERF_SEL_tiles_compressed_to_decompressed     = 0xfd,
	DB_PERF_SEL_Op_Pipe_Prez_Busy                    = 0xfe,
	DB_PERF_SEL_Op_Pipe_Postz_Busy                   = 0xff,
	DB_PERF_SEL_di_dt_stall                          = 0x100,
} PerfCounter_Vals;
/* RingCounterControl - three encodings, 0x00-0x02: split, ring 0, ring 1. */
typedef enum RingCounterControl {
	COUNTER_RING_SPLIT = 0x00,
	COUNTER_RING_0 = 0x01,
	COUNTER_RING_1 = 0x02,
} RingCounterControl;
/*
 * PixelPipeCounterId - eight counter identifiers, 0x00-0x07: four occlusion
 * counts followed by two min/max screen-extent pairs.
 */
typedef enum PixelPipeCounterId {
	PIXEL_PIPE_OCCLUSION_COUNT_0 = 0x00,
	PIXEL_PIPE_OCCLUSION_COUNT_1 = 0x01,
	PIXEL_PIPE_OCCLUSION_COUNT_2 = 0x02,
	PIXEL_PIPE_OCCLUSION_COUNT_3 = 0x03,
	PIXEL_PIPE_SCREEN_MIN_EXTENTS_0 = 0x04,
	PIXEL_PIPE_SCREEN_MAX_EXTENTS_0 = 0x05,
	PIXEL_PIPE_SCREEN_MIN_EXTENTS_1 = 0x06,
	PIXEL_PIPE_SCREEN_MAX_EXTENTS_1 = 0x07,
} PixelPipeCounterId;
/* PixelPipeStride - four stride encodings, 0x00-0x03, named 32 to 256 bits. */
typedef enum PixelPipeStride {
	PIXEL_PIPE_STRIDE_32_BITS = 0x00,
	PIXEL_PIPE_STRIDE_64_BITS = 0x01,
	PIXEL_PIPE_STRIDE_128_BITS = 0x02,
	PIXEL_PIPE_STRIDE_256_BITS = 0x03,
} PixelPipeStride;
/* GB_EDC_DED_MODE - three mode encodings, 0x00-0x02: log, halt, int+halt. */
typedef enum GB_EDC_DED_MODE {
	GB_EDC_DED_MODE_LOG = 0x00,
	GB_EDC_DED_MODE_HALT = 0x01,
	GB_EDC_DED_MODE_INT_HALT = 0x02,
} GB_EDC_DED_MODE;
/* GB tiling-config table sizes (presumably entry counts - confirm). */
#define GB_TILING_CONFIG_TABLE_SIZE 0x20
#define GB_TILING_CONFIG_MACROTABLE_SIZE 0x10
/*
 * GRBM_PERF_SEL - GRBM performance-counter select encodings, 0x00-0x21.
 * Several slots (0x0a, 0x10, 0x12-0x16) are named RESERVED_n.
 */
typedef enum GRBM_PERF_SEL {
	GRBM_PERF_SEL_COUNT = 0x00,
	GRBM_PERF_SEL_USER_DEFINED = 0x01,
	GRBM_PERF_SEL_GUI_ACTIVE = 0x02,
	GRBM_PERF_SEL_CP_BUSY = 0x03,
	GRBM_PERF_SEL_CP_COHER_BUSY = 0x04,
	GRBM_PERF_SEL_CP_DMA_BUSY = 0x05,
	GRBM_PERF_SEL_CB_BUSY = 0x06,
	GRBM_PERF_SEL_DB_BUSY = 0x07,
	GRBM_PERF_SEL_PA_BUSY = 0x08,
	GRBM_PERF_SEL_SC_BUSY = 0x09,
	GRBM_PERF_SEL_RESERVED_6 = 0x0a,
	GRBM_PERF_SEL_SPI_BUSY = 0x0b,
	GRBM_PERF_SEL_SX_BUSY = 0x0c,
	GRBM_PERF_SEL_TA_BUSY = 0x0d,
	GRBM_PERF_SEL_CB_CLEAN = 0x0e,
	GRBM_PERF_SEL_DB_CLEAN = 0x0f,
	GRBM_PERF_SEL_RESERVED_5 = 0x10,
	GRBM_PERF_SEL_VGT_BUSY = 0x11,
	GRBM_PERF_SEL_RESERVED_4 = 0x12,
	GRBM_PERF_SEL_RESERVED_3 = 0x13,
	GRBM_PERF_SEL_RESERVED_2 = 0x14,
	GRBM_PERF_SEL_RESERVED_1 = 0x15,
	GRBM_PERF_SEL_RESERVED_0 = 0x16,
	GRBM_PERF_SEL_IA_BUSY = 0x17,
	GRBM_PERF_SEL_IA_NO_DMA_BUSY = 0x18,
	GRBM_PERF_SEL_GDS_BUSY = 0x19,
	GRBM_PERF_SEL_BCI_BUSY = 0x1a,
	GRBM_PERF_SEL_RLC_BUSY = 0x1b,
	GRBM_PERF_SEL_TC_BUSY = 0x1c,
	GRBM_PERF_SEL_CPG_BUSY = 0x1d,
	GRBM_PERF_SEL_CPC_BUSY = 0x1e,
	GRBM_PERF_SEL_CPF_BUSY = 0x1f,
	GRBM_PERF_SEL_WD_BUSY = 0x20,
	GRBM_PERF_SEL_WD_NO_DMA_BUSY = 0x21,
} GRBM_PERF_SEL;
/*
 * GRBM_SE0_PERF_SEL - GRBM SE0 performance-counter select encodings,
 * 0x00-0x0e. Slots 0x05 and 0x0b are named RESERVED_n.
 */
typedef enum GRBM_SE0_PERF_SEL {
	GRBM_SE0_PERF_SEL_COUNT = 0x00,
	GRBM_SE0_PERF_SEL_USER_DEFINED = 0x01,
	GRBM_SE0_PERF_SEL_CB_BUSY = 0x02,
	GRBM_SE0_PERF_SEL_DB_BUSY = 0x03,
	GRBM_SE0_PERF_SEL_SC_BUSY = 0x04,
	GRBM_SE0_PERF_SEL_RESERVED_1 = 0x05,
	GRBM_SE0_PERF_SEL_SPI_BUSY = 0x06,
	GRBM_SE0_PERF_SEL_SX_BUSY = 0x07,
	GRBM_SE0_PERF_SEL_TA_BUSY = 0x08,
	GRBM_SE0_PERF_SEL_CB_CLEAN = 0x09,
	GRBM_SE0_PERF_SEL_DB_CLEAN = 0x0a,
	GRBM_SE0_PERF_SEL_RESERVED_0 = 0x0b,
	GRBM_SE0_PERF_SEL_PA_BUSY = 0x0c,
	GRBM_SE0_PERF_SEL_VGT_BUSY = 0x0d,
	GRBM_SE0_PERF_SEL_BCI_BUSY = 0x0e,
} GRBM_SE0_PERF_SEL;
/*
 * GRBM_SE1_PERF_SEL - GRBM SE1 performance-counter select encodings,
 * 0x00-0x0e. Slots 0x05 and 0x0b are named RESERVED_n.
 */
typedef enum GRBM_SE1_PERF_SEL {
	GRBM_SE1_PERF_SEL_COUNT = 0x00,
	GRBM_SE1_PERF_SEL_USER_DEFINED = 0x01,
	GRBM_SE1_PERF_SEL_CB_BUSY = 0x02,
	GRBM_SE1_PERF_SEL_DB_BUSY = 0x03,
	GRBM_SE1_PERF_SEL_SC_BUSY = 0x04,
	GRBM_SE1_PERF_SEL_RESERVED_1 = 0x05,
	GRBM_SE1_PERF_SEL_SPI_BUSY = 0x06,
	GRBM_SE1_PERF_SEL_SX_BUSY = 0x07,
	GRBM_SE1_PERF_SEL_TA_BUSY = 0x08,
	GRBM_SE1_PERF_SEL_CB_CLEAN = 0x09,
	GRBM_SE1_PERF_SEL_DB_CLEAN = 0x0a,
	GRBM_SE1_PERF_SEL_RESERVED_0 = 0x0b,
	GRBM_SE1_PERF_SEL_PA_BUSY = 0x0c,
	GRBM_SE1_PERF_SEL_VGT_BUSY = 0x0d,
	GRBM_SE1_PERF_SEL_BCI_BUSY = 0x0e,
} GRBM_SE1_PERF_SEL;
/*
 * GRBM_SE2_PERF_SEL - GRBM SE2 performance-counter select encodings,
 * 0x00-0x0e. Slots 0x05 and 0x0b are named RESERVED_n.
 */
typedef enum GRBM_SE2_PERF_SEL {
	GRBM_SE2_PERF_SEL_COUNT = 0x00,
	GRBM_SE2_PERF_SEL_USER_DEFINED = 0x01,
	GRBM_SE2_PERF_SEL_CB_BUSY = 0x02,
	GRBM_SE2_PERF_SEL_DB_BUSY = 0x03,
	GRBM_SE2_PERF_SEL_SC_BUSY = 0x04,
	GRBM_SE2_PERF_SEL_RESERVED_1 = 0x05,
	GRBM_SE2_PERF_SEL_SPI_BUSY = 0x06,
	GRBM_SE2_PERF_SEL_SX_BUSY = 0x07,
	GRBM_SE2_PERF_SEL_TA_BUSY = 0x08,
	GRBM_SE2_PERF_SEL_CB_CLEAN = 0x09,
	GRBM_SE2_PERF_SEL_DB_CLEAN = 0x0a,
	GRBM_SE2_PERF_SEL_RESERVED_0 = 0x0b,
	GRBM_SE2_PERF_SEL_PA_BUSY = 0x0c,
	GRBM_SE2_PERF_SEL_VGT_BUSY = 0x0d,
	GRBM_SE2_PERF_SEL_BCI_BUSY = 0x0e,
} GRBM_SE2_PERF_SEL;
/*
 * GRBM_SE3_PERF_SEL - GRBM SE3 performance-counter select encodings,
 * 0x00-0x0e. Slots 0x05 and 0x0b are named RESERVED_n.
 */
typedef enum GRBM_SE3_PERF_SEL {
	GRBM_SE3_PERF_SEL_COUNT = 0x00,
	GRBM_SE3_PERF_SEL_USER_DEFINED = 0x01,
	GRBM_SE3_PERF_SEL_CB_BUSY = 0x02,
	GRBM_SE3_PERF_SEL_DB_BUSY = 0x03,
	GRBM_SE3_PERF_SEL_SC_BUSY = 0x04,
	GRBM_SE3_PERF_SEL_RESERVED_1 = 0x05,
	GRBM_SE3_PERF_SEL_SPI_BUSY = 0x06,
	GRBM_SE3_PERF_SEL_SX_BUSY = 0x07,
	GRBM_SE3_PERF_SEL_TA_BUSY = 0x08,
	GRBM_SE3_PERF_SEL_CB_CLEAN = 0x09,
	GRBM_SE3_PERF_SEL_DB_CLEAN = 0x0a,
	GRBM_SE3_PERF_SEL_RESERVED_0 = 0x0b,
	GRBM_SE3_PERF_SEL_PA_BUSY = 0x0c,
	GRBM_SE3_PERF_SEL_VGT_BUSY = 0x0d,
	GRBM_SE3_PERF_SEL_BCI_BUSY = 0x0e,
} GRBM_SE3_PERF_SEL;
typedef enum SU_PERFCNT_SEL {
	PERF_PAPC_PASX_REQ                               = 0x0,
	PERF_PAPC_PASX_DISABLE_PIPE                      = 0x1,
	PERF_PAPC_PASX_FIRST_VECTOR                      = 0x2,
	PERF_PAPC_PASX_SECOND_VECTOR                     = 0x3,
	PERF_PAPC_PASX_FIRST_DEAD                        = 0x4,
	PERF_PAPC_PASX_SECOND_DEAD                       = 0x5,
	PERF_PAPC_PASX_VTX_KILL_DISCARD                  = 0x6,
	PERF_PAPC_PASX_VTX_NAN_DISCARD                   = 0x7,
	PERF_PAPC_PA_INPUT_PRIM                          = 0x8,
	PERF_PAPC_PA_INPUT_NULL_PRIM                     = 0x9,
	PERF_PAPC_PA_INPUT_EVENT_FLAG                    = 0xa,
	PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT               = 0xb,
	PERF_PAPC_PA_INPUT_END_OF_PACKET                 = 0xc,
	PERF_PAPC_PA_INPUT_EXTENDED_EVENT                = 0xd,
	PERF_PAPC_CLPR_CULL_PRIM                         = 0xe,
	PERF_PAPC_CLPR_VVUCP_CULL_PRIM                   = 0xf,
	PERF_PAPC_CLPR_VV_CULL_PRIM                      = 0x10,
	PERF_PAPC_CLPR_UCP_CULL_PRIM                     = 0x11,
	PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM                = 0x12,
	PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM                 = 0x13,
	PERF_PAPC_CLPR_CULL_TO_NULL_PRIM                 = 0x14,
	PERF_PAPC_CLPR_VVUCP_CLIP_PRIM                   = 0x15,
	PERF_PAPC_CLPR_VV_CLIP_PRIM                      = 0x16,
	PERF_PAPC_CLPR_UCP_CLIP_PRIM                     = 0x17,
	PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE              = 0x18,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_1                  = 0x19,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_2                  = 0x1a,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_3                  = 0x1b,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_4                  = 0x1c,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_5_8                = 0x1d,
	PERF_PAPC_CLPR_CLIP_PLANE_CNT_9_12               = 0x1e,
	PERF_PAPC_CLPR_CLIP_PLANE_NEAR                   = 0x1f,
	PERF_PAPC_CLPR_CLIP_PLANE_FAR                    = 0x20,
	PERF_PAPC_CLPR_CLIP_PLANE_LEFT                   = 0x21,
	PERF_PAPC_CLPR_CLIP_PLANE_RIGHT                  = 0x22,
	PERF_PAPC_CLPR_CLIP_PLANE_TOP                    = 0x23,
	PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM                 = 0x24,
	PERF_PAPC_CLPR_GSC_KILL_CULL_PRIM                = 0x25,
	PERF_PAPC_CLPR_RASTER_KILL_CULL_PRIM             = 0x26,
	PERF_PAPC_CLSM_NULL_PRIM                         = 0x27,
	PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM              = 0x28,
	PERF_PAPC_CLSM_CULL_TO_NULL_PRIM                 = 0x29,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_1                    = 0x2a,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_2                    = 0x2b,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_3                    = 0x2c,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_4                    = 0x2d,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_5_8                  = 0x2e,
	PERF_PAPC_CLSM_OUT_PRIM_CNT_9_13                 = 0x2f,
	PERF_PAPC_CLIPGA_VTE_KILL_PRIM                   = 0x30,
	PERF_PAPC_SU_INPUT_PRIM                          = 0x31,
	PERF_PAPC_SU_INPUT_CLIP_PRIM                     = 0x32,
	PERF_PAPC_SU_INPUT_NULL_PRIM                     = 0x33,
	PERF_PAPC_SU_INPUT_PRIM_DUAL                     = 0x34,
	PERF_PAPC_SU_INPUT_CLIP_PRIM_DUAL                = 0x35,
	PERF_PAPC_SU_ZERO_AREA_CULL_PRIM                 = 0x36,
	PERF_PAPC_SU_BACK_FACE_CULL_PRIM                 = 0x37,
	PERF_PAPC_SU_FRONT_FACE_CULL_PRIM                = 0x38,
	PERF_PAPC_SU_POLYMODE_FACE_CULL                  = 0x39,
	PERF_PAPC_SU_POLYMODE_BACK_CULL                  = 0x3a,
	PERF_PAPC_SU_POLYMODE_FRONT_CULL                 = 0x3b,
	PERF_PAPC_SU_POLYMODE_INVALID_FILL               = 0x3c,
	PERF_PAPC_SU_OUTPUT_PRIM                         = 0x3d,
	PERF_PAPC_SU_OUTPUT_CLIP_PRIM                    = 0x3e,
	PERF_PAPC_SU_OUTPUT_NULL_PRIM                    = 0x3f,
	PERF_PAPC_SU_OUTPUT_EVENT_FLAG                   = 0x40,
	PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT              = 0x41,
	PERF_PAPC_SU_OUTPUT_END_OF_PACKET                = 0x42,
	PERF_PAPC_SU_OUTPUT_POLYMODE_FACE                = 0x43,
	PERF_PAPC_SU_OUTPUT_POLYMODE_BACK                = 0x44,
	PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT               = 0x45,
	PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE              = 0x46,
	PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK              = 0x47,
	PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT             = 0x48,
	PERF_PAPC_SU_OUTPUT_PRIM_DUAL                    = 0x49,
	PERF_PAPC_SU_OUTPUT_CLIP_PRIM_DUAL               = 0x4a,
	PERF_PAPC_SU_OUTPUT_POLYMODE_DUAL                = 0x4b,
	PERF_PAPC_SU_OUTPUT_CLIP_POLYMODE_DUAL           = 0x4c,
	PERF_PAPC_PASX_REQ_IDLE                          = 0x4d,
	PERF_PAPC_PASX_REQ_BUSY                          = 0x4e,
	PERF_PAPC_PASX_REQ_STALLED                       = 0x4f,
	PERF_PAPC_PASX_REC_IDLE                          = 0x50,
	PERF_PAPC_PASX_REC_BUSY                          = 0x51,
	PERF_PAPC_PASX_REC_STARVED_SX                    = 0x52,
	PERF_PAPC_PASX_REC_STALLED                       = 0x53,
	PERF_PAPC_PASX_REC_STALLED_POS_MEM               = 0x54,
	PERF_PAPC_PASX_REC_STALLED_CCGSM_IN              = 0x55,
	PERF_PAPC_CCGSM_IDLE                             = 0x56,
	PERF_PAPC_CCGSM_BUSY                             = 0x57,
	PERF_PAPC_CCGSM_STALLED                          = 0x58,
	PERF_PAPC_CLPRIM_IDLE                            = 0x59,
	PERF_PAPC_CLPRIM_BUSY                            = 0x5a,
	PERF_PAPC_CLPRIM_STALLED                         = 0x5b,
	PERF_PAPC_CLPRIM_STARVED_CCGSM                   = 0x5c,
	PERF_PAPC_CLIPSM_IDLE                            = 0x5d,
	PERF_PAPC_CLIPSM_BUSY                            = 0x5e,
	PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH             = 0x5f,
	PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ               = 0x60,
	PERF_PAPC_CLIPSM_WAIT_CLIPGA                     = 0x61,
	PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP             = 0x62,
	PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM                 = 0x63,
	PERF_PAPC_CLIPGA_IDLE                            = 0x64,
	PERF_PAPC_CLIPGA_BUSY                            = 0x65,
	PERF_PAPC_CLIPGA_STARVED_VTE_CLIP                = 0x66,
	PERF_PAPC_CLIPGA_STALLED                         = 0x67,
	PERF_PAPC_CLIP_IDLE                              = 0x68,
	PERF_PAPC_CLIP_BUSY                              = 0x69,
	PERF_PAPC_SU_IDLE                                = 0x6a,
	PERF_PAPC_SU_BUSY                                = 0x6b,
	PERF_PAPC_SU_STARVED_CLIP                        = 0x6c,
	PERF_PAPC_SU_STALLED_SC                          = 0x6d,
	PERF_PAPC_CL_DYN_SCLK_VLD                        = 0x6e,
	PERF_PAPC_SU_DYN_SCLK_VLD                        = 0x6f,
	PERF_PAPC_PA_REG_SCLK_VLD                        = 0x70,
	PERF_PAPC_SU_MULTI_GPU_PRIM_FILTER_CULL          = 0x71,
	PERF_PAPC_PASX_SE0_REQ                           = 0x72,
	PERF_PAPC_PASX_SE1_REQ                           = 0x73,
	PERF_PAPC_PASX_SE0_FIRST_VECTOR                  = 0x74,
	PERF_PAPC_PASX_SE0_SECOND_VECTOR                 = 0x75,
	PERF_PAPC_PASX_SE1_FIRST_VECTOR                  = 0x76,
	PERF_PAPC_PASX_SE1_SECOND_VECTOR                 = 0x77,
	PERF_PAPC_SU_SE0_PRIM_FILTER_CULL                = 0x78,
	PERF_PAPC_SU_SE1_PRIM_FILTER_CULL                = 0x79,
	PERF_PAPC_SU_SE01_PRIM_FILTER_CULL               = 0x7a,
	PERF_PAPC_SU_SE0_OUTPUT_PRIM                     = 0x7b,
	PERF_PAPC_SU_SE1_OUTPUT_PRIM                     = 0x7c,
	PERF_PAPC_SU_SE01_OUTPUT_PRIM                    = 0x7d,
	PERF_PAPC_SU_SE0_OUTPUT_NULL_PRIM                = 0x7e,
	PERF_PAPC_SU_SE1_OUTPUT_NULL_PRIM                = 0x7f,
	PERF_PAPC_SU_SE01_OUTPUT_NULL_PRIM               = 0x80,
	PERF_PAPC_SU_SE0_OUTPUT_FIRST_PRIM_SLOT          = 0x81,
	PERF_PAPC_SU_SE1_OUTPUT_FIRST_PRIM_SLOT          = 0x82,
	PERF_PAPC_SU_SE0_STALLED_SC                      = 0x83,
	PERF_PAPC_SU_SE1_STALLED_SC                      = 0x84,
	PERF_PAPC_SU_SE01_STALLED_SC                     = 0x85,
	PERF_PAPC_CLSM_CLIPPING_PRIM                     = 0x86,
	PERF_PAPC_SU_CULLED_PRIM                         = 0x87,
	PERF_PAPC_SU_OUTPUT_EOPG                         = 0x88,
	PERF_PAPC_SU_SE2_PRIM_FILTER_CULL                = 0x89,
	PERF_PAPC_SU_SE3_PRIM_FILTER_CULL                = 0x8a,
	PERF_PAPC_SU_SE2_OUTPUT_PRIM                     = 0x8b,
	PERF_PAPC_SU_SE3_OUTPUT_PRIM                     = 0x8c,
	PERF_PAPC_SU_SE2_OUTPUT_NULL_PRIM                = 0x8d,
	PERF_PAPC_SU_SE3_OUTPUT_NULL_PRIM                = 0x8e,
	PERF_PAPC_SU_SE0_OUTPUT_END_OF_PACKET            = 0x8f,
	PERF_PAPC_SU_SE1_OUTPUT_END_OF_PACKET            = 0x90,
	PERF_PAPC_SU_SE2_OUTPUT_END_OF_PACKET            = 0x91,
	PERF_PAPC_SU_SE3_OUTPUT_END_OF_PACKET            = 0x92,
	PERF_PAPC_SU_SE0_OUTPUT_EOPG                     = 0x93,
	PERF_PAPC_SU_SE1_OUTPUT_EOPG                     = 0x94,
	PERF_PAPC_SU_SE2_OUTPUT_EOPG                     = 0x95,
	PERF_PAPC_SU_SE3_OUTPUT_EOPG                     = 0x96,
	PERF_PAPC_SU_SE2_STALLED_SC                      = 0x97,
	PERF_PAPC_SU_SE3_STALLED_SC                      = 0x98,
} SU_PERFCNT_SEL;
/*
 * SC_PERFCNT_SEL - selector codes 0x0 through 0x18a, assigned consecutively
 * with no gaps.
 *
 * NOTE(review): presumably the performance-counter select values for the
 * SC hardware block; confirm against the register specification.
 * The range comments below only summarise the enumerator name patterns.
 */
typedef enum SC_PERFCNT_SEL {
	/* 0x0-0x9: *_WINDOW_VALID and *_WINDOW_VALID_BUSY selectors */
	SC_SRPS_WINDOW_VALID                             = 0x0,
	SC_PSSW_WINDOW_VALID                             = 0x1,
	SC_TPQZ_WINDOW_VALID                             = 0x2,
	SC_QZQP_WINDOW_VALID                             = 0x3,
	SC_TRPK_WINDOW_VALID                             = 0x4,
	SC_SRPS_WINDOW_VALID_BUSY                        = 0x5,
	SC_PSSW_WINDOW_VALID_BUSY                        = 0x6,
	SC_TPQZ_WINDOW_VALID_BUSY                        = 0x7,
	SC_QZQP_WINDOW_VALID_BUSY                        = 0x8,
	SC_TRPK_WINDOW_VALID_BUSY                        = 0x9,
	/* 0xa-0x14: STARVED_BY_* and STALLED_BY_* selectors */
	SC_STARVED_BY_PA                                 = 0xa,
	SC_STALLED_BY_PRIMFIFO                           = 0xb,
	SC_STALLED_BY_DB_TILE                            = 0xc,
	SC_STARVED_BY_DB_TILE                            = 0xd,
	SC_STALLED_BY_TILEORDERFIFO                      = 0xe,
	SC_STALLED_BY_TILEFIFO                           = 0xf,
	SC_STALLED_BY_DB_QUAD                            = 0x10,
	SC_STARVED_BY_DB_QUAD                            = 0x11,
	SC_STALLED_BY_QUADFIFO                           = 0x12,
	SC_STALLED_BY_BCI                                = 0x13,
	SC_STALLED_BY_SPI                                = 0x14,
	/*
	 * 0x15-0x4e: discard, supertile and tile selectors, including the
	 * H-indexed series (presumably histogram buckets - TODO confirm).
	 */
	SC_SCISSOR_DISCARD                               = 0x15,
	SC_BB_DISCARD                                    = 0x16,
	SC_SUPERTILE_COUNT                               = 0x17,
	SC_SUPERTILE_PER_PRIM_H0                         = 0x18,
	SC_SUPERTILE_PER_PRIM_H1                         = 0x19,
	SC_SUPERTILE_PER_PRIM_H2                         = 0x1a,
	SC_SUPERTILE_PER_PRIM_H3                         = 0x1b,
	SC_SUPERTILE_PER_PRIM_H4                         = 0x1c,
	SC_SUPERTILE_PER_PRIM_H5                         = 0x1d,
	SC_SUPERTILE_PER_PRIM_H6                         = 0x1e,
	SC_SUPERTILE_PER_PRIM_H7                         = 0x1f,
	SC_SUPERTILE_PER_PRIM_H8                         = 0x20,
	SC_SUPERTILE_PER_PRIM_H9                         = 0x21,
	SC_SUPERTILE_PER_PRIM_H10                        = 0x22,
	SC_SUPERTILE_PER_PRIM_H11                        = 0x23,
	SC_SUPERTILE_PER_PRIM_H12                        = 0x24,
	SC_SUPERTILE_PER_PRIM_H13                        = 0x25,
	SC_SUPERTILE_PER_PRIM_H14                        = 0x26,
	SC_SUPERTILE_PER_PRIM_H15                        = 0x27,
	SC_SUPERTILE_PER_PRIM_H16                        = 0x28,
	SC_TILE_PER_PRIM_H0                              = 0x29,
	SC_TILE_PER_PRIM_H1                              = 0x2a,
	SC_TILE_PER_PRIM_H2                              = 0x2b,
	SC_TILE_PER_PRIM_H3                              = 0x2c,
	SC_TILE_PER_PRIM_H4                              = 0x2d,
	SC_TILE_PER_PRIM_H5                              = 0x2e,
	SC_TILE_PER_PRIM_H6                              = 0x2f,
	SC_TILE_PER_PRIM_H7                              = 0x30,
	SC_TILE_PER_PRIM_H8                              = 0x31,
	SC_TILE_PER_PRIM_H9                              = 0x32,
	SC_TILE_PER_PRIM_H10                             = 0x33,
	SC_TILE_PER_PRIM_H11                             = 0x34,
	SC_TILE_PER_PRIM_H12                             = 0x35,
	SC_TILE_PER_PRIM_H13                             = 0x36,
	SC_TILE_PER_PRIM_H14                             = 0x37,
	SC_TILE_PER_PRIM_H15                             = 0x38,
	SC_TILE_PER_PRIM_H16                             = 0x39,
	SC_TILE_PER_SUPERTILE_H0                         = 0x3a,
	SC_TILE_PER_SUPERTILE_H1                         = 0x3b,
	SC_TILE_PER_SUPERTILE_H2                         = 0x3c,
	SC_TILE_PER_SUPERTILE_H3                         = 0x3d,
	SC_TILE_PER_SUPERTILE_H4                         = 0x3e,
	SC_TILE_PER_SUPERTILE_H5                         = 0x3f,
	SC_TILE_PER_SUPERTILE_H6                         = 0x40,
	SC_TILE_PER_SUPERTILE_H7                         = 0x41,
	SC_TILE_PER_SUPERTILE_H8                         = 0x42,
	SC_TILE_PER_SUPERTILE_H9                         = 0x43,
	SC_TILE_PER_SUPERTILE_H10                        = 0x44,
	SC_TILE_PER_SUPERTILE_H11                        = 0x45,
	SC_TILE_PER_SUPERTILE_H12                        = 0x46,
	SC_TILE_PER_SUPERTILE_H13                        = 0x47,
	SC_TILE_PER_SUPERTILE_H14                        = 0x48,
	SC_TILE_PER_SUPERTILE_H15                        = 0x49,
	SC_TILE_PER_SUPERTILE_H16                        = 0x4a,
	SC_TILE_PICKED_H1                                = 0x4b,
	SC_TILE_PICKED_H2                                = 0x4c,
	SC_TILE_PICKED_H3                                = 0x4d,
	SC_TILE_PICKED_H4                                = 0x4e,
	/* 0x4f-0xa6: tile and quad selectors repeated for QZ0..QZ3 */
	SC_QZ0_MULTI_GPU_TILE_DISCARD                    = 0x4f,
	SC_QZ1_MULTI_GPU_TILE_DISCARD                    = 0x50,
	SC_QZ2_MULTI_GPU_TILE_DISCARD                    = 0x51,
	SC_QZ3_MULTI_GPU_TILE_DISCARD                    = 0x52,
	SC_QZ0_TILE_COUNT                                = 0x53,
	SC_QZ1_TILE_COUNT                                = 0x54,
	SC_QZ2_TILE_COUNT                                = 0x55,
	SC_QZ3_TILE_COUNT                                = 0x56,
	SC_QZ0_TILE_COVERED_COUNT                        = 0x57,
	SC_QZ1_TILE_COVERED_COUNT                        = 0x58,
	SC_QZ2_TILE_COVERED_COUNT                        = 0x59,
	SC_QZ3_TILE_COVERED_COUNT                        = 0x5a,
	SC_QZ0_TILE_NOT_COVERED_COUNT                    = 0x5b,
	SC_QZ1_TILE_NOT_COVERED_COUNT                    = 0x5c,
	SC_QZ2_TILE_NOT_COVERED_COUNT                    = 0x5d,
	SC_QZ3_TILE_NOT_COVERED_COUNT                    = 0x5e,
	SC_QZ0_QUAD_PER_TILE_H0                          = 0x5f,
	SC_QZ0_QUAD_PER_TILE_H1                          = 0x60,
	SC_QZ0_QUAD_PER_TILE_H2                          = 0x61,
	SC_QZ0_QUAD_PER_TILE_H3                          = 0x62,
	SC_QZ0_QUAD_PER_TILE_H4                          = 0x63,
	SC_QZ0_QUAD_PER_TILE_H5                          = 0x64,
	SC_QZ0_QUAD_PER_TILE_H6                          = 0x65,
	SC_QZ0_QUAD_PER_TILE_H7                          = 0x66,
	SC_QZ0_QUAD_PER_TILE_H8                          = 0x67,
	SC_QZ0_QUAD_PER_TILE_H9                          = 0x68,
	SC_QZ0_QUAD_PER_TILE_H10                         = 0x69,
	SC_QZ0_QUAD_PER_TILE_H11                         = 0x6a,
	SC_QZ0_QUAD_PER_TILE_H12                         = 0x6b,
	SC_QZ0_QUAD_PER_TILE_H13                         = 0x6c,
	SC_QZ0_QUAD_PER_TILE_H14                         = 0x6d,
	SC_QZ0_QUAD_PER_TILE_H15                         = 0x6e,
	SC_QZ0_QUAD_PER_TILE_H16                         = 0x6f,
	SC_QZ1_QUAD_PER_TILE_H0                          = 0x70,
	SC_QZ1_QUAD_PER_TILE_H1                          = 0x71,
	SC_QZ1_QUAD_PER_TILE_H2                          = 0x72,
	SC_QZ1_QUAD_PER_TILE_H3                          = 0x73,
	SC_QZ1_QUAD_PER_TILE_H4                          = 0x74,
	SC_QZ1_QUAD_PER_TILE_H5                          = 0x75,
	SC_QZ1_QUAD_PER_TILE_H6                          = 0x76,
	SC_QZ1_QUAD_PER_TILE_H7                          = 0x77,
	SC_QZ1_QUAD_PER_TILE_H8                          = 0x78,
	SC_QZ1_QUAD_PER_TILE_H9                          = 0x79,
	SC_QZ1_QUAD_PER_TILE_H10                         = 0x7a,
	SC_QZ1_QUAD_PER_TILE_H11                         = 0x7b,
	SC_QZ1_QUAD_PER_TILE_H12                         = 0x7c,
	SC_QZ1_QUAD_PER_TILE_H13                         = 0x7d,
	SC_QZ1_QUAD_PER_TILE_H14                         = 0x7e,
	SC_QZ1_QUAD_PER_TILE_H15                         = 0x7f,
	SC_QZ1_QUAD_PER_TILE_H16                         = 0x80,
	SC_QZ2_QUAD_PER_TILE_H0                          = 0x81,
	SC_QZ2_QUAD_PER_TILE_H1                          = 0x82,
	SC_QZ2_QUAD_PER_TILE_H2                          = 0x83,
	SC_QZ2_QUAD_PER_TILE_H3                          = 0x84,
	SC_QZ2_QUAD_PER_TILE_H4                          = 0x85,
	SC_QZ2_QUAD_PER_TILE_H5                          = 0x86,
	SC_QZ2_QUAD_PER_TILE_H6                          = 0x87,
	SC_QZ2_QUAD_PER_TILE_H7                          = 0x88,
	SC_QZ2_QUAD_PER_TILE_H8                          = 0x89,
	SC_QZ2_QUAD_PER_TILE_H9                          = 0x8a,
	SC_QZ2_QUAD_PER_TILE_H10                         = 0x8b,
	SC_QZ2_QUAD_PER_TILE_H11                         = 0x8c,
	SC_QZ2_QUAD_PER_TILE_H12                         = 0x8d,
	SC_QZ2_QUAD_PER_TILE_H13                         = 0x8e,
	SC_QZ2_QUAD_PER_TILE_H14                         = 0x8f,
	SC_QZ2_QUAD_PER_TILE_H15                         = 0x90,
	SC_QZ2_QUAD_PER_TILE_H16                         = 0x91,
	SC_QZ3_QUAD_PER_TILE_H0                          = 0x92,
	SC_QZ3_QUAD_PER_TILE_H1                          = 0x93,
	SC_QZ3_QUAD_PER_TILE_H2                          = 0x94,
	SC_QZ3_QUAD_PER_TILE_H3                          = 0x95,
	SC_QZ3_QUAD_PER_TILE_H4                          = 0x96,
	SC_QZ3_QUAD_PER_TILE_H5                          = 0x97,
	SC_QZ3_QUAD_PER_TILE_H6                          = 0x98,
	SC_QZ3_QUAD_PER_TILE_H7                          = 0x99,
	SC_QZ3_QUAD_PER_TILE_H8                          = 0x9a,
	SC_QZ3_QUAD_PER_TILE_H9                          = 0x9b,
	SC_QZ3_QUAD_PER_TILE_H10                         = 0x9c,
	SC_QZ3_QUAD_PER_TILE_H11                         = 0x9d,
	SC_QZ3_QUAD_PER_TILE_H12                         = 0x9e,
	SC_QZ3_QUAD_PER_TILE_H13                         = 0x9f,
	SC_QZ3_QUAD_PER_TILE_H14                         = 0xa0,
	SC_QZ3_QUAD_PER_TILE_H15                         = 0xa1,
	SC_QZ3_QUAD_PER_TILE_H16                         = 0xa2,
	SC_QZ0_QUAD_COUNT                                = 0xa3,
	SC_QZ1_QUAD_COUNT                                = 0xa4,
	SC_QZ2_QUAD_COUNT                                = 0xa5,
	SC_QZ3_QUAD_COUNT                                = 0xa6,
	/* 0xa7-0x106: HIZ and DETAIL selectors repeated for P0..P3 */
	SC_P0_HIZ_TILE_COUNT                             = 0xa7,
	SC_P1_HIZ_TILE_COUNT                             = 0xa8,
	SC_P2_HIZ_TILE_COUNT                             = 0xa9,
	SC_P3_HIZ_TILE_COUNT                             = 0xaa,
	SC_P0_HIZ_QUAD_PER_TILE_H0                       = 0xab,
	SC_P0_HIZ_QUAD_PER_TILE_H1                       = 0xac,
	SC_P0_HIZ_QUAD_PER_TILE_H2                       = 0xad,
	SC_P0_HIZ_QUAD_PER_TILE_H3                       = 0xae,
	SC_P0_HIZ_QUAD_PER_TILE_H4                       = 0xaf,
	SC_P0_HIZ_QUAD_PER_TILE_H5                       = 0xb0,
	SC_P0_HIZ_QUAD_PER_TILE_H6                       = 0xb1,
	SC_P0_HIZ_QUAD_PER_TILE_H7                       = 0xb2,
	SC_P0_HIZ_QUAD_PER_TILE_H8                       = 0xb3,
	SC_P0_HIZ_QUAD_PER_TILE_H9                       = 0xb4,
	SC_P0_HIZ_QUAD_PER_TILE_H10                      = 0xb5,
	SC_P0_HIZ_QUAD_PER_TILE_H11                      = 0xb6,
	SC_P0_HIZ_QUAD_PER_TILE_H12                      = 0xb7,
	SC_P0_HIZ_QUAD_PER_TILE_H13                      = 0xb8,
	SC_P0_HIZ_QUAD_PER_TILE_H14                      = 0xb9,
	SC_P0_HIZ_QUAD_PER_TILE_H15                      = 0xba,
	SC_P0_HIZ_QUAD_PER_TILE_H16                      = 0xbb,
	SC_P1_HIZ_QUAD_PER_TILE_H0                       = 0xbc,
	SC_P1_HIZ_QUAD_PER_TILE_H1                       = 0xbd,
	SC_P1_HIZ_QUAD_PER_TILE_H2                       = 0xbe,
	SC_P1_HIZ_QUAD_PER_TILE_H3                       = 0xbf,
	SC_P1_HIZ_QUAD_PER_TILE_H4                       = 0xc0,
	SC_P1_HIZ_QUAD_PER_TILE_H5                       = 0xc1,
	SC_P1_HIZ_QUAD_PER_TILE_H6                       = 0xc2,
	SC_P1_HIZ_QUAD_PER_TILE_H7                       = 0xc3,
	SC_P1_HIZ_QUAD_PER_TILE_H8                       = 0xc4,
	SC_P1_HIZ_QUAD_PER_TILE_H9                       = 0xc5,
	SC_P1_HIZ_QUAD_PER_TILE_H10                      = 0xc6,
	SC_P1_HIZ_QUAD_PER_TILE_H11                      = 0xc7,
	SC_P1_HIZ_QUAD_PER_TILE_H12                      = 0xc8,
	SC_P1_HIZ_QUAD_PER_TILE_H13                      = 0xc9,
	SC_P1_HIZ_QUAD_PER_TILE_H14                      = 0xca,
	SC_P1_HIZ_QUAD_PER_TILE_H15                      = 0xcb,
	SC_P1_HIZ_QUAD_PER_TILE_H16                      = 0xcc,
	SC_P2_HIZ_QUAD_PER_TILE_H0                       = 0xcd,
	SC_P2_HIZ_QUAD_PER_TILE_H1                       = 0xce,
	SC_P2_HIZ_QUAD_PER_TILE_H2                       = 0xcf,
	SC_P2_HIZ_QUAD_PER_TILE_H3                       = 0xd0,
	SC_P2_HIZ_QUAD_PER_TILE_H4                       = 0xd1,
	SC_P2_HIZ_QUAD_PER_TILE_H5                       = 0xd2,
	SC_P2_HIZ_QUAD_PER_TILE_H6                       = 0xd3,
	SC_P2_HIZ_QUAD_PER_TILE_H7                       = 0xd4,
	SC_P2_HIZ_QUAD_PER_TILE_H8                       = 0xd5,
	SC_P2_HIZ_QUAD_PER_TILE_H9                       = 0xd6,
	SC_P2_HIZ_QUAD_PER_TILE_H10                      = 0xd7,
	SC_P2_HIZ_QUAD_PER_TILE_H11                      = 0xd8,
	SC_P2_HIZ_QUAD_PER_TILE_H12                      = 0xd9,
	SC_P2_HIZ_QUAD_PER_TILE_H13                      = 0xda,
	SC_P2_HIZ_QUAD_PER_TILE_H14                      = 0xdb,
	SC_P2_HIZ_QUAD_PER_TILE_H15                      = 0xdc,
	SC_P2_HIZ_QUAD_PER_TILE_H16                      = 0xdd,
	SC_P3_HIZ_QUAD_PER_TILE_H0                       = 0xde,
	SC_P3_HIZ_QUAD_PER_TILE_H1                       = 0xdf,
	SC_P3_HIZ_QUAD_PER_TILE_H2                       = 0xe0,
	SC_P3_HIZ_QUAD_PER_TILE_H3                       = 0xe1,
	SC_P3_HIZ_QUAD_PER_TILE_H4                       = 0xe2,
	SC_P3_HIZ_QUAD_PER_TILE_H5                       = 0xe3,
	SC_P3_HIZ_QUAD_PER_TILE_H6                       = 0xe4,
	SC_P3_HIZ_QUAD_PER_TILE_H7                       = 0xe5,
	SC_P3_HIZ_QUAD_PER_TILE_H8                       = 0xe6,
	SC_P3_HIZ_QUAD_PER_TILE_H9                       = 0xe7,
	SC_P3_HIZ_QUAD_PER_TILE_H10                      = 0xe8,
	SC_P3_HIZ_QUAD_PER_TILE_H11                      = 0xe9,
	SC_P3_HIZ_QUAD_PER_TILE_H12                      = 0xea,
	SC_P3_HIZ_QUAD_PER_TILE_H13                      = 0xeb,
	SC_P3_HIZ_QUAD_PER_TILE_H14                      = 0xec,
	SC_P3_HIZ_QUAD_PER_TILE_H15                      = 0xed,
	SC_P3_HIZ_QUAD_PER_TILE_H16                      = 0xee,
	SC_P0_HIZ_QUAD_COUNT                             = 0xef,
	SC_P1_HIZ_QUAD_COUNT                             = 0xf0,
	SC_P2_HIZ_QUAD_COUNT                             = 0xf1,
	SC_P3_HIZ_QUAD_COUNT                             = 0xf2,
	SC_P0_DETAIL_QUAD_COUNT                          = 0xf3,
	SC_P1_DETAIL_QUAD_COUNT                          = 0xf4,
	SC_P2_DETAIL_QUAD_COUNT                          = 0xf5,
	SC_P3_DETAIL_QUAD_COUNT                          = 0xf6,
	SC_P0_DETAIL_QUAD_WITH_1_PIX                     = 0xf7,
	SC_P0_DETAIL_QUAD_WITH_2_PIX                     = 0xf8,
	SC_P0_DETAIL_QUAD_WITH_3_PIX                     = 0xf9,
	SC_P0_DETAIL_QUAD_WITH_4_PIX                     = 0xfa,
	SC_P1_DETAIL_QUAD_WITH_1_PIX                     = 0xfb,
	SC_P1_DETAIL_QUAD_WITH_2_PIX                     = 0xfc,
	SC_P1_DETAIL_QUAD_WITH_3_PIX                     = 0xfd,
	SC_P1_DETAIL_QUAD_WITH_4_PIX                     = 0xfe,
	SC_P2_DETAIL_QUAD_WITH_1_PIX                     = 0xff,
	SC_P2_DETAIL_QUAD_WITH_2_PIX                     = 0x100,
	SC_P2_DETAIL_QUAD_WITH_3_PIX                     = 0x101,
	SC_P2_DETAIL_QUAD_WITH_4_PIX                     = 0x102,
	SC_P3_DETAIL_QUAD_WITH_1_PIX                     = 0x103,
	SC_P3_DETAIL_QUAD_WITH_2_PIX                     = 0x104,
	SC_P3_DETAIL_QUAD_WITH_3_PIX                     = 0x105,
	SC_P3_DETAIL_QUAD_WITH_4_PIX                     = 0x106,
	/* 0x107-0x112: EARLYZ_* and PKR_* selectors */
	SC_EARLYZ_QUAD_COUNT                             = 0x107,
	SC_EARLYZ_QUAD_WITH_1_PIX                        = 0x108,
	SC_EARLYZ_QUAD_WITH_2_PIX                        = 0x109,
	SC_EARLYZ_QUAD_WITH_3_PIX                        = 0x10a,
	SC_EARLYZ_QUAD_WITH_4_PIX                        = 0x10b,
	SC_PKR_QUAD_PER_ROW_H1                           = 0x10c,
	SC_PKR_QUAD_PER_ROW_H2                           = 0x10d,
	SC_PKR_QUAD_PER_ROW_H3                           = 0x10e,
	SC_PKR_QUAD_PER_ROW_H4                           = 0x10f,
	SC_PKR_END_OF_VECTOR                             = 0x110,
	SC_PKR_CONTROL_XFER                              = 0x111,
	SC_PKR_DBHANG_FORCE_EOV                          = 0x112,
	/* 0x113-0x118: *_SCLK_BUSY selectors */
	SC_REG_SCLK_BUSY                                 = 0x113,
	SC_GRP0_DYN_SCLK_BUSY                            = 0x114,
	SC_GRP1_DYN_SCLK_BUSY                            = 0x115,
	SC_GRP2_DYN_SCLK_BUSY                            = 0x116,
	SC_GRP3_DYN_SCLK_BUSY                            = 0x117,
	SC_GRP4_DYN_SCLK_BUSY                            = 0x118,
	/*
	 * 0x119-0x184: PA<n>_SC_*, PS_ARB_*, SC_SPI_* and related FIFO, dealloc
	 * and sync selectors.
	 */
	SC_PA0_SC_DATA_FIFO_RD                           = 0x119,
	SC_PA0_SC_DATA_FIFO_WE                           = 0x11a,
	SC_PA1_SC_DATA_FIFO_RD                           = 0x11b,
	SC_PA1_SC_DATA_FIFO_WE                           = 0x11c,
	SC_PS_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES           = 0x11d,
	SC_PS_ARB_XFC_ONLY_PRIM_CYCLES                   = 0x11e,
	SC_PS_ARB_XFC_ONLY_ONE_INC_PER_PRIM              = 0x11f,
	SC_PS_ARB_STALLED_FROM_BELOW                     = 0x120,
	SC_PS_ARB_STARVED_FROM_ABOVE                     = 0x121,
	SC_PS_ARB_SC_BUSY                                = 0x122,
	SC_PS_ARB_PA_SC_BUSY                             = 0x123,
	SC_PA2_SC_DATA_FIFO_RD                           = 0x124,
	SC_PA2_SC_DATA_FIFO_WE                           = 0x125,
	SC_PA3_SC_DATA_FIFO_RD                           = 0x126,
	SC_PA3_SC_DATA_FIFO_WE                           = 0x127,
	SC_PA_SC_DEALLOC_0_0_WE                          = 0x128,
	SC_PA_SC_DEALLOC_0_1_WE                          = 0x129,
	SC_PA_SC_DEALLOC_1_0_WE                          = 0x12a,
	SC_PA_SC_DEALLOC_1_1_WE                          = 0x12b,
	SC_PA_SC_DEALLOC_2_0_WE                          = 0x12c,
	SC_PA_SC_DEALLOC_2_1_WE                          = 0x12d,
	SC_PA_SC_DEALLOC_3_0_WE                          = 0x12e,
	SC_PA_SC_DEALLOC_3_1_WE                          = 0x12f,
	SC_PA0_SC_EOP_WE                                 = 0x130,
	SC_PA0_SC_EOPG_WE                                = 0x131,
	SC_PA0_SC_EVENT_WE                               = 0x132,
	SC_PA1_SC_EOP_WE                                 = 0x133,
	SC_PA1_SC_EOPG_WE                                = 0x134,
	SC_PA1_SC_EVENT_WE                               = 0x135,
	SC_PA2_SC_EOP_WE                                 = 0x136,
	SC_PA2_SC_EOPG_WE                                = 0x137,
	SC_PA2_SC_EVENT_WE                               = 0x138,
	SC_PA3_SC_EOP_WE                                 = 0x139,
	SC_PA3_SC_EOPG_WE                                = 0x13a,
	SC_PA3_SC_EVENT_WE                               = 0x13b,
	SC_PS_ARB_OOO_THRESHOLD_SWITCH_TO_DESIRED_FIFO   = 0x13c,
	SC_PS_ARB_OOO_FIFO_EMPTY_SWITCH                  = 0x13d,
	SC_PS_ARB_NULL_PRIM_BUBBLE_POP                   = 0x13e,
	SC_PS_ARB_EOP_POP_SYNC_POP                       = 0x13f,
	SC_PS_ARB_EVENT_SYNC_POP                         = 0x140,
	SC_SC_PS_ENG_MULTICYCLE_BUBBLE                   = 0x141,
	SC_PA0_SC_FPOV_WE                                = 0x142,
	SC_PA1_SC_FPOV_WE                                = 0x143,
	SC_PA2_SC_FPOV_WE                                = 0x144,
	SC_PA3_SC_FPOV_WE                                = 0x145,
	SC_PA0_SC_LPOV_WE                                = 0x146,
	SC_PA1_SC_LPOV_WE                                = 0x147,
	SC_PA2_SC_LPOV_WE                                = 0x148,
	SC_PA3_SC_LPOV_WE                                = 0x149,
	SC_SC_SPI_DEALLOC_0_0                            = 0x14a,
	SC_SC_SPI_DEALLOC_0_1                            = 0x14b,
	SC_SC_SPI_DEALLOC_0_2                            = 0x14c,
	SC_SC_SPI_DEALLOC_1_0                            = 0x14d,
	SC_SC_SPI_DEALLOC_1_1                            = 0x14e,
	SC_SC_SPI_DEALLOC_1_2                            = 0x14f,
	SC_SC_SPI_DEALLOC_2_0                            = 0x150,
	SC_SC_SPI_DEALLOC_2_1                            = 0x151,
	SC_SC_SPI_DEALLOC_2_2                            = 0x152,
	SC_SC_SPI_DEALLOC_3_0                            = 0x153,
	SC_SC_SPI_DEALLOC_3_1                            = 0x154,
	SC_SC_SPI_DEALLOC_3_2                            = 0x155,
	SC_SC_SPI_FPOV_0                                 = 0x156,
	SC_SC_SPI_FPOV_1                                 = 0x157,
	SC_SC_SPI_FPOV_2                                 = 0x158,
	SC_SC_SPI_FPOV_3                                 = 0x159,
	SC_SC_SPI_EVENT                                  = 0x15a,
	SC_PS_TS_EVENT_FIFO_PUSH                         = 0x15b,
	SC_PS_TS_EVENT_FIFO_POP                          = 0x15c,
	SC_PS_CTX_DONE_FIFO_PUSH                         = 0x15d,
	SC_PS_CTX_DONE_FIFO_POP                          = 0x15e,
	SC_MULTICYCLE_BUBBLE_FREEZE                      = 0x15f,
	SC_EOP_SYNC_WINDOW                               = 0x160,
	SC_PA0_SC_NULL_WE                                = 0x161,
	SC_PA0_SC_NULL_DEALLOC_WE                        = 0x162,
	SC_PA0_SC_DATA_FIFO_EOPG_RD                      = 0x163,
	SC_PA0_SC_DATA_FIFO_EOP_RD                       = 0x164,
	SC_PA0_SC_DEALLOC_0_RD                           = 0x165,
	SC_PA0_SC_DEALLOC_1_RD                           = 0x166,
	SC_PA1_SC_DATA_FIFO_EOPG_RD                      = 0x167,
	SC_PA1_SC_DATA_FIFO_EOP_RD                       = 0x168,
	SC_PA1_SC_DEALLOC_0_RD                           = 0x169,
	SC_PA1_SC_DEALLOC_1_RD                           = 0x16a,
	SC_PA1_SC_NULL_WE                                = 0x16b,
	SC_PA1_SC_NULL_DEALLOC_WE                        = 0x16c,
	SC_PA2_SC_DATA_FIFO_EOPG_RD                      = 0x16d,
	SC_PA2_SC_DATA_FIFO_EOP_RD                       = 0x16e,
	SC_PA2_SC_DEALLOC_0_RD                           = 0x16f,
	SC_PA2_SC_DEALLOC_1_RD                           = 0x170,
	SC_PA2_SC_NULL_WE                                = 0x171,
	SC_PA2_SC_NULL_DEALLOC_WE                        = 0x172,
	SC_PA3_SC_DATA_FIFO_EOPG_RD                      = 0x173,
	SC_PA3_SC_DATA_FIFO_EOP_RD                       = 0x174,
	SC_PA3_SC_DEALLOC_0_RD                           = 0x175,
	SC_PA3_SC_DEALLOC_1_RD                           = 0x176,
	SC_PA3_SC_NULL_WE                                = 0x177,
	SC_PA3_SC_NULL_DEALLOC_WE                        = 0x178,
	SC_PS_PA0_SC_FIFO_EMPTY                          = 0x179,
	SC_PS_PA0_SC_FIFO_FULL                           = 0x17a,
	SC_PA0_PS_DATA_SEND                              = 0x17b,
	SC_PS_PA1_SC_FIFO_EMPTY                          = 0x17c,
	SC_PS_PA1_SC_FIFO_FULL                           = 0x17d,
	SC_PA1_PS_DATA_SEND                              = 0x17e,
	SC_PS_PA2_SC_FIFO_EMPTY                          = 0x17f,
	SC_PS_PA2_SC_FIFO_FULL                           = 0x180,
	SC_PA2_PS_DATA_SEND                              = 0x181,
	SC_PS_PA3_SC_FIFO_EMPTY                          = 0x182,
	SC_PS_PA3_SC_FIFO_FULL                           = 0x183,
	SC_PA3_PS_DATA_SEND                              = 0x184,
	/* 0x185-0x18a: busy-state selectors */
	SC_BUSY_PROCESSING_MULTICYCLE_PRIM               = 0x185,
	SC_BUSY_CNT_NOT_ZERO                             = 0x186,
	SC_BM_BUSY                                       = 0x187,
	SC_BACKEND_BUSY                                  = 0x188,
	SC_SCF_SCB_INTERFACE_BUSY                        = 0x189,
	SC_SCB_BUSY                                      = 0x18a,
} SC_PERFCNT_SEL;
/*
 * SePairXsel - the four RASTER_CONFIG_SE_PAIR_XSEL_* encodings (0..3).
 * The enumerator names label them as 8-, 16-, 32- and 64-wide tiles.
 */
typedef enum SePairXsel {
	RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SE_PAIR_XSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SE_PAIR_XSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SE_PAIR_XSEL_64_WIDE_TILE = 3,
} SePairXsel;
/*
 * SePairYsel - the four RASTER_CONFIG_SE_PAIR_YSEL_* encodings (0..3).
 * The enumerator names carry the labels 8/16/32/64 "WIDE_TILE".
 */
typedef enum SePairYsel {
	RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE = 3,
} SePairYsel;
/* SePairMap - the four RASTER_CONFIG_SE_PAIR_MAP_<n> encodings; value equals n. */
typedef enum SePairMap {
	RASTER_CONFIG_SE_PAIR_MAP_0 = 0,
	RASTER_CONFIG_SE_PAIR_MAP_1 = 1,
	RASTER_CONFIG_SE_PAIR_MAP_2 = 2,
	RASTER_CONFIG_SE_PAIR_MAP_3 = 3,
} SePairMap;
/*
 * SeXsel - the four RASTER_CONFIG_SE_XSEL_* encodings (0..3).
 * The enumerator names label them as 8-, 16-, 32- and 64-wide tiles.
 */
typedef enum SeXsel {
	RASTER_CONFIG_SE_XSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SE_XSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SE_XSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SE_XSEL_64_WIDE_TILE = 3,
} SeXsel;
/*
 * SeYsel - the four RASTER_CONFIG_SE_YSEL_* encodings (0..3).
 * The enumerator names carry the labels 8/16/32/64 "WIDE_TILE".
 */
typedef enum SeYsel {
	RASTER_CONFIG_SE_YSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SE_YSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SE_YSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SE_YSEL_64_WIDE_TILE = 3,
} SeYsel;
/* SeMap - the four RASTER_CONFIG_SE_MAP_<n> encodings; value equals n. */
typedef enum SeMap {
	RASTER_CONFIG_SE_MAP_0 = 0,
	RASTER_CONFIG_SE_MAP_1 = 1,
	RASTER_CONFIG_SE_MAP_2 = 2,
	RASTER_CONFIG_SE_MAP_3 = 3,
} SeMap;
/*
 * ScXsel - the four RASTER_CONFIG_SC_XSEL_* encodings (0..3).
 * The enumerator names label them as 8-, 16-, 32- and 64-wide tiles.
 */
typedef enum ScXsel {
	RASTER_CONFIG_SC_XSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SC_XSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SC_XSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SC_XSEL_64_WIDE_TILE = 3,
} ScXsel;
/*
 * ScYsel - the four RASTER_CONFIG_SC_YSEL_* encodings (0..3).
 * The enumerator names carry the labels 8/16/32/64 "WIDE_TILE".
 */
typedef enum ScYsel {
	RASTER_CONFIG_SC_YSEL_8_WIDE_TILE = 0,
	RASTER_CONFIG_SC_YSEL_16_WIDE_TILE = 1,
	RASTER_CONFIG_SC_YSEL_32_WIDE_TILE = 2,
	RASTER_CONFIG_SC_YSEL_64_WIDE_TILE = 3,
} ScYsel;
/* ScMap - the four RASTER_CONFIG_SC_MAP_<n> encodings; value equals n. */
typedef enum ScMap {
	RASTER_CONFIG_SC_MAP_0 = 0,
	RASTER_CONFIG_SC_MAP_1 = 1,
	RASTER_CONFIG_SC_MAP_2 = 2,
	RASTER_CONFIG_SC_MAP_3 = 3,
} ScMap;
/* PkrXsel2 - the four RASTER_CONFIG_PKR_XSEL2_<n> encodings; value equals n. */
typedef enum PkrXsel2 {
	RASTER_CONFIG_PKR_XSEL2_0 = 0,
	RASTER_CONFIG_PKR_XSEL2_1 = 1,
	RASTER_CONFIG_PKR_XSEL2_2 = 2,
	RASTER_CONFIG_PKR_XSEL2_3 = 3,
} PkrXsel2;
/* PkrXsel - the four RASTER_CONFIG_PKR_XSEL_<n> encodings; value equals n. */
typedef enum PkrXsel {
	RASTER_CONFIG_PKR_XSEL_0 = 0,
	RASTER_CONFIG_PKR_XSEL_1 = 1,
	RASTER_CONFIG_PKR_XSEL_2 = 2,
	RASTER_CONFIG_PKR_XSEL_3 = 3,
} PkrXsel;
/* PkrYsel - the four RASTER_CONFIG_PKR_YSEL_<n> encodings; value equals n. */
typedef enum PkrYsel {
	RASTER_CONFIG_PKR_YSEL_0 = 0,
	RASTER_CONFIG_PKR_YSEL_1 = 1,
	RASTER_CONFIG_PKR_YSEL_2 = 2,
	RASTER_CONFIG_PKR_YSEL_3 = 3,
} PkrYsel;
/* PkrMap - the four RASTER_CONFIG_PKR_MAP_<n> encodings; value equals n. */
typedef enum PkrMap {
	RASTER_CONFIG_PKR_MAP_0 = 0,
	RASTER_CONFIG_PKR_MAP_1 = 1,
	RASTER_CONFIG_PKR_MAP_2 = 2,
	RASTER_CONFIG_PKR_MAP_3 = 3,
} PkrMap;
/* RbXsel - the two RASTER_CONFIG_RB_XSEL_<n> encodings; value equals n. */
typedef enum RbXsel {
	RASTER_CONFIG_RB_XSEL_0 = 0,
	RASTER_CONFIG_RB_XSEL_1 = 1,
} RbXsel;
/* RbYsel - the two RASTER_CONFIG_RB_YSEL_<n> encodings; value equals n. */
typedef enum RbYsel {
	RASTER_CONFIG_RB_YSEL_0 = 0,
	RASTER_CONFIG_RB_YSEL_1 = 1,
} RbYsel;
/* RbXsel2 - the four RASTER_CONFIG_RB_XSEL2_<n> encodings; value equals n. */
typedef enum RbXsel2 {
	RASTER_CONFIG_RB_XSEL2_0 = 0,
	RASTER_CONFIG_RB_XSEL2_1 = 1,
	RASTER_CONFIG_RB_XSEL2_2 = 2,
	RASTER_CONFIG_RB_XSEL2_3 = 3,
} RbXsel2;
/* RbMap - the four RASTER_CONFIG_RB_MAP_<n> encodings; value equals n. */
typedef enum RbMap {
	RASTER_CONFIG_RB_MAP_0 = 0,
	RASTER_CONFIG_RB_MAP_1 = 1,
	RASTER_CONFIG_RB_MAP_2 = 2,
	RASTER_CONFIG_RB_MAP_3 = 3,
} RbMap;
/*
 * CSDATA_TYPE - four type codes (0..3), named TG, STATE, EVENT and PRIVATE.
 * All four values fit in two bits.
 */
typedef enum CSDATA_TYPE {
	CSDATA_TYPE_TG = 0,
	CSDATA_TYPE_STATE = 1,
	CSDATA_TYPE_EVENT = 2,
	CSDATA_TYPE_PRIVATE = 3,
} CSDATA_TYPE;
/*
 * CSDATA_*_WIDTH constants: 0x2, 0x7 and 0x20 (2, 7 and 32 decimal).
 * NOTE(review): presumably the bit widths of the CSDATA type, address and
 * data fields; confirm against the hardware documentation.
 */
#define CSDATA_TYPE_WIDTH                         0x2
#define CSDATA_ADDR_WIDTH                         0x7
#define CSDATA_DATA_WIDTH                         0x20
/*
 * SPI_SAMPLE_CNTL - four sample-control codes, 0x0 through 0x3.
 * The enumerators are unprefixed (CENTROIDS_ONLY, UNDEF, ...) and so
 * live directly in the enclosing scope.
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SPI_SAMPLE_CNTL {
	CENTROIDS_ONLY = 0x0,
	CENTERS_ONLY = 0x1,
	CENTROIDS_AND_CENTERS = 0x2,
	UNDEF = 0x3,
} SPI_SAMPLE_CNTL;
/*
 * SPI_FOG_MODE - four fog-mode codes (NONE, EXP, EXP2, LINEAR), 0x0-0x3.
 * NOTE(review): semantics inferred from names only; confirm against the
 * hardware register specification.
 */
typedef enum SPI_FOG_MODE {
	SPI_FOG_NONE = 0x0,
	SPI_FOG_EXP = 0x1,
	SPI_FOG_EXP2 = 0x2,
	SPI_FOG_LINEAR = 0x3,
} SPI_FOG_MODE;
/*
 * SPI_PNT_SPRITE_OVERRIDE - five SPI_PNT_SPRITE_SEL_* codes, 0x0-0x4
 * (0, 1, S, T, NONE).
 * NOTE(review): presumably selects what overrides a point-sprite
 * component; inferred from names only - confirm.
 */
typedef enum SPI_PNT_SPRITE_OVERRIDE {
	SPI_PNT_SPRITE_SEL_0 = 0x0,
	SPI_PNT_SPRITE_SEL_1 = 0x1,
	SPI_PNT_SPRITE_SEL_S = 0x2,
	SPI_PNT_SPRITE_SEL_T = 0x3,
	SPI_PNT_SPRITE_SEL_NONE = 0x4,
} SPI_PNT_SPRITE_OVERRIDE;
/*
 * SPI_PERFCNT_SEL - SPI_PERF_* selector codes, contiguous from 0x0 to
 * 0xb9. The range comments below group the enumerators by name prefix.
 * NOTE(review): presumably the event-select encodings for the SPI
 * performance counters; confirm against the register specification.
 */
typedef enum SPI_PERFCNT_SEL {
	/* SPI_PERF_VS_*: 0x00 - 0x0f */
	SPI_PERF_VS_WINDOW_VALID = 0x0,
	SPI_PERF_VS_BUSY = 0x1,
	SPI_PERF_VS_FIRST_WAVE = 0x2,
	SPI_PERF_VS_LAST_WAVE = 0x3,
	SPI_PERF_VS_LSHS_DEALLOC = 0x4,
	SPI_PERF_VS_PC_STALL = 0x5,
	SPI_PERF_VS_POS0_STALL = 0x6,
	SPI_PERF_VS_POS1_STALL = 0x7,
	SPI_PERF_VS_CRAWLER_STALL = 0x8,
	SPI_PERF_VS_EVENT_WAVE = 0x9,
	SPI_PERF_VS_WAVE = 0xa,
	SPI_PERF_VS_PERS_UPD_FULL0 = 0xb,
	SPI_PERF_VS_PERS_UPD_FULL1 = 0xc,
	SPI_PERF_VS_LATE_ALLOC_FULL = 0xd,
	SPI_PERF_VS_FIRST_SUBGRP = 0xe,
	SPI_PERF_VS_LAST_SUBGRP = 0xf,

	/* SPI_PERF_GS_*: 0x10 - 0x18 */
	SPI_PERF_GS_WINDOW_VALID = 0x10,
	SPI_PERF_GS_BUSY = 0x11,
	SPI_PERF_GS_CRAWLER_STALL = 0x12,
	SPI_PERF_GS_EVENT_WAVE = 0x13,
	SPI_PERF_GS_WAVE = 0x14,
	SPI_PERF_GS_PERS_UPD_FULL0 = 0x15,
	SPI_PERF_GS_PERS_UPD_FULL1 = 0x16,
	SPI_PERF_GS_FIRST_SUBGRP = 0x17,
	SPI_PERF_GS_LAST_SUBGRP = 0x18,

	/* SPI_PERF_ES_*: 0x19 - 0x24 */
	SPI_PERF_ES_WINDOW_VALID = 0x19,
	SPI_PERF_ES_BUSY = 0x1a,
	SPI_PERF_ES_CRAWLER_STALL = 0x1b,
	SPI_PERF_ES_FIRST_WAVE = 0x1c,
	SPI_PERF_ES_LAST_WAVE = 0x1d,
	SPI_PERF_ES_LSHS_DEALLOC = 0x1e,
	SPI_PERF_ES_EVENT_WAVE = 0x1f,
	SPI_PERF_ES_WAVE = 0x20,
	SPI_PERF_ES_PERS_UPD_FULL0 = 0x21,
	SPI_PERF_ES_PERS_UPD_FULL1 = 0x22,
	SPI_PERF_ES_FIRST_SUBGRP = 0x23,
	SPI_PERF_ES_LAST_SUBGRP = 0x24,

	/* SPI_PERF_HS_*: 0x25 - 0x2e */
	SPI_PERF_HS_WINDOW_VALID = 0x25,
	SPI_PERF_HS_BUSY = 0x26,
	SPI_PERF_HS_CRAWLER_STALL = 0x27,
	SPI_PERF_HS_FIRST_WAVE = 0x28,
	SPI_PERF_HS_LAST_WAVE = 0x29,
	SPI_PERF_HS_LSHS_DEALLOC = 0x2a,
	SPI_PERF_HS_EVENT_WAVE = 0x2b,
	SPI_PERF_HS_WAVE = 0x2c,
	SPI_PERF_HS_PERS_UPD_FULL0 = 0x2d,
	SPI_PERF_HS_PERS_UPD_FULL1 = 0x2e,

	/* SPI_PERF_LS_* (plus OFFCHIP_LDS_STALL_LS): 0x2f - 0x38 */
	SPI_PERF_LS_WINDOW_VALID = 0x2f,
	SPI_PERF_LS_BUSY = 0x30,
	SPI_PERF_LS_CRAWLER_STALL = 0x31,
	SPI_PERF_LS_FIRST_WAVE = 0x32,
	SPI_PERF_LS_LAST_WAVE = 0x33,
	SPI_PERF_OFFCHIP_LDS_STALL_LS = 0x34,
	SPI_PERF_LS_EVENT_WAVE = 0x35,
	SPI_PERF_LS_WAVE = 0x36,
	SPI_PERF_LS_PERS_UPD_FULL0 = 0x37,
	SPI_PERF_LS_PERS_UPD_FULL1 = 0x38,

	/* SPI_PERF_CSG_*: 0x39 - 0x3e */
	SPI_PERF_CSG_WINDOW_VALID = 0x39,
	SPI_PERF_CSG_BUSY = 0x3a,
	SPI_PERF_CSG_NUM_THREADGROUPS = 0x3b,
	SPI_PERF_CSG_CRAWLER_STALL = 0x3c,
	SPI_PERF_CSG_EVENT_WAVE = 0x3d,
	SPI_PERF_CSG_WAVE = 0x3e,

	/* SPI_PERF_CSN_*: 0x3f - 0x44 */
	SPI_PERF_CSN_WINDOW_VALID = 0x3f,
	SPI_PERF_CSN_BUSY = 0x40,
	SPI_PERF_CSN_NUM_THREADGROUPS = 0x41,
	SPI_PERF_CSN_CRAWLER_STALL = 0x42,
	SPI_PERF_CSN_EVENT_WAVE = 0x43,
	SPI_PERF_CSN_WAVE = 0x44,

	/* SPI_PERF_PS_CTL_* and SPI_PERF_PS_PERS_*: 0x45 - 0x57 */
	SPI_PERF_PS_CTL_WINDOW_VALID = 0x45,
	SPI_PERF_PS_CTL_BUSY = 0x46,
	SPI_PERF_PS_CTL_ACTIVE = 0x47,
	SPI_PERF_PS_CTL_DEALLOC_BIN0 = 0x48,
	SPI_PERF_PS_CTL_FPOS_BIN1_STALL = 0x49,
	SPI_PERF_PS_CTL_EVENT_WAVE = 0x4a,
	SPI_PERF_PS_CTL_WAVE = 0x4b,
	SPI_PERF_PS_CTL_OPT_WAVE = 0x4c,
	SPI_PERF_PS_CTL_PASS_BIN0 = 0x4d,
	SPI_PERF_PS_CTL_PASS_BIN1 = 0x4e,
	SPI_PERF_PS_CTL_FPOS_BIN2 = 0x4f,
	SPI_PERF_PS_CTL_PRIM_BIN0 = 0x50,
	SPI_PERF_PS_CTL_PRIM_BIN1 = 0x51,
	SPI_PERF_PS_CTL_CNF_BIN2 = 0x52,
	SPI_PERF_PS_CTL_CNF_BIN3 = 0x53,
	SPI_PERF_PS_CTL_CRAWLER_STALL = 0x54,
	SPI_PERF_PS_CTL_LDS_RES_FULL = 0x55,
	SPI_PERF_PS_PERS_UPD_FULL0 = 0x56,
	SPI_PERF_PS_PERS_UPD_FULL1 = 0x57,

	/* SPI_PERF_PIX_ALLOC_*: 0x58 - 0x5d */
	SPI_PERF_PIX_ALLOC_PEND_CNT = 0x58,
	SPI_PERF_PIX_ALLOC_SCB_STALL = 0x59,
	SPI_PERF_PIX_ALLOC_DB0_STALL = 0x5a,
	SPI_PERF_PIX_ALLOC_DB1_STALL = 0x5b,
	SPI_PERF_PIX_ALLOC_DB2_STALL = 0x5c,
	SPI_PERF_PIX_ALLOC_DB3_STALL = 0x5d,

	/* SPI_PERF_LDS{0,1}_PC_VALID: 0x5e - 0x5f */
	SPI_PERF_LDS0_PC_VALID = 0x5e,
	SPI_PERF_LDS1_PC_VALID = 0x5f,

	/* SPI_PERF_RA_*: 0x60 - 0xb0 */
	SPI_PERF_RA_PIPE_REQ_BIN2 = 0x60,
	SPI_PERF_RA_TASK_REQ_BIN3 = 0x61,
	SPI_PERF_RA_WR_CTL_FULL = 0x62,
	SPI_PERF_RA_REQ_NO_ALLOC = 0x63,
	SPI_PERF_RA_REQ_NO_ALLOC_PS = 0x64,
	SPI_PERF_RA_REQ_NO_ALLOC_VS = 0x65,
	SPI_PERF_RA_REQ_NO_ALLOC_GS = 0x66,
	SPI_PERF_RA_REQ_NO_ALLOC_ES = 0x67,
	SPI_PERF_RA_REQ_NO_ALLOC_HS = 0x68,
	SPI_PERF_RA_REQ_NO_ALLOC_LS = 0x69,
	SPI_PERF_RA_REQ_NO_ALLOC_CSG = 0x6a,
	SPI_PERF_RA_REQ_NO_ALLOC_CSN = 0x6b,
	SPI_PERF_RA_RES_STALL_PS = 0x6c,
	SPI_PERF_RA_RES_STALL_VS = 0x6d,
	SPI_PERF_RA_RES_STALL_GS = 0x6e,
	SPI_PERF_RA_RES_STALL_ES = 0x6f,
	SPI_PERF_RA_RES_STALL_HS = 0x70,
	SPI_PERF_RA_RES_STALL_LS = 0x71,
	SPI_PERF_RA_RES_STALL_CSG = 0x72,
	SPI_PERF_RA_RES_STALL_CSN = 0x73,
	SPI_PERF_RA_TMP_STALL_PS = 0x74,
	SPI_PERF_RA_TMP_STALL_VS = 0x75,
	SPI_PERF_RA_TMP_STALL_GS = 0x76,
	SPI_PERF_RA_TMP_STALL_ES = 0x77,
	SPI_PERF_RA_TMP_STALL_HS = 0x78,
	SPI_PERF_RA_TMP_STALL_LS = 0x79,
	SPI_PERF_RA_TMP_STALL_CSG = 0x7a,
	SPI_PERF_RA_TMP_STALL_CSN = 0x7b,
	SPI_PERF_RA_WAVE_SIMD_FULL_PS = 0x7c,
	SPI_PERF_RA_WAVE_SIMD_FULL_VS = 0x7d,
	SPI_PERF_RA_WAVE_SIMD_FULL_GS = 0x7e,
	SPI_PERF_RA_WAVE_SIMD_FULL_ES = 0x7f,
	SPI_PERF_RA_WAVE_SIMD_FULL_HS = 0x80,
	SPI_PERF_RA_WAVE_SIMD_FULL_LS = 0x81,
	SPI_PERF_RA_WAVE_SIMD_FULL_CSG = 0x82,
	SPI_PERF_RA_WAVE_SIMD_FULL_CSN = 0x83,
	SPI_PERF_RA_VGPR_SIMD_FULL_PS = 0x84,
	SPI_PERF_RA_VGPR_SIMD_FULL_VS = 0x85,
	SPI_PERF_RA_VGPR_SIMD_FULL_GS = 0x86,
	SPI_PERF_RA_VGPR_SIMD_FULL_ES = 0x87,
	SPI_PERF_RA_VGPR_SIMD_FULL_HS = 0x88,
	SPI_PERF_RA_VGPR_SIMD_FULL_LS = 0x89,
	SPI_PERF_RA_VGPR_SIMD_FULL_CSG = 0x8a,
	SPI_PERF_RA_VGPR_SIMD_FULL_CSN = 0x8b,
	SPI_PERF_RA_SGPR_SIMD_FULL_PS = 0x8c,
	SPI_PERF_RA_SGPR_SIMD_FULL_VS = 0x8d,
	SPI_PERF_RA_SGPR_SIMD_FULL_GS = 0x8e,
	SPI_PERF_RA_SGPR_SIMD_FULL_ES = 0x8f,
	SPI_PERF_RA_SGPR_SIMD_FULL_HS = 0x90,
	SPI_PERF_RA_SGPR_SIMD_FULL_LS = 0x91,
	SPI_PERF_RA_SGPR_SIMD_FULL_CSG = 0x92,
	SPI_PERF_RA_SGPR_SIMD_FULL_CSN = 0x93,
	SPI_PERF_RA_LDS_CU_FULL_PS = 0x94,
	SPI_PERF_RA_LDS_CU_FULL_LS = 0x95,
	SPI_PERF_RA_LDS_CU_FULL_ES = 0x96,
	SPI_PERF_RA_LDS_CU_FULL_CSG = 0x97,
	SPI_PERF_RA_LDS_CU_FULL_CSN = 0x98,
	SPI_PERF_RA_BAR_CU_FULL_HS = 0x99,
	SPI_PERF_RA_BAR_CU_FULL_CSG = 0x9a,
	SPI_PERF_RA_BAR_CU_FULL_CSN = 0x9b,
	SPI_PERF_RA_BULKY_CU_FULL_CSG = 0x9c,
	SPI_PERF_RA_BULKY_CU_FULL_CSN = 0x9d,
	SPI_PERF_RA_TGLIM_CU_FULL_CSG = 0x9e,
	SPI_PERF_RA_TGLIM_CU_FULL_CSN = 0x9f,
	SPI_PERF_RA_WVLIM_STALL_PS = 0xa0,
	SPI_PERF_RA_WVLIM_STALL_VS = 0xa1,
	SPI_PERF_RA_WVLIM_STALL_GS = 0xa2,
	SPI_PERF_RA_WVLIM_STALL_ES = 0xa3,
	SPI_PERF_RA_WVLIM_STALL_HS = 0xa4,
	SPI_PERF_RA_WVLIM_STALL_LS = 0xa5,
	SPI_PERF_RA_WVLIM_STALL_CSG = 0xa6,
	SPI_PERF_RA_WVLIM_STALL_CSN = 0xa7,
	SPI_PERF_RA_PS_LOCK = 0xa8,
	SPI_PERF_RA_VS_LOCK = 0xa9,
	SPI_PERF_RA_GS_LOCK = 0xaa,
	SPI_PERF_RA_ES_LOCK = 0xab,
	SPI_PERF_RA_HS_LOCK = 0xac,
	SPI_PERF_RA_LS_LOCK = 0xad,
	SPI_PERF_RA_CSG_LOCK = 0xae,
	SPI_PERF_RA_CSN_LOCK = 0xaf,
	SPI_PERF_RA_RSV_UPD = 0xb0,

	/* SPI_PERF_EXP_ARB_*: 0xb1 - 0xb4 */
	SPI_PERF_EXP_ARB_COL_CNT = 0xb1,
	SPI_PERF_EXP_ARB_PAR_CNT = 0xb2,
	SPI_PERF_EXP_ARB_POS_CNT = 0xb3,
	SPI_PERF_EXP_ARB_GDS_CNT = 0xb4,

	/* SPI_PERF_CLKGATE_*: 0xb5 - 0xb9 */
	SPI_PERF_CLKGATE_BUSY_STALL = 0xb5,
	SPI_PERF_CLKGATE_ACTIVE_STALL = 0xb6,
	SPI_PERF_CLKGATE_ALL_CLOCKS_ON = 0xb7,
	SPI_PERF_CLKGATE_CGTT_DYN_ON = 0xb8,
	SPI_PERF_CLKGATE_CGTT_REG_ON = 0xb9,
} SPI_PERFCNT_SEL;
/*
 * SPI_SHADER_FORMAT - five format codes, 0x0 through 0x4
 * (NONE, 1COMP, 2COMP, 4COMPRESS, 4COMP).
 * NOTE(review): semantics inferred from names only; confirm against the
 * hardware register specification.
 */
typedef enum SPI_SHADER_FORMAT {
	SPI_SHADER_NONE = 0x0,
	SPI_SHADER_1COMP = 0x1,
	SPI_SHADER_2COMP = 0x2,
	SPI_SHADER_4COMPRESS = 0x3,
	SPI_SHADER_4COMP = 0x4,
} SPI_SHADER_FORMAT;
/*
 * SPI_SHADER_EX_FORMAT - ten format codes, 0x0 through 0x9, from
 * SPI_SHADER_ZERO up to SPI_SHADER_32_ABGR.
 * NOTE(review): presumably shader export formats; inferred from names
 * only - confirm against the hardware register specification.
 */
typedef enum SPI_SHADER_EX_FORMAT {
	SPI_SHADER_ZERO = 0x0,
	SPI_SHADER_32_R = 0x1,
	SPI_SHADER_32_GR = 0x2,
	SPI_SHADER_32_AR = 0x3,
	SPI_SHADER_FP16_ABGR = 0x4,
	SPI_SHADER_UNORM16_ABGR = 0x5,
	SPI_SHADER_SNORM16_ABGR = 0x6,
	SPI_SHADER_UINT16_ABGR = 0x7,
	SPI_SHADER_SINT16_ABGR = 0x8,
	SPI_SHADER_32_ABGR = 0x9,
} SPI_SHADER_EX_FORMAT;
/*
 * CLKGATE_SM_MODE - five mode codes, 0x0 through 0x4. The enumerators
 * are unprefixed (ON_SEQ, OFF_SEQ, ...) and so live directly in the
 * enclosing scope.
 * NOTE(review): presumably clock-gating state-machine modes; inferred
 * from names only - confirm.
 */
typedef enum CLKGATE_SM_MODE {
	ON_SEQ = 0x0,
	OFF_SEQ = 0x1,
	PROG_SEQ = 0x2,
	READ_SEQ = 0x3,
	SM_MODE_RESERVED = 0x4,
} CLKGATE_SM_MODE;
/*
 * CLKGATE_BASE_MODE - two codes: MULT_8 (0x0) and MULT_16 (0x1).
 * NOTE(review): presumably a clock-gating base multiplier choice;
 * inferred from names only - confirm.
 */
typedef enum CLKGATE_BASE_MODE {
	MULT_8 = 0x0,
	MULT_16 = 0x1,
} CLKGATE_BASE_MODE;
/*
 * SQ_TEX_CLAMP - eight wrap/mirror/clamp codes, 0x0 through 0x7.
 * NOTE(review): presumably texture-coordinate addressing modes;
 * inferred from names only - confirm against the register spec.
 */
typedef enum SQ_TEX_CLAMP {
	SQ_TEX_WRAP = 0x0,
	SQ_TEX_MIRROR = 0x1,
	SQ_TEX_CLAMP_LAST_TEXEL = 0x2,
	SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x3,
	SQ_TEX_CLAMP_HALF_BORDER = 0x4,
	SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x5,
	SQ_TEX_CLAMP_BORDER = 0x6,
	SQ_TEX_MIRROR_ONCE_BORDER = 0x7,
} SQ_TEX_CLAMP;
/*
 * SQ_TEX_XY_FILTER - four filter codes, 0x0 through 0x3
 * (POINT, BILINEAR, ANISO_POINT, ANISO_BILINEAR).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_TEX_XY_FILTER {
	SQ_TEX_XY_FILTER_POINT = 0x0,
	SQ_TEX_XY_FILTER_BILINEAR = 0x1,
	SQ_TEX_XY_FILTER_ANISO_POINT = 0x2,
	SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x3,
} SQ_TEX_XY_FILTER;
/*
 * SQ_TEX_Z_FILTER - three filter codes: NONE (0x0), POINT (0x1),
 * LINEAR (0x2).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_TEX_Z_FILTER {
	SQ_TEX_Z_FILTER_NONE = 0x0,
	SQ_TEX_Z_FILTER_POINT = 0x1,
	SQ_TEX_Z_FILTER_LINEAR = 0x2,
} SQ_TEX_Z_FILTER;
/*
 * SQ_TEX_MIP_FILTER - three filter codes: NONE (0x0), POINT (0x1),
 * LINEAR (0x2).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_TEX_MIP_FILTER {
	SQ_TEX_MIP_FILTER_NONE = 0x0,
	SQ_TEX_MIP_FILTER_POINT = 0x1,
	SQ_TEX_MIP_FILTER_LINEAR = 0x2,
} SQ_TEX_MIP_FILTER;
/*
 * SQ_TEX_ANISO_RATIO - five codes, 0x0 through 0x4, named _1, _2, _4,
 * _8 and _16. The code is not the ratio itself: each name suffix equals
 * 1 << code.
 * NOTE(review): presumably the maximum anisotropy ratio; confirm.
 */
typedef enum SQ_TEX_ANISO_RATIO {
	SQ_TEX_ANISO_RATIO_1 = 0x0,
	SQ_TEX_ANISO_RATIO_2 = 0x1,
	SQ_TEX_ANISO_RATIO_4 = 0x2,
	SQ_TEX_ANISO_RATIO_8 = 0x3,
	SQ_TEX_ANISO_RATIO_16 = 0x4,
} SQ_TEX_ANISO_RATIO;
/*
 * SQ_TEX_DEPTH_COMPARE - eight comparison codes, 0x0 (NEVER) through
 * 0x7 (ALWAYS).
 * NOTE(review): presumably the depth-compare function used when
 * sampling; inferred from names only - confirm.
 */
typedef enum SQ_TEX_DEPTH_COMPARE {
	SQ_TEX_DEPTH_COMPARE_NEVER = 0x0,
	SQ_TEX_DEPTH_COMPARE_LESS = 0x1,
	SQ_TEX_DEPTH_COMPARE_EQUAL = 0x2,
	SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x3,
	SQ_TEX_DEPTH_COMPARE_GREATER = 0x4,
	SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x5,
	SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x6,
	SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x7,
} SQ_TEX_DEPTH_COMPARE;
/*
 * SQ_TEX_BORDER_COLOR - four codes, 0x0 through 0x3
 * (TRANS_BLACK, OPAQUE_BLACK, OPAQUE_WHITE, REGISTER).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_TEX_BORDER_COLOR {
	SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x0,
	SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x1,
	SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x2,
	SQ_TEX_BORDER_COLOR_REGISTER = 0x3,
} SQ_TEX_BORDER_COLOR;
/*
 * SQ_RSRC_BUF_TYPE - four codes, 0x0 through 0x3. Only 0x0
 * (SQ_RSRC_BUF) has a non-RSVD name; 0x1-0x3 are named RSVD_1..RSVD_3.
 * NOTE(review): presumably the type field of a buffer resource
 * descriptor; confirm against the hardware specification.
 */
typedef enum SQ_RSRC_BUF_TYPE {
	SQ_RSRC_BUF = 0x0,
	SQ_RSRC_BUF_RSVD_1 = 0x1,
	SQ_RSRC_BUF_RSVD_2 = 0x2,
	SQ_RSRC_BUF_RSVD_3 = 0x3,
} SQ_RSRC_BUF_TYPE;
/*
 * SQ_RSRC_IMG_TYPE - sixteen codes, 0x0 through 0xf. Codes 0x0-0x7 are
 * named RSVD_0..RSVD_7; codes 0x8-0xf name image kinds (1D, 2D, 3D,
 * CUBE, array and MSAA variants).
 * NOTE(review): presumably the type field of an image resource
 * descriptor; confirm against the hardware specification.
 */
typedef enum SQ_RSRC_IMG_TYPE {
	SQ_RSRC_IMG_RSVD_0 = 0x0,
	SQ_RSRC_IMG_RSVD_1 = 0x1,
	SQ_RSRC_IMG_RSVD_2 = 0x2,
	SQ_RSRC_IMG_RSVD_3 = 0x3,
	SQ_RSRC_IMG_RSVD_4 = 0x4,
	SQ_RSRC_IMG_RSVD_5 = 0x5,
	SQ_RSRC_IMG_RSVD_6 = 0x6,
	SQ_RSRC_IMG_RSVD_7 = 0x7,
	SQ_RSRC_IMG_1D = 0x8,
	SQ_RSRC_IMG_2D = 0x9,
	SQ_RSRC_IMG_3D = 0xa,
	SQ_RSRC_IMG_CUBE = 0xb,
	SQ_RSRC_IMG_1D_ARRAY = 0xc,
	SQ_RSRC_IMG_2D_ARRAY = 0xd,
	SQ_RSRC_IMG_2D_MSAA = 0xe,
	SQ_RSRC_IMG_2D_MSAA_ARRAY = 0xf,
} SQ_RSRC_IMG_TYPE;
/*
 * SQ_RSRC_FLAT_TYPE - four codes, 0x0 through 0x3. Only 0x1
 * (SQ_RSRC_FLAT) has a non-RSVD name; 0x0, 0x2 and 0x3 are named RSVD.
 * NOTE(review): presumably the type field of a flat resource
 * descriptor; confirm against the hardware specification.
 */
typedef enum SQ_RSRC_FLAT_TYPE {
	SQ_RSRC_FLAT_RSVD_0 = 0x0,
	SQ_RSRC_FLAT = 0x1,
	SQ_RSRC_FLAT_RSVD_2 = 0x2,
	SQ_RSRC_FLAT_RSVD_3 = 0x3,
} SQ_RSRC_FLAT_TYPE;
/*
 * SQ_IMG_FILTER_TYPE - three SQ_IMG_FILTER_MODE_* codes: BLEND (0x0),
 * MIN (0x1), MAX (0x2).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_IMG_FILTER_TYPE {
	SQ_IMG_FILTER_MODE_BLEND = 0x0,
	SQ_IMG_FILTER_MODE_MIN = 0x1,
	SQ_IMG_FILTER_MODE_MAX = 0x2,
} SQ_IMG_FILTER_TYPE;
/*
 * SQ_SEL_XYZW01 - eight select codes, 0x0 through 0x7: constants 0 and
 * 1, two codes named RESERVED (0x2, 0x3), then X, Y, Z, W (0x4-0x7).
 * NOTE(review): presumably a component-swizzle selector; inferred from
 * names only - confirm.
 */
typedef enum SQ_SEL_XYZW01 {
	SQ_SEL_0 = 0x0,
	SQ_SEL_1 = 0x1,
	SQ_SEL_RESERVED_0 = 0x2,
	SQ_SEL_RESERVED_1 = 0x3,
	SQ_SEL_X = 0x4,
	SQ_SEL_Y = 0x5,
	SQ_SEL_Z = 0x6,
	SQ_SEL_W = 0x7,
} SQ_SEL_XYZW01;
/*
 * SQ_WAVE_TYPE - eight wave-type codes, 0x0 through 0x7
 * (PS, VS, GS, ES, HS, LS, CS, PS1).
 * NOTE(review): presumably identifies the shader stage a wave belongs
 * to; inferred from names only - confirm.
 */
typedef enum SQ_WAVE_TYPE {
	SQ_WAVE_TYPE_PS = 0x0,
	SQ_WAVE_TYPE_VS = 0x1,
	SQ_WAVE_TYPE_GS = 0x2,
	SQ_WAVE_TYPE_ES = 0x3,
	SQ_WAVE_TYPE_HS = 0x4,
	SQ_WAVE_TYPE_LS = 0x5,
	SQ_WAVE_TYPE_CS = 0x6,
	SQ_WAVE_TYPE_PS1 = 0x7,
} SQ_WAVE_TYPE;
/*
 * SQ_THREAD_TRACE_TOKEN_TYPE - sixteen token codes, 0x0 through 0xf.
 * NOTE(review): presumably the token-type tag of a thread-trace
 * packet; inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_TOKEN_TYPE {
	SQ_THREAD_TRACE_TOKEN_MISC = 0x0,
	SQ_THREAD_TRACE_TOKEN_TIMESTAMP = 0x1,
	SQ_THREAD_TRACE_TOKEN_REG = 0x2,
	SQ_THREAD_TRACE_TOKEN_WAVE_START = 0x3,
	SQ_THREAD_TRACE_TOKEN_WAVE_ALLOC = 0x4,
	SQ_THREAD_TRACE_TOKEN_REG_CSPRIV = 0x5,
	SQ_THREAD_TRACE_TOKEN_WAVE_END = 0x6,
	SQ_THREAD_TRACE_TOKEN_EVENT = 0x7,
	SQ_THREAD_TRACE_TOKEN_EVENT_CS = 0x8,
	SQ_THREAD_TRACE_TOKEN_EVENT_GFX1 = 0x9,
	SQ_THREAD_TRACE_TOKEN_INST = 0xa,
	SQ_THREAD_TRACE_TOKEN_INST_PC = 0xb,
	SQ_THREAD_TRACE_TOKEN_INST_USERDATA = 0xc,
	SQ_THREAD_TRACE_TOKEN_ISSUE = 0xd,
	SQ_THREAD_TRACE_TOKEN_PERF = 0xe,
	SQ_THREAD_TRACE_TOKEN_REG_CS = 0xf,
} SQ_THREAD_TRACE_TOKEN_TYPE;
/*
 * SQ_THREAD_TRACE_MISC_TOKEN_TYPE - six codes, 0x0 through 0x5.
 * NOTE(review): presumably the sub-type of a MISC thread-trace token;
 * inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_MISC_TOKEN_TYPE {
	SQ_THREAD_TRACE_MISC_TOKEN_TIME = 0x0,
	SQ_THREAD_TRACE_MISC_TOKEN_TIME_RESET = 0x1,
	SQ_THREAD_TRACE_MISC_TOKEN_PACKET_LOST = 0x2,
	SQ_THREAD_TRACE_MISC_TOKEN_SURF_SYNC = 0x3,
	SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_BEGIN = 0x4,
	SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_END = 0x5,
} SQ_THREAD_TRACE_MISC_TOKEN_TYPE;
/*
 * SQ_THREAD_TRACE_INST_TYPE - sixteen codes, 0x0 through 0xf. The two
 * FLAT codes are not adjacent: FLAT_WR is 0x4 and FLAT_RD is 0xe.
 * NOTE(review): presumably the instruction class recorded in an INST
 * thread-trace token; inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_INST_TYPE {
	SQ_THREAD_TRACE_INST_TYPE_SMEM = 0x0,
	SQ_THREAD_TRACE_INST_TYPE_SALU = 0x1,
	SQ_THREAD_TRACE_INST_TYPE_VMEM_RD = 0x2,
	SQ_THREAD_TRACE_INST_TYPE_VMEM_WR = 0x3,
	SQ_THREAD_TRACE_INST_TYPE_FLAT_WR = 0x4,
	SQ_THREAD_TRACE_INST_TYPE_VALU = 0x5,
	SQ_THREAD_TRACE_INST_TYPE_LDS = 0x6,
	SQ_THREAD_TRACE_INST_TYPE_PC = 0x7,
	SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GDS = 0x8,
	SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GFX = 0x9,
	SQ_THREAD_TRACE_INST_TYPE_EXPGNT_PAR_COL = 0xa,
	SQ_THREAD_TRACE_INST_TYPE_EXPGNT_POS_GDS = 0xb,
	SQ_THREAD_TRACE_INST_TYPE_JUMP = 0xc,
	SQ_THREAD_TRACE_INST_TYPE_NEXT = 0xd,
	SQ_THREAD_TRACE_INST_TYPE_FLAT_RD = 0xe,
	SQ_THREAD_TRACE_INST_TYPE_OTHER_MSG = 0xf,
} SQ_THREAD_TRACE_INST_TYPE;
/*
 * SQ_THREAD_TRACE_REG_TYPE - eight codes, 0x0 through 0x7.
 * NOTE(review): presumably classifies the register access recorded in
 * a REG thread-trace token; inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_REG_TYPE {
	SQ_THREAD_TRACE_REG_TYPE_EVENT = 0x0,
	SQ_THREAD_TRACE_REG_TYPE_DRAW = 0x1,
	SQ_THREAD_TRACE_REG_TYPE_DISPATCH = 0x2,
	SQ_THREAD_TRACE_REG_TYPE_USERDATA = 0x3,
	SQ_THREAD_TRACE_REG_TYPE_MARKER = 0x4,
	SQ_THREAD_TRACE_REG_TYPE_GFXDEC = 0x5,
	SQ_THREAD_TRACE_REG_TYPE_SHDEC = 0x6,
	SQ_THREAD_TRACE_REG_TYPE_OTHER = 0x7,
} SQ_THREAD_TRACE_REG_TYPE;
/*
 * SQ_THREAD_TRACE_REG_OP - two codes: READ (0x0) and WRITE (0x1).
 * NOTE(review): presumably the direction of a traced register access;
 * inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_REG_OP {
	SQ_THREAD_TRACE_REG_OP_READ = 0x0,
	SQ_THREAD_TRACE_REG_OP_WRITE = 0x1,
} SQ_THREAD_TRACE_REG_OP;
/*
 * SQ_THREAD_TRACE_MODE_SEL - three codes: OFF (0x0), ON (0x1),
 * RANDOM (0x2).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_THREAD_TRACE_MODE_SEL {
	SQ_THREAD_TRACE_MODE_OFF = 0x0,
	SQ_THREAD_TRACE_MODE_ON = 0x1,
	SQ_THREAD_TRACE_MODE_RANDOM = 0x2,
} SQ_THREAD_TRACE_MODE_SEL;
/*
 * SQ_THREAD_TRACE_CAPTURE_MODE - three codes: ALL (0x0), SELECT (0x1),
 * SELECT_DETAIL (0x2).
 * NOTE(review): semantics inferred from names only; confirm.
 */
typedef enum SQ_THREAD_TRACE_CAPTURE_MODE {
	SQ_THREAD_TRACE_CAPTURE_MODE_ALL = 0x0,
	SQ_THREAD_TRACE_CAPTURE_MODE_SELECT = 0x1,
	SQ_THREAD_TRACE_CAPTURE_MODE_SELECT_DETAIL = 0x2,
} SQ_THREAD_TRACE_CAPTURE_MODE;
/*
 * SQ_THREAD_TRACE_VM_ID_MASK - three codes: SINGLE (0x0), ALL (0x1),
 * SINGLE_DETAIL (0x2).
 * NOTE(review): presumably selects which VM IDs are traced; inferred
 * from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_VM_ID_MASK {
	SQ_THREAD_TRACE_VM_ID_MASK_SINGLE = 0x0,
	SQ_THREAD_TRACE_VM_ID_MASK_ALL = 0x1,
	SQ_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 0x2,
} SQ_THREAD_TRACE_VM_ID_MASK;
/*
 * SQ_THREAD_TRACE_WAVE_MASK - eight codes, 0x0 through 0x7: NONE, ALL,
 * then 1_2, 1_4, 1_8, 1_16, 1_32 and 1_64.
 * NOTE(review): the 1_N names presumably mean "one wave in N" is
 * traced; inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_WAVE_MASK {
	SQ_THREAD_TRACE_WAVE_MASK_NONE = 0x0,
	SQ_THREAD_TRACE_WAVE_MASK_ALL = 0x1,
	SQ_THREAD_TRACE_WAVE_MASK_1_2 = 0x2,
	SQ_THREAD_TRACE_WAVE_MASK_1_4 = 0x3,
	SQ_THREAD_TRACE_WAVE_MASK_1_8 = 0x4,
	SQ_THREAD_TRACE_WAVE_MASK_1_16 = 0x5,
	SQ_THREAD_TRACE_WAVE_MASK_1_32 = 0x6,
	SQ_THREAD_TRACE_WAVE_MASK_1_64 = 0x7,
} SQ_THREAD_TRACE_WAVE_MASK;
/*
 * SQ_THREAD_TRACE_ISSUE - four codes, 0x0 through 0x3
 * (NULL, STALL, INST, IMMED).
 * NOTE(review): presumably the issue state recorded in an ISSUE
 * thread-trace token; inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_ISSUE {
	SQ_THREAD_TRACE_ISSUE_NULL = 0x0,
	SQ_THREAD_TRACE_ISSUE_STALL = 0x1,
	SQ_THREAD_TRACE_ISSUE_INST = 0x2,
	SQ_THREAD_TRACE_ISSUE_IMMED = 0x3,
} SQ_THREAD_TRACE_ISSUE;
/*
 * SQ_THREAD_TRACE_ISSUE_MASK - four codes, 0x0 through 0x3
 * (ALL, STALLED, STALLED_AND_IMMED, IMMED).
 * NOTE(review): presumably filters which issue tokens are emitted;
 * inferred from names only - confirm.
 */
typedef enum SQ_THREAD_TRACE_ISSUE_MASK {
	SQ_THREAD_TRACE_ISSUE_MASK_ALL = 0x0,
	SQ_THREAD_TRACE_ISSUE_MASK_STALLED = 0x1,
	SQ_THREAD_TRACE_ISSUE_MASK_STALLED_AND_IMMED = 0x2,
	SQ_THREAD_TRACE_ISSUE_MASK_IMMED = 0x3,
} SQ_THREAD_TRACE_ISSUE_MASK;
typedef enum SQ_PERF_SEL {
	SQ_PERF_SEL_NONE                                 = 0x0,
	SQ_PERF_SEL_ACCUM_PREV                           = 0x1,
	SQ_PERF_SEL_CYCLES                               = 0x2,
	SQ_PERF_SEL_BUSY_CYCLES                          = 0x3,
	SQ_PERF_SEL_WAVES                                = 0x4,
	SQ_PERF_SEL_LEVEL_WAVES                          = 0x5,
	SQ_PERF_SEL_WAVES_EQ_64                          = 0x6,
	SQ_PERF_SEL_WAVES_LT_64                          = 0x7,
	SQ_PERF_SEL_WAVES_LT_48                          = 0x8,
	SQ_PERF_SEL_WAVES_LT_32                          = 0x9,
	SQ_PERF_SEL_WAVES_LT_16                          = 0xa,
	SQ_PERF_SEL_WAVES_CU                             = 0xb,
	SQ_PERF_SEL_LEVEL_WAVES_CU                       = 0xc,
	SQ_PERF_SEL_BUSY_CU_CYCLES                       = 0xd,
	SQ_PERF_SEL_ITEMS                                = 0xe,
	SQ_PERF_SEL_QUADS                                = 0xf,
	SQ_PERF_SEL_EVENTS                               = 0x10,
	SQ_PERF_SEL_SURF_SYNCS                           = 0x11,
	SQ_PERF_SEL_TTRACE_REQS                          = 0x12,
	SQ_PERF_SEL_TTRACE_INFLIGHT_REQS                 = 0x13,
	SQ_PERF_SEL_TTRACE_STALL                         = 0x14,
	SQ_PERF_SEL_MSG_CNTR                             = 0x15,
	SQ_PERF_SEL_MSG_PERF                             = 0x16,
	SQ_PERF_SEL_MSG_GSCNT                            = 0x17,
	SQ_PERF_SEL_MSG_INTERRUPT                        = 0x18,
	SQ_PERF_SEL_INSTS                                = 0x19,
	SQ_PERF_SEL_INSTS_VALU                           = 0x1a,
	SQ_PERF_SEL_INSTS_VMEM_WR                        = 0x1b,
	SQ_PERF_SEL_INSTS_VMEM_RD                        = 0x1c,
	SQ_PERF_SEL_INSTS_VMEM                           = 0x1d,
	SQ_PERF_SEL_INSTS_SALU                           = 0x1e,
	SQ_PERF_SEL_INSTS_SMEM                           = 0x1f,
	SQ_PERF_SEL_INSTS_FLAT                           = 0x20,
	SQ_PERF_SEL_INSTS_FLAT_LDS_ONLY                  = 0x21,
	SQ_PERF_SEL_INSTS_LDS                            = 0x22,
	SQ_PERF_SEL_INSTS_GDS                            = 0x23,
	SQ_PERF_SEL_INSTS_EXP                            = 0x24,
	SQ_PERF_SEL_INSTS_EXP_GDS                        = 0x25,
	SQ_PERF_SEL_INSTS_BRANCH                         = 0x26,
	SQ_PERF_SEL_INSTS_SENDMSG                        = 0x27,
	SQ_PERF_SEL_INSTS_VSKIPPED                       = 0x28,
	SQ_PERF_SEL_INST_LEVEL_VMEM                      = 0x29,
	SQ_PERF_SEL_INST_LEVEL_SMEM                      = 0x2a,
	SQ_PERF_SEL_INST_LEVEL_LDS                       = 0x2b,
	SQ_PERF_SEL_INST_LEVEL_GDS                       = 0x2c,
	SQ_PERF_SEL_INST_LEVEL_EXP                       = 0x2d,
	SQ_PERF_SEL_WAVE_CYCLES                          = 0x2e,
	SQ_PERF_SEL_WAVE_READY                           = 0x2f,
	SQ_PERF_SEL_WAIT_CNT_VM                          = 0x30,
	SQ_PERF_SEL_WAIT_CNT_LGKM                        = 0x31,
	SQ_PERF_SEL_WAIT_CNT_EXP                         = 0x32,
	SQ_PERF_SEL_WAIT_CNT_ANY                         = 0x33,
	SQ_PERF_SEL_WAIT_BARRIER                         = 0x34,
	SQ_PERF_SEL_WAIT_EXP_ALLOC                       = 0x35,
	SQ_PERF_SEL_WAIT_SLEEP                           = 0x36,
	SQ_PERF_SEL_WAIT_OTHER                           = 0x37,
	SQ_PERF_SEL_WAIT_ANY                             = 0x38,
	SQ_PERF_SEL_WAIT_TTRACE                          = 0x39,
	SQ_PERF_SEL_WAIT_IFETCH                          = 0x3a,
	SQ_PERF_SEL_WAIT_INST_VMEM                       = 0x3b,
	SQ_PERF_SEL_WAIT_INST_SCA                        = 0x3c,
	SQ_PERF_SEL_WAIT_INST_LDS                        = 0x3d,
	SQ_PERF_SEL_WAIT_INST_VALU                       = 0x3e,
	SQ_PERF_SEL_WAIT_INST_EXP_GDS                    = 0x3f,
	SQ_PERF_SEL_WAIT_INST_MISC                       = 0x40,
	SQ_PERF_SEL_WAIT_INST_FLAT                       = 0x41,
	SQ_PERF_SEL_ACTIVE_INST_ANY                      = 0x42,
	SQ_PERF_SEL_ACTIVE_INST_VMEM                     = 0x43,
	SQ_PERF_SEL_ACTIVE_INST_LDS                      = 0x44,
	SQ_PERF_SEL_ACTIVE_INST_VALU                     = 0x45,
	SQ_PERF_SEL_ACTIVE_INST_SCA                      = 0x46,
	SQ_PERF_SEL_ACTIVE_INST_EXP_GDS                  = 0x47,
	SQ_PERF_SEL_ACTIVE_INST_MISC                     = 0x48,
	SQ_PERF_SEL_ACTIVE_INST_FLAT                     = 0x49,
	SQ_PERF_SEL_INST_CYCLES_VMEM_WR                  = 0x4a,
	SQ_PERF_SEL_INST_CYCLES_VMEM_RD                  = 0x4b,
	SQ_PERF_SEL_INST_CYCLES_VMEM_ADDR                = 0x4c,
	SQ_PERF_SEL_INST_CYCLES_VMEM_DATA                = 0x4d,
	SQ_PERF_SEL_INST_CYCLES_VMEM_CMD                 = 0x4e,
	SQ_PERF_SEL_INST_CYCLES_VMEM                     = 0x4f,
	SQ_PERF_SEL_INST_CYCLES_LDS                      = 0x50,
	SQ_PERF_SEL_INST_CYCLES_VALU                     = 0x51,
	SQ_PERF_SEL_INST_CYCLES_EXP                      = 0x52,
	SQ_PERF_SEL_INST_CYCLES_GDS                      = 0x53,
	SQ_PERF_SEL_INST_CYCLES_SCA                      = 0x54,
	SQ_PERF_SEL_INST_CYCLES_SMEM                     = 0x55,
	SQ_PERF_SEL_INST_CYCLES_SALU                     = 0x56,
	SQ_PERF_SEL_INST_CYCLES_EXP_GDS                  = 0x57,
	SQ_PERF_SEL_INST_CYCLES_MISC                     = 0x58,
	SQ_PERF_SEL_THREAD_CYCLES_VALU                   = 0x59,
	SQ_PERF_SEL_THREAD_CYCLES_VALU_MAX               = 0x5a,
	SQ_PERF_SEL_IFETCH                               = 0x5b,
	SQ_PERF_SEL_IFETCH_LEVEL                         = 0x5c,
	SQ_PERF_SEL_CBRANCH_FORK                         = 0x5d,
	SQ_PERF_SEL_CBRANCH_FORK_SPLIT                   = 0x5e,
	SQ_PERF_SEL_VALU_LDS_DIRECT_RD                   = 0x5f,
	SQ_PERF_SEL_VALU_LDS_INTERP_OP                   = 0x60,
	SQ_PERF_SEL_LDS_BANK_CONFLICT                    = 0x61,
	SQ_PERF_SEL_LDS_ADDR_CONFLICT                    = 0x62,
	SQ_PERF_SEL_LDS_UNALIGNED_STALL                  = 0x63,
	SQ_PERF_SEL_LDS_MEM_VIOLATIONS                   = 0x64,
	SQ_PERF_SEL_LDS_ATOMIC_RETURN                    = 0x65,
	SQ_PERF_SEL_LDS_IDX_ACTIVE                       = 0x66,
	SQ_PERF_SEL_VALU_DEP_STALL                       = 0x67,
	SQ_PERF_SEL_VALU_STARVE                          = 0x68,
	SQ_PERF_SEL_EXP_REQ_FIFO_FULL                    = 0x69,
	SQ_PERF_SEL_LDS_BACK2BACK_STALL                  = 0x6a,
	SQ_PERF_SEL_LDS_DATA_FIFO_FULL                   = 0x6b,
	SQ_PERF_SEL_LDS_CMD_FIFO_FULL                    = 0x6c,
	SQ_PERF_SEL_VMEM_BACK2BACK_STALL                 = 0x6d,
	SQ_PERF_SEL_VMEM_TA_ADDR_FIFO_FULL               = 0x6e,
	SQ_PERF_SEL_VMEM_TA_CMD_FIFO_FULL                = 0x6f,
	SQ_PERF_SEL_VMEM_EX_DATA_REG_BUSY                = 0x70,
	SQ_PERF_SEL_VMEM_WR_BACK2BACK_STALL              = 0x71,
	SQ_PERF_SEL_VMEM_WR_TA_DATA_FIFO_FULL            = 0x72,
	SQ_PERF_SEL_VALU_SRC_C_CONFLICT                  = 0x73,
	SQ_PERF_SEL_VMEM_RD_SRC_CD_CONFLICT              = 0x74,
	SQ_PERF_SEL_VMEM_WR_SRC_CD_CONFLICT              = 0x75,
	SQ_PERF_SEL_FLAT_SRC_CD_CONFLICT                 = 0x76,
	SQ_PERF_SEL_LDS_SRC_CD_CONFLICT                  = 0x77,
	SQ_PERF_SEL_SRC_CD_BUSY                          = 0x78,
	SQ_PERF_SEL_PT_POWER_STALL                       = 0x79,
	SQ_PERF_SEL_USER0                                = 0x7a,
	SQ_PERF_SEL_USER1                                = 0x7b,
	SQ_PERF_SEL_USER2                                = 0x7c,
	SQ_PERF_SEL_USER3                                = 0x7d,
	SQ_PERF_SEL_USER4                                = 0x7e,
	SQ_PERF_SEL_USER5                                = 0x7f,
	SQ_PERF_SEL_USER6                                = 0x80,
	SQ_PERF_SEL_USER7                                = 0x81,
	SQ_PERF_SEL_USER8                                = 0x82,
	SQ_PERF_SEL_USER9                                = 0x83,
	SQ_PERF_SEL_USER10                               = 0x84,
	SQ_PERF_SEL_USER11                               = 0x85,
	SQ_PERF_SEL_USER12                               = 0x86,
	SQ_PERF_SEL_USER13                               = 0x87,
	SQ_PERF_SEL_USER14                               = 0x88,
	SQ_PERF_SEL_USER15                               = 0x89,
	SQ_PERF_SEL_USER_LEVEL0                          = 0x8a,
	SQ_PERF_SEL_USER_LEVEL1                          = 0x8b,
	SQ_PERF_SEL_USER_LEVEL2                          = 0x8c,
	SQ_PERF_SEL_USER_LEVEL3                          = 0x8d,
	SQ_PERF_SEL_USER_LEVEL4                          = 0x8e,
	SQ_PERF_SEL_USER_LEVEL5                          = 0x8f,
	SQ_PERF_SEL_USER_LEVEL6                          = 0x90,
	SQ_PERF_SEL_USER_LEVEL7                          = 0x91,
	SQ_PERF_SEL_USER_LEVEL8                          = 0x92,
	SQ_PERF_SEL_USER_LEVEL9                          = 0x93,
	SQ_PERF_SEL_USER_LEVEL10                         = 0x94,
	SQ_PERF_SEL_USER_LEVEL11                         = 0x95,
	SQ_PERF_SEL_USER_LEVEL12                         = 0x96,
	SQ_PERF_SEL_USER_LEVEL13                         = 0x97,
	SQ_PERF_SEL_USER_LEVEL14                         = 0x98,
	SQ_PERF_SEL_USER_LEVEL15                         = 0x99,
	SQ_PERF_SEL_POWER_VALU                           = 0x9a,
	SQ_PERF_SEL_POWER_VALU0                          = 0x9b,
	SQ_PERF_SEL_POWER_VALU1                          = 0x9c,
	SQ_PERF_SEL_POWER_VALU2                          = 0x9d,
	SQ_PERF_SEL_POWER_GPR_RD                         = 0x9e,
	SQ_PERF_SEL_POWER_GPR_WR                         = 0x9f,
	SQ_PERF_SEL_POWER_LDS_BUSY                       = 0xa0,
	SQ_PERF_SEL_POWER_ALU_BUSY                       = 0xa1,
	SQ_PERF_SEL_POWER_TEX_BUSY                       = 0xa2,
	SQ_PERF_SEL_ACCUM_PREV_HIRES                     = 0xa3,
	SQ_PERF_SEL_DUMMY_LAST                           = 0xa7,
	SQC_PERF_SEL_ICACHE_INPUT_VALID_READY            = 0xa8,
	SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB           = 0xa9,
	SQC_PERF_SEL_ICACHE_INPUT_VALIDB                 = 0xaa,
	SQC_PERF_SEL_DCACHE_INPUT_VALID_READY            = 0xab,
	SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB           = 0xac,
	SQC_PERF_SEL_DCACHE_INPUT_VALIDB                 = 0xad,
	SQC_PERF_SEL_TC_REQ                              = 0xae,
	SQC_PERF_SEL_TC_INST_REQ                         = 0xaf,
	SQC_PERF_SEL_TC_DATA_REQ                         = 0xb0,
	SQC_PERF_SEL_TC_STALL                            = 0xb1,
	SQC_PERF_SEL_TC_STARVE                           = 0xb2,
	SQC_PERF_SEL_ICACHE_BUSY_CYCLES                  = 0xb3,
	SQC_PERF_SEL_ICACHE_REQ                          = 0xb4,
	SQC_PERF_SEL_ICACHE_HITS                         = 0xb5,
	SQC_PERF_SEL_ICACHE_MISSES                       = 0xb6,
	SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE             = 0xb7,
	SQC_PERF_SEL_ICACHE_UNCACHED                     = 0xb8,
	SQC_PERF_SEL_ICACHE_VOLATILE                     = 0xb9,
	SQC_PERF_SEL_ICACHE_INVAL_INST                   = 0xba,
	SQC_PERF_SEL_ICACHE_INVAL_ASYNC                  = 0xbb,
	SQC_PERF_SEL_ICACHE_INVAL_VOLATILE_INST          = 0xbc,
	SQC_PERF_SEL_ICACHE_INVAL_VOLATILE_ASYNC         = 0xbd,
	SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT     = 0xbe,
	SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB      = 0xbf,
	SQC_PERF_SEL_ICACHE_CACHE_STALLED                = 0xc0,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_NONZERO = 0xc1,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX     = 0xc2,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_VOLATILE_MISMATCH= 0xc3,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_UNCACHED_HIT     = 0xc4,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT           = 0xc5,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0xc6,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_HIT_FIFO  = 0xc7,
	SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_TC_IF     = 0xc8,
	SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT   = 0xc9,
	SQC_PERF_SEL_DCACHE_BUSY_CYCLES                  = 0xca,
	SQC_PERF_SEL_DCACHE_REQ                          = 0xcb,
	SQC_PERF_SEL_DCACHE_HITS                         = 0xcc,
	SQC_PERF_SEL_DCACHE_MISSES                       = 0xcd,
	SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE             = 0xce,
	SQC_PERF_SEL_DCACHE_UNCACHED                     = 0xcf,
	SQC_PERF_SEL_DCACHE_VOLATILE                     = 0xd0,
	SQC_PERF_SEL_DCACHE_INVAL_INST                   = 0xd1,
	SQC_PERF_SEL_DCACHE_INVAL_ASYNC                  = 0xd2,
	SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_INST          = 0xd3,
	SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_ASYNC         = 0xd4,
	SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT     = 0xd5,
	SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB      = 0xd6,
	SQC_PERF_SEL_DCACHE_CACHE_STALLED                = 0xd7,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_NONZERO = 0xd8,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX     = 0xd9,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_VOLATILE_MISMATCH= 0xda,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_UNCACHED_HIT     = 0xdb,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT           = 0xdc,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0xdd,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_HIT_FIFO  = 0xde,
	SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_TC_IF     = 0xdf,
	SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT   = 0xe0,
	SQC_PERF_SEL_DCACHE_REQ_1                        = 0xe1,
	SQC_PERF_SEL_DCACHE_REQ_2                        = 0xe2,
	SQC_PERF_SEL_DCACHE_REQ_4                        = 0xe3,
	SQC_PERF_SEL_DCACHE_REQ_8                        = 0xe4,
	SQC_PERF_SEL_DCACHE_REQ_16                       = 0xe5,
	SQC_PERF_SEL_DCACHE_REQ_TIME                     = 0xe6,
	SQC_PERF_SEL_SQ_DCACHE_REQS                      = 0xe7,
	SQC_PERF_SEL_DCACHE_FLAT_REQ                     = 0xe8,
	SQC_PERF_SEL_DCACHE_NONFLAT_REQ                  = 0xe9,
	SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL               = 0xea,
	SQC_PERF_SEL_ICACHE_PRE_CC_LEVEL                 = 0xeb,
	SQC_PERF_SEL_ICACHE_POST_CC_LEVEL                = 0xec,
	SQC_PERF_SEL_ICACHE_POST_CC_HIT_LEVEL            = 0xed,
	SQC_PERF_SEL_ICACHE_POST_CC_MISS_LEVEL           = 0xee,
	SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL               = 0xef,
	SQC_PERF_SEL_DCACHE_PRE_CC_LEVEL                 = 0xf0,
	SQC_PERF_SEL_DCACHE_POST_CC_LEVEL                = 0xf1,
	SQC_PERF_SEL_DCACHE_POST_CC_HIT_LEVEL            = 0xf2,
	SQC_PERF_SEL_DCACHE_POST_CC_MISS_LEVEL           = 0xf3,
	SQC_PERF_SEL_TC_INFLIGHT_LEVEL                   = 0xf4,
	SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL            = 0xf5,
	SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL            = 0xf6,
	SQC_PERF_SEL_ERR_DCACHE_REQ_2_GPR_ADDR_UNALIGNED = 0xf7,
	SQC_PERF_SEL_ERR_DCACHE_REQ_4_GPR_ADDR_UNALIGNED = 0xf8,
	SQC_PERF_SEL_ERR_DCACHE_REQ_8_GPR_ADDR_UNALIGNED = 0xf9,
	SQC_PERF_SEL_ERR_DCACHE_REQ_16_GPR_ADDR_UNALIGNED= 0xfa,
	SQC_PERF_SEL_DUMMY_LAST                          = 0xfb,
} SQ_PERF_SEL;
/* SQC data-cache policy selector: two enumerators, encoded 0 and 1. */
typedef enum SQC_DATA_CACHE_POLICIES {
	SQC_DATA_CACHE_POLICY_HIT_LRU    = 0x00,
	SQC_DATA_CACHE_POLICY_MISS_EVICT = 0x01,
} SQC_DATA_CACHE_POLICIES;
/*
 * SQ CAC power selector, encoded 0-8.
 * The suffixes (VALU, VALU0-2, GPR_RD/WR, LDS/ALU/TEX_BUSY) appear in the same
 * order as the SQ_PERF_SEL_POWER_* enumerators of SQ_PERF_SEL.
 */
typedef enum SQ_CAC_POWER_SEL {
	SQ_CAC_POWER_VALU     = 0x00,
	SQ_CAC_POWER_VALU0    = 0x01,
	SQ_CAC_POWER_VALU1    = 0x02,
	SQ_CAC_POWER_VALU2    = 0x03,
	SQ_CAC_POWER_GPR_RD   = 0x04,
	SQ_CAC_POWER_GPR_WR   = 0x05,
	SQ_CAC_POWER_LDS_BUSY = 0x06,
	SQ_CAC_POWER_ALU_BUSY = 0x07,
	SQ_CAC_POWER_TEX_BUSY = 0x08,
} SQ_CAC_POWER_SEL;
/* SQ_IND_CMD command codes: NULL, HALT, RESUME, KILL, DEBUG, TRAP (0-5). */
typedef enum SQ_IND_CMD_CMD {
	SQ_IND_CMD_CMD_NULL   = 0x00,
	SQ_IND_CMD_CMD_HALT   = 0x01,
	SQ_IND_CMD_CMD_RESUME = 0x02,
	SQ_IND_CMD_CMD_KILL   = 0x03,
	SQ_IND_CMD_CMD_DEBUG  = 0x04,
	SQ_IND_CMD_CMD_TRAP   = 0x05,
} SQ_IND_CMD_CMD;
/*
 * SQ_IND_CMD mode codes (0-4): one SINGLE mode followed by four BROADCAST
 * variants (plain, _QUEUE, _PIPE, _ME).
 */
typedef enum SQ_IND_CMD_MODE {
	SQ_IND_CMD_MODE_SINGLE          = 0x00,
	SQ_IND_CMD_MODE_BROADCAST       = 0x01,
	SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x02,
	SQ_IND_CMD_MODE_BROADCAST_PIPE  = 0x03,
	SQ_IND_CMD_MODE_BROADCAST_ME    = 0x04,
} SQ_IND_CMD_MODE;
/*
 * SQ_DED_INFO source codes (0-6). Value 0 is the INVALID marker; 1-6 name
 * INST, SGPR, VGPR, LDS, GDS and TA respectively.
 */
typedef enum SQ_DED_INFO_SOURCE {
	SQ_DED_INFO_SOURCE_INVALID = 0x00,
	SQ_DED_INFO_SOURCE_INST    = 0x01,
	SQ_DED_INFO_SOURCE_SGPR    = 0x02,
	SQ_DED_INFO_SOURCE_VGPR    = 0x03,
	SQ_DED_INFO_SOURCE_LDS     = 0x04,
	SQ_DED_INFO_SOURCE_GDS     = 0x05,
	SQ_DED_INFO_SOURCE_TA      = 0x06,
} SQ_DED_INFO_SOURCE;
/* SQ rounding-mode codes: four enumerators encoded 0-3. */
typedef enum SQ_ROUND_MODE {
	SQ_ROUND_NEAREST_EVEN   = 0x00,
	SQ_ROUND_PLUS_INFINITY  = 0x01,
	SQ_ROUND_MINUS_INFINITY = 0x02,
	SQ_ROUND_TO_ZERO        = 0x03,
} SQ_ROUND_MODE;
/* SQ interrupt-word encoding selector: AUTO, INST, ERROR (0-2). */
typedef enum SQ_INTERRUPT_WORD_ENCODING {
	SQ_INTERRUPT_WORD_ENCODING_AUTO  = 0x00,
	SQ_INTERRUPT_WORD_ENCODING_INST  = 0x01,
	SQ_INTERRUPT_WORD_ENCODING_ERROR = 0x02,
} SQ_INTERRUPT_WORD_ENCODING;
/*
 * SQ export RAT instruction codes.
 *
 * Layout visible in the values below:
 *   - 0x00-0x16 : non-returning forms.
 *   - 0x20-0x33 : *_RTN forms; 0x21 is not assigned.
 *   - 0x17-0x1f : not assigned.
 * Where a *_RTN enumerator shares its stem with a non-RTN one (NOP, CMPXCHG_*,
 * ADD ... DEC_UINT), its value is the non-RTN value plus 0x20. The XCHG*_RTN
 * entries at 0x22/0x23 line up with STORE_RAW/STORE_RAW_FDENORM at 0x02/0x03.
 */
typedef enum ENUM_SQ_EXPORT_RAT_INST {
	SQ_EXPORT_RAT_INST_NOP                 = 0x00,
	SQ_EXPORT_RAT_INST_STORE_TYPED         = 0x01,
	SQ_EXPORT_RAT_INST_STORE_RAW           = 0x02,
	SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM   = 0x03,
	SQ_EXPORT_RAT_INST_CMPXCHG_INT         = 0x04,
	SQ_EXPORT_RAT_INST_CMPXCHG_FLT         = 0x05,
	SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM     = 0x06,
	SQ_EXPORT_RAT_INST_ADD                 = 0x07,
	SQ_EXPORT_RAT_INST_SUB                 = 0x08,
	SQ_EXPORT_RAT_INST_RSUB                = 0x09,
	SQ_EXPORT_RAT_INST_MIN_INT             = 0x0a,
	SQ_EXPORT_RAT_INST_MIN_UINT            = 0x0b,
	SQ_EXPORT_RAT_INST_MAX_INT             = 0x0c,
	SQ_EXPORT_RAT_INST_MAX_UINT            = 0x0d,
	SQ_EXPORT_RAT_INST_AND                 = 0x0e,
	SQ_EXPORT_RAT_INST_OR                  = 0x0f,
	SQ_EXPORT_RAT_INST_XOR                 = 0x10,
	SQ_EXPORT_RAT_INST_MSKOR               = 0x11,
	SQ_EXPORT_RAT_INST_INC_UINT            = 0x12,
	SQ_EXPORT_RAT_INST_DEC_UINT            = 0x13,
	SQ_EXPORT_RAT_INST_STORE_DWORD         = 0x14,
	SQ_EXPORT_RAT_INST_STORE_SHORT         = 0x15,
	SQ_EXPORT_RAT_INST_STORE_BYTE          = 0x16,

	SQ_EXPORT_RAT_INST_NOP_RTN             = 0x20,
	SQ_EXPORT_RAT_INST_XCHG_RTN            = 0x22,
	SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN    = 0x23,
	SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN     = 0x24,
	SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN     = 0x25,
	SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x26,
	SQ_EXPORT_RAT_INST_ADD_RTN             = 0x27,
	SQ_EXPORT_RAT_INST_SUB_RTN             = 0x28,
	SQ_EXPORT_RAT_INST_RSUB_RTN            = 0x29,
	SQ_EXPORT_RAT_INST_MIN_INT_RTN         = 0x2a,
	SQ_EXPORT_RAT_INST_MIN_UINT_RTN        = 0x2b,
	SQ_EXPORT_RAT_INST_MAX_INT_RTN         = 0x2c,
	SQ_EXPORT_RAT_INST_MAX_UINT_RTN        = 0x2d,
	SQ_EXPORT_RAT_INST_AND_RTN             = 0x2e,
	SQ_EXPORT_RAT_INST_OR_RTN              = 0x2f,
	SQ_EXPORT_RAT_INST_XOR_RTN             = 0x30,
	SQ_EXPORT_RAT_INST_MSKOR_RTN           = 0x31,
	SQ_EXPORT_RAT_INST_INC_UINT_RTN        = 0x32,
	SQ_EXPORT_RAT_INST_DEC_UINT_RTN        = 0x33,
} ENUM_SQ_EXPORT_RAT_INST;
/* SQ_IBUF state codes: eight enumerators encoded 0-7. */
typedef enum SQ_IBUF_ST {
	SQ_IBUF_IB_IDLE            = 0x00,
	SQ_IBUF_IB_INI_WAIT_GNT    = 0x01,
	SQ_IBUF_IB_INI_WAIT_DRET   = 0x02,
	SQ_IBUF_IB_LE_4DW          = 0x03,
	SQ_IBUF_IB_WAIT_DRET       = 0x04,
	SQ_IBUF_IB_EMPTY_WAIT_DRET = 0x05,
	SQ_IBUF_IB_DRET            = 0x06,
	SQ_IBUF_IB_EMPTY_WAIT_GNT  = 0x07,
} SQ_IBUF_ST;
/*
 * SQ_INST_STR state codes: eight enumerators encoded 0-7.
 * The enumerator spellings (including "NORML" and "WAVE2ID") are part of the
 * public interface and are kept exactly as declared.
 */
typedef enum SQ_INST_STR_ST {
	SQ_INST_STR_IB_WAVE_NORML                 = 0x00,
	SQ_INST_STR_IB_WAVE2ID_NORMAL_INST_AV     = 0x01,
	SQ_INST_STR_IB_WAVE_INTERNAL_INST_AV      = 0x02,
	SQ_INST_STR_IB_WAVE_INST_SKIP_AV          = 0x03,
	SQ_INST_STR_IB_WAVE_SETVSKIP_ST0          = 0x04,
	SQ_INST_STR_IB_WAVE_SETVSKIP_ST1          = 0x05,
	SQ_INST_STR_IB_WAVE_NOP_SLEEP_WAIT        = 0x06,
	SQ_INST_STR_IB_WAVE_PC_FROM_SGPR_MSG_WAIT = 0x07,
} SQ_INST_STR_ST;
/* SQ_WAVE_IB ECC state codes: four enumerators encoded 0-3. */
typedef enum SQ_WAVE_IB_ECC_ST {
	SQ_WAVE_IB_ECC_CLEAN        = 0x00,
	SQ_WAVE_IB_ECC_ERR_CONTINUE = 0x01,
	SQ_WAVE_IB_ECC_ERR_HALT     = 0x02,
	SQ_WAVE_IB_ECC_WITH_ERR_MSG = 0x03,
} SQ_WAVE_IB_ECC_ST;
/* SH_MEM alignment-mode codes: four enumerators encoded 0-3. */
typedef enum SH_MEM_ALIGNMENT_MODE {
	SH_MEM_ALIGNMENT_MODE_DWORD        = 0x00,
	SH_MEM_ALIGNMENT_MODE_DWORD_STRICT = 0x01,
	SH_MEM_ALIGNMENT_MODE_STRICT       = 0x02,
	SH_MEM_ALIGNMENT_MODE_UNALIGNED    = 0x03,
} SH_MEM_ALIGNMENT_MODE;
/*
 * SQ_WAVE_TYPE_PS0 followed by one SQ_THREAD_TRACE_LFSR_* constant per suffix
 * (PS, VS, GS, ES, HS, LS, CS).
 * NOTE(review): the suffixes look like shader-stage abbreviations and the
 * values like LFSR configuration words, but nothing in this file states that;
 * confirm against the hardware documentation before relying on it.
 */
#define SQ_WAVE_TYPE_PS0                          0x0
#define SQ_THREAD_TRACE_LFSR_PS                   0x8016
#define SQ_THREAD_TRACE_LFSR_VS                   0x801c
#define SQ_THREAD_TRACE_LFSR_GS                   0x801f
#define SQ_THREAD_TRACE_LFSR_ES                   0x8029
#define SQ_THREAD_TRACE_LFSR_HS                   0x805e
#define SQ_THREAD_TRACE_LFSR_LS                   0x806b
#define SQ_THREAD_TRACE_LFSR_CS                   0x8097
/*
 * SQIND_* OFFSET/SIZE pairs. If SIZE is counted in the same units as OFFSET,
 * the four regions are contiguous and non-overlapping:
 *   GLOBAL_REGS  [0x000, 0x008)
 *   LOCAL_REGS   [0x008, 0x010)
 *   WAVE_HWREGS  [0x010, 0x200)
 *   WAVE_SGPRS   [0x200, 0x400)
 * NOTE(review): the unit (register index vs. byte) is not shown here; confirm.
 */
#define SQIND_GLOBAL_REGS_OFFSET                  0x0
#define SQIND_GLOBAL_REGS_SIZE                    0x8
#define SQIND_LOCAL_REGS_OFFSET                   0x8
#define SQIND_LOCAL_REGS_SIZE                     0x8
#define SQIND_WAVE_HWREGS_OFFSET                  0x10
#define SQIND_WAVE_HWREGS_SIZE                    0x1f0
#define SQIND_WAVE_SGPRS_OFFSET                   0x200
#define SQIND_WAVE_SGPRS_SIZE                     0x200
/*
 * BEGIN/END pairs for the named SQ decode ranges, plus the
 * SQ_GFXDEC_STATE_ID_SHIFT bit position.
 * NOTE(review): whether END is inclusive or exclusive is not shown here.
 * SQDEC_END is 0x23ff (looks inclusive) while every other END is an aligned
 * value (looks exclusive); confirm before computing range sizes.
 */
#define SQ_GFXDEC_BEGIN                           0xa000
#define SQ_GFXDEC_END                             0xc000
#define SQ_GFXDEC_STATE_ID_SHIFT                  0xa
#define SQDEC_BEGIN                               0x2300
#define SQDEC_END                                 0x23ff
#define SQPERFSDEC_BEGIN                          0xd9c0
#define SQPERFSDEC_END                            0xda40
#define SQPERFDDEC_BEGIN                          0xd1c0
#define SQPERFDDEC_END                            0xd240
#define SQGFXUDEC_BEGIN                           0xc340
#define SQGFXUDEC_END                             0xc380
#define SQPWRDEC_BEGIN                            0xf08c
#define SQPWRDEC_END                              0xf094
/*
 * Miscellaneous SQ limits and identifiers.
 * In decimal: SQ_MAX_PGM_SGPRS = 104, SQ_MAX_PGM_VGPRS = 256,
 * SQ_INTERRUPT_ID = 239.
 */
#define SQ_DISPATCHER_GFX_MIN                     0x10
#define SQ_DISPATCHER_GFX_CNT_PER_RING            0x8
#define SQ_MAX_PGM_SGPRS                          0x68
#define SQ_MAX_PGM_VGPRS                          0x100
#define SQ_THREAD_TRACE_TIME_UNIT                 0x4
#define SQ_INTERRUPT_ID                           0xef
/*
 * SQ_EX_MODE exception indices.
 * VALU_BASE/VALU_SIZE describe the window [0, 7). INVALID through INT_DIV0
 * (0-6) fall inside it; ADDR_WATCH (7) and MEM_VIOL (8) fall outside it.
 * NOTE(review): presumably bit positions within an exception-mask field;
 * confirm against the register description.
 */
#define SQ_EX_MODE_EXCP_VALU_BASE                 0x0
#define SQ_EX_MODE_EXCP_VALU_SIZE                 0x7
#define SQ_EX_MODE_EXCP_INVALID                   0x0
#define SQ_EX_MODE_EXCP_INPUT_DENORM              0x1
#define SQ_EX_MODE_EXCP_DIV0                      0x2
#define SQ_EX_MODE_EXCP_OVERFLOW                  0x3
#define SQ_EX_MODE_EXCP_UNDERFLOW                 0x4
#define SQ_EX_MODE_EXCP_INEXACT                   0x5
#define SQ_EX_MODE_EXCP_INT_DIV0                  0x6
#define SQ_EX_MODE_EXCP_ADDR_WATCH                0x7
#define SQ_EX_MODE_EXCP_MEM_VIOL                  0x8
/*
 * Reserved INST_ID_* values at the top of the 32-bit range.
 * 0xfffffff0-0xfffffff3 are consecutive; the next one defined here is
 * 0xfffffffe (0xfffffff4-0xfffffffd are not assigned in this list).
 */
#define INST_ID_ECC_INTERRUPT_MSG                 0xfffffff0
#define INST_ID_TTRACE_NEW_PC_MSG                 0xfffffff1
#define INST_ID_HW_TRAP                           0xfffffff2
#define INST_ID_KILL_SEQ                          0xfffffff3
#define INST_ID_HOST_REG_TRAP_MSG                 0xfffffffe
/*
 * Instruction-encoding identifiers, one BITS/MASK/FIELD triple per encoding.
 *
 * For every triple below, FIELD equals BITS shifted right to the lowest set
 * bit of MASK (e.g. SOP1: 0xbe800000 >> 23 == 0x17d; VOP1: 0x7e000000 >> 25
 * == 0x3f), and BITS has no bits set outside MASK.
 * NOTE(review): presumably a 32-bit instruction word `w` belongs to encoding X
 * when (w & SQ_ENC_X_MASK) == SQ_ENC_X_BITS; confirm against the ISA
 * documentation.
 *
 * The masks have different widths, so FIELD values are only comparable
 * between encodings that share a MASK: SQ_ENC_VOPC_FIELD and SQ_ENC_EXP_FIELD
 * are both 0x3e but belong to 7-bit and 6-bit masks respectively.
 */
#define SQ_ENC_SOP1_BITS                          0xbe800000
#define SQ_ENC_SOP1_MASK                          0xff800000
#define SQ_ENC_SOP1_FIELD                         0x17d
#define SQ_ENC_SOPC_BITS                          0xbf000000
#define SQ_ENC_SOPC_MASK                          0xff800000
#define SQ_ENC_SOPC_FIELD                         0x17e
#define SQ_ENC_SOPP_BITS                          0xbf800000
#define SQ_ENC_SOPP_MASK                          0xff800000
#define SQ_ENC_SOPP_FIELD                         0x17f
#define SQ_ENC_SOPK_BITS                          0xb0000000
#define SQ_ENC_SOPK_MASK                          0xf0000000
#define SQ_ENC_SOPK_FIELD                         0xb
#define SQ_ENC_SOP2_BITS                          0x80000000
#define SQ_ENC_SOP2_MASK                          0xc0000000
#define SQ_ENC_SOP2_FIELD                         0x2
#define SQ_ENC_SMRD_BITS                          0xc0000000
#define SQ_ENC_SMRD_MASK                          0xf8000000
#define SQ_ENC_SMRD_FIELD                         0x18
#define SQ_ENC_VOP1_BITS                          0x7e000000
#define SQ_ENC_VOP1_MASK                          0xfe000000
#define SQ_ENC_VOP1_FIELD                         0x3f
#define SQ_ENC_VOPC_BITS                          0x7c000000
#define SQ_ENC_VOPC_MASK                          0xfe000000
#define SQ_ENC_VOPC_FIELD                         0x3e
#define SQ_ENC_VOP2_BITS                          0x0
#define SQ_ENC_VOP2_MASK                          0x80000000
#define SQ_ENC_VOP2_FIELD                         0x0
#define SQ_ENC_VINTRP_BITS                        0xc8000000
#define SQ_ENC_VINTRP_MASK                        0xfc000000
#define SQ_ENC_VINTRP_FIELD                       0x32
#define SQ_ENC_VOP3_BITS                          0xd0000000
#define SQ_ENC_VOP3_MASK                          0xfc000000
#define SQ_ENC_VOP3_FIELD                         0x34
#define SQ_ENC_DS_BITS                            0xd8000000
#define SQ_ENC_DS_MASK                            0xfc000000
#define SQ_ENC_DS_FIELD                           0x36
#define SQ_ENC_MUBUF_BITS                         0xe0000000
#define SQ_ENC_MUBUF_MASK                         0xfc000000
#define SQ_ENC_MUBUF_FIELD                        0x38
#define SQ_ENC_MTBUF_BITS                         0xe8000000
#define SQ_ENC_MTBUF_MASK                         0xfc000000
#define SQ_ENC_MTBUF_FIELD                        0x3a
#define SQ_ENC_MIMG_BITS                          0xf0000000
#define SQ_ENC_MIMG_MASK                          0xfc000000
#define SQ_ENC_MIMG_FIELD                         0x3c
#define SQ_ENC_EXP_BITS                           0xf8000000
#define SQ_ENC_EXP_MASK                           0xfc000000
#define SQ_ENC_EXP_FIELD                          0x3e
#define SQ_ENC_FLAT_BITS                          0xdc000000
#define SQ_ENC_FLAT_MASK                          0xfc000000
#define SQ_ENC_FLAT_FIELD                         0x37
/*
 * Unordered mix of *_SHIFT, *_SIZE and count constants. The SHIFT/SIZE pairs
 * are scattered through the list; collected here for convenience:
 *
 *   field               SHIFT  SIZE
 *   SQ_WAITCNT_VM        0x0    0x4
 *   SQ_WAITCNT_EXP       0x4    0x3
 *   SQ_WAITCNT_LGKM      0x8    0x5
 *   SQ_SENDMSG_MSG       0x0    0x4
 *   SQ_SENDMSG_GSOP      0x4    0x2
 *   SQ_SENDMSG_SYSTEM    0x4    0x3
 *   SQ_SENDMSG_STREAMID  0x8    0x2
 *   SQ_HWREG_ID          0x0    0x6
 *   SQ_HWREG_OFFSET      0x6    0x5
 *   SQ_HWREG_SIZE        0xb    0x5
 *
 * NOTE(review): SHIFT/SIZE are presumably the bit position and bit width of
 * each field within its immediate operand; confirm against the ISA
 * documentation. The remaining entries are counts (SQ_NUM_*, SQ_EXP_NUM_*,
 * SQ_V_OP*_COUNT) and the SQ_SRC_VGPR_BIT flag value.
 */
#define SQ_WAITCNT_VM_SHIFT                       0x0
#define SQ_SENDMSG_STREAMID_SIZE                  0x2
#define SQ_V_OPC_COUNT                            0x100
#define SQ_HWREG_OFFSET_SIZE                      0x5
#define SQ_HWREG_OFFSET_SHIFT                     0x6
#define SQ_NUM_ATTR                               0x21
#define SQ_NUM_VGPR                               0x100
#define SQ_SENDMSG_MSG_SIZE                       0x4
#define SQ_NUM_TTMP                               0xc
#define SQ_HWREG_ID_SIZE                          0x6
#define SQ_SENDMSG_GSOP_SIZE                      0x2
#define SQ_NUM_SGPR                               0x68
#define SQ_EXP_NUM_MRT                            0x8
#define SQ_SENDMSG_SYSTEM_SIZE                    0x3
#define SQ_WAITCNT_LGKM_SHIFT                     0x8
#define SQ_WAITCNT_EXP_SIZE                       0x3
#define SQ_SENDMSG_SYSTEM_SHIFT                   0x4
#define SQ_HWREG_SIZE_SHIFT                       0xb
#define SQ_EXP_NUM_GDS                            0x5
#define SQ_SENDMSG_MSG_SHIFT                      0x0
#define SQ_WAITCNT_EXP_SHIFT                      0x4
#define SQ_WAITCNT_VM_SIZE                        0x4
#define SQ_SENDMSG_GSOP_SHIFT                     0x4
#define SQ_SRC_VGPR_BIT                           0x100
#define SQ_V_OP2_COUNT                            0x40
#define SQ_EXP_NUM_PARAM                          0x20
#define SQ_SENDMSG_STREAMID_SHIFT                 0x8
#define SQ_V_OP1_COUNT                            0x80
#define SQ_WAITCNT_LGKM_SIZE                      0x5
#define SQ_EXP_NUM_POS                            0x4
#define SQ_HWREG_SIZE_SIZE                        0x5
#define SQ_HWREG_ID_SHIFT                         0x0
/*
 * SQ_S_* opcode numbers 0x3-0x35, in ascending order.
 * Values 0x0-0x2 and 0x23 are not assigned in this list.
 * NOTE(review): presumably the opcode field values for the scalar
 * single-source (SOP1) encoding; confirm against the ISA documentation.
 */
#define SQ_S_MOV_B32                              0x3
#define SQ_S_MOV_B64                              0x4
#define SQ_S_CMOV_B32                             0x5
#define SQ_S_CMOV_B64                             0x6
#define SQ_S_NOT_B32                              0x7
#define SQ_S_NOT_B64                              0x8
#define SQ_S_WQM_B32                              0x9
#define SQ_S_WQM_B64                              0xa
#define SQ_S_BREV_B32                             0xb
#define SQ_S_BREV_B64                             0xc
#define SQ_S_BCNT0_I32_B32                        0xd
#define SQ_S_BCNT0_I32_B64                        0xe
#define SQ_S_BCNT1_I32_B32                        0xf
#define SQ_S_BCNT1_I32_B64                        0x10
#define SQ_S_FF0_I32_B32                          0x11
#define SQ_S_FF0_I32_B64                          0x12
#define SQ_S_FF1_I32_B32                          0x13
#define SQ_S_FF1_I32_B64                          0x14
#define SQ_S_FLBIT_I32_B32                        0x15
#define SQ_S_FLBIT_I32_B64                        0x16
#define SQ_S_FLBIT_I32                            0x17
#define SQ_S_FLBIT_I32_I64                        0x18
#define SQ_S_SEXT_I32_I8                          0x19
#define SQ_S_SEXT_I32_I16                         0x1a
#define SQ_S_BITSET0_B32                          0x1b
#define SQ_S_BITSET0_B64                          0x1c
#define SQ_S_BITSET1_B32                          0x1d
#define SQ_S_BITSET1_B64                          0x1e
#define SQ_S_GETPC_B64                            0x1f
#define SQ_S_SETPC_B64                            0x20
#define SQ_S_SWAPPC_B64                           0x21
#define SQ_S_RFE_B64                              0x22
#define SQ_S_AND_SAVEEXEC_B64                     0x24
#define SQ_S_OR_SAVEEXEC_B64                      0x25
#define SQ_S_XOR_SAVEEXEC_B64                     0x26
#define SQ_S_ANDN2_SAVEEXEC_B64                   0x27
#define SQ_S_ORN2_SAVEEXEC_B64                    0x28
#define SQ_S_NAND_SAVEEXEC_B64                    0x29
#define SQ_S_NOR_SAVEEXEC_B64                     0x2a
#define SQ_S_XNOR_SAVEEXEC_B64                    0x2b
#define SQ_S_QUADMASK_B32                         0x2c
#define SQ_S_QUADMASK_B64                         0x2d
#define SQ_S_MOVRELS_B32                          0x2e
#define SQ_S_MOVRELS_B64                          0x2f
#define SQ_S_MOVRELD_B32                          0x30
#define SQ_S_MOVRELD_B64                          0x31
#define SQ_S_CBRANCH_JOIN                         0x32
#define SQ_S_MOV_REGRD_B32                        0x33
#define SQ_S_ABS_I32                              0x34
#define SQ_S_MOV_FED_B32                          0x35
/* Index of the first attribute (SQ_NUM_ATTR elsewhere in this file is 0x21). */
#define SQ_ATTR0                                  0x0
/*
 * SQ_S_*K_* / GETREG / SETREG opcode numbers 0x0-0x15, in ascending order.
 * Value 0x1 is not assigned in this list.
 * NOTE(review): presumably the opcode field values for the scalar
 * constant-operand (SOPK) encoding; confirm against the ISA documentation.
 * These numbers overlap the SQ_S_* list above, so an opcode is only meaningful
 * together with its encoding.
 */
#define SQ_S_MOVK_I32                             0x0
#define SQ_S_CMOVK_I32                            0x2
#define SQ_S_CMPK_EQ_I32                          0x3
#define SQ_S_CMPK_LG_I32                          0x4
#define SQ_S_CMPK_GT_I32                          0x5
#define SQ_S_CMPK_GE_I32                          0x6
#define SQ_S_CMPK_LT_I32                          0x7
#define SQ_S_CMPK_LE_I32                          0x8
#define SQ_S_CMPK_EQ_U32                          0x9
#define SQ_S_CMPK_LG_U32                          0xa
#define SQ_S_CMPK_GT_U32                          0xb
#define SQ_S_CMPK_GE_U32                          0xc
#define SQ_S_CMPK_LT_U32                          0xd
#define SQ_S_CMPK_LE_U32                          0xe
#define SQ_S_ADDK_I32                             0xf
#define SQ_S_MULK_I32                             0x10
#define SQ_S_CBRANCH_I_FORK                       0x11
#define SQ_S_GETREG_B32                           0x12
#define SQ_S_SETREG_B32                           0x13
#define SQ_S_GETREG_REGRD_B32                     0x14
#define SQ_S_SETREG_IMM32_B32                     0x15
/*
 * SQ_TBA_LO/HI, SQ_TMA_LO/HI and SQ_TTMP0-SQ_TTMP11 occupy the consecutive
 * values 0x6c-0x7b. The twelve TTMP entries agree with SQ_NUM_TTMP (0xc)
 * defined elsewhere in this file.
 * NOTE(review): presumably scalar operand selector codes; confirm against the
 * ISA documentation.
 */
#define SQ_TBA_LO                                 0x6c
#define SQ_TBA_HI                                 0x6d
#define SQ_TMA_LO                                 0x6e
#define SQ_TMA_HI                                 0x6f
#define SQ_TTMP0                                  0x70
#define SQ_TTMP1                                  0x71
#define SQ_TTMP2                                  0x72
#define SQ_TTMP3                                  0x73
#define SQ_TTMP4                                  0x74
#define SQ_TTMP5                                  0x75
#define SQ_TTMP6                                  0x76
#define SQ_TTMP7                                  0x77
#define SQ_TTMP8                                  0x78
#define SQ_TTMP9                                  0x79
#define SQ_TTMP10                                 0x7a
#define SQ_TTMP11                                 0x7b
/* Index of the first VGPR. */
#define SQ_VGPR0                                  0x0
/*
 * Export opcode (SQ_EXP) and the first index of each export target group:
 * MRT0 = 0x0, MRTZ = 0x8, NULL = 0x9, POS0 = 0xc, PARAM0 = 0x20.
 * NOTE(review): together with SQ_EXP_NUM_MRT/POS/PARAM elsewhere in this file
 * this suggests MRT 0x0-0x7, POS 0xc-0xf and PARAM 0x20-0x3f; confirm.
 */
#define SQ_EXP                                    0x0
#define SQ_EXP_MRT0                               0x0
#define SQ_EXP_MRTZ                               0x8
#define SQ_EXP_NULL                               0x9
#define SQ_EXP_POS0                               0xc
#define SQ_EXP_PARAM0                             0x20
/* SQ_CNT1-SQ_CNT4 are encoded as 0-3, i.e. the name minus one. */
#define SQ_CNT1                                   0x0
#define SQ_CNT2                                   0x1
#define SQ_CNT3                                   0x2
#define SQ_CNT4                                   0x3
/*
 * Sixteen comparison codes, 0x0-0xf. The SQ_V_CMP*_F32/F64 opcodes in this
 * file use the same ordering in their low four bits (e.g. SQ_LT is 0x1 and
 * SQ_V_CMP_LT_F32 is 0x1, SQ_V_CMPX_LT_F32 is 0x11).
 * NOTE(review): the N-prefixed codes (0x9-0xe) are presumably the negated
 * forms of GE/LG/GT/LE/EQ/LT, and O/U ordered/unordered tests; confirm
 * against the ISA documentation.
 */
#define SQ_F                                      0x0
#define SQ_LT                                     0x1
#define SQ_EQ                                     0x2
#define SQ_LE                                     0x3
#define SQ_GT                                     0x4
#define SQ_LG                                     0x5
#define SQ_GE                                     0x6
#define SQ_O                                      0x7
#define SQ_U                                      0x8
#define SQ_NGE                                    0x9
#define SQ_NLG                                    0xa
#define SQ_NGT                                    0xb
#define SQ_NLE                                    0xc
#define SQ_NEQ                                    0xd
#define SQ_NLT                                    0xe
#define SQ_TRU                                    0xf
/*
 * Floating-point SQ_V_CMP / SQ_V_CMPX opcode numbers, 0x00-0x3f.
 * Each opcode is a group base plus the 4-bit comparison code
 * (F=0x0 ... TRU=0xf, same order as the SQ_F..SQ_TRU constants):
 *   SQ_V_CMP_*_F32   base 0x00
 *   SQ_V_CMPX_*_F32  base 0x10
 *   SQ_V_CMP_*_F64   base 0x20
 *   SQ_V_CMPX_*_F64  base 0x30
 */
#define SQ_V_CMP_F_F32                            0x0
#define SQ_V_CMP_LT_F32                           0x1
#define SQ_V_CMP_EQ_F32                           0x2
#define SQ_V_CMP_LE_F32                           0x3
#define SQ_V_CMP_GT_F32                           0x4
#define SQ_V_CMP_LG_F32                           0x5
#define SQ_V_CMP_GE_F32                           0x6
#define SQ_V_CMP_O_F32                            0x7
#define SQ_V_CMP_U_F32                            0x8
#define SQ_V_CMP_NGE_F32                          0x9
#define SQ_V_CMP_NLG_F32                          0xa
#define SQ_V_CMP_NGT_F32                          0xb
#define SQ_V_CMP_NLE_F32                          0xc
#define SQ_V_CMP_NEQ_F32                          0xd
#define SQ_V_CMP_NLT_F32                          0xe
#define SQ_V_CMP_TRU_F32                          0xf
#define SQ_V_CMPX_F_F32                           0x10
#define SQ_V_CMPX_LT_F32                          0x11
#define SQ_V_CMPX_EQ_F32                          0x12
#define SQ_V_CMPX_LE_F32                          0x13
#define SQ_V_CMPX_GT_F32                          0x14
#define SQ_V_CMPX_LG_F32                          0x15
#define SQ_V_CMPX_GE_F32                          0x16
#define SQ_V_CMPX_O_F32                           0x17
#define SQ_V_CMPX_U_F32                           0x18
#define SQ_V_CMPX_NGE_F32                         0x19
#define SQ_V_CMPX_NLG_F32                         0x1a
#define SQ_V_CMPX_NGT_F32                         0x1b
#define SQ_V_CMPX_NLE_F32                         0x1c
#define SQ_V_CMPX_NEQ_F32                         0x1d
#define SQ_V_CMPX_NLT_F32                         0x1e
#define SQ_V_CMPX_TRU_F32                         0x1f
#define SQ_V_CMP_F_F64                            0x20
#define SQ_V_CMP_LT_F64                           0x21
#define SQ_V_CMP_EQ_F64                           0x22
#define SQ_V_CMP_LE_F64                           0x23
#define SQ_V_CMP_GT_F64                           0x24
#define SQ_V_CMP_LG_F64                           0x25
#define SQ_V_CMP_GE_F64                           0x26
#define SQ_V_CMP_O_F64                            0x27
#define SQ_V_CMP_U_F64                            0x28
#define SQ_V_CMP_NGE_F64                          0x29
#define SQ_V_CMP_NLG_F64                          0x2a
#define SQ_V_CMP_NGT_F64                          0x2b
#define SQ_V_CMP_NLE_F64                          0x2c
#define SQ_V_CMP_NEQ_F64                          0x2d
#define SQ_V_CMP_NLT_F64                          0x2e
#define SQ_V_CMP_TRU_F64                          0x2f
#define SQ_V_CMPX_F_F64                           0x30
#define SQ_V_CMPX_LT_F64                          0x31
#define SQ_V_CMPX_EQ_F64                          0x32
#define SQ_V_CMPX_LE_F64                          0x33
#define SQ_V_CMPX_GT_F64                          0x34
#define SQ_V_CMPX_LG_F64                          0x35
#define SQ_V_CMPX_GE_F64                          0x36
#define SQ_V_CMPX_O_F64                           0x37
#define SQ_V_CMPX_U_F64                           0x38
#define SQ_V_CMPX_NGE_F64                         0x39
#define SQ_V_CMPX_NLG_F64                         0x3a
#define SQ_V_CMPX_NGT_F64                         0x3b
#define SQ_V_CMPX_NLE_F64                         0x3c
#define SQ_V_CMPX_NEQ_F64                         0x3d
#define SQ_V_CMPX_NLT_F64                         0x3e
#define SQ_V_CMPX_TRU_F64                         0x3f
/*
 * Floating-point SQ_V_CMPS / SQ_V_CMPSX opcode numbers, 0x40-0x7f.
 * Each opcode is a group base plus the 4-bit comparison code
 * (F=0x0 ... TRU=0xf, same order as the SQ_F..SQ_TRU constants):
 *   SQ_V_CMPS_*_F32   base 0x40
 *   SQ_V_CMPSX_*_F32  base 0x50
 *   SQ_V_CMPS_*_F64   base 0x60
 *   SQ_V_CMPSX_*_F64  base 0x70
 */
#define SQ_V_CMPS_F_F32                           0x40
#define SQ_V_CMPS_LT_F32                          0x41
#define SQ_V_CMPS_EQ_F32                          0x42
#define SQ_V_CMPS_LE_F32                          0x43
#define SQ_V_CMPS_GT_F32                          0x44
#define SQ_V_CMPS_LG_F32                          0x45
#define SQ_V_CMPS_GE_F32                          0x46
#define SQ_V_CMPS_O_F32                           0x47
#define SQ_V_CMPS_U_F32                           0x48
#define SQ_V_CMPS_NGE_F32                         0x49
#define SQ_V_CMPS_NLG_F32                         0x4a
#define SQ_V_CMPS_NGT_F32                         0x4b
#define SQ_V_CMPS_NLE_F32                         0x4c
#define SQ_V_CMPS_NEQ_F32                         0x4d
#define SQ_V_CMPS_NLT_F32                         0x4e
#define SQ_V_CMPS_TRU_F32                         0x4f
#define SQ_V_CMPSX_F_F32                          0x50
#define SQ_V_CMPSX_LT_F32                         0x51
#define SQ_V_CMPSX_EQ_F32                         0x52
#define SQ_V_CMPSX_LE_F32                         0x53
#define SQ_V_CMPSX_GT_F32                         0x54
#define SQ_V_CMPSX_LG_F32                         0x55
#define SQ_V_CMPSX_GE_F32                         0x56
#define SQ_V_CMPSX_O_F32                          0x57
#define SQ_V_CMPSX_U_F32                          0x58
#define SQ_V_CMPSX_NGE_F32                        0x59
#define SQ_V_CMPSX_NLG_F32                        0x5a
#define SQ_V_CMPSX_NGT_F32                        0x5b
#define SQ_V_CMPSX_NLE_F32                        0x5c
#define SQ_V_CMPSX_NEQ_F32                        0x5d
#define SQ_V_CMPSX_NLT_F32                        0x5e
#define SQ_V_CMPSX_TRU_F32                        0x5f
#define SQ_V_CMPS_F_F64                           0x60
#define SQ_V_CMPS_LT_F64                          0x61
#define SQ_V_CMPS_EQ_F64                          0x62
#define SQ_V_CMPS_LE_F64                          0x63
#define SQ_V_CMPS_GT_F64                          0x64
#define SQ_V_CMPS_LG_F64                          0x65
#define SQ_V_CMPS_GE_F64                          0x66
#define SQ_V_CMPS_O_F64                           0x67
#define SQ_V_CMPS_U_F64                           0x68
#define SQ_V_CMPS_NGE_F64                         0x69
#define SQ_V_CMPS_NLG_F64                         0x6a
#define SQ_V_CMPS_NGT_F64                         0x6b
#define SQ_V_CMPS_NLE_F64                         0x6c
#define SQ_V_CMPS_NEQ_F64                         0x6d
#define SQ_V_CMPS_NLT_F64                         0x6e
#define SQ_V_CMPS_TRU_F64                         0x6f
#define SQ_V_CMPSX_F_F64                          0x70
#define SQ_V_CMPSX_LT_F64                         0x71
#define SQ_V_CMPSX_EQ_F64                         0x72
#define SQ_V_CMPSX_LE_F64                         0x73
#define SQ_V_CMPSX_GT_F64                         0x74
#define SQ_V_CMPSX_LG_F64                         0x75
#define SQ_V_CMPSX_GE_F64                         0x76
#define SQ_V_CMPSX_O_F64                          0x77
#define SQ_V_CMPSX_U_F64                          0x78
#define SQ_V_CMPSX_NGE_F64                        0x79
#define SQ_V_CMPSX_NLG_F64                        0x7a
#define SQ_V_CMPSX_NGT_F64                        0x7b
#define SQ_V_CMPSX_NLE_F64                        0x7c
#define SQ_V_CMPSX_NEQ_F64                        0x7d
#define SQ_V_CMPSX_NLT_F64                        0x7e
#define SQ_V_CMPSX_TRU_F64                        0x7f
/*
 * SQ_V_CMP_* / SQ_V_CMPX_* integer compare encodings.
 *
 * Eight families of eight entries each; the family base is:
 *   0x80 CMP_I32   0x90 CMPX_I32   0xa0 CMP_I64   0xb0 CMPX_I64
 *   0xc0 CMP_U32   0xd0 CMPX_U32   0xe0 CMP_U64   0xf0 CMPX_U64
 * and the low three bits select the predicate in the order
 *   F, LT, EQ, LE, GT, NE, GE, T   (offsets 0..7).
 * Offsets 0x8-0xf of each family are not assigned in this run.
 *
 * NOTE(review): I/U presumably mean signed/unsigned operands and the
 * "X" forms presumably also write EXEC -- confirm against the ISA docs.
 */
#define SQ_V_CMP_F_I32                            0x80
#define SQ_V_CMP_LT_I32                           0x81
#define SQ_V_CMP_EQ_I32                           0x82
#define SQ_V_CMP_LE_I32                           0x83
#define SQ_V_CMP_GT_I32                           0x84
#define SQ_V_CMP_NE_I32                           0x85
#define SQ_V_CMP_GE_I32                           0x86
#define SQ_V_CMP_T_I32                            0x87
#define SQ_V_CMPX_F_I32                           0x90
#define SQ_V_CMPX_LT_I32                          0x91
#define SQ_V_CMPX_EQ_I32                          0x92
#define SQ_V_CMPX_LE_I32                          0x93
#define SQ_V_CMPX_GT_I32                          0x94
#define SQ_V_CMPX_NE_I32                          0x95
#define SQ_V_CMPX_GE_I32                          0x96
#define SQ_V_CMPX_T_I32                           0x97
#define SQ_V_CMP_F_I64                            0xa0
#define SQ_V_CMP_LT_I64                           0xa1
#define SQ_V_CMP_EQ_I64                           0xa2
#define SQ_V_CMP_LE_I64                           0xa3
#define SQ_V_CMP_GT_I64                           0xa4
#define SQ_V_CMP_NE_I64                           0xa5
#define SQ_V_CMP_GE_I64                           0xa6
#define SQ_V_CMP_T_I64                            0xa7
#define SQ_V_CMPX_F_I64                           0xb0
#define SQ_V_CMPX_LT_I64                          0xb1
#define SQ_V_CMPX_EQ_I64                          0xb2
#define SQ_V_CMPX_LE_I64                          0xb3
#define SQ_V_CMPX_GT_I64                          0xb4
#define SQ_V_CMPX_NE_I64                          0xb5
#define SQ_V_CMPX_GE_I64                          0xb6
#define SQ_V_CMPX_T_I64                           0xb7
#define SQ_V_CMP_F_U32                            0xc0
#define SQ_V_CMP_LT_U32                           0xc1
#define SQ_V_CMP_EQ_U32                           0xc2
#define SQ_V_CMP_LE_U32                           0xc3
#define SQ_V_CMP_GT_U32                           0xc4
#define SQ_V_CMP_NE_U32                           0xc5
#define SQ_V_CMP_GE_U32                           0xc6
#define SQ_V_CMP_T_U32                            0xc7
#define SQ_V_CMPX_F_U32                           0xd0
#define SQ_V_CMPX_LT_U32                          0xd1
#define SQ_V_CMPX_EQ_U32                          0xd2
#define SQ_V_CMPX_LE_U32                          0xd3
#define SQ_V_CMPX_GT_U32                          0xd4
#define SQ_V_CMPX_NE_U32                          0xd5
#define SQ_V_CMPX_GE_U32                          0xd6
#define SQ_V_CMPX_T_U32                           0xd7
#define SQ_V_CMP_F_U64                            0xe0
#define SQ_V_CMP_LT_U64                           0xe1
#define SQ_V_CMP_EQ_U64                           0xe2
#define SQ_V_CMP_LE_U64                           0xe3
#define SQ_V_CMP_GT_U64                           0xe4
#define SQ_V_CMP_NE_U64                           0xe5
#define SQ_V_CMP_GE_U64                           0xe6
#define SQ_V_CMP_T_U64                            0xe7
#define SQ_V_CMPX_F_U64                           0xf0
#define SQ_V_CMPX_LT_U64                          0xf1
#define SQ_V_CMPX_EQ_U64                          0xf2
#define SQ_V_CMPX_LE_U64                          0xf3
#define SQ_V_CMPX_GT_U64                          0xf4
#define SQ_V_CMPX_NE_U64                          0xf5
#define SQ_V_CMPX_GE_U64                          0xf6
#define SQ_V_CMPX_T_U64                           0xf7
/*
 * SQ_V_CMP(X)_CLASS_* encodings.  Each value is offset 0x8 inside one of
 * the 0x80 / 0x90 / 0xa0 / 0xb0 ranges whose offsets 0x0-0x7 are used by
 * the SQ_V_CMP(X)_*_I32 / _I64 macros, which is why these four are listed
 * out of numeric order.
 * NOTE(review): presumably floating-point class-test compares -- confirm.
 */
#define SQ_V_CMP_CLASS_F32                        0x88
#define SQ_V_CMPX_CLASS_F32                       0x98
#define SQ_V_CMP_CLASS_F64                        0xa8
#define SQ_V_CMPX_CLASS_F64                       0xb8
/* NOTE(review): presumably the operand encoding of the first scalar GPR
 * (other SGPRs addressed as an offset from it) -- confirm against ISA docs. */
#define SQ_SGPR0                                  0x0
/*
 * Generic 3-bit comparison selectors, numbered 0..7 in the order
 * F, LT, EQ, LE, GT, NE, GE, T.  This is the same order as the low three
 * bits of the SQ_V_CMP(X)_*_I32/I64/U32/U64 values.
 * NOTE(review): F/T presumably mean "always false"/"always true".
 */
#define SQ_F                                      0x0
#define SQ_LT                                     0x1
#define SQ_EQ                                     0x2
#define SQ_LE                                     0x3
#define SQ_GT                                     0x4
#define SQ_NE                                     0x5
#define SQ_GE                                     0x6
#define SQ_T                                      0x7
/*
 * Source-operand selectors 0xc0-0xd0.
 *   SQ_SRC_64_INT   = 0xc0 (one past SQ_SRC_63_INT = 0xbf)
 *   SQ_SRC_M_n_INT  = 0xc0 + n, for n = 1..16
 * NOTE(review): "M_n" presumably denotes the inline integer constant -n,
 * and SQ_SRC_64_INT the constant +64 -- confirm against the ISA docs.
 */
#define SQ_SRC_64_INT                             0xc0
#define SQ_SRC_M_1_INT                            0xc1
#define SQ_SRC_M_2_INT                            0xc2
#define SQ_SRC_M_3_INT                            0xc3
#define SQ_SRC_M_4_INT                            0xc4
#define SQ_SRC_M_5_INT                            0xc5
#define SQ_SRC_M_6_INT                            0xc6
#define SQ_SRC_M_7_INT                            0xc7
#define SQ_SRC_M_8_INT                            0xc8
#define SQ_SRC_M_9_INT                            0xc9
#define SQ_SRC_M_10_INT                           0xca
#define SQ_SRC_M_11_INT                           0xcb
#define SQ_SRC_M_12_INT                           0xcc
#define SQ_SRC_M_13_INT                           0xcd
#define SQ_SRC_M_14_INT                           0xce
#define SQ_SRC_M_15_INT                           0xcf
#define SQ_SRC_M_16_INT                           0xd0
/*
 * Source-operand selectors 0xf0-0xf7, alternating a value and its "M_"
 * counterpart: 0_5, M_0_5, 1, M_1, 2, M_2, 4, M_4.
 * NOTE(review): presumably the inline floating-point constants
 * +/-0.5, +/-1.0, +/-2.0, +/-4.0 -- confirm against the ISA docs.
 */
#define SQ_SRC_0_5                                0xf0
#define SQ_SRC_M_0_5                              0xf1
#define SQ_SRC_1                                  0xf2
#define SQ_SRC_M_1                                0xf3
#define SQ_SRC_2                                  0xf4
#define SQ_SRC_M_2                                0xf5
#define SQ_SRC_4                                  0xf6
#define SQ_SRC_M_4                                0xf7
/*
 * Source-operand selectors 0x80-0xbf.
 *   SQ_SRC_0      = 0x80
 *   SQ_SRC_n_INT  = 0x80 + n, for n = 1..63
 * (SQ_SRC_64_INT = 0xc0 continues the sequence but is defined separately.)
 * NOTE(review): presumably the inline integer constants 0..63 -- confirm
 * against the ISA docs.
 */
#define SQ_SRC_0                                  0x80
#define SQ_SRC_1_INT                              0x81
#define SQ_SRC_2_INT                              0x82
#define SQ_SRC_3_INT                              0x83
#define SQ_SRC_4_INT                              0x84
#define SQ_SRC_5_INT                              0x85
#define SQ_SRC_6_INT                              0x86
#define SQ_SRC_7_INT                              0x87
#define SQ_SRC_8_INT                              0x88
#define SQ_SRC_9_INT                              0x89
#define SQ_SRC_10_INT                             0x8a
#define SQ_SRC_11_INT                             0x8b
#define SQ_SRC_12_INT                             0x8c
#define SQ_SRC_13_INT                             0x8d
#define SQ_SRC_14_INT                             0x8e
#define SQ_SRC_15_INT                             0x8f
#define SQ_SRC_16_INT                             0x90
#define SQ_SRC_17_INT                             0x91
#define SQ_SRC_18_INT                             0x92
#define SQ_SRC_19_INT                             0x93
#define SQ_SRC_20_INT                             0x94
#define SQ_SRC_21_INT                             0x95
#define SQ_SRC_22_INT                             0x96
#define SQ_SRC_23_INT                             0x97
#define SQ_SRC_24_INT                             0x98
#define SQ_SRC_25_INT                             0x99
#define SQ_SRC_26_INT                             0x9a
#define SQ_SRC_27_INT                             0x9b
#define SQ_SRC_28_INT                             0x9c
#define SQ_SRC_29_INT                             0x9d
#define SQ_SRC_30_INT                             0x9e
#define SQ_SRC_31_INT                             0x9f
#define SQ_SRC_32_INT                             0xa0
#define SQ_SRC_33_INT                             0xa1
#define SQ_SRC_34_INT                             0xa2
#define SQ_SRC_35_INT                             0xa3
#define SQ_SRC_36_INT                             0xa4
#define SQ_SRC_37_INT                             0xa5
#define SQ_SRC_38_INT                             0xa6
#define SQ_SRC_39_INT                             0xa7
#define SQ_SRC_40_INT                             0xa8
#define SQ_SRC_41_INT                             0xa9
#define SQ_SRC_42_INT                             0xaa
#define SQ_SRC_43_INT                             0xab
#define SQ_SRC_44_INT                             0xac
#define SQ_SRC_45_INT                             0xad
#define SQ_SRC_46_INT                             0xae
#define SQ_SRC_47_INT                             0xaf
#define SQ_SRC_48_INT                             0xb0
#define SQ_SRC_49_INT                             0xb1
#define SQ_SRC_50_INT                             0xb2
#define SQ_SRC_51_INT                             0xb3
#define SQ_SRC_52_INT                             0xb4
#define SQ_SRC_53_INT                             0xb5
#define SQ_SRC_54_INT                             0xb6
#define SQ_SRC_55_INT                             0xb7
#define SQ_SRC_56_INT                             0xb8
#define SQ_SRC_57_INT                             0xb9
#define SQ_SRC_58_INT                             0xba
#define SQ_SRC_59_INT                             0xbb
#define SQ_SRC_60_INT                             0xbc
#define SQ_SRC_61_INT                             0xbd
#define SQ_SRC_62_INT                             0xbe
#define SQ_SRC_63_INT                             0xbf
/*
 * SQ_BUFFER_* encodings (NOTE(review): presumably untyped buffer memory
 * instruction opcodes -- confirm against the ISA docs).
 *
 * Layout facts visible in the values:
 *   0x00-0x07  LOAD_FORMAT_X..XYZW then STORE_FORMAT_X..XYZW
 *   0x08-0x0f  LOAD_* by size; note DWORDX3 (0xf) sorts after DWORDX4 (0xe)
 *   0x18-0x1f  STORE_*; DWORDX3 (0x1f) again follows DWORDX4 (0x1e), and
 *              0x19 / 0x1b are not assigned
 *   0x30-0x40  ATOMIC_* (0x34 not assigned)
 *   0x50-0x60  ATOMIC_*_X2, each equal to the non-X2 value + 0x20
 *   0x70-0x71  WBINVL1_VOL, WBINVL1
 * The numbering gaps are part of the encoding; do not renumber.
 */
#define SQ_BUFFER_LOAD_FORMAT_X                   0x0
#define SQ_BUFFER_LOAD_FORMAT_XY                  0x1
#define SQ_BUFFER_LOAD_FORMAT_XYZ                 0x2
#define SQ_BUFFER_LOAD_FORMAT_XYZW                0x3
#define SQ_BUFFER_STORE_FORMAT_X                  0x4
#define SQ_BUFFER_STORE_FORMAT_XY                 0x5
#define SQ_BUFFER_STORE_FORMAT_XYZ                0x6
#define SQ_BUFFER_STORE_FORMAT_XYZW               0x7
#define SQ_BUFFER_LOAD_UBYTE                      0x8
#define SQ_BUFFER_LOAD_SBYTE                      0x9
#define SQ_BUFFER_LOAD_USHORT                     0xa
#define SQ_BUFFER_LOAD_SSHORT                     0xb
#define SQ_BUFFER_LOAD_DWORD                      0xc
#define SQ_BUFFER_LOAD_DWORDX2                    0xd
#define SQ_BUFFER_LOAD_DWORDX4                    0xe
#define SQ_BUFFER_LOAD_DWORDX3                    0xf
#define SQ_BUFFER_STORE_BYTE                      0x18
#define SQ_BUFFER_STORE_SHORT                     0x1a
#define SQ_BUFFER_STORE_DWORD                     0x1c
#define SQ_BUFFER_STORE_DWORDX2                   0x1d
#define SQ_BUFFER_STORE_DWORDX4                   0x1e
#define SQ_BUFFER_STORE_DWORDX3                   0x1f
#define SQ_BUFFER_ATOMIC_SWAP                     0x30
#define SQ_BUFFER_ATOMIC_CMPSWAP                  0x31
#define SQ_BUFFER_ATOMIC_ADD                      0x32
#define SQ_BUFFER_ATOMIC_SUB                      0x33
#define SQ_BUFFER_ATOMIC_SMIN                     0x35
#define SQ_BUFFER_ATOMIC_UMIN                     0x36
#define SQ_BUFFER_ATOMIC_SMAX                     0x37
#define SQ_BUFFER_ATOMIC_UMAX                     0x38
#define SQ_BUFFER_ATOMIC_AND                      0x39
#define SQ_BUFFER_ATOMIC_OR                       0x3a
#define SQ_BUFFER_ATOMIC_XOR                      0x3b
#define SQ_BUFFER_ATOMIC_INC                      0x3c
#define SQ_BUFFER_ATOMIC_DEC                      0x3d
#define SQ_BUFFER_ATOMIC_FCMPSWAP                 0x3e
#define SQ_BUFFER_ATOMIC_FMIN                     0x3f
#define SQ_BUFFER_ATOMIC_FMAX                     0x40
#define SQ_BUFFER_ATOMIC_SWAP_X2                  0x50
#define SQ_BUFFER_ATOMIC_CMPSWAP_X2               0x51
#define SQ_BUFFER_ATOMIC_ADD_X2                   0x52
#define SQ_BUFFER_ATOMIC_SUB_X2                   0x53
#define SQ_BUFFER_ATOMIC_SMIN_X2                  0x55
#define SQ_BUFFER_ATOMIC_UMIN_X2                  0x56
#define SQ_BUFFER_ATOMIC_SMAX_X2                  0x57
#define SQ_BUFFER_ATOMIC_UMAX_X2                  0x58
#define SQ_BUFFER_ATOMIC_AND_X2                   0x59
#define SQ_BUFFER_ATOMIC_OR_X2                    0x5a
#define SQ_BUFFER_ATOMIC_XOR_X2                   0x5b
#define SQ_BUFFER_ATOMIC_INC_X2                   0x5c
#define SQ_BUFFER_ATOMIC_DEC_X2                   0x5d
#define SQ_BUFFER_ATOMIC_FCMPSWAP_X2              0x5e
#define SQ_BUFFER_ATOMIC_FMIN_X2                  0x5f
#define SQ_BUFFER_ATOMIC_FMAX_X2                  0x60
#define SQ_BUFFER_WBINVL1_VOL                     0x70
#define SQ_BUFFER_WBINVL1                         0x71
/*
 * SQ_DS_* encodings, values 0x00-0x3f (NOTE(review): presumably data-share
 * (LDS/GDS) instruction opcodes -- confirm against the ISA docs).
 *
 * Layout facts visible in the values:
 *   0x00-0x14  32-bit operations without a "_RTN" suffix, ending in DS_NOP
 *   0x18-0x1d  DS_GWS_* entries (0x15-0x17 not assigned)
 *   0x1e-0x1f  DS_WRITE_B8 / DS_WRITE_B16
 *   0x20-0x33  "_RTN" forms; each is the matching 0x00-0x13 entry + 0x20
 *              (the WRITE* entries at 0x0d-0x0f pair with WRXCHG*_RTN)
 *   0x34-0x3f  WRAP_RTN, SWIZZLE, READ*, CONSUME, APPEND, ORDERED_COUNT
 */
#define SQ_DS_ADD_U32                             0x0
#define SQ_DS_SUB_U32                             0x1
#define SQ_DS_RSUB_U32                            0x2
#define SQ_DS_INC_U32                             0x3
#define SQ_DS_DEC_U32                             0x4
#define SQ_DS_MIN_I32                             0x5
#define SQ_DS_MAX_I32                             0x6
#define SQ_DS_MIN_U32                             0x7
#define SQ_DS_MAX_U32                             0x8
#define SQ_DS_AND_B32                             0x9
#define SQ_DS_OR_B32                              0xa
#define SQ_DS_XOR_B32                             0xb
#define SQ_DS_MSKOR_B32                           0xc
#define SQ_DS_WRITE_B32                           0xd
#define SQ_DS_WRITE2_B32                          0xe
#define SQ_DS_WRITE2ST64_B32                      0xf
#define SQ_DS_CMPST_B32                           0x10
#define SQ_DS_CMPST_F32                           0x11
#define SQ_DS_MIN_F32                             0x12
#define SQ_DS_MAX_F32                             0x13
#define SQ_DS_NOP                                 0x14
#define SQ_DS_GWS_SEMA_RELEASE_ALL                0x18
#define SQ_DS_GWS_INIT                            0x19
#define SQ_DS_GWS_SEMA_V                          0x1a
#define SQ_DS_GWS_SEMA_BR                         0x1b
#define SQ_DS_GWS_SEMA_P                          0x1c
#define SQ_DS_GWS_BARRIER                         0x1d
#define SQ_DS_WRITE_B8                            0x1e
#define SQ_DS_WRITE_B16                           0x1f
#define SQ_DS_ADD_RTN_U32                         0x20
#define SQ_DS_SUB_RTN_U32                         0x21
#define SQ_DS_RSUB_RTN_U32                        0x22
#define SQ_DS_INC_RTN_U32                         0x23
#define SQ_DS_DEC_RTN_U32                         0x24
#define SQ_DS_MIN_RTN_I32                         0x25
#define SQ_DS_MAX_RTN_I32                         0x26
#define SQ_DS_MIN_RTN_U32                         0x27
#define SQ_DS_MAX_RTN_U32                         0x28
#define SQ_DS_AND_RTN_B32                         0x29
#define SQ_DS_OR_RTN_B32                          0x2a
#define SQ_DS_XOR_RTN_B32                         0x2b
#define SQ_DS_MSKOR_RTN_B32                       0x2c
#define SQ_DS_WRXCHG_RTN_B32                      0x2d
#define SQ_DS_WRXCHG2_RTN_B32                     0x2e
#define SQ_DS_WRXCHG2ST64_RTN_B32                 0x2f
#define SQ_DS_CMPST_RTN_B32                       0x30
#define SQ_DS_CMPST_RTN_F32                       0x31
#define SQ_DS_MIN_RTN_F32                         0x32
#define SQ_DS_MAX_RTN_F32                         0x33
#define SQ_DS_WRAP_RTN_B32                        0x34
#define SQ_DS_SWIZZLE_B32                         0x35
#define SQ_DS_READ_B32                            0x36
#define SQ_DS_READ2_B32                           0x37
#define SQ_DS_READ2ST64_B32                       0x38
#define SQ_DS_READ_I8                             0x39
#define SQ_DS_READ_U8                             0x3a
#define SQ_DS_READ_I16                            0x3b
#define SQ_DS_READ_U16                            0x3c
#define SQ_DS_CONSUME                             0x3d
#define SQ_DS_APPEND                              0x3e
#define SQ_DS_ORDERED_COUNT                       0x3f
/*
 * SQ_DS_* encodings, values 0x40-0x7e: the 64-bit counterparts of the
 * 32-bit SQ_DS_* entries.
 *   0x40-0x53  *_U64/_I64/_B64/_F64; each equals the 32-bit value + 0x40
 *   0x60-0x73  "_RTN" 64-bit forms; each equals the 32-bit RTN value + 0x40
 *   0x76-0x78  READ_B64, READ2_B64, READ2ST64_B64 (32-bit READ* + 0x40)
 *   0x7e       CONDXCHG32_RTN_B64
 * Values 0x54-0x5f, 0x74-0x75 and 0x79-0x7d are not assigned in this run.
 */
#define SQ_DS_ADD_U64                             0x40
#define SQ_DS_SUB_U64                             0x41
#define SQ_DS_RSUB_U64                            0x42
#define SQ_DS_INC_U64                             0x43
#define SQ_DS_DEC_U64                             0x44
#define SQ_DS_MIN_I64                             0x45
#define SQ_DS_MAX_I64                             0x46
#define SQ_DS_MIN_U64                             0x47
#define SQ_DS_MAX_U64                             0x48
#define SQ_DS_AND_B64                             0x49
#define SQ_DS_OR_B64                              0x4a
#define SQ_DS_XOR_B64                             0x4b
#define SQ_DS_MSKOR_B64                           0x4c
#define SQ_DS_WRITE_B64                           0x4d
#define SQ_DS_WRITE2_B64                          0x4e
#define SQ_DS_WRITE2ST64_B64                      0x4f
#define SQ_DS_CMPST_B64                           0x50
#define SQ_DS_CMPST_F64                           0x51
#define SQ_DS_MIN_F64                             0x52
#define SQ_DS_MAX_F64                             0x53
#define SQ_DS_ADD_RTN_U64                         0x60
#define SQ_DS_SUB_RTN_U64                         0x61
#define SQ_DS_RSUB_RTN_U64                        0x62
#define SQ_DS_INC_RTN_U64                         0x63
#define SQ_DS_DEC_RTN_U64                         0x64
#define SQ_DS_MIN_RTN_I64                         0x65
#define SQ_DS_MAX_RTN_I64                         0x66
#define SQ_DS_MIN_RTN_U64                         0x67
#define SQ_DS_MAX_RTN_U64                         0x68
#define SQ_DS_AND_RTN_B64                         0x69
#define SQ_DS_OR_RTN_B64                          0x6a
#define SQ_DS_XOR_RTN_B64                         0x6b
#define SQ_DS_MSKOR_RTN_B64                       0x6c
#define SQ_DS_WRXCHG_RTN_B64                      0x6d
#define SQ_DS_WRXCHG2_RTN_B64                     0x6e
#define SQ_DS_WRXCHG2ST64_RTN_B64                 0x6f
#define SQ_DS_CMPST_RTN_B64                       0x70
#define SQ_DS_CMPST_RTN_F64                       0x71
#define SQ_DS_MIN_RTN_F64                         0x72
#define SQ_DS_MAX_RTN_F64                         0x73
#define SQ_DS_READ_B64                            0x76
#define SQ_DS_READ2_B64                           0x77
#define SQ_DS_READ2ST64_B64                       0x78
#define SQ_DS_CONDXCHG32_RTN_B64                  0x7e
/*
 * SQ_DS_*_SRC2_* encodings.
 *   0x80-0x93  32-bit SRC2 forms; each equals the plain 32-bit SQ_DS_*
 *              value + 0x80 (e.g. ADD_U32 0x00 -> ADD_SRC2_U32 0x80)
 *   0xc0-0xd3  64-bit SRC2 forms; each equals the plain 64-bit SQ_DS_*
 *              value + 0x80 (e.g. ADD_U64 0x40 -> ADD_SRC2_U64 0xc0)
 * Only a subset of the plain operations has a SRC2 form, hence the gaps
 * (no MSKOR, WRITE2*, CMPST* counterparts).
 * NOTE(review): "SRC2" presumably means the second operand is also read
 * from data-share memory -- confirm against the ISA docs.
 */
#define SQ_DS_ADD_SRC2_U32                        0x80
#define SQ_DS_SUB_SRC2_U32                        0x81
#define SQ_DS_RSUB_SRC2_U32                       0x82
#define SQ_DS_INC_SRC2_U32                        0x83
#define SQ_DS_DEC_SRC2_U32                        0x84
#define SQ_DS_MIN_SRC2_I32                        0x85
#define SQ_DS_MAX_SRC2_I32                        0x86
#define SQ_DS_MIN_SRC2_U32                        0x87
#define SQ_DS_MAX_SRC2_U32                        0x88
#define SQ_DS_AND_SRC2_B32                        0x89
#define SQ_DS_OR_SRC2_B32                         0x8a
#define SQ_DS_XOR_SRC2_B32                        0x8b
#define SQ_DS_WRITE_SRC2_B32                      0x8d
#define SQ_DS_MIN_SRC2_F32                        0x92
#define SQ_DS_MAX_SRC2_F32                        0x93
#define SQ_DS_ADD_SRC2_U64                        0xc0
#define SQ_DS_SUB_SRC2_U64                        0xc1
#define SQ_DS_RSUB_SRC2_U64                       0xc2
#define SQ_DS_INC_SRC2_U64                        0xc3
#define SQ_DS_DEC_SRC2_U64                        0xc4
#define SQ_DS_MIN_SRC2_I64                        0xc5
#define SQ_DS_MAX_SRC2_I64                        0xc6
#define SQ_DS_MIN_SRC2_U64                        0xc7
#define SQ_DS_MAX_SRC2_U64                        0xc8
#define SQ_DS_AND_SRC2_B64                        0xc9
#define SQ_DS_OR_SRC2_B64                         0xca
#define SQ_DS_XOR_SRC2_B64                        0xcb
#define SQ_DS_WRITE_SRC2_B64                      0xcd
#define SQ_DS_MIN_SRC2_F64                        0xd2
#define SQ_DS_MAX_SRC2_F64                        0xd3
/*
 * SQ_DS_* entries for 96- and 128-bit widths, placed at the top of the
 * 8-bit value range (0xde-0xdf and 0xfd-0xff).
 * Note that SQ_DS_CONDXCHG32_RTN_B128 shares the numeric value 0xfd with
 * SQ_SRC_SCC; the two macros belong to different name families.
 */
#define SQ_DS_WRITE_B96                           0xde
#define SQ_DS_WRITE_B128                          0xdf
#define SQ_DS_CONDXCHG32_RTN_B128                 0xfd
#define SQ_DS_READ_B96                            0xfe
#define SQ_DS_READ_B128                           0xff
/* Source-operand selector 0xfd.
 * NOTE(review): presumably selects the scalar condition code (SCC) as an
 * operand -- confirm against the ISA docs. */
#define SQ_SRC_SCC                                0xfd
/*
 * SQ_OMOD_* selectors, numbered 0..3 in the order OFF, M2, M4, D2.
 * NOTE(review): presumably the output modifier field, with M2/M4 meaning
 * multiply by 2/4 and D2 meaning divide by 2 -- confirm against ISA docs.
 */
#define SQ_OMOD_OFF                               0x0
#define SQ_OMOD_M2                                0x1
#define SQ_OMOD_M4                                0x2
#define SQ_OMOD_D2                                0x3
/* NOTE(review): presumably the first GDS export target index; other
 * SQ_EXP_* targets are not visible in this part of the file -- confirm. */
#define SQ_EXP_GDS0                               0x18
/*
 * SQ_GS_OP_* selectors, numbered 0..3.  The values compose bitwise:
 * EMIT_CUT (0x3) == CUT (0x1) | EMIT (0x2).
 * NOTE(review): presumably geometry-shader message operations -- confirm.
 */
#define SQ_GS_OP_NOP                              0x0
#define SQ_GS_OP_CUT                              0x1
#define SQ_GS_OP_EMIT                             0x2
#define SQ_GS_OP_EMIT_CUT                         0x3
/*
 * SQ_IMAGE_* encodings, values 0x00-0x1f: loads, stores, GET_RESINFO and
 * atomics (NOTE(review): presumably image memory instruction opcodes --
 * confirm against the ISA docs).
 *
 *   0x00-0x05  LOAD variants (MIP / PCK / PCK_SGN combinations)
 *   0x08-0x0b  STORE variants
 *   0x0e       GET_RESINFO
 *   0x0f-0x1f  ATOMIC_*, in the same operation order as the
 *              SQ_BUFFER_ATOMIC_* list (SWAP, CMPSWAP, ADD, SUB, a gap,
 *              SMIN ... FMAX)
 * Values 0x06-0x07, 0x0c-0x0d and 0x13 are not assigned here.
 */
#define SQ_IMAGE_LOAD                             0x0
#define SQ_IMAGE_LOAD_MIP                         0x1
#define SQ_IMAGE_LOAD_PCK                         0x2
#define SQ_IMAGE_LOAD_PCK_SGN                     0x3
#define SQ_IMAGE_LOAD_MIP_PCK                     0x4
#define SQ_IMAGE_LOAD_MIP_PCK_SGN                 0x5
#define SQ_IMAGE_STORE                            0x8
#define SQ_IMAGE_STORE_MIP                        0x9
#define SQ_IMAGE_STORE_PCK                        0xa
#define SQ_IMAGE_STORE_MIP_PCK                    0xb
#define SQ_IMAGE_GET_RESINFO                      0xe
#define SQ_IMAGE_ATOMIC_SWAP                      0xf
#define SQ_IMAGE_ATOMIC_CMPSWAP                   0x10
#define SQ_IMAGE_ATOMIC_ADD                       0x11
#define SQ_IMAGE_ATOMIC_SUB                       0x12
#define SQ_IMAGE_ATOMIC_SMIN                      0x14
#define SQ_IMAGE_ATOMIC_UMIN                      0x15
#define SQ_IMAGE_ATOMIC_SMAX                      0x16
#define SQ_IMAGE_ATOMIC_UMAX                      0x17
#define SQ_IMAGE_ATOMIC_AND                       0x18
#define SQ_IMAGE_ATOMIC_OR                        0x19
#define SQ_IMAGE_ATOMIC_XOR                       0x1a
#define SQ_IMAGE_ATOMIC_INC                       0x1b
#define SQ_IMAGE_ATOMIC_DEC                       0x1c
#define SQ_IMAGE_ATOMIC_FCMPSWAP                  0x1d
#define SQ_IMAGE_ATOMIC_FMIN                      0x1e
#define SQ_IMAGE_ATOMIC_FMAX                      0x1f
/*
 * SQ_IMAGE_SAMPLE* encodings, values 0x20-0x3f, laid out as a bit field:
 *   low 3 bits : variant, in the order (none), CL, D, D_CL, L, B, B_CL, LZ
 *   + 0x08     : the "_C" form of the same variant
 *   + 0x10     : the "_O" form of the same variant
 * e.g. SAMPLE_B = 0x25, SAMPLE_C_B = 0x2d, SAMPLE_B_O = 0x35,
 *      SAMPLE_C_B_O = 0x3d.
 * NOTE(review): the suffixes presumably mean C = depth compare,
 * O = texel offset, CL = LOD clamp, D = derivatives, L = explicit LOD,
 * B = LOD bias, LZ = LOD zero -- confirm against the ISA docs.
 */
#define SQ_IMAGE_SAMPLE                           0x20
#define SQ_IMAGE_SAMPLE_CL                        0x21
#define SQ_IMAGE_SAMPLE_D                         0x22
#define SQ_IMAGE_SAMPLE_D_CL                      0x23
#define SQ_IMAGE_SAMPLE_L                         0x24
#define SQ_IMAGE_SAMPLE_B                         0x25
#define SQ_IMAGE_SAMPLE_B_CL                      0x26
#define SQ_IMAGE_SAMPLE_LZ                        0x27
#define SQ_IMAGE_SAMPLE_C                         0x28
#define SQ_IMAGE_SAMPLE_C_CL                      0x29
#define SQ_IMAGE_SAMPLE_C_D                       0x2a
#define SQ_IMAGE_SAMPLE_C_D_CL                    0x2b
#define SQ_IMAGE_SAMPLE_C_L                       0x2c
#define SQ_IMAGE_SAMPLE_C_B                       0x2d
#define SQ_IMAGE_SAMPLE_C_B_CL                    0x2e
#define SQ_IMAGE_SAMPLE_C_LZ                      0x2f
#define SQ_IMAGE_SAMPLE_O                         0x30
#define SQ_IMAGE_SAMPLE_CL_O                      0x31
#define SQ_IMAGE_SAMPLE_D_O                       0x32
#define SQ_IMAGE_SAMPLE_D_CL_O                    0x33
#define SQ_IMAGE_SAMPLE_L_O                       0x34
#define SQ_IMAGE_SAMPLE_B_O                       0x35
#define SQ_IMAGE_SAMPLE_B_CL_O                    0x36
#define SQ_IMAGE_SAMPLE_LZ_O                      0x37
#define SQ_IMAGE_SAMPLE_C_O                       0x38
#define SQ_IMAGE_SAMPLE_C_CL_O                    0x39
#define SQ_IMAGE_SAMPLE_C_D_O                     0x3a
#define SQ_IMAGE_SAMPLE_C_D_CL_O                  0x3b
#define SQ_IMAGE_SAMPLE_C_L_O                     0x3c
#define SQ_IMAGE_SAMPLE_C_B_O                     0x3d
#define SQ_IMAGE_SAMPLE_C_B_CL_O                  0x3e
#define SQ_IMAGE_SAMPLE_C_LZ_O                    0x3f
/*
 * SQ_IMAGE_GATHER4* encodings, values 0x40-0x5f.  The layout mirrors the
 * SQ_IMAGE_SAMPLE* block shifted up by 0x20 (+0x08 selects "_C", +0x10
 * selects "_O"), except that the two variant slots used by D and D_CL in
 * the SAMPLE block (offsets 2 and 3 of each group of eight) are not
 * assigned for GATHER4.
 */
#define SQ_IMAGE_GATHER4                          0x40
#define SQ_IMAGE_GATHER4_CL                       0x41
#define SQ_IMAGE_GATHER4_L                        0x44
#define SQ_IMAGE_GATHER4_B                        0x45
#define SQ_IMAGE_GATHER4_B_CL                     0x46
#define SQ_IMAGE_GATHER4_LZ                       0x47
#define SQ_IMAGE_GATHER4_C                        0x48
#define SQ_IMAGE_GATHER4_C_CL                     0x49
#define SQ_IMAGE_GATHER4_C_L                      0x4c
#define SQ_IMAGE_GATHER4_C_B                      0x4d
#define SQ_IMAGE_GATHER4_C_B_CL                   0x4e
#define SQ_IMAGE_GATHER4_C_LZ                     0x4f
#define SQ_IMAGE_GATHER4_O                        0x50
#define SQ_IMAGE_GATHER4_CL_O                     0x51
#define SQ_IMAGE_GATHER4_L_O                      0x54
#define SQ_IMAGE_GATHER4_B_O                      0x55
#define SQ_IMAGE_GATHER4_B_CL_O                   0x56
#define SQ_IMAGE_GATHER4_LZ_O                     0x57
#define SQ_IMAGE_GATHER4_C_O                      0x58
#define SQ_IMAGE_GATHER4_C_CL_O                   0x59
#define SQ_IMAGE_GATHER4_C_L_O                    0x5c
#define SQ_IMAGE_GATHER4_C_B_O                    0x5d
#define SQ_IMAGE_GATHER4_C_B_CL_O                 0x5e
#define SQ_IMAGE_GATHER4_C_LZ_O                   0x5f
/*
 * SQ_IMAGE_GET_LOD (0x60) followed by the SQ_IMAGE_SAMPLE*_CD* group at
 * 0x68-0x6f.  Inside the CD group: +0x1 selects "_CL", +0x2 selects "_C",
 * +0x4 selects "_O".  Values 0x61-0x67 are not assigned here.
 * NOTE(review): "CD" presumably means coarse derivatives -- confirm.
 */
#define SQ_IMAGE_GET_LOD                          0x60
#define SQ_IMAGE_SAMPLE_CD                        0x68
#define SQ_IMAGE_SAMPLE_CD_CL                     0x69
#define SQ_IMAGE_SAMPLE_C_CD                      0x6a
#define SQ_IMAGE_SAMPLE_C_CD_CL                   0x6b
#define SQ_IMAGE_SAMPLE_CD_O                      0x6c
#define SQ_IMAGE_SAMPLE_CD_CL_O                   0x6d
#define SQ_IMAGE_SAMPLE_C_CD_O                    0x6e
#define SQ_IMAGE_SAMPLE_C_CD_CL_O                 0x6f
/*
 * Last two SQ_IMAGE_* values (0x7e, 0x7f).
 * NOTE(review): the names suggest these are not sampling operations but
 * markers related to the resource / sampler descriptors; their exact use
 * is not visible here -- confirm before relying on them.
 */
#define SQ_IMAGE_RSRC256                          0x7e
#define SQ_IMAGE_SAMPLER                          0x7f
/*
 * Two more source-operand selectors.  SQ_SRC_VGPR0 (0x100) is the only
 * SQ_SRC_* value in this part of the file that does not fit in 8 bits.
 * NOTE(review): presumably VCCZ selects the "VCC is zero" flag and VGPR0
 * is the base encoding for vector GPR operands -- confirm against ISA docs.
 */
#define SQ_SRC_VCCZ                               0xfb
#define SQ_SRC_VGPR0                              0x100
/*
 * SQ_DFMT_* selectors, numbered 0x0-0xe with 0 reserved for INVALID.
 * The name suffix lists per-component sizes (e.g. 10_11_11, 8_8_8_8).
 * NOTE(review): presumably the buffer data-format field, with the numbers
 * being component bit widths -- confirm against the ISA docs.
 */
#define SQ_DFMT_INVALID                           0x0
#define SQ_DFMT_8                                 0x1
#define SQ_DFMT_16                                0x2
#define SQ_DFMT_8_8                               0x3
#define SQ_DFMT_32                                0x4
#define SQ_DFMT_16_16                             0x5
#define SQ_DFMT_10_11_11                          0x6
#define SQ_DFMT_11_11_10                          0x7
#define SQ_DFMT_10_10_10_2                        0x8
#define SQ_DFMT_2_10_10_10                        0x9
#define SQ_DFMT_8_8_8_8                           0xa
#define SQ_DFMT_32_32                             0xb
#define SQ_DFMT_16_16_16_16                       0xc
#define SQ_DFMT_32_32_32                          0xd
#define SQ_DFMT_32_32_32_32                       0xe
/*
 * SQ_TBUFFER_* encodings, 0x0-0x7: LOAD_FORMAT_X..XYZW followed by
 * STORE_FORMAT_X..XYZW.  The values are identical to the corresponding
 * SQ_BUFFER_LOAD_FORMAT_* / SQ_BUFFER_STORE_FORMAT_* macros.
 * NOTE(review): presumably typed-buffer instruction opcodes -- confirm.
 */
#define SQ_TBUFFER_LOAD_FORMAT_X                  0x0
#define SQ_TBUFFER_LOAD_FORMAT_XY                 0x1
#define SQ_TBUFFER_LOAD_FORMAT_XYZ                0x2
#define SQ_TBUFFER_LOAD_FORMAT_XYZW               0x3
#define SQ_TBUFFER_STORE_FORMAT_X                 0x4
#define SQ_TBUFFER_STORE_FORMAT_XY                0x5
#define SQ_TBUFFER_STORE_FORMAT_XYZ               0x6
#define SQ_TBUFFER_STORE_FORMAT_XYZW              0x7
/* Channel selectors: X, Y, Z, W map to 0, 1, 2, 3. */
#define SQ_CHAN_X                                 0x0
#define SQ_CHAN_Y                                 0x1
#define SQ_CHAN_Z                                 0x2
#define SQ_CHAN_W                                 0x3
/*
 * SQ_EXEC_LO / SQ_EXEC_HI occupy consecutive values 0x7e and 0x7f.
 * NOTE(review): presumably the operand encodings of the low and high
 * halves of the EXEC mask register -- confirm against the ISA docs.
 */
#define SQ_EXEC_LO                                0x7e
#define SQ_EXEC_HI                                0x7f
/*
 * SQ_S_LOAD_* / SQ_S_BUFFER_LOAD_* and related encodings.
 *   0x00-0x04  S_LOAD_DWORD, X2, X4, X8, X16
 *   0x08-0x0c  S_BUFFER_LOAD_* in the same size order (S_LOAD value + 0x8)
 *   0x1d-0x1f  S_DCACHE_INV_VOL, S_MEMTIME, S_DCACHE_INV
 * Values 0x05-0x07 and 0x0d-0x1c are not assigned here.
 * NOTE(review): presumably scalar memory-read opcodes -- confirm.
 */
#define SQ_S_LOAD_DWORD                           0x0
#define SQ_S_LOAD_DWORDX2                         0x1
#define SQ_S_LOAD_DWORDX4                         0x2
#define SQ_S_LOAD_DWORDX8                         0x3
#define SQ_S_LOAD_DWORDX16                        0x4
#define SQ_S_BUFFER_LOAD_DWORD                    0x8
#define SQ_S_BUFFER_LOAD_DWORDX2                  0x9
#define SQ_S_BUFFER_LOAD_DWORDX4                  0xa
#define SQ_S_BUFFER_LOAD_DWORDX8                  0xb
#define SQ_S_BUFFER_LOAD_DWORDX16                 0xc
#define SQ_S_DCACHE_INV_VOL                       0x1d
#define SQ_S_MEMTIME                              0x1e
#define SQ_S_DCACHE_INV                           0x1f
/*
 * SQ_V_* single-operand style encodings, values 0x00-0x46, starting at
 * SQ_V_NOP.  The run is contiguous except for 0x1b-0x1f, which are not
 * assigned here.
 * NOTE(review): presumably one-source vector ALU opcodes (moves, type
 * conversions, rounding, transcendental and bit-scan operations) that are
 * combined with SQ_V_OP1_OFFSET in some encodings -- confirm against the
 * ISA docs.
 */
#define SQ_V_NOP                                  0x0
#define SQ_V_MOV_B32                              0x1
#define SQ_V_READFIRSTLANE_B32                    0x2
#define SQ_V_CVT_I32_F64                          0x3
#define SQ_V_CVT_F64_I32                          0x4
#define SQ_V_CVT_F32_I32                          0x5
#define SQ_V_CVT_F32_U32                          0x6
#define SQ_V_CVT_U32_F32                          0x7
#define SQ_V_CVT_I32_F32                          0x8
#define SQ_V_MOV_FED_B32                          0x9
#define SQ_V_CVT_F16_F32                          0xa
#define SQ_V_CVT_F32_F16                          0xb
#define SQ_V_CVT_RPI_I32_F32                      0xc
#define SQ_V_CVT_FLR_I32_F32                      0xd
#define SQ_V_CVT_OFF_F32_I4                       0xe
#define SQ_V_CVT_F32_F64                          0xf
#define SQ_V_CVT_F64_F32                          0x10
#define SQ_V_CVT_F32_UBYTE0                       0x11
#define SQ_V_CVT_F32_UBYTE1                       0x12
#define SQ_V_CVT_F32_UBYTE2                       0x13
#define SQ_V_CVT_F32_UBYTE3                       0x14
#define SQ_V_CVT_U32_F64                          0x15
#define SQ_V_CVT_F64_U32                          0x16
#define SQ_V_TRUNC_F64                            0x17
#define SQ_V_CEIL_F64                             0x18
#define SQ_V_RNDNE_F64                            0x19
#define SQ_V_FLOOR_F64                            0x1a
#define SQ_V_FRACT_F32                            0x20
#define SQ_V_TRUNC_F32                            0x21
#define SQ_V_CEIL_F32                             0x22
#define SQ_V_RNDNE_F32                            0x23
#define SQ_V_FLOOR_F32                            0x24
#define SQ_V_EXP_F32                              0x25
#define SQ_V_LOG_CLAMP_F32                        0x26
#define SQ_V_LOG_F32                              0x27
#define SQ_V_RCP_CLAMP_F32                        0x28
#define SQ_V_RCP_LEGACY_F32                       0x29
#define SQ_V_RCP_F32                              0x2a
#define SQ_V_RCP_IFLAG_F32                        0x2b
#define SQ_V_RSQ_CLAMP_F32                        0x2c
#define SQ_V_RSQ_LEGACY_F32                       0x2d
#define SQ_V_RSQ_F32                              0x2e
#define SQ_V_RCP_F64                              0x2f
#define SQ_V_RCP_CLAMP_F64                        0x30
#define SQ_V_RSQ_F64                              0x31
#define SQ_V_RSQ_CLAMP_F64                        0x32
#define SQ_V_SQRT_F32                             0x33
#define SQ_V_SQRT_F64                             0x34
#define SQ_V_SIN_F32                              0x35
#define SQ_V_COS_F32                              0x36
#define SQ_V_NOT_B32                              0x37
#define SQ_V_BFREV_B32                            0x38
#define SQ_V_FFBH_U32                             0x39
#define SQ_V_FFBL_B32                             0x3a
#define SQ_V_FFBH_I32                             0x3b
#define SQ_V_FREXP_EXP_I32_F64                    0x3c
#define SQ_V_FREXP_MANT_F64                       0x3d
#define SQ_V_FRACT_F64                            0x3e
#define SQ_V_FREXP_EXP_I32_F32                    0x3f
#define SQ_V_FREXP_MANT_F32                       0x40
#define SQ_V_CLREXCP                              0x41
#define SQ_V_MOVRELD_B32                          0x42
#define SQ_V_MOVRELS_B32                          0x43
#define SQ_V_MOVRELSD_B32                         0x44
#define SQ_V_LOG_LEGACY_F32                       0x45
#define SQ_V_EXP_LEGACY_F32                       0x46
/*
 * SQ_NFMT_*: eight selector codes, 0x0-0x7, with no gaps.
 * NOTE(review): "NFMT" presumably stands for the numeric format of
 * buffer data (normalized / scaled / integer / float) - confirm.
 */
#define SQ_NFMT_UNORM                             0x0
#define SQ_NFMT_SNORM                             0x1
#define SQ_NFMT_USCALED                           0x2
#define SQ_NFMT_SSCALED                           0x3
#define SQ_NFMT_UINT                              0x4
#define SQ_NFMT_SINT                              0x5
#define SQ_NFMT_SNORM_OGL                         0x6
#define SQ_NFMT_FLOAT                             0x7
/*
 * Base offsets for three SQ_V_* opcode groups: OP1 at 0x180, OP2 at 0x100
 * and OPC at 0x0.
 * NOTE(review): these look like the positions at which the one-source,
 * two-source and compare opcode tables are mapped into a wider combined
 * opcode field; the SQ_V_* values 0x140-0x177 defined further down sit
 * between the OP2 and OP1 bases - confirm against the ISA documentation.
 */
#define SQ_V_OP1_OFFSET                           0x180
#define SQ_V_OP2_OFFSET                           0x100
#define SQ_V_OPC_OFFSET                           0x0
/*
 * SQ_V_INTERP_*: three opcode numbers, 0x0-0x2.
 * The values collide numerically with other SQ_V_* groups in this file.
 * NOTE(review): presumably they belong to a separate interpolation
 * instruction encoding - confirm.
 */
#define SQ_V_INTERP_P1_F32                        0x0
#define SQ_V_INTERP_P2_F32                        0x1
#define SQ_V_INTERP_MOV_F32                       0x2
/*
 * SQ_S_* opcode numbers 0x0-0x1a for program-control style operations
 * (nop, endpgm, branches, barrier, waitcnt, sleep, sendmsg, trap, ...).
 * No define exists for value 0x3.
 * These numbers overlap with the other SQ_S_* groups in this file
 * (SQ_S_CMP_* and SQ_S_ADD_U32... also start at 0x0).
 * NOTE(review): presumably the SOPP opcode space - confirm against the
 * ISA documentation.
 */
#define SQ_S_NOP                                  0x0
#define SQ_S_ENDPGM                               0x1
#define SQ_S_BRANCH                               0x2
#define SQ_S_CBRANCH_SCC0                         0x4
#define SQ_S_CBRANCH_SCC1                         0x5
#define SQ_S_CBRANCH_VCCZ                         0x6
#define SQ_S_CBRANCH_VCCNZ                        0x7
#define SQ_S_CBRANCH_EXECZ                        0x8
#define SQ_S_CBRANCH_EXECNZ                       0x9
#define SQ_S_BARRIER                              0xa
#define SQ_S_SETKILL                              0xb
#define SQ_S_WAITCNT                              0xc
#define SQ_S_SETHALT                              0xd
#define SQ_S_SLEEP                                0xe
#define SQ_S_SETPRIO                              0xf
#define SQ_S_SENDMSG                              0x10
#define SQ_S_SENDMSGHALT                          0x11
#define SQ_S_TRAP                                 0x12
#define SQ_S_ICACHE_INV                           0x13
#define SQ_S_INCPERFLEVEL                         0x14
#define SQ_S_DECPERFLEVEL                         0x15
#define SQ_S_TTRACEDATA                           0x16
#define SQ_S_CBRANCH_CDBGSYS                      0x17
#define SQ_S_CBRANCH_CDBGUSER                     0x18
#define SQ_S_CBRANCH_CDBGSYS_OR_USER              0x19
#define SQ_S_CBRANCH_CDBGSYS_AND_USER             0x1a
/*
 * Codes for specially named operands (literal, VCC low/high, LDS direct,
 * FLAT_SCRATCH low/high), interleaved with the three SQ_PARAM_* values.
 * The SQ_PARAM_* values (0x0-0x2) are a separate small numbering and are
 * not in the same range as the 0x68-0xff codes around them.
 * NOTE(review): the 0x68-0xff values are presumably source-operand field
 * encodings, and SQ_PARAM_* presumably select an interpolation parameter
 * - confirm against the ISA documentation.
 */
#define SQ_SRC_LITERAL                            0xff
#define SQ_VCC_LO                                 0x6a
#define SQ_VCC_HI                                 0x6b
#define SQ_PARAM_P10                              0x0
#define SQ_PARAM_P20                              0x1
#define SQ_PARAM_P0                               0x2
#define SQ_SRC_LDS_DIRECT                         0xfe
#define SQ_FLAT_SCRATCH_LO                        0x68
#define SQ_FLAT_SCRATCH_HI                        0x69
/*
 * SQ_V_* opcode numbers 0x0-0x31, contiguous: lane access, F32 and
 * 24/32-bit integer arithmetic, min/max, shifts, bitwise logic,
 * carry add/sub and packed conversions.
 * The *REV variants are numbered directly after their base operation.
 * NOTE(review): presumably the opcode space that SQ_V_OP2_OFFSET refers
 * to (VOP2) - confirm against the ISA documentation.
 */
#define SQ_V_CNDMASK_B32                          0x0
#define SQ_V_READLANE_B32                         0x1
#define SQ_V_WRITELANE_B32                        0x2
#define SQ_V_ADD_F32                              0x3
#define SQ_V_SUB_F32                              0x4
#define SQ_V_SUBREV_F32                           0x5
#define SQ_V_MAC_LEGACY_F32                       0x6
#define SQ_V_MUL_LEGACY_F32                       0x7
#define SQ_V_MUL_F32                              0x8
#define SQ_V_MUL_I32_I24                          0x9
#define SQ_V_MUL_HI_I32_I24                       0xa
#define SQ_V_MUL_U32_U24                          0xb
#define SQ_V_MUL_HI_U32_U24                       0xc
#define SQ_V_MIN_LEGACY_F32                       0xd
#define SQ_V_MAX_LEGACY_F32                       0xe
#define SQ_V_MIN_F32                              0xf
#define SQ_V_MAX_F32                              0x10
#define SQ_V_MIN_I32                              0x11
#define SQ_V_MAX_I32                              0x12
#define SQ_V_MIN_U32                              0x13
#define SQ_V_MAX_U32                              0x14
#define SQ_V_LSHR_B32                             0x15
#define SQ_V_LSHRREV_B32                          0x16
#define SQ_V_ASHR_I32                             0x17
#define SQ_V_ASHRREV_I32                          0x18
#define SQ_V_LSHL_B32                             0x19
#define SQ_V_LSHLREV_B32                          0x1a
#define SQ_V_AND_B32                              0x1b
#define SQ_V_OR_B32                               0x1c
#define SQ_V_XOR_B32                              0x1d
#define SQ_V_BFM_B32                              0x1e
#define SQ_V_MAC_F32                              0x1f
#define SQ_V_MADMK_F32                            0x20
#define SQ_V_MADAK_F32                            0x21
#define SQ_V_BCNT_U32_B32                         0x22
#define SQ_V_MBCNT_LO_U32_B32                     0x23
#define SQ_V_MBCNT_HI_U32_B32                     0x24
#define SQ_V_ADD_I32                              0x25
#define SQ_V_SUB_I32                              0x26
#define SQ_V_SUBREV_I32                           0x27
#define SQ_V_ADDC_U32                             0x28
#define SQ_V_SUBB_U32                             0x29
#define SQ_V_SUBBREV_U32                          0x2a
#define SQ_V_LDEXP_F32                            0x2b
#define SQ_V_CVT_PKACCUM_U8_F32                   0x2c
#define SQ_V_CVT_PKNORM_I16_F32                   0x2d
#define SQ_V_CVT_PKNORM_U16_F32                   0x2e
#define SQ_V_CVT_PKRTZ_F16_F32                    0x2f
#define SQ_V_CVT_PK_U16_U32                       0x30
#define SQ_V_CVT_PK_I16_I32                       0x31
/*
 * SQ_FLAT_* opcode numbers:
 *   loads                0x08-0x0f
 *   stores               0x18-0x1f (0x19 and 0x1b not defined)
 *   atomics              0x30-0x40 (0x34 not defined)
 *   atomics, _X2 suffix  0x50-0x60 (0x54 not defined)
 * Each _X2 atomic equals its base atomic plus 0x20.
 * For both loads and stores DWORDX4 is numbered before DWORDX3
 * (0xe/0xf and 0x1e/0x1f), so the order is not by size.
 */
#define SQ_FLAT_LOAD_UBYTE                        0x8
#define SQ_FLAT_LOAD_SBYTE                        0x9
#define SQ_FLAT_LOAD_USHORT                       0xa
#define SQ_FLAT_LOAD_SSHORT                       0xb
#define SQ_FLAT_LOAD_DWORD                        0xc
#define SQ_FLAT_LOAD_DWORDX2                      0xd
#define SQ_FLAT_LOAD_DWORDX4                      0xe
#define SQ_FLAT_LOAD_DWORDX3                      0xf
#define SQ_FLAT_STORE_BYTE                        0x18
#define SQ_FLAT_STORE_SHORT                       0x1a
#define SQ_FLAT_STORE_DWORD                       0x1c
#define SQ_FLAT_STORE_DWORDX2                     0x1d
#define SQ_FLAT_STORE_DWORDX4                     0x1e
#define SQ_FLAT_STORE_DWORDX3                     0x1f
#define SQ_FLAT_ATOMIC_SWAP                       0x30
#define SQ_FLAT_ATOMIC_CMPSWAP                    0x31
#define SQ_FLAT_ATOMIC_ADD                        0x32
#define SQ_FLAT_ATOMIC_SUB                        0x33
#define SQ_FLAT_ATOMIC_SMIN                       0x35
#define SQ_FLAT_ATOMIC_UMIN                       0x36
#define SQ_FLAT_ATOMIC_SMAX                       0x37
#define SQ_FLAT_ATOMIC_UMAX                       0x38
#define SQ_FLAT_ATOMIC_AND                        0x39
#define SQ_FLAT_ATOMIC_OR                         0x3a
#define SQ_FLAT_ATOMIC_XOR                        0x3b
#define SQ_FLAT_ATOMIC_INC                        0x3c
#define SQ_FLAT_ATOMIC_DEC                        0x3d
#define SQ_FLAT_ATOMIC_FCMPSWAP                   0x3e
#define SQ_FLAT_ATOMIC_FMIN                       0x3f
#define SQ_FLAT_ATOMIC_FMAX                       0x40
#define SQ_FLAT_ATOMIC_SWAP_X2                    0x50
#define SQ_FLAT_ATOMIC_CMPSWAP_X2                 0x51
#define SQ_FLAT_ATOMIC_ADD_X2                     0x52
#define SQ_FLAT_ATOMIC_SUB_X2                     0x53
#define SQ_FLAT_ATOMIC_SMIN_X2                    0x55
#define SQ_FLAT_ATOMIC_UMIN_X2                    0x56
#define SQ_FLAT_ATOMIC_SMAX_X2                    0x57
#define SQ_FLAT_ATOMIC_UMAX_X2                    0x58
#define SQ_FLAT_ATOMIC_AND_X2                     0x59
#define SQ_FLAT_ATOMIC_OR_X2                      0x5a
#define SQ_FLAT_ATOMIC_XOR_X2                     0x5b
#define SQ_FLAT_ATOMIC_INC_X2                     0x5c
#define SQ_FLAT_ATOMIC_DEC_X2                     0x5d
#define SQ_FLAT_ATOMIC_FCMPSWAP_X2                0x5e
#define SQ_FLAT_ATOMIC_FMIN_X2                    0x5f
#define SQ_FLAT_ATOMIC_FMAX_X2                    0x60
/*
 * SQ_S_* compare-style opcode numbers 0x0-0x10, contiguous: six I32
 * compares, six U32 compares, four BITCMP variants, then SETVSKIP.
 * NOTE(review): presumably the scalar compare (SOPC) opcode space -
 * confirm against the ISA documentation.
 */
#define SQ_S_CMP_EQ_I32                           0x0
#define SQ_S_CMP_LG_I32                           0x1
#define SQ_S_CMP_GT_I32                           0x2
#define SQ_S_CMP_GE_I32                           0x3
#define SQ_S_CMP_LT_I32                           0x4
#define SQ_S_CMP_LE_I32                           0x5
#define SQ_S_CMP_EQ_U32                           0x6
#define SQ_S_CMP_LG_U32                           0x7
#define SQ_S_CMP_GT_U32                           0x8
#define SQ_S_CMP_GE_U32                           0x9
#define SQ_S_CMP_LT_U32                           0xa
#define SQ_S_CMP_LE_U32                           0xb
#define SQ_S_BITCMP0_B32                          0xc
#define SQ_S_BITCMP1_B32                          0xd
#define SQ_S_BITCMP0_B64                          0xe
#define SQ_S_BITCMP1_B64                          0xf
#define SQ_S_SETVSKIP                             0x10
/*
 * Code 0x7c for M0.
 * NOTE(review): presumably an operand encoding in the same numbering as
 * SQ_FLAT_SCRATCH_LO (0x68) and SQ_VCC_LO (0x6a) - confirm.
 */
#define SQ_M0                                     0x7c
/*
 * SQ_V_* opcode numbers 0x140-0x177, contiguous: multiply-add, cube
 * helpers, bit-field ops, FMA, three-input min/max/med, SAD variants,
 * 64-bit shifts, F64 arithmetic, 32-bit multiplies, DIV_* helpers and
 * 64-bit multiply-add.
 * The range lies between SQ_V_OP2_OFFSET (0x100) and SQ_V_OP1_OFFSET
 * (0x180).
 * NOTE(review): presumably opcodes that exist only in the three-operand
 * (VOP3) encoding - confirm against the ISA documentation.
 */
#define SQ_V_MAD_LEGACY_F32                       0x140
#define SQ_V_MAD_F32                              0x141
#define SQ_V_MAD_I32_I24                          0x142
#define SQ_V_MAD_U32_U24                          0x143
#define SQ_V_CUBEID_F32                           0x144
#define SQ_V_CUBESC_F32                           0x145
#define SQ_V_CUBETC_F32                           0x146
#define SQ_V_CUBEMA_F32                           0x147
#define SQ_V_BFE_U32                              0x148
#define SQ_V_BFE_I32                              0x149
#define SQ_V_BFI_B32                              0x14a
#define SQ_V_FMA_F32                              0x14b
#define SQ_V_FMA_F64                              0x14c
#define SQ_V_LERP_U8                              0x14d
#define SQ_V_ALIGNBIT_B32                         0x14e
#define SQ_V_ALIGNBYTE_B32                        0x14f
#define SQ_V_MULLIT_F32                           0x150
#define SQ_V_MIN3_F32                             0x151
#define SQ_V_MIN3_I32                             0x152
#define SQ_V_MIN3_U32                             0x153
#define SQ_V_MAX3_F32                             0x154
#define SQ_V_MAX3_I32                             0x155
#define SQ_V_MAX3_U32                             0x156
#define SQ_V_MED3_F32                             0x157
#define SQ_V_MED3_I32                             0x158
#define SQ_V_MED3_U32                             0x159
#define SQ_V_SAD_U8                               0x15a
#define SQ_V_SAD_HI_U8                            0x15b
#define SQ_V_SAD_U16                              0x15c
#define SQ_V_SAD_U32                              0x15d
#define SQ_V_CVT_PK_U8_F32                        0x15e
#define SQ_V_DIV_FIXUP_F32                        0x15f
#define SQ_V_DIV_FIXUP_F64                        0x160
#define SQ_V_LSHL_B64                             0x161
#define SQ_V_LSHR_B64                             0x162
#define SQ_V_ASHR_I64                             0x163
#define SQ_V_ADD_F64                              0x164
#define SQ_V_MUL_F64                              0x165
#define SQ_V_MIN_F64                              0x166
#define SQ_V_MAX_F64                              0x167
#define SQ_V_LDEXP_F64                            0x168
#define SQ_V_MUL_LO_U32                           0x169
#define SQ_V_MUL_HI_U32                           0x16a
#define SQ_V_MUL_LO_I32                           0x16b
#define SQ_V_MUL_HI_I32                           0x16c
#define SQ_V_DIV_SCALE_F32                        0x16d
#define SQ_V_DIV_SCALE_F64                        0x16e
#define SQ_V_DIV_FMAS_F32                         0x16f
#define SQ_V_DIV_FMAS_F64                         0x170
#define SQ_V_MSAD_U8                              0x171
#define SQ_V_QSAD_PK_U16_U8                       0x172
#define SQ_V_MQSAD_PK_U16_U8                      0x173
#define SQ_V_TRIG_PREOP_F64                       0x174
#define SQ_V_MQSAD_U32_U8                         0x175
#define SQ_V_MAD_U64_U32                          0x176
#define SQ_V_MAD_I64_I32                          0x177
/*
 * Two further named codes: SQ_VCC_ALL (0x0) and SQ_SRC_EXECZ (0xfc).
 * NOTE(review): SQ_SRC_EXECZ presumably shares the source-operand
 * numbering of SQ_SRC_LDS_DIRECT (0xfe) and SQ_SRC_LITERAL (0xff); the
 * field SQ_VCC_ALL applies to cannot be determined from this file -
 * confirm both.
 */
#define SQ_VCC_ALL                                0x0
#define SQ_SRC_EXECZ                              0xfc
/*
 * SQ_SYSMSG_OP_*: four operation codes, 0x1-0x4 (0x0 is not defined).
 * NOTE(review): presumably sub-operations of the SQ_MSG_SYSMSG message
 * type defined further down - confirm.
 */
#define SQ_SYSMSG_OP_ECC_ERR_INTERRUPT            0x1
#define SQ_SYSMSG_OP_REG_RD                       0x2
#define SQ_SYSMSG_OP_HOST_TRAP_ACK                0x3
#define SQ_SYSMSG_OP_TTRACE_PC                    0x4
/*
 * SQ_HW_REG_*: identifiers 0x1-0xc for named hardware registers
 * (0x0 is not defined).
 * NOTE(review): presumably the register-id field used by the scalar
 * get/set hardware-register instructions - confirm against the ISA
 * documentation.
 */
#define SQ_HW_REG_MODE                            0x1
#define SQ_HW_REG_STATUS                          0x2
#define SQ_HW_REG_TRAPSTS                         0x3
#define SQ_HW_REG_HW_ID                           0x4
#define SQ_HW_REG_GPR_ALLOC                       0x5
#define SQ_HW_REG_LDS_ALLOC                       0x6
#define SQ_HW_REG_IB_STS                          0x7
#define SQ_HW_REG_PC_LO                           0x8
#define SQ_HW_REG_PC_HI                           0x9
#define SQ_HW_REG_INST_DW0                        0xa
#define SQ_HW_REG_INST_DW1                        0xb
#define SQ_HW_REG_IB_DBG0                         0xc
/*
 * SQ_S_* opcode numbers 0x0-0x2c for arithmetic, select, bitwise logic,
 * shift and bit-field operations. No defines exist for 0xc and 0xd.
 * From SQ_S_CSELECT_B32 through SQ_S_BFM_B64 the _B32/_I32 form takes
 * the even value and the 64-bit form the following odd value.
 * These numbers overlap with the other SQ_S_* groups in this file.
 * NOTE(review): presumably the two-source scalar (SOP2) opcode space -
 * confirm against the ISA documentation.
 */
#define SQ_S_ADD_U32                              0x0
#define SQ_S_SUB_U32                              0x1
#define SQ_S_ADD_I32                              0x2
#define SQ_S_SUB_I32                              0x3
#define SQ_S_ADDC_U32                             0x4
#define SQ_S_SUBB_U32                             0x5
#define SQ_S_MIN_I32                              0x6
#define SQ_S_MIN_U32                              0x7
#define SQ_S_MAX_I32                              0x8
#define SQ_S_MAX_U32                              0x9
#define SQ_S_CSELECT_B32                          0xa
#define SQ_S_CSELECT_B64                          0xb
#define SQ_S_AND_B32                              0xe
#define SQ_S_AND_B64                              0xf
#define SQ_S_OR_B32                               0x10
#define SQ_S_OR_B64                               0x11
#define SQ_S_XOR_B32                              0x12
#define SQ_S_XOR_B64                              0x13
#define SQ_S_ANDN2_B32                            0x14
#define SQ_S_ANDN2_B64                            0x15
#define SQ_S_ORN2_B32                             0x16
#define SQ_S_ORN2_B64                             0x17
#define SQ_S_NAND_B32                             0x18
#define SQ_S_NAND_B64                             0x19
#define SQ_S_NOR_B32                              0x1a
#define SQ_S_NOR_B64                              0x1b
#define SQ_S_XNOR_B32                             0x1c
#define SQ_S_XNOR_B64                             0x1d
#define SQ_S_LSHL_B32                             0x1e
#define SQ_S_LSHL_B64                             0x1f
#define SQ_S_LSHR_B32                             0x20
#define SQ_S_LSHR_B64                             0x21
#define SQ_S_ASHR_I32                             0x22
#define SQ_S_ASHR_I64                             0x23
#define SQ_S_BFM_B32                              0x24
#define SQ_S_BFM_B64                              0x25
#define SQ_S_MUL_I32                              0x26
#define SQ_S_BFE_U32                              0x27
#define SQ_S_BFE_I32                              0x28
#define SQ_S_BFE_U64                              0x29
#define SQ_S_BFE_I64                              0x2a
#define SQ_S_CBRANCH_G_FORK                       0x2b
#define SQ_S_ABSDIFF_I32                          0x2c
/*
 * SQ_MSG_*: message type codes 0x1-0x3 plus SQ_MSG_SYSMSG at 0xf;
 * values 0x0 and 0x4-0xe are not defined.
 * NOTE(review): presumably the message-id argument of SQ_S_SENDMSG /
 * SQ_S_SENDMSGHALT - confirm against the ISA documentation.
 */
#define SQ_MSG_INTERRUPT                          0x1
#define SQ_MSG_GS                                 0x2
#define SQ_MSG_GS_DONE                            0x3
#define SQ_MSG_SYSMSG                             0xf
/* Border colour choices: three named colours plus "Register" (codes 0-3). */
typedef enum TEX_BORDER_COLOR_TYPE {
	TEX_BorderColor_TransparentBlack = 0,
	TEX_BorderColor_OpaqueBlack = 1,
	TEX_BorderColor_OpaqueWhite = 2,
	TEX_BorderColor_Register = 3,
} TEX_BORDER_COLOR_TYPE;
/* Chroma-key mode codes 0-3; code 3 is a reserved placeholder. */
typedef enum TEX_CHROMA_KEY {
	TEX_ChromaKey_Disabled = 0,
	TEX_ChromaKey_Kill = 1,
	TEX_ChromaKey_Blend = 2,
	TEX_ChromaKey_RESERVED_3 = 3,
} TEX_CHROMA_KEY;
/* Texture clamp mode codes 0-7 (repeat, mirror and the clamp/mirror-once variants). */
typedef enum TEX_CLAMP {
	TEX_Clamp_Repeat = 0,
	TEX_Clamp_Mirror = 1,
	TEX_Clamp_ClampToLast = 2,
	TEX_Clamp_MirrorOnceToLast = 3,
	TEX_Clamp_ClampHalfToBorder = 4,
	TEX_Clamp_MirrorOnceHalfToBorder = 5,
	TEX_Clamp_ClampToBorder = 6,
	TEX_Clamp_MirrorOnceToBorder = 7,
} TEX_CLAMP;
/* Coordinate type flag: 0 = unnormalized, 1 = normalized. */
typedef enum TEX_COORD_TYPE {
	TEX_CoordType_Unnormalized = 0,
	TEX_CoordType_Normalized = 1,
} TEX_COORD_TYPE;
/* Depth-compare function codes 0-7, from Never (0) to Always (7). */
typedef enum TEX_DEPTH_COMPARE_FUNCTION {
	TEX_DepthCompareFunction_Never = 0,
	TEX_DepthCompareFunction_Less = 1,
	TEX_DepthCompareFunction_Equal = 2,
	TEX_DepthCompareFunction_LessEqual = 3,
	TEX_DepthCompareFunction_Greater = 4,
	TEX_DepthCompareFunction_NotEqual = 5,
	TEX_DepthCompareFunction_GreaterEqual = 6,
	TEX_DepthCompareFunction_Always = 7,
} TEX_DEPTH_COMPARE_FUNCTION;
/* Texture dimensionality codes 0-7 (1D/2D/3D, cube map, arrays, MSAA). */
typedef enum TEX_DIM {
	TEX_Dim_1D = 0,
	TEX_Dim_2D = 1,
	TEX_Dim_3D = 2,
	TEX_Dim_CubeMap = 3,
	TEX_Dim_1DArray = 4,
	TEX_Dim_2DArray = 5,
	TEX_Dim_2D_MSAA = 6,
	TEX_Dim_2DArray_MSAA = 7,
} TEX_DIM;
/* Component format codes 0-3; code 3 is a reserved placeholder. */
typedef enum TEX_FORMAT_COMP {
	TEX_FormatComp_Unsigned = 0,
	TEX_FormatComp_Signed = 1,
	TEX_FormatComp_UnsignedBiased = 2,
	TEX_FormatComp_RESERVED_3 = 3,
} TEX_FORMAT_COMP;
/* Maximum anisotropy ratio codes: 0-4 name ratios 1:1 to 16:1, 5-7 are reserved. */
typedef enum TEX_MAX_ANISO_RATIO {
	TEX_MaxAnisoRatio_1to1 = 0,
	TEX_MaxAnisoRatio_2to1 = 1,
	TEX_MaxAnisoRatio_4to1 = 2,
	TEX_MaxAnisoRatio_8to1 = 3,
	TEX_MaxAnisoRatio_16to1 = 4,
	TEX_MaxAnisoRatio_RESERVED_5 = 5,
	TEX_MaxAnisoRatio_RESERVED_6 = 6,
	TEX_MaxAnisoRatio_RESERVED_7 = 7,
} TEX_MAX_ANISO_RATIO;
/* Mip filter codes 0-3; code 3 is a reserved placeholder. */
typedef enum TEX_MIP_FILTER {
	TEX_MipFilter_None = 0,
	TEX_MipFilter_Point = 1,
	TEX_MipFilter_Linear = 2,
	TEX_MipFilter_RESERVED_3 = 3,
} TEX_MIP_FILTER;
/* Request size codes 0-3 (32B, 64B, 128B, 2X64B). */
typedef enum TEX_REQUEST_SIZE {
	TEX_RequestSize_32B = 0,
	TEX_RequestSize_64B = 1,
	TEX_RequestSize_128B = 2,
	TEX_RequestSize_2X64B = 3,
} TEX_REQUEST_SIZE;
/* Sampler validity flag: 0 = invalid, 1 = valid. */
typedef enum TEX_SAMPLER_TYPE {
	TEX_SamplerType_Invalid = 0,
	TEX_SamplerType_Valid = 1,
} TEX_SAMPLER_TYPE;
/* XY filter codes 0-3 (point, linear and their aniso variants). */
typedef enum TEX_XY_FILTER {
	TEX_XYFilter_Point = 0,
	TEX_XYFilter_Linear = 1,
	TEX_XYFilter_AnisoPoint = 2,
	TEX_XYFilter_AnisoLinear = 3,
} TEX_XY_FILTER;
/* Z filter codes 0-3; code 3 is a reserved placeholder. */
typedef enum TEX_Z_FILTER {
	TEX_ZFilter_None = 0,
	TEX_ZFilter_Point = 1,
	TEX_ZFilter_Linear = 2,
	TEX_ZFilter_RESERVED_3 = 3,
} TEX_Z_FILTER;
/* Vertex clamp choice: 0 = clamp to zero, 1 = clamp to NaN. */
typedef enum VTX_CLAMP {
	VTX_Clamp_ClampToZero = 0,
	VTX_Clamp_ClampToNAN = 1,
} VTX_CLAMP;
/* Vertex fetch type codes 0-3; code 3 is a reserved placeholder. */
typedef enum VTX_FETCH_TYPE {
	VTX_FetchType_VertexData = 0,
	VTX_FetchType_InstanceData = 1,
	VTX_FetchType_NoIndexOffset = 2,
	VTX_FetchType_RESERVED_3 = 3,
} VTX_FETCH_TYPE;
/* Signedness flag for all components: 0 = unsigned, 1 = signed. */
typedef enum VTX_FORMAT_COMP_ALL {
	VTX_FormatCompAll_Unsigned = 0,
	VTX_FormatCompAll_Signed = 1,
} VTX_FORMAT_COMP_ALL;
/* Vertex memory request size: 0 = 32B, 1 = 64B. */
typedef enum VTX_MEM_REQUEST_SIZE {
	VTX_MemRequestSize_32B = 0,
	VTX_MemRequestSize_64B = 1,
} VTX_MEM_REQUEST_SIZE;
/*
 * Data format codes 0-63, contiguous.
 * Values are written in decimal so that the RESERVED_<n> placeholders
 * (4, 33, 36, 63) visibly carry their own code number.
 */
typedef enum TVX_DATA_FORMAT {
	TVX_FMT_INVALID = 0,
	TVX_FMT_8 = 1,
	TVX_FMT_4_4 = 2,
	TVX_FMT_3_3_2 = 3,
	TVX_FMT_RESERVED_4 = 4,
	TVX_FMT_16 = 5,
	TVX_FMT_16_FLOAT = 6,
	TVX_FMT_8_8 = 7,
	TVX_FMT_5_6_5 = 8,
	TVX_FMT_6_5_5 = 9,
	TVX_FMT_1_5_5_5 = 10,
	TVX_FMT_4_4_4_4 = 11,
	TVX_FMT_5_5_5_1 = 12,
	TVX_FMT_32 = 13,
	TVX_FMT_32_FLOAT = 14,
	TVX_FMT_16_16 = 15,
	TVX_FMT_16_16_FLOAT = 16,
	TVX_FMT_8_24 = 17,
	TVX_FMT_8_24_FLOAT = 18,
	TVX_FMT_24_8 = 19,
	TVX_FMT_24_8_FLOAT = 20,
	TVX_FMT_10_11_11 = 21,
	TVX_FMT_10_11_11_FLOAT = 22,
	TVX_FMT_11_11_10 = 23,
	TVX_FMT_11_11_10_FLOAT = 24,
	TVX_FMT_2_10_10_10 = 25,
	TVX_FMT_8_8_8_8 = 26,
	TVX_FMT_10_10_10_2 = 27,
	TVX_FMT_X24_8_32_FLOAT = 28,
	TVX_FMT_32_32 = 29,
	TVX_FMT_32_32_FLOAT = 30,
	TVX_FMT_16_16_16_16 = 31,
	TVX_FMT_16_16_16_16_FLOAT = 32,
	TVX_FMT_RESERVED_33 = 33,
	TVX_FMT_32_32_32_32 = 34,
	TVX_FMT_32_32_32_32_FLOAT = 35,
	TVX_FMT_RESERVED_36 = 36,
	TVX_FMT_1 = 37,
	TVX_FMT_1_REVERSED = 38,
	TVX_FMT_GB_GR = 39,
	TVX_FMT_BG_RG = 40,
	TVX_FMT_32_AS_8 = 41,
	TVX_FMT_32_AS_8_8 = 42,
	TVX_FMT_5_9_9_9_SHAREDEXP = 43,
	TVX_FMT_8_8_8 = 44,
	TVX_FMT_16_16_16 = 45,
	TVX_FMT_16_16_16_FLOAT = 46,
	TVX_FMT_32_32_32 = 47,
	TVX_FMT_32_32_32_FLOAT = 48,
	TVX_FMT_BC1 = 49,
	TVX_FMT_BC2 = 50,
	TVX_FMT_BC3 = 51,
	TVX_FMT_BC4 = 52,
	TVX_FMT_BC5 = 53,
	TVX_FMT_APC0 = 54,
	TVX_FMT_APC1 = 55,
	TVX_FMT_APC2 = 56,
	TVX_FMT_APC3 = 57,
	TVX_FMT_APC4 = 58,
	TVX_FMT_APC5 = 59,
	TVX_FMT_APC6 = 60,
	TVX_FMT_APC7 = 61,
	TVX_FMT_CTX1 = 62,
	TVX_FMT_RESERVED_63 = 63,
} TVX_DATA_FORMAT;
/* Destination select codes 0-7: X/Y/Z/W, constants 0f/1f, reserved 6, and Mask. */
typedef enum TVX_DST_SEL {
	TVX_DstSel_X = 0,
	TVX_DstSel_Y = 1,
	TVX_DstSel_Z = 2,
	TVX_DstSel_W = 3,
	TVX_DstSel_0f = 4,
	TVX_DstSel_1f = 5,
	TVX_DstSel_RESERVED_6 = 6,
	TVX_DstSel_Mask = 7,
} TVX_DST_SEL;
/* Endian swap mode codes 0-3 (none, 8in16, 8in32, 8in64). */
typedef enum TVX_ENDIAN_SWAP {
	TVX_EndianSwap_None = 0,
	TVX_EndianSwap_8in16 = 1,
	TVX_EndianSwap_8in32 = 2,
	TVX_EndianSwap_8in64 = 3,
} TVX_ENDIAN_SWAP;
/*
 * Fetch/sample instruction codes 0-31, contiguous.
 * Codes 0-15 cover vertex fetch, LD, the Get and Set queries and Pass
 * (2 and 15 are reserved); 16-23 are the Sample and Gather4 forms and
 * 24-31 are the corresponding _C forms, each 8 above its non-_C
 * counterpart.
 */
typedef enum TVX_INST {
	TVX_Inst_NormalVertexFetch = 0,
	TVX_Inst_SemanticVertexFetch = 1,
	TVX_Inst_RESERVED_2 = 2,
	TVX_Inst_LD = 3,
	TVX_Inst_GetTextureResInfo = 4,
	TVX_Inst_GetNumberOfSamples = 5,
	TVX_Inst_GetLOD = 6,
	TVX_Inst_GetGradientsH = 7,
	TVX_Inst_GetGradientsV = 8,
	TVX_Inst_SetTextureOffsets = 9,
	TVX_Inst_KeepGradients = 10,
	TVX_Inst_SetGradientsH = 11,
	TVX_Inst_SetGradientsV = 12,
	TVX_Inst_Pass = 13,
	TVX_Inst_GetBufferResInfo = 14,
	TVX_Inst_RESERVED_15 = 15,
	TVX_Inst_Sample = 16,
	TVX_Inst_Sample_L = 17,
	TVX_Inst_Sample_LB = 18,
	TVX_Inst_Sample_LZ = 19,
	TVX_Inst_Sample_G = 20,
	TVX_Inst_Gather4 = 21,
	TVX_Inst_Sample_G_LB = 22,
	TVX_Inst_Gather4_O = 23,
	TVX_Inst_Sample_C = 24,
	TVX_Inst_Sample_C_L = 25,
	TVX_Inst_Sample_C_LB = 26,
	TVX_Inst_Sample_C_LZ = 27,
	TVX_Inst_Sample_C_G = 28,
	TVX_Inst_Gather4_C = 29,
	TVX_Inst_Sample_C_G_LB = 30,
	TVX_Inst_Gather4_C_O = 31,
} TVX_INST;
/* Number format codes 0-3 (norm, int, scaled); code 3 is a reserved placeholder. */
typedef enum TVX_NUM_FORMAT_ALL {
	TVX_NumFormatAll_Norm = 0,
	TVX_NumFormatAll_Int = 1,
	TVX_NumFormatAll_Scaled = 2,
	TVX_NumFormatAll_RESERVED_3 = 3,
} TVX_NUM_FORMAT_ALL;
/* Source select codes 0-5: X/Y/Z/W and the constants 0f/1f. */
typedef enum TVX_SRC_SEL {
	TVX_SrcSel_X = 0,
	TVX_SrcSel_Y = 1,
	TVX_SrcSel_Z = 2,
	TVX_SrcSel_W = 3,
	TVX_SrcSel_0f = 4,
	TVX_SrcSel_1f = 5,
} TVX_SRC_SEL;
/* SRF mode flag: 0 = ZCMO, 1 = NZ. */
typedef enum TVX_SRF_MODE_ALL {
	TVX_SRFModeAll_ZCMO = 0,
	TVX_SRFModeAll_NZ = 1,
} TVX_SRF_MODE_ALL;
/* Resource type codes 0-3: invalid (0, 1) and valid (2, 3) texture resource / vertex buffer. */
typedef enum TVX_TYPE {
	TVX_Type_InvalidTextureResource = 0,
	TVX_Type_InvalidVertexBuffer = 1,
	TVX_Type_ValidTextureResource = 2,
	TVX_Type_ValidVertexBuffer = 3,
} TVX_TYPE;
/*
 * Bit masks that relate TC_OP opcode values to each other.
 * In the TC_OP values that follow, a float atomic's FLUSH_DENORM variant
 * is its base opcode plus 0x8 (e.g. 0x1 -> 0x9), the _64 variant is the
 * _32 opcode plus 0x20 (0x1 -> 0x21), and the variant without _RTN is the
 * _RTN opcode plus 0x40 (0x1 -> 0x41).
 *
 * TC_OP_MASK_FLUSH_DENROM is a misspelling of "DENORM". It is kept so
 * that existing users keep compiling; TC_OP_MASK_FLUSH_DENORM is the
 * correctly spelled alias with the same value and matches the
 * FLUSH_DENORM spelling used by the TC_OP enumerators.
 */
typedef enum TC_OP_MASKS {
	TC_OP_MASK_FLUSH_DENROM                          = 0x8,
	TC_OP_MASK_FLUSH_DENORM                          = TC_OP_MASK_FLUSH_DENROM,
	TC_OP_MASK_64                                    = 0x20,
	TC_OP_MASK_NO_RTN                                = 0x40,
} TC_OP_MASKS;
typedef enum TC_OP {
	TC_OP_READ                                       = 0x0,
	TC_OP_ATOMIC_FCMPSWAP_RTN_32                     = 0x1,
	TC_OP_ATOMIC_FMIN_RTN_32                         = 0x2,
	TC_OP_ATOMIC_FMAX_RTN_32                         = 0x3,
	TC_OP_RESERVED_FOP_RTN_32_0                      = 0x4,
	TC_OP_RESERVED_FOP_RTN_32_1                      = 0x5,
	TC_OP_RESERVED_FOP_RTN_32_2                      = 0x6,
	TC_OP_ATOMIC_SWAP_RTN_32                         = 0x7,
	TC_OP_ATOMIC_CMPSWAP_RTN_32                      = 0x8,
	TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_32        = 0x9,
	TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_32            = 0xa,
	TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_32            = 0xb,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_0         = 0xc,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_1         = 0xd,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_2         = 0xe,
	TC_OP_ATOMIC_ADD_RTN_32                          = 0xf,
	TC_OP_ATOMIC_SUB_RTN_32                          = 0x10,
	TC_OP_ATOMIC_SMIN_RTN_32                         = 0x11,
	TC_OP_ATOMIC_UMIN_RTN_32                         = 0x12,
	TC_OP_ATOMIC_SMAX_RTN_32                         = 0x13,
	TC_OP_ATOMIC_UMAX_RTN_32                         = 0x14,
	TC_OP_ATOMIC_AND_RTN_32                          = 0x15,
	TC_OP_ATOMIC_OR_RTN_32                           = 0x16,
	TC_OP_ATOMIC_XOR_RTN_32                          = 0x17,
	TC_OP_ATOMIC_INC_RTN_32                          = 0x18,
	TC_OP_ATOMIC_DEC_RTN_32                          = 0x19,
	TC_OP_WBINVL1_VOL                                = 0x1a,
	TC_OP_RESERVED_NON_FLOAT_RTN_32_0                = 0x1b,
	TC_OP_RESERVED_NON_FLOAT_RTN_32_1                = 0x1c,
	TC_OP_RESERVED_NON_FLOAT_RTN_32_2                = 0x1d,
	TC_OP_RESERVED_NON_FLOAT_RTN_32_3                = 0x1e,
	TC_OP_RESERVED_NON_FLOAT_RTN_32_4                = 0x1f,
	TC_OP_WRITE                                      = 0x20,
	TC_OP_ATOMIC_FCMPSWAP_RTN_64                     = 0x21,
	TC_OP_ATOMIC_FMIN_RTN_64                         = 0x22,
	TC_OP_ATOMIC_FMAX_RTN_64                         = 0x23,
	TC_OP_RESERVED_FOP_RTN_64_0                      = 0x24,
	TC_OP_RESERVED_FOP_RTN_64_1                      = 0x25,
	TC_OP_RESERVED_FOP_RTN_64_2                      = 0x26,
	TC_OP_ATOMIC_SWAP_RTN_64                         = 0x27,
	TC_OP_ATOMIC_CMPSWAP_RTN_64                      = 0x28,
	TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_64        = 0x29,
	TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_64            = 0x2a,
	TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_64            = 0x2b,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_0         = 0x2c,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_1         = 0x2d,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_2         = 0x2e,
	TC_OP_ATOMIC_ADD_RTN_64                          = 0x2f,
	TC_OP_ATOMIC_SUB_RTN_64                          = 0x30,
	TC_OP_ATOMIC_SMIN_RTN_64                         = 0x31,
	TC_OP_ATOMIC_UMIN_RTN_64                         = 0x32,
	TC_OP_ATOMIC_SMAX_RTN_64                         = 0x33,
	TC_OP_ATOMIC_UMAX_RTN_64                         = 0x34,
	TC_OP_ATOMIC_AND_RTN_64                          = 0x35,
	TC_OP_ATOMIC_OR_RTN_64                           = 0x36,
	TC_OP_ATOMIC_XOR_RTN_64                          = 0x37,
	TC_OP_ATOMIC_INC_RTN_64                          = 0x38,
	TC_OP_ATOMIC_DEC_RTN_64                          = 0x39,
	TC_OP_WBL2_VOL                                   = 0x3a,
	TC_OP_RESERVED_NON_FLOAT_RTN_64_0                = 0x3b,
	TC_OP_RESERVED_NON_FLOAT_RTN_64_1                = 0x3c,
	TC_OP_RESERVED_NON_FLOAT_RTN_64_2                = 0x3d,
	TC_OP_RESERVED_NON_FLOAT_RTN_64_3                = 0x3e,
	TC_OP_RESERVED_NON_FLOAT_RTN_64_4                = 0x3f,
	TC_OP_WBINVL1                                    = 0x40,
	TC_OP_ATOMIC_FCMPSWAP_32                         = 0x41,
	TC_OP_ATOMIC_FMIN_32                             = 0x42,
	TC_OP_ATOMIC_FMAX_32                             = 0x43,
	TC_OP_RESERVED_FOP_32_0                          = 0x44,
	TC_OP_RESERVED_FOP_32_1                          = 0x45,
	TC_OP_RESERVED_FOP_32_2                          = 0x46,
	TC_OP_ATOMIC_SWAP_32                             = 0x47,
	TC_OP_ATOMIC_CMPSWAP_32                          = 0x48,
	TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_32            = 0x49,
	TC_OP_ATOMIC_FMIN_FLUSH_DENORM_32                = 0x4a,
	TC_OP_ATOMIC_FMAX_FLUSH_DENORM_32                = 0x4b,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_32_0             = 0x4c,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_32_1             = 0x4d,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_32_2             = 0x4e,
	TC_OP_ATOMIC_ADD_32                              = 0x4f,
	TC_OP_ATOMIC_SUB_32                              = 0x50,
	TC_OP_ATOMIC_SMIN_32                             = 0x51,
	TC_OP_ATOMIC_UMIN_32                             = 0x52,
	TC_OP_ATOMIC_SMAX_32                             = 0x53,
	TC_OP_ATOMIC_UMAX_32                             = 0x54,
	TC_OP_ATOMIC_AND_32                              = 0x55,
	TC_OP_ATOMIC_OR_32                               = 0x56,
	TC_OP_ATOMIC_XOR_32                              = 0x57,
	TC_OP_ATOMIC_INC_32                              = 0x58,
	TC_OP_ATOMIC_DEC_32                              = 0x59,
	TC_OP_INVL2_VOL                                  = 0x5a,
	TC_OP_RESERVED_NON_FLOAT_32_0                    = 0x5b,
	TC_OP_RESERVED_NON_FLOAT_32_1                    = 0x5c,
	TC_OP_RESERVED_NON_FLOAT_32_2                    = 0x5d,
	TC_OP_RESERVED_NON_FLOAT_32_3                    = 0x5e,
	TC_OP_RESERVED_NON_FLOAT_32_4                    = 0x5f,
	TC_OP_WBINVL2                                    = 0x60,
	TC_OP_ATOMIC_FCMPSWAP_64                         = 0x61,
	TC_OP_ATOMIC_FMIN_64                             = 0x62,
	TC_OP_ATOMIC_FMAX_64                             = 0x63,
	TC_OP_RESERVED_FOP_64_0                          = 0x64,
	TC_OP_RESERVED_FOP_64_1                          = 0x65,
	TC_OP_RESERVED_FOP_64_2                          = 0x66,
	TC_OP_ATOMIC_SWAP_64                             = 0x67,
	TC_OP_ATOMIC_CMPSWAP_64                          = 0x68,
	TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_64            = 0x69,
	TC_OP_ATOMIC_FMIN_FLUSH_DENORM_64                = 0x6a,
	TC_OP_ATOMIC_FMAX_FLUSH_DENORM_64                = 0x6b,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_64_0             = 0x6c,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_64_1             = 0x6d,
	TC_OP_RESERVED_FOP_FLUSH_DENORM_64_2             = 0x6e,
	TC_OP_ATOMIC_ADD_64                              = 0x6f,
	TC_OP_ATOMIC_SUB_64                              = 0x70,
	TC_OP_ATOMIC_SMIN_64                             = 0x71,
	TC_OP_ATOMIC_UMIN_64                             = 0x72,
	TC_OP_ATOMIC_SMAX_64                             = 0x73,
	TC_OP_ATOMIC_UMAX_64                             = 0x74,
	TC_OP_ATOMIC_AND_64                              = 0x75,
	TC_OP_ATOMIC_OR_64                               = 0x76,
	TC_OP_ATOMIC_XOR_64                              = 0x77,
	TC_OP_ATOMIC_INC_64                              = 0x78,
	TC_OP_ATOMIC_DEC_64                              = 0x79,
	TC_OP_INVL1L2_VOL                                = 0x7a,
	TC_OP_RESERVED_NON_FLOAT_64_0                    = 0x7b,
	TC_OP_RESERVED_NON_FLOAT_64_1                    = 0x7c,
	TC_OP_RESERVED_NON_FLOAT_64_2                    = 0x7d,
	TC_OP_RESERVED_NON_FLOAT_64_3                    = 0x7e,
	TC_OP_RESERVED_NON_FLOAT_64_4                    = 0x7f,
} TC_OP;
typedef enum TC_CHUB_REQ_CREDITS_ENUM {
	TC_CHUB_REQ_CREDITS                              = 0x10,
} TC_CHUB_REQ_CREDITS_ENUM;
typedef enum CHUB_TC_RET_CREDITS_ENUM {
	CHUB_TC_RET_CREDITS                              = 0x20,
} CHUB_TC_RET_CREDITS_ENUM;
/*
 * TC NACK codes: four consecutive values 0..3, with 0 meaning
 * "no fault" and the remaining three naming distinct fault kinds.
 */
typedef enum TC_NACKS {
	TC_NACK_NO_FAULT = 0x00,
	TC_NACK_PAGE_FAULT = 0x01,
	TC_NACK_PROTECTION_FAULT = 0x02,
	TC_NACK_DATA_ERROR = 0x03,
} TC_NACKS;
/*
 * TCC performance-counter selector codes.
 *
 * Layout of the value space as defined below:
 *   0x00-0x2c  general selectors
 *   0x2d-0x3f  not assigned in this enum
 *   0x40-0x7f  CLIENT<n>_REQ, value == 0x40 + n for n = 0..63
 *   0x80-0x9f  writeback / evict / TC_OP / VOL selectors
 *
 * NOTE(review): presumably these are written to a TCC perf-counter
 * select field; confirm against the register documentation.
 */
typedef enum TCC_PERF_SEL {
	/* 0x00-0x2c: general selectors */
	TCC_PERF_SEL_NONE = 0x00,
	TCC_PERF_SEL_CYCLE = 0x01,
	TCC_PERF_SEL_BUSY = 0x02,
	TCC_PERF_SEL_REQ = 0x03,
	TCC_PERF_SEL_STREAMING_REQ = 0x04,
	TCC_PERF_SEL_READ = 0x05,
	TCC_PERF_SEL_WRITE = 0x06,
	TCC_PERF_SEL_ATOMIC = 0x07,
	TCC_PERF_SEL_WBINVL2 = 0x08,
	TCC_PERF_SEL_WBINVL2_CYCLE = 0x09,
	TCC_PERF_SEL_HIT = 0x0a,
	TCC_PERF_SEL_MISS = 0x0b,
	TCC_PERF_SEL_DEWRITE_ALLOCATE_HIT = 0x0c,
	TCC_PERF_SEL_FULLY_WRITTEN_HIT = 0x0d,
	TCC_PERF_SEL_WRITEBACK = 0x0e,
	TCC_PERF_SEL_LATENCY_FIFO_FULL = 0x0f,
	TCC_PERF_SEL_SRC_FIFO_FULL = 0x10,
	TCC_PERF_SEL_HOLE_FIFO_FULL = 0x11,
	TCC_PERF_SEL_MC_WRREQ = 0x12,
	TCC_PERF_SEL_MC_WRREQ_STALL = 0x13,
	TCC_PERF_SEL_MC_WRREQ_CREDIT_STALL = 0x14,
	TCC_PERF_SEL_MC_WRREQ_MC_HALT_STALL = 0x15,
	TCC_PERF_SEL_TOO_MANY_MC_WRREQS_STALL = 0x16,
	TCC_PERF_SEL_MC_WRREQ_LEVEL = 0x17,
	TCC_PERF_SEL_MC_RDREQ = 0x18,
	TCC_PERF_SEL_MC_RDREQ_CREDIT_STALL = 0x19,
	TCC_PERF_SEL_MC_RDREQ_MC_HALT_STALL = 0x1a,
	TCC_PERF_SEL_MC_RDREQ_LEVEL = 0x1b,
	TCC_PERF_SEL_TAG_STALL = 0x1c,
	TCC_PERF_SEL_TAG_WRITEBACK_FIFO_FULL = 0x1d,
	TCC_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL = 0x1e,
	TCC_PERF_SEL_READ_RETURN_TIMEOUT = 0x1f,
	TCC_PERF_SEL_WRITEBACK_READ_TIMEOUT = 0x20,
	TCC_PERF_SEL_READ_RETURN_FULL_BUBBLE = 0x21,
	TCC_PERF_SEL_BUBBLE = 0x22,
	TCC_PERF_SEL_RETURN_ACK = 0x23,
	TCC_PERF_SEL_RETURN_DATA = 0x24,
	TCC_PERF_SEL_RETURN_HOLE = 0x25,
	TCC_PERF_SEL_RETURN_ACK_HOLE = 0x26,
	TCC_PERF_SEL_IB_STALL = 0x27,
	TCC_PERF_SEL_TCA_LEVEL = 0x28,
	TCC_PERF_SEL_HOLE_LEVEL = 0x29,
	TCC_PERF_SEL_MC_RDRET_NACK = 0x2a,
	TCC_PERF_SEL_MC_WRRET_NACK = 0x2b,
	TCC_PERF_SEL_EXE_REQ = 0x2c,

	/* 0x40-0x7f: CLIENT<n>_REQ, value == 0x40 + n */
	TCC_PERF_SEL_CLIENT0_REQ = 0x40,
	TCC_PERF_SEL_CLIENT1_REQ = 0x41,
	TCC_PERF_SEL_CLIENT2_REQ = 0x42,
	TCC_PERF_SEL_CLIENT3_REQ = 0x43,
	TCC_PERF_SEL_CLIENT4_REQ = 0x44,
	TCC_PERF_SEL_CLIENT5_REQ = 0x45,
	TCC_PERF_SEL_CLIENT6_REQ = 0x46,
	TCC_PERF_SEL_CLIENT7_REQ = 0x47,
	TCC_PERF_SEL_CLIENT8_REQ = 0x48,
	TCC_PERF_SEL_CLIENT9_REQ = 0x49,
	TCC_PERF_SEL_CLIENT10_REQ = 0x4a,
	TCC_PERF_SEL_CLIENT11_REQ = 0x4b,
	TCC_PERF_SEL_CLIENT12_REQ = 0x4c,
	TCC_PERF_SEL_CLIENT13_REQ = 0x4d,
	TCC_PERF_SEL_CLIENT14_REQ = 0x4e,
	TCC_PERF_SEL_CLIENT15_REQ = 0x4f,
	TCC_PERF_SEL_CLIENT16_REQ = 0x50,
	TCC_PERF_SEL_CLIENT17_REQ = 0x51,
	TCC_PERF_SEL_CLIENT18_REQ = 0x52,
	TCC_PERF_SEL_CLIENT19_REQ = 0x53,
	TCC_PERF_SEL_CLIENT20_REQ = 0x54,
	TCC_PERF_SEL_CLIENT21_REQ = 0x55,
	TCC_PERF_SEL_CLIENT22_REQ = 0x56,
	TCC_PERF_SEL_CLIENT23_REQ = 0x57,
	TCC_PERF_SEL_CLIENT24_REQ = 0x58,
	TCC_PERF_SEL_CLIENT25_REQ = 0x59,
	TCC_PERF_SEL_CLIENT26_REQ = 0x5a,
	TCC_PERF_SEL_CLIENT27_REQ = 0x5b,
	TCC_PERF_SEL_CLIENT28_REQ = 0x5c,
	TCC_PERF_SEL_CLIENT29_REQ = 0x5d,
	TCC_PERF_SEL_CLIENT30_REQ = 0x5e,
	TCC_PERF_SEL_CLIENT31_REQ = 0x5f,
	TCC_PERF_SEL_CLIENT32_REQ = 0x60,
	TCC_PERF_SEL_CLIENT33_REQ = 0x61,
	TCC_PERF_SEL_CLIENT34_REQ = 0x62,
	TCC_PERF_SEL_CLIENT35_REQ = 0x63,
	TCC_PERF_SEL_CLIENT36_REQ = 0x64,
	TCC_PERF_SEL_CLIENT37_REQ = 0x65,
	TCC_PERF_SEL_CLIENT38_REQ = 0x66,
	TCC_PERF_SEL_CLIENT39_REQ = 0x67,
	TCC_PERF_SEL_CLIENT40_REQ = 0x68,
	TCC_PERF_SEL_CLIENT41_REQ = 0x69,
	TCC_PERF_SEL_CLIENT42_REQ = 0x6a,
	TCC_PERF_SEL_CLIENT43_REQ = 0x6b,
	TCC_PERF_SEL_CLIENT44_REQ = 0x6c,
	TCC_PERF_SEL_CLIENT45_REQ = 0x6d,
	TCC_PERF_SEL_CLIENT46_REQ = 0x6e,
	TCC_PERF_SEL_CLIENT47_REQ = 0x6f,
	TCC_PERF_SEL_CLIENT48_REQ = 0x70,
	TCC_PERF_SEL_CLIENT49_REQ = 0x71,
	TCC_PERF_SEL_CLIENT50_REQ = 0x72,
	TCC_PERF_SEL_CLIENT51_REQ = 0x73,
	TCC_PERF_SEL_CLIENT52_REQ = 0x74,
	TCC_PERF_SEL_CLIENT53_REQ = 0x75,
	TCC_PERF_SEL_CLIENT54_REQ = 0x76,
	TCC_PERF_SEL_CLIENT55_REQ = 0x77,
	TCC_PERF_SEL_CLIENT56_REQ = 0x78,
	TCC_PERF_SEL_CLIENT57_REQ = 0x79,
	TCC_PERF_SEL_CLIENT58_REQ = 0x7a,
	TCC_PERF_SEL_CLIENT59_REQ = 0x7b,
	TCC_PERF_SEL_CLIENT60_REQ = 0x7c,
	TCC_PERF_SEL_CLIENT61_REQ = 0x7d,
	TCC_PERF_SEL_CLIENT62_REQ = 0x7e,
	TCC_PERF_SEL_CLIENT63_REQ = 0x7f,

	/* 0x80-0x83: *_WRITEBACK selectors */
	TCC_PERF_SEL_NORMAL_WRITEBACK = 0x80,
	TCC_PERF_SEL_TC_OP_WBL2_VOL_WRITEBACK = 0x81,
	TCC_PERF_SEL_TC_OP_WBINVL2_WRITEBACK = 0x82,
	TCC_PERF_SEL_ALL_TC_OP_WB_WRITEBACK = 0x83,

	/* 0x84-0x8a: *_EVICT selectors */
	TCC_PERF_SEL_NORMAL_EVICT = 0x84,
	TCC_PERF_SEL_TC_OP_INVL2_VOL_EVICT = 0x85,
	TCC_PERF_SEL_TC_OP_INVL1L2_VOL_EVICT = 0x86,
	TCC_PERF_SEL_TC_OP_WBL2_VOL_EVICT = 0x87,
	TCC_PERF_SEL_TC_OP_WBINVL2_EVICT = 0x88,
	TCC_PERF_SEL_ALL_TC_OP_INV_EVICT = 0x89,
	TCC_PERF_SEL_ALL_TC_OP_INV_VOL_EVICT = 0x8a,

	/* 0x8b-0x90: TC_OP *_CYCLE selectors */
	TCC_PERF_SEL_TC_OP_WBL2_VOL_CYCLE = 0x8b,
	TCC_PERF_SEL_TC_OP_INVL2_VOL_CYCLE = 0x8c,
	TCC_PERF_SEL_TC_OP_INVL1L2_VOL_CYCLE = 0x8d,
	TCC_PERF_SEL_TC_OP_WBINVL2_CYCLE = 0x8e,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_CYCLE = 0x8f,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_VOL_CYCLE = 0x90,

	/* 0x91-0x96: TC_OP *_START selectors */
	TCC_PERF_SEL_TC_OP_WBL2_VOL_START = 0x91,
	TCC_PERF_SEL_TC_OP_INVL2_VOL_START = 0x92,
	TCC_PERF_SEL_TC_OP_INVL1L2_VOL_START = 0x93,
	TCC_PERF_SEL_TC_OP_WBINVL2_START = 0x94,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_START = 0x95,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_VOL_START = 0x96,

	/* 0x97-0x9c: TC_OP *_FINISH selectors */
	TCC_PERF_SEL_TC_OP_WBL2_VOL_FINISH = 0x97,
	TCC_PERF_SEL_TC_OP_INVL2_VOL_FINISH = 0x98,
	TCC_PERF_SEL_TC_OP_INVL1L2_VOL_FINISH = 0x99,
	TCC_PERF_SEL_TC_OP_WBINVL2_FINISH = 0x9a,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_FINISH = 0x9b,
	TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_VOL_FINISH = 0x9c,

	/* 0x9d-0x9f: VOL_* selectors */
	TCC_PERF_SEL_VOL_MC_WRREQ = 0x9d,
	TCC_PERF_SEL_VOL_MC_RDREQ = 0x9e,
	TCC_PERF_SEL_VOL_REQ = 0x9f,
} TCC_PERF_SEL;
/*
 * TCA performance-counter selector codes, contiguous from 0x00 to 0x26.
 *
 * After NONE/CYCLE/BUSY come four families, each with one entry per
 * TCC index 0..7 (FORCED_HOLE, REQ, CROSSBAR_DOUBLE_ARB, CROSSBAR_STALL),
 * followed by the same four families once more for TCS.
 */
typedef enum TCA_PERF_SEL {
	TCA_PERF_SEL_NONE = 0x00,
	TCA_PERF_SEL_CYCLE = 0x01,
	TCA_PERF_SEL_BUSY = 0x02,

	/* FORCED_HOLE_TCC<n>: value == 0x03 + n */
	TCA_PERF_SEL_FORCED_HOLE_TCC0 = 0x03,
	TCA_PERF_SEL_FORCED_HOLE_TCC1 = 0x04,
	TCA_PERF_SEL_FORCED_HOLE_TCC2 = 0x05,
	TCA_PERF_SEL_FORCED_HOLE_TCC3 = 0x06,
	TCA_PERF_SEL_FORCED_HOLE_TCC4 = 0x07,
	TCA_PERF_SEL_FORCED_HOLE_TCC5 = 0x08,
	TCA_PERF_SEL_FORCED_HOLE_TCC6 = 0x09,
	TCA_PERF_SEL_FORCED_HOLE_TCC7 = 0x0a,

	/* REQ_TCC<n>: value == 0x0b + n */
	TCA_PERF_SEL_REQ_TCC0 = 0x0b,
	TCA_PERF_SEL_REQ_TCC1 = 0x0c,
	TCA_PERF_SEL_REQ_TCC2 = 0x0d,
	TCA_PERF_SEL_REQ_TCC3 = 0x0e,
	TCA_PERF_SEL_REQ_TCC4 = 0x0f,
	TCA_PERF_SEL_REQ_TCC5 = 0x10,
	TCA_PERF_SEL_REQ_TCC6 = 0x11,
	TCA_PERF_SEL_REQ_TCC7 = 0x12,

	/* CROSSBAR_DOUBLE_ARB_TCC<n>: value == 0x13 + n */
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC0 = 0x13,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC1 = 0x14,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC2 = 0x15,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC3 = 0x16,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC4 = 0x17,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC5 = 0x18,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC6 = 0x19,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC7 = 0x1a,

	/* CROSSBAR_STALL_TCC<n>: value == 0x1b + n */
	TCA_PERF_SEL_CROSSBAR_STALL_TCC0 = 0x1b,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC1 = 0x1c,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC2 = 0x1d,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC3 = 0x1e,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC4 = 0x1f,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC5 = 0x20,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC6 = 0x21,
	TCA_PERF_SEL_CROSSBAR_STALL_TCC7 = 0x22,

	/* The same four families for TCS */
	TCA_PERF_SEL_FORCED_HOLE_TCS = 0x23,
	TCA_PERF_SEL_REQ_TCS = 0x24,
	TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCS = 0x25,
	TCA_PERF_SEL_CROSSBAR_STALL_TCS = 0x26,
} TCA_PERF_SEL;
/*
 * TCS performance-counter selector codes.
 *
 * Layout of the value space as defined below:
 *   0x00-0x14  general selectors
 *   0x15-0x3f  not assigned in this enum
 *   0x40-0x7f  CLIENT<n>_REQ, value == 0x40 + n for n = 0..63
 */
typedef enum TCS_PERF_SEL {
	/* 0x00-0x14: general selectors */
	TCS_PERF_SEL_NONE = 0x00,
	TCS_PERF_SEL_CYCLE = 0x01,
	TCS_PERF_SEL_BUSY = 0x02,
	TCS_PERF_SEL_REQ = 0x03,
	TCS_PERF_SEL_READ = 0x04,
	TCS_PERF_SEL_WRITE = 0x05,
	TCS_PERF_SEL_ATOMIC = 0x06,
	TCS_PERF_SEL_HOLE_FIFO_FULL = 0x07,
	TCS_PERF_SEL_REQ_FIFO_FULL = 0x08,
	TCS_PERF_SEL_REQ_CREDIT_STALL = 0x09,
	TCS_PERF_SEL_REQ_NO_SRC_DATA_STALL = 0x0a,
	TCS_PERF_SEL_REQ_STALL = 0x0b,
	TCS_PERF_SEL_TCS_CHUB_REQ_SEND = 0x0c,
	TCS_PERF_SEL_CHUB_TCS_RET_SEND = 0x0d,
	TCS_PERF_SEL_RETURN_ACK = 0x0e,
	TCS_PERF_SEL_RETURN_DATA = 0x0f,
	TCS_PERF_SEL_IB_TOTAL_REQUESTS_STALL = 0x10,
	TCS_PERF_SEL_IB_STALL = 0x11,
	TCS_PERF_SEL_TCA_LEVEL = 0x12,
	TCS_PERF_SEL_HOLE_LEVEL = 0x13,
	TCS_PERF_SEL_CHUB_LEVEL = 0x14,

	/* 0x40-0x7f: CLIENT<n>_REQ, value == 0x40 + n */
	TCS_PERF_SEL_CLIENT0_REQ = 0x40,
	TCS_PERF_SEL_CLIENT1_REQ = 0x41,
	TCS_PERF_SEL_CLIENT2_REQ = 0x42,
	TCS_PERF_SEL_CLIENT3_REQ = 0x43,
	TCS_PERF_SEL_CLIENT4_REQ = 0x44,
	TCS_PERF_SEL_CLIENT5_REQ = 0x45,
	TCS_PERF_SEL_CLIENT6_REQ = 0x46,
	TCS_PERF_SEL_CLIENT7_REQ = 0x47,
	TCS_PERF_SEL_CLIENT8_REQ = 0x48,
	TCS_PERF_SEL_CLIENT9_REQ = 0x49,
	TCS_PERF_SEL_CLIENT10_REQ = 0x4a,
	TCS_PERF_SEL_CLIENT11_REQ = 0x4b,
	TCS_PERF_SEL_CLIENT12_REQ = 0x4c,
	TCS_PERF_SEL_CLIENT13_REQ = 0x4d,
	TCS_PERF_SEL_CLIENT14_REQ = 0x4e,
	TCS_PERF_SEL_CLIENT15_REQ = 0x4f,
	TCS_PERF_SEL_CLIENT16_REQ = 0x50,
	TCS_PERF_SEL_CLIENT17_REQ = 0x51,
	TCS_PERF_SEL_CLIENT18_REQ = 0x52,
	TCS_PERF_SEL_CLIENT19_REQ = 0x53,
	TCS_PERF_SEL_CLIENT20_REQ = 0x54,
	TCS_PERF_SEL_CLIENT21_REQ = 0x55,
	TCS_PERF_SEL_CLIENT22_REQ = 0x56,
	TCS_PERF_SEL_CLIENT23_REQ = 0x57,
	TCS_PERF_SEL_CLIENT24_REQ = 0x58,
	TCS_PERF_SEL_CLIENT25_REQ = 0x59,
	TCS_PERF_SEL_CLIENT26_REQ = 0x5a,
	TCS_PERF_SEL_CLIENT27_REQ = 0x5b,
	TCS_PERF_SEL_CLIENT28_REQ = 0x5c,
	TCS_PERF_SEL_CLIENT29_REQ = 0x5d,
	TCS_PERF_SEL_CLIENT30_REQ = 0x5e,
	TCS_PERF_SEL_CLIENT31_REQ = 0x5f,
	TCS_PERF_SEL_CLIENT32_REQ = 0x60,
	TCS_PERF_SEL_CLIENT33_REQ = 0x61,
	TCS_PERF_SEL_CLIENT34_REQ = 0x62,
	TCS_PERF_SEL_CLIENT35_REQ = 0x63,
	TCS_PERF_SEL_CLIENT36_REQ = 0x64,
	TCS_PERF_SEL_CLIENT37_REQ = 0x65,
	TCS_PERF_SEL_CLIENT38_REQ = 0x66,
	TCS_PERF_SEL_CLIENT39_REQ = 0x67,
	TCS_PERF_SEL_CLIENT40_REQ = 0x68,
	TCS_PERF_SEL_CLIENT41_REQ = 0x69,
	TCS_PERF_SEL_CLIENT42_REQ = 0x6a,
	TCS_PERF_SEL_CLIENT43_REQ = 0x6b,
	TCS_PERF_SEL_CLIENT44_REQ = 0x6c,
	TCS_PERF_SEL_CLIENT45_REQ = 0x6d,
	TCS_PERF_SEL_CLIENT46_REQ = 0x6e,
	TCS_PERF_SEL_CLIENT47_REQ = 0x6f,
	TCS_PERF_SEL_CLIENT48_REQ = 0x70,
	TCS_PERF_SEL_CLIENT49_REQ = 0x71,
	TCS_PERF_SEL_CLIENT50_REQ = 0x72,
	TCS_PERF_SEL_CLIENT51_REQ = 0x73,
	TCS_PERF_SEL_CLIENT52_REQ = 0x74,
	TCS_PERF_SEL_CLIENT53_REQ = 0x75,
	TCS_PERF_SEL_CLIENT54_REQ = 0x76,
	TCS_PERF_SEL_CLIENT55_REQ = 0x77,
	TCS_PERF_SEL_CLIENT56_REQ = 0x78,
	TCS_PERF_SEL_CLIENT57_REQ = 0x79,
	TCS_PERF_SEL_CLIENT58_REQ = 0x7a,
	TCS_PERF_SEL_CLIENT59_REQ = 0x7b,
	TCS_PERF_SEL_CLIENT60_REQ = 0x7c,
	TCS_PERF_SEL_CLIENT61_REQ = 0x7d,
	TCS_PERF_SEL_CLIENT62_REQ = 0x7e,
	TCS_PERF_SEL_CLIENT63_REQ = 0x7f,
} TCS_PERF_SEL;
/*
 * TA/TC address-mode codes: seven consecutive values 0..6
 * (DEFAULT, COMP0..COMP3, UNALIGNED, BORDER_COLOR).
 */
typedef enum TA_TC_ADDR_MODES {
	TA_TC_ADDR_MODE_DEFAULT = 0x00,
	TA_TC_ADDR_MODE_COMP0 = 0x01,
	TA_TC_ADDR_MODE_COMP1 = 0x02,
	TA_TC_ADDR_MODE_COMP2 = 0x03,
	TA_TC_ADDR_MODE_COMP3 = 0x04,
	TA_TC_ADDR_MODE_UNALIGNED = 0x05,
	TA_TC_ADDR_MODE_BORDER_COLOR = 0x06,
} TA_TC_ADDR_MODES;
/*
 * TA performance-counter selector codes, contiguous from 0x00 to 0x6e.
 *
 * Entries named RESERVED_<d> occupy the slot whose decimal value is <d>
 * (e.g. RESERVED_15 == 0x0f), keeping the numbering gap-free.
 */
typedef enum TA_PERFCOUNT_SEL {
	/* 0x00-0x0f: *_busy selectors */
	TA_PERF_SEL_ta_busy = 0x00,
	TA_PERF_SEL_sh_fifo_busy = 0x01,
	TA_PERF_SEL_sh_fifo_cmd_busy = 0x02,
	TA_PERF_SEL_sh_fifo_addr_busy = 0x03,
	TA_PERF_SEL_sh_fifo_data_busy = 0x04,
	TA_PERF_SEL_sh_fifo_data_sfifo_busy = 0x05,
	TA_PERF_SEL_sh_fifo_data_tfifo_busy = 0x06,
	TA_PERF_SEL_gradient_busy = 0x07,
	TA_PERF_SEL_gradient_fifo_busy = 0x08,
	TA_PERF_SEL_lod_busy = 0x09,
	TA_PERF_SEL_lod_fifo_busy = 0x0a,
	TA_PERF_SEL_addresser_busy = 0x0b,
	TA_PERF_SEL_addresser_fifo_busy = 0x0c,
	TA_PERF_SEL_aligner_busy = 0x0d,
	TA_PERF_SEL_write_path_busy = 0x0e,
	TA_PERF_SEL_RESERVED_15 = 0x0f,

	/* 0x10-0x1f: input and sh_fifo cycle selectors */
	TA_PERF_SEL_sq_ta_cmd_cycles = 0x10,
	TA_PERF_SEL_sp_ta_addr_cycles = 0x11,
	TA_PERF_SEL_sp_ta_data_cycles = 0x12,
	TA_PERF_SEL_ta_fa_data_state_cycles = 0x13,
	TA_PERF_SEL_sh_fifo_addr_waiting_on_cmd_cycles = 0x14,
	TA_PERF_SEL_sh_fifo_cmd_waiting_on_addr_cycles = 0x15,
	TA_PERF_SEL_sh_fifo_addr_starved_while_busy_cycles = 0x16,
	TA_PERF_SEL_sh_fifo_cmd_starved_while_busy_cycles = 0x17,
	TA_PERF_SEL_sh_fifo_data_waiting_on_data_state_cycles = 0x18,
	TA_PERF_SEL_sh_fifo_data_state_waiting_on_data_cycles = 0x19,
	TA_PERF_SEL_sh_fifo_data_starved_while_busy_cycles = 0x1a,
	TA_PERF_SEL_sh_fifo_data_state_starved_while_busy_cycles = 0x1b,
	TA_PERF_SEL_RESERVED_28 = 0x1c,
	TA_PERF_SEL_RESERVED_29 = 0x1d,
	TA_PERF_SEL_sh_fifo_addr_cycles = 0x1e,
	TA_PERF_SEL_sh_fifo_data_cycles = 0x1f,

	/* 0x20-0x23: totals and stage cycles */
	TA_PERF_SEL_total_wavefronts = 0x20,
	TA_PERF_SEL_gradient_cycles = 0x21,
	TA_PERF_SEL_walker_cycles = 0x22,
	TA_PERF_SEL_aligner_cycles = 0x23,

	/* 0x24-0x2b: image_* selectors */
	TA_PERF_SEL_image_wavefronts = 0x24,
	TA_PERF_SEL_image_read_wavefronts = 0x25,
	TA_PERF_SEL_image_write_wavefronts = 0x26,
	TA_PERF_SEL_image_atomic_wavefronts = 0x27,
	TA_PERF_SEL_image_total_cycles = 0x28,
	TA_PERF_SEL_RESERVED_41 = 0x29,
	TA_PERF_SEL_RESERVED_42 = 0x2a,
	TA_PERF_SEL_RESERVED_43 = 0x2b,

	/* 0x2c-0x35: buffer_* selectors */
	TA_PERF_SEL_buffer_wavefronts = 0x2c,
	TA_PERF_SEL_buffer_read_wavefronts = 0x2d,
	TA_PERF_SEL_buffer_write_wavefronts = 0x2e,
	TA_PERF_SEL_buffer_atomic_wavefronts = 0x2f,
	TA_PERF_SEL_buffer_coalescable_wavefronts = 0x30,
	TA_PERF_SEL_buffer_total_cycles = 0x31,
	TA_PERF_SEL_buffer_coalescable_addr_multicycled_cycles = 0x32,
	TA_PERF_SEL_buffer_coalescable_clamp_16kdword_multicycled_cycles = 0x33,
	TA_PERF_SEL_buffer_coalesced_read_cycles = 0x34,
	TA_PERF_SEL_buffer_coalesced_write_cycles = 0x35,

	/* 0x36-0x3e: *_stalled_* selectors */
	TA_PERF_SEL_addr_stalled_by_tc_cycles = 0x36,
	TA_PERF_SEL_addr_stalled_by_td_cycles = 0x37,
	TA_PERF_SEL_data_stalled_by_tc_cycles = 0x38,
	TA_PERF_SEL_addresser_stalled_by_aligner_only_cycles = 0x39,
	TA_PERF_SEL_addresser_stalled_cycles = 0x3a,
	TA_PERF_SEL_aniso_stalled_by_addresser_only_cycles = 0x3b,
	TA_PERF_SEL_aniso_stalled_cycles = 0x3c,
	TA_PERF_SEL_deriv_stalled_by_aniso_only_cycles = 0x3d,
	TA_PERF_SEL_deriv_stalled_cycles = 0x3e,

	/* 0x3f-0x48: *_cycle_quads / *_cycle_pixels selectors */
	TA_PERF_SEL_aniso_gt1_cycle_quads = 0x3f,
	TA_PERF_SEL_color_1_cycle_pixels = 0x40,
	TA_PERF_SEL_color_2_cycle_pixels = 0x41,
	TA_PERF_SEL_color_3_cycle_pixels = 0x42,
	TA_PERF_SEL_color_4_cycle_pixels = 0x43,
	TA_PERF_SEL_mip_1_cycle_pixels = 0x44,
	TA_PERF_SEL_mip_2_cycle_pixels = 0x45,
	TA_PERF_SEL_vol_1_cycle_pixels = 0x46,
	TA_PERF_SEL_vol_2_cycle_pixels = 0x47,
	TA_PERF_SEL_bilin_point_1_cycle_pixels = 0x48,

	/* 0x49-0x58: mipmap_lod_<n>_samples (value == 0x49 + n), then invalid */
	TA_PERF_SEL_mipmap_lod_0_samples = 0x49,
	TA_PERF_SEL_mipmap_lod_1_samples = 0x4a,
	TA_PERF_SEL_mipmap_lod_2_samples = 0x4b,
	TA_PERF_SEL_mipmap_lod_3_samples = 0x4c,
	TA_PERF_SEL_mipmap_lod_4_samples = 0x4d,
	TA_PERF_SEL_mipmap_lod_5_samples = 0x4e,
	TA_PERF_SEL_mipmap_lod_6_samples = 0x4f,
	TA_PERF_SEL_mipmap_lod_7_samples = 0x50,
	TA_PERF_SEL_mipmap_lod_8_samples = 0x51,
	TA_PERF_SEL_mipmap_lod_9_samples = 0x52,
	TA_PERF_SEL_mipmap_lod_10_samples = 0x53,
	TA_PERF_SEL_mipmap_lod_11_samples = 0x54,
	TA_PERF_SEL_mipmap_lod_12_samples = 0x55,
	TA_PERF_SEL_mipmap_lod_13_samples = 0x56,
	TA_PERF_SEL_mipmap_lod_14_samples = 0x57,
	TA_PERF_SEL_mipmap_invalid_samples = 0x58,

	/* 0x59-0x61: aniso_<k>_cycle_quads */
	TA_PERF_SEL_aniso_1_cycle_quads = 0x59,
	TA_PERF_SEL_aniso_2_cycle_quads = 0x5a,
	TA_PERF_SEL_aniso_4_cycle_quads = 0x5b,
	TA_PERF_SEL_aniso_6_cycle_quads = 0x5c,
	TA_PERF_SEL_aniso_8_cycle_quads = 0x5d,
	TA_PERF_SEL_aniso_10_cycle_quads = 0x5e,
	TA_PERF_SEL_aniso_12_cycle_quads = 0x5f,
	TA_PERF_SEL_aniso_14_cycle_quads = 0x60,
	TA_PERF_SEL_aniso_16_cycle_quads = 0x61,

	/* 0x62-0x63: write_path_* cycle selectors */
	TA_PERF_SEL_write_path_input_cycles = 0x62,
	TA_PERF_SEL_write_path_output_cycles = 0x63,

	/* 0x64-0x68: flat_* selectors */
	TA_PERF_SEL_flat_wavefronts = 0x64,
	TA_PERF_SEL_flat_read_wavefronts = 0x65,
	TA_PERF_SEL_flat_write_wavefronts = 0x66,
	TA_PERF_SEL_flat_atomic_wavefronts = 0x67,
	TA_PERF_SEL_flat_coalesceable_wavefronts = 0x68,

	/* 0x69-0x6e: sclk selectors */
	TA_PERF_SEL_reg_sclk_vld = 0x69,
	TA_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x6a,
	TA_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x6b,
	TA_PERF_SEL_local_cg_dyn_sclk_grp1_mems_en = 0x6c,
	TA_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x6d,
	TA_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x6e,
} TA_PERFCOUNT_SEL;
/*
 * TD performance-counter selector codes, contiguous from 0x00 to 0x36.
 *
 * Entries named RESERVED_<d> occupy the slot whose decimal value is <d>
 * (e.g. RESERVED_14 == 0x0e), keeping the numbering gap-free.
 */
typedef enum TD_PERFCOUNT_SEL {
	/* 0x00-0x04: *_busy selectors */
	TD_PERF_SEL_td_busy = 0x00,
	TD_PERF_SEL_input_busy = 0x01,
	TD_PERF_SEL_output_busy = 0x02,
	TD_PERF_SEL_lerp_busy = 0x03,
	TD_PERF_SEL_RESERVED_4 = 0x04,

	/* 0x05-0x09: sclk selectors */
	TD_PERF_SEL_reg_sclk_vld = 0x05,
	TD_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x06,
	TD_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x07,
	TD_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x08,
	TD_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x09,

	/* 0x0a-0x0e: *_full selectors */
	TD_PERF_SEL_tc_td_fifo_full = 0x0a,
	TD_PERF_SEL_constant_state_full = 0x0b,
	TD_PERF_SEL_sample_state_full = 0x0c,
	TD_PERF_SEL_output_fifo_full = 0x0d,
	TD_PERF_SEL_RESERVED_14 = 0x0e,

	/* 0x0f-0x13: *_stall selectors */
	TD_PERF_SEL_tc_stall = 0x0f,
	TD_PERF_SEL_pc_stall = 0x10,
	TD_PERF_SEL_gds_stall = 0x11,
	TD_PERF_SEL_RESERVED_18 = 0x12,
	TD_PERF_SEL_RESERVED_19 = 0x13,

	/* 0x14-0x27: *_wavefront / phase selectors */
	TD_PERF_SEL_gather4_wavefront = 0x14,
	TD_PERF_SEL_sample_c_wavefront = 0x15,
	TD_PERF_SEL_load_wavefront = 0x16,
	TD_PERF_SEL_atomic_wavefront = 0x17,
	TD_PERF_SEL_store_wavefront = 0x18,
	TD_PERF_SEL_ldfptr_wavefront = 0x19,
	TD_PERF_SEL_RESERVED_26 = 0x1a,
	TD_PERF_SEL_RESERVED_27 = 0x1b,
	TD_PERF_SEL_RESERVED_28 = 0x1c,
	TD_PERF_SEL_RESERVED_29 = 0x1d,
	TD_PERF_SEL_bypass_filter_wavefront = 0x1e,
	TD_PERF_SEL_min_max_filter_wavefront = 0x1f,
	TD_PERF_SEL_coalescable_wavefront = 0x20,
	TD_PERF_SEL_coalesced_phase = 0x21,
	TD_PERF_SEL_four_phase_wavefront = 0x22,
	TD_PERF_SEL_eight_phase_wavefront = 0x23,
	TD_PERF_SEL_sixteen_phase_wavefront = 0x24,
	TD_PERF_SEL_four_phase_forward_wavefront = 0x25,
	TD_PERF_SEL_write_ack_wavefront = 0x26,
	TD_PERF_SEL_RESERVED_39 = 0x27,

	/* 0x28-0x2c: *_border selectors */
	TD_PERF_SEL_user_defined_border = 0x28,
	TD_PERF_SEL_white_border = 0x29,
	TD_PERF_SEL_opaque_black_border = 0x2a,
	TD_PERF_SEL_RESERVED_43 = 0x2b,
	TD_PERF_SEL_RESERVED_44 = 0x2c,

	/* 0x2d-0x36: nack, traffic, poison and cycle selectors */
	TD_PERF_SEL_nack = 0x2d,
	TD_PERF_SEL_td_sp_traffic = 0x2e,
	TD_PERF_SEL_consume_gds_traffic = 0x2f,
	TD_PERF_SEL_addresscmd_poison = 0x30,
	TD_PERF_SEL_data_poison = 0x31,
	TD_PERF_SEL_start_cycle_0 = 0x32,
	TD_PERF_SEL_start_cycle_1 = 0x33,
	TD_PERF_SEL_start_cycle_2 = 0x34,
	TD_PERF_SEL_start_cycle_3 = 0x35,
	TD_PERF_SEL_null_cycle_output = 0x36,
} TD_PERFCOUNT_SEL;
/*
 * TCP_PERFCOUNT_SELECT - selector codes for the TCP_PERF_SEL_* counters.
 *
 * Codes are contiguous from 0x00 through 0x99 with no gaps or duplicates.
 * NOTE(review): presumably these mirror a hardware perf-counter select
 * field, so entries must not be renumbered - confirm against the register
 * specification.
 */
typedef enum TCP_PERFCOUNT_SELECT {
	TCP_PERF_SEL_TA_TCP_ADDR_STARVE_CYCLES = 0x00,
	TCP_PERF_SEL_TA_TCP_DATA_STARVE_CYCLES = 0x01,
	TCP_PERF_SEL_TCP_TA_ADDR_STALL_CYCLES = 0x02,
	TCP_PERF_SEL_TCP_TA_DATA_STALL_CYCLES = 0x03,
	TCP_PERF_SEL_TD_TCP_STALL_CYCLES = 0x04,
	TCP_PERF_SEL_TCR_TCP_STALL_CYCLES = 0x05,
	TCP_PERF_SEL_LOD_STALL_CYCLES = 0x06,
	TCP_PERF_SEL_READ_TAGCONFLICT_STALL_CYCLES = 0x07,
	TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL_CYCLES = 0x08,
	TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL_CYCLES = 0x09,
	TCP_PERF_SEL_ALLOC_STALL_CYCLES = 0x0a,
	TCP_PERF_SEL_LFIFO_STALL_CYCLES = 0x0b,
	TCP_PERF_SEL_RFIFO_STALL_CYCLES = 0x0c,
	TCP_PERF_SEL_TCR_RDRET_STALL = 0x0d,
	TCP_PERF_SEL_WRITE_CONFLICT_STALL = 0x0e,
	TCP_PERF_SEL_HOLE_READ_STALL = 0x0f,
	TCP_PERF_SEL_READCONFLICT_STALL_CYCLES = 0x10,
	TCP_PERF_SEL_PENDING_STALL_CYCLES = 0x11,
	TCP_PERF_SEL_READFIFO_STALL_CYCLES = 0x12,
	TCP_PERF_SEL_TCP_LATENCY = 0x13,
	TCP_PERF_SEL_TCC_READ_REQ_LATENCY = 0x14,
	TCP_PERF_SEL_TCC_WRITE_REQ_LATENCY = 0x15,
	TCP_PERF_SEL_TCC_WRITE_REQ_HOLE_LATENCY = 0x16,
	TCP_PERF_SEL_TCC_READ_REQ = 0x17,
	TCP_PERF_SEL_TCC_WRITE_REQ = 0x18,
	TCP_PERF_SEL_TCC_ATOMIC_WITH_RET_REQ = 0x19,
	TCP_PERF_SEL_TCC_ATOMIC_WITHOUT_RET_REQ = 0x1a,
	TCP_PERF_SEL_TOTAL_LOCAL_READ = 0x1b,
	TCP_PERF_SEL_TOTAL_GLOBAL_READ = 0x1c,
	TCP_PERF_SEL_TOTAL_LOCAL_WRITE = 0x1d,
	TCP_PERF_SEL_TOTAL_GLOBAL_WRITE = 0x1e,
	TCP_PERF_SEL_TOTAL_ATOMIC_WITH_RET = 0x1f,
	TCP_PERF_SEL_TOTAL_ATOMIC_WITHOUT_RET = 0x20,
	TCP_PERF_SEL_TOTAL_WBINVL1 = 0x21,
	TCP_PERF_SEL_IMG_READ_FMT_1 = 0x22,
	TCP_PERF_SEL_IMG_READ_FMT_8 = 0x23,
	TCP_PERF_SEL_IMG_READ_FMT_16 = 0x24,
	TCP_PERF_SEL_IMG_READ_FMT_32 = 0x25,
	TCP_PERF_SEL_IMG_READ_FMT_32_AS_8 = 0x26,
	TCP_PERF_SEL_IMG_READ_FMT_32_AS_16 = 0x27,
	TCP_PERF_SEL_IMG_READ_FMT_32_AS_128 = 0x28,
	TCP_PERF_SEL_IMG_READ_FMT_64_2_CYCLE = 0x29,
	TCP_PERF_SEL_IMG_READ_FMT_64_1_CYCLE = 0x2a,
	TCP_PERF_SEL_IMG_READ_FMT_96 = 0x2b,
	TCP_PERF_SEL_IMG_READ_FMT_128_4_CYCLE = 0x2c,
	TCP_PERF_SEL_IMG_READ_FMT_128_1_CYCLE = 0x2d,
	TCP_PERF_SEL_IMG_READ_FMT_BC1 = 0x2e,
	TCP_PERF_SEL_IMG_READ_FMT_BC2 = 0x2f,
	TCP_PERF_SEL_IMG_READ_FMT_BC3 = 0x30,
	TCP_PERF_SEL_IMG_READ_FMT_BC4 = 0x31,
	TCP_PERF_SEL_IMG_READ_FMT_BC5 = 0x32,
	TCP_PERF_SEL_IMG_READ_FMT_BC6 = 0x33,
	TCP_PERF_SEL_IMG_READ_FMT_BC7 = 0x34,
	TCP_PERF_SEL_IMG_READ_FMT_I8 = 0x35,
	TCP_PERF_SEL_IMG_READ_FMT_I16 = 0x36,
	TCP_PERF_SEL_IMG_READ_FMT_I32 = 0x37,
	TCP_PERF_SEL_IMG_READ_FMT_I32_AS_8 = 0x38,
	TCP_PERF_SEL_IMG_READ_FMT_I32_AS_16 = 0x39,
	TCP_PERF_SEL_IMG_READ_FMT_D8 = 0x3a,
	TCP_PERF_SEL_IMG_READ_FMT_D16 = 0x3b,
	TCP_PERF_SEL_IMG_READ_FMT_D32 = 0x3c,
	TCP_PERF_SEL_IMG_WRITE_FMT_8 = 0x3d,
	TCP_PERF_SEL_IMG_WRITE_FMT_16 = 0x3e,
	TCP_PERF_SEL_IMG_WRITE_FMT_32 = 0x3f,
	TCP_PERF_SEL_IMG_WRITE_FMT_64 = 0x40,
	TCP_PERF_SEL_IMG_WRITE_FMT_128 = 0x41,
	TCP_PERF_SEL_IMG_WRITE_FMT_D8 = 0x42,
	TCP_PERF_SEL_IMG_WRITE_FMT_D16 = 0x43,
	TCP_PERF_SEL_IMG_WRITE_FMT_D32 = 0x44,
	TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_32 = 0x45,
	TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_32 = 0x46,
	TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_64 = 0x47,
	TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_64 = 0x48,
	TCP_PERF_SEL_BUF_READ_FMT_8 = 0x49,
	TCP_PERF_SEL_BUF_READ_FMT_16 = 0x4a,
	TCP_PERF_SEL_BUF_READ_FMT_32 = 0x4b,
	TCP_PERF_SEL_BUF_WRITE_FMT_8 = 0x4c,
	TCP_PERF_SEL_BUF_WRITE_FMT_16 = 0x4d,
	TCP_PERF_SEL_BUF_WRITE_FMT_32 = 0x4e,
	TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_32 = 0x4f,
	TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_32 = 0x50,
	TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_64 = 0x51,
	TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_64 = 0x52,
	TCP_PERF_SEL_ARR_LINEAR_GENERAL = 0x53,
	TCP_PERF_SEL_ARR_LINEAR_ALIGNED = 0x54,
	TCP_PERF_SEL_ARR_1D_THIN1 = 0x55,
	TCP_PERF_SEL_ARR_1D_THICK = 0x56,
	TCP_PERF_SEL_ARR_2D_THIN1 = 0x57,
	TCP_PERF_SEL_ARR_2D_THICK = 0x58,
	TCP_PERF_SEL_ARR_2D_XTHICK = 0x59,
	TCP_PERF_SEL_ARR_3D_THIN1 = 0x5a,
	TCP_PERF_SEL_ARR_3D_THICK = 0x5b,
	TCP_PERF_SEL_ARR_3D_XTHICK = 0x5c,
	TCP_PERF_SEL_DIM_1D = 0x5d,
	TCP_PERF_SEL_DIM_2D = 0x5e,
	TCP_PERF_SEL_DIM_3D = 0x5f,
	TCP_PERF_SEL_DIM_1D_ARRAY = 0x60,
	TCP_PERF_SEL_DIM_2D_ARRAY = 0x61,
	TCP_PERF_SEL_DIM_2D_MSAA = 0x62,
	TCP_PERF_SEL_DIM_2D_ARRAY_MSAA = 0x63,
	TCP_PERF_SEL_DIM_CUBE_ARRAY = 0x64,
	TCP_PERF_SEL_CP_TCP_INVALIDATE = 0x65,
	TCP_PERF_SEL_TA_TCP_STATE_READ = 0x66,
	TCP_PERF_SEL_TAGRAM0_REQ = 0x67,
	TCP_PERF_SEL_TAGRAM1_REQ = 0x68,
	TCP_PERF_SEL_TAGRAM2_REQ = 0x69,
	TCP_PERF_SEL_TAGRAM3_REQ = 0x6a,
	TCP_PERF_SEL_GATE_EN1 = 0x6b,
	TCP_PERF_SEL_GATE_EN2 = 0x6c,
	TCP_PERF_SEL_CORE_REG_SCLK_VLD = 0x6d,
	TCP_PERF_SEL_TCC_REQ = 0x6e,
	TCP_PERF_SEL_TCC_NON_READ_REQ = 0x6f,
	TCP_PERF_SEL_TCC_BYPASS_READ_REQ = 0x70,
	TCP_PERF_SEL_TCC_MISS_EVICT_READ_REQ = 0x71,
	TCP_PERF_SEL_TCC_VOLATILE_READ_REQ = 0x72,
	TCP_PERF_SEL_TCC_VOLATILE_BYPASS_READ_REQ = 0x73,
	TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_READ_REQ = 0x74,
	TCP_PERF_SEL_TCC_BYPASS_WRITE_REQ = 0x75,
	TCP_PERF_SEL_TCC_MISS_EVICT_WRITE_REQ = 0x76,
	TCP_PERF_SEL_TCC_VOLATILE_BYPASS_WRITE_REQ = 0x77,
	TCP_PERF_SEL_TCC_VOLATILE_WRITE_REQ = 0x78,
	TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_WRITE_REQ = 0x79,
	TCP_PERF_SEL_TCC_BYPASS_ATOMIC_REQ = 0x7a,
	TCP_PERF_SEL_TCC_ATOMIC_REQ = 0x7b,
	TCP_PERF_SEL_TCC_VOLATILE_ATOMIC_REQ = 0x7c,
	TCP_PERF_SEL_TCC_DATA_BUS_BUSY = 0x7d,
	TCP_PERF_SEL_TOTAL_ACCESSES = 0x7e,
	TCP_PERF_SEL_TOTAL_READ = 0x7f,
	TCP_PERF_SEL_TOTAL_HIT_LRU_READ = 0x80,
	TCP_PERF_SEL_TOTAL_HIT_EVICT_READ = 0x81,
	TCP_PERF_SEL_TOTAL_MISS_LRU_READ = 0x82,
	TCP_PERF_SEL_TOTAL_MISS_EVICT_READ = 0x83,
	TCP_PERF_SEL_TOTAL_NON_READ = 0x84,
	TCP_PERF_SEL_TOTAL_WRITE = 0x85,
	TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE = 0x86,
	TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE = 0x87,
	TCP_PERF_SEL_TOTAL_WBINVL1_VOL = 0x88,
	TCP_PERF_SEL_TOTAL_WRITEBACK_INVALIDATES = 0x89,
	TCP_PERF_SEL_DISPLAY_MICROTILING = 0x8a,
	TCP_PERF_SEL_THIN_MICROTILING = 0x8b,
	TCP_PERF_SEL_DEPTH_MICROTILING = 0x8c,
	TCP_PERF_SEL_ARR_PRT_THIN1 = 0x8d,
	TCP_PERF_SEL_ARR_PRT_2D_THIN1 = 0x8e,
	TCP_PERF_SEL_ARR_PRT_3D_THIN1 = 0x8f,
	TCP_PERF_SEL_ARR_PRT_THICK = 0x90,
	TCP_PERF_SEL_ARR_PRT_2D_THICK = 0x91,
	TCP_PERF_SEL_ARR_PRT_3D_THICK = 0x92,
	TCP_PERF_SEL_CP_TCP_INVALIDATE_VOL = 0x93,
	TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL = 0x94,
	TCP_PERF_SEL_UNALIGNED = 0x95,
	TCP_PERF_SEL_ROTATED_MICROTILING = 0x96,
	TCP_PERF_SEL_THICK_MICROTILING = 0x97,
	TCP_PERF_SEL_ATC = 0x98,
	TCP_PERF_SEL_POWER_STALL = 0x99,
} TCP_PERFCOUNT_SELECT;
/*
 * TCP_CACHE_POLICIES - four cache policy codes (0x0..0x3) combining
 * MISS/HIT with LRU/EVICT, as spelled in the enumerator names.
 */
typedef enum TCP_CACHE_POLICIES {
	TCP_CACHE_POLICY_MISS_LRU = 0x0,
	TCP_CACHE_POLICY_MISS_EVICT = 0x1,
	TCP_CACHE_POLICY_HIT_LRU = 0x2,
	TCP_CACHE_POLICY_HIT_EVICT = 0x3,
} TCP_CACHE_POLICIES;
/* TCP_CACHE_STORE_POLICIES - two store policy codes: MISS_LRU (0) and MISS_EVICT (1). */
typedef enum TCP_CACHE_STORE_POLICIES {
	TCP_CACHE_STORE_POLICY_MISS_LRU = 0x0,
	TCP_CACHE_STORE_POLICY_MISS_EVICT = 0x1,
} TCP_CACHE_STORE_POLICIES;
/* TCP_WATCH_MODES - watch mode codes 0x0..0x3: READ, NONREAD, ATOMIC, ALL. */
typedef enum TCP_WATCH_MODES {
	TCP_WATCH_MODE_READ = 0x0,
	TCP_WATCH_MODE_NONREAD = 0x1,
	TCP_WATCH_MODE_ATOMIC = 0x2,
	TCP_WATCH_MODE_ALL = 0x3,
} TCP_WATCH_MODES;
/*
 * VGT_OUT_PRIM_TYPE - output primitive type codes, contiguous 0x0..0xe.
 * Code 0x7 is named RESERVED; VGT_TE_* entries occupy 0x8..0xb.
 */
typedef enum VGT_OUT_PRIM_TYPE {
	VGT_OUT_POINT = 0x0,
	VGT_OUT_LINE = 0x1,
	VGT_OUT_TRI = 0x2,
	VGT_OUT_RECT_V0 = 0x3,
	VGT_OUT_RECT_V1 = 0x4,
	VGT_OUT_RECT_V2 = 0x5,
	VGT_OUT_RECT_V3 = 0x6,
	VGT_OUT_RESERVED = 0x7,
	VGT_TE_QUAD = 0x8,
	VGT_TE_PRIM_INDEX_LINE = 0x9,
	VGT_TE_PRIM_INDEX_TRI = 0xa,
	VGT_TE_PRIM_INDEX_QUAD = 0xb,
	VGT_OUT_LINE_ADJ = 0xc,
	VGT_OUT_TRI_ADJ = 0xd,
	VGT_OUT_PATCH = 0xe,
} VGT_OUT_PRIM_TYPE;
/*
 * VGT_DI_PRIM_TYPE - DI_PT_* primitive type codes, contiguous 0x00..0x1c.
 * Slots 0x07, 0x08, 0x0e and 0x0f are named UNUSED_*.
 */
typedef enum VGT_DI_PRIM_TYPE {
	DI_PT_NONE = 0x00,
	DI_PT_POINTLIST = 0x01,
	DI_PT_LINELIST = 0x02,
	DI_PT_LINESTRIP = 0x03,
	DI_PT_TRILIST = 0x04,
	DI_PT_TRIFAN = 0x05,
	DI_PT_TRISTRIP = 0x06,
	DI_PT_UNUSED_0 = 0x07,
	DI_PT_UNUSED_1 = 0x08,
	DI_PT_PATCH = 0x09,
	DI_PT_LINELIST_ADJ = 0x0a,
	DI_PT_LINESTRIP_ADJ = 0x0b,
	DI_PT_TRILIST_ADJ = 0x0c,
	DI_PT_TRISTRIP_ADJ = 0x0d,
	DI_PT_UNUSED_3 = 0x0e,
	DI_PT_UNUSED_4 = 0x0f,
	DI_PT_TRI_WITH_WFLAGS = 0x10,
	DI_PT_RECTLIST = 0x11,
	DI_PT_LINELOOP = 0x12,
	DI_PT_QUADLIST = 0x13,
	DI_PT_QUADSTRIP = 0x14,
	DI_PT_POLYGON = 0x15,
	DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
	DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
	DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
	DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
	DI_PT_2D_FILL_RECT_LIST = 0x1a,
	DI_PT_2D_LINE_STRIP = 0x1b,
	DI_PT_2D_TRI_STRIP = 0x1c,
} VGT_DI_PRIM_TYPE;
/* VGT_DI_SOURCE_SELECT - DI_SRC_SEL_* codes 0x0..0x3; 0x3 is named RESERVED. */
typedef enum VGT_DI_SOURCE_SELECT {
	DI_SRC_SEL_DMA = 0x0,
	DI_SRC_SEL_IMMEDIATE = 0x1,
	DI_SRC_SEL_AUTO_INDEX = 0x2,
	DI_SRC_SEL_RESERVED = 0x3,
} VGT_DI_SOURCE_SELECT;
/* VGT_DI_MAJOR_MODE_SELECT - two major-mode codes, 0 and 1. */
typedef enum VGT_DI_MAJOR_MODE_SELECT {
	DI_MAJOR_MODE_0 = 0x0,
	DI_MAJOR_MODE_1 = 0x1,
} VGT_DI_MAJOR_MODE_SELECT;
/* VGT_DI_INDEX_SIZE - index size codes: 16_BIT (0) and 32_BIT (1). */
typedef enum VGT_DI_INDEX_SIZE {
	DI_INDEX_SIZE_16_BIT = 0x0,
	DI_INDEX_SIZE_32_BIT = 0x1,
} VGT_DI_INDEX_SIZE;
/*
 * VGT_EVENT_TYPE - event type codes, contiguous 0x00..0x3b.
 *
 * Slots 0x00, 0x09, 0x0e, 0x13 and 0x25 carry Reserved_0x<code> placeholder
 * names so that the numbering stays gap-free.
 * NOTE(review): enumerator names here are unprefixed (e.g. CACHE_FLUSH,
 * CS_DONE), so they share the global namespace with everything that
 * includes this header.
 */
typedef enum VGT_EVENT_TYPE {
	Reserved_0x00 = 0x00,
	SAMPLE_STREAMOUTSTATS1 = 0x01,
	SAMPLE_STREAMOUTSTATS2 = 0x02,
	SAMPLE_STREAMOUTSTATS3 = 0x03,
	CACHE_FLUSH_TS = 0x04,
	CONTEXT_DONE = 0x05,
	CACHE_FLUSH = 0x06,
	CS_PARTIAL_FLUSH = 0x07,
	VGT_STREAMOUT_SYNC = 0x08,
	Reserved_0x09 = 0x09,
	VGT_STREAMOUT_RESET = 0x0a,
	END_OF_PIPE_INCR_DE = 0x0b,
	END_OF_PIPE_IB_END = 0x0c,
	RST_PIX_CNT = 0x0d,
	Reserved_0x0E = 0x0e,
	VS_PARTIAL_FLUSH = 0x0f,
	PS_PARTIAL_FLUSH = 0x10,
	FLUSH_HS_OUTPUT = 0x11,
	FLUSH_LS_OUTPUT = 0x12,
	Reserved_0x13 = 0x13,
	CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
	ZPASS_DONE = 0x15,
	CACHE_FLUSH_AND_INV_EVENT = 0x16,
	PERFCOUNTER_START = 0x17,
	PERFCOUNTER_STOP = 0x18,
	PIPELINESTAT_START = 0x19,
	PIPELINESTAT_STOP = 0x1a,
	PERFCOUNTER_SAMPLE = 0x1b,
	FLUSH_ES_OUTPUT = 0x1c,
	FLUSH_GS_OUTPUT = 0x1d,
	SAMPLE_PIPELINESTAT = 0x1e,
	SO_VGTSTREAMOUT_FLUSH = 0x1f,
	SAMPLE_STREAMOUTSTATS = 0x20,
	RESET_VTX_CNT = 0x21,
	BLOCK_CONTEXT_DONE = 0x22,
	CS_CONTEXT_DONE = 0x23,
	VGT_FLUSH = 0x24,
	Reserved_0x25 = 0x25,
	SQ_NON_EVENT = 0x26,
	SC_SEND_DB_VPZ = 0x27,
	BOTTOM_OF_PIPE_TS = 0x28,
	FLUSH_SX_TS = 0x29,
	DB_CACHE_FLUSH_AND_INV = 0x2a,
	FLUSH_AND_INV_DB_DATA_TS = 0x2b,
	FLUSH_AND_INV_DB_META = 0x2c,
	FLUSH_AND_INV_CB_DATA_TS = 0x2d,
	FLUSH_AND_INV_CB_META = 0x2e,
	CS_DONE = 0x2f,
	PS_DONE = 0x30,
	FLUSH_AND_INV_CB_PIXEL_DATA = 0x31,
	SX_CB_RAT_ACK_REQUEST = 0x32,
	THREAD_TRACE_START = 0x33,
	THREAD_TRACE_STOP = 0x34,
	THREAD_TRACE_MARKER = 0x35,
	THREAD_TRACE_FLUSH = 0x36,
	THREAD_TRACE_FINISH = 0x37,
	PIXEL_PIPE_STAT_CONTROL = 0x38,
	PIXEL_PIPE_STAT_DUMP = 0x39,
	PIXEL_PIPE_STAT_RESET = 0x3a,
	CONTEXT_SUSPEND = 0x3b,
} VGT_EVENT_TYPE;
/* VGT_DMA_SWAP_MODE - swap mode codes 0x0..0x3: NONE, 16_BIT, 32_BIT, WORD. */
typedef enum VGT_DMA_SWAP_MODE {
	VGT_DMA_SWAP_NONE = 0x0,
	VGT_DMA_SWAP_16_BIT = 0x1,
	VGT_DMA_SWAP_32_BIT = 0x2,
	VGT_DMA_SWAP_WORD = 0x3,
} VGT_DMA_SWAP_MODE;
/* VGT_INDEX_TYPE_MODE - index type codes: VGT_INDEX_16 (0) and VGT_INDEX_32 (1). */
typedef enum VGT_INDEX_TYPE_MODE {
	VGT_INDEX_16 = 0x0,
	VGT_INDEX_32 = 0x1,
} VGT_INDEX_TYPE_MODE;
/* VGT_DMA_BUF_TYPE - buffer type codes 0x0..0x2: MEM, RING, SETUP. */
typedef enum VGT_DMA_BUF_TYPE {
	VGT_DMA_BUF_MEM = 0x0,
	VGT_DMA_BUF_RING = 0x1,
	VGT_DMA_BUF_SETUP = 0x2,
} VGT_DMA_BUF_TYPE;
/* VGT_OUTPATH_SELECT - output path codes, contiguous 0x0..0x4. */
typedef enum VGT_OUTPATH_SELECT {
	VGT_OUTPATH_VTX_REUSE = 0x0,
	VGT_OUTPATH_TESS_EN = 0x1,
	VGT_OUTPATH_PASSTHRU = 0x2,
	VGT_OUTPATH_GS_BLOCK = 0x3,
	VGT_OUTPATH_HS_BLOCK = 0x4,
} VGT_OUTPATH_SELECT;
/* VGT_GRP_PRIM_TYPE - grouper primitive type codes, contiguous 0x00..0x11. */
typedef enum VGT_GRP_PRIM_TYPE {
	VGT_GRP_3D_POINT = 0x00,
	VGT_GRP_3D_LINE = 0x01,
	VGT_GRP_3D_TRI = 0x02,
	VGT_GRP_3D_RECT = 0x03,
	VGT_GRP_3D_QUAD = 0x04,
	VGT_GRP_2D_COPY_RECT_V0 = 0x05,
	VGT_GRP_2D_COPY_RECT_V1 = 0x06,
	VGT_GRP_2D_COPY_RECT_V2 = 0x07,
	VGT_GRP_2D_COPY_RECT_V3 = 0x08,
	VGT_GRP_2D_FILL_RECT = 0x09,
	VGT_GRP_2D_LINE = 0x0a,
	VGT_GRP_2D_TRI = 0x0b,
	VGT_GRP_PRIM_INDEX_LINE = 0x0c,
	VGT_GRP_PRIM_INDEX_TRI = 0x0d,
	VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
	VGT_GRP_3D_LINE_ADJ = 0x0f,
	VGT_GRP_3D_TRI_ADJ = 0x10,
	VGT_GRP_3D_PATCH = 0x11,
} VGT_GRP_PRIM_TYPE;
/* VGT_GRP_PRIM_ORDER - ordering codes 0x0..0x4: LIST, STRIP, FAN, LOOP, POLYGON. */
typedef enum VGT_GRP_PRIM_ORDER {
	VGT_GRP_LIST = 0x0,
	VGT_GRP_STRIP = 0x1,
	VGT_GRP_FAN = 0x2,
	VGT_GRP_LOOP = 0x3,
	VGT_GRP_POLYGON = 0x4,
} VGT_GRP_PRIM_ORDER;
/* VGT_GROUP_CONV_SEL - conversion select codes, contiguous 0x0..0x8. */
typedef enum VGT_GROUP_CONV_SEL {
	VGT_GRP_INDEX_16 = 0x0,
	VGT_GRP_INDEX_32 = 0x1,
	VGT_GRP_UINT_16 = 0x2,
	VGT_GRP_UINT_32 = 0x3,
	VGT_GRP_SINT_16 = 0x4,
	VGT_GRP_SINT_32 = 0x5,
	VGT_GRP_FLOAT_32 = 0x6,
	VGT_GRP_AUTO_PRIM = 0x7,
	VGT_GRP_FIX_1_23_TO_FLOAT = 0x8,
} VGT_GROUP_CONV_SEL;
/*
 * VGT_GS_MODE_TYPE - GS mode codes, contiguous 0x0..0x5.
 * Note the scenario letters are not alphabetical by value:
 * A = 1, B = 2, G = 3, C = 4.
 */
typedef enum VGT_GS_MODE_TYPE {
	GS_OFF = 0x0,
	GS_SCENARIO_A = 0x1,
	GS_SCENARIO_B = 0x2,
	GS_SCENARIO_G = 0x3,
	GS_SCENARIO_C = 0x4,
	SPRITE_EN = 0x5,
} VGT_GS_MODE_TYPE;
/*
 * VGT_GS_CUT_MODE - cut mode codes 0x0..0x3.
 * The number in each name halves as the code increases: 1024, 512, 256, 128.
 */
typedef enum VGT_GS_CUT_MODE {
	GS_CUT_1024 = 0x0,
	GS_CUT_512 = 0x1,
	GS_CUT_256 = 0x2,
	GS_CUT_128 = 0x3,
} VGT_GS_CUT_MODE;
/* VGT_GS_OUTPRIM_TYPE - GS output primitive codes: POINTLIST, LINESTRIP, TRISTRIP. */
typedef enum VGT_GS_OUTPRIM_TYPE {
	POINTLIST = 0x0,
	LINESTRIP = 0x1,
	TRISTRIP = 0x2,
} VGT_GS_OUTPRIM_TYPE;
/* VGT_CACHE_INVALID_MODE - invalidate mode codes: VC_ONLY, TC_ONLY, VC_AND_TC. */
typedef enum VGT_CACHE_INVALID_MODE {
	VC_ONLY = 0x0,
	TC_ONLY = 0x1,
	VC_AND_TC = 0x2,
} VGT_CACHE_INVALID_MODE;
/* VGT_TESS_TYPE - tessellation type codes: ISOLINE, TRIANGLE, QUAD. */
typedef enum VGT_TESS_TYPE {
	TESS_ISOLINE = 0x0,
	TESS_TRIANGLE = 0x1,
	TESS_QUAD = 0x2,
} VGT_TESS_TYPE;
/* VGT_TESS_PARTITION - partition codes 0x0..0x3: INTEGER, POW2, FRAC_ODD, FRAC_EVEN. */
typedef enum VGT_TESS_PARTITION {
	PART_INTEGER = 0x0,
	PART_POW2 = 0x1,
	PART_FRAC_ODD = 0x2,
	PART_FRAC_EVEN = 0x3,
} VGT_TESS_PARTITION;
/* VGT_TESS_TOPOLOGY - output topology codes 0x0..0x3: POINT, LINE, TRIANGLE_CW, TRIANGLE_CCW. */
typedef enum VGT_TESS_TOPOLOGY {
	OUTPUT_POINT = 0x0,
	OUTPUT_LINE = 0x1,
	OUTPUT_TRIANGLE_CW = 0x2,
	OUTPUT_TRIANGLE_CCW = 0x3,
} VGT_TESS_TOPOLOGY;
/* VGT_RDREQ_POLICY - policy codes 0x0..0x3: LRU, STREAM, BYPASS; 0x3 is named RESERVED. */
typedef enum VGT_RDREQ_POLICY {
	VGT_POLICY_LRU = 0x0,
	VGT_POLICY_STREAM = 0x1,
	VGT_POLICY_BYPASS = 0x2,
	VGT_POLICY_RESERVED = 0x3,
} VGT_RDREQ_POLICY;
/*
 * VGT_STAGES_LS_EN - LS stage enable codes 0x0..0x3.
 * Code 0x2 is CS_STAGE_ON (not an LS_* name); 0x3 is RESERVED_LS.
 */
typedef enum VGT_STAGES_LS_EN {
	LS_STAGE_OFF = 0x0,
	LS_STAGE_ON = 0x1,
	CS_STAGE_ON = 0x2,
	RESERVED_LS = 0x3,
} VGT_STAGES_LS_EN;
/* VGT_STAGES_HS_EN - HS stage enable: OFF (0) or ON (1). */
typedef enum VGT_STAGES_HS_EN {
	HS_STAGE_OFF = 0x0,
	HS_STAGE_ON = 0x1,
} VGT_STAGES_HS_EN;
/* VGT_STAGES_ES_EN - ES stage codes 0x0..0x3: OFF, DS, REAL; 0x3 is RESERVED_ES. */
typedef enum VGT_STAGES_ES_EN {
	ES_STAGE_OFF = 0x0,
	ES_STAGE_DS = 0x1,
	ES_STAGE_REAL = 0x2,
	RESERVED_ES = 0x3,
} VGT_STAGES_ES_EN;
/* VGT_STAGES_GS_EN - GS stage enable: OFF (0) or ON (1). */
typedef enum VGT_STAGES_GS_EN {
	GS_STAGE_OFF = 0x0,
	GS_STAGE_ON = 0x1,
} VGT_STAGES_GS_EN;
/*
 * VGT_STAGES_VS_EN - VS stage codes 0x0..0x3: REAL, DS, COPY_SHADER;
 * 0x3 is RESERVED_VS. Unlike the sibling *_EN enums, code 0 is not an
 * "off" value here.
 */
typedef enum VGT_STAGES_VS_EN {
	VS_STAGE_REAL = 0x0,
	VS_STAGE_DS = 0x1,
	VS_STAGE_COPY_SHADER = 0x2,
	RESERVED_VS = 0x3,
} VGT_STAGES_VS_EN;
/*
 * VGT_PERFCOUNT_SELECT - selector codes for the vgt_perf_* counters.
 *
 * Codes are contiguous from 0x00 through 0x8b. Slots 0x5c, 0x5e and 0x87
 * are held by vgt_perf_RESERVED0/1/2 placeholders.
 * NOTE(review): presumably these mirror a hardware perf-counter select
 * field, so entries must not be renumbered - confirm against the register
 * specification.
 */
typedef enum VGT_PERFCOUNT_SELECT {
	vgt_perf_VGT_SPI_ESTHREAD_EVENT_WINDOW_ACTIVE = 0x00,
	vgt_perf_VGT_SPI_ESVERT_VALID = 0x01,
	vgt_perf_VGT_SPI_ESVERT_EOV = 0x02,
	vgt_perf_VGT_SPI_ESVERT_STALLED = 0x03,
	vgt_perf_VGT_SPI_ESVERT_STARVED_BUSY = 0x04,
	vgt_perf_VGT_SPI_ESVERT_STARVED_IDLE = 0x05,
	vgt_perf_VGT_SPI_ESVERT_STATIC = 0x06,
	vgt_perf_VGT_SPI_ESTHREAD_IS_EVENT = 0x07,
	vgt_perf_VGT_SPI_ESTHREAD_SEND = 0x08,
	vgt_perf_VGT_SPI_GSPRIM_VALID = 0x09,
	vgt_perf_VGT_SPI_GSPRIM_EOV = 0x0a,
	vgt_perf_VGT_SPI_GSPRIM_CONT = 0x0b,
	vgt_perf_VGT_SPI_GSPRIM_STALLED = 0x0c,
	vgt_perf_VGT_SPI_GSPRIM_STARVED_BUSY = 0x0d,
	vgt_perf_VGT_SPI_GSPRIM_STARVED_IDLE = 0x0e,
	vgt_perf_VGT_SPI_GSPRIM_STATIC = 0x0f,
	vgt_perf_VGT_SPI_GSTHREAD_EVENT_WINDOW_ACTIVE = 0x10,
	vgt_perf_VGT_SPI_GSTHREAD_IS_EVENT = 0x11,
	vgt_perf_VGT_SPI_GSTHREAD_SEND = 0x12,
	vgt_perf_VGT_SPI_VSTHREAD_EVENT_WINDOW_ACTIVE = 0x13,
	vgt_perf_VGT_SPI_VSVERT_SEND = 0x14,
	vgt_perf_VGT_SPI_VSVERT_EOV = 0x15,
	vgt_perf_VGT_SPI_VSVERT_STALLED = 0x16,
	vgt_perf_VGT_SPI_VSVERT_STARVED_BUSY = 0x17,
	vgt_perf_VGT_SPI_VSVERT_STARVED_IDLE = 0x18,
	vgt_perf_VGT_SPI_VSVERT_STATIC = 0x19,
	vgt_perf_VGT_SPI_VSTHREAD_IS_EVENT = 0x1a,
	vgt_perf_VGT_SPI_VSTHREAD_SEND = 0x1b,
	vgt_perf_VGT_PA_EVENT_WINDOW_ACTIVE = 0x1c,
	vgt_perf_VGT_PA_CLIPV_SEND = 0x1d,
	vgt_perf_VGT_PA_CLIPV_FIRSTVERT = 0x1e,
	vgt_perf_VGT_PA_CLIPV_STALLED = 0x1f,
	vgt_perf_VGT_PA_CLIPV_STARVED_BUSY = 0x20,
	vgt_perf_VGT_PA_CLIPV_STARVED_IDLE = 0x21,
	vgt_perf_VGT_PA_CLIPV_STATIC = 0x22,
	vgt_perf_VGT_PA_CLIPP_SEND = 0x23,
	vgt_perf_VGT_PA_CLIPP_EOP = 0x24,
	vgt_perf_VGT_PA_CLIPP_IS_EVENT = 0x25,
	vgt_perf_VGT_PA_CLIPP_NULL_PRIM = 0x26,
	vgt_perf_VGT_PA_CLIPP_NEW_VTX_VECT = 0x27,
	vgt_perf_VGT_PA_CLIPP_STALLED = 0x28,
	vgt_perf_VGT_PA_CLIPP_STARVED_BUSY = 0x29,
	vgt_perf_VGT_PA_CLIPP_STARVED_IDLE = 0x2a,
	vgt_perf_VGT_PA_CLIPP_STATIC = 0x2b,
	vgt_perf_VGT_PA_CLIPS_SEND = 0x2c,
	vgt_perf_VGT_PA_CLIPS_STALLED = 0x2d,
	vgt_perf_VGT_PA_CLIPS_STARVED_BUSY = 0x2e,
	vgt_perf_VGT_PA_CLIPS_STARVED_IDLE = 0x2f,
	vgt_perf_VGT_PA_CLIPS_STATIC = 0x30,
	vgt_perf_vsvert_ds_send = 0x31,
	vgt_perf_vsvert_api_send = 0x32,
	vgt_perf_hs_tif_stall = 0x33,
	vgt_perf_hs_input_stall = 0x34,
	vgt_perf_hs_interface_stall = 0x35,
	vgt_perf_hs_tfm_stall = 0x36,
	vgt_perf_te11_starved = 0x37,
	vgt_perf_gs_event_stall = 0x38,
	vgt_perf_vgt_pa_clipp_send_not_event = 0x39,
	vgt_perf_vgt_pa_clipp_valid_prim = 0x3a,
	vgt_perf_reused_es_indices = 0x3b,
	vgt_perf_vs_cache_hits = 0x3c,
	vgt_perf_gs_cache_hits = 0x3d,
	vgt_perf_ds_cache_hits = 0x3e,
	vgt_perf_total_cache_hits = 0x3f,
	vgt_perf_vgt_busy = 0x40,
	vgt_perf_vgt_gs_busy = 0x41,
	vgt_perf_esvert_stalled_es_tbl = 0x42,
	vgt_perf_esvert_stalled_gs_tbl = 0x43,
	vgt_perf_esvert_stalled_gs_event = 0x44,
	vgt_perf_esvert_stalled_gsprim = 0x45,
	vgt_perf_gsprim_stalled_es_tbl = 0x46,
	vgt_perf_gsprim_stalled_gs_tbl = 0x47,
	vgt_perf_gsprim_stalled_gs_event = 0x48,
	vgt_perf_gsprim_stalled_esvert = 0x49,
	vgt_perf_esthread_stalled_es_rb_full = 0x4a,
	vgt_perf_esthread_stalled_spi_bp = 0x4b,
	vgt_perf_counters_avail_stalled = 0x4c,
	vgt_perf_gs_rb_space_avail_stalled = 0x4d,
	vgt_perf_gs_issue_rtr_stalled = 0x4e,
	vgt_perf_gsthread_stalled = 0x4f,
	vgt_perf_strmout_stalled = 0x50,
	vgt_perf_wait_for_es_done_stalled = 0x51,
	vgt_perf_cm_stalled_by_gog = 0x52,
	vgt_perf_cm_reading_stalled = 0x53,
	vgt_perf_cm_stalled_by_gsfetch_done = 0x54,
	vgt_perf_gog_vs_tbl_stalled = 0x55,
	vgt_perf_gog_out_indx_stalled = 0x56,
	vgt_perf_gog_out_prim_stalled = 0x57,
	vgt_perf_waveid_stalled = 0x58,
	vgt_perf_gog_busy = 0x59,
	vgt_perf_reused_vs_indices = 0x5a,
	vgt_perf_sclk_reg_vld_event = 0x5b,
	vgt_perf_RESERVED0 = 0x5c,
	vgt_perf_sclk_core_vld_event = 0x5d,
	vgt_perf_RESERVED1 = 0x5e,
	vgt_perf_sclk_gs_vld_event = 0x5f,
	vgt_perf_VGT_SPI_LSVERT_VALID = 0x60,
	vgt_perf_VGT_SPI_LSVERT_EOV = 0x61,
	vgt_perf_VGT_SPI_LSVERT_STALLED = 0x62,
	vgt_perf_VGT_SPI_LSVERT_STARVED_BUSY = 0x63,
	vgt_perf_VGT_SPI_LSVERT_STARVED_IDLE = 0x64,
	vgt_perf_VGT_SPI_LSVERT_STATIC = 0x65,
	vgt_perf_VGT_SPI_LSWAVE_EVENT_WINDOW_ACTIVE = 0x66,
	vgt_perf_VGT_SPI_LSWAVE_IS_EVENT = 0x67,
	vgt_perf_VGT_SPI_LSWAVE_SEND = 0x68,
	vgt_perf_VGT_SPI_HSVERT_VALID = 0x69,
	vgt_perf_VGT_SPI_HSVERT_EOV = 0x6a,
	vgt_perf_VGT_SPI_HSVERT_STALLED = 0x6b,
	vgt_perf_VGT_SPI_HSVERT_STARVED_BUSY = 0x6c,
	vgt_perf_VGT_SPI_HSVERT_STARVED_IDLE = 0x6d,
	vgt_perf_VGT_SPI_HSVERT_STATIC = 0x6e,
	vgt_perf_VGT_SPI_HSWAVE_EVENT_WINDOW_ACTIVE = 0x6f,
	vgt_perf_VGT_SPI_HSWAVE_IS_EVENT = 0x70,
	vgt_perf_VGT_SPI_HSWAVE_SEND = 0x71,
	vgt_perf_ds_prims = 0x72,
	vgt_perf_null_tess_patches = 0x73,
	vgt_perf_ls_thread_groups = 0x74,
	vgt_perf_hs_thread_groups = 0x75,
	vgt_perf_es_thread_groups = 0x76,
	vgt_perf_vs_thread_groups = 0x77,
	vgt_perf_ls_done_latency = 0x78,
	vgt_perf_hs_done_latency = 0x79,
	vgt_perf_es_done_latency = 0x7a,
	vgt_perf_gs_done_latency = 0x7b,
	vgt_perf_vgt_hs_busy = 0x7c,
	vgt_perf_vgt_te11_busy = 0x7d,
	vgt_perf_ls_flush = 0x7e,
	vgt_perf_hs_flush = 0x7f,
	vgt_perf_es_flush = 0x80,
	vgt_perf_gs_flush = 0x81,
	vgt_perf_ls_done = 0x82,
	vgt_perf_hs_done = 0x83,
	vgt_perf_es_done = 0x84,
	vgt_perf_gs_done = 0x85,
	vgt_perf_vsfetch_done = 0x86,
	vgt_perf_RESERVED2 = 0x87,
	vgt_perf_es_ring_high_water_mark = 0x88,
	vgt_perf_gs_ring_high_water_mark = 0x89,
	vgt_perf_vs_table_high_water_mark = 0x8a,
	vgt_perf_hs_tgs_active_high_water_mark = 0x8b,
} VGT_PERFCOUNT_SELECT;
/*
 * IA_PERFCOUNT_SELECT - selector codes for the ia_perf_* counters,
 * contiguous 0x00..0x11. Slots 0x0b and 0x0d are RESERVED placeholders.
 * Note shift_starved_pipe1 (0x0f) precedes shift_starved_pipe0 (0x10).
 */
typedef enum IA_PERFCOUNT_SELECT {
	ia_perf_GRP_INPUT_EVENT_WINDOW_ACTIVE = 0x00,
	ia_perf_MC_LAT_BIN_0 = 0x01,
	ia_perf_MC_LAT_BIN_1 = 0x02,
	ia_perf_MC_LAT_BIN_2 = 0x03,
	ia_perf_MC_LAT_BIN_3 = 0x04,
	ia_perf_MC_LAT_BIN_4 = 0x05,
	ia_perf_MC_LAT_BIN_5 = 0x06,
	ia_perf_MC_LAT_BIN_6 = 0x07,
	ia_perf_MC_LAT_BIN_7 = 0x08,
	ia_perf_ia_busy = 0x09,
	ia_perf_ia_sclk_reg_vld_event = 0x0a,
	ia_perf_RESERVED0 = 0x0b,
	ia_perf_ia_sclk_core_vld_event = 0x0c,
	ia_perf_RESERVED1 = 0x0d,
	ia_perf_ia_dma_return = 0x0e,
	ia_perf_shift_starved_pipe1_event = 0x0f,
	ia_perf_shift_starved_pipe0_event = 0x10,
	ia_perf_ia_stalled = 0x11,
} IA_PERFCOUNT_SELECT;
/* WD_PERFCOUNT_SELECT - selector codes for the wd_perf_* counters, contiguous 0x0..0x9. */
typedef enum WD_PERFCOUNT_SELECT {
	wd_perf_RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 0x0,
	wd_perf_RBIU_DR_FIFO_STARVED = 0x1,
	wd_perf_RBIU_DR_FIFO_STALLED = 0x2,
	wd_perf_RBIU_DI_FIFO_STARVED = 0x3,
	wd_perf_RBIU_DI_FIFO_STALLED = 0x4,
	wd_perf_wd_busy = 0x5,
	wd_perf_wd_sclk_reg_vld_event = 0x6,
	wd_perf_wd_sclk_input_vld_event = 0x7,
	wd_perf_wd_sclk_core_vld_event = 0x8,
	wd_perf_wd_stalled = 0x9,
} WD_PERFCOUNT_SELECT;
/* WD_IA_DRAW_TYPE - draw type codes, contiguous 0x0..0x7. */
typedef enum WD_IA_DRAW_TYPE {
	WD_IA_DRAW_TYPE_DI_MM0 = 0x0,
	WD_IA_DRAW_TYPE_DI_MM1 = 0x1,
	WD_IA_DRAW_TYPE_EVENT_INIT = 0x2,
	WD_IA_DRAW_TYPE_EVENT_ADDR = 0x3,
	WD_IA_DRAW_TYPE_MIN_INDX = 0x4,
	WD_IA_DRAW_TYPE_MAX_INDX = 0x5,
	WD_IA_DRAW_TYPE_INDX_OFF = 0x6,
	WD_IA_DRAW_TYPE_IMM_DATA = 0x7,
} WD_IA_DRAW_TYPE;
/* GSTHREADID_SIZE expands to 0x2. NOTE(review): unit (bits vs. bytes) is not stated here - confirm. */
#define GSTHREADID_SIZE 0x2
/*
 * SurfaceEndian - surface endian-mode codes, four values (0x0-0x3).
 * ENDIAN_NONE is the zero value.
 */
typedef enum SurfaceEndian {
	ENDIAN_NONE = 0x0,
	ENDIAN_8IN16 = 0x1,
	ENDIAN_8IN32 = 0x2,
	ENDIAN_8IN64 = 0x3,
} SurfaceEndian;
/*
 * ArrayMode - array/tiling mode codes, sixteen values (0x0-0xf).
 *
 * The numeric order is not grouped by family: the PRT variants are
 * interleaved with the non-PRT 1D/2D/3D modes, so do not derive a mode's
 * properties from arithmetic on its value.
 */
typedef enum ArrayMode {
	ARRAY_LINEAR_GENERAL = 0x0,
	ARRAY_LINEAR_ALIGNED = 0x1,
	ARRAY_1D_TILED_THIN1 = 0x2,
	ARRAY_1D_TILED_THICK = 0x3,
	ARRAY_2D_TILED_THIN1 = 0x4,
	ARRAY_PRT_TILED_THIN1 = 0x5,
	ARRAY_PRT_2D_TILED_THIN1 = 0x6,
	ARRAY_2D_TILED_THICK = 0x7,
	ARRAY_2D_TILED_XTHICK = 0x8,
	ARRAY_PRT_TILED_THICK = 0x9,
	ARRAY_PRT_2D_TILED_THICK = 0xa,
	ARRAY_PRT_3D_TILED_THIN1 = 0xb,
	ARRAY_3D_TILED_THIN1 = 0xc,
	ARRAY_3D_TILED_THICK = 0xd,
	ARRAY_3D_TILED_XTHICK = 0xe,
	ARRAY_PRT_3D_TILED_THICK = 0xf,
} ArrayMode;
/*
 * PipeTiling - CONFIG_<n>_PIPE codes, four values (0x0-0x3).
 * The count in each name equals 1 << value (1, 2, 4, 8).
 */
typedef enum PipeTiling {
	CONFIG_1_PIPE = 0x0,
	CONFIG_2_PIPE = 0x1,
	CONFIG_4_PIPE = 0x2,
	CONFIG_8_PIPE = 0x3,
} PipeTiling;
/* BankTiling - two-valued code: CONFIG_4_BANK (0x0) or CONFIG_8_BANK (0x1). */
typedef enum BankTiling {
	CONFIG_4_BANK = 0x0,
	CONFIG_8_BANK = 0x1,
} BankTiling;
/* GroupInterleave - two-valued code: CONFIG_256B_GROUP (0x0) or CONFIG_512B_GROUP (0x1). */
typedef enum GroupInterleave {
	CONFIG_256B_GROUP = 0x0,
	CONFIG_512B_GROUP = 0x1,
} GroupInterleave;
/*
 * RowTiling - row-size codes, eight values (0x0-0x7).
 *
 * Values 0x0-0x3 are the plain 1KB/2KB/4KB/8KB entries; each *_OPT entry
 * is its plain counterpart's value plus 0x4.
 */
typedef enum RowTiling {
	CONFIG_1KB_ROW = 0x0,
	CONFIG_2KB_ROW = 0x1,
	CONFIG_4KB_ROW = 0x2,
	CONFIG_8KB_ROW = 0x3,
	CONFIG_1KB_ROW_OPT = 0x4,
	CONFIG_2KB_ROW_OPT = 0x5,
	CONFIG_4KB_ROW_OPT = 0x6,
	CONFIG_8KB_ROW_OPT = 0x7,
} RowTiling;
/*
 * BankSwapBytes - bank-swap size codes, four values (0x0-0x3).
 * The size in each name doubles per step: 128B, 256B, 512B, 1KB.
 */
typedef enum BankSwapBytes {
	CONFIG_128B_SWAPS = 0x0,
	CONFIG_256B_SWAPS = 0x1,
	CONFIG_512B_SWAPS = 0x2,
	CONFIG_1KB_SWAPS = 0x3,
} BankSwapBytes;
/*
 * SampleSplitBytes - sample-split size codes, four values (0x0-0x3).
 * The size in each name doubles per step: 1KB, 2KB, 4KB, 8KB.
 */
typedef enum SampleSplitBytes {
	CONFIG_1KB_SPLIT = 0x0,
	CONFIG_2KB_SPLIT = 0x1,
	CONFIG_4KB_SPLIT = 0x2,
	CONFIG_8KB_SPLIT = 0x3,
} SampleSplitBytes;
/*
 * NumPipes - ADDR_CONFIG_<n>_PIPE codes, five values (0x0-0x4).
 * The count in each name equals 1 << value (1, 2, 4, 8, 16).
 */
typedef enum NumPipes {
	ADDR_CONFIG_1_PIPE = 0x0,
	ADDR_CONFIG_2_PIPE = 0x1,
	ADDR_CONFIG_4_PIPE = 0x2,
	ADDR_CONFIG_8_PIPE = 0x3,
	ADDR_CONFIG_16_PIPE = 0x4,
} NumPipes;
/* PipeInterleaveSize - two-valued code: 256B (0x0) or 512B (0x1) pipe interleave. */
typedef enum PipeInterleaveSize {
	ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x0,
	ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x1,
} PipeInterleaveSize;
/*
 * BankInterleaveSize - ADDR_CONFIG_BANK_INTERLEAVE_<n> codes, four values
 * (0x0-0x3). The number in each name equals 1 << value (1, 2, 4, 8).
 */
typedef enum BankInterleaveSize {
	ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x0,
	ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x1,
	ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x2,
	ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x3,
} BankInterleaveSize;
/* NumShaderEngines - two-valued code: one (0x0) or two (0x1) shader engines. */
typedef enum NumShaderEngines {
	ADDR_CONFIG_1_SHADER_ENGINE = 0x0,
	ADDR_CONFIG_2_SHADER_ENGINE = 0x1,
} NumShaderEngines;
/* ShaderEngineTileSize - two-valued code: ADDR_CONFIG_SE_TILE_16 (0x0) or _32 (0x1). */
typedef enum ShaderEngineTileSize {
	ADDR_CONFIG_SE_TILE_16 = 0x0,
	ADDR_CONFIG_SE_TILE_32 = 0x1,
} ShaderEngineTileSize;
/*
 * NumGPUs - ADDR_CONFIG_<n>_GPU codes, three values (0x0-0x2).
 * The count in each name equals 1 << value (1, 2, 4).
 */
typedef enum NumGPUs {
	ADDR_CONFIG_1_GPU = 0x0,
	ADDR_CONFIG_2_GPU = 0x1,
	ADDR_CONFIG_4_GPU = 0x2,
} NumGPUs;
/*
 * MultiGPUTileSize - ADDR_CONFIG_GPU_TILE_<n> codes, four values (0x0-0x3).
 * The number in each name equals 16 << value (16, 32, 64, 128).
 */
typedef enum MultiGPUTileSize {
	ADDR_CONFIG_GPU_TILE_16 = 0x0,
	ADDR_CONFIG_GPU_TILE_32 = 0x1,
	ADDR_CONFIG_GPU_TILE_64 = 0x2,
	ADDR_CONFIG_GPU_TILE_128 = 0x3,
} MultiGPUTileSize;
/*
 * RowSize - ADDR_CONFIG_<n>KB_ROW codes, three values (0x0-0x2).
 * The size in each name doubles per step: 1KB, 2KB, 4KB.
 */
typedef enum RowSize {
	ADDR_CONFIG_1KB_ROW = 0x0,
	ADDR_CONFIG_2KB_ROW = 0x1,
	ADDR_CONFIG_4KB_ROW = 0x2,
} RowSize;
/* NumLowerPipes - two-valued code: one (0x0) or two (0x1) lower pipes. */
typedef enum NumLowerPipes {
	ADDR_CONFIG_1_LOWER_PIPES = 0x0,
	ADDR_CONFIG_2_LOWER_PIPES = 0x1,
} NumLowerPipes;
/*
 * DebugBlockId - DBG_CLIENT_BLKID_* identifiers.
 *
 * Values are contiguous from 0x0 (DBG_CLIENT_BLKID_RESERVED) through 0x94
 * (DBG_CLIENT_BLKID_RESERVED_LAST). The numbering is not grouped by block
 * family: instances of the same family (e.g. mcd0..mcd7, cb*, vgt0..vgt3)
 * are scattered across the range, so always use the named enumerator
 * instead of computing an ID from a base plus an instance index.
 */
typedef enum DebugBlockId {
	DBG_CLIENT_BLKID_RESERVED                        = 0x0,
	DBG_CLIENT_BLKID_dbg                             = 0x1,
	DBG_CLIENT_BLKID_dco0                            = 0x2,
	DBG_CLIENT_BLKID_wd                              = 0x3,
	DBG_CLIENT_BLKID_vmc                             = 0x4,
	DBG_CLIENT_BLKID_scf2                            = 0x5,
	DBG_CLIENT_BLKID_spim3                           = 0x6,
	DBG_CLIENT_BLKID_cb3                             = 0x7,
	DBG_CLIENT_BLKID_sx0                             = 0x8,
	DBG_CLIENT_BLKID_cb2                             = 0x9,
	DBG_CLIENT_BLKID_bci1                            = 0xa,
	DBG_CLIENT_BLKID_xdma                            = 0xb,
	DBG_CLIENT_BLKID_bci0                            = 0xc,
	DBG_CLIENT_BLKID_spim0                           = 0xd,
	DBG_CLIENT_BLKID_mcd0                            = 0xe,
	DBG_CLIENT_BLKID_mcc0                            = 0xf,
	DBG_CLIENT_BLKID_cb0                             = 0x10,
	DBG_CLIENT_BLKID_cb1                             = 0x11,
	DBG_CLIENT_BLKID_cpc_0                           = 0x12,
	DBG_CLIENT_BLKID_cpc_1                           = 0x13,
	DBG_CLIENT_BLKID_cpf                             = 0x14,
	DBG_CLIENT_BLKID_rlc                             = 0x15,
	DBG_CLIENT_BLKID_grbm                            = 0x16,
	DBG_CLIENT_BLKID_bif                             = 0x17,
	DBG_CLIENT_BLKID_scf1                            = 0x18,
	DBG_CLIENT_BLKID_sam                             = 0x19,
	DBG_CLIENT_BLKID_mcd4                            = 0x1a,
	DBG_CLIENT_BLKID_mcc4                            = 0x1b,
	DBG_CLIENT_BLKID_gmcon                           = 0x1c,
	DBG_CLIENT_BLKID_mcb                             = 0x1d,
	DBG_CLIENT_BLKID_vgt0                            = 0x1e,
	DBG_CLIENT_BLKID_pc0                             = 0x1f,
	DBG_CLIENT_BLKID_spim1                           = 0x20,
	DBG_CLIENT_BLKID_bci2                            = 0x21,
	DBG_CLIENT_BLKID_mcd6                            = 0x22,
	DBG_CLIENT_BLKID_mcc6                            = 0x23,
	DBG_CLIENT_BLKID_mcd3                            = 0x24,
	DBG_CLIENT_BLKID_mcc3                            = 0x25,
	DBG_CLIENT_BLKID_uvdm_0                          = 0x26,
	DBG_CLIENT_BLKID_uvdm_1                          = 0x27,
	DBG_CLIENT_BLKID_uvdm_2                          = 0x28,
	DBG_CLIENT_BLKID_uvdm_3                          = 0x29,
	DBG_CLIENT_BLKID_spim2                           = 0x2a,
	DBG_CLIENT_BLKID_ds                              = 0x2b,
	DBG_CLIENT_BLKID_srbm                            = 0x2c,
	DBG_CLIENT_BLKID_ih                              = 0x2d,
	DBG_CLIENT_BLKID_sem                             = 0x2e,
	DBG_CLIENT_BLKID_sdma_0                          = 0x2f,
	DBG_CLIENT_BLKID_sdma_1                          = 0x30,
	DBG_CLIENT_BLKID_hdp                             = 0x31,
	DBG_CLIENT_BLKID_acp_0                           = 0x32,
	DBG_CLIENT_BLKID_acp_1                           = 0x33,
	DBG_CLIENT_BLKID_vceb_0                          = 0x34,
	DBG_CLIENT_BLKID_vceb_1                          = 0x35,
	DBG_CLIENT_BLKID_vceb_2                          = 0x36,
	DBG_CLIENT_BLKID_mcd2                            = 0x37,
	DBG_CLIENT_BLKID_mcc2                            = 0x38,
	DBG_CLIENT_BLKID_scf3                            = 0x39,
	DBG_CLIENT_BLKID_bci3                            = 0x3a,
	DBG_CLIENT_BLKID_mcd5                            = 0x3b,
	DBG_CLIENT_BLKID_mcc5                            = 0x3c,
	DBG_CLIENT_BLKID_vgt2                            = 0x3d,
	DBG_CLIENT_BLKID_pc2                             = 0x3e,
	DBG_CLIENT_BLKID_smu_0                           = 0x3f,
	DBG_CLIENT_BLKID_smu_1                           = 0x40,
	DBG_CLIENT_BLKID_smu_2                           = 0x41,
	DBG_CLIENT_BLKID_vcea_0                          = 0x42,
	DBG_CLIENT_BLKID_vcea_1                          = 0x43,
	DBG_CLIENT_BLKID_vcea_2                          = 0x44,
	DBG_CLIENT_BLKID_vcea_3                          = 0x45,
	DBG_CLIENT_BLKID_vcea_4                          = 0x46,
	DBG_CLIENT_BLKID_vcea_5                          = 0x47,
	DBG_CLIENT_BLKID_vcea_6                          = 0x48,
	DBG_CLIENT_BLKID_scf0                            = 0x49,
	DBG_CLIENT_BLKID_vgt1                            = 0x4a,
	DBG_CLIENT_BLKID_pc1                             = 0x4b,
	DBG_CLIENT_BLKID_gdc_0                           = 0x4c,
	DBG_CLIENT_BLKID_gdc_1                           = 0x4d,
	DBG_CLIENT_BLKID_gdc_2                           = 0x4e,
	DBG_CLIENT_BLKID_gdc_3                           = 0x4f,
	DBG_CLIENT_BLKID_gdc_4                           = 0x50,
	DBG_CLIENT_BLKID_gdc_5                           = 0x51,
	DBG_CLIENT_BLKID_gdc_6                           = 0x52,
	DBG_CLIENT_BLKID_gdc_7                           = 0x53,
	DBG_CLIENT_BLKID_gdc_8                           = 0x54,
	DBG_CLIENT_BLKID_gdc_9                           = 0x55,
	DBG_CLIENT_BLKID_gdc_10                          = 0x56,
	DBG_CLIENT_BLKID_gdc_11                          = 0x57,
	DBG_CLIENT_BLKID_gdc_12                          = 0x58,
	DBG_CLIENT_BLKID_gdc_13                          = 0x59,
	DBG_CLIENT_BLKID_gdc_14                          = 0x5a,
	DBG_CLIENT_BLKID_gdc_15                          = 0x5b,
	DBG_CLIENT_BLKID_gdc_16                          = 0x5c,
	DBG_CLIENT_BLKID_gdc_17                          = 0x5d,
	DBG_CLIENT_BLKID_gdc_18                          = 0x5e,
	DBG_CLIENT_BLKID_gdc_19                          = 0x5f,
	DBG_CLIENT_BLKID_gdc_20                          = 0x60,
	DBG_CLIENT_BLKID_gdc_21                          = 0x61,
	DBG_CLIENT_BLKID_gdc_22                          = 0x62,
	DBG_CLIENT_BLKID_vgt3                            = 0x63,
	DBG_CLIENT_BLKID_pc3                             = 0x64,
	DBG_CLIENT_BLKID_uvdu_0                          = 0x65,
	DBG_CLIENT_BLKID_uvdu_1                          = 0x66,
	DBG_CLIENT_BLKID_uvdu_2                          = 0x67,
	DBG_CLIENT_BLKID_uvdu_3                          = 0x68,
	DBG_CLIENT_BLKID_uvdu_4                          = 0x69,
	DBG_CLIENT_BLKID_uvdu_5                          = 0x6a,
	DBG_CLIENT_BLKID_uvdu_6                          = 0x6b,
	DBG_CLIENT_BLKID_mcd7                            = 0x6c,
	DBG_CLIENT_BLKID_mcc7                            = 0x6d,
	DBG_CLIENT_BLKID_cpg_0                           = 0x6e,
	DBG_CLIENT_BLKID_cpg_1                           = 0x6f,
	DBG_CLIENT_BLKID_gck                             = 0x70,
	DBG_CLIENT_BLKID_mcd1                            = 0x71,
	DBG_CLIENT_BLKID_mcc1                            = 0x72,
	DBG_CLIENT_BLKID_cb101                           = 0x73,
	DBG_CLIENT_BLKID_cb103                           = 0x74,
	DBG_CLIENT_BLKID_sx10                            = 0x75,
	DBG_CLIENT_BLKID_cb102                           = 0x76,
	DBG_CLIENT_BLKID_cb002                           = 0x77,
	DBG_CLIENT_BLKID_cb100                           = 0x78,
	DBG_CLIENT_BLKID_cb000                           = 0x79,
	DBG_CLIENT_BLKID_pa00                            = 0x7a,
	DBG_CLIENT_BLKID_pa10                            = 0x7b,
	DBG_CLIENT_BLKID_ia0                             = 0x7c,
	DBG_CLIENT_BLKID_ia1                             = 0x7d,
	DBG_CLIENT_BLKID_tmonw00                         = 0x7e,
	DBG_CLIENT_BLKID_cb001                           = 0x7f,
	DBG_CLIENT_BLKID_cb003                           = 0x80,
	DBG_CLIENT_BLKID_sx00                            = 0x81,
	DBG_CLIENT_BLKID_sx20                            = 0x82,
	DBG_CLIENT_BLKID_cb203                           = 0x83,
	DBG_CLIENT_BLKID_cb201                           = 0x84,
	DBG_CLIENT_BLKID_cb302                           = 0x85,
	DBG_CLIENT_BLKID_cb202                           = 0x86,
	DBG_CLIENT_BLKID_cb300                           = 0x87,
	DBG_CLIENT_BLKID_cb200                           = 0x88,
	DBG_CLIENT_BLKID_pa01                            = 0x89,
	DBG_CLIENT_BLKID_pa11                            = 0x8a,
	DBG_CLIENT_BLKID_sx30                            = 0x8b,
	DBG_CLIENT_BLKID_cb303                           = 0x8c,
	DBG_CLIENT_BLKID_cb301                           = 0x8d,
	DBG_CLIENT_BLKID_dco                             = 0x8e,
	DBG_CLIENT_BLKID_scb0                            = 0x8f,
	DBG_CLIENT_BLKID_scb1                            = 0x90,
	DBG_CLIENT_BLKID_scb2                            = 0x91,
	DBG_CLIENT_BLKID_scb3                            = 0x92,
	DBG_CLIENT_BLKID_tmonw01                         = 0x93,
	/* Last enumerator; one past the final named block (0x93). */
	DBG_CLIENT_BLKID_RESERVED_LAST                   = 0x94,
} DebugBlockId;
/*
 * DebugBlockId_OLD - DBG_BLOCK_ID_* identifiers.
 *
 * Values are contiguous from 0x0 through 0xe7. Slots with no named block
 * are filled by DBG_BLOCK_ID_UNUSED<n> placeholders (and
 * DBG_BLOCK_ID_TCP_RESERVED<n> after the TCP entries), which keeps the
 * numbering gap-free. The DebugBlockId_BY2 table is derived from the
 * even-valued entries of this one (BY2 value == this value / 2).
 * NOTE(review): the "_OLD" suffix suggests this is a legacy numbering kept
 * alongside DebugBlockId - confirm before using it in new code.
 */
typedef enum DebugBlockId_OLD {
	DBG_BLOCK_ID_RESERVED                            = 0x0,
	DBG_BLOCK_ID_DBG                                 = 0x1,
	DBG_BLOCK_ID_VMC                                 = 0x2,
	DBG_BLOCK_ID_PDMA                                = 0x3,
	DBG_BLOCK_ID_CG                                  = 0x4,
	DBG_BLOCK_ID_SRBM                                = 0x5,
	DBG_BLOCK_ID_GRBM                                = 0x6,
	DBG_BLOCK_ID_RLC                                 = 0x7,
	DBG_BLOCK_ID_CSC                                 = 0x8,
	DBG_BLOCK_ID_SEM                                 = 0x9,
	DBG_BLOCK_ID_IH                                  = 0xa,
	DBG_BLOCK_ID_SC                                  = 0xb,
	DBG_BLOCK_ID_SQ                                  = 0xc,
	DBG_BLOCK_ID_AVP                                 = 0xd,
	DBG_BLOCK_ID_GMCON                               = 0xe,
	DBG_BLOCK_ID_SMU                                 = 0xf,
	DBG_BLOCK_ID_DMA0                                = 0x10,
	DBG_BLOCK_ID_DMA1                                = 0x11,
	DBG_BLOCK_ID_SPIM                                = 0x12,
	DBG_BLOCK_ID_GDS                                 = 0x13,
	DBG_BLOCK_ID_SPIS                                = 0x14,
	DBG_BLOCK_ID_UNUSED0                             = 0x15,
	DBG_BLOCK_ID_PA0                                 = 0x16,
	DBG_BLOCK_ID_PA1                                 = 0x17,
	DBG_BLOCK_ID_CP0                                 = 0x18,
	DBG_BLOCK_ID_CP1                                 = 0x19,
	DBG_BLOCK_ID_CP2                                 = 0x1a,
	DBG_BLOCK_ID_UNUSED1                             = 0x1b,
	DBG_BLOCK_ID_UVDU                                = 0x1c,
	DBG_BLOCK_ID_UVDM                                = 0x1d,
	DBG_BLOCK_ID_VCE                                 = 0x1e,
	DBG_BLOCK_ID_UNUSED2                             = 0x1f,
	DBG_BLOCK_ID_VGT0                                = 0x20,
	DBG_BLOCK_ID_VGT1                                = 0x21,
	DBG_BLOCK_ID_IA                                  = 0x22,
	DBG_BLOCK_ID_UNUSED3                             = 0x23,
	DBG_BLOCK_ID_SCT0                                = 0x24,
	DBG_BLOCK_ID_SCT1                                = 0x25,
	DBG_BLOCK_ID_SPM0                                = 0x26,
	DBG_BLOCK_ID_SPM1                                = 0x27,
	DBG_BLOCK_ID_TCAA                                = 0x28,
	DBG_BLOCK_ID_TCAB                                = 0x29,
	DBG_BLOCK_ID_TCCA                                = 0x2a,
	DBG_BLOCK_ID_TCCB                                = 0x2b,
	DBG_BLOCK_ID_MCC0                                = 0x2c,
	DBG_BLOCK_ID_MCC1                                = 0x2d,
	DBG_BLOCK_ID_MCC2                                = 0x2e,
	DBG_BLOCK_ID_MCC3                                = 0x2f,
	DBG_BLOCK_ID_SX0                                 = 0x30,
	DBG_BLOCK_ID_SX1                                 = 0x31,
	DBG_BLOCK_ID_SX2                                 = 0x32,
	DBG_BLOCK_ID_SX3                                 = 0x33,
	DBG_BLOCK_ID_UNUSED4                             = 0x34,
	DBG_BLOCK_ID_UNUSED5                             = 0x35,
	DBG_BLOCK_ID_UNUSED6                             = 0x36,
	DBG_BLOCK_ID_UNUSED7                             = 0x37,
	DBG_BLOCK_ID_PC0                                 = 0x38,
	DBG_BLOCK_ID_PC1                                 = 0x39,
	DBG_BLOCK_ID_UNUSED8                             = 0x3a,
	DBG_BLOCK_ID_UNUSED9                             = 0x3b,
	DBG_BLOCK_ID_UNUSED10                            = 0x3c,
	DBG_BLOCK_ID_UNUSED11                            = 0x3d,
	DBG_BLOCK_ID_MCB                                 = 0x3e,
	DBG_BLOCK_ID_UNUSED12                            = 0x3f,
	DBG_BLOCK_ID_SCB0                                = 0x40,
	DBG_BLOCK_ID_SCB1                                = 0x41,
	DBG_BLOCK_ID_UNUSED13                            = 0x42,
	DBG_BLOCK_ID_UNUSED14                            = 0x43,
	DBG_BLOCK_ID_SCF0                                = 0x44,
	DBG_BLOCK_ID_SCF1                                = 0x45,
	DBG_BLOCK_ID_UNUSED15                            = 0x46,
	DBG_BLOCK_ID_UNUSED16                            = 0x47,
	DBG_BLOCK_ID_BCI0                                = 0x48,
	DBG_BLOCK_ID_BCI1                                = 0x49,
	DBG_BLOCK_ID_BCI2                                = 0x4a,
	DBG_BLOCK_ID_BCI3                                = 0x4b,
	DBG_BLOCK_ID_UNUSED17                            = 0x4c,
	DBG_BLOCK_ID_UNUSED18                            = 0x4d,
	DBG_BLOCK_ID_UNUSED19                            = 0x4e,
	DBG_BLOCK_ID_UNUSED20                            = 0x4f,
	DBG_BLOCK_ID_CB00                                = 0x50,
	DBG_BLOCK_ID_CB01                                = 0x51,
	DBG_BLOCK_ID_CB02                                = 0x52,
	DBG_BLOCK_ID_CB03                                = 0x53,
	DBG_BLOCK_ID_CB04                                = 0x54,
	DBG_BLOCK_ID_UNUSED21                            = 0x55,
	DBG_BLOCK_ID_UNUSED22                            = 0x56,
	DBG_BLOCK_ID_UNUSED23                            = 0x57,
	DBG_BLOCK_ID_CB10                                = 0x58,
	DBG_BLOCK_ID_CB11                                = 0x59,
	DBG_BLOCK_ID_CB12                                = 0x5a,
	DBG_BLOCK_ID_CB13                                = 0x5b,
	DBG_BLOCK_ID_CB14                                = 0x5c,
	DBG_BLOCK_ID_UNUSED24                            = 0x5d,
	DBG_BLOCK_ID_UNUSED25                            = 0x5e,
	DBG_BLOCK_ID_UNUSED26                            = 0x5f,
	/* TCP0..TCP23 occupy 0x60-0x77; TCP_RESERVED0..7 pad out to 0x7f. */
	DBG_BLOCK_ID_TCP0                                = 0x60,
	DBG_BLOCK_ID_TCP1                                = 0x61,
	DBG_BLOCK_ID_TCP2                                = 0x62,
	DBG_BLOCK_ID_TCP3                                = 0x63,
	DBG_BLOCK_ID_TCP4                                = 0x64,
	DBG_BLOCK_ID_TCP5                                = 0x65,
	DBG_BLOCK_ID_TCP6                                = 0x66,
	DBG_BLOCK_ID_TCP7                                = 0x67,
	DBG_BLOCK_ID_TCP8                                = 0x68,
	DBG_BLOCK_ID_TCP9                                = 0x69,
	DBG_BLOCK_ID_TCP10                               = 0x6a,
	DBG_BLOCK_ID_TCP11                               = 0x6b,
	DBG_BLOCK_ID_TCP12                               = 0x6c,
	DBG_BLOCK_ID_TCP13                               = 0x6d,
	DBG_BLOCK_ID_TCP14                               = 0x6e,
	DBG_BLOCK_ID_TCP15                               = 0x6f,
	DBG_BLOCK_ID_TCP16                               = 0x70,
	DBG_BLOCK_ID_TCP17                               = 0x71,
	DBG_BLOCK_ID_TCP18                               = 0x72,
	DBG_BLOCK_ID_TCP19                               = 0x73,
	DBG_BLOCK_ID_TCP20                               = 0x74,
	DBG_BLOCK_ID_TCP21                               = 0x75,
	DBG_BLOCK_ID_TCP22                               = 0x76,
	DBG_BLOCK_ID_TCP23                               = 0x77,
	DBG_BLOCK_ID_TCP_RESERVED0                       = 0x78,
	DBG_BLOCK_ID_TCP_RESERVED1                       = 0x79,
	DBG_BLOCK_ID_TCP_RESERVED2                       = 0x7a,
	DBG_BLOCK_ID_TCP_RESERVED3                       = 0x7b,
	DBG_BLOCK_ID_TCP_RESERVED4                       = 0x7c,
	DBG_BLOCK_ID_TCP_RESERVED5                       = 0x7d,
	DBG_BLOCK_ID_TCP_RESERVED6                       = 0x7e,
	DBG_BLOCK_ID_TCP_RESERVED7                       = 0x7f,
	DBG_BLOCK_ID_DB00                                = 0x80,
	DBG_BLOCK_ID_DB01                                = 0x81,
	DBG_BLOCK_ID_DB02                                = 0x82,
	DBG_BLOCK_ID_DB03                                = 0x83,
	DBG_BLOCK_ID_DB04                                = 0x84,
	DBG_BLOCK_ID_UNUSED27                            = 0x85,
	DBG_BLOCK_ID_UNUSED28                            = 0x86,
	DBG_BLOCK_ID_UNUSED29                            = 0x87,
	DBG_BLOCK_ID_DB10                                = 0x88,
	DBG_BLOCK_ID_DB11                                = 0x89,
	DBG_BLOCK_ID_DB12                                = 0x8a,
	DBG_BLOCK_ID_DB13                                = 0x8b,
	DBG_BLOCK_ID_DB14                                = 0x8c,
	DBG_BLOCK_ID_UNUSED30                            = 0x8d,
	DBG_BLOCK_ID_UNUSED31                            = 0x8e,
	DBG_BLOCK_ID_UNUSED32                            = 0x8f,
	DBG_BLOCK_ID_TCC0                                = 0x90,
	DBG_BLOCK_ID_TCC1                                = 0x91,
	DBG_BLOCK_ID_TCC2                                = 0x92,
	DBG_BLOCK_ID_TCC3                                = 0x93,
	DBG_BLOCK_ID_TCC4                                = 0x94,
	DBG_BLOCK_ID_TCC5                                = 0x95,
	DBG_BLOCK_ID_TCC6                                = 0x96,
	DBG_BLOCK_ID_TCC7                                = 0x97,
	DBG_BLOCK_ID_SPS00                               = 0x98,
	DBG_BLOCK_ID_SPS01                               = 0x99,
	DBG_BLOCK_ID_SPS02                               = 0x9a,
	DBG_BLOCK_ID_SPS10                               = 0x9b,
	DBG_BLOCK_ID_SPS11                               = 0x9c,
	DBG_BLOCK_ID_SPS12                               = 0x9d,
	DBG_BLOCK_ID_UNUSED33                            = 0x9e,
	DBG_BLOCK_ID_UNUSED34                            = 0x9f,
	/* TA and TD entries use a hex digit as the instance suffix (..09, 0A, 0B). */
	DBG_BLOCK_ID_TA00                                = 0xa0,
	DBG_BLOCK_ID_TA01                                = 0xa1,
	DBG_BLOCK_ID_TA02                                = 0xa2,
	DBG_BLOCK_ID_TA03                                = 0xa3,
	DBG_BLOCK_ID_TA04                                = 0xa4,
	DBG_BLOCK_ID_TA05                                = 0xa5,
	DBG_BLOCK_ID_TA06                                = 0xa6,
	DBG_BLOCK_ID_TA07                                = 0xa7,
	DBG_BLOCK_ID_TA08                                = 0xa8,
	DBG_BLOCK_ID_TA09                                = 0xa9,
	DBG_BLOCK_ID_TA0A                                = 0xaa,
	DBG_BLOCK_ID_TA0B                                = 0xab,
	DBG_BLOCK_ID_UNUSED35                            = 0xac,
	DBG_BLOCK_ID_UNUSED36                            = 0xad,
	DBG_BLOCK_ID_UNUSED37                            = 0xae,
	DBG_BLOCK_ID_UNUSED38                            = 0xaf,
	DBG_BLOCK_ID_TA10                                = 0xb0,
	DBG_BLOCK_ID_TA11                                = 0xb1,
	DBG_BLOCK_ID_TA12                                = 0xb2,
	DBG_BLOCK_ID_TA13                                = 0xb3,
	DBG_BLOCK_ID_TA14                                = 0xb4,
	DBG_BLOCK_ID_TA15                                = 0xb5,
	DBG_BLOCK_ID_TA16                                = 0xb6,
	DBG_BLOCK_ID_TA17                                = 0xb7,
	DBG_BLOCK_ID_TA18                                = 0xb8,
	DBG_BLOCK_ID_TA19                                = 0xb9,
	DBG_BLOCK_ID_TA1A                                = 0xba,
	DBG_BLOCK_ID_TA1B                                = 0xbb,
	DBG_BLOCK_ID_UNUSED39                            = 0xbc,
	DBG_BLOCK_ID_UNUSED40                            = 0xbd,
	DBG_BLOCK_ID_UNUSED41                            = 0xbe,
	DBG_BLOCK_ID_UNUSED42                            = 0xbf,
	DBG_BLOCK_ID_TD00                                = 0xc0,
	DBG_BLOCK_ID_TD01                                = 0xc1,
	DBG_BLOCK_ID_TD02                                = 0xc2,
	DBG_BLOCK_ID_TD03                                = 0xc3,
	DBG_BLOCK_ID_TD04                                = 0xc4,
	DBG_BLOCK_ID_TD05                                = 0xc5,
	DBG_BLOCK_ID_TD06                                = 0xc6,
	DBG_BLOCK_ID_TD07                                = 0xc7,
	DBG_BLOCK_ID_TD08                                = 0xc8,
	DBG_BLOCK_ID_TD09                                = 0xc9,
	DBG_BLOCK_ID_TD0A                                = 0xca,
	DBG_BLOCK_ID_TD0B                                = 0xcb,
	DBG_BLOCK_ID_UNUSED43                            = 0xcc,
	DBG_BLOCK_ID_UNUSED44                            = 0xcd,
	DBG_BLOCK_ID_UNUSED45                            = 0xce,
	DBG_BLOCK_ID_UNUSED46                            = 0xcf,
	DBG_BLOCK_ID_TD10                                = 0xd0,
	DBG_BLOCK_ID_TD11                                = 0xd1,
	DBG_BLOCK_ID_TD12                                = 0xd2,
	DBG_BLOCK_ID_TD13                                = 0xd3,
	DBG_BLOCK_ID_TD14                                = 0xd4,
	DBG_BLOCK_ID_TD15                                = 0xd5,
	DBG_BLOCK_ID_TD16                                = 0xd6,
	DBG_BLOCK_ID_TD17                                = 0xd7,
	DBG_BLOCK_ID_TD18                                = 0xd8,
	DBG_BLOCK_ID_TD19                                = 0xd9,
	DBG_BLOCK_ID_TD1A                                = 0xda,
	DBG_BLOCK_ID_TD1B                                = 0xdb,
	DBG_BLOCK_ID_UNUSED47                            = 0xdc,
	DBG_BLOCK_ID_UNUSED48                            = 0xdd,
	DBG_BLOCK_ID_UNUSED49                            = 0xde,
	DBG_BLOCK_ID_UNUSED50                            = 0xdf,
	DBG_BLOCK_ID_MCD0                                = 0xe0,
	DBG_BLOCK_ID_MCD1                                = 0xe1,
	DBG_BLOCK_ID_MCD2                                = 0xe2,
	DBG_BLOCK_ID_MCD3                                = 0xe3,
	DBG_BLOCK_ID_MCD4                                = 0xe4,
	DBG_BLOCK_ID_MCD5                                = 0xe5,
	DBG_BLOCK_ID_UNUSED51                            = 0xe6,
	DBG_BLOCK_ID_UNUSED52                            = 0xe7,
} DebugBlockId_OLD;
/*
 * DebugBlockId_BY2 - DBG_BLOCK_ID_*_BY2 identifiers.
 *
 * This table mirrors the even-valued entries of DebugBlockId_OLD: each
 * DBG_BLOCK_ID_<X>_BY2 has exactly half the value of DBG_BLOCK_ID_<X>
 * (e.g. DBG_BLOCK_ID_VMC = 0x2 -> DBG_BLOCK_ID_VMC_BY2 = 0x1, and
 * DBG_BLOCK_ID_UNUSED51 = 0xe6 -> DBG_BLOCK_ID_UNUSED51_BY2 = 0x73).
 * Values are contiguous from 0x0 through 0x73; the odd-valued entries of
 * DebugBlockId_OLD have no counterpart here.
 */
typedef enum DebugBlockId_BY2 {
	DBG_BLOCK_ID_RESERVED_BY2                        = 0x0,
	DBG_BLOCK_ID_VMC_BY2                             = 0x1,
	DBG_BLOCK_ID_CG_BY2                              = 0x2,
	DBG_BLOCK_ID_GRBM_BY2                            = 0x3,
	DBG_BLOCK_ID_CSC_BY2                             = 0x4,
	DBG_BLOCK_ID_IH_BY2                              = 0x5,
	DBG_BLOCK_ID_SQ_BY2                              = 0x6,
	DBG_BLOCK_ID_GMCON_BY2                           = 0x7,
	DBG_BLOCK_ID_DMA0_BY2                            = 0x8,
	DBG_BLOCK_ID_SPIM_BY2                            = 0x9,
	DBG_BLOCK_ID_SPIS_BY2                            = 0xa,
	DBG_BLOCK_ID_PA0_BY2                             = 0xb,
	DBG_BLOCK_ID_CP0_BY2                             = 0xc,
	DBG_BLOCK_ID_CP2_BY2                             = 0xd,
	DBG_BLOCK_ID_UVDU_BY2                            = 0xe,
	DBG_BLOCK_ID_VCE_BY2                             = 0xf,
	DBG_BLOCK_ID_VGT0_BY2                            = 0x10,
	DBG_BLOCK_ID_IA_BY2                              = 0x11,
	DBG_BLOCK_ID_SCT0_BY2                            = 0x12,
	DBG_BLOCK_ID_SPM0_BY2                            = 0x13,
	DBG_BLOCK_ID_TCAA_BY2                            = 0x14,
	DBG_BLOCK_ID_TCCA_BY2                            = 0x15,
	DBG_BLOCK_ID_MCC0_BY2                            = 0x16,
	DBG_BLOCK_ID_MCC2_BY2                            = 0x17,
	DBG_BLOCK_ID_SX0_BY2                             = 0x18,
	DBG_BLOCK_ID_SX2_BY2                             = 0x19,
	DBG_BLOCK_ID_UNUSED4_BY2                         = 0x1a,
	DBG_BLOCK_ID_UNUSED6_BY2                         = 0x1b,
	DBG_BLOCK_ID_PC0_BY2                             = 0x1c,
	DBG_BLOCK_ID_UNUSED8_BY2                         = 0x1d,
	DBG_BLOCK_ID_UNUSED10_BY2                        = 0x1e,
	DBG_BLOCK_ID_MCB_BY2                             = 0x1f,
	DBG_BLOCK_ID_SCB0_BY2                            = 0x20,
	DBG_BLOCK_ID_UNUSED13_BY2                        = 0x21,
	DBG_BLOCK_ID_SCF0_BY2                            = 0x22,
	DBG_BLOCK_ID_UNUSED15_BY2                        = 0x23,
	DBG_BLOCK_ID_BCI0_BY2                            = 0x24,
	DBG_BLOCK_ID_BCI2_BY2                            = 0x25,
	DBG_BLOCK_ID_UNUSED17_BY2                        = 0x26,
	DBG_BLOCK_ID_UNUSED19_BY2                        = 0x27,
	DBG_BLOCK_ID_CB00_BY2                            = 0x28,
	DBG_BLOCK_ID_CB02_BY2                            = 0x29,
	DBG_BLOCK_ID_CB04_BY2                            = 0x2a,
	DBG_BLOCK_ID_UNUSED22_BY2                        = 0x2b,
	DBG_BLOCK_ID_CB10_BY2                            = 0x2c,
	DBG_BLOCK_ID_CB12_BY2                            = 0x2d,
	DBG_BLOCK_ID_CB14_BY2                            = 0x2e,
	DBG_BLOCK_ID_UNUSED25_BY2                        = 0x2f,
	DBG_BLOCK_ID_TCP0_BY2                            = 0x30,
	DBG_BLOCK_ID_TCP2_BY2                            = 0x31,
	DBG_BLOCK_ID_TCP4_BY2                            = 0x32,
	DBG_BLOCK_ID_TCP6_BY2                            = 0x33,
	DBG_BLOCK_ID_TCP8_BY2                            = 0x34,
	DBG_BLOCK_ID_TCP10_BY2                           = 0x35,
	DBG_BLOCK_ID_TCP12_BY2                           = 0x36,
	DBG_BLOCK_ID_TCP14_BY2                           = 0x37,
	DBG_BLOCK_ID_TCP16_BY2                           = 0x38,
	DBG_BLOCK_ID_TCP18_BY2                           = 0x39,
	DBG_BLOCK_ID_TCP20_BY2                           = 0x3a,
	DBG_BLOCK_ID_TCP22_BY2                           = 0x3b,
	DBG_BLOCK_ID_TCP_RESERVED0_BY2                   = 0x3c,
	DBG_BLOCK_ID_TCP_RESERVED2_BY2                   = 0x3d,
	DBG_BLOCK_ID_TCP_RESERVED4_BY2                   = 0x3e,
	DBG_BLOCK_ID_TCP_RESERVED6_BY2                   = 0x3f,
	DBG_BLOCK_ID_DB00_BY2                            = 0x40,
	DBG_BLOCK_ID_DB02_BY2                            = 0x41,
	DBG_BLOCK_ID_DB04_BY2                            = 0x42,
	DBG_BLOCK_ID_UNUSED28_BY2                        = 0x43,
	DBG_BLOCK_ID_DB10_BY2                            = 0x44,
	DBG_BLOCK_ID_DB12_BY2                            = 0x45,
	DBG_BLOCK_ID_DB14_BY2                            = 0x46,
	DBG_BLOCK_ID_UNUSED31_BY2                        = 0x47,
	DBG_BLOCK_ID_TCC0_BY2                            = 0x48,
	DBG_BLOCK_ID_TCC2_BY2                            = 0x49,
	DBG_BLOCK_ID_TCC4_BY2                            = 0x4a,
	DBG_BLOCK_ID_TCC6_BY2                            = 0x4b,
	DBG_BLOCK_ID_SPS00_BY2                           = 0x4c,
	DBG_BLOCK_ID_SPS02_BY2                           = 0x4d,
	/* SPS11 (not SPS10) is the even-valued entry (0x9c) in DebugBlockId_OLD. */
	DBG_BLOCK_ID_SPS11_BY2                           = 0x4e,
	DBG_BLOCK_ID_UNUSED33_BY2                        = 0x4f,
	DBG_BLOCK_ID_TA00_BY2                            = 0x50,
	DBG_BLOCK_ID_TA02_BY2                            = 0x51,
	DBG_BLOCK_ID_TA04_BY2                            = 0x52,
	DBG_BLOCK_ID_TA06_BY2                            = 0x53,
	DBG_BLOCK_ID_TA08_BY2                            = 0x54,
	DBG_BLOCK_ID_TA0A_BY2                            = 0x55,
	DBG_BLOCK_ID_UNUSED35_BY2                        = 0x56,
	DBG_BLOCK_ID_UNUSED37_BY2                        = 0x57,
	DBG_BLOCK_ID_TA10_BY2                            = 0x58,
	DBG_BLOCK_ID_TA12_BY2                            = 0x59,
	DBG_BLOCK_ID_TA14_BY2                            = 0x5a,
	DBG_BLOCK_ID_TA16_BY2                            = 0x5b,
	DBG_BLOCK_ID_TA18_BY2                            = 0x5c,
	DBG_BLOCK_ID_TA1A_BY2                            = 0x5d,
	DBG_BLOCK_ID_UNUSED39_BY2                        = 0x5e,
	DBG_BLOCK_ID_UNUSED41_BY2                        = 0x5f,
	DBG_BLOCK_ID_TD00_BY2                            = 0x60,
	DBG_BLOCK_ID_TD02_BY2                            = 0x61,
	DBG_BLOCK_ID_TD04_BY2                            = 0x62,
	DBG_BLOCK_ID_TD06_BY2                            = 0x63,
	DBG_BLOCK_ID_TD08_BY2                            = 0x64,
	DBG_BLOCK_ID_TD0A_BY2                            = 0x65,
	DBG_BLOCK_ID_UNUSED43_BY2                        = 0x66,
	DBG_BLOCK_ID_UNUSED45_BY2                        = 0x67,
	DBG_BLOCK_ID_TD10_BY2                            = 0x68,
	DBG_BLOCK_ID_TD12_BY2                            = 0x69,
	DBG_BLOCK_ID_TD14_BY2                            = 0x6a,
	DBG_BLOCK_ID_TD16_BY2                            = 0x6b,
	DBG_BLOCK_ID_TD18_BY2                            = 0x6c,
	DBG_BLOCK_ID_TD1A_BY2                            = 0x6d,
	DBG_BLOCK_ID_UNUSED47_BY2                        = 0x6e,
	DBG_BLOCK_ID_UNUSED49_BY2                        = 0x6f,
	DBG_BLOCK_ID_MCD0_BY2                            = 0x70,
	DBG_BLOCK_ID_MCD2_BY2                            = 0x71,
	DBG_BLOCK_ID_MCD4_BY2                            = 0x72,
	DBG_BLOCK_ID_UNUSED51_BY2                        = 0x73,
} DebugBlockId_BY2;
/*
 * DebugBlockId_BY4 - identifiers carrying the _BY4 suffix.
 *
 * Values are dense, running from 0x0 (RESERVED) to 0x39 (MCD4).
 * NOTE(review): DBG_BLOCK_ID_DB_BY4 has no numeric suffix, unlike
 * DBG_BLOCK_ID_DB00_BY8 / DBG_BLOCK_ID_DB00_BY16; the name is kept
 * unchanged because it is part of the public interface.
 */
typedef enum DebugBlockId_BY4 {
	DBG_BLOCK_ID_RESERVED_BY4 = 0x0,
	DBG_BLOCK_ID_CG_BY4 = 0x1,
	DBG_BLOCK_ID_CSC_BY4 = 0x2,
	DBG_BLOCK_ID_SQ_BY4 = 0x3,
	DBG_BLOCK_ID_DMA0_BY4 = 0x4,
	DBG_BLOCK_ID_SPIS_BY4 = 0x5,
	DBG_BLOCK_ID_CP0_BY4 = 0x6,
	DBG_BLOCK_ID_UVDU_BY4 = 0x7,
	DBG_BLOCK_ID_VGT0_BY4 = 0x8,
	DBG_BLOCK_ID_SCT0_BY4 = 0x9,
	DBG_BLOCK_ID_TCAA_BY4 = 0xa,
	DBG_BLOCK_ID_MCC0_BY4 = 0xb,
	DBG_BLOCK_ID_SX0_BY4 = 0xc,
	DBG_BLOCK_ID_UNUSED4_BY4 = 0xd,
	DBG_BLOCK_ID_PC0_BY4 = 0xe,
	DBG_BLOCK_ID_UNUSED10_BY4 = 0xf,
	DBG_BLOCK_ID_SCB0_BY4 = 0x10,
	DBG_BLOCK_ID_SCF0_BY4 = 0x11,
	DBG_BLOCK_ID_BCI0_BY4 = 0x12,
	DBG_BLOCK_ID_UNUSED17_BY4 = 0x13,
	DBG_BLOCK_ID_CB00_BY4 = 0x14,
	DBG_BLOCK_ID_CB04_BY4 = 0x15,
	DBG_BLOCK_ID_CB10_BY4 = 0x16,
	DBG_BLOCK_ID_CB14_BY4 = 0x17,
	DBG_BLOCK_ID_TCP0_BY4 = 0x18,
	DBG_BLOCK_ID_TCP4_BY4 = 0x19,
	DBG_BLOCK_ID_TCP8_BY4 = 0x1a,
	DBG_BLOCK_ID_TCP12_BY4 = 0x1b,
	DBG_BLOCK_ID_TCP16_BY4 = 0x1c,
	DBG_BLOCK_ID_TCP20_BY4 = 0x1d,
	DBG_BLOCK_ID_TCP_RESERVED0_BY4 = 0x1e,
	DBG_BLOCK_ID_TCP_RESERVED4_BY4 = 0x1f,
	DBG_BLOCK_ID_DB_BY4 = 0x20,
	DBG_BLOCK_ID_DB04_BY4 = 0x21,
	DBG_BLOCK_ID_DB10_BY4 = 0x22,
	DBG_BLOCK_ID_DB14_BY4 = 0x23,
	DBG_BLOCK_ID_TCC0_BY4 = 0x24,
	DBG_BLOCK_ID_TCC4_BY4 = 0x25,
	DBG_BLOCK_ID_SPS00_BY4 = 0x26,
	DBG_BLOCK_ID_SPS11_BY4 = 0x27,
	DBG_BLOCK_ID_TA00_BY4 = 0x28,
	DBG_BLOCK_ID_TA04_BY4 = 0x29,
	DBG_BLOCK_ID_TA08_BY4 = 0x2a,
	DBG_BLOCK_ID_UNUSED35_BY4 = 0x2b,
	DBG_BLOCK_ID_TA10_BY4 = 0x2c,
	DBG_BLOCK_ID_TA14_BY4 = 0x2d,
	DBG_BLOCK_ID_TA18_BY4 = 0x2e,
	DBG_BLOCK_ID_UNUSED39_BY4 = 0x2f,
	DBG_BLOCK_ID_TD00_BY4 = 0x30,
	DBG_BLOCK_ID_TD04_BY4 = 0x31,
	DBG_BLOCK_ID_TD08_BY4 = 0x32,
	DBG_BLOCK_ID_UNUSED43_BY4 = 0x33,
	DBG_BLOCK_ID_TD10_BY4 = 0x34,
	DBG_BLOCK_ID_TD14_BY4 = 0x35,
	DBG_BLOCK_ID_TD18_BY4 = 0x36,
	DBG_BLOCK_ID_UNUSED47_BY4 = 0x37,
	DBG_BLOCK_ID_MCD0_BY4 = 0x38,
	DBG_BLOCK_ID_MCD4_BY4 = 0x39,
} DebugBlockId_BY4;
/*
 * DebugBlockId_BY8 - identifiers carrying the _BY8 suffix.
 *
 * Values are dense, running from 0x0 (RESERVED) to 0x1c (MCD0).
 */
typedef enum DebugBlockId_BY8 {
	DBG_BLOCK_ID_RESERVED_BY8 = 0x0,
	DBG_BLOCK_ID_CSC_BY8 = 0x1,
	DBG_BLOCK_ID_DMA0_BY8 = 0x2,
	DBG_BLOCK_ID_CP0_BY8 = 0x3,
	DBG_BLOCK_ID_VGT0_BY8 = 0x4,
	DBG_BLOCK_ID_TCAA_BY8 = 0x5,
	DBG_BLOCK_ID_SX0_BY8 = 0x6,
	DBG_BLOCK_ID_PC0_BY8 = 0x7,
	DBG_BLOCK_ID_SCB0_BY8 = 0x8,
	DBG_BLOCK_ID_BCI0_BY8 = 0x9,
	DBG_BLOCK_ID_CB00_BY8 = 0xa,
	DBG_BLOCK_ID_CB10_BY8 = 0xb,
	DBG_BLOCK_ID_TCP0_BY8 = 0xc,
	DBG_BLOCK_ID_TCP8_BY8 = 0xd,
	DBG_BLOCK_ID_TCP16_BY8 = 0xe,
	DBG_BLOCK_ID_TCP_RESERVED0_BY8 = 0xf,
	DBG_BLOCK_ID_DB00_BY8 = 0x10,
	DBG_BLOCK_ID_DB10_BY8 = 0x11,
	DBG_BLOCK_ID_TCC0_BY8 = 0x12,
	DBG_BLOCK_ID_SPS00_BY8 = 0x13,
	DBG_BLOCK_ID_TA00_BY8 = 0x14,
	DBG_BLOCK_ID_TA08_BY8 = 0x15,
	DBG_BLOCK_ID_TA10_BY8 = 0x16,
	DBG_BLOCK_ID_TA18_BY8 = 0x17,
	DBG_BLOCK_ID_TD00_BY8 = 0x18,
	DBG_BLOCK_ID_TD08_BY8 = 0x19,
	DBG_BLOCK_ID_TD10_BY8 = 0x1a,
	DBG_BLOCK_ID_TD18_BY8 = 0x1b,
	DBG_BLOCK_ID_MCD0_BY8 = 0x1c,
} DebugBlockId_BY8;
/*
 * DebugBlockId_BY16 - identifiers carrying the _BY16 suffix.
 *
 * Values are dense, running from 0x0 (RESERVED) to 0xe (MCD0).
 */
typedef enum DebugBlockId_BY16 {
	DBG_BLOCK_ID_RESERVED_BY16 = 0x0,
	DBG_BLOCK_ID_DMA0_BY16 = 0x1,
	DBG_BLOCK_ID_VGT0_BY16 = 0x2,
	DBG_BLOCK_ID_SX0_BY16 = 0x3,
	DBG_BLOCK_ID_SCB0_BY16 = 0x4,
	DBG_BLOCK_ID_CB00_BY16 = 0x5,
	DBG_BLOCK_ID_TCP0_BY16 = 0x6,
	DBG_BLOCK_ID_TCP16_BY16 = 0x7,
	DBG_BLOCK_ID_DB00_BY16 = 0x8,
	DBG_BLOCK_ID_TCC0_BY16 = 0x9,
	DBG_BLOCK_ID_TA00_BY16 = 0xa,
	DBG_BLOCK_ID_TA10_BY16 = 0xb,
	DBG_BLOCK_ID_TD00_BY16 = 0xc,
	DBG_BLOCK_ID_TD10_BY16 = 0xd,
	DBG_BLOCK_ID_MCD0_BY16 = 0xe,
} DebugBlockId_BY16;
/*
 * CompareRef - eight REF_* selectors, dense from REF_NEVER (0x0)
 * to REF_ALWAYS (0x7).
 */
typedef enum CompareRef {
	REF_NEVER = 0x0,
	REF_LESS = 0x1,
	REF_EQUAL = 0x2,
	REF_LEQUAL = 0x3,
	REF_GREATER = 0x4,
	REF_NOTEQUAL = 0x5,
	REF_GEQUAL = 0x6,
	REF_ALWAYS = 0x7,
} CompareRef;
/* ReadSize - two selectors: READ_256_BITS (0x0) and READ_512_BITS (0x1). */
typedef enum ReadSize {
	READ_256_BITS = 0x0,
	READ_512_BITS = 0x1,
} ReadSize;
/*
 * DepthFormat - DEPTH_* codes, dense from DEPTH_INVALID (0x0)
 * to DEPTH_X24_8_32_FLOAT (0x7).
 */
typedef enum DepthFormat {
	DEPTH_INVALID = 0x0,
	DEPTH_16 = 0x1,
	DEPTH_X8_24 = 0x2,
	DEPTH_8_24 = 0x3,
	DEPTH_X8_24_FLOAT = 0x4,
	DEPTH_8_24_FLOAT = 0x5,
	DEPTH_32_FLOAT = 0x6,
	DEPTH_X24_8_32_FLOAT = 0x7,
} DepthFormat;
/* ZFormat - Z_* codes, dense from Z_INVALID (0x0) to Z_32_FLOAT (0x3). */
typedef enum ZFormat {
	Z_INVALID = 0x0,
	Z_16 = 0x1,
	Z_24 = 0x2,
	Z_32_FLOAT = 0x3,
} ZFormat;
/* StencilFormat - STENCIL_INVALID (0x0) and STENCIL_8 (0x1). */
typedef enum StencilFormat {
	STENCIL_INVALID = 0x0,
	STENCIL_8 = 0x1,
} StencilFormat;
/*
 * CmaskMode - sixteen CMASK_* codes, dense from 0x0 to 0xf.
 *
 * 0x0..0x3 are the CLEAR_NONE/ONE/ALL and ANY_EXPANDED codes; 0x4..0xf
 * are three ALPHA0/ALPHA1/ALPHAX groups of four FRAG1/FRAG2/FRAG4/FRAGS
 * entries each.
 */
typedef enum CmaskMode {
	CMASK_CLEAR_NONE = 0x0,
	CMASK_CLEAR_ONE = 0x1,
	CMASK_CLEAR_ALL = 0x2,
	CMASK_ANY_EXPANDED = 0x3,
	CMASK_ALPHA0_FRAG1 = 0x4,
	CMASK_ALPHA0_FRAG2 = 0x5,
	CMASK_ALPHA0_FRAG4 = 0x6,
	CMASK_ALPHA0_FRAGS = 0x7,
	CMASK_ALPHA1_FRAG1 = 0x8,
	CMASK_ALPHA1_FRAG2 = 0x9,
	CMASK_ALPHA1_FRAG4 = 0xa,
	CMASK_ALPHA1_FRAGS = 0xb,
	CMASK_ALPHAX_FRAG1 = 0xc,
	CMASK_ALPHAX_FRAG2 = 0xd,
	CMASK_ALPHAX_FRAG4 = 0xe,
	CMASK_ALPHAX_FRAGS = 0xf,
} CmaskMode;
/*
 * QuadExportFormat - EXPORT_* codes, dense from EXPORT_UNUSED (0x0)
 * to EXPORT_32_ABGR (0x7).
 */
typedef enum QuadExportFormat {
	EXPORT_UNUSED = 0x0,
	EXPORT_32_R = 0x1,
	EXPORT_32_GR = 0x2,
	EXPORT_32_AR = 0x3,
	EXPORT_FP16_ABGR = 0x4,
	EXPORT_UNSIGNED16_ABGR = 0x5,
	EXPORT_SIGNED16_ABGR = 0x6,
	EXPORT_32_ABGR = 0x7,
} QuadExportFormat;
/*
 * QuadExportFormatOld - EXPORT_<n>P_* codes, dense from
 * EXPORT_4P_32BPC_ABGR (0x0) to EXPORT_8P_32BPC_R (0x5).
 */
typedef enum QuadExportFormatOld {
	EXPORT_4P_32BPC_ABGR = 0x0,
	EXPORT_4P_16BPC_ABGR = 0x1,
	EXPORT_4P_32BPC_GR = 0x2,
	EXPORT_4P_32BPC_AR = 0x3,
	EXPORT_2P_32BPC_ABGR = 0x4,
	EXPORT_8P_32BPC_R = 0x5,
} QuadExportFormatOld;
/*
 * ColorFormat - COLOR_* codes, dense from COLOR_INVALID (0x0) to
 * COLOR_RESERVED_23 (0x17).
 *
 * The COLOR_RESERVED_<n> entries (0xd, 0xf, 0x17) carry their own decimal
 * value in the name.
 */
typedef enum ColorFormat {
	COLOR_INVALID = 0x0,
	COLOR_8 = 0x1,
	COLOR_16 = 0x2,
	COLOR_8_8 = 0x3,
	COLOR_32 = 0x4,
	COLOR_16_16 = 0x5,
	COLOR_10_11_11 = 0x6,
	COLOR_11_11_10 = 0x7,
	COLOR_10_10_10_2 = 0x8,
	COLOR_2_10_10_10 = 0x9,
	COLOR_8_8_8_8 = 0xa,
	COLOR_32_32 = 0xb,
	COLOR_16_16_16_16 = 0xc,
	COLOR_RESERVED_13 = 0xd,
	COLOR_32_32_32_32 = 0xe,
	COLOR_RESERVED_15 = 0xf,
	COLOR_5_6_5 = 0x10,
	COLOR_1_5_5_5 = 0x11,
	COLOR_5_5_5_1 = 0x12,
	COLOR_4_4_4_4 = 0x13,
	COLOR_8_24 = 0x14,
	COLOR_24_8 = 0x15,
	COLOR_X24_8_32_FLOAT = 0x16,
	COLOR_RESERVED_23 = 0x17,
} ColorFormat;
/*
 * SurfaceFormat - sixty-four FMT_* codes, dense from FMT_INVALID (0x0)
 * to FMT_RESERVED_63 (0x3f).
 */
typedef enum SurfaceFormat {
	FMT_INVALID = 0x0,
	FMT_8 = 0x1,
	FMT_16 = 0x2,
	FMT_8_8 = 0x3,
	FMT_32 = 0x4,
	FMT_16_16 = 0x5,
	FMT_10_11_11 = 0x6,
	FMT_11_11_10 = 0x7,
	FMT_10_10_10_2 = 0x8,
	FMT_2_10_10_10 = 0x9,
	FMT_8_8_8_8 = 0xa,
	FMT_32_32 = 0xb,
	FMT_16_16_16_16 = 0xc,
	FMT_32_32_32 = 0xd,
	FMT_32_32_32_32 = 0xe,
	FMT_RESERVED_4 = 0xf,
	FMT_5_6_5 = 0x10,
	FMT_1_5_5_5 = 0x11,
	FMT_5_5_5_1 = 0x12,
	FMT_4_4_4_4 = 0x13,
	FMT_8_24 = 0x14,
	FMT_24_8 = 0x15,
	FMT_X24_8_32_FLOAT = 0x16,
	FMT_RESERVED_33 = 0x17,
	FMT_11_11_10_FLOAT = 0x18,
	FMT_16_FLOAT = 0x19,
	FMT_32_FLOAT = 0x1a,
	FMT_16_16_FLOAT = 0x1b,
	FMT_8_24_FLOAT = 0x1c,
	FMT_24_8_FLOAT = 0x1d,
	FMT_32_32_FLOAT = 0x1e,
	FMT_10_11_11_FLOAT = 0x1f,
	FMT_16_16_16_16_FLOAT = 0x20,
	FMT_3_3_2 = 0x21,
	FMT_6_5_5 = 0x22,
	FMT_32_32_32_32_FLOAT = 0x23,
	FMT_RESERVED_36 = 0x24,
	FMT_1 = 0x25,
	FMT_1_REVERSED = 0x26,
	FMT_GB_GR = 0x27,
	FMT_BG_RG = 0x28,
	FMT_32_AS_8 = 0x29,
	FMT_32_AS_8_8 = 0x2a,
	FMT_5_9_9_9_SHAREDEXP = 0x2b,
	FMT_8_8_8 = 0x2c,
	FMT_16_16_16 = 0x2d,
	FMT_16_16_16_FLOAT = 0x2e,
	FMT_4_4 = 0x2f,
	FMT_32_32_32_FLOAT = 0x30,
	FMT_BC1 = 0x31,
	FMT_BC2 = 0x32,
	FMT_BC3 = 0x33,
	FMT_BC4 = 0x34,
	FMT_BC5 = 0x35,
	FMT_BC6 = 0x36,
	FMT_BC7 = 0x37,
	FMT_32_AS_32_32_32_32 = 0x38,
	FMT_APC3 = 0x39,
	FMT_APC4 = 0x3a,
	FMT_APC5 = 0x3b,
	FMT_APC6 = 0x3c,
	FMT_APC7 = 0x3d,
	FMT_CTX1 = 0x3e,
	FMT_RESERVED_63 = 0x3f,
} SurfaceFormat;
/*
 * BUF_DATA_FORMAT - sixteen BUF_DATA_FORMAT_* codes, dense from
 * INVALID (0x0) to RESERVED_15 (0xf).
 */
typedef enum BUF_DATA_FORMAT {
	BUF_DATA_FORMAT_INVALID = 0x0,
	BUF_DATA_FORMAT_8 = 0x1,
	BUF_DATA_FORMAT_16 = 0x2,
	BUF_DATA_FORMAT_8_8 = 0x3,
	BUF_DATA_FORMAT_32 = 0x4,
	BUF_DATA_FORMAT_16_16 = 0x5,
	BUF_DATA_FORMAT_10_11_11 = 0x6,
	BUF_DATA_FORMAT_11_11_10 = 0x7,
	BUF_DATA_FORMAT_10_10_10_2 = 0x8,
	BUF_DATA_FORMAT_2_10_10_10 = 0x9,
	BUF_DATA_FORMAT_8_8_8_8 = 0xa,
	BUF_DATA_FORMAT_32_32 = 0xb,
	BUF_DATA_FORMAT_16_16_16_16 = 0xc,
	BUF_DATA_FORMAT_32_32_32 = 0xd,
	BUF_DATA_FORMAT_32_32_32_32 = 0xe,
	BUF_DATA_FORMAT_RESERVED_15 = 0xf,
} BUF_DATA_FORMAT;
/*
 * IMG_DATA_FORMAT - sixty-four IMG_DATA_FORMAT_* codes, dense from
 * INVALID (0x0) to 32_AS_32_32_32_32 (0x3f).
 *
 * Every RESERVED_<n> entry carries its own decimal value in the name
 * (15, 23..31, 42, 43).
 */
typedef enum IMG_DATA_FORMAT {
	IMG_DATA_FORMAT_INVALID = 0x0,
	IMG_DATA_FORMAT_8 = 0x1,
	IMG_DATA_FORMAT_16 = 0x2,
	IMG_DATA_FORMAT_8_8 = 0x3,
	IMG_DATA_FORMAT_32 = 0x4,
	IMG_DATA_FORMAT_16_16 = 0x5,
	IMG_DATA_FORMAT_10_11_11 = 0x6,
	IMG_DATA_FORMAT_11_11_10 = 0x7,
	IMG_DATA_FORMAT_10_10_10_2 = 0x8,
	IMG_DATA_FORMAT_2_10_10_10 = 0x9,
	IMG_DATA_FORMAT_8_8_8_8 = 0xa,
	IMG_DATA_FORMAT_32_32 = 0xb,
	IMG_DATA_FORMAT_16_16_16_16 = 0xc,
	IMG_DATA_FORMAT_32_32_32 = 0xd,
	IMG_DATA_FORMAT_32_32_32_32 = 0xe,
	IMG_DATA_FORMAT_RESERVED_15 = 0xf,
	IMG_DATA_FORMAT_5_6_5 = 0x10,
	IMG_DATA_FORMAT_1_5_5_5 = 0x11,
	IMG_DATA_FORMAT_5_5_5_1 = 0x12,
	IMG_DATA_FORMAT_4_4_4_4 = 0x13,
	IMG_DATA_FORMAT_8_24 = 0x14,
	IMG_DATA_FORMAT_24_8 = 0x15,
	IMG_DATA_FORMAT_X24_8_32 = 0x16,
	IMG_DATA_FORMAT_RESERVED_23 = 0x17,
	IMG_DATA_FORMAT_RESERVED_24 = 0x18,
	IMG_DATA_FORMAT_RESERVED_25 = 0x19,
	IMG_DATA_FORMAT_RESERVED_26 = 0x1a,
	IMG_DATA_FORMAT_RESERVED_27 = 0x1b,
	IMG_DATA_FORMAT_RESERVED_28 = 0x1c,
	IMG_DATA_FORMAT_RESERVED_29 = 0x1d,
	IMG_DATA_FORMAT_RESERVED_30 = 0x1e,
	IMG_DATA_FORMAT_RESERVED_31 = 0x1f,
	IMG_DATA_FORMAT_GB_GR = 0x20,
	IMG_DATA_FORMAT_BG_RG = 0x21,
	IMG_DATA_FORMAT_5_9_9_9 = 0x22,
	IMG_DATA_FORMAT_BC1 = 0x23,
	IMG_DATA_FORMAT_BC2 = 0x24,
	IMG_DATA_FORMAT_BC3 = 0x25,
	IMG_DATA_FORMAT_BC4 = 0x26,
	IMG_DATA_FORMAT_BC5 = 0x27,
	IMG_DATA_FORMAT_BC6 = 0x28,
	IMG_DATA_FORMAT_BC7 = 0x29,
	IMG_DATA_FORMAT_RESERVED_42 = 0x2a,
	IMG_DATA_FORMAT_RESERVED_43 = 0x2b,
	IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c,
	IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d,
	IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e,
	IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f,
	IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30,
	IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31,
	IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32,
	IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33,
	IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34,
	IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35,
	IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36,
	IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37,
	IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38,
	IMG_DATA_FORMAT_4_4 = 0x39,
	IMG_DATA_FORMAT_6_5_5 = 0x3a,
	IMG_DATA_FORMAT_1 = 0x3b,
	IMG_DATA_FORMAT_1_REVERSED = 0x3c,
	IMG_DATA_FORMAT_32_AS_8 = 0x3d,
	IMG_DATA_FORMAT_32_AS_8_8 = 0x3e,
	IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f,
} IMG_DATA_FORMAT;
/*
 * BUF_NUM_FORMAT - eight BUF_NUM_FORMAT_* codes, dense from
 * UNORM (0x0) to FLOAT (0x7).
 */
typedef enum BUF_NUM_FORMAT {
	BUF_NUM_FORMAT_UNORM = 0x0,
	BUF_NUM_FORMAT_SNORM = 0x1,
	BUF_NUM_FORMAT_USCALED = 0x2,
	BUF_NUM_FORMAT_SSCALED = 0x3,
	BUF_NUM_FORMAT_UINT = 0x4,
	BUF_NUM_FORMAT_SINT = 0x5,
	BUF_NUM_FORMAT_SNORM_OGL = 0x6,
	BUF_NUM_FORMAT_FLOAT = 0x7,
} BUF_NUM_FORMAT;
/*
 * IMG_NUM_FORMAT - sixteen IMG_NUM_FORMAT_* codes, dense from
 * UNORM (0x0) to RESERVED_15 (0xf).
 */
typedef enum IMG_NUM_FORMAT {
	IMG_NUM_FORMAT_UNORM = 0x0,
	IMG_NUM_FORMAT_SNORM = 0x1,
	IMG_NUM_FORMAT_USCALED = 0x2,
	IMG_NUM_FORMAT_SSCALED = 0x3,
	IMG_NUM_FORMAT_UINT = 0x4,
	IMG_NUM_FORMAT_SINT = 0x5,
	IMG_NUM_FORMAT_SNORM_OGL = 0x6,
	IMG_NUM_FORMAT_FLOAT = 0x7,
	IMG_NUM_FORMAT_RESERVED_8 = 0x8,
	IMG_NUM_FORMAT_SRGB = 0x9,
	IMG_NUM_FORMAT_UBNORM = 0xa,
	IMG_NUM_FORMAT_UBNORM_OGL = 0xb,
	IMG_NUM_FORMAT_UBINT = 0xc,
	IMG_NUM_FORMAT_UBSCALED = 0xd,
	IMG_NUM_FORMAT_RESERVED_14 = 0xe,
	IMG_NUM_FORMAT_RESERVED_15 = 0xf,
} IMG_NUM_FORMAT;
/* TileType - ARRAY_COLOR_TILE (0x0) and ARRAY_DEPTH_TILE (0x1). */
typedef enum TileType {
	ARRAY_COLOR_TILE = 0x0,
	ARRAY_DEPTH_TILE = 0x1,
} TileType;
/*
 * NonDispTilingOrder - ADDR_SURF_MICRO_TILING_DISPLAY (0x0) and
 * ADDR_SURF_MICRO_TILING_NON_DISPLAY (0x1).
 */
typedef enum NonDispTilingOrder {
	ADDR_SURF_MICRO_TILING_DISPLAY = 0x0,
	ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x1,
} NonDispTilingOrder;
/*
 * MicroTileMode - five ADDR_SURF_*_MICRO_TILING codes, dense from
 * DISPLAY (0x0) to THICK (0x4).
 */
typedef enum MicroTileMode {
	ADDR_SURF_DISPLAY_MICRO_TILING = 0x0,
	ADDR_SURF_THIN_MICRO_TILING = 0x1,
	ADDR_SURF_DEPTH_MICRO_TILING = 0x2,
	ADDR_SURF_ROTATED_MICRO_TILING = 0x3,
	ADDR_SURF_THICK_MICRO_TILING = 0x4,
} MicroTileMode;
/*
 * TileSplit - seven ADDR_SURF_TILE_SPLIT_* codes, dense from
 * 64B (0x0) to 4KB (0x6).
 */
typedef enum TileSplit {
	ADDR_SURF_TILE_SPLIT_64B = 0x0,
	ADDR_SURF_TILE_SPLIT_128B = 0x1,
	ADDR_SURF_TILE_SPLIT_256B = 0x2,
	ADDR_SURF_TILE_SPLIT_512B = 0x3,
	ADDR_SURF_TILE_SPLIT_1KB = 0x4,
	ADDR_SURF_TILE_SPLIT_2KB = 0x5,
	ADDR_SURF_TILE_SPLIT_4KB = 0x6,
} TileSplit;
/*
 * SampleSplit - four ADDR_SURF_SAMPLE_SPLIT_* codes (0x0..0x3) named
 * _1, _2, _4 and _8.
 */
typedef enum SampleSplit {
	ADDR_SURF_SAMPLE_SPLIT_1 = 0x0,
	ADDR_SURF_SAMPLE_SPLIT_2 = 0x1,
	ADDR_SURF_SAMPLE_SPLIT_4 = 0x2,
	ADDR_SURF_SAMPLE_SPLIT_8 = 0x3,
} SampleSplit;
/*
 * PipeConfig - eighteen ADDR_SURF_P<n>* codes, dense from
 * ADDR_SURF_P2 (0x0) to ADDR_SURF_P16_32x32_16x16 (0x11).
 *
 * Layout by name prefix: P2 entries at 0x0..0x3, P4 at 0x4..0x7,
 * P8 at 0x8..0xf, P16 at 0x10..0x11.
 */
typedef enum PipeConfig {
	ADDR_SURF_P2 = 0x0,
	ADDR_SURF_P2_RESERVED0 = 0x1,
	ADDR_SURF_P2_RESERVED1 = 0x2,
	ADDR_SURF_P2_RESERVED2 = 0x3,
	ADDR_SURF_P4_8x16 = 0x4,
	ADDR_SURF_P4_16x16 = 0x5,
	ADDR_SURF_P4_16x32 = 0x6,
	ADDR_SURF_P4_32x32 = 0x7,
	ADDR_SURF_P8_16x16_8x16 = 0x8,
	ADDR_SURF_P8_16x32_8x16 = 0x9,
	ADDR_SURF_P8_32x32_8x16 = 0xa,
	ADDR_SURF_P8_16x32_16x16 = 0xb,
	ADDR_SURF_P8_32x32_16x16 = 0xc,
	ADDR_SURF_P8_32x32_16x32 = 0xd,
	ADDR_SURF_P8_32x64_32x32 = 0xe,
	ADDR_SURF_P8_RESERVED0 = 0xf,
	ADDR_SURF_P16_32x32_8x16 = 0x10,
	ADDR_SURF_P16_32x32_16x16 = 0x11,
} PipeConfig;
/*
 * NumBanks - four ADDR_SURF_<n>_BANK codes (0x0..0x3) named
 * 2, 4, 8 and 16.
 */
typedef enum NumBanks {
	ADDR_SURF_2_BANK = 0x0,
	ADDR_SURF_4_BANK = 0x1,
	ADDR_SURF_8_BANK = 0x2,
	ADDR_SURF_16_BANK = 0x3,
} NumBanks;
/*
 * BankWidth - four ADDR_SURF_BANK_WIDTH_* codes (0x0..0x3) named
 * _1, _2, _4 and _8.
 */
typedef enum BankWidth {
	ADDR_SURF_BANK_WIDTH_1 = 0x0,
	ADDR_SURF_BANK_WIDTH_2 = 0x1,
	ADDR_SURF_BANK_WIDTH_4 = 0x2,
	ADDR_SURF_BANK_WIDTH_8 = 0x3,
} BankWidth;
/*
 * BankHeight - four ADDR_SURF_BANK_HEIGHT_* codes (0x0..0x3) named
 * _1, _2, _4 and _8.
 */
typedef enum BankHeight {
	ADDR_SURF_BANK_HEIGHT_1 = 0x0,
	ADDR_SURF_BANK_HEIGHT_2 = 0x1,
	ADDR_SURF_BANK_HEIGHT_4 = 0x2,
	ADDR_SURF_BANK_HEIGHT_8 = 0x3,
} BankHeight;
/*
 * BankWidthHeight - four ADDR_SURF_BANK_WH_* codes (0x0..0x3) named
 * _1, _2, _4 and _8.
 */
typedef enum BankWidthHeight {
	ADDR_SURF_BANK_WH_1 = 0x0,
	ADDR_SURF_BANK_WH_2 = 0x1,
	ADDR_SURF_BANK_WH_4 = 0x2,
	ADDR_SURF_BANK_WH_8 = 0x3,
} BankWidthHeight;
/*
 * MacroTileAspect - four ADDR_SURF_MACRO_ASPECT_* codes (0x0..0x3) named
 * _1, _2, _4 and _8.
 */
typedef enum MacroTileAspect {
	ADDR_SURF_MACRO_ASPECT_1 = 0x0,
	ADDR_SURF_MACRO_ASPECT_2 = 0x1,
	ADDR_SURF_MACRO_ASPECT_4 = 0x2,
	ADDR_SURF_MACRO_ASPECT_8 = 0x3,
} MacroTileAspect;
/*
 * TCC_CACHE_POLICIES - three TCC_CACHE_POLICY_* codes:
 * LRU (0x0), STREAM (0x1), BYPASS (0x2).
 */
typedef enum TCC_CACHE_POLICIES {
	TCC_CACHE_POLICY_LRU = 0x0,
	TCC_CACHE_POLICY_STREAM = 0x1,
	TCC_CACHE_POLICY_BYPASS = 0x2,
} TCC_CACHE_POLICIES;
/*
 * MTYPE - four MTYPE_* codes: NC_NV (0x0), NC (0x1), CC (0x2), UC (0x3).
 */
typedef enum MTYPE {
	MTYPE_NC_NV = 0x0,
	MTYPE_NC = 0x1,
	MTYPE_CC = 0x2,
	MTYPE_UC = 0x3,
} MTYPE;
/*
 * PERFMON_COUNTER_MODE - PERFMON_COUNTER_MODE_* codes.
 *
 * 0x0..0x9 are assigned consecutively; 0xa..0xe have no enumerator and
 * PERFMON_COUNTER_MODE_RESERVED sits at 0xf.
 */
typedef enum PERFMON_COUNTER_MODE {
	PERFMON_COUNTER_MODE_ACCUM = 0x0,
	PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1,
	PERFMON_COUNTER_MODE_MAX = 0x2,
	PERFMON_COUNTER_MODE_DIRTY = 0x3,
	PERFMON_COUNTER_MODE_SAMPLE = 0x4,
	PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x5,
	PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x6,
	PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x7,
	PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x8,
	PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x9,
	PERFMON_COUNTER_MODE_RESERVED = 0xf,
} PERFMON_COUNTER_MODE;
/*
 * PERFMON_SPM_MODE - eleven PERFMON_SPM_MODE_* codes, dense from
 * OFF (0x0) to TEST_MODE_2 (0xa).
 */
typedef enum PERFMON_SPM_MODE {
	PERFMON_SPM_MODE_OFF = 0x0,
	PERFMON_SPM_MODE_16BIT_CLAMP = 0x1,
	PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x2,
	PERFMON_SPM_MODE_32BIT_CLAMP = 0x3,
	PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x4,
	PERFMON_SPM_MODE_RESERVED_5 = 0x5,
	PERFMON_SPM_MODE_RESERVED_6 = 0x6,
	PERFMON_SPM_MODE_RESERVED_7 = 0x7,
	PERFMON_SPM_MODE_TEST_MODE_0 = 0x8,
	PERFMON_SPM_MODE_TEST_MODE_1 = 0x9,
	PERFMON_SPM_MODE_TEST_MODE_2 = 0xa,
} PERFMON_SPM_MODE;
/* SurfaceTiling - ARRAY_LINEAR (0x0) and ARRAY_TILED (0x1). */
typedef enum SurfaceTiling {
	ARRAY_LINEAR = 0x0,
	ARRAY_TILED = 0x1,
} SurfaceTiling;
/*
 * SurfaceArray - four ARRAY_* codes: 1D (0x0), 2D (0x1), 3D (0x2),
 * 3D_SLICE (0x3).
 */
typedef enum SurfaceArray {
	ARRAY_1D = 0x0,
	ARRAY_2D = 0x1,
	ARRAY_3D = 0x2,
	ARRAY_3D_SLICE = 0x3,
} SurfaceArray;
/*
 * ColorArray - three ARRAY_*_COLOR codes at 0x0, 0x1 and 0x3.
 *
 * No enumerator is assigned to 0x2, so the explicit values must stay;
 * do not rely on implicit numbering here.
 */
typedef enum ColorArray {
	ARRAY_2D_ALT_COLOR = 0x0,
	ARRAY_2D_COLOR = 0x1,
	ARRAY_3D_SLICE_COLOR = 0x3,
} ColorArray;
/* DepthArray - ARRAY_2D_ALT_DEPTH (0x0) and ARRAY_2D_DEPTH (0x1). */
typedef enum DepthArray {
	ARRAY_2D_ALT_DEPTH = 0x0,
	ARRAY_2D_DEPTH = 0x1,
} DepthArray;

#endif /* GFX_7_2_ENUM_H */


================================================
FILE: libhsakmt/tests/kfdtest/include/asic_reg/gfx_7_2_sh_mask.h
================================================
/*
 * Copyright (C) 2014  Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef GFX_7_2_SH_MASK_H
#define GFX_7_2_SH_MASK_H

/*
 * CB_BLEND_RED / CB_BLEND_GREEN / CB_BLEND_BLUE / CB_BLEND_ALPHA
 *
 * Each name has a single field whose mask covers all 32 bits
 * (0xffffffff) and whose shift is 0x0.
 * NOTE(review): presumably one 32-bit blend-constant register per
 * channel - confirm against the register specification.
 */
#define CB_BLEND_RED__BLEND_RED_MASK 0xffffffff
#define CB_BLEND_RED__BLEND_RED__SHIFT 0x0
#define CB_BLEND_GREEN__BLEND_GREEN_MASK 0xffffffff
#define CB_BLEND_GREEN__BLEND_GREEN__SHIFT 0x0
#define CB_BLEND_BLUE__BLEND_BLUE_MASK 0xffffffff
#define CB_BLEND_BLUE__BLEND_BLUE__SHIFT 0x0
#define CB_BLEND_ALPHA__BLEND_ALPHA_MASK 0xffffffff
#define CB_BLEND_ALPHA__BLEND_ALPHA__SHIFT 0x0
/*
 * CB_COLOR_CONTROL field masks and shifts.
 *
 * For every field, <FIELD>__SHIFT is the index of the lowest set bit of
 * the matching <FIELD>_MASK:
 *   DEGAMMA_ENABLE  mask 0x8       shift 0x3   (bit 3)
 *   MODE            mask 0x70      shift 0x4   (bits 4..6)
 *   ROP3            mask 0xff0000  shift 0x10  (bits 16..23)
 */
#define CB_COLOR_CONTROL__DEGAMMA_ENABLE_MASK 0x8
#define CB_COLOR_CONTROL__DEGAMMA_ENABLE__SHIFT 0x3
#define CB_COLOR_CONTROL__MODE_MASK 0x70
#define CB_COLOR_CONTROL__MODE__SHIFT 0x4
#define CB_COLOR_CONTROL__ROP3_MASK 0xff0000
#define CB_COLOR_CONTROL__ROP3__SHIFT 0x10
/*
 * CB_BLEND0_CONTROL field masks and shifts.
 *
 * Nine non-overlapping fields; each <FIELD>__SHIFT is the index of the
 * lowest set bit of the matching <FIELD>_MASK. Bits 13..15 are not
 * covered by any mask defined here.
 */
#define CB_BLEND0_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND0_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND0_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND0_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND0_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND0_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND0_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND0_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND0_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND0_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND0_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND0_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND0_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND0_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND0_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND0_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND0_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND0_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND1_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND1_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND1_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND1_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND1_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND1_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND1_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND1_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND1_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND1_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND1_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND1_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND1_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND1_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND1_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND1_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND1_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND1_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND2_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND2_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND2_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND2_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND2_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND2_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND2_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND2_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND2_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND2_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND2_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND2_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND2_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND2_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND2_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND2_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND2_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND2_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND3_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND3_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND3_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND3_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND3_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND3_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND3_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND3_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND3_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND3_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND3_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND3_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND3_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND3_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND3_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND3_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND3_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND3_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND4_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND4_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND4_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND4_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND4_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND4_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND4_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND4_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND4_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND4_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND4_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND4_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND4_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND4_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND4_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND4_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND4_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND4_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND5_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND5_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND5_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND5_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND5_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND5_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND5_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND5_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND5_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND5_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND5_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND5_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND5_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND5_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND5_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND5_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND5_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND5_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND6_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND6_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND6_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND6_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND6_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND6_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND6_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND6_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND6_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND6_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND6_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND6_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND6_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND6_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND6_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND6_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND6_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND6_CONTROL__DISABLE_ROP3__SHIFT 0x1f
#define CB_BLEND7_CONTROL__COLOR_SRCBLEND_MASK 0x1f
#define CB_BLEND7_CONTROL__COLOR_SRCBLEND__SHIFT 0x0
#define CB_BLEND7_CONTROL__COLOR_COMB_FCN_MASK 0xe0
#define CB_BLEND7_CONTROL__COLOR_COMB_FCN__SHIFT 0x5
#define CB_BLEND7_CONTROL__COLOR_DESTBLEND_MASK 0x1f00
#define CB_BLEND7_CONTROL__COLOR_DESTBLEND__SHIFT 0x8
#define CB_BLEND7_CONTROL__ALPHA_SRCBLEND_MASK 0x1f0000
#define CB_BLEND7_CONTROL__ALPHA_SRCBLEND__SHIFT 0x10
#define CB_BLEND7_CONTROL__ALPHA_COMB_FCN_MASK 0xe00000
#define CB_BLEND7_CONTROL__ALPHA_COMB_FCN__SHIFT 0x15
#define CB_BLEND7_CONTROL__ALPHA_DESTBLEND_MASK 0x1f000000
#define CB_BLEND7_CONTROL__ALPHA_DESTBLEND__SHIFT 0x18
#define CB_BLEND7_CONTROL__SEPARATE_ALPHA_BLEND_MASK 0x20000000
#define CB_BLEND7_CONTROL__SEPARATE_ALPHA_BLEND__SHIFT 0x1d
#define CB_BLEND7_CONTROL__ENABLE_MASK 0x40000000
#define CB_BLEND7_CONTROL__ENABLE__SHIFT 0x1e
#define CB_BLEND7_CONTROL__DISABLE_ROP3_MASK 0x80000000
#define CB_BLEND7_CONTROL__DISABLE_ROP3__SHIFT 0x1f
/*
 * CB_COLOR0_BASE .. CB_COLOR7_BASE: a single BASE_256B field covering all
 * 32 bits of each register (mask 0xffffffff, shift 0).
 * NOTE(review): the field name suggests a base address expressed in
 * 256-byte units - confirm against the hardware register specification.
 */
#define CB_COLOR0_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR0_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR1_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR1_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR2_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR2_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR3_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR3_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR4_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR4_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR5_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR5_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR6_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR6_BASE__BASE_256B__SHIFT 0x0
#define CB_COLOR7_BASE__BASE_256B_MASK 0xffffffff
#define CB_COLOR7_BASE__BASE_256B__SHIFT 0x0
/*
 * CB_COLOR0_PITCH .. CB_COLOR7_PITCH share one field layout (derived from
 * the mask/shift values):
 *   TILE_MAX        bits 10:0
 *   FMASK_TILE_MAX  bits 30:20
 */
#define CB_COLOR0_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR0_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR0_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR0_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR1_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR1_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR1_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR1_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR2_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR2_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR2_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR2_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR3_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR3_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR3_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR3_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR4_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR4_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR4_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR4_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR5_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR5_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR5_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR5_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR6_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR6_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR6_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR6_PITCH__FMASK_TILE_MAX__SHIFT 0x14
#define CB_COLOR7_PITCH__TILE_MAX_MASK 0x7ff
#define CB_COLOR7_PITCH__TILE_MAX__SHIFT 0x0
#define CB_COLOR7_PITCH__FMASK_TILE_MAX_MASK 0x7ff00000
#define CB_COLOR7_PITCH__FMASK_TILE_MAX__SHIFT 0x14
/*
 * CB_COLOR0_SLICE .. CB_COLOR7_SLICE: a single TILE_MAX field in
 * bits 21:0 of each register (mask 0x3fffff, shift 0).
 */
#define CB_COLOR0_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR0_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR1_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR1_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR2_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR2_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR3_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR3_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR4_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR4_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR5_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR5_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR6_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR6_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR7_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR7_SLICE__TILE_MAX__SHIFT 0x0
/*
 * CB_COLOR0_VIEW .. CB_COLOR7_VIEW share one field layout (derived from
 * the mask/shift values):
 *   SLICE_START  bits 10:0
 *   SLICE_MAX    bits 23:13
 * Bits 12:11 are not covered by any field defined here.
 */
#define CB_COLOR0_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR0_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR0_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR0_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR1_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR1_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR1_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR1_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR2_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR2_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR2_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR2_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR3_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR3_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR3_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR3_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR4_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR4_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR4_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR4_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR5_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR5_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR5_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR5_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR6_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR6_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR6_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR6_VIEW__SLICE_MAX__SHIFT 0xd
#define CB_COLOR7_VIEW__SLICE_START_MASK 0x7ff
#define CB_COLOR7_VIEW__SLICE_START__SHIFT 0x0
#define CB_COLOR7_VIEW__SLICE_MAX_MASK 0xffe000
#define CB_COLOR7_VIEW__SLICE_MAX__SHIFT 0xd
/*
 * CB_COLOR0_INFO .. CB_COLOR7_INFO share one field layout (derived from
 * the mask/shift values):
 *   ENDIAN                     bits 1:0
 *   FORMAT                     bits 6:2
 *   LINEAR_GENERAL             bit  7
 *   NUMBER_TYPE                bits 10:8
 *   COMP_SWAP                  bits 12:11
 *   FAST_CLEAR                 bit  13
 *   COMPRESSION                bit  14
 *   BLEND_CLAMP                bit  15
 *   BLEND_BYPASS               bit  16
 *   SIMPLE_FLOAT               bit  17
 *   ROUND_MODE                 bit  18
 *   CMASK_IS_LINEAR            bit  19
 *   BLEND_OPT_DONT_RD_DST      bits 22:20
 *   BLEND_OPT_DISCARD_PIXEL    bits 25:23
 *   FMASK_COMPRESSION_DISABLE  bit  26
 * Bits 31:27 are not covered by any field defined here.
 */
#define CB_COLOR0_INFO__ENDIAN_MASK 0x3
#define CB_COLOR0_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR0_INFO__FORMAT_MASK 0x7c
#define CB_COLOR0_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR0_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR0_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR0_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR0_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR0_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR0_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR0_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR0_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR0_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR0_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR0_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR0_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR0_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR0_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR0_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR0_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR0_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR0_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR0_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR0_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR0_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR0_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR0_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR0_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR0_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR0_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR1_INFO__ENDIAN_MASK 0x3
#define CB_COLOR1_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR1_INFO__FORMAT_MASK 0x7c
#define CB_COLOR1_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR1_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR1_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR1_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR1_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR1_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR1_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR1_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR1_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR1_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR1_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR1_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR1_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR1_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR1_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR1_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR1_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR1_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR1_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR1_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR1_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR1_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR1_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR1_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR1_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR1_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR1_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR2_INFO__ENDIAN_MASK 0x3
#define CB_COLOR2_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR2_INFO__FORMAT_MASK 0x7c
#define CB_COLOR2_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR2_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR2_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR2_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR2_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR2_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR2_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR2_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR2_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR2_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR2_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR2_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR2_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR2_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR2_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR2_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR2_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR2_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR2_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR2_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR2_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR2_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR2_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR2_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR2_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR2_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR2_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR3_INFO__ENDIAN_MASK 0x3
#define CB_COLOR3_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR3_INFO__FORMAT_MASK 0x7c
#define CB_COLOR3_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR3_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR3_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR3_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR3_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR3_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR3_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR3_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR3_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR3_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR3_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR3_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR3_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR3_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR3_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR3_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR3_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR3_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR3_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR3_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR3_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR3_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR3_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR3_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR3_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR3_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR3_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR4_INFO__ENDIAN_MASK 0x3
#define CB_COLOR4_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR4_INFO__FORMAT_MASK 0x7c
#define CB_COLOR4_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR4_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR4_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR4_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR4_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR4_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR4_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR4_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR4_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR4_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR4_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR4_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR4_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR4_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR4_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR4_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR4_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR4_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR4_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR4_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR4_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR4_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR4_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR4_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR4_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR4_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR4_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR5_INFO__ENDIAN_MASK 0x3
#define CB_COLOR5_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR5_INFO__FORMAT_MASK 0x7c
#define CB_COLOR5_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR5_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR5_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR5_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR5_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR5_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR5_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR5_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR5_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR5_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR5_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR5_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR5_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR5_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR5_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR5_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR5_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR5_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR5_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR5_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR5_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR5_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR5_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR5_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR5_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR5_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR5_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR6_INFO__ENDIAN_MASK 0x3
#define CB_COLOR6_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR6_INFO__FORMAT_MASK 0x7c
#define CB_COLOR6_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR6_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR6_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR6_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR6_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR6_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR6_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR6_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR6_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR6_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR6_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR6_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR6_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR6_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR6_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR6_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR6_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR6_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR6_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR6_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR6_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR6_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR6_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR6_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR6_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR6_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR6_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
#define CB_COLOR7_INFO__ENDIAN_MASK 0x3
#define CB_COLOR7_INFO__ENDIAN__SHIFT 0x0
#define CB_COLOR7_INFO__FORMAT_MASK 0x7c
#define CB_COLOR7_INFO__FORMAT__SHIFT 0x2
#define CB_COLOR7_INFO__LINEAR_GENERAL_MASK 0x80
#define CB_COLOR7_INFO__LINEAR_GENERAL__SHIFT 0x7
#define CB_COLOR7_INFO__NUMBER_TYPE_MASK 0x700
#define CB_COLOR7_INFO__NUMBER_TYPE__SHIFT 0x8
#define CB_COLOR7_INFO__COMP_SWAP_MASK 0x1800
#define CB_COLOR7_INFO__COMP_SWAP__SHIFT 0xb
#define CB_COLOR7_INFO__FAST_CLEAR_MASK 0x2000
#define CB_COLOR7_INFO__FAST_CLEAR__SHIFT 0xd
#define CB_COLOR7_INFO__COMPRESSION_MASK 0x4000
#define CB_COLOR7_INFO__COMPRESSION__SHIFT 0xe
#define CB_COLOR7_INFO__BLEND_CLAMP_MASK 0x8000
#define CB_COLOR7_INFO__BLEND_CLAMP__SHIFT 0xf
#define CB_COLOR7_INFO__BLEND_BYPASS_MASK 0x10000
#define CB_COLOR7_INFO__BLEND_BYPASS__SHIFT 0x10
#define CB_COLOR7_INFO__SIMPLE_FLOAT_MASK 0x20000
#define CB_COLOR7_INFO__SIMPLE_FLOAT__SHIFT 0x11
#define CB_COLOR7_INFO__ROUND_MODE_MASK 0x40000
#define CB_COLOR7_INFO__ROUND_MODE__SHIFT 0x12
#define CB_COLOR7_INFO__CMASK_IS_LINEAR_MASK 0x80000
#define CB_COLOR7_INFO__CMASK_IS_LINEAR__SHIFT 0x13
#define CB_COLOR7_INFO__BLEND_OPT_DONT_RD_DST_MASK 0x700000
#define CB_COLOR7_INFO__BLEND_OPT_DONT_RD_DST__SHIFT 0x14
#define CB_COLOR7_INFO__BLEND_OPT_DISCARD_PIXEL_MASK 0x3800000
#define CB_COLOR7_INFO__BLEND_OPT_DISCARD_PIXEL__SHIFT 0x17
#define CB_COLOR7_INFO__FMASK_COMPRESSION_DISABLE_MASK 0x4000000
#define CB_COLOR7_INFO__FMASK_COMPRESSION_DISABLE__SHIFT 0x1a
/*
 * CB_COLOR0_ATTRIB .. CB_COLOR7_ATTRIB share one field layout (derived
 * from the mask/shift values):
 *   TILE_MODE_INDEX        bits 4:0
 *   FMASK_TILE_MODE_INDEX  bits 9:5
 *   FMASK_BANK_HEIGHT      bits 11:10
 *   NUM_SAMPLES            bits 14:12
 *   NUM_FRAGMENTS          bits 16:15
 *   FORCE_DST_ALPHA_1      bit  17
 * Bits 31:18 are not covered by any field defined here.
 */
#define CB_COLOR0_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR0_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR0_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR0_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR0_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR0_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR0_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR0_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR1_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR1_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR1_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR1_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR1_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR1_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR1_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR1_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR1_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR1_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR1_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR1_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR2_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR2_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR2_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR2_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR2_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR2_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR2_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR2_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR2_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR2_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR2_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR2_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR3_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR3_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR3_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR3_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR3_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR3_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR3_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR3_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR3_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR3_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR3_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR3_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR4_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR4_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR4_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR4_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR4_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR4_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR4_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR4_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR4_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR4_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR4_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR4_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR5_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR5_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR5_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR5_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR5_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR5_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR5_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR5_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR5_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR5_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR5_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR5_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR6_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR6_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR6_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR6_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR6_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR6_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR6_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR6_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR6_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR6_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR6_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR6_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
#define CB_COLOR7_ATTRIB__TILE_MODE_INDEX_MASK 0x1f
#define CB_COLOR7_ATTRIB__TILE_MODE_INDEX__SHIFT 0x0
#define CB_COLOR7_ATTRIB__FMASK_TILE_MODE_INDEX_MASK 0x3e0
#define CB_COLOR7_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 0x5
#define CB_COLOR7_ATTRIB__FMASK_BANK_HEIGHT_MASK 0xc00
#define CB_COLOR7_ATTRIB__FMASK_BANK_HEIGHT__SHIFT 0xa
#define CB_COLOR7_ATTRIB__NUM_SAMPLES_MASK 0x7000
#define CB_COLOR7_ATTRIB__NUM_SAMPLES__SHIFT 0xc
#define CB_COLOR7_ATTRIB__NUM_FRAGMENTS_MASK 0x18000
#define CB_COLOR7_ATTRIB__NUM_FRAGMENTS__SHIFT 0xf
#define CB_COLOR7_ATTRIB__FORCE_DST_ALPHA_1_MASK 0x20000
#define CB_COLOR7_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 0x11
/*
 * CB_COLOR0_CMASK .. CB_COLOR7_CMASK: a single BASE_256B field covering
 * all 32 bits of each register (mask 0xffffffff, shift 0).
 * NOTE(review): the field name suggests a base address expressed in
 * 256-byte units - confirm against the hardware register specification.
 */
#define CB_COLOR0_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR0_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR1_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR1_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR2_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR2_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR3_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR3_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR4_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR4_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR5_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR5_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR6_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR6_CMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR7_CMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR7_CMASK__BASE_256B__SHIFT 0x0
/*
 * CB_COLOR0_CMASK_SLICE .. CB_COLOR7_CMASK_SLICE: a single TILE_MAX field
 * in bits 13:0 of each register (mask 0x3fff, shift 0).
 */
#define CB_COLOR0_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR0_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR1_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR1_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR2_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR2_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR3_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR3_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR4_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR4_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR5_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR5_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR6_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR6_CMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR7_CMASK_SLICE__TILE_MAX_MASK 0x3fff
#define CB_COLOR7_CMASK_SLICE__TILE_MAX__SHIFT 0x0
/*
 * CB_COLOR0_FMASK .. CB_COLOR7_FMASK: a single BASE_256B field covering
 * all 32 bits of each register (mask 0xffffffff, shift 0).
 * NOTE(review): the field name suggests a base address expressed in
 * 256-byte units - confirm against the hardware register specification.
 */
#define CB_COLOR0_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR0_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR1_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR1_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR2_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR2_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR3_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR3_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR4_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR4_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR5_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR5_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR6_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR6_FMASK__BASE_256B__SHIFT 0x0
#define CB_COLOR7_FMASK__BASE_256B_MASK 0xffffffff
#define CB_COLOR7_FMASK__BASE_256B__SHIFT 0x0
/*
 * CB_COLOR0_FMASK_SLICE .. CB_COLOR7_FMASK_SLICE: a single TILE_MAX field
 * in bits 21:0 of each register (mask 0x3fffff, shift 0).
 */
#define CB_COLOR0_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR0_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR1_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR1_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR2_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR2_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR3_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR3_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR4_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR4_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR5_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR5_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR6_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR6_FMASK_SLICE__TILE_MAX__SHIFT 0x0
#define CB_COLOR7_FMASK_SLICE__TILE_MAX_MASK 0x3fffff
#define CB_COLOR7_FMASK_SLICE__TILE_MAX__SHIFT 0x0
/*
 * CB_COLOR0_CLEAR_WORD0 .. CB_COLOR7_CLEAR_WORD0: a single CLEAR_WORD0
 * field covering all 32 bits of each register (mask 0xffffffff, shift 0).
 */
#define CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR1_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR1_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR2_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR2_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR3_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR3_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR4_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR4_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR5_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR5_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR6_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR6_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR7_CLEAR_WORD0__CLEAR_WORD0_MASK 0xffffffff
#define CB_COLOR7_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0x0
#define CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR1_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR1_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR2_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR2_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR3_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR3_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR4_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR4_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR5_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR5_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR6_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR6_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
#define CB_COLOR7_CLEAR_WORD1__CLEAR_WORD1_MASK 0xffffffff
#define CB_COLOR7_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0x0
/*
 * CB_TARGET_MASK and CB_SHADER_MASK.
 *
 * Both registers are split into eight 4-bit fields (TARGETn_ENABLE and
 * OUTPUTn_ENABLE respectively, n = 0..7). Field n starts at bit 4*n, so the
 * eight fields together cover all 32 bits.
 */
#define CB_TARGET_MASK__TARGET0_ENABLE_MASK 0xf
#define CB_TARGET_MASK__TARGET0_ENABLE__SHIFT 0x0
#define CB_TARGET_MASK__TARGET1_ENABLE_MASK 0xf0
#define CB_TARGET_MASK__TARGET1_ENABLE__SHIFT 0x4
#define CB_TARGET_MASK__TARGET2_ENABLE_MASK 0xf00
#define CB_TARGET_MASK__TARGET2_ENABLE__SHIFT 0x8
#define CB_TARGET_MASK__TARGET3_ENABLE_MASK 0xf000
#define CB_TARGET_MASK__TARGET3_ENABLE__SHIFT 0xc
#define CB_TARGET_MASK__TARGET4_ENABLE_MASK 0xf0000
#define CB_TARGET_MASK__TARGET4_ENABLE__SHIFT 0x10
#define CB_TARGET_MASK__TARGET5_ENABLE_MASK 0xf00000
#define CB_TARGET_MASK__TARGET5_ENABLE__SHIFT 0x14
#define CB_TARGET_MASK__TARGET6_ENABLE_MASK 0xf000000
#define CB_TARGET_MASK__TARGET6_ENABLE__SHIFT 0x18
#define CB_TARGET_MASK__TARGET7_ENABLE_MASK 0xf0000000
#define CB_TARGET_MASK__TARGET7_ENABLE__SHIFT 0x1c
#define CB_SHADER_MASK__OUTPUT0_ENABLE_MASK 0xf
#define CB_SHADER_MASK__OUTPUT0_ENABLE__SHIFT 0x0
#define CB_SHADER_MASK__OUTPUT1_ENABLE_MASK 0xf0
#define CB_SHADER_MASK__OUTPUT1_ENABLE__SHIFT 0x4
#define CB_SHADER_MASK__OUTPUT2_ENABLE_MASK 0xf00
#define CB_SHADER_MASK__OUTPUT2_ENABLE__SHIFT 0x8
#define CB_SHADER_MASK__OUTPUT3_ENABLE_MASK 0xf000
#define CB_SHADER_MASK__OUTPUT3_ENABLE__SHIFT 0xc
#define CB_SHADER_MASK__OUTPUT4_ENABLE_MASK 0xf0000
#define CB_SHADER_MASK__OUTPUT4_ENABLE__SHIFT 0x10
#define CB_SHADER_MASK__OUTPUT5_ENABLE_MASK 0xf00000
#define CB_SHADER_MASK__OUTPUT5_ENABLE__SHIFT 0x14
#define CB_SHADER_MASK__OUTPUT6_ENABLE_MASK 0xf000000
#define CB_SHADER_MASK__OUTPUT6_ENABLE__SHIFT 0x18
#define CB_SHADER_MASK__OUTPUT7_ENABLE_MASK 0xf0000000
#define CB_SHADER_MASK__OUTPUT7_ENABLE__SHIFT 0x1c
/*
 * CB_HW_CONTROL, CB_HW_CONTROL_1, CB_HW_CONTROL_2 and CB_HW_CONTROL_3.
 *
 * <REG>__<FIELD>_MASK selects the field's bits; <REG>__<FIELD>__SHIFT is the
 * index of its lowest bit.
 *
 *   CB_HW_CONTROL   : three 4-bit *_CACHE_EVICT_POINT fields at bits 0, 6 and
 *                     12, followed by single-bit flags at bit 16 and at
 *                     bits 18..31. Bits 4..5, 10..11 and 17 have no field
 *                     defined here.
 *   CB_HW_CONTROL_1 : five fields that together cover bits 0..31.
 *   CB_HW_CONTROL_2 : four fields; bit 23 has no field defined here.
 *   CB_HW_CONTROL_3 : a single flag at bit 0.
 */
#define CB_HW_CONTROL__CM_CACHE_EVICT_POINT_MASK 0xf
#define CB_HW_CONTROL__CM_CACHE_EVICT_POINT__SHIFT 0x0
#define CB_HW_CONTROL__FC_CACHE_EVICT_POINT_MASK 0x3c0
#define CB_HW_CONTROL__FC_CACHE_EVICT_POINT__SHIFT 0x6
#define CB_HW_CONTROL__CC_CACHE_EVICT_POINT_MASK 0xf000
#define CB_HW_CONTROL__CC_CACHE_EVICT_POINT__SHIFT 0xc
#define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE_MASK 0x10000
#define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE__SHIFT 0x10
#define CB_HW_CONTROL__DISABLE_INTNORM_LE11BPC_CLAMPING_MASK 0x40000
#define CB_HW_CONTROL__DISABLE_INTNORM_LE11BPC_CLAMPING__SHIFT 0x12
#define CB_HW_CONTROL__FORCE_NEEDS_DST_MASK 0x80000
#define CB_HW_CONTROL__FORCE_NEEDS_DST__SHIFT 0x13
#define CB_HW_CONTROL__FORCE_ALWAYS_TOGGLE_MASK 0x100000
#define CB_HW_CONTROL__FORCE_ALWAYS_TOGGLE__SHIFT 0x14
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_RESULT_EQ_DEST_MASK 0x200000
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_RESULT_EQ_DEST__SHIFT 0x15
#define CB_HW_CONTROL__DISABLE_FULL_WRITE_MASK_MASK 0x400000
#define CB_HW_CONTROL__DISABLE_FULL_WRITE_MASK__SHIFT 0x16
#define CB_HW_CONTROL__DISABLE_RESOLVE_OPT_FOR_SINGLE_FRAG_MASK 0x800000
#define CB_HW_CONTROL__DISABLE_RESOLVE_OPT_FOR_SINGLE_FRAG__SHIFT 0x17
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_DONT_RD_DST_MASK 0x1000000
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_DONT_RD_DST__SHIFT 0x18
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_BYPASS_MASK 0x2000000
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_BYPASS__SHIFT 0x19
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_DISCARD_PIXEL_MASK 0x4000000
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_DISCARD_PIXEL__SHIFT 0x1a
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_WHEN_DISABLED_SRCALPHA_IS_USED_MASK 0x8000000
#define CB_HW_CONTROL__DISABLE_BLEND_OPT_WHEN_DISABLED_SRCALPHA_IS_USED__SHIFT 0x1b
#define CB_HW_CONTROL__PRIORITIZE_FC_WR_OVER_FC_RD_ON_CMASK_CONFLICT_MASK 0x10000000
#define CB_HW_CONTROL__PRIORITIZE_FC_WR_OVER_FC_RD_ON_CMASK_CONFLICT__SHIFT 0x1c
#define CB_HW_CONTROL__PRIORITIZE_FC_EVICT_OVER_FOP_RD_ON_BANK_CONFLICT_MASK 0x20000000
#define CB_HW_CONTROL__PRIORITIZE_FC_EVICT_OVER_FOP_RD_ON_BANK_CONFLICT__SHIFT 0x1d
#define CB_HW_CONTROL__DISABLE_CC_IB_SERIALIZER_STATE_OPT_MASK 0x40000000
#define CB_HW_CONTROL__DISABLE_CC_IB_SERIALIZER_STATE_OPT__SHIFT 0x1e
#define CB_HW_CONTROL__DISABLE_PIXEL_IN_QUAD_FIX_FOR_LINEAR_SURFACE_MASK 0x80000000
#define CB_HW_CONTROL__DISABLE_PIXEL_IN_QUAD_FIX_FOR_LINEAR_SURFACE__SHIFT 0x1f
#define CB_HW_CONTROL_1__CM_CACHE_NUM_TAGS_MASK 0x1f
#define CB_HW_CONTROL_1__CM_CACHE_NUM_TAGS__SHIFT 0x0
#define CB_HW_CONTROL_1__FC_CACHE_NUM_TAGS_MASK 0x7e0
#define CB_HW_CONTROL_1__FC_CACHE_NUM_TAGS__SHIFT 0x5
#define CB_HW_CONTROL_1__CC_CACHE_NUM_TAGS_MASK 0x1f800
#define CB_HW_CONTROL_1__CC_CACHE_NUM_TAGS__SHIFT 0xb
#define CB_HW_CONTROL_1__CM_TILE_FIFO_DEPTH_MASK 0x3fe0000
#define CB_HW_CONTROL_1__CM_TILE_FIFO_DEPTH__SHIFT 0x11
#define CB_HW_CONTROL_1__CHICKEN_BITS_MASK 0xfc000000
#define CB_HW_CONTROL_1__CHICKEN_BITS__SHIFT 0x1a
#define CB_HW_CONTROL_2__CC_EVEN_ODD_FIFO_DEPTH_MASK 0xff
#define CB_HW_CONTROL_2__CC_EVEN_ODD_FIFO_DEPTH__SHIFT 0x0
#define CB_HW_CONTROL_2__FC_RDLAT_TILE_FIFO_DEPTH_MASK 0x7f00
#define CB_HW_CONTROL_2__FC_RDLAT_TILE_FIFO_DEPTH__SHIFT 0x8
#define CB_HW_CONTROL_2__FC_RDLAT_QUAD_FIFO_DEPTH_MASK 0x7f8000
#define CB_HW_CONTROL_2__FC_RDLAT_QUAD_FIFO_DEPTH__SHIFT 0xf
#define CB_HW_CONTROL_2__CHICKEN_BITS_MASK 0xff000000
#define CB_HW_CONTROL_2__CHICKEN_BITS__SHIFT 0x18
#define CB_HW_CONTROL_3__DISABLE_SLOW_MODE_EMPTY_HALF_QUAD_KILL_MASK 0x1
#define CB_HW_CONTROL_3__DISABLE_SLOW_MODE_EMPTY_HALF_QUAD_KILL__SHIFT 0x0
/*
 * CB performance-counter registers: CB_PERFCOUNTER_FILTER,
 * CB_PERFCOUNTERn_SELECT (n = 0..3), CB_PERFCOUNTER0_SELECT1 and the
 * CB_PERFCOUNTERn_LO / CB_PERFCOUNTERn_HI pairs.
 *
 * <REG>__<FIELD>_MASK selects the field's bits; <REG>__<FIELD>__SHIFT is the
 * index of its lowest bit.
 *
 *   FILTER        : six <X>_FILTER_ENABLE flag / <X>_FILTER_SEL value pairs;
 *                   bit 16 and bits 24..31 have no field defined here.
 *   SELECT (n=0)  : PERF_SEL, PERF_SEL1, CNTR_MODE, PERF_MODE1, PERF_MODE.
 *   SELECT (n>0)  : only PERF_SEL and PERF_MODE are listed, at the same bit
 *                   positions as for n = 0.
 *   LO / HI       : one full-word field each (mask 0xffffffff, shift 0).
 */
#define CB_PERFCOUNTER_FILTER__OP_FILTER_ENABLE_MASK 0x1
#define CB_PERFCOUNTER_FILTER__OP_FILTER_ENABLE__SHIFT 0x0
#define CB_PERFCOUNTER_FILTER__OP_FILTER_SEL_MASK 0xe
#define CB_PERFCOUNTER_FILTER__OP_FILTER_SEL__SHIFT 0x1
#define CB_PERFCOUNTER_FILTER__FORMAT_FILTER_ENABLE_MASK 0x10
#define CB_PERFCOUNTER_FILTER__FORMAT_FILTER_ENABLE__SHIFT 0x4
#define CB_PERFCOUNTER_FILTER__FORMAT_FILTER_SEL_MASK 0x3e0
#define CB_PERFCOUNTER_FILTER__FORMAT_FILTER_SEL__SHIFT 0x5
#define CB_PERFCOUNTER_FILTER__CLEAR_FILTER_ENABLE_MASK 0x400
#define CB_PERFCOUNTER_FILTER__CLEAR_FILTER_ENABLE__SHIFT 0xa
#define CB_PERFCOUNTER_FILTER__CLEAR_FILTER_SEL_MASK 0x800
#define CB_PERFCOUNTER_FILTER__CLEAR_FILTER_SEL__SHIFT 0xb
#define CB_PERFCOUNTER_FILTER__MRT_FILTER_ENABLE_MASK 0x1000
#define CB_PERFCOUNTER_FILTER__MRT_FILTER_ENABLE__SHIFT 0xc
#define CB_PERFCOUNTER_FILTER__MRT_FILTER_SEL_MASK 0xe000
#define CB_PERFCOUNTER_FILTER__MRT_FILTER_SEL__SHIFT 0xd
#define CB_PERFCOUNTER_FILTER__NUM_SAMPLES_FILTER_ENABLE_MASK 0x20000
#define CB_PERFCOUNTER_FILTER__NUM_SAMPLES_FILTER_ENABLE__SHIFT 0x11
#define CB_PERFCOUNTER_FILTER__NUM_SAMPLES_FILTER_SEL_MASK 0x1c0000
#define CB_PERFCOUNTER_FILTER__NUM_SAMPLES_FILTER_SEL__SHIFT 0x12
#define CB_PERFCOUNTER_FILTER__NUM_FRAGMENTS_FILTER_ENABLE_MASK 0x200000
#define CB_PERFCOUNTER_FILTER__NUM_FRAGMENTS_FILTER_ENABLE__SHIFT 0x15
#define CB_PERFCOUNTER_FILTER__NUM_FRAGMENTS_FILTER_SEL_MASK 0xc00000
#define CB_PERFCOUNTER_FILTER__NUM_FRAGMENTS_FILTER_SEL__SHIFT 0x16
#define CB_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x1ff
#define CB_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define CB_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0x7fc00
#define CB_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define CB_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define CB_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define CB_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define CB_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define CB_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define CB_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define CB_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x1ff
#define CB_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define CB_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0x7fc00
#define CB_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define CB_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define CB_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define CB_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define CB_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
#define CB_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x1ff
#define CB_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define CB_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define CB_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define CB_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x1ff
#define CB_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define CB_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define CB_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define CB_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x1ff
#define CB_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define CB_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define CB_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
#define CB_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CB_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CB_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CB_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CB_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CB_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CB_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CB_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CB_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CB_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CB_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CB_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CB_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CB_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CB_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CB_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * CB_CGTT_SCLK_CTRL field definitions.
 *
 *   ON_DELAY        : bits 0..3
 *   OFF_HYSTERESIS  : bits 4..11
 *   SOFT_OVERRIDEn  : single bits; n = 7 is bit 24, counting down to n = 0
 *                     at bit 31 (i.e. SOFT_OVERRIDEn sits at bit 31 - n).
 *
 * Bits 12..23 have no field defined here.
 */
#define CB_CGTT_SCLK_CTRL__ON_DELAY_MASK 0xf
#define CB_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0
#define CB_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CB_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define CB_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CB_DEBUG_BUS_13 .. CB_DEBUG_BUS_18 field definitions.
 *
 * <REG>__<FIELD>_MASK selects the field's bits; <REG>__<FIELD>__SHIFT is the
 * index of its lowest bit. Most fields are single-bit flags packed from bit 0
 * upward. The multi-bit fields are:
 *
 *   CB_DEBUG_BUS_13.MU_STATE          : bits 11..18
 *   CB_DEBUG_BUS_15.SURF_SYNC_STATE   : bits 0..1
 *   CB_DEBUG_BUS_16.MC_RDREQ_CREDITS  : bits 0..5
 *   CB_DEBUG_BUS_16.LAST_RD_GRANT_VEC : bits 6..9
 *   CB_DEBUG_BUS_16.MC_WRREQ_CREDITS  : bits 10..15
 *   CB_DEBUG_BUS_16.LAST_WR_GRANT_VEC : bits 16..19
 *   CB_DEBUG_BUS_18.NOT_USED          : bits 0..23
 */
#define CB_DEBUG_BUS_13__TILE_INTFC_BUSY_MASK 0x1
#define CB_DEBUG_BUS_13__TILE_INTFC_BUSY__SHIFT 0x0
#define CB_DEBUG_BUS_13__MU_BUSY_MASK 0x2
#define CB_DEBUG_BUS_13__MU_BUSY__SHIFT 0x1
#define CB_DEBUG_BUS_13__TQ_BUSY_MASK 0x4
#define CB_DEBUG_BUS_13__TQ_BUSY__SHIFT 0x2
#define CB_DEBUG_BUS_13__AC_BUSY_MASK 0x8
#define CB_DEBUG_BUS_13__AC_BUSY__SHIFT 0x3
#define CB_DEBUG_BUS_13__CRW_BUSY_MASK 0x10
#define CB_DEBUG_BUS_13__CRW_BUSY__SHIFT 0x4
#define CB_DEBUG_BUS_13__CACHE_CTRL_BUSY_MASK 0x20
#define CB_DEBUG_BUS_13__CACHE_CTRL_BUSY__SHIFT 0x5
#define CB_DEBUG_BUS_13__MC_WR_PENDING_MASK 0x40
#define CB_DEBUG_BUS_13__MC_WR_PENDING__SHIFT 0x6
#define CB_DEBUG_BUS_13__FC_WR_PENDING_MASK 0x80
#define CB_DEBUG_BUS_13__FC_WR_PENDING__SHIFT 0x7
#define CB_DEBUG_BUS_13__FC_RD_PENDING_MASK 0x100
#define CB_DEBUG_BUS_13__FC_RD_PENDING__SHIFT 0x8
#define CB_DEBUG_BUS_13__EVICT_PENDING_MASK 0x200
#define CB_DEBUG_BUS_13__EVICT_PENDING__SHIFT 0x9
#define CB_DEBUG_BUS_13__LAST_RD_ARB_WINNER_MASK 0x400
#define CB_DEBUG_BUS_13__LAST_RD_ARB_WINNER__SHIFT 0xa
#define CB_DEBUG_BUS_13__MU_STATE_MASK 0x7f800
#define CB_DEBUG_BUS_13__MU_STATE__SHIFT 0xb
#define CB_DEBUG_BUS_14__TILE_RETIREMENT_BUSY_MASK 0x1
#define CB_DEBUG_BUS_14__TILE_RETIREMENT_BUSY__SHIFT 0x0
#define CB_DEBUG_BUS_14__FOP_BUSY_MASK 0x2
#define CB_DEBUG_BUS_14__FOP_BUSY__SHIFT 0x1
#define CB_DEBUG_BUS_14__LAT_BUSY_MASK 0x4
#define CB_DEBUG_BUS_14__LAT_BUSY__SHIFT 0x2
#define CB_DEBUG_BUS_14__CACHE_CTL_BUSY_MASK 0x8
#define CB_DEBUG_BUS_14__CACHE_CTL_BUSY__SHIFT 0x3
#define CB_DEBUG_BUS_14__ADDR_BUSY_MASK 0x10
#define CB_DEBUG_BUS_14__ADDR_BUSY__SHIFT 0x4
#define CB_DEBUG_BUS_14__MERGE_BUSY_MASK 0x20
#define CB_DEBUG_BUS_14__MERGE_BUSY__SHIFT 0x5
#define CB_DEBUG_BUS_14__QUAD_BUSY_MASK 0x40
#define CB_DEBUG_BUS_14__QUAD_BUSY__SHIFT 0x6
#define CB_DEBUG_BUS_14__TILE_BUSY_MASK 0x80
#define CB_DEBUG_BUS_14__TILE_BUSY__SHIFT 0x7
#define CB_DEBUG_BUS_14__CLEAR_BUSY_MASK 0x100
#define CB_DEBUG_BUS_14__CLEAR_BUSY__SHIFT 0x8
#define CB_DEBUG_BUS_15__SURF_SYNC_STATE_MASK 0x3
#define CB_DEBUG_BUS_15__SURF_SYNC_STATE__SHIFT 0x0
#define CB_DEBUG_BUS_15__SURF_SYNC_START_MASK 0x4
#define CB_DEBUG_BUS_15__SURF_SYNC_START__SHIFT 0x2
#define CB_DEBUG_BUS_15__SF_BUSY_MASK 0x8
#define CB_DEBUG_BUS_15__SF_BUSY__SHIFT 0x3
#define CB_DEBUG_BUS_15__CS_BUSY_MASK 0x10
#define CB_DEBUG_BUS_15__CS_BUSY__SHIFT 0x4
#define CB_DEBUG_BUS_15__RB_BUSY_MASK 0x20
#define CB_DEBUG_BUS_15__RB_BUSY__SHIFT 0x5
#define CB_DEBUG_BUS_15__DS_BUSY_MASK 0x40
#define CB_DEBUG_BUS_15__DS_BUSY__SHIFT 0x6
#define CB_DEBUG_BUS_15__TB_BUSY_MASK 0x80
#define CB_DEBUG_BUS_15__TB_BUSY__SHIFT 0x7
#define CB_DEBUG_BUS_15__IB_BUSY_MASK 0x100
#define CB_DEBUG_BUS_15__IB_BUSY__SHIFT 0x8
#define CB_DEBUG_BUS_16__MC_RDREQ_CREDITS_MASK 0x3f
#define CB_DEBUG_BUS_16__MC_RDREQ_CREDITS__SHIFT 0x0
#define CB_DEBUG_BUS_16__LAST_RD_GRANT_VEC_MASK 0x3c0
#define CB_DEBUG_BUS_16__LAST_RD_GRANT_VEC__SHIFT 0x6
#define CB_DEBUG_BUS_16__MC_WRREQ_CREDITS_MASK 0xfc00
#define CB_DEBUG_BUS_16__MC_WRREQ_CREDITS__SHIFT 0xa
#define CB_DEBUG_BUS_16__LAST_WR_GRANT_VEC_MASK 0xf0000
#define CB_DEBUG_BUS_16__LAST_WR_GRANT_VEC__SHIFT 0x10
#define CB_DEBUG_BUS_16__CC_WRREQ_FIFO_EMPTY_MASK 0x100000
#define CB_DEBUG_BUS_16__CC_WRREQ_FIFO_EMPTY__SHIFT 0x14
#define CB_DEBUG_BUS_16__FC_WRREQ_FIFO_EMPTY_MASK 0x200000
#define CB_DEBUG_BUS_16__FC_WRREQ_FIFO_EMPTY__SHIFT 0x15
#define CB_DEBUG_BUS_16__CM_WRREQ_FIFO_EMPTY_MASK 0x400000
#define CB_DEBUG_BUS_16__CM_WRREQ_FIFO_EMPTY__SHIFT 0x16
#define CB_DEBUG_BUS_17__CM_BUSY_MASK 0x1
#define CB_DEBUG_BUS_17__CM_BUSY__SHIFT 0x0
#define CB_DEBUG_BUS_17__FC_BUSY_MASK 0x2
#define CB_DEBUG_BUS_17__FC_BUSY__SHIFT 0x1
#define CB_DEBUG_BUS_17__CC_BUSY_MASK 0x4
#define CB_DEBUG_BUS_17__CC_BUSY__SHIFT 0x2
#define CB_DEBUG_BUS_17__BB_BUSY_MASK 0x8
#define CB_DEBUG_BUS_17__BB_BUSY__SHIFT 0x3
#define CB_DEBUG_BUS_17__MA_BUSY_MASK 0x10
#define CB_DEBUG_BUS_17__MA_BUSY__SHIFT 0x4
#define CB_DEBUG_BUS_17__CORE_SCLK_VLD_MASK 0x20
#define CB_DEBUG_BUS_17__CORE_SCLK_VLD__SHIFT 0x5
#define CB_DEBUG_BUS_17__REG_SCLK1_VLD_MASK 0x40
#define CB_DEBUG_BUS_17__REG_SCLK1_VLD__SHIFT 0x6
#define CB_DEBUG_BUS_17__REG_SCLK0_VLD_MASK 0x80
#define CB_DEBUG_BUS_17__REG_SCLK0_VLD__SHIFT 0x7
#define CB_DEBUG_BUS_18__NOT_USED_MASK 0xffffff
#define CB_DEBUG_BUS_18__NOT_USED__SHIFT 0x0
/*
 * CP_DFY_* register field definitions: CP_DFY_CNTL, CP_DFY_STAT,
 * CP_DFY_ADDR_HI / CP_DFY_ADDR_LO and CP_DFY_DATA_0 .. CP_DFY_DATA_15.
 *
 * <REG>__<FIELD>_MASK selects the field's bits; <REG>__<FIELD>__SHIFT is the
 * index of its lowest bit.
 *
 *   CNTL     : POLICY bits 8..9, VOL bit 10, ATC bit 11.
 *   STAT     : BURST_COUNT bits 0..15, TAGS_PENDING bits 16..23, BUSY bit 31.
 *   ADDR_HI  : full word.
 *   ADDR_LO  : bits 5..31; the low five bits are not part of the field.
 *   DATA_n   : full word (mask 0xffffffff, shift 0) for every n.
 */
#define CP_DFY_CNTL__POLICY_MASK 0x300
#define CP_DFY_CNTL__POLICY__SHIFT 0x8
#define CP_DFY_CNTL__VOL_MASK 0x400
#define CP_DFY_CNTL__VOL__SHIFT 0xa
#define CP_DFY_CNTL__ATC_MASK 0x800
#define CP_DFY_CNTL__ATC__SHIFT 0xb
#define CP_DFY_STAT__BURST_COUNT_MASK 0xffff
#define CP_DFY_STAT__BURST_COUNT__SHIFT 0x0
#define CP_DFY_STAT__TAGS_PENDING_MASK 0xff0000
#define CP_DFY_STAT__TAGS_PENDING__SHIFT 0x10
#define CP_DFY_STAT__BUSY_MASK 0x80000000
#define CP_DFY_STAT__BUSY__SHIFT 0x1f
#define CP_DFY_ADDR_HI__ADDR_HI_MASK 0xffffffff
#define CP_DFY_ADDR_HI__ADDR_HI__SHIFT 0x0
#define CP_DFY_ADDR_LO__ADDR_LO_MASK 0xffffffe0
#define CP_DFY_ADDR_LO__ADDR_LO__SHIFT 0x5
#define CP_DFY_DATA_0__DATA_MASK 0xffffffff
#define CP_DFY_DATA_0__DATA__SHIFT 0x0
#define CP_DFY_DATA_1__DATA_MASK 0xffffffff
#define CP_DFY_DATA_1__DATA__SHIFT 0x0
#define CP_DFY_DATA_2__DATA_MASK 0xffffffff
#define CP_DFY_DATA_2__DATA__SHIFT 0x0
#define CP_DFY_DATA_3__DATA_MASK 0xffffffff
#define CP_DFY_DATA_3__DATA__SHIFT 0x0
#define CP_DFY_DATA_4__DATA_MASK 0xffffffff
#define CP_DFY_DATA_4__DATA__SHIFT 0x0
#define CP_DFY_DATA_5__DATA_MASK 0xffffffff
#define CP_DFY_DATA_5__DATA__SHIFT 0x0
#define CP_DFY_DATA_6__DATA_MASK 0xffffffff
#define CP_DFY_DATA_6__DATA__SHIFT 0x0
#define CP_DFY_DATA_7__DATA_MASK 0xffffffff
#define CP_DFY_DATA_7__DATA__SHIFT 0x0
#define CP_DFY_DATA_8__DATA_MASK 0xffffffff
#define CP_DFY_DATA_8__DATA__SHIFT 0x0
#define CP_DFY_DATA_9__DATA_MASK 0xffffffff
#define CP_DFY_DATA_9__DATA__SHIFT 0x0
#define CP_DFY_DATA_10__DATA_MASK 0xffffffff
#define CP_DFY_DATA_10__DATA__SHIFT 0x0
#define CP_DFY_DATA_11__DATA_MASK 0xffffffff
#define CP_DFY_DATA_11__DATA__SHIFT 0x0
#define CP_DFY_DATA_12__DATA_MASK 0xffffffff
#define CP_DFY_DATA_12__DATA__SHIFT 0x0
#define CP_DFY_DATA_13__DATA_MASK 0xffffffff
#define CP_DFY_DATA_13__DATA__SHIFT 0x0
#define CP_DFY_DATA_14__DATA_MASK 0xffffffff
#define CP_DFY_DATA_14__DATA__SHIFT 0x0
#define CP_DFY_DATA_15__DATA_MASK 0xffffffff
#define CP_DFY_DATA_15__DATA__SHIFT 0x0
/*
 * CP_RB* register field definitions: BASE / BASE_HI, CNTL, RPTR_WR,
 * RPTR_ADDR / RPTR_ADDR_HI, WPTR and WPTR_POLL_ADDR_LO / _HI, for the
 * CP_RB0, CP_RB, CP_RB1 and CP_RB2 name prefixes.
 *
 * <REG>__<FIELD>_MASK selects the field's bits; <REG>__<FIELD>__SHIFT is the
 * index of its lowest bit.
 *
 * Things worth knowing when using these:
 *   - Wherever both CP_RB_<X> and CP_RB0_<X> are listed here, they carry
 *     identical masks and shifts.
 *   - CP_RB1_CNTL and CP_RB2_CNTL have no BUF_SWAP entry; CP_RB0_CNTL and
 *     CP_RB_CNTL do (bits 16..17).
 *   - BASE_HI is listed for CP_RB0 and CP_RB1 only in this run.
 *   - RPTR_ADDR registers pack RB_RPTR_SWAP in bits 0..1 and RB_RPTR_ADDR in
 *     bits 2..31.
 *   - RB_RPTR_WR and RB_WPTR are 20-bit fields (0xfffff).
 */
#define CP_RB0_BASE__RB_BASE_MASK 0xffffffff
#define CP_RB0_BASE__RB_BASE__SHIFT 0x0
#define CP_RB0_BASE_HI__RB_BASE_HI_MASK 0xff
#define CP_RB0_BASE_HI__RB_BASE_HI__SHIFT 0x0
#define CP_RB_BASE__RB_BASE_MASK 0xffffffff
#define CP_RB_BASE__RB_BASE__SHIFT 0x0
#define CP_RB1_BASE__RB_BASE_MASK 0xffffffff
#define CP_RB1_BASE__RB_BASE__SHIFT 0x0
#define CP_RB1_BASE_HI__RB_BASE_HI_MASK 0xff
#define CP_RB1_BASE_HI__RB_BASE_HI__SHIFT 0x0
#define CP_RB2_BASE__RB_BASE_MASK 0xffffffff
#define CP_RB2_BASE__RB_BASE__SHIFT 0x0
#define CP_RB0_CNTL__RB_BUFSZ_MASK 0x3f
#define CP_RB0_CNTL__RB_BUFSZ__SHIFT 0x0
#define CP_RB0_CNTL__RB_BLKSZ_MASK 0x3f00
#define CP_RB0_CNTL__RB_BLKSZ__SHIFT 0x8
#define CP_RB0_CNTL__BUF_SWAP_MASK 0x30000
#define CP_RB0_CNTL__BUF_SWAP__SHIFT 0x10
#define CP_RB0_CNTL__MIN_AVAILSZ_MASK 0x300000
#define CP_RB0_CNTL__MIN_AVAILSZ__SHIFT 0x14
#define CP_RB0_CNTL__MIN_IB_AVAILSZ_MASK 0xc00000
#define CP_RB0_CNTL__MIN_IB_AVAILSZ__SHIFT 0x16
#define CP_RB0_CNTL__CACHE_POLICY_MASK 0x3000000
#define CP_RB0_CNTL__CACHE_POLICY__SHIFT 0x18
#define CP_RB0_CNTL__RB_VOLATILE_MASK 0x4000000
#define CP_RB0_CNTL__RB_VOLATILE__SHIFT 0x1a
#define CP_RB0_CNTL__RB_NO_UPDATE_MASK 0x8000000
#define CP_RB0_CNTL__RB_NO_UPDATE__SHIFT 0x1b
#define CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK 0x80000000
#define CP_RB0_CNTL__RB_RPTR_WR_ENA__SHIFT 0x1f
#define CP_RB_CNTL__RB_BUFSZ_MASK 0x3f
#define CP_RB_CNTL__RB_BUFSZ__SHIFT 0x0
#define CP_RB_CNTL__RB_BLKSZ_MASK 0x3f00
#define CP_RB_CNTL__RB_BLKSZ__SHIFT 0x8
#define CP_RB_CNTL__BUF_SWAP_MASK 0x30000
#define CP_RB_CNTL__BUF_SWAP__SHIFT 0x10
#define CP_RB_CNTL__MIN_AVAILSZ_MASK 0x300000
#define CP_RB_CNTL__MIN_AVAILSZ__SHIFT 0x14
#define CP_RB_CNTL__MIN_IB_AVAILSZ_MASK 0xc00000
#define CP_RB_CNTL__MIN_IB_AVAILSZ__SHIFT 0x16
#define CP_RB_CNTL__CACHE_POLICY_MASK 0x3000000
#define CP_RB_CNTL__CACHE_POLICY__SHIFT 0x18
#define CP_RB_CNTL__RB_VOLATILE_MASK 0x4000000
#define CP_RB_CNTL__RB_VOLATILE__SHIFT 0x1a
#define CP_RB_CNTL__RB_NO_UPDATE_MASK 0x8000000
#define CP_RB_CNTL__RB_NO_UPDATE__SHIFT 0x1b
#define CP_RB_CNTL__RB_RPTR_WR_ENA_MASK 0x80000000
#define CP_RB_CNTL__RB_RPTR_WR_ENA__SHIFT 0x1f
#define CP_RB1_CNTL__RB_BUFSZ_MASK 0x3f
#define CP_RB1_CNTL__RB_BUFSZ__SHIFT 0x0
#define CP_RB1_CNTL__RB_BLKSZ_MASK 0x3f00
#define CP_RB1_CNTL__RB_BLKSZ__SHIFT 0x8
#define CP_RB1_CNTL__MIN_AVAILSZ_MASK 0x300000
#define CP_RB1_CNTL__MIN_AVAILSZ__SHIFT 0x14
#define CP_RB1_CNTL__MIN_IB_AVAILSZ_MASK 0xc00000
#define CP_RB1_CNTL__MIN_IB_AVAILSZ__SHIFT 0x16
#define CP_RB1_CNTL__CACHE_POLICY_MASK 0x3000000
#define CP_RB1_CNTL__CACHE_POLICY__SHIFT 0x18
#define CP_RB1_CNTL__RB_VOLATILE_MASK 0x4000000
#define CP_RB1_CNTL__RB_VOLATILE__SHIFT 0x1a
#define CP_RB1_CNTL__RB_NO_UPDATE_MASK 0x8000000
#define CP_RB1_CNTL__RB_NO_UPDATE__SHIFT 0x1b
#define CP_RB1_CNTL__RB_RPTR_WR_ENA_MASK 0x80000000
#define CP_RB1_CNTL__RB_RPTR_WR_ENA__SHIFT 0x1f
#define CP_RB2_CNTL__RB_BUFSZ_MASK 0x3f
#define CP_RB2_CNTL__RB_BUFSZ__SHIFT 0x0
#define CP_RB2_CNTL__RB_BLKSZ_MASK 0x3f00
#define CP_RB2_CNTL__RB_BLKSZ__SHIFT 0x8
#define CP_RB2_CNTL__MIN_AVAILSZ_MASK 0x300000
#define CP_RB2_CNTL__MIN_AVAILSZ__SHIFT 0x14
#define CP_RB2_CNTL__MIN_IB_AVAILSZ_MASK 0xc00000
#define CP_RB2_CNTL__MIN_IB_AVAILSZ__SHIFT 0x16
#define CP_RB2_CNTL__CACHE_POLICY_MASK 0x3000000
#define CP_RB2_CNTL__CACHE_POLICY__SHIFT 0x18
#define CP_RB2_CNTL__RB_VOLATILE_MASK 0x4000000
#define CP_RB2_CNTL__RB_VOLATILE__SHIFT 0x1a
#define CP_RB2_CNTL__RB_NO_UPDATE_MASK 0x8000000
#define CP_RB2_CNTL__RB_NO_UPDATE__SHIFT 0x1b
#define CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK 0x80000000
#define CP_RB2_CNTL__RB_RPTR_WR_ENA__SHIFT 0x1f
#define CP_RB_RPTR_WR__RB_RPTR_WR_MASK 0xfffff
#define CP_RB_RPTR_WR__RB_RPTR_WR__SHIFT 0x0
#define CP_RB0_RPTR_ADDR__RB_RPTR_SWAP_MASK 0x3
#define CP_RB0_RPTR_ADDR__RB_RPTR_SWAP__SHIFT 0x0
#define CP_RB0_RPTR_ADDR__RB_RPTR_ADDR_MASK 0xfffffffc
#define CP_RB0_RPTR_ADDR__RB_RPTR_ADDR__SHIFT 0x2
#define CP_RB_RPTR_ADDR__RB_RPTR_SWAP_MASK 0x3
#define CP_RB_RPTR_ADDR__RB_RPTR_SWAP__SHIFT 0x0
#define CP_RB_RPTR_ADDR__RB_RPTR_ADDR_MASK 0xfffffffc
#define CP_RB_RPTR_ADDR__RB_RPTR_ADDR__SHIFT 0x2
#define CP_RB1_RPTR_ADDR__RB_RPTR_SWAP_MASK 0x3
#define CP_RB1_RPTR_ADDR__RB_RPTR_SWAP__SHIFT 0x0
#define CP_RB1_RPTR_ADDR__RB_RPTR_ADDR_MASK 0xfffffffc
#define CP_RB1_RPTR_ADDR__RB_RPTR_ADDR__SHIFT 0x2
#define CP_RB2_RPTR_ADDR__RB_RPTR_SWAP_MASK 0x3
#define CP_RB2_RPTR_ADDR__RB_RPTR_SWAP__SHIFT 0x0
#define CP_RB2_RPTR_ADDR__RB_RPTR_ADDR_MASK 0xfffffffc
#define CP_RB2_RPTR_ADDR__RB_RPTR_ADDR__SHIFT 0x2
#define CP_RB0_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK 0xffff
#define CP_RB0_RPTR_ADDR_HI__RB_RPTR_ADDR_HI__SHIFT 0x0
#define CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK 0xffff
#define CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI__SHIFT 0x0
#define CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK 0xffff
#define CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI__SHIFT 0x0
#define CP_RB2_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK 0xffff
#define CP_RB2_RPTR_ADDR_HI__RB_RPTR_ADDR_HI__SHIFT 0x0
#define CP_RB0_WPTR__RB_WPTR_MASK 0xfffff
#define CP_RB0_WPTR__RB_WPTR__SHIFT 0x0
#define CP_RB_WPTR__RB_WPTR_MASK 0xfffff
#define CP_RB_WPTR__RB_WPTR__SHIFT 0x0
#define CP_RB1_WPTR__RB_WPTR_MASK 0xfffff
#define CP_RB1_WPTR__RB_WPTR__SHIFT 0x0
#define CP_RB2_WPTR__RB_WPTR_MASK 0xfffff
#define CP_RB2_WPTR__RB_WPTR__SHIFT 0x0
#define CP_RB_WPTR_POLL_ADDR_LO__OBSOLETE_MASK 0xfffffffc
#define CP_RB_WPTR_POLL_ADDR_LO__OBSOLETE__SHIFT 0x2
#define CP_RB_WPTR_POLL_ADDR_HI__OBSOLETE_MASK 0xff
#define CP_RB_WPTR_POLL_ADDR_HI__OBSOLETE__SHIFT 0x0
/*
 * CP_INT_CNTL, CP_INT_CNTL_RING0, CP_INT_CNTL_RING1 and CP_INT_CNTL_RING2.
 *
 * All four registers list the same twelve single-bit *_INT_ENABLE fields at
 * the same positions (<REG>__<FIELD>_MASK is the bit, <REG>__<FIELD>__SHIFT
 * its index):
 *
 *   bit 14 CP_ECC_ERROR        bit 23 PRIV_REG            bit 29 GENERIC2
 *   bit 17 WRM_POLL_TIMEOUT    bit 24 OPCODE_ERROR        bit 30 GENERIC1
 *   bit 19 CNTX_BUSY           bit 26 TIME_STAMP          bit 31 GENERIC0
 *   bit 20 CNTX_EMPTY          bit 27 RESERVED_BIT_ERROR
 *   bit 22 PRIV_INSTR
 */
#define CP_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_INT_CNTL__CNTX_BUSY_INT_ENABLE_MASK 0x80000
#define CP_INT_CNTL__CNTX_BUSY_INT_ENABLE__SHIFT 0x13
#define CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE_MASK 0x100000
#define CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE__SHIFT 0x14
#define CP_INT_CNTL__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define CP_INT_CNTL__PRIV_INSTR_INT_ENABLE__SHIFT 0x16
#define CP_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
#define CP_INT_CNTL_RING0__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_INT_CNTL_RING0__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_INT_CNTL_RING0__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_INT_CNTL_RING0__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK 0x80000
#define CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE__SHIFT 0x13
#define CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK 0x100000
#define CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE__SHIFT 0x14
#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE__SHIFT 0x16
#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_INT_CNTL_RING0__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_INT_CNTL_RING0__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_INT_CNTL_RING0__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_INT_CNTL_RING0__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_INT_CNTL_RING0__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_INT_CNTL_RING0__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_INT_CNTL_RING0__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_INT_CNTL_RING0__GENERIC0_INT_ENABLE__SHIFT 0x1f
#define CP_INT_CNTL_RING1__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_INT_CNTL_RING1__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_INT_CNTL_RING1__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_INT_CNTL_RING1__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_INT_CNTL_RING1__CNTX_BUSY_INT_ENABLE_MASK 0x80000
#define CP_INT_CNTL_RING1__CNTX_BUSY_INT_ENABLE__SHIFT 0x13
#define CP_INT_CNTL_RING1__CNTX_EMPTY_INT_ENABLE_MASK 0x100000
#define CP_INT_CNTL_RING1__CNTX_EMPTY_INT_ENABLE__SHIFT 0x14
#define CP_INT_CNTL_RING1__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define CP_INT_CNTL_RING1__PRIV_INSTR_INT_ENABLE__SHIFT 0x16
#define CP_INT_CNTL_RING1__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_INT_CNTL_RING1__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_INT_CNTL_RING1__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_INT_CNTL_RING1__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_INT_CNTL_RING1__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_INT_CNTL_RING1__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_INT_CNTL_RING1__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_INT_CNTL_RING1__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_INT_CNTL_RING1__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_INT_CNTL_RING1__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_INT_CNTL_RING1__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_INT_CNTL_RING1__GENERIC0_INT_ENABLE__SHIFT 0x1f
#define CP_INT_CNTL_RING2__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_INT_CNTL_RING2__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_INT_CNTL_RING2__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_INT_CNTL_RING2__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_INT_CNTL_RING2__CNTX_BUSY_INT_ENABLE_MASK 0x80000
#define CP_INT_CNTL_RING2__CNTX_BUSY_INT_ENABLE__SHIFT 0x13
#define CP_INT_CNTL_RING2__CNTX_EMPTY_INT_ENABLE_MASK 0x100000
#define CP_INT_CNTL_RING2__CNTX_EMPTY_INT_ENABLE__SHIFT 0x14
#define CP_INT_CNTL_RING2__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define CP_INT_CNTL_RING2__PRIV_INSTR_INT_ENABLE__SHIFT 0x16
#define CP_INT_CNTL_RING2__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_INT_CNTL_RING2__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_INT_CNTL_RING2__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_INT_CNTL_RING2__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_INT_CNTL_RING2__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_INT_CNTL_RING2__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_INT_CNTL_RING2__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_INT_CNTL_RING2__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_INT_CNTL_RING2__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_INT_CNTL_RING2__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_INT_CNTL_RING2__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_INT_CNTL_RING2__GENERIC0_INT_ENABLE__SHIFT 0x1f
#define CP_INT_STATUS__CP_ECC_ERROR_INT_STAT_MASK 0x4000
#define CP_INT_STATUS__CP_ECC_ERROR_INT_STAT__SHIFT 0xe
#define CP_INT_STATUS__WRM_POLL_TIMEOUT_INT_STAT_MASK 0x20000
#define CP_INT_STATUS__WRM_POLL_TIMEOUT_INT_STAT__SHIFT 0x11
#define CP_INT_STATUS__CNTX_BUSY_INT_STAT_MASK 0x80000
#define CP_INT_STATUS__CNTX_BUSY_INT_STAT__SHIFT 0x13
#define CP_INT_STATUS__CNTX_EMPTY_INT_STAT_MASK 0x100000
#define CP_INT_STATUS__CNTX_EMPTY_INT_STAT__SHIFT 0x14
#define CP_INT_STATUS__PRIV_INSTR_INT_STAT_MASK 0x400000
#define CP_INT_STATUS__PRIV_INSTR_INT_STAT__SHIFT 0x16
#define CP_INT_STATUS__PRIV_REG_INT_STAT_MASK 0x800000
#define CP_INT_STATUS__PRIV_REG_INT_STAT__SHIFT 0x17
#define CP_INT_STATUS__OPCODE_ERROR_INT_STAT_MASK 0x1000000
#define CP_INT_STATUS__OPCODE_ERROR_INT_STAT__SHIFT 0x18
#define CP_INT_STATUS__TIME_STAMP_INT_STAT_MASK 0x4000000
#define CP_INT_STATUS__TIME_STAMP_INT_STAT__SHIFT 0x1a
#define CP_INT_STATUS__RESERVED_BIT_ERROR_INT_STAT_MASK 0x8000000
#define CP_INT_STATUS__RESERVED_BIT_ERROR_INT_STAT__SHIFT 0x1b
#define CP_INT_STATUS__GENERIC2_INT_STAT_MASK 0x20000000
#define CP_INT_STATUS__GENERIC2_INT_STAT__SHIFT 0x1d
#define CP_INT_STATUS__GENERIC1_INT_STAT_MASK 0x40000000
#define CP_INT_STATUS__GENERIC1_INT_STAT__SHIFT 0x1e
#define CP_INT_STATUS__GENERIC0_INT_STAT_MASK 0x80000000
#define CP_INT_STATUS__GENERIC0_INT_STAT__SHIFT 0x1f
/*
 * CP_INT_STATUS_RING0 field layout.
 * Every field has a <FIELD>_MASK (the bits it occupies) and a <FIELD>__SHIFT
 * (index of its least-significant bit), so a field can be read as
 * (value & MASK) >> SHIFT.  All fields below are single-bit flags:
 * bits 14, 17, 19, 20, 22, 23, 24, 26, 27, 29, 30 and 31.
 */
#define CP_INT_STATUS_RING0__CP_ECC_ERROR_INT_STAT_MASK 0x4000
#define CP_INT_STATUS_RING0__CP_ECC_ERROR_INT_STAT__SHIFT 0xe
#define CP_INT_STATUS_RING0__WRM_POLL_TIMEOUT_INT_STAT_MASK 0x20000
#define CP_INT_STATUS_RING0__WRM_POLL_TIMEOUT_INT_STAT__SHIFT 0x11
#define CP_INT_STATUS_RING0__CNTX_BUSY_INT_STAT_MASK 0x80000
#define CP_INT_STATUS_RING0__CNTX_BUSY_INT_STAT__SHIFT 0x13
#define CP_INT_STATUS_RING0__CNTX_EMPTY_INT_STAT_MASK 0x100000
#define CP_INT_STATUS_RING0__CNTX_EMPTY_INT_STAT__SHIFT 0x14
#define CP_INT_STATUS_RING0__PRIV_INSTR_INT_STAT_MASK 0x400000
#define CP_INT_STATUS_RING0__PRIV_INSTR_INT_STAT__SHIFT 0x16
#define CP_INT_STATUS_RING0__PRIV_REG_INT_STAT_MASK 0x800000
#define CP_INT_STATUS_RING0__PRIV_REG_INT_STAT__SHIFT 0x17
#define CP_INT_STATUS_RING0__OPCODE_ERROR_INT_STAT_MASK 0x1000000
#define CP_INT_STATUS_RING0__OPCODE_ERROR_INT_STAT__SHIFT 0x18
#define CP_INT_STATUS_RING0__TIME_STAMP_INT_STAT_MASK 0x4000000
#define CP_INT_STATUS_RING0__TIME_STAMP_INT_STAT__SHIFT 0x1a
#define CP_INT_STATUS_RING0__RESERVED_BIT_ERROR_INT_STAT_MASK 0x8000000
#define CP_INT_STATUS_RING0__RESERVED_BIT_ERROR_INT_STAT__SHIFT 0x1b
#define CP_INT_STATUS_RING0__GENERIC2_INT_STAT_MASK 0x20000000
#define CP_INT_STATUS_RING0__GENERIC2_INT_STAT__SHIFT 0x1d
#define CP_INT_STATUS_RING0__GENERIC1_INT_STAT_MASK 0x40000000
#define CP_INT_STATUS_RING0__GENERIC1_INT_STAT__SHIFT 0x1e
#define CP_INT_STATUS_RING0__GENERIC0_INT_STAT_MASK 0x80000000
#define CP_INT_STATUS_RING0__GENERIC0_INT_STAT__SHIFT 0x1f
/*
 * CP_INT_STATUS_RING1 field layout (<FIELD>_MASK / <FIELD>__SHIFT pairs).
 * Field names and bit positions are identical to CP_INT_STATUS_RING0:
 * single-bit flags at bits 14, 17, 19, 20, 22, 23, 24, 26, 27, 29, 30, 31.
 */
#define CP_INT_STATUS_RING1__CP_ECC_ERROR_INT_STAT_MASK 0x4000
#define CP_INT_STATUS_RING1__CP_ECC_ERROR_INT_STAT__SHIFT 0xe
#define CP_INT_STATUS_RING1__WRM_POLL_TIMEOUT_INT_STAT_MASK 0x20000
#define CP_INT_STATUS_RING1__WRM_POLL_TIMEOUT_INT_STAT__SHIFT 0x11
#define CP_INT_STATUS_RING1__CNTX_BUSY_INT_STAT_MASK 0x80000
#define CP_INT_STATUS_RING1__CNTX_BUSY_INT_STAT__SHIFT 0x13
#define CP_INT_STATUS_RING1__CNTX_EMPTY_INT_STAT_MASK 0x100000
#define CP_INT_STATUS_RING1__CNTX_EMPTY_INT_STAT__SHIFT 0x14
#define CP_INT_STATUS_RING1__PRIV_INSTR_INT_STAT_MASK 0x400000
#define CP_INT_STATUS_RING1__PRIV_INSTR_INT_STAT__SHIFT 0x16
#define CP_INT_STATUS_RING1__PRIV_REG_INT_STAT_MASK 0x800000
#define CP_INT_STATUS_RING1__PRIV_REG_INT_STAT__SHIFT 0x17
#define CP_INT_STATUS_RING1__OPCODE_ERROR_INT_STAT_MASK 0x1000000
#define CP_INT_STATUS_RING1__OPCODE_ERROR_INT_STAT__SHIFT 0x18
#define CP_INT_STATUS_RING1__TIME_STAMP_INT_STAT_MASK 0x4000000
#define CP_INT_STATUS_RING1__TIME_STAMP_INT_STAT__SHIFT 0x1a
#define CP_INT_STATUS_RING1__RESERVED_BIT_ERROR_INT_STAT_MASK 0x8000000
#define CP_INT_STATUS_RING1__RESERVED_BIT_ERROR_INT_STAT__SHIFT 0x1b
#define CP_INT_STATUS_RING1__GENERIC2_INT_STAT_MASK 0x20000000
#define CP_INT_STATUS_RING1__GENERIC2_INT_STAT__SHIFT 0x1d
#define CP_INT_STATUS_RING1__GENERIC1_INT_STAT_MASK 0x40000000
#define CP_INT_STATUS_RING1__GENERIC1_INT_STAT__SHIFT 0x1e
#define CP_INT_STATUS_RING1__GENERIC0_INT_STAT_MASK 0x80000000
#define CP_INT_STATUS_RING1__GENERIC0_INT_STAT__SHIFT 0x1f
/*
 * CP_INT_STATUS_RING2 field layout (<FIELD>_MASK / <FIELD>__SHIFT pairs).
 * Field names and bit positions are identical to CP_INT_STATUS_RING0/RING1:
 * single-bit flags at bits 14, 17, 19, 20, 22, 23, 24, 26, 27, 29, 30, 31.
 */
#define CP_INT_STATUS_RING2__CP_ECC_ERROR_INT_STAT_MASK 0x4000
#define CP_INT_STATUS_RING2__CP_ECC_ERROR_INT_STAT__SHIFT 0xe
#define CP_INT_STATUS_RING2__WRM_POLL_TIMEOUT_INT_STAT_MASK 0x20000
#define CP_INT_STATUS_RING2__WRM_POLL_TIMEOUT_INT_STAT__SHIFT 0x11
#define CP_INT_STATUS_RING2__CNTX_BUSY_INT_STAT_MASK 0x80000
#define CP_INT_STATUS_RING2__CNTX_BUSY_INT_STAT__SHIFT 0x13
#define CP_INT_STATUS_RING2__CNTX_EMPTY_INT_STAT_MASK 0x100000
#define CP_INT_STATUS_RING2__CNTX_EMPTY_INT_STAT__SHIFT 0x14
#define CP_INT_STATUS_RING2__PRIV_INSTR_INT_STAT_MASK 0x400000
#define CP_INT_STATUS_RING2__PRIV_INSTR_INT_STAT__SHIFT 0x16
#define CP_INT_STATUS_RING2__PRIV_REG_INT_STAT_MASK 0x800000
#define CP_INT_STATUS_RING2__PRIV_REG_INT_STAT__SHIFT 0x17
#define CP_INT_STATUS_RING2__OPCODE_ERROR_INT_STAT_MASK 0x1000000
#define CP_INT_STATUS_RING2__OPCODE_ERROR_INT_STAT__SHIFT 0x18
#define CP_INT_STATUS_RING2__TIME_STAMP_INT_STAT_MASK 0x4000000
#define CP_INT_STATUS_RING2__TIME_STAMP_INT_STAT__SHIFT 0x1a
#define CP_INT_STATUS_RING2__RESERVED_BIT_ERROR_INT_STAT_MASK 0x8000000
#define CP_INT_STATUS_RING2__RESERVED_BIT_ERROR_INT_STAT__SHIFT 0x1b
#define CP_INT_STATUS_RING2__GENERIC2_INT_STAT_MASK 0x20000000
#define CP_INT_STATUS_RING2__GENERIC2_INT_STAT__SHIFT 0x1d
#define CP_INT_STATUS_RING2__GENERIC1_INT_STAT_MASK 0x40000000
#define CP_INT_STATUS_RING2__GENERIC1_INT_STAT__SHIFT 0x1e
#define CP_INT_STATUS_RING2__GENERIC0_INT_STAT_MASK 0x80000000
#define CP_INT_STATUS_RING2__GENERIC0_INT_STAT__SHIFT 0x1f
/* CP_DEVICE_ID: DEVICE_ID is an 8-bit field in bits 7:0. */
#define CP_DEVICE_ID__DEVICE_ID_MASK 0xff
#define CP_DEVICE_ID__DEVICE_ID__SHIFT 0x0
/*
 * CP_RING_PRIORITY_CNTS and CP_ME0_PIPE_PRIORITY_CNTS share one layout:
 * four 8-bit count fields packed into 32 bits.
 *   PRIORITY1_CNT  bits  7:0
 *   PRIORITY2A_CNT bits 15:8
 *   PRIORITY2B_CNT bits 23:16
 *   PRIORITY3_CNT  bits 31:24
 */
#define CP_RING_PRIORITY_CNTS__PRIORITY1_CNT_MASK 0xff
#define CP_RING_PRIORITY_CNTS__PRIORITY1_CNT__SHIFT 0x0
#define CP_RING_PRIORITY_CNTS__PRIORITY2A_CNT_MASK 0xff00
#define CP_RING_PRIORITY_CNTS__PRIORITY2A_CNT__SHIFT 0x8
#define CP_RING_PRIORITY_CNTS__PRIORITY2B_CNT_MASK 0xff0000
#define CP_RING_PRIORITY_CNTS__PRIORITY2B_CNT__SHIFT 0x10
#define CP_RING_PRIORITY_CNTS__PRIORITY3_CNT_MASK 0xff000000
#define CP_RING_PRIORITY_CNTS__PRIORITY3_CNT__SHIFT 0x18
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY1_CNT_MASK 0xff
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY1_CNT__SHIFT 0x0
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT_MASK 0xff00
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT__SHIFT 0x8
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT_MASK 0xff0000
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT__SHIFT 0x10
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY3_CNT_MASK 0xff000000
#define CP_ME0_PIPE_PRIORITY_CNTS__PRIORITY3_CNT__SHIFT 0x18
/*
 * CP_RING{0,1,2}_PRIORITY and CP_ME0_PIPE{0,1,2}_PRIORITY:
 * each defines a single 2-bit PRIORITY field in bits 1:0.
 */
#define CP_RING0_PRIORITY__PRIORITY_MASK 0x3
#define CP_RING0_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME0_PIPE0_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME0_PIPE0_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_RING1_PRIORITY__PRIORITY_MASK 0x3
#define CP_RING1_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME0_PIPE1_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME0_PIPE1_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_RING2_PRIORITY__PRIORITY_MASK 0x3
#define CP_RING2_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME0_PIPE2_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME0_PIPE2_PRIORITY__PRIORITY__SHIFT 0x0
/* CP_ENDIAN_SWAP: ENDIAN_SWAP is a 2-bit field in bits 1:0. */
#define CP_ENDIAN_SWAP__ENDIAN_SWAP_MASK 0x3
#define CP_ENDIAN_SWAP__ENDIAN_SWAP__SHIFT 0x0
/*
 * VMID fields (all 4 bits wide).
 *   CP_RB_VMID:        RB0_VMID bits 3:0, RB1_VMID bits 11:8,
 *                      RB2_VMID bits 19:16
 *   CP_ME0_PIPE0_VMID: VMID bits 3:0
 *   CP_ME0_PIPE1_VMID: VMID bits 3:0
 */
#define CP_RB_VMID__RB0_VMID_MASK 0xf
#define CP_RB_VMID__RB0_VMID__SHIFT 0x0
#define CP_RB_VMID__RB1_VMID_MASK 0xf00
#define CP_RB_VMID__RB1_VMID__SHIFT 0x8
#define CP_RB_VMID__RB2_VMID_MASK 0xf0000
#define CP_RB_VMID__RB2_VMID__SHIFT 0x10
#define CP_ME0_PIPE0_VMID__VMID_MASK 0xf
#define CP_ME0_PIPE0_VMID__VMID__SHIFT 0x0
#define CP_ME0_PIPE1_VMID__VMID_MASK 0xf
#define CP_ME0_PIPE1_VMID__VMID__SHIFT 0x0
/*
 * CP_PFP_UCODE_{ADDR,DATA} and CP_ME_RAM_{RADDR,WADDR,DATA}.
 * The address fields are 12 bits wide (bits 11:0); the data fields span
 * the full 32 bits.
 */
#define CP_PFP_UCODE_ADDR__UCODE_ADDR_MASK 0xfff
#define CP_PFP_UCODE_ADDR__UCODE_ADDR__SHIFT 0x0
#define CP_PFP_UCODE_DATA__UCODE_DATA_MASK 0xffffffff
#define CP_PFP_UCODE_DATA__UCODE_DATA__SHIFT 0x0
#define CP_ME_RAM_RADDR__ME_RAM_RADDR_MASK 0xfff
#define CP_ME_RAM_RADDR__ME_RAM_RADDR__SHIFT 0x0
#define CP_ME_RAM_WADDR__ME_RAM_WADDR_MASK 0xfff
#define CP_ME_RAM_WADDR__ME_RAM_WADDR__SHIFT 0x0
#define CP_ME_RAM_DATA__ME_RAM_DATA_MASK 0xffffffff
#define CP_ME_RAM_DATA__ME_RAM_DATA__SHIFT 0x0
/*
 * CGTT_CPC_CLK_CTRL, CGTT_CPF_CLK_CTRL and CGTT_CP_CLK_CTRL share one layout:
 *   ON_DELAY          bits  3:0
 *   OFF_HYSTERESIS    bits 11:4
 *   SOFT_OVERRIDE_DYN bit  30
 *   SOFT_OVERRIDE_REG bit  31
 */
#define CGTT_CPC_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_CPC_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_CPC_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_CPC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000
#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e
#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000
#define CGTT_CPC_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f
#define CGTT_CPF_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_CPF_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_CPF_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_CPF_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000
#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e
#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000
#define CGTT_CPF_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f
#define CGTT_CP_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_CP_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_CP_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_CP_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000
#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e
#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000
#define CGTT_CP_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f
/*
 * CP_CE_UCODE_{ADDR,DATA} and CP_MEC_ME{1,2}_UCODE_{ADDR,DATA}.
 * Note the differing address widths: CP_CE_UCODE_ADDR is 12 bits (0xfff),
 * while the CP_MEC_ME1/ME2 address fields are 13 bits (0x1fff).
 * The data fields span the full 32 bits.
 */
#define CP_CE_UCODE_ADDR__UCODE_ADDR_MASK 0xfff
#define CP_CE_UCODE_ADDR__UCODE_ADDR__SHIFT 0x0
#define CP_CE_UCODE_DATA__UCODE_DATA_MASK 0xffffffff
#define CP_CE_UCODE_DATA__UCODE_DATA__SHIFT 0x0
#define CP_MEC_ME1_UCODE_ADDR__UCODE_ADDR_MASK 0x1fff
#define CP_MEC_ME1_UCODE_ADDR__UCODE_ADDR__SHIFT 0x0
#define CP_MEC_ME1_UCODE_DATA__UCODE_DATA_MASK 0xffffffff
#define CP_MEC_ME1_UCODE_DATA__UCODE_DATA__SHIFT 0x0
#define CP_MEC_ME2_UCODE_ADDR__UCODE_ADDR_MASK 0x1fff
#define CP_MEC_ME2_UCODE_ADDR__UCODE_ADDR__SHIFT 0x0
#define CP_MEC_ME2_UCODE_DATA__UCODE_DATA_MASK 0xffffffff
#define CP_MEC_ME2_UCODE_DATA__UCODE_DATA__SHIFT 0x0
/*
 * CP_PFP_F32_INTERRUPT, CP_MEC1_F32_INTERRUPT and CP_MEC2_F32_INTERRUPT:
 * each defines a single-bit PRIV_REG_INT flag at bit 1 (bit 0 has no field
 * defined here).
 */
#define CP_PFP_F32_INTERRUPT__PRIV_REG_INT_MASK 0x2
#define CP_PFP_F32_INTERRUPT__PRIV_REG_INT__SHIFT 0x1
#define CP_MEC1_F32_INTERRUPT__PRIV_REG_INT_MASK 0x2
#define CP_MEC1_F32_INTERRUPT__PRIV_REG_INT__SHIFT 0x1
#define CP_MEC2_F32_INTERRUPT__PRIV_REG_INT_MASK 0x2
#define CP_MEC2_F32_INTERRUPT__PRIV_REG_INT__SHIFT 0x1
/* CP_PWR_CNTL: GFX_CLK_HALT is a single-bit flag at bit 0. */
#define CP_PWR_CNTL__GFX_CLK_HALT_MASK 0x1
#define CP_PWR_CNTL__GFX_CLK_HALT__SHIFT 0x0
/*
 * CP_MEM_SLP_CNTL field layout; the six masks together cover all 32 bits.
 *   CP_MEM_LS_EN        bit   0
 *   CP_MEM_DS_EN        bit   1
 *   RESERVED            bits  7:2
 *   CP_MEM_LS_ON_DELAY  bits 15:8
 *   CP_MEM_LS_OFF_DELAY bits 23:16
 *   RESERVED1           bits 31:24
 */
#define CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK 0x1
#define CP_MEM_SLP_CNTL__CP_MEM_LS_EN__SHIFT 0x0
#define CP_MEM_SLP_CNTL__CP_MEM_DS_EN_MASK 0x2
#define CP_MEM_SLP_CNTL__CP_MEM_DS_EN__SHIFT 0x1
#define CP_MEM_SLP_CNTL__RESERVED_MASK 0xfc
#define CP_MEM_SLP_CNTL__RESERVED__SHIFT 0x2
#define CP_MEM_SLP_CNTL__CP_MEM_LS_ON_DELAY_MASK 0xff00
#define CP_MEM_SLP_CNTL__CP_MEM_LS_ON_DELAY__SHIFT 0x8
#define CP_MEM_SLP_CNTL__CP_MEM_LS_OFF_DELAY_MASK 0xff0000
#define CP_MEM_SLP_CNTL__CP_MEM_LS_OFF_DELAY__SHIFT 0x10
#define CP_MEM_SLP_CNTL__RESERVED1_MASK 0xff000000
#define CP_MEM_SLP_CNTL__RESERVED1__SHIFT 0x18
/*
 * CP_ECC_FIRSTOCCURRENCE and its _RING0/_RING1/_RING2 variants share one
 * layout:
 *   INTERFACE      bits  1:0
 *   REQUEST_CLIENT bits  7:4
 *   RING_ID        bits 13:10
 *   VMID           bits 19:16
 * Bits not listed have no field defined here.
 */
#define CP_ECC_FIRSTOCCURRENCE__INTERFACE_MASK 0x3
#define CP_ECC_FIRSTOCCURRENCE__INTERFACE__SHIFT 0x0
#define CP_ECC_FIRSTOCCURRENCE__REQUEST_CLIENT_MASK 0xf0
#define CP_ECC_FIRSTOCCURRENCE__REQUEST_CLIENT__SHIFT 0x4
#define CP_ECC_FIRSTOCCURRENCE__RING_ID_MASK 0x3c00
#define CP_ECC_FIRSTOCCURRENCE__RING_ID__SHIFT 0xa
#define CP_ECC_FIRSTOCCURRENCE__VMID_MASK 0xf0000
#define CP_ECC_FIRSTOCCURRENCE__VMID__SHIFT 0x10
#define CP_ECC_FIRSTOCCURRENCE_RING0__INTERFACE_MASK 0x3
#define CP_ECC_FIRSTOCCURRENCE_RING0__INTERFACE__SHIFT 0x0
#define CP_ECC_FIRSTOCCURRENCE_RING0__REQUEST_CLIENT_MASK 0xf0
#define CP_ECC_FIRSTOCCURRENCE_RING0__REQUEST_CLIENT__SHIFT 0x4
#define CP_ECC_FIRSTOCCURRENCE_RING0__RING_ID_MASK 0x3c00
#define CP_ECC_FIRSTOCCURRENCE_RING0__RING_ID__SHIFT 0xa
#define CP_ECC_FIRSTOCCURRENCE_RING0__VMID_MASK 0xf0000
#define CP_ECC_FIRSTOCCURRENCE_RING0__VMID__SHIFT 0x10
#define CP_ECC_FIRSTOCCURRENCE_RING1__INTERFACE_MASK 0x3
#define CP_ECC_FIRSTOCCURRENCE_RING1__INTERFACE__SHIFT 0x0
#define CP_ECC_FIRSTOCCURRENCE_RING1__REQUEST_CLIENT_MASK 0xf0
#define CP_ECC_FIRSTOCCURRENCE_RING1__REQUEST_CLIENT__SHIFT 0x4
#define CP_ECC_FIRSTOCCURRENCE_RING1__RING_ID_MASK 0x3c00
#define CP_ECC_FIRSTOCCURRENCE_RING1__RING_ID__SHIFT 0xa
#define CP_ECC_FIRSTOCCURRENCE_RING1__VMID_MASK 0xf0000
#define CP_ECC_FIRSTOCCURRENCE_RING1__VMID__SHIFT 0x10
#define CP_ECC_FIRSTOCCURRENCE_RING2__INTERFACE_MASK 0x3
#define CP_ECC_FIRSTOCCURRENCE_RING2__INTERFACE__SHIFT 0x0
#define CP_ECC_FIRSTOCCURRENCE_RING2__REQUEST_CLIENT_MASK 0xf0
#define CP_ECC_FIRSTOCCURRENCE_RING2__REQUEST_CLIENT__SHIFT 0x4
#define CP_ECC_FIRSTOCCURRENCE_RING2__RING_ID_MASK 0x3c00
#define CP_ECC_FIRSTOCCURRENCE_RING2__RING_ID__SHIFT 0xa
#define CP_ECC_FIRSTOCCURRENCE_RING2__VMID_MASK 0xf0000
#define CP_ECC_FIRSTOCCURRENCE_RING2__VMID__SHIFT 0x10
/* CP_FETCHER_SOURCE: ME_SRC is a single-bit field at bit 0. */
#define CP_FETCHER_SOURCE__ME_SRC_MASK 0x1
#define CP_FETCHER_SOURCE__ME_SRC__SHIFT 0x0
/*
 * CP_PQ_WPTR_POLL_CNTL:  PERIOD bits 7:0, POLL_ACTIVE bit 30, EN bit 31.
 * CP_PQ_WPTR_POLL_CNTL1: QUEUE_MASK spans the full 32 bits.
 */
#define CP_PQ_WPTR_POLL_CNTL__PERIOD_MASK 0xff
#define CP_PQ_WPTR_POLL_CNTL__PERIOD__SHIFT 0x0
#define CP_PQ_WPTR_POLL_CNTL__POLL_ACTIVE_MASK 0x40000000
#define CP_PQ_WPTR_POLL_CNTL__POLL_ACTIVE__SHIFT 0x1e
#define CP_PQ_WPTR_POLL_CNTL__EN_MASK 0x80000000
#define CP_PQ_WPTR_POLL_CNTL__EN__SHIFT 0x1f
#define CP_PQ_WPTR_POLL_CNTL1__QUEUE_MASK_MASK 0xffffffff
#define CP_PQ_WPTR_POLL_CNTL1__QUEUE_MASK__SHIFT 0x0
/*
 * CPC_INT_CNTL field layout (<FIELD>_MASK / <FIELD>__SHIFT pairs).
 * Ten single-bit *_INT_ENABLE flags at bits 13, 14, 17, 23, 24, 26, 27,
 * 29, 30 and 31.  The CP_ME{1,2}_PIPE{0..3}_INT_CNTL definitions in this
 * file use the same field names and positions.
 */
#define CPC_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CPC_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CPC_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CPC_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CPC_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CPC_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CPC_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CPC_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CPC_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CPC_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CPC_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CPC_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CPC_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CPC_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CPC_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CPC_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CPC_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CPC_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CPC_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CPC_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME1_PIPE0_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME1_PIPE0_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME1_PIPE0_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME1_PIPE0_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME1_PIPE0_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME1_PIPE0_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME1_PIPE0_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME1_PIPE0_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME1_PIPE0_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME1_PIPE0_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME1_PIPE0_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME1_PIPE0_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME1_PIPE0_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME1_PIPE0_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME1_PIPE0_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME1_PIPE0_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME1_PIPE0_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME1_PIPE0_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME1_PIPE0_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME1_PIPE0_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME1_PIPE0_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME1_PIPE1_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME1_PIPE1_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME1_PIPE1_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME1_PIPE1_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME1_PIPE1_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME1_PIPE1_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME1_PIPE1_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME1_PIPE1_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME1_PIPE1_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME1_PIPE1_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME1_PIPE1_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME1_PIPE1_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME1_PIPE1_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME1_PIPE1_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME1_PIPE1_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME1_PIPE1_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME1_PIPE1_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME1_PIPE1_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME1_PIPE1_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME1_PIPE1_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME1_PIPE1_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME1_PIPE2_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME1_PIPE2_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME1_PIPE2_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME1_PIPE2_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME1_PIPE2_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME1_PIPE2_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME1_PIPE2_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME1_PIPE2_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME1_PIPE2_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME1_PIPE2_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME1_PIPE2_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME1_PIPE2_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME1_PIPE2_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME1_PIPE2_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME1_PIPE2_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME1_PIPE2_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME1_PIPE2_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME1_PIPE2_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME1_PIPE2_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME1_PIPE2_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME1_PIPE2_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME1_PIPE3_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME1_PIPE3_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME1_PIPE3_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME1_PIPE3_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME1_PIPE3_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME1_PIPE3_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME1_PIPE3_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME1_PIPE3_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME1_PIPE3_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME1_PIPE3_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME1_PIPE3_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME1_PIPE3_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME1_PIPE3_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME1_PIPE3_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME1_PIPE3_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME1_PIPE3_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME1_PIPE3_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME1_PIPE3_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME1_PIPE3_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME1_PIPE3_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME1_PIPE3_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME2_PIPE0_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME2_PIPE0_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME2_PIPE0_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME2_PIPE0_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME2_PIPE0_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME2_PIPE0_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME2_PIPE0_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME2_PIPE0_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME2_PIPE0_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME2_PIPE0_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME2_PIPE0_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME2_PIPE0_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME2_PIPE0_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME2_PIPE0_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME2_PIPE0_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME2_PIPE0_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME2_PIPE0_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME2_PIPE0_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME2_PIPE0_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME2_PIPE0_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME2_PIPE0_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME2_PIPE1_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME2_PIPE1_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME2_PIPE1_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME2_PIPE1_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME2_PIPE1_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME2_PIPE1_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME2_PIPE1_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME2_PIPE1_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME2_PIPE1_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME2_PIPE1_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME2_PIPE1_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME2_PIPE1_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME2_PIPE1_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME2_PIPE1_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME2_PIPE1_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME2_PIPE1_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME2_PIPE1_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME2_PIPE1_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME2_PIPE1_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME2_PIPE1_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME2_PIPE1_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME2_PIPE2_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME2_PIPE2_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME2_PIPE2_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME2_PIPE2_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME2_PIPE2_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME2_PIPE2_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME2_PIPE2_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME2_PIPE2_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME2_PIPE2_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME2_PIPE2_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME2_PIPE2_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME2_PIPE2_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME2_PIPE2_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME2_PIPE2_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME2_PIPE2_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME2_PIPE2_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME2_PIPE2_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME2_PIPE2_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME2_PIPE2_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME2_PIPE2_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME2_PIPE2_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CP_ME2_PIPE3_INT_CNTL: single-bit *_INT_ENABLE flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_CNTL).
 */
#define CP_ME2_PIPE3_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE_MASK 0x2000
#define CP_ME2_PIPE3_INT_CNTL__DEQUEUE_REQUEST_INT_ENABLE__SHIFT 0xd
#define CP_ME2_PIPE3_INT_CNTL__CP_ECC_ERROR_INT_ENABLE_MASK 0x4000
#define CP_ME2_PIPE3_INT_CNTL__CP_ECC_ERROR_INT_ENABLE__SHIFT 0xe
#define CP_ME2_PIPE3_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE_MASK 0x20000
#define CP_ME2_PIPE3_INT_CNTL__WRM_POLL_TIMEOUT_INT_ENABLE__SHIFT 0x11
#define CP_ME2_PIPE3_INT_CNTL__PRIV_REG_INT_ENABLE_MASK 0x800000
#define CP_ME2_PIPE3_INT_CNTL__PRIV_REG_INT_ENABLE__SHIFT 0x17
#define CP_ME2_PIPE3_INT_CNTL__OPCODE_ERROR_INT_ENABLE_MASK 0x1000000
#define CP_ME2_PIPE3_INT_CNTL__OPCODE_ERROR_INT_ENABLE__SHIFT 0x18
#define CP_ME2_PIPE3_INT_CNTL__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_ME2_PIPE3_INT_CNTL__TIME_STAMP_INT_ENABLE__SHIFT 0x1a
#define CP_ME2_PIPE3_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE_MASK 0x8000000
#define CP_ME2_PIPE3_INT_CNTL__RESERVED_BIT_ERROR_INT_ENABLE__SHIFT 0x1b
#define CP_ME2_PIPE3_INT_CNTL__GENERIC2_INT_ENABLE_MASK 0x20000000
#define CP_ME2_PIPE3_INT_CNTL__GENERIC2_INT_ENABLE__SHIFT 0x1d
#define CP_ME2_PIPE3_INT_CNTL__GENERIC1_INT_ENABLE_MASK 0x40000000
#define CP_ME2_PIPE3_INT_CNTL__GENERIC1_INT_ENABLE__SHIFT 0x1e
#define CP_ME2_PIPE3_INT_CNTL__GENERIC0_INT_ENABLE_MASK 0x80000000
#define CP_ME2_PIPE3_INT_CNTL__GENERIC0_INT_ENABLE__SHIFT 0x1f
/*
 * CPC_INT_STATUS field layout (<FIELD>_MASK / <FIELD>__SHIFT pairs).
 * Ten single-bit *_INT_STATUS flags at bits 13, 14, 17, 23, 24, 26, 27,
 * 29, 30 and 31 -- the same bit positions as the matching *_INT_ENABLE
 * fields of CPC_INT_CNTL.
 */
#define CPC_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CPC_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CPC_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CPC_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CPC_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CPC_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CPC_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CPC_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CPC_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CPC_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CPC_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CPC_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CPC_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CPC_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CPC_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CPC_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CPC_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CPC_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CPC_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CPC_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME1_PIPE0_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME1_PIPE0_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME1_PIPE0_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME1_PIPE0_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME1_PIPE0_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME1_PIPE0_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME1_PIPE0_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME1_PIPE0_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME1_PIPE0_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME1_PIPE0_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME1_PIPE0_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME1_PIPE0_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME1_PIPE0_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME1_PIPE0_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME1_PIPE0_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME1_PIPE0_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME1_PIPE0_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME1_PIPE0_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME1_PIPE0_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME1_PIPE0_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME1_PIPE0_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME1_PIPE1_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME1_PIPE1_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME1_PIPE1_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME1_PIPE1_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME1_PIPE1_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME1_PIPE1_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME1_PIPE1_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME1_PIPE1_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME1_PIPE1_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME1_PIPE1_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME1_PIPE1_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME1_PIPE1_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME1_PIPE1_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME1_PIPE1_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME1_PIPE1_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME1_PIPE1_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME1_PIPE1_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME1_PIPE1_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME1_PIPE1_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME1_PIPE1_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME1_PIPE1_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME1_PIPE2_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME1_PIPE2_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME1_PIPE2_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME1_PIPE2_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME1_PIPE2_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME1_PIPE2_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME1_PIPE2_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME1_PIPE2_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME1_PIPE2_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME1_PIPE2_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME1_PIPE2_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME1_PIPE2_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME1_PIPE2_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME1_PIPE2_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME1_PIPE2_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME1_PIPE2_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME1_PIPE2_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME1_PIPE2_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME1_PIPE2_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME1_PIPE2_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME1_PIPE2_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME1_PIPE3_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME1_PIPE3_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME1_PIPE3_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME1_PIPE3_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME1_PIPE3_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME1_PIPE3_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME1_PIPE3_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME1_PIPE3_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME1_PIPE3_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME1_PIPE3_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME1_PIPE3_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME1_PIPE3_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME1_PIPE3_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME1_PIPE3_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME1_PIPE3_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME1_PIPE3_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME1_PIPE3_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME1_PIPE3_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME1_PIPE3_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME1_PIPE3_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME1_PIPE3_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME2_PIPE0_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME2_PIPE0_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME2_PIPE0_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME2_PIPE0_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME2_PIPE0_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME2_PIPE0_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME2_PIPE0_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME2_PIPE0_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME2_PIPE0_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME2_PIPE0_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME2_PIPE0_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME2_PIPE0_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME2_PIPE0_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME2_PIPE0_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME2_PIPE0_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME2_PIPE0_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME2_PIPE0_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME2_PIPE0_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME2_PIPE0_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME2_PIPE0_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME2_PIPE0_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME2_PIPE1_INT_STATUS: single-bit *_INT_STATUS flags at bits 13, 14, 17,
 * 23, 24, 26, 27, 29, 30 and 31 (same field names and positions as
 * CPC_INT_STATUS).
 */
#define CP_ME2_PIPE1_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME2_PIPE1_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME2_PIPE1_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME2_PIPE1_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME2_PIPE1_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME2_PIPE1_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME2_PIPE1_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME2_PIPE1_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME2_PIPE1_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME2_PIPE1_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME2_PIPE1_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME2_PIPE1_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME2_PIPE1_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME2_PIPE1_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME2_PIPE1_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME2_PIPE1_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME2_PIPE1_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME2_PIPE1_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME2_PIPE1_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME2_PIPE1_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME2_PIPE2_INT_STATUS bit-field constants (_MASK / __SHIFT pairs).
 * All fields are single bits at positions 13, 14, 17, 23, 24, 26, 27, 29-31.
 */
#define CP_ME2_PIPE2_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME2_PIPE2_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME2_PIPE2_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME2_PIPE2_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME2_PIPE2_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME2_PIPE2_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME2_PIPE2_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME2_PIPE2_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME2_PIPE2_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME2_PIPE2_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME2_PIPE2_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME2_PIPE2_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME2_PIPE2_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME2_PIPE2_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME2_PIPE2_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME2_PIPE2_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME2_PIPE2_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME2_PIPE2_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME2_PIPE2_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME2_PIPE2_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME2_PIPE3_INT_STATUS bit-field constants (_MASK / __SHIFT pairs).
 * All fields are single bits at positions 13, 14, 17, 23, 24, 26, 27, 29-31.
 */
#define CP_ME2_PIPE3_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS_MASK 0x2000
#define CP_ME2_PIPE3_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd
#define CP_ME2_PIPE3_INT_STATUS__CP_ECC_ERROR_INT_STATUS_MASK 0x4000
#define CP_ME2_PIPE3_INT_STATUS__CP_ECC_ERROR_INT_STATUS__SHIFT 0xe
#define CP_ME2_PIPE3_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS_MASK 0x20000
#define CP_ME2_PIPE3_INT_STATUS__WRM_POLL_TIMEOUT_INT_STATUS__SHIFT 0x11
#define CP_ME2_PIPE3_INT_STATUS__PRIV_REG_INT_STATUS_MASK 0x800000
#define CP_ME2_PIPE3_INT_STATUS__PRIV_REG_INT_STATUS__SHIFT 0x17
#define CP_ME2_PIPE3_INT_STATUS__OPCODE_ERROR_INT_STATUS_MASK 0x1000000
#define CP_ME2_PIPE3_INT_STATUS__OPCODE_ERROR_INT_STATUS__SHIFT 0x18
#define CP_ME2_PIPE3_INT_STATUS__TIME_STAMP_INT_STATUS_MASK 0x4000000
#define CP_ME2_PIPE3_INT_STATUS__TIME_STAMP_INT_STATUS__SHIFT 0x1a
#define CP_ME2_PIPE3_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS_MASK 0x8000000
#define CP_ME2_PIPE3_INT_STATUS__RESERVED_BIT_ERROR_INT_STATUS__SHIFT 0x1b
#define CP_ME2_PIPE3_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000
#define CP_ME2_PIPE3_INT_STATUS__GENERIC2_INT_STATUS__SHIFT 0x1d
#define CP_ME2_PIPE3_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000
#define CP_ME2_PIPE3_INT_STATUS__GENERIC1_INT_STATUS__SHIFT 0x1e
#define CP_ME2_PIPE3_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000
#define CP_ME2_PIPE3_INT_STATUS__GENERIC0_INT_STATUS__SHIFT 0x1f
/*
 * CP_ME1_INT_STAT_DEBUG bit-field constants (_MASK / __SHIFT pairs).
 * The *_INT_ASSERTED fields sit at the same bit positions as the
 * *_INT_STATUS fields of the CP_ME*_PIPE*_INT_STATUS definitions
 * (single bits 13, 14, 17, 23, 24, 26, 27, 29-31).
 */
#define CP_ME1_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED_MASK 0x2000
#define CP_ME1_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED__SHIFT 0xd
#define CP_ME1_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED_MASK 0x4000
#define CP_ME1_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED__SHIFT 0xe
#define CP_ME1_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED_MASK 0x20000
#define CP_ME1_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED__SHIFT 0x11
#define CP_ME1_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED_MASK 0x800000
#define CP_ME1_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED__SHIFT 0x17
#define CP_ME1_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED_MASK 0x1000000
#define CP_ME1_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED__SHIFT 0x18
#define CP_ME1_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED_MASK 0x4000000
#define CP_ME1_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED__SHIFT 0x1a
#define CP_ME1_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED_MASK 0x8000000
#define CP_ME1_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED__SHIFT 0x1b
#define CP_ME1_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED_MASK 0x20000000
#define CP_ME1_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED__SHIFT 0x1d
#define CP_ME1_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED_MASK 0x40000000
#define CP_ME1_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED__SHIFT 0x1e
#define CP_ME1_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED_MASK 0x80000000
#define CP_ME1_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED__SHIFT 0x1f
/*
 * CP_ME2_INT_STAT_DEBUG bit-field constants (_MASK / __SHIFT pairs).
 * Same field names and bit positions as CP_ME1_INT_STAT_DEBUG; every field
 * is a single bit.
 */
#define CP_ME2_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED_MASK 0x2000
#define CP_ME2_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED__SHIFT 0xd
#define CP_ME2_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED_MASK 0x4000
#define CP_ME2_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED__SHIFT 0xe
#define CP_ME2_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED_MASK 0x20000
#define CP_ME2_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED__SHIFT 0x11
#define CP_ME2_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED_MASK 0x800000
#define CP_ME2_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED__SHIFT 0x17
#define CP_ME2_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED_MASK 0x1000000
#define CP_ME2_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED__SHIFT 0x18
#define CP_ME2_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED_MASK 0x4000000
#define CP_ME2_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED__SHIFT 0x1a
#define CP_ME2_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED_MASK 0x8000000
#define CP_ME2_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED__SHIFT 0x1b
#define CP_ME2_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED_MASK 0x20000000
#define CP_ME2_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED__SHIFT 0x1d
#define CP_ME2_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED_MASK 0x40000000
#define CP_ME2_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED__SHIFT 0x1e
#define CP_ME2_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED_MASK 0x80000000
#define CP_ME2_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED__SHIFT 0x1f
/*
 * CP_ME1 pipe priority constants (_MASK / __SHIFT pairs).
 * CP_ME1_PIPE_PRIORITY_CNTS packs four 8-bit counts (PRIORITY1, 2A, 2B, 3)
 * into bits 0-7, 8-15, 16-23 and 24-31.
 * CP_ME1_PIPE0..3_PRIORITY each hold one 2-bit PRIORITY field in bits 0-1.
 */
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY1_CNT_MASK 0xff
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY1_CNT__SHIFT 0x0
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT_MASK 0xff00
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT__SHIFT 0x8
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT_MASK 0xff0000
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT__SHIFT 0x10
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY3_CNT_MASK 0xff000000
#define CP_ME1_PIPE_PRIORITY_CNTS__PRIORITY3_CNT__SHIFT 0x18
#define CP_ME1_PIPE0_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME1_PIPE0_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME1_PIPE1_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME1_PIPE1_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME1_PIPE2_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME1_PIPE2_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME1_PIPE3_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME1_PIPE3_PRIORITY__PRIORITY__SHIFT 0x0
/*
 * CP_ME2 pipe priority constants (_MASK / __SHIFT pairs).
 * CP_ME2_PIPE_PRIORITY_CNTS packs four 8-bit counts (PRIORITY1, 2A, 2B, 3)
 * into bits 0-7, 8-15, 16-23 and 24-31.
 * CP_ME2_PIPE0..3_PRIORITY each hold one 2-bit PRIORITY field in bits 0-1.
 */
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY1_CNT_MASK 0xff
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY1_CNT__SHIFT 0x0
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT_MASK 0xff00
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY2A_CNT__SHIFT 0x8
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT_MASK 0xff0000
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY2B_CNT__SHIFT 0x10
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY3_CNT_MASK 0xff000000
#define CP_ME2_PIPE_PRIORITY_CNTS__PRIORITY3_CNT__SHIFT 0x18
#define CP_ME2_PIPE0_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME2_PIPE0_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME2_PIPE1_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME2_PIPE1_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME2_PIPE2_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME2_PIPE2_PRIORITY__PRIORITY__SHIFT 0x0
#define CP_ME2_PIPE3_PRIORITY__PRIORITY_MASK 0x3
#define CP_ME2_PIPE3_PRIORITY__PRIORITY__SHIFT 0x0
/*
 * *_PRGRM_CNTR_START (IP_START) and *_INTR_ROUTINE_START (IR_START)
 * field constants for the CE, PFP, ME, MEC1 and MEC2 variants.
 * Each definition has a single field starting at bit 0. Note the width
 * difference: CE/PFP/ME use an 11-bit mask (0x7ff) while MEC1/MEC2 use a
 * 12-bit mask (0xfff).
 */
#define CP_CE_PRGRM_CNTR_START__IP_START_MASK 0x7ff
#define CP_CE_PRGRM_CNTR_START__IP_START__SHIFT 0x0
#define CP_PFP_PRGRM_CNTR_START__IP_START_MASK 0x7ff
#define CP_PFP_PRGRM_CNTR_START__IP_START__SHIFT 0x0
#define CP_ME_PRGRM_CNTR_START__IP_START_MASK 0x7ff
#define CP_ME_PRGRM_CNTR_START__IP_START__SHIFT 0x0
#define CP_MEC1_PRGRM_CNTR_START__IP_START_MASK 0xfff
#define CP_MEC1_PRGRM_CNTR_START__IP_START__SHIFT 0x0
#define CP_MEC2_PRGRM_CNTR_START__IP_START_MASK 0xfff
#define CP_MEC2_PRGRM_CNTR_START__IP_START__SHIFT 0x0
#define CP_CE_INTR_ROUTINE_START__IR_START_MASK 0x7ff
#define CP_CE_INTR_ROUTINE_START__IR_START__SHIFT 0x0
#define CP_PFP_INTR_ROUTINE_START__IR_START_MASK 0x7ff
#define CP_PFP_INTR_ROUTINE_START__IR_START__SHIFT 0x0
#define CP_ME_INTR_ROUTINE_START__IR_START_MASK 0x7ff
#define CP_ME_INTR_ROUTINE_START__IR_START__SHIFT 0x0
#define CP_MEC1_INTR_ROUTINE_START__IR_START_MASK 0xfff
#define CP_MEC1_INTR_ROUTINE_START__IR_START__SHIFT 0x0
#define CP_MEC2_INTR_ROUTINE_START__IR_START_MASK 0xfff
#define CP_MEC2_INTR_ROUTINE_START__IR_START__SHIFT 0x0
/*
 * CP_CONTEXT_CNTL and CP_MAX_CONTEXT field constants (_MASK / __SHIFT pairs).
 * CP_CONTEXT_CNTL has four 3-bit fields: ME0PIPE0 MAX_WD_CNTX (bits 0-2) and
 * MAX_PIPE_CNTX (bits 4-6), ME0PIPE1 MAX_WD_CNTX (bits 16-18) and
 * MAX_PIPE_CNTX (bits 20-22). CP_MAX_CONTEXT has one 3-bit field at bit 0.
 */
#define CP_CONTEXT_CNTL__ME0PIPE0_MAX_WD_CNTX_MASK 0x7
#define CP_CONTEXT_CNTL__ME0PIPE0_MAX_WD_CNTX__SHIFT 0x0
#define CP_CONTEXT_CNTL__ME0PIPE0_MAX_PIPE_CNTX_MASK 0x70
#define CP_CONTEXT_CNTL__ME0PIPE0_MAX_PIPE_CNTX__SHIFT 0x4
#define CP_CONTEXT_CNTL__ME0PIPE1_MAX_WD_CNTX_MASK 0x70000
#define CP_CONTEXT_CNTL__ME0PIPE1_MAX_WD_CNTX__SHIFT 0x10
#define CP_CONTEXT_CNTL__ME0PIPE1_MAX_PIPE_CNTX_MASK 0x700000
#define CP_CONTEXT_CNTL__ME0PIPE1_MAX_PIPE_CNTX__SHIFT 0x14
#define CP_MAX_CONTEXT__MAX_CONTEXT_MASK 0x7
#define CP_MAX_CONTEXT__MAX_CONTEXT__SHIFT 0x0
/*
 * CP_IQ_WAIT_TIME1 / CP_IQ_WAIT_TIME2 field constants (_MASK / __SHIFT pairs).
 * Each definition is split into four 8-bit fields at bits 0-7, 8-15, 16-23
 * and 24-31.
 */
#define CP_IQ_WAIT_TIME1__IB_OFFLOAD_MASK 0xff
#define CP_IQ_WAIT_TIME1__IB_OFFLOAD__SHIFT 0x0
#define CP_IQ_WAIT_TIME1__ATOMIC_OFFLOAD_MASK 0xff00
#define CP_IQ_WAIT_TIME1__ATOMIC_OFFLOAD__SHIFT 0x8
#define CP_IQ_WAIT_TIME1__WRM_OFFLOAD_MASK 0xff0000
#define CP_IQ_WAIT_TIME1__WRM_OFFLOAD__SHIFT 0x10
#define CP_IQ_WAIT_TIME1__GWS_MASK 0xff000000
#define CP_IQ_WAIT_TIME1__GWS__SHIFT 0x18
#define CP_IQ_WAIT_TIME2__QUE_SLEEP_MASK 0xff
#define CP_IQ_WAIT_TIME2__QUE_SLEEP__SHIFT 0x0
#define CP_IQ_WAIT_TIME2__SCH_WAVE_MASK 0xff00
#define CP_IQ_WAIT_TIME2__SCH_WAVE__SHIFT 0x8
#define CP_IQ_WAIT_TIME2__SEM_REARM_MASK 0xff0000
#define CP_IQ_WAIT_TIME2__SEM_REARM__SHIFT 0x10
#define CP_IQ_WAIT_TIME2__DEQ_RETRY_MASK 0xff000000
#define CP_IQ_WAIT_TIME2__DEQ_RETRY__SHIFT 0x18
/*
 * CP_VMID_RESET, CP_VMID_PREEMPT, CPC_INT_CNTX_ID and CP_PQ_STATUS field
 * constants (_MASK / __SHIFT pairs).
 * CP_VMID_RESET and CP_VMID_PREEMPT each split into a 16-bit *_REQUEST field
 * (bits 0-15) and a 16-bit *_STATUS field (bits 16-31).
 * CPC_INT_CNTX_ID has one 16-bit field; CP_PQ_STATUS has two single-bit
 * flags at bits 0 and 1.
 */
#define CP_VMID_RESET__RESET_REQUEST_MASK 0xffff
#define CP_VMID_RESET__RESET_REQUEST__SHIFT 0x0
#define CP_VMID_RESET__RESET_STATUS_MASK 0xffff0000
#define CP_VMID_RESET__RESET_STATUS__SHIFT 0x10
#define CP_VMID_PREEMPT__PREEMPT_REQUEST_MASK 0xffff
#define CP_VMID_PREEMPT__PREEMPT_REQUEST__SHIFT 0x0
#define CP_VMID_PREEMPT__PREEMPT_STATUS_MASK 0xffff0000
#define CP_VMID_PREEMPT__PREEMPT_STATUS__SHIFT 0x10
#define CPC_INT_CNTX_ID__CNTX_ID_MASK 0xffff
#define CPC_INT_CNTX_ID__CNTX_ID__SHIFT 0x0
#define CP_PQ_STATUS__DOORBELL_UPDATED_MASK 0x1
#define CP_PQ_STATUS__DOORBELL_UPDATED__SHIFT 0x0
#define CP_PQ_STATUS__DOORBELL_ENABLE_MASK 0x2
#define CP_PQ_STATUS__DOORBELL_ENABLE__SHIFT 0x1
/*
 * CP_CPC_STATUS bit-field constants (_MASK / __SHIFT pairs).
 * Every field is a single-bit *_BUSY flag: bits 0-12 are contiguous, and
 * CPG_CPC_BUSY / CPF_CPC_BUSY / CPC_BUSY occupy bits 29, 30 and 31.
 * No fields are defined here for bits 13-28.
 */
#define CP_CPC_STATUS__MEC1_BUSY_MASK 0x1
#define CP_CPC_STATUS__MEC1_BUSY__SHIFT 0x0
#define CP_CPC_STATUS__MEC2_BUSY_MASK 0x2
#define CP_CPC_STATUS__MEC2_BUSY__SHIFT 0x1
#define CP_CPC_STATUS__DC0_BUSY_MASK 0x4
#define CP_CPC_STATUS__DC0_BUSY__SHIFT 0x2
#define CP_CPC_STATUS__DC1_BUSY_MASK 0x8
#define CP_CPC_STATUS__DC1_BUSY__SHIFT 0x3
#define CP_CPC_STATUS__RCIU1_BUSY_MASK 0x10
#define CP_CPC_STATUS__RCIU1_BUSY__SHIFT 0x4
#define CP_CPC_STATUS__RCIU2_BUSY_MASK 0x20
#define CP_CPC_STATUS__RCIU2_BUSY__SHIFT 0x5
#define CP_CPC_STATUS__ROQ1_BUSY_MASK 0x40
#define CP_CPC_STATUS__ROQ1_BUSY__SHIFT 0x6
#define CP_CPC_STATUS__ROQ2_BUSY_MASK 0x80
#define CP_CPC_STATUS__ROQ2_BUSY__SHIFT 0x7
#define CP_CPC_STATUS__MIU_RDREQ_BUSY_MASK 0x100
#define CP_CPC_STATUS__MIU_RDREQ_BUSY__SHIFT 0x8
#define CP_CPC_STATUS__MIU_WRREQ_BUSY_MASK 0x200
#define CP_CPC_STATUS__MIU_WRREQ_BUSY__SHIFT 0x9
#define CP_CPC_STATUS__TCIU_BUSY_MASK 0x400
#define CP_CPC_STATUS__TCIU_BUSY__SHIFT 0xa
#define CP_CPC_STATUS__SCRATCH_RAM_BUSY_MASK 0x800
#define CP_CPC_STATUS__SCRATCH_RAM_BUSY__SHIFT 0xb
#define CP_CPC_STATUS__QU_BUSY_MASK 0x1000
#define CP_CPC_STATUS__QU_BUSY__SHIFT 0xc
#define CP_CPC_STATUS__CPG_CPC_BUSY_MASK 0x20000000
#define CP_CPC_STATUS__CPG_CPC_BUSY__SHIFT 0x1d
#define CP_CPC_STATUS__CPF_CPC_BUSY_MASK 0x40000000
#define CP_CPC_STATUS__CPF_CPC_BUSY__SHIFT 0x1e
#define CP_CPC_STATUS__CPC_BUSY_MASK 0x80000000
#define CP_CPC_STATUS__CPC_BUSY__SHIFT 0x1f
/*
 * CP_CPC_BUSY_STAT bit-field constants (_MASK / __SHIFT pairs).
 * All fields are single-bit *_BUSY flags. The MEC1_* flags occupy bits 0-13
 * and the MEC2_* flags mirror them, in the same order, at bits 16-29.
 *
 * NOTE(review): "SEMAPOHRE" in the MEC1/MEC2 *_SEMAPOHRE_BUSY names looks
 * like a misspelling of "SEMAPHORE" (CP_CPF_STATUS__SEMAPHORE_BUSY uses the
 * usual spelling). The identifiers are left as-is because renaming them
 * would break any code that already references these macros.
 */
#define CP_CPC_BUSY_STAT__MEC1_LOAD_BUSY_MASK 0x1
#define CP_CPC_BUSY_STAT__MEC1_LOAD_BUSY__SHIFT 0x0
#define CP_CPC_BUSY_STAT__MEC1_SEMAPOHRE_BUSY_MASK 0x2
#define CP_CPC_BUSY_STAT__MEC1_SEMAPOHRE_BUSY__SHIFT 0x1
#define CP_CPC_BUSY_STAT__MEC1_MUTEX_BUSY_MASK 0x4
#define CP_CPC_BUSY_STAT__MEC1_MUTEX_BUSY__SHIFT 0x2
#define CP_CPC_BUSY_STAT__MEC1_MESSAGE_BUSY_MASK 0x8
#define CP_CPC_BUSY_STAT__MEC1_MESSAGE_BUSY__SHIFT 0x3
#define CP_CPC_BUSY_STAT__MEC1_EOP_QUEUE_BUSY_MASK 0x10
#define CP_CPC_BUSY_STAT__MEC1_EOP_QUEUE_BUSY__SHIFT 0x4
#define CP_CPC_BUSY_STAT__MEC1_IQ_QUEUE_BUSY_MASK 0x20
#define CP_CPC_BUSY_STAT__MEC1_IQ_QUEUE_BUSY__SHIFT 0x5
#define CP_CPC_BUSY_STAT__MEC1_IB_QUEUE_BUSY_MASK 0x40
#define CP_CPC_BUSY_STAT__MEC1_IB_QUEUE_BUSY__SHIFT 0x6
#define CP_CPC_BUSY_STAT__MEC1_TC_BUSY_MASK 0x80
#define CP_CPC_BUSY_STAT__MEC1_TC_BUSY__SHIFT 0x7
#define CP_CPC_BUSY_STAT__MEC1_DMA_BUSY_MASK 0x100
#define CP_CPC_BUSY_STAT__MEC1_DMA_BUSY__SHIFT 0x8
#define CP_CPC_BUSY_STAT__MEC1_PARTIAL_FLUSH_BUSY_MASK 0x200
#define CP_CPC_BUSY_STAT__MEC1_PARTIAL_FLUSH_BUSY__SHIFT 0x9
#define CP_CPC_BUSY_STAT__MEC1_PIPE0_BUSY_MASK 0x400
#define CP_CPC_BUSY_STAT__MEC1_PIPE0_BUSY__SHIFT 0xa
#define CP_CPC_BUSY_STAT__MEC1_PIPE1_BUSY_MASK 0x800
#define CP_CPC_BUSY_STAT__MEC1_PIPE1_BUSY__SHIFT 0xb
#define CP_CPC_BUSY_STAT__MEC1_PIPE2_BUSY_MASK 0x1000
#define CP_CPC_BUSY_STAT__MEC1_PIPE2_BUSY__SHIFT 0xc
#define CP_CPC_BUSY_STAT__MEC1_PIPE3_BUSY_MASK 0x2000
#define CP_CPC_BUSY_STAT__MEC1_PIPE3_BUSY__SHIFT 0xd
#define CP_CPC_BUSY_STAT__MEC2_LOAD_BUSY_MASK 0x10000
#define CP_CPC_BUSY_STAT__MEC2_LOAD_BUSY__SHIFT 0x10
#define CP_CPC_BUSY_STAT__MEC2_SEMAPOHRE_BUSY_MASK 0x20000
#define CP_CPC_BUSY_STAT__MEC2_SEMAPOHRE_BUSY__SHIFT 0x11
#define CP_CPC_BUSY_STAT__MEC2_MUTEX_BUSY_MASK 0x40000
#define CP_CPC_BUSY_STAT__MEC2_MUTEX_BUSY__SHIFT 0x12
#define CP_CPC_BUSY_STAT__MEC2_MESSAGE_BUSY_MASK 0x80000
#define CP_CPC_BUSY_STAT__MEC2_MESSAGE_BUSY__SHIFT 0x13
#define CP_CPC_BUSY_STAT__MEC2_EOP_QUEUE_BUSY_MASK 0x100000
#define CP_CPC_BUSY_STAT__MEC2_EOP_QUEUE_BUSY__SHIFT 0x14
#define CP_CPC_BUSY_STAT__MEC2_IQ_QUEUE_BUSY_MASK 0x200000
#define CP_CPC_BUSY_STAT__MEC2_IQ_QUEUE_BUSY__SHIFT 0x15
#define CP_CPC_BUSY_STAT__MEC2_IB_QUEUE_BUSY_MASK 0x400000
#define CP_CPC_BUSY_STAT__MEC2_IB_QUEUE_BUSY__SHIFT 0x16
#define CP_CPC_BUSY_STAT__MEC2_TC_BUSY_MASK 0x800000
#define CP_CPC_BUSY_STAT__MEC2_TC_BUSY__SHIFT 0x17
#define CP_CPC_BUSY_STAT__MEC2_DMA_BUSY_MASK 0x1000000
#define CP_CPC_BUSY_STAT__MEC2_DMA_BUSY__SHIFT 0x18
#define CP_CPC_BUSY_STAT__MEC2_PARTIAL_FLUSH_BUSY_MASK 0x2000000
#define CP_CPC_BUSY_STAT__MEC2_PARTIAL_FLUSH_BUSY__SHIFT 0x19
#define CP_CPC_BUSY_STAT__MEC2_PIPE0_BUSY_MASK 0x4000000
#define CP_CPC_BUSY_STAT__MEC2_PIPE0_BUSY__SHIFT 0x1a
#define CP_CPC_BUSY_STAT__MEC2_PIPE1_BUSY_MASK 0x8000000
#define CP_CPC_BUSY_STAT__MEC2_PIPE1_BUSY__SHIFT 0x1b
#define CP_CPC_BUSY_STAT__MEC2_PIPE2_BUSY_MASK 0x10000000
#define CP_CPC_BUSY_STAT__MEC2_PIPE2_BUSY__SHIFT 0x1c
#define CP_CPC_BUSY_STAT__MEC2_PIPE3_BUSY_MASK 0x20000000
#define CP_CPC_BUSY_STAT__MEC2_PIPE3_BUSY__SHIFT 0x1d
/*
 * CP_CPC_STALLED_STAT1 bit-field constants (_MASK / __SHIFT pairs).
 * All fields are single bits: shared flags at bits 0, 1, 3, 4 and 6,
 * MEC1_* flags at bits 8-13, and the matching MEC2_* flags at bits 16-21.
 * Bits 2, 5 and 7 have no field defined here.
 */
#define CP_CPC_STALLED_STAT1__MIU_RDREQ_FREE_STALL_MASK 0x1
#define CP_CPC_STALLED_STAT1__MIU_RDREQ_FREE_STALL__SHIFT 0x0
#define CP_CPC_STALLED_STAT1__MIU_WRREQ_FREE_STALL_MASK 0x2
#define CP_CPC_STALLED_STAT1__MIU_WRREQ_FREE_STALL__SHIFT 0x1
#define CP_CPC_STALLED_STAT1__RCIU_TX_FREE_STALL_MASK 0x8
#define CP_CPC_STALLED_STAT1__RCIU_TX_FREE_STALL__SHIFT 0x3
#define CP_CPC_STALLED_STAT1__RCIU_PRIV_VIOLATION_MASK 0x10
#define CP_CPC_STALLED_STAT1__RCIU_PRIV_VIOLATION__SHIFT 0x4
#define CP_CPC_STALLED_STAT1__TCIU_TX_FREE_STALL_MASK 0x40
#define CP_CPC_STALLED_STAT1__TCIU_TX_FREE_STALL__SHIFT 0x6
#define CP_CPC_STALLED_STAT1__MEC1_DECODING_PACKET_MASK 0x100
#define CP_CPC_STALLED_STAT1__MEC1_DECODING_PACKET__SHIFT 0x8
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_RCIU_MASK 0x200
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_RCIU__SHIFT 0x9
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_RCIU_READ_MASK 0x400
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_RCIU_READ__SHIFT 0xa
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_MC_READ_MASK 0x800
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_MC_READ__SHIFT 0xb
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_MC_WR_ACK_MASK 0x1000
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_MC_WR_ACK__SHIFT 0xc
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_ROQ_DATA_MASK 0x2000
#define CP_CPC_STALLED_STAT1__MEC1_WAIT_ON_ROQ_DATA__SHIFT 0xd
#define CP_CPC_STALLED_STAT1__MEC2_DECODING_PACKET_MASK 0x10000
#define CP_CPC_STALLED_STAT1__MEC2_DECODING_PACKET__SHIFT 0x10
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_RCIU_MASK 0x20000
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_RCIU__SHIFT 0x11
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_RCIU_READ_MASK 0x40000
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_RCIU_READ__SHIFT 0x12
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_MC_READ_MASK 0x80000
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_MC_READ__SHIFT 0x13
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_MC_WR_ACK_MASK 0x100000
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_MC_WR_ACK__SHIFT 0x14
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_ROQ_DATA_MASK 0x200000
#define CP_CPC_STALLED_STAT1__MEC2_WAIT_ON_ROQ_DATA__SHIFT 0x15
/*
 * CP_CPF_STATUS bit-field constants (_MASK / __SHIFT pairs).
 * Every field is a single-bit *_BUSY flag: bits 0-15 are contiguous, and
 * CPC_CPF_BUSY / CPF_BUSY occupy bits 30 and 31. No fields are defined here
 * for bits 16-29.
 */
#define CP_CPF_STATUS__POST_WPTR_GFX_BUSY_MASK 0x1
#define CP_CPF_STATUS__POST_WPTR_GFX_BUSY__SHIFT 0x0
#define CP_CPF_STATUS__CSF_BUSY_MASK 0x2
#define CP_CPF_STATUS__CSF_BUSY__SHIFT 0x1
#define CP_CPF_STATUS__MIU_RDREQ_BUSY_MASK 0x4
#define CP_CPF_STATUS__MIU_RDREQ_BUSY__SHIFT 0x2
#define CP_CPF_STATUS__MIU_WRREQ_BUSY_MASK 0x8
#define CP_CPF_STATUS__MIU_WRREQ_BUSY__SHIFT 0x3
#define CP_CPF_STATUS__ROQ_ALIGN_BUSY_MASK 0x10
#define CP_CPF_STATUS__ROQ_ALIGN_BUSY__SHIFT 0x4
#define CP_CPF_STATUS__ROQ_RING_BUSY_MASK 0x20
#define CP_CPF_STATUS__ROQ_RING_BUSY__SHIFT 0x5
#define CP_CPF_STATUS__ROQ_INDIRECT1_BUSY_MASK 0x40
#define CP_CPF_STATUS__ROQ_INDIRECT1_BUSY__SHIFT 0x6
#define CP_CPF_STATUS__ROQ_INDIRECT2_BUSY_MASK 0x80
#define CP_CPF_STATUS__ROQ_INDIRECT2_BUSY__SHIFT 0x7
#define CP_CPF_STATUS__ROQ_STATE_BUSY_MASK 0x100
#define CP_CPF_STATUS__ROQ_STATE_BUSY__SHIFT 0x8
#define CP_CPF_STATUS__ROQ_CE_RING_BUSY_MASK 0x200
#define CP_CPF_STATUS__ROQ_CE_RING_BUSY__SHIFT 0x9
#define CP_CPF_STATUS__ROQ_CE_INDIRECT1_BUSY_MASK 0x400
#define CP_CPF_STATUS__ROQ_CE_INDIRECT1_BUSY__SHIFT 0xa
#define CP_CPF_STATUS__ROQ_CE_INDIRECT2_BUSY_MASK 0x800
#define CP_CPF_STATUS__ROQ_CE_INDIRECT2_BUSY__SHIFT 0xb
#define CP_CPF_STATUS__SEMAPHORE_BUSY_MASK 0x1000
#define CP_CPF_STATUS__SEMAPHORE_BUSY__SHIFT 0xc
#define CP_CPF_STATUS__INTERRUPT_BUSY_MASK 0x2000
#define CP_CPF_STATUS__INTERRUPT_BUSY__SHIFT 0xd
#define CP_CPF_STATUS__TCIU_BUSY_MASK 0x4000
#define CP_CPF_STATUS__TCIU_BUSY__SHIFT 0xe
#define CP_CPF_STATUS__HQD_BUSY_MASK 0x8000
#define CP_CPF_STATUS__HQD_BUSY__SHIFT 0xf
#define CP_CPF_STATUS__CPC_CPF_BUSY_MASK 0x40000000
#define CP_CPF_STATUS__CPC_CPF_BUSY__SHIFT 0x1e
#define CP_CPF_STATUS__CPF_BUSY_MASK 0x80000000
#define CP_CPF_STATUS__CPF_BUSY__SHIFT 0x1f
/*
 * CP_CPF_BUSY_STAT bit-field constants (_MASK / __SHIFT pairs).
 * All fields are single bits covering bits 0-9 and 11-31; bit 10 has no
 * field defined here. The HQD_* flags occupy bits 12-31.
 */
#define CP_CPF_BUSY_STAT__REG_BUS_FIFO_BUSY_MASK 0x1
#define CP_CPF_BUSY_STAT__REG_BUS_FIFO_BUSY__SHIFT 0x0
#define CP_CPF_BUSY_STAT__CSF_RING_BUSY_MASK 0x2
#define CP_CPF_BUSY_STAT__CSF_RING_BUSY__SHIFT 0x1
#define CP_CPF_BUSY_STAT__CSF_INDIRECT1_BUSY_MASK 0x4
#define CP_CPF_BUSY_STAT__CSF_INDIRECT1_BUSY__SHIFT 0x2
#define CP_CPF_BUSY_STAT__CSF_INDIRECT2_BUSY_MASK 0x8
#define CP_CPF_BUSY_STAT__CSF_INDIRECT2_BUSY__SHIFT 0x3
#define CP_CPF_BUSY_STAT__CSF_STATE_BUSY_MASK 0x10
#define CP_CPF_BUSY_STAT__CSF_STATE_BUSY__SHIFT 0x4
#define CP_CPF_BUSY_STAT__CSF_CE_INDR1_BUSY_MASK 0x20
#define CP_CPF_BUSY_STAT__CSF_CE_INDR1_BUSY__SHIFT 0x5
#define CP_CPF_BUSY_STAT__CSF_CE_INDR2_BUSY_MASK 0x40
#define CP_CPF_BUSY_STAT__CSF_CE_INDR2_BUSY__SHIFT 0x6
#define CP_CPF_BUSY_STAT__CSF_ARBITER_BUSY_MASK 0x80
#define CP_CPF_BUSY_STAT__CSF_ARBITER_BUSY__SHIFT 0x7
#define CP_CPF_BUSY_STAT__CSF_INPUT_BUSY_MASK 0x100
#define CP_CPF_BUSY_STAT__CSF_INPUT_BUSY__SHIFT 0x8
#define CP_CPF_BUSY_STAT__OUTSTANDING_READ_TAGS_MASK 0x200
#define CP_CPF_BUSY_STAT__OUTSTANDING_READ_TAGS__SHIFT 0x9
#define CP_CPF_BUSY_STAT__HPD_PROCESSING_EOP_BUSY_MASK 0x800
#define CP_CPF_BUSY_STAT__HPD_PROCESSING_EOP_BUSY__SHIFT 0xb
#define CP_CPF_BUSY_STAT__HQD_DISPATCH_BUSY_MASK 0x1000
#define CP_CPF_BUSY_STAT__HQD_DISPATCH_BUSY__SHIFT 0xc
#define CP_CPF_BUSY_STAT__HQD_IQ_TIMER_BUSY_MASK 0x2000
#define CP_CPF_BUSY_STAT__HQD_IQ_TIMER_BUSY__SHIFT 0xd
#define CP_CPF_BUSY_STAT__HQD_DMA_OFFLOAD_BUSY_MASK 0x4000
#define CP_CPF_BUSY_STAT__HQD_DMA_OFFLOAD_BUSY__SHIFT 0xe
#define CP_CPF_BUSY_STAT__HQD_WAIT_SEMAPHORE_BUSY_MASK 0x8000
#define CP_CPF_BUSY_STAT__HQD_WAIT_SEMAPHORE_BUSY__SHIFT 0xf
#define CP_CPF_BUSY_STAT__HQD_SIGNAL_SEMAPHORE_BUSY_MASK 0x10000
#define CP_CPF_BUSY_STAT__HQD_SIGNAL_SEMAPHORE_BUSY__SHIFT 0x10
#define CP_CPF_BUSY_STAT__HQD_MESSAGE_BUSY_MASK 0x20000
#define CP_CPF_BUSY_STAT__HQD_MESSAGE_BUSY__SHIFT 0x11
#define CP_CPF_BUSY_STAT__HQD_PQ_FETCHER_BUSY_MASK 0x40000
#define CP_CPF_BUSY_STAT__HQD_PQ_FETCHER_BUSY__SHIFT 0x12
#define CP_CPF_BUSY_STAT__HQD_IB_FETCHER_BUSY_MASK 0x80000
#define CP_CPF_BUSY_STAT__HQD_IB_FETCHER_BUSY__SHIFT 0x13
#define CP_CPF_BUSY_STAT__HQD_IQ_FETCHER_BUSY_MASK 0x100000
#define CP_CPF_BUSY_STAT__HQD_IQ_FETCHER_BUSY__SHIFT 0x14
#define CP_CPF_BUSY_STAT__HQD_EOP_FETCHER_BUSY_MASK 0x200000
#define CP_CPF_BUSY_STAT__HQD_EOP_FETCHER_BUSY__SHIFT 0x15
#define CP_CPF_BUSY_STAT__HQD_CONSUMED_RPTR_BUSY_MASK 0x400000
#define CP_CPF_BUSY_STAT__HQD_CONSUMED_RPTR_BUSY__SHIFT 0x16
#define CP_CPF_BUSY_STAT__HQD_FETCHER_ARB_BUSY_MASK 0x800000
#define CP_CPF_BUSY_STAT__HQD_FETCHER_ARB_BUSY__SHIFT 0x17
#define CP_CPF_BUSY_STAT__HQD_ROQ_ALIGN_BUSY_MASK 0x1000000
#define CP_CPF_BUSY_STAT__HQD_ROQ_ALIGN_BUSY__SHIFT 0x18
#define CP_CPF_BUSY_STAT__HQD_ROQ_EOP_BUSY_MASK 0x2000000
#define CP_CPF_BUSY_STAT__HQD_ROQ_EOP_BUSY__SHIFT 0x19
#define CP_CPF_BUSY_STAT__HQD_ROQ_IQ_BUSY_MASK 0x4000000
#define CP_CPF_BUSY_STAT__HQD_ROQ_IQ_BUSY__SHIFT 0x1a
#define CP_CPF_BUSY_STAT__HQD_ROQ_PQ_BUSY_MASK 0x8000000
#define CP_CPF_BUSY_STAT__HQD_ROQ_PQ_BUSY__SHIFT 0x1b
#define CP_CPF_BUSY_STAT__HQD_ROQ_IB_BUSY_MASK 0x10000000
#define CP_CPF_BUSY_STAT__HQD_ROQ_IB_BUSY__SHIFT 0x1c
#define CP_CPF_BUSY_STAT__HQD_WPTR_POLL_BUSY_MASK 0x20000000
#define CP_CPF_BUSY_STAT__HQD_WPTR_POLL_BUSY__SHIFT 0x1d
#define CP_CPF_BUSY_STAT__HQD_PQ_BUSY_MASK 0x40000000
#define CP_CPF_BUSY_STAT__HQD_PQ_BUSY__SHIFT 0x1e
#define CP_CPF_BUSY_STAT__HQD_IB_BUSY_MASK 0x80000000
#define CP_CPF_BUSY_STAT__HQD_IB_BUSY__SHIFT 0x1f
/*
 * CP_CPF_STALLED_STAT1 bit-field constants (_MASK / __SHIFT pairs).
 * Seven single-bit flags occupying bits 0-6.
 */
#define CP_CPF_STALLED_STAT1__RING_FETCHING_DATA_MASK 0x1
#define CP_CPF_STALLED_STAT1__RING_FETCHING_DATA__SHIFT 0x0
#define CP_CPF_STALLED_STAT1__INDR1_FETCHING_DATA_MASK 0x2
#define CP_CPF_STALLED_STAT1__INDR1_FETCHING_DATA__SHIFT 0x1
#define CP_CPF_STALLED_STAT1__INDR2_FETCHING_DATA_MASK 0x4
#define CP_CPF_STALLED_STAT1__INDR2_FETCHING_DATA__SHIFT 0x2
#define CP_CPF_STALLED_STAT1__STATE_FETCHING_DATA_MASK 0x8
#define CP_CPF_STALLED_STAT1__STATE_FETCHING_DATA__SHIFT 0x3
#define CP_CPF_STALLED_STAT1__MIU_WAITING_ON_RDREQ_FREE_MASK 0x10
#define CP_CPF_STALLED_STAT1__MIU_WAITING_ON_RDREQ_FREE__SHIFT 0x4
#define CP_CPF_STALLED_STAT1__TCIU_WAITING_ON_FREE_MASK 0x20
#define CP_CPF_STALLED_STAT1__TCIU_WAITING_ON_FREE__SHIFT 0x5
#define CP_CPF_STALLED_STAT1__TCIU_WAITING_ON_TAGS_MASK 0x40
#define CP_CPF_STALLED_STAT1__TCIU_WAITING_ON_TAGS__SHIFT 0x6
/*
 * CP_CPC_MC_CNTL, CP_CPC_GRBM_FREE_COUNT, CP_MEC_CNTL,
 * CP_MEC_ME1/ME2_HEADER_DUMP and CP_CPC_SCRATCH_INDEX/DATA field constants
 * (_MASK / __SHIFT pairs).
 * CP_MEC_CNTL fields are single bits: MEC_INVALIDATE_ICACHE at bit 4 and the
 * ME2/ME1 HALT and STEP flags at bits 28-31. The HEADER_DUMP and
 * SCRATCH_DATA fields span the full 32 bits.
 */
#define CP_CPC_MC_CNTL__PACK_DELAY_CNT_MASK 0x1f
#define CP_CPC_MC_CNTL__PACK_DELAY_CNT__SHIFT 0x0
#define CP_CPC_GRBM_FREE_COUNT__FREE_COUNT_MASK 0x3f
#define CP_CPC_GRBM_FREE_COUNT__FREE_COUNT__SHIFT 0x0
#define CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK 0x10
#define CP_MEC_CNTL__MEC_INVALIDATE_ICACHE__SHIFT 0x4
#define CP_MEC_CNTL__MEC_ME2_HALT_MASK 0x10000000
#define CP_MEC_CNTL__MEC_ME2_HALT__SHIFT 0x1c
#define CP_MEC_CNTL__MEC_ME2_STEP_MASK 0x20000000
#define CP_MEC_CNTL__MEC_ME2_STEP__SHIFT 0x1d
#define CP_MEC_CNTL__MEC_ME1_HALT_MASK 0x40000000
#define CP_MEC_CNTL__MEC_ME1_HALT__SHIFT 0x1e
#define CP_MEC_CNTL__MEC_ME1_STEP_MASK 0x80000000
#define CP_MEC_CNTL__MEC_ME1_STEP__SHIFT 0x1f
#define CP_MEC_ME1_HEADER_DUMP__HEADER_DUMP_MASK 0xffffffff
#define CP_MEC_ME1_HEADER_DUMP__HEADER_DUMP__SHIFT 0x0
#define CP_MEC_ME2_HEADER_DUMP__HEADER_DUMP_MASK 0xffffffff
#define CP_MEC_ME2_HEADER_DUMP__HEADER_DUMP__SHIFT 0x0
#define CP_CPC_SCRATCH_INDEX__SCRATCH_INDEX_MASK 0xff
#define CP_CPC_SCRATCH_INDEX__SCRATCH_INDEX__SHIFT 0x0
#define CP_CPC_SCRATCH_DATA__SCRATCH_DATA_MASK 0xffffffff
#define CP_CPC_SCRATCH_DATA__SCRATCH_DATA__SHIFT 0x0
/*
 * CPG_PERFCOUNTER0/1 field constants (_MASK / __SHIFT pairs).
 * *_SELECT / *_SELECT1 carry 6-bit PERF_SEL* fields (bits 0-5 and 10-15);
 * CPG_PERFCOUNTER0_SELECT additionally has a 4-bit CNTR_MODE at bits 20-23.
 * The *_LO and *_HI fields each span a full 32 bits.
 */
#define CPG_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3f
#define CPG_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define CPG_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPG_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPG_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPG_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CPG_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3f
#define CPG_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define CPG_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xfc00
#define CPG_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define CPG_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3f
#define CPG_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define CPG_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xfc00
#define CPG_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define CPG_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define CPG_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define CPG_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPG_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPG_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPG_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * CPC_PERFCOUNTER0/1 field constants (_MASK / __SHIFT pairs).
 * Layout matches the CPG_PERFCOUNTER* definitions: 6-bit PERF_SEL* fields at
 * bits 0-5 and 10-15, a 4-bit CNTR_MODE at bits 20-23 on
 * CPC_PERFCOUNTER0_SELECT, and full 32-bit *_LO / *_HI fields.
 */
#define CPC_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3f
#define CPC_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define CPC_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPC_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPC_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPC_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CPC_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3f
#define CPC_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define CPC_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xfc00
#define CPC_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define CPC_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3f
#define CPC_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define CPC_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xfc00
#define CPC_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define CPC_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define CPC_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define CPC_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPC_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPC_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPC_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * CPF_PERFCOUNTER0/1 field constants (_MASK / __SHIFT pairs).
 * Layout matches the CPG/CPC_PERFCOUNTER* definitions: 6-bit PERF_SEL*
 * fields at bits 0-5 and 10-15, a 4-bit CNTR_MODE at bits 20-23 on
 * CPF_PERFCOUNTER0_SELECT, and full 32-bit *_LO / *_HI fields.
 */
#define CPF_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3f
#define CPF_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define CPF_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPF_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPF_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPF_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define CPF_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3f
#define CPF_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define CPF_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xfc00
#define CPF_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define CPF_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3f
#define CPF_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define CPF_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xfc00
#define CPF_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define CPF_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define CPF_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define CPF_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define CPF_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define CPF_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define CPF_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * CP_CPC_HALT_HYST_COUNT and CP_DRAW_* field constants
 * (_MASK / __SHIFT pairs).
 * CP_CPC_HALT_HYST_COUNT: 4-bit COUNT at bit 0.
 * CP_DRAW_WINDOW_LO: MIN in bits 0-15, MAX in bits 16-31.
 * CP_DRAW_WINDOW_CNTL: three single-bit DISABLE_* flags at bits 0-2 and a
 * single-bit MODE at bit 8.
 */
#define CP_CPC_HALT_HYST_COUNT__COUNT_MASK 0xf
#define CP_CPC_HALT_HYST_COUNT__COUNT__SHIFT 0x0
#define CP_DRAW_OBJECT__OBJECT_MASK 0xffffffff
#define CP_DRAW_OBJECT__OBJECT__SHIFT 0x0
#define CP_DRAW_OBJECT_COUNTER__COUNT_MASK 0xffff
#define CP_DRAW_OBJECT_COUNTER__COUNT__SHIFT 0x0
#define CP_DRAW_WINDOW_MASK_HI__WINDOW_MASK_HI_MASK 0xffffffff
#define CP_DRAW_WINDOW_MASK_HI__WINDOW_MASK_HI__SHIFT 0x0
#define CP_DRAW_WINDOW_HI__WINDOW_HI_MASK 0xffffffff
#define CP_DRAW_WINDOW_HI__WINDOW_HI__SHIFT 0x0
#define CP_DRAW_WINDOW_LO__MIN_MASK 0xffff
#define CP_DRAW_WINDOW_LO__MIN__SHIFT 0x0
#define CP_DRAW_WINDOW_LO__MAX_MASK 0xffff0000
#define CP_DRAW_WINDOW_LO__MAX__SHIFT 0x10
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_LO_MAX_MASK 0x1
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_LO_MAX__SHIFT 0x0
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_LO_MIN_MASK 0x2
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_LO_MIN__SHIFT 0x1
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_HI_MASK 0x4
#define CP_DRAW_WINDOW_CNTL__DISABLE_DRAW_WINDOW_HI__SHIFT 0x2
#define CP_DRAW_WINDOW_CNTL__MODE_MASK 0x100
#define CP_DRAW_WINDOW_CNTL__MODE__SHIFT 0x8
/*
 * CP_PRT_LOD_STATS_CNTL0..2 field constants (_MASK / __SHIFT pairs).
 * CNTL0 (BU_SIZE) and CNTL1 (BASE_LO) are full 32-bit fields.
 * CNTL2 packs: BASE_HI bits 0-1, INTERVAL bits 2-9, RESET_CNT bits 10-17,
 * RESET_FORCE bit 18, REPORT_AND_RESET bit 19, MC_ENDIAN_SWAP bits 20-21,
 * MC_VMID bits 23-26. Bit 22 has no field defined here.
 */
#define CP_PRT_LOD_STATS_CNTL0__BU_SIZE_MASK 0xffffffff
#define CP_PRT_LOD_STATS_CNTL0__BU_SIZE__SHIFT 0x0
#define CP_PRT_LOD_STATS_CNTL1__BASE_LO_MASK 0xffffffff
#define CP_PRT_LOD_STATS_CNTL1__BASE_LO__SHIFT 0x0
#define CP_PRT_LOD_STATS_CNTL2__BASE_HI_MASK 0x3
#define CP_PRT_LOD_STATS_CNTL2__BASE_HI__SHIFT 0x0
#define CP_PRT_LOD_STATS_CNTL2__INTERVAL_MASK 0x3fc
#define CP_PRT_LOD_STATS_CNTL2__INTERVAL__SHIFT 0x2
#define CP_PRT_LOD_STATS_CNTL2__RESET_CNT_MASK 0x3fc00
#define CP_PRT_LOD_STATS_CNTL2__RESET_CNT__SHIFT 0xa
#define CP_PRT_LOD_STATS_CNTL2__RESET_FORCE_MASK 0x40000
#define CP_PRT_LOD_STATS_CNTL2__RESET_FORCE__SHIFT 0x12
#define CP_PRT_LOD_STATS_CNTL2__REPORT_AND_RESET_MASK 0x80000
#define CP_PRT_LOD_STATS_CNTL2__REPORT_AND_RESET__SHIFT 0x13
#define CP_PRT_LOD_STATS_CNTL2__MC_ENDIAN_SWAP_MASK 0x300000
#define CP_PRT_LOD_STATS_CNTL2__MC_ENDIAN_SWAP__SHIFT 0x14
#define CP_PRT_LOD_STATS_CNTL2__MC_VMID_MASK 0x7800000
#define CP_PRT_LOD_STATS_CNTL2__MC_VMID__SHIFT 0x17
/*
 * CP_CE_COMPARE_COUNT, CP_CE_DE_COUNT, CP_DE_CE_COUNT,
 * CP_DE_LAST_INVAL_COUNT and CP_DE_DE_COUNT field constants.
 * Each definition has a single field spanning the full 32 bits
 * (mask 0xffffffff, shift 0).
 */
#define CP_CE_COMPARE_COUNT__COMPARE_COUNT_MASK 0xffffffff
#define CP_CE_COMPARE_COUNT__COMPARE_COUNT__SHIFT 0x0
#define CP_CE_DE_COUNT__DRAW_ENGINE_COUNT_MASK 0xffffffff
#define CP_CE_DE_COUNT__DRAW_ENGINE_COUNT__SHIFT 0x0
#define CP_DE_CE_COUNT__CONST_ENGINE_COUNT_MASK 0xffffffff
#define CP_DE_CE_COUNT__CONST_ENGINE_COUNT__SHIFT 0x0
#define CP_DE_LAST_INVAL_COUNT__LAST_INVAL_COUNT_MASK 0xffffffff
#define CP_DE_LAST_INVAL_COUNT__LAST_INVAL_COUNT__SHIFT 0x0
#define CP_DE_DE_COUNT__DRAW_ENGINE_COUNT_MASK 0xffffffff
#define CP_DE_DE_COUNT__DRAW_ENGINE_COUNT__SHIFT 0x0
/*
 * CP_EOP_DONE_* and CP_EOP_LAST_FENCE_LO/HI field constants
 * (_MASK / __SHIFT pairs).
 * CP_EOP_DONE_EVENT_CNTL: WBINV_TC_OP bits 0-6, WBINV_ACTION_ENA bits 12-17,
 *   CACHE_CONTROL bits 25-26, EOP_VOLATILE bit 27.
 * CP_EOP_DONE_DATA_CNTL: CNTX_ID bits 0-15, DST_SEL bits 16-17,
 *   INT_SEL bits 24-26, DATA_SEL bits 29-31.
 * CP_EOP_DONE_ADDR_LO: ADDR_SWAP bits 0-1, ADDR_LO bits 2-31;
 *   CP_EOP_DONE_ADDR_HI: 16-bit ADDR_HI at bit 0.
 * The DATA_LO/HI and LAST_FENCE_LO/HI fields span the full 32 bits.
 */
#define CP_EOP_DONE_EVENT_CNTL__WBINV_TC_OP_MASK 0x7f
#define CP_EOP_DONE_EVENT_CNTL__WBINV_TC_OP__SHIFT 0x0
#define CP_EOP_DONE_EVENT_CNTL__WBINV_ACTION_ENA_MASK 0x3f000
#define CP_EOP_DONE_EVENT_CNTL__WBINV_ACTION_ENA__SHIFT 0xc
#define CP_EOP_DONE_EVENT_CNTL__CACHE_CONTROL_MASK 0x6000000
#define CP_EOP_DONE_EVENT_CNTL__CACHE_CONTROL__SHIFT 0x19
#define CP_EOP_DONE_EVENT_CNTL__EOP_VOLATILE_MASK 0x8000000
#define CP_EOP_DONE_EVENT_CNTL__EOP_VOLATILE__SHIFT 0x1b
#define CP_EOP_DONE_DATA_CNTL__CNTX_ID_MASK 0xffff
#define CP_EOP_DONE_DATA_CNTL__CNTX_ID__SHIFT 0x0
#define CP_EOP_DONE_DATA_CNTL__DST_SEL_MASK 0x30000
#define CP_EOP_DONE_DATA_CNTL__DST_SEL__SHIFT 0x10
#define CP_EOP_DONE_DATA_CNTL__INT_SEL_MASK 0x7000000
#define CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT 0x18
#define CP_EOP_DONE_DATA_CNTL__DATA_SEL_MASK 0xe0000000
#define CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT 0x1d
#define CP_EOP_DONE_ADDR_LO__ADDR_SWAP_MASK 0x3
#define CP_EOP_DONE_ADDR_LO__ADDR_SWAP__SHIFT 0x0
#define CP_EOP_DONE_ADDR_LO__ADDR_LO_MASK 0xfffffffc
#define CP_EOP_DONE_ADDR_LO__ADDR_LO__SHIFT 0x2
#define CP_EOP_DONE_ADDR_HI__ADDR_HI_MASK 0xffff
#define CP_EOP_DONE_ADDR_HI__ADDR_HI__SHIFT 0x0
#define CP_EOP_DONE_DATA_LO__DATA_LO_MASK 0xffffffff
#define CP_EOP_DONE_DATA_LO__DATA_LO__SHIFT 0x0
#define CP_EOP_DONE_DATA_HI__DATA_HI_MASK 0xffffffff
#define CP_EOP_DONE_DATA_HI__DATA_HI__SHIFT 0x0
#define CP_EOP_LAST_FENCE_LO__LAST_FENCE_LO_MASK 0xffffffff
#define CP_EOP_LAST_FENCE_LO__LAST_FENCE_LO__SHIFT 0x0
#define CP_EOP_LAST_FENCE_HI__LAST_FENCE_HI_MASK 0xffffffff
#define CP_EOP_LAST_FENCE_HI__LAST_FENCE_HI__SHIFT 0x0
/*
 * CP_STREAM_OUT_ADDR_LO/HI followed by the CP_NUM_PRIM_WRITTEN_COUNTn and
 * CP_NUM_PRIM_NEEDED_COUNTn LO/HI definitions for n = 0..3.
 * Every CP_NUM_PRIM_* entry is a single full 32-bit field.
 */
#define CP_STREAM_OUT_ADDR_LO__STREAM_OUT_ADDR_SWAP_MASK 0x3
#define CP_STREAM_OUT_ADDR_LO__STREAM_OUT_ADDR_SWAP__SHIFT 0x0
#define CP_STREAM_OUT_ADDR_LO__STREAM_OUT_ADDR_LO_MASK 0xfffffffc
#define CP_STREAM_OUT_ADDR_LO__STREAM_OUT_ADDR_LO__SHIFT 0x2
#define CP_STREAM_OUT_ADDR_HI__STREAM_OUT_ADDR_HI_MASK 0xffff
#define CP_STREAM_OUT_ADDR_HI__STREAM_OUT_ADDR_HI__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT0_LO__NUM_PRIM_WRITTEN_CNT0_LO_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT0_LO__NUM_PRIM_WRITTEN_CNT0_LO__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT0_HI__NUM_PRIM_WRITTEN_CNT0_HI_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT0_HI__NUM_PRIM_WRITTEN_CNT0_HI__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT0_LO__NUM_PRIM_NEEDED_CNT0_LO_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT0_LO__NUM_PRIM_NEEDED_CNT0_LO__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT0_HI__NUM_PRIM_NEEDED_CNT0_HI_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT0_HI__NUM_PRIM_NEEDED_CNT0_HI__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT1_LO__NUM_PRIM_WRITTEN_CNT1_LO_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT1_LO__NUM_PRIM_WRITTEN_CNT1_LO__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT1_HI__NUM_PRIM_WRITTEN_CNT1_HI_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT1_HI__NUM_PRIM_WRITTEN_CNT1_HI__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT1_LO__NUM_PRIM_NEEDED_CNT1_LO_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT1_LO__NUM_PRIM_NEEDED_CNT1_LO__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT1_HI__NUM_PRIM_NEEDED_CNT1_HI_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT1_HI__NUM_PRIM_NEEDED_CNT1_HI__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT2_LO__NUM_PRIM_WRITTEN_CNT2_LO_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT2_LO__NUM_PRIM_WRITTEN_CNT2_LO__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT2_HI__NUM_PRIM_WRITTEN_CNT2_HI_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT2_HI__NUM_PRIM_WRITTEN_CNT2_HI__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT2_LO__NUM_PRIM_NEEDED_CNT2_LO_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT2_LO__NUM_PRIM_NEEDED_CNT2_LO__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT2_HI__NUM_PRIM_NEEDED_CNT2_HI_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT2_HI__NUM_PRIM_NEEDED_CNT2_HI__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT3_LO__NUM_PRIM_WRITTEN_CNT3_LO_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT3_LO__NUM_PRIM_WRITTEN_CNT3_LO__SHIFT 0x0
#define CP_NUM_PRIM_WRITTEN_COUNT3_HI__NUM_PRIM_WRITTEN_CNT3_HI_MASK 0xffffffff
#define CP_NUM_PRIM_WRITTEN_COUNT3_HI__NUM_PRIM_WRITTEN_CNT3_HI__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT3_LO__NUM_PRIM_NEEDED_CNT3_LO_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT3_LO__NUM_PRIM_NEEDED_CNT3_LO__SHIFT 0x0
#define CP_NUM_PRIM_NEEDED_COUNT3_HI__NUM_PRIM_NEEDED_CNT3_HI_MASK 0xffffffff
#define CP_NUM_PRIM_NEEDED_COUNT3_HI__NUM_PRIM_NEEDED_CNT3_HI__SHIFT 0x0
/*
 * CP_PIPE_STATS_ADDR_LO/HI followed by the CP_VGT_*, CP_PA_* and CP_SC_*
 * *_COUNT LO/HI definitions, and CP_STRMOUT_CNTL.
 * All *_COUNT_* entries are single full 32-bit fields.
 */
#define CP_PIPE_STATS_ADDR_LO__PIPE_STATS_ADDR_SWAP_MASK 0x3
#define CP_PIPE_STATS_ADDR_LO__PIPE_STATS_ADDR_SWAP__SHIFT 0x0
#define CP_PIPE_STATS_ADDR_LO__PIPE_STATS_ADDR_LO_MASK 0xfffffffc
#define CP_PIPE_STATS_ADDR_LO__PIPE_STATS_ADDR_LO__SHIFT 0x2
#define CP_PIPE_STATS_ADDR_HI__PIPE_STATS_ADDR_HI_MASK 0xffff
#define CP_PIPE_STATS_ADDR_HI__PIPE_STATS_ADDR_HI__SHIFT 0x0
#define CP_VGT_IAVERT_COUNT_LO__IAVERT_COUNT_LO_MASK 0xffffffff
#define CP_VGT_IAVERT_COUNT_LO__IAVERT_COUNT_LO__SHIFT 0x0
#define CP_VGT_IAVERT_COUNT_HI__IAVERT_COUNT_HI_MASK 0xffffffff
#define CP_VGT_IAVERT_COUNT_HI__IAVERT_COUNT_HI__SHIFT 0x0
#define CP_VGT_IAPRIM_COUNT_LO__IAPRIM_COUNT_LO_MASK 0xffffffff
#define CP_VGT_IAPRIM_COUNT_LO__IAPRIM_COUNT_LO__SHIFT 0x0
#define CP_VGT_IAPRIM_COUNT_HI__IAPRIM_COUNT_HI_MASK 0xffffffff
#define CP_VGT_IAPRIM_COUNT_HI__IAPRIM_COUNT_HI__SHIFT 0x0
#define CP_VGT_GSPRIM_COUNT_LO__GSPRIM_COUNT_LO_MASK 0xffffffff
#define CP_VGT_GSPRIM_COUNT_LO__GSPRIM_COUNT_LO__SHIFT 0x0
#define CP_VGT_GSPRIM_COUNT_HI__GSPRIM_COUNT_HI_MASK 0xffffffff
#define CP_VGT_GSPRIM_COUNT_HI__GSPRIM_COUNT_HI__SHIFT 0x0
#define CP_VGT_VSINVOC_COUNT_LO__VSINVOC_COUNT_LO_MASK 0xffffffff
#define CP_VGT_VSINVOC_COUNT_LO__VSINVOC_COUNT_LO__SHIFT 0x0
#define CP_VGT_VSINVOC_COUNT_HI__VSINVOC_COUNT_HI_MASK 0xffffffff
#define CP_VGT_VSINVOC_COUNT_HI__VSINVOC_COUNT_HI__SHIFT 0x0
#define CP_VGT_GSINVOC_COUNT_LO__GSINVOC_COUNT_LO_MASK 0xffffffff
#define CP_VGT_GSINVOC_COUNT_LO__GSINVOC_COUNT_LO__SHIFT 0x0
#define CP_VGT_GSINVOC_COUNT_HI__GSINVOC_COUNT_HI_MASK 0xffffffff
#define CP_VGT_GSINVOC_COUNT_HI__GSINVOC_COUNT_HI__SHIFT 0x0
#define CP_VGT_HSINVOC_COUNT_LO__HSINVOC_COUNT_LO_MASK 0xffffffff
#define CP_VGT_HSINVOC_COUNT_LO__HSINVOC_COUNT_LO__SHIFT 0x0
#define CP_VGT_HSINVOC_COUNT_HI__HSINVOC_COUNT_HI_MASK 0xffffffff
#define CP_VGT_HSINVOC_COUNT_HI__HSINVOC_COUNT_HI__SHIFT 0x0
#define CP_VGT_DSINVOC_COUNT_LO__DSINVOC_COUNT_LO_MASK 0xffffffff
#define CP_VGT_DSINVOC_COUNT_LO__DSINVOC_COUNT_LO__SHIFT 0x0
#define CP_VGT_DSINVOC_COUNT_HI__DSINVOC_COUNT_HI_MASK 0xffffffff
#define CP_VGT_DSINVOC_COUNT_HI__DSINVOC_COUNT_HI__SHIFT 0x0
#define CP_PA_CINVOC_COUNT_LO__CINVOC_COUNT_LO_MASK 0xffffffff
#define CP_PA_CINVOC_COUNT_LO__CINVOC_COUNT_LO__SHIFT 0x0
#define CP_PA_CINVOC_COUNT_HI__CINVOC_COUNT_HI_MASK 0xffffffff
#define CP_PA_CINVOC_COUNT_HI__CINVOC_COUNT_HI__SHIFT 0x0
#define CP_PA_CPRIM_COUNT_LO__CPRIM_COUNT_LO_MASK 0xffffffff
#define CP_PA_CPRIM_COUNT_LO__CPRIM_COUNT_LO__SHIFT 0x0
#define CP_PA_CPRIM_COUNT_HI__CPRIM_COUNT_HI_MASK 0xffffffff
#define CP_PA_CPRIM_COUNT_HI__CPRIM_COUNT_HI__SHIFT 0x0
#define CP_SC_PSINVOC_COUNT0_LO__PSINVOC_COUNT0_LO_MASK 0xffffffff
#define CP_SC_PSINVOC_COUNT0_LO__PSINVOC_COUNT0_LO__SHIFT 0x0
#define CP_SC_PSINVOC_COUNT0_HI__PSINVOC_COUNT0_HI_MASK 0xffffffff
#define CP_SC_PSINVOC_COUNT0_HI__PSINVOC_COUNT0_HI__SHIFT 0x0
/* The only field defined for CP_SC_PSINVOC_COUNT1_LO/HI is named OBSOLETE. */
#define CP_SC_PSINVOC_COUNT1_LO__OBSOLETE_MASK 0xffffffff
#define CP_SC_PSINVOC_COUNT1_LO__OBSOLETE__SHIFT 0x0
#define CP_SC_PSINVOC_COUNT1_HI__OBSOLETE_MASK 0xffffffff
#define CP_SC_PSINVOC_COUNT1_HI__OBSOLETE__SHIFT 0x0
#define CP_VGT_CSINVOC_COUNT_LO__CSINVOC_COUNT_LO_MASK 0xffffffff
#define CP_VGT_CSINVOC_COUNT_LO__CSINVOC_COUNT_LO__SHIFT 0x0
#define CP_VGT_CSINVOC_COUNT_HI__CSINVOC_COUNT_HI_MASK 0xffffffff
#define CP_VGT_CSINVOC_COUNT_HI__CSINVOC_COUNT_HI__SHIFT 0x0
/* CP_STRMOUT_CNTL: a single one-bit field at bit 0. */
#define CP_STRMOUT_CNTL__OFFSET_UPDATE_DONE_MASK 0x1
#define CP_STRMOUT_CNTL__OFFSET_UPDATE_DONE__SHIFT 0x0
/*
 * SCRATCH_REG0..SCRATCH_REG7: each is a single full 32-bit field.
 * The SCRATCH_UMSK and SCRATCH_ADDR fields all carry an OBSOLETE_ prefix in
 * their names.
 */
#define SCRATCH_REG0__SCRATCH_REG0_MASK 0xffffffff
#define SCRATCH_REG0__SCRATCH_REG0__SHIFT 0x0
#define SCRATCH_REG1__SCRATCH_REG1_MASK 0xffffffff
#define SCRATCH_REG1__SCRATCH_REG1__SHIFT 0x0
#define SCRATCH_REG2__SCRATCH_REG2_MASK 0xffffffff
#define SCRATCH_REG2__SCRATCH_REG2__SHIFT 0x0
#define SCRATCH_REG3__SCRATCH_REG3_MASK 0xffffffff
#define SCRATCH_REG3__SCRATCH_REG3__SHIFT 0x0
#define SCRATCH_REG4__SCRATCH_REG4_MASK 0xffffffff
#define SCRATCH_REG4__SCRATCH_REG4__SHIFT 0x0
#define SCRATCH_REG5__SCRATCH_REG5_MASK 0xffffffff
#define SCRATCH_REG5__SCRATCH_REG5__SHIFT 0x0
#define SCRATCH_REG6__SCRATCH_REG6_MASK 0xffffffff
#define SCRATCH_REG6__SCRATCH_REG6__SHIFT 0x0
#define SCRATCH_REG7__SCRATCH_REG7_MASK 0xffffffff
#define SCRATCH_REG7__SCRATCH_REG7__SHIFT 0x0
#define SCRATCH_UMSK__OBSOLETE_UMSK_MASK 0xff
#define SCRATCH_UMSK__OBSOLETE_UMSK__SHIFT 0x0
#define SCRATCH_UMSK__OBSOLETE_SWAP_MASK 0x30000
#define SCRATCH_UMSK__OBSOLETE_SWAP__SHIFT 0x10
#define SCRATCH_ADDR__OBSOLETE_ADDR_MASK 0xffffffff
#define SCRATCH_ADDR__OBSOLETE_ADDR__SHIFT 0x0
/*
 * CP_PFP_ATOMIC_PREOP_LO/HI and CP_PFP_GDS_ATOMIC{0,1}_PREOP_LO/HI.
 * Each is a single full 32-bit field.
 */
#define CP_PFP_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO_MASK 0xffffffff
#define CP_PFP_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO__SHIFT 0x0
#define CP_PFP_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI_MASK 0xffffffff
#define CP_PFP_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI__SHIFT 0x0
#define CP_PFP_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO_MASK 0xffffffff
#define CP_PFP_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO__SHIFT 0x0
#define CP_PFP_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI_MASK 0xffffffff
#define CP_PFP_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI__SHIFT 0x0
#define CP_PFP_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO_MASK 0xffffffff
#define CP_PFP_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO__SHIFT 0x0
#define CP_PFP_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI_MASK 0xffffffff
#define CP_PFP_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI__SHIFT 0x0
/*
 * CP_APPEND_* field masks and shifts.
 * CP_APPEND_ADDR_LO defines only MEM_ADDR_LO [31:2]; no field is defined
 * here for bits 1:0. CP_APPEND_ADDR_HI packs MEM_ADDR_HI [15:0],
 * CS_PS_SEL [16] and COMMAND [31:29].
 */
#define CP_APPEND_ADDR_LO__MEM_ADDR_LO_MASK 0xfffffffc
#define CP_APPEND_ADDR_LO__MEM_ADDR_LO__SHIFT 0x2
#define CP_APPEND_ADDR_HI__MEM_ADDR_HI_MASK 0xffff
#define CP_APPEND_ADDR_HI__MEM_ADDR_HI__SHIFT 0x0
#define CP_APPEND_ADDR_HI__CS_PS_SEL_MASK 0x10000
#define CP_APPEND_ADDR_HI__CS_PS_SEL__SHIFT 0x10
#define CP_APPEND_ADDR_HI__COMMAND_MASK 0xe0000000
#define CP_APPEND_ADDR_HI__COMMAND__SHIFT 0x1d
#define CP_APPEND_DATA__DATA_MASK 0xffffffff
#define CP_APPEND_DATA__DATA__SHIFT 0x0
#define CP_APPEND_LAST_CS_FENCE__LAST_FENCE_MASK 0xffffffff
#define CP_APPEND_LAST_CS_FENCE__LAST_FENCE__SHIFT 0x0
#define CP_APPEND_LAST_PS_FENCE__LAST_FENCE_MASK 0xffffffff
#define CP_APPEND_LAST_PS_FENCE__LAST_FENCE__SHIFT 0x0
/*
 * CP_ATOMIC_PREOP_* / CP_GDS_ATOMIC{0,1}_PREOP_* and their CP_ME_* twins.
 * Each CP_<name> entry is immediately followed by a CP_ME_<name> entry with
 * the same field name, mask and shift. All are full 32-bit fields.
 * NOTE(review): the CP_ME_* names look like aliases of the same registers;
 * confirm against the hardware register spec.
 */
#define CP_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO_MASK 0xffffffff
#define CP_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO__SHIFT 0x0
#define CP_ME_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO_MASK 0xffffffff
#define CP_ME_ATOMIC_PREOP_LO__ATOMIC_PREOP_LO__SHIFT 0x0
#define CP_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI_MASK 0xffffffff
#define CP_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI__SHIFT 0x0
#define CP_ME_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI_MASK 0xffffffff
#define CP_ME_ATOMIC_PREOP_HI__ATOMIC_PREOP_HI__SHIFT 0x0
#define CP_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO_MASK 0xffffffff
#define CP_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO__SHIFT 0x0
#define CP_ME_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO_MASK 0xffffffff
#define CP_ME_GDS_ATOMIC0_PREOP_LO__GDS_ATOMIC0_PREOP_LO__SHIFT 0x0
#define CP_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI_MASK 0xffffffff
#define CP_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI__SHIFT 0x0
#define CP_ME_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI_MASK 0xffffffff
#define CP_ME_GDS_ATOMIC0_PREOP_HI__GDS_ATOMIC0_PREOP_HI__SHIFT 0x0
#define CP_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO_MASK 0xffffffff
#define CP_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO__SHIFT 0x0
#define CP_ME_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO_MASK 0xffffffff
#define CP_ME_GDS_ATOMIC1_PREOP_LO__GDS_ATOMIC1_PREOP_LO__SHIFT 0x0
#define CP_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI_MASK 0xffffffff
#define CP_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI__SHIFT 0x0
#define CP_ME_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI_MASK 0xffffffff
#define CP_ME_GDS_ATOMIC1_PREOP_HI__GDS_ATOMIC1_PREOP_HI__SHIFT 0x0
/*
 * CP_ME_MC_WADDR_*, CP_ME_MC_WDATA_* and CP_ME_MC_RADDR_* fields.
 * In both *ADDR_LO entries, bits 1:0 are a SWAP field and bits 31:2 hold the
 * low address field; the *ADDR_HI fields are 16 bits wide.
 */
#define CP_ME_MC_WADDR_LO__ME_MC_WADDR_SWAP_MASK 0x3
#define CP_ME_MC_WADDR_LO__ME_MC_WADDR_SWAP__SHIFT 0x0
#define CP_ME_MC_WADDR_LO__ME_MC_WADDR_LO_MASK 0xfffffffc
#define CP_ME_MC_WADDR_LO__ME_MC_WADDR_LO__SHIFT 0x2
#define CP_ME_MC_WADDR_HI__ME_MC_WADDR_HI_MASK 0xffff
#define CP_ME_MC_WADDR_HI__ME_MC_WADDR_HI__SHIFT 0x0
#define CP_ME_MC_WDATA_LO__ME_MC_WDATA_LO_MASK 0xffffffff
#define CP_ME_MC_WDATA_LO__ME_MC_WDATA_LO__SHIFT 0x0
#define CP_ME_MC_WDATA_HI__ME_MC_WDATA_HI_MASK 0xffffffff
#define CP_ME_MC_WDATA_HI__ME_MC_WDATA_HI__SHIFT 0x0
#define CP_ME_MC_RADDR_LO__ME_MC_RADDR_SWAP_MASK 0x3
#define CP_ME_MC_RADDR_LO__ME_MC_RADDR_SWAP__SHIFT 0x0
#define CP_ME_MC_RADDR_LO__ME_MC_RADDR_LO_MASK 0xfffffffc
#define CP_ME_MC_RADDR_LO__ME_MC_RADDR_LO__SHIFT 0x2
#define CP_ME_MC_RADDR_HI__ME_MC_RADDR_HI_MASK 0xffff
#define CP_ME_MC_RADDR_HI__ME_MC_RADDR_HI__SHIFT 0x0
/*
 * CP_SEM_WAIT_TIMER, CP_SIG_SEM_ADDR_LO/HI, CP_WAIT_SEM_ADDR_LO/HI and
 * CP_WAIT_REG_MEM_TIMEOUT.
 * CP_SIG_SEM_* and CP_WAIT_SEM_* define the same field names with the same
 * masks and shifts. In the *_ADDR_LO entries SEM_ADDR_LO starts at bit 3, so
 * bit 2 is not covered by any field defined here.
 */
#define CP_SEM_WAIT_TIMER__SEM_WAIT_TIMER_MASK 0xffffffff
#define CP_SEM_WAIT_TIMER__SEM_WAIT_TIMER__SHIFT 0x0
#define CP_SIG_SEM_ADDR_LO__SEM_ADDR_SWAP_MASK 0x3
#define CP_SIG_SEM_ADDR_LO__SEM_ADDR_SWAP__SHIFT 0x0
#define CP_SIG_SEM_ADDR_LO__SEM_ADDR_LO_MASK 0xfffffff8
#define CP_SIG_SEM_ADDR_LO__SEM_ADDR_LO__SHIFT 0x3
#define CP_SIG_SEM_ADDR_HI__SEM_ADDR_HI_MASK 0xffff
#define CP_SIG_SEM_ADDR_HI__SEM_ADDR_HI__SHIFT 0x0
#define CP_SIG_SEM_ADDR_HI__SEM_USE_MAILBOX_MASK 0x10000
#define CP_SIG_SEM_ADDR_HI__SEM_USE_MAILBOX__SHIFT 0x10
#define CP_SIG_SEM_ADDR_HI__SEM_SIGNAL_TYPE_MASK 0x100000
#define CP_SIG_SEM_ADDR_HI__SEM_SIGNAL_TYPE__SHIFT 0x14
#define CP_SIG_SEM_ADDR_HI__SEM_CLIENT_CODE_MASK 0x3000000
#define CP_SIG_SEM_ADDR_HI__SEM_CLIENT_CODE__SHIFT 0x18
#define CP_SIG_SEM_ADDR_HI__SEM_SELECT_MASK 0xe0000000
#define CP_SIG_SEM_ADDR_HI__SEM_SELECT__SHIFT 0x1d
#define CP_WAIT_SEM_ADDR_LO__SEM_ADDR_SWAP_MASK 0x3
#define CP_WAIT_SEM_ADDR_LO__SEM_ADDR_SWAP__SHIFT 0x0
#define CP_WAIT_SEM_ADDR_LO__SEM_ADDR_LO_MASK 0xfffffff8
#define CP_WAIT_SEM_ADDR_LO__SEM_ADDR_LO__SHIFT 0x3
#define CP_WAIT_SEM_ADDR_HI__SEM_ADDR_HI_MASK 0xffff
#define CP_WAIT_SEM_ADDR_HI__SEM_ADDR_HI__SHIFT 0x0
#define CP_WAIT_SEM_ADDR_HI__SEM_USE_MAILBOX_MASK 0x10000
#define CP_WAIT_SEM_ADDR_HI__SEM_USE_MAILBOX__SHIFT 0x10
#define CP_WAIT_SEM_ADDR_HI__SEM_SIGNAL_TYPE_MASK 0x100000
#define CP_WAIT_SEM_ADDR_HI__SEM_SIGNAL_TYPE__SHIFT 0x14
#define CP_WAIT_SEM_ADDR_HI__SEM_CLIENT_CODE_MASK 0x3000000
#define CP_WAIT_SEM_ADDR_HI__SEM_CLIENT_CODE__SHIFT 0x18
#define CP_WAIT_SEM_ADDR_HI__SEM_SELECT_MASK 0xe0000000
#define CP_WAIT_SEM_ADDR_HI__SEM_SELECT__SHIFT 0x1d
#define CP_WAIT_REG_MEM_TIMEOUT__WAIT_REG_MEM_TIMEOUT_MASK 0xffffffff
#define CP_WAIT_REG_MEM_TIMEOUT__WAIT_REG_MEM_TIMEOUT__SHIFT 0x0
/*
 * CP_COHER_* and COHER_DEST_BASE_* field masks and shifts.
 * Every CP_COHER_CNTL field below is a single-bit flag (each mask has exactly
 * one bit set). The field list is sparse: bits 2-5, 17, 20, 24, 30 and 31
 * have no field defined here.
 */
#define CP_COHER_START_DELAY__START_DELAY_COUNT_MASK 0x3f
#define CP_COHER_START_DELAY__START_DELAY_COUNT__SHIFT 0x0
#define CP_COHER_CNTL__DEST_BASE_0_ENA_MASK 0x1
#define CP_COHER_CNTL__DEST_BASE_0_ENA__SHIFT 0x0
#define CP_COHER_CNTL__DEST_BASE_1_ENA_MASK 0x2
#define CP_COHER_CNTL__DEST_BASE_1_ENA__SHIFT 0x1
#define CP_COHER_CNTL__CB0_DEST_BASE_ENA_MASK 0x40
#define CP_COHER_CNTL__CB0_DEST_BASE_ENA__SHIFT 0x6
#define CP_COHER_CNTL__CB1_DEST_BASE_ENA_MASK 0x80
#define CP_COHER_CNTL__CB1_DEST_BASE_ENA__SHIFT 0x7
#define CP_COHER_CNTL__CB2_DEST_BASE_ENA_MASK 0x100
#define CP_COHER_CNTL__CB2_DEST_BASE_ENA__SHIFT 0x8
#define CP_COHER_CNTL__CB3_DEST_BASE_ENA_MASK 0x200
#define CP_COHER_CNTL__CB3_DEST_BASE_ENA__SHIFT 0x9
#define CP_COHER_CNTL__CB4_DEST_BASE_ENA_MASK 0x400
#define CP_COHER_CNTL__CB4_DEST_BASE_ENA__SHIFT 0xa
#define CP_COHER_CNTL__CB5_DEST_BASE_ENA_MASK 0x800
#define CP_COHER_CNTL__CB5_DEST_BASE_ENA__SHIFT 0xb
#define CP_COHER_CNTL__CB6_DEST_BASE_ENA_MASK 0x1000
#define CP_COHER_CNTL__CB6_DEST_BASE_ENA__SHIFT 0xc
#define CP_COHER_CNTL__CB7_DEST_BASE_ENA_MASK 0x2000
#define CP_COHER_CNTL__CB7_DEST_BASE_ENA__SHIFT 0xd
#define CP_COHER_CNTL__DB_DEST_BASE_ENA_MASK 0x4000
#define CP_COHER_CNTL__DB_DEST_BASE_ENA__SHIFT 0xe
#define CP_COHER_CNTL__TCL1_VOL_ACTION_ENA_MASK 0x8000
#define CP_COHER_CNTL__TCL1_VOL_ACTION_ENA__SHIFT 0xf
#define CP_COHER_CNTL__TC_VOL_ACTION_ENA_MASK 0x10000
#define CP_COHER_CNTL__TC_VOL_ACTION_ENA__SHIFT 0x10
#define CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK 0x40000
#define CP_COHER_CNTL__TC_WB_ACTION_ENA__SHIFT 0x12
#define CP_COHER_CNTL__DEST_BASE_2_ENA_MASK 0x80000
#define CP_COHER_CNTL__DEST_BASE_2_ENA__SHIFT 0x13
#define CP_COHER_CNTL__DEST_BASE_3_ENA_MASK 0x200000
#define CP_COHER_CNTL__DEST_BASE_3_ENA__SHIFT 0x15
#define CP_COHER_CNTL__TCL1_ACTION_ENA_MASK 0x400000
#define CP_COHER_CNTL__TCL1_ACTION_ENA__SHIFT 0x16
#define CP_COHER_CNTL__TC_ACTION_ENA_MASK 0x800000
#define CP_COHER_CNTL__TC_ACTION_ENA__SHIFT 0x17
#define CP_COHER_CNTL__CB_ACTION_ENA_MASK 0x2000000
#define CP_COHER_CNTL__CB_ACTION_ENA__SHIFT 0x19
#define CP_COHER_CNTL__DB_ACTION_ENA_MASK 0x4000000
#define CP_COHER_CNTL__DB_ACTION_ENA__SHIFT 0x1a
#define CP_COHER_CNTL__SH_KCACHE_ACTION_ENA_MASK 0x8000000
#define CP_COHER_CNTL__SH_KCACHE_ACTION_ENA__SHIFT 0x1b
#define CP_COHER_CNTL__SH_KCACHE_VOL_ACTION_ENA_MASK 0x10000000
#define CP_COHER_CNTL__SH_KCACHE_VOL_ACTION_ENA__SHIFT 0x1c
#define CP_COHER_CNTL__SH_ICACHE_ACTION_ENA_MASK 0x20000000
#define CP_COHER_CNTL__SH_ICACHE_ACTION_ENA__SHIFT 0x1d
/* Size/base: a full 32-bit field plus an 8-bit *_HI field each. */
#define CP_COHER_SIZE__COHER_SIZE_256B_MASK 0xffffffff
#define CP_COHER_SIZE__COHER_SIZE_256B__SHIFT 0x0
#define CP_COHER_SIZE_HI__COHER_SIZE_HI_256B_MASK 0xff
#define CP_COHER_SIZE_HI__COHER_SIZE_HI_256B__SHIFT 0x0
#define CP_COHER_BASE__COHER_BASE_256B_MASK 0xffffffff
#define CP_COHER_BASE__COHER_BASE_256B__SHIFT 0x0
#define CP_COHER_BASE_HI__COHER_BASE_HI_256B_MASK 0xff
#define CP_COHER_BASE_HI__COHER_BASE_HI_256B__SHIFT 0x0
#define CP_COHER_STATUS__MATCHING_GFX_CNTX_MASK 0xff
#define CP_COHER_STATUS__MATCHING_GFX_CNTX__SHIFT 0x0
#define CP_COHER_STATUS__MEID_MASK 0x3000000
#define CP_COHER_STATUS__MEID__SHIFT 0x18
#define CP_COHER_STATUS__PHASE1_STATUS_MASK 0x40000000
#define CP_COHER_STATUS__PHASE1_STATUS__SHIFT 0x1e
#define CP_COHER_STATUS__STATUS_MASK 0x80000000
#define CP_COHER_STATUS__STATUS__SHIFT 0x1f
/* COHER_DEST_BASE_n and COHER_DEST_BASE_HI_n (n = 0..3): full 32-bit fields. */
#define COHER_DEST_BASE_0__DEST_BASE_256B_MASK 0xffffffff
#define COHER_DEST_BASE_0__DEST_BASE_256B__SHIFT 0x0
#define COHER_DEST_BASE_1__DEST_BASE_256B_MASK 0xffffffff
#define COHER_DEST_BASE_1__DEST_BASE_256B__SHIFT 0x0
#define COHER_DEST_BASE_2__DEST_BASE_256B_MASK 0xffffffff
#define COHER_DEST_BASE_2__DEST_BASE_256B__SHIFT 0x0
#define COHER_DEST_BASE_3__DEST_BASE_256B_MASK 0xffffffff
#define COHER_DEST_BASE_3__DEST_BASE_256B__SHIFT 0x0
#define COHER_DEST_BASE_HI_0__DEST_BASE_HI_256B_MASK 0xffffffff
#define COHER_DEST_BASE_HI_0__DEST_BASE_HI_256B__SHIFT 0x0
#define COHER_DEST_BASE_HI_1__DEST_BASE_HI_256B_MASK 0xffffffff
#define COHER_DEST_BASE_HI_1__DEST_BASE_HI_256B__SHIFT 0x0
#define COHER_DEST_BASE_HI_2__DEST_BASE_HI_256B_MASK 0xffffffff
#define COHER_DEST_BASE_HI_2__DEST_BASE_HI_256B__SHIFT 0x0
#define COHER_DEST_BASE_HI_3__DEST_BASE_HI_256B_MASK 0xffffffff
#define COHER_DEST_BASE_HI_3__DEST_BASE_HI_256B__SHIFT 0x0
/*
 * CP_DMA_ME_* field masks and shifts: source/destination address (32-bit
 * low part plus 16-bit *_HI part), CONTROL and COMMAND.
 * CP_DMA_ME_COMMAND: BYTE_COUNT occupies bits 20:0; the remaining fields are
 * packed into bits 21..30.
 */
#define CP_DMA_ME_SRC_ADDR__SRC_ADDR_MASK 0xffffffff
#define CP_DMA_ME_SRC_ADDR__SRC_ADDR__SHIFT 0x0
#define CP_DMA_ME_SRC_ADDR_HI__SRC_ADDR_HI_MASK 0xffff
#define CP_DMA_ME_SRC_ADDR_HI__SRC_ADDR_HI__SHIFT 0x0
#define CP_DMA_ME_DST_ADDR__DST_ADDR_MASK 0xffffffff
#define CP_DMA_ME_DST_ADDR__DST_ADDR__SHIFT 0x0
#define CP_DMA_ME_DST_ADDR_HI__DST_ADDR_HI_MASK 0xffff
#define CP_DMA_ME_DST_ADDR_HI__DST_ADDR_HI__SHIFT 0x0
#define CP_DMA_ME_CONTROL__SRC_ATC_MASK 0x1000
#define CP_DMA_ME_CONTROL__SRC_ATC__SHIFT 0xc
#define CP_DMA_ME_CONTROL__SRC_CACHE_POLICY_MASK 0x6000
#define CP_DMA_ME_CONTROL__SRC_CACHE_POLICY__SHIFT 0xd
#define CP_DMA_ME_CONTROL__SRC_VOLATILE_MASK 0x8000
#define CP_DMA_ME_CONTROL__SRC_VOLATILE__SHIFT 0xf
#define CP_DMA_ME_CONTROL__DST_SELECT_MASK 0x300000
#define CP_DMA_ME_CONTROL__DST_SELECT__SHIFT 0x14
#define CP_DMA_ME_CONTROL__DST_ATC_MASK 0x1000000
#define CP_DMA_ME_CONTROL__DST_ATC__SHIFT 0x18
#define CP_DMA_ME_CONTROL__DST_CACHE_POLICY_MASK 0x6000000
#define CP_DMA_ME_CONTROL__DST_CACHE_POLICY__SHIFT 0x19
#define CP_DMA_ME_CONTROL__DST_VOLATILE_MASK 0x8000000
#define CP_DMA_ME_CONTROL__DST_VOLATILE__SHIFT 0x1b
#define CP_DMA_ME_CONTROL__SRC_SELECT_MASK 0x60000000
#define CP_DMA_ME_CONTROL__SRC_SELECT__SHIFT 0x1d
#define CP_DMA_ME_COMMAND__BYTE_COUNT_MASK 0x1fffff
#define CP_DMA_ME_COMMAND__BYTE_COUNT__SHIFT 0x0
#define CP_DMA_ME_COMMAND__DIS_WC_MASK 0x200000
#define CP_DMA_ME_COMMAND__DIS_WC__SHIFT 0x15
#define CP_DMA_ME_COMMAND__SRC_SWAP_MASK 0xc00000
#define CP_DMA_ME_COMMAND__SRC_SWAP__SHIFT 0x16
#define CP_DMA_ME_COMMAND__DST_SWAP_MASK 0x3000000
#define CP_DMA_ME_COMMAND__DST_SWAP__SHIFT 0x18
#define CP_DMA_ME_COMMAND__SAS_MASK 0x4000000
#define CP_DMA_ME_COMMAND__SAS__SHIFT 0x1a
#define CP_DMA_ME_COMMAND__DAS_MASK 0x8000000
#define CP_DMA_ME_COMMAND__DAS__SHIFT 0x1b
#define CP_DMA_ME_COMMAND__SAIC_MASK 0x10000000
#define CP_DMA_ME_COMMAND__SAIC__SHIFT 0x1c
#define CP_DMA_ME_COMMAND__DAIC_MASK 0x20000000
#define CP_DMA_ME_COMMAND__DAIC__SHIFT 0x1d
#define CP_DMA_ME_COMMAND__RAW_WAIT_MASK 0x40000000
#define CP_DMA_ME_COMMAND__RAW_WAIT__SHIFT 0x1e
/*
 * CP_DMA_PFP_* field masks and shifts: source/destination address (32-bit
 * low part plus 16-bit *_HI part), CONTROL and COMMAND.
 * CP_DMA_PFP_COMMAND: BYTE_COUNT occupies bits 20:0; the remaining fields
 * are packed into bits 21..30.
 */
#define CP_DMA_PFP_SRC_ADDR__SRC_ADDR_MASK 0xffffffff
#define CP_DMA_PFP_SRC_ADDR__SRC_ADDR__SHIFT 0x0
#define CP_DMA_PFP_SRC_ADDR_HI__SRC_ADDR_HI_MASK 0xffff
#define CP_DMA_PFP_SRC_ADDR_HI__SRC_ADDR_HI__SHIFT 0x0
#define CP_DMA_PFP_DST_ADDR__DST_ADDR_MASK 0xffffffff
#define CP_DMA_PFP_DST_ADDR__DST_ADDR__SHIFT 0x0
#define CP_DMA_PFP_DST_ADDR_HI__DST_ADDR_HI_MASK 0xffff
#define CP_DMA_PFP_DST_ADDR_HI__DST_ADDR_HI__SHIFT 0x0
#define CP_DMA_PFP_CONTROL__SRC_ATC_MASK 0x1000
#define CP_DMA_PFP_CONTROL__SRC_ATC__SHIFT 0xc
#define CP_DMA_PFP_CONTROL__SRC_CACHE_POLICY_MASK 0x6000
#define CP_DMA_PFP_CONTROL__SRC_CACHE_POLICY__SHIFT 0xd
#define CP_DMA_PFP_CONTROL__SRC_VOLATILE_MASK 0x8000
#define CP_DMA_PFP_CONTROL__SRC_VOLATILE__SHIFT 0xf
#define CP_DMA_PFP_CONTROL__DST_SELECT_MASK 0x300000
#define CP_DMA_PFP_CONTROL__DST_SELECT__SHIFT 0x14
#define CP_DMA_PFP_CONTROL__DST_ATC_MASK 0x1000000
#define CP_DMA_PFP_CONTROL__DST_ATC__SHIFT 0x18
#define CP_DMA_PFP_CONTROL__DST_CACHE_POLICY_MASK 0x6000000
#define CP_DMA_PFP_CONTROL__DST_CACHE_POLICY__SHIFT 0x19
#define CP_DMA_PFP_CONTROL__DST_VOLATILE_MASK 0x8000000
#define CP_DMA_PFP_CONTROL__DST_VOLATILE__SHIFT 0x1b
#define CP_DMA_PFP_CONTROL__SRC_SELECT_MASK 0x60000000
#define CP_DMA_PFP_CONTROL__SRC_SELECT__SHIFT 0x1d
#define CP_DMA_PFP_COMMAND__BYTE_COUNT_MASK 0x1fffff
#define CP_DMA_PFP_COMMAND__BYTE_COUNT__SHIFT 0x0
#define CP_DMA_PFP_COMMAND__DIS_WC_MASK 0x200000
#define CP_DMA_PFP_COMMAND__DIS_WC__SHIFT 0x15
#define CP_DMA_PFP_COMMAND__SRC_SWAP_MASK 0xc00000
#define CP_DMA_PFP_COMMAND__SRC_SWAP__SHIFT 0x16
#define CP_DMA_PFP_COMMAND__DST_SWAP_MASK 0x3000000
#define CP_DMA_PFP_COMMAND__DST_SWAP__SHIFT 0x18
#define CP_DMA_PFP_COMMAND__SAS_MASK 0x4000000
#define CP_DMA_PFP_COMMAND__SAS__SHIFT 0x1a
#define CP_DMA_PFP_COMMAND__DAS_MASK 0x8000000
#define CP_DMA_PFP_COMMAND__DAS__SHIFT 0x1b
#define CP_DMA_PFP_COMMAND__SAIC_MASK 0x10000000
#define CP_DMA_PFP_COMMAND__SAIC__SHIFT 0x1c
#define CP_DMA_PFP_COMMAND__DAIC_MASK 0x20000000
#define CP_DMA_PFP_COMMAND__DAIC__SHIFT 0x1d
#define CP_DMA_PFP_COMMAND__RAW_WAIT_MASK 0x40000000
#define CP_DMA_PFP_COMMAND__RAW_WAIT__SHIFT 0x1e
/*
 * CP_DMA_CNTL and CP_DMA_READ_TAGS field masks and shifts.
 * CP_DMA_CNTL layout per the values below: MIN_AVAILSZ [5:4],
 * BUFFER_DEPTH [19:16], PIO_FIFO_EMPTY [28], PIO_FIFO_FULL [29],
 * PIO_COUNT [31:30].
 */
#define CP_DMA_CNTL__MIN_AVAILSZ_MASK 0x30
#define CP_DMA_CNTL__MIN_AVAILSZ__SHIFT 0x4
#define CP_DMA_CNTL__BUFFER_DEPTH_MASK 0xf0000
#define CP_DMA_CNTL__BUFFER_DEPTH__SHIFT 0x10
#define CP_DMA_CNTL__PIO_FIFO_EMPTY_MASK 0x10000000
#define CP_DMA_CNTL__PIO_FIFO_EMPTY__SHIFT 0x1c
#define CP_DMA_CNTL__PIO_FIFO_FULL_MASK 0x20000000
#define CP_DMA_CNTL__PIO_FIFO_FULL__SHIFT 0x1d
#define CP_DMA_CNTL__PIO_COUNT_MASK 0xc0000000
#define CP_DMA_CNTL__PIO_COUNT__SHIFT 0x1e
#define CP_DMA_READ_TAGS__DMA_READ_TAG_MASK 0x3ffffff
#define CP_DMA_READ_TAGS__DMA_READ_TAG__SHIFT 0x0
#define CP_DMA_READ_TAGS__DMA_READ_TAG_VALID_MASK 0x10000000
#define CP_DMA_READ_TAGS__DMA_READ_TAG_VALID__SHIFT 0x1c
/*
 * CP_PFP_IB_CONTROL, CP_PFP_LOAD_CONTROL, CP_SCRATCH_INDEX/DATA, the
 * RB/IB1/IB2 offset and preamble entries, and CP_CE_COUNTER.
 * All *_OFFSET and *_PREAMBLE_* fields are 20 bits wide (mask 0xfffff).
 */
#define CP_PFP_IB_CONTROL__IB_EN_MASK 0xff
#define CP_PFP_IB_CONTROL__IB_EN__SHIFT 0x0
/* CP_PFP_LOAD_CONTROL: single-bit enable flags at bits 0, 1, 15, 16 and 24. */
#define CP_PFP_LOAD_CONTROL__CONFIG_REG_EN_MASK 0x1
#define CP_PFP_LOAD_CONTROL__CONFIG_REG_EN__SHIFT 0x0
#define CP_PFP_LOAD_CONTROL__CNTX_REG_EN_MASK 0x2
#define CP_PFP_LOAD_CONTROL__CNTX_REG_EN__SHIFT 0x1
#define CP_PFP_LOAD_CONTROL__UCONFIG_REG_EN_MASK 0x8000
#define CP_PFP_LOAD_CONTROL__UCONFIG_REG_EN__SHIFT 0xf
#define CP_PFP_LOAD_CONTROL__SH_GFX_REG_EN_MASK 0x10000
#define CP_PFP_LOAD_CONTROL__SH_GFX_REG_EN__SHIFT 0x10
#define CP_PFP_LOAD_CONTROL__SH_CS_REG_EN_MASK 0x1000000
#define CP_PFP_LOAD_CONTROL__SH_CS_REG_EN__SHIFT 0x18
#define CP_SCRATCH_INDEX__SCRATCH_INDEX_MASK 0xff
#define CP_SCRATCH_INDEX__SCRATCH_INDEX__SHIFT 0x0
#define CP_SCRATCH_DATA__SCRATCH_DATA_MASK 0xffffffff
#define CP_SCRATCH_DATA__SCRATCH_DATA__SHIFT 0x0
#define CP_RB_OFFSET__RB_OFFSET_MASK 0xfffff
#define CP_RB_OFFSET__RB_OFFSET__SHIFT 0x0
#define CP_IB1_OFFSET__IB1_OFFSET_MASK 0xfffff
#define CP_IB1_OFFSET__IB1_OFFSET__SHIFT 0x0
#define CP_IB2_OFFSET__IB2_OFFSET_MASK 0xfffff
#define CP_IB2_OFFSET__IB2_OFFSET__SHIFT 0x0
#define CP_IB1_PREAMBLE_BEGIN__IB1_PREAMBLE_BEGIN_MASK 0xfffff
#define CP_IB1_PREAMBLE_BEGIN__IB1_PREAMBLE_BEGIN__SHIFT 0x0
#define CP_IB1_PREAMBLE_END__IB1_PREAMBLE_END_MASK 0xfffff
#define CP_IB1_PREAMBLE_END__IB1_PREAMBLE_END__SHIFT 0x0
#define CP_IB2_PREAMBLE_BEGIN__IB2_PREAMBLE_BEGIN_MASK 0xfffff
#define CP_IB2_PREAMBLE_BEGIN__IB2_PREAMBLE_BEGIN__SHIFT 0x0
#define CP_IB2_PREAMBLE_END__IB2_PREAMBLE_END_MASK 0xfffff
#define CP_IB2_PREAMBLE_END__IB2_PREAMBLE_END__SHIFT 0x0
#define CP_CE_IB1_OFFSET__IB1_OFFSET_MASK 0xfffff
#define CP_CE_IB1_OFFSET__IB1_OFFSET__SHIFT 0x0
#define CP_CE_IB2_OFFSET__IB2_OFFSET_MASK 0xfffff
#define CP_CE_IB2_OFFSET__IB2_OFFSET__SHIFT 0x0
#define CP_CE_COUNTER__CONST_ENGINE_COUNT_MASK 0xffffffff
#define CP_CE_COUNTER__CONST_ENGINE_COUNT__SHIFT 0x0
/*
 * CP_STALLED_STAT1 field masks and shifts.
 * Every field is a single-bit flag (each mask has exactly one bit set).
 * The list is sparse: bits 1, 3, 5-9, 18-22, 30 and 31 have no field
 * defined here.
 */
#define CP_STALLED_STAT1__RBIU_TO_DMA_NOT_RDY_TO_RCV_MASK 0x1
#define CP_STALLED_STAT1__RBIU_TO_DMA_NOT_RDY_TO_RCV__SHIFT 0x0
#define CP_STALLED_STAT1__RBIU_TO_SEM_NOT_RDY_TO_RCV_MASK 0x4
#define CP_STALLED_STAT1__RBIU_TO_SEM_NOT_RDY_TO_RCV__SHIFT 0x2
#define CP_STALLED_STAT1__RBIU_TO_MEMWR_NOT_RDY_TO_RCV_MASK 0x10
#define CP_STALLED_STAT1__RBIU_TO_MEMWR_NOT_RDY_TO_RCV__SHIFT 0x4
#define CP_STALLED_STAT1__ME_HAS_ACTIVE_CE_BUFFER_FLAG_MASK 0x400
#define CP_STALLED_STAT1__ME_HAS_ACTIVE_CE_BUFFER_FLAG__SHIFT 0xa
#define CP_STALLED_STAT1__ME_HAS_ACTIVE_DE_BUFFER_FLAG_MASK 0x800
#define CP_STALLED_STAT1__ME_HAS_ACTIVE_DE_BUFFER_FLAG__SHIFT 0xb
#define CP_STALLED_STAT1__ME_STALLED_ON_TC_WR_CONFIRM_MASK 0x1000
#define CP_STALLED_STAT1__ME_STALLED_ON_TC_WR_CONFIRM__SHIFT 0xc
#define CP_STALLED_STAT1__ME_STALLED_ON_ATOMIC_RTN_DATA_MASK 0x2000
#define CP_STALLED_STAT1__ME_STALLED_ON_ATOMIC_RTN_DATA__SHIFT 0xd
#define CP_STALLED_STAT1__ME_WAITING_ON_MC_READ_DATA_MASK 0x4000
#define CP_STALLED_STAT1__ME_WAITING_ON_MC_READ_DATA__SHIFT 0xe
#define CP_STALLED_STAT1__ME_WAITING_ON_REG_READ_DATA_MASK 0x8000
#define CP_STALLED_STAT1__ME_WAITING_ON_REG_READ_DATA__SHIFT 0xf
#define CP_STALLED_STAT1__MIU_WAITING_ON_RDREQ_FREE_MASK 0x10000
#define CP_STALLED_STAT1__MIU_WAITING_ON_RDREQ_FREE__SHIFT 0x10
#define CP_STALLED_STAT1__MIU_WAITING_ON_WRREQ_FREE_MASK 0x20000
#define CP_STALLED_STAT1__MIU_WAITING_ON_WRREQ_FREE__SHIFT 0x11
#define CP_STALLED_STAT1__RCIU_WAITING_ON_GDS_FREE_MASK 0x800000
#define CP_STALLED_STAT1__RCIU_WAITING_ON_GDS_FREE__SHIFT 0x17
#define CP_STALLED_STAT1__RCIU_WAITING_ON_GRBM_FREE_MASK 0x1000000
#define CP_STALLED_STAT1__RCIU_WAITING_ON_GRBM_FREE__SHIFT 0x18
#define CP_STALLED_STAT1__RCIU_WAITING_ON_VGT_FREE_MASK 0x2000000
#define CP_STALLED_STAT1__RCIU_WAITING_ON_VGT_FREE__SHIFT 0x19
#define CP_STALLED_STAT1__RCIU_STALLED_ON_ME_READ_MASK 0x4000000
#define CP_STALLED_STAT1__RCIU_STALLED_ON_ME_READ__SHIFT 0x1a
#define CP_STALLED_STAT1__RCIU_STALLED_ON_DMA_READ_MASK 0x8000000
#define CP_STALLED_STAT1__RCIU_STALLED_ON_DMA_READ__SHIFT 0x1b
#define CP_STALLED_STAT1__RCIU_STALLED_ON_APPEND_READ_MASK 0x10000000
#define CP_STALLED_STAT1__RCIU_STALLED_ON_APPEND_READ__SHIFT 0x1c
#define CP_STALLED_STAT1__RCIU_HALTED_BY_REG_VIOLATION_MASK 0x20000000
#define CP_STALLED_STAT1__RCIU_HALTED_BY_REG_VIOLATION__SHIFT 0x1d
/*
 * CP_STALLED_STAT2 field masks and shifts.
 * Every field is a single-bit flag. Bits 0..31 are all assigned except
 * bit 3, which has no field defined here.
 */
#define CP_STALLED_STAT2__PFP_TO_CSF_NOT_RDY_TO_RCV_MASK 0x1
#define CP_STALLED_STAT2__PFP_TO_CSF_NOT_RDY_TO_RCV__SHIFT 0x0
#define CP_STALLED_STAT2__PFP_TO_MEQ_NOT_RDY_TO_RCV_MASK 0x2
#define CP_STALLED_STAT2__PFP_TO_MEQ_NOT_RDY_TO_RCV__SHIFT 0x1
#define CP_STALLED_STAT2__PFP_TO_RCIU_NOT_RDY_TO_RCV_MASK 0x4
#define CP_STALLED_STAT2__PFP_TO_RCIU_NOT_RDY_TO_RCV__SHIFT 0x2
#define CP_STALLED_STAT2__PFP_TO_VGT_WRITES_PENDING_MASK 0x10
#define CP_STALLED_STAT2__PFP_TO_VGT_WRITES_PENDING__SHIFT 0x4
#define CP_STALLED_STAT2__PFP_RCIU_READ_PENDING_MASK 0x20
#define CP_STALLED_STAT2__PFP_RCIU_READ_PENDING__SHIFT 0x5
#define CP_STALLED_STAT2__PFP_MIU_READ_PENDING_MASK 0x40
#define CP_STALLED_STAT2__PFP_MIU_READ_PENDING__SHIFT 0x6
#define CP_STALLED_STAT2__PFP_TO_MIU_WRITE_NOT_RDY_TO_RCV_MASK 0x80
#define CP_STALLED_STAT2__PFP_TO_MIU_WRITE_NOT_RDY_TO_RCV__SHIFT 0x7
#define CP_STALLED_STAT2__PFP_WAITING_ON_BUFFER_DATA_MASK 0x100
#define CP_STALLED_STAT2__PFP_WAITING_ON_BUFFER_DATA__SHIFT 0x8
#define CP_STALLED_STAT2__ME_WAIT_ON_CE_COUNTER_MASK 0x200
#define CP_STALLED_STAT2__ME_WAIT_ON_CE_COUNTER__SHIFT 0x9
#define CP_STALLED_STAT2__ME_WAIT_ON_AVAIL_BUFFER_MASK 0x400
#define CP_STALLED_STAT2__ME_WAIT_ON_AVAIL_BUFFER__SHIFT 0xa
#define CP_STALLED_STAT2__GFX_CNTX_NOT_AVAIL_TO_ME_MASK 0x800
#define CP_STALLED_STAT2__GFX_CNTX_NOT_AVAIL_TO_ME__SHIFT 0xb
#define CP_STALLED_STAT2__ME_RCIU_NOT_RDY_TO_RCV_MASK 0x1000
#define CP_STALLED_STAT2__ME_RCIU_NOT_RDY_TO_RCV__SHIFT 0xc
#define CP_STALLED_STAT2__ME_TO_CONST_NOT_RDY_TO_RCV_MASK 0x2000
#define CP_STALLED_STAT2__ME_TO_CONST_NOT_RDY_TO_RCV__SHIFT 0xd
#define CP_STALLED_STAT2__ME_WAITING_DATA_FROM_PFP_MASK 0x4000
#define CP_STALLED_STAT2__ME_WAITING_DATA_FROM_PFP__SHIFT 0xe
#define CP_STALLED_STAT2__ME_WAITING_ON_PARTIAL_FLUSH_MASK 0x8000
#define CP_STALLED_STAT2__ME_WAITING_ON_PARTIAL_FLUSH__SHIFT 0xf
#define CP_STALLED_STAT2__MEQ_TO_ME_NOT_RDY_TO_RCV_MASK 0x10000
#define CP_STALLED_STAT2__MEQ_TO_ME_NOT_RDY_TO_RCV__SHIFT 0x10
#define CP_STALLED_STAT2__STQ_TO_ME_NOT_RDY_TO_RCV_MASK 0x20000
#define CP_STALLED_STAT2__STQ_TO_ME_NOT_RDY_TO_RCV__SHIFT 0x11
#define CP_STALLED_STAT2__ME_WAITING_DATA_FROM_STQ_MASK 0x40000
#define CP_STALLED_STAT2__ME_WAITING_DATA_FROM_STQ__SHIFT 0x12
#define CP_STALLED_STAT2__PFP_STALLED_ON_TC_WR_CONFIRM_MASK 0x80000
#define CP_STALLED_STAT2__PFP_STALLED_ON_TC_WR_CONFIRM__SHIFT 0x13
#define CP_STALLED_STAT2__PFP_STALLED_ON_ATOMIC_RTN_DATA_MASK 0x100000
#define CP_STALLED_STAT2__PFP_STALLED_ON_ATOMIC_RTN_DATA__SHIFT 0x14
#define CP_STALLED_STAT2__EOPD_FIFO_NEEDS_SC_EOP_DONE_MASK 0x200000
#define CP_STALLED_STAT2__EOPD_FIFO_NEEDS_SC_EOP_DONE__SHIFT 0x15
#define CP_STALLED_STAT2__EOPD_FIFO_NEEDS_WR_CONFIRM_MASK 0x400000
#define CP_STALLED_STAT2__EOPD_FIFO_NEEDS_WR_CONFIRM__SHIFT 0x16
#define CP_STALLED_STAT2__STRMO_WR_OF_PRIM_DATA_PENDING_MASK 0x800000
#define CP_STALLED_STAT2__STRMO_WR_OF_PRIM_DATA_PENDING__SHIFT 0x17
#define CP_STALLED_STAT2__PIPE_STATS_WR_DATA_PENDING_MASK 0x1000000
#define CP_STALLED_STAT2__PIPE_STATS_WR_DATA_PENDING__SHIFT 0x18
#define CP_STALLED_STAT2__APPEND_RDY_WAIT_ON_CS_DONE_MASK 0x2000000
#define CP_STALLED_STAT2__APPEND_RDY_WAIT_ON_CS_DONE__SHIFT 0x19
#define CP_STALLED_STAT2__APPEND_RDY_WAIT_ON_PS_DONE_MASK 0x4000000
#define CP_STALLED_STAT2__APPEND_RDY_WAIT_ON_PS_DONE__SHIFT 0x1a
#define CP_STALLED_STAT2__APPEND_WAIT_ON_WR_CONFIRM_MASK 0x8000000
#define CP_STALLED_STAT2__APPEND_WAIT_ON_WR_CONFIRM__SHIFT 0x1b
#define CP_STALLED_STAT2__APPEND_ACTIVE_PARTITION_MASK 0x10000000
#define CP_STALLED_STAT2__APPEND_ACTIVE_PARTITION__SHIFT 0x1c
#define CP_STALLED_STAT2__APPEND_WAITING_TO_SEND_MEMWRITE_MASK 0x20000000
#define CP_STALLED_STAT2__APPEND_WAITING_TO_SEND_MEMWRITE__SHIFT 0x1d
#define CP_STALLED_STAT2__SURF_SYNC_NEEDS_IDLE_CNTXS_MASK 0x40000000
#define CP_STALLED_STAT2__SURF_SYNC_NEEDS_IDLE_CNTXS__SHIFT 0x1e
#define CP_STALLED_STAT2__SURF_SYNC_NEEDS_ALL_CLEAN_MASK 0x80000000
#define CP_STALLED_STAT2__SURF_SYNC_NEEDS_ALL_CLEAN__SHIFT 0x1f
/*
 * CP_STALLED_STAT3 field masks and shifts.
 * Every field is a single-bit flag. Fields occupy bits 0-8 and 10-15;
 * bit 9 and bits 16-31 have no field defined here.
 */
#define CP_STALLED_STAT3__CE_TO_CSF_NOT_RDY_TO_RCV_MASK 0x1
#define CP_STALLED_STAT3__CE_TO_CSF_NOT_RDY_TO_RCV__SHIFT 0x0
#define CP_STALLED_STAT3__CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV_MASK 0x2
#define CP_STALLED_STAT3__CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV__SHIFT 0x1
#define CP_STALLED_STAT3__CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER_MASK 0x4
#define CP_STALLED_STAT3__CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER__SHIFT 0x2
#define CP_STALLED_STAT3__CE_TO_RAM_INIT_NOT_RDY_MASK 0x8
#define CP_STALLED_STAT3__CE_TO_RAM_INIT_NOT_RDY__SHIFT 0x3
#define CP_STALLED_STAT3__CE_TO_RAM_DUMP_NOT_RDY_MASK 0x10
#define CP_STALLED_STAT3__CE_TO_RAM_DUMP_NOT_RDY__SHIFT 0x4
#define CP_STALLED_STAT3__CE_TO_RAM_WRITE_NOT_RDY_MASK 0x20
#define CP_STALLED_STAT3__CE_TO_RAM_WRITE_NOT_RDY__SHIFT 0x5
#define CP_STALLED_STAT3__CE_TO_INC_FIFO_NOT_RDY_TO_RCV_MASK 0x40
#define CP_STALLED_STAT3__CE_TO_INC_FIFO_NOT_RDY_TO_RCV__SHIFT 0x6
#define CP_STALLED_STAT3__CE_TO_WR_FIFO_NOT_RDY_TO_RCV_MASK 0x80
#define CP_STALLED_STAT3__CE_TO_WR_FIFO_NOT_RDY_TO_RCV__SHIFT 0x7
#define CP_STALLED_STAT3__CE_TO_MIU_WRITE_NOT_RDY_TO_RCV_MASK 0x100
#define CP_STALLED_STAT3__CE_TO_MIU_WRITE_NOT_RDY_TO_RCV__SHIFT 0x8
#define CP_STALLED_STAT3__CE_WAITING_ON_BUFFER_DATA_MASK 0x400
#define CP_STALLED_STAT3__CE_WAITING_ON_BUFFER_DATA__SHIFT 0xa
#define CP_STALLED_STAT3__CE_WAITING_ON_CE_BUFFER_FLAG_MASK 0x800
#define CP_STALLED_STAT3__CE_WAITING_ON_CE_BUFFER_FLAG__SHIFT 0xb
#define CP_STALLED_STAT3__CE_WAITING_ON_DE_COUNTER_MASK 0x1000
#define CP_STALLED_STAT3__CE_WAITING_ON_DE_COUNTER__SHIFT 0xc
#define CP_STALLED_STAT3__CE_WAITING_ON_DE_COUNTER_UNDERFLOW_MASK 0x2000
#define CP_STALLED_STAT3__CE_WAITING_ON_DE_COUNTER_UNDERFLOW__SHIFT 0xd
#define CP_STALLED_STAT3__TCIU_WAITING_ON_FREE_MASK 0x4000
#define CP_STALLED_STAT3__TCIU_WAITING_ON_FREE__SHIFT 0xe
#define CP_STALLED_STAT3__TCIU_WAITING_ON_TAGS_MASK 0x8000
#define CP_STALLED_STAT3__TCIU_WAITING_ON_TAGS__SHIFT 0xf
/*
 * CP_BUSY_STAT field definitions (_MASK = bits in place, __SHIFT = lowest
 * bit index).  Every field is one bit wide.  Fields are defined at bits
 * 0, 6-10, 12-15 and 17-22; bits 1-5, 11, 16 and 23-31 have no field
 * defined in this group.
 */
#define CP_BUSY_STAT__REG_BUS_FIFO_BUSY_MASK 0x1
#define CP_BUSY_STAT__REG_BUS_FIFO_BUSY__SHIFT 0x0
#define CP_BUSY_STAT__COHER_CNT_NEQ_ZERO_MASK 0x40
#define CP_BUSY_STAT__COHER_CNT_NEQ_ZERO__SHIFT 0x6
#define CP_BUSY_STAT__PFP_PARSING_PACKETS_MASK 0x80
#define CP_BUSY_STAT__PFP_PARSING_PACKETS__SHIFT 0x7
#define CP_BUSY_STAT__ME_PARSING_PACKETS_MASK 0x100
#define CP_BUSY_STAT__ME_PARSING_PACKETS__SHIFT 0x8
#define CP_BUSY_STAT__RCIU_PFP_BUSY_MASK 0x200
#define CP_BUSY_STAT__RCIU_PFP_BUSY__SHIFT 0x9
#define CP_BUSY_STAT__RCIU_ME_BUSY_MASK 0x400
#define CP_BUSY_STAT__RCIU_ME_BUSY__SHIFT 0xa
#define CP_BUSY_STAT__SEM_CMDFIFO_NOT_EMPTY_MASK 0x1000
#define CP_BUSY_STAT__SEM_CMDFIFO_NOT_EMPTY__SHIFT 0xc
#define CP_BUSY_STAT__SEM_FAILED_AND_HOLDING_MASK 0x2000
#define CP_BUSY_STAT__SEM_FAILED_AND_HOLDING__SHIFT 0xd
#define CP_BUSY_STAT__SEM_POLLING_FOR_PASS_MASK 0x4000
#define CP_BUSY_STAT__SEM_POLLING_FOR_PASS__SHIFT 0xe
#define CP_BUSY_STAT__GFX_CONTEXT_BUSY_MASK 0x8000
#define CP_BUSY_STAT__GFX_CONTEXT_BUSY__SHIFT 0xf
#define CP_BUSY_STAT__ME_PARSER_BUSY_MASK 0x20000
#define CP_BUSY_STAT__ME_PARSER_BUSY__SHIFT 0x11
#define CP_BUSY_STAT__EOP_DONE_BUSY_MASK 0x40000
#define CP_BUSY_STAT__EOP_DONE_BUSY__SHIFT 0x12
#define CP_BUSY_STAT__STRM_OUT_BUSY_MASK 0x80000
#define CP_BUSY_STAT__STRM_OUT_BUSY__SHIFT 0x13
#define CP_BUSY_STAT__PIPE_STATS_BUSY_MASK 0x100000
#define CP_BUSY_STAT__PIPE_STATS_BUSY__SHIFT 0x14
#define CP_BUSY_STAT__RCIU_CE_BUSY_MASK 0x200000
#define CP_BUSY_STAT__RCIU_CE_BUSY__SHIFT 0x15
#define CP_BUSY_STAT__CE_PARSING_PACKETS_MASK 0x400000
#define CP_BUSY_STAT__CE_PARSING_PACKETS__SHIFT 0x16
/*
 * CP_STAT field definitions (_MASK = bits in place, __SHIFT = lowest bit
 * index).  Every field is one bit wide.  Fields occupy bits 7-13 and 15-31;
 * bits 0-6 and bit 14 have no field defined in this group.  CP_BUSY is the
 * top bit (bit 31).
 */
#define CP_STAT__MIU_RDREQ_BUSY_MASK 0x80
#define CP_STAT__MIU_RDREQ_BUSY__SHIFT 0x7
#define CP_STAT__MIU_WRREQ_BUSY_MASK 0x100
#define CP_STAT__MIU_WRREQ_BUSY__SHIFT 0x8
#define CP_STAT__ROQ_RING_BUSY_MASK 0x200
#define CP_STAT__ROQ_RING_BUSY__SHIFT 0x9
#define CP_STAT__ROQ_INDIRECT1_BUSY_MASK 0x400
#define CP_STAT__ROQ_INDIRECT1_BUSY__SHIFT 0xa
#define CP_STAT__ROQ_INDIRECT2_BUSY_MASK 0x800
#define CP_STAT__ROQ_INDIRECT2_BUSY__SHIFT 0xb
#define CP_STAT__ROQ_STATE_BUSY_MASK 0x1000
#define CP_STAT__ROQ_STATE_BUSY__SHIFT 0xc
#define CP_STAT__DC_BUSY_MASK 0x2000
#define CP_STAT__DC_BUSY__SHIFT 0xd
#define CP_STAT__PFP_BUSY_MASK 0x8000
#define CP_STAT__PFP_BUSY__SHIFT 0xf
#define CP_STAT__MEQ_BUSY_MASK 0x10000
#define CP_STAT__MEQ_BUSY__SHIFT 0x10
#define CP_STAT__ME_BUSY_MASK 0x20000
#define CP_STAT__ME_BUSY__SHIFT 0x11
#define CP_STAT__QUERY_BUSY_MASK 0x40000
#define CP_STAT__QUERY_BUSY__SHIFT 0x12
#define CP_STAT__SEMAPHORE_BUSY_MASK 0x80000
#define CP_STAT__SEMAPHORE_BUSY__SHIFT 0x13
#define CP_STAT__INTERRUPT_BUSY_MASK 0x100000
#define CP_STAT__INTERRUPT_BUSY__SHIFT 0x14
#define CP_STAT__SURFACE_SYNC_BUSY_MASK 0x200000
#define CP_STAT__SURFACE_SYNC_BUSY__SHIFT 0x15
#define CP_STAT__DMA_BUSY_MASK 0x400000
#define CP_STAT__DMA_BUSY__SHIFT 0x16
#define CP_STAT__RCIU_BUSY_MASK 0x800000
#define CP_STAT__RCIU_BUSY__SHIFT 0x17
#define CP_STAT__SCRATCH_RAM_BUSY_MASK 0x1000000
#define CP_STAT__SCRATCH_RAM_BUSY__SHIFT 0x18
#define CP_STAT__CPC_CPG_BUSY_MASK 0x2000000
#define CP_STAT__CPC_CPG_BUSY__SHIFT 0x19
#define CP_STAT__CE_BUSY_MASK 0x4000000
#define CP_STAT__CE_BUSY__SHIFT 0x1a
#define CP_STAT__TCIU_BUSY_MASK 0x8000000
#define CP_STAT__TCIU_BUSY__SHIFT 0x1b
#define CP_STAT__ROQ_CE_RING_BUSY_MASK 0x10000000
#define CP_STAT__ROQ_CE_RING_BUSY__SHIFT 0x1c
#define CP_STAT__ROQ_CE_INDIRECT1_BUSY_MASK 0x20000000
#define CP_STAT__ROQ_CE_INDIRECT1_BUSY__SHIFT 0x1d
#define CP_STAT__ROQ_CE_INDIRECT2_BUSY_MASK 0x40000000
#define CP_STAT__ROQ_CE_INDIRECT2_BUSY__SHIFT 0x1e
#define CP_STAT__CP_BUSY_MASK 0x80000000
#define CP_STAT__CP_BUSY__SHIFT 0x1f
/*
 * Small CP registers: header dumps, GRBM free counts, MC pack/tag and CSF.
 * _MASK = field bits in place, __SHIFT = lowest bit index of the field.
 */
/* CP_ME_HEADER_DUMP / CP_PFP_HEADER_DUMP: one field spanning all 32 bits. */
#define CP_ME_HEADER_DUMP__ME_HEADER_DUMP_MASK 0xffffffff
#define CP_ME_HEADER_DUMP__ME_HEADER_DUMP__SHIFT 0x0
#define CP_PFP_HEADER_DUMP__PFP_HEADER_DUMP_MASK 0xffffffff
#define CP_PFP_HEADER_DUMP__PFP_HEADER_DUMP__SHIFT 0x0
/* CP_GRBM_FREE_COUNT: three 6-bit counts at bits 0, 8 and 16. */
#define CP_GRBM_FREE_COUNT__FREE_COUNT_MASK 0x3f
#define CP_GRBM_FREE_COUNT__FREE_COUNT__SHIFT 0x0
#define CP_GRBM_FREE_COUNT__FREE_COUNT_GDS_MASK 0x3f00
#define CP_GRBM_FREE_COUNT__FREE_COUNT_GDS__SHIFT 0x8
#define CP_GRBM_FREE_COUNT__FREE_COUNT_PFP_MASK 0x3f0000
#define CP_GRBM_FREE_COUNT__FREE_COUNT_PFP__SHIFT 0x10
/* CP_CE_HEADER_DUMP: one field spanning all 32 bits. */
#define CP_CE_HEADER_DUMP__CE_HEADER_DUMP_MASK 0xffffffff
#define CP_CE_HEADER_DUMP__CE_HEADER_DUMP__SHIFT 0x0
/* CP_MC_PACK_DELAY_CNT: 5-bit field at bit 0. */
#define CP_MC_PACK_DELAY_CNT__PACK_DELAY_CNT_MASK 0x1f
#define CP_MC_PACK_DELAY_CNT__PACK_DELAY_CNT__SHIFT 0x0
/* CP_MC_TAG_CNTL: 6-bit index at bit 0, 2-bit select at bit 16. */
#define CP_MC_TAG_CNTL__TAG_RAM_INDEX_MASK 0x3f
#define CP_MC_TAG_CNTL__TAG_RAM_INDEX__SHIFT 0x0
#define CP_MC_TAG_CNTL__TAG_RAM_SEL_MASK 0x30000
#define CP_MC_TAG_CNTL__TAG_RAM_SEL__SHIFT 0x10
/* CP_MC_TAG_DATA: one field spanning all 32 bits. */
#define CP_MC_TAG_DATA__TAG_RAM_DATA_MASK 0xffffffff
#define CP_MC_TAG_DATA__TAG_RAM_DATA__SHIFT 0x0
/* CP_CSF_STAT: 4-bit field at bit 0, 6-bit field at bit 8. */
#define CP_CSF_STAT__BUFFER_SLOTS_ALLOCATED_MASK 0xf
#define CP_CSF_STAT__BUFFER_SLOTS_ALLOCATED__SHIFT 0x0
#define CP_CSF_STAT__BUFFER_REQUEST_COUNT_MASK 0x3f00
#define CP_CSF_STAT__BUFFER_REQUEST_COUNT__SHIFT 0x8
/* CP_CSF_CNTL: 4-bit field at bit 0. */
#define CP_CSF_CNTL__FETCH_BUFFER_DEPTH_MASK 0xf
#define CP_CSF_CNTL__FETCH_BUFFER_DEPTH__SHIFT 0x0
/*
 * CP_ME_CNTL field definitions (_MASK = bits in place, __SHIFT = lowest bit
 * index).  Every field is one bit wide: *_INVALIDATE_ICACHE at bits 4, 6
 * and 8, and HALT/STEP pairs for CE, PFP and ME at bits 24-29.
 */
#define CP_ME_CNTL__CE_INVALIDATE_ICACHE_MASK 0x10
#define CP_ME_CNTL__CE_INVALIDATE_ICACHE__SHIFT 0x4
#define CP_ME_CNTL__PFP_INVALIDATE_ICACHE_MASK 0x40
#define CP_ME_CNTL__PFP_INVALIDATE_ICACHE__SHIFT 0x6
#define CP_ME_CNTL__ME_INVALIDATE_ICACHE_MASK 0x100
#define CP_ME_CNTL__ME_INVALIDATE_ICACHE__SHIFT 0x8
#define CP_ME_CNTL__CE_HALT_MASK 0x1000000
#define CP_ME_CNTL__CE_HALT__SHIFT 0x18
#define CP_ME_CNTL__CE_STEP_MASK 0x2000000
#define CP_ME_CNTL__CE_STEP__SHIFT 0x19
#define CP_ME_CNTL__PFP_HALT_MASK 0x4000000
#define CP_ME_CNTL__PFP_HALT__SHIFT 0x1a
#define CP_ME_CNTL__PFP_STEP_MASK 0x8000000
#define CP_ME_CNTL__PFP_STEP__SHIFT 0x1b
#define CP_ME_CNTL__ME_HALT_MASK 0x10000000
#define CP_ME_CNTL__ME_HALT__SHIFT 0x1c
#define CP_ME_CNTL__ME_STEP_MASK 0x20000000
#define CP_ME_CNTL__ME_STEP__SHIFT 0x1d
/*
 * CP_CNTX_STAT: two ACTIVE_* fields of 8 bits (bits 0-7 and 20-27) and two
 * CURRENT_* fields of 3 bits (bits 8-10 and 28-30).
 * CP_ME_PREEMPTION: a single one-bit field at bit 0.
 */
#define CP_CNTX_STAT__ACTIVE_HP3D_CONTEXTS_MASK 0xff
#define CP_CNTX_STAT__ACTIVE_HP3D_CONTEXTS__SHIFT 0x0
#define CP_CNTX_STAT__CURRENT_HP3D_CONTEXT_MASK 0x700
#define CP_CNTX_STAT__CURRENT_HP3D_CONTEXT__SHIFT 0x8
#define CP_CNTX_STAT__ACTIVE_GFX_CONTEXTS_MASK 0xff00000
#define CP_CNTX_STAT__ACTIVE_GFX_CONTEXTS__SHIFT 0x14
#define CP_CNTX_STAT__CURRENT_GFX_CONTEXT_MASK 0x70000000
#define CP_CNTX_STAT__CURRENT_GFX_CONTEXT__SHIFT 0x1c
#define CP_ME_PREEMPTION__ME_CNTXSW_PREEMPTION_MASK 0x1
#define CP_ME_PREEMPTION__ME_CNTXSW_PREEMPTION__SHIFT 0x0
/*
 * Ring-buffer pointer registers.
 * CP_RB0_RPTR, CP_RB_RPTR, CP_RB1_RPTR and CP_RB2_RPTR all define the same
 * 20-bit RB_RPTR field at bit 0.
 */
#define CP_RB0_RPTR__RB_RPTR_MASK 0xfffff
#define CP_RB0_RPTR__RB_RPTR__SHIFT 0x0
#define CP_RB_RPTR__RB_RPTR_MASK 0xfffff
#define CP_RB_RPTR__RB_RPTR__SHIFT 0x0
#define CP_RB1_RPTR__RB_RPTR_MASK 0xfffff
#define CP_RB1_RPTR__RB_RPTR__SHIFT 0x0
#define CP_RB2_RPTR__RB_RPTR_MASK 0xfffff
#define CP_RB2_RPTR__RB_RPTR__SHIFT 0x0
/* CP_RB_WPTR_DELAY: 28-bit timer at bits 0-27, 4-bit limit at bits 28-31. */
#define CP_RB_WPTR_DELAY__PRE_WRITE_TIMER_MASK 0xfffffff
#define CP_RB_WPTR_DELAY__PRE_WRITE_TIMER__SHIFT 0x0
#define CP_RB_WPTR_DELAY__PRE_WRITE_LIMIT_MASK 0xf0000000
#define CP_RB_WPTR_DELAY__PRE_WRITE_LIMIT__SHIFT 0x1c
/* CP_RB_WPTR_POLL_CNTL: two 16-bit halves (low = frequency, high = count). */
#define CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK 0xffff
#define CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT 0x0
#define CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK 0xffff0000
#define CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT 0x10
/*
 * Buffer base/size register triples: <X>_BASE_LO, <X>_BASE_HI, <X>_BUFSZ.
 *
 * - *_BASE_LO fields start above bit 0: CP_CE_INIT_BASE_LO covers bits 5-31
 *   (mask 0xffffffe0); every other *_BASE_LO here covers bits 2-31
 *   (mask 0xfffffffc).  The low bits are not part of the field.
 *   NOTE(review): presumably this reflects an address alignment requirement
 *   (32-byte / 4-byte) -- confirm against the hardware documentation.
 * - *_BASE_HI fields are 16 bits wide at bit 0.
 * - *_BUFSZ fields are 20 bits wide at bit 0, except CP_CE_INIT_BUFSZ,
 *   which is 12 bits wide.
 */
#define CP_CE_INIT_BASE_LO__INIT_BASE_LO_MASK 0xffffffe0
#define CP_CE_INIT_BASE_LO__INIT_BASE_LO__SHIFT 0x5
#define CP_CE_INIT_BASE_HI__INIT_BASE_HI_MASK 0xffff
#define CP_CE_INIT_BASE_HI__INIT_BASE_HI__SHIFT 0x0
#define CP_CE_INIT_BUFSZ__INIT_BUFSZ_MASK 0xfff
#define CP_CE_INIT_BUFSZ__INIT_BUFSZ__SHIFT 0x0
#define CP_CE_IB1_BASE_LO__IB1_BASE_LO_MASK 0xfffffffc
#define CP_CE_IB1_BASE_LO__IB1_BASE_LO__SHIFT 0x2
#define CP_CE_IB1_BASE_HI__IB1_BASE_HI_MASK 0xffff
#define CP_CE_IB1_BASE_HI__IB1_BASE_HI__SHIFT 0x0
#define CP_CE_IB1_BUFSZ__IB1_BUFSZ_MASK 0xfffff
#define CP_CE_IB1_BUFSZ__IB1_BUFSZ__SHIFT 0x0
#define CP_CE_IB2_BASE_LO__IB2_BASE_LO_MASK 0xfffffffc
#define CP_CE_IB2_BASE_LO__IB2_BASE_LO__SHIFT 0x2
#define CP_CE_IB2_BASE_HI__IB2_BASE_HI_MASK 0xffff
#define CP_CE_IB2_BASE_HI__IB2_BASE_HI__SHIFT 0x0
#define CP_CE_IB2_BUFSZ__IB2_BUFSZ_MASK 0xfffff
#define CP_CE_IB2_BUFSZ__IB2_BUFSZ__SHIFT 0x0
#define CP_IB1_BASE_LO__IB1_BASE_LO_MASK 0xfffffffc
#define CP_IB1_BASE_LO__IB1_BASE_LO__SHIFT 0x2
#define CP_IB1_BASE_HI__IB1_BASE_HI_MASK 0xffff
#define CP_IB1_BASE_HI__IB1_BASE_HI__SHIFT 0x0
#define CP_IB1_BUFSZ__IB1_BUFSZ_MASK 0xfffff
#define CP_IB1_BUFSZ__IB1_BUFSZ__SHIFT 0x0
#define CP_IB2_BASE_LO__IB2_BASE_LO_MASK 0xfffffffc
#define CP_IB2_BASE_LO__IB2_BASE_LO__SHIFT 0x2
#define CP_IB2_BASE_HI__IB2_BASE_HI_MASK 0xffff
#define CP_IB2_BASE_HI__IB2_BASE_HI__SHIFT 0x0
#define CP_IB2_BUFSZ__IB2_BUFSZ_MASK 0xfffff
#define CP_IB2_BUFSZ__IB2_BUFSZ__SHIFT 0x0
#define CP_ST_BASE_LO__ST_BASE_LO_MASK 0xfffffffc
#define CP_ST_BASE_LO__ST_BASE_LO__SHIFT 0x2
#define CP_ST_BASE_HI__ST_BASE_HI_MASK 0xffff
#define CP_ST_BASE_HI__ST_BASE_HI__SHIFT 0x0
#define CP_ST_BUFSZ__ST_BUFSZ_MASK 0xfffff
#define CP_ST_BUFSZ__ST_BUFSZ__SHIFT 0x0
/*
 * Queue threshold registers (*_THRESHOLD / *_THRESHOLDS).
 *
 * Every *_START field is 8 bits wide and byte-aligned (shift 0, 8, 16 or
 * 24), except in CP_QUEUE_THRESHOLDS, whose two fields are 6 bits wide at
 * bits 0 and 8.
 */
#define CP_ROQ_THRESHOLDS__IB1_START_MASK 0xff
#define CP_ROQ_THRESHOLDS__IB1_START__SHIFT 0x0
#define CP_ROQ_THRESHOLDS__IB2_START_MASK 0xff00
#define CP_ROQ_THRESHOLDS__IB2_START__SHIFT 0x8
#define CP_MEQ_STQ_THRESHOLD__STQ_START_MASK 0xff
#define CP_MEQ_STQ_THRESHOLD__STQ_START__SHIFT 0x0
#define CP_ROQ1_THRESHOLDS__RB1_START_MASK 0xff
#define CP_ROQ1_THRESHOLDS__RB1_START__SHIFT 0x0
#define CP_ROQ1_THRESHOLDS__RB2_START_MASK 0xff00
#define CP_ROQ1_THRESHOLDS__RB2_START__SHIFT 0x8
#define CP_ROQ1_THRESHOLDS__R0_IB1_START_MASK 0xff0000
#define CP_ROQ1_THRESHOLDS__R0_IB1_START__SHIFT 0x10
#define CP_ROQ1_THRESHOLDS__R1_IB1_START_MASK 0xff000000
#define CP_ROQ1_THRESHOLDS__R1_IB1_START__SHIFT 0x18
#define CP_ROQ2_THRESHOLDS__R2_IB1_START_MASK 0xff
#define CP_ROQ2_THRESHOLDS__R2_IB1_START__SHIFT 0x0
#define CP_ROQ2_THRESHOLDS__R0_IB2_START_MASK 0xff00
#define CP_ROQ2_THRESHOLDS__R0_IB2_START__SHIFT 0x8
#define CP_ROQ2_THRESHOLDS__R1_IB2_START_MASK 0xff0000
#define CP_ROQ2_THRESHOLDS__R1_IB2_START__SHIFT 0x10
#define CP_ROQ2_THRESHOLDS__R2_IB2_START_MASK 0xff000000
#define CP_ROQ2_THRESHOLDS__R2_IB2_START__SHIFT 0x18
#define CP_STQ_THRESHOLDS__STQ0_START_MASK 0xff
#define CP_STQ_THRESHOLDS__STQ0_START__SHIFT 0x0
#define CP_STQ_THRESHOLDS__STQ1_START_MASK 0xff00
#define CP_STQ_THRESHOLDS__STQ1_START__SHIFT 0x8
#define CP_STQ_THRESHOLDS__STQ2_START_MASK 0xff0000
#define CP_STQ_THRESHOLDS__STQ2_START__SHIFT 0x10
#define CP_QUEUE_THRESHOLDS__ROQ_IB1_START_MASK 0x3f
#define CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT 0x0
#define CP_QUEUE_THRESHOLDS__ROQ_IB2_START_MASK 0x3f00
#define CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT 0x8
#define CP_MEQ_THRESHOLDS__MEQ1_START_MASK 0xff
#define CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT 0x0
#define CP_MEQ_THRESHOLDS__MEQ2_START_MASK 0xff00
#define CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT 0x8
/*
 * Queue availability counters, command index/data and queue read/write
 * pointer status registers.
 *
 * Widths as defined below:
 * - *_AVAIL counts: 11 bits (mask 0x7ff, at bit 0 or bit 16), except
 *   CP_STQ_AVAIL (9 bits) and CP_MEQ_AVAIL (10 bits).
 * - *_STAT read/write pointers: 10 bits; RPTR at bit 0, WPTR at bit 16
 *   (CP_STQ_STAT and CP_STQ_WR_STAT each hold a single pointer at bit 0).
 * - CP_CMD_INDEX: 11-bit index at bit 0, 2-bit selects at bits 12 and 16.
 * - CP_CMD_DATA: one field spanning all 32 bits.
 */
#define CP_ROQ_AVAIL__ROQ_CNT_RING_MASK 0x7ff
#define CP_ROQ_AVAIL__ROQ_CNT_RING__SHIFT 0x0
#define CP_ROQ_AVAIL__ROQ_CNT_IB1_MASK 0x7ff0000
#define CP_ROQ_AVAIL__ROQ_CNT_IB1__SHIFT 0x10
#define CP_STQ_AVAIL__STQ_CNT_MASK 0x1ff
#define CP_STQ_AVAIL__STQ_CNT__SHIFT 0x0
#define CP_ROQ2_AVAIL__ROQ_CNT_IB2_MASK 0x7ff
#define CP_ROQ2_AVAIL__ROQ_CNT_IB2__SHIFT 0x0
#define CP_MEQ_AVAIL__MEQ_CNT_MASK 0x3ff
#define CP_MEQ_AVAIL__MEQ_CNT__SHIFT 0x0
#define CP_CMD_INDEX__CMD_INDEX_MASK 0x7ff
#define CP_CMD_INDEX__CMD_INDEX__SHIFT 0x0
#define CP_CMD_INDEX__CMD_ME_SEL_MASK 0x3000
#define CP_CMD_INDEX__CMD_ME_SEL__SHIFT 0xc
#define CP_CMD_INDEX__CMD_QUEUE_SEL_MASK 0x30000
#define CP_CMD_INDEX__CMD_QUEUE_SEL__SHIFT 0x10
#define CP_CMD_DATA__CMD_DATA_MASK 0xffffffff
#define CP_CMD_DATA__CMD_DATA__SHIFT 0x0
#define CP_ROQ_RB_STAT__ROQ_RPTR_PRIMARY_MASK 0x3ff
#define CP_ROQ_RB_STAT__ROQ_RPTR_PRIMARY__SHIFT 0x0
#define CP_ROQ_RB_STAT__ROQ_WPTR_PRIMARY_MASK 0x3ff0000
#define CP_ROQ_RB_STAT__ROQ_WPTR_PRIMARY__SHIFT 0x10
#define CP_ROQ_IB1_STAT__ROQ_RPTR_INDIRECT1_MASK 0x3ff
#define CP_ROQ_IB1_STAT__ROQ_RPTR_INDIRECT1__SHIFT 0x0
#define CP_ROQ_IB1_STAT__ROQ_WPTR_INDIRECT1_MASK 0x3ff0000
#define CP_ROQ_IB1_STAT__ROQ_WPTR_INDIRECT1__SHIFT 0x10
#define CP_ROQ_IB2_STAT__ROQ_RPTR_INDIRECT2_MASK 0x3ff
#define CP_ROQ_IB2_STAT__ROQ_RPTR_INDIRECT2__SHIFT 0x0
#define CP_ROQ_IB2_STAT__ROQ_WPTR_INDIRECT2_MASK 0x3ff0000
#define CP_ROQ_IB2_STAT__ROQ_WPTR_INDIRECT2__SHIFT 0x10
#define CP_STQ_STAT__STQ_RPTR_MASK 0x3ff
#define CP_STQ_STAT__STQ_RPTR__SHIFT 0x0
#define CP_STQ_WR_STAT__STQ_WPTR_MASK 0x3ff
#define CP_STQ_WR_STAT__STQ_WPTR__SHIFT 0x0
#define CP_MEQ_STAT__MEQ_RPTR_MASK 0x3ff
#define CP_MEQ_STAT__MEQ_RPTR__SHIFT 0x0
#define CP_MEQ_STAT__MEQ_WPTR_MASK 0x3ff0000
#define CP_MEQ_STAT__MEQ_WPTR__SHIFT 0x10
#define CP_CEQ1_AVAIL__CEQ_CNT_RING_MASK 0x7ff
#define CP_CEQ1_AVAIL__CEQ_CNT_RING__SHIFT 0x0
#define CP_CEQ1_AVAIL__CEQ_CNT_IB1_MASK 0x7ff0000
#define CP_CEQ1_AVAIL__CEQ_CNT_IB1__SHIFT 0x10
#define CP_CEQ2_AVAIL__CEQ_CNT_IB2_MASK 0x7ff
#define CP_CEQ2_AVAIL__CEQ_CNT_IB2__SHIFT 0x0
#define CP_CE_ROQ_RB_STAT__CEQ_RPTR_PRIMARY_MASK 0x3ff
#define CP_CE_ROQ_RB_STAT__CEQ_RPTR_PRIMARY__SHIFT 0x0
#define CP_CE_ROQ_RB_STAT__CEQ_WPTR_PRIMARY_MASK 0x3ff0000
#define CP_CE_ROQ_RB_STAT__CEQ_WPTR_PRIMARY__SHIFT 0x10
#define CP_CE_ROQ_IB1_STAT__CEQ_RPTR_INDIRECT1_MASK 0x3ff
#define CP_CE_ROQ_IB1_STAT__CEQ_RPTR_INDIRECT1__SHIFT 0x0
#define CP_CE_ROQ_IB1_STAT__CEQ_WPTR_INDIRECT1_MASK 0x3ff0000
#define CP_CE_ROQ_IB1_STAT__CEQ_WPTR_INDIRECT1__SHIFT 0x10
#define CP_CE_ROQ_IB2_STAT__CEQ_RPTR_INDIRECT2_MASK 0x3ff
#define CP_CE_ROQ_IB2_STAT__CEQ_RPTR_INDIRECT2__SHIFT 0x0
#define CP_CE_ROQ_IB2_STAT__CEQ_WPTR_INDIRECT2_MASK 0x3ff0000
#define CP_CE_ROQ_IB2_STAT__CEQ_WPTR_INDIRECT2__SHIFT 0x10
/*
 * CP_INT_STAT_DEBUG field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).  Every *_INT_ASSERTED field is one bit wide.  Fields
 * are defined at bits 14, 17, 19, 20, 22, 23, 24, 26, 27, 29, 30 and 31;
 * all other bits have no field defined in this group.  Note the GENERIC
 * fields are in descending order: GENERIC2 = bit 29, GENERIC1 = bit 30,
 * GENERIC0 = bit 31.
 */
#define CP_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED_MASK 0x4000
#define CP_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED__SHIFT 0xe
#define CP_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED_MASK 0x20000
#define CP_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED__SHIFT 0x11
#define CP_INT_STAT_DEBUG__CNTX_BUSY_INT_ASSERTED_MASK 0x80000
#define CP_INT_STAT_DEBUG__CNTX_BUSY_INT_ASSERTED__SHIFT 0x13
#define CP_INT_STAT_DEBUG__CNTX_EMPTY_INT_ASSERTED_MASK 0x100000
#define CP_INT_STAT_DEBUG__CNTX_EMPTY_INT_ASSERTED__SHIFT 0x14
#define CP_INT_STAT_DEBUG__PRIV_INSTR_INT_ASSERTED_MASK 0x400000
#define CP_INT_STAT_DEBUG__PRIV_INSTR_INT_ASSERTED__SHIFT 0x16
#define CP_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED_MASK 0x800000
#define CP_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED__SHIFT 0x17
#define CP_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED_MASK 0x1000000
#define CP_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED__SHIFT 0x18
#define CP_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED_MASK 0x4000000
#define CP_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED__SHIFT 0x1a
#define CP_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED_MASK 0x8000000
#define CP_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED__SHIFT 0x1b
#define CP_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED_MASK 0x20000000
#define CP_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED__SHIFT 0x1d
#define CP_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED_MASK 0x40000000
#define CP_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED__SHIFT 0x1e
#define CP_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED_MASK 0x80000000
#define CP_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED__SHIFT 0x1f
/*
 * CP_PERFMON_CNTL: two 4-bit state fields (bits 0-3 and 4-7), a 2-bit
 * enable mode (bits 8-9) and a one-bit sample enable (bit 10).
 * CP_PERFMON_CNTX_CNTL: a single one-bit enable at bit 31.
 */
#define CP_PERFMON_CNTL__PERFMON_STATE_MASK 0xf
#define CP_PERFMON_CNTL__PERFMON_STATE__SHIFT 0x0
#define CP_PERFMON_CNTL__SPM_PERFMON_STATE_MASK 0xf0
#define CP_PERFMON_CNTL__SPM_PERFMON_STATE__SHIFT 0x4
#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE_MASK 0x300
#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE__SHIFT 0x8
#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE_MASK 0x400
#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE__SHIFT 0xa
#define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE_MASK 0x80000000
#define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE__SHIFT 0x1f
/* CP_RINGID / CP_PIPEID: 2-bit IDs at bit 0.  CP_VMID: 4-bit ID at bit 0. */
#define CP_RINGID__RINGID_MASK 0x3
#define CP_RINGID__RINGID__SHIFT 0x0
#define CP_PIPEID__PIPE_ID_MASK 0x3
#define CP_PIPEID__PIPE_ID__SHIFT 0x0
#define CP_VMID__VMID_MASK 0xf
#define CP_VMID__VMID__SHIFT 0x0
/*
 * CP_HPD_* register field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).
 *
 * CP_HPD_ROQ_OFFSETS: 3-bit IQ offset at bit 0, 6-bit PQ and IB offsets at
 * bits 8 and 16.
 * CP_HPD_EOP_BASE_ADDR / _HI: 32-bit field plus an 8-bit high part.
 * CP_HPD_EOP_VMID: 4-bit field at bit 0.
 */
#define CP_HPD_ROQ_OFFSETS__IQ_OFFSET_MASK 0x7
#define CP_HPD_ROQ_OFFSETS__IQ_OFFSET__SHIFT 0x0
#define CP_HPD_ROQ_OFFSETS__PQ_OFFSET_MASK 0x3f00
#define CP_HPD_ROQ_OFFSETS__PQ_OFFSET__SHIFT 0x8
#define CP_HPD_ROQ_OFFSETS__IB_OFFSET_MASK 0x3f0000
#define CP_HPD_ROQ_OFFSETS__IB_OFFSET__SHIFT 0x10
#define CP_HPD_EOP_BASE_ADDR__BASE_ADDR_MASK 0xffffffff
#define CP_HPD_EOP_BASE_ADDR__BASE_ADDR__SHIFT 0x0
#define CP_HPD_EOP_BASE_ADDR_HI__BASE_ADDR_HI_MASK 0xff
#define CP_HPD_EOP_BASE_ADDR_HI__BASE_ADDR_HI__SHIFT 0x0
#define CP_HPD_EOP_VMID__VMID_MASK 0xf
#define CP_HPD_EOP_VMID__VMID__SHIFT 0x0
/*
 * CP_HPD_EOP_CONTROL: multi-bit fields are EOP_SIZE (6 bits, bits 0-5),
 * PROCESSING_QID (3 bits, bits 9-11), CACHE_POLICY (2 bits, bits 24-25)
 * and PEND_Q_SEM (3 bits, bits 28-30); the remaining fields are one bit.
 */
#define CP_HPD_EOP_CONTROL__EOP_SIZE_MASK 0x3f
#define CP_HPD_EOP_CONTROL__EOP_SIZE__SHIFT 0x0
#define CP_HPD_EOP_CONTROL__PROCESSING_EOP_MASK 0x100
#define CP_HPD_EOP_CONTROL__PROCESSING_EOP__SHIFT 0x8
#define CP_HPD_EOP_CONTROL__PROCESSING_QID_MASK 0xe00
#define CP_HPD_EOP_CONTROL__PROCESSING_QID__SHIFT 0x9
#define CP_HPD_EOP_CONTROL__PROCESS_EOP_EN_MASK 0x1000
#define CP_HPD_EOP_CONTROL__PROCESS_EOP_EN__SHIFT 0xc
#define CP_HPD_EOP_CONTROL__PROCESSING_EOPIB_MASK 0x2000
#define CP_HPD_EOP_CONTROL__PROCESSING_EOPIB__SHIFT 0xd
#define CP_HPD_EOP_CONTROL__PROCESS_EOPIB_EN_MASK 0x4000
#define CP_HPD_EOP_CONTROL__PROCESS_EOPIB_EN__SHIFT 0xe
#define CP_HPD_EOP_CONTROL__EOP_ATC_MASK 0x800000
#define CP_HPD_EOP_CONTROL__EOP_ATC__SHIFT 0x17
#define CP_HPD_EOP_CONTROL__CACHE_POLICY_MASK 0x3000000
#define CP_HPD_EOP_CONTROL__CACHE_POLICY__SHIFT 0x18
#define CP_HPD_EOP_CONTROL__EOP_VOLATILE_MASK 0x4000000
#define CP_HPD_EOP_CONTROL__EOP_VOLATILE__SHIFT 0x1a
#define CP_HPD_EOP_CONTROL__PEND_Q_SEM_MASK 0x70000000
#define CP_HPD_EOP_CONTROL__PEND_Q_SEM__SHIFT 0x1c
#define CP_HPD_EOP_CONTROL__PEND_SIG_SEM_MASK 0x80000000
#define CP_HPD_EOP_CONTROL__PEND_SIG_SEM__SHIFT 0x1f
/*
 * CP_MQD_BASE_ADDR / _HI and the first CP_HQD_* registers.
 * _MASK = field bits in place, __SHIFT = lowest bit index of the field.
 *
 * CP_MQD_BASE_ADDR: field covers bits 2-31 (mask 0xfffffffc); the low two
 * bits are not part of the field.  CP_MQD_BASE_ADDR_HI: 16 bits at bit 0.
 */
#define CP_MQD_BASE_ADDR__BASE_ADDR_MASK 0xfffffffc
#define CP_MQD_BASE_ADDR__BASE_ADDR__SHIFT 0x2
#define CP_MQD_BASE_ADDR_HI__BASE_ADDR_HI_MASK 0xffff
#define CP_MQD_BASE_ADDR_HI__BASE_ADDR_HI__SHIFT 0x0
/* CP_HQD_ACTIVE: single one-bit field at bit 0. */
#define CP_HQD_ACTIVE__ACTIVE_MASK 0x1
#define CP_HQD_ACTIVE__ACTIVE__SHIFT 0x0
/* CP_HQD_VMID: 4-bit VMID (bit 0), 4-bit IB_VMID (bit 8), 10-bit VQID (bit 16). */
#define CP_HQD_VMID__VMID_MASK 0xf
#define CP_HQD_VMID__VMID__SHIFT 0x0
#define CP_HQD_VMID__IB_VMID_MASK 0xf00
#define CP_HQD_VMID__IB_VMID__SHIFT 0x8
#define CP_HQD_VMID__VQID_MASK 0x3ff0000
#define CP_HQD_VMID__VQID__SHIFT 0x10
/* CP_HQD_PERSISTENT_STATE: bit 0 flag, 10-bit size at bits 8-17, bit 31 flag. */
#define CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK 0x1
#define CP_HQD_PERSISTENT_STATE__PRELOAD_REQ__SHIFT 0x0
#define CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE_MASK 0x3ff00
#define CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT 0x8
#define CP_HQD_PERSISTENT_STATE__DISP_ACTIVE_MASK 0x80000000
#define CP_HQD_PERSISTENT_STATE__DISP_ACTIVE__SHIFT 0x1f
/* Priorities: 2-bit pipe priority, 4-bit queue priority level, both at bit 0. */
#define CP_HQD_PIPE_PRIORITY__PIPE_PRIORITY_MASK 0x3
#define CP_HQD_PIPE_PRIORITY__PIPE_PRIORITY__SHIFT 0x0
#define CP_HQD_QUEUE_PRIORITY__PRIORITY_LEVEL_MASK 0xf
#define CP_HQD_QUEUE_PRIORITY__PRIORITY_LEVEL__SHIFT 0x0
/* CP_HQD_QUANTUM: enable (bit 0), scale (bit 4), 6-bit duration (bits 8-13). */
#define CP_HQD_QUANTUM__QUANTUM_EN_MASK 0x1
#define CP_HQD_QUANTUM__QUANTUM_EN__SHIFT 0x0
#define CP_HQD_QUANTUM__QUANTUM_SCALE_MASK 0x10
#define CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT 0x4
#define CP_HQD_QUANTUM__QUANTUM_DURATION_MASK 0x3f00
#define CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT 0x8
/*
 * CP_HQD_PQ_* register field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).
 *
 * CP_HQD_PQ_BASE / _HI: 32-bit ADDR plus an 8-bit ADDR_HI.
 * CP_HQD_PQ_RPTR and CP_HQD_PQ_WPTR: one field spanning all 32 bits.
 * *_REPORT_ADDR / *_POLL_ADDR: low word covers bits 2-31 (mask 0xfffffffc,
 * low two bits excluded); the matching *_HI word is 16 bits at bit 0.
 */
#define CP_HQD_PQ_BASE__ADDR_MASK 0xffffffff
#define CP_HQD_PQ_BASE__ADDR__SHIFT 0x0
#define CP_HQD_PQ_BASE_HI__ADDR_HI_MASK 0xff
#define CP_HQD_PQ_BASE_HI__ADDR_HI__SHIFT 0x0
#define CP_HQD_PQ_RPTR__CONSUMED_OFFSET_MASK 0xffffffff
#define CP_HQD_PQ_RPTR__CONSUMED_OFFSET__SHIFT 0x0
#define CP_HQD_PQ_RPTR_REPORT_ADDR__RPTR_REPORT_ADDR_MASK 0xfffffffc
#define CP_HQD_PQ_RPTR_REPORT_ADDR__RPTR_REPORT_ADDR__SHIFT 0x2
#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI__RPTR_REPORT_ADDR_HI_MASK 0xffff
#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI__RPTR_REPORT_ADDR_HI__SHIFT 0x0
#define CP_HQD_PQ_WPTR_POLL_ADDR__WPTR_ADDR_MASK 0xfffffffc
#define CP_HQD_PQ_WPTR_POLL_ADDR__WPTR_ADDR__SHIFT 0x2
#define CP_HQD_PQ_WPTR_POLL_ADDR_HI__WPTR_ADDR_HI_MASK 0xffff
#define CP_HQD_PQ_WPTR_POLL_ADDR_HI__WPTR_ADDR_HI__SHIFT 0x0
/*
 * CP_HQD_PQ_DOORBELL_CONTROL: DOORBELL_OFFSET is 21 bits wide at bits 2-22;
 * the four flags at bits 28-31 are one bit each.
 */
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK 0x7ffffc
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT 0x2
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK 0x10000000
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE__SHIFT 0x1c
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SCHD_HIT_MASK 0x20000000
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SCHD_HIT__SHIFT 0x1d
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK 0x40000000
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT 0x1e
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK 0x80000000
#define CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT__SHIFT 0x1f
#define CP_HQD_PQ_WPTR__OFFSET_MASK 0xffffffff
#define CP_HQD_PQ_WPTR__OFFSET__SHIFT 0x0
/*
 * CP_HQD_PQ_CONTROL: QUEUE_SIZE and RPTR_BLOCK_SIZE are 6 bits (bits 0-5
 * and 8-13); ENDIAN_SWAP, MIN_AVAIL_SIZE and CACHE_POLICY are 2 bits (at
 * bits 16, 20 and 24); PQ_ATC (bit 23) and the flags at bits 26-31 are one
 * bit each.
 */
#define CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK 0x3f
#define CP_HQD_PQ_CONTROL__QUEUE_SIZE__SHIFT 0x0
#define CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK 0x3f00
#define CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT 0x8
#define CP_HQD_PQ_CONTROL__ENDIAN_SWAP_MASK 0x30000
#define CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT 0x10
#define CP_HQD_PQ_CONTROL__MIN_AVAIL_SIZE_MASK 0x300000
#define CP_HQD_PQ_CONTROL__MIN_AVAIL_SIZE__SHIFT 0x14
#define CP_HQD_PQ_CONTROL__PQ_ATC_MASK 0x800000
#define CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT 0x17
#define CP_HQD_PQ_CONTROL__CACHE_POLICY_MASK 0x3000000
#define CP_HQD_PQ_CONTROL__CACHE_POLICY__SHIFT 0x18
#define CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK 0x4000000
#define CP_HQD_PQ_CONTROL__PQ_VOLATILE__SHIFT 0x1a
#define CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK 0x8000000
#define CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR__SHIFT 0x1b
#define CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK 0x10000000
#define CP_HQD_PQ_CONTROL__UNORD_DISPATCH__SHIFT 0x1c
#define CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK 0x20000000
#define CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP__SHIFT 0x1d
#define CP_HQD_PQ_CONTROL__PRIV_STATE_MASK 0x40000000
#define CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT 0x1e
#define CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK 0x80000000
#define CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT 0x1f
/*
 * CP_HQD_IB_* register field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).
 *
 * CP_HQD_IB_BASE_ADDR covers bits 2-31 (low two bits excluded) and
 * CP_HQD_IB_BASE_ADDR_HI is 16 bits at bit 0.  CP_HQD_IB_RPTR and
 * CP_HQD_IB_CONTROL.IB_SIZE are both 20 bits at bit 0.  In
 * CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE and IB_CACHE_POLICY are 2 bits (at
 * bits 20 and 24); IB_ATC (23), IB_VOLATILE (26) and PROCESSING_IB (31)
 * are one bit each.
 */
#define CP_HQD_IB_BASE_ADDR__IB_BASE_ADDR_MASK 0xfffffffc
#define CP_HQD_IB_BASE_ADDR__IB_BASE_ADDR__SHIFT 0x2
#define CP_HQD_IB_BASE_ADDR_HI__IB_BASE_ADDR_HI_MASK 0xffff
#define CP_HQD_IB_BASE_ADDR_HI__IB_BASE_ADDR_HI__SHIFT 0x0
#define CP_HQD_IB_RPTR__CONSUMED_OFFSET_MASK 0xfffff
#define CP_HQD_IB_RPTR__CONSUMED_OFFSET__SHIFT 0x0
#define CP_HQD_IB_CONTROL__IB_SIZE_MASK 0xfffff
#define CP_HQD_IB_CONTROL__IB_SIZE__SHIFT 0x0
#define CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE_MASK 0x300000
#define CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT 0x14
#define CP_HQD_IB_CONTROL__IB_ATC_MASK 0x800000
#define CP_HQD_IB_CONTROL__IB_ATC__SHIFT 0x17
#define CP_HQD_IB_CONTROL__IB_CACHE_POLICY_MASK 0x3000000
#define CP_HQD_IB_CONTROL__IB_CACHE_POLICY__SHIFT 0x18
#define CP_HQD_IB_CONTROL__IB_VOLATILE_MASK 0x4000000
#define CP_HQD_IB_CONTROL__IB_VOLATILE__SHIFT 0x1a
#define CP_HQD_IB_CONTROL__PROCESSING_IB_MASK 0x80000000
#define CP_HQD_IB_CONTROL__PROCESSING_IB__SHIFT 0x1f
/*
 * CP_HQD_IQ_TIMER field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index): WAIT_TIME is 8 bits (bits 0-7), RETRY_TYPE 3 bits
 * (bits 8-10), INTERRUPT_TYPE 2 bits (bits 12-13), INTERRUPT_SIZE 6 bits
 * (bits 16-21), CACHE_POLICY 2 bits (bits 24-25); IQ_ATC (23), IQ_VOLATILE
 * (26), PROCESS_IQ_EN (29), PROCESSING_IQ (30) and ACTIVE (31) are one bit.
 * CP_HQD_IQ_RPTR: a single 6-bit OFFSET field at bit 0.
 */
#define CP_HQD_IQ_TIMER__WAIT_TIME_MASK 0xff
#define CP_HQD_IQ_TIMER__WAIT_TIME__SHIFT 0x0
#define CP_HQD_IQ_TIMER__RETRY_TYPE_MASK 0x700
#define CP_HQD_IQ_TIMER__RETRY_TYPE__SHIFT 0x8
#define CP_HQD_IQ_TIMER__INTERRUPT_TYPE_MASK 0x3000
#define CP_HQD_IQ_TIMER__INTERRUPT_TYPE__SHIFT 0xc
#define CP_HQD_IQ_TIMER__INTERRUPT_SIZE_MASK 0x3f0000
#define CP_HQD_IQ_TIMER__INTERRUPT_SIZE__SHIFT 0x10
#define CP_HQD_IQ_TIMER__IQ_ATC_MASK 0x800000
#define CP_HQD_IQ_TIMER__IQ_ATC__SHIFT 0x17
#define CP_HQD_IQ_TIMER__CACHE_POLICY_MASK 0x3000000
#define CP_HQD_IQ_TIMER__CACHE_POLICY__SHIFT 0x18
#define CP_HQD_IQ_TIMER__IQ_VOLATILE_MASK 0x4000000
#define CP_HQD_IQ_TIMER__IQ_VOLATILE__SHIFT 0x1a
#define CP_HQD_IQ_TIMER__PROCESS_IQ_EN_MASK 0x20000000
#define CP_HQD_IQ_TIMER__PROCESS_IQ_EN__SHIFT 0x1d
#define CP_HQD_IQ_TIMER__PROCESSING_IQ_MASK 0x40000000
#define CP_HQD_IQ_TIMER__PROCESSING_IQ__SHIFT 0x1e
#define CP_HQD_IQ_TIMER__ACTIVE_MASK 0x80000000
#define CP_HQD_IQ_TIMER__ACTIVE__SHIFT 0x1f
#define CP_HQD_IQ_RPTR__OFFSET_MASK 0x3f
#define CP_HQD_IQ_RPTR__OFFSET__SHIFT 0x0
/*
 * CP_HQD dequeue, DMA offload, semaphore, message-type and atomic pre-op
 * registers (_MASK = bits in place, __SHIFT = lowest bit index).
 *
 * CP_HQD_DEQUEUE_REQUEST: 2-bit DEQUEUE_REQ at bit 0, one-bit flags at
 * bits 4 and 8.
 * CP_HQD_DMA_OFFLOAD: one-bit field at bit 0.
 * CP_HQD_SEMA_CMD: one-bit RETRY at bit 0, 2-bit RESULT at bits 1-2.
 * CP_HQD_MSG_TYPE: 2-bit ACTION at bit 0.
 * CP_HQD_ATOMIC{0,1}_PREOP_{LO,HI}: each one field spanning all 32 bits.
 */
#define CP_HQD_DEQUEUE_REQUEST__DEQUEUE_REQ_MASK 0x3
#define CP_HQD_DEQUEUE_REQUEST__DEQUEUE_REQ__SHIFT 0x0
#define CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK 0x10
#define CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND__SHIFT 0x4
#define CP_HQD_DEQUEUE_REQUEST__DEQUEUE_INT_MASK 0x100
#define CP_HQD_DEQUEUE_REQUEST__DEQUEUE_INT__SHIFT 0x8
#define CP_HQD_DMA_OFFLOAD__DMA_OFFLOAD_MASK 0x1
#define CP_HQD_DMA_OFFLOAD__DMA_OFFLOAD__SHIFT 0x0
#define CP_HQD_SEMA_CMD__RETRY_MASK 0x1
#define CP_HQD_SEMA_CMD__RETRY__SHIFT 0x0
#define CP_HQD_SEMA_CMD__RESULT_MASK 0x6
#define CP_HQD_SEMA_CMD__RESULT__SHIFT 0x1
#define CP_HQD_MSG_TYPE__ACTION_MASK 0x3
#define CP_HQD_MSG_TYPE__ACTION__SHIFT 0x0
#define CP_HQD_ATOMIC0_PREOP_LO__ATOMIC0_PREOP_LO_MASK 0xffffffff
#define CP_HQD_ATOMIC0_PREOP_LO__ATOMIC0_PREOP_LO__SHIFT 0x0
#define CP_HQD_ATOMIC0_PREOP_HI__ATOMIC0_PREOP_HI_MASK 0xffffffff
#define CP_HQD_ATOMIC0_PREOP_HI__ATOMIC0_PREOP_HI__SHIFT 0x0
#define CP_HQD_ATOMIC1_PREOP_LO__ATOMIC1_PREOP_LO_MASK 0xffffffff
#define CP_HQD_ATOMIC1_PREOP_LO__ATOMIC1_PREOP_LO__SHIFT 0x0
#define CP_HQD_ATOMIC1_PREOP_HI__ATOMIC1_PREOP_HI_MASK 0xffffffff
#define CP_HQD_ATOMIC1_PREOP_HI__ATOMIC1_PREOP_HI__SHIFT 0x0
/*
 * CP_HQD_HQ_SCHEDULER0: the fields below tile the whole 32-bit word with no
 * gaps -- 2-bit fields at bits 0-1, 2-3 and 4-5 (the last named RSV_5_4),
 * one-bit flags at bits 6-10, and RSVR_31_11 covering bits 11-31.
 * CP_HQD_HQ_SCHEDULER1: one field spanning all 32 bits.
 */
#define CP_HQD_HQ_SCHEDULER0__DEQUEUE_STATUS_MASK 0x3
#define CP_HQD_HQ_SCHEDULER0__DEQUEUE_STATUS__SHIFT 0x0
#define CP_HQD_HQ_SCHEDULER0__DEQUEUE_RETRY_CNT_MASK 0xc
#define CP_HQD_HQ_SCHEDULER0__DEQUEUE_RETRY_CNT__SHIFT 0x2
#define CP_HQD_HQ_SCHEDULER0__RSV_5_4_MASK 0x30
#define CP_HQD_HQ_SCHEDULER0__RSV_5_4__SHIFT 0x4
#define CP_HQD_HQ_SCHEDULER0__QUEUE_RUN_ONCE_MASK 0x40
#define CP_HQD_HQ_SCHEDULER0__QUEUE_RUN_ONCE__SHIFT 0x6
#define CP_HQD_HQ_SCHEDULER0__SCRATCH_RAM_INIT_MASK 0x80
#define CP_HQD_HQ_SCHEDULER0__SCRATCH_RAM_INIT__SHIFT 0x7
#define CP_HQD_HQ_SCHEDULER0__TCL2_DIRTY_MASK 0x100
#define CP_HQD_HQ_SCHEDULER0__TCL2_DIRTY__SHIFT 0x8
#define CP_HQD_HQ_SCHEDULER0__PG_ACTIVATED_MASK 0x200
#define CP_HQD_HQ_SCHEDULER0__PG_ACTIVATED__SHIFT 0x9
#define CP_HQD_HQ_SCHEDULER0__CG_ACTIVATED_MASK 0x400
#define CP_HQD_HQ_SCHEDULER0__CG_ACTIVATED__SHIFT 0xa
#define CP_HQD_HQ_SCHEDULER0__RSVR_31_11_MASK 0xfffff800
#define CP_HQD_HQ_SCHEDULER0__RSVR_31_11__SHIFT 0xb
#define CP_HQD_HQ_SCHEDULER1__SCHEDULER_MASK 0xffffffff
#define CP_HQD_HQ_SCHEDULER1__SCHEDULER__SHIFT 0x0
/*
 * CP_MQD_CONTROL: 4-bit VMID at bit 0, one-bit MQD_ATC at bit 23, 2-bit
 * CACHE_POLICY at bits 24-25, one-bit MQD_VOLATILE at bit 26.
 */
#define CP_MQD_CONTROL__VMID_MASK 0xf
#define CP_MQD_CONTROL__VMID__SHIFT 0x0
#define CP_MQD_CONTROL__MQD_ATC_MASK 0x800000
#define CP_MQD_CONTROL__MQD_ATC__SHIFT 0x17
#define CP_MQD_CONTROL__CACHE_POLICY_MASK 0x3000000
#define CP_MQD_CONTROL__CACHE_POLICY__SHIFT 0x18
#define CP_MQD_CONTROL__MQD_VOLATILE_MASK 0x4000000
#define CP_MQD_CONTROL__MQD_VOLATILE__SHIFT 0x1a
/*
 * DB_* base-address registers and DB_DEPTH_INFO.
 * _MASK = field bits in place, __SHIFT = lowest bit index of the field.
 *
 * DB_Z_READ_BASE, DB_STENCIL_READ_BASE, DB_Z_WRITE_BASE and
 * DB_STENCIL_WRITE_BASE each hold one BASE_256B field spanning all 32 bits.
 * NOTE(review): the field name suggests the value is expressed in 256-byte
 * units -- confirm against the hardware documentation.
 */
#define DB_Z_READ_BASE__BASE_256B_MASK 0xffffffff
#define DB_Z_READ_BASE__BASE_256B__SHIFT 0x0
#define DB_STENCIL_READ_BASE__BASE_256B_MASK 0xffffffff
#define DB_STENCIL_READ_BASE__BASE_256B__SHIFT 0x0
#define DB_Z_WRITE_BASE__BASE_256B_MASK 0xffffffff
#define DB_Z_WRITE_BASE__BASE_256B__SHIFT 0x0
#define DB_STENCIL_WRITE_BASE__BASE_256B_MASK 0xffffffff
#define DB_STENCIL_WRITE_BASE__BASE_256B__SHIFT 0x0
/*
 * DB_DEPTH_INFO: fields are packed contiguously from bit 0 to bit 20.
 * The first field is itself named ADDR5_SWIZZLE_MASK, hence the doubled
 * "_MASK_MASK" suffix on its mask macro.
 */
#define DB_DEPTH_INFO__ADDR5_SWIZZLE_MASK_MASK 0xf
#define DB_DEPTH_INFO__ADDR5_SWIZZLE_MASK__SHIFT 0x0
#define DB_DEPTH_INFO__ARRAY_MODE_MASK 0xf0
#define DB_DEPTH_INFO__ARRAY_MODE__SHIFT 0x4
#define DB_DEPTH_INFO__PIPE_CONFIG_MASK 0x1f00
#define DB_DEPTH_INFO__PIPE_CONFIG__SHIFT 0x8
#define DB_DEPTH_INFO__BANK_WIDTH_MASK 0x6000
#define DB_DEPTH_INFO__BANK_WIDTH__SHIFT 0xd
#define DB_DEPTH_INFO__BANK_HEIGHT_MASK 0x18000
#define DB_DEPTH_INFO__BANK_HEIGHT__SHIFT 0xf
#define DB_DEPTH_INFO__MACRO_TILE_ASPECT_MASK 0x60000
#define DB_DEPTH_INFO__MACRO_TILE_ASPECT__SHIFT 0x11
#define DB_DEPTH_INFO__NUM_BANKS_MASK 0x180000
#define DB_DEPTH_INFO__NUM_BANKS__SHIFT 0x13
/*
 * DB_Z_INFO and DB_STENCIL_INFO field definitions (_MASK = bits in place,
 * __SHIFT = lowest bit index).
 *
 * The two registers share field positions for TILE_SPLIT (3 bits, bits
 * 13-15), TILE_MODE_INDEX (3 bits, bits 20-22) and ALLOW_EXPCLEAR (bit 27).
 * They differ in FORMAT width (2 bits in DB_Z_INFO, 1 bit in
 * DB_STENCIL_INFO) and in the name of bit 29 (TILE_SURFACE_ENABLE vs
 * TILE_STENCIL_DISABLE).  DB_Z_INFO additionally defines NUM_SAMPLES
 * (bits 2-3), READ_SIZE (bit 28) and ZRANGE_PRECISION (bit 31).
 */
#define DB_Z_INFO__FORMAT_MASK 0x3
#define DB_Z_INFO__FORMAT__SHIFT 0x0
#define DB_Z_INFO__NUM_SAMPLES_MASK 0xc
#define DB_Z_INFO__NUM_SAMPLES__SHIFT 0x2
#define DB_Z_INFO__TILE_SPLIT_MASK 0xe000
#define DB_Z_INFO__TILE_SPLIT__SHIFT 0xd
#define DB_Z_INFO__TILE_MODE_INDEX_MASK 0x700000
#define DB_Z_INFO__TILE_MODE_INDEX__SHIFT 0x14
#define DB_Z_INFO__ALLOW_EXPCLEAR_MASK 0x8000000
#define DB_Z_INFO__ALLOW_EXPCLEAR__SHIFT 0x1b
#define DB_Z_INFO__READ_SIZE_MASK 0x10000000
#define DB_Z_INFO__READ_SIZE__SHIFT 0x1c
#define DB_Z_INFO__TILE_SURFACE_ENABLE_MASK 0x20000000
#define DB_Z_INFO__TILE_SURFACE_ENABLE__SHIFT 0x1d
#define DB_Z_INFO__ZRANGE_PRECISION_MASK 0x80000000
#define DB_Z_INFO__ZRANGE_PRECISION__SHIFT 0x1f
#define DB_STENCIL_INFO__FORMAT_MASK 0x1
#define DB_STENCIL_INFO__FORMAT__SHIFT 0x0
#define DB_STENCIL_INFO__TILE_SPLIT_MASK 0xe000
#define DB_STENCIL_INFO__TILE_SPLIT__SHIFT 0xd
#define DB_STENCIL_INFO__TILE_MODE_INDEX_MASK 0x700000
#define DB_STENCIL_INFO__TILE_MODE_INDEX__SHIFT 0x14
#define DB_STENCIL_INFO__ALLOW_EXPCLEAR_MASK 0x8000000
#define DB_STENCIL_INFO__ALLOW_EXPCLEAR__SHIFT 0x1b
#define DB_STENCIL_INFO__TILE_STENCIL_DISABLE_MASK 0x20000000
#define DB_STENCIL_INFO__TILE_STENCIL_DISABLE__SHIFT 0x1d
/*
 * Depth surface geometry registers (_MASK = bits in place, __SHIFT = lowest
 * bit index).
 *
 * DB_DEPTH_SIZE: two adjacent 11-bit fields (bits 0-10 and 11-21).
 * DB_DEPTH_SLICE: one 22-bit field at bit 0.
 * DB_DEPTH_VIEW: two 11-bit slice indices (bits 0-10 and 13-23) and two
 * one-bit read-only flags at bits 24 and 25; bits 11-12 have no field
 * defined here.
 */
#define DB_DEPTH_SIZE__PITCH_TILE_MAX_MASK 0x7ff
#define DB_DEPTH_SIZE__PITCH_TILE_MAX__SHIFT 0x0
#define DB_DEPTH_SIZE__HEIGHT_TILE_MAX_MASK 0x3ff800
#define DB_DEPTH_SIZE__HEIGHT_TILE_MAX__SHIFT 0xb
#define DB_DEPTH_SLICE__SLICE_TILE_MAX_MASK 0x3fffff
#define DB_DEPTH_SLICE__SLICE_TILE_MAX__SHIFT 0x0
#define DB_DEPTH_VIEW__SLICE_START_MASK 0x7ff
#define DB_DEPTH_VIEW__SLICE_START__SHIFT 0x0
#define DB_DEPTH_VIEW__SLICE_MAX_MASK 0xffe000
#define DB_DEPTH_VIEW__SLICE_MAX__SHIFT 0xd
#define DB_DEPTH_VIEW__Z_READ_ONLY_MASK 0x1000000
#define DB_DEPTH_VIEW__Z_READ_ONLY__SHIFT 0x18
#define DB_DEPTH_VIEW__STENCIL_READ_ONLY_MASK 0x2000000
#define DB_DEPTH_VIEW__STENCIL_READ_ONLY__SHIFT 0x19
/*
 * DB_RENDER_CONTROL field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).  Bits 0-7 are eight one-bit flags in the order listed
 * below; COPY_SAMPLE is a 4-bit field at bits 8-11.
 */
#define DB_RENDER_CONTROL__DEPTH_CLEAR_ENABLE_MASK 0x1
#define DB_RENDER_CONTROL__DEPTH_CLEAR_ENABLE__SHIFT 0x0
#define DB_RENDER_CONTROL__STENCIL_CLEAR_ENABLE_MASK 0x2
#define DB_RENDER_CONTROL__STENCIL_CLEAR_ENABLE__SHIFT 0x1
#define DB_RENDER_CONTROL__DEPTH_COPY_MASK 0x4
#define DB_RENDER_CONTROL__DEPTH_COPY__SHIFT 0x2
#define DB_RENDER_CONTROL__STENCIL_COPY_MASK 0x8
#define DB_RENDER_CONTROL__STENCIL_COPY__SHIFT 0x3
#define DB_RENDER_CONTROL__RESUMMARIZE_ENABLE_MASK 0x10
#define DB_RENDER_CONTROL__RESUMMARIZE_ENABLE__SHIFT 0x4
#define DB_RENDER_CONTROL__STENCIL_COMPRESS_DISABLE_MASK 0x20
#define DB_RENDER_CONTROL__STENCIL_COMPRESS_DISABLE__SHIFT 0x5
#define DB_RENDER_CONTROL__DEPTH_COMPRESS_DISABLE_MASK 0x40
#define DB_RENDER_CONTROL__DEPTH_COMPRESS_DISABLE__SHIFT 0x6
#define DB_RENDER_CONTROL__COPY_CENTROID_MASK 0x80
#define DB_RENDER_CONTROL__COPY_CENTROID__SHIFT 0x7
#define DB_RENDER_CONTROL__COPY_SAMPLE_MASK 0xf00
#define DB_RENDER_CONTROL__COPY_SAMPLE__SHIFT 0x8
/*
 * DB_COUNT_CONTROL field definitions (_MASK = bits in place, __SHIFT =
 * lowest bit index).  Two one-bit flags at bits 0 and 1, a 3-bit
 * SAMPLE_RATE at bits 4-6, then six 4-bit *_ENABLE fields packed
 * nibble-by-nibble from bit 8 to bit 31.  Bits 2-3 and 7 have no field
 * defined here.
 */
#define DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE_MASK 0x1
#define DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE__SHIFT 0x0
#define DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_MASK 0x2
#define DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS__SHIFT 0x1
#define DB_COUNT_CONTROL__SAMPLE_RATE_MASK 0x70
#define DB_COUNT_CONTROL__SAMPLE_RATE__SHIFT 0x4
#define DB_COUNT_CONTROL__ZPASS_ENABLE_MASK 0xf00
#define DB_COUNT_CONTROL__ZPASS_ENABLE__SHIFT 0x8
#define DB_COUNT_CONTROL__ZFAIL_ENABLE_MASK 0xf000
#define DB_COUNT_CONTROL__ZFAIL_ENABLE__SHIFT 0xc
#define DB_COUNT_CONTROL__SFAIL_ENABLE_MASK 0xf0000
#define DB_COUNT_CONTROL__SFAIL_ENABLE__SHIFT 0x10
#define DB_COUNT_CONTROL__DBFAIL_ENABLE_MASK 0xf00000
#define DB_COUNT_CONTROL__DBFAIL_ENABLE__SHIFT 0x14
#define DB_COUNT_CONTROL__SLICE_EVEN_ENABLE_MASK 0xf000000
#define DB_COUNT_CONTROL__SLICE_EVEN_ENABLE__SHIFT 0x18
#define DB_COUNT_CONTROL__SLICE_ODD_ENABLE_MASK 0xf0000000
#define DB_COUNT_CONTROL__SLICE_ODD_ENABLE__SHIFT 0x1c
/*
 * DB_RENDER_OVERRIDE field layout (mask selects bits in place; shift is the
 * field's lowest bit index). The fields below tile all 32 bits with no gaps.
 * Multi-bit fields: FORCE_HIZ_ENABLE (0-1), FORCE_HIS_ENABLE0 (2-3),
 * FORCE_HIS_ENABLE1 (4-5), FORCE_FULL_Z_RANGE (13-14),
 * FORCE_Z_LIMIT_SUMM (19-20), MAX_TILES_IN_DTT (21-25).
 * Every other field is a single bit.
 */
#define DB_RENDER_OVERRIDE__FORCE_HIZ_ENABLE_MASK 0x3
#define DB_RENDER_OVERRIDE__FORCE_HIZ_ENABLE__SHIFT 0x0
#define DB_RENDER_OVERRIDE__FORCE_HIS_ENABLE0_MASK 0xc
#define DB_RENDER_OVERRIDE__FORCE_HIS_ENABLE0__SHIFT 0x2
#define DB_RENDER_OVERRIDE__FORCE_HIS_ENABLE1_MASK 0x30
#define DB_RENDER_OVERRIDE__FORCE_HIS_ENABLE1__SHIFT 0x4
#define DB_RENDER_OVERRIDE__FORCE_SHADER_Z_ORDER_MASK 0x40
#define DB_RENDER_OVERRIDE__FORCE_SHADER_Z_ORDER__SHIFT 0x6
#define DB_RENDER_OVERRIDE__FAST_Z_DISABLE_MASK 0x80
#define DB_RENDER_OVERRIDE__FAST_Z_DISABLE__SHIFT 0x7
#define DB_RENDER_OVERRIDE__FAST_STENCIL_DISABLE_MASK 0x100
#define DB_RENDER_OVERRIDE__FAST_STENCIL_DISABLE__SHIFT 0x8
#define DB_RENDER_OVERRIDE__NOOP_CULL_DISABLE_MASK 0x200
#define DB_RENDER_OVERRIDE__NOOP_CULL_DISABLE__SHIFT 0x9
#define DB_RENDER_OVERRIDE__FORCE_COLOR_KILL_MASK 0x400
#define DB_RENDER_OVERRIDE__FORCE_COLOR_KILL__SHIFT 0xa
#define DB_RENDER_OVERRIDE__FORCE_Z_READ_MASK 0x800
#define DB_RENDER_OVERRIDE__FORCE_Z_READ__SHIFT 0xb
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_READ_MASK 0x1000
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_READ__SHIFT 0xc
#define DB_RENDER_OVERRIDE__FORCE_FULL_Z_RANGE_MASK 0x6000
#define DB_RENDER_OVERRIDE__FORCE_FULL_Z_RANGE__SHIFT 0xd
#define DB_RENDER_OVERRIDE__FORCE_QC_SMASK_CONFLICT_MASK 0x8000
#define DB_RENDER_OVERRIDE__FORCE_QC_SMASK_CONFLICT__SHIFT 0xf
#define DB_RENDER_OVERRIDE__DISABLE_VIEWPORT_CLAMP_MASK 0x10000
#define DB_RENDER_OVERRIDE__DISABLE_VIEWPORT_CLAMP__SHIFT 0x10
#define DB_RENDER_OVERRIDE__IGNORE_SC_ZRANGE_MASK 0x20000
#define DB_RENDER_OVERRIDE__IGNORE_SC_ZRANGE__SHIFT 0x11
#define DB_RENDER_OVERRIDE__DISABLE_FULLY_COVERED_MASK 0x40000
#define DB_RENDER_OVERRIDE__DISABLE_FULLY_COVERED__SHIFT 0x12
#define DB_RENDER_OVERRIDE__FORCE_Z_LIMIT_SUMM_MASK 0x180000
#define DB_RENDER_OVERRIDE__FORCE_Z_LIMIT_SUMM__SHIFT 0x13
#define DB_RENDER_OVERRIDE__MAX_TILES_IN_DTT_MASK 0x3e00000
#define DB_RENDER_OVERRIDE__MAX_TILES_IN_DTT__SHIFT 0x15
#define DB_RENDER_OVERRIDE__DISABLE_TILE_RATE_TILES_MASK 0x4000000
#define DB_RENDER_OVERRIDE__DISABLE_TILE_RATE_TILES__SHIFT 0x1a
#define DB_RENDER_OVERRIDE__FORCE_Z_DIRTY_MASK 0x8000000
#define DB_RENDER_OVERRIDE__FORCE_Z_DIRTY__SHIFT 0x1b
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_DIRTY_MASK 0x10000000
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_DIRTY__SHIFT 0x1c
#define DB_RENDER_OVERRIDE__FORCE_Z_VALID_MASK 0x20000000
#define DB_RENDER_OVERRIDE__FORCE_Z_VALID__SHIFT 0x1d
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_VALID_MASK 0x40000000
#define DB_RENDER_OVERRIDE__FORCE_STENCIL_VALID__SHIFT 0x1e
#define DB_RENDER_OVERRIDE__PRESERVE_COMPRESSION_MASK 0x80000000
#define DB_RENDER_OVERRIDE__PRESERVE_COMPRESSION__SHIFT 0x1f
/*
 * DB_RENDER_OVERRIDE2 field layout (mask selects bits in place; shift is the
 * field's lowest bit index). Fields cover bits 0-23 contiguously; bits 24-31
 * have no field defined here.
 * Multi-bit fields: PARTIAL_SQUAD_LAUNCH_CONTROL (0-1),
 * PARTIAL_SQUAD_LAUNCH_COUNTDOWN (2-4), HIZ_ZFUNC (12-14),
 * HIS_SFUNC_FF (15-17), HIS_SFUNC_BF (18-20). The rest are single bits.
 */
#define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_CONTROL_MASK 0x3
#define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_CONTROL__SHIFT 0x0
#define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_COUNTDOWN_MASK 0x1c
#define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_COUNTDOWN__SHIFT 0x2
#define DB_RENDER_OVERRIDE2__DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION_MASK 0x20
#define DB_RENDER_OVERRIDE2__DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION__SHIFT 0x5
#define DB_RENDER_OVERRIDE2__DISABLE_SMEM_EXPCLEAR_OPTIMIZATION_MASK 0x40
#define DB_RENDER_OVERRIDE2__DISABLE_SMEM_EXPCLEAR_OPTIMIZATION__SHIFT 0x6
#define DB_RENDER_OVERRIDE2__DISABLE_COLOR_ON_VALIDATION_MASK 0x80
#define DB_RENDER_OVERRIDE2__DISABLE_COLOR_ON_VALIDATION__SHIFT 0x7
#define DB_RENDER_OVERRIDE2__DECOMPRESS_Z_ON_FLUSH_MASK 0x100
#define DB_RENDER_OVERRIDE2__DECOMPRESS_Z_ON_FLUSH__SHIFT 0x8
#define DB_RENDER_OVERRIDE2__DISABLE_REG_SNOOP_MASK 0x200
#define DB_RENDER_OVERRIDE2__DISABLE_REG_SNOOP__SHIFT 0x9
#define DB_RENDER_OVERRIDE2__DEPTH_BOUNDS_HIER_DEPTH_DISABLE_MASK 0x400
#define DB_RENDER_OVERRIDE2__DEPTH_BOUNDS_HIER_DEPTH_DISABLE__SHIFT 0xa
#define DB_RENDER_OVERRIDE2__SEPARATE_HIZS_FUNC_ENABLE_MASK 0x800
#define DB_RENDER_OVERRIDE2__SEPARATE_HIZS_FUNC_ENABLE__SHIFT 0xb
#define DB_RENDER_OVERRIDE2__HIZ_ZFUNC_MASK 0x7000
#define DB_RENDER_OVERRIDE2__HIZ_ZFUNC__SHIFT 0xc
#define DB_RENDER_OVERRIDE2__HIS_SFUNC_FF_MASK 0x38000
#define DB_RENDER_OVERRIDE2__HIS_SFUNC_FF__SHIFT 0xf
#define DB_RENDER_OVERRIDE2__HIS_SFUNC_BF_MASK 0x1c0000
#define DB_RENDER_OVERRIDE2__HIS_SFUNC_BF__SHIFT 0x12
#define DB_RENDER_OVERRIDE2__PRESERVE_ZRANGE_MASK 0x200000
#define DB_RENDER_OVERRIDE2__PRESERVE_ZRANGE__SHIFT 0x15
#define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS_MASK 0x400000
#define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS__SHIFT 0x16
#define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS_MASK 0x800000
#define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS__SHIFT 0x17
/*
 * DB_EQAA field layout (mask selects bits in place; shift is the field's
 * lowest bit index).
 *   bits 0-2, 4-6, 8-10, 12-14  four 3-bit sample-count fields, each starting
 *                               on a 4-bit boundary (bits 3, 7, 11, 15 unused)
 *   bits 16-21                  six single-bit flags
 *   bits 24-26                  OVERRASTERIZATION_AMOUNT
 *   bit 27                      ENABLE_POSTZ_OVERRASTERIZATION
 * Bits 22-23 and 28-31 have no field defined here.
 */
#define DB_EQAA__MAX_ANCHOR_SAMPLES_MASK 0x7
#define DB_EQAA__MAX_ANCHOR_SAMPLES__SHIFT 0x0
#define DB_EQAA__PS_ITER_SAMPLES_MASK 0x70
#define DB_EQAA__PS_ITER_SAMPLES__SHIFT 0x4
#define DB_EQAA__MASK_EXPORT_NUM_SAMPLES_MASK 0x700
#define DB_EQAA__MASK_EXPORT_NUM_SAMPLES__SHIFT 0x8
#define DB_EQAA__ALPHA_TO_MASK_NUM_SAMPLES_MASK 0x7000
#define DB_EQAA__ALPHA_TO_MASK_NUM_SAMPLES__SHIFT 0xc
#define DB_EQAA__HIGH_QUALITY_INTERSECTIONS_MASK 0x10000
#define DB_EQAA__HIGH_QUALITY_INTERSECTIONS__SHIFT 0x10
#define DB_EQAA__INCOHERENT_EQAA_READS_MASK 0x20000
#define DB_EQAA__INCOHERENT_EQAA_READS__SHIFT 0x11
#define DB_EQAA__INTERPOLATE_COMP_Z_MASK 0x40000
#define DB_EQAA__INTERPOLATE_COMP_Z__SHIFT 0x12
#define DB_EQAA__INTERPOLATE_SRC_Z_MASK 0x80000
#define DB_EQAA__INTERPOLATE_SRC_Z__SHIFT 0x13
#define DB_EQAA__STATIC_ANCHOR_ASSOCIATIONS_MASK 0x100000
#define DB_EQAA__STATIC_ANCHOR_ASSOCIATIONS__SHIFT 0x14
#define DB_EQAA__ALPHA_TO_MASK_EQAA_DISABLE_MASK 0x200000
#define DB_EQAA__ALPHA_TO_MASK_EQAA_DISABLE__SHIFT 0x15
#define DB_EQAA__OVERRASTERIZATION_AMOUNT_MASK 0x7000000
#define DB_EQAA__OVERRASTERIZATION_AMOUNT__SHIFT 0x18
#define DB_EQAA__ENABLE_POSTZ_OVERRASTERIZATION_MASK 0x8000000
#define DB_EQAA__ENABLE_POSTZ_OVERRASTERIZATION__SHIFT 0x1b
/*
 * DB_SHADER_CONTROL field layout (mask selects bits in place; shift is the
 * field's lowest bit index).
 *   bits 0-2    three single-bit export enables
 *   bits 4-5    Z_ORDER
 *   bits 6-12   seven single-bit flags
 *   bits 13-14  CONSERVATIVE_Z_EXPORT
 * Bit 3 and bits 15-31 have no field defined here.
 */
#define DB_SHADER_CONTROL__Z_EXPORT_ENABLE_MASK 0x1
#define DB_SHADER_CONTROL__Z_EXPORT_ENABLE__SHIFT 0x0
#define DB_SHADER_CONTROL__STENCIL_TEST_VAL_EXPORT_ENABLE_MASK 0x2
#define DB_SHADER_CONTROL__STENCIL_TEST_VAL_EXPORT_ENABLE__SHIFT 0x1
#define DB_SHADER_CONTROL__STENCIL_OP_VAL_EXPORT_ENABLE_MASK 0x4
#define DB_SHADER_CONTROL__STENCIL_OP_VAL_EXPORT_ENABLE__SHIFT 0x2
#define DB_SHADER_CONTROL__Z_ORDER_MASK 0x30
#define DB_SHADER_CONTROL__Z_ORDER__SHIFT 0x4
#define DB_SHADER_CONTROL__KILL_ENABLE_MASK 0x40
#define DB_SHADER_CONTROL__KILL_ENABLE__SHIFT 0x6
#define DB_SHADER_CONTROL__COVERAGE_TO_MASK_ENABLE_MASK 0x80
#define DB_SHADER_CONTROL__COVERAGE_TO_MASK_ENABLE__SHIFT 0x7
#define DB_SHADER_CONTROL__MASK_EXPORT_ENABLE_MASK 0x100
#define DB_SHADER_CONTROL__MASK_EXPORT_ENABLE__SHIFT 0x8
#define DB_SHADER_CONTROL__EXEC_ON_HIER_FAIL_MASK 0x200
#define DB_SHADER_CONTROL__EXEC_ON_HIER_FAIL__SHIFT 0x9
#define DB_SHADER_CONTROL__EXEC_ON_NOOP_MASK 0x400
#define DB_SHADER_CONTROL__EXEC_ON_NOOP__SHIFT 0xa
#define DB_SHADER_CONTROL__ALPHA_TO_MASK_DISABLE_MASK 0x800
#define DB_SHADER_CONTROL__ALPHA_TO_MASK_DISABLE__SHIFT 0xb
#define DB_SHADER_CONTROL__DEPTH_BEFORE_SHADER_MASK 0x1000
#define DB_SHADER_CONTROL__DEPTH_BEFORE_SHADER__SHIFT 0xc
#define DB_SHADER_CONTROL__CONSERVATIVE_Z_EXPORT_MASK 0x6000
#define DB_SHADER_CONTROL__CONSERVATIVE_Z_EXPORT__SHIFT 0xd
/*
 * Single-field registers: DB_DEPTH_BOUNDS_MIN, DB_DEPTH_BOUNDS_MAX and
 * DB_DEPTH_CLEAR each expose one field spanning all 32 bits (mask 0xffffffff,
 * shift 0). DB_STENCIL_CLEAR exposes one 8-bit field at bits 0-7.
 */
#define DB_DEPTH_BOUNDS_MIN__MIN_MASK 0xffffffff
#define DB_DEPTH_BOUNDS_MIN__MIN__SHIFT 0x0
#define DB_DEPTH_BOUNDS_MAX__MAX_MASK 0xffffffff
#define DB_DEPTH_BOUNDS_MAX__MAX__SHIFT 0x0
#define DB_STENCIL_CLEAR__CLEAR_MASK 0xff
#define DB_STENCIL_CLEAR__CLEAR__SHIFT 0x0
#define DB_DEPTH_CLEAR__DEPTH_CLEAR_MASK 0xffffffff
#define DB_DEPTH_CLEAR__DEPTH_CLEAR__SHIFT 0x0
/*
 * DB_HTILE_DATA_BASE / DB_HTILE_SURFACE / DB_PRELOAD_CONTROL field layout
 * (mask selects bits in place; shift is the field's lowest bit index).
 *   DB_HTILE_DATA_BASE : BASE_256B spans all 32 bits.
 *                        NOTE(review): the "_256B" suffix presumably means the
 *                        value is in 256-byte units - confirm against the
 *                        hardware register reference.
 *   DB_HTILE_SURFACE   : bits 0-3 single-bit flags, PREFETCH_WIDTH bits 4-9,
 *                        PREFETCH_HEIGHT bits 10-15,
 *                        DST_OUTSIDE_ZERO_TO_ONE bit 16.
 *   DB_PRELOAD_CONTROL : four 8-bit fields (START_X, START_Y, MAX_X, MAX_Y)
 *                        packed from bit 0 upward.
 */
#define DB_HTILE_DATA_BASE__BASE_256B_MASK 0xffffffff
#define DB_HTILE_DATA_BASE__BASE_256B__SHIFT 0x0
#define DB_HTILE_SURFACE__LINEAR_MASK 0x1
#define DB_HTILE_SURFACE__LINEAR__SHIFT 0x0
#define DB_HTILE_SURFACE__FULL_CACHE_MASK 0x2
#define DB_HTILE_SURFACE__FULL_CACHE__SHIFT 0x1
#define DB_HTILE_SURFACE__HTILE_USES_PRELOAD_WIN_MASK 0x4
#define DB_HTILE_SURFACE__HTILE_USES_PRELOAD_WIN__SHIFT 0x2
#define DB_HTILE_SURFACE__PRELOAD_MASK 0x8
#define DB_HTILE_SURFACE__PRELOAD__SHIFT 0x3
#define DB_HTILE_SURFACE__PREFETCH_WIDTH_MASK 0x3f0
#define DB_HTILE_SURFACE__PREFETCH_WIDTH__SHIFT 0x4
#define DB_HTILE_SURFACE__PREFETCH_HEIGHT_MASK 0xfc00
#define DB_HTILE_SURFACE__PREFETCH_HEIGHT__SHIFT 0xa
#define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE_MASK 0x10000
#define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE__SHIFT 0x10
#define DB_PRELOAD_CONTROL__START_X_MASK 0xff
#define DB_PRELOAD_CONTROL__START_X__SHIFT 0x0
#define DB_PRELOAD_CONTROL__START_Y_MASK 0xff00
#define DB_PRELOAD_CONTROL__START_Y__SHIFT 0x8
#define DB_PRELOAD_CONTROL__MAX_X_MASK 0xff0000
#define DB_PRELOAD_CONTROL__MAX_X__SHIFT 0x10
#define DB_PRELOAD_CONTROL__MAX_Y_MASK 0xff000000
#define DB_PRELOAD_CONTROL__MAX_Y__SHIFT 0x18
/*
 * DB_STENCILREFMASK and DB_STENCILREFMASK_BF field layout. Both registers use
 * the same packing: four 8-bit fields at bits 0-7 (STENCILTESTVAL),
 * 8-15 (STENCILMASK), 16-23 (STENCILWRITEMASK) and 24-31 (STENCILOPVAL);
 * the _BF register's field names carry an extra _BF suffix.
 */
#define DB_STENCILREFMASK__STENCILTESTVAL_MASK 0xff
#define DB_STENCILREFMASK__STENCILTESTVAL__SHIFT 0x0
#define DB_STENCILREFMASK__STENCILMASK_MASK 0xff00
#define DB_STENCILREFMASK__STENCILMASK__SHIFT 0x8
#define DB_STENCILREFMASK__STENCILWRITEMASK_MASK 0xff0000
#define DB_STENCILREFMASK__STENCILWRITEMASK__SHIFT 0x10
#define DB_STENCILREFMASK__STENCILOPVAL_MASK 0xff000000
#define DB_STENCILREFMASK__STENCILOPVAL__SHIFT 0x18
#define DB_STENCILREFMASK_BF__STENCILTESTVAL_BF_MASK 0xff
#define DB_STENCILREFMASK_BF__STENCILTESTVAL_BF__SHIFT 0x0
#define DB_STENCILREFMASK_BF__STENCILMASK_BF_MASK 0xff00
#define DB_STENCILREFMASK_BF__STENCILMASK_BF__SHIFT 0x8
#define DB_STENCILREFMASK_BF__STENCILWRITEMASK_BF_MASK 0xff0000
#define DB_STENCILREFMASK_BF__STENCILWRITEMASK_BF__SHIFT 0x10
#define DB_STENCILREFMASK_BF__STENCILOPVAL_BF_MASK 0xff000000
#define DB_STENCILREFMASK_BF__STENCILOPVAL_BF__SHIFT 0x18
/*
 * DB_SRESULTS_COMPARE_STATE0 / STATE1 field layout. Both registers share the
 * same packing (field names end in 0 or 1 respectively):
 *   bits 0-2    COMPAREFUNCn
 *   bits 4-11   COMPAREVALUEn
 *   bits 12-19  COMPAREMASKn
 *   bit 24      ENABLEn
 * Bit 3 and bits 20-23 and 25-31 have no field defined here.
 */
#define DB_SRESULTS_COMPARE_STATE0__COMPAREFUNC0_MASK 0x7
#define DB_SRESULTS_COMPARE_STATE0__COMPAREFUNC0__SHIFT 0x0
#define DB_SRESULTS_COMPARE_STATE0__COMPAREVALUE0_MASK 0xff0
#define DB_SRESULTS_COMPARE_STATE0__COMPAREVALUE0__SHIFT 0x4
#define DB_SRESULTS_COMPARE_STATE0__COMPAREMASK0_MASK 0xff000
#define DB_SRESULTS_COMPARE_STATE0__COMPAREMASK0__SHIFT 0xc
#define DB_SRESULTS_COMPARE_STATE0__ENABLE0_MASK 0x1000000
#define DB_SRESULTS_COMPARE_STATE0__ENABLE0__SHIFT 0x18
#define DB_SRESULTS_COMPARE_STATE1__COMPAREFUNC1_MASK 0x7
#define DB_SRESULTS_COMPARE_STATE1__COMPAREFUNC1__SHIFT 0x0
#define DB_SRESULTS_COMPARE_STATE1__COMPAREVALUE1_MASK 0xff0
#define DB_SRESULTS_COMPARE_STATE1__COMPAREVALUE1__SHIFT 0x4
#define DB_SRESULTS_COMPARE_STATE1__COMPAREMASK1_MASK 0xff000
#define DB_SRESULTS_COMPARE_STATE1__COMPAREMASK1__SHIFT 0xc
#define DB_SRESULTS_COMPARE_STATE1__ENABLE1_MASK 0x1000000
#define DB_SRESULTS_COMPARE_STATE1__ENABLE1__SHIFT 0x18
/*
 * DB_DEPTH_CONTROL / DB_STENCIL_CONTROL field layout (mask selects bits in
 * place; shift is the field's lowest bit index).
 *   DB_DEPTH_CONTROL:
 *     bits 0-3    STENCIL_ENABLE, Z_ENABLE, Z_WRITE_ENABLE, DEPTH_BOUNDS_ENABLE
 *     bits 4-6    ZFUNC
 *     bit 7       BACKFACE_ENABLE
 *     bits 8-10   STENCILFUNC
 *     bits 20-22  STENCILFUNC_BF
 *     bit 30      ENABLE_COLOR_WRITES_ON_DEPTH_FAIL
 *     bit 31      DISABLE_COLOR_WRITES_ON_DEPTH_PASS
 *     Bits 11-19 and 23-29 have no field defined here.
 *   DB_STENCIL_CONTROL: six 4-bit fields packed contiguously over bits 0-23
 *     (FAIL, ZPASS, ZFAIL, then the three _BF counterparts).
 */
#define DB_DEPTH_CONTROL__STENCIL_ENABLE_MASK 0x1
#define DB_DEPTH_CONTROL__STENCIL_ENABLE__SHIFT 0x0
#define DB_DEPTH_CONTROL__Z_ENABLE_MASK 0x2
#define DB_DEPTH_CONTROL__Z_ENABLE__SHIFT 0x1
#define DB_DEPTH_CONTROL__Z_WRITE_ENABLE_MASK 0x4
#define DB_DEPTH_CONTROL__Z_WRITE_ENABLE__SHIFT 0x2
#define DB_DEPTH_CONTROL__DEPTH_BOUNDS_ENABLE_MASK 0x8
#define DB_DEPTH_CONTROL__DEPTH_BOUNDS_ENABLE__SHIFT 0x3
#define DB_DEPTH_CONTROL__ZFUNC_MASK 0x70
#define DB_DEPTH_CONTROL__ZFUNC__SHIFT 0x4
#define DB_DEPTH_CONTROL__BACKFACE_ENABLE_MASK 0x80
#define DB_DEPTH_CONTROL__BACKFACE_ENABLE__SHIFT 0x7
#define DB_DEPTH_CONTROL__STENCILFUNC_MASK 0x700
#define DB_DEPTH_CONTROL__STENCILFUNC__SHIFT 0x8
#define DB_DEPTH_CONTROL__STENCILFUNC_BF_MASK 0x700000
#define DB_DEPTH_CONTROL__STENCILFUNC_BF__SHIFT 0x14
#define DB_DEPTH_CONTROL__ENABLE_COLOR_WRITES_ON_DEPTH_FAIL_MASK 0x40000000
#define DB_DEPTH_CONTROL__ENABLE_COLOR_WRITES_ON_DEPTH_FAIL__SHIFT 0x1e
#define DB_DEPTH_CONTROL__DISABLE_COLOR_WRITES_ON_DEPTH_PASS_MASK 0x80000000
#define DB_DEPTH_CONTROL__DISABLE_COLOR_WRITES_ON_DEPTH_PASS__SHIFT 0x1f
#define DB_STENCIL_CONTROL__STENCILFAIL_MASK 0xf
#define DB_STENCIL_CONTROL__STENCILFAIL__SHIFT 0x0
#define DB_STENCIL_CONTROL__STENCILZPASS_MASK 0xf0
#define DB_STENCIL_CONTROL__STENCILZPASS__SHIFT 0x4
#define DB_STENCIL_CONTROL__STENCILZFAIL_MASK 0xf00
#define DB_STENCIL_CONTROL__STENCILZFAIL__SHIFT 0x8
#define DB_STENCIL_CONTROL__STENCILFAIL_BF_MASK 0xf000
#define DB_STENCIL_CONTROL__STENCILFAIL_BF__SHIFT 0xc
#define DB_STENCIL_CONTROL__STENCILZPASS_BF_MASK 0xf0000
#define DB_STENCIL_CONTROL__STENCILZPASS_BF__SHIFT 0x10
#define DB_STENCIL_CONTROL__STENCILZFAIL_BF_MASK 0xf00000
#define DB_STENCIL_CONTROL__STENCILZFAIL_BF__SHIFT 0x14
/*
 * DB_ALPHA_TO_MASK field layout (mask selects bits in place; shift is the
 * field's lowest bit index).
 *   bit 0       ALPHA_TO_MASK_ENABLE
 *   bits 8-15   four 2-bit fields ALPHA_TO_MASK_OFFSET0..3
 *   bit 16      OFFSET_ROUND
 * Bits 1-7 and 17-31 have no field defined here.
 */
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_ENABLE_MASK 0x1
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_ENABLE__SHIFT 0x0
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET0_MASK 0x300
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET0__SHIFT 0x8
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET1_MASK 0xc00
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET1__SHIFT 0xa
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET2_MASK 0x3000
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET2__SHIFT 0xc
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET3_MASK 0xc000
#define DB_ALPHA_TO_MASK__ALPHA_TO_MASK_OFFSET3__SHIFT 0xe
#define DB_ALPHA_TO_MASK__OFFSET_ROUND_MASK 0x10000
#define DB_ALPHA_TO_MASK__OFFSET_ROUND__SHIFT 0x10
/*
 * DB_PERFCOUNTER{0..3} field layout (mask selects bits in place; shift is the
 * field's lowest bit index).
 *   DB_PERFCOUNTERn_SELECT (n = 0..3, identical packing):
 *     bits 0-9    PERF_SEL
 *     bits 10-19  PERF_SEL1
 *     bits 20-23  CNTR_MODE
 *     bits 24-27  PERF_MODE1
 *     bits 28-31  PERF_MODE
 *   DB_PERFCOUNTERn_SELECT1 (defined below only for n = 0 and 1):
 *     bits 0-9    PERF_SEL2
 *     bits 10-19  PERF_SEL3
 *     bits 24-27  PERF_MODE3
 *     bits 28-31  PERF_MODE2
 *     Bits 20-23 have no field defined here.
 *   DB_PERFCOUNTERn_LO / _HI (n = 0..3): one field spanning all 32 bits.
 */
#define DB_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define DB_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define DB_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define DB_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define DB_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define DB_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define DB_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define DB_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define DB_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define DB_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define DB_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define DB_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define DB_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define DB_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define DB_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define DB_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define DB_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define DB_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define DB_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define DB_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define DB_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define DB_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define DB_PERFCOUNTER2_SELECT__PERF_SEL1_MASK 0xffc00
#define DB_PERFCOUNTER2_SELECT__PERF_SEL1__SHIFT 0xa
#define DB_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define DB_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define DB_PERFCOUNTER2_SELECT__PERF_MODE1_MASK 0xf000000
#define DB_PERFCOUNTER2_SELECT__PERF_MODE1__SHIFT 0x18
#define DB_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define DB_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define DB_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define DB_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define DB_PERFCOUNTER3_SELECT__PERF_SEL1_MASK 0xffc00
#define DB_PERFCOUNTER3_SELECT__PERF_SEL1__SHIFT 0xa
#define DB_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define DB_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
#define DB_PERFCOUNTER3_SELECT__PERF_MODE1_MASK 0xf000000
#define DB_PERFCOUNTER3_SELECT__PERF_MODE1__SHIFT 0x18
#define DB_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define DB_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
#define DB_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define DB_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define DB_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define DB_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define DB_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define DB_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define DB_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define DB_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
#define DB_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define DB_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define DB_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define DB_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define DB_PERFCOUNTER1_SELECT1__PERF_MODE3_MASK 0xf000000
#define DB_PERFCOUNTER1_SELECT1__PERF_MODE3__SHIFT 0x18
#define DB_PERFCOUNTER1_SELECT1__PERF_MODE2_MASK 0xf0000000
#define DB_PERFCOUNTER1_SELECT1__PERF_MODE2__SHIFT 0x1c
#define DB_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define DB_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define DB_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define DB_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define DB_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define DB_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define DB_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define DB_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define DB_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define DB_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define DB_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define DB_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define DB_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define DB_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define DB_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define DB_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * DB_DEBUG field layout (mask selects bits in place; shift is the field's
 * lowest bit index). The fields below tile all 32 bits with no gaps.
 * Multi-bit fields: FORCE_Z_MODE (4-5), DEBUG_FORCE_HIZ_ENABLE (8-9),
 * DEBUG_FORCE_HIS_ENABLE0 (10-11), DEBUG_FORCE_HIS_ENABLE1 (12-13),
 * DEBUG_FORCE_FULL_Z_RANGE (19-20), DECOMPRESS_AFTER_N_ZPLANES (24-27).
 * Every other field is a single bit.
 */
#define DB_DEBUG__DEBUG_STENCIL_COMPRESS_DISABLE_MASK 0x1
#define DB_DEBUG__DEBUG_STENCIL_COMPRESS_DISABLE__SHIFT 0x0
#define DB_DEBUG__DEBUG_DEPTH_COMPRESS_DISABLE_MASK 0x2
#define DB_DEBUG__DEBUG_DEPTH_COMPRESS_DISABLE__SHIFT 0x1
#define DB_DEBUG__FETCH_FULL_Z_TILE_MASK 0x4
#define DB_DEBUG__FETCH_FULL_Z_TILE__SHIFT 0x2
#define DB_DEBUG__FETCH_FULL_STENCIL_TILE_MASK 0x8
#define DB_DEBUG__FETCH_FULL_STENCIL_TILE__SHIFT 0x3
#define DB_DEBUG__FORCE_Z_MODE_MASK 0x30
#define DB_DEBUG__FORCE_Z_MODE__SHIFT 0x4
#define DB_DEBUG__DEBUG_FORCE_DEPTH_READ_MASK 0x40
#define DB_DEBUG__DEBUG_FORCE_DEPTH_READ__SHIFT 0x6
#define DB_DEBUG__DEBUG_FORCE_STENCIL_READ_MASK 0x80
#define DB_DEBUG__DEBUG_FORCE_STENCIL_READ__SHIFT 0x7
#define DB_DEBUG__DEBUG_FORCE_HIZ_ENABLE_MASK 0x300
#define DB_DEBUG__DEBUG_FORCE_HIZ_ENABLE__SHIFT 0x8
#define DB_DEBUG__DEBUG_FORCE_HIS_ENABLE0_MASK 0xc00
#define DB_DEBUG__DEBUG_FORCE_HIS_ENABLE0__SHIFT 0xa
#define DB_DEBUG__DEBUG_FORCE_HIS_ENABLE1_MASK 0x3000
#define DB_DEBUG__DEBUG_FORCE_HIS_ENABLE1__SHIFT 0xc
#define DB_DEBUG__DEBUG_FAST_Z_DISABLE_MASK 0x4000
#define DB_DEBUG__DEBUG_FAST_Z_DISABLE__SHIFT 0xe
#define DB_DEBUG__DEBUG_FAST_STENCIL_DISABLE_MASK 0x8000
#define DB_DEBUG__DEBUG_FAST_STENCIL_DISABLE__SHIFT 0xf
#define DB_DEBUG__DEBUG_NOOP_CULL_DISABLE_MASK 0x10000
#define DB_DEBUG__DEBUG_NOOP_CULL_DISABLE__SHIFT 0x10
#define DB_DEBUG__DISABLE_SUMM_SQUADS_MASK 0x20000
#define DB_DEBUG__DISABLE_SUMM_SQUADS__SHIFT 0x11
#define DB_DEBUG__DEPTH_CACHE_FORCE_MISS_MASK 0x40000
#define DB_DEBUG__DEPTH_CACHE_FORCE_MISS__SHIFT 0x12
#define DB_DEBUG__DEBUG_FORCE_FULL_Z_RANGE_MASK 0x180000
#define DB_DEBUG__DEBUG_FORCE_FULL_Z_RANGE__SHIFT 0x13
#define DB_DEBUG__NEVER_FREE_Z_ONLY_MASK 0x200000
#define DB_DEBUG__NEVER_FREE_Z_ONLY__SHIFT 0x15
#define DB_DEBUG__ZPASS_COUNTS_LOOK_AT_PIPE_STAT_EVENTS_MASK 0x400000
#define DB_DEBUG__ZPASS_COUNTS_LOOK_AT_PIPE_STAT_EVENTS__SHIFT 0x16
#define DB_DEBUG__DISABLE_VPORT_ZPLANE_OPTIMIZATION_MASK 0x800000
#define DB_DEBUG__DISABLE_VPORT_ZPLANE_OPTIMIZATION__SHIFT 0x17
#define DB_DEBUG__DECOMPRESS_AFTER_N_ZPLANES_MASK 0xf000000
#define DB_DEBUG__DECOMPRESS_AFTER_N_ZPLANES__SHIFT 0x18
#define DB_DEBUG__ONE_FREE_IN_FLIGHT_MASK 0x10000000
#define DB_DEBUG__ONE_FREE_IN_FLIGHT__SHIFT 0x1c
#define DB_DEBUG__FORCE_MISS_IF_NOT_INFLIGHT_MASK 0x20000000
#define DB_DEBUG__FORCE_MISS_IF_NOT_INFLIGHT__SHIFT 0x1d
#define DB_DEBUG__DISABLE_DEPTH_SURFACE_SYNC_MASK 0x40000000
#define DB_DEBUG__DISABLE_DEPTH_SURFACE_SYNC__SHIFT 0x1e
#define DB_DEBUG__DISABLE_HTILE_SURFACE_SYNC_MASK 0x80000000
#define DB_DEBUG__DISABLE_HTILE_SURFACE_SYNC__SHIFT 0x1f
/*
 * DB_DEBUG2 field layout (mask selects bits in place; shift is the field's
 * lowest bit index).
 *   bits 0-8    nine single-bit flags
 *   bits 9-13   CLK_OFF_DELAY (5 bits)
 *   bits 14-19  six single-bit flags
 *   bits 28-31  four single-bit flags
 * Bits 20-27 have no field defined here.
 */
#define DB_DEBUG2__ALLOW_COMPZ_BYTE_MASKING_MASK 0x1
#define DB_DEBUG2__ALLOW_COMPZ_BYTE_MASKING__SHIFT 0x0
#define DB_DEBUG2__DISABLE_TC_ZRANGE_L0_CACHE_MASK 0x2
#define DB_DEBUG2__DISABLE_TC_ZRANGE_L0_CACHE__SHIFT 0x1
#define DB_DEBUG2__DISABLE_TC_MASK_L0_CACHE_MASK 0x4
#define DB_DEBUG2__DISABLE_TC_MASK_L0_CACHE__SHIFT 0x2
#define DB_DEBUG2__DTR_ROUND_ROBIN_ARB_MASK 0x8
#define DB_DEBUG2__DTR_ROUND_ROBIN_ARB__SHIFT 0x3
#define DB_DEBUG2__DTR_PREZ_STALLS_FOR_ETF_ROOM_MASK 0x10
#define DB_DEBUG2__DTR_PREZ_STALLS_FOR_ETF_ROOM__SHIFT 0x4
#define DB_DEBUG2__DISABLE_PREZL_LPF_STALL_MASK 0x20
#define DB_DEBUG2__DISABLE_PREZL_LPF_STALL__SHIFT 0x5
#define DB_DEBUG2__ENABLE_PREZL_CB_STALL_MASK 0x40
#define DB_DEBUG2__ENABLE_PREZL_CB_STALL__SHIFT 0x6
#define DB_DEBUG2__DISABLE_PREZL_LPF_STALL_REZ_MASK 0x80
#define DB_DEBUG2__DISABLE_PREZL_LPF_STALL_REZ__SHIFT 0x7
#define DB_DEBUG2__DISABLE_PREZL_CB_STALL_REZ_MASK 0x100
#define DB_DEBUG2__DISABLE_PREZL_CB_STALL_REZ__SHIFT 0x8
#define DB_DEBUG2__CLK_OFF_DELAY_MASK 0x3e00
#define DB_DEBUG2__CLK_OFF_DELAY__SHIFT 0x9
#define DB_DEBUG2__DISABLE_TILE_COVERED_FOR_PS_ITER_MASK 0x4000
#define DB_DEBUG2__DISABLE_TILE_COVERED_FOR_PS_ITER__SHIFT 0xe
#define DB_DEBUG2__ENABLE_SUBTILE_GROUPING_MASK 0x8000
#define DB_DEBUG2__ENABLE_SUBTILE_GROUPING__SHIFT 0xf
#define DB_DEBUG2__DISABLE_HTILE_PAIRED_PIPES_MASK 0x10000
#define DB_DEBUG2__DISABLE_HTILE_PAIRED_PIPES__SHIFT 0x10
#define DB_DEBUG2__DISABLE_NULL_EOT_FORWARDING_MASK 0x20000
#define DB_DEBUG2__DISABLE_NULL_EOT_FORWARDING__SHIFT 0x11
#define DB_DEBUG2__DISABLE_DTT_DATA_FORWARDING_MASK 0x40000
#define DB_DEBUG2__DISABLE_DTT_DATA_FORWARDING__SHIFT 0x12
#define DB_DEBUG2__DISABLE_QUAD_COHERENCY_STALL_MASK 0x80000
#define DB_DEBUG2__DISABLE_QUAD_COHERENCY_STALL__SHIFT 0x13
#define DB_DEBUG2__ENABLE_PREZ_OF_REZ_SUMM_MASK 0x10000000
#define DB_DEBUG2__ENABLE_PREZ_OF_REZ_SUMM__SHIFT 0x1c
#define DB_DEBUG2__DISABLE_PREZL_VIEWPORT_STALL_MASK 0x20000000
#define DB_DEBUG2__DISABLE_PREZL_VIEWPORT_STALL__SHIFT 0x1d
#define DB_DEBUG2__DISABLE_SINGLE_STENCIL_QUAD_SUMM_MASK 0x40000000
#define DB_DEBUG2__DISABLE_SINGLE_STENCIL_QUAD_SUMM__SHIFT 0x1e
#define DB_DEBUG2__DISABLE_WRITE_STALL_ON_RDWR_CONFLICT_MASK 0x80000000
#define DB_DEBUG2__DISABLE_WRITE_STALL_ON_RDWR_CONFLICT__SHIFT 0x1f
/*
 * DB_DEBUG3 field layout (mask selects bits in place; shift is the field's
 * lowest bit index).
 *   bits 2-29   twenty-eight single-bit flags, one per bit, in the order listed
 *   bits 30-31  DB_EXTRA_DEBUG3 (2 bits)
 * Bits 0-1 have no field defined here.
 * Note: "SUPPRESION" in DISABLE_RAM_READ_SUPPRESION_ON_FWD is the spelling
 * used by the macro name itself; renaming it would break any code that
 * references the existing identifier.
 */
#define DB_DEBUG3__FORCE_DB_IS_GOOD_MASK 0x4
#define DB_DEBUG3__FORCE_DB_IS_GOOD__SHIFT 0x2
#define DB_DEBUG3__DISABLE_TL_SSO_NULL_SUPPRESSION_MASK 0x8
#define DB_DEBUG3__DISABLE_TL_SSO_NULL_SUPPRESSION__SHIFT 0x3
#define DB_DEBUG3__DISABLE_HIZ_ON_VPORT_CLAMP_MASK 0x10
#define DB_DEBUG3__DISABLE_HIZ_ON_VPORT_CLAMP__SHIFT 0x4
#define DB_DEBUG3__EQAA_INTERPOLATE_COMP_Z_MASK 0x20
#define DB_DEBUG3__EQAA_INTERPOLATE_COMP_Z__SHIFT 0x5
#define DB_DEBUG3__EQAA_INTERPOLATE_SRC_Z_MASK 0x40
#define DB_DEBUG3__EQAA_INTERPOLATE_SRC_Z__SHIFT 0x6
#define DB_DEBUG3__DISABLE_TCP_CAM_BYPASS_MASK 0x80
#define DB_DEBUG3__DISABLE_TCP_CAM_BYPASS__SHIFT 0x7
#define DB_DEBUG3__DISABLE_ZCMP_DIRTY_SUPPRESSION_MASK 0x100
#define DB_DEBUG3__DISABLE_ZCMP_DIRTY_SUPPRESSION__SHIFT 0x8
#define DB_DEBUG3__DISABLE_REDUNDANT_PLANE_FLUSHES_OPT_MASK 0x200
#define DB_DEBUG3__DISABLE_REDUNDANT_PLANE_FLUSHES_OPT__SHIFT 0x9
#define DB_DEBUG3__DISABLE_RECOMP_TO_1ZPLANE_WITHOUT_FASTOP_MASK 0x400
#define DB_DEBUG3__DISABLE_RECOMP_TO_1ZPLANE_WITHOUT_FASTOP__SHIFT 0xa
#define DB_DEBUG3__ENABLE_INCOHERENT_EQAA_READS_MASK 0x800
#define DB_DEBUG3__ENABLE_INCOHERENT_EQAA_READS__SHIFT 0xb
#define DB_DEBUG3__DISABLE_OP_Z_DATA_FORWARDING_MASK 0x1000
#define DB_DEBUG3__DISABLE_OP_Z_DATA_FORWARDING__SHIFT 0xc
#define DB_DEBUG3__DISABLE_OP_DF_BYPASS_MASK 0x2000
#define DB_DEBUG3__DISABLE_OP_DF_BYPASS__SHIFT 0xd
#define DB_DEBUG3__DISABLE_OP_DF_WRITE_COMBINE_MASK 0x4000
#define DB_DEBUG3__DISABLE_OP_DF_WRITE_COMBINE__SHIFT 0xe
#define DB_DEBUG3__DISABLE_OP_DF_DIRECT_FEEDBACK_MASK 0x8000
#define DB_DEBUG3__DISABLE_OP_DF_DIRECT_FEEDBACK__SHIFT 0xf
#define DB_DEBUG3__ALLOW_RF2P_RW_COLLISION_MASK 0x10000
#define DB_DEBUG3__ALLOW_RF2P_RW_COLLISION__SHIFT 0x10
#define DB_DEBUG3__SLOW_PREZ_TO_A2M_OMASK_RATE_MASK 0x20000
#define DB_DEBUG3__SLOW_PREZ_TO_A2M_OMASK_RATE__SHIFT 0x11
#define DB_DEBUG3__DISABLE_OP_S_DATA_FORWARDING_MASK 0x40000
#define DB_DEBUG3__DISABLE_OP_S_DATA_FORWARDING__SHIFT 0x12
#define DB_DEBUG3__DISABLE_TC_UPDATE_WRITE_COMBINE_MASK 0x80000
#define DB_DEBUG3__DISABLE_TC_UPDATE_WRITE_COMBINE__SHIFT 0x13
#define DB_DEBUG3__DISABLE_HZ_TC_WRITE_COMBINE_MASK 0x100000
#define DB_DEBUG3__DISABLE_HZ_TC_WRITE_COMBINE__SHIFT 0x14
#define DB_DEBUG3__ENABLE_RECOMP_ZDIRTY_SUPPRESSION_OPT_MASK 0x200000
#define DB_DEBUG3__ENABLE_RECOMP_ZDIRTY_SUPPRESSION_OPT__SHIFT 0x15
#define DB_DEBUG3__ENABLE_TC_MA_ROUND_ROBIN_ARB_MASK 0x400000
#define DB_DEBUG3__ENABLE_TC_MA_ROUND_ROBIN_ARB__SHIFT 0x16
#define DB_DEBUG3__DISABLE_RAM_READ_SUPPRESION_ON_FWD_MASK 0x800000
#define DB_DEBUG3__DISABLE_RAM_READ_SUPPRESION_ON_FWD__SHIFT 0x17
#define DB_DEBUG3__DISABLE_EQAA_A2M_PERF_OPT_MASK 0x1000000
#define DB_DEBUG3__DISABLE_EQAA_A2M_PERF_OPT__SHIFT 0x18
#define DB_DEBUG3__DISABLE_DI_DT_STALL_MASK 0x2000000
#define DB_DEBUG3__DISABLE_DI_DT_STALL__SHIFT 0x19
#define DB_DEBUG3__ENABLE_DB_PROCESS_RESET_MASK 0x4000000
#define DB_DEBUG3__ENABLE_DB_PROCESS_RESET__SHIFT 0x1a
#define DB_DEBUG3__DISABLE_OVERRASTERIZATION_FIX_MASK 0x8000000
#define DB_DEBUG3__DISABLE_OVERRASTERIZATION_FIX__SHIFT 0x1b
#define DB_DEBUG3__DONT_INSERT_CONTEXT_SUSPEND_MASK 0x10000000
#define DB_DEBUG3__DONT_INSERT_CONTEXT_SUSPEND__SHIFT 0x1c
#define DB_DEBUG3__DONT_DELETE_CONTEXT_SUSPEND_MASK 0x20000000
#define DB_DEBUG3__DONT_DELETE_CONTEXT_SUSPEND__SHIFT 0x1d
#define DB_DEBUG3__DB_EXTRA_DEBUG3_MASK 0xc0000000
#define DB_DEBUG3__DB_EXTRA_DEBUG3__SHIFT 0x1e
/*
 * DB_DEBUG4 field layout (mask selects bits in place; shift is the field's
 * lowest bit index): four single-bit flags at bits 0-3, followed by
 * DB_EXTRA_DEBUG4 occupying the remaining bits 4-31.
 */
#define DB_DEBUG4__DISABLE_QC_Z_MASK_SUMMATION_MASK 0x1
#define DB_DEBUG4__DISABLE_QC_Z_MASK_SUMMATION__SHIFT 0x0
#define DB_DEBUG4__DISABLE_QC_STENCIL_MASK_SUMMATION_MASK 0x2
#define DB_DEBUG4__DISABLE_QC_STENCIL_MASK_SUMMATION__SHIFT 0x1
#define DB_DEBUG4__DISABLE_RESUMM_TO_SINGLE_STENCIL_MASK 0x4
#define DB_DEBUG4__DISABLE_RESUMM_TO_SINGLE_STENCIL__SHIFT 0x2
#define DB_DEBUG4__DISABLE_PREZ_POSTZ_DTILE_CONFLICT_STALL_MASK 0x8
#define DB_DEBUG4__DISABLE_PREZ_POSTZ_DTILE_CONFLICT_STALL__SHIFT 0x3
#define DB_DEBUG4__DB_EXTRA_DEBUG4_MASK 0xfffffff0
#define DB_DEBUG4__DB_EXTRA_DEBUG4__SHIFT 0x4
/*
 * DB_CREDIT_LIMIT / DB_WATERMARKS field layout (mask selects bits in place;
 * shift is the field's lowest bit index).
 *   DB_CREDIT_LIMIT:
 *     bits 0-4    DB_SC_TILE_CREDITS
 *     bits 5-9    DB_SC_QUAD_CREDITS
 *     bits 10-12  DB_CB_LQUAD_CREDITS
 *     bits 24-30  DB_CB_TILE_CREDITS
 *     Bits 13-23 and bit 31 have no field defined here.
 *   DB_WATERMARKS (tiles all 32 bits):
 *     bits 0-4    DEPTH_FREE
 *     bits 5-10   DEPTH_FLUSH
 *     bits 11-14  FORCE_SUMMARIZE
 *     bits 15-19  DEPTH_PENDING_FREE
 *     bits 20-26  DEPTH_CACHELINE_FREE
 *     bits 27-31  five single-bit flags
 */
#define DB_CREDIT_LIMIT__DB_SC_TILE_CREDITS_MASK 0x1f
#define DB_CREDIT_LIMIT__DB_SC_TILE_CREDITS__SHIFT 0x0
#define DB_CREDIT_LIMIT__DB_SC_QUAD_CREDITS_MASK 0x3e0
#define DB_CREDIT_LIMIT__DB_SC_QUAD_CREDITS__SHIFT 0x5
#define DB_CREDIT_LIMIT__DB_CB_LQUAD_CREDITS_MASK 0x1c00
#define DB_CREDIT_LIMIT__DB_CB_LQUAD_CREDITS__SHIFT 0xa
#define DB_CREDIT_LIMIT__DB_CB_TILE_CREDITS_MASK 0x7f000000
#define DB_CREDIT_LIMIT__DB_CB_TILE_CREDITS__SHIFT 0x18
#define DB_WATERMARKS__DEPTH_FREE_MASK 0x1f
#define DB_WATERMARKS__DEPTH_FREE__SHIFT 0x0
#define DB_WATERMARKS__DEPTH_FLUSH_MASK 0x7e0
#define DB_WATERMARKS__DEPTH_FLUSH__SHIFT 0x5
#define DB_WATERMARKS__FORCE_SUMMARIZE_MASK 0x7800
#define DB_WATERMARKS__FORCE_SUMMARIZE__SHIFT 0xb
#define DB_WATERMARKS__DEPTH_PENDING_FREE_MASK 0xf8000
#define DB_WATERMARKS__DEPTH_PENDING_FREE__SHIFT 0xf
#define DB_WATERMARKS__DEPTH_CACHELINE_FREE_MASK 0x7f00000
#define DB_WATERMARKS__DEPTH_CACHELINE_FREE__SHIFT 0x14
#define DB_WATERMARKS__EARLY_Z_PANIC_DISABLE_MASK 0x8000000
#define DB_WATERMARKS__EARLY_Z_PANIC_DISABLE__SHIFT 0x1b
#define DB_WATERMARKS__LATE_Z_PANIC_DISABLE_MASK 0x10000000
#define DB_WATERMARKS__LATE_Z_PANIC_DISABLE__SHIFT 0x1c
#define DB_WATERMARKS__RE_Z_PANIC_DISABLE_MASK 0x20000000
#define DB_WATERMARKS__RE_Z_PANIC_DISABLE__SHIFT 0x1d
#define DB_WATERMARKS__AUTO_FLUSH_HTILE_MASK 0x40000000
#define DB_WATERMARKS__AUTO_FLUSH_HTILE__SHIFT 0x1e
#define DB_WATERMARKS__AUTO_FLUSH_QUAD_MASK 0x80000000
#define DB_WATERMARKS__AUTO_FLUSH_QUAD__SHIFT 0x1f
/*
 * DB_SUBTILE_CONTROL field layout (mask selects bits in place; shift is the
 * field's lowest bit index): ten 2-bit fields packed contiguously over
 * bits 0-19 as X/Y pairs for MSAA1, MSAA2, MSAA4, MSAA8 and MSAA16.
 * Bits 20-31 have no field defined here.
 */
#define DB_SUBTILE_CONTROL__MSAA1_X_MASK 0x3
#define DB_SUBTILE_CONTROL__MSAA1_X__SHIFT 0x0
#define DB_SUBTILE_CONTROL__MSAA1_Y_MASK 0xc
#define DB_SUBTILE_CONTROL__MSAA1_Y__SHIFT 0x2
#define DB_SUBTILE_CONTROL__MSAA2_X_MASK 0x30
#define DB_SUBTILE_CONTROL__MSAA2_X__SHIFT 0x4
#define DB_SUBTILE_CONTROL__MSAA2_Y_MASK 0xc0
#define DB_SUBTILE_CONTROL__MSAA2_Y__SHIFT 0x6
#define DB_SUBTILE_CONTROL__MSAA4_X_MASK 0x300
#define DB_SUBTILE_CONTROL__MSAA4_X__SHIFT 0x8
#define DB_SUBTILE_CONTROL__MSAA4_Y_MASK 0xc00
#define DB_SUBTILE_CONTROL__MSAA4_Y__SHIFT 0xa
#define DB_SUBTILE_CONTROL__MSAA8_X_MASK 0x3000
#define DB_SUBTILE_CONTROL__MSAA8_X__SHIFT 0xc
#define DB_SUBTILE_CONTROL__MSAA8_Y_MASK 0xc000
#define DB_SUBTILE_CONTROL__MSAA8_Y__SHIFT 0xe
#define DB_SUBTILE_CONTROL__MSAA16_X_MASK 0x30000
#define DB_SUBTILE_CONTROL__MSAA16_X__SHIFT 0x10
#define DB_SUBTILE_CONTROL__MSAA16_Y_MASK 0xc0000
#define DB_SUBTILE_CONTROL__MSAA16_Y__SHIFT 0x12
/*
 * DB_FREE_CACHELINES / DB_FIFO_DEPTH1 / DB_FIFO_DEPTH2 field layout (mask
 * selects bits in place; shift is the field's lowest bit index).
 *   DB_FREE_CACHELINES (tiles all 32 bits):
 *     FREE_DTILE_DEPTH 0-6, FREE_PLANE_DEPTH 7-13, FREE_Z_DEPTH 14-20,
 *     FREE_HTILE_DEPTH 21-24, QUAD_READ_REQS 25-31.
 *   DB_FIFO_DEPTH1 (bits 29-31 have no field defined here):
 *     MI_RDREQ_FIFO_DEPTH 0-4, MI_WRREQ_FIFO_DEPTH 5-9, MCC_DEPTH 10-15,
 *     QC_DEPTH 16-20, LTILE_PROBE_FIFO_DEPTH 21-28.
 *   DB_FIFO_DEPTH2 (tiles all 32 bits):
 *     EQUAD_FIFO_DEPTH 0-7, ETILE_OP_FIFO_DEPTH 8-14, LQUAD_FIFO_DEPTH 15-24,
 *     LTILE_OP_FIFO_DEPTH 25-31.
 */
#define DB_FREE_CACHELINES__FREE_DTILE_DEPTH_MASK 0x7f
#define DB_FREE_CACHELINES__FREE_DTILE_DEPTH__SHIFT 0x0
#define DB_FREE_CACHELINES__FREE_PLANE_DEPTH_MASK 0x3f80
#define DB_FREE_CACHELINES__FREE_PLANE_DEPTH__SHIFT 0x7
#define DB_FREE_CACHELINES__FREE_Z_DEPTH_MASK 0x1fc000
#define DB_FREE_CACHELINES__FREE_Z_DEPTH__SHIFT 0xe
#define DB_FREE_CACHELINES__FREE_HTILE_DEPTH_MASK 0x1e00000
#define DB_FREE_CACHELINES__FREE_HTILE_DEPTH__SHIFT 0x15
#define DB_FREE_CACHELINES__QUAD_READ_REQS_MASK 0xfe000000
#define DB_FREE_CACHELINES__QUAD_READ_REQS__SHIFT 0x19
#define DB_FIFO_DEPTH1__MI_RDREQ_FIFO_DEPTH_MASK 0x1f
#define DB_FIFO_DEPTH1__MI_RDREQ_FIFO_DEPTH__SHIFT 0x0
#define DB_FIFO_DEPTH1__MI_WRREQ_FIFO_DEPTH_MASK 0x3e0
#define DB_FIFO_DEPTH1__MI_WRREQ_FIFO_DEPTH__SHIFT 0x5
#define DB_FIFO_DEPTH1__MCC_DEPTH_MASK 0xfc00
#define DB_FIFO_DEPTH1__MCC_DEPTH__SHIFT 0xa
#define DB_FIFO_DEPTH1__QC_DEPTH_MASK 0x1f0000
#define DB_FIFO_DEPTH1__QC_DEPTH__SHIFT 0x10
#define DB_FIFO_DEPTH1__LTILE_PROBE_FIFO_DEPTH_MASK 0x1fe00000
#define DB_FIFO_DEPTH1__LTILE_PROBE_FIFO_DEPTH__SHIFT 0x15
#define DB_FIFO_DEPTH2__EQUAD_FIFO_DEPTH_MASK 0xff
#define DB_FIFO_DEPTH2__EQUAD_FIFO_DEPTH__SHIFT 0x0
#define DB_FIFO_DEPTH2__ETILE_OP_FIFO_DEPTH_MASK 0x7f00
#define DB_FIFO_DEPTH2__ETILE_OP_FIFO_DEPTH__SHIFT 0x8
#define DB_FIFO_DEPTH2__LQUAD_FIFO_DEPTH_MASK 0x1ff8000
#define DB_FIFO_DEPTH2__LQUAD_FIFO_DEPTH__SHIFT 0xf
#define DB_FIFO_DEPTH2__LTILE_OP_FIFO_DEPTH_MASK 0xfe000000
#define DB_FIFO_DEPTH2__LTILE_OP_FIFO_DEPTH__SHIFT 0x19
/*
 * DB_CGTT_CLK_CTRL_0 field layout (mask selects bits in place; shift is the
 * field's lowest bit index). The fields tile all 32 bits:
 *   bits 0-3    ON_DELAY
 *   bits 4-11   OFF_HYSTERESIS
 *   bits 12-23  RESERVED
 *   bits 24-31  SOFT_OVERRIDE7 .. SOFT_OVERRIDE0
 * Note the override numbering runs opposite to bit order: SOFT_OVERRIDE7 is
 * bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define DB_CGTT_CLK_CTRL_0__ON_DELAY_MASK 0xf
#define DB_CGTT_CLK_CTRL_0__ON_DELAY__SHIFT 0x0
#define DB_CGTT_CLK_CTRL_0__OFF_HYSTERESIS_MASK 0xff0
#define DB_CGTT_CLK_CTRL_0__OFF_HYSTERESIS__SHIFT 0x4
#define DB_CGTT_CLK_CTRL_0__RESERVED_MASK 0xfff000
#define DB_CGTT_CLK_CTRL_0__RESERVED__SHIFT 0xc
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE7_MASK 0x1000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE7__SHIFT 0x18
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE6_MASK 0x2000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE6__SHIFT 0x19
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE5_MASK 0x4000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE5__SHIFT 0x1a
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE4_MASK 0x8000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE4__SHIFT 0x1b
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE3_MASK 0x10000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE3__SHIFT 0x1c
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE2_MASK 0x20000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE2__SHIFT 0x1d
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE1_MASK 0x40000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE1__SHIFT 0x1e
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE0_MASK 0x80000000
#define DB_CGTT_CLK_CTRL_0__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * DB_ZPASS_COUNT_LOW / DB_ZPASS_COUNT_HI / DB_RING_CONTROL bit fields.
 *   DB_ZPASS_COUNT_LOW.COUNT_LOW      [31:0]
 *   DB_ZPASS_COUNT_HI.COUNT_HI        [30:0]  (bit 31 is not part of the field)
 *   DB_RING_CONTROL.COUNTER_CONTROL   [1:0]
 */
#define DB_ZPASS_COUNT_LOW__COUNT_LOW_MASK 0xffffffff
#define DB_ZPASS_COUNT_LOW__COUNT_LOW__SHIFT 0x0
#define DB_ZPASS_COUNT_HI__COUNT_HI_MASK 0x7fffffff
#define DB_ZPASS_COUNT_HI__COUNT_HI__SHIFT 0x0
#define DB_RING_CONTROL__COUNTER_CONTROL_MASK 0x3
#define DB_RING_CONTROL__COUNTER_CONTROL__SHIFT 0x0
/*
 * DB_READ_DEBUG_0 .. DB_READ_DEBUG_F: each defines a single field spanning
 * all 32 bits (mask 0xffffffff, shift 0). Entries 0-2 name the field
 * BUSY_DATA0/1/2; entries 3-F name it DEBUG_DATA.
 */
#define DB_READ_DEBUG_0__BUSY_DATA0_MASK 0xffffffff
#define DB_READ_DEBUG_0__BUSY_DATA0__SHIFT 0x0
#define DB_READ_DEBUG_1__BUSY_DATA1_MASK 0xffffffff
#define DB_READ_DEBUG_1__BUSY_DATA1__SHIFT 0x0
#define DB_READ_DEBUG_2__BUSY_DATA2_MASK 0xffffffff
#define DB_READ_DEBUG_2__BUSY_DATA2__SHIFT 0x0
#define DB_READ_DEBUG_3__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_3__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_4__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_4__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_5__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_5__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_6__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_6__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_7__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_7__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_8__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_8__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_9__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_9__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_A__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_A__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_B__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_B__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_C__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_C__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_D__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_D__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_E__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_E__DEBUG_DATA__SHIFT 0x0
#define DB_READ_DEBUG_F__DEBUG_DATA_MASK 0xffffffff
#define DB_READ_DEBUG_F__DEBUG_DATA__SHIFT 0x0
/*
 * DB_OCCLUSION_COUNT0..3, each split into a _LOW and a _HI definition with
 * the same layout:
 *   COUNT_LOW  [31:0]
 *   COUNT_HI   [30:0]  (bit 31 is not part of the field)
 */
#define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW_MASK 0xffffffff
#define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW__SHIFT 0x0
#define DB_OCCLUSION_COUNT0_HI__COUNT_HI_MASK 0x7fffffff
#define DB_OCCLUSION_COUNT0_HI__COUNT_HI__SHIFT 0x0
#define DB_OCCLUSION_COUNT1_LOW__COUNT_LOW_MASK 0xffffffff
#define DB_OCCLUSION_COUNT1_LOW__COUNT_LOW__SHIFT 0x0
#define DB_OCCLUSION_COUNT1_HI__COUNT_HI_MASK 0x7fffffff
#define DB_OCCLUSION_COUNT1_HI__COUNT_HI__SHIFT 0x0
#define DB_OCCLUSION_COUNT2_LOW__COUNT_LOW_MASK 0xffffffff
#define DB_OCCLUSION_COUNT2_LOW__COUNT_LOW__SHIFT 0x0
#define DB_OCCLUSION_COUNT2_HI__COUNT_HI_MASK 0x7fffffff
#define DB_OCCLUSION_COUNT2_HI__COUNT_HI__SHIFT 0x0
#define DB_OCCLUSION_COUNT3_LOW__COUNT_LOW_MASK 0xffffffff
#define DB_OCCLUSION_COUNT3_LOW__COUNT_LOW__SHIFT 0x0
#define DB_OCCLUSION_COUNT3_HI__COUNT_HI_MASK 0x7fffffff
#define DB_OCCLUSION_COUNT3_HI__COUNT_HI__SHIFT 0x0
/*
 * CC_RB_REDUNDANCY / CC_RB_BACKEND_DISABLE and their GC_USER_* counterparts.
 * The GC_USER_* definitions use exactly the same field positions as the CC_*
 * ones.
 *   *_RB_REDUNDANCY:      FAILED_RB0      [11:8]
 *                         EN_REDUNDANCY0  bit 12
 *                         FAILED_RB1      [19:16]
 *                         EN_REDUNDANCY1  bit 20
 *   *_RB_BACKEND_DISABLE: BACKEND_DISABLE [23:16]
 */
#define CC_RB_REDUNDANCY__FAILED_RB0_MASK 0xf00
#define CC_RB_REDUNDANCY__FAILED_RB0__SHIFT 0x8
#define CC_RB_REDUNDANCY__EN_REDUNDANCY0_MASK 0x1000
#define CC_RB_REDUNDANCY__EN_REDUNDANCY0__SHIFT 0xc
#define CC_RB_REDUNDANCY__FAILED_RB1_MASK 0xf0000
#define CC_RB_REDUNDANCY__FAILED_RB1__SHIFT 0x10
#define CC_RB_REDUNDANCY__EN_REDUNDANCY1_MASK 0x100000
#define CC_RB_REDUNDANCY__EN_REDUNDANCY1__SHIFT 0x14
#define CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK 0xff0000
#define CC_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT 0x10
#define GC_USER_RB_REDUNDANCY__FAILED_RB0_MASK 0xf00
#define GC_USER_RB_REDUNDANCY__FAILED_RB0__SHIFT 0x8
#define GC_USER_RB_REDUNDANCY__EN_REDUNDANCY0_MASK 0x1000
#define GC_USER_RB_REDUNDANCY__EN_REDUNDANCY0__SHIFT 0xc
#define GC_USER_RB_REDUNDANCY__FAILED_RB1_MASK 0xf0000
#define GC_USER_RB_REDUNDANCY__FAILED_RB1__SHIFT 0x10
#define GC_USER_RB_REDUNDANCY__EN_REDUNDANCY1_MASK 0x100000
#define GC_USER_RB_REDUNDANCY__EN_REDUNDANCY1__SHIFT 0x14
#define GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK 0xff0000
#define GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT 0x10
/*
 * GB_ADDR_CONFIG, GB_BACKEND_MAP and GB_GPU_ID bit fields.
 *   GB_ADDR_CONFIG: NUM_PIPES                [2:0]
 *                   PIPE_INTERLEAVE_SIZE     [6:4]
 *                   BANK_INTERLEAVE_SIZE     [10:8]
 *                   NUM_SHADER_ENGINES       [13:12]
 *                   SHADER_ENGINE_TILE_SIZE  [18:16]
 *                   NUM_GPUS                 [22:20]
 *                   MULTI_GPU_TILE_SIZE      [25:24]
 *                   ROW_SIZE                 [29:28]
 *                   NUM_LOWER_PIPES          bit 30
 *   GB_BACKEND_MAP: BACKEND_MAP              [31:0]
 *   GB_GPU_ID:      GPU_ID                   [3:0]
 */
#define GB_ADDR_CONFIG__NUM_PIPES_MASK 0x7
#define GB_ADDR_CONFIG__NUM_PIPES__SHIFT 0x0
#define GB_ADDR_CONFIG__PIPE_INTERLEAVE_SIZE_MASK 0x70
#define GB_ADDR_CONFIG__PIPE_INTERLEAVE_SIZE__SHIFT 0x4
#define GB_ADDR_CONFIG__BANK_INTERLEAVE_SIZE_MASK 0x700
#define GB_ADDR_CONFIG__BANK_INTERLEAVE_SIZE__SHIFT 0x8
#define GB_ADDR_CONFIG__NUM_SHADER_ENGINES_MASK 0x3000
#define GB_ADDR_CONFIG__NUM_SHADER_ENGINES__SHIFT 0xc
#define GB_ADDR_CONFIG__SHADER_ENGINE_TILE_SIZE_MASK 0x70000
#define GB_ADDR_CONFIG__SHADER_ENGINE_TILE_SIZE__SHIFT 0x10
#define GB_ADDR_CONFIG__NUM_GPUS_MASK 0x700000
#define GB_ADDR_CONFIG__NUM_GPUS__SHIFT 0x14
#define GB_ADDR_CONFIG__MULTI_GPU_TILE_SIZE_MASK 0x3000000
#define GB_ADDR_CONFIG__MULTI_GPU_TILE_SIZE__SHIFT 0x18
#define GB_ADDR_CONFIG__ROW_SIZE_MASK 0x30000000
#define GB_ADDR_CONFIG__ROW_SIZE__SHIFT 0x1c
#define GB_ADDR_CONFIG__NUM_LOWER_PIPES_MASK 0x40000000
#define GB_ADDR_CONFIG__NUM_LOWER_PIPES__SHIFT 0x1e
#define GB_BACKEND_MAP__BACKEND_MAP_MASK 0xffffffff
#define GB_BACKEND_MAP__BACKEND_MAP__SHIFT 0x0
#define GB_GPU_ID__GPU_ID_MASK 0xf
#define GB_GPU_ID__GPU_ID__SHIFT 0x0
/*
 * CC_RB_DAISY_CHAIN: eight 4-bit fields RB_0..RB_7 packed back to back;
 * RB_n occupies bits [4n+3:4n] (RB_0 = [3:0] ... RB_7 = [31:28]).
 */
#define CC_RB_DAISY_CHAIN__RB_0_MASK 0xf
#define CC_RB_DAISY_CHAIN__RB_0__SHIFT 0x0
#define CC_RB_DAISY_CHAIN__RB_1_MASK 0xf0
#define CC_RB_DAISY_CHAIN__RB_1__SHIFT 0x4
#define CC_RB_DAISY_CHAIN__RB_2_MASK 0xf00
#define CC_RB_DAISY_CHAIN__RB_2__SHIFT 0x8
#define CC_RB_DAISY_CHAIN__RB_3_MASK 0xf000
#define CC_RB_DAISY_CHAIN__RB_3__SHIFT 0xc
#define CC_RB_DAISY_CHAIN__RB_4_MASK 0xf0000
#define CC_RB_DAISY_CHAIN__RB_4__SHIFT 0x10
#define CC_RB_DAISY_CHAIN__RB_5_MASK 0xf00000
#define CC_RB_DAISY_CHAIN__RB_5__SHIFT 0x14
#define CC_RB_DAISY_CHAIN__RB_6_MASK 0xf000000
#define CC_RB_DAISY_CHAIN__RB_6__SHIFT 0x18
#define CC_RB_DAISY_CHAIN__RB_7_MASK 0xf0000000
#define CC_RB_DAISY_CHAIN__RB_7__SHIFT 0x1c
/*
 * GB_TILE_MODE0 .. GB_TILE_MODE7. Every GB_TILE_MODEn entry defines the same
 * five fields at the same positions:
 *   ARRAY_MODE           [5:2]
 *   PIPE_CONFIG          [10:6]
 *   TILE_SPLIT           [13:11]
 *   MICRO_TILE_MODE_NEW  [24:22]
 *   SAMPLE_SPLIT         [26:25]
 * <FIELD>_MASK selects the field's bits; <FIELD>__SHIFT is the index of its
 * lowest bit.
 */
#define GB_TILE_MODE0__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE0__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE0__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE0__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE0__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE0__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE0__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE0__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE1__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE1__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE1__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE1__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE1__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE1__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE1__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE1__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE1__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE1__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE2__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE2__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE2__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE2__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE2__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE2__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE2__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE2__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE2__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE2__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE3__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE3__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE3__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE3__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE3__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE3__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE3__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE3__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE3__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE3__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE4__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE4__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE4__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE4__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE4__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE4__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE4__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE4__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE4__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE4__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE5__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE5__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE5__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE5__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE5__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE5__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE5__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE5__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE5__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE5__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE6__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE6__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE6__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE6__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE6__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE6__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE6__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE6__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE6__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE6__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE7__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE7__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE7__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE7__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE7__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE7__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE7__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE7__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE7__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE7__SAMPLE_SPLIT__SHIFT 0x19
/*
 * GB_TILE_MODE8 .. GB_TILE_MODE15. Each entry defines the same five fields:
 *   ARRAY_MODE [5:2], PIPE_CONFIG [10:6], TILE_SPLIT [13:11],
 *   MICRO_TILE_MODE_NEW [24:22], SAMPLE_SPLIT [26:25].
 */
#define GB_TILE_MODE8__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE8__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE8__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE8__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE8__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE8__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE8__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE8__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE8__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE8__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE9__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE9__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE9__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE9__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE9__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE9__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE9__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE9__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE9__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE9__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE10__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE10__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE10__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE10__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE10__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE10__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE10__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE10__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE10__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE10__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE11__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE11__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE11__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE11__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE11__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE11__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE11__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE11__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE11__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE11__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE12__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE12__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE12__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE12__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE12__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE12__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE12__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE12__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE12__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE12__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE13__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE13__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE13__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE13__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE13__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE13__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE13__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE13__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE13__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE13__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE14__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE14__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE14__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE14__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE14__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE14__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE14__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE14__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE14__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE14__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE15__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE15__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE15__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE15__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE15__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE15__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE15__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE15__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE15__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE15__SAMPLE_SPLIT__SHIFT 0x19
/*
 * GB_TILE_MODE16 .. GB_TILE_MODE23. Each entry defines the same five fields:
 *   ARRAY_MODE [5:2], PIPE_CONFIG [10:6], TILE_SPLIT [13:11],
 *   MICRO_TILE_MODE_NEW [24:22], SAMPLE_SPLIT [26:25].
 */
#define GB_TILE_MODE16__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE16__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE16__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE16__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE16__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE16__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE16__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE16__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE16__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE16__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE17__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE17__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE17__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE17__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE17__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE17__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE17__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE17__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE17__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE17__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE18__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE18__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE18__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE18__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE18__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE18__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE18__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE18__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE18__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE18__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE19__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE19__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE19__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE19__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE19__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE19__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE19__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE19__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE19__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE19__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE20__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE20__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE20__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE20__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE20__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE20__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE20__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE20__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE20__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE20__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE21__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE21__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE21__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE21__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE21__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE21__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE21__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE21__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE21__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE21__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE22__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE22__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE22__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE22__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE22__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE22__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE22__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE22__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE22__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE22__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE23__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE23__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE23__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE23__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE23__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE23__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE23__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE23__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE23__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE23__SAMPLE_SPLIT__SHIFT 0x19
/*
 * GB_TILE_MODE24 .. GB_TILE_MODE31. Each entry defines the same five fields:
 *   ARRAY_MODE [5:2], PIPE_CONFIG [10:6], TILE_SPLIT [13:11],
 *   MICRO_TILE_MODE_NEW [24:22], SAMPLE_SPLIT [26:25].
 */
#define GB_TILE_MODE24__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE24__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE24__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE24__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE24__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE24__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE24__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE24__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE24__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE24__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE25__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE25__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE25__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE25__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE25__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE25__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE25__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE25__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE25__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE25__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE26__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE26__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE26__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE26__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE26__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE26__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE26__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE26__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE26__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE26__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE27__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE27__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE27__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE27__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE27__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE27__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE27__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE27__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE27__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE27__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE28__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE28__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE28__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE28__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE28__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE28__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE28__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE28__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE28__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE28__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE29__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE29__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE29__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE29__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE29__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE29__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE29__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE29__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE29__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE29__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE30__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE30__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE30__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE30__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE30__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE30__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE30__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE30__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE30__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE30__SAMPLE_SPLIT__SHIFT 0x19
#define GB_TILE_MODE31__ARRAY_MODE_MASK 0x3c
#define GB_TILE_MODE31__ARRAY_MODE__SHIFT 0x2
#define GB_TILE_MODE31__PIPE_CONFIG_MASK 0x7c0
#define GB_TILE_MODE31__PIPE_CONFIG__SHIFT 0x6
#define GB_TILE_MODE31__TILE_SPLIT_MASK 0x3800
#define GB_TILE_MODE31__TILE_SPLIT__SHIFT 0xb
#define GB_TILE_MODE31__MICRO_TILE_MODE_NEW_MASK 0x1c00000
#define GB_TILE_MODE31__MICRO_TILE_MODE_NEW__SHIFT 0x16
#define GB_TILE_MODE31__SAMPLE_SPLIT_MASK 0x6000000
#define GB_TILE_MODE31__SAMPLE_SPLIT__SHIFT 0x19
/*
 * GB_MACROTILE_MODE0 .. GB_MACROTILE_MODE7. Every GB_MACROTILE_MODEn entry
 * defines the same four 2-bit fields, packed into the low byte:
 *   BANK_WIDTH         [1:0]
 *   BANK_HEIGHT        [3:2]
 *   MACRO_TILE_ASPECT  [5:4]
 *   NUM_BANKS          [7:6]
 */
#define GB_MACROTILE_MODE0__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE0__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE0__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE0__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE0__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE1__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE1__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE1__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE1__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE1__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE1__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE1__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE1__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE2__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE2__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE2__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE2__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE2__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE2__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE2__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE2__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE3__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE3__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE3__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE3__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE3__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE3__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE3__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE3__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE4__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE4__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE4__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE4__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE4__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE4__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE4__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE4__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE5__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE5__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE5__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE5__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE5__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE5__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE5__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE5__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE6__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE6__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE6__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE6__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE6__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE6__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE6__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE6__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE7__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE7__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE7__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE7__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE7__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE7__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE7__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE7__NUM_BANKS__SHIFT 0x6
/*
 * GB_MACROTILE_MODE8 .. GB_MACROTILE_MODE15. Each entry defines the same four
 * 2-bit fields: BANK_WIDTH [1:0], BANK_HEIGHT [3:2],
 * MACRO_TILE_ASPECT [5:4], NUM_BANKS [7:6].
 */
#define GB_MACROTILE_MODE8__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE8__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE8__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE8__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE8__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE8__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE8__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE8__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE9__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE9__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE9__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE9__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE9__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE9__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE9__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE9__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE10__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE10__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE10__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE10__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE10__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE10__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE10__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE10__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE11__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE11__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE11__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE11__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE11__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE11__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE11__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE11__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE12__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE12__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE12__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE12__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE12__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE12__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE12__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE12__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE13__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE13__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE13__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE13__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE13__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE13__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE13__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE13__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE14__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE14__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE14__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE14__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE14__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE14__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE14__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE14__NUM_BANKS__SHIFT 0x6
#define GB_MACROTILE_MODE15__BANK_WIDTH_MASK 0x3
#define GB_MACROTILE_MODE15__BANK_WIDTH__SHIFT 0x0
#define GB_MACROTILE_MODE15__BANK_HEIGHT_MASK 0xc
#define GB_MACROTILE_MODE15__BANK_HEIGHT__SHIFT 0x2
#define GB_MACROTILE_MODE15__MACRO_TILE_ASPECT_MASK 0x30
#define GB_MACROTILE_MODE15__MACRO_TILE_ASPECT__SHIFT 0x4
#define GB_MACROTILE_MODE15__NUM_BANKS_MASK 0xc0
#define GB_MACROTILE_MODE15__NUM_BANKS__SHIFT 0x6
/*
 * GB_EDC_MODE and CC_GC_EDC_CONFIG bit fields.
 *   GB_EDC_MODE:      FORCE_SEC_ON_DED  bit 16
 *                     DED_MODE          [21:20]
 *                     PROP_FED          bit 29
 *                     BYPASS            bit 31
 *   CC_GC_EDC_CONFIG: DIS_EDC           bit 1
 */
#define GB_EDC_MODE__FORCE_SEC_ON_DED_MASK 0x10000
#define GB_EDC_MODE__FORCE_SEC_ON_DED__SHIFT 0x10
#define GB_EDC_MODE__DED_MODE_MASK 0x300000
#define GB_EDC_MODE__DED_MODE__SHIFT 0x14
#define GB_EDC_MODE__PROP_FED_MASK 0x20000000
#define GB_EDC_MODE__PROP_FED__SHIFT 0x1d
#define GB_EDC_MODE__BYPASS_MASK 0x80000000
#define GB_EDC_MODE__BYPASS__SHIFT 0x1f
#define CC_GC_EDC_CONFIG__DIS_EDC_MASK 0x2
#define CC_GC_EDC_CONFIG__DIS_EDC__SHIFT 0x1
/*
 * RAS_* signature definitions.
 *   RAS_SIGNATURE_CONTROL.ENABLE       bit 0
 *   RAS_SIGNATURE_MASK.INPUT_BUS_MASK  [31:0]
 *   RAS_<unit>_SIGNATUREn.SIGNATURE    [31:0] for every entry below
 *     (SX 0-3, DB 0, PA 0, VGT 0, SQ 0, SC 0-7, IA 0-1, SPI 0-1, TA 0, TD 0,
 *      CB 0, BCI 0-1).
 * Note that RAS_SIGNATURE_MASK__INPUT_BUS_MASK_MASK is the mask of the field
 * named INPUT_BUS_MASK, hence the doubled "_MASK" suffix.
 */
#define RAS_SIGNATURE_CONTROL__ENABLE_MASK 0x1
#define RAS_SIGNATURE_CONTROL__ENABLE__SHIFT 0x0
#define RAS_SIGNATURE_MASK__INPUT_BUS_MASK_MASK 0xffffffff
#define RAS_SIGNATURE_MASK__INPUT_BUS_MASK__SHIFT 0x0
#define RAS_SX_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_SX_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_SX_SIGNATURE1__SIGNATURE_MASK 0xffffffff
#define RAS_SX_SIGNATURE1__SIGNATURE__SHIFT 0x0
#define RAS_SX_SIGNATURE2__SIGNATURE_MASK 0xffffffff
#define RAS_SX_SIGNATURE2__SIGNATURE__SHIFT 0x0
#define RAS_SX_SIGNATURE3__SIGNATURE_MASK 0xffffffff
#define RAS_SX_SIGNATURE3__SIGNATURE__SHIFT 0x0
#define RAS_DB_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_DB_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_PA_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_PA_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_VGT_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_VGT_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_SQ_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_SQ_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE1__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE1__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE2__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE2__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE3__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE3__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE4__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE4__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE5__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE5__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE6__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE6__SIGNATURE__SHIFT 0x0
#define RAS_SC_SIGNATURE7__SIGNATURE_MASK 0xffffffff
#define RAS_SC_SIGNATURE7__SIGNATURE__SHIFT 0x0
#define RAS_IA_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_IA_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_IA_SIGNATURE1__SIGNATURE_MASK 0xffffffff
#define RAS_IA_SIGNATURE1__SIGNATURE__SHIFT 0x0
#define RAS_SPI_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_SPI_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_SPI_SIGNATURE1__SIGNATURE_MASK 0xffffffff
#define RAS_SPI_SIGNATURE1__SIGNATURE__SHIFT 0x0
#define RAS_TA_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_TA_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_TD_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_TD_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_CB_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_CB_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_BCI_SIGNATURE0__SIGNATURE_MASK 0xffffffff
#define RAS_BCI_SIGNATURE0__SIGNATURE__SHIFT 0x0
#define RAS_BCI_SIGNATURE1__SIGNATURE_MASK 0xffffffff
#define RAS_BCI_SIGNATURE1__SIGNATURE__SHIFT 0x0
/*
 * GRBM_CAM_INDEX, GRBM_CAM_DATA, GRBM_CNTL, GRBM_SKEW_CNTL and GRBM_PWR_CNTL
 * bit fields.
 *   GRBM_CAM_INDEX: CAM_INDEX           [2:0]
 *   GRBM_CAM_DATA:  CAM_ADDR            [15:0]
 *                   CAM_REMAPADDR       [31:16]
 *   GRBM_CNTL:      READ_TIMEOUT        [7:0]
 *   GRBM_SKEW_CNTL: SKEW_TOP_THRESHOLD  [5:0]
 *                   SKEW_COUNT          [11:6]
 *   GRBM_PWR_CNTL:  REQ_TYPE            [3:0]
 *                   RSP_TYPE            [7:4]
 */
#define GRBM_CAM_INDEX__CAM_INDEX_MASK 0x7
#define GRBM_CAM_INDEX__CAM_INDEX__SHIFT 0x0
#define GRBM_CAM_DATA__CAM_ADDR_MASK 0xffff
#define GRBM_CAM_DATA__CAM_ADDR__SHIFT 0x0
#define GRBM_CAM_DATA__CAM_REMAPADDR_MASK 0xffff0000
#define GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT 0x10
#define GRBM_CNTL__READ_TIMEOUT_MASK 0xff
#define GRBM_CNTL__READ_TIMEOUT__SHIFT 0x0
#define GRBM_SKEW_CNTL__SKEW_TOP_THRESHOLD_MASK 0x3f
#define GRBM_SKEW_CNTL__SKEW_TOP_THRESHOLD__SHIFT 0x0
#define GRBM_SKEW_CNTL__SKEW_COUNT_MASK 0xfc0
#define GRBM_SKEW_CNTL__SKEW_COUNT__SHIFT 0x6
#define GRBM_PWR_CNTL__REQ_TYPE_MASK 0xf
#define GRBM_PWR_CNTL__REQ_TYPE__SHIFT 0x0
#define GRBM_PWR_CNTL__RSP_TYPE_MASK 0xf0
#define GRBM_PWR_CNTL__RSP_TYPE__SHIFT 0x4
/*
 * GRBM_STATUS field masks and shifts.
 * ME0PIPE0_CMDFIFO_AVAIL is a 4-bit field (bits 0-3); every other field
 * is a single bit. Bits 4, 6, 10, 11 and 27 have no field defined here.
 * Extract a field with (value & <FIELD>_MASK) >> <FIELD>__SHIFT.
 */
#define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL_MASK 0xf
#define GRBM_STATUS__ME0PIPE0_CMDFIFO_AVAIL__SHIFT 0x0
#define GRBM_STATUS__SRBM_RQ_PENDING_MASK 0x20
#define GRBM_STATUS__SRBM_RQ_PENDING__SHIFT 0x5
#define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING_MASK 0x80
#define GRBM_STATUS__ME0PIPE0_CF_RQ_PENDING__SHIFT 0x7
#define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING_MASK 0x100
#define GRBM_STATUS__ME0PIPE0_PF_RQ_PENDING__SHIFT 0x8
#define GRBM_STATUS__GDS_DMA_RQ_PENDING_MASK 0x200
#define GRBM_STATUS__GDS_DMA_RQ_PENDING__SHIFT 0x9
#define GRBM_STATUS__DB_CLEAN_MASK 0x1000
#define GRBM_STATUS__DB_CLEAN__SHIFT 0xc
#define GRBM_STATUS__CB_CLEAN_MASK 0x2000
#define GRBM_STATUS__CB_CLEAN__SHIFT 0xd
#define GRBM_STATUS__TA_BUSY_MASK 0x4000
#define GRBM_STATUS__TA_BUSY__SHIFT 0xe
#define GRBM_STATUS__GDS_BUSY_MASK 0x8000
#define GRBM_STATUS__GDS_BUSY__SHIFT 0xf
#define GRBM_STATUS__WD_BUSY_NO_DMA_MASK 0x10000
#define GRBM_STATUS__WD_BUSY_NO_DMA__SHIFT 0x10
#define GRBM_STATUS__VGT_BUSY_MASK 0x20000
#define GRBM_STATUS__VGT_BUSY__SHIFT 0x11
#define GRBM_STATUS__IA_BUSY_NO_DMA_MASK 0x40000
#define GRBM_STATUS__IA_BUSY_NO_DMA__SHIFT 0x12
#define GRBM_STATUS__IA_BUSY_MASK 0x80000
#define GRBM_STATUS__IA_BUSY__SHIFT 0x13
#define GRBM_STATUS__SX_BUSY_MASK 0x100000
#define GRBM_STATUS__SX_BUSY__SHIFT 0x14
#define GRBM_STATUS__WD_BUSY_MASK 0x200000
#define GRBM_STATUS__WD_BUSY__SHIFT 0x15
#define GRBM_STATUS__SPI_BUSY_MASK 0x400000
#define GRBM_STATUS__SPI_BUSY__SHIFT 0x16
#define GRBM_STATUS__BCI_BUSY_MASK 0x800000
#define GRBM_STATUS__BCI_BUSY__SHIFT 0x17
#define GRBM_STATUS__SC_BUSY_MASK 0x1000000
#define GRBM_STATUS__SC_BUSY__SHIFT 0x18
#define GRBM_STATUS__PA_BUSY_MASK 0x2000000
#define GRBM_STATUS__PA_BUSY__SHIFT 0x19
#define GRBM_STATUS__DB_BUSY_MASK 0x4000000
#define GRBM_STATUS__DB_BUSY__SHIFT 0x1a
#define GRBM_STATUS__CP_COHERENCY_BUSY_MASK 0x10000000
#define GRBM_STATUS__CP_COHERENCY_BUSY__SHIFT 0x1c
#define GRBM_STATUS__CP_BUSY_MASK 0x20000000
#define GRBM_STATUS__CP_BUSY__SHIFT 0x1d
#define GRBM_STATUS__CB_BUSY_MASK 0x40000000
#define GRBM_STATUS__CB_BUSY__SHIFT 0x1e
#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define GRBM_STATUS__GUI_ACTIVE__SHIFT 0x1f
/*
 * GRBM_STATUS2 field masks and shifts.
 * ME0PIPE1_CMDFIFO_AVAIL is a 4-bit field (bits 0-3). The *_RQ_PENDING
 * flags are single bits occupying bits 4-14, and the *_BUSY flags are
 * single bits at 24, 25, 28, 29 and 30.
 */
#define GRBM_STATUS2__ME0PIPE1_CMDFIFO_AVAIL_MASK 0xf
#define GRBM_STATUS2__ME0PIPE1_CMDFIFO_AVAIL__SHIFT 0x0
#define GRBM_STATUS2__ME0PIPE1_CF_RQ_PENDING_MASK 0x10
#define GRBM_STATUS2__ME0PIPE1_CF_RQ_PENDING__SHIFT 0x4
#define GRBM_STATUS2__ME0PIPE1_PF_RQ_PENDING_MASK 0x20
#define GRBM_STATUS2__ME0PIPE1_PF_RQ_PENDING__SHIFT 0x5
#define GRBM_STATUS2__ME1PIPE0_RQ_PENDING_MASK 0x40
#define GRBM_STATUS2__ME1PIPE0_RQ_PENDING__SHIFT 0x6
#define GRBM_STATUS2__ME1PIPE1_RQ_PENDING_MASK 0x80
#define GRBM_STATUS2__ME1PIPE1_RQ_PENDING__SHIFT 0x7
#define GRBM_STATUS2__ME1PIPE2_RQ_PENDING_MASK 0x100
#define GRBM_STATUS2__ME1PIPE2_RQ_PENDING__SHIFT 0x8
#define GRBM_STATUS2__ME1PIPE3_RQ_PENDING_MASK 0x200
#define GRBM_STATUS2__ME1PIPE3_RQ_PENDING__SHIFT 0x9
#define GRBM_STATUS2__ME2PIPE0_RQ_PENDING_MASK 0x400
#define GRBM_STATUS2__ME2PIPE0_RQ_PENDING__SHIFT 0xa
#define GRBM_STATUS2__ME2PIPE1_RQ_PENDING_MASK 0x800
#define GRBM_STATUS2__ME2PIPE1_RQ_PENDING__SHIFT 0xb
#define GRBM_STATUS2__ME2PIPE2_RQ_PENDING_MASK 0x1000
#define GRBM_STATUS2__ME2PIPE2_RQ_PENDING__SHIFT 0xc
#define GRBM_STATUS2__ME2PIPE3_RQ_PENDING_MASK 0x2000
#define GRBM_STATUS2__ME2PIPE3_RQ_PENDING__SHIFT 0xd
#define GRBM_STATUS2__RLC_RQ_PENDING_MASK 0x4000
#define GRBM_STATUS2__RLC_RQ_PENDING__SHIFT 0xe
#define GRBM_STATUS2__RLC_BUSY_MASK 0x1000000
#define GRBM_STATUS2__RLC_BUSY__SHIFT 0x18
#define GRBM_STATUS2__TC_BUSY_MASK 0x2000000
#define GRBM_STATUS2__TC_BUSY__SHIFT 0x19
#define GRBM_STATUS2__CPF_BUSY_MASK 0x10000000
#define GRBM_STATUS2__CPF_BUSY__SHIFT 0x1c
#define GRBM_STATUS2__CPC_BUSY_MASK 0x20000000
#define GRBM_STATUS2__CPC_BUSY__SHIFT 0x1d
#define GRBM_STATUS2__CPG_BUSY_MASK 0x40000000
#define GRBM_STATUS2__CPG_BUSY__SHIFT 0x1e
/*
 * GRBM_STATUS_SE0 field masks and shifts.
 * All fields are single bits: DB_CLEAN/CB_CLEAN at bits 1-2 and the
 * *_BUSY flags at bits 22-27 and 29-31. The same layout is repeated for
 * GRBM_STATUS_SE1..SE3.
 */
#define GRBM_STATUS_SE0__DB_CLEAN_MASK 0x2
#define GRBM_STATUS_SE0__DB_CLEAN__SHIFT 0x1
#define GRBM_STATUS_SE0__CB_CLEAN_MASK 0x4
#define GRBM_STATUS_SE0__CB_CLEAN__SHIFT 0x2
#define GRBM_STATUS_SE0__BCI_BUSY_MASK 0x400000
#define GRBM_STATUS_SE0__BCI_BUSY__SHIFT 0x16
#define GRBM_STATUS_SE0__VGT_BUSY_MASK 0x800000
#define GRBM_STATUS_SE0__VGT_BUSY__SHIFT 0x17
#define GRBM_STATUS_SE0__PA_BUSY_MASK 0x1000000
#define GRBM_STATUS_SE0__PA_BUSY__SHIFT 0x18
#define GRBM_STATUS_SE0__TA_BUSY_MASK 0x2000000
#define GRBM_STATUS_SE0__TA_BUSY__SHIFT 0x19
#define GRBM_STATUS_SE0__SX_BUSY_MASK 0x4000000
#define GRBM_STATUS_SE0__SX_BUSY__SHIFT 0x1a
#define GRBM_STATUS_SE0__SPI_BUSY_MASK 0x8000000
#define GRBM_STATUS_SE0__SPI_BUSY__SHIFT 0x1b
#define GRBM_STATUS_SE0__SC_BUSY_MASK 0x20000000
#define GRBM_STATUS_SE0__SC_BUSY__SHIFT 0x1d
#define GRBM_STATUS_SE0__DB_BUSY_MASK 0x40000000
#define GRBM_STATUS_SE0__DB_BUSY__SHIFT 0x1e
#define GRBM_STATUS_SE0__CB_BUSY_MASK 0x80000000
#define GRBM_STATUS_SE0__CB_BUSY__SHIFT 0x1f
/*
 * GRBM_STATUS_SE1 field masks and shifts.
 * All fields are single bits: DB_CLEAN/CB_CLEAN at bits 1-2 and the
 * *_BUSY flags at bits 22-27 and 29-31 (same layout as GRBM_STATUS_SE0).
 */
#define GRBM_STATUS_SE1__DB_CLEAN_MASK 0x2
#define GRBM_STATUS_SE1__DB_CLEAN__SHIFT 0x1
#define GRBM_STATUS_SE1__CB_CLEAN_MASK 0x4
#define GRBM_STATUS_SE1__CB_CLEAN__SHIFT 0x2
#define GRBM_STATUS_SE1__BCI_BUSY_MASK 0x400000
#define GRBM_STATUS_SE1__BCI_BUSY__SHIFT 0x16
#define GRBM_STATUS_SE1__VGT_BUSY_MASK 0x800000
#define GRBM_STATUS_SE1__VGT_BUSY__SHIFT 0x17
#define GRBM_STATUS_SE1__PA_BUSY_MASK 0x1000000
#define GRBM_STATUS_SE1__PA_BUSY__SHIFT 0x18
#define GRBM_STATUS_SE1__TA_BUSY_MASK 0x2000000
#define GRBM_STATUS_SE1__TA_BUSY__SHIFT 0x19
#define GRBM_STATUS_SE1__SX_BUSY_MASK 0x4000000
#define GRBM_STATUS_SE1__SX_BUSY__SHIFT 0x1a
#define GRBM_STATUS_SE1__SPI_BUSY_MASK 0x8000000
#define GRBM_STATUS_SE1__SPI_BUSY__SHIFT 0x1b
#define GRBM_STATUS_SE1__SC_BUSY_MASK 0x20000000
#define GRBM_STATUS_SE1__SC_BUSY__SHIFT 0x1d
#define GRBM_STATUS_SE1__DB_BUSY_MASK 0x40000000
#define GRBM_STATUS_SE1__DB_BUSY__SHIFT 0x1e
#define GRBM_STATUS_SE1__CB_BUSY_MASK 0x80000000
#define GRBM_STATUS_SE1__CB_BUSY__SHIFT 0x1f
/*
 * GRBM_STATUS_SE2 field masks and shifts.
 * All fields are single bits: DB_CLEAN/CB_CLEAN at bits 1-2 and the
 * *_BUSY flags at bits 22-27 and 29-31 (same layout as GRBM_STATUS_SE0).
 */
#define GRBM_STATUS_SE2__DB_CLEAN_MASK 0x2
#define GRBM_STATUS_SE2__DB_CLEAN__SHIFT 0x1
#define GRBM_STATUS_SE2__CB_CLEAN_MASK 0x4
#define GRBM_STATUS_SE2__CB_CLEAN__SHIFT 0x2
#define GRBM_STATUS_SE2__BCI_BUSY_MASK 0x400000
#define GRBM_STATUS_SE2__BCI_BUSY__SHIFT 0x16
#define GRBM_STATUS_SE2__VGT_BUSY_MASK 0x800000
#define GRBM_STATUS_SE2__VGT_BUSY__SHIFT 0x17
#define GRBM_STATUS_SE2__PA_BUSY_MASK 0x1000000
#define GRBM_STATUS_SE2__PA_BUSY__SHIFT 0x18
#define GRBM_STATUS_SE2__TA_BUSY_MASK 0x2000000
#define GRBM_STATUS_SE2__TA_BUSY__SHIFT 0x19
#define GRBM_STATUS_SE2__SX_BUSY_MASK 0x4000000
#define GRBM_STATUS_SE2__SX_BUSY__SHIFT 0x1a
#define GRBM_STATUS_SE2__SPI_BUSY_MASK 0x8000000
#define GRBM_STATUS_SE2__SPI_BUSY__SHIFT 0x1b
#define GRBM_STATUS_SE2__SC_BUSY_MASK 0x20000000
#define GRBM_STATUS_SE2__SC_BUSY__SHIFT 0x1d
#define GRBM_STATUS_SE2__DB_BUSY_MASK 0x40000000
#define GRBM_STATUS_SE2__DB_BUSY__SHIFT 0x1e
#define GRBM_STATUS_SE2__CB_BUSY_MASK 0x80000000
#define GRBM_STATUS_SE2__CB_BUSY__SHIFT 0x1f
/*
 * GRBM_STATUS_SE3 field masks and shifts.
 * All fields are single bits: DB_CLEAN/CB_CLEAN at bits 1-2 and the
 * *_BUSY flags at bits 22-27 and 29-31 (same layout as GRBM_STATUS_SE0).
 */
#define GRBM_STATUS_SE3__DB_CLEAN_MASK 0x2
#define GRBM_STATUS_SE3__DB_CLEAN__SHIFT 0x1
#define GRBM_STATUS_SE3__CB_CLEAN_MASK 0x4
#define GRBM_STATUS_SE3__CB_CLEAN__SHIFT 0x2
#define GRBM_STATUS_SE3__BCI_BUSY_MASK 0x400000
#define GRBM_STATUS_SE3__BCI_BUSY__SHIFT 0x16
#define GRBM_STATUS_SE3__VGT_BUSY_MASK 0x800000
#define GRBM_STATUS_SE3__VGT_BUSY__SHIFT 0x17
#define GRBM_STATUS_SE3__PA_BUSY_MASK 0x1000000
#define GRBM_STATUS_SE3__PA_BUSY__SHIFT 0x18
#define GRBM_STATUS_SE3__TA_BUSY_MASK 0x2000000
#define GRBM_STATUS_SE3__TA_BUSY__SHIFT 0x19
#define GRBM_STATUS_SE3__SX_BUSY_MASK 0x4000000
#define GRBM_STATUS_SE3__SX_BUSY__SHIFT 0x1a
#define GRBM_STATUS_SE3__SPI_BUSY_MASK 0x8000000
#define GRBM_STATUS_SE3__SPI_BUSY__SHIFT 0x1b
#define GRBM_STATUS_SE3__SC_BUSY_MASK 0x20000000
#define GRBM_STATUS_SE3__SC_BUSY__SHIFT 0x1d
#define GRBM_STATUS_SE3__DB_BUSY_MASK 0x40000000
#define GRBM_STATUS_SE3__DB_BUSY__SHIFT 0x1e
#define GRBM_STATUS_SE3__CB_BUSY_MASK 0x80000000
#define GRBM_STATUS_SE3__CB_BUSY__SHIFT 0x1f
/*
 * GRBM_SOFT_RESET field masks and shifts.
 * Six single-bit SOFT_RESET_* fields: CP at bit 0, RLC at bit 2, and
 * GFX/CPF/CPC/CPG at bits 16-19.
 */
#define GRBM_SOFT_RESET__SOFT_RESET_CP_MASK 0x1
#define GRBM_SOFT_RESET__SOFT_RESET_CP__SHIFT 0x0
#define GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK 0x4
#define GRBM_SOFT_RESET__SOFT_RESET_RLC__SHIFT 0x2
#define GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK 0x10000
#define GRBM_SOFT_RESET__SOFT_RESET_GFX__SHIFT 0x10
#define GRBM_SOFT_RESET__SOFT_RESET_CPF_MASK 0x20000
#define GRBM_SOFT_RESET__SOFT_RESET_CPF__SHIFT 0x11
#define GRBM_SOFT_RESET__SOFT_RESET_CPC_MASK 0x40000
#define GRBM_SOFT_RESET__SOFT_RESET_CPC__SHIFT 0x12
#define GRBM_SOFT_RESET__SOFT_RESET_CPG_MASK 0x80000
#define GRBM_SOFT_RESET__SOFT_RESET_CPG__SHIFT 0x13
/*
 * GRBM_DEBUG_CNTL / GRBM_DEBUG_DATA field masks and shifts.
 * GRBM_DEBUG_INDEX is a 6-bit field (bits 0-5); DATA covers all 32 bits.
 * NOTE(review): the names suggest an index/data access pair - confirm
 * against the hardware documentation.
 */
#define GRBM_DEBUG_CNTL__GRBM_DEBUG_INDEX_MASK 0x3f
#define GRBM_DEBUG_CNTL__GRBM_DEBUG_INDEX__SHIFT 0x0
#define GRBM_DEBUG_DATA__DATA_MASK 0xffffffff
#define GRBM_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * GRBM_GFX_INDEX field masks and shifts.
 * Three 8-bit index fields (INSTANCE_INDEX bits 0-7, SH_INDEX bits 8-15,
 * SE_INDEX bits 16-23) and three single-bit *_BROADCAST_WRITES flags
 * (SH at bit 29, INSTANCE at bit 30, SE at bit 31).
 */
#define GRBM_GFX_INDEX__INSTANCE_INDEX_MASK 0xff
#define GRBM_GFX_INDEX__INSTANCE_INDEX__SHIFT 0x0
#define GRBM_GFX_INDEX__SH_INDEX_MASK 0xff00
#define GRBM_GFX_INDEX__SH_INDEX__SHIFT 0x8
#define GRBM_GFX_INDEX__SE_INDEX_MASK 0xff0000
#define GRBM_GFX_INDEX__SE_INDEX__SHIFT 0x10
#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK 0x20000000
#define GRBM_GFX_INDEX__SH_BROADCAST_WRITES__SHIFT 0x1d
#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK 0x40000000
#define GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT 0x1e
#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK 0x80000000
#define GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT 0x1f
/*
 * GRBM_GFX_CLKEN_CNTL / GRBM_WAIT_IDLE_CLOCKS field masks and shifts.
 *   GRBM_GFX_CLKEN_CNTL:   PREFIX_DELAY_CNT in bits 0-3,
 *                          POST_DELAY_CNT in bits 8-12.
 *   GRBM_WAIT_IDLE_CLOCKS: WAIT_IDLE_CLOCKS in bits 0-7.
 */
#define GRBM_GFX_CLKEN_CNTL__PREFIX_DELAY_CNT_MASK 0xf
#define GRBM_GFX_CLKEN_CNTL__PREFIX_DELAY_CNT__SHIFT 0x0
#define GRBM_GFX_CLKEN_CNTL__POST_DELAY_CNT_MASK 0x1f00
#define GRBM_GFX_CLKEN_CNTL__POST_DELAY_CNT__SHIFT 0x8
#define GRBM_WAIT_IDLE_CLOCKS__WAIT_IDLE_CLOCKS_MASK 0xff
#define GRBM_WAIT_IDLE_CLOCKS__WAIT_IDLE_CLOCKS__SHIFT 0x0
/*
 * GRBM_DEBUG field masks and shifts.
 * Single-bit fields at bits 1, 5, 6, 7 and 12; HYSTERESIS_GUI_ACTIVE is
 * a 4-bit field in bits 8-11.
 */
#define GRBM_DEBUG__IGNORE_RDY_MASK 0x2
#define GRBM_DEBUG__IGNORE_RDY__SHIFT 0x1
#define GRBM_DEBUG__IGNORE_FAO_MASK 0x20
#define GRBM_DEBUG__IGNORE_FAO__SHIFT 0x5
#define GRBM_DEBUG__DISABLE_READ_TIMEOUT_MASK 0x40
#define GRBM_DEBUG__DISABLE_READ_TIMEOUT__SHIFT 0x6
#define GRBM_DEBUG__SNAPSHOT_FREE_CNTRS_MASK 0x80
#define GRBM_DEBUG__SNAPSHOT_FREE_CNTRS__SHIFT 0x7
#define GRBM_DEBUG__HYSTERESIS_GUI_ACTIVE_MASK 0xf00
#define GRBM_DEBUG__HYSTERESIS_GUI_ACTIVE__SHIFT 0x8
#define GRBM_DEBUG__GFX_CLOCK_DOMAIN_OVERRIDE_MASK 0x1000
#define GRBM_DEBUG__GFX_CLOCK_DOMAIN_OVERRIDE__SHIFT 0xc
/*
 * GRBM_DEBUG_SNAPSHOT field masks and shifts.
 * Twenty-two single-bit *_RDY fields packed contiguously in bits 0-21.
 * The SE<n>SPI_ME0PIPE<m>_RDY0 fields occupy bits 6-13 and the matching
 * ..._RDY1 fields occupy bits 14-21.
 */
#define GRBM_DEBUG_SNAPSHOT__CPF_RDY_MASK 0x1
#define GRBM_DEBUG_SNAPSHOT__CPF_RDY__SHIFT 0x0
#define GRBM_DEBUG_SNAPSHOT__CPG_RDY_MASK 0x2
#define GRBM_DEBUG_SNAPSHOT__CPG_RDY__SHIFT 0x1
#define GRBM_DEBUG_SNAPSHOT__SRBM_RDY_MASK 0x4
#define GRBM_DEBUG_SNAPSHOT__SRBM_RDY__SHIFT 0x2
#define GRBM_DEBUG_SNAPSHOT__WD_ME0PIPE0_RDY_MASK 0x8
#define GRBM_DEBUG_SNAPSHOT__WD_ME0PIPE0_RDY__SHIFT 0x3
#define GRBM_DEBUG_SNAPSHOT__WD_ME0PIPE1_RDY_MASK 0x10
#define GRBM_DEBUG_SNAPSHOT__WD_ME0PIPE1_RDY__SHIFT 0x4
#define GRBM_DEBUG_SNAPSHOT__GDS_RDY_MASK 0x20
#define GRBM_DEBUG_SNAPSHOT__GDS_RDY__SHIFT 0x5
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE0_RDY0_MASK 0x40
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE0_RDY0__SHIFT 0x6
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE1_RDY0_MASK 0x80
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE1_RDY0__SHIFT 0x7
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE0_RDY0_MASK 0x100
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE0_RDY0__SHIFT 0x8
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE1_RDY0_MASK 0x200
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE1_RDY0__SHIFT 0x9
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE0_RDY0_MASK 0x400
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE0_RDY0__SHIFT 0xa
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE1_RDY0_MASK 0x800
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE1_RDY0__SHIFT 0xb
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE0_RDY0_MASK 0x1000
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE0_RDY0__SHIFT 0xc
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE1_RDY0_MASK 0x2000
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE1_RDY0__SHIFT 0xd
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE0_RDY1_MASK 0x4000
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE0_RDY1__SHIFT 0xe
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE1_RDY1_MASK 0x8000
#define GRBM_DEBUG_SNAPSHOT__SE0SPI_ME0PIPE1_RDY1__SHIFT 0xf
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE0_RDY1_MASK 0x10000
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE0_RDY1__SHIFT 0x10
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE1_RDY1_MASK 0x20000
#define GRBM_DEBUG_SNAPSHOT__SE1SPI_ME0PIPE1_RDY1__SHIFT 0x11
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE0_RDY1_MASK 0x40000
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE0_RDY1__SHIFT 0x12
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE1_RDY1_MASK 0x80000
#define GRBM_DEBUG_SNAPSHOT__SE2SPI_ME0PIPE1_RDY1__SHIFT 0x13
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE0_RDY1_MASK 0x100000
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE0_RDY1__SHIFT 0x14
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE1_RDY1_MASK 0x200000
#define GRBM_DEBUG_SNAPSHOT__SE3SPI_ME0PIPE1_RDY1__SHIFT 0x15
/*
 * GRBM_READ_ERROR field masks and shifts.
 * READ_ADDRESS is a 16-bit field in bits 2-17 (note the non-zero shift),
 * READ_PIPEID is bits 20-21, READ_MEID is bits 22-23, and READ_ERROR is
 * the single bit 31.
 */
#define GRBM_READ_ERROR__READ_ADDRESS_MASK 0x3fffc
#define GRBM_READ_ERROR__READ_ADDRESS__SHIFT 0x2
#define GRBM_READ_ERROR__READ_PIPEID_MASK 0x300000
#define GRBM_READ_ERROR__READ_PIPEID__SHIFT 0x14
#define GRBM_READ_ERROR__READ_MEID_MASK 0xc00000
#define GRBM_READ_ERROR__READ_MEID__SHIFT 0x16
#define GRBM_READ_ERROR__READ_ERROR_MASK 0x80000000
#define GRBM_READ_ERROR__READ_ERROR__SHIFT 0x1f
/*
 * GRBM_READ_ERROR2 field masks and shifts.
 * Fifteen single-bit READ_REQUESTER_* flags packed contiguously in
 * bits 17-31; bits 0-16 have no field defined here.
 */
#define GRBM_READ_ERROR2__READ_REQUESTER_SRBM_MASK 0x20000
#define GRBM_READ_ERROR2__READ_REQUESTER_SRBM__SHIFT 0x11
#define GRBM_READ_ERROR2__READ_REQUESTER_RLC_MASK 0x40000
#define GRBM_READ_ERROR2__READ_REQUESTER_RLC__SHIFT 0x12
#define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA_MASK 0x80000
#define GRBM_READ_ERROR2__READ_REQUESTER_GDS_DMA__SHIFT 0x13
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF_MASK 0x100000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_CF__SHIFT 0x14
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_PF_MASK 0x200000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE0_PF__SHIFT 0x15
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE1_CF_MASK 0x400000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE1_CF__SHIFT 0x16
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE1_PF_MASK 0x800000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME0PIPE1_PF__SHIFT 0x17
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE0_MASK 0x1000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE0__SHIFT 0x18
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE1_MASK 0x2000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE1__SHIFT 0x19
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE2_MASK 0x4000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE2__SHIFT 0x1a
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE3_MASK 0x8000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME1PIPE3__SHIFT 0x1b
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE0_MASK 0x10000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE0__SHIFT 0x1c
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE1_MASK 0x20000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE1__SHIFT 0x1d
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE2_MASK 0x40000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE2__SHIFT 0x1e
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE3_MASK 0x80000000
#define GRBM_READ_ERROR2__READ_REQUESTER_ME2PIPE3__SHIFT 0x1f
/*
 * GRBM_INT_CNTL field masks and shifts.
 * Two single-bit enable fields: RDERR_INT_ENABLE at bit 0 and
 * GUI_IDLE_INT_ENABLE at bit 19.
 */
#define GRBM_INT_CNTL__RDERR_INT_ENABLE_MASK 0x1
#define GRBM_INT_CNTL__RDERR_INT_ENABLE__SHIFT 0x0
#define GRBM_INT_CNTL__GUI_IDLE_INT_ENABLE_MASK 0x80000
#define GRBM_INT_CNTL__GUI_IDLE_INT_ENABLE__SHIFT 0x13
/*
 * GRBM_PERFCOUNTER0_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5). The remaining fields are
 * single-bit *_USER_DEFINED_MASK flags at bits 10-14 and 16-28.
 * Because those field names themselves end in "_MASK", their mask macros
 * end in "_MASK_MASK" while the shift macros end in "_MASK__SHIFT".
 */
#define GRBM_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_PERFCOUNTER0_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_PERFCOUNTER0_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_PERFCOUNTER0_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_PERFCOUNTER0_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_PERFCOUNTER0_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_PERFCOUNTER0_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_PERFCOUNTER0_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_PERFCOUNTER0_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_PERFCOUNTER0_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x4000
#define GRBM_PERFCOUNTER0_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xe
#define GRBM_PERFCOUNTER0_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_PERFCOUNTER0_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_PERFCOUNTER0_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_PERFCOUNTER0_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_PERFCOUNTER0_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_PERFCOUNTER0_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_PERFCOUNTER0_SELECT__GRBM_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_PERFCOUNTER0_SELECT__GRBM_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_PERFCOUNTER0_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_PERFCOUNTER0_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_PERFCOUNTER0_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_PERFCOUNTER0_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x15
#define GRBM_PERFCOUNTER0_SELECT__CP_BUSY_USER_DEFINED_MASK_MASK 0x400000
#define GRBM_PERFCOUNTER0_SELECT__CP_BUSY_USER_DEFINED_MASK__SHIFT 0x16
#define GRBM_PERFCOUNTER0_SELECT__IA_BUSY_USER_DEFINED_MASK_MASK 0x800000
#define GRBM_PERFCOUNTER0_SELECT__IA_BUSY_USER_DEFINED_MASK__SHIFT 0x17
#define GRBM_PERFCOUNTER0_SELECT__GDS_BUSY_USER_DEFINED_MASK_MASK 0x1000000
#define GRBM_PERFCOUNTER0_SELECT__GDS_BUSY_USER_DEFINED_MASK__SHIFT 0x18
#define GRBM_PERFCOUNTER0_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x2000000
#define GRBM_PERFCOUNTER0_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x19
#define GRBM_PERFCOUNTER0_SELECT__RLC_BUSY_USER_DEFINED_MASK_MASK 0x4000000
#define GRBM_PERFCOUNTER0_SELECT__RLC_BUSY_USER_DEFINED_MASK__SHIFT 0x1a
#define GRBM_PERFCOUNTER0_SELECT__TC_BUSY_USER_DEFINED_MASK_MASK 0x8000000
#define GRBM_PERFCOUNTER0_SELECT__TC_BUSY_USER_DEFINED_MASK__SHIFT 0x1b
#define GRBM_PERFCOUNTER0_SELECT__WD_BUSY_USER_DEFINED_MASK_MASK 0x10000000
#define GRBM_PERFCOUNTER0_SELECT__WD_BUSY_USER_DEFINED_MASK__SHIFT 0x1c
/*
 * GRBM_PERFCOUNTER1_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5). The remaining fields are
 * single-bit *_USER_DEFINED_MASK flags at bits 10-14 and 16-28.
 * Because those field names themselves end in "_MASK", their mask macros
 * end in "_MASK_MASK" while the shift macros end in "_MASK__SHIFT".
 */
#define GRBM_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_PERFCOUNTER1_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_PERFCOUNTER1_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_PERFCOUNTER1_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_PERFCOUNTER1_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_PERFCOUNTER1_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_PERFCOUNTER1_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_PERFCOUNTER1_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_PERFCOUNTER1_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_PERFCOUNTER1_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x4000
#define GRBM_PERFCOUNTER1_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xe
#define GRBM_PERFCOUNTER1_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_PERFCOUNTER1_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_PERFCOUNTER1_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_PERFCOUNTER1_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_PERFCOUNTER1_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_PERFCOUNTER1_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_PERFCOUNTER1_SELECT__GRBM_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_PERFCOUNTER1_SELECT__GRBM_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_PERFCOUNTER1_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_PERFCOUNTER1_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_PERFCOUNTER1_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_PERFCOUNTER1_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x15
#define GRBM_PERFCOUNTER1_SELECT__CP_BUSY_USER_DEFINED_MASK_MASK 0x400000
#define GRBM_PERFCOUNTER1_SELECT__CP_BUSY_USER_DEFINED_MASK__SHIFT 0x16
#define GRBM_PERFCOUNTER1_SELECT__IA_BUSY_USER_DEFINED_MASK_MASK 0x800000
#define GRBM_PERFCOUNTER1_SELECT__IA_BUSY_USER_DEFINED_MASK__SHIFT 0x17
#define GRBM_PERFCOUNTER1_SELECT__GDS_BUSY_USER_DEFINED_MASK_MASK 0x1000000
#define GRBM_PERFCOUNTER1_SELECT__GDS_BUSY_USER_DEFINED_MASK__SHIFT 0x18
#define GRBM_PERFCOUNTER1_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x2000000
#define GRBM_PERFCOUNTER1_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x19
#define GRBM_PERFCOUNTER1_SELECT__RLC_BUSY_USER_DEFINED_MASK_MASK 0x4000000
#define GRBM_PERFCOUNTER1_SELECT__RLC_BUSY_USER_DEFINED_MASK__SHIFT 0x1a
#define GRBM_PERFCOUNTER1_SELECT__TC_BUSY_USER_DEFINED_MASK_MASK 0x8000000
#define GRBM_PERFCOUNTER1_SELECT__TC_BUSY_USER_DEFINED_MASK__SHIFT 0x1b
#define GRBM_PERFCOUNTER1_SELECT__WD_BUSY_USER_DEFINED_MASK_MASK 0x10000000
#define GRBM_PERFCOUNTER1_SELECT__WD_BUSY_USER_DEFINED_MASK__SHIFT 0x1c
/*
 * GRBM_SE0_PERFCOUNTER_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5); the *_USER_DEFINED_MASK flags are
 * single bits at 10-13 and 15-21. Bit positions differ from the
 * same-named fields of GRBM_PERFCOUNTER{0,1}_SELECT (e.g. TA_BUSY is
 * bit 12 here), so the two sets of macros are not interchangeable.
 */
#define GRBM_SE0_PERFCOUNTER_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_SE0_PERFCOUNTER_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_SE0_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_SE0_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_SE0_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_SE0_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_SE0_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_SE0_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_SE0_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_SE0_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_SE0_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x8000
#define GRBM_SE0_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0xf
#define GRBM_SE0_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_SE0_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_SE0_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_SE0_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_SE0_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_SE0_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_SE0_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_SE0_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_SE0_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_SE0_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_SE0_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_SE0_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x15
/*
 * GRBM_SE1_PERFCOUNTER_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5); the *_USER_DEFINED_MASK flags are
 * single bits at 10-13 and 15-21 (same layout as
 * GRBM_SE0_PERFCOUNTER_SELECT).
 */
#define GRBM_SE1_PERFCOUNTER_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_SE1_PERFCOUNTER_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_SE1_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_SE1_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_SE1_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_SE1_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_SE1_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_SE1_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_SE1_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_SE1_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_SE1_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x8000
#define GRBM_SE1_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0xf
#define GRBM_SE1_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_SE1_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_SE1_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_SE1_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_SE1_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_SE1_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_SE1_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_SE1_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_SE1_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_SE1_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_SE1_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_SE1_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x15
/*
 * GRBM_SE2_PERFCOUNTER_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5); the *_USER_DEFINED_MASK flags are
 * single bits at 10-13 and 15-21 (same layout as
 * GRBM_SE0_PERFCOUNTER_SELECT).
 */
#define GRBM_SE2_PERFCOUNTER_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_SE2_PERFCOUNTER_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_SE2_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_SE2_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_SE2_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_SE2_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_SE2_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_SE2_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_SE2_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_SE2_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_SE2_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x8000
#define GRBM_SE2_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0xf
#define GRBM_SE2_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_SE2_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_SE2_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_SE2_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_SE2_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_SE2_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_SE2_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_SE2_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_SE2_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_SE2_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_SE2_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_SE2_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x15
/*
 * GRBM_SE3_PERFCOUNTER_SELECT field masks and shifts.
 * PERF_SEL is a 6-bit field (bits 0-5); the *_USER_DEFINED_MASK flags are
 * single bits at 10-13 and 15-21 (same layout as
 * GRBM_SE0_PERFCOUNTER_SELECT).
 */
#define GRBM_SE3_PERFCOUNTER_SELECT__PERF_SEL_MASK 0x3f
#define GRBM_SE3_PERFCOUNTER_SELECT__PERF_SEL__SHIFT 0x0
#define GRBM_SE3_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK_MASK 0x400
#define GRBM_SE3_PERFCOUNTER_SELECT__DB_CLEAN_USER_DEFINED_MASK__SHIFT 0xa
#define GRBM_SE3_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK_MASK 0x800
#define GRBM_SE3_PERFCOUNTER_SELECT__CB_CLEAN_USER_DEFINED_MASK__SHIFT 0xb
#define GRBM_SE3_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK_MASK 0x1000
#define GRBM_SE3_PERFCOUNTER_SELECT__TA_BUSY_USER_DEFINED_MASK__SHIFT 0xc
#define GRBM_SE3_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK_MASK 0x2000
#define GRBM_SE3_PERFCOUNTER_SELECT__SX_BUSY_USER_DEFINED_MASK__SHIFT 0xd
#define GRBM_SE3_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK_MASK 0x8000
#define GRBM_SE3_PERFCOUNTER_SELECT__SPI_BUSY_USER_DEFINED_MASK__SHIFT 0xf
#define GRBM_SE3_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK_MASK 0x10000
#define GRBM_SE3_PERFCOUNTER_SELECT__SC_BUSY_USER_DEFINED_MASK__SHIFT 0x10
#define GRBM_SE3_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK_MASK 0x20000
#define GRBM_SE3_PERFCOUNTER_SELECT__DB_BUSY_USER_DEFINED_MASK__SHIFT 0x11
#define GRBM_SE3_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK_MASK 0x40000
#define GRBM_SE3_PERFCOUNTER_SELECT__CB_BUSY_USER_DEFINED_MASK__SHIFT 0x12
#define GRBM_SE3_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK_MASK 0x80000
#define GRBM_SE3_PERFCOUNTER_SELECT__VGT_BUSY_USER_DEFINED_MASK__SHIFT 0x13
#define GRBM_SE3_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK_MASK 0x100000
#define GRBM_SE3_PERFCOUNTER_SELECT__PA_BUSY_USER_DEFINED_MASK__SHIFT 0x14
#define GRBM_SE3_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK_MASK 0x200000
#define GRBM_SE3_PERFCOUNTER_SELECT__BCI_BUSY_USER_DEFINED_MASK__SHIFT 0x15
/*
 * GRBM_PERFCOUNTER{0,1}_{LO,HI} and GRBM_SE{0..3}_PERFCOUNTER_{LO,HI}
 * field masks and shifts. Each defines one full-width 32-bit field
 * (PERFCOUNTER_LO or PERFCOUNTER_HI) with shift 0.
 * NOTE(review): presumably LO/HI are the two halves of a 64-bit counter
 * value - confirm against the hardware documentation.
 */
#define GRBM_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GRBM_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GRBM_SE0_PERFCOUNTER_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_SE0_PERFCOUNTER_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_SE0_PERFCOUNTER_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_SE0_PERFCOUNTER_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GRBM_SE1_PERFCOUNTER_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_SE1_PERFCOUNTER_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_SE1_PERFCOUNTER_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_SE1_PERFCOUNTER_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GRBM_SE2_PERFCOUNTER_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_SE2_PERFCOUNTER_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_SE2_PERFCOUNTER_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_SE2_PERFCOUNTER_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GRBM_SE3_PERFCOUNTER_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GRBM_SE3_PERFCOUNTER_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GRBM_SE3_PERFCOUNTER_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GRBM_SE3_PERFCOUNTER_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * GRBM_SCRATCH_REG0..7 field masks and shifts.
 * Each defines one full-width 32-bit field named after itself
 * (SCRATCH_REG<n>) with shift 0.
 */
#define GRBM_SCRATCH_REG0__SCRATCH_REG0_MASK 0xffffffff
#define GRBM_SCRATCH_REG0__SCRATCH_REG0__SHIFT 0x0
#define GRBM_SCRATCH_REG1__SCRATCH_REG1_MASK 0xffffffff
#define GRBM_SCRATCH_REG1__SCRATCH_REG1__SHIFT 0x0
#define GRBM_SCRATCH_REG2__SCRATCH_REG2_MASK 0xffffffff
#define GRBM_SCRATCH_REG2__SCRATCH_REG2__SHIFT 0x0
#define GRBM_SCRATCH_REG3__SCRATCH_REG3_MASK 0xffffffff
#define GRBM_SCRATCH_REG3__SCRATCH_REG3__SHIFT 0x0
#define GRBM_SCRATCH_REG4__SCRATCH_REG4_MASK 0xffffffff
#define GRBM_SCRATCH_REG4__SCRATCH_REG4__SHIFT 0x0
#define GRBM_SCRATCH_REG5__SCRATCH_REG5_MASK 0xffffffff
#define GRBM_SCRATCH_REG5__SCRATCH_REG5__SHIFT 0x0
#define GRBM_SCRATCH_REG6__SCRATCH_REG6_MASK 0xffffffff
#define GRBM_SCRATCH_REG6__SCRATCH_REG6__SHIFT 0x0
#define GRBM_SCRATCH_REG7__SCRATCH_REG7_MASK 0xffffffff
#define GRBM_SCRATCH_REG7__SCRATCH_REG7__SHIFT 0x0
/*
 * DEBUG_INDEX / DEBUG_DATA / GRBM_NOWHERE field masks and shifts.
 * DEBUG_INDEX is an 18-bit field (bits 0-17); DEBUG_DATA and
 * GRBM_NOWHERE.DATA each cover all 32 bits.
 */
#define DEBUG_INDEX__DEBUG_INDEX_MASK 0x3ffff
#define DEBUG_INDEX__DEBUG_INDEX__SHIFT 0x0
#define DEBUG_DATA__DEBUG_DATA_MASK 0xffffffff
#define DEBUG_DATA__DEBUG_DATA__SHIFT 0x0
#define GRBM_NOWHERE__DATA_MASK 0xffffffff
#define GRBM_NOWHERE__DATA__SHIFT 0x0
/*
 * PA_CL_VPORT_{X,Y,Z}{SCALE,OFFSET} (unsuffixed) field masks and shifts.
 * Each defines one full-width 32-bit field with shift 0.
 * NOTE(review): the values are presumably raw 32-bit float bit patterns -
 * confirm against the hardware documentation.
 */
#define PA_CL_VPORT_XSCALE__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YSCALE__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET__VPORT_ZOFFSET__SHIFT 0x0
/*
 * PA_CL_VPORT_XSCALE_1..15 field masks and shifts.
 * Each numbered instance defines the same full-width 32-bit VPORT_XSCALE
 * field (shift 0) as the unsuffixed PA_CL_VPORT_XSCALE.
 */
#define PA_CL_VPORT_XSCALE_1__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_1__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_2__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_2__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_3__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_3__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_4__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_4__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_5__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_5__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_6__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_6__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_7__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_7__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_8__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_8__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_9__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_9__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_10__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_10__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_11__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_11__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_12__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_12__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_13__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_13__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_14__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_14__VPORT_XSCALE__SHIFT 0x0
#define PA_CL_VPORT_XSCALE_15__VPORT_XSCALE_MASK 0xffffffff
#define PA_CL_VPORT_XSCALE_15__VPORT_XSCALE__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_VPORT_XOFFSET_1 .. PA_CL_VPORT_XOFFSET_15.
 * All fifteen registers expose the same single field, VPORT_XOFFSET, which
 * fills the full 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the numeric suffix presumably selects a viewport index -
 * confirm against the hardware register reference.
 */
#define PA_CL_VPORT_XOFFSET_1__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_1__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_2__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_2__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_3__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_3__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_4__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_4__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_5__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_5__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_6__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_6__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_7__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_7__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_8__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_8__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_9__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_9__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_10__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_10__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_11__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_11__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_12__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_12__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_13__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_13__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_14__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_14__VPORT_XOFFSET__SHIFT 0x0
#define PA_CL_VPORT_XOFFSET_15__VPORT_XOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_XOFFSET_15__VPORT_XOFFSET__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_VPORT_YSCALE_1 .. PA_CL_VPORT_YSCALE_15.
 * All fifteen registers expose the same single field, VPORT_YSCALE, which
 * fills the full 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the numeric suffix presumably selects a viewport index -
 * confirm against the hardware register reference.
 */
#define PA_CL_VPORT_YSCALE_1__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_1__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_2__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_2__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_3__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_3__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_4__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_4__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_5__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_5__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_6__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_6__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_7__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_7__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_8__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_8__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_9__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_9__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_10__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_10__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_11__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_11__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_12__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_12__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_13__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_13__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_14__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_14__VPORT_YSCALE__SHIFT 0x0
#define PA_CL_VPORT_YSCALE_15__VPORT_YSCALE_MASK 0xffffffff
#define PA_CL_VPORT_YSCALE_15__VPORT_YSCALE__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_VPORT_YOFFSET_1 .. PA_CL_VPORT_YOFFSET_15.
 * All fifteen registers expose the same single field, VPORT_YOFFSET, which
 * fills the full 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the numeric suffix presumably selects a viewport index -
 * confirm against the hardware register reference.
 */
#define PA_CL_VPORT_YOFFSET_1__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_1__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_2__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_2__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_3__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_3__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_4__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_4__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_5__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_5__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_6__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_6__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_7__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_7__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_8__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_8__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_9__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_9__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_10__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_10__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_11__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_11__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_12__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_12__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_13__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_13__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_14__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_14__VPORT_YOFFSET__SHIFT 0x0
#define PA_CL_VPORT_YOFFSET_15__VPORT_YOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_YOFFSET_15__VPORT_YOFFSET__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_VPORT_ZSCALE_1 .. PA_CL_VPORT_ZSCALE_15.
 * All fifteen registers expose the same single field, VPORT_ZSCALE, which
 * fills the full 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the numeric suffix presumably selects a viewport index -
 * confirm against the hardware register reference.
 */
#define PA_CL_VPORT_ZSCALE_1__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_1__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_2__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_2__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_3__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_3__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_4__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_4__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_5__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_5__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_6__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_6__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_7__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_7__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_8__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_8__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_9__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_9__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_10__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_10__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_11__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_11__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_12__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_12__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_13__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_13__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_14__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_14__VPORT_ZSCALE__SHIFT 0x0
#define PA_CL_VPORT_ZSCALE_15__VPORT_ZSCALE_MASK 0xffffffff
#define PA_CL_VPORT_ZSCALE_15__VPORT_ZSCALE__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_VPORT_ZOFFSET_1 .. PA_CL_VPORT_ZOFFSET_15.
 * All fifteen registers expose the same single field, VPORT_ZOFFSET, which
 * fills the full 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the numeric suffix presumably selects a viewport index -
 * confirm against the hardware register reference.
 */
#define PA_CL_VPORT_ZOFFSET_1__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_1__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_2__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_2__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_3__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_3__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_4__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_4__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_5__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_5__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_6__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_6__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_7__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_7__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_8__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_8__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_9__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_9__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_10__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_10__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_11__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_11__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_12__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_12__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_13__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_13__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_14__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_14__VPORT_ZOFFSET__SHIFT 0x0
#define PA_CL_VPORT_ZOFFSET_15__VPORT_ZOFFSET_MASK 0xffffffff
#define PA_CL_VPORT_ZOFFSET_15__VPORT_ZOFFSET__SHIFT 0x0
/*
 * Bit-field layout of PA_CL_VTE_CNTL.  <FIELD>_MASK selects the field's bits,
 * <FIELD>__SHIFT is the index of its lowest bit.  Every field here is a
 * single bit:
 *   bits 0-5  : VPORT_{X,Y,Z}_{SCALE,OFFSET}_ENA
 *   bits 8-10 : VTX_XY_FMT, VTX_Z_FMT, VTX_W0_FMT
 *   bit  11   : PERFCOUNTER_REF
 * No field is defined here for bits 6-7 or 12-31.
 */
#define PA_CL_VTE_CNTL__VPORT_X_SCALE_ENA_MASK 0x1
#define PA_CL_VTE_CNTL__VPORT_X_SCALE_ENA__SHIFT 0x0
#define PA_CL_VTE_CNTL__VPORT_X_OFFSET_ENA_MASK 0x2
#define PA_CL_VTE_CNTL__VPORT_X_OFFSET_ENA__SHIFT 0x1
#define PA_CL_VTE_CNTL__VPORT_Y_SCALE_ENA_MASK 0x4
#define PA_CL_VTE_CNTL__VPORT_Y_SCALE_ENA__SHIFT 0x2
#define PA_CL_VTE_CNTL__VPORT_Y_OFFSET_ENA_MASK 0x8
#define PA_CL_VTE_CNTL__VPORT_Y_OFFSET_ENA__SHIFT 0x3
#define PA_CL_VTE_CNTL__VPORT_Z_SCALE_ENA_MASK 0x10
#define PA_CL_VTE_CNTL__VPORT_Z_SCALE_ENA__SHIFT 0x4
#define PA_CL_VTE_CNTL__VPORT_Z_OFFSET_ENA_MASK 0x20
#define PA_CL_VTE_CNTL__VPORT_Z_OFFSET_ENA__SHIFT 0x5
#define PA_CL_VTE_CNTL__VTX_XY_FMT_MASK 0x100
#define PA_CL_VTE_CNTL__VTX_XY_FMT__SHIFT 0x8
#define PA_CL_VTE_CNTL__VTX_Z_FMT_MASK 0x200
#define PA_CL_VTE_CNTL__VTX_Z_FMT__SHIFT 0x9
#define PA_CL_VTE_CNTL__VTX_W0_FMT_MASK 0x400
#define PA_CL_VTE_CNTL__VTX_W0_FMT__SHIFT 0xa
#define PA_CL_VTE_CNTL__PERFCOUNTER_REF_MASK 0x800
#define PA_CL_VTE_CNTL__PERFCOUNTER_REF__SHIFT 0xb
/*
 * Bit-field layout of PA_CL_VS_OUT_CNTL.  <FIELD>_MASK selects the field's
 * bits, <FIELD>__SHIFT is the index of its lowest bit.  All 26 fields are
 * single-bit and packed contiguously in bits 0-25:
 *   bits 0-7   : CLIP_DIST_ENA_0 .. CLIP_DIST_ENA_7
 *   bits 8-15  : CULL_DIST_ENA_0 .. CULL_DIST_ENA_7
 *   bits 16-25 : USE_VTX_* and VS_OUT_* flags
 * No field is defined here for bits 26-31.
 */
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_0_MASK 0x1
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_0__SHIFT 0x0
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_1_MASK 0x2
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_1__SHIFT 0x1
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_2_MASK 0x4
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_2__SHIFT 0x2
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_3_MASK 0x8
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_3__SHIFT 0x3
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_4_MASK 0x10
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_4__SHIFT 0x4
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_5_MASK 0x20
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_5__SHIFT 0x5
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_6_MASK 0x40
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_6__SHIFT 0x6
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_7_MASK 0x80
#define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_7__SHIFT 0x7
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_0_MASK 0x100
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_0__SHIFT 0x8
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_1_MASK 0x200
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_1__SHIFT 0x9
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_2_MASK 0x400
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_2__SHIFT 0xa
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_3_MASK 0x800
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_3__SHIFT 0xb
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_4_MASK 0x1000
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_4__SHIFT 0xc
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_5_MASK 0x2000
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_5__SHIFT 0xd
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_6_MASK 0x4000
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_6__SHIFT 0xe
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_7_MASK 0x8000
#define PA_CL_VS_OUT_CNTL__CULL_DIST_ENA_7__SHIFT 0xf
#define PA_CL_VS_OUT_CNTL__USE_VTX_POINT_SIZE_MASK 0x10000
#define PA_CL_VS_OUT_CNTL__USE_VTX_POINT_SIZE__SHIFT 0x10
#define PA_CL_VS_OUT_CNTL__USE_VTX_EDGE_FLAG_MASK 0x20000
#define PA_CL_VS_OUT_CNTL__USE_VTX_EDGE_FLAG__SHIFT 0x11
#define PA_CL_VS_OUT_CNTL__USE_VTX_RENDER_TARGET_INDX_MASK 0x40000
#define PA_CL_VS_OUT_CNTL__USE_VTX_RENDER_TARGET_INDX__SHIFT 0x12
#define PA_CL_VS_OUT_CNTL__USE_VTX_VIEWPORT_INDX_MASK 0x80000
#define PA_CL_VS_OUT_CNTL__USE_VTX_VIEWPORT_INDX__SHIFT 0x13
#define PA_CL_VS_OUT_CNTL__USE_VTX_KILL_FLAG_MASK 0x100000
#define PA_CL_VS_OUT_CNTL__USE_VTX_KILL_FLAG__SHIFT 0x14
#define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_VEC_ENA_MASK 0x200000
#define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_VEC_ENA__SHIFT 0x15
#define PA_CL_VS_OUT_CNTL__VS_OUT_CCDIST0_VEC_ENA_MASK 0x400000
#define PA_CL_VS_OUT_CNTL__VS_OUT_CCDIST0_VEC_ENA__SHIFT 0x16
#define PA_CL_VS_OUT_CNTL__VS_OUT_CCDIST1_VEC_ENA_MASK 0x800000
#define PA_CL_VS_OUT_CNTL__VS_OUT_CCDIST1_VEC_ENA__SHIFT 0x17
#define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA_MASK 0x1000000
#define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA__SHIFT 0x18
#define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG_MASK 0x2000000
#define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG__SHIFT 0x19
/*
 * Bit-field layout of PA_CL_NANINF_CNTL.  <FIELD>_MASK selects the field's
 * bits, <FIELD>__SHIFT is the index of its lowest bit.  Every field is a
 * single bit:
 *   bits 0-7  : VTE_* flags
 *   bits 8-14 : VS_* flags
 *   bit  20   : VTE_NO_OUTPUT_NEG_0
 * No field is defined here for bits 15-19 or 21-31.
 */
#define PA_CL_NANINF_CNTL__VTE_XY_INF_DISCARD_MASK 0x1
#define PA_CL_NANINF_CNTL__VTE_XY_INF_DISCARD__SHIFT 0x0
#define PA_CL_NANINF_CNTL__VTE_Z_INF_DISCARD_MASK 0x2
#define PA_CL_NANINF_CNTL__VTE_Z_INF_DISCARD__SHIFT 0x1
#define PA_CL_NANINF_CNTL__VTE_W_INF_DISCARD_MASK 0x4
#define PA_CL_NANINF_CNTL__VTE_W_INF_DISCARD__SHIFT 0x2
#define PA_CL_NANINF_CNTL__VTE_0XNANINF_IS_0_MASK 0x8
#define PA_CL_NANINF_CNTL__VTE_0XNANINF_IS_0__SHIFT 0x3
#define PA_CL_NANINF_CNTL__VTE_XY_NAN_RETAIN_MASK 0x10
#define PA_CL_NANINF_CNTL__VTE_XY_NAN_RETAIN__SHIFT 0x4
#define PA_CL_NANINF_CNTL__VTE_Z_NAN_RETAIN_MASK 0x20
#define PA_CL_NANINF_CNTL__VTE_Z_NAN_RETAIN__SHIFT 0x5
#define PA_CL_NANINF_CNTL__VTE_W_NAN_RETAIN_MASK 0x40
#define PA_CL_NANINF_CNTL__VTE_W_NAN_RETAIN__SHIFT 0x6
#define PA_CL_NANINF_CNTL__VTE_W_RECIP_NAN_IS_0_MASK 0x80
#define PA_CL_NANINF_CNTL__VTE_W_RECIP_NAN_IS_0__SHIFT 0x7
#define PA_CL_NANINF_CNTL__VS_XY_NAN_TO_INF_MASK 0x100
#define PA_CL_NANINF_CNTL__VS_XY_NAN_TO_INF__SHIFT 0x8
#define PA_CL_NANINF_CNTL__VS_XY_INF_RETAIN_MASK 0x200
#define PA_CL_NANINF_CNTL__VS_XY_INF_RETAIN__SHIFT 0x9
#define PA_CL_NANINF_CNTL__VS_Z_NAN_TO_INF_MASK 0x400
#define PA_CL_NANINF_CNTL__VS_Z_NAN_TO_INF__SHIFT 0xa
#define PA_CL_NANINF_CNTL__VS_Z_INF_RETAIN_MASK 0x800
#define PA_CL_NANINF_CNTL__VS_Z_INF_RETAIN__SHIFT 0xb
#define PA_CL_NANINF_CNTL__VS_W_NAN_TO_INF_MASK 0x1000
#define PA_CL_NANINF_CNTL__VS_W_NAN_TO_INF__SHIFT 0xc
#define PA_CL_NANINF_CNTL__VS_W_INF_RETAIN_MASK 0x2000
#define PA_CL_NANINF_CNTL__VS_W_INF_RETAIN__SHIFT 0xd
#define PA_CL_NANINF_CNTL__VS_CLIP_DIST_INF_DISCARD_MASK 0x4000
#define PA_CL_NANINF_CNTL__VS_CLIP_DIST_INF_DISCARD__SHIFT 0xe
#define PA_CL_NANINF_CNTL__VTE_NO_OUTPUT_NEG_0_MASK 0x100000
#define PA_CL_NANINF_CNTL__VTE_NO_OUTPUT_NEG_0__SHIFT 0x14
/*
 * Bit-field layout of PA_CL_CLIP_CNTL.  <FIELD>_MASK selects the field's
 * bits, <FIELD>__SHIFT is the index of its lowest bit.
 *   bits 0-5   : UCP_ENA_0 .. UCP_ENA_5 (one bit each)
 *   bit  13    : PS_UCP_Y_SCALE_NEG
 *   bits 14-15 : PS_UCP_MODE (the only multi-bit field, 2 bits wide)
 *   bits 16-22 : single-bit flags CLIP_DISABLE .. DX_RASTERIZATION_KILL
 *   bits 24-27 : single-bit flags DX_LINEAR_ATTR_CLIP_ENA .. ZCLIP_FAR_DISABLE
 * No field is defined here for bits 6-12, 23 or 28-31.
 */
#define PA_CL_CLIP_CNTL__UCP_ENA_0_MASK 0x1
#define PA_CL_CLIP_CNTL__UCP_ENA_0__SHIFT 0x0
#define PA_CL_CLIP_CNTL__UCP_ENA_1_MASK 0x2
#define PA_CL_CLIP_CNTL__UCP_ENA_1__SHIFT 0x1
#define PA_CL_CLIP_CNTL__UCP_ENA_2_MASK 0x4
#define PA_CL_CLIP_CNTL__UCP_ENA_2__SHIFT 0x2
#define PA_CL_CLIP_CNTL__UCP_ENA_3_MASK 0x8
#define PA_CL_CLIP_CNTL__UCP_ENA_3__SHIFT 0x3
#define PA_CL_CLIP_CNTL__UCP_ENA_4_MASK 0x10
#define PA_CL_CLIP_CNTL__UCP_ENA_4__SHIFT 0x4
#define PA_CL_CLIP_CNTL__UCP_ENA_5_MASK 0x20
#define PA_CL_CLIP_CNTL__UCP_ENA_5__SHIFT 0x5
#define PA_CL_CLIP_CNTL__PS_UCP_Y_SCALE_NEG_MASK 0x2000
#define PA_CL_CLIP_CNTL__PS_UCP_Y_SCALE_NEG__SHIFT 0xd
#define PA_CL_CLIP_CNTL__PS_UCP_MODE_MASK 0xc000
#define PA_CL_CLIP_CNTL__PS_UCP_MODE__SHIFT 0xe
#define PA_CL_CLIP_CNTL__CLIP_DISABLE_MASK 0x10000
#define PA_CL_CLIP_CNTL__CLIP_DISABLE__SHIFT 0x10
#define PA_CL_CLIP_CNTL__UCP_CULL_ONLY_ENA_MASK 0x20000
#define PA_CL_CLIP_CNTL__UCP_CULL_ONLY_ENA__SHIFT 0x11
#define PA_CL_CLIP_CNTL__BOUNDARY_EDGE_FLAG_ENA_MASK 0x40000
#define PA_CL_CLIP_CNTL__BOUNDARY_EDGE_FLAG_ENA__SHIFT 0x12
#define PA_CL_CLIP_CNTL__DX_CLIP_SPACE_DEF_MASK 0x80000
#define PA_CL_CLIP_CNTL__DX_CLIP_SPACE_DEF__SHIFT 0x13
#define PA_CL_CLIP_CNTL__DIS_CLIP_ERR_DETECT_MASK 0x100000
#define PA_CL_CLIP_CNTL__DIS_CLIP_ERR_DETECT__SHIFT 0x14
#define PA_CL_CLIP_CNTL__VTX_KILL_OR_MASK 0x200000
#define PA_CL_CLIP_CNTL__VTX_KILL_OR__SHIFT 0x15
#define PA_CL_CLIP_CNTL__DX_RASTERIZATION_KILL_MASK 0x400000
#define PA_CL_CLIP_CNTL__DX_RASTERIZATION_KILL__SHIFT 0x16
#define PA_CL_CLIP_CNTL__DX_LINEAR_ATTR_CLIP_ENA_MASK 0x1000000
#define PA_CL_CLIP_CNTL__DX_LINEAR_ATTR_CLIP_ENA__SHIFT 0x18
#define PA_CL_CLIP_CNTL__VTE_VPORT_PROVOKE_DISABLE_MASK 0x2000000
#define PA_CL_CLIP_CNTL__VTE_VPORT_PROVOKE_DISABLE__SHIFT 0x19
#define PA_CL_CLIP_CNTL__ZCLIP_NEAR_DISABLE_MASK 0x4000000
#define PA_CL_CLIP_CNTL__ZCLIP_NEAR_DISABLE__SHIFT 0x1a
#define PA_CL_CLIP_CNTL__ZCLIP_FAR_DISABLE_MASK 0x8000000
#define PA_CL_CLIP_CNTL__ZCLIP_FAR_DISABLE__SHIFT 0x1b
/*
 * Mask/shift pairs for PA_CL_GB_{VERT,HORZ}_{CLIP,DISC}_ADJ.  Each register
 * has one field, DATA_REGISTER, filling all 32 bits (mask 0xffffffff,
 * shift 0).
 * NOTE(review): "GB" presumably stands for guard band - confirm against the
 * hardware register reference.
 */
#define PA_CL_GB_VERT_CLIP_ADJ__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_GB_VERT_CLIP_ADJ__DATA_REGISTER__SHIFT 0x0
#define PA_CL_GB_VERT_DISC_ADJ__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_GB_VERT_DISC_ADJ__DATA_REGISTER__SHIFT 0x0
#define PA_CL_GB_HORZ_CLIP_ADJ__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_GB_HORZ_CLIP_ADJ__DATA_REGISTER__SHIFT 0x0
#define PA_CL_GB_HORZ_DISC_ADJ__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_GB_HORZ_DISC_ADJ__DATA_REGISTER__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_UCP_<n>_{X,Y,Z,W}, n = 0..5 (24 registers).
 * Each register has one field, DATA_REGISTER, filling all 32 bits
 * (mask 0xffffffff, shift 0).
 * NOTE(review): presumably the four components of six user clip planes,
 * matching UCP_ENA_0..5 in PA_CL_CLIP_CNTL - confirm against the hardware
 * register reference.
 */
#define PA_CL_UCP_0_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_0_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_0_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_0_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_0_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_0_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_0_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_0_W__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_1_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_1_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_1_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_1_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_1_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_1_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_1_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_1_W__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_2_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_2_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_2_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_2_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_2_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_2_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_2_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_2_W__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_3_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_3_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_3_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_3_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_3_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_3_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_3_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_3_W__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_4_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_4_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_4_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_4_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_4_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_4_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_4_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_4_W__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_5_X__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_5_X__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_5_Y__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_5_Y__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_5_Z__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_5_Z__DATA_REGISTER__SHIFT 0x0
#define PA_CL_UCP_5_W__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_UCP_5_W__DATA_REGISTER__SHIFT 0x0
/*
 * Mask/shift pairs for PA_CL_POINT_X_RAD, PA_CL_POINT_Y_RAD,
 * PA_CL_POINT_SIZE and PA_CL_POINT_CULL_RAD.  Each register has one field,
 * DATA_REGISTER, filling all 32 bits (mask 0xffffffff, shift 0).
 */
#define PA_CL_POINT_X_RAD__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_POINT_X_RAD__DATA_REGISTER__SHIFT 0x0
#define PA_CL_POINT_Y_RAD__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_POINT_Y_RAD__DATA_REGISTER__SHIFT 0x0
#define PA_CL_POINT_SIZE__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_POINT_SIZE__DATA_REGISTER__SHIFT 0x0
#define PA_CL_POINT_CULL_RAD__DATA_REGISTER_MASK 0xffffffff
#define PA_CL_POINT_CULL_RAD__DATA_REGISTER__SHIFT 0x0
/*
 * Bit-field layout of PA_CL_ENHANCE and PA_CL_RESET_DEBUG.  <FIELD>_MASK
 * selects the field's bits, <FIELD>__SHIFT is the index of its lowest bit.
 * PA_CL_ENHANCE:
 *   bit  0     : CLIP_VTX_REORDER_ENA
 *   bits 1-2   : NUM_CLIP_SEQ (2 bits)
 *   bits 3-5   : CLIPPED_PRIM_SEQ_STALL, VE_NAN_PROC_DISABLE, XTRA_DEBUG_REG_SEL
 *   bits 28-31 : ECO_SPARE3 .. ECO_SPARE0 - note the descending numbering:
 *                ECO_SPARE3 is bit 28 and ECO_SPARE0 is bit 31.
 *   No field is defined here for bits 6-27.
 * PA_CL_RESET_DEBUG:
 *   bit  0     : CL_TRIV_DISC_DISABLE
 */
#define PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK 0x1
#define PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA__SHIFT 0x0
#define PA_CL_ENHANCE__NUM_CLIP_SEQ_MASK 0x6
#define PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT 0x1
#define PA_CL_ENHANCE__CLIPPED_PRIM_SEQ_STALL_MASK 0x8
#define PA_CL_ENHANCE__CLIPPED_PRIM_SEQ_STALL__SHIFT 0x3
#define PA_CL_ENHANCE__VE_NAN_PROC_DISABLE_MASK 0x10
#define PA_CL_ENHANCE__VE_NAN_PROC_DISABLE__SHIFT 0x4
#define PA_CL_ENHANCE__XTRA_DEBUG_REG_SEL_MASK 0x20
#define PA_CL_ENHANCE__XTRA_DEBUG_REG_SEL__SHIFT 0x5
#define PA_CL_ENHANCE__ECO_SPARE3_MASK 0x10000000
#define PA_CL_ENHANCE__ECO_SPARE3__SHIFT 0x1c
#define PA_CL_ENHANCE__ECO_SPARE2_MASK 0x20000000
#define PA_CL_ENHANCE__ECO_SPARE2__SHIFT 0x1d
#define PA_CL_ENHANCE__ECO_SPARE1_MASK 0x40000000
#define PA_CL_ENHANCE__ECO_SPARE1__SHIFT 0x1e
#define PA_CL_ENHANCE__ECO_SPARE0_MASK 0x80000000
#define PA_CL_ENHANCE__ECO_SPARE0__SHIFT 0x1f
#define PA_CL_RESET_DEBUG__CL_TRIV_DISC_DISABLE_MASK 0x1
#define PA_CL_RESET_DEBUG__CL_TRIV_DISC_DISABLE__SHIFT 0x0
/*
 * Bit-field layouts for the PA_SU vertex, point and line registers.
 * <FIELD>_MASK selects the field's bits, <FIELD>__SHIFT is the index of its
 * lowest bit.
 *   PA_SU_VTX_CNTL           : PIX_CENTER bit 0, ROUND_MODE bits 1-2,
 *                              QUANT_MODE bits 3-5
 *   PA_SU_POINT_SIZE         : HEIGHT bits 0-15, WIDTH bits 16-31
 *   PA_SU_POINT_MINMAX       : MIN_SIZE bits 0-15, MAX_SIZE bits 16-31
 *   PA_SU_LINE_CNTL          : WIDTH bits 0-15
 *   PA_SU_LINE_STIPPLE_CNTL  : LINE_STIPPLE_RESET bits 0-1, then single-bit
 *                              EXPAND_FULL_LENGTH (2), FRACTIONAL_ACCUM (3),
 *                              DIAMOND_ADJUST (4)
 *   PA_SU_LINE_STIPPLE_SCALE : LINE_STIPPLE_SCALE, full 32 bits
 */
#define PA_SU_VTX_CNTL__PIX_CENTER_MASK 0x1
#define PA_SU_VTX_CNTL__PIX_CENTER__SHIFT 0x0
#define PA_SU_VTX_CNTL__ROUND_MODE_MASK 0x6
#define PA_SU_VTX_CNTL__ROUND_MODE__SHIFT 0x1
#define PA_SU_VTX_CNTL__QUANT_MODE_MASK 0x38
#define PA_SU_VTX_CNTL__QUANT_MODE__SHIFT 0x3
#define PA_SU_POINT_SIZE__HEIGHT_MASK 0xffff
#define PA_SU_POINT_SIZE__HEIGHT__SHIFT 0x0
#define PA_SU_POINT_SIZE__WIDTH_MASK 0xffff0000
#define PA_SU_POINT_SIZE__WIDTH__SHIFT 0x10
#define PA_SU_POINT_MINMAX__MIN_SIZE_MASK 0xffff
#define PA_SU_POINT_MINMAX__MIN_SIZE__SHIFT 0x0
#define PA_SU_POINT_MINMAX__MAX_SIZE_MASK 0xffff0000
#define PA_SU_POINT_MINMAX__MAX_SIZE__SHIFT 0x10
#define PA_SU_LINE_CNTL__WIDTH_MASK 0xffff
#define PA_SU_LINE_CNTL__WIDTH__SHIFT 0x0
#define PA_SU_LINE_STIPPLE_CNTL__LINE_STIPPLE_RESET_MASK 0x3
#define PA_SU_LINE_STIPPLE_CNTL__LINE_STIPPLE_RESET__SHIFT 0x0
#define PA_SU_LINE_STIPPLE_CNTL__EXPAND_FULL_LENGTH_MASK 0x4
#define PA_SU_LINE_STIPPLE_CNTL__EXPAND_FULL_LENGTH__SHIFT 0x2
#define PA_SU_LINE_STIPPLE_CNTL__FRACTIONAL_ACCUM_MASK 0x8
#define PA_SU_LINE_STIPPLE_CNTL__FRACTIONAL_ACCUM__SHIFT 0x3
#define PA_SU_LINE_STIPPLE_CNTL__DIAMOND_ADJUST_MASK 0x10
#define PA_SU_LINE_STIPPLE_CNTL__DIAMOND_ADJUST__SHIFT 0x4
#define PA_SU_LINE_STIPPLE_SCALE__LINE_STIPPLE_SCALE_MASK 0xffffffff
#define PA_SU_LINE_STIPPLE_SCALE__LINE_STIPPLE_SCALE__SHIFT 0x0
/*
 * Bit-field layout of PA_SU_PRIM_FILTER_CNTL.  <FIELD>_MASK selects the
 * field's bits, <FIELD>__SHIFT is the index of its lowest bit.
 *   bits 0-3  : {TRIANGLE,LINE,POINT,RECTANGLE}_FILTER_DISABLE (one bit each)
 *   bits 4-7  : {TRIANGLE,LINE,POINT,RECTANGLE}_EXPAND_ENA (one bit each)
 *   bits 8-15 : PRIM_EXPAND_CONSTANT (8 bits)
 *   bit  30   : XMAX_RIGHT_EXCLUSION
 *   bit  31   : YMAX_BOTTOM_EXCLUSION
 * No field is defined here for bits 16-29.
 */
#define PA_SU_PRIM_FILTER_CNTL__TRIANGLE_FILTER_DISABLE_MASK 0x1
#define PA_SU_PRIM_FILTER_CNTL__TRIANGLE_FILTER_DISABLE__SHIFT 0x0
#define PA_SU_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE_MASK 0x2
#define PA_SU_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE__SHIFT 0x1
#define PA_SU_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE_MASK 0x4
#define PA_SU_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE__SHIFT 0x2
#define PA_SU_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE_MASK 0x8
#define PA_SU_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE__SHIFT 0x3
#define PA_SU_PRIM_FILTER_CNTL__TRIANGLE_EXPAND_ENA_MASK 0x10
#define PA_SU_PRIM_FILTER_CNTL__TRIANGLE_EXPAND_ENA__SHIFT 0x4
#define PA_SU_PRIM_FILTER_CNTL__LINE_EXPAND_ENA_MASK 0x20
#define PA_SU_PRIM_FILTER_CNTL__LINE_EXPAND_ENA__SHIFT 0x5
#define PA_SU_PRIM_FILTER_CNTL__POINT_EXPAND_ENA_MASK 0x40
#define PA_SU_PRIM_FILTER_CNTL__POINT_EXPAND_ENA__SHIFT 0x6
#define PA_SU_PRIM_FILTER_CNTL__RECTANGLE_EXPAND_ENA_MASK 0x80
#define PA_SU_PRIM_FILTER_CNTL__RECTANGLE_EXPAND_ENA__SHIFT 0x7
#define PA_SU_PRIM_FILTER_CNTL__PRIM_EXPAND_CONSTANT_MASK 0xff00
#define PA_SU_PRIM_FILTER_CNTL__PRIM_EXPAND_CONSTANT__SHIFT 0x8
#define PA_SU_PRIM_FILTER_CNTL__XMAX_RIGHT_EXCLUSION_MASK 0x40000000
#define PA_SU_PRIM_FILTER_CNTL__XMAX_RIGHT_EXCLUSION__SHIFT 0x1e
#define PA_SU_PRIM_FILTER_CNTL__YMAX_BOTTOM_EXCLUSION_MASK 0x80000000
#define PA_SU_PRIM_FILTER_CNTL__YMAX_BOTTOM_EXCLUSION__SHIFT 0x1f
/*
 * Bit-field layout of PA_SU_SC_MODE_CNTL.  <FIELD>_MASK selects the field's
 * bits, <FIELD>__SHIFT is the index of its lowest bit.
 *   bits 0-2   : CULL_FRONT, CULL_BACK, FACE (one bit each)
 *   bits 3-4   : POLY_MODE (2 bits)
 *   bits 5-7   : POLYMODE_FRONT_PTYPE (3 bits)
 *   bits 8-10  : POLYMODE_BACK_PTYPE (3 bits)
 *   bits 11-13 : POLY_OFFSET_{FRONT,BACK,PARA}_ENABLE (one bit each)
 *   bit  16    : VTX_WINDOW_OFFSET_ENABLE
 *   bits 19-21 : PROVOKING_VTX_LAST, PERSP_CORR_DIS, MULTI_PRIM_IB_ENA
 * No field is defined here for bits 14-15, 17-18 or 22-31.
 */
#define PA_SU_SC_MODE_CNTL__CULL_FRONT_MASK 0x1
#define PA_SU_SC_MODE_CNTL__CULL_FRONT__SHIFT 0x0
#define PA_SU_SC_MODE_CNTL__CULL_BACK_MASK 0x2
#define PA_SU_SC_MODE_CNTL__CULL_BACK__SHIFT 0x1
#define PA_SU_SC_MODE_CNTL__FACE_MASK 0x4
#define PA_SU_SC_MODE_CNTL__FACE__SHIFT 0x2
#define PA_SU_SC_MODE_CNTL__POLY_MODE_MASK 0x18
#define PA_SU_SC_MODE_CNTL__POLY_MODE__SHIFT 0x3
#define PA_SU_SC_MODE_CNTL__POLYMODE_FRONT_PTYPE_MASK 0xe0
#define PA_SU_SC_MODE_CNTL__POLYMODE_FRONT_PTYPE__SHIFT 0x5
#define PA_SU_SC_MODE_CNTL__POLYMODE_BACK_PTYPE_MASK 0x700
#define PA_SU_SC_MODE_CNTL__POLYMODE_BACK_PTYPE__SHIFT 0x8
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_FRONT_ENABLE_MASK 0x800
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_FRONT_ENABLE__SHIFT 0xb
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_BACK_ENABLE_MASK 0x1000
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_BACK_ENABLE__SHIFT 0xc
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_PARA_ENABLE_MASK 0x2000
#define PA_SU_SC_MODE_CNTL__POLY_OFFSET_PARA_ENABLE__SHIFT 0xd
#define PA_SU_SC_MODE_CNTL__VTX_WINDOW_OFFSET_ENABLE_MASK 0x10000
#define PA_SU_SC_MODE_CNTL__VTX_WINDOW_OFFSET_ENABLE__SHIFT 0x10
#define PA_SU_SC_MODE_CNTL__PROVOKING_VTX_LAST_MASK 0x80000
#define PA_SU_SC_MODE_CNTL__PROVOKING_VTX_LAST__SHIFT 0x13
#define PA_SU_SC_MODE_CNTL__PERSP_CORR_DIS_MASK 0x100000
#define PA_SU_SC_MODE_CNTL__PERSP_CORR_DIS__SHIFT 0x14
#define PA_SU_SC_MODE_CNTL__MULTI_PRIM_IB_ENA_MASK 0x200000
#define PA_SU_SC_MODE_CNTL__MULTI_PRIM_IB_ENA__SHIFT 0x15
/*
 * Bit-field layouts for the PA_SU polygon-offset registers,
 * PA_SU_HARDWARE_SCREEN_OFFSET and PA_SU_LINE_STIPPLE_VALUE.  <FIELD>_MASK
 * selects the field's bits, <FIELD>__SHIFT is the index of its lowest bit.
 *   PA_SU_POLY_OFFSET_DB_FMT_CNTL : POLY_OFFSET_NEG_NUM_DB_BITS bits 0-7,
 *                                   POLY_OFFSET_DB_IS_FLOAT_FMT bit 8
 *   PA_SU_POLY_OFFSET_CLAMP,
 *   PA_SU_POLY_OFFSET_{FRONT,BACK}_{SCALE,OFFSET} : one full 32-bit field each
 *   PA_SU_HARDWARE_SCREEN_OFFSET  : HW_SCREEN_OFFSET_X bits 0-8,
 *                                   HW_SCREEN_OFFSET_Y bits 16-24 (9 bits each)
 *   PA_SU_LINE_STIPPLE_VALUE      : LINE_STIPPLE_VALUE bits 0-23
 */
#define PA_SU_POLY_OFFSET_DB_FMT_CNTL__POLY_OFFSET_NEG_NUM_DB_BITS_MASK 0xff
#define PA_SU_POLY_OFFSET_DB_FMT_CNTL__POLY_OFFSET_NEG_NUM_DB_BITS__SHIFT 0x0
#define PA_SU_POLY_OFFSET_DB_FMT_CNTL__POLY_OFFSET_DB_IS_FLOAT_FMT_MASK 0x100
#define PA_SU_POLY_OFFSET_DB_FMT_CNTL__POLY_OFFSET_DB_IS_FLOAT_FMT__SHIFT 0x8
#define PA_SU_POLY_OFFSET_CLAMP__CLAMP_MASK 0xffffffff
#define PA_SU_POLY_OFFSET_CLAMP__CLAMP__SHIFT 0x0
#define PA_SU_POLY_OFFSET_FRONT_SCALE__SCALE_MASK 0xffffffff
#define PA_SU_POLY_OFFSET_FRONT_SCALE__SCALE__SHIFT 0x0
#define PA_SU_POLY_OFFSET_FRONT_OFFSET__OFFSET_MASK 0xffffffff
#define PA_SU_POLY_OFFSET_FRONT_OFFSET__OFFSET__SHIFT 0x0
#define PA_SU_POLY_OFFSET_BACK_SCALE__SCALE_MASK 0xffffffff
#define PA_SU_POLY_OFFSET_BACK_SCALE__SCALE__SHIFT 0x0
#define PA_SU_POLY_OFFSET_BACK_OFFSET__OFFSET_MASK 0xffffffff
#define PA_SU_POLY_OFFSET_BACK_OFFSET__OFFSET__SHIFT 0x0
#define PA_SU_HARDWARE_SCREEN_OFFSET__HW_SCREEN_OFFSET_X_MASK 0x1ff
#define PA_SU_HARDWARE_SCREEN_OFFSET__HW_SCREEN_OFFSET_X__SHIFT 0x0
#define PA_SU_HARDWARE_SCREEN_OFFSET__HW_SCREEN_OFFSET_Y_MASK 0x1ff0000
#define PA_SU_HARDWARE_SCREEN_OFFSET__HW_SCREEN_OFFSET_Y__SHIFT 0x10
#define PA_SU_LINE_STIPPLE_VALUE__LINE_STIPPLE_VALUE_MASK 0xffffff
#define PA_SU_LINE_STIPPLE_VALUE__LINE_STIPPLE_VALUE__SHIFT 0x0
/*
 * Bit-field layouts of the PA_SU performance-counter select registers.
 * <FIELD>_MASK selects the field's bits, <FIELD>__SHIFT is the index of its
 * lowest bit.
 *   PA_SU_PERFCOUNTER{0,1}_SELECT  : PERF_SEL bits 0-9, PERF_SEL1 bits 10-19,
 *                                    CNTR_MODE bits 20-23
 *   PA_SU_PERFCOUNTER{0,1}_SELECT1 : PERF_SEL2 bits 0-9, PERF_SEL3 bits 10-19
 *   PA_SU_PERFCOUNTER{2,3}_SELECT  : PERF_SEL bits 0-9, CNTR_MODE bits 20-23
 * Counters 2 and 3 have no PERF_SEL1 field and no SELECT1 register defined
 * here.
 */
#define PA_SU_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SU_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SU_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define PA_SU_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define PA_SU_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define PA_SU_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define PA_SU_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define PA_SU_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define PA_SU_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define PA_SU_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define PA_SU_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SU_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SU_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define PA_SU_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define PA_SU_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define PA_SU_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define PA_SU_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define PA_SU_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define PA_SU_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define PA_SU_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define PA_SU_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SU_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SU_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define PA_SU_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define PA_SU_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SU_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SU_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define PA_SU_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
/*
 * Mask/shift pairs for PA_SU_PERFCOUNTER{0..3}_LO and _HI.
 *   _LO : PERFCOUNTER_LO fills all 32 bits (mask 0xffffffff, shift 0)
 *   _HI : PERFCOUNTER_HI occupies only bits 0-15 (mask 0xffff, shift 0)
 * NOTE(review): LO/HI together presumably form a 48-bit counter value -
 * confirm against the hardware register reference.
 */
#define PA_SU_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SU_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SU_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffff
#define PA_SU_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SU_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SU_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SU_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffff
#define PA_SU_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SU_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SU_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SU_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffff
#define PA_SU_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SU_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SU_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SU_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffff
#define PA_SU_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * Bit-field layouts of PA_SC_AA_CONFIG and the two PA_SC_AA_MASK registers.
 * <FIELD>_MASK selects the field's bits, <FIELD>__SHIFT is the index of its
 * lowest bit.
 * PA_SC_AA_CONFIG:
 *   bits 0-2   : MSAA_NUM_SAMPLES
 *   bit  4     : AA_MASK_CENTROID_DTMN
 *   bits 13-16 : MAX_SAMPLE_DIST
 *   bits 20-22 : MSAA_EXPOSED_SAMPLES
 *   bits 24-25 : DETAIL_TO_EXPOSED_MODE
 * PA_SC_AA_MASK_X0Y0_X1Y0 / PA_SC_AA_MASK_X0Y1_X1Y1:
 *   each packs two 16-bit masks - the first-named pixel in bits 0-15 and the
 *   second-named pixel in bits 16-31.
 */
#define PA_SC_AA_CONFIG__MSAA_NUM_SAMPLES_MASK 0x7
#define PA_SC_AA_CONFIG__MSAA_NUM_SAMPLES__SHIFT 0x0
#define PA_SC_AA_CONFIG__AA_MASK_CENTROID_DTMN_MASK 0x10
#define PA_SC_AA_CONFIG__AA_MASK_CENTROID_DTMN__SHIFT 0x4
#define PA_SC_AA_CONFIG__MAX_SAMPLE_DIST_MASK 0x1e000
#define PA_SC_AA_CONFIG__MAX_SAMPLE_DIST__SHIFT 0xd
#define PA_SC_AA_CONFIG__MSAA_EXPOSED_SAMPLES_MASK 0x700000
#define PA_SC_AA_CONFIG__MSAA_EXPOSED_SAMPLES__SHIFT 0x14
#define PA_SC_AA_CONFIG__DETAIL_TO_EXPOSED_MODE_MASK 0x3000000
#define PA_SC_AA_CONFIG__DETAIL_TO_EXPOSED_MODE__SHIFT 0x18
#define PA_SC_AA_MASK_X0Y0_X1Y0__AA_MASK_X0Y0_MASK 0xffff
#define PA_SC_AA_MASK_X0Y0_X1Y0__AA_MASK_X0Y0__SHIFT 0x0
#define PA_SC_AA_MASK_X0Y0_X1Y0__AA_MASK_X1Y0_MASK 0xffff0000
#define PA_SC_AA_MASK_X0Y0_X1Y0__AA_MASK_X1Y0__SHIFT 0x10
#define PA_SC_AA_MASK_X0Y1_X1Y1__AA_MASK_X0Y1_MASK 0xffff
#define PA_SC_AA_MASK_X0Y1_X1Y1__AA_MASK_X0Y1__SHIFT 0x0
#define PA_SC_AA_MASK_X0Y1_X1Y1__AA_MASK_X1Y1_MASK 0xffff0000
#define PA_SC_AA_MASK_X0Y1_X1Y1__AA_MASK_X1Y1__SHIFT 0x10
/*
 * Bit-field layouts of PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, _1 and _2.
 * <FIELD>_MASK selects the field's bits, <FIELD>__SHIFT is the index of its
 * lowest bit.  Each register is split into eight 4-bit fields, alternating
 * S<k>_X then S<k>_Y from bit 0 upwards, so one register holds four samples:
 *   _0 : S0..S3     _1 : S4..S7     _2 : S8..S11
 * Within a register, sample j (0-3) has its X field at bits 8*j .. 8*j+3 and
 * its Y field at bits 8*j+4 .. 8*j+7.
 * NOTE(review): whether the 4-bit values are signed or unsigned offsets is
 * not visible here - confirm against the hardware register reference.
 */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S0_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S0_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S0_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S0_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S1_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S1_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S1_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S1_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S2_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S2_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S2_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S2_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S3_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S3_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S3_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0__S3_Y__SHIFT 0x1c
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S4_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S4_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S4_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S4_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S5_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S5_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S5_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S5_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S6_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S6_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S6_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S6_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S7_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S7_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S7_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1__S7_Y__SHIFT 0x1c
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S8_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S8_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S8_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S8_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S9_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S9_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S9_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S9_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S10_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S10_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S10_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S10_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S11_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S11_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S11_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2__S11_Y__SHIFT 0x1c
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S12_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S12_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S12_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S12_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S13_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S13_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S13_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S13_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S14_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S14_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S14_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S14_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S15_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S15_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S15_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3__S15_Y__SHIFT 0x1c
/*
 * PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_{0..3}: four 32-bit words holding the
 * S0..S15 X/Y fields, four samples per word. Within a word each sample
 * takes one byte: X in the low nibble, Y in the high nibble, so sample k of
 * a word has X at bits (8k+3):(8k) and Y at bits (8k+7):(8k+4).
 */
/* Word 0: samples S0..S3. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S0_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S0_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S0_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S0_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S1_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S1_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S1_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S1_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S2_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S2_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S2_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S2_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S3_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S3_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S3_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0__S3_Y__SHIFT 0x1c
/* Word 1: samples S4..S7. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S4_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S4_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S4_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S4_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S5_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S5_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S5_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S5_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S6_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S6_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S6_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S6_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S7_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S7_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S7_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1__S7_Y__SHIFT 0x1c
/* Word 2: samples S8..S11. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S8_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S8_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S8_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S8_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S9_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S9_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S9_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S9_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S10_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S10_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S10_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S10_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S11_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S11_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S11_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2__S11_Y__SHIFT 0x1c
/* Word 3: samples S12..S15. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S12_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S12_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S12_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S12_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S13_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S13_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S13_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S13_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S14_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S14_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S14_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S14_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S15_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S15_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S15_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3__S15_Y__SHIFT 0x1c
/*
 * PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_{0..3}: four 32-bit words holding the
 * S0..S15 X/Y fields, four samples per word. Within a word each sample
 * takes one byte: X in the low nibble, Y in the high nibble, so sample k of
 * a word has X at bits (8k+3):(8k) and Y at bits (8k+7):(8k+4).
 */
/* Word 0: samples S0..S3. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S0_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S0_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S0_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S0_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S1_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S1_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S1_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S1_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S2_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S2_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S2_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S2_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S3_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S3_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S3_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0__S3_Y__SHIFT 0x1c
/* Word 1: samples S4..S7. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S4_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S4_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S4_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S4_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S5_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S5_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S5_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S5_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S6_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S6_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S6_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S6_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S7_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S7_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S7_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1__S7_Y__SHIFT 0x1c
/* Word 2: samples S8..S11. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S8_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S8_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S8_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S8_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S9_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S9_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S9_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S9_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S10_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S10_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S10_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S10_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S11_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S11_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S11_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2__S11_Y__SHIFT 0x1c
/* Word 3: samples S12..S15. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S12_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S12_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S12_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S12_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S13_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S13_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S13_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S13_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S14_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S14_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S14_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S14_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S15_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S15_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S15_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3__S15_Y__SHIFT 0x1c
/*
 * PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_{0..3}: four 32-bit words holding the
 * S0..S15 X/Y fields, four samples per word. Within a word each sample
 * takes one byte: X in the low nibble, Y in the high nibble, so sample k of
 * a word has X at bits (8k+3):(8k) and Y at bits (8k+7):(8k+4).
 */
/* Word 0: samples S0..S3. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S0_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S0_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S0_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S0_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S1_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S1_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S1_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S1_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S2_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S2_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S2_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S2_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S3_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S3_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S3_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0__S3_Y__SHIFT 0x1c
/* Word 1: samples S4..S7. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S4_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S4_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S4_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S4_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S5_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S5_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S5_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S5_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S6_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S6_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S6_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S6_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S7_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S7_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S7_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1__S7_Y__SHIFT 0x1c
/* Word 2: samples S8..S11. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S8_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S8_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S8_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S8_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S9_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S9_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S9_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S9_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S10_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S10_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S10_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S10_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S11_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S11_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S11_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2__S11_Y__SHIFT 0x1c
/* Word 3: samples S12..S15. */
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S12_X_MASK 0xf
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S12_X__SHIFT 0x0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S12_Y_MASK 0xf0
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S12_Y__SHIFT 0x4
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S13_X_MASK 0xf00
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S13_X__SHIFT 0x8
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S13_Y_MASK 0xf000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S13_Y__SHIFT 0xc
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S14_X_MASK 0xf0000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S14_X__SHIFT 0x10
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S14_Y_MASK 0xf00000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S14_Y__SHIFT 0x14
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S15_X_MASK 0xf000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S15_X__SHIFT 0x18
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S15_Y_MASK 0xf0000000
#define PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3__S15_Y__SHIFT 0x1c
/*
 * PA_SC_CENTROID_PRIORITY_0 / _1: sixteen 4-bit DISTANCE_n fields packed
 * eight per 32-bit word. Word 0 holds DISTANCE_0..7 and word 1 holds
 * DISTANCE_8..15; field i of a word sits at bits (4i+3):(4i).
 */
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_0_MASK 0xf
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_0__SHIFT 0x0
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_1_MASK 0xf0
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_1__SHIFT 0x4
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_2_MASK 0xf00
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_2__SHIFT 0x8
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_3_MASK 0xf000
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_3__SHIFT 0xc
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_4_MASK 0xf0000
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_4__SHIFT 0x10
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_5_MASK 0xf00000
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_5__SHIFT 0x14
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_6_MASK 0xf000000
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_6__SHIFT 0x18
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_7_MASK 0xf0000000
#define PA_SC_CENTROID_PRIORITY_0__DISTANCE_7__SHIFT 0x1c
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_8_MASK 0xf
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_8__SHIFT 0x0
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_9_MASK 0xf0
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_9__SHIFT 0x4
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_10_MASK 0xf00
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_10__SHIFT 0x8
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_11_MASK 0xf000
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_11__SHIFT 0xc
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_12_MASK 0xf0000
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_12__SHIFT 0x10
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_13_MASK 0xf00000
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_13__SHIFT 0x14
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_14_MASK 0xf000000
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_14__SHIFT 0x18
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_15_MASK 0xf0000000
#define PA_SC_CENTROID_PRIORITY_1__DISTANCE_15__SHIFT 0x1c
/*
 * PA_SC_CLIPRECT_{0..3}_{TL,BR}: four TL/BR pairs with identical layout.
 * Each value carries a 15-bit X in bits 14:0 and a 15-bit Y in bits 30:16;
 * bits 15 and 31 are not covered by any mask here.
 */
#define PA_SC_CLIPRECT_0_TL__TL_X_MASK 0x7fff
#define PA_SC_CLIPRECT_0_TL__TL_X__SHIFT 0x0
#define PA_SC_CLIPRECT_0_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_0_TL__TL_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_0_BR__BR_X_MASK 0x7fff
#define PA_SC_CLIPRECT_0_BR__BR_X__SHIFT 0x0
#define PA_SC_CLIPRECT_0_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_0_BR__BR_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_1_TL__TL_X_MASK 0x7fff
#define PA_SC_CLIPRECT_1_TL__TL_X__SHIFT 0x0
#define PA_SC_CLIPRECT_1_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_1_TL__TL_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_1_BR__BR_X_MASK 0x7fff
#define PA_SC_CLIPRECT_1_BR__BR_X__SHIFT 0x0
#define PA_SC_CLIPRECT_1_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_1_BR__BR_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_2_TL__TL_X_MASK 0x7fff
#define PA_SC_CLIPRECT_2_TL__TL_X__SHIFT 0x0
#define PA_SC_CLIPRECT_2_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_2_TL__TL_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_2_BR__BR_X_MASK 0x7fff
#define PA_SC_CLIPRECT_2_BR__BR_X__SHIFT 0x0
#define PA_SC_CLIPRECT_2_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_2_BR__BR_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_3_TL__TL_X_MASK 0x7fff
#define PA_SC_CLIPRECT_3_TL__TL_X__SHIFT 0x0
#define PA_SC_CLIPRECT_3_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_3_TL__TL_Y__SHIFT 0x10
#define PA_SC_CLIPRECT_3_BR__BR_X_MASK 0x7fff
#define PA_SC_CLIPRECT_3_BR__BR_X__SHIFT 0x0
#define PA_SC_CLIPRECT_3_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_CLIPRECT_3_BR__BR_Y__SHIFT 0x10
/* PA_SC_CLIPRECT_RULE: single 16-bit CLIP_RULE field in bits 15:0. */
#define PA_SC_CLIPRECT_RULE__CLIP_RULE_MASK 0xffff
#define PA_SC_CLIPRECT_RULE__CLIP_RULE__SHIFT 0x0
/*
 * PA_SC_EDGERULE field layout (fully packs 32 bits):
 *   ER_TRI      bits  3:0
 *   ER_POINT    bits  7:4
 *   ER_RECT     bits 11:8
 *   ER_LINE_LR  bits 17:12  (6 bits wide)
 *   ER_LINE_RL  bits 23:18  (6 bits wide)
 *   ER_LINE_TB  bits 27:24
 *   ER_LINE_BT  bits 31:28
 */
#define PA_SC_EDGERULE__ER_TRI_MASK 0xf
#define PA_SC_EDGERULE__ER_TRI__SHIFT 0x0
#define PA_SC_EDGERULE__ER_POINT_MASK 0xf0
#define PA_SC_EDGERULE__ER_POINT__SHIFT 0x4
#define PA_SC_EDGERULE__ER_RECT_MASK 0xf00
#define PA_SC_EDGERULE__ER_RECT__SHIFT 0x8
#define PA_SC_EDGERULE__ER_LINE_LR_MASK 0x3f000
#define PA_SC_EDGERULE__ER_LINE_LR__SHIFT 0xc
#define PA_SC_EDGERULE__ER_LINE_RL_MASK 0xfc0000
#define PA_SC_EDGERULE__ER_LINE_RL__SHIFT 0x12
#define PA_SC_EDGERULE__ER_LINE_TB_MASK 0xf000000
#define PA_SC_EDGERULE__ER_LINE_TB__SHIFT 0x18
#define PA_SC_EDGERULE__ER_LINE_BT_MASK 0xf0000000
#define PA_SC_EDGERULE__ER_LINE_BT__SHIFT 0x1c
/*
 * PA_SC_LINE_CNTL: four single-bit fields occupying bits 9 through 12.
 * No field defined here covers bits 8:0.
 */
#define PA_SC_LINE_CNTL__EXPAND_LINE_WIDTH_MASK 0x200
#define PA_SC_LINE_CNTL__EXPAND_LINE_WIDTH__SHIFT 0x9
#define PA_SC_LINE_CNTL__LAST_PIXEL_MASK 0x400
#define PA_SC_LINE_CNTL__LAST_PIXEL__SHIFT 0xa
#define PA_SC_LINE_CNTL__PERPENDICULAR_ENDCAP_ENA_MASK 0x800
#define PA_SC_LINE_CNTL__PERPENDICULAR_ENDCAP_ENA__SHIFT 0xb
#define PA_SC_LINE_CNTL__DX10_DIAMOND_TEST_ENA_MASK 0x1000
#define PA_SC_LINE_CNTL__DX10_DIAMOND_TEST_ENA__SHIFT 0xc
/*
 * PA_SC_LINE_STIPPLE field layout:
 *   LINE_PATTERN      bits 15:0
 *   REPEAT_COUNT      bits 23:16
 *   PATTERN_BIT_ORDER bit  28
 *   AUTO_RESET_CNTL   bits 30:29
 */
#define PA_SC_LINE_STIPPLE__LINE_PATTERN_MASK 0xffff
#define PA_SC_LINE_STIPPLE__LINE_PATTERN__SHIFT 0x0
#define PA_SC_LINE_STIPPLE__REPEAT_COUNT_MASK 0xff0000
#define PA_SC_LINE_STIPPLE__REPEAT_COUNT__SHIFT 0x10
#define PA_SC_LINE_STIPPLE__PATTERN_BIT_ORDER_MASK 0x10000000
#define PA_SC_LINE_STIPPLE__PATTERN_BIT_ORDER__SHIFT 0x1c
#define PA_SC_LINE_STIPPLE__AUTO_RESET_CNTL_MASK 0x60000000
#define PA_SC_LINE_STIPPLE__AUTO_RESET_CNTL__SHIFT 0x1d
/* PA_SC_MODE_CNTL_0: four single-bit fields in bits 0 through 3. */
#define PA_SC_MODE_CNTL_0__MSAA_ENABLE_MASK 0x1
#define PA_SC_MODE_CNTL_0__MSAA_ENABLE__SHIFT 0x0
#define PA_SC_MODE_CNTL_0__VPORT_SCISSOR_ENABLE_MASK 0x2
#define PA_SC_MODE_CNTL_0__VPORT_SCISSOR_ENABLE__SHIFT 0x1
#define PA_SC_MODE_CNTL_0__LINE_STIPPLE_ENABLE_MASK 0x4
#define PA_SC_MODE_CNTL_0__LINE_STIPPLE_ENABLE__SHIFT 0x2
#define PA_SC_MODE_CNTL_0__SEND_UNLIT_STILES_TO_PKR_MASK 0x8
#define PA_SC_MODE_CNTL_0__SEND_UNLIT_STILES_TO_PKR__SHIFT 0x3
/*
 * PA_SC_MODE_CNTL_1: contiguous fields covering bits 30:0. All are single
 * bits except the three multi-bit fields:
 *   WALK_FENCE_SIZE         bits  6:4
 *   GPU_ID_OVERRIDE         bits 23:20
 *   OUT_OF_ORDER_WATER_MARK bits 30:28
 */
#define PA_SC_MODE_CNTL_1__WALK_SIZE_MASK 0x1
#define PA_SC_MODE_CNTL_1__WALK_SIZE__SHIFT 0x0
#define PA_SC_MODE_CNTL_1__WALK_ALIGNMENT_MASK 0x2
#define PA_SC_MODE_CNTL_1__WALK_ALIGNMENT__SHIFT 0x1
#define PA_SC_MODE_CNTL_1__WALK_ALIGN8_PRIM_FITS_ST_MASK 0x4
#define PA_SC_MODE_CNTL_1__WALK_ALIGN8_PRIM_FITS_ST__SHIFT 0x2
#define PA_SC_MODE_CNTL_1__WALK_FENCE_ENABLE_MASK 0x8
#define PA_SC_MODE_CNTL_1__WALK_FENCE_ENABLE__SHIFT 0x3
#define PA_SC_MODE_CNTL_1__WALK_FENCE_SIZE_MASK 0x70
#define PA_SC_MODE_CNTL_1__WALK_FENCE_SIZE__SHIFT 0x4
#define PA_SC_MODE_CNTL_1__SUPERTILE_WALK_ORDER_ENABLE_MASK 0x80
#define PA_SC_MODE_CNTL_1__SUPERTILE_WALK_ORDER_ENABLE__SHIFT 0x7
#define PA_SC_MODE_CNTL_1__TILE_WALK_ORDER_ENABLE_MASK 0x100
#define PA_SC_MODE_CNTL_1__TILE_WALK_ORDER_ENABLE__SHIFT 0x8
#define PA_SC_MODE_CNTL_1__TILE_COVER_DISABLE_MASK 0x200
#define PA_SC_MODE_CNTL_1__TILE_COVER_DISABLE__SHIFT 0x9
#define PA_SC_MODE_CNTL_1__TILE_COVER_NO_SCISSOR_MASK 0x400
#define PA_SC_MODE_CNTL_1__TILE_COVER_NO_SCISSOR__SHIFT 0xa
#define PA_SC_MODE_CNTL_1__ZMM_LINE_EXTENT_MASK 0x800
#define PA_SC_MODE_CNTL_1__ZMM_LINE_EXTENT__SHIFT 0xb
#define PA_SC_MODE_CNTL_1__ZMM_LINE_OFFSET_MASK 0x1000
#define PA_SC_MODE_CNTL_1__ZMM_LINE_OFFSET__SHIFT 0xc
#define PA_SC_MODE_CNTL_1__ZMM_RECT_EXTENT_MASK 0x2000
#define PA_SC_MODE_CNTL_1__ZMM_RECT_EXTENT__SHIFT 0xd
#define PA_SC_MODE_CNTL_1__KILL_PIX_POST_HI_Z_MASK 0x4000
#define PA_SC_MODE_CNTL_1__KILL_PIX_POST_HI_Z__SHIFT 0xe
/* Field name is KILL_PIX_POST_DETAIL_MASK, hence the doubled _MASK suffix. */
#define PA_SC_MODE_CNTL_1__KILL_PIX_POST_DETAIL_MASK_MASK 0x8000
#define PA_SC_MODE_CNTL_1__KILL_PIX_POST_DETAIL_MASK__SHIFT 0xf
#define PA_SC_MODE_CNTL_1__PS_ITER_SAMPLE_MASK 0x10000
#define PA_SC_MODE_CNTL_1__PS_ITER_SAMPLE__SHIFT 0x10
#define PA_SC_MODE_CNTL_1__MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE_MASK 0x20000
#define PA_SC_MODE_CNTL_1__MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE__SHIFT 0x11
#define PA_SC_MODE_CNTL_1__MULTI_GPU_SUPERTILE_ENABLE_MASK 0x40000
#define PA_SC_MODE_CNTL_1__MULTI_GPU_SUPERTILE_ENABLE__SHIFT 0x12
#define PA_SC_MODE_CNTL_1__GPU_ID_OVERRIDE_ENABLE_MASK 0x80000
#define PA_SC_MODE_CNTL_1__GPU_ID_OVERRIDE_ENABLE__SHIFT 0x13
#define PA_SC_MODE_CNTL_1__GPU_ID_OVERRIDE_MASK 0xf00000
#define PA_SC_MODE_CNTL_1__GPU_ID_OVERRIDE__SHIFT 0x14
#define PA_SC_MODE_CNTL_1__MULTI_GPU_PRIM_DISCARD_ENABLE_MASK 0x1000000
#define PA_SC_MODE_CNTL_1__MULTI_GPU_PRIM_DISCARD_ENABLE__SHIFT 0x18
#define PA_SC_MODE_CNTL_1__FORCE_EOV_CNTDWN_ENABLE_MASK 0x2000000
#define PA_SC_MODE_CNTL_1__FORCE_EOV_CNTDWN_ENABLE__SHIFT 0x19
#define PA_SC_MODE_CNTL_1__FORCE_EOV_REZ_ENABLE_MASK 0x4000000
#define PA_SC_MODE_CNTL_1__FORCE_EOV_REZ_ENABLE__SHIFT 0x1a
#define PA_SC_MODE_CNTL_1__OUT_OF_ORDER_PRIMITIVE_ENABLE_MASK 0x8000000
#define PA_SC_MODE_CNTL_1__OUT_OF_ORDER_PRIMITIVE_ENABLE__SHIFT 0x1b
#define PA_SC_MODE_CNTL_1__OUT_OF_ORDER_WATER_MARK_MASK 0x70000000
#define PA_SC_MODE_CNTL_1__OUT_OF_ORDER_WATER_MARK__SHIFT 0x1c
/*
 * PA_SC_RASTER_CONFIG: every field is 2 bits wide except RB_XSEL (bit 6)
 * and RB_YSEL (bit 7), which are single bits. Fields are contiguous from
 * bit 0 to bit 21, then resume at bit 24 (SE_MAP) through bit 29 (SE_YSEL);
 * bits 23:22 and 31:30 are not covered by any mask here.
 */
#define PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK 0x3
#define PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT 0x0
#define PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK 0xc
#define PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT 0x2
#define PA_SC_RASTER_CONFIG__RB_XSEL2_MASK 0x30
#define PA_SC_RASTER_CONFIG__RB_XSEL2__SHIFT 0x4
#define PA_SC_RASTER_CONFIG__RB_XSEL_MASK 0x40
#define PA_SC_RASTER_CONFIG__RB_XSEL__SHIFT 0x6
#define PA_SC_RASTER_CONFIG__RB_YSEL_MASK 0x80
#define PA_SC_RASTER_CONFIG__RB_YSEL__SHIFT 0x7
#define PA_SC_RASTER_CONFIG__PKR_MAP_MASK 0x300
#define PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT 0x8
#define PA_SC_RASTER_CONFIG__PKR_XSEL_MASK 0xc00
#define PA_SC_RASTER_CONFIG__PKR_XSEL__SHIFT 0xa
#define PA_SC_RASTER_CONFIG__PKR_YSEL_MASK 0x3000
#define PA_SC_RASTER_CONFIG__PKR_YSEL__SHIFT 0xc
#define PA_SC_RASTER_CONFIG__PKR_XSEL2_MASK 0xc000
#define PA_SC_RASTER_CONFIG__PKR_XSEL2__SHIFT 0xe
#define PA_SC_RASTER_CONFIG__SC_MAP_MASK 0x30000
#define PA_SC_RASTER_CONFIG__SC_MAP__SHIFT 0x10
#define PA_SC_RASTER_CONFIG__SC_XSEL_MASK 0xc0000
#define PA_SC_RASTER_CONFIG__SC_XSEL__SHIFT 0x12
#define PA_SC_RASTER_CONFIG__SC_YSEL_MASK 0x300000
#define PA_SC_RASTER_CONFIG__SC_YSEL__SHIFT 0x14
#define PA_SC_RASTER_CONFIG__SE_MAP_MASK 0x3000000
#define PA_SC_RASTER_CONFIG__SE_MAP__SHIFT 0x18
#define PA_SC_RASTER_CONFIG__SE_XSEL_MASK 0xc000000
#define PA_SC_RASTER_CONFIG__SE_XSEL__SHIFT 0x1a
#define PA_SC_RASTER_CONFIG__SE_YSEL_MASK 0x30000000
#define PA_SC_RASTER_CONFIG__SE_YSEL__SHIFT 0x1c
/* PA_SC_RASTER_CONFIG_1: three 2-bit SE_PAIR_* fields in bits 5:0. */
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_MAP_MASK 0x3
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_MAP__SHIFT 0x0
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_XSEL_MASK 0xc
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_XSEL__SHIFT 0x2
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_YSEL_MASK 0x30
#define PA_SC_RASTER_CONFIG_1__SE_PAIR_YSEL__SHIFT 0x4
/* PA_SC_SCREEN_EXTENT_CONTROL: two 2-bit fields in bits 1:0 and 3:2. */
#define PA_SC_SCREEN_EXTENT_CONTROL__SLICE_EVEN_ENABLE_MASK 0x3
#define PA_SC_SCREEN_EXTENT_CONTROL__SLICE_EVEN_ENABLE__SHIFT 0x0
#define PA_SC_SCREEN_EXTENT_CONTROL__SLICE_ODD_ENABLE_MASK 0xc
#define PA_SC_SCREEN_EXTENT_CONTROL__SLICE_ODD_ENABLE__SHIFT 0x2
/*
 * PA_SC_GENERIC_SCISSOR_{TL,BR}: 15-bit X in bits 14:0 and 15-bit Y in
 * bits 30:16. The TL value additionally has WINDOW_OFFSET_DISABLE in
 * bit 31; BR has no such flag.
 */
#define PA_SC_GENERIC_SCISSOR_TL__TL_X_MASK 0x7fff
#define PA_SC_GENERIC_SCISSOR_TL__TL_X__SHIFT 0x0
#define PA_SC_GENERIC_SCISSOR_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_GENERIC_SCISSOR_TL__TL_Y__SHIFT 0x10
#define PA_SC_GENERIC_SCISSOR_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_GENERIC_SCISSOR_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_GENERIC_SCISSOR_BR__BR_X_MASK 0x7fff
#define PA_SC_GENERIC_SCISSOR_BR__BR_X__SHIFT 0x0
#define PA_SC_GENERIC_SCISSOR_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_GENERIC_SCISSOR_BR__BR_Y__SHIFT 0x10
/*
 * PA_SC_SCREEN_SCISSOR_{TL,BR}: unlike the other scissor values in this
 * group, X and Y are full 16-bit fields (bits 15:0 and 31:16).
 */
#define PA_SC_SCREEN_SCISSOR_TL__TL_X_MASK 0xffff
#define PA_SC_SCREEN_SCISSOR_TL__TL_X__SHIFT 0x0
#define PA_SC_SCREEN_SCISSOR_TL__TL_Y_MASK 0xffff0000
#define PA_SC_SCREEN_SCISSOR_TL__TL_Y__SHIFT 0x10
#define PA_SC_SCREEN_SCISSOR_BR__BR_X_MASK 0xffff
#define PA_SC_SCREEN_SCISSOR_BR__BR_X__SHIFT 0x0
#define PA_SC_SCREEN_SCISSOR_BR__BR_Y_MASK 0xffff0000
#define PA_SC_SCREEN_SCISSOR_BR__BR_Y__SHIFT 0x10
/* PA_SC_WINDOW_OFFSET: 16-bit X offset in bits 15:0, Y offset in 31:16. */
#define PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_MASK 0xffff
#define PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET__SHIFT 0x0
#define PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_MASK 0xffff0000
#define PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET__SHIFT 0x10
/*
 * PA_SC_WINDOW_SCISSOR_{TL,BR}: 15-bit X in bits 14:0 and 15-bit Y in
 * bits 30:16; TL also has WINDOW_OFFSET_DISABLE in bit 31.
 */
#define PA_SC_WINDOW_SCISSOR_TL__TL_X_MASK 0x7fff
#define PA_SC_WINDOW_SCISSOR_TL__TL_X__SHIFT 0x0
#define PA_SC_WINDOW_SCISSOR_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_WINDOW_SCISSOR_TL__TL_Y__SHIFT 0x10
#define PA_SC_WINDOW_SCISSOR_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_WINDOW_SCISSOR_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_WINDOW_SCISSOR_BR__BR_X_MASK 0x7fff
#define PA_SC_WINDOW_SCISSOR_BR__BR_X__SHIFT 0x0
#define PA_SC_WINDOW_SCISSOR_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_WINDOW_SCISSOR_BR__BR_Y__SHIFT 0x10
/*
 * PA_SC_VPORT_SCISSOR_{0..15}_TL: sixteen instances with identical layout.
 *   TL_X                  bits 14:0
 *   TL_Y                  bits 30:16
 *   WINDOW_OFFSET_DISABLE bit  31
 * Bit 15 is not covered by any mask here.
 */
#define PA_SC_VPORT_SCISSOR_0_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_0_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_0_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_0_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_0_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_0_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_1_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_1_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_1_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_1_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_1_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_1_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_2_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_2_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_2_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_2_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_2_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_2_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_3_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_3_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_3_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_3_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_3_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_3_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_4_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_4_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_4_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_4_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_4_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_4_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_5_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_5_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_5_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_5_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_5_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_5_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_6_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_6_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_6_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_6_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_6_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_6_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_7_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_7_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_7_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_7_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_7_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_7_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_8_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_8_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_8_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_8_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_8_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_8_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_9_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_9_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_9_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_9_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_9_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_9_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_10_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_10_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_10_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_10_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_10_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_10_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_11_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_11_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_11_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_11_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_11_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_11_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_12_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_12_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_12_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_12_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_12_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_12_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_13_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_13_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_13_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_13_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_13_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_13_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_14_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_14_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_14_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_14_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_14_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_14_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
#define PA_SC_VPORT_SCISSOR_15_TL__TL_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_15_TL__TL_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_15_TL__TL_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_15_TL__TL_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_15_TL__WINDOW_OFFSET_DISABLE_MASK 0x80000000
#define PA_SC_VPORT_SCISSOR_15_TL__WINDOW_OFFSET_DISABLE__SHIFT 0x1f
/*
 * PA_SC_VPORT_SCISSOR_0_BR .. PA_SC_VPORT_SCISSOR_15_BR field constants.
 * <REG>__<FIELD>_MASK is the in-place bit mask of the field and
 * <REG>__<FIELD>__SHIFT is the bit position of its least-significant bit.
 * All sixteen registers share one layout:
 *   BR_X bits [14:0]
 *   BR_Y bits [30:16]
 * Bits 15 and 31 are not covered by any field defined here.
 */
#define PA_SC_VPORT_SCISSOR_0_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_0_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_0_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_0_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_1_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_1_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_1_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_1_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_2_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_2_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_2_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_2_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_3_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_3_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_3_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_3_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_4_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_4_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_4_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_4_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_5_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_5_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_5_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_5_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_6_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_6_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_6_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_6_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_7_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_7_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_7_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_7_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_8_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_8_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_8_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_8_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_9_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_9_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_9_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_9_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_10_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_10_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_10_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_10_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_11_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_11_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_11_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_11_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_12_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_12_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_12_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_12_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_13_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_13_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_13_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_13_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_14_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_14_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_14_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_14_BR__BR_Y__SHIFT 0x10
#define PA_SC_VPORT_SCISSOR_15_BR__BR_X_MASK 0x7fff
#define PA_SC_VPORT_SCISSOR_15_BR__BR_X__SHIFT 0x0
#define PA_SC_VPORT_SCISSOR_15_BR__BR_Y_MASK 0x7fff0000
#define PA_SC_VPORT_SCISSOR_15_BR__BR_Y__SHIFT 0x10
/*
 * PA_SC_VPORT_ZMIN_0 .. PA_SC_VPORT_ZMIN_15 field constants.
 * Each register holds a single field, VPORT_ZMIN, that spans the whole
 * 32-bit word (mask 0xffffffff, shift 0).
 */
#define PA_SC_VPORT_ZMIN_0__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_0__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_1__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_1__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_2__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_2__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_3__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_3__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_4__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_4__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_5__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_5__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_6__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_6__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_7__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_7__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_8__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_8__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_9__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_9__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_10__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_10__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_11__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_11__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_12__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_12__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_13__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_13__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_14__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_14__VPORT_ZMIN__SHIFT 0x0
#define PA_SC_VPORT_ZMIN_15__VPORT_ZMIN_MASK 0xffffffff
#define PA_SC_VPORT_ZMIN_15__VPORT_ZMIN__SHIFT 0x0
/*
 * PA_SC_VPORT_ZMAX_0 .. PA_SC_VPORT_ZMAX_15 field constants.
 * Each register holds a single field, VPORT_ZMAX, that spans the whole
 * 32-bit word (mask 0xffffffff, shift 0).
 */
#define PA_SC_VPORT_ZMAX_0__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_0__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_1__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_1__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_2__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_2__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_3__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_3__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_4__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_4__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_5__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_5__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_6__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_6__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_7__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_7__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_8__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_8__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_9__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_9__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_10__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_10__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_11__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_11__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_12__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_12__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_13__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_13__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_14__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_14__VPORT_ZMAX__SHIFT 0x0
#define PA_SC_VPORT_ZMAX_15__VPORT_ZMAX_MASK 0xffffffff
#define PA_SC_VPORT_ZMAX_15__VPORT_ZMAX__SHIFT 0x0
/*
 * PA_SC_ENHANCE field constants.
 * <REG>__<FIELD>_MASK is the in-place bit mask of the field and
 * <REG>__<FIELD>__SHIFT is the bit position of its least-significant bit.
 * The fields below are listed in ascending bit order and together cover all
 * 32 bits. Every field is a single bit except DISABLE_PW_BUBBLE_COLLAPSE,
 * which is two bits wide (bits [7:6]).
 */
#define PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK 0x1
#define PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER__SHIFT 0x0
#define PA_SC_ENHANCE__DISABLE_SC_DB_TILE_FIX_MASK 0x2
#define PA_SC_ENHANCE__DISABLE_SC_DB_TILE_FIX__SHIFT 0x1
#define PA_SC_ENHANCE__DISABLE_AA_MASK_FULL_FIX_MASK 0x4
#define PA_SC_ENHANCE__DISABLE_AA_MASK_FULL_FIX__SHIFT 0x2
#define PA_SC_ENHANCE__ENABLE_1XMSAA_SAMPLE_LOCATIONS_MASK 0x8
#define PA_SC_ENHANCE__ENABLE_1XMSAA_SAMPLE_LOCATIONS__SHIFT 0x3
#define PA_SC_ENHANCE__ENABLE_1XMSAA_SAMPLE_LOC_CENTROID_MASK 0x10
#define PA_SC_ENHANCE__ENABLE_1XMSAA_SAMPLE_LOC_CENTROID__SHIFT 0x4
#define PA_SC_ENHANCE__DISABLE_SCISSOR_FIX_MASK 0x20
#define PA_SC_ENHANCE__DISABLE_SCISSOR_FIX__SHIFT 0x5
#define PA_SC_ENHANCE__DISABLE_PW_BUBBLE_COLLAPSE_MASK 0xc0
#define PA_SC_ENHANCE__DISABLE_PW_BUBBLE_COLLAPSE__SHIFT 0x6
#define PA_SC_ENHANCE__SEND_UNLIT_STILES_TO_PACKER_MASK 0x100
#define PA_SC_ENHANCE__SEND_UNLIT_STILES_TO_PACKER__SHIFT 0x8
#define PA_SC_ENHANCE__DISABLE_DUALGRAD_PERF_OPTIMIZATION_MASK 0x200
#define PA_SC_ENHANCE__DISABLE_DUALGRAD_PERF_OPTIMIZATION__SHIFT 0x9
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_PRIM_MASK 0x400
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_PRIM__SHIFT 0xa
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_SUPERTILE_MASK 0x800
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_SUPERTILE__SHIFT 0xb
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_TILE_MASK 0x1000
#define PA_SC_ENHANCE__DISABLE_SC_PROCESS_RESET_TILE__SHIFT 0xc
#define PA_SC_ENHANCE__DISABLE_PA_SC_GUIDANCE_MASK 0x2000
#define PA_SC_ENHANCE__DISABLE_PA_SC_GUIDANCE__SHIFT 0xd
#define PA_SC_ENHANCE__DISABLE_EOV_ALL_CTRL_ONLY_COMBINATIONS_MASK 0x4000
#define PA_SC_ENHANCE__DISABLE_EOV_ALL_CTRL_ONLY_COMBINATIONS__SHIFT 0xe
#define PA_SC_ENHANCE__ENABLE_MULTICYCLE_BUBBLE_FREEZE_MASK 0x8000
#define PA_SC_ENHANCE__ENABLE_MULTICYCLE_BUBBLE_FREEZE__SHIFT 0xf
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_PA_SC_GUIDANCE_MASK 0x10000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_PA_SC_GUIDANCE__SHIFT 0x10
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_POLY_MODE_MASK 0x20000
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_POLY_MODE__SHIFT 0x11
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_EOP_SYNC_NULL_PRIMS_LAST_MASK 0x40000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_EOP_SYNC_NULL_PRIMS_LAST__SHIFT 0x12
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_THRESHOLD_SWITCHING_MASK 0x80000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_THRESHOLD_SWITCHING__SHIFT 0x13
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_THRESHOLD_SWITCH_AT_EOPG_ONLY_MASK 0x100000
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_THRESHOLD_SWITCH_AT_EOPG_ONLY__SHIFT 0x14
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_DESIRED_FIFO_EMPTY_SWITCHING_MASK 0x200000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_DESIRED_FIFO_EMPTY_SWITCHING__SHIFT 0x15
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_SELECTED_FIFO_EMPTY_SWITCHING_MASK 0x400000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_SELECTED_FIFO_EMPTY_SWITCHING__SHIFT 0x16
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_EMPTY_SWITCHING_HYSTERYSIS_MASK 0x800000
#define PA_SC_ENHANCE__DISABLE_OUT_OF_ORDER_EMPTY_SWITCHING_HYSTERYSIS__SHIFT 0x17
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_DESIRED_FIFO_IS_NEXT_FEID_MASK 0x1000000
#define PA_SC_ENHANCE__ENABLE_OUT_OF_ORDER_DESIRED_FIFO_IS_NEXT_FEID__SHIFT 0x18
#define PA_SC_ENHANCE__DISABLE_OOO_NO_EOPG_SKEW_DESIRED_FIFO_IS_CURRENT_FIFO_MASK 0x2000000
#define PA_SC_ENHANCE__DISABLE_OOO_NO_EOPG_SKEW_DESIRED_FIFO_IS_CURRENT_FIFO__SHIFT 0x19
#define PA_SC_ENHANCE__OOO_DISABLE_EOP_ON_FIRST_LIVE_PRIM_HIT_MASK 0x4000000
#define PA_SC_ENHANCE__OOO_DISABLE_EOP_ON_FIRST_LIVE_PRIM_HIT__SHIFT 0x1a
#define PA_SC_ENHANCE__OOO_DISABLE_EOPG_SKEW_THRESHOLD_SWITCHING_MASK 0x8000000
#define PA_SC_ENHANCE__OOO_DISABLE_EOPG_SKEW_THRESHOLD_SWITCHING__SHIFT 0x1b
#define PA_SC_ENHANCE__DISABLE_EOP_LINE_STIPPLE_RESET_MASK 0x10000000
#define PA_SC_ENHANCE__DISABLE_EOP_LINE_STIPPLE_RESET__SHIFT 0x1c
#define PA_SC_ENHANCE__DISABLE_VPZ_EOP_LINE_STIPPLE_RESET_MASK 0x20000000
#define PA_SC_ENHANCE__DISABLE_VPZ_EOP_LINE_STIPPLE_RESET__SHIFT 0x1d
#define PA_SC_ENHANCE__ECO_SPARE1_MASK 0x40000000
#define PA_SC_ENHANCE__ECO_SPARE1__SHIFT 0x1e
#define PA_SC_ENHANCE__ECO_SPARE0_MASK 0x80000000
#define PA_SC_ENHANCE__ECO_SPARE0__SHIFT 0x1f
/*
 * PA_SC_FIFO_SIZE and PA_SC_IF_FIFO_SIZE field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 *
 * PA_SC_FIFO_SIZE:
 *   SC_FRONTEND_PRIM_FIFO_SIZE bits [5:0]
 *   SC_BACKEND_PRIM_FIFO_SIZE  bits [14:6]
 *   SC_HIZ_TILE_FIFO_SIZE      bits [20:15]
 *   SC_EARLYZ_TILE_FIFO_SIZE   bits [31:23]
 *   (bits 21-22 are not covered by any field defined here)
 *
 * PA_SC_IF_FIFO_SIZE: four 6-bit fields packed into bits [23:0]:
 *   SC_DB_TILE_IF_FIFO_SIZE [5:0], SC_DB_QUAD_IF_FIFO_SIZE [11:6],
 *   SC_SPI_IF_FIFO_SIZE [17:12], SC_BCI_IF_FIFO_SIZE [23:18]
 */
#define PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE_MASK 0x3f
#define PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT 0x0
#define PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE_MASK 0x7fc0
#define PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT 0x6
#define PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE_MASK 0x1f8000
#define PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT 0xf
#define PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE_MASK 0xff800000
#define PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT 0x17
#define PA_SC_IF_FIFO_SIZE__SC_DB_TILE_IF_FIFO_SIZE_MASK 0x3f
#define PA_SC_IF_FIFO_SIZE__SC_DB_TILE_IF_FIFO_SIZE__SHIFT 0x0
#define PA_SC_IF_FIFO_SIZE__SC_DB_QUAD_IF_FIFO_SIZE_MASK 0xfc0
#define PA_SC_IF_FIFO_SIZE__SC_DB_QUAD_IF_FIFO_SIZE__SHIFT 0x6
#define PA_SC_IF_FIFO_SIZE__SC_SPI_IF_FIFO_SIZE_MASK 0x3f000
#define PA_SC_IF_FIFO_SIZE__SC_SPI_IF_FIFO_SIZE__SHIFT 0xc
#define PA_SC_IF_FIFO_SIZE__SC_BCI_IF_FIFO_SIZE_MASK 0xfc0000
#define PA_SC_IF_FIFO_SIZE__SC_BCI_IF_FIFO_SIZE__SHIFT 0x12
/*
 * PA_SC_FORCE_EOV_MAX_CNTS and PA_SC_LINE_STIPPLE_STATE field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 *
 * PA_SC_FORCE_EOV_MAX_CNTS: FORCE_EOV_MAX_CLK_CNT bits [15:0],
 *                           FORCE_EOV_MAX_REZ_CNT bits [31:16].
 * PA_SC_LINE_STIPPLE_STATE: CURRENT_PTR bits [3:0], CURRENT_COUNT bits [15:8]
 *                           (bits 4-7 are not covered by any field here).
 */
#define PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_MASK 0xffff
#define PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT 0x0
#define PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_MASK 0xffff0000
#define PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT 0x10
#define PA_SC_LINE_STIPPLE_STATE__CURRENT_PTR_MASK 0xf
#define PA_SC_LINE_STIPPLE_STATE__CURRENT_PTR__SHIFT 0x0
#define PA_SC_LINE_STIPPLE_STATE__CURRENT_COUNT_MASK 0xff00
#define PA_SC_LINE_STIPPLE_STATE__CURRENT_COUNT__SHIFT 0x8
/*
 * PA_SC_SCREEN_EXTENT_{MIN,MAX}_{0,1} field constants.
 * All four registers share one layout: X in bits [15:0] and Y in bits
 * [31:16] (_MASK = in-place bit mask, __SHIFT = least-significant bit).
 */
#define PA_SC_SCREEN_EXTENT_MIN_0__X_MASK 0xffff
#define PA_SC_SCREEN_EXTENT_MIN_0__X__SHIFT 0x0
#define PA_SC_SCREEN_EXTENT_MIN_0__Y_MASK 0xffff0000
#define PA_SC_SCREEN_EXTENT_MIN_0__Y__SHIFT 0x10
#define PA_SC_SCREEN_EXTENT_MAX_0__X_MASK 0xffff
#define PA_SC_SCREEN_EXTENT_MAX_0__X__SHIFT 0x0
#define PA_SC_SCREEN_EXTENT_MAX_0__Y_MASK 0xffff0000
#define PA_SC_SCREEN_EXTENT_MAX_0__Y__SHIFT 0x10
#define PA_SC_SCREEN_EXTENT_MIN_1__X_MASK 0xffff
#define PA_SC_SCREEN_EXTENT_MIN_1__X__SHIFT 0x0
#define PA_SC_SCREEN_EXTENT_MIN_1__Y_MASK 0xffff0000
#define PA_SC_SCREEN_EXTENT_MIN_1__Y__SHIFT 0x10
#define PA_SC_SCREEN_EXTENT_MAX_1__X_MASK 0xffff
#define PA_SC_SCREEN_EXTENT_MAX_1__X__SHIFT 0x0
#define PA_SC_SCREEN_EXTENT_MAX_1__Y_MASK 0xffff0000
#define PA_SC_SCREEN_EXTENT_MAX_1__Y__SHIFT 0x10
/*
 * PA_SC_PERFCOUNTER{0..7}_SELECT and PA_SC_PERFCOUNTER0_SELECT1 field
 * constants (_MASK = in-place bit mask, __SHIFT = least-significant bit).
 *
 * PA_SC_PERFCOUNTER0_SELECT:  PERF_SEL [9:0], PERF_SEL1 [19:10],
 *                             CNTR_MODE [23:20]
 * PA_SC_PERFCOUNTER0_SELECT1: PERF_SEL2 [9:0], PERF_SEL3 [19:10]
 * PA_SC_PERFCOUNTER{1..7}_SELECT: only PERF_SEL [9:0] is defined here.
 */
#define PA_SC_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define PA_SC_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define PA_SC_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define PA_SC_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define PA_SC_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define PA_SC_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define PA_SC_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define PA_SC_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define PA_SC_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER4_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER4_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER5_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER5_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER6_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER6_SELECT__PERF_SEL__SHIFT 0x0
#define PA_SC_PERFCOUNTER7_SELECT__PERF_SEL_MASK 0x3ff
#define PA_SC_PERFCOUNTER7_SELECT__PERF_SEL__SHIFT 0x0
/*
 * PA_SC_PERFCOUNTER{0..7}_LO / _HI field constants.
 * Each register holds a single full-width 32-bit field (mask 0xffffffff,
 * shift 0): PERFCOUNTER_LO in the _LO registers and PERFCOUNTER_HI in the
 * _HI registers.
 */
#define PA_SC_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER4_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER4_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER4_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER4_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER5_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER5_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER5_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER5_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER6_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER6_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER6_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER6_HI__PERFCOUNTER_HI__SHIFT 0x0
#define PA_SC_PERFCOUNTER7_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define PA_SC_PERFCOUNTER7_LO__PERFCOUNTER_LO__SHIFT 0x0
#define PA_SC_PERFCOUNTER7_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define PA_SC_PERFCOUNTER7_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * PA_SC_*TRAP_SCREEN_* field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 * Three register sets with identical layouts are defined, distinguished by
 * prefix: PA_SC_P3D_TRAP_SCREEN_*, PA_SC_HP3D_TRAP_SCREEN_* and
 * PA_SC_TRAP_SCREEN_*. Per set:
 *   *_HV_EN      ENABLE_HV_PRE_SHADER bit 0, FORCE_PRE_SHADER_ALL_PIXELS bit 1
 *   *_H          X_COORD bits [13:0]
 *   *_V          Y_COORD bits [13:0]
 *   *_OCCURRENCE COUNT   bits [15:0]
 *   *_COUNT      COUNT   bits [15:0]
 *   *_HV_LOCK    DISABLE_NON_PRIV_WRITES bit 0
 */
#define PA_SC_P3D_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER_MASK 0x1
#define PA_SC_P3D_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER__SHIFT 0x0
#define PA_SC_P3D_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS_MASK 0x2
#define PA_SC_P3D_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS__SHIFT 0x1
#define PA_SC_P3D_TRAP_SCREEN_H__X_COORD_MASK 0x3fff
#define PA_SC_P3D_TRAP_SCREEN_H__X_COORD__SHIFT 0x0
#define PA_SC_P3D_TRAP_SCREEN_V__Y_COORD_MASK 0x3fff
#define PA_SC_P3D_TRAP_SCREEN_V__Y_COORD__SHIFT 0x0
#define PA_SC_P3D_TRAP_SCREEN_OCCURRENCE__COUNT_MASK 0xffff
#define PA_SC_P3D_TRAP_SCREEN_OCCURRENCE__COUNT__SHIFT 0x0
#define PA_SC_P3D_TRAP_SCREEN_COUNT__COUNT_MASK 0xffff
#define PA_SC_P3D_TRAP_SCREEN_COUNT__COUNT__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER_MASK 0x1
#define PA_SC_HP3D_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS_MASK 0x2
#define PA_SC_HP3D_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS__SHIFT 0x1
#define PA_SC_HP3D_TRAP_SCREEN_H__X_COORD_MASK 0x3fff
#define PA_SC_HP3D_TRAP_SCREEN_H__X_COORD__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_V__Y_COORD_MASK 0x3fff
#define PA_SC_HP3D_TRAP_SCREEN_V__Y_COORD__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_OCCURRENCE__COUNT_MASK 0xffff
#define PA_SC_HP3D_TRAP_SCREEN_OCCURRENCE__COUNT__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_COUNT__COUNT_MASK 0xffff
#define PA_SC_HP3D_TRAP_SCREEN_COUNT__COUNT__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER_MASK 0x1
#define PA_SC_TRAP_SCREEN_HV_EN__ENABLE_HV_PRE_SHADER__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS_MASK 0x2
#define PA_SC_TRAP_SCREEN_HV_EN__FORCE_PRE_SHADER_ALL_PIXELS__SHIFT 0x1
#define PA_SC_TRAP_SCREEN_H__X_COORD_MASK 0x3fff
#define PA_SC_TRAP_SCREEN_H__X_COORD__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_V__Y_COORD_MASK 0x3fff
#define PA_SC_TRAP_SCREEN_V__Y_COORD__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_OCCURRENCE__COUNT_MASK 0xffff
#define PA_SC_TRAP_SCREEN_OCCURRENCE__COUNT__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_COUNT__COUNT_MASK 0xffff
#define PA_SC_TRAP_SCREEN_COUNT__COUNT__SHIFT 0x0
#define PA_SC_P3D_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES_MASK 0x1
#define PA_SC_P3D_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES__SHIFT 0x0
#define PA_SC_HP3D_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES_MASK 0x1
#define PA_SC_HP3D_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES__SHIFT 0x0
#define PA_SC_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES_MASK 0x1
#define PA_SC_TRAP_SCREEN_HV_LOCK__DISABLE_NON_PRIV_WRITES__SHIFT 0x0
/*
 * PA_CL_CNTL_STATUS, PA_SU_CNTL_STATUS and PA_SC_FIFO_DEPTH_CNTL field
 * constants (_MASK = in-place bit mask, __SHIFT = least-significant bit).
 *   PA_CL_CNTL_STATUS:     CL_BUSY bit 31
 *   PA_SU_CNTL_STATUS:     SU_BUSY bit 31
 *   PA_SC_FIFO_DEPTH_CNTL: DEPTH   bits [9:0]
 */
#define PA_CL_CNTL_STATUS__CL_BUSY_MASK 0x80000000
#define PA_CL_CNTL_STATUS__CL_BUSY__SHIFT 0x1f
#define PA_SU_CNTL_STATUS__SU_BUSY_MASK 0x80000000
#define PA_SU_CNTL_STATUS__SU_BUSY__SHIFT 0x1f
#define PA_SC_FIFO_DEPTH_CNTL__DEPTH_MASK 0x3ff
#define PA_SC_FIFO_DEPTH_CNTL__DEPTH__SHIFT 0x0
/*
 * CGTT_PA_CLK_CTRL and CGTT_SC_CLK_CTRL field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 * Both registers define ON_DELAY in bits [3:0] and OFF_HYSTERESIS in bits
 * [11:4], followed by eight single-bit override fields in bits 24..31.
 * Bits 12-23 are not covered by any field defined here.
 *   CGTT_PA_CLK_CTRL: SOFT_OVERRIDE7..SOFT_OVERRIDE3 at bits 24..28, then
 *                     SU_CLK_OVERRIDE (29), CL_CLK_OVERRIDE (30),
 *                     REG_CLK_OVERRIDE (31).
 *   CGTT_SC_CLK_CTRL: SOFT_OVERRIDE7..SOFT_OVERRIDE0 at bits 24..31.
 */
#define CGTT_PA_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_PA_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_PA_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_PA_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_PA_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_PA_CLK_CTRL__SU_CLK_OVERRIDE_MASK 0x20000000
#define CGTT_PA_CLK_CTRL__SU_CLK_OVERRIDE__SHIFT 0x1d
#define CGTT_PA_CLK_CTRL__CL_CLK_OVERRIDE_MASK 0x40000000
#define CGTT_PA_CLK_CTRL__CL_CLK_OVERRIDE__SHIFT 0x1e
#define CGTT_PA_CLK_CTRL__REG_CLK_OVERRIDE_MASK 0x80000000
#define CGTT_PA_CLK_CTRL__REG_CLK_OVERRIDE__SHIFT 0x1f
#define CGTT_SC_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_SC_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_SC_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SC_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * PA_SU / PA_SC DEBUG_CNTL and DEBUG_DATA field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 *   PA_SU_DEBUG_CNTL: SU_DEBUG_INDX bits [4:0]
 *   PA_SC_DEBUG_CNTL: SC_DEBUG_INDX bits [5:0]
 *   PA_SU_DEBUG_DATA / PA_SC_DEBUG_DATA: DATA spans all 32 bits.
 * NOTE(review): the CNTL/DATA pairing looks like an index/data debug port;
 * confirm against the hardware register documentation.
 */
#define PA_SU_DEBUG_CNTL__SU_DEBUG_INDX_MASK 0x1f
#define PA_SU_DEBUG_CNTL__SU_DEBUG_INDX__SHIFT 0x0
#define PA_SU_DEBUG_DATA__DATA_MASK 0xffffffff
#define PA_SU_DEBUG_DATA__DATA__SHIFT 0x0
#define PA_SC_DEBUG_CNTL__SC_DEBUG_INDX_MASK 0x3f
#define PA_SC_DEBUG_CNTL__SC_DEBUG_INDX__SHIFT 0x0
#define PA_SC_DEBUG_DATA__DATA_MASK 0xffffffff
#define PA_SC_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * CLIPPER_DEBUG_REG00 field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 * Fields are listed in ascending bit order and cover all 32 bits:
 *   ALWAYS_ZERO        bits [7:0]
 *   su_clip_baryc_free bits [10:9]
 *   every other field is a single bit (bit 8 and bits 11..31).
 */
#define CLIPPER_DEBUG_REG00__ALWAYS_ZERO_MASK 0xff
#define CLIPPER_DEBUG_REG00__ALWAYS_ZERO__SHIFT 0x0
#define CLIPPER_DEBUG_REG00__clip_ga_bc_fifo_write_MASK 0x100
#define CLIPPER_DEBUG_REG00__clip_ga_bc_fifo_write__SHIFT 0x8
#define CLIPPER_DEBUG_REG00__su_clip_baryc_free_MASK 0x600
#define CLIPPER_DEBUG_REG00__su_clip_baryc_free__SHIFT 0x9
#define CLIPPER_DEBUG_REG00__clip_to_ga_fifo_write_MASK 0x800
#define CLIPPER_DEBUG_REG00__clip_to_ga_fifo_write__SHIFT 0xb
#define CLIPPER_DEBUG_REG00__clip_to_ga_fifo_full_MASK 0x1000
#define CLIPPER_DEBUG_REG00__clip_to_ga_fifo_full__SHIFT 0xc
#define CLIPPER_DEBUG_REG00__primic_to_clprim_fifo_empty_MASK 0x2000
#define CLIPPER_DEBUG_REG00__primic_to_clprim_fifo_empty__SHIFT 0xd
#define CLIPPER_DEBUG_REG00__primic_to_clprim_fifo_full_MASK 0x4000
#define CLIPPER_DEBUG_REG00__primic_to_clprim_fifo_full__SHIFT 0xe
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_empty_MASK 0x8000
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_empty__SHIFT 0xf
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_full_MASK 0x10000
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_full__SHIFT 0x10
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_empty_MASK 0x20000
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_empty__SHIFT 0x11
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_full_MASK 0x40000
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_full__SHIFT 0x12
#define CLIPPER_DEBUG_REG00__vgt_to_clips_fifo_empty_MASK 0x80000
#define CLIPPER_DEBUG_REG00__vgt_to_clips_fifo_empty__SHIFT 0x13
#define CLIPPER_DEBUG_REG00__vgt_to_clips_fifo_full_MASK 0x100000
#define CLIPPER_DEBUG_REG00__vgt_to_clips_fifo_full__SHIFT 0x14
#define CLIPPER_DEBUG_REG00__clipcode_fifo_fifo_empty_MASK 0x200000
#define CLIPPER_DEBUG_REG00__clipcode_fifo_fifo_empty__SHIFT 0x15
#define CLIPPER_DEBUG_REG00__clipcode_fifo_full_MASK 0x400000
#define CLIPPER_DEBUG_REG00__clipcode_fifo_full__SHIFT 0x16
#define CLIPPER_DEBUG_REG00__vte_out_clip_fifo_fifo_empty_MASK 0x800000
#define CLIPPER_DEBUG_REG00__vte_out_clip_fifo_fifo_empty__SHIFT 0x17
#define CLIPPER_DEBUG_REG00__vte_out_clip_fifo_fifo_full_MASK 0x1000000
#define CLIPPER_DEBUG_REG00__vte_out_clip_fifo_fifo_full__SHIFT 0x18
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_empty_MASK 0x2000000
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_empty__SHIFT 0x19
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_full_MASK 0x4000000
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_full__SHIFT 0x1a
#define CLIPPER_DEBUG_REG00__ccgen_to_clipcc_fifo_empty_MASK 0x8000000
#define CLIPPER_DEBUG_REG00__ccgen_to_clipcc_fifo_empty__SHIFT 0x1b
#define CLIPPER_DEBUG_REG00__ccgen_to_clipcc_fifo_full_MASK 0x10000000
#define CLIPPER_DEBUG_REG00__ccgen_to_clipcc_fifo_full__SHIFT 0x1c
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_write_MASK 0x20000000
#define CLIPPER_DEBUG_REG00__clip_to_outsm_fifo_write__SHIFT 0x1d
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_write_MASK 0x40000000
#define CLIPPER_DEBUG_REG00__vte_out_orig_fifo_fifo_write__SHIFT 0x1e
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_write_MASK 0x80000000
#define CLIPPER_DEBUG_REG00__vgt_to_clipp_fifo_write__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG01 field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 * Fields are listed in ascending bit order and cover all 32 bits:
 *   ALWAYS_ZERO                       bits [7:0]
 *   clip_extra_bc_valid               bits [10:8]
 *   clip_vert_vte_valid               bits [13:11]
 *   clip_to_outsm_vertex_deallocate   bits [16:14]
 *   clip_to_outsm_deallocate_slot     bits [19:17]
 *   six single-bit fields             bits 20..25
 *   vte_out_clip_rd_vertex_store_indx bits [27:26]
 *   four single-bit fields            bits 28..31
 */
#define CLIPPER_DEBUG_REG01__ALWAYS_ZERO_MASK 0xff
#define CLIPPER_DEBUG_REG01__ALWAYS_ZERO__SHIFT 0x0
#define CLIPPER_DEBUG_REG01__clip_extra_bc_valid_MASK 0x700
#define CLIPPER_DEBUG_REG01__clip_extra_bc_valid__SHIFT 0x8
#define CLIPPER_DEBUG_REG01__clip_vert_vte_valid_MASK 0x3800
#define CLIPPER_DEBUG_REG01__clip_vert_vte_valid__SHIFT 0xb
#define CLIPPER_DEBUG_REG01__clip_to_outsm_vertex_deallocate_MASK 0x1c000
#define CLIPPER_DEBUG_REG01__clip_to_outsm_vertex_deallocate__SHIFT 0xe
#define CLIPPER_DEBUG_REG01__clip_to_outsm_deallocate_slot_MASK 0xe0000
#define CLIPPER_DEBUG_REG01__clip_to_outsm_deallocate_slot__SHIFT 0x11
#define CLIPPER_DEBUG_REG01__clip_to_outsm_null_primitive_MASK 0x100000
#define CLIPPER_DEBUG_REG01__clip_to_outsm_null_primitive__SHIFT 0x14
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_2_MASK 0x200000
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_2__SHIFT 0x15
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_1_MASK 0x400000
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_1__SHIFT 0x16
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_0_MASK 0x800000
#define CLIPPER_DEBUG_REG01__vte_positions_vte_clip_vte_naninf_kill_0__SHIFT 0x17
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_extra_bc_valid_MASK 0x1000000
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_extra_bc_valid__SHIFT 0x18
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_vte_naninf_kill_MASK 0x2000000
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_vte_naninf_kill__SHIFT 0x19
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_vertex_store_indx_MASK 0xc000000
#define CLIPPER_DEBUG_REG01__vte_out_clip_rd_vertex_store_indx__SHIFT 0x1a
#define CLIPPER_DEBUG_REG01__clip_ga_bc_fifo_write_MASK 0x10000000
#define CLIPPER_DEBUG_REG01__clip_ga_bc_fifo_write__SHIFT 0x1c
#define CLIPPER_DEBUG_REG01__clip_to_ga_fifo_write_MASK 0x20000000
#define CLIPPER_DEBUG_REG01__clip_to_ga_fifo_write__SHIFT 0x1d
#define CLIPPER_DEBUG_REG01__vte_out_clip_fifo_fifo_advanceread_MASK 0x40000000
#define CLIPPER_DEBUG_REG01__vte_out_clip_fifo_fifo_advanceread__SHIFT 0x1e
#define CLIPPER_DEBUG_REG01__vte_out_clip_fifo_fifo_empty_MASK 0x80000000
#define CLIPPER_DEBUG_REG01__vte_out_clip_fifo_fifo_empty__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG02 field constants
 * (_MASK = in-place bit mask, __SHIFT = least-significant bit position).
 * Fields are listed in ascending bit order and cover all 32 bits:
 *   clip_extra_bc_valid                bits [2:0]
 *   clip_vert_vte_valid                bits [5:3]
 *   clip_to_outsm_clip_seq_indx        bits [7:6]
 *   clip_to_outsm_vertex_store_indx_2  bits [11:8]
 *   clip_to_outsm_vertex_store_indx_1  bits [15:12]
 *   clip_to_outsm_vertex_store_indx_0  bits [19:16]
 *   twelve single-bit fields           bits 20..31
 */
#define CLIPPER_DEBUG_REG02__clip_extra_bc_valid_MASK 0x7
#define CLIPPER_DEBUG_REG02__clip_extra_bc_valid__SHIFT 0x0
#define CLIPPER_DEBUG_REG02__clip_vert_vte_valid_MASK 0x38
#define CLIPPER_DEBUG_REG02__clip_vert_vte_valid__SHIFT 0x3
#define CLIPPER_DEBUG_REG02__clip_to_outsm_clip_seq_indx_MASK 0xc0
#define CLIPPER_DEBUG_REG02__clip_to_outsm_clip_seq_indx__SHIFT 0x6
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_2_MASK 0xf00
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_2__SHIFT 0x8
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_1_MASK 0xf000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_1__SHIFT 0xc
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_0_MASK 0xf0000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_vertex_store_indx_0__SHIFT 0x10
#define CLIPPER_DEBUG_REG02__clip_to_clipga_extra_bc_coords_MASK 0x100000
#define CLIPPER_DEBUG_REG02__clip_to_clipga_extra_bc_coords__SHIFT 0x14
#define CLIPPER_DEBUG_REG02__clip_to_clipga_vte_naninf_kill_MASK 0x200000
#define CLIPPER_DEBUG_REG02__clip_to_clipga_vte_naninf_kill__SHIFT 0x15
#define CLIPPER_DEBUG_REG02__clip_to_outsm_end_of_packet_MASK 0x400000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_end_of_packet__SHIFT 0x16
#define CLIPPER_DEBUG_REG02__clip_to_outsm_first_prim_of_slot_MASK 0x800000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_first_prim_of_slot__SHIFT 0x17
#define CLIPPER_DEBUG_REG02__clip_to_outsm_clipped_prim_MASK 0x1000000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_clipped_prim__SHIFT 0x18
#define CLIPPER_DEBUG_REG02__clip_to_outsm_null_primitive_MASK 0x2000000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_null_primitive__SHIFT 0x19
#define CLIPPER_DEBUG_REG02__clip_ga_bc_fifo_full_MASK 0x4000000
#define CLIPPER_DEBUG_REG02__clip_ga_bc_fifo_full__SHIFT 0x1a
#define CLIPPER_DEBUG_REG02__clip_to_ga_fifo_full_MASK 0x8000000
#define CLIPPER_DEBUG_REG02__clip_to_ga_fifo_full__SHIFT 0x1b
#define CLIPPER_DEBUG_REG02__clip_ga_bc_fifo_write_MASK 0x10000000
#define CLIPPER_DEBUG_REG02__clip_ga_bc_fifo_write__SHIFT 0x1c
#define CLIPPER_DEBUG_REG02__clip_to_ga_fifo_write_MASK 0x20000000
#define CLIPPER_DEBUG_REG02__clip_to_ga_fifo_write__SHIFT 0x1d
#define CLIPPER_DEBUG_REG02__clip_to_outsm_fifo_advanceread_MASK 0x40000000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_fifo_advanceread__SHIFT 0x1e
#define CLIPPER_DEBUG_REG02__clip_to_outsm_fifo_empty_MASK 0x80000000
#define CLIPPER_DEBUG_REG02__clip_to_outsm_fifo_empty__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG03 field constants (all fields carry the
 * clipsm0_clprim_to_clip_ prefix; _MASK = in-place bit mask,
 * __SHIFT = least-significant bit position).
 * Fields are listed in ascending bit order and cover all 32 bits:
 *   clip_code_or     bits [13:0]
 *   event_id         bits [19:14]
 *   state_var_indx   bits [22:20]
 *   clip_primitive   bit  23
 *   deallocate_slot  bits [26:24]
 *   five single-bit fields, bits 27..31
 */
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_clip_code_or_MASK 0x3fff
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_clip_code_or__SHIFT 0x0
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_event_id_MASK 0xfc000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_event_id__SHIFT 0xe
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_state_var_indx_MASK 0x700000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_state_var_indx__SHIFT 0x14
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_clip_primitive_MASK 0x800000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_clip_primitive__SHIFT 0x17
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_deallocate_slot_MASK 0x7000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_deallocate_slot__SHIFT 0x18
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_first_prim_of_slot_MASK 0x8000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_first_prim_of_slot__SHIFT 0x1b
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_end_of_packet_MASK 0x10000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_end_of_packet__SHIFT 0x1c
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG03__clipsm0_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG04 field constants (all fields carry the
 * clipsm0_clprim_to_clip_ prefix; _MASK = in-place bit mask,
 * __SHIFT = least-significant bit position).
 *   param_cache_indx_0   bits [10:1]
 *   vertex_store_indx_2  bits [16:11]
 *   vertex_store_indx_1  bits [22:17]
 *   vertex_store_indx_0  bits [28:23]
 *   event                bit  29
 *   null_primitive       bit  30
 *   prim_valid           bit  31
 * Note that the first field starts at bit 1; bit 0 is not covered by any
 * field defined here.
 */
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_param_cache_indx_0_MASK 0x7fe
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_param_cache_indx_0__SHIFT 0x1
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_2_MASK 0x1f800
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_2__SHIFT 0xb
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_1_MASK 0x7e0000
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_1__SHIFT 0x11
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_0_MASK 0x1f800000
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_vertex_store_indx_0__SHIFT 0x17
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG04__clipsm0_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG05 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [13:0]  clipsm1_clprim_to_clip_clip_code_or
 *   [19:14] clipsm1_clprim_to_clip_event_id
 *   [22:20] clipsm1_clprim_to_clip_state_var_indx
 *   [23]    clipsm1_clprim_to_clip_clip_primitive
 *   [26:24] clipsm1_clprim_to_clip_deallocate_slot
 *   [27]    clipsm1_clprim_to_clip_first_prim_of_slot
 *   [28]    clipsm1_clprim_to_clip_end_of_packet
 *   [29]    clipsm1_clprim_to_clip_event
 *   [30]    clipsm1_clprim_to_clip_null_primitive
 *   [31]    clipsm1_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_clip_code_or_MASK 0x3fff
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_clip_code_or__SHIFT 0x0
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_event_id_MASK 0xfc000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_event_id__SHIFT 0xe
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_state_var_indx_MASK 0x700000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_state_var_indx__SHIFT 0x14
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_clip_primitive_MASK 0x800000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_clip_primitive__SHIFT 0x17
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_deallocate_slot_MASK 0x7000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_deallocate_slot__SHIFT 0x18
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_first_prim_of_slot_MASK 0x8000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_first_prim_of_slot__SHIFT 0x1b
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_end_of_packet_MASK 0x10000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_end_of_packet__SHIFT 0x1c
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG05__clipsm1_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG06 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [10:1]  clipsm1_clprim_to_clip_param_cache_indx_0
 *   [16:11] clipsm1_clprim_to_clip_vertex_store_indx_2
 *   [22:17] clipsm1_clprim_to_clip_vertex_store_indx_1
 *   [28:23] clipsm1_clprim_to_clip_vertex_store_indx_0
 *   [29]    clipsm1_clprim_to_clip_event
 *   [30]    clipsm1_clprim_to_clip_null_primitive
 *   [31]    clipsm1_clprim_to_clip_prim_valid
 * Bit 0 is not covered by any field defined here.
 */
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_param_cache_indx_0_MASK 0x7fe
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_param_cache_indx_0__SHIFT 0x1
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_2_MASK 0x1f800
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_2__SHIFT 0xb
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_1_MASK 0x7e0000
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_1__SHIFT 0x11
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_0_MASK 0x1f800000
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_vertex_store_indx_0__SHIFT 0x17
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG06__clipsm1_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG07 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [13:0]  clipsm2_clprim_to_clip_clip_code_or
 *   [19:14] clipsm2_clprim_to_clip_event_id
 *   [22:20] clipsm2_clprim_to_clip_state_var_indx
 *   [23]    clipsm2_clprim_to_clip_clip_primitive
 *   [26:24] clipsm2_clprim_to_clip_deallocate_slot
 *   [27]    clipsm2_clprim_to_clip_first_prim_of_slot
 *   [28]    clipsm2_clprim_to_clip_end_of_packet
 *   [29]    clipsm2_clprim_to_clip_event
 *   [30]    clipsm2_clprim_to_clip_null_primitive
 *   [31]    clipsm2_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_clip_code_or_MASK 0x3fff
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_clip_code_or__SHIFT 0x0
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_event_id_MASK 0xfc000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_event_id__SHIFT 0xe
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_state_var_indx_MASK 0x700000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_state_var_indx__SHIFT 0x14
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_clip_primitive_MASK 0x800000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_clip_primitive__SHIFT 0x17
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_deallocate_slot_MASK 0x7000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_deallocate_slot__SHIFT 0x18
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_first_prim_of_slot_MASK 0x8000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_first_prim_of_slot__SHIFT 0x1b
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_end_of_packet_MASK 0x10000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_end_of_packet__SHIFT 0x1c
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG07__clipsm2_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG08 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [10:1]  clipsm2_clprim_to_clip_param_cache_indx_0
 *   [16:11] clipsm2_clprim_to_clip_vertex_store_indx_2
 *   [22:17] clipsm2_clprim_to_clip_vertex_store_indx_1
 *   [28:23] clipsm2_clprim_to_clip_vertex_store_indx_0
 *   [29]    clipsm2_clprim_to_clip_event
 *   [30]    clipsm2_clprim_to_clip_null_primitive
 *   [31]    clipsm2_clprim_to_clip_prim_valid
 * Bit 0 is not covered by any field defined here.
 */
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_param_cache_indx_0_MASK 0x7fe
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_param_cache_indx_0__SHIFT 0x1
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_2_MASK 0x1f800
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_2__SHIFT 0xb
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_1_MASK 0x7e0000
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_1__SHIFT 0x11
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_0_MASK 0x1f800000
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_vertex_store_indx_0__SHIFT 0x17
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG08__clipsm2_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG09 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [13:0]  clipsm3_clprim_to_clip_clip_code_or
 *   [19:14] clipsm3_clprim_to_clip_event_id
 *   [22:20] clipsm3_clprim_to_clip_state_var_indx
 *   [23]    clipsm3_clprim_to_clip_clip_primitive
 *   [26:24] clipsm3_clprim_to_clip_deallocate_slot
 *   [27]    clipsm3_clprim_to_clip_first_prim_of_slot
 *   [28]    clipsm3_clprim_to_clip_end_of_packet
 *   [29]    clipsm3_clprim_to_clip_event
 *   [30]    clipsm3_clprim_to_clip_null_primitive
 *   [31]    clipsm3_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_clip_code_or_MASK 0x3fff
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_clip_code_or__SHIFT 0x0
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_event_id_MASK 0xfc000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_event_id__SHIFT 0xe
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_state_var_indx_MASK 0x700000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_state_var_indx__SHIFT 0x14
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_clip_primitive_MASK 0x800000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_clip_primitive__SHIFT 0x17
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_deallocate_slot_MASK 0x7000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_deallocate_slot__SHIFT 0x18
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_first_prim_of_slot_MASK 0x8000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_first_prim_of_slot__SHIFT 0x1b
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_end_of_packet_MASK 0x10000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_end_of_packet__SHIFT 0x1c
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG09__clipsm3_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG10 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [10:1]  clipsm3_clprim_to_clip_param_cache_indx_0
 *   [16:11] clipsm3_clprim_to_clip_vertex_store_indx_2
 *   [22:17] clipsm3_clprim_to_clip_vertex_store_indx_1
 *   [28:23] clipsm3_clprim_to_clip_vertex_store_indx_0
 *   [29]    clipsm3_clprim_to_clip_event
 *   [30]    clipsm3_clprim_to_clip_null_primitive
 *   [31]    clipsm3_clprim_to_clip_prim_valid
 * Bit 0 is not covered by any field defined here.
 */
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_param_cache_indx_0_MASK 0x7fe
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_param_cache_indx_0__SHIFT 0x1
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_2_MASK 0x1f800
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_2__SHIFT 0xb
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_1_MASK 0x7e0000
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_1__SHIFT 0x11
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_0_MASK 0x1f800000
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_vertex_store_indx_0__SHIFT 0x17
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_event_MASK 0x20000000
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_event__SHIFT 0x1d
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_null_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_null_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG10__clipsm3_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG11 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK. Every group
 * of fields is ordered clipsm3, clipsm2, clipsm1, clipsm0 from low to high bits.
 *   [0]..[3]        clipsm{3,2,1,0}_clip_to_clipga_event
 *   [4]..[7]        clipsm{3,2,1,0}_clip_to_clipga_clip_primitive
 *   [11:8]..[23:20] clipsm{3,2,1,0}_clip_to_clipga_clip_to_outsm_cnt (4 bits each)
 *   [24]..[27]      clipsm{3,2,1,0}_clip_to_clipga_prim_valid
 *   [28]..[31]      clipsm{3,2,1,0}_inc_clip_to_clipga_clip_to_outsm_cnt
 */
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_event_MASK 0x1
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_event__SHIFT 0x0
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_event_MASK 0x2
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_event__SHIFT 0x1
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_event_MASK 0x4
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_event__SHIFT 0x2
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_event_MASK 0x8
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_event__SHIFT 0x3
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_clip_primitive_MASK 0x10
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_clip_primitive__SHIFT 0x4
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_clip_primitive_MASK 0x20
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_clip_primitive__SHIFT 0x5
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_clip_primitive_MASK 0x40
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_clip_primitive__SHIFT 0x6
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_clip_primitive_MASK 0x80
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_clip_primitive__SHIFT 0x7
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_clip_to_outsm_cnt_MASK 0xf00
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x8
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_clip_to_outsm_cnt_MASK 0xf000
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0xc
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_clip_to_outsm_cnt_MASK 0xf0000
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x10
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_clip_to_outsm_cnt_MASK 0xf00000
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x14
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_prim_valid_MASK 0x1000000
#define CLIPPER_DEBUG_REG11__clipsm3_clip_to_clipga_prim_valid__SHIFT 0x18
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_prim_valid_MASK 0x2000000
#define CLIPPER_DEBUG_REG11__clipsm2_clip_to_clipga_prim_valid__SHIFT 0x19
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_prim_valid_MASK 0x4000000
#define CLIPPER_DEBUG_REG11__clipsm1_clip_to_clipga_prim_valid__SHIFT 0x1a
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_prim_valid_MASK 0x8000000
#define CLIPPER_DEBUG_REG11__clipsm0_clip_to_clipga_prim_valid__SHIFT 0x1b
#define CLIPPER_DEBUG_REG11__clipsm3_inc_clip_to_clipga_clip_to_outsm_cnt_MASK 0x10000000
#define CLIPPER_DEBUG_REG11__clipsm3_inc_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x1c
#define CLIPPER_DEBUG_REG11__clipsm2_inc_clip_to_clipga_clip_to_outsm_cnt_MASK 0x20000000
#define CLIPPER_DEBUG_REG11__clipsm2_inc_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x1d
#define CLIPPER_DEBUG_REG11__clipsm1_inc_clip_to_clipga_clip_to_outsm_cnt_MASK 0x40000000
#define CLIPPER_DEBUG_REG11__clipsm1_inc_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x1e
#define CLIPPER_DEBUG_REG11__clipsm0_inc_clip_to_clipga_clip_to_outsm_cnt_MASK 0x80000000
#define CLIPPER_DEBUG_REG11__clipsm0_inc_clip_to_clipga_clip_to_outsm_cnt__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG12 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [7:0]   ALWAYS_ZERO
 *   [12:8]  clip_priority_available_vte_out_clip
 *   [17:13] clip_priority_available_clip_verts
 *   [19:18] clip_priority_seq_indx_out
 *   [21:20] clip_priority_seq_indx_vert
 *   [23:22] clip_priority_seq_indx_load
 *   [24]/[25] clipsm3_clprim_to_clip_clip_primitive / _prim_valid
 *   [26]/[27] clipsm2_clprim_to_clip_clip_primitive / _prim_valid
 *   [28]/[29] clipsm1_clprim_to_clip_clip_primitive / _prim_valid
 *   [30]/[31] clipsm0_clprim_to_clip_clip_primitive / _prim_valid
 */
#define CLIPPER_DEBUG_REG12__ALWAYS_ZERO_MASK 0xff
#define CLIPPER_DEBUG_REG12__ALWAYS_ZERO__SHIFT 0x0
#define CLIPPER_DEBUG_REG12__clip_priority_available_vte_out_clip_MASK 0x1f00
#define CLIPPER_DEBUG_REG12__clip_priority_available_vte_out_clip__SHIFT 0x8
#define CLIPPER_DEBUG_REG12__clip_priority_available_clip_verts_MASK 0x3e000
#define CLIPPER_DEBUG_REG12__clip_priority_available_clip_verts__SHIFT 0xd
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_out_MASK 0xc0000
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_out__SHIFT 0x12
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_vert_MASK 0x300000
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_vert__SHIFT 0x14
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_load_MASK 0xc00000
#define CLIPPER_DEBUG_REG12__clip_priority_seq_indx_load__SHIFT 0x16
#define CLIPPER_DEBUG_REG12__clipsm3_clprim_to_clip_clip_primitive_MASK 0x1000000
#define CLIPPER_DEBUG_REG12__clipsm3_clprim_to_clip_clip_primitive__SHIFT 0x18
#define CLIPPER_DEBUG_REG12__clipsm3_clprim_to_clip_prim_valid_MASK 0x2000000
#define CLIPPER_DEBUG_REG12__clipsm3_clprim_to_clip_prim_valid__SHIFT 0x19
#define CLIPPER_DEBUG_REG12__clipsm2_clprim_to_clip_clip_primitive_MASK 0x4000000
#define CLIPPER_DEBUG_REG12__clipsm2_clprim_to_clip_clip_primitive__SHIFT 0x1a
#define CLIPPER_DEBUG_REG12__clipsm2_clprim_to_clip_prim_valid_MASK 0x8000000
#define CLIPPER_DEBUG_REG12__clipsm2_clprim_to_clip_prim_valid__SHIFT 0x1b
#define CLIPPER_DEBUG_REG12__clipsm1_clprim_to_clip_clip_primitive_MASK 0x10000000
#define CLIPPER_DEBUG_REG12__clipsm1_clprim_to_clip_clip_primitive__SHIFT 0x1c
#define CLIPPER_DEBUG_REG12__clipsm1_clprim_to_clip_prim_valid_MASK 0x20000000
#define CLIPPER_DEBUG_REG12__clipsm1_clprim_to_clip_prim_valid__SHIFT 0x1d
#define CLIPPER_DEBUG_REG12__clipsm0_clprim_to_clip_clip_primitive_MASK 0x40000000
#define CLIPPER_DEBUG_REG12__clipsm0_clprim_to_clip_clip_primitive__SHIFT 0x1e
#define CLIPPER_DEBUG_REG12__clipsm0_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG12__clipsm0_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG13 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [2:0]   clprim_in_back_state_var_indx
 *   [3]     point_clip_candidate
 *   [4]     prim_nan_kill
 *   [5]     clprim_clip_primitive
 *   [6]     clprim_cull_primitive
 *   [7]     prim_back_valid
 *   [11:8]  vertval_bits_vertex_cc_next_valid
 *   [13:12] clipcc_vertex_store_indx
 *   [14]    vte_out_orig_fifo_fifo_empty
 *   [15]    clipcode_fifo_fifo_empty
 *   [16]    ccgen_to_clipcc_fifo_empty
 *   [20:17] clip_priority_seq_indx_out_cnt
 *   [22:21] outsm_clr_rd_orig_vertices
 *   [23]    outsm_clr_rd_clipsm_wait
 *   [28:24] outsm_clr_fifo_contents
 *   [29]    outsm_clr_fifo_full
 *   [30]    outsm_clr_fifo_advanceread
 *   [31]    outsm_clr_fifo_write
 */
#define CLIPPER_DEBUG_REG13__clprim_in_back_state_var_indx_MASK 0x7
#define CLIPPER_DEBUG_REG13__clprim_in_back_state_var_indx__SHIFT 0x0
#define CLIPPER_DEBUG_REG13__point_clip_candidate_MASK 0x8
#define CLIPPER_DEBUG_REG13__point_clip_candidate__SHIFT 0x3
#define CLIPPER_DEBUG_REG13__prim_nan_kill_MASK 0x10
#define CLIPPER_DEBUG_REG13__prim_nan_kill__SHIFT 0x4
#define CLIPPER_DEBUG_REG13__clprim_clip_primitive_MASK 0x20
#define CLIPPER_DEBUG_REG13__clprim_clip_primitive__SHIFT 0x5
#define CLIPPER_DEBUG_REG13__clprim_cull_primitive_MASK 0x40
#define CLIPPER_DEBUG_REG13__clprim_cull_primitive__SHIFT 0x6
#define CLIPPER_DEBUG_REG13__prim_back_valid_MASK 0x80
#define CLIPPER_DEBUG_REG13__prim_back_valid__SHIFT 0x7
#define CLIPPER_DEBUG_REG13__vertval_bits_vertex_cc_next_valid_MASK 0xf00
#define CLIPPER_DEBUG_REG13__vertval_bits_vertex_cc_next_valid__SHIFT 0x8
#define CLIPPER_DEBUG_REG13__clipcc_vertex_store_indx_MASK 0x3000
#define CLIPPER_DEBUG_REG13__clipcc_vertex_store_indx__SHIFT 0xc
#define CLIPPER_DEBUG_REG13__vte_out_orig_fifo_fifo_empty_MASK 0x4000
#define CLIPPER_DEBUG_REG13__vte_out_orig_fifo_fifo_empty__SHIFT 0xe
#define CLIPPER_DEBUG_REG13__clipcode_fifo_fifo_empty_MASK 0x8000
#define CLIPPER_DEBUG_REG13__clipcode_fifo_fifo_empty__SHIFT 0xf
#define CLIPPER_DEBUG_REG13__ccgen_to_clipcc_fifo_empty_MASK 0x10000
#define CLIPPER_DEBUG_REG13__ccgen_to_clipcc_fifo_empty__SHIFT 0x10
#define CLIPPER_DEBUG_REG13__clip_priority_seq_indx_out_cnt_MASK 0x1e0000
#define CLIPPER_DEBUG_REG13__clip_priority_seq_indx_out_cnt__SHIFT 0x11
#define CLIPPER_DEBUG_REG13__outsm_clr_rd_orig_vertices_MASK 0x600000
#define CLIPPER_DEBUG_REG13__outsm_clr_rd_orig_vertices__SHIFT 0x15
#define CLIPPER_DEBUG_REG13__outsm_clr_rd_clipsm_wait_MASK 0x800000
#define CLIPPER_DEBUG_REG13__outsm_clr_rd_clipsm_wait__SHIFT 0x17
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_contents_MASK 0x1f000000
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_contents__SHIFT 0x18
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_full_MASK 0x20000000
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_full__SHIFT 0x1d
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_advanceread_MASK 0x40000000
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_advanceread__SHIFT 0x1e
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_write_MASK 0x80000000
#define CLIPPER_DEBUG_REG13__outsm_clr_fifo_write__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG14 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [5:0]   clprim_in_back_vertex_store_indx_2
 *   [11:6]  clprim_in_back_vertex_store_indx_1
 *   [17:12] clprim_in_back_vertex_store_indx_0
 *   [18]    outputclprimtoclip_null_primitive
 *   [19]    clprim_in_back_end_of_packet
 *   [20]    clprim_in_back_first_prim_of_slot
 *   [23:21] clprim_in_back_deallocate_slot
 *   [29:24] clprim_in_back_event_id
 *   [30]    clprim_in_back_event
 *   [31]    prim_back_valid
 */
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_2_MASK 0x3f
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_2__SHIFT 0x0
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_1_MASK 0xfc0
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_1__SHIFT 0x6
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_0_MASK 0x3f000
#define CLIPPER_DEBUG_REG14__clprim_in_back_vertex_store_indx_0__SHIFT 0xc
#define CLIPPER_DEBUG_REG14__outputclprimtoclip_null_primitive_MASK 0x40000
#define CLIPPER_DEBUG_REG14__outputclprimtoclip_null_primitive__SHIFT 0x12
#define CLIPPER_DEBUG_REG14__clprim_in_back_end_of_packet_MASK 0x80000
#define CLIPPER_DEBUG_REG14__clprim_in_back_end_of_packet__SHIFT 0x13
#define CLIPPER_DEBUG_REG14__clprim_in_back_first_prim_of_slot_MASK 0x100000
#define CLIPPER_DEBUG_REG14__clprim_in_back_first_prim_of_slot__SHIFT 0x14
#define CLIPPER_DEBUG_REG14__clprim_in_back_deallocate_slot_MASK 0xe00000
#define CLIPPER_DEBUG_REG14__clprim_in_back_deallocate_slot__SHIFT 0x15
#define CLIPPER_DEBUG_REG14__clprim_in_back_event_id_MASK 0x3f000000
#define CLIPPER_DEBUG_REG14__clprim_in_back_event_id__SHIFT 0x18
#define CLIPPER_DEBUG_REG14__clprim_in_back_event_MASK 0x40000000
#define CLIPPER_DEBUG_REG14__clprim_in_back_event__SHIFT 0x1e
#define CLIPPER_DEBUG_REG14__prim_back_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG14__prim_back_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG15 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [15:0]  vertval_bits_vertex_vertex_store_msb
 *   [20:16] primic_to_clprim_fifo_vertex_store_indx_2
 *   [25:21] primic_to_clprim_fifo_vertex_store_indx_1
 *   [30:26] primic_to_clprim_fifo_vertex_store_indx_0
 *   [31]    primic_to_clprim_valid
 */
#define CLIPPER_DEBUG_REG15__vertval_bits_vertex_vertex_store_msb_MASK 0xffff
#define CLIPPER_DEBUG_REG15__vertval_bits_vertex_vertex_store_msb__SHIFT 0x0
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_2_MASK 0x1f0000
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_2__SHIFT 0x10
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_1_MASK 0x3e00000
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_1__SHIFT 0x15
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_0_MASK 0x7c000000
#define CLIPPER_DEBUG_REG15__primic_to_clprim_fifo_vertex_store_indx_0__SHIFT 0x1a
#define CLIPPER_DEBUG_REG15__primic_to_clprim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG15__primic_to_clprim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG16 bit-field layout (all fields carry the sm0_ prefix).
 * Bit ranges are derived from the mask values below; each __SHIFT is the
 * lowest set bit of its _MASK.
 *   [6:0]   sm0_prim_end_state
 *   [7]     sm0_ps_expand
 *   [12:8]  sm0_clip_vert_cnt
 *   [17:13] sm0_vertex_clip_cnt
 *   [18]    sm0_inv_to_clip_data_valid_1
 *   [19]    sm0_inv_to_clip_data_valid_0
 *   [26:20] sm0_current_state
 *   [27]    sm0_clip_to_clipga_clip_to_outsm_cnt_eq0
 *   [28]    sm0_clip_to_outsm_fifo_full
 *   [29]    sm0_highest_priority_seq
 *   [30]    sm0_outputcliptoclipga_0
 *   [31]    sm0_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG16__sm0_prim_end_state_MASK 0x7f
#define CLIPPER_DEBUG_REG16__sm0_prim_end_state__SHIFT 0x0
#define CLIPPER_DEBUG_REG16__sm0_ps_expand_MASK 0x80
#define CLIPPER_DEBUG_REG16__sm0_ps_expand__SHIFT 0x7
#define CLIPPER_DEBUG_REG16__sm0_clip_vert_cnt_MASK 0x1f00
#define CLIPPER_DEBUG_REG16__sm0_clip_vert_cnt__SHIFT 0x8
#define CLIPPER_DEBUG_REG16__sm0_vertex_clip_cnt_MASK 0x3e000
#define CLIPPER_DEBUG_REG16__sm0_vertex_clip_cnt__SHIFT 0xd
#define CLIPPER_DEBUG_REG16__sm0_inv_to_clip_data_valid_1_MASK 0x40000
#define CLIPPER_DEBUG_REG16__sm0_inv_to_clip_data_valid_1__SHIFT 0x12
#define CLIPPER_DEBUG_REG16__sm0_inv_to_clip_data_valid_0_MASK 0x80000
#define CLIPPER_DEBUG_REG16__sm0_inv_to_clip_data_valid_0__SHIFT 0x13
#define CLIPPER_DEBUG_REG16__sm0_current_state_MASK 0x7f00000
#define CLIPPER_DEBUG_REG16__sm0_current_state__SHIFT 0x14
#define CLIPPER_DEBUG_REG16__sm0_clip_to_clipga_clip_to_outsm_cnt_eq0_MASK 0x8000000
#define CLIPPER_DEBUG_REG16__sm0_clip_to_clipga_clip_to_outsm_cnt_eq0__SHIFT 0x1b
#define CLIPPER_DEBUG_REG16__sm0_clip_to_outsm_fifo_full_MASK 0x10000000
#define CLIPPER_DEBUG_REG16__sm0_clip_to_outsm_fifo_full__SHIFT 0x1c
#define CLIPPER_DEBUG_REG16__sm0_highest_priority_seq_MASK 0x20000000
#define CLIPPER_DEBUG_REG16__sm0_highest_priority_seq__SHIFT 0x1d
#define CLIPPER_DEBUG_REG16__sm0_outputcliptoclipga_0_MASK 0x40000000
#define CLIPPER_DEBUG_REG16__sm0_outputcliptoclipga_0__SHIFT 0x1e
#define CLIPPER_DEBUG_REG16__sm0_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG16__sm0_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG17 bit-field layout (all fields carry the sm1_ prefix).
 * Bit ranges are derived from the mask values below; each __SHIFT is the
 * lowest set bit of its _MASK.
 *   [6:0]   sm1_prim_end_state
 *   [7]     sm1_ps_expand
 *   [12:8]  sm1_clip_vert_cnt
 *   [17:13] sm1_vertex_clip_cnt
 *   [18]    sm1_inv_to_clip_data_valid_1
 *   [19]    sm1_inv_to_clip_data_valid_0
 *   [26:20] sm1_current_state
 *   [27]    sm1_clip_to_clipga_clip_to_outsm_cnt_eq0
 *   [28]    sm1_clip_to_outsm_fifo_full
 *   [29]    sm1_highest_priority_seq
 *   [30]    sm1_outputcliptoclipga_0
 *   [31]    sm1_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG17__sm1_prim_end_state_MASK 0x7f
#define CLIPPER_DEBUG_REG17__sm1_prim_end_state__SHIFT 0x0
#define CLIPPER_DEBUG_REG17__sm1_ps_expand_MASK 0x80
#define CLIPPER_DEBUG_REG17__sm1_ps_expand__SHIFT 0x7
#define CLIPPER_DEBUG_REG17__sm1_clip_vert_cnt_MASK 0x1f00
#define CLIPPER_DEBUG_REG17__sm1_clip_vert_cnt__SHIFT 0x8
#define CLIPPER_DEBUG_REG17__sm1_vertex_clip_cnt_MASK 0x3e000
#define CLIPPER_DEBUG_REG17__sm1_vertex_clip_cnt__SHIFT 0xd
#define CLIPPER_DEBUG_REG17__sm1_inv_to_clip_data_valid_1_MASK 0x40000
#define CLIPPER_DEBUG_REG17__sm1_inv_to_clip_data_valid_1__SHIFT 0x12
#define CLIPPER_DEBUG_REG17__sm1_inv_to_clip_data_valid_0_MASK 0x80000
#define CLIPPER_DEBUG_REG17__sm1_inv_to_clip_data_valid_0__SHIFT 0x13
#define CLIPPER_DEBUG_REG17__sm1_current_state_MASK 0x7f00000
#define CLIPPER_DEBUG_REG17__sm1_current_state__SHIFT 0x14
#define CLIPPER_DEBUG_REG17__sm1_clip_to_clipga_clip_to_outsm_cnt_eq0_MASK 0x8000000
#define CLIPPER_DEBUG_REG17__sm1_clip_to_clipga_clip_to_outsm_cnt_eq0__SHIFT 0x1b
#define CLIPPER_DEBUG_REG17__sm1_clip_to_outsm_fifo_full_MASK 0x10000000
#define CLIPPER_DEBUG_REG17__sm1_clip_to_outsm_fifo_full__SHIFT 0x1c
#define CLIPPER_DEBUG_REG17__sm1_highest_priority_seq_MASK 0x20000000
#define CLIPPER_DEBUG_REG17__sm1_highest_priority_seq__SHIFT 0x1d
#define CLIPPER_DEBUG_REG17__sm1_outputcliptoclipga_0_MASK 0x40000000
#define CLIPPER_DEBUG_REG17__sm1_outputcliptoclipga_0__SHIFT 0x1e
#define CLIPPER_DEBUG_REG17__sm1_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG17__sm1_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG18 bit-field layout (all fields carry the sm2_ prefix).
 * Bit ranges are derived from the mask values below; each __SHIFT is the
 * lowest set bit of its _MASK.
 *   [6:0]   sm2_prim_end_state
 *   [7]     sm2_ps_expand
 *   [12:8]  sm2_clip_vert_cnt
 *   [17:13] sm2_vertex_clip_cnt
 *   [18]    sm2_inv_to_clip_data_valid_1
 *   [19]    sm2_inv_to_clip_data_valid_0
 *   [26:20] sm2_current_state
 *   [27]    sm2_clip_to_clipga_clip_to_outsm_cnt_eq0
 *   [28]    sm2_clip_to_outsm_fifo_full
 *   [29]    sm2_highest_priority_seq
 *   [30]    sm2_outputcliptoclipga_0
 *   [31]    sm2_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG18__sm2_prim_end_state_MASK 0x7f
#define CLIPPER_DEBUG_REG18__sm2_prim_end_state__SHIFT 0x0
#define CLIPPER_DEBUG_REG18__sm2_ps_expand_MASK 0x80
#define CLIPPER_DEBUG_REG18__sm2_ps_expand__SHIFT 0x7
#define CLIPPER_DEBUG_REG18__sm2_clip_vert_cnt_MASK 0x1f00
#define CLIPPER_DEBUG_REG18__sm2_clip_vert_cnt__SHIFT 0x8
#define CLIPPER_DEBUG_REG18__sm2_vertex_clip_cnt_MASK 0x3e000
#define CLIPPER_DEBUG_REG18__sm2_vertex_clip_cnt__SHIFT 0xd
#define CLIPPER_DEBUG_REG18__sm2_inv_to_clip_data_valid_1_MASK 0x40000
#define CLIPPER_DEBUG_REG18__sm2_inv_to_clip_data_valid_1__SHIFT 0x12
#define CLIPPER_DEBUG_REG18__sm2_inv_to_clip_data_valid_0_MASK 0x80000
#define CLIPPER_DEBUG_REG18__sm2_inv_to_clip_data_valid_0__SHIFT 0x13
#define CLIPPER_DEBUG_REG18__sm2_current_state_MASK 0x7f00000
#define CLIPPER_DEBUG_REG18__sm2_current_state__SHIFT 0x14
#define CLIPPER_DEBUG_REG18__sm2_clip_to_clipga_clip_to_outsm_cnt_eq0_MASK 0x8000000
#define CLIPPER_DEBUG_REG18__sm2_clip_to_clipga_clip_to_outsm_cnt_eq0__SHIFT 0x1b
#define CLIPPER_DEBUG_REG18__sm2_clip_to_outsm_fifo_full_MASK 0x10000000
#define CLIPPER_DEBUG_REG18__sm2_clip_to_outsm_fifo_full__SHIFT 0x1c
#define CLIPPER_DEBUG_REG18__sm2_highest_priority_seq_MASK 0x20000000
#define CLIPPER_DEBUG_REG18__sm2_highest_priority_seq__SHIFT 0x1d
#define CLIPPER_DEBUG_REG18__sm2_outputcliptoclipga_0_MASK 0x40000000
#define CLIPPER_DEBUG_REG18__sm2_outputcliptoclipga_0__SHIFT 0x1e
#define CLIPPER_DEBUG_REG18__sm2_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG18__sm2_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * CLIPPER_DEBUG_REG19 bit-field layout (all fields carry the sm3_ prefix).
 * Bit ranges are derived from the mask values below; each __SHIFT is the
 * lowest set bit of its _MASK.
 *   [6:0]   sm3_prim_end_state
 *   [7]     sm3_ps_expand
 *   [12:8]  sm3_clip_vert_cnt
 *   [17:13] sm3_vertex_clip_cnt
 *   [18]    sm3_inv_to_clip_data_valid_1
 *   [19]    sm3_inv_to_clip_data_valid_0
 *   [26:20] sm3_current_state
 *   [27]    sm3_clip_to_clipga_clip_to_outsm_cnt_eq0
 *   [28]    sm3_clip_to_outsm_fifo_full
 *   [29]    sm3_highest_priority_seq
 *   [30]    sm3_outputcliptoclipga_0
 *   [31]    sm3_clprim_to_clip_prim_valid
 */
#define CLIPPER_DEBUG_REG19__sm3_prim_end_state_MASK 0x7f
#define CLIPPER_DEBUG_REG19__sm3_prim_end_state__SHIFT 0x0
#define CLIPPER_DEBUG_REG19__sm3_ps_expand_MASK 0x80
#define CLIPPER_DEBUG_REG19__sm3_ps_expand__SHIFT 0x7
#define CLIPPER_DEBUG_REG19__sm3_clip_vert_cnt_MASK 0x1f00
#define CLIPPER_DEBUG_REG19__sm3_clip_vert_cnt__SHIFT 0x8
#define CLIPPER_DEBUG_REG19__sm3_vertex_clip_cnt_MASK 0x3e000
#define CLIPPER_DEBUG_REG19__sm3_vertex_clip_cnt__SHIFT 0xd
#define CLIPPER_DEBUG_REG19__sm3_inv_to_clip_data_valid_1_MASK 0x40000
#define CLIPPER_DEBUG_REG19__sm3_inv_to_clip_data_valid_1__SHIFT 0x12
#define CLIPPER_DEBUG_REG19__sm3_inv_to_clip_data_valid_0_MASK 0x80000
#define CLIPPER_DEBUG_REG19__sm3_inv_to_clip_data_valid_0__SHIFT 0x13
#define CLIPPER_DEBUG_REG19__sm3_current_state_MASK 0x7f00000
#define CLIPPER_DEBUG_REG19__sm3_current_state__SHIFT 0x14
#define CLIPPER_DEBUG_REG19__sm3_clip_to_clipga_clip_to_outsm_cnt_eq0_MASK 0x8000000
#define CLIPPER_DEBUG_REG19__sm3_clip_to_clipga_clip_to_outsm_cnt_eq0__SHIFT 0x1b
#define CLIPPER_DEBUG_REG19__sm3_clip_to_outsm_fifo_full_MASK 0x10000000
#define CLIPPER_DEBUG_REG19__sm3_clip_to_outsm_fifo_full__SHIFT 0x1c
#define CLIPPER_DEBUG_REG19__sm3_highest_priority_seq_MASK 0x20000000
#define CLIPPER_DEBUG_REG19__sm3_highest_priority_seq__SHIFT 0x1d
#define CLIPPER_DEBUG_REG19__sm3_outputcliptoclipga_0_MASK 0x40000000
#define CLIPPER_DEBUG_REG19__sm3_outputcliptoclipga_0__SHIFT 0x1e
#define CLIPPER_DEBUG_REG19__sm3_clprim_to_clip_prim_valid_MASK 0x80000000
#define CLIPPER_DEBUG_REG19__sm3_clprim_to_clip_prim_valid__SHIFT 0x1f
/*
 * SXIFCCG_DEBUG_REG0 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [5:0]   position_address
 *   [8:6]   point_address
 *   [11:9]  sx_pending_rd_state_var_indx
 *   [15:12] sx_pending_rd_req_mask
 *   [25:16] sx_pending_rd_pci
 *   [27:26] sx_pending_rd_aux_sel
 *   [29:28] sx_pending_rd_sp_id
 *   [30]    sx_pending_rd_aux_inc
 *   [31]    sx_pending_rd_advance
 */
#define SXIFCCG_DEBUG_REG0__position_address_MASK 0x3f
#define SXIFCCG_DEBUG_REG0__position_address__SHIFT 0x0
#define SXIFCCG_DEBUG_REG0__point_address_MASK 0x1c0
#define SXIFCCG_DEBUG_REG0__point_address__SHIFT 0x6
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_state_var_indx_MASK 0xe00
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_state_var_indx__SHIFT 0x9
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_req_mask_MASK 0xf000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_req_mask__SHIFT 0xc
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_pci_MASK 0x3ff0000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_pci__SHIFT 0x10
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_aux_sel_MASK 0xc000000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_aux_sel__SHIFT 0x1a
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_sp_id_MASK 0x30000000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_sp_id__SHIFT 0x1c
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_aux_inc_MASK 0x40000000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_aux_inc__SHIFT 0x1e
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_advance_MASK 0x80000000
#define SXIFCCG_DEBUG_REG0__sx_pending_rd_advance__SHIFT 0x1f
/*
 * SXIFCCG_DEBUG_REG1 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [6:0]   available_positions
 *   [9:7]   sx_receive_indx
 *   [14:10] sx_pending_fifo_contents
 *   [15]    statevar_bits_vs_out_misc_vec_ena
 *   [19:16] statevar_bits_disable_sp
 *   [21:20] aux_sel
 *   [22]    sx_to_pa_empty_1
 *   [23]    sx_to_pa_empty_0
 *   [27:24] pasx_req_cnt_1
 *   [31:28] pasx_req_cnt_0
 */
#define SXIFCCG_DEBUG_REG1__available_positions_MASK 0x7f
#define SXIFCCG_DEBUG_REG1__available_positions__SHIFT 0x0
#define SXIFCCG_DEBUG_REG1__sx_receive_indx_MASK 0x380
#define SXIFCCG_DEBUG_REG1__sx_receive_indx__SHIFT 0x7
#define SXIFCCG_DEBUG_REG1__sx_pending_fifo_contents_MASK 0x7c00
#define SXIFCCG_DEBUG_REG1__sx_pending_fifo_contents__SHIFT 0xa
#define SXIFCCG_DEBUG_REG1__statevar_bits_vs_out_misc_vec_ena_MASK 0x8000
#define SXIFCCG_DEBUG_REG1__statevar_bits_vs_out_misc_vec_ena__SHIFT 0xf
#define SXIFCCG_DEBUG_REG1__statevar_bits_disable_sp_MASK 0xf0000
#define SXIFCCG_DEBUG_REG1__statevar_bits_disable_sp__SHIFT 0x10
#define SXIFCCG_DEBUG_REG1__aux_sel_MASK 0x300000
#define SXIFCCG_DEBUG_REG1__aux_sel__SHIFT 0x14
#define SXIFCCG_DEBUG_REG1__sx_to_pa_empty_1_MASK 0x400000
#define SXIFCCG_DEBUG_REG1__sx_to_pa_empty_1__SHIFT 0x16
#define SXIFCCG_DEBUG_REG1__sx_to_pa_empty_0_MASK 0x800000
#define SXIFCCG_DEBUG_REG1__sx_to_pa_empty_0__SHIFT 0x17
#define SXIFCCG_DEBUG_REG1__pasx_req_cnt_1_MASK 0xf000000
#define SXIFCCG_DEBUG_REG1__pasx_req_cnt_1__SHIFT 0x18
#define SXIFCCG_DEBUG_REG1__pasx_req_cnt_0_MASK 0xf0000000
#define SXIFCCG_DEBUG_REG1__pasx_req_cnt_0__SHIFT 0x1c
/*
 * SXIFCCG_DEBUG_REG2 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [6:0]   param_cache_base
 *   [8:7]   sx_aux
 *   [14:9]  sx_request_indx
 *   [15]    req_active_verts_loaded
 *   [22:16] req_active_verts
 *   [25:23] vgt_to_ccgen_state_var_indx
 *   [31:26] vgt_to_ccgen_active_verts
 */
#define SXIFCCG_DEBUG_REG2__param_cache_base_MASK 0x7f
#define SXIFCCG_DEBUG_REG2__param_cache_base__SHIFT 0x0
#define SXIFCCG_DEBUG_REG2__sx_aux_MASK 0x180
#define SXIFCCG_DEBUG_REG2__sx_aux__SHIFT 0x7
#define SXIFCCG_DEBUG_REG2__sx_request_indx_MASK 0x7e00
#define SXIFCCG_DEBUG_REG2__sx_request_indx__SHIFT 0x9
#define SXIFCCG_DEBUG_REG2__req_active_verts_loaded_MASK 0x8000
#define SXIFCCG_DEBUG_REG2__req_active_verts_loaded__SHIFT 0xf
#define SXIFCCG_DEBUG_REG2__req_active_verts_MASK 0x7f0000
#define SXIFCCG_DEBUG_REG2__req_active_verts__SHIFT 0x10
#define SXIFCCG_DEBUG_REG2__vgt_to_ccgen_state_var_indx_MASK 0x3800000
#define SXIFCCG_DEBUG_REG2__vgt_to_ccgen_state_var_indx__SHIFT 0x17
#define SXIFCCG_DEBUG_REG2__vgt_to_ccgen_active_verts_MASK 0xfc000000
#define SXIFCCG_DEBUG_REG2__vgt_to_ccgen_active_verts__SHIFT 0x1a
/*
 * SXIFCCG_DEBUG_REG3 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [7:0]   ALWAYS_ZERO
 *   [11:8]  vertex_fifo_entriesavailable
 *   [12]    statevar_bits_vs_out_ccdist1_vec_ena
 *   [13]    statevar_bits_vs_out_ccdist0_vec_ena
 *   [20:14] available_positions
 *   [22:21] current_state
 *   [23]    vertex_fifo_empty
 *   [24]    vertex_fifo_full
 *   [25]    sx0_receive_fifo_empty
 *   [26]    sx0_receive_fifo_full
 *   [27]    vgt_to_ccgen_fifo_empty
 *   [28]    vgt_to_ccgen_fifo_full
 *   [29]    ccgen_to_clipcc_fifo_full
 *   [30]    sx0_receive_fifo_write
 *   [31]    ccgen_to_clipcc_write
 */
#define SXIFCCG_DEBUG_REG3__ALWAYS_ZERO_MASK 0xff
#define SXIFCCG_DEBUG_REG3__ALWAYS_ZERO__SHIFT 0x0
#define SXIFCCG_DEBUG_REG3__vertex_fifo_entriesavailable_MASK 0xf00
#define SXIFCCG_DEBUG_REG3__vertex_fifo_entriesavailable__SHIFT 0x8
#define SXIFCCG_DEBUG_REG3__statevar_bits_vs_out_ccdist1_vec_ena_MASK 0x1000
#define SXIFCCG_DEBUG_REG3__statevar_bits_vs_out_ccdist1_vec_ena__SHIFT 0xc
#define SXIFCCG_DEBUG_REG3__statevar_bits_vs_out_ccdist0_vec_ena_MASK 0x2000
#define SXIFCCG_DEBUG_REG3__statevar_bits_vs_out_ccdist0_vec_ena__SHIFT 0xd
#define SXIFCCG_DEBUG_REG3__available_positions_MASK 0x1fc000
#define SXIFCCG_DEBUG_REG3__available_positions__SHIFT 0xe
#define SXIFCCG_DEBUG_REG3__current_state_MASK 0x600000
#define SXIFCCG_DEBUG_REG3__current_state__SHIFT 0x15
#define SXIFCCG_DEBUG_REG3__vertex_fifo_empty_MASK 0x800000
#define SXIFCCG_DEBUG_REG3__vertex_fifo_empty__SHIFT 0x17
#define SXIFCCG_DEBUG_REG3__vertex_fifo_full_MASK 0x1000000
#define SXIFCCG_DEBUG_REG3__vertex_fifo_full__SHIFT 0x18
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_empty_MASK 0x2000000
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_empty__SHIFT 0x19
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_full_MASK 0x4000000
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_full__SHIFT 0x1a
#define SXIFCCG_DEBUG_REG3__vgt_to_ccgen_fifo_empty_MASK 0x8000000
#define SXIFCCG_DEBUG_REG3__vgt_to_ccgen_fifo_empty__SHIFT 0x1b
#define SXIFCCG_DEBUG_REG3__vgt_to_ccgen_fifo_full_MASK 0x10000000
#define SXIFCCG_DEBUG_REG3__vgt_to_ccgen_fifo_full__SHIFT 0x1c
#define SXIFCCG_DEBUG_REG3__ccgen_to_clipcc_fifo_full_MASK 0x20000000
#define SXIFCCG_DEBUG_REG3__ccgen_to_clipcc_fifo_full__SHIFT 0x1d
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_write_MASK 0x40000000
#define SXIFCCG_DEBUG_REG3__sx0_receive_fifo_write__SHIFT 0x1e
#define SXIFCCG_DEBUG_REG3__ccgen_to_clipcc_write_MASK 0x80000000
#define SXIFCCG_DEBUG_REG3__ccgen_to_clipcc_write__SHIFT 0x1f
/*
 * SETUP_DEBUG_REG0 bit-field layout. Bit ranges are derived from the mask
 * values below; each __SHIFT is the lowest set bit of its _MASK.
 *   [1:0]   su_baryc_cntl_state
 *   [5:2]   su_cntl_state
 *   [13:8]  pmode_state
 *   [14]    ge_stallb
 *   [15]    geom_enable
 *   [17:16] su_clip_baryc_free
 *   [18]    su_clip_rtr
 *   [19]    pfifo_busy
 *   [20]    su_cntl_busy
 *   [21]    geom_busy
 *   [27:22] event_id_gated
 *   [28]    event_gated
 *   [29]    pmode_prim_gated
 *   [30]    su_dyn_sclk_vld
 *   [31]    cl_dyn_sclk_vld
 * Bits [7:6] are not covered by any field defined here.
 */
#define SETUP_DEBUG_REG0__su_baryc_cntl_state_MASK 0x3
#define SETUP_DEBUG_REG0__su_baryc_cntl_state__SHIFT 0x0
#define SETUP_DEBUG_REG0__su_cntl_state_MASK 0x3c
#define SETUP_DEBUG_REG0__su_cntl_state__SHIFT 0x2
#define SETUP_DEBUG_REG0__pmode_state_MASK 0x3f00
#define SETUP_DEBUG_REG0__pmode_state__SHIFT 0x8
#define SETUP_DEBUG_REG0__ge_stallb_MASK 0x4000
#define SETUP_DEBUG_REG0__ge_stallb__SHIFT 0xe
#define SETUP_DEBUG_REG0__geom_enable_MASK 0x8000
#define SETUP_DEBUG_REG0__geom_enable__SHIFT 0xf
#define SETUP_DEBUG_REG0__su_clip_baryc_free_MASK 0x30000
#define SETUP_DEBUG_REG0__su_clip_baryc_free__SHIFT 0x10
#define SETUP_DEBUG_REG0__su_clip_rtr_MASK 0x40000
#define SETUP_DEBUG_REG0__su_clip_rtr__SHIFT 0x12
#define SETUP_DEBUG_REG0__pfifo_busy_MASK 0x80000
#define SETUP_DEBUG_REG0__pfifo_busy__SHIFT 0x13
#define SETUP_DEBUG_REG0__su_cntl_busy_MASK 0x100000
#define SETUP_DEBUG_REG0__su_cntl_busy__SHIFT 0x14
#define SETUP_DEBUG_REG0__geom_busy_MASK 0x200000
#define SETUP_DEBUG_REG0__geom_busy__SHIFT 0x15
#define SETUP_DEBUG_REG0__event_id_gated_MASK 0xfc00000
#define SETUP_DEBUG_REG0__event_id_gated__SHIFT 0x16
#define SETUP_DEBUG_REG0__event_gated_MASK 0x10000000
#define SETUP_DEBUG_REG0__event_gated__SHIFT 0x1c
#define SETUP_DEBUG_REG0__pmode_prim_gated_MASK 0x20000000
#define SETUP_DEBUG_REG0__pmode_prim_gated__SHIFT 0x1d
#define SETUP_DEBUG_REG0__su_dyn_sclk_vld_MASK 0x40000000
#define SETUP_DEBUG_REG0__su_dyn_sclk_vld__SHIFT 0x1e
#define SETUP_DEBUG_REG0__cl_dyn_sclk_vld_MASK 0x80000000
#define SETUP_DEBUG_REG0__cl_dyn_sclk_vld__SHIFT 0x1f
/*
 * SETUP_DEBUG_REG1 bit-field layout (derived from the mask values below):
 *   [15:0]  y_sort0_gated_23_8
 *   [31:16] x_sort0_gated_23_8
 */
#define SETUP_DEBUG_REG1__y_sort0_gated_23_8_MASK 0xffff
#define SETUP_DEBUG_REG1__y_sort0_gated_23_8__SHIFT 0x0
#define SETUP_DEBUG_REG1__x_sort0_gated_23_8_MASK 0xffff0000
#define SETUP_DEBUG_REG1__x_sort0_gated_23_8__SHIFT 0x10
/*
 * SETUP_DEBUG_REG2 bit-field layout (derived from the mask values below):
 *   [15:0]  y_sort1_gated_23_8
 *   [31:16] x_sort1_gated_23_8
 */
#define SETUP_DEBUG_REG2__y_sort1_gated_23_8_MASK 0xffff
#define SETUP_DEBUG_REG2__y_sort1_gated_23_8__SHIFT 0x0
#define SETUP_DEBUG_REG2__x_sort1_gated_23_8_MASK 0xffff0000
#define SETUP_DEBUG_REG2__x_sort1_gated_23_8__SHIFT 0x10
/*
 * SETUP_DEBUG_REG3 bit-field layout (derived from the mask values below):
 *   [15:0]  y_sort2_gated_23_8
 *   [31:16] x_sort2_gated_23_8
 */
#define SETUP_DEBUG_REG3__y_sort2_gated_23_8_MASK 0xffff
#define SETUP_DEBUG_REG3__y_sort2_gated_23_8__SHIFT 0x0
#define SETUP_DEBUG_REG3__x_sort2_gated_23_8_MASK 0xffff0000
#define SETUP_DEBUG_REG3__x_sort2_gated_23_8__SHIFT 0x10
#define SETUP_DEBUG_REG4__attr_indx_sort0_gated_MASK 0x3fff
#define SETUP_DEBUG_REG4__attr_indx_sort0_gated__SHIFT 0x0
#define SETUP_DEBUG_REG4__null_prim_gated_MASK 0x4000
#define SETUP_DEBUG_REG4__null_prim_gated__SHIFT 0xe
#define SETUP_DEBUG_REG4__backfacing_gated_MASK 0x8000
#define SETUP_DEBUG_REG4__backfacing_gated__SHIFT 0xf
#define SETUP_DEBUG_REG4__st_indx_gated_MASK 0x70000
#define SETUP_DEBUG_REG4__st_indx_gated__SHIFT 0x10
#define SETUP_DEBUG_REG4__clipped_gated_MASK 0x80000
#define SETUP_DEBUG_REG4__clipped_gated__SHIFT 0x13
#define SETUP_DEBUG_REG4__dealloc_slot_gated_MASK 0x700000
#define SETUP_DEBUG_REG4__dealloc_slot_gated__SHIFT 0x14
#define SETUP_DEBUG_REG4__xmajor_gated_MASK 0x800000
#define SETUP_DEBUG_REG4__xmajor_gated__SHIFT 0x17
#define SETUP_DEBUG_REG4__diamond_rule_gated_MASK 0x3000000
#define SETUP_DEBUG_REG4__diamond_rule_gated__SHIFT 0x18
#define SETUP_DEBUG_REG4__type_gated_MASK 0x1c000000
#define SETUP_DEBUG_REG4__type_gated__SHIFT 0x1a
#define SETUP_DEBUG_REG4__fpov_gated_MASK 0x60000000
#define SETUP_DEBUG_REG4__fpov_gated__SHIFT 0x1d
#define SETUP_DEBUG_REG4__eop_gated_MASK 0x80000000
#define SETUP_DEBUG_REG4__eop_gated__SHIFT 0x1f
#define SETUP_DEBUG_REG5__attr_indx_sort2_gated_MASK 0x3fff
#define SETUP_DEBUG_REG5__attr_indx_sort2_gated__SHIFT 0x0
#define SETUP_DEBUG_REG5__attr_indx_sort1_gated_MASK 0xfffc000
#define SETUP_DEBUG_REG5__attr_indx_sort1_gated__SHIFT 0xe
#define SETUP_DEBUG_REG5__provoking_vtx_gated_MASK 0x30000000
#define SETUP_DEBUG_REG5__provoking_vtx_gated__SHIFT 0x1c
#define SETUP_DEBUG_REG5__valid_prim_gated_MASK 0x40000000
#define SETUP_DEBUG_REG5__valid_prim_gated__SHIFT 0x1e
#define SETUP_DEBUG_REG5__pa_reg_sclk_vld_MASK 0x80000000
#define SETUP_DEBUG_REG5__pa_reg_sclk_vld__SHIFT 0x1f
/*
 * PA_SC_DEBUG_REG0 / PA_SC_DEBUG_REG1: identical layouts with two 2-bit fields,
 * FIELD0 in bits 1:0 and FIELD1 in bits 3:2. Bits 31:4 have no field defined here.
 */
#define PA_SC_DEBUG_REG0__REG0_FIELD0_MASK 0x3
#define PA_SC_DEBUG_REG0__REG0_FIELD0__SHIFT 0x0
#define PA_SC_DEBUG_REG0__REG0_FIELD1_MASK 0xc
#define PA_SC_DEBUG_REG0__REG0_FIELD1__SHIFT 0x2
#define PA_SC_DEBUG_REG1__REG1_FIELD0_MASK 0x3
#define PA_SC_DEBUG_REG1__REG1_FIELD0__SHIFT 0x0
#define PA_SC_DEBUG_REG1__REG1_FIELD1_MASK 0xc
#define PA_SC_DEBUG_REG1__REG1_FIELD1__SHIFT 0x2
/*
 * COMPUTE_DISPATCH_INITIATOR bit-field constants (_MASK selects the bits,
 * __SHIFT is the index of the lowest bit of the field).
 * Layout as defined below: single-bit flags in bits 0..6, the 3-bit
 * DISPATCH_CACHE_CNTL in bits 9:7, single-bit flags in bits 10..12 and
 * RESTORE in bit 14. Bit 13 and bits 31:15 have no field defined here.
 */
#define COMPUTE_DISPATCH_INITIATOR__COMPUTE_SHADER_EN_MASK 0x1
#define COMPUTE_DISPATCH_INITIATOR__COMPUTE_SHADER_EN__SHIFT 0x0
#define COMPUTE_DISPATCH_INITIATOR__PARTIAL_TG_EN_MASK 0x2
#define COMPUTE_DISPATCH_INITIATOR__PARTIAL_TG_EN__SHIFT 0x1
#define COMPUTE_DISPATCH_INITIATOR__FORCE_START_AT_000_MASK 0x4
#define COMPUTE_DISPATCH_INITIATOR__FORCE_START_AT_000__SHIFT 0x2
#define COMPUTE_DISPATCH_INITIATOR__ORDERED_APPEND_ENBL_MASK 0x8
#define COMPUTE_DISPATCH_INITIATOR__ORDERED_APPEND_ENBL__SHIFT 0x3
#define COMPUTE_DISPATCH_INITIATOR__ORDERED_APPEND_MODE_MASK 0x10
#define COMPUTE_DISPATCH_INITIATOR__ORDERED_APPEND_MODE__SHIFT 0x4
#define COMPUTE_DISPATCH_INITIATOR__USE_THREAD_DIMENSIONS_MASK 0x20
#define COMPUTE_DISPATCH_INITIATOR__USE_THREAD_DIMENSIONS__SHIFT 0x5
#define COMPUTE_DISPATCH_INITIATOR__ORDER_MODE_MASK 0x40
#define COMPUTE_DISPATCH_INITIATOR__ORDER_MODE__SHIFT 0x6
#define COMPUTE_DISPATCH_INITIATOR__DISPATCH_CACHE_CNTL_MASK 0x380
#define COMPUTE_DISPATCH_INITIATOR__DISPATCH_CACHE_CNTL__SHIFT 0x7
#define COMPUTE_DISPATCH_INITIATOR__SCALAR_L1_INV_VOL_MASK 0x400
#define COMPUTE_DISPATCH_INITIATOR__SCALAR_L1_INV_VOL__SHIFT 0xa
#define COMPUTE_DISPATCH_INITIATOR__VECTOR_L1_INV_VOL_MASK 0x800
#define COMPUTE_DISPATCH_INITIATOR__VECTOR_L1_INV_VOL__SHIFT 0xb
#define COMPUTE_DISPATCH_INITIATOR__DATA_ATC_MASK 0x1000
#define COMPUTE_DISPATCH_INITIATOR__DATA_ATC__SHIFT 0xc
#define COMPUTE_DISPATCH_INITIATOR__RESTORE_MASK 0x4000
#define COMPUTE_DISPATCH_INITIATOR__RESTORE__SHIFT 0xe
/*
 * COMPUTE_DIM_*, COMPUTE_START_*, COMPUTE_NUM_THREAD_* and the two
 * COMPUTE_*_ENABLE registers (_MASK selects the bits, __SHIFT is the index of
 * the lowest bit of the field).
 */
/* COMPUTE_DIM_X/Y/Z and COMPUTE_START_X/Y/Z: one field spanning the full 32 bits. */
#define COMPUTE_DIM_X__SIZE_MASK 0xffffffff
#define COMPUTE_DIM_X__SIZE__SHIFT 0x0
#define COMPUTE_DIM_Y__SIZE_MASK 0xffffffff
#define COMPUTE_DIM_Y__SIZE__SHIFT 0x0
#define COMPUTE_DIM_Z__SIZE_MASK 0xffffffff
#define COMPUTE_DIM_Z__SIZE__SHIFT 0x0
#define COMPUTE_START_X__START_MASK 0xffffffff
#define COMPUTE_START_X__START__SHIFT 0x0
#define COMPUTE_START_Y__START_MASK 0xffffffff
#define COMPUTE_START_Y__START__SHIFT 0x0
#define COMPUTE_START_Z__START_MASK 0xffffffff
#define COMPUTE_START_Z__START__SHIFT 0x0
/* COMPUTE_NUM_THREAD_X/Y/Z: NUM_THREAD_FULL in bits 15:0, NUM_THREAD_PARTIAL in bits 31:16. */
#define COMPUTE_NUM_THREAD_X__NUM_THREAD_FULL_MASK 0xffff
#define COMPUTE_NUM_THREAD_X__NUM_THREAD_FULL__SHIFT 0x0
#define COMPUTE_NUM_THREAD_X__NUM_THREAD_PARTIAL_MASK 0xffff0000
#define COMPUTE_NUM_THREAD_X__NUM_THREAD_PARTIAL__SHIFT 0x10
#define COMPUTE_NUM_THREAD_Y__NUM_THREAD_FULL_MASK 0xffff
#define COMPUTE_NUM_THREAD_Y__NUM_THREAD_FULL__SHIFT 0x0
#define COMPUTE_NUM_THREAD_Y__NUM_THREAD_PARTIAL_MASK 0xffff0000
#define COMPUTE_NUM_THREAD_Y__NUM_THREAD_PARTIAL__SHIFT 0x10
#define COMPUTE_NUM_THREAD_Z__NUM_THREAD_FULL_MASK 0xffff
#define COMPUTE_NUM_THREAD_Z__NUM_THREAD_FULL__SHIFT 0x0
#define COMPUTE_NUM_THREAD_Z__NUM_THREAD_PARTIAL_MASK 0xffff0000
#define COMPUTE_NUM_THREAD_Z__NUM_THREAD_PARTIAL__SHIFT 0x10
/* COMPUTE_PIPELINESTAT_ENABLE / COMPUTE_PERFCOUNT_ENABLE: a single flag in bit 0. */
#define COMPUTE_PIPELINESTAT_ENABLE__PIPELINESTAT_ENABLE_MASK 0x1
#define COMPUTE_PIPELINESTAT_ENABLE__PIPELINESTAT_ENABLE__SHIFT 0x0
#define COMPUTE_PERFCOUNT_ENABLE__PERFCOUNT_ENABLE_MASK 0x1
#define COMPUTE_PERFCOUNT_ENABLE__PERFCOUNT_ENABLE__SHIFT 0x0
/*
 * COMPUTE_PGM / COMPUTE_TBA / COMPUTE_TMA LO+HI register pairs.
 * Each *_LO register has a 32-bit DATA field and each *_HI register an 8-bit
 * DATA field in bits 7:0; COMPUTE_PGM_HI additionally defines INST_ATC in bit 8.
 * NOTE(review): the LO/HI naming suggests a 40-bit value split across the two
 * registers -- confirm against the register specification before relying on it.
 */
#define COMPUTE_PGM_LO__DATA_MASK 0xffffffff
#define COMPUTE_PGM_LO__DATA__SHIFT 0x0
#define COMPUTE_PGM_HI__DATA_MASK 0xff
#define COMPUTE_PGM_HI__DATA__SHIFT 0x0
#define COMPUTE_PGM_HI__INST_ATC_MASK 0x100
#define COMPUTE_PGM_HI__INST_ATC__SHIFT 0x8
#define COMPUTE_TBA_LO__DATA_MASK 0xffffffff
#define COMPUTE_TBA_LO__DATA__SHIFT 0x0
#define COMPUTE_TBA_HI__DATA_MASK 0xff
#define COMPUTE_TBA_HI__DATA__SHIFT 0x0
#define COMPUTE_TMA_LO__DATA_MASK 0xffffffff
#define COMPUTE_TMA_LO__DATA__SHIFT 0x0
#define COMPUTE_TMA_HI__DATA_MASK 0xff
#define COMPUTE_TMA_HI__DATA__SHIFT 0x0
/*
 * COMPUTE_PGM_RSRC1 bit-field constants (_MASK selects the bits, __SHIFT is
 * the index of the lowest bit of the field).
 * Layout as defined below: VGPRS 5:0, SGPRS 9:6, PRIORITY 11:10,
 * FLOAT_MODE 19:12, then single-bit flags PRIV (20), DX10_CLAMP (21),
 * DEBUG_MODE (22), IEEE_MODE (23), BULKY (24) and CDBG_USER (25).
 * Bits 31:26 have no field defined here.
 */
#define COMPUTE_PGM_RSRC1__VGPRS_MASK 0x3f
#define COMPUTE_PGM_RSRC1__VGPRS__SHIFT 0x0
#define COMPUTE_PGM_RSRC1__SGPRS_MASK 0x3c0
#define COMPUTE_PGM_RSRC1__SGPRS__SHIFT 0x6
#define COMPUTE_PGM_RSRC1__PRIORITY_MASK 0xc00
#define COMPUTE_PGM_RSRC1__PRIORITY__SHIFT 0xa
#define COMPUTE_PGM_RSRC1__FLOAT_MODE_MASK 0xff000
#define COMPUTE_PGM_RSRC1__FLOAT_MODE__SHIFT 0xc
#define COMPUTE_PGM_RSRC1__PRIV_MASK 0x100000
#define COMPUTE_PGM_RSRC1__PRIV__SHIFT 0x14
#define COMPUTE_PGM_RSRC1__DX10_CLAMP_MASK 0x200000
#define COMPUTE_PGM_RSRC1__DX10_CLAMP__SHIFT 0x15
#define COMPUTE_PGM_RSRC1__DEBUG_MODE_MASK 0x400000
#define COMPUTE_PGM_RSRC1__DEBUG_MODE__SHIFT 0x16
#define COMPUTE_PGM_RSRC1__IEEE_MODE_MASK 0x800000
#define COMPUTE_PGM_RSRC1__IEEE_MODE__SHIFT 0x17
#define COMPUTE_PGM_RSRC1__BULKY_MASK 0x1000000
#define COMPUTE_PGM_RSRC1__BULKY__SHIFT 0x18
#define COMPUTE_PGM_RSRC1__CDBG_USER_MASK 0x2000000
#define COMPUTE_PGM_RSRC1__CDBG_USER__SHIFT 0x19
/*
 * COMPUTE_PGM_RSRC2 bit-field constants (_MASK selects the bits, __SHIFT is
 * the index of the lowest bit of the field).
 * Layout as defined below: SCRATCH_EN 0, USER_SGPR 5:1, TRAP_PRESENT 6,
 * TGID_X/Y/Z_EN 7/8/9, TG_SIZE_EN 10, TIDIG_COMP_CNT 12:11,
 * EXCP_EN_MSB 14:13, LDS_SIZE 23:15, EXCP_EN 30:24.
 * Bit 31 has no field defined here.
 */
#define COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK 0x1
#define COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT 0x0
#define COMPUTE_PGM_RSRC2__USER_SGPR_MASK 0x3e
#define COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT 0x1
#define COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK 0x40
#define COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT 0x6
#define COMPUTE_PGM_RSRC2__TGID_X_EN_MASK 0x80
#define COMPUTE_PGM_RSRC2__TGID_X_EN__SHIFT 0x7
#define COMPUTE_PGM_RSRC2__TGID_Y_EN_MASK 0x100
#define COMPUTE_PGM_RSRC2__TGID_Y_EN__SHIFT 0x8
#define COMPUTE_PGM_RSRC2__TGID_Z_EN_MASK 0x200
#define COMPUTE_PGM_RSRC2__TGID_Z_EN__SHIFT 0x9
#define COMPUTE_PGM_RSRC2__TG_SIZE_EN_MASK 0x400
#define COMPUTE_PGM_RSRC2__TG_SIZE_EN__SHIFT 0xa
#define COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT_MASK 0x1800
#define COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT__SHIFT 0xb
#define COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK 0x6000
#define COMPUTE_PGM_RSRC2__EXCP_EN_MSB__SHIFT 0xd
#define COMPUTE_PGM_RSRC2__LDS_SIZE_MASK 0xff8000
#define COMPUTE_PGM_RSRC2__LDS_SIZE__SHIFT 0xf
#define COMPUTE_PGM_RSRC2__EXCP_EN_MASK 0x7f000000
#define COMPUTE_PGM_RSRC2__EXCP_EN__SHIFT 0x18
/*
 * COMPUTE_VMID and COMPUTE_RESOURCE_LIMITS bit-field constants (_MASK selects
 * the bits, __SHIFT is the index of the lowest bit of the field).
 */
/* COMPUTE_VMID: 4-bit DATA field in bits 3:0. */
#define COMPUTE_VMID__DATA_MASK 0xf
#define COMPUTE_VMID__DATA__SHIFT 0x0
/*
 * COMPUTE_RESOURCE_LIMITS: WAVES_PER_SH 9:0, TG_PER_CU 15:12,
 * LOCK_THRESHOLD 21:16, SIMD_DEST_CNTL 22, FORCE_SIMD_DIST 23,
 * CU_GROUP_COUNT 26:24. Bits 11:10 and 31:27 have no field defined here.
 */
#define COMPUTE_RESOURCE_LIMITS__WAVES_PER_SH_MASK 0x3ff
#define COMPUTE_RESOURCE_LIMITS__WAVES_PER_SH__SHIFT 0x0
#define COMPUTE_RESOURCE_LIMITS__TG_PER_CU_MASK 0xf000
#define COMPUTE_RESOURCE_LIMITS__TG_PER_CU__SHIFT 0xc
#define COMPUTE_RESOURCE_LIMITS__LOCK_THRESHOLD_MASK 0x3f0000
#define COMPUTE_RESOURCE_LIMITS__LOCK_THRESHOLD__SHIFT 0x10
#define COMPUTE_RESOURCE_LIMITS__SIMD_DEST_CNTL_MASK 0x400000
#define COMPUTE_RESOURCE_LIMITS__SIMD_DEST_CNTL__SHIFT 0x16
#define COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK 0x800000
#define COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST__SHIFT 0x17
#define COMPUTE_RESOURCE_LIMITS__CU_GROUP_COUNT_MASK 0x7000000
#define COMPUTE_RESOURCE_LIMITS__CU_GROUP_COUNT__SHIFT 0x18
/*
 * COMPUTE_STATIC_THREAD_MGMT_SE0..SE3 and COMPUTE_TMPRING_SIZE bit-field
 * constants (_MASK selects the bits, __SHIFT is the index of the lowest bit).
 * Every COMPUTE_STATIC_THREAD_MGMT_SEn register uses the same layout:
 * SH0_CU_EN in bits 15:0 and SH1_CU_EN in bits 31:16.
 * Note that the COMPUTE_TMPRING_SIZE definitions sit between SE1 and SE2.
 */
#define COMPUTE_STATIC_THREAD_MGMT_SE0__SH0_CU_EN_MASK 0xffff
#define COMPUTE_STATIC_THREAD_MGMT_SE0__SH0_CU_EN__SHIFT 0x0
#define COMPUTE_STATIC_THREAD_MGMT_SE0__SH1_CU_EN_MASK 0xffff0000
#define COMPUTE_STATIC_THREAD_MGMT_SE0__SH1_CU_EN__SHIFT 0x10
#define COMPUTE_STATIC_THREAD_MGMT_SE1__SH0_CU_EN_MASK 0xffff
#define COMPUTE_STATIC_THREAD_MGMT_SE1__SH0_CU_EN__SHIFT 0x0
#define COMPUTE_STATIC_THREAD_MGMT_SE1__SH1_CU_EN_MASK 0xffff0000
#define COMPUTE_STATIC_THREAD_MGMT_SE1__SH1_CU_EN__SHIFT 0x10
/* COMPUTE_TMPRING_SIZE: WAVES in bits 11:0, WAVESIZE in bits 24:12; bits 31:25 have no field defined here. */
#define COMPUTE_TMPRING_SIZE__WAVES_MASK 0xfff
#define COMPUTE_TMPRING_SIZE__WAVES__SHIFT 0x0
#define COMPUTE_TMPRING_SIZE__WAVESIZE_MASK 0x1fff000
#define COMPUTE_TMPRING_SIZE__WAVESIZE__SHIFT 0xc
#define COMPUTE_STATIC_THREAD_MGMT_SE2__SH0_CU_EN_MASK 0xffff
#define COMPUTE_STATIC_THREAD_MGMT_SE2__SH0_CU_EN__SHIFT 0x0
#define COMPUTE_STATIC_THREAD_MGMT_SE2__SH1_CU_EN_MASK 0xffff0000
#define COMPUTE_STATIC_THREAD_MGMT_SE2__SH1_CU_EN__SHIFT 0x10
#define COMPUTE_STATIC_THREAD_MGMT_SE3__SH0_CU_EN_MASK 0xffff
#define COMPUTE_STATIC_THREAD_MGMT_SE3__SH0_CU_EN__SHIFT 0x0
#define COMPUTE_STATIC_THREAD_MGMT_SE3__SH1_CU_EN_MASK 0xffff0000
#define COMPUTE_STATIC_THREAD_MGMT_SE3__SH1_CU_EN__SHIFT 0x10
/*
 * COMPUTE_RESTART_X/Y/Z, COMPUTE_THREAD_TRACE_ENABLE and COMPUTE_MISC_RESERVED
 * bit-field constants (_MASK selects the bits, __SHIFT is the index of the
 * lowest bit of the field).
 */
/* COMPUTE_RESTART_X/Y/Z: one RESTART field spanning the full 32 bits. */
#define COMPUTE_RESTART_X__RESTART_MASK 0xffffffff
#define COMPUTE_RESTART_X__RESTART__SHIFT 0x0
#define COMPUTE_RESTART_Y__RESTART_MASK 0xffffffff
#define COMPUTE_RESTART_Y__RESTART__SHIFT 0x0
#define COMPUTE_RESTART_Z__RESTART_MASK 0xffffffff
#define COMPUTE_RESTART_Z__RESTART__SHIFT 0x0
/* COMPUTE_THREAD_TRACE_ENABLE: a single flag in bit 0. */
#define COMPUTE_THREAD_TRACE_ENABLE__THREAD_TRACE_ENABLE_MASK 0x1
#define COMPUTE_THREAD_TRACE_ENABLE__THREAD_TRACE_ENABLE__SHIFT 0x0
/* COMPUTE_MISC_RESERVED: SEND_SEID in bits 1:0, then RESERVED2..RESERVED4 as single bits 2..4. */
#define COMPUTE_MISC_RESERVED__SEND_SEID_MASK 0x3
#define COMPUTE_MISC_RESERVED__SEND_SEID__SHIFT 0x0
#define COMPUTE_MISC_RESERVED__RESERVED2_MASK 0x4
#define COMPUTE_MISC_RESERVED__RESERVED2__SHIFT 0x2
#define COMPUTE_MISC_RESERVED__RESERVED3_MASK 0x8
#define COMPUTE_MISC_RESERVED__RESERVED3__SHIFT 0x3
#define COMPUTE_MISC_RESERVED__RESERVED4_MASK 0x10
#define COMPUTE_MISC_RESERVED__RESERVED4__SHIFT 0x4
/*
 * COMPUTE_USER_DATA_0 .. COMPUTE_USER_DATA_15: sixteen registers with the same
 * layout, a single DATA field spanning the full 32 bits (mask 0xffffffff,
 * shift 0).
 */
#define COMPUTE_USER_DATA_0__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_0__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_1__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_1__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_2__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_2__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_3__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_3__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_4__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_4__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_5__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_5__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_6__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_6__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_7__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_7__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_8__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_8__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_9__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_9__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_10__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_10__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_11__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_11__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_12__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_12__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_13__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_13__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_14__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_14__DATA__SHIFT 0x0
#define COMPUTE_USER_DATA_15__DATA_MASK 0xffffffff
#define COMPUTE_USER_DATA_15__DATA__SHIFT 0x0
/*
 * CSPRIV_* bit-field constants (_MASK selects the bits, __SHIFT is the index
 * of the lowest bit of the field).
 */
/*
 * CSPRIV_CONNECT: DOORBELL_OFFSET 20:0, QUEUE_ID 23:21, VMID 29:26,
 * UNORD_DISP 31. Bits 25:24 and bit 30 have no field defined here.
 */
#define CSPRIV_CONNECT__DOORBELL_OFFSET_MASK 0x1fffff
#define CSPRIV_CONNECT__DOORBELL_OFFSET__SHIFT 0x0
#define CSPRIV_CONNECT__QUEUE_ID_MASK 0xe00000
#define CSPRIV_CONNECT__QUEUE_ID__SHIFT 0x15
#define CSPRIV_CONNECT__VMID_MASK 0x3c000000
#define CSPRIV_CONNECT__VMID__SHIFT 0x1a
#define CSPRIV_CONNECT__UNORD_DISP_MASK 0x80000000
#define CSPRIV_CONNECT__UNORD_DISP__SHIFT 0x1f
/* CSPRIV_THREAD_TRACE_TG0..TG2: one full 32-bit field each (TGID_X, TGID_Y, TGID_Z). */
#define CSPRIV_THREAD_TRACE_TG0__TGID_X_MASK 0xffffffff
#define CSPRIV_THREAD_TRACE_TG0__TGID_X__SHIFT 0x0
#define CSPRIV_THREAD_TRACE_TG1__TGID_Y_MASK 0xffffffff
#define CSPRIV_THREAD_TRACE_TG1__TGID_Y__SHIFT 0x0
#define CSPRIV_THREAD_TRACE_TG2__TGID_Z_MASK 0xffffffff
#define CSPRIV_THREAD_TRACE_TG2__TGID_Z__SHIFT 0x0
/*
 * CSPRIV_THREAD_TRACE_TG3: WAVE_ID_BASE 11:0, THREADS_IN_GROUP 23:12, then
 * single-bit flags PARTIAL_X/Y/Z_FLAG (24/25/26), LAST_TG (27), FIRST_TG (28).
 * Bits 31:29 have no field defined here.
 */
#define CSPRIV_THREAD_TRACE_TG3__WAVE_ID_BASE_MASK 0xfff
#define CSPRIV_THREAD_TRACE_TG3__WAVE_ID_BASE__SHIFT 0x0
#define CSPRIV_THREAD_TRACE_TG3__THREADS_IN_GROUP_MASK 0xfff000
#define CSPRIV_THREAD_TRACE_TG3__THREADS_IN_GROUP__SHIFT 0xc
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_X_FLAG_MASK 0x1000000
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_X_FLAG__SHIFT 0x18
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_Y_FLAG_MASK 0x2000000
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_Y_FLAG__SHIFT 0x19
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_Z_FLAG_MASK 0x4000000
#define CSPRIV_THREAD_TRACE_TG3__PARTIAL_Z_FLAG__SHIFT 0x1a
#define CSPRIV_THREAD_TRACE_TG3__LAST_TG_MASK 0x8000000
#define CSPRIV_THREAD_TRACE_TG3__LAST_TG__SHIFT 0x1b
#define CSPRIV_THREAD_TRACE_TG3__FIRST_TG_MASK 0x10000000
#define CSPRIV_THREAD_TRACE_TG3__FIRST_TG__SHIFT 0x1c
/* CSPRIV_THREAD_TRACE_EVENT: 5-bit EVENT_ID in bits 4:0. */
#define CSPRIV_THREAD_TRACE_EVENT__EVENT_ID_MASK 0x1f
#define CSPRIV_THREAD_TRACE_EVENT__EVENT_ID__SHIFT 0x0
/*
 * RLC_CNTL, RLC_DEBUG_SELECT and RLC_DEBUG bit-field constants (_MASK selects
 * the bits, __SHIFT is the index of the lowest bit of the field).
 */
/*
 * RLC_CNTL: single-bit flags in bits 0..4 and RESERVED in bits 31:8.
 * Bits 7:5 have no field defined here.
 */
#define RLC_CNTL__RLC_ENABLE_F32_MASK 0x1
#define RLC_CNTL__RLC_ENABLE_F32__SHIFT 0x0
#define RLC_CNTL__FORCE_RETRY_MASK 0x2
#define RLC_CNTL__FORCE_RETRY__SHIFT 0x1
#define RLC_CNTL__READ_CACHE_DISABLE_MASK 0x4
#define RLC_CNTL__READ_CACHE_DISABLE__SHIFT 0x2
#define RLC_CNTL__RLC_STEP_F32_MASK 0x8
#define RLC_CNTL__RLC_STEP_F32__SHIFT 0x3
#define RLC_CNTL__SOFT_RESET_DEBUG_MODE_MASK 0x10
#define RLC_CNTL__SOFT_RESET_DEBUG_MODE__SHIFT 0x4
#define RLC_CNTL__RESERVED_MASK 0xffffff00
#define RLC_CNTL__RESERVED__SHIFT 0x8
/* RLC_DEBUG_SELECT: SELECT in bits 7:0, RESERVED in bits 31:8. */
#define RLC_DEBUG_SELECT__SELECT_MASK 0xff
#define RLC_DEBUG_SELECT__SELECT__SHIFT 0x0
#define RLC_DEBUG_SELECT__RESERVED_MASK 0xffffff00
#define RLC_DEBUG_SELECT__RESERVED__SHIFT 0x8
/* RLC_DEBUG: one DATA field spanning the full 32 bits. */
#define RLC_DEBUG__DATA_MASK 0xffffffff
#define RLC_DEBUG__DATA__SHIFT 0x0
/*
 * RLC_MC_CNTL bit-field constants (_MASK selects the bits, __SHIFT is the
 * index of the lowest bit of the field). The fields below cover all 32 bits:
 * WRREQ_SWAP 1:0, WRREQ_TRAN 2, WRREQ_PRIV 3, WRNFO_STALL 4, WRNFO_URG 8:5,
 * WRREQ_DW_IMASK 12:9, RESERVED_B 19:13, RDNFO_URG 23:20, RDREQ_SWAP 25:24,
 * RDREQ_TRAN 26, RDREQ_PRIV 27, RDNFO_STALL 28, RESERVED 31:29.
 */
#define RLC_MC_CNTL__WRREQ_SWAP_MASK 0x3
#define RLC_MC_CNTL__WRREQ_SWAP__SHIFT 0x0
#define RLC_MC_CNTL__WRREQ_TRAN_MASK 0x4
#define RLC_MC_CNTL__WRREQ_TRAN__SHIFT 0x2
#define RLC_MC_CNTL__WRREQ_PRIV_MASK 0x8
#define RLC_MC_CNTL__WRREQ_PRIV__SHIFT 0x3
#define RLC_MC_CNTL__WRNFO_STALL_MASK 0x10
#define RLC_MC_CNTL__WRNFO_STALL__SHIFT 0x4
#define RLC_MC_CNTL__WRNFO_URG_MASK 0x1e0
#define RLC_MC_CNTL__WRNFO_URG__SHIFT 0x5
#define RLC_MC_CNTL__WRREQ_DW_IMASK_MASK 0x1e00
#define RLC_MC_CNTL__WRREQ_DW_IMASK__SHIFT 0x9
#define RLC_MC_CNTL__RESERVED_B_MASK 0xfe000
#define RLC_MC_CNTL__RESERVED_B__SHIFT 0xd
#define RLC_MC_CNTL__RDNFO_URG_MASK 0xf00000
#define RLC_MC_CNTL__RDNFO_URG__SHIFT 0x14
#define RLC_MC_CNTL__RDREQ_SWAP_MASK 0x3000000
#define RLC_MC_CNTL__RDREQ_SWAP__SHIFT 0x18
#define RLC_MC_CNTL__RDREQ_TRAN_MASK 0x4000000
#define RLC_MC_CNTL__RDREQ_TRAN__SHIFT 0x1a
#define RLC_MC_CNTL__RDREQ_PRIV_MASK 0x8000000
#define RLC_MC_CNTL__RDREQ_PRIV__SHIFT 0x1b
#define RLC_MC_CNTL__RDNFO_STALL_MASK 0x10000000
#define RLC_MC_CNTL__RDNFO_STALL__SHIFT 0x1c
#define RLC_MC_CNTL__RESERVED_MASK 0xe0000000
#define RLC_MC_CNTL__RESERVED__SHIFT 0x1d
/*
 * RLC_STAT, RLC_SAFE_MODE, RLC_SOFT_RESET_GPU and RLC_MEM_SLP_CNTL bit-field
 * constants (_MASK selects the bits, __SHIFT is the index of the lowest bit).
 */
/* RLC_STAT: three busy flags in bits 0..2, RESERVED in bits 31:3. */
#define RLC_STAT__RLC_BUSY_MASK 0x1
#define RLC_STAT__RLC_BUSY__SHIFT 0x0
#define RLC_STAT__RLC_GPM_BUSY_MASK 0x2
#define RLC_STAT__RLC_GPM_BUSY__SHIFT 0x1
#define RLC_STAT__RLC_SPM_BUSY_MASK 0x4
#define RLC_STAT__RLC_SPM_BUSY__SHIFT 0x2
#define RLC_STAT__RESERVED_MASK 0xfffffff8
#define RLC_STAT__RESERVED__SHIFT 0x3
/* RLC_SAFE_MODE: REQ in bit 0, MESSAGE in bits 4:1, RESERVED in bits 31:5. */
#define RLC_SAFE_MODE__REQ_MASK 0x1
#define RLC_SAFE_MODE__REQ__SHIFT 0x0
#define RLC_SAFE_MODE__MESSAGE_MASK 0x1e
#define RLC_SAFE_MODE__MESSAGE__SHIFT 0x1
#define RLC_SAFE_MODE__RESERVED_MASK 0xffffffe0
#define RLC_SAFE_MODE__RESERVED__SHIFT 0x5
/* RLC_SOFT_RESET_GPU: SOFT_RESET_GPU in bit 0, RESERVED in bits 31:1. */
#define RLC_SOFT_RESET_GPU__SOFT_RESET_GPU_MASK 0x1
#define RLC_SOFT_RESET_GPU__SOFT_RESET_GPU__SHIFT 0x0
#define RLC_SOFT_RESET_GPU__RESERVED_MASK 0xfffffffe
#define RLC_SOFT_RESET_GPU__RESERVED__SHIFT 0x1
/*
 * RLC_MEM_SLP_CNTL: RLC_MEM_LS_EN 0, RLC_MEM_DS_EN 1, RESERVED 7:2,
 * RLC_MEM_LS_ON_DELAY 15:8, RLC_MEM_LS_OFF_DELAY 23:16, RESERVED1 31:24.
 */
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK 0x1
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN__SHIFT 0x0
#define RLC_MEM_SLP_CNTL__RLC_MEM_DS_EN_MASK 0x2
#define RLC_MEM_SLP_CNTL__RLC_MEM_DS_EN__SHIFT 0x1
#define RLC_MEM_SLP_CNTL__RESERVED_MASK 0xfc
#define RLC_MEM_SLP_CNTL__RESERVED__SHIFT 0x2
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_ON_DELAY_MASK 0xff00
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_ON_DELAY__SHIFT 0x8
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_OFF_DELAY_MASK 0xff0000
#define RLC_MEM_SLP_CNTL__RLC_MEM_LS_OFF_DELAY__SHIFT 0x10
#define RLC_MEM_SLP_CNTL__RESERVED1_MASK 0xff000000
#define RLC_MEM_SLP_CNTL__RESERVED1__SHIFT 0x18
/*
 * RLC performance-monitor register constants (_MASK selects the bits, __SHIFT
 * is the index of the lowest bit of the field).
 */
/*
 * RLC_PERFMON_CNTL: PERFMON_STATE in bits 2:0 and PERFMON_SAMPLE_ENABLE in
 * bit 10. Bits 9:3 and 31:11 have no field defined here.
 */
#define RLC_PERFMON_CNTL__PERFMON_STATE_MASK 0x7
#define RLC_PERFMON_CNTL__PERFMON_STATE__SHIFT 0x0
#define RLC_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE_MASK 0x400
#define RLC_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE__SHIFT 0xa
/* RLC_PERFCOUNTER0/1_SELECT: 8-bit PERFCOUNTER_SELECT in bits 7:0. */
#define RLC_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT_MASK 0xff
#define RLC_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define RLC_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT_MASK 0xff
#define RLC_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
/* RLC_PERFCOUNTER0/1_LO and _HI: one full 32-bit field each. */
#define RLC_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define RLC_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define RLC_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define RLC_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define RLC_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define RLC_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define RLC_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define RLC_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * CGTT_RLC_CLK_CTRL bit-field constants (_MASK selects the bits, __SHIFT is
 * the index of the lowest bit of the field): ON_DELAY 3:0, OFF_HYSTERESIS 11:4,
 * SOFT_OVERRIDE_DYN 30, SOFT_OVERRIDE_REG 31.
 * Bits 29:12 have no field defined here.
 */
#define CGTT_RLC_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_RLC_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_RLC_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_RLC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_DYN_MASK 0x40000000
#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_DYN__SHIFT 0x1e
#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_REG_MASK 0x80000000
#define CGTT_RLC_CLK_CTRL__SOFT_OVERRIDE_REG__SHIFT 0x1f
/*
 * RLC_LB_* load-balance register constants (_MASK selects the bits, __SHIFT
 * is the index of the lowest bit of the field).
 */
/*
 * RLC_LB_CNTL: single-bit flags in bits 0..3, CU_MASK_USED_OFF_HYST in
 * bits 11:4, RESERVED in bits 31:12.
 */
#define RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK 0x1
#define RLC_LB_CNTL__LOAD_BALANCE_ENABLE__SHIFT 0x0
#define RLC_LB_CNTL__LB_CNT_CP_BUSY_MASK 0x2
#define RLC_LB_CNTL__LB_CNT_CP_BUSY__SHIFT 0x1
#define RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK 0x4
#define RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE__SHIFT 0x2
#define RLC_LB_CNTL__LB_CNT_REG_INC_MASK 0x8
#define RLC_LB_CNTL__LB_CNT_REG_INC__SHIFT 0x3
#define RLC_LB_CNTL__CU_MASK_USED_OFF_HYST_MASK 0xff0
#define RLC_LB_CNTL__CU_MASK_USED_OFF_HYST__SHIFT 0x4
#define RLC_LB_CNTL__RESERVED_MASK 0xfffff000
#define RLC_LB_CNTL__RESERVED__SHIFT 0xc
/* RLC_LB_CNTR_MAX, RLC_LB_CNTR_INIT, RLC_LOAD_BALANCE_CNTR: one full 32-bit field each. */
#define RLC_LB_CNTR_MAX__LB_CNTR_MAX_MASK 0xffffffff
#define RLC_LB_CNTR_MAX__LB_CNTR_MAX__SHIFT 0x0
#define RLC_LB_CNTR_INIT__LB_CNTR_INIT_MASK 0xffffffff
#define RLC_LB_CNTR_INIT__LB_CNTR_INIT__SHIFT 0x0
#define RLC_LOAD_BALANCE_CNTR__RLC_LOAD_BALANCE_CNTR_MASK 0xffffffff
#define RLC_LOAD_BALANCE_CNTR__RLC_LOAD_BALANCE_CNTR__SHIFT 0x0
/*
 * RLC_SAVE_AND_RESTORE_BASE, RLC_JUMP_TABLE_RESTORE, RLC_DRIVER_CPDMA_STATUS
 * and RLC_PG_DELAY_2 bit-field constants (_MASK selects the bits, __SHIFT is
 * the index of the lowest bit of the field).
 */
/* RLC_SAVE_AND_RESTORE_BASE / RLC_JUMP_TABLE_RESTORE: one full 32-bit field each. */
#define RLC_SAVE_AND_RESTORE_BASE__BASE_MASK 0xffffffff
#define RLC_SAVE_AND_RESTORE_BASE__BASE__SHIFT 0x0
#define RLC_JUMP_TABLE_RESTORE__ADDR_MASK 0xffffffff
#define RLC_JUMP_TABLE_RESTORE__ADDR__SHIFT 0x0
/* RLC_DRIVER_CPDMA_STATUS: DRIVER_REQUEST 0, RESERVED1 3:1, DRIVER_ACK 4, RESERVED 31:5. */
#define RLC_DRIVER_CPDMA_STATUS__DRIVER_REQUEST_MASK 0x1
#define RLC_DRIVER_CPDMA_STATUS__DRIVER_REQUEST__SHIFT 0x0
#define RLC_DRIVER_CPDMA_STATUS__RESERVED1_MASK 0xe
#define RLC_DRIVER_CPDMA_STATUS__RESERVED1__SHIFT 0x1
#define RLC_DRIVER_CPDMA_STATUS__DRIVER_ACK_MASK 0x10
#define RLC_DRIVER_CPDMA_STATUS__DRIVER_ACK__SHIFT 0x4
#define RLC_DRIVER_CPDMA_STATUS__RESERVED_MASK 0xffffffe0
#define RLC_DRIVER_CPDMA_STATUS__RESERVED__SHIFT 0x5
/* RLC_PG_DELAY_2: SERDES_TIMEOUT_VALUE 7:0, SERDES_CMD_DELAY 15:8, PERCU_TIMEOUT_VALUE 31:16. */
#define RLC_PG_DELAY_2__SERDES_TIMEOUT_VALUE_MASK 0xff
#define RLC_PG_DELAY_2__SERDES_TIMEOUT_VALUE__SHIFT 0x0
#define RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK 0xff00
#define RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT 0x8
#define RLC_PG_DELAY_2__PERCU_TIMEOUT_VALUE_MASK 0xffff0000
#define RLC_PG_DELAY_2__PERCU_TIMEOUT_VALUE__SHIFT 0x10
/*
 * RLC_GPM debug/ucode registers, GPU clock counter registers and
 * RLC_UCODE_CNTL (_MASK selects the bits, __SHIFT is the index of the lowest
 * bit of the field).
 */
/* RLC_GPM_DEBUG_SELECT: SELECT in bits 7:0, RESERVED in bits 31:8. */
#define RLC_GPM_DEBUG_SELECT__SELECT_MASK 0xff
#define RLC_GPM_DEBUG_SELECT__SELECT__SHIFT 0x0
#define RLC_GPM_DEBUG_SELECT__RESERVED_MASK 0xffffff00
#define RLC_GPM_DEBUG_SELECT__RESERVED__SHIFT 0x8
/* RLC_GPM_DEBUG: one DATA field spanning the full 32 bits. */
#define RLC_GPM_DEBUG__DATA_MASK 0xffffffff
#define RLC_GPM_DEBUG__DATA__SHIFT 0x0
/* RLC_GPM_UCODE_ADDR: UCODE_ADDR in bits 11:0, RESERVED in bits 31:12. */
#define RLC_GPM_UCODE_ADDR__UCODE_ADDR_MASK 0xfff
#define RLC_GPM_UCODE_ADDR__UCODE_ADDR__SHIFT 0x0
#define RLC_GPM_UCODE_ADDR__RESERVED_MASK 0xfffff000
#define RLC_GPM_UCODE_ADDR__RESERVED__SHIFT 0xc
/* RLC_GPM_UCODE_DATA and RLC_GPU_CLOCK_COUNT_LSB/MSB: one full 32-bit field each. */
#define RLC_GPM_UCODE_DATA__UCODE_DATA_MASK 0xffffffff
#define RLC_GPM_UCODE_DATA__UCODE_DATA__SHIFT 0x0
#define RLC_GPU_CLOCK_COUNT_LSB__GPU_CLOCKS_LSB_MASK 0xffffffff
#define RLC_GPU_CLOCK_COUNT_LSB__GPU_CLOCKS_LSB__SHIFT 0x0
#define RLC_GPU_CLOCK_COUNT_MSB__GPU_CLOCKS_MSB_MASK 0xffffffff
#define RLC_GPU_CLOCK_COUNT_MSB__GPU_CLOCKS_MSB__SHIFT 0x0
/* RLC_CAPTURE_GPU_CLOCK_COUNT: CAPTURE in bit 0, RESERVED in bits 31:1. */
#define RLC_CAPTURE_GPU_CLOCK_COUNT__CAPTURE_MASK 0x1
#define RLC_CAPTURE_GPU_CLOCK_COUNT__CAPTURE__SHIFT 0x0
#define RLC_CAPTURE_GPU_CLOCK_COUNT__RESERVED_MASK 0xfffffffe
#define RLC_CAPTURE_GPU_CLOCK_COUNT__RESERVED__SHIFT 0x1
/* RLC_UCODE_CNTL: RLC_UCODE_FLAGS spans the full 32 bits. */
#define RLC_UCODE_CNTL__RLC_UCODE_FLAGS_MASK 0xffffffff
#define RLC_UCODE_CNTL__RLC_UCODE_FLAGS__SHIFT 0x0
/*
 * RLC_GPM_STAT and RLC_GPU_CLOCK_32* bit-field constants (_MASK selects the
 * bits, __SHIFT is the index of the lowest bit of the field).
 */
/* RLC_GPM_STAT: four single-bit status flags in bits 0..3, RESERVED in bits 31:4. */
#define RLC_GPM_STAT__RLC_BUSY_MASK 0x1
#define RLC_GPM_STAT__RLC_BUSY__SHIFT 0x0
#define RLC_GPM_STAT__GFX_POWER_STATUS_MASK 0x2
#define RLC_GPM_STAT__GFX_POWER_STATUS__SHIFT 0x1
#define RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK 0x4
#define RLC_GPM_STAT__GFX_CLOCK_STATUS__SHIFT 0x2
#define RLC_GPM_STAT__GFX_LS_STATUS_MASK 0x8
#define RLC_GPM_STAT__GFX_LS_STATUS__SHIFT 0x3
#define RLC_GPM_STAT__RESERVED_MASK 0xfffffff0
#define RLC_GPM_STAT__RESERVED__SHIFT 0x4
/* RLC_GPU_CLOCK_32_RES_SEL: RES_SEL in bits 5:0, RESERVED in bits 31:6. */
#define RLC_GPU_CLOCK_32_RES_SEL__RES_SEL_MASK 0x3f
#define RLC_GPU_CLOCK_32_RES_SEL__RES_SEL__SHIFT 0x0
#define RLC_GPU_CLOCK_32_RES_SEL__RESERVED_MASK 0xffffffc0
#define RLC_GPU_CLOCK_32_RES_SEL__RESERVED__SHIFT 0x6
/* RLC_GPU_CLOCK_32: one field spanning the full 32 bits. */
#define RLC_GPU_CLOCK_32__GPU_CLOCK_32_MASK 0xffffffff
#define RLC_GPU_CLOCK_32__GPU_CLOCK_32__SHIFT 0x0
/*
 * RLC_PG_CNTL bit-field constants (_MASK selects the bits, __SHIFT is the
 * index of the lowest bit of the field). The fields below cover all 32 bits:
 * four single-bit enables in bits 0..3, RESERVED 15:4,
 * CHUB_HANDSHAKE_ENABLE 16, SMU_CLK_SLOWDOWN_ON_PU_ENABLE 17,
 * SMU_CLK_SLOWDOWN_ON_PD_ENABLE 18, RESERVED1 23:19, PG_ERROR_STATUS 31:24.
 */
#define RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK 0x1
#define RLC_PG_CNTL__GFX_POWER_GATING_ENABLE__SHIFT 0x0
#define RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK 0x2
#define RLC_PG_CNTL__GFX_POWER_GATING_SRC__SHIFT 0x1
#define RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK 0x4
#define RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE__SHIFT 0x2
#define RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK 0x8
#define RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE__SHIFT 0x3
#define RLC_PG_CNTL__RESERVED_MASK 0xfff0
#define RLC_PG_CNTL__RESERVED__SHIFT 0x4
#define RLC_PG_CNTL__CHUB_HANDSHAKE_ENABLE_MASK 0x10000
#define RLC_PG_CNTL__CHUB_HANDSHAKE_ENABLE__SHIFT 0x10
#define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK 0x20000
#define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE__SHIFT 0x11
#define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK 0x40000
#define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE__SHIFT 0x12
#define RLC_PG_CNTL__RESERVED1_MASK 0xf80000
#define RLC_PG_CNTL__RESERVED1__SHIFT 0x13
#define RLC_PG_CNTL__PG_ERROR_STATUS_MASK 0xff000000
#define RLC_PG_CNTL__PG_ERROR_STATUS__SHIFT 0x18
/*
 * RLC_GPM thread registers and RLC_CGTT_MGCG_OVERRIDE (_MASK selects the
 * bits, __SHIFT is the index of the lowest bit of the field).
 */
/* RLC_GPM_THREAD_PRIORITY: four 8-bit priorities, THREAD0 in bits 7:0 up to THREAD3 in bits 31:24. */
#define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY_MASK 0xff
#define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY__SHIFT 0x0
#define RLC_GPM_THREAD_PRIORITY__THREAD1_PRIORITY_MASK 0xff00
#define RLC_GPM_THREAD_PRIORITY__THREAD1_PRIORITY__SHIFT 0x8
#define RLC_GPM_THREAD_PRIORITY__THREAD2_PRIORITY_MASK 0xff0000
#define RLC_GPM_THREAD_PRIORITY__THREAD2_PRIORITY__SHIFT 0x10
#define RLC_GPM_THREAD_PRIORITY__THREAD3_PRIORITY_MASK 0xff000000
#define RLC_GPM_THREAD_PRIORITY__THREAD3_PRIORITY__SHIFT 0x18
/* RLC_GPM_THREAD_ENABLE: one enable bit per thread in bits 0..3, RESERVED in bits 31:4. */
#define RLC_GPM_THREAD_ENABLE__THREAD0_ENABLE_MASK 0x1
#define RLC_GPM_THREAD_ENABLE__THREAD0_ENABLE__SHIFT 0x0
#define RLC_GPM_THREAD_ENABLE__THREAD1_ENABLE_MASK 0x2
#define RLC_GPM_THREAD_ENABLE__THREAD1_ENABLE__SHIFT 0x1
#define RLC_GPM_THREAD_ENABLE__THREAD2_ENABLE_MASK 0x4
#define RLC_GPM_THREAD_ENABLE__THREAD2_ENABLE__SHIFT 0x2
#define RLC_GPM_THREAD_ENABLE__THREAD3_ENABLE_MASK 0x8
#define RLC_GPM_THREAD_ENABLE__THREAD3_ENABLE__SHIFT 0x3
#define RLC_GPM_THREAD_ENABLE__RESERVED_MASK 0xfffffff0
#define RLC_GPM_THREAD_ENABLE__RESERVED__SHIFT 0x4
/* RLC_GPM_VMID_THREAD0/1: RLC_VMID in bits 3:0, RESERVED in bits 31:4. */
#define RLC_GPM_VMID_THREAD0__RLC_VMID_MASK 0xf
#define RLC_GPM_VMID_THREAD0__RLC_VMID__SHIFT 0x0
#define RLC_GPM_VMID_THREAD0__RESERVED_MASK 0xfffffff0
#define RLC_GPM_VMID_THREAD0__RESERVED__SHIFT 0x4
#define RLC_GPM_VMID_THREAD1__RLC_VMID_MASK 0xf
#define RLC_GPM_VMID_THREAD1__RLC_VMID__SHIFT 0x0
#define RLC_GPM_VMID_THREAD1__RESERVED_MASK 0xfffffff0
#define RLC_GPM_VMID_THREAD1__RESERVED__SHIFT 0x4
/* RLC_CGTT_MGCG_OVERRIDE: OVERRIDE spans the full 32 bits. */
#define RLC_CGTT_MGCG_OVERRIDE__OVERRIDE_MASK 0xffffffff
#define RLC_CGTT_MGCG_OVERRIDE__OVERRIDE__SHIFT 0x0
/*
 * RLC_CGCG_CGLS_CTRL and RLC_CGCG_RAMP_CTRL bit-field constants (_MASK selects
 * the bits, __SHIFT is the index of the lowest bit of the field).
 */
/*
 * RLC_CGCG_CGLS_CTRL (covers all 32 bits): CGCG_EN 0, CGLS_EN 1,
 * CGLS_REP_COMPANSAT_DELAY 7:2, CGCG_GFX_IDLE_THRESHOLD 26:8,
 * CGCG_CONTROLLER 27, CGCG_REG_CTRL 28, SLEEP_MODE 30:29, SPARE 31.
 * NOTE(review): "COMPANSAT" looks like a misspelling of "COMPENSAT(E)"; the
 * identifier is part of the header's interface and must stay as spelled.
 */
#define RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK 0x1
#define RLC_CGCG_CGLS_CTRL__CGCG_EN__SHIFT 0x0
#define RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK 0x2
#define RLC_CGCG_CGLS_CTRL__CGLS_EN__SHIFT 0x1
#define RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK 0xfc
#define RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT 0x2
#define RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK 0x7ffff00
#define RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT 0x8
#define RLC_CGCG_CGLS_CTRL__CGCG_CONTROLLER_MASK 0x8000000
#define RLC_CGCG_CGLS_CTRL__CGCG_CONTROLLER__SHIFT 0x1b
#define RLC_CGCG_CGLS_CTRL__CGCG_REG_CTRL_MASK 0x10000000
#define RLC_CGCG_CGLS_CTRL__CGCG_REG_CTRL__SHIFT 0x1c
#define RLC_CGCG_CGLS_CTRL__SLEEP_MODE_MASK 0x60000000
#define RLC_CGCG_CGLS_CTRL__SLEEP_MODE__SHIFT 0x1d
#define RLC_CGCG_CGLS_CTRL__SPARE_MASK 0x80000000
#define RLC_CGCG_CGLS_CTRL__SPARE__SHIFT 0x1f
/*
 * RLC_CGCG_RAMP_CTRL (covers all 32 bits): four 4-bit unit fields in
 * bits 3:0, 7:4, 11:8 and 15:12, STEP_DELAY_CNT 27:16, STEP_DELAY_UNIT 31:28.
 */
#define RLC_CGCG_RAMP_CTRL__DOWN_DIV_START_UNIT_MASK 0xf
#define RLC_CGCG_RAMP_CTRL__DOWN_DIV_START_UNIT__SHIFT 0x0
#define RLC_CGCG_RAMP_CTRL__DOWN_DIV_STEP_UNIT_MASK 0xf0
#define RLC_CGCG_RAMP_CTRL__DOWN_DIV_STEP_UNIT__SHIFT 0x4
#define RLC_CGCG_RAMP_CTRL__UP_DIV_START_UNIT_MASK 0xf00
#define RLC_CGCG_RAMP_CTRL__UP_DIV_START_UNIT__SHIFT 0x8
#define RLC_CGCG_RAMP_CTRL__UP_DIV_STEP_UNIT_MASK 0xf000
#define RLC_CGCG_RAMP_CTRL__UP_DIV_STEP_UNIT__SHIFT 0xc
#define RLC_CGCG_RAMP_CTRL__STEP_DELAY_CNT_MASK 0xfff0000
#define RLC_CGCG_RAMP_CTRL__STEP_DELAY_CNT__SHIFT 0x10
#define RLC_CGCG_RAMP_CTRL__STEP_DELAY_UNIT_MASK 0xf0000000
#define RLC_CGCG_RAMP_CTRL__STEP_DELAY_UNIT__SHIFT 0x1c
/*
 * RLC dynamic power-gating / load-balance parameter registers (_MASK selects
 * the bits, __SHIFT is the index of the lowest bit of the field).
 * Names ending in "_MASK_MASK" are the _MASK constant of a field whose own
 * name already ends in "_MASK" (e.g. field PG_STATUS_CU_MASK).
 */
/* RLC_DYN_PG_STATUS / RLC_DYN_PG_REQUEST: one full 32-bit field each. */
#define RLC_DYN_PG_STATUS__PG_STATUS_CU_MASK_MASK 0xffffffff
#define RLC_DYN_PG_STATUS__PG_STATUS_CU_MASK__SHIFT 0x0
#define RLC_DYN_PG_REQUEST__PG_REQUEST_CU_MASK_MASK 0xffffffff
#define RLC_DYN_PG_REQUEST__PG_REQUEST_CU_MASK__SHIFT 0x0
/* RLC_PG_DELAY: four 8-bit delays in bits 7:0, 15:8, 23:16 and 31:24. */
#define RLC_PG_DELAY__POWER_UP_DELAY_MASK 0xff
#define RLC_PG_DELAY__POWER_UP_DELAY__SHIFT 0x0
#define RLC_PG_DELAY__POWER_DOWN_DELAY_MASK 0xff00
#define RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT 0x8
#define RLC_PG_DELAY__CMD_PROPAGATE_DELAY_MASK 0xff0000
#define RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT 0x10
#define RLC_PG_DELAY__MEM_SLEEP_DELAY_MASK 0xff000000
#define RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT 0x18
/* RLC_CU_STATUS, RLC_LB_INIT_CU_MASK, RLC_LB_ALWAYS_ACTIVE_CU_MASK: one full 32-bit field each. */
#define RLC_CU_STATUS__WORK_PENDING_MASK 0xffffffff
#define RLC_CU_STATUS__WORK_PENDING__SHIFT 0x0
#define RLC_LB_INIT_CU_MASK__INIT_CU_MASK_MASK 0xffffffff
#define RLC_LB_INIT_CU_MASK__INIT_CU_MASK__SHIFT 0x0
#define RLC_LB_ALWAYS_ACTIVE_CU_MASK__ALWAYS_ACTIVE_CU_MASK_MASK 0xffffffff
#define RLC_LB_ALWAYS_ACTIVE_CU_MASK__ALWAYS_ACTIVE_CU_MASK__SHIFT 0x0
/* RLC_LB_PARAMS: SKIP_L2_CHECK 0, FIFO_SAMPLES 7:1, PG_IDLE_SAMPLES 15:8, PG_IDLE_SAMPLE_INTERVAL 31:16. */
#define RLC_LB_PARAMS__SKIP_L2_CHECK_MASK 0x1
#define RLC_LB_PARAMS__SKIP_L2_CHECK__SHIFT 0x0
#define RLC_LB_PARAMS__FIFO_SAMPLES_MASK 0xfe
#define RLC_LB_PARAMS__FIFO_SAMPLES__SHIFT 0x1
#define RLC_LB_PARAMS__PG_IDLE_SAMPLES_MASK 0xff00
#define RLC_LB_PARAMS__PG_IDLE_SAMPLES__SHIFT 0x8
#define RLC_LB_PARAMS__PG_IDLE_SAMPLE_INTERVAL_MASK 0xffff0000
#define RLC_LB_PARAMS__PG_IDLE_SAMPLE_INTERVAL__SHIFT 0x10
/*
 * RLC_THREAD1_DELAY: four 8-bit fields in bits 7:0, 15:8, 23:16 and 31:24.
 * NOTE(review): "CU_IDEL_DELAY" looks like a misspelling of "CU_IDLE_DELAY";
 * the identifier is part of the header's interface and must stay as spelled.
 */
#define RLC_THREAD1_DELAY__CU_IDEL_DELAY_MASK 0xff
#define RLC_THREAD1_DELAY__CU_IDEL_DELAY__SHIFT 0x0
#define RLC_THREAD1_DELAY__LBPW_INNER_LOOP_DELAY_MASK 0xff00
#define RLC_THREAD1_DELAY__LBPW_INNER_LOOP_DELAY__SHIFT 0x8
#define RLC_THREAD1_DELAY__LBPW_OUTER_LOOP_DELAY_MASK 0xff0000
#define RLC_THREAD1_DELAY__LBPW_OUTER_LOOP_DELAY__SHIFT 0x10
#define RLC_THREAD1_DELAY__SPARE_MASK 0xff000000
#define RLC_THREAD1_DELAY__SPARE__SHIFT 0x18
/* RLC_PG_ALWAYS_ON_CU_MASK: AON_CU_MASK spans the full 32 bits. */
#define RLC_PG_ALWAYS_ON_CU_MASK__AON_CU_MASK_MASK 0xffffffff
#define RLC_PG_ALWAYS_ON_CU_MASK__AON_CU_MASK__SHIFT 0x0
/* RLC_MAX_PG_CU: MAX_POWERED_UP_CU in bits 7:0, SPARE in bits 31:8. */
#define RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK 0xff
#define RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT 0x0
#define RLC_MAX_PG_CU__SPARE_MASK 0xffffff00
#define RLC_MAX_PG_CU__SPARE__SHIFT 0x8
/*
 * RLC_AUTO_PG_CTRL and RLC_SMU_* control registers (_MASK selects the bits,
 * __SHIFT is the index of the lowest bit of the field).
 */
/*
 * RLC_AUTO_PG_CTRL (covers all 32 bits): AUTO_PG_EN 0,
 * AUTO_GRBM_REG_SAVE_ON_IDLE_EN 1, AUTO_WAKE_UP_EN 2,
 * GRBM_REG_SAVE_GFX_IDLE_THRESHOLD 18:3, PG_AFTER_GRBM_REG_SAVE_THRESHOLD 31:19.
 */
#define RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK 0x1
#define RLC_AUTO_PG_CTRL__AUTO_PG_EN__SHIFT 0x0
#define RLC_AUTO_PG_CTRL__AUTO_GRBM_REG_SAVE_ON_IDLE_EN_MASK 0x2
#define RLC_AUTO_PG_CTRL__AUTO_GRBM_REG_SAVE_ON_IDLE_EN__SHIFT 0x1
#define RLC_AUTO_PG_CTRL__AUTO_WAKE_UP_EN_MASK 0x4
#define RLC_AUTO_PG_CTRL__AUTO_WAKE_UP_EN__SHIFT 0x2
#define RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK 0x7fff8
#define RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT 0x3
#define RLC_AUTO_PG_CTRL__PG_AFTER_GRBM_REG_SAVE_THRESHOLD_MASK 0xfff80000
#define RLC_AUTO_PG_CTRL__PG_AFTER_GRBM_REG_SAVE_THRESHOLD__SHIFT 0x13
/*
 * RLC_SMU_GRBM_REG_SAVE_CTRL, RLC_SMU_PG_CTRL, RLC_SMU_PG_WAKE_UP_CTRL:
 * same shape, a START_* flag in bit 0 and SPARE in bits 31:1.
 */
#define RLC_SMU_GRBM_REG_SAVE_CTRL__START_GRBM_REG_SAVE_MASK 0x1
#define RLC_SMU_GRBM_REG_SAVE_CTRL__START_GRBM_REG_SAVE__SHIFT 0x0
#define RLC_SMU_GRBM_REG_SAVE_CTRL__SPARE_MASK 0xfffffffe
#define RLC_SMU_GRBM_REG_SAVE_CTRL__SPARE__SHIFT 0x1
#define RLC_SMU_PG_CTRL__START_PG_MASK 0x1
#define RLC_SMU_PG_CTRL__START_PG__SHIFT 0x0
#define RLC_SMU_PG_CTRL__SPARE_MASK 0xfffffffe
#define RLC_SMU_PG_CTRL__SPARE__SHIFT 0x1
#define RLC_SMU_PG_WAKE_UP_CTRL__START_PG_WAKE_UP_MASK 0x1
#define RLC_SMU_PG_WAKE_UP_CTRL__START_PG_WAKE_UP__SHIFT 0x0
#define RLC_SMU_PG_WAKE_UP_CTRL__SPARE_MASK 0xfffffffe
#define RLC_SMU_PG_WAKE_UP_CTRL__SPARE__SHIFT 0x1
/*
 * RLC_SERDES_RD_* bit-field constants (_MASK selects the bits, __SHIFT is the
 * index of the lowest bit of the field).
 */
/*
 * RLC_SERDES_RD_MASTER_INDEX (covers all 32 bits): CU_ID 3:0, SH_ID 5:4,
 * SE_ID 8:6, SE_NONCU_ID 9, SE_NONCU 10, NON_SE 13:11, DATA_REG_ID 15:14,
 * SPARE 31:16.
 */
#define RLC_SERDES_RD_MASTER_INDEX__CU_ID_MASK 0xf
#define RLC_SERDES_RD_MASTER_INDEX__CU_ID__SHIFT 0x0
#define RLC_SERDES_RD_MASTER_INDEX__SH_ID_MASK 0x30
#define RLC_SERDES_RD_MASTER_INDEX__SH_ID__SHIFT 0x4
#define RLC_SERDES_RD_MASTER_INDEX__SE_ID_MASK 0x1c0
#define RLC_SERDES_RD_MASTER_INDEX__SE_ID__SHIFT 0x6
#define RLC_SERDES_RD_MASTER_INDEX__SE_NONCU_ID_MASK 0x200
#define RLC_SERDES_RD_MASTER_INDEX__SE_NONCU_ID__SHIFT 0x9
#define RLC_SERDES_RD_MASTER_INDEX__SE_NONCU_MASK 0x400
#define RLC_SERDES_RD_MASTER_INDEX__SE_NONCU__SHIFT 0xa
#define RLC_SERDES_RD_MASTER_INDEX__NON_SE_MASK 0x3800
#define RLC_SERDES_RD_MASTER_INDEX__NON_SE__SHIFT 0xb
#define RLC_SERDES_RD_MASTER_INDEX__DATA_REG_ID_MASK 0xc000
#define RLC_SERDES_RD_MASTER_INDEX__DATA_REG_ID__SHIFT 0xe
#define RLC_SERDES_RD_MASTER_INDEX__SPARE_MASK 0xffff0000
#define RLC_SERDES_RD_MASTER_INDEX__SPARE__SHIFT 0x10
/* RLC_SERDES_RD_DATA_0..2: one DATA field spanning the full 32 bits each. */
#define RLC_SERDES_RD_DATA_0__DATA_MASK 0xffffffff
#define RLC_SERDES_RD_DATA_0__DATA__SHIFT 0x0
#define RLC_SERDES_RD_DATA_1__DATA_MASK 0xffffffff
#define RLC_SERDES_RD_DATA_1__DATA__SHIFT 0x0
#define RLC_SERDES_RD_DATA_2__DATA_MASK 0xffffffff
#define RLC_SERDES_RD_DATA_2__DATA__SHIFT 0x0
/*
 * RLC_SERDES_WR_CU_MASTER_MASK and RLC_SERDES_WR_NONCU_MASTER_MASK bit-field
 * constants (_MASK selects the bits, __SHIFT is the index of the lowest bit).
 * Names ending in "_MASK_MASK" are the _MASK constant of a field whose own
 * name already ends in "_MASK" (e.g. field MASTER_MASK).
 */
/* RLC_SERDES_WR_CU_MASTER_MASK: MASTER_MASK spans the full 32 bits. */
#define RLC_SERDES_WR_CU_MASTER_MASK__MASTER_MASK_MASK 0xffffffff
#define RLC_SERDES_WR_CU_MASTER_MASK__MASTER_MASK__SHIFT 0x0
/*
 * RLC_SERDES_WR_NONCU_MASTER_MASK (covers all 32 bits): SE_MASTER_MASK 15:0,
 * then single bits GC (16), TC0 (17), TC1 (18), SPARE0..SPARE3 (19..22),
 * and RESERVED in bits 31:23.
 */
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SE_MASTER_MASK_MASK 0xffff
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SE_MASTER_MASK__SHIFT 0x0
#define RLC_SERDES_WR_NONCU_MASTER_MASK__GC_MASTER_MASK_MASK 0x10000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__GC_MASTER_MASK__SHIFT 0x10
#define RLC_SERDES_WR_NONCU_MASTER_MASK__TC0_MASTER_MASK_MASK 0x20000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__TC0_MASTER_MASK__SHIFT 0x11
#define RLC_SERDES_WR_NONCU_MASTER_MASK__TC1_MASTER_MASK_MASK 0x40000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__TC1_MASTER_MASK__SHIFT 0x12
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE0_MASTER_MASK_MASK 0x80000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE0_MASTER_MASK__SHIFT 0x13
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE1_MASTER_MASK_MASK 0x100000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE1_MASTER_MASK__SHIFT 0x14
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE2_MASTER_MASK_MASK 0x200000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE2_MASTER_MASK__SHIFT 0x15
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE3_MASTER_MASK_MASK 0x400000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__SPARE3_MASTER_MASK__SHIFT 0x16
#define RLC_SERDES_WR_NONCU_MASTER_MASK__RESERVED_MASK 0xff800000
#define RLC_SERDES_WR_NONCU_MASTER_MASK__RESERVED__SHIFT 0x17
/*
 * RLC_SERDES_WR_CTRL layout as encoded by the masks:
 *   BPM_ADDR [7:0], POWER_DOWN [8], POWER_UP [9], P1_SELECT [10],
 *   P2_SELECT [11], WRITE_COMMAND [12], READ_COMMAND [13], RESERVED_1 [15:14],
 *   CGLS_ENABLE [16], CGLS_DISABLE [17], CGLS_ON [18], CGLS_OFF [19],
 *   CGCG_OVERRIDE_0 [20], CGCG_OVERRIDE_1 [21],
 *   MGCG_OVERRIDE_0 [22], MGCG_OVERRIDE_1 [23],
 *   RESERVED_2 [27:24], REG_ADDR [31:28].
 * The fields are contiguous and together cover all 32 bits.
 */
#define RLC_SERDES_WR_CTRL__BPM_ADDR_MASK 0xff
#define RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT 0x0
#define RLC_SERDES_WR_CTRL__POWER_DOWN_MASK 0x100
#define RLC_SERDES_WR_CTRL__POWER_DOWN__SHIFT 0x8
#define RLC_SERDES_WR_CTRL__POWER_UP_MASK 0x200
#define RLC_SERDES_WR_CTRL__POWER_UP__SHIFT 0x9
#define RLC_SERDES_WR_CTRL__P1_SELECT_MASK 0x400
#define RLC_SERDES_WR_CTRL__P1_SELECT__SHIFT 0xa
#define RLC_SERDES_WR_CTRL__P2_SELECT_MASK 0x800
#define RLC_SERDES_WR_CTRL__P2_SELECT__SHIFT 0xb
#define RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK 0x1000
#define RLC_SERDES_WR_CTRL__WRITE_COMMAND__SHIFT 0xc
#define RLC_SERDES_WR_CTRL__READ_COMMAND_MASK 0x2000
#define RLC_SERDES_WR_CTRL__READ_COMMAND__SHIFT 0xd
#define RLC_SERDES_WR_CTRL__RESERVED_1_MASK 0xc000
#define RLC_SERDES_WR_CTRL__RESERVED_1__SHIFT 0xe
#define RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK 0x10000
#define RLC_SERDES_WR_CTRL__CGLS_ENABLE__SHIFT 0x10
#define RLC_SERDES_WR_CTRL__CGLS_DISABLE_MASK 0x20000
#define RLC_SERDES_WR_CTRL__CGLS_DISABLE__SHIFT 0x11
#define RLC_SERDES_WR_CTRL__CGLS_ON_MASK 0x40000
#define RLC_SERDES_WR_CTRL__CGLS_ON__SHIFT 0x12
#define RLC_SERDES_WR_CTRL__CGLS_OFF_MASK 0x80000
#define RLC_SERDES_WR_CTRL__CGLS_OFF__SHIFT 0x13
#define RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK 0x100000
#define RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0__SHIFT 0x14
#define RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_1_MASK 0x200000
#define RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_1__SHIFT 0x15
#define RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK 0x400000
#define RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0__SHIFT 0x16
#define RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK 0x800000
#define RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1__SHIFT 0x17
#define RLC_SERDES_WR_CTRL__RESERVED_2_MASK 0xf000000
#define RLC_SERDES_WR_CTRL__RESERVED_2__SHIFT 0x18
#define RLC_SERDES_WR_CTRL__REG_ADDR_MASK 0xf0000000
#define RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT 0x1c
/* RLC_SERDES_WR_DATA: a single DATA field covering all 32 bits. */
#define RLC_SERDES_WR_DATA__DATA_MASK 0xffffffff
#define RLC_SERDES_WR_DATA__DATA__SHIFT 0x0
/* RLC_SERDES_CU_MASTER_BUSY: one BUSY_BUSY field covering all 32 bits. */
#define RLC_SERDES_CU_MASTER_BUSY__BUSY_BUSY_MASK 0xffffffff
#define RLC_SERDES_CU_MASTER_BUSY__BUSY_BUSY__SHIFT 0x0
/*
 * RLC_SERDES_NONCU_MASTER_BUSY layout as encoded by the masks:
 *   SE_MASTER_BUSY [15:0], GC_MASTER_BUSY [16], TC0_MASTER_BUSY [17],
 *   TC1_MASTER_BUSY [18], SPARE0..SPARE3_MASTER_BUSY [19]..[22],
 *   RESERVED [31:23].
 * Bit positions mirror those of RLC_SERDES_WR_NONCU_MASTER_MASK.
 */
#define RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK 0xffff
#define RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY__SHIFT 0x0
#define RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK 0x10000
#define RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY__SHIFT 0x10
#define RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK 0x20000
#define RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY__SHIFT 0x11
#define RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK 0x40000
#define RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY__SHIFT 0x12
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE0_MASTER_BUSY_MASK 0x80000
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE0_MASTER_BUSY__SHIFT 0x13
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE1_MASTER_BUSY_MASK 0x100000
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE1_MASTER_BUSY__SHIFT 0x14
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE2_MASTER_BUSY_MASK 0x200000
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE2_MASTER_BUSY__SHIFT 0x15
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE3_MASTER_BUSY_MASK 0x400000
#define RLC_SERDES_NONCU_MASTER_BUSY__SPARE3_MASTER_BUSY__SHIFT 0x16
#define RLC_SERDES_NONCU_MASTER_BUSY__RESERVED_MASK 0xff800000
#define RLC_SERDES_NONCU_MASTER_BUSY__RESERVED__SHIFT 0x17
/* RLC_GPM_GENERAL_0..7: eight registers, each a single full-width (32-bit) DATA field. */
#define RLC_GPM_GENERAL_0__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_0__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_1__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_1__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_2__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_2__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_3__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_3__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_4__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_4__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_5__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_5__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_6__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_6__DATA__SHIFT 0x0
#define RLC_GPM_GENERAL_7__DATA_MASK 0xffffffff
#define RLC_GPM_GENERAL_7__DATA__SHIFT 0x0
/* RLC_GPM_CU_PD_TIMEOUT: single full-width TIMEOUT field. */
#define RLC_GPM_CU_PD_TIMEOUT__TIMEOUT_MASK 0xffffffff
#define RLC_GPM_CU_PD_TIMEOUT__TIMEOUT__SHIFT 0x0
/* RLC_GPM_SCRATCH_ADDR: ADDR [8:0], RESERVED [31:9]; RLC_GPM_SCRATCH_DATA: full-width DATA. */
#define RLC_GPM_SCRATCH_ADDR__ADDR_MASK 0x1ff
#define RLC_GPM_SCRATCH_ADDR__ADDR__SHIFT 0x0
#define RLC_GPM_SCRATCH_ADDR__RESERVED_MASK 0xfffffe00
#define RLC_GPM_SCRATCH_ADDR__RESERVED__SHIFT 0x9
#define RLC_GPM_SCRATCH_DATA__DATA_MASK 0xffffffff
#define RLC_GPM_SCRATCH_DATA__DATA__SHIFT 0x0
/* RLC_STATIC_PG_STATUS: single full-width PG_STATUS_CU_MASK field. */
#define RLC_STATIC_PG_STATUS__PG_STATUS_CU_MASK_MASK 0xffffffff
#define RLC_STATIC_PG_STATUS__PG_STATUS_CU_MASK__SHIFT 0x0
/*
 * RLC_GPM_PERF_COUNT_0 and RLC_GPM_PERF_COUNT_1 share one layout:
 *   FEATURE_SEL [3:0], SE_INDEX [7:4], SH_INDEX [11:8], CU_INDEX [15:12],
 *   EVENT_SEL [17:16], UNUSED [19:18], ENABLE [20], RESERVED [31:21].
 */
#define RLC_GPM_PERF_COUNT_0__FEATURE_SEL_MASK 0xf
#define RLC_GPM_PERF_COUNT_0__FEATURE_SEL__SHIFT 0x0
#define RLC_GPM_PERF_COUNT_0__SE_INDEX_MASK 0xf0
#define RLC_GPM_PERF_COUNT_0__SE_INDEX__SHIFT 0x4
#define RLC_GPM_PERF_COUNT_0__SH_INDEX_MASK 0xf00
#define RLC_GPM_PERF_COUNT_0__SH_INDEX__SHIFT 0x8
#define RLC_GPM_PERF_COUNT_0__CU_INDEX_MASK 0xf000
#define RLC_GPM_PERF_COUNT_0__CU_INDEX__SHIFT 0xc
#define RLC_GPM_PERF_COUNT_0__EVENT_SEL_MASK 0x30000
#define RLC_GPM_PERF_COUNT_0__EVENT_SEL__SHIFT 0x10
#define RLC_GPM_PERF_COUNT_0__UNUSED_MASK 0xc0000
#define RLC_GPM_PERF_COUNT_0__UNUSED__SHIFT 0x12
#define RLC_GPM_PERF_COUNT_0__ENABLE_MASK 0x100000
#define RLC_GPM_PERF_COUNT_0__ENABLE__SHIFT 0x14
#define RLC_GPM_PERF_COUNT_0__RESERVED_MASK 0xffe00000
#define RLC_GPM_PERF_COUNT_0__RESERVED__SHIFT 0x15
#define RLC_GPM_PERF_COUNT_1__FEATURE_SEL_MASK 0xf
#define RLC_GPM_PERF_COUNT_1__FEATURE_SEL__SHIFT 0x0
#define RLC_GPM_PERF_COUNT_1__SE_INDEX_MASK 0xf0
#define RLC_GPM_PERF_COUNT_1__SE_INDEX__SHIFT 0x4
#define RLC_GPM_PERF_COUNT_1__SH_INDEX_MASK 0xf00
#define RLC_GPM_PERF_COUNT_1__SH_INDEX__SHIFT 0x8
#define RLC_GPM_PERF_COUNT_1__CU_INDEX_MASK 0xf000
#define RLC_GPM_PERF_COUNT_1__CU_INDEX__SHIFT 0xc
#define RLC_GPM_PERF_COUNT_1__EVENT_SEL_MASK 0x30000
#define RLC_GPM_PERF_COUNT_1__EVENT_SEL__SHIFT 0x10
#define RLC_GPM_PERF_COUNT_1__UNUSED_MASK 0xc0000
#define RLC_GPM_PERF_COUNT_1__UNUSED__SHIFT 0x12
#define RLC_GPM_PERF_COUNT_1__ENABLE_MASK 0x100000
#define RLC_GPM_PERF_COUNT_1__ENABLE__SHIFT 0x14
#define RLC_GPM_PERF_COUNT_1__RESERVED_MASK 0xffe00000
#define RLC_GPM_PERF_COUNT_1__RESERVED__SHIFT 0x15
/* RLC_GPR_REG1 / RLC_GPR_REG2: each a single full-width (32-bit) DATA field. */
#define RLC_GPR_REG1__DATA_MASK 0xffffffff
#define RLC_GPR_REG1__DATA__SHIFT 0x0
#define RLC_GPR_REG2__DATA_MASK 0xffffffff
#define RLC_GPR_REG2__DATA__SHIFT 0x0
/* RLC_SPM_VMID: RLC_SPM_VMID [3:0], RESERVED [31:4]. */
#define RLC_SPM_VMID__RLC_SPM_VMID_MASK 0xf
#define RLC_SPM_VMID__RLC_SPM_VMID__SHIFT 0x0
#define RLC_SPM_VMID__RESERVED_MASK 0xfffffff0
#define RLC_SPM_VMID__RESERVED__SHIFT 0x4
/* RLC_SPM_INT_CNTL and RLC_SPM_INT_STATUS: one flag at bit 0, RESERVED [31:1]. */
#define RLC_SPM_INT_CNTL__RLC_SPM_INT_CNTL_MASK 0x1
#define RLC_SPM_INT_CNTL__RLC_SPM_INT_CNTL__SHIFT 0x0
#define RLC_SPM_INT_CNTL__RESERVED_MASK 0xfffffffe
#define RLC_SPM_INT_CNTL__RESERVED__SHIFT 0x1
#define RLC_SPM_INT_STATUS__RLC_SPM_INT_STATUS_MASK 0x1
#define RLC_SPM_INT_STATUS__RLC_SPM_INT_STATUS__SHIFT 0x0
#define RLC_SPM_INT_STATUS__RESERVED_MASK 0xfffffffe
#define RLC_SPM_INT_STATUS__RESERVED__SHIFT 0x1
/*
 * RLC_SPM_DEBUG_SELECT: SELECT [7:0], RESERVED [14:8],
 * RLC_SPM_DEBUG_MODE [15], RLC_SPM_NUM_SAMPLE [31:16].
 * RLC_SPM_DEBUG: single full-width DATA field.
 */
#define RLC_SPM_DEBUG_SELECT__SELECT_MASK 0xff
#define RLC_SPM_DEBUG_SELECT__SELECT__SHIFT 0x0
#define RLC_SPM_DEBUG_SELECT__RESERVED_MASK 0x7f00
#define RLC_SPM_DEBUG_SELECT__RESERVED__SHIFT 0x8
#define RLC_SPM_DEBUG_SELECT__RLC_SPM_DEBUG_MODE_MASK 0x8000
#define RLC_SPM_DEBUG_SELECT__RLC_SPM_DEBUG_MODE__SHIFT 0xf
#define RLC_SPM_DEBUG_SELECT__RLC_SPM_NUM_SAMPLE_MASK 0xffff0000
#define RLC_SPM_DEBUG_SELECT__RLC_SPM_NUM_SAMPLE__SHIFT 0x10
#define RLC_SPM_DEBUG__DATA_MASK 0xffffffff
#define RLC_SPM_DEBUG__DATA__SHIFT 0x0
/* RLC_GPM_LOG_ADDR / _SIZE / _CONT: each a single full-width (32-bit) field. */
#define RLC_GPM_LOG_ADDR__ADDR_MASK 0xffffffff
#define RLC_GPM_LOG_ADDR__ADDR__SHIFT 0x0
#define RLC_GPM_LOG_SIZE__SIZE_MASK 0xffffffff
#define RLC_GPM_LOG_SIZE__SIZE__SHIFT 0x0
#define RLC_GPM_LOG_CONT__CONT_MASK 0xffffffff
#define RLC_GPM_LOG_CONT__CONT__SHIFT 0x0
/*
 * RLC_SPM_PERFMON_CNTL: RESERVED1 [11:0], PERFMON_RING_MODE [13:12],
 * RESERVED [15:14], PERFMON_SAMPLE_INTERVAL [31:16].
 */
#define RLC_SPM_PERFMON_CNTL__RESERVED1_MASK 0xfff
#define RLC_SPM_PERFMON_CNTL__RESERVED1__SHIFT 0x0
#define RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK 0x3000
#define RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE__SHIFT 0xc
#define RLC_SPM_PERFMON_CNTL__RESERVED_MASK 0xc000
#define RLC_SPM_PERFMON_CNTL__RESERVED__SHIFT 0xe
#define RLC_SPM_PERFMON_CNTL__PERFMON_SAMPLE_INTERVAL_MASK 0xffff0000
#define RLC_SPM_PERFMON_CNTL__PERFMON_SAMPLE_INTERVAL__SHIFT 0x10
/*
 * RLC_SPM_PERFMON_RING_BASE_LO: full-width RING_BASE_LO.
 * RLC_SPM_PERFMON_RING_BASE_HI: RING_BASE_HI [15:0], RESERVED [31:16].
 * RLC_SPM_PERFMON_RING_SIZE:    full-width RING_BASE_SIZE.
 */
#define RLC_SPM_PERFMON_RING_BASE_LO__RING_BASE_LO_MASK 0xffffffff
#define RLC_SPM_PERFMON_RING_BASE_LO__RING_BASE_LO__SHIFT 0x0
#define RLC_SPM_PERFMON_RING_BASE_HI__RING_BASE_HI_MASK 0xffff
#define RLC_SPM_PERFMON_RING_BASE_HI__RING_BASE_HI__SHIFT 0x0
#define RLC_SPM_PERFMON_RING_BASE_HI__RESERVED_MASK 0xffff0000
#define RLC_SPM_PERFMON_RING_BASE_HI__RESERVED__SHIFT 0x10
#define RLC_SPM_PERFMON_RING_SIZE__RING_BASE_SIZE_MASK 0xffffffff
#define RLC_SPM_PERFMON_RING_SIZE__RING_BASE_SIZE__SHIFT 0x0
/*
 * RLC_SPM_PERFMON_SEGMENT_SIZE: PERFMON_SEGMENT_SIZE [7:0], RESERVED1 [10:8],
 * GLOBAL_NUM_LINE [15:11], SE0_NUM_LINE [20:16], SE1_NUM_LINE [25:21],
 * SE2_NUM_LINE [30:26], RESERVED [31].
 */
#define RLC_SPM_PERFMON_SEGMENT_SIZE__PERFMON_SEGMENT_SIZE_MASK 0xff
#define RLC_SPM_PERFMON_SEGMENT_SIZE__PERFMON_SEGMENT_SIZE__SHIFT 0x0
#define RLC_SPM_PERFMON_SEGMENT_SIZE__RESERVED1_MASK 0x700
#define RLC_SPM_PERFMON_SEGMENT_SIZE__RESERVED1__SHIFT 0x8
#define RLC_SPM_PERFMON_SEGMENT_SIZE__GLOBAL_NUM_LINE_MASK 0xf800
#define RLC_SPM_PERFMON_SEGMENT_SIZE__GLOBAL_NUM_LINE__SHIFT 0xb
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE0_NUM_LINE_MASK 0x1f0000
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE0_NUM_LINE__SHIFT 0x10
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE1_NUM_LINE_MASK 0x3e00000
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE1_NUM_LINE__SHIFT 0x15
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE2_NUM_LINE_MASK 0x7c000000
#define RLC_SPM_PERFMON_SEGMENT_SIZE__SE2_NUM_LINE__SHIFT 0x1a
#define RLC_SPM_PERFMON_SEGMENT_SIZE__RESERVED_MASK 0x80000000
#define RLC_SPM_PERFMON_SEGMENT_SIZE__RESERVED__SHIFT 0x1f
/* RLC_SPM_SE_MUXSEL_ADDR / _DATA: each a single full-width (32-bit) field. */
#define RLC_SPM_SE_MUXSEL_ADDR__PERFMON_SEL_ADDR_MASK 0xffffffff
#define RLC_SPM_SE_MUXSEL_ADDR__PERFMON_SEL_ADDR__SHIFT 0x0
#define RLC_SPM_SE_MUXSEL_DATA__PERFMON_SEL_DATA_MASK 0xffffffff
#define RLC_SPM_SE_MUXSEL_DATA__PERFMON_SEL_DATA__SHIFT 0x0
/*
 * RLC_SPM_<UNIT>_PERFMON_SAMPLE_DELAY registers, one per <UNIT> prefix:
 * CPG, CPC, CPF, CB, DB, PA, GDS, IA, SC, TCC, TCA, TCP, TA, TD, VGT, SPI,
 * SQG, TCS, SX. All nineteen share the same layout:
 *   PERFMON_SAMPLE_DELAY [7:0], RESERVED [31:8].
 */
#define RLC_SPM_CPG_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CPG_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CPG_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CPG_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_CPC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CPC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CPC_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CPC_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_CPF_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CPF_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CPF_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CPF_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_CB_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CB_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CB_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CB_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_DB_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_DB_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_DB_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_DB_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_PA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_PA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_PA_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_PA_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_GDS_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_GDS_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_GDS_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_GDS_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_IA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_IA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_IA_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_IA_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_SC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_SC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_SC_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_SC_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TCC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TCC_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TCC_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TCC_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TCA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TCA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TCA_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TCA_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TCP_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TCP_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TCP_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TCP_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TA_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TA_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TA_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TD_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TD_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TD_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TD_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_VGT_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_VGT_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_VGT_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_VGT_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_SPI_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_SPI_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_SPI_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_SPI_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_SQG_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_SQG_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_SQG_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_SQG_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_TCS_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_TCS_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_TCS_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_TCS_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_SX_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_SX_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_SX_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_SX_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
/*
 * RLC_SPM_GLOBAL_MUXSEL_ADDR / _DATA, RLC_SPM_RING_RDPTR and
 * RLC_SPM_SEGMENT_THRESHOLD: each a single full-width (32-bit) field.
 */
#define RLC_SPM_GLOBAL_MUXSEL_ADDR__PERFMON_SEL_ADDR_MASK 0xffffffff
#define RLC_SPM_GLOBAL_MUXSEL_ADDR__PERFMON_SEL_ADDR__SHIFT 0x0
#define RLC_SPM_GLOBAL_MUXSEL_DATA__PERFMON_SEL_DATA_MASK 0xffffffff
#define RLC_SPM_GLOBAL_MUXSEL_DATA__PERFMON_SEL_DATA__SHIFT 0x0
#define RLC_SPM_RING_RDPTR__PERFMON_RING_RDPTR_MASK 0xffffffff
#define RLC_SPM_RING_RDPTR__PERFMON_RING_RDPTR__SHIFT 0x0
#define RLC_SPM_SEGMENT_THRESHOLD__NUM_SEGMENT_THRESHOLD_MASK 0xffffffff
#define RLC_SPM_SEGMENT_THRESHOLD__NUM_SEGMENT_THRESHOLD__SHIFT 0x0
/*
 * RLC_SPM_{DBR0,DBR1,CBR0,CBR1}_PERFMON_SAMPLE_DELAY: same layout as the
 * other *_PERFMON_SAMPLE_DELAY registers in this file:
 *   PERFMON_SAMPLE_DELAY [7:0], RESERVED [31:8].
 */
#define RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_DBR0_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_DBR1_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CBR0_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
#define RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY_MASK 0xff
#define RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY__PERFMON_SAMPLE_DELAY__SHIFT 0x0
#define RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY__RESERVED_MASK 0xffffff00
#define RLC_SPM_CBR1_PERFMON_SAMPLE_DELAY__RESERVED__SHIFT 0x8
/*
 * SPI_PS_INPUT_CNTL_0 .. SPI_PS_INPUT_CNTL_9.
 * Every register in this run has the same six fields:
 *   OFFSET [5:0], DEFAULT_VAL [9:8], FLAT_SHADE [10],
 *   CYL_WRAP [16:13], PT_SPRITE_TEX [17], DUP [18].
 * Bits 7:6, 12:11 and 31:19 have no macro defined here.
 */
#define SPI_PS_INPUT_CNTL_0__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_0__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_0__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_0__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_0__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_0__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_0__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_0__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_0__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_0__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_0__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_0__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_1__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_1__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_1__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_1__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_1__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_1__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_1__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_1__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_1__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_1__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_1__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_1__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_2__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_2__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_2__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_2__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_2__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_2__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_2__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_2__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_2__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_2__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_2__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_2__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_3__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_3__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_3__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_3__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_3__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_3__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_3__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_3__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_3__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_3__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_3__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_3__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_4__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_4__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_4__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_4__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_4__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_4__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_4__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_4__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_4__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_4__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_4__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_4__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_5__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_5__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_5__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_5__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_5__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_5__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_5__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_5__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_5__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_5__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_5__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_5__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_6__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_6__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_6__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_6__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_6__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_6__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_6__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_6__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_6__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_6__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_6__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_6__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_7__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_7__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_7__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_7__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_7__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_7__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_7__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_7__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_7__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_7__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_7__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_7__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_8__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_8__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_8__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_8__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_8__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_8__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_8__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_8__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_8__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_8__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_8__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_8__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_9__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_9__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_9__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_9__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_9__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_9__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_9__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_9__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_9__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_9__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_9__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_9__DUP__SHIFT 0x12
/*
 * SPI_PS_INPUT_CNTL_10 .. SPI_PS_INPUT_CNTL_19.
 * Every register in this run has the same six fields:
 *   OFFSET [5:0], DEFAULT_VAL [9:8], FLAT_SHADE [10],
 *   CYL_WRAP [16:13], PT_SPRITE_TEX [17], DUP [18].
 * _19 is the last index that defines CYL_WRAP and PT_SPRITE_TEX; the
 * SPI_PS_INPUT_CNTL_20+ definitions that follow omit those two fields.
 */
#define SPI_PS_INPUT_CNTL_10__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_10__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_10__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_10__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_10__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_10__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_10__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_10__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_10__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_10__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_10__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_10__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_11__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_11__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_11__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_11__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_11__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_11__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_11__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_11__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_11__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_11__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_11__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_11__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_12__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_12__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_12__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_12__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_12__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_12__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_12__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_12__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_12__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_12__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_12__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_12__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_13__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_13__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_13__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_13__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_13__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_13__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_13__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_13__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_13__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_13__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_13__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_13__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_14__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_14__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_14__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_14__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_14__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_14__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_14__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_14__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_14__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_14__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_14__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_14__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_15__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_15__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_15__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_15__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_15__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_15__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_15__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_15__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_15__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_15__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_15__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_15__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_16__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_16__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_16__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_16__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_16__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_16__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_16__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_16__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_16__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_16__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_16__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_16__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_17__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_17__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_17__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_17__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_17__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_17__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_17__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_17__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_17__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_17__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_17__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_17__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_18__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_18__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_18__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_18__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_18__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_18__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_18__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_18__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_18__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_18__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_18__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_18__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_19__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_19__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_19__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_19__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_19__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_19__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_19__CYL_WRAP_MASK 0x1e000
#define SPI_PS_INPUT_CNTL_19__CYL_WRAP__SHIFT 0xd
#define SPI_PS_INPUT_CNTL_19__PT_SPRITE_TEX_MASK 0x20000
#define SPI_PS_INPUT_CNTL_19__PT_SPRITE_TEX__SHIFT 0x11
#define SPI_PS_INPUT_CNTL_19__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_19__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_20__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_20__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_20__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_20__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_20__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_20__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_20__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_20__DUP__SHIFT 0x12
#define SPI_PS_INPUT_CNTL_21__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_21__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_21__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_21__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_21__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_21__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_21__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_21__DUP__SHIFT 0x12
/*
 * SPI_PS_INPUT_CNTL_22 .. SPI_PS_INPUT_CNTL_31 field definitions.
 *
 * Every field is described by a pair of macros:
 *   <REG>__<FIELD>_MASK   - bits the field occupies within the 32-bit register
 *   <REG>__<FIELD>__SHIFT - bit position of the field's least-significant bit
 *
 * All ten registers below use the same layout:
 *   OFFSET      bits [5:0]
 *   DEFAULT_VAL bits [9:8]
 *   FLAT_SHADE  bit  10
 *   DUP         bit  18
 */
/* SPI_PS_INPUT_CNTL_22 */
#define SPI_PS_INPUT_CNTL_22__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_22__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_22__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_22__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_22__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_22__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_22__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_22__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_23 */
#define SPI_PS_INPUT_CNTL_23__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_23__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_23__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_23__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_23__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_23__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_23__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_23__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_24 */
#define SPI_PS_INPUT_CNTL_24__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_24__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_24__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_24__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_24__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_24__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_24__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_24__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_25 */
#define SPI_PS_INPUT_CNTL_25__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_25__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_25__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_25__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_25__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_25__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_25__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_25__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_26 */
#define SPI_PS_INPUT_CNTL_26__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_26__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_26__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_26__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_26__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_26__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_26__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_26__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_27 */
#define SPI_PS_INPUT_CNTL_27__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_27__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_27__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_27__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_27__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_27__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_27__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_27__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_28 */
#define SPI_PS_INPUT_CNTL_28__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_28__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_28__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_28__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_28__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_28__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_28__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_28__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_29 */
#define SPI_PS_INPUT_CNTL_29__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_29__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_29__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_29__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_29__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_29__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_29__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_29__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_30 */
#define SPI_PS_INPUT_CNTL_30__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_30__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_30__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_30__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_30__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_30__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_30__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_30__DUP__SHIFT 0x12
/* SPI_PS_INPUT_CNTL_31 */
#define SPI_PS_INPUT_CNTL_31__OFFSET_MASK 0x3f
#define SPI_PS_INPUT_CNTL_31__OFFSET__SHIFT 0x0
#define SPI_PS_INPUT_CNTL_31__DEFAULT_VAL_MASK 0x300
#define SPI_PS_INPUT_CNTL_31__DEFAULT_VAL__SHIFT 0x8
#define SPI_PS_INPUT_CNTL_31__FLAT_SHADE_MASK 0x400
#define SPI_PS_INPUT_CNTL_31__FLAT_SHADE__SHIFT 0xa
#define SPI_PS_INPUT_CNTL_31__DUP_MASK 0x40000
#define SPI_PS_INPUT_CNTL_31__DUP__SHIFT 0x12
/*
 * SPI_VS_OUT_CONFIG field masks and shifts.
 *   VS_EXPORT_COUNT bits [5:1]  (note: the field starts at bit 1, not bit 0)
 *   VS_HALF_PACK    bit  6
 */
#define SPI_VS_OUT_CONFIG__VS_EXPORT_COUNT_MASK 0x3e
#define SPI_VS_OUT_CONFIG__VS_EXPORT_COUNT__SHIFT 0x1
#define SPI_VS_OUT_CONFIG__VS_HALF_PACK_MASK 0x40
#define SPI_VS_OUT_CONFIG__VS_HALF_PACK__SHIFT 0x6
/*
 * SPI_PS_INPUT_ENA field masks and shifts.
 * Sixteen single-bit fields occupying bits 0 through 15, one bit each, in the
 * order listed below. The layout is identical to the SPI_PS_INPUT_ADDR
 * definitions in this file.
 */
#define SPI_PS_INPUT_ENA__PERSP_SAMPLE_ENA_MASK 0x1
#define SPI_PS_INPUT_ENA__PERSP_SAMPLE_ENA__SHIFT 0x0
#define SPI_PS_INPUT_ENA__PERSP_CENTER_ENA_MASK 0x2
#define SPI_PS_INPUT_ENA__PERSP_CENTER_ENA__SHIFT 0x1
#define SPI_PS_INPUT_ENA__PERSP_CENTROID_ENA_MASK 0x4
#define SPI_PS_INPUT_ENA__PERSP_CENTROID_ENA__SHIFT 0x2
#define SPI_PS_INPUT_ENA__PERSP_PULL_MODEL_ENA_MASK 0x8
#define SPI_PS_INPUT_ENA__PERSP_PULL_MODEL_ENA__SHIFT 0x3
#define SPI_PS_INPUT_ENA__LINEAR_SAMPLE_ENA_MASK 0x10
#define SPI_PS_INPUT_ENA__LINEAR_SAMPLE_ENA__SHIFT 0x4
#define SPI_PS_INPUT_ENA__LINEAR_CENTER_ENA_MASK 0x20
#define SPI_PS_INPUT_ENA__LINEAR_CENTER_ENA__SHIFT 0x5
#define SPI_PS_INPUT_ENA__LINEAR_CENTROID_ENA_MASK 0x40
#define SPI_PS_INPUT_ENA__LINEAR_CENTROID_ENA__SHIFT 0x6
#define SPI_PS_INPUT_ENA__LINE_STIPPLE_TEX_ENA_MASK 0x80
#define SPI_PS_INPUT_ENA__LINE_STIPPLE_TEX_ENA__SHIFT 0x7
#define SPI_PS_INPUT_ENA__POS_X_FLOAT_ENA_MASK 0x100
#define SPI_PS_INPUT_ENA__POS_X_FLOAT_ENA__SHIFT 0x8
#define SPI_PS_INPUT_ENA__POS_Y_FLOAT_ENA_MASK 0x200
#define SPI_PS_INPUT_ENA__POS_Y_FLOAT_ENA__SHIFT 0x9
#define SPI_PS_INPUT_ENA__POS_Z_FLOAT_ENA_MASK 0x400
#define SPI_PS_INPUT_ENA__POS_Z_FLOAT_ENA__SHIFT 0xa
#define SPI_PS_INPUT_ENA__POS_W_FLOAT_ENA_MASK 0x800
#define SPI_PS_INPUT_ENA__POS_W_FLOAT_ENA__SHIFT 0xb
#define SPI_PS_INPUT_ENA__FRONT_FACE_ENA_MASK 0x1000
#define SPI_PS_INPUT_ENA__FRONT_FACE_ENA__SHIFT 0xc
#define SPI_PS_INPUT_ENA__ANCILLARY_ENA_MASK 0x2000
#define SPI_PS_INPUT_ENA__ANCILLARY_ENA__SHIFT 0xd
#define SPI_PS_INPUT_ENA__SAMPLE_COVERAGE_ENA_MASK 0x4000
#define SPI_PS_INPUT_ENA__SAMPLE_COVERAGE_ENA__SHIFT 0xe
#define SPI_PS_INPUT_ENA__POS_FIXED_PT_ENA_MASK 0x8000
#define SPI_PS_INPUT_ENA__POS_FIXED_PT_ENA__SHIFT 0xf
/*
 * SPI_PS_INPUT_ADDR field masks and shifts.
 * Sixteen single-bit fields occupying bits 0 through 15, one bit each. Field
 * names and bit positions match the SPI_PS_INPUT_ENA definitions in this file.
 */
#define SPI_PS_INPUT_ADDR__PERSP_SAMPLE_ENA_MASK 0x1
#define SPI_PS_INPUT_ADDR__PERSP_SAMPLE_ENA__SHIFT 0x0
#define SPI_PS_INPUT_ADDR__PERSP_CENTER_ENA_MASK 0x2
#define SPI_PS_INPUT_ADDR__PERSP_CENTER_ENA__SHIFT 0x1
#define SPI_PS_INPUT_ADDR__PERSP_CENTROID_ENA_MASK 0x4
#define SPI_PS_INPUT_ADDR__PERSP_CENTROID_ENA__SHIFT 0x2
#define SPI_PS_INPUT_ADDR__PERSP_PULL_MODEL_ENA_MASK 0x8
#define SPI_PS_INPUT_ADDR__PERSP_PULL_MODEL_ENA__SHIFT 0x3
#define SPI_PS_INPUT_ADDR__LINEAR_SAMPLE_ENA_MASK 0x10
#define SPI_PS_INPUT_ADDR__LINEAR_SAMPLE_ENA__SHIFT 0x4
#define SPI_PS_INPUT_ADDR__LINEAR_CENTER_ENA_MASK 0x20
#define SPI_PS_INPUT_ADDR__LINEAR_CENTER_ENA__SHIFT 0x5
#define SPI_PS_INPUT_ADDR__LINEAR_CENTROID_ENA_MASK 0x40
#define SPI_PS_INPUT_ADDR__LINEAR_CENTROID_ENA__SHIFT 0x6
#define SPI_PS_INPUT_ADDR__LINE_STIPPLE_TEX_ENA_MASK 0x80
#define SPI_PS_INPUT_ADDR__LINE_STIPPLE_TEX_ENA__SHIFT 0x7
#define SPI_PS_INPUT_ADDR__POS_X_FLOAT_ENA_MASK 0x100
#define SPI_PS_INPUT_ADDR__POS_X_FLOAT_ENA__SHIFT 0x8
#define SPI_PS_INPUT_ADDR__POS_Y_FLOAT_ENA_MASK 0x200
#define SPI_PS_INPUT_ADDR__POS_Y_FLOAT_ENA__SHIFT 0x9
#define SPI_PS_INPUT_ADDR__POS_Z_FLOAT_ENA_MASK 0x400
#define SPI_PS_INPUT_ADDR__POS_Z_FLOAT_ENA__SHIFT 0xa
#define SPI_PS_INPUT_ADDR__POS_W_FLOAT_ENA_MASK 0x800
#define SPI_PS_INPUT_ADDR__POS_W_FLOAT_ENA__SHIFT 0xb
#define SPI_PS_INPUT_ADDR__FRONT_FACE_ENA_MASK 0x1000
#define SPI_PS_INPUT_ADDR__FRONT_FACE_ENA__SHIFT 0xc
#define SPI_PS_INPUT_ADDR__ANCILLARY_ENA_MASK 0x2000
#define SPI_PS_INPUT_ADDR__ANCILLARY_ENA__SHIFT 0xd
#define SPI_PS_INPUT_ADDR__SAMPLE_COVERAGE_ENA_MASK 0x4000
#define SPI_PS_INPUT_ADDR__SAMPLE_COVERAGE_ENA__SHIFT 0xe
#define SPI_PS_INPUT_ADDR__POS_FIXED_PT_ENA_MASK 0x8000
#define SPI_PS_INPUT_ADDR__POS_FIXED_PT_ENA__SHIFT 0xf
/*
 * SPI_INTERP_CONTROL_0 field masks and shifts.
 *   FLAT_SHADE_ENA    bit  0
 *   PNT_SPRITE_ENA    bit  1
 *   PNT_SPRITE_OVRD_X bits [4:2]
 *   PNT_SPRITE_OVRD_Y bits [7:5]
 *   PNT_SPRITE_OVRD_Z bits [10:8]
 *   PNT_SPRITE_OVRD_W bits [13:11]
 *   PNT_SPRITE_TOP_1  bit  14
 */
#define SPI_INTERP_CONTROL_0__FLAT_SHADE_ENA_MASK 0x1
#define SPI_INTERP_CONTROL_0__FLAT_SHADE_ENA__SHIFT 0x0
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_ENA_MASK 0x2
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_ENA__SHIFT 0x1
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_X_MASK 0x1c
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_X__SHIFT 0x2
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_Y_MASK 0xe0
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_Y__SHIFT 0x5
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_Z_MASK 0x700
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_Z__SHIFT 0x8
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_W_MASK 0x3800
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_OVRD_W__SHIFT 0xb
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_TOP_1_MASK 0x4000
#define SPI_INTERP_CONTROL_0__PNT_SPRITE_TOP_1__SHIFT 0xe
/*
 * SPI_PS_IN_CONTROL field masks and shifts.
 *   NUM_INTERP          bits [5:0]
 *   PARAM_GEN           bit  6
 *   BC_OPTIMIZE_DISABLE bit  14
 */
#define SPI_PS_IN_CONTROL__NUM_INTERP_MASK 0x3f
#define SPI_PS_IN_CONTROL__NUM_INTERP__SHIFT 0x0
#define SPI_PS_IN_CONTROL__PARAM_GEN_MASK 0x40
#define SPI_PS_IN_CONTROL__PARAM_GEN__SHIFT 0x6
#define SPI_PS_IN_CONTROL__BC_OPTIMIZE_DISABLE_MASK 0x4000
#define SPI_PS_IN_CONTROL__BC_OPTIMIZE_DISABLE__SHIFT 0xe
/*
 * SPI_BARYC_CNTL field masks and shifts.
 * The fields are sparse: each one starts on a 4-bit boundary.
 *   PERSP_CENTER_CNTL    bit  0
 *   PERSP_CENTROID_CNTL  bit  4
 *   LINEAR_CENTER_CNTL   bit  8
 *   LINEAR_CENTROID_CNTL bit  12
 *   POS_FLOAT_LOCATION   bits [17:16]
 *   POS_FLOAT_ULC        bit  20
 *   FRONT_FACE_ALL_BITS  bit  24
 */
#define SPI_BARYC_CNTL__PERSP_CENTER_CNTL_MASK 0x1
#define SPI_BARYC_CNTL__PERSP_CENTER_CNTL__SHIFT 0x0
#define SPI_BARYC_CNTL__PERSP_CENTROID_CNTL_MASK 0x10
#define SPI_BARYC_CNTL__PERSP_CENTROID_CNTL__SHIFT 0x4
#define SPI_BARYC_CNTL__LINEAR_CENTER_CNTL_MASK 0x100
#define SPI_BARYC_CNTL__LINEAR_CENTER_CNTL__SHIFT 0x8
#define SPI_BARYC_CNTL__LINEAR_CENTROID_CNTL_MASK 0x1000
#define SPI_BARYC_CNTL__LINEAR_CENTROID_CNTL__SHIFT 0xc
#define SPI_BARYC_CNTL__POS_FLOAT_LOCATION_MASK 0x30000
#define SPI_BARYC_CNTL__POS_FLOAT_LOCATION__SHIFT 0x10
#define SPI_BARYC_CNTL__POS_FLOAT_ULC_MASK 0x100000
#define SPI_BARYC_CNTL__POS_FLOAT_ULC__SHIFT 0x14
#define SPI_BARYC_CNTL__FRONT_FACE_ALL_BITS_MASK 0x1000000
#define SPI_BARYC_CNTL__FRONT_FACE_ALL_BITS__SHIFT 0x18
/*
 * SPI_TMPRING_SIZE field masks and shifts.
 *   WAVES    bits [11:0]
 *   WAVESIZE bits [24:12]
 */
#define SPI_TMPRING_SIZE__WAVES_MASK 0xfff
#define SPI_TMPRING_SIZE__WAVES__SHIFT 0x0
#define SPI_TMPRING_SIZE__WAVESIZE_MASK 0x1fff000
#define SPI_TMPRING_SIZE__WAVESIZE__SHIFT 0xc
/*
 * SPI_SHADER_POS_FORMAT / SPI_SHADER_Z_FORMAT / SPI_SHADER_COL_FORMAT field
 * masks and shifts. Every *_EXPORT_FORMAT field is 4 bits wide and the fields
 * are packed back to back, so entry N of a register sits at bits [4N+3:4N].
 */
/* SPI_SHADER_POS_FORMAT: POS0..POS3, bits [15:0] */
#define SPI_SHADER_POS_FORMAT__POS0_EXPORT_FORMAT_MASK 0xf
#define SPI_SHADER_POS_FORMAT__POS0_EXPORT_FORMAT__SHIFT 0x0
#define SPI_SHADER_POS_FORMAT__POS1_EXPORT_FORMAT_MASK 0xf0
#define SPI_SHADER_POS_FORMAT__POS1_EXPORT_FORMAT__SHIFT 0x4
#define SPI_SHADER_POS_FORMAT__POS2_EXPORT_FORMAT_MASK 0xf00
#define SPI_SHADER_POS_FORMAT__POS2_EXPORT_FORMAT__SHIFT 0x8
#define SPI_SHADER_POS_FORMAT__POS3_EXPORT_FORMAT_MASK 0xf000
#define SPI_SHADER_POS_FORMAT__POS3_EXPORT_FORMAT__SHIFT 0xc
/* SPI_SHADER_Z_FORMAT: single field, bits [3:0] */
#define SPI_SHADER_Z_FORMAT__Z_EXPORT_FORMAT_MASK 0xf
#define SPI_SHADER_Z_FORMAT__Z_EXPORT_FORMAT__SHIFT 0x0
/* SPI_SHADER_COL_FORMAT: COL0..COL7, filling all 32 bits */
#define SPI_SHADER_COL_FORMAT__COL0_EXPORT_FORMAT_MASK 0xf
#define SPI_SHADER_COL_FORMAT__COL0_EXPORT_FORMAT__SHIFT 0x0
#define SPI_SHADER_COL_FORMAT__COL1_EXPORT_FORMAT_MASK 0xf0
#define SPI_SHADER_COL_FORMAT__COL1_EXPORT_FORMAT__SHIFT 0x4
#define SPI_SHADER_COL_FORMAT__COL2_EXPORT_FORMAT_MASK 0xf00
#define SPI_SHADER_COL_FORMAT__COL2_EXPORT_FORMAT__SHIFT 0x8
#define SPI_SHADER_COL_FORMAT__COL3_EXPORT_FORMAT_MASK 0xf000
#define SPI_SHADER_COL_FORMAT__COL3_EXPORT_FORMAT__SHIFT 0xc
#define SPI_SHADER_COL_FORMAT__COL4_EXPORT_FORMAT_MASK 0xf0000
#define SPI_SHADER_COL_FORMAT__COL4_EXPORT_FORMAT__SHIFT 0x10
#define SPI_SHADER_COL_FORMAT__COL5_EXPORT_FORMAT_MASK 0xf00000
#define SPI_SHADER_COL_FORMAT__COL5_EXPORT_FORMAT__SHIFT 0x14
#define SPI_SHADER_COL_FORMAT__COL6_EXPORT_FORMAT_MASK 0xf000000
#define SPI_SHADER_COL_FORMAT__COL6_EXPORT_FORMAT__SHIFT 0x18
#define SPI_SHADER_COL_FORMAT__COL7_EXPORT_FORMAT_MASK 0xf0000000
#define SPI_SHADER_COL_FORMAT__COL7_EXPORT_FORMAT__SHIFT 0x1c
/*
 * SPI_ARB_PRIORITY / SPI_ARB_CYCLES_0 / SPI_ARB_CYCLES_1 field masks and
 * shifts.
 *
 * SPI_ARB_PRIORITY:
 *   PIPE_ORDER_TS0..TS3 3 bits each, bits [2:0], [5:3], [8:6], [11:9]
 *   TS0..TS3_DUR_MULT   2 bits each, bits [13:12], [15:14], [17:16], [19:18]
 */
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS0_MASK 0x7
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS0__SHIFT 0x0
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS1_MASK 0x38
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS1__SHIFT 0x3
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS2_MASK 0x1c0
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS2__SHIFT 0x6
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS3_MASK 0xe00
#define SPI_ARB_PRIORITY__PIPE_ORDER_TS3__SHIFT 0x9
#define SPI_ARB_PRIORITY__TS0_DUR_MULT_MASK 0x3000
#define SPI_ARB_PRIORITY__TS0_DUR_MULT__SHIFT 0xc
#define SPI_ARB_PRIORITY__TS1_DUR_MULT_MASK 0xc000
#define SPI_ARB_PRIORITY__TS1_DUR_MULT__SHIFT 0xe
#define SPI_ARB_PRIORITY__TS2_DUR_MULT_MASK 0x30000
#define SPI_ARB_PRIORITY__TS2_DUR_MULT__SHIFT 0x10
#define SPI_ARB_PRIORITY__TS3_DUR_MULT_MASK 0xc0000
#define SPI_ARB_PRIORITY__TS3_DUR_MULT__SHIFT 0x12
/*
 * SPI_ARB_CYCLES_0 / SPI_ARB_CYCLES_1: two 16-bit duration fields per
 * register. TS0/TS1 live in CYCLES_0 and TS2/TS3 in CYCLES_1; the
 * even-numbered slot takes bits [15:0] and the odd-numbered slot bits [31:16].
 */
#define SPI_ARB_CYCLES_0__TS0_DURATION_MASK 0xffff
#define SPI_ARB_CYCLES_0__TS0_DURATION__SHIFT 0x0
#define SPI_ARB_CYCLES_0__TS1_DURATION_MASK 0xffff0000
#define SPI_ARB_CYCLES_0__TS1_DURATION__SHIFT 0x10
#define SPI_ARB_CYCLES_1__TS2_DURATION_MASK 0xffff
#define SPI_ARB_CYCLES_1__TS2_DURATION__SHIFT 0x0
#define SPI_ARB_CYCLES_1__TS3_DURATION_MASK 0xffff0000
#define SPI_ARB_CYCLES_1__TS3_DURATION__SHIFT 0x10
/*
 * SPI_CDBG_SYS_GFX / SPI_CDBG_SYS_HP3D / SPI_CDBG_SYS_CS0 / SPI_CDBG_SYS_CS1
 * field masks and shifts.
 *
 * SPI_CDBG_SYS_GFX: seven single-bit enables at bits 0..6
 * (PS, VS, GS, ES, HS, LS, CS).
 */
#define SPI_CDBG_SYS_GFX__PS_EN_MASK 0x1
#define SPI_CDBG_SYS_GFX__PS_EN__SHIFT 0x0
#define SPI_CDBG_SYS_GFX__VS_EN_MASK 0x2
#define SPI_CDBG_SYS_GFX__VS_EN__SHIFT 0x1
#define SPI_CDBG_SYS_GFX__GS_EN_MASK 0x4
#define SPI_CDBG_SYS_GFX__GS_EN__SHIFT 0x2
#define SPI_CDBG_SYS_GFX__ES_EN_MASK 0x8
#define SPI_CDBG_SYS_GFX__ES_EN__SHIFT 0x3
#define SPI_CDBG_SYS_GFX__HS_EN_MASK 0x10
#define SPI_CDBG_SYS_GFX__HS_EN__SHIFT 0x4
#define SPI_CDBG_SYS_GFX__LS_EN_MASK 0x20
#define SPI_CDBG_SYS_GFX__LS_EN__SHIFT 0x5
#define SPI_CDBG_SYS_GFX__CS_EN_MASK 0x40
#define SPI_CDBG_SYS_GFX__CS_EN__SHIFT 0x6
/*
 * SPI_CDBG_SYS_HP3D: six single-bit enables at bits 0..5
 * (PS, VS, GS, ES, HS, LS). Unlike SPI_CDBG_SYS_GFX, no CS_EN field is
 * defined for this register.
 */
#define SPI_CDBG_SYS_HP3D__PS_EN_MASK 0x1
#define SPI_CDBG_SYS_HP3D__PS_EN__SHIFT 0x0
#define SPI_CDBG_SYS_HP3D__VS_EN_MASK 0x2
#define SPI_CDBG_SYS_HP3D__VS_EN__SHIFT 0x1
#define SPI_CDBG_SYS_HP3D__GS_EN_MASK 0x4
#define SPI_CDBG_SYS_HP3D__GS_EN__SHIFT 0x2
#define SPI_CDBG_SYS_HP3D__ES_EN_MASK 0x8
#define SPI_CDBG_SYS_HP3D__ES_EN__SHIFT 0x3
#define SPI_CDBG_SYS_HP3D__HS_EN_MASK 0x10
#define SPI_CDBG_SYS_HP3D__HS_EN__SHIFT 0x4
#define SPI_CDBG_SYS_HP3D__LS_EN_MASK 0x20
#define SPI_CDBG_SYS_HP3D__LS_EN__SHIFT 0x5
/*
 * SPI_CDBG_SYS_CS0 / SPI_CDBG_SYS_CS1: four 8-bit PIPE0..PIPE3 fields per
 * register, one per byte, filling all 32 bits.
 */
#define SPI_CDBG_SYS_CS0__PIPE0_MASK 0xff
#define SPI_CDBG_SYS_CS0__PIPE0__SHIFT 0x0
#define SPI_CDBG_SYS_CS0__PIPE1_MASK 0xff00
#define SPI_CDBG_SYS_CS0__PIPE1__SHIFT 0x8
#define SPI_CDBG_SYS_CS0__PIPE2_MASK 0xff0000
#define SPI_CDBG_SYS_CS0__PIPE2__SHIFT 0x10
#define SPI_CDBG_SYS_CS0__PIPE3_MASK 0xff000000
#define SPI_CDBG_SYS_CS0__PIPE3__SHIFT 0x18
#define SPI_CDBG_SYS_CS1__PIPE0_MASK 0xff
#define SPI_CDBG_SYS_CS1__PIPE0__SHIFT 0x0
#define SPI_CDBG_SYS_CS1__PIPE1_MASK 0xff00
#define SPI_CDBG_SYS_CS1__PIPE1__SHIFT 0x8
#define SPI_CDBG_SYS_CS1__PIPE2_MASK 0xff0000
#define SPI_CDBG_SYS_CS1__PIPE2__SHIFT 0x10
#define SPI_CDBG_SYS_CS1__PIPE3_MASK 0xff000000
#define SPI_CDBG_SYS_CS1__PIPE3__SHIFT 0x18
/*
 * SPI_WCL_PIPE_PERCENT_{GFX,HP3D,CS0..CS7} field masks and shifts.
 * Each of the ten registers defines a single VALUE field at bits [4:0]
 * (5 bits wide, so the raw field can hold 0..31).
 */
#define SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_GFX__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_HP3D__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_HP3D__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS0__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS0__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS1__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS1__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS2__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS2__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS3__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS3__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS4__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS4__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS5__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS5__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS6__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS6__VALUE__SHIFT 0x0
#define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK 0x1f
#define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT 0x0
/*
 * SPI_GDBG_* register field masks and shifts.
 *
 * SPI_GDBG_WAVE_CNTL: STALL_RA at bit 0.
 */
#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK 0x1
#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT 0x0
/*
 * SPI_GDBG_TRAP_CONFIG:
 *   ME_SEL      bits [1:0]
 *   PIPE_SEL    bits [3:2]
 *   QUEUE_SEL   bits [6:4]
 *   ME_MATCH    bit  7
 *   PIPE_MATCH  bit  8
 *   QUEUE_MATCH bit  9
 *   TRAP_EN     bit  15
 *   VMID_SEL    bits [31:16]
 */
#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK 0x3
#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT 0x0
#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK 0xc
#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT 0x2
#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK 0x70
#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT 0x4
#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK 0x80
#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT 0x7
#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK 0x100
#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT 0x8
#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK 0x200
#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT 0x9
#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK 0x8000
#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT 0xf
#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK 0xffff0000
#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT 0x10
/* SPI_GDBG_TRAP_MASK: EXCP_EN bits [8:0], REPLACE bit 9. */
#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK 0x1ff
#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT 0x0
#define SPI_GDBG_TRAP_MASK__REPLACE_MASK 0x200
#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT 0x9
/*
 * SPI_GDBG_TBA_{LO,HI} / SPI_GDBG_TMA_{LO,HI}: MEM_BASE is a full 32-bit
 * field in each _LO register and an 8-bit field (bits [7:0]) in each _HI
 * register.
 */
#define SPI_GDBG_TBA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_GDBG_TBA_LO__MEM_BASE__SHIFT 0x0
#define SPI_GDBG_TBA_HI__MEM_BASE_MASK 0xff
#define SPI_GDBG_TBA_HI__MEM_BASE__SHIFT 0x0
#define SPI_GDBG_TMA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_GDBG_TMA_LO__MEM_BASE__SHIFT 0x0
#define SPI_GDBG_TMA_HI__MEM_BASE_MASK 0xff
#define SPI_GDBG_TMA_HI__MEM_BASE__SHIFT 0x0
/* SPI_GDBG_TRAP_DATA0 / SPI_GDBG_TRAP_DATA1: DATA spans all 32 bits. */
#define SPI_GDBG_TRAP_DATA0__DATA_MASK 0xffffffff
#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT 0x0
#define SPI_GDBG_TRAP_DATA1__DATA_MASK 0xffffffff
#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT 0x0
/*
 * SPI_RESET_DEBUG / SPI_COMPUTE_QUEUE_RESET field masks and shifts.
 *
 * SPI_RESET_DEBUG: five single-bit DISABLE_GFX_RESET* fields at bits 0..4.
 */
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_MASK 0x1
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET__SHIFT 0x0
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_PER_VMID_MASK 0x2
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_PER_VMID__SHIFT 0x1
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_ALL_VMID_MASK 0x4
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_ALL_VMID__SHIFT 0x2
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_RESOURCE_MASK 0x8
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_RESOURCE__SHIFT 0x3
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_PRIORITY_MASK 0x10
#define SPI_RESET_DEBUG__DISABLE_GFX_RESET_PRIORITY__SHIFT 0x4
/* SPI_COMPUTE_QUEUE_RESET: RESET at bit 0. */
#define SPI_COMPUTE_QUEUE_RESET__RESET_MASK 0x1
#define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT 0x0
/*
 * SPI_RESOURCE_RESERVE_CU_0 .. SPI_RESOURCE_RESERVE_CU_11 field masks and
 * shifts. All twelve registers share one layout:
 *   VGPR     bits [3:0]
 *   SGPR     bits [7:4]
 *   LDS      bits [11:8]
 *   WAVES    bits [14:12]
 *   BARRIERS bits [18:15]
 */
/* SPI_RESOURCE_RESERVE_CU_0 */
#define SPI_RESOURCE_RESERVE_CU_0__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_0__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_0__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_0__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_0__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_0__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_0__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_0__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_0__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_0__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_1 */
#define SPI_RESOURCE_RESERVE_CU_1__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_1__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_1__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_1__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_1__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_1__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_1__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_1__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_1__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_1__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_2 */
#define SPI_RESOURCE_RESERVE_CU_2__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_2__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_2__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_2__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_2__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_2__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_2__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_2__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_2__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_2__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_3 */
#define SPI_RESOURCE_RESERVE_CU_3__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_3__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_3__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_3__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_3__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_3__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_3__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_3__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_3__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_3__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_4 */
#define SPI_RESOURCE_RESERVE_CU_4__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_4__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_4__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_4__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_4__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_4__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_4__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_4__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_4__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_4__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_5 */
#define SPI_RESOURCE_RESERVE_CU_5__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_5__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_5__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_5__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_5__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_5__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_5__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_5__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_5__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_5__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_6 */
#define SPI_RESOURCE_RESERVE_CU_6__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_6__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_6__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_6__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_6__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_6__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_6__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_6__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_6__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_6__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_7 */
#define SPI_RESOURCE_RESERVE_CU_7__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_7__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_7__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_7__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_7__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_7__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_7__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_7__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_7__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_7__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_8 */
#define SPI_RESOURCE_RESERVE_CU_8__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_8__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_8__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_8__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_8__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_8__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_8__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_8__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_8__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_8__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_9 */
#define SPI_RESOURCE_RESERVE_CU_9__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_9__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_9__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_9__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_9__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_9__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_9__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_9__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_9__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_9__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_10 */
#define SPI_RESOURCE_RESERVE_CU_10__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_10__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_10__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_10__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_10__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_10__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_10__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_10__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_10__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_10__BARRIERS__SHIFT 0xf
/* SPI_RESOURCE_RESERVE_CU_11 */
#define SPI_RESOURCE_RESERVE_CU_11__VGPR_MASK 0xf
#define SPI_RESOURCE_RESERVE_CU_11__VGPR__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_CU_11__SGPR_MASK 0xf0
#define SPI_RESOURCE_RESERVE_CU_11__SGPR__SHIFT 0x4
#define SPI_RESOURCE_RESERVE_CU_11__LDS_MASK 0xf00
#define SPI_RESOURCE_RESERVE_CU_11__LDS__SHIFT 0x8
#define SPI_RESOURCE_RESERVE_CU_11__WAVES_MASK 0x7000
#define SPI_RESOURCE_RESERVE_CU_11__WAVES__SHIFT 0xc
#define SPI_RESOURCE_RESERVE_CU_11__BARRIERS_MASK 0x78000
#define SPI_RESOURCE_RESERVE_CU_11__BARRIERS__SHIFT 0xf
/*
 * SPI_RESOURCE_RESERVE_EN_CU_0 .. SPI_RESOURCE_RESERVE_EN_CU_11 field masks
 * and shifts. All twelve registers share one layout:
 *   EN                 bit  0
 *   TYPE_MASK          bits [15:1]
 *   QUEUE_MASK         bits [23:16]
 *   RESERVE_SPACE_ONLY bit  24
 *
 * Naming note: two of the fields are themselves called TYPE_MASK and
 * QUEUE_MASK, so their mask macros end in "_MASK_MASK" while their shift
 * macros end in "_MASK__SHIFT". The doubled suffix is not a typo.
 */
/* SPI_RESOURCE_RESERVE_EN_CU_0 */
#define SPI_RESOURCE_RESERVE_EN_CU_0__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_0__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_0__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_0__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_0__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_0__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_0__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_0__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_1 */
#define SPI_RESOURCE_RESERVE_EN_CU_1__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_1__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_1__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_1__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_1__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_1__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_1__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_1__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_2 */
#define SPI_RESOURCE_RESERVE_EN_CU_2__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_2__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_2__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_2__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_2__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_2__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_2__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_2__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_3 */
#define SPI_RESOURCE_RESERVE_EN_CU_3__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_3__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_3__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_3__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_3__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_3__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_3__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_3__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_4 */
#define SPI_RESOURCE_RESERVE_EN_CU_4__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_4__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_4__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_4__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_4__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_4__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_4__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_4__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_5 */
#define SPI_RESOURCE_RESERVE_EN_CU_5__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_5__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_5__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_5__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_5__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_5__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_5__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_5__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_6 */
#define SPI_RESOURCE_RESERVE_EN_CU_6__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_6__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_6__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_6__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_6__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_6__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_6__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_6__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_7 */
#define SPI_RESOURCE_RESERVE_EN_CU_7__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_7__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_7__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_7__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_7__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_7__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_7__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_7__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_8 */
#define SPI_RESOURCE_RESERVE_EN_CU_8__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_8__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_8__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_8__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_8__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_8__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_8__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_8__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_9 */
#define SPI_RESOURCE_RESERVE_EN_CU_9__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_9__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_9__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_9__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_9__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_9__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_9__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_9__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_10 */
#define SPI_RESOURCE_RESERVE_EN_CU_10__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_10__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_10__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_10__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_10__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_10__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_10__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_10__RESERVE_SPACE_ONLY__SHIFT 0x18
/* SPI_RESOURCE_RESERVE_EN_CU_11 */
#define SPI_RESOURCE_RESERVE_EN_CU_11__EN_MASK 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_11__EN__SHIFT 0x0
#define SPI_RESOURCE_RESERVE_EN_CU_11__TYPE_MASK_MASK 0xfffe
#define SPI_RESOURCE_RESERVE_EN_CU_11__TYPE_MASK__SHIFT 0x1
#define SPI_RESOURCE_RESERVE_EN_CU_11__QUEUE_MASK_MASK 0xff0000
#define SPI_RESOURCE_RESERVE_EN_CU_11__QUEUE_MASK__SHIFT 0x10
#define SPI_RESOURCE_RESERVE_EN_CU_11__RESERVE_SPACE_ONLY_MASK 0x1000000
#define SPI_RESOURCE_RESERVE_EN_CU_11__RESERVE_SPACE_ONLY__SHIFT 0x18
/*
 * SPI_PS_MAX_WAVE_ID / SPI_CONFIG_CNTL field masks and shifts.
 *
 * SPI_PS_MAX_WAVE_ID: MAX_WAVE_ID bits [11:0].
 */
#define SPI_PS_MAX_WAVE_ID__MAX_WAVE_ID_MASK 0xfff
#define SPI_PS_MAX_WAVE_ID__MAX_WAVE_ID__SHIFT 0x0
/*
 * SPI_CONFIG_CNTL:
 *   GPR_WRITE_PRIORITY    bits [20:0]
 *   EXP_PRIORITY_ORDER    bits [23:21]
 *   ENABLE_SQG_TOP_EVENTS bit  24
 *   ENABLE_SQG_BOP_EVENTS bit  25
 *   RSRC_MGMT_RESET       bit  26
 *   TTRACE_STALL_ALL      bit  27
 */
#define SPI_CONFIG_CNTL__GPR_WRITE_PRIORITY_MASK 0x1fffff
#define SPI_CONFIG_CNTL__GPR_WRITE_PRIORITY__SHIFT 0x0
#define SPI_CONFIG_CNTL__EXP_PRIORITY_ORDER_MASK 0xe00000
#define SPI_CONFIG_CNTL__EXP_PRIORITY_ORDER__SHIFT 0x15
#define SPI_CONFIG_CNTL__ENABLE_SQG_TOP_EVENTS_MASK 0x1000000
#define SPI_CONFIG_CNTL__ENABLE_SQG_TOP_EVENTS__SHIFT 0x18
#define SPI_CONFIG_CNTL__ENABLE_SQG_BOP_EVENTS_MASK 0x2000000
#define SPI_CONFIG_CNTL__ENABLE_SQG_BOP_EVENTS__SHIFT 0x19
#define SPI_CONFIG_CNTL__RSRC_MGMT_RESET_MASK 0x4000000
#define SPI_CONFIG_CNTL__RSRC_MGMT_RESET__SHIFT 0x1a
#define SPI_CONFIG_CNTL__TTRACE_STALL_ALL_MASK 0x8000000
#define SPI_CONFIG_CNTL__TTRACE_STALL_ALL__SHIFT 0x1b
/*
 * SPI_DEBUG_CNTL / SPI_DEBUG_READ field masks and shifts.
 *
 * SPI_DEBUG_CNTL:
 *   DEBUG_GRBM_OVERRIDE   bit  0
 *   DEBUG_THREAD_TYPE_SEL bits [3:1]
 *   DEBUG_GROUP_SEL       bits [9:4]
 *   DEBUG_SIMD_SEL        bits [15:10]
 *   DEBUG_SH_SEL          bit  16
 *   SPI_ECO_SPARE_0..7    bits 17..24, one bit each
 *   DEBUG_PIPE_SEL        bits [27:25]
 *   DEBUG_REG_EN          bit  31
 */
#define SPI_DEBUG_CNTL__DEBUG_GRBM_OVERRIDE_MASK 0x1
#define SPI_DEBUG_CNTL__DEBUG_GRBM_OVERRIDE__SHIFT 0x0
#define SPI_DEBUG_CNTL__DEBUG_THREAD_TYPE_SEL_MASK 0xe
#define SPI_DEBUG_CNTL__DEBUG_THREAD_TYPE_SEL__SHIFT 0x1
#define SPI_DEBUG_CNTL__DEBUG_GROUP_SEL_MASK 0x3f0
#define SPI_DEBUG_CNTL__DEBUG_GROUP_SEL__SHIFT 0x4
#define SPI_DEBUG_CNTL__DEBUG_SIMD_SEL_MASK 0xfc00
#define SPI_DEBUG_CNTL__DEBUG_SIMD_SEL__SHIFT 0xa
#define SPI_DEBUG_CNTL__DEBUG_SH_SEL_MASK 0x10000
#define SPI_DEBUG_CNTL__DEBUG_SH_SEL__SHIFT 0x10
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_0_MASK 0x20000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_0__SHIFT 0x11
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_1_MASK 0x40000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_1__SHIFT 0x12
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_2_MASK 0x80000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_2__SHIFT 0x13
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_3_MASK 0x100000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_3__SHIFT 0x14
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_4_MASK 0x200000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_4__SHIFT 0x15
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_5_MASK 0x400000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_5__SHIFT 0x16
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_6_MASK 0x800000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_6__SHIFT 0x17
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_7_MASK 0x1000000
#define SPI_DEBUG_CNTL__SPI_ECO_SPARE_7__SHIFT 0x18
#define SPI_DEBUG_CNTL__DEBUG_PIPE_SEL_MASK 0xe000000
#define SPI_DEBUG_CNTL__DEBUG_PIPE_SEL__SHIFT 0x19
#define SPI_DEBUG_CNTL__DEBUG_REG_EN_MASK 0x80000000
#define SPI_DEBUG_CNTL__DEBUG_REG_EN__SHIFT 0x1f
/* SPI_DEBUG_READ: DATA bits [23:0]. */
#define SPI_DEBUG_READ__DATA_MASK 0xffffff
#define SPI_DEBUG_READ__DATA__SHIFT 0x0
/*
 * SPI performance-counter select register field masks and shifts.
 *
 * SPI_PERFCOUNTER0..3_SELECT:
 *   PERF_SEL  bits [9:0]
 *   PERF_SEL1 bits [19:10]
 *   CNTR_MODE bits [23:20]
 */
#define SPI_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define SPI_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define SPI_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define SPI_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define SPI_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define SPI_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define SPI_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define SPI_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define SPI_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define SPI_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define SPI_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define SPI_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define SPI_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define SPI_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define SPI_PERFCOUNTER2_SELECT__PERF_SEL1_MASK 0xffc00
#define SPI_PERFCOUNTER2_SELECT__PERF_SEL1__SHIFT 0xa
#define SPI_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define SPI_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define SPI_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define SPI_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define SPI_PERFCOUNTER3_SELECT__PERF_SEL1_MASK 0xffc00
#define SPI_PERFCOUNTER3_SELECT__PERF_SEL1__SHIFT 0xa
#define SPI_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define SPI_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
/*
 * SPI_PERFCOUNTER0..3_SELECT1:
 *   PERF_SEL2 bits [9:0]
 *   PERF_SEL3 bits [19:10]
 */
#define SPI_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define SPI_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define SPI_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define SPI_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define SPI_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define SPI_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define SPI_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define SPI_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define SPI_PERFCOUNTER2_SELECT1__PERF_SEL2_MASK 0x3ff
#define SPI_PERFCOUNTER2_SELECT1__PERF_SEL2__SHIFT 0x0
#define SPI_PERFCOUNTER2_SELECT1__PERF_SEL3_MASK 0xffc00
#define SPI_PERFCOUNTER2_SELECT1__PERF_SEL3__SHIFT 0xa
#define SPI_PERFCOUNTER3_SELECT1__PERF_SEL2_MASK 0x3ff
#define SPI_PERFCOUNTER3_SELECT1__PERF_SEL2__SHIFT 0x0
#define SPI_PERFCOUNTER3_SELECT1__PERF_SEL3_MASK 0xffc00
#define SPI_PERFCOUNTER3_SELECT1__PERF_SEL3__SHIFT 0xa
/*
 * SPI_PERFCOUNTER4_SELECT / SPI_PERFCOUNTER5_SELECT: only PERF_SEL is
 * defined, and it is 8 bits wide (bits [7:0]) rather than the 10 bits used
 * by counters 0..3.
 */
#define SPI_PERFCOUNTER4_SELECT__PERF_SEL_MASK 0xff
#define SPI_PERFCOUNTER4_SELECT__PERF_SEL__SHIFT 0x0
#define SPI_PERFCOUNTER5_SELECT__PERF_SEL_MASK 0xff
#define SPI_PERFCOUNTER5_SELECT__PERF_SEL__SHIFT 0x0
/*
 * SPI_PERFCOUNTER_BINS: four bins (BIN0..BIN3), each described by a 4-bit MIN
 * followed by a 4-bit MAX. The eight nibbles are packed back to back and
 * together cover all 32 bits: BINn_MIN at bit 8*n, BINn_MAX at bit 8*n + 4.
 */
#define SPI_PERFCOUNTER_BINS__BIN0_MIN_MASK 0xf
#define SPI_PERFCOUNTER_BINS__BIN0_MIN__SHIFT 0x0
#define SPI_PERFCOUNTER_BINS__BIN0_MAX_MASK 0xf0
#define SPI_PERFCOUNTER_BINS__BIN0_MAX__SHIFT 0x4
#define SPI_PERFCOUNTER_BINS__BIN1_MIN_MASK 0xf00
#define SPI_PERFCOUNTER_BINS__BIN1_MIN__SHIFT 0x8
#define SPI_PERFCOUNTER_BINS__BIN1_MAX_MASK 0xf000
#define SPI_PERFCOUNTER_BINS__BIN1_MAX__SHIFT 0xc
#define SPI_PERFCOUNTER_BINS__BIN2_MIN_MASK 0xf0000
#define SPI_PERFCOUNTER_BINS__BIN2_MIN__SHIFT 0x10
#define SPI_PERFCOUNTER_BINS__BIN2_MAX_MASK 0xf00000
#define SPI_PERFCOUNTER_BINS__BIN2_MAX__SHIFT 0x14
#define SPI_PERFCOUNTER_BINS__BIN3_MIN_MASK 0xf000000
#define SPI_PERFCOUNTER_BINS__BIN3_MIN__SHIFT 0x18
#define SPI_PERFCOUNTER_BINS__BIN3_MAX_MASK 0xf0000000
#define SPI_PERFCOUNTER_BINS__BIN3_MAX__SHIFT 0x1c
/*
 * SPI_PERFCOUNTER{0..5}_HI / _LO: each register carries one field
 * (PERFCOUNTER_HI or PERFCOUNTER_LO) spanning the full 32 bits.
 * NOTE(review): the HI/LO pairing presumably forms the upper and lower words
 * of one 64-bit counter value -- confirm against the hardware register spec.
 */
#define SPI_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SPI_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SPI_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SPI_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SPI_PERFCOUNTER4_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER4_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER4_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER4_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SPI_PERFCOUNTER5_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SPI_PERFCOUNTER5_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SPI_PERFCOUNTER5_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SPI_PERFCOUNTER5_LO__PERFCOUNTER_LO__SHIFT 0x0
/*
 * SPI_CONFIG_CNTL_1 field layout:
 *   VTX_DONE_DELAY             bits 0-3
 *   INTERP_ONE_PRIM_PER_ROW    bit  4
 *   PC_LIMIT_ENABLE            bit  6
 *   PC_LIMIT_STRICT            bit  7
 *   CRC_SIMD_ID_WADDR_DISABLE  bit  8
 *   LBPW_CU_CHK_MODE           bit  9
 *   LBPW_CU_CHK_CNT            bits 10-13
 *   PC_LIMIT_SIZE              bits 16-31
 * Bit 5 and bits 14-15 are not covered by any field defined here.
 */
#define SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_MASK 0xf
#define SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT 0x0
#define SPI_CONFIG_CNTL_1__INTERP_ONE_PRIM_PER_ROW_MASK 0x10
#define SPI_CONFIG_CNTL_1__INTERP_ONE_PRIM_PER_ROW__SHIFT 0x4
#define SPI_CONFIG_CNTL_1__PC_LIMIT_ENABLE_MASK 0x40
#define SPI_CONFIG_CNTL_1__PC_LIMIT_ENABLE__SHIFT 0x6
#define SPI_CONFIG_CNTL_1__PC_LIMIT_STRICT_MASK 0x80
#define SPI_CONFIG_CNTL_1__PC_LIMIT_STRICT__SHIFT 0x7
#define SPI_CONFIG_CNTL_1__CRC_SIMD_ID_WADDR_DISABLE_MASK 0x100
#define SPI_CONFIG_CNTL_1__CRC_SIMD_ID_WADDR_DISABLE__SHIFT 0x8
#define SPI_CONFIG_CNTL_1__LBPW_CU_CHK_MODE_MASK 0x200
#define SPI_CONFIG_CNTL_1__LBPW_CU_CHK_MODE__SHIFT 0x9
#define SPI_CONFIG_CNTL_1__LBPW_CU_CHK_CNT_MASK 0x3c00
#define SPI_CONFIG_CNTL_1__LBPW_CU_CHK_CNT__SHIFT 0xa
#define SPI_CONFIG_CNTL_1__PC_LIMIT_SIZE_MASK 0xffff0000
#define SPI_CONFIG_CNTL_1__PC_LIMIT_SIZE__SHIFT 0x10
/*
 * SPI_DEBUG_BUSY: 24 single-bit *_BUSY flags occupying bits 0-23 contiguously,
 * in the order listed below (LS_BUSY at bit 0 through SPIS_BUSY at bit 23).
 * Bits 24-31 are not covered by any field defined here.
 */
#define SPI_DEBUG_BUSY__LS_BUSY_MASK 0x1
#define SPI_DEBUG_BUSY__LS_BUSY__SHIFT 0x0
#define SPI_DEBUG_BUSY__HS_BUSY_MASK 0x2
#define SPI_DEBUG_BUSY__HS_BUSY__SHIFT 0x1
#define SPI_DEBUG_BUSY__ES_BUSY_MASK 0x4
#define SPI_DEBUG_BUSY__ES_BUSY__SHIFT 0x2
#define SPI_DEBUG_BUSY__GS_BUSY_MASK 0x8
#define SPI_DEBUG_BUSY__GS_BUSY__SHIFT 0x3
#define SPI_DEBUG_BUSY__VS_BUSY_MASK 0x10
#define SPI_DEBUG_BUSY__VS_BUSY__SHIFT 0x4
#define SPI_DEBUG_BUSY__PS0_BUSY_MASK 0x20
#define SPI_DEBUG_BUSY__PS0_BUSY__SHIFT 0x5
#define SPI_DEBUG_BUSY__PS1_BUSY_MASK 0x40
#define SPI_DEBUG_BUSY__PS1_BUSY__SHIFT 0x6
#define SPI_DEBUG_BUSY__CSG_BUSY_MASK 0x80
#define SPI_DEBUG_BUSY__CSG_BUSY__SHIFT 0x7
#define SPI_DEBUG_BUSY__CS0_BUSY_MASK 0x100
#define SPI_DEBUG_BUSY__CS0_BUSY__SHIFT 0x8
#define SPI_DEBUG_BUSY__CS1_BUSY_MASK 0x200
#define SPI_DEBUG_BUSY__CS1_BUSY__SHIFT 0x9
#define SPI_DEBUG_BUSY__CS2_BUSY_MASK 0x400
#define SPI_DEBUG_BUSY__CS2_BUSY__SHIFT 0xa
#define SPI_DEBUG_BUSY__CS3_BUSY_MASK 0x800
#define SPI_DEBUG_BUSY__CS3_BUSY__SHIFT 0xb
#define SPI_DEBUG_BUSY__CS4_BUSY_MASK 0x1000
#define SPI_DEBUG_BUSY__CS4_BUSY__SHIFT 0xc
#define SPI_DEBUG_BUSY__CS5_BUSY_MASK 0x2000
#define SPI_DEBUG_BUSY__CS5_BUSY__SHIFT 0xd
#define SPI_DEBUG_BUSY__CS6_BUSY_MASK 0x4000
#define SPI_DEBUG_BUSY__CS6_BUSY__SHIFT 0xe
#define SPI_DEBUG_BUSY__CS7_BUSY_MASK 0x8000
#define SPI_DEBUG_BUSY__CS7_BUSY__SHIFT 0xf
#define SPI_DEBUG_BUSY__LDS_WR_CTL0_BUSY_MASK 0x10000
#define SPI_DEBUG_BUSY__LDS_WR_CTL0_BUSY__SHIFT 0x10
#define SPI_DEBUG_BUSY__LDS_WR_CTL1_BUSY_MASK 0x20000
#define SPI_DEBUG_BUSY__LDS_WR_CTL1_BUSY__SHIFT 0x11
#define SPI_DEBUG_BUSY__RSRC_ALLOC0_BUSY_MASK 0x40000
#define SPI_DEBUG_BUSY__RSRC_ALLOC0_BUSY__SHIFT 0x12
#define SPI_DEBUG_BUSY__RSRC_ALLOC1_BUSY_MASK 0x80000
#define SPI_DEBUG_BUSY__RSRC_ALLOC1_BUSY__SHIFT 0x13
#define SPI_DEBUG_BUSY__PC_DEALLOC_BUSY_MASK 0x100000
#define SPI_DEBUG_BUSY__PC_DEALLOC_BUSY__SHIFT 0x14
#define SPI_DEBUG_BUSY__EVENT_CLCTR_BUSY_MASK 0x200000
#define SPI_DEBUG_BUSY__EVENT_CLCTR_BUSY__SHIFT 0x15
#define SPI_DEBUG_BUSY__GRBM_BUSY_MASK 0x400000
#define SPI_DEBUG_BUSY__GRBM_BUSY__SHIFT 0x16
#define SPI_DEBUG_BUSY__SPIS_BUSY_MASK 0x800000
#define SPI_DEBUG_BUSY__SPIS_BUSY__SHIFT 0x17
/*
 * CGTS_SM_CTRL_REG field layout:
 *   ON_SEQ_DELAY        bits 0-3
 *   OFF_SEQ_DELAY       bits 4-11
 *   MGCG_ENABLED        bit  12
 *   BASE_MODE           bit  16
 *   SM_MODE             bits 17-19
 *   SM_MODE_ENABLE      bit  20
 *   OVERRIDE            bit  21
 *   LS_OVERRIDE         bit  22
 *   ON_MONITOR_ADD_EN   bit  23
 *   ON_MONITOR_ADD      bits 24-31
 * Bits 13-15 are not covered by any field defined here.
 */
#define CGTS_SM_CTRL_REG__ON_SEQ_DELAY_MASK 0xf
#define CGTS_SM_CTRL_REG__ON_SEQ_DELAY__SHIFT 0x0
#define CGTS_SM_CTRL_REG__OFF_SEQ_DELAY_MASK 0xff0
#define CGTS_SM_CTRL_REG__OFF_SEQ_DELAY__SHIFT 0x4
#define CGTS_SM_CTRL_REG__MGCG_ENABLED_MASK 0x1000
#define CGTS_SM_CTRL_REG__MGCG_ENABLED__SHIFT 0xc
#define CGTS_SM_CTRL_REG__BASE_MODE_MASK 0x10000
#define CGTS_SM_CTRL_REG__BASE_MODE__SHIFT 0x10
#define CGTS_SM_CTRL_REG__SM_MODE_MASK 0xe0000
#define CGTS_SM_CTRL_REG__SM_MODE__SHIFT 0x11
#define CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK 0x100000
#define CGTS_SM_CTRL_REG__SM_MODE_ENABLE__SHIFT 0x14
#define CGTS_SM_CTRL_REG__OVERRIDE_MASK 0x200000
#define CGTS_SM_CTRL_REG__OVERRIDE__SHIFT 0x15
#define CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK 0x400000
#define CGTS_SM_CTRL_REG__LS_OVERRIDE__SHIFT 0x16
#define CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK 0x800000
#define CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN__SHIFT 0x17
#define CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK 0xff000000
#define CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT 0x18
/*
 * CGTS read-back and TCC-disable registers:
 *   CGTS_RD_CTRL_REG      : ROW_MUX_SEL bits 0-4, REG_MUX_SEL bits 8-12
 *   CGTS_RD_REG           : READ_DATA   bits 0-13
 *   CGTS_TCC_DISABLE      : TCC_DISABLE bits 16-31 (upper half-word)
 *   CGTS_USER_TCC_DISABLE : TCC_DISABLE bits 16-31 (same layout)
 */
#define CGTS_RD_CTRL_REG__ROW_MUX_SEL_MASK 0x1f
#define CGTS_RD_CTRL_REG__ROW_MUX_SEL__SHIFT 0x0
#define CGTS_RD_CTRL_REG__REG_MUX_SEL_MASK 0x1f00
#define CGTS_RD_CTRL_REG__REG_MUX_SEL__SHIFT 0x8
#define CGTS_RD_REG__READ_DATA_MASK 0x3fff
#define CGTS_RD_REG__READ_DATA__SHIFT 0x0
#define CGTS_TCC_DISABLE__TCC_DISABLE_MASK 0xffff0000
#define CGTS_TCC_DISABLE__TCC_DISABLE__SHIFT 0x10
#define CGTS_USER_TCC_DISABLE__TCC_DISABLE_MASK 0xffff0000
#define CGTS_USER_TCC_DISABLE__TCC_DISABLE__SHIFT 0x10
/*
 * CGTS_CU0_* control registers: SP0, LDS_SQ, TA_SQC, SP1 and TD_TCP.
 *
 * Each register packs two blocks, one per 16-bit half, and both halves use
 * the same sub-layout (offsets relative to bit 0 for the first block and to
 * bit 16 for the second):
 *   <BLK>                    bits 0-6   (7-bit value)
 *   <BLK>_OVERRIDE           bit  7
 *   <BLK>_BUSY_OVERRIDE      bits 8-9
 *   <BLK>_LS_OVERRIDE        bit  10
 *   <BLK>_SIMDBUSY_OVERRIDE  bit  11
 * Bits 12-15 and 28-31 are not covered by any field defined here.
 *
 * Block pairs (low half / high half):
 *   SP0_CTRL_REG    : SP00 / SP01
 *   LDS_SQ_CTRL_REG : LDS  / SQ
 *   TA_SQC_CTRL_REG : TA   / SQC
 *   SP1_CTRL_REG    : SP10 / SP11
 *   TD_TCP_CTRL_REG : TD   / TCP
 */
#define CGTS_CU0_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU0_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU0_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU0_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU0_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU0_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU0_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU0_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU0_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU0_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU0_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU0_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU0_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU0_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU0_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU0_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU0_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU0_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU0_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU0_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU0_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU0_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU0_TA_SQC_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU0_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_MASK 0x7f0000
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC__SHIFT 0x10
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_OVERRIDE_MASK 0x800000
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_OVERRIDE__SHIFT 0x17
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU0_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU0_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU0_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU0_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU0_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU0_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU0_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU0_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU0_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU0_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU0_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU0_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU0_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU0_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU0_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU0_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU0_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU0_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU0_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU0_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU0_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU0_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU0_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU0_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU1_* control registers: SP0, LDS_SQ, TA, SP1 and TD_TCP.
 *
 * Per-block sub-layout (relative to bit 0 for the low-half block and to
 * bit 16 for the high-half block):
 *   <BLK>                    bits 0-6
 *   <BLK>_OVERRIDE           bit  7
 *   <BLK>_BUSY_OVERRIDE      bits 8-9
 *   <BLK>_LS_OVERRIDE        bit  10
 *   <BLK>_SIMDBUSY_OVERRIDE  bit  11
 *
 * Block pairs (low half / high half):
 *   SP0_CTRL_REG    : SP00 / SP01
 *   LDS_SQ_CTRL_REG : LDS  / SQ
 *   TA_CTRL_REG     : TA   / (none -- only the low-half TA block is defined;
 *                     unlike CGTS_CU0_TA_SQC_CTRL_REG there is no SQC block)
 *   SP1_CTRL_REG    : SP10 / SP11
 *   TD_TCP_CTRL_REG : TD   / TCP
 */
#define CGTS_CU1_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU1_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU1_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU1_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU1_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU1_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU1_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU1_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU1_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU1_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU1_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU1_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU1_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU1_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU1_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU1_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU1_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU1_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU1_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU1_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU1_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU1_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU1_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU1_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU1_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU1_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU1_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU1_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU1_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU1_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU1_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU1_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU1_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU1_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU1_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU1_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU1_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU1_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU1_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU1_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU1_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU1_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU1_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU1_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU1_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU1_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU1_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU1_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU1_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU1_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU1_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU1_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU1_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU1_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU1_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU2_* control registers: SP0, LDS_SQ, TA, SP1 and TD_TCP.
 *
 * Per-block sub-layout (relative to bit 0 for the low-half block and to
 * bit 16 for the high-half block):
 *   <BLK>                    bits 0-6
 *   <BLK>_OVERRIDE           bit  7
 *   <BLK>_BUSY_OVERRIDE      bits 8-9
 *   <BLK>_LS_OVERRIDE        bit  10
 *   <BLK>_SIMDBUSY_OVERRIDE  bit  11
 *
 * Block pairs (low half / high half):
 *   SP0_CTRL_REG    : SP00 / SP01
 *   LDS_SQ_CTRL_REG : LDS  / SQ
 *   TA_CTRL_REG     : TA   / (none -- only the low-half TA block is defined)
 *   SP1_CTRL_REG    : SP10 / SP11
 *   TD_TCP_CTRL_REG : TD   / TCP
 */
#define CGTS_CU2_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU2_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU2_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU2_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU2_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU2_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU2_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU2_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU2_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU2_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU2_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU2_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU2_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU2_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU2_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU2_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU2_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU2_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU2_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU2_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU2_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU2_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU2_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU2_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU2_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU2_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU2_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU2_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU2_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU2_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU2_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU2_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU2_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU2_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU2_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU2_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU2_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU2_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU2_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU2_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU2_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU2_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU2_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU2_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU2_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU2_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU2_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU2_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU2_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU2_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU2_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU2_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU2_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU2_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU2_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU3_* control registers: SP0, LDS_SQ, TA, SP1 and TD_TCP.
 *
 * Per-block sub-layout (relative to bit 0 for the low-half block and to
 * bit 16 for the high-half block):
 *   <BLK>                    bits 0-6
 *   <BLK>_OVERRIDE           bit  7
 *   <BLK>_BUSY_OVERRIDE      bits 8-9
 *   <BLK>_LS_OVERRIDE        bit  10
 *   <BLK>_SIMDBUSY_OVERRIDE  bit  11
 *
 * Block pairs (low half / high half):
 *   SP0_CTRL_REG    : SP00 / SP01
 *   LDS_SQ_CTRL_REG : LDS  / SQ
 *   TA_CTRL_REG     : TA   / (none -- only the low-half TA block is defined)
 *   SP1_CTRL_REG    : SP10 / SP11
 *   TD_TCP_CTRL_REG : TD   / TCP
 */
#define CGTS_CU3_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU3_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU3_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU3_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU3_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU3_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU3_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU3_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU3_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU3_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU3_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU3_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU3_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU3_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU3_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU3_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU3_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU3_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU3_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU3_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU3_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU3_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU3_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU3_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU3_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU3_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU3_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU3_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU3_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU3_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU3_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU3_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU3_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU3_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU3_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU3_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU3_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU3_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU3_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU3_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU3_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU3_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU3_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU3_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU3_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU3_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU3_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU3_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU3_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU3_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU3_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU3_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU3_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU3_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU3_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU4_* control registers: SP0, LDS_SQ and TA_SQC.
 *
 * Per-block sub-layout (relative to bit 0 for the low-half block and to
 * bit 16 for the high-half block):
 *   <BLK>                    bits 0-6
 *   <BLK>_OVERRIDE           bit  7
 *   <BLK>_BUSY_OVERRIDE      bits 8-9
 *   <BLK>_LS_OVERRIDE        bit  10
 *   <BLK>_SIMDBUSY_OVERRIDE  bit  11
 *
 * Block pairs (low half / high half):
 *   SP0_CTRL_REG    : SP00 / SP01
 *   LDS_SQ_CTRL_REG : LDS  / SQ
 *   TA_SQC_CTRL_REG : TA   / SQC
 */
#define CGTS_CU4_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU4_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU4_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU4_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU4_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU4_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU4_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU4_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU4_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU4_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU4_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU4_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU4_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU4_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU4_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU4_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU4_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU4_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU4_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU4_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU4_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU4_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU4_TA_SQC_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU4_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_MASK 0x7f0000
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC__SHIFT 0x10
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_OVERRIDE_MASK 0x800000
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_OVERRIDE__SHIFT 0x17
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU4_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU4_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU4_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU4_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU4_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU4_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU4_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU4_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU4_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU4_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU4_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU4_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU4_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU4_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU4_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU4_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU4_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU4_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU4_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU4_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU4_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU4_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU4_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU4_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU4_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU4_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU5_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU5_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU5_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU5_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU5_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU5_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU5_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU5_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU5_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU5_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU5_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU5_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU5_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU5_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU5_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU5_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU5_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU5_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU5_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU5_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU5_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU5_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU5_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU5_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU5_TA_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 * TA [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10],
 * SIMDBUSY_OVERRIDE [11].
 */
#define CGTS_CU5_TA_CTRL_REG__TA_MASK                     0x7f
#define CGTS_CU5_TA_CTRL_REG__TA__SHIFT                   0x0
#define CGTS_CU5_TA_CTRL_REG__TA_OVERRIDE_MASK            0x80
#define CGTS_CU5_TA_CTRL_REG__TA_OVERRIDE__SHIFT          0x7
#define CGTS_CU5_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU5_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU5_TA_CTRL_REG__TA_LS_OVERRIDE_MASK         0x400
#define CGTS_CU5_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU5_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU5_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU5_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU5_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU5_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU5_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU5_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU5_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU5_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU5_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU5_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU5_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU5_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU5_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU5_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU5_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU5_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU5_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU5_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU5_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU5_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU5_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU5_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU5_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU5_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU5_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU5_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU6_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU6_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU6_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU6_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU6_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU6_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU6_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU6_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU6_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU6_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU6_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU6_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU6_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU6_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU6_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU6_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU6_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU6_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU6_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU6_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU6_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU6_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU6_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU6_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU6_TA_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 * TA [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10],
 * SIMDBUSY_OVERRIDE [11].
 */
#define CGTS_CU6_TA_CTRL_REG__TA_MASK                     0x7f
#define CGTS_CU6_TA_CTRL_REG__TA__SHIFT                   0x0
#define CGTS_CU6_TA_CTRL_REG__TA_OVERRIDE_MASK            0x80
#define CGTS_CU6_TA_CTRL_REG__TA_OVERRIDE__SHIFT          0x7
#define CGTS_CU6_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU6_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU6_TA_CTRL_REG__TA_LS_OVERRIDE_MASK         0x400
#define CGTS_CU6_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU6_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU6_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU6_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU6_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU6_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU6_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU6_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU6_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU6_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU6_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU6_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU6_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU6_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU6_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU6_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU6_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU6_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU6_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU6_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU6_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU6_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU6_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU6_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU6_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU6_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU6_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU6_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU7_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU7_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU7_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU7_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU7_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU7_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU7_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU7_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU7_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU7_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU7_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU7_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU7_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU7_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU7_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU7_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU7_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU7_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU7_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU7_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU7_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU7_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU7_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU7_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU7_TA_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 * TA [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10],
 * SIMDBUSY_OVERRIDE [11].
 */
#define CGTS_CU7_TA_CTRL_REG__TA_MASK                     0x7f
#define CGTS_CU7_TA_CTRL_REG__TA__SHIFT                   0x0
#define CGTS_CU7_TA_CTRL_REG__TA_OVERRIDE_MASK            0x80
#define CGTS_CU7_TA_CTRL_REG__TA_OVERRIDE__SHIFT          0x7
#define CGTS_CU7_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU7_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU7_TA_CTRL_REG__TA_LS_OVERRIDE_MASK         0x400
#define CGTS_CU7_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU7_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU7_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU7_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU7_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU7_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU7_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU7_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU7_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU7_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU7_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU7_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU7_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU7_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU7_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU7_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU7_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU7_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU7_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU7_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU7_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU7_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU7_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU7_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU7_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU7_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU7_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU7_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU8_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU8_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU8_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU8_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU8_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU8_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU8_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU8_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU8_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU8_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU8_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU8_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU8_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU8_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU8_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU8_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU8_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU8_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU8_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU8_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU8_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU8_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU8_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU8_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU8_TA_SQC_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TA group: TA [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_MASK                     0x7f
#define CGTS_CU8_TA_SQC_CTRL_REG__TA__SHIFT                   0x0
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_OVERRIDE_MASK            0x80
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_OVERRIDE__SHIFT          0x7
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_LS_OVERRIDE_MASK         0x400
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU8_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQC group: SQC [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_MASK                     0x7f0000
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC__SHIFT                   0x10
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_OVERRIDE_MASK            0x800000
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_OVERRIDE__SHIFT          0x17
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU8_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU8_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU8_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU8_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU8_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU8_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU8_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU8_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU8_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU8_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU8_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU8_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU8_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU8_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU8_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU8_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU8_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU8_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU8_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU8_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU8_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU8_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU8_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU8_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU8_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU8_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU9_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU9_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU9_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU9_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU9_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU9_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU9_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU9_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU9_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU9_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU9_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU9_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU9_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU9_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU9_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU9_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU9_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU9_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU9_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU9_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU9_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU9_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU9_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU9_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU9_TA_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 * TA [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10],
 * SIMDBUSY_OVERRIDE [11].
 */
#define CGTS_CU9_TA_CTRL_REG__TA_MASK                     0x7f
#define CGTS_CU9_TA_CTRL_REG__TA__SHIFT                   0x0
#define CGTS_CU9_TA_CTRL_REG__TA_OVERRIDE_MASK            0x80
#define CGTS_CU9_TA_CTRL_REG__TA_OVERRIDE__SHIFT          0x7
#define CGTS_CU9_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU9_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU9_TA_CTRL_REG__TA_LS_OVERRIDE_MASK         0x400
#define CGTS_CU9_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU9_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU9_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU9_SP1_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP10 group: SP10 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU9_SP1_CTRL_REG__SP10_MASK                     0x7f
#define CGTS_CU9_SP1_CTRL_REG__SP10__SHIFT                   0x0
#define CGTS_CU9_SP1_CTRL_REG__SP10_OVERRIDE_MASK            0x80
#define CGTS_CU9_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT          0x7
#define CGTS_CU9_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU9_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU9_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK         0x400
#define CGTS_CU9_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU9_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU9_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP11 group: SP11 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU9_SP1_CTRL_REG__SP11_MASK                     0x7f0000
#define CGTS_CU9_SP1_CTRL_REG__SP11__SHIFT                   0x10
#define CGTS_CU9_SP1_CTRL_REG__SP11_OVERRIDE_MASK            0x800000
#define CGTS_CU9_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT          0x17
#define CGTS_CU9_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU9_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU9_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU9_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU9_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU9_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU9_TD_TCP_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* TD group: TD [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_MASK                     0x7f
#define CGTS_CU9_TD_TCP_CTRL_REG__TD__SHIFT                   0x0
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK            0x80
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT          0x7
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK         0x400
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU9_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* TCP group: TCP [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_MASK                     0x7f0000
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP__SHIFT                   0x10
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK            0x800000
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT          0x17
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU9_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU10_SP0_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* SP00 group: SP00 [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU10_SP0_CTRL_REG__SP00_MASK                     0x7f
#define CGTS_CU10_SP0_CTRL_REG__SP00__SHIFT                   0x0
#define CGTS_CU10_SP0_CTRL_REG__SP00_OVERRIDE_MASK            0x80
#define CGTS_CU10_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT          0x7
#define CGTS_CU10_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU10_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU10_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK         0x400
#define CGTS_CU10_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU10_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU10_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SP01 group: SP01 [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU10_SP0_CTRL_REG__SP01_MASK                     0x7f0000
#define CGTS_CU10_SP0_CTRL_REG__SP01__SHIFT                   0x10
#define CGTS_CU10_SP0_CTRL_REG__SP01_OVERRIDE_MASK            0x800000
#define CGTS_CU10_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT          0x17
#define CGTS_CU10_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU10_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU10_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU10_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU10_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU10_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU10_LDS_SQ_CTRL_REG bit-field layout. Each *_MASK selects a field's
 * bits; the paired *__SHIFT is the index of that field's lowest bit.
 */
/* LDS group: LDS [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8], LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11] */
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_MASK                     0x7f
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS__SHIFT                   0x0
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK            0x80
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT          0x7
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK       0x300
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT     0x8
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK         0x400
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT       0xa
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK   0x800
#define CGTS_CU10_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
/* SQ group: SQ [22:16], OVERRIDE [23], BUSY_OVERRIDE [25:24], LS_OVERRIDE [26], SIMDBUSY_OVERRIDE [27] */
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_MASK                     0x7f0000
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ__SHIFT                   0x10
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK            0x800000
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT          0x17
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK       0x3000000
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT     0x18
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK         0x4000000
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT       0x1a
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK   0x8000000
#define CGTS_CU10_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU10_TA_CTRL_REG field masks and LSB shifts.
 * TA group only: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11]; no upper-half fields are defined.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU10_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU10_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU10_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU10_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU10_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU10_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU10_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU10_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU10_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU10_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU10_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU10_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU10_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU10_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU10_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU10_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU10_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU10_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU10_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU10_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU10_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU10_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU10_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU10_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU10_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU10_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU10_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU10_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU10_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU10_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU10_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU10_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU10_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU10_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU10_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU11_SP0_CTRL_REG field masks and LSB shifts.
 * SP00 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP01 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU11_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU11_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU11_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU11_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU11_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU11_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU11_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU11_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU11_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU11_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU11_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU11_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU11_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU11_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU11_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU11_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU11_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU11_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU11_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU11_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU11_LDS_SQ_CTRL_REG field masks and LSB shifts.
 * LDS group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQ group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU11_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU11_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU11_TA_CTRL_REG field masks and LSB shifts.
 * TA group only: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11]; no upper-half fields are defined.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU11_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU11_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU11_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU11_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU11_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU11_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU11_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU11_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU11_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU11_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU11_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU11_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU11_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU11_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU11_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU11_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU11_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU11_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU11_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU11_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU11_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU11_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU11_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU11_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU11_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU11_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU11_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU11_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU11_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU11_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU11_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU11_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU11_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU11_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU11_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU12_SP0_CTRL_REG field masks and LSB shifts.
 * SP00 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP01 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU12_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU12_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU12_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU12_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU12_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU12_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU12_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU12_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU12_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU12_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU12_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU12_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU12_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU12_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU12_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU12_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU12_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU12_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU12_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU12_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU12_LDS_SQ_CTRL_REG field masks and LSB shifts.
 * LDS group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQ group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU12_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU12_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU12_TA_SQC_CTRL_REG field masks and LSB shifts.
 * Unlike the TA-only CGTS_CU<n>_TA_CTRL_REG of the neighbouring CUs, this
 * register also defines an SQC group in the upper half-word.
 * TA group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQC group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU12_TA_SQC_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU12_TA_SQC_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_MASK 0x7f0000
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC__SHIFT 0x10
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_OVERRIDE_MASK 0x800000
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_OVERRIDE__SHIFT 0x17
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU12_TA_SQC_CTRL_REG__SQC_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU12_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU12_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU12_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU12_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU12_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU12_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU12_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU12_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU12_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU12_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU12_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU12_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU12_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU12_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU12_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU12_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU12_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU12_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU12_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU12_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU12_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU12_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU12_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU12_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU12_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU13_SP0_CTRL_REG field masks and LSB shifts.
 * SP00 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP01 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU13_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU13_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU13_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU13_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU13_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU13_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU13_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU13_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU13_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU13_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU13_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU13_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU13_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU13_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU13_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU13_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU13_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU13_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU13_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU13_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU13_LDS_SQ_CTRL_REG field masks and LSB shifts.
 * LDS group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQ group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU13_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU13_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU13_TA_CTRL_REG field masks and LSB shifts.
 * TA group only: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11]; no upper-half fields are defined.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU13_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU13_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU13_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU13_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU13_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU13_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU13_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU13_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU13_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU13_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU13_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU13_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU13_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU13_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU13_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU13_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU13_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU13_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU13_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU13_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU13_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU13_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU13_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU13_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU13_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU13_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU13_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU13_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU13_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU13_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU13_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU13_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU13_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU13_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU13_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU14_SP0_CTRL_REG field masks and LSB shifts.
 * SP00 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP01 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU14_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU14_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU14_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU14_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU14_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU14_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU14_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU14_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU14_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU14_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU14_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU14_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU14_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU14_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU14_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU14_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU14_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU14_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU14_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU14_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU14_LDS_SQ_CTRL_REG field masks and LSB shifts.
 * LDS group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQ group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU14_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU14_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU14_TA_CTRL_REG field masks and LSB shifts.
 * TA group only: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11]; no upper-half fields are defined.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU14_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU14_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU14_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU14_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU14_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU14_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU14_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU14_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU14_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU14_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU14_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU14_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU14_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU14_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU14_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU14_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU14_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU14_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU14_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU14_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU14_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU14_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU14_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU14_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU14_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU14_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU14_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU14_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU14_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU14_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU14_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU14_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU14_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU14_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU14_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU15_SP0_CTRL_REG field masks and LSB shifts.
 * SP00 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP01 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU15_SP0_CTRL_REG__SP00_MASK 0x7f
#define CGTS_CU15_SP0_CTRL_REG__SP00__SHIFT 0x0
#define CGTS_CU15_SP0_CTRL_REG__SP00_OVERRIDE_MASK 0x80
#define CGTS_CU15_SP0_CTRL_REG__SP00_OVERRIDE__SHIFT 0x7
#define CGTS_CU15_SP0_CTRL_REG__SP00_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU15_SP0_CTRL_REG__SP00_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU15_SP0_CTRL_REG__SP00_LS_OVERRIDE_MASK 0x400
#define CGTS_CU15_SP0_CTRL_REG__SP00_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU15_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU15_SP0_CTRL_REG__SP00_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU15_SP0_CTRL_REG__SP01_MASK 0x7f0000
#define CGTS_CU15_SP0_CTRL_REG__SP01__SHIFT 0x10
#define CGTS_CU15_SP0_CTRL_REG__SP01_OVERRIDE_MASK 0x800000
#define CGTS_CU15_SP0_CTRL_REG__SP01_OVERRIDE__SHIFT 0x17
#define CGTS_CU15_SP0_CTRL_REG__SP01_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU15_SP0_CTRL_REG__SP01_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU15_SP0_CTRL_REG__SP01_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU15_SP0_CTRL_REG__SP01_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU15_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU15_SP0_CTRL_REG__SP01_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU15_LDS_SQ_CTRL_REG field masks and LSB shifts.
 * LDS group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SQ group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_MASK 0x7f
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS__SHIFT 0x0
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_OVERRIDE_MASK 0x80
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_OVERRIDE__SHIFT 0x7
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE_MASK 0x400
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU15_LDS_SQ_CTRL_REG__LDS_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_MASK 0x7f0000
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ__SHIFT 0x10
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_OVERRIDE_MASK 0x800000
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_OVERRIDE__SHIFT 0x17
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU15_LDS_SQ_CTRL_REG__SQ_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU15_TA_CTRL_REG field masks and LSB shifts.
 * TA group only: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11]; no upper-half fields are defined.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU15_TA_CTRL_REG__TA_MASK 0x7f
#define CGTS_CU15_TA_CTRL_REG__TA__SHIFT 0x0
#define CGTS_CU15_TA_CTRL_REG__TA_OVERRIDE_MASK 0x80
#define CGTS_CU15_TA_CTRL_REG__TA_OVERRIDE__SHIFT 0x7
#define CGTS_CU15_TA_CTRL_REG__TA_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU15_TA_CTRL_REG__TA_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU15_TA_CTRL_REG__TA_LS_OVERRIDE_MASK 0x400
#define CGTS_CU15_TA_CTRL_REG__TA_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU15_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU15_TA_CTRL_REG__TA_SIMDBUSY_OVERRIDE__SHIFT 0xb
/*
 * CGTS_CU15_SP1_CTRL_REG field masks and LSB shifts.
 * SP10 group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * SP11 group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU15_SP1_CTRL_REG__SP10_MASK 0x7f
#define CGTS_CU15_SP1_CTRL_REG__SP10__SHIFT 0x0
#define CGTS_CU15_SP1_CTRL_REG__SP10_OVERRIDE_MASK 0x80
#define CGTS_CU15_SP1_CTRL_REG__SP10_OVERRIDE__SHIFT 0x7
#define CGTS_CU15_SP1_CTRL_REG__SP10_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU15_SP1_CTRL_REG__SP10_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU15_SP1_CTRL_REG__SP10_LS_OVERRIDE_MASK 0x400
#define CGTS_CU15_SP1_CTRL_REG__SP10_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU15_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU15_SP1_CTRL_REG__SP10_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU15_SP1_CTRL_REG__SP11_MASK 0x7f0000
#define CGTS_CU15_SP1_CTRL_REG__SP11__SHIFT 0x10
#define CGTS_CU15_SP1_CTRL_REG__SP11_OVERRIDE_MASK 0x800000
#define CGTS_CU15_SP1_CTRL_REG__SP11_OVERRIDE__SHIFT 0x17
#define CGTS_CU15_SP1_CTRL_REG__SP11_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU15_SP1_CTRL_REG__SP11_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU15_SP1_CTRL_REG__SP11_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU15_SP1_CTRL_REG__SP11_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU15_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU15_SP1_CTRL_REG__SP11_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTS_CU15_TD_TCP_CTRL_REG field masks and LSB shifts.
 * TD group: value [6:0], OVERRIDE [7], BUSY_OVERRIDE [9:8],
 * LS_OVERRIDE [10], SIMDBUSY_OVERRIDE [11].
 * TCP group: same layout 16 bits higher ([22:16], [23], [25:24], [26], [27]).
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_MASK 0x7f
#define CGTS_CU15_TD_TCP_CTRL_REG__TD__SHIFT 0x0
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_OVERRIDE_MASK 0x80
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_OVERRIDE__SHIFT 0x7
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE_MASK 0x300
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_BUSY_OVERRIDE__SHIFT 0x8
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_LS_OVERRIDE_MASK 0x400
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_LS_OVERRIDE__SHIFT 0xa
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE_MASK 0x800
#define CGTS_CU15_TD_TCP_CTRL_REG__TD_SIMDBUSY_OVERRIDE__SHIFT 0xb
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_MASK 0x7f0000
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP__SHIFT 0x10
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_OVERRIDE_MASK 0x800000
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_OVERRIDE__SHIFT 0x17
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE_MASK 0x3000000
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_BUSY_OVERRIDE__SHIFT 0x18
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE_MASK 0x4000000
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_LS_OVERRIDE__SHIFT 0x1a
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE_MASK 0x8000000
#define CGTS_CU15_TD_TCP_CTRL_REG__TCP_SIMDBUSY_OVERRIDE__SHIFT 0x1b
/*
 * CGTT_SPI_CLK_CTRL field masks and LSB shifts.
 * ON_DELAY [3:0], OFF_HYSTERESIS [11:4], GRP5_CG_OFF_HYST [23:18],
 * GRP5_CG_OVERRIDE [24], ALL_CLK_ON_OVERRIDE [26], GRP3_OVERRIDE [27],
 * GRP2_OVERRIDE [28], GRP1_OVERRIDE [29], GRP0_OVERRIDE [30],
 * REG_OVERRIDE [31]. Bits [17:12] and [25] have no field defined here.
 * NOTE(review): field semantics are not visible here - confirm in the HW spec.
 */
#define CGTT_SPI_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_SPI_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_SPI_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SPI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SPI_CLK_CTRL__GRP5_CG_OFF_HYST_MASK 0xfc0000
#define CGTT_SPI_CLK_CTRL__GRP5_CG_OFF_HYST__SHIFT 0x12
#define CGTT_SPI_CLK_CTRL__GRP5_CG_OVERRIDE_MASK 0x1000000
#define CGTT_SPI_CLK_CTRL__GRP5_CG_OVERRIDE__SHIFT 0x18
#define CGTT_SPI_CLK_CTRL__ALL_CLK_ON_OVERRIDE_MASK 0x4000000
#define CGTT_SPI_CLK_CTRL__ALL_CLK_ON_OVERRIDE__SHIFT 0x1a
#define CGTT_SPI_CLK_CTRL__GRP3_OVERRIDE_MASK 0x8000000
#define CGTT_SPI_CLK_CTRL__GRP3_OVERRIDE__SHIFT 0x1b
#define CGTT_SPI_CLK_CTRL__GRP2_OVERRIDE_MASK 0x10000000
#define CGTT_SPI_CLK_CTRL__GRP2_OVERRIDE__SHIFT 0x1c
#define CGTT_SPI_CLK_CTRL__GRP1_OVERRIDE_MASK 0x20000000
#define CGTT_SPI_CLK_CTRL__GRP1_OVERRIDE__SHIFT 0x1d
#define CGTT_SPI_CLK_CTRL__GRP0_OVERRIDE_MASK 0x40000000
#define CGTT_SPI_CLK_CTRL__GRP0_OVERRIDE__SHIFT 0x1e
#define CGTT_SPI_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_SPI_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/*
 * CGTT_PC_CLK_CTRL field mask/shift pairs.
 * Layout implied by the values below:
 *   ON_DELAY               bits  3:0
 *   OFF_HYSTERESIS         bits 11:4
 *   GRP5_CG_OFF_HYST       bits 23:18
 *   GRP5_CG_OVERRIDE       bit  24
 *   BACK_CLK_ON_OVERRIDE   bit  25
 *   FRONT_CLK_ON_OVERRIDE  bit  26
 *   CORE3..CORE0_OVERRIDE  bits 27..30 (CORE3 lowest, CORE0 highest)
 *   REG_OVERRIDE           bit  31
 * No field is defined here for bits 17:12.
 */
#define CGTT_PC_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_PC_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_PC_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_PC_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_PC_CLK_CTRL__GRP5_CG_OFF_HYST_MASK 0xfc0000
#define CGTT_PC_CLK_CTRL__GRP5_CG_OFF_HYST__SHIFT 0x12
#define CGTT_PC_CLK_CTRL__GRP5_CG_OVERRIDE_MASK 0x1000000
#define CGTT_PC_CLK_CTRL__GRP5_CG_OVERRIDE__SHIFT 0x18
#define CGTT_PC_CLK_CTRL__BACK_CLK_ON_OVERRIDE_MASK 0x2000000
#define CGTT_PC_CLK_CTRL__BACK_CLK_ON_OVERRIDE__SHIFT 0x19
#define CGTT_PC_CLK_CTRL__FRONT_CLK_ON_OVERRIDE_MASK 0x4000000
#define CGTT_PC_CLK_CTRL__FRONT_CLK_ON_OVERRIDE__SHIFT 0x1a
#define CGTT_PC_CLK_CTRL__CORE3_OVERRIDE_MASK 0x8000000
#define CGTT_PC_CLK_CTRL__CORE3_OVERRIDE__SHIFT 0x1b
#define CGTT_PC_CLK_CTRL__CORE2_OVERRIDE_MASK 0x10000000
#define CGTT_PC_CLK_CTRL__CORE2_OVERRIDE__SHIFT 0x1c
#define CGTT_PC_CLK_CTRL__CORE1_OVERRIDE_MASK 0x20000000
#define CGTT_PC_CLK_CTRL__CORE1_OVERRIDE__SHIFT 0x1d
#define CGTT_PC_CLK_CTRL__CORE0_OVERRIDE_MASK 0x40000000
#define CGTT_PC_CLK_CTRL__CORE0_OVERRIDE__SHIFT 0x1e
#define CGTT_PC_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_PC_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/*
 * CGTT_BCI_CLK_CTRL field mask/shift pairs.
 * Layout implied by the values below (the fields cover all 32 bits):
 *   ON_DELAY               bits  3:0
 *   OFF_HYSTERESIS         bits 11:4
 *   RESERVED               bits 23:12
 *   CORE6..CORE0_OVERRIDE  bits 24..30 (CORE6 lowest, CORE0 highest)
 *   REG_OVERRIDE           bit  31
 */
#define CGTT_BCI_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_BCI_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_BCI_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_BCI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_BCI_CLK_CTRL__RESERVED_MASK 0xfff000
#define CGTT_BCI_CLK_CTRL__RESERVED__SHIFT 0xc
#define CGTT_BCI_CLK_CTRL__CORE6_OVERRIDE_MASK 0x1000000
#define CGTT_BCI_CLK_CTRL__CORE6_OVERRIDE__SHIFT 0x18
#define CGTT_BCI_CLK_CTRL__CORE5_OVERRIDE_MASK 0x2000000
#define CGTT_BCI_CLK_CTRL__CORE5_OVERRIDE__SHIFT 0x19
#define CGTT_BCI_CLK_CTRL__CORE4_OVERRIDE_MASK 0x4000000
#define CGTT_BCI_CLK_CTRL__CORE4_OVERRIDE__SHIFT 0x1a
#define CGTT_BCI_CLK_CTRL__CORE3_OVERRIDE_MASK 0x8000000
#define CGTT_BCI_CLK_CTRL__CORE3_OVERRIDE__SHIFT 0x1b
#define CGTT_BCI_CLK_CTRL__CORE2_OVERRIDE_MASK 0x10000000
#define CGTT_BCI_CLK_CTRL__CORE2_OVERRIDE__SHIFT 0x1c
#define CGTT_BCI_CLK_CTRL__CORE1_OVERRIDE_MASK 0x20000000
#define CGTT_BCI_CLK_CTRL__CORE1_OVERRIDE__SHIFT 0x1d
#define CGTT_BCI_CLK_CTRL__CORE0_OVERRIDE_MASK 0x40000000
#define CGTT_BCI_CLK_CTRL__CORE0_OVERRIDE__SHIFT 0x1e
#define CGTT_BCI_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_BCI_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/*
 * SPI_WF_LIFETIME_CNTL field mask/shift pairs:
 *   SAMPLE_PERIOD  bits 3:0
 *   EN             bit  4
 */
#define SPI_WF_LIFETIME_CNTL__SAMPLE_PERIOD_MASK 0xf
#define SPI_WF_LIFETIME_CNTL__SAMPLE_PERIOD__SHIFT 0x0
#define SPI_WF_LIFETIME_CNTL__EN_MASK 0x10
#define SPI_WF_LIFETIME_CNTL__EN__SHIFT 0x4
/*
 * SPI_WF_LIFETIME_LIMIT_0 .. SPI_WF_LIFETIME_LIMIT_9 field mask/shift pairs.
 * All ten instances share the same layout:
 *   MAX_CNT  bits 30:0
 *   EN_WARN  bit  31
 */
#define SPI_WF_LIFETIME_LIMIT_0__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_0__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_0__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_0__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_1__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_1__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_1__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_1__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_2__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_2__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_2__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_2__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_3__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_3__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_3__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_3__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_4__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_4__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_4__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_4__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_5__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_5__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_5__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_5__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_6__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_6__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_6__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_6__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_7__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_7__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_7__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_7__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_8__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_8__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_8__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_8__EN_WARN__SHIFT 0x1f
#define SPI_WF_LIFETIME_LIMIT_9__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_LIMIT_9__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_LIMIT_9__EN_WARN_MASK 0x80000000
#define SPI_WF_LIFETIME_LIMIT_9__EN_WARN__SHIFT 0x1f
/*
 * SPI_WF_LIFETIME_STATUS_0 .. SPI_WF_LIFETIME_STATUS_20 field mask/shift pairs.
 * All twenty-one instances share the same layout:
 *   MAX_CNT   bits 30:0
 *   INT_SENT  bit  31
 */
#define SPI_WF_LIFETIME_STATUS_0__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_0__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_0__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_0__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_1__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_1__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_1__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_1__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_2__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_2__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_2__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_2__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_3__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_3__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_3__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_3__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_4__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_4__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_4__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_4__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_5__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_5__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_5__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_5__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_6__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_6__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_6__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_6__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_7__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_7__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_7__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_7__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_8__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_8__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_8__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_8__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_9__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_9__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_9__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_9__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_10__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_10__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_10__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_10__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_11__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_11__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_11__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_11__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_12__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_12__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_12__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_12__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_13__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_13__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_13__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_13__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_14__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_14__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_14__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_14__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_15__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_15__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_15__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_15__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_16__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_16__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_16__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_16__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_17__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_17__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_17__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_17__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_18__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_18__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_18__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_18__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_19__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_19__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_19__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_19__INT_SENT__SHIFT 0x1f
#define SPI_WF_LIFETIME_STATUS_20__MAX_CNT_MASK 0x7fffffff
#define SPI_WF_LIFETIME_STATUS_20__MAX_CNT__SHIFT 0x0
#define SPI_WF_LIFETIME_STATUS_20__INT_SENT_MASK 0x80000000
#define SPI_WF_LIFETIME_STATUS_20__INT_SENT__SHIFT 0x1f
/*
 * SPI_WF_LIFETIME_DEBUG field mask/shift pairs:
 *   START_VALUE  bits 30:0
 *   OVERRIDE_EN  bit  31
 */
#define SPI_WF_LIFETIME_DEBUG__START_VALUE_MASK 0x7fffffff
#define SPI_WF_LIFETIME_DEBUG__START_VALUE__SHIFT 0x0
#define SPI_WF_LIFETIME_DEBUG__OVERRIDE_EN_MASK 0x80000000
#define SPI_WF_LIFETIME_DEBUG__OVERRIDE_EN__SHIFT 0x1f
/*
 * SPI_SLAVE_DEBUG_BUSY field mask/shift pairs.
 * Every field is a single bit; the fields are listed in ascending bit order
 * and occupy bits 0 through 21 contiguously:
 *   bits  0..4   LS/HS/ES/GS/VS_VTX_BUSY
 *   bits  5..8   VGPR_WC00/01/10/11_BUSY
 *   bits  9..16  SGPR_WC00/01/02/03/10/11/12/13_BUSY
 *   bits 17..18  WAVEBUFFER0/1_BUSY
 *   bits 19..20  WAVE_WC0/1_BUSY
 *   bit  21      EVENT_CNTL_BUSY
 */
#define SPI_SLAVE_DEBUG_BUSY__LS_VTX_BUSY_MASK 0x1
#define SPI_SLAVE_DEBUG_BUSY__LS_VTX_BUSY__SHIFT 0x0
#define SPI_SLAVE_DEBUG_BUSY__HS_VTX_BUSY_MASK 0x2
#define SPI_SLAVE_DEBUG_BUSY__HS_VTX_BUSY__SHIFT 0x1
#define SPI_SLAVE_DEBUG_BUSY__ES_VTX_BUSY_MASK 0x4
#define SPI_SLAVE_DEBUG_BUSY__ES_VTX_BUSY__SHIFT 0x2
#define SPI_SLAVE_DEBUG_BUSY__GS_VTX_BUSY_MASK 0x8
#define SPI_SLAVE_DEBUG_BUSY__GS_VTX_BUSY__SHIFT 0x3
#define SPI_SLAVE_DEBUG_BUSY__VS_VTX_BUSY_MASK 0x10
#define SPI_SLAVE_DEBUG_BUSY__VS_VTX_BUSY__SHIFT 0x4
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC00_BUSY_MASK 0x20
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC00_BUSY__SHIFT 0x5
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC01_BUSY_MASK 0x40
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC01_BUSY__SHIFT 0x6
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC10_BUSY_MASK 0x80
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC10_BUSY__SHIFT 0x7
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC11_BUSY_MASK 0x100
#define SPI_SLAVE_DEBUG_BUSY__VGPR_WC11_BUSY__SHIFT 0x8
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC00_BUSY_MASK 0x200
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC00_BUSY__SHIFT 0x9
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC01_BUSY_MASK 0x400
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC01_BUSY__SHIFT 0xa
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC02_BUSY_MASK 0x800
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC02_BUSY__SHIFT 0xb
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC03_BUSY_MASK 0x1000
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC03_BUSY__SHIFT 0xc
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC10_BUSY_MASK 0x2000
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC10_BUSY__SHIFT 0xd
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC11_BUSY_MASK 0x4000
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC11_BUSY__SHIFT 0xe
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC12_BUSY_MASK 0x8000
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC12_BUSY__SHIFT 0xf
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC13_BUSY_MASK 0x10000
#define SPI_SLAVE_DEBUG_BUSY__SGPR_WC13_BUSY__SHIFT 0x10
#define SPI_SLAVE_DEBUG_BUSY__WAVEBUFFER0_BUSY_MASK 0x20000
#define SPI_SLAVE_DEBUG_BUSY__WAVEBUFFER0_BUSY__SHIFT 0x11
#define SPI_SLAVE_DEBUG_BUSY__WAVEBUFFER1_BUSY_MASK 0x40000
#define SPI_SLAVE_DEBUG_BUSY__WAVEBUFFER1_BUSY__SHIFT 0x12
#define SPI_SLAVE_DEBUG_BUSY__WAVE_WC0_BUSY_MASK 0x80000
#define SPI_SLAVE_DEBUG_BUSY__WAVE_WC0_BUSY__SHIFT 0x13
#define SPI_SLAVE_DEBUG_BUSY__WAVE_WC1_BUSY_MASK 0x100000
#define SPI_SLAVE_DEBUG_BUSY__WAVE_WC1_BUSY__SHIFT 0x14
#define SPI_SLAVE_DEBUG_BUSY__EVENT_CNTL_BUSY_MASK 0x200000
#define SPI_SLAVE_DEBUG_BUSY__EVENT_CNTL_BUSY__SHIFT 0x15
/*
 * Single-field mask/shift pairs for four registers:
 *   SPI_LB_CTR_CTRL.LOAD                  bit   0
 *   SPI_LB_CU_MASK.CU_MASK                bits 15:0
 *   SPI_LB_DATA_REG.CNT_DATA              bits 31:0
 *   SPI_PG_ENABLE_STATIC_CU_MASK.CU_MASK  bits 15:0
 */
#define SPI_LB_CTR_CTRL__LOAD_MASK 0x1
#define SPI_LB_CTR_CTRL__LOAD__SHIFT 0x0
#define SPI_LB_CU_MASK__CU_MASK_MASK 0xffff
#define SPI_LB_CU_MASK__CU_MASK__SHIFT 0x0
#define SPI_LB_DATA_REG__CNT_DATA_MASK 0xffffffff
#define SPI_LB_DATA_REG__CNT_DATA__SHIFT 0x0
#define SPI_PG_ENABLE_STATIC_CU_MASK__CU_MASK_MASK 0xffff
#define SPI_PG_ENABLE_STATIC_CU_MASK__CU_MASK__SHIFT 0x0
/*
 * SPI_GDS_CREDITS field mask/shift pairs (the fields cover all 32 bits):
 *   DS_DATA_CREDITS  bits  7:0
 *   DS_CMD_CREDITS   bits 15:8
 *   UNUSED           bits 31:16
 */
#define SPI_GDS_CREDITS__DS_DATA_CREDITS_MASK 0xff
#define SPI_GDS_CREDITS__DS_DATA_CREDITS__SHIFT 0x0
#define SPI_GDS_CREDITS__DS_CMD_CREDITS_MASK 0xff00
#define SPI_GDS_CREDITS__DS_CMD_CREDITS__SHIFT 0x8
#define SPI_GDS_CREDITS__UNUSED_MASK 0xffff0000
#define SPI_GDS_CREDITS__UNUSED__SHIFT 0x10
/*
 * SPI_SX_EXPORT_BUFFER_SIZES and SPI_SX_SCOREBOARD_BUFFER_SIZES field
 * mask/shift pairs. Both split the 32-bit value into two 16-bit halves:
 *   COLOR_BUFFER_SIZE    / COLOR_SCOREBOARD_SIZE     bits 15:0
 *   POSITION_BUFFER_SIZE / POSITION_SCOREBOARD_SIZE  bits 31:16
 */
#define SPI_SX_EXPORT_BUFFER_SIZES__COLOR_BUFFER_SIZE_MASK 0xffff
#define SPI_SX_EXPORT_BUFFER_SIZES__COLOR_BUFFER_SIZE__SHIFT 0x0
#define SPI_SX_EXPORT_BUFFER_SIZES__POSITION_BUFFER_SIZE_MASK 0xffff0000
#define SPI_SX_EXPORT_BUFFER_SIZES__POSITION_BUFFER_SIZE__SHIFT 0x10
#define SPI_SX_SCOREBOARD_BUFFER_SIZES__COLOR_SCOREBOARD_SIZE_MASK 0xffff
#define SPI_SX_SCOREBOARD_BUFFER_SIZES__COLOR_SCOREBOARD_SIZE__SHIFT 0x0
#define SPI_SX_SCOREBOARD_BUFFER_SIZES__POSITION_SCOREBOARD_SIZE_MASK 0xffff0000
#define SPI_SX_SCOREBOARD_BUFFER_SIZES__POSITION_SCOREBOARD_SIZE__SHIFT 0x10
/*
 * SPI_CSQ_WF_ACTIVE_STATUS and SPI_CSQ_WF_ACTIVE_COUNT_0 .. _7 field
 * mask/shift pairs:
 *   SPI_CSQ_WF_ACTIVE_STATUS.ACTIVE   bits 31:0
 *   SPI_CSQ_WF_ACTIVE_COUNT_n.COUNT   bits 10:0 (same in all eight instances)
 */
#define SPI_CSQ_WF_ACTIVE_STATUS__ACTIVE_MASK 0xffffffff
#define SPI_CSQ_WF_ACTIVE_STATUS__ACTIVE__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_1__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_1__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_2__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_2__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_3__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_3__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_4__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_4__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_5__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_5__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_6__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_6__COUNT__SHIFT 0x0
#define SPI_CSQ_WF_ACTIVE_COUNT_7__COUNT_MASK 0x7ff
#define SPI_CSQ_WF_ACTIVE_COUNT_7__COUNT__SHIFT 0x0
/* BCI_DEBUG_READ field mask/shift pair: DATA occupies bits 23:0. */
#define BCI_DEBUG_READ__DATA_MASK 0xffffff
#define BCI_DEBUG_READ__DATA__SHIFT 0x0
/*
 * SPI_P0_TRAP_SCREEN_* field mask/shift pairs:
 *   PSBA_LO.MEM_BASE  bits 31:0     PSBA_HI.MEM_BASE  bits 7:0
 *   PSMA_LO.MEM_BASE  bits 31:0     PSMA_HI.MEM_BASE  bits 7:0
 *   GPR_MIN.VGPR_MIN  bits  5:0     GPR_MIN.SGPR_MIN  bits 9:6
 */
#define SPI_P0_TRAP_SCREEN_PSBA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_P0_TRAP_SCREEN_PSBA_LO__MEM_BASE__SHIFT 0x0
#define SPI_P0_TRAP_SCREEN_PSBA_HI__MEM_BASE_MASK 0xff
#define SPI_P0_TRAP_SCREEN_PSBA_HI__MEM_BASE__SHIFT 0x0
#define SPI_P0_TRAP_SCREEN_PSMA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_P0_TRAP_SCREEN_PSMA_LO__MEM_BASE__SHIFT 0x0
#define SPI_P0_TRAP_SCREEN_PSMA_HI__MEM_BASE_MASK 0xff
#define SPI_P0_TRAP_SCREEN_PSMA_HI__MEM_BASE__SHIFT 0x0
#define SPI_P0_TRAP_SCREEN_GPR_MIN__VGPR_MIN_MASK 0x3f
#define SPI_P0_TRAP_SCREEN_GPR_MIN__VGPR_MIN__SHIFT 0x0
#define SPI_P0_TRAP_SCREEN_GPR_MIN__SGPR_MIN_MASK 0x3c0
#define SPI_P0_TRAP_SCREEN_GPR_MIN__SGPR_MIN__SHIFT 0x6
/*
 * SPI_P1_TRAP_SCREEN_* field mask/shift pairs:
 *   PSBA_LO.MEM_BASE  bits 31:0     PSBA_HI.MEM_BASE  bits 7:0
 *   PSMA_LO.MEM_BASE  bits 31:0     PSMA_HI.MEM_BASE  bits 7:0
 *   GPR_MIN.VGPR_MIN  bits  5:0     GPR_MIN.SGPR_MIN  bits 9:6
 */
#define SPI_P1_TRAP_SCREEN_PSBA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_P1_TRAP_SCREEN_PSBA_LO__MEM_BASE__SHIFT 0x0
#define SPI_P1_TRAP_SCREEN_PSBA_HI__MEM_BASE_MASK 0xff
#define SPI_P1_TRAP_SCREEN_PSBA_HI__MEM_BASE__SHIFT 0x0
#define SPI_P1_TRAP_SCREEN_PSMA_LO__MEM_BASE_MASK 0xffffffff
#define SPI_P1_TRAP_SCREEN_PSMA_LO__MEM_BASE__SHIFT 0x0
#define SPI_P1_TRAP_SCREEN_PSMA_HI__MEM_BASE_MASK 0xff
#define SPI_P1_TRAP_SCREEN_PSMA_HI__MEM_BASE__SHIFT 0x0
#define SPI_P1_TRAP_SCREEN_GPR_MIN__VGPR_MIN_MASK 0x3f
#define SPI_P1_TRAP_SCREEN_GPR_MIN__VGPR_MIN__SHIFT 0x0
#define SPI_P1_TRAP_SCREEN_GPR_MIN__SGPR_MIN_MASK 0x3c0
#define SPI_P1_TRAP_SCREEN_GPR_MIN__SGPR_MIN__SHIFT 0x6
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_PS field mask/shift pairs.
 * Each register has a single MEM_BASE field:
 *   *_LO_PS.MEM_BASE  bits 31:0
 *   *_HI_PS.MEM_BASE  bits  7:0
 */
#define SPI_SHADER_TBA_LO_PS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_PS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_PS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_PS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_PS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_PS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_PS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_PS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_PS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_PS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_PS__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_PS__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_PS field mask/shift pairs.
 * Layout implied by the values below:
 *   VGPRS             bits  5:0
 *   SGPRS             bits  9:6
 *   PRIORITY          bits 11:10
 *   FLOAT_MODE        bits 19:12
 *   PRIV              bit  20
 *   DX10_CLAMP        bit  21
 *   DEBUG_MODE        bit  22
 *   IEEE_MODE         bit  23
 *   CU_GROUP_DISABLE  bit  24
 *   CACHE_CTL         bits 27:25
 *   CDBG_USER         bit  28
 */
#define SPI_SHADER_PGM_RSRC1_PS__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_PS__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_PS__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_PS__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_PS__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_PS__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_PS__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_PS__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_PS__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_PS__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_PS__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_PS__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_PS__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_PS__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_PS__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_PS__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_PS__CU_GROUP_DISABLE_MASK 0x1000000
#define SPI_SHADER_PGM_RSRC1_PS__CU_GROUP_DISABLE__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_PS__CACHE_CTL_MASK 0xe000000
#define SPI_SHADER_PGM_RSRC1_PS__CACHE_CTL__SHIFT 0x19
#define SPI_SHADER_PGM_RSRC1_PS__CDBG_USER_MASK 0x10000000
#define SPI_SHADER_PGM_RSRC1_PS__CDBG_USER__SHIFT 0x1c
/*
 * SPI_SHADER_PGM_RSRC2_PS field mask/shift pairs.
 * Layout implied by the values below:
 *   SCRATCH_EN      bit   0
 *   USER_SGPR       bits  5:1
 *   TRAP_PRESENT    bit   6
 *   WAVE_CNT_EN     bit   7
 *   EXTRA_LDS_SIZE  bits 15:8
 *   EXCP_EN         bits 24:16
 */
#define SPI_SHADER_PGM_RSRC2_PS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_PS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_PS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_PS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_PS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_PS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_PS__WAVE_CNT_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_PS__WAVE_CNT_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_PS__EXTRA_LDS_SIZE_MASK 0xff00
#define SPI_SHADER_PGM_RSRC2_PS__EXTRA_LDS_SIZE__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_PS__EXCP_EN_MASK 0x1ff0000
#define SPI_SHADER_PGM_RSRC2_PS__EXCP_EN__SHIFT 0x10
/*
 * SPI_SHADER_PGM_RSRC3_PS field mask/shift pairs:
 *   CU_EN               bits 15:0
 *   WAVE_LIMIT          bits 21:16
 *   LOCK_LOW_THRESHOLD  bits 25:22
 */
#define SPI_SHADER_PGM_RSRC3_PS__CU_EN_MASK 0xffff
#define SPI_SHADER_PGM_RSRC3_PS__CU_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_PS__WAVE_LIMIT_MASK 0x3f0000
#define SPI_SHADER_PGM_RSRC3_PS__WAVE_LIMIT__SHIFT 0x10
#define SPI_SHADER_PGM_RSRC3_PS__LOCK_LOW_THRESHOLD_MASK 0x3c00000
#define SPI_SHADER_PGM_RSRC3_PS__LOCK_LOW_THRESHOLD__SHIFT 0x16
/*
 * SPI_SHADER_USER_DATA_PS_0 .. SPI_SHADER_USER_DATA_PS_15 field mask/shift
 * pairs. Each of the sixteen instances has a single DATA field spanning
 * bits 31:0.
 */
#define SPI_SHADER_USER_DATA_PS_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_PS_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_PS_15__DATA__SHIFT 0x0
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_VS field mask/shift pairs.
 * Each register has a single MEM_BASE field:
 *   *_LO_VS.MEM_BASE  bits 31:0
 *   *_HI_VS.MEM_BASE  bits  7:0
 */
#define SPI_SHADER_TBA_LO_VS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_VS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_VS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_VS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_VS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_VS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_VS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_VS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_VS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_VS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_VS__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_VS__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_VS field mask/shift pairs.
 * Layout implied by the values below:
 *   VGPRS            bits  5:0
 *   SGPRS            bits  9:6
 *   PRIORITY         bits 11:10
 *   FLOAT_MODE       bits 19:12
 *   PRIV             bit  20
 *   DX10_CLAMP       bit  21
 *   DEBUG_MODE       bit  22
 *   IEEE_MODE        bit  23
 *   VGPR_COMP_CNT    bits 25:24
 *   CU_GROUP_ENABLE  bit  26
 *   CACHE_CTL        bits 29:27
 *   CDBG_USER        bit  30
 */
#define SPI_SHADER_PGM_RSRC1_VS__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_VS__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_VS__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_VS__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_VS__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_VS__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_VS__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_VS__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_VS__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_VS__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_VS__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_VS__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_VS__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_VS__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_VS__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_VS__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_VS__VGPR_COMP_CNT_MASK 0x3000000
#define SPI_SHADER_PGM_RSRC1_VS__VGPR_COMP_CNT__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_VS__CU_GROUP_ENABLE_MASK 0x4000000
#define SPI_SHADER_PGM_RSRC1_VS__CU_GROUP_ENABLE__SHIFT 0x1a
#define SPI_SHADER_PGM_RSRC1_VS__CACHE_CTL_MASK 0x38000000
#define SPI_SHADER_PGM_RSRC1_VS__CACHE_CTL__SHIFT 0x1b
#define SPI_SHADER_PGM_RSRC1_VS__CDBG_USER_MASK 0x40000000
#define SPI_SHADER_PGM_RSRC1_VS__CDBG_USER__SHIFT 0x1e
/*
 * SPI_SHADER_PGM_RSRC2_VS field mask/shift pairs.
 * Layout implied by the values below:
 *   SCRATCH_EN            bit   0
 *   USER_SGPR             bits  5:1
 *   TRAP_PRESENT          bit   6
 *   OC_LDS_EN             bit   7
 *   SO_BASE0..SO_BASE3_EN bits  8..11 (one bit each, ascending)
 *   SO_EN                 bit  12
 *   EXCP_EN               bits 21:13
 */
#define SPI_SHADER_PGM_RSRC2_VS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_VS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_VS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_VS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_VS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_VS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_VS__OC_LDS_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_VS__OC_LDS_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE0_EN_MASK 0x100
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE0_EN__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE1_EN_MASK 0x200
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE1_EN__SHIFT 0x9
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE2_EN_MASK 0x400
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE2_EN__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE3_EN_MASK 0x800
#define SPI_SHADER_PGM_RSRC2_VS__SO_BASE3_EN__SHIFT 0xb
#define SPI_SHADER_PGM_RSRC2_VS__SO_EN_MASK 0x1000
#define SPI_SHADER_PGM_RSRC2_VS__SO_EN__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC2_VS__EXCP_EN_MASK 0x3fe000
#define SPI_SHADER_PGM_RSRC2_VS__EXCP_EN__SHIFT 0xd
/*
 * SPI_SHADER_PGM_RSRC3_VS and SPI_SHADER_LATE_ALLOC_VS field mask/shift pairs:
 *   SPI_SHADER_PGM_RSRC3_VS.CU_EN               bits 15:0
 *   SPI_SHADER_PGM_RSRC3_VS.WAVE_LIMIT          bits 21:16
 *   SPI_SHADER_PGM_RSRC3_VS.LOCK_LOW_THRESHOLD  bits 25:22
 *   SPI_SHADER_LATE_ALLOC_VS.LIMIT              bits  5:0
 */
#define SPI_SHADER_PGM_RSRC3_VS__CU_EN_MASK 0xffff
#define SPI_SHADER_PGM_RSRC3_VS__CU_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_VS__WAVE_LIMIT_MASK 0x3f0000
#define SPI_SHADER_PGM_RSRC3_VS__WAVE_LIMIT__SHIFT 0x10
#define SPI_SHADER_PGM_RSRC3_VS__LOCK_LOW_THRESHOLD_MASK 0x3c00000
#define SPI_SHADER_PGM_RSRC3_VS__LOCK_LOW_THRESHOLD__SHIFT 0x16
#define SPI_SHADER_LATE_ALLOC_VS__LIMIT_MASK 0x3f
#define SPI_SHADER_LATE_ALLOC_VS__LIMIT__SHIFT 0x0
/*
 * SPI_SHADER_USER_DATA_VS_0 .. SPI_SHADER_USER_DATA_VS_15 field mask/shift
 * pairs. Each of the sixteen instances has a single DATA field spanning
 * bits 31:0.
 */
#define SPI_SHADER_USER_DATA_VS_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_VS_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_VS_15__DATA__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC2_ES_VS field mask/shift pairs.
 * Layout implied by the values below:
 *   SCRATCH_EN    bit   0
 *   USER_SGPR     bits  5:1
 *   TRAP_PRESENT  bit   6
 *   OC_LDS_EN     bit   7
 *   EXCP_EN       bits 16:8
 *   LDS_SIZE      bits 28:20
 * No field is defined here for bits 19:17.
 */
#define SPI_SHADER_PGM_RSRC2_ES_VS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_ES_VS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_ES_VS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_ES_VS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_ES_VS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_ES_VS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_ES_VS__OC_LDS_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_ES_VS__OC_LDS_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_ES_VS__EXCP_EN_MASK 0x1ff00
#define SPI_SHADER_PGM_RSRC2_ES_VS__EXCP_EN__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_ES_VS__LDS_SIZE_MASK 0x1ff00000
#define SPI_SHADER_PGM_RSRC2_ES_VS__LDS_SIZE__SHIFT 0x14
/*
 * SPI_SHADER_PGM_RSRC2_LS_VS field mask/shift pairs.
 * Layout implied by the values below:
 *   SCRATCH_EN    bit   0
 *   USER_SGPR     bits  5:1
 *   TRAP_PRESENT  bit   6
 *   LDS_SIZE      bits 15:7
 *   EXCP_EN       bits 24:16
 */
#define SPI_SHADER_PGM_RSRC2_LS_VS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_LS_VS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_LS_VS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_LS_VS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_LS_VS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_LS_VS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_LS_VS__LDS_SIZE_MASK 0xff80
#define SPI_SHADER_PGM_RSRC2_LS_VS__LDS_SIZE__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_LS_VS__EXCP_EN_MASK 0x1ff0000
#define SPI_SHADER_PGM_RSRC2_LS_VS__EXCP_EN__SHIFT 0x10
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_GS field mask/shift pairs.
 * Each register has a single MEM_BASE field:
 *   *_LO_GS.MEM_BASE  bits 31:0
 *   *_HI_GS.MEM_BASE  bits  7:0
 */
#define SPI_SHADER_TBA_LO_GS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_GS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_GS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_GS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_GS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_GS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_GS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_GS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_GS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_GS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_GS__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_GS__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_GS field mask/shift pairs.
 * Layout implied by the values below:
 *   VGPRS            bits  5:0
 *   SGPRS            bits  9:6
 *   PRIORITY         bits 11:10
 *   FLOAT_MODE       bits 19:12
 *   PRIV             bit  20
 *   DX10_CLAMP       bit  21
 *   DEBUG_MODE       bit  22
 *   IEEE_MODE        bit  23
 *   CU_GROUP_ENABLE  bit  24
 *   CACHE_CTL        bits 27:25
 *   CDBG_USER        bit  28
 */
#define SPI_SHADER_PGM_RSRC1_GS__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_GS__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_GS__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_GS__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_GS__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_GS__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_GS__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_GS__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_GS__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_GS__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_GS__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_GS__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_GS__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_GS__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_GS__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_GS__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_GS__CU_GROUP_ENABLE_MASK 0x1000000
#define SPI_SHADER_PGM_RSRC1_GS__CU_GROUP_ENABLE__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_GS__CACHE_CTL_MASK 0xe000000
#define SPI_SHADER_PGM_RSRC1_GS__CACHE_CTL__SHIFT 0x19
#define SPI_SHADER_PGM_RSRC1_GS__CDBG_USER_MASK 0x10000000
#define SPI_SHADER_PGM_RSRC1_GS__CDBG_USER__SHIFT 0x1c
/*
 * SPI_SHADER_PGM_RSRC2_GS field mask/shift pairs:
 *   SCRATCH_EN    bit   0
 *   USER_SGPR     bits  5:1
 *   TRAP_PRESENT  bit   6
 *   EXCP_EN       bits 15:7
 */
#define SPI_SHADER_PGM_RSRC2_GS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_GS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_GS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_GS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_GS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_GS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_GS__EXCP_EN_MASK 0xff80
#define SPI_SHADER_PGM_RSRC2_GS__EXCP_EN__SHIFT 0x7
/*
 * SPI_SHADER_PGM_RSRC3_GS field mask/shift pairs:
 *   CU_EN               bits 15:0
 *   WAVE_LIMIT          bits 21:16
 *   LOCK_LOW_THRESHOLD  bits 25:22
 */
#define SPI_SHADER_PGM_RSRC3_GS__CU_EN_MASK 0xffff
#define SPI_SHADER_PGM_RSRC3_GS__CU_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_GS__WAVE_LIMIT_MASK 0x3f0000
#define SPI_SHADER_PGM_RSRC3_GS__WAVE_LIMIT__SHIFT 0x10
#define SPI_SHADER_PGM_RSRC3_GS__LOCK_LOW_THRESHOLD_MASK 0x3c00000
#define SPI_SHADER_PGM_RSRC3_GS__LOCK_LOW_THRESHOLD__SHIFT 0x16
/*
 * SPI_SHADER_USER_DATA_GS_0 .. SPI_SHADER_USER_DATA_GS_15 field mask/shift
 * pairs. Each of the sixteen instances has a single DATA field spanning
 * bits 31:0.
 */
#define SPI_SHADER_USER_DATA_GS_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_GS_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_GS_15__DATA__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC2_ES_GS field mask/shift pairs.
 * Layout implied by the values below:
 *   SCRATCH_EN    bit   0
 *   USER_SGPR     bits  5:1
 *   TRAP_PRESENT  bit   6
 *   OC_LDS_EN     bit   7
 *   EXCP_EN       bits 16:8
 *   LDS_SIZE      bits 28:20
 * No field is defined here for bits 19:17.
 */
#define SPI_SHADER_PGM_RSRC2_ES_GS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_ES_GS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_ES_GS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_ES_GS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_ES_GS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_ES_GS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_ES_GS__OC_LDS_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_ES_GS__OC_LDS_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_ES_GS__EXCP_EN_MASK 0x1ff00
#define SPI_SHADER_PGM_RSRC2_ES_GS__EXCP_EN__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_ES_GS__LDS_SIZE_MASK 0x1ff00000
#define SPI_SHADER_PGM_RSRC2_ES_GS__LDS_SIZE__SHIFT 0x14
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_ES: each entry has a single MEM_BASE
 * field. The *_LO_* variants use all 32 bits (mask 0xffffffff); the *_HI_*
 * variants use only bits 0-7 (mask 0xff).
 * NOTE(review): LO/HI presumably hold the low and high parts of one
 * address -- confirm against the hardware documentation.
 */
#define SPI_SHADER_TBA_LO_ES__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_ES__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_ES__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_ES__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_ES__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_ES__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_ES__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_ES__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_ES__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_ES__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_ES__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_ES__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_ES field layout (bit positions derived from the
 * mask values below):
 *   VGPRS            bits 0-5
 *   SGPRS            bits 6-9
 *   PRIORITY         bits 10-11
 *   FLOAT_MODE       bits 12-19
 *   PRIV             bit  20
 *   DX10_CLAMP       bit  21
 *   DEBUG_MODE       bit  22
 *   IEEE_MODE        bit  23
 *   VGPR_COMP_CNT    bits 24-25
 *   CU_GROUP_ENABLE  bit  26
 *   CACHE_CTL        bits 27-29
 *   CDBG_USER        bit  30
 * Bit 31 has no field defined here.
 */
#define SPI_SHADER_PGM_RSRC1_ES__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_ES__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_ES__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_ES__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_ES__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_ES__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_ES__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_ES__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_ES__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_ES__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_ES__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_ES__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_ES__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_ES__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_ES__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_ES__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_ES__VGPR_COMP_CNT_MASK 0x3000000
#define SPI_SHADER_PGM_RSRC1_ES__VGPR_COMP_CNT__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_ES__CU_GROUP_ENABLE_MASK 0x4000000
#define SPI_SHADER_PGM_RSRC1_ES__CU_GROUP_ENABLE__SHIFT 0x1a
#define SPI_SHADER_PGM_RSRC1_ES__CACHE_CTL_MASK 0x38000000
#define SPI_SHADER_PGM_RSRC1_ES__CACHE_CTL__SHIFT 0x1b
#define SPI_SHADER_PGM_RSRC1_ES__CDBG_USER_MASK 0x40000000
#define SPI_SHADER_PGM_RSRC1_ES__CDBG_USER__SHIFT 0x1e
/*
 * SPI_SHADER_PGM_RSRC2_ES field layout (bit positions derived from the
 * mask values below):
 *   SCRATCH_EN    bit  0
 *   USER_SGPR     bits 1-5
 *   TRAP_PRESENT  bit  6
 *   OC_LDS_EN     bit  7
 *   EXCP_EN       bits 8-16
 *   LDS_SIZE      bits 20-28
 * Bits 17-19 and 29-31 have no field defined here.
 */
#define SPI_SHADER_PGM_RSRC2_ES__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_ES__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_ES__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_ES__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_ES__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_ES__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_ES__OC_LDS_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_ES__OC_LDS_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_ES__EXCP_EN_MASK 0x1ff00
#define SPI_SHADER_PGM_RSRC2_ES__EXCP_EN__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_ES__LDS_SIZE_MASK 0x1ff00000
#define SPI_SHADER_PGM_RSRC2_ES__LDS_SIZE__SHIFT 0x14
/*
 * SPI_SHADER_PGM_RSRC3_ES field layout (bit positions derived from the
 * mask values below):
 *   CU_EN               bits 0-15
 *   WAVE_LIMIT          bits 16-21
 *   LOCK_LOW_THRESHOLD  bits 22-25
 */
#define SPI_SHADER_PGM_RSRC3_ES__CU_EN_MASK 0xffff
#define SPI_SHADER_PGM_RSRC3_ES__CU_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_ES__WAVE_LIMIT_MASK 0x3f0000
#define SPI_SHADER_PGM_RSRC3_ES__WAVE_LIMIT__SHIFT 0x10
#define SPI_SHADER_PGM_RSRC3_ES__LOCK_LOW_THRESHOLD_MASK 0x3c00000
#define SPI_SHADER_PGM_RSRC3_ES__LOCK_LOW_THRESHOLD__SHIFT 0x16
/*
 * SPI_SHADER_USER_DATA_ES_0 .. SPI_SHADER_USER_DATA_ES_15: mask/shift pairs
 * for the single DATA field of each entry. Every DATA field spans the whole
 * 32-bit value (mask 0xffffffff, shift 0).
 */
#define SPI_SHADER_USER_DATA_ES_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_ES_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_ES_15__DATA__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC2_LS_ES field layout (bit positions derived from the
 * mask values below):
 *   SCRATCH_EN    bit  0
 *   USER_SGPR     bits 1-5
 *   TRAP_PRESENT  bit  6
 *   LDS_SIZE      bits 7-15
 *   EXCP_EN       bits 16-24
 */
#define SPI_SHADER_PGM_RSRC2_LS_ES__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_LS_ES__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_LS_ES__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_LS_ES__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_LS_ES__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_LS_ES__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_LS_ES__LDS_SIZE_MASK 0xff80
#define SPI_SHADER_PGM_RSRC2_LS_ES__LDS_SIZE__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_LS_ES__EXCP_EN_MASK 0x1ff0000
#define SPI_SHADER_PGM_RSRC2_LS_ES__EXCP_EN__SHIFT 0x10
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_HS: each entry has a single MEM_BASE
 * field. The *_LO_* variants use all 32 bits (mask 0xffffffff); the *_HI_*
 * variants use only bits 0-7 (mask 0xff).
 * NOTE(review): LO/HI presumably hold the low and high parts of one
 * address -- confirm against the hardware documentation.
 */
#define SPI_SHADER_TBA_LO_HS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_HS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_HS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_HS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_HS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_HS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_HS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_HS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_HS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_HS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_HS__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_HS__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_HS field layout (bit positions derived from the
 * mask values below):
 *   VGPRS       bits 0-5
 *   SGPRS       bits 6-9
 *   PRIORITY    bits 10-11
 *   FLOAT_MODE  bits 12-19
 *   PRIV        bit  20
 *   DX10_CLAMP  bit  21
 *   DEBUG_MODE  bit  22
 *   IEEE_MODE   bit  23
 *   CACHE_CTL   bits 24-26
 *   CDBG_USER   bit  27
 * No VGPR_COMP_CNT or CU_GROUP_ENABLE field is defined for this prefix, so
 * CACHE_CTL/CDBG_USER sit lower than in SPI_SHADER_PGM_RSRC1_ES.
 */
#define SPI_SHADER_PGM_RSRC1_HS__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_HS__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_HS__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_HS__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_HS__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_HS__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_HS__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_HS__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_HS__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_HS__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_HS__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_HS__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_HS__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_HS__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_HS__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_HS__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_HS__CACHE_CTL_MASK 0x7000000
#define SPI_SHADER_PGM_RSRC1_HS__CACHE_CTL__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_HS__CDBG_USER_MASK 0x8000000
#define SPI_SHADER_PGM_RSRC1_HS__CDBG_USER__SHIFT 0x1b
/*
 * SPI_SHADER_PGM_RSRC2_HS field layout (bit positions derived from the
 * mask values below):
 *   SCRATCH_EN    bit  0
 *   USER_SGPR     bits 1-5
 *   TRAP_PRESENT  bit  6
 *   OC_LDS_EN     bit  7
 *   TG_SIZE_EN    bit  8
 *   EXCP_EN       bits 9-17
 */
#define SPI_SHADER_PGM_RSRC2_HS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_HS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_HS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_HS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_HS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_HS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_HS__OC_LDS_EN_MASK 0x80
#define SPI_SHADER_PGM_RSRC2_HS__OC_LDS_EN__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_HS__TG_SIZE_EN_MASK 0x100
#define SPI_SHADER_PGM_RSRC2_HS__TG_SIZE_EN__SHIFT 0x8
#define SPI_SHADER_PGM_RSRC2_HS__EXCP_EN_MASK 0x3fe00
#define SPI_SHADER_PGM_RSRC2_HS__EXCP_EN__SHIFT 0x9
/*
 * SPI_SHADER_PGM_RSRC3_HS field layout (bit positions derived from the
 * mask values below):
 *   WAVE_LIMIT          bits 0-5
 *   LOCK_LOW_THRESHOLD  bits 6-9
 * No CU_EN field is defined for this prefix.
 */
#define SPI_SHADER_PGM_RSRC3_HS__WAVE_LIMIT_MASK 0x3f
#define SPI_SHADER_PGM_RSRC3_HS__WAVE_LIMIT__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_HS__LOCK_LOW_THRESHOLD_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC3_HS__LOCK_LOW_THRESHOLD__SHIFT 0x6
/*
 * SPI_SHADER_USER_DATA_HS_0 .. SPI_SHADER_USER_DATA_HS_15: mask/shift pairs
 * for the single DATA field of each entry. Every DATA field spans the whole
 * 32-bit value (mask 0xffffffff, shift 0).
 */
#define SPI_SHADER_USER_DATA_HS_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_HS_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_HS_15__DATA__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC2_LS_HS field layout (bit positions derived from the
 * mask values below):
 *   SCRATCH_EN    bit  0
 *   USER_SGPR     bits 1-5
 *   TRAP_PRESENT  bit  6
 *   LDS_SIZE      bits 7-15
 *   EXCP_EN       bits 16-24
 */
#define SPI_SHADER_PGM_RSRC2_LS_HS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_LS_HS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_LS_HS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_LS_HS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_LS_HS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_LS_HS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_LS_HS__LDS_SIZE_MASK 0xff80
#define SPI_SHADER_PGM_RSRC2_LS_HS__LDS_SIZE__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_LS_HS__EXCP_EN_MASK 0x1ff0000
#define SPI_SHADER_PGM_RSRC2_LS_HS__EXCP_EN__SHIFT 0x10
/*
 * SPI_SHADER_{TBA,TMA,PGM}_{LO,HI}_LS: each entry has a single MEM_BASE
 * field. The *_LO_* variants use all 32 bits (mask 0xffffffff); the *_HI_*
 * variants use only bits 0-7 (mask 0xff).
 * NOTE(review): LO/HI presumably hold the low and high parts of one
 * address -- confirm against the hardware documentation.
 */
#define SPI_SHADER_TBA_LO_LS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TBA_LO_LS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TBA_HI_LS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TBA_HI_LS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_LO_LS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_TMA_LO_LS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_TMA_HI_LS__MEM_BASE_MASK 0xff
#define SPI_SHADER_TMA_HI_LS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_LO_LS__MEM_BASE_MASK 0xffffffff
#define SPI_SHADER_PGM_LO_LS__MEM_BASE__SHIFT 0x0
#define SPI_SHADER_PGM_HI_LS__MEM_BASE_MASK 0xff
#define SPI_SHADER_PGM_HI_LS__MEM_BASE__SHIFT 0x0
/*
 * SPI_SHADER_PGM_RSRC1_LS field layout (bit positions derived from the
 * mask values below):
 *   VGPRS          bits 0-5
 *   SGPRS          bits 6-9
 *   PRIORITY       bits 10-11
 *   FLOAT_MODE     bits 12-19
 *   PRIV           bit  20
 *   DX10_CLAMP     bit  21
 *   DEBUG_MODE     bit  22
 *   IEEE_MODE      bit  23
 *   VGPR_COMP_CNT  bits 24-25
 *   CACHE_CTL      bits 26-28
 *   CDBG_USER      bit  29
 * Bits 30-31 have no field defined here.
 */
#define SPI_SHADER_PGM_RSRC1_LS__VGPRS_MASK 0x3f
#define SPI_SHADER_PGM_RSRC1_LS__VGPRS__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC1_LS__SGPRS_MASK 0x3c0
#define SPI_SHADER_PGM_RSRC1_LS__SGPRS__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC1_LS__PRIORITY_MASK 0xc00
#define SPI_SHADER_PGM_RSRC1_LS__PRIORITY__SHIFT 0xa
#define SPI_SHADER_PGM_RSRC1_LS__FLOAT_MODE_MASK 0xff000
#define SPI_SHADER_PGM_RSRC1_LS__FLOAT_MODE__SHIFT 0xc
#define SPI_SHADER_PGM_RSRC1_LS__PRIV_MASK 0x100000
#define SPI_SHADER_PGM_RSRC1_LS__PRIV__SHIFT 0x14
#define SPI_SHADER_PGM_RSRC1_LS__DX10_CLAMP_MASK 0x200000
#define SPI_SHADER_PGM_RSRC1_LS__DX10_CLAMP__SHIFT 0x15
#define SPI_SHADER_PGM_RSRC1_LS__DEBUG_MODE_MASK 0x400000
#define SPI_SHADER_PGM_RSRC1_LS__DEBUG_MODE__SHIFT 0x16
#define SPI_SHADER_PGM_RSRC1_LS__IEEE_MODE_MASK 0x800000
#define SPI_SHADER_PGM_RSRC1_LS__IEEE_MODE__SHIFT 0x17
#define SPI_SHADER_PGM_RSRC1_LS__VGPR_COMP_CNT_MASK 0x3000000
#define SPI_SHADER_PGM_RSRC1_LS__VGPR_COMP_CNT__SHIFT 0x18
#define SPI_SHADER_PGM_RSRC1_LS__CACHE_CTL_MASK 0x1c000000
#define SPI_SHADER_PGM_RSRC1_LS__CACHE_CTL__SHIFT 0x1a
#define SPI_SHADER_PGM_RSRC1_LS__CDBG_USER_MASK 0x20000000
#define SPI_SHADER_PGM_RSRC1_LS__CDBG_USER__SHIFT 0x1d
/*
 * SPI_SHADER_PGM_RSRC2_LS field layout (bit positions derived from the
 * mask values below):
 *   SCRATCH_EN    bit  0
 *   USER_SGPR     bits 1-5
 *   TRAP_PRESENT  bit  6
 *   LDS_SIZE      bits 7-15
 *   EXCP_EN       bits 16-24
 */
#define SPI_SHADER_PGM_RSRC2_LS__SCRATCH_EN_MASK 0x1
#define SPI_SHADER_PGM_RSRC2_LS__SCRATCH_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC2_LS__USER_SGPR_MASK 0x3e
#define SPI_SHADER_PGM_RSRC2_LS__USER_SGPR__SHIFT 0x1
#define SPI_SHADER_PGM_RSRC2_LS__TRAP_PRESENT_MASK 0x40
#define SPI_SHADER_PGM_RSRC2_LS__TRAP_PRESENT__SHIFT 0x6
#define SPI_SHADER_PGM_RSRC2_LS__LDS_SIZE_MASK 0xff80
#define SPI_SHADER_PGM_RSRC2_LS__LDS_SIZE__SHIFT 0x7
#define SPI_SHADER_PGM_RSRC2_LS__EXCP_EN_MASK 0x1ff0000
#define SPI_SHADER_PGM_RSRC2_LS__EXCP_EN__SHIFT 0x10
/*
 * SPI_SHADER_PGM_RSRC3_LS field layout (bit positions derived from the
 * mask values below):
 *   CU_EN               bits 0-15
 *   WAVE_LIMIT          bits 16-21
 *   LOCK_LOW_THRESHOLD  bits 22-25
 */
#define SPI_SHADER_PGM_RSRC3_LS__CU_EN_MASK 0xffff
#define SPI_SHADER_PGM_RSRC3_LS__CU_EN__SHIFT 0x0
#define SPI_SHADER_PGM_RSRC3_LS__WAVE_LIMIT_MASK 0x3f0000
#define SPI_SHADER_PGM_RSRC3_LS__WAVE_LIMIT__SHIFT 0x10
#define SPI_SHADER_PGM_RSRC3_LS__LOCK_LOW_THRESHOLD_MASK 0x3c00000
#define SPI_SHADER_PGM_RSRC3_LS__LOCK_LOW_THRESHOLD__SHIFT 0x16
/*
 * SPI_SHADER_USER_DATA_LS_0 .. SPI_SHADER_USER_DATA_LS_15: mask/shift pairs
 * for the single DATA field of each entry. Every DATA field spans the whole
 * 32-bit value (mask 0xffffffff, shift 0).
 */
#define SPI_SHADER_USER_DATA_LS_0__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_0__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_1__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_1__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_2__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_2__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_3__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_3__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_4__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_4__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_5__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_5__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_6__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_6__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_7__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_7__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_8__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_8__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_9__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_9__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_10__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_10__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_11__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_11__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_12__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_12__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_13__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_13__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_14__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_14__DATA__SHIFT 0x0
#define SPI_SHADER_USER_DATA_LS_15__DATA_MASK 0xffffffff
#define SPI_SHADER_USER_DATA_LS_15__DATA__SHIFT 0x0
/*
 * SQ_CONFIG field layout (bit positions derived from the mask values
 * below):
 *   UNUSED                         bits 0-7
 *   DEBUG_EN                       bit  8
 *   DISABLE_SCA_BYPASS             bit  9
 *   DISABLE_IB_DEP_CHECK           bit  10
 *   ENABLE_SOFT_CLAUSE             bit  11
 *   EARLY_TA_DONE_DISABLE          bit  12
 *   DUA_FLAT_LOCK_ENABLE           bit  13
 *   DUA_LDS_BYPASS_DISABLE         bit  14
 *   DUA_FLAT_LDS_PINGPONG_DISABLE  bit  15
 */
#define SQ_CONFIG__UNUSED_MASK 0xff
#define SQ_CONFIG__UNUSED__SHIFT 0x0
#define SQ_CONFIG__DEBUG_EN_MASK 0x100
#define SQ_CONFIG__DEBUG_EN__SHIFT 0x8
#define SQ_CONFIG__DISABLE_SCA_BYPASS_MASK 0x200
#define SQ_CONFIG__DISABLE_SCA_BYPASS__SHIFT 0x9
#define SQ_CONFIG__DISABLE_IB_DEP_CHECK_MASK 0x400
#define SQ_CONFIG__DISABLE_IB_DEP_CHECK__SHIFT 0xa
#define SQ_CONFIG__ENABLE_SOFT_CLAUSE_MASK 0x800
#define SQ_CONFIG__ENABLE_SOFT_CLAUSE__SHIFT 0xb
#define SQ_CONFIG__EARLY_TA_DONE_DISABLE_MASK 0x1000
#define SQ_CONFIG__EARLY_TA_DONE_DISABLE__SHIFT 0xc
#define SQ_CONFIG__DUA_FLAT_LOCK_ENABLE_MASK 0x2000
#define SQ_CONFIG__DUA_FLAT_LOCK_ENABLE__SHIFT 0xd
#define SQ_CONFIG__DUA_LDS_BYPASS_DISABLE_MASK 0x4000
#define SQ_CONFIG__DUA_LDS_BYPASS_DISABLE__SHIFT 0xe
#define SQ_CONFIG__DUA_FLAT_LDS_PINGPONG_DISABLE_MASK 0x8000
#define SQ_CONFIG__DUA_FLAT_LDS_PINGPONG_DISABLE__SHIFT 0xf
/*
 * SQC_CONFIG field layout (bit positions derived from the mask values
 * below):
 *   INST_CACHE_SIZE       bits 0-1
 *   DATA_CACHE_SIZE       bits 2-3
 *   MISS_FIFO_DEPTH       bits 4-5
 *   HIT_FIFO_DEPTH        bit  6
 *   FORCE_ALWAYS_MISS     bit  7
 *   FORCE_IN_ORDER        bit  8
 *   IDENTITY_HASH_BANK    bit  9
 *   IDENTITY_HASH_SET     bit  10
 *   PER_VMID_INV_DISABLE  bit  11
 */
#define SQC_CONFIG__INST_CACHE_SIZE_MASK 0x3
#define SQC_CONFIG__INST_CACHE_SIZE__SHIFT 0x0
#define SQC_CONFIG__DATA_CACHE_SIZE_MASK 0xc
#define SQC_CONFIG__DATA_CACHE_SIZE__SHIFT 0x2
#define SQC_CONFIG__MISS_FIFO_DEPTH_MASK 0x30
#define SQC_CONFIG__MISS_FIFO_DEPTH__SHIFT 0x4
#define SQC_CONFIG__HIT_FIFO_DEPTH_MASK 0x40
#define SQC_CONFIG__HIT_FIFO_DEPTH__SHIFT 0x6
#define SQC_CONFIG__FORCE_ALWAYS_MISS_MASK 0x80
#define SQC_CONFIG__FORCE_ALWAYS_MISS__SHIFT 0x7
#define SQC_CONFIG__FORCE_IN_ORDER_MASK 0x100
#define SQC_CONFIG__FORCE_IN_ORDER__SHIFT 0x8
#define SQC_CONFIG__IDENTITY_HASH_BANK_MASK 0x200
#define SQC_CONFIG__IDENTITY_HASH_BANK__SHIFT 0x9
#define SQC_CONFIG__IDENTITY_HASH_SET_MASK 0x400
#define SQC_CONFIG__IDENTITY_HASH_SET__SHIFT 0xa
#define SQC_CONFIG__PER_VMID_INV_DISABLE_MASK 0x800
#define SQC_CONFIG__PER_VMID_INV_DISABLE__SHIFT 0xb
/*
 * SQC_CACHES field layout (bit positions derived from the mask values
 * below):
 *   INST_INVALIDATE      bit 0
 *   DATA_INVALIDATE      bit 1
 *   INVALIDATE_VOLATILE  bit 2
 */
#define SQC_CACHES__INST_INVALIDATE_MASK 0x1
#define SQC_CACHES__INST_INVALIDATE__SHIFT 0x0
#define SQC_CACHES__DATA_INVALIDATE_MASK 0x2
#define SQC_CACHES__DATA_INVALIDATE__SHIFT 0x1
#define SQC_CACHES__INVALIDATE_VOLATILE_MASK 0x4
#define SQC_CACHES__INVALIDATE_VOLATILE__SHIFT 0x2
/*
 * SQ_RANDOM_WAVE_PRI field layout (bit positions derived from the mask
 * values below):
 *   RET  bits 0-6
 *   RUI  bits 7-9
 *   RNG  bits 10-20
 */
#define SQ_RANDOM_WAVE_PRI__RET_MASK 0x7f
#define SQ_RANDOM_WAVE_PRI__RET__SHIFT 0x0
#define SQ_RANDOM_WAVE_PRI__RUI_MASK 0x380
#define SQ_RANDOM_WAVE_PRI__RUI__SHIFT 0x7
#define SQ_RANDOM_WAVE_PRI__RNG_MASK 0x1ffc00
#define SQ_RANDOM_WAVE_PRI__RNG__SHIFT 0xa
/*
 * SQ_REG_CREDITS field layout (bit positions derived from the mask values
 * below):
 *   SRBM_CREDITS    bits 0-5
 *   CMD_CREDITS     bits 8-11
 *   REG_BUSY        bit  28
 *   SRBM_OVERFLOW   bit  29
 *   IMMED_OVERFLOW  bit  30
 *   CMD_OVERFLOW    bit  31
 * Bits 6-7 and 12-27 have no field defined here.
 */
#define SQ_REG_CREDITS__SRBM_CREDITS_MASK 0x3f
#define SQ_REG_CREDITS__SRBM_CREDITS__SHIFT 0x0
#define SQ_REG_CREDITS__CMD_CREDITS_MASK 0xf00
#define SQ_REG_CREDITS__CMD_CREDITS__SHIFT 0x8
#define SQ_REG_CREDITS__REG_BUSY_MASK 0x10000000
#define SQ_REG_CREDITS__REG_BUSY__SHIFT 0x1c
#define SQ_REG_CREDITS__SRBM_OVERFLOW_MASK 0x20000000
#define SQ_REG_CREDITS__SRBM_OVERFLOW__SHIFT 0x1d
#define SQ_REG_CREDITS__IMMED_OVERFLOW_MASK 0x40000000
#define SQ_REG_CREDITS__IMMED_OVERFLOW__SHIFT 0x1e
#define SQ_REG_CREDITS__CMD_OVERFLOW_MASK 0x80000000
#define SQ_REG_CREDITS__CMD_OVERFLOW__SHIFT 0x1f
/*
 * SQ_FIFO_SIZES field layout (bit positions derived from the mask values
 * below):
 *   INTERRUPT_FIFO_SIZE  bits 0-3
 *   TTRACE_FIFO_SIZE     bits 8-11
 *   EXPORT_BUF_SIZE      bits 16-17
 *   VMEM_DATA_FIFO_SIZE  bits 18-19
 * Bits 4-7 and 12-15 have no field defined here.
 */
#define SQ_FIFO_SIZES__INTERRUPT_FIFO_SIZE_MASK 0xf
#define SQ_FIFO_SIZES__INTERRUPT_FIFO_SIZE__SHIFT 0x0
#define SQ_FIFO_SIZES__TTRACE_FIFO_SIZE_MASK 0xf00
#define SQ_FIFO_SIZES__TTRACE_FIFO_SIZE__SHIFT 0x8
#define SQ_FIFO_SIZES__EXPORT_BUF_SIZE_MASK 0x30000
#define SQ_FIFO_SIZES__EXPORT_BUF_SIZE__SHIFT 0x10
#define SQ_FIFO_SIZES__VMEM_DATA_FIFO_SIZE_MASK 0xc0000
#define SQ_FIFO_SIZES__VMEM_DATA_FIFO_SIZE__SHIFT 0x12
/*
 * SQ_INTERRUPT_AUTO_MASK: single field MASK, bits 0-23 (hence the doubled
 * "MASK_MASK" in the macro name: field name + mask suffix).
 * SQ_INTERRUPT_MSG_CTRL: single field STALL, bit 0.
 */
#define SQ_INTERRUPT_AUTO_MASK__MASK_MASK 0xffffff
#define SQ_INTERRUPT_AUTO_MASK__MASK__SHIFT 0x0
#define SQ_INTERRUPT_MSG_CTRL__STALL_MASK 0x1
#define SQ_INTERRUPT_MSG_CTRL__STALL__SHIFT 0x0
/*
 * SQ_PERFCOUNTER_CTRL field layout (bit positions derived from the mask
 * values below):
 *   PS_EN          bit  0
 *   VS_EN          bit  1
 *   GS_EN          bit  2
 *   ES_EN          bit  3
 *   HS_EN          bit  4
 *   LS_EN          bit  5
 *   CS_EN          bit  6
 *   CNTR_RATE      bits 8-12
 *   DISABLE_FLUSH  bit  13
 * Bit 7 has no field defined here.
 *
 * SQ_PERFCOUNTER_MASK:  SH0_MASK bits 0-15, SH1_MASK bits 16-31.
 * SQ_PERFCOUNTER_CTRL2: FORCE_EN bit 0.
 */
#define SQ_PERFCOUNTER_CTRL__PS_EN_MASK 0x1
#define SQ_PERFCOUNTER_CTRL__PS_EN__SHIFT 0x0
#define SQ_PERFCOUNTER_CTRL__VS_EN_MASK 0x2
#define SQ_PERFCOUNTER_CTRL__VS_EN__SHIFT 0x1
#define SQ_PERFCOUNTER_CTRL__GS_EN_MASK 0x4
#define SQ_PERFCOUNTER_CTRL__GS_EN__SHIFT 0x2
#define SQ_PERFCOUNTER_CTRL__ES_EN_MASK 0x8
#define SQ_PERFCOUNTER_CTRL__ES_EN__SHIFT 0x3
#define SQ_PERFCOUNTER_CTRL__HS_EN_MASK 0x10
#define SQ_PERFCOUNTER_CTRL__HS_EN__SHIFT 0x4
#define SQ_PERFCOUNTER_CTRL__LS_EN_MASK 0x20
#define SQ_PERFCOUNTER_CTRL__LS_EN__SHIFT 0x5
#define SQ_PERFCOUNTER_CTRL__CS_EN_MASK 0x40
#define SQ_PERFCOUNTER_CTRL__CS_EN__SHIFT 0x6
#define SQ_PERFCOUNTER_CTRL__CNTR_RATE_MASK 0x1f00
#define SQ_PERFCOUNTER_CTRL__CNTR_RATE__SHIFT 0x8
#define SQ_PERFCOUNTER_CTRL__DISABLE_FLUSH_MASK 0x2000
#define SQ_PERFCOUNTER_CTRL__DISABLE_FLUSH__SHIFT 0xd
#define SQ_PERFCOUNTER_MASK__SH0_MASK_MASK 0xffff
#define SQ_PERFCOUNTER_MASK__SH0_MASK__SHIFT 0x0
#define SQ_PERFCOUNTER_MASK__SH1_MASK_MASK 0xffff0000
#define SQ_PERFCOUNTER_MASK__SH1_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER_CTRL2__FORCE_EN_MASK 0x1
#define SQ_PERFCOUNTER_CTRL2__FORCE_EN__SHIFT 0x0
/*
 * CC_SQC_BANK_DISABLE and USER_SQC_BANK_DISABLE share one field layout
 * (bit positions derived from the mask values below):
 *   SQC0_BANK_DISABLE  bits 16-19
 *   SQC1_BANK_DISABLE  bits 20-23
 *   SQC2_BANK_DISABLE  bits 24-27
 *   SQC3_BANK_DISABLE  bits 28-31
 * Bits 0-15 have no field defined here.
 */
#define CC_SQC_BANK_DISABLE__SQC0_BANK_DISABLE_MASK 0xf0000
#define CC_SQC_BANK_DISABLE__SQC0_BANK_DISABLE__SHIFT 0x10
#define CC_SQC_BANK_DISABLE__SQC1_BANK_DISABLE_MASK 0xf00000
#define CC_SQC_BANK_DISABLE__SQC1_BANK_DISABLE__SHIFT 0x14
#define CC_SQC_BANK_DISABLE__SQC2_BANK_DISABLE_MASK 0xf000000
#define CC_SQC_BANK_DISABLE__SQC2_BANK_DISABLE__SHIFT 0x18
#define CC_SQC_BANK_DISABLE__SQC3_BANK_DISABLE_MASK 0xf0000000
#define CC_SQC_BANK_DISABLE__SQC3_BANK_DISABLE__SHIFT 0x1c
#define USER_SQC_BANK_DISABLE__SQC0_BANK_DISABLE_MASK 0xf0000
#define USER_SQC_BANK_DISABLE__SQC0_BANK_DISABLE__SHIFT 0x10
#define USER_SQC_BANK_DISABLE__SQC1_BANK_DISABLE_MASK 0xf00000
#define USER_SQC_BANK_DISABLE__SQC1_BANK_DISABLE__SHIFT 0x14
#define USER_SQC_BANK_DISABLE__SQC2_BANK_DISABLE_MASK 0xf000000
#define USER_SQC_BANK_DISABLE__SQC2_BANK_DISABLE__SHIFT 0x18
#define USER_SQC_BANK_DISABLE__SQC3_BANK_DISABLE_MASK 0xf0000000
#define USER_SQC_BANK_DISABLE__SQC3_BANK_DISABLE__SHIFT 0x1c
/*
 * SQ_PERFCOUNTER0_LO .. SQ_PERFCOUNTER15_LO: each entry has a single
 * PERFCOUNTER_LO field spanning all 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): presumably the low half of a wider counter whose high half
 * is in the matching *_HI entry -- confirm against hardware documentation.
 */
#define SQ_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER4_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER4_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER5_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER5_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER6_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER6_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER7_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER7_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER8_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER8_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER9_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER9_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER10_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER10_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER11_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER11_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER12_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER12_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER13_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER13_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER14_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER14_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SQ_PERFCOUNTER15_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SQ_PERFCOUNTER15_LO__PERFCOUNTER_LO__SHIFT 0x0
/*
 * SQ_PERFCOUNTER0_HI .. SQ_PERFCOUNTER15_HI: each entry has a single
 * PERFCOUNTER_HI field spanning all 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): presumably the high half of a wider counter whose low half
 * is in the matching *_LO entry -- confirm against hardware documentation.
 */
#define SQ_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER4_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER4_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER5_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER5_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER6_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER6_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER7_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER7_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER8_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER8_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER9_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER9_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER10_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER10_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER11_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER11_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER12_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER12_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER13_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER13_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER14_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER14_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SQ_PERFCOUNTER15_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SQ_PERFCOUNTER15_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * SQ_PERFCOUNTER0_SELECT .. SQ_PERFCOUNTER13_SELECT: every entry below uses
 * the same field layout (bit positions derived from the mask values):
 *   PERF_SEL         bits 0-7
 *   SQC_BANK_MASK    bits 12-15
 *   SQC_CLIENT_MASK  bits 16-19
 *   SPM_MODE         bits 20-23
 *   SIMD_MASK        bits 24-27
 *   PERF_MODE        bits 28-31
 * Bits 8-11 have no field defined here. Names ending in "_MASK_MASK" are a
 * field whose own name ends in MASK followed by the mask suffix.
 */
#define SQ_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER0_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER0_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER0_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER0_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER0_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER0_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER0_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER1_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER1_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER1_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER1_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER1_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER1_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER1_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER1_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER2_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER2_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER2_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER2_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER2_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER2_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER2_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER2_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER3_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER3_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER3_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER3_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER3_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER3_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER3_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER3_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER4_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER4_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER4_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER4_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER4_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER4_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER4_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER4_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER4_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER4_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER4_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER4_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER5_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER5_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER5_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER5_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER5_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER5_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER5_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER5_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER5_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER5_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER5_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER5_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER6_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER6_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER6_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER6_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER6_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER6_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER6_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER6_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER6_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER6_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER6_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER6_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER7_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER7_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER7_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER7_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER7_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER7_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER7_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER7_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER7_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER7_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER7_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER7_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER8_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER8_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER8_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER8_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER8_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER8_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER8_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER8_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER8_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER8_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER8_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER8_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER9_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER9_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER9_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER9_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER9_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER9_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER9_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER9_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER9_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER9_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER9_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER9_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER10_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER10_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER10_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER10_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER10_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER10_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER10_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER10_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER10_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER10_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER10_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER10_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER11_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER11_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER11_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER11_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER11_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER11_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER11_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER11_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER11_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER11_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER11_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER11_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER12_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER12_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER12_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER12_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER12_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER12_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER12_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER12_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER12_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER12_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER12_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER12_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER13_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER13_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER13_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER13_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER13_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER13_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER13_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER13_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER13_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER13_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER13_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER13_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER14_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER14_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER14_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER14_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER14_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER14_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER14_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER14_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER14_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER14_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER14_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER14_SELECT__PERF_MODE__SHIFT 0x1c
#define SQ_PERFCOUNTER15_SELECT__PERF_SEL_MASK 0xff
#define SQ_PERFCOUNTER15_SELECT__PERF_SEL__SHIFT 0x0
#define SQ_PERFCOUNTER15_SELECT__SQC_BANK_MASK_MASK 0xf000
#define SQ_PERFCOUNTER15_SELECT__SQC_BANK_MASK__SHIFT 0xc
#define SQ_PERFCOUNTER15_SELECT__SQC_CLIENT_MASK_MASK 0xf0000
#define SQ_PERFCOUNTER15_SELECT__SQC_CLIENT_MASK__SHIFT 0x10
#define SQ_PERFCOUNTER15_SELECT__SPM_MODE_MASK 0xf00000
#define SQ_PERFCOUNTER15_SELECT__SPM_MODE__SHIFT 0x14
#define SQ_PERFCOUNTER15_SELECT__SIMD_MASK_MASK 0xf000000
#define SQ_PERFCOUNTER15_SELECT__SIMD_MASK__SHIFT 0x18
#define SQ_PERFCOUNTER15_SELECT__PERF_MODE_MASK 0xf0000000
#define SQ_PERFCOUNTER15_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * *_CLK_CTRL register bit-field definitions (<REG>__<FIELD>_MASK is the
 * field mask, <REG>__<FIELD>__SHIFT the position of its lowest bit).
 *
 * CGTT_SQ_CLK_CTRL and CGTT_SQG_CLK_CTRL share one layout:
 *   ON_DELAY [3:0], OFF_HYSTERESIS [11:4], CORE_OVERRIDE [30],
 *   REG_OVERRIDE [31].
 * SQ_ALU_CLK_CTRL, SQ_TEX_CLK_CTRL and SQ_LDS_CLK_CTRL share another:
 *   FORCE_CU_ON_SH0 [15:0], FORCE_CU_ON_SH1 [31:16].
 */
/* CGTT_SQ_CLK_CTRL */
#define CGTT_SQ_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_SQ_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_SQ_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SQ_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SQ_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000
#define CGTT_SQ_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e
#define CGTT_SQ_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_SQ_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/* CGTT_SQG_CLK_CTRL */
#define CGTT_SQG_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_SQG_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_SQG_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SQG_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SQG_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000
#define CGTT_SQG_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e
#define CGTT_SQG_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_SQG_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/* SQ_ALU_CLK_CTRL */
#define SQ_ALU_CLK_CTRL__FORCE_CU_ON_SH0_MASK 0xffff
#define SQ_ALU_CLK_CTRL__FORCE_CU_ON_SH0__SHIFT 0x0
#define SQ_ALU_CLK_CTRL__FORCE_CU_ON_SH1_MASK 0xffff0000
#define SQ_ALU_CLK_CTRL__FORCE_CU_ON_SH1__SHIFT 0x10
/* SQ_TEX_CLK_CTRL */
#define SQ_TEX_CLK_CTRL__FORCE_CU_ON_SH0_MASK 0xffff
#define SQ_TEX_CLK_CTRL__FORCE_CU_ON_SH0__SHIFT 0x0
#define SQ_TEX_CLK_CTRL__FORCE_CU_ON_SH1_MASK 0xffff0000
#define SQ_TEX_CLK_CTRL__FORCE_CU_ON_SH1__SHIFT 0x10
/* SQ_LDS_CLK_CTRL */
#define SQ_LDS_CLK_CTRL__FORCE_CU_ON_SH0_MASK 0xffff
#define SQ_LDS_CLK_CTRL__FORCE_CU_ON_SH0__SHIFT 0x0
#define SQ_LDS_CLK_CTRL__FORCE_CU_ON_SH1_MASK 0xffff0000
#define SQ_LDS_CLK_CTRL__FORCE_CU_ON_SH1__SHIFT 0x10
/*
 * SQ_POWER_THROTTLE, SQ_POWER_THROTTLE2, SQ_TIME_HI and SQ_TIME_LO
 * bit-field definitions (mask + lowest-bit shift per field).
 *
 *   SQ_POWER_THROTTLE : MIN_POWER [13:0], MAX_POWER [29:16],
 *                       PHASE_OFFSET [31:30]
 *   SQ_POWER_THROTTLE2: MAX_POWER_DELTA [13:0],
 *                       SHORT_TERM_INTERVAL_SIZE [25:16],
 *                       LONG_TERM_INTERVAL_RATIO [30:27],
 *                       USE_REF_CLOCK [31]
 *   SQ_TIME_HI / SQ_TIME_LO: a single TIME field spanning bits [31:0].
 */
/* SQ_POWER_THROTTLE */
#define SQ_POWER_THROTTLE__MIN_POWER_MASK 0x3fff
#define SQ_POWER_THROTTLE__MIN_POWER__SHIFT 0x0
#define SQ_POWER_THROTTLE__MAX_POWER_MASK 0x3fff0000
#define SQ_POWER_THROTTLE__MAX_POWER__SHIFT 0x10
#define SQ_POWER_THROTTLE__PHASE_OFFSET_MASK 0xc0000000
#define SQ_POWER_THROTTLE__PHASE_OFFSET__SHIFT 0x1e
/* SQ_POWER_THROTTLE2 */
#define SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK 0x3fff
#define SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT 0x0
#define SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK 0x3ff0000
#define SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT 0x10
#define SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK 0x78000000
#define SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT 0x1b
#define SQ_POWER_THROTTLE2__USE_REF_CLOCK_MASK 0x80000000
#define SQ_POWER_THROTTLE2__USE_REF_CLOCK__SHIFT 0x1f
/* SQ_TIME_HI / SQ_TIME_LO */
#define SQ_TIME_HI__TIME_MASK 0xffffffff
#define SQ_TIME_HI__TIME__SHIFT 0x0
#define SQ_TIME_LO__TIME_MASK 0xffffffff
#define SQ_TIME_LO__TIME__SHIFT 0x0
/*
 * SQ_THREAD_TRACE_BASE, _BASE2, _SIZE, _MASK and _USERDATA_0..3 bit-field
 * definitions (mask + lowest-bit shift per field).
 *
 * Reading aid: SQ_THREAD_TRACE_MASK is a register name, so in
 * SQ_THREAD_TRACE_MASK__CU_SEL_MASK the first "MASK" belongs to the register
 * and the trailing "_MASK" marks the field mask. Its VM_ID_MASK field
 * likewise yields the macro SQ_THREAD_TRACE_MASK__VM_ID_MASK_MASK.
 * In SQ_THREAD_TRACE_MASK, bit 6 has no field defined here.
 */
/* SQ_THREAD_TRACE_BASE / SQ_THREAD_TRACE_BASE2 */
#define SQ_THREAD_TRACE_BASE__ADDR_MASK 0xffffffff
#define SQ_THREAD_TRACE_BASE__ADDR__SHIFT 0x0
#define SQ_THREAD_TRACE_BASE2__ADDR_HI_MASK 0xf
#define SQ_THREAD_TRACE_BASE2__ADDR_HI__SHIFT 0x0
#define SQ_THREAD_TRACE_BASE2__ATC_MASK 0x10
#define SQ_THREAD_TRACE_BASE2__ATC__SHIFT 0x4
/* SQ_THREAD_TRACE_SIZE */
#define SQ_THREAD_TRACE_SIZE__SIZE_MASK 0x3fffff
#define SQ_THREAD_TRACE_SIZE__SIZE__SHIFT 0x0
/* SQ_THREAD_TRACE_MASK */
#define SQ_THREAD_TRACE_MASK__CU_SEL_MASK 0x1f
#define SQ_THREAD_TRACE_MASK__CU_SEL__SHIFT 0x0
#define SQ_THREAD_TRACE_MASK__SH_SEL_MASK 0x20
#define SQ_THREAD_TRACE_MASK__SH_SEL__SHIFT 0x5
#define SQ_THREAD_TRACE_MASK__REG_STALL_EN_MASK 0x80
#define SQ_THREAD_TRACE_MASK__REG_STALL_EN__SHIFT 0x7
#define SQ_THREAD_TRACE_MASK__SIMD_EN_MASK 0xf00
#define SQ_THREAD_TRACE_MASK__SIMD_EN__SHIFT 0x8
#define SQ_THREAD_TRACE_MASK__VM_ID_MASK_MASK 0x3000
#define SQ_THREAD_TRACE_MASK__VM_ID_MASK__SHIFT 0xc
#define SQ_THREAD_TRACE_MASK__SPI_STALL_EN_MASK 0x4000
#define SQ_THREAD_TRACE_MASK__SPI_STALL_EN__SHIFT 0xe
#define SQ_THREAD_TRACE_MASK__SQ_STALL_EN_MASK 0x8000
#define SQ_THREAD_TRACE_MASK__SQ_STALL_EN__SHIFT 0xf
#define SQ_THREAD_TRACE_MASK__RANDOM_SEED_MASK 0xffff0000
#define SQ_THREAD_TRACE_MASK__RANDOM_SEED__SHIFT 0x10
/* SQ_THREAD_TRACE_USERDATA_0..3: one DATA field covering bits [31:0] each */
#define SQ_THREAD_TRACE_USERDATA_0__DATA_MASK 0xffffffff
#define SQ_THREAD_TRACE_USERDATA_0__DATA__SHIFT 0x0
#define SQ_THREAD_TRACE_USERDATA_1__DATA_MASK 0xffffffff
#define SQ_THREAD_TRACE_USERDATA_1__DATA__SHIFT 0x0
#define SQ_THREAD_TRACE_USERDATA_2__DATA_MASK 0xffffffff
#define SQ_THREAD_TRACE_USERDATA_2__DATA__SHIFT 0x0
#define SQ_THREAD_TRACE_USERDATA_3__DATA_MASK 0xffffffff
#define SQ_THREAD_TRACE_USERDATA_3__DATA__SHIFT 0x0
/*
 * SQ_THREAD_TRACE_MODE bit-field definitions (mask + lowest-bit shift).
 *
 * The first seven fields are named MASK_<XX> (PS, VS, GS, ES, HS, LS, CS);
 * each is three bits wide and they are packed back to back in bits [20:0].
 * The remaining fields fill bits [31:21] with no gaps:
 *   MODE [22:21], CAPTURE_MODE [24:23], AUTOFLUSH_EN [25], PRIV [26],
 *   ISSUE_MASK [28:27], TEST_MODE [29], INTERRUPT_EN [30], WRAP [31].
 */
#define SQ_THREAD_TRACE_MODE__MASK_PS_MASK 0x7
#define SQ_THREAD_TRACE_MODE__MASK_PS__SHIFT 0x0
#define SQ_THREAD_TRACE_MODE__MASK_VS_MASK 0x38
#define SQ_THREAD_TRACE_MODE__MASK_VS__SHIFT 0x3
#define SQ_THREAD_TRACE_MODE__MASK_GS_MASK 0x1c0
#define SQ_THREAD_TRACE_MODE__MASK_GS__SHIFT 0x6
#define SQ_THREAD_TRACE_MODE__MASK_ES_MASK 0xe00
#define SQ_THREAD_TRACE_MODE__MASK_ES__SHIFT 0x9
#define SQ_THREAD_TRACE_MODE__MASK_HS_MASK 0x7000
#define SQ_THREAD_TRACE_MODE__MASK_HS__SHIFT 0xc
#define SQ_THREAD_TRACE_MODE__MASK_LS_MASK 0x38000
#define SQ_THREAD_TRACE_MODE__MASK_LS__SHIFT 0xf
#define SQ_THREAD_TRACE_MODE__MASK_CS_MASK 0x1c0000
#define SQ_THREAD_TRACE_MODE__MASK_CS__SHIFT 0x12
#define SQ_THREAD_TRACE_MODE__MODE_MASK 0x600000
#define SQ_THREAD_TRACE_MODE__MODE__SHIFT 0x15
#define SQ_THREAD_TRACE_MODE__CAPTURE_MODE_MASK 0x1800000
#define SQ_THREAD_TRACE_MODE__CAPTURE_MODE__SHIFT 0x17
#define SQ_THREAD_TRACE_MODE__AUTOFLUSH_EN_MASK 0x2000000
#define SQ_THREAD_TRACE_MODE__AUTOFLUSH_EN__SHIFT 0x19
#define SQ_THREAD_TRACE_MODE__PRIV_MASK 0x4000000
#define SQ_THREAD_TRACE_MODE__PRIV__SHIFT 0x1a
#define SQ_THREAD_TRACE_MODE__ISSUE_MASK_MASK 0x18000000
#define SQ_THREAD_TRACE_MODE__ISSUE_MASK__SHIFT 0x1b
#define SQ_THREAD_TRACE_MODE__TEST_MODE_MASK 0x20000000
#define SQ_THREAD_TRACE_MODE__TEST_MODE__SHIFT 0x1d
#define SQ_THREAD_TRACE_MODE__INTERRUPT_EN_MASK 0x40000000
#define SQ_THREAD_TRACE_MODE__INTERRUPT_EN__SHIFT 0x1e
#define SQ_THREAD_TRACE_MODE__WRAP_MASK 0x80000000
#define SQ_THREAD_TRACE_MODE__WRAP__SHIFT 0x1f
/*
 * Remaining SQ_THREAD_TRACE_* bit-field definitions: CTRL, TOKEN_MASK,
 * TOKEN_MASK2, PERF_MASK, WPTR, STATUS, CNTR and HIWATER
 * (mask + lowest-bit shift per field).
 *
 * Several register names and field names here themselves end in "MASK"
 * (e.g. register SQ_THREAD_TRACE_TOKEN_MASK with field TOKEN_MASK), so the
 * mask macros read "..._MASK__..._MASK_MASK"; the matching __SHIFT macro
 * shows where the field name ends.
 */
/* SQ_THREAD_TRACE_CTRL */
#define SQ_THREAD_TRACE_CTRL__RESET_BUFFER_MASK 0x80000000
#define SQ_THREAD_TRACE_CTRL__RESET_BUFFER__SHIFT 0x1f
/* SQ_THREAD_TRACE_TOKEN_MASK / SQ_THREAD_TRACE_TOKEN_MASK2 */
#define SQ_THREAD_TRACE_TOKEN_MASK__TOKEN_MASK_MASK 0xffff
#define SQ_THREAD_TRACE_TOKEN_MASK__TOKEN_MASK__SHIFT 0x0
#define SQ_THREAD_TRACE_TOKEN_MASK__REG_MASK_MASK 0xff0000
#define SQ_THREAD_TRACE_TOKEN_MASK__REG_MASK__SHIFT 0x10
#define SQ_THREAD_TRACE_TOKEN_MASK__REG_DROP_ON_STALL_MASK 0x1000000
#define SQ_THREAD_TRACE_TOKEN_MASK__REG_DROP_ON_STALL__SHIFT 0x18
#define SQ_THREAD_TRACE_TOKEN_MASK2__INST_MASK_MASK 0xffff
#define SQ_THREAD_TRACE_TOKEN_MASK2__INST_MASK__SHIFT 0x0
/* SQ_THREAD_TRACE_PERF_MASK */
#define SQ_THREAD_TRACE_PERF_MASK__SH0_MASK_MASK 0xffff
#define SQ_THREAD_TRACE_PERF_MASK__SH0_MASK__SHIFT 0x0
#define SQ_THREAD_TRACE_PERF_MASK__SH1_MASK_MASK 0xffff0000
#define SQ_THREAD_TRACE_PERF_MASK__SH1_MASK__SHIFT 0x10
/* SQ_THREAD_TRACE_WPTR */
#define SQ_THREAD_TRACE_WPTR__WPTR_MASK 0x3fffffff
#define SQ_THREAD_TRACE_WPTR__WPTR__SHIFT 0x0
#define SQ_THREAD_TRACE_WPTR__READ_OFFSET_MASK 0xc0000000
#define SQ_THREAD_TRACE_WPTR__READ_OFFSET__SHIFT 0x1e
/* SQ_THREAD_TRACE_STATUS */
#define SQ_THREAD_TRACE_STATUS__FINISH_PENDING_MASK 0x3ff
#define SQ_THREAD_TRACE_STATUS__FINISH_PENDING__SHIFT 0x0
#define SQ_THREAD_TRACE_STATUS__FINISH_DONE_MASK 0x3ff0000
#define SQ_THREAD_TRACE_STATUS__FINISH_DONE__SHIFT 0x10
#define SQ_THREAD_TRACE_STATUS__NEW_BUF_MASK 0x20000000
#define SQ_THREAD_TRACE_STATUS__NEW_BUF__SHIFT 0x1d
#define SQ_THREAD_TRACE_STATUS__BUSY_MASK 0x40000000
#define SQ_THREAD_TRACE_STATUS__BUSY__SHIFT 0x1e
#define SQ_THREAD_TRACE_STATUS__FULL_MASK 0x80000000
#define SQ_THREAD_TRACE_STATUS__FULL__SHIFT 0x1f
/* SQ_THREAD_TRACE_CNTR / SQ_THREAD_TRACE_HIWATER */
#define SQ_THREAD_TRACE_CNTR__CNTR_MASK 0xffffffff
#define SQ_THREAD_TRACE_CNTR__CNTR__SHIFT 0x0
#define SQ_THREAD_TRACE_HIWATER__HIWATER_MASK 0x7
#define SQ_THREAD_TRACE_HIWATER__HIWATER__SHIFT 0x0
/*
 * SQ_LB_* bit-field definitions (mask + lowest-bit shift per field).
 *
 *   SQ_LB_CTR_CTRL: three single-bit fields START [0], LOAD [1], CLEAR [2].
 *   SQ_LB_DATA_{ALU_CYCLES,TEX_CYCLES,ALU_STALLS,TEX_STALLS}: one DATA field
 *   covering bits [31:0] each.
 */
#define SQ_LB_CTR_CTRL__START_MASK 0x1
#define SQ_LB_CTR_CTRL__START__SHIFT 0x0
#define SQ_LB_CTR_CTRL__LOAD_MASK 0x2
#define SQ_LB_CTR_CTRL__LOAD__SHIFT 0x1
#define SQ_LB_CTR_CTRL__CLEAR_MASK 0x4
#define SQ_LB_CTR_CTRL__CLEAR__SHIFT 0x2
#define SQ_LB_DATA_ALU_CYCLES__DATA_MASK 0xffffffff
#define SQ_LB_DATA_ALU_CYCLES__DATA__SHIFT 0x0
#define SQ_LB_DATA_TEX_CYCLES__DATA_MASK 0xffffffff
#define SQ_LB_DATA_TEX_CYCLES__DATA__SHIFT 0x0
#define SQ_LB_DATA_ALU_STALLS__DATA_MASK 0xffffffff
#define SQ_LB_DATA_ALU_STALLS__DATA__SHIFT 0x0
#define SQ_LB_DATA_TEX_STALLS__DATA_MASK 0xffffffff
#define SQ_LB_DATA_TEX_STALLS__DATA__SHIFT 0x0
/*
 * SQC_SECDED_CNT, SQ_SEC_CNT, SQ_DED_CNT and SQ_DED_INFO bit-field
 * definitions (mask + lowest-bit shift per field).
 *
 *   SQC_SECDED_CNT: four 8-bit fields INST_SEC [7:0], INST_DED [15:8],
 *                   DATA_SEC [23:16], DATA_DED [31:24].
 *   SQ_SEC_CNT / SQ_DED_CNT: identical layouts with LDS_* [5:0],
 *                   SGPR_* [12:8], VGPR_* [24:16].
 *   SQ_DED_INFO:    WAVE_ID [3:0], SIMD_ID [5:4], SOURCE [8:6],
 *                   VM_ID [12:9].
 * NOTE(review): the SEC/DED names look like ECC error counters
 * (single/double error) -- confirm against the hardware documentation.
 */
/* SQC_SECDED_CNT */
#define SQC_SECDED_CNT__INST_SEC_MASK 0xff
#define SQC_SECDED_CNT__INST_SEC__SHIFT 0x0
#define SQC_SECDED_CNT__INST_DED_MASK 0xff00
#define SQC_SECDED_CNT__INST_DED__SHIFT 0x8
#define SQC_SECDED_CNT__DATA_SEC_MASK 0xff0000
#define SQC_SECDED_CNT__DATA_SEC__SHIFT 0x10
#define SQC_SECDED_CNT__DATA_DED_MASK 0xff000000
#define SQC_SECDED_CNT__DATA_DED__SHIFT 0x18
/* SQ_SEC_CNT */
#define SQ_SEC_CNT__LDS_SEC_MASK 0x3f
#define SQ_SEC_CNT__LDS_SEC__SHIFT 0x0
#define SQ_SEC_CNT__SGPR_SEC_MASK 0x1f00
#define SQ_SEC_CNT__SGPR_SEC__SHIFT 0x8
#define SQ_SEC_CNT__VGPR_SEC_MASK 0x1ff0000
#define SQ_SEC_CNT__VGPR_SEC__SHIFT 0x10
/* SQ_DED_CNT */
#define SQ_DED_CNT__LDS_DED_MASK 0x3f
#define SQ_DED_CNT__LDS_DED__SHIFT 0x0
#define SQ_DED_CNT__SGPR_DED_MASK 0x1f00
#define SQ_DED_CNT__SGPR_DED__SHIFT 0x8
#define SQ_DED_CNT__VGPR_DED_MASK 0x1ff0000
#define SQ_DED_CNT__VGPR_DED__SHIFT 0x10
/* SQ_DED_INFO */
#define SQ_DED_INFO__WAVE_ID_MASK 0xf
#define SQ_DED_INFO__WAVE_ID__SHIFT 0x0
#define SQ_DED_INFO__SIMD_ID_MASK 0x30
#define SQ_DED_INFO__SIMD_ID__SHIFT 0x4
#define SQ_DED_INFO__SOURCE_MASK 0x1c0
#define SQ_DED_INFO__SOURCE__SHIFT 0x6
#define SQ_DED_INFO__VM_ID_MASK 0x1e00
#define SQ_DED_INFO__VM_ID__SHIFT 0x9
/*
 * SQ_BUF_RSRC_WORD0..WORD3 bit-field definitions (mask + lowest-bit shift
 * per field). Each WORDn has its own set of fields, with masks that fit in
 * 32 bits.
 *
 *   WORD0: BASE_ADDRESS [31:0]
 *   WORD1: BASE_ADDRESS_HI [15:0], STRIDE [29:16], CACHE_SWIZZLE [30],
 *          SWIZZLE_ENABLE [31]
 *   WORD2: NUM_RECORDS [31:0]
 *   WORD3: DST_SEL_X/Y/Z/W (3 bits each, [11:0]), NUM_FORMAT [14:12],
 *          DATA_FORMAT [18:15], ELEMENT_SIZE [20:19], INDEX_STRIDE [22:21],
 *          ADD_TID_ENABLE [23], ATC [24], HASH_ENABLE [25], HEAP [26],
 *          MTYPE [29:27], TYPE [31:30]
 */
/* SQ_BUF_RSRC_WORD0 */
#define SQ_BUF_RSRC_WORD0__BASE_ADDRESS_MASK 0xffffffff
#define SQ_BUF_RSRC_WORD0__BASE_ADDRESS__SHIFT 0x0
/* SQ_BUF_RSRC_WORD1 */
#define SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI_MASK 0xffff
#define SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__SHIFT 0x0
#define SQ_BUF_RSRC_WORD1__STRIDE_MASK 0x3fff0000
#define SQ_BUF_RSRC_WORD1__STRIDE__SHIFT 0x10
#define SQ_BUF_RSRC_WORD1__CACHE_SWIZZLE_MASK 0x40000000
#define SQ_BUF_RSRC_WORD1__CACHE_SWIZZLE__SHIFT 0x1e
#define SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE_MASK 0x80000000
#define SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__SHIFT 0x1f
/* SQ_BUF_RSRC_WORD2 */
#define SQ_BUF_RSRC_WORD2__NUM_RECORDS_MASK 0xffffffff
#define SQ_BUF_RSRC_WORD2__NUM_RECORDS__SHIFT 0x0
/* SQ_BUF_RSRC_WORD3 */
#define SQ_BUF_RSRC_WORD3__DST_SEL_X_MASK 0x7
#define SQ_BUF_RSRC_WORD3__DST_SEL_X__SHIFT 0x0
#define SQ_BUF_RSRC_WORD3__DST_SEL_Y_MASK 0x38
#define SQ_BUF_RSRC_WORD3__DST_SEL_Y__SHIFT 0x3
#define SQ_BUF_RSRC_WORD3__DST_SEL_Z_MASK 0x1c0
#define SQ_BUF_RSRC_WORD3__DST_SEL_Z__SHIFT 0x6
#define SQ_BUF_RSRC_WORD3__DST_SEL_W_MASK 0xe00
#define SQ_BUF_RSRC_WORD3__DST_SEL_W__SHIFT 0x9
#define SQ_BUF_RSRC_WORD3__NUM_FORMAT_MASK 0x7000
#define SQ_BUF_RSRC_WORD3__NUM_FORMAT__SHIFT 0xc
#define SQ_BUF_RSRC_WORD3__DATA_FORMAT_MASK 0x78000
#define SQ_BUF_RSRC_WORD3__DATA_FORMAT__SHIFT 0xf
#define SQ_BUF_RSRC_WORD3__ELEMENT_SIZE_MASK 0x180000
#define SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__SHIFT 0x13
#define SQ_BUF_RSRC_WORD3__INDEX_STRIDE_MASK 0x600000
#define SQ_BUF_RSRC_WORD3__INDEX_STRIDE__SHIFT 0x15
#define SQ_BUF_RSRC_WORD3__ADD_TID_ENABLE_MASK 0x800000
#define SQ_BUF_RSRC_WORD3__ADD_TID_ENABLE__SHIFT 0x17
#define SQ_BUF_RSRC_WORD3__ATC_MASK 0x1000000
#define SQ_BUF_RSRC_WORD3__ATC__SHIFT 0x18
#define SQ_BUF_RSRC_WORD3__HASH_ENABLE_MASK 0x2000000
#define SQ_BUF_RSRC_WORD3__HASH_ENABLE__SHIFT 0x19
#define SQ_BUF_RSRC_WORD3__HEAP_MASK 0x4000000
#define SQ_BUF_RSRC_WORD3__HEAP__SHIFT 0x1a
#define SQ_BUF_RSRC_WORD3__MTYPE_MASK 0x38000000
#define SQ_BUF_RSRC_WORD3__MTYPE__SHIFT 0x1b
#define SQ_BUF_RSRC_WORD3__TYPE_MASK 0xc0000000
#define SQ_BUF_RSRC_WORD3__TYPE__SHIFT 0x1e
/*
 * SQ_IMG_RSRC_WORD0..WORD7 bit-field definitions (mask + lowest-bit shift
 * per field).
 *
 * Things to watch for when using these macros:
 *  - MTYPE is defined twice with different positions: bits [31:30] in
 *    WORD1 and bit [26] in WORD3.
 *  - MIN_LOD lives in WORD1 (bits [19:8]); WORD6 has a separate
 *    MIN_LOD_WARN field.
 *  - The last field of WORD6 and the only field of WORD7 are spelled
 *    "UNUNSED" in the macro names. The spelling is part of the identifier,
 *    so it must be used exactly as written.
 */
/* SQ_IMG_RSRC_WORD0 */
#define SQ_IMG_RSRC_WORD0__BASE_ADDRESS_MASK 0xffffffff
#define SQ_IMG_RSRC_WORD0__BASE_ADDRESS__SHIFT 0x0
/* SQ_IMG_RSRC_WORD1 */
#define SQ_IMG_RSRC_WORD1__BASE_ADDRESS_HI_MASK 0xff
#define SQ_IMG_RSRC_WORD1__BASE_ADDRESS_HI__SHIFT 0x0
#define SQ_IMG_RSRC_WORD1__MIN_LOD_MASK 0xfff00
#define SQ_IMG_RSRC_WORD1__MIN_LOD__SHIFT 0x8
#define SQ_IMG_RSRC_WORD1__DATA_FORMAT_MASK 0x3f00000
#define SQ_IMG_RSRC_WORD1__DATA_FORMAT__SHIFT 0x14
#define SQ_IMG_RSRC_WORD1__NUM_FORMAT_MASK 0x3c000000
#define SQ_IMG_RSRC_WORD1__NUM_FORMAT__SHIFT 0x1a
#define SQ_IMG_RSRC_WORD1__MTYPE_MASK 0xc0000000
#define SQ_IMG_RSRC_WORD1__MTYPE__SHIFT 0x1e
/* SQ_IMG_RSRC_WORD2 */
#define SQ_IMG_RSRC_WORD2__WIDTH_MASK 0x3fff
#define SQ_IMG_RSRC_WORD2__WIDTH__SHIFT 0x0
#define SQ_IMG_RSRC_WORD2__HEIGHT_MASK 0xfffc000
#define SQ_IMG_RSRC_WORD2__HEIGHT__SHIFT 0xe
#define SQ_IMG_RSRC_WORD2__PERF_MOD_MASK 0x70000000
#define SQ_IMG_RSRC_WORD2__PERF_MOD__SHIFT 0x1c
#define SQ_IMG_RSRC_WORD2__INTERLACED_MASK 0x80000000
#define SQ_IMG_RSRC_WORD2__INTERLACED__SHIFT 0x1f
/* SQ_IMG_RSRC_WORD3 */
#define SQ_IMG_RSRC_WORD3__DST_SEL_X_MASK 0x7
#define SQ_IMG_RSRC_WORD3__DST_SEL_X__SHIFT 0x0
#define SQ_IMG_RSRC_WORD3__DST_SEL_Y_MASK 0x38
#define SQ_IMG_RSRC_WORD3__DST_SEL_Y__SHIFT 0x3
#define SQ_IMG_RSRC_WORD3__DST_SEL_Z_MASK 0x1c0
#define SQ_IMG_RSRC_WORD3__DST_SEL_Z__SHIFT 0x6
#define SQ_IMG_RSRC_WORD3__DST_SEL_W_MASK 0xe00
#define SQ_IMG_RSRC_WORD3__DST_SEL_W__SHIFT 0x9
#define SQ_IMG_RSRC_WORD3__BASE_LEVEL_MASK 0xf000
#define SQ_IMG_RSRC_WORD3__BASE_LEVEL__SHIFT 0xc
#define SQ_IMG_RSRC_WORD3__LAST_LEVEL_MASK 0xf0000
#define SQ_IMG_RSRC_WORD3__LAST_LEVEL__SHIFT 0x10
#define SQ_IMG_RSRC_WORD3__TILING_INDEX_MASK 0x1f00000
#define SQ_IMG_RSRC_WORD3__TILING_INDEX__SHIFT 0x14
#define SQ_IMG_RSRC_WORD3__POW2_PAD_MASK 0x2000000
#define SQ_IMG_RSRC_WORD3__POW2_PAD__SHIFT 0x19
#define SQ_IMG_RSRC_WORD3__MTYPE_MASK 0x4000000
#define SQ_IMG_RSRC_WORD3__MTYPE__SHIFT 0x1a
#define SQ_IMG_RSRC_WORD3__ATC_MASK 0x8000000
#define SQ_IMG_RSRC_WORD3__ATC__SHIFT 0x1b
#define SQ_IMG_RSRC_WORD3__TYPE_MASK 0xf0000000
#define SQ_IMG_RSRC_WORD3__TYPE__SHIFT 0x1c
/* SQ_IMG_RSRC_WORD4 */
#define SQ_IMG_RSRC_WORD4__DEPTH_MASK 0x1fff
#define SQ_IMG_RSRC_WORD4__DEPTH__SHIFT 0x0
#define SQ_IMG_RSRC_WORD4__PITCH_MASK 0x7ffe000
#define SQ_IMG_RSRC_WORD4__PITCH__SHIFT 0xd
/* SQ_IMG_RSRC_WORD5 */
#define SQ_IMG_RSRC_WORD5__BASE_ARRAY_MASK 0x1fff
#define SQ_IMG_RSRC_WORD5__BASE_ARRAY__SHIFT 0x0
#define SQ_IMG_RSRC_WORD5__LAST_ARRAY_MASK 0x3ffe000
#define SQ_IMG_RSRC_WORD5__LAST_ARRAY__SHIFT 0xd
/* SQ_IMG_RSRC_WORD6 */
#define SQ_IMG_RSRC_WORD6__MIN_LOD_WARN_MASK 0xfff
#define SQ_IMG_RSRC_WORD6__MIN_LOD_WARN__SHIFT 0x0
#define SQ_IMG_RSRC_WORD6__COUNTER_BANK_ID_MASK 0xff000
#define SQ_IMG_RSRC_WORD6__COUNTER_BANK_ID__SHIFT 0xc
#define SQ_IMG_RSRC_WORD6__LOD_HDW_CNT_EN_MASK 0x100000
#define SQ_IMG_RSRC_WORD6__LOD_HDW_CNT_EN__SHIFT 0x14
#define SQ_IMG_RSRC_WORD6__UNUNSED_MASK 0xffe00000
#define SQ_IMG_RSRC_WORD6__UNUNSED__SHIFT 0x15
/* SQ_IMG_RSRC_WORD7 */
#define SQ_IMG_RSRC_WORD7__UNUNSED_MASK 0xffffffff
#define SQ_IMG_RSRC_WORD7__UNUNSED__SHIFT 0x0
/*
 * SQ_IMG_SAMP_WORD0..WORD3 bit-field definitions (mask + lowest-bit shift
 * per field).
 *
 *   WORD0: fields are contiguous from bit 0 (CLAMP_X) through bit 30
 *          (FILTER_MODE [30:29]); bit 31 has no field defined here.
 *   WORD1: MIN_LOD [11:0], MAX_LOD [23:12], PERF_MIP [27:24],
 *          PERF_Z [31:28].
 *   WORD2: LOD_BIAS [13:0], LOD_BIAS_SEC [19:14], then 2-bit filter
 *          selectors and single-bit flags up to bit 30.
 *   WORD3: BORDER_COLOR_PTR [11:0] and BORDER_COLOR_TYPE [31:30];
 *          bits [29:12] have no field defined here.
 */
/* SQ_IMG_SAMP_WORD0 */
#define SQ_IMG_SAMP_WORD0__CLAMP_X_MASK 0x7
#define SQ_IMG_SAMP_WORD0__CLAMP_X__SHIFT 0x0
#define SQ_IMG_SAMP_WORD0__CLAMP_Y_MASK 0x38
#define SQ_IMG_SAMP_WORD0__CLAMP_Y__SHIFT 0x3
#define SQ_IMG_SAMP_WORD0__CLAMP_Z_MASK 0x1c0
#define SQ_IMG_SAMP_WORD0__CLAMP_Z__SHIFT 0x6
#define SQ_IMG_SAMP_WORD0__MAX_ANISO_RATIO_MASK 0xe00
#define SQ_IMG_SAMP_WORD0__MAX_ANISO_RATIO__SHIFT 0x9
#define SQ_IMG_SAMP_WORD0__DEPTH_COMPARE_FUNC_MASK 0x7000
#define SQ_IMG_SAMP_WORD0__DEPTH_COMPARE_FUNC__SHIFT 0xc
#define SQ_IMG_SAMP_WORD0__FORCE_UNNORMALIZED_MASK 0x8000
#define SQ_IMG_SAMP_WORD0__FORCE_UNNORMALIZED__SHIFT 0xf
#define SQ_IMG_SAMP_WORD0__ANISO_THRESHOLD_MASK 0x70000
#define SQ_IMG_SAMP_WORD0__ANISO_THRESHOLD__SHIFT 0x10
#define SQ_IMG_SAMP_WORD0__MC_COORD_TRUNC_MASK 0x80000
#define SQ_IMG_SAMP_WORD0__MC_COORD_TRUNC__SHIFT 0x13
#define SQ_IMG_SAMP_WORD0__FORCE_DEGAMMA_MASK 0x100000
#define SQ_IMG_SAMP_WORD0__FORCE_DEGAMMA__SHIFT 0x14
#define SQ_IMG_SAMP_WORD0__ANISO_BIAS_MASK 0x7e00000
#define SQ_IMG_SAMP_WORD0__ANISO_BIAS__SHIFT 0x15
#define SQ_IMG_SAMP_WORD0__TRUNC_COORD_MASK 0x8000000
#define SQ_IMG_SAMP_WORD0__TRUNC_COORD__SHIFT 0x1b
#define SQ_IMG_SAMP_WORD0__DISABLE_CUBE_WRAP_MASK 0x10000000
#define SQ_IMG_SAMP_WORD0__DISABLE_CUBE_WRAP__SHIFT 0x1c
#define SQ_IMG_SAMP_WORD0__FILTER_MODE_MASK 0x60000000
#define SQ_IMG_SAMP_WORD0__FILTER_MODE__SHIFT 0x1d
/* SQ_IMG_SAMP_WORD1 */
#define SQ_IMG_SAMP_WORD1__MIN_LOD_MASK 0xfff
#define SQ_IMG_SAMP_WORD1__MIN_LOD__SHIFT 0x0
#define SQ_IMG_SAMP_WORD1__MAX_LOD_MASK 0xfff000
#define SQ_IMG_SAMP_WORD1__MAX_LOD__SHIFT 0xc
#define SQ_IMG_SAMP_WORD1__PERF_MIP_MASK 0xf000000
#define SQ_IMG_SAMP_WORD1__PERF_MIP__SHIFT 0x18
#define SQ_IMG_SAMP_WORD1__PERF_Z_MASK 0xf0000000
#define SQ_IMG_SAMP_WORD1__PERF_Z__SHIFT 0x1c
/* SQ_IMG_SAMP_WORD2 */
#define SQ_IMG_SAMP_WORD2__LOD_BIAS_MASK 0x3fff
#define SQ_IMG_SAMP_WORD2__LOD_BIAS__SHIFT 0x0
#define SQ_IMG_SAMP_WORD2__LOD_BIAS_SEC_MASK 0xfc000
#define SQ_IMG_SAMP_WORD2__LOD_BIAS_SEC__SHIFT 0xe
#define SQ_IMG_SAMP_WORD2__XY_MAG_FILTER_MASK 0x300000
#define SQ_IMG_SAMP_WORD2__XY_MAG_FILTER__SHIFT 0x14
#define SQ_IMG_SAMP_WORD2__XY_MIN_FILTER_MASK 0xc00000
#define SQ_IMG_SAMP_WORD2__XY_MIN_FILTER__SHIFT 0x16
#define SQ_IMG_SAMP_WORD2__Z_FILTER_MASK 0x3000000
#define SQ_IMG_SAMP_WORD2__Z_FILTER__SHIFT 0x18
#define SQ_IMG_SAMP_WORD2__MIP_FILTER_MASK 0xc000000
#define SQ_IMG_SAMP_WORD2__MIP_FILTER__SHIFT 0x1a
#define SQ_IMG_SAMP_WORD2__MIP_POINT_PRECLAMP_MASK 0x10000000
#define SQ_IMG_SAMP_WORD2__MIP_POINT_PRECLAMP__SHIFT 0x1c
#define SQ_IMG_SAMP_WORD2__DISABLE_LSB_CEIL_MASK 0x20000000
#define SQ_IMG_SAMP_WORD2__DISABLE_LSB_CEIL__SHIFT 0x1d
#define SQ_IMG_SAMP_WORD2__FILTER_PREC_FIX_MASK 0x40000000
#define SQ_IMG_SAMP_WORD2__FILTER_PREC_FIX__SHIFT 0x1e
/* SQ_IMG_SAMP_WORD3 */
#define SQ_IMG_SAMP_WORD3__BORDER_COLOR_PTR_MASK 0xfff
#define SQ_IMG_SAMP_WORD3__BORDER_COLOR_PTR__SHIFT 0x0
#define SQ_IMG_SAMP_WORD3__BORDER_COLOR_TYPE_MASK 0xc0000000
#define SQ_IMG_SAMP_WORD3__BORDER_COLOR_TYPE__SHIFT 0x1e
/*
 * SQ_FLAT_SCRATCH_WORD0 / WORD1 bit-field definitions:
 *   WORD0: SIZE   [18:0]
 *   WORD1: OFFSET [23:0]
 */
#define SQ_FLAT_SCRATCH_WORD0__SIZE_MASK 0x7ffff
#define SQ_FLAT_SCRATCH_WORD0__SIZE__SHIFT 0x0
#define SQ_FLAT_SCRATCH_WORD1__OFFSET_MASK 0xffffff
#define SQ_FLAT_SCRATCH_WORD1__OFFSET__SHIFT 0x0
/*
 * SQ_IND_INDEX, SQ_CMD, SQ_IND_DATA, SQ_REG_TIMESTAMP, SQ_CMD_TIMESTAMP and
 * SQ_HV_VMID_CTRL bit-field definitions (mask + lowest-bit shift per field).
 *
 *   SQ_IND_INDEX: WAVE_ID [3:0], SIMD_ID [5:4], THREAD_ID [11:6],
 *                 AUTO_INCR [12], FORCE_READ [13], READ_TIMEOUT [14],
 *                 UNINDEXED [15], INDEX [31:16].
 *   SQ_CMD:       CMD [2:0], MODE [6:4], CHECK_VMID [7], TRAP_ID [10:8],
 *                 WAVE_ID [19:16], SIMD_ID [21:20], QUEUE_ID [26:24],
 *                 VM_ID [31:28]. Bits 3, [15:11], [23:22] and 27 have no
 *                 field defined here.
 *   SQ_IND_DATA:  DATA [31:0].
 *   SQ_REG_TIMESTAMP / SQ_CMD_TIMESTAMP: TIMESTAMP [7:0].
 *   SQ_HV_VMID_CTRL: DEFAULT_VMID [3:0], ALLOWED_VMID_MASK [19:4].
 */
/* SQ_IND_INDEX */
#define SQ_IND_INDEX__WAVE_ID_MASK 0xf
#define SQ_IND_INDEX__WAVE_ID__SHIFT 0x0
#define SQ_IND_INDEX__SIMD_ID_MASK 0x30
#define SQ_IND_INDEX__SIMD_ID__SHIFT 0x4
#define SQ_IND_INDEX__THREAD_ID_MASK 0xfc0
#define SQ_IND_INDEX__THREAD_ID__SHIFT 0x6
#define SQ_IND_INDEX__AUTO_INCR_MASK 0x1000
#define SQ_IND_INDEX__AUTO_INCR__SHIFT 0xc
#define SQ_IND_INDEX__FORCE_READ_MASK 0x2000
#define SQ_IND_INDEX__FORCE_READ__SHIFT 0xd
#define SQ_IND_INDEX__READ_TIMEOUT_MASK 0x4000
#define SQ_IND_INDEX__READ_TIMEOUT__SHIFT 0xe
#define SQ_IND_INDEX__UNINDEXED_MASK 0x8000
#define SQ_IND_INDEX__UNINDEXED__SHIFT 0xf
#define SQ_IND_INDEX__INDEX_MASK 0xffff0000
#define SQ_IND_INDEX__INDEX__SHIFT 0x10
/* SQ_CMD */
#define SQ_CMD__CMD_MASK 0x7
#define SQ_CMD__CMD__SHIFT 0x0
#define SQ_CMD__MODE_MASK 0x70
#define SQ_CMD__MODE__SHIFT 0x4
#define SQ_CMD__CHECK_VMID_MASK 0x80
#define SQ_CMD__CHECK_VMID__SHIFT 0x7
#define SQ_CMD__TRAP_ID_MASK 0x700
#define SQ_CMD__TRAP_ID__SHIFT 0x8
#define SQ_CMD__WAVE_ID_MASK 0xf0000
#define SQ_CMD__WAVE_ID__SHIFT 0x10
#define SQ_CMD__SIMD_ID_MASK 0x300000
#define SQ_CMD__SIMD_ID__SHIFT 0x14
#define SQ_CMD__QUEUE_ID_MASK 0x7000000
#define SQ_CMD__QUEUE_ID__SHIFT 0x18
#define SQ_CMD__VM_ID_MASK 0xf0000000
#define SQ_CMD__VM_ID__SHIFT 0x1c
/* SQ_IND_DATA */
#define SQ_IND_DATA__DATA_MASK 0xffffffff
#define SQ_IND_DATA__DATA__SHIFT 0x0
/* SQ_REG_TIMESTAMP / SQ_CMD_TIMESTAMP */
#define SQ_REG_TIMESTAMP__TIMESTAMP_MASK 0xff
#define SQ_REG_TIMESTAMP__TIMESTAMP__SHIFT 0x0
#define SQ_CMD_TIMESTAMP__TIMESTAMP_MASK 0xff
#define SQ_CMD_TIMESTAMP__TIMESTAMP__SHIFT 0x0
/* SQ_HV_VMID_CTRL */
#define SQ_HV_VMID_CTRL__DEFAULT_VMID_MASK 0xf
#define SQ_HV_VMID_CTRL__DEFAULT_VMID__SHIFT 0x0
#define SQ_HV_VMID_CTRL__ALLOWED_VMID_MASK_MASK 0xffff0
#define SQ_HV_VMID_CTRL__ALLOWED_VMID_MASK__SHIFT 0x4
/*
 * SQ_WAVE_INST_DW0/DW1, SQ_WAVE_PC_LO/HI, SQ_WAVE_IB_DBG0 and
 * SQ_WAVE_EXEC_LO/HI bit-field definitions (mask + lowest-bit shift).
 *
 *   INST_DW0, INST_DW1, PC_LO, EXEC_LO, EXEC_HI: one field covering
 *   bits [31:0].
 *   PC_HI: PC_HI [7:0] only.
 *   IB_DBG0: fields occupy bits [11:0] and [28:16]; bits [15:12] and
 *   [31:29] have no field defined here.
 */
/* SQ_WAVE_INST_DW0 / SQ_WAVE_INST_DW1 */
#define SQ_WAVE_INST_DW0__INST_DW0_MASK 0xffffffff
#define SQ_WAVE_INST_DW0__INST_DW0__SHIFT 0x0
#define SQ_WAVE_INST_DW1__INST_DW1_MASK 0xffffffff
#define SQ_WAVE_INST_DW1__INST_DW1__SHIFT 0x0
/* SQ_WAVE_PC_LO / SQ_WAVE_PC_HI */
#define SQ_WAVE_PC_LO__PC_LO_MASK 0xffffffff
#define SQ_WAVE_PC_LO__PC_LO__SHIFT 0x0
#define SQ_WAVE_PC_HI__PC_HI_MASK 0xff
#define SQ_WAVE_PC_HI__PC_HI__SHIFT 0x0
/* SQ_WAVE_IB_DBG0 */
#define SQ_WAVE_IB_DBG0__IBUF_ST_MASK 0x7
#define SQ_WAVE_IB_DBG0__IBUF_ST__SHIFT 0x0
#define SQ_WAVE_IB_DBG0__PC_INVALID_MASK 0x8
#define SQ_WAVE_IB_DBG0__PC_INVALID__SHIFT 0x3
#define SQ_WAVE_IB_DBG0__NEED_NEXT_DW_MASK 0x10
#define SQ_WAVE_IB_DBG0__NEED_NEXT_DW__SHIFT 0x4
#define SQ_WAVE_IB_DBG0__NO_PREFETCH_CNT_MASK 0xe0
#define SQ_WAVE_IB_DBG0__NO_PREFETCH_CNT__SHIFT 0x5
#define SQ_WAVE_IB_DBG0__IBUF_RPTR_MASK 0x300
#define SQ_WAVE_IB_DBG0__IBUF_RPTR__SHIFT 0x8
#define SQ_WAVE_IB_DBG0__IBUF_WPTR_MASK 0xc00
#define SQ_WAVE_IB_DBG0__IBUF_WPTR__SHIFT 0xa
#define SQ_WAVE_IB_DBG0__INST_STR_ST_MASK 0x70000
#define SQ_WAVE_IB_DBG0__INST_STR_ST__SHIFT 0x10
#define SQ_WAVE_IB_DBG0__MISC_CNT_MASK 0x380000
#define SQ_WAVE_IB_DBG0__MISC_CNT__SHIFT 0x13
#define SQ_WAVE_IB_DBG0__ECC_ST_MASK 0xc00000
#define SQ_WAVE_IB_DBG0__ECC_ST__SHIFT 0x16
#define SQ_WAVE_IB_DBG0__IS_HYB_MASK 0x1000000
#define SQ_WAVE_IB_DBG0__IS_HYB__SHIFT 0x18
#define SQ_WAVE_IB_DBG0__HYB_CNT_MASK 0x6000000
#define SQ_WAVE_IB_DBG0__HYB_CNT__SHIFT 0x19
#define SQ_WAVE_IB_DBG0__KILL_MASK 0x8000000
#define SQ_WAVE_IB_DBG0__KILL__SHIFT 0x1b
#define SQ_WAVE_IB_DBG0__NEED_KILL_IFETCH_MASK 0x10000000
#define SQ_WAVE_IB_DBG0__NEED_KILL_IFETCH__SHIFT 0x1c
/* SQ_WAVE_EXEC_LO / SQ_WAVE_EXEC_HI */
#define SQ_WAVE_EXEC_LO__EXEC_LO_MASK 0xffffffff
#define SQ_WAVE_EXEC_LO__EXEC_LO__SHIFT 0x0
#define SQ_WAVE_EXEC_HI__EXEC_HI_MASK 0xffffffff
#define SQ_WAVE_EXEC_HI__EXEC_HI__SHIFT 0x0
/*
 * SQ_WAVE_STATUS bit-field definitions (mask + lowest-bit shift per field).
 *
 * The fields are listed in ascending bit order and are contiguous from
 * bit 0 (SCC) through bit 27 (MUST_EXPORT). All are single-bit flags except
 * SPI_PRIO [2:1], WAVE_PRIO [4:3] and DISPATCH_CACHE_CTRL [26:24].
 * Bits [31:28] have no field defined here.
 */
#define SQ_WAVE_STATUS__SCC_MASK 0x1
#define SQ_WAVE_STATUS__SCC__SHIFT 0x0
#define SQ_WAVE_STATUS__SPI_PRIO_MASK 0x6
#define SQ_WAVE_STATUS__SPI_PRIO__SHIFT 0x1
#define SQ_WAVE_STATUS__WAVE_PRIO_MASK 0x18
#define SQ_WAVE_STATUS__WAVE_PRIO__SHIFT 0x3
#define SQ_WAVE_STATUS__PRIV_MASK 0x20
#define SQ_WAVE_STATUS__PRIV__SHIFT 0x5
#define SQ_WAVE_STATUS__TRAP_EN_MASK 0x40
#define SQ_WAVE_STATUS__TRAP_EN__SHIFT 0x6
#define SQ_WAVE_STATUS__TTRACE_EN_MASK 0x80
#define SQ_WAVE_STATUS__TTRACE_EN__SHIFT 0x7
#define SQ_WAVE_STATUS__EXPORT_RDY_MASK 0x100
#define SQ_WAVE_STATUS__EXPORT_RDY__SHIFT 0x8
#define SQ_WAVE_STATUS__EXECZ_MASK 0x200
#define SQ_WAVE_STATUS__EXECZ__SHIFT 0x9
#define SQ_WAVE_STATUS__VCCZ_MASK 0x400
#define SQ_WAVE_STATUS__VCCZ__SHIFT 0xa
#define SQ_WAVE_STATUS__IN_TG_MASK 0x800
#define SQ_WAVE_STATUS__IN_TG__SHIFT 0xb
#define SQ_WAVE_STATUS__IN_BARRIER_MASK 0x1000
#define SQ_WAVE_STATUS__IN_BARRIER__SHIFT 0xc
#define SQ_WAVE_STATUS__HALT_MASK 0x2000
#define SQ_WAVE_STATUS__HALT__SHIFT 0xd
#define SQ_WAVE_STATUS__TRAP_MASK 0x4000
#define SQ_WAVE_STATUS__TRAP__SHIFT 0xe
#define SQ_WAVE_STATUS__TTRACE_CU_EN_MASK 0x8000
#define SQ_WAVE_STATUS__TTRACE_CU_EN__SHIFT 0xf
#define SQ_WAVE_STATUS__VALID_MASK 0x10000
#define SQ_WAVE_STATUS__VALID__SHIFT 0x10
#define SQ_WAVE_STATUS__ECC_ERR_MASK 0x20000
#define SQ_WAVE_STATUS__ECC_ERR__SHIFT 0x11
#define SQ_WAVE_STATUS__SKIP_EXPORT_MASK 0x40000
#define SQ_WAVE_STATUS__SKIP_EXPORT__SHIFT 0x12
#define SQ_WAVE_STATUS__PERF_EN_MASK 0x80000
#define SQ_WAVE_STATUS__PERF_EN__SHIFT 0x13
#define SQ_WAVE_STATUS__COND_DBG_USER_MASK 0x100000
#define SQ_WAVE_STATUS__COND_DBG_USER__SHIFT 0x14
#define SQ_WAVE_STATUS__COND_DBG_SYS_MASK 0x200000
#define SQ_WAVE_STATUS__COND_DBG_SYS__SHIFT 0x15
#define SQ_WAVE_STATUS__DATA_ATC_MASK 0x400000
#define SQ_WAVE_STATUS__DATA_ATC__SHIFT 0x16
#define SQ_WAVE_STATUS__INST_ATC_MASK 0x800000
#define SQ_WAVE_STATUS__INST_ATC__SHIFT 0x17
#define SQ_WAVE_STATUS__DISPATCH_CACHE_CTRL_MASK 0x7000000
#define SQ_WAVE_STATUS__DISPATCH_CACHE_CTRL__SHIFT 0x18
#define SQ_WAVE_STATUS__MUST_EXPORT_MASK 0x8000000
#define SQ_WAVE_STATUS__MUST_EXPORT__SHIFT 0x1b
/*
 * SQ_WAVE_MODE, SQ_WAVE_TRAPSTS and SQ_WAVE_HW_ID bit-field definitions
 * (mask + lowest-bit shift per field).
 *
 *   SQ_WAVE_MODE:    FP_ROUND [3:0], FP_DENORM [7:4], DX10_CLAMP [8],
 *                    IEEE [9], LOD_CLAMPED [10], DEBUG_EN [11],
 *                    EXCP_EN [20:12], VSKIP [28], CSP [31:29].
 *                    Bits [27:21] have no field defined here.
 *   SQ_WAVE_TRAPSTS: EXCP [8:0], EXCP_CYCLE [21:16], DP_RATE [31:29].
 *   SQ_WAVE_HW_ID:   WAVE_ID [3:0], SIMD_ID [5:4], PIPE_ID [7:6],
 *                    CU_ID [11:8], SH_ID [12], SE_ID [14:13],
 *                    TG_ID [19:16], VM_ID [23:20], QUEUE_ID [26:24],
 *                    STATE_ID [29:27], ME_ID [31:30].
 *                    Bit 15 has no field defined here.
 */
/* SQ_WAVE_MODE */
#define SQ_WAVE_MODE__FP_ROUND_MASK 0xf
#define SQ_WAVE_MODE__FP_ROUND__SHIFT 0x0
#define SQ_WAVE_MODE__FP_DENORM_MASK 0xf0
#define SQ_WAVE_MODE__FP_DENORM__SHIFT 0x4
#define SQ_WAVE_MODE__DX10_CLAMP_MASK 0x100
#define SQ_WAVE_MODE__DX10_CLAMP__SHIFT 0x8
#define SQ_WAVE_MODE__IEEE_MASK 0x200
#define SQ_WAVE_MODE__IEEE__SHIFT 0x9
#define SQ_WAVE_MODE__LOD_CLAMPED_MASK 0x400
#define SQ_WAVE_MODE__LOD_CLAMPED__SHIFT 0xa
#define SQ_WAVE_MODE__DEBUG_EN_MASK 0x800
#define SQ_WAVE_MODE__DEBUG_EN__SHIFT 0xb
#define SQ_WAVE_MODE__EXCP_EN_MASK 0x1ff000
#define SQ_WAVE_MODE__EXCP_EN__SHIFT 0xc
#define SQ_WAVE_MODE__VSKIP_MASK 0x10000000
#define SQ_WAVE_MODE__VSKIP__SHIFT 0x1c
#define SQ_WAVE_MODE__CSP_MASK 0xe0000000
#define SQ_WAVE_MODE__CSP__SHIFT 0x1d
/* SQ_WAVE_TRAPSTS */
#define SQ_WAVE_TRAPSTS__EXCP_MASK 0x1ff
#define SQ_WAVE_TRAPSTS__EXCP__SHIFT 0x0
#define SQ_WAVE_TRAPSTS__EXCP_CYCLE_MASK 0x3f0000
#define SQ_WAVE_TRAPSTS__EXCP_CYCLE__SHIFT 0x10
#define SQ_WAVE_TRAPSTS__DP_RATE_MASK 0xe0000000
#define SQ_WAVE_TRAPSTS__DP_RATE__SHIFT 0x1d
/* SQ_WAVE_HW_ID */
#define SQ_WAVE_HW_ID__WAVE_ID_MASK 0xf
#define SQ_WAVE_HW_ID__WAVE_ID__SHIFT 0x0
#define SQ_WAVE_HW_ID__SIMD_ID_MASK 0x30
#define SQ_WAVE_HW_ID__SIMD_ID__SHIFT 0x4
#define SQ_WAVE_HW_ID__PIPE_ID_MASK 0xc0
#define SQ_WAVE_HW_ID__PIPE_ID__SHIFT 0x6
#define SQ_WAVE_HW_ID__CU_ID_MASK 0xf00
#define SQ_WAVE_HW_ID__CU_ID__SHIFT 0x8
#define SQ_WAVE_HW_ID__SH_ID_MASK 0x1000
#define SQ_WAVE_HW_ID__SH_ID__SHIFT 0xc
#define SQ_WAVE_HW_ID__SE_ID_MASK 0x6000
#define SQ_WAVE_HW_ID__SE_ID__SHIFT 0xd
#define SQ_WAVE_HW_ID__TG_ID_MASK 0xf0000
#define SQ_WAVE_HW_ID__TG_ID__SHIFT 0x10
#define SQ_WAVE_HW_ID__VM_ID_MASK 0xf00000
#define SQ_WAVE_HW_ID__VM_ID__SHIFT 0x14
#define SQ_WAVE_HW_ID__QUEUE_ID_MASK 0x7000000
#define SQ_WAVE_HW_ID__QUEUE_ID__SHIFT 0x18
#define SQ_WAVE_HW_ID__STATE_ID_MASK 0x38000000
#define SQ_WAVE_HW_ID__STATE_ID__SHIFT 0x1b
#define SQ_WAVE_HW_ID__ME_ID_MASK 0xc0000000
#define SQ_WAVE_HW_ID__ME_ID__SHIFT 0x1e
/*
 * SQ_WAVE_GPR_ALLOC, SQ_WAVE_LDS_ALLOC, SQ_WAVE_IB_STS, SQ_WAVE_M0,
 * SQ_WAVE_TBA_LO/HI and SQ_WAVE_TMA_LO/HI bit-field definitions
 * (mask + lowest-bit shift per field).
 *
 *   GPR_ALLOC: VGPR_BASE [5:0], VGPR_SIZE [13:8], SGPR_BASE [21:16],
 *              SGPR_SIZE [27:24].
 *   LDS_ALLOC: LDS_BASE [7:0], LDS_SIZE [20:12].
 *   IB_STS:    VM_CNT [3:0], EXP_CNT [6:4], LGKM_CNT [11:8],
 *              VALU_CNT [14:12].
 *   M0:        M0 [31:0].
 *   TBA_* / TMA_*: ADDR_LO [31:0] in the _LO register and ADDR_HI [7:0] in
 *              the _HI register.
 */
/* SQ_WAVE_GPR_ALLOC */
#define SQ_WAVE_GPR_ALLOC__VGPR_BASE_MASK 0x3f
#define SQ_WAVE_GPR_ALLOC__VGPR_BASE__SHIFT 0x0
#define SQ_WAVE_GPR_ALLOC__VGPR_SIZE_MASK 0x3f00
#define SQ_WAVE_GPR_ALLOC__VGPR_SIZE__SHIFT 0x8
#define SQ_WAVE_GPR_ALLOC__SGPR_BASE_MASK 0x3f0000
#define SQ_WAVE_GPR_ALLOC__SGPR_BASE__SHIFT 0x10
#define SQ_WAVE_GPR_ALLOC__SGPR_SIZE_MASK 0xf000000
#define SQ_WAVE_GPR_ALLOC__SGPR_SIZE__SHIFT 0x18
/* SQ_WAVE_LDS_ALLOC */
#define SQ_WAVE_LDS_ALLOC__LDS_BASE_MASK 0xff
#define SQ_WAVE_LDS_ALLOC__LDS_BASE__SHIFT 0x0
#define SQ_WAVE_LDS_ALLOC__LDS_SIZE_MASK 0x1ff000
#define SQ_WAVE_LDS_ALLOC__LDS_SIZE__SHIFT 0xc
/* SQ_WAVE_IB_STS */
#define SQ_WAVE_IB_STS__VM_CNT_MASK 0xf
#define SQ_WAVE_IB_STS__VM_CNT__SHIFT 0x0
#define SQ_WAVE_IB_STS__EXP_CNT_MASK 0x70
#define SQ_WAVE_IB_STS__EXP_CNT__SHIFT 0x4
#define SQ_WAVE_IB_STS__LGKM_CNT_MASK 0xf00
#define SQ_WAVE_IB_STS__LGKM_CNT__SHIFT 0x8
#define SQ_WAVE_IB_STS__VALU_CNT_MASK 0x7000
#define SQ_WAVE_IB_STS__VALU_CNT__SHIFT 0xc
/* SQ_WAVE_M0 */
#define SQ_WAVE_M0__M0_MASK 0xffffffff
#define SQ_WAVE_M0__M0__SHIFT 0x0
/* SQ_WAVE_TBA_LO / SQ_WAVE_TBA_HI */
#define SQ_WAVE_TBA_LO__ADDR_LO_MASK 0xffffffff
#define SQ_WAVE_TBA_LO__ADDR_LO__SHIFT 0x0
#define SQ_WAVE_TBA_HI__ADDR_HI_MASK 0xff
#define SQ_WAVE_TBA_HI__ADDR_HI__SHIFT 0x0
/* SQ_WAVE_TMA_LO / SQ_WAVE_TMA_HI */
#define SQ_WAVE_TMA_LO__ADDR_LO_MASK 0xffffffff
#define SQ_WAVE_TMA_LO__ADDR_LO__SHIFT 0x0
#define SQ_WAVE_TMA_HI__ADDR_HI_MASK 0xff
#define SQ_WAVE_TMA_HI__ADDR_HI__SHIFT 0x0
/*
 * SQ_WAVE_TTMP0 .. SQ_WAVE_TTMP11 bit-field definitions.
 * Each of the twelve registers has a single DATA field covering bits
 * [31:0] (mask 0xffffffff, shift 0).
 */
#define SQ_WAVE_TTMP0__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP0__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP1__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP1__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP2__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP2__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP3__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP3__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP4__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP4__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP5__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP5__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP6__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP6__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP7__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP7__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP8__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP8__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP9__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP9__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP10__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP10__DATA__SHIFT 0x0
#define SQ_WAVE_TTMP11__DATA_MASK 0xffffffff
#define SQ_WAVE_TTMP11__DATA__SHIFT 0x0
/*
 * SQ_DEBUG_STS_GLOBAL, _GLOBAL2, _GLOBAL3, SQ_DEBUG_STS_LOCAL and
 * SQ_DEBUG_CTRL_LOCAL bit-field definitions (mask + lowest-bit shift).
 *
 *   STS_GLOBAL:  BUSY [0], INTERRUPT_MSG_BUSY [1], WAVE_LEVEL_SH0 [15:4],
 *                WAVE_LEVEL_SH1 [27:16].
 *   STS_GLOBAL2: four 8-bit FIFO_LEVEL_* fields (GFX0, GFX1, IMMED, HOST)
 *                filling bits [31:0].
 *   STS_GLOBAL3: FIFO_LEVEL_HOST_CMD [3:0], FIFO_LEVEL_HOST_REG [7:4].
 *   STS_LOCAL:   BUSY [0], WAVE_LEVEL [9:4].
 *   CTRL_LOCAL:  UNUSED [7:0].
 */
/* SQ_DEBUG_STS_GLOBAL */
#define SQ_DEBUG_STS_GLOBAL__BUSY_MASK 0x1
#define SQ_DEBUG_STS_GLOBAL__BUSY__SHIFT 0x0
#define SQ_DEBUG_STS_GLOBAL__INTERRUPT_MSG_BUSY_MASK 0x2
#define SQ_DEBUG_STS_GLOBAL__INTERRUPT_MSG_BUSY__SHIFT 0x1
#define SQ_DEBUG_STS_GLOBAL__WAVE_LEVEL_SH0_MASK 0xfff0
#define SQ_DEBUG_STS_GLOBAL__WAVE_LEVEL_SH0__SHIFT 0x4
#define SQ_DEBUG_STS_GLOBAL__WAVE_LEVEL_SH1_MASK 0xfff0000
#define SQ_DEBUG_STS_GLOBAL__WAVE_LEVEL_SH1__SHIFT 0x10
/* SQ_DEBUG_STS_GLOBAL2 */
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK 0xff
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT 0x0
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX1_MASK 0xff00
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX1__SHIFT 0x8
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_IMMED_MASK 0xff0000
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_IMMED__SHIFT 0x10
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_HOST_MASK 0xff000000
#define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_HOST__SHIFT 0x18
/* SQ_DEBUG_STS_GLOBAL3 */
#define SQ_DEBUG_STS_GLOBAL3__FIFO_LEVEL_HOST_CMD_MASK 0xf
#define SQ_DEBUG_STS_GLOBAL3__FIFO_LEVEL_HOST_CMD__SHIFT 0x0
#define SQ_DEBUG_STS_GLOBAL3__FIFO_LEVEL_HOST_REG_MASK 0xf0
#define SQ_DEBUG_STS_GLOBAL3__FIFO_LEVEL_HOST_REG__SHIFT 0x4
/* SQ_DEBUG_STS_LOCAL */
#define SQ_DEBUG_STS_LOCAL__BUSY_MASK 0x1
#define SQ_DEBUG_STS_LOCAL__BUSY__SHIFT 0x0
#define SQ_DEBUG_STS_LOCAL__WAVE_LEVEL_MASK 0x3f0
#define SQ_DEBUG_STS_LOCAL__WAVE_LEVEL__SHIFT 0x4
/* SQ_DEBUG_CTRL_LOCAL */
#define SQ_DEBUG_CTRL_LOCAL__UNUSED_MASK 0xff
#define SQ_DEBUG_CTRL_LOCAL__UNUSED__SHIFT 0x0
/*
 * SH_MEM_BASES, SH_MEM_APE1_BASE, SH_MEM_APE1_LIMIT and SH_MEM_CONFIG
 * bit-field definitions (mask + lowest-bit shift per field).
 *
 *   SH_MEM_BASES:      PRIVATE_BASE [15:0], SHARED_BASE [31:16].
 *   SH_MEM_APE1_BASE:  BASE [31:0].
 *   SH_MEM_APE1_LIMIT: LIMIT [31:0].
 *   SH_MEM_CONFIG:     PTR32 [0], PRIVATE_ATC [1], ALIGNMENT_MODE [3:2],
 *                      DEFAULT_MTYPE [6:4], APE1_MTYPE [9:7].
 */
/* SH_MEM_BASES */
#define SH_MEM_BASES__PRIVATE_BASE_MASK 0xffff
#define SH_MEM_BASES__PRIVATE_BASE__SHIFT 0x0
#define SH_MEM_BASES__SHARED_BASE_MASK 0xffff0000
#define SH_MEM_BASES__SHARED_BASE__SHIFT 0x10
/* SH_MEM_APE1_BASE / SH_MEM_APE1_LIMIT */
#define SH_MEM_APE1_BASE__BASE_MASK 0xffffffff
#define SH_MEM_APE1_BASE__BASE__SHIFT 0x0
#define SH_MEM_APE1_LIMIT__LIMIT_MASK 0xffffffff
#define SH_MEM_APE1_LIMIT__LIMIT__SHIFT 0x0
/* SH_MEM_CONFIG */
#define SH_MEM_CONFIG__PTR32_MASK 0x1
#define SH_MEM_CONFIG__PTR32__SHIFT 0x0
#define SH_MEM_CONFIG__PRIVATE_ATC_MASK 0x2
#define SH_MEM_CONFIG__PRIVATE_ATC__SHIFT 0x1
#define SH_MEM_CONFIG__ALIGNMENT_MODE_MASK 0xc
#define SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT 0x2
#define SH_MEM_CONFIG__DEFAULT_MTYPE_MASK 0x70
#define SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT 0x4
#define SH_MEM_CONFIG__APE1_MTYPE_MASK 0x380
#define SH_MEM_CONFIG__APE1_MTYPE__SHIFT 0x7
/*
 * SQC_POLICY and SQC_VOLATILE field constants.
 *
 * SQC_POLICY layout as given by the masks below:
 *   bits  0-7  : DATA_L1_POLICY_0..7, one bit each
 *   bits  8-23 : DATA_L2_POLICY_0..7, two bits each
 *   bits 24-25 : INST_L2_POLICY
 */
#define SQC_POLICY__DATA_L1_POLICY_0_MASK 0x1
#define SQC_POLICY__DATA_L1_POLICY_0__SHIFT 0x0
#define SQC_POLICY__DATA_L1_POLICY_1_MASK 0x2
#define SQC_POLICY__DATA_L1_POLICY_1__SHIFT 0x1
#define SQC_POLICY__DATA_L1_POLICY_2_MASK 0x4
#define SQC_POLICY__DATA_L1_POLICY_2__SHIFT 0x2
#define SQC_POLICY__DATA_L1_POLICY_3_MASK 0x8
#define SQC_POLICY__DATA_L1_POLICY_3__SHIFT 0x3
#define SQC_POLICY__DATA_L1_POLICY_4_MASK 0x10
#define SQC_POLICY__DATA_L1_POLICY_4__SHIFT 0x4
#define SQC_POLICY__DATA_L1_POLICY_5_MASK 0x20
#define SQC_POLICY__DATA_L1_POLICY_5__SHIFT 0x5
#define SQC_POLICY__DATA_L1_POLICY_6_MASK 0x40
#define SQC_POLICY__DATA_L1_POLICY_6__SHIFT 0x6
#define SQC_POLICY__DATA_L1_POLICY_7_MASK 0x80
#define SQC_POLICY__DATA_L1_POLICY_7__SHIFT 0x7
#define SQC_POLICY__DATA_L2_POLICY_0_MASK 0x300
#define SQC_POLICY__DATA_L2_POLICY_0__SHIFT 0x8
#define SQC_POLICY__DATA_L2_POLICY_1_MASK 0xc00
#define SQC_POLICY__DATA_L2_POLICY_1__SHIFT 0xa
#define SQC_POLICY__DATA_L2_POLICY_2_MASK 0x3000
#define SQC_POLICY__DATA_L2_POLICY_2__SHIFT 0xc
#define SQC_POLICY__DATA_L2_POLICY_3_MASK 0xc000
#define SQC_POLICY__DATA_L2_POLICY_3__SHIFT 0xe
#define SQC_POLICY__DATA_L2_POLICY_4_MASK 0x30000
#define SQC_POLICY__DATA_L2_POLICY_4__SHIFT 0x10
#define SQC_POLICY__DATA_L2_POLICY_5_MASK 0xc0000
#define SQC_POLICY__DATA_L2_POLICY_5__SHIFT 0x12
#define SQC_POLICY__DATA_L2_POLICY_6_MASK 0x300000
#define SQC_POLICY__DATA_L2_POLICY_6__SHIFT 0x14
#define SQC_POLICY__DATA_L2_POLICY_7_MASK 0xc00000
#define SQC_POLICY__DATA_L2_POLICY_7__SHIFT 0x16
#define SQC_POLICY__INST_L2_POLICY_MASK 0x3000000
#define SQC_POLICY__INST_L2_POLICY__SHIFT 0x18
/* SQC_VOLATILE: DATA_L1 (bits 0-3), DATA_L2 (bits 4-7), INST_L2 (bit 8). */
#define SQC_VOLATILE__DATA_L1_MASK 0xf
#define SQC_VOLATILE__DATA_L1__SHIFT 0x0
#define SQC_VOLATILE__DATA_L2_MASK 0xf0
#define SQC_VOLATILE__DATA_L2__SHIFT 0x4
#define SQC_VOLATILE__INST_L2_MASK 0x100
#define SQC_VOLATILE__INST_L2__SHIFT 0x8
/*
 * SQ_THREAD_TRACE_WORD_CMN, _INST and _INST_PC_{1,2}_OF_2 field constants.
 *
 * All three "first word" layouts share TOKEN_TYPE in bits 0-3 and a
 * one-bit TIME_DELTA at bit 4; the remaining fields differ per layout.
 * The *_1_OF_2 / *_2_OF_2 suffixes are separate layouts whose shifts each
 * restart at bit 0 (presumably consecutive words of one token -- confirm
 * against the hardware documentation).
 */
#define SQ_THREAD_TRACE_WORD_CMN__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_CMN__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_CMN__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_CMN__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_INST__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_INST__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_INST__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_INST__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_INST__WAVE_ID_MASK 0x1e0
#define SQ_THREAD_TRACE_WORD_INST__WAVE_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_INST__SIMD_ID_MASK 0x600
#define SQ_THREAD_TRACE_WORD_INST__SIMD_ID__SHIFT 0x9
#define SQ_THREAD_TRACE_WORD_INST__SIZE_MASK 0x800
#define SQ_THREAD_TRACE_WORD_INST__SIZE__SHIFT 0xb
#define SQ_THREAD_TRACE_WORD_INST__INST_TYPE_MASK 0xf000
#define SQ_THREAD_TRACE_WORD_INST__INST_TYPE__SHIFT 0xc
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__WAVE_ID_MASK 0x1e0
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__WAVE_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__SIMD_ID_MASK 0x600
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__SIMD_ID__SHIFT 0x9
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__PC_LO_MASK 0xffff0000
#define SQ_THREAD_TRACE_WORD_INST_PC_1_OF_2__PC_LO__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_INST_PC_2_OF_2__PC_HI_MASK 0xffffff
#define SQ_THREAD_TRACE_WORD_INST_PC_2_OF_2__PC_HI__SHIFT 0x0
/*
 * SQ_THREAD_TRACE_WORD_INST_USERDATA_{1,2}_OF_2 and
 * SQ_THREAD_TRACE_WORD_TIMESTAMP_{1,2}_OF_2 field constants.
 *
 * In both *_1_OF_2 layouts the upper 16 bits hold the "LO" part
 * (DATA_LO / TIME_LO); the matching *_2_OF_2 layout holds the "HI" part
 * starting at bit 0 (16 bits for DATA_HI, 32 bits for TIME_HI).
 */
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__SH_ID_MASK 0x20
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__SH_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__CU_ID_MASK 0x3c0
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__CU_ID__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__WAVE_ID_MASK 0x3c00
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__WAVE_ID__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__SIMD_ID_MASK 0xc000
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__SIMD_ID__SHIFT 0xe
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__DATA_LO_MASK 0xffff0000
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_1_OF_2__DATA_LO__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2__DATA_HI_MASK 0xffff
#define SQ_THREAD_TRACE_WORD_INST_USERDATA_2_OF_2__DATA_HI__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2__TIME_LO_MASK 0xffff0000
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_1_OF_2__TIME_LO__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2__TIME_HI_MASK 0xffffffff
#define SQ_THREAD_TRACE_WORD_TIMESTAMP_2_OF_2__TIME_HI__SHIFT 0x0
/*
 * SQ_THREAD_TRACE_WORD_WAVE, _MISC and _WAVE_START field constants.
 *
 * WAVE and WAVE_START share the same low 16-bit layout (TOKEN_TYPE,
 * TIME_DELTA, SH_ID, CU_ID, WAVE_ID, SIMD_ID); WAVE_START adds fields in
 * bits 16-31. Note that MISC uses an 8-bit TIME_DELTA (mask 0xff0), unlike
 * the one-bit TIME_DELTA (mask 0x10) of the other two layouts.
 */
#define SQ_THREAD_TRACE_WORD_WAVE__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_WAVE__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_WAVE__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_WAVE__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_WAVE__SH_ID_MASK 0x20
#define SQ_THREAD_TRACE_WORD_WAVE__SH_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_WAVE__CU_ID_MASK 0x3c0
#define SQ_THREAD_TRACE_WORD_WAVE__CU_ID__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_WAVE__WAVE_ID_MASK 0x3c00
#define SQ_THREAD_TRACE_WORD_WAVE__WAVE_ID__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_WAVE__SIMD_ID_MASK 0xc000
#define SQ_THREAD_TRACE_WORD_WAVE__SIMD_ID__SHIFT 0xe
#define SQ_THREAD_TRACE_WORD_MISC__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_MISC__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_MISC__TIME_DELTA_MASK 0xff0
#define SQ_THREAD_TRACE_WORD_MISC__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_MISC__SH_ID_MASK 0x1000
#define SQ_THREAD_TRACE_WORD_MISC__SH_ID__SHIFT 0xc
#define SQ_THREAD_TRACE_WORD_MISC__MISC_TOKEN_TYPE_MASK 0xe000
#define SQ_THREAD_TRACE_WORD_MISC__MISC_TOKEN_TYPE__SHIFT 0xd
#define SQ_THREAD_TRACE_WORD_WAVE_START__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_WAVE_START__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_WAVE_START__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_WAVE_START__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_WAVE_START__SH_ID_MASK 0x20
#define SQ_THREAD_TRACE_WORD_WAVE_START__SH_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_WAVE_START__CU_ID_MASK 0x3c0
#define SQ_THREAD_TRACE_WORD_WAVE_START__CU_ID__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_WAVE_START__WAVE_ID_MASK 0x3c00
#define SQ_THREAD_TRACE_WORD_WAVE_START__WAVE_ID__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_WAVE_START__SIMD_ID_MASK 0xc000
#define SQ_THREAD_TRACE_WORD_WAVE_START__SIMD_ID__SHIFT 0xe
#define SQ_THREAD_TRACE_WORD_WAVE_START__DISPATCHER_MASK 0x1f0000
#define SQ_THREAD_TRACE_WORD_WAVE_START__DISPATCHER__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_WAVE_START__VS_NO_ALLOC_OR_GROUPED_MASK 0x200000
#define SQ_THREAD_TRACE_WORD_WAVE_START__VS_NO_ALLOC_OR_GROUPED__SHIFT 0x15
#define SQ_THREAD_TRACE_WORD_WAVE_START__COUNT_MASK 0x1fc00000
#define SQ_THREAD_TRACE_WORD_WAVE_START__COUNT__SHIFT 0x16
#define SQ_THREAD_TRACE_WORD_WAVE_START__TG_ID_MASK 0xe0000000
#define SQ_THREAD_TRACE_WORD_WAVE_START__TG_ID__SHIFT 0x1d
/*
 * SQ_THREAD_TRACE_WORD_REG_{1,2}_OF_2 and
 * SQ_THREAD_TRACE_WORD_REG_CS_{1,2}_OF_2 field constants.
 *
 * Both *_1_OF_2 layouts start with TOKEN_TYPE, TIME_DELTA, PIPE_ID and
 * ME_ID in bits 0-8. REG carries a 16-bit REG_ADDR in the upper half and a
 * full 32-bit DATA second word; REG_CS carries a 7-bit REG_ADDR (bits 9-15)
 * plus DATA_LO in the upper half, with DATA_HI in the second word.
 * In REG_1_OF_2 no field is defined for bit 13 (between REG_TYPE and
 * REG_PRIV).
 */
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__PIPE_ID_MASK 0x60
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__PIPE_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__ME_ID_MASK 0x180
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__ME_ID__SHIFT 0x7
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_DROPPED_PREV_MASK 0x200
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_DROPPED_PREV__SHIFT 0x9
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_TYPE_MASK 0x1c00
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_TYPE__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_PRIV_MASK 0x4000
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_PRIV__SHIFT 0xe
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_OP_MASK 0x8000
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_OP__SHIFT 0xf
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_ADDR_MASK 0xffff0000
#define SQ_THREAD_TRACE_WORD_REG_1_OF_2__REG_ADDR__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_REG_2_OF_2__DATA_MASK 0xffffffff
#define SQ_THREAD_TRACE_WORD_REG_2_OF_2__DATA__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__PIPE_ID_MASK 0x60
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__PIPE_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__ME_ID_MASK 0x180
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__ME_ID__SHIFT 0x7
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__REG_ADDR_MASK 0xfe00
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__REG_ADDR__SHIFT 0x9
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__DATA_LO_MASK 0xffff0000
#define SQ_THREAD_TRACE_WORD_REG_CS_1_OF_2__DATA_LO__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_REG_CS_2_OF_2__DATA_HI_MASK 0xffff
#define SQ_THREAD_TRACE_WORD_REG_CS_2_OF_2__DATA_HI__SHIFT 0x0
/*
 * SQ_THREAD_TRACE_WORD_EVENT and SQ_THREAD_TRACE_WORD_ISSUE field constants.
 *
 * ISSUE packs ten two-bit fields INST0..INST9 into bits 8-27, after
 * TOKEN_TYPE, TIME_DELTA and a two-bit SIMD_ID. No field is defined for
 * bit 9 of EVENT or bit 7 of ISSUE.
 */
#define SQ_THREAD_TRACE_WORD_EVENT__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_EVENT__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_EVENT__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_EVENT__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_EVENT__SH_ID_MASK 0x20
#define SQ_THREAD_TRACE_WORD_EVENT__SH_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_EVENT__STAGE_MASK 0x1c0
#define SQ_THREAD_TRACE_WORD_EVENT__STAGE__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_EVENT__EVENT_TYPE_MASK 0xfc00
#define SQ_THREAD_TRACE_WORD_EVENT__EVENT_TYPE__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_ISSUE__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_ISSUE__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_ISSUE__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_ISSUE__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_ISSUE__SIMD_ID_MASK 0x60
#define SQ_THREAD_TRACE_WORD_ISSUE__SIMD_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_ISSUE__INST0_MASK 0x300
#define SQ_THREAD_TRACE_WORD_ISSUE__INST0__SHIFT 0x8
#define SQ_THREAD_TRACE_WORD_ISSUE__INST1_MASK 0xc00
#define SQ_THREAD_TRACE_WORD_ISSUE__INST1__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_ISSUE__INST2_MASK 0x3000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST2__SHIFT 0xc
#define SQ_THREAD_TRACE_WORD_ISSUE__INST3_MASK 0xc000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST3__SHIFT 0xe
#define SQ_THREAD_TRACE_WORD_ISSUE__INST4_MASK 0x30000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST4__SHIFT 0x10
#define SQ_THREAD_TRACE_WORD_ISSUE__INST5_MASK 0xc0000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST5__SHIFT 0x12
#define SQ_THREAD_TRACE_WORD_ISSUE__INST6_MASK 0x300000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST6__SHIFT 0x14
#define SQ_THREAD_TRACE_WORD_ISSUE__INST7_MASK 0xc00000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST7__SHIFT 0x16
#define SQ_THREAD_TRACE_WORD_ISSUE__INST8_MASK 0x3000000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST8__SHIFT 0x18
#define SQ_THREAD_TRACE_WORD_ISSUE__INST9_MASK 0xc000000
#define SQ_THREAD_TRACE_WORD_ISSUE__INST9__SHIFT 0x1a
/*
 * SQ_THREAD_TRACE_WORD_PERF_{1,2}_OF_2 field constants.
 *
 * Four 13-bit counters are spread over the two layouts: CNTR0 sits in
 * bits 12-24 of word 1; CNTR1 is split into CNTR1_LO (7 bits, word 1
 * bits 25-31) and CNTR1_HI (6 bits, word 2 bits 0-5); CNTR2 and CNTR3
 * occupy word 2 bits 6-18 and 19-31.
 */
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__TOKEN_TYPE_MASK 0xf
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__TOKEN_TYPE__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__TIME_DELTA_MASK 0x10
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__TIME_DELTA__SHIFT 0x4
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__SH_ID_MASK 0x20
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__SH_ID__SHIFT 0x5
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CU_ID_MASK 0x3c0
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CU_ID__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR_BANK_MASK 0xc00
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR_BANK__SHIFT 0xa
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR0_MASK 0x1fff000
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR0__SHIFT 0xc
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR1_LO_MASK 0xfe000000
#define SQ_THREAD_TRACE_WORD_PERF_1_OF_2__CNTR1_LO__SHIFT 0x19
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR1_HI_MASK 0x3f
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR1_HI__SHIFT 0x0
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR2_MASK 0x7ffc0
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR2__SHIFT 0x6
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR3_MASK 0xfff80000
#define SQ_THREAD_TRACE_WORD_PERF_2_OF_2__CNTR3__SHIFT 0x13
/*
 * SQ_INTERRUPT_WORD_CMN, _AUTO and _WAVE field constants.
 *
 * All three layouts place SE_ID in bits 24-25 and ENCODING in bits 26-27;
 * CMN defines only those two shared fields. AUTO uses bits 0-7 as eight
 * single-bit flags; WAVE uses bits 0-23 for DATA, SH_ID, PRIV, VM_ID,
 * WAVE_ID, SIMD_ID and CU_ID.
 */
#define SQ_INTERRUPT_WORD_CMN__SE_ID_MASK 0x3000000
#define SQ_INTERRUPT_WORD_CMN__SE_ID__SHIFT 0x18
#define SQ_INTERRUPT_WORD_CMN__ENCODING_MASK 0xc000000
#define SQ_INTERRUPT_WORD_CMN__ENCODING__SHIFT 0x1a
#define SQ_INTERRUPT_WORD_AUTO__THREAD_TRACE_MASK 0x1
#define SQ_INTERRUPT_WORD_AUTO__THREAD_TRACE__SHIFT 0x0
#define SQ_INTERRUPT_WORD_AUTO__WLT_MASK 0x2
#define SQ_INTERRUPT_WORD_AUTO__WLT__SHIFT 0x1
#define SQ_INTERRUPT_WORD_AUTO__THREAD_TRACE_BUF_FULL_MASK 0x4
#define SQ_INTERRUPT_WORD_AUTO__THREAD_TRACE_BUF_FULL__SHIFT 0x2
#define SQ_INTERRUPT_WORD_AUTO__REG_TIMESTAMP_MASK 0x8
#define SQ_INTERRUPT_WORD_AUTO__REG_TIMESTAMP__SHIFT 0x3
#define SQ_INTERRUPT_WORD_AUTO__CMD_TIMESTAMP_MASK 0x10
#define SQ_INTERRUPT_WORD_AUTO__CMD_TIMESTAMP__SHIFT 0x4
#define SQ_INTERRUPT_WORD_AUTO__HOST_CMD_OVERFLOW_MASK 0x20
#define SQ_INTERRUPT_WORD_AUTO__HOST_CMD_OVERFLOW__SHIFT 0x5
#define SQ_INTERRUPT_WORD_AUTO__HOST_REG_OVERFLOW_MASK 0x40
#define SQ_INTERRUPT_WORD_AUTO__HOST_REG_OVERFLOW__SHIFT 0x6
#define SQ_INTERRUPT_WORD_AUTO__IMMED_OVERFLOW_MASK 0x80
#define SQ_INTERRUPT_WORD_AUTO__IMMED_OVERFLOW__SHIFT 0x7
#define SQ_INTERRUPT_WORD_AUTO__SE_ID_MASK 0x3000000
#define SQ_INTERRUPT_WORD_AUTO__SE_ID__SHIFT 0x18
#define SQ_INTERRUPT_WORD_AUTO__ENCODING_MASK 0xc000000
#define SQ_INTERRUPT_WORD_AUTO__ENCODING__SHIFT 0x1a
#define SQ_INTERRUPT_WORD_WAVE__DATA_MASK 0xff
#define SQ_INTERRUPT_WORD_WAVE__DATA__SHIFT 0x0
#define SQ_INTERRUPT_WORD_WAVE__SH_ID_MASK 0x100
#define SQ_INTERRUPT_WORD_WAVE__SH_ID__SHIFT 0x8
#define SQ_INTERRUPT_WORD_WAVE__PRIV_MASK 0x200
#define SQ_INTERRUPT_WORD_WAVE__PRIV__SHIFT 0x9
#define SQ_INTERRUPT_WORD_WAVE__VM_ID_MASK 0x3c00
#define SQ_INTERRUPT_WORD_WAVE__VM_ID__SHIFT 0xa
#define SQ_INTERRUPT_WORD_WAVE__WAVE_ID_MASK 0x3c000
#define SQ_INTERRUPT_WORD_WAVE__WAVE_ID__SHIFT 0xe
#define SQ_INTERRUPT_WORD_WAVE__SIMD_ID_MASK 0xc0000
#define SQ_INTERRUPT_WORD_WAVE__SIMD_ID__SHIFT 0x12
#define SQ_INTERRUPT_WORD_WAVE__CU_ID_MASK 0xf00000
#define SQ_INTERRUPT_WORD_WAVE__CU_ID__SHIFT 0x14
#define SQ_INTERRUPT_WORD_WAVE__SE_ID_MASK 0x3000000
#define SQ_INTERRUPT_WORD_WAVE__SE_ID__SHIFT 0x18
#define SQ_INTERRUPT_WORD_WAVE__ENCODING_MASK 0xc000000
#define SQ_INTERRUPT_WORD_WAVE__ENCODING__SHIFT 0x1a
/*
 * SQ_SOP2 and SQ_VOP1 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit). In both layouts the
 * fields tile all 32 bits with no gaps; ENCODING occupies the topmost bits
 * (2 bits for SOP2, 7 bits for VOP1).
 * NOTE(review): the OP/ENCODING/SDST/VDST names suggest shader instruction
 * word formats -- confirm against the ISA documentation.
 */
#define SQ_SOP2__SSRC0_MASK 0xff
#define SQ_SOP2__SSRC0__SHIFT 0x0
#define SQ_SOP2__SSRC1_MASK 0xff00
#define SQ_SOP2__SSRC1__SHIFT 0x8
#define SQ_SOP2__SDST_MASK 0x7f0000
#define SQ_SOP2__SDST__SHIFT 0x10
#define SQ_SOP2__OP_MASK 0x3f800000
#define SQ_SOP2__OP__SHIFT 0x17
#define SQ_SOP2__ENCODING_MASK 0xc0000000
#define SQ_SOP2__ENCODING__SHIFT 0x1e
#define SQ_VOP1__SRC0_MASK 0x1ff
#define SQ_VOP1__SRC0__SHIFT 0x0
#define SQ_VOP1__OP_MASK 0x1fe00
#define SQ_VOP1__OP__SHIFT 0x9
#define SQ_VOP1__VDST_MASK 0x1fe0000
#define SQ_VOP1__VDST__SHIFT 0x11
#define SQ_VOP1__ENCODING_MASK 0xfe000000
#define SQ_VOP1__ENCODING__SHIFT 0x19
/*
 * SQ_MTBUF_1, SQ_EXP_1 and SQ_MUBUF_1 field constants.
 *
 * SQ_MTBUF_1 and SQ_MUBUF_1 define identical fields with identical masks
 * and shifts (VADDR, VDATA, SRSRC, SLC, TFE, SOFFSET); neither defines a
 * field for bit 21. SQ_EXP_1 is four byte-wide VSRC0..VSRC3 fields.
 * NOTE(review): the _1 suffix presumably denotes the second dword of a
 * 64-bit encoding whose first dword is the matching _0 layout -- confirm.
 */
#define SQ_MTBUF_1__VADDR_MASK 0xff
#define SQ_MTBUF_1__VADDR__SHIFT 0x0
#define SQ_MTBUF_1__VDATA_MASK 0xff00
#define SQ_MTBUF_1__VDATA__SHIFT 0x8
#define SQ_MTBUF_1__SRSRC_MASK 0x1f0000
#define SQ_MTBUF_1__SRSRC__SHIFT 0x10
#define SQ_MTBUF_1__SLC_MASK 0x400000
#define SQ_MTBUF_1__SLC__SHIFT 0x16
#define SQ_MTBUF_1__TFE_MASK 0x800000
#define SQ_MTBUF_1__TFE__SHIFT 0x17
#define SQ_MTBUF_1__SOFFSET_MASK 0xff000000
#define SQ_MTBUF_1__SOFFSET__SHIFT 0x18
#define SQ_EXP_1__VSRC0_MASK 0xff
#define SQ_EXP_1__VSRC0__SHIFT 0x0
#define SQ_EXP_1__VSRC1_MASK 0xff00
#define SQ_EXP_1__VSRC1__SHIFT 0x8
#define SQ_EXP_1__VSRC2_MASK 0xff0000
#define SQ_EXP_1__VSRC2__SHIFT 0x10
#define SQ_EXP_1__VSRC3_MASK 0xff000000
#define SQ_EXP_1__VSRC3__SHIFT 0x18
#define SQ_MUBUF_1__VADDR_MASK 0xff
#define SQ_MUBUF_1__VADDR__SHIFT 0x0
#define SQ_MUBUF_1__VDATA_MASK 0xff00
#define SQ_MUBUF_1__VDATA__SHIFT 0x8
#define SQ_MUBUF_1__SRSRC_MASK 0x1f0000
#define SQ_MUBUF_1__SRSRC__SHIFT 0x10
#define SQ_MUBUF_1__SLC_MASK 0x400000
#define SQ_MUBUF_1__SLC__SHIFT 0x16
#define SQ_MUBUF_1__TFE_MASK 0x800000
#define SQ_MUBUF_1__TFE__SHIFT 0x17
#define SQ_MUBUF_1__SOFFSET_MASK 0xff000000
#define SQ_MUBUF_1__SOFFSET__SHIFT 0x18
/*
 * SQ_INST, SQ_EXP_0 and SQ_MUBUF_0 field constants.
 *
 * SQ_INST exposes the whole 32-bit value as a single ENCODING field.
 * SQ_EXP_0 and SQ_MUBUF_0 both keep a 6-bit ENCODING field in bits 26-31.
 * SQ_EXP_0 defines no fields for bits 13-25; SQ_MUBUF_0 defines none for
 * bits 17 and 25.
 */
#define SQ_INST__ENCODING_MASK 0xffffffff
#define SQ_INST__ENCODING__SHIFT 0x0
#define SQ_EXP_0__EN_MASK 0xf
#define SQ_EXP_0__EN__SHIFT 0x0
#define SQ_EXP_0__TGT_MASK 0x3f0
#define SQ_EXP_0__TGT__SHIFT 0x4
#define SQ_EXP_0__COMPR_MASK 0x400
#define SQ_EXP_0__COMPR__SHIFT 0xa
#define SQ_EXP_0__DONE_MASK 0x800
#define SQ_EXP_0__DONE__SHIFT 0xb
#define SQ_EXP_0__VM_MASK 0x1000
#define SQ_EXP_0__VM__SHIFT 0xc
#define SQ_EXP_0__ENCODING_MASK 0xfc000000
#define SQ_EXP_0__ENCODING__SHIFT 0x1a
#define SQ_MUBUF_0__OFFSET_MASK 0xfff
#define SQ_MUBUF_0__OFFSET__SHIFT 0x0
#define SQ_MUBUF_0__OFFEN_MASK 0x1000
#define SQ_MUBUF_0__OFFEN__SHIFT 0xc
#define SQ_MUBUF_0__IDXEN_MASK 0x2000
#define SQ_MUBUF_0__IDXEN__SHIFT 0xd
#define SQ_MUBUF_0__GLC_MASK 0x4000
#define SQ_MUBUF_0__GLC__SHIFT 0xe
#define SQ_MUBUF_0__ADDR64_MASK 0x8000
#define SQ_MUBUF_0__ADDR64__SHIFT 0xf
#define SQ_MUBUF_0__LDS_MASK 0x10000
#define SQ_MUBUF_0__LDS__SHIFT 0x10
#define SQ_MUBUF_0__OP_MASK 0x1fc0000
#define SQ_MUBUF_0__OP__SHIFT 0x12
#define SQ_MUBUF_0__ENCODING_MASK 0xfc000000
#define SQ_MUBUF_0__ENCODING__SHIFT 0x1a
/*
 * SQ_VOP3_0 and SQ_VOP2 field constants.
 *
 * SQ_VOP3_0: VDST (bits 0-7), ABS (8-10), CLAMP (11), OP (17-25, 9 bits)
 * and ENCODING (26-31); no fields are defined for bits 12-16.
 * SQ_VOP2: fields tile all 32 bits, ending in a one-bit ENCODING at bit 31.
 */
#define SQ_VOP3_0__VDST_MASK 0xff
#define SQ_VOP3_0__VDST__SHIFT 0x0
#define SQ_VOP3_0__ABS_MASK 0x700
#define SQ_VOP3_0__ABS__SHIFT 0x8
#define SQ_VOP3_0__CLAMP_MASK 0x800
#define SQ_VOP3_0__CLAMP__SHIFT 0xb
#define SQ_VOP3_0__OP_MASK 0x3fe0000
#define SQ_VOP3_0__OP__SHIFT 0x11
#define SQ_VOP3_0__ENCODING_MASK 0xfc000000
#define SQ_VOP3_0__ENCODING__SHIFT 0x1a
#define SQ_VOP2__SRC0_MASK 0x1ff
#define SQ_VOP2__SRC0__SHIFT 0x0
#define SQ_VOP2__VSRC1_MASK 0x1fe00
#define SQ_VOP2__VSRC1__SHIFT 0x9
#define SQ_VOP2__VDST_MASK 0x1fe0000
#define SQ_VOP2__VDST__SHIFT 0x11
#define SQ_VOP2__OP_MASK 0x7e000000
#define SQ_VOP2__OP__SHIFT 0x19
#define SQ_VOP2__ENCODING_MASK 0x80000000
#define SQ_VOP2__ENCODING__SHIFT 0x1f
/*
 * SQ_MTBUF_0 and SQ_SOPP field constants.
 *
 * SQ_MTBUF_0: fields tile all 32 bits (OFFSET, OFFEN, IDXEN, GLC, ADDR64,
 * OP, DFMT, NFMT, ENCODING). SQ_SOPP: 16-bit SIMM16, 7-bit OP and a 9-bit
 * ENCODING in bits 23-31.
 */
#define SQ_MTBUF_0__OFFSET_MASK 0xfff
#define SQ_MTBUF_0__OFFSET__SHIFT 0x0
#define SQ_MTBUF_0__OFFEN_MASK 0x1000
#define SQ_MTBUF_0__OFFEN__SHIFT 0xc
#define SQ_MTBUF_0__IDXEN_MASK 0x2000
#define SQ_MTBUF_0__IDXEN__SHIFT 0xd
#define SQ_MTBUF_0__GLC_MASK 0x4000
#define SQ_MTBUF_0__GLC__SHIFT 0xe
#define SQ_MTBUF_0__ADDR64_MASK 0x8000
#define SQ_MTBUF_0__ADDR64__SHIFT 0xf
#define SQ_MTBUF_0__OP_MASK 0x70000
#define SQ_MTBUF_0__OP__SHIFT 0x10
#define SQ_MTBUF_0__DFMT_MASK 0x780000
#define SQ_MTBUF_0__DFMT__SHIFT 0x13
#define SQ_MTBUF_0__NFMT_MASK 0x3800000
#define SQ_MTBUF_0__NFMT__SHIFT 0x17
#define SQ_MTBUF_0__ENCODING_MASK 0xfc000000
#define SQ_MTBUF_0__ENCODING__SHIFT 0x1a
#define SQ_SOPP__SIMM16_MASK 0xffff
#define SQ_SOPP__SIMM16__SHIFT 0x0
#define SQ_SOPP__OP_MASK 0x7f0000
#define SQ_SOPP__OP__SHIFT 0x10
#define SQ_SOPP__ENCODING_MASK 0xff800000
#define SQ_SOPP__ENCODING__SHIFT 0x17
/*
 * SQ_FLAT_0, SQ_VOP3_0_SDST_ENC and SQ_MIMG_1 field constants.
 *
 * SQ_FLAT_0 defines no fields below bit 16. SQ_VOP3_0_SDST_ENC keeps the
 * same VDST/OP/ENCODING positions as SQ_VOP3_0 but defines a 7-bit SDST
 * field in bits 8-14. SQ_MIMG_1 defines fields in bits 0-25 only.
 */
#define SQ_FLAT_0__GLC_MASK 0x10000
#define SQ_FLAT_0__GLC__SHIFT 0x10
#define SQ_FLAT_0__SLC_MASK 0x20000
#define SQ_FLAT_0__SLC__SHIFT 0x11
#define SQ_FLAT_0__OP_MASK 0x1fc0000
#define SQ_FLAT_0__OP__SHIFT 0x12
#define SQ_FLAT_0__ENCODING_MASK 0xfc000000
#define SQ_FLAT_0__ENCODING__SHIFT 0x1a
#define SQ_VOP3_0_SDST_ENC__VDST_MASK 0xff
#define SQ_VOP3_0_SDST_ENC__VDST__SHIFT 0x0
#define SQ_VOP3_0_SDST_ENC__SDST_MASK 0x7f00
#define SQ_VOP3_0_SDST_ENC__SDST__SHIFT 0x8
#define SQ_VOP3_0_SDST_ENC__OP_MASK 0x3fe0000
#define SQ_VOP3_0_SDST_ENC__OP__SHIFT 0x11
#define SQ_VOP3_0_SDST_ENC__ENCODING_MASK 0xfc000000
#define SQ_VOP3_0_SDST_ENC__ENCODING__SHIFT 0x1a
#define SQ_MIMG_1__VADDR_MASK 0xff
#define SQ_MIMG_1__VADDR__SHIFT 0x0
#define SQ_MIMG_1__VDATA_MASK 0xff00
#define SQ_MIMG_1__VDATA__SHIFT 0x8
#define SQ_MIMG_1__SRSRC_MASK 0x1f0000
#define SQ_MIMG_1__SRSRC__SHIFT 0x10
#define SQ_MIMG_1__SSAMP_MASK 0x3e00000
#define SQ_MIMG_1__SSAMP__SHIFT 0x15
/*
 * SQ_SMRD, SQ_SOP1 and SQ_SOPC field constants.
 *
 * In each layout the fields tile all 32 bits. SQ_SMRD ends in a 5-bit
 * ENCODING (bits 27-31); SQ_SOP1 and SQ_SOPC both end in a 9-bit ENCODING
 * (bits 23-31).
 */
#define SQ_SMRD__OFFSET_MASK 0xff
#define SQ_SMRD__OFFSET__SHIFT 0x0
#define SQ_SMRD__IMM_MASK 0x100
#define SQ_SMRD__IMM__SHIFT 0x8
#define SQ_SMRD__SBASE_MASK 0x7e00
#define SQ_SMRD__SBASE__SHIFT 0x9
#define SQ_SMRD__SDST_MASK 0x3f8000
#define SQ_SMRD__SDST__SHIFT 0xf
#define SQ_SMRD__OP_MASK 0x7c00000
#define SQ_SMRD__OP__SHIFT 0x16
#define SQ_SMRD__ENCODING_MASK 0xf8000000
#define SQ_SMRD__ENCODING__SHIFT 0x1b
#define SQ_SOP1__SSRC0_MASK 0xff
#define SQ_SOP1__SSRC0__SHIFT 0x0
#define SQ_SOP1__OP_MASK 0xff00
#define SQ_SOP1__OP__SHIFT 0x8
#define SQ_SOP1__SDST_MASK 0x7f0000
#define SQ_SOP1__SDST__SHIFT 0x10
#define SQ_SOP1__ENCODING_MASK 0xff800000
#define SQ_SOP1__ENCODING__SHIFT 0x17
#define SQ_SOPC__SSRC0_MASK 0xff
#define SQ_SOPC__SSRC0__SHIFT 0x0
#define SQ_SOPC__SSRC1_MASK 0xff00
#define SQ_SOPC__SSRC1__SHIFT 0x8
#define SQ_SOPC__OP_MASK 0x7f0000
#define SQ_SOPC__OP__SHIFT 0x10
#define SQ_SOPC__ENCODING_MASK 0xff800000
#define SQ_SOPC__ENCODING__SHIFT 0x17
/*
 * SQ_FLAT_1, SQ_DS_1 and SQ_VOP3_1 field constants.
 *
 * SQ_FLAT_1: ADDR and DATA bytes, TFE at bit 23, VDST in the top byte; no
 * fields are defined for bits 16-22. SQ_DS_1: four byte-wide fields.
 * SQ_VOP3_1: three 9-bit sources followed by OMOD (2 bits) and NEG (3 bits).
 */
#define SQ_FLAT_1__ADDR_MASK 0xff
#define SQ_FLAT_1__ADDR__SHIFT 0x0
#define SQ_FLAT_1__DATA_MASK 0xff00
#define SQ_FLAT_1__DATA__SHIFT 0x8
#define SQ_FLAT_1__TFE_MASK 0x800000
#define SQ_FLAT_1__TFE__SHIFT 0x17
#define SQ_FLAT_1__VDST_MASK 0xff000000
#define SQ_FLAT_1__VDST__SHIFT 0x18
#define SQ_DS_1__ADDR_MASK 0xff
#define SQ_DS_1__ADDR__SHIFT 0x0
#define SQ_DS_1__DATA0_MASK 0xff00
#define SQ_DS_1__DATA0__SHIFT 0x8
#define SQ_DS_1__DATA1_MASK 0xff0000
#define SQ_DS_1__DATA1__SHIFT 0x10
#define SQ_DS_1__VDST_MASK 0xff000000
#define SQ_DS_1__VDST__SHIFT 0x18
#define SQ_VOP3_1__SRC0_MASK 0x1ff
#define SQ_VOP3_1__SRC0__SHIFT 0x0
#define SQ_VOP3_1__SRC1_MASK 0x3fe00
#define SQ_VOP3_1__SRC1__SHIFT 0x9
#define SQ_VOP3_1__SRC2_MASK 0x7fc0000
#define SQ_VOP3_1__SRC2__SHIFT 0x12
#define SQ_VOP3_1__OMOD_MASK 0x18000000
#define SQ_VOP3_1__OMOD__SHIFT 0x1b
#define SQ_VOP3_1__NEG_MASK 0xe0000000
#define SQ_VOP3_1__NEG__SHIFT 0x1d
/*
 * SQ_MIMG_0 and SQ_SOPK field constants.
 *
 * SQ_MIMG_0 defines no fields for bits 0-7; its fields start with the
 * 4-bit DMASK at bit 8 and end with a 6-bit ENCODING in bits 26-31.
 * SQ_SOPK: SIMM16, 7-bit SDST, 5-bit OP and a 4-bit ENCODING tile all
 * 32 bits.
 */
#define SQ_MIMG_0__DMASK_MASK 0xf00
#define SQ_MIMG_0__DMASK__SHIFT 0x8
#define SQ_MIMG_0__UNORM_MASK 0x1000
#define SQ_MIMG_0__UNORM__SHIFT 0xc
#define SQ_MIMG_0__GLC_MASK 0x2000
#define SQ_MIMG_0__GLC__SHIFT 0xd
#define SQ_MIMG_0__DA_MASK 0x4000
#define SQ_MIMG_0__DA__SHIFT 0xe
#define SQ_MIMG_0__R128_MASK 0x8000
#define SQ_MIMG_0__R128__SHIFT 0xf
#define SQ_MIMG_0__TFE_MASK 0x10000
#define SQ_MIMG_0__TFE__SHIFT 0x10
#define SQ_MIMG_0__LWE_MASK 0x20000
#define SQ_MIMG_0__LWE__SHIFT 0x11
#define SQ_MIMG_0__OP_MASK 0x1fc0000
#define SQ_MIMG_0__OP__SHIFT 0x12
#define SQ_MIMG_0__SLC_MASK 0x2000000
#define SQ_MIMG_0__SLC__SHIFT 0x19
#define SQ_MIMG_0__ENCODING_MASK 0xfc000000
#define SQ_MIMG_0__ENCODING__SHIFT 0x1a
#define SQ_SOPK__SIMM16_MASK 0xffff
#define SQ_SOPK__SIMM16__SHIFT 0x0
#define SQ_SOPK__SDST_MASK 0x7f0000
#define SQ_SOPK__SDST__SHIFT 0x10
#define SQ_SOPK__OP_MASK 0xf800000
#define SQ_SOPK__OP__SHIFT 0x17
#define SQ_SOPK__ENCODING_MASK 0xf0000000
#define SQ_SOPK__ENCODING__SHIFT 0x1c
/*
 * SQ_DS_0, SQ_VOPC and SQ_VINTRP field constants.
 *
 * SQ_DS_0: two byte-wide offsets, GDS at bit 17, 8-bit OP and a 6-bit
 * ENCODING; no field is defined for bit 16. SQ_VOPC and SQ_VINTRP tile all
 * 32 bits, ending in a 7-bit and a 6-bit ENCODING respectively.
 */
#define SQ_DS_0__OFFSET0_MASK 0xff
#define SQ_DS_0__OFFSET0__SHIFT 0x0
#define SQ_DS_0__OFFSET1_MASK 0xff00
#define SQ_DS_0__OFFSET1__SHIFT 0x8
#define SQ_DS_0__GDS_MASK 0x20000
#define SQ_DS_0__GDS__SHIFT 0x11
#define SQ_DS_0__OP_MASK 0x3fc0000
#define SQ_DS_0__OP__SHIFT 0x12
#define SQ_DS_0__ENCODING_MASK 0xfc000000
#define SQ_DS_0__ENCODING__SHIFT 0x1a
#define SQ_VOPC__SRC0_MASK 0x1ff
#define SQ_VOPC__SRC0__SHIFT 0x0
#define SQ_VOPC__VSRC1_MASK 0x1fe00
#define SQ_VOPC__VSRC1__SHIFT 0x9
#define SQ_VOPC__OP_MASK 0x1fe0000
#define SQ_VOPC__OP__SHIFT 0x11
#define SQ_VOPC__ENCODING_MASK 0xfe000000
#define SQ_VOPC__ENCODING__SHIFT 0x19
#define SQ_VINTRP__VSRC_MASK 0xff
#define SQ_VINTRP__VSRC__SHIFT 0x0
#define SQ_VINTRP__ATTRCHAN_MASK 0x300
#define SQ_VINTRP__ATTRCHAN__SHIFT 0x8
#define SQ_VINTRP__ATTR_MASK 0xfc00
#define SQ_VINTRP__ATTR__SHIFT 0xa
#define SQ_VINTRP__OP_MASK 0x30000
#define SQ_VINTRP__OP__SHIFT 0x10
#define SQ_VINTRP__VDST_MASK 0x3fc0000
#define SQ_VINTRP__VDST__SHIFT 0x12
#define SQ_VINTRP__ENCODING_MASK 0xfc000000
#define SQ_VINTRP__ENCODING__SHIFT 0x1a
/*
 * CGTT_SX_CLK_CTRL0 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit).
 *
 * Layout: ON_DELAY (bits 0-3), OFF_HYSTERESIS (bits 4-11), RESERVED
 * (bits 12-23), then eight single-bit SOFT_OVERRIDE flags numbered in
 * descending order: SOFT_OVERRIDE7 is bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define CGTT_SX_CLK_CTRL0__ON_DELAY_MASK 0xf
#define CGTT_SX_CLK_CTRL0__ON_DELAY__SHIFT 0x0
#define CGTT_SX_CLK_CTRL0__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SX_CLK_CTRL0__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SX_CLK_CTRL0__RESERVED_MASK 0xfff000
#define CGTT_SX_CLK_CTRL0__RESERVED__SHIFT 0xc
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SX_CLK_CTRL0__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_SX_CLK_CTRL1 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit).
 *
 * Layout: ON_DELAY (bits 0-3), OFF_HYSTERESIS (bits 4-11), RESERVED
 * (bits 12-23), then eight single-bit SOFT_OVERRIDE flags numbered in
 * descending order: SOFT_OVERRIDE7 is bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define CGTT_SX_CLK_CTRL1__ON_DELAY_MASK 0xf
#define CGTT_SX_CLK_CTRL1__ON_DELAY__SHIFT 0x0
#define CGTT_SX_CLK_CTRL1__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SX_CLK_CTRL1__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SX_CLK_CTRL1__RESERVED_MASK 0xfff000
#define CGTT_SX_CLK_CTRL1__RESERVED__SHIFT 0xc
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SX_CLK_CTRL1__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_SX_CLK_CTRL2 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit).
 *
 * Layout: ON_DELAY (bits 0-3), OFF_HYSTERESIS (bits 4-11), RESERVED
 * (bits 12-23), then eight single-bit SOFT_OVERRIDE flags numbered in
 * descending order: SOFT_OVERRIDE7 is bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define CGTT_SX_CLK_CTRL2__ON_DELAY_MASK 0xf
#define CGTT_SX_CLK_CTRL2__ON_DELAY__SHIFT 0x0
#define CGTT_SX_CLK_CTRL2__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SX_CLK_CTRL2__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SX_CLK_CTRL2__RESERVED_MASK 0xfff000
#define CGTT_SX_CLK_CTRL2__RESERVED__SHIFT 0xc
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SX_CLK_CTRL2__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_SX_CLK_CTRL3 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit).
 *
 * Layout: ON_DELAY (bits 0-3), OFF_HYSTERESIS (bits 4-11), RESERVED
 * (bits 12-23), then eight single-bit SOFT_OVERRIDE flags numbered in
 * descending order: SOFT_OVERRIDE7 is bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define CGTT_SX_CLK_CTRL3__ON_DELAY_MASK 0xf
#define CGTT_SX_CLK_CTRL3__ON_DELAY__SHIFT 0x0
#define CGTT_SX_CLK_CTRL3__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SX_CLK_CTRL3__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SX_CLK_CTRL3__RESERVED_MASK 0xfff000
#define CGTT_SX_CLK_CTRL3__RESERVED__SHIFT 0xc
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SX_CLK_CTRL3__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_SX_CLK_CTRL4 field constants (<FIELD>_MASK = bit mask,
 * <FIELD>__SHIFT = index of the field's lowest bit).
 *
 * Layout: ON_DELAY (bits 0-3), OFF_HYSTERESIS (bits 4-11), RESERVED
 * (bits 12-23), then eight single-bit SOFT_OVERRIDE flags numbered in
 * descending order: SOFT_OVERRIDE7 is bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define CGTT_SX_CLK_CTRL4__ON_DELAY_MASK 0xf
#define CGTT_SX_CLK_CTRL4__ON_DELAY__SHIFT 0x0
#define CGTT_SX_CLK_CTRL4__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_SX_CLK_CTRL4__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_SX_CLK_CTRL4__RESERVED_MASK 0xfff000
#define CGTT_SX_CLK_CTRL4__RESERVED__SHIFT 0xc
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_SX_CLK_CTRL4__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * SX_DEBUG_BUSY field layout.
 *
 * 32 one-bit fields covering bits 0..31, listed in ascending bit order.
 * For every field, <FIELD>_MASK selects the bit inside the 32-bit value and
 * <FIELD>__SHIFT is the index of that bit, so a field is extracted with
 * (value & <FIELD>_MASK) >> <FIELD>__SHIFT.
 */
#define SX_DEBUG_BUSY__POS_FREE_OR_VALIDS_MASK 0x1
#define SX_DEBUG_BUSY__POS_FREE_OR_VALIDS__SHIFT 0x0
#define SX_DEBUG_BUSY__POS_REQUESTER_BUSY_MASK 0x2
#define SX_DEBUG_BUSY__POS_REQUESTER_BUSY__SHIFT 0x1
#define SX_DEBUG_BUSY__PA_SX_BUSY_MASK 0x4
#define SX_DEBUG_BUSY__PA_SX_BUSY__SHIFT 0x2
#define SX_DEBUG_BUSY__POS_SCBD_BUSY_MASK 0x8
#define SX_DEBUG_BUSY__POS_SCBD_BUSY__SHIFT 0x3
#define SX_DEBUG_BUSY__POS_BANK3VAL3_BUSY_MASK 0x10
#define SX_DEBUG_BUSY__POS_BANK3VAL3_BUSY__SHIFT 0x4
#define SX_DEBUG_BUSY__POS_BANK3VAL2_BUSY_MASK 0x20
#define SX_DEBUG_BUSY__POS_BANK3VAL2_BUSY__SHIFT 0x5
#define SX_DEBUG_BUSY__POS_BANK3VAL1_BUSY_MASK 0x40
#define SX_DEBUG_BUSY__POS_BANK3VAL1_BUSY__SHIFT 0x6
#define SX_DEBUG_BUSY__POS_BANK3VAL0_BUSY_MASK 0x80
#define SX_DEBUG_BUSY__POS_BANK3VAL0_BUSY__SHIFT 0x7
#define SX_DEBUG_BUSY__POS_BANK2VAL3_BUSY_MASK 0x100
#define SX_DEBUG_BUSY__POS_BANK2VAL3_BUSY__SHIFT 0x8
#define SX_DEBUG_BUSY__POS_BANK2VAL2_BUSY_MASK 0x200
#define SX_DEBUG_BUSY__POS_BANK2VAL2_BUSY__SHIFT 0x9
#define SX_DEBUG_BUSY__POS_BANK2VAL1_BUSY_MASK 0x400
#define SX_DEBUG_BUSY__POS_BANK2VAL1_BUSY__SHIFT 0xa
#define SX_DEBUG_BUSY__POS_BANK2VAL0_BUSY_MASK 0x800
#define SX_DEBUG_BUSY__POS_BANK2VAL0_BUSY__SHIFT 0xb
#define SX_DEBUG_BUSY__POS_BANK1VAL3_BUSY_MASK 0x1000
#define SX_DEBUG_BUSY__POS_BANK1VAL3_BUSY__SHIFT 0xc
#define SX_DEBUG_BUSY__POS_BANK1VAL2_BUSY_MASK 0x2000
#define SX_DEBUG_BUSY__POS_BANK1VAL2_BUSY__SHIFT 0xd
#define SX_DEBUG_BUSY__POS_BANK1VAL1_BUSY_MASK 0x4000
#define SX_DEBUG_BUSY__POS_BANK1VAL1_BUSY__SHIFT 0xe
#define SX_DEBUG_BUSY__POS_BANK1VAL0_BUSY_MASK 0x8000
#define SX_DEBUG_BUSY__POS_BANK1VAL0_BUSY__SHIFT 0xf
#define SX_DEBUG_BUSY__POS_BANK0VAL3_BUSY_MASK 0x10000
#define SX_DEBUG_BUSY__POS_BANK0VAL3_BUSY__SHIFT 0x10
#define SX_DEBUG_BUSY__POS_BANK0VAL2_BUSY_MASK 0x20000
#define SX_DEBUG_BUSY__POS_BANK0VAL2_BUSY__SHIFT 0x11
#define SX_DEBUG_BUSY__POS_BANK0VAL1_BUSY_MASK 0x40000
#define SX_DEBUG_BUSY__POS_BANK0VAL1_BUSY__SHIFT 0x12
#define SX_DEBUG_BUSY__POS_BANK0VAL0_BUSY_MASK 0x80000
#define SX_DEBUG_BUSY__POS_BANK0VAL0_BUSY__SHIFT 0x13
#define SX_DEBUG_BUSY__POS_INMUX_VALID_MASK 0x100000
#define SX_DEBUG_BUSY__POS_INMUX_VALID__SHIFT 0x14
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ3_MASK 0x200000
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ3__SHIFT 0x15
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ2_MASK 0x400000
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ2__SHIFT 0x16
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ1_MASK 0x800000
#define SX_DEBUG_BUSY__WRCTRL1_VALIDQ1__SHIFT 0x17
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ3_MASK 0x1000000
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ3__SHIFT 0x18
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ2_MASK 0x2000000
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ2__SHIFT 0x19
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ1_MASK 0x4000000
#define SX_DEBUG_BUSY__WRCTRL0_VALIDQ1__SHIFT 0x1a
#define SX_DEBUG_BUSY__PCCMD_VALID_MASK 0x8000000
#define SX_DEBUG_BUSY__PCCMD_VALID__SHIFT 0x1b
#define SX_DEBUG_BUSY__VDATA1_VALID_MASK 0x10000000
#define SX_DEBUG_BUSY__VDATA1_VALID__SHIFT 0x1c
#define SX_DEBUG_BUSY__VDATA0_VALID_MASK 0x20000000
#define SX_DEBUG_BUSY__VDATA0_VALID__SHIFT 0x1d
#define SX_DEBUG_BUSY__CMD_BUSYORVAL_MASK 0x40000000
#define SX_DEBUG_BUSY__CMD_BUSYORVAL__SHIFT 0x1e
#define SX_DEBUG_BUSY__ADDR_BUSYORVAL_MASK 0x80000000
#define SX_DEBUG_BUSY__ADDR_BUSYORVAL__SHIFT 0x1f
/*
 * SX_DEBUG_BUSY_2 field layout.
 *
 * 32 one-bit fields covering bits 0..31 in ascending order:
 *   bit  0       COL_SCBD_BUSY
 *   bits 1..12   COL_REQ3..COL_REQ0, three bits each (FREECNT_NE0, IDLE, BUSY)
 *   bits 13..24  COL_DBIF3..COL_DBIF0, three bits each
 *                (SENDFREE_BUSY, FIFO_BUSY, READ_VALID)
 *   bits 25..31  COL_BUFF3 BANK3 VAL3..VAL0 and BANK2 VAL3..VAL1
 * The COL_BUFF3_BANK2 sequence is completed by VAL0 at bit 0 of
 * SX_DEBUG_BUSY_3.
 */
#define SX_DEBUG_BUSY_2__COL_SCBD_BUSY_MASK 0x1
#define SX_DEBUG_BUSY_2__COL_SCBD_BUSY__SHIFT 0x0
#define SX_DEBUG_BUSY_2__COL_REQ3_FREECNT_NE0_MASK 0x2
#define SX_DEBUG_BUSY_2__COL_REQ3_FREECNT_NE0__SHIFT 0x1
#define SX_DEBUG_BUSY_2__COL_REQ3_IDLE_MASK 0x4
#define SX_DEBUG_BUSY_2__COL_REQ3_IDLE__SHIFT 0x2
#define SX_DEBUG_BUSY_2__COL_REQ3_BUSY_MASK 0x8
#define SX_DEBUG_BUSY_2__COL_REQ3_BUSY__SHIFT 0x3
#define SX_DEBUG_BUSY_2__COL_REQ2_FREECNT_NE0_MASK 0x10
#define SX_DEBUG_BUSY_2__COL_REQ2_FREECNT_NE0__SHIFT 0x4
#define SX_DEBUG_BUSY_2__COL_REQ2_IDLE_MASK 0x20
#define SX_DEBUG_BUSY_2__COL_REQ2_IDLE__SHIFT 0x5
#define SX_DEBUG_BUSY_2__COL_REQ2_BUSY_MASK 0x40
#define SX_DEBUG_BUSY_2__COL_REQ2_BUSY__SHIFT 0x6
#define SX_DEBUG_BUSY_2__COL_REQ1_FREECNT_NE0_MASK 0x80
#define SX_DEBUG_BUSY_2__COL_REQ1_FREECNT_NE0__SHIFT 0x7
#define SX_DEBUG_BUSY_2__COL_REQ1_IDLE_MASK 0x100
#define SX_DEBUG_BUSY_2__COL_REQ1_IDLE__SHIFT 0x8
#define SX_DEBUG_BUSY_2__COL_REQ1_BUSY_MASK 0x200
#define SX_DEBUG_BUSY_2__COL_REQ1_BUSY__SHIFT 0x9
#define SX_DEBUG_BUSY_2__COL_REQ0_FREECNT_NE0_MASK 0x400
#define SX_DEBUG_BUSY_2__COL_REQ0_FREECNT_NE0__SHIFT 0xa
#define SX_DEBUG_BUSY_2__COL_REQ0_IDLE_MASK 0x800
#define SX_DEBUG_BUSY_2__COL_REQ0_IDLE__SHIFT 0xb
#define SX_DEBUG_BUSY_2__COL_REQ0_BUSY_MASK 0x1000
#define SX_DEBUG_BUSY_2__COL_REQ0_BUSY__SHIFT 0xc
#define SX_DEBUG_BUSY_2__COL_DBIF3_SENDFREE_BUSY_MASK 0x2000
#define SX_DEBUG_BUSY_2__COL_DBIF3_SENDFREE_BUSY__SHIFT 0xd
#define SX_DEBUG_BUSY_2__COL_DBIF3_FIFO_BUSY_MASK 0x4000
#define SX_DEBUG_BUSY_2__COL_DBIF3_FIFO_BUSY__SHIFT 0xe
#define SX_DEBUG_BUSY_2__COL_DBIF3_READ_VALID_MASK 0x8000
#define SX_DEBUG_BUSY_2__COL_DBIF3_READ_VALID__SHIFT 0xf
#define SX_DEBUG_BUSY_2__COL_DBIF2_SENDFREE_BUSY_MASK 0x10000
#define SX_DEBUG_BUSY_2__COL_DBIF2_SENDFREE_BUSY__SHIFT 0x10
#define SX_DEBUG_BUSY_2__COL_DBIF2_FIFO_BUSY_MASK 0x20000
#define SX_DEBUG_BUSY_2__COL_DBIF2_FIFO_BUSY__SHIFT 0x11
#define SX_DEBUG_BUSY_2__COL_DBIF2_READ_VALID_MASK 0x40000
#define SX_DEBUG_BUSY_2__COL_DBIF2_READ_VALID__SHIFT 0x12
#define SX_DEBUG_BUSY_2__COL_DBIF1_SENDFREE_BUSY_MASK 0x80000
#define SX_DEBUG_BUSY_2__COL_DBIF1_SENDFREE_BUSY__SHIFT 0x13
#define SX_DEBUG_BUSY_2__COL_DBIF1_FIFO_BUSY_MASK 0x100000
#define SX_DEBUG_BUSY_2__COL_DBIF1_FIFO_BUSY__SHIFT 0x14
#define SX_DEBUG_BUSY_2__COL_DBIF1_READ_VALID_MASK 0x200000
#define SX_DEBUG_BUSY_2__COL_DBIF1_READ_VALID__SHIFT 0x15
#define SX_DEBUG_BUSY_2__COL_DBIF0_SENDFREE_BUSY_MASK 0x400000
#define SX_DEBUG_BUSY_2__COL_DBIF0_SENDFREE_BUSY__SHIFT 0x16
#define SX_DEBUG_BUSY_2__COL_DBIF0_FIFO_BUSY_MASK 0x800000
#define SX_DEBUG_BUSY_2__COL_DBIF0_FIFO_BUSY__SHIFT 0x17
#define SX_DEBUG_BUSY_2__COL_DBIF0_READ_VALID_MASK 0x1000000
#define SX_DEBUG_BUSY_2__COL_DBIF0_READ_VALID__SHIFT 0x18
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL3_BUSY_MASK 0x2000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL3_BUSY__SHIFT 0x19
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL2_BUSY_MASK 0x4000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL2_BUSY__SHIFT 0x1a
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL1_BUSY_MASK 0x8000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL1_BUSY__SHIFT 0x1b
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL0_BUSY_MASK 0x10000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK3_VAL0_BUSY__SHIFT 0x1c
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL3_BUSY_MASK 0x20000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL3_BUSY__SHIFT 0x1d
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL2_BUSY_MASK 0x40000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL2_BUSY__SHIFT 0x1e
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL1_BUSY_MASK 0x80000000
#define SX_DEBUG_BUSY_2__COL_BUFF3_BANK2_VAL1_BUSY__SHIFT 0x1f
/*
 * SX_DEBUG_BUSY_3 field layout.
 *
 * 32 one-bit COL_BUFFx_BANKy_VALz_BUSY fields covering bits 0..31. The
 * fields run in descending BUFF/BANK/VAL order as the bit index rises:
 *   bits 0..8    COL_BUFF3 BANK2 VAL0 down to BANK0 VAL0
 *   bits 9..24   COL_BUFF2 BANK3 VAL3 down to BANK0 VAL0
 *   bits 25..31  COL_BUFF1 BANK3 VAL3 down to BANK2 VAL1
 * The COL_BUFF1 sequence continues at bit 0 of SX_DEBUG_BUSY_4.
 */
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK2_VAL0_BUSY_MASK 0x1
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK2_VAL0_BUSY__SHIFT 0x0
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL3_BUSY_MASK 0x2
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL3_BUSY__SHIFT 0x1
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL2_BUSY_MASK 0x4
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL2_BUSY__SHIFT 0x2
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL1_BUSY_MASK 0x8
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL1_BUSY__SHIFT 0x3
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL0_BUSY_MASK 0x10
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK1_VAL0_BUSY__SHIFT 0x4
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL3_BUSY_MASK 0x20
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL3_BUSY__SHIFT 0x5
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL2_BUSY_MASK 0x40
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL2_BUSY__SHIFT 0x6
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL1_BUSY_MASK 0x80
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL1_BUSY__SHIFT 0x7
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL0_BUSY_MASK 0x100
#define SX_DEBUG_BUSY_3__COL_BUFF3_BANK0_VAL0_BUSY__SHIFT 0x8
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL3_BUSY_MASK 0x200
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL3_BUSY__SHIFT 0x9
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL2_BUSY_MASK 0x400
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL2_BUSY__SHIFT 0xa
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL1_BUSY_MASK 0x800
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL1_BUSY__SHIFT 0xb
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL0_BUSY_MASK 0x1000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK3_VAL0_BUSY__SHIFT 0xc
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL3_BUSY_MASK 0x2000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL3_BUSY__SHIFT 0xd
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL2_BUSY_MASK 0x4000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL2_BUSY__SHIFT 0xe
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL1_BUSY_MASK 0x8000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL1_BUSY__SHIFT 0xf
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL0_BUSY_MASK 0x10000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK2_VAL0_BUSY__SHIFT 0x10
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL3_BUSY_MASK 0x20000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL3_BUSY__SHIFT 0x11
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL2_BUSY_MASK 0x40000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL2_BUSY__SHIFT 0x12
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL1_BUSY_MASK 0x80000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL1_BUSY__SHIFT 0x13
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL0_BUSY_MASK 0x100000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK1_VAL0_BUSY__SHIFT 0x14
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL3_BUSY_MASK 0x200000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL3_BUSY__SHIFT 0x15
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL2_BUSY_MASK 0x400000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL2_BUSY__SHIFT 0x16
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL1_BUSY_MASK 0x800000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL1_BUSY__SHIFT 0x17
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL0_BUSY_MASK 0x1000000
#define SX_DEBUG_BUSY_3__COL_BUFF2_BANK0_VAL0_BUSY__SHIFT 0x18
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL3_BUSY_MASK 0x2000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL3_BUSY__SHIFT 0x19
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL2_BUSY_MASK 0x4000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL2_BUSY__SHIFT 0x1a
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL1_BUSY_MASK 0x8000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL1_BUSY__SHIFT 0x1b
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL0_BUSY_MASK 0x10000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK3_VAL0_BUSY__SHIFT 0x1c
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL3_BUSY_MASK 0x20000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL3_BUSY__SHIFT 0x1d
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL2_BUSY_MASK 0x40000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL2_BUSY__SHIFT 0x1e
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL1_BUSY_MASK 0x80000000
#define SX_DEBUG_BUSY_3__COL_BUFF1_BANK2_VAL1_BUSY__SHIFT 0x1f
/*
 * SX_DEBUG_BUSY_4 field layout.
 *
 * 25 one-bit COL_BUFFx_BANKy_VALz_BUSY fields in bits 0..24, followed by a
 * single 7-bit RESERVED field in bits 25..31:
 *   bits 0..8    COL_BUFF1 BANK2 VAL0 down to BANK0 VAL0
 *   bits 9..24   COL_BUFF0 BANK3 VAL3 down to BANK0 VAL0
 *   bits 25..31  RESERVED
 */
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK2_VAL0_BUSY_MASK 0x1
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK2_VAL0_BUSY__SHIFT 0x0
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL3_BUSY_MASK 0x2
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL3_BUSY__SHIFT 0x1
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL2_BUSY_MASK 0x4
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL2_BUSY__SHIFT 0x2
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL1_BUSY_MASK 0x8
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL1_BUSY__SHIFT 0x3
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL0_BUSY_MASK 0x10
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK1_VAL0_BUSY__SHIFT 0x4
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL3_BUSY_MASK 0x20
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL3_BUSY__SHIFT 0x5
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL2_BUSY_MASK 0x40
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL2_BUSY__SHIFT 0x6
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL1_BUSY_MASK 0x80
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL1_BUSY__SHIFT 0x7
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL0_BUSY_MASK 0x100
#define SX_DEBUG_BUSY_4__COL_BUFF1_BANK0_VAL0_BUSY__SHIFT 0x8
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL3_BUSY_MASK 0x200
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL3_BUSY__SHIFT 0x9
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL2_BUSY_MASK 0x400
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL2_BUSY__SHIFT 0xa
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL1_BUSY_MASK 0x800
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL1_BUSY__SHIFT 0xb
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL0_BUSY_MASK 0x1000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK3_VAL0_BUSY__SHIFT 0xc
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL3_BUSY_MASK 0x2000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL3_BUSY__SHIFT 0xd
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL2_BUSY_MASK 0x4000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL2_BUSY__SHIFT 0xe
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL1_BUSY_MASK 0x8000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL1_BUSY__SHIFT 0xf
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL0_BUSY_MASK 0x10000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK2_VAL0_BUSY__SHIFT 0x10
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL3_BUSY_MASK 0x20000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL3_BUSY__SHIFT 0x11
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL2_BUSY_MASK 0x40000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL2_BUSY__SHIFT 0x12
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL1_BUSY_MASK 0x80000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL1_BUSY__SHIFT 0x13
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL0_BUSY_MASK 0x100000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK1_VAL0_BUSY__SHIFT 0x14
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL3_BUSY_MASK 0x200000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL3_BUSY__SHIFT 0x15
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL2_BUSY_MASK 0x400000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL2_BUSY__SHIFT 0x16
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL1_BUSY_MASK 0x800000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL1_BUSY__SHIFT 0x17
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL0_BUSY_MASK 0x1000000
#define SX_DEBUG_BUSY_4__COL_BUFF0_BANK0_VAL0_BUSY__SHIFT 0x18
#define SX_DEBUG_BUSY_4__RESERVED_MASK 0xfe000000
#define SX_DEBUG_BUSY_4__RESERVED__SHIFT 0x19
/*
 * SX_DEBUG_1 field layout: two fields that together span all 32 bits.
 *   bits 0..6   SX_DB_QUAD_CREDIT (7 bits)
 *   bits 7..31  DEBUG_DATA (25 bits)
 */
#define SX_DEBUG_1__SX_DB_QUAD_CREDIT_MASK 0x7f
#define SX_DEBUG_1__SX_DB_QUAD_CREDIT__SHIFT 0x0
#define SX_DEBUG_1__DEBUG_DATA_MASK 0xffffff80
#define SX_DEBUG_1__DEBUG_DATA__SHIFT 0x7
/*
 * SX_PERFCOUNTERn field layouts (n = 0..3).
 *
 * SX_PERFCOUNTERn_SELECT, identical for all four counters:
 *   bits 0..9    PERFCOUNTER_SELECT  (10 bits)
 *   bits 10..19  PERFCOUNTER_SELECT1 (10 bits)
 *   bits 20..23  CNTR_MODE           (4 bits)
 */
#define SX_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define SX_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define SX_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define SX_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define SX_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define SX_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define SX_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define SX_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define SX_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define SX_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define SX_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define SX_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define SX_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define SX_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define SX_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define SX_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define SX_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define SX_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define SX_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define SX_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define SX_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define SX_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define SX_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define SX_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
/*
 * SX_PERFCOUNTERn_SELECT1 is defined here for counters 0 and 1 only:
 *   bits 0..9    PERFCOUNTER_SELECT2 (10 bits)
 *   bits 10..19  PERFCOUNTER_SELECT3 (10 bits)
 */
#define SX_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT2_MASK 0x3ff
#define SX_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT2__SHIFT 0x0
#define SX_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT3_MASK 0xffc00
#define SX_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT3__SHIFT 0xa
#define SX_PERFCOUNTER1_SELECT1__PERFCOUNTER_SELECT2_MASK 0x3ff
#define SX_PERFCOUNTER1_SELECT1__PERFCOUNTER_SELECT2__SHIFT 0x0
#define SX_PERFCOUNTER1_SELECT1__PERFCOUNTER_SELECT3_MASK 0xffc00
#define SX_PERFCOUNTER1_SELECT1__PERFCOUNTER_SELECT3__SHIFT 0xa
/* SX_PERFCOUNTERn_LO / _HI: one field each, spanning the full 32 bits. */
#define SX_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SX_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SX_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SX_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SX_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SX_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SX_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SX_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SX_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SX_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SX_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SX_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define SX_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define SX_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define SX_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define SX_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * TCC_CTRL field layout:
 *   bits 0..1    CACHE_SIZE
 *   bits 2..3    RATE
 *   bits 4..7    WRITEBACK_MARGIN
 *   bits 12..15  SRC_FIFO_SIZE
 *   bits 16..19  LATENCY_FIFO_SIZE
 *   bit  20      WB_OR_INV_ALL_VMIDS
 * No field is defined here for bits 8..11 or 21..31.
 */
#define TCC_CTRL__CACHE_SIZE_MASK 0x3
#define TCC_CTRL__CACHE_SIZE__SHIFT 0x0
#define TCC_CTRL__RATE_MASK 0xc
#define TCC_CTRL__RATE__SHIFT 0x2
#define TCC_CTRL__WRITEBACK_MARGIN_MASK 0xf0
#define TCC_CTRL__WRITEBACK_MARGIN__SHIFT 0x4
#define TCC_CTRL__SRC_FIFO_SIZE_MASK 0xf000
#define TCC_CTRL__SRC_FIFO_SIZE__SHIFT 0xc
#define TCC_CTRL__LATENCY_FIFO_SIZE_MASK 0xf0000
#define TCC_CTRL__LATENCY_FIFO_SIZE__SHIFT 0x10
#define TCC_CTRL__WB_OR_INV_ALL_VMIDS_MASK 0x100000
#define TCC_CTRL__WB_OR_INV_ALL_VMIDS__SHIFT 0x14
/* TCC_EDC_COUNTER: SEC_COUNT in bits 0..3, DED_COUNT in bits 16..19. */
#define TCC_EDC_COUNTER__SEC_COUNT_MASK 0xf
#define TCC_EDC_COUNTER__SEC_COUNT__SHIFT 0x0
#define TCC_EDC_COUNTER__DED_COUNT_MASK 0xf0000
#define TCC_EDC_COUNTER__DED_COUNT__SHIFT 0x10
/* TCC_REDUNDANCY: MC_SEL0 at bit 0, MC_SEL1 at bit 1. */
#define TCC_REDUNDANCY__MC_SEL0_MASK 0x1
#define TCC_REDUNDANCY__MC_SEL0__SHIFT 0x0
#define TCC_REDUNDANCY__MC_SEL1_MASK 0x2
#define TCC_REDUNDANCY__MC_SEL1__SHIFT 0x1
/*
 * TCC_/TCA_/TCS_CGTT_SCLK_CTRL field layouts. The three prefixes share one
 * layout:
 *   bits 0..3    ON_DELAY
 *   bits 4..11   OFF_HYSTERESIS
 *   bits 24..31  SOFT_OVERRIDE7..SOFT_OVERRIDE0, one bit each
 * The SOFT_OVERRIDE index runs opposite to the bit index: SOFT_OVERRIDE7 is
 * bit 24 and SOFT_OVERRIDE0 is bit 31.
 */
#define TCC_CGTT_SCLK_CTRL__ON_DELAY_MASK 0xf
#define TCC_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0
#define TCC_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define TCC_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define TCC_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
#define TCA_CGTT_SCLK_CTRL__ON_DELAY_MASK 0xf
#define TCA_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0
#define TCA_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define TCA_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define TCA_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
#define TCS_CGTT_SCLK_CTRL__ON_DELAY_MASK 0xf
#define TCS_CGTT_SCLK_CTRL__ON_DELAY__SHIFT 0x0
#define TCS_CGTT_SCLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define TCS_CGTT_SCLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define TCS_CGTT_SCLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * TCC_PERFCOUNTERn field layouts (n = 0..3).
 *
 * TCC_PERFCOUNTER0_SELECT and TCC_PERFCOUNTER1_SELECT:
 *   bits 0..9    PERF_SEL
 *   bits 10..19  PERF_SEL1
 *   bits 20..23  CNTR_MODE
 *   bits 24..27  PERF_MODE1
 *   bits 28..31  PERF_MODE
 */
#define TCC_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define TCC_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TCC_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define TCC_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TCC_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TCC_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TCC_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TCC_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TCC_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TCC_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TCC_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define TCC_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TCC_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define TCC_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define TCC_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TCC_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TCC_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define TCC_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define TCC_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TCC_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TCC_PERFCOUNTER0_SELECT1 and TCC_PERFCOUNTER1_SELECT1:
 *   bits 0..9    PERF_SEL2
 *   bits 10..19  PERF_SEL3
 *   bits 24..27  PERF_MODE2
 *   bits 28..31  PERF_MODE3
 */
#define TCC_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCC_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCC_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCC_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCC_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf000000
#define TCC_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x18
#define TCC_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf0000000
#define TCC_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x1c
#define TCC_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCC_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCC_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCC_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCC_PERFCOUNTER1_SELECT1__PERF_MODE2_MASK 0xf000000
#define TCC_PERFCOUNTER1_SELECT1__PERF_MODE2__SHIFT 0x18
#define TCC_PERFCOUNTER1_SELECT1__PERF_MODE3_MASK 0xf0000000
#define TCC_PERFCOUNTER1_SELECT1__PERF_MODE3__SHIFT 0x1c
/*
 * TCC_PERFCOUNTER2_SELECT and TCC_PERFCOUNTER3_SELECT define only
 * PERF_SEL (bits 0..9), CNTR_MODE (bits 20..23) and PERF_MODE (bits 28..31).
 */
#define TCC_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define TCC_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define TCC_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define TCC_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define TCC_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define TCC_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define TCC_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define TCC_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define TCC_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define TCC_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
#define TCC_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define TCC_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/* TCC_PERFCOUNTERn_LO / _HI: one field each, spanning the full 32 bits. */
#define TCC_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCC_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCC_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCC_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCC_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCC_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCC_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCC_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCC_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCC_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCC_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCC_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCC_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCC_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCC_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCC_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/* TCA_CTRL: a single field, HOLE_TIMEOUT, in bits 0..3. */
#define TCA_CTRL__HOLE_TIMEOUT_MASK 0xf
#define TCA_CTRL__HOLE_TIMEOUT__SHIFT 0x0
/*
 * TCA_PERFCOUNTERn field layouts (n = 0..3).
 *
 * TCA_PERFCOUNTER0_SELECT and TCA_PERFCOUNTER1_SELECT:
 *   bits 0..9    PERF_SEL
 *   bits 10..19  PERF_SEL1
 *   bits 20..23  CNTR_MODE
 *   bits 24..27  PERF_MODE1
 *   bits 28..31  PERF_MODE
 */
#define TCA_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define TCA_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TCA_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define TCA_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TCA_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TCA_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TCA_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TCA_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TCA_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TCA_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TCA_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define TCA_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TCA_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define TCA_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define TCA_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TCA_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TCA_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define TCA_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define TCA_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TCA_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TCA_PERFCOUNTER0_SELECT1 and TCA_PERFCOUNTER1_SELECT1:
 *   bits 0..9    PERF_SEL2
 *   bits 10..19  PERF_SEL3
 *   bits 24..27  PERF_MODE2
 *   bits 28..31  PERF_MODE3
 */
#define TCA_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCA_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCA_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCA_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCA_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf000000
#define TCA_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x18
#define TCA_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf0000000
#define TCA_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x1c
#define TCA_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCA_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCA_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCA_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCA_PERFCOUNTER1_SELECT1__PERF_MODE2_MASK 0xf000000
#define TCA_PERFCOUNTER1_SELECT1__PERF_MODE2__SHIFT 0x18
#define TCA_PERFCOUNTER1_SELECT1__PERF_MODE3_MASK 0xf0000000
#define TCA_PERFCOUNTER1_SELECT1__PERF_MODE3__SHIFT 0x1c
/*
 * TCA_PERFCOUNTER2_SELECT and TCA_PERFCOUNTER3_SELECT define only
 * PERF_SEL (bits 0..9), CNTR_MODE (bits 20..23) and PERF_MODE (bits 28..31).
 */
#define TCA_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define TCA_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define TCA_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define TCA_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define TCA_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define TCA_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define TCA_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define TCA_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define TCA_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define TCA_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
#define TCA_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define TCA_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/* TCA_PERFCOUNTERn_LO / _HI: one field each, spanning the full 32 bits. */
#define TCA_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCA_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCA_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCA_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCA_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCA_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCA_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCA_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCA_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCA_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCA_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCA_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCA_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCA_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCA_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCA_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/* TCS_CTRL: a single field, RATE, in bits 0..1. */
#define TCS_CTRL__RATE_MASK 0x3
#define TCS_CTRL__RATE__SHIFT 0x0
/*
 * TCS_PERFCOUNTERn field layouts (n = 0..3).
 *
 * Only counter 0 has the extended layout:
 *   TCS_PERFCOUNTER0_SELECT
 *     bits 0..9    PERF_SEL
 *     bits 10..19  PERF_SEL1
 *     bits 20..23  CNTR_MODE
 *     bits 24..27  PERF_MODE1
 *     bits 28..31  PERF_MODE
 *   TCS_PERFCOUNTER0_SELECT1
 *     bits 0..9    PERF_SEL2
 *     bits 10..19  PERF_SEL3
 *     bits 24..27  PERF_MODE2
 *     bits 28..31  PERF_MODE3
 */
#define TCS_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define TCS_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TCS_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define TCS_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TCS_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TCS_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TCS_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TCS_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TCS_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TCS_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TCS_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCS_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCS_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCS_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCS_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf000000
#define TCS_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x18
#define TCS_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf0000000
#define TCS_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x1c
/*
 * TCS_PERFCOUNTER1..3_SELECT define only PERF_SEL (bits 0..9),
 * CNTR_MODE (bits 20..23) and PERF_MODE (bits 28..31).
 */
#define TCS_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define TCS_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TCS_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TCS_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TCS_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TCS_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define TCS_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define TCS_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define TCS_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define TCS_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define TCS_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define TCS_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define TCS_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define TCS_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define TCS_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define TCS_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
#define TCS_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define TCS_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/* TCS_PERFCOUNTERn_LO / _HI: one field each, spanning the full 32 bits. */
#define TCS_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCS_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCS_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCS_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCS_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCS_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCS_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCS_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCS_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCS_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCS_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCS_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCS_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCS_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCS_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCS_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * TA_BC_BASE_ADDR / TA_BC_BASE_ADDR_HI: each defines a single ADDRESS field
 * starting at bit 0. It is 32 bits wide in TA_BC_BASE_ADDR and 8 bits wide
 * (bits 0..7) in TA_BC_BASE_ADDR_HI.
 * NOTE(review): presumably the _HI value holds the upper bits of one wider
 * address - confirm against the hardware documentation.
 */
#define TA_BC_BASE_ADDR__ADDRESS_MASK 0xffffffff
#define TA_BC_BASE_ADDR__ADDRESS__SHIFT 0x0
#define TA_BC_BASE_ADDR_HI__ADDRESS_MASK 0xff
#define TA_BC_BASE_ADDR_HI__ADDRESS__SHIFT 0x0
/*
 * TD_CNTL field layout:
 *   bits 0..1    SYNC_PHASE_SH
 *   bits 4..5    SYNC_PHASE_VC_SMX
 *   bit  8       PAD_STALL_EN
 *   bits 9..10   EXTEND_LDS_STALL
 *   bits 11..12  LDS_STALL_PHASE_ADJUST
 *   bit  15      PRECISION_COMPATIBILITY
 *   bit  16      GATHER4_FLOAT_MODE
 *   bit  18      LD_FLOAT_MODE
 *   bit  19      GATHER4_DX9_MODE
 *   bit  20      DISABLE_POWER_THROTTLE
 * Bits not listed above have no field defined here.
 */
#define TD_CNTL__SYNC_PHASE_SH_MASK 0x3
#define TD_CNTL__SYNC_PHASE_SH__SHIFT 0x0
#define TD_CNTL__SYNC_PHASE_VC_SMX_MASK 0x30
#define TD_CNTL__SYNC_PHASE_VC_SMX__SHIFT 0x4
#define TD_CNTL__PAD_STALL_EN_MASK 0x100
#define TD_CNTL__PAD_STALL_EN__SHIFT 0x8
#define TD_CNTL__EXTEND_LDS_STALL_MASK 0x600
#define TD_CNTL__EXTEND_LDS_STALL__SHIFT 0x9
#define TD_CNTL__LDS_STALL_PHASE_ADJUST_MASK 0x1800
#define TD_CNTL__LDS_STALL_PHASE_ADJUST__SHIFT 0xb
#define TD_CNTL__PRECISION_COMPATIBILITY_MASK 0x8000
#define TD_CNTL__PRECISION_COMPATIBILITY__SHIFT 0xf
#define TD_CNTL__GATHER4_FLOAT_MODE_MASK 0x10000
#define TD_CNTL__GATHER4_FLOAT_MODE__SHIFT 0x10
#define TD_CNTL__LD_FLOAT_MODE_MASK 0x40000
#define TD_CNTL__LD_FLOAT_MODE__SHIFT 0x12
#define TD_CNTL__GATHER4_DX9_MODE_MASK 0x80000
#define TD_CNTL__GATHER4_DX9_MODE__SHIFT 0x13
#define TD_CNTL__DISABLE_POWER_THROTTLE_MASK 0x100000
#define TD_CNTL__DISABLE_POWER_THROTTLE__SHIFT 0x14
/* TD_STATUS: a single BUSY flag at bit 31. */
#define TD_STATUS__BUSY_MASK 0x80000000
#define TD_STATUS__BUSY__SHIFT 0x1f
/*
 * TD_DEBUG_INDEX: INDEX in bits 0..4. TD_DEBUG_DATA: DATA spans all 32 bits.
 * NOTE(review): the names suggest an index/data access pair - confirm
 * against the hardware documentation.
 */
#define TD_DEBUG_INDEX__INDEX_MASK 0x1f
#define TD_DEBUG_INDEX__INDEX__SHIFT 0x0
#define TD_DEBUG_DATA__DATA_MASK 0xffffffff
#define TD_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * TD_PERFCOUNTER0/1_SELECT share one layout:
 *   PERF_SEL   bits  7:0
 *   PERF_SEL1  bits 17:10
 *   CNTR_MODE  bits 23:20
 *   PERF_MODE1 bits 27:24
 *   PERF_MODE  bits 31:28
 */
#define TD_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0xff
#define TD_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TD_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0x3fc00
#define TD_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TD_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TD_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TD_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TD_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TD_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TD_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TD_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0xff
#define TD_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TD_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0x3fc00
#define TD_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define TD_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TD_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TD_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define TD_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define TD_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TD_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TD_PERFCOUNTER0_SELECT1: PERF_SEL2 bits 7:0, PERF_SEL3 bits 17:10,
 * PERF_MODE3 bits 27:24, PERF_MODE2 bits 31:28. Only counter 0 has a
 * SELECT1 definition in this block.
 */
#define TD_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0xff
#define TD_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TD_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0x3fc00
#define TD_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TD_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define TD_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define TD_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define TD_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
/* TD_PERFCOUNTER0/1_LO and _HI: full 32-bit fields (mask 0xffffffff). */
#define TD_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TD_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TD_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TD_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TD_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TD_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TD_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TD_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
/* TD_SCRATCH: single 32-bit SCRATCH field. */
#define TD_SCRATCH__SCRATCH_MASK 0xffffffff
#define TD_SCRATCH__SCRATCH__SHIFT 0x0
#define TA_CNTL__TC_DATA_CREDIT_MASK 0xe000
#define TA_CNTL__TC_DATA_CREDIT__SHIFT 0xd
#define TA_CNTL__ALIGNER_CREDIT_MASK 0x1f0000
#define TA_CNTL__ALIGNER_CREDIT__SHIFT 0x10
#define TA_CNTL__TD_FIFO_CREDIT_MASK 0xffc00000
#define TA_CNTL__TD_FIFO_CREDIT__SHIFT 0x16
#define TA_CNTL_AUX__SCOAL_DSWIZZLE_N_MASK 0x1
#define TA_CNTL_AUX__SCOAL_DSWIZZLE_N__SHIFT 0x0
#define TA_CNTL_AUX__RESERVED_MASK 0xe
#define TA_CNTL_AUX__RESERVED__SHIFT 0x1
#define TA_CNTL_AUX__ANISO_WEIGHT_MODE_MASK 0x10000
#define TA_CNTL_AUX__ANISO_WEIGHT_MODE__SHIFT 0x10
#define TA_RESERVED_010C__Unused_MASK 0xffffffff
#define TA_RESERVED_010C__Unused__SHIFT 0x0
#define TA_CS_BC_BASE_ADDR__ADDRESS_MASK 0xffffffff
#define TA_CS_BC_BASE_ADDR__ADDRESS__SHIFT 0x0
#define TA_CS_BC_BASE_ADDR_HI__ADDRESS_MASK 0xff
#define TA_CS_BC_BASE_ADDR_HI__ADDRESS__SHIFT 0x0
/*
 * TA_STATUS: every field is a single bit.
 *   bits 12..14  FG_{P,L,S}FIFO_EMPTYB
 *   bits 16..18  FL_{P,L,S}FIFO_EMPTYB
 *   bits 20..22  FA_{P,L,S}FIFO_EMPTYB
 *   bits 24..31  IN_BUSY, FG_BUSY, LA_BUSY, FL_BUSY, TA_BUSY, FA_BUSY,
 *                AL_BUSY, BUSY (in ascending bit order)
 * NOTE(review): the "EMPTYB" suffix suggests an active-low "empty" flag --
 * confirm polarity against the register specification.
 */
#define TA_STATUS__FG_PFIFO_EMPTYB_MASK 0x1000
#define TA_STATUS__FG_PFIFO_EMPTYB__SHIFT 0xc
#define TA_STATUS__FG_LFIFO_EMPTYB_MASK 0x2000
#define TA_STATUS__FG_LFIFO_EMPTYB__SHIFT 0xd
#define TA_STATUS__FG_SFIFO_EMPTYB_MASK 0x4000
#define TA_STATUS__FG_SFIFO_EMPTYB__SHIFT 0xe
#define TA_STATUS__FL_PFIFO_EMPTYB_MASK 0x10000
#define TA_STATUS__FL_PFIFO_EMPTYB__SHIFT 0x10
#define TA_STATUS__FL_LFIFO_EMPTYB_MASK 0x20000
#define TA_STATUS__FL_LFIFO_EMPTYB__SHIFT 0x11
#define TA_STATUS__FL_SFIFO_EMPTYB_MASK 0x40000
#define TA_STATUS__FL_SFIFO_EMPTYB__SHIFT 0x12
#define TA_STATUS__FA_PFIFO_EMPTYB_MASK 0x100000
#define TA_STATUS__FA_PFIFO_EMPTYB__SHIFT 0x14
#define TA_STATUS__FA_LFIFO_EMPTYB_MASK 0x200000
#define TA_STATUS__FA_LFIFO_EMPTYB__SHIFT 0x15
#define TA_STATUS__FA_SFIFO_EMPTYB_MASK 0x400000
#define TA_STATUS__FA_SFIFO_EMPTYB__SHIFT 0x16
#define TA_STATUS__IN_BUSY_MASK 0x1000000
#define TA_STATUS__IN_BUSY__SHIFT 0x18
#define TA_STATUS__FG_BUSY_MASK 0x2000000
#define TA_STATUS__FG_BUSY__SHIFT 0x19
#define TA_STATUS__LA_BUSY_MASK 0x4000000
#define TA_STATUS__LA_BUSY__SHIFT 0x1a
#define TA_STATUS__FL_BUSY_MASK 0x8000000
#define TA_STATUS__FL_BUSY__SHIFT 0x1b
#define TA_STATUS__TA_BUSY_MASK 0x10000000
#define TA_STATUS__TA_BUSY__SHIFT 0x1c
#define TA_STATUS__FA_BUSY_MASK 0x20000000
#define TA_STATUS__FA_BUSY__SHIFT 0x1d
#define TA_STATUS__AL_BUSY_MASK 0x40000000
#define TA_STATUS__AL_BUSY__SHIFT 0x1e
#define TA_STATUS__BUSY_MASK 0x80000000
#define TA_STATUS__BUSY__SHIFT 0x1f
/* TA_DEBUG_INDEX (5-bit INDEX, bits 4:0) and TA_DEBUG_DATA (32-bit DATA). */
#define TA_DEBUG_INDEX__INDEX_MASK 0x1f
#define TA_DEBUG_INDEX__INDEX__SHIFT 0x0
#define TA_DEBUG_DATA__DATA_MASK 0xffffffff
#define TA_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * TA_PERFCOUNTER0/1_SELECT share one layout:
 *   PERF_SEL   bits  7:0
 *   PERF_SEL1  bits 17:10
 *   CNTR_MODE  bits 23:20
 *   PERF_MODE1 bits 27:24
 *   PERF_MODE  bits 31:28
 */
#define TA_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0xff
#define TA_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TA_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0x3fc00
#define TA_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TA_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TA_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TA_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TA_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TA_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TA_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TA_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0xff
#define TA_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TA_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0x3fc00
#define TA_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define TA_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TA_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TA_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define TA_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define TA_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TA_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TA_PERFCOUNTER0_SELECT1: PERF_SEL2 bits 7:0, PERF_SEL3 bits 17:10,
 * PERF_MODE3 bits 27:24, PERF_MODE2 bits 31:28. Only counter 0 has a
 * SELECT1 definition in this block.
 */
#define TA_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0xff
#define TA_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TA_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0x3fc00
#define TA_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TA_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define TA_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define TA_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define TA_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
/* TA_PERFCOUNTER0/1_LO and _HI: full 32-bit fields (mask 0xffffffff). */
#define TA_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TA_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TA_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TA_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TA_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TA_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TA_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TA_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
/* TA_SCRATCH: single 32-bit SCRATCH field. */
#define TA_SCRATCH__SCRATCH_MASK 0xffffffff
#define TA_SCRATCH__SCRATCH__SHIFT 0x0
/* SH_HIDDEN_PRIVATE_BASE_VMID: single 32-bit ADDRESS field. */
#define SH_HIDDEN_PRIVATE_BASE_VMID__ADDRESS_MASK 0xffffffff
#define SH_HIDDEN_PRIVATE_BASE_VMID__ADDRESS__SHIFT 0x0
/*
 * SH_STATIC_MEM_CONFIG, contiguous fields in bits 15:0:
 *   SWIZZLE_ENABLE bit 0
 *   ELEMENT_SIZE   bits 2:1
 *   INDEX_STRIDE   bits 4:3
 *   PRIVATE_MTYPE  bits 7:5
 *   READ_ONLY_CNTL bits 15:8
 */
#define SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE_MASK 0x1
#define SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT 0x0
#define SH_STATIC_MEM_CONFIG__ELEMENT_SIZE_MASK 0x6
#define SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT 0x1
#define SH_STATIC_MEM_CONFIG__INDEX_STRIDE_MASK 0x18
#define SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT 0x3
#define SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE_MASK 0xe0
#define SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT 0x5
#define SH_STATIC_MEM_CONFIG__READ_ONLY_CNTL_MASK 0xff00
#define SH_STATIC_MEM_CONFIG__READ_ONLY_CNTL__SHIFT 0x8
/* TCP_INVALIDATE: START is bit 0. TCP_STATUS: TCP_BUSY is bit 0. */
#define TCP_INVALIDATE__START_MASK 0x1
#define TCP_INVALIDATE__START__SHIFT 0x0
#define TCP_STATUS__TCP_BUSY_MASK 0x1
#define TCP_STATUS__TCP_BUSY__SHIFT 0x0
/*
 * TCP_CNTL:
 *   FORCE_HIT              bit 0
 *   FORCE_MISS             bit 1
 *   L1_SIZE                bits 3:2
 *   FLAT_BUF_HASH_ENABLE   bit 4
 *   FLAT_BUF_CACHE_SWIZZLE bit 5
 *   FORCE_EOW_TOTAL_CNT    bits 20:15
 *   FORCE_EOW_TAGRAM_CNT   bits 27:22
 *   DISABLE_Z_MAP          bit 28
 *   INV_ALL_VMIDS          bit 29
 */
#define TCP_CNTL__FORCE_HIT_MASK 0x1
#define TCP_CNTL__FORCE_HIT__SHIFT 0x0
#define TCP_CNTL__FORCE_MISS_MASK 0x2
#define TCP_CNTL__FORCE_MISS__SHIFT 0x1
#define TCP_CNTL__L1_SIZE_MASK 0xc
#define TCP_CNTL__L1_SIZE__SHIFT 0x2
#define TCP_CNTL__FLAT_BUF_HASH_ENABLE_MASK 0x10
#define TCP_CNTL__FLAT_BUF_HASH_ENABLE__SHIFT 0x4
#define TCP_CNTL__FLAT_BUF_CACHE_SWIZZLE_MASK 0x20
#define TCP_CNTL__FLAT_BUF_CACHE_SWIZZLE__SHIFT 0x5
#define TCP_CNTL__FORCE_EOW_TOTAL_CNT_MASK 0x1f8000
#define TCP_CNTL__FORCE_EOW_TOTAL_CNT__SHIFT 0xf
#define TCP_CNTL__FORCE_EOW_TAGRAM_CNT_MASK 0xfc00000
#define TCP_CNTL__FORCE_EOW_TAGRAM_CNT__SHIFT 0x16
#define TCP_CNTL__DISABLE_Z_MAP_MASK 0x10000000
#define TCP_CNTL__DISABLE_Z_MAP__SHIFT 0x1c
#define TCP_CNTL__INV_ALL_VMIDS_MASK 0x20000000
#define TCP_CNTL__INV_ALL_VMIDS__SHIFT 0x1d
/*
 * TCP_CHAN_STEER_LO / _HI: sixteen 4-bit CHANx fields, eight per register.
 * _LO holds CHAN0..CHAN7 and _HI holds CHAN8..CHANF (hex-digit suffix);
 * within each register the k-th field occupies bits [4k+3 : 4k].
 */
#define TCP_CHAN_STEER_LO__CHAN0_MASK 0xf
#define TCP_CHAN_STEER_LO__CHAN0__SHIFT 0x0
#define TCP_CHAN_STEER_LO__CHAN1_MASK 0xf0
#define TCP_CHAN_STEER_LO__CHAN1__SHIFT 0x4
#define TCP_CHAN_STEER_LO__CHAN2_MASK 0xf00
#define TCP_CHAN_STEER_LO__CHAN2__SHIFT 0x8
#define TCP_CHAN_STEER_LO__CHAN3_MASK 0xf000
#define TCP_CHAN_STEER_LO__CHAN3__SHIFT 0xc
#define TCP_CHAN_STEER_LO__CHAN4_MASK 0xf0000
#define TCP_CHAN_STEER_LO__CHAN4__SHIFT 0x10
#define TCP_CHAN_STEER_LO__CHAN5_MASK 0xf00000
#define TCP_CHAN_STEER_LO__CHAN5__SHIFT 0x14
#define TCP_CHAN_STEER_LO__CHAN6_MASK 0xf000000
#define TCP_CHAN_STEER_LO__CHAN6__SHIFT 0x18
#define TCP_CHAN_STEER_LO__CHAN7_MASK 0xf0000000
#define TCP_CHAN_STEER_LO__CHAN7__SHIFT 0x1c
#define TCP_CHAN_STEER_HI__CHAN8_MASK 0xf
#define TCP_CHAN_STEER_HI__CHAN8__SHIFT 0x0
#define TCP_CHAN_STEER_HI__CHAN9_MASK 0xf0
#define TCP_CHAN_STEER_HI__CHAN9__SHIFT 0x4
#define TCP_CHAN_STEER_HI__CHANA_MASK 0xf00
#define TCP_CHAN_STEER_HI__CHANA__SHIFT 0x8
#define TCP_CHAN_STEER_HI__CHANB_MASK 0xf000
#define TCP_CHAN_STEER_HI__CHANB__SHIFT 0xc
#define TCP_CHAN_STEER_HI__CHANC_MASK 0xf0000
#define TCP_CHAN_STEER_HI__CHANC__SHIFT 0x10
#define TCP_CHAN_STEER_HI__CHAND_MASK 0xf00000
#define TCP_CHAN_STEER_HI__CHAND__SHIFT 0x14
#define TCP_CHAN_STEER_HI__CHANE_MASK 0xf000000
#define TCP_CHAN_STEER_HI__CHANE__SHIFT 0x18
#define TCP_CHAN_STEER_HI__CHANF_MASK 0xf0000000
#define TCP_CHAN_STEER_HI__CHANF__SHIFT 0x1c
/*
 * TCP_ADDR_CONFIG: NUM_TCC_BANKS bits 3:0, NUM_BANKS bits 5:4,
 * COLHI_WIDTH bits 8:6, RB_SPLIT_COLHI bit 9.
 */
#define TCP_ADDR_CONFIG__NUM_TCC_BANKS_MASK 0xf
#define TCP_ADDR_CONFIG__NUM_TCC_BANKS__SHIFT 0x0
#define TCP_ADDR_CONFIG__NUM_BANKS_MASK 0x30
#define TCP_ADDR_CONFIG__NUM_BANKS__SHIFT 0x4
#define TCP_ADDR_CONFIG__COLHI_WIDTH_MASK 0x1c0
#define TCP_ADDR_CONFIG__COLHI_WIDTH__SHIFT 0x6
#define TCP_ADDR_CONFIG__RB_SPLIT_COLHI_MASK 0x200
#define TCP_ADDR_CONFIG__RB_SPLIT_COLHI__SHIFT 0x9
/*
 * TCP_CREDIT: LFIFO_CREDIT bits 9:0, REQ_FIFO_CREDIT bits 22:16,
 * TD_CREDIT bits 31:29.
 */
#define TCP_CREDIT__LFIFO_CREDIT_MASK 0x3ff
#define TCP_CREDIT__LFIFO_CREDIT__SHIFT 0x0
#define TCP_CREDIT__REQ_FIFO_CREDIT_MASK 0x7f0000
#define TCP_CREDIT__REQ_FIFO_CREDIT__SHIFT 0x10
#define TCP_CREDIT__TD_CREDIT_MASK 0xe0000000
#define TCP_CREDIT__TD_CREDIT__SHIFT 0x1d
/*
 * TCP_PERFCOUNTER0/1_SELECT share one layout. The select fields here are
 * 10 bits wide (mask 0x3ff / 0xffc00):
 *   PERF_SEL   bits  9:0
 *   PERF_SEL1  bits 19:10
 *   CNTR_MODE  bits 23:20
 *   PERF_MODE1 bits 27:24
 *   PERF_MODE  bits 31:28
 */
#define TCP_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define TCP_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define TCP_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define TCP_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define TCP_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define TCP_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define TCP_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define TCP_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define TCP_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define TCP_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define TCP_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define TCP_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define TCP_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define TCP_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define TCP_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define TCP_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define TCP_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define TCP_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define TCP_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define TCP_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TCP_PERFCOUNTER0/1_SELECT1: PERF_SEL2 bits 9:0, PERF_SEL3 bits 19:10,
 * PERF_MODE3 bits 27:24, PERF_MODE2 bits 31:28.
 */
#define TCP_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCP_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCP_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCP_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCP_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define TCP_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define TCP_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define TCP_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
#define TCP_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define TCP_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define TCP_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define TCP_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define TCP_PERFCOUNTER1_SELECT1__PERF_MODE3_MASK 0xf000000
#define TCP_PERFCOUNTER1_SELECT1__PERF_MODE3__SHIFT 0x18
#define TCP_PERFCOUNTER1_SELECT1__PERF_MODE2_MASK 0xf0000000
#define TCP_PERFCOUNTER1_SELECT1__PERF_MODE2__SHIFT 0x1c
/*
 * TCP_PERFCOUNTER2/3_SELECT: reduced layout with no PERF_SEL1/PERF_MODE1
 * fields -- PERF_SEL bits 9:0, CNTR_MODE bits 23:20, PERF_MODE bits 31:28.
 */
#define TCP_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0x3ff
#define TCP_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define TCP_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define TCP_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define TCP_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define TCP_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define TCP_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0x3ff
#define TCP_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define TCP_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define TCP_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
#define TCP_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define TCP_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * TCP_PERFCOUNTER0..3_LO and _HI: each register is a single full-width
 * 32-bit field (mask 0xffffffff, shift 0).
 */
#define TCP_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCP_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCP_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCP_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCP_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCP_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCP_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define TCP_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define TCP_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCP_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCP_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCP_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCP_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCP_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define TCP_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define TCP_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * TCP_BUFFER_ADDR_HASH_CNTL: four 3-bit fields, one at the bottom of each
 * byte -- CHANNEL_BITS bits 2:0, BANK_BITS bits 10:8,
 * CHANNEL_XOR_COUNT bits 18:16, BANK_XOR_COUNT bits 26:24.
 */
#define TCP_BUFFER_ADDR_HASH_CNTL__CHANNEL_BITS_MASK 0x7
#define TCP_BUFFER_ADDR_HASH_CNTL__CHANNEL_BITS__SHIFT 0x0
#define TCP_BUFFER_ADDR_HASH_CNTL__BANK_BITS_MASK 0x700
#define TCP_BUFFER_ADDR_HASH_CNTL__BANK_BITS__SHIFT 0x8
#define TCP_BUFFER_ADDR_HASH_CNTL__CHANNEL_XOR_COUNT_MASK 0x70000
#define TCP_BUFFER_ADDR_HASH_CNTL__CHANNEL_XOR_COUNT__SHIFT 0x10
#define TCP_BUFFER_ADDR_HASH_CNTL__BANK_XOR_COUNT_MASK 0x7000000
#define TCP_BUFFER_ADDR_HASH_CNTL__BANK_XOR_COUNT__SHIFT 0x18
/* TCP_EDC_COUNTER: SEC_COUNT bits 3:0, DED_COUNT bits 19:16. */
#define TCP_EDC_COUNTER__SEC_COUNT_MASK 0xf
#define TCP_EDC_COUNTER__SEC_COUNT__SHIFT 0x0
#define TCP_EDC_COUNTER__DED_COUNT_MASK 0xf0000
#define TCP_EDC_COUNTER__DED_COUNT__SHIFT 0x10
/*
 * TC_CFG_L1_LOAD_POLICY0 / POLICY1: thirty-two 2-bit POLICY_n fields split
 * across two registers. POLICY0 holds POLICY_0..15 and POLICY1 holds
 * POLICY_16..31; in both, the k-th field of the register occupies bits
 * [2k+1 : 2k] (so POLICY_16 restarts at bit 0 of POLICY1).
 */
#define TC_CFG_L1_LOAD_POLICY0__POLICY_0_MASK 0x3
#define TC_CFG_L1_LOAD_POLICY0__POLICY_0__SHIFT 0x0
#define TC_CFG_L1_LOAD_POLICY0__POLICY_1_MASK 0xc
#define TC_CFG_L1_LOAD_POLICY0__POLICY_1__SHIFT 0x2
#define TC_CFG_L1_LOAD_POLICY0__POLICY_2_MASK 0x30
#define TC_CFG_L1_LOAD_POLICY0__POLICY_2__SHIFT 0x4
#define TC_CFG_L1_LOAD_POLICY0__POLICY_3_MASK 0xc0
#define TC_CFG_L1_LOAD_POLICY0__POLICY_3__SHIFT 0x6
#define TC_CFG_L1_LOAD_POLICY0__POLICY_4_MASK 0x300
#define TC_CFG_L1_LOAD_POLICY0__POLICY_4__SHIFT 0x8
#define TC_CFG_L1_LOAD_POLICY0__POLICY_5_MASK 0xc00
#define TC_CFG_L1_LOAD_POLICY0__POLICY_5__SHIFT 0xa
#define TC_CFG_L1_LOAD_POLICY0__POLICY_6_MASK 0x3000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_6__SHIFT 0xc
#define TC_CFG_L1_LOAD_POLICY0__POLICY_7_MASK 0xc000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_7__SHIFT 0xe
#define TC_CFG_L1_LOAD_POLICY0__POLICY_8_MASK 0x30000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_8__SHIFT 0x10
#define TC_CFG_L1_LOAD_POLICY0__POLICY_9_MASK 0xc0000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_9__SHIFT 0x12
#define TC_CFG_L1_LOAD_POLICY0__POLICY_10_MASK 0x300000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_10__SHIFT 0x14
#define TC_CFG_L1_LOAD_POLICY0__POLICY_11_MASK 0xc00000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_11__SHIFT 0x16
#define TC_CFG_L1_LOAD_POLICY0__POLICY_12_MASK 0x3000000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_12__SHIFT 0x18
#define TC_CFG_L1_LOAD_POLICY0__POLICY_13_MASK 0xc000000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_13__SHIFT 0x1a
#define TC_CFG_L1_LOAD_POLICY0__POLICY_14_MASK 0x30000000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_14__SHIFT 0x1c
#define TC_CFG_L1_LOAD_POLICY0__POLICY_15_MASK 0xc0000000
#define TC_CFG_L1_LOAD_POLICY0__POLICY_15__SHIFT 0x1e
#define TC_CFG_L1_LOAD_POLICY1__POLICY_16_MASK 0x3
#define TC_CFG_L1_LOAD_POLICY1__POLICY_16__SHIFT 0x0
#define TC_CFG_L1_LOAD_POLICY1__POLICY_17_MASK 0xc
#define TC_CFG_L1_LOAD_POLICY1__POLICY_17__SHIFT 0x2
#define TC_CFG_L1_LOAD_POLICY1__POLICY_18_MASK 0x30
#define TC_CFG_L1_LOAD_POLICY1__POLICY_18__SHIFT 0x4
#define TC_CFG_L1_LOAD_POLICY1__POLICY_19_MASK 0xc0
#define TC_CFG_L1_LOAD_POLICY1__POLICY_19__SHIFT 0x6
#define TC_CFG_L1_LOAD_POLICY1__POLICY_20_MASK 0x300
#define TC_CFG_L1_LOAD_POLICY1__POLICY_20__SHIFT 0x8
#define TC_CFG_L1_LOAD_POLICY1__POLICY_21_MASK 0xc00
#define TC_CFG_L1_LOAD_POLICY1__POLICY_21__SHIFT 0xa
#define TC_CFG_L1_LOAD_POLICY1__POLICY_22_MASK 0x3000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_22__SHIFT 0xc
#define TC_CFG_L1_LOAD_POLICY1__POLICY_23_MASK 0xc000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_23__SHIFT 0xe
#define TC_CFG_L1_LOAD_POLICY1__POLICY_24_MASK 0x30000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_24__SHIFT 0x10
#define TC_CFG_L1_LOAD_POLICY1__POLICY_25_MASK 0xc0000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_25__SHIFT 0x12
#define TC_CFG_L1_LOAD_POLICY1__POLICY_26_MASK 0x300000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_26__SHIFT 0x14
#define TC_CFG_L1_LOAD_POLICY1__POLICY_27_MASK 0xc00000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_27__SHIFT 0x16
#define TC_CFG_L1_LOAD_POLICY1__POLICY_28_MASK 0x3000000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_28__SHIFT 0x18
#define TC_CFG_L1_LOAD_POLICY1__POLICY_29_MASK 0xc000000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_29__SHIFT 0x1a
#define TC_CFG_L1_LOAD_POLICY1__POLICY_30_MASK 0x30000000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_30__SHIFT 0x1c
#define TC_CFG_L1_LOAD_POLICY1__POLICY_31_MASK 0xc0000000
#define TC_CFG_L1_LOAD_POLICY1__POLICY_31__SHIFT 0x1e
/*
 * TC_CFG_L1_STORE_POLICY: thirty-two 1-bit POLICY_n fields in a single
 * register; POLICY_n is bit n (mask 1 << n, shift n). Unlike the 2-bit
 * load-policy fields, all 32 entries fit in one register.
 */
#define TC_CFG_L1_STORE_POLICY__POLICY_0_MASK 0x1
#define TC_CFG_L1_STORE_POLICY__POLICY_0__SHIFT 0x0
#define TC_CFG_L1_STORE_POLICY__POLICY_1_MASK 0x2
#define TC_CFG_L1_STORE_POLICY__POLICY_1__SHIFT 0x1
#define TC_CFG_L1_STORE_POLICY__POLICY_2_MASK 0x4
#define TC_CFG_L1_STORE_POLICY__POLICY_2__SHIFT 0x2
#define TC_CFG_L1_STORE_POLICY__POLICY_3_MASK 0x8
#define TC_CFG_L1_STORE_POLICY__POLICY_3__SHIFT 0x3
#define TC_CFG_L1_STORE_POLICY__POLICY_4_MASK 0x10
#define TC_CFG_L1_STORE_POLICY__POLICY_4__SHIFT 0x4
#define TC_CFG_L1_STORE_POLICY__POLICY_5_MASK 0x20
#define TC_CFG_L1_STORE_POLICY__POLICY_5__SHIFT 0x5
#define TC_CFG_L1_STORE_POLICY__POLICY_6_MASK 0x40
#define TC_CFG_L1_STORE_POLICY__POLICY_6__SHIFT 0x6
#define TC_CFG_L1_STORE_POLICY__POLICY_7_MASK 0x80
#define TC_CFG_L1_STORE_POLICY__POLICY_7__SHIFT 0x7
#define TC_CFG_L1_STORE_POLICY__POLICY_8_MASK 0x100
#define TC_CFG_L1_STORE_POLICY__POLICY_8__SHIFT 0x8
#define TC_CFG_L1_STORE_POLICY__POLICY_9_MASK 0x200
#define TC_CFG_L1_STORE_POLICY__POLICY_9__SHIFT 0x9
#define TC_CFG_L1_STORE_POLICY__POLICY_10_MASK 0x400
#define TC_CFG_L1_STORE_POLICY__POLICY_10__SHIFT 0xa
#define TC_CFG_L1_STORE_POLICY__POLICY_11_MASK 0x800
#define TC_CFG_L1_STORE_POLICY__POLICY_11__SHIFT 0xb
#define TC_CFG_L1_STORE_POLICY__POLICY_12_MASK 0x1000
#define TC_CFG_L1_STORE_POLICY__POLICY_12__SHIFT 0xc
#define TC_CFG_L1_STORE_POLICY__POLICY_13_MASK 0x2000
#define TC_CFG_L1_STORE_POLICY__POLICY_13__SHIFT 0xd
#define TC_CFG_L1_STORE_POLICY__POLICY_14_MASK 0x4000
#define TC_CFG_L1_STORE_POLICY__POLICY_14__SHIFT 0xe
#define TC_CFG_L1_STORE_POLICY__POLICY_15_MASK 0x8000
#define TC_CFG_L1_STORE_POLICY__POLICY_15__SHIFT 0xf
#define TC_CFG_L1_STORE_POLICY__POLICY_16_MASK 0x10000
#define TC_CFG_L1_STORE_POLICY__POLICY_16__SHIFT 0x10
#define TC_CFG_L1_STORE_POLICY__POLICY_17_MASK 0x20000
#define TC_CFG_L1_STORE_POLICY__POLICY_17__SHIFT 0x11
#define TC_CFG_L1_STORE_POLICY__POLICY_18_MASK 0x40000
#define TC_CFG_L1_STORE_POLICY__POLICY_18__SHIFT 0x12
#define TC_CFG_L1_STORE_POLICY__POLICY_19_MASK 0x80000
#define TC_CFG_L1_STORE_POLICY__POLICY_19__SHIFT 0x13
#define TC_CFG_L1_STORE_POLICY__POLICY_20_MASK 0x100000
#define TC_CFG_L1_STORE_POLICY__POLICY_20__SHIFT 0x14
#define TC_CFG_L1_STORE_POLICY__POLICY_21_MASK 0x200000
#define TC_CFG_L1_STORE_POLICY__POLICY_21__SHIFT 0x15
#define TC_CFG_L1_STORE_POLICY__POLICY_22_MASK 0x400000
#define TC_CFG_L1_STORE_POLICY__POLICY_22__SHIFT 0x16
#define TC_CFG_L1_STORE_POLICY__POLICY_23_MASK 0x800000
#define TC_CFG_L1_STORE_POLICY__POLICY_23__SHIFT 0x17
#define TC_CFG_L1_STORE_POLICY__POLICY_24_MASK 0x1000000
#define TC_CFG_L1_STORE_POLICY__POLICY_24__SHIFT 0x18
#define TC_CFG_L1_STORE_POLICY__POLICY_25_MASK 0x2000000
#define TC_CFG_L1_STORE_POLICY__POLICY_25__SHIFT 0x19
#define TC_CFG_L1_STORE_POLICY__POLICY_26_MASK 0x4000000
#define TC_CFG_L1_STORE_POLICY__POLICY_26__SHIFT 0x1a
#define TC_CFG_L1_STORE_POLICY__POLICY_27_MASK 0x8000000
#define TC_CFG_L1_STORE_POLICY__POLICY_27__SHIFT 0x1b
#define TC_CFG_L1_STORE_POLICY__POLICY_28_MASK 0x10000000
#define TC_CFG_L1_STORE_POLICY__POLICY_28__SHIFT 0x1c
#define TC_CFG_L1_STORE_POLICY__POLICY_29_MASK 0x20000000
#define TC_CFG_L1_STORE_POLICY__POLICY_29__SHIFT 0x1d
#define TC_CFG_L1_STORE_POLICY__POLICY_30_MASK 0x40000000
#define TC_CFG_L1_STORE_POLICY__POLICY_30__SHIFT 0x1e
#define TC_CFG_L1_STORE_POLICY__POLICY_31_MASK 0x80000000
#define TC_CFG_L1_STORE_POLICY__POLICY_31__SHIFT 0x1f
/*
 * TC_CFG_L2_LOAD_POLICY0 / POLICY1: thirty-two 2-bit POLICY_n fields split
 * across two registers. POLICY0 holds POLICY_0..15 and POLICY1 holds
 * POLICY_16..31; in both, the k-th field of the register occupies bits
 * [2k+1 : 2k].
 */
#define TC_CFG_L2_LOAD_POLICY0__POLICY_0_MASK 0x3
#define TC_CFG_L2_LOAD_POLICY0__POLICY_0__SHIFT 0x0
#define TC_CFG_L2_LOAD_POLICY0__POLICY_1_MASK 0xc
#define TC_CFG_L2_LOAD_POLICY0__POLICY_1__SHIFT 0x2
#define TC_CFG_L2_LOAD_POLICY0__POLICY_2_MASK 0x30
#define TC_CFG_L2_LOAD_POLICY0__POLICY_2__SHIFT 0x4
#define TC_CFG_L2_LOAD_POLICY0__POLICY_3_MASK 0xc0
#define TC_CFG_L2_LOAD_POLICY0__POLICY_3__SHIFT 0x6
#define TC_CFG_L2_LOAD_POLICY0__POLICY_4_MASK 0x300
#define TC_CFG_L2_LOAD_POLICY0__POLICY_4__SHIFT 0x8
#define TC_CFG_L2_LOAD_POLICY0__POLICY_5_MASK 0xc00
#define TC_CFG_L2_LOAD_POLICY0__POLICY_5__SHIFT 0xa
#define TC_CFG_L2_LOAD_POLICY0__POLICY_6_MASK 0x3000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_6__SHIFT 0xc
#define TC_CFG_L2_LOAD_POLICY0__POLICY_7_MASK 0xc000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_7__SHIFT 0xe
#define TC_CFG_L2_LOAD_POLICY0__POLICY_8_MASK 0x30000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_8__SHIFT 0x10
#define TC_CFG_L2_LOAD_POLICY0__POLICY_9_MASK 0xc0000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_9__SHIFT 0x12
#define TC_CFG_L2_LOAD_POLICY0__POLICY_10_MASK 0x300000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_10__SHIFT 0x14
#define TC_CFG_L2_LOAD_POLICY0__POLICY_11_MASK 0xc00000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_11__SHIFT 0x16
#define TC_CFG_L2_LOAD_POLICY0__POLICY_12_MASK 0x3000000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_12__SHIFT 0x18
#define TC_CFG_L2_LOAD_POLICY0__POLICY_13_MASK 0xc000000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_13__SHIFT 0x1a
#define TC_CFG_L2_LOAD_POLICY0__POLICY_14_MASK 0x30000000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_14__SHIFT 0x1c
#define TC_CFG_L2_LOAD_POLICY0__POLICY_15_MASK 0xc0000000
#define TC_CFG_L2_LOAD_POLICY0__POLICY_15__SHIFT 0x1e
#define TC_CFG_L2_LOAD_POLICY1__POLICY_16_MASK 0x3
#define TC_CFG_L2_LOAD_POLICY1__POLICY_16__SHIFT 0x0
#define TC_CFG_L2_LOAD_POLICY1__POLICY_17_MASK 0xc
#define TC_CFG_L2_LOAD_POLICY1__POLICY_17__SHIFT 0x2
#define TC_CFG_L2_LOAD_POLICY1__POLICY_18_MASK 0x30
#define TC_CFG_L2_LOAD_POLICY1__POLICY_18__SHIFT 0x4
#define TC_CFG_L2_LOAD_POLICY1__POLICY_19_MASK 0xc0
#define TC_CFG_L2_LOAD_POLICY1__POLICY_19__SHIFT 0x6
#define TC_CFG_L2_LOAD_POLICY1__POLICY_20_MASK 0x300
#define TC_CFG_L2_LOAD_POLICY1__POLICY_20__SHIFT 0x8
#define TC_CFG_L2_LOAD_POLICY1__POLICY_21_MASK 0xc00
#define TC_CFG_L2_LOAD_POLICY1__POLICY_21__SHIFT 0xa
#define TC_CFG_L2_LOAD_POLICY1__POLICY_22_MASK 0x3000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_22__SHIFT 0xc
#define TC_CFG_L2_LOAD_POLICY1__POLICY_23_MASK 0xc000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_23__SHIFT 0xe
#define TC_CFG_L2_LOAD_POLICY1__POLICY_24_MASK 0x30000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_24__SHIFT 0x10
#define TC_CFG_L2_LOAD_POLICY1__POLICY_25_MASK 0xc0000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_25__SHIFT 0x12
#define TC_CFG_L2_LOAD_POLICY1__POLICY_26_MASK 0x300000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_26__SHIFT 0x14
#define TC_CFG_L2_LOAD_POLICY1__POLICY_27_MASK 0xc00000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_27__SHIFT 0x16
#define TC_CFG_L2_LOAD_POLICY1__POLICY_28_MASK 0x3000000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_28__SHIFT 0x18
#define TC_CFG_L2_LOAD_POLICY1__POLICY_29_MASK 0xc000000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_29__SHIFT 0x1a
#define TC_CFG_L2_LOAD_POLICY1__POLICY_30_MASK 0x30000000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_30__SHIFT 0x1c
#define TC_CFG_L2_LOAD_POLICY1__POLICY_31_MASK 0xc0000000
#define TC_CFG_L2_LOAD_POLICY1__POLICY_31__SHIFT 0x1e
/*
 * TC_CFG_L2_STORE_POLICY0 / POLICY1: thirty-two 2-bit POLICY_n fields split
 * across two registers. POLICY0 holds POLICY_0..15 and POLICY1 holds
 * POLICY_16..31; in both, the k-th field of the register occupies bits
 * [2k+1 : 2k].
 */
#define TC_CFG_L2_STORE_POLICY0__POLICY_0_MASK 0x3
#define TC_CFG_L2_STORE_POLICY0__POLICY_0__SHIFT 0x0
#define TC_CFG_L2_STORE_POLICY0__POLICY_1_MASK 0xc
#define TC_CFG_L2_STORE_POLICY0__POLICY_1__SHIFT 0x2
#define TC_CFG_L2_STORE_POLICY0__POLICY_2_MASK 0x30
#define TC_CFG_L2_STORE_POLICY0__POLICY_2__SHIFT 0x4
#define TC_CFG_L2_STORE_POLICY0__POLICY_3_MASK 0xc0
#define TC_CFG_L2_STORE_POLICY0__POLICY_3__SHIFT 0x6
#define TC_CFG_L2_STORE_POLICY0__POLICY_4_MASK 0x300
#define TC_CFG_L2_STORE_POLICY0__POLICY_4__SHIFT 0x8
#define TC_CFG_L2_STORE_POLICY0__POLICY_5_MASK 0xc00
#define TC_CFG_L2_STORE_POLICY0__POLICY_5__SHIFT 0xa
#define TC_CFG_L2_STORE_POLICY0__POLICY_6_MASK 0x3000
#define TC_CFG_L2_STORE_POLICY0__POLICY_6__SHIFT 0xc
#define TC_CFG_L2_STORE_POLICY0__POLICY_7_MASK 0xc000
#define TC_CFG_L2_STORE_POLICY0__POLICY_7__SHIFT 0xe
#define TC_CFG_L2_STORE_POLICY0__POLICY_8_MASK 0x30000
#define TC_CFG_L2_STORE_POLICY0__POLICY_8__SHIFT 0x10
#define TC_CFG_L2_STORE_POLICY0__POLICY_9_MASK 0xc0000
#define TC_CFG_L2_STORE_POLICY0__POLICY_9__SHIFT 0x12
#define TC_CFG_L2_STORE_POLICY0__POLICY_10_MASK 0x300000
#define TC_CFG_L2_STORE_POLICY0__POLICY_10__SHIFT 0x14
#define TC_CFG_L2_STORE_POLICY0__POLICY_11_MASK 0xc00000
#define TC_CFG_L2_STORE_POLICY0__POLICY_11__SHIFT 0x16
#define TC_CFG_L2_STORE_POLICY0__POLICY_12_MASK 0x3000000
#define TC_CFG_L2_STORE_POLICY0__POLICY_12__SHIFT 0x18
#define TC_CFG_L2_STORE_POLICY0__POLICY_13_MASK 0xc000000
#define TC_CFG_L2_STORE_POLICY0__POLICY_13__SHIFT 0x1a
#define TC_CFG_L2_STORE_POLICY0__POLICY_14_MASK 0x30000000
#define TC_CFG_L2_STORE_POLICY0__POLICY_14__SHIFT 0x1c
#define TC_CFG_L2_STORE_POLICY0__POLICY_15_MASK 0xc0000000
#define TC_CFG_L2_STORE_POLICY0__POLICY_15__SHIFT 0x1e
#define TC_CFG_L2_STORE_POLICY1__POLICY_16_MASK 0x3
#define TC_CFG_L2_STORE_POLICY1__POLICY_16__SHIFT 0x0
#define TC_CFG_L2_STORE_POLICY1__POLICY_17_MASK 0xc
#define TC_CFG_L2_STORE_POLICY1__POLICY_17__SHIFT 0x2
#define TC_CFG_L2_STORE_POLICY1__POLICY_18_MASK 0x30
#define TC_CFG_L2_STORE_POLICY1__POLICY_18__SHIFT 0x4
#define TC_CFG_L2_STORE_POLICY1__POLICY_19_MASK 0xc0
#define TC_CFG_L2_STORE_POLICY1__POLICY_19__SHIFT 0x6
#define TC_CFG_L2_STORE_POLICY1__POLICY_20_MASK 0x300
#define TC_CFG_L2_STORE_POLICY1__POLICY_20__SHIFT 0x8
#define TC_CFG_L2_STORE_POLICY1__POLICY_21_MASK 0xc00
#define TC_CFG_L2_STORE_POLICY1__POLICY_21__SHIFT 0xa
#define TC_CFG_L2_STORE_POLICY1__POLICY_22_MASK 0x3000
#define TC_CFG_L2_STORE_POLICY1__POLICY_22__SHIFT 0xc
#define TC_CFG_L2_STORE_POLICY1__POLICY_23_MASK 0xc000
#define TC_CFG_L2_STORE_POLICY1__POLICY_23__SHIFT 0xe
#define TC_CFG_L2_STORE_POLICY1__POLICY_24_MASK 0x30000
#define TC_CFG_L2_STORE_POLICY1__POLICY_24__SHIFT 0x10
#define TC_CFG_L2_STORE_POLICY1__POLICY_25_MASK 0xc0000
#define TC_CFG_L2_STORE_POLICY1__POLICY_25__SHIFT 0x12
#define TC_CFG_L2_STORE_POLICY1__POLICY_26_MASK 0x300000
#define TC_CFG_L2_STORE_POLICY1__POLICY_26__SHIFT 0x14
#define TC_CFG_L2_STORE_POLICY1__POLICY_27_MASK 0xc00000
#define TC_CFG_L2_STORE_POLICY1__POLICY_27__SHIFT 0x16
#define TC_CFG_L2_STORE_POLICY1__POLICY_28_MASK 0x3000000
#define TC_CFG_L2_STORE_POLICY1__POLICY_28__SHIFT 0x18
#define TC_CFG_L2_STORE_POLICY1__POLICY_29_MASK 0xc000000
#define TC_CFG_L2_STORE_POLICY1__POLICY_29__SHIFT 0x1a
#define TC_CFG_L2_STORE_POLICY1__POLICY_30_MASK 0x30000000
#define TC_CFG_L2_STORE_POLICY1__POLICY_30__SHIFT 0x1c
#define TC_CFG_L2_STORE_POLICY1__POLICY_31_MASK 0xc0000000
#define TC_CFG_L2_STORE_POLICY1__POLICY_31__SHIFT 0x1e
/*
 * TC_CFG_L2_ATOMIC_POLICY: sixteen 2-bit POLICY_n fields in one register;
 * POLICY_n occupies bits [2n+1 : 2n]. Only POLICY_0..15 are defined for
 * this register in this block (no second register with POLICY_16..31).
 */
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_0_MASK 0x3
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_0__SHIFT 0x0
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_1_MASK 0xc
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_1__SHIFT 0x2
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_2_MASK 0x30
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_2__SHIFT 0x4
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_3_MASK 0xc0
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_3__SHIFT 0x6
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_4_MASK 0x300
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_4__SHIFT 0x8
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_5_MASK 0xc00
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_5__SHIFT 0xa
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_6_MASK 0x3000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_6__SHIFT 0xc
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_7_MASK 0xc000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_7__SHIFT 0xe
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_8_MASK 0x30000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_8__SHIFT 0x10
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_9_MASK 0xc0000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_9__SHIFT 0x12
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_10_MASK 0x300000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_10__SHIFT 0x14
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_11_MASK 0xc00000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_11__SHIFT 0x16
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_12_MASK 0x3000000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_12__SHIFT 0x18
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_13_MASK 0xc000000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_13__SHIFT 0x1a
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_14_MASK 0x30000000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_14__SHIFT 0x1c
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_15_MASK 0xc0000000
#define TC_CFG_L2_ATOMIC_POLICY__POLICY_15__SHIFT 0x1e
/* TC_CFG_L1_VOLATILE / TC_CFG_L2_VOLATILE: 4-bit VOL field in bits 3:0. */
#define TC_CFG_L1_VOLATILE__VOL_MASK 0xf
#define TC_CFG_L1_VOLATILE__VOL__SHIFT 0x0
#define TC_CFG_L2_VOLATILE__VOL_MASK 0xf
#define TC_CFG_L2_VOLATILE__VOL__SHIFT 0x0
/*
 * TCP_WATCH0..3_ADDR_H: ADDR field is bits 15:0.
 * TCP_WATCH0..3_ADDR_L: ADDR field is bits 31:6 (mask 0xffffffc0, shift 6);
 * the low six bits are not part of the field.
 * NOTE(review): the 6-bit shift suggests a 64-byte-aligned watch address --
 * confirm against the register specification before relying on it.
 */
#define TCP_WATCH0_ADDR_H__ADDR_MASK 0xffff
#define TCP_WATCH0_ADDR_H__ADDR__SHIFT 0x0
#define TCP_WATCH1_ADDR_H__ADDR_MASK 0xffff
#define TCP_WATCH1_ADDR_H__ADDR__SHIFT 0x0
#define TCP_WATCH2_ADDR_H__ADDR_MASK 0xffff
#define TCP_WATCH2_ADDR_H__ADDR__SHIFT 0x0
#define TCP_WATCH3_ADDR_H__ADDR_MASK 0xffff
#define TCP_WATCH3_ADDR_H__ADDR__SHIFT 0x0
#define TCP_WATCH0_ADDR_L__ADDR_MASK 0xffffffc0
#define TCP_WATCH0_ADDR_L__ADDR__SHIFT 0x6
#define TCP_WATCH1_ADDR_L__ADDR_MASK 0xffffffc0
#define TCP_WATCH1_ADDR_L__ADDR__SHIFT 0x6
#define TCP_WATCH2_ADDR_L__ADDR_MASK 0xffffffc0
#define TCP_WATCH2_ADDR_L__ADDR__SHIFT 0x6
#define TCP_WATCH3_ADDR_L__ADDR_MASK 0xffffffc0
#define TCP_WATCH3_ADDR_L__ADDR__SHIFT 0x6
/*
 * TCP_WATCH0..3_CNTL share one layout:
 *   MASK  bits 23:0  (the field itself is named "MASK", hence MASK_MASK)
 *   VMID  bits 27:24
 *   MODE  bits 30:29
 *   VALID bit  31
 * Bit 28 has no field defined here.
 */
#define TCP_WATCH0_CNTL__MASK_MASK 0xffffff
#define TCP_WATCH0_CNTL__MASK__SHIFT 0x0
#define TCP_WATCH0_CNTL__VMID_MASK 0xf000000
#define TCP_WATCH0_CNTL__VMID__SHIFT 0x18
#define TCP_WATCH0_CNTL__MODE_MASK 0x60000000
#define TCP_WATCH0_CNTL__MODE__SHIFT 0x1d
#define TCP_WATCH0_CNTL__VALID_MASK 0x80000000
#define TCP_WATCH0_CNTL__VALID__SHIFT 0x1f
#define TCP_WATCH1_CNTL__MASK_MASK 0xffffff
#define TCP_WATCH1_CNTL__MASK__SHIFT 0x0
#define TCP_WATCH1_CNTL__VMID_MASK 0xf000000
#define TCP_WATCH1_CNTL__VMID__SHIFT 0x18
#define TCP_WATCH1_CNTL__MODE_MASK 0x60000000
#define TCP_WATCH1_CNTL__MODE__SHIFT 0x1d
#define TCP_WATCH1_CNTL__VALID_MASK 0x80000000
#define TCP_WATCH1_CNTL__VALID__SHIFT 0x1f
#define TCP_WATCH2_CNTL__MASK_MASK 0xffffff
#define TCP_WATCH2_CNTL__MASK__SHIFT 0x0
#define TCP_WATCH2_CNTL__VMID_MASK 0xf000000
#define TCP_WATCH2_CNTL__VMID__SHIFT 0x18
#define TCP_WATCH2_CNTL__MODE_MASK 0x60000000
#define TCP_WATCH2_CNTL__MODE__SHIFT 0x1d
#define TCP_WATCH2_CNTL__VALID_MASK 0x80000000
#define TCP_WATCH2_CNTL__VALID__SHIFT 0x1f
#define TCP_WATCH3_CNTL__MASK_MASK 0xffffff
#define TCP_WATCH3_CNTL__MASK__SHIFT 0x0
#define TCP_WATCH3_CNTL__VMID_MASK 0xf000000
#define TCP_WATCH3_CNTL__VMID__SHIFT 0x18
#define TCP_WATCH3_CNTL__MODE_MASK 0x60000000
#define TCP_WATCH3_CNTL__MODE__SHIFT 0x1d
#define TCP_WATCH3_CNTL__VALID_MASK 0x80000000
#define TCP_WATCH3_CNTL__VALID__SHIFT 0x1f
/*
 * TD_CGTT_CTRL field layout.
 *   ON_DELAY       bits [3:0]
 *   OFF_HYSTERESIS bits [11:4]
 *   SOFT_OVERRIDE7..SOFT_OVERRIDE0: one bit each, in descending index
 *   order from bit 24 (OVERRIDE7) up to bit 31 (OVERRIDE0).
 * Bits [23:12] are not covered by any field defined here.
 */
#define TD_CGTT_CTRL__ON_DELAY_MASK 0xf
#define TD_CGTT_CTRL__ON_DELAY__SHIFT 0x0
#define TD_CGTT_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define TD_CGTT_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define TD_CGTT_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define TD_CGTT_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define TD_CGTT_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define TD_CGTT_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define TD_CGTT_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define TD_CGTT_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define TD_CGTT_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define TD_CGTT_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define TD_CGTT_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * TA_CGTT_CTRL field layout.
 *   ON_DELAY       bits [3:0]
 *   OFF_HYSTERESIS bits [11:4]
 *   SOFT_OVERRIDE7..SOFT_OVERRIDE0: one bit each, in descending index
 *   order from bit 24 (OVERRIDE7) up to bit 31 (OVERRIDE0).
 * Bits [23:12] are not covered by any field defined here.
 */
#define TA_CGTT_CTRL__ON_DELAY_MASK 0xf
#define TA_CGTT_CTRL__ON_DELAY__SHIFT 0x0
#define TA_CGTT_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define TA_CGTT_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define TA_CGTT_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define TA_CGTT_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define TA_CGTT_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define TA_CGTT_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define TA_CGTT_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define TA_CGTT_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define TA_CGTT_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define TA_CGTT_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define TA_CGTT_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_TCP_CLK_CTRL field layout.
 *   ON_DELAY       bits [3:0]
 *   OFF_HYSTERESIS bits [11:4]
 *   SOFT_OVERRIDE7..SOFT_OVERRIDE0: one bit each, in descending index
 *   order from bit 24 (OVERRIDE7) up to bit 31 (OVERRIDE0).
 * Bits [23:12] are not covered by any field defined here.
 */
#define CGTT_TCP_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_TCP_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_TCP_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_TCP_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_TCP_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * CGTT_TCI_CLK_CTRL field layout.
 *   ON_DELAY       bits [3:0]
 *   OFF_HYSTERESIS bits [11:4]
 *   SOFT_OVERRIDE7..SOFT_OVERRIDE0: one bit each, in descending index
 *   order from bit 24 (OVERRIDE7) up to bit 31 (OVERRIDE0).
 * Bits [23:12] are not covered by any field defined here.
 */
#define CGTT_TCI_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_TCI_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_TCI_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_TCI_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_TCI_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * TCI_STATUS / TCI_CNTL_1 / TCI_CNTL_2 field layout.
 *   TCI_STATUS: TCI_BUSY bit 0.
 *   TCI_CNTL_1: WBINVL1_NUM_CYCLES [15:0], REQ_FIFO_DEPTH [23:16],
 *               WDATA_RAM_DEPTH [31:24].
 *   TCI_CNTL_2: L1_INVAL_ON_WBINVL2 bit 0, TCA_MAX_CREDIT [8:1].
 */
#define TCI_STATUS__TCI_BUSY_MASK 0x1
#define TCI_STATUS__TCI_BUSY__SHIFT 0x0
#define TCI_CNTL_1__WBINVL1_NUM_CYCLES_MASK 0xffff
#define TCI_CNTL_1__WBINVL1_NUM_CYCLES__SHIFT 0x0
#define TCI_CNTL_1__REQ_FIFO_DEPTH_MASK 0xff0000
#define TCI_CNTL_1__REQ_FIFO_DEPTH__SHIFT 0x10
#define TCI_CNTL_1__WDATA_RAM_DEPTH_MASK 0xff000000
#define TCI_CNTL_1__WDATA_RAM_DEPTH__SHIFT 0x18
#define TCI_CNTL_2__L1_INVAL_ON_WBINVL2_MASK 0x1
#define TCI_CNTL_2__L1_INVAL_ON_WBINVL2__SHIFT 0x0
#define TCI_CNTL_2__TCA_MAX_CREDIT_MASK 0x1fe
#define TCI_CNTL_2__TCA_MAX_CREDIT__SHIFT 0x1
/*
 * GDS_CONFIG: four 2-bit SHn_GPR_PHASE_SEL fields packed back to back:
 * SH0 [2:1], SH1 [4:3], SH2 [6:5], SH3 [8:7]. Bit 0 is not covered by
 * any field defined here.
 */
#define GDS_CONFIG__SH0_GPR_PHASE_SEL_MASK 0x6
#define GDS_CONFIG__SH0_GPR_PHASE_SEL__SHIFT 0x1
#define GDS_CONFIG__SH1_GPR_PHASE_SEL_MASK 0x18
#define GDS_CONFIG__SH1_GPR_PHASE_SEL__SHIFT 0x3
#define GDS_CONFIG__SH2_GPR_PHASE_SEL_MASK 0x60
#define GDS_CONFIG__SH2_GPR_PHASE_SEL__SHIFT 0x5
#define GDS_CONFIG__SH3_GPR_PHASE_SEL_MASK 0x180
#define GDS_CONFIG__SH3_GPR_PHASE_SEL__SHIFT 0x7
/* GDS_CNTL_STATUS: seven single-bit fields in bits 0..6. */
#define GDS_CNTL_STATUS__GDS_BUSY_MASK 0x1
#define GDS_CNTL_STATUS__GDS_BUSY__SHIFT 0x0
#define GDS_CNTL_STATUS__GRBM_WBUF_BUSY_MASK 0x2
#define GDS_CNTL_STATUS__GRBM_WBUF_BUSY__SHIFT 0x1
#define GDS_CNTL_STATUS__ORD_APP_BUSY_MASK 0x4
#define GDS_CNTL_STATUS__ORD_APP_BUSY__SHIFT 0x2
#define GDS_CNTL_STATUS__DS_BANK_CONFLICT_MASK 0x8
#define GDS_CNTL_STATUS__DS_BANK_CONFLICT__SHIFT 0x3
#define GDS_CNTL_STATUS__DS_ADDR_CONFLICT_MASK 0x10
#define GDS_CNTL_STATUS__DS_ADDR_CONFLICT__SHIFT 0x4
#define GDS_CNTL_STATUS__DS_WR_CLAMP_MASK 0x20
#define GDS_CNTL_STATUS__DS_WR_CLAMP__SHIFT 0x5
#define GDS_CNTL_STATUS__DS_RD_CLAMP_MASK 0x40
#define GDS_CNTL_STATUS__DS_RD_CLAMP__SHIFT 0x6
/* GDS_ENHANCE2: MISC [15:0], UNUSED [31:16]. */
#define GDS_ENHANCE2__MISC_MASK 0xffff
#define GDS_ENHANCE2__MISC__SHIFT 0x0
#define GDS_ENHANCE2__UNUSED_MASK 0xffff0000
#define GDS_ENHANCE2__UNUSED__SHIFT 0x10
/*
 * GDS_PROTECTION_FAULT field layout:
 *   WRITE_DIS bit 0, FAULT_DETECTED bit 1, GRBM bit 2, SH_ID [5:3],
 *   CU_ID [9:6], SIMD_ID [11:10], WAVE_ID [15:12], ADDRESS [31:16].
 */
#define GDS_PROTECTION_FAULT__WRITE_DIS_MASK 0x1
#define GDS_PROTECTION_FAULT__WRITE_DIS__SHIFT 0x0
#define GDS_PROTECTION_FAULT__FAULT_DETECTED_MASK 0x2
#define GDS_PROTECTION_FAULT__FAULT_DETECTED__SHIFT 0x1
#define GDS_PROTECTION_FAULT__GRBM_MASK 0x4
#define GDS_PROTECTION_FAULT__GRBM__SHIFT 0x2
#define GDS_PROTECTION_FAULT__SH_ID_MASK 0x38
#define GDS_PROTECTION_FAULT__SH_ID__SHIFT 0x3
#define GDS_PROTECTION_FAULT__CU_ID_MASK 0x3c0
#define GDS_PROTECTION_FAULT__CU_ID__SHIFT 0x6
#define GDS_PROTECTION_FAULT__SIMD_ID_MASK 0xc00
#define GDS_PROTECTION_FAULT__SIMD_ID__SHIFT 0xa
#define GDS_PROTECTION_FAULT__WAVE_ID_MASK 0xf000
#define GDS_PROTECTION_FAULT__WAVE_ID__SHIFT 0xc
#define GDS_PROTECTION_FAULT__ADDRESS_MASK 0xffff0000
#define GDS_PROTECTION_FAULT__ADDRESS__SHIFT 0x10
/*
 * GDS_VM_PROTECTION_FAULT field layout:
 *   WRITE_DIS bit 0, FAULT_DETECTED bit 1, GWS bit 2, OA bit 3,
 *   GRBM bit 4, VMID [11:8], ADDRESS [31:16].
 * Bits [7:5] and [15:12] are not covered by any field defined here.
 */
#define GDS_VM_PROTECTION_FAULT__WRITE_DIS_MASK 0x1
#define GDS_VM_PROTECTION_FAULT__WRITE_DIS__SHIFT 0x0
#define GDS_VM_PROTECTION_FAULT__FAULT_DETECTED_MASK 0x2
#define GDS_VM_PROTECTION_FAULT__FAULT_DETECTED__SHIFT 0x1
#define GDS_VM_PROTECTION_FAULT__GWS_MASK 0x4
#define GDS_VM_PROTECTION_FAULT__GWS__SHIFT 0x2
#define GDS_VM_PROTECTION_FAULT__OA_MASK 0x8
#define GDS_VM_PROTECTION_FAULT__OA__SHIFT 0x3
#define GDS_VM_PROTECTION_FAULT__GRBM_MASK 0x10
#define GDS_VM_PROTECTION_FAULT__GRBM__SHIFT 0x4
#define GDS_VM_PROTECTION_FAULT__VMID_MASK 0xf00
#define GDS_VM_PROTECTION_FAULT__VMID__SHIFT 0x8
#define GDS_VM_PROTECTION_FAULT__ADDRESS_MASK 0xffff0000
#define GDS_VM_PROTECTION_FAULT__ADDRESS__SHIFT 0x10
/*
 * GDS_SECDED_CNT / GDS_GRBM_SECDED_CNT: two 16-bit fields each,
 * DED in bits [15:0] and SEC in bits [31:16].
 */
#define GDS_SECDED_CNT__DED_MASK 0xffff
#define GDS_SECDED_CNT__DED__SHIFT 0x0
#define GDS_SECDED_CNT__SEC_MASK 0xffff0000
#define GDS_SECDED_CNT__SEC__SHIFT 0x10
#define GDS_GRBM_SECDED_CNT__DED_MASK 0xffff
#define GDS_GRBM_SECDED_CNT__DED__SHIFT 0x0
#define GDS_GRBM_SECDED_CNT__SEC_MASK 0xffff0000
#define GDS_GRBM_SECDED_CNT__SEC__SHIFT 0x10
/*
 * GDS_OA_DED: single-bit fields in bits 0..11 (three ME0 bits, UNUSED0
 * at bit 3, then four ME1_PIPEn and four ME2_PIPEn bits), followed by
 * UNUSED1 in bits [31:12].
 */
#define GDS_OA_DED__ME0_GFXHP3D_PIX_DED_MASK 0x1
#define GDS_OA_DED__ME0_GFXHP3D_PIX_DED__SHIFT 0x0
#define GDS_OA_DED__ME0_GFXHP3D_VTX_DED_MASK 0x2
#define GDS_OA_DED__ME0_GFXHP3D_VTX_DED__SHIFT 0x1
#define GDS_OA_DED__ME0_CS_DED_MASK 0x4
#define GDS_OA_DED__ME0_CS_DED__SHIFT 0x2
#define GDS_OA_DED__UNUSED0_MASK 0x8
#define GDS_OA_DED__UNUSED0__SHIFT 0x3
#define GDS_OA_DED__ME1_PIPE0_DED_MASK 0x10
#define GDS_OA_DED__ME1_PIPE0_DED__SHIFT 0x4
#define GDS_OA_DED__ME1_PIPE1_DED_MASK 0x20
#define GDS_OA_DED__ME1_PIPE1_DED__SHIFT 0x5
#define GDS_OA_DED__ME1_PIPE2_DED_MASK 0x40
#define GDS_OA_DED__ME1_PIPE2_DED__SHIFT 0x6
#define GDS_OA_DED__ME1_PIPE3_DED_MASK 0x80
#define GDS_OA_DED__ME1_PIPE3_DED__SHIFT 0x7
#define GDS_OA_DED__ME2_PIPE0_DED_MASK 0x100
#define GDS_OA_DED__ME2_PIPE0_DED__SHIFT 0x8
#define GDS_OA_DED__ME2_PIPE1_DED_MASK 0x200
#define GDS_OA_DED__ME2_PIPE1_DED__SHIFT 0x9
#define GDS_OA_DED__ME2_PIPE2_DED_MASK 0x400
#define GDS_OA_DED__ME2_PIPE2_DED__SHIFT 0xa
#define GDS_OA_DED__ME2_PIPE3_DED_MASK 0x800
#define GDS_OA_DED__ME2_PIPE3_DED__SHIFT 0xb
#define GDS_OA_DED__UNUSED1_MASK 0xfffff000
#define GDS_OA_DED__UNUSED1__SHIFT 0xc
/*
 * GDS_DEBUG_CNTL: GDS_DEBUG_INDX [4:0], UNUSED [31:5].
 * GDS_DEBUG_DATA: DATA spans all 32 bits.
 * NOTE(review): the names suggest an index/data register pair for
 * reading debug state; confirm against the hardware documentation.
 */
#define GDS_DEBUG_CNTL__GDS_DEBUG_INDX_MASK 0x1f
#define GDS_DEBUG_CNTL__GDS_DEBUG_INDX__SHIFT 0x0
#define GDS_DEBUG_CNTL__UNUSED_MASK 0xffffffe0
#define GDS_DEBUG_CNTL__UNUSED__SHIFT 0x5
#define GDS_DEBUG_DATA__DATA_MASK 0xffffffff
#define GDS_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * CGTT_GDS_CLK_CTRL field layout.
 *   ON_DELAY       bits [3:0]
 *   OFF_HYSTERESIS bits [11:4]
 *   SOFT_OVERRIDE7..SOFT_OVERRIDE0: one bit each, in descending index
 *   order from bit 24 (OVERRIDE7) up to bit 31 (OVERRIDE0).
 * Bits [23:12] are not covered by any field defined here.
 */
#define CGTT_GDS_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_GDS_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_GDS_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_GDS_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x2000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE5_MASK 0x4000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE5__SHIFT 0x1a
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE1_MASK 0x40000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE1__SHIFT 0x1e
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE0_MASK 0x80000000
#define CGTT_GDS_CLK_CTRL__SOFT_OVERRIDE0__SHIFT 0x1f
/*
 * GDS_RD_* / GDS_WR_* / GDS_WRITE_COMPLETE: every register in this
 * group defines exactly one field that spans all 32 bits
 * (mask 0xffffffff, shift 0).
 * Note that GDS_WR_BURST_ADDR and GDS_WR_BURST_DATA reuse the field
 * names WRITE_ADDR and WRITE_DATA rather than BURST_* names.
 */
#define GDS_RD_ADDR__READ_ADDR_MASK 0xffffffff
#define GDS_RD_ADDR__READ_ADDR__SHIFT 0x0
#define GDS_RD_DATA__READ_DATA_MASK 0xffffffff
#define GDS_RD_DATA__READ_DATA__SHIFT 0x0
#define GDS_RD_BURST_ADDR__BURST_ADDR_MASK 0xffffffff
#define GDS_RD_BURST_ADDR__BURST_ADDR__SHIFT 0x0
#define GDS_RD_BURST_COUNT__BURST_COUNT_MASK 0xffffffff
#define GDS_RD_BURST_COUNT__BURST_COUNT__SHIFT 0x0
#define GDS_RD_BURST_DATA__BURST_DATA_MASK 0xffffffff
#define GDS_RD_BURST_DATA__BURST_DATA__SHIFT 0x0
#define GDS_WR_ADDR__WRITE_ADDR_MASK 0xffffffff
#define GDS_WR_ADDR__WRITE_ADDR__SHIFT 0x0
#define GDS_WR_DATA__WRITE_DATA_MASK 0xffffffff
#define GDS_WR_DATA__WRITE_DATA__SHIFT 0x0
#define GDS_WR_BURST_ADDR__WRITE_ADDR_MASK 0xffffffff
#define GDS_WR_BURST_ADDR__WRITE_ADDR__SHIFT 0x0
#define GDS_WR_BURST_DATA__WRITE_DATA_MASK 0xffffffff
#define GDS_WR_BURST_DATA__WRITE_DATA__SHIFT 0x0
#define GDS_WRITE_COMPLETE__WRITE_COMPLETE_MASK 0xffffffff
#define GDS_WRITE_COMPLETE__WRITE_COMPLETE__SHIFT 0x0
/*
 * GDS_ATOM_* register family field layout.
 *   GDS_ATOM_CNTL:     AINC [5:0], UNUSED1 [7:6], DMODE bit 8,
 *                      UNUSED2 [31:9].
 *   GDS_ATOM_COMPLETE: COMPLETE bit 0, UNUSED [31:1].
 *   GDS_ATOM_BASE / GDS_ATOM_SIZE: value in [15:0], UNUSED [31:16].
 *   GDS_ATOM_OFFSET0 / OFFSET1 / GDS_ATOM_OP: value in [7:0],
 *                      UNUSED [31:8].
 *   GDS_ATOM_DST, GDS_ATOM_SRC{0,1}[_U], GDS_ATOM_READ{0,1}[_U]:
 *                      a single field spanning all 32 bits.
 */
#define GDS_ATOM_CNTL__AINC_MASK 0x3f
#define GDS_ATOM_CNTL__AINC__SHIFT 0x0
#define GDS_ATOM_CNTL__UNUSED1_MASK 0xc0
#define GDS_ATOM_CNTL__UNUSED1__SHIFT 0x6
#define GDS_ATOM_CNTL__DMODE_MASK 0x100
#define GDS_ATOM_CNTL__DMODE__SHIFT 0x8
#define GDS_ATOM_CNTL__UNUSED2_MASK 0xfffffe00
#define GDS_ATOM_CNTL__UNUSED2__SHIFT 0x9
#define GDS_ATOM_COMPLETE__COMPLETE_MASK 0x1
#define GDS_ATOM_COMPLETE__COMPLETE__SHIFT 0x0
#define GDS_ATOM_COMPLETE__UNUSED_MASK 0xfffffffe
#define GDS_ATOM_COMPLETE__UNUSED__SHIFT 0x1
#define GDS_ATOM_BASE__BASE_MASK 0xffff
#define GDS_ATOM_BASE__BASE__SHIFT 0x0
#define GDS_ATOM_BASE__UNUSED_MASK 0xffff0000
#define GDS_ATOM_BASE__UNUSED__SHIFT 0x10
#define GDS_ATOM_SIZE__SIZE_MASK 0xffff
#define GDS_ATOM_SIZE__SIZE__SHIFT 0x0
#define GDS_ATOM_SIZE__UNUSED_MASK 0xffff0000
#define GDS_ATOM_SIZE__UNUSED__SHIFT 0x10
#define GDS_ATOM_OFFSET0__OFFSET0_MASK 0xff
#define GDS_ATOM_OFFSET0__OFFSET0__SHIFT 0x0
#define GDS_ATOM_OFFSET0__UNUSED_MASK 0xffffff00
#define GDS_ATOM_OFFSET0__UNUSED__SHIFT 0x8
#define GDS_ATOM_OFFSET1__OFFSET1_MASK 0xff
#define GDS_ATOM_OFFSET1__OFFSET1__SHIFT 0x0
#define GDS_ATOM_OFFSET1__UNUSED_MASK 0xffffff00
#define GDS_ATOM_OFFSET1__UNUSED__SHIFT 0x8
#define GDS_ATOM_DST__DST_MASK 0xffffffff
#define GDS_ATOM_DST__DST__SHIFT 0x0
#define GDS_ATOM_OP__OP_MASK 0xff
#define GDS_ATOM_OP__OP__SHIFT 0x0
#define GDS_ATOM_OP__UNUSED_MASK 0xffffff00
#define GDS_ATOM_OP__UNUSED__SHIFT 0x8
#define GDS_ATOM_SRC0__DATA_MASK 0xffffffff
#define GDS_ATOM_SRC0__DATA__SHIFT 0x0
#define GDS_ATOM_SRC0_U__DATA_MASK 0xffffffff
#define GDS_ATOM_SRC0_U__DATA__SHIFT 0x0
#define GDS_ATOM_SRC1__DATA_MASK 0xffffffff
#define GDS_ATOM_SRC1__DATA__SHIFT 0x0
#define GDS_ATOM_SRC1_U__DATA_MASK 0xffffffff
#define GDS_ATOM_SRC1_U__DATA__SHIFT 0x0
#define GDS_ATOM_READ0__DATA_MASK 0xffffffff
#define GDS_ATOM_READ0__DATA__SHIFT 0x0
#define GDS_ATOM_READ0_U__DATA_MASK 0xffffffff
#define GDS_ATOM_READ0_U__DATA__SHIFT 0x0
#define GDS_ATOM_READ1__DATA_MASK 0xffffffff
#define GDS_ATOM_READ1__DATA__SHIFT 0x0
#define GDS_ATOM_READ1_U__DATA_MASK 0xffffffff
#define GDS_ATOM_READ1_U__DATA__SHIFT 0x0
/*
 * GDS_GWS_RESOURCE_CNTL: INDEX [5:0], UNUSED [31:6].
 * GDS_GWS_RESOURCE:
 *   FLAG bit 0, COUNTER [12:1], TYPE bit 13, DED bit 14,
 *   RELEASE_ALL bit 15, HEAD_QUEUE [26:16], HEAD_VALID bit 27,
 *   HEAD_FLAG bit 28, UNUSED1 [31:29].
 * GDS_GWS_RESOURCE_CNT: RESOURCE_CNT [15:0], UNUSED [31:16].
 */
#define GDS_GWS_RESOURCE_CNTL__INDEX_MASK 0x3f
#define GDS_GWS_RESOURCE_CNTL__INDEX__SHIFT 0x0
#define GDS_GWS_RESOURCE_CNTL__UNUSED_MASK 0xffffffc0
#define GDS_GWS_RESOURCE_CNTL__UNUSED__SHIFT 0x6
#define GDS_GWS_RESOURCE__FLAG_MASK 0x1
#define GDS_GWS_RESOURCE__FLAG__SHIFT 0x0
#define GDS_GWS_RESOURCE__COUNTER_MASK 0x1ffe
#define GDS_GWS_RESOURCE__COUNTER__SHIFT 0x1
#define GDS_GWS_RESOURCE__TYPE_MASK 0x2000
#define GDS_GWS_RESOURCE__TYPE__SHIFT 0xd
#define GDS_GWS_RESOURCE__DED_MASK 0x4000
#define GDS_GWS_RESOURCE__DED__SHIFT 0xe
#define GDS_GWS_RESOURCE__RELEASE_ALL_MASK 0x8000
#define GDS_GWS_RESOURCE__RELEASE_ALL__SHIFT 0xf
#define GDS_GWS_RESOURCE__HEAD_QUEUE_MASK 0x7ff0000
#define GDS_GWS_RESOURCE__HEAD_QUEUE__SHIFT 0x10
#define GDS_GWS_RESOURCE__HEAD_VALID_MASK 0x8000000
#define GDS_GWS_RESOURCE__HEAD_VALID__SHIFT 0x1b
#define GDS_GWS_RESOURCE__HEAD_FLAG_MASK 0x10000000
#define GDS_GWS_RESOURCE__HEAD_FLAG__SHIFT 0x1c
#define GDS_GWS_RESOURCE__UNUSED1_MASK 0xe0000000
#define GDS_GWS_RESOURCE__UNUSED1__SHIFT 0x1d
#define GDS_GWS_RESOURCE_CNT__RESOURCE_CNT_MASK 0xffff
#define GDS_GWS_RESOURCE_CNT__RESOURCE_CNT__SHIFT 0x0
#define GDS_GWS_RESOURCE_CNT__UNUSED_MASK 0xffff0000
#define GDS_GWS_RESOURCE_CNT__UNUSED__SHIFT 0x10
/*
 * GDS_OA_* control register field layout.
 *   GDS_OA_CNTL:      INDEX [3:0], UNUSED [31:4].
 *   GDS_OA_COUNTER:   SPACE_AVAILABLE spans all 32 bits.
 *   GDS_OA_ADDRESS:   DS_ADDRESS [15:0], CRAWLER_TYPE [19:16],
 *                     CRAWLER [23:20], UNUSED [29:24], NO_ALLOC bit 30,
 *                     ENABLE bit 31.
 *   GDS_OA_INCDEC:    VALUE [30:0], INCDEC bit 31.
 *   GDS_OA_RING_SIZE: RING_SIZE spans all 32 bits.
 */
#define GDS_OA_CNTL__INDEX_MASK 0xf
#define GDS_OA_CNTL__INDEX__SHIFT 0x0
#define GDS_OA_CNTL__UNUSED_MASK 0xfffffff0
#define GDS_OA_CNTL__UNUSED__SHIFT 0x4
#define GDS_OA_COUNTER__SPACE_AVAILABLE_MASK 0xffffffff
#define GDS_OA_COUNTER__SPACE_AVAILABLE__SHIFT 0x0
#define GDS_OA_ADDRESS__DS_ADDRESS_MASK 0xffff
#define GDS_OA_ADDRESS__DS_ADDRESS__SHIFT 0x0
#define GDS_OA_ADDRESS__CRAWLER_TYPE_MASK 0xf0000
#define GDS_OA_ADDRESS__CRAWLER_TYPE__SHIFT 0x10
#define GDS_OA_ADDRESS__CRAWLER_MASK 0xf00000
#define GDS_OA_ADDRESS__CRAWLER__SHIFT 0x14
#define GDS_OA_ADDRESS__UNUSED_MASK 0x3f000000
#define GDS_OA_ADDRESS__UNUSED__SHIFT 0x18
#define GDS_OA_ADDRESS__NO_ALLOC_MASK 0x40000000
#define GDS_OA_ADDRESS__NO_ALLOC__SHIFT 0x1e
#define GDS_OA_ADDRESS__ENABLE_MASK 0x80000000
#define GDS_OA_ADDRESS__ENABLE__SHIFT 0x1f
#define GDS_OA_INCDEC__VALUE_MASK 0x7fffffff
#define GDS_OA_INCDEC__VALUE__SHIFT 0x0
#define GDS_OA_INCDEC__INCDEC_MASK 0x80000000
#define GDS_OA_INCDEC__INCDEC__SHIFT 0x1f
#define GDS_OA_RING_SIZE__RING_SIZE_MASK 0xffffffff
#define GDS_OA_RING_SIZE__RING_SIZE__SHIFT 0x0
/*
 * GDS_DEBUG_REG0 field layout (field names are lower-case in this
 * debug register family):
 *   spare1 [5:0], write_buff_valid bit 6, wr_pixel_nxt_ptr [11:7],
 *   last_pixel_ptr bit 12, cstate [16:13], then single-bit fields in
 *   bits 17..21, spare [31:22].
 */
#define GDS_DEBUG_REG0__spare1_MASK 0x3f
#define GDS_DEBUG_REG0__spare1__SHIFT 0x0
#define GDS_DEBUG_REG0__write_buff_valid_MASK 0x40
#define GDS_DEBUG_REG0__write_buff_valid__SHIFT 0x6
#define GDS_DEBUG_REG0__wr_pixel_nxt_ptr_MASK 0xf80
#define GDS_DEBUG_REG0__wr_pixel_nxt_ptr__SHIFT 0x7
#define GDS_DEBUG_REG0__last_pixel_ptr_MASK 0x1000
#define GDS_DEBUG_REG0__last_pixel_ptr__SHIFT 0xc
#define GDS_DEBUG_REG0__cstate_MASK 0x1e000
#define GDS_DEBUG_REG0__cstate__SHIFT 0xd
#define GDS_DEBUG_REG0__buff_write_MASK 0x20000
#define GDS_DEBUG_REG0__buff_write__SHIFT 0x11
#define GDS_DEBUG_REG0__flush_request_MASK 0x40000
#define GDS_DEBUG_REG0__flush_request__SHIFT 0x12
#define GDS_DEBUG_REG0__wr_buffer_wr_complete_MASK 0x80000
#define GDS_DEBUG_REG0__wr_buffer_wr_complete__SHIFT 0x13
#define GDS_DEBUG_REG0__wbuf_fifo_empty_MASK 0x100000
#define GDS_DEBUG_REG0__wbuf_fifo_empty__SHIFT 0x14
#define GDS_DEBUG_REG0__wbuf_fifo_full_MASK 0x200000
#define GDS_DEBUG_REG0__wbuf_fifo_full__SHIFT 0x15
#define GDS_DEBUG_REG0__spare_MASK 0xffc00000
#define GDS_DEBUG_REG0__spare__SHIFT 0x16
/*
 * GDS_DEBUG_REG1 field layout:
 *   tag_hit bit 0, tag_miss bit 1, pixel_addr [16:2], then single-bit
 *   fields in bits 17..23, spare [31:24].
 */
#define GDS_DEBUG_REG1__tag_hit_MASK 0x1
#define GDS_DEBUG_REG1__tag_hit__SHIFT 0x0
#define GDS_DEBUG_REG1__tag_miss_MASK 0x2
#define GDS_DEBUG_REG1__tag_miss__SHIFT 0x1
#define GDS_DEBUG_REG1__pixel_addr_MASK 0x1fffc
#define GDS_DEBUG_REG1__pixel_addr__SHIFT 0x2
#define GDS_DEBUG_REG1__pixel_vld_MASK 0x20000
#define GDS_DEBUG_REG1__pixel_vld__SHIFT 0x11
#define GDS_DEBUG_REG1__data_ready_MASK 0x40000
#define GDS_DEBUG_REG1__data_ready__SHIFT 0x12
#define GDS_DEBUG_REG1__awaiting_data_MASK 0x80000
#define GDS_DEBUG_REG1__awaiting_data__SHIFT 0x13
#define GDS_DEBUG_REG1__addr_fifo_full_MASK 0x100000
#define GDS_DEBUG_REG1__addr_fifo_full__SHIFT 0x14
#define GDS_DEBUG_REG1__addr_fifo_empty_MASK 0x200000
#define GDS_DEBUG_REG1__addr_fifo_empty__SHIFT 0x15
#define GDS_DEBUG_REG1__buffer_loaded_MASK 0x400000
#define GDS_DEBUG_REG1__buffer_loaded__SHIFT 0x16
#define GDS_DEBUG_REG1__buffer_invalid_MASK 0x800000
#define GDS_DEBUG_REG1__buffer_invalid__SHIFT 0x17
#define GDS_DEBUG_REG1__spare_MASK 0xff000000
#define GDS_DEBUG_REG1__spare__SHIFT 0x18
/*
 * GDS_DEBUG_REG2 field layout:
 *   ds_full bit 0, ds_credit_avail bit 1, ord_idx_free bit 2,
 *   cmd_write bit 3, app_sel [7:4], req [22:8], spare [31:23].
 */
#define GDS_DEBUG_REG2__ds_full_MASK 0x1
#define GDS_DEBUG_REG2__ds_full__SHIFT 0x0
#define GDS_DEBUG_REG2__ds_credit_avail_MASK 0x2
#define GDS_DEBUG_REG2__ds_credit_avail__SHIFT 0x1
#define GDS_DEBUG_REG2__ord_idx_free_MASK 0x4
#define GDS_DEBUG_REG2__ord_idx_free__SHIFT 0x2
#define GDS_DEBUG_REG2__cmd_write_MASK 0x8
#define GDS_DEBUG_REG2__cmd_write__SHIFT 0x3
#define GDS_DEBUG_REG2__app_sel_MASK 0xf0
#define GDS_DEBUG_REG2__app_sel__SHIFT 0x4
#define GDS_DEBUG_REG2__req_MASK 0x7fff00
#define GDS_DEBUG_REG2__req__SHIFT 0x8
#define GDS_DEBUG_REG2__spare_MASK 0xff800000
#define GDS_DEBUG_REG2__spare__SHIFT 0x17
/*
 * GDS_DEBUG_REG3 field layout:
 *   pipe_num_busy [10:0], pipe0_busy_num [14:11], spare [31:15].
 */
#define GDS_DEBUG_REG3__pipe_num_busy_MASK 0x7ff
#define GDS_DEBUG_REG3__pipe_num_busy__SHIFT 0x0
#define GDS_DEBUG_REG3__pipe0_busy_num_MASK 0x7800
#define GDS_DEBUG_REG3__pipe0_busy_num__SHIFT 0xb
#define GDS_DEBUG_REG3__spare_MASK 0xffff8000
#define GDS_DEBUG_REG3__spare__SHIFT 0xf
/*
 * GDS_DEBUG_REG4 field layout:
 *   gws_busy bit 0, gws_req bit 1, gws_out_stall bit 2,
 *   cur_reso [8:3], then fifteen single-bit fields in bits 9..23
 *   (cur_reso_head_valid through ram_gws_we), spare [31:24].
 */
#define GDS_DEBUG_REG4__gws_busy_MASK 0x1
#define GDS_DEBUG_REG4__gws_busy__SHIFT 0x0
#define GDS_DEBUG_REG4__gws_req_MASK 0x2
#define GDS_DEBUG_REG4__gws_req__SHIFT 0x1
#define GDS_DEBUG_REG4__gws_out_stall_MASK 0x4
#define GDS_DEBUG_REG4__gws_out_stall__SHIFT 0x2
#define GDS_DEBUG_REG4__cur_reso_MASK 0x1f8
#define GDS_DEBUG_REG4__cur_reso__SHIFT 0x3
#define GDS_DEBUG_REG4__cur_reso_head_valid_MASK 0x200
#define GDS_DEBUG_REG4__cur_reso_head_valid__SHIFT 0x9
#define GDS_DEBUG_REG4__cur_reso_head_dirty_MASK 0x400
#define GDS_DEBUG_REG4__cur_reso_head_dirty__SHIFT 0xa
#define GDS_DEBUG_REG4__cur_reso_head_flag_MASK 0x800
#define GDS_DEBUG_REG4__cur_reso_head_flag__SHIFT 0xb
#define GDS_DEBUG_REG4__cur_reso_fed_MASK 0x1000
#define GDS_DEBUG_REG4__cur_reso_fed__SHIFT 0xc
#define GDS_DEBUG_REG4__cur_reso_barrier_MASK 0x2000
#define GDS_DEBUG_REG4__cur_reso_barrier__SHIFT 0xd
#define GDS_DEBUG_REG4__cur_reso_flag_MASK 0x4000
#define GDS_DEBUG_REG4__cur_reso_flag__SHIFT 0xe
#define GDS_DEBUG_REG4__cur_reso_cnt_gt0_MASK 0x8000
#define GDS_DEBUG_REG4__cur_reso_cnt_gt0__SHIFT 0xf
#define GDS_DEBUG_REG4__credit_cnt_gt0_MASK 0x10000
#define GDS_DEBUG_REG4__credit_cnt_gt0__SHIFT 0x10
#define GDS_DEBUG_REG4__cmd_write_MASK 0x20000
#define GDS_DEBUG_REG4__cmd_write__SHIFT 0x11
#define GDS_DEBUG_REG4__grbm_gws_reso_wr_MASK 0x40000
#define GDS_DEBUG_REG4__grbm_gws_reso_wr__SHIFT 0x12
#define GDS_DEBUG_REG4__grbm_gws_reso_rd_MASK 0x80000
#define GDS_DEBUG_REG4__grbm_gws_reso_rd__SHIFT 0x13
#define GDS_DEBUG_REG4__ram_read_busy_MASK 0x100000
#define GDS_DEBUG_REG4__ram_read_busy__SHIFT 0x14
#define GDS_DEBUG_REG4__gws_bulkfree_MASK 0x200000
#define GDS_DEBUG_REG4__gws_bulkfree__SHIFT 0x15
#define GDS_DEBUG_REG4__ram_gws_re_MASK 0x400000
#define GDS_DEBUG_REG4__ram_gws_re__SHIFT 0x16
#define GDS_DEBUG_REG4__ram_gws_we_MASK 0x800000
#define GDS_DEBUG_REG4__ram_gws_we__SHIFT 0x17
#define GDS_DEBUG_REG4__spare_MASK 0xff000000
#define GDS_DEBUG_REG4__spare__SHIFT 0x18
/*
 * GDS_DEBUG_REG5 field layout:
 *   write_dis bit 0, dec_error bit 1, alloc_opco_error bit 2,
 *   dealloc_opco_error bit 3, wrap_opco_error bit 4, spare [7:5],
 *   error_ds_address [21:8], spare1 [31:22].
 */
#define GDS_DEBUG_REG5__write_dis_MASK 0x1
#define GDS_DEBUG_REG5__write_dis__SHIFT 0x0
#define GDS_DEBUG_REG5__dec_error_MASK 0x2
#define GDS_DEBUG_REG5__dec_error__SHIFT 0x1
#define GDS_DEBUG_REG5__alloc_opco_error_MASK 0x4
#define GDS_DEBUG_REG5__alloc_opco_error__SHIFT 0x2
#define GDS_DEBUG_REG5__dealloc_opco_error_MASK 0x8
#define GDS_DEBUG_REG5__dealloc_opco_error__SHIFT 0x3
#define GDS_DEBUG_REG5__wrap_opco_error_MASK 0x10
#define GDS_DEBUG_REG5__wrap_opco_error__SHIFT 0x4
#define GDS_DEBUG_REG5__spare_MASK 0xe0
#define GDS_DEBUG_REG5__spare__SHIFT 0x5
#define GDS_DEBUG_REG5__error_ds_address_MASK 0x3fff00
#define GDS_DEBUG_REG5__error_ds_address__SHIFT 0x8
#define GDS_DEBUG_REG5__spare1_MASK 0xffc00000
#define GDS_DEBUG_REG5__spare1__SHIFT 0x16
/*
 * GDS_DEBUG_REG6 field layout:
 *   oa_busy bit 0, counters_enabled [4:1], counters_busy [20:5],
 *   spare [31:21].
 */
#define GDS_DEBUG_REG6__oa_busy_MASK 0x1
#define GDS_DEBUG_REG6__oa_busy__SHIFT 0x0
#define GDS_DEBUG_REG6__counters_enabled_MASK 0x1e
#define GDS_DEBUG_REG6__counters_enabled__SHIFT 0x1
#define GDS_DEBUG_REG6__counters_busy_MASK 0x1fffe0
#define GDS_DEBUG_REG6__counters_busy__SHIFT 0x5
#define GDS_DEBUG_REG6__spare_MASK 0xffe00000
#define GDS_DEBUG_REG6__spare__SHIFT 0x15
/*
 * GDS_PERFCOUNTER{0..3}_SELECT (four identical instances):
 *   PERFCOUNTER_SELECT [9:0], PERFCOUNTER_SELECT1 [19:10],
 *   CNTR_MODE [23:20].
 */
#define GDS_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define GDS_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define GDS_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define GDS_PERFCOUNTER0_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define GDS_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define GDS_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define GDS_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define GDS_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define GDS_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define GDS_PERFCOUNTER1_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define GDS_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define GDS_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define GDS_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define GDS_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define GDS_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define GDS_PERFCOUNTER2_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define GDS_PERFCOUNTER2_SELECT__CNTR_MODE_MASK 0xf00000
#define GDS_PERFCOUNTER2_SELECT__CNTR_MODE__SHIFT 0x14
#define GDS_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT_MASK 0x3ff
#define GDS_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT__SHIFT 0x0
#define GDS_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT1_MASK 0xffc00
#define GDS_PERFCOUNTER3_SELECT__PERFCOUNTER_SELECT1__SHIFT 0xa
#define GDS_PERFCOUNTER3_SELECT__CNTR_MODE_MASK 0xf00000
#define GDS_PERFCOUNTER3_SELECT__CNTR_MODE__SHIFT 0x14
/*
 * GDS_PERFCOUNTER{0..3}_LO / _HI: each register is a single field
 * spanning all 32 bits.
 */
#define GDS_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GDS_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GDS_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GDS_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GDS_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GDS_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GDS_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define GDS_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define GDS_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GDS_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GDS_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GDS_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GDS_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GDS_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define GDS_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define GDS_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * GDS_PERFCOUNTER0_SELECT1 (defined here for counter 0 only):
 *   PERFCOUNTER_SELECT2 [9:0], PERFCOUNTER_SELECT3 [19:10].
 */
#define GDS_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT2_MASK 0x3ff
#define GDS_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT2__SHIFT 0x0
#define GDS_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT3_MASK 0xffc00
#define GDS_PERFCOUNTER0_SELECT1__PERFCOUNTER_SELECT3__SHIFT 0xa
/*
 * GDS_VMID{0..15}_BASE: sixteen identical registers, one per VMID
 * index, each with a single BASE field in bits [15:0].
 */
#define GDS_VMID0_BASE__BASE_MASK 0xffff
#define GDS_VMID0_BASE__BASE__SHIFT 0x0
#define GDS_VMID1_BASE__BASE_MASK 0xffff
#define GDS_VMID1_BASE__BASE__SHIFT 0x0
#define GDS_VMID2_BASE__BASE_MASK 0xffff
#define GDS_VMID2_BASE__BASE__SHIFT 0x0
#define GDS_VMID3_BASE__BASE_MASK 0xffff
#define GDS_VMID3_BASE__BASE__SHIFT 0x0
#define GDS_VMID4_BASE__BASE_MASK 0xffff
#define GDS_VMID4_BASE__BASE__SHIFT 0x0
#define GDS_VMID5_BASE__BASE_MASK 0xffff
#define GDS_VMID5_BASE__BASE__SHIFT 0x0
#define GDS_VMID6_BASE__BASE_MASK 0xffff
#define GDS_VMID6_BASE__BASE__SHIFT 0x0
#define GDS_VMID7_BASE__BASE_MASK 0xffff
#define GDS_VMID7_BASE__BASE__SHIFT 0x0
#define GDS_VMID8_BASE__BASE_MASK 0xffff
#define GDS_VMID8_BASE__BASE__SHIFT 0x0
#define GDS_VMID9_BASE__BASE_MASK 0xffff
#define GDS_VMID9_BASE__BASE__SHIFT 0x0
#define GDS_VMID10_BASE__BASE_MASK 0xffff
#define GDS_VMID10_BASE__BASE__SHIFT 0x0
#define GDS_VMID11_BASE__BASE_MASK 0xffff
#define GDS_VMID11_BASE__BASE__SHIFT 0x0
#define GDS_VMID12_BASE__BASE_MASK 0xffff
#define GDS_VMID12_BASE__BASE__SHIFT 0x0
#define GDS_VMID13_BASE__BASE_MASK 0xffff
#define GDS_VMID13_BASE__BASE__SHIFT 0x0
#define GDS_VMID14_BASE__BASE_MASK 0xffff
#define GDS_VMID14_BASE__BASE__SHIFT 0x0
#define GDS_VMID15_BASE__BASE_MASK 0xffff
#define GDS_VMID15_BASE__BASE__SHIFT 0x0
/*
 * GDS_VMID{0..15}_SIZE: sixteen identical registers, each with a single
 * SIZE field in bits [16:0] (17 bits, one bit wider than BASE).
 */
#define GDS_VMID0_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID0_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID1_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID1_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID2_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID2_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID3_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID3_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID4_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID4_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID5_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID5_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID6_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID6_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID7_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID7_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID8_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID8_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID9_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID9_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID10_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID10_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID11_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID11_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID12_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID12_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID13_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID13_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID14_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID14_SIZE__SIZE__SHIFT 0x0
#define GDS_VMID15_SIZE__SIZE_MASK 0x1ffff
#define GDS_VMID15_SIZE__SIZE__SHIFT 0x0
/*
 * GDS_GWS_VMID{0..15}: sixteen identical registers, one per VMID index.
 *   BASE bits [5:0]
 *   SIZE bits [22:16]
 * Bits [15:6] and [31:23] are not covered by any field defined here.
 */
#define GDS_GWS_VMID0__BASE_MASK 0x3f
#define GDS_GWS_VMID0__BASE__SHIFT 0x0
#define GDS_GWS_VMID0__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID0__SIZE__SHIFT 0x10
#define GDS_GWS_VMID1__BASE_MASK 0x3f
#define GDS_GWS_VMID1__BASE__SHIFT 0x0
#define GDS_GWS_VMID1__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID1__SIZE__SHIFT 0x10
#define GDS_GWS_VMID2__BASE_MASK 0x3f
#define GDS_GWS_VMID2__BASE__SHIFT 0x0
#define GDS_GWS_VMID2__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID2__SIZE__SHIFT 0x10
#define GDS_GWS_VMID3__BASE_MASK 0x3f
#define GDS_GWS_VMID3__BASE__SHIFT 0x0
#define GDS_GWS_VMID3__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID3__SIZE__SHIFT 0x10
#define GDS_GWS_VMID4__BASE_MASK 0x3f
#define GDS_GWS_VMID4__BASE__SHIFT 0x0
#define GDS_GWS_VMID4__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID4__SIZE__SHIFT 0x10
#define GDS_GWS_VMID5__BASE_MASK 0x3f
#define GDS_GWS_VMID5__BASE__SHIFT 0x0
#define GDS_GWS_VMID5__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID5__SIZE__SHIFT 0x10
#define GDS_GWS_VMID6__BASE_MASK 0x3f
#define GDS_GWS_VMID6__BASE__SHIFT 0x0
#define GDS_GWS_VMID6__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID6__SIZE__SHIFT 0x10
#define GDS_GWS_VMID7__BASE_MASK 0x3f
#define GDS_GWS_VMID7__BASE__SHIFT 0x0
#define GDS_GWS_VMID7__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID7__SIZE__SHIFT 0x10
#define GDS_GWS_VMID8__BASE_MASK 0x3f
#define GDS_GWS_VMID8__BASE__SHIFT 0x0
#define GDS_GWS_VMID8__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID8__SIZE__SHIFT 0x10
#define GDS_GWS_VMID9__BASE_MASK 0x3f
#define GDS_GWS_VMID9__BASE__SHIFT 0x0
#define GDS_GWS_VMID9__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID9__SIZE__SHIFT 0x10
#define GDS_GWS_VMID10__BASE_MASK 0x3f
#define GDS_GWS_VMID10__BASE__SHIFT 0x0
#define GDS_GWS_VMID10__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID10__SIZE__SHIFT 0x10
#define GDS_GWS_VMID11__BASE_MASK 0x3f
#define GDS_GWS_VMID11__BASE__SHIFT 0x0
#define GDS_GWS_VMID11__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID11__SIZE__SHIFT 0x10
#define GDS_GWS_VMID12__BASE_MASK 0x3f
#define GDS_GWS_VMID12__BASE__SHIFT 0x0
#define GDS_GWS_VMID12__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID12__SIZE__SHIFT 0x10
#define GDS_GWS_VMID13__BASE_MASK 0x3f
#define GDS_GWS_VMID13__BASE__SHIFT 0x0
#define GDS_GWS_VMID13__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID13__SIZE__SHIFT 0x10
#define GDS_GWS_VMID14__BASE_MASK 0x3f
#define GDS_GWS_VMID14__BASE__SHIFT 0x0
#define GDS_GWS_VMID14__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID14__SIZE__SHIFT 0x10
#define GDS_GWS_VMID15__BASE_MASK 0x3f
#define GDS_GWS_VMID15__BASE__SHIFT 0x0
#define GDS_GWS_VMID15__SIZE_MASK 0x7f0000
#define GDS_GWS_VMID15__SIZE__SHIFT 0x10
/*
 * GDS_OA_VMID{0..15}: sixteen identical registers, one per VMID index.
 *   MASK   bits [15:0]
 *   UNUSED bits [31:16]
 * Note that "MASK_MASK" is the bit mask of the field that is itself
 * named MASK.
 */
#define GDS_OA_VMID0__MASK_MASK 0xffff
#define GDS_OA_VMID0__MASK__SHIFT 0x0
#define GDS_OA_VMID0__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID0__UNUSED__SHIFT 0x10
#define GDS_OA_VMID1__MASK_MASK 0xffff
#define GDS_OA_VMID1__MASK__SHIFT 0x0
#define GDS_OA_VMID1__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID1__UNUSED__SHIFT 0x10
#define GDS_OA_VMID2__MASK_MASK 0xffff
#define GDS_OA_VMID2__MASK__SHIFT 0x0
#define GDS_OA_VMID2__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID2__UNUSED__SHIFT 0x10
#define GDS_OA_VMID3__MASK_MASK 0xffff
#define GDS_OA_VMID3__MASK__SHIFT 0x0
#define GDS_OA_VMID3__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID3__UNUSED__SHIFT 0x10
#define GDS_OA_VMID4__MASK_MASK 0xffff
#define GDS_OA_VMID4__MASK__SHIFT 0x0
#define GDS_OA_VMID4__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID4__UNUSED__SHIFT 0x10
#define GDS_OA_VMID5__MASK_MASK 0xffff
#define GDS_OA_VMID5__MASK__SHIFT 0x0
#define GDS_OA_VMID5__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID5__UNUSED__SHIFT 0x10
#define GDS_OA_VMID6__MASK_MASK 0xffff
#define GDS_OA_VMID6__MASK__SHIFT 0x0
#define GDS_OA_VMID6__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID6__UNUSED__SHIFT 0x10
#define GDS_OA_VMID7__MASK_MASK 0xffff
#define GDS_OA_VMID7__MASK__SHIFT 0x0
#define GDS_OA_VMID7__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID7__UNUSED__SHIFT 0x10
#define GDS_OA_VMID8__MASK_MASK 0xffff
#define GDS_OA_VMID8__MASK__SHIFT 0x0
#define GDS_OA_VMID8__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID8__UNUSED__SHIFT 0x10
#define GDS_OA_VMID9__MASK_MASK 0xffff
#define GDS_OA_VMID9__MASK__SHIFT 0x0
#define GDS_OA_VMID9__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID9__UNUSED__SHIFT 0x10
#define GDS_OA_VMID10__MASK_MASK 0xffff
#define GDS_OA_VMID10__MASK__SHIFT 0x0
#define GDS_OA_VMID10__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID10__UNUSED__SHIFT 0x10
#define GDS_OA_VMID11__MASK_MASK 0xffff
#define GDS_OA_VMID11__MASK__SHIFT 0x0
#define GDS_OA_VMID11__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID11__UNUSED__SHIFT 0x10
#define GDS_OA_VMID12__MASK_MASK 0xffff
#define GDS_OA_VMID12__MASK__SHIFT 0x0
#define GDS_OA_VMID12__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID12__UNUSED__SHIFT 0x10
#define GDS_OA_VMID13__MASK_MASK 0xffff
#define GDS_OA_VMID13__MASK__SHIFT 0x0
#define GDS_OA_VMID13__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID13__UNUSED__SHIFT 0x10
#define GDS_OA_VMID14__MASK_MASK 0xffff
#define GDS_OA_VMID14__MASK__SHIFT 0x0
#define GDS_OA_VMID14__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID14__UNUSED__SHIFT 0x10
#define GDS_OA_VMID15__MASK_MASK 0xffff
#define GDS_OA_VMID15__MASK__SHIFT 0x0
#define GDS_OA_VMID15__UNUSED_MASK 0xffff0000
#define GDS_OA_VMID15__UNUSED__SHIFT 0x10
/*
 * GDS_GWS_RESET0: thirty-two single-bit fields, RESOURCE0_RESET through
 * RESOURCE31_RESET. RESOURCE<n>_RESET is bit n, i.e. mask (1 << n) and
 * shift n.
 */
#define GDS_GWS_RESET0__RESOURCE0_RESET_MASK 0x1
#define GDS_GWS_RESET0__RESOURCE0_RESET__SHIFT 0x0
#define GDS_GWS_RESET0__RESOURCE1_RESET_MASK 0x2
#define GDS_GWS_RESET0__RESOURCE1_RESET__SHIFT 0x1
#define GDS_GWS_RESET0__RESOURCE2_RESET_MASK 0x4
#define GDS_GWS_RESET0__RESOURCE2_RESET__SHIFT 0x2
#define GDS_GWS_RESET0__RESOURCE3_RESET_MASK 0x8
#define GDS_GWS_RESET0__RESOURCE3_RESET__SHIFT 0x3
#define GDS_GWS_RESET0__RESOURCE4_RESET_MASK 0x10
#define GDS_GWS_RESET0__RESOURCE4_RESET__SHIFT 0x4
#define GDS_GWS_RESET0__RESOURCE5_RESET_MASK 0x20
#define GDS_GWS_RESET0__RESOURCE5_RESET__SHIFT 0x5
#define GDS_GWS_RESET0__RESOURCE6_RESET_MASK 0x40
#define GDS_GWS_RESET0__RESOURCE6_RESET__SHIFT 0x6
#define GDS_GWS_RESET0__RESOURCE7_RESET_MASK 0x80
#define GDS_GWS_RESET0__RESOURCE7_RESET__SHIFT 0x7
#define GDS_GWS_RESET0__RESOURCE8_RESET_MASK 0x100
#define GDS_GWS_RESET0__RESOURCE8_RESET__SHIFT 0x8
#define GDS_GWS_RESET0__RESOURCE9_RESET_MASK 0x200
#define GDS_GWS_RESET0__RESOURCE9_RESET__SHIFT 0x9
#define GDS_GWS_RESET0__RESOURCE10_RESET_MASK 0x400
#define GDS_GWS_RESET0__RESOURCE10_RESET__SHIFT 0xa
#define GDS_GWS_RESET0__RESOURCE11_RESET_MASK 0x800
#define GDS_GWS_RESET0__RESOURCE11_RESET__SHIFT 0xb
#define GDS_GWS_RESET0__RESOURCE12_RESET_MASK 0x1000
#define GDS_GWS_RESET0__RESOURCE12_RESET__SHIFT 0xc
#define GDS_GWS_RESET0__RESOURCE13_RESET_MASK 0x2000
#define GDS_GWS_RESET0__RESOURCE13_RESET__SHIFT 0xd
#define GDS_GWS_RESET0__RESOURCE14_RESET_MASK 0x4000
#define GDS_GWS_RESET0__RESOURCE14_RESET__SHIFT 0xe
#define GDS_GWS_RESET0__RESOURCE15_RESET_MASK 0x8000
#define GDS_GWS_RESET0__RESOURCE15_RESET__SHIFT 0xf
#define GDS_GWS_RESET0__RESOURCE16_RESET_MASK 0x10000
#define GDS_GWS_RESET0__RESOURCE16_RESET__SHIFT 0x10
#define GDS_GWS_RESET0__RESOURCE17_RESET_MASK 0x20000
#define GDS_GWS_RESET0__RESOURCE17_RESET__SHIFT 0x11
#define GDS_GWS_RESET0__RESOURCE18_RESET_MASK 0x40000
#define GDS_GWS_RESET0__RESOURCE18_RESET__SHIFT 0x12
#define GDS_GWS_RESET0__RESOURCE19_RESET_MASK 0x80000
#define GDS_GWS_RESET0__RESOURCE19_RESET__SHIFT 0x13
#define GDS_GWS_RESET0__RESOURCE20_RESET_MASK 0x100000
#define GDS_GWS_RESET0__RESOURCE20_RESET__SHIFT 0x14
#define GDS_GWS_RESET0__RESOURCE21_RESET_MASK 0x200000
#define GDS_GWS_RESET0__RESOURCE21_RESET__SHIFT 0x15
#define GDS_GWS_RESET0__RESOURCE22_RESET_MASK 0x400000
#define GDS_GWS_RESET0__RESOURCE22_RESET__SHIFT 0x16
#define GDS_GWS_RESET0__RESOURCE23_RESET_MASK 0x800000
#define GDS_GWS_RESET0__RESOURCE23_RESET__SHIFT 0x17
#define GDS_GWS_RESET0__RESOURCE24_RESET_MASK 0x1000000
#define GDS_GWS_RESET0__RESOURCE24_RESET__SHIFT 0x18
#define GDS_GWS_RESET0__RESOURCE25_RESET_MASK 0x2000000
#define GDS_GWS_RESET0__RESOURCE25_RESET__SHIFT 0x19
#define GDS_GWS_RESET0__RESOURCE26_RESET_MASK 0x4000000
#define GDS_GWS_RESET0__RESOURCE26_RESET__SHIFT 0x1a
#define GDS_GWS_RESET0__RESOURCE27_RESET_MASK 0x8000000
#define GDS_GWS_RESET0__RESOURCE27_RESET__SHIFT 0x1b
#define GDS_GWS_RESET0__RESOURCE28_RESET_MASK 0x10000000
#define GDS_GWS_RESET0__RESOURCE28_RESET__SHIFT 0x1c
#define GDS_GWS_RESET0__RESOURCE29_RESET_MASK 0x20000000
#define GDS_GWS_RESET0__RESOURCE29_RESET__SHIFT 0x1d
#define GDS_GWS_RESET0__RESOURCE30_RESET_MASK 0x40000000
#define GDS_GWS_RESET0__RESOURCE30_RESET__SHIFT 0x1e
#define GDS_GWS_RESET0__RESOURCE31_RESET_MASK 0x80000000
#define GDS_GWS_RESET0__RESOURCE31_RESET__SHIFT 0x1f
/*
 * GDS_GWS_RESET1: continuation of the per-resource reset bits for
 * RESOURCE32_RESET through RESOURCE63_RESET. Bit numbering restarts at 0 in
 * this register, so RESOURCE<n>_RESET is bit (n - 32).
 */
#define GDS_GWS_RESET1__RESOURCE32_RESET_MASK 0x1
#define GDS_GWS_RESET1__RESOURCE32_RESET__SHIFT 0x0
#define GDS_GWS_RESET1__RESOURCE33_RESET_MASK 0x2
#define GDS_GWS_RESET1__RESOURCE33_RESET__SHIFT 0x1
#define GDS_GWS_RESET1__RESOURCE34_RESET_MASK 0x4
#define GDS_GWS_RESET1__RESOURCE34_RESET__SHIFT 0x2
#define GDS_GWS_RESET1__RESOURCE35_RESET_MASK 0x8
#define GDS_GWS_RESET1__RESOURCE35_RESET__SHIFT 0x3
#define GDS_GWS_RESET1__RESOURCE36_RESET_MASK 0x10
#define GDS_GWS_RESET1__RESOURCE36_RESET__SHIFT 0x4
#define GDS_GWS_RESET1__RESOURCE37_RESET_MASK 0x20
#define GDS_GWS_RESET1__RESOURCE37_RESET__SHIFT 0x5
#define GDS_GWS_RESET1__RESOURCE38_RESET_MASK 0x40
#define GDS_GWS_RESET1__RESOURCE38_RESET__SHIFT 0x6
#define GDS_GWS_RESET1__RESOURCE39_RESET_MASK 0x80
#define GDS_GWS_RESET1__RESOURCE39_RESET__SHIFT 0x7
#define GDS_GWS_RESET1__RESOURCE40_RESET_MASK 0x100
#define GDS_GWS_RESET1__RESOURCE40_RESET__SHIFT 0x8
#define GDS_GWS_RESET1__RESOURCE41_RESET_MASK 0x200
#define GDS_GWS_RESET1__RESOURCE41_RESET__SHIFT 0x9
#define GDS_GWS_RESET1__RESOURCE42_RESET_MASK 0x400
#define GDS_GWS_RESET1__RESOURCE42_RESET__SHIFT 0xa
#define GDS_GWS_RESET1__RESOURCE43_RESET_MASK 0x800
#define GDS_GWS_RESET1__RESOURCE43_RESET__SHIFT 0xb
#define GDS_GWS_RESET1__RESOURCE44_RESET_MASK 0x1000
#define GDS_GWS_RESET1__RESOURCE44_RESET__SHIFT 0xc
#define GDS_GWS_RESET1__RESOURCE45_RESET_MASK 0x2000
#define GDS_GWS_RESET1__RESOURCE45_RESET__SHIFT 0xd
#define GDS_GWS_RESET1__RESOURCE46_RESET_MASK 0x4000
#define GDS_GWS_RESET1__RESOURCE46_RESET__SHIFT 0xe
#define GDS_GWS_RESET1__RESOURCE47_RESET_MASK 0x8000
#define GDS_GWS_RESET1__RESOURCE47_RESET__SHIFT 0xf
#define GDS_GWS_RESET1__RESOURCE48_RESET_MASK 0x10000
#define GDS_GWS_RESET1__RESOURCE48_RESET__SHIFT 0x10
#define GDS_GWS_RESET1__RESOURCE49_RESET_MASK 0x20000
#define GDS_GWS_RESET1__RESOURCE49_RESET__SHIFT 0x11
#define GDS_GWS_RESET1__RESOURCE50_RESET_MASK 0x40000
#define GDS_GWS_RESET1__RESOURCE50_RESET__SHIFT 0x12
#define GDS_GWS_RESET1__RESOURCE51_RESET_MASK 0x80000
#define GDS_GWS_RESET1__RESOURCE51_RESET__SHIFT 0x13
#define GDS_GWS_RESET1__RESOURCE52_RESET_MASK 0x100000
#define GDS_GWS_RESET1__RESOURCE52_RESET__SHIFT 0x14
#define GDS_GWS_RESET1__RESOURCE53_RESET_MASK 0x200000
#define GDS_GWS_RESET1__RESOURCE53_RESET__SHIFT 0x15
#define GDS_GWS_RESET1__RESOURCE54_RESET_MASK 0x400000
#define GDS_GWS_RESET1__RESOURCE54_RESET__SHIFT 0x16
#define GDS_GWS_RESET1__RESOURCE55_RESET_MASK 0x800000
#define GDS_GWS_RESET1__RESOURCE55_RESET__SHIFT 0x17
#define GDS_GWS_RESET1__RESOURCE56_RESET_MASK 0x1000000
#define GDS_GWS_RESET1__RESOURCE56_RESET__SHIFT 0x18
#define GDS_GWS_RESET1__RESOURCE57_RESET_MASK 0x2000000
#define GDS_GWS_RESET1__RESOURCE57_RESET__SHIFT 0x19
#define GDS_GWS_RESET1__RESOURCE58_RESET_MASK 0x4000000
#define GDS_GWS_RESET1__RESOURCE58_RESET__SHIFT 0x1a
#define GDS_GWS_RESET1__RESOURCE59_RESET_MASK 0x8000000
#define GDS_GWS_RESET1__RESOURCE59_RESET__SHIFT 0x1b
#define GDS_GWS_RESET1__RESOURCE60_RESET_MASK 0x10000000
#define GDS_GWS_RESET1__RESOURCE60_RESET__SHIFT 0x1c
#define GDS_GWS_RESET1__RESOURCE61_RESET_MASK 0x20000000
#define GDS_GWS_RESET1__RESOURCE61_RESET__SHIFT 0x1d
#define GDS_GWS_RESET1__RESOURCE62_RESET_MASK 0x40000000
#define GDS_GWS_RESET1__RESOURCE62_RESET__SHIFT 0x1e
#define GDS_GWS_RESET1__RESOURCE63_RESET_MASK 0x80000000
#define GDS_GWS_RESET1__RESOURCE63_RESET__SHIFT 0x1f
/*
 * Remaining GDS_* registers in this run. Each pair gives a field's in-place
 * mask and the bit index of its least significant bit.
 */
/* GDS_GWS_RESOURCE_RESET: RESET is bit 0, RESOURCE_ID is bits 15:8 (bits 7:1 have no field here). */
#define GDS_GWS_RESOURCE_RESET__RESET_MASK 0x1
#define GDS_GWS_RESOURCE_RESET__RESET__SHIFT 0x0
#define GDS_GWS_RESOURCE_RESET__RESOURCE_ID_MASK 0xff00
#define GDS_GWS_RESOURCE_RESET__RESOURCE_ID__SHIFT 0x8
/* GDS_COMPUTE_MAX_WAVE_ID: single 12-bit field at bits 11:0. */
#define GDS_COMPUTE_MAX_WAVE_ID__MAX_WAVE_ID_MASK 0xfff
#define GDS_COMPUTE_MAX_WAVE_ID__MAX_WAVE_ID__SHIFT 0x0
/*
 * GDS_OA_RESET_MASK: note that "_MASK" is part of the register name, so
 * e.g. GDS_OA_RESET_MASK__ME0_CS_RESET_MASK is the mask of field
 * ME0_CS_RESET in register GDS_OA_RESET_MASK. Bits 0-2 are ME0 fields,
 * bit 3 is UNUSED0, bits 4-7 are ME1 pipes 0-3, bits 8-11 are ME2 pipes
 * 0-3, and bits 31:12 are UNUSED1.
 */
#define GDS_OA_RESET_MASK__ME0_GFXHP3D_PIX_RESET_MASK 0x1
#define GDS_OA_RESET_MASK__ME0_GFXHP3D_PIX_RESET__SHIFT 0x0
#define GDS_OA_RESET_MASK__ME0_GFXHP3D_VTX_RESET_MASK 0x2
#define GDS_OA_RESET_MASK__ME0_GFXHP3D_VTX_RESET__SHIFT 0x1
#define GDS_OA_RESET_MASK__ME0_CS_RESET_MASK 0x4
#define GDS_OA_RESET_MASK__ME0_CS_RESET__SHIFT 0x2
#define GDS_OA_RESET_MASK__UNUSED0_MASK 0x8
#define GDS_OA_RESET_MASK__UNUSED0__SHIFT 0x3
#define GDS_OA_RESET_MASK__ME1_PIPE0_RESET_MASK 0x10
#define GDS_OA_RESET_MASK__ME1_PIPE0_RESET__SHIFT 0x4
#define GDS_OA_RESET_MASK__ME1_PIPE1_RESET_MASK 0x20
#define GDS_OA_RESET_MASK__ME1_PIPE1_RESET__SHIFT 0x5
#define GDS_OA_RESET_MASK__ME1_PIPE2_RESET_MASK 0x40
#define GDS_OA_RESET_MASK__ME1_PIPE2_RESET__SHIFT 0x6
#define GDS_OA_RESET_MASK__ME1_PIPE3_RESET_MASK 0x80
#define GDS_OA_RESET_MASK__ME1_PIPE3_RESET__SHIFT 0x7
#define GDS_OA_RESET_MASK__ME2_PIPE0_RESET_MASK 0x100
#define GDS_OA_RESET_MASK__ME2_PIPE0_RESET__SHIFT 0x8
#define GDS_OA_RESET_MASK__ME2_PIPE1_RESET_MASK 0x200
#define GDS_OA_RESET_MASK__ME2_PIPE1_RESET__SHIFT 0x9
#define GDS_OA_RESET_MASK__ME2_PIPE2_RESET_MASK 0x400
#define GDS_OA_RESET_MASK__ME2_PIPE2_RESET__SHIFT 0xa
#define GDS_OA_RESET_MASK__ME2_PIPE3_RESET_MASK 0x800
#define GDS_OA_RESET_MASK__ME2_PIPE3_RESET__SHIFT 0xb
#define GDS_OA_RESET_MASK__UNUSED1_MASK 0xfffff000
#define GDS_OA_RESET_MASK__UNUSED1__SHIFT 0xc
/* GDS_OA_RESET: RESET is bit 0, PIPE_ID is bits 15:8. */
#define GDS_OA_RESET__RESET_MASK 0x1
#define GDS_OA_RESET__RESET__SHIFT 0x0
#define GDS_OA_RESET__PIPE_ID_MASK 0xff00
#define GDS_OA_RESET__PIPE_ID__SHIFT 0x8
/* GDS_ENHANCE: MISC bits 15:0, AUTO_INC_INDEX bit 16, CGPG_RESTORE bit 17, UNUSED bits 31:18. */
#define GDS_ENHANCE__MISC_MASK 0xffff
#define GDS_ENHANCE__MISC__SHIFT 0x0
#define GDS_ENHANCE__AUTO_INC_INDEX_MASK 0x10000
#define GDS_ENHANCE__AUTO_INC_INDEX__SHIFT 0x10
#define GDS_ENHANCE__CGPG_RESTORE_MASK 0x20000
#define GDS_ENHANCE__CGPG_RESTORE__SHIFT 0x11
#define GDS_ENHANCE__UNUSED_MASK 0xfffc0000
#define GDS_ENHANCE__UNUSED__SHIFT 0x12
/* GDS_OA_CGPG_RESTORE: VMID bits 7:0, MEID bits 11:8, PIPEID bits 15:12, UNUSED bits 31:16. */
#define GDS_OA_CGPG_RESTORE__VMID_MASK 0xff
#define GDS_OA_CGPG_RESTORE__VMID__SHIFT 0x0
#define GDS_OA_CGPG_RESTORE__MEID_MASK 0xf00
#define GDS_OA_CGPG_RESTORE__MEID__SHIFT 0x8
#define GDS_OA_CGPG_RESTORE__PIPEID_MASK 0xf000
#define GDS_OA_CGPG_RESTORE__PIPEID__SHIFT 0xc
#define GDS_OA_CGPG_RESTORE__UNUSED_MASK 0xffff0000
#define GDS_OA_CGPG_RESTORE__UNUSED__SHIFT 0x10
/*
 * CS_COPY_STATE and GFX_COPY_STATE: both define a single 3-bit
 * SRC_STATE_ID field at bits 2:0.
 */
#define CS_COPY_STATE__SRC_STATE_ID_MASK 0x7
#define CS_COPY_STATE__SRC_STATE_ID__SHIFT 0x0
#define GFX_COPY_STATE__SRC_STATE_ID_MASK 0x7
#define GFX_COPY_STATE__SRC_STATE_ID__SHIFT 0x0
/*
 * VGT draw/event initiator and VGT_DMA_* field definitions (IA_ENHANCE is
 * interleaved here in the original order). Each pair gives a field's
 * in-place mask and the bit index of its least significant bit.
 */
/* VGT_DRAW_INITIATOR: SOURCE_SELECT 1:0, MAJOR_MODE 3:2, then single-bit flags at bits 4, 5 and 6. */
#define VGT_DRAW_INITIATOR__SOURCE_SELECT_MASK 0x3
#define VGT_DRAW_INITIATOR__SOURCE_SELECT__SHIFT 0x0
#define VGT_DRAW_INITIATOR__MAJOR_MODE_MASK 0xc
#define VGT_DRAW_INITIATOR__MAJOR_MODE__SHIFT 0x2
#define VGT_DRAW_INITIATOR__SPRITE_EN_R6XX_MASK 0x10
#define VGT_DRAW_INITIATOR__SPRITE_EN_R6XX__SHIFT 0x4
#define VGT_DRAW_INITIATOR__NOT_EOP_MASK 0x20
#define VGT_DRAW_INITIATOR__NOT_EOP__SHIFT 0x5
#define VGT_DRAW_INITIATOR__USE_OPAQUE_MASK 0x40
#define VGT_DRAW_INITIATOR__USE_OPAQUE__SHIFT 0x6
/* VGT_EVENT_INITIATOR: EVENT_TYPE 5:0, ADDRESS_HI 26:18 (9 bits), EXTENDED_EVENT bit 27; bits 17:6 have no field here. */
#define VGT_EVENT_INITIATOR__EVENT_TYPE_MASK 0x3f
#define VGT_EVENT_INITIATOR__EVENT_TYPE__SHIFT 0x0
#define VGT_EVENT_INITIATOR__ADDRESS_HI_MASK 0x7fc0000
#define VGT_EVENT_INITIATOR__ADDRESS_HI__SHIFT 0x12
#define VGT_EVENT_INITIATOR__EXTENDED_EVENT_MASK 0x8000000
#define VGT_EVENT_INITIATOR__EXTENDED_EVENT__SHIFT 0x1b
/* VGT_EVENT_ADDRESS_REG: ADDRESS_LOW is 28 bits wide (27:0). */
#define VGT_EVENT_ADDRESS_REG__ADDRESS_LOW_MASK 0xfffffff
#define VGT_EVENT_ADDRESS_REG__ADDRESS_LOW__SHIFT 0x0
/* VGT_DMA_BASE_HI holds an 8-bit BASE_ADDR field; VGT_DMA_BASE holds a full 32-bit BASE_ADDR field. */
#define VGT_DMA_BASE_HI__BASE_ADDR_MASK 0xff
#define VGT_DMA_BASE_HI__BASE_ADDR__SHIFT 0x0
#define VGT_DMA_BASE__BASE_ADDR_MASK 0xffffffff
#define VGT_DMA_BASE__BASE_ADDR__SHIFT 0x0
/* VGT_DMA_INDEX_TYPE: four 2-bit fields at bits 1:0, 3:2, 5:4, 7:6, then single-bit flags at bits 8, 9 and 10. */
#define VGT_DMA_INDEX_TYPE__INDEX_TYPE_MASK 0x3
#define VGT_DMA_INDEX_TYPE__INDEX_TYPE__SHIFT 0x0
#define VGT_DMA_INDEX_TYPE__SWAP_MODE_MASK 0xc
#define VGT_DMA_INDEX_TYPE__SWAP_MODE__SHIFT 0x2
#define VGT_DMA_INDEX_TYPE__BUF_TYPE_MASK 0x30
#define VGT_DMA_INDEX_TYPE__BUF_TYPE__SHIFT 0x4
#define VGT_DMA_INDEX_TYPE__RDREQ_POLICY_MASK 0xc0
#define VGT_DMA_INDEX_TYPE__RDREQ_POLICY__SHIFT 0x6
#define VGT_DMA_INDEX_TYPE__ATC_MASK 0x100
#define VGT_DMA_INDEX_TYPE__ATC__SHIFT 0x8
#define VGT_DMA_INDEX_TYPE__NOT_EOP_MASK 0x200
#define VGT_DMA_INDEX_TYPE__NOT_EOP__SHIFT 0x9
#define VGT_DMA_INDEX_TYPE__REQ_PATH_MASK 0x400
#define VGT_DMA_INDEX_TYPE__REQ_PATH__SHIFT 0xa
/* Registers consisting of one full-width 32-bit field. */
#define VGT_DMA_NUM_INSTANCES__NUM_INSTANCES_MASK 0xffffffff
#define VGT_DMA_NUM_INSTANCES__NUM_INSTANCES__SHIFT 0x0
#define IA_ENHANCE__MISC_MASK 0xffffffff
#define IA_ENHANCE__MISC__SHIFT 0x0
#define VGT_DMA_SIZE__NUM_INDICES_MASK 0xffffffff
#define VGT_DMA_SIZE__NUM_INDICES__SHIFT 0x0
#define VGT_DMA_MAX_SIZE__MAX_SIZE_MASK 0xffffffff
#define VGT_DMA_MAX_SIZE__MAX_SIZE__SHIFT 0x0
/* VGT_DMA_PRIMITIVE_TYPE: PRIM_TYPE is 6 bits wide (5:0). */
#define VGT_DMA_PRIMITIVE_TYPE__PRIM_TYPE_MASK 0x3f
#define VGT_DMA_PRIMITIVE_TYPE__PRIM_TYPE__SHIFT 0x0
/* VGT_DMA_CONTROL: PRIMGROUP_SIZE 15:0, IA_SWITCH_ON_EOP bit 17, WD_SWITCH_ON_EOP bit 20. */
#define VGT_DMA_CONTROL__PRIMGROUP_SIZE_MASK 0xffff
#define VGT_DMA_CONTROL__PRIMGROUP_SIZE__SHIFT 0x0
#define VGT_DMA_CONTROL__IA_SWITCH_ON_EOP_MASK 0x20000
#define VGT_DMA_CONTROL__IA_SWITCH_ON_EOP__SHIFT 0x11
#define VGT_DMA_CONTROL__WD_SWITCH_ON_EOP_MASK 0x100000
#define VGT_DMA_CONTROL__WD_SWITCH_ON_EOP__SHIFT 0x14
/*
 * VGT index/instance/primitive and VGT_HOS_* registers. Almost every
 * register in this run has a single field starting at bit 0; only the mask
 * width differs. VGT_PRIMITIVEID_EN is the exception with two single-bit
 * fields (bits 0 and 1).
 */
#define VGT_IMMED_DATA__DATA_MASK 0xffffffff
#define VGT_IMMED_DATA__DATA__SHIFT 0x0
#define VGT_INDEX_TYPE__INDEX_TYPE_MASK 0x3
#define VGT_INDEX_TYPE__INDEX_TYPE__SHIFT 0x0
#define VGT_NUM_INDICES__NUM_INDICES_MASK 0xffffffff
#define VGT_NUM_INDICES__NUM_INDICES__SHIFT 0x0
#define VGT_NUM_INSTANCES__NUM_INSTANCES_MASK 0xffffffff
#define VGT_NUM_INSTANCES__NUM_INSTANCES__SHIFT 0x0
#define VGT_PRIMITIVE_TYPE__PRIM_TYPE_MASK 0x3f
#define VGT_PRIMITIVE_TYPE__PRIM_TYPE__SHIFT 0x0
#define VGT_PRIMITIVEID_EN__PRIMITIVEID_EN_MASK 0x1
#define VGT_PRIMITIVEID_EN__PRIMITIVEID_EN__SHIFT 0x0
#define VGT_PRIMITIVEID_EN__DISABLE_RESET_ON_EOI_MASK 0x2
#define VGT_PRIMITIVEID_EN__DISABLE_RESET_ON_EOI__SHIFT 0x1
#define VGT_PRIMITIVEID_RESET__VALUE_MASK 0xffffffff
#define VGT_PRIMITIVEID_RESET__VALUE__SHIFT 0x0
#define VGT_VTX_CNT_EN__VTX_CNT_EN_MASK 0x1
#define VGT_VTX_CNT_EN__VTX_CNT_EN__SHIFT 0x0
#define VGT_REUSE_OFF__REUSE_OFF_MASK 0x1
#define VGT_REUSE_OFF__REUSE_OFF__SHIFT 0x0
#define VGT_INSTANCE_STEP_RATE_0__STEP_RATE_MASK 0xffffffff
#define VGT_INSTANCE_STEP_RATE_0__STEP_RATE__SHIFT 0x0
#define VGT_INSTANCE_STEP_RATE_1__STEP_RATE_MASK 0xffffffff
#define VGT_INSTANCE_STEP_RATE_1__STEP_RATE__SHIFT 0x0
#define VGT_MAX_VTX_INDX__MAX_INDX_MASK 0xffffffff
#define VGT_MAX_VTX_INDX__MAX_INDX__SHIFT 0x0
#define VGT_MIN_VTX_INDX__MIN_INDX_MASK 0xffffffff
#define VGT_MIN_VTX_INDX__MIN_INDX__SHIFT 0x0
#define VGT_INDX_OFFSET__INDX_OFFSET_MASK 0xffffffff
#define VGT_INDX_OFFSET__INDX_OFFSET__SHIFT 0x0
#define VGT_VERTEX_REUSE_BLOCK_CNTL__VTX_REUSE_DEPTH_MASK 0xff
#define VGT_VERTEX_REUSE_BLOCK_CNTL__VTX_REUSE_DEPTH__SHIFT 0x0
#define VGT_OUT_DEALLOC_CNTL__DEALLOC_DIST_MASK 0x7f
#define VGT_OUT_DEALLOC_CNTL__DEALLOC_DIST__SHIFT 0x0
#define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX_MASK 0xffffffff
#define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX__SHIFT 0x0
#define VGT_MULTI_PRIM_IB_RESET_EN__RESET_EN_MASK 0x1
#define VGT_MULTI_PRIM_IB_RESET_EN__RESET_EN__SHIFT 0x0
#define VGT_ENHANCE__MISC_MASK 0xffffffff
#define VGT_ENHANCE__MISC__SHIFT 0x0
#define VGT_OUTPUT_PATH_CNTL__PATH_SELECT_MASK 0x7
#define VGT_OUTPUT_PATH_CNTL__PATH_SELECT__SHIFT 0x0
#define VGT_HOS_CNTL__TESS_MODE_MASK 0x3
#define VGT_HOS_CNTL__TESS_MODE__SHIFT 0x0
#define VGT_HOS_MAX_TESS_LEVEL__MAX_TESS_MASK 0xffffffff
#define VGT_HOS_MAX_TESS_LEVEL__MAX_TESS__SHIFT 0x0
#define VGT_HOS_MIN_TESS_LEVEL__MIN_TESS_MASK 0xffffffff
#define VGT_HOS_MIN_TESS_LEVEL__MIN_TESS__SHIFT 0x0
#define VGT_HOS_REUSE_DEPTH__REUSE_DEPTH_MASK 0xff
#define VGT_HOS_REUSE_DEPTH__REUSE_DEPTH__SHIFT 0x0
/*
 * VGT_GROUP_* field definitions. Each pair gives a field's in-place mask and
 * the bit index of its least significant bit.
 */
/* VGT_GROUP_PRIM_TYPE: PRIM_TYPE 4:0, RETAIN_ORDER bit 14, RETAIN_QUADS bit 15, PRIM_ORDER 18:16. */
#define VGT_GROUP_PRIM_TYPE__PRIM_TYPE_MASK 0x1f
#define VGT_GROUP_PRIM_TYPE__PRIM_TYPE__SHIFT 0x0
#define VGT_GROUP_PRIM_TYPE__RETAIN_ORDER_MASK 0x4000
#define VGT_GROUP_PRIM_TYPE__RETAIN_ORDER__SHIFT 0xe
#define VGT_GROUP_PRIM_TYPE__RETAIN_QUADS_MASK 0x8000
#define VGT_GROUP_PRIM_TYPE__RETAIN_QUADS__SHIFT 0xf
#define VGT_GROUP_PRIM_TYPE__PRIM_ORDER_MASK 0x70000
#define VGT_GROUP_PRIM_TYPE__PRIM_ORDER__SHIFT 0x10
/* VGT_GROUP_FIRST_DECR / VGT_GROUP_DECR: one 4-bit field each at bits 3:0. */
#define VGT_GROUP_FIRST_DECR__FIRST_DECR_MASK 0xf
#define VGT_GROUP_FIRST_DECR__FIRST_DECR__SHIFT 0x0
#define VGT_GROUP_DECR__DECR_MASK 0xf
#define VGT_GROUP_DECR__DECR__SHIFT 0x0
/*
 * VGT_GROUP_VECT_{0,1}_CNTL share one layout: COMP_X/Y/Z/W_EN at bits 0-3,
 * STRIDE at bits 15:8 and a field literally named SHIFT at bits 23:16.
 * Because of that field name, ...__SHIFT_MASK is the mask of field SHIFT
 * and ...__SHIFT__SHIFT is its bit position.
 */
#define VGT_GROUP_VECT_0_CNTL__COMP_X_EN_MASK 0x1
#define VGT_GROUP_VECT_0_CNTL__COMP_X_EN__SHIFT 0x0
#define VGT_GROUP_VECT_0_CNTL__COMP_Y_EN_MASK 0x2
#define VGT_GROUP_VECT_0_CNTL__COMP_Y_EN__SHIFT 0x1
#define VGT_GROUP_VECT_0_CNTL__COMP_Z_EN_MASK 0x4
#define VGT_GROUP_VECT_0_CNTL__COMP_Z_EN__SHIFT 0x2
#define VGT_GROUP_VECT_0_CNTL__COMP_W_EN_MASK 0x8
#define VGT_GROUP_VECT_0_CNTL__COMP_W_EN__SHIFT 0x3
#define VGT_GROUP_VECT_0_CNTL__STRIDE_MASK 0xff00
#define VGT_GROUP_VECT_0_CNTL__STRIDE__SHIFT 0x8
#define VGT_GROUP_VECT_0_CNTL__SHIFT_MASK 0xff0000
#define VGT_GROUP_VECT_0_CNTL__SHIFT__SHIFT 0x10
#define VGT_GROUP_VECT_1_CNTL__COMP_X_EN_MASK 0x1
#define VGT_GROUP_VECT_1_CNTL__COMP_X_EN__SHIFT 0x0
#define VGT_GROUP_VECT_1_CNTL__COMP_Y_EN_MASK 0x2
#define VGT_GROUP_VECT_1_CNTL__COMP_Y_EN__SHIFT 0x1
#define VGT_GROUP_VECT_1_CNTL__COMP_Z_EN_MASK 0x4
#define VGT_GROUP_VECT_1_CNTL__COMP_Z_EN__SHIFT 0x2
#define VGT_GROUP_VECT_1_CNTL__COMP_W_EN_MASK 0x8
#define VGT_GROUP_VECT_1_CNTL__COMP_W_EN__SHIFT 0x3
#define VGT_GROUP_VECT_1_CNTL__STRIDE_MASK 0xff00
#define VGT_GROUP_VECT_1_CNTL__STRIDE__SHIFT 0x8
#define VGT_GROUP_VECT_1_CNTL__SHIFT_MASK 0xff0000
#define VGT_GROUP_VECT_1_CNTL__SHIFT__SHIFT 0x10
/*
 * VGT_GROUP_VECT_{0,1}_FMT_CNTL share one layout: eight consecutive 4-bit
 * fields covering all 32 bits, in the order X_CONV, X_OFFSET, Y_CONV,
 * Y_OFFSET, Z_CONV, Z_OFFSET, W_CONV, W_OFFSET (bits 3:0 up to 31:28).
 */
#define VGT_GROUP_VECT_0_FMT_CNTL__X_CONV_MASK 0xf
#define VGT_GROUP_VECT_0_FMT_CNTL__X_CONV__SHIFT 0x0
#define VGT_GROUP_VECT_0_FMT_CNTL__X_OFFSET_MASK 0xf0
#define VGT_GROUP_VECT_0_FMT_CNTL__X_OFFSET__SHIFT 0x4
#define VGT_GROUP_VECT_0_FMT_CNTL__Y_CONV_MASK 0xf00
#define VGT_GROUP_VECT_0_FMT_CNTL__Y_CONV__SHIFT 0x8
#define VGT_GROUP_VECT_0_FMT_CNTL__Y_OFFSET_MASK 0xf000
#define VGT_GROUP_VECT_0_FMT_CNTL__Y_OFFSET__SHIFT 0xc
#define VGT_GROUP_VECT_0_FMT_CNTL__Z_CONV_MASK 0xf0000
#define VGT_GROUP_VECT_0_FMT_CNTL__Z_CONV__SHIFT 0x10
#define VGT_GROUP_VECT_0_FMT_CNTL__Z_OFFSET_MASK 0xf00000
#define VGT_GROUP_VECT_0_FMT_CNTL__Z_OFFSET__SHIFT 0x14
#define VGT_GROUP_VECT_0_FMT_CNTL__W_CONV_MASK 0xf000000
#define VGT_GROUP_VECT_0_FMT_CNTL__W_CONV__SHIFT 0x18
#define VGT_GROUP_VECT_0_FMT_CNTL__W_OFFSET_MASK 0xf0000000
#define VGT_GROUP_VECT_0_FMT_CNTL__W_OFFSET__SHIFT 0x1c
#define VGT_GROUP_VECT_1_FMT_CNTL__X_CONV_MASK 0xf
#define VGT_GROUP_VECT_1_FMT_CNTL__X_CONV__SHIFT 0x0
#define VGT_GROUP_VECT_1_FMT_CNTL__X_OFFSET_MASK 0xf0
#define VGT_GROUP_VECT_1_FMT_CNTL__X_OFFSET__SHIFT 0x4
#define VGT_GROUP_VECT_1_FMT_CNTL__Y_CONV_MASK 0xf00
#define VGT_GROUP_VECT_1_FMT_CNTL__Y_CONV__SHIFT 0x8
#define VGT_GROUP_VECT_1_FMT_CNTL__Y_OFFSET_MASK 0xf000
#define VGT_GROUP_VECT_1_FMT_CNTL__Y_OFFSET__SHIFT 0xc
#define VGT_GROUP_VECT_1_FMT_CNTL__Z_CONV_MASK 0xf0000
#define VGT_GROUP_VECT_1_FMT_CNTL__Z_CONV__SHIFT 0x10
#define VGT_GROUP_VECT_1_FMT_CNTL__Z_OFFSET_MASK 0xf00000
#define VGT_GROUP_VECT_1_FMT_CNTL__Z_OFFSET__SHIFT 0x14
#define VGT_GROUP_VECT_1_FMT_CNTL__W_CONV_MASK 0xf000000
#define VGT_GROUP_VECT_1_FMT_CNTL__W_CONV__SHIFT 0x18
#define VGT_GROUP_VECT_1_FMT_CNTL__W_OFFSET_MASK 0xf0000000
#define VGT_GROUP_VECT_1_FMT_CNTL__W_OFFSET__SHIFT 0x1c
/*
 * VGT eject / FIFO-depth registers (one field each, starting at bit 0) and
 * VGT_LAST_COPY_STATE (SRC_STATE_ID at bits 2:0, DST_STATE_ID at bits 18:16).
 */
#define VGT_VTX_VECT_EJECT_REG__PRIM_COUNT_MASK 0x3ff
#define VGT_VTX_VECT_EJECT_REG__PRIM_COUNT__SHIFT 0x0
#define VGT_DMA_DATA_FIFO_DEPTH__DMA_DATA_FIFO_DEPTH_MASK 0x1ff
#define VGT_DMA_DATA_FIFO_DEPTH__DMA_DATA_FIFO_DEPTH__SHIFT 0x0
#define VGT_DMA_REQ_FIFO_DEPTH__DMA_REQ_FIFO_DEPTH_MASK 0x3f
#define VGT_DMA_REQ_FIFO_DEPTH__DMA_REQ_FIFO_DEPTH__SHIFT 0x0
#define VGT_DRAW_INIT_FIFO_DEPTH__DRAW_INIT_FIFO_DEPTH_MASK 0x3f
#define VGT_DRAW_INIT_FIFO_DEPTH__DRAW_INIT_FIFO_DEPTH__SHIFT 0x0
#define VGT_LAST_COPY_STATE__SRC_STATE_ID_MASK 0x7
#define VGT_LAST_COPY_STATE__SRC_STATE_ID__SHIFT 0x0
#define VGT_LAST_COPY_STATE__DST_STATE_ID_MASK 0x70000
#define VGT_LAST_COPY_STATE__DST_STATE_ID__SHIFT 0x10
/*
 * CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG: two registers
 * with the same field layout.
 *   DPFP_RATE           - bits 2:1 (bit 0 has no field defined here)
 *   SQC_BALANCE_DISABLE - bit 3
 *   HALF_LDS            - bit 4
 *   INACTIVE_CUS        - bits 31:16
 */
#define CC_GC_SHADER_ARRAY_CONFIG__DPFP_RATE_MASK 0x6
#define CC_GC_SHADER_ARRAY_CONFIG__DPFP_RATE__SHIFT 0x1
#define CC_GC_SHADER_ARRAY_CONFIG__SQC_BALANCE_DISABLE_MASK 0x8
#define CC_GC_SHADER_ARRAY_CONFIG__SQC_BALANCE_DISABLE__SHIFT 0x3
#define CC_GC_SHADER_ARRAY_CONFIG__HALF_LDS_MASK 0x10
#define CC_GC_SHADER_ARRAY_CONFIG__HALF_LDS__SHIFT 0x4
#define CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK 0xffff0000
#define CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT 0x10
#define GC_USER_SHADER_ARRAY_CONFIG__DPFP_RATE_MASK 0x6
#define GC_USER_SHADER_ARRAY_CONFIG__DPFP_RATE__SHIFT 0x1
#define GC_USER_SHADER_ARRAY_CONFIG__SQC_BALANCE_DISABLE_MASK 0x8
#define GC_USER_SHADER_ARRAY_CONFIG__SQC_BALANCE_DISABLE__SHIFT 0x3
#define GC_USER_SHADER_ARRAY_CONFIG__HALF_LDS_MASK 0x10
#define GC_USER_SHADER_ARRAY_CONFIG__HALF_LDS__SHIFT 0x4
#define GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK 0xffff0000
#define GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT 0x10
/*
 * VGT_GS_MODE: fields are contiguous from bit 0 through bit 22, with
 * RESERVED_0/1/2 filling bit 3, bits 10:6 and bit 12 respectively.
 * MODE is bits 2:0, CUT_MODE bits 5:4, ONCHIP bits 22:21; every other
 * named field is a single bit.
 */
#define VGT_GS_MODE__MODE_MASK 0x7
#define VGT_GS_MODE__MODE__SHIFT 0x0
#define VGT_GS_MODE__RESERVED_0_MASK 0x8
#define VGT_GS_MODE__RESERVED_0__SHIFT 0x3
#define VGT_GS_MODE__CUT_MODE_MASK 0x30
#define VGT_GS_MODE__CUT_MODE__SHIFT 0x4
#define VGT_GS_MODE__RESERVED_1_MASK 0x7c0
#define VGT_GS_MODE__RESERVED_1__SHIFT 0x6
#define VGT_GS_MODE__GS_C_PACK_EN_MASK 0x800
#define VGT_GS_MODE__GS_C_PACK_EN__SHIFT 0xb
#define VGT_GS_MODE__RESERVED_2_MASK 0x1000
#define VGT_GS_MODE__RESERVED_2__SHIFT 0xc
#define VGT_GS_MODE__ES_PASSTHRU_MASK 0x2000
#define VGT_GS_MODE__ES_PASSTHRU__SHIFT 0xd
#define VGT_GS_MODE__COMPUTE_MODE_MASK 0x4000
#define VGT_GS_MODE__COMPUTE_MODE__SHIFT 0xe
#define VGT_GS_MODE__FAST_COMPUTE_MODE_MASK 0x8000
#define VGT_GS_MODE__FAST_COMPUTE_MODE__SHIFT 0xf
#define VGT_GS_MODE__ELEMENT_INFO_EN_MASK 0x10000
#define VGT_GS_MODE__ELEMENT_INFO_EN__SHIFT 0x10
#define VGT_GS_MODE__PARTIAL_THD_AT_EOI_MASK 0x20000
#define VGT_GS_MODE__PARTIAL_THD_AT_EOI__SHIFT 0x11
#define VGT_GS_MODE__SUPPRESS_CUTS_MASK 0x40000
#define VGT_GS_MODE__SUPPRESS_CUTS__SHIFT 0x12
#define VGT_GS_MODE__ES_WRITE_OPTIMIZE_MASK 0x80000
#define VGT_GS_MODE__ES_WRITE_OPTIMIZE__SHIFT 0x13
#define VGT_GS_MODE__GS_WRITE_OPTIMIZE_MASK 0x100000
#define VGT_GS_MODE__GS_WRITE_OPTIMIZE__SHIFT 0x14
#define VGT_GS_MODE__ONCHIP_MASK 0x600000
#define VGT_GS_MODE__ONCHIP__SHIFT 0x15
/* VGT_GS_ONCHIP_CNTL: two adjacent 11-bit fields, bits 10:0 and bits 21:11. */
#define VGT_GS_ONCHIP_CNTL__ES_VERTS_PER_SUBGRP_MASK 0x7ff
#define VGT_GS_ONCHIP_CNTL__ES_VERTS_PER_SUBGRP__SHIFT 0x0
#define VGT_GS_ONCHIP_CNTL__GS_PRIMS_PER_SUBGRP_MASK 0x3ff800
#define VGT_GS_ONCHIP_CNTL__GS_PRIMS_PER_SUBGRP__SHIFT 0xb
/*
 * VGT_GS_OUT_PRIM_TYPE: four 6-bit OUTPRIM_TYPE fields plus a flag at bit 31.
 * The first three sit at 8-bit strides (bits 5:0, 13:8, 21:16), but
 * OUTPRIM_TYPE_3 starts at bit 22 (bits 27:22), not bit 24 - do not derive
 * its position from the stride of the others.
 */
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_MASK 0x3f
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE__SHIFT 0x0
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_1_MASK 0x3f00
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_1__SHIFT 0x8
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_2_MASK 0x3f0000
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_2__SHIFT 0x10
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_3_MASK 0xfc00000
#define VGT_GS_OUT_PRIM_TYPE__OUTPRIM_TYPE_3__SHIFT 0x16
#define VGT_GS_OUT_PRIM_TYPE__UNIQUE_TYPE_PER_STREAM_MASK 0x80000000
#define VGT_GS_OUT_PRIM_TYPE__UNIQUE_TYPE_PER_STREAM__SHIFT 0x1f
/*
 * VGT cache/debug/stream-out-delay/FIFO registers and IA_CNTL_STATUS.
 * Each pair gives a field's in-place mask and the bit index of its least
 * significant bit.
 */
/*
 * VGT_CACHE_INVALIDATION: sparse layout - CACHE_INVALIDATION 1:0,
 * VS_NO_EXTRA_BUFFER bit 5, AUTO_INVLD_EN 7:6, USE_GS_DONE bit 9,
 * DIS_RANGE_FULL_INVLD bit 11, GS_LATE_ALLOC_EN bit 12,
 * STREAMOUT_FULL_FLUSH bit 13, ES_LIMIT 20:16. The bits in between have no
 * field defined here.
 */
#define VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_MASK 0x3
#define VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT 0x0
#define VGT_CACHE_INVALIDATION__VS_NO_EXTRA_BUFFER_MASK 0x20
#define VGT_CACHE_INVALIDATION__VS_NO_EXTRA_BUFFER__SHIFT 0x5
#define VGT_CACHE_INVALIDATION__AUTO_INVLD_EN_MASK 0xc0
#define VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT 0x6
#define VGT_CACHE_INVALIDATION__USE_GS_DONE_MASK 0x200
#define VGT_CACHE_INVALIDATION__USE_GS_DONE__SHIFT 0x9
#define VGT_CACHE_INVALIDATION__DIS_RANGE_FULL_INVLD_MASK 0x800
#define VGT_CACHE_INVALIDATION__DIS_RANGE_FULL_INVLD__SHIFT 0xb
#define VGT_CACHE_INVALIDATION__GS_LATE_ALLOC_EN_MASK 0x1000
#define VGT_CACHE_INVALIDATION__GS_LATE_ALLOC_EN__SHIFT 0xc
#define VGT_CACHE_INVALIDATION__STREAMOUT_FULL_FLUSH_MASK 0x2000
#define VGT_CACHE_INVALIDATION__STREAMOUT_FULL_FLUSH__SHIFT 0xd
#define VGT_CACHE_INVALIDATION__ES_LIMIT_MASK 0x1f0000
#define VGT_CACHE_INVALIDATION__ES_LIMIT__SHIFT 0x10
/* VGT_RESET_DEBUG: three single-bit fields at bits 0, 1 and 2. */
#define VGT_RESET_DEBUG__GS_DISABLE_MASK 0x1
#define VGT_RESET_DEBUG__GS_DISABLE__SHIFT 0x0
#define VGT_RESET_DEBUG__TESS_DISABLE_MASK 0x2
#define VGT_RESET_DEBUG__TESS_DISABLE__SHIFT 0x1
#define VGT_RESET_DEBUG__WD_DISABLE_MASK 0x4
#define VGT_RESET_DEBUG__WD_DISABLE__SHIFT 0x2
/* VGT_STRMOUT_DELAY: SKIP_DELAY 7:0, then four consecutive 3-bit SEn_WD_DELAY fields (10:8, 13:11, 16:14, 19:17). */
#define VGT_STRMOUT_DELAY__SKIP_DELAY_MASK 0xff
#define VGT_STRMOUT_DELAY__SKIP_DELAY__SHIFT 0x0
#define VGT_STRMOUT_DELAY__SE0_WD_DELAY_MASK 0x700
#define VGT_STRMOUT_DELAY__SE0_WD_DELAY__SHIFT 0x8
#define VGT_STRMOUT_DELAY__SE1_WD_DELAY_MASK 0x3800
#define VGT_STRMOUT_DELAY__SE1_WD_DELAY__SHIFT 0xb
#define VGT_STRMOUT_DELAY__SE2_WD_DELAY_MASK 0x1c000
#define VGT_STRMOUT_DELAY__SE2_WD_DELAY__SHIFT 0xe
#define VGT_STRMOUT_DELAY__SE3_WD_DELAY_MASK 0xe0000
#define VGT_STRMOUT_DELAY__SE3_WD_DELAY__SHIFT 0x11
/* VGT_FIFO_DEPTHS: VS_DEALLOC_TBL_DEPTH 6:0, RESERVED_0 bit 7, CLIPP_FIFO_DEPTH 21:8, RESERVED_1 bit 22. */
#define VGT_FIFO_DEPTHS__VS_DEALLOC_TBL_DEPTH_MASK 0x7f
#define VGT_FIFO_DEPTHS__VS_DEALLOC_TBL_DEPTH__SHIFT 0x0
#define VGT_FIFO_DEPTHS__RESERVED_0_MASK 0x80
#define VGT_FIFO_DEPTHS__RESERVED_0__SHIFT 0x7
#define VGT_FIFO_DEPTHS__CLIPP_FIFO_DEPTH_MASK 0x3fff00
#define VGT_FIFO_DEPTHS__CLIPP_FIFO_DEPTH__SHIFT 0x8
#define VGT_FIFO_DEPTHS__RESERVED_1_MASK 0x400000
#define VGT_FIFO_DEPTHS__RESERVED_1__SHIFT 0x16
/* Single-field registers, each field starting at bit 0. */
#define VGT_GS_PER_ES__GS_PER_ES_MASK 0x7ff
#define VGT_GS_PER_ES__GS_PER_ES__SHIFT 0x0
#define VGT_ES_PER_GS__ES_PER_GS_MASK 0x7ff
#define VGT_ES_PER_GS__ES_PER_GS__SHIFT 0x0
#define VGT_GS_PER_VS__GS_PER_VS_MASK 0xf
#define VGT_GS_PER_VS__GS_PER_VS__SHIFT 0x0
#define VGT_GS_VERTEX_REUSE__VERT_REUSE_MASK 0x1f
#define VGT_GS_VERTEX_REUSE__VERT_REUSE__SHIFT 0x0
#define VGT_MC_LAT_CNTL__MC_TIME_STAMP_RES_MASK 0x3
#define VGT_MC_LAT_CNTL__MC_TIME_STAMP_RES__SHIFT 0x0
/* IA_CNTL_STATUS: five single-bit *_BUSY fields at bits 0 through 4. */
#define IA_CNTL_STATUS__IA_BUSY_MASK 0x1
#define IA_CNTL_STATUS__IA_BUSY__SHIFT 0x0
#define IA_CNTL_STATUS__IA_DMA_BUSY_MASK 0x2
#define IA_CNTL_STATUS__IA_DMA_BUSY__SHIFT 0x1
#define IA_CNTL_STATUS__IA_DMA_REQ_BUSY_MASK 0x4
#define IA_CNTL_STATUS__IA_DMA_REQ_BUSY__SHIFT 0x2
#define IA_CNTL_STATUS__IA_GRP_BUSY_MASK 0x8
#define IA_CNTL_STATUS__IA_GRP_BUSY__SHIFT 0x3
#define IA_CNTL_STATUS__IA_ADC_BUSY_MASK 0x10
#define IA_CNTL_STATUS__IA_ADC_BUSY__SHIFT 0x4
/*
 * VGT_STRMOUT_* (stream-out) field definitions, followed by
 * VGT_GS_MAX_VERT_OUT. Each pair gives a field's in-place mask and the bit
 * index of its least significant bit.
 */
/*
 * VGT_STRMOUT_CONFIG: STREAMOUT_n_EN at bits 0-3, RAST_STREAM at bits 6:4,
 * RAST_STREAM_MASK at bits 11:8, USE_RAST_STREAM_MASK at bit 31.
 * Beware the near-identical names: ...__RAST_STREAM_MASK (0x70) is the mask
 * of field RAST_STREAM, whereas ...__RAST_STREAM_MASK_MASK (0xf00) is the
 * mask of the separate field named RAST_STREAM_MASK.
 */
#define VGT_STRMOUT_CONFIG__STREAMOUT_0_EN_MASK 0x1
#define VGT_STRMOUT_CONFIG__STREAMOUT_0_EN__SHIFT 0x0
#define VGT_STRMOUT_CONFIG__STREAMOUT_1_EN_MASK 0x2
#define VGT_STRMOUT_CONFIG__STREAMOUT_1_EN__SHIFT 0x1
#define VGT_STRMOUT_CONFIG__STREAMOUT_2_EN_MASK 0x4
#define VGT_STRMOUT_CONFIG__STREAMOUT_2_EN__SHIFT 0x2
#define VGT_STRMOUT_CONFIG__STREAMOUT_3_EN_MASK 0x8
#define VGT_STRMOUT_CONFIG__STREAMOUT_3_EN__SHIFT 0x3
#define VGT_STRMOUT_CONFIG__RAST_STREAM_MASK 0x70
#define VGT_STRMOUT_CONFIG__RAST_STREAM__SHIFT 0x4
#define VGT_STRMOUT_CONFIG__RAST_STREAM_MASK_MASK 0xf00
#define VGT_STRMOUT_CONFIG__RAST_STREAM_MASK__SHIFT 0x8
#define VGT_STRMOUT_CONFIG__USE_RAST_STREAM_MASK_MASK 0x80000000
#define VGT_STRMOUT_CONFIG__USE_RAST_STREAM_MASK__SHIFT 0x1f
/* VGT_STRMOUT_BUFFER_SIZE_0..3: one full-width 32-bit SIZE field each. */
#define VGT_STRMOUT_BUFFER_SIZE_0__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_SIZE_0__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_SIZE_1__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_SIZE_1__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_SIZE_2__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_SIZE_2__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_SIZE_3__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_SIZE_3__SIZE__SHIFT 0x0
/* VGT_STRMOUT_BUFFER_OFFSET_0..3: one full-width 32-bit OFFSET field each. */
#define VGT_STRMOUT_BUFFER_OFFSET_0__OFFSET_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_OFFSET_0__OFFSET__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_OFFSET_1__OFFSET_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_OFFSET_1__OFFSET__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_OFFSET_2__OFFSET_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_OFFSET_2__OFFSET__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_OFFSET_3__OFFSET_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_OFFSET_3__OFFSET__SHIFT 0x0
/* VGT_STRMOUT_VTX_STRIDE_0..3: one 10-bit STRIDE field each (bits 9:0). */
#define VGT_STRMOUT_VTX_STRIDE_0__STRIDE_MASK 0x3ff
#define VGT_STRMOUT_VTX_STRIDE_0__STRIDE__SHIFT 0x0
#define VGT_STRMOUT_VTX_STRIDE_1__STRIDE_MASK 0x3ff
#define VGT_STRMOUT_VTX_STRIDE_1__STRIDE__SHIFT 0x0
#define VGT_STRMOUT_VTX_STRIDE_2__STRIDE_MASK 0x3ff
#define VGT_STRMOUT_VTX_STRIDE_2__STRIDE__SHIFT 0x0
#define VGT_STRMOUT_VTX_STRIDE_3__STRIDE_MASK 0x3ff
#define VGT_STRMOUT_VTX_STRIDE_3__STRIDE__SHIFT 0x0
/* VGT_STRMOUT_BUFFER_CONFIG: four consecutive 4-bit STREAM_n_BUFFER_EN fields (bits 3:0 up to 15:12). */
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_0_BUFFER_EN_MASK 0xf
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_0_BUFFER_EN__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_1_BUFFER_EN_MASK 0xf0
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_1_BUFFER_EN__SHIFT 0x4
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_2_BUFFER_EN_MASK 0xf00
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_2_BUFFER_EN__SHIFT 0x8
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_3_BUFFER_EN_MASK 0xf000
#define VGT_STRMOUT_BUFFER_CONFIG__STREAM_3_BUFFER_EN__SHIFT 0xc
/* VGT_STRMOUT_BUFFER_FILLED_SIZE_0..3: one full-width 32-bit SIZE field each. */
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_0__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_0__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_1__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_1__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_2__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_2__SIZE__SHIFT 0x0
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_3__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_BUFFER_FILLED_SIZE_3__SIZE__SHIFT 0x0
/* VGT_STRMOUT_DRAW_OPAQUE_*: OFFSET and SIZE are full 32-bit fields; VERTEX_STRIDE is 9 bits (8:0). */
#define VGT_STRMOUT_DRAW_OPAQUE_OFFSET__OFFSET_MASK 0xffffffff
#define VGT_STRMOUT_DRAW_OPAQUE_OFFSET__OFFSET__SHIFT 0x0
#define VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE__SIZE_MASK 0xffffffff
#define VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE__SIZE__SHIFT 0x0
#define VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE__VERTEX_STRIDE_MASK 0x1ff
#define VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE__VERTEX_STRIDE__SHIFT 0x0
/* VGT_GS_MAX_VERT_OUT: MAX_VERT_OUT is 11 bits wide (10:0). */
#define VGT_GS_MAX_VERT_OUT__MAX_VERT_OUT_MASK 0x7ff
#define VGT_GS_MAX_VERT_OUT__MAX_VERT_OUT__SHIFT 0x0
/*
 * IA_VMID_OVERRIDE, shader-stage enable, LS/HS config, tessellation-factor
 * (VGT_TF_*), VGT_SYS_CONFIG, off-chip HS and GS instance-count registers.
 * Each pair gives a field's in-place mask and the bit index of its least
 * significant bit.
 */
/* IA_VMID_OVERRIDE: ENABLE bit 0, VMID bits 4:1. */
#define IA_VMID_OVERRIDE__ENABLE_MASK 0x1
#define IA_VMID_OVERRIDE__ENABLE__SHIFT 0x0
#define IA_VMID_OVERRIDE__VMID_MASK 0x1e
#define IA_VMID_OVERRIDE__VMID__SHIFT 0x1
/* VGT_SHADER_STAGES_EN: LS_EN 1:0, HS_EN bit 2, ES_EN 4:3, GS_EN bit 5, VS_EN 7:6, DYNAMIC_HS bit 8. */
#define VGT_SHADER_STAGES_EN__LS_EN_MASK 0x3
#define VGT_SHADER_STAGES_EN__LS_EN__SHIFT 0x0
#define VGT_SHADER_STAGES_EN__HS_EN_MASK 0x4
#define VGT_SHADER_STAGES_EN__HS_EN__SHIFT 0x2
#define VGT_SHADER_STAGES_EN__ES_EN_MASK 0x18
#define VGT_SHADER_STAGES_EN__ES_EN__SHIFT 0x3
#define VGT_SHADER_STAGES_EN__GS_EN_MASK 0x20
#define VGT_SHADER_STAGES_EN__GS_EN__SHIFT 0x5
#define VGT_SHADER_STAGES_EN__VS_EN_MASK 0xc0
#define VGT_SHADER_STAGES_EN__VS_EN__SHIFT 0x6
#define VGT_SHADER_STAGES_EN__DYNAMIC_HS_MASK 0x100
#define VGT_SHADER_STAGES_EN__DYNAMIC_HS__SHIFT 0x8
/* VGT_DISPATCH_DRAW_INDEX: one full-width 32-bit field. */
#define VGT_DISPATCH_DRAW_INDEX__MATCH_INDEX_MASK 0xffffffff
#define VGT_DISPATCH_DRAW_INDEX__MATCH_INDEX__SHIFT 0x0
/* VGT_LS_HS_CONFIG: NUM_PATCHES 7:0, HS_NUM_INPUT_CP 13:8, HS_NUM_OUTPUT_CP 19:14. */
#define VGT_LS_HS_CONFIG__NUM_PATCHES_MASK 0xff
#define VGT_LS_HS_CONFIG__NUM_PATCHES__SHIFT 0x0
#define VGT_LS_HS_CONFIG__HS_NUM_INPUT_CP_MASK 0x3f00
#define VGT_LS_HS_CONFIG__HS_NUM_INPUT_CP__SHIFT 0x8
#define VGT_LS_HS_CONFIG__HS_NUM_OUTPUT_CP_MASK 0xfc000
#define VGT_LS_HS_CONFIG__HS_NUM_OUTPUT_CP__SHIFT 0xe
/* VGT_DMA_LS_HS_CONFIG: only HS_NUM_INPUT_CP (bits 13:8) is defined, at the same position as in VGT_LS_HS_CONFIG. */
#define VGT_DMA_LS_HS_CONFIG__HS_NUM_INPUT_CP_MASK 0x3f00
#define VGT_DMA_LS_HS_CONFIG__HS_NUM_INPUT_CP__SHIFT 0x8
/*
 * VGT_TF_PARAM: contiguous fields from bit 0 through bit 16 -
 * TYPE 1:0, PARTITIONING 4:2, TOPOLOGY 7:5, RESERVED_REDUC_AXIS bit 8,
 * DEPRECATED bit 9, NUM_DS_WAVES_PER_SIMD 13:10, DISABLE_DONUTS bit 14,
 * RDREQ_POLICY 16:15.
 */
#define VGT_TF_PARAM__TYPE_MASK 0x3
#define VGT_TF_PARAM__TYPE__SHIFT 0x0
#define VGT_TF_PARAM__PARTITIONING_MASK 0x1c
#define VGT_TF_PARAM__PARTITIONING__SHIFT 0x2
#define VGT_TF_PARAM__TOPOLOGY_MASK 0xe0
#define VGT_TF_PARAM__TOPOLOGY__SHIFT 0x5
#define VGT_TF_PARAM__RESERVED_REDUC_AXIS_MASK 0x100
#define VGT_TF_PARAM__RESERVED_REDUC_AXIS__SHIFT 0x8
#define VGT_TF_PARAM__DEPRECATED_MASK 0x200
#define VGT_TF_PARAM__DEPRECATED__SHIFT 0x9
#define VGT_TF_PARAM__NUM_DS_WAVES_PER_SIMD_MASK 0x3c00
#define VGT_TF_PARAM__NUM_DS_WAVES_PER_SIMD__SHIFT 0xa
#define VGT_TF_PARAM__DISABLE_DONUTS_MASK 0x4000
#define VGT_TF_PARAM__DISABLE_DONUTS__SHIFT 0xe
#define VGT_TF_PARAM__RDREQ_POLICY_MASK 0x18000
#define VGT_TF_PARAM__RDREQ_POLICY__SHIFT 0xf
/* VGT_TF_RING_SIZE: SIZE is 16 bits wide (15:0). */
#define VGT_TF_RING_SIZE__SIZE_MASK 0xffff
#define VGT_TF_RING_SIZE__SIZE__SHIFT 0x0
/* VGT_SYS_CONFIG: DUAL_CORE_EN bit 0, MAX_LS_HS_THDGRP 6:1, ADC_EVENT_FILTER_DISABLE bit 7. */
#define VGT_SYS_CONFIG__DUAL_CORE_EN_MASK 0x1
#define VGT_SYS_CONFIG__DUAL_CORE_EN__SHIFT 0x0
#define VGT_SYS_CONFIG__MAX_LS_HS_THDGRP_MASK 0x7e
#define VGT_SYS_CONFIG__MAX_LS_HS_THDGRP__SHIFT 0x1
#define VGT_SYS_CONFIG__ADC_EVENT_FILTER_DISABLE_MASK 0x80
#define VGT_SYS_CONFIG__ADC_EVENT_FILTER_DISABLE__SHIFT 0x7
/* VGT_HS_OFFCHIP_PARAM: OFFCHIP_BUFFERING 8:0, OFFCHIP_GRANULARITY 10:9. */
#define VGT_HS_OFFCHIP_PARAM__OFFCHIP_BUFFERING_MASK 0x1ff
#define VGT_HS_OFFCHIP_PARAM__OFFCHIP_BUFFERING__SHIFT 0x0
#define VGT_HS_OFFCHIP_PARAM__OFFCHIP_GRANULARITY_MASK 0x600
#define VGT_HS_OFFCHIP_PARAM__OFFCHIP_GRANULARITY__SHIFT 0x9
/* VGT_TF_MEMORY_BASE: one full-width 32-bit field. */
#define VGT_TF_MEMORY_BASE__BASE_MASK 0xffffffff
#define VGT_TF_MEMORY_BASE__BASE__SHIFT 0x0
/* VGT_GS_INSTANCE_CNT: ENABLE bit 0, CNT bits 8:2 - bit 1 is skipped (no field defined for it here). */
#define VGT_GS_INSTANCE_CNT__ENABLE_MASK 0x1
#define VGT_GS_INSTANCE_CNT__ENABLE__SHIFT 0x0
#define VGT_GS_INSTANCE_CNT__CNT_MASK 0x1fc
#define VGT_GS_INSTANCE_CNT__CNT__SHIFT 0x2
/*
 * IA_MULTI_VGT_PARAM: PRIMGROUP_SIZE [15:0], followed by five single-bit
 * flags at bits 16-20 (PARTIAL_VS_WAVE_ON, SWITCH_ON_EOP, PARTIAL_ES_WAVE_ON,
 * SWITCH_ON_EOI, WD_SWITCH_ON_EOP).
 */
#define IA_MULTI_VGT_PARAM__PRIMGROUP_SIZE_MASK 0xffff
#define IA_MULTI_VGT_PARAM__PRIMGROUP_SIZE__SHIFT 0x0
#define IA_MULTI_VGT_PARAM__PARTIAL_VS_WAVE_ON_MASK 0x10000
#define IA_MULTI_VGT_PARAM__PARTIAL_VS_WAVE_ON__SHIFT 0x10
#define IA_MULTI_VGT_PARAM__SWITCH_ON_EOP_MASK 0x20000
#define IA_MULTI_VGT_PARAM__SWITCH_ON_EOP__SHIFT 0x11
#define IA_MULTI_VGT_PARAM__PARTIAL_ES_WAVE_ON_MASK 0x40000
#define IA_MULTI_VGT_PARAM__PARTIAL_ES_WAVE_ON__SHIFT 0x12
#define IA_MULTI_VGT_PARAM__SWITCH_ON_EOI_MASK 0x80000
#define IA_MULTI_VGT_PARAM__SWITCH_ON_EOI__SHIFT 0x13
#define IA_MULTI_VGT_PARAM__WD_SWITCH_ON_EOP_MASK 0x100000
#define IA_MULTI_VGT_PARAM__WD_SWITCH_ON_EOP__SHIFT 0x14
/* VGT_VS_MAX_WAVE_ID: MAX_WAVE_ID [11:0]. */
#define VGT_VS_MAX_WAVE_ID__MAX_WAVE_ID_MASK 0xfff
#define VGT_VS_MAX_WAVE_ID__MAX_WAVE_ID__SHIFT 0x0
/* VGT_ESGS_RING_SIZE / VGT_GSVS_RING_SIZE: MEM_SIZE covers all 32 bits. */
#define VGT_ESGS_RING_SIZE__MEM_SIZE_MASK 0xffffffff
#define VGT_ESGS_RING_SIZE__MEM_SIZE__SHIFT 0x0
#define VGT_GSVS_RING_SIZE__MEM_SIZE_MASK 0xffffffff
#define VGT_GSVS_RING_SIZE__MEM_SIZE__SHIFT 0x0
/* VGT_GSVS_RING_OFFSET_1..3: each holds a single 15-bit OFFSET field [14:0]. */
#define VGT_GSVS_RING_OFFSET_1__OFFSET_MASK 0x7fff
#define VGT_GSVS_RING_OFFSET_1__OFFSET__SHIFT 0x0
#define VGT_GSVS_RING_OFFSET_2__OFFSET_MASK 0x7fff
#define VGT_GSVS_RING_OFFSET_2__OFFSET__SHIFT 0x0
#define VGT_GSVS_RING_OFFSET_3__OFFSET_MASK 0x7fff
#define VGT_GSVS_RING_OFFSET_3__OFFSET__SHIFT 0x0
/*
 * VGT_ESGS_RING_ITEMSIZE, VGT_GSVS_RING_ITEMSIZE and VGT_GS_VERT_ITEMSIZE
 * (plus its _1.._3 variants): each holds a single 15-bit ITEMSIZE field [14:0].
 */
#define VGT_ESGS_RING_ITEMSIZE__ITEMSIZE_MASK 0x7fff
#define VGT_ESGS_RING_ITEMSIZE__ITEMSIZE__SHIFT 0x0
#define VGT_GSVS_RING_ITEMSIZE__ITEMSIZE_MASK 0x7fff
#define VGT_GSVS_RING_ITEMSIZE__ITEMSIZE__SHIFT 0x0
#define VGT_GS_VERT_ITEMSIZE__ITEMSIZE_MASK 0x7fff
#define VGT_GS_VERT_ITEMSIZE__ITEMSIZE__SHIFT 0x0
#define VGT_GS_VERT_ITEMSIZE_1__ITEMSIZE_MASK 0x7fff
#define VGT_GS_VERT_ITEMSIZE_1__ITEMSIZE__SHIFT 0x0
#define VGT_GS_VERT_ITEMSIZE_2__ITEMSIZE_MASK 0x7fff
#define VGT_GS_VERT_ITEMSIZE_2__ITEMSIZE__SHIFT 0x0
#define VGT_GS_VERT_ITEMSIZE_3__ITEMSIZE_MASK 0x7fff
#define VGT_GS_VERT_ITEMSIZE_3__ITEMSIZE__SHIFT 0x0
/* WD_CNTL_STATUS: four single-bit *_BUSY flags at bits 0-3. */
#define WD_CNTL_STATUS__WD_BUSY_MASK 0x1
#define WD_CNTL_STATUS__WD_BUSY__SHIFT 0x0
#define WD_CNTL_STATUS__WD_SPL_DMA_BUSY_MASK 0x2
#define WD_CNTL_STATUS__WD_SPL_DMA_BUSY__SHIFT 0x1
#define WD_CNTL_STATUS__WD_SPL_DI_BUSY_MASK 0x4
#define WD_CNTL_STATUS__WD_SPL_DI_BUSY__SHIFT 0x2
#define WD_CNTL_STATUS__WD_ADC_BUSY_MASK 0x8
#define WD_CNTL_STATUS__WD_ADC_BUSY__SHIFT 0x3
/* WD_ENHANCE: MISC covers all 32 bits. */
#define WD_ENHANCE__MISC_MASK 0xffffffff
#define WD_ENHANCE__MISC__SHIFT 0x0
/* GFX_PIPE_CONTROL: HYSTERESIS_CNT [12:0], RESERVED [15:13], CONTEXT_SUSPEND_EN [16]. */
#define GFX_PIPE_CONTROL__HYSTERESIS_CNT_MASK 0x1fff
#define GFX_PIPE_CONTROL__HYSTERESIS_CNT__SHIFT 0x0
#define GFX_PIPE_CONTROL__RESERVED_MASK 0xe000
#define GFX_PIPE_CONTROL__RESERVED__SHIFT 0xd
#define GFX_PIPE_CONTROL__CONTEXT_SUSPEND_EN_MASK 0x10000
#define GFX_PIPE_CONTROL__CONTEXT_SUSPEND_EN__SHIFT 0x10
/* GFX_PIPE_PRIORITY: HP_PIPE_SELECT [0]. */
#define GFX_PIPE_PRIORITY__HP_PIPE_SELECT_MASK 0x1
#define GFX_PIPE_PRIORITY__HP_PIPE_SELECT__SHIFT 0x0
/*
 * CGTT_VGT_CLK_CTRL: ON_DELAY [3:0], OFF_HYSTERESIS [11:4], then eight
 * single-bit fields at bits 24-31. No fields are defined here for bits 12-23.
 */
#define CGTT_VGT_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_VGT_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_VGT_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_VGT_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_VGT_CLK_CTRL__PERF_ENABLE_MASK 0x2000000
#define CGTT_VGT_CLK_CTRL__PERF_ENABLE__SHIFT 0x19
#define CGTT_VGT_CLK_CTRL__DBG_ENABLE_MASK 0x4000000
#define CGTT_VGT_CLK_CTRL__DBG_ENABLE__SHIFT 0x1a
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_VGT_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_VGT_CLK_CTRL__GS_OVERRIDE_MASK 0x20000000
#define CGTT_VGT_CLK_CTRL__GS_OVERRIDE__SHIFT 0x1d
#define CGTT_VGT_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000
#define CGTT_VGT_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e
#define CGTT_VGT_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_VGT_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/*
 * CGTT_IA_CLK_CTRL: ON_DELAY [3:0], OFF_HYSTERESIS [11:4], then eight
 * single-bit fields at bits 24-31. No fields are defined here for bits 12-23.
 */
#define CGTT_IA_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_IA_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_IA_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_IA_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_IA_CLK_CTRL__PERF_ENABLE_MASK 0x2000000
#define CGTT_IA_CLK_CTRL__PERF_ENABLE__SHIFT 0x19
#define CGTT_IA_CLK_CTRL__DBG_ENABLE_MASK 0x4000000
#define CGTT_IA_CLK_CTRL__DBG_ENABLE__SHIFT 0x1a
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000
#define CGTT_IA_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d
#define CGTT_IA_CLK_CTRL__CORE_OVERRIDE_MASK 0x40000000
#define CGTT_IA_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1e
#define CGTT_IA_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_IA_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/*
 * CGTT_WD_CLK_CTRL: ON_DELAY [3:0], OFF_HYSTERESIS [11:4], then eight
 * single-bit fields at bits 24-31. No fields are defined here for bits 12-23.
 * Note that CORE_OVERRIDE is bit 29 in this register, whereas
 * CGTT_VGT_CLK_CTRL and CGTT_IA_CLK_CTRL define CORE_OVERRIDE at bit 30.
 */
#define CGTT_WD_CLK_CTRL__ON_DELAY_MASK 0xf
#define CGTT_WD_CLK_CTRL__ON_DELAY__SHIFT 0x0
#define CGTT_WD_CLK_CTRL__OFF_HYSTERESIS_MASK 0xff0
#define CGTT_WD_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4
#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x1000000
#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18
#define CGTT_WD_CLK_CTRL__PERF_ENABLE_MASK 0x2000000
#define CGTT_WD_CLK_CTRL__PERF_ENABLE__SHIFT 0x19
#define CGTT_WD_CLK_CTRL__DBG_ENABLE_MASK 0x4000000
#define CGTT_WD_CLK_CTRL__DBG_ENABLE__SHIFT 0x1a
#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x8000000
#define CGTT_WD_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b
#define CGTT_WD_CLK_CTRL__ADC_OVERRIDE_MASK 0x10000000
#define CGTT_WD_CLK_CTRL__ADC_OVERRIDE__SHIFT 0x1c
#define CGTT_WD_CLK_CTRL__CORE_OVERRIDE_MASK 0x20000000
#define CGTT_WD_CLK_CTRL__CORE_OVERRIDE__SHIFT 0x1d
#define CGTT_WD_CLK_CTRL__RBIU_INPUT_OVERRIDE_MASK 0x40000000
#define CGTT_WD_CLK_CTRL__RBIU_INPUT_OVERRIDE__SHIFT 0x1e
#define CGTT_WD_CLK_CTRL__REG_OVERRIDE_MASK 0x80000000
#define CGTT_WD_CLK_CTRL__REG_OVERRIDE__SHIFT 0x1f
/* VGT_DEBUG_CNTL: VGT_DEBUG_INDX [5:0], VGT_DEBUG_SEL_BUS_B [6]. */
#define VGT_DEBUG_CNTL__VGT_DEBUG_INDX_MASK 0x3f
#define VGT_DEBUG_CNTL__VGT_DEBUG_INDX__SHIFT 0x0
#define VGT_DEBUG_CNTL__VGT_DEBUG_SEL_BUS_B_MASK 0x40
#define VGT_DEBUG_CNTL__VGT_DEBUG_SEL_BUS_B__SHIFT 0x6
/* VGT_DEBUG_DATA: DATA covers all 32 bits. */
#define VGT_DEBUG_DATA__DATA_MASK 0xffffffff
#define VGT_DEBUG_DATA__DATA__SHIFT 0x0
/* IA_DEBUG_CNTL: IA_DEBUG_INDX [5:0], IA_DEBUG_SEL_BUS_B [6]. */
#define IA_DEBUG_CNTL__IA_DEBUG_INDX_MASK 0x3f
#define IA_DEBUG_CNTL__IA_DEBUG_INDX__SHIFT 0x0
#define IA_DEBUG_CNTL__IA_DEBUG_SEL_BUS_B_MASK 0x40
#define IA_DEBUG_CNTL__IA_DEBUG_SEL_BUS_B__SHIFT 0x6
/* IA_DEBUG_DATA: DATA covers all 32 bits. */
#define IA_DEBUG_DATA__DATA_MASK 0xffffffff
#define IA_DEBUG_DATA__DATA__SHIFT 0x0
/* VGT_CNTL_STATUS: ten single-bit *_BUSY flags occupying bits 0-9, in order. */
#define VGT_CNTL_STATUS__VGT_BUSY_MASK 0x1
#define VGT_CNTL_STATUS__VGT_BUSY__SHIFT 0x0
#define VGT_CNTL_STATUS__VGT_OUT_INDX_BUSY_MASK 0x2
#define VGT_CNTL_STATUS__VGT_OUT_INDX_BUSY__SHIFT 0x1
#define VGT_CNTL_STATUS__VGT_OUT_BUSY_MASK 0x4
#define VGT_CNTL_STATUS__VGT_OUT_BUSY__SHIFT 0x2
#define VGT_CNTL_STATUS__VGT_PT_BUSY_MASK 0x8
#define VGT_CNTL_STATUS__VGT_PT_BUSY__SHIFT 0x3
#define VGT_CNTL_STATUS__VGT_TE_BUSY_MASK 0x10
#define VGT_CNTL_STATUS__VGT_TE_BUSY__SHIFT 0x4
#define VGT_CNTL_STATUS__VGT_VR_BUSY_MASK 0x20
#define VGT_CNTL_STATUS__VGT_VR_BUSY__SHIFT 0x5
#define VGT_CNTL_STATUS__VGT_PI_BUSY_MASK 0x40
#define VGT_CNTL_STATUS__VGT_PI_BUSY__SHIFT 0x6
#define VGT_CNTL_STATUS__VGT_GS_BUSY_MASK 0x80
#define VGT_CNTL_STATUS__VGT_GS_BUSY__SHIFT 0x7
#define VGT_CNTL_STATUS__VGT_HS_BUSY_MASK 0x100
#define VGT_CNTL_STATUS__VGT_HS_BUSY__SHIFT 0x8
#define VGT_CNTL_STATUS__VGT_TE11_BUSY_MASK 0x200
#define VGT_CNTL_STATUS__VGT_TE11_BUSY__SHIFT 0x9
/* WD_DEBUG_CNTL: WD_DEBUG_INDX [5:0], WD_DEBUG_SEL_BUS_B [6]. */
#define WD_DEBUG_CNTL__WD_DEBUG_INDX_MASK 0x3f
#define WD_DEBUG_CNTL__WD_DEBUG_INDX__SHIFT 0x0
#define WD_DEBUG_CNTL__WD_DEBUG_SEL_BUS_B_MASK 0x40
#define WD_DEBUG_CNTL__WD_DEBUG_SEL_BUS_B__SHIFT 0x6
/* WD_DEBUG_DATA: DATA covers all 32 bits. */
#define WD_DEBUG_DATA__DATA_MASK 0xffffffff
#define WD_DEBUG_DATA__DATA__SHIFT 0x0
/*
 * CC_GC_PRIM_CONFIG and GC_USER_PRIM_CONFIG share one layout:
 * INACTIVE_IA [17:16], INACTIVE_VGT_PA [27:24].
 */
#define CC_GC_PRIM_CONFIG__INACTIVE_IA_MASK 0x30000
#define CC_GC_PRIM_CONFIG__INACTIVE_IA__SHIFT 0x10
#define CC_GC_PRIM_CONFIG__INACTIVE_VGT_PA_MASK 0xf000000
#define CC_GC_PRIM_CONFIG__INACTIVE_VGT_PA__SHIFT 0x18
#define GC_USER_PRIM_CONFIG__INACTIVE_IA_MASK 0x30000
#define GC_USER_PRIM_CONFIG__INACTIVE_IA__SHIFT 0x10
#define GC_USER_PRIM_CONFIG__INACTIVE_VGT_PA_MASK 0xf000000
#define GC_USER_PRIM_CONFIG__INACTIVE_VGT_PA__SHIFT 0x18
/*
 * WD_DEBUG_REG0: 32 single-bit fields, one per bit, listed in ascending
 * bit order from bit 0 (wd_busy_extended) to bit 31 (vgt3_active_q).
 */
#define WD_DEBUG_REG0__wd_busy_extended_MASK 0x1
#define WD_DEBUG_REG0__wd_busy_extended__SHIFT 0x0
#define WD_DEBUG_REG0__wd_nodma_busy_extended_MASK 0x2
#define WD_DEBUG_REG0__wd_nodma_busy_extended__SHIFT 0x1
#define WD_DEBUG_REG0__wd_busy_MASK 0x4
#define WD_DEBUG_REG0__wd_busy__SHIFT 0x2
#define WD_DEBUG_REG0__wd_nodma_busy_MASK 0x8
#define WD_DEBUG_REG0__wd_nodma_busy__SHIFT 0x3
#define WD_DEBUG_REG0__rbiu_busy_MASK 0x10
#define WD_DEBUG_REG0__rbiu_busy__SHIFT 0x4
#define WD_DEBUG_REG0__spl_dma_busy_MASK 0x20
#define WD_DEBUG_REG0__spl_dma_busy__SHIFT 0x5
#define WD_DEBUG_REG0__spl_di_busy_MASK 0x40
#define WD_DEBUG_REG0__spl_di_busy__SHIFT 0x6
#define WD_DEBUG_REG0__vgt0_active_q_MASK 0x80
#define WD_DEBUG_REG0__vgt0_active_q__SHIFT 0x7
#define WD_DEBUG_REG0__vgt1_active_q_MASK 0x100
#define WD_DEBUG_REG0__vgt1_active_q__SHIFT 0x8
#define WD_DEBUG_REG0__spl_dma_p1_busy_MASK 0x200
#define WD_DEBUG_REG0__spl_dma_p1_busy__SHIFT 0x9
#define WD_DEBUG_REG0__rbiu_dr_p1_fifo_busy_MASK 0x400
#define WD_DEBUG_REG0__rbiu_dr_p1_fifo_busy__SHIFT 0xa
#define WD_DEBUG_REG0__rbiu_di_p1_fifo_busy_MASK 0x800
#define WD_DEBUG_REG0__rbiu_di_p1_fifo_busy__SHIFT 0xb
#define WD_DEBUG_REG0__SPARE2_MASK 0x1000
#define WD_DEBUG_REG0__SPARE2__SHIFT 0xc
#define WD_DEBUG_REG0__rbiu_dr_fifo_busy_MASK 0x2000
#define WD_DEBUG_REG0__rbiu_dr_fifo_busy__SHIFT 0xd
#define WD_DEBUG_REG0__rbiu_spl_dr_valid_MASK 0x4000
#define WD_DEBUG_REG0__rbiu_spl_dr_valid__SHIFT 0xe
#define WD_DEBUG_REG0__spl_rbiu_dr_read_MASK 0x8000
#define WD_DEBUG_REG0__spl_rbiu_dr_read__SHIFT 0xf
#define WD_DEBUG_REG0__SPARE3_MASK 0x10000
#define WD_DEBUG_REG0__SPARE3__SHIFT 0x10
#define WD_DEBUG_REG0__rbiu_di_fifo_busy_MASK 0x20000
#define WD_DEBUG_REG0__rbiu_di_fifo_busy__SHIFT 0x11
#define WD_DEBUG_REG0__rbiu_spl_di_valid_MASK 0x40000
#define WD_DEBUG_REG0__rbiu_spl_di_valid__SHIFT 0x12
#define WD_DEBUG_REG0__spl_rbiu_di_read_MASK 0x80000
#define WD_DEBUG_REG0__spl_rbiu_di_read__SHIFT 0x13
#define WD_DEBUG_REG0__se0_synced_q_MASK 0x100000
#define WD_DEBUG_REG0__se0_synced_q__SHIFT 0x14
#define WD_DEBUG_REG0__se1_synced_q_MASK 0x200000
#define WD_DEBUG_REG0__se1_synced_q__SHIFT 0x15
#define WD_DEBUG_REG0__se2_synced_q_MASK 0x400000
#define WD_DEBUG_REG0__se2_synced_q__SHIFT 0x16
#define WD_DEBUG_REG0__se3_synced_q_MASK 0x800000
#define WD_DEBUG_REG0__se3_synced_q__SHIFT 0x17
#define WD_DEBUG_REG0__reg_clk_busy_MASK 0x1000000
#define WD_DEBUG_REG0__reg_clk_busy__SHIFT 0x18
#define WD_DEBUG_REG0__input_clk_busy_MASK 0x2000000
#define WD_DEBUG_REG0__input_clk_busy__SHIFT 0x19
#define WD_DEBUG_REG0__core_clk_busy_MASK 0x4000000
#define WD_DEBUG_REG0__core_clk_busy__SHIFT 0x1a
#define WD_DEBUG_REG0__vgt2_active_q_MASK 0x8000000
#define WD_DEBUG_REG0__vgt2_active_q__SHIFT 0x1b
#define WD_DEBUG_REG0__sclk_reg_vld_MASK 0x10000000
#define WD_DEBUG_REG0__sclk_reg_vld__SHIFT 0x1c
#define WD_DEBUG_REG0__sclk_input_vld_MASK 0x20000000
#define WD_DEBUG_REG0__sclk_input_vld__SHIFT 0x1d
#define WD_DEBUG_REG0__sclk_core_vld_MASK 0x40000000
#define WD_DEBUG_REG0__sclk_core_vld__SHIFT 0x1e
#define WD_DEBUG_REG0__vgt3_active_q_MASK 0x80000000
#define WD_DEBUG_REG0__vgt3_active_q__SHIFT 0x1f
/*
 * WD_DEBUG_REG1: fields are listed in ascending bit order and cover all 32
 * bits. Every field is a single bit except:
 *   grbm_fifo_rdata_reg_id [16:12], grbm_fifo_rdata_state [19:17],
 *   free_cnt_q [25:20].
 */
#define WD_DEBUG_REG1__grbm_fifo_empty_MASK 0x1
#define WD_DEBUG_REG1__grbm_fifo_empty__SHIFT 0x0
#define WD_DEBUG_REG1__grbm_fifo_full_MASK 0x2
#define WD_DEBUG_REG1__grbm_fifo_full__SHIFT 0x1
#define WD_DEBUG_REG1__grbm_fifo_we_MASK 0x4
#define WD_DEBUG_REG1__grbm_fifo_we__SHIFT 0x2
#define WD_DEBUG_REG1__grbm_fifo_re_MASK 0x8
#define WD_DEBUG_REG1__grbm_fifo_re__SHIFT 0x3
#define WD_DEBUG_REG1__draw_initiator_valid_q_MASK 0x10
#define WD_DEBUG_REG1__draw_initiator_valid_q__SHIFT 0x4
#define WD_DEBUG_REG1__event_initiator_valid_q_MASK 0x20
#define WD_DEBUG_REG1__event_initiator_valid_q__SHIFT 0x5
#define WD_DEBUG_REG1__event_addr_valid_q_MASK 0x40
#define WD_DEBUG_REG1__event_addr_valid_q__SHIFT 0x6
#define WD_DEBUG_REG1__dma_request_valid_q_MASK 0x80
#define WD_DEBUG_REG1__dma_request_valid_q__SHIFT 0x7
#define WD_DEBUG_REG1__SPARE0_MASK 0x100
#define WD_DEBUG_REG1__SPARE0__SHIFT 0x8
#define WD_DEBUG_REG1__min_indx_valid_q_MASK 0x200
#define WD_DEBUG_REG1__min_indx_valid_q__SHIFT 0x9
#define WD_DEBUG_REG1__max_indx_valid_q_MASK 0x400
#define WD_DEBUG_REG1__max_indx_valid_q__SHIFT 0xa
#define WD_DEBUG_REG1__indx_offset_valid_q_MASK 0x800
#define WD_DEBUG_REG1__indx_offset_valid_q__SHIFT 0xb
#define WD_DEBUG_REG1__grbm_fifo_rdata_reg_id_MASK 0x1f000
#define WD_DEBUG_REG1__grbm_fifo_rdata_reg_id__SHIFT 0xc
#define WD_DEBUG_REG1__grbm_fifo_rdata_state_MASK 0xe0000
#define WD_DEBUG_REG1__grbm_fifo_rdata_state__SHIFT 0x11
#define WD_DEBUG_REG1__free_cnt_q_MASK 0x3f00000
#define WD_DEBUG_REG1__free_cnt_q__SHIFT 0x14
#define WD_DEBUG_REG1__rbiu_di_fifo_we_MASK 0x4000000
#define WD_DEBUG_REG1__rbiu_di_fifo_we__SHIFT 0x1a
#define WD_DEBUG_REG1__rbiu_dr_fifo_we_MASK 0x8000000
#define WD_DEBUG_REG1__rbiu_dr_fifo_we__SHIFT 0x1b
#define WD_DEBUG_REG1__rbiu_di_fifo_empty_MASK 0x10000000
#define WD_DEBUG_REG1__rbiu_di_fifo_empty__SHIFT 0x1c
#define WD_DEBUG_REG1__rbiu_di_fifo_full_MASK 0x20000000
#define WD_DEBUG_REG1__rbiu_di_fifo_full__SHIFT 0x1d
#define WD_DEBUG_REG1__rbiu_dr_fifo_empty_MASK 0x40000000
#define WD_DEBUG_REG1__rbiu_dr_fifo_empty__SHIFT 0x1e
#define WD_DEBUG_REG1__rbiu_dr_fifo_full_MASK 0x80000000
#define WD_DEBUG_REG1__rbiu_dr_fifo_full__SHIFT 0x1f
/*
 * WD_DEBUG_REG2: same bit layout as WD_DEBUG_REG1, with each field name
 * carrying a "p1_" prefix (SPARE0 is the one unprefixed name). Every field
 * is a single bit except:
 *   p1_grbm_fifo_rdata_reg_id [16:12], p1_grbm_fifo_rdata_state [19:17],
 *   p1_free_cnt_q [25:20].
 */
#define WD_DEBUG_REG2__p1_grbm_fifo_empty_MASK 0x1
#define WD_DEBUG_REG2__p1_grbm_fifo_empty__SHIFT 0x0
#define WD_DEBUG_REG2__p1_grbm_fifo_full_MASK 0x2
#define WD_DEBUG_REG2__p1_grbm_fifo_full__SHIFT 0x1
#define WD_DEBUG_REG2__p1_grbm_fifo_we_MASK 0x4
#define WD_DEBUG_REG2__p1_grbm_fifo_we__SHIFT 0x2
#define WD_DEBUG_REG2__p1_grbm_fifo_re_MASK 0x8
#define WD_DEBUG_REG2__p1_grbm_fifo_re__SHIFT 0x3
#define WD_DEBUG_REG2__p1_draw_initiator_valid_q_MASK 0x10
#define WD_DEBUG_REG2__p1_draw_initiator_valid_q__SHIFT 0x4
#define WD_DEBUG_REG2__p1_event_initiator_valid_q_MASK 0x20
#define WD_DEBUG_REG2__p1_event_initiator_valid_q__SHIFT 0x5
#define WD_DEBUG_REG2__p1_event_addr_valid_q_MASK 0x40
#define WD_DEBUG_REG2__p1_event_addr_valid_q__SHIFT 0x6
#define WD_DEBUG_REG2__p1_dma_request_valid_q_MASK 0x80
#define WD_DEBUG_REG2__p1_dma_request_valid_q__SHIFT 0x7
#define WD_DEBUG_REG2__SPARE0_MASK 0x100
#define WD_DEBUG_REG2__SPARE0__SHIFT 0x8
#define WD_DEBUG_REG2__p1_min_indx_valid_q_MASK 0x200
#define WD_DEBUG_REG2__p1_min_indx_valid_q__SHIFT 0x9
#define WD_DEBUG_REG2__p1_max_indx_valid_q_MASK 0x400
#define WD_DEBUG_REG2__p1_max_indx_valid_q__SHIFT 0xa
#define WD_DEBUG_REG2__p1_indx_offset_valid_q_MASK 0x800
#define WD_DEBUG_REG2__p1_indx_offset_valid_q__SHIFT 0xb
#define WD_DEBUG_REG2__p1_grbm_fifo_rdata_reg_id_MASK 0x1f000
#define WD_DEBUG_REG2__p1_grbm_fifo_rdata_reg_id__SHIFT 0xc
#define WD_DEBUG_REG2__p1_grbm_fifo_rdata_state_MASK 0xe0000
#define WD_DEBUG_REG2__p1_grbm_fifo_rdata_state__SHIFT 0x11
#define WD_DEBUG_REG2__p1_free_cnt_q_MASK 0x3f00000
#define WD_DEBUG_REG2__p1_free_cnt_q__SHIFT 0x14
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_we_MASK 0x4000000
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_we__SHIFT 0x1a
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_we_MASK 0x8000000
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_we__SHIFT 0x1b
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_empty_MASK 0x10000000
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_empty__SHIFT 0x1c
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_full_MASK 0x20000000
#define WD_DEBUG_REG2__p1_rbiu_di_fifo_full__SHIFT 0x1d
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_empty_MASK 0x40000000
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_empty__SHIFT 0x1e
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_full_MASK 0x80000000
#define WD_DEBUG_REG2__p1_rbiu_dr_fifo_full__SHIFT 0x1f
/*
 * WD_DEBUG_REG3: fields are listed in ascending bit order and cover all 32
 * bits. Every field is a single bit except dma_buf_type_p0_q [9:8].
 */
#define WD_DEBUG_REG3__rbiu_spl_dr_valid_MASK 0x1
#define WD_DEBUG_REG3__rbiu_spl_dr_valid__SHIFT 0x0
#define WD_DEBUG_REG3__SPARE0_MASK 0x2
#define WD_DEBUG_REG3__SPARE0__SHIFT 0x1
#define WD_DEBUG_REG3__pipe0_dr_MASK 0x4
#define WD_DEBUG_REG3__pipe0_dr__SHIFT 0x2
#define WD_DEBUG_REG3__pipe0_rtr_MASK 0x8
#define WD_DEBUG_REG3__pipe0_rtr__SHIFT 0x3
#define WD_DEBUG_REG3__pipe1_dr_MASK 0x10
#define WD_DEBUG_REG3__pipe1_dr__SHIFT 0x4
#define WD_DEBUG_REG3__pipe1_rtr_MASK 0x20
#define WD_DEBUG_REG3__pipe1_rtr__SHIFT 0x5
#define WD_DEBUG_REG3__wd_subdma_fifo_empty_MASK 0x40
#define WD_DEBUG_REG3__wd_subdma_fifo_empty__SHIFT 0x6
#define WD_DEBUG_REG3__wd_subdma_fifo_full_MASK 0x80
#define WD_DEBUG_REG3__wd_subdma_fifo_full__SHIFT 0x7
#define WD_DEBUG_REG3__dma_buf_type_p0_q_MASK 0x300
#define WD_DEBUG_REG3__dma_buf_type_p0_q__SHIFT 0x8
#define WD_DEBUG_REG3__dma_zero_indices_p0_q_MASK 0x400
#define WD_DEBUG_REG3__dma_zero_indices_p0_q__SHIFT 0xa
#define WD_DEBUG_REG3__dma_req_path_p3_q_MASK 0x800
#define WD_DEBUG_REG3__dma_req_path_p3_q__SHIFT 0xb
#define WD_DEBUG_REG3__dma_not_eop_p1_q_MASK 0x1000
#define WD_DEBUG_REG3__dma_not_eop_p1_q__SHIFT 0xc
#define WD_DEBUG_REG3__out_of_range_p4_MASK 0x2000
#define WD_DEBUG_REG3__out_of_range_p4__SHIFT 0xd
#define WD_DEBUG_REG3__last_sub_dma_p3_q_MASK 0x4000
#define WD_DEBUG_REG3__last_sub_dma_p3_q__SHIFT 0xe
#define WD_DEBUG_REG3__last_rdreq_of_sub_dma_p4_MASK 0x8000
#define WD_DEBUG_REG3__last_rdreq_of_sub_dma_p4__SHIFT 0xf
#define WD_DEBUG_REG3__WD_IA_dma_send_d_MASK 0x10000
#define WD_DEBUG_REG3__WD_IA_dma_send_d__SHIFT 0x10
#define WD_DEBUG_REG3__WD_IA_dma_rtr_MASK 0x20000
#define WD_DEBUG_REG3__WD_IA_dma_rtr__SHIFT 0x11
#define WD_DEBUG_REG3__WD_IA1_dma_send_d_MASK 0x40000
#define WD_DEBUG_REG3__WD_IA1_dma_send_d__SHIFT 0x12
#define WD_DEBUG_REG3__WD_IA1_dma_rtr_MASK 0x80000
#define WD_DEBUG_REG3__WD_IA1_dma_rtr__SHIFT 0x13
#define WD_DEBUG_REG3__last_inst_of_dma_p2_MASK 0x100000
#define WD_DEBUG_REG3__last_inst_of_dma_p2__SHIFT 0x14
#define WD_DEBUG_REG3__last_sd_of_inst_p2_MASK 0x200000
#define WD_DEBUG_REG3__last_sd_of_inst_p2__SHIFT 0x15
#define WD_DEBUG_REG3__last_sd_of_dma_p2_MASK 0x400000
#define WD_DEBUG_REG3__last_sd_of_dma_p2__SHIFT 0x16
#define WD_DEBUG_REG3__SPARE1_MASK 0x800000
#define WD_DEBUG_REG3__SPARE1__SHIFT 0x17
#define WD_DEBUG_REG3__WD_IA_dma_busy_MASK 0x1000000
#define WD_DEBUG_REG3__WD_IA_dma_busy__SHIFT 0x18
#define WD_DEBUG_REG3__WD_IA1_dma_busy_MASK 0x2000000
#define WD_DEBUG_REG3__WD_IA1_dma_busy__SHIFT 0x19
#define WD_DEBUG_REG3__send_to_ia1_p3_q_MASK 0x4000000
#define WD_DEBUG_REG3__send_to_ia1_p3_q__SHIFT 0x1a
#define WD_DEBUG_REG3__dma_wd_switch_on_eop_p3_q_MASK 0x8000000
#define WD_DEBUG_REG3__dma_wd_switch_on_eop_p3_q__SHIFT 0x1b
#define WD_DEBUG_REG3__pipe3_dr_MASK 0x10000000
#define WD_DEBUG_REG3__pipe3_dr__SHIFT 0x1c
#define WD_DEBUG_REG3__pipe3_rtr_MASK 0x20000000
#define WD_DEBUG_REG3__pipe3_rtr__SHIFT 0x1d
#define WD_DEBUG_REG3__wd_dma2draw_fifo_empty_MASK 0x40000000
#define WD_DEBUG_REG3__wd_dma2draw_fifo_empty__SHIFT 0x1e
#define WD_DEBUG_REG3__wd_dma2draw_fifo_full_MASK 0x80000000
#define WD_DEBUG_REG3__wd_dma2draw_fifo_full__SHIFT 0x1f
/*
 * WD_DEBUG_REG4: fields are listed in ascending bit order and cover all 32
 * bits. Every field is a single bit except:
 *   di_type_p0 [15:14], di_state_sel_p1_q [18:16].
 */
#define WD_DEBUG_REG4__rbiu_spl_di_valid_MASK 0x1
#define WD_DEBUG_REG4__rbiu_spl_di_valid__SHIFT 0x0
#define WD_DEBUG_REG4__spl_rbiu_di_read_MASK 0x2
#define WD_DEBUG_REG4__spl_rbiu_di_read__SHIFT 0x1
#define WD_DEBUG_REG4__rbiu_spl_p1_di_valid_MASK 0x4
#define WD_DEBUG_REG4__rbiu_spl_p1_di_valid__SHIFT 0x2
#define WD_DEBUG_REG4__spl_rbiu_p1_di_read_MASK 0x8
#define WD_DEBUG_REG4__spl_rbiu_p1_di_read__SHIFT 0x3
#define WD_DEBUG_REG4__pipe0_dr_MASK 0x10
#define WD_DEBUG_REG4__pipe0_dr__SHIFT 0x4
#define WD_DEBUG_REG4__pipe0_rtr_MASK 0x20
#define WD_DEBUG_REG4__pipe0_rtr__SHIFT 0x5
#define WD_DEBUG_REG4__pipe1_dr_MASK 0x40
#define WD_DEBUG_REG4__pipe1_dr__SHIFT 0x6
#define WD_DEBUG_REG4__pipe1_rtr_MASK 0x80
#define WD_DEBUG_REG4__pipe1_rtr__SHIFT 0x7
#define WD_DEBUG_REG4__pipe2_dr_MASK 0x100
#define WD_DEBUG_REG4__pipe2_dr__SHIFT 0x8
#define WD_DEBUG_REG4__pipe2_rtr_MASK 0x200
#define WD_DEBUG_REG4__pipe2_rtr__SHIFT 0x9
#define WD_DEBUG_REG4__pipe3_ld_MASK 0x400
#define WD_DEBUG_REG4__pipe3_ld__SHIFT 0xa
#define WD_DEBUG_REG4__pipe3_rtr_MASK 0x800
#define WD_DEBUG_REG4__pipe3_rtr__SHIFT 0xb
#define WD_DEBUG_REG4__WD_IA_draw_send_d_MASK 0x1000
#define WD_DEBUG_REG4__WD_IA_draw_send_d__SHIFT 0xc
#define WD_DEBUG_REG4__WD_IA_draw_rtr_MASK 0x2000
#define WD_DEBUG_REG4__WD_IA_draw_rtr__SHIFT 0xd
#define WD_DEBUG_REG4__di_type_p0_MASK 0xc000
#define WD_DEBUG_REG4__di_type_p0__SHIFT 0xe
#define WD_DEBUG_REG4__di_state_sel_p1_q_MASK 0x70000
#define WD_DEBUG_REG4__di_state_sel_p1_q__SHIFT 0x10
#define WD_DEBUG_REG4__di_wd_switch_on_eop_p1_q_MASK 0x80000
#define WD_DEBUG_REG4__di_wd_switch_on_eop_p1_q__SHIFT 0x13
#define WD_DEBUG_REG4__rbiu_spl_pipe0_lockout_MASK 0x100000
#define WD_DEBUG_REG4__rbiu_spl_pipe0_lockout__SHIFT 0x14
#define WD_DEBUG_REG4__last_inst_of_di_p2_MASK 0x200000
#define WD_DEBUG_REG4__last_inst_of_di_p2__SHIFT 0x15
#define WD_DEBUG_REG4__last_sd_of_inst_p2_MASK 0x400000
#define WD_DEBUG_REG4__last_sd_of_inst_p2__SHIFT 0x16
#define WD_DEBUG_REG4__last_sd_of_di_p2_MASK 0x800000
#define WD_DEBUG_REG4__last_sd_of_di_p2__SHIFT 0x17
#define WD_DEBUG_REG4__not_eop_wait_p1_q_MASK 0x1000000
#define WD_DEBUG_REG4__not_eop_wait_p1_q__SHIFT 0x18
#define WD_DEBUG_REG4__not_eop_wait_q_MASK 0x2000000
#define WD_DEBUG_REG4__not_eop_wait_q__SHIFT 0x19
#define WD_DEBUG_REG4__ext_event_wait_p1_q_MASK 0x4000000
#define WD_DEBUG_REG4__ext_event_wait_p1_q__SHIFT 0x1a
#define WD_DEBUG_REG4__ext_event_wait_q_MASK 0x8000000
#define WD_DEBUG_REG4__ext_event_wait_q__SHIFT 0x1b
#define WD_DEBUG_REG4__WD_IA1_draw_send_d_MASK 0x10000000
#define WD_DEBUG_REG4__WD_IA1_draw_send_d__SHIFT 0x1c
#define WD_DEBUG_REG4__WD_IA1_draw_rtr_MASK 0x20000000
#define WD_DEBUG_REG4__WD_IA1_draw_rtr__SHIFT 0x1d
#define WD_DEBUG_REG4__send_to_ia1_q_MASK 0x40000000
#define WD_DEBUG_REG4__send_to_ia1_q__SHIFT 0x1e
#define WD_DEBUG_REG4__dual_ia_mode_MASK 0x80000000
#define WD_DEBUG_REG4__dual_ia_mode__SHIFT 0x1f
/*
 * WD_DEBUG_REG5: same bit layout as WD_DEBUG_REG3, with each field name
 * carrying a "p1_" prefix (SPARE0 and SPARE1 are the unprefixed names).
 * Every field is a single bit except p1_dma_buf_type_p0_q [9:8].
 */
#define WD_DEBUG_REG5__p1_rbiu_spl_dr_valid_MASK 0x1
#define WD_DEBUG_REG5__p1_rbiu_spl_dr_valid__SHIFT 0x0
#define WD_DEBUG_REG5__SPARE0_MASK 0x2
#define WD_DEBUG_REG5__SPARE0__SHIFT 0x1
#define WD_DEBUG_REG5__p1_pipe0_dr_MASK 0x4
#define WD_DEBUG_REG5__p1_pipe0_dr__SHIFT 0x2
#define WD_DEBUG_REG5__p1_pipe0_rtr_MASK 0x8
#define WD_DEBUG_REG5__p1_pipe0_rtr__SHIFT 0x3
#define WD_DEBUG_REG5__p1_pipe1_dr_MASK 0x10
#define WD_DEBUG_REG5__p1_pipe1_dr__SHIFT 0x4
#define WD_DEBUG_REG5__p1_pipe1_rtr_MASK 0x20
#define WD_DEBUG_REG5__p1_pipe1_rtr__SHIFT 0x5
#define WD_DEBUG_REG5__p1_wd_subdma_fifo_empty_MASK 0x40
#define WD_DEBUG_REG5__p1_wd_subdma_fifo_empty__SHIFT 0x6
#define WD_DEBUG_REG5__p1_wd_subdma_fifo_full_MASK 0x80
#define WD_DEBUG_REG5__p1_wd_subdma_fifo_full__SHIFT 0x7
#define WD_DEBUG_REG5__p1_dma_buf_type_p0_q_MASK 0x300
#define WD_DEBUG_REG5__p1_dma_buf_type_p0_q__SHIFT 0x8
#define WD_DEBUG_REG5__p1_dma_zero_indices_p0_q_MASK 0x400
#define WD_DEBUG_REG5__p1_dma_zero_indices_p0_q__SHIFT 0xa
#define WD_DEBUG_REG5__p1_dma_req_path_p3_q_MASK 0x800
#define WD_DEBUG_REG5__p1_dma_req_path_p3_q__SHIFT 0xb
#define WD_DEBUG_REG5__p1_dma_not_eop_p1_q_MASK 0x1000
#define WD_DEBUG_REG5__p1_dma_not_eop_p1_q__SHIFT 0xc
#define WD_DEBUG_REG5__p1_out_of_range_p4_MASK 0x2000
#define WD_DEBUG_REG5__p1_out_of_range_p4__SHIFT 0xd
#define WD_DEBUG_REG5__p1_last_sub_dma_p3_q_MASK 0x4000
#define WD_DEBUG_REG5__p1_last_sub_dma_p3_q__SHIFT 0xe
#define WD_DEBUG_REG5__p1_last_rdreq_of_sub_dma_p4_MASK 0x8000
#define WD_DEBUG_REG5__p1_last_rdreq_of_sub_dma_p4__SHIFT 0xf
#define WD_DEBUG_REG5__p1_WD_IA_dma_send_d_MASK 0x10000
#define WD_DEBUG_REG5__p1_WD_IA_dma_send_d__SHIFT 0x10
#define WD_DEBUG_REG5__p1_WD_IA_dma_rtr_MASK 0x20000
#define WD_DEBUG_REG5__p1_WD_IA_dma_rtr__SHIFT 0x11
#define WD_DEBUG_REG5__p1_WD_IA1_dma_send_d_MASK 0x40000
#define WD_DEBUG_REG5__p1_WD_IA1_dma_send_d__SHIFT 0x12
#define WD_DEBUG_REG5__p1_WD_IA1_dma_rtr_MASK 0x80000
#define WD_DEBUG_REG5__p1_WD_IA1_dma_rtr__SHIFT 0x13
#define WD_DEBUG_REG5__p1_last_inst_of_dma_p2_MASK 0x100000
#define WD_DEBUG_REG5__p1_last_inst_of_dma_p2__SHIFT 0x14
#define WD_DEBUG_REG5__p1_last_sd_of_inst_p2_MASK 0x200000
#define WD_DEBUG_REG5__p1_last_sd_of_inst_p2__SHIFT 0x15
#define WD_DEBUG_REG5__p1_last_sd_of_dma_p2_MASK 0x400000
#define WD_DEBUG_REG5__p1_last_sd_of_dma_p2__SHIFT 0x16
#define WD_DEBUG_REG5__SPARE1_MASK 0x800000
#define WD_DEBUG_REG5__SPARE1__SHIFT 0x17
#define WD_DEBUG_REG5__p1_WD_IA_dma_busy_MASK 0x1000000
#define WD_DEBUG_REG5__p1_WD_IA_dma_busy__SHIFT 0x18
#define WD_DEBUG_REG5__p1_WD_IA1_dma_busy_MASK 0x2000000
#define WD_DEBUG_REG5__p1_WD_IA1_dma_busy__SHIFT 0x19
#define WD_DEBUG_REG5__p1_send_to_ia1_p3_q_MASK 0x4000000
#define WD_DEBUG_REG5__p1_send_to_ia1_p3_q__SHIFT 0x1a
#define WD_DEBUG_REG5__p1_dma_wd_switch_on_eop_p3_q_MASK 0x8000000
#define WD_DEBUG_REG5__p1_dma_wd_switch_on_eop_p3_q__SHIFT 0x1b
#define WD_DEBUG_REG5__p1_pipe3_dr_MASK 0x10000000
#define WD_DEBUG_REG5__p1_pipe3_dr__SHIFT 0x1c
#define WD_DEBUG_REG5__p1_pipe3_rtr_MASK 0x20000000
#define WD_DEBUG_REG5__p1_pipe3_rtr__SHIFT 0x1d
#define WD_DEBUG_REG5__p1_wd_dma2draw_fifo_empty_MASK 0x40000000
#define WD_DEBUG_REG5__p1_wd_dma2draw_fifo_empty__SHIFT 0x1e
#define WD_DEBUG_REG5__p1_wd_dma2draw_fifo_full_MASK 0x80000000
#define WD_DEBUG_REG5__p1_wd_dma2draw_fifo_full__SHIFT 0x1f
/*
 * IA_DEBUG_REG0: fields are listed in ascending bit order and cover all 32
 * bits. Every field is a single bit except SPARE2, which spans [23:14].
 */
#define IA_DEBUG_REG0__ia_busy_extended_MASK 0x1
#define IA_DEBUG_REG0__ia_busy_extended__SHIFT 0x0
#define IA_DEBUG_REG0__ia_nodma_busy_extended_MASK 0x2
#define IA_DEBUG_REG0__ia_nodma_busy_extended__SHIFT 0x1
#define IA_DEBUG_REG0__ia_busy_MASK 0x4
#define IA_DEBUG_REG0__ia_busy__SHIFT 0x2
#define IA_DEBUG_REG0__ia_nodma_busy_MASK 0x8
#define IA_DEBUG_REG0__ia_nodma_busy__SHIFT 0x3
#define IA_DEBUG_REG0__SPARE0_MASK 0x10
#define IA_DEBUG_REG0__SPARE0__SHIFT 0x4
#define IA_DEBUG_REG0__dma_req_busy_MASK 0x20
#define IA_DEBUG_REG0__dma_req_busy__SHIFT 0x5
#define IA_DEBUG_REG0__dma_busy_MASK 0x40
#define IA_DEBUG_REG0__dma_busy__SHIFT 0x6
#define IA_DEBUG_REG0__mc_xl8r_busy_MASK 0x80
#define IA_DEBUG_REG0__mc_xl8r_busy__SHIFT 0x7
#define IA_DEBUG_REG0__grp_busy_MASK 0x100
#define IA_DEBUG_REG0__grp_busy__SHIFT 0x8
#define IA_DEBUG_REG0__SPARE1_MASK 0x200
#define IA_DEBUG_REG0__SPARE1__SHIFT 0x9
#define IA_DEBUG_REG0__dma_grp_valid_MASK 0x400
#define IA_DEBUG_REG0__dma_grp_valid__SHIFT 0xa
#define IA_DEBUG_REG0__grp_dma_read_MASK 0x800
#define IA_DEBUG_REG0__grp_dma_read__SHIFT 0xb
#define IA_DEBUG_REG0__dma_grp_hp_valid_MASK 0x1000
#define IA_DEBUG_REG0__dma_grp_hp_valid__SHIFT 0xc
#define IA_DEBUG_REG0__grp_dma_hp_read_MASK 0x2000
#define IA_DEBUG_REG0__grp_dma_hp_read__SHIFT 0xd
#define IA_DEBUG_REG0__SPARE2_MASK 0xffc000
#define IA_DEBUG_REG0__SPARE2__SHIFT 0xe
#define IA_DEBUG_REG0__reg_clk_busy_MASK 0x1000000
#define IA_DEBUG_REG0__reg_clk_busy__SHIFT 0x18
#define IA_DEBUG_REG0__core_clk_busy_MASK 0x2000000
#define IA_DEBUG_REG0__core_clk_busy__SHIFT 0x19
#define IA_DEBUG_REG0__SPARE3_MASK 0x4000000
#define IA_DEBUG_REG0__SPARE3__SHIFT 0x1a
#define IA_DEBUG_REG0__SPARE4_MASK 0x8000000
#define IA_DEBUG_REG0__SPARE4__SHIFT 0x1b
#define IA_DEBUG_REG0__sclk_reg_vld_MASK 0x10000000
#define IA_DEBUG_REG0__sclk_reg_vld__SHIFT 0x1c
#define IA_DEBUG_REG0__sclk_core_vld_MASK 0x20000000
#define IA_DEBUG_REG0__sclk_core_vld__SHIFT 0x1d
#define IA_DEBUG_REG0__SPARE5_MASK 0x40000000
#define IA_DEBUG_REG0__SPARE5__SHIFT 0x1e
#define IA_DEBUG_REG0__SPARE6_MASK 0x80000000
#define IA_DEBUG_REG0__SPARE6__SHIFT 0x1f
/*
 * IA_DEBUG_REG1: fields are listed in ascending bit order and cover all 32
 * bits. Every field is a single bit except dma_buf_type_q [6:5].
 */
#define IA_DEBUG_REG1__dma_input_fifo_empty_MASK 0x1
#define IA_DEBUG_REG1__dma_input_fifo_empty__SHIFT 0x0
#define IA_DEBUG_REG1__dma_input_fifo_full_MASK 0x2
#define IA_DEBUG_REG1__dma_input_fifo_full__SHIFT 0x1
#define IA_DEBUG_REG1__start_new_packet_MASK 0x4
#define IA_DEBUG_REG1__start_new_packet__SHIFT 0x2
#define IA_DEBUG_REG1__dma_rdreq_dr_q_MASK 0x8
#define IA_DEBUG_REG1__dma_rdreq_dr_q__SHIFT 0x3
#define IA_DEBUG_REG1__dma_zero_indices_q_MASK 0x10
#define IA_DEBUG_REG1__dma_zero_indices_q__SHIFT 0x4
#define IA_DEBUG_REG1__dma_buf_type_q_MASK 0x60
#define IA_DEBUG_REG1__dma_buf_type_q__SHIFT 0x5
#define IA_DEBUG_REG1__dma_req_path_q_MASK 0x80
#define IA_DEBUG_REG1__dma_req_path_q__SHIFT 0x7
#define IA_DEBUG_REG1__discard_1st_chunk_MASK 0x100
#define IA_DEBUG_REG1__discard_1st_chunk__SHIFT 0x8
#define IA_DEBUG_REG1__discard_2nd_chunk_MASK 0x200
#define IA_DEBUG_REG1__discard_2nd_chunk__SHIFT 0x9
#define IA_DEBUG_REG1__second_tc_ret_data_q_MASK 0x400
#define IA_DEBUG_REG1__second_tc_ret_data_q__SHIFT 0xa
#define IA_DEBUG_REG1__dma_tc_ret_sel_q_MASK 0x800
#define IA_DEBUG_REG1__dma_tc_ret_sel_q__SHIFT 0xb
#define IA_DEBUG_REG1__last_rdreq_in_dma_op_MASK 0x1000
#define IA_DEBUG_REG1__last_rdreq_in_dma_op__SHIFT 0xc
#define IA_DEBUG_REG1__dma_mask_fifo_empty_MASK 0x2000
#define IA_DEBUG_REG1__dma_mask_fifo_empty__SHIFT 0xd
#define IA_DEBUG_REG1__dma_data_fifo_empty_q_MASK 0x4000
#define IA_DEBUG_REG1__dma_data_fifo_empty_q__SHIFT 0xe
#define IA_DEBUG_REG1__dma_data_fifo_full_MASK 0x8000
#define IA_DEBUG_REG1__dma_data_fifo_full__SHIFT 0xf
#define IA_DEBUG_REG1__dma_req_fifo_empty_MASK 0x10000
#define IA_DEBUG_REG1__dma_req_fifo_empty__SHIFT 0x10
#define IA_DEBUG_REG1__dma_req_fifo_full_MASK 0x20000
#define IA_DEBUG_REG1__dma_req_fifo_full__SHIFT 0x11
#define IA_DEBUG_REG1__stage2_dr_MASK 0x40000
#define IA_DEBUG_REG1__stage2_dr__SHIFT 0x12
#define IA_DEBUG_REG1__stage2_rtr_MASK 0x80000
#define IA_DEBUG_REG1__stage2_rtr__SHIFT 0x13
#define IA_DEBUG_REG1__stage3_dr_MASK 0x100000
#define IA_DEBUG_REG1__stage3_dr__SHIFT 0x14
#define IA_DEBUG_REG1__stage3_rtr_MASK 0x200000
#define IA_DEBUG_REG1__stage3_rtr__SHIFT 0x15
#define IA_DEBUG_REG1__stage4_dr_MASK 0x400000
#define IA_DEBUG_REG1__stage4_dr__SHIFT 0x16
#define IA_DEBUG_REG1__stage4_rtr_MASK 0x800000
#define IA_DEBUG_REG1__stage4_rtr__SHIFT 0x17
#define IA_DEBUG_REG1__dma_skid_fifo_empty_MASK 0x1000000
#define IA_DEBUG_REG1__dma_skid_fifo_empty__SHIFT 0x18
#define IA_DEBUG_REG1__dma_skid_fifo_full_MASK 0x2000000
#define IA_DEBUG_REG1__dma_skid_fifo_full__SHIFT 0x19
#define IA_DEBUG_REG1__dma_grp_valid_MASK 0x4000000
#define IA_DEBUG_REG1__dma_grp_valid__SHIFT 0x1a
#define IA_DEBUG_REG1__grp_dma_read_MASK 0x8000000
#define IA_DEBUG_REG1__grp_dma_read__SHIFT 0x1b
#define IA_DEBUG_REG1__current_data_valid_MASK 0x10000000
#define IA_DEBUG_REG1__current_data_valid__SHIFT 0x1c
#define IA_DEBUG_REG1__out_of_range_r2_q_MASK 0x20000000
#define IA_DEBUG_REG1__out_of_range_r2_q__SHIFT 0x1d
#define IA_DEBUG_REG1__dma_mask_fifo_we_MASK 0x40000000
#define IA_DEBUG_REG1__dma_mask_fifo_we__SHIFT 0x1e
#define IA_DEBUG_REG1__dma_ret_data_we_q_MASK 0x80000000
#define IA_DEBUG_REG1__dma_ret_data_we_q__SHIFT 0x1f
/*
 * IA_DEBUG_REG2: the fields below (bits 0-25) sit at the same bit positions
 * as the fields of IA_DEBUG_REG1, with each name carrying an "hp_" prefix.
 * Every field here is a single bit except hp_dma_buf_type_q [6:5].
 */
#define IA_DEBUG_REG2__hp_dma_input_fifo_empty_MASK 0x1
#define IA_DEBUG_REG2__hp_dma_input_fifo_empty__SHIFT 0x0
#define IA_DEBUG_REG2__hp_dma_input_fifo_full_MASK 0x2
#define IA_DEBUG_REG2__hp_dma_input_fifo_full__SHIFT 0x1
#define IA_DEBUG_REG2__hp_start_new_packet_MASK 0x4
#define IA_DEBUG_REG2__hp_start_new_packet__SHIFT 0x2
#define IA_DEBUG_REG2__hp_dma_rdreq_dr_q_MASK 0x8
#define IA_DEBUG_REG2__hp_dma_rdreq_dr_q__SHIFT 0x3
#define IA_DEBUG_REG2__hp_dma_zero_indices_q_MASK 0x10
#define IA_DEBUG_REG2__hp_dma_zero_indices_q__SHIFT 0x4
#define IA_DEBUG_REG2__hp_dma_buf_type_q_MASK 0x60
#define IA_DEBUG_REG2__hp_dma_buf_type_q__SHIFT 0x5
#define IA_DEBUG_REG2__hp_dma_req_path_q_MASK 0x80
#define IA_DEBUG_REG2__hp_dma_req_path_q__SHIFT 0x7
#define IA_DEBUG_REG2__hp_discard_1st_chunk_MASK 0x100
#define IA_DEBUG_REG2__hp_discard_1st_chunk__SHIFT 0x8
#define IA_DEBUG_REG2__hp_discard_2nd_chunk_MASK 0x200
#define IA_DEBUG_REG2__hp_discard_2nd_chunk__SHIFT 0x9
#define IA_DEBUG_REG2__hp_second_tc_ret_data_q_MASK 0x400
#define IA_DEBUG_REG2__hp_second_tc_ret_data_q__SHIFT 0xa
#define IA_DEBUG_REG2__hp_dma_tc_ret_sel_q_MASK 0x800
#define IA_DEBUG_REG2__hp_dma_tc_ret_sel_q__SHIFT 0xb
#define IA_DEBUG_REG2__hp_last_rdreq_in_dma_op_MASK 0x1000
#define IA_DEBUG_REG2__hp_last_rdreq_in_dma_op__SHIFT 0xc
#define IA_DEBUG_REG2__hp_dma_mask_fifo_empty_MASK 0x2000
#define IA_DEBUG_REG2__hp_dma_mask_fifo_empty__SHIFT 0xd
#define IA_DEBUG_REG2__hp_dma_data_fifo_empty_q_MASK 0x4000
#define IA_DEBUG_REG2__hp_dma_data_fifo_empty_q__SHIFT 0xe
#define IA_DEBUG_REG2__hp_dma_data_fifo_full_MASK 0x8000
#define IA_DEBUG_REG2__hp_dma_data_fifo_full__SHIFT 0xf
#define IA_DEBUG_REG2__hp_dma_req_fifo_empty_MASK 0x10000
#define IA_DEBUG_REG2__hp_dma_req_fifo_empty__SHIFT 0x10
#define IA_DEBUG_REG2__hp_dma_req_fifo_full_MASK 0x20000
#define IA_DEBUG_REG2__hp_dma_req_fifo_full__SHIFT 0x11
#define IA_DEBUG_REG2__hp_stage2_dr_MASK 0x40000
#define IA_DEBUG_REG2__hp_stage2_dr__SHIFT 0x12
#define IA_DEBUG_REG2__hp_stage2_rtr_MASK 0x80000
#define IA_DEBUG_REG2__hp_stage2_rtr__SHIFT 0x13
#define IA_DEBUG_REG2__hp_stage3_dr_MASK 0x100000
#define IA_DEBUG_REG2__hp_stage3_dr__SHIFT 0x14
#define IA_DEBUG_REG2__hp_stage3_rtr_MASK 0x200000
#define IA_DEBUG_REG2__hp_stage3_rtr__SHIFT 0x15
#define IA_DEBUG_REG2__hp_stage4_dr_MASK 0x400000
#define IA_DEBUG_REG2__hp_stage4_dr__SHIFT 0x16
#define IA_DEBUG_REG2__hp_stage4_rtr_MASK 0x800000
#define IA_DEBUG_REG2__hp_stage4_rtr__SHIFT 0x17
#define IA_DEBUG_REG2__hp_dma_skid_fifo_empty_MASK 0x1000000
#define IA_DEBUG_REG2__hp_dma_skid_fifo_empty__SHIFT 0x18
#define IA_DEBUG_REG2__hp_dma_skid_fifo_full_MASK 0x2000000
#define IA_DEBUG_REG2__hp_dma_skid_fifo_full__SHIFT 0x19
#define IA_DEBUG_REG2__hp_dma_grp_valid_MASK 0x4000000
#define IA_DEBUG_REG2__hp_dma_grp_valid__SHIFT 0x1a
#define IA_DEBUG_REG2__hp_grp_dma_read_MASK 0x8000000
#define IA_DEBUG_REG2__hp_grp_dma_read__SHIFT 0x1b
#define IA_DEBUG_REG2__hp_current_data_valid_MASK 0x10000000
#define IA_DEBUG_REG2__hp_current_data_valid__SHIFT 0x1c
#define IA_DEBUG_REG2__hp_out_of_range_r2_q_MASK 0x20000000
#define IA_DEBUG_REG2__hp_out_of_range_r2_q__SHIFT 0x1d
#define IA_DEBUG_REG2__hp_dma_mask_fifo_we_MASK 0x40000000
#define IA_DEBUG_REG2__hp_dma_mask_fifo_we__SHIFT 0x1e
#define IA_DEBUG_REG2__hp_dma_ret_data_we_q_MASK 0x80000000
#define IA_DEBUG_REG2__hp_dma_ret_data_we_q__SHIFT 0x1f
/*
 * IA_DEBUG_REG3 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * The fields below cover bits 0-31 with no gaps.  All are single-bit flags
 * except tc_req_count_q, which is two bits wide (bits 24-25).
 */
#define IA_DEBUG_REG3__dma_pipe0_rdreq_valid_MASK 0x1
#define IA_DEBUG_REG3__dma_pipe0_rdreq_valid__SHIFT 0x0
#define IA_DEBUG_REG3__dma_pipe0_rdreq_read_MASK 0x2
#define IA_DEBUG_REG3__dma_pipe0_rdreq_read__SHIFT 0x1
#define IA_DEBUG_REG3__dma_pipe0_rdreq_null_out_MASK 0x4
#define IA_DEBUG_REG3__dma_pipe0_rdreq_null_out__SHIFT 0x2
#define IA_DEBUG_REG3__dma_pipe0_rdreq_eop_out_MASK 0x8
#define IA_DEBUG_REG3__dma_pipe0_rdreq_eop_out__SHIFT 0x3
#define IA_DEBUG_REG3__dma_pipe0_rdreq_use_tc_out_MASK 0x10
#define IA_DEBUG_REG3__dma_pipe0_rdreq_use_tc_out__SHIFT 0x4
#define IA_DEBUG_REG3__grp_dma_draw_is_pipe0_MASK 0x20
#define IA_DEBUG_REG3__grp_dma_draw_is_pipe0__SHIFT 0x5
#define IA_DEBUG_REG3__must_service_pipe0_req_MASK 0x40
#define IA_DEBUG_REG3__must_service_pipe0_req__SHIFT 0x6
#define IA_DEBUG_REG3__send_pipe1_req_MASK 0x80
#define IA_DEBUG_REG3__send_pipe1_req__SHIFT 0x7
#define IA_DEBUG_REG3__dma_pipe1_rdreq_valid_MASK 0x100
#define IA_DEBUG_REG3__dma_pipe1_rdreq_valid__SHIFT 0x8
#define IA_DEBUG_REG3__dma_pipe1_rdreq_read_MASK 0x200
#define IA_DEBUG_REG3__dma_pipe1_rdreq_read__SHIFT 0x9
#define IA_DEBUG_REG3__dma_pipe1_rdreq_null_out_MASK 0x400
#define IA_DEBUG_REG3__dma_pipe1_rdreq_null_out__SHIFT 0xa
#define IA_DEBUG_REG3__dma_pipe1_rdreq_eop_out_MASK 0x800
#define IA_DEBUG_REG3__dma_pipe1_rdreq_eop_out__SHIFT 0xb
#define IA_DEBUG_REG3__dma_pipe1_rdreq_use_tc_out_MASK 0x1000
#define IA_DEBUG_REG3__dma_pipe1_rdreq_use_tc_out__SHIFT 0xc
#define IA_DEBUG_REG3__ia_mc_rdreq_rtr_q_MASK 0x2000
#define IA_DEBUG_REG3__ia_mc_rdreq_rtr_q__SHIFT 0xd
#define IA_DEBUG_REG3__mc_out_rtr_MASK 0x4000
#define IA_DEBUG_REG3__mc_out_rtr__SHIFT 0xe
#define IA_DEBUG_REG3__dma_rdreq_send_out_MASK 0x8000
#define IA_DEBUG_REG3__dma_rdreq_send_out__SHIFT 0xf
#define IA_DEBUG_REG3__pipe0_dr_MASK 0x10000
#define IA_DEBUG_REG3__pipe0_dr__SHIFT 0x10
#define IA_DEBUG_REG3__pipe0_rtr_MASK 0x20000
#define IA_DEBUG_REG3__pipe0_rtr__SHIFT 0x11
#define IA_DEBUG_REG3__ia_tc_rdreq_rtr_q_MASK 0x40000
#define IA_DEBUG_REG3__ia_tc_rdreq_rtr_q__SHIFT 0x12
#define IA_DEBUG_REG3__tc_out_rtr_MASK 0x80000
#define IA_DEBUG_REG3__tc_out_rtr__SHIFT 0x13
#define IA_DEBUG_REG3__pair0_valid_p1_MASK 0x100000
#define IA_DEBUG_REG3__pair0_valid_p1__SHIFT 0x14
#define IA_DEBUG_REG3__pair1_valid_p1_MASK 0x200000
#define IA_DEBUG_REG3__pair1_valid_p1__SHIFT 0x15
#define IA_DEBUG_REG3__pair2_valid_p1_MASK 0x400000
#define IA_DEBUG_REG3__pair2_valid_p1__SHIFT 0x16
#define IA_DEBUG_REG3__pair3_valid_p1_MASK 0x800000
#define IA_DEBUG_REG3__pair3_valid_p1__SHIFT 0x17
/* Two-bit field: bits 24-25. */
#define IA_DEBUG_REG3__tc_req_count_q_MASK 0x3000000
#define IA_DEBUG_REG3__tc_req_count_q__SHIFT 0x18
#define IA_DEBUG_REG3__discard_1st_chunk_MASK 0x4000000
#define IA_DEBUG_REG3__discard_1st_chunk__SHIFT 0x1a
#define IA_DEBUG_REG3__discard_2nd_chunk_MASK 0x8000000
#define IA_DEBUG_REG3__discard_2nd_chunk__SHIFT 0x1b
#define IA_DEBUG_REG3__last_tc_req_p1_MASK 0x10000000
#define IA_DEBUG_REG3__last_tc_req_p1__SHIFT 0x1c
#define IA_DEBUG_REG3__IA_TC_rdreq_send_out_MASK 0x20000000
#define IA_DEBUG_REG3__IA_TC_rdreq_send_out__SHIFT 0x1d
#define IA_DEBUG_REG3__TC_IA_rdret_valid_in_MASK 0x40000000
#define IA_DEBUG_REG3__TC_IA_rdret_valid_in__SHIFT 0x1e
#define IA_DEBUG_REG3__TAP_IA_rdret_vld_in_MASK 0x80000000
#define IA_DEBUG_REG3__TAP_IA_rdret_vld_in__SHIFT 0x1f
/*
 * IA_DEBUG_REG4 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * The fields below cover bits 0-31 with no gaps.  Multi-bit fields:
 *   gs_mode_p1_q           bits 17-19
 *   di_state_sel_p1_q      bits 21-23
 *   di_source_select_p1_q  bits 26-27
 * Every other field is a single-bit flag.
 */
#define IA_DEBUG_REG4__pipe0_dr_MASK 0x1
#define IA_DEBUG_REG4__pipe0_dr__SHIFT 0x0
#define IA_DEBUG_REG4__pipe1_dr_MASK 0x2
#define IA_DEBUG_REG4__pipe1_dr__SHIFT 0x1
#define IA_DEBUG_REG4__pipe2_dr_MASK 0x4
#define IA_DEBUG_REG4__pipe2_dr__SHIFT 0x2
#define IA_DEBUG_REG4__pipe3_dr_MASK 0x8
#define IA_DEBUG_REG4__pipe3_dr__SHIFT 0x3
#define IA_DEBUG_REG4__pipe4_dr_MASK 0x10
#define IA_DEBUG_REG4__pipe4_dr__SHIFT 0x4
#define IA_DEBUG_REG4__pipe5_dr_MASK 0x20
#define IA_DEBUG_REG4__pipe5_dr__SHIFT 0x5
#define IA_DEBUG_REG4__grp_se0_fifo_empty_MASK 0x40
#define IA_DEBUG_REG4__grp_se0_fifo_empty__SHIFT 0x6
#define IA_DEBUG_REG4__grp_se0_fifo_full_MASK 0x80
#define IA_DEBUG_REG4__grp_se0_fifo_full__SHIFT 0x7
#define IA_DEBUG_REG4__pipe0_rtr_MASK 0x100
#define IA_DEBUG_REG4__pipe0_rtr__SHIFT 0x8
#define IA_DEBUG_REG4__pipe1_rtr_MASK 0x200
#define IA_DEBUG_REG4__pipe1_rtr__SHIFT 0x9
#define IA_DEBUG_REG4__pipe2_rtr_MASK 0x400
#define IA_DEBUG_REG4__pipe2_rtr__SHIFT 0xa
#define IA_DEBUG_REG4__pipe3_rtr_MASK 0x800
#define IA_DEBUG_REG4__pipe3_rtr__SHIFT 0xb
#define IA_DEBUG_REG4__pipe4_rtr_MASK 0x1000
#define IA_DEBUG_REG4__pipe4_rtr__SHIFT 0xc
#define IA_DEBUG_REG4__pipe5_rtr_MASK 0x2000
#define IA_DEBUG_REG4__pipe5_rtr__SHIFT 0xd
#define IA_DEBUG_REG4__ia_vgt_prim_rtr_q_MASK 0x4000
#define IA_DEBUG_REG4__ia_vgt_prim_rtr_q__SHIFT 0xe
#define IA_DEBUG_REG4__ia_se1vgt_prim_rtr_q_MASK 0x8000
#define IA_DEBUG_REG4__ia_se1vgt_prim_rtr_q__SHIFT 0xf
#define IA_DEBUG_REG4__di_major_mode_p1_q_MASK 0x10000
#define IA_DEBUG_REG4__di_major_mode_p1_q__SHIFT 0x10
/* Three-bit field: bits 17-19. */
#define IA_DEBUG_REG4__gs_mode_p1_q_MASK 0xe0000
#define IA_DEBUG_REG4__gs_mode_p1_q__SHIFT 0x11
#define IA_DEBUG_REG4__di_event_flag_p1_q_MASK 0x100000
#define IA_DEBUG_REG4__di_event_flag_p1_q__SHIFT 0x14
/* Three-bit field: bits 21-23. */
#define IA_DEBUG_REG4__di_state_sel_p1_q_MASK 0xe00000
#define IA_DEBUG_REG4__di_state_sel_p1_q__SHIFT 0x15
#define IA_DEBUG_REG4__draw_opaq_en_p1_q_MASK 0x1000000
#define IA_DEBUG_REG4__draw_opaq_en_p1_q__SHIFT 0x18
#define IA_DEBUG_REG4__draw_opaq_active_q_MASK 0x2000000
#define IA_DEBUG_REG4__draw_opaq_active_q__SHIFT 0x19
/* Two-bit field: bits 26-27. */
#define IA_DEBUG_REG4__di_source_select_p1_q_MASK 0xc000000
#define IA_DEBUG_REG4__di_source_select_p1_q__SHIFT 0x1a
#define IA_DEBUG_REG4__ready_to_read_di_MASK 0x10000000
#define IA_DEBUG_REG4__ready_to_read_di__SHIFT 0x1c
#define IA_DEBUG_REG4__di_first_group_of_draw_q_MASK 0x20000000
#define IA_DEBUG_REG4__di_first_group_of_draw_q__SHIFT 0x1d
#define IA_DEBUG_REG4__last_shift_of_draw_MASK 0x40000000
#define IA_DEBUG_REG4__last_shift_of_draw__SHIFT 0x1e
#define IA_DEBUG_REG4__current_shift_is_vect1_q_MASK 0x80000000
#define IA_DEBUG_REG4__current_shift_is_vect1_q__SHIFT 0x1f
/*
 * IA_DEBUG_REG5 bit-field layout ((reg & MASK) >> SHIFT extracts a field):
 *   di_index_counter_q_15_0  bits 0-15  (16 bits)
 *   instanceid_13_0          bits 16-29 (14 bits)
 *   draw_input_fifo_full     bit 30
 *   draw_input_fifo_empty    bit 31
 */
#define IA_DEBUG_REG5__di_index_counter_q_15_0_MASK 0xffff
#define IA_DEBUG_REG5__di_index_counter_q_15_0__SHIFT 0x0
#define IA_DEBUG_REG5__instanceid_13_0_MASK 0x3fff0000
#define IA_DEBUG_REG5__instanceid_13_0__SHIFT 0x10
#define IA_DEBUG_REG5__draw_input_fifo_full_MASK 0x40000000
#define IA_DEBUG_REG5__draw_input_fifo_full__SHIFT 0x1e
#define IA_DEBUG_REG5__draw_input_fifo_empty_MASK 0x80000000
#define IA_DEBUG_REG5__draw_input_fifo_empty__SHIFT 0x1f
/*
 * IA_DEBUG_REG6 bit-field layout ((reg & MASK) >> SHIFT extracts a field):
 *   current_shift_q        bits 0-3
 *   current_stride_pre     bits 4-7
 *   current_stride_q       bits 8-12
 *   first_group_partial    bit 13
 *   second_group_partial   bit 14
 *   curr_prim_partial      bit 15
 *   next_stride_q          bits 16-20
 *   next_group_partial     bit 21
 *   after_group_partial    bit 22
 *   extract_group          bit 23
 *   grp_shift_debug_data   bits 24-31
 */
#define IA_DEBUG_REG6__current_shift_q_MASK 0xf
#define IA_DEBUG_REG6__current_shift_q__SHIFT 0x0
#define IA_DEBUG_REG6__current_stride_pre_MASK 0xf0
#define IA_DEBUG_REG6__current_stride_pre__SHIFT 0x4
#define IA_DEBUG_REG6__current_stride_q_MASK 0x1f00
#define IA_DEBUG_REG6__current_stride_q__SHIFT 0x8
#define IA_DEBUG_REG6__first_group_partial_MASK 0x2000
#define IA_DEBUG_REG6__first_group_partial__SHIFT 0xd
#define IA_DEBUG_REG6__second_group_partial_MASK 0x4000
#define IA_DEBUG_REG6__second_group_partial__SHIFT 0xe
#define IA_DEBUG_REG6__curr_prim_partial_MASK 0x8000
#define IA_DEBUG_REG6__curr_prim_partial__SHIFT 0xf
#define IA_DEBUG_REG6__next_stride_q_MASK 0x1f0000
#define IA_DEBUG_REG6__next_stride_q__SHIFT 0x10
#define IA_DEBUG_REG6__next_group_partial_MASK 0x200000
#define IA_DEBUG_REG6__next_group_partial__SHIFT 0x15
#define IA_DEBUG_REG6__after_group_partial_MASK 0x400000
#define IA_DEBUG_REG6__after_group_partial__SHIFT 0x16
#define IA_DEBUG_REG6__extract_group_MASK 0x800000
#define IA_DEBUG_REG6__extract_group__SHIFT 0x17
#define IA_DEBUG_REG6__grp_shift_debug_data_MASK 0xff000000
#define IA_DEBUG_REG6__grp_shift_debug_data__SHIFT 0x18
/*
 * IA_DEBUG_REG7 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * Bits 0-19 hold five 4-bit fields, bits 20-22 hold the 3-bit
 * num_indx_in_group_p2_q, and bits 23-31 are single-bit flags.
 */
#define IA_DEBUG_REG7__reset_indx_state_q_MASK 0xf
#define IA_DEBUG_REG7__reset_indx_state_q__SHIFT 0x0
#define IA_DEBUG_REG7__shift_vect_valid_p2_q_MASK 0xf0
#define IA_DEBUG_REG7__shift_vect_valid_p2_q__SHIFT 0x4
#define IA_DEBUG_REG7__shift_vect1_valid_p2_q_MASK 0xf00
#define IA_DEBUG_REG7__shift_vect1_valid_p2_q__SHIFT 0x8
#define IA_DEBUG_REG7__shift_vect0_reset_match_p2_q_MASK 0xf000
#define IA_DEBUG_REG7__shift_vect0_reset_match_p2_q__SHIFT 0xc
#define IA_DEBUG_REG7__shift_vect1_reset_match_p2_q_MASK 0xf0000
#define IA_DEBUG_REG7__shift_vect1_reset_match_p2_q__SHIFT 0x10
/* Three-bit field: bits 20-22. */
#define IA_DEBUG_REG7__num_indx_in_group_p2_q_MASK 0x700000
#define IA_DEBUG_REG7__num_indx_in_group_p2_q__SHIFT 0x14
#define IA_DEBUG_REG7__last_group_of_draw_p2_q_MASK 0x800000
#define IA_DEBUG_REG7__last_group_of_draw_p2_q__SHIFT 0x17
#define IA_DEBUG_REG7__shift_event_flag_p2_q_MASK 0x1000000
#define IA_DEBUG_REG7__shift_event_flag_p2_q__SHIFT 0x18
#define IA_DEBUG_REG7__indx_shift_is_one_p2_q_MASK 0x2000000
#define IA_DEBUG_REG7__indx_shift_is_one_p2_q__SHIFT 0x19
#define IA_DEBUG_REG7__indx_shift_is_two_p2_q_MASK 0x4000000
#define IA_DEBUG_REG7__indx_shift_is_two_p2_q__SHIFT 0x1a
#define IA_DEBUG_REG7__indx_stride_is_four_p2_q_MASK 0x8000000
#define IA_DEBUG_REG7__indx_stride_is_four_p2_q__SHIFT 0x1b
#define IA_DEBUG_REG7__shift_prim1_reset_p3_q_MASK 0x10000000
#define IA_DEBUG_REG7__shift_prim1_reset_p3_q__SHIFT 0x1c
#define IA_DEBUG_REG7__shift_prim1_partial_p3_q_MASK 0x20000000
#define IA_DEBUG_REG7__shift_prim1_partial_p3_q__SHIFT 0x1d
#define IA_DEBUG_REG7__shift_prim0_reset_p3_q_MASK 0x40000000
#define IA_DEBUG_REG7__shift_prim0_reset_p3_q__SHIFT 0x1e
#define IA_DEBUG_REG7__shift_prim0_partial_p3_q_MASK 0x80000000
#define IA_DEBUG_REG7__shift_prim0_partial_p3_q__SHIFT 0x1f
/*
 * IA_DEBUG_REG8 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * The fields below cover bits 0-31 with no gaps.  Multi-bit fields:
 *   di_prim_type_p1_q     bits 0-4
 *   grp_state_sel         bits 12-14
 *   grp_sub_prim_type     bits 15-20
 *   grp_output_path       bits 21-23
 *   grp_components_valid  bits 28-31
 * Every other field is a single-bit flag.
 */
#define IA_DEBUG_REG8__di_prim_type_p1_q_MASK 0x1f
#define IA_DEBUG_REG8__di_prim_type_p1_q__SHIFT 0x0
#define IA_DEBUG_REG8__two_cycle_xfer_p1_q_MASK 0x20
#define IA_DEBUG_REG8__two_cycle_xfer_p1_q__SHIFT 0x5
#define IA_DEBUG_REG8__two_prim_input_p1_q_MASK 0x40
#define IA_DEBUG_REG8__two_prim_input_p1_q__SHIFT 0x6
#define IA_DEBUG_REG8__shift_vect_end_of_packet_p5_q_MASK 0x80
#define IA_DEBUG_REG8__shift_vect_end_of_packet_p5_q__SHIFT 0x7
#define IA_DEBUG_REG8__last_group_of_inst_p5_q_MASK 0x100
#define IA_DEBUG_REG8__last_group_of_inst_p5_q__SHIFT 0x8
#define IA_DEBUG_REG8__shift_prim1_null_flag_p5_q_MASK 0x200
#define IA_DEBUG_REG8__shift_prim1_null_flag_p5_q__SHIFT 0x9
#define IA_DEBUG_REG8__shift_prim0_null_flag_p5_q_MASK 0x400
#define IA_DEBUG_REG8__shift_prim0_null_flag_p5_q__SHIFT 0xa
#define IA_DEBUG_REG8__grp_continued_MASK 0x800
#define IA_DEBUG_REG8__grp_continued__SHIFT 0xb
#define IA_DEBUG_REG8__grp_state_sel_MASK 0x7000
#define IA_DEBUG_REG8__grp_state_sel__SHIFT 0xc
#define IA_DEBUG_REG8__grp_sub_prim_type_MASK 0x1f8000
#define IA_DEBUG_REG8__grp_sub_prim_type__SHIFT 0xf
#define IA_DEBUG_REG8__grp_output_path_MASK 0xe00000
#define IA_DEBUG_REG8__grp_output_path__SHIFT 0x15
#define IA_DEBUG_REG8__grp_null_primitive_MASK 0x1000000
#define IA_DEBUG_REG8__grp_null_primitive__SHIFT 0x18
#define IA_DEBUG_REG8__grp_eop_MASK 0x2000000
#define IA_DEBUG_REG8__grp_eop__SHIFT 0x19
#define IA_DEBUG_REG8__grp_eopg_MASK 0x4000000
#define IA_DEBUG_REG8__grp_eopg__SHIFT 0x1a
#define IA_DEBUG_REG8__grp_event_flag_MASK 0x8000000
#define IA_DEBUG_REG8__grp_event_flag__SHIFT 0x1b
#define IA_DEBUG_REG8__grp_components_valid_MASK 0xf0000000
#define IA_DEBUG_REG8__grp_components_valid__SHIFT 0x1c
/*
 * IA_DEBUG_REG9 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * Bits 0-19 are single-bit flags (bits 14 and 15 are named SPARE0/SPARE1);
 * prim_counter_q occupies the remaining 12 bits (bits 20-31).
 */
#define IA_DEBUG_REG9__send_to_se1_p6_MASK 0x1
#define IA_DEBUG_REG9__send_to_se1_p6__SHIFT 0x0
#define IA_DEBUG_REG9__gfx_se_switch_p6_MASK 0x2
#define IA_DEBUG_REG9__gfx_se_switch_p6__SHIFT 0x1
#define IA_DEBUG_REG9__null_eoi_xfer_prim1_p6_MASK 0x4
#define IA_DEBUG_REG9__null_eoi_xfer_prim1_p6__SHIFT 0x2
#define IA_DEBUG_REG9__null_eoi_xfer_prim0_p6_MASK 0x8
#define IA_DEBUG_REG9__null_eoi_xfer_prim0_p6__SHIFT 0x3
#define IA_DEBUG_REG9__prim1_eoi_p6_MASK 0x10
#define IA_DEBUG_REG9__prim1_eoi_p6__SHIFT 0x4
#define IA_DEBUG_REG9__prim0_eoi_p6_MASK 0x20
#define IA_DEBUG_REG9__prim0_eoi_p6__SHIFT 0x5
#define IA_DEBUG_REG9__prim1_valid_eopg_p6_MASK 0x40
#define IA_DEBUG_REG9__prim1_valid_eopg_p6__SHIFT 0x6
#define IA_DEBUG_REG9__prim0_valid_eopg_p6_MASK 0x80
#define IA_DEBUG_REG9__prim0_valid_eopg_p6__SHIFT 0x7
#define IA_DEBUG_REG9__prim1_to_other_se_p6_MASK 0x100
#define IA_DEBUG_REG9__prim1_to_other_se_p6__SHIFT 0x8
#define IA_DEBUG_REG9__eopg_on_last_prim_p6_MASK 0x200
#define IA_DEBUG_REG9__eopg_on_last_prim_p6__SHIFT 0x9
#define IA_DEBUG_REG9__eopg_between_prims_p6_MASK 0x400
#define IA_DEBUG_REG9__eopg_between_prims_p6__SHIFT 0xa
#define IA_DEBUG_REG9__prim_count_eq_group_size_p6_MASK 0x800
#define IA_DEBUG_REG9__prim_count_eq_group_size_p6__SHIFT 0xb
#define IA_DEBUG_REG9__prim_count_gt_group_size_p6_MASK 0x1000
#define IA_DEBUG_REG9__prim_count_gt_group_size_p6__SHIFT 0xc
#define IA_DEBUG_REG9__two_prim_output_p5_q_MASK 0x2000
#define IA_DEBUG_REG9__two_prim_output_p5_q__SHIFT 0xd
#define IA_DEBUG_REG9__SPARE0_MASK 0x4000
#define IA_DEBUG_REG9__SPARE0__SHIFT 0xe
#define IA_DEBUG_REG9__SPARE1_MASK 0x8000
#define IA_DEBUG_REG9__SPARE1__SHIFT 0xf
#define IA_DEBUG_REG9__shift_vect_end_of_packet_p5_q_MASK 0x10000
#define IA_DEBUG_REG9__shift_vect_end_of_packet_p5_q__SHIFT 0x10
#define IA_DEBUG_REG9__prim1_xfer_p6_MASK 0x20000
#define IA_DEBUG_REG9__prim1_xfer_p6__SHIFT 0x11
#define IA_DEBUG_REG9__grp_se1_fifo_empty_MASK 0x40000
#define IA_DEBUG_REG9__grp_se1_fifo_empty__SHIFT 0x12
#define IA_DEBUG_REG9__grp_se1_fifo_full_MASK 0x80000
#define IA_DEBUG_REG9__grp_se1_fifo_full__SHIFT 0x13
/* Twelve-bit field: bits 20-31. */
#define IA_DEBUG_REG9__prim_counter_q_MASK 0xfff00000
#define IA_DEBUG_REG9__prim_counter_q__SHIFT 0x14
/*
 * VGT_DEBUG_REG0 bit-field layout.
 *
 * <field>_MASK selects the field's bit in the 32-bit register value and
 * <field>__SHIFT is its bit index; extract a field with
 * (reg & MASK) >> SHIFT.
 *
 * All 32 fields are single-bit flags, one per bit 0-31.  The SPARE0..SPARE10
 * names are interleaved with the named flags and are not in bit order
 * (e.g. SPARE9 is bit 1 while SPARE0 is bit 31).
 */
#define VGT_DEBUG_REG0__vgt_busy_extended_MASK 0x1
#define VGT_DEBUG_REG0__vgt_busy_extended__SHIFT 0x0
#define VGT_DEBUG_REG0__SPARE9_MASK 0x2
#define VGT_DEBUG_REG0__SPARE9__SHIFT 0x1
#define VGT_DEBUG_REG0__vgt_busy_MASK 0x4
#define VGT_DEBUG_REG0__vgt_busy__SHIFT 0x2
#define VGT_DEBUG_REG0__SPARE8_MASK 0x8
#define VGT_DEBUG_REG0__SPARE8__SHIFT 0x3
#define VGT_DEBUG_REG0__SPARE7_MASK 0x10
#define VGT_DEBUG_REG0__SPARE7__SHIFT 0x4
#define VGT_DEBUG_REG0__SPARE6_MASK 0x20
#define VGT_DEBUG_REG0__SPARE6__SHIFT 0x5
#define VGT_DEBUG_REG0__SPARE5_MASK 0x40
#define VGT_DEBUG_REG0__SPARE5__SHIFT 0x6
#define VGT_DEBUG_REG0__SPARE4_MASK 0x80
#define VGT_DEBUG_REG0__SPARE4__SHIFT 0x7
#define VGT_DEBUG_REG0__pi_busy_MASK 0x100
#define VGT_DEBUG_REG0__pi_busy__SHIFT 0x8
#define VGT_DEBUG_REG0__vr_pi_busy_MASK 0x200
#define VGT_DEBUG_REG0__vr_pi_busy__SHIFT 0x9
#define VGT_DEBUG_REG0__pt_pi_busy_MASK 0x400
#define VGT_DEBUG_REG0__pt_pi_busy__SHIFT 0xa
#define VGT_DEBUG_REG0__te_pi_busy_MASK 0x800
#define VGT_DEBUG_REG0__te_pi_busy__SHIFT 0xb
#define VGT_DEBUG_REG0__gs_busy_MASK 0x1000
#define VGT_DEBUG_REG0__gs_busy__SHIFT 0xc
#define VGT_DEBUG_REG0__rcm_busy_MASK 0x2000
#define VGT_DEBUG_REG0__rcm_busy__SHIFT 0xd
#define VGT_DEBUG_REG0__tm_busy_MASK 0x4000
#define VGT_DEBUG_REG0__tm_busy__SHIFT 0xe
#define VGT_DEBUG_REG0__cm_busy_MASK 0x8000
#define VGT_DEBUG_REG0__cm_busy__SHIFT 0xf
#define VGT_DEBUG_REG0__gog_busy_MASK 0x10000
#define VGT_DEBUG_REG0__gog_busy__SHIFT 0x10
#define VGT_DEBUG_REG0__frmt_busy_MASK 0x20000
#define VGT_DEBUG_REG0__frmt_busy__SHIFT 0x11
#define VGT_DEBUG_REG0__SPARE10_MASK 0x40000
#define VGT_DEBUG_REG0__SPARE10__SHIFT 0x12
#define VGT_DEBUG_REG0__te11_pi_busy_MASK 0x80000
#define VGT_DEBUG_REG0__te11_pi_busy__SHIFT 0x13
#define VGT_DEBUG_REG0__SPARE3_MASK 0x100000
#define VGT_DEBUG_REG0__SPARE3__SHIFT 0x14
#define VGT_DEBUG_REG0__combined_out_busy_MASK 0x200000
#define VGT_DEBUG_REG0__combined_out_busy__SHIFT 0x15
#define VGT_DEBUG_REG0__spi_vs_interfaces_busy_MASK 0x400000
#define VGT_DEBUG_REG0__spi_vs_interfaces_busy__SHIFT 0x16
#define VGT_DEBUG_REG0__pa_interfaces_busy_MASK 0x800000
#define VGT_DEBUG_REG0__pa_interfaces_busy__SHIFT 0x17
#define VGT_DEBUG_REG0__reg_clk_busy_MASK 0x1000000
#define VGT_DEBUG_REG0__reg_clk_busy__SHIFT 0x18
#define VGT_DEBUG_REG0__SPARE2_MASK 0x2000000
#define VGT_DEBUG_REG0__SPARE2__SHIFT 0x19
#define VGT_DEBUG_REG0__core_clk_busy_MASK 0x4000000
#define VGT_DEBUG_REG0__core_clk_busy__SHIFT 0x1a
#define VGT_DEBUG_REG0__gs_clk_busy_MASK 0x8000000
#define VGT_DEBUG_REG0__gs_clk_busy__SHIFT 0x1b
#define VGT_DEBUG_REG0__SPARE1_MASK 0x10000000
#define VGT_DEBUG_REG0__SPARE1__SHIFT 0x1c
#define VGT_DEBUG_REG0__sclk_core_vld_MASK 0x20000000
#define VGT_DEBUG_REG0__sclk_core_vld__SHIFT 0x1d
#define VGT_DEBUG_REG0__sclk_gs_vld_MASK 0x40000000
#define VGT_DEBUG_REG0__sclk_gs_vld__SHIFT 0x1e
#define VGT_DEBUG_REG0__SPARE0_MASK 0x80000000
#define VGT_DEBUG_REG0__SPARE0__SHIFT 0x1f
/*
 * VGT_DEBUG_REG1 bit-field layout.
 *
 * <field>_MASK selects the field's bit in the 32-bit register value and
 * <field>__SHIFT is its bit index; extract a field with
 * (reg & MASK) >> SHIFT.
 *
 * All 32 fields are single-bit flags, one per bit 0-31.  Bits 0-9 are
 * SPARE9 down to SPARE0; further SPARE-named bits (SPARE12, SPARE11,
 * SPARE10, SPARE23, SPARE25) are interleaved with the named flags in
 * bits 17-25.
 */
#define VGT_DEBUG_REG1__SPARE9_MASK 0x1
#define VGT_DEBUG_REG1__SPARE9__SHIFT 0x0
#define VGT_DEBUG_REG1__SPARE8_MASK 0x2
#define VGT_DEBUG_REG1__SPARE8__SHIFT 0x1
#define VGT_DEBUG_REG1__SPARE7_MASK 0x4
#define VGT_DEBUG_REG1__SPARE7__SHIFT 0x2
#define VGT_DEBUG_REG1__SPARE6_MASK 0x8
#define VGT_DEBUG_REG1__SPARE6__SHIFT 0x3
#define VGT_DEBUG_REG1__SPARE5_MASK 0x10
#define VGT_DEBUG_REG1__SPARE5__SHIFT 0x4
#define VGT_DEBUG_REG1__SPARE4_MASK 0x20
#define VGT_DEBUG_REG1__SPARE4__SHIFT 0x5
#define VGT_DEBUG_REG1__SPARE3_MASK 0x40
#define VGT_DEBUG_REG1__SPARE3__SHIFT 0x6
#define VGT_DEBUG_REG1__SPARE2_MASK 0x80
#define VGT_DEBUG_REG1__SPARE2__SHIFT 0x7
#define VGT_DEBUG_REG1__SPARE1_MASK 0x100
#define VGT_DEBUG_REG1__SPARE1__SHIFT 0x8
#define VGT_DEBUG_REG1__SPARE0_MASK 0x200
#define VGT_DEBUG_REG1__SPARE0__SHIFT 0x9
#define VGT_DEBUG_REG1__pi_vr_valid_MASK 0x400
#define VGT_DEBUG_REG1__pi_vr_valid__SHIFT 0xa
#define VGT_DEBUG_REG1__vr_pi_read_MASK 0x800
#define VGT_DEBUG_REG1__vr_pi_read__SHIFT 0xb
#define VGT_DEBUG_REG1__pi_pt_valid_MASK 0x1000
#define VGT_DEBUG_REG1__pi_pt_valid__SHIFT 0xc
#define VGT_DEBUG_REG1__pt_pi_read_MASK 0x2000
#define VGT_DEBUG_REG1__pt_pi_read__SHIFT 0xd
#define VGT_DEBUG_REG1__pi_te_valid_MASK 0x4000
#define VGT_DEBUG_REG1__pi_te_valid__SHIFT 0xe
#define VGT_DEBUG_REG1__te_grp_read_MASK 0x8000
#define VGT_DEBUG_REG1__te_grp_read__SHIFT 0xf
#define VGT_DEBUG_REG1__vr_out_indx_valid_MASK 0x10000
#define VGT_DEBUG_REG1__vr_out_indx_valid__SHIFT 0x10
#define VGT_DEBUG_REG1__SPARE12_MASK 0x20000
#define VGT_DEBUG_REG1__SPARE12__SHIFT 0x11
#define VGT_DEBUG_REG1__vr_out_prim_valid_MASK 0x40000
#define VGT_DEBUG_REG1__vr_out_prim_valid__SHIFT 0x12
#define VGT_DEBUG_REG1__SPARE11_MASK 0x80000
#define VGT_DEBUG_REG1__SPARE11__SHIFT 0x13
#define VGT_DEBUG_REG1__pt_out_indx_valid_MASK 0x100000
#define VGT_DEBUG_REG1__pt_out_indx_valid__SHIFT 0x14
#define VGT_DEBUG_REG1__SPARE10_MASK 0x200000
#define VGT_DEBUG_REG1__SPARE10__SHIFT 0x15
#define VGT_DEBUG_REG1__pt_out_prim_valid_MASK 0x400000
#define VGT_DEBUG_REG1__pt_out_prim_valid__SHIFT 0x16
#define VGT_DEBUG_REG1__SPARE23_MASK 0x800000
#define VGT_DEBUG_REG1__SPARE23__SHIFT 0x17
#define VGT_DEBUG_REG1__te_out_data_valid_MASK 0x1000000
#define VGT_DEBUG_REG1__te_out_data_valid__SHIFT 0x18
#define VGT_DEBUG_REG1__SPARE25_MASK 0x2000000
#define VGT_DEBUG_REG1__SPARE25__SHIFT 0x19
#define VGT_DEBUG_REG1__pi_gs_valid_MASK 0x4000000
#define VGT_DEBUG_REG1__pi_gs_valid__SHIFT 0x1a
#define VGT_DEBUG_REG1__gs_pi_read_MASK 0x8000000
#define VGT_DEBUG_REG1__gs_pi_read__SHIFT 0x1b
#define VGT_DEBUG_REG1__gog_out_indx_valid_MASK 0x10000000
#define VGT_DEBUG_REG1__gog_out_indx_valid__SHIFT 0x1c
#define VGT_DEBUG_REG1__out_indx_read_MASK 0x20000000
#define VGT_DEBUG_REG1__out_indx_read__SHIFT 0x1d
#define VGT_DEBUG_REG1__gog_out_prim_valid_MASK 0x40000000
#define VGT_DEBUG_REG1__gog_out_prim_valid__SHIFT 0x1e
#define VGT_DEBUG_REG1__out_prim_read_MASK 0x80000000
#define VGT_DEBUG_REG1__out_prim_read__SHIFT 0x1f
/*
 * VGT_DEBUG_REG2 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * Bits 0-28 are single-bit flags; the top three bits (29-31) form the
 * SPARE field.
 */
#define VGT_DEBUG_REG2__hs_grp_busy_MASK 0x1
#define VGT_DEBUG_REG2__hs_grp_busy__SHIFT 0x0
#define VGT_DEBUG_REG2__hs_noif_busy_MASK 0x2
#define VGT_DEBUG_REG2__hs_noif_busy__SHIFT 0x1
#define VGT_DEBUG_REG2__tfmmIsBusy_MASK 0x4
#define VGT_DEBUG_REG2__tfmmIsBusy__SHIFT 0x2
#define VGT_DEBUG_REG2__lsVertIfBusy_0_MASK 0x8
#define VGT_DEBUG_REG2__lsVertIfBusy_0__SHIFT 0x3
#define VGT_DEBUG_REG2__te11_hs_tess_input_rtr_MASK 0x10
#define VGT_DEBUG_REG2__te11_hs_tess_input_rtr__SHIFT 0x4
#define VGT_DEBUG_REG2__lsWaveIfBusy_0_MASK 0x20
#define VGT_DEBUG_REG2__lsWaveIfBusy_0__SHIFT 0x5
#define VGT_DEBUG_REG2__hs_te11_tess_input_rts_MASK 0x40
#define VGT_DEBUG_REG2__hs_te11_tess_input_rts__SHIFT 0x6
#define VGT_DEBUG_REG2__grpModBusy_MASK 0x80
#define VGT_DEBUG_REG2__grpModBusy__SHIFT 0x7
#define VGT_DEBUG_REG2__lsVertFifoEmpty_MASK 0x100
#define VGT_DEBUG_REG2__lsVertFifoEmpty__SHIFT 0x8
#define VGT_DEBUG_REG2__lsWaveFifoEmpty_MASK 0x200
#define VGT_DEBUG_REG2__lsWaveFifoEmpty__SHIFT 0x9
#define VGT_DEBUG_REG2__hsVertFifoEmpty_MASK 0x400
#define VGT_DEBUG_REG2__hsVertFifoEmpty__SHIFT 0xa
#define VGT_DEBUG_REG2__hsWaveFifoEmpty_MASK 0x800
#define VGT_DEBUG_REG2__hsWaveFifoEmpty__SHIFT 0xb
#define VGT_DEBUG_REG2__hsInputFifoEmpty_MASK 0x1000
#define VGT_DEBUG_REG2__hsInputFifoEmpty__SHIFT 0xc
#define VGT_DEBUG_REG2__hsTifFifoEmpty_MASK 0x2000
#define VGT_DEBUG_REG2__hsTifFifoEmpty__SHIFT 0xd
#define VGT_DEBUG_REG2__lsVertFifoFull_MASK 0x4000
#define VGT_DEBUG_REG2__lsVertFifoFull__SHIFT 0xe
#define VGT_DEBUG_REG2__lsWaveFifoFull_MASK 0x8000
#define VGT_DEBUG_REG2__lsWaveFifoFull__SHIFT 0xf
#define VGT_DEBUG_REG2__hsVertFifoFull_MASK 0x10000
#define VGT_DEBUG_REG2__hsVertFifoFull__SHIFT 0x10
#define VGT_DEBUG_REG2__hsWaveFifoFull_MASK 0x20000
#define VGT_DEBUG_REG2__hsWaveFifoFull__SHIFT 0x11
#define VGT_DEBUG_REG2__hsInputFifoFull_MASK 0x40000
#define VGT_DEBUG_REG2__hsInputFifoFull__SHIFT 0x12
#define VGT_DEBUG_REG2__hsTifFifoFull_MASK 0x80000
#define VGT_DEBUG_REG2__hsTifFifoFull__SHIFT 0x13
#define VGT_DEBUG_REG2__p0_rtr_MASK 0x100000
#define VGT_DEBUG_REG2__p0_rtr__SHIFT 0x14
#define VGT_DEBUG_REG2__p1_rtr_MASK 0x200000
#define VGT_DEBUG_REG2__p1_rtr__SHIFT 0x15
#define VGT_DEBUG_REG2__p0_dr_MASK 0x400000
#define VGT_DEBUG_REG2__p0_dr__SHIFT 0x16
#define VGT_DEBUG_REG2__p1_dr_MASK 0x800000
#define VGT_DEBUG_REG2__p1_dr__SHIFT 0x17
#define VGT_DEBUG_REG2__p0_rts_MASK 0x1000000
#define VGT_DEBUG_REG2__p0_rts__SHIFT 0x18
#define VGT_DEBUG_REG2__p1_rts_MASK 0x2000000
#define VGT_DEBUG_REG2__p1_rts__SHIFT 0x19
#define VGT_DEBUG_REG2__ls_sh_id_MASK 0x4000000
#define VGT_DEBUG_REG2__ls_sh_id__SHIFT 0x1a
#define VGT_DEBUG_REG2__lsFwaveFlag_MASK 0x8000000
#define VGT_DEBUG_REG2__lsFwaveFlag__SHIFT 0x1b
#define VGT_DEBUG_REG2__lsWaveSendFlush_MASK 0x10000000
#define VGT_DEBUG_REG2__lsWaveSendFlush__SHIFT 0x1c
/* Three-bit field: bits 29-31. */
#define VGT_DEBUG_REG2__SPARE_MASK 0xe0000000
#define VGT_DEBUG_REG2__SPARE__SHIFT 0x1d
/*
 * VGT_DEBUG_REG3 bit-field layout ((reg & MASK) >> SHIFT extracts a field):
 *   lsTgRelInd    bits 0-11  (12 bits)
 *   lsWaveRelInd  bits 12-17 (6 bits)
 *   lsPatchCnt    bits 18-25 (8 bits)
 *   hsWaveRelInd  bits 26-31 (6 bits)
 */
#define VGT_DEBUG_REG3__lsTgRelInd_MASK 0xfff
#define VGT_DEBUG_REG3__lsTgRelInd__SHIFT 0x0
#define VGT_DEBUG_REG3__lsWaveRelInd_MASK 0x3f000
#define VGT_DEBUG_REG3__lsWaveRelInd__SHIFT 0xc
#define VGT_DEBUG_REG3__lsPatchCnt_MASK 0x3fc0000
#define VGT_DEBUG_REG3__lsPatchCnt__SHIFT 0x12
#define VGT_DEBUG_REG3__hsWaveRelInd_MASK 0xfc000000
#define VGT_DEBUG_REG3__hsWaveRelInd__SHIFT 0x1a
/*
 * VGT_DEBUG_REG4 bit-field layout ((reg & MASK) >> SHIFT extracts a field):
 *   hsPatchCnt       bits 0-7   (8 bits)
 *   hsPrimId_15_0    bits 8-23  (16 bits)
 *   hsCpCnt          bits 24-28 (5 bits)
 *   hsWaveSendFlush  bit 29
 *   hsFwaveFlag      bit 30
 *   SPARE            bit 31
 */
#define VGT_DEBUG_REG4__hsPatchCnt_MASK 0xff
#define VGT_DEBUG_REG4__hsPatchCnt__SHIFT 0x0
#define VGT_DEBUG_REG4__hsPrimId_15_0_MASK 0xffff00
#define VGT_DEBUG_REG4__hsPrimId_15_0__SHIFT 0x8
#define VGT_DEBUG_REG4__hsCpCnt_MASK 0x1f000000
#define VGT_DEBUG_REG4__hsCpCnt__SHIFT 0x18
#define VGT_DEBUG_REG4__hsWaveSendFlush_MASK 0x20000000
#define VGT_DEBUG_REG4__hsWaveSendFlush__SHIFT 0x1d
#define VGT_DEBUG_REG4__hsFwaveFlag_MASK 0x40000000
#define VGT_DEBUG_REG4__hsFwaveFlag__SHIFT 0x1e
#define VGT_DEBUG_REG4__SPARE_MASK 0x80000000
#define VGT_DEBUG_REG4__SPARE__SHIFT 0x1f
/*
 * VGT_DEBUG_REG5 bit-field layout ((reg & MASK) >> SHIFT extracts a field).
 *
 * Each byte of the register holds a 3-bit SPARE field in its low bits
 * followed by a 5-bit *CreditCnt_0 field in its high bits:
 *   byte 0: SPARE4 (bits 0-2),   hsWaveCreditCnt_0 (bits 3-7)
 *   byte 1: SPARE3 (bits 8-10),  hsVertCreditCnt_0 (bits 11-15)
 *   byte 2: SPARE2 (bits 16-18), lsWaveCreditCnt_0 (bits 19-23)
 *   byte 3: SPARE1 (bits 24-26), lsVertCreditCnt_0 (bits 27-31)
 */
#define VGT_DEBUG_REG5__SPARE4_MASK 0x7
#define VGT_DEBUG_REG5__SPARE4__SHIFT 0x0
#define VGT_DEBUG_REG5__hsWaveCreditCnt_0_MASK 0xf8
#define VGT_DEBUG_REG5__hsWaveCreditCnt_0__SHIFT 0x3
#define VGT_DEBUG_REG5__SPARE3_MASK 0x700
#define VGT_DEBUG_REG5__SPARE3__SHIFT 0x8
#define VGT_DEBUG_REG5__hsVertCreditCnt_0_MASK 0xf800
#define VGT_DEBUG_REG5__hsVertCreditCnt_0__SHIFT 0xb
#define VGT_DEBUG_REG5__SPARE2_MASK 0x70000
#define VGT_DEBUG_REG5__SPARE2__SHIFT 0x10
#define VGT_DEBUG_REG5__lsWaveCreditCnt_0_MASK 0xf80000
#define VGT_DEBUG_REG5__lsWaveCreditCnt_0__SHIFT 0x13
#define VGT_DEBUG_REG5__SPARE1_MASK 0x7000000
#define VGT_DEBUG_REG5__SPARE1__SHIFT 0x18
#define VGT_DEBUG_REG5__lsVertCreditCnt_0_MASK 0xf8000000
#define VGT_DEBUG_REG5__lsVertCreditCnt_0__SHIFT 0x1b
/*
 * VGT_DEBUG_REG6 bit-field layout: debug_BASE is the low 16 bits (0-15) and
 * debug_SIZE the high 16 bits (16-31); (reg & MASK) >> SHIFT extracts either.
 */
#define VGT_DEBUG_REG6__debug_BASE_MASK 0xffff
#define VGT_DEBUG_REG6__debug_BASE__SHIFT 0x0
#define VGT_DEBUG_REG6__debug_SIZE_MASK 0xffff0000
#define VGT_DEBUG_REG6__debug_SIZE__SHIFT 0x10
/*
 * VGT_DEBUG_REG7 bit-field layout ((reg & MASK) >> SHIFT extracts a field):
 *   debug_tfmmFifoEmpty  bit 0
 *   debug_tfmmFifoFull   bit 1
 *   hs_pipe0_dr          bit 2
 *   hs_pipe0_rtr         bit 3
 *   hs_pipe1_rtr         bit 4
 *   SPARE                bits 5-15  (11 bits)
 *   TF_addr              bits 16-31 (16 bits)
 */
#define VGT_DEBUG_REG7__debug_tfmmFifoEmpty_MASK 0x1
#define VGT_DEBUG_REG7__debug_tfmmFifoEmpty__SHIFT 0x0
#define VGT_DEBUG_REG7__debug_tfmmFifoFull_MASK 0x2
#define VGT_DEBUG_REG7__debug_tfmmFifoFull__SHIFT 0x1
#define VGT_DEBUG_REG7__hs_pipe0_dr_MASK 0x4
#define VGT_DEBUG_REG7__hs_pipe0_dr__SHIFT 0x2
#define VGT_DEBUG_REG7__hs_pipe0_rtr_MASK 0x8
#define VGT_DEBUG_REG7__hs_pipe0_rtr__SHIFT 0x3
#define VGT_DEBUG_REG7__hs_pipe1_rtr_MASK 0x10
#define VGT_DEBUG_REG7__hs_pipe1_rtr__SHIFT 0x4
#define VGT_DEBUG_REG7__SPARE_MASK 0xffe0
#define VGT_DEBUG_REG7__SPARE__SHIFT 0x5
#define VGT_DEBUG_REG7__TF_addr_MASK 0xffff0000
#define VGT_DEBUG_REG7__TF_addr__SHIFT 0x10
/*
 * VGT_DEBUG_REG8 bit-field layout.
 *
 * <field>_MASK selects the field's bit in the 32-bit register value and
 * <field>__SHIFT is its bit index; extract a field with
 * (reg & MASK) >> SHIFT.
 *
 * All 32 fields are single-bit flags, one per bit 0-31.
 */
#define VGT_DEBUG_REG8__rcm_busy_q_MASK 0x1
#define VGT_DEBUG_REG8__rcm_busy_q__SHIFT 0x0
#define VGT_DEBUG_REG8__rcm_noif_busy_q_MASK 0x2
#define VGT_DEBUG_REG8__rcm_noif_busy_q__SHIFT 0x1
#define VGT_DEBUG_REG8__r1_inst_rtr_MASK 0x4
#define VGT_DEBUG_REG8__r1_inst_rtr__SHIFT 0x2
#define VGT_DEBUG_REG8__spi_gsprim_fifo_busy_q_MASK 0x8
#define VGT_DEBUG_REG8__spi_gsprim_fifo_busy_q__SHIFT 0x3
#define VGT_DEBUG_REG8__spi_esvert_fifo_busy_q_MASK 0x10
#define VGT_DEBUG_REG8__spi_esvert_fifo_busy_q__SHIFT 0x4
#define VGT_DEBUG_REG8__gs_tbl_valid_r3_q_MASK 0x20
#define VGT_DEBUG_REG8__gs_tbl_valid_r3_q__SHIFT 0x5
#define VGT_DEBUG_REG8__valid_r0_q_MASK 0x40
#define VGT_DEBUG_REG8__valid_r0_q__SHIFT 0x6
#define VGT_DEBUG_REG8__valid_r1_q_MASK 0x80
#define VGT_DEBUG_REG8__valid_r1_q__SHIFT 0x7
#define VGT_DEBUG_REG8__valid_r2_MASK 0x100
#define VGT_DEBUG_REG8__valid_r2__SHIFT 0x8
#define VGT_DEBUG_REG8__valid_r2_q_MASK 0x200
#define VGT_DEBUG_REG8__valid_r2_q__SHIFT 0x9
#define VGT_DEBUG_REG8__r0_rtr_MASK 0x400
#define VGT_DEBUG_REG8__r0_rtr__SHIFT 0xa
#define VGT_DEBUG_REG8__r1_rtr_MASK 0x800
#define VGT_DEBUG_REG8__r1_rtr__SHIFT 0xb
#define VGT_DEBUG_REG8__r2_indx_rtr_MASK 0x1000
#define VGT_DEBUG_REG8__r2_indx_rtr__SHIFT 0xc
#define VGT_DEBUG_REG8__r2_rtr_MASK 0x2000
#define VGT_DEBUG_REG8__r2_rtr__SHIFT 0xd
#define VGT_DEBUG_REG8__es_gs_rtr_MASK 0x4000
#define VGT_DEBUG_REG8__es_gs_rtr__SHIFT 0xe
#define VGT_DEBUG_REG8__gs_event_fifo_rtr_MASK 0x8000
#define VGT_DEBUG_REG8__gs_event_fifo_rtr__SHIFT 0xf
#define VGT_DEBUG_REG8__tm_rcm_gs_event_rtr_MASK 0x10000
#define VGT_DEBUG_REG8__tm_rcm_gs_event_rtr__SHIFT 0x10
#define VGT_DEBUG_REG8__gs_tbl_r3_rtr_MASK 0x20000
#define VGT_DEBUG_REG8__gs_tbl_r3_rtr__SHIFT 0x11
#define VGT_DEBUG_REG8__prim_skid_fifo_empty_MASK 0x40000
#define VGT_DEBUG_REG8__prim_skid_fifo_empty__SHIFT 0x12
#define VGT_DEBUG_REG8__VGT_SPI_gsprim_rtr_q_MASK 0x80000
#define VGT_DEBUG_REG8__VGT_SPI_gsprim_rtr_q__SHIFT 0x13
#define VGT_DEBUG_REG8__tm_rcm_gs_tbl_rtr_MASK 0x100000
#define VGT_DEBUG_REG8__tm_rcm_gs_tbl_rtr__SHIFT 0x14
#define VGT_DEBUG_REG8__tm_rcm_es_tbl_rtr_MASK 0x200000
#define VGT_DEBUG_REG8__tm_rcm_es_tbl_rtr__SHIFT 0x15
#define VGT_DEBUG_REG8__VGT_SPI_esvert_rtr_q_MASK 0x400000
#define VGT_DEBUG_REG8__VGT_SPI_esvert_rtr_q__SHIFT 0x16
#define VGT_DEBUG_REG8__r2_no_bp_rtr_MASK 0x800000
#define VGT_DEBUG_REG8__r2_no_bp_rtr__SHIFT 0x17
#define VGT_DEBUG_REG8__hold_for_es_flush_MASK 0x1000000
#define VGT_DEBUG_REG8__hold_for_es_flush__SHIFT 0x18
#define VGT_DEBUG_REG8__gs_event_fifo_empty_MASK 0x2000000
#define VGT_DEBUG_REG8__gs_event_fifo_empty__SHIFT 0x19
#define VGT_DEBUG_REG8__gsprim_buff_empty_q_MASK 0x4000000
#define VGT_DEBUG_REG8__gsprim_buff_empty_q__SHIFT 0x1a
#define VGT_DEBUG_REG8__gsprim_buff_full_q_MASK 0x8000000
#define VGT_DEBUG_REG8__gsprim_buff_full_q__SHIFT 0x1b
#define VGT_DEBUG_REG8__te_prim_fifo_empty_MASK 0x10000000
#define VGT_DEBUG_REG8__te_prim_fifo_empty__SHIFT 0x1c
#define VGT_DEBUG_REG8__te_prim_fifo_full_MASK 0x20000000
#define VGT_DEBUG_REG8__te_prim_fifo_full__SHIFT 0x1d
#define VGT_DEBUG_REG8__te_vert_fifo_empty_MASK 0x40000000
#define VGT_DEBUG_REG8__te_vert_fifo_empty__SHIFT 0x1e
#define VGT_DEBUG_REG8__te_vert_fifo_full_MASK 0x80000000
#define VGT_DEBUG_REG8__te_vert_fifo_full__SHIFT 0x1f
/*
 * VGT_DEBUG_REG9 bit-field layout.
 *
 * <field>_MASK selects the field's bits in the 32-bit register value and
 * <field>__SHIFT is the index of its least-significant bit; extract a field
 * with (reg & MASK) >> SHIFT.
 *
 * The fields below cover bits 0-31 with no gaps.  Multi-bit fields:
 *   indices_to_send_r2_q   bits 0-1
 *   gs_tbl_prim_cnt_r3_q   bits 11-17
 *   gs_tbl_state_r3_q      bits 19-21
 * Every other field is a single-bit flag.
 */
#define VGT_DEBUG_REG9__indices_to_send_r2_q_MASK 0x3
#define VGT_DEBUG_REG9__indices_to_send_r2_q__SHIFT 0x0
#define VGT_DEBUG_REG9__valid_indices_r3_MASK 0x4
#define VGT_DEBUG_REG9__valid_indices_r3__SHIFT 0x2
#define VGT_DEBUG_REG9__gs_eov_r3_MASK 0x8
#define VGT_DEBUG_REG9__gs_eov_r3__SHIFT 0x3
#define VGT_DEBUG_REG9__eop_indx_r3_MASK 0x10
#define VGT_DEBUG_REG9__eop_indx_r3__SHIFT 0x4
#define VGT_DEBUG_REG9__eop_prim_r3_MASK 0x20
#define VGT_DEBUG_REG9__eop_prim_r3__SHIFT 0x5
#define VGT_DEBUG_REG9__es_eov_r3_MASK 0x40
#define VGT_DEBUG_REG9__es_eov_r3__SHIFT 0x6
#define VGT_DEBUG_REG9__es_tbl_state_r3_q_0_MASK 0x80
#define VGT_DEBUG_REG9__es_tbl_state_r3_q_0__SHIFT 0x7
#define VGT_DEBUG_REG9__pending_es_send_r3_q_MASK 0x100
#define VGT_DEBUG_REG9__pending_es_send_r3_q__SHIFT 0x8
#define VGT_DEBUG_REG9__pending_es_flush_r3_MASK 0x200
#define VGT_DEBUG_REG9__pending_es_flush_r3__SHIFT 0x9
#define VGT_DEBUG_REG9__gs_tbl_num_es_per_gs_r3_q_not_0_MASK 0x400
#define VGT_DEBUG_REG9__gs_tbl_num_es_per_gs_r3_q_not_0__SHIFT 0xa
/* Seven-bit field: bits 11-17. */
#define VGT_DEBUG_REG9__gs_tbl_prim_cnt_r3_q_MASK 0x3f800
#define VGT_DEBUG_REG9__gs_tbl_prim_cnt_r3_q__SHIFT 0xb
#define VGT_DEBUG_REG9__gs_tbl_eop_r3_q_MASK 0x40000
#define VGT_DEBUG_REG9__gs_tbl_eop_r3_q__SHIFT 0x12
/* Three-bit field: bits 19-21. */
#define VGT_DEBUG_REG9__gs_tbl_state_r3_q_MASK 0x380000
#define VGT_DEBUG_REG9__gs_tbl_state_r3_q__SHIFT 0x13
#define VGT_DEBUG_REG9__gs_pending_state_r3_q_MASK 0x400000
#define VGT_DEBUG_REG9__gs_pending_state_r3_q__SHIFT 0x16
#define VGT_DEBUG_REG9__invalidate_rb_roll_over_q_MASK 0x800000
#define VGT_DEBUG_REG9__invalidate_rb_roll_over_q__SHIFT 0x17
#define VGT_DEBUG_REG9__gs_instancing_state_q_MASK 0x1000000
#define VGT_DEBUG_REG9__gs_instancing_state_q__SHIFT 0x18
#define VGT_DEBUG_REG9__es_per_gs_vert_cnt_r3_q_not_0_MASK 0x2000000
#define VGT_DEBUG_REG9__es_per_gs_vert_cnt_r3_q_not_0__SHIFT 0x19
#define VGT_DEBUG_REG9__gs_prim_per_es_ctr_r3_q_not_0_MASK 0x4000000
#define VGT_DEBUG_REG9__gs_prim_per_es_ctr_r3_q_not_0__SHIFT 0x1a
#define VGT_DEBUG_REG9__pre_r0_rtr_MASK 0x8000000
#define VGT_DEBUG_REG9__pre_r0_rtr__SHIFT 0x1b
#define VGT_DEBUG_REG9__valid_r3_q_MASK 0x10000000
#define VGT_DEBUG_REG9__valid_r3_q__SHIFT 0x1c
#define VGT_DEBUG_REG9__valid_pre_r0_q_MASK 0x20000000
#define VGT_DEBUG_REG9__valid_pre_r0_q__SHIFT 0x1d
#define VGT_DEBUG_REG9__SPARE0_MASK 0x40000000
#define VGT_DEBUG_REG9__SPARE0__SHIFT 0x1e
#define VGT_DEBUG_REG9__off_chip_hs_r2_q_MASK 0x80000000
#define VGT_DEBUG_REG9__off_chip_hs_r2_q__SHIFT 0x1f
/*
 * VGT_DEBUG_REG10 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout, in ascending bit order (covers bits 0..31):
 *   [4:0]   index_buffer_depth_r1_q
 *   [5]     eopg_r2_q
 *   [6]     eotg_r2_q
 *   [8:7]   onchip_gs_en_r0_q
 *   [10:9]  SPARE2
 *   [11]    rcm_mem_gsprim_re_qq
 *   [12]    rcm_mem_gsprim_re_q
 *   [22:13] gs_rb_space_avail_r3_q_9_0
 *   [31:23] es_rb_space_avail_r2_q_8_0
 */
#define VGT_DEBUG_REG10__index_buffer_depth_r1_q_MASK 0x1f
#define VGT_DEBUG_REG10__index_buffer_depth_r1_q__SHIFT 0x0
#define VGT_DEBUG_REG10__eopg_r2_q_MASK 0x20
#define VGT_DEBUG_REG10__eopg_r2_q__SHIFT 0x5
#define VGT_DEBUG_REG10__eotg_r2_q_MASK 0x40
#define VGT_DEBUG_REG10__eotg_r2_q__SHIFT 0x6
#define VGT_DEBUG_REG10__onchip_gs_en_r0_q_MASK 0x180
#define VGT_DEBUG_REG10__onchip_gs_en_r0_q__SHIFT 0x7
#define VGT_DEBUG_REG10__SPARE2_MASK 0x600
#define VGT_DEBUG_REG10__SPARE2__SHIFT 0x9
#define VGT_DEBUG_REG10__rcm_mem_gsprim_re_qq_MASK 0x800
#define VGT_DEBUG_REG10__rcm_mem_gsprim_re_qq__SHIFT 0xb
#define VGT_DEBUG_REG10__rcm_mem_gsprim_re_q_MASK 0x1000
#define VGT_DEBUG_REG10__rcm_mem_gsprim_re_q__SHIFT 0xc
#define VGT_DEBUG_REG10__gs_rb_space_avail_r3_q_9_0_MASK 0x7fe000
#define VGT_DEBUG_REG10__gs_rb_space_avail_r3_q_9_0__SHIFT 0xd
#define VGT_DEBUG_REG10__es_rb_space_avail_r2_q_8_0_MASK 0xff800000
#define VGT_DEBUG_REG10__es_rb_space_avail_r2_q_8_0__SHIFT 0x17
/*
 * VGT_DEBUG_REG11 bit-field constants.
 * <field>_MASK is the bit a field occupies; <field>__SHIFT is that bit's
 * index. This register is made up of 32 single-bit fields, listed in
 * ascending order from bit 0 (tm_busy_q) to bit 31 (no_active_states_r0).
 * Note that counters_avail_r0 (bit 11) and counters_available_r0 (bit 12)
 * are two distinct fields despite the similar names.
 */
#define VGT_DEBUG_REG11__tm_busy_q_MASK 0x1
#define VGT_DEBUG_REG11__tm_busy_q__SHIFT 0x0
#define VGT_DEBUG_REG11__tm_noif_busy_q_MASK 0x2
#define VGT_DEBUG_REG11__tm_noif_busy_q__SHIFT 0x1
#define VGT_DEBUG_REG11__tm_out_busy_q_MASK 0x4
#define VGT_DEBUG_REG11__tm_out_busy_q__SHIFT 0x2
#define VGT_DEBUG_REG11__es_rb_dealloc_fifo_busy_MASK 0x8
#define VGT_DEBUG_REG11__es_rb_dealloc_fifo_busy__SHIFT 0x3
#define VGT_DEBUG_REG11__vs_dealloc_tbl_busy_MASK 0x10
#define VGT_DEBUG_REG11__vs_dealloc_tbl_busy__SHIFT 0x4
#define VGT_DEBUG_REG11__SPARE1_MASK 0x20
#define VGT_DEBUG_REG11__SPARE1__SHIFT 0x5
#define VGT_DEBUG_REG11__spi_gsthread_fifo_busy_MASK 0x40
#define VGT_DEBUG_REG11__spi_gsthread_fifo_busy__SHIFT 0x6
#define VGT_DEBUG_REG11__spi_esthread_fifo_busy_MASK 0x80
#define VGT_DEBUG_REG11__spi_esthread_fifo_busy__SHIFT 0x7
#define VGT_DEBUG_REG11__hold_eswave_MASK 0x100
#define VGT_DEBUG_REG11__hold_eswave__SHIFT 0x8
#define VGT_DEBUG_REG11__es_rb_roll_over_r3_MASK 0x200
#define VGT_DEBUG_REG11__es_rb_roll_over_r3__SHIFT 0x9
#define VGT_DEBUG_REG11__counters_busy_r0_MASK 0x400
#define VGT_DEBUG_REG11__counters_busy_r0__SHIFT 0xa
#define VGT_DEBUG_REG11__counters_avail_r0_MASK 0x800
#define VGT_DEBUG_REG11__counters_avail_r0__SHIFT 0xb
#define VGT_DEBUG_REG11__counters_available_r0_MASK 0x1000
#define VGT_DEBUG_REG11__counters_available_r0__SHIFT 0xc
#define VGT_DEBUG_REG11__vs_event_fifo_rtr_MASK 0x2000
#define VGT_DEBUG_REG11__vs_event_fifo_rtr__SHIFT 0xd
#define VGT_DEBUG_REG11__VGT_SPI_gsthread_rtr_q_MASK 0x4000
#define VGT_DEBUG_REG11__VGT_SPI_gsthread_rtr_q__SHIFT 0xe
#define VGT_DEBUG_REG11__VGT_SPI_esthread_rtr_q_MASK 0x8000
#define VGT_DEBUG_REG11__VGT_SPI_esthread_rtr_q__SHIFT 0xf
#define VGT_DEBUG_REG11__gs_issue_rtr_MASK 0x10000
#define VGT_DEBUG_REG11__gs_issue_rtr__SHIFT 0x10
#define VGT_DEBUG_REG11__tm_pt_event_rtr_MASK 0x20000
#define VGT_DEBUG_REG11__tm_pt_event_rtr__SHIFT 0x11
#define VGT_DEBUG_REG11__SPARE0_MASK 0x40000
#define VGT_DEBUG_REG11__SPARE0__SHIFT 0x12
#define VGT_DEBUG_REG11__gs_r0_rtr_MASK 0x80000
#define VGT_DEBUG_REG11__gs_r0_rtr__SHIFT 0x13
#define VGT_DEBUG_REG11__es_r0_rtr_MASK 0x100000
#define VGT_DEBUG_REG11__es_r0_rtr__SHIFT 0x14
#define VGT_DEBUG_REG11__gog_tm_vs_event_rtr_MASK 0x200000
#define VGT_DEBUG_REG11__gog_tm_vs_event_rtr__SHIFT 0x15
#define VGT_DEBUG_REG11__tm_rcm_gs_event_rtr_MASK 0x400000
#define VGT_DEBUG_REG11__tm_rcm_gs_event_rtr__SHIFT 0x16
#define VGT_DEBUG_REG11__tm_rcm_gs_tbl_rtr_MASK 0x800000
#define VGT_DEBUG_REG11__tm_rcm_gs_tbl_rtr__SHIFT 0x17
#define VGT_DEBUG_REG11__tm_rcm_es_tbl_rtr_MASK 0x1000000
#define VGT_DEBUG_REG11__tm_rcm_es_tbl_rtr__SHIFT 0x18
#define VGT_DEBUG_REG11__vs_event_fifo_empty_MASK 0x2000000
#define VGT_DEBUG_REG11__vs_event_fifo_empty__SHIFT 0x19
#define VGT_DEBUG_REG11__vs_event_fifo_full_MASK 0x4000000
#define VGT_DEBUG_REG11__vs_event_fifo_full__SHIFT 0x1a
#define VGT_DEBUG_REG11__es_rb_dealloc_fifo_full_MASK 0x8000000
#define VGT_DEBUG_REG11__es_rb_dealloc_fifo_full__SHIFT 0x1b
#define VGT_DEBUG_REG11__vs_dealloc_tbl_full_MASK 0x10000000
#define VGT_DEBUG_REG11__vs_dealloc_tbl_full__SHIFT 0x1c
#define VGT_DEBUG_REG11__send_event_q_MASK 0x20000000
#define VGT_DEBUG_REG11__send_event_q__SHIFT 0x1d
#define VGT_DEBUG_REG11__es_tbl_empty_MASK 0x40000000
#define VGT_DEBUG_REG11__es_tbl_empty__SHIFT 0x1e
#define VGT_DEBUG_REG11__no_active_states_r0_MASK 0x80000000
#define VGT_DEBUG_REG11__no_active_states_r0__SHIFT 0x1f
/*
 * VGT_DEBUG_REG12 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [29:0]  gs_state0_r0_q .. gs_state9_r0_q, ten 3-bit fields packed
 *           back to back (gs_stateN_r0_q starts at bit 3*N)
 *   [30]    hold_eswave_eop
 *   [31]    SPARE0
 */
#define VGT_DEBUG_REG12__gs_state0_r0_q_MASK 0x7
#define VGT_DEBUG_REG12__gs_state0_r0_q__SHIFT 0x0
#define VGT_DEBUG_REG12__gs_state1_r0_q_MASK 0x38
#define VGT_DEBUG_REG12__gs_state1_r0_q__SHIFT 0x3
#define VGT_DEBUG_REG12__gs_state2_r0_q_MASK 0x1c0
#define VGT_DEBUG_REG12__gs_state2_r0_q__SHIFT 0x6
#define VGT_DEBUG_REG12__gs_state3_r0_q_MASK 0xe00
#define VGT_DEBUG_REG12__gs_state3_r0_q__SHIFT 0x9
#define VGT_DEBUG_REG12__gs_state4_r0_q_MASK 0x7000
#define VGT_DEBUG_REG12__gs_state4_r0_q__SHIFT 0xc
#define VGT_DEBUG_REG12__gs_state5_r0_q_MASK 0x38000
#define VGT_DEBUG_REG12__gs_state5_r0_q__SHIFT 0xf
#define VGT_DEBUG_REG12__gs_state6_r0_q_MASK 0x1c0000
#define VGT_DEBUG_REG12__gs_state6_r0_q__SHIFT 0x12
#define VGT_DEBUG_REG12__gs_state7_r0_q_MASK 0xe00000
#define VGT_DEBUG_REG12__gs_state7_r0_q__SHIFT 0x15
#define VGT_DEBUG_REG12__gs_state8_r0_q_MASK 0x7000000
#define VGT_DEBUG_REG12__gs_state8_r0_q__SHIFT 0x18
#define VGT_DEBUG_REG12__gs_state9_r0_q_MASK 0x38000000
#define VGT_DEBUG_REG12__gs_state9_r0_q__SHIFT 0x1b
#define VGT_DEBUG_REG12__hold_eswave_eop_MASK 0x40000000
#define VGT_DEBUG_REG12__hold_eswave_eop__SHIFT 0x1e
#define VGT_DEBUG_REG12__SPARE0_MASK 0x80000000
#define VGT_DEBUG_REG12__SPARE0__SHIFT 0x1f
/*
 * VGT_DEBUG_REG13 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [17:0]  gs_state10_r0_q .. gs_state15_r0_q, six 3-bit fields packed
 *           back to back starting at bit 0
 *   [21:18] gs_tbl_wrptr_r0_q_3_0
 *   [22]    gsfetch_done_fifo_cnt_q_not_0
 *   [23]    gsfetch_done_cnt_q_not_0
 *   [24]    es_tbl_full
 *   [25]    SPARE1
 *   [26]    SPARE0
 *   [31:27] active_cm_sm_r0_q
 */
#define VGT_DEBUG_REG13__gs_state10_r0_q_MASK 0x7
#define VGT_DEBUG_REG13__gs_state10_r0_q__SHIFT 0x0
#define VGT_DEBUG_REG13__gs_state11_r0_q_MASK 0x38
#define VGT_DEBUG_REG13__gs_state11_r0_q__SHIFT 0x3
#define VGT_DEBUG_REG13__gs_state12_r0_q_MASK 0x1c0
#define VGT_DEBUG_REG13__gs_state12_r0_q__SHIFT 0x6
#define VGT_DEBUG_REG13__gs_state13_r0_q_MASK 0xe00
#define VGT_DEBUG_REG13__gs_state13_r0_q__SHIFT 0x9
#define VGT_DEBUG_REG13__gs_state14_r0_q_MASK 0x7000
#define VGT_DEBUG_REG13__gs_state14_r0_q__SHIFT 0xc
#define VGT_DEBUG_REG13__gs_state15_r0_q_MASK 0x38000
#define VGT_DEBUG_REG13__gs_state15_r0_q__SHIFT 0xf
#define VGT_DEBUG_REG13__gs_tbl_wrptr_r0_q_3_0_MASK 0x3c0000
#define VGT_DEBUG_REG13__gs_tbl_wrptr_r0_q_3_0__SHIFT 0x12
#define VGT_DEBUG_REG13__gsfetch_done_fifo_cnt_q_not_0_MASK 0x400000
#define VGT_DEBUG_REG13__gsfetch_done_fifo_cnt_q_not_0__SHIFT 0x16
#define VGT_DEBUG_REG13__gsfetch_done_cnt_q_not_0_MASK 0x800000
#define VGT_DEBUG_REG13__gsfetch_done_cnt_q_not_0__SHIFT 0x17
#define VGT_DEBUG_REG13__es_tbl_full_MASK 0x1000000
#define VGT_DEBUG_REG13__es_tbl_full__SHIFT 0x18
#define VGT_DEBUG_REG13__SPARE1_MASK 0x2000000
#define VGT_DEBUG_REG13__SPARE1__SHIFT 0x19
#define VGT_DEBUG_REG13__SPARE0_MASK 0x4000000
#define VGT_DEBUG_REG13__SPARE0__SHIFT 0x1a
#define VGT_DEBUG_REG13__active_cm_sm_r0_q_MASK 0xf8000000
#define VGT_DEBUG_REG13__active_cm_sm_r0_q__SHIFT 0x1b
/*
 * VGT_DEBUG_REG14 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Multi-bit fields: SPARE3 [3:0], SPARE8 [8:7], SPARE2 [20:12],
 * SPARE [24:22]. Every other field is a single bit.
 */
#define VGT_DEBUG_REG14__SPARE3_MASK 0xf
#define VGT_DEBUG_REG14__SPARE3__SHIFT 0x0
#define VGT_DEBUG_REG14__gsfetch_done_fifo_full_MASK 0x10
#define VGT_DEBUG_REG14__gsfetch_done_fifo_full__SHIFT 0x4
#define VGT_DEBUG_REG14__gs_rb_space_avail_r0_MASK 0x20
#define VGT_DEBUG_REG14__gs_rb_space_avail_r0__SHIFT 0x5
#define VGT_DEBUG_REG14__smx_es_done_cnt_r0_q_not_0_MASK 0x40
#define VGT_DEBUG_REG14__smx_es_done_cnt_r0_q_not_0__SHIFT 0x6
#define VGT_DEBUG_REG14__SPARE8_MASK 0x180
#define VGT_DEBUG_REG14__SPARE8__SHIFT 0x7
#define VGT_DEBUG_REG14__vs_done_cnt_q_not_0_MASK 0x200
#define VGT_DEBUG_REG14__vs_done_cnt_q_not_0__SHIFT 0x9
#define VGT_DEBUG_REG14__es_flush_cnt_busy_q_MASK 0x400
#define VGT_DEBUG_REG14__es_flush_cnt_busy_q__SHIFT 0xa
#define VGT_DEBUG_REG14__gs_tbl_full_r0_MASK 0x800
#define VGT_DEBUG_REG14__gs_tbl_full_r0__SHIFT 0xb
#define VGT_DEBUG_REG14__SPARE2_MASK 0x1ff000
#define VGT_DEBUG_REG14__SPARE2__SHIFT 0xc
#define VGT_DEBUG_REG14__se1spi_gsthread_fifo_busy_MASK 0x200000
#define VGT_DEBUG_REG14__se1spi_gsthread_fifo_busy__SHIFT 0x15
#define VGT_DEBUG_REG14__SPARE_MASK 0x1c00000
#define VGT_DEBUG_REG14__SPARE__SHIFT 0x16
#define VGT_DEBUG_REG14__VGT_SE1SPI_gsthread_rtr_q_MASK 0x2000000
#define VGT_DEBUG_REG14__VGT_SE1SPI_gsthread_rtr_q__SHIFT 0x19
#define VGT_DEBUG_REG14__smx1_es_done_cnt_r0_q_not_0_MASK 0x4000000
#define VGT_DEBUG_REG14__smx1_es_done_cnt_r0_q_not_0__SHIFT 0x1a
#define VGT_DEBUG_REG14__se1spi_esthread_fifo_busy_MASK 0x8000000
#define VGT_DEBUG_REG14__se1spi_esthread_fifo_busy__SHIFT 0x1b
#define VGT_DEBUG_REG14__SPARE1_MASK 0x10000000
#define VGT_DEBUG_REG14__SPARE1__SHIFT 0x1c
#define VGT_DEBUG_REG14__gsfetch_done_se1_cnt_q_not_0_MASK 0x20000000
#define VGT_DEBUG_REG14__gsfetch_done_se1_cnt_q_not_0__SHIFT 0x1d
#define VGT_DEBUG_REG14__SPARE0_MASK 0x40000000
#define VGT_DEBUG_REG14__SPARE0__SHIFT 0x1e
#define VGT_DEBUG_REG14__VGT_SE1SPI_esthread_rtr_q_MASK 0x80000000
#define VGT_DEBUG_REG14__VGT_SE1SPI_esthread_rtr_q__SHIFT 0x1f
/*
 * VGT_DEBUG_REG15 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [0]     cm_busy_q
 *   [1]     counters_busy_q
 *   [2]     output_fifo_empty
 *   [3]     output_fifo_full
 *   [4]     counters_full
 *   [9:5]   active_sm_q
 *   [14:10] entry_rdptr_q
 *   [19:15] cntr_tbl_wrptr_q
 *   [25:20] SPARE25
 *   [27:26] st_cut_mode_q
 *   [28]    gs_done_array_q_not_0
 *   [31:29] SPARE31
 */
#define VGT_DEBUG_REG15__cm_busy_q_MASK 0x1
#define VGT_DEBUG_REG15__cm_busy_q__SHIFT 0x0
#define VGT_DEBUG_REG15__counters_busy_q_MASK 0x2
#define VGT_DEBUG_REG15__counters_busy_q__SHIFT 0x1
#define VGT_DEBUG_REG15__output_fifo_empty_MASK 0x4
#define VGT_DEBUG_REG15__output_fifo_empty__SHIFT 0x2
#define VGT_DEBUG_REG15__output_fifo_full_MASK 0x8
#define VGT_DEBUG_REG15__output_fifo_full__SHIFT 0x3
#define VGT_DEBUG_REG15__counters_full_MASK 0x10
#define VGT_DEBUG_REG15__counters_full__SHIFT 0x4
#define VGT_DEBUG_REG15__active_sm_q_MASK 0x3e0
#define VGT_DEBUG_REG15__active_sm_q__SHIFT 0x5
#define VGT_DEBUG_REG15__entry_rdptr_q_MASK 0x7c00
#define VGT_DEBUG_REG15__entry_rdptr_q__SHIFT 0xa
#define VGT_DEBUG_REG15__cntr_tbl_wrptr_q_MASK 0xf8000
#define VGT_DEBUG_REG15__cntr_tbl_wrptr_q__SHIFT 0xf
#define VGT_DEBUG_REG15__SPARE25_MASK 0x3f00000
#define VGT_DEBUG_REG15__SPARE25__SHIFT 0x14
#define VGT_DEBUG_REG15__st_cut_mode_q_MASK 0xc000000
#define VGT_DEBUG_REG15__st_cut_mode_q__SHIFT 0x1a
#define VGT_DEBUG_REG15__gs_done_array_q_not_0_MASK 0x10000000
#define VGT_DEBUG_REG15__gs_done_array_q_not_0__SHIFT 0x1c
#define VGT_DEBUG_REG15__SPARE31_MASK 0xe0000000
#define VGT_DEBUG_REG15__SPARE31__SHIFT 0x1d
/*
 * VGT_DEBUG_REG16 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Multi-bit fields: gog_state_q [3:1], gog_out_prim_state_sel [27:25].
 * Every other field is a single bit.
 */
#define VGT_DEBUG_REG16__gog_busy_MASK 0x1
#define VGT_DEBUG_REG16__gog_busy__SHIFT 0x0
#define VGT_DEBUG_REG16__gog_state_q_MASK 0xe
#define VGT_DEBUG_REG16__gog_state_q__SHIFT 0x1
#define VGT_DEBUG_REG16__r0_rtr_MASK 0x10
#define VGT_DEBUG_REG16__r0_rtr__SHIFT 0x4
#define VGT_DEBUG_REG16__r1_rtr_MASK 0x20
#define VGT_DEBUG_REG16__r1_rtr__SHIFT 0x5
#define VGT_DEBUG_REG16__r1_upstream_rtr_MASK 0x40
#define VGT_DEBUG_REG16__r1_upstream_rtr__SHIFT 0x6
#define VGT_DEBUG_REG16__r2_vs_tbl_rtr_MASK 0x80
#define VGT_DEBUG_REG16__r2_vs_tbl_rtr__SHIFT 0x7
#define VGT_DEBUG_REG16__r2_prim_rtr_MASK 0x100
#define VGT_DEBUG_REG16__r2_prim_rtr__SHIFT 0x8
#define VGT_DEBUG_REG16__r2_indx_rtr_MASK 0x200
#define VGT_DEBUG_REG16__r2_indx_rtr__SHIFT 0x9
#define VGT_DEBUG_REG16__r2_rtr_MASK 0x400
#define VGT_DEBUG_REG16__r2_rtr__SHIFT 0xa
#define VGT_DEBUG_REG16__gog_tm_vs_event_rtr_MASK 0x800
#define VGT_DEBUG_REG16__gog_tm_vs_event_rtr__SHIFT 0xb
#define VGT_DEBUG_REG16__r3_force_vs_tbl_we_rtr_MASK 0x1000
#define VGT_DEBUG_REG16__r3_force_vs_tbl_we_rtr__SHIFT 0xc
#define VGT_DEBUG_REG16__indx_valid_r2_q_MASK 0x2000
#define VGT_DEBUG_REG16__indx_valid_r2_q__SHIFT 0xd
#define VGT_DEBUG_REG16__prim_valid_r2_q_MASK 0x4000
#define VGT_DEBUG_REG16__prim_valid_r2_q__SHIFT 0xe
#define VGT_DEBUG_REG16__valid_r2_q_MASK 0x8000
#define VGT_DEBUG_REG16__valid_r2_q__SHIFT 0xf
#define VGT_DEBUG_REG16__prim_valid_r1_q_MASK 0x10000
#define VGT_DEBUG_REG16__prim_valid_r1_q__SHIFT 0x10
#define VGT_DEBUG_REG16__indx_valid_r1_q_MASK 0x20000
#define VGT_DEBUG_REG16__indx_valid_r1_q__SHIFT 0x11
#define VGT_DEBUG_REG16__valid_r1_q_MASK 0x40000
#define VGT_DEBUG_REG16__valid_r1_q__SHIFT 0x12
#define VGT_DEBUG_REG16__indx_valid_r0_q_MASK 0x80000
#define VGT_DEBUG_REG16__indx_valid_r0_q__SHIFT 0x13
#define VGT_DEBUG_REG16__prim_valid_r0_q_MASK 0x100000
#define VGT_DEBUG_REG16__prim_valid_r0_q__SHIFT 0x14
#define VGT_DEBUG_REG16__valid_r0_q_MASK 0x200000
#define VGT_DEBUG_REG16__valid_r0_q__SHIFT 0x15
#define VGT_DEBUG_REG16__send_event_q_MASK 0x400000
#define VGT_DEBUG_REG16__send_event_q__SHIFT 0x16
#define VGT_DEBUG_REG16__SPARE24_MASK 0x800000
#define VGT_DEBUG_REG16__SPARE24__SHIFT 0x17
#define VGT_DEBUG_REG16__vert_seen_since_sopg_r2_q_MASK 0x1000000
#define VGT_DEBUG_REG16__vert_seen_since_sopg_r2_q__SHIFT 0x18
#define VGT_DEBUG_REG16__gog_out_prim_state_sel_MASK 0xe000000
#define VGT_DEBUG_REG16__gog_out_prim_state_sel__SHIFT 0x19
#define VGT_DEBUG_REG16__multiple_streams_en_r1_q_MASK 0x10000000
#define VGT_DEBUG_REG16__multiple_streams_en_r1_q__SHIFT 0x1c
#define VGT_DEBUG_REG16__vs_vert_count_r2_q_not_0_MASK 0x20000000
#define VGT_DEBUG_REG16__vs_vert_count_r2_q_not_0__SHIFT 0x1d
#define VGT_DEBUG_REG16__num_gs_r2_q_not_0_MASK 0x40000000
#define VGT_DEBUG_REG16__num_gs_r2_q_not_0__SHIFT 0x1e
#define VGT_DEBUG_REG16__new_vs_thread_r2_MASK 0x80000000
#define VGT_DEBUG_REG16__new_vs_thread_r2__SHIFT 0x1f
/*
 * VGT_DEBUG_REG17 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [5:0]   gog_out_prim_rel_indx2_5_0
 *   [11:6]  gog_out_prim_rel_indx1_5_0
 *   [17:12] gog_out_prim_rel_indx0_5_0
 *   [31:18] gog_out_indx_13_0
 * Note the rel_indx fields are numbered in descending order (2, 1, 0) as the
 * bit position ascends.
 */
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx2_5_0_MASK 0x3f
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx2_5_0__SHIFT 0x0
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx1_5_0_MASK 0xfc0
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx1_5_0__SHIFT 0x6
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx0_5_0_MASK 0x3f000
#define VGT_DEBUG_REG17__gog_out_prim_rel_indx0_5_0__SHIFT 0xc
#define VGT_DEBUG_REG17__gog_out_indx_13_0_MASK 0xfffc0000
#define VGT_DEBUG_REG17__gog_out_indx_13_0__SHIFT 0x12
/*
 * VGT_DEBUG_REG18 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Multi-bit fields: indices_to_send_q [10:8], sub_prim_type_r0_q [26:24],
 * components_valid_r0_q [31:29]. Every other field is a single bit.
 */
#define VGT_DEBUG_REG18__grp_vr_valid_MASK 0x1
#define VGT_DEBUG_REG18__grp_vr_valid__SHIFT 0x0
#define VGT_DEBUG_REG18__pipe0_dr_MASK 0x2
#define VGT_DEBUG_REG18__pipe0_dr__SHIFT 0x1
#define VGT_DEBUG_REG18__pipe1_dr_MASK 0x4
#define VGT_DEBUG_REG18__pipe1_dr__SHIFT 0x2
#define VGT_DEBUG_REG18__vr_grp_read_MASK 0x8
#define VGT_DEBUG_REG18__vr_grp_read__SHIFT 0x3
#define VGT_DEBUG_REG18__pipe0_rtr_MASK 0x10
#define VGT_DEBUG_REG18__pipe0_rtr__SHIFT 0x4
#define VGT_DEBUG_REG18__pipe1_rtr_MASK 0x20
#define VGT_DEBUG_REG18__pipe1_rtr__SHIFT 0x5
#define VGT_DEBUG_REG18__out_vr_indx_read_MASK 0x40
#define VGT_DEBUG_REG18__out_vr_indx_read__SHIFT 0x6
#define VGT_DEBUG_REG18__out_vr_prim_read_MASK 0x80
#define VGT_DEBUG_REG18__out_vr_prim_read__SHIFT 0x7
#define VGT_DEBUG_REG18__indices_to_send_q_MASK 0x700
#define VGT_DEBUG_REG18__indices_to_send_q__SHIFT 0x8
#define VGT_DEBUG_REG18__valid_indices_MASK 0x800
#define VGT_DEBUG_REG18__valid_indices__SHIFT 0xb
#define VGT_DEBUG_REG18__last_indx_of_prim_MASK 0x1000
#define VGT_DEBUG_REG18__last_indx_of_prim__SHIFT 0xc
#define VGT_DEBUG_REG18__indx0_new_d_MASK 0x2000
#define VGT_DEBUG_REG18__indx0_new_d__SHIFT 0xd
#define VGT_DEBUG_REG18__indx1_new_d_MASK 0x4000
#define VGT_DEBUG_REG18__indx1_new_d__SHIFT 0xe
#define VGT_DEBUG_REG18__indx2_new_d_MASK 0x8000
#define VGT_DEBUG_REG18__indx2_new_d__SHIFT 0xf
#define VGT_DEBUG_REG18__indx2_hit_d_MASK 0x10000
#define VGT_DEBUG_REG18__indx2_hit_d__SHIFT 0x10
#define VGT_DEBUG_REG18__indx1_hit_d_MASK 0x20000
#define VGT_DEBUG_REG18__indx1_hit_d__SHIFT 0x11
#define VGT_DEBUG_REG18__indx0_hit_d_MASK 0x40000
#define VGT_DEBUG_REG18__indx0_hit_d__SHIFT 0x12
#define VGT_DEBUG_REG18__st_vertex_reuse_off_r0_q_MASK 0x80000
#define VGT_DEBUG_REG18__st_vertex_reuse_off_r0_q__SHIFT 0x13
#define VGT_DEBUG_REG18__last_group_of_instance_r0_q_MASK 0x100000
#define VGT_DEBUG_REG18__last_group_of_instance_r0_q__SHIFT 0x14
#define VGT_DEBUG_REG18__null_primitive_r0_q_MASK 0x200000
#define VGT_DEBUG_REG18__null_primitive_r0_q__SHIFT 0x15
#define VGT_DEBUG_REG18__eop_r0_q_MASK 0x400000
#define VGT_DEBUG_REG18__eop_r0_q__SHIFT 0x16
#define VGT_DEBUG_REG18__eject_vtx_vect_r1_d_MASK 0x800000
#define VGT_DEBUG_REG18__eject_vtx_vect_r1_d__SHIFT 0x17
#define VGT_DEBUG_REG18__sub_prim_type_r0_q_MASK 0x7000000
#define VGT_DEBUG_REG18__sub_prim_type_r0_q__SHIFT 0x18
#define VGT_DEBUG_REG18__gs_scenario_a_r0_q_MASK 0x8000000
#define VGT_DEBUG_REG18__gs_scenario_a_r0_q__SHIFT 0x1b
#define VGT_DEBUG_REG18__gs_scenario_b_r0_q_MASK 0x10000000
#define VGT_DEBUG_REG18__gs_scenario_b_r0_q__SHIFT 0x1c
#define VGT_DEBUG_REG18__components_valid_r0_q_MASK 0xe0000000
#define VGT_DEBUG_REG18__components_valid_r0_q__SHIFT 0x1d
/*
 * VGT_DEBUG_REG19 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Multi-bit fields: buffered_prim_type_event [25:20],
 * num_new_unique_rel_indx [29:28]. Every other field is a single bit.
 */
#define VGT_DEBUG_REG19__separate_out_busy_q_MASK 0x1
#define VGT_DEBUG_REG19__separate_out_busy_q__SHIFT 0x0
#define VGT_DEBUG_REG19__separate_out_indx_busy_q_MASK 0x2
#define VGT_DEBUG_REG19__separate_out_indx_busy_q__SHIFT 0x1
#define VGT_DEBUG_REG19__prim_buffer_empty_MASK 0x4
#define VGT_DEBUG_REG19__prim_buffer_empty__SHIFT 0x2
#define VGT_DEBUG_REG19__prim_buffer_full_MASK 0x8
#define VGT_DEBUG_REG19__prim_buffer_full__SHIFT 0x3
#define VGT_DEBUG_REG19__pa_clips_fifo_busy_q_MASK 0x10
#define VGT_DEBUG_REG19__pa_clips_fifo_busy_q__SHIFT 0x4
#define VGT_DEBUG_REG19__pa_clipp_fifo_busy_q_MASK 0x20
#define VGT_DEBUG_REG19__pa_clipp_fifo_busy_q__SHIFT 0x5
#define VGT_DEBUG_REG19__VGT_PA_clips_rtr_q_MASK 0x40
#define VGT_DEBUG_REG19__VGT_PA_clips_rtr_q__SHIFT 0x6
#define VGT_DEBUG_REG19__VGT_PA_clipp_rtr_q_MASK 0x80
#define VGT_DEBUG_REG19__VGT_PA_clipp_rtr_q__SHIFT 0x7
#define VGT_DEBUG_REG19__spi_vsthread_fifo_busy_q_MASK 0x100
#define VGT_DEBUG_REG19__spi_vsthread_fifo_busy_q__SHIFT 0x8
#define VGT_DEBUG_REG19__spi_vsvert_fifo_busy_q_MASK 0x200
#define VGT_DEBUG_REG19__spi_vsvert_fifo_busy_q__SHIFT 0x9
#define VGT_DEBUG_REG19__pa_clipv_fifo_busy_q_MASK 0x400
#define VGT_DEBUG_REG19__pa_clipv_fifo_busy_q__SHIFT 0xa
#define VGT_DEBUG_REG19__hold_prim_MASK 0x800
#define VGT_DEBUG_REG19__hold_prim__SHIFT 0xb
#define VGT_DEBUG_REG19__VGT_SPI_vsthread_rtr_q_MASK 0x1000
#define VGT_DEBUG_REG19__VGT_SPI_vsthread_rtr_q__SHIFT 0xc
#define VGT_DEBUG_REG19__VGT_SPI_vsvert_rtr_q_MASK 0x2000
#define VGT_DEBUG_REG19__VGT_SPI_vsvert_rtr_q__SHIFT 0xd
#define VGT_DEBUG_REG19__VGT_PA_clipv_rtr_q_MASK 0x4000
#define VGT_DEBUG_REG19__VGT_PA_clipv_rtr_q__SHIFT 0xe
#define VGT_DEBUG_REG19__new_packet_q_MASK 0x8000
#define VGT_DEBUG_REG19__new_packet_q__SHIFT 0xf
#define VGT_DEBUG_REG19__buffered_prim_event_MASK 0x10000
#define VGT_DEBUG_REG19__buffered_prim_event__SHIFT 0x10
#define VGT_DEBUG_REG19__buffered_prim_null_primitive_MASK 0x20000
#define VGT_DEBUG_REG19__buffered_prim_null_primitive__SHIFT 0x11
#define VGT_DEBUG_REG19__buffered_prim_eop_MASK 0x40000
#define VGT_DEBUG_REG19__buffered_prim_eop__SHIFT 0x12
#define VGT_DEBUG_REG19__buffered_prim_eject_vtx_vect_MASK 0x80000
#define VGT_DEBUG_REG19__buffered_prim_eject_vtx_vect__SHIFT 0x13
#define VGT_DEBUG_REG19__buffered_prim_type_event_MASK 0x3f00000
#define VGT_DEBUG_REG19__buffered_prim_type_event__SHIFT 0x14
#define VGT_DEBUG_REG19__VGT_SE1SPI_vswave_rtr_q_MASK 0x4000000
#define VGT_DEBUG_REG19__VGT_SE1SPI_vswave_rtr_q__SHIFT 0x1a
#define VGT_DEBUG_REG19__VGT_SE1SPI_vsvert_rtr_q_MASK 0x8000000
#define VGT_DEBUG_REG19__VGT_SE1SPI_vsvert_rtr_q__SHIFT 0x1b
#define VGT_DEBUG_REG19__num_new_unique_rel_indx_MASK 0x30000000
#define VGT_DEBUG_REG19__num_new_unique_rel_indx__SHIFT 0x1c
#define VGT_DEBUG_REG19__null_terminate_vtx_vector_MASK 0x40000000
#define VGT_DEBUG_REG19__null_terminate_vtx_vector__SHIFT 0x1e
#define VGT_DEBUG_REG19__filter_event_MASK 0x80000000
#define VGT_DEBUG_REG19__filter_event__SHIFT 0x1f
/*
 * VGT_DEBUG_REG20 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [15:0]  dbg_VGT_SPI_vsthread_sovertexindex
 *   [16]    dbg_VGT_SPI_vsthread_sovertexcount_not_0
 *   [17]    SPARE17
 *   [21:18] alloc_counter_q
 *   [28:22] curr_dealloc_distance_q
 *   [29]    new_allocate_q
 *   [30]    curr_slot_in_vtx_vect_q_not_0
 *   [31]    int_vtx_counter_q_not_0
 */
#define VGT_DEBUG_REG20__dbg_VGT_SPI_vsthread_sovertexindex_MASK 0xffff
#define VGT_DEBUG_REG20__dbg_VGT_SPI_vsthread_sovertexindex__SHIFT 0x0
#define VGT_DEBUG_REG20__dbg_VGT_SPI_vsthread_sovertexcount_not_0_MASK 0x10000
#define VGT_DEBUG_REG20__dbg_VGT_SPI_vsthread_sovertexcount_not_0__SHIFT 0x10
#define VGT_DEBUG_REG20__SPARE17_MASK 0x20000
#define VGT_DEBUG_REG20__SPARE17__SHIFT 0x11
#define VGT_DEBUG_REG20__alloc_counter_q_MASK 0x3c0000
#define VGT_DEBUG_REG20__alloc_counter_q__SHIFT 0x12
#define VGT_DEBUG_REG20__curr_dealloc_distance_q_MASK 0x1fc00000
#define VGT_DEBUG_REG20__curr_dealloc_distance_q__SHIFT 0x16
#define VGT_DEBUG_REG20__new_allocate_q_MASK 0x20000000
#define VGT_DEBUG_REG20__new_allocate_q__SHIFT 0x1d
#define VGT_DEBUG_REG20__curr_slot_in_vtx_vect_q_not_0_MASK 0x40000000
#define VGT_DEBUG_REG20__curr_slot_in_vtx_vect_q_not_0__SHIFT 0x1e
#define VGT_DEBUG_REG20__int_vtx_counter_q_not_0_MASK 0x80000000
#define VGT_DEBUG_REG20__int_vtx_counter_q_not_0__SHIFT 0x1f
/*
 * VGT_DEBUG_REG21 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * The only multi-bit field is stateid_p0_q [19:17]. Every other field is a
 * single bit.
 */
#define VGT_DEBUG_REG21__out_indx_fifo_empty_MASK 0x1
#define VGT_DEBUG_REG21__out_indx_fifo_empty__SHIFT 0x0
#define VGT_DEBUG_REG21__indx_side_fifo_empty_MASK 0x2
#define VGT_DEBUG_REG21__indx_side_fifo_empty__SHIFT 0x1
#define VGT_DEBUG_REG21__pipe0_dr_MASK 0x4
#define VGT_DEBUG_REG21__pipe0_dr__SHIFT 0x2
#define VGT_DEBUG_REG21__pipe1_dr_MASK 0x8
#define VGT_DEBUG_REG21__pipe1_dr__SHIFT 0x3
#define VGT_DEBUG_REG21__pipe2_dr_MASK 0x10
#define VGT_DEBUG_REG21__pipe2_dr__SHIFT 0x4
#define VGT_DEBUG_REG21__vsthread_buff_empty_MASK 0x20
#define VGT_DEBUG_REG21__vsthread_buff_empty__SHIFT 0x5
#define VGT_DEBUG_REG21__out_indx_fifo_full_MASK 0x40
#define VGT_DEBUG_REG21__out_indx_fifo_full__SHIFT 0x6
#define VGT_DEBUG_REG21__indx_side_fifo_full_MASK 0x80
#define VGT_DEBUG_REG21__indx_side_fifo_full__SHIFT 0x7
#define VGT_DEBUG_REG21__pipe0_rtr_MASK 0x100
#define VGT_DEBUG_REG21__pipe0_rtr__SHIFT 0x8
#define VGT_DEBUG_REG21__pipe1_rtr_MASK 0x200
#define VGT_DEBUG_REG21__pipe1_rtr__SHIFT 0x9
#define VGT_DEBUG_REG21__pipe2_rtr_MASK 0x400
#define VGT_DEBUG_REG21__pipe2_rtr__SHIFT 0xa
#define VGT_DEBUG_REG21__vsthread_buff_full_MASK 0x800
#define VGT_DEBUG_REG21__vsthread_buff_full__SHIFT 0xb
#define VGT_DEBUG_REG21__interfaces_rtr_MASK 0x1000
#define VGT_DEBUG_REG21__interfaces_rtr__SHIFT 0xc
#define VGT_DEBUG_REG21__indx_count_q_not_0_MASK 0x2000
#define VGT_DEBUG_REG21__indx_count_q_not_0__SHIFT 0xd
#define VGT_DEBUG_REG21__wait_for_external_eopg_q_MASK 0x4000
#define VGT_DEBUG_REG21__wait_for_external_eopg_q__SHIFT 0xe
#define VGT_DEBUG_REG21__full_state_p1_q_MASK 0x8000
#define VGT_DEBUG_REG21__full_state_p1_q__SHIFT 0xf
#define VGT_DEBUG_REG21__indx_side_indx_valid_MASK 0x10000
#define VGT_DEBUG_REG21__indx_side_indx_valid__SHIFT 0x10
#define VGT_DEBUG_REG21__stateid_p0_q_MASK 0xe0000
#define VGT_DEBUG_REG21__stateid_p0_q__SHIFT 0x11
#define VGT_DEBUG_REG21__is_event_p0_q_MASK 0x100000
#define VGT_DEBUG_REG21__is_event_p0_q__SHIFT 0x14
#define VGT_DEBUG_REG21__lshs_dealloc_p1_MASK 0x200000
#define VGT_DEBUG_REG21__lshs_dealloc_p1__SHIFT 0x15
#define VGT_DEBUG_REG21__stream_id_r2_q_MASK 0x400000
#define VGT_DEBUG_REG21__stream_id_r2_q__SHIFT 0x16
#define VGT_DEBUG_REG21__vtx_vect_counter_q_not_0_MASK 0x800000
#define VGT_DEBUG_REG21__vtx_vect_counter_q_not_0__SHIFT 0x17
#define VGT_DEBUG_REG21__buff_full_p1_MASK 0x1000000
#define VGT_DEBUG_REG21__buff_full_p1__SHIFT 0x18
#define VGT_DEBUG_REG21__strmout_valid_p1_MASK 0x2000000
#define VGT_DEBUG_REG21__strmout_valid_p1__SHIFT 0x19
#define VGT_DEBUG_REG21__eotg_r2_q_MASK 0x4000000
#define VGT_DEBUG_REG21__eotg_r2_q__SHIFT 0x1a
#define VGT_DEBUG_REG21__null_r2_q_MASK 0x8000000
#define VGT_DEBUG_REG21__null_r2_q__SHIFT 0x1b
#define VGT_DEBUG_REG21__p0_dr_MASK 0x10000000
#define VGT_DEBUG_REG21__p0_dr__SHIFT 0x1c
#define VGT_DEBUG_REG21__p0_rtr_MASK 0x20000000
#define VGT_DEBUG_REG21__p0_rtr__SHIFT 0x1d
#define VGT_DEBUG_REG21__eopg_p0_q_MASK 0x40000000
#define VGT_DEBUG_REG21__eopg_p0_q__SHIFT 0x1e
#define VGT_DEBUG_REG21__p0_nobp_MASK 0x80000000
#define VGT_DEBUG_REG21__p0_nobp__SHIFT 0x1f
/*
 * VGT_DEBUG_REG22 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit.
 * The register holds sixteen 2-bit fields, cm_state16 .. cm_state31, packed
 * back to back: cm_state(16+k) occupies bits [2k+1:2k], so cm_state16 is
 * [1:0] and cm_state31 is [31:30].
 */
#define VGT_DEBUG_REG22__cm_state16_MASK 0x3
#define VGT_DEBUG_REG22__cm_state16__SHIFT 0x0
#define VGT_DEBUG_REG22__cm_state17_MASK 0xc
#define VGT_DEBUG_REG22__cm_state17__SHIFT 0x2
#define VGT_DEBUG_REG22__cm_state18_MASK 0x30
#define VGT_DEBUG_REG22__cm_state18__SHIFT 0x4
#define VGT_DEBUG_REG22__cm_state19_MASK 0xc0
#define VGT_DEBUG_REG22__cm_state19__SHIFT 0x6
#define VGT_DEBUG_REG22__cm_state20_MASK 0x300
#define VGT_DEBUG_REG22__cm_state20__SHIFT 0x8
#define VGT_DEBUG_REG22__cm_state21_MASK 0xc00
#define VGT_DEBUG_REG22__cm_state21__SHIFT 0xa
#define VGT_DEBUG_REG22__cm_state22_MASK 0x3000
#define VGT_DEBUG_REG22__cm_state22__SHIFT 0xc
#define VGT_DEBUG_REG22__cm_state23_MASK 0xc000
#define VGT_DEBUG_REG22__cm_state23__SHIFT 0xe
#define VGT_DEBUG_REG22__cm_state24_MASK 0x30000
#define VGT_DEBUG_REG22__cm_state24__SHIFT 0x10
#define VGT_DEBUG_REG22__cm_state25_MASK 0xc0000
#define VGT_DEBUG_REG22__cm_state25__SHIFT 0x12
#define VGT_DEBUG_REG22__cm_state26_MASK 0x300000
#define VGT_DEBUG_REG22__cm_state26__SHIFT 0x14
#define VGT_DEBUG_REG22__cm_state27_MASK 0xc00000
#define VGT_DEBUG_REG22__cm_state27__SHIFT 0x16
#define VGT_DEBUG_REG22__cm_state28_MASK 0x3000000
#define VGT_DEBUG_REG22__cm_state28__SHIFT 0x18
#define VGT_DEBUG_REG22__cm_state29_MASK 0xc000000
#define VGT_DEBUG_REG22__cm_state29__SHIFT 0x1a
#define VGT_DEBUG_REG22__cm_state30_MASK 0x30000000
#define VGT_DEBUG_REG22__cm_state30__SHIFT 0x1c
#define VGT_DEBUG_REG22__cm_state31_MASK 0xc0000000
#define VGT_DEBUG_REG22__cm_state31__SHIFT 0x1e
/*
 * VGT_DEBUG_REG23 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Bits 0..14 are single-bit fields. The multi-bit fields are
 * new_verts_r2_q [16:15], verts_sent_r2_q [20:17],
 * prim_state_sel_r2_q [23:21] and SPARE [31:24].
 */
#define VGT_DEBUG_REG23__frmt_busy_MASK 0x1
#define VGT_DEBUG_REG23__frmt_busy__SHIFT 0x0
#define VGT_DEBUG_REG23__rcm_frmt_vert_rtr_MASK 0x2
#define VGT_DEBUG_REG23__rcm_frmt_vert_rtr__SHIFT 0x1
#define VGT_DEBUG_REG23__rcm_frmt_prim_rtr_MASK 0x4
#define VGT_DEBUG_REG23__rcm_frmt_prim_rtr__SHIFT 0x2
#define VGT_DEBUG_REG23__prim_r3_rtr_MASK 0x8
#define VGT_DEBUG_REG23__prim_r3_rtr__SHIFT 0x3
#define VGT_DEBUG_REG23__prim_r2_rtr_MASK 0x10
#define VGT_DEBUG_REG23__prim_r2_rtr__SHIFT 0x4
#define VGT_DEBUG_REG23__vert_r3_rtr_MASK 0x20
#define VGT_DEBUG_REG23__vert_r3_rtr__SHIFT 0x5
#define VGT_DEBUG_REG23__vert_r2_rtr_MASK 0x40
#define VGT_DEBUG_REG23__vert_r2_rtr__SHIFT 0x6
#define VGT_DEBUG_REG23__vert_r1_rtr_MASK 0x80
#define VGT_DEBUG_REG23__vert_r1_rtr__SHIFT 0x7
#define VGT_DEBUG_REG23__vert_r0_rtr_MASK 0x100
#define VGT_DEBUG_REG23__vert_r0_rtr__SHIFT 0x8
#define VGT_DEBUG_REG23__prim_fifo_empty_MASK 0x200
#define VGT_DEBUG_REG23__prim_fifo_empty__SHIFT 0x9
#define VGT_DEBUG_REG23__prim_fifo_full_MASK 0x400
#define VGT_DEBUG_REG23__prim_fifo_full__SHIFT 0xa
#define VGT_DEBUG_REG23__vert_dr_r2_q_MASK 0x800
#define VGT_DEBUG_REG23__vert_dr_r2_q__SHIFT 0xb
#define VGT_DEBUG_REG23__prim_dr_r2_q_MASK 0x1000
#define VGT_DEBUG_REG23__prim_dr_r2_q__SHIFT 0xc
#define VGT_DEBUG_REG23__vert_dr_r1_q_MASK 0x2000
#define VGT_DEBUG_REG23__vert_dr_r1_q__SHIFT 0xd
#define VGT_DEBUG_REG23__vert_dr_r0_q_MASK 0x4000
#define VGT_DEBUG_REG23__vert_dr_r0_q__SHIFT 0xe
#define VGT_DEBUG_REG23__new_verts_r2_q_MASK 0x18000
#define VGT_DEBUG_REG23__new_verts_r2_q__SHIFT 0xf
#define VGT_DEBUG_REG23__verts_sent_r2_q_MASK 0x1e0000
#define VGT_DEBUG_REG23__verts_sent_r2_q__SHIFT 0x11
#define VGT_DEBUG_REG23__prim_state_sel_r2_q_MASK 0xe00000
#define VGT_DEBUG_REG23__prim_state_sel_r2_q__SHIFT 0x15
#define VGT_DEBUG_REG23__SPARE_MASK 0xff000000
#define VGT_DEBUG_REG23__SPARE__SHIFT 0x18
/*
 * VGT_DEBUG_REG24 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [23:0]  avail_es_rb_space_r0_q_23_0
 *   [25:24] dependent_st_cut_mode_q
 *   [31:26] SPARE31
 */
#define VGT_DEBUG_REG24__avail_es_rb_space_r0_q_23_0_MASK 0xffffff
#define VGT_DEBUG_REG24__avail_es_rb_space_r0_q_23_0__SHIFT 0x0
#define VGT_DEBUG_REG24__dependent_st_cut_mode_q_MASK 0x3000000
#define VGT_DEBUG_REG24__dependent_st_cut_mode_q__SHIFT 0x18
#define VGT_DEBUG_REG24__SPARE31_MASK 0xfc000000
#define VGT_DEBUG_REG24__SPARE31__SHIFT 0x1a
/*
 * VGT_DEBUG_REG25 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Layout (covers bits 0..31):
 *   [25:0]  avail_gs_rb_space_r0_q_25_0
 *   [29:26] active_sm_r0_q
 *   [30]    add_gs_rb_space_r1_q
 *   [31]    add_gs_rb_space_r0_q
 */
#define VGT_DEBUG_REG25__avail_gs_rb_space_r0_q_25_0_MASK 0x3ffffff
#define VGT_DEBUG_REG25__avail_gs_rb_space_r0_q_25_0__SHIFT 0x0
#define VGT_DEBUG_REG25__active_sm_r0_q_MASK 0x3c000000
#define VGT_DEBUG_REG25__active_sm_r0_q__SHIFT 0x1a
#define VGT_DEBUG_REG25__add_gs_rb_space_r1_q_MASK 0x40000000
#define VGT_DEBUG_REG25__add_gs_rb_space_r1_q__SHIFT 0x1e
#define VGT_DEBUG_REG25__add_gs_rb_space_r0_q_MASK 0x80000000
#define VGT_DEBUG_REG25__add_gs_rb_space_r0_q__SHIFT 0x1f
/*
 * VGT_DEBUG_REG26 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit.
 * The register holds sixteen 2-bit fields, cm_state0 .. cm_state15, packed
 * back to back: cm_stateN occupies bits [2N+1:2N], so cm_state0 is [1:0]
 * and cm_state15 is [31:30].
 */
#define VGT_DEBUG_REG26__cm_state0_MASK 0x3
#define VGT_DEBUG_REG26__cm_state0__SHIFT 0x0
#define VGT_DEBUG_REG26__cm_state1_MASK 0xc
#define VGT_DEBUG_REG26__cm_state1__SHIFT 0x2
#define VGT_DEBUG_REG26__cm_state2_MASK 0x30
#define VGT_DEBUG_REG26__cm_state2__SHIFT 0x4
#define VGT_DEBUG_REG26__cm_state3_MASK 0xc0
#define VGT_DEBUG_REG26__cm_state3__SHIFT 0x6
#define VGT_DEBUG_REG26__cm_state4_MASK 0x300
#define VGT_DEBUG_REG26__cm_state4__SHIFT 0x8
#define VGT_DEBUG_REG26__cm_state5_MASK 0xc00
#define VGT_DEBUG_REG26__cm_state5__SHIFT 0xa
#define VGT_DEBUG_REG26__cm_state6_MASK 0x3000
#define VGT_DEBUG_REG26__cm_state6__SHIFT 0xc
#define VGT_DEBUG_REG26__cm_state7_MASK 0xc000
#define VGT_DEBUG_REG26__cm_state7__SHIFT 0xe
#define VGT_DEBUG_REG26__cm_state8_MASK 0x30000
#define VGT_DEBUG_REG26__cm_state8__SHIFT 0x10
#define VGT_DEBUG_REG26__cm_state9_MASK 0xc0000
#define VGT_DEBUG_REG26__cm_state9__SHIFT 0x12
#define VGT_DEBUG_REG26__cm_state10_MASK 0x300000
#define VGT_DEBUG_REG26__cm_state10__SHIFT 0x14
#define VGT_DEBUG_REG26__cm_state11_MASK 0xc00000
#define VGT_DEBUG_REG26__cm_state11__SHIFT 0x16
#define VGT_DEBUG_REG26__cm_state12_MASK 0x3000000
#define VGT_DEBUG_REG26__cm_state12__SHIFT 0x18
#define VGT_DEBUG_REG26__cm_state13_MASK 0xc000000
#define VGT_DEBUG_REG26__cm_state13__SHIFT 0x1a
#define VGT_DEBUG_REG26__cm_state14_MASK 0x30000000
#define VGT_DEBUG_REG26__cm_state14__SHIFT 0x1c
#define VGT_DEBUG_REG26__cm_state15_MASK 0xc0000000
#define VGT_DEBUG_REG26__cm_state15__SHIFT 0x1e
/*
 * VGT_DEBUG_REG27 bit-field constants.
 * <field>_MASK is the set of bits a field occupies; <field>__SHIFT is the
 * index of its lowest bit. Fields are listed in ascending bit order and
 * together cover bits 0..31.
 * Multi-bit fields: indices_to_send_p0_q [9:8],
 * gs_out_prim_type_p0_q [13:12], gsc_indx_count_p0_q [30:20].
 * Every other field is a single bit.
 */
#define VGT_DEBUG_REG27__pipe0_dr_MASK 0x1
#define VGT_DEBUG_REG27__pipe0_dr__SHIFT 0x0
#define VGT_DEBUG_REG27__gsc0_dr_MASK 0x2
#define VGT_DEBUG_REG27__gsc0_dr__SHIFT 0x1
#define VGT_DEBUG_REG27__pipe1_dr_MASK 0x4
#define VGT_DEBUG_REG27__pipe1_dr__SHIFT 0x2
#define VGT_DEBUG_REG27__tm_pt_event_rtr_MASK 0x8
#define VGT_DEBUG_REG27__tm_pt_event_rtr__SHIFT 0x3
#define VGT_DEBUG_REG27__pipe0_rtr_MASK 0x10
#define VGT_DEBUG_REG27__pipe0_rtr__SHIFT 0x4
#define VGT_DEBUG_REG27__gsc0_rtr_MASK 0x20
#define VGT_DEBUG_REG27__gsc0_rtr__SHIFT 0x5
#define VGT_DEBUG_REG27__pipe1_rtr_MASK 0x40
#define VGT_DEBUG_REG27__pipe1_rtr__SHIFT 0x6
#define VGT_DEBUG_REG27__last_indx_of_prim_p1_q_MASK 0x80
#define VGT_DEBUG_REG27__last_indx_of_prim_p1_q__SHIFT 0x7
#define VGT_DEBUG_REG27__indices_to_send_p0_q_MASK 0x300
#define VGT_DEBUG_REG27__indices_to_send_p0_q__SHIFT 0x8
#define VGT_DEBUG_REG27__event_flag_p1_q_MASK 0x400
#define VGT_DEBUG_REG27__event_flag_p1_q__SHIFT 0xa
#define VGT_DEBUG_REG27__eop_p1_q_MASK 0x800
#define VGT_DEBUG_REG27__eop_p1_q__SHIFT 0xb
#define VGT_DEBUG_REG27__gs_out_prim_type_p0_q_MASK 0x3000
#define VGT_DEBUG_REG27__gs_out_prim_type_p0_q__SHIFT 0xc
#define VGT_DEBUG_REG27__gsc_null_primitive_p0_q_MASK 0x4000
#define VGT_DEBUG_REG27__gsc_null_primitive_p0_q__SHIFT 0xe
#define VGT_DEBUG_REG27__gsc_eop_p0_q_MASK 0x8000
#define VGT_DEBUG_REG27__gsc_eop_p0_q__SHIFT 0xf
#define VGT_DEBUG_REG27__gsc_2cycle_output_MASK 0x10000
#define VGT_DEBUG_REG27__gsc_2cycle_output__SHIFT 0x10
#define VGT_DEBUG_REG27__gsc_2nd_cycle_p0_q_MASK 0x20000
#define VGT_DEBUG_REG27__gsc_2nd_cycle_p0_q__SHIFT 0x11
#define VGT_DEBUG_REG27__last_indx_of_vsprim_MASK 0x40000
#define VGT_DEBUG_REG27__last_indx_of_vsprim__SHIFT 0x12
#define VGT_DEBUG_REG27__first_vsprim_of_gsprim_p0_q_MASK 0x80000
#define VGT_DEBUG_REG27__first_vsprim_of_gsprim_p0_q__SHIFT 0x13
#define VGT_DEBUG_REG27__gsc_indx_count_p0_q_MASK 0x7ff00000
#define VGT_DEBUG_REG27__gsc_indx_count_p0_q__SHIFT 0x14
#define VGT_DEBUG_REG27__last_vsprim_of_gsprim_MASK 0x80000000
#define VGT_DEBUG_REG27__last_vsprim_of_gsprim__SHIFT 0x1f
/*
 * VGT_DEBUG_REG28 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * con_state_q is a 4-bit field in bits 0-3; every other field is a single
 * bit, and together the fields cover bits 0..31 with no gaps.
 *
 * The field names and positions are the same as in VGT_DEBUG_REG29 and
 * VGT_DEBUG_REG34, except that bit 31 is named use_stored_inner_q_ring2 here.
 */
#define VGT_DEBUG_REG28__con_state_q_MASK 0xf
#define VGT_DEBUG_REG28__con_state_q__SHIFT 0x0
#define VGT_DEBUG_REG28__second_cycle_q_MASK 0x10
#define VGT_DEBUG_REG28__second_cycle_q__SHIFT 0x4
#define VGT_DEBUG_REG28__process_tri_middle_p0_q_MASK 0x20
#define VGT_DEBUG_REG28__process_tri_middle_p0_q__SHIFT 0x5
#define VGT_DEBUG_REG28__process_tri_1st_2nd_half_p0_q_MASK 0x40
#define VGT_DEBUG_REG28__process_tri_1st_2nd_half_p0_q__SHIFT 0x6
#define VGT_DEBUG_REG28__process_tri_center_poly_p0_q_MASK 0x80
#define VGT_DEBUG_REG28__process_tri_center_poly_p0_q__SHIFT 0x7
#define VGT_DEBUG_REG28__pipe0_patch_dr_MASK 0x100
#define VGT_DEBUG_REG28__pipe0_patch_dr__SHIFT 0x8
#define VGT_DEBUG_REG28__pipe0_edge_dr_MASK 0x200
#define VGT_DEBUG_REG28__pipe0_edge_dr__SHIFT 0x9
#define VGT_DEBUG_REG28__pipe1_dr_MASK 0x400
#define VGT_DEBUG_REG28__pipe1_dr__SHIFT 0xa
#define VGT_DEBUG_REG28__pipe0_patch_rtr_MASK 0x800
#define VGT_DEBUG_REG28__pipe0_patch_rtr__SHIFT 0xb
#define VGT_DEBUG_REG28__pipe0_edge_rtr_MASK 0x1000
#define VGT_DEBUG_REG28__pipe0_edge_rtr__SHIFT 0xc
#define VGT_DEBUG_REG28__pipe1_rtr_MASK 0x2000
#define VGT_DEBUG_REG28__pipe1_rtr__SHIFT 0xd
#define VGT_DEBUG_REG28__outer_parity_p0_q_MASK 0x4000
#define VGT_DEBUG_REG28__outer_parity_p0_q__SHIFT 0xe
#define VGT_DEBUG_REG28__parallel_parity_p0_q_MASK 0x8000
#define VGT_DEBUG_REG28__parallel_parity_p0_q__SHIFT 0xf
#define VGT_DEBUG_REG28__first_ring_of_patch_p0_q_MASK 0x10000
#define VGT_DEBUG_REG28__first_ring_of_patch_p0_q__SHIFT 0x10
#define VGT_DEBUG_REG28__last_ring_of_patch_p0_q_MASK 0x20000
#define VGT_DEBUG_REG28__last_ring_of_patch_p0_q__SHIFT 0x11
#define VGT_DEBUG_REG28__last_edge_of_outer_ring_p0_q_MASK 0x40000
#define VGT_DEBUG_REG28__last_edge_of_outer_ring_p0_q__SHIFT 0x12
#define VGT_DEBUG_REG28__last_point_of_outer_ring_p1_MASK 0x80000
#define VGT_DEBUG_REG28__last_point_of_outer_ring_p1__SHIFT 0x13
#define VGT_DEBUG_REG28__last_point_of_inner_ring_p1_MASK 0x100000
#define VGT_DEBUG_REG28__last_point_of_inner_ring_p1__SHIFT 0x14
#define VGT_DEBUG_REG28__outer_edge_tf_eq_one_p0_q_MASK 0x200000
#define VGT_DEBUG_REG28__outer_edge_tf_eq_one_p0_q__SHIFT 0x15
#define VGT_DEBUG_REG28__advance_outer_point_p1_MASK 0x400000
#define VGT_DEBUG_REG28__advance_outer_point_p1__SHIFT 0x16
#define VGT_DEBUG_REG28__advance_inner_point_p1_MASK 0x800000
#define VGT_DEBUG_REG28__advance_inner_point_p1__SHIFT 0x17
#define VGT_DEBUG_REG28__next_ring_is_rect_p0_q_MASK 0x1000000
#define VGT_DEBUG_REG28__next_ring_is_rect_p0_q__SHIFT 0x18
#define VGT_DEBUG_REG28__pipe1_outer1_rtr_MASK 0x2000000
#define VGT_DEBUG_REG28__pipe1_outer1_rtr__SHIFT 0x19
#define VGT_DEBUG_REG28__pipe1_outer2_rtr_MASK 0x4000000
#define VGT_DEBUG_REG28__pipe1_outer2_rtr__SHIFT 0x1a
#define VGT_DEBUG_REG28__pipe1_inner1_rtr_MASK 0x8000000
#define VGT_DEBUG_REG28__pipe1_inner1_rtr__SHIFT 0x1b
#define VGT_DEBUG_REG28__pipe1_inner2_rtr_MASK 0x10000000
#define VGT_DEBUG_REG28__pipe1_inner2_rtr__SHIFT 0x1c
#define VGT_DEBUG_REG28__pipe1_patch_rtr_MASK 0x20000000
#define VGT_DEBUG_REG28__pipe1_patch_rtr__SHIFT 0x1d
#define VGT_DEBUG_REG28__pipe1_edge_rtr_MASK 0x40000000
#define VGT_DEBUG_REG28__pipe1_edge_rtr__SHIFT 0x1e
/* Only field whose name differs from the REG29/REG34 layouts. */
#define VGT_DEBUG_REG28__use_stored_inner_q_ring2_MASK 0x80000000
#define VGT_DEBUG_REG28__use_stored_inner_q_ring2__SHIFT 0x1f
/*
 * VGT_DEBUG_REG29 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * con_state_q is a 4-bit field in bits 0-3; every other field is a single
 * bit, and together the fields cover bits 0..31 with no gaps.
 *
 * The field names and positions are the same as in VGT_DEBUG_REG28 and
 * VGT_DEBUG_REG34, except that bit 31 is named use_stored_inner_q_ring3 here.
 */
#define VGT_DEBUG_REG29__con_state_q_MASK 0xf
#define VGT_DEBUG_REG29__con_state_q__SHIFT 0x0
#define VGT_DEBUG_REG29__second_cycle_q_MASK 0x10
#define VGT_DEBUG_REG29__second_cycle_q__SHIFT 0x4
#define VGT_DEBUG_REG29__process_tri_middle_p0_q_MASK 0x20
#define VGT_DEBUG_REG29__process_tri_middle_p0_q__SHIFT 0x5
#define VGT_DEBUG_REG29__process_tri_1st_2nd_half_p0_q_MASK 0x40
#define VGT_DEBUG_REG29__process_tri_1st_2nd_half_p0_q__SHIFT 0x6
#define VGT_DEBUG_REG29__process_tri_center_poly_p0_q_MASK 0x80
#define VGT_DEBUG_REG29__process_tri_center_poly_p0_q__SHIFT 0x7
#define VGT_DEBUG_REG29__pipe0_patch_dr_MASK 0x100
#define VGT_DEBUG_REG29__pipe0_patch_dr__SHIFT 0x8
#define VGT_DEBUG_REG29__pipe0_edge_dr_MASK 0x200
#define VGT_DEBUG_REG29__pipe0_edge_dr__SHIFT 0x9
#define VGT_DEBUG_REG29__pipe1_dr_MASK 0x400
#define VGT_DEBUG_REG29__pipe1_dr__SHIFT 0xa
#define VGT_DEBUG_REG29__pipe0_patch_rtr_MASK 0x800
#define VGT_DEBUG_REG29__pipe0_patch_rtr__SHIFT 0xb
#define VGT_DEBUG_REG29__pipe0_edge_rtr_MASK 0x1000
#define VGT_DEBUG_REG29__pipe0_edge_rtr__SHIFT 0xc
#define VGT_DEBUG_REG29__pipe1_rtr_MASK 0x2000
#define VGT_DEBUG_REG29__pipe1_rtr__SHIFT 0xd
#define VGT_DEBUG_REG29__outer_parity_p0_q_MASK 0x4000
#define VGT_DEBUG_REG29__outer_parity_p0_q__SHIFT 0xe
#define VGT_DEBUG_REG29__parallel_parity_p0_q_MASK 0x8000
#define VGT_DEBUG_REG29__parallel_parity_p0_q__SHIFT 0xf
#define VGT_DEBUG_REG29__first_ring_of_patch_p0_q_MASK 0x10000
#define VGT_DEBUG_REG29__first_ring_of_patch_p0_q__SHIFT 0x10
#define VGT_DEBUG_REG29__last_ring_of_patch_p0_q_MASK 0x20000
#define VGT_DEBUG_REG29__last_ring_of_patch_p0_q__SHIFT 0x11
#define VGT_DEBUG_REG29__last_edge_of_outer_ring_p0_q_MASK 0x40000
#define VGT_DEBUG_REG29__last_edge_of_outer_ring_p0_q__SHIFT 0x12
#define VGT_DEBUG_REG29__last_point_of_outer_ring_p1_MASK 0x80000
#define VGT_DEBUG_REG29__last_point_of_outer_ring_p1__SHIFT 0x13
#define VGT_DEBUG_REG29__last_point_of_inner_ring_p1_MASK 0x100000
#define VGT_DEBUG_REG29__last_point_of_inner_ring_p1__SHIFT 0x14
#define VGT_DEBUG_REG29__outer_edge_tf_eq_one_p0_q_MASK 0x200000
#define VGT_DEBUG_REG29__outer_edge_tf_eq_one_p0_q__SHIFT 0x15
#define VGT_DEBUG_REG29__advance_outer_point_p1_MASK 0x400000
#define VGT_DEBUG_REG29__advance_outer_point_p1__SHIFT 0x16
#define VGT_DEBUG_REG29__advance_inner_point_p1_MASK 0x800000
#define VGT_DEBUG_REG29__advance_inner_point_p1__SHIFT 0x17
#define VGT_DEBUG_REG29__next_ring_is_rect_p0_q_MASK 0x1000000
#define VGT_DEBUG_REG29__next_ring_is_rect_p0_q__SHIFT 0x18
#define VGT_DEBUG_REG29__pipe1_outer1_rtr_MASK 0x2000000
#define VGT_DEBUG_REG29__pipe1_outer1_rtr__SHIFT 0x19
#define VGT_DEBUG_REG29__pipe1_outer2_rtr_MASK 0x4000000
#define VGT_DEBUG_REG29__pipe1_outer2_rtr__SHIFT 0x1a
#define VGT_DEBUG_REG29__pipe1_inner1_rtr_MASK 0x8000000
#define VGT_DEBUG_REG29__pipe1_inner1_rtr__SHIFT 0x1b
#define VGT_DEBUG_REG29__pipe1_inner2_rtr_MASK 0x10000000
#define VGT_DEBUG_REG29__pipe1_inner2_rtr__SHIFT 0x1c
#define VGT_DEBUG_REG29__pipe1_patch_rtr_MASK 0x20000000
#define VGT_DEBUG_REG29__pipe1_patch_rtr__SHIFT 0x1d
#define VGT_DEBUG_REG29__pipe1_edge_rtr_MASK 0x40000000
#define VGT_DEBUG_REG29__pipe1_edge_rtr__SHIFT 0x1e
/* Only field whose name differs from the REG28/REG34 layouts. */
#define VGT_DEBUG_REG29__use_stored_inner_q_ring3_MASK 0x80000000
#define VGT_DEBUG_REG29__use_stored_inner_q_ring3__SHIFT 0x1f
/*
 * VGT_DEBUG_REG30 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * The fields cover bits 0..31 with no gaps. All are single-bit except
 * tf_fetch_state_q (bits 16-18), tf_pointer_p0_q (bits 20-23) and
 * tf_xfer_count_p2_q (bits 28-29).
 */
#define VGT_DEBUG_REG30__pipe0_dr_MASK 0x1
#define VGT_DEBUG_REG30__pipe0_dr__SHIFT 0x0
#define VGT_DEBUG_REG30__pipe0_tf_dr_MASK 0x2
#define VGT_DEBUG_REG30__pipe0_tf_dr__SHIFT 0x1
#define VGT_DEBUG_REG30__pipe2_dr_MASK 0x4
#define VGT_DEBUG_REG30__pipe2_dr__SHIFT 0x2
#define VGT_DEBUG_REG30__event_or_null_p0_q_MASK 0x8
#define VGT_DEBUG_REG30__event_or_null_p0_q__SHIFT 0x3
#define VGT_DEBUG_REG30__pipe0_rtr_MASK 0x10
#define VGT_DEBUG_REG30__pipe0_rtr__SHIFT 0x4
#define VGT_DEBUG_REG30__pipe1_rtr_MASK 0x20
#define VGT_DEBUG_REG30__pipe1_rtr__SHIFT 0x5
#define VGT_DEBUG_REG30__pipe1_tf_rtr_MASK 0x40
#define VGT_DEBUG_REG30__pipe1_tf_rtr__SHIFT 0x6
#define VGT_DEBUG_REG30__pipe2_rtr_MASK 0x80
#define VGT_DEBUG_REG30__pipe2_rtr__SHIFT 0x7
#define VGT_DEBUG_REG30__ttp_patch_fifo_full_MASK 0x100
#define VGT_DEBUG_REG30__ttp_patch_fifo_full__SHIFT 0x8
#define VGT_DEBUG_REG30__ttp_patch_fifo_empty_MASK 0x200
#define VGT_DEBUG_REG30__ttp_patch_fifo_empty__SHIFT 0x9
#define VGT_DEBUG_REG30__ttp_tf0_fifo_empty_MASK 0x400
#define VGT_DEBUG_REG30__ttp_tf0_fifo_empty__SHIFT 0xa
#define VGT_DEBUG_REG30__ttp_tf1_fifo_empty_MASK 0x800
#define VGT_DEBUG_REG30__ttp_tf1_fifo_empty__SHIFT 0xb
#define VGT_DEBUG_REG30__ttp_tf2_fifo_empty_MASK 0x1000
#define VGT_DEBUG_REG30__ttp_tf2_fifo_empty__SHIFT 0xc
#define VGT_DEBUG_REG30__ttp_tf3_fifo_empty_MASK 0x2000
#define VGT_DEBUG_REG30__ttp_tf3_fifo_empty__SHIFT 0xd
#define VGT_DEBUG_REG30__ttp_tf4_fifo_empty_MASK 0x4000
#define VGT_DEBUG_REG30__ttp_tf4_fifo_empty__SHIFT 0xe
#define VGT_DEBUG_REG30__ttp_tf5_fifo_empty_MASK 0x8000
#define VGT_DEBUG_REG30__ttp_tf5_fifo_empty__SHIFT 0xf
/* 3-bit field, bits 16-18. */
#define VGT_DEBUG_REG30__tf_fetch_state_q_MASK 0x70000
#define VGT_DEBUG_REG30__tf_fetch_state_q__SHIFT 0x10
#define VGT_DEBUG_REG30__last_tf_of_tg_MASK 0x80000
#define VGT_DEBUG_REG30__last_tf_of_tg__SHIFT 0x13
/* 4-bit field, bits 20-23. */
#define VGT_DEBUG_REG30__tf_pointer_p0_q_MASK 0xf00000
#define VGT_DEBUG_REG30__tf_pointer_p0_q__SHIFT 0x14
#define VGT_DEBUG_REG30__dynamic_hs_p0_q_MASK 0x1000000
#define VGT_DEBUG_REG30__dynamic_hs_p0_q__SHIFT 0x18
#define VGT_DEBUG_REG30__first_fetch_of_tg_p0_q_MASK 0x2000000
#define VGT_DEBUG_REG30__first_fetch_of_tg_p0_q__SHIFT 0x19
#define VGT_DEBUG_REG30__first_data_ret_of_req_p0_q_MASK 0x4000000
#define VGT_DEBUG_REG30__first_data_ret_of_req_p0_q__SHIFT 0x1a
#define VGT_DEBUG_REG30__first_data_chunk_invalid_p0_q_MASK 0x8000000
#define VGT_DEBUG_REG30__first_data_chunk_invalid_p0_q__SHIFT 0x1b
/* 2-bit field, bits 28-29. */
#define VGT_DEBUG_REG30__tf_xfer_count_p2_q_MASK 0x30000000
#define VGT_DEBUG_REG30__tf_xfer_count_p2_q__SHIFT 0x1c
#define VGT_DEBUG_REG30__pipe4_dr_MASK 0x40000000
#define VGT_DEBUG_REG30__pipe4_dr__SHIFT 0x1e
#define VGT_DEBUG_REG30__pipe4_rtr_MASK 0x80000000
#define VGT_DEBUG_REG30__pipe4_rtr__SHIFT 0x1f
/*
 * VGT_DEBUG_REG31 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * This register is made up of 32 single-bit fields, one per bit from
 * bit 0 (pipe0_dr) to bit 31 (inner_ring_done_q).
 */
#define VGT_DEBUG_REG31__pipe0_dr_MASK 0x1
#define VGT_DEBUG_REG31__pipe0_dr__SHIFT 0x0
#define VGT_DEBUG_REG31__pipe0_rtr_MASK 0x2
#define VGT_DEBUG_REG31__pipe0_rtr__SHIFT 0x1
#define VGT_DEBUG_REG31__pipe1_outer_dr_MASK 0x4
#define VGT_DEBUG_REG31__pipe1_outer_dr__SHIFT 0x2
#define VGT_DEBUG_REG31__pipe1_inner_dr_MASK 0x8
#define VGT_DEBUG_REG31__pipe1_inner_dr__SHIFT 0x3
#define VGT_DEBUG_REG31__pipe2_outer_dr_MASK 0x10
#define VGT_DEBUG_REG31__pipe2_outer_dr__SHIFT 0x4
#define VGT_DEBUG_REG31__pipe2_inner_dr_MASK 0x20
#define VGT_DEBUG_REG31__pipe2_inner_dr__SHIFT 0x5
#define VGT_DEBUG_REG31__pipe3_outer_dr_MASK 0x40
#define VGT_DEBUG_REG31__pipe3_outer_dr__SHIFT 0x6
#define VGT_DEBUG_REG31__pipe3_inner_dr_MASK 0x80
#define VGT_DEBUG_REG31__pipe3_inner_dr__SHIFT 0x7
#define VGT_DEBUG_REG31__pipe4_outer_dr_MASK 0x100
#define VGT_DEBUG_REG31__pipe4_outer_dr__SHIFT 0x8
#define VGT_DEBUG_REG31__pipe4_inner_dr_MASK 0x200
#define VGT_DEBUG_REG31__pipe4_inner_dr__SHIFT 0x9
#define VGT_DEBUG_REG31__pipe5_outer_dr_MASK 0x400
#define VGT_DEBUG_REG31__pipe5_outer_dr__SHIFT 0xa
#define VGT_DEBUG_REG31__pipe5_inner_dr_MASK 0x800
#define VGT_DEBUG_REG31__pipe5_inner_dr__SHIFT 0xb
#define VGT_DEBUG_REG31__pipe2_outer_rtr_MASK 0x1000
#define VGT_DEBUG_REG31__pipe2_outer_rtr__SHIFT 0xc
#define VGT_DEBUG_REG31__pipe2_inner_rtr_MASK 0x2000
#define VGT_DEBUG_REG31__pipe2_inner_rtr__SHIFT 0xd
#define VGT_DEBUG_REG31__pipe3_outer_rtr_MASK 0x4000
#define VGT_DEBUG_REG31__pipe3_outer_rtr__SHIFT 0xe
#define VGT_DEBUG_REG31__pipe3_inner_rtr_MASK 0x8000
#define VGT_DEBUG_REG31__pipe3_inner_rtr__SHIFT 0xf
#define VGT_DEBUG_REG31__pipe4_outer_rtr_MASK 0x10000
#define VGT_DEBUG_REG31__pipe4_outer_rtr__SHIFT 0x10
#define VGT_DEBUG_REG31__pipe4_inner_rtr_MASK 0x20000
#define VGT_DEBUG_REG31__pipe4_inner_rtr__SHIFT 0x11
#define VGT_DEBUG_REG31__pipe5_outer_rtr_MASK 0x40000
#define VGT_DEBUG_REG31__pipe5_outer_rtr__SHIFT 0x12
#define VGT_DEBUG_REG31__pipe5_inner_rtr_MASK 0x80000
#define VGT_DEBUG_REG31__pipe5_inner_rtr__SHIFT 0x13
#define VGT_DEBUG_REG31__pg_con_outer_point1_rts_MASK 0x100000
#define VGT_DEBUG_REG31__pg_con_outer_point1_rts__SHIFT 0x14
#define VGT_DEBUG_REG31__pg_con_outer_point2_rts_MASK 0x200000
#define VGT_DEBUG_REG31__pg_con_outer_point2_rts__SHIFT 0x15
#define VGT_DEBUG_REG31__pg_con_inner_point1_rts_MASK 0x400000
#define VGT_DEBUG_REG31__pg_con_inner_point1_rts__SHIFT 0x16
#define VGT_DEBUG_REG31__pg_con_inner_point2_rts_MASK 0x800000
#define VGT_DEBUG_REG31__pg_con_inner_point2_rts__SHIFT 0x17
#define VGT_DEBUG_REG31__pg_patch_fifo_empty_MASK 0x1000000
#define VGT_DEBUG_REG31__pg_patch_fifo_empty__SHIFT 0x18
#define VGT_DEBUG_REG31__pg_edge_fifo_empty_MASK 0x2000000
#define VGT_DEBUG_REG31__pg_edge_fifo_empty__SHIFT 0x19
#define VGT_DEBUG_REG31__pg_inner3_perp_fifo_empty_MASK 0x4000000
#define VGT_DEBUG_REG31__pg_inner3_perp_fifo_empty__SHIFT 0x1a
#define VGT_DEBUG_REG31__pg_patch_fifo_full_MASK 0x8000000
#define VGT_DEBUG_REG31__pg_patch_fifo_full__SHIFT 0x1b
#define VGT_DEBUG_REG31__pg_edge_fifo_full_MASK 0x10000000
#define VGT_DEBUG_REG31__pg_edge_fifo_full__SHIFT 0x1c
#define VGT_DEBUG_REG31__pg_inner_perp_fifo_full_MASK 0x20000000
#define VGT_DEBUG_REG31__pg_inner_perp_fifo_full__SHIFT 0x1d
#define VGT_DEBUG_REG31__outer_ring_done_q_MASK 0x40000000
#define VGT_DEBUG_REG31__outer_ring_done_q__SHIFT 0x1e
#define VGT_DEBUG_REG31__inner_ring_done_q_MASK 0x80000000
#define VGT_DEBUG_REG31__inner_ring_done_q__SHIFT 0x1f
/*
 * VGT_DEBUG_REG32 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * The fields cover bits 0..31 with no gaps. All are single-bit except
 * tess_topology_p5_q (bits 12-13) and SPARE (bits 28-31).
 */
#define VGT_DEBUG_REG32__first_ring_of_patch_MASK 0x1
#define VGT_DEBUG_REG32__first_ring_of_patch__SHIFT 0x0
#define VGT_DEBUG_REG32__last_ring_of_patch_MASK 0x2
#define VGT_DEBUG_REG32__last_ring_of_patch__SHIFT 0x1
#define VGT_DEBUG_REG32__last_edge_of_outer_ring_MASK 0x4
#define VGT_DEBUG_REG32__last_edge_of_outer_ring__SHIFT 0x2
#define VGT_DEBUG_REG32__last_point_of_outer_edge_MASK 0x8
#define VGT_DEBUG_REG32__last_point_of_outer_edge__SHIFT 0x3
#define VGT_DEBUG_REG32__last_edge_of_inner_ring_MASK 0x10
#define VGT_DEBUG_REG32__last_edge_of_inner_ring__SHIFT 0x4
#define VGT_DEBUG_REG32__last_point_of_inner_edge_MASK 0x20
#define VGT_DEBUG_REG32__last_point_of_inner_edge__SHIFT 0x5
#define VGT_DEBUG_REG32__last_patch_of_tg_p0_q_MASK 0x40
#define VGT_DEBUG_REG32__last_patch_of_tg_p0_q__SHIFT 0x6
#define VGT_DEBUG_REG32__event_null_special_p0_q_MASK 0x80
#define VGT_DEBUG_REG32__event_null_special_p0_q__SHIFT 0x7
#define VGT_DEBUG_REG32__event_flag_p5_q_MASK 0x100
#define VGT_DEBUG_REG32__event_flag_p5_q__SHIFT 0x8
#define VGT_DEBUG_REG32__first_point_of_patch_p5_q_MASK 0x200
#define VGT_DEBUG_REG32__first_point_of_patch_p5_q__SHIFT 0x9
#define VGT_DEBUG_REG32__first_point_of_edge_p5_q_MASK 0x400
#define VGT_DEBUG_REG32__first_point_of_edge_p5_q__SHIFT 0xa
#define VGT_DEBUG_REG32__last_patch_of_tg_p5_q_MASK 0x800
#define VGT_DEBUG_REG32__last_patch_of_tg_p5_q__SHIFT 0xb
/* 2-bit field, bits 12-13. */
#define VGT_DEBUG_REG32__tess_topology_p5_q_MASK 0x3000
#define VGT_DEBUG_REG32__tess_topology_p5_q__SHIFT 0xc
#define VGT_DEBUG_REG32__pipe5_inner3_rtr_MASK 0x4000
#define VGT_DEBUG_REG32__pipe5_inner3_rtr__SHIFT 0xe
#define VGT_DEBUG_REG32__pipe5_inner2_rtr_MASK 0x8000
#define VGT_DEBUG_REG32__pipe5_inner2_rtr__SHIFT 0xf
#define VGT_DEBUG_REG32__pg_edge_fifo3_full_MASK 0x10000
#define VGT_DEBUG_REG32__pg_edge_fifo3_full__SHIFT 0x10
#define VGT_DEBUG_REG32__pg_edge_fifo2_full_MASK 0x20000
#define VGT_DEBUG_REG32__pg_edge_fifo2_full__SHIFT 0x11
#define VGT_DEBUG_REG32__pg_inner3_point_fifo_full_MASK 0x40000
#define VGT_DEBUG_REG32__pg_inner3_point_fifo_full__SHIFT 0x12
#define VGT_DEBUG_REG32__pg_outer3_point_fifo_full_MASK 0x80000
#define VGT_DEBUG_REG32__pg_outer3_point_fifo_full__SHIFT 0x13
#define VGT_DEBUG_REG32__pg_inner2_point_fifo_full_MASK 0x100000
#define VGT_DEBUG_REG32__pg_inner2_point_fifo_full__SHIFT 0x14
#define VGT_DEBUG_REG32__pg_outer2_point_fifo_full_MASK 0x200000
#define VGT_DEBUG_REG32__pg_outer2_point_fifo_full__SHIFT 0x15
#define VGT_DEBUG_REG32__pg_inner_point_fifo_full_MASK 0x400000
#define VGT_DEBUG_REG32__pg_inner_point_fifo_full__SHIFT 0x16
#define VGT_DEBUG_REG32__pg_outer_point_fifo_full_MASK 0x800000
#define VGT_DEBUG_REG32__pg_outer_point_fifo_full__SHIFT 0x17
#define VGT_DEBUG_REG32__inner2_fifos_rtr_MASK 0x1000000
#define VGT_DEBUG_REG32__inner2_fifos_rtr__SHIFT 0x18
#define VGT_DEBUG_REG32__inner_fifos_rtr_MASK 0x2000000
#define VGT_DEBUG_REG32__inner_fifos_rtr__SHIFT 0x19
#define VGT_DEBUG_REG32__outer_fifos_rtr_MASK 0x4000000
#define VGT_DEBUG_REG32__outer_fifos_rtr__SHIFT 0x1a
#define VGT_DEBUG_REG32__fifos_rtr_MASK 0x8000000
#define VGT_DEBUG_REG32__fifos_rtr__SHIFT 0x1b
/* 4-bit field occupying the top nibble, bits 28-31. */
#define VGT_DEBUG_REG32__SPARE_MASK 0xf0000000
#define VGT_DEBUG_REG32__SPARE__SHIFT 0x1c
/*
 * VGT_DEBUG_REG33 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * The fields cover bits 0..31 with no gaps. All are single-bit except
 * tess_type_p0_q (bits 24-25) and tess_topology_p0_q (bits 26-27).
 */
#define VGT_DEBUG_REG33__pipe0_patch_dr_MASK 0x1
#define VGT_DEBUG_REG33__pipe0_patch_dr__SHIFT 0x0
#define VGT_DEBUG_REG33__ring3_pipe1_dr_MASK 0x2
#define VGT_DEBUG_REG33__ring3_pipe1_dr__SHIFT 0x1
#define VGT_DEBUG_REG33__pipe1_dr_MASK 0x4
#define VGT_DEBUG_REG33__pipe1_dr__SHIFT 0x2
#define VGT_DEBUG_REG33__pipe2_dr_MASK 0x8
#define VGT_DEBUG_REG33__pipe2_dr__SHIFT 0x3
#define VGT_DEBUG_REG33__pipe0_patch_rtr_MASK 0x10
#define VGT_DEBUG_REG33__pipe0_patch_rtr__SHIFT 0x4
#define VGT_DEBUG_REG33__ring2_pipe1_dr_MASK 0x20
#define VGT_DEBUG_REG33__ring2_pipe1_dr__SHIFT 0x5
#define VGT_DEBUG_REG33__ring1_pipe1_dr_MASK 0x40
#define VGT_DEBUG_REG33__ring1_pipe1_dr__SHIFT 0x6
#define VGT_DEBUG_REG33__pipe2_rtr_MASK 0x80
#define VGT_DEBUG_REG33__pipe2_rtr__SHIFT 0x7
#define VGT_DEBUG_REG33__pipe3_dr_MASK 0x100
#define VGT_DEBUG_REG33__pipe3_dr__SHIFT 0x8
#define VGT_DEBUG_REG33__pipe3_rtr_MASK 0x200
#define VGT_DEBUG_REG33__pipe3_rtr__SHIFT 0x9
#define VGT_DEBUG_REG33__ring2_in_sync_q_MASK 0x400
#define VGT_DEBUG_REG33__ring2_in_sync_q__SHIFT 0xa
#define VGT_DEBUG_REG33__ring1_in_sync_q_MASK 0x800
#define VGT_DEBUG_REG33__ring1_in_sync_q__SHIFT 0xb
#define VGT_DEBUG_REG33__pipe1_patch_rtr_MASK 0x1000
#define VGT_DEBUG_REG33__pipe1_patch_rtr__SHIFT 0xc
#define VGT_DEBUG_REG33__ring3_in_sync_q_MASK 0x2000
#define VGT_DEBUG_REG33__ring3_in_sync_q__SHIFT 0xd
#define VGT_DEBUG_REG33__tm_te11_event_rtr_MASK 0x4000
#define VGT_DEBUG_REG33__tm_te11_event_rtr__SHIFT 0xe
#define VGT_DEBUG_REG33__first_prim_of_patch_q_MASK 0x8000
#define VGT_DEBUG_REG33__first_prim_of_patch_q__SHIFT 0xf
#define VGT_DEBUG_REG33__con_prim_fifo_full_MASK 0x10000
#define VGT_DEBUG_REG33__con_prim_fifo_full__SHIFT 0x10
#define VGT_DEBUG_REG33__con_vert_fifo_full_MASK 0x20000
#define VGT_DEBUG_REG33__con_vert_fifo_full__SHIFT 0x11
#define VGT_DEBUG_REG33__con_prim_fifo_empty_MASK 0x40000
#define VGT_DEBUG_REG33__con_prim_fifo_empty__SHIFT 0x12
#define VGT_DEBUG_REG33__con_vert_fifo_empty_MASK 0x80000
#define VGT_DEBUG_REG33__con_vert_fifo_empty__SHIFT 0x13
#define VGT_DEBUG_REG33__last_patch_of_tg_p0_q_MASK 0x100000
#define VGT_DEBUG_REG33__last_patch_of_tg_p0_q__SHIFT 0x14
#define VGT_DEBUG_REG33__ring3_valid_p2_MASK 0x200000
#define VGT_DEBUG_REG33__ring3_valid_p2__SHIFT 0x15
#define VGT_DEBUG_REG33__ring2_valid_p2_MASK 0x400000
#define VGT_DEBUG_REG33__ring2_valid_p2__SHIFT 0x16
#define VGT_DEBUG_REG33__ring1_valid_p2_MASK 0x800000
#define VGT_DEBUG_REG33__ring1_valid_p2__SHIFT 0x17
/* 2-bit field, bits 24-25. */
#define VGT_DEBUG_REG33__tess_type_p0_q_MASK 0x3000000
#define VGT_DEBUG_REG33__tess_type_p0_q__SHIFT 0x18
/* 2-bit field, bits 26-27. */
#define VGT_DEBUG_REG33__tess_topology_p0_q_MASK 0xc000000
#define VGT_DEBUG_REG33__tess_topology_p0_q__SHIFT 0x1a
#define VGT_DEBUG_REG33__te11_out_vert_gs_en_MASK 0x10000000
#define VGT_DEBUG_REG33__te11_out_vert_gs_en__SHIFT 0x1c
#define VGT_DEBUG_REG33__con_ring3_busy_MASK 0x20000000
#define VGT_DEBUG_REG33__con_ring3_busy__SHIFT 0x1d
#define VGT_DEBUG_REG33__con_ring2_busy_MASK 0x40000000
#define VGT_DEBUG_REG33__con_ring2_busy__SHIFT 0x1e
#define VGT_DEBUG_REG33__con_ring1_busy_MASK 0x80000000
#define VGT_DEBUG_REG33__con_ring1_busy__SHIFT 0x1f
/*
 * VGT_DEBUG_REG34 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * con_state_q is a 4-bit field in bits 0-3; every other field is a single
 * bit, and together the fields cover bits 0..31 with no gaps.
 *
 * The field names and positions are the same as in VGT_DEBUG_REG28 and
 * VGT_DEBUG_REG29, except that bit 31 is named use_stored_inner_q_ring1 here.
 */
#define VGT_DEBUG_REG34__con_state_q_MASK 0xf
#define VGT_DEBUG_REG34__con_state_q__SHIFT 0x0
#define VGT_DEBUG_REG34__second_cycle_q_MASK 0x10
#define VGT_DEBUG_REG34__second_cycle_q__SHIFT 0x4
#define VGT_DEBUG_REG34__process_tri_middle_p0_q_MASK 0x20
#define VGT_DEBUG_REG34__process_tri_middle_p0_q__SHIFT 0x5
#define VGT_DEBUG_REG34__process_tri_1st_2nd_half_p0_q_MASK 0x40
#define VGT_DEBUG_REG34__process_tri_1st_2nd_half_p0_q__SHIFT 0x6
#define VGT_DEBUG_REG34__process_tri_center_poly_p0_q_MASK 0x80
#define VGT_DEBUG_REG34__process_tri_center_poly_p0_q__SHIFT 0x7
#define VGT_DEBUG_REG34__pipe0_patch_dr_MASK 0x100
#define VGT_DEBUG_REG34__pipe0_patch_dr__SHIFT 0x8
#define VGT_DEBUG_REG34__pipe0_edge_dr_MASK 0x200
#define VGT_DEBUG_REG34__pipe0_edge_dr__SHIFT 0x9
#define VGT_DEBUG_REG34__pipe1_dr_MASK 0x400
#define VGT_DEBUG_REG34__pipe1_dr__SHIFT 0xa
#define VGT_DEBUG_REG34__pipe0_patch_rtr_MASK 0x800
#define VGT_DEBUG_REG34__pipe0_patch_rtr__SHIFT 0xb
#define VGT_DEBUG_REG34__pipe0_edge_rtr_MASK 0x1000
#define VGT_DEBUG_REG34__pipe0_edge_rtr__SHIFT 0xc
#define VGT_DEBUG_REG34__pipe1_rtr_MASK 0x2000
#define VGT_DEBUG_REG34__pipe1_rtr__SHIFT 0xd
#define VGT_DEBUG_REG34__outer_parity_p0_q_MASK 0x4000
#define VGT_DEBUG_REG34__outer_parity_p0_q__SHIFT 0xe
#define VGT_DEBUG_REG34__parallel_parity_p0_q_MASK 0x8000
#define VGT_DEBUG_REG34__parallel_parity_p0_q__SHIFT 0xf
#define VGT_DEBUG_REG34__first_ring_of_patch_p0_q_MASK 0x10000
#define VGT_DEBUG_REG34__first_ring_of_patch_p0_q__SHIFT 0x10
#define VGT_DEBUG_REG34__last_ring_of_patch_p0_q_MASK 0x20000
#define VGT_DEBUG_REG34__last_ring_of_patch_p0_q__SHIFT 0x11
#define VGT_DEBUG_REG34__last_edge_of_outer_ring_p0_q_MASK 0x40000
#define VGT_DEBUG_REG34__last_edge_of_outer_ring_p0_q__SHIFT 0x12
#define VGT_DEBUG_REG34__last_point_of_outer_ring_p1_MASK 0x80000
#define VGT_DEBUG_REG34__last_point_of_outer_ring_p1__SHIFT 0x13
#define VGT_DEBUG_REG34__last_point_of_inner_ring_p1_MASK 0x100000
#define VGT_DEBUG_REG34__last_point_of_inner_ring_p1__SHIFT 0x14
#define VGT_DEBUG_REG34__outer_edge_tf_eq_one_p0_q_MASK 0x200000
#define VGT_DEBUG_REG34__outer_edge_tf_eq_one_p0_q__SHIFT 0x15
#define VGT_DEBUG_REG34__advance_outer_point_p1_MASK 0x400000
#define VGT_DEBUG_REG34__advance_outer_point_p1__SHIFT 0x16
#define VGT_DEBUG_REG34__advance_inner_point_p1_MASK 0x800000
#define VGT_DEBUG_REG34__advance_inner_point_p1__SHIFT 0x17
#define VGT_DEBUG_REG34__next_ring_is_rect_p0_q_MASK 0x1000000
#define VGT_DEBUG_REG34__next_ring_is_rect_p0_q__SHIFT 0x18
#define VGT_DEBUG_REG34__pipe1_outer1_rtr_MASK 0x2000000
#define VGT_DEBUG_REG34__pipe1_outer1_rtr__SHIFT 0x19
#define VGT_DEBUG_REG34__pipe1_outer2_rtr_MASK 0x4000000
#define VGT_DEBUG_REG34__pipe1_outer2_rtr__SHIFT 0x1a
#define VGT_DEBUG_REG34__pipe1_inner1_rtr_MASK 0x8000000
#define VGT_DEBUG_REG34__pipe1_inner1_rtr__SHIFT 0x1b
#define VGT_DEBUG_REG34__pipe1_inner2_rtr_MASK 0x10000000
#define VGT_DEBUG_REG34__pipe1_inner2_rtr__SHIFT 0x1c
#define VGT_DEBUG_REG34__pipe1_patch_rtr_MASK 0x20000000
#define VGT_DEBUG_REG34__pipe1_patch_rtr__SHIFT 0x1d
#define VGT_DEBUG_REG34__pipe1_edge_rtr_MASK 0x40000000
#define VGT_DEBUG_REG34__pipe1_edge_rtr__SHIFT 0x1e
/* Only field whose name differs from the REG28/REG29 layouts. */
#define VGT_DEBUG_REG34__use_stored_inner_q_ring1_MASK 0x80000000
#define VGT_DEBUG_REG34__use_stored_inner_q_ring1__SHIFT 0x1f
/*
 * VGT_DEBUG_REG35 field layout (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * The fields cover bits 0..31 with no gaps. All are single-bit except
 * spi_vgt_hs_done_cnt_q (bits 12-17) and tf_data_fifo_cnt_q (bits 20-26).
 */
#define VGT_DEBUG_REG35__pipe0_dr_MASK 0x1
#define VGT_DEBUG_REG35__pipe0_dr__SHIFT 0x0
#define VGT_DEBUG_REG35__pipe1_dr_MASK 0x2
#define VGT_DEBUG_REG35__pipe1_dr__SHIFT 0x1
#define VGT_DEBUG_REG35__pipe0_rtr_MASK 0x4
#define VGT_DEBUG_REG35__pipe0_rtr__SHIFT 0x2
#define VGT_DEBUG_REG35__pipe1_rtr_MASK 0x8
#define VGT_DEBUG_REG35__pipe1_rtr__SHIFT 0x3
#define VGT_DEBUG_REG35__tfreq_tg_fifo_empty_MASK 0x10
#define VGT_DEBUG_REG35__tfreq_tg_fifo_empty__SHIFT 0x4
#define VGT_DEBUG_REG35__tfreq_tg_fifo_full_MASK 0x20
#define VGT_DEBUG_REG35__tfreq_tg_fifo_full__SHIFT 0x5
#define VGT_DEBUG_REG35__tf_data_fifo_busy_q_MASK 0x40
#define VGT_DEBUG_REG35__tf_data_fifo_busy_q__SHIFT 0x6
#define VGT_DEBUG_REG35__tf_data_fifo_rtr_q_MASK 0x80
#define VGT_DEBUG_REG35__tf_data_fifo_rtr_q__SHIFT 0x7
#define VGT_DEBUG_REG35__tf_skid_fifo_empty_MASK 0x100
#define VGT_DEBUG_REG35__tf_skid_fifo_empty__SHIFT 0x8
#define VGT_DEBUG_REG35__tf_skid_fifo_full_MASK 0x200
#define VGT_DEBUG_REG35__tf_skid_fifo_full__SHIFT 0x9
#define VGT_DEBUG_REG35__vgt_tc_rdreq_rtr_q_MASK 0x400
#define VGT_DEBUG_REG35__vgt_tc_rdreq_rtr_q__SHIFT 0xa
#define VGT_DEBUG_REG35__last_req_of_tg_p2_MASK 0x800
#define VGT_DEBUG_REG35__last_req_of_tg_p2__SHIFT 0xb
/* 6-bit field, bits 12-17. */
#define VGT_DEBUG_REG35__spi_vgt_hs_done_cnt_q_MASK 0x3f000
#define VGT_DEBUG_REG35__spi_vgt_hs_done_cnt_q__SHIFT 0xc
#define VGT_DEBUG_REG35__event_flag_p1_q_MASK 0x40000
#define VGT_DEBUG_REG35__event_flag_p1_q__SHIFT 0x12
#define VGT_DEBUG_REG35__null_flag_p1_q_MASK 0x80000
#define VGT_DEBUG_REG35__null_flag_p1_q__SHIFT 0x13
/* 7-bit field, bits 20-26. */
#define VGT_DEBUG_REG35__tf_data_fifo_cnt_q_MASK 0x7f00000
#define VGT_DEBUG_REG35__tf_data_fifo_cnt_q__SHIFT 0x14
#define VGT_DEBUG_REG35__second_tf_ret_data_q_MASK 0x8000000
#define VGT_DEBUG_REG35__second_tf_ret_data_q__SHIFT 0x1b
#define VGT_DEBUG_REG35__first_req_of_tg_p1_q_MASK 0x10000000
#define VGT_DEBUG_REG35__first_req_of_tg_p1_q__SHIFT 0x1c
#define VGT_DEBUG_REG35__VGT_TC_rdreq_send_out_MASK 0x20000000
#define VGT_DEBUG_REG35__VGT_TC_rdreq_send_out__SHIFT 0x1d
#define VGT_DEBUG_REG35__VGT_TC_rdnfo_stall_out_MASK 0x40000000
#define VGT_DEBUG_REG35__VGT_TC_rdnfo_stall_out__SHIFT 0x1e
#define VGT_DEBUG_REG35__TC_VGT_rdret_data_in_MASK 0x80000000
#define VGT_DEBUG_REG35__TC_VGT_rdret_data_in__SHIFT 0x1f
/*
 * VGT_PERFCOUNTER_SEID_MASK: a single 8-bit field, PERF_SEID_IGNORE_MASK, in
 * bits 0-7. No fields are defined here for bits 8-31.
 */
#define VGT_PERFCOUNTER_SEID_MASK__PERF_SEID_IGNORE_MASK_MASK 0xff
#define VGT_PERFCOUNTER_SEID_MASK__PERF_SEID_IGNORE_MASK__SHIFT 0x0
/*
 * VGT_PERFCOUNTER{0..3}_SELECT field layouts (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * Counters 0 and 1 share one layout that fills all 32 bits:
 *   PERF_SEL   bits 0-9
 *   PERF_SEL1  bits 10-19
 *   CNTR_MODE  bits 20-23
 *   PERF_MODE1 bits 24-27
 *   PERF_MODE  bits 28-31
 *
 * Counters 2 and 3 use a reduced layout: PERF_SEL is only 8 bits wide
 * (bits 0-7) and PERF_MODE sits in bits 28-31; no fields are defined here
 * for bits 8-27.
 */
#define VGT_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define VGT_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define VGT_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define VGT_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define VGT_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define VGT_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define VGT_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define VGT_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define VGT_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define VGT_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define VGT_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0x3ff
#define VGT_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define VGT_PERFCOUNTER1_SELECT__PERF_SEL1_MASK 0xffc00
#define VGT_PERFCOUNTER1_SELECT__PERF_SEL1__SHIFT 0xa
#define VGT_PERFCOUNTER1_SELECT__CNTR_MODE_MASK 0xf00000
#define VGT_PERFCOUNTER1_SELECT__CNTR_MODE__SHIFT 0x14
#define VGT_PERFCOUNTER1_SELECT__PERF_MODE1_MASK 0xf000000
#define VGT_PERFCOUNTER1_SELECT__PERF_MODE1__SHIFT 0x18
#define VGT_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define VGT_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
/* Counters 2 and 3: reduced layout with an 8-bit PERF_SEL. */
#define VGT_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0xff
#define VGT_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define VGT_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define VGT_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define VGT_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0xff
#define VGT_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define VGT_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define VGT_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * VGT_PERFCOUNTER{0,1}_SELECT1 field layouts. Both registers use the same
 * layout:
 *   PERF_SEL2  bits 0-9
 *   PERF_SEL3  bits 10-19
 *   PERF_MODE3 bits 24-27
 *   PERF_MODE2 bits 28-31
 * No fields are defined here for bits 20-23. Note that PERF_MODE3 sits below
 * PERF_MODE2, i.e. the MODE fields are not in ascending numeric order.
 */
#define VGT_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define VGT_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define VGT_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define VGT_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define VGT_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define VGT_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define VGT_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define VGT_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
#define VGT_PERFCOUNTER1_SELECT1__PERF_SEL2_MASK 0x3ff
#define VGT_PERFCOUNTER1_SELECT1__PERF_SEL2__SHIFT 0x0
#define VGT_PERFCOUNTER1_SELECT1__PERF_SEL3_MASK 0xffc00
#define VGT_PERFCOUNTER1_SELECT1__PERF_SEL3__SHIFT 0xa
#define VGT_PERFCOUNTER1_SELECT1__PERF_MODE3_MASK 0xf000000
#define VGT_PERFCOUNTER1_SELECT1__PERF_MODE3__SHIFT 0x18
#define VGT_PERFCOUNTER1_SELECT1__PERF_MODE2_MASK 0xf0000000
#define VGT_PERFCOUNTER1_SELECT1__PERF_MODE2__SHIFT 0x1c
/*
 * VGT_PERFCOUNTER{0..3}_LO / _HI: each register holds one field
 * (PERFCOUNTER_LO or PERFCOUNTER_HI) that spans the full 32 bits.
 * NOTE(review): presumably each LO/HI pair forms the low and high words of a
 * 64-bit counter value - confirm against the hardware documentation.
 */
#define VGT_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define VGT_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define VGT_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define VGT_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define VGT_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define VGT_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define VGT_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define VGT_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define VGT_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define VGT_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define VGT_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define VGT_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define VGT_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define VGT_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define VGT_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define VGT_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * IA_PERFCOUNTER{0..3}_SELECT field layouts (<field>_MASK = bits occupied,
 * <field>__SHIFT = position of the field's least significant bit).
 *
 * Counter 0 uses the full layout that fills all 32 bits:
 *   PERF_SEL   bits 0-9
 *   PERF_SEL1  bits 10-19
 *   CNTR_MODE  bits 20-23
 *   PERF_MODE1 bits 24-27
 *   PERF_MODE  bits 28-31
 *
 * Counters 1, 2 and 3 use a reduced layout: PERF_SEL is only 8 bits wide
 * (bits 0-7) and PERF_MODE sits in bits 28-31; no fields are defined here
 * for bits 8-27.
 */
#define IA_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0x3ff
#define IA_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define IA_PERFCOUNTER0_SELECT__PERF_SEL1_MASK 0xffc00
#define IA_PERFCOUNTER0_SELECT__PERF_SEL1__SHIFT 0xa
#define IA_PERFCOUNTER0_SELECT__CNTR_MODE_MASK 0xf00000
#define IA_PERFCOUNTER0_SELECT__CNTR_MODE__SHIFT 0x14
#define IA_PERFCOUNTER0_SELECT__PERF_MODE1_MASK 0xf000000
#define IA_PERFCOUNTER0_SELECT__PERF_MODE1__SHIFT 0x18
#define IA_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define IA_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
/* Counters 1-3: reduced layout with an 8-bit PERF_SEL. */
#define IA_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0xff
#define IA_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define IA_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define IA_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define IA_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0xff
#define IA_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define IA_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define IA_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define IA_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0xff
#define IA_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define IA_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define IA_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
/*
 * IA_PERFCOUNTER0_SELECT1 field layout:
 *   PERF_SEL2  bits 0-9
 *   PERF_SEL3  bits 10-19
 *   PERF_MODE3 bits 24-27
 *   PERF_MODE2 bits 28-31
 * No fields are defined here for bits 20-23.
 */
#define IA_PERFCOUNTER0_SELECT1__PERF_SEL2_MASK 0x3ff
#define IA_PERFCOUNTER0_SELECT1__PERF_SEL2__SHIFT 0x0
#define IA_PERFCOUNTER0_SELECT1__PERF_SEL3_MASK 0xffc00
#define IA_PERFCOUNTER0_SELECT1__PERF_SEL3__SHIFT 0xa
#define IA_PERFCOUNTER0_SELECT1__PERF_MODE3_MASK 0xf000000
#define IA_PERFCOUNTER0_SELECT1__PERF_MODE3__SHIFT 0x18
#define IA_PERFCOUNTER0_SELECT1__PERF_MODE2_MASK 0xf0000000
#define IA_PERFCOUNTER0_SELECT1__PERF_MODE2__SHIFT 0x1c
/*
 * IA_PERFCOUNTER{0..3}_LO / _HI: each register holds one field
 * (PERFCOUNTER_LO or PERFCOUNTER_HI) that spans the full 32 bits.
 * NOTE(review): presumably each LO/HI pair forms the low and high words of a
 * 64-bit counter value - confirm against the hardware documentation.
 */
#define IA_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define IA_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define IA_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define IA_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define IA_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define IA_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define IA_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define IA_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define IA_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define IA_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define IA_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define IA_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define IA_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define IA_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define IA_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define IA_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * WD_PERFCOUNTER{0..3}_SELECT field layouts. All four registers use the same
 * layout: an 8-bit PERF_SEL in bits 0-7 and a 4-bit PERF_MODE in bits 28-31.
 * No fields are defined here for bits 8-27.
 */
#define WD_PERFCOUNTER0_SELECT__PERF_SEL_MASK 0xff
#define WD_PERFCOUNTER0_SELECT__PERF_SEL__SHIFT 0x0
#define WD_PERFCOUNTER0_SELECT__PERF_MODE_MASK 0xf0000000
#define WD_PERFCOUNTER0_SELECT__PERF_MODE__SHIFT 0x1c
#define WD_PERFCOUNTER1_SELECT__PERF_SEL_MASK 0xff
#define WD_PERFCOUNTER1_SELECT__PERF_SEL__SHIFT 0x0
#define WD_PERFCOUNTER1_SELECT__PERF_MODE_MASK 0xf0000000
#define WD_PERFCOUNTER1_SELECT__PERF_MODE__SHIFT 0x1c
#define WD_PERFCOUNTER2_SELECT__PERF_SEL_MASK 0xff
#define WD_PERFCOUNTER2_SELECT__PERF_SEL__SHIFT 0x0
#define WD_PERFCOUNTER2_SELECT__PERF_MODE_MASK 0xf0000000
#define WD_PERFCOUNTER2_SELECT__PERF_MODE__SHIFT 0x1c
#define WD_PERFCOUNTER3_SELECT__PERF_SEL_MASK 0xff
#define WD_PERFCOUNTER3_SELECT__PERF_SEL__SHIFT 0x0
#define WD_PERFCOUNTER3_SELECT__PERF_MODE_MASK 0xf0000000
#define WD_PERFCOUNTER3_SELECT__PERF_MODE__SHIFT 0x1c
#define WD_PERFCOUNTER0_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define WD_PERFCOUNTER0_LO__PERFCOUNTER_LO__SHIFT 0x0
#define WD_PERFCOUNTER1_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define WD_PERFCOUNTER1_LO__PERFCOUNTER_LO__SHIFT 0x0
#define WD_PERFCOUNTER2_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define WD_PERFCOUNTER2_LO__PERFCOUNTER_LO__SHIFT 0x0
#define WD_PERFCOUNTER3_LO__PERFCOUNTER_LO_MASK 0xffffffff
#define WD_PERFCOUNTER3_LO__PERFCOUNTER_LO__SHIFT 0x0
#define WD_PERFCOUNTER0_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define WD_PERFCOUNTER0_HI__PERFCOUNTER_HI__SHIFT 0x0
#define WD_PERFCOUNTER1_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define WD_PERFCOUNTER1_HI__PERFCOUNTER_HI__SHIFT 0x0
#define WD_PERFCOUNTER2_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define WD_PERFCOUNTER2_HI__PERFCOUNTER_HI__SHIFT 0x0
#define WD_PERFCOUNTER3_HI__PERFCOUNTER_HI_MASK 0xffffffff
#define WD_PERFCOUNTER3_HI__PERFCOUNTER_HI__SHIFT 0x0
/*
 * DIDT_IND_INDEX / DIDT_IND_DATA: each register holds one field covering all
 * 32 bits (mask 0xffffffff, shift 0).
 * NOTE(review): the INDEX/DATA naming suggests an indirect-access register
 * pair; confirm against the register specification.
 */
#define DIDT_IND_INDEX__DIDT_IND_INDEX_MASK 0xffffffff
#define DIDT_IND_INDEX__DIDT_IND_INDEX__SHIFT 0x0
#define DIDT_IND_DATA__DIDT_IND_DATA_MASK 0xffffffff
#define DIDT_IND_DATA__DIDT_IND_DATA__SHIFT 0x0
/*
 * DIDT_SQ_* register fields.
 * Each field is described by a <REG>__<FIELD>_MASK (bits occupied) and a
 * <REG>__<FIELD>__SHIFT (index of the field's lowest bit).
 */
/*
 * DIDT_SQ_CTRL0: DIDT_CTRL_EN bit 0, USE_REF_CLOCK bit 1, PHASE_OFFSET bits
 * [3:2], DIDT_CTRL_RST bit 4, DIDT_CLK_EN_OVERRIDE bit 5.
 */
#define DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK 0x1
#define DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT 0x0
#define DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK 0x2
#define DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT 0x1
#define DIDT_SQ_CTRL0__PHASE_OFFSET_MASK 0xc
#define DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT 0x2
#define DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK 0x10
#define DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
#define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
#define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
/* DIDT_SQ_CTRL1: MIN_POWER bits [15:0], MAX_POWER bits [31:16]. */
#define DIDT_SQ_CTRL1__MIN_POWER_MASK 0xffff
#define DIDT_SQ_CTRL1__MIN_POWER__SHIFT 0x0
#define DIDT_SQ_CTRL1__MAX_POWER_MASK 0xffff0000
#define DIDT_SQ_CTRL1__MAX_POWER__SHIFT 0x10
/*
 * DIDT_SQ_CTRL2: MAX_POWER_DELTA bits [13:0], SHORT_TERM_INTERVAL_SIZE bits
 * [25:16], LONG_TERM_INTERVAL_RATIO bits [30:27].
 */
#define DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK 0x3fff
#define DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT 0x0
#define DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK 0x3ff0000
#define DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT 0x10
#define DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK 0x78000000
#define DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT 0x1b
/*
 * DIDT_SQ_WEIGHT0_3 / 4_7 / 8_11: four 8-bit WEIGHTn fields per register,
 * packed from bit 0 upwards (shifts 0, 8, 16, 24).
 */
#define DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK 0xff
#define DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT 0x0
#define DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK 0xff00
#define DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT 0x8
#define DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK 0xff0000
#define DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT 0x10
#define DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK 0xff000000
#define DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT 0x18
#define DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK 0xff
#define DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT 0x0
#define DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK 0xff00
#define DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT 0x8
#define DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK 0xff0000
#define DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT 0x10
#define DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK 0xff000000
#define DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT 0x18
#define DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK 0xff
#define DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT 0x0
#define DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK 0xff00
#define DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT 0x8
#define DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK 0xff0000
#define DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT 0x10
#define DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK 0xff000000
#define DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT 0x18
/*
 * DIDT_DB_* register fields.
 * Each field is described by a <REG>__<FIELD>_MASK (bits occupied) and a
 * <REG>__<FIELD>__SHIFT (index of the field's lowest bit).
 */
/*
 * DIDT_DB_CTRL0: DIDT_CTRL_EN bit 0, USE_REF_CLOCK bit 1, PHASE_OFFSET bits
 * [3:2], DIDT_CTRL_RST bit 4, DIDT_CLK_EN_OVERRIDE bit 5.
 */
#define DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK 0x1
#define DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT 0x0
#define DIDT_DB_CTRL0__USE_REF_CLOCK_MASK 0x2
#define DIDT_DB_CTRL0__USE_REF_CLOCK__SHIFT 0x1
#define DIDT_DB_CTRL0__PHASE_OFFSET_MASK 0xc
#define DIDT_DB_CTRL0__PHASE_OFFSET__SHIFT 0x2
#define DIDT_DB_CTRL0__DIDT_CTRL_RST_MASK 0x10
#define DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
#define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
#define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
/* DIDT_DB_CTRL1: MIN_POWER bits [15:0], MAX_POWER bits [31:16]. */
#define DIDT_DB_CTRL1__MIN_POWER_MASK 0xffff
#define DIDT_DB_CTRL1__MIN_POWER__SHIFT 0x0
#define DIDT_DB_CTRL1__MAX_POWER_MASK 0xffff0000
#define DIDT_DB_CTRL1__MAX_POWER__SHIFT 0x10
/*
 * DIDT_DB_CTRL2: MAX_POWER_DELTA bits [13:0], SHORT_TERM_INTERVAL_SIZE bits
 * [25:16], LONG_TERM_INTERVAL_RATIO bits [30:27].
 */
#define DIDT_DB_CTRL2__MAX_POWER_DELTA_MASK 0x3fff
#define DIDT_DB_CTRL2__MAX_POWER_DELTA__SHIFT 0x0
#define DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK 0x3ff0000
#define DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT 0x10
#define DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK 0x78000000
#define DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT 0x1b
/*
 * DIDT_DB_WEIGHT0_3 / 4_7 / 8_11: four 8-bit WEIGHTn fields per register,
 * packed from bit 0 upwards (shifts 0, 8, 16, 24).
 */
#define DIDT_DB_WEIGHT0_3__WEIGHT0_MASK 0xff
#define DIDT_DB_WEIGHT0_3__WEIGHT0__SHIFT 0x0
#define DIDT_DB_WEIGHT0_3__WEIGHT1_MASK 0xff00
#define DIDT_DB_WEIGHT0_3__WEIGHT1__SHIFT 0x8
#define DIDT_DB_WEIGHT0_3__WEIGHT2_MASK 0xff0000
#define DIDT_DB_WEIGHT0_3__WEIGHT2__SHIFT 0x10
#define DIDT_DB_WEIGHT0_3__WEIGHT3_MASK 0xff000000
#define DIDT_DB_WEIGHT0_3__WEIGHT3__SHIFT 0x18
#define DIDT_DB_WEIGHT4_7__WEIGHT4_MASK 0xff
#define DIDT_DB_WEIGHT4_7__WEIGHT4__SHIFT 0x0
#define DIDT_DB_WEIGHT4_7__WEIGHT5_MASK 0xff00
#define DIDT_DB_WEIGHT4_7__WEIGHT5__SHIFT 0x8
#define DIDT_DB_WEIGHT4_7__WEIGHT6_MASK 0xff0000
#define DIDT_DB_WEIGHT4_7__WEIGHT6__SHIFT 0x10
#define DIDT_DB_WEIGHT4_7__WEIGHT7_MASK 0xff000000
#define DIDT_DB_WEIGHT4_7__WEIGHT7__SHIFT 0x18
#define DIDT_DB_WEIGHT8_11__WEIGHT8_MASK 0xff
#define DIDT_DB_WEIGHT8_11__WEIGHT8__SHIFT 0x0
#define DIDT_DB_WEIGHT8_11__WEIGHT9_MASK 0xff00
#define DIDT_DB_WEIGHT8_11__WEIGHT9__SHIFT 0x8
#define DIDT_DB_WEIGHT8_11__WEIGHT10_MASK 0xff0000
#define DIDT_DB_WEIGHT8_11__WEIGHT10__SHIFT 0x10
#define DIDT_DB_WEIGHT8_11__WEIGHT11_MASK 0xff000000
#define DIDT_DB_WEIGHT8_11__WEIGHT11__SHIFT 0x18
/*
 * DIDT_TD_* register fields.
 * Each field is described by a <REG>__<FIELD>_MASK (bits occupied) and a
 * <REG>__<FIELD>__SHIFT (index of the field's lowest bit).
 */
/*
 * DIDT_TD_CTRL0: DIDT_CTRL_EN bit 0, USE_REF_CLOCK bit 1, PHASE_OFFSET bits
 * [3:2], DIDT_CTRL_RST bit 4, DIDT_CLK_EN_OVERRIDE bit 5.
 */
#define DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK 0x1
#define DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT 0x0
#define DIDT_TD_CTRL0__USE_REF_CLOCK_MASK 0x2
#define DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT 0x1
#define DIDT_TD_CTRL0__PHASE_OFFSET_MASK 0xc
#define DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT 0x2
#define DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK 0x10
#define DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
#define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
#define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
/* DIDT_TD_CTRL1: MIN_POWER bits [15:0], MAX_POWER bits [31:16]. */
#define DIDT_TD_CTRL1__MIN_POWER_MASK 0xffff
#define DIDT_TD_CTRL1__MIN_POWER__SHIFT 0x0
#define DIDT_TD_CTRL1__MAX_POWER_MASK 0xffff0000
#define DIDT_TD_CTRL1__MAX_POWER__SHIFT 0x10
/*
 * DIDT_TD_CTRL2: MAX_POWER_DELTA bits [13:0], SHORT_TERM_INTERVAL_SIZE bits
 * [25:16], LONG_TERM_INTERVAL_RATIO bits [30:27].
 */
#define DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK 0x3fff
#define DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT 0x0
#define DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK 0x3ff0000
#define DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT 0x10
#define DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK 0x78000000
#define DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT 0x1b
/*
 * DIDT_TD_WEIGHT0_3 / 4_7 / 8_11: four 8-bit WEIGHTn fields per register,
 * packed from bit 0 upwards (shifts 0, 8, 16, 24).
 */
#define DIDT_TD_WEIGHT0_3__WEIGHT0_MASK 0xff
#define DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT 0x0
#define DIDT_TD_WEIGHT0_3__WEIGHT1_MASK 0xff00
#define DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT 0x8
#define DIDT_TD_WEIGHT0_3__WEIGHT2_MASK 0xff0000
#define DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT 0x10
#define DIDT_TD_WEIGHT0_3__WEIGHT3_MASK 0xff000000
#define DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT 0x18
#define DIDT_TD_WEIGHT4_7__WEIGHT4_MASK 0xff
#define DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT 0x0
#define DIDT_TD_WEIGHT4_7__WEIGHT5_MASK 0xff00
#define DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT 0x8
#define DIDT_TD_WEIGHT4_7__WEIGHT6_MASK 0xff0000
#define DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT 0x10
#define DIDT_TD_WEIGHT4_7__WEIGHT7_MASK 0xff000000
#define DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT 0x18
#define DIDT_TD_WEIGHT8_11__WEIGHT8_MASK 0xff
#define DIDT_TD_WEIGHT8_11__WEIGHT8__SHIFT 0x0
#define DIDT_TD_WEIGHT8_11__WEIGHT9_MASK 0xff00
#define DIDT_TD_WEIGHT8_11__WEIGHT9__SHIFT 0x8
#define DIDT_TD_WEIGHT8_11__WEIGHT10_MASK 0xff0000
#define DIDT_TD_WEIGHT8_11__WEIGHT10__SHIFT 0x10
#define DIDT_TD_WEIGHT8_11__WEIGHT11_MASK 0xff000000
#define DIDT_TD_WEIGHT8_11__WEIGHT11__SHIFT 0x18
/*
 * DIDT_TCP_* register fields.
 * Each field is described by a <REG>__<FIELD>_MASK (bits occupied) and a
 * <REG>__<FIELD>__SHIFT (index of the field's lowest bit).
 */
/*
 * DIDT_TCP_CTRL0: DIDT_CTRL_EN bit 0, USE_REF_CLOCK bit 1, PHASE_OFFSET bits
 * [3:2], DIDT_CTRL_RST bit 4, DIDT_CLK_EN_OVERRIDE bit 5.
 */
#define DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK 0x1
#define DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT 0x0
#define DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK 0x2
#define DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT 0x1
#define DIDT_TCP_CTRL0__PHASE_OFFSET_MASK 0xc
#define DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT 0x2
#define DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK 0x10
#define DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
#define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
#define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
/* DIDT_TCP_CTRL1: MIN_POWER bits [15:0], MAX_POWER bits [31:16]. */
#define DIDT_TCP_CTRL1__MIN_POWER_MASK 0xffff
#define DIDT_TCP_CTRL1__MIN_POWER__SHIFT 0x0
#define DIDT_TCP_CTRL1__MAX_POWER_MASK 0xffff0000
#define DIDT_TCP_CTRL1__MAX_POWER__SHIFT 0x10
/*
 * DIDT_TCP_CTRL2: MAX_POWER_DELTA bits [13:0], SHORT_TERM_INTERVAL_SIZE bits
 * [25:16], LONG_TERM_INTERVAL_RATIO bits [30:27].
 */
#define DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK 0x3fff
#define DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT 0x0
#define DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK 0x3ff0000
#define DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT 0x10
#define DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK 0x78000000
#define DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT 0x1b
/*
 * DIDT_TCP_WEIGHT0_3 / 4_7 / 8_11: four 8-bit WEIGHTn fields per register,
 * packed from bit 0 upwards (shifts 0, 8, 16, 24).
 */
#define DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK 0xff
#define DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT 0x0
#define DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK 0xff00
#define DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT 0x8
#define DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK 0xff0000
#define DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT 0x10
#define DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK 0xff000000
#define DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT 0x18
#define DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK 0xff
#define DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT 0x0
#define DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK 0xff00
#define DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT 0x8
#define DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK 0xff0000
#define DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT 0x10
#define DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK 0xff000000
#define DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT 0x18
#define DIDT_TCP_WEIGHT8_11__WEIGHT8_MASK 0xff
#define DIDT_TCP_WEIGHT8_11__WEIGHT8__SHIFT 0x0
#define DIDT_TCP_WEIGHT8_11__WEIGHT9_MASK 0xff00
#define DIDT_TCP_WEIGHT8_11__WEIGHT9__SHIFT 0x8
#define DIDT_TCP_WEIGHT8_11__WEIGHT10_MASK 0xff0000
#define DIDT_TCP_WEIGHT8_11__WEIGHT10__SHIFT 0x10
#define DIDT_TCP_WEIGHT8_11__WEIGHT11_MASK 0xff000000
#define DIDT_TCP_WEIGHT8_11__WEIGHT11__SHIFT 0x18

#endif /* GFX_7_2_SH_MASK_H */


================================================
FILE: libhsakmt/tests/kfdtest/include/kfd_pm4_opcodes.h
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */


#ifndef KFD_PM4_OPCODES_H
#define KFD_PM4_OPCODES_H

/*
 * PM4 "IT" opcode numbers.
 *
 * Every value fits in 8 bits (largest is 0xA5). The list is kept in
 * ascending numeric order; gaps in the numbering are opcodes this header
 * does not name.
 */
enum it_opcode_type
{
    /* 0x10 - 0x1F */
    IT_NOP = 0x10,
    IT_SET_BASE = 0x11,
    IT_CLEAR_STATE = 0x12,
    IT_INDEX_BUFFER_SIZE = 0x13,
    IT_DISPATCH_DIRECT = 0x15,
    IT_DISPATCH_INDIRECT = 0x16,
    IT_ATOMIC_GDS = 0x1D,
    IT_OCCLUSION_QUERY = 0x1F,

    /* 0x20 - 0x2F */
    IT_SET_PREDICATION = 0x20,
    IT_REG_RMW = 0x21,
    IT_COND_EXEC = 0x22,
    IT_PRED_EXEC = 0x23,
    IT_DRAW_INDIRECT = 0x24,
    IT_DRAW_INDEX_INDIRECT = 0x25,
    IT_INDEX_BASE = 0x26,
    IT_DRAW_INDEX_2 = 0x27,
    IT_CONTEXT_CONTROL = 0x28,
    IT_INDEX_TYPE = 0x2A,
    IT_DRAW_INDIRECT_MULTI = 0x2C,
    IT_DRAW_INDEX_AUTO = 0x2D,
    IT_NUM_INSTANCES = 0x2F,

    /* 0x30 - 0x3F */
    IT_DRAW_INDEX_MULTI_AUTO = 0x30,
    IT_INDIRECT_BUFFER_CNST = 0x33,
    IT_STRMOUT_BUFFER_UPDATE = 0x34,
    IT_DRAW_INDEX_OFFSET_2 = 0x35,
    IT_DRAW_PREAMBLE = 0x36,
    IT_WRITE_DATA = 0x37,
    IT_DRAW_INDEX_INDIRECT_MULTI = 0x38,
    IT_MEM_SEMAPHORE = 0x39,
    IT_COPY_DW = 0x3B,
    IT_WAIT_REG_MEM = 0x3C,
    IT_INDIRECT_BUFFER = 0x3F,

    /* 0x40 - 0x4F */
    IT_COPY_DATA = 0x40,
    IT_PFP_SYNC_ME = 0x42,
    IT_SURFACE_SYNC = 0x43,
    IT_COND_WRITE = 0x45,
    IT_EVENT_WRITE = 0x46,
    IT_EVENT_WRITE_EOP = 0x47,
    IT_EVENT_WRITE_EOS = 0x48,
    IT_RELEASE_MEM = 0x49,
    IT_PREAMBLE_CNTL = 0x4A,

    /* 0x50 - 0x5F */
    IT_DMA_DATA = 0x50,
    IT_ACQUIRE_MEM = 0x58,
    IT_REWIND = 0x59,
    IT_LOAD_UCONFIG_REG = 0x5E,
    IT_LOAD_SH_REG = 0x5F,

    /* 0x60 - 0x7F */
    IT_LOAD_CONFIG_REG = 0x60,
    IT_LOAD_CONTEXT_REG = 0x61,
    IT_SET_CONFIG_REG = 0x68,
    IT_SET_CONTEXT_REG = 0x69,
    IT_SET_CONTEXT_REG_INDIRECT = 0x73,
    IT_SET_SH_REG = 0x76,
    IT_SET_SH_REG_OFFSET = 0x77,
    IT_SET_QUEUE_REG = 0x78,
    IT_SET_UCONFIG_REG = 0x79,
    IT_SCRATCH_RAM_WRITE = 0x7D,
    IT_SCRATCH_RAM_READ = 0x7E,

    /* 0x80 - 0x8F */
    IT_LOAD_CONST_RAM = 0x80,
    IT_WRITE_CONST_RAM = 0x81,
    IT_DUMP_CONST_RAM = 0x83,
    IT_INCREMENT_CE_COUNTER = 0x84,
    IT_INCREMENT_DE_COUNTER = 0x85,
    IT_WAIT_ON_CE_COUNTER = 0x86,
    IT_WAIT_ON_DE_COUNTER_DIFF = 0x88,
    IT_SWITCH_BUFFER = 0x8B,

    /* 0xA0 - 0xA5 */
    IT_SET_RESOURCES = 0xA0,
    IT_MAP_PROCESS = 0xA1,
    IT_MAP_QUEUES = 0xA2,
    IT_UNMAP_QUEUES = 0xA3,
    IT_QUERY_STATUS = 0xA4,
    IT_RUN_LIST = 0xA5,
};

/*
 * PM4 packet type numbers (0, 2 and 3; no type 1 is defined here).
 * NOTE(review): presumably the values carried in the 2-bit packet-type field
 * of a PM4 header dword; confirm against the packet header definition.
 */
#define PM4_TYPE_0 0
#define PM4_TYPE_2 2
#define PM4_TYPE_3 3

#endif /* KFD_PM4_OPCODES_H */


================================================
FILE: libhsakmt/tests/kfdtest/include/pm4_pkt_struct_ai.h
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __PM4_PKT_STRUCT_AI_H__
#define __PM4_PKT_STRUCT_AI_H__

#ifndef PM4_MEC_RELEASE_MEM_AI_DEFINED
#define PM4_MEC_RELEASE_MEM_AI_DEFINED

// Encodings for the 4-bit event_index bitfield (ordinal 2) of
// PM4_MEC_RELEASE_MEM_AI. Only values 5 and 6 are named here.
enum AI_MEC_RELEASE_MEM_event_index_enum
{
    event_index__mec_release_mem__end_of_pipe = 5,
    event_index__mec_release_mem__shader_done = 6
};

// Encodings for the 2-bit cache_policy bitfield (ordinal 2) of
// PM4_MEC_RELEASE_MEM_AI.
enum AI_MEC_RELEASE_MEM_cache_policy_enum
{
    cache_policy__mec_release_mem__lru = 0,
    cache_policy__mec_release_mem__stream = 1
};

// Encodings for the 1-bit pq_exe_status bitfield (ordinal 2) of
// PM4_MEC_RELEASE_MEM_AI.
enum AI_MEC_RELEASE_MEM_pq_exe_status_enum
{
    pq_exe_status__mec_release_mem__default = 0,
    pq_exe_status__mec_release_mem__phase_update = 1
};

// Encodings for the 2-bit dst_sel bitfield (ordinal 3) of
// PM4_MEC_RELEASE_MEM_AI. All four possible values are named.
enum AI_MEC_RELEASE_MEM_dst_sel_enum
{
    dst_sel__mec_release_mem__memory_controller = 0,
    dst_sel__mec_release_mem__tc_l2 = 1,
    dst_sel__mec_release_mem__queue_write_pointer_register = 2,
    dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
};

// Encodings for the 3-bit int_sel bitfield (ordinal 3) of
// PM4_MEC_RELEASE_MEM_AI. Values 0..6 are named; 7 is not defined here.
enum AI_MEC_RELEASE_MEM_int_sel_enum
{
    int_sel__mec_release_mem__none = 0,
    int_sel__mec_release_mem__send_interrupt_only = 1,
    int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
    int_sel__mec_release_mem__send_data_after_write_confirm = 3,
    int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
    int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
    int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
};

// Encodings for the 3-bit data_sel bitfield (ordinal 3) of
// PM4_MEC_RELEASE_MEM_AI. Values 0..5 are named; 6 and 7 are not defined here.
enum AI_MEC_RELEASE_MEM_data_sel_enum
{
    data_sel__mec_release_mem__none = 0,
    data_sel__mec_release_mem__send_32_bit_low = 1,
    data_sel__mec_release_mem__send_64_bit_data = 2,
    data_sel__mec_release_mem__send_gpu_clock_counter = 3,
    data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
    data_sel__mec_release_mem__store_gds_data_to_memory = 5
};


// RELEASE_MEM packet layout, "AI" MEC variant.
//
// The packet is a sequence of dword "ordinals". Each anonymous union overlays
// the named fields of one ordinal with a raw `ordinalN` word, so a caller can
// either set individual fields or write the whole dword at once.
// PM4_TYPE_3_HEADER is not declared in this header; it must already be visible
// when this file is included.
// Member order and bitfield widths define the layout; do not reorder. The
// exact bit positions follow the compiler's bitfield allocation order.
typedef struct PM4_MEC_RELEASE_MEM_AI {
    // Ordinal 1: packet header.
    union {
        PM4_TYPE_3_HEADER   header;
        unsigned int        ordinal1;
    };

    // Ordinal 2: event_type/event_index, the *_action_ena flags, cache_policy
    // and pq_exe_status. The declared widths add up to exactly 32 bits.
    union {
        struct {
            unsigned int event_type:6;
            unsigned int reserved1:2;
            AI_MEC_RELEASE_MEM_event_index_enum event_index:4;
            unsigned int tcl1_vol_action_ena:1;
            unsigned int tc_vol_action_ena:1;
            unsigned int reserved2:1;
            unsigned int tc_wb_action_ena:1;
            unsigned int tcl1_action_ena:1;
            unsigned int tc_action_ena:1;
            unsigned int reserved3:1;
            unsigned int tc_nc_action_ena:1;
            unsigned int tc_wc_action_ena:1;
            unsigned int tc_md_action_ena:1;
            unsigned int reserved4:3;
            AI_MEC_RELEASE_MEM_cache_policy_enum cache_policy:2;
            unsigned int reserved5:2;
            AI_MEC_RELEASE_MEM_pq_exe_status_enum pq_exe_status:1;
            unsigned int reserved6:2;
        } bitfields2;
        unsigned int ordinal2;
    };

    // Ordinal 3: dst_sel, int_sel and data_sel selectors (widths total 32).
    union {
        struct {
            unsigned int reserved7:16;
            AI_MEC_RELEASE_MEM_dst_sel_enum dst_sel:2;
            unsigned int reserved8:6;
            AI_MEC_RELEASE_MEM_int_sel_enum int_sel:3;
            unsigned int reserved9:2;
            AI_MEC_RELEASE_MEM_data_sel_enum data_sel:3;
        } bitfields3;
        unsigned int ordinal3;
    };

    // Ordinal 4: low address bits. bitfields4a reserves 2 bits before a 30-bit
    // address field; bitfields4b reserves 3 bits before a 29-bit field.
    // NOTE(review): presumably for 4-byte vs 8-byte aligned targets - confirm.
    union {
        struct {
            unsigned int reserved10:2;
            unsigned int address_lo_32b:30;
        } bitfields4a;
        struct {
            unsigned int reserved11:3;
            unsigned int address_lo_64b:29;
        } bitfields4b;
        unsigned int reserved12;

        unsigned int ordinal4;
    };

    // Ordinal 5: high address bits.
    union {
        unsigned int address_hi;

        unsigned int reserved13;

        unsigned int ordinal5;
    };

    // Ordinal 6: alternative views of the same dword - data_lo, cmp_data_lo,
    // or a 16-bit dw_offset plus 16-bit num_dwords pair.
    union {
        unsigned int data_lo;

        unsigned int cmp_data_lo;

        struct {
            unsigned int dw_offset:16;
            unsigned int num_dwords:16;
        } bitfields6c;
        unsigned int reserved14;

        unsigned int ordinal6;
    };

    // Ordinal 7: alternative views of the same dword - data_hi or cmp_data_hi.
    union {
        unsigned int data_hi;

        unsigned int cmp_data_hi;

        unsigned int reserved15;

        unsigned int reserved16;

        unsigned int ordinal7;
    };

    // Ordinal 8: plain dword, not wrapped in a union.
    unsigned int int_ctxid;
} PM4MEC_RELEASE_MEM_AI, *PPM4MEC_RELEASE_MEM_AI;

#endif  // PM4_MEC_RELEASE_MEM_AI_DEFINED
#endif  // __PM4_PKT_STRUCT_AI_H__


================================================
FILE: libhsakmt/tests/kfdtest/include/pm4_pkt_struct_ci.h
================================================
/*
 * Copyright (C) 2012-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __PM4_PKT_STRUCT_CI_H__
#define __PM4_PKT_STRUCT_CI_H__


// Encodings for the 1-bit atc bitfield of PM4WRITE_DATA_CI.
enum WRITE_DATA_CI_atc_enum
{
    atc_write_data_NOT_USE_ATC_0 = 0,
    atc_write_data_USE_ATC_1 = 1
};
// Encodings for the 2-bit engine_sel bitfield of PM4WRITE_DATA_CI.
// Values 0..2 are named; 3 is not defined here.
enum WRITE_DATA_CI_engine_sel
{
    engine_sel_write_data_ci_MICRO_ENGINE_0 = 0,
    engine_sel_write_data_ci_PREFETCH_PARSER_1 = 1,
    engine_sel_write_data_ci_CONST_ENG_2 = 2
};

// WRITE_DATA packet layout, CI variant.
//
// Layout: header dword, one control dword, a 64-bit destination address split
// into two dwords, then the payload. PM4_TYPE_3_HEADER and the
// MEC_WRITE_DATA_* enums are not declared in this header; they must already be
// visible when this file is included.
// Member order and bitfield widths define the layout; do not reorder.
typedef struct _PM4WRITE_DATA_CI {
    // Ordinal 1: packet header.
    union {
        PM4_TYPE_3_HEADER   header;
        unsigned int        ordinal1;
    };

    // Ordinal 2: control fields. The declared widths add up to exactly 32 bits.
    union {
        struct {
            unsigned int reserved1:8;
            MEC_WRITE_DATA_dst_sel_enum dst_sel:4;
            unsigned int reserved2:4;
            MEC_WRITE_DATA_addr_incr_enum addr_incr:1;
            unsigned int reserved3:3;
            MEC_WRITE_DATA_wr_confirm_enum wr_confirm:1;
            unsigned int reserved4:3;
            WRITE_DATA_CI_atc_enum atc:1;
            MEC_WRITE_DATA_cache_policy_enum cache_policy:2;
            unsigned int volatile_setting:1;
            unsigned int reserved5:2;
            WRITE_DATA_CI_engine_sel engine_sel:2;
        } bitfields2;
        unsigned int ordinal2;
    };

    // Ordinal 3: low dword of the destination address.
    unsigned int dst_addr_lo;

    // Ordinal 4: high dword of the destination address.
    unsigned int dst_address_hi;

    // Payload. Declared with one element, but the packet carries 1..N dwords,
    // so sizeof(PM4WRITE_DATA_CI) only accounts for the first one.
    unsigned int data[1];    // 1..N of these fields
}  PM4WRITE_DATA_CI, *PPM4WRITE_DATA_CI;


// Encodings for the 1-bit atc bitfield of PM4_RELEASE_MEM_CI.
enum MEC_RELEASE_MEM_CI_atc_enum
{
    atc_mec_release_mem_ci_NOT_USE_ATC_0 = 0,
    atc_mec_release_mem_ci_USE_ATC_1 = 1
};

// RELEASE_MEM packet layout, CI variant: seven dwords.
//
// PM4_TYPE_3_HEADER and the MEC_RELEASE_MEM_* enums (other than
// MEC_RELEASE_MEM_CI_atc_enum) are not declared in this header; they must
// already be visible when this file is included.
// Member order and bitfield widths define the layout; do not reorder.
typedef struct _PM4_RELEASE_MEM_CI {
    // Ordinal 1: packet header.
    union {
        PM4_TYPE_3_HEADER   header;
        unsigned int        ordinal1;
    };

    // Ordinal 2: event selection, the l1/l2 flags, atc, cache_policy and
    // volatile_setting. The declared widths add up to exactly 32 bits.
    union {
        struct {
            unsigned int event_type:6;
            unsigned int reserved1:2;
            MEC_RELEASE_MEM_event_index_enum event_index:4;
            unsigned int l1_vol:1;
            unsigned int l2_vol:1;
            unsigned int reserved:1;
            unsigned int l2_wb:1;
            unsigned int l1_inv:1;
            unsigned int l2_inv:1;
            unsigned int reserved2:6;
            MEC_RELEASE_MEM_CI_atc_enum atc:1;
            MEC_RELEASE_MEM_cache_policy_enum cache_policy:2;
            unsigned int volatile_setting:1;
            unsigned int reserved3:4;
        } bitfields2;
        unsigned int ordinal2;
    };

    // Ordinal 3: dst_sel, int_sel and data_sel selectors (widths total 32).
    union {
        struct {
            unsigned int reserved4:16;
            MEC_RELEASE_MEM_dst_sel_enum dst_sel:2;
            unsigned int reserved5:6;
            MEC_RELEASE_MEM_int_sel_enum int_sel:3;
            unsigned int reserved6:2;
            MEC_RELEASE_MEM_data_sel_enum data_sel:3;
        } bitfields3;
        unsigned int ordinal3;
    };

    // Ordinal 4: low address bits, in a dword-aligned view (2 reserved bits +
    // 30-bit field) or a qword-aligned view (3 reserved bits + 29-bit field).
    union {
        struct {
            unsigned int reserved7:2;
            unsigned int address_lo_dword_aligned:30;
        } bitfields4a;
        struct {
            unsigned int reserved8:3;
            unsigned int address_lo_qword_aligned:29;
        } bitfields4b;
        unsigned int ordinal4;
    };

    // Ordinal 5: high address bits (plain dword, no union).
    unsigned int addr_hi;

    // Ordinal 6: data_lo, or a 16-bit offset plus 16-bit num_dwords pair.
    // Note the struct is named bitfields5b although it overlays ordinal6.
    union {
        unsigned int data_lo;
        struct {
            unsigned int offset:16;
            unsigned int num_dwords:16;
        } bitfields5b;
        unsigned int ordinal6;
    };

    // Ordinal 7: high data dword.
    unsigned int data_hi;
}  PM4_RELEASE_MEM_CI, *PPM4_RELEASE_MEM_CI;

#endif  // __PM4_PKT_STRUCT_CI_H__


================================================
FILE: libhsakmt/tests/kfdtest/include/pm4_pkt_struct_common.h
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __PM4_PKT_STRUCT_COMMON_H__
#define __PM4_PKT_STRUCT_COMMON_H__

#ifndef PM4_HEADER_DEFINED
#define PM4_HEADER_DEFINED
/// First dword of a type-3 PM4 packet.
///
/// The anonymous bitfield struct and u32All overlay the same storage, so the
/// header can be filled field by field or written as one raw 32-bit value.
/// The bitfield widths add up to exactly 32 bits (1 + 1 + 6 + 8 + 14 + 2).
/// Field order defines the layout; the exact bit positions follow the
/// compiler's bitfield allocation order.
typedef union PM4_TYPE_3_HEADER
{
    struct
    {
        unsigned int predicate : 1; ///< predicated version of packet when set
        unsigned int shaderType: 1; ///< 0: Graphics, 1: Compute Shader
        unsigned int reserved1 : 6; ///< reserved
        unsigned int opcode    : 8; ///< IT opcode
        unsigned int count     : 14;///< number of DWORDs - 1 in the information body.
        unsigned int type      : 2; ///< packet identifier. It should be 3 for type 3 packets
    };
    unsigned int u32All;            ///< the whole header as one raw dword
} PM4_TYPE_3_HEADER;
#endif // PM4_HEADER_DEFINED

//--------------------DISPATCH_DIRECT--------------------


/// DISPATCH_DIRECT packet layout: five dwords - the header followed by dim_x,
/// dim_y, dim_z and dispatch_initiator. Member order defines the layout.
typedef struct _PM4_DISPATCH_DIRECT
{
    /// Ordinal 1: packet header, also accessible as the raw dword ordinal1.
    union
    {
        PM4_TYPE_3_HEADER   header;            ///header
        unsigned int        ordinal1;
    };

    /// Ordinal 2.
    unsigned int dim_x;


    /// Ordinal 3.
    unsigned int dim_y;


    /// Ordinal 4.
    unsigned int dim_z;


    /// Ordinal 5.
    unsigned int dispatch_initiator;


}  PM4DISPATCH_DIRECT, *PPM4DISPATCH_DIRECT;

//--------------------INDIRECT_BUFFER--------------------

/// Encodings for the 2-bit cache_policy bitfield of PM4MEC_INDIRECT_BUFFER.
/// Values 0..2 are named; 3 is not defined here.
enum INDIRECT_BUFFER_cache_policy_enum
{
    cache_policy_indirect_buffer_LRU_0 = 0,
    cache_policy_indirect_buffer_STREAM_1 = 1,
    cache_policy_indirect_buffer_BYPASS_2 = 2
};


//--------------------EVENT_WRITE--------------------

/// Encodings for the 4-bit event_index bitfield of PM4EVENT_WRITE.
/// Values 0..7 are named, one per line, in ascending order.
enum EVENT_WRITE_event_index_enum
{
    event_index_event_write_OTHER_0 = 0,
    event_index_event_write_ZPASS_DONE_1 = 1,
    event_index_event_write_SAMPLE_PIPELINESTAT_2 = 2,
    event_index_event_write_SAMPLE_STREAMOUTSTAT_3 = 3,
    event_index_event_write_CS_VS_PS_PARTIAL_FLUSH_4 = 4,
    event_index_event_write_RESERVED_EOP_5 = 5,
    event_index_event_write_RESERVED_EOS_6 = 6,
    event_index_event_write_CACHE_FLUSH_7 = 7
};

/// EVENT_WRITE packet layout: four dwords. Each union overlays the named
/// bitfields of one dword with its raw ordinalN value. Member order and
/// bitfield widths define the layout; do not reorder.
typedef struct _PM4_EVENT_WRITE
{
    /// Ordinal 1: packet header.
    union
    {
        PM4_TYPE_3_HEADER   header;            ///header
        unsigned int        ordinal1;
    };

    /// Ordinal 2: 6-bit event_type and 4-bit event_index; the remaining
    /// 22 bits are reserved.
    union
    {
        struct
        {
            unsigned int event_type:6;
            unsigned int reserved1:2;
            EVENT_WRITE_event_index_enum event_index:4;
            unsigned int reserved2:20;
        } bitfields2;
        unsigned int ordinal2;
    };

    /// Ordinal 3: 29-bit address_lo preceded by 3 reserved bits.
    union
    {
        struct
        {
            unsigned int reserved3:3;
            unsigned int address_lo:29;
        } bitfields3;
        unsigned int ordinal3;
    };

    /// Ordinal 4: 16-bit address_hi followed by 16 reserved bits.
    union
    {
        struct
        {
            unsigned int address_hi:16;
            unsigned int reserved4:16;
        } bitfields4;
        unsigned int ordinal4;
    };

}  PM4EVENT_WRITE, *PPM4EVENT_WRITE;


//--------------------SET_SH_REG--------------------


/// SET_SH_REG packet layout: header, a register-offset dword, then the
/// register data. Member order and bitfield widths define the layout.
typedef struct _PM4_SET_SH_REG
{
    /// Ordinal 1: packet header.
    union
    {
        PM4_TYPE_3_HEADER   header;            ///header
        unsigned int        ordinal1;
    };

    /// Ordinal 2: 16-bit reg_offset followed by 16 reserved bits.
    union
    {
        struct
        {
            unsigned int reg_offset:16;
            unsigned int reserved1:16;
        } bitfields2;
        unsigned int ordinal2;
    };

    /// Register data. Declared with one element, but the packet carries 1..N
    /// dwords, so sizeof(PM4SET_SH_REG) only accounts for the first one.
    unsigned int reg_data[1];    //1..N of these fields


}  PM4SET_SH_REG, *PPM4SET_SH_REG;


//--------------------ACQUIRE_MEM--------------------

/// Encodings for the 1-bit engine bitfield of PM4ACQUIRE_MEM.
enum ACQUIRE_MEM_engine_enum
{
    engine_acquire_mem_PFP_0 = 0,
    engine_acquire_mem_ME_1 = 1
};


/// ACQUIRE_MEM packet layout: seven dwords. Member order and bitfield widths
/// define the layout; do not reorder.
///
/// Naming quirk: the bitfield structs are numbered sequentially
/// (bitfields2..bitfields5) while the raw views use the dword position
/// (ordinal2, ordinal4, ordinal6, ordinal7). The numbers diverge because
/// coher_size and coher_base_lo are plain members taking dwords 3 and 5.
typedef struct _PM4_ACQUIRE_MEM
{
    /// Ordinal 1: packet header.
    union
    {
        PM4_TYPE_3_HEADER   header;            ///header
        unsigned int        ordinal1;
    };

    /// Ordinal 2: 31-bit coher_cntl plus the 1-bit engine selector.
    union
    {
        struct
        {
            unsigned int coher_cntl:31;
            ACQUIRE_MEM_engine_enum engine:1;
        } bitfields2;
        unsigned int ordinal2;
    };

    /// Ordinal 3: coher_size as a full dword.
    unsigned int coher_size;


    /// Ordinal 4: 8-bit coher_size_hi followed by 24 reserved bits.
    union
    {
        struct
        {
            unsigned int coher_size_hi:8;
            unsigned int reserved1:24;
        } bitfields3;
        unsigned int ordinal4;
    };

    /// Ordinal 5: coher_base_lo as a full dword.
    unsigned int coher_base_lo;


    /// Ordinal 6: 25-bit coher_base_hi followed by 7 reserved bits.
    union
    {
        struct
        {
            unsigned int coher_base_hi:25;
            unsigned int reserved2:7;
        } bitfields4;
        unsigned int ordinal6;
    };

    /// Ordinal 7: 16-bit poll_interval followed by 16 reserved bits.
    union
    {
        struct
        {
            unsigned int poll_interval:16;
            unsigned int reserved3:16;
        } bitfields5;
        unsigned int ordinal7;
    };

}  PM4ACQUIRE_MEM, *PPM4ACQUIRE_MEM;


//--------------------MEC_INDIRECT_BUFFER--------------------

/// MEC INDIRECT_BUFFER packet layout: four dwords. Each union overlays the
/// named bitfields of one dword with its raw ordinalN value. Member order and
/// bitfield widths define the layout; do not reorder.
typedef struct _PM4_MEC_INDIRECT_BUFFER
{
    /// Ordinal 1: packet header.
    union
    {
        PM4_TYPE_3_HEADER   header;            ///header
        unsigned int        ordinal1;
    };

    /// Ordinal 2: 2-bit swap_function followed by the 30-bit ib_base_lo.
    union
    {
        struct
        {
            unsigned int swap_function:2;
            unsigned int ib_base_lo:30;
        } bitfields2;
        unsigned int ordinal2;
    };

    /// Ordinal 3: 16-bit ib_base_hi followed by 16 reserved bits.
    union
    {
        struct
        {
            unsigned int ib_base_hi:16;
            unsigned int reserved1:16;
        } bitfields3;
        unsigned int ordinal3;
    };

    /// Ordinal 4: 20-bit ib_size, four 1-bit flags, 4-bit vmid and the 2-bit
    /// cache_policy. The declared widths add up to exactly 32 bits.
    union
    {
        struct
        {
            unsigned int ib_size:20;
            unsigned int chain:1;
            unsigned int offload_polling:1;
            unsigned int volatile_setting:1;
            unsigned int valid:1;
            unsigned int vmid:4;
            INDIRECT_BUFFER_cache_policy_enum cache_policy:2;
            unsigned int reserved4:2;
        } bitfields4;
        unsigned int ordinal4;
    };

}  PM4MEC_INDIRECT_BUFFER, *PPM4MEC_INDIRECT_BUFFER;

//--------------------MEC_WAIT_REG_MEM--------------------

/* Encodings for the `function` field of PM4_MEC_WAIT_REG_MEM (3 bits wide). */
enum MEC_WAIT_REG_MEM_function_enum
{
    function__mec_wait_reg_mem__always_pass                           = 0,
    function__mec_wait_reg_mem__less_than_ref_value                   = 1,
    function__mec_wait_reg_mem__less_than_equal_to_the_ref_value      = 2,
    function__mec_wait_reg_mem__equal_to_the_reference_value          = 3,
    function__mec_wait_reg_mem__not_equal_reference_value             = 4,
    function__mec_wait_reg_mem__greater_than_or_equal_reference_value = 5,
    function__mec_wait_reg_mem__greater_than_reference_value          = 6
};

/* Encodings for the `mem_space` field of PM4_MEC_WAIT_REG_MEM. */
enum MEC_WAIT_REG_MEM_mem_space_enum
{
    mem_space__mec_wait_reg_mem__register_space = 0,
    mem_space__mec_wait_reg_mem__memory_space   = 1
};

/* Encodings for the `operation` field of PM4_MEC_WAIT_REG_MEM (value 2 is not defined here). */
enum MEC_WAIT_REG_MEM_operation_enum
{
    operation__mec_wait_reg_mem__wait_reg_mem         = 0,
    operation__mec_wait_reg_mem__wr_wait_wr_reg       = 1,
    operation__mec_wait_reg_mem__wait_mem_preemptable = 3
};


/**
 * MEC WAIT_REG_MEM packet layout.
 *
 * Seven ordinals: a type-3 header, a control word, two address words with
 * several alternative views, a reference value, a mask and a polling word.
 * Unions expose each word both as named fields and as a raw `ordinalN`.
 */
typedef struct PM4_MEC_WAIT_REG_MEM {
    /* ordinal1: type-3 packet header */
    union {
        PM4_TYPE_3_HEADER header;
        uint32_t ordinal1;
    };

    /* ordinal2: compare function, memory space and operation selectors */
    union {
        struct {
            MEC_WAIT_REG_MEM_function_enum function : 3;
            uint32_t reserved1 : 1;
            MEC_WAIT_REG_MEM_mem_space_enum mem_space : 2;
            MEC_WAIT_REG_MEM_operation_enum operation : 2;
            uint32_t reserved2 : 24;
        } bitfields2;
        uint32_t ordinal2;
    };

    /* ordinal3: three alternative views of the same word (3a/3b/3c) */
    union {
        struct {
            uint32_t reserved3 : 2;
            uint32_t mem_poll_addr_lo : 30;
        } bitfields3a;
        struct {
            uint32_t reg_poll_addr : 18;
            uint32_t reserved4 : 14;
        } bitfields3b;
        struct {
            uint32_t reg_write_addr1 : 18;
            uint32_t reserved5 : 14;
        } bitfields3c;
        uint32_t ordinal3;
    };

    /* ordinal4: either mem_poll_addr_hi or the 18-bit reg_write_addr2 */
    union {
        uint32_t mem_poll_addr_hi;

        struct {
            uint32_t reg_write_addr2 : 18;
            uint32_t reserved6 : 14;
        } bitfields4b;
        uint32_t ordinal4;
    };

    /* ordinal5 and ordinal6: plain 32-bit reference value and mask */
    uint32_t reference;

    uint32_t mask;

    /* ordinal7: 16-bit poll_interval; top bit is optimize_ace_offload_mode */
    union {
        struct {
            uint32_t poll_interval : 16;
            uint32_t reserved7 : 15;
            uint32_t optimize_ace_offload_mode : 1;
        } bitfields7;
        uint32_t ordinal7;
    };
} PM4MEC_WAIT_REG_MEM, *PPM4MEC_WAIT_REG_MEM;

//--------------------MEC_WRITE_DATA--------------------

/* MEC WRITE_DATA destination selector values. */
enum MEC_WRITE_DATA_dst_sel_enum
{
    dst_sel_mec_write_data_MEM_MAPPED_REGISTER_0 = 0,
    dst_sel_mec_write_data_TC_L2_2               = 2,
    dst_sel_mec_write_data_GDS_3                 = 3,
    dst_sel_mec_write_data_MEMORY_5              = 5
};

/* MEC WRITE_DATA address-increment selector values. */
enum MEC_WRITE_DATA_addr_incr_enum
{
    addr_incr_mec_write_data_INCREMENT_ADDR_0        = 0,
    addr_incr_mec_write_data_DO_NOT_INCREMENT_ADDR_1 = 1
};

/* MEC WRITE_DATA write-confirmation selector values. */
enum MEC_WRITE_DATA_wr_confirm_enum
{
    wr_confirm_mec_write_data_DO_NOT_WAIT_FOR_CONFIRMATION_0 = 0,
    wr_confirm_mec_write_data_WAIT_FOR_CONFIRMATION_1        = 1
};

/* MEC WRITE_DATA cache policy values. */
enum MEC_WRITE_DATA_cache_policy_enum
{
    cache_policy_mec_write_data_LRU_0    = 0,
    cache_policy_mec_write_data_STREAM_1 = 1,
    cache_policy_mec_write_data_BYPASS_2 = 2
};

//--------------------MEC_RELEASE_MEM--------------------

/* MEC RELEASE_MEM event index values. */
enum MEC_RELEASE_MEM_event_index_enum
{
    event_index_mec_release_mem_EVENT_WRITE_EOP_5 = 5,
    event_index_mec_release_mem_CS_Done_6         = 6
};

/* MEC RELEASE_MEM cache policy values. */
enum MEC_RELEASE_MEM_cache_policy_enum
{
    cache_policy_mec_release_mem_LRU_0    = 0,
    cache_policy_mec_release_mem_STREAM_1 = 1,
    cache_policy_mec_release_mem_BYPASS_2 = 2
};

/* MEC RELEASE_MEM destination selector values. */
enum MEC_RELEASE_MEM_dst_sel_enum
{
    dst_sel_mec_release_mem_MEMORY_CONTROLLER_0 = 0,
    dst_sel_mec_release_mem_TC_L2_1             = 1
};

/* MEC RELEASE_MEM interrupt selector values. */
enum MEC_RELEASE_MEM_int_sel_enum
{
    int_sel_mec_release_mem_NONE_0                               = 0,
    int_sel_mec_release_mem_SEND_INTERRUPT_ONLY_1                = 1,
    int_sel_mec_release_mem_SEND_INTERRUPT_AFTER_WRITE_CONFIRM_2 = 2,
    int_sel_mec_release_mem_SEND_DATA_AFTER_WRITE_CONFIRM_3      = 3
};

/* MEC RELEASE_MEM data selector values. */
enum MEC_RELEASE_MEM_data_sel_enum
{
    data_sel_mec_release_mem_NONE_0                      = 0,
    data_sel_mec_release_mem_SEND_32_BIT_LOW_1           = 1,
    data_sel_mec_release_mem_SEND_64_BIT_DATA_2          = 2,
    data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3    = 3,
    data_sel_mec_release_mem_SEND_CP_PERFCOUNTER_HI_LO_4 = 4,
    data_sel_mec_release_mem_STORE_GDS_DATA_TO_MEMORY_5  = 5
};


#endif


================================================
FILE: libhsakmt/tests/kfdtest/include/pm4_pkt_struct_nv.h
================================================
/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __PM4__PKT__STRUCT__NV__HPP__
#define __PM4__PKT__STRUCT__NV__HPP__

#include "pm4_pkt_struct_ai.h"

/**
 * ACQUIRE_MEM packet layout, NV variant.
 *
 * Eight ordinals. The second word is a plain `reserved` integer in this
 * variant and the packet ends with a word carrying an 18-bit gcr_cntl field.
 */
typedef struct _PM4_ACQUIRE_MEM_NV {
    /* ordinal1: type-3 packet header */
    union {
        PM4_TYPE_3_HEADER header;
        unsigned int ordinal1;
    };

    /* ordinal2: reserved word */
    unsigned int reserved;

    /* ordinal3: coher_size as a full 32-bit word */
    unsigned int coher_size;

    /* ordinal4: 8-bit coher_size_hi */
    union {
        struct {
            unsigned int coher_size_hi : 8;
            unsigned int reserved1 : 24;
        } bitfields3;
        unsigned int ordinal4;
    };

    /* ordinal5: coher_base_lo as a full 32-bit word */
    unsigned int coher_base_lo;

    /* ordinal6: 24-bit coher_base_hi */
    union {
        struct {
            unsigned int coher_base_hi : 24;
            unsigned int reserved2 : 8;
        } bitfields4;
        unsigned int ordinal6;
    };

    /* ordinal7: 16-bit poll_interval */
    union {
        struct {
            unsigned int poll_interval : 16;
            unsigned int reserved3 : 16;
        } bitfields5;
        unsigned int ordinal7;
    };

    /* ordinal8: 18-bit gcr_cntl */
    union {
        struct {
            unsigned int gcr_cntl : 18;
            unsigned int reserved4 : 14;
        } bitfields6;
        unsigned int ordinal8;
    };
} PM4ACQUIRE_MEM_NV, *PPM4ACQUIRE_MEM_NV;

/**
 * MEC RELEASE_MEM packet layout, NV variant.
 *
 * Seven union-wrapped ordinals followed by a trailing int_ctxid word. The
 * enum-typed fields reuse the AI_MEC_RELEASE_MEM_* enumerations.
 */
typedef struct PM4_MEC_RELEASE_MEM_NV
{
    /* ordinal1: type-3 packet header */
    union
    {
        PM4_TYPE_3_HEADER header;
        unsigned int      ordinal1;
    };

    /* ordinal2: event selection, 12-bit gcr_cntl, cache policy, pq_exe_status */
    union
    {
        struct
        {
            unsigned int event_type : 6;
            unsigned int reserved1 : 2;
            AI_MEC_RELEASE_MEM_event_index_enum event_index : 4;
            unsigned int gcr_cntl : 12;
            unsigned int reserved4 : 1;
            AI_MEC_RELEASE_MEM_cache_policy_enum cache_policy : 2;
            unsigned int reserved5 : 1;
            AI_MEC_RELEASE_MEM_pq_exe_status_enum pq_exe_status : 1;
            unsigned int reserved6 : 3;
        } bitfields2;
        unsigned int ordinal2;
    };

    /* ordinal3: destination, interrupt and data selectors */
    union
    {
        struct
        {
            unsigned int reserved7 : 16;
            AI_MEC_RELEASE_MEM_dst_sel_enum dst_sel : 2;
            unsigned int reserved8 : 6;
            AI_MEC_RELEASE_MEM_int_sel_enum int_sel : 3;
            unsigned int reserved9 : 2;
            AI_MEC_RELEASE_MEM_data_sel_enum data_sel : 3;
        } bitfields3;
        unsigned int ordinal3;
    };

    /* ordinal4: low address word; 4a keeps 30 address bits, 4b keeps 29 */
    union
    {
        struct
        {
            unsigned int reserved10 : 2;
            unsigned int address_lo_32b : 30;
        } bitfields4a;
        struct
        {
            unsigned int reserved11 : 3;
            unsigned int address_lo_64b : 29;
        } bitfields4b;
        unsigned int reserved12;

        unsigned int ordinal4;
    };

    /* ordinal5: high address word */
    union
    {
        unsigned int address_hi;

        unsigned int reserved13;

        unsigned int ordinal5;
    };

    /* ordinal6: data_lo / cmp_data_lo, or a dw_offset + num_dwords pair */
    union
    {
        unsigned int data_lo;

        unsigned int cmp_data_lo;

        struct
        {
            unsigned int dw_offset : 16;
            unsigned int num_dwords : 16;
        } bitfields6c;
        unsigned int reserved14;

        unsigned int ordinal6;
    };

    /* ordinal7: data_hi / cmp_data_hi */
    union
    {
        unsigned int data_hi;

        unsigned int cmp_data_hi;

        unsigned int reserved15;

        unsigned int reserved16;

        unsigned int ordinal7;
    };

    /* Final word: int_ctxid */
    unsigned int int_ctxid;
} PM4MEC_RELEASE_MEM_NV, *PPM4MEC_RELEASE_MEM_NV;


#endif // __PM4__PKT__STRUCT__NV__HPP__


================================================
FILE: libhsakmt/tests/kfdtest/include/sdma_pkt_struct.h
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __SDMA_PKT_STRUCT_H__
#define __SDMA_PKT_STRUCT_H__


/* SDMA opcodes, listed in ascending numeric order.
 * Presumably these are the values written to the `op` field of the packet
 * headers declared in this header -- confirm against the packet builders.
 */
const unsigned int SDMA_OP_NOP         = 0;
const unsigned int SDMA_OP_COPY        = 1;
const unsigned int SDMA_OP_WRITE       = 2;
const unsigned int SDMA_OP_FENCE       = 5;
const unsigned int SDMA_OP_TRAP        = 6;
const unsigned int SDMA_OP_POLL_REGMEM = 8;
const unsigned int SDMA_OP_CONST_FILL  = 11;
const unsigned int SDMA_OP_TIMESTAMP   = 13;

/* SDMA sub-opcodes; both linear variants are encoded as 0. */
const unsigned int SDMA_SUBOP_COPY_LINEAR  = 0;
const unsigned int SDMA_SUBOP_WRITE_LINEAR = 0;

/*
** Definitions for SDMA_PKT_COPY_LINEAR packet
*/

/**
 * SDMA linear copy packet.
 *
 * Five fixed dwords (header, count, parameter, source address lo/hi)
 * followed by a zero-length DST_ADDR array of lo/hi destination address
 * pairs. Each dword is a named union offering bit-fields and a raw
 * DW_n_DATA view.
 */
typedef struct SDMA_PKT_COPY_LINEAR_TAG {
    /* DW0: op, sub_op and the broadcast bit (bit 27) */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int reserved_0 : 11;
            unsigned int broadcast : 1;
            unsigned int reserved_1 : 4;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: 22-bit count */
    union {
        struct {
            unsigned int count : 22;
            unsigned int reserved_0 : 10;
        };
        unsigned int DW_1_DATA;
    } COUNT_UNION;

    /* DW2: dst_sw/dst_ha and src_sw/src_ha controls in the upper half-word */
    union {
        struct {
            unsigned int reserved_0 : 16;
            unsigned int dst_sw : 2;
            unsigned int reserved_1 : 4;
            unsigned int dst_ha : 1;
            unsigned int reserved_2 : 1;
            unsigned int src_sw : 2;
            unsigned int reserved_3 : 4;
            unsigned int src_ha : 1;
            unsigned int reserved_4 : 1;
        };
        unsigned int DW_2_DATA;
    } PARAMETER_UNION;

    /* DW3: source address bits 31:0 */
    union {
        struct { unsigned int src_addr_31_0 : 32; };
        unsigned int DW_3_DATA;
    } SRC_ADDR_LO_UNION;

    /* DW4: source address bits 63:32 */
    union {
        struct { unsigned int src_addr_63_32 : 32; };
        unsigned int DW_4_DATA;
    } SRC_ADDR_HI_UNION;

    /* DW5/DW6 pairs: destination addresses; zero-length array, so it adds
     * nothing to sizeof the packet. */
    struct {
        union {
            struct { unsigned int dst_addr_31_0 : 32; };
            unsigned int DW_5_DATA;
        } DST_ADDR_LO_UNION;

        union {
            struct { unsigned int dst_addr_63_32 : 32; };
            unsigned int DW_6_DATA;
        } DST_ADDR_HI_UNION;
    } DST_ADDR[0];
} SDMA_PKT_COPY_LINEAR, *PSDMA_PKT_COPY_LINEAR;

/*
** Definitions for SDMA_PKT_WRITE_UNTILED packet
*/

/**
 * SDMA untiled write packet: header, 64-bit destination address, a
 * count/sw control dword and the first data dword.
 */
typedef struct SDMA_PKT_WRITE_UNTILED_TAG {
    /* DW0: op and sub_op */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int reserved_0 : 16;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: destination address bits 31:0 */
    union {
        struct { unsigned int dst_addr_31_0 : 32; };
        unsigned int DW_1_DATA;
    } DST_ADDR_LO_UNION;

    /* DW2: destination address bits 63:32 */
    union {
        struct { unsigned int dst_addr_63_32 : 32; };
        unsigned int DW_2_DATA;
    } DST_ADDR_HI_UNION;

    /* DW3: 22-bit count and 2-bit sw at bits 25:24 */
    union {
        struct {
            unsigned int count : 22;
            unsigned int reserved_0 : 2;
            unsigned int sw : 2;
            unsigned int reserved_1 : 6;
        };
        unsigned int DW_3_DATA;
    } DW_3_UNION;

    /* DW4: first data dword */
    union {
        struct { unsigned int data0 : 32; };
        unsigned int DW_4_DATA;
    } DATA0_UNION;
} SDMA_PKT_WRITE_UNTILED, *PSDMA_PKT_WRITE_UNTILED;

/*
** Definitions for SDMA_PKT_FENCE packet
*/

/**
 * SDMA fence packet: header, 64-bit address and one 32-bit data dword.
 */
typedef struct SDMA_PKT_FENCE_TAG {
    /* DW0: op and sub_op */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int reserved_0 : 16;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: address bits 31:0 */
    union {
        struct { unsigned int addr_31_0 : 32; };
        unsigned int DW_1_DATA;
    } ADDR_LO_UNION;

    /* DW2: address bits 63:32 */
    union {
        struct { unsigned int addr_63_32 : 32; };
        unsigned int DW_2_DATA;
    } ADDR_HI_UNION;

    /* DW3: data dword */
    union {
        struct { unsigned int data : 32; };
        unsigned int DW_3_DATA;
    } DATA_UNION;
} SDMA_PKT_FENCE, *PSDMA_PKT_FENCE;

/*
** Definitions for SDMA_PKT_CONSTANT_FILL packet
*/

/**
 * SDMA constant fill packet: header (with sw and fillsize), 64-bit
 * destination address, 32-bit source data and a 22-bit count.
 */
typedef struct SDMA_PKT_CONSTANT_FILL_TAG {
    /* DW0: op, sub_op, sw (bits 17:16) and fillsize (bits 31:30) */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int sw : 2;
            unsigned int reserved_0 : 12;
            unsigned int fillsize : 2;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: destination address bits 31:0 */
    union {
        struct { unsigned int dst_addr_31_0 : 32; };
        unsigned int DW_1_DATA;
    } DST_ADDR_LO_UNION;

    /* DW2: destination address bits 63:32 */
    union {
        struct { unsigned int dst_addr_63_32 : 32; };
        unsigned int DW_2_DATA;
    } DST_ADDR_HI_UNION;

    /* DW3: source data bits 31:0 */
    union {
        struct { unsigned int src_data_31_0 : 32; };
        unsigned int DW_3_DATA;
    } DATA_UNION;

    /* DW4: 22-bit count */
    union {
        struct {
            unsigned int count : 22;
            unsigned int reserved_0 : 10;
        };
        unsigned int DW_4_DATA;
    } COUNT_UNION;
} SDMA_PKT_CONSTANT_FILL, *PSDMA_PKT_CONSTANT_FILL;

/*
** Definitions for SDMA_PKT_TRAP packet
*/

/**
 * SDMA trap packet: header plus a 28-bit interrupt context value.
 */
typedef struct SDMA_PKT_TRAP_TAG {
    /* DW0: op and sub_op */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int reserved_0 : 16;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: int_context in the low 28 bits */
    union {
        struct {
            unsigned int int_context : 28;
            unsigned int reserved_0 : 4;
        };
        unsigned int DW_1_DATA;
    } INT_CONTEXT_UNION;
} SDMA_PKT_TRAP, *PSDMA_PKT_TRAP;

/*
** Definitions for SDMA_PKT_POLL_REGMEM packet
*/

/**
 * SDMA poll register/memory packet: header with polling controls, 64-bit
 * address, value, mask and an interval/retry dword.
 */
typedef struct SDMA_PKT_POLL_REGMEM_TAG
{
    /* DW0: op, sub_op, hdp_flush (bit 26), func (bits 30:28), mem_poll (bit 31) */
    union
    {
        struct
        {
            unsigned int op:8;
            unsigned int sub_op:8;
            unsigned int reserved_0:10;
            unsigned int hdp_flush:1;
            unsigned int reserved_1:1;
            unsigned int func:3;
            unsigned int mem_poll:1;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: address bits 31:0 */
    union
    {
        struct { unsigned int addr_31_0:32; };
        unsigned int DW_1_DATA;
    } ADDR_LO_UNION;

    /* DW2: address bits 63:32 */
    union
    {
        struct { unsigned int addr_63_32:32; };
        unsigned int DW_2_DATA;
    } ADDR_HI_UNION;

    /* DW3: value dword */
    union
    {
        struct { unsigned int value:32; };
        unsigned int DW_3_DATA;
    } VALUE_UNION;

    /* DW4: mask dword */
    union
    {
        struct { unsigned int mask:32; };
        unsigned int DW_4_DATA;
    } MASK_UNION;

    /* DW5: 16-bit interval and 12-bit retry_count */
    union
    {
        struct
        {
            unsigned int interval:16;
            unsigned int retry_count:12;
            unsigned int reserved_0:4;
        };
        unsigned int DW_5_DATA;
    } DW5_UNION;
} SDMA_PKT_POLL_REGMEM, *PSDMA_PKT_POLL_REGMEM;

/*
** Definitions for SDMA_PKT_TIMESTAMP packet
*/

/**
 * SDMA timestamp packet: header followed by a 64-bit address.
 */
typedef struct SDMA_PKT_TIMESTAMP_TAG {
    /* DW0: op and sub_op */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int reserved_0 : 16;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: address bits 31:0 */
    union {
        struct { unsigned int addr_31_0 : 32; };
        unsigned int DW_1_DATA;
    } ADDR_LO_UNION;

    /* DW2: address bits 63:32 */
    union {
        struct { unsigned int addr_63_32 : 32; };
        unsigned int DW_2_DATA;
    } ADDR_HI_UNION;
} SDMA_PKT_TIMESTAMP, *PSDMA_PKT_TIMESTAMP;


/*
** Definitions for SDMA_PKT_NOP packet
*/

/**
 * SDMA NOP packet: header carrying a 14-bit count, plus one data dword.
 */
typedef struct SDMA_PKT_NOP_TAG {
    /* DW0: op, sub_op and count (bits 29:16) */
    union {
        struct {
            unsigned int op : 8;
            unsigned int sub_op : 8;
            unsigned int count : 14;
            unsigned int reserved_0 : 2;
        };
        unsigned int DW_0_DATA;
    } HEADER_UNION;

    /* DW1: data dword */
    union {
        struct { unsigned int data0 : 32; };
        unsigned int DW_1_DATA;
    } DATA0_UNION;
} SDMA_PKT_NOP, *PSDMA_PKT_NOP;

#endif // __SDMA_PKT_STRUCT_H__


================================================
FILE: libhsakmt/tests/kfdtest/scripts/kfdtest.exclude
================================================
# FILTER is an associative array keyed by platform/profile name. Each value is
# a colon-separated list of gtest test patterns; run_kfdtest.sh passes the
# selected value to kfdtest as --gtest_filter=<value>.
declare -A FILTER

# Power management tests
# (also referenced, negated, by PERMANENT_BLACKLIST_ALL_ASICS below)
FILTER[pm]=\
"KFDPMTest.SuspendWithActiveProcess:"\
"KFDPMTest.SuspendWithIdleQueue:"\
"KFDPMTest.SuspendWithIdleQueueAfterWork"


# Core tests, used in scenarios like bringup
# Software scheduler mode, i.e. non-HWS mode
# NOTE: this value has no leading '-', so as a gtest filter it lists the tests
# to run rather than tests to exclude.
FILTER[core_sws]=\
"KFDQMTest.CreateDestroyCpQueue:"\
"KFDQMTest.SubmitNopCpQueue:"\
"KFDQMTest.SubmitPacketCpQueue:"\
"KFDQMTest.AllCpQueues:"\
"KFDQMTest.CreateDestroySdmaQueue:"\
"KFDQMTest.SubmitNopSdmaQueue:"\
"KFDQMTest.SubmitPacketSdmaQueue:"\
"KFDQMTest.AllSdmaQueues:"\
"KFDQMTest.AllXgmiSdmaQueues:"\
"KFDQMTest.AllQueues:"\
"KFDLocalMemoryTest.AccessLocalMem:"\
"KFDEventTest.SignalEvent"

# HWS mode: every core_sws test plus KFDCWSRTest.BasicTest
FILTER[core]=\
"${FILTER[core_sws]}:"\
"KFDCWSRTest.BasicTest"

# Permanent exclusions
# These tests are included for debugging, but are not executed in normal execution on any ASIC:
# FILTER[pm] needs human intervention, so put it here. Developers can run them
# manually through the "-p pm" option.
#
# CU Masking Linear is not working correctly due to how the HW distributes work over CUs.
# It is available for testing but is not currently expected to pass on CI/VI/AI.
#
# CU Masking Even is added here due to some non-obvious baseline measurements. Though
# using wallclock to measure performance is always risky, there are just too many ASICs
# where this test is failing. Ideally we'll get better CU Masking coverage via rocrtst
#
# The CheckZeroInitializationVram test is no longer expected to pass as KFD no longer
# clears memory at allocation time.
#
# The value starts with '-': in a gtest filter everything after the first '-'
# is an exclusion pattern, so every list built on top of this one is a list of
# tests to skip.
PERMANENT_BLACKLIST_ALL_ASICS=\
"-${FILTER[pm]}:"\
"KFDQMTest.BasicCuMaskingLinear:"\
"KFDQMTest.BasicCuMaskingEven:"\
"RDMATest.GPUDirect:"\
"KFDLocalMemoryTest.CheckZeroInitializationVram"

# This is the temporary blacklist for all ASICs. This is to be used when a test is failing consistently
# on every ASIC (Kaveri, Carrizo, Hawaii, Tonga, Fiji, Polaris10, Polaris11 and Vega10).
# TODO means that a JIRA ticket needs to be created for this issue, as no documentation regarding
# failures can be found
# NOTE: If you update this alphabetical listing, add the corresponding JIRA ticket for reference
#
# KFDQMTest.GPUDoorbellWrite fails intermittently (KFD-318)
# KFDQMTest.mGPUShareBO (KFD-334)
# KFDHWSTest.* (SWDEV-193035)
# KFDEvictTest.BurstyTest (ROCMOPS-464)
# KFDEvictTest.BurstyTest (SWDEV-291256)
# KFDEvictTest.BurstyTest (KFD-425)
# KFDDBGTest.SuspendQueues (SWDEV-417850)
# KFDDBGTest.HitAddressWatch (SWDEV-420281)
TEMPORARY_BLACKLIST_ALL_ASICS=\
"KFDQMTest.GPUDoorbellWrite:"\
"KFDQMTest.mGPUShareBO:"\
"KFDQMTest.SdmaEventInterrupt:"\
"KFDMemoryTest.CacheInvalidateOnRemoteWrite:"\
"KFDEvictTest.BurstyTest:"\
"KFDHWSTest.*:"\
"KFDSVMRangeTest.ReadOnlyRangeTest*:"\
"KFDDBGTest.SuspendQueues:"\
"KFDDBGTest.HitAddressWatch"

# Combined exclusion list applied to every ASIC: permanent entries first (they
# supply the leading '-'), then the temporary ones.
BLACKLIST_ALL_ASICS=\
"$PERMANENT_BLACKLIST_ALL_ASICS:"\
"$TEMPORARY_BLACKLIST_ALL_ASICS"

# SDMA-based tests (KFDIPCTest.BasicTest, KFDQM.*Sdma*, KFDMemoryTest.MMBench) are all
# disabled on non-Hawaii due to SDMA instability - SWDEV-101666
# This is a fragment without a leading '-'; it is meant to be appended after
# BLACKLIST_ALL_ASICS in a per-ASIC FILTER entry.
SDMA_BLACKLIST=\
"KFDIPCTest.*:"\
"KFDLocalMemoryTest.CheckZeroInitializationVram:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDMemoryTest.MMBench:"\
"KFDMemoryTest.SignalHandling:"\
"KFDQMTest.AllQueues:"\
"KFDQMTest.*Sdma*:"\
"KFDQMTest.CreateQueueStressSingleThreaded:"\
"KFDQMTest.GPUDoorbellWrite:"\
"KFDQMTest.P2PTest:"\
"KFDPerformanceTest.P2PBandWidthTest:"\
"KFDPerformanceTest.P2POverheadTest"

# Anything involving CP queue creation is failing on Kaveri. Separate them here for convenience (KFD-336)
# Only FILTER[kaveri] uses this fragment in this file.
KV_QUEUE_BLACKLIST=\
"KFDExceptionTest.AddressFault:"\
"KFDExceptionTest.PermissionFault:"\
"KFDLocalMemoryTest.*:"\
"KFDEventTest.Signal*Event*:"\
"KFDQMTest.CreateQueueStressSingleThreaded:"\
"KFDQMTest.*CpQueue*:"\
"KFDQMTest.*Dispatch*:"\
"KFDQMTest.Atomics:"\
"KFDQMTest.GPUDoorbellWrite"

# Exclusion fragment shared by the GFX10 entries (navi10/navi12/navi14).
# KFDCWSRTest.BasicTest*: SWDEV-353206
BLACKLIST_GFX10=\
"KFDMemoryTest.DeviceHdpFlush:"\
"KFDSVMEvictTest.*:"\
"KFDCWSRTest.BasicTest*"

# BLACKLIST_GFX10 plus all perf-counter tests; used by the sienna_cichlid,
# navy_flounder, dimgrey_cavefish, beige_goby, yellow_carp and gfx1036 entries.
BLACKLIST_GFX10_NV2X=\
"$BLACKLIST_GFX10:"\
"KFDPerfCountersTest.*"

# Exclusion fragment shared by the gfx11xx entries (and FILTER[RHEL9]).
# KFDMemoryTest.FlatScratchAccess           - SWDEV-329877
# KFDGWSTest.*: GFX11 will no longer use global wave sync
BLACKLIST_GFX11=\
"KFDQMTest.CreateAqlCpQueue:"\
"KFDCWSRTest.InterruptRestore:"\
"KFDPerfCountersTest.*:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDGWSTest.*"

# Exclusion fragment shared by the gfx12xx entries. Same as BLACKLIST_GFX11
# except that KFDCWSRTest.InterruptRestore is not excluded.
BLACKLIST_GFX12=\
"KFDQMTest.CreateAqlCpQueue:"\
"KFDPerfCountersTest.*:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDGWSTest.*"

# Per-ASIC filters. Each one starts with BLACKLIST_ALL_ASICS, whose leading '-'
# turns the whole value into a gtest exclusion list.

# KFDQMTest.CpuWriteCoherence fails. 0 dwordsAvailable (KFD-338)
# KFDMemoryTest.MemoryRegister fails on SDMA queue creation (KFD-337)
FILTER[kaveri]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"$KV_QUEUE_BLACKLIST:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDQMTest.CpuWriteCoherence"

# KFDLocalMemoryTest.BasicTest is failing intermittently (KFD-368)
# KFDMemoryTest.BigSysBufferStressTest was failing intermittently on 4.9
# and hangs when executed twice (KFD-312)
# KFDQMTest.GPUDoorbellWrite fails on Hawaii. Could be HW-related (KFD-342)
FILTER[hawaii]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDLocalMemoryTest.BasicTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDQMTest.GPUDoorbellWrite"

FILTER[carrizo]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDExceptionTest.PermissionFault"

# KFDPerfCountersTest.*Trace fail (KFD-339)
# KFDMemoryTest.QueryPointerInfo/MemoryRegister* (KFD-341)
# The remaining tests listed here fail on map memory to GPU with a VA conflict (KFD-340)
FILTER[tonga]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDCWSRTest.BasicTest:"\
"KFDPerfCountersTest.*:"\
"KFDQMTest.OverSubscribeCpQueues"

# Since Navi10 was merged, the PM4Event test takes 6min to run
FILTER[fiji]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDQMTest.PM4EventInterrupt:"\
"$SDMA_BLACKLIST"

# polaris10/11/12 share the same exclusions: the global list plus SDMA_BLACKLIST.
FILTER[polaris10]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"

FILTER[polaris11]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"

FILTER[polaris12]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"

# KFDIPCTest.BasicTest (ROCMOPS-459) .CMABasicTest (ROCMOPS-460) .CrossMemoryAttachTest (ROCMOPS-461)
# KFDQMTest.AllSdmaQueues (ROCMOPS-463)
FILTER[vega10]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDIPCTest.BasicTest:"\
"KFDIPCTest.CMABasicTest:"\
"KFDIPCTest.CrossMemoryAttachTest:"\
"KFDQMTest.AllSdmaQueues"

# vega12: global exclusions plus SDMA_BLACKLIST.
# The last fragment must NOT end with a line-continuation backslash; otherwise
# the assignment only terminates because a blank line happens to follow it.
FILTER[vega12]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"

# vega20: global exclusions, SDMA_BLACKLIST and KFDQMTest.GPUDoorbellWrite
# (the latter is already part of both included lists).
FILTER[vega20]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDQMTest.GPUDoorbellWrite"

# Raven variant that additionally excludes most KFDSVMRangeTest cases and
# KFDMemoryTest.MemoryRegister. NOTE(review): presumably selected when Raven
# runs in a dGPU fallback mode -- confirm against the platform detection code.
FILTER[raven_dgpuFallback]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDEvictTest.*:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDSVMRangeTest.BasicSystemMemTest:"\
"KFDSVMRangeTest.BasicVramTest:"\
"KFDSVMRangeTest.EvictSystemRangeTest:"\
"KFDSVMRangeTest.PartialUnmapSysMemTest:"\
"KFDSVMRangeTest.MigrateTest:"\
"KFDSVMRangeTest.MigratePolicyTest:"\
"KFDSVMRangeTest.MigrateGranularityTest:"\
"KFDSVMRangeTest.MigrateLargeBufTest:"\
"KFDSVMRangeTest.MultiThreadMigrationTest:"\
"KFDSVMRangeTest.MigrateAccessInPlaceTest:"\
"KFDSVMEvictTest.QueueTest"

FILTER[raven]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDEvictTest.*:"\
"KFDSVMRangeTest.EvictSystemRangeTest:"\
"KFDSVMRangeTest.PartialUnmapSysMemTest:"\
"KFDSVMRangeTest.PrefetchTest:"\
"KFDSVMRangeTest.MultiThreadMigrationTest:"\
"KFDSVMEvictTest.QueueTest:"\
"KFDQMTest.MultipleCpQueuesStressDispatch"

# renoir does not include SDMA_BLACKLIST, unlike the raven entries above.
FILTER[renoir]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDEvictTest.*:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.SignalHandling"

# KFDExceptionTest.* (KFD-435)
FILTER[arcturus]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDExceptionTest.FaultStorm:"\
"KFDNegativeTest.*"

FILTER[aldebaran]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDExceptionTest.FaultStorm:"\
"KFDMemoryTest.PtraceAccess:"\
"KFDMemoryTest.DeviceHdpFlush"

# navi10/navi12/navi14 build on BLACKLIST_GFX10.
FILTER[navi10]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10:"\
"KFDMemoryTest.MMBench"

# Need to verify the following failed tests on another machine:
# Exceptions not being received during exception tests
# PerfCounters return HSAKMT_STATUS_INVALID_PARAMETER
# P2PBandwidth failing (wait times out) on node-to-multiple-nodes by [push, NONE]
FILTER[navi12]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10:"\
"KFDExceptionTest.*:"\
"KFDPerfCountersTest.*:"\
"KFDPerformanceTest.P2PBandWidthTest"

FILTER[navi14]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10"

# The following five entries are identical: global exclusions plus
# BLACKLIST_GFX10_NV2X.
FILTER[sienna_cichlid]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

FILTER[navy_flounder]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

FILTER[dimgrey_cavefish]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

FILTER[beige_goby]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

FILTER[yellow_carp]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

# gfx11xx entries: global exclusions plus BLACKLIST_GFX11. Only gfx1101 adds an
# extra test on top.
FILTER[gfx1100]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

# SWDEV-384028
FILTER[gfx1101]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11:"\
"KFDExceptionTest.SdmaQueueException"

FILTER[gfx1102]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

FILTER[gfx1103]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

FILTER[gfx1150]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

FILTER[gfx1151]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

FILTER[gfx1152]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

FILTER[gfx1153]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"

# gfx1036 uses the GFX10 NV2X list, not the GFX11 one.
FILTER[gfx1036]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"

FILTER[gfx942]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDIPCTest.BasicTest:"\
"KFDQMTest.QueueLatency"

# gfx950: same exclusions as gfx942 plus the eviction tests and
# KFDGWSTest.Semaphore.
FILTER[gfx950]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDIPCTest.BasicTest:"\
"KFDQMTest.QueueLatency:"\
"KFDEvictTest.*:"\
"KFDSVMEvictTest.QueueTest*:"\
"KFDGWSTest.Semaphore"

FILTER[gfx1200]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX12"

FILTER[gfx1201]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX12"

# Keyed by a distribution name rather than an ASIC; built on BLACKLIST_GFX11.
# NOTE(review): presumably targets GFX11 hardware running RHEL9 -- confirm.
FILTER[RHEL9]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11:"\
"KFDQMTest.ExtendedCuMasking:"\
"KFDEvictTest.QueueTest:"\
"KFDPCSamplingTest.*"

# Not a platform: run_kfdtest.sh appends this fragment to the selected filter
# when rdma_get_pages is not found in /proc/kallsyms (upstream/in-box driver).
FILTER[upstream]=\
"KFDIPCTest.*"


================================================
FILE: libhsakmt/tests/kfdtest/scripts/run_kfdtest.sh
================================================
#!/bin/bash
#
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
#

# Locate kfdtest.exclude (SHARE dir) and the kfdtest binary (BIN dir).
#
# CWD is the directory holding this script. dirname is used instead of the
# "${BASH_SOURCE%/*}" expansion because the latter returns the script's file
# name, not a directory, when the script is referenced without any slash
# (e.g. "bash run_kfdtest.sh").
CWD="$(dirname -- "${BASH_SOURCE[0]}")"

# See if we can find the SHARE/BIN dirs in their expected locations.
# The first candidate containing kfdtest.exclude wins. -r keeps backslashes in
# paths intact.
while IFS= read -r candidate; do
    # KFDTEST_SHARE_DIR is optional; skip the empty line it leaves behind
    # instead of probing /kfdtest.exclude at the filesystem root.
    if [ -z "$candidate" ]; then
        continue
    fi
    if [ -e "$candidate/kfdtest.exclude" ]; then
        source "$candidate/kfdtest.exclude"
        break
    fi
done <<EOF
$KFDTEST_SHARE_DIR
$CWD
$CWD/../share/kfdtest
/opt/rocm/share/kfdtest
EOF

# Keep these checks until automation starts using the package install
if [ -z "${FILTER[core]}" ]; then
    if [ -e "$CWD/../bin/kfdtest/kfdtest.exclude" ]; then
        source "$CWD/../bin/kfdtest/kfdtest.exclude"
    elif [ -e "$CWD/../../share/kfdtest.exclude" ]; then
        source "$CWD/../../share/kfdtest.exclude"
    fi
fi

# This filter will always exist if we sourced a valid kfdtest.exclude
if [ -z "${FILTER[core]}" ]; then
    echo "Unable to locate kfdtest.exclude."
    echo "Please set KFDTEST_SHARE_DIR or ensure that kfdtest.exclude is present inside $CWD, $CWD/../share/kfdtest or /opt/rocm/share/kfdtest"
    exit 1
fi

# Directory of the invoked script ($0), used as a fallback location for the
# kfdtest binary. Same dirname rationale as for CWD above.
SCRIPT_DIR="$(dirname -- "$0")"

# Using "which" produces different results in different
# OSes so use command -v instead. It returns "" if the
# command isn't in the PATH
if [ -z "$(command -v kfdtest)" ]; then
    if [ -z "$BIN_DIR" ]; then
        if [ -e "$SCRIPT_DIR/kfdtest" ]; then
            BIN_DIR="$SCRIPT_DIR"
        else
            # The default location
            BIN_DIR="/opt/rocm/bin"
        fi
    fi
    if [ -e "$BIN_DIR/kfdtest" ]; then
        KFDTEST="$BIN_DIR/kfdtest"
    else
        echo "Unable to locate kfdtest."
        echo "Please set BIN_DIR, ensure that kfdtest is in $PATH, or ensure that kfdtest is present inside $SCRIPT_DIR or /opt/rocm/bin"
        exit 1
    fi
else
    KFDTEST="kfdtest"
fi

# Option state, all empty by default. The names line up with the options
# listed by printUsage; presumably they are filled in by the argument parsing
# further down in this script (not visible here) -- confirm there.
PLATFORM=""
GDB=""
NODE=""
FORCE_HIGH=""
RUN_IN_DOCKER=""
# Colon-separated extra exclusions; getFilter appends it to the gtest filter.
ADDITIONAL_EXCLUDE=""

# Print the command-line help text to stdout.
# Takes no parameters and always returns 0.
printUsage() {
    echo
    # "$0" is quoted so a script path containing spaces is handed to basename
    # as a single argument.
    echo "Usage: $(basename "$0") [options ...] [gtest arguments]"
    echo
    echo "Options:"
    echo "  -p <platform> , --platform <platform>    Only run tests that"\
                               "pass on the specified platform. Usually you"\
                               "don't need this option"
    echo "  -g            , --gdb                    Run in debugger"
    echo "  -n <node(s)>  , --node <node(s)>         NodeId(s) to test. Takes a single integer, or a"\
                               "quoted, space-separated string as an argument"\
                               "(e.g. -n 1 OR -n \"1 2 3\")"\
                               "NOTE: Node numbers come from /sys/class/kfd/kfd/topology/nodes/#"
    echo "  -l            , --list                   List available nodes"
    echo "  --high                                   Force clocks to high for test execution"
    echo "  -d            , --docker                 Run in docker container"
    echo "  -e <list>     , --exclude <list>         Additional tests to exclude, in addition to kfdtest.exclude."\
                               "Takes a colon-separated string as an argument"\
                               "(e.g. -e KFDEvictTest.*:KFDSVMEvictTest.*)"
    echo "  -h            , --help                   Prints this help"
    echo
    echo "Gtest arguments will be forwarded to the app"
    echo
    echo "Valid platform options: core_sws, core, polaris10, vega10, vega20, pm, all, and so on"
    echo "'all' option runs all tests"

    return 0
}
# Build the gtest filter argument for the given platform.
#    param - Platform. "all" selects every test; any other value must be a
#            key of the FILTER array, otherwise the script exits with status 1.
# The result is stored in the global variable gtestFilter.
getFilter() {
    local platform=$1;
    local extraExcludes=""

    case "$platform" in
        all ) gtestFilter="" ;;
        * )
            if [ -z "${FILTER[$platform]}" ]; then
                echo "Unsupported platform $platform. Exiting"
                exit 1
            fi

            gtestFilter="--gtest_filter=${FILTER[$platform]}"
            ;;
    esac

    # Check if the loaded driver is upstream (in-box) or DKMS. Without the
    # rdma_get_pages symbol the FILTER[upstream] entries are added as well.
    rdma_get_pages_func=$(grep rdma_get_pages /proc/kallsyms)
    if [ -z "$rdma_get_pages_func" ] && [ -n "${FILTER[upstream]}" ]; then
        extraExcludes="${FILTER[upstream]}"
    fi

    if [ -n "$ADDITIONAL_EXCLUDE" ]; then
        extraExcludes="${extraExcludes:+$extraExcludes:}$ADDITIONAL_EXCLUDE"
    fi

    if [ -n "$extraExcludes" ]; then
        if [ -z "$gtestFilter" ]; then
            # Platform "all" has no base filter, so the option prefix and the
            # '-' that starts gtest's negative pattern list must be added
            # here; otherwise a bare ":<list>" argument reaches kfdtest.
            gtestFilter="--gtest_filter=-$extraExcludes"
        else
            # NOTE(review): the appended entries act as exclusions only when
            # FILTER[$platform] already contains gtest's '-' section - verify
            # against kfdtest.exclude.
            gtestFilter="$gtestFilter:$extraExcludes"
        fi
    fi
}

TOPOLOGY_SYSFS_DIR=/sys/devices/virtual/kfd/kfd/topology/nodes

# Prints a space-separated list of HSA node ids. Nodes are the directories
# found directly under the sysfs KFD topology; only nodes whose properties
# file reports a non-zero simd_count are listed.
getHsaNodes() {
    local nodeDir simdcount
    # Local accumulator so repeated direct calls never see stale entries
    local hsaNodeList=""

    for nodeDir in $(find "$TOPOLOGY_SYSFS_DIR" -maxdepth 1 -mindepth 1 -type d); do
        simdcount=$(awk '/simd_count/ {print $2}' "$nodeDir/properties")
        # A node without a readable simd_count is skipped rather than
        # tripping a "unary operator expected" error in the test below.
        if [ -n "$simdcount" ] && [ "$simdcount" != 0 ]; then
            hsaNodeList+="$(basename "$nodeDir") "
        fi
    done
    echo "$hsaNodeList"
}


# Prints the GPU name of the given node as read from the sysfs topology.
# Two names are rewritten before printing:
#   - "raven" with no CPU cores but with SIMDs becomes "raven_dgpuFallback"
#   - "ip discovery" becomes "gfx<version>", taken from
#     HSA_OVERRIDE_GFX_VERSION when set, else from gfx_target_version
#   param - Node ID
getNodeName() {
    local nodeId=$1; shift;
    local nodeDir=$TOPOLOGY_SYSFS_DIR/$nodeId
    local gpuName=$(cat $nodeDir/name)

    case "$gpuName" in
        "raven" )
            local CpuCoresCount=$(cat $nodeDir/properties | grep cpu_cores_count | awk '{print $2}')
            local SimdCount=$(cat $nodeDir/properties | grep simd_count | awk '{print $2}')
            if [ "$CpuCoresCount" -eq 0 ] && [ "$SimdCount" -gt 0 ]; then
                gpuName="raven_dgpuFallback"
            fi
            ;;
        "ip discovery" )
            if [ -z "$HSA_OVERRIDE_GFX_VERSION" ]; then
                local GfxVersionDec=$(cat $nodeDir/properties | grep gfx_target_version | awk '{print $2}')
                # Pad a five-digit value to six digits so it folds into three pairs
                if [[ ${#GfxVersionDec} = 5 ]]; then
                    GfxVersionDec="0${GfxVersionDec}"
                fi
                gpuName="gfx$(printf "$GfxVersionDec" | fold -w2 | awk 'BEGIN {FS="\n"; RS=""} {printf "%d%x%x", $1, $2, $3}')"
            else
                gpuName="gfx$(echo "$HSA_OVERRIDE_GFX_VERSION" | awk 'BEGIN {FS="."; RS=""} {printf "%d%x%x", $1, $2, $3 }')"
            fi
            ;;
    esac
    echo "$gpuName"
}

# Run KfdTest independently. Two global variables set by command-line
# will influence the tests as indicated below
#   PLATFORM - If set all tests will run with this platform filter
#   NODE - If set tests will be run only on this NODE, else it will be
#           run on all available HSA Nodes
# Also reads RUN_IN_DOCKER, GTEST_ARGS, ADDITIONAL_EXCLUDE, GDB, KFDTEST and
# HSA_TEST_GPUS_NUM. $GDB, $gtestFilter and $GTEST_ARGS are expanded unquoted
# on purpose so that they split into separate arguments.
runKfdTest() {
    if [ "$RUN_IN_DOCKER" == "true" ]; then
        # NOTE(review): this error path exits with status 0
        if [ `sudo systemctl is-active docker` != "active" ]; then
            echo "docker isn't active, install and setup docker first!!!!"
            exit 0
        fi
        # NOTE(review): getPackageRoot is not defined in this part of the
        # script - confirm it exists elsewhere
        PKG_ROOT="$(getPackageRoot)"
    fi

    # Any forwarded gtest argument conflicts with -e.
    # NOTE(review): this error path also exits with status 0
    if [ -n "$GTEST_ARGS" ] && [ -n "$ADDITIONAL_EXCLUDE" ]; then
	    echo "Cannot use -e and --gtest_filter flags together"
	    exit 0
    fi

    # Without -n, test every node reported by getHsaNodes
    if [ "$NODE" == "" ]; then
        hsaNodes=$(getHsaNodes)

        if [ "$hsaNodes" == "" ]; then
            echo "No GPU found in the system."
            exit 1
        fi
    else
        hsaNodes=$NODE
    fi

    for hsaNode in $hsaNodes; do
        nodeName=$(getNodeName $hsaNode)
        # An explicit -p overrides the detected ASIC name
        if [ "$PLATFORM" != "" ] && [ "$PLATFORM" != "$nodeName" ]; then
            echo "WARNING: Actual ASIC $nodeName treated as $PLATFORM"
            nodeName="$PLATFORM"
        fi

        # Sets the global gtestFilter for this node
        getFilter $nodeName

        if [ "$RUN_IN_DOCKER" == "true" ]; then
            # Expose all of /dev/dri unless a node was chosen explicitly.
            # NOTE(review): renderD<node + 127> presumes render minors start
            # at 128 and follow node ids - confirm
            if [ "$NODE" == "" ]; then
                DEVICE_NODE="/dev/dri"
            else
                RENDER_NODE=$(($hsaNode + 127))
                DEVICE_NODE="/dev/dri/renderD${RENDER_NODE}"
            fi

            echo "Starting testing node $hsaNode ($nodeName) in docker container"
            sudo docker run -it --name kfdtest_docker --user="jenkins" --network=host \
            --device=/dev/kfd --device=${DEVICE_NODE} --group-add video --cap-add=SYS_PTRACE \
            --security-opt seccomp=unconfined -v $PKG_ROOT:/home/jenkins/rocm \
            compute-artifactory.amd.com:5000/yuho/tianli-ubuntu1604-kfdtest:01 \
            /home/jenkins/rocm/utils/run_kfdtest.sh -n $hsaNode $gtestFilter $GTEST_ARGS
            if [ "$?" = "0" ]; then
                echo "Finished node $hsaNode ($nodeName) successfully in docker container"
            else
                echo "Testing failed for node $hsaNode ($nodeName) in docker container"
            fi
            # Remove the named container so the next node can reuse the name
            sudo docker rm kfdtest_docker
        else
            if [ "$HSA_TEST_GPUS_NUM" != "" ]; then
                # Parallel mode: kfdtest is started once without --node and
                # the script exits after this first iteration
                echo "++++ Starting parallel testing on $HSA_TEST_GPUS_NUM gpu(s) ++++"
                $GDB $KFDTEST $gtestFilter $GTEST_ARGS
                echo "++++ Finished parallel testing on $HSA_TEST_GPUS_NUM gpu(s) ++++"
                exit 0;
            else
                echo ""
                echo "++++ Starting testing node $hsaNode ($nodeName) ++++"
                $GDB $KFDTEST "--node=$hsaNode" $gtestFilter $GTEST_ARGS
                echo "---- Finished testing node $hsaNode ($nodeName) ----"
            fi

        fi


    done

}

# Prints how many nodes getHsaNodes reports. The node ids are split into the
# global array gNodes and the count is left in the global gpuCount.
getGPUCount() {
    gNodes=( $(getHsaNodes) )
    gpuCount=${#gNodes[@]}
    echo "$gpuCount"
}

# Parse the command-line options. The first argument that is not a known
# option, together with everything after it, is stored in GTEST_ARGS and
# parsing stops.
while [ -n "$1" ]; do
    case "$1" in
        -h | --help )
            printUsage; exit 0 ;;
        -l | --list )
            printGpuNodelist; exit 0 ;;
        -g | --gdb )
            GDB="gdb --args" ;;
        -d | --docker )
            RUN_IN_DOCKER="true" ;;
        --high )
            FORCE_HIGH="true" ;;
        -p | --platform )
            shift; PLATFORM=$1 ;;
        -n | --node )
            shift; NODE=$1 ;;
        -e | --exclude )
            shift; ADDITIONAL_EXCLUDE="$1" ;;
        * )
            GTEST_ARGS=$@; break ;;
    esac
    shift
done

# Locate rocm-smi: prefer a symlink below /opt/rocm*, then $BIN_DIR, then
# whatever is in the PATH. "command -v" is used for the PATH lookup for the
# same reason as for kfdtest: "which" differs between OSes.
SMI="$(find /opt/rocm* -type l -name rocm-smi 2>/dev/null | tail -1)"
if [ -z "${SMI}" ]; then
    if [ -x "${BIN_DIR}/rocm-smi" ]; then
        SMI="${BIN_DIR}/rocm-smi"
    else
        SMI="$(command -v rocm-smi)"
    fi
fi

# Start from a clean value so a stray OLDPERF in the environment can never
# trigger the restore step at the end.
OLDPERF=""
# If the SMI is still missing, just report and continue
if [ "$FORCE_HIGH" == "true" ]; then
    if [ -e "$SMI" ]; then
        # Remember the current performance level so it can be restored
        OLDPERF=$("$SMI" -p | awk '/Performance Level:/ {print $NF; exit}')
        if ! "$SMI" --setperflevel high &> /dev/null; then
            echo "SMI failed to set perf level"
            OLDPERF=""
        fi
    else
        echo "Unable to set clocks to high, cannot find rocm-smi"
    fi
fi

# Set HSA_DEBUG env to run KFDMemoryTest.PtraceAccessInvisibleVram
export HSA_DEBUG=1
runKfdTest

# OLDPERF is only set if FORCE_HIGH and SMI both exist
if [ -n "$OLDPERF" ]; then
    "$SMI" --setperflevel "$OLDPERF" &> /dev/null
fi


================================================
FILE: libhsakmt/tests/kfdtest/src/AqlQueue.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "AqlQueue.hpp"
#include "GoogleTestExtension.hpp"


// Constructor: the body is intentionally empty.
AqlQueue::AqlQueue() {
}


// Destructor: the body is intentionally empty.
AqlQueue::~AqlQueue() {
}

// Returns the value currently stored at m_Resources.Queue_write_ptr.
unsigned int AqlQueue::Wptr() {
    const unsigned int writePtr = *m_Resources.Queue_write_ptr;
    return writePtr;
}

// Returns the value currently stored at m_Resources.Queue_read_ptr.
unsigned int AqlQueue::Rptr() {
    const unsigned int readPtr = *m_Resources.Queue_read_ptr;
    return readPtr;
}

// The read pointer is expected to equal the current write pointer once all
// packets are consumed, so this simply reports Wptr().
unsigned int AqlQueue::RptrWhenConsumed() {
    const unsigned int expectedRptr = Wptr();
    return expectedRptr;
}

// Publishes the pending write pointer (m_pendingWptr is in dwords) to the
// queue and then writes that same value, read back through Wptr(), to the
// doorbell.
void AqlQueue::SubmitPacket() {
    *m_Resources.Queue_write_ptr = m_pendingWptr;

    const unsigned int doorbellValue = Wptr();
    *(m_Resources.Queue_DoorBell) = doorbellValue;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/AqlQueue.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_AQL_QUEUE__H__
#define __KFD_AQL_QUEUE__H__

#include "BaseQueue.hpp"

// Queue specialisation derived from BaseQueue that reports the AQL packet
// type and the HSA_QUEUE_COMPUTE_AQL queue type.
class AqlQueue : public BaseQueue {
 public:
    AqlQueue();
    virtual ~AqlQueue();

    // @brief Updates queue write pointer and sets the queue doorbell to the queue write pointer
    virtual void SubmitPacket();

    // @return Read pointer in dwords
    virtual unsigned int Rptr();
    // @return Write pointer in dwords
    virtual unsigned int Wptr();
    // @return Expected m_Resources.Queue_read_ptr when all packets are consumed
    virtual unsigned int RptrWhenConsumed();

 protected:
    // @return Packet type accepted by this queue (always PACKETTYPE_AQL)
    virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_AQL; }

    // @return Queue type of this queue (always HSA_QUEUE_COMPUTE_AQL)
    virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE_AQL; }
};

#endif  // __KFD_AQL_QUEUE__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/Assemble.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

/**
 * Self-contained assembler that uses the LLVM MC API to assemble AMDGCN
 * instructions
 */

#include <llvm/Config/llvm-config.h>
#include <llvm/MC/MCAsmBackend.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/MC/MCCodeEmitter.h>
#include <llvm/MC/MCContext.h>
#include <llvm/MC/MCInstPrinter.h>
#include <llvm/MC/MCInstrInfo.h>
#include <llvm/MC/MCObjectFileInfo.h>
#include <llvm/MC/MCObjectWriter.h>
#include <llvm/MC/MCParser/AsmLexer.h>
#include <llvm/MC/MCParser/MCTargetAsmParser.h>
#include <llvm/MC/MCRegisterInfo.h>
#include <llvm/MC/MCStreamer.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/InitLLVM.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#if LLVM_VERSION_MAJOR > 13
#include <llvm/MC/TargetRegistry.h>
#else
#include <llvm/Support/TargetRegistry.h>
#endif
#if LLVM_VERSION_MAJOR > 18
#include "llvm/Support/ManagedStatic.h"
#endif

#include <linux/elf.h>
#include "OSWrapper.hpp"
#include "Assemble.hpp"

using namespace llvm;

/* Assembler implementation is not multi-thread safe and is
 * asic type dependent. Instantiate it per thread/gpu use case,
 * delete each assembler after assembling
 */

/**
 * Initialize the AMDGPU pieces of LLVM used by Assembler: target info,
 * target MC and the assembly parser.
 */
void Init_LLVM() {
    LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTargetMC();
    LLVMInitializeAMDGPUAsmParser();
}

/**
 * Shut LLVM down by calling llvm_shutdown().
 */
void Shutdown_LLVM() {
    llvm_shutdown();
}

/**
 * Create an assembler for the given GFX version with no assembled text.
 *
 * @param Gfxv GFX version used to derive the target ASIC name (MCPU)
 */
Assembler::Assembler(const uint32_t Gfxv)
    : TextData(nullptr), TextSize(0) {
    SetTargetAsic(Gfxv);
}

/**
 * Release any instruction stream that is still held.
 */
Assembler::~Assembler() {
    this->FlushText();
}

const char* Assembler::GetInstrStream() {
    return TextData;
}

const size_t Assembler::GetInstrStreamSize() {
    return TextSize;
}

/**
 * Copy the stored instruction stream into a caller-provided buffer
 *
 * @param OutBuf Destination buffer
 * @param BufSize Size of OutBuf in bytes
 * @return 0 on success, -2 when the stream does not fit into OutBuf
 */
int Assembler::CopyInstrStream(char* OutBuf, const size_t BufSize) {
    const bool Fits = TextSize <= BufSize;
    if (Fits)
        std::copy_n(TextData, TextSize, OutBuf);

    return Fits ? 0 : -2;
}

const char* Assembler::GetTargetAsic() {
    return MCPU;
}

/**
 * Derive MCPU from the GFX version supplied by the Thunk.
 * Bits 23:16 hold the major, bits 15:8 the minor and bits 7:0 the stepping.
 * LLVM target IDs print major/minor in decimal and the stepping in hex.
 *
 * @param Gfxv Packed GFX version
 */
void Assembler::SetTargetAsic(const uint32_t Gfxv) {
    const uint8_t Step = static_cast<uint8_t>(Gfxv);
    const uint8_t Minor = static_cast<uint8_t>(Gfxv >> 8);
    const uint8_t Major = static_cast<uint8_t>(Gfxv >> 16);

    snprintf(MCPU, ASM_MCPU_LEN, "gfx%d%d%x", Major, Minor, Step);
}

/**
 * Free the stored instruction stream and return TextData/TextSize to their
 * initial (empty) state
 */
void Assembler::FlushText() {
    // delete[] on a null pointer is a no-op, so no guard is needed
    delete[] TextData;
    TextSize = 0;
    TextData = nullptr;
}

/**
 * Dump the bytes of an ELF object to stdout as hex, 16 bytes per row with
 * the offset of each following row printed at its start (debug)
 *
 * @param Data ELF object bytes
 */
void Assembler::PrintELFHex(const std::string Data) {
    outs() << "ASM Info: assembled ELF hex data (length " << Data.length() << "):\n";
    outs() << "0x00:\t";

    size_t Offset = 0;
    for (const char Byte : Data) {
        outs() << format_hex(static_cast<uint8_t>(Byte), 4);
        ++Offset;
        if (Offset % 16 != 0) {
            outs() << " ";
            continue;
        }
        // Row complete: start the next row with its offset
        outs() << "\n" << format_hex(Offset, 4) << ":\t";
    }
    outs() << "\n";
}

/**
 * Dump the stored raw instruction stream to stdout as hex, 16 bytes per row
 * with the offset of each following row printed at its start (debug)
 */
void Assembler::PrintTextHex() {
    outs() << "ASM Info: assembled .text hex data (length " << TextSize << "):\n";
    outs() << "0x00:\t";

    size_t Printed = 0;
    while (Printed < TextSize) {
        outs() << format_hex(static_cast<uint8_t>(TextData[Printed]), 4);
        ++Printed;
        if (Printed % 16 != 0) {
            outs() << " ";
            continue;
        }
        // Row complete: start the next row with its offset
        outs() << "\n" << format_hex(Printed, 4) << ":\t";
    }
    outs() << "\n";
}

/**
 * Extract raw instruction stream from .text section in ELF object
 *
 * @param RawData Raw C string of ELF object
 * @return 0 on success
 */
int Assembler::ExtractELFText(const char* RawData) {
    const Elf64_Ehdr* ElfHeader;
    const Elf64_Shdr* SectHeader;
    const Elf64_Shdr* SectStrTable;
    const char* SectStrAddr;
    unsigned NumSects, SectIdx;

    if (!(ElfHeader = reinterpret_cast<const Elf64_Ehdr*>(RawData))) {
        outs() << "ASM Error: elf data is invalid or corrupted\n";
        return -1;
    }
    if (ElfHeader->e_ident[EI_CLASS] != ELFCLASS64) {
        outs() << "ASM Error: elf object must be of 64-bit type\n";
        return -1;
    }

    SectHeader = reinterpret_cast<const Elf64_Shdr*>(RawData + ElfHeader->e_shoff);
    SectStrTable = &SectHeader[ElfHeader->e_shstrndx];
    SectStrAddr = static_cast<const char*>(RawData + SectStrTable->sh_offset);

    // Loop through sections, break on .text
    NumSects = ElfHeader->e_shnum;
    for (SectIdx = 0; SectIdx < NumSects; SectIdx++) {
        std::string SectName = std::string(SectStrAddr + SectHeader[SectIdx].sh_name);
        if (SectName == std::string(".text")) {
            TextSize = SectHeader[SectIdx].sh_size;
            TextData = new char[TextSize];
            memcpy(TextData, RawData + SectHeader[SectIdx].sh_offset, TextSize);
            break;
        }
    }

    if (SectIdx >= NumSects) {
        outs() << "ASM Error: couldn't locate .text section\n";
        return -1;
    }

    return 0;
}

/**
 * Assemble a shader for the current target ASIC and copy the resulting
 * instruction stream into the output buffer
 *
 * @param AssemblySource Shader source represented as a raw C string
 * @param OutBuf Raw instruction stream output buffer
 * @param BufSize Size of OutBuf (defaults to PAGE_SIZE)
 * @return Non-zero value of RunAssemble() on assembly failure, otherwise
 *         the value of CopyInstrStream() (0 on success)
 */
int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
                              const size_t BufSize) {
    const int AsmStatus = RunAssemble(AssemblySource);
    if (AsmStatus)
        return AsmStatus;

    return CopyInstrStream(OutBuf, BufSize);
}
int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
                              const size_t BufSize, const uint32_t Gfxv) {
    const char* defaultMCPU = GetTargetAsic();
    SetTargetAsic(Gfxv);
    int ret = RunAssemble(AssemblySource);
    strncpy(MCPU, defaultMCPU, ASM_MCPU_LEN);
    return ret ? ret : CopyInstrStream(OutBuf, BufSize);
}

/**
 * Assemble shader and fill member vars
 *
 * Builds the LLVM MC pipeline for the amdgcn target, assembles the source
 * into an in-memory ELF object and extracts its .text section into
 * TextData/TextSize. Any previously stored text is discarded first.
 * Diagnostics are written to outs().
 *
 * @param AssemblySource Shader source represented as a raw C string
 * @return 0 on success, negative value on failure
 */
int Assembler::RunAssemble(const char* const AssemblySource) {
    // Ensure target ASIC has been set
    if (!*MCPU) {
        outs() << "ASM Error: target asic is uninitialized\n";
        return -1;
    }

    // Delete TextData for any previous runs
    FlushText();

#if 0
    outs() << "ASM Info: running assembly for target: " << MCPU << "\n";
    outs() << "ASM Info: source:\n";
    outs() << AssemblySource << "\n";
#endif

    // Initialize MCOptions and target triple
    const MCTargetOptions MCOptions;
    Triple TheTriple;

    const Target* TheTarget =
        TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
    if (!TheTarget) {
        outs() << Error;
        return -1;
    }

    TheTriple.setArchName(ArchName);
    TheTriple.setVendorName(VendorName);
    TheTriple.setOSName(OSName);

    TripleName = TheTriple.getTriple();
    TheTriple.setTriple(Triple::normalize(TripleName));

    // Create MemoryBuffer for assembly source
    StringRef AssemblyRef(AssemblySource);
    std::unique_ptr<MemoryBuffer> BufferPtr =
        MemoryBuffer::getMemBuffer(AssemblyRef, "", false);
    if (!BufferPtr->getBufferSize()) {
        outs() << "ASM Error: assembly source is empty\n";
        return -1;
    }

    // Instantiate SrcMgr and transfer BufferPtr ownership
    SourceMgr SrcMgr;
    SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());

    // Initialize MC interfaces and base class objects
    std::unique_ptr<const MCRegisterInfo> MRI(
            TheTarget->createMCRegInfo(TripleName));
    if (!MRI) {
        outs() << "ASM Error: no register info for target " << MCPU << "\n";
        return -1;
    }
#if LLVM_VERSION_MAJOR > 9
    std::unique_ptr<const MCAsmInfo> MAI(
            TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
#else
    std::unique_ptr<const MCAsmInfo> MAI(
            TheTarget->createMCAsmInfo(*MRI, TripleName));
#endif
    if (!MAI) {
        outs() << "ASM Error: no assembly info for target " << MCPU << "\n";
        return -1;
    }
    std::unique_ptr<MCInstrInfo> MCII(
            TheTarget->createMCInstrInfo());
    if (!MCII) {
        outs() << "ASM Error: no instruction info for target " << MCPU << "\n";
        return -1;
    }
    std::unique_ptr<MCSubtargetInfo> STI(
            TheTarget->createMCSubtargetInfo(TripleName, MCPU, std::string()));
    if (!STI || !STI->isCPUStringValid(MCPU)) {
        outs() << "ASM Error: no subtarget info for target " << MCPU << "\n";
        return -1;
    }

    // Set up the MCContext for creating symbols and MCExpr's
#if LLVM_VERSION_MAJOR > 12
    MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr, &MCOptions);
#else
    MCObjectFileInfo MOFI;
    MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions);
    MOFI.InitMCObjectFileInfo(TheTriple, true, Ctx);
#endif

    // Finalize setup for output object code stream
    std::string Data;
    std::unique_ptr<raw_string_ostream> DataStream(std::make_unique<raw_string_ostream>(Data));
    std::unique_ptr<buffer_ostream> BOS(std::make_unique<buffer_ostream>(*DataStream));
    raw_pwrite_stream* OS = BOS.get();

    // The emitter and backend are owned from the moment they are created so
    // that neither leaks on an early return; ownership moves to the streamer
    // below.
#if LLVM_VERSION_MAJOR > 14
    std::unique_ptr<MCCodeEmitter> CE(TheTarget->createMCCodeEmitter(*MCII, Ctx));
#else
    std::unique_ptr<MCCodeEmitter> CE(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
#endif
    std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));

    if (!MAB) {
        outs() << "ASM Error: Unable to create MCA Backend\n";
        return -1;
    }

    // Create the object writer before MAB is handed over to the streamer
    auto Writer = MAB->createObjectWriter(*OS);

#if LLVM_VERSION_MAJOR > 20
    std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
        TheTriple, Ctx,
        std::move(MAB), std::move(Writer),
        std::move(CE), *STI));
#else
    std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
        TheTriple, Ctx,
        std::move(MAB), std::move(Writer),
        std::move(CE), *STI, MCOptions.MCRelaxAll,
        MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false));
#endif

    std::unique_ptr<MCAsmParser> Parser(
            createMCAsmParser(SrcMgr, Ctx, *Streamer, *MAI));

    // Set parser to target parser and run
    std::unique_ptr<MCTargetAsmParser> TAP(
            TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
    if (!TAP) {
        outs() << "ASM Error: no assembly parsing support for target " << MCPU << "\n";
        return -1;
    }
    Parser->setTargetParser(*TAP);

    if (Parser->Run(true)) {
        outs() << "ASM Error: assembly parser failed\n";
        return -1;
    }

    // Destroying the buffer stream pushes the object bytes into DataStream
    BOS.reset();
    DataStream->flush();

    int ret = ExtractELFText(Data.data());
    if (ret < 0 || !TextData) {
        outs() << "ASM Error: .text extraction failed\n";
        // Never report success from the failure branch
        return ret < 0 ? ret : -1;
    }

#if 0
    PrintELFHex(Data);
    PrintTextHex();
#endif

    return 0;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/Assemble.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef _ASSEMBLE_H_
#define _ASSEMBLE_H_

#include "OSWrapper.hpp"

#define ASM_MCPU_LEN 16

/* initialize LLVM targets and assembly printers/parsers */
void Init_LLVM();
/* shutdown LLVM */
void Shutdown_LLVM();

/* Assembles AMDGCN shader source for one target ASIC and keeps the
 * resulting raw instruction stream (the ELF .text section) in TextData.
 */
class Assembler {
  private:
      // Components of the target triple
      const char* ArchName = "amdgcn";
      const char* VendorName = "amd";
      const char* OSName = "amdhsa";
      // Target ASIC name, written by SetTargetAsic()
      char MCPU[ASM_MCPU_LEN];

      std::string TripleName;
      // Receives the message of a failed target lookup
      std::string Error;

      // Instruction stream of the last successful assembly; allocated with
      // new[] and released by FlushText()
      char* TextData;
      size_t TextSize;

      void SetTargetAsic(const uint32_t Gfxv);

      void FlushText();
      void PrintELFHex(const std::string Data);
      int ExtractELFText(const char* RawData);

  public:
      Assembler(const uint32_t Gfxv);
      ~Assembler();

      void PrintTextHex();
      // Returns the MCPU member buffer (not a copy)
      const char* GetTargetAsic();

      const char* GetInstrStream();
      const size_t GetInstrStreamSize();
      // Returns 0 on success, -2 when the stream is larger than BufSize
      int CopyInstrStream(char* OutBuf, const size_t BufSize = PAGE_SIZE);

      // All three return 0 on success
      int RunAssemble(const char* const AssemblySource);
      int RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
                         const size_t BufSize = PAGE_SIZE);
      // Overload that assembles for the given Gfxv instead of the target
      // set at construction
      int RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
                         const size_t BufSize, const uint32_t Gfxv);
};

#endif  // _ASSEMBLE_H_


================================================
FILE: libhsakmt/tests/kfdtest/src/BaseDebug.cpp
================================================
/*
 * Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "BaseDebug.hpp"
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <hsakmt/linux/kfd_ioctl.h>
#include <fcntl.h>
#include "unistd.h"

// Constructor: the body is intentionally empty.
BaseDebug::BaseDebug() {
}

BaseDebug::~BaseDebug() {
    // Nothing to clean up unless a process is still attached
    if (!m_Pid)
        return;

    /*
     * Still attached: close the polling file descriptor and remove its
     * file.  Debug and runtime enablement are not torn down here because
     * the KFD disables them automatically when the process terminates.
     */
    close(m_Fd.fd);
    unlink(m_Fd_Name);
}

// Creates the FIFO used for event polling, opens it and debug attaches to
// the given process.
//
// @param rInfo            Runtime info buffer handed to the KFD
// @param rInfoSize        Size of rInfo
// @param pid              Process to attach to
// @param exceptionEnable  Exception mask to enable
// @return HSAKMT_STATUS_SUCCESS on success; HSAKMT_STATUS_ERROR when the
//         FIFO cannot be opened or the enable ioctl fails
HSAKMT_STATUS BaseDebug::Attach(struct kfd_runtime_info *rInfo,
                                int rInfoSize,
                                unsigned int pid,
                                uint64_t exceptionEnable) {
    struct kfd_ioctl_dbg_trap_args args = {0};

    memset(&args, 0x00, sizeof(args));

    // The mkfifo() result is not checked; a FIFO that is unusable for any
    // reason is caught by the open() check below.
    mkfifo(m_Fd_Name, 0666);
    m_Fd.fd = open(m_Fd_Name, O_CLOEXEC | O_NONBLOCK | O_RDWR);
    if (m_Fd.fd < 0) {
        // Do not hand an invalid descriptor to the KFD
        unlink(m_Fd_Name);
        return HSAKMT_STATUS_ERROR;
    }
    m_Fd.events = POLLIN | POLLRDNORM;

    args.pid = pid;
    args.op = KFD_IOC_DBG_TRAP_ENABLE;
    args.enable.rinfo_ptr = (uint64_t)rInfo;
    args.enable.rinfo_size = rInfoSize;
    args.enable.dbg_fd = m_Fd.fd;
    args.enable.exception_mask = exceptionEnable;

    if (hsaKmtDebugTrapIoctl(&args, NULL, NULL)) {
        close(m_Fd.fd);
        unlink(m_Fd_Name);
        return HSAKMT_STATUS_ERROR;
    }

    // A non-zero m_Pid marks this object as attached
    m_Pid = pid;

    return HSAKMT_STATUS_SUCCESS;
}


void BaseDebug::Detach(void) {
    struct kfd_ioctl_dbg_trap_args args = {0};

    memset(&args, 0x00, sizeof(args));

    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_DISABLE;

    hsaKmtDebugTrapIoctl(&args, NULL, NULL);

    close(m_Fd.fd);
    unlink(m_Fd_Name);

    m_Pid = 0;
    m_Fd.fd = 0;
    m_Fd.events = 0;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT request for the attached
 * process.
 *
 * @param exceptions  Exception mask placed in the request.
 * @param gpuId       GPU id placed in the request.
 * @param queueId     Queue id placed in the request.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::SendRuntimeEvent(uint64_t exceptions, int gpuId, int queueId)
{
    struct kfd_ioctl_dbg_trap_args request;

    /* Zero the whole request before filling in the fields this op uses. */
    memset(&request, 0, sizeof(request));

    request.op = KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT;
    request.pid = m_Pid;
    request.send_runtime_event.queue_id = queueId;
    request.send_runtime_event.gpu_id = gpuId;
    request.send_runtime_event.exception_mask = exceptions;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    return status;
}

/*
 * Waits for the debug FIFO to become readable and then queries the pending
 * debug event.
 *
 * @param exceptions   In: exception mask sent with the query.
 *                     Out: exception mask left in the request by the ioctl.
 * @param gpuId        Optional out: GPU id reported by the query.
 * @param queueId      Optional out: queue id reported by the query.
 * @param timeoutMsec  Timeout passed to poll(), in milliseconds.
 * @return HSAKMT_STATUS_ERROR if poll() times out or fails; otherwise the
 *         status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::QueryDebugEvent(uint64_t *exceptions,
                                         uint32_t *gpuId, uint32_t *queueId,
                                         int timeoutMsec)
{
    struct kfd_ioctl_dbg_trap_args args = {0};
    HSAKMT_STATUS result;
    char wakeByte;
    ssize_t drained;

    if (poll(&m_Fd, 1, timeoutMsec) <= 0)
        return HSAKMT_STATUS_ERROR;

    /*
     * poll() watches a single descriptor, so a positive return is always 1.
     * Drain exactly one byte from the FIFO using a fixed-size buffer rather
     * than a variable-length array.  A failed read is tolerated; the query
     * below is still issued.
     */
    drained = read(m_Fd.fd, &wakeByte, sizeof(wakeByte));
    (void)drained;

    memset(&args, 0x00, sizeof(args));

    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT;
    args.query_debug_event.exception_mask = *exceptions;

    result = hsaKmtDebugTrapIoctl(&args, NULL, NULL);

    /* Outputs are copied back whatever the ioctl status is. */
    *exceptions = args.query_debug_event.exception_mask;

    if (gpuId)
        *gpuId = args.query_debug_event.gpu_id;

    if (queueId)
        *queueId = args.query_debug_event.queue_id;

    return result;
}

void BaseDebug::SetExceptionsEnabled(uint64_t exceptions)
{
    struct kfd_ioctl_dbg_trap_args args = {0};

    memset(&args, 0x00, sizeof(args));

    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED;
    args.set_exceptions_enabled.exception_mask = exceptions;

    hsaKmtDebugTrapIoctl(&args, NULL, NULL);
}

/*
 * Issues a KFD_IOC_DBG_TRAP_SUSPEND_QUEUES request.
 *
 * @param numQueues          In: number of queues in the arrays.
 *                           Out: low 32 bits of the value the ioctl wrapper
 *                           stores in its 64-bit return slot.
 * @param queues             Queue handles forwarded to hsaKmtDebugTrapIoctl().
 * @param queueIds           Array whose address is placed in the request.
 * @param exceptionsToClear  Exception mask placed in the request.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::SuspendQueues(unsigned int *numQueues,
                                       HSA_QUEUEID *queues,
                                       uint32_t *queueIds,
                                       uint64_t exceptionsToClear)
{
    struct kfd_ioctl_dbg_trap_args args = {0};
    HSAKMT_STATUS result;
    /*
     * The wrapper's return slot is an HSAuint64 while the caller's counter is
     * an unsigned int.  Casting the caller's pointer would let any store made
     * through it cover 8 bytes of a 4-byte object, so a correctly sized local
     * is used and copied back.  It is seeded with the caller's value so the
     * counter is unchanged if the wrapper does not write the slot.
     */
    HSAuint64 debugReturn = *numQueues;

    memset(&args, 0x00, sizeof(args));

    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
    args.suspend_queues.num_queues = *numQueues;
    args.suspend_queues.queue_array_ptr = (uint64_t)queueIds;
    args.suspend_queues.exception_mask = exceptionsToClear;

    result = hsaKmtDebugTrapIoctl(&args, queues, &debugReturn);

    *numQueues = static_cast<unsigned int>(debugReturn);

    return result;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_RESUME_QUEUES request.
 *
 * @param numQueues  In: number of queues in the arrays.
 *                   Out: low 32 bits of the value the ioctl wrapper stores
 *                   in its 64-bit return slot.
 * @param queues     Queue handles forwarded to hsaKmtDebugTrapIoctl().
 * @param queueIds   Array whose address is placed in the request.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::ResumeQueues(unsigned int *numQueues,
                                       HSA_QUEUEID *queues,
                                       uint32_t *queueIds)
{
    struct kfd_ioctl_dbg_trap_args args = {0};
    HSAKMT_STATUS result;
    /*
     * The wrapper's return slot is an HSAuint64 while the caller's counter is
     * an unsigned int.  Casting the caller's pointer would let any store made
     * through it cover 8 bytes of a 4-byte object, so a correctly sized local
     * is used and copied back.  It is seeded with the caller's value so the
     * counter is unchanged if the wrapper does not write the slot.
     */
    HSAuint64 debugReturn = *numQueues;

    memset(&args, 0x00, sizeof(args));

    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_RESUME_QUEUES;
    args.resume_queues.num_queues = *numQueues;
    args.resume_queues.queue_array_ptr = (uint64_t)queueIds;

    result = hsaKmtDebugTrapIoctl(&args, queues, &debugReturn);

    *numQueues = static_cast<unsigned int>(debugReturn);

    return result;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT request.
 *
 * @param exceptionsToClear  Exception mask placed in the request.
 * @param snapshotBufAddr    Address of the snapshot buffer, passed through
 *                           as an integer.
 * @param numSnapshots       In: entry count placed in the request.
 *                           Out: entry count left in the request by the ioctl.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::QueueSnapshot(uint64_t exceptionsToClear,
                                  uint64_t snapshotBufAddr,
                                  uint32_t *numSnapshots)
{
    struct kfd_ioctl_dbg_trap_args request;

    /* Zero the whole request before filling in the fields this op uses. */
    memset(&request, 0, sizeof(request));

    request.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
    request.pid = m_Pid;
    request.queue_snapshot.entry_size = sizeof(struct kfd_queue_snapshot_entry);
    request.queue_snapshot.num_queues = *numSnapshots;
    request.queue_snapshot.snapshot_buf_ptr = snapshotBufAddr;
    request.queue_snapshot.exception_mask = exceptionsToClear;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    /* The count is copied back whatever the ioctl status is. */
    *numSnapshots = request.queue_snapshot.num_queues;

    return status;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT request.
 *
 * @param exceptionsToClear  Exception mask placed in the request.
 * @param snapshotBufAddr    Address of the snapshot buffer, passed through
 *                           as an integer.
 * @param numSnapshots       In: device count placed in the request.
 *                           Out: device count left in the request by the ioctl.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::DeviceSnapshot(uint64_t exceptionsToClear,
                                  uint64_t snapshotBufAddr,
                                  uint32_t *numSnapshots)
{
    struct kfd_ioctl_dbg_trap_args request;

    /* Zero the whole request before filling in the fields this op uses. */
    memset(&request, 0, sizeof(request));

    request.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
    request.pid = m_Pid;
    request.device_snapshot.entry_size = sizeof(struct kfd_dbg_device_info_entry);
    request.device_snapshot.num_devices = *numSnapshots;
    request.device_snapshot.snapshot_buf_ptr = snapshotBufAddr;
    request.device_snapshot.exception_mask = exceptionsToClear;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    /* The count is copied back whatever the ioctl status is. */
    *numSnapshots = request.device_snapshot.num_devices;

    return status;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE request.
 *
 * @param mode         Override mode placed in the request.
 * @param enableMask   In: enable mask placed in the request.
 *                     Out: enable mask left in the request by the ioctl.
 * @param supportMask  In: support-request mask placed in the request.
 *                     Out: support-request mask left in the request by the
 *                     ioctl.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::SetWaveLaunchOverride(int mode,
                                               uint32_t *enableMask,
                                               uint32_t *supportMask)
{
    struct kfd_ioctl_dbg_trap_args request;

    /* Zero the whole request before filling in the fields this op uses. */
    memset(&request, 0, sizeof(request));

    request.op = KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE;
    request.pid = m_Pid;
    request.launch_override.support_request_mask = *supportMask;
    request.launch_override.enable_mask = *enableMask;
    request.launch_override.override_mode = mode;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    /* Both masks are copied back whatever the ioctl status is. */
    *enableMask = request.launch_override.enable_mask;
    *supportMask = request.launch_override.support_request_mask;

    return status;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH request.
 *
 * @param address  Address placed in the request.
 * @param mode     Watch mode placed in the request.
 * @param mask     Address mask placed in the request.
 * @param gpuId    GPU id placed in the request.
 * @param id       Out: watch id left in the request by the ioctl.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::SetAddressWatch(uint64_t address,
                                         int mode,
                                         uint64_t mask,
                                         uint32_t gpuId,
                                         uint32_t *id)
{
    struct kfd_ioctl_dbg_trap_args request = {};

    request.op = KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH;
    request.pid = m_Pid;
    request.set_node_address_watch.gpu_id = gpuId;
    request.set_node_address_watch.mask = mask;
    request.set_node_address_watch.mode = mode;
    request.set_node_address_watch.address = address;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    /* The id is copied back whatever the ioctl status is. */
    *id = request.set_node_address_watch.id;

    return status;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH request.
 *
 * @param gpuId  GPU id placed in the request.
 * @param id     Watch id placed in the request.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::ClearAddressWatch(uint32_t gpuId,
                                           uint32_t id)
{
    struct kfd_ioctl_dbg_trap_args request = {};

    request.op = KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH;
    request.pid = m_Pid;
    request.clear_node_address_watch.id = id;
    request.clear_node_address_watch.gpu_id = gpuId;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    return status;
}

/*
 * Issues a KFD_IOC_DBG_TRAP_SET_FLAGS request.
 *
 * @param flags  In: flags placed in the request.
 *               Out: flags left in the request by the ioctl.
 * @return Status returned by hsaKmtDebugTrapIoctl().
 */
HSAKMT_STATUS BaseDebug::SetFlags(uint32_t *flags)
{
    struct kfd_ioctl_dbg_trap_args request = {};

    request.op = KFD_IOC_DBG_TRAP_SET_FLAGS;
    request.pid = m_Pid;
    request.set_flags.flags = *flags;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&request, NULL, NULL);

    /* The flags are copied back whatever the ioctl status is. */
    *flags = request.set_flags.flags;

    return status;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/BaseDebug.hpp
================================================
/*
 * Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_BASE_DEBUG__H__
#define __KFD_BASE_DEBUG__H__

#include "hsakmt/hsakmt.h"
#include <poll.h>
#include <stdlib.h>

// @class BaseDebug
// Test-side wrapper around the KFD debug-trap ioctl (hsaKmtDebugTrapIoctl).
// Each method fills in one kfd_ioctl_dbg_trap_args request for the process
// recorded by Attach() and forwards it.
class BaseDebug {
 public:
    BaseDebug(void);
    // Closes and unlinks the polling FIFO if a process is still attached.
    virtual ~BaseDebug(void);

    // Creates the polling FIFO and debug attaches to @p pid.
    HSAKMT_STATUS Attach(struct kfd_runtime_info *rInfo,
                         int rInfoSize,
                         unsigned int pid,
                         uint64_t exceptionEnable);

    // Disables debugging, releases the FIFO and resets to the detached state.
    void Detach(void);
    HSAKMT_STATUS SendRuntimeEvent(uint64_t exceptions, int gpuId, int queueId);
    // Polls the FIFO (up to @p timeoutMsec) and then queries the debug event.
    // @p gpuId and @p queueId may be NULL.
    HSAKMT_STATUS QueryDebugEvent(uint64_t *exceptions,
                                  uint32_t *gpuId, uint32_t *queueId,
                                  int timeoutMsec);
    void SetExceptionsEnabled(uint64_t exceptions);
    HSAKMT_STATUS SuspendQueues(unsigned int *numQueues, HSA_QUEUEID *queues, uint32_t *queueIds,
                                uint64_t exceptionsToClear);
    HSAKMT_STATUS ResumeQueues(unsigned int *numQueues, HSA_QUEUEID *queues, uint32_t *queueIds);
    // @p numSnapshots is in/out for both snapshot calls.
    HSAKMT_STATUS QueueSnapshot(uint64_t exceptionsToClear, uint64_t snapshotBufAddr,
                                uint32_t *numSnapshots);
    HSAKMT_STATUS DeviceSnapshot(uint64_t exceptionsToClear, uint64_t snapshotBuffAddr,
                                 uint32_t *numSnapshots);
    // @p enableMask and @p supportMask are in/out.
    HSAKMT_STATUS SetWaveLaunchOverride(int mode, uint32_t *enableMask, uint32_t *supportMask);
    HSAKMT_STATUS SetAddressWatch(uint64_t address, int mode, uint64_t mask, uint32_t gpuId, uint32_t *id);
    HSAKMT_STATUS ClearAddressWatch(uint32_t gpuId, uint32_t id);
    // @p flags is in/out.
    HSAKMT_STATUS SetFlags(uint32_t *flags);

 private:
    // PID of the attached process; 0 while detached.  The destructor tests
    // this value, so it must never be left uninitialized.
    unsigned int m_Pid = 0;
    // Descriptor of the FIFO handed to the KFD at attach time and polled by
    // QueryDebugEvent().
    struct pollfd m_Fd = {};
    // Filesystem path of that FIFO.
    const char *m_Fd_Name = "/tmp/dbg_fifo";
};

#endif  // __KFD_BASE_DEBUG__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/BasePacket.cpp
================================================
/*
 * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "BasePacket.hpp"
#include "KFDTestUtil.hpp"
#include "KFDBaseComponentTest.hpp"

// Starts with no packet storage allocated and records the family id that the
// global base test reports for the default node.
BasePacket::BasePacket(void)
    : m_FamilyId(g_baseTest->GetFamilyIdFromDefaultNode()),
      m_packetAllocation(NULL) {
}

// Releases the storage obtained through AllocPacket(), if any.
BasePacket::~BasePacket(void) {
    // free() accepts a null pointer and does nothing in that case.
    free(m_packetAllocation);
}

void BasePacket::Dump() const {
    unsigned int size = SizeInDWords();
    const HSAuint32 *packet = (const HSAuint32 *)GetPacket();
    std::ostream &log = LOG();
    unsigned int i;

    log << "Packet dump:" << std::hex;
    for (i = 0; i < size; i++)
        log << " " << std::setw(8) << std::setfill('0') << packet[i];
    log << std::endl;
}

// Allocates zero-filled storage of SizeInBytes() bytes and remembers it in
// m_packetAllocation so the destructor can free it.
// @returns The new storage; NULL when the packet size is 0 (also reported as
//          a test expectation failure) or when calloc() fails.
void *BasePacket::AllocPacket(void) {
    unsigned int size = SizeInBytes();

    EXPECT_NE(0, size);
    if (size == 0) {
        return NULL;
    }

    void *const storage = calloc(1, size);
    m_packetAllocation = storage;
    EXPECT_NOTNULL(m_packetAllocation);

    return storage;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/BasePacket.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_BASE_PACKET__H__
#define __KFD_BASE_PACKET__H__

/**
 * Packet profiles.
 * Every profile must be listed here; each one has its own sub-types.
 */
enum PACKETTYPE {
    PACKETTYPE_PM4  = 0,
    PACKETTYPE_SDMA = 1,
    PACKETTYPE_AQL  = 2
};

// @class BasePacket
// Abstract base of every packet type: concrete packets report their profile,
// their raw contents and their size.
class BasePacket {
 public:
    BasePacket();
    virtual ~BasePacket();

    // @returns Packet type
    virtual PACKETTYPE PacketType() const = 0;

    // @returns Pointer to the packet
    virtual const void *GetPacket() const = 0;

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const = 0;

    // @returns Packet size in dwords (the byte size divided by the size of an
    //          unsigned int, rounded down)
    unsigned int SizeInDWords() const {
        return SizeInBytes()/sizeof(unsigned int);
    }

    // Logs the packet contents dword by dword.
    void Dump() const;

 protected:
    // Family id captured at construction time.
    unsigned int m_FamilyId;
    // Storage returned by AllocPacket(); freed by the destructor.
    void *m_packetAllocation;

    // Allocates zeroed storage of SizeInBytes() bytes.
    void *AllocPacket();
};

#endif  // __KFD_BASE_PACKET__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/BaseQueue.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "BaseQueue.hpp"
#include "SDMAQueue.hpp"
#include "PM4Queue.hpp"
#include "AqlQueue.hpp"
#include "hsakmt/hsakmt.h"
#include "KFDBaseComponentTest.hpp"

// Constructs a queue object that does not yet own a hardware queue.
//
// Every member gets a defined value (listed in declaration order).  Create()
// is what normally fills these in, but QueueArray::GetQueue() reads
// GetNodeId() on queues whose Create() may have failed before assigning
// m_Node, so nothing is left indeterminate.  Derived constructors remain
// free to overwrite CMD_NOP and m_SdmaEngineId in their bodies.
BaseQueue::BaseQueue()
    :CMD_NOP(0),
    m_pendingWptr(0),
    m_pendingWptr64(0),
    m_Resources(),
    m_QueueBuf(NULL),
    m_Node(0),
    m_FamilyId(0),
    m_SdmaEngineId(0),
    m_SkipWaitConsumption(true) {
}

// Destroys the hardware queue, if one exists.  The status cannot be reported
// from a destructor and is dropped.
BaseQueue::~BaseQueue(void) {
    static_cast<void>(Destroy());
}

// Allocates the ring buffer and creates the hardware queue on NodeId.
//
// @param NodeId    Node the queue and its ring buffer are created on.
// @param size      Ring buffer size in bytes.
// @param pointers  Only used for HSA_QUEUE_COMPUTE_AQL queues: pointers[0]
//                  and pointers[1] become the AQL read and write pointers.
// @return The hsaKmtCreateQueue/hsaKmtCreateQueueExt status when creation
//         fails; HSAKMT_STATUS_ERROR when the queue was created but its read
//         or write pointer is NULL; HSAKMT_STATUS_SUCCESS otherwise.
//
// If this object already owns a queue it is destroyed first (one queue per
// object).  When queue creation itself fails, the ring buffer is released
// again so the object returns to the "no queue" state.
HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint64 *pointers) {
    HSAKMT_STATUS status;
    HSA_QUEUE_TYPE type = GetQueueType();

    if (m_QueueBuf != NULL) {
        // Queue already exists, one queue per object
        Destroy();
    }

    memset(&m_Resources, 0, sizeof(m_Resources));

    m_QueueBuf = new HsaMemoryBuffer(size, NodeId, true/*zero*/, false/*local*/, true/*exec*/,
                        /*isScratch */ false, /* isReadOnly */false, /* isUncached */true);

    if (type == HSA_QUEUE_COMPUTE_AQL) {
        m_Resources.Queue_read_ptr_aql = &pointers[0];
        m_Resources.Queue_write_ptr_aql = &pointers[1];
    }

    if (type == HSA_QUEUE_SDMA_BY_ENG_ID)
        status = hsaKmtCreateQueueExt(NodeId,
                                      type,
                                      DEFAULT_QUEUE_PERCENTAGE,
                                      DEFAULT_PRIORITY,
                                      m_SdmaEngineId,
                                      m_QueueBuf->As<unsigned int*>(),
                                      m_QueueBuf->Size(),
                                      NULL,
                                      &m_Resources);
    else
        status = hsaKmtCreateQueue(NodeId,
                                   type,
                                   DEFAULT_QUEUE_PERCENTAGE,
                                   DEFAULT_PRIORITY,
                                   m_QueueBuf->As<unsigned int*>(),
                                   m_QueueBuf->Size(),
                                   NULL,
                                   &m_Resources);

    if (status != HSAKMT_STATUS_SUCCESS) {
        // No queue was created, so there is nothing for a later Destroy() to
        // pass to hsaKmtDestroyQueue().  Release the ring buffer now; leaving
        // m_QueueBuf set would make Destroy() and the destructor try to tear
        // down a queue that never existed and keep the buffer alive.
        delete m_QueueBuf;
        m_QueueBuf = NULL;
        return status;
    }

    // From here on the queue exists: m_QueueBuf stays set even when an error
    // is reported below, so that Destroy() can still tear the queue down.
    if (m_Resources.Queue_read_ptr  == NULL) {
        WARN() << "CreateQueue: read pointer value should be 0" << std::endl;
        status = HSAKMT_STATUS_ERROR;
    }

    if (m_Resources.Queue_write_ptr  == NULL) {
        WARN() << "CreateQueue: write pointer value should be 0" << std::endl;
        status = HSAKMT_STATUS_ERROR;
    }

    // Needs to match the queue write ptr
    m_pendingWptr = 0;
    m_pendingWptr64 = 0;
    m_Node = NodeId;
    m_FamilyId = g_baseTest->GetFamilyIdFromNodeId(NodeId);
    return status;
}

// Forwards new queue settings to hsaKmtUpdateQueue.
// @param percent        New queue percentage.
// @param priority       New queue priority.
// @param nullifyBuffer  When true, a NULL buffer address and a size of 0 are
//                       passed; otherwise the current ring buffer address and
//                       size are passed.
// @return Status returned by hsaKmtUpdateQueue.
HSAKMT_STATUS BaseQueue::Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer) {
    void *ringAddress = NULL;
    HSAuint64 ringSize = 0;

    // The ring buffer is only consulted when it is not being nullified.
    if (!nullifyBuffer) {
        ringAddress = m_QueueBuf->As<void*>();
        ringSize = m_QueueBuf->Size();
    }

    return hsaKmtUpdateQueue(m_Resources.QueueId, percent, priority, ringAddress, ringSize, NULL);
}

// Forwards a CU mask to hsaKmtSetQueueCUMask for this queue.  Note that the
// underlying call takes the count before the mask pointer.
HSAKMT_STATUS BaseQueue::SetCUMask(unsigned int *mask, unsigned int mask_count) {
    const HSAKMT_STATUS status = hsaKmtSetQueueCUMask(m_Resources.QueueId, mask_count, mask);

    return status;
}

// Destroys the hardware queue and frees its ring buffer.
// @return HSAKMT_STATUS_SUCCESS when there is no queue to destroy; otherwise
//         the hsaKmtDestroyQueue status.  The ring buffer is only released
//         when the queue was destroyed successfully.
HSAKMT_STATUS BaseQueue::Destroy() {
    // A NULL ring buffer means this object owns no queue.
    if (m_QueueBuf == NULL)
        return HSAKMT_STATUS_SUCCESS;

    const HSAKMT_STATUS status = hsaKmtDestroyQueue(m_Resources.QueueId);
    if (status != HSAKMT_STATUS_SUCCESS)
        return status;

    delete m_QueueBuf;
    m_QueueBuf = NULL;

    return status;
}

void BaseQueue::PlaceAndSubmitPacket(const BasePacket &packet) {
    PlacePacket(packet);
    SubmitPacket();
}

// Waits until the queue's read pointer reaches the value RptrWhenConsumed()
// reports, failing the test if that does not happen within timeOut.
// This base implementation does not support waiting on an event: passing a
// non-NULL event is a fatal test failure.
void BaseQueue::Wait4PacketConsumption(HsaEvent *event, unsigned int timeOut) {
    ASSERT_TRUE(!event) << "Not supported!" << std::endl;
    ASSERT_TRUE(WaitOnValue(m_Resources.Queue_read_ptr, RptrWhenConsumed(), timeOut));
}

// @return true when the queue's write and read pointers are equal.
bool BaseQueue::AllPacketsSubmitted() {
    const unsigned int writePos = Wptr();
    const unsigned int readPos = Rptr();

    return writePos == readPos;
}

// Copies a packet into the ring buffer at the pending write position.
//
// Only the pending write pointers (m_pendingWptr / m_pendingWptr64) are
// advanced here, so a packet can be placed without being submitted.  If the
// packet would run past the end of the buffer, the remainder of the buffer is
// filled with CMD_NOP dwords and the packet is placed at offset 0.
//
// Fatal test failures (the function returns without placing anything) occur
// when the packet type does not match the queue, when there is not enough
// free space, or when the packet is larger than the whole ring.
void BaseQueue::PlacePacket(const BasePacket &packet) {
    ASSERT_EQ(packet.PacketType(), PacketTypeSupported())
        << "Cannot add a packet since packet type doesn't match queue";

    unsigned int readPtr = Rptr();
    unsigned int writePtr = m_pendingWptr;
    HSAuint64 writePtr64 = m_pendingWptr64;

    unsigned int packetSizeInDwords = packet.SizeInDWords();
    unsigned int dwordsRequired = packetSizeInDwords;
    unsigned int queueSizeInDWord = m_QueueBuf->Size() / sizeof(uint32_t);

    if (writePtr + packetSizeInDwords > queueSizeInDWord) {
        // Wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
        dwordsRequired +=  queueSizeInDWord - writePtr;
    }

    // Free space between the write and read positions, computed modulo the
    // ring size with unsigned arithmetic.  The "- 1" keeps one dword unused.
    // NOTE(review): presumably so that a full ring is never mistaken for an
    // empty one (readPtr == writePtr) - confirm.
    unsigned int dwordsAvailable = (readPtr - 1 - writePtr + queueSizeInDWord) % queueSizeInDWord;
    ASSERT_GE(dwordsAvailable, dwordsRequired) << "Cannot add a packet, buffer overrun";

    ASSERT_GE(queueSizeInDWord, packetSizeInDwords) << "Cannot add a packet, packet size too large";

    // ">=" here but ">" in the loop: a packet that ends exactly at the end of
    // the buffer enters this branch, places no NOPs and leaves the pending
    // pointers unchanged.
    if (writePtr + packetSizeInDwords >= queueSizeInDWord) {
        // Wraparound
        while (writePtr + packetSizeInDwords > queueSizeInDWord) {
            m_QueueBuf->As<unsigned int *>()[writePtr] = CMD_NOP;
            writePtr = (writePtr + 1) % queueSizeInDWord;
            writePtr64++;
        }

        // Not updating Wptr since we might want to place the packet without submission
        m_pendingWptr = (writePtr % queueSizeInDWord);
        m_pendingWptr64 = writePtr64;
    }

    // Dwords are copied as 4 bytes each.
    memcpy(m_pendingWptr + m_QueueBuf->As<unsigned int*>(), packet.GetPacket(), packetSizeInDwords * 4);

    // The 32-bit pending pointer wraps at the ring size; the 64-bit one keeps
    // counting.
    m_pendingWptr = (m_pendingWptr + packetSizeInDwords) % queueSizeInDWord;
    m_pendingWptr64 += packetSizeInDwords;
}

// Returns the queue associated with a node, creating it on first use.
//
// @param Node  Node id the queue belongs to.
// @return The existing or newly created queue, or NULL when the array's
//         queue type is not one of HSA_QUEUE_COMPUTE, HSA_QUEUE_SDMA or
//         HSA_QUEUE_COMPUTE_AQL.  The returned queue is owned by the array.
//
// A Create() failure does not change the result (the queue is still stored
// and returned, as callers have always received it), but it is logged
// instead of being silently dropped.
BaseQueue* QueueArray::GetQueue(unsigned int Node) {
    // If a queue exists for that node then return, else create one
    for (unsigned int i = 0; i < m_QueueList.size(); i++) {
        if (Node == m_QueueList.at(i)->GetNodeId())
            return m_QueueList.at(i);
    }

    BaseQueue *pQueue = NULL;

    switch (m_QueueType) {
    case HSA_QUEUE_COMPUTE:
        pQueue = new PM4Queue();
        break;
    case HSA_QUEUE_SDMA:
        pQueue = new SDMAQueue();
        break;
    case HSA_QUEUE_COMPUTE_AQL:
        pQueue = new AqlQueue();
        break;
    default:
        return NULL;
    }

    // pQueue is never NULL here: every remaining case assigned it from new.
    const HSAKMT_STATUS status = pQueue->Create(Node);
    if (status != HSAKMT_STATUS_SUCCESS) {
        WARN() << "QueueArray::GetQueue: queue creation on node " << Node
               << " failed with status " << status << std::endl;
    }

    m_QueueList.push_back(pQueue);
    return pQueue;
}

void QueueArray::Destroy() {
    for (unsigned int i = 0; i < m_QueueList.size(); i++)
        delete m_QueueList.at(i);

    m_QueueList.clear();
}


================================================
FILE: libhsakmt/tests/kfdtest/src/BaseQueue.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_BASE_QUEUE__H__
#define __KFD_BASE_QUEUE__H__

#include <vector>
#include "KFDTestUtil.hpp"
#include "BasePacket.hpp"

// @class BaseQueue
// Abstract base of the test queues.  It owns the ring buffer and the queue
// resources returned by the thunk and implements packet placement; concrete
// queue types supply the queue type, the read/write pointer accessors and
// packet submission.
class BaseQueue {
 public:
    static const unsigned int DEFAULT_QUEUE_SIZE = PAGE_SIZE;
    static const HSA_QUEUE_PRIORITY DEFAULT_PRIORITY = HSA_QUEUE_PRIORITY_NORMAL;
    static const unsigned int DEFAULT_QUEUE_PERCENTAGE  = 100;
    static const unsigned int ZERO_QUEUE_PERCENTAGE     = 0;
    static const unsigned int     FLUSH_GPU_CACHES_TO   = 1000;

    BaseQueue(void);
    // Destroys the queue if it still exists.
    virtual ~BaseQueue(void);

    /** Create the queue.
     *  @see hsaKmtCreateQueue
     *  @param pointers is used only for creating AQL queues. Otherwise it is omitted.
     */
    virtual HSAKMT_STATUS Create(unsigned int NodeId, unsigned int size = DEFAULT_QUEUE_SIZE,
                                 HSAuint64 *pointers = NULL);
    /** Update the queue.
     *  @see hsaKmtUpdateQueue
     *  @param percent New queue percentage
     *  @param priority New queue priority
     *  @param nullifyBuffer
     *      If 'true', set the new buffer address to NULL and the size to 0. Otherwise
     *      don't change the queue buffer address/size.
     */
    virtual HSAKMT_STATUS Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer);
    /** Set the queue's CU mask.
     *  @see hsaKmtSetQueueCUMask
     */
    virtual HSAKMT_STATUS SetCUMask(unsigned int *mask, unsigned int mask_count);
    /** Destroy the queue.
     *  @see hsaKmtDestroyQueue
     */
    virtual HSAKMT_STATUS Destroy();
    /** Wait for all the packets submitted to the queue to be consumed. (i.e. wait until RPTR=WPTR).
     *  Note that all packets being consumed is not the same as all packets being processed.
     */
    virtual void Wait4PacketConsumption(HsaEvent *event = NULL, unsigned int timeOut = g_TestTimeOut);
    /** @brief Place packet and submit it in one function
     */
    virtual void PlaceAndSubmitPacket(const BasePacket &packet);
    /** @brief Copy packet to queue and update write pointer
     */
    virtual void PlacePacket(const BasePacket &packet);
    /** @brief Update queue write pointer and set the queue doorbell to the queue write pointer
     */
    virtual void SubmitPacket() = 0;
    /** @brief Check if all packets in queue are already processed
     *  Compare queue read and write pointers
     */
    bool AllPacketsSubmitted();

    // Accessors for the skip-wait flag (see m_SkipWaitConsumption below).
    void SetSkipWaitConsump(int val) { m_SkipWaitConsumption = val; }
    int GetSkipWaitConsump() { return m_SkipWaitConsumption; }
    // Size of the ring buffer; requires the queue to have been created.
    int Size() { return m_QueueBuf->Size(); }

    HsaQueueResource *GetResource() { return &m_Resources; }
    unsigned int GetPendingWptr() { return m_pendingWptr; }
    HSAuint64 GetPendingWptr64() { return m_pendingWptr64; }
    virtual _HSA_QUEUE_TYPE GetQueueType() = 0;
    unsigned int GetNodeId() { return m_Node; }
    unsigned int GetFamilyId() { return m_FamilyId; }
    int GetSDMAEngineId() { return m_SdmaEngineId; }

 protected:
    static const unsigned int CMD_NOP_TYPE_2        = 0x80000000;
    static const unsigned int CMD_NOP_TYPE_3        = 0xFFFF1002;

    // Dword written as padding when a packet would cross the buffer end.
    // Not set by this class's constructor.
    // NOTE(review): presumably assigned by each concrete queue - confirm.
    unsigned int CMD_NOP;
    // Write position after the packets placed so far, modulo the ring size
    // in dwords; packets may have been placed without being submitted.
    unsigned int m_pendingWptr;
    // Same position as a running dword count that does not wrap at the ring size.
    HSAuint64 m_pendingWptr64;
    // Queue resources filled in by queue creation.
    HsaQueueResource m_Resources;
    // Ring buffer; NULL while no queue exists.
    HsaMemoryBuffer *m_QueueBuf;
    // Node and family id recorded by Create().
    unsigned int m_Node;
    unsigned int m_FamilyId;
    // Engine id passed to hsaKmtCreateQueueExt for HSA_QUEUE_SDMA_BY_ENG_ID queues.
    int m_SdmaEngineId;

    // @return Write pointer modulo queue size in dwords
    virtual unsigned int Wptr() = 0;
    // @return Read pointer modulo queue size in dwords
    virtual unsigned int Rptr() = 0;
    // @return Expected m_Resources.Queue_read_ptr when all packets consumed
    virtual unsigned int RptrWhenConsumed() = 0;
    virtual PACKETTYPE PacketTypeSupported() = 0;

 private:
    // Some tests(such as exception) may not need wait pm4 packet consumption on CZ.
    int m_SkipWaitConsumption;
};


// @class QueueArray
// Keeps one lazily created queue of a fixed type per GPU node and deletes
// the queues it created when it is destroyed.
class QueueArray {
 public:
    // @param type Queue type used for every queue this array creates
    QueueArray(_HSA_QUEUE_TYPE type)
        : m_QueueType(type) {
    }

    ~QueueArray() { Destroy(); }

    // @return Queue for the given node, created on first request
    BaseQueue* GetQueue(unsigned int Node);

    // Deletes all queues held by the array
    void Destroy();

 private:
    // List of Queues. One for each GPU
    std::vector<BaseQueue*> m_QueueList;
    _HSA_QUEUE_TYPE m_QueueType;
};

#endif  // __KFD_BASE_QUEUE__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/Dispatch.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "Dispatch.hpp"

#include "PM4Packet.hpp"

#include "asic_reg/gfx_7_2_d.h"
#include "asic_reg/gfx_7_2_sh_mask.h"

#include "KFDBaseComponentTest.hpp"

// NOTE(review): presumably the register offset of COMPUTE_PGM_RSRC3, defined
// locally because the gfx_7_2 headers included above do not provide it -
// confirm against the register specification.
#define mmCOMPUTE_PGM_RSRC3                                                     0x2e2d

// Prepares a 1x1x1 dispatch of the code in isaBuf with no arguments and no
// scratch, and creates the signal event used to wait for its completion.
//
// @param isaBuf          Buffer holding the shader code; its node is used for
//                        the indirect buffer and the event.
// @param eventAutoReset  Negated and passed to hsaKmtCreateEvent.
//
// The hsaKmtCreateEvent status is not checked here; m_pEop stays NULL if no
// event was stored, which Submit() asserts on.
Dispatch::Dispatch(const HsaMemoryBuffer& isaBuf, const bool eventAutoReset)
    : m_IsaBuf(isaBuf),
      m_IndirectBuf(PACKETTYPE_PM4, PAGE_SIZE / sizeof(unsigned int), isaBuf.Node()),
      m_DimX(1),
      m_DimY(1),
      m_DimZ(1),
      m_pArg1(NULL),
      m_pArg2(NULL),
      m_pEop(NULL),
      m_ScratchEn(false),
      m_ComputeTmpringSize(0),
      m_scratch_base(0ll),
      m_SpiPriority(0) {
    HsaEventDescriptor signalDesc;
    signalDesc.SyncVar.SyncVarSize = 0;
    signalDesc.SyncVar.SyncVar.UserData = NULL;
    signalDesc.NodeId = isaBuf.Node();
    signalDesc.EventType = HSA_EVENTTYPE_SIGNAL;

    const bool manualReset = !eventAutoReset;
    hsaKmtCreateEvent(&signalDesc, manualReset, false, &m_pEop);

    // Per-node properties looked up through the global base test.
    m_FamilyId  = g_baseTest->GetFamilyIdFromNodeId(isaBuf.Node());
    m_NeedCwsrWA = g_baseTest->NeedCwsrWA(isaBuf.Node());
}

// Destroys the completion event, if one was created.
Dispatch::~Dispatch() {
    if (m_pEop) {
        hsaKmtDestroyEvent(m_pEop);
    }
}

// Records the two pointer arguments that BuildIb() splits into 32-bit halves.
void Dispatch::SetArgs(void* pArg1, void* pArg2) {
    m_pArg2 = pArg2;
    m_pArg1 = pArg1;
}

// Records the dispatch dimensions along x, y and z.
void Dispatch::SetDim(unsigned int x, unsigned int y, unsigned int z) {
    m_DimZ = z;
    m_DimY = y;
    m_DimX = x;
}

// Enables scratch for the dispatch.
// The tmpring size value is packed as (waveSize << 12) | numWaves, and the
// scratch base address is recorded alongside it.
void Dispatch::SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base) {
    const int shiftedWaveSize = waveSize << 12;

    m_scratch_base = scratch_base;
    m_ScratchEn = true;
    m_ComputeTmpringSize = (shiftedWaveSize | numWaves);
}

void Dispatch::SetSpiPriority(unsigned int priority) {
    m_SpiPriority = priority;
}

void Dispatch::SetPriv(bool priv) {
    m_NeedCwsrWA = priv;
}

// Builds the indirect buffer and submits the dispatch to the given queue.
//
// Packets are submitted in this order: the indirect buffer, an optional
// write of the event id to the event's sync variable, and a release-memory
// packet carrying the event's HWData2 and id.  Unless the queue is flagged
// to skip it, the call then waits for the queue to consume the packets.
//
// A missing completion event is a fatal test failure; a family id that does
// not match the queue's is reported but does not stop the submission.
void Dispatch::Submit(BaseQueue& queue) {
    ASSERT_NE(m_pEop, (void*)0);
    EXPECT_EQ(m_FamilyId, queue.GetFamilyId());

    BuildIb();
    queue.PlaceAndSubmitPacket(PM4IndirectBufPacket(&m_IndirectBuf));

    // Write data to SyncVar for synchronization purpose
    const bool hasSyncVar = (m_pEop->EventData.EventData.SyncVar.SyncVar.UserData != NULL);
    if (hasSyncVar) {
        queue.PlaceAndSubmitPacket(PM4WriteDataPacket(
            (unsigned int*)m_pEop->EventData.EventData.SyncVar.SyncVar.UserData,
            m_pEop->EventId));
    }

    queue.PlaceAndSubmitPacket(
        PM4ReleaseMemoryPacket(m_FamilyId, false, m_pEop->EventData.HWData2, m_pEop->EventId));

    if (queue.GetSkipWaitConsump())
        return;

    queue.Wait4PacketConsumption();
}

// Waits on the completion event for up to `timeout`; anything other than a
// successful wait is a fatal test failure.
void Dispatch::Sync(unsigned int timeout) {
    ASSERT_SUCCESS(hsaKmtWaitOnEvent(m_pEop, timeout));
}

// Returning with status in order to allow actions to be performed before process termination
int Dispatch::SyncWithStatus(unsigned int timeout) {
    int stat;

    return ((stat = hsaKmtWaitOnEvent(m_pEop, timeout)) != HSAKMT_STATUS_SUCCESS);
}

// Appends the PM4 packets for one compute dispatch to m_IndirectBuf.
//
// Packet sequence written to the indirect buffer:
//   acquire-memory, then SET_SH_REG packets for the dispatch dimensions, the
//   program address, PGM_RSRC1/2 (plus PGM_RSRC3 on FAMILY_AL / FAMILY_AV),
//   resource limits, tmpring size, restart values and user data, then the
//   dispatch-direct packet, and finally a NOP + release-memory + wait-reg-mem
//   trio that stands in for a partial flush.
//
// NOTE(review): packets are only ever appended here and Submit() calls this on
// every submission, so submitting the same Dispatch more than once grows the
// IB each time — confirm that is intended.
void Dispatch::BuildIb() {
    // The program address is programmed in units of 256 bytes (address >> 8).
    HSAuint64 shiftedIsaAddr = m_IsaBuf.As<uint64_t>() >> 8;
    // Each 64-bit argument pointer is split into two dwords for the user-data registers.
    unsigned int arg0, arg1, arg2, arg3;
    SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
    SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);

    // Starts at COMPUTE_START_X
    const unsigned int COMPUTE_DISPATCH_DIMS_VALUES[] = {
        0,      // START_X
        0,      // START_Y
        0,      // START_Z
        1,      // NUM_THREADS_X - this is actually the number of threads in a thread group
        1,      // NUM_THREADS_Y
        1,      // NUM_THREADS_Z
        0,      // COMPUTE_PIPELINESTAT_ENABLE
        0,      // COMPUTE_PERFCOUNT_ENABLE
    };

    /*
     * For some special asics in the list of DEGFX11_12113
     * COMPUTE_PGM_RSRC needs priv=1 to prevent hardware traps
     */
    const bool priv = m_NeedCwsrWA;

    // PGM_RSRC1: float mode, priority (low two bits of m_SpiPriority), priv,
    // SGPR count (only programmed below FAMILY_GFX12) and VGPR count.
    unsigned int pgmRsrc1 =
        (0xc0 << COMPUTE_PGM_RSRC1__FLOAT_MODE__SHIFT) |
        ((m_SpiPriority & 3) << COMPUTE_PGM_RSRC1__PRIORITY__SHIFT) |
        (priv << COMPUTE_PGM_RSRC1__PRIV__SHIFT) |
        ((m_FamilyId < FAMILY_GFX12) ? (0x2 << COMPUTE_PGM_RSRC1__SGPRS__SHIFT) : 0) |
        (0x4 << COMPUTE_PGM_RSRC1__VGPRS__SHIFT);  // 4 * 8 = 32 VGPRs

    // PGM_RSRC2 is assembled field by field; each value is masked to its field.
    unsigned int pgmRsrc2 = 0;
    pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT)
            & COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK;
    // Six user SGPRs when a scratch base is set (args + flat scratch lo/hi), four otherwise.
    pgmRsrc2 |= ((m_scratch_base ? 6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT)
            & COMPUTE_PGM_RSRC2__USER_SGPR_MASK;

    // TRAP_PRESENT is only set for families below FAMILY_GFX12.
    if (m_FamilyId < FAMILY_GFX12) {
        pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT)
            & COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK;
    }

    pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TGID_X_EN__SHIFT)
            & COMPUTE_PGM_RSRC2__TGID_X_EN_MASK;
    pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT__SHIFT)
            & COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT_MASK;
    pgmRsrc2 |= (0 << COMPUTE_PGM_RSRC2__EXCP_EN__SHIFT)
            & COMPUTE_PGM_RSRC2__EXCP_EN_MASK;
    pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__EXCP_EN_MSB__SHIFT)
            & COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK;

    // Starts at COMPUTE_PGM_RSRC1
    const unsigned int COMPUTE_PGM_RSRC[] = {
        pgmRsrc1,
        pgmRsrc2
    };

    // Starts at COMPUTE_PGM_LO
    const unsigned int COMPUTE_PGM_VALUES_GFX8[] = {
        static_cast<uint32_t>(shiftedIsaAddr),                  // PGM_LO
        static_cast<uint32_t>(shiftedIsaAddr >> 32)             // PGM_HI
            | (hsakmt_is_dgpu() ? 0 : (1<<8))                          // including PGM_ATC=?
    };

    // Starts at COMPUTE_PGM_LO
    // Same as the GFX8 table, followed by two zero dwords and the scratch base
    // (m_scratch_base >> 8, split into low and high dwords).
    const unsigned int COMPUTE_PGM_VALUES_GFX9[] = {
        static_cast<uint32_t>(shiftedIsaAddr),                  // PGM_LO
        static_cast<uint32_t>(shiftedIsaAddr >> 32)             // PGM_HI
            | (hsakmt_is_dgpu() ? 0 : (1<<8)),                         // including PGM_ATC=?
        0,
        0,
        static_cast<uint32_t>(m_scratch_base >> 8),              // compute_dispatch_scratch_base
        static_cast<uint32_t>(m_scratch_base >> 40)
    };

    // Starts at COMPUTE_RESOURCE_LIMITS
    const unsigned int COMPUTE_RESOURCE_LIMITS[] = {
        0,                      // COMPUTE_RESOURCE_LIMITS
    };

    // Starts at COMPUTE_TMPRING_SIZE
    const unsigned int COMPUTE_TMPRING_SIZE[] = {
        m_ComputeTmpringSize,   // COMPUTE_TMPRING_SIZE
    };

    // Starts at COMPUTE_RESTART_X
    const unsigned int COMPUTE_RESTART_VALUES[] = {
        0,                      // COMPUTE_RESTART_X
        0,                      // COMPUTE_RESTART_Y
        0,                      // COMPUTE_RESTART_Z
        0                       // COMPUTE_THREAD_TRACE_ENABLE
    };

    // Starts at COMPUTE_USER_DATA_0
    const unsigned int COMPUTE_USER_DATA_VALUES[] = {
                // Reg name             - use in KFDtest - use in ABI
        arg0,   // COMPUTE_USER_DATA_0  - arg0           - resource descriptor for the scratch buffer - 1st dword
        arg1,   // COMPUTE_USER_DATA_1  - arg1           - resource descriptor for the scratch buffer - 2nd dword
        arg2,   // COMPUTE_USER_DATA_2  - arg2           - resource descriptor for the scratch buffer - 3rd dword
        arg3,   // COMPUTE_USER_DATA_3  - arg3           - resource descriptor for the scratch buffer - 4th dword
        static_cast<uint32_t>(m_scratch_base),  // COMPUTE_USER_DATA_4  - flat_scratch_lo
        static_cast<uint32_t>(m_scratch_base >> 32),  // COMPUTE_USER_DATA_5  - flat_scratch_hi
        0,      // COMPUTE_USER_DATA_6  -                - AQL queue address, low part
        0,      // COMPUTE_USER_DATA_7  -                - AQL queue address, high part
        0,      // COMPUTE_USER_DATA_8  -                - kernel arguments block, low part
        0,      // COMPUTE_USER_DATA_9  -                - kernel arguments block, high part
        0,      // COMPUTE_USER_DATA_10 -                - unused
        0,      // COMPUTE_USER_DATA_11 -                - unused
        0,      // COMPUTE_USER_DATA_12 -                - unused
        0,      // COMPUTE_USER_DATA_13 -                - unused
        0,      // COMPUTE_USER_DATA_14 -                - unused
        0,      // COMPUTE_USER_DATA_15 -                - unused
    };

    // 0x1000 is OR-ed in on non-dGPU systems and 0x8000 for FAMILY_NV and newer.
    const unsigned int DISPATCH_INIT_VALUE = 0x00000021 | (hsakmt_is_dgpu() ? 0 : 0x1000) |
                ((m_FamilyId >= FAMILY_NV) ? 0x8000 : 0);
    // {COMPUTE_SHADER_EN=1, PARTIAL_TG_EN=0, FORCE_START_AT_000=0, ORDERED_APPEND_ENBL=0,
    // ORDERED_APPEND_MODE=0, USE_THREAD_DIMENSIONS=1, ORDER_MODE=0, DISPATCH_CACHE_CNTL=0,
    // SCALAR_L1_INV_VOL=0, VECTOR_L1_INV_VOL=0, DATA_ATC=?, RESTORE=0}
    // Set CS_W32_EN for wave32 workloads on gfx10, since all the shaders used in KFDTest are wave32.

    m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket(m_FamilyId));

    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES,
                                                  ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES)));

    // FAMILY_AI and newer use the longer GFX9 table that also carries the scratch base.
    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO,
        (m_FamilyId >= FAMILY_AI) ? COMPUTE_PGM_VALUES_GFX9 : COMPUTE_PGM_VALUES_GFX8,
        (m_FamilyId >= FAMILY_AI) ? ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX9) : ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX8)));
    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC,
                                                  ARRAY_SIZE(COMPUTE_PGM_RSRC)));

    // FAMILY_AL and FAMILY_AV additionally get COMPUTE_PGM_RSRC3 programmed to 9.
    if (m_FamilyId == FAMILY_AL || m_FamilyId == FAMILY_AV) {
        const unsigned int COMPUTE_PGM_RSRC3[] = {9};
        m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC3, COMPUTE_PGM_RSRC3,
                                                      ARRAY_SIZE(COMPUTE_PGM_RSRC3)));
    }

    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS,
                                                  ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS)));
    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE,
                                                  ARRAY_SIZE(COMPUTE_TMPRING_SIZE)));
    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES,
                                                  ARRAY_SIZE(COMPUTE_RESTART_VALUES)));

    m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES,
                                                  ARRAY_SIZE(COMPUTE_USER_DATA_VALUES)));

    m_IndirectBuf.AddPacket(PM4DispatchDirectPacket(m_DimX, m_DimY, m_DimZ, DISPATCH_INIT_VALUE));

    // EVENT_WRITE.partial_flush causes problems with preemptions in
    // GWS testing. Since this is specific to this PM4 command and
    // doesn't affect AQL, it's easier to fix KFDTest than the
    // firmware.
    //
    // Replace PartialFlush with a ReleaseMem (with no interrupt) + WaitRegMem
    //
    // Original: m_IndirectBuf.AddPacket(PM4PartialFlushPacket());
    // The NOP's payload dword (nop[1]) inside the IB is the fence location: the
    // release-mem packet is given its address and 0xdeadbeef, and the
    // wait-reg-mem packet is given the same address and value.
    uint32_t *nop = m_IndirectBuf.AddPacket(PM4NopPacket(2)); // NOP packet with one dword payload for the release-mem fence
    m_IndirectBuf.AddPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, (uint64_t)&nop[1], 0xdeadbeef));
    m_IndirectBuf.AddPacket(PM4WaitRegMemPacket(true, (uint64_t)&nop[1], 0xdeadbeef, 4));
}


================================================
FILE: libhsakmt/tests/kfdtest/src/Dispatch.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_DISPATCH__H__
#define __KFD_DISPATCH__H__
#include "KFDTestUtil.hpp"
#include "IndirectBuffer.hpp"
#include "BaseQueue.hpp"

// Helper that launches a compute shader through a PM4 queue: it builds an
// indirect buffer programming the compute registers, submits it, and signals
// an end-of-pipe event that callers can wait on.
//
// NOTE(review): the class holds a raw HsaEvent* that the destructor destroys,
// and the copy constructor is not disabled — copying a Dispatch would destroy
// the same event twice. Confirm no caller copies it.
class Dispatch {
 public:
    // isaBuf: buffer holding the shader code; only a reference is kept, so it
    //         must outlive this object.
    // eventAutoReset: forwarded (negated) to hsaKmtCreateEvent() when the
    //         end-of-pipe event is created.
    Dispatch(const HsaMemoryBuffer& isaBuf, const bool eventAutoReset = false);
    ~Dispatch();

    // Sets the two pointers passed to the shader in COMPUTE_USER_DATA_0..3.
    void SetArgs(void* pArg1, void* pArg2);

    // Sets the X/Y/Z dimensions used by the dispatch-direct packet.
    void SetDim(unsigned int x, unsigned int y, unsigned int z);

    // Builds the indirect buffer and submits it, followed by the end-of-pipe
    // release-memory packet, on the given queue.
    void Submit(BaseQueue& queue);

    // Waits on the end-of-pipe event; asserts (gtest) on any non-success status.
    void Sync(unsigned int timeout = HSA_EVENTTIMEOUT_INFINITE);

    // Waits on the end-of-pipe event; returns 0 on success, 1 otherwise.
    int  SyncWithStatus(unsigned int timeout);

    // Enables scratch and records tmpring size fields and the scratch base.
    void SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base);

    // Sets the value used for the PRIORITY field of COMPUTE_PGM_RSRC1.
    void SetSpiPriority(unsigned int priority);
    
    // Overrides the PRIV bit of COMPUTE_PGM_RSRC1 (stored in m_NeedCwsrWA).
    void SetPriv(bool priv);

    // Returns the end-of-pipe event; ownership stays with this object.
    HsaEvent *GetHsaEvent() { return m_pEop; }

 private:
    // Appends all PM4 packets for the dispatch to m_IndirectBuf.
    void BuildIb();

 private:
    // Shader code buffer (not owned).
    const HsaMemoryBuffer& m_IsaBuf;

    // Indirect buffer that receives the packets built by BuildIb().
    IndirectBuffer m_IndirectBuf;

    // Dispatch dimensions.
    unsigned int m_DimX;
    unsigned int m_DimY;
    unsigned int m_DimZ;

    // Shader argument pointers.
    void* m_pArg1;
    void* m_pArg2;

    // End-of-pipe event, created in the constructor and destroyed in the destructor.
    HsaEvent* m_pEop;

    // Scratch enable bit and the value written to COMPUTE_TMPRING_SIZE.
    bool            m_ScratchEn;
    unsigned int    m_ComputeTmpringSize;

    HSAuint64  m_scratch_base;
    unsigned int m_SpiPriority;
    // Family id of the node the ISA buffer lives on.
    unsigned int  m_FamilyId;
    // Value written to the PRIV bit; initialized from NeedCwsrWA(), overridable via SetPriv().
    bool  m_NeedCwsrWA;
};

#endif  // __KFD_DISPATCH__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/GoogleTestExtension.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "GoogleTestExtension.hpp"
#include "OSWrapper.hpp"

// Decides whether a test may run under the current run mode.
//
// Returns true when `testProfile` shares at least one bit with the global
// g_TestRunProfile. Otherwise a warning is logged and false is returned, which
// makes TEST_START skip the test body.
bool Ok2Run(unsigned int testProfile) {
    if ((testProfile & g_TestRunProfile) != 0)
        return true;

    WARN() << "Test is skipped because profile does not match current run mode" << std::endl;
    return false;
}

// Predicate for tests that need specific HW capabilities to be present.
// Returns true only when every bit of `envCaps` is set in g_TestENVCaps;
// otherwise logs a warning and returns false.
bool TestReqEnvCaps(unsigned int envCaps) {
    const unsigned int presentCaps = envCaps & g_TestENVCaps;

    if (presentCaps == envCaps)
        return true;

    WARN() << "Test is skipped due to HW capability issues" << std::endl;
    return false;
}

// Predicate for tests that need specific HW capabilities to be absent,
// e.g. testing capabilities not supported by HW scheduling.
// Returns true only when no bit of `envCaps` is set in g_TestENVCaps;
// otherwise logs a warning and returns false.
bool TestReqNoEnvCaps(unsigned int envCaps) {
    const unsigned int presentCaps = envCaps & g_TestENVCaps;

    if (presentCaps == 0)
        return true;

    WARN() << "Test is skipped due to HW capability issues" << std::endl;
    return false;
}

// Starts a log line in gtest-like format: writes a colored heading to
// std::clog (yellow "[----------] " for warnings, green "[          ] "
// otherwise), switches the console color back to white, and returns std::clog
// so the caller can keep streaming the message.
std::ostream& operator<< (KFDLog log, LOGTYPE level) {
    const bool isWarning = (level == LOGTYPE_WARNING);

    SetConsoleTextColor(isWarning ? TEXTCOLOR_YELLOW : TEXTCOLOR_GREEN);
    std::clog << (isWarning ? "[----------] " : "[          ] ");
    SetConsoleTextColor(TEXTCOLOR_WHITE);

    return std::clog;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/GoogleTestExtension.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __GOOGLETEST_EXTENSION__H__
#define __GOOGLETEST_EXTENSION__H__

#include <gtest/gtest.h>
#include "hsakmt/hsakmt.h"
#include "KFDTestFlags.hpp"

// Severity selector for the LOG()/WARN() macros; it determines the heading
// text and color written in front of the message.
enum LOGTYPE {
    LOGTYPE_INFO,      // msg header in green
    LOGTYPE_WARNING    // msg header in yellow
};

// Empty tag type: streaming a LOGTYPE into a KFDLog writes the log heading and
// yields the std::ostream that the rest of the message is streamed into.
class KFDLog{};
std::ostream& operator << (KFDLog log, LOGTYPE level);

// @brief  Log additional details, to be displayed in the same format as other google test outputs
// Currently not supported by gtest
// Should be used like cout: LOG() << "message" << value << std::endl;
// WARN() works the same way but uses the warning heading.
#define LOG()      KFDLog() << LOGTYPE_INFO
#define WARN()     KFDLog() << LOGTYPE_WARNING

// Temporary helper behind the RECORD() macro.
//
// The constructor captures the value (converted to a string), the caller
// streams the property key into get_key_stream(), and the destructor hands the
// key/value pair to gtest's RecordProperty(). The class derives from
// testing::Test only to reach RecordProperty(); TestBody() is an empty stub.
class KFDRecord: public testing::Test {
public:
    KFDRecord(const char *val): m_val(val) {}
    // Takes the string by const reference so const and temporary strings are
    // accepted as well; the value is copied into m_val.
    KFDRecord(const std::string &val): m_val(val) {}
    KFDRecord(HSAint64 val): m_val(std::to_string(val)) {}
    KFDRecord(HSAuint64 val): m_val(std::to_string(val)) {}
    KFDRecord(double val): m_val(std::to_string(val)) {}
    // Records the property with whatever key has been streamed in so far.
    ~KFDRecord() {
        RecordProperty(m_key.str().c_str(), m_val.c_str());
    }
    // Stream into the returned object to build the property key.
    std::stringstream &get_key_stream() {
        return m_key;
    }
    virtual void TestBody() {}
private:
    std::string m_val;        // property value, fixed at construction
    std::stringstream m_key;  // property key, filled in by the caller
};

// Creates a temporary KFDRecord holding `val` and yields its key stream; the
// key/value pair is recorded as a gtest property when the temporary is destroyed.
#define RECORD(val)     (KFDRecord(val).get_key_stream())

// All tests MUST be in a try catch since the gtest flag to throw an exception on any fatal failure is enabled
// TEST_START opens `if (Ok2Run(...)) try {` and TEST_END closes it with a
// catch-all handler, so the two must always be used as a pair.
#define TEST_START(testProfile)   if (Ok2Run(testProfile)) try {
#define TEST_END       } catch (...) {}

// Used to wrap setup and teardown functions, anything that is built-in gtest and is not a test
#define ROUTINE_START   try {
#define ROUTINE_END       }catch(...) {}

// Return from the enclosing function when the capability predicate fails.
#define TEST_REQUIRE_ENV_CAPABILITIES(envCaps)          if (!TestReqEnvCaps(envCaps))  return;
#define TEST_REQUIRE_NO_ENV_CAPABILITIES(envCaps)  if (!TestReqNoEnvCaps(envCaps))  return;

// Assert/expect that an hsaKmt call returned HSAKMT_STATUS_SUCCESS.
#define ASSERT_SUCCESS(_val) ASSERT_EQ(HSAKMT_STATUS_SUCCESS, (_val))
#define EXPECT_SUCCESS(_val) EXPECT_EQ(HSAKMT_STATUS_SUCCESS, (_val))

// The *_GPU variants behave like their plain counterparts and append
// "gpuNodeID: <gpuNode>" to the failure message.
#define EXPECT_EQ_GPU(expected, actual , gpuNode) EXPECT_EQ((expected), (actual)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define ASSERT_SUCCESS_GPU(_val, gpuNode) ASSERT_EQ(HSAKMT_STATUS_SUCCESS, (_val)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define EXPECT_SUCCESS_GPU(_val, gpuNode) EXPECT_EQ(HSAKMT_STATUS_SUCCESS, (_val)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"

// Assert/expect that a pointer is not NULL. `_val` is pasted unparenthesized,
// so pass a simple expression.
#define ASSERT_NOTNULL(_val) ASSERT_NE((void *)NULL, _val)
#define EXPECT_NOTNULL(_val) EXPECT_NE((void *)NULL, _val)

#define ASSERT_NOTNULL_GPU(_val, gpuNode) ASSERT_NE((void *)NULL, _val) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define EXPECT_NOTNULL_GPU(_val, gpuNode) EXPECT_NE((void *)NULL, _val) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"

// Despite the parameter names, EXPECT_GE_GPU checks first-argument >= second-argument.
#define EXPECT_NE_GPU(expected, actual, gpuNode) EXPECT_NE((expected), (actual)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define EXPECT_GE_GPU(expected, actual, gpuNode) EXPECT_GE((expected), (actual)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"

#define ASSERT_GE_GPU(val1, val2, gpuNode) ASSERT_GE((val1), (val2)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define ASSERT_NE_GPU(val1, val2, gpuNode) ASSERT_NE((val1), (val2)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"
#define ASSERT_EQ_GPU(val1, val2, gpuNode) ASSERT_EQ((val1), (val2)) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"

#define EXPECT_TRUE_GPU(condition, gpuNode) EXPECT_TRUE(condition) << "gpuNodeID: " << std::to_string(gpuNode) << "\n"

// @brief  Determines if it is ok to run a test given input flags
// @return true when testProfile overlaps the current run profile
bool Ok2Run(unsigned int testProfile);

// @brief  Checks if all HW capabilities needed for a test to run exist
// @return true when every requested capability bit is present
bool TestReqEnvCaps(unsigned int hwCaps);

// @brief  Checks if all HW capabilities that prevent a test from running are absent
// @return true when none of the given capability bits is present
bool TestReqNoEnvCaps(unsigned int hwCaps);

#endif


================================================
FILE: libhsakmt/tests/kfdtest/src/IndirectBuffer.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "IndirectBuffer.hpp"
#include "GoogleTestExtension.hpp"
#include "pm4_pkt_struct_common.h"
#include "PM4Packet.hpp"


// Creates an empty indirect buffer.
//  - type:         the only packet type AddPacket() accepts without complaint
//  - sizeInDWords: capacity of the buffer in dwords
//  - NodeId:       node the backing HsaMemoryBuffer is allocated on
// The backing memory is requested zeroed, non-local, executable and uncached.
IndirectBuffer::IndirectBuffer(PACKETTYPE type,  unsigned int sizeInDWords, unsigned int NodeId)
    : m_NumOfPackets(0),
      m_MaxSize(sizeInDWords),
      m_ActualSize(0),
      m_IndirectBuf(NULL),
      m_PacketTypeAllowed(type) {
    const bool zero = true;
    const bool local = false;
    const bool exec = true;
    const bool isScratch = false;
    const bool isReadOnly = false;
    const bool isUncached = true;

    m_IndirectBuf = new HsaMemoryBuffer(sizeInDWords*sizeof(unsigned int), NodeId, zero,
                                        local, exec, isScratch,
                                        isReadOnly, isUncached);
}

// Frees the backing memory buffer allocated by the constructor.
IndirectBuffer::~IndirectBuffer(void) {
    delete this->m_IndirectBuf;
}

// Appends `packet` to the indirect buffer.
//
// A packet-type mismatch is reported as a non-fatal gtest failure and the
// packet is still copied. If the packet does not fit into the remaining space,
// a non-fatal gtest failure is reported, nothing is copied, the buffer state is
// left untouched and NULL is returned (copying anyway would write past the end
// of the backing buffer).
//
// @returns pointer to the first dword of the copied packet inside the buffer,
//          or NULL when there was not enough room.
uint32_t *IndirectBuffer::AddPacket(const BasePacket &packet) {
    EXPECT_EQ(packet.PacketType(), m_PacketTypeAllowed) << "Cannot add a packet since packet type doesn't match queue";

    const unsigned int writePtr = m_ActualSize;

    EXPECT_GE(m_MaxSize, packet.SizeInDWords() + writePtr) << "Cannot add a packet, not enough room";
    // EXPECT_GE does not abort, so bail out explicitly instead of overflowing the buffer.
    if (m_MaxSize < packet.SizeInDWords() + writePtr)
        return NULL;

    memcpy(m_IndirectBuf->As<unsigned int*>() + writePtr , packet.GetPacket(),  packet.SizeInBytes());
    m_ActualSize += packet.SizeInDWords();
    m_NumOfPackets++;

    return m_IndirectBuf->As<HSAuint32 *>() + writePtr;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/IndirectBuffer.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __INDIRECT_BUFFER__H__
#define __INDIRECT_BUFFER__H__

#include "BasePacket.hpp"
#include "KFDTestUtil.hpp"

/** @class IndirectBuffer
 *  When working with an indirect buffer, create IndirectBuffer, fill it with all the packets you want,
 *  create an indirect packet to point to it, and submit the packet to queue
 */
// NOTE(review): the class owns m_IndirectBuf through a raw pointer that the
// destructor deletes, and copying is not disabled — a copy would delete the
// same buffer twice. Confirm no caller copies it.
class IndirectBuffer {
 public:
    // @param[type] Packet type allowed in the buffer
    // @param[sizeInDWords] Buffer max size in DWords
    // @param[NodeId] Node on which the backing memory is allocated
    IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId);
    ~IndirectBuffer(void);

    // @brief Add packet to the buffer; validations are done with gtest EXPECT
    // @returns Pointer to the location inside the buffer where the packet was written
    uint32_t *AddPacket(const BasePacket &packet);
    // @returns Actual size of the indirect buffer in DWords, equivalent to write pointer
    unsigned int SizeInDWord() { return m_ActualSize; }
    // @returns Indirect buffer address
    unsigned int *Addr() { return m_IndirectBuf->As<unsigned int*>(); }

 protected:
    // Number of packets in the buffer
    unsigned int m_NumOfPackets;
    // Max size of buffer in DWords
    unsigned int m_MaxSize;
    // Current size of buffer in DWords
    unsigned int m_ActualSize;
    // Backing memory, allocated in the constructor and deleted in the destructor
    HsaMemoryBuffer *m_IndirectBuf;
    // What packets are supported in this buffer
    PACKETTYPE m_PacketTypeAllowed;
};

#endif  //  __INDIRECT_BUFFER__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDASMTest.cpp
================================================
/*
 * Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "GoogleTestExtension.hpp"
#include "KFDASMTest.hpp"
#include "ShaderStore.hpp"
#include "Assemble.hpp"

// Intentionally empty: the assembler tests need no per-test setup.
void KFDASMTest::SetUp() {}
// Intentionally empty: the assembler tests need no per-test cleanup.
void KFDASMTest::TearDown() {}

// Targets exercised by the AssembleShaders test; each entry is passed to the
// Assembler constructor.
// NOTE(review): the values look like a packed gfx version
// (major << 16 | minor << 8 | stepping), e.g. 0x090402 — confirm against Assembler.
static const std::vector<uint32_t> TargetList = {
    0x080001,
    0x080002,
    0x080003,
    0x080005,
    0x080100,
    0x090000,
    0x090002,
    0x090004,
    0x090006,
    0x090008,
    0x090009,
    0x09000a,
    0x09000c,
    0x090402,
    0x0a0100,
    0x0a0101,
    0x0a0102,
    0x0a0103,
    0x0a0300,
    0x0a0301,
    0x0a0302,
    0x0a0303,
    0x0a0304,
    0x0a0305,
    0x0a0306,
    0x0c0000,
};

// Assembles every shader in ShaderList for every target in TargetList and
// expects each assembly run to report success.
TEST_F(KFDASMTest, AssembleShaders) {
    TEST_START(TESTPROFILE_RUNALL)

    for (const uint32_t &target : TargetList) {
        // One assembler instance per target, reused for all shaders.
        Assembler asmblr(target);

        LOG() << "Running ASM test for target " << asmblr.GetTargetAsic() << std::endl;

        for (auto &s : ShaderList)
            EXPECT_SUCCESS(asmblr.RunAssemble(s));
    }

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDASMTest.hpp
================================================
/*
 * Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_ASM_TEST__H__
#define __KFD_ASM_TEST__H__

#include <gtest/gtest.h>

// Test fixture for the shader assembler tests. It derives directly from
// testing::Test and adds no state; SetUp()/TearDown() are defined in
// KFDASMTest.cpp.
class KFDASMTest : public testing::Test {
 public:
    KFDASMTest() {}
    ~KFDASMTest() {}

 protected:
    // Per-test hooks invoked by gtest around each test body.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFD_ASM_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDBaseComponentTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <syslog.h>

#include "KFDBaseComponentTest.hpp"
#include "KFDTestUtil.hpp"

extern unsigned int g_TestGPUsNum;

// Intentionally empty: all initialization is done per test in SetUp().
void KFDBaseComponentTest::SetUpTestCase() {
}

// Intentionally empty: all cleanup is done per test in TearDown().
void KFDBaseComponentTest::TearDownTestCase() {
}

// Per-test initialization shared by all KFD component tests.
//
// Opens the KFD, snapshots the system topology, sets default memory flags,
// caches family id / queue counts / assembler for the default GPU and for
// every GPU node, clamps g_TestGPUsNum, and writes a "STARTED" line to syslog.
// The body is wrapped in ROUTINE_START/ROUTINE_END, so an exception raised by
// a fatal gtest assertion is caught here and the rest of the setup is skipped.
void KFDBaseComponentTest::SetUp() {
    ROUTINE_START

    ASSERT_SUCCESS(hsaKmtOpenKFD());
    EXPECT_SUCCESS(hsaKmtGetVersion(&m_VersionInfo));
    memset( &m_SystemProperties, 0, sizeof(m_SystemProperties) );
    // Zeroed entries (fd == 0) mark render nodes that were never opened; TearDown() relies on this.
    memset(m_RenderNodes, 0, sizeof(m_RenderNodes));

    /** In order to be correctly testing the KFD interfaces and ensure
     *  that the KFD acknowledges relevant node parameters
     *  for the rest of the tests and used for more specific topology tests,
     *  call to GetSystemProperties for a system snapshot of the topology here
     */
    ASSERT_SUCCESS(hsaKmtAcquireSystemProperties(&m_SystemProperties));
    ASSERT_GT(m_SystemProperties.NumNodes, HSAuint32(0)) << "HSA has no nodes.";

    m_NodeInfo.Init(m_SystemProperties.NumNodes);

    // setting memory flags with default values , can be modified according to needs
    m_MemoryFlags.ui32.NonPaged = 0;                         // Paged
    m_MemoryFlags.ui32.CachePolicy = HSA_CACHING_NONCACHED;  // Non cached
    m_MemoryFlags.ui32.ReadOnly = 0;                         // Read/Write
    m_MemoryFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;         // 4KB page
    m_MemoryFlags.ui32.HostAccess = 1;                       // Host accessible
    m_MemoryFlags.ui32.NoSubstitute = 0;                     // Fall back to node 0 if needed
    m_MemoryFlags.ui32.GDSMemory = 0;
    m_MemoryFlags.ui32.Scratch = 0;

    /* nodeProperties is default gpu property, keep it to support old test method */
    const HsaNodeProperties *nodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
    ASSERT_NOTNULL(nodeProperties) << "failed to get HSA default GPU Node properties";

    /* m_FamilyId is default gpu family id, keep it to support old test method */
    m_FamilyId = FamilyIdFromNode(nodeProperties);

    /* these values are for default gpu, keep them to support old test method */
    GetHwQueueInfo(nodeProperties, &m_numCpQueues, &m_numSdmaEngines,
                    &m_numSdmaXgmiEngines, &m_numSdmaQueuesPerEngine);

    // Publish this fixture through the global pointer used by helpers such as Dispatch.
    g_baseTest = this;

    /* m_pAsm is default gpu assembler, keep it to support old test method */
    m_pAsm = new Assembler(GetGfxVersion(nodeProperties));
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    int gpuNode;
    // Per-GPU assembler and queue info, indexed by position in gpuNodes.
    // NOTE(review): the per-GPU containers are indexed here without a bound
    // check, and g_TestGPUsNum is only clamped to MAX_GPU afterwards — confirm
    // their capacity covers every GPU node in the system.
    for (int i = 0; i < gpuNodes.size(); i++) {
        gpuNode = gpuNodes.at(i);
        // Shadows the outer nodeProperties (default GPU) with this node's properties.
        const HsaNodeProperties *nodeProperties = m_NodeInfo.GetNodeProperties(gpuNode);

        m_pAsmGPU[i] = new Assembler(GetGfxVersion(nodeProperties));
        GetHwQueueInfo(nodeProperties, &m_numCpQueues_GPU[i], &m_numSdmaEngines_GPU[i],
                    &m_numSdmaXgmiEngines_GPU[i], &m_numSdmaQueuesPerEngine_GPU[i]);
    }

    /* adjust g_TestGPUsNum not above MAX_GPU and gpu number at system */
    g_TestGPUsNum = std::min(g_TestGPUsNum, (unsigned int)gpuNodes.size());
    g_TestGPUsNum = (g_TestGPUsNum <= MAX_GPU) ? g_TestGPUsNum : MAX_GPU;

    const testing::TestInfo* curr_test_info =
                ::testing::UnitTest::GetInstance()->current_test_info();

    // Log the test start to syslog; the log is closed again in TearDown().
    openlog("KFDTEST", LOG_CONS , LOG_USER);
    if (g_TestGPUsNum == 1)
        syslog(LOG_INFO, "[Test on Node#%03d] "
                    "STARTED ========== %s.%s ==========",
                    m_NodeInfo.HsaDefaultGPUNode(),
                    curr_test_info->test_case_name(), curr_test_info->name());
    else
        syslog(LOG_INFO, "[Test on %03d Node(s)] "
                    "STARTED ========== %s.%s ==========",
                    g_TestGPUsNum,
                    curr_test_info->test_case_name(), curr_test_info->name());

    ROUTINE_END
}

void KFDBaseComponentTest::TearDown() {
    ROUTINE_START

    for (int i = 0; i < MAX_RENDER_NODES; i++) {
        if (m_RenderNodes[i].fd <= 0)
            continue;

        amdgpu_device_deinitialize(m_RenderNodes[i].device_handle);
        drmClose(m_RenderNodes[i].fd);
    }

    EXPECT_SUCCESS(hsaKmtReleaseSystemProperties());
    EXPECT_SUCCESS(hsaKmtCloseKFD());
    g_baseTest = NULL;

    if (m_pAsm)
        delete m_pAsm;
    m_pAsm = nullptr;

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    for (int i = 0; i < gpuNodes.size(); i++) {
        if ( m_pAsmGPU[i]) {
            delete  m_pAsmGPU[i];
            m_pAsmGPU[i] = NULL;
        }
    }

    const testing::TestInfo* curr_test_info =
                ::testing::UnitTest::GetInstance()->current_test_info();

    if (curr_test_info->result()->Passed())
        if (g_TestGPUsNum == 1)
            syslog(LOG_INFO, "[Test on Node#%03d] PASSED"
                             "  ========== %s.%s ==========",
                m_NodeInfo.HsaDefaultGPUNode(),
                curr_test_info->test_case_name(), curr_test_info->name());
        else
            syslog(LOG_INFO, "[Tested on %03d Node(s)] PASSED"
                             "  ========== %s.%s ==========",
                g_TestGPUsNum,
                curr_test_info->test_case_name(), curr_test_info->name());

    else
        if (g_TestGPUsNum == 1)
             syslog(LOG_WARNING, "[Test on Node#%03d] FAILED"
                                 "  ========== %s.%s ==========",
                m_NodeInfo.HsaDefaultGPUNode(),
                curr_test_info->test_case_name(), curr_test_info->name());
        else
             syslog(LOG_WARNING, "[Test on %03d Node(s)] FAILED"
                                 "  ========== %s.%s ==========",
                g_TestGPUsNum,
                curr_test_info->test_case_name(), curr_test_info->name());

    closelog();

    m_NodeInfo.Delete();
    ROUTINE_END
}

/* Returns the total system memory size in bytes, summed over every node that
 * has CPU cores and at least one memory bank. A node whose memory properties
 * cannot be queried is reported through EXPECT_SUCCESS and left out of the sum.
 */
HSAuint64 KFDBaseComponentTest::GetSysMemSize() {
    HSAuint64 systemMemSize = 0;

    /* Find System Memory size */
    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(node);
        if (nodeProps != NULL && nodeProps->NumCPUCores > 0 && nodeProps->NumMemoryBanks > 0) {
            /* For NUMA nodes, memory is distributed among different nodes.
             * Compute total system memory size. KFD driver also computes
             * the system memory (si_meminfo) similarly
             */
            HsaMemoryProperties cpuMemoryProps;
            const HSAKMT_STATUS status = hsaKmtGetNodeMemoryProperties(node, 1, &cpuMemoryProps);
            EXPECT_SUCCESS(status);

            /* cpuMemoryProps is only valid when the query succeeded */
            if (status == HSAKMT_STATUS_SUCCESS)
                systemMemSize += cpuMemoryProps.SizeInBytes;
        }
    }

    return systemMemSize;
}

/* Returns the size in bytes of the first frame-buffer (private or public)
 * memory bank of gpuNode.
 *
 * Returns 0 when the node has no properties, has no memory banks, the memory
 * properties cannot be queried, or no frame-buffer bank exists.
 */
HSAuint64 KFDBaseComponentTest::GetVramSize(int gpuNode) {
    /* Find framebuffer size */
    const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(gpuNode);
    EXPECT_NE((const HsaNodeProperties *)NULL, nodeProps);
    /* EXPECT_NE is non-fatal: bail out instead of dereferencing NULL */
    if (nodeProps == NULL)
        return 0;

    const HSAuint32 numBanks = nodeProps->NumMemoryBanks;
    if (numBanks == 0)
        return 0;

    /* Heap storage instead of a variable-length array; elements start zeroed */
    std::vector<HsaMemoryProperties> memoryProps(numBanks);
    const HSAKMT_STATUS status =
        hsaKmtGetNodeMemoryProperties(gpuNode, numBanks, memoryProps.data());
    EXPECT_SUCCESS(status);
    if (status != HSAKMT_STATUS_SUCCESS)
        return 0;

    for (HSAuint32 bank = 0; bank < numBanks; bank++) {
        if (memoryProps[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE
                || memoryProps[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)
            return memoryProps[bank].SizeInBytes;
    }

    return 0;
}

unsigned int KFDBaseComponentTest::GetFamilyIdFromNodeId(unsigned int nodeId)
{
    return  FamilyIdFromNode(m_NodeInfo.GetNodeProperties(nodeId));
}

/* Returns the assembler stored for the GPU behind nodeId, or NULL when
 * nodeId does not resolve to a GPU index.
 */
Assembler* KFDBaseComponentTest::GetAssemblerFromNodeId(unsigned int nodeId)
{
    const int idx = m_NodeInfo.HsaGPUindexFromGpuNode(nodeId);

    if (idx >= 0)
        return m_pAsmGPU[idx];

    return NULL;
}

bool KFDBaseComponentTest::SVMAPISupported_GPU(unsigned int gpuNode) {

    bool supported = m_NodeInfo.GetNodeProperties(gpuNode)
                         ->Capability.ui32.SVMAPISupported;

    if (!supported)
        LOG() << "SVM API not supported on gpuNode" << gpuNode << std::endl;

    return supported;
}


/*
 * Some asics need CWSR workround for DEGFX11_12113
 */
bool KFDBaseComponentTest::NeedCwsrWA(unsigned int nodeId)
{
    bool needCwsrWA = false;
    const HsaNodeProperties *props = m_NodeInfo.GetNodeProperties(nodeId);

    needCwsrWA = props->EngineId.ui32.Major == 11 &&
                  props->EngineId.ui32.Minor == 0 &&
                  (props->EngineId.ui32.Stepping == 0 ||
                   props->EngineId.ui32.Stepping == 1 ||
                   props->EngineId.ui32.Stepping == 2 ||
                   props->EngineId.ui32.Stepping == 5 ||
                   (props->EngineId.ui32.Stepping == 3 && props->NumArrays > 1));

    return needCwsrWA;
}

/* True when the node's family id is FAMILY_GFX11 or newer. */
bool KFDBaseComponentTest::NeedNonPagedWptr(unsigned int nodeId)
{
    const unsigned int familyId = GetFamilyIdFromNodeId(nodeId);

    return familyId >= FAMILY_GFX11;
}

int KFDBaseComponentTest::FindDRMRenderNode(int gpuNode) {
    HsaNodeProperties *nodeProperties;
    _HSAKMT_STATUS status;

    nodeProperties = new HsaNodeProperties();

    status = hsaKmtGetNodeProperties(gpuNode, nodeProperties);
    EXPECT_SUCCESS(status) << "Node index: " << gpuNode << "hsaKmtGetNodeProperties returned status " << status;

    if (status != HSAKMT_STATUS_SUCCESS) {
        delete nodeProperties;
        return -EINVAL;
    }

    int minor = nodeProperties->DrmRenderMinor;
    if (minor < 128) {
        LOG() << "Failed to get minor number " << minor << std::endl;
        return -EINVAL;
    }

    int index = minor - 128;

    if (m_RenderNodes[index].fd == 0) {
        m_RenderNodes[index].fd = drmOpenRender(minor);

        if (m_RenderNodes[index].fd < 0) {
            LOG() << "Failed to open render node" << std::endl;
            return -EINVAL;
        }

        if (amdgpu_device_initialize(m_RenderNodes[index].fd,
                &m_RenderNodes[index].major_version,
                &m_RenderNodes[index].minor_version,
                &m_RenderNodes[index].device_handle) != 0) {
            drmClose(m_RenderNodes[index].fd);
            m_RenderNodes[index].fd = 0;
            LOG() << "Failed to initialize amdgpu device" << std::endl;
            return -EINVAL;
        }
    }

    return index;
}

/* Accessor for the fixture's version information member. */
HsaVersionInfo* KFDBaseComponentTest::Get_Version() {
    return &this->m_VersionInfo;
}

/* Accessor for the fixture's node information member. */
HsaNodeInfo* KFDBaseComponentTest::Get_NodeInfo() {
    return &this->m_NodeInfo;
}

/* Returns a mutable reference to the fixture's memory flags. */
HsaMemFlags& KFDBaseComponentTest::GetHsaMemFlags() {
    return this->m_MemoryFlags;
}

static void* KFDTest_GPU(void* ptr) {

    KFDTEST_GPUPARAMETERS* pKFDTest_GPUParameters = (KFDTEST_GPUPARAMETERS*)ptr;

    Test_Function test_function        = pKFDTest_GPUParameters->pTest_Function;
    KFDTEST_PARAMETERS* pTestParamters = pKFDTest_GPUParameters->pKFDTest_Parameters;

    try {

        test_function(pTestParamters);

    } catch (...) {
        LOG() << "test failed at gpu" << pTestParamters->gpuNode << std::endl;
    }

    pthread_exit(NULL);
}

/* Runs test_function concurrently on the first gpu_num GPU nodes, one thread
 * per GPU, and waits for all started threads to finish.
 *
 * @param test_function  function executed on every selected GPU
 * @param gpu_num        number of GPUs to test; must not exceed the number of
 *                       GPU nodes in the system
 * @return HSAKMT_STATUS_SUCCESS when every thread was created and joined,
 *         HSAKMT_STATUS_INVALID_PARAMETER for a NULL function or too large
 *         gpu_num, HSAKMT_STATUS_ERROR when thread creation or join failed
 */
HSAKMT_STATUS KFDBaseComponentTest::KFDTestMultiGPU(Test_Function test_function,
                                                     unsigned int gpu_num) {

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();

    /* Validate before any thread exists: failing later would unwind the
     * stack while running threads still point at the parameter storage.
     */
    if (test_function == NULL || gpu_num > gpuNodes.size())
        return HSAKMT_STATUS_INVALID_PARAMETER;

    /* Sized once up front and never resized, so element addresses handed to
     * the threads stay valid until the joins below complete.
     */
    std::vector<KFDTEST_PARAMETERS> kfdTest_Parameters(gpu_num);
    std::vector<KFDTEST_GPUPARAMETERS> kfdtest_GpuParameters(gpu_num);
    std::vector<pthread_t> pThreadGPU(gpu_num);

    HSAKMT_STATUS r = HSAKMT_STATUS_SUCCESS;
    unsigned int started = 0;

    for (; started < gpu_num; started++) {
        const int gpu_node = gpuNodes[started];

        kfdTest_Parameters[started].pTestObject = this;
        kfdTest_Parameters[started].gpuNode = gpu_node;

        kfdtest_GpuParameters[started].pKFDTest_Parameters = &kfdTest_Parameters[started];
        kfdtest_GpuParameters[started].pTest_Function = test_function;

        const int err = pthread_create(&pThreadGPU[started], NULL, KFDTest_GPU,
                                       (void *)&kfdtest_GpuParameters[started]);
        if (err) {
            std::cout << "Thread creation for gpu node failed : " << gpu_node
                      << " : " << strerror(err) << std::endl;
            r = HSAKMT_STATUS_ERROR;
            break;
        }
    }

    /* wait for the threads that were created successfully to finish */
    for (unsigned int j = 0; j < started; j++) {
        const int err = pthread_join(pThreadGPU[j], NULL);
        if (err) {
            std::cout << "pthread_join at gpu node failed : " << gpuNodes[j]
                      << " : " << strerror(err) << std::endl;
            r = HSAKMT_STATUS_ERROR;
        }
    }

    return r;
}

/* Entry point used by test bodies: runs test_function on the default GPU when
 * exactly one GPU is under test, otherwise hands over to KFDTestMultiGPU for
 * g_TestGPUsNum GPUs.
 */
HSAKMT_STATUS KFDBaseComponentTest::KFDTest_Launch(Test_Function test_function) {

    /* run test_function on all available GPUs */
    if (g_TestGPUsNum != 1)
        return KFDTestMultiGPU(test_function, g_TestGPUsNum);

    /* test on default GPU only */
    const int nodeId = m_NodeInfo.HsaDefaultGPUNode();
    if (nodeId < 0) {
        LOG() << "defaultGPUNode is invalid." << nodeId <<std::endl;
        return HSAKMT_STATUS_INVALID_PARAMETER;
    }

    KFDTEST_PARAMETERS singleGpuArgs;
    singleGpuArgs.pTestObject = this;
    singleGpuArgs.gpuNode = nodeId;

    /* An exception from the test function is logged, not propagated */
    try {
        test_function(&singleGpuArgs);
    } catch (...) {
        LOG() << "test failed at gpu" << nodeId << std::endl;
    }

    return HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDBaseComponentTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#ifndef __KFD_BASE_COMPONENT_TEST__H__
#define __KFD_BASE_COMPONENT_TEST__H__

#include <gtest/gtest.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <sys/param.h>
#include "hsakmt/hsakmt.h"
#include "OSWrapper.hpp"
#include "KFDTestUtil.hpp"
#include "Assemble.hpp"
#include "ShaderStore.hpp"

#define MAX_GPU 64

// Arguments handed to a per-GPU test function (see Test_Function).
typedef struct _KFDTEST_PARAMETERS
{
    // Test fixture that launched the function; set to `this` by the launcher
    // and cast back to the concrete fixture type by the test function.
    void*   pTestObject;
    // Node id of the GPU the function should run on.
    int     gpuNode;
} KFDTEST_PARAMETERS;

// Signature of a test body that can be run against a single GPU.
typedef void (* Test_Function)(KFDTEST_PARAMETERS*);

// Bundle passed through pthread_create's void* argument: the test function
// together with the parameters it should be called with.
typedef struct _KFDTESTGPU_PARAMETERS
{
    // Parameters forwarded to pTest_Function.
    KFDTEST_PARAMETERS*   pKFDTest_Parameters;
    // Function executed by the thread.
    Test_Function         pTest_Function;
} KFDTEST_GPUPARAMETERS;

//  @class KFDBaseComponentTest
class KFDBaseComponentTest : public testing::Test {
 public:
    KFDBaseComponentTest(void) { m_MemoryFlags.Value = 0; }
    ~KFDBaseComponentTest(void) {}

    HSAuint64 GetSysMemSize();
    HSAuint64 GetVramSize(int gpuNode);
#define MAX_RENDER_NODES 64
    struct {
        int fd;
        uint32_t major_version;
        uint32_t minor_version;
        amdgpu_device_handle device_handle;
        uint32_t bdf;
    } m_RenderNodes[MAX_RENDER_NODES];

// @brief Finds DRM Render node corresponding to gpuNode
// @return DRM Render Node if successful or -1 on failure
    int FindDRMRenderNode(int gpuNode);
    unsigned int GetFamilyIdFromNodeId(unsigned int nodeId);
    Assembler* GetAssemblerFromNodeId(unsigned int nodeId);
    bool NeedCwsrWA(unsigned int nodeId);
    bool NeedNonPagedWptr(unsigned int nodeId);
    unsigned int GetFamilyIdFromDefaultNode(){ return m_FamilyId; }

    // @brief Executed before the first test that uses KFDBaseComponentTest.
    static  void SetUpTestCase();
    // @brief Executed after the last test from KFDBaseComponentTest.
    static  void TearDownTestCase();

    HsaVersionInfo*  Get_Version();
    HsaNodeInfo* Get_NodeInfo();
    HsaMemFlags& GetHsaMemFlags();
    bool SVMAPISupported_GPU(unsigned int nodeId);

    inline unsigned int Get_NumCpQueues(int gpuIndex){
        return m_numCpQueues_GPU[gpuIndex];
    }

    inline unsigned int Get_NumSdmaEngines(int gpuIndex){
        return m_numSdmaEngines_GPU[gpuIndex];
    }

    inline unsigned int Get_NumSdmaSdmaQueuesPerEngine(int gpuIndex){
        return m_numSdmaQueuesPerEngine_GPU[gpuIndex];
    }

    inline unsigned int Get_NumSdmaSdmaXgmiEngines(int gpuIndex){
        return m_numSdmaXgmiEngines_GPU[gpuIndex];
    }

    HSAKMT_STATUS KFDTestMultiGPU(Test_Function test_function,
				    unsigned int gpu_num);

    HSAKMT_STATUS KFDTest_Launch(Test_Function test_function);

 protected:
    HsaVersionInfo  m_VersionInfo;
    HsaSystemProperties m_SystemProperties;
    unsigned int m_FamilyId;
    unsigned int m_numCpQueues;
    unsigned int m_numSdmaEngines;
    unsigned int m_numSdmaXgmiEngines;
    unsigned int m_numSdmaQueuesPerEngine;
    HsaMemFlags m_MemoryFlags;
    HsaNodeInfo m_NodeInfo;
    HSAint32 m_xnack;
    Assembler* m_pAsm;

    Assembler* m_pAsmGPU[MAX_GPU];

    unsigned int m_numCpQueues_GPU[MAX_GPU];
    unsigned int m_numSdmaEngines_GPU[MAX_GPU];
    unsigned int m_numSdmaXgmiEngines_GPU[MAX_GPU];
    unsigned int m_numSdmaQueuesPerEngine_GPU[MAX_GPU];

    // @brief Executed before every test that uses KFDBaseComponentTest class and sets all common settings for the tests.
    virtual void SetUp();
    // @brief Executed after every test that uses KFDBaseComponentTest class.
    virtual void TearDown();

    /* TO DO: check all gpu support svm api */
    bool SVMAPISupported() {
        bool supported = m_NodeInfo.HsaDefaultGPUNodeProperties()
                        ->Capability.ui32.SVMAPISupported;
        if (!supported)
            LOG() << "SVM API not supported" << std::endl;
        return supported;
    }

    // Set xnack_override to -1 if parameter is not passed in, to avoid unnecessary code churn
    void SVMSetXNACKMode(int xnack_override = -1) {
        if (!SVMAPISupported())
            return;

        m_xnack = -1;
        HSAKMT_STATUS ret = hsaKmtGetXNACKMode(&m_xnack);
        if (ret != HSAKMT_STATUS_SUCCESS) {
            LOG() << "Failed " << ret << " to get XNACK mode" << std::endl;
            return;
        }

        HSAint32 xnack_on = -1;
        char *hsa_xnack = getenv("HSA_XNACK");

        // HSA_XNACK takes priority over kfdtest parameters
        if (hsa_xnack)
                xnack_on = strncmp(hsa_xnack, "0", 1);
        else if (xnack_override > -1)
                xnack_on = xnack_override;
        else
                return;

	// No need to set XNACK if it's already the current value
	if (xnack_on == m_xnack)
		return;

        ret = hsaKmtSetXNACKMode(xnack_on);
        if (ret != HSAKMT_STATUS_SUCCESS)
            LOG() << "Failed " << ret << " to set XNACK mode " << xnack_on << std::endl;
        else
            LOG() << "Setting XNACK mode to " << xnack_on << std::endl;
    }

    void SVMRestoreXNACKMode() {
        if (!SVMAPISupported())
             return;

        if (m_xnack == -1)
            return;

        hsaKmtSetXNACKMode(m_xnack);
    }
};

extern KFDBaseComponentTest* g_baseTest;
#endif  //  __KFD_BASE_COMPONENT_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDCWSRTest.cpp
================================================
/*
 * Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <tuple>
#include "KFDCWSRTest.hpp"
#include "Dispatch.hpp"

// Per-test setup: the CWSR tests need nothing beyond the common fixture
// setup, so this only forwards to the base class.
void KFDCWSRTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Per-test cleanup: forwards to the common fixture teardown.
void KFDCWSRTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

// Reads the amdgpu module parameter cwsr_enable from sysfs and returns it.
// The value starts at 0 and is handed to fscanf_dec to be filled in.
static inline uint32_t checkCWSREnabled() {
    uint32_t moduleParam = 0;
    fscanf_dec("/sys/module/amdgpu/parameters/cwsr_enable", &moduleParam);
    return moduleParam;
}

/**
 * KFDCWSRTest.BasicTest
 *
 * This test dispatches the PersistentIterateIsa shader, which continuously increments a vgpr for
 * (num_witems / WAVE_SIZE) waves. While this shader is running, dequeue/requeue requests
 * are sent in a loop to trigger CWSRs.
 *
 * This is a parameterized test. See the INSTANTIATE_TEST_CASE_P below for an explanation
 * on the parameters.
 *
 * This test defines a CWSR threshold. The shader will continuously loop until inputBuf is
 * filled with the known stop value, which occurs once cwsr_thresh CWSRs have been
 * successfully triggered.
 *
 * 4 parameterized tests are defined:
 *
 * KFDCWSRTest.BasicTest/0
 * KFDCWSRTest.BasicTest/1
 * KFDCWSRTest.BasicTest/2
 * KFDCWSRTest.BasicTest/3
 *
 * 0: 1 work-item, CWSR threshold of 10
 * 1: 256 work-items (multi-wave), CWSR threshold of 50
 * 2: 512 work-items (multi-wave), CWSR threshold of 100
 * 3: 1024 work-items (multi-wave), CWSR threshold of 1000
 */

// Per-GPU body of KFDCWSRTest.BasicTest.
// pTestParamters->pTestObject must point at the KFDCWSRTest fixture and
// pTestParamters->gpuNode selects the GPU. The work-item count and the number
// of dequeue/requeue cycles come from the fixture's test parameter tuple.
static void BasicTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDCWSRTest* pKFDCWSRTest = (KFDCWSRTest*)pTestParamters->pTestObject;

    // Locals reuse the fixture member names (m_FamilyId, m_pAsm) but hold the
    // values for this specific GPU.
    const HSAuint32 m_FamilyId = pKFDCWSRTest->GetFamilyIdFromNodeId(gpuNode);

    Assembler* m_pAsm;
    m_pAsm = pKFDCWSRTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    // Tuple layout: (num_witems, cwsr_thresh)
    int num_witems = std::get<0>(pKFDCWSRTest->GetParam());
    int cwsr_thresh = std::get<1>(pKFDCWSRTest->GetParam());
    // Increase delay on emulator by this factor.
    const int delayMult = (g_IsEmuMode ? 20 : 1);

    // Run only on VI or newer families and when the cwsr_enable module
    // parameter is non-zero; otherwise log a skip message.
    if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true, false, true);
        ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PersistentIterateIsa, isaBuffer.As<char*>()), gpuNode);

        // stopval is written to the input buffer to tell the shader to stop;
        // outval pre-fills the output buffer and is what the checks expect.
        unsigned stopval = 0x1234'5678;
        unsigned outval  = 0x8765'4321;

        // 4B per work-item ==> 1 page per 1024 work-items (take ceiling)
        unsigned bufSize = PAGE_SIZE * ((num_witems / 1024) + (num_witems % 1024 != 0));

        HsaMemoryBuffer inputBuf(bufSize, gpuNode, true, false, false);
        HsaMemoryBuffer outputBuf(bufSize, gpuNode, true, false, false);
        unsigned int* input = inputBuf.As<unsigned int*>();
        unsigned int* output = outputBuf.As<unsigned int*>();
        inputBuf.Fill(0);
        outputBuf.Fill(outval);

        PM4Queue queue;
        ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

        Dispatch dispatch(isaBuffer);
        dispatch.SetArgs(input, output);
        dispatch.SetDim(num_witems, 1, 1);
        dispatch.Submit(queue);

        // Let the shader start before the first dequeue request
        Delay(5 * delayMult);

        LOG() << "Starting iteration for " << std::dec << num_witems
              << " work items(s) (targeting " << std::dec << cwsr_thresh
              << " CWSRs)" << std::endl;

        // Each iteration is one dequeue/requeue cycle followed by a check
        // that every work-item's output slot still holds outval.
        for (int num_cwsrs = 0; num_cwsrs < cwsr_thresh; num_cwsrs++) {

            // Send dequeue request
            EXPECT_SUCCESS_GPU(queue.Update(0, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

            Delay(5 * delayMult);

            // Send requeue request
            EXPECT_SUCCESS_GPU(queue.Update(100, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

            Delay(50 * delayMult);

            // Check for reg mangling
            for (int i = 0; i < num_witems; i++) {
                EXPECT_EQ_GPU(outval, output[i], gpuNode);
            }
        }

        LOG() << "Successful completion for " << std::dec << num_witems
              << " work item(s) (CWSRs triggered: " << std::dec << cwsr_thresh
              << ")" << std::endl;
        LOG() << "Signalling shader stop..." << std::endl;

        // Filling the input buffer with stopval is the stop signal
        inputBuf.Fill(stopval);

        // Wait for shader to finish or timeout if shader has vm page fault
        EXPECT_EQ_GPU(0, dispatch.SyncWithStatus(180000), gpuNode);
        EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    } else {
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    }

}

// Parameterized gtest entry point: launches the BasicTest body through
// KFDTest_Launch, which picks the default GPU or all GPUs under test.
TEST_P(KFDCWSRTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicTest));

    TEST_END
}

/**
 * Instantiates various KFDCWSRTest.BasicTest parameterizations
 * Tuple Format: (num_witems, cwsr_thresh)
 *
 * num_witems:    Defines the number of work-items.
 * cwsr_thresh:   Defines the number of CWSRs to trigger.
 *
 * The first macro argument (the instantiation name prefix) is left empty.
 * The four tuples below are the BasicTest/0 .. BasicTest/3 variants, in order.
 */
INSTANTIATE_TEST_CASE_P(
    , KFDCWSRTest,
    ::testing::Values(
            std::make_tuple(1, 10),     /* Single Wave Test,  10 CWSR Triggers */
            std::make_tuple(256, 50),   /* Multi Wave Test,   50 CWSR Triggers */
            std::make_tuple(512, 100),  /* Multi Wave Test,  100 CWSR Triggers */
            std::make_tuple(1024, 1000) /* Multi Wave Test, 1000 CWSR Triggers */
    )
);

/**
 * KFDCWSRTest.InterruptRestore
 *
 * This test verifies that CP can preempt an HQD while it is restoring a dispatch.
 * Create queue 1.
 * Start a dispatch on queue 1 which runs indefinitely and fills all CU wave slots.
 * Create queue 2, triggering context save on queue 1.
 * Start a dispatch on queue 2 which runs indefinitely and fills all CU wave slots.
 * Create queue 3, triggering context save and restore on queues 1 and 2.
 * Preempt runlist. One or both queues must interrupt context restore to preempt.
 */

// Per-GPU body of KFDCWSRTest.InterruptRestore.
// pTestParamters->pTestObject must point at the KFDCWSRTest fixture and
// pTestParamters->gpuNode selects the GPU. Unlike BasicTest, this body does
// not read the fixture's test parameter tuple.
static void InterruptRestore(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDCWSRTest* pKFDCWSRTest = (KFDCWSRTest*)pTestParamters->pTestObject;

    // Locals reuse the fixture member names but hold the values for this GPU.
    const HSAuint32 m_FamilyId = pKFDCWSRTest->GetFamilyIdFromNodeId(gpuNode);

    Assembler* m_pAsm;
    m_pAsm = pKFDCWSRTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

   // Run only on VI or newer families and when the cwsr_enable module
   // parameter is non-zero; otherwise log a skip message.
   if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

        ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(InfiniteLoopIsa, isaBuffer.As<char*>()), gpuNode);

        PM4Queue queue1, queue2, queue3;

        ASSERT_SUCCESS_GPU(queue1.Create(gpuNode), gpuNode);

        // NOTE(review): dispatch1/dispatch2 are only deleted at the end of
        // this branch. If the ASSERT_*_GPU macros below return early on
        // failure (their definition is not visible here), both objects leak.
        Dispatch *dispatch1, *dispatch2;

        dispatch1 = new Dispatch(isaBuffer);
        dispatch2 = new Dispatch(isaBuffer);

        // Both dispatches use the same ISA buffer and the same 0x10000 x 1 x 1 dimensions
        dispatch1->SetDim(0x10000, 1, 1);
        dispatch2->SetDim(0x10000, 1, 1);

        dispatch1->Submit(queue1);

        // Creating the second queue while queue1 is busy is the first
        // context-save trigger described in the test header above.
        ASSERT_SUCCESS_GPU(queue2.Create(gpuNode), gpuNode);

        dispatch2->Submit(queue2);

        // Give waves time to launch.
        Delay(1);

        ASSERT_SUCCESS_GPU(queue3.Create(gpuNode), gpuNode);

        // The shader never terminates; the test ends by destroying the
        // queues, without syncing on either dispatch.
        EXPECT_SUCCESS_GPU(queue1.Destroy(), gpuNode);
        EXPECT_SUCCESS_GPU(queue2.Destroy(), gpuNode);
        EXPECT_SUCCESS_GPU(queue3.Destroy(), gpuNode);

        delete dispatch1;
        delete dispatch2;

    } else {
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    }
}

// gtest entry point: launches the InterruptRestore body through
// KFDTest_Launch, which picks the default GPU or all GPUs under test.
TEST_F(KFDCWSRTest, InterruptRestore) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(InterruptRestore));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDCWSRTest.hpp
================================================
/*
 * Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_CWSR_TEST__H__
#define __KFD_CWSR_TEST__H__

#include <gtest/gtest.h>

#include "PM4Queue.hpp"
#include "KFDBaseComponentTest.hpp"

// Fixture for the CWSR tests. It adds a (num_witems, cwsr_thresh) parameter
// tuple to the common KFD fixture; setup and teardown are declared here and
// defined in the matching source file.
class KFDCWSRTest : public KFDBaseComponentTest,
                    public ::testing::WithParamInterface<std::tuple<int, int>> {
 public:
    KFDCWSRTest() {}
    ~KFDCWSRTest() {}

 protected:
    // Overrides of the per-test hooks declared by KFDBaseComponentTest.
    void SetUp() override;
    void TearDown() override;
};

#endif  // __KFD_CWSR_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDDBGTest.cpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "BaseDebug.hpp"
#include "KFDDBGTest.hpp"
#include <sys/ptrace.h>
#include <poll.h>
#include "hsakmt/linux/kfd_ioctl.h"
#include "KFDQMTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "Dispatch.hpp"
#include <string>

// Per-test setup: the debug tests need nothing beyond the common fixture
// setup, so this only forwards to the base class.
void KFDDBGTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/*
 * Per-test cleanup hook. Clears whatever trap handler a test may have
 * installed on the default GPU node, then runs the shared
 * KFDBaseComponentTest teardown.
 */
void KFDDBGTest::TearDown() {
    ROUTINE_START

    /*
     * Reset the user trap handler: zero handler/buffer addresses and sizes.
     * The returned status is not checked here.
     */
    hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * To test debug attaching to a spawned process (i.e. attach prior to the tracee
 * opening a KFD device), have the child request the parent to PTRACE attach and
 * wait for the parent to debug attach then allow the child to runtime enable.
 *
 * The following will be exercised:
 * - The KFD shall create a KFD process on behalf of the tracee during debug
 *   attach since the tracee has not opened a KFD device.
 * - Runtime enable on the tracee shall raise an event to the debugging parent
 *   and block until parent has signalled that it has received the runtime
 *   enable event.
 * - Tracee should follow a similar hand shake for runtime disable and debug
 *   detach should follow.
 *
 * */
TEST_F(KFDDBGTest, AttachToSpawnedProcess) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();

        /* Without a child there is nothing to trace; do not fall into the
         * parent branch and wait on PID -1.
         */
        ASSERT_NE(childPid, -1) << "fork failed";

        if (childPid == 0) { /* Debugged process */
            uint32_t rDebug;
            int r;

            /* Let parent become the debugger and wait for attach. */
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            /* Resumed by the parent's PTRACE_CONT, i.e. after debug attach. */
            r = hsaKmtOpenKFD();

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "KFD open failed in debugged process" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime enable" << std::endl;

            r = hsaKmtRuntimeEnable(&rDebug, true);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime disable and exit" << std::endl;

            hsaKmtRuntimeDisable();

            exit(0);
        } else {
            /* NOTE(review): debug is never deleted; confirm that destroying a
             * BaseDebug after Detach() is safe before adding a delete.
             */
            BaseDebug *debug = new BaseDebug();
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));
            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            int childStatus = 0;

            /*
             * Wait for the child's self-raised SIGSTOP.  The status is only
             * refreshed by waitpid, so spinning on WIFSTOPPED could never
             * make progress; fail the test instead if the child is not
             * stopped (for example because it already terminated).
             */
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus))
                << "debugged process did not stop before attach";

            /* Attach and let new debugged process continue with runtime enable */
            LOG() << std::dec << "Attaching to PID " << childPid  << std::endl;
            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, runtimeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_DISABLED);
            ASSERT_EQ(r_info.ttmp_setup, false);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            /* Wait and unblock runtime enable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            /* Wait and unblock runtime disable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            LOG() << std::dec << "Detaching from PID " << childPid << std::endl;
            debug->Detach();

            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            /* The child must terminate normally with exit status 0. */
            LOG() << std::dec << "Waiting on PID " << childPid << " to exit" << std::endl;
            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Unlike AttachToSpawnedProcess, the debug parent will only attach after
 * a non-blocked runtime enable by the tracee.  The parent should expect
 * a status update that the tracee is runtime enabled on debug attach.
 * Cleanup with appropriate runtime disable and debug detach handshake.
 */
TEST_F(KFDDBGTest, AttachToRunningProcess) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();

        /* Without a child there is nothing to trace; do not fall into the
         * parent branch and wait on PID -1.
         */
        ASSERT_NE(childPid, -1) << "fork failed";

        if (childPid == 0) { /* Debugged process */
            uint32_t rDebug;
            int r;

            r = hsaKmtOpenKFD();

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "KFD open failed in debugged process" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime enable" << std::endl;

            /* Runtime enable happens before the parent attaches. */
            r = hsaKmtRuntimeEnable(&rDebug, true);
            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            /* Let parent become the debugger and wait for attach. */
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime disable and exit" << std::endl;

            hsaKmtRuntimeDisable();

            exit(0);
        } else {
            /* NOTE(review): debug is never deleted; confirm that destroying a
             * BaseDebug after Detach() is safe before adding a delete.
             */
            BaseDebug *debug = new BaseDebug();
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));
            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            int childStatus = 0;

            /*
             * Wait for the child's self-raised SIGSTOP.  The child can
             * exit(1) before it ever stops (KFD open or runtime enable
             * failure); spinning on WIFSTOPPED would then hang forever since
             * the status is only refreshed by waitpid.  Fail instead.
             */
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus))
                << "debugged process did not stop before attach";

            /* Attach to running process and let it continue */
            LOG() << std::dec << "Attaching to PID " << childPid  << std::endl;
            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, runtimeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);
            ASSERT_EQ(r_info.ttmp_setup, true);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            /* Wait and unblock runtime disable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            LOG() << std::dec << "Detaching from PID " << childPid << std::endl;
            debug->Detach();

            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            /* The child must terminate normally with exit status 0. */
            LOG() << std::dec << "Waiting on PID " << childPid << " to exit" << std::endl;
            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Self-debug test: the process runtime-enables and debug-attaches to itself,
 * installs a trap handler, dispatches a shader assembled from JumpToTrapIsa
 * and expects a wave trap debug event for the queue plus a non-zero value
 * written to the trap status buffer.
 */
TEST_F(KFDDBGTest, HitTrapEvent) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create shader and trap bufs then enable 2nd level trap
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer trapStatusBuf(PAGE_SIZE, defaultGPUNode, true, false, false);

        HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
        HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);

        ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
                                            trap.As<void *>(),
                                            0x1000,
                                            tmaBuf.As<void*>(),
                                            0x1000));

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(JumpToTrapIsa, isaBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        /* NOTE(review): debug is never deleted; confirm that destroying a
         * BaseDebug after Detach() is safe before adding a delete.
         */
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        unsigned int* trapStatus = trapStatusBuf.As<unsigned int*>();
        trapStatus[0] = 0;
        Dispatch *dispatch;
        dispatch = new Dispatch(isaBuf);
        dispatch->SetArgs(&trapStatus[0], NULL);
        dispatch->SetDim(1, 1, 1);

        /* Subscribe to trap events and submit the queue */
        uint64_t trapMask = KFD_EC_MASK(EC_QUEUE_WAVE_TRAP);
        debug->SetExceptionsEnabled(trapMask);
        dispatch->Submit(queue);

        /* Wait for trap event; the query must overwrite the sentinel id. */
        const uint32_t invalidQueueId = static_cast<uint32_t>(-1);
        uint32_t QueueId = invalidQueueId;
        ASSERT_SUCCESS(debug->QueryDebugEvent(&trapMask, NULL, &QueueId, 5000));
        ASSERT_NE(QueueId, invalidQueueId);
        ASSERT_EQ(trapMask, KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | KFD_EC_MASK(EC_QUEUE_NEW));

        dispatch->Sync();
        EXPECT_SUCCESS(queue.Destroy());

        /*
         * Non-fatal on purpose: only cleanup follows, and it must still run
         * (detach, runtime disable, free the dispatch) when this check fails.
         */
        EXPECT_NE(trapStatus[0], 0u);

        debug->Detach();
        hsaKmtRuntimeDisable();

        delete dispatch;
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Self-debug test for the wave launch override: with a trap handler
 * installed, a shader assembled from NoopIsa is dispatched twice, first with
 * the trap-on-wave-start override and then with trap-on-wave-end OR-ed in.
 * Each dispatch must raise a wave trap debug event.  The test is skipped if
 * the device does not report the requested override as supported.
 */
TEST_F(KFDDBGTest, HitTrapOnWaveStartEndEvent) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create shader and trap bufs then enable 2nd level trap
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
        HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);

        ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
                                            trap.As<void *>(),
                                            0x1000,
                                            tmaBuf.As<void*>(),
                                            0x1000));

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        /* NOTE(review): debug is never deleted; confirm that destroying a
         * BaseDebug after Detach() is safe before adding a delete.
         */
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        for (int i = 0; i < 2; i++) {
            /* Pass 0 requests trap on wave start, pass 1 trap on wave end. */
            const bool trapOnStart = (i % 2) == 0;
            uint32_t enableMask = trapOnStart ? KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START :
                                                KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
            uint32_t supportedMask = enableMask, reqMask = enableMask;

            /*
             * The status is deliberately not asserted: an unsupported
             * override is reported through supportedMask and leads to a skip
             * below rather than a failure.
             */
            debug->SetWaveLaunchOverride(KFD_DBG_TRAP_OVERRIDE_OR, &reqMask, &supportedMask);

            if (!(supportedMask & enableMask)) {
                EXPECT_SUCCESS(queue.Destroy());
                debug->Detach();
                hsaKmtRuntimeDisable();
                LOG() << "Skipping test: Trap on start/end override not supported." << std::endl;
                goto exit;
            }

            // previous set mask
            ASSERT_EQ(reqMask, trapOnStart ? 0 : KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START);

            Dispatch *dispatch;
            dispatch = new Dispatch(isaBuf);
            dispatch->SetArgs(NULL, NULL);
            dispatch->SetDim(1, 1, 1);

            /* Subscribe to trap events and submit the queue */
            uint64_t trapMask = KFD_EC_MASK(EC_QUEUE_WAVE_TRAP);
            debug->SetExceptionsEnabled(trapMask);
            dispatch->Submit(queue);

            /* Wait for trap event; the query must overwrite the sentinel id. */
            const uint32_t invalidQueueId = static_cast<uint32_t>(-1);
            uint32_t QueueId = invalidQueueId;
            ASSERT_SUCCESS(debug->QueryDebugEvent(&trapMask, NULL, &QueueId, 5000));
            ASSERT_NE(QueueId, invalidQueueId);
            ASSERT_EQ(trapMask, KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | KFD_EC_MASK(EC_QUEUE_NEW));

            dispatch->Sync();
            delete dispatch;
        }

        EXPECT_SUCCESS(queue.Destroy());

        debug->Detach();
        hsaKmtRuntimeDisable();
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Self-debug test for queue suspend/resume.  A freshly created queue cannot
 * be suspended until its "new" status has been cleared through a queue
 * snapshot; after that, suspend and resume must both succeed for the queue.
 */
TEST_F(KFDDBGTest, SuspendQueues) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create shader buffer (this test installs no trap handler)
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(JumpToTrapIsa, isaBuf.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        /* NOTE(review): debug is never deleted; confirm that destroying a
         * BaseDebug after Detach() is safe before adding a delete.
         */
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        HsaQueueResource *qResources;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));
        qResources = queue.GetResource();
        HSA_QUEUEID Queues[] = { qResources->QueueId };

        Dispatch *dispatch;
        dispatch = new Dispatch(isaBuf);
        dispatch->SetDim(1, 1, 1);
        dispatch->Submit(queue);

        /*
         * NumQueues is an in/out count that the calls below overwrite, so the
         * buffers are sized by a separate compile-time constant rather than
         * by NumQueues itself (which would make them variable-length arrays).
         */
        const uint32_t maxQueues = 1;
        uint32_t NumQueues = maxQueues;
        uint32_t QueueIds[maxQueues];
        struct kfd_queue_snapshot_entry Snapshots[maxQueues];
        memset(Snapshots, 0, sizeof(Snapshots));
        ASSERT_SUCCESS(debug->SuspendQueues(&NumQueues, Queues, &QueueIds[0], 0));

        // Suspend should fail as new queues cannot be suspended
        ASSERT_EQ(NumQueues, 0);
        ASSERT_NE(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        // Snapshot queue, clear new queue status and suspend successfully.
        // NumQueues is 0 going into the first snapshot, so the entry is
        // expected to stay zeroed while the queue count is still reported.
        ASSERT_SUCCESS(debug->QueueSnapshot(0, (uint64_t)(&(Snapshots[0])), &NumQueues));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(Snapshots[0].ctx_save_restore_area_size, 0);

        ASSERT_SUCCESS(debug->QueueSnapshot(KFD_EC_MASK(EC_QUEUE_NEW), (uint64_t)(&(Snapshots[0])),
                                            &NumQueues));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_GT(Snapshots[0].ctx_save_restore_area_size, 0);

        ASSERT_SUCCESS(debug->SuspendQueues(&NumQueues, Queues, &QueueIds[0], 0));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        // Resume and destroy queue then clean up.
        ASSERT_SUCCESS(debug->ResumeQueues(&NumQueues, Queues, &QueueIds[0]));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        EXPECT_SUCCESS(queue.Destroy());

        debug->Detach();
        hsaKmtRuntimeDisable();

        delete dispatch;
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Forks a tracee that runtime-enables and dispatches a shader assembled from
 * PersistentIterateIsa, while the parent debug-attaches, waits for a device
 * memory violation event and cross-checks it against the device snapshot.
 */
TEST_F(KFDDBGTest, HitMemoryViolation) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_AI) {

        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();

        // Without a child there is nothing to trace; do not fall into the
        // parent branch and wait on PID -1.
        ASSERT_NE(childPid, -1) << "fork failed";

        if (childPid == 0) { // Debugged process
            uint32_t rDebug;
            int r;

            // Refresh setup for HSA device and mem buffer use in child
            KFDBaseComponentTest::TearDown();
            KFDBaseComponentTest::SetUp();

            // Let parent become the debugger and wait for attach.
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            r = hsaKmtRuntimeEnable(&rDebug, true);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            // Failures in the child must terminate it.  A fatal gtest
            // assertion would only return from this test body and let the
            // forked copy of the test runner carry on.
            HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
            r = m_pAsm->RunAssembleBuf(PersistentIterateIsa, isaBuf.As<char*>());

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Shader assembly failed in debugged process" << std::endl;
                exit(1);
            }

            PM4Queue queue;
            r = queue.Create(defaultGPUNode);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Queue creation failed in debugged process" << std::endl;
                exit(1);
            }

            // Create memory violation event on dispatch
            HsaEvent *vmFaultEvent;
            HsaEventDescriptor eventDesc;
            eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
            eventDesc.NodeId = defaultGPUNode;
            eventDesc.SyncVar.SyncVar.UserData = NULL;
            eventDesc.SyncVar.SyncVarSize = 0;
            r = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Creating VM fault event failed" << std::endl;
                exit(1);
            }

            Dispatch dispatch(isaBuf);
            dispatch.SetDim(1, 1, 1);
            dispatch.SetPriv(false); //Override GFX11 CWSR WA
            dispatch.Submit(queue);

            // Queue immediately dies so halt process for tracer device inspection.
            raise(SIGSTOP);

            exit(0);
        } else {
            // NOTE(review): debug is never deleted; confirm that destroying a
            // BaseDebug after Detach() is safe before adding a delete.
            BaseDebug *debug = new BaseDebug();
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));
            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            uint64_t memViolMask = KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
            uint64_t subscribeMask = runtimeMask | memViolMask;
            uint64_t queryMask = 0;
            int childStatus = 0;

            // Wait for the child's first SIGSTOP.  The status is only
            // refreshed by waitpid, so spinning on WIFSTOPPED could never
            // make progress; fail instead if the child is not stopped.
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus))
                << "debugged process did not stop before attach";

            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, subscribeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_DISABLED);
            ASSERT_EQ(r_info.ttmp_setup, false);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            // Wait and unblock runtime enable
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            // Wait for memory violation; the query must overwrite the
            // sentinel device id.
            const uint32_t invalidDeviceId = static_cast<uint32_t>(-1);
            uint32_t deviceId = invalidDeviceId;
            ASSERT_SUCCESS(debug->QueryDebugEvent(&queryMask, &deviceId, NULL, 5000));
            ASSERT_NE(deviceId, invalidDeviceId);
            ASSERT_EQ(queryMask, memViolMask);

            // One zero-initialised snapshot entry per GPU node.  A vector is
            // used because the count is only known at run time.
            const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
            uint32_t snapshotSize = gpuNodes.size();
            std::vector<struct kfd_dbg_device_info_entry> deviceInfo(snapshotSize);

            // Check device snapshot aligns with memory violation on target device.
            ASSERT_SUCCESS(debug->DeviceSnapshot(memViolMask, (uint64_t)(deviceInfo.data()),
                                                 &snapshotSize));
            ASSERT_EQ(snapshotSize, gpuNodes.size());
            for (uint32_t i = 0; i < snapshotSize; i++) {
                if (deviceInfo[i].exception_status & memViolMask) {
                    ASSERT_EQ(deviceInfo[i].gpu_id, deviceId);
                    break;
                }
            }

            // Wait for the child's second SIGSTOP, raised after its dispatch.
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus))
                << "debugged process did not stop after dispatch";

            // Assume tracee queue has died and halted process
            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            debug->Detach();

            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}

/*
 * Self-debug test for address watch.  For the read and non-read watch modes
 * a watch point is set on a target buffer, a reader or writer shader is
 * dispatched against it, and the value the shader leaves in the result
 * buffer is checked for the watch status (and precise memory operation)
 * bits.
 */
TEST_F(KFDDBGTest, HitAddressWatch) {
    TEST_START(TESTPROFILE_RUNALL)
    if (m_FamilyId >= FAMILY_VI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
        HsaNodeProperties nodeProps;
        ASSERT_SUCCESS(hsaKmtGetNodeProperties(defaultGPUNode, &nodeProps));

        HsaMemoryBuffer readerBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer writerBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
        HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);

        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WatchReadIsa, readerBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WatchWriteIsa, writerBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));
        ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
                                            trap.As<void *>(),
                                            0x1000,
                                            tmaBuf.As<void*>(),
                                            0x1000));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        /* NOTE(review): debug is never deleted; confirm that destroying a
         * BaseDebug after Detach() is safe before adding a delete.
         */
        BaseDebug *debug = new BaseDebug();
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        /*
         * One zero-initialised snapshot entry per GPU node.  A vector is used
         * because the count is only known at run time.
         */
        const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
        uint32_t numDevices = gpuNodes.size();
        std::vector<struct kfd_dbg_device_info_entry> deviceInfo(numDevices);
        ASSERT_SUCCESS(debug->DeviceSnapshot(0, (uint64_t)(deviceInfo.data()), &numDevices));
        ASSERT_EQ(numDevices, gpuNodes.size());
        bool is_precise = nodeProps.Capability.ui32.PreciseMemoryOperationsSupported;

        if (is_precise) {
            uint32_t trapFlags = KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
            ASSERT_SUCCESS(debug->SetFlags(&trapFlags));
        }

        uint32_t enableMask = KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
        uint32_t supportedMask = enableMask;
        ASSERT_SUCCESS(debug->SetWaveLaunchOverride(KFD_DBG_TRAP_OVERRIDE_OR,
                                                    &enableMask,
                                                    &supportedMask));
        ASSERT_NE(supportedMask & KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH, 0);
        ASSERT_EQ(enableMask & KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH, 0); // previous set mask

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));
        const uint32_t watchMask = UINT_MAX;  // all mask bits set

        HsaMemoryBuffer targetBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
        HsaMemoryBuffer resultBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
        unsigned int *target = targetBuf.As<unsigned int*>();
        unsigned int *result = resultBuf.As<unsigned int*>();

        for (int mode = KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ;
                 mode < KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL; mode++) {

            // atomics may not be supported on all devices so skip for now.
            if (mode != KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ &&
                mode != KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD)
                continue;

            /* The watch is set on the first device of the snapshot. */
            uint32_t watchId = -1;
            ASSERT_SUCCESS(debug->SetAddressWatch((uint64_t)(&target[0]), mode,
                                                  watchMask, deviceInfo[0].gpu_id, &watchId));
            ASSERT_EQ(watchId, 0);

            /* Read mode runs the reader shader, every other mode the writer. */
            const HsaMemoryBuffer &shaderBuf =
                mode == KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ ? readerBuf : writerBuf;
            uint32_t preciseMask = 0x1;
            uint32_t watchStsMask = m_FamilyId >= FAMILY_GFX12 ? 0x1 : 0x80;
            result[0] = preciseMask;
            Dispatch dispatch(shaderBuf);
            dispatch.SetDim(1, 1, 1);
            dispatch.SetArgs(&target[0], &result[0]);
            dispatch.SetPriv(false); // Override GFX11 CWSR WA
            dispatch.Submit(queue);
            dispatch.Sync();

            /*
             * result[0] contains both the HW watch status result mask and the
             * precise memory operation value check (precise = 1, non-precise = 2)
             * For devices before GFX12, these masks did not bit wise overlap and
             * are added into result[0].
             * In GFX12 and above, they overlap and must be subtracted instead of masked
             * to assert the correct value.
             */
            if (m_FamilyId < FAMILY_GFX12) {
                ASSERT_EQ(result[0] & watchStsMask, watchStsMask);

                if (is_precise) {
                    ASSERT_EQ(result[0] & preciseMask, preciseMask);
                }
            } else {
                uint32_t maskCheck = result[0] - watchStsMask - preciseMask;
                ASSERT_EQ(maskCheck, is_precise ? 0 : 1);
            }

            /* Release the watch point and reset both buffers for the next mode. */
            ASSERT_SUCCESS(debug->ClearAddressWatch(deviceInfo[0].gpu_id, watchId));
            resultBuf.Fill(0);
            targetBuf.Fill(0);
        }

        ASSERT_SUCCESS(queue.Destroy());
        debug->Detach();
        hsaKmtRuntimeDisable();
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }
exit:
    LOG() << std::endl;
    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDDBGTest.hpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_DBG_TEST__H__
#define __KFD_DBG_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

/*
 * gtest fixture used by the KFDDBGTest test cases.  It extends the common
 * KFDBaseComponentTest environment with its own SetUp/TearDown hooks.
 */
class KFDDBGTest : public KFDBaseComponentTest {
 public:
    // The fixture holds no state of its own, so the defaults are sufficient.
    KFDDBGTest() = default;
    ~KFDDBGTest() = default;

 protected:
    // Per-test hooks; their definitions live outside this header.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFD_DBG_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDEventTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <math.h>
#include <limits.h>

#include "KFDEventTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"


/*
 * Per-test setup: runs the shared fixture setup, then marks every per-GPU
 * event slot as empty so TearDown can tell which slots were filled.
 */
void KFDEventTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    for (int gpuIdx = 0; gpuIdx < MAX_GPU; ++gpuIdx) {
        m_pHsaEventGPU[gpuIdx] = NULL;
    }

    ROUTINE_END
}

/*
 * Per-test cleanup: destroys every event still recorded in the per-GPU
 * slots, then runs the shared fixture teardown.
 */
void KFDEventTest::TearDown() {
    ROUTINE_START

    for (int i = 0; i < MAX_GPU; ++i) {
        // Not every test creates an event; empty slots are skipped.
        if (m_pHsaEventGPU[i] == NULL)
            continue;

        // Destruction lives here rather than in the tests so that it is
        // reached even when a test bails out early.
        EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pHsaEventGPU[i]));
    }

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * Per-GPU body of the CreateDestroyEvent test.
 *
 * Creates one queue-type event on the GPU node named in pTestParamters and
 * records it in the fixture's per-GPU slot, so that the fixture's TearDown
 * can destroy it.
 *
 * pTestParamters: supplies the GPU node id and the owning KFDEventTest.
 */
static void CreateDestroyEvent(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDEventTest* pKFDEventTest = (KFDEventTest*)pTestParamters->pTestObject;

    int gpuIndex = pKFDEventTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HsaEvent* m_pHsaEvent = pKFDEventTest->m_pHsaEventGPU[gpuIndex];

    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &m_pHsaEvent), gpuNode);

    /*
     * m_pHsaEvent is only a copy of the fixture slot.  Publish the new event
     * back to the slot (after creation succeeded), otherwise TearDown still
     * sees NULL and the event is never destroyed.
     */
    pKFDEventTest->m_pHsaEventGPU[gpuIndex] = m_pHsaEvent;

    EXPECT_NE_GPU(0, m_pHsaEvent->EventData.HWData2, gpuNode);

}

/*
 * Runs the CreateDestroyEvent helper through KFDTest_Launch and requires the
 * launch to report success.
 */
TEST_F(KFDEventTest, CreateDestroyEvent) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CreateDestroyEvent));

    // Destroy event is being called in test TearDown
    TEST_END;
}


/* Creates MAX_EVENT_NUMBER queue-type events on the given GPU node and then
 * destroys every one of them again. Creation failure aborts the helper;
 * destruction failures are reported but do not stop the remaining cleanup.
 */
static void CreateMaxEvents(KFDTEST_PARAMETERS* pTestParamters) {

    static const unsigned int MAX_EVENT_NUMBER = 256;

    const int gpuNode = pTestParamters->gpuNode;

    // Zero-initialised so every slot starts out as a NULL event pointer.
    HsaEvent* events[MAX_EVENT_NUMBER] = {};

    for (unsigned int idx = 0; idx < MAX_EVENT_NUMBER; ++idx) {
        ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &events[idx]), gpuNode);
    }

    for (unsigned int idx = 0; idx < MAX_EVENT_NUMBER; ++idx) {
        EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(events[idx]), gpuNode);
    }
}

// Hands the CreateMaxEvents helper (bulk create/destroy of events) to
// KFDTest_Launch and requires the launch to succeed.
TEST_F(KFDEventTest, CreateMaxEvents) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CreateMaxEvents));

    TEST_END;
}

/* Signals an event from the GPU and waits for it on the CPU.
 *
 * A throw-away event is created first so that the event under test does not
 * get event id 0. A release-memory packet referencing the event under test is
 * then submitted on a PM4 queue and the helper waits for the event.
 *
 * The event under test is created directly into the fixture's per-GPU slot
 * (by reference) so that KFDEventTest::TearDown destroys it; going through a
 * local copy of the pointer leaked the event.
 */
static void SignalEvent(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDEventTest* pKFDEventTest = (KFDEventTest*)pTestParamters->pTestObject;

    int gpuIndex = pKFDEventTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HsaEvent*& pHsaEvent = pKFDEventTest->m_pHsaEventGPU[gpuIndex];
    HSAuint32 familyId = pKFDEventTest->GetFamilyIdFromNodeId(gpuNode);

    PM4Queue queue;
    HsaEvent *tmp_event = NULL;

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &tmp_event));

    /* Intentionally let event id for the event under test be non zero */
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &pHsaEvent));
    ASSERT_NE(0, pHsaEvent->EventData.HWData2);

    ASSERT_SUCCESS(queue.Create(gpuNode));

    /* From gfx9 onward, pHsaEvent->EventId will also be passed to int_ctxid in
     * the Release Mem packet, which is used as context id in ISR.
     */
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false,
                    pHsaEvent->EventData.HWData2, pHsaEvent->EventId));

    queue.Wait4PacketConsumption();

    EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent(pHsaEvent, g_TestTimeOut), gpuNode);

    EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(tmp_event), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Hands the SignalEvent helper (GPU-signalled event, CPU wait) to
// KFDTest_Launch and requires the launch to succeed.
TEST_F(KFDEventTest, SignalEvent) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SignalEvent));

    TEST_END;
}

/* Test event signaling with event-age-enabled wait (hsaKmtWaitOnEvent_Ext).
 *
 * Skipped when the kernel interface version is 1.x with x < 14. The helper
 * walks through five scenarios, each checking the event_age value reported
 * back by the wait call.
 *
 * The event under test is created directly into the fixture's per-GPU slot
 * (by reference) so that KFDEventTest::TearDown destroys it; going through a
 * local copy of the pointer leaked the event.
 */
static void SignalEventExt(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDEventTest* pKFDEventTest = (KFDEventTest*)pTestParamters->pTestObject;

    int gpuIndex = pKFDEventTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HsaEvent*& pHsaEvent = pKFDEventTest->m_pHsaEventGPU[gpuIndex];
    HSAuint32 familyId = pKFDEventTest->GetFamilyIdFromNodeId(gpuNode);

    PM4Queue queue;
    HsaEvent *tmp_event = NULL;
    uint64_t event_age;

    if (pKFDEventTest->Get_Version()->KernelInterfaceMajorVersion == 1 &&
        pKFDEventTest->Get_Version()->KernelInterfaceMinorVersion < 14) {
        LOG() << "event age tracking isn't supported in KFD. Exiting." << std::endl;
        return;
    }

    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &tmp_event), gpuNode);

    /* Intentionally let event id for the event under test be non zero */
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &pHsaEvent), gpuNode);
    ASSERT_NE_GPU(0, pHsaEvent->EventData.HWData2, gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    /* 1. event_age gets incremented every time when the event signals */
    event_age = 1;
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false,
                    pHsaEvent->EventData.HWData2, pHsaEvent->EventId));
    EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age), gpuNode);
    ASSERT_EQ_GPU(event_age, 2, gpuNode);
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false,
                    pHsaEvent->EventData.HWData2, pHsaEvent->EventId));
    EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age), gpuNode);
    ASSERT_EQ_GPU(event_age, 3, gpuNode);

    /* 2. event wait return without sleep after the event signals */
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false,
                    pHsaEvent->EventData.HWData2, pHsaEvent->EventId));
    sleep(1); /* wait for event signaling */
    EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age), gpuNode);
    ASSERT_EQ_GPU(event_age, 4, gpuNode);

    /* 3. signaling from CPU; a failed set would otherwise only show up as a
     * confusing wait timeout below, so report it right here.
     */
    EXPECT_SUCCESS_GPU(hsaKmtSetEvent(pHsaEvent), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age), gpuNode);
    ASSERT_EQ_GPU(event_age, 5, gpuNode);

    /* 4. when event_age is 0, hsaKmtWaitOnEvent_Ext always sleeps */
    event_age = 0;
    ASSERT_EQ_GPU(HSAKMT_STATUS_WAIT_TIMEOUT, hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age), gpuNode);

    /* 5. when event_age is 0, it always stays 0 after the event signals */
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false,
                    pHsaEvent->EventData.HWData2, pHsaEvent->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 0);

    EXPECT_SUCCESS(hsaKmtDestroyEvent(tmp_event));

    EXPECT_SUCCESS(queue.Destroy());

}

// Hands the SignalEventExt helper (event-age aware waits) to KFDTest_Launch
// and requires the launch to succeed.
TEST_F(KFDEventTest, SignalEventExt) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SignalEventExt));

    TEST_END;
}

// Returns the current CLOCK_MONOTONIC reading expressed in nanoseconds.
static uint64_t gettime() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    // Widen the seconds before scaling so the multiplication is done in 64 bits.
    const int64_t wholeSecondsInNs = static_cast<int64_t>(now.tv_sec) * 1000 * 1000 * 1000;
    return wholeSecondsInNs + now.tv_nsec;
}

// Returns the smallest power of two that is >= num, as a double.
//
// Computed with integer shifts instead of pow(2, ceil(log(num)/log(2))):
// the floating-point quotient can land slightly above an exact integer for
// inputs that are already powers of two, which makes ceil() overshoot and
// doubles the result. Non-positive inputs yield 1 (2^0) rather than NaN/0.
static inline double pow2_round_up(int num) {
    // 64-bit accumulator: the answer for INT_MAX is 2^31, which does not fit in int.
    unsigned long long power = 1;

    while (num > 0 && power < static_cast<unsigned long long>(num))
        power <<= 1;

    return static_cast<double>(power);
}

/* Benchmark helper that creates a batch of events, signals all of them from
 * a PM4 queue with a single submission and measures how long the CPU takes
 * to observe them. A rolling history of the last HISTORY_SIZE runs is kept
 * for the min/max/average report printed by run().
 */
class QueueAndSignalBenchmark {
 private:
    static const int HISTORY_SIZE = 100;

    int mNumEvents;
    // Number of samples recorded so far; its value modulo HISTORY_SIZE is the
    // next slot to overwrite. Kept below 2*HISTORY_SIZE (see run()).
    int mHistorySlot;
    uint64_t mTimeHistory[HISTORY_SIZE];
    uint64_t mLatHistory[HISTORY_SIZE];

 public:
    QueueAndSignalBenchmark(int events) : mNumEvents(events), mHistorySlot(0) {
        memset(mTimeHistory, 0, sizeof(mTimeHistory));
        memset(mLatHistory, 0, sizeof(mLatHistory));
    }

    /* Creates eventCount events on the given node, queues one release-memory
     * packet per event, submits them together and waits for each event in
     * order.
     *
     * @param node        GPU node to run on
     * @param eventCount  number of events to create and signal
     * @param time        out: nanoseconds from submission until the last
     *                    event was observed (0 on failure)
     * @param latency     out: nanoseconds from submission until the first
     *                    event was observed (0 on failure or eventCount == 0)
     * @return the status of the first failing call, HSAKMT_STATUS_SUCCESS
     *         otherwise
     */
    int queueAndSignalEvents(int node, int eventCount, uint64_t &time, uint64_t &latency) {
        int r;
        uint64_t startTime;
        PM4Queue queue;

        unsigned int familyId = g_baseTest->GetFamilyIdFromNodeId(node);
        // calloc so that every slot is NULL until its event has been created;
        // the cleanup loop relies on that.
        HsaEvent** pHsaEvent = reinterpret_cast<HsaEvent**>(calloc(eventCount, sizeof(HsaEvent*)));
        size_t packetSize = PM4ReleaseMemoryPacket(familyId, false, 0, 0).SizeInBytes();
        int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1));

        // Give both outputs a defined value on every path, including errors.
        time = 0;
        latency = 0;

        r = queue.Create(node, qSize);
        if (r != HSAKMT_STATUS_SUCCESS)
            goto exit;

        for (int i = 0; i < eventCount; i++) {
            r = CreateQueueTypeEvent(false, false, node, &pHsaEvent[i]);
            if (r != HSAKMT_STATUS_SUCCESS)
                goto exit;

            queue.PlacePacket(PM4ReleaseMemoryPacket(familyId, false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId));
        }

        startTime = gettime();
        queue.SubmitPacket();
        for (int i = 0; i < eventCount; i++) {
            r = hsaKmtWaitOnEvent(pHsaEvent[i], g_TestTimeOut);

            if (r != HSAKMT_STATUS_SUCCESS)
                goto exit;

            if (i == 0)
                latency = gettime() - startTime;
        }
        time = gettime() - startTime;

exit:
        for (int i = 0; i < eventCount; i++) {
            if (pHsaEvent[i])
                hsaKmtDestroyEvent(pHsaEvent[i]);
        }
        // The pointer array itself was previously leaked on every call.
        free(pHsaEvent);
        queue.Destroy();

        return r;
    }

    /* Runs one benchmark iteration on the given node, records it in the
     * history and prints min/max/average latency and rate over the samples
     * recorded so far (at most HISTORY_SIZE).
     */
    void run(int node) {
        uint64_t time = 0, latency = 0;
        uint64_t avgLat = 0, avgTime = 0;
        uint64_t minTime = ULONG_MAX, maxTime = 0;
        uint64_t minLat = ULONG_MAX, maxLat = 0;

        ASSERT_EQ(queueAndSignalEvents(node, mNumEvents, time, latency), HSAKMT_STATUS_SUCCESS);

        mTimeHistory[mHistorySlot%HISTORY_SIZE] = time;
        mLatHistory[mHistorySlot%HISTORY_SIZE] = latency;
        mHistorySlot++;
        // Keep the counter bounded without changing its value modulo
        // HISTORY_SIZE or the fact that the history is full.
        if (mHistorySlot >= 2 * HISTORY_SIZE)
            mHistorySlot -= HISTORY_SIZE;

        // Only slots that hold a real sample take part in the statistics;
        // until the history is full those are exactly the first mHistorySlot
        // entries. Including the zero-filled rest forced min to 0 and dragged
        // the averages down.
        const int samples = mHistorySlot < HISTORY_SIZE ? mHistorySlot : HISTORY_SIZE;

        for (int i = 0; i < samples; i++) {
            minTime = mTimeHistory[i] < minTime ? mTimeHistory[i] : minTime;
            maxTime = mTimeHistory[i] > maxTime ? mTimeHistory[i] : maxTime;
            avgTime += mTimeHistory[i];

            minLat = mLatHistory[i] < minLat ? mLatHistory[i] : minLat;
            maxLat = mLatHistory[i] > maxLat ? mLatHistory[i] : maxLat;
            avgLat += mLatHistory[i];
        }

        avgTime /= samples;
        avgLat /= samples;

        printf("\033[KEvents: %d History: %d/%d\n", mNumEvents, samples, HISTORY_SIZE);
        printf("\033[KMin Latency: %f ms\n", (float)minLat/1000000);
        printf("\033[KMax Latency: %f ms\n", (float)maxLat/1000000);
        printf("\033[KAvg Latency: %f ms\n", (float)avgLat/1000000);
        printf("\033[K   Min Rate: %f IH/ms\n", ((float)mNumEvents)/maxTime*1000000);
        printf("\033[K   Max Rate: %f IH/ms\n", ((float)mNumEvents)/minTime*1000000);
        printf("\033[K   Avg Rate: %f IH/ms\n", ((float)mNumEvents)/avgTime*1000000);
    }
};

// Interactive benchmark (disabled by default): redraws latency and sustained
// rate statistics on the terminal forever, so it never reaches TEST_END.
TEST_F(KFDEventTest, DISABLED_MeasureInterruptConsumption) {
    TEST_START(TESTPROFILE_RUNALL);
    QueueAndSignalBenchmark latencyBench(128);
    QueueAndSignalBenchmark sustainedBench(4095);

    static const char* const kRule = "--------------------------\n";

    // Clear the screen once, then repaint from the home position each round.
    printf("\033[2J");
    for (;;) {
        printf("\033[H");
        printf("%s", kRule);
        latencyBench.run(m_NodeInfo.HsaDefaultGPUNode());
        printf("%s", kRule);
        sustainedBench.run(m_NodeInfo.HsaDefaultGPUNode());
        printf("%s", kRule);
    }

    TEST_END;
}

/* Creates and signals MAX_EVENT_NUMBER events in one batch on the given GPU
 * node using QueueAndSignalBenchmark.
 *
 * The outcome used to be discarded entirely, so a failure left no trace. It
 * is now logged. It is deliberately not turned into a test failure here.
 * NOTE(review): decide whether a failing status should fail the test.
 */
static void SignalMaxEvents(KFDTEST_PARAMETERS* pTestParamters) {

    static const unsigned int MAX_EVENT_NUMBER = 4095;

    int gpuNode = pTestParamters->gpuNode;

    // Timing outputs are not used here but must be supplied to the helper.
    uint64_t time = 0, latency = 0;

    QueueAndSignalBenchmark maxEventTest(MAX_EVENT_NUMBER);
    int r = maxEventTest.queueAndSignalEvents(gpuNode, MAX_EVENT_NUMBER,
            time, latency);

    if (r != HSAKMT_STATUS_SUCCESS) {
        LOG() << "SignalMaxEvents: signaling " << std::dec << MAX_EVENT_NUMBER
              << " events on GPU node " << gpuNode << " returned status " << r << std::endl;
    }
}

// Hands the SignalMaxEvents helper (one batch of 4095 events) to
// KFDTest_Launch and requires the launch to succeed.
TEST_F(KFDEventTest, SignalMaxEvents) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SignalMaxEvents));

    TEST_END;
}

/* Signals EVENT_NUMBER events one at a time from a PM4 queue, pausing
 * between submissions, and then waits for all of them with a single
 * hsaKmtWaitOnMultipleEvents call (wait-for-all mode).
 *
 * Locals that were computed but never read (the GPU index and a packet size
 * counter) have been dropped.
 */
static void SignalMultipleEventsWaitForAll(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDEventTest* pKFDEventTest = (KFDEventTest*)pTestParamters->pTestObject;

    HSAuint32 familyId = pKFDEventTest->GetFamilyIdFromNodeId(gpuNode);

    static const unsigned int EVENT_NUMBER = 64;  // 64 is the maximum for hsaKmtWaitOnMultipleEvents
    static const unsigned int WAIT_BETWEEN_SUBMISSIONS_MS = 50;

    HsaEvent* pHsaEvent[EVENT_NUMBER];
    unsigned int i = 0;

    for (i = 0; i < EVENT_NUMBER; i++) {
        pHsaEvent[i] = NULL;
        ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &pHsaEvent[i]), gpuNode);
    }

    PM4Queue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    // Signal the events one by one, spaced out in time, so the final wait
    // sees events that were signalled at different moments.
    for (i = 0; i < EVENT_NUMBER; i++) {
        queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, false, pHsaEvent[i]->EventData.HWData2,
                                   pHsaEvent[i]->EventId));
        queue.Wait4PacketConsumption();

        Delay(WAIT_BETWEEN_SUBMISSIONS_MS);
    }

    EXPECT_SUCCESS_GPU(hsaKmtWaitOnMultipleEvents(pHsaEvent, EVENT_NUMBER, true, g_TestTimeOut), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    for (i = 0; i < EVENT_NUMBER; i++)
        EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(pHsaEvent[i]), gpuNode);
}

// Hands the SignalMultipleEventsWaitForAll helper to KFDTest_Launch and
// requires the launch to succeed.
TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SignalMultipleEventsWaitForAll));

    TEST_END;
}

/* Send an event interrupt with 0 context ID. Test that KFD handles it
 * gracefully and with good performance. On current GPUs and firmware it
 * should be handled on a fast path.
 *
 * Each repetition queues an invalid signal packet (address 0, id 0) followed
 * by a valid one, submits both and times how long the valid event takes to
 * arrive. The report drops one minimum and one maximum sample before
 * computing mean and standard deviation.
 *
 * The event under test is created directly into the fixture's per-GPU slot
 * (by reference) so that KFDEventTest::TearDown destroys it; going through a
 * local copy of the pointer leaked the event.
 */
static void SignalInvalidEvent(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDEventTest* pKFDEventTest = (KFDEventTest*)pTestParamters->pTestObject;

    int gpuIndex = pKFDEventTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HsaEvent*& pSignalEvent = pKFDEventTest->m_pHsaEventGPU[gpuIndex];
    HSAuint32 familyId = pKFDEventTest->GetFamilyIdFromNodeId(gpuNode);

    PM4Queue queue;

    // Create some dummy events, to make the slow path a bit slower
    static const unsigned int EVENT_NUMBER = 64;//4094;
    HsaEvent* pHsaEvent[EVENT_NUMBER];
    for (unsigned int i = 0; i < EVENT_NUMBER; i++) {
        pHsaEvent[i] = NULL;
        ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &pHsaEvent[i]), gpuNode);
    }

    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &pSignalEvent), gpuNode);
    ASSERT_NE_GPU(0, pSignalEvent->EventData.HWData2, gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    static const unsigned int REPS = 2000;
    HSAuint64 duration[REPS];
    // min starts at the largest representable value so it always ends up
    // being an actual sample, even if every repetition is very slow.
    HSAuint64 total = 0, min = ~static_cast<HSAuint64>(0), max = 0;
    for (unsigned int i = 0; i < REPS; i++) {
        // Invalid signal packet
        queue.PlacePacket(PM4ReleaseMemoryPacket(familyId, false, 0, 0));
        // Submit valid signal packet
        queue.PlacePacket(PM4ReleaseMemoryPacket(familyId, false,
                        pSignalEvent->EventData.HWData2, pSignalEvent->EventId));

        HSAuint64 startTime = GetSystemTickCountInMicroSec();
        queue.SubmitPacket();

        EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent(pSignalEvent, g_TestTimeOut), gpuNode);

        duration[i] = GetSystemTickCountInMicroSec() - startTime;
        total += duration[i];
        if (duration[i] < min)
            min = duration[i];
        if (duration[i] > max)
            max = duration[i];
    }

    // Trimmed statistics: exclude exactly one occurrence of the minimum and
    // one of the maximum, then recompute the extremes of what remains.
    double mean = (double)(total - min - max) / (REPS - 2);
    double variance = 0;
    bool skippedMin = false, skippedMax = false;
    HSAuint64 newMin = max, newMax = min;
    for (unsigned int i = 0; i < REPS; i++) {
        if (!skippedMin && duration[i] == min) {
            skippedMin = true;
            continue;
        }
        if (!skippedMax && duration[i] == max) {
            skippedMax = true;
            continue;
        }
        if (duration[i] < newMin)
            newMin = duration[i];
        if (duration[i] > newMax)
            newMax = duration[i];
        double diff = mean - duration[i];
        variance += diff*diff;
    }
    variance /= REPS - 2;
    double stdDev = sqrt(variance);

    LOG() << "Time for event handling (min/avg/max [std.dev] in us) " << std::dec
          << newMin << "/" << mean << "/" << newMax << " [" << stdDev << "]\n";

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    for (unsigned int i = 0; i < EVENT_NUMBER; i++)
        EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(pHsaEvent[i]), gpuNode);

}

// Hands the SignalInvalidEvent helper (zero-context-id interrupt timing) to
// KFDTest_Launch and requires the launch to succeed.
TEST_F(KFDEventTest, SignalInvalidEvent) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SignalInvalidEvent));

    TEST_END;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDEventTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDBaseComponentTest.hpp"

#ifndef __KFD_EVENT_TEST__H__
#define __KFD_EVENT_TEST__H__

// Test fixture for the KFD event tests; extends KFDBaseComponentTest with one
// event pointer per GPU.
class KFDEventTest : public KFDBaseComponentTest {
 public:
    KFDEventTest() {}
    ~KFDEventTest() {}

    /// Runs before each KFDEventTest test case.
    virtual void SetUp();
    /// Runs after each KFDEventTest test case.
    virtual void TearDown();

    /// Per-GPU event pointers; public so the free-function test helpers can reach them.
    HsaEvent* m_pHsaEventGPU[MAX_GPU];

 protected:
    static const unsigned int EVENT_TIMEOUT = 5000;  // 5 seconds
};

#endif  // __KFD_EVENT_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDEvictTest.cpp
================================================
/*
 * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <vector>
#include <string>
#include "KFDEvictTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

#define N_PROCESSES             (2)     /* Number of processes running in parallel, must be at least 2 */
#define ALLOCATE_BUF_SIZE_MB    (64)
#define ALLOCATE_RETRY_TIMES    (3)
#define MAX_WAVEFRONTS          (512)

#define SDMA_NOP  0x0

// Per-test setup for KFDEvictTest; only forwards to the base fixture.
void KFDEvictTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Per-test teardown for KFDEvictTest; only forwards to the base fixture.
void KFDEvictTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

void KFDEvictTest::AllocBuffers(bool m_IsParent, HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
                                std::vector<void *> &pBuffers) {
    HSAuint64   totalMB;

    totalMB = N_PROCESSES*count*(vramBufSize>>20);
    if (m_IsParent) {
        LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="
              << totalMB << ")MB VRAM in KFD" << std::endl;
    }

    HsaMemMapFlags mapFlags = {0};
    HSAKMT_STATUS ret;
    HSAuint32 retry = 0;

    m_Flags.Value = 0;
    m_Flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    m_Flags.ui32.HostAccess = 0;
    m_Flags.ui32.NonPaged = 1;

    for (HSAuint32 i = 0; i < count; ) {
        ret = hsaKmtAllocMemory(defaultGPUNode, vramBufSize, m_Flags, &m_pBuf);
        if (ret == HSAKMT_STATUS_SUCCESS) {
            if (hsakmt_is_dgpu()) {
                if (hsaKmtMapMemoryToGPUNodes(m_pBuf, vramBufSize, NULL,
                       mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)) == HSAKMT_STATUS_ERROR) {
                    EXPECT_SUCCESS(hsaKmtFreeMemory(m_pBuf, vramBufSize));
                    LOG() << "Map failed for " << i << "/" << count << " buffer. Retrying allocation" << std::endl;
                    goto retry;
                }
            }
            pBuffers.push_back(m_pBuf);

            i++;
            retry = 0;
            continue;
        }
retry:
        if (retry++ > ALLOCATE_RETRY_TIMES) {
            break;
        }

        /* Wait for 1 second to try allocate again */
        sleep(1);
    }
}

// Releases every non-NULL buffer in pBuffers: on a dGPU the buffer is
// unmapped from the GPU first, then its memory (vramBufSize bytes) is freed.
void KFDEvictTest::FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize) {
    for (void* buffer : pBuffers) {
        m_pBuf = buffer;
        if (m_pBuf == NULL)
            continue;

        if (hsakmt_is_dgpu())
            EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(m_pBuf));
        EXPECT_SUCCESS(hsaKmtFreeMemory(m_pBuf, vramBufSize));
    }
}

void KFDEvictTest::AllocAmdgpuBo(bool m_IsParent, int rn, HSAuint64 vramBufSize, amdgpu_bo_handle &handle) {
    struct amdgpu_bo_alloc_request alloc;

    alloc.alloc_size = vramBufSize / N_PROCESSES;
    alloc.phys_alignment = PAGE_SIZE;
    alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
    alloc.flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;

    if (m_IsParent) {
        LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(="
              << (vramBufSize >> 20)  << ")MB VRAM in GFX" << std::endl;
    }
    ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));
}

// Frees an amdgpu buffer object and asserts that amdgpu_bo_free returned 0.
void KFDEvictTest::FreeAmdgpuBo(amdgpu_bo_handle handle) {
    ASSERT_EQ(0, amdgpu_bo_free(handle));
}

/* Allocates an amdgpu buffer object, reserves a GPU virtual address range
 * for it, maps the BO at that address and maps it for CPU access.
 *
 * @param dev         amdgpu device to allocate on
 * @param size        allocation size in bytes
 * @param alignment   physical and VA alignment
 * @param heap        preferred heap for the allocation
 * @param flags       allocation flags
 * @param bo          out: buffer object handle
 * @param cpu         out: CPU mapping of the buffer
 * @param mc_address  out: GPU virtual address the buffer is mapped at
 * @param va_handle   out: handle of the reserved VA range
 * @return 0 on success; otherwise the error of the failing step. On failure
 *         every step that had already succeeded is undone, in reverse order.
 *         `*bo`, `*mc_address` and `*va_handle` are only written on success.
 */
static int amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
                                   unsigned alignment, unsigned heap, uint64_t flags,
                                   amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
                                   amdgpu_va_handle *va_handle) {
    struct amdgpu_bo_alloc_request request = {};
    amdgpu_bo_handle buf_handle;
    amdgpu_va_handle handle;
    uint64_t vmc_addr;
    int r;

    request.alloc_size = size;
    request.phys_alignment = alignment;
    request.preferred_heap = heap;
    request.flags = flags;

    r = amdgpu_bo_alloc(dev, &request, &buf_handle);
    if (r)
        return r;

    r = amdgpu_va_range_alloc(dev,
                  amdgpu_gpu_va_range_general,
                  size, alignment, 0, &vmc_addr,
                  &handle, 0);
    if (r)
        goto error_free_bo;

    r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
    if (r)
        goto error_free_va;

    r = amdgpu_bo_cpu_map(buf_handle, cpu);
    if (r)
        goto error_unmap_va;

    *bo = buf_handle;
    *mc_address = vmc_addr;
    *va_handle = handle;

    return 0;

    /* Each label undoes the last step that succeeded and falls through to
     * the ones before it. The VA range was previously never released on
     * these paths.
     */
error_unmap_va:
    amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_free_va:
    amdgpu_va_range_free(handle);

error_free_bo:
    amdgpu_bo_free(buf_handle);
    return r;
}

/* Tears down a buffer set up by amdgpu_bo_alloc_and_map: removes the CPU
 * mapping, unmaps the GPU virtual address, releases the VA range and frees
 * the buffer object. The results of the individual calls are not checked;
 * the function always returns 0.
 */
static inline int amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
                                           uint64_t mc_addr, uint64_t size) {
    amdgpu_bo_cpu_unmap(bo);
    amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
    amdgpu_va_range_free(va_handle);
    amdgpu_bo_free(bo);

    return 0;
}

/* Builds a BO list from bo1 and, when it is non-NULL, bo2. Returns whatever
 * amdgpu_bo_list_create returns; the list handle is stored in *list.
 */
static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
                                     amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) {
    amdgpu_bo_handle resources[] = {bo1, bo2};

    // A NULL second handle means only the first array entry is passed on.
    int resourceCount = 1;
    if (bo2)
        resourceCount = 2;

    return amdgpu_bo_list_create(dev, resourceCount, resources, NULL, list);
}

/* Submits an SDMA indirect buffer filled with NOPs 100 times through the
 * amdgpu command submission interface on render node `rn`, with `handle`
 * included in the BO list of every submission, and waits for each fence.
 *
 * @param rn            index into m_RenderNodes
 * @param handle        extra buffer object to reference in each submission
 *                      (may be NULL, in which case only the IB is listed)
 * @param computeQueue  optional PM4 queue; when given, a NOP packet is
 *                      submitted on it every 16th iteration
 *
 * More than two failed submissions abort the function via ASSERT.
 */
void KFDEvictTest::AmdgpuCommandSubmissionSdmaNop(int rn, amdgpu_bo_handle handle,
                                                     PM4Queue *computeQueue = NULL) {
    amdgpu_context_handle contextHandle;
    amdgpu_bo_handle ibResultHandle;
    void *ibResultCpu;
    uint64_t ibResultMcAddress;
    struct amdgpu_cs_request ibsRequest;
    struct amdgpu_cs_ib_info ibInfo;
    struct amdgpu_cs_fence fenceStatus;
    amdgpu_bo_list_handle boList;
    amdgpu_va_handle vaHandle;
    uint32_t *ptr;
    uint32_t expired;
    unsigned failCount = 0;

    ASSERT_EQ(0, amdgpu_cs_ctx_create(m_RenderNodes[rn].device_handle, &contextHandle));

    ASSERT_EQ(0, amdgpu_bo_alloc_and_map(m_RenderNodes[rn].device_handle,
        PAGE_SIZE, PAGE_SIZE,
        AMDGPU_GEM_DOMAIN_GTT, 0,
        &ibResultHandle, &ibResultCpu,
        &ibResultMcAddress, &vaHandle));

    ASSERT_EQ(0, amdgpu_get_bo_list(m_RenderNodes[rn].device_handle, ibResultHandle, handle,
        &boList));

    /* Fill Nop commands in IB */
    ptr = reinterpret_cast<uint32_t *>(ibResultCpu);
    for (int i = 0; i < 16; i++)
        ptr[i] = SDMA_NOP;

    memset(&ibInfo, 0, sizeof(struct amdgpu_cs_ib_info));
    ibInfo.ib_mc_address = ibResultMcAddress;
    ibInfo.size = 16;

    memset(&ibsRequest, 0, sizeof(struct amdgpu_cs_request));
    ibsRequest.ip_type = AMDGPU_HW_IP_DMA;
    ibsRequest.ring = 0;
    ibsRequest.number_of_ibs = 1;
    ibsRequest.ibs = &ibInfo;
    ibsRequest.resources = boList;
    ibsRequest.fence_info.handle = NULL;

    memset(&fenceStatus, 0, sizeof(struct amdgpu_cs_fence));
    for (int i = 0; i < 100; i++) {
        int r = amdgpu_cs_submit(contextHandle, 0, &ibsRequest, 1);

        Delay(50);
        if (r) {
            failCount++;
            ASSERT_LE(failCount, 2);
            continue;
        }

        fenceStatus.context = contextHandle;
        fenceStatus.ip_type = AMDGPU_HW_IP_DMA;
        fenceStatus.ip_instance = 0;
        fenceStatus.ring = 0;
        fenceStatus.fence = ibsRequest.seq_no;

        /* Start from "not expired" so a failed query is reported as a
         * missing completion instead of reading an indeterminate value.
         * The timeout is widened before scaling so the multiplication
         * cannot wrap for large g_TestTimeOut values.
         */
        expired = 0;
        EXPECT_EQ(0, amdgpu_cs_query_fence_status(&fenceStatus,
                                                  static_cast<uint64_t>(g_TestTimeOut)*1000000,
                                                  0, &expired));
        if (!expired)
            WARN() << "CS did not signal completion" << std::endl;

        /* If a compute queue is given, submit a short compute job
         * every 16 loops (about once a second). If the process was
         * evicted, restore can take quite long.
         */
        if (computeQueue && (i & 0xf) == 0) {
            computeQueue->PlaceAndSubmitPacket(PM4NopPacket());
            computeQueue->Wait4PacketConsumption(NULL, 10000);
        }
    }

    EXPECT_EQ(0, amdgpu_bo_list_destroy(boList));

    EXPECT_EQ(0, amdgpu_bo_unmap_and_free(ibResultHandle, vaHandle,
        ibResultMcAddress, PAGE_SIZE));

    EXPECT_EQ(0, amdgpu_cs_ctx_free(contextHandle));
}

/* Evict and restore procedure basic test
 *
 * N_PROCESSES processes together allocate 7/8 of VRAM through KFD, in
 * buffers of ALLOCATE_BUF_SIZE_MB each, i.e. every process allocates
 *     count = (vramSize * 7 / 8) / (bufferSize * N_PROCESSES)
 * buffers. On top of that every process allocates an amdgpu (GFX) buffer
 * object of min(sysMemSize / 4, testSize / 3) / N_PROCESSES bytes and keeps
 * submitting SDMA NOP command buffers that reference it.
 *
 * Eviction and restore are expected to happen many times:
 * ttm will evict buffers of another process if there is not enough free vram
 * process restore will evict buffers of another process
 *
 * Sometimes the allocation may fail (maybe that is normal)
 * ALLOCATE_RETRY_TIMES max retry times to allocate
 *
 * This is basic test with no queue, so vram is not used by the GPU during test
 *
 * TODO:
 *    - Synchronization between the processes, so they know for sure when
 *        they are done allocating memory
 */
TEST_F(KFDEvictTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    // Validate the node id while it is still signed; the same check on an
    // unsigned value can never fail.
    const int defaultNodeId = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultNodeId, 0) << "failed to get default GPU Node";
    HSAuint32 defaultGPUNode = defaultNodeId;
    HSAuint64 vramBufSize = ALLOCATE_BUF_SIZE_MB * 1024 * 1024;

    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(defaultGPUNode);
    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    HSAuint32 count = testSize / (vramBufSize * N_PROCESSES);

    // count is derived from the VRAM size only, so a zero here means the
    // VRAM is too small for even one buffer per process.
    if (count == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }

    /* Fork the child processes */
    ForkChildProcesses(defaultGPUNode, N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses(defaultGPUNode);
        return;
    }

    std::vector<void *> pBuffers;
    AllocBuffers(m_IsParent[gpuIndex], defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one-fourth system memory */
    HSAuint64 size = sysMemSize / 4 < testSize / 3 ? sysMemSize / 4 : testSize / 3;
    // Starts out NULL: a fatal assertion inside AllocAmdgpuBo only leaves
    // that helper, so the handle would otherwise be used uninitialized below.
    amdgpu_bo_handle handle = NULL;
    AllocAmdgpuBo(m_IsParent[gpuIndex], rn, size, handle);

    AmdgpuCommandSubmissionSdmaNop(rn, handle);

    if (handle)
        FreeAmdgpuBo(handle);
    LOG() << m_psName[gpuIndex] << "free buffer" << std::endl;
    FreeBuffers(pBuffers, vramBufSize);

    WaitChildProcesses(defaultGPUNode);

    TEST_END
}

/* Evict and restore queue test
 *
 * N_PROCESSES processes read all local buffers in parallel while buffers are evicted and restored
 * If GPU vm page fault happens, then test shader will stop and failed to write specific value
 * at dest buffer. Test will report failed.
 *
 * Steps:
 *    - fork N_PROCESSES processes, each process does the same below
 *    - allocate local buffers, each buffer size is 64MB
 *    - allocate zero initialized host access address buffer and result buffer
 *        address buffer to pass address of local buffers to shader
 *        result buffer to store shader output result
 *    - submit queue to run ReadMemory shader
 *    - shader start m_DimX wavefronts, each wavefront keep reading one local buffer
 *    - notify shader to quit
 *    - check result buffer with specific value to confirm all wavefronts quit normally
 */
TEST_F(KFDEvictTest, QueueTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    /* NOTE(review): defaultGPUNode is unsigned here, so the >= 0 check below
     * cannot fail as written - confirm how HsaDefaultGPUNode() reports that
     * no GPU node is available.
     */
    HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
    unsigned int count = MAX_WAVEFRONTS;

    int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(defaultGPUNode);
    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    /* Skip test for chip if it doesn't have CWSR, which the test depends on */
    if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties)) {
        LOG() << std::hex << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    HSAuint32 i;
    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    HSAuint32 vramBufSize = testSize / (count * N_PROCESSES);
    /* Round the per-buffer size down to a whole number of megabytes */
    vramBufSize = (vramBufSize / (1024 * 1024)) * (1024 * 1024);

    /* The buffer size is derived from VRAM only, so a zero size means there
     * is not enough VRAM to give every wavefront at least 1MB.
     */
    if (vramBufSize == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }
    /* Assert all buffer address can be stored within one page
     * because only one page host memory srcBuf is allocated
     */
    ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));

    /* Fork the child processes */
    ForkChildProcesses(defaultGPUNode, N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses(defaultGPUNode);
        return;
    }

    /* One page each for the shader code, the table of buffer addresses
     * handed to the shader, and the per-wavefront results.
     */
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer addrBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer resultBuffer(PAGE_SIZE, defaultGPUNode);

    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As<char*>()));

    PM4Queue pm4Queue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));

    Dispatch dispatch0(isaBuffer);

    std::vector<void *> pBuffers;
    AllocBuffers(m_IsParent[gpuIndex], defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one-fourth system memory */
    HSAuint64 size = sysMemSize / 4 < testSize / 3 ? sysMemSize / 4 : testSize / 3;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(m_IsParent[gpuIndex], rn, size, handle);

    /* One wavefront is started per buffer that AllocBuffers put in pBuffers */
    unsigned int wavefront_num = pBuffers.size();
    LOG() << m_psName[gpuIndex] << "wavefront number " << wavefront_num << std::endl;

    void **localBufAddr = addrBuffer.As<void **>();
    unsigned int *result = resultBuffer.As<uint32_t *>();

    /* Publish the address of every local buffer in the address page */
    for (i = 0; i < wavefront_num; i++)
        *(localBufAddr + i) = pBuffers[i];

    /* Seed every result slot with the buffer size; the check at the end of
     * the test expects each slot to hold 0x5678 instead.
     */
    for (i = 0; i < wavefront_num; i++)
        *(result + i) = vramBufSize;

    dispatch0.SetArgs(localBufAddr, result);
    dispatch0.SetDim(wavefront_num, 1, 1);
    /* Submit the packet and start shader */
    dispatch0.Submit(pm4Queue);

    AmdgpuCommandSubmissionSdmaNop(rn, handle);

    /* Uncomment this line for debugging */
    // LOG() << m_psName << "notify shader to quit" << std::endl;

    /* Fill address buffer so shader quits */
    addrBuffer.Fill(0x5678);

    /* Wait for shader to finish or timeout if shader has vm page fault */
    EXPECT_EQ(0, dispatch0.SyncWithStatus(g_TestTimeOut * 5));

    EXPECT_SUCCESS(pm4Queue.Destroy());

    FreeAmdgpuBo(handle);

    /* Uncomment this line for debugging */
    // LOG() << m_psName << "free buffer" << std::endl;

    /* Cleanup */
    FreeBuffers(pBuffers, vramBufSize);

    /* Check if all wavefronts finished successfully */
    for (i = 0; i < wavefront_num; i++)
        EXPECT_EQ(0x5678, *(result + i));

    WaitChildProcesses(defaultGPUNode);

    TEST_END
}

/* Evict a queue running in bursts, so that the process has a chance
 * to be idle when restored but the queue needs to resume to perform
 * more work later. This test is designed to stress the idle process
 * eviction optimization in KFD that leaves idle processes evicted
 * until the next time the doorbell page is accessed.
 */
TEST_F(KFDEvictTest, BurstyTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* NOTE(review): defaultGPUNode is unsigned here, so the >= 0 check below
     * cannot fail as written - confirm how HsaDefaultGPUNode() reports that
     * no GPU node is available.
     */
    HSAuint32 defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
    HSAuint64 vramBufSize = ALLOCATE_BUF_SIZE_MB * 1024 * 1024;

    int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(defaultGPUNode);
    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    /* Number of fixed-size buffers each process allocates */
    HSAuint32 count = testSize / (vramBufSize * N_PROCESSES);

    /* The count is derived from VRAM only, so zero means the VRAM share of
     * one process cannot hold even a single buffer.
     */
    if (count == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }

    /* Fork the child processes */
    ForkChildProcesses(defaultGPUNode, N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses(defaultGPUNode);
        return;
    }

    PM4Queue pm4Queue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));

    std::vector<void *> pBuffers;
    AllocBuffers(m_IsParent[gpuIndex], defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = sysMemSize / 3 < testSize / 2 ? sysMemSize / 3 : testSize / 2;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(m_IsParent[gpuIndex], rn, size, handle);

    /* Unlike QueueTest, the compute queue is handed to the SDMA helper */
    AmdgpuCommandSubmissionSdmaNop(rn, handle, &pm4Queue);

    FreeAmdgpuBo(handle);
    LOG() << m_psName[gpuIndex] << "free buffer" << std::endl;
    FreeBuffers(pBuffers, vramBufSize);

    EXPECT_SUCCESS(pm4Queue.Destroy());

    WaitChildProcesses(defaultGPUNode);

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDEvictTest.hpp
================================================
/*
 * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_EVICT_TEST__H__
#define __KFD_EVICT_TEST__H__

#include <string>
#include <vector>
#include "KFDMultiProcessTest.hpp"
#include "PM4Queue.hpp"

// @class KFDEvictTest
// Test eviction and restore procedure using two processes
class KFDEvictTest : public KFDMultiProcessTest {
 public:
    KFDEvictTest() {}
    ~KFDEvictTest() {}

 protected:
    // Fixture hooks; the bodies live in the matching .cpp file.
    virtual void SetUp();
    virtual void TearDown();

    // Buffer helpers used by the eviction tests. The tests pass `count`
    // buffers of `vramBufSize` bytes and read the resulting addresses back
    // from `pBuffers`; FreeBuffers takes the same vector and size again.
    void AllocBuffers(bool m_IsParent,
                      HSAuint32 defaultGPUNode,
                      HSAuint32 count,
                      HSAuint64 vramBufSize,
                      std::vector<void *> &pBuffers);
    void FreeBuffers(std::vector<void *> &pBuffers,
                     HSAuint64 vramBufSize);

    // amdgpu buffer-object helpers. `rn` is the value the tests obtain from
    // FindDRMRenderNode(); `handle` is filled in by AllocAmdgpuBo and later
    // handed to the submission helper and to FreeAmdgpuBo.
    void AllocAmdgpuBo(bool m_IsParent,
                       int rn,
                       HSAuint64 vramBufSize,
                       amdgpu_bo_handle &handle);
    void FreeAmdgpuBo(amdgpu_bo_handle handle);
    void AmdgpuCommandSubmissionSdmaNop(int rn,
                                        amdgpu_bo_handle handle,
                                        PM4Queue *computeQueue);

 protected:  // Members
    HsaMemFlags m_Flags;
    void*       m_pBuf;
};

#endif  // __KFD_EVICT_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDExceptionTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDExceptionTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
#include <sys/mman.h>

// Fixture set-up: runs the common base-class set-up and logs a warning that
// kernel-level page fault messages are expected while these tests run.
void KFDExceptionTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    // Informational only: the faults provoked by this fixture are intended.
    LOG() << "This Exception test might cause expected page fault "
             "error logs at kernel level." << std::endl;

    ROUTINE_END
}

// Fixture tear-down: runs the common base-class tear-down, then pauses so
// that faults left over from this test do not leak into the next one.
void KFDExceptionTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    // WORKAROUND: This needs to be fixed in the kernel
    // Wait 500ms for the kernel to process any fault storms before the
    // next test to avoid reporting incorrect faults in the next test.
    Delay(500);

    ROUTINE_END
}

/* Test for memory exception. The function expects a Memory Fault to be
 * triggered by the GPU when it tries to copy dword from pSrc to pDst.
 * Should be called from a Child Process since the Memory Fault causes
 * all the queues to be halted.
*/
void KFDExceptionTest::TestMemoryException(int gpuNode, HSAuint64 pSrc,
                                           HSAuint64 pDst, unsigned int dimX,
                                           unsigned int dimY, unsigned int dimZ) {
    /* Parameters:
     *   gpuNode          node on which the queue, event and ISA buffer are created
     *   pSrc, pDst       addresses handed to the CopyDwordIsa shader as its arguments
     *   dimX, dimY, dimZ dispatch dimensions
     * The outcome is reported through m_ChildStatus rather than a return value.
     */
    PM4Queue queue;
    HsaEvent *vmFaultEvent;
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HSAuint64 faultAddress, page_mask = ~((HSAuint64)PAGE_SIZE - 1);
    Dispatch dispatch(isaBuffer, false);

    HsaEventDescriptor eventDesc;
    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
    eventDesc.NodeId = gpuNode;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.SyncVar.SyncVarSize = 0;

    /* Start from a failing status so that leaving this function before the
     * first real status is recorded is never mistaken for success.
     */
    m_ChildStatus = HSAKMT_STATUS_ERROR;

    ASSERT_SUCCESS_GPU(GetAssemblerFromNodeId(
       gpuNode)->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    m_ChildStatus = queue.Create(gpuNode);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Queue create failed, on gpuNode: " << gpuNode << std::endl;
        return;
    }
    m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Event create failed on gpuNode: " << gpuNode << std::endl;
        goto queuefail;
    }

    dispatch.SetDim(dimX, dimY, dimZ);
    dispatch.SetArgs(reinterpret_cast<void *>(pSrc), reinterpret_cast<void *>(pDst));
    dispatch.Submit(queue);

    /* The memory event is expected to fire; a failed wait means no fault */
    m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Wait failed. No Exception triggered on gpuNode: " << gpuNode << std::endl;
        goto eventfail;
    }

    if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
        WARN() << "Unexpected Event Received on gpuNode: " << gpuNode
               << " type: " << vmFaultEvent->EventData.EventType
               << std::endl;
        m_ChildStatus = HSAKMT_STATUS_ERROR;
        goto eventfail;
    }
    /* The reported address is compared against the page-aligned source and
     * destination addresses; either one is accepted.
     */
    faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
    if (faultAddress != (pSrc & page_mask) &&
        faultAddress != (pDst & page_mask) ) {
        WARN() << "gpuNode: " << gpuNode << " Unexpected Fault Address " << faultAddress
               << " expected " << (pSrc & page_mask) << " or "
               << (pDst & page_mask) << std::endl;
        m_ChildStatus = HSAKMT_STATUS_ERROR;
    }

eventfail:
    hsaKmtDestroyEvent(vmFaultEvent);
queuefail:
    queue.Destroy();
}

/* SDMA counterpart of TestMemoryException: submits an SDMA write of one data
 * word to pDst on gpuNode and expects a memory fault event whose address is
 * the page containing pDst. The outcome is reported through m_ChildStatus.
 */
void KFDExceptionTest::TestSdmaException(int gpuNode, void *pDst) {
    SDMAQueue queue;
    HsaEvent *vmFaultEvent;
    HSAuint64 faultAddress, page_mask = ~((HSAuint64)PAGE_SIZE - 1);


    HsaEventDescriptor eventDesc;
    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
    eventDesc.NodeId = gpuNode;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.SyncVar.SyncVarSize = 0;

    m_ChildStatus = queue.Create(gpuNode);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Queue create failed on gpuNode: " << gpuNode << std::endl;
        return;
    }

    m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Event create failed on gpuNode: " << gpuNode << std::endl;
        goto queuefail;
    }

    /* pDst is already a void *, so it is passed through unchanged */
    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(),
                                                   pDst,
                                                   0x02020202));

    /* The memory event is expected to fire; a failed wait means no fault */
    m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Wait failed. No Exception triggered on gpuNode: " << gpuNode << std::endl;
        goto eventfail;
    }

    if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
        WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType
               << std::endl;
        m_ChildStatus = HSAKMT_STATUS_ERROR;
        goto eventfail;
    }
    faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
    if (faultAddress != ((HSAuint64)pDst & page_mask) ) {
        WARN() << "gpuNode: " << gpuNode << " Unexpected Fault Address " << faultAddress
               << " expected " << ((HSAuint64)pDst & page_mask) << std::endl;
        m_ChildStatus = HSAKMT_STATUS_ERROR;
    }

eventfail:
    hsaKmtDestroyEvent(vmFaultEvent);
queuefail:
    queue.Destroy();
}

/* Body of the AddressFault test for the GPU node named in pTestParamters.
 * A forked child copies a dword from a valid system buffer to the bogus
 * address 0x12345678 and reports the outcome through its exit status; the
 * parent waits for the child and checks how it terminated.
 */
void AddressFault(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDExceptionTest* pKFDExceptionTest = (KFDExceptionTest*)pTestParamters->pTestObject;

    const HSAuint32 m_FamilyId = pKFDExceptionTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    pid_t m_ChildPid = fork();
    if (m_ChildPid == 0) {
        /* Re-run the fixture set-up inside the child process */
        pKFDExceptionTest->TearDown();
        pKFDExceptionTest->SetUp();

        HsaMemoryBuffer srcBuffer(PAGE_SIZE, gpuNode, false);

        srcBuffer.Fill(0xAA55AA55);

        /* Assume failure until TestMemoryException records a real status,
         * then hand that status to the parent. Exiting with a constant 0
         * would make the parent's exit-status check pass unconditionally.
         */
        pKFDExceptionTest->m_ChildStatus = HSAKMT_STATUS_ERROR;
        pKFDExceptionTest->TestMemoryException(gpuNode, srcBuffer.As<HSAuint64>(),
                                               0x12345678ULL);
        exit(pKFDExceptionTest->m_ChildStatus);
    } else {
        int childStatus;

        waitpid(m_ChildPid, &childStatus, 0);
        /* dGPU: the child must exit normally with a success status.
         * Otherwise the child is expected to be killed by SIGSEGV.
         */
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ_GPU(WIFEXITED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS, gpuNode);
        } else {
            EXPECT_EQ_GPU(WIFSIGNALED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WTERMSIG(childStatus), SIGSEGV, gpuNode);
        }
    }
}

/* Test Bad Address access in a child process */
TEST_F(KFDExceptionTest, AddressFault) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    // The test logic lives in the free function AddressFault(); it is handed
    // to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(AddressFault));

    TEST_END
}

/* Allocate Read Only buffer. Test Memory Exception failure by
 * attempting to write to that buffer in the child process.
 */
void PermissionFault(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDExceptionTest* pKFDExceptionTest = (KFDExceptionTest*)pTestParamters->pTestObject;

    const HSAuint32 m_FamilyId = pKFDExceptionTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    pid_t m_ChildPid = fork();
    if (m_ChildPid == 0) {
        /* Re-run the fixture set-up inside the child process */
        pKFDExceptionTest->TearDown();
        pKFDExceptionTest->SetUp();

        /* Destination of the copy: a buffer allocated read-only */
        HsaMemoryBuffer readOnlyBuffer(PAGE_SIZE, gpuNode, false /*zero*/,
                                       false /*isLocal*/, true /*isExec*/,
                                       false /*isScratch*/, true /*isReadOnly*/);
        HsaMemoryBuffer srcSysBuffer(PAGE_SIZE, gpuNode, false);

        srcSysBuffer.Fill(0xAA55AA55);

        /* Assume failure until TestMemoryException records a real status,
         * then hand that status to the parent. Exiting with a constant 0
         * would make the parent's exit-status check pass unconditionally.
         */
        pKFDExceptionTest->m_ChildStatus = HSAKMT_STATUS_ERROR;
        pKFDExceptionTest->TestMemoryException(gpuNode, srcSysBuffer.As<HSAuint64>(),
                                               readOnlyBuffer.As<HSAuint64>());

        exit(pKFDExceptionTest->m_ChildStatus);
    } else {
        int childStatus;

        waitpid(m_ChildPid, &childStatus, 0);
        /* dGPU: the child must exit normally with a success status.
         * Otherwise the child is expected to be killed by SIGSEGV.
         * The _GPU assertion variants are used so that a failure names
         * the node, as in the other exception routines of this file.
         */
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ_GPU(WIFEXITED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS, gpuNode);
        } else {
            EXPECT_EQ_GPU(WIFSIGNALED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WTERMSIG(childStatus), SIGSEGV, gpuNode);
        }
    }

}

TEST_F(KFDExceptionTest, PermissionFault) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    // The test logic lives in the free function PermissionFault(); it is
    // handed to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(PermissionFault));

    TEST_END
}

/* Allocate Read Only user pointer buffer. Test Memory Exception failure by
 * attempting to write to that buffer in the child process.
 */
void PermissionFaultUserPointer(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDExceptionTest* pKFDExceptionTest = (KFDExceptionTest*)pTestParamters->pTestObject;

    const HSAuint32 m_FamilyId = pKFDExceptionTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    pid_t m_ChildPid = fork();
    if (m_ChildPid == 0) {
        /* Re-run the fixture set-up inside the child process */
        pKFDExceptionTest->TearDown();
        pKFDExceptionTest->SetUp();

        /* Destination of the copy: an anonymous page mapped PROT_READ only,
         * registered with and mapped to the GPU as a user pointer.
         */
        void *pBuf = mmap(NULL, PAGE_SIZE, PROT_READ,
                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (pBuf == MAP_FAILED) {
            /* A gtest ASSERT would return from this function and let the
             * forked child run on inside the test harness, so the child
             * terminates explicitly with a failing status instead.
             */
            WARN() << "mmap failed on gpuNode: " << gpuNode << std::endl;
            exit(HSAKMT_STATUS_ERROR);
        }
        EXPECT_SUCCESS(hsaKmtRegisterMemory(pBuf, PAGE_SIZE));
        EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(pBuf, PAGE_SIZE, NULL));
        HsaMemoryBuffer srcSysBuffer(PAGE_SIZE, gpuNode, false);

        srcSysBuffer.Fill(0xAA55AA55);

        /* Assume failure until TestMemoryException records a real status,
         * then hand that status to the parent. Exiting with a constant 0
         * would make the parent's exit-status check pass unconditionally.
         */
        pKFDExceptionTest->m_ChildStatus = HSAKMT_STATUS_ERROR;
        pKFDExceptionTest->TestMemoryException(gpuNode, srcSysBuffer.As<HSAuint64>(),
                                               (HSAuint64)pBuf);

        exit(pKFDExceptionTest->m_ChildStatus);
    } else {
        int childStatus;

        waitpid(m_ChildPid, &childStatus, 0);
        /* dGPU: the child must exit normally with a success status.
         * Otherwise the child is expected to be killed by SIGSEGV.
         * The _GPU assertion variants are used so that a failure names
         * the node, as in the other exception routines of this file.
         */
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ_GPU(WIFEXITED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS, gpuNode);
        } else {
            EXPECT_EQ_GPU(WIFSIGNALED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WTERMSIG(childStatus), SIGSEGV, gpuNode);
        }
    }

}

TEST_F(KFDExceptionTest, PermissionFaultUserPointer) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    // Launch the user-pointer variant. Passing PermissionFault here would
    // only repeat the PermissionFault test and leave the read-only user
    // pointer path in PermissionFaultUserPointer() unexercised.
    ASSERT_SUCCESS(KFDTest_Launch(PermissionFaultUserPointer));

    TEST_END
}

/* Test VM fault storm handling by copying to/from invalid pointers
 * with lots of work items at the same time
 */
void FaultStorm(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDExceptionTest* pKFDExceptionTest = (KFDExceptionTest*)pTestParamters->pTestObject;

    const HSAuint32 m_FamilyId = pKFDExceptionTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    pid_t m_ChildPid = fork();
    if (m_ChildPid == 0) {
        /* Re-run the fixture set-up inside the child process */
        pKFDExceptionTest->TearDown();
        pKFDExceptionTest->SetUp();

        /* Assume failure until TestMemoryException records a real status,
         * then hand that status to the parent. Exiting with a constant 0
         * would make the parent's exit-status check pass unconditionally.
         */
        pKFDExceptionTest->m_ChildStatus = HSAKMT_STATUS_ERROR;
        /* Both addresses are invalid and the dispatch is 1024 x 1024 x 1 */
        pKFDExceptionTest->TestMemoryException(gpuNode, 0x12345678, 0x76543210, 1024, 1024, 1);

        exit(pKFDExceptionTest->m_ChildStatus);
    } else {
        int childStatus;

        waitpid(m_ChildPid, &childStatus, 0);
        /* dGPU: the child must exit normally with a success status.
         * Otherwise the child is expected to be killed by SIGSEGV.
         */
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ_GPU(WIFEXITED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS, gpuNode);
        } else {
            EXPECT_EQ_GPU(WIFSIGNALED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WTERMSIG(childStatus), SIGSEGV, gpuNode);
        }
    }

}

TEST_F(KFDExceptionTest, FaultStorm) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    // The test logic lives in the free function FaultStorm(); it is handed
    // to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(FaultStorm));

    TEST_END
}

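/* Test SDMA exception handling in a child process: allocate a page, map it
 * to the GPU and unmap it again, then submit an SDMA write to that page and
 * expect a memory fault event for its address.
 */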
void SdmaQueueException(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDExceptionTest* pKFDExceptionTest = (KFDExceptionTest*)pTestParamters->pTestObject;

    const HSAuint32 m_FamilyId = pKFDExceptionTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    pid_t m_ChildPid = fork();
    if (m_ChildPid == 0) {
        unsigned int* pDb = NULL;
        unsigned int *nullPtr = NULL;

        /* Re-run the fixture set-up inside the child process */
        pKFDExceptionTest->TearDown();
        pKFDExceptionTest->SetUp();

        HsaMemFlags m_MemoryFlags;
        m_MemoryFlags.Value = 0;
        // All flags start cleared; only the two below are set explicitly.
        m_MemoryFlags.ui32.NonPaged = 1;                         // Non-paged
        m_MemoryFlags.ui32.HostAccess = 0;                       // Not host accessible
        /* NOTE(review): if the ASSERT_*_GPU macros return on failure, the
         * forked child would leave this function without calling exit() -
         * confirm how those macros behave.
         */
        ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, m_MemoryFlags,
                                  reinterpret_cast<void**>(&pDb)), gpuNode);
        // verify that pDb is not null before it's being used
        ASSERT_NE_GPU(nullPtr, pDb, gpuNode) << "hsaKmtAllocMemory returned a null pointer";
        /* Map and immediately unmap, so the SDMA write below targets an
         * address that is no longer mapped to the GPU.
         */
        ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL), gpuNode);
        EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(pDb), gpuNode);

        pKFDExceptionTest->TestSdmaException(gpuNode, pDb);
        EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pDb, PAGE_SIZE), gpuNode);

        /* Hand the status recorded by TestSdmaException to the parent.
         * Exiting with a constant 0 would make the parent's exit-status
         * check pass unconditionally.
         */
        exit(pKFDExceptionTest->m_ChildStatus);
    } else {
        int childStatus;

        waitpid(m_ChildPid, &childStatus, 0);
        /* dGPU: the child must exit normally with a success status.
         * Otherwise the child is expected to be killed by SIGSEGV.
         */
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ_GPU(WIFEXITED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS, gpuNode);
        } else {
            EXPECT_EQ_GPU(WIFSIGNALED(childStatus), true, gpuNode);
            EXPECT_EQ_GPU(WTERMSIG(childStatus), SIGSEGV, gpuNode);
        }
    }
}

TEST_F(KFDExceptionTest, SdmaQueueException) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    // The test logic lives in the free function SdmaQueueException(); it is
    // handed to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(SdmaQueueException));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDExceptionTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_EXCEPTION_TEST__H__
#define __KFD_EXCEPTION_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

class KFDExceptionTest : public KFDBaseComponentTest {
 public:
    KFDExceptionTest() : m_ChildPid(-1) {
        /* Because there could be early return before m_ChildPid is set
         * by fork(), we should initialize m_ChildPid to a non-zero value
         * to avoid possible exit of the main process.
         */
    }

    ~KFDExceptionTest() {
        /* exit() is necessary for the child process. Otherwise when the
         * child process finishes, gtest assumes the test has finished and
         * starts the next test while the parent is still active.
         */
        if (m_ChildPid == 0) {
            if (!m_ChildStatus && HasFatalFailure())
                m_ChildStatus = HSAKMT_STATUS_ERROR;
            exit(m_ChildStatus);
        }
    }

    friend void AddressFault(KFDTEST_PARAMETERS* pTestParamters);
    friend void PermissionFault(KFDTEST_PARAMETERS* pTestParamters);
    friend void PermissionFaultUserPointer(KFDTEST_PARAMETERS* pTestParamters);
    friend void FaultStorm(KFDTEST_PARAMETERS* pTestParamters);
    friend void SdmaQueueException(KFDTEST_PARAMETERS* pTestParamters);

 protected:
    virtual void SetUp();
    virtual void TearDown();

    void TestMemoryException(int gpuNode, HSAuint64 pSrc, HSAuint64 pDst,
                             unsigned int dimX = 1, unsigned int dimY = 1,
                             unsigned int dimZ = 1);
    void TestSdmaException(int gpuNode, void *pDst);

 protected:  // Members
    pid_t m_ChildPid;
    HSAKMT_STATUS m_ChildStatus;
};

#endif  // __KFD_EXCEPTION_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDGWSTest.cpp
================================================
/*
 * Copyright (C) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDGWSTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "Dispatch.hpp"

// Fixture set-up: nothing GWS-specific, only the common base-class set-up.
void KFDGWSTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Fixture tear-down: nothing GWS-specific, only the common base-class tear-down.
void KFDGWSTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* Body of the GWS Allocate test for the GPU node named in pTestParamters:
 * create a PM4 queue, request all of the node's GWS entries for it and
 * expect the first allocated entry to be index 0. Nodes that report no GWS
 * are skipped.
 */
static void Allocate(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDGWSTest* fixture = (KFDGWSTest*)pTestParamters->pTestObject;

    HSAuint32 firstGWS;
    PM4Queue queue;

    /* Look the node's properties up through the fixture's node info */
    const HsaNodeProperties *pNodeProperties =
        fixture->Get_NodeInfo()->GetNodeProperties(gpuNode);

    const bool gwsAvailable = pNodeProperties && pNodeProperties->NumGws;
    if (!gwsAvailable) {
        LOG() << "Skip test: GPU node doesn't support GWS" << std::endl;
        return;
    }

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtAllocQueueGWS(queue.GetResource()->QueueId,
                       pNodeProperties->NumGws,&firstGWS), gpuNode);
    EXPECT_EQ_GPU(0, firstGWS, gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}
TEST_F(KFDGWSTest, Allocate) {
    TEST_START(TESTPROFILE_RUNALL);

    // The test logic lives in the static function Allocate(); it is handed
    // to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(Allocate));

    TEST_END
}

/* Body of the GWS Semaphore test for the GPU node named in pTestParamters:
 * allocate the node's GWS entries for a queue, run the GwsInitIsa shader
 * once, then run GwsAtomicIncreaseIsa over a 1024 x 16 x 16 dispatch and
 * check the first dword of the shared buffer. Nodes that report no GWS are
 * skipped.
 */
static void Semaphore(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDGWSTest* pKFDGWSTest = (KFDGWSTest*)pTestParamters->pTestObject;

    HsaNodeInfo* m_NodeInfo = pKFDGWSTest->Get_NodeInfo();
    const HsaNodeProperties *pNodeProperties = m_NodeInfo->GetNodeProperties(gpuNode);

    HSAuint32 firstGWS;
    HSAuint32 numResources = 1;
    PM4Queue queue;

    if (!pNodeProperties || !pNodeProperties->NumGws) {
        LOG() << "Skip test: GPU node doesn't support GWS" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer buffer(PAGE_SIZE, gpuNode, true, false, false);
    /* Use the _GPU assertion variant like every other check in this
     * routine, so that a queue creation failure names the node.
     */
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtAllocQueueGWS(queue.GetResource()->QueueId,
                       pNodeProperties->NumGws,&firstGWS), gpuNode);
    EXPECT_EQ_GPU(0, firstGWS, gpuNode);

    Assembler* m_pAsm;
    m_pAsm = pKFDGWSTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(GwsInitIsa, isaBuffer.As<char*>()), gpuNode);

    /* First dispatch: the init shader, with numResources stored at the
     * start of the shared buffer.
     */
    Dispatch dispatch0(isaBuffer);
    buffer.Fill(numResources, 0, 4);
    dispatch0.SetArgs(buffer.As<void*>(), NULL);
    dispatch0.Submit(queue);
    dispatch0.Sync();

    /* Second dispatch: the same ISA buffer is reused for the increment shader */
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(GwsAtomicIncreaseIsa, isaBuffer.As<char*>()),gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer.As<void*>(), NULL);
    dispatch.SetDim(1024, 16, 16);

    dispatch.Submit(queue);
    dispatch.Sync();

    /* Expected value: the initial 1 plus one per item of the 1024 x 16 x 16 dispatch */
    EXPECT_EQ_GPU(1024*16*16+1, *buffer.As<uint32_t *>(), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(),gpuNode);

}

TEST_F(KFDGWSTest, Semaphore) {
    TEST_START(TESTPROFILE_RUNALL);

    // The test logic lives in the static function Semaphore(); it is handed
    // to KFDTest_Launch, which calls it with a KFDTEST_PARAMETERS
    // (presumably once per GPU node under test - confirm in KFDTest_Launch).
    ASSERT_SUCCESS(KFDTest_Launch(Semaphore));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDGWSTest.hpp
================================================
/*
 * Copyright (C) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_GWS_TEST__H__
#define __KFD_GWS_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

// @class KFDGWSTest
// gtest fixture used by the KFDGWSTest test cases; derives from
// KFDBaseComponentTest.
class KFDGWSTest : public KFDBaseComponentTest {
 public:
    KFDGWSTest() {}
    ~KFDGWSTest() {}

 protected:
    // Fixture hooks; only declared here, the definitions live outside this header.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFD_GWS_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDGraphicsInterop.cpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDGraphicsInterop.hpp"

#include "Dispatch.hpp"
#include "PM4Queue.hpp"

/* Per-GPU body of the KFDGraphicsInterop.RegisterGraphicsHandle test.
 *
 * Steps:
 *  1. Allocate a CPU-accessible VRAM buffer through libdrm (amdgpu_bo_alloc),
 *     fill its first page with 0xaa and attach a small metadata string.
 *  2. Export the buffer as a dma-buf fd and register that fd with
 *     hsaKmtRegisterGraphicsHandleToNodes for this GPU node.
 *  3. Check that the size and metadata reported by the thunk match.
 *  4. Map the buffer, run the CopyDwordIsa shader with the registered buffer
 *     and a system-memory buffer as arguments, and expect 0xaaaaaaaa in the
 *     first dword of the latter.
 *  5. Check hsaKmtQueryPointerInfo, then unmap and deregister.
 *
 * The routine returns early (test skipped) on Tonga and when no DRM render
 * node is found for the GPU node.
 *
 * @param pTestParamters  carries the GPU node id (gpuNode) and the owning
 *                        KFDGraphicsInterop fixture (pTestObject)
 */
static void RegisterGraphicsHandle(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDGraphicsInterop* pKFDGraphicsInterop = (KFDGraphicsInterop*)pTestParamters->pTestObject;

    HsaNodeInfo* m_NodeInfo = pKFDGraphicsInterop->Get_NodeInfo();
    const HsaNodeProperties *pNodeProps = m_NodeInfo->GetNodeProperties(gpuNode);
    const HSAuint32 familyID = FamilyIdFromNode(pNodeProps);

    if (isTonga(pNodeProps)) {
        LOG() << "Skipping test: Tonga workaround in thunk returns incorrect allocation size." << std::endl;
        return;
    }

    HSAuint32 nodes[1] = {(uint32_t)gpuNode};

    const char metadata[] = "This data is really meta.";
    // Include the terminating NUL so the metadata can be compared with strcmp
    unsigned metadata_size = strlen(metadata)+1;
    int rn = pKFDGraphicsInterop->FindDRMRenderNode(gpuNode);

    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU node." << std::endl;
        return;
    }

    // Create the buffer with metadata and get a dmabuf handle to it
    struct amdgpu_bo_alloc_request alloc;
    amdgpu_bo_handle handle;
    // The Tonga half of this condition cannot be true here because of the
    // early return above; it is kept so the sizing stays correct if that
    // skip is ever removed.
    if (familyID == FAMILY_CZ || isTonga(pNodeProps))
        alloc.alloc_size = PAGE_SIZE * 8;
    else
        alloc.alloc_size = PAGE_SIZE;
    alloc.phys_alignment = PAGE_SIZE;
    alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
    alloc.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    ASSERT_EQ_GPU(0, amdgpu_bo_alloc(pKFDGraphicsInterop->m_RenderNodes[rn].device_handle, &alloc, &handle), gpuNode);

    // Fill the first page from the CPU with the pattern the shader copy is
    // expected to read back
    void *pCpuMap;
    ASSERT_EQ_GPU(0, amdgpu_bo_cpu_map(handle, &pCpuMap), gpuNode);
    memset(pCpuMap, 0xaa, PAGE_SIZE);
    EXPECT_EQ_GPU(0, amdgpu_bo_cpu_unmap(handle), gpuNode);

    struct amdgpu_bo_metadata meta;
    meta.flags = 0;
    meta.tiling_info = 0;
    meta.size_metadata = metadata_size;
    memcpy(meta.umd_metadata, metadata, metadata_size);
    EXPECT_EQ_GPU(0, amdgpu_bo_set_metadata(handle, &meta), gpuNode);

    uint32_t dmabufFd;
    EXPECT_EQ_GPU(0, amdgpu_bo_export(handle, amdgpu_bo_handle_type_dma_buf_fd, &dmabufFd), gpuNode);

    // Register it with HSA
    HsaGraphicsResourceInfo info;
    ASSERT_SUCCESS_GPU(hsaKmtRegisterGraphicsHandleToNodes(dmabufFd, &info,
                                                       1, nodes), gpuNode);

    /* DMA buffer handle and GEM handle are no longer needed, KFD
     * should have taken a reference to the BO
     */
    EXPECT_EQ_GPU(0, close(dmabufFd), gpuNode);
    EXPECT_EQ_GPU(0, amdgpu_bo_free(handle), gpuNode);

    // Check that buffer size and metadata match
    EXPECT_EQ_GPU(info.SizeInBytes, alloc.alloc_size, gpuNode);
    EXPECT_EQ_GPU(info.MetadataSizeInBytes, metadata_size, gpuNode);
    EXPECT_EQ_GPU(0, strcmp(metadata, (const char *)info.Metadata), gpuNode);

    // Map the buffer
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(info.MemoryAddress,
                                        info.SizeInBytes,
                                        NULL), gpuNode);

    // Copy contents to a system memory buffer for comparison
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    Assembler* m_pAsm;
    m_pAsm = pKFDGraphicsInterop->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    // Use the per-GPU assertion like every other check in this routine so a
    // failure is attributed to the GPU node being tested.
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode, true/*zero*/);

    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(info.MemoryAddress, dstBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    EXPECT_EQ_GPU(dstBuffer.As<unsigned int *>()[0], 0xaaaaaaaa, gpuNode);

    // Test QueryMem before the cleanup
    HsaPointerInfo ptrInfo;
    EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo((const void *)info.MemoryAddress, &ptrInfo), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_REGISTERED_GRAPHICS, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.Node, (HSAuint32)gpuNode, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.GPUAddress, (HSAuint64)info.MemoryAddress, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.SizeInBytes, alloc.alloc_size, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.MemFlags.ui32.CoarseGrain, 1, gpuNode);

    // Cleanup
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(info.MemoryAddress), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtDeregisterMemory(info.MemoryAddress), gpuNode);

}
// Hands the RegisterGraphicsHandle routine defined above to KFDTest_Launch
// and fails the test unless the launch reports success.
TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(RegisterGraphicsHandle));

    TEST_END
}

#if 0
/* This test doesn't exercise the feature the way we intended. It is flaky and
 * fails if the memory gets evicted, which cannot happen in the scenario it is
 * meant to cover. It needs to be reworked before it is re-enabled.
 */

/* Third-party device memory can be registered for GPU access in
 * ROCm stack. Test this feature. Third party device is mimicked
 * in multi-GPU system using Graphics stack (libdrm). CPU accessible
 * device memory is allocated using Graphics stack on gpuNode2 and
 * this memory will be registered on gpuNode1 for GPU access.
 *
 * NOTE(review): this test sits inside an "#if 0" region and is not compiled.
 */
TEST_F(KFDGraphicsInterop, RegisterForeignDeviceMem) {
    TEST_START(TESTPROFILE_RUNALL)

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Only supported on multi-dGPU system." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }

    /* gpuNode2 must have public memory (large bar) to allocate CPU accessible
     * device memory.
     */
    HSAint32 gpuNode1 = m_NodeInfo.HsaDefaultGPUNode(), gpuNode2 = 0;
    // NOTE(review): pNodeProperties is declared but never used in this test.
    const HsaNodeProperties *pNodeProperties;

    gpuNode2 = m_NodeInfo.FindLargeBarGPUNode();
    if (gpuNode2 < 0) {
        LOG() << "Skipping test: At least one large bar GPU is required." << std::endl;
        return;
    }
    // The two roles must be played by different GPUs: if the default GPU is
    // also the large-bar one, pick the first other GPU node as gpuNode1.
    if (gpuNode1 == gpuNode2) {
        for (unsigned i = 0; i < gpuNodes.size(); i++) {
            if (gpuNodes.at(i) != gpuNode2) {
                gpuNode1 = gpuNodes.at(i);
                break;
            }
        }
    }

    const HsaNodeProperties *pNodeProps =
        m_NodeInfo.GetNodeProperties(gpuNode2);
    const HSAuint32 familyID = FamilyIdFromNode(pNodeProps);

    int rn = FindDRMRenderNode(gpuNode2);
    if (rn < 0) {
        LOG() << "Skipping test: Cound not find render node for 2nd GPU." << std::endl;
        return;
    }

    // Allocate CPU accessible device memory on gpuNode2
    struct amdgpu_bo_alloc_request alloc;
    amdgpu_bo_handle handle;
    if (familyID == FAMILY_CZ || isTonga(pNodeProps))
        alloc.alloc_size = PAGE_SIZE * 8;
    else
        alloc.alloc_size = PAGE_SIZE;
    alloc.phys_alignment = PAGE_SIZE;
    alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
    alloc.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));

    // Fill the first page with the pattern the GPU copy is expected to read
    void *pCpuMap;
    ASSERT_EQ(0, amdgpu_bo_cpu_map(handle, &pCpuMap));
    memset(pCpuMap, 0xAA, PAGE_SIZE);

    /* Register third-party device memory in KFD. Test GPU access
     * by carrying out a simple copy test
     */
    HsaMemoryBuffer lockDeviceMemory(pCpuMap, PAGE_SIZE);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode1, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode1, true/*zero*/);
    PM4Queue queue;
    Dispatch dispatch(isaBuffer);

    // NOTE(review): the enabled test in this file assembles its shader with
    // Assembler::RunAssembleBuf(CopyDwordIsa, ...); m_pIsaGen is not referenced
    // anywhere else in the visible code, so this call may no longer compile -
    // confirm before re-enabling.
    m_pIsaGen->GetCopyDwordIsa(isaBuffer);
    ASSERT_SUCCESS(queue.Create(gpuNode1));

    dispatch.SetArgs(lockDeviceMemory.As<void*>(), dstBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    EXPECT_SUCCESS(queue.Destroy());
    EXPECT_EQ(dstBuffer.As<HSAuint32*>()[0], 0xAAAAAAAA);

    EXPECT_EQ(0, amdgpu_bo_cpu_unmap(handle));
    EXPECT_EQ(0, amdgpu_bo_free(handle));

    TEST_END
}
#endif


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDGraphicsInterop.hpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDMemoryTest.hpp"

#ifndef __KFD_GRAPHICS_INTEROP_TEST__H__
#define __KFD_GRAPHICS_INTEROP_TEST__H__

// @class KFDGraphicsInterop
// Fixture for graphics interoperability testing. It derives from
// KFDMemoryTest and adds no members of its own.
class KFDGraphicsInterop :  public KFDMemoryTest {
 public:
    KFDGraphicsInterop(void) {}
    ~KFDGraphicsInterop(void) {}
};

#endif


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDHWSTest.cpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDHWSTest.hpp"

// Fixture set-up: runs KFDBaseComponentTest::SetUp() between the
// ROUTINE_START / ROUTINE_END macros.
// NOTE(review): KFDHWSTest derives from KFDMultiProcessTest, yet the base
// component's SetUp is called directly - confirm the intermediate class has no
// SetUp of its own that should run.
void KFDHWSTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Fixture tear-down: runs KFDBaseComponentTest::TearDown() between the
// ROUTINE_START / ROUTINE_END macros.
void KFDHWSTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* Oversubscription workload for one GPU node.
 *
 * Forks child processes through ForkChildProcesses(), then in every process
 * creates nQueues PM4 queues and, for nLoops iterations, submits one 1x1x1
 * NoopIsa dispatch per queue and waits for each of them with
 * SyncWithStatus(g_TestTimeOut). The first timeout aborts the loops. Queues and
 * dispatches are always released before the timeout is turned into a test
 * failure, and WaitChildProcesses() is only reached when no timeout occurred.
 *
 * @param gpuNode     GPU node the queues are created on
 * @param nProcesses  forwarded to ForkChildProcesses()
 * @param nQueues     number of PM4 queues (and concurrent dispatches) per process
 * @param nLoops      number of submit/wait rounds
 */
void KFDHWSTest::RunTest_GPU(int gpuNode, unsigned nProcesses, unsigned nQueues, unsigned nLoops) {

    int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(gpuNode);

    unsigned q, l;
    bool timeout = false;

    /* Fork the child processes for gpuNode */
    ForkChildProcesses(gpuNode, nProcesses);

    // Create queues
    PM4Queue *queues = new PM4Queue[nQueues];
    for (q = 0; q < nQueues; q++)
        ASSERT_SUCCESS_GPU(queues[q].Create(gpuNode), gpuNode);

    // Create dispatch pointers. Each loop iteration creates fresh dispatches
    Dispatch **dispatch = new Dispatch*[nQueues];
    for (q = 0; q < nQueues; q++)
        dispatch[q] = NULL;

    // Logging: Each process prints its index after each loop iteration, all in one line.
    std::ostream &log = LOG() << std::dec << "gpuNode: " << gpuNode << " Process: " << m_ProcessIndex[gpuIndex] << " starting." << std::endl;

    // Run work on all queues
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    Assembler* m_pAsm;
    m_pAsm = GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    // Use the per-GPU assertion like the other checks in this routine so a
    // failure is attributed to the GPU node being tested.
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As<char*>()), gpuNode);

    for (l = 0; l < nLoops; l++) {
        for (q = 0; q < nQueues; q++) {
            // Replace the previous round's dispatch; deleting NULL is a no-op
            delete dispatch[q];
            dispatch[q] = new Dispatch(isaBuffer);
            dispatch[q]->SetArgs(NULL, NULL);
            dispatch[q]->SetDim(1, 1, 1);
            dispatch[q]->Submit(queues[q]);
        }
        for (q = 0; q < nQueues; q++) {
            timeout = dispatch[q]->SyncWithStatus(g_TestTimeOut);
            if (timeout)
                goto timeout;
        }
        log << m_ProcessIndex[gpuIndex];
    }

timeout:
    log << std::endl;
    if (timeout) {
        WARN() << "gpuNode: " << gpuNode << " Process: " <<  m_ProcessIndex[gpuIndex] << " timeout." << std::endl;
    } else {
        LOG() << "gpuNode: " << gpuNode << " Process " << m_ProcessIndex[gpuIndex] << " done. Waiting ..." << std::endl;

        // Wait here before destroying queues. If another process' queues
        // are soft-hanging, destroying queues can resolve the soft-hang
        // by changing the run list. Make sure the other process's
        // dispatches have a chance to time out first.
        Delay(g_TestTimeOut+1000);
    }

    // Destroy queues and dispatches. Destroying the queues first
    // ensures that the memory allocated by the Dispatch is no longer
    // accessed by the GPU.
    LOG() << "gpuNode: " << gpuNode << " Process " << m_ProcessIndex[gpuIndex] << " cleaning up." << std::endl;
    for (q = 0; q < nQueues; q++) {
        EXPECT_SUCCESS_GPU(queues[q].Destroy(), gpuNode);
        delete dispatch[q];
    }
    delete[] queues;
    delete[] dispatch;

    // This is after all the cleanup to avoid leaving any garbage
    // behind, but before WaitChildProcesses to ensure a child process
    // with a timeout exits with an error that can be detected by the
    // parent.
    ASSERT_FALSE(timeout);

    WaitChildProcesses(gpuNode);

}

/* Per-GPU entry point passed to KFDTest_Launch by the
 * MultiProcessOversubscribed test. Unpacks the fixture and GPU node from the
 * launch parameters and runs the workload with nProcesses=3, nQueues=13 and
 * nLoops=40.
 */
void RunTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDHWSTest* const hwsFixture = (KFDHWSTest*)pTestParamters->pTestObject;
    const int targetNode = pTestParamters->gpuNode;

    hwsFixture->RunTest_GPU(targetNode, 3, 13, 40);
}

// Requires the ENVCAPS_64BITLINUX capability, then hands RunTest to
// KFDTest_Launch and fails the test unless the launch reports success.
TEST_F(KFDHWSTest, MultiProcessOversubscribed) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(RunTest));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDHWSTest.hpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_HWS_TEST__H__
#define __KFD_HWS_TEST__H__

#include <gtest/gtest.h>

#include "PM4Queue.hpp"
#include "KFDMultiProcessTest.hpp"
#include "Dispatch.hpp"

// @class KFDHWSTest
// Fixture for the KFDHWSTest test cases; derives from KFDMultiProcessTest.
class KFDHWSTest : public KFDMultiProcessTest {
 public:
    KFDHWSTest() {}
    ~KFDHWSTest() {}

    // The free function RunTest is a friend so that it can call the protected
    // RunTest_GPU() on the fixture.
    friend void RunTest(KFDTEST_PARAMETERS* pTestParamters);
 protected:
    virtual void SetUp();
    virtual void TearDown();

    // Runs the multi-process, multi-queue dispatch workload on one GPU node.
    void RunTest_GPU(int gpuNode, unsigned nProcesses, unsigned nQueues, unsigned nLoops);

};

#endif  // __KFD_HWS_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDIPCTest.cpp
================================================
/*
 * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDIPCTest.hpp"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <vector>
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAQueue.hpp"
#include "SDMAPacket.hpp"

// Fixture set-up: runs KFDBaseComponentTest::SetUp() between the
// ROUTINE_START / ROUTINE_END macros. Also called explicitly by
// BasicTestChildProcess() in the forked child.
void KFDIPCTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Fixture tear-down: runs KFDBaseComponentTest::TearDown() between the
// ROUTINE_START / ROUTINE_END macros. Also called explicitly by
// BasicTestChildProcess() in the forked child.
void KFDIPCTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* In the forked child (m_ChildPid == 0) the process must terminate here via
 * exit(); otherwise, once the child finishes, gtest would consider the test
 * done and start the next one while the parent is still active. The exit code
 * is the failed state of the current test's result. Any other process leaves
 * the destructor normally.
 */
KFDIPCTest::~KFDIPCTest(void) {
    if (m_ChildPid != 0)
        return;

    const bool childFailed =
        ::testing::UnitTest::GetInstance()->current_test_info()->result()->Failed();
    exit(childFailed);
}

/* Import shared Local Memory from parent process. Check for the pattern
 * filled in by the parent process. Then fill a new pattern.
 *
 * Check import handle has same HsaMemFlags as export handle to verify thunk and KFD
 * import export handle ioctl pass HsaMemFlags correctly.
 */
void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd, HsaMemFlags mflags) {
    /* Open KFD device for child process. This needs to called before
     * any memory definitions
     */
    TearDown();
    SetUp();

    SDMAQueue sdmaQueue;
    HsaSharedMemoryHandle sharedHandleLM;
    HSAuint64 size = PAGE_SIZE, sharedSize;
    HsaMemoryBuffer tempSysBuffer(size, defaultGPUNode, false);
    HSAuint32 *sharedLocalBuffer = NULL;
    HsaMemMapFlags mapFlags = {0};

    /* Read from Pipe the shared Handle. Import shared Local Memory */
    ASSERT_GE(read(pipefd[0], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);

    ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM,
                  reinterpret_cast<void**>(&sharedLocalBuffer), &sharedSize));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sharedLocalBuffer, sharedSize, NULL,
                  mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));

    /* Check for pattern in the shared Local Memory */
    ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
    size = size < sharedSize ? size : sharedSize;
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As<HSAuint32*>(),
        sharedLocalBuffer, size));
    sdmaQueue.Wait4PacketConsumption();
    EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xAAAAAAAA));

    /* Fill in the Local Memory with different pattern */
    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), sharedLocalBuffer, 0xBBBBBBBB));
    sdmaQueue.Wait4PacketConsumption();

    HsaPointerInfo ptrInfo;
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(sharedLocalBuffer, &ptrInfo));
    EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_SHARED);
    EXPECT_EQ(ptrInfo.Node, (HSAuint32)defaultGPUNode);
    EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)sharedLocalBuffer);
    EXPECT_EQ(ptrInfo.SizeInBytes, sharedSize);
    EXPECT_EQ(ptrInfo.MemFlags.Value, mflags.Value);

    /* Clean up */
    EXPECT_SUCCESS(sdmaQueue.Destroy());
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(sharedLocalBuffer));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
}

/* Fill a pattern into Local Memory and share with the child process.
 * Then wait until Child process to exit and check for the new pattern
 * filled in by the child process.
 */

void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd, HsaMemFlags mflags) {
    HSAuint64 size = PAGE_SIZE, sharedSize;
    int status;
    HSAuint64 AlternateVAGPU;
    void *toShareLocalBuffer;
    HsaMemoryBuffer tempSysBuffer(PAGE_SIZE, defaultGPUNode, false);
    SDMAQueue sdmaQueue;
    HsaSharedMemoryHandle sharedHandleLM;
    HsaMemMapFlags mapFlags = {0};

    ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, mflags, &toShareLocalBuffer));
    /* Fill a Local Buffer with a pattern */
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(toShareLocalBuffer, size, &AlternateVAGPU,
                       mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
    tempSysBuffer.Fill(0xAAAAAAAA);

    /* Copy pattern in Local Memory before sharing it */
    ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), toShareLocalBuffer,
        tempSysBuffer.As<HSAuint32*>(), size));
    sdmaQueue.Wait4PacketConsumption();

    /* Share it with the child process */
    ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer, size, &sharedHandleLM));

    ASSERT_GE(write(pipefd[1], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);

    /* Wait for the child to finish */
    waitpid(cpid, &status, 0);

    EXPECT_EQ(WIFEXITED(status), 1);
    EXPECT_EQ(WEXITSTATUS(status), 0);

    /* Check for the new pattern filled in by child process */
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As<HSAuint32*>(),
        toShareLocalBuffer, size));
    sdmaQueue.Wait4PacketConsumption();
    EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xBBBBBBBB));

    /* Clean up */
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(toShareLocalBuffer));
    EXPECT_SUCCESS(sdmaQueue.Destroy());
}

/* Test IPC memory.
 * 1. Parent Process [Create/Fill] LocalMemory (LM) --share--> Child Process
 * 2. Child Process import LM and check for the pattern.
 * 3. Child Process fill in a new pattern and quit.
 * 4. Parent Process wait for the Child process to finish and then check for
 * the new pattern in LM
 *
 * IPC support is limited to Local Memory.
 */

TEST_F(KFDIPCTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    int pipefd[2];
    HsaMemFlags mflags = {0};

    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /* Test libhsakmt fork() clean up by defining some buffers. These
     * buffers get duplicated in the child process but are not valid there
     * as they don't have a proper mapping in the GPU. The clean up code in
     * libhsakmt should handle it
     */
    volatile HSAuint32 stackData[1];
    HsaMemoryBuffer tmpSysBuffer(PAGE_SIZE, defaultGPUNode, false);
    HsaMemoryBuffer tmpUserptrBuffer((void *)&stackData[0], sizeof(HSAuint32));

    /* Create Pipes for communicating shared handles */
    ASSERT_EQ(pipe(pipefd), 0);

    /* Create a child process and share the above Local Memory with it */
    mflags.ui32.NonPaged = 1;
    mflags.ui32.CoarseGrain = 1;

    m_ChildPid = fork();
    if (m_ChildPid < 0) {
        /* fork() failed: there is no child to talk to, so release the pipe
         * before failing instead of running the parent path with an invalid
         * pid.
         */
        close(pipefd[1]);
        close(pipefd[0]);
    }
    ASSERT_GE(m_ChildPid, 0) << "fork() failed";

    if (m_ChildPid == 0)
        BasicTestChildProcess(defaultGPUNode, pipefd, mflags); /* Child Process */
    else
        BasicTestParentProcess(defaultGPUNode, m_ChildPid, pipefd, mflags); /* Parent process */

    /* Code path executed by both parent and child with respective fds */
    close(pipefd[1]);
    close(pipefd[0]);

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDIPCTest.hpp
================================================
/*
 * Copyright (C) 2017-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDBaseComponentTest.hpp"
#include "BaseQueue.hpp"

/* Include guard named after this header. The previous guard name
 * (__KFD_MEMORY_TEST__H__) did not match the file, so any other header using
 * that name would silently suppress one of the two when both were included.
 */
#ifndef __KFD_IPC_TEST__H__
#define __KFD_IPC_TEST__H__

/* Number of buffer slots held by a KFDCMAArray */
#define CMA_MEMORY_TEST_ARRAY_SIZE 4
#define CMA_TEST_COUNT 3

/* Kind of memory backing a Cross Memory Attach (CMA) test buffer */
enum CMA_MEM_TYPE {
    CMA_MEM_TYPE_SYSTEM = 0,
    CMA_MEM_TYPE_USERPTR,
    CMA_MEM_TYPE_LOCAL_MEM,
};

/* Direction exercised by a CMA test */
enum CMA_TEST_TYPE {
    CMA_READ_TEST = 0,
    CMA_WRITE_TEST
};

/* Result codes returned by the CMA helpers; CMA_TEST_SUCCESS (0) is success */
enum CMA_TEST_STATUS {
    CMA_TEST_SUCCESS = 0,
    CMA_IPC_PIPE_ERROR = 1,
    CMA_CHECK_PATTERN_ERROR,
    CMA_TEST_ABORT,
    CMA_TEST_NOMEM,
    CMA_PARENT_FAIL,
    CMA_TEST_HSA_READ_FAIL,
    CMA_TEST_HSA_WRITE_FAIL
};

/* @struct testMemoryDescriptor
 * @brief Describes test buffers for Cross Memory Attach Test.
 */
struct testMemoryDescriptor {
    CMA_MEM_TYPE m_MemType;
    HSAuint64 m_MemSize;
    /* The buffer will be initialized with this pattern */
    HSAuint32 m_FillPattern;
    /* After CMA test, this pattern is expected in the first word */
    HSAuint32 m_CheckFirstWordPattern;
    /* After CMA test, this pattern is expected in the last word */
    HSAuint32 m_CheckLastWordPattern;

    /* Stores every argument in the member of the corresponding name */
    testMemoryDescriptor(CMA_MEM_TYPE memType, HSAuint64 memSize,
        HSAuint32 fillPattern, HSAuint32 firstCheckPattern,
        HSAuint32 lastCheckPattern) :
        m_MemType(memType),
        m_MemSize(memSize),
        m_FillPattern(fillPattern),
        m_CheckFirstWordPattern(firstCheckPattern),
        m_CheckLastWordPattern(lastCheckPattern) {}
    ~testMemoryDescriptor(){}
};

/* @class KFDCMAArray
 * @brief Array of buffers that will be passed between the parent and child
 *        process for Cross memory read and write tests
 */
class KFDCMAArray {
    /* Used to store the actual buffer array */
    HsaMemoryBuffer* m_MemArray[CMA_MEMORY_TEST_ARRAY_SIZE];
    /* Used for passing to thunk CMA functions */
    HsaMemoryRange m_HsaMemoryRange[CMA_MEMORY_TEST_ARRAY_SIZE];
    /* Though previous arrays are fixed sizes only m_ValidCount
     * ones are valid
     */
    HSAuint64 m_ValidCount;
    QueueArray m_QueueArray;

 public:
    KFDCMAArray();
    /* Destruction goes through Destroy(), so no explicit call is required */
    ~KFDCMAArray() {
        Destroy();
    }

    /* Init() and Destroy() are defined outside this header; both report
     * their outcome as a CMA_TEST_STATUS.
     */
    CMA_TEST_STATUS Init(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE], int node);
    CMA_TEST_STATUS Destroy();

    /* Accessors for the range array and the number of valid entries in it */
    HsaMemoryRange*  getMemoryRange() { return m_HsaMemoryRange; }
    HSAuint64 getValidRangeCount() { return m_ValidCount; }
    void FillPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]);
    CMA_TEST_STATUS checkPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]);
    /* Exchange the array with the peer process over a pipe file descriptor */
    CMA_TEST_STATUS sendCMAArray(int writePipe);
    CMA_TEST_STATUS recvCMAArray(int readPipe);
};


// @class KFDIPCTest
// Fixture for the inter-process (IPC / CMA) memory tests. m_ChildPid starts
// at -1 and receives the result of fork(); the destructor exits the process
// when it is 0, i.e. in the child.
class KFDIPCTest :  public KFDBaseComponentTest {
 public:
    KFDIPCTest(void) : m_ChildPid(-1) {}
    ~KFDIPCTest(void);
 protected:
    virtual void SetUp();
    virtual void TearDown();

    /* For IPC testing */
    void BasicTestChildProcess(int defaultGPUNode, int *pipefd, HsaMemFlags mflags);
    void BasicTestParentProcess(int defaultGPUNode, pid_t childPid, int *pipefd, HsaMemFlags mflags);

    /* For CMA testing */
    CMA_TEST_STATUS CrossMemoryAttachChildProcess(int defaultGPUNode, int writePipe,
                                                  int readPipe, CMA_TEST_TYPE testType);
    CMA_TEST_STATUS CrossMemoryAttachParentProcess(int defaultGPUNode, pid_t cid,
                                                   int writePipe, int readPipe, CMA_TEST_TYPE testType);
 protected:
    pid_t m_ChildPid;
};

#endif  // __KFD_IPC_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDLocalMemoryTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDLocalMemoryTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

/* Fixture set-up: adds nothing of its own and defers to
 * KFDBaseComponentTest::SetUp().
 * ROUTINE_START / ROUTINE_END are project macros defined elsewhere
 * (NOTE(review): presumably an exception guard around the body - confirm).
 */
void KFDLocalMemoryTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture tear-down: adds nothing of its own and defers to
 * KFDBaseComponentTest::TearDown().
 * ROUTINE_START / ROUTINE_END are project macros defined elsewhere
 * (NOTE(review): presumably an exception guard around the body - confirm).
 */
void KFDLocalMemoryTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* Smoke test for local memory access on one GPU node.
 *
 * Allocates a single page of local memory on pTestParamters->gpuNode, then
 * submits one PM4 write-data packet targeting that page and waits until the
 * queue has consumed it. The test is skipped (with a log line) when the
 * process is not running on the dGPU path.
 */
static void AccessLocalMem(KFDTEST_PARAMETERS* pTestParamters) {
    /* The test relies on the dGPU path; everywhere else it is a no-op */
    const bool onDgpuPath = hsakmt_is_dgpu();
    if (!onDgpuPath) {
        LOG() << "Not dGPU path, skipping the test" << std::endl;
        return;
    }

    int gpuNode = pTestParamters->gpuNode;

    /* One page of local memory (zero = false, local = true) */
    HsaMemoryBuffer vramPage(PAGE_SIZE, gpuNode, false, true);

    /* Event used to wait for the packet to be consumed */
    HsaEvent *doneEvent;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &doneEvent), gpuNode);

    PM4Queue pm4Queue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);

    /* Write two zero dwords to the start of the local page */
    unsigned int* const writeTarget = vramPage.As<unsigned int*>();
    pm4Queue.PlaceAndSubmitPacket(PM4WriteDataPacket(writeTarget, 0, 0));
    pm4Queue.Wait4PacketConsumption(doneEvent);

    hsaKmtDestroyEvent(doneEvent);
    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
}

/* gtest entry point: hands AccessLocalMem to KFDTest_Launch and asserts that
 * the launch reports success.
 * NOTE(review): KFDTest_Launch is defined elsewhere; presumably it runs the
 * callback for the GPU node(s) under test - confirm.
 */
TEST_F(KFDLocalMemoryTest, AccessLocalMem) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AccessLocalMem));

    TEST_END
}

/* Round-trips one dword through local memory with the CopyDword shader:
 *
 *   system source -> local buffer A -> local buffer B -> system destination
 *
 * Both local buffers are explicitly mapped to the GPU node before the copies
 * and unmapped afterwards. The test passes when the first dword of the
 * system destination equals the 0x01010101 pattern written to the source.
 */
static void BasicTest(KFDTEST_PARAMETERS* pTestParamters) {
    /* Declared ahead of every buffer so that it outlives them */
    PM4Queue pm4Queue;

    int gpuNode = pTestParamters->gpuNode;
    KFDLocalMemoryTest* fixture = (KFDLocalMemoryTest*)pTestParamters->pTestObject;

    Assembler* assembler = fixture->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(assembler, gpuNode);

    const unsigned int bufBytes = PAGE_SIZE;
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer hostSrc(bufBytes, gpuNode, false);
    HsaMemoryBuffer hostDst(bufBytes, gpuNode);
    HsaMemoryBuffer vramA(bufBytes, gpuNode, false, true);
    HsaMemoryBuffer vramB(bufBytes, gpuNode, false, true);

    hostSrc.Fill(0x01010101);

    ASSERT_SUCCESS_GPU(assembler->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    /* Map both local buffers to this node; the returned GPU VA is not used */
    HsaMemMapFlags noMapFlags = {0};
    HSAuint64 ignoredGpuVa;
    HSAuint32* const nodeList = reinterpret_cast<HSAuint32 *>(&gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(vramA.As<void*>(), vramA.Size(), &ignoredGpuVa,
                       noMapFlags, 1, nodeList), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(vramB.As<void*>(), vramB.Size(), &ignoredGpuVa,
                       noMapFlags, 1, nodeList), gpuNode);

    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    pm4Queue.SetSkipWaitConsump(0);

    Dispatch copyDword(isaBuffer);

    /* Each row is one {source, destination} hop; hops run strictly in order
     * and each one is synced before the next is submitted.
     */
    void* const hops[3][2] = {
        { hostSrc.As<void*>(), vramA.As<void*>()   },
        { vramA.As<void*>(),   vramB.As<void*>()   },
        { vramB.As<void*>(),   hostDst.As<void*>() },
    };
    for (unsigned hop = 0; hop < 3; hop++) {
        copyDword.SetArgs(hops[hop][0], hops[hop][1]);
        copyDword.Submit(pm4Queue);
        copyDword.Sync(g_TestTimeOut);
    }

    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);

    ASSERT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(vramA.As<void*>()), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(vramB.As<void*>()), gpuNode);
    EXPECT_EQ_GPU(hostDst.As<unsigned int*>()[0], 0x01010101, gpuNode);
}

/* gtest entry point: requires the ENVCAPS_64BITLINUX capability, then hands
 * BasicTest to KFDTest_Launch and asserts that the launch reports success.
 */
TEST_F(KFDLocalMemoryTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicTest));

    TEST_END
}

/* Checks that the contents of a local buffer survive an unmap/map cycle.
 *
 * The CopyDword shader first copies the 0x01010101 pattern from a system
 * buffer into a local buffer. The local buffer is then unmapped from and
 * re-mapped to the GPU node, after which the shader copies it back into a
 * second (zeroed) system buffer whose first dword must hold the pattern.
 *
 * Off the dGPU path the local buffer is mapped explicitly before the first
 * dispatch and unmapped again at the very end.
 */
static void VerifyContentsAfterUnmapAndMap(KFDTEST_PARAMETERS* pTestParamters) {
    /* Declared ahead of every buffer so that it outlives them */
    PM4Queue pm4Queue;

    int gpuNode = pTestParamters->gpuNode;
    KFDLocalMemoryTest* fixture = (KFDLocalMemoryTest*)pTestParamters->pTestObject;

    Assembler* assembler = fixture->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(assembler, gpuNode);

    const unsigned int bufBytes = PAGE_SIZE;
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer hostSrc(bufBytes, gpuNode, false);
    HsaMemoryBuffer hostDst(bufBytes, gpuNode, true);
    HsaMemoryBuffer vramBuf(bufBytes, gpuNode, false, true);

    hostSrc.Fill(0x01010101);

    ASSERT_SUCCESS_GPU(assembler->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    pm4Queue.SetSkipWaitConsump(0);

    /* Mapping arguments shared by both map calls; the GPU VA is not used */
    HsaMemMapFlags noMapFlags = {0};
    HSAuint64 ignoredGpuVa;
    HSAuint32* const nodeList = reinterpret_cast<HSAuint32 *>(&gpuNode);

    /* Only the non-dGPU path needs the up-front mapping */
    const bool explicitMapNeeded = !hsakmt_is_dgpu();
    if (explicitMapNeeded) {
        ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(vramBuf.As<void*>(), vramBuf.Size(), &ignoredGpuVa,
                           noMapFlags, 1, nodeList), gpuNode);
    }

    Dispatch copyDword(isaBuffer);

    /* system -> local */
    copyDword.SetArgs(hostSrc.As<void*>(), vramBuf.As<void*>());
    copyDword.Submit(pm4Queue);
    copyDword.Sync(g_TestTimeOut);

    /* The unmap/map cycle whose effect on the contents is under test */
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(vramBuf.As<void*>()), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(vramBuf.As<void*>(), vramBuf.Size(), &ignoredGpuVa,
                       noMapFlags, 1, nodeList), gpuNode);

    /* local -> system */
    copyDword.SetArgs(vramBuf.As<void*>(), hostDst.As<void*>());
    copyDword.Submit(pm4Queue);
    copyDword.Sync(g_TestTimeOut);

    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
    EXPECT_EQ_GPU(hostDst.As<unsigned int*>()[0], 0x01010101, gpuNode);
    if (explicitMapNeeded) {
        EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(vramBuf.As<void*>()), gpuNode);
    }
}

/* gtest entry point: requires the ENVCAPS_64BITLINUX capability, then hands
 * VerifyContentsAfterUnmapAndMap to KFDTest_Launch and asserts that the
 * launch reports success.
 */
TEST_F(KFDLocalMemoryTest, VerifyContentsAfterUnmapAndMap) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(VerifyContentsAfterUnmapAndMap));

    TEST_END
}

/* Deliberately fragment GPUVM aperture to fill up address space
 *
 * General idea: Allocate buffers, but don't map them to GPU. This
 * will reserve virtual address space without pinning physical
 * memory. It should allow using more address space than physically
 * available memory.
 *
 * Even without pinning memory, TTM will still commit memory at
 * allocation time and swap out movable buffers to system memory or
 * even the hard drive, if it needs to. So we can't allocate arbitrary
 * amounts of virtual memory.
 *
 * Strategy to maximize the amount of allocated, fragmented address
 * space while keeping the amount of committed memory bounded at all
 * times:
 *
 * 1. Allocate N blocks of a given size, initially 1 page
 * 2. Free every other block, creating holes in the address space.
 *    This frees up half the memory
 * 3. Allocate N/4 blocks of 2-pages each. This requires as much
 *    memory as was freed in step 2. The block size is bigger than
 *    the 1-page holes, so new address space will be used.
 * 4. Free half the blocks just allocated, and half of the
 *    remaining blocks of step 1. This creates 3-page holes between
 *    the 1-page blocks from step 1, and 2-page holes between the
 *    2-page blocks from step 3. It frees up half of the total
 *    memory.
 * 5. Double the block size to 4, divide number of blocks by 2.
 *    Again, this will require the amount of memory freed in step 4.
 *    The block size 4 is bigger than the biggest hole (3 pages).
 * 6. Free half the memory again, creating 7-page holes between
 *    1-page blocks, 6-page holes between 2-page blocks, and 4-page
 *    holes between 4-page blocks.
 *
 * Repeat, doubling block size and halving number of blocks in each
 * iteration. Each iteration starts and ends with half the total
 * memory free. Because the block size is always bigger than the
 * biggest hole, each iteration increases the amount of address space
 * occupied by half the total memory size. Once the block size reaches
 * half of the free memory (1/4 of total memory) the limit is reached.
 *
 * With 2^n pages available memory, n * 2^(n-1) pages of address space
 * can be reserved. At the end of that process, half the memory will
 * be free.
 *
 *     Total memory     | Fragmented address space
 * order | pages | size | pages |  size | ratio
 * ------+-------+------+-------+-------+-------
 *     2 |    4  |  16K |    4  |   16K |   1
 *     3 |    8  |  32K |   12  |   48K |   1.5
 *     4 |   16  |  64K |   32  |  128K |   2
 *     5 |   32  | 128K |   80  |  320K |   2.5
 *     6 |   64  | 256K |  192  |  768K |   3
 *     7 |  128  | 512K |  448  | 1.75M |   3.5
 *     8 |  256  |   1M |    1K |    4M |   4
 *     9 |  512  |   2M | 2.25K |    9M |   4.5
 *    10 |    1K |   4M |    5K |   20M |   5
 *    11 |    2K |   8M |   11K |   44M |   5.5
 *    12 |    4K |  16M |   24K |   96M |   6
 *    13 |    8K |  32M |   52K |  208M |   6.5
 *    14 |   16K |  64M |  112K |  448M |   7
 *    15 |   32K | 128M |  240K |  960M |   7.5
 *    16 |   64K | 256M |  512K |    2G |   8
 *    17 |  128K | 512M | 1088K | 4.25G |   8.5
 *    18 |  256K |   1G | 2.25M |    9G |   9
 *    19 |  512K |   2G | 4.75M |   19G |   9.5
 *    20 |    1M |   4G |   10M |   40G |  10
 */

/* Fragments the GPUVM address space of one GPU node following the strategy
 * described in the comment block above this function.
 *
 * For every order (block size = 2^order pages) the test allocates a batch of
 * VRAM blocks, verifies each block by copying a fresh value
 *   sysBuffer[0] -> start of block -> end of block -> sysBuffer[1]
 * with the CopyDword shader, and then frees half of all blocks allocated so
 * far. Everything still allocated is released in the clean-up section.
 *
 * The test is skipped when the node reports no VRAM. Failures inside the
 * allocation/free loops are reported with EXPECT_* so that the clean-up
 * section always runs and no VRAM or pointer table is left behind.
 */
static void Fragmentation(KFDTEST_PARAMETERS* pTestParamters){

    int gpuNode = pTestParamters->gpuNode;
    KFDLocalMemoryTest* pKFDLocalMemoryTest = (KFDLocalMemoryTest*)pTestParamters->pTestObject;

    HSAuint64 fbSize;

    fbSize = pKFDLocalMemoryTest->GetVramSize(gpuNode);

    if (!fbSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    } else {
        LOG() << "Found VRAM of " << std::dec << (fbSize >> 20) << "MB." << std::endl;
    }

    /* Use up to half of available memory. Using more results in
     * excessive memory movement in TTM and slows down the test too
     * much. maxOrder is the size of the biggest block that will be
     * allocated. It's 1/4 of the usable memory, so 1/8 the total FB
     * size in pages.
     *
     * Use 8x bigger page size on dGPU to match Tonga alignment
     * workaround. Also nicely matches the 8x bigger GPUVM address
     * space on AMDGPU compared to RADEON.
     */
    unsigned pageSize = hsakmt_is_dgpu() ? PAGE_SIZE*8 : PAGE_SIZE;
    fbSize /= pageSize;
    // Limit maxOrder up to 14 so this test doesn't run longer than 10 mins.
    // The same limit sizes the bookkeeping table below, which keeps it a
    // fixed-size array instead of a (non-standard) variable-length array.
    const unsigned kMaxOrderLimit = 14;
    unsigned maxOrder = 0;
    while (((fbSize >> maxOrder) >= 16) && (maxOrder < kMaxOrderLimit))
        maxOrder++;

    /* Queue and memory used by the shader copy tests */
    HsaMemoryBuffer sysBuffer(PAGE_SIZE, gpuNode, false);
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    /* instantiate Assembler for gpuNode */
    HsaNodeInfo* m_NodeInfo = pKFDLocalMemoryTest->Get_NodeInfo();
    const HsaNodeProperties *nodeProperties = m_NodeInfo->GetNodeProperties(gpuNode);
    Assembler* m_pAsm = new Assembler(GetGfxVersion(nodeProperties));

    /* The assembler is only needed for this one call. Release it before
     * checking the result so that a failed assertion cannot leak it.
     */
    const auto assembleStatus = m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>());
    delete m_pAsm;
    ASSERT_SUCCESS_GPU(assembleStatus, gpuNode);

    /* Allocate and test memory using the strategy explained at the top */
    HSAKMT_STATUS status;
    HsaMemFlags memFlags = {0};
    HsaMemMapFlags mapFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;
    /* Per-order bookkeeping: the block pointers and how many are valid.
     * Only entries 0..maxOrder are used.
     */
    struct {
        void **pointers;
        unsigned long nPages;
    } pages[kMaxOrderLimit + 1];
    unsigned order, o;
    unsigned long p;
    HSAuint64 size;
    unsigned value = 0;
    memset(pages, 0, sizeof(pages));
    for (order = 0; order <= maxOrder; order++) {
        // At maxOrder, block size is 1/4 of available memory
        pages[order].nPages = 1UL << (maxOrder - order + 2);
        // At order != 0, 1/2 the memory is already allocated
        if (order > 0)
            pages[order].nPages >>= 1;
        // Allocate page pointers. Note that plain new[] reports failure by
        // throwing, so the NULL check below is purely defensive.
        pages[order].pointers = new void *[pages[order].nPages];
        EXPECT_NE_GPU((void **)NULL, pages[order].pointers, gpuNode)
            << "Couldn't allocate memory for " << pages[order].nPages
            << " pointers at order " << order << std::endl;
        if (!pages[order].pointers) {
            pages[order].nPages = 0;
            break;
        }
        /* Allocate buffers and access the start and end of every one:
         * 1. Copy from sysBuffer[0] to start of block
         * 2. Copy from start of block to end of block
         * 3. Copy from end of block to sysBuffer[1]
         * 4. Compare results */
        size = (HSAuint64)(1 << order) * pageSize;
        LOG() << std::dec << "Trying to allocate " << pages[order].nPages
              << " order " << order << " blocks " << std::endl;
        for (p = 0; p < pages[order].nPages; p++) {
            status = hsaKmtAllocMemory(gpuNode, size,
                                       memFlags, &pages[order].pointers[p]);
            if (status != HSAKMT_STATUS_SUCCESS) {
                /* Running out of memory is the expected way to stop early */
                EXPECT_EQ_GPU(HSAKMT_STATUS_NO_MEMORY, status, gpuNode);
                pages[order].nPages = p;
                break;
            }

            void *bufferEnd = reinterpret_cast<void *>(reinterpret_cast<unsigned long>(pages[order].pointers[p])
                                       + size - sizeof(unsigned));
            sysBuffer.As<unsigned *>()[0] = ++value;

            status = hsaKmtMapMemoryToGPUNodes(pages[order].pointers[p], size, NULL,
                               mapFlags, 1, reinterpret_cast<HSAuint32 *>(&gpuNode));
            if (status != HSAKMT_STATUS_SUCCESS) {
                /* Give the unmappable block back and stop at this order.
                 * EXPECT (not ASSERT) so the clean-up section still runs.
                 */
                EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pages[order].pointers[p],
                                                size), gpuNode);
                pages[order].nPages = p;
                break;
            }
            Dispatch dispatch1(isaBuffer);
            dispatch1.SetArgs(sysBuffer.As<void*>(), pages[order].pointers[p]);
            dispatch1.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch2(isaBuffer);
            dispatch2.SetArgs(pages[order].pointers[p], bufferEnd);
            dispatch2.Submit(queue);
            // no sync needed for multiple GPU dispatches to the same queue

            Dispatch dispatch3(isaBuffer);
            dispatch3.SetArgs(bufferEnd,
                              reinterpret_cast<void *>(&(sysBuffer.As<unsigned*>()[1])));
            dispatch3.Submit(queue);
            dispatch3.Sync(g_TestTimeOut);
            EXPECT_EQ_GPU(value, sysBuffer.As<unsigned *>()[1], gpuNode);

            EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(pages[order].pointers[p]), gpuNode);
        }
        /* Report the last block only if there is one: with zero blocks there
         * is no valid "last" entry to read from the pointer table.
         */
        if (pages[order].nPages > 0) {
            void *lastBlock = pages[order].pointers[pages[order].nPages - 1];
            LOG() << "  Got " << pages[order].nPages
                  << ", end of last block addr: "
                  << reinterpret_cast<void *>(reinterpret_cast<unsigned long>(lastBlock) + size - 1)
                  << std::endl;
        } else {
            LOG() << "  Got 0 blocks at order " << order << std::endl;
        }

        // Now free half the memory
        for (o = 0; o <= order; o++) {
            unsigned long step = 1UL << (order - o + 1);
            unsigned long offset = (step >> 1) - 1;
            size = (HSAuint64)(1 << o) * pageSize;
            LOG() << "  Freeing every " << step << "th order "
                  << o << " block starting with " << offset << std::endl;
            for (p = offset; p < pages[o].nPages; p += step) {
                /* A NULL entry here would mean the block was already freed,
                 * i.e. a bookkeeping error. Report it and move on instead of
                 * returning, so the remaining blocks still get released.
                 */
                EXPECT_NE_GPU((void **)NULL, pages[o].pointers[p], gpuNode);
                if (pages[o].pointers[p] == NULL)
                    continue;
                EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pages[o].pointers[p], size), gpuNode);
                pages[o].pointers[p] = NULL;
            }
        }
    }

    /* Clean up */
    for (order = 0; order <= maxOrder; order++) {
        if (pages[order].pointers == NULL)
            continue;

        size = (HSAuint64)(1 << order) * pageSize;
        for (p = 0; p < pages[order].nPages; p++)
            if (pages[order].pointers[p] != NULL)
                EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pages[order].pointers[p], size), gpuNode);

        delete[] pages[order].pointers;
    }

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* gtest entry point for Fragmentation. The DISABLED_ prefix means gtest does
 * not run it by default. When run, it requires the ENVCAPS_64BITLINUX
 * capability and asserts that KFDTest_Launch(Fragmentation) succeeds.
 */
TEST_F(KFDLocalMemoryTest, DISABLED_Fragmentation) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(Fragmentation));

    TEST_END
}

/* Verifies that freshly allocated VRAM reads back as zero.
 *
 * A buffer of a quarter of the VRAM size (capped at 1024MB) is allocated
 * repeatedly. Each time the first dword, one dword per 4096 bytes starting at
 * a fixed offset, and the last dword are checked for zero through the SDMA
 * queue, after which the buffer is filled with a non-zero pattern so that a
 * later allocation reusing the same memory would expose stale data.
 *
 * The test is skipped when the node has no VRAM, or so little (< 4MB) that
 * the quarter-size buffer would round down to zero megabytes.
 */
static void CheckZeroInitializationVram(KFDTEST_PARAMETERS* pTestParamters){

    int gpuNode = pTestParamters->gpuNode;
    KFDLocalMemoryTest* pKFDLocalMemoryTest = (KFDLocalMemoryTest*)pTestParamters->pTestObject;

    /* Testing VRAM */
    HSAuint64 vramSizeMB = pKFDLocalMemoryTest->GetVramSize(gpuNode) >> 20;

    if (!vramSizeMB) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    HSAuint64 vramBufSizeMB = vramSizeMB >> 2;
    /* limit the buffer size in order not to overflow the SDMA queue buffer. */
    if (vramBufSizeMB > 1024) {
        vramBufSizeMB = 1024;
    }
    /* A zero-sized buffer would divide by zero in the iteration count below
     * and underflow the "last 4 bytes" offset, so bail out instead.
     */
    if (!vramBufSizeMB) {
        LOG() << "Skipping test: VRAM of " << std::dec << vramSizeMB
              << "MB is too small." << std::endl;
        return;
    }
    HSAuint64 vramBufSize = vramBufSizeMB * 1024 * 1024;

    /* Make sure the entire VRAM is used at least once */
    int count = (vramSizeMB + vramBufSizeMB - 1) / vramBufSizeMB + 1;

    LOG() << "Using " << std::dec << vramBufSizeMB
            << "MB VRAM buffer to test " << std::dec << count
            << " times"<< std::endl;

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode, 8 * PAGE_SIZE), gpuNode);

    /* Scratch page passed to IsPattern() for every check */
    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();

    unsigned int offset = 2060;  // a constant offset, should be 4 aligned.

    while (count--) {
        HsaMemoryBuffer localBuffer(vramBufSize, gpuNode, false, true);

        EXPECT_TRUE_GPU(localBuffer.IsPattern(0, 0, sdmaQueue, tmp), gpuNode);

        /* Sample one dword every 4096 bytes, starting at the fixed offset */
        for (HSAuint64 i = offset; i < vramBufSize;) {
            EXPECT_TRUE_GPU(localBuffer.IsPattern(i, 0, sdmaQueue, tmp), gpuNode);
            i += 4096;
        }

        /* Checking last 4 bytes */
        EXPECT_TRUE_GPU(localBuffer.IsPattern(vramBufSize - 4, 0, sdmaQueue, tmp), gpuNode);

        /* Dirty the buffer before it is released at the end of this iteration */
        localBuffer.Fill(0xABCDEFFF, sdmaQueue);
    }

}

/* gtest entry point: requires the ENVCAPS_64BITLINUX capability, then hands
 * CheckZeroInitializationVram to KFDTest_Launch and asserts that the launch
 * reports success.
 */
TEST_F(KFDLocalMemoryTest, CheckZeroInitializationVram) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CheckZeroInitializationVram));

    TEST_END
}

/* Exercises hsaKmtMapMemoryToGPUNodes / hsaKmtUnmapMemoryToGPU on a VRAM page
 * shared between two GPUs and checks, through hsaKmtQueryPointerInfo, that
 * the set of mapped nodes follows every call:
 *
 *   map to {src, dst} -> 2 nodes
 *   map to {src}      -> 1 node (src)
 *   map to {dst}      -> 1 node (dst)
 *   unmap             -> 0 nodes
 *   map to {src}      -> 1 node (src)
 *
 * Node selection: g_TestNodeId / g_TestDstNodeId when both are given,
 * otherwise a large-BAR GPU as destination and the default GPU (or any other
 * GPU when the default one is the destination) as source. The test is
 * skipped when fewer than two GPUs exist, no large-BAR GPU is found, no
 * source node can be picked, or the two GPUs are not peer-accessible.
 */
TEST_F(KFDLocalMemoryTest, MapVramToGPUNodesTest) {
    TEST_START(TESTPROFILE_RUNALL);

    /* -1 marks "not selected yet" so an unset node can never be used */
    HSAint32 src_node = -1;
    HSAint32 dst_node = -1;
    HsaPointerInfo info;

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: Test requires at least two GPUs." << std::endl;
        return;
    }

    if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
        src_node = g_TestNodeId;
        dst_node = g_TestDstNodeId;
    } else {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        dst_node = m_NodeInfo.FindLargeBarGPUNode();
        if (dst_node < 0) {
            LOG() << "Skipping test: Test requires at least one large bar GPU." << std::endl;
            return;
        }

        if (dst_node != defaultGPUNode) {
            /* At least one node should be defaultGPUNode */
            src_node = defaultGPUNode;
        } else {
            for (auto node : gpuNodes) {
                if (node != dst_node) {
                    src_node = node;
                    break;
                }
            }
        }

        if (src_node < 0) {
            LOG() << "Skipping test: No source GPU distinct from the destination found." << std::endl;
            return;
        }
    }

    if (!m_NodeInfo.IsPeerAccessibleByNode(dst_node, src_node)) {
        LOG() << "Skipping test: GPUs are not peer-accessible" << std::endl;
        return;
    }

    LOG() << "Testing from GPU " << src_node << " to GPU " << dst_node << std::endl;

    void *shared_addr = NULL;
    HSAuint32 nodes[] = { (HSAuint32)src_node, (HSAuint32)dst_node };
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.ExecuteAccess = 1;

    HsaMemMapFlags mapFlags = {0};

    /* Every later call needs a valid allocation, so stop right here if the
     * page cannot be allocated on the destination node.
     */
    ASSERT_SUCCESS(hsaKmtAllocMemory(nodes[1], PAGE_SIZE, memFlags, &shared_addr));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 2, nodes));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NMappedNodes, 2);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[1]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[1]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NMappedNodes, 0);

    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(shared_addr, PAGE_SIZE, NULL, mapFlags, 1, &nodes[0]));
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo(shared_addr, &info));
    EXPECT_EQ(info.NMappedNodes, 1);
    EXPECT_EQ(info.MappedNodes[0], nodes[0]);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(shared_addr));
    EXPECT_SUCCESS(hsaKmtFreeMemory(shared_addr, PAGE_SIZE));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDLocalMemoryTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_LOCALMEMORY_TEST__H__
#define __KFD_LOCALMEMORY_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

/* gtest fixture for the local-memory tests. It adds no state of its own on
 * top of KFDBaseComponentTest; it only declares its own SetUp()/TearDown()
 * overrides, which are defined in the implementation file.
 */
class KFDLocalMemoryTest : public KFDBaseComponentTest {
 public:
    KFDLocalMemoryTest() {}
    ~KFDLocalMemoryTest() {}

 protected:
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFD_LOCALMEMORY_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDMemoryTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDMemoryTest.hpp"
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <numa.h>
#include <vector>
#include "Dispatch.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAQueue.hpp"
#include "SDMAPacket.hpp"
#include "hsakmt/linux/kfd_ioctl.h"

/* Captures user specified time (seconds) to sleep. Defined in another
 * translation unit; a value of 0 means no pause is requested.
 */
extern unsigned int g_SleepTime;

/* File-local mutex.
 * NOTE(review): going by the name it presumably serializes the ptrace-based
 * tests in this file - confirm at the point of use.
 */
static pthread_mutex_t ptrace_mtx;

/* Fixture set-up: adds nothing of its own and defers to
 * KFDBaseComponentTest::SetUp().
 * ROUTINE_START / ROUTINE_END are project macros defined elsewhere
 * (NOTE(review): presumably an exception guard around the body - confirm).
 */
void KFDMemoryTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture tear-down: adds nothing of its own and defers to
 * KFDBaseComponentTest::TearDown().
 * ROUTINE_START / ROUTINE_END are project macros defined elsewhere
 * (NOTE(review): presumably an exception guard around the body - confirm).
 */
void KFDMemoryTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

#include <sys/mman.h>
/* Converts a count of gigabytes into bytes (x * 2^30). The shift happens in
 * the type of x, so pass a 64-bit operand (e.g. 1024ULL) whenever the result
 * does not fit in 32 bits.
 */
#define GB(x) ((x) << 30)

/*
 * Try to map as much as possible system memory to gpu
 * to see if KFD supports 1TB memory correctly or not.
 * After this test case, we can observe if there are any side effects.
 * NOTICE: There are memory usage limit checks in hsa/kfd according to the total
 * physical system memory.
 */
/* Registers and maps as much system memory as possible (up to 1TB in total)
 * to one GPU node.
 *
 * A single anonymous mapping of 1TB / nObjects bytes is created and then
 * registered/mapped up to nObjects times, each time starting one byte
 * further into the mapping, until either call fails. The amount that was
 * mapped is logged and recorded, and all mappings are torn down again.
 *
 * The test is skipped on the non-dGPU path. When g_SleepTime is non-zero the
 * loop pauses once (at iteration 64) to allow CRIU checkpointing.
 */
static void MMapLarge(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Test not supported on APU." << std::endl;
        return;
    }

    const HSAuint64 nObjects = 1<<14;
    /* GPU virtual address returned for each successful mapping. A vector
     * releases its storage on every exit path, including the early return
     * taken by a failed assertion below.
     */
    std::vector<HSAuint64> AlternateVAGPU(nObjects);
    HsaMemMapFlags mapFlags = {0};
    HSAuint64 s;
    char *addr;
    HSAuint64 flags = MAP_ANONYMOUS | MAP_PRIVATE;

    /* Test up to 1TB memory*/
    s = GB(1024ULL) / nObjects;
    addr = reinterpret_cast<char*>(mmap(0, s, PROT_READ | PROT_WRITE, flags, -1, 0));
    ASSERT_NE_GPU(addr, MAP_FAILED, gpuNode);
    memset(addr, 0, s);

    HSAuint64 i = 0;
    /* Allocate 1024GB, aka 1TB*/
    for (; i < nObjects; i++) {

        /* Code snippet to allow CRIU checkpointing */
        if (i == (1 << 6)) {
            if (g_SleepTime > 0) {
                LOG() << "Pause for: " << g_SleepTime << " seconds" <<  std::endl;
                sleep(g_SleepTime);
            }
        }

        /* Stop at the first range that can no longer be registered/mapped */
        if (hsaKmtRegisterMemory(addr + i, s - i))
            break;
        if (hsaKmtMapMemoryToGPUNodes(addr + i, s - i,
                    &AlternateVAGPU[i], mapFlags, 1, reinterpret_cast<HSAuint32 *>(&gpuNode))) {
            hsaKmtDeregisterMemory(addr + i);
            break;
        }
    }

    LOG() << "Successfully registered and mapped " << (i * s >> 30)
            << "GB system memory to gpu" << std::endl;

    RECORD(i * s >> 30) << "Mmap-SysMem-Size";

    /* Undo the i successful mappings in reverse order */
    while (i--) {
        EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void*>(AlternateVAGPU[i])), gpuNode);
        EXPECT_SUCCESS_GPU(hsaKmtDeregisterMemory(reinterpret_cast<void*>(AlternateVAGPU[i])), gpuNode);
    }

    munmap(addr, s);

}

/* gtest entry point: requires the ENVCAPS_64BITLINUX capability, then hands
 * MMapLarge to KFDTest_Launch and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, MMapLarge) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MMapLarge));

    TEST_END
}

/* Keep memory mapped to default node
 * Keep mapping/unmapping memory to/from non-default node
 * A shader running on default node consistently accesses
 * memory - make sure memory is always accessible by default,
 * i.e. there is no gpu vm fault.
 * Synchronization b/t host program and shader:
 * 1. Host initializes src and dst buffer to 0
 * 2. Shader keeps reading src buffer and check value
 * 3. Host writes src buffer to 0x5678 to indicate quit, polling dst until it becomes 0x5678
 * 4. Shader write dst buffer to 0x5678 after src changes to 0x5678, then quits
 * 5. Host program quits after dst becomes 0x5678
 * Need at least two gpu nodes to run the test. The default node has to be a gfx9 node,
 * otherwise, test is skipped. Use kfdtest --node=$$ to specify the default node
 * This test case is introduced as a side-result of investigation of SWDEV-134798, which
 * is a gpu vm fault while running rocr conformance test. Here we try to simulate the
 * same test behaviour.
 */
/* Keeps a buffer mapped to the node under test while repeatedly mapping it
 * to, and unmapping it from, a second GPU node. Meanwhile a shader on the
 * node under test polls the buffer, so any window in which the buffer is not
 * accessible shows up as a GPU VM fault.
 *
 * The node count passed to hsaKmtMapMemoryToGPUNodes alternates between
 * 1 (node under test only) and 2 (both nodes) every 32 iterations.
 *
 * Skipped on ASICs older than FAMILY_AI and when fewer than two GPU nodes
 * are available.
 */
static void MapUnmapToNodes(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;
    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId < FAMILY_AI) {
        LOG() << "Skipping test: Test requires gfx9 and later asics." << std::endl;
        return;
    }

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }

    /* Pick the first GPU node that is not the node under test */
    HSAuint32 nondefaultNode = 0;
    bool foundNondefaultNode = false;
    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        if (gpuNodes.at(i) != gpuNode) {
            nondefaultNode = gpuNodes.at(i);
            foundNondefaultNode = true;
            break;
        }
    }
    if (!foundNondefaultNode) {
        LOG() << "Skipping test: No second GPU node found." << std::endl;
        return;
    }
    /* Index 0 is always the node under test, so a node count of 1 maps to it
     * alone and a node count of 2 adds the second node.
     */
    HSAuint32 mapNodes[2] = {HSAuint32(gpuNode), nondefaultNode};

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer srcBuffer(PAGE_SIZE, gpuNode);
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()), gpuNode);

    PM4Queue pm4Queue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);

    /* Start the polling shader; it keeps running during the loop below */
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch0.Submit(pm4Queue);

    HsaMemMapFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;

    for (unsigned i = 0; i < 1<<14; i ++) {
        /* Bit 5 of i selects the node count: 1 for 32 iterations, then 2 for
         * the next 32, and so on. The parentheses matter: '+' binds tighter
         * than '&', so "(i>>5)&1+1" would evaluate to 0 or 2 instead.
         */
        const unsigned nodeCount = ((i >> 5) & 1) + 1;
        /* Return value intentionally not checked, as before */
        hsaKmtMapMemoryToGPUNodes(srcBuffer.As<void*>(), PAGE_SIZE, NULL, memFlags, nodeCount, mapNodes);
    }

    /* Fill src buffer so shader quits */
    srcBuffer.Fill(0x5678);
    WaitOnValue(dstBuffer.As<uint32_t *>(), 0x5678);
    EXPECT_EQ_GPU(*dstBuffer.As<uint32_t *>(), 0x5678, gpuNode);
    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
}

/* gtest entry point: hands MapUnmapToNodes to KFDTest_Launch and asserts
 * that the launch reports success.
 */
TEST_F(KFDMemoryTest, MapUnmapToNodes) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MapUnmapToNodes));

    TEST_END
}

/* Smoke test for hsaKmtMapMemoryToGPU / hsaKmtUnmapMemoryToGPU.
 *
 * Allocates a single page on the node under test, maps it, unmaps it and
 * finally frees it again. Allocation and mapping are hard requirements
 * (ASSERT); unmap and free are only EXPECTed.
 */
static void MapMemoryToGPU(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    unsigned int* page = NULL;
    unsigned int *noBuffer = NULL;  // reference value for the null check below

    void** allocResult = reinterpret_cast<void**>(&page);
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, pKFDMemoryTest->GetHsaMemFlags(),
                   allocResult), gpuNode);

    // The allocation must have produced a usable pointer before we map it
    ASSERT_NE_GPU(noBuffer, page, gpuNode) << "hsaKmtAllocMemory returned a null pointer";

    // Map, then immediately unmap again
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(page, PAGE_SIZE, NULL), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(page), gpuNode);

    // Give the page back
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(page, PAGE_SIZE), gpuNode);
}

// gtest entry point: hands the static MapMemoryToGPU() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, MapMemoryToGPU) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MapMemoryToGPU));

    TEST_END
}


// Following tests are for hsaKmtAllocMemory with invalid params
// hsaKmtAllocMemory must reject a NULL output pointer with
// HSAKMT_STATUS_INVALID_PARAMETER.
TEST_F(KFDMemoryTest, InvalidMemoryPointerAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

    m_MemoryFlags.ui32.NoNUMABind = 1;

    // Node 0 (system memory), one page, but nowhere to store the result
    const HSAKMT_STATUS status = hsaKmtAllocMemory(0, PAGE_SIZE, m_MemoryFlags, NULL);
    EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, status);

    TEST_END
}

// hsaKmtAllocMemory must reject a zero-byte request with
// HSAKMT_STATUS_INVALID_PARAMETER.
TEST_F(KFDMemoryTest, ZeroMemorySizeAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

    unsigned int* buffer = NULL;
    void** allocResult = reinterpret_cast<void**>(&buffer);

    // Node 0 (system memory), size 0
    const HSAKMT_STATUS status = hsaKmtAllocMemory(0, 0, m_MemoryFlags, allocResult);
    EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, status);

    TEST_END
}

// Basic test for hsaKmtAllocMemory: allocate one page of system memory
// (node 0) without NUMA binding and release it again.
TEST_F(KFDMemoryTest, MemoryAlloc) {
    TEST_START(TESTPROFILE_RUNALL)

    unsigned int* pDb = NULL;
    m_MemoryFlags.ui32.NoNUMABind = 1;
    EXPECT_SUCCESS(hsaKmtAllocMemory(0 /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast<void**>(&pDb)));

    // Release the page again so the test does not leak it. Only attempted
    // when the allocation actually handed back a buffer.
    if (pDb != NULL) {
        EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, PAGE_SIZE));
    }

    TEST_END
}

/* Tries to allocate (nearly) all memory that hsaKmtAvailableMemory reports
 * for the node under test.
 *
 * Starting slightly above the reported amount, the requested size is reduced
 * in 2MB steps until an allocation succeeds; that buffer is then freed again.
 * The test passes when one allocation succeeded and was freed successfully.
 * Skipped when the KFD interface minor version is below 9.
 */
static void MemoryAllocAll(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    HsaMemFlags memFlags = {0};
    memFlags.ui32.NonPaged = 1; // sys mem vs vram
    // Zero-initialised so a failed query below cannot leave garbage behind
    HSAuint64 available = 0;

    if (pKFDMemoryTest->Get_Version()->KernelInterfaceMinorVersion < 9) {
        LOG() << "Available memory IOCTL not present in KFD. Exiting." << std::endl;
        return;
    }

    void *object = NULL;
    const int shrink = 21;  // log2 of the 2MB step used while shrinking
    int success = HSAKMT_STATUS_NO_MEMORY;
    EXPECT_SUCCESS_GPU(hsaKmtAvailableMemory(gpuNode, &available), gpuNode);
    LOG() << "Available: " << available << " bytes" << std::endl;

    const HSAuint64 step = static_cast<HSAuint64>(1) << shrink;
    const HSAuint64 leeway = 10 * step;
    const HSAuint64 attempts = available >> shrink;
    HSAuint64 size = available + leeway;

    // 64-bit counter: the bound is derived from a 64-bit byte count
    for (HSAuint64 i = 0; i < attempts; i++) {
        if (hsaKmtAllocMemory(gpuNode, size, memFlags, &object) == HSAKMT_STATUS_SUCCESS) {
            // Free with the size that was actually allocated
            success = hsaKmtFreeMemory(object, size);
            break;
        }
        size -= step;
    }
    if (success == HSAKMT_STATUS_SUCCESS) {
        LOG() << "Allocated: " << size << " bytes" << std::endl;
        // NOTE(review): size starts at available + leeway and only shrinks,
        // so this branch cannot currently trigger.
        if (size > available + leeway) {
            LOG() << "Under-reported available memory!" << std::endl;
        }
        // Written as an addition so it cannot wrap when available < leeway
        if (size + leeway < available) {
            LOG() << "Over-reported available memory!" << std::endl;
        }
    }
    EXPECT_SUCCESS_GPU(success, gpuNode);
}

// gtest entry point: hands the static MemoryAllocAll() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, MemoryAllocAll) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MemoryAllocAll));

    TEST_END
}

/* Lets the GPU write two dwords into a CPU buffer obtained from
 * VirtualAllocMemory and waits until both values show up.
 *
 * Skipped when hsakmt_is_dgpu() reports a dGPU; the test is meant for APUs.
 */
static void AccessPPRMem(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    if (hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Test requires APU." << std::endl;
        return;
    }

    // One page of readable/writable memory as the write target
    unsigned int *cpuBuf = reinterpret_cast<unsigned int *>(
            VirtualAllocMemory(NULL, PAGE_SIZE, MEM_READ | MEM_WRITE));

    PM4Queue pm4Queue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);

    HsaEvent *doneEvent;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &doneEvent), gpuNode);

    // Write both dwords with a single packet and wait for it to be consumed
    pm4Queue.PlaceAndSubmitPacket(PM4WriteDataPacket(cpuBuf, 0xABCDEF09, 0x12345678));
    pm4Queue.Wait4PacketConsumption(doneEvent);

    // Both values must become visible to the CPU
    WaitOnValue(cpuBuf, 0xABCDEF09);
    WaitOnValue(cpuBuf + 1, 0x12345678);

    hsaKmtDestroyEvent(doneEvent);
    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);

    /* A sleep here used to hide the dmesg PPR message storm on Raven, which
     * happens when the CPU buffer is freed before the excessive PPRs are all
     * consumed by IOMMU HW. A kernel driver workaround addresses that now,
     * so no wait is needed before freeing the buffer.
     */
    // sleep(5);

    VirtualFreeMemory(cpuBuf, PAGE_SIZE);
}

// gtest entry point: hands the static AccessPPRMem() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, AccessPPRMem) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AccessPPRMem));

    TEST_END
}

// Linux OS-specific Test for registering OS allocated memory
//
// Wraps a static global and two stack dwords in HsaMemoryBuffer objects,
// accesses them from a PM4 dispatch and an SDMA write, then forks a child so
// the pages become copy-on-write. After the parent writes to those pages, the
// same GPU accesses are repeated and must land in the parent's new copies.
static void MemoryRegister(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

	const HsaNodeProperties *pNodeProperties = pKFDMemoryTest->Get_NodeInfo()->GetNodeProperties(gpuNode);

    /* Different unaligned memory locations to be mapped for GPU
     * access:
     *
     * - initialized data segment (file backed)
     * - stack (anonymous memory)
     *
     * Separate them enough so they are in different cache lines
     * (64-byte = 16-dword).
     */
    static volatile HSAuint32 globalData = 0xdeadbeef;
    volatile HSAuint32 stackData[17] = {0};
    // Dword indices into stackData: PM4 dispatch destination and SDMA target
    const unsigned dstOffset = 0;
    const unsigned sdmaOffset = 16;

    // Each buffer covers exactly one dword of existing process memory
    HsaMemoryBuffer srcBuffer((void *)&globalData, sizeof(HSAuint32));
    HsaMemoryBuffer dstBuffer((void *)&stackData[dstOffset], sizeof(HSAuint32));
    HsaMemoryBuffer sdmaBuffer((void *)&stackData[sdmaOffset], sizeof(HSAuint32));

    /* Create PM4 and SDMA queues before fork+COW to test queue
     * eviction and restore
     */
    PM4Queue pm4Queue;
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    // CopyDwordIsa presumably copies one dword from the first dispatch
    // argument to the second; the final checks below rely on that - confirm.
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    /* First submit just so the queues are not empty, and to get the
     * TLB populated (in case we need to flush TLBs somewhere after
     * updating the page tables)
     */
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch0.Submit(pm4Queue);
    dispatch0.Sync(g_TestTimeOut);

    // SDMA write into the second stack dword, verified from the CPU side
    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), sdmaBuffer.As<HSAuint32 *>(), 0x12345678));
    sdmaQueue.Wait4PacketConsumption();
    EXPECT_TRUE_GPU(WaitOnValue(&stackData[sdmaOffset], 0x12345678), gpuNode);

    /* Fork a child process to mark pages as COW */
    pid_t pid = fork();
    ASSERT_GE_GPU(pid, 0, gpuNode);
    if (pid == 0) {
        /* Child process waits for a SIGTERM from the parent. It can't
         * make any write access to the stack because we want the
         * parent to make the first write access and get a new copy. A
         * busy loop is the safest way to do that, since any function
         * call (e.g. sleep) would write to the stack.
         */
        while (1)
        {}
        WARN() << "Shouldn't get here!" << std::endl;
        exit(0);
    }

    /* Parent process writes to COW page(s) and gets a new copy. MMU
     * notifier needs to update the GPU mapping(s) for the test to
     * pass.
     */
    globalData = 0xD00BED00;
    stackData[dstOffset] = 0xdeadbeef;
    stackData[sdmaOffset] = 0xdeadbeef;

    /* Terminate the child process before a possible test failure that
     * would leave it spinning in the background indefinitely.
     */
    int status;
    EXPECT_EQ_GPU(0, kill(pid, SIGTERM), gpuNode);
    EXPECT_EQ_GPU(pid, waitpid(pid, &status, 0), gpuNode);
    // The child must have been ended by the SIGTERM sent above
    EXPECT_NE_GPU(0, WIFSIGNALED(status), gpuNode);
    EXPECT_EQ_GPU(SIGTERM, WTERMSIG(status), gpuNode);

    /* Now check that the GPU is accessing the correct page */
    Dispatch dispatch1(isaBuffer);
    dispatch1.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch1.Submit(pm4Queue);
    dispatch1.Sync(g_TestTimeOut);

    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), sdmaBuffer.As<HSAuint32 *>(), 0xD0BED0BE));
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);

    // The parent's post-fork value must be intact and must have reached the
    // dispatch destination; the SDMA target must hold the second SDMA value.
    EXPECT_EQ_GPU(0xD00BED00, globalData, gpuNode);
    EXPECT_EQ_GPU(0xD00BED00, stackData[dstOffset], gpuNode);
    EXPECT_EQ_GPU(0xD0BED0BE, stackData[sdmaOffset], gpuNode);
}

// gtest entry point: hands the static MemoryRegister() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, MemoryRegister) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MemoryRegister));

    TEST_END
}

/* Registers and maps the same user pointer more than once.
 *
 * Part 1 registers/maps &mem[0] twice with different sizes and tears both
 * mappings down. Part 2 registers/maps &mem[2] twice with the same size to
 * all GPU nodes, expects both mappings to share one GPU VA, releases one of
 * them and then checks with a PM4 write that the remaining one still works.
 *
 * Skipped unless hsakmt_is_dgpu() reports a dGPU.
 */
static void MemoryRegisterSamePtr(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Will run on APU once APU+dGPU supported." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    HSAuint64 nGPU = gpuNodes.size();  // number of gpu nodes
    static volatile HSAuint32 mem[4];
    HSAuint64 gpuva1, gpuva2;

    /* Same address, different size */
    EXPECT_SUCCESS(hsaKmtRegisterMemory((void *)&mem[0], sizeof(HSAuint32)*2));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32)*2,
                                        &gpuva1));
    EXPECT_SUCCESS(hsaKmtRegisterMemory((void *)&mem[0], sizeof(HSAuint32)));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU((void *)&mem[0], sizeof(HSAuint32),
                                        &gpuva2));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));

    /* Same address, same size */
    HsaMemMapFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;

    // Node id list in the element type the thunk API expects. A std::vector
    // replaces the former variable-length array, which is not standard C++.
    std::vector<HSAuint32> nodes(gpuNodes.begin(), gpuNodes.end());

    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2],
                            sizeof(HSAuint32)*2, nGPU, nodes.data()));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes((void *)&mem[2],
                                        sizeof(HSAuint32) * 2,
                                        &gpuva1, memFlags, nGPU, nodes.data()));
    EXPECT_SUCCESS(hsaKmtRegisterMemoryToNodes((void *)&mem[2],
                                        sizeof(HSAuint32) * 2, nGPU, nodes.data()));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes((void *)&mem[2],
                                        sizeof(HSAuint32) * 2,
                                        &gpuva2, memFlags, nGPU, nodes.data()));
    // Identical pointer and size are expected to yield the same GPU VA
    EXPECT_EQ(gpuva1, gpuva2);
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva1)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva1)));
    /* Confirm that we still have access to the memory, mem[2] */
    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(gpuNode));
    mem[2] = 0x0;
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(reinterpret_cast<unsigned int *>(gpuva2),
                                                  0xdeadbeef));
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, 0, 0));
    queue.Wait4PacketConsumption();
    EXPECT_EQ(true, WaitOnValue((unsigned int *)(&mem[2]), 0xdeadbeef));
    EXPECT_SUCCESS(queue.Destroy());
    // Drop the second registration/mapping as well
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(gpuva2)));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(gpuva2)));
}

// gtest entry point: hands the static MemoryRegisterSamePtr() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, MemoryRegisterSamePtr) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MemoryRegisterSamePtr));

    TEST_END
}

/* FlatScratchAccess
 * Since HsaMemoryBuffer has to be associated with a specific GPU node, this function in the current form
 * will not work for multiple GPU nodes. For now test only one default GPU node.
 * TODO: Generalize it to support multiple nodes
 */

// Layout of the scratch buffer used by FlatScratchAccess: SCRATCH_SLICE_NUM
// slices of SCRATCH_SLICE_SIZE bytes each, so that individual slices can be
// mapped and unmapped on their own.
#define SCRATCH_SLICE_SIZE 0x10000
#define SCRATCH_SLICE_NUM 3
// Total size in bytes of the scratch buffer
#define SCRATCH_SIZE (SCRATCH_SLICE_NUM * SCRATCH_SLICE_SIZE)
// Byte offset of slice i from the start of the scratch buffer
#define SCRATCH_SLICE_OFFSET(i) ((i) * SCRATCH_SLICE_SIZE)

/* Exercises slice-wise map/unmap of a scratch buffer and then round-trips a
 * dword through scratch memory.
 *
 * For every memory bank of the node that reports HSA_HEAPTYPE_GPU_SCRATCH,
 * one dispatch copies from a system buffer to the scratch aperture address
 * and a second one copies from there into another system buffer, which must
 * end up holding the fill pattern.
 *
 * Skipped on FAMILY_CI and FAMILY_KV.
 */
static void FlatScratchAccess(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId == FAMILY_CI || m_FamilyId == FAMILY_KV) {
        LOG() << "Skipping test: VI-based shader not supported on other ASICs." << std::endl;
        return;
    }

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer scratchBuffer(SCRATCH_SIZE, gpuNode, false/*zero*/, false/*local*/,
                                  false/*exec*/, true /*scratch*/);

    // Unmap scratch for sub-allocation mapping tests
    ASSERT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<void*>()), gpuNode);

    // Map and unmap a few slices in different order: 2-0-1, 1-2-0
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(2),
                                        SCRATCH_SLICE_SIZE, NULL), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(0),
                                        SCRATCH_SLICE_SIZE, NULL), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(1),
                                        SCRATCH_SLICE_SIZE, NULL), gpuNode);

    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(1)), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(2)), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(scratchBuffer.As<char*>() + SCRATCH_SLICE_OFFSET(0)), gpuNode);

    // Map everything for test below
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(scratchBuffer.As<char*>(), SCRATCH_SIZE, NULL), gpuNode);

    HsaMemoryBuffer srcMemBuffer(PAGE_SIZE, gpuNode);
    HsaMemoryBuffer dstMemBuffer(PAGE_SIZE, gpuNode);

    // Initialize the srcBuffer to some fixed value
    srcMemBuffer.Fill(0x01010101);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    const HsaNodeProperties *pNodeProperties = pKFDMemoryTest->Get_NodeInfo()->GetNodeProperties(gpuNode);

    /* TODO: Add support to all GPU Nodes.
     * The loop over the system nodes is removed as the test can be executed only on GPU nodes. This
     * also requires changes to be made to all the HsaMemoryBuffer variables defined above, as
     * HsaMemoryBuffer is now associated with a Node.
     */
    if (pNodeProperties == NULL)
        return;

    const unsigned int numBanks = pNodeProperties->NumMemoryBanks;
    if (numBanks == 0)
        return;  // no banks to scan, hence no scratch aperture to test

    // Get the aperture of the scratch buffer. The bank table lives in a
    // std::vector so it is released on every exit path, including the early
    // exit taken when a queue cannot be created inside the loop below.
    std::vector<HsaMemoryProperties> memoryProperties(numBanks);
    EXPECT_SUCCESS_GPU(hsaKmtGetNodeMemoryProperties(gpuNode, numBanks,
                   memoryProperties.data()), gpuNode);

    for (unsigned int bank = 0; bank < numBanks; bank++) {
        if (memoryProperties[bank].HeapType != HSA_HEAPTYPE_GPU_SCRATCH)
            continue;

        int numWaves = pNodeProperties->NumShaderBanks;  // WAVES must be >= # SE
        int waveSize = 1;  // Amount of space used by each wave in units of 256 dwords

        PM4Queue queue;
        ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

        HSAuint64 scratchApertureAddr = memoryProperties[bank].VirtualBaseAddress;

        // Create a dispatch packet to copy
        Dispatch dispatchSrcToScratch(isaBuffer);

        // Setup the dispatch packet
        // Copying from the source Memory Buffer to the scratch buffer
        dispatchSrcToScratch.SetArgs(srcMemBuffer.As<void*>(), reinterpret_cast<void*>(scratchApertureAddr));
        dispatchSrcToScratch.SetDim(1, 1, 1);
        dispatchSrcToScratch.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());
        // Submit the packet
        dispatchSrcToScratch.Submit(queue);
        dispatchSrcToScratch.Sync();

        // Create another dispatch packet to copy scratch buffer contents to destination buffer.
        Dispatch dispatchScratchToDst(isaBuffer);

        // Set the arguments to copy from the scratch buffer to the destination buffer
        dispatchScratchToDst.SetArgs(reinterpret_cast<void*>(scratchApertureAddr), dstMemBuffer.As<void*>());
        dispatchScratchToDst.SetDim(1, 1, 1);
        dispatchScratchToDst.SetScratch(numWaves, waveSize, scratchBuffer.As<uint64_t>());

        // Submit the packet
        dispatchScratchToDst.Submit(queue);
        dispatchScratchToDst.Sync();

        // Check that the scratch buffer contents were correctly copied over to the system memory buffer
        EXPECT_EQ_GPU(dstMemBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);

        // Tear the queue down explicitly, as the other tests in this file do
        EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    }
}

// gtest entry point: hands the static FlatScratchAccess() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, FlatScratchAccess) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(FlatScratchAccess));

    TEST_END
}

/* Queries the tile configuration of the node under test via
 * hsaKmtGetTileConfig and logs every returned field. The query itself is the
 * only thing asserted; the values are printed for inspection.
 */
static void GetTileConfigTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    // Caller-provided storage: 32 tile entries and 16 macro tile entries
    HSAuint32 tileModes[32] = {0};
    HSAuint32 macroTileModes[16] = {0};

    HsaGpuTileConfig config = {0};
    config.TileConfig = tileModes;
    config.NumTileConfigs = sizeof(tileModes) / sizeof(tileModes[0]);
    config.MacroTileConfig = macroTileModes;
    config.NumMacroTileConfigs = sizeof(macroTileModes) / sizeof(macroTileModes[0]);

    ASSERT_SUCCESS(hsaKmtGetTileConfig(gpuNode, &config));

    LOG() << "tile_config:" << std::endl;
    for (unsigned int idx = 0; idx < config.NumTileConfigs; ++idx) {
        LOG() << "\t" << std::dec << idx << ": 0x" << std::hex
                << tileModes[idx] << std::endl;
    }

    LOG() << "macro_tile_config:" << std::endl;
    for (unsigned int idx = 0; idx < config.NumMacroTileConfigs; ++idx) {
        LOG() << "\t" << std::dec << idx << ": 0x" << std::hex
                << macroTileModes[idx] << std::endl;
    }

    // Remaining scalar fields
    LOG() << "gb_addr_config: 0x" << std::hex << config.GbAddrConfig
            << std::endl;
    LOG() << "num_banks: 0x" << std::hex << config.NumBanks << std::endl;
    LOG() << "num_ranks: 0x" << std::hex << config.NumRanks << std::endl;
}

// gtest entry point: hands the static GetTileConfigTest() body to
// KFDTest_Launch and asserts that the launch reports success.
TEST_F(KFDMemoryTest, GetTileConfigTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(GetTileConfigTest));

    TEST_END
}

void SearchLargestBuffer(int allocNode, const HsaMemFlags &memFlags,
                                        HSAuint64 highMB, int nodeToMap,
                                        HSAuint64 *lastSizeMB) {
    int ret;

    HsaMemMapFlags mapFlags = {0};
    HSAuint64 granularityMB = 8;

    /* Testing big buffers in VRAM */
    unsigned int * pDb = NULL;

    highMB = (highMB + granularityMB - 1) & ~(granularityMB - 1);

    HSAuint64 sizeMB;
    HSAuint64 size = 0;

    while (highMB > granularityMB) {
        sizeMB = highMB - granularityMB;
        size = sizeMB * 1024 * 1024;
        ret = hsaKmtAllocMemory(allocNode, size, memFlags,
                                reinterpret_cast<void**>(&pDb));
        if (ret) {
            highMB = sizeMB;
            continue;
        }

        /* Code snippet to allow CRIU checkpointing */
        if (g_SleepTime > 0) {
            LOG() << "Pause for: " << g_SleepTime << " seconds" <<  std::endl;
            sleep(g_SleepTime);
        }

        ret = hsaKmtMapMemoryToGPUNodes(pDb, size, NULL,
                        mapFlags, 1, reinterpret_cast<HSAuint32 *>(&nodeToMap));
        if (ret) {
            EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pDb, size), nodeToMap);
            highMB = sizeMB;
            continue;
        }
        EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(pDb), nodeToMap);
        EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pDb, size), nodeToMap);

        if (lastSizeMB)
           *lastSizeMB = sizeMB;
        break;
    }
}

/*
 * Largest*BufferTest allocates, maps/unmaps, and frees the largest possible
 * buffers. The size is found by SearchLargestBuffer, which starts at an upper
 * bound (derived from the RAM size) and steps down with a granularity of 8M
 * until an allocation and mapping succeed. The same logic is repeated on
 * local buffers (VRAM).
 * Please note we limit the largest possible system buffer to be smaller than
 * the RAM size. The reason is that the system buffer can make use of virtual
 * memory so that a system buffer could be very large even though the RAM size
 * is small. For example, on a typical Carrizo platform, the largest allocated
 * system buffer could be more than 14G even though it only has 4G memory.
 * In that situation, it will take too much time to finish the test because of
 * the onerous memory swap operation. So we limit the buffer size that way.
 */
/* Searches for the largest system-memory buffer that can be allocated on
 * node 0 and mapped to the GPU under test. The search starts at 95% of the
 * system memory size divided by the number of GPU nodes.
 *
 * Skipped unless hsakmt_is_dgpu() reports a dGPU, and when no GPU node exists.
 */
static void LargestSysBufferTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
        return;
    }

    int gpuCount = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU().size();
    if (gpuCount <= 0) {
        /* if no gpu node */
        return;
    }

    // System memory in MB, split evenly across the GPU nodes
    HSAuint64 sysMemSizeMB = pKFDMemoryTest->GetSysMemSize() >> 20;
    sysMemSizeMB /= gpuCount;

    LOG() << "Found System Memory of " << std::dec << sysMemSizeMB
                    << "MB. Using 95% of that for the test" << std::endl;

    HSAuint64 largestMB = 0;
    SearchLargestBuffer(0, pKFDMemoryTest->GetHsaMemFlags(), sysMemSizeMB*0.95, gpuNode,
                    &largestMB);

    LOG() << "The largest allocated system buffer is " << std::dec
            << largestMB << "MB" << std::endl;
}

// gtest entry point: hands the static LargestSysBufferTest() body to
// KFDTest_Launch and asserts that the launch reports success.
// TEST_REQUIRE_ENV_CAPABILITIES presumably skips the test when the
// environment is not 64-bit Linux - confirm against its definition.
TEST_F(KFDMemoryTest, LargestSysBufferTest) {
     TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	 TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(LargestSysBufferTest));

    TEST_END
}

/* Searches for the largest VRAM buffer that can be allocated on and mapped to
 * the GPU under test, then checks the result against the reported VRAM size:
 * at least 3/5 of VRAM when VRAM is 512MB or less, at least 3/4 otherwise.
 * A warning is logged when less than 15/16 of VRAM could be allocated.
 *
 * Skipped unless hsakmt_is_dgpu() reports a dGPU.
 */
static void LargestVramBufferTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
        return;
    }

    // Non-paged memory without host access
    HsaMemFlags vramFlags = {0};
    vramFlags.ui32.HostAccess = 0;
    vramFlags.ui32.NonPaged = 1;

    HSAuint64 vramSizeMB = pKFDMemoryTest->GetVramSize(gpuNode) >> 20;
    LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;

    HSAuint64 largestMB = 0;
    SearchLargestBuffer(gpuNode, vramFlags, vramSizeMB, gpuNode,
                    &largestMB);

    LOG() << "The largest allocated VRAM buffer is " << std::dec
            << largestMB << "MB" << std::endl;

    /* Minimum share of VRAM that must be allocatable */
    if (vramSizeMB > 512) {
        EXPECT_GE_GPU(largestMB * 4, vramSizeMB * 3, gpuNode);
    } else {
        EXPECT_GE_GPU(largestMB * 5, vramSizeMB * 3, gpuNode);
    }

    // Soft expectation: warn only
    if (largestMB * 16 < vramSizeMB * 15) {
        WARN() << "The largest allocated VRAM buffer size is smaller than the expected "
            << vramSizeMB * 15 / 16 << "MB" << std::endl;
    }
}

// gtest entry point: hands the static LargestVramBufferTest() body to
// KFDTest_Launch and asserts that the launch reports success.
// TEST_REQUIRE_ENV_CAPABILITIES presumably skips the test when the
// environment is not 64-bit Linux - confirm against its definition.
TEST_F(KFDMemoryTest, LargestVramBufferTest) {
     TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	 TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(LargestVramBufferTest));

    TEST_END
}
/*
 * BigSysBufferStressTest allocates 128M system buffers and maps them to the
 * GPU under test in a loop until either step fails (or the bookkeeping array
 * is full), then unmaps and frees all of them. The whole cycle runs four
 * times; every pass must manage at least as many buffers as the first one,
 * otherwise a memory leak is suspected.
 *
 * Skipped unless hsakmt_is_dgpu() reports a dGPU.
 */
static void BigSysBufferStressTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Running on APU fails and locks the system." << std::endl;
        return;
    }

    /* Upper limit on the number of buffers tracked per pass */
#define ARRAY_ENTRIES 2048

    unsigned int* blocks[ARRAY_ENTRIES];
    HSAuint64 blockSizeMB = 128;
    HSAuint64 blockBytes = blockSizeMB * 1024 * 1024;

    HsaMemMapFlags mapFlags = {0};
    HSAuint64 AlternateVAGPU;
    int baselineCount = 0;  // buffer count of the first pass

    /* Four passes to see if there is any memory leak */
    for (int pass = 1; pass <= 4; pass++) {
        int mapped = 0;

        // Grab buffers until allocation or mapping fails
        while (mapped < ARRAY_ENTRIES) {
            int status = hsaKmtAllocMemory(0 /* system */, blockBytes, pKFDMemoryTest->GetHsaMemFlags(),
                    reinterpret_cast<void**>(&blocks[mapped]));
            if (status)
                break;

            status = hsaKmtMapMemoryToGPUNodes(blocks[mapped], blockBytes,
                    &AlternateVAGPU, mapFlags, 1, reinterpret_cast<HSAuint32 *>(&gpuNode));
            if (status) {
                // Not mapped: give this one back right away
                EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(blocks[mapped], blockBytes), gpuNode);
                break;
            }

            mapped++;
        }

        LOG() << "Allocated system buffers time " << std::dec << pass << ": "
            << mapped << " * " << blockSizeMB << "MB" << std::endl;

        if (baselineCount == 0)
            baselineCount = mapped;
        EXPECT_GE_GPU(mapped, baselineCount, gpuNode) << "There might be memory leak!" << std::endl;

        // Release everything that was successfully mapped in this pass
        for (int k = 0; k < mapped; k++) {
            EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(blocks[k]), gpuNode);
            EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(blocks[k], blockBytes), gpuNode);
        }
    }
}

// gtest entry point for the big system buffer stress test: hands the static
// BigSysBufferStressTest() body to KFDTest_Launch and asserts that the launch
// reports success. It previously launched LargestVramBufferTest by mistake,
// so the stress test body was never run.
TEST_F(KFDMemoryTest, BigSysBufferStressTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BigSysBufferStressTest));

    TEST_END
}

#define VRAM_ALLOCATION_ALIGN (1 << 21)  //Align VRAM allocations to 2MB
static void MMBench(KFDTEST_PARAMETERS* pTestParamters) {

   int gpuNode = pTestParamters->gpuNode;
   KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    unsigned testIndex, sizeIndex, memType, nMemTypes;
    const char *memTypeStrings[2] = {"SysMem", "VRAM"};
    const struct {
        unsigned size;
        unsigned num;
    } bufParams[] = {
        /* Buffer sizes in x16 increments. Limit memory usage to about
         * 1GB. For small sizes we use 1000 buffers, which means we
         * conveniently measure microseconds and report nanoseconds.
         */
        {PAGE_SIZE      , 1000},  /*  4KB */
        {PAGE_SIZE <<  4, 1000},  /* 64KB */
        {PAGE_SIZE <<  9,  500},  /*  2MB */
        {PAGE_SIZE << 13,   32},  /* 32MB */
        {PAGE_SIZE << 18,    1},  /*  1GB */
    };
    const unsigned nSizes = sizeof(bufParams) / sizeof(bufParams[0]);
    const unsigned nTests = nSizes << 2;
#define TEST_BUFSIZE(index) (bufParams[(index) % nSizes].size)
#define TEST_NBUFS(index)  (bufParams[(index) % nSizes].num)
#define TEST_MEMTYPE(index) ((index / nSizes) & 0x1)
#define TEST_SDMA(index)    (((index / nSizes) >> 1) & 0x1)

    void *bufs[1000];
    HSAuint64 start, end;
    unsigned i;
    HSAKMT_STATUS ret;
    HsaMemFlags memFlags = {0};
    HsaMemMapFlags mapFlags = {0};
    HSAuint64 altVa;

    HSAuint64 vramSizeMB = pKFDMemoryTest->GetVramSize(gpuNode) >> 20;

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    bool is_all_large_bar = true;

    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        if (!pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(gpuNodes.at(i))) {
                is_all_large_bar = false;
                break;
        }
    }

    LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;

    if (vramSizeMB == 0)
        nMemTypes = 1;
    else
        nMemTypes = 2;

    /* Two SDMA queues to interleave user mode SDMA with memory
     * management on either SDMA engine. Make the queues long enough
     * to buffer at least nBufs x WriteData packets (7 dwords per
     * packet).
     */
    SDMAQueue sdmaQueue[2];
    ASSERT_SUCCESS_GPU(sdmaQueue[0].Create(gpuNode, PAGE_SIZE*8), gpuNode);
    ASSERT_SUCCESS_GPU(sdmaQueue[1].Create(gpuNode, PAGE_SIZE*8), gpuNode);
    HsaMemoryBuffer sdmaBuffer(PAGE_SIZE, 0); /* system memory */
#define INTERLEAVE_SDMA() do {                                          \
        if (interleaveSDMA) {                                           \
            sdmaQueue[0].PlaceAndSubmitPacket(                          \
                SDMAWriteDataPacket(sdmaQueue[0].GetFamilyId(), sdmaBuffer.As<HSAuint32 *>(),       \
                                    0x12345678));                       \
            sdmaQueue[1].PlaceAndSubmitPacket(                          \
                SDMAWriteDataPacket(sdmaQueue[1].GetFamilyId(), sdmaBuffer.As<HSAuint32 *>()+16,    \
                                    0x12345678));                       \
        }                                                               \
    } while (0)
#define IDLE_SDMA() do {                                                \
        if (interleaveSDMA) {                                           \
            sdmaQueue[0].Wait4PacketConsumption();                      \
            sdmaQueue[1].Wait4PacketConsumption();                      \
        }                                                               \
    } while (0)

    LOG() << "Test (avg. ns)\t    alloc   mapOne  umapOne   mapAll  umapAll     free" << std::endl;
    for (testIndex = 0; testIndex < nTests; testIndex++) {
        unsigned bufSize = TEST_BUFSIZE(testIndex);
        unsigned nBufs = TEST_NBUFS(testIndex);
        unsigned memType = TEST_MEMTYPE(testIndex);
        bool interleaveSDMA = TEST_SDMA(testIndex);
        unsigned bufLimit;
        HSAuint64 allocTime, map1Time, unmap1Time, mapAllTime, unmapAllTime, freeTime;
        HSAuint32 allocNode;

        /* Code snippet to allow CRIU checkpointing */
        if (testIndex == 3) {
            if (g_SleepTime > 0) {
                LOG() << "Pause for: " << g_SleepTime << " seconds" <<  std::endl;
                sleep(g_SleepTime);
            }
        }

        if ((testIndex % nSizes) == 0)
            LOG() << "--------------------------------------------------------------------------" << std::endl;

        if (memType >= nMemTypes)
            continue;  // skip unsupported mem types

        if (memType == 0) {
            allocNode = 0;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 0;
            memFlags.ui32.NoNUMABind = 1;
        } else {
            allocNode = gpuNode;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 0;
            memFlags.ui32.NonPaged = 1;

            /* Buffer sizes are 2MB aligned to match new allocation policy.
             * Limit the number of buffers so that they fit in 80% of the
             * VRAM size, or in 60% of it when VRAM is 512MB or smaller
             * (e.g. APUs with a small VRAM carve-out).
             */
            if (vramSizeMB <= 512)
                bufLimit = ((vramSizeMB << 20) * 6 / 10) / ALIGN_UP(bufSize, VRAM_ALLOCATION_ALIGN);
            else
                bufLimit = ((vramSizeMB << 20) * 8 / 10) / ALIGN_UP(bufSize, VRAM_ALLOCATION_ALIGN);

            if (bufLimit == 0)
                continue; // skip when bufSize > vram

            /* When vram is too small to fit all the buffers, cap nBufs at bufLimit */
            nBufs = (nBufs < bufLimit) ? nBufs : bufLimit;
        }

        /* Allocation */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(allocNode, bufSize, memFlags,
                                             &bufs[i]), gpuNode);
            INTERLEAVE_SDMA();
        }
        allocTime = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Map to one GPU */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(bufs[i], bufSize,
                                                     &altVa, mapFlags, 1,
                                                     (HSAuint32*)&gpuNode),  gpuNode);
            INTERLEAVE_SDMA();
        }
        map1Time = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Unmap from GPU */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(bufs[i]), gpuNode);
            INTERLEAVE_SDMA();
        }
        unmap1Time = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        /* Map to all GPUs */
        if (is_all_large_bar) {
            start = GetSystemTickCountInMicroSec();
            for (i = 0; i < nBufs; i++) {
                ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(bufs[i], bufSize, &altVa), gpuNode);
                INTERLEAVE_SDMA();
            }
            mapAllTime = GetSystemTickCountInMicroSec() - start;
            IDLE_SDMA();

            /* Unmap from all GPUs */
            start = GetSystemTickCountInMicroSec();
            for (i = 0; i < nBufs; i++) {
                EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(bufs[i]));
                INTERLEAVE_SDMA();
            }
            unmapAllTime = GetSystemTickCountInMicroSec() - start;
            IDLE_SDMA();
        }

        /* Free */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(bufs[i], bufSize), gpuNode);
            INTERLEAVE_SDMA();
        }
        freeTime = GetSystemTickCountInMicroSec() - start;
        IDLE_SDMA();

        allocTime = allocTime * 1000 / nBufs;
        map1Time = map1Time * 1000 / nBufs;
        unmap1Time = unmap1Time * 1000 / nBufs;
        mapAllTime = mapAllTime * 1000 / nBufs;
        unmapAllTime = unmapAllTime * 1000 / nBufs;
        freeTime = freeTime * 1000 / nBufs;

        unsigned bufSizeLog;
        char bufSizeUnit;
        if (bufSize < (1 << 20)) {
            bufSizeLog = bufSize >> 10;
            bufSizeUnit = 'K';
        } else if (bufSize < (1 << 30)) {
            bufSizeLog = bufSize >> 20;
            bufSizeUnit = 'M';
        } else {
            bufSizeLog = bufSize >> 30;
            bufSizeUnit = 'G';
        }

        LOG() << std::dec << std::setiosflags(std::ios::right)
              << std::setw(3) << bufSizeLog << bufSizeUnit << "-"
              << memTypeStrings[memType] << "-"
              << (interleaveSDMA ? "SDMA\t" : "noSDMA\t")
              << std::setw(9) << allocTime
              << std::setw(9) << map1Time
              << std::setw(9) << unmap1Time
              << std::setw(9) << mapAllTime
              << std::setw(9) << unmapAllTime
              << std::setw(9) << freeTime << std::endl;

#define MMBENCH_KEY_PREFIX memTypeStrings[memType] << "-" \
                           << (interleaveSDMA ? "SDMA" : "noSDMA") << "-" \
                           << (bufSize >> 10) << "K-"
        RECORD(allocTime) << MMBENCH_KEY_PREFIX << "alloc";
        RECORD(map1Time) << MMBENCH_KEY_PREFIX << "mapOne";
        RECORD(unmap1Time) << MMBENCH_KEY_PREFIX << "unmapOne";
        RECORD(mapAllTime) << MMBENCH_KEY_PREFIX << "mapAll";
        RECORD(unmapAllTime) << MMBENCH_KEY_PREFIX << "unmapAll";
        RECORD(freeTime) << MMBENCH_KEY_PREFIX << "free";
    }
}

/* gtest entry point for the memory-management benchmark.
 *
 * Returns early unless the environment reports the 64-bit Linux capability,
 * then hands the static MMBench worker to KFDTest_Launch and asserts that the
 * launch reports success. TEST_START/TEST_END bracket the test body.
 */
TEST_F(KFDMemoryTest, MMBench) {
     TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	 TEST_START(TESTPROFILE_RUNALL);

    // The per-GPU work lives in the static MMBench() function above.
    ASSERT_SUCCESS(KFDTest_Launch(MMBench));

    TEST_END
}

/* Checks hsaKmtQueryPointerInfo() for the kinds of memory one GPU node can see.
 *
 * Covered cases, in order:
 *   - a buffer allocated on the CPU node (node 0), before and after unmapping;
 *   - a local (VRAM) buffer on gpuNode, skipped when the node reports no VRAM;
 *   - a registered user pointer, including registration to an explicit node list;
 *   - queries with an address inside a range rather than at its start;
 *   - user data attached with hsaKmtSetMemoryUserData().
 *
 * pTestParamters supplies the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void QueryPointerInfo(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    unsigned int bufSize = PAGE_SIZE * 8;  // CZ and Tonga need 8 pages
    HsaPointerInfo ptrInfo;
    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    HSAuint64 nGPU = gpuNodes.size();  // number of gpu nodes

    /* GraphicHandle is tested at KFDGraphicsInterop.RegisterGraphicsHandle */

    /*** Memory allocated on CPU node ***/
    HsaMemoryBuffer hostBuffer(bufSize, 0/*node*/, false, false/*local*/);
    EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_ALLOCATED, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.Node, 0, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.MemFlags.Value, hostBuffer.Flags().Value, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.CPUAddress, hostBuffer.As<void*>(), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.GPUAddress, (HSAuint64)hostBuffer.As<void*>(), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.SizeInBytes, (HSAuint64)hostBuffer.Size(), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.MemFlags.ui32.CoarseGrain, 0, gpuNode);
    if (hsakmt_is_dgpu()) {
        EXPECT_EQ_GPU((HSAuint64)ptrInfo.NMappedNodes, nGPU, gpuNode);
        // Check NMappedNodes again after unmapping the memory. Both calls are
        // checked so that the NMappedNodes expectation below is never
        // evaluated against stale data from the previous query.
        EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(hostBuffer.As<void*>()), gpuNode);
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo), gpuNode);
    }
    EXPECT_EQ_GPU((HSAuint64)ptrInfo.NMappedNodes, 0, gpuNode);

    /* Skip testing local memory if the platform does not have it */
    if (pKFDMemoryTest->GetVramSize(gpuNode)) {
        HsaMemoryBuffer localBuffer(bufSize, gpuNode, false, true);
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(localBuffer.As<void*>(), &ptrInfo), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_ALLOCATED, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.Node, gpuNode, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.MemFlags.Value, localBuffer.Flags().Value, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.CPUAddress, localBuffer.As<void*>(), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As<void*>(), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.SizeInBytes, (HSAuint64)localBuffer.Size(), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.MemFlags.ui32.CoarseGrain, 1, gpuNode);

        // An interior address must resolve to the start of the same allocation.
        HSAuint32 *addr = localBuffer.As<HSAuint32 *>() + 4;
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(addr), &ptrInfo), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.GPUAddress, (HSAuint64)localBuffer.As<void*>(), gpuNode);
    }

    /** Registered memory: user pointer */
    static volatile HSAuint32 mem[4];  // 8 bytes for register only and
                                       // 8 bytes for register to nodes
    HsaMemoryBuffer hsaBuffer((void *)(&mem[0]), sizeof(HSAuint32)*2);
    /*
     * APU doesn't use userptr.
     * User pointers registered with SVM API, does not create vm_object_t.
     * Therefore, pointer info can not be queried.
     */
    if (hsakmt_is_dgpu() && mem != hsaBuffer.As<void*>()) {
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo((void *)(&mem[0]), &ptrInfo), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_REGISTERED_USER, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.CPUAddress, &mem[0], gpuNode);
        EXPECT_EQ_GPU(ptrInfo.GPUAddress, (HSAuint64)hsaBuffer.As<void*>(), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.SizeInBytes, sizeof(HSAuint32)*2, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.NRegisteredNodes, 0, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.NMappedNodes, nGPU, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.MemFlags.ui32.CoarseGrain, 1, gpuNode);
        // Register to nodes. A std::vector replaces the former variable-length
        // array, which is a compiler extension rather than standard C++.
        std::vector<HSAuint32> nodes(gpuNodes.begin(), gpuNodes.end());
        EXPECT_SUCCESS_GPU(hsaKmtRegisterMemoryToNodes((void *)(&mem[2]),
                                sizeof(HSAuint32)*2, nGPU, nodes.data()), gpuNode);
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo((void *)(&mem[2]), &ptrInfo), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.NRegisteredNodes, nGPU, gpuNode);
        EXPECT_SUCCESS_GPU(hsaKmtDeregisterMemory((void *)(&mem[2])), gpuNode);
    }

    /* Not a starting address, but an address inside the memory range
     * should also get the memory information
     */
    HSAuint32 *address = hostBuffer.As<HSAuint32 *>() + 1;
    EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(reinterpret_cast<void *>(address), &ptrInfo), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_ALLOCATED, gpuNode);
    EXPECT_EQ_GPU(ptrInfo.CPUAddress, hostBuffer.As<void*>(), gpuNode);
    if (hsakmt_is_dgpu() && &mem[1] != hsaBuffer.As<HSAuint32 *>() + 1) {
        EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo((void *)(&mem[1]), &ptrInfo), gpuNode);
        EXPECT_EQ_GPU(ptrInfo.Type, HSA_POINTER_REGISTERED_USER, gpuNode);
        EXPECT_EQ_GPU(ptrInfo.CPUAddress, &mem[0], gpuNode);
    }

    /*** Set user data ***/
    // Only the pointer value is compared below; the string contents are never read back.
    char userData[16] = "This is a test.";
    EXPECT_SUCCESS_GPU(hsaKmtSetMemoryUserData(hostBuffer.As<HSAuint32 *>(), reinterpret_cast<void *>(userData)), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtQueryPointerInfo(hostBuffer.As<void*>(), &ptrInfo), gpuNode);
    EXPECT_EQ_GPU(ptrInfo.UserData, (void *)userData, gpuNode);
}

/* gtest entry point: hands the static QueryPointerInfo worker to
 * KFDTest_Launch and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, QueryPointerInfo) {

	 TEST_START(TESTPROFILE_RUNALL);

    // The per-GPU checks live in the static QueryPointerInfo() function above.
    ASSERT_SUCCESS(KFDTest_Launch(QueryPointerInfo));

    TEST_END
}

/* Linux OS-specific test for a debugger accessing HSA memory in a
 * debugged process.
 *
 * Allocates a system memory and a visible local memory buffer (if
 * possible). Forks a child process that PTRACE_ATTACHes to the parent
 * to access its memory like a debugger would. Child copies data in
 * the parent process using PTRACE_PEEKDATA and PTRACE_POKEDATA. After
 * the child terminates, the parent checks that the copy was
 * successful.
 *
 * pTestParamters supplies the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject). The fork/trace section is serialized
 * through ptrace_mtx.
 */
static void PtraceAccess(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 1;

    // mem[0]: system memory, mem[1]: visible VRAM (NULL when not tested).
    // Both start out NULL so a failed allocation can never leave a wild pointer.
    void *mem[2] = {NULL, NULL};
    unsigned i;

    /* Offset in the VRAM buffer to test crossing non-contiguous
     * buffer boundaries. The second access starting from offset
     * sizeof(HSAint64)+1 will cross a node boundary in a single access,
     * for node sizes of 4MB or smaller.
     */
    const HSAuint64 VRAM_OFFSET = (4 << 20) - 2 * sizeof(HSAint64);

    // Allocation sizes, named so that each free passes the size it allocated.
    const HSAuint64 sysBufSize = PAGE_SIZE*2;
    const HSAuint64 vramBufSize = PAGE_SIZE*2 + (4 << 20);

    // Alloc system memory from node 0 and initialize it
    memFlags.ui32.NonPaged = 0;
    memFlags.ui32.NoNUMABind = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(0, sysBufSize, memFlags, &mem[0]), gpuNode);
    for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
        (reinterpret_cast<HSAuint8 *>(mem[0]))[i] = i;            // source
        (reinterpret_cast<HSAuint8 *>(mem[0]))[PAGE_SIZE+i] = 0;  // destination
    }

    // Try to alloc local memory from GPU node
    memFlags.ui32.NonPaged = 1;
    if (pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(gpuNode)) {
        HSAKMT_STATUS vramStatus = hsaKmtAllocMemory(gpuNode, vramBufSize,
                                                     memFlags, &mem[1]);
        EXPECT_SUCCESS_GPU(vramStatus, gpuNode);
        if (vramStatus == HSAKMT_STATUS_SUCCESS && mem[1] != NULL) {
            mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) + VRAM_OFFSET);
            for (i = 0; i < 4*sizeof(HSAint64) + 4; i++) {
                (reinterpret_cast<HSAuint8 *>(mem[1]))[i] = i;
                (reinterpret_cast<HSAuint8 *>(mem[1]))[PAGE_SIZE+i] = 0;
            }
        } else {
            // The failure is already recorded above; continue with system
            // memory only instead of touching an invalid VRAM pointer.
            mem[1] = NULL;
        }
    } else {
        LOG() << "Not testing local memory, it's invisible" << std::endl;
        mem[1] = NULL;
    }

    /* Allow any process to trace this one. If kernel is built without
     * Yama, this is not needed, and this call will fail.
     */
#ifdef PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif

    pthread_mutex_lock(&ptrace_mtx);

    // Find current pid so the child can trace it
    pid_t tracePid = getpid();

    // Fork the child
    pid_t childPid = fork();
    if (childPid < 0) {
        // The assertion below returns from this function on failure; release
        // the lock first so other users of ptrace_mtx are not blocked forever.
        pthread_mutex_unlock(&ptrace_mtx);
    }
    ASSERT_GE_GPU(childPid, 0, gpuNode);
    if (childPid == 0) {
        int traceStatus;
        int err = 0, r;

        /* Child process: we catch any exceptions to make sure we detach
         * from the traced process, because terminating without detaching
         * leaves the traced process stopped.
         */
        r = ptrace(PTRACE_ATTACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_ATTACH failed: " << r << std::endl;
            exit(1);
        }
        try {
            // Wait until the attach has actually stopped the parent.
            do {
                waitpid(tracePid, &traceStatus, 0);
            } while (!WIFSTOPPED(traceStatus));

            for (i = 0; i < 4; i++) {
                // Test 4 different (mis-)alignments, leaving 1-byte gaps between longs
                HSAuint8 *addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[0]) + i) + i;
                // PEEKDATA returns the word itself, so errors are only visible via errno.
                errno = 0;
                long data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
                EXPECT_EQ_GPU(0, errno, gpuNode);
                EXPECT_EQ_GPU(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
                                    reinterpret_cast<void *>(data)), gpuNode);

                if (mem[1] == NULL)
                    continue;

                addr = reinterpret_cast<HSAuint8 *>(reinterpret_cast<long *>(mem[1]) + i) + i;
                errno = 0;
                data = ptrace(PTRACE_PEEKDATA, tracePid, addr, NULL);
                EXPECT_EQ_GPU(0, errno, gpuNode);
                EXPECT_EQ_GPU(0, ptrace(PTRACE_POKEDATA, tracePid, addr + PAGE_SIZE,
                                reinterpret_cast<void *>(data)), gpuNode);
            }
        } catch (...) {
            err = 1;
        }
        r = ptrace(PTRACE_DETACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_DETACH failed: " << r << std::endl;
            exit(1);
        }
        exit(err);
    } else {
        int childStatus;

        // Parent process, just wait for the child to finish
        EXPECT_EQ_GPU(childPid, waitpid(childPid, &childStatus, 0), gpuNode);
        EXPECT_NE_GPU(0, WIFEXITED(childStatus), gpuNode);
        EXPECT_EQ_GPU(0, WEXITSTATUS(childStatus), gpuNode);
    }

    pthread_mutex_unlock(&ptrace_mtx);

    // Clear gaps in the source that should not have been copied
    (reinterpret_cast<uint8_t*>(mem[0]))[  sizeof(long)    ] = 0;
    (reinterpret_cast<uint8_t*>(mem[0]))[2*sizeof(long) + 1] = 0;
    (reinterpret_cast<uint8_t*>(mem[0]))[3*sizeof(long) + 2] = 0;
    (reinterpret_cast<uint8_t*>(mem[0]))[4*sizeof(long) + 3] = 0;
    // Check results
    EXPECT_EQ_GPU(0, memcmp(mem[0], reinterpret_cast<HSAuint8 *>(mem[0]) + PAGE_SIZE,
                        sizeof(long)*4 + 4), gpuNode);
    // Free memory
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(mem[0], sysBufSize), gpuNode);

    if (mem[1]) {
        (reinterpret_cast<uint8_t*>(mem[1]))[  sizeof(HSAint64)    ] = 0;
        (reinterpret_cast<uint8_t*>(mem[1]))[2*sizeof(HSAint64) + 1] = 0;
        (reinterpret_cast<uint8_t*>(mem[1]))[3*sizeof(HSAint64) + 2] = 0;
        (reinterpret_cast<uint8_t*>(mem[1]))[4*sizeof(HSAint64) + 3] = 0;
        EXPECT_EQ_GPU(0, memcmp(mem[1], reinterpret_cast<HSAuint8 *>(mem[1]) + PAGE_SIZE,
                            sizeof(HSAint64)*4 + 4), gpuNode);
        // Undo the test offset to recover the address returned by the allocator,
        // and free with the size that was actually allocated.
        mem[1] = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem[1]) - VRAM_OFFSET);
        EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(mem[1], vramBufSize), gpuNode);
    }
}

/* gtest entry point: hands the static PtraceAccess worker to KFDTest_Launch
 * and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, PtraceAccess) {
	TEST_START(TESTPROFILE_RUNALL);

    // The per-GPU ptrace checks live in the static PtraceAccess() function above.
    ASSERT_SUCCESS(KFDTest_Launch(PtraceAccess));

    TEST_END
}

/* Tests debugger-style (ptrace) access to VRAM that is not host accessible.
 *
 * The parent allocates VRAM with HostAccess = 0, maps it to gpuNode and uses
 * PM4 write-data packets to place 0xdeadbeefdeadbeef in the 64-bit word just
 * before the 4MB offset and 0xcafebabecafebabe in the word just after it.
 * A forked child attaches to the parent, peeks both words, and swaps them by
 * poking. The parent then reads the words back through a GPU dispatch and
 * checks that the swap happened.
 *
 * The test is skipped on APUs and when the HSA_DEBUG environment variable is
 * unset or "0". The fork/trace section is serialized through ptrace_mtx.
 *
 * pTestParamters supplies the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void PtraceAccessInvisibleVram(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    char *hsaDebug = getenv("HSA_DEBUG");

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: There is no VRAM on APU." << std::endl;
        return;
    }

    if (!hsaDebug || !strcmp(hsaDebug, "0")) {
        LOG() << "Skipping test: HSA_DEBUG environment variable not set." << std::endl;
        return;
    }

    HsaMemMapFlags mapFlags = {0};
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    /* Allocate host not accessible vram */
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;

    void *mem, *mem0, *mem1;
    unsigned size = PAGE_SIZE*2 + (4 << 20);
    // data[] holds the full 64-bit patterns seen through ptrace; data0/data1
    // are the same patterns split into the two dwords written by PM4.
    HSAuint64 data[2] = {0xdeadbeefdeadbeef, 0xcafebabecafebabe};
    unsigned int data0[2] = {0xdeadbeef, 0xdeadbeef};
    unsigned int data1[2] = {0xcafebabe, 0xcafebabe};

    const HSAuint64 VRAM_OFFSET = (4 << 20) - sizeof(HSAuint64);

    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, size, memFlags, &mem), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(mem, size, NULL,
                                mapFlags, 1, reinterpret_cast<HSAuint32 *>(&gpuNode)), gpuNode);
    /* Set the word before 4M boundary to 0xdeadbeefdeadbeef
     * and the word after 4M boundary to 0xcafebabecafebabe
     */
    mem0 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET);
    mem1 = reinterpret_cast<void *>(reinterpret_cast<HSAuint8 *>(mem) + VRAM_OFFSET + sizeof(HSAuint64));
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem0,
                                                  data0[0], data0[1]));
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int *)mem1,
                                                  data1[0], data1[1]));
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, 0, 0));
    queue.Wait4PacketConsumption();

    /* Allow any process to trace this one. If kernel is built without
     * Yama, this is not needed, and this call will fail.
     */
#ifdef PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif

    pthread_mutex_lock(&ptrace_mtx);

    // Find out my pid so the child can trace it
    pid_t tracePid = getpid();

    // Fork the child
    pid_t childPid = fork();
    if (childPid < 0) {
        // The assertion below returns from this function on failure; release
        // the lock first so other users of ptrace_mtx are not blocked forever.
        pthread_mutex_unlock(&ptrace_mtx);
    }
    ASSERT_GE_GPU(childPid, 0, gpuNode);
    if (childPid == 0) {
        int traceStatus;
        int err = 0, r;

        /* Child process: we catch any exceptions to make sure we detach
         * from the traced process, because terminating without detaching
         * leaves the traced process stopped.
         */
        r = ptrace(PTRACE_ATTACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_ATTACH failed: " << r << std::endl;
            exit(1);
        }
        try {
            // Wait until the attach has actually stopped the parent.
            do {
                waitpid(tracePid, &traceStatus, 0);
            } while (!WIFSTOPPED(traceStatus));

            /* Peek the memory. PEEKDATA returns the word itself, so errors are
             * only visible through errno, which is therefore cleared before
             * every call and captured right after it. Any mismatch also sets
             * err so that it reaches the parent through the exit status; a
             * failed expectation recorded only in the child would be lost when
             * the child exits.
             */
            errno = 0;
            HSAint64 peek0 = ptrace(PTRACE_PEEKDATA, tracePid, mem0, NULL);
            int peek0Errno = errno;
            EXPECT_EQ_GPU(0, peek0Errno, gpuNode);
            EXPECT_EQ_GPU(data[0], static_cast<HSAuint64>(peek0), gpuNode);
            if (peek0Errno != 0 || static_cast<HSAuint64>(peek0) != data[0])
                err = 1;

            errno = 0;
            HSAint64 peek1 = ptrace(PTRACE_PEEKDATA, tracePid, mem1, NULL);
            int peek1Errno = errno;
            EXPECT_EQ_GPU(0, peek1Errno, gpuNode);
            EXPECT_EQ_GPU(data[1], static_cast<HSAuint64>(peek1), gpuNode);
            if (peek1Errno != 0 || static_cast<HSAuint64>(peek1) != data[1])
                err = 1;

            /* Swap mem0 and mem1 by poking */
            errno = 0;
            long poke0 = ptrace(PTRACE_POKEDATA, tracePid, mem0, reinterpret_cast<void *>(data[1]));
            int poke0Errno = errno;
            EXPECT_EQ_GPU(0, poke0, gpuNode);
            EXPECT_EQ_GPU(0, poke0Errno, gpuNode);
            if (poke0 != 0)
                err = 1;

            errno = 0;
            long poke1 = ptrace(PTRACE_POKEDATA, tracePid, mem1, reinterpret_cast<void *>(data[0]));
            int poke1Errno = errno;
            EXPECT_EQ_GPU(0, poke1, gpuNode);
            EXPECT_EQ_GPU(0, poke1Errno, gpuNode);
            if (poke1 != 0)
                err = 1;
        } catch (...) {
            err = 1;
        }
        r = ptrace(PTRACE_DETACH, tracePid, NULL, NULL);
        if (r) {
            WARN() << "PTRACE_DETACH failed: " << r << std::endl;
            exit(1);
        }
        exit(err);
    } else {
        int childStatus;

        // Parent process, just wait for the child to finish
        EXPECT_EQ_GPU(childPid, waitpid(childPid, &childStatus, 0), gpuNode);
        EXPECT_NE_GPU(0, WIFEXITED(childStatus), gpuNode);
        EXPECT_EQ_GPU(0, WEXITSTATUS(childStatus), gpuNode);
    }

    pthread_mutex_unlock(&ptrace_mtx);

    /* Use shader to read back data to check poke results */
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    // dstBuffer is cpu accessible gtt memory
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    // After the swap, mem0 is expected to hold the pattern first written to mem1.
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(mem0, dstBuffer.As<void*>());
    dispatch0.Submit(queue);
    dispatch0.Sync();
    EXPECT_EQ_GPU(data1[0], dstBuffer.As<unsigned int*>()[0], gpuNode);

    // ...and mem1 the pattern first written to mem0.
    Dispatch dispatch1(isaBuffer);
    dispatch1.SetArgs(mem1, dstBuffer.As<int*>());
    dispatch1.Submit(queue);
    dispatch1.Sync();
    WaitOnValue(dstBuffer.As<uint32_t *>(), data0[0]);
    EXPECT_EQ_GPU(data0[0], dstBuffer.As<unsigned int*>()[0], gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(mem), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(mem, size), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* gtest entry point: hands the static PtraceAccessInvisibleVram worker to
 * KFDTest_Launch and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
	TEST_START(TESTPROFILE_RUNALL);

    // The per-GPU checks live in the static PtraceAccessInvisibleVram() function above.
    ASSERT_SUCCESS(KFDTest_Launch(PtraceAccessInvisibleVram));

    TEST_END
}

/* Number of the last signal delivered to CatchSignal(), or 0 when none is
 * pending. It is written from a signal handler, so it is declared as
 * volatile sig_atomic_t, the object type that handlers may portably assign to.
 */
volatile sig_atomic_t IntrSignalReceviced;

/* Signal handler: records the delivered signal number in IntrSignalReceviced.
 * It does nothing else, which keeps it async-signal-safe.
 */
void CatchSignal(int IntrSignal) {
    IntrSignalReceviced = IntrSignal;
}

/* Maps a large system-memory buffer to the GPU while a forked child sends
 * SIGUSR1 to this process, then checks that the mapping is still usable by
 * overwriting its first dword with an SDMA write.
 *
 * The buffer is a quarter of system RAM, page aligned and capped at 3GB.
 * The test is skipped on APUs.
 *
 * pTestParamters supplies the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void SignalHandling(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Test not supported on APU." << std::endl;
        return;
    }

    unsigned int *noBuffer = NULL;
    unsigned int *sysBuf = NULL;
    struct sigaction usr1Action;
    SDMAQueue queue;

    // Route SIGUSR1 to CatchSignal so the signal is recorded rather than fatal.
    usr1Action.sa_handler = CatchSignal;
    sigemptyset(&usr1Action.sa_mask);
    usr1Action.sa_flags = 0;
    pid_t ParentPid = getpid();
    EXPECT_EQ(0, sigaction(SIGUSR1, &usr1Action, NULL)) << "An error occurred while setting a signal handler";

    const HSAuint64 sysMemSize = pKFDMemoryTest->GetSysMemSize();

    /* System (kernel) memory are limited to 3/8th System RAM
     * Try to allocate 1/4th System RAM
     */
    HSAuint64 bufBytes = (sysMemSize >> 2) & ~(HSAuint64)(PAGE_SIZE - 1);

    /* We don't need a too large buffer for this test. If it is too large,
     * on some platform, the upcoming hsaKmtAllocMemory() might fail. In
     * order to avoid this flaky behavior, limit the size to 3G.
     */
    if (bufBytes > (3ULL << 30))
        bufBytes = (3ULL << 30);

    pKFDMemoryTest->GetHsaMemFlags().ui32.NoNUMABind = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(0 /* system */, bufBytes, pKFDMemoryTest->GetHsaMemFlags(), reinterpret_cast<void**>(&sysBuf)), gpuNode);
    // Verify that the buffer pointer is not null before it's being used
    EXPECT_NE_GPU(noBuffer, sysBuf, gpuNode) << "hsaKmtAllocMemory returned a null pointer";

    pid_t childPid = fork();
    ASSERT_GE_GPU(childPid, 0, gpuNode);

    if (childPid == 0) {
        // Child: signal the parent and leave; it never returns from here.
        EXPECT_EQ_GPU(0, kill(ParentPid, SIGUSR1), gpuNode);
        exit(0);
    }

    // Parent: map the buffer while the child's signal may arrive.
    LOG() << "Start Memory Mapping..." << std::endl;
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(sysBuf, bufBytes, NULL), gpuNode);
    LOG() << "Mapping finished" << std::endl;

    int childStatus;
    pid_t reaped;

    // Wait for the child to finish, retrying when the wait itself is interrupted
    for (;;) {
        reaped = waitpid(childPid, &childStatus, 0);
        if (IntrSignalReceviced) {
            LOG() << "Interrupt Signal " << std::dec << IntrSignalReceviced
                << " Received" << std::endl;
            IntrSignalReceviced = 0;
        }
        if (!(reaped == -1 && errno == EINTR))
            break;
    }
    EXPECT_EQ_GPU(childPid, reaped, gpuNode);
    EXPECT_NE_GPU(0, WIFEXITED(childStatus), gpuNode);
    EXPECT_EQ_GPU(0, WEXITSTATUS(childStatus), gpuNode);

    // Seed the first dword from the CPU, then expect SDMA to overwrite it.
    sysBuf[0] = 0x02020202;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), sysBuf, 0x01010101) );
    queue.Wait4PacketConsumption();
    EXPECT_TRUE_GPU(WaitOnValue(sysBuf, 0x01010101), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(sysBuf), gpuNode);
    // Release the buffers
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(sysBuf, bufBytes), gpuNode);
}

/* gtest entry point: hands the static SignalHandling worker to KFDTest_Launch
 * and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, SignalHandling) {
	TEST_START(TESTPROFILE_RUNALL);

    // The per-GPU checks live in the static SignalHandling() function above.
    ASSERT_SUCCESS(KFDTest_Launch(SignalHandling));

    TEST_END
}

/* Checks that freshly allocated system memory reads back as zero.
 *
 * A quarter of system RAM is divided by the number of GPU nodes; a buffer of
 * the resulting size is allocated five times in a row. After each allocation
 * the first 64-bit word, one word per 4KB starting at word index 257, and the
 * last word are expected to be zero and are then overwritten with non-zero
 * values, so that a later allocation reusing those pages without clearing
 * them would be detected.
 *
 * pTestParamters supplies the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void CheckZeroInitializationSysMem(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    int ret;

    int gpuNum = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU().size();

    /* if no gpu node */
    if (gpuNum <= 0)
        return;

    HSAuint64 sysMemSizeMB = pKFDMemoryTest->GetSysMemSize() >> 20;

    /* Testing system memory */
    HSAuint64 * pDb = NULL;

    HSAuint64 sysBufSizeMB = sysMemSizeMB >> 2;
    HSAuint64 sysBufSize = sysBufSizeMB * 1024 * 1024;

    /* use divided sys ram to test on each gpu to avoid sys ram OOM.
     * Round down to a whole number of pages, as SignalHandling does for its
     * system buffer, so that a GPU count that does not divide the total
     * evenly cannot produce a partial-page request.
     */
    HSAuint64 sysBufSizePerGPU = (sysBufSize / gpuNum) & ~(HSAuint64)(PAGE_SIZE - 1);

    /* Nothing to test with an empty buffer; this also keeps pDb[size-1]
     * below from indexing before the start of the buffer.
     */
    if (sysBufSizePerGPU == 0) {
        LOG() << "Skipping test: Not enough system memory." << std::endl;
        return;
    }

    int count = 5;

    LOG() << "Using " << std::dec << sysBufSizeMB
            << "MB system buffer to test " << std::dec << count
            << " times" << std::endl;

    const HSAuint64 offset = 257;  // a constant offset, should be smaller than 512.
    /* Number of 64-bit words in the buffer. Kept in 64 bits: a 32-bit count
     * wraps once the per-GPU buffer reaches 32GB.
     */
    const HSAuint64 size = sysBufSizePerGPU / sizeof(*pDb);

    pKFDMemoryTest->GetHsaMemFlags().ui32.NoNUMABind = 1;

    while (count--) {
        ret = hsaKmtAllocMemory(0 /* system */, sysBufSizePerGPU, pKFDMemoryTest->GetHsaMemFlags(),
                                reinterpret_cast<void**>(&pDb));
        if (ret) {
            // Report the size that was actually requested for this GPU.
            LOG() << "Failed to allocate system buffer of " << std::dec
                    << (sysBufSizePerGPU >> 20) << "MB" << std::endl;
            return;
        }

        /* Check the first 64 bits */
        EXPECT_EQ_GPU(0, pDb[0], gpuNode);
        pDb[0] = 1;

        /* Sample one word in every 4KB of the buffer */
        for (HSAuint64 i = offset; i < size;) {
            EXPECT_EQ_GPU(0, pDb[i], gpuNode);
            pDb[i] = i + 1;  // set it to non zero

            i += 4096 / sizeof(*pDb);
        }

        /* check the last 64 bit */
        EXPECT_EQ_GPU(0, pDb[size-1], gpuNode);
        pDb[size-1] = size;

        EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(pDb, sysBufSizePerGPU), gpuNode);
    }
}

/* gtest entry point for the system-memory zero-initialization check.
 *
 * Returns early unless the environment reports the 64-bit Linux capability,
 * then hands the static CheckZeroInitializationSysMem worker to
 * KFDTest_Launch and asserts that the launch reports success.
 */
TEST_F(KFDMemoryTest, CheckZeroInitializationSysMem) {
	TEST_START(TESTPROFILE_RUNALL);
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);

    // The per-GPU checks live in the static CheckZeroInitializationSysMem() function above.
    ASSERT_SUCCESS(KFDTest_Launch(CheckZeroInitializationSysMem));

    TEST_END
}

/* Touches a buffer in 1024-byte chunks, walking from its end towards its start.
 *
 * sd   - start of the buffer to touch
 * size - buffer length in bytes; a trailing remainder smaller than one chunk
 *        is left at the START of the buffer and is never touched
 * rw   - 0 copies each chunk out of the buffer into a static scratch chunk
 *        (read); any other value copies the scratch chunk into the buffer
 *        (write)
 *
 * The scratch chunk is a function-local static, so it is shared by all calls.
 */
static inline void access(volatile void *sd, int size, int rw) {
    /* Most likely sitting in cache*/
    struct Chunk {
        char bytes[1024];
    };
    static Chunk scratch;

    char *base = (char*)sd;
    int offset = size;

    for (;;) {
        // Step down by one chunk; stop once a whole chunk no longer fits.
        offset -= sizeof(scratch);
        if (offset < 0)
            return;

        Chunk *slot = (Chunk *)(base + offset);
        if (rw != 0)
            *slot = scratch;
        else
            scratch = *slot;
    }
}

/*
 * On large-bar system, test the visible vram access speed.
 * KFD is not allowed to alloc visible vram on non-largebar system.
 *
 * For each memory type (system memory, then host-visible VRAM) and each
 * buffer size (1, 4, 16 and 64 pages) the worker allocates up to 1000
 * buffers and times four CPU access patterns across all of them: memcpy into
 * the buffers, access() writes, memcpy out of the buffers and access()
 * reads. The elapsed times are logged and passed to RECORD().
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void MMBandWidth(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    unsigned nBufs = 1000; /* measure us, report ns */
    unsigned testIndex;
    const unsigned nMemTypes = 2;
    const char *memTypeStrings[nMemTypes] = {"SysMem", "VRAM"};
    const unsigned nSizes = 4;
    const unsigned bufSizes[nSizes] = {PAGE_SIZE, PAGE_SIZE*4, PAGE_SIZE*16, PAGE_SIZE*64};
    const unsigned nTests = nSizes * nMemTypes;
    const unsigned tmpBufferSize = PAGE_SIZE*64;
#define _TEST_BUFSIZE(index) (bufSizes[index % nSizes])
#define _TEST_MEMTYPE(index) ((index / nSizes) % nMemTypes)

    /* nBufs only ever shrinks below, so the initial count is large enough */
    std::vector<void *> bufs(nBufs);
    HSAuint64 start;
    unsigned i;
    HsaMemFlags memFlags = {0};

    HSAuint64 vramSizeMB = pKFDMemoryTest->GetVramSize(gpuNode) >> 20;

    LOG() << "Found VRAM of " << std::dec << vramSizeMB << "MB." << std::endl;

    if (!pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(gpuNode) || !vramSizeMB) {
        LOG() << "Skipping test: Test requires a large bar GPU." << std::endl;
        return;
    }

    /* Host-side staging buffer, as large as the largest test buffer */
    void *tmp = mmap(0,
            tmpBufferSize,
            PROT_READ | PROT_WRITE,
            MAP_ANONYMOUS | MAP_PRIVATE,
            -1,
            0);
    EXPECT_NE_GPU(tmp, MAP_FAILED, gpuNode);
    /* The expectation above only records the failure; bail out explicitly so
     * MAP_FAILED is never handed to memset/memcpy/munmap.
     */
    if (tmp == MAP_FAILED)
        return;
    memset(tmp, 0, tmpBufferSize);

    LOG() << "Test (avg. ns)\t  memcpyRTime memcpyWTime accessRTime accessWTime" << std::endl;
    for (testIndex = 0; testIndex < nTests; testIndex++) {
        unsigned bufSize = _TEST_BUFSIZE(testIndex);
        unsigned memType = _TEST_MEMTYPE(testIndex);
        HSAuint64 mcpRTime, mcpWTime, accessRTime, accessWTime;
        HSAuint32 allocNode;

        /* Separator line at the start of every memory type (nSizes is a power of two) */
        if ((testIndex & (nSizes-1)) == 0)
            LOG() << "----------------------------------------------------------------------" << std::endl;

        if (memType == 0) {
            allocNode = 0;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 0;
            memFlags.ui32.NoNUMABind = 1;
        } else {
            /* Alloc visible vram*/
            allocNode = gpuNode;
            memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
            memFlags.ui32.HostAccess = 1;
            memFlags.ui32.NonPaged = 1;

            /* Buffer sizes are 2MB aligned to match new allocation policy.
             * Upper limit of buffer number to fit 80% vram size.
             */
            unsigned bufLimit = ((vramSizeMB << 20) * 8 / 10) / ALIGN_UP(bufSize, VRAM_ALLOCATION_ALIGN);
            if (bufLimit == 0)
                continue; // skip when bufSize > vram

            /* When vram is too small to fit all the buffers, fill 80% vram size*/
            nBufs = std::min(nBufs , bufLimit);
        }

        for (i = 0; i < nBufs; i++)
            ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(allocNode, bufSize, memFlags,
                        &bufs[i]), gpuNode);

        /* CPU writes into the buffers via memcpy */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            memcpy(bufs[i], tmp, bufSize);
        }
        mcpWTime = GetSystemTickCountInMicroSec() - start;

        /* CPU writes into the buffers via access() */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            access(bufs[i], bufSize, 1);
        }
        accessWTime = GetSystemTickCountInMicroSec() - start;

        /* CPU reads from the buffers via memcpy */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            memcpy(tmp, bufs[i], bufSize);
        }
        mcpRTime = GetSystemTickCountInMicroSec() - start;

        /* CPU reads from the buffers via access() */
        start = GetSystemTickCountInMicroSec();
        for (i = 0; i < nBufs; i++) {
            access(bufs[i], bufSize, 0);
        }
        accessRTime = GetSystemTickCountInMicroSec() - start;

        for (i = 0; i < nBufs; i++)
            EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(bufs[i], bufSize), gpuNode);

        LOG() << std::dec
            << std::right << std::setw(3) << (bufSize >> 10) << "K-"
            << std::left << std::setw(14) << memTypeStrings[memType]
            << std::right
            << std::setw(12) << mcpRTime
            << std::setw(12) << mcpWTime
            << std::setw(12) << accessRTime
            << std::setw(12) << accessWTime
            << std::endl;

#define MMBANDWIDTH_KEY_PREFIX memTypeStrings[memType] << "-" \
                               << (bufSize >> 10) << "K" << "-"
        RECORD(mcpRTime) << MMBANDWIDTH_KEY_PREFIX << "mcpRTime";
        RECORD(mcpWTime) << MMBANDWIDTH_KEY_PREFIX << "mcpWTime";
        RECORD(accessRTime) << MMBANDWIDTH_KEY_PREFIX << "accessRTime";
        RECORD(accessWTime) << MMBANDWIDTH_KEY_PREFIX << "accessWTime";

        // skip slow tests
        if (mcpRTime + mcpWTime + accessRTime + accessWTime > 5000000)
            break;
    }

    munmap(tmp, tmpBufferSize);
}

/* gtest wrapper: hands the MMBandWidth worker to KFDTest_Launch and asserts
 * that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, MMBandWidth) {
	TEST_START(TESTPROFILE_RUNALL);
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);

    ASSERT_SUCCESS(KFDTest_Launch(MMBandWidth));

    TEST_END
}

/* For the purpose of testing HDP flush from CPU.
 * Use CPU to write to coherent vram and check
 * from shader.
 * Asic before gfx9 doesn't support user space
 * HDP flush so only run on vega10 and after.
 * This should only run on large bar system.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void HostHdpFlush(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    HsaMemFlags memoryFlags = pKFDMemoryTest->GetHsaMemFlags();
    /* buffer[0]: signal; buffer[1]: Input to shader; buffer[2]: Output to
     * shader
     */
    unsigned int *buffer = NULL;
    const HsaNodeProperties *pNodeProperties = pKFDMemoryTest->Get_NodeInfo()->GetNodeProperties(gpuNode);
    HSAuint32 *mmioBase = NULL;
    unsigned int *nullPtr = NULL;

    if (!pNodeProperties) {
        LOG() << "Failed to get gpu node properties." << std::endl;
        return;
    }

    if (m_FamilyId < FAMILY_AI) {
        LOG() << "Skipping test: Test requires gfx9 and later asics." << std::endl;
        return;
    }
    HSAuint64 vramSizeMB = pKFDMemoryTest->GetVramSize(gpuNode) >> 20;

    if (!pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(gpuNode) || !vramSizeMB) {
        LOG() << "Skipping test: Test requires a large bar GPU." << std::endl;
        return;
    }

    /* Look up the MMIO remap aperture among the node's memory banks */
    HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
    EXPECT_SUCCESS_GPU(hsaKmtGetNodeMemoryProperties(gpuNode, pNodeProperties->NumMemoryBanks,
                   memoryProperties), gpuNode);
    for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
        if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_MMIO_REMAP) {
            mmioBase = (unsigned int *)memoryProperties[bank].VirtualBaseAddress;
            break;
        }
    }
    /* The bank table is only needed for the lookup above; release it now so
     * that none of the exit paths below can leak it.
     */
    delete [] memoryProperties;

    if (mmioBase == nullPtr) {
            LOG() << "Skipping test: bsecause mmioBase is nullPtr, the mmio remap feature is not supported." << std::endl;
            return;
    }

    memoryFlags.ui32.NonPaged = 1;
    memoryFlags.ui32.CoarseGrain = 0;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, memoryFlags,
                   reinterpret_cast<void**>(&buffer)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL), gpuNode);

    /* Signal is dead from the beginning*/
    buffer[0] = 0xdead;
    buffer[1] = 0xfeeb;
    buffer[2] = 0xfeed;
    /* Submit a shader to poll the signal*/
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(buffer, NULL);
    dispatch0.Submit(queue);

    /* Order matters: publish the input, flush HDP, then raise the signal */
    buffer[1] = 0xbeef;
    /* Flush HDP */
    mmioBase[KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL/4] = 0x1;
    buffer[0] = 0xcafe;

    /* Check test result*/
    dispatch0.Sync();
    mmioBase[KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL/4] = 0x1;
    EXPECT_EQ_GPU(0xbeef, buffer[2], gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buffer), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(buffer, PAGE_SIZE), gpuNode);
}

/* gtest wrapper: hands the HostHdpFlush worker to KFDTest_Launch and asserts
 * that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, HostHdpFlush) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(HostHdpFlush));

    TEST_END
}

/* Test HDP flush from device.
 * Use shader on device 1 to write vram of device 0
 * and flush HDP of device 0. Read vram from device 0
 * and write back to vram to check the result from CPU.
 * Asic before gfx9 doesn't support device HDP flush
 * so only run on vega10 and after.
 * This should only run on system with at least one
 * large bar node (which is used as device 0).
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject). Device 0 is nodes[0] and device 1 is
 * nodes[1]; both come either from the --node/--dst_node options or from
 * FindAccessiblePeers().
 */
static void DeviceHdpFlush(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);
    HsaMemFlags memoryFlags = pKFDMemoryTest->GetHsaMemFlags();
    /* buffer is physically on device 0.
     * buffer[0]: Use as signaling b/t devices;
     * buffer[1]: Device 1 write to buffer[1] and device 0 read it
     * buffer[2]: Device 0 copy buffer[1] to buffer[2] for CPU to check
     */
    unsigned int *buffer = NULL;
    const HsaNodeProperties *pNodeProperties;
    HSAuint32 *mmioBase = NULL;
    unsigned int *nullPtr = NULL;
    std::vector<int> nodes;

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }

     /* Users can use "--node=gpu1 --dst_node=gpu2" to specify devices */
    if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
        nodes.push_back(g_TestNodeId);
        nodes.push_back(g_TestDstNodeId);

        if (!pKFDMemoryTest->Get_NodeInfo()->IsPeerAccessibleByNode(g_TestDstNodeId, g_TestNodeId)) {
            LOG() << "Skipping test: first GPU specified is not peer-accessible." << std::endl;
            return;
        }

        if (nodes[0] == nodes[1]) {
            LOG() << "Skipping test: Different GPUs must be specified (2 GPUs required)." << std::endl;
            return;
        }
    } else {
        pKFDMemoryTest->Get_NodeInfo()->FindAccessiblePeers(&nodes, gpuNode);
        if (nodes.size() < 2) {
            LOG() << "Skipping test: Test requires at least one large bar GPU." << std::endl;
            LOG() << "               or two GPUs are XGMI connected." << std::endl;
            return;
        }
    }

    const HsaNodeProperties *pNodePropertiesDev1 = NULL;
    unsigned int m_FamilyIdDev1 = 0;

    pNodeProperties = pKFDMemoryTest->Get_NodeInfo()->GetNodeProperties(nodes[0]);
    pNodePropertiesDev1 = pKFDMemoryTest->Get_NodeInfo()->GetNodeProperties(nodes[1]);
    if (!pNodeProperties || !pNodePropertiesDev1) {
        LOG() << "Failed to get gpu node properties." << std::endl;
        return;
    }

    m_FamilyIdDev1 = FamilyIdFromNode(pNodePropertiesDev1);

    if (m_FamilyId < FAMILY_AI || m_FamilyIdDev1 < FAMILY_AI) {
        LOG() << "Skipping test: Test requires gfx9 and later asics." << std::endl;
        return;
    }

    if (pKFDMemoryTest->Get_NodeInfo()->IsNodeXGMItoCPU(nodes[0])) {
        LOG() << "Skipping test: PCIe link to CPU is required." << std::endl;
        return;
    }

    if (!pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(nodes[0])) {
        LOG() << "Skipping test: Test requires device 0 large bar GPU." << std::endl;
        return;
    }

    /* Look up device 0's MMIO remap aperture among its memory banks */
    HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
    EXPECT_SUCCESS_GPU(hsaKmtGetNodeMemoryProperties(nodes[0], pNodeProperties->NumMemoryBanks,
                   memoryProperties), gpuNode);
    for (unsigned int bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++) {
        if (memoryProperties[bank].HeapType == HSA_HEAPTYPE_MMIO_REMAP) {
            mmioBase = (unsigned int *)memoryProperties[bank].VirtualBaseAddress;
            break;
        }
    }
    /* The bank table is only needed for the lookup above; release it now so
     * that none of the exit paths below can leak it.
     */
    delete [] memoryProperties;

    if (mmioBase == nullPtr) {
            LOG() << "Skipping test: bsecause mmioBase is nullPtr, the mmio remap feature is not supported." << std::endl;
            return;
    }

    memoryFlags.ui32.NonPaged = 1;
    memoryFlags.ui32.CoarseGrain = 0;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(nodes[0], PAGE_SIZE, memoryFlags,
                   reinterpret_cast<void**>(&buffer)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL), gpuNode);

    /* Signal is dead from the beginning*/
    buffer[0] = 0xdead;
    buffer[1] = 0xfeeb;
    buffer[2] = 0xfeeb;
    /* Submit shaders*/
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(nodes[0]), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, nodes[0], true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer, NULL);
    dispatch.Submit(queue);

    PM4Queue queue0;
    ASSERT_SUCCESS_GPU(queue0.Create(nodes[1]), gpuNode);
    HsaMemoryBuffer isaBuffer0(PAGE_SIZE, nodes[1], true/*zero*/, false/*local*/, true/*exec*/);

    /* Temporarily set target ASIC for Dev1 */
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(WriteAndSignalIsa, isaBuffer0.As<char*>(),
                        PAGE_SIZE, GetGfxVersion(pNodePropertiesDev1)), gpuNode);

    Dispatch dispatch0(isaBuffer0);
    dispatch0.SetArgs(buffer, mmioBase);
    dispatch0.Submit(queue0);

    /* Check test result*/
    dispatch0.Sync();
    dispatch.Sync();
    /* Use the per-GPU expectation like the rest of this worker */
    EXPECT_EQ_GPU(0xbeef, buffer[2], gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(queue0.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buffer), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(buffer, PAGE_SIZE), gpuNode);
}

/* gtest wrapper: hands the DeviceHdpFlush worker to KFDTest_Launch and
 * asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, DeviceHdpFlush) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(DeviceHdpFlush));

    TEST_END
}

/* Test should only run on Arcturus series which has the new RW mtype.
 * Map a local VRAM with RW mtype (coarse grain for upper layer),
 * read it locally to cache it and write with local SDMA, remote devices(
 * CPU or Remote GPU shader connected with PCIe or XGMI),
 * then read again. The second read should get back what SDMA wrote,
 * since the cache should be invalidated on write and second read
 * should go to physical VRAM instead of cache.
 *
 * This worker covers the local SDMA writer. pTestParamters carries the GPU
 * node under test (gpuNode) and the owning KFDMemoryTest fixture
 * (pTestObject).
 */
static void CacheInvalidateOnSdmaWrite(KFDTEST_PARAMETERS* pTestParamters) {

	int gpuNode = pTestParamters->gpuNode;
	KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    /* Zeroed page on node 0, handed to IsPattern() below
     * (presumably as its scratch/readback location - confirm).
     */
    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();
    /* Dword index inside the buffer that is checked for the result */
    const int dwLocation = 100;

    if (m_FamilyId != FAMILY_AR) {
        LOG() << "Skipping test: Test requires arcturus series asics." << std::endl;
        return;
    }

    HsaMemoryBuffer buffer(PAGE_SIZE, gpuNode, false/*zero*/, true/*local*/, false/*exec*/);
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);
    buffer.Fill(0, sdmaQueue, 0, PAGE_SIZE);
    /* Place the SDMA write of 0x5678 now; it is submitted only after the
     * shader has been given time to start (see SubmitPacket() below).
     */
    sdmaQueue.PlacePacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), buffer.As<int*>(), 0x5678));

    /* Read buffer from shader to fill cache */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()), gpuNode);

    /* Shader arguments: start of the buffer and the dword at dwLocation */
    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer.As<int*>(), buffer.As<int*>()+dwLocation);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* SDMA writes to buffer. Shader should get what sdma writes and quits*/
    sdmaQueue.SubmitPacket();
    sdmaQueue.Wait4PacketConsumption();

    /* Check test result*/
    dispatch.Sync();
    EXPECT_EQ_GPU(buffer.IsPattern(dwLocation*sizeof(int), 0x5678, sdmaQueue, tmp), true, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the CacheInvalidateOnSdmaWrite worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, CacheInvalidateOnSdmaWrite) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CacheInvalidateOnSdmaWrite));

    TEST_END
}

/* CPU-writer variant of the cache invalidation test: a shader polls a
 * host-accessible, coarse-grain VRAM page while the CPU writes 0x5678 to its
 * first dword; afterwards dword 100 of the page is expected to hold 0x5678.
 * Skipped unless the node is an Arcturus-family (FAMILY_AR) large-bar GPU.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void CacheInvalidateOnCPUWrite(KFDTEST_PARAMETERS* pTestParamters) {

	int gpuNode = pTestParamters->gpuNode;
	KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId != FAMILY_AR) {
        LOG() << "Skipping test: Test requires arcturus series asics." << std::endl;
        return;
    }

    if (!pKFDMemoryTest->Get_NodeInfo()->IsGPUNodeLargeBar(gpuNode)) {
        LOG() << "Skipping test: Test requires a large bar GPU." << std::endl;
        return;
    }

    int *buffer;
    HsaMemFlags memFlags = {0};
    /* Host accessible vram */
    memFlags.ui32.HostAccess = 1;
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.CoarseGrain = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, memFlags, reinterpret_cast<void**>(&buffer)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL), gpuNode);
    /* Clear the polled dword before the shader starts */
    *buffer = 0;

    /* Read buffer from shader to fill cache */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()), gpuNode);

    /* Shader arguments: start of the buffer and dword 100 of the buffer */
    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer, buffer+100);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* CPU writes to buffer. Shader should get what CPU writes and quits*/
    *buffer = 0x5678;

    /* Check test result*/
    dispatch.Sync();
    EXPECT_EQ_GPU(buffer[100], 0x5678, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buffer), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(buffer, PAGE_SIZE), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the CacheInvalidateOnCPUWrite worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, CacheInvalidateOnCPUWrite) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CacheInvalidateOnCPUWrite));

    TEST_END
}

/* Remote-GPU-writer variant of the cache invalidation test: a local shader
 * polls a local VRAM page while a shader on a second GPU copies 0x5678 from
 * dword dwLocation1 to the start of that page; afterwards dword dwLocation
 * of the page is expected to hold 0x5678.
 * Skipped unless the node is Arcturus family (FAMILY_AR) and at least two
 * GPU nodes are present.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void CacheInvalidateOnRemoteWrite(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();
    /* Dword indices inside the buffer: result slot and remote copy source */
    const int dwLocation = 100;
    const int dwLocation1 = 50;

    if (m_FamilyId != FAMILY_AR) {
        LOG() << "Skipping test: Test requires arcturus series asics." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }

    /* Pick the first GPU node that is not the node under test */
    HSAuint32 nondefaultNode;
    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        if (gpuNodes.at(i) != gpuNode) {
            nondefaultNode = gpuNodes.at(i);
            break;
        }
    }

    HsaMemoryBuffer buffer(PAGE_SIZE, gpuNode, false/*zero*/, true/*local*/, false/*exec*/);
    buffer.MapMemToNodes(&nondefaultNode, 1);
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);
    buffer.Fill(0, sdmaQueue, 0, PAGE_SIZE);

    /* Read buffer from shader to fill cache */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer.As<int*>(), buffer.As<int*>()+dwLocation);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* Using a remote shader to copy data from dwLocation1 to the beginning of the buffer.
     * Local shader should get what remote writes and quits
     */
    PM4Queue queue1;
    ASSERT_SUCCESS_GPU(queue1.Create(nondefaultNode), gpuNode);
    buffer.Fill(0x5678, sdmaQueue, dwLocation1*sizeof(int), 4);
    HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/);

    /* Assemble the copy shader into the remote node's code buffer. Writing
     * it into isaBuffer would overwrite the code of the polling shader that
     * is still running and leave isaBuffer1 zero-filled.
     */
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer1.As<char*>()), gpuNode);

    Dispatch dispatch1(isaBuffer1);
    dispatch1.SetArgs(buffer.As<int*>()+dwLocation1, buffer.As<int*>());
    dispatch1.Submit(queue1);
    dispatch1.Sync(g_TestTimeOut);

    /* Check test result*/
    dispatch.Sync();
    EXPECT_EQ_GPU(buffer.IsPattern(dwLocation*sizeof(int), 0x5678, sdmaQueue, tmp), true, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(queue1.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the CacheInvalidateOnRemoteWrite worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, CacheInvalidateOnRemoteWrite) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CacheInvalidateOnRemoteWrite));

    TEST_END
}

/* Test is for new cache coherence on Aldebaran. It is to verify
 * two GPUs can coherently share a fine grain FB.
 *
 * A local shader polls a flag in a local VRAM page that is mapped to both
 * GPUs; a shader on the second GPU writes the flag and a value, and the
 * page is finally checked for 0x5678 at byte offset dwLocation.
 * Skipped unless the node family is FAMILY_AL or FAMILY_AV and at least two
 * GPU nodes are present.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void VramCacheCoherenceWithRemoteGPU(KFDTEST_PARAMETERS* pTestParamters) {

	int gpuNode = pTestParamters->gpuNode;
	KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();
    /* Despite the "dw" prefix both values are byte offsets: they are scaled
     * by sizeof(int) here and added to char pointers below.
     */
    const int dwSource = 0x40 * sizeof(int); /* At 3rd cache line */
    const int dwLocation = 0x80 * sizeof(int); /* At 5th cache line  */

    if (m_FamilyId != FAMILY_AL && m_FamilyId != FAMILY_AV) {
        LOG() << "Skipping test: Test requires aldebaran or aqua vanjaram series asics." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = pKFDMemoryTest->Get_NodeInfo()->GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }

    /* Pick the first GPU node that is not the node under test */
    HSAuint32 nondefaultNode;
    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        if (gpuNodes.at(i) != gpuNode) {
            nondefaultNode = gpuNodes.at(i);
            break;
        }
    }

    unsigned int nodes[2] = {(HSAuint32)gpuNode, nondefaultNode};

    /* Allocate a local FB */
    HsaMemoryBuffer buffer(PAGE_SIZE, gpuNode, false/*zero*/, true/*local*/, false/*exec*/);
    buffer.MapMemToNodes(&nodes[0], 2);
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);
    /* Zero the page, then place 0x5678 at byte offset dwSource */
    buffer.Fill(0, sdmaQueue, 0, PAGE_SIZE);
    buffer.Fill(0x5678, sdmaQueue, dwSource, 4);

    /* Read buffer[0] as flag from local shader to fill cache line (64 dws)
     * which should has 0 at buffer[1]
     */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer.As<char *>(), buffer.As<char *>()+dwLocation);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* Using remote shader to write the flag and copy value from dwSource
     * to dwLocation in buffer.
     * Local shader should get the flag and execute CopyMemory
     */
    PM4Queue queue1;
    ASSERT_SUCCESS_GPU(queue1.Create(nondefaultNode), gpuNode);
    HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(WriteFlagAndValueIsa, isaBuffer1.As<char*>()), gpuNode);

    Dispatch dispatch1(isaBuffer1);
    dispatch1.SetArgs(buffer.As<char *>(), buffer.As<char *>()+dwSource);
    dispatch1.Submit(queue1);
    dispatch1.Sync(g_TestTimeOut);

    /* Check test result*/
    dispatch.Sync(g_TestTimeOut);
    EXPECT_EQ_GPU(buffer.IsPattern(dwLocation, 0x5678, sdmaQueue, tmp), true, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(queue1.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the VramCacheCoherenceWithRemoteGPU worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, VramCacheCoherenceWithRemoteGPU) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(VramCacheCoherenceWithRemoteGPU));

    TEST_END
}

/* Test is for new cache coherence on A+A(Aldebaran). It is to verify
 * new XGMI coherence HW link in caches between CPU and GPUs
 * in local FB with fine grain mode.
 *
 * A shader polls a host-accessible VRAM page while the CPU writes a value
 * (buffer[1]) and then a flag (buffer[0]); afterwards buffer[dwLocation] is
 * expected to hold the value.
 * Skipped unless the node family is FAMILY_AL or FAMILY_AV and the node
 * reports an XGMI link to the CPU.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void VramCacheCoherenceWithCPU(KFDTEST_PARAMETERS* pTestParamters) {

	int gpuNode = pTestParamters->gpuNode;
	KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId != FAMILY_AL && m_FamilyId != FAMILY_AV) {
        LOG() << "Skipping test: Test requires aldebaran or aqua vanjaram series asics." << std::endl;
        return;
    }

    /* Index (in unsigned ints) of the result slot inside the buffer */
    const int dwLocation = 0x80;

    if (!pKFDMemoryTest->Get_NodeInfo()->IsNodeXGMItoCPU(gpuNode)) {
        LOG() << "Skipping test: XGMI link to CPU is required." << std::endl;
        return;
    }

    unsigned int *buffer;
    HsaMemFlags memFlags = {0};
    /* Allocate a fine grain local FB accessed by CPU */
    memFlags.ui32.HostAccess = 1;
    memFlags.ui32.NonPaged = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, memFlags,
            reinterpret_cast<void**>(&buffer)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(buffer, PAGE_SIZE, NULL), gpuNode);
    /* Clear the flag and the result slot before the shader starts */
    buffer[0] = 0;
    buffer[dwLocation] = 0;

    /* Read buffer from shader to fill cache */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer, buffer+dwLocation);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* CPU writes to buffer. Shader should get 0x5678 CPU writes
     * after cache invalidating(buffer_invl2) and quits
     */
    buffer[1] = 0x5678;
    buffer[0] = 1;

    /* Check test result*/
    dispatch.Sync(g_TestTimeOut);
    EXPECT_EQ_GPU(buffer[dwLocation], 0x5678, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buffer), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(buffer, PAGE_SIZE), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the VramCacheCoherenceWithCPU worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, VramCacheCoherenceWithCPU) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(VramCacheCoherenceWithCPU));

    TEST_END
}

/* Test is for new cache coherence on Aldebaran. It is to verify
 * new XGMI coherence HW link in caches between CPU and GPUs
 * in system RAM.
 *
 * A shader polls a buffer allocated with the fixture's default memory flags
 * while the CPU writes a value (fineBuffer[1]) and then a flag
 * (fineBuffer[0]); afterwards fineBuffer[dwLocation] is expected to hold the
 * value.
 * Skipped unless the node family is FAMILY_AL or FAMILY_AV and the node
 * reports an XGMI link to the CPU.
 *
 * pTestParamters carries the GPU node under test (gpuNode) and the owning
 * KFDMemoryTest fixture (pTestObject).
 */
static void SramCacheCoherenceWithGPU(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HSAuint32 m_FamilyId = pKFDMemoryTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId != FAMILY_AL && m_FamilyId != FAMILY_AV) {
        LOG() << "Skipping test: Test requires aldebaran or aqua vanjaram series asics." << std::endl;
        return;
    }

    /* Index (in unsigned ints) of the result slot inside the buffer */
    const int dwLocation = 0x80;

    if (!pKFDMemoryTest->Get_NodeInfo()->IsNodeXGMItoCPU(gpuNode)) {
        LOG() << "Skipping test: XGMI link to CPU is required." << std::endl;
        return;
    }

    unsigned int *fineBuffer = NULL;
    unsigned int tmp;

    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode /* system */, PAGE_SIZE, pKFDMemoryTest->GetHsaMemFlags(),
                       reinterpret_cast<void**>(&fineBuffer)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(fineBuffer, PAGE_SIZE, NULL), gpuNode);
    fineBuffer[0] = 0;
    fineBuffer[1] = 0;
    /* Read buffer from CPU to fill cache.
     * The value itself is not needed, so the load goes through a volatile
     * lvalue; a plain load into an unused variable could be removed by the
     * optimizer, and the cache would then never be primed.
     */
    tmp = *reinterpret_cast<volatile unsigned int *>(&fineBuffer[dwLocation]);
    (void)tmp;

    /* Read fine grain buffer from shader to fill cache */
    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(fineBuffer, fineBuffer+dwLocation);
    dispatch.Submit(queue);

    /* Delay 100ms to make sure shader executed*/
    Delay(100);

    /* CPU writes to buffer. Shader should get what CPU writes and quits*/
    fineBuffer[1] = 0x5678;
    fineBuffer[0] = 1;

    /* Check test result, based on KFDEventTest.SignalEvent passed.
     * if Sync times out,
     * it means coherence issue that GPU doesn't read what CPU wrote.
     * if buffer value is not expected,
     * it means coherence issue that CPU doesn't read what GPU wrote.
     */
    dispatch.Sync(g_TestTimeOut);
    EXPECT_EQ_GPU(fineBuffer[dwLocation], 0x5678, gpuNode);

    // Clean up
    EXPECT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(fineBuffer), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtFreeMemory(fineBuffer, PAGE_SIZE), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* gtest wrapper: hands the SramCacheCoherenceWithGPU worker to
 * KFDTest_Launch and asserts that the launch reports success.
 * NOTE(review): TEST_START/TEST_END and TEST_REQUIRE_ENV_CAPABILITIES are
 * project macros defined outside this chunk; presumably they gate the test
 * on the run profile and on a 64-bit Linux environment - confirm.
 */
TEST_F(KFDMemoryTest, SramCacheCoherenceWithGPU) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
	TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SramCacheCoherenceWithGPU));

    TEST_END
}

/* CPU-to-GPU acquire/release coherency run on a single node.
 *
 * An acquiring shader (scalar or vector variant) is dispatched on acquireNode
 * with a shared buffer and an output buffer as its arguments. After a 100ms
 * delay the CPU writes five data bytes at offsets 0x40, 0x80, 0xc0, 0x100 and
 * 0x140 of the shared buffer and, last of all, the byte at offset 0x0. Once
 * the dispatch has been synced, the same five offsets of the output buffer
 * are expected to hold the values the CPU wrote.
 *
 * acquireNode: node on which the buffers, queues and shaders are created.
 * scalar: true assembles ReadAcquireScalarIsa and uses data values 0x6..0xa;
 *         false assembles ReadAcquireVectorIsa and uses data values 0x1..0x5.
 */
void KFDMemoryTest::AcquireReleaseTestRunCPU(HSAuint32 acquireNode, bool scalar) {
    LOG() << "Testing coherency from CPU to node " << std::dec << acquireNode << std::endl;

    /* Shared buffer written by the CPU - must be at least 64 * 6 bytes */
    HsaMemoryBuffer buffer(PAGE_SIZE, acquireNode, false/*zero*/, false/*local*/, false/*exec*/);
    buffer.MapMemToNodes(&acquireNode, 1);
    char *const shared = buffer.As<char *>();

    /* Output buffer: every checked offset starts out holding the magic number 99 */
    HsaMemoryBuffer outputBuffer(PAGE_SIZE, acquireNode, true, false, false);
    char *const results = outputBuffer.As<char *>();
    results[0x40] = 99;
    results[0x80] = 99;
    results[0xc0] = 99;
    results[0x100] = 99;
    results[0x140] = 99;

    /* Flush results of previous tests from the buffer */
    /* This would be done with SDMA, but SDMA doesn't work on some Aqua Vanjaram emulators */
    PM4Queue flushQueue;
    ASSERT_SUCCESS(flushQueue.Create(acquireNode));
    HsaMemoryBuffer flushBuffer(PAGE_SIZE, acquireNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(FlushBufferForAcquireReleaseIsa, flushBuffer.As<char*>()));
    Dispatch flushDispatch(flushBuffer);
    flushDispatch.SetArgs(shared, NULL);
    flushDispatch.SetDim(1, 1, 1);
    flushDispatch.Submit(flushQueue);
    flushDispatch.Sync(g_TestTimeOut);

    /* Start acquiring thread with the shader variant selected by 'scalar' */
    PM4Queue acquireQueue;
    ASSERT_SUCCESS(acquireQueue.Create(acquireNode));
    HsaMemoryBuffer acquireBuffer(PAGE_SIZE, acquireNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(scalar ? ReadAcquireScalarIsa : ReadAcquireVectorIsa,
                                          acquireBuffer.As<char*>()));
    Dispatch acquireDispatch(acquireBuffer);
    acquireDispatch.SetArgs(shared, results);
    acquireDispatch.SetDim(1, 1, 1);
    acquireDispatch.Submit(acquireQueue);

    /* Delay 100ms to ensure acquirer is waiting */
    Delay(100);

    /* The five data bytes are consecutive values: 0x1..0x5 (vector) or 0x6..0xa (scalar) */
    const char firstValue = scalar ? 0x6 : 0x1;
    shared[0x40] = firstValue;
    shared[0x80] = static_cast<char>(firstValue + 1);
    shared[0xc0] = static_cast<char>(firstValue + 2);
    shared[0x100] = static_cast<char>(firstValue + 3);
    shared[0x140] = static_cast<char>(firstValue + 4);
    /* Offset 0x0 is written last, after all the data bytes */
    shared[0x0] = 0x1;

    acquireDispatch.Sync(g_TestTimeOut);

    /* Check test result: one EXPECT_EQ per data byte, in offset order */
    EXPECT_EQ(firstValue + 0, results[0x40]);
    EXPECT_EQ(firstValue + 1, results[0x80]);
    EXPECT_EQ(firstValue + 2, results[0xc0]);
    EXPECT_EQ(firstValue + 3, results[0x100]);
    EXPECT_EQ(firstValue + 4, results[0x140]);

    /*
     * Guide to results:
     * 99 (the magic number stored above): acquiring shader did not write to output buffer at all
     * 0x77: coherency error. Either releasing shader did not write or acquiring shader read stale value
     * All five EXPECT_EQ fail: error occurs even when releasing shader bypasses cache
     * Only first four EXPECT_EQ fail: error occurs only when releasing shader uses cache
     */

    /* Clean up */
    EXPECT_SUCCESS(acquireQueue.Destroy());
    EXPECT_SUCCESS(flushQueue.Destroy());
}

/* GPU-to-GPU acquire/release coherency run between two nodes.
 *
 * An acquiring shader is dispatched on acquireNode and, 100ms later, a
 * releasing shader on releaseNode; both receive the same shared buffer. After
 * both dispatches have been synced, offsets 0x40, 0x80, 0xc0, 0x100 and 0x140
 * of the output buffer are expected to hold 0x1..0x5 (vector) or 0x6..0xa
 * (scalar).
 *
 * acquireNode:   node running the acquiring (reading) shader.
 * releaseNode:   node running the releasing (writing) shader.
 * localToRemote: true allocates the shared buffer on releaseNode, false on
 *                acquireNode.
 * scalar:        true assembles the scalar shader pair, false the vector pair.
 */
void KFDMemoryTest::AcquireReleaseTestRun(HSAuint32 acquireNode, HSAuint32 releaseNode,
                                          bool localToRemote, bool scalar) {
    LOG() << "Testing coherency from node " << std::dec << releaseNode << " to node " << std::dec << acquireNode << std::endl;

    /* Allocate shared buffer - must be at least 64 * 6 bytes */
    const HSAuint32 localNode = localToRemote ? releaseNode : acquireNode;
    HsaMemoryBuffer buffer(PAGE_SIZE, localNode, false/*zero*/, true/*local*/, false/*exec*/);
    unsigned int nodes[2] = {acquireNode, releaseNode};
    buffer.MapMemToNodes(&nodes[0], 2);
    char *const shared = buffer.As<char *>();

    /* Output buffer: every checked offset starts out holding the magic number 99 */
    HsaMemoryBuffer outputBuffer(PAGE_SIZE, acquireNode, true, false, false);
    char *const results = outputBuffer.As<char *>();
    results[0x40] = 99;
    results[0x80] = 99;
    results[0xc0] = 99;
    results[0x100] = 99;
    results[0x140] = 99;

    /* Flush results of previous tests from the buffer */
    /* This would be done with SDMA, but SDMA doesn't work on some Aqua Vanjaram emulators */
    PM4Queue flushQueue;
    ASSERT_SUCCESS(flushQueue.Create(acquireNode));
    HsaMemoryBuffer flushBuffer(PAGE_SIZE, acquireNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(FlushBufferForAcquireReleaseIsa, flushBuffer.As<char*>()));
    Dispatch flushDispatch(flushBuffer);
    flushDispatch.SetArgs(shared, NULL);
    flushDispatch.SetDim(1, 1, 1);
    flushDispatch.Submit(flushQueue);
    flushDispatch.Sync(g_TestTimeOut);

    /* Start acquiring thread */
    PM4Queue acquireQueue;
    ASSERT_SUCCESS(acquireQueue.Create(acquireNode));
    HsaMemoryBuffer acquireBuffer(PAGE_SIZE, acquireNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(scalar ? ReadAcquireScalarIsa : ReadAcquireVectorIsa,
                                          acquireBuffer.As<char*>()));
    Dispatch acquireDispatch(acquireBuffer);
    acquireDispatch.SetArgs(shared, results);
    acquireDispatch.SetDim(1, 1, 1);
    acquireDispatch.Submit(acquireQueue);

    /* Delay 100ms to ensure acquirer is waiting */
    Delay(100);

    /* Start releasing thread */
    PM4Queue releaseQueue;
    ASSERT_SUCCESS(releaseQueue.Create(releaseNode));
    HsaMemoryBuffer releaseBuffer(PAGE_SIZE, releaseNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(scalar ? WriteReleaseScalarIsa : WriteReleaseVectorIsa,
                                          releaseBuffer.As<char*>()));
    Dispatch releaseDispatch(releaseBuffer);
    releaseDispatch.SetArgs(shared, NULL);
    releaseDispatch.SetDim(1, 1, 1);
    releaseDispatch.Submit(releaseQueue);

    /* Wait for threads to finish: releaser first, then acquirer */
    releaseDispatch.Sync(g_TestTimeOut);
    acquireDispatch.Sync(g_TestTimeOut);

    /* Check test result: expected bytes are consecutive, starting at 0x1 (vector) or 0x6 (scalar) */
    const char firstValue = scalar ? 0x6 : 0x1;
    EXPECT_EQ(firstValue + 0, results[0x40]);
    EXPECT_EQ(firstValue + 1, results[0x80]);
    EXPECT_EQ(firstValue + 2, results[0xc0]);
    EXPECT_EQ(firstValue + 3, results[0x100]);
    EXPECT_EQ(firstValue + 4, results[0x140]);

    /*
     * Guide to results:
     * 99 (the magic number stored above): acquiring shader did not write to output buffer at all
     * 0x77: coherency error. Either releasing shader did not write or acquiring shader read stale value
     * All five EXPECT_EQ fail: error occurs even when releasing shader bypasses cache
     * Only first four EXPECT_EQ fail: error occurs only when releasing shader uses cache
     */

    /* Clean up */
    EXPECT_SUCCESS(acquireQueue.Destroy());
    EXPECT_SUCCESS(releaseQueue.Destroy());
    EXPECT_SUCCESS(flushQueue.Destroy());
}

/* A test of the memory coherence features on Aqua_Vanjaram.
 * One shader stores values at 5 positions in memory, then performs
 * a write-release. The other shader performs a read-acquire, then loads
 * those 5 values, then stores them in a CPU-visible buffer
 *
 * withinGPU: When true, the two shaders will be loaded onto two nodes within
 *            the same GPU. When false, the two shaders will be loaded onto different
 *            GPUs.
 *
 * localToRemote: When true, the shared memory will be local to the releasing node.
 *                When false, the shared memory will be local to the acquiring node.
 *
 * scalar: When true, the shared data will be stored and loaded with scalar instructions.
 *         When false, the shared data will be stored and loaded with vector instructions.
 */
void KFDMemoryTest::AcquireReleaseTest(bool withinGPU, bool localToRemote, bool scalar) {

    if (m_FamilyId != FAMILY_AV) {
        LOG() << "Skipping test: Test requires aqua vanjaram series asics." << std::endl;
        return;
    }

    /* Find second node - nodes with the same DrmRenderMinor are on the same GPU */
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    HSAuint32 acquireNode;
    HSAint32 acquireDRM;
    bool foundSecondNode = false;
    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        acquireNode = gpuNodes.at(i);
        acquireDRM = m_NodeInfo.GetNodeProperties(acquireNode)->DrmRenderMinor;
        for (unsigned j = 0; j < gpuNodes.size(); j++) {
            if (!withinGPU) {
                if (m_NodeInfo.GetNodeProperties(gpuNodes.at(j))->DrmRenderMinor != acquireDRM) {
                    foundSecondNode = true;
                    AcquireReleaseTestRun(acquireNode, gpuNodes.at(j), localToRemote, scalar);
                }
            } else {
                if (m_NodeInfo.GetNodeProperties(gpuNodes.at(j))->DrmRenderMinor == acquireDRM && gpuNodes.at(j) != acquireNode) {
                    foundSecondNode = true;
                    AcquireReleaseTestRun(acquireNode, gpuNodes.at(j), localToRemote, scalar);
                }
            }
        }
    }
    if (!foundSecondNode) {
        if (!withinGPU) {
            LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        } else {
            LOG() << "Skipping test: At least two nodes on the same GPU are required." << std::endl;
        }

    }
}

/* Runs the CPU-to-GPU acquire/release coherency check on every GPU node,
 * once with the scalar and once with the vector acquiring shader.
 * Skipped unless the ASIC family is FAMILY_AV.
 *
 * Uses the same TEST_REQUIRE_ENV_CAPABILITIES / TEST_START / TEST_END
 * scaffolding as the other AcquireRelease* tests of this fixture.
 */
TEST_F(KFDMemoryTest, AcquireReleaseCPU) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (m_FamilyId != FAMILY_AV) {
        LOG() << "Skipping test: Test requires aqua vanjaram series asics." << std::endl;
        return;
    }

    /* Every GPU node takes a turn as the acquiring node; the CPU is the releaser */
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    for (unsigned i = 0; i < gpuNodes.size(); i++) {
        const HSAuint32 acquireNode = gpuNodes.at(i);
        AcquireReleaseTestRunCPU(acquireNode, true /* scalar */);
        AcquireReleaseTestRunCPU(acquireNode, false /* vector */);
    }

    TEST_END
}

/* Acquire/release across different GPUs, shared memory local to the acquirer,
 * vector instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseFarLocalVector) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = false;      /* multi-GPU */
    const bool localToRemote = false;  /* acquirer is local */
    const bool scalar = false;         /* vector */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release across different GPUs, shared memory local to the acquirer,
 * scalar instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseFarLocalScalar) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = false;      /* multi-GPU */
    const bool localToRemote = false;  /* acquirer is local */
    const bool scalar = true;          /* scalar */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release across different GPUs, shared memory local to the releaser,
 * vector instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseFarRemoteVector) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = false;     /* multi-GPU */
    const bool localToRemote = true;  /* releaser is local */
    const bool scalar = false;        /* vector */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release across different GPUs, shared memory local to the releaser,
 * scalar instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseFarRemoteScalar) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = false;     /* multi-GPU */
    const bool localToRemote = true;  /* releaser is local */
    const bool scalar = true;         /* scalar */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release between nodes of one GPU, shared memory local to the
 * acquirer, vector instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseCloseLocalVector) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = true;       /* within-GPU */
    const bool localToRemote = false;  /* acquirer is local */
    const bool scalar = false;         /* vector */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release between nodes of one GPU, shared memory local to the
 * acquirer, scalar instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseCloseLocalScalar) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = true;       /* within-GPU */
    const bool localToRemote = false;  /* acquirer is local */
    const bool scalar = true;          /* scalar */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release between nodes of one GPU, shared memory local to the
 * releaser, vector instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseCloseRemoteVector) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = true;      /* within-GPU */
    const bool localToRemote = true;  /* releaser is local */
    const bool scalar = false;        /* vector */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}

/* Acquire/release between nodes of one GPU, shared memory local to the
 * releaser, scalar instructions.
 */
TEST_F(KFDMemoryTest, AcquireReleaseCloseRemoteScalar) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const bool withinGPU = true;      /* within-GPU */
    const bool localToRemote = true;  /* releaser is local */
    const bool scalar = true;         /* scalar */
    AcquireReleaseTest(withinGPU, localToRemote, scalar);

    TEST_END
}


/* The application registers the same userptr from multiple threads.
 * Multiple threads register/deregister the same userptr to verify the Thunk's
 * race handling.
 *
 * ThreadParams is the per-thread argument block handed to RegisterThread and
 * UnregisterThread.
 */
struct ThreadParams {
    /* User buffer passed to hsaKmtRegisterMemory / hsaKmtMapMemoryToGPU */
    void* pBuf;
    /* Size in bytes passed along with pBuf */
    HSAuint64 BufferSize;
    /* Filled in by hsaKmtMapMemoryToGPU in RegisterThread; later used as the
     * address to unmap and deregister in UnregisterThread
     */
    HSAuint64 VAGPU;
    /* Barrier every worker thread waits on */
    pthread_barrier_t *barrier;
};
static unsigned int RegisterThread(void* p) {
    struct ThreadParams* pArgs = reinterpret_cast<struct ThreadParams*>(p);

    pthread_barrier_wait(pArgs->barrier);
    EXPECT_SUCCESS(hsaKmtRegisterMemory(pArgs->pBuf, pArgs->BufferSize));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(pArgs->pBuf, pArgs->BufferSize, &pArgs->VAGPU));

    return 0;
}
static unsigned int UnregisterThread(void* p) {
    struct ThreadParams* pArgs = reinterpret_cast<struct ThreadParams*>(p);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(reinterpret_cast<void *>(pArgs->VAGPU)));
    pthread_barrier_wait(pArgs->barrier);
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(reinterpret_cast<void *>(pArgs->VAGPU)));

    return 0;
}

#define N_THREADS   32

/* Registers and maps one anonymous 128 MiB mapping from N_THREADS threads at
 * once, checks that every thread was handed the same GPU address, then unmaps
 * and deregisters it from N_THREADS threads.
 */
TEST_F(KFDMemoryTest, MultiThreadRegisterUserptrTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    const HSAuint32 test_loops = 1;
    const HSAuint64 BufferSize = 1UL << 27;

    void *pBuf = mmap(NULL, BufferSize, PROT_READ | PROT_WRITE,
                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    ASSERT_NE(pBuf, MAP_FAILED);

    struct ThreadParams params[N_THREADS];
    HSAuint64 threadId[N_THREADS];

    pthread_barrier_t barrier;
    ASSERT_SUCCESS(pthread_barrier_init(&barrier, NULL, N_THREADS));

    /* Every thread starts each loop from the same parameter values */
    struct ThreadParams initial;
    initial.pBuf = pBuf;
    initial.BufferSize = BufferSize;
    initial.VAGPU = 0;
    initial.barrier = &barrier;

    for (HSAuint32 loop = 0; loop < test_loops; loop++) {
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            params[t] = initial;
        }

        /* Registration phase: start all threads, then wait for all of them */
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            ASSERT_EQ(true, StartThread(&RegisterThread, &params[t], threadId[t]));
        }
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            WaitForThread(threadId[t]);
        }

        /* All threads must have been given the same GPU address */
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            ASSERT_EQ(params[0].VAGPU, params[t].VAGPU);
        }

        /* Unregistration phase */
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            ASSERT_EQ(true, StartThread(&UnregisterThread, &params[t], threadId[t]));
        }
        for (HSAuint32 t = 0; t < N_THREADS; t++) {
            WaitForThread(threadId[t]);
        }
    }

    pthread_barrier_destroy(&barrier);
    munmap(pBuf, BufferSize);

    TEST_END
}

/* Per-GPU body of the ExportDMABufTest.
 *
 * Allocates a one-page non-paged buffer, fills it with its own dword indices,
 * checks that two invalid export requests are rejected, exports a sub-range
 * as a DMA buf fd, frees the original allocation, imports the fd again via
 * hsaKmtRegisterGraphicsHandleToNodes and reads every dword back through a
 * CopyDwordIsa dispatch on gpuNode.
 *
 * pTestParamters: supplies the GPU node under test and the owning
 *                 KFDMemoryTest object.
 *
 * Every check in this routine uses the *_GPU assertion variants that take the
 * GPU node as an extra argument.
 */
static void ExportDMABufTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    if (pKFDMemoryTest->Get_Version()->KernelInterfaceMinorVersion < 12) {
        LOG() << "Skipping test, requires KFD ioctl version 1.12 or newer" << std::endl;
        return;
    }

    // Use a GTT BO for export because it's conveniently CPU accessible.
    // On multi-GPU systems this also checks for interactions with driver-
    // internal DMA buf use for DMA attachment to multiple GPUs
    HsaMemFlags memFlags = pKFDMemoryTest->GetHsaMemFlags();
    memFlags.ui32.NonPaged = 1;

    HSAuint32 *buf;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(0, PAGE_SIZE, memFlags,
                                          reinterpret_cast<void**>(&buf)), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPU(buf, PAGE_SIZE, NULL), gpuNode);

    // Each dword holds its own index so the readback below can be verified.
    const HSAuint32 dwordCount = PAGE_SIZE / 4;
    for (HSAuint32 i = 0; i < dwordCount; i++)
        buf[i] = i;
    const HSAuint64 INDEX = 25;
    const HSAuint64 SIZE = 25;
    HSAuint64 offset;
    int fd;

    // Expected error: address out of range (not a BO)
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtExportDMABufHandle(buf + PAGE_SIZE/4, SIZE*4, &fd, &offset), gpuNode);
    // Expected error: size out of range
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtExportDMABufHandle(buf + INDEX, PAGE_SIZE, &fd, &offset), gpuNode);

    // For real this time. Check that the offset matches
    ASSERT_SUCCESS_GPU(hsaKmtExportDMABufHandle(buf + INDEX, SIZE*4, &fd, &offset), gpuNode);
    ASSERT_EQ_GPU(INDEX*4, offset, gpuNode);

    // Free the original BO. The memory should persist as long as the DMA buf
    // handle exists.
    ASSERT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buf), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtFreeMemory(buf, PAGE_SIZE), gpuNode);

    // Import the BO using the Interop API and check the contents. It doesn't
    // map the import for CPU access, which gives us an excuse to test GPU
    // mapping of the imported BO as well.
    HsaGraphicsResourceInfo info;
    ASSERT_SUCCESS_GPU(hsaKmtRegisterGraphicsHandleToNodes(fd, &info, 1,
                                             reinterpret_cast<HSAuint32 *>(&gpuNode)), gpuNode);
    buf = reinterpret_cast<HSAuint32 *>(info.MemoryAddress);
    ASSERT_EQ_GPU(info.SizeInBytes, PAGE_SIZE, gpuNode);

    HsaMemMapFlags mapFlags = {0};
    ASSERT_SUCCESS_GPU(hsaKmtMapMemoryToGPUNodes(buf, PAGE_SIZE, NULL, mapFlags, 1,
                                             reinterpret_cast<HSAuint32 *>(&gpuNode)), gpuNode);

    PM4Queue pm4Queue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode);
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);
    // Copy one dword per dispatch into dstBuffer and compare it with the
    // index that was stored before the export.
    for (HSAuint32 i = 0; i < dwordCount; i++) {
        Dispatch dispatch(isaBuffer);
        dispatch.SetArgs(&buf[i], dstBuffer.As<void*>());
        dispatch.Submit(pm4Queue);
        dispatch.Sync(g_TestTimeOut);
        ASSERT_EQ_GPU(i, *dstBuffer.As<HSAuint32 *>(), gpuNode);
    }
    ASSERT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);

    ASSERT_SUCCESS_GPU(hsaKmtUnmapMemoryToGPU(buf), gpuNode);
    ASSERT_SUCCESS_GPU(hsaKmtDeregisterMemory(buf), gpuNode);

    ASSERT_EQ_GPU(0, close(fd), gpuNode);
}

/* Hands the ExportDMABufTest routine to KFDTest_Launch and requires the
 * launch to report success.
 */
TEST_F(KFDMemoryTest, ExportDMABufTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const auto launchStatus = KFDTest_Launch(ExportDMABufTest);
    ASSERT_SUCCESS(launchStatus);

    TEST_END
}

/* Per-GPU body of the VA_VRAM_Only_AllocTest.
 *
 * Performs two allocations on gpuNode - one with the OnlyAddress flag set and
 * one with the NoAddress flag set - and checks that, for each of them, both
 * hsaKmtMapMemoryToGPU and hsaKmtMapMemoryToGPUNodes are rejected with
 * HSAKMT_STATUS_INVALID_PARAMETER before the allocation is freed again.
 *
 * pTestParamters: supplies the GPU node under test and the owning
 *                 KFDMemoryTest object.
 *
 * All checks use the *_GPU assertion variants, which take the GPU node as an
 * extra argument, matching the other per-GPU routines in this file.
 */
static void VA_VRAM_Only_AllocTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDMemoryTest* pKFDMemoryTest = (KFDMemoryTest*)pTestParamters->pTestObject;

    /* The assembler is not used below; the lookup is kept so that a node
     * without an assembler still fails this check as before.
     */
    Assembler* m_pAsm;
    m_pAsm = pKFDMemoryTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    if (pKFDMemoryTest->Get_Version()->KernelInterfaceMinorVersion < 12) {
        LOG() << "Skipping test, requires KFD ioctl version 1.12 or newer" << std::endl;
        return;
    }

    HsaMemFlags memFlags = pKFDMemoryTest->GetHsaMemFlags();
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.HostAccess = 0;

    HsaMemMapFlags mapFlags = {0};

    HSAuint32 *buf;

    /*alloc va without vram alloc*/
    memFlags.ui32.OnlyAddress = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, memFlags,
                                          reinterpret_cast<void**>(&buf)), gpuNode);

    /*mapping VA allocated by kfd api would fail*/
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtMapMemoryToGPU(buf, PAGE_SIZE, NULL), gpuNode);
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtMapMemoryToGPUNodes(buf, PAGE_SIZE, NULL, mapFlags, 1,
                                      reinterpret_cast<HSAuint32 *>(&gpuNode)), gpuNode);

    ASSERT_SUCCESS_GPU(hsaKmtFreeMemory(buf, PAGE_SIZE), gpuNode);

    /*alloc vram without va assigned*/
    memFlags.ui32.OnlyAddress = 0;
    memFlags.ui32.NoAddress = 1;
    ASSERT_SUCCESS_GPU(hsaKmtAllocMemory(gpuNode, PAGE_SIZE, memFlags,
                                          reinterpret_cast<void**>(&buf)), gpuNode);

    /*mapping handle allocated by kfd API would fail*/
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtMapMemoryToGPU(buf, PAGE_SIZE, NULL), gpuNode);
    ASSERT_EQ_GPU(HSAKMT_STATUS_INVALID_PARAMETER,
            hsaKmtMapMemoryToGPUNodes(buf, PAGE_SIZE, NULL, mapFlags, 1,
                                      reinterpret_cast<HSAuint32 *>(&gpuNode)), gpuNode);

    ASSERT_SUCCESS_GPU(hsaKmtFreeMemory(buf, PAGE_SIZE), gpuNode);
}

/* Hands the VA_VRAM_Only_AllocTest routine to KFDTest_Launch and requires the
 * launch to report success.
 */
TEST_F(KFDMemoryTest, VA_VRAM_Only_AllocTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    const auto launchStatus = KFDTest_Launch(VA_VRAM_Only_AllocTest);
    ASSERT_SUCCESS(launchStatus);

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDMemoryTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDBaseComponentTest.hpp"

#ifndef __KFD_MEMORY_TEST__H__
#define __KFD_MEMORY_TEST__H__

/* @class KFDMemoryTest
 * Test fixture that groups all memory tests together. On top of
 * KFDBaseComponentTest it declares its own SetUp/TearDown and the helper
 * routines used by the AcquireRelease* coherency tests.
 */
class KFDMemoryTest :  public KFDBaseComponentTest {
 public:
    KFDMemoryTest(void) {}
    ~KFDMemoryTest(void) {}
 protected:
    /* Fixture hooks; defined in the corresponding .cpp file */
    virtual void SetUp();
    virtual void TearDown();

 protected:
    /* Free function granted access to this fixture's non-public members */
    friend void SearchLargestBuffer(int allocNode, const HsaMemFlags &memFlags,
                                            HSAuint64 highMB, int nodeToMap,
                                            HSAuint64 *lastSizeMB);
    /* Coherency run where the CPU writes and a shader on acquireNode reads */
    void AcquireReleaseTestRunCPU(HSAuint32 acquireNode, bool scalar);
    /* Coherency run where a shader on releaseNode writes and a shader on
     * acquireNode reads
     */
    void AcquireReleaseTestRun(HSAuint32 acquireNode, HSAuint32 releaseNode,
                                          bool localToRemote, bool scalar);
    /* Picks the node pairs and calls AcquireReleaseTestRun for each of them */
    void AcquireReleaseTest(bool withinGPU, bool localToRemote, bool scalar);
};

#endif  // __KFD_MEMORY_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDMultiProcessTest.cpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDMultiProcessTest.hpp"

/* Forks nprocesses - 1 children for the GPU identified by nodeId.
 *
 * Each child names itself, re-runs TearDown()/SetUp(), clears its inherited
 * child-pid list, marks itself as a non-parent, records its process index and
 * returns immediately. The parent records every child pid and, once all forks
 * are done, takes the next index after the last child for itself.
 *
 * nodeId:     GPU node id; translated to a GPU index for the per-GPU arrays.
 * nprocesses: total number of processes wanted, parent included.
 */
void KFDMultiProcessTest::ForkChildProcesses(unsigned int nodeId, int nprocesses) {
    const int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(nodeId);
    const std::string nameSuffix = " on gpuNode: " + std::to_string(gpuIndex) + " ";

    int procIdx = 0;
    while (procIdx < nprocesses - 1) {
        const pid_t pid = fork();
        ASSERT_GE(pid, 0);

        if (pid != 0) {
            /* Parent process: remember the child and fork the next one */
            m_ChildPids[gpuIndex].push_back(pid);
            ++procIdx;
            continue;
        }

        /* Child process */
        /* Cleanup file descriptors copied from parent process
         * then call SetUp->hsaKmtOpenKFD to create new process
         */
        m_psName[gpuIndex] = "Child Test process " + std::to_string(procIdx) + nameSuffix;
        TearDown();
        SetUp();
        m_ChildPids[gpuIndex].clear();
        m_IsParent[gpuIndex] = false;
        m_ProcessIndex[gpuIndex] = procIdx;
        return;
    }

    m_psName[gpuIndex] = "Parent Test process " + std::to_string(procIdx) + nameSuffix;
    m_ProcessIndex[gpuIndex] = procIdx;
}

/* Waits for every child process recorded for the GPU identified by nodeId.
 *
 * Only the parent waits. A child counts as successful when waitpid() returned
 * its pid, it exited normally and its exit status is 0. The number of
 * successful children is expected to equal the number of recorded children.
 * The pid list is emptied afterwards, so a second call has nothing to wait
 * for.
 *
 * In both parent and child the per-GPU child status is set to
 * HSAKMT_STATUS_SUCCESS at the end.
 *
 * nodeId: GPU node id; translated to a GPU index for the per-GPU arrays.
 */
void KFDMultiProcessTest::WaitChildProcesses(unsigned int nodeId) {

    const int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(nodeId);

    if (m_IsParent[gpuIndex]) {
        /* Only run by parent process */
        std::vector<pid_t> &children = m_ChildPids[gpuIndex];
        const int size = children.size();
        int childExitOkNum = 0;

        for (const pid_t pid : children) {
            /* Initialised so a failed waitpid() never leaves it indeterminate */
            int childStatus = 0;

            /* The status is only meaningful when waitpid() reports this pid.
             * WIFEXITED is only specified to be non-zero on normal exit, so
             * it is tested for truth rather than compared with 1.
             */
            if (waitpid(pid, &childStatus, 0) == pid &&
                WIFEXITED(childStatus) && WEXITSTATUS(childStatus) == 0)
                childExitOkNum++;
        }

        /* Every recorded child has been handled; drop them all at once */
        children.clear();

        EXPECT_EQ(childExitOkNum, size);
    }

    /* Child process or parent process finished successfully */
    m_ChildStatus[gpuIndex] = HSAKMT_STATUS_SUCCESS;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDMultiProcessTest.hpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_MULTI_PROCESS_TEST__H__
#define __KFD_MULTI_PROCESS_TEST__H__

#include <string>
#include <vector>
#include "KFDBaseComponentTest.hpp"

// @class KFDMultiProcessTest
// Base class for tests forking multiple child processes
class KFDMultiProcessTest :  public KFDBaseComponentTest {
 public:
    KFDMultiProcessTest(void) {
        for ( int i = 0; i < MAX_GPU; i++) {
            m_ChildStatus[i] = HSAKMT_STATUS_ERROR;
            m_IsParent[i] = true;
        }
    }

    ~KFDMultiProcessTest(void) {
        for (int i = 0; i < MAX_GPU; i++) {
            if (!m_IsParent[i]) {
                /* Child process has to exit
                 * otherwise gtest will continue other tests
                */
                exit(m_ChildStatus[i]);
            }
        }

        try {
            const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
            int gpu_node;
            /* parent porcess waits all its child processes on each gpu */
            for (int i = 0; i < std::min((int)gpuNodes.size(), MAX_GPU); i++) {
                gpu_node = gpuNodes.at(i);
                WaitChildProcesses(gpu_node);
            }
       } catch (...) {}

    }

 protected:
    void ForkChildProcesses(unsigned int nodeId, int nprocesses);
    void WaitChildProcesses(unsigned int nodeId);

 protected:  // Members
    std::string     m_psName[MAX_GPU];
    int             m_ProcessIndex[MAX_GPU];
    std::vector<pid_t> m_ChildPids[MAX_GPU];
    HSAKMT_STATUS   m_ChildStatus[MAX_GPU];
    bool            m_IsParent[MAX_GPU];
};

#endif  // __KFD_MULTI_PROCESS_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDNegativeTest.cpp
================================================
/*
 * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDNegativeTest.hpp"
#include "Dispatch.hpp"
#include <sys/ptrace.h>

/* Fixture setup: forwards to KFDBaseComponentTest::SetUp() between the
 * ROUTINE_START / ROUTINE_END wrapper macros.
 */
void KFDNegativeTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Per-test fixture teardown. Adds nothing of its own: it only forwards to the
// KFDBaseComponentTest teardown.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file
// (presumably an exception guard around the body - confirm).
void KFDNegativeTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/**
 *  Basic Pipe Reset Test
 *
 *  KFD pipe reset sequence:
 *  - on HWS preemption hang KFD will scan the device and find the blocked
 *    hardware queue slot.
 *  - KFD will attempt to queue reset.
 *  - Bad packet lengths should cause queue reset to fail and the KFD will
 *    automatically fall back to pipe reset.
 *  - KFD will verify success by checking blocked hardware slot is now unoccupied.
 *  - KFD should only signal a reset exception to processes that have had queues
 *    reset.
 *
 *  Test flow:
 *  - The test forks. The child creates a HW exception event and blocks on a pipe
 *    until the parent has created its own HW exception event.
 *  - The child submits a bad release-mem packet, destroys its queue and expects
 *    to receive the HW exception event.
 *  - The parent reaps the child, expects NOT to receive the event, then submits
 *    a write-data packet on a fresh queue to show its own process still works.
 *
 *  The child always terminates with exit(0); its gtest expectations are only
 *  visible in the log and are intentionally not propagated through the exit
 *  status.
 */
TEST_F(KFDNegativeTest, BasicPipeReset) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(defaultGPUNode);
    ASSERT_TRUE(nodeProps != NULL) << "failed to get node properties";
    bool perQueueResetSupported = nodeProps->Capability.ui32.PerQueueResetSupported;

    if (perQueueResetSupported) {
        int pipefd[2];
        ASSERT_EQ(0, pipe(pipefd)) << "failed to create pipe";

        pid_t childPid = fork();
        if (childPid < 0) {
            // Without a child there is nothing to test; do not fall into the
            // parent branch and wait on a process that does not exist.
            close(pipefd[0]);
            close(pipefd[1]);
            FAIL() << "failed to fork child process";
        }

        if (childPid == 0) {
            // The child only reads from the pipe. Dropping its copy of the write
            // end lets read() see EOF if the parent gives up before signalling.
            close(pipefd[1]);

            // Refresh setup for HSA device and mem buffer use in child
            KFDBaseComponentTest::TearDown();
            KFDBaseComponentTest::SetUp();

            // Run the child's work in a lambda so that a fatal assertion only
            // leaves the lambda. The forked process must never return into the
            // gtest runner, otherwise it would go on executing the remaining
            // tests as a duplicate of the parent.
            auto childBody = [&]() {
                HsaEvent *resetEvent;
                ASSERT_SUCCESS(CreateHWExceptionEvent(false, false, defaultGPUNode, &resetEvent));

                LOG() << "Child ==> Wait on parent to set reset event" << std::endl;
                char buf = 0;
                ASSERT_EQ(1, read(pipefd[0], &buf, 1)) << "parent did not signal the child";

                PM4Queue queue;
                ASSERT_SUCCESS(queue.Create(defaultGPUNode));

                PM4ReleaseMemoryPacket packet = PM4ReleaseMemoryPacket(m_FamilyId, true, 0, 0, false, false, 1);
                queue.PlaceAndSubmitPacket(packet);
                LOG() << "Child ==> Launching packet with bad header then dequeue" << std::endl;
                queue.Wait4PacketConsumption();
                queue.Destroy();

                // child expects hw exception event
                EXPECT_SUCCESS(hsaKmtWaitOnEvent(resetEvent, g_TestTimeOut));
                EXPECT_EQ(resetEvent->EventData.EventType, HSA_EVENTTYPE_HW_EXCEPTION);

                LOG() << "Child ==> Complete" << std::endl;
            };
            childBody();

            close(pipefd[0]);
            exit(0);
        } else {
            int childStatus = 0;

            // The parent only writes to the pipe.
            close(pipefd[0]);

            HsaEvent *resetEvent = NULL;
            const HSAKMT_STATUS resetEventStatus =
                CreateHWExceptionEvent(false, false, defaultGPUNode, &resetEvent);

            if (resetEventStatus == HSAKMT_STATUS_SUCCESS) {
                char buf = 'x';
                EXPECT_EQ(1, write(pipefd[1], &buf, 1));
            }
            // Closing the write end releases the child with EOF when nothing was
            // written, so the child is always reaped before any assertion below
            // can return from the test.
            close(pipefd[1]);

            LOG() << "Parent ==> Wait on child to launch bad packet" << std::endl;
            waitpid(childPid, &childStatus, 0);

            ASSERT_SUCCESS(resetEventStatus);

            // parent process should not intercept reset event on child queue reset
            EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtWaitOnEvent(resetEvent, 100));

            HsaMemoryBuffer destBuf(PAGE_SIZE, defaultGPUNode, false);
            destBuf.Fill(0xFF);
            HsaEvent *event;
            ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &event));

            PM4Queue queue;
            ASSERT_SUCCESS(queue.Create(defaultGPUNode));

            LOG() << "Parent ==> Submit queue packet to verify process is healthy" << std::endl;
            queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0, 0));
            queue.Wait4PacketConsumption(event);
            EXPECT_TRUE(WaitOnValue(destBuf.As<unsigned int*>(), 0));

            hsaKmtDestroyEvent(event);
            hsaKmtDestroyEvent(resetEvent);
            EXPECT_SUCCESS(queue.Destroy());

            LOG() << "Parent ==> Complete" << std::endl;
        }
    } else {
        LOG() << "Skipping test: Family ID 0x" << m_FamilyId << " with per-queue reset support = "
              << perQueueResetSupported << std::endl;
    }

    TEST_END
}

/**
 * Basic SDMA Reset
 *
 * To check SDMA queue reset, launch a healthy SDMA queue and a bad SDMA queue with
 * dispatches per SDMA engine.
 * Similar to compute queue reset, only processes that have bad SDMA queues should
 * be reset, leaving healthy SDMA queue unaffected.
 *
 * The test forks two processes, where for every given engine, the parent process
 * enqueues a healthy queue while the child process enqueues a bad queue that triggers
 * the reset in the following sequence:
 *
 * - Parent/child communicates test status via pipe 1 & 2
 * - Child waits on pipe 1 read for parent to enqueue a queue on SDMA engine <n> with
 *   healthy poll and write packet.
 * - Parent waits on pipe 2 read for child to enqueue a queue on SDMA engine <n> with
 *   unhealthy write packet then destroy its queue to trigger reset on HWS hang.
 * - Child waits on pipe 1 for parent to confirm healthy poll and write packet
 *   complete on SDMA engine <n>.
 * - Child should verify it receives a reset event, while the parent should not
 *   receive a reset event.
 * - The parent/child test re-iterates again on SDMA engine <n+1>.
 *
 * Each handshake byte is '0' + <engine index>. The receive buffers start at 0, so
 * a read() that hits EOF (peer gave up or died) fails the handshake check instead
 * of comparing an uninitialised byte.
 */
TEST_F(KFDNegativeTest, BasicSDMAReset) {
    TEST_START(TESTPROFILE_RUNALL);

    int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";

    const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(gpuNode);
    ASSERT_TRUE(nodeProps != NULL) << "failed to get node properties";
    int totalEngines = nodeProps->NumSdmaEngines + nodeProps->NumSdmaXgmiEngines;
    bool perSDMAQueueResetSupported = nodeProps->Capability2.ui32.PerSDMAQueueResetSupported;

    if (perSDMAQueueResetSupported) {
        int pipe1[2];
        int pipe2[2];
        ASSERT_EQ(0, pipe(pipe1)) << "failed to create pipe 1";
        if (pipe(pipe2) != 0) {
            close(pipe1[0]);
            close(pipe1[1]);
            FAIL() << "failed to create pipe 2";
        }

        LOG() << std::dec << "Running SDMA queue reset on " << totalEngines
              <<" SDMA engines" << std::endl;

        pid_t childPid = fork();
        if (childPid < 0) {
            // No child: do not run the parent side of the handshake against nobody.
            close(pipe1[0]);
            close(pipe1[1]);
            close(pipe2[0]);
            close(pipe2[1]);
            FAIL() << "failed to fork child process";
        }

        if (childPid == 0) {
            KFDBaseComponentTest::TearDown();
            KFDBaseComponentTest::SetUp();
            close(pipe1[1]); // Close write end of pipe1
            close(pipe2[0]); // Close read end of pipe2

            // Run the child's work in a lambda so that a fatal assertion only
            // leaves the lambda. The forked process must never return into the
            // gtest runner, otherwise it would go on executing the remaining
            // tests as a duplicate of the parent.
            auto childBody = [&]() {
                HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);
                unsigned int *dest = destBuf.As<unsigned int*>();
                for (int i = 0; i < totalEngines; i++) {
                    HsaEvent *resetEvent;
                    ASSERT_SUCCESS(CreateHWExceptionEvent(false, false, gpuNode, &resetEvent));

                    // wait for parent to schedule healthy queue on engine
                    char buf1 = 0;
                    char buf2 = '0' + i;
                    read(pipe1[0], &buf1, 1);
                    ASSERT_EQ(buf1, buf2);

                    // submit bad queue and destroy to trigger reset
                    SDMAQueueByEngId queue(i);
                    ASSERT_SUCCESS(queue.Create(gpuNode));
                    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), &dest[0], 0, 6));
                    Delay(50);
                    LOG() << std::dec << "Reset SDMA queue on engine " << i << std::endl;
                    queue.Destroy();

                    // child expects hw exception event
                    EXPECT_SUCCESS(hsaKmtWaitOnEvent(resetEvent, g_TestTimeOut));
                    EXPECT_EQ(resetEvent->EventData.EventType, HSA_EVENTTYPE_HW_EXCEPTION);
                    hsaKmtDestroyEvent(resetEvent);

                    // ack reset to parent and wait for parent to check healthy queue
                    buf1 = 0;
                    write(pipe2[1], &buf2, 1);
                    read(pipe1[0], &buf1, 1);
                    ASSERT_EQ(buf1, buf2);
                }

                LOG() << "Child ==> Complete" << std::endl;
            };
            childBody();

            close(pipe1[0]);
            close(pipe2[1]);
            exit(0);
        } else {
            int childStatus = 0;
            close(pipe1[0]); // Close read end of pipe1
            close(pipe2[1]); // Close write end of pipe2

            // The parent's work also runs in a lambda: whatever assertion fires,
            // control comes back here so the pipes are closed (which unblocks a
            // child waiting on pipe 1) and the child is reaped.
            bool parentCompleted = false;
            auto parentBody = [&]() {
                // parent process should not intercept reset event on child queue reset
                HsaMemoryBuffer pollBuf(PAGE_SIZE, gpuNode, false);
                HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);
                unsigned int *poll = pollBuf.As<unsigned int*>();
                unsigned int *dest = destBuf.As<unsigned int*>();
                uint32_t targetDestValue = 0x12345678;

                for (int i = 0; i < totalEngines; i++) {
                    poll[0] = 0;
                    dest[0] = 0;
                    HsaEvent *event;
                    HsaEvent *resetEvent;
                    ASSERT_SUCCESS(CreateHWExceptionEvent(false, false, gpuNode, &resetEvent));
                    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &event));

                    SDMAQueueByEngId queue(i);
                    ASSERT_SUCCESS(queue.Create(gpuNode));

                    // submit write on poll to maintain non-zero read/write pointer
                    // in engine during reset
                    queue.PlaceAndSubmitPacket(SDMAPollRegMemPacket(&poll[0], 1));
                    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), &dest[0], targetDestValue));

                    // wait for child to trigger reset on engine
                    char buf1 = '0' + i;
                    char buf2 = 0;
                    write(pipe1[1], &buf1, 1);
                    read(pipe2[0], &buf2, 1);
                    ASSERT_EQ(buf1, buf2);

                    // expect no reset event, then update poll to trigger write completion check
                    EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtWaitOnEvent(resetEvent, 100));
                    poll[0] = 1;
                    queue.Wait4PacketConsumption();
                    EXPECT_TRUE(WaitOnValue(&dest[0], targetDestValue));
                    hsaKmtDestroyEvent(event);
                    hsaKmtDestroyEvent(resetEvent);
                    EXPECT_SUCCESS(queue.Destroy());
                    write(pipe1[1], &buf1, 1);
                }

                parentCompleted = true;
            };
            parentBody();

            // Close before waiting: bytes already written stay readable, and a
            // child still blocked on pipe 1 gets EOF instead of hanging forever.
            close(pipe1[1]);
            close(pipe2[0]);
            waitpid(childPid, &childStatus, 0);
            if (parentCompleted) {
                LOG() << "Parent ==> Complete" << std::endl;
            }
        }
    } else {
        LOG() << "Skipping test: Family ID 0x" << m_FamilyId
              << " with per-sdma queue reset support = "
              << perSDMAQueueResetSupported << std::endl;
    }

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDNegativeTest.hpp
================================================
/*
 * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_NEGATIVE_TEST__H__
#define __KFD_NEGATIVE_TEST__H__

#include <gtest/gtest.h>

#include "PM4Queue.hpp"
#include "KFDBaseComponentTest.hpp"
#include "SDMAQueueByEngId.hpp"
#include "SDMAPacket.hpp"

// Test fixture for the negative / reset-handling tests (BasicPipeReset and
// BasicSDMAReset). It carries no state of its own; everything comes from
// KFDBaseComponentTest.
class KFDNegativeTest : public KFDBaseComponentTest {
 public:
    KFDNegativeTest() {}
    ~KFDNegativeTest() {}

 protected:
    // Runs before each test; forwards to KFDBaseComponentTest::SetUp().
    virtual void SetUp();
    // Runs after each test; forwards to KFDBaseComponentTest::TearDown().
    virtual void TearDown();
};

#endif  // __KFD_NEGATIVE_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDOpenCloseKFDTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDOpenCloseKFDTest.hpp"
#include "KFDTestUtil.hpp"

// Before every test from this class fixture, open KFD.
// The open must succeed (ASSERT_SUCCESS), otherwise setup is reported as failed.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDOpenCloseKFDTest::SetUp() {
    ROUTINE_START

    ASSERT_SUCCESS(hsaKmtOpenKFD() );

    ROUTINE_END
}

// After every test from this class fixture, close KFD.
// A failing close is recorded with EXPECT_SUCCESS (non-fatal).
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDOpenCloseKFDTest::TearDown() {
    ROUTINE_START

    EXPECT_SUCCESS(hsaKmtCloseKFD() );

    ROUTINE_END
}

/* This test does not use class KFDOpenCloseKFDTest but is placed here
 * since it is testing the same topic as the other tests.
 * Because it is a plain TEST (no fixture), the fixture's SetUp that opens KFD
 * does not run for it.
 * Verify that calling hsaKmtCloseKFD on a closed KFD returns
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED.
 */
TEST(KFDCloseKFDTest, CloseAClosedKfd ) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_EQ(HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED, hsaKmtCloseKFD());

    TEST_END
}

// Verify that calling hsaKmtOpenKFD on an already opened KFD (opened by the
// fixture's SetUp) returns HSAKMT_STATUS_KERNEL_ALREADY_OPENED.
// The test then closes KFD once itself, and the fixture's TearDown closes it
// again afterwards.
// NOTE(review): presumably the second open still takes a reference that has to
// be released by a matching close - confirm against hsaKmtOpenKFD.
TEST_F(KFDOpenCloseKFDTest, OpenAlreadyOpenedKFD ) {
    TEST_START(TESTPROFILE_RUNALL)

    EXPECT_EQ(HSAKMT_STATUS_KERNEL_ALREADY_OPENED, hsaKmtOpenKFD());

    EXPECT_SUCCESS(hsaKmtCloseKFD());

    TEST_END
}

// Testing the normal scenario: open followed by close (done in the setup and teardown functions).
// The body is intentionally empty: the checks are the ASSERT_SUCCESS in SetUp
// and the EXPECT_SUCCESS in TearDown of the fixture.
TEST_F(KFDOpenCloseKFDTest, OpenCloseKFD ) {
}

// Verify that a forked child cannot use the KFD handle opened by its parent:
// hsaKmtGetVersion called in the child is expected to return
// HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED.
// The child reports its verdict through its exit code (0 = expectation met),
// and the parent checks that the child exited normally with code 0.
TEST_F(KFDOpenCloseKFDTest, InvalidKFDHandleTest ) {
    TEST_START(TESTPROFILE_RUNALL)

    HsaVersionInfo versionInfo;
    const pid_t childPid = fork();
    // On fork failure waitpid(-1, ...) could return -1 and "match" the pid check
    // below while childStatus is never written, so stop here instead.
    ASSERT_GE(childPid, 0) << "failed to fork child process";

    if (childPid == 0) {
        EXPECT_EQ(HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED, hsaKmtGetVersion(&versionInfo));
        // Propagate the expectation result; a constant exit(0) would make the
        // parent's exit-code check meaningless.
        exit(HasFailure() ? 1 : 0);
    } else {
        int childStatus = 0;
        EXPECT_EQ(childPid, waitpid(childPid, &childStatus, 0));
        EXPECT_NE(0, WIFEXITED(childStatus));
        EXPECT_EQ(0, WEXITSTATUS(childStatus));
    }
    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDOpenCloseKFDTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <gtest/gtest.h>
#include "hsakmt/hsakmt.h"

#ifndef __KFD_OPEN_CLOSE_KFD_TEST__H__
#define __KFD_OPEN_CLOSE_KFD_TEST__H__

//  @class KFDOpenCloseKFDTest
//  Fixture for the KFD open/close tests. It derives directly from testing::Test
//  and holds no members; its SetUp/TearDown open and close KFD around each test.
class KFDOpenCloseKFDTest : public testing::Test {
 public:
    KFDOpenCloseKFDTest(void) {}
    ~KFDOpenCloseKFDTest(void) {}

 protected:
    // @brief Executed before every test that uses KFDOpenCloseKFDTest class, sets all common settings for the tests.
    virtual void SetUp();
    // @brief Executed after every test that uses KFDOpenCloseKFDTest class
    virtual void TearDown();
};

#endif  //  __KFD_OPEN_CLOSE_KFD_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPCSamplingTest.cpp
================================================
/*
 * Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDPCSamplingTest.hpp"
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <numa.h>
#include <vector>
#include "Dispatch.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAQueue.hpp"
#include "SDMAPacket.hpp"
#include "hsakmt/linux/kfd_ioctl.h"

#define N_PROCESSES             (2)     /* Number of processes running in parallel, must be at least 2 */

/* Captures user specified time (seconds) to sleep */
extern unsigned int g_SleepTime;

// Per-test fixture setup. Adds nothing of its own: it only forwards to the
// KFDBaseComponentTest setup.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDPCSamplingTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Per-test fixture teardown. Adds nothing of its own: it only forwards to the
// KFDBaseComponentTest teardown.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDPCSamplingTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

// Basic PC sampling API test on the default GPU node:
//  - query how many sampling configurations exist, then fetch them;
//  - start/stop/destroy of a trace id that was never created must fail;
//  - create/destroy a session;
//  - run two sessions concurrently in this process and tear them down one
//    after the other.
// The test returns early (without failing) when PC sampling is not supported.
TEST_F(KFDPCSamplingTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (hsaKmtPcSamplingSupport() != HSAKMT_STATUS_SUCCESS)
        return;

    HSAuint32 num_sample_info = 0;
    HSAuint32 return_num_sample_info = 0;

    // Keep the node id signed until it has been validated: compared as an
    // unsigned value, ">= 0" is always true and a failed lookup goes unnoticed.
    const int gpuNodeId = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNodeId, 0) << "Failed to get default GPU Node.";
    const HSAuint32 defaultGPUNode = static_cast<HSAuint32>(gpuNodeId);

    /* 1. get the number of pc sampling format entries */
    HSAKMT_STATUS ret = hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, NULL,
                                         num_sample_info, &return_num_sample_info);
    if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
        LOG() << "Skipping test: This GPU does not support PC Sampling." << std::endl;
        return;
    }
    ASSERT_GE(return_num_sample_info, 1u);

    num_sample_info = return_num_sample_info;
    // Zero-initialised storage owned by the vector, so it is released on every
    // exit path, including the early returns taken by failing ASSERT_* macros.
    std::vector<HsaPcSamplingInfo> samples(num_sample_info);

    /* 2. fetch the pc sampling format entries */
    ASSERT_SUCCESS(hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, samples.data(),
                                         num_sample_info, &return_num_sample_info));

    HsaPcSamplingTraceId traceId1, traceId2;

    samples[0].value = 0x100000; /* 1,048,576 usec */

    /* 3. Starting an uncreated pc sampling ID must fail */
    ASSERT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtPcSamplingStart(defaultGPUNode, 12345));

    /* 4. Stopping an uncreated pc sampling ID must fail */
    ASSERT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtPcSamplingStop(defaultGPUNode, 12345));

    /* 5. Destroying an uncreated pc sampling ID must fail */
    ASSERT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtPcSamplingDestroy(defaultGPUNode, 12345));

    /* 6. create pc sampling */
    ASSERT_SUCCESS(hsaKmtPcSamplingCreate(defaultGPUNode, &samples[0], &traceId1));
    ASSERT_SUCCESS(hsaKmtPcSamplingDestroy(defaultGPUNode, traceId1));

    /* 7. create twice in the same process with pc sampling activated */
    ASSERT_SUCCESS(hsaKmtPcSamplingCreate(defaultGPUNode, &samples[0], &traceId2));
    ASSERT_SUCCESS(hsaKmtPcSamplingStart(defaultGPUNode, traceId2));
    /* Create and start 2nd session pc sampling */
    ASSERT_SUCCESS(hsaKmtPcSamplingCreate(defaultGPUNode, &samples[0], &traceId1));
    ASSERT_SUCCESS(hsaKmtPcSamplingStart(defaultGPUNode, traceId1));
    sleep(2);
    /* Stop its own pc sampling session, but another session still alive */
    ASSERT_SUCCESS(hsaKmtPcSamplingStop(defaultGPUNode, traceId2));
    /* Destroy its own pc sampling session when it is de-activated */
    ASSERT_SUCCESS(hsaKmtPcSamplingDestroy(defaultGPUNode, traceId2));
    sleep(1);
    ASSERT_SUCCESS(hsaKmtPcSamplingDestroy(defaultGPUNode, traceId1));

    TEST_END
}

// Argument bundle handed to PCSamplingThread through its void* parameter.
struct ThreadParams {
    int test_num;                // thread number, used only in log messages
    HSAuint32 GPUNode;           // node id passed to the hsaKmtPcSampling* calls
    HsaPcSamplingInfo *samples;  // sampling configuration passed to hsaKmtPcSamplingCreate (not owned)
};

static unsigned int PCSamplingThread(void* p) {
    struct ThreadParams* pArgs = reinterpret_cast<struct ThreadParams*>(p);

    LOG() << "PCSamplingThread #" << pArgs->test_num << " start." << std::endl;
    HsaPcSamplingTraceId traceId;

    EXPECT_SUCCESS(hsaKmtPcSamplingCreate(pArgs->GPUNode, pArgs->samples, &traceId));
    EXPECT_SUCCESS(hsaKmtPcSamplingStart(pArgs->GPUNode, traceId));
    sleep(3);

    LOG() << "PCSamplingThread #" << pArgs->test_num << " stop." << std::endl;
    EXPECT_SUCCESS(hsaKmtPcSamplingStop(pArgs->GPUNode, traceId));
    EXPECT_SUCCESS(hsaKmtPcSamplingDestroy(pArgs->GPUNode, traceId));

    return 0;
}

// Runs two PC sampling sessions from two threads of the same process. The
// second thread is started one second after the first, so the sessions overlap.
// The test returns early (without failing) when PC sampling is not supported.
TEST_F(KFDPCSamplingTest, MultiThreadPcSamplingTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (hsaKmtPcSamplingSupport() != HSAKMT_STATUS_SUCCESS)
        return;

    HSAuint64 threadId[2];
    struct ThreadParams params[2];
    HSAuint32 num_sample_info = 0;
    HSAuint32 return_num_sample_info = 0;

    // Keep the node id signed until it has been validated: compared as an
    // unsigned value, ">= 0" is always true and a failed lookup goes unnoticed.
    const int gpuNodeId = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNodeId, 0) << "Failed to get default GPU Node";
    const HSAuint32 defaultGPUNode = static_cast<HSAuint32>(gpuNodeId);

    HSAKMT_STATUS ret = hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, NULL,
                                         num_sample_info, &return_num_sample_info);
    if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
        LOG() << "Skipping test: This GPU does not support PC Sampling." << std::endl;
        return;
    }
    ASSERT_GE(return_num_sample_info, 1u);

    num_sample_info = return_num_sample_info;
    // Zero-initialised storage owned by the vector, so it is released on every
    // exit path, including the early returns taken by failing ASSERT_* macros.
    std::vector<HsaPcSamplingInfo> samples(num_sample_info);

    ASSERT_SUCCESS(hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, samples.data(),
                                         num_sample_info, &return_num_sample_info));

    samples[0].value = 0x100000; /* 1,048,576 usec */

    params[0].test_num = 1;
    params[1].test_num = 2;
    params[0].GPUNode = defaultGPUNode;
    params[1].GPUNode = defaultGPUNode;
    params[0].samples = samples.data();
    params[1].samples = samples.data();

    ASSERT_EQ(true, StartThread(&PCSamplingThread, &params[0], threadId[0]));
    sleep(1);
    /* start 2nd thread after 1 sec */
    const bool secondThreadStarted = StartThread(&PCSamplingThread, &params[1], threadId[1]);

    // Join whatever is running before any assertion can return: the threads
    // read `params` and `samples`, both of which live in this stack frame.
    WaitForThread(threadId[0]);
    if (secondThreadStarted)
        WaitForThread(threadId[1]);

    ASSERT_EQ(true, secondThreadStarted);

    TEST_END;
}

// Argument bundle handed to PCSamplingProcRun through its void* parameter.
struct ProcParams {
    std::string test_name;       // process label; PCSamplingProcRun compares it with "Test process 1 " to pick the start delay
    HSAuint32 GPUNode;           // node id passed to the hsaKmtPcSampling* calls
    HsaPcSamplingInfo *samples;  // sampling configuration passed to hsaKmtPcSamplingCreate (not owned)
};

// Per-process body of the multi-process test: runs one complete PC sampling
// session (create -> start -> 3 s of sampling -> stop -> destroy).
// The process labelled "Test process 1 " starts immediately; every other
// process waits one second first, so the sessions are staggered.
// @param p  points to a ProcParams describing label, node and sampling config
// @return   always 0; failures are recorded through the EXPECT_* macros
static unsigned int PCSamplingProcRun(void* p) {
    struct ProcParams* const args = static_cast<struct ProcParams*>(p);
    const bool isFirstProcess = (args->test_name == "Test process 1 ");
    const int start_delay = isFirstProcess ? 0 : 1;

    LOG() << "PCSamplingProc <" << args->test_name <<
                 "> starting after 0x" <<  start_delay  << " secs" << std::endl;
    sleep(start_delay);

    // Seeded with the delay as in the original code; overwritten by Create on success.
    HsaPcSamplingTraceId traceId = start_delay;

    EXPECT_SUCCESS(hsaKmtPcSamplingCreate(args->GPUNode, args->samples, &traceId));
    EXPECT_SUCCESS(hsaKmtPcSamplingStart(args->GPUNode, traceId));
    sleep(3);

    LOG() << "PCSamplingProc <" << args->test_name << "> stop" << std::endl;
    EXPECT_SUCCESS(hsaKmtPcSamplingStop(args->GPUNode, traceId));
    EXPECT_SUCCESS(hsaKmtPcSamplingDestroy(args->GPUNode, traceId));
    LOG() << "PCSamplingProc <" << args->test_name << "> done" << std::endl;

    return 0;
}

// Runs a PC sampling session in each of N_PROCESSES processes created with
// ForkChildProcesses. Every process executes PCSamplingProcRun with the label
// stored in m_psName for the default GPU.
// The test returns early (without failing) when PC sampling is not supported
// or when no DRM render node is found for the default GPU.
TEST_F(KFDPCSamplingTest, MultiProcPcSamplingTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (hsaKmtPcSamplingSupport() != HSAKMT_STATUS_SUCCESS)
        return;

    // Keep the node id signed until it has been validated: compared as an
    // unsigned value, ">= 0" is always true and a failed lookup goes unnoticed.
    const int gpuNodeId = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNodeId, 0) << "Failed to get default GPU Node";
    const HSAuint32 defaultGPUNode = static_cast<HSAuint32>(gpuNodeId);

    HSAuint32 num_sample_info = 0;
    HSAuint32 return_num_sample_info = 0;
    struct ProcParams params;

    params.GPUNode = defaultGPUNode;

    HSAKMT_STATUS ret = hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, NULL,
                                         num_sample_info, &return_num_sample_info);
    if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
        LOG() << "Skipping test: This GPU does not support PC Sampling." << std::endl;
        return;
    }
    ASSERT_GE(return_num_sample_info, 1u);

    num_sample_info = return_num_sample_info;
    // Zero-initialised storage owned by the vector, so it is released on every
    // exit path: failing ASSERT_* macros and the render-node skip below.
    std::vector<HsaPcSamplingInfo> samples(num_sample_info);
    ASSERT_SUCCESS(hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, samples.data(),
                                         num_sample_info, &return_num_sample_info));

    samples[0].value = 0x100000; /* 1,048,576 usec */

    /* Fork the child processes */
    ForkChildProcesses(defaultGPUNode, N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses(defaultGPUNode);
        return;
    }

    params.samples = samples.data();

    int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(defaultGPUNode);
    params.test_name = m_psName[gpuIndex];

    PCSamplingProcRun(&params);

    WaitChildProcesses(defaultGPUNode);

    TEST_END
}

/* Manually run multiple instances of KFDPCSamplingTest.MultiProcPcSamplingTestM
 * in parallel: each instance runs one PC sampling session
 * (create -> start -> 3 s of sampling -> stop -> destroy) on the default GPU.
 * The test returns early (without failing) when PC sampling is not supported.
 */
TEST_F(KFDPCSamplingTest, MultiProcPcSamplingTestM) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (hsaKmtPcSamplingSupport() != HSAKMT_STATUS_SUCCESS)
        return;

    HSAuint32 num_sample_info = 0;
    HSAuint32 return_num_sample_info = 0;

    // Keep the node id signed until it has been validated: compared as an
    // unsigned value, ">= 0" is always true and a failed lookup goes unnoticed.
    const int gpuNodeId = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNodeId, 0) << "Failed to get default GPU Node";
    const HSAuint32 defaultGPUNode = static_cast<HSAuint32>(gpuNodeId);

    HSAKMT_STATUS ret = hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, NULL,
                                         num_sample_info, &return_num_sample_info);
    if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
        LOG() << "Skipping test: This GPU does not support PC Sampling." << std::endl;
        return;
    }
    ASSERT_GE(return_num_sample_info, 1u);

    num_sample_info = return_num_sample_info;
    // Zero-initialised storage owned by the vector, so it is released on every
    // exit path, including the early returns taken by failing ASSERT_* macros.
    std::vector<HsaPcSamplingInfo> samples(num_sample_info);
    ASSERT_SUCCESS(hsaKmtPcSamplingQueryCapabilities(defaultGPUNode, samples.data(),
                                         num_sample_info, &return_num_sample_info));

    HsaPcSamplingTraceId traceId;

    samples[0].value = 0x100000; /* 1,048,576 usec */
    ASSERT_SUCCESS(hsaKmtPcSamplingCreate(defaultGPUNode, &samples[0], &traceId));

    ASSERT_SUCCESS(hsaKmtPcSamplingStart(defaultGPUNode, traceId));
    sleep(3);
    ASSERT_SUCCESS(hsaKmtPcSamplingStop(defaultGPUNode, traceId));
    ASSERT_SUCCESS(hsaKmtPcSamplingDestroy(defaultGPUNode, traceId));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPCSamplingTest.hpp
================================================
/*
 * Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_PCSAMPLING_TEST__H__
#define __KFD_PCSAMPLING_TEST__H__

#include "KFDMultiProcessTest.hpp"

// Test fixture for the PC sampling tests. It derives from KFDMultiProcessTest
// and declares no members of its own.
class KFDPCSamplingTest : public KFDMultiProcessTest {
 public:
    KFDPCSamplingTest(void) {}
    ~KFDPCSamplingTest(void) {}
 protected:
    // Runs before each test; forwards to KFDBaseComponentTest::SetUp().
    virtual void SetUp();
    // Runs after each test; forwards to KFDBaseComponentTest::TearDown().
    virtual void TearDown();

 protected:
};

#endif  // __KFD_PCSAMPLING_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPMTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDPMTest.hpp"
#include "KFDTestUtil.hpp"
#include "PM4Packet.hpp"
#include "PM4Queue.hpp"
#include "hsakmt/hsakmt.h"

// Per-test fixture setup. Adds nothing of its own: it only forwards to the
// KFDBaseComponentTest setup.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDPMTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture teardown: forwards to KFDBaseComponentTest::TearDown() inside the ROUTINE_START/ROUTINE_END wrapper. */
void KFDPMTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * Calls SuspendAndWakeUp() without creating any queue in this test body and
 * expects it to return true.
 */
TEST_F(KFDPMTest, SuspendWithActiveProcess) {
    TEST_START(TESTPROFILE_RUNALL)

    EXPECT_EQ(true, SuspendAndWakeUp());

    TEST_END
}

/*
 * Creates a PM4 queue on the default GPU node, submits nothing to it, and
 * expects SuspendAndWakeUp() to return true while the queue exists. The
 * queue is destroyed afterwards.
 */
TEST_F(KFDPMTest, SuspendWithIdleQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    PM4Queue idleQueue;
    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";

    ASSERT_SUCCESS(idleQueue.Create(gpuNode));
    EXPECT_EQ(true, SuspendAndWakeUp());
    EXPECT_SUCCESS(idleQueue.Destroy());

    TEST_END
}

/*
 * Runs a PM4 write-data packet on a queue, clears the destination buffer,
 * calls SuspendAndWakeUp(), and then submits a second write-data packet on
 * the same queue. After the second packet, dwords 0 and 1 are expected to
 * still be zero and dwords 2 and 3 to hold the newly written values.
 */
TEST_F(KFDPMTest, SuspendWithIdleQueueAfterWork) {
    TEST_START(TESTPROFILE_RUNALL)

    PM4Queue workQueue;
    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";

    HsaMemoryBuffer destBuffer(PAGE_SIZE, gpuNode);
    /* Dword view of the destination buffer, shared by all accesses below. */
    unsigned int *const dwords = destBuffer.As<unsigned int*>();

    ASSERT_SUCCESS(workQueue.Create(gpuNode));

    HsaEvent *doneEvent;
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &doneEvent));

    /* First submission: write 0x1/0x2 into dwords 0 and 1. */
    workQueue.PlaceAndSubmitPacket(PM4WriteDataPacket(dwords, 0x1, 0x2));
    workQueue.Wait4PacketConsumption(doneEvent);
    WaitOnValue(&dwords[0], 0x1);
    WaitOnValue(&dwords[1], 0x2);

    /* Clear the buffer so stale values cannot satisfy the later checks. */
    destBuffer.Fill(0);

    EXPECT_EQ(true, SuspendAndWakeUp());

    /* Second submission after the suspend cycle: write 0x3/0x4 into dwords 2 and 3. */
    workQueue.PlaceAndSubmitPacket(PM4WriteDataPacket(&dwords[2], 0x3, 0x4));
    workQueue.Wait4PacketConsumption(doneEvent);

    EXPECT_EQ(dwords[0], 0);
    EXPECT_EQ(dwords[1], 0);

    WaitOnValue(&dwords[2], 0x3);
    WaitOnValue(&dwords[3], 0x4);

    hsaKmtDestroyEvent(doneEvent);
    EXPECT_SUCCESS(workQueue.Destroy());

    TEST_END
}

// TODO: Suspend while workload is being executed by a queue


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPMTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFDPMTEST_HPP__
#define __KFDPMTEST_HPP__

#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"

/*
 * Test fixture used by the KFDPMTest test cases. It derives from
 * KFDBaseComponentTest and declares SetUp()/TearDown() hooks that are
 * defined in the corresponding source file.
 */
class KFDPMTest : public KFDBaseComponentTest {
 public:
    KFDPMTest() {}
    ~KFDPMTest() {}

 protected:
    // Fixture hooks, defined out of line.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFDPMTEST_HPP__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPerfCounters.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDPerfCounters.hpp"

/* Fixture setup: forwards to KFDBaseComponentTest::SetUp() inside the ROUTINE_START/ROUTINE_END wrapper. */
void KFDPerfCountersTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture teardown: forwards to KFDBaseComponentTest::TearDown() inside the ROUTINE_START/ROUTINE_END wrapper. */
void KFDPerfCountersTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * Lookup table pairing a counter-block UUID with a short display name.
 * GetBlockName() scans it linearly and compares UUIDs with memcmp().
 * The names are padded with trailing spaces to a common width of seven
 * characters.
 */
static struct block_name_table {
    char name[32];      // display name, space padded
    HSA_UUID uuid;      // block identifier matched against the queried UUID
} block_lookup_table[] = {
    {"CB     ", {0x9ba429c6, 0xaf2d, 0x4b38, 0xb3, 0x49, 0x15, 0x72, 0x71, 0xbe, 0xac, 0x6a}},
    {"CPF    ", {0x2b0ad2b5, 0x1c43, 0x4f46, 0xa7, 0xbc, 0xe1, 0x19, 0x41, 0x1e, 0xa6, 0xc9}},
    {"CPG    ", {0x590ec94d, 0x20f0, 0x448f, 0x8d, 0xff, 0x31, 0x6c, 0x67, 0x9d, 0xe7, 0xff}},
    {"DB     ", {0x3d1a47fc, 0x0013, 0x4ed4, 0x83, 0x06, 0x82, 0x2c, 0xa0, 0xb7, 0xa6, 0xc2}},
    {"GDS    ", {0xf59276ec, 0x2526, 0x4bf8, 0x8e, 0xc0, 0x11, 0x8f, 0x77, 0x70, 0x0d, 0xc9}},
    {"GRBM   ", {0x8f00933c, 0xc33d, 0x4801, 0x97, 0xb7, 0x70, 0x07, 0xf7, 0x85, 0x73, 0xad}},
    {"GRBMSE ", {0x34ebd8d7, 0x7c8b, 0x4d15, 0x88, 0xfa, 0x0e, 0x4e, 0x4a, 0xf5, 0x9a, 0xc1}},
    {"IA     ", {0x34276944, 0x4264, 0x4fcd, 0x9d, 0x6e, 0xae, 0x26, 0x45, 0x82, 0xec, 0x51}},
    {"MC     ", {0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c}},
    {"PASC   ", {0xb0e7fb5d, 0x0efc, 0x4744, 0xb5, 0x16, 0x5d, 0x23, 0xdc, 0x1f, 0xd5, 0x6c}},
    {"PASU   ", {0x9a152b6a, 0x1fad, 0x45f2, 0xa5, 0xbf, 0xf1, 0x63, 0x82, 0x6b, 0xd0, 0xcd}},
    {"SPI    ", {0xeda81044, 0xd62c, 0x47eb, 0xaf, 0x89, 0x4f, 0x6f, 0xbf, 0x3b, 0x38, 0xe0}},
    {"SRBM   ", {0x9f8040e0, 0x6830, 0x4019, 0xac, 0xc8, 0x46, 0x3c, 0x9e, 0x44, 0x5b, 0x89}},
    {"SQ     ", {0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8}},
    {"SX     ", {0xbdb8d737, 0x43cc, 0x4162, 0xbe, 0x52, 0x51, 0xcf, 0xb8, 0x47, 0xbe, 0xaf}},
    {"TA     ", {0xc01ee43d, 0xad92, 0x44b1, 0x8a, 0xb9, 0xbe, 0x5e, 0x69, 0x6c, 0xee, 0xa7}},
    {"TCA    ", {0x333e393f, 0xe147, 0x4f49, 0xa6, 0xd1, 0x60, 0x91, 0x4c, 0x70, 0x86, 0xb0}},
    {"TCC    ", {0x848ce855, 0xd805, 0x4566, 0xa8, 0xab, 0x73, 0xe8, 0x84, 0xcc, 0x6b, 0xff}},
    {"TCP    ", {0xe10a013b, 0x17d4, 0x4bf5, 0xb0, 0x89, 0x42, 0x95, 0x91, 0x05, 0x9b, 0x60}},
    {"TCS    ", {0x4126245c, 0x4d96, 0x4d1a, 0x8a, 0xed, 0xa9, 0x39, 0xd4, 0xcc, 0x8e, 0xc9}},
    {"TD     ", {0x7d7c0fe4, 0xfe41, 0x4fea, 0x92, 0xc9, 0x45, 0x44, 0xd7, 0x70, 0x6d, 0xc6}},
    {"VGT    ", {0x0b6a8cb7, 0x7a01, 0x409f, 0xa2, 0x2c, 0x30, 0x14, 0x85, 0x4f, 0x13, 0x59}},
    {"WD     ", {0x0e176789, 0x46ed, 0x4b02, 0x97, 0x2a, 0x91, 0x6d, 0x2f, 0xac, 0x24, 0x4a}},
    {"DRIVER ", {0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa}}
};

/*
 * Translates a counter-block UUID into printable strings.
 *
 * uuid         - block identifier to look up in block_lookup_table.
 * name         - optional output buffer (may be NULL). Receives the table
 *                name of the block (space padded, as stored in the table),
 *                or "unknown" when the UUID is not listed.
 * name_len     - size of the name buffer in bytes.
 * uuid_str     - optional output buffer (may be NULL). Receives the UUID
 *                formatted as 8-4-4-4-12 lowercase hexadecimal digits.
 * uuid_str_len - size of the uuid_str buffer in bytes.
 *
 * Both outputs are written with snprintf(), so they are truncated to the
 * given buffer sizes.
 */
static void GetBlockName(HSA_UUID uuid, char *name, uint32_t name_len,
                                       char *uuid_str, uint32_t uuid_str_len) {
    /* name is optional: never touch it, not even for the default, when NULL. */
    if (name) {
        const char *block_name = "unknown";

        for (const auto &entry : block_lookup_table) {
            if (!memcmp(&entry.uuid, &uuid, sizeof(HSA_UUID))) {
                block_name = entry.name;
                break;
            }
        }
        snprintf(name, name_len, "%s", block_name);
    }

    if (uuid_str)
        snprintf(uuid_str, uuid_str_len,
                 "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                 uuid.Data1, uuid.Data2, uuid.Data3,
                 uuid.Data4[0], uuid.Data4[1], uuid.Data4[2],
                 uuid.Data4[3], uuid.Data4[4], uuid.Data4[5],
                 uuid.Data4[6], uuid.Data4[7]);
}

/*
 * Per-GPU worker for the GetCounterProperties test.
 *
 * Queries the performance-counter properties of pTestParamters->gpuNode,
 * requires at least one counter block, and logs one line per block with its
 * name, UUID, the profile type of its first counter and its counter count.
 *
 * Blocks are walked as variable-length records: the next block is taken to
 * start immediately after the last counter of the current one.
 */
static void GetCounterProperties(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDPerfCountersTest* pKFDPerfCountersTest =
                         (KFDPerfCountersTest*)pTestParamters->pTestObject;

    HsaCounterProperties* pProps = NULL;
    ASSERT_SUCCESS(hsaKmtPmcGetCounterProperties(gpuNode, &pProps));
    /* Verifying that there is at least one block */
    ASSERT_NE(0, pProps->NumBlocks) << "No performance counters blocks";

    LOG() << std::dec << pProps->NumBlocks << " blocks found." << std::endl;

    HsaCounterBlockProperties *block = &pProps->Blocks[0];
    for (HSAuint32 i = 0; i < pProps->NumBlocks; i++) {
        char uuid_string[37] = "";
        char name[32] = "";
        GetBlockName(block->BlockId, name, sizeof(name), uuid_string, sizeof(uuid_string));

        /* The block's type is reported from its first counter. */
        const char *type;
        switch (block->Counters[0].Type) {
        case HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE:
            type = "Priv Immediate";
            break;
        case HSA_PROFILE_TYPE_PRIVILEGED_STREAMING:
            type = "Priv Streaming";
            break;
        case HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE:
            type = "Non-priv Immediate";
            break;
        case HSA_PROFILE_TYPE_NONPRIV_STREAMING:
            /* Was mislabelled "Non-priv Immediate". */
            type = "Non-priv Streaming";
            break;
        default:
            type = "Unknown";
            break;
        }

        LOG() << name << " (" << uuid_string << "): " << type << ", " <<
            block->NumCounters << " counter IDs" << std::endl;

        /* Advance to the record that follows this block's counter array. */
        block = reinterpret_cast<HsaCounterBlockProperties *>(&block->Counters[block->NumCounters]);
    }
}

/* Runs the GetCounterProperties worker through KFDTest_Launch() and requires it to succeed. */
TEST_F(KFDPerfCountersTest, GetCounterProperties) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(GetCounterProperties));

    TEST_END
}

/*
 * Per-GPU worker for the RegisterTrace test.
 *
 * Looks for the first counter block whose first counter has a type value not
 * greater than HSA_PROFILE_TYPE_PRIVILEGED_STREAMING, registers a trace for
 * that block's NumConcurrent counters and unregisters it again. The worker
 * logs a skip message and returns when no such block exists.
 */
static void RegisterTrace(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDPerfCountersTest* pKFDPerfCountersTest =
                         (KFDPerfCountersTest*)pTestParamters->pTestObject;

    HsaCounterProperties* counterProps = NULL;
    ASSERT_SUCCESS(hsaKmtPmcGetCounterProperties(gpuNode, &counterProps));

    /* Verifying that there is at least one block */
    ASSERT_NE(0, counterProps->NumBlocks) << "No performance counters blocks";

    /* Walk the variable-length block records until a privileged one shows up. */
    HsaCounterBlockProperties *candidate = &counterProps->Blocks[0];
    bool havePrivBlock = false;
    for (HSAuint32 idx = 0; idx < counterProps->NumBlocks && !havePrivBlock; idx++) {
        if (candidate->Counters[0].Type <= HSA_PROFILE_TYPE_PRIVILEGED_STREAMING)
            havePrivBlock = true;
        else
            candidate = reinterpret_cast<HsaCounterBlockProperties *>(
                            &candidate->Counters[candidate->NumCounters]);
    }

    if (!havePrivBlock) {
        LOG() << "Skipping test: No privileged block is found."
            << std::endl;
        return;
    }

    /* Registering trace */
    HsaPmcTraceRoot traceRoot;
    ASSERT_SUCCESS(hsaKmtPmcRegisterTrace(gpuNode,
                                          candidate->NumConcurrent,
                                          candidate->Counters,
                                          &traceRoot));
    EXPECT_SUCCESS(hsaKmtPmcUnregisterTrace(gpuNode, traceRoot.TraceId));
}

/* Runs the RegisterTrace worker through KFDTest_Launch() and requires it to succeed. */
TEST_F(KFDPerfCountersTest, RegisterTrace) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(RegisterTrace));

    TEST_END
}

/* Value passed to Delay() between starting and stopping a trace. */
static const unsigned int START_STOP_DELAY = 10000;     // 10 sec tracing (presumably Delay() takes milliseconds - confirm)

/*
 * Per-GPU worker that exercises a full trace life cycle on the first
 * privileged counter block: register, acquire access, start, wait
 * START_STOP_DELAY, stop, query, log the counter values, release access and
 * unregister.
 *
 * The worker logs a skip message and returns when no privileged block is
 * found or when the caller is not root (getuid() != 0).
 */
static void StartStopQueryTrace(KFDTEST_PARAMETERS* pTestParamters){

    HsaPmcTraceRoot root;
    HsaCounterProperties* pProps;

    int gpuNode = pTestParamters->gpuNode;
    KFDPerfCountersTest* pKFDPerfCountersTest =
                         (KFDPerfCountersTest*)pTestParamters->pTestObject;

    pProps = NULL;
    ASSERT_SUCCESS(hsaKmtPmcGetCounterProperties(gpuNode, &pProps));

    /* Verifying that there is at least one block */
    ASSERT_NE(0, pProps->NumBlocks) << "No performance counters blocks";

    /* Blocks are variable-length records; the next one follows the current counter array. */
    HsaCounterBlockProperties *block = &pProps->Blocks[0];
    bool priv_block_found = false;
    for (HSAuint32 i = 0; i < pProps->NumBlocks; i++) {
        if (block->Counters[0].Type <= HSA_PROFILE_TYPE_PRIVILEGED_STREAMING) {
            priv_block_found = true;
            break;
        }
        block = reinterpret_cast<HsaCounterBlockProperties *>(&block->Counters[block->NumCounters]);
    }

    if (!priv_block_found) {
        LOG() << "Skipping test: No privileged block is found."
             << std::endl;
        return;
    }

    if (getuid()) { /* Non-root */
        LOG() << "Skipping test: Privileged counters requires the user as root." << std::endl;
        return;
    }

    /* Registering trace */
    ASSERT_SUCCESS(hsaKmtPmcRegisterTrace(gpuNode,
                                          block->NumConcurrent,
                                          block->Counters,
                                          &root));

    /* Acquiring access for the trace */
    ASSERT_SUCCESS(hsaKmtPmcAcquireTraceAccess(gpuNode, root.TraceId));

    /* Allocating memory buffer for the trace */
    HsaMemoryBuffer membuf(PAGE_SIZE, gpuNode);

    /* Starting the trace */
    ASSERT_SUCCESS(hsaKmtPmcStartTrace(root.TraceId,
                                       membuf.As<void*>(),
                                       membuf.Size()));

    /* Delay between START and STOP tracing */
    Delay(START_STOP_DELAY);

    /* Stopping the trace */
    ASSERT_SUCCESS(hsaKmtPmcStopTrace(root.TraceId));

    /* Querying the trace */
    ASSERT_SUCCESS(hsaKmtPmcQueryTrace(root.TraceId));
    /* One 64-bit value is logged per concurrent counter of the block. */
    uint64_t *buf = membuf.As<uint64_t*>();
    for (uint32_t i = 0; i < block->NumConcurrent; i++, buf++)
        LOG() << "Counter " << std::dec << i << ": " << *buf << std::endl;

    /* Releasing the trace on the same node it was acquired on (was hard-coded node 0) */
    EXPECT_SUCCESS(hsaKmtPmcReleaseTraceAccess(gpuNode, root.TraceId));

    EXPECT_SUCCESS(hsaKmtPmcUnregisterTrace(gpuNode, root.TraceId));
}

/*
 * Runs the StartStopQueryTrace worker through KFDTest_Launch() and requires
 * it to succeed. This test previously launched RegisterTrace, which left the
 * start/stop/query worker unexecuted.
 */
TEST_F(KFDPerfCountersTest, StartStopQueryTrace) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(StartStopQueryTrace));

    TEST_END
}

/*
 * Per-GPU worker: samples the clock counters of gpuNode twice with a
 * Delay(100) in between, then expects every GPU and system counter to be
 * non-zero and the second sample to be strictly greater than the first.
 */
static void ClockCountersBasicTest(KFDTEST_PARAMETERS* pTestParamters){

    int gpuNode = pTestParamters->gpuNode;
    KFDPerfCountersTest* pKFDPerfCountersTest =
					  (KFDPerfCountersTest*)pTestParamters->pTestObject;

    HsaClockCounters before;
    HsaClockCounters after;

    /* Two samples separated by a short delay. */
    EXPECT_SUCCESS(hsaKmtGetClockCounters(gpuNode, &before));
    Delay(100);
    EXPECT_SUCCESS(hsaKmtGetClockCounters(gpuNode, &after));

    /* No counter may read as zero. */
    EXPECT_NE(0, before.GPUClockCounter);
    EXPECT_NE(0, after.GPUClockCounter);
    EXPECT_NE(0, before.SystemClockCounter);
    EXPECT_NE(0, after.SystemClockCounter);

    /* Both clocks must have advanced across the delay. */
    EXPECT_GT(after.GPUClockCounter, before.GPUClockCounter);
    EXPECT_GT(after.SystemClockCounter, before.SystemClockCounter);
}

/* Runs the ClockCountersBasicTest worker through KFDTest_Launch() and requires it to succeed. */
TEST_F(KFDPerfCountersTest, ClockCountersBasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(ClockCountersBasicTest));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPerfCounters.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFDPERFCOUNTERS_HPP__
#define __KFDPERFCOUNTERS_HPP__

#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"

/*
 * Test fixture used by the KFDPerfCountersTest test cases. It derives from
 * KFDBaseComponentTest and declares SetUp()/TearDown() hooks that are
 * defined in the corresponding source file.
 */
class KFDPerfCountersTest : public KFDBaseComponentTest {
 public:
    KFDPerfCountersTest() {}
    ~KFDPerfCountersTest() {}

 protected:
    // Fixture hooks, defined out of line.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFDPERFCOUNTERS_HPP__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDPerformanceTest.cpp
================================================
/*
 * Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <sys/time.h>
#include <vector>
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "AqlQueue.hpp"
#include "KFDTestUtilQueue.hpp"
#include <algorithm>
#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"

/*
 * Test fixture for the performance tests in this file. It derives from
 * KFDBaseComponentTest and only declares the SetUp()/TearDown() hooks
 * defined right below.
 */
class KFDPerformanceTest: public KFDBaseComponentTest {
 protected:
    virtual void SetUp();
    virtual void TearDown();
};

/* Fixture setup: forwards to KFDBaseComponentTest::SetUp() inside the ROUTINE_START/ROUTINE_END wrapper. */
void KFDPerformanceTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture teardown: forwards to KFDBaseComponentTest::TearDown() inside the ROUTINE_START/ROUTINE_END wrapper. */
void KFDPerformanceTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * Copy direction requested for a node in testNodeToNodes().
 *
 * The numeric order matters: callers compare against IN_OUT with '<' to
 * detect the single-direction values, so NONE must stay above IN_OUT.
 */
enum P2PDirection {
    IN     = 1,         // the node copies peer memory into its own memory (pull)
    OUT    = 2,         // the node copies its own memory to the peer (push)
    IN_OUT = IN | OUT,  // both directions at once (value 3)
    NONE   = 4,         // the node issues no copies
};

/*
 * Do the copy of one GPU from & to multiple GPUs.
 *
 * Allocates one buffer on node n1 and one buffer on each of the n nodes in
 * n2Array, builds a list of SDMA copy descriptors according to the requested
 * directions, runs them through sdma_multicopy() and frees the buffers.
 *
 * n1             - node that owns the "local" buffer.
 * n2Array        - the n peer nodes.
 * n              - number of entries in n2Array.
 * n1Direction    - copies issued with n1 as the first descriptor field
 *                  (IN: peer buffer -> n1 buffer, OUT: n1 buffer -> peer buffer).
 * n2Direction    - copies issued with each peer as the first descriptor field
 *                  (IN: n1 buffer -> peer buffer, OUT: peer buffer -> n1 buffer).
 * size           - bytes per copy; allocations are rounded up to PAGE_SIZE.
 * speed, speed2, msg - forwarded to sdma_multicopy() in bandwidth mode; not
 *                  used in overhead mode.
 * isTestOverhead - when true, measure the average time per copy instead.
 * time           - output for overhead mode; it is dereferenced on that path,
 *                  so it must be non-null when isTestOverhead is true.
 *
 * NOTE(review): the ASSERT_* macros return from this function on failure,
 * which skips the unmap/free calls at the end.
 */
static void
testNodeToNodes(HSAuint32 n1, const HSAuint32 *const n2Array, int n, P2PDirection n1Direction,
        P2PDirection n2Direction, HSAuint64 size, HSAuint64 *speed, HSAuint64 *speed2, std::stringstream *msg,
        bool isTestOverhead = false, HSAuint64 *time = 0) {
    /* Arrays below are sized by the runtime value n (variable-length arrays). */
    HSAuint32 n2[n];
    void *n1Mem, *n2Mem[n];
    HsaMemFlags memFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;
    /* The loops below queue at most four descriptors per peer. */
    SDMACopyParams array[n * 4];
    int array_count = 0;
    HSAuint64 alloc_size = ALIGN_UP(size, PAGE_SIZE);
    std::vector<SDMACopyParams> copyArray;
    int i;

    ASSERT_SUCCESS(hsaKmtAllocMemory(n1, alloc_size, memFlags, &n1Mem));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(n1Mem, alloc_size, NULL));

    for (i = 0; i < n; i++) {
        n2[i] = n2Array[i];
        ASSERT_SUCCESS(hsaKmtAllocMemory(n2[i], alloc_size, memFlags, &n2Mem[i]));
        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(n2Mem[i], alloc_size, NULL));
    }

    for (i = 0; i < n; i++) {
        /* A node that issues copies must not be node 0. */
        if (n1Direction != NONE)
            ASSERT_NE(n1, 0);
        if (n2Direction != NONE)
            ASSERT_NE(n2[i], 0);

        /* With a single peer and a single direction, the same copy is queued twice. */
        do {
            if (n1Direction == IN || n1Direction == IN_OUT)
                /* n2Mem -> n1Mem*/
                array[array_count++] = {n1, n2Mem[i], n1Mem, size, n1/*group id, just a hint*/};
            if (n1Direction == OUT || n1Direction == IN_OUT)
                /* n1Mem -> n2Mem*/
                array[array_count++] = {n1, n1Mem, n2Mem[i], size, n1};
            /* Issue two copies to make full use of sdma.*/
        } while (n1Direction < IN_OUT && n == 1 && array_count % 2);
        /* Do nothing if no IN or OUT specified.*/

        /* For a single peer direction, repeat until the running descriptor count is even. */
        do {
            if (n2Direction == IN || n2Direction == IN_OUT)
                /* n1Mem -> n2Mem*/
                array[array_count++] = {n2[i], n1Mem, n2Mem[i], size, n2[i]};
            if (n2Direction == OUT || n2Direction == IN_OUT)
                /* n2Mem -> n1Mem*/
                array[array_count++] = {n2[i], n2Mem[i], n1Mem, size, n2[i]};
        } while (n2Direction < IN_OUT && array_count % 2);
    }

    /* We measure a bunch of packets.*/
    if (isTestOverhead) {
            /* Replicate the descriptor list 1000 times to average out per-copy cost. */
            for (i = 0; i < 1000; i++)
                for (int j = 0; j < array_count; j++)
                    copyArray.push_back(array[j]);
        sdma_multicopy(copyArray, 1, HEAD_TAIL);
        /* Average over all replicated copies, converted by CounterToNanoSec(). */
        *time = CounterToNanoSec(copyArray[0].timeConsumption / (1000 * array_count));
    } else
        /* It did not respect the group id we set above.*/
        sdma_multicopy(array, array_count, speed, speed2, msg);

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(n1Mem));
    EXPECT_SUCCESS(hsaKmtFreeMemory(n1Mem, alloc_size));

    for (i = 0; i < n; i++) {
        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(n2Mem[i]));
        EXPECT_SUCCESS(hsaKmtFreeMemory(n2Mem[i], alloc_size));
    }
}

/*
 * Measures SDMA copy bandwidth between nodes using testNodeToNodes().
 *
 * Two modes:
 *  - Explicit pair: when both g_TestNodeId and g_TestDstNodeId are set
 *    (not -1), only a single [push, pull] copy between those two nodes is
 *    measured.
 *  - Full sweep: otherwise all GPU nodes are used and the test_suits table
 *    is walked in three phases:
 *      1. entries [0, twoNodesIdx): one GPU copies to/from every accessible
 *         node, including system memory node 0;
 *      2. entries [twoNodesIdx, multiNodesIdx): both GPUs of each mutually
 *         accessible pair copy at the same time;
 *      3. remaining entries: one node against several peers at once.
 *
 * g_TestTimeOut is multiplied by the number of peers for the duration of the
 * test and restored at the end on every path that scaled it.
 */
TEST_F(KFDPerformanceTest, P2PBandWidthTest) {
    TEST_START(TESTPROFILE_RUNALL);
    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Can't have 2 APUs on the same system." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    std::vector<int> nodes;
    const bool isSpecified = g_TestDstNodeId != -1 && g_TestNodeId != -1;
    int numPeers = 0;
    const unsigned int maxSdmaQueues = m_numSdmaEngines * m_numSdmaQueuesPerEngine;

    if (isSpecified) {
        if (g_TestNodeId != g_TestDstNodeId) {
            nodes.push_back(g_TestNodeId);
            nodes.push_back(g_TestDstNodeId);
            /* The explicit pair only counts when each node can access the other. */
            if ((m_NodeInfo.IsPeerAccessibleByNode(g_TestNodeId, g_TestDstNodeId) &&
                 m_NodeInfo.IsPeerAccessibleByNode(g_TestDstNodeId, g_TestNodeId)))
                numPeers = 2;
        }
    } else {
        nodes = m_NodeInfo.GetNodesWithGPU();
        numPeers = nodes.size();
    }

    if (numPeers < 2) {
        LOG() << "Skipping test: Need at least two large bar GPU or XGMI connected." << std::endl;
        return;
    }

    /* Scale the global timeout for this test; undone at the exit label. */
    g_TestTimeOut *= numPeers;

    std::vector<int> sysNodes(nodes); // include sysMem node 0...
    sysNodes.insert(sysNodes.begin(),0);

    const int total_tests = 7;
    const char *test_suits_string[total_tests] = {
        "Copy from node to node by [push, NONE]",
        "Copy from node to node by [pull, NONE]",
        "Full duplex copy from node to node by [push|pull, NONE]",
        "Full duplex copy from node to node by [push, push]",
        "Full duplex copy from node to node by [pull, pull]",
        "Copy from node to multiple nodes by [push, NONE]",
        "Copy from multiple nodes to node by [push, NONE]",
    };
    /* {n1Direction, n2Direction} per entry, parallel to test_suits_string. */
    const P2PDirection test_suits[total_tests][2] = {
        /* One node used.*/
        {OUT,   NONE},
        {IN,    NONE},
        {IN_OUT,NONE},
        /* two nodes used.*/
        {OUT,   OUT},
        {IN,    IN},
        /* Multi nodes used*/
        {OUT,   NONE},
        {NONE,  OUT},
    };
    const int twoNodesIdx = 3;
    const int multiNodesIdx = 5;
    const HSAuint32 size = 32ULL << 20;
    int s = 0; //test index;
    std::stringstream msg;
    char str[64];

    if (isSpecified) {
        HSAuint32 n1 = g_TestNodeId;
        HSAuint32 n2 = g_TestDstNodeId;
        /* Zero-initialised so that an aborted helper does not leave garbage to print. */
        HSAuint64 speed = 0, speed2 = 0;

        LOG() << "Copy from node to node by [push, pull]" << std::endl;
        snprintf(str, sizeof(str), "[%d -> %d] ", n1, n2);
        testNodeToNodes(n1, &n2, 1, OUT, IN, size, &speed, &speed2, &msg);

        LOG() << std::dec << str << (float)speed / 1024 << " - " <<
                                 (float)speed2 / 1024 << " GB/s" << std::endl;
        goto exit;

    }

    /* Phase 1: a single GPU issues the copies. */
    for (; s < twoNodesIdx; s++) {
        LOG() << test_suits_string[s] << std::endl;
        msg << test_suits_string[s] << std::endl;

        for (unsigned i = 0; i < nodes.size(); i++) {
            /* Src node is a GPU.*/
            HSAuint32 n1 = nodes[i];
            HSAuint64 speed = 0, speed2 = 0;

            /* Pick up dst node which can be sysMem.*/
            for (unsigned j = 0; j < sysNodes.size(); j++) {
                HSAuint32 n2 = sysNodes[j];
                if (n1 == n2)
                    continue;

                if (!m_NodeInfo.IsPeerAccessibleByNode(n2, n1))
                    continue;

                snprintf(str, sizeof(str), "[%d -> %d] ", n1, n2);
                msg << str << std::endl;
                testNodeToNodes(n1, &n2, 1, test_suits[s][0], test_suits[s][1], size, &speed, &speed2, &msg);

                LOG() << std::dec << str << (float)speed / 1024 << " - " <<
                                            (float)speed2 / 1024 << " GB/s" << std::endl;
            }
        }
    }

    /* Phase 2: both GPUs of each mutually accessible pair issue copies. */
    for (; s < multiNodesIdx; s++) {
        LOG() << test_suits_string[s] << std::endl;
        msg << test_suits_string[s] << std::endl;

        for (unsigned i = 0; i < nodes.size(); i++) {
            HSAuint32 n1 = nodes[i];
            HSAuint64 speed = 0, speed2 = 0;

            for (unsigned j = i + 1; j < nodes.size(); j++) {
                HSAuint32 n2 = nodes[j];

                if (!m_NodeInfo.IsPeerAccessibleByNode(n2, n1) ||
                    !m_NodeInfo.IsPeerAccessibleByNode(n1, n2))
                    continue;

                snprintf(str, sizeof(str), "[%d <-> %d] ", n1, n2);
                msg << str << std::endl;
                testNodeToNodes(n1, &n2, 1, test_suits[s][0], test_suits[s][1], size, &speed, &speed2, &msg);

                LOG() << std::dec << str << (float)speed / 1024 << " - " <<
                                            (float)speed2 / 1024 << " GB/s" << std::endl;
            }
        }
    }

    /* Phase 3: one node against several peers at once. */
    for (; s < total_tests && !isSpecified; s++) {
        LOG() << test_suits_string[s] << std::endl;
        msg << test_suits_string[s] << std::endl;
        /* Just use GPU nodes to do copy.*/
        std::vector<int> &src = test_suits[s][0] != NONE ? nodes : sysNodes;
        std::vector<int> &dst = test_suits[s][1] != NONE ? nodes : sysNodes;

        for (unsigned i = 0; i < src.size(); i++) {
            HSAuint32 n1 = src[i];
            HSAuint64 speed = 0, speed2 = 0;
            HSAuint32 n2[dst.size()];
            int n = 0;
            char str[64];

            /* Collect the peers of n1 that are accessible in the required direction(s). */
            for (unsigned j = 0; j < dst.size(); j++) {
                if (dst[j] != n1) {
                    if (test_suits[s][0] != NONE &&
                        !m_NodeInfo.IsPeerAccessibleByNode(dst[j], n1))
                            continue;
                    if (test_suits[s][1] != NONE &&
                        !m_NodeInfo.IsPeerAccessibleByNode(n1, dst[j]))
                            continue;
                    n2[n++] = dst[j];
                }
            }

            /* At least 2 dst GPUs.*/
            if (n < 2)
                continue;

            if (test_suits[s][1] == OUT) {
                snprintf(str, sizeof(str), "[[%d...%d] -> %d] ", dst.front(), dst.back(), n1);
                msg << str << std::endl;
                testNodeToNodes(n1, n2, n, test_suits[s][0], test_suits[s][1], size, &speed, &speed2, &msg);

                LOG() << std::dec << str << (float)speed / 1024 << " - " <<
                                        (float)speed2 / 1024 << " GB/s" << std::endl;
            } else {
                /* If the total number of peers is greater than the number of SDMA queues supported,
                 * then we test in the following way:
                 * 1. Test peers in batches where each batch consists of number of peers equal to the
                 *    max number of SDMA queues.
                 * 2. Keep repeating step 1 if number of peers left is greater than number of SDMA queues
                 *    supported.
                 * 3. Test the last batch with the remaining peers left which can be less than the number of
                 *    SDMA queues supported.
                 * For example, if there are 24 peers and max number of SDMA queues supported is 16, then
                 * the test will test 16 peers/nodes first and then remaining 8 in the next round.
                 */
                unsigned int j=0;
                unsigned int start_index;
                unsigned int end_index;
                do {
                    start_index = maxSdmaQueues * j++;
                    end_index = start_index + maxSdmaQueues - 1;

                    if (end_index + 1 > n)
                        end_index = n - 1;

                    snprintf(str, sizeof(str), "[%d -> [%d...%d]] ", n1, n2[start_index], n2[end_index]);
                    msg << str << std::endl;
                    testNodeToNodes(n1, &n2[start_index], end_index - start_index + 1,
                                    test_suits[s][0], test_suits[s][1], size, &speed, &speed2, &msg);
                    LOG() << std::dec << str << (float)speed / 1024 << " - " <<
                                                (float)speed2 / 1024 << " GB/s" << std::endl;
                } while(end_index < (n - 1));
            }
        }
    }

exit:
    /*
     * Undo the timeout scaling applied above. The explicit-pair path jumps
     * here as well, so the global is restored on that path too.
     */
    g_TestTimeOut /= numPeers;

    /* New line.*/
    LOG() << std::endl << msg.str() << std::endl;

    TEST_END
}

/*
 * Measures the average per-copy time of small SDMA copies using
 * testNodeToNodes() in overhead mode.
 *
 * For each direction in {push, pull, push|pull}, each GPU node n1 and each
 * destination n2 (system memory node 0, n1 itself, or a peer that passes
 * IsPeerAccessibleByNode(n2, n1)), one table row is logged with the average
 * time in nanoseconds for every size in sizeArray.
 */
TEST_F(KFDPerformanceTest, P2POverheadTest) {
    TEST_START(TESTPROFILE_RUNALL);
    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Can't have 2 APUs on the same system." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    std::vector<int> nodes;

    nodes = m_NodeInfo.GetNodesWithGPU();
    int numPeers = nodes.size();

    if (numPeers < 2) {
        LOG() << "Skipping test: Need at least two large bar GPU or XGMI connected." << std::endl;
        return;
    }

    std::vector<int> sysNodes(nodes); // include sysMem node 0...
    sysNodes.insert(sysNodes.begin(),0);

    /* size should be small.*/
    const HSAuint32 sizeArray[] = {4, 8, 16, 64, 256, 1024};
    const int total_tests = 3;
    const char *test_suits_string[total_tests] = {
        "[push]     ",
        "[pull]     ",
        "[push|pull]",
    };
    const P2PDirection test_suits[total_tests] = {OUT, IN, IN_OUT};
    std::stringstream msg;
    int s; //test index;

    /* Table header: one column per copy size. */
    msg << "Test (avg. ns) | Size";
    for (auto &size : sizeArray)
        msg << "\t" << size;
    LOG() << msg.str() << std::endl;
    LOG() << "-----------------------------------------------------------------------" << std::endl;

    for (s = 0; s < total_tests; s++) {

        for (unsigned i = 0; i < nodes.size(); i++) {
            /* Src node is a GPU.*/
            HSAuint32 n1 = nodes[i];

            /* Pick up dst node which can be sysMem.*/
            for (unsigned j = 0; j < sysNodes.size(); j++) {
                HSAuint32 n2 = sysNodes[j];
                /* Row buffer; named differently from the header stream above to avoid shadowing it. */
                std::stringstream row;

                if (n1 != n2 && !m_NodeInfo.IsPeerAccessibleByNode(n2, n1))
                    continue;

                row << test_suits_string[s] << "[" << n1 << " -> " << n2 << "]";
                for (auto &size : sizeArray) {
                    /*
                     * Reset per measurement: the helper returns early on an
                     * ASSERT failure without writing the result.
                     */
                    HSAuint64 time = 0;
                    testNodeToNodes(n1, &n2, 1, test_suits[s], NONE, size, 0, 0, 0, 1, &time);
                    row << "\t" << time;
                }
                LOG() << row.str() << std::endl;
            }
        }
    }

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDQMTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/time.h>
#include <sys/mman.h>
#include <vector>
#include <utility>
#include <mutex>

#include "KFDQMTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "XgmiOptimizedSDMAQueue.hpp"
#include "AqlQueue.hpp"
#include <algorithm>

#include "Dispatch.hpp"

extern unsigned int g_TestGPUsNum;

/* Fixture setup for the queue-management tests.
 *
 * Adds nothing of its own: it only forwards to KFDBaseComponentTest::SetUp()
 * between the ROUTINE_START / ROUTINE_END macros.
 */
void KFDQMTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

/* Fixture teardown for the queue-management tests.
 *
 * Adds nothing of its own: it only forwards to
 * KFDBaseComponentTest::TearDown() between the ROUTINE_START / ROUTINE_END
 * macros.
 */
void KFDQMTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* Test body handed to KFDTest_Launch: creates a PM4 queue on the node under
 * test and destroys it again.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 *
 * A failed Create() is fatal for this body (ASSERT); a failed Destroy() is
 * only reported (EXPECT).
 */
static void CreateDestroyCpQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    PM4Queue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the CreateDestroyCpQueue test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, CreateDestroyCpQueue) {
    TEST_START(TESTPROFILE_RUNALL)

   ASSERT_SUCCESS(KFDTest_Launch(CreateDestroyCpQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: submits a single PM4 NOP packet to a
 * freshly created PM4 queue and waits, using a queue-type event, until the
 * packet has been consumed.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void SubmitNopCpQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    PM4Queue queue;
    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    queue.PlaceAndSubmitPacket(PM4NopPacket());

    queue.Wait4PacketConsumption(event);

    // Report a failed event teardown instead of silently dropping the status,
    // matching how queue.Destroy() is checked.
    EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the SubmitNopCpQueue test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, SubmitNopCpQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SubmitNopCpQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: has a PM4 queue write zeroes into a
 * buffer that was pre-filled with 0xFF and checks that the first dword
 * becomes 0.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void SubmitPacketCpQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);

    // Start from a non-zero pattern so the write of 0 is observable.
    destBuf.Fill(0xFF);
    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    PM4Queue queue;
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0, 0));

    queue.Wait4PacketConsumption(event);

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 0), gpuNode);

    // Report a failed event teardown instead of silently dropping the status,
    // matching how queue.Destroy() is checked.
    EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the SubmitPacketCpQueue test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, SubmitPacketCpQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SubmitPacketCpQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates as many PM4 queues as
 * Get_NumCpQueues() reports for this GPU and exercises them one after the
 * other.
 *
 * Queue number N writes the value N into the dword pair starting at dword
 * offset 2*N of a shared destination buffer, followed by a release-memory
 * packet; the first dword of that pair is then polled for N.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void AllCpQueues(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pTest = (KFDQMTest*)pTestParamters->pTestObject;
    int gpuIndex = pTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HSAuint32 familyId = pTest->GetFamilyIdFromNodeId(gpuNode);

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);
    destBuf.Fill(0xFF);

    unsigned int queueCount = pTest->Get_NumCpQueues(gpuIndex);
    std::vector<PM4Queue> queues(queueCount);

    unsigned int idx;

    // Bring every queue up first; any failure aborts this body.
    for (idx = 0; idx < queueCount; ++idx) {
        ASSERT_SUCCESS_GPU(queues[idx].Create(gpuNode), gpuNode) << " QueueId=" << idx;
    }

    // Drive the queues one at a time, each with its own slot in destBuf.
    for (idx = 0; idx < queueCount; ++idx) {
        PM4Queue &current = queues[idx];

        current.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>() + idx * 2, idx, idx));
        current.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, true, 0, 0));
        current.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>() + idx * 2, idx), gpuNode);
    }

    for (PM4Queue &current : queues) {
        EXPECT_SUCCESS_GPU(current.Destroy(), gpuNode);
    }
}

// Registers the AllCpQueues test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, AllCpQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AllCpQueues));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates an SDMA queue on the node under
 * test and destroys it again.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void CreateDestroySdmaQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    SDMAQueue sdmaQueue;

    // Creation failure is fatal for this body; destruction failure is only reported.
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

// Registers the CreateDestroySdmaQueue test: passes the body of the same name
// to KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, CreateDestroySdmaQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(CreateDestroySdmaQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: submits one SDMA NOP packet to a
 * freshly created SDMA queue and waits for it to be consumed.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void SubmitNopSdmaQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    SDMAQueue sdmaQueue;

    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    // Single NOP, then block until the queue has consumed it.
    sdmaQueue.PlaceAndSubmitPacket(SDMANopPacket());
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

// Registers the SubmitNopSdmaQueue test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, SubmitNopSdmaQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SubmitNopSdmaQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: has an SDMA queue write 0x02020202 to
 * the start of a buffer that was pre-filled with 0xFF and polls the first
 * dword for that value.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void SubmitPacketSdmaQueue(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    // Target buffer starts out with a pattern different from the value written.
    HsaMemoryBuffer target(PAGE_SIZE, gpuNode, false);
    target.Fill(0xFF);

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), target.As<void *>(), 0x02020202));
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_TRUE_GPU(WaitOnValue(target.As<unsigned int*>(), 0x02020202), gpuNode);

    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}

// Registers the SubmitPacketSdmaQueue test: passes the body of the same name
// to KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, SubmitPacketSdmaQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SubmitPacketSdmaQueue));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates (SDMA engines) x (SDMA queues
 * per engine) SDMA queues, as reported by the test object for this GPU, and
 * exercises them one after the other.
 *
 * For each queue the source buffer is filled with a per-queue pattern, copied
 * into the first half of the destination buffer, and a 0x02020202 marker is
 * written at the start of the second half. The marker is polled for, then the
 * copied half is compared against the source.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void AllSdmaQueues(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pTest = (KFDQMTest*)pTestParamters->pTestObject;
    int gpuIndex = pTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);

    unsigned int engineCount = pTest->Get_NumSdmaEngines(gpuIndex);
    unsigned int queuesPerEngine = pTest->Get_NumSdmaSdmaQueuesPerEngine(gpuIndex);

    int copyBytes = PAGE_SIZE;
    const unsigned int queueCount = engineCount * queuesPerEngine;

    LOG() << "Regular SDMA engines number: " << engineCount
          << " SDMA queues per engine: " << queuesPerEngine << std::endl;

    // Destination is twice the copy size: copied data first, marker dword after it.
    HsaMemoryBuffer destBuf(copyBytes << 1 , gpuNode, false);
    HsaMemoryBuffer srcBuf(copyBytes, gpuNode, false);
    destBuf.Fill(0xFF);

    std::vector<SDMAQueue> queues(queueCount);

    for (SDMAQueue &current : queues) {
        ASSERT_SUCCESS_GPU(current.Create(gpuNode), gpuNode);
    }

    for (unsigned int idx = 0; idx < queueCount; ++idx) {
        SDMAQueue &current = queues[idx];

        destBuf.Fill(0x0);
        srcBuf.Fill(idx + 0xa0);

        current.PlaceAndSubmitPacket(
            SDMACopyDataPacket(current.GetFamilyId(), destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes));
        current.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(current.GetFamilyId(), destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202));
        current.Wait4PacketConsumption();

        // Marker first, then the payload that was copied ahead of it.
        EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202), gpuNode);
        EXPECT_SUCCESS_GPU(memcmp(
            destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes), gpuNode);
    }

    for (SDMAQueue &current : queues) {
        EXPECT_SUCCESS_GPU(current.Destroy(), gpuNode);
    }
}

// Registers the AllSdmaQueues test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, AllSdmaQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AllSdmaQueues));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates (XGMI SDMA engines) x (SDMA
 * queues per engine) XGMI-optimized SDMA queues, as reported by the test
 * object for this GPU, and exercises them one after the other.
 *
 * Per queue: fill the source with a per-queue pattern, copy it into the first
 * half of the destination, write a 0x02020202 marker at the start of the
 * second half, poll for the marker and compare the copied half to the source.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void AllXgmiSdmaQueues(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pTest = (KFDQMTest*)pTestParamters->pTestObject;
    int gpuIndex = pTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    unsigned int xgmiEngineCount = pTest->Get_NumSdmaSdmaXgmiEngines(gpuIndex);
    unsigned int queuesPerEngine = pTest->Get_NumSdmaSdmaQueuesPerEngine(gpuIndex);

    int copyBytes = PAGE_SIZE;

    const unsigned int queueCount = xgmiEngineCount * queuesPerEngine;

    LOG() << "XGMI SDMA engines number: " << xgmiEngineCount
            << " SDMA queues per engine: " << queuesPerEngine << std::endl;

    // Destination is twice the copy size: copied data first, marker dword after it.
    HsaMemoryBuffer destBuf(copyBytes << 1 , gpuNode, false);
    HsaMemoryBuffer srcBuf(copyBytes, gpuNode, false);
    destBuf.Fill(0xFF);

    std::vector<XgmiOptimizedSDMAQueue> xgmiQueues(queueCount);

    for (XgmiOptimizedSDMAQueue &current : xgmiQueues) {
        ASSERT_SUCCESS_GPU(current.Create(gpuNode), gpuNode);
    }

    // The index stays a signed int so the pattern passed to Fill() keeps its type.
    for (int idx = 0; idx < queueCount; ++idx) {
        XgmiOptimizedSDMAQueue &current = xgmiQueues[idx];

        destBuf.Fill(0x0);
        srcBuf.Fill(idx + 0xa0);

        current.PlaceAndSubmitPacket(
            SDMACopyDataPacket(current.GetFamilyId(),
                    destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes));
        current.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(current.GetFamilyId(),
                    destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202));
        current.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202), gpuNode);
        EXPECT_SUCCESS_GPU(memcmp(
            destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes), gpuNode);
    }

    for (XgmiOptimizedSDMAQueue &current : xgmiQueues) {
        EXPECT_SUCCESS_GPU(current.Destroy(), gpuNode);
    }
}

// Registers the AllXgmiSdmaQueues test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, AllXgmiSdmaQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AllXgmiSdmaQueues));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates every PM4, SDMA and
 * XGMI-optimized SDMA queue the test object reports for this GPU, keeps them
 * all alive at the same time, and then exercises them one by one.
 *
 * PM4 queue N writes N into the dword pair at dword offset 2*N of its own
 * buffer. Each SDMA / XGMI SDMA queue copies a per-queue pattern into the
 * first half of a shared destination buffer and writes a 0x02020202 marker at
 * the start of the second half; the marker is polled for and the copied half
 * is compared against the source.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void AllQueues(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pTest = (KFDQMTest*)pTestParamters->pTestObject;

    int gpuIndex = pTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HSAuint32 familyId = pTest->GetFamilyIdFromNodeId(gpuNode);

    unsigned int xgmiEngineCount = pTest->Get_NumSdmaSdmaXgmiEngines(gpuIndex);
    unsigned int queuesPerEngine = pTest->Get_NumSdmaSdmaQueuesPerEngine(gpuIndex);
    unsigned int sdmaEngineCount = pTest->Get_NumSdmaEngines(gpuIndex);
    unsigned int reportedCpQueues = pTest->Get_NumCpQueues(gpuIndex);

    int copyBytes = PAGE_SIZE;

    const unsigned int cpCount = reportedCpQueues;
    const unsigned int sdmaCount = sdmaEngineCount * queuesPerEngine;
    const unsigned int xgmiCount = xgmiEngineCount * queuesPerEngine;

    // Buffer written by the PM4 queues.
    HsaMemoryBuffer destBufCp(PAGE_SIZE, gpuNode, false);
    destBufCp.Fill(0xFF);

    // Buffers shared by both kinds of SDMA queue: copied data, then marker.
    HsaMemoryBuffer destBuf(copyBytes << 1 , gpuNode, false);
    HsaMemoryBuffer srcBuf(copyBytes, gpuNode, false);
    destBuf.Fill(0xFF);

    std::vector<PM4Queue> cpQueues(cpCount);
    std::vector<SDMAQueue> sdmaQueues(sdmaCount);
    std::vector<XgmiOptimizedSDMAQueue> xgmiQueues(xgmiCount);

    unsigned int idx;

    /* Phase 1: create everything. Any failure aborts this body. */
    for (idx = 0; idx < cpCount; ++idx) {
        ASSERT_SUCCESS_GPU(cpQueues[idx].Create(gpuNode), gpuNode) << " QueueId=" << idx;
    }

    for (SDMAQueue &sdma : sdmaQueues) {
        ASSERT_SUCCESS_GPU(sdma.Create(gpuNode), gpuNode);
    }

    for (XgmiOptimizedSDMAQueue &xgmi : xgmiQueues) {
        ASSERT_SUCCESS_GPU(xgmi.Create(gpuNode), gpuNode);
    }

    /* Phase 2: exercise the PM4 queues. */
    for (idx = 0; idx < cpCount; ++idx) {
        PM4Queue &cp = cpQueues[idx];

        cp.PlaceAndSubmitPacket(PM4WriteDataPacket(destBufCp.As<unsigned int*>() + idx * 2, idx, idx));
        cp.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(familyId, true, 0, 0));
        cp.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(destBufCp.As<unsigned int*>() + idx * 2, idx), gpuNode);
    }

    /* Phase 3: exercise the regular SDMA queues. */
    for (idx = 0; idx < sdmaCount; ++idx) {
        SDMAQueue &sdma = sdmaQueues[idx];

        destBuf.Fill(0x0);
        srcBuf.Fill(idx + 0xa0);

        sdma.PlaceAndSubmitPacket(
            SDMACopyDataPacket(sdma.GetFamilyId(), destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes));
        sdma.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(sdma.GetFamilyId(), destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202));
        sdma.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202), gpuNode);
        EXPECT_SUCCESS_GPU(memcmp(
            destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes), gpuNode);
    }

    /* Phase 4: exercise the XGMI-optimized SDMA queues the same way. */
    for (idx = 0; idx < xgmiCount; ++idx) {
        XgmiOptimizedSDMAQueue &xgmi = xgmiQueues[idx];

        destBuf.Fill(0x0);
        srcBuf.Fill(idx + 0xa0);

        xgmi.PlaceAndSubmitPacket(
            SDMACopyDataPacket(xgmi.GetFamilyId(),
                    destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes));
        xgmi.PlaceAndSubmitPacket(
            SDMAWriteDataPacket(xgmi.GetFamilyId(),
                    destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202));
        xgmi.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>() + copyBytes/4, 0x02020202), gpuNode);
        EXPECT_SUCCESS_GPU(memcmp(
            destBuf.As<unsigned int*>(), srcBuf.As<unsigned int*>(), copyBytes), gpuNode);
    }

    /* Phase 5: tear everything down in creation order. */
    for (PM4Queue &cp : cpQueues) {
        EXPECT_SUCCESS_GPU(cp.Destroy(), gpuNode);
    }

    for (SDMAQueue &sdma : sdmaQueues) {
        EXPECT_SUCCESS_GPU(sdma.Destroy(), gpuNode);
    }

    for (XgmiOptimizedSDMAQueue &xgmi : xgmiQueues) {
        EXPECT_SUCCESS_GPU(xgmi.Destroy(), gpuNode);
    }
}

// Registers the AllQueues test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, AllQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(AllQueues));

    TEST_END
}

/* The following test is designed to reproduce an intermittent hang on
 * Fiji and other VI/Polaris GPUs. This test typically hangs in a few
 * seconds. According to analysis done by HW engineers, the culprit
 * seems to be PCIe speed switching. The problem can be worked around
 * by disabling the lowest DPM level on Fiji.
 *
 * Test body handed to KFDTest_Launch. It issues 100000 SDMA copy
 * submissions of BUFFER_SIZE bytes each, interleaved with a variable
 * amount of CPU memcpy work, and synchronizes with the queue on every
 * eighth iteration. A final SDMA write of 0x02020202 into the source
 * buffer is polled for before the queue is destroyed.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void SdmaConcurrentCopies(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    // NOTE(review): gpuIndex and m_FamilyId are computed but not used below.
    int gpuIndex = pKFDQMTest->Get_NodeInfo()->HsaGPUindexFromGpuNode(gpuNode);
    HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

// NOTE(review): these macros are never #undef'd, so they stay defined for the
// rest of the translation unit.
#define BUFFER_SIZE (64*1024)
#define NPACKETS 1
#define COPY_SIZE (BUFFER_SIZE / NPACKETS)
    // Source is allocated on node 0; destination on the node under test, with
    // the fourth constructor argument depending on hsakmt_is_dgpu().
    HsaMemoryBuffer srcBuf(BUFFER_SIZE, 0, true);
    HsaMemoryBuffer dstBuf(BUFFER_SIZE, gpuNode, false, hsakmt_is_dgpu() ? true : false);

    SDMAQueue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    std::ostream &log = LOG();
    // One spinner character followed by a backspace so each update overwrites
    // the previous one.
    char progress[] = "-\b";
    log << "Running ... ";

    for (unsigned i = 0; i < 100000; i++) {
        // Advance the spinner once every 1000 iterations.
        if (i % 1000 == 0) {
            const char progressSteps[4] = {'-', '\\', '|', '/'};
            progress[0] = progressSteps[(i/1000) % 4];
            log << progress;
        }

        // Queue NPACKETS copies that together cover BUFFER_SIZE bytes, then
        // submit them in one go.
        for (unsigned j = 0; j < NPACKETS; j++)
            queue.PlacePacket(
                SDMACopyDataPacket(queue.GetFamilyId(), dstBuf.As<char *>()+COPY_SIZE*j,
                                   srcBuf.As<char *>()+COPY_SIZE*j, COPY_SIZE));
        queue.SubmitPacket();

        /* Waste a variable amount of time. Submission timing
         * while SDMA runs concurrently seems to be critical for
         * reproducing the hang
         */
        // The repeat count is the low 12 bits of i, i.e. 0..4095 copies of 1 KiB.
        for (int k = 0; k < (i & 0xfff); k++)
            memcpy(srcBuf.As<char *>()+PAGE_SIZE, srcBuf.As<char *>(), 1024);

        /* Wait for idle every 8 packets to allow the SDMA engine to
         * run concurrently for a bit without getting too far ahead
         */
        if ((i & 0x7) == 0)
            queue.Wait4PacketConsumption();
    }
    log << "Done." << std::endl;

    // Final write into the source buffer; polling for it checks that the queue
    // still processes packets after the stress loop.
    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), srcBuf.As<unsigned *>(), 0x02020202));
    queue.Wait4PacketConsumption();
    EXPECT_TRUE_GPU(WaitOnValue(srcBuf.As<unsigned int*>(), 0x02020202), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the SdmaConcurrentCopies test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, SdmaConcurrentCopies) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SdmaConcurrentCopies));

    TEST_END
}

/* Test body handed to KFDTest_Launch: checks that a PM4 queue stops executing
 * packets after Update() is called with its last argument set to true, and
 * resumes once Update() is called again with that argument set to false.
 * (Presumably that argument nullifies the ring buffer address, as the test
 * name suggests; confirm against BaseQueue::Update.)
 *
 * Sequence:
 *   1. Write 0 through the enabled queue and confirm it lands.
 *   2. Refill the buffer, disable the queue, submit a write of 1, wait two
 *      seconds and confirm the buffer is untouched.
 *   3. Re-enable the queue and confirm the pending write of 1 now lands.
 *
 * The WaitOnValue() results in steps 1 and 3 are checked so that a queue which
 * never executes the packet is reported as a failure.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void DisableCpQueueByUpdateWithNullAddress(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);

    destBuf.Fill(0xFFFFFFFF);

    PM4Queue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0, 0));

    queue.Wait4PacketConsumption(event);

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 0), gpuNode);

    destBuf.Fill(0xFFFFFFFF);

    EXPECT_SUCCESS_GPU(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, true), gpuNode);

    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 1, 1));

    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    EXPECT_EQ_GPU(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF, gpuNode)
        << "Packet executed even though the queue is supposed to be disabled!";

    EXPECT_SUCCESS_GPU(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

    queue.Wait4PacketConsumption(event);

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 1), gpuNode);

    EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event), gpuNode);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the DisableCpQueueByUpdateWithNullAddress test: passes the body of
// the same name to KFDTest_Launch and fails if the launch does not report
// success.
TEST_F(KFDQMTest, DisableCpQueueByUpdateWithNullAddress) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(DisableCpQueueByUpdateWithNullAddress));

    TEST_END
}

/* Test body handed to KFDTest_Launch: checks that an SDMA queue stops
 * executing packets after Update() is called with its last argument set to
 * true, and resumes once Update() is called again with that argument set to
 * false. (Presumably that argument nullifies the ring buffer address, as the
 * test name suggests; confirm against BaseQueue::Update.)
 *
 * Sequence:
 *   1. Write 0 through the enabled queue and confirm it lands.
 *   2. Refill the buffer with 0xFFFFFFFF, disable the queue, submit another
 *      write of 0, wait two seconds and confirm the buffer is untouched.
 *   3. Re-enable the queue and confirm the pending write of 0 now lands.
 *
 * The WaitOnValue() results in steps 1 and 3 are checked so that a queue which
 * never executes the packet is reported as a failure.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void DisableSdmaQueueByUpdateWithNullAddress(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);

    destBuf.Fill(0xFFFFFFFF);

    SDMAQueue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), destBuf.As<void*>(), 0));

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 0), gpuNode);

    destBuf.Fill(0xFFFFFFFF);

    EXPECT_SUCCESS_GPU(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, true), gpuNode);

    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), destBuf.As<void*>(), 0));

    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    EXPECT_EQ_GPU(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF, gpuNode)
        << "Packet executed even though the queue is supposed to be disabled!";

    EXPECT_SUCCESS_GPU(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

    queue.Wait4PacketConsumption();

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 0), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}
// Registers the DisableSdmaQueueByUpdateWithNullAddress test: passes the body
// of the same name to KFDTest_Launch and fails if the launch does not report
// success.
TEST_F(KFDQMTest, DisableSdmaQueueByUpdateWithNullAddress) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(DisableSdmaQueueByUpdateWithNullAddress));

    TEST_END
}

/* Test body handed to KFDTest_Launch: checks that a PM4 queue stops executing
 * packets once its queue percentage is updated to 0, and resumes when the
 * percentage is restored to BaseQueue::DEFAULT_QUEUE_PERCENTAGE.
 *
 * Sequence:
 *   1. Write 0 through the enabled queue and confirm it lands.
 *   2. Refill the buffer, set the percentage to 0, submit a write of 1, wait
 *      two seconds and confirm the buffer is untouched.
 *   3. Restore the percentage and confirm the pending write of 1 now lands.
 *
 * The WaitOnValue() results in steps 1 and 3 are checked so that a queue which
 * never executes the packet is reported as a failure.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void DisableCpQueueByUpdateWithZeroPercentage(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);

    destBuf.Fill(0xFFFFFFFF);

    PM4Queue queue;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    PM4WriteDataPacket packet1, packet2;
    packet1.InitPacket(destBuf.As<unsigned int*>(), 0, 0);
    packet2.InitPacket(destBuf.As<unsigned int*>(), 1, 1);

    queue.PlaceAndSubmitPacket(packet1);

    queue.Wait4PacketConsumption(event);

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 0), gpuNode);

    destBuf.Fill(0xFFFFFFFF);

    EXPECT_SUCCESS_GPU(queue.Update(0/*percentage*/, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

    queue.PlaceAndSubmitPacket(packet2);

    // Don't sync since we don't expect rptr to change when the queue is disabled.
    Delay(2000);

    EXPECT_EQ_GPU(destBuf.As<unsigned int*>()[0], 0xFFFFFFFF, gpuNode)
        << "Packet executed even though the queue is supposed to be disabled!";

    EXPECT_SUCCESS_GPU(queue.Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, BaseQueue::DEFAULT_PRIORITY, false), gpuNode);

    queue.Wait4PacketConsumption(event);

    EXPECT_TRUE_GPU(WaitOnValue(destBuf.As<unsigned int*>(), 1), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// Registers the DisableCpQueueByUpdateWithZeroPercentage test: passes the body
// of the same name to KFDTest_Launch and fails if the launch does not report
// success.
TEST_F(KFDQMTest, DisableCpQueueByUpdateWithZeroPercentage) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(DisableCpQueueByUpdateWithZeroPercentage));

    TEST_END
}

/* Test body handed to KFDTest_Launch: for TEST_TIME_SEC seconds, repeatedly
 * creates one PM4 queue and one SDMA queue and destroys them again, varying
 * which of the two is created first and which is destroyed first.
 *
 * The queue objects live in a scope of their own inside the loop instead of
 * being allocated with new, so nothing is leaked when a fatal
 * ASSERT_SUCCESS_GPU leaves the function early.
 *
 * pTestParamters: launch parameters; only gpuNode is read here.
 */
static void CreateQueueStressSingleThreaded(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;

    static const HSAuint64 TEST_TIME_SEC = 15;

    HSAuint64 initialTime = GetSystemTickCountInMicroSec();

    unsigned int numIter = 0;

    HSAuint64 timePassed = 0;

    do {
        // Every four iterations cover all four combinations of
        // (which queue is created first, which queue is destroyed first).
        unsigned int firstToCreate = (numIter % 2 != 0) ? 1 : 0;
        unsigned int firstToDestroy = (numIter % 4 > 1) ? 1 : 0;

        unsigned int secondToCreate = (firstToCreate + 1)%2;
        unsigned int secondToDestroy = (firstToDestroy + 1)%2;

        {
            // Scoped so both objects are gone before the elapsed time is sampled.
            PM4Queue pm4Queue;
            SDMAQueue sdmaQueue;
            BaseQueue *queues[2] = {&pm4Queue, &sdmaQueue};

            ASSERT_SUCCESS_GPU(queues[firstToCreate]->Create(gpuNode), gpuNode);
            ASSERT_SUCCESS_GPU(queues[secondToCreate]->Create(gpuNode), gpuNode);

            EXPECT_SUCCESS_GPU(queues[firstToDestroy]->Destroy(), gpuNode);
            EXPECT_SUCCESS_GPU(queues[secondToDestroy]->Destroy(), gpuNode);
        }

        ++numIter;

        HSAuint64 curTime = GetSystemTickCountInMicroSec();
        timePassed = (curTime - initialTime) / 1000000;
    } while (timePassed < TEST_TIME_SEC);
}

// Registers the CreateQueueStressSingleThreaded test: passes the body of the
// same name to KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, CreateQueueStressSingleThreaded) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(CreateQueueStressSingleThreaded));

    TEST_END
}

/* Test body handed to KFDTest_Launch: creates MAX_CP_QUEUES PM4 queues on one
 * GPU, places MAX_PACKETS write-data packets on each, submits them all, waits
 * five seconds and then checks that every queue reports all of its packets as
 * submitted.
 *
 * Skipped on FAMILY_CI and FAMILY_KV.
 *
 * The queues are held in a std::vector: MAX_CP_QUEUES depends on
 * g_TestGPUsNum at run time, so a plain array would be a variable-length
 * array of class type, which is not standard C++.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
static void OverSubscribeCpQueues(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    const HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId == FAMILY_CI || m_FamilyId == FAMILY_KV) {
        LOG() << "Skipping test: CI doesn't have HW scheduling." << std::endl;
        return;
    }

    /* The max queues per process is 1024 limited by
     * KFD, so MAX_CP_QUEUES is needed to adapt it
     * when total queues exceed it.
     */
    const unsigned int MAX_CP_QUEUES = g_TestGPUsNum > 15 ?
                                       1024 / g_TestGPUsNum :
                                       65;
    const unsigned int MAX_PACKETS = 100;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);

    destBuf.Fill(0xFF);

    std::vector<PM4Queue> queues(MAX_CP_QUEUES);

    for (unsigned int qidx = 0; qidx < MAX_CP_QUEUES; ++qidx)
        ASSERT_SUCCESS_GPU(queues[qidx].Create(gpuNode), gpuNode) << " QueueId=" << qidx;

    // Stage all packets first; nothing is submitted until every queue is loaded.
    for (unsigned int qidx = 0; qidx < MAX_CP_QUEUES; ++qidx) {
        for (unsigned int i = 0; i < MAX_PACKETS; i++) {
            PM4WriteDataPacket packet;
            packet.InitPacket(destBuf.As<unsigned int*>()+qidx*2, qidx+i, qidx+i);  // two dwords per packet
            queues[qidx].PlacePacket(packet);
        }
    }

    for (unsigned int qidx = 0; qidx < MAX_CP_QUEUES; ++qidx)
        queues[qidx].SubmitPacket();

    // Delaying for 5 seconds in order to get all the results
    Delay(5000);

    for (unsigned int qidx = 0; qidx < MAX_CP_QUEUES; ++qidx)
        EXPECT_TRUE_GPU(queues[qidx].AllPacketsSubmitted(), gpuNode) << "QueueId=" << qidx;

    for (unsigned int qidx = 0; qidx < MAX_CP_QUEUES; ++qidx)
        EXPECT_SUCCESS_GPU(queues[qidx].Destroy(), gpuNode);
}

// Registers the OverSubscribeCpQueues test: passes the body of the same name
// to KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, OverSubscribeCpQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(OverSubscribeCpQueues));

    TEST_END
}

/* Measures how long one dispatch of the LoopIsa shader takes on a PM4 queue
 * restricted by the given CU mask.
 *
 * node:       node on which the buffers and the queue are created.
 * mask:       CU mask handed to PM4Queue::SetCUMask().
 * mask_count: second argument of SetCUMask(); callers in this file pass the
 *             number of bits in the mask.
 *
 * Returns the difference between two GetSystemTickCountInMicroSec() samples
 * taken immediately before Submit() and immediately after Sync().
 */
HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) {
    HsaMemoryBuffer shaderCode(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false);
    HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false);

    EXPECT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, shaderCode.As<char*>()));

    Dispatch workload(shaderCode);
    workload.SetDim(1024, 16, 16);

    PM4Queue maskedQueue;
    EXPECT_SUCCESS(maskedQueue.Create(node));
    EXPECT_SUCCESS(maskedQueue.SetCUMask(mask, mask_count));
    maskedQueue.SetSkipWaitConsump(true);

    // Only the submit + sync pair is inside the timed window.
    HSAuint64 tickBefore = GetSystemTickCountInMicroSec();
    workload.Submit(maskedQueue);
    workload.Sync();
    HSAuint64 tickAfter = GetSystemTickCountInMicroSec();

    EXPECT_SUCCESS(maskedQueue.Destroy());

    return tickAfter - tickBefore;
}

/* To cover for outliers, allow us to get the average time based on a
 * specified number of iterations.
 *
 * node, mask, mask_count: forwarded unchanged to TimeConsumedwithCUMask().
 * iterations:             number of measurements to take; must be >= 1.
 *
 * Returns the integer mean of the measurements, or 0 when iterations < 1 or
 * when the measurements sum to 0 (an error is logged in both cases).
 *
 * Every measurement whose ratio to the mean falls outside
 * [CuNegVariance, CuPosVariance] is logged as a warning. The ratio is computed
 * in floating point: an integer quotient would truncate to whole multiples of
 * the mean, and would divide by zero whenever the integer mean rounds to 0.
 */
HSAint64 KFDQMTest::GetAverageTimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count, int iterations) {
    // Validate before sizing the storage so it is never created with a
    // non-positive length.
    if (iterations < 1) {
        LOG() << "ERROR: At least 1 iteration must be performed" << std::endl;
        return 0;
    }

    std::vector<HSAint64> timeArray(iterations);
    HSAint64 timeTotal = 0;

    for (int x = 0; x < iterations; x++) {
        timeArray[x] = TimeConsumedwithCUMask(node, mask, mask_count);
        timeTotal += timeArray[x];
    }

    if (timeTotal == 0) {
        LOG() << "ERROR: Total time reported as 0. Exiting" << std::endl;
        return 0;
    }

    // timeTotal is non-zero here, so the floating-point mean is non-zero too.
    const double mean = static_cast<double>(timeTotal) / iterations;

    for (int x = 0; x < iterations; x++) {
        const double variance = static_cast<double>(timeArray[x]) / mean;
        if (variance < CuNegVariance || variance > CuPosVariance)
            LOG() << "WARNING: Measurement #" << x << "/" << iterations << " (" << timeArray[x]
                  << ") is at least " << CuVariance*100 << "% away from the mean (" << timeTotal/iterations << ")"
                  << std::endl;
    }

    return timeTotal / iterations;
}

/*
 * Apply CU masking in a linear fashion, adding 1 CU per iteration
 * until all Shader Engines are full.
 *
 * Test body handed to KFDTest_Launch. Only runs for family IDs >= FAMILY_VI.
 *
 * A baseline time is measured with a single CU enabled (mask 0x1). Then, for
 * nCUs = 2 .. ActiveCU, one more mask bit is set and the dispatch is timed
 * again. For each step
 *     ratio = baseline / (time * nCUs)
 * is expected to lie within [CuNegVariance, CuPosVariance].
 *
 * The mask lives in a std::vector rather than a variable-length array, and the
 * test is skipped when the node reports no SIMDs per CU or no active CUs, so
 * neither the division nor the mask[0] write can go out of range. Mask bits
 * are built from an unsigned 1 so that bit 31 is not produced by shifting a
 * signed int.
 *
 * pTestParamters: launch parameters (gpuNode and the owning KFDQMTest).
 */
void BasicCuMaskingLinear(KFDTEST_PARAMETERS* pTestParamters) {
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    const HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId < FAMILY_VI) {
        LOG() << "Skipping test: Test not supported for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    const HsaNodeProperties *pNodeProperties = pKFDQMTest->Get_NodeInfo()->GetNodeProperties(gpuNode);

    if (pNodeProperties->NumSIMDPerCU == 0) {
        LOG() << "Skipping test: node reports 0 SIMDs per CU." << std::endl;
        return;
    }

    uint32_t ActiveCU = (pNodeProperties->NumFComputeCores / pNodeProperties->NumSIMDPerCU);
    uint32_t numSEs = pNodeProperties->NumShaderBanks;
    LOG() << std::dec << "# Compute cores: " << pNodeProperties->NumFComputeCores << std::endl;
    LOG() << std::dec << "# SIMDs per CU: " << pNodeProperties->NumSIMDPerCU << std::endl;
    LOG() << std::dec << "# Shader engines: " << numSEs << std::endl;
    LOG() << std::dec << "# Active CUs: " << ActiveCU << std::endl;

    if (ActiveCU == 0) {
        LOG() << "Skipping test: node reports 0 active CUs." << std::endl;
        return;
    }

    HSAint64 TimewithCU1, TimewithCU;
    uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Number of 32-bit words needed to hold one bit per CU */
    uint32_t maskNumBits = maskNumDwords * 32;
    std::vector<uint32_t> mask(maskNumDwords, 0x0);
    double ratio;

    // Start with only the first CU enabled.
    mask[0] = 0x1;

    /* Execute once to get any HW optimizations out of the way */
    pKFDQMTest->TimeConsumedwithCUMask(gpuNode, mask.data(), maskNumBits);

    LOG() << "Getting baseline performance numbers (CU Mask: 0x1)" << std::endl;
    TimewithCU1 = pKFDQMTest->GetAverageTimeConsumedwithCUMask(gpuNode, mask.data(), maskNumBits, 3);

    for (uint32_t nCUs = 2; nCUs <= ActiveCU; nCUs++) {
        // Enable CU number (nCUs - 1) in addition to those already set.
        uint32_t maskIndex = (nCUs - 1) / 32;
        mask[maskIndex] |= 1u << ((nCUs - 1) % 32);

        TimewithCU = pKFDQMTest->TimeConsumedwithCUMask(gpuNode, mask.data(), maskNumBits);
        ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs);

        LOG() << "Expected performance of " << nCUs << " CUs vs 1 CU:" << std::endl;
        LOG() << std::setprecision(2) << pKFDQMTest->CuNegVariance << " <= " << std::fixed << std::setprecision(8)
              << ratio << " <= " << std::setprecision(2) << pKFDQMTest->CuPosVariance << std::endl;

        EXPECT_TRUE((ratio >= pKFDQMTest->CuNegVariance) && (ratio <= pKFDQMTest->CuPosVariance));

        RECORD(ratio) << "Ratio-" << nCUs << "-CUs";
    }
}

// Registers the BasicCuMaskingLinear test: passes the body of the same name to
// KFDTest_Launch and fails if the launch does not report success.
TEST_F(KFDQMTest, BasicCuMaskingLinear) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicCuMaskingLinear));

    TEST_END
}


// ====== ExtendedCuMasking Helper Functions ====== //


#define CUMASK_DEBUG 0   // Enable extra output for debugging issues

// DBG_PRINT(fmt, ...) forwards to printf when CUMASK_DEBUG is non-zero.
// Otherwise it expands to a no-op expression, so its arguments are not
// evaluated and no stray comma expression is left behind at the call site.
#if CUMASK_DEBUG
#define DBG_PRINT(...) printf(__VA_ARGS__)
#else
#define DBG_PRINT(...) ((void)0)
#endif


/*
 * Print a multi-dword mask on a single line of stdout.
 *
 * Output is the header, the literal "0x", every dword as eight lower-case hex
 * digits starting with the highest array index, and a newline.
 *
 *   pHeader: A non-NULL pointer to the string printed in front of the mask.
 *     pMask: A pointer to the mask to print out.
 * numDwords: Number of elements in the mask array.
 *
 */
static void printMask(const char *pHeader, uint32_t *pMask, uint32_t numDwords) {
    printf("%s0x", pHeader);

    // Walk from the last dword down to dword 0.
    int dwordIdx = numDwords - 1;
    while (dwordIdx >= 0) {
        printf("%08x", pMask[dwordIdx]);
        --dwordIdx;
    }

    printf("\n");
}


/*
 * Set the CU mask bits for each specified WGP.
 *
 * Note: The effect is cumulative; the function can be called multiple times
 *       to set up additional WGPs in the provided pMask.
 *
 * pMask:      A non-NULL pointer to the CU mask.
 * maskConfig: Information on GPU configuration.
 * seMask:     Specifies SEs that are targeted.
 * saMask:     Specifies SAs that are targeted within the SEs specified.
 * wgpMask:    Specifies WGPs that are targeted within the (SE,SA) specified.
 *
 * For seMask, saMask, and wgpMask:
 *   One bit per SE/SA/WGP, multiple bits can be specified.
 *   Masks cannot be 0 (at least 1 SE, 1 SA and 1 WGP must be specified).
 *   Special value: -1 (specifies ALL)
 *
 * Each selected (SE, SA, WGP) sets two adjacent bits, starting at bit
 *   2 * (se + sa * numSEs + wgp * numSEs * numSAperSE).
 *
 * Returns true on success; false (after logging an error) when pMask is NULL
 * or any of the three masks is 0.
 *
 */
static bool setCUMask(uint32_t *pMask, mask_config_t maskConfig, uint32_t seMask, uint32_t saMask, uint32_t wgpMask) {
    if (!pMask) {
        LOG() << "ERROR: pMask is NULL!\n";
        return false;
    }

    if (!(seMask && saMask && wgpMask)) {
        LOG() << "ERROR: SE/SA/WGP mask values must be non-zero!\n";
        return false;
    }

    for (int wgp = 0; wgp < maskConfig.numWGPperSA; wgp++) {
        if (!((wgpMask >> wgp) & 1))
            continue;

        for (int sa = 0; sa < maskConfig.numSAperSE; sa++) {
            if (!((saMask >> sa) & 1))
                continue;

            for (int se = 0; se < maskConfig.numSEs; se++) {
                if (!((seMask >> se) & 1))
                    continue;

                // Two bits per WGP; SE varies fastest, then SA, then WGP.
                uint32_t insLoc = se * 2 + sa * (2 * maskConfig.numSEs) + wgp * (2 * maskConfig.numSEs * maskConfig.numSAperSE);
                pMask[insLoc / 32] |= (0x3 << (insLoc % 32));
            }
        }
    }

    return true;
}


/*
 * Compute an adjusted CU mask to use when some WGPs are inactive.
 *
 * The adjusted mask takes into account the inactive WGPs by removing their corresponding
 * bits from the mask as these are skipped by KFD.   As bits are removed from the mask,
 * the remaining bit values are shifted right.
 *
 *   pAdjMask: A non-NULL pointer where the adjusted mask will be written.
 *      pMask: A non-NULL pointer to the CU mask.
 * maskConfig: Information on GPU configuration.
 *
 * Returns:
 *      true: If adjusted mask has one or more non-zero bit set.
 *     false: If the adjusted mask is all zeroes (no WGPs left to do work).
 *
 * When false is returned, we should skipped the specific test scenario.
 *
 */
bool adjustMask(uint32_t *pAdjMask, uint32_t *pMask, mask_config_t maskConfig) {
    const int bitCount = maskConfig.numBits;
    int writePos = 0;           // next free bit position in the compacted mask
    bool anyEnabled = false;    // becomes true once a surviving WGP entry is non-zero

    memset(pAdjMask, 0, sizeof(uint32_t) * maskConfig.numDwords);

    // Walk the source mask one WGP (two bits) at a time.
    for (int readPos = 0; readPos < bitCount; readPos += 2) {
        const int dword = readPos / 32;
        const int shift = readPos % 32;

        // Entries flagged in the inactive mask are dropped, so the entries
        // that follow end up at lower bit positions in the output.
        if ((maskConfig.pInactiveMask[dword] & (0x3 << shift)) != 0) {
            continue;
        }

        const uint32_t wgpBits = (pMask[dword] >> shift) & 0x3;
        pAdjMask[writePos / 32] |= wgpBits << (writePos % 32);
        writePos += 2;

        if (wgpBits != 0) {
            anyEnabled = true;
        }
    }

#if CUMASK_DEBUG
    printf("\nAdjusting mask:\n");
    printMask("    mask: ", pMask, maskConfig.numDwords);
    printMask("inactive: ", maskConfig.pInactiveMask, maskConfig.numDwords);
    printMask("adjusted: ", pAdjMask, maskConfig.numDwords);
    printf("\n");
#endif //CUMASK_DEBUG

    return anyEnabled;
}


/*
 * Validates the result of a test.
 *
 * pMask:        A non-NULL pointer to the CU mask that was used for the test.
 * maskConfig:   Information on GPU configuration.
 * numWorkItems: Number of work items used for shader execution.
 * pOutput:      Pointer to the output array.
 * pResultMask:  If non-NULL, result mask constructed from output is stored at that memory location.
 *
 */
static bool validateTest(uint32_t *pMask, mask_config_t maskConfig, uint32_t numWorkItems, out_data_t *pOutput, uint32_t *pResultMask)
{
    uint32_t resultMask[maskConfig.numDwords];
    bool result = false;

    memset(resultMask, 0, sizeof(resultMask));

    for (int i = 0; i < numWorkItems; i++) {
        DBG_PRINT("=== % 4d: 0x%08x [ se: %2d, sa: %2d, wgp: %2d]\n", i, pOutput[i].data, pOutput[i].se, pOutput[i].sa, pOutput[i].wgp);

        setCUMask(resultMask, maskConfig,
                  1 << pOutput[i].se,
                  1 << pOutput[i].sa,
                  1 << pOutput[i].wgp);
    }

    if (pResultMask) {
        memcpy(pResultMask, resultMask, sizeof(resultMask));
    }

    if (maskConfig.pInactiveMask) {
        // If some WGPs were inactive, compute a verify mask taking into account the inactive WGPs.
        uint32_t verifyMask[maskConfig.numDwords];
        memset(verifyMask, 0, sizeof(verifyMask));

        for (int i = 0; i < maskConfig.numDwords; i++) {
            verifyMask[i] = pMask[i] & ~maskConfig.pInactiveMask[i];
        }

#if CUMASK_DEBUG
        printf("\nValidate test:\n");
        printMask("        mask: ", pMask, maskConfig.numDwords);
        printMask("  resultMask: ", resultMask, maskConfig.numDwords);
        printMask("inactiveMask: ", maskConfig.pInactiveMask, maskConfig.numDwords);
        printMask("  verifyMask: ", verifyMask, maskConfig.numDwords);
#endif //CUMASK_DEBUG

        result = (memcmp(verifyMask, resultMask, sizeof(resultMask)) == 0);
    } else {

#if CUMASK_DEBUG
        printf("\nValidate test:\n");
        printMask("        mask: ", pMask, maskConfig.numDwords);
        printMask("  resultMask: ", resultMask, maskConfig.numDwords);
#endif //CUMASK_DEBUG

        result = (memcmp(pMask, resultMask, sizeof(resultMask)) == 0);
    }

    DBG_PRINT("      Result: %s\n\n", result ? "PASS" : "FAIL");

    return result;
}

/*
 * Set CU Mask, submit the testing shader, and validate the results.
 *
 * gpuNode:       The node to use for the test.
 * pMask:         A non-NULL pointer to the CU mask to use for the test.
 * maskConfig:    Information on GPU configuration.
 * programBuffer: The buffer that contains the shader program.
 * numWorkItems:  The number of work items to use.
 * pOutput:       A non-NULL pointer to the output buffer used by the shader.
 * pResultMask:   If non-NULL, result mask constructed from output is stored at that memory location.
 *
 */
static bool testCUMask(int gpuNode, uint32_t *pMask, mask_config_t maskConfig, HsaMemoryBuffer &programBuffer, uint32_t numWorkItems, out_data_t *pOutput, uint32_t *pResultMask = NULL) {

    PM4Queue queue;
    uint32_t *pAdjMask = NULL;
    uint32_t adjMask[maskConfig.numDwords];

    if (maskConfig.pInactiveMask) {
        if (adjustMask(adjMask, pMask, maskConfig)) {
            pAdjMask = adjMask;
        } else {
            // Adjusted mask is all zeroes, skip test and mark as passing.
            return true;
        }
    } else {
        pAdjMask = pMask;
    }

    Dispatch dispatch(programBuffer);
    dispatch.SetArgs(NULL, pOutput);
    dispatch.SetDim(numWorkItems, 1, 1);

    EXPECT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    EXPECT_SUCCESS_GPU(queue.SetCUMask(pAdjMask, maskConfig.numBits), gpuNode);

    dispatch.Submit(queue);
    dispatch.Sync();
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    return validateTest(pMask, maskConfig, numWorkItems, pOutput, pResultMask);
}


/*
 * ExtendedCuMasking
 *
 * Newer implementation of CU mask testing that focuses on correctness of masking.
 *
 * Unlike previous implementations, this new implementation does not rely on performance
 * measurements to decide if the masking took place.   Instead, this implementation checks
 * if waves were executed on all the CUs enabled and only the CUs enabled.
 *
 * Implementation does a series of tests, new tests can be easily added as needed.
 *
 * For each test, these steps are performed:
 *
 * 1) Decide the units that are enabled for the test (SEs, SAs, WGPs).
 * 2) Generate a CU mask that specifies the WGPs enabled on each (SE,SA) pairs.
 * 3) Set the mask for the queue and run a special shader.
 * 4) Shader records in a buffer the unit that is used by the wave (SE,SA,WGP).
 * 5) Test program analyses the results and verifies if shader used all and only the
 *    WGP units specified by the mask.
 *
 * Multiple tests are done with different combinations.
 * There are (2^numWGPs - 1) possibilities, not everything can be tested.
 *
 * For each new ASIC supported, the following changes might be required:
 * 1) Minor shader changes to fill the unit information into the output buffer.
 * 2) Format of out_data_t struct.
 * 3) Changes to validation code.
 *
 */
static void extendedCuMasking(KFDTEST_PARAMETERS* pTestParameters) {

    int gpuNode = pTestParameters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParameters->pTestObject;
    const HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId >= FAMILY_GFX12) {  // Supporting GFX12 and up for now

        // Lock to prevent interleave of logging on multigpu (multithreaded) testing
        static std::mutex logMutex;

        const HsaNodeProperties *pProps = pKFDQMTest->Get_NodeInfo()->GetNodeProperties(gpuNode);
        const uint32_t activeCU = (pProps->NumFComputeCores / pProps->NumSIMDPerCU);
        const uint32_t numSEs = pProps->NumShaderBanks;
        const uint32_t numSAperSE = pProps->NumArrays;
        const uint32_t numWGPperSA = pProps->NumCUPerArray / 2;
        const uint32_t maxCU = numSEs * numSAperSE * numWGPperSA * 2;

        std::ostringstream nodeStream;
        nodeStream << "(Node " << gpuNode << ")";
        const std::string nodeStr = nodeStream.str();

        {
            // Scoped guard so the mutex is released on every exit path of this block.
            std::lock_guard<std::mutex> logGuard(logMutex);

            LOG() << std::endl;
            LOG() << std::dec << "****** GFX Configuration " << nodeStr << " ******" << std::endl;
            LOG() << std::dec << "  Compute Cores (SIMD): " << std::setw(3) << pProps->NumFComputeCores << std::endl;
            LOG() << std::dec << "          SIMDs per CU: " << std::setw(3) << pProps->NumSIMDPerCU << std::endl;
            LOG() << std::dec << "            Active CUs: " << std::setw(3) << activeCU << std::endl;
            LOG() << std::dec << "               Max CUs: " << std::setw(3) << maxCU << std::endl;
            LOG() << std::dec << "        Shader Engines: " << std::setw(3) << numSEs << std::endl;
            LOG() << std::dec << "            SAs per SE: " << std::setw(3) << numSAperSE << std::endl;
            LOG() << std::dec << "           WGPs per SA: " << std::setw(3) << numWGPperSA << std::endl;
            LOG() << std::dec << "****************************************" << std::endl;
        }

        const uint32_t maskNumDwords = (maxCU + 31) / 32; /* Round up to the nearest multiple of 32 */
        const uint32_t maskNumBits = maskNumDwords * 32;


        uint32_t mask[maskNumDwords];
        uint32_t inactiveMask[maskNumDwords];

        mask_config_t maskConfig = { maskNumDwords, maskNumBits, numSEs, numSAperSE, numWGPperSA, NULL };

        /*
         * Note: On system with WGPs, CU bits in the same WGP must be either both set or both unset
         *       i.e. enabling/disabling is on a per-WGP basis.
         *
         * Format of CU Mask array (Assuming 4 SEs)
         *
         * Bit    Value    Masking
         *
         *  0,1    0x03     SE0 SA0 WGP0 (i.e. CU0 and CU1)
         *  2,3    0x0c     SE1 SA0 WGP0
         *  4,5    0x30     SE2 SA0 WGP0
         *  6,7    0xc0     SE3 SA0 WGP0
         *
         *  8,9    0x0300   SE0 SA1 WGP0
         * 10,11   0x0c00   SE1 SA1 WGP0
         * 12,13   0x3000   SE2 SA1 WGP0
         * 14,15   0xc000   SE3 SA1 WGP0
         *
         * 16,17   0x030000 SE0 SA0 WGP1
         * 18,19   0x0c0000 SE1 SA0 WGP1
         * ...
         * 32,33            SE0 SA0 WGP2
         * ...
         * 48,49            SE0 SA0 WGP3
         * ...
         *
         */

        /*
         * Number of work items needs to be sufficiently large to have enough work items for each WGP enabled.
         *
         * Using total number of WGPs multiplied by 16.
         *
         */
        const uint32_t numWorkItems = 16 * numSEs * numSAperSE * numWGPperSA;

        // Allocate buffers for program and output
        HsaMemoryBuffer programBuffer(PAGE_SIZE, gpuNode, true, false, true);
        HsaMemoryBuffer outputBuffer(((sizeof(out_data_t) * numWorkItems) + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), gpuNode, true, false, false);
        out_data_t *pOutput = outputBuffer.As<out_data_t *>();

        // Assemble shader
        Assembler *pAsm = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
        ASSERT_NOTNULL_GPU(pAsm, gpuNode);
        ASSERT_SUCCESS_GPU(pAsm->RunAssembleBuf(CheckCuMaskIsa, programBuffer.As<char*>()), gpuNode);


        /*
         * Check and record any inactive WGPs.
         *
         */
        memset(mask, 0, sizeof(mask));
        memset(inactiveMask, 0, sizeof(inactiveMask));

        // Use full mask and collect all active CUs in inactiveMask
        setCUMask(mask, maskConfig, -1, -1, -1);
        if (testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput, inactiveMask)) {
            // Using full mask, if all CUs are used, we expect them to be all active.
            EXPECT_TRUE_GPU(activeCU == maxCU, gpuNode);
        } else {
            // Some CUs were not used, generate inactive mask and count inactive CUs.
            uint32_t inactiveCount = 0;

            // Inactive = requested by the full mask but never observed.
            // ANDing with the full mask (instead of a plain bit flip) keeps the
            // padding bits above maxCU in the last dword clear, so they are not
            // counted as inactive CUs when maxCU is not a multiple of 32.
            for (uint32_t i = 0; i < maskNumDwords; i++) {
                inactiveMask[i] = mask[i] & ~inactiveMask[i];
                inactiveCount += __builtin_popcount(inactiveMask[i]);
            }

            // Check if what we detected is consistent with info from KFD
            EXPECT_TRUE_GPU((activeCU + inactiveCount) == maxCU, gpuNode);

            maskConfig.pInactiveMask = inactiveMask;

            // Note: inactiveCount is a count of CU bits (two per WGP).
            // Every dword is printed as 8 zero-padded hex digits, most significant
            // dword first; setw() only affects the next insertion, so it is
            // re-applied for each dword.
            std::ostringstream logStr;
            logStr << nodeStr << " Inactive WGP detected: " << inactiveCount << "  0x" << std::hex << std::setfill('0');
            for (int i = maskNumDwords - 1; i >= 0; i--) {
                logStr << std::setw(8) << inactiveMask[i];
            }
            LOG() << logStr.str() << std::endl;
        }


        /*
         * Generate symmetric test configuration for all (SE, SA, WGP) combinations, one level at a time.
         *
         * Other levels fully enabled.
         *
         * Example: If testing SE disablement, all SA/WGP are enabled on the SE that are used.
         *          If testing SA disablement, all SE are used, all WGP are enabled on the SA enabled.
         *
         */
        uint32_t totalConfigTested = 0;

        // All SE combination (0 not allowed, need at least one enabled)
        LOG() << nodeStr << " === Testing SE mask (" << ((1u << numSEs) - 1) << " configs)\n";
        for (uint32_t i = 1; i < (1u << numSEs); i++) {
            memset(mask, 0, sizeof(mask));

            DBG_PRINT("SE mask: 0x%x\n", i);
            setCUMask(mask, maskConfig, i, -1, -1);
            EXPECT_TRUE_GPU(testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput), gpuNode);
            totalConfigTested++;
        }

        // All SA combinations (0 not allowed, need at least one enabled)
        LOG() << nodeStr << " === Testing SA mask (" << ((1u << numSAperSE) - 1) << " configs)\n";
        for (uint32_t i = 1; i < (1u << numSAperSE); i++) {
            memset(mask, 0, sizeof(mask));

            DBG_PRINT("SA mask: 0x%x\n", i);
            setCUMask(mask, maskConfig, -1, i, -1);
            EXPECT_TRUE_GPU(testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput), gpuNode);
            totalConfigTested++;
        }

        // All WGP combinations (0 not allowed, need at least one enabled)
        LOG() << nodeStr << " === Testing WGP mask (" << ((1u << numWGPperSA) - 1) << " configs)\n";
        for (uint32_t i = 1; i < (1u << numWGPperSA); i++) {
            memset(mask, 0, sizeof(mask));

            DBG_PRINT("WGP mask: 0x%x\n", i);
            setCUMask(mask, maskConfig, -1, -1, i);
            EXPECT_TRUE_GPU(testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput), gpuNode);
            totalConfigTested++;
        }

        /*
         * Linear Masking
         *
         * Enable one WGP at a time until they are all enabled.
         *
         */
        {
            uint32_t totalWGPs = numSEs * numSAperSE * numWGPperSA;

            LOG() << nodeStr << " === Testing linear mask (" << totalWGPs << " configs)\n";

            memset(mask, 0, sizeof(mask));

            for (uint32_t i = 0; i < totalWGPs; i++) {
                // 16 WGPs (2 bits each) fit in one dword. The shift amount must
                // be taken relative to the dword: shifting by (i * 2) would be a
                // shift of 32 bits or more (undefined behavior) once i >= 16.
                mask[i / 16] |= (0x3u << ((i % 16) * 2));

#if CUMASK_DEBUG
                printMask("  linear mask: ", mask, maskNumDwords);
#endif //CUMASK_DEBUG

                EXPECT_TRUE_GPU(testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput), gpuNode);
                totalConfigTested++;
            }
        }

        /*
         * Random asymmetric config.
         *
         * Asymmetric, different WGPs/SAs are enabled/disabled on different SEs.
         *
         */
        {
            uint32_t randomCount = 1000;  // Total number of random test to perform
            uint32_t seed = 1;            // Specifying a seed to have deterministic random sequence

            // NOTE(review): srand()/rand() use process-wide state; if several GPU
            // threads run this concurrently the per-node sequence is presumably
            // no longer deterministic -- confirm whether that matters here.
            srand(seed);

            LOG() << nodeStr << " === Testing " << randomCount << " random mask config...\n";

            for (uint32_t i = 0; i < randomCount; i++) {

                memset(mask, 0, sizeof(mask));

                uint32_t wgpLeft = maxCU / 2;   // init to total WGPs
                uint32_t maskIndex = 0;

                while (wgpLeft > 0) {
                    uint32_t wgpBlock = (wgpLeft > 16) ? 16 : wgpLeft;   // max 16 WGPs at a time
                    wgpLeft -= wgpBlock;

                    /*
                     * Pick random number between 0 to (2^wgpBlock - 1) - 1.
                     * Then add 1 to get random number between 1 to (2^wgpBlock - 1).
                     * This ensure that we don't end up with 0 for all the dwords in the mask.
                     */
                    uint32_t wgpMask = (rand() % ((1ULL << wgpBlock) - 1)) + 1;

                    // expand WGP mask to CU mask by doubling each individual bits.
                    uint32_t expandToCUMask = 0;
                    for (uint32_t j = 0; j < wgpBlock; j++) {
                        if (wgpMask & (1 << j)) {
                            expandToCUMask |= (0x3ULL << (j * 2));
                        }
                    }

                    DBG_PRINT("maskIndex: %u  fullWGPMask: 0x%08x  expand: 0x%08x\n", maskIndex, wgpMask, expandToCUMask);

                    mask[maskIndex++] = expandToCUMask;
                }

                EXPECT_TRUE_GPU(testCUMask(gpuNode, mask, maskConfig, programBuffer, numWorkItems, pOutput), gpuNode);
                totalConfigTested++;
            }
        }

        LOG() << std::endl;
        LOG() << nodeStr << " Total config tested: " << totalConfigTested << std::endl;
        LOG() << std::endl;

    } else {
        // The message carries a "0x" prefix, so print the family ID in hex
        // (and restore decimal afterwards, as std::hex is sticky).
        LOG() << "Skipping test: Test not supported for family ID 0x" << std::hex << m_FamilyId << std::dec << "." << std::endl;
    }
}

// gtest entry point: hands extendedCuMasking to KFDTest_Launch and asserts
// that the launch reports success. TEST_START/TEST_END are project macros
// defined elsewhere; TESTPROFILE_RUNALL is the profile passed to TEST_START.
TEST_F(KFDQMTest, ExtendedCuMasking) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(extendedCuMasking));

    TEST_END
}

#undef CUMASK_DEBUG
#undef DBG_PRINT

// ====== End of ExtendedCUMasking Functions ====== //


/**
 * Apply CU masking where the number of CUs is equal across all Shader Engines
 * This will work due to the HW splitting the workload unevenly across the Shader
 * Engines when ((#ofCUs)/(#ofShaderEngines)) is not a whole number. The tests above
 * will not yield viable results when an uneven distribution of CUs is used over multiple
 * shader engines (e.g. 0x1000100030003), until the HW changes how it schedules work.
 */
void BasicCuMaskingEven(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    const HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    if (m_FamilyId >= FAMILY_VI) {
        const HsaNodeProperties *pNodeProperties = pKFDQMTest->Get_NodeInfo()->GetNodeProperties(gpuNode);
        uint32_t ActiveCU = (pNodeProperties->NumFComputeCores / pNodeProperties->NumSIMDPerCU);
        uint32_t numShaderEngines = pNodeProperties->NumShaderBanks;
        if (numShaderEngines == 1) {
            LOG() << "Skipping test: Only 1 Shader Engine present." << std::endl;
            return;
        }

        LOG() << std::dec << "# Compute cores: " << pNodeProperties->NumFComputeCores << std::endl;
        LOG() << std::dec << "# SIMDs per CU: " << pNodeProperties->NumSIMDPerCU << std::endl;
        LOG() << std::dec << "# Shader engines: " << numShaderEngines << std::endl;
        LOG() << std::dec << "# Active CUs: " << ActiveCU << std::endl;
        HSAint64 TimewithCU1, TimewithCU;
        uint32_t maskNumDwords = (ActiveCU + 31) / 32; /* Round up to the nearest multiple of 32 */
        uint32_t maskNumBits = maskNumDwords * 32;
        uint32_t mask[maskNumDwords];
        int numCuPerShader = ActiveCU / numShaderEngines;
        double ratio;

        /* In KFD we symmetrically map mask to all SEs:
         * mask[0] bit0 -> se0 cu0;
         * mask[0] bit1 -> se1 cu0;
         * ... (if # SE is 4)
         * mask[0] bit4 -> se0 cu1;
         * ...
         */
        /* Set Mask to 1 CU per SE */
        memset(mask, 0, maskNumDwords * sizeof(uint32_t));
        for (uint32_t i = 0; i < numShaderEngines; i++) {
            const uint32_t maskIndex = (i / 32) % maskNumDwords;
            // Unsigned literal: bit 31 must not be produced by shifting a signed 1.
            mask[maskIndex] |= 1u << (i % 32);
        }

        /* Execute once to get any HW optimizations out of the way */
        pKFDQMTest->TimeConsumedwithCUMask(gpuNode, mask, maskNumBits);

        LOG() << "Getting baseline performance numbers (1 CU per SE)" << std::endl;
        TimewithCU1 = pKFDQMTest->GetAverageTimeConsumedwithCUMask(gpuNode, mask, maskNumBits, 3);

        /* Each loop will add 1 more CU per SE. We use the mod and divide to handle
         * when SEs aren't distributed in multiples of 32 (e.g. Tonga)
         * OR the new bit in for simplicity instead of re-creating the mask each iteration
         */
        for (int x = 0; x < numCuPerShader; x++) {
            for (uint32_t se = 0; se < numShaderEngines; se++) {
                const uint32_t offset = static_cast<uint32_t>(x) * numShaderEngines + se;
                const uint32_t maskIndex = (offset / 32) % maskNumDwords;
                mask[maskIndex] |= 1u << (offset % 32);
            }
            int nCUs = x + 1;

            TimewithCU = pKFDQMTest->TimeConsumedwithCUMask(gpuNode, mask, maskNumBits);
            // Ratio of the baseline time to (this run's time * CUs per SE); it is
            // compared against the CuNegVariance..CuPosVariance window below.
            ratio = (double)(TimewithCU1) / ((double)(TimewithCU) * nCUs);

            LOG() << "Expected performance of " << nCUs << " CU(s)/SE vs 1 CU/SE:" << std::endl;
            LOG() << std::setprecision(2) << pKFDQMTest->CuNegVariance << " <= " << std::fixed << std::setprecision(8)
                  << ratio << " <= " << std::setprecision(2) << pKFDQMTest->CuPosVariance << std::endl;

            EXPECT_TRUE_GPU((ratio >= pKFDQMTest->CuNegVariance) && (ratio <= pKFDQMTest->CuPosVariance), gpuNode);

            RECORD(ratio) << "Ratio-" << nCUs << "-CUs";
        }
    } else {
        // The message carries a "0x" prefix, so print the family ID in hex
        // (and restore decimal afterwards, as std::hex is sticky).
        LOG() << "Skipping test: Test not supported for family ID 0x" << std::hex << m_FamilyId << std::dec << "." << std::endl;
    }
}

// gtest entry point: hands BasicCuMaskingEven to KFDTest_Launch and asserts
// that the launch reports success. TEST_START/TEST_END are project macros
// defined elsewhere.
TEST_F(KFDQMTest, BasicCuMaskingEven) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicCuMaskingEven));

    TEST_END
}

/*
 * Submits the same looping shader on a low-priority and a high-priority queue
 * and logs how long each dispatch took.
 *
 * pTestParamters: Per-GPU test parameters (node id and owning test object).
 * isSamePipe:     When true, 11 extra dummy queues are created between queue 0
 *                 and queue 1 (see the comment next to the queue creation).
 */
void testQueuePriority(KFDTEST_PARAMETERS* pTestParamters, bool isSamePipe)
{
    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    const HSAuint32 familyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    Assembler* pAssembler = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(pAssembler, gpuNode);

    if (familyId < FAMILY_VI) {
        LOG() << "Skipping test: Shader won't run on CI." << std::endl;
        return;
    }

    // Reduce test case if running on emulator
    // Reduction applies to all 3 dims (effect is cubic)
    const int scaleDown = (g_IsEmuMode ? 4 : 1);

    HsaMemoryBuffer syncBuf(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HSAint32 *syncBuffer = syncBuf.As<HSAint32*>();
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(pAssembler->RunAssembleBuf(LoopIsa, isaBuffer.As<char*>()), gpuNode);

    Dispatch dispatch[2] = {
        Dispatch(isaBuffer, true),
        Dispatch(isaBuffer, true)
    };

    const int kNumTasks = 2;                        // one low- and one high-priority task
    const int queueCount = isSamePipe ? 13 : 2;
    int pendingTasks = 0x3;                         // bit t set => task t has not finished yet
    HSAuint64 startTime, endTime[2];
    HsaEvent *pHsaEvent[2];
    int numEvent = 2;
    PM4Queue queue[queueCount];
    HSA_QUEUE_PRIORITY priority[2] = {
        HSA_QUEUE_PRIORITY_LOW,
        HSA_QUEUE_PRIORITY_HIGH
    };

    /*
     * For different pipe variation:
     *   Only two queues are created, they should be on two different pipes.
     *
     * For same pipe variation:
     *   queue[2..12] are dummy queues. Create queue in this sequence to
     *   render queue[0] and queue[1] on same pipe with no assumptions
     *   about the number of pipes used by KFD. Queue #12 is a multiple
     *   of 1, 2, 3 and 4, so it falls on pipe 0 for any number of pipes
     */
    EXPECT_SUCCESS_GPU(queue[0].Create(gpuNode), gpuNode);  // Queue 0 is on Pipe 0
    if (isSamePipe) {
        for (int q = 2; q < queueCount; q++) {
            EXPECT_SUCCESS_GPU(queue[q].Create(gpuNode), gpuNode);
        }
    }
    EXPECT_SUCCESS_GPU(queue[1].Create(gpuNode), gpuNode);

    // Per-task setup: priority, completion event and dispatch dimensions.
    for (int t = 0; t < kNumTasks; t++) {
        syncBuffer[t] = -1;
        queue[t].Update(BaseQueue::DEFAULT_QUEUE_PERCENTAGE, priority[t], false);
        pHsaEvent[t] = dispatch[t].GetHsaEvent();
        pHsaEvent[t]->EventData.EventData.SyncVar.SyncVar.UserData = &syncBuffer[t];
        dispatch[t].SetDim(1024 / scaleDown , 16 / scaleDown, 16 / scaleDown);
    }

    startTime = GetSystemTickCountInMicroSec();
    for (int t = 0; t < kNumTasks; t++) {
        dispatch[t].Submit(queue[t]);
    }

    // Record the completion time of each task the first time its sync
    // variable is seen to match its event id.
    // NOTE(review): the status returned by hsaKmtWaitOnMultipleEvents is
    // ignored, so this loop keeps retrying after a timeout or error -- confirm
    // that this is intended.
    while (pendingTasks > 0) {
        hsaKmtWaitOnMultipleEvents(pHsaEvent, numEvent, false, g_TestTimeOut);
        for (int t = 0; t < kNumTasks; t++) {
            const int taskBit = 1 << t;
            if ((pendingTasks & taskBit) && (syncBuffer[t] == pHsaEvent[t]->EventId)) {
                endTime[t] = GetSystemTickCountInMicroSec();
                pendingTasks &= ~taskBit;
            }
        }
    }

    for (int t = 0; t < kNumTasks; t++) {
        int usecs = endTime[t] - startTime;
        LOG() << "Task priority: " << std::dec << priority[t] << "\t";
        LOG() << "Task duration: " << std::dec << std::setw(10) << usecs << " usecs" << std::endl;
    }

    for (int q = 0; q < queueCount; q++) {
        EXPECT_SUCCESS_GPU(queue[q].Destroy(), gpuNode);
    }
}

// Per-GPU worker: runs testQueuePriority with isSamePipe == false, i.e. with
// only the two measured queues created.
static void QueuePriorityOnDifferentPipe(KFDTEST_PARAMETERS* pTestParamters) {
    const bool useSamePipe = false;
    testQueuePriority(pTestParamters, useSamePipe);
}

// gtest entry point: hands QueuePriorityOnDifferentPipe to KFDTest_Launch and
// asserts that the launch reports success.
TEST_F(KFDQMTest, QueuePriorityOnDifferentPipe) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(QueuePriorityOnDifferentPipe));

    TEST_END
}

// Per-GPU worker: runs testQueuePriority with isSamePipe == true, i.e. with
// the extra dummy queues created between the two measured queues.
void QueuePriorityOnSamePipe(KFDTEST_PARAMETERS* pTestParamters) {
    const bool useSamePipe = true;
    testQueuePriority(pTestParamters, useSamePipe);
}

// gtest entry point: hands QueuePriorityOnSamePipe to KFDTest_Launch and
// asserts that the launch reports success.
TEST_F(KFDQMTest, QueuePriorityOnSamePipe) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(QueuePriorityOnSamePipe));

    TEST_END
}

/*
 * Runs one (1,1,1) dispatch of the given shader on a temporary PM4 queue and
 * waits for it to complete.
 *
 * isaBuffer: Buffer holding the shader program.
 * pSrcBuf:   First shader argument.
 * pDstBuf:   Second shader argument.
 * node:      GPU node to use; -1 selects m_NodeInfo.HsaDefaultGPUNode().
 */
void KFDQMTest::SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node) {
    PM4Queue queue;

    // Resolve the "use default" sentinel before validating the node id.
    node = (node == -1) ? m_NodeInfo.HsaDefaultGPUNode() : node;

    ASSERT_GE_GPU(node, 0, node) << "failed to get GPU Node";

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(pSrcBuf, pDstBuf);
    dispatch.SetDim(1, 1, 1);

    ASSERT_SUCCESS_GPU(queue.Create(node), node);

    // Submit and block until the dispatch has finished.
    dispatch.Submit(queue);
    dispatch.Sync();

    EXPECT_SUCCESS_GPU(queue.Destroy(), node);
}

// Per-GPU worker: assembles NoopIsa into an executable buffer and runs it once
// through SyncDispatch with no source/destination arguments.
void EmptyDispatch(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    Assembler* pAssembler = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(pAssembler, gpuNode);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    char* pIsaCode = isaBuffer.As<char*>();

    ASSERT_SUCCESS_GPU(pAssembler->RunAssembleBuf(NoopIsa, pIsaCode), gpuNode);

    pKFDQMTest->SyncDispatch(isaBuffer, NULL, NULL, gpuNode);
}

// gtest entry point: hands EmptyDispatch to KFDTest_Launch and asserts that
// the launch reports success.
TEST_F(KFDQMTest, EmptyDispatch) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(EmptyDispatch));

    TEST_END
}

// Per-GPU worker: fills a source buffer with 0x01010101, runs CopyDwordIsa once
// through SyncDispatch and checks that the first dword of the destination
// buffer holds the same pattern.
void SimpleWriteDispatch(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer srcBuffer(PAGE_SIZE, gpuNode, false);
    HsaMemoryBuffer destBuffer(PAGE_SIZE, gpuNode);

    srcBuffer.Fill(0x01010101);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    pKFDQMTest->SyncDispatch(isaBuffer, srcBuffer.As<void*>(), destBuffer.As<void*>(), gpuNode);

    // Use the per-GPU assertion form, like the other per-node workers in this
    // file, so the check is tied to the node it ran on.
    EXPECT_EQ_GPU(destBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);

}

// gtest entry point: hands SimpleWriteDispatch to KFDTest_Launch and asserts
// that the launch reports success.
TEST_F(KFDQMTest, SimpleWriteDispatch) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SimpleWriteDispatch));

    TEST_END
}

// Per-GPU worker: for TEST_TIME_SEC seconds, repeatedly submits one
// copy-dword dispatch on each of MAX_CP_QUEUES queues and checks that every
// destination slot receives the value written to its source slot.
static void MultipleCpQueuesStressDispatch(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    Assembler* pAssembler = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(pAssembler, gpuNode);

    static const unsigned int MAX_CP_QUEUES = 16;
    static const HSAuint64 TEST_TIME_SEC = 15;

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer srcBuffer(PAGE_SIZE, gpuNode, false);
    HsaMemoryBuffer destBuffer(PAGE_SIZE, gpuNode);

    unsigned int* src = srcBuffer.As<unsigned int*>();
    unsigned int* dst = destBuffer.As<unsigned int*>();

    PM4Queue queues[MAX_CP_QUEUES];
    Dispatch* inFlight[MAX_CP_QUEUES];

    destBuffer.Fill(0xFF);

    ASSERT_SUCCESS_GPU(pAssembler->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    for (unsigned int q = 0; q < MAX_CP_QUEUES; ++q) {
        ASSERT_SUCCESS_GPU(queues[q].Create(gpuNode), gpuNode) << " QueueId=" << q;
    }

    unsigned int numIter = 0;
    HSAuint64 elapsedSec = 0;
    const HSAuint64 startTime = GetSystemTickCountInMicroSec();

    // At least one round is always executed; the time budget is checked at the
    // end of each round.
    for (;;) {
        // Phase 1: queue one dispatch per queue, each copying the current
        // iteration number from src[q] to dst[q].
        for (unsigned int q = 0; q < MAX_CP_QUEUES; ++q) {
            inFlight[q] = new Dispatch(isaBuffer);
            src[q] = numIter;
            dst[q] = 0xff;
            inFlight[q]->SetArgs(&src[q], &dst[q]);
            inFlight[q]->SetDim(1, 1, 1);
            inFlight[q]->Submit(queues[q]);
        }

        // Phase 2: wait for every dispatch, verify the copy and release it.
        for (unsigned int q = 0; q < MAX_CP_QUEUES; ++q) {
            inFlight[q]->Sync();
            EXPECT_EQ_GPU(dst[q], src[q], gpuNode);
            delete inFlight[q];
        }

        ++numIter;
        const HSAuint64 now = GetSystemTickCountInMicroSec();
        elapsedSec = (now - startTime) / 1000000;

        if (!(elapsedSec < TEST_TIME_SEC)) {
            break;
        }
    }

    LOG() << "Total iterated : " << std::dec << numIter << std::endl;

    for (unsigned int q = 0; q < MAX_CP_QUEUES; ++q) {
        EXPECT_SUCCESS_GPU(queues[q].Destroy(), gpuNode);
    }
}

// gtest entry point: hands MultipleCpQueuesStressDispatch to KFDTest_Launch
// and asserts that the launch reports success.
TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(MultipleCpQueuesStressDispatch));

    TEST_END
}

// Per-GPU worker: fills the PM4 ring with NOP packets until the write pointer
// wraps to 0, then submits a write-data packet into the reused ring space and
// checks that the GPU wrote 0x42 to the destination buffer.
static void CpuWriteCoherence(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    PM4Queue queue;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    /* The queue might be full and we fail to submit. There is always one word space unused in queue.
     * So let rptr one step ahead then we continually submit packet.
     */
    queue.PlaceAndSubmitPacket(PM4NopPacket());
    queue.Wait4PacketConsumption();
    // Per-GPU assertion form, matching the other checks in this function.
    EXPECT_EQ_GPU(1, queue.Rptr(), gpuNode);

    do {
        queue.PlaceAndSubmitPacket(PM4NopPacket());
    } while (queue.Wptr() != 0);

    queue.Wait4PacketConsumption();

    EXPECT_EQ_GPU(0, queue.Rptr(), gpuNode);

    /* Now that the GPU has cached the PQ contents, we modify them in CPU cache and
     * ensure that the GPU sees the updated value:
     */
    queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x42, 0x42));

    queue.Wait4PacketConsumption(event);

    WaitOnValue(destBuf.As<unsigned int*>(), 0x42);

    // The wait above does not fail the test by itself; assert explicitly that
    // the value written by the GPU actually arrived.
    EXPECT_EQ_GPU(0x42u, destBuf.As<unsigned int*>()[0], gpuNode);

    hsaKmtDestroyEvent(event);
}

// gtest entry point: hands CpuWriteCoherence to KFDTest_Launch and asserts
// that the launch reports success.
TEST_F(KFDQMTest, CpuWriteCoherence) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CpuWriteCoherence));

    TEST_END
}

// Per-GPU worker: creates an AQL queue of PAGE_SIZE bytes whose pointer storage
// lives in a separately allocated buffer, then destroys it again.
static void CreateAqlCpQueue(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    AqlQueue queue;

    // Whether the pointer buffer must be non-paged is decided per node.
    const auto needNonPaged = g_baseTest->NeedNonPagedWptr(gpuNode);

    HsaMemoryBuffer pointers(PAGE_SIZE,
                             gpuNode,
                             /*zero*/true,
                             /*local*/false,
                             /*exec*/false,
                             /*isScratch */false,
                             /* isReadOnly */false,
                             /* isUncached */false,
                             /* NonPaged */needNonPaged);

    HSAuint64 *pPointers = pointers.As<HSAuint64 *>();
    ASSERT_SUCCESS_GPU(queue.Create(gpuNode, PAGE_SIZE, pPointers), gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

// gtest entry point: hands CreateAqlCpQueue to KFDTest_Launch and asserts that
// the launch reports success.
TEST_F(KFDQMTest, CreateAqlCpQueue) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(CreateAqlCpQueue));

    TEST_END
}

/* Measures PM4 queue submission latency on the node named in the launch parameters.
 *
 * Three measurements are taken with `slots` packets each:
 *   1. the cost of hsaKmtGetClockCounters() itself (overhead),
 *   2. per-packet time between reading the GPU clock on the CPU and the timestamp
 *      written by a release-memory packet that is submitted and waited on serially,
 *   3. the average time per packet when all packets are submitted in one batch (workload).
 * The first `skip` samples are reported separately and excluded from avg/min/median/max.
 * Results are written through LOG() and RECORD().
 */
static void QueueLatency(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    PM4Queue queue;
    const int queueSize = PAGE_SIZE * 2;
    const int packetSize = PM4ReleaseMemoryPacket(m_FamilyId, 0, 0, 0, 0, 0).SizeInBytes();
    /* We always leave one NOP(dword) empty after packet which is required by ring itself.
     * We also place NOPs when queue wraparound to avoid crossing buffer end. See PlacePacket().
     * So the worst case is that we need two packetSize space to place one packet.
     * Like below, N=NOP,E=Empty,P=Packet.
     * |E|E|E|E|E|E|E|rptr...wptr|E|E|E|E|E| ---> |P|P|P|P|P|P|E|rptr...wptr|N|N|N|N|N|
     * So to respect that, we reserve packetSize space for these additional NOPs.
     * Also we reserve the remainder of the division by packetSize explicitly.
     * Reserve another packetSize for event-based wait which uses a releaseMemory packet.
     */
    const int reservedSpace = packetSize + queueSize % packetSize + packetSize;
    const int slots = (queueSize - reservedSpace) / packetSize;
    HSAint64 queue_latency_avg = 0, queue_latency_min, queue_latency_max, queue_latency_med;
    HSAint64 overhead, workload;
    /* Zero-initialized sample storage; owned by the vector so that it is released on
     * every exit path, including the early returns taken by the ASSERT macros below.
     */
    std::vector<HSAint64> queue_latency_arr(slots);
    const int skip = 2;
    const char *fs[skip] = {"1st", "2nd"};
    HsaClockCounters *ts;
    HSAuint64 *qts;
    int i = 0;

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode, queueSize), gpuNode);

    LOG() << std::dec << "Queue Submit NanoSeconds (" << slots << " Packets)" << std::endl;

    HsaMemoryBuffer buf(ALIGN_UP(slots * sizeof(HsaClockCounters), PAGE_SIZE), 0);
    ts = buf.As<HsaClockCounters*>();

    HsaMemoryBuffer qbuf(ALIGN_UP(slots * sizeof(HSAuint64), PAGE_SIZE), 0);
    qts = qbuf.As<HSAuint64*>();

    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    /* GpuCounter overhead*/
    do {
        hsaKmtGetClockCounters(gpuNode, &ts[i]);
    } while (++i < slots);
    overhead = ts[slots-1].GPUClockCounter - ts[0].GPUClockCounter;
    overhead /= 2 * (slots - 1);

    /* Submit packets serially*/
    i = 0;
    do {
        queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, true,
                    (HSAuint64)&qts[i],
                    0,
                    true,
                    1));
        hsaKmtGetClockCounters(gpuNode, &ts[i]);
        queue.SubmitPacket();
        queue.Wait4PacketConsumption(event);
    } while (++i < slots);

    /* Calculate timing which includes workload and overhead*/
    i = 0;
    do {
        HSAint64 queue_latency = qts[i] - ts[i].GPUClockCounter;

        EXPECT_GE_GPU(queue_latency, 0, gpuNode);

        queue_latency_arr[i] = queue_latency;
        if (i >= skip)
            queue_latency_avg += queue_latency;
    } while (++i < slots);
    /* Calculate avg from packet[skip, slots-1] */
    queue_latency_avg /= (slots - skip);

    /* Workload of queue packet itself */
    i = 0;
    do {
        queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, true,
                    (HSAuint64)&qts[i],
                    0,
                    true,
                    1));
    } while (++i < slots);
    queue.SubmitPacket();
    queue.Wait4PacketConsumption(event);

    hsaKmtDestroyEvent(event);
    /* qts[i] records the timestamp of the end of packet[i] which is
     * approximately that of the beginning of packet[i+1].
     * The workload total is [0, skip], [skip+1, slots-1].
     * And we ignore [0, skip], that means we ignore (skip+1) packets.
     */
    workload = qts[slots - 1] - qts[skip];
    workload /= (slots - 1 - skip);

    EXPECT_GE_GPU(workload, 0, gpuNode);

    i = 0;
    do {
        /* The queue_latency is not that correct as the workload and overhead are average*/
        queue_latency_arr[i] -= workload + overhead;
        /* The first submits take a long time, so report them separately */
        if (i < skip)
            LOG() << "Queue Latency " << fs[i] << ": \t" << CounterToNanoSec(queue_latency_arr[i]) << std::endl;
    } while (++i < slots);

    /* Only the samples after the skipped warm-up entries take part in min/median/max */
    std::sort(queue_latency_arr.begin() + skip, queue_latency_arr.end());

    queue_latency_min = queue_latency_arr[skip];
    queue_latency_med = queue_latency_arr[(slots+skip)/2];
    queue_latency_max = queue_latency_arr[slots-1];

    LOG() << "Queue Latency Avg:     \t" << CounterToNanoSec(queue_latency_avg) << std::endl;
    LOG() << "Queue Latency Min:     \t" << CounterToNanoSec(queue_latency_min) << std::endl;
    LOG() << "Queue Latency Median:  \t" << CounterToNanoSec(queue_latency_med) << std::endl;
    LOG() << "Queue Latency Max:     \t" << CounterToNanoSec(queue_latency_max) << std::endl;
    LOG() << "Queue Packet Workload: \t" << CounterToNanoSec(workload) << std::endl;
    LOG() << "Get GpuCounter Overhead: \t" << CounterToNanoSec(overhead) << std::endl;

    RECORD(CounterToNanoSec(queue_latency_avg)) << "Queue-Latency-Avg";
    RECORD(CounterToNanoSec(queue_latency_min)) << "Queue-Latency-Min";
    RECORD(CounterToNanoSec(queue_latency_med)) << "Queue-Latency-Med";
    RECORD(CounterToNanoSec(queue_latency_max)) << "Queue-Latency-Max";
    RECORD(CounterToNanoSec(workload)) << "Queue-Packet-Workload";
    RECORD(CounterToNanoSec(overhead)) << "GpuCounter-Overhead";

}

/* Test entry point: hands the static QueueLatency body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, QueueLatency) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(QueueLatency));

    TEST_END
}

/* Pushes PAGE_SIZE/sizeof(PM4WRITE_DATA_CI) + 1 write-data packets through a PM4
 * queue, one at a time, and repeats the whole sequence a second time. After each
 * packet the body waits for consumption and for the written value to show up in
 * the destination buffer. Presumably the packet count is chosen so that the ring
 * wraps around - verify against the default queue size.
 */
static void CpQueueWraparound(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    PM4Queue queue;

    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    HsaEvent *event;
    ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event), gpuNode);

    /* Two identical passes; each pass writes the values 0..lastValue inclusive */
    const size_t lastValue = PAGE_SIZE/sizeof(PM4WRITE_DATA_CI);
    for (int pass = 0; pass < 2; ++pass) {
        for (unsigned int value = 0; value <= lastValue; ++value) {
            queue.PlaceAndSubmitPacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), value, value));
            queue.Wait4PacketConsumption(event);
            WaitOnValue(destBuf.As<unsigned int*>(), value);
        }
    }

    hsaKmtDestroyEvent(event);
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

}

/* Test entry point: hands the static CpQueueWraparound body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, CpQueueWraparound) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(CpQueueWraparound));

    TEST_END
}

/* Drives an SDMA queue through more packets than queue.Size()/sizeof(packet),
 * in two phases:
 *   - copy phase: per iteration, fill the source with the iteration index, copy it
 *     into the first half of the destination, then write a marker right behind the
 *     copied area and compare the copied bytes against the source;
 *   - write phase: per iteration, write the iteration index to the start of the
 *     destination and wait for it to appear.
 */
static void SdmaQueueWraparound(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    int bufSize = PAGE_SIZE;

    SDMAQueue queue;

    HsaMemoryBuffer destBuf(bufSize << 1, gpuNode, false);
    HsaMemoryBuffer srcBuf(bufSize, gpuNode, false);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    /* Value the marker write leaves directly after the copied region */
    const unsigned int markerValue = 0x02020202;

    for (unsigned int iter = 0;  iter <= queue.Size()/sizeof(SDMA_PKT_COPY_LINEAR); ++iter) {
        destBuf.Fill(0x0);
        srcBuf.Fill(iter);

        unsigned int *dstWords = destBuf.As<unsigned int*>();
        unsigned int *srcWords = srcBuf.As<unsigned int*>();
        unsigned int *marker = dstWords + bufSize/4;

        queue.PlaceAndSubmitPacket(
                SDMACopyDataPacket(queue.GetFamilyId(), dstWords, srcWords, bufSize));
        queue.PlaceAndSubmitPacket(
                SDMAWriteDataPacket(queue.GetFamilyId(), marker, markerValue));
        queue.Wait4PacketConsumption();

        EXPECT_TRUE_GPU(WaitOnValue(marker, markerValue), gpuNode);

        /* memcmp() returns 0 on equality, which the SUCCESS check accepts */
        EXPECT_SUCCESS_GPU(memcmp(dstWords, srcWords, bufSize), gpuNode);
    }

    for (unsigned int iter = 0; iter <= queue.Size()/sizeof(SDMA_PKT_WRITE_UNTILED); ++iter) {
        unsigned int *dstWords = destBuf.As<unsigned int*>();

        queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), dstWords, iter));
        queue.Wait4PacketConsumption();
        WaitOnValue(dstWords, iter);
    }

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* Test entry point: hands the static SdmaQueueWraparound body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, SdmaQueueWraparound) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(SdmaQueueWraparound));

    TEST_END
}

/* State shared between the Atomics test body and the AtomicIncThread worker. */
struct AtomicIncThreadParams {
    /* Location the worker increments through AtomicInc() */
    HSAint64* pDest;
    /* Number of increments the worker has performed; polled by the test body */
    volatile unsigned int count;
    /* The worker keeps incrementing while this is true; cleared by the test body */
    volatile bool loop;
};

unsigned int AtomicIncThread(void* pCtx) {
    AtomicIncThreadParams* pArgs = reinterpret_cast<AtomicIncThreadParams*>(pCtx);

    while (pArgs->loop) {
        AtomicInc(pArgs->pDest);
        ++pArgs->count;
    }

    LOG() << "CPU atomic increments finished" << std::endl;

    return 0;
}

/* Checks that CPU and GPU atomic increments on the same location do not lose updates.
 *
 * A CPU thread (AtomicIncThread) increments destBuf[0] continuously. Once it has done
 * at least 1000 increments, the AtomicIncIsa shader is dispatched with 1024 work-items
 * while the CPU thread keeps running. After the dispatch completes and the thread is
 * stopped, destBuf[0] must equal 1024 plus the number of CPU increments.
 * The body is skipped when the node reports no PCI atomics support.
 */
static void Atomics(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    Assembler* m_pAsm;
    m_pAsm = pKFDQMTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    if (!hasPciAtomicsSupport(gpuNode)) {
        LOG() << "Skipping test: Node doesn't support Atomics." << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuf(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode);

    PM4Queue queue;

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(AtomicIncIsa, isaBuf.As<char*>()), gpuNode);

    Dispatch dispatch(isaBuf);
    dispatch.SetArgs(destBuf.As<void*>(), NULL);
    dispatch.SetDim(1024, 1, 1);

    /* Result intentionally not checked, as in the original test. */
    hsaKmtSetMemoryPolicy(gpuNode, HSA_CACHING_CACHED, HSA_CACHING_CACHED, NULL, 0);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    AtomicIncThreadParams params;
    params.pDest = destBuf.As<HSAint64*>();
    params.loop = true;
    params.count = 0;

    uint64_t threadId;

    ASSERT_EQ_GPU(true, StartThread(&AtomicIncThread, &params, threadId), gpuNode);

    LOG() << "Waiting for CPU to atomic increment 1000 times" << std::endl;

    /* Busy-wait until the CPU thread is demonstrably running */
    while (params.count < 1000)
        {}

    LOG() << "Submitting the GPU atomic increment shader" << std::endl;

    dispatch.Submit(queue);
    dispatch.Sync();

    /* Stop the CPU thread only after the GPU work is done so both sides overlapped */
    params.loop = false;

    WaitForThread(threadId);

    EXPECT_EQ_GPU(destBuf.As<unsigned int*>()[0], 1024 + params.count, gpuNode);

    LOG() << "GPU increments: 1024, CPU increments: " << std::dec
            << params.count << std::endl;

    /* Report a failed teardown instead of dropping the status, like the sibling tests do */
    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);
}

/* Test entry point: hands the static Atomics body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, Atomics) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(Atomics));

    TEST_END
}

/* Moves one dword from a source-node buffer to a destination-node buffer through a
 * third, shared buffer: the source node runs CopyDwordIsa to write the shared
 * buffer, then the destination node runs the same shader to read it back out.
 * Nodes default to src=2 / dst=1 unless both g_TestNodeId and g_TestDstNodeId are set.
 */
TEST_F(KFDQMTest, mGPUShareBO) {
    TEST_START(TESTPROFILE_RUNALL);

    unsigned int src_node = 2;
    unsigned int dst_node = 1;

    /* Both overrides must be given for either of them to take effect */
    const bool nodesGiven = (g_TestDstNodeId != -1 && g_TestNodeId != -1);
    if (nodesGiven) {
        src_node = g_TestNodeId;
        dst_node = g_TestDstNodeId;
    }

    HsaMemoryBuffer sharedBuf(PAGE_SIZE, dst_node, true, false, false, false);

    HsaMemoryBuffer srcMem(PAGE_SIZE, src_node);
    HsaMemoryBuffer dstMem(PAGE_SIZE, dst_node);

    /* Handle ISA to write to local memory BO */
    HsaMemoryBuffer srcIsa(PAGE_SIZE, src_node, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer dstIsa(PAGE_SIZE, dst_node, true/*zero*/, false/*local*/, true/*exec*/);

    const unsigned int pattern = 0x05050505;
    srcMem.Fill(pattern);

    ASSERT_SUCCESS(m_pAsm->RunAssemble(CopyDwordIsa));

    /* Stage 1: source node copies srcMem -> sharedBuf */
    m_pAsm->CopyInstrStream(srcIsa.As<char*>());
    SyncDispatch(srcIsa, srcMem.As<void*>(), sharedBuf.As<void *>(), src_node);

    /* Stage 2: destination node copies sharedBuf -> dstMem */
    m_pAsm->CopyInstrStream(dstIsa.As<char*>());
    SyncDispatch(dstIsa, sharedBuf.As<void *>(), dstMem.As<void*>(), dst_node);

    EXPECT_EQ(dstMem.As<unsigned int*>()[0], 0x05050505);

    EXPECT_SUCCESS(sharedBuf.UnmapMemToNodes(&dst_node, 1));

    TEST_END
}

/* Copies `size` bytes from `src` to each of the `n` destinations in `dst` using a
 * temporary SDMA queue on `node`, and waits for the copy packet to be consumed.
 * Failures are reported through the test assertion macros; on a fatal failure the
 * function returns early.
 */
static void
sdma_copy(HSAuint32 node, void *src, void *const dst[], int n, HSAuint64 size) {
    SDMAQueue sdmaQueue;
    HsaEvent *event;
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, node, &event));

    /* The fatal assert below returns immediately, so release the event first
     * when queue creation failed; otherwise it would be leaked.
     */
    const HSAKMT_STATUS createStatus = sdmaQueue.Create(node);
    if (createStatus != HSAKMT_STATUS_SUCCESS)
        hsaKmtDestroyEvent(event);
    ASSERT_SUCCESS(createStatus);

    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), dst, src, n, size));
    sdmaQueue.Wait4PacketConsumption(event);
    EXPECT_SUCCESS(sdmaQueue.Destroy());
    hsaKmtDestroyEvent(event);
}

/* Fills `size` bytes at `dst` with the 32-bit pattern `data` using a temporary SDMA
 * queue on `node`, and waits for the fill packet to be consumed.
 * Failures are reported through the test assertion macros; on a fatal failure the
 * function returns early.
 */
static void
sdma_fill(HSAint32 node, void *dst, unsigned int data, HSAuint64 size) {
    SDMAQueue sdmaQueue;
    HsaEvent *event;
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, node, &event));

    /* The fatal assert below returns immediately, so release the event first
     * when queue creation failed; otherwise it would be leaked.
     */
    const HSAKMT_STATUS createStatus = sdmaQueue.Create(node);
    if (createStatus != HSAKMT_STATUS_SUCCESS)
        hsaKmtDestroyEvent(event);
    ASSERT_SUCCESS(createStatus);

    sdmaQueue.PlaceAndSubmitPacket(SDMAFillDataPacket(sdmaQueue.GetFamilyId(), dst, data, size));
    sdmaQueue.Wait4PacketConsumption(event);
    EXPECT_SUCCESS(sdmaQueue.Destroy());
    hsaKmtDestroyEvent(event);
}

/* Peer-to-peer push test: the first GPU fills a buffer with MAGIC_NUM, then each GPU
 * in turn uses its SDMA engine to copy its buffer both to a shared system buffer and
 * to a buffer on the next GPU. After every copy the first and last dword of the
 * system buffer are checked. The last GPU copies to the system buffer only.
 */
TEST_F(KFDQMTest, P2PTest) {
    TEST_START(TESTPROFILE_RUNALL);
    if (!hsakmt_is_dgpu()) {
        LOG() << "Skipping test: Two GPUs are required, but no dGPUs are present." << std::endl;
        return;
    }

    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
    if (gpuNodes.size() < 2) {
        LOG() << "Skipping test: At least two GPUs are required." << std::endl;
        return;
    }
    std::vector<int> nodes;

    /* This test simulates RT team's P2P part in IPCtest:
     *
     * +------------------------------------------------+
     * |         gpu1           gpu2           gpuX     |
     * |gpu1 mem ----> gpu2 mem ----> gpuX mem          |
     * |        \               \               \       |
     * |         \               \               \      |
     * |    system buffer   system buffer  system buffer|
     * +------------------------------------------------+
     *
     * Copy data from current GPU memory to next GPU memory and system memory
     * Using current GPU, aka p2p push.
     * Verify the system buffer has the expected content after each push.
     */

    /* Users can use "--node=gpu1 --dst_node=gpu2" to specify devices */
    if (g_TestDstNodeId != -1 && g_TestNodeId != -1) {
        nodes.push_back(g_TestNodeId);
        nodes.push_back(g_TestDstNodeId);

        if (!m_NodeInfo.IsPeerAccessibleByNode(g_TestNodeId, g_TestDstNodeId)) {
            LOG() << "Skipping test: Dst GPU specified is not peer-accessible." << std::endl;
            return;
        }
        if (nodes[0] == nodes[1]) {
            LOG() << "Skipping test: Different GPUs must be specified (2 GPUs required)." << std::endl;
            return;
        }
    } else {
        nodes = m_NodeInfo.GetNodesWithGPU();
        if (nodes.size() < 2) {
            LOG() << "Skipping test: Test requires at least one large bar GPU." << std::endl;
            LOG() << "               or two GPUs are XGMI connected." << std::endl;
            return;
        }
    }

    HSAuint32 *sysBuf = NULL;
    HSAuint32 size = 16ULL<<20;  // bigger than 16MB to test non-contiguous memory
    HsaMemFlags memFlags = {0};
    HsaMemMapFlags mapFlags = {0};
    memFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    memFlags.ui32.HostAccess = 0;
    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.NoNUMABind = 1;
    unsigned int end = size / sizeof(HSAuint32) - 1;

    /* 1. Allocate a system buffer and allow the access to GPUs.
     * Both steps are fatal on failure: sysBuf is memset and read by the CPU below,
     * so continuing without a valid, mapped buffer would dereference a bad pointer.
     */
    ASSERT_SUCCESS(hsaKmtAllocMemory(0, size, m_MemoryFlags,
                                     reinterpret_cast<void **>(&sysBuf)));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sysBuf, size, NULL,
                                             mapFlags, nodes.size(), (HSAuint32 *)&nodes[0]));
#define MAGIC_NUM 0xdeadbeaf

    /* First GPU fills mem with MAGIC_NUM */
    void *src, *dst;
    HSAuint32 cur = nodes[0], next;
    ASSERT_SUCCESS(hsaKmtAllocMemory(cur, size, memFlags, reinterpret_cast<void**>(&src)));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(src, size, NULL));
    sdma_fill(cur, src, MAGIC_NUM, size);

    for (unsigned i = 1; i <= nodes.size(); i++) {
        int n;
        memset(sysBuf, 0, size);

        /* Last GPU just copy mem to sysBuf*/
        if (i == nodes.size()) {
               n = 1;
               next = 0;/*system memory node*/
               dst = 0;
        } else {
            n = 2;
            next = nodes[i];

            /* check if cur access next node */
            if (!m_NodeInfo.IsPeerAccessibleByNode(next, cur))
                continue;

            ASSERT_SUCCESS(hsaKmtAllocMemory(next, size, memFlags, reinterpret_cast<void**>(&dst)));
            ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(dst, size, NULL));
        }

        LOG() << "Test " << cur << " -> " << next << std::endl;
        /* Copy to sysBuf and next GPU*/
        void *dst_array[] = {sysBuf, dst};
        sdma_copy(cur, src, dst_array, n, size);

        /* Verify the data*/
        EXPECT_EQ(sysBuf[0], MAGIC_NUM);
        EXPECT_EQ(sysBuf[end], MAGIC_NUM);

        LOG() << "PASS " << cur << " -> " << next << std::endl;

        /* The current source is no longer needed; the next hop reads from dst */
        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(src));
        EXPECT_SUCCESS(hsaKmtFreeMemory(src, size));

        cur = next;
        src = dst;
    }

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(sysBuf));
    EXPECT_SUCCESS(hsaKmtFreeMemory(sysBuf, size));

    TEST_END
}

/* Stress test for event signalling from PM4 queues.
 *
 * Each iteration creates numPM4Queue queues with one event each, loads every queue
 * with one write-data packet per dword of a page-sized buffer followed by a
 * release-memory packet targeting that queue's event, submits all queues, waits on
 * every event, verifies the first and last dword written, and tears the queues and
 * events down again.
 */
static void PM4EventInterrupt(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    const HSAuint64 bufSize = PAGE_SIZE;
    const int packetCount = bufSize / sizeof(unsigned int);
    const int totalPacketSize = packetCount * PM4WriteDataPacket(0, 0).SizeInBytes() +
                                                PM4ReleaseMemoryPacket(m_FamilyId, 0, 0, 0).SizeInBytes();
    const int queueSize = RoundToPowerOf2(totalPacketSize);

    /* Reduce number of iteration if running with emulator. */
    const int numIter = (g_IsEmuMode ? 32 : 1024);

    /* 4 PM4 queues will be running at same time.*/
    const int numPM4Queue = 4;
    HsaEvent *event[numPM4Queue];
    PM4Queue queue[numPM4Queue];
    HsaMemoryBuffer *destBuf[numPM4Queue];
    unsigned int *buf[numPM4Queue];

    for (int q = 0; q < numPM4Queue; ++q) {
        destBuf[q] = new HsaMemoryBuffer(bufSize, gpuNode, true, false); // System memory
        buf[q] = destBuf[q]->As<unsigned int *>();
    }

    /* A simple loop here to give more pressure.*/
    for (int iter = 0; iter < numIter; ++iter) {
        /* Phase 1: build every queue's packet stream without submitting */
        for (int q = 0; q < numPM4Queue; ++q) {
            ASSERT_SUCCESS_GPU(queue[q].Create(gpuNode, queueSize), gpuNode);
            ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event[q]), gpuNode);

            /* Let CP have some workload first.*/
            for (int dword = 0; dword < packetCount; ++dword)
                queue[q].PlacePacket(PM4WriteDataPacket(buf[q] + dword, 0xdeadbeaf));

            /* releaseMemory packet makes sure all previous written data is visible.*/
            queue[q].PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, 0,
                        reinterpret_cast<HSAuint64>(event[q]->EventData.HWData2),
                        event[q]->EventId,
                        true));
        }

        /* Phase 2: kick all queues back to back */
        for (int q = 0; q < numPM4Queue; ++q)
            queue[q].SubmitPacket();

        /* Phase 3: wait for each event, check both ends of the buffer, then clear it */
        for (int q = 0; q < numPM4Queue; ++q) {
            EXPECT_SUCCESS_GPU(hsaKmtWaitOnEvent(event[q], g_TestTimeOut), gpuNode);
            EXPECT_EQ_GPU(buf[q][0], 0xdeadbeaf, gpuNode);
            EXPECT_EQ_GPU(buf[q][packetCount - 1], 0xdeadbeaf, gpuNode);
            memset(buf[q], 0, bufSize);
        }

        /* Phase 4: tear down so the next iteration starts from scratch */
        for (int q = 0; q < numPM4Queue; ++q) {
            EXPECT_SUCCESS_GPU(queue[q].Destroy(), gpuNode);
            EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event[q]), gpuNode);
        }
    }

    for (int q = 0; q < numPM4Queue; ++q)
        delete destBuf[q];
}

/* Test entry point: hands the static PM4EventInterrupt body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, PM4EventInterrupt) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(PM4EventInterrupt));

    TEST_END
}

#include "KFDTestUtilQueue.hpp"
/* Stress test for event signalling from SDMA queues.
 *
 * Runs with 1, 2 and then 3 queues active at once, 2048 rounds each. Every round
 * creates the queues and one event per queue, places a timestamp / copy / fence /
 * trap packet sequence on each, submits them all, and waits on every event.
 * A timed-out event triggers one resubmission plus diagnostic output; the original
 * wait status is still what gets checked.
 */
static void SdmaEventInterrupt(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;

    const HSAuint64 bufSize = 4 << 20;
    HsaMemoryBuffer srcBuf(bufSize, 0); // System memory.

    HSAuint64 *src = srcBuf.As<HSAuint64*>();
    TimeStamp *tsbuf = srcBuf.As<TimeStamp*>();
    tsbuf = reinterpret_cast<TimeStamp *>ALIGN_UP(tsbuf, sizeof(TimeStamp));

    /* Have 3 queues created for test.*/
    const int numSDMAQueue = 3;
    HsaEvent *event[numSDMAQueue];
    SDMAQueue queue[numSDMAQueue];
    HsaMemoryBuffer *destBuf[numSDMAQueue];
    HSAuint64 *dst[numSDMAQueue];

    /* Each queue owns a window of this many TimeStamp entries inside tsbuf */
    const int tsStride = 32;

    for (int q = 0; q < numSDMAQueue; ++q) {
        destBuf[q] = new HsaMemoryBuffer(bufSize, gpuNode, true, false); // System memory
        dst[q] = destBuf[q]->As<HSAuint64*>();
    }

    /* Test 1 queue, 2 queues, 3 queues running at same time one by one.*/
    for (int activeQueues = 1; activeQueues <= numSDMAQueue; ++activeQueues) {
        /* A simple loop here to give more pressure.*/
        for (int round = 0; round < 2048; ++round) {
            for (int q = 0; q < activeQueues; ++q) {
                TimeStamp *ts = tsbuf + q * tsStride;
                ASSERT_SUCCESS_GPU(queue[q].Create(gpuNode), gpuNode);
                /* FIXME
                 * We create event every time along with queue.
                 * However that will significantly enhance the failure of sdma event timeout.
                 */
                ASSERT_SUCCESS_GPU(CreateQueueTypeEvent(false, false, gpuNode, &event[q]), gpuNode);

                /* Get the timestamp directly. The first member of HsaClockCounters and TimeStamp is GPU clock counter.*/
                hsaKmtGetClockCounters(gpuNode, reinterpret_cast<HsaClockCounters*>(&ts[0]));
                /* Let sDMA have some workload first.*/
                queue[q].PlacePacket(SDMATimePacket(&ts[1]));
                queue[q].PlacePacket(
                        SDMACopyDataPacket(queue[q].GetFamilyId(), dst[q], src, bufSize));
                queue[q].PlacePacket(SDMATimePacket(&ts[2]));
                queue[q].PlacePacket(
                        SDMAFencePacket(queue[q].GetFamilyId(),
                                reinterpret_cast<void*>(event[q]->EventData.HWData2), event[q]->EventId));
                queue[q].PlacePacket(SDMATimePacket(&ts[3]));
                queue[q].PlacePacket(SDMATrapPacket(event[q]->EventId));
                queue[q].PlacePacket(SDMATimePacket(&ts[4]));

                /* Will verify the value of srcBuf and destBuf later. Give it a different value each time.*/
                src[0] = ts[0].timestamp;
            }

            for (int q = 0; q < activeQueues; ++q)
                queue[q].SubmitPacket();

            for (int q = 0; q < activeQueues; ++q) {
                TimeStamp *ts = tsbuf + q * tsStride;
                HSAKMT_STATUS ret = hsaKmtWaitOnEvent(event[q], g_TestTimeOut);

                if (dst[q][0] != src[0])
                    WARN() << "SDMACopyData FAIL! " << std::dec
                        << dst[q][0] << " VS " << src[0] << std::endl;

                if (ret != HSAKMT_STATUS_SUCCESS) {
                    WARN() << "Event On Queue " << activeQueues << ":" << q
                        << " Timeout, try to resubmit packets!" << std::endl;

                    queue[q].SubmitPacket();

                    if (hsaKmtWaitOnEvent(event[q], g_TestTimeOut) == HSAKMT_STATUS_SUCCESS)
                        WARN() << "The timeout event is signaled!" << std::endl;
                    else
                        WARN() << "The timeout event is lost after resubmit!" << std::endl;

                    /* Dump the time spent between consecutive timestamps */
                    LOG() << "Time Consumption (ns)" << std::endl;
                    for (int step = 1; step <= 4; ++step)
                        LOG() << std::dec << step << ": "
                            << CounterToNanoSec(ts[step].timestamp - ts[step - 1].timestamp) << std::endl;
                } else {
                    for (int step = 1; step <= 4; ++step) {
                        /* Is queue latency too big? The workload is really small.*/
                        if (CounterToNanoSec(ts[step].timestamp - ts[step - 1].timestamp) > 1000000000)
                            WARN() << "SDMA queue latency is bigger than 1s!" << std::endl;
                    }
                }

                /* The status of the first wait decides pass/fail, even after a resubmit */
                EXPECT_SUCCESS_GPU(ret, gpuNode);
            }

            for (int q = 0; q < activeQueues; ++q) {
                EXPECT_SUCCESS_GPU(queue[q].Destroy(), gpuNode);
                EXPECT_SUCCESS_GPU(hsaKmtDestroyEvent(event[q]), gpuNode);
            }
        }
    }

    for (int q = 0; q < numSDMAQueue; ++q)
        delete destBuf[q];

}

/* Test entry point: hands the static SdmaEventInterrupt body to KFDTest_Launch
 * and requires the launch to report success.
 */
TEST_F(KFDQMTest, SdmaEventInterrupt) {
    TEST_START(TESTPROFILE_RUNALL)

     ASSERT_SUCCESS(KFDTest_Launch(SdmaEventInterrupt));

    TEST_END
}

#define DOORBELL_WRITE_USE_SDMA
/* Verifies that a queue can be kicked by the GPU itself.
 *
 * A write-data packet is placed on a PM4 queue but not submitted. A second queue
 * (SDMA when DOORBELL_WRITE_USE_SDMA is defined, PM4 otherwise) then writes the
 * pending write pointer to the first queue's write-pointer location and doorbell.
 * The test passes when the values from the unsubmitted packet appear in destBuf.
 * Families below FAMILY_AI use a 32-bit write pointer, all others a 64-bit one.
 */
static void GPUDoorbellWrite(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDQMTest* pKFDQMTest = (KFDQMTest*)pTestParamters->pTestObject;
    HSAuint32 m_FamilyId = pKFDQMTest->GetFamilyIdFromNodeId(gpuNode);

    HsaMemoryBuffer destBuf(PAGE_SIZE, 0, true);
    PM4Queue pm4Queue;
#ifdef DOORBELL_WRITE_USE_SDMA
    SDMAQueue otherQueue;
#else
    PM4Queue otherQueue;
#endif

    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    ASSERT_SUCCESS_GPU(otherQueue.Create(gpuNode), gpuNode);

    /* Place PM4 packet in the queue, but don't submit it */
    pm4Queue.PlacePacket(PM4WriteDataPacket(destBuf.As<unsigned int*>(), 0x12345678, 0x87654321));

    HsaQueueResource *resource = pm4Queue.GetResource();

    if (m_FamilyId >= FAMILY_AI) {
        HSAuint64 wptr64 = pm4Queue.GetPendingWptr64();

#ifdef DOORBELL_WRITE_USE_SDMA
        /* Write the wptr and doorbell update using the GPU's SDMA
         * engine. This should submit the PM4 packet on the first
         * queue.
         */
        otherQueue.PlacePacket(SDMAWriteDataPacket(otherQueue.GetFamilyId(), resource->Queue_write_ptr,
                                                   2, &wptr64));
        otherQueue.PlacePacket(SDMAWriteDataPacket(otherQueue.GetFamilyId(), resource->Queue_DoorBell,
                                                   2, &wptr64));
#else
        /* Write the 64-bit wptr and doorbell update using RELEASE_MEM
         * packets without IRQs on a second PM4 queue. RELEASE_MEM
         * should perform one atomic 64-bit access. This should submit
         * the PM4 packet on the first queue.
         */
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)resource->Queue_write_ptr,
                                   wptr64, true));
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)resource->Queue_DoorBell,
                                   wptr64, true));
#endif

        otherQueue.SubmitPacket();
    } else {
        unsigned int wptr32 = pm4Queue.GetPendingWptr();

#ifdef DOORBELL_WRITE_USE_SDMA
        /* Write the wptr and doorbell update using the GPU's SDMA
         * engine. This should submit the PM4 packet on the first
         * queue.
         */
        otherQueue.PlacePacket(SDMAWriteDataPacket(otherQueue.GetFamilyId(), resource->Queue_write_ptr,
                                                   wptr32));
        otherQueue.PlacePacket(SDMAWriteDataPacket(otherQueue.GetFamilyId(), resource->Queue_DoorBell,
                                                   wptr32));
#else
        /* Write the wptr and doorbell update using WRITE_DATA packets
         * on a second PM4 queue. This should submit the PM4 packet on
         * the first queue.
         */
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)resource->Queue_write_ptr,
                                   wptr32, false));
        otherQueue.PlacePacket(
            PM4ReleaseMemoryPacket(m_FamilyId, true, (HSAuint64)resource->Queue_DoorBell,
                                   wptr32, false));
#endif

        otherQueue.SubmitPacket();
    }

    /* Check that the PM4 packet has been executed */
    unsigned int *written = destBuf.As<unsigned int *>();
    EXPECT_TRUE_GPU(WaitOnValue(written, 0x12345678), gpuNode);
    EXPECT_TRUE_GPU(WaitOnValue(written+1, 0x87654321), gpuNode);

    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(otherQueue.Destroy(), gpuNode);

}

/* gtest entry point: hands the GPUDoorbellWrite helper to KFDTest_Launch() and
 * fails the test if the launch does not report success.
 * NOTE(review): KFDTest_Launch presumably runs the helper once per GPU node - confirm.
 */
TEST_F(KFDQMTest, GPUDoorbellWrite) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(GPUDoorbellWrite));

    TEST_END
}

/*
 * Checks that queue creation validates the user ring buffer and that a ring buffer
 * cannot be unmapped or freed while its queue still exists.
 *
 * Parent process:
 *   - creates and destroys one compute queue on a valid PAGE_SIZE ring buffer
 *   - expects queue creation to fail when the ring size passed in (PAGE_SIZE * 2)
 *     does not match the buffer, and when the ring buffer is NULL, for both
 *     compute and SDMA queues
 * Child process:
 *   - runs the negative test that evicts the user queue; it reports the result
 *     through its exit code (0 = pass), which the parent checks.
 */
TEST_F(KFDQMTest, UserQueueBufValidation) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HsaQueueResource QueueResources;
    HsaMemoryBuffer *QueueBuf;
    HSAKMT_STATUS status;

    memset(&QueueResources, 0, sizeof(QueueResources));

    // System memory mapping on GPU
    // NOTE(review): the HsaMemoryBuffer object itself is never deleted; presumably
    // deliberate because the memory is unmapped and freed by hand below - confirm.
    QueueBuf = new HsaMemoryBuffer(PAGE_SIZE, defaultGPUNode);

    EXPECT_SUCCESS(hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_COMPUTE,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               QueueBuf->As<unsigned int*>(),
                               PAGE_SIZE,
                               NULL,
                               &QueueResources));
    EXPECT_SUCCESS(hsaKmtDestroyQueue(QueueResources.QueueId));

    // In the negative checks below the status is logically negated, so the
    // expectation passes only when the call returns a non-zero status.
    // (assumes HSAKMT_STATUS_SUCCESS is 0 - confirm)

    // CP Queue creation should fail using wrong ring buffer size
    EXPECT_SUCCESS(!hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_COMPUTE,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               QueueBuf->As<unsigned int*>(),
                               PAGE_SIZE * 2,
                               NULL,
                               &QueueResources));

    // SDMA queue create should fail using wrong ring buffer size
    EXPECT_SUCCESS(!hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_SDMA,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               QueueBuf->As<unsigned int*>(),
                               PAGE_SIZE * 2,
                               NULL,
                               &QueueResources));

    // CP queue create should fail using NULL ring buffer
    EXPECT_SUCCESS(!hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_COMPUTE,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               NULL,
                               PAGE_SIZE,
                               NULL,
                               &QueueResources));

    // SDMA queue create should fail using NULL ring buffer
    EXPECT_SUCCESS(!hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_SDMA,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               NULL,
                               PAGE_SIZE,
                               NULL,
                               &QueueResources));

    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(QueueBuf->As<unsigned int*>()));
    EXPECT_SUCCESS(hsaKmtFreeMemory(QueueBuf->As<unsigned int*>(), PAGE_SIZE));

    //
    // This following negative test will evict user queues, must execute in child process,
    // because parent process is allowed to create queue to run the remaining tests.
    //
    pid_t childPid = fork();

    // fork() returns -1 on failure. Without this check the parent branch would call
    // waitpid(-1, ...) and evaluate a status that does not belong to our child.
    ASSERT_GE(childPid, 0) << "failed to fork child process";

    if (childPid == 0) { /* Child process */
        void *cwsr_addr;
        int exit_code = 1;  // pessimistic default, cleared only when every check passed

        // Drop the state inherited from the parent and open KFD again as a new process
        TearDown();
        SetUp();

        // System memory mapping on GPU
        QueueBuf = new HsaMemoryBuffer(PAGE_SIZE, defaultGPUNode);
        memset(&QueueResources, 0, sizeof(QueueResources));

        status = hsaKmtCreateQueue(defaultGPUNode,
                               HSA_QUEUE_COMPUTE,
                               100,
                               HSA_QUEUE_PRIORITY_NORMAL,
                               QueueBuf->As<unsigned int*>(),
                               PAGE_SIZE,
                               NULL,
                               &QueueResources);
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "create queue failed." << std::endl;
            goto free_exit;
        }

        // Update queue percentage 0 to set queue inactive in order to get queue info CWSR area
        status = hsaKmtUpdateQueue(QueueResources.QueueId, 0, HSA_QUEUE_PRIORITY_NORMAL,
                                     QueueBuf->As<unsigned int*>(), PAGE_SIZE, NULL);
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "update queue failed." << std::endl;
            goto err_exit;
        }

        HsaQueueInfo QueueInfo;
        status = hsaKmtGetQueueInfo(QueueResources.QueueId, &QueueInfo);
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "get queue info failed." << std::endl;
            goto err_exit;
        }

        // unmap CWSR buffer will evict queue before queue is destroyed
        cwsr_addr = QueueInfo.UserContextSaveArea;
        munmap(cwsr_addr, PAGE_SIZE);

        // unmap and free queue ring buffer should fail before the queue is destroyed
        status = hsaKmtFreeMemory(QueueBuf->As<unsigned int*>(), PAGE_SIZE);
        if (status == HSAKMT_STATUS_SUCCESS) {
            LOG() << "free queue buf should fail." << std::endl;
            goto err_exit;
        }

        status = hsaKmtUnmapMemoryToGPU(QueueBuf->As<unsigned int*>());
        if (status == HSAKMT_STATUS_SUCCESS) {
            LOG() << "unmap queue buf should fail." << std::endl;
            goto err_exit;
        }

        exit_code = 0;

        // Cleanup ladder: err_exit also destroys the queue, free_exit only releases the buffer
err_exit:
        status = hsaKmtDestroyQueue(QueueResources.QueueId);
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "destroy queue failed." << std::endl;
            exit_code = 1;
        }
free_exit:
        status = hsaKmtUnmapMemoryToGPU(QueueBuf->As<unsigned int*>());
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "unmap queue buf failed." << std::endl;
            exit_code = 1;
        }

        status = hsaKmtFreeMemory(QueueBuf->As<unsigned int*>(), PAGE_SIZE);
        if (status != HSAKMT_STATUS_SUCCESS) {
            LOG() << "free queue buf failed." << std::endl;
            exit_code = 1;
        }

        // The child must not fall through into the rest of the gtest run
        exit(exit_code);
    } else {
        int childStatus;

        // The child passes only if it exited normally with exit code 0
        waitpid(childPid, &childStatus, 0);
        EXPECT_EQ(true, WIFEXITED(childStatus));
        EXPECT_EQ(0, WEXITSTATUS(childStatus));
    }

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDQMTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_QCM_TEST__H__
#define __KFD_QCM_TEST__H__

#include <gtest/gtest.h>

#include "PM4Queue.hpp"
#include "KFDBaseComponentTest.hpp"
#include "Dispatch.hpp"

/*
 * Used by ExtendedCuMasking test case to pass GPU configuration information to helper functions.
 */
struct mask_config_t {
    uint32_t numDwords;       // presumably the number of 32-bit words in the CU mask - confirm
    uint32_t numBits;         // presumably the number of valid mask bits - confirm
    uint32_t numSEs;          // presumably the shader-engine count - confirm
    uint32_t numSAperSE;      // presumably shader arrays per shader engine - confirm
    uint32_t numWGPperSA;     // presumably WGPs per shader array - confirm
    uint32_t *pInactiveMask;  // caller-provided mask words; ownership is not visible here
};

/*
 * Used by ExtendedCuMasking test case.
 *
 * The union is hardware-dependent: its fields are laid out the same way as the hardware register.
 *
 */
typedef union {
    uint32_t data;  // the whole value as one 32-bit word
    // Fields needed from HW_ID1 (format same for GFX11 and GFX12)
    // The bit-field widths below add up to 32 (10 + 4 + 2 + 1 + 1 + 3 + 11), the same
    // size as `data`. Unnamed bit-fields are padding for register bits that are not used.
    // NOTE(review): the exact bit positions depend on the compiler's bit-field
    // allocation order - confirm for the target ABI.
    struct {
        unsigned     :10;
        unsigned wgp : 4;  // presumably the WGP id - confirm
        unsigned     : 2;
        unsigned  sa : 1;  // presumably the shader array id - confirm
        unsigned     : 1;
        unsigned  se : 3;  // presumably the shader engine id - confirm
        unsigned     :11;
    };
} out_data_t;


// Test fixture for the KFDQMTest test cases; derives from KFDBaseComponentTest.
class KFDQMTest : public KFDBaseComponentTest {
 public:
    KFDQMTest() {}

    ~KFDQMTest() {}

    // Free helper functions that are granted access to the fixture's protected members
    friend void BasicCuMaskingLinear(KFDTEST_PARAMETERS* pTestParamters);
    friend void BasicCuMaskingEven(KFDTEST_PARAMETERS* pTestParamters);
    friend void EmptyDispatch(KFDTEST_PARAMETERS* pTestParamters) ;
    friend void SimpleWriteDispatch(KFDTEST_PARAMETERS* pTestParamters);

 protected:
    // Per-test setup/teardown hooks (defined in the .cpp)
    virtual void SetUp();
    virtual void TearDown();

    // node defaults to -1; NOTE(review): presumably meaning "use the default GPU node" - confirm
    void SyncDispatch(const HsaMemoryBuffer& isaBuffer, void* pSrcBuf, void* pDstBuf, int node = -1);
    // mask points to mask_count 32-bit CU-mask words
    HSAint64 TimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count);
    HSAint64 GetAverageTimeConsumedwithCUMask(int node, uint32_t *mask, uint32_t mask_count, int iterations);
    friend void testQueuePriority(KFDTEST_PARAMETERS* pTestParamters, bool isSamePipe);

 protected:  // Members
    /* Acceptable performance for CU Masking should be within CuVariance (0.15, i.e. 15%)
     * of linearly-predicted performance.
     * NOTE(review): this comment previously said 5%, which did not match the value in code.
     */
    const double CuVariance = 0.15;
    const double CuNegVariance = 1.0 - CuVariance;  // lower bound factor (0.85)
    const double CuPosVariance = 1.0 + CuVariance;  // upper bound factor (1.15)
};

#endif  // __KFD_QCM_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDRASTest.cpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <math.h>
#include <limits.h>

#include "hsakmt/linux/kfd_ioctl.h"
#include "KFDRASTest.hpp"
#include "PM4Queue.hpp"

#define AMDGPU_DEBUGFS_NODES "/sys/kernel/debug/dri/"
#define RAS_CONTROL "ras/ras_ctrl"
#define DRM_RENDER_NUMBER 64

/*
 * Prepares a RAS test:
 *   - finds the DRM render node of the default GPU and checks that at least one of
 *     the SDMA/UMC/GFX RAS features is enabled
 *   - locates the matching primary debugfs directory by comparing the "name" files
 *   - opens <debugfs>/<index>/ras/ras_ctrl for writing (m_pFile)
 *   - creates the memory event the tests wait on (m_pRasEvent)
 *
 * m_setupStatus is set to true only when all steps succeeded. Whenever a
 * precondition is missing the function logs "Skipping test" and returns with
 * m_setupStatus == false; the test bodies check that flag and return early.
 */
void KFDRASTest::SetUp() {
    ROUTINE_START

    /* Put every member that TearDown() inspects into a known state before anything
     * can fail, so TearDown() never sees an indeterminate pointer.
     */
    m_pRasEvent = NULL;
    m_pFile = NULL;
    m_setupStatus = false;

    KFDBaseComponentTest::SetUp();

    char path[256], name[128], tmp[128];
    int renderNode, minor, i;
    FILE *pDriMinor, *pDriPrimary;
    uint32_t rasFeatures = 0;
    HsaEventDescriptor eventDesc;

    m_defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

    /* All "skip" paths below use a plain return. A bare `throw;` with no exception
     * being handled calls std::terminate() and would abort the whole test binary
     * instead of skipping this test.
     */
    renderNode = KFDBaseComponentTest::FindDRMRenderNode(m_defaultGPUNode);
    if (renderNode < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        return;
    }

    /* rasFeatures stays 0 if the query does not fill it in, which also skips the test */
    amdgpu_query_info(m_RenderNodes[renderNode].device_handle,
                AMDGPU_INFO_RAS_ENABLED_FEATURES,
                sizeof(uint32_t), &rasFeatures);
    if (!(rasFeatures &
            (AMDGPU_INFO_RAS_ENABLED_SDMA |
             AMDGPU_INFO_RAS_ENABLED_UMC |
             AMDGPU_INFO_RAS_ENABLED_GFX))) {
        LOG() << "Skipping test: GPU doesn't support RAS features!" << std::endl;
        return;
    }

    /* debugfs directory index used for the render node */
    minor = renderNode + 128;

    snprintf(path, sizeof(path), "%s%d/%s", AMDGPU_DEBUGFS_NODES, minor, "name");
    pDriMinor = fopen(path, "r");
    if (!pDriMinor) {
        LOG() << "Skipping test: DRM render debugfs node requires root access!" << std::endl;
        return;
    }

    /* Read at most sizeof - 1 bytes into the zeroed buffer so the text is always
     * NUL-terminated for the strcmp() below.
     */
    memset(name, 0, sizeof(name));
    fread(name, 1, sizeof(name) - 1, pDriMinor);

    fclose(pDriMinor);

    /* Look for the debugfs directory in [0, DRM_RENDER_NUMBER) whose "name" file has
     * the same content as the render node's one.
     */
    for (i = 0; i < DRM_RENDER_NUMBER; i++) {
        snprintf(path, sizeof(path), "%s%d/%s", AMDGPU_DEBUGFS_NODES, i, "name");
        pDriPrimary = fopen(path, "r");
        if (!pDriPrimary)
            continue;
        memset(tmp, 0, sizeof(tmp));
        fread(tmp, 1, sizeof(tmp) - 1, pDriPrimary);
        fclose(pDriPrimary);
        if (!strcmp(name, tmp))
            break;
    }

    if (i == DRM_RENDER_NUMBER) {
        LOG() << "Skipping test: Could not find the debugfs node!" << std::endl;
        return;
    }

    snprintf(path, sizeof(path), "%s%d/%s", AMDGPU_DEBUGFS_NODES, i, RAS_CONTROL);
    m_pFile = fopen(path, "w");
    if (!m_pFile) {
        LOG() << "Skipping test: RAS error injection requires root access!" << std::endl;
        return;
    }

    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
    eventDesc.NodeId = m_defaultGPUNode;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.SyncVar.SyncVarSize = 0;

    ASSERT_SUCCESS(hsaKmtCreateEvent(&eventDesc, true, false, &m_pRasEvent));

    m_setupStatus = true;

    ROUTINE_END
}

/*
 * Releases whatever SetUp() managed to acquire. Both the event and the control
 * file are optional here because SetUp() may have returned early.
 */
void KFDRASTest::TearDown() {
    ROUTINE_START

    if (m_pRasEvent != NULL) {
        EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pRasEvent));
        m_pRasEvent = NULL;
    }

    /* fclose() must not be called with a null stream */
    if (m_pFile != NULL) {
        fclose(m_pFile);
        m_pFile = NULL;
    }

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/*
 * Injects one uncorrectable UMC error through the RAS control file and expects the
 * RAS memory event to be signalled with ErrorType 1.
 * Does nothing when SetUp() did not complete (m_setupStatus is false).
 */
TEST_F(KFDRASTest, DISABLED_BasicTest) {
    TEST_START(TESTPROFILE_RUNALL);

    if (!m_setupStatus) {
        return;
    }

    // write an uncorrectable error injection at address 0 as value 0
    const char injectCmd[] = "inject umc ue 0 0";
    // 17 characters; the terminating NUL is not written
    const size_t injectCmdLen = sizeof(injectCmd) - 1;

    fwrite(injectCmd, sizeof(char), injectCmdLen, m_pFile);
    fflush(m_pFile);

    // The injection should signal the RAS event before the test timeout expires
    EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pRasEvent, g_TestTimeOut));

    EXPECT_EQ(1, m_pRasEvent->EventData.EventData.MemoryAccessFault.Failure.ErrorType);

    TEST_END;
}

/*
 * Mixes a queue event with a RAS event:
 *   1. signal a queue-type event from a PM4 RELEASE_MEM packet and wait for it
 *   2. inject an uncorrectable UMC error and wait for the RAS memory event
 * Does nothing when SetUp() did not complete (m_setupStatus is false).
 */
TEST_F(KFDRASTest, DISABLED_MixEventsTest) {
    TEST_START(TESTPROFILE_RUNALL);

    if (!m_setupStatus) {
        return;
    }

    // Same injection command as the basic test: 17 characters, written without the NUL
    const char injectCmd[] = "inject umc ue 0 0";
    const size_t injectCmdLen = sizeof(injectCmd) - 1;

    PM4Queue pm4Queue;
    HsaEvent* pQueueEvent;

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_defaultGPUNode, &pQueueEvent));
    ASSERT_NE(0, pQueueEvent->EventData.HWData2);

    ASSERT_SUCCESS(pm4Queue.Create(m_defaultGPUNode));

    // Let the GPU signal the queue event, then wait until the packet was consumed
    pm4Queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
            pQueueEvent->EventData.HWData2, pQueueEvent->EventId));

    pm4Queue.Wait4PacketConsumption();

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(pQueueEvent, g_TestTimeOut));

    // Now trigger the RAS event while the queue event still exists
    fwrite(injectCmd, sizeof(char), injectCmdLen, m_pFile);
    fflush(m_pFile);

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pRasEvent, g_TestTimeOut));

    EXPECT_EQ(1, m_pRasEvent->EventData.EventData.MemoryAccessFault.Failure.ErrorType);

    EXPECT_SUCCESS(pm4Queue.Destroy());
    EXPECT_SUCCESS(hsaKmtDestroyEvent(pQueueEvent));

    TEST_END;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDRASTest.hpp
================================================
/*
 * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDBaseComponentTest.hpp"

#ifndef __KFD_RAS_TEST__H__
#define __KFD_RAS_TEST__H__

// To be removed when amdgpu_drm.h updated with those definitions
#ifndef AMDGPU_INFO_RAS_ENABLED_FEATURES
#define AMDGPU_INFO_RAS_ENABLED_FEATURES    0x20

#define AMDGPU_INFO_RAS_ENABLED_UMC         (1 << 0)
#define AMDGPU_INFO_RAS_ENABLED_SDMA        (1 << 1)
#define AMDGPU_INFO_RAS_ENABLED_GFX         (1 << 2)
#endif

// Test fixture for the RAS error-injection tests; derives from KFDBaseComponentTest.
class KFDRASTest :  public KFDBaseComponentTest {
 public:
    KFDRASTest(void) {}
    ~KFDRASTest(void) {}

    // @brief Executed before every test in KFDRASTest.
    virtual void SetUp();
    // @brief Executed after every test in KFDRASTest.
    virtual void TearDown();

 protected:
    // NOTE(review): not referenced by the tests in KFDRASTest.cpp, which wait with
    // g_TestTimeOut instead - confirm whether it is still needed.
    static const unsigned int EVENT_TIMEOUT = 5000;  // 5 seconds
    // Memory event created in SetUp() and waited on by the tests
    HsaEvent* m_pRasEvent;
    // Default GPU node as returned by m_NodeInfo.HsaDefaultGPUNode()
    HSAint32 m_defaultGPUNode;
    // RAS control file opened for writing in SetUp(); injection commands are written to it
    FILE* m_pFile;
    // true only when SetUp() completed; the tests return immediately otherwise
    bool m_setupStatus;
};

#endif  // __KFD_RAS_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDSVMEvictTest.cpp
================================================
/*
 * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDSVMEvictTest.hpp"
#include <sys/mman.h>
#include <vector>
#include <string>
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

#define N_PROCESSES             (2)     /* number of processes running in parallel, at least 2 */
#define ALLOCATE_BUF_SIZE_MB    (64)
#define ALLOCATE_RETRY_TIMES    (3)
#define MAX_WAVEFRONTS          (512)

/* Runs the base-class setup, then applies the XNACK mode selected by the
 * test parameter of this instantiation.
 */
void KFDSVMEvictTest::SetUp() {
    ROUTINE_START

    KFDLocalMemoryTest::SetUp();

    /* value supplied by the parameterized-test instantiation */
    const int xnackMode = GetParam();
    SVMSetXNACKMode(xnackMode);

    ROUTINE_END
}

/* Mirror of SetUp(): restores the XNACK mode first, then runs the
 * base-class teardown.
 */
void KFDSVMEvictTest::TearDown() {
    ROUTINE_START

    SVMRestoreXNACKMode();

    KFDLocalMemoryTest::TearDown();

    ROUTINE_END
}

/*
 * Computes how many buffers of vramBufSize bytes each process should allocate so
 * that all N_PROCESSES processes together over-commit VRAM.
 *
 * @param vramSize     VRAM size in bytes
 * @param vramBufSize  size of one buffer in bytes
 * @return number of buffers per process; 0 when the test cannot run (buffer
 *         smaller than one page, or not enough system memory with XNACK off)
 */
HSAint32 KFDSVMEvictTest::GetBufferCounter(HSAuint64 vramSize, HSAuint64 vramBufSize) {
    HSAuint64 vramBufSizeInPages = vramBufSize >> PAGE_SHIFT;
    HSAuint64 sysMemSize = GetSysMemSize();
    HSAuint64 size, sizeInPages;
    HSAuint32 count;

    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    /* A buffer smaller than one page would make the divisor below zero */
    if (vramBufSizeInPages == 0)
        return 0;

    /* Total size to allocate is the VRAM size plus an eviction part of
     * MIN(one third of system memory, half of the VRAM size), so the total is
     * at most 1.5 * vramSize.
     * count is zero if not enough memory for XNACK off case
     */
    size = MIN(sysMemSize / 3, vramSize / 2);
    size += vramSize;

    /* Check if there is enough system memory to pass test for XNACK off
     * KFD system memory limit is 15/16.
     */
    HSAint32 xnack_enable = 0;
    EXPECT_SUCCESS(hsaKmtGetXNACKMode(&xnack_enable));
    if (!xnack_enable && size > (sysMemSize - (sysMemSize >> 4)))
        return 0;

    /* Split the total evenly over all processes, in whole buffers */
    sizeInPages = size >> PAGE_SHIFT;
    count = sizeInPages / (vramBufSizeInPages * N_PROCESSES);

    return count;
}

/*
 * Counterpart of GetBufferCounter(): given a fixed number of buffers per process,
 * computes the size of each buffer so that all N_PROCESSES processes together
 * over-commit VRAM.
 *
 * @param vramSize      VRAM size in bytes
 * @param count         number of buffers per process
 * @param xnack_enable  non-zero when XNACK is on
 * @return buffer size in bytes (a multiple of the page size); 0 when the test
 *         cannot run (count is 0, or not enough system memory with XNACK off)
 */
HSAint64 KFDSVMEvictTest::GetBufferSize(HSAuint64 vramSize, HSAuint32 count,
                                        HSAint32 xnack_enable) {
    HSAuint64 sysMemSize = GetSysMemSize();
    HSAuint64 size, sizeInPages;
    HSAuint64 vramBufSizeInPages;

    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    /* No buffers requested: nothing to size, and the divisor below would be zero */
    if (count == 0)
        return 0;

    /* use up to one third of total system memory for eviction buffer to test
     * limit max eviction size to 1/2 of vramSize.
     */
    size = MIN(sysMemSize / 3, vramSize / 2);
    size += vramSize;

    /* Check if there is enough system memory to pass test for XNACK off
     * KFD system memory limit is 15/16.
     */
    if (!xnack_enable && size > (sysMemSize - (sysMemSize >> 4)))
        return 0;

    /* Split the total evenly over all buffers of all processes, in whole pages */
    sizeInPages = size >> PAGE_SHIFT;
    vramBufSizeInPages = sizeInPages / (count * N_PROCESSES);

    return vramBufSizeInPages << PAGE_SHIFT;
}

/*
 * Maps up to `count` anonymous buffers of vramBufSize bytes and registers each one
 * as an SVM range on defaultGPUNode. Every successfully registered buffer is
 * appended to pBuffers; a non-zero Granularity is applied to it afterwards.
 *
 * A failed registration is retried after one second. Once the failure counter
 * has exceeded ALLOCATE_RETRY_TIMES the current buffer is unmapped and allocation
 * stops, so pBuffers may end up with fewer than `count` entries. The counter is
 * reset after every successful registration.
 */
void KFDSVMEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
        std::vector<void *> &pBuffers, HSAuint32 Granularity) {
    const HSAuint64 bufSizeMB = vramBufSize >> 20;
    const HSAuint64 totalMB = N_PROCESSES * count * bufSizeMB;

    /* only the parent reports the overall test size */
    if (m_IsParent) {
        LOG() << "Testing " << N_PROCESSES << "*" << count << "*" << bufSizeMB << "(="<< totalMB << ")MB" << std::endl;
    }

    HSAuint32 failedAttempts = 0;

    for (HSAuint32 bufIdx = 0; bufIdx < count; ++bufIdx) {
        m_pBuf = mmap(0, vramBufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        ASSERT_NE(MAP_FAILED, m_pBuf);

        m_Flags = (HSA_SVM_FLAGS)0;

        /* keep trying to register the range until it works or the retry budget is used up */
        bool registered = false;
        for (;;) {
            const HSAKMT_STATUS status =
                RegisterSVMRange(defaultGPUNode, m_pBuf, vramBufSize, defaultGPUNode, m_Flags);
            if (status == HSAKMT_STATUS_SUCCESS) {
                registered = true;
                break;
            }

            if (failedAttempts++ > ALLOCATE_RETRY_TIMES)
                break;

            printf("retry %d allocate vram\n", failedAttempts);

            /* wait for 1 second to try allocate again */
            sleep(1);
        }

        if (!registered) {
            /* give up: release the unregistered mapping and stop allocating */
            munmap(m_pBuf, vramBufSize);
            break;
        }

        pBuffers.push_back(m_pBuf);
        if (Granularity) {
            EXPECT_SUCCESS(SVMRangSetGranularity(m_pBuf, vramBufSize, Granularity));
        }
        failedAttempts = 0;
    }
}

/* Unmaps every non-NULL buffer in pBuffers; each one is assumed to be vramBufSize
 * bytes long. The vector itself is left untouched, and m_pBuf ends up holding the
 * last entry visited.
 */
void KFDSVMEvictTest::FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize) {
    for (void *buffer : pBuffers) {
        m_pBuf = buffer;
        if (m_pBuf == NULL)
            continue;

        munmap(m_pBuf, vramBufSize);
    }
}

/*
 * Forks nprocesses - 1 children so that nprocesses processes run the test in total.
 *
 * In the parent: records every child pid in m_ChildPids and names itself with the
 * last index ("Test process <nprocesses - 1> ").
 * In a child: re-runs TearDown()/SetUp(), clears the inherited pid list, marks
 * itself as not-parent and returns immediately, so it never forks further.
 */
void KFDSVMEvictTest::ForkChildProcesses(int nprocesses) {
    const int childCount = nprocesses - 1;
    int procIdx = 0;

    while (procIdx < childCount) {
        const pid_t pid = fork();
        ASSERT_GE(pid, 0);

        if (pid > 0) {
            /* Parent process */
            m_ChildPids.push_back(pid);
            ++procIdx;
            continue;
        }

        /* Child process */
        /* Cleanup file descriptors copied from parent process
         * then call SetUp->hsaKmtOpenKFD to create new process
         */
        m_psName = "Test process " + std::to_string(procIdx) + " ";
        TearDown();
        SetUp();
        m_ChildPids.clear();
        m_IsParent = false;
        return;
    }

    /* only the parent gets here; procIdx is the first index no child was given */
    m_psName = "Test process " + std::to_string(procIdx) + " ";
}

/*
 * In the parent: waits for every child recorded in m_ChildPids, removes it from
 * the list and asserts that all of them exited normally with exit code 0.
 * In a child: there is nothing to wait for.
 *
 * m_ChildStatus is set to HSAKMT_STATUS_SUCCESS only when this point is reached
 * without a failed assertion; the destructor uses it as the child's exit code.
 */
void KFDSVMEvictTest::WaitChildProcesses() {
    if (m_IsParent) {
        /* only run by parent process */
        int childExitOkNum = 0;
        int size = m_ChildPids.size();

        for (int i = 0; i < size; i++) {
            pid_t pid = m_ChildPids.front();
            int childStatus = 0;

            /* Only trust childStatus when waitpid() really reaped this child;
             * on failure it returns -1 and does not fill in the status.
             */
            if (waitpid(pid, &childStatus, 0) == pid &&
                WIFEXITED(childStatus) && WEXITSTATUS(childStatus) == 0)
                childExitOkNum++;

            m_ChildPids.erase(m_ChildPids.begin());
        }

        ASSERT_EQ(childExitOkNum, size);
    }

    /* child process or parent process finished successfully */
    m_ChildStatus = HSAKMT_STATUS_SUCCESS;
}

/* Evict and restore procedure basic test
 *
 * Use N_PROCESSES processes to allocate vram buf size larger than total vram size
 *
 * ALLOCATE_BUF_SIZE_MB buf allocation size
 *
 * number of buffers per process is computed by GetBufferCounter():
 *   (vramSize + MIN(sysMemSize / 3, vramSize / 2)) / (vramBufSize * N_PROCESSES)
 * Total allocated by all processes: more than the VRAM size, at most 1.5x the VRAM size
 *
 * many times of eviction and restore will happen:
 * ttm will evict buffers of another process if not enough free vram
 * process restore will evict buffers of another process
 *
 * Sometimes the allocate may fail (maybe that is normal)
 * ALLOCATE_RETRY_TIMES max retry times to allocate
 *
 * This is basic test, no queue so vram are not used by GPU during test
 *
 * Todo:
 *    - Synchronization between the processes, so they know for sure when
 *        they are done allocating memory
 */
TEST_P(KFDSVMEvictTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    /* The test only runs with XNACK on */
    HSAint32 xnack_enable = 0;
    EXPECT_SUCCESS(hsaKmtGetXNACKMode(&xnack_enable));
    if (!xnack_enable) {
        LOG() << std::hex << "Test is skipped with xnack off" << std::endl;
        return;
    }

    /* Validate the node id as a signed value first: once stored in an unsigned
     * variable a negative "not found" result would always pass a >= 0 check.
     */
    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";
    HSAuint32 defaultGPUNode = gpuNode;
    HSAuint64 vramBufSize = ALLOCATE_BUF_SIZE_MB * 1024 * 1024;

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    HSAuint64 vramSize = GetVramSize(defaultGPUNode);

    if (!vramSize) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    } else {
        LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    }

    /* Number of buffers each process allocates; 0 means the test cannot run */
    HSAuint32 count = GetBufferCounter(vramSize, vramBufSize);
    if (count == 0) {
        LOG() << "Not enough system memory, skipping the test" << std::endl;
        return;
    }

    /* Fork the child processes */
    /* From here on the code is executed by the parent and by every child */
    ForkChildProcesses(N_PROCESSES);

    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers, 0);

    /* wait for other processes to finish allocation, then free buffer */
    sleep(ALLOCATE_RETRY_TIMES);

    LOG() << m_psName << "free buffer" << std::endl;
    FreeBuffers(pBuffers, vramBufSize);

    /* Parent: collect the children. Child: mark itself successful. */
    WaitChildProcesses();

    TEST_END
}

/* Evict and restore queue test
 *
 * N_PROCESSES processes read all local buffers in parallel while buffers are evicted and restored
 * If GPU vm page fault happens, then test shader will stop and failed to write specific value
 * at dest buffer. Test will report failed.
 *
 * Steps:
 *    - fork N_PROCESSES processes, each process does the same below
 *    - allocate local buffers, each buffer size is 64MB
 *    - allocate zero initialized host access address buffer and result buffer
 *        address buffer to pass address of local buffers to shader
 *        result buffer to store shader output result
 *    - submit queue to run ReadMemory shader
 *    - shader start m_DimX wavefronts, each wavefront keep reading one local buffer
 *    - notify shader to quit
 *    - check result buffer with specific value to confirm all wavefronts quit normally
 */
TEST_P(KFDSVMEvictTest, QueueTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    if (!SVMAPISupported())
        return;

    /* The test only runs with XNACK on */
    HSAint32 xnack_enable = 0;
    EXPECT_SUCCESS(hsaKmtGetXNACKMode(&xnack_enable));
    if (!xnack_enable) {
        LOG() << std::hex << "Test is skipped with xnack off" << std::endl;
        return;
    }

    /* Validate the node id as a signed value first: once stored in an unsigned
     * variable a negative "not found" result would always pass a >= 0 check.
     */
    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";
    HSAuint32 defaultGPUNode = gpuNode;
    unsigned int count = MAX_WAVEFRONTS;

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    /* Skip the test on chips that do not have CWSR, which the test depends on */
    if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties) || m_FamilyId >= FAMILY_NV) {
        LOG() << std::hex << "Test is skipped for family ID 0x" << m_FamilyId << std::endl;
        return;
    }

    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    /* All processes together need count * N_PROCESSES wavefronts; skip the test
     * when the wave estimate computed from the node properties is smaller.
     */
    uint32_t cu_num = pNodeProperties->NumFComputeCores / pNodeProperties->NumSIMDPerCU;
    uint32_t wave_num = MIN(cu_num * 40,
                        (pNodeProperties->NumShaderBanks / pNodeProperties->NumArrays) * 512);
    if (wave_num < count * N_PROCESSES) {
        LOG() << std::hex << "Test is skipped, wave_num " << wave_num << " not enough" << std::endl;
        return;
    }

    HSAuint32 i;
    HSAuint64 vramSize = GetVramSize(defaultGPUNode);

    if (!vramSize) {
        LOG() << "No VRAM found, skipping the test" << std::endl;
        return;
    } else {
        LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB." << std::endl;
    }

    /* Size of each of the `count` buffers; 0 means the test cannot run */
    HSAuint64 vramBufSize = GetBufferSize(vramSize, count, xnack_enable);
    if (vramBufSize == 0) {
        LOG() << "Not enough system memory, skipping the test" << std::endl;
        return;
    }
    /* assert all buffer address can be stored within one page
     * because only one page host memory srcBuf is allocated
     */
    ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));

    /* Fork the child processes */
    /* From here on the code is executed by the parent and by every child */
    ForkChildProcesses(N_PROCESSES);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer addrBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer resultBuffer(PAGE_SIZE, defaultGPUNode);

    std::vector<void *> pBuffers;
    HSAuint32 granularity = 0;
    /* xnack is on, shader code will trigger gpu page faults that bring data
     * to vram. use granularity to move all data from system buffer to vram
     * to reduce system ram pressure in order to avoid system ram oom in system
     * that has less system ram.
     */
    if (xnack_enable)
       granularity = 0xff;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers, granularity);

    /* one wavefront per buffer that was actually allocated (may be fewer than count) */
    unsigned int wavefront_num = pBuffers.size();
    LOG() << m_psName << "wavefront number " << wavefront_num << std::endl;

    void **localBufAddr = addrBuffer.As<void **>();
    unsigned int *result = resultBuffer.As<uint32_t *>();

    /* address buffer: one local buffer address per wavefront */
    for (i = 0; i < wavefront_num; i++)
        *(localBufAddr + i) = pBuffers[i];

    /* result buffer: seeded with the buffer size, checked against 0x5678 at the end */
    for (i = 0; i < wavefront_num; i++)
        *(result + i) = vramBufSize;

    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As<char*>()));

    PM4Queue pm4Queue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));

    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(localBufAddr, result);
    dispatch0.SetDim(wavefront_num, 1, 1);
    /* submit the packet and start shader */
    dispatch0.Submit(pm4Queue);

    /* doing evict/restore queue test for 5 seconds while queue is running */
    sleep(5);

    /* LOG() << m_psName << "notify shader to quit" << std::endl; */
    /* fill address buffer so shader quits */
    addrBuffer.Fill(0x5678);

    /* wait for shader to finish or timeout if shader has vm page fault */
    dispatch0.SyncWithStatus(g_TestTimeOut * 5);

    ASSERT_SUCCESS(pm4Queue.Destroy());
    /* LOG() << m_psName << "free buffer" << std::endl; */
    /* cleanup */
    FreeBuffers(pBuffers, vramBufSize);

    /* check if all wavefronts finish successfully */
    for (i = 0; i < wavefront_num; i++)
        ASSERT_EQ(0x5678, *(result + i));

    /* Parent: collect the children. Child: mark itself successful. */
    WaitChildProcesses();

    TEST_END
}

// Instantiates every KFDSVMEvictTest test once per parameter value (0 and 1).
// KFDSVMEvictTest::SetUp() passes the value to SVMSetXNACKMode().
INSTANTIATE_TEST_CASE_P(, KFDSVMEvictTest,::testing::Values(0, 1));


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDSVMEvictTest.hpp
================================================
/*
 * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_SVM_EVICT_TEST__H__
#define __KFD_SVM_EVICT_TEST__H__

#include <string>
#include <vector>
#include "KFDLocalMemoryTest.hpp"
#include "KFDBaseComponentTest.hpp"

// @class KFDSVMEvictTest
// Test eviction and restore procedure using multiple processes.
//
// The fixture is value-parameterized on an int (see WithParamInterface<int>)
// and derives from KFDLocalMemoryTest. It tracks whether the current process
// is the parent or a forked child: a child must never return to the gtest
// runner, so the destructor terminates it.
class KFDSVMEvictTest : public KFDLocalMemoryTest,
                        public ::testing::WithParamInterface<int> {
 public:
    // Starts as the parent process with a pessimistic child status.
    // NOTE(review): m_Flags and m_pBuf are not initialized here.
    KFDSVMEvictTest(void): m_ChildStatus(HSAKMT_STATUS_ERROR), m_IsParent(true) {}

    // In a child process: exits the process with m_ChildStatus as exit code.
    // In the parent: waits for the children, swallowing any exception so the
    // destructor never throws.
    ~KFDSVMEvictTest(void) {
        if (!m_IsParent) {
            /* child process has to exit
             * otherwise gtest will continue other tests
             */
            exit(m_ChildStatus);
        }

        try {
            WaitChildProcesses();
        } catch (...) {}
    }

 protected:
    // gtest fixture hooks; defined out of line.
    virtual void SetUp();
    virtual void TearDown();

 protected:
    // Helpers used by the test bodies; all are defined out of line, so the
    // notes below describe only what the signatures show.
    std::string CreateShader();
    // Fills pBuffers for defaultGPUNode from the given count, size and granularity.
    void AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
                    std::vector<void *> &pBuffers, HSAuint32 Granularity);
    // Counterpart of AllocBuffers.
    void FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize);
    void ForkChildProcesses(int nprocesses);
    void WaitChildProcesses();
    HSAint32 GetBufferCounter(HSAuint64 vramSize, HSAuint64 vramBufSize);
    HSAint64 GetBufferSize(HSAuint64 vramSize, HSAuint32 count,
                           HSAint32 xnack_enable);

 protected:  // members
    std::string     m_psName;       // process name, presumably used as a log prefix - confirm
    std::vector<pid_t> m_ChildPids; // pids of child processes (presumably filled by ForkChildProcesses)
    HSA_SVM_FLAGS   m_Flags;        // SVM flags; not set by the constructor
    void*           m_pBuf;         // buffer pointer; not set by the constructor
    HSAKMT_STATUS   m_ChildStatus;  // exit code passed to exit() by a child in the destructor
    bool            m_IsParent;     // false in a forked child; drives the destructor behaviour
};

#endif  // __KFD_SVM_EVICT_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDSVMRangeTest.cpp
================================================
/*
 * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "KFDSVMRangeTest.hpp"
#include <poll.h>
#include <sys/mman.h>
#include <vector>
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"

// Defined in another translation unit.
// NOTE(review): presumably the number of GPUs selected for testing - confirm at its definition.
extern unsigned int g_TestGPUsNum;

/* Fixture set-up: runs the KFDBaseComponentTest set-up first, then hands the
 * test parameter (GetParam()) to SVMSetXNACKMode().
 */
void KFDSVMRangeTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    SVMSetXNACKMode(GetParam());

    ROUTINE_END
}

/* Fixture tear-down: mirrors SetUp() in reverse order - SVMRestoreXNACKMode()
 * is called before the KFDBaseComponentTest tear-down.
 */
void KFDSVMRangeTest::TearDown() {
    ROUTINE_START

    SVMRestoreXNACKMode();

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* Worker for the BasicSystemMemTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Fills a page-sized SVM source range with 0x01010101, dispatches the
 * CopyDwordIsa shader on a PM4 queue with (source, destination) as arguments
 * and expects the first dword of the destination range to hold the pattern.
 *
 * Returns early (without failing) when the SVM API is not supported on the
 * node or when GetVramSize() reports 0 for it.
 */
static void BasicSystemMemTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    PM4Queue queue;
    unsigned int BufferSize = PAGE_SIZE;

    if (!pKFDSVMRangeTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaSVMRange srcSysBuffer(BufferSize, gpuNode);
    HsaSVMRange destSysBuffer(BufferSize,gpuNode);

    Assembler* m_pAsm;
    m_pAsm = pKFDSVMRangeTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    srcSysBuffer.Fill(0x01010101);

    /* Assemble the copy shader into the executable ISA buffer */
    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    queue.SetSkipWaitConsump(0);

    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(srcSysBuffer.As<void*>(), destSysBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    /* The shader must have copied the pattern into the destination range */
    EXPECT_EQ_GPU(destSysBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);
}

/* Parameterized test entry point: hands the BasicSystemMemTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Requires the ENVCAPS_64BITLINUX capability and runs in the
 * TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, BasicSystemMemTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicSystemMemTest));

    TEST_END
}

/* Worker for the SetGetAttributesTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * 1. Queries the attributes of a freshly created page-sized SVM range and
 *    compares them with the expected defaults (granularity is not checked).
 * 2. Sets a list of attributes on the range, reads them back and expects the
 *    values (or, for the access attributes, the types) to match what was set.
 *
 * Returns early when the SVM API is not supported on the node or the family
 * id is below FAMILY_AI.
 */
static void SetGetAttributesTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    unsigned int BufSize = PAGE_SIZE;
    /* Automatic storage: the range is released at function exit on every path */
    HsaSVMRange sysBuffer(BufSize);
    HSA_SVM_ATTRIBUTE inputAttributes[] = {
                                                {HSA_SVM_ATTR_PREFETCH_LOC, (HSAuint32)gpuNode},
                                                {HSA_SVM_ATTR_PREFERRED_LOC, (HSAuint32)gpuNode},
                                                {HSA_SVM_ATTR_SET_FLAGS,
                                                 HSA_SVM_FLAG_HOST_ACCESS | HSA_SVM_FLAG_GPU_EXEC | HSA_SVM_FLAG_COHERENT},
                                                {HSA_SVM_ATTR_GRANULARITY, 0x3F},
                                                {HSA_SVM_ATTR_ACCESS, (HSAuint32)gpuNode},
                                          };
    /* Derived from the table above so that the output array has a
     * compile-time bound (no variable-length array) and cannot get out of
     * sync with the input list.
     */
    const HSAuint32 nAttributes = sizeof(inputAttributes) / sizeof(inputAttributes[0]);
    HSA_SVM_ATTRIBUTE outputAttributes[nAttributes];

    /* Expected defaults, index-aligned with inputAttributes */
    HSAuint32 expectedDefaultResults[] = {
                                             INVALID_NODEID,
                                             INVALID_NODEID,
                                             HSA_SVM_FLAG_HOST_ACCESS | HSA_SVM_FLAG_COHERENT,
                                             9,
                                             0,
                                         };
    HSAint32 enable = -1;
    EXPECT_SUCCESS_GPU(hsaKmtGetXNACKMode(&enable), gpuNode);
    /* The expected default access attribute type depends on the XNACK mode */
    expectedDefaultResults[4] = (enable) ?
                                 HSA_SVM_ATTR_ACCESS : HSA_SVM_ATTR_NO_ACCESS;
    char *pBuf = sysBuffer.As<char *>();

    LOG() << "Get default atrributes" << std::endl;
    /* Seed the output array with the attribute types (and node ids) to query */
    memcpy(outputAttributes, inputAttributes, sizeof(outputAttributes));
    EXPECT_SUCCESS_GPU(hsaKmtSVMGetAttr(pBuf, BufSize,
                                    nAttributes, outputAttributes), gpuNode);

    for (HSAuint32 i = 0; i < nAttributes; i++) {
        /* Default granularity could be specified using module parameter,
         * therefore it is incorrect to expect a particular value
         */
        if (outputAttributes[i].type == HSA_SVM_ATTR_GRANULARITY)
            continue;

        /* For the access attributes the result is reported in .type */
        if (outputAttributes[i].type == HSA_SVM_ATTR_ACCESS ||
            outputAttributes[i].type == HSA_SVM_ATTR_ACCESS_IN_PLACE ||
            outputAttributes[i].type == HSA_SVM_ATTR_NO_ACCESS)
            EXPECT_EQ_GPU(outputAttributes[i].type, expectedDefaultResults[i], gpuNode);
        else
            EXPECT_EQ_GPU(outputAttributes[i].value, expectedDefaultResults[i], gpuNode);
    }

    LOG() << "Setting/Getting atrributes" << std::endl;
    /* Re-seed the query list before reading back what is set below */
    memcpy(outputAttributes, inputAttributes, sizeof(outputAttributes));
    EXPECT_SUCCESS_GPU(hsaKmtSVMSetAttr(pBuf, BufSize,
                                    nAttributes, inputAttributes), gpuNode);
    EXPECT_SUCCESS_GPU(hsaKmtSVMGetAttr(pBuf, BufSize,
                                    nAttributes, outputAttributes), gpuNode);
    for (HSAuint32 i = 0; i < nAttributes; i++) {
        if (outputAttributes[i].type == HSA_SVM_ATTR_ACCESS ||
            outputAttributes[i].type == HSA_SVM_ATTR_ACCESS_IN_PLACE ||
            outputAttributes[i].type == HSA_SVM_ATTR_NO_ACCESS)
            EXPECT_EQ_GPU(inputAttributes[i].type, outputAttributes[i].type, gpuNode);
        else
            EXPECT_EQ_GPU(inputAttributes[i].value, outputAttributes[i].value, gpuNode);
    }
}

/* Parameterized test entry point: hands the SetGetAttributesTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Requires the ENVCAPS_64BITLINUX capability and runs in the
 * TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, SetGetAttributesTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SetGetAttributesTest));

    TEST_END
}

/* Toggles the XNACK mode twice (to the opposite of the current mode and back).
 *
 * For every mode that can be set, a PM4 queue is created on each GPU node and
 * a further hsaKmtSetXNACKMode() call is expected to fail with
 * HSAKMT_STATUS_ERROR while that queue exists. A mode reported as
 * HSAKMT_STATUS_NOT_SUPPORTED is only logged.
 */
TEST_P(KFDSVMRangeTest, XNACKModeTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    HSAuint32 i, j;
    HSAint32 r;
    PM4Queue queue;
    HSAint32 enable = 0;
    const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();

    EXPECT_SUCCESS(hsaKmtGetXNACKMode(&enable));
    for (i = 0; i < 2; i++) {
        enable = !enable;
        r = hsaKmtSetXNACKMode(enable);
        if (r == HSAKMT_STATUS_SUCCESS) {
            /* std::boolalpha only affects bool operands, so convert the
             * integer mode explicitly to get "true"/"false" in the log.
             */
            LOG() << "XNACK mode: " << std::boolalpha << (enable != 0) <<
                     " supported" << std::endl;

            for (j = 0; j < gpuNodes.size(); j++) {
                LOG() << "Creating queue and try to set xnack mode on node: "
                      << gpuNodes.at(j) << std::endl;
                ASSERT_SUCCESS(queue.Create(gpuNodes.at(j)));
                EXPECT_EQ(HSAKMT_STATUS_ERROR,
                        hsaKmtSetXNACKMode(enable));
                EXPECT_SUCCESS(queue.Destroy());
            }
        } else if (r == HSAKMT_STATUS_NOT_SUPPORTED) {
            LOG() << "XNACK mode: " << std::boolalpha << (enable != 0) <<
                     " NOT supported" << std::endl;
        } else {
            /* Neither accepted nor reported as unsupported: make the
             * unexpected status visible instead of dropping it silently.
             */
            WARN() << "hsaKmtSetXNACKMode returned unexpected status " << r
                   << std::endl;
        }
    }

    TEST_END
}

/* Worker for the InvalidRangeTest case: registering an SVM range at the fixed
 * address 0x10000 (size 0x1000) is expected NOT to succeed on the GPU node
 * given in pTestParamters->gpuNode.
 * Does nothing when the SVM API is not supported on that node.
 */
static void InvalidRangeTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    if (!pTest->SVMAPISupported_GPU(gpuNode))
        return;

    HSAuint32 svmFlags = HSA_SVM_FLAG_HOST_ACCESS | HSA_SVM_FLAG_COHERENT;
    void *bogusAddress = reinterpret_cast<void *>(0x10000);

    HSAKMT_STATUS status = RegisterSVMRange(gpuNode, bogusAddress, 0x1000, 0, svmFlags);
    EXPECT_NE_GPU(status, HSAKMT_STATUS_SUCCESS, gpuNode);
}

/* Parameterized test entry point: hands the InvalidRangeTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Runs in the TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, InvalidRangeTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(InvalidRangeTest));

    TEST_END
}

void KFDSVMRangeTest::SplitRangeTest(int gpuNode, int prefetch_location) {
    unsigned int BufSize = 16 * PAGE_SIZE;

    HsaSVMRange *sysBuffer;
    HsaSVMRange *sysBuffer2;
    HsaSVMRange *sysBuffer3;
    HsaSVMRange *sysBuffer4;

    void *pBuf;

    // case 1
    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 8192, PAGE_SIZE, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize);

    // case 2.1
    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 4096, BufSize - 4096, gpuNode,
                                 prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize);

    // case 2.2
    pBuf = mmap(0, BufSize + 8192, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 8192, BufSize, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize + 8192);

    // case 3
    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(reinterpret_cast<char *>(pBuf), BufSize - 8192, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize);

    // case 4.1
    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize);

    // case 4.2
    pBuf = mmap(0, BufSize + 8192, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(pBuf, BufSize, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(pBuf, BufSize + 8192, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer;
    munmap(pBuf, BufSize + 8192);

    // case 5
    pBuf = mmap(0, BufSize + 65536, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 8192, 8192, gpuNode, prefetch_location);
    sysBuffer2 = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 32768, 8192, gpuNode, prefetch_location);
    sysBuffer3 = new HsaSVMRange(pBuf, BufSize + 65536, gpuNode, prefetch_location);
    delete sysBuffer2;
    delete sysBuffer3;
    delete sysBuffer;
    munmap(pBuf, BufSize + 65536);

    // case 6, unregister after free
    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    sysBuffer = new HsaSVMRange(reinterpret_cast<char *>(pBuf) + 8192, 8192, gpuNode, prefetch_location);
    munmap(pBuf, BufSize);
    delete sysBuffer;
}

/* Worker for the SplitSystemRangeTest case: runs SplitRangeTest() for the
 * GPU node in pTestParamters->gpuNode with prefetch location 0.
 * Skipped (with a log line) for family ids below FAMILY_AI, and silently when
 * the SVM API is not supported on the node.
 */
static void SplitSystemRangeTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    unsigned int familyId = pTest->GetFamilyIdFromNodeId(gpuNode);
    if (familyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << familyId << "." << std::endl;
        return;
    }

    if (pTest->SVMAPISupported_GPU(gpuNode)) {
        pTest->SplitRangeTest(gpuNode, 0);
    }
}

/* Parameterized test entry point: hands the SplitSystemRangeTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Runs in the TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, SplitSystemRangeTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SplitSystemRangeTest));

    TEST_END
}

/* Worker for the EvictSystemRangeTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Registers three page-sized SVM ranges that live inside a stack array,
 * accesses them from a PM4 shader dispatch and an SDMA write, then forks a
 * child so that the parent's next CPU writes hit copy-on-write pages. After
 * the child is terminated, the GPU accesses are repeated and the values seen
 * by the CPU are checked.
 *
 * Skipped for family ids below FAMILY_AI or when the SVM API is not supported.
 */
static void EvictSystemRangeTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    Assembler* m_pAsm;
    m_pAsm = pKFDSVMRangeTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    /* Stack array of 2 * PAGE_SIZE dwords that hosts all three ranges */
    HSAuint32 stackData[2 * PAGE_SIZE] = {0};
    /* Page-aligned address inside stackData, at most PAGE_SIZE bytes past its
     * start (assumes PAGE_SIZE is a power of two)
     */
    char *pBuf = reinterpret_cast<char *>(((uint64_t)stackData + PAGE_SIZE) & ~(PAGE_SIZE - 1));
    HSAuint32 *globalData = reinterpret_cast<uint32_t *>(pBuf);
    /* Dword index into stackData of the address pBuf + 2 * PAGE_SIZE */
    const unsigned dstOffset = ((uint64_t)pBuf + 2 * PAGE_SIZE - (uint64_t)stackData) / 4;
    /* PAGE_SIZE dwords (not bytes) further into stackData */
    const unsigned sdmaOffset = dstOffset + PAGE_SIZE;

    *globalData = 0xdeadbeef;

    /* Source of the shader copy, destination of the shader copy, and target
     * of the SDMA write
     */
    HsaSVMRange srcBuffer((globalData), PAGE_SIZE, gpuNode);
    HsaSVMRange dstBuffer(&stackData[dstOffset], PAGE_SIZE, gpuNode);
    HsaSVMRange sdmaBuffer(&stackData[sdmaOffset], PAGE_SIZE, gpuNode);

    /* Create PM4 and SDMA queues before fork+COW to test queue
     * eviction and restore
     */
    PM4Queue pm4Queue;
    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(pm4Queue.Create(gpuNode), gpuNode);
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    /* First round of GPU accesses, before the fork */
    Dispatch dispatch0(isaBuffer);
    dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch0.Submit(pm4Queue);
    dispatch0.Sync(g_TestTimeOut);

    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(),
                                   sdmaBuffer.As<HSAuint32 *>(), 0x12345678));

    sdmaQueue.Wait4PacketConsumption();
    EXPECT_TRUE_GPU(WaitOnValue(&stackData[sdmaOffset], 0x12345678), gpuNode);

    /* Fork a child process to mark pages as COW */
    pid_t pid = fork();
    ASSERT_GE_GPU(pid, 0, gpuNode);
    if (pid == 0) {
        /* Child process waits for a SIGTERM from the parent. It can't
         * make any write access to the stack because we want the
         * parent to make the first write access and get a new copy. A
         * busy loop is the safest way to do that, since any function
         * call (e.g. sleep) would write to the stack.
         */
        /* NOTE(review): the two statements after the loop are unreachable;
         * also confirm the compiler in use does not treat this side-effect-free
         * infinite loop as removable.
         */
        while (1)
        {}
        WARN() << "Shouldn't get here!" << std::endl;
        exit(0);
    }

    /* Parent process writes to COW page(s) and gets a new copy. MMU
     * notifier needs to update the GPU mapping(s) for the test to
     * pass.
     */
    *globalData = 0xD00BED00;
    stackData[dstOffset] = 0xdeadbeef;
    stackData[sdmaOffset] = 0xdeadbeef;

    /* Terminate the child process before a possible test failure that
     * would leave it spinning in the background indefinitely.
     */
    int status;
    EXPECT_EQ_GPU(0, kill(pid, SIGTERM),gpuNode);
    EXPECT_EQ_GPU(pid, waitpid(pid, &status, 0), gpuNode);
    EXPECT_NE_GPU(0, WIFSIGNALED(status), gpuNode);
    EXPECT_EQ_GPU(SIGTERM, WTERMSIG(status), gpuNode);

    /* Now check that the GPU is accessing the correct page */
    Dispatch dispatch1(isaBuffer);
    dispatch1.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
    dispatch1.Submit(pm4Queue);
    dispatch1.Sync(g_TestTimeOut);

    sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(),
                                   sdmaBuffer.As<HSAuint32 *>(), 0xD0BED0BE));
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_SUCCESS_GPU(pm4Queue.Destroy(), gpuNode);
    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);

    /* The source keeps the parent's value, the shader copied that value to the
     * destination, and the SDMA write replaced the third buffer's value
     */
    EXPECT_EQ_GPU(0xD00BED00, *globalData, gpuNode);
    EXPECT_EQ_GPU(0xD00BED00, stackData[dstOffset], gpuNode);
    EXPECT_EQ_GPU(0xD0BED0BE, stackData[sdmaOffset],gpuNode);

}

/* Parameterized test entry point: hands the EvictSystemRangeTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Runs in the TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, EvictSystemRangeTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(EvictSystemRangeTest));

    TEST_END
}

/* Worker for the PartialUnmapSysMemTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Registers a 16-page SVM range on an anonymous mapping, fills it with
 * 0x01010101, then unmaps four pages starting at byte offset 8192. Two shader
 * dispatches then copy a dword from the part after the hole and from the
 * start of the mapping, and the pattern is expected in the destination both
 * times.
 *
 * Does nothing when the SVM API is not supported on the node.
 */
static void PartialUnmapSysMemTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    Assembler* m_pAsm;
    m_pAsm = pKFDSVMRangeTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    unsigned int BufSize = 16 * PAGE_SIZE;
    void *pBuf;

    PM4Queue queue;
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaSVMRange destSysBuffer(BufSize, gpuNode);

    pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    /* mmap reports failure as MAP_FAILED, not NULL; normalise it so the
     * NOTNULL assertion can stop the test before the address is used.
     */
    if (pBuf == MAP_FAILED)
        pBuf = NULL;
    ASSERT_NOTNULL_GPU(pBuf, gpuNode);

    /* Automatic storage so the range object is released on every exit path
     * (it was previously heap-allocated and never deleted). It is destroyed
     * after the mapping has been removed, as in "case 6" of SplitRangeTest.
     */
    HsaSVMRange sysBuffer(pBuf, BufSize, gpuNode, 0);
    sysBuffer.Fill(0x01010101);

    /* Hole: four pages starting 8192 bytes into the mapping */
    char *pBuf2 = reinterpret_cast<char *>(pBuf) + 8192;
    unsigned int Buf2Size = 4 * PAGE_SIZE;
    /* First byte after the hole */
    char *pBuf3 = pBuf2 + Buf2Size;

    munmap(pBuf2, Buf2Size);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);

    Dispatch dispatch(isaBuffer);
    Dispatch dispatch2(isaBuffer);

    /* Read from the part of the range that follows the hole */
    dispatch.SetArgs(pBuf3, destSysBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);
    EXPECT_EQ_GPU(destSysBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);

    /* Read from the part of the range that precedes the hole */
    dispatch2.SetArgs(pBuf, destSysBuffer.As<void*>());
    dispatch2.Submit(queue);
    dispatch2.Sync(g_TestTimeOut);

    EXPECT_EQ_GPU(destSysBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    /* Unmap the two parts of the mapping that are still mapped */
    munmap(pBuf, 8192);
    munmap(pBuf3, BufSize - 8192 - Buf2Size);

}

/* Parameterized test entry point: hands the PartialUnmapSysMemTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Requires the ENVCAPS_64BITLINUX capability and runs in the
 * TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, PartialUnmapSysMemTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

   ASSERT_SUCCESS(KFDTest_Launch(PartialUnmapSysMemTest));

    TEST_END
}

/* Worker for the BasicVramTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Copies a dword pattern with two shader dispatches: source range ->
 * locBuffer, then locBuffer -> destination range, and expects the pattern in
 * the destination. locBuffer is the range created with gpuNode as its third
 * constructor argument.
 *
 * Returns early when the SVM API is not supported on the node or when
 * GetVramSize() reports 0 for it.
 */
static void BasicVramTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    Assembler* m_pAsm;
    m_pAsm = pKFDSVMRangeTest->GetAssemblerFromNodeId(gpuNode);
    ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);

    PM4Queue queue;
    unsigned int BufferSize = PAGE_SIZE;

    if (!pKFDSVMRangeTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaSVMRange srcSysBuffer(BufferSize, gpuNode);
    HsaSVMRange locBuffer(BufferSize, gpuNode, gpuNode);
    HsaSVMRange destSysBuffer(BufferSize, gpuNode);

    srcSysBuffer.Fill(0x01010101);

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()), gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode), gpuNode);
    queue.SetSkipWaitConsump(0);

    Dispatch dispatch(isaBuffer);
    Dispatch dispatch2(isaBuffer);

    /* Hop 1: source range -> locBuffer */
    dispatch.SetArgs(srcSysBuffer.As<void*>(), locBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    /* Hop 2: locBuffer -> destination range */
    dispatch2.SetArgs(locBuffer.As<void*>(), destSysBuffer.As<void*>());
    dispatch2.Submit(queue);
    dispatch2.Sync(g_TestTimeOut);

    EXPECT_SUCCESS_GPU(queue.Destroy(), gpuNode);

    EXPECT_EQ_GPU(destSysBuffer.As<unsigned int*>()[0], 0x01010101, gpuNode);

}

/* Parameterized test entry point: hands the BasicVramTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Requires the ENVCAPS_64BITLINUX capability and runs in the
 * TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, BasicVramTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(BasicVramTest));

    TEST_END
}

/* Worker for the SplitVramRangeTest case: runs SplitRangeTest() for the GPU
 * node in pTestParamters->gpuNode, using that same node as the prefetch
 * location.
 * Skipped silently when the SVM API is not supported on the node, and with a
 * log line for family ids below FAMILY_AI.
 */
static void SplitVramRangeTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    if (!pTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int familyId = pTest->GetFamilyIdFromNodeId(gpuNode);
    if (familyId >= FAMILY_AI) {
        pTest->SplitRangeTest(gpuNode, gpuNode);
        return;
    }

    LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << familyId << "." << std::endl;
}

/* Parameterized test entry point: hands the SplitVramRangeTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Runs in the TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, SplitVramRangeTest) {
    TEST_START(TESTPROFILE_RUNALL)

    ASSERT_SUCCESS(KFDTest_Launch(SplitVramRangeTest));

    TEST_END
}

/* Worker for the PrefetchTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Part 1: creates and immediately destroys a 16 KiB SVM range, then queries
 * HSA_SVM_ATTR_ACCESS on the stale address; if the query succeeds the
 * returned value must differ from gpuNode.
 * Part 2: on a new range, expects prefetch node 0 initially, prefetches the
 * upper half to gpuNode, expects gpuNode for that half and 0xffffffff for the
 * range as a whole.
 *
 * Does nothing when the SVM API is not supported on the node.
 */
static void PrefetchTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    if (!pTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int rangeSize = 16 << 10;
    unsigned int halfSize = rangeSize / 2;
    uint32_t prefetchNode;
    char *pRange;

    /* Remember the address of a range that no longer exists */
    {
        HsaSVMRange shortLived(rangeSize, gpuNode);
        pRange = shortLived.As<char *>();
    }

    /* The query may fail outright; only a successful answer is checked */
    HSA_SVM_ATTRIBUTE accessAttr;
    accessAttr.type = HSA_SVM_ATTR_ACCESS;
    accessAttr.value = 0;
    HSAKMT_STATUS queryStatus = hsaKmtSVMGetAttr(pRange, rangeSize, 1, &accessAttr);
    if (queryStatus == HSAKMT_STATUS_SUCCESS) {
        EXPECT_NE_GPU(accessAttr.value, gpuNode, gpuNode);
    }

    /* Range used for the prefetch checks; released at function exit */
    HsaSVMRange range(rangeSize, gpuNode);
    pRange = range.As<char *>();
    char *pUpperHalf = pRange + halfSize;

    EXPECT_SUCCESS_GPU(SVMRangeGetPrefetchNode(pRange, rangeSize, &prefetchNode), gpuNode);
    EXPECT_EQ_GPU(prefetchNode, 0, gpuNode);

    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(pUpperHalf, halfSize, gpuNode), gpuNode);

    EXPECT_SUCCESS_GPU(SVMRangeGetPrefetchNode(pUpperHalf, halfSize, &prefetchNode), gpuNode);
    EXPECT_EQ_GPU(prefetchNode, gpuNode, gpuNode);

    /* The two halves now disagree, so the whole-range query yields 0xffffffff */
    EXPECT_SUCCESS_GPU(SVMRangeGetPrefetchNode(pRange, rangeSize, &prefetchNode), gpuNode);
    EXPECT_EQ_GPU(prefetchNode, 0xffffffff, gpuNode);
}

/* Parameterized test entry point: hands the PrefetchTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Runs in the TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, PrefetchTest) {
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(PrefetchTest));

    TEST_END
}

/* Worker for the MigrateTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * Two 16 MiB SVM ranges are repeatedly (8 times) prefetched to the GPU node,
 * overwritten by SDMA with a reference pattern, and then read back by the
 * CPU, which must see the pattern. A final SDMA copy and CPU read-back is
 * done on the first range after the loop.
 *
 * Skipped when the SVM API is not supported, the family id is below
 * FAMILY_AI, or GetVramSize() reports 0 for the node.
 */
static void MigrateTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    if (!pTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int familyId = pTest->GetFamilyIdFromNodeId(gpuNode);
    if (familyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << familyId << "." << std::endl;
        return;
    }

    if (!pTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    unsigned int rangeBytes = 16 << 20;
    HSAuint32 rangeDwords = rangeBytes / 4;

    /* Reference pattern: pattern[k] == k */
    HsaSVMRange patternRange(rangeBytes, gpuNode);
    HSAuint32 *pattern = patternRange.As<HSAuint32 *>();

    /* The two ranges under test start out prefetched to node 0 */
    HsaSVMRange firstRange(rangeBytes, gpuNode);
    HSAuint32 *first = firstRange.As<HSAuint32 *>();
    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(first, rangeBytes, 0), gpuNode);

    HsaSVMRange secondRange(rangeBytes, gpuNode);
    HSAuint32 *second = secondRange.As<HSAuint32 *>();
    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(second, rangeBytes, 0), gpuNode);

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    for (HSAuint32 k = 0; k < rangeDwords; k++)
        pattern[k] = k;

    for (HSAuint32 round = 0; round < 8; round++) {
        /* Migrate both ranges from ram to vram */
        EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(first, rangeBytes, gpuNode), gpuNode);
        EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(second, rangeBytes, gpuNode), gpuNode);

        /* Overwrite the migrated ranges with the pattern, one SDMA copy each */
        sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                    first, pattern, rangeBytes));
        sdmaQueue.Wait4PacketConsumption();
        sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                    second, pattern, rangeBytes));
        sdmaQueue.Wait4PacketConsumption();

        /* CPU reads of the ranges fault and bring them back from vram to
         * ram, so the CPU must observe exactly what SDMA wrote.
         */
        for (HSAuint32 k = 0; k < rangeDwords; k++) {
            ASSERT_EQ_GPU(k, first[k], gpuNode);
            ASSERT_EQ_GPU(k, second[k], gpuNode);
        }
    }

    /* After the last migration back to ram the GPU must still reach the
     * range: with xnack off the GPU mapping should point at ram, with xnack
     * on the mapping is cleared and a GPU vm fault has to rebuild it.
     */
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                first, pattern, rangeBytes));
    sdmaQueue.Wait4PacketConsumption();
    for (HSAuint32 k = 0; k < rangeDwords; k++)
        ASSERT_EQ_GPU(k, first[k], gpuNode);
}

/* Parameterized test entry point: hands the MigrateTest worker to
 * KFDTest_Launch and requires the launch to report success.
 * Requires the ENVCAPS_64BITLINUX capability and runs in the
 * TESTPROFILE_RUNALL profile.
 */
TEST_P(KFDSVMRangeTest, MigrateTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(MigrateTest));

    TEST_END
}

/* Worker for the MigrateAccessInPlaceTest case, run for the GPU node given in
 * pTestParamters->gpuNode.
 *
 * An SVM range of min(256 MiB, VRAM size / 2) bytes is mapped in place to the
 * node and prefetched to it. The CPU then writes one dword every 1024 dwords,
 * SDMA fills the whole range with 0x55AAAA55, and the CPU checks the same
 * sampled dwords.
 *
 * Skipped when the SVM API is not supported, the family id is below
 * FAMILY_AI, or GetVramSize() reports 0 for the node.
 */
static void MigrateAccessInPlaceTest(KFDTEST_PARAMETERS* pTestParamters) {
    KFDSVMRangeTest* pTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;
    int gpuNode = pTestParamters->gpuNode;

    if (!pTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int familyId = pTest->GetFamilyIdFromNodeId(gpuNode);
    if (familyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << familyId << "." << std::endl;
        return;
    }

    if (!pTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    unsigned int rangeBytes = MIN(256ULL << 20, pTest->GetVramSize(gpuNode) / 2);

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode),gpuNode);

    HsaSVMRange range(rangeBytes, gpuNode);
    HSAuint32 *dwords = range.As<HSAuint32 *>();

    EXPECT_SUCCESS_GPU(SVMRangeMapInPlaceToNode(dwords, rangeBytes, gpuNode), gpuNode);
    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(dwords, rangeBytes, gpuNode), gpuNode);

    /* CPU touches one dword out of every 1024 across the whole range */
    HSAuint32 k = 0;
    while (k < rangeBytes / 4) {
        dwords[k] = k;
        k += 1024;
    }

    /* SDMA overwrites the entire range after the CPU accesses above */
    sdmaQueue.PlaceAndSubmitPacket(SDMAFillDataPacket(sdmaQueue.GetFamilyId(),
           dwords, 0x55AAAA55, rangeBytes));
    sdmaQueue.Wait4PacketConsumption();

    /* The CPU must see the SDMA fill value at every sampled dword */
    k = 0;
    while (k < rangeBytes / 4) {
        ASSERT_EQ_GPU(0x55AAAA55, dwords[k], gpuNode);
        k += 1024;
    }

    ASSERT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);
}
/*
 * Test that the GPU mapping to system memory is correct after a range in VRAM
 * is split and migrated back to system memory.
 *
 * The steps are the same with XNACK on or off:
 *   1. Allocate a range of up to 256MB in system memory and set ACCESS_IN_PLACE for the GPU
 *   2. Prefetch to migrate the range to GPU VRAM
 *   3. Use the CPU to fill the range; the range is migrated back to system memory and split
 *      by granularity, and the GPU mapping is updated to system memory
 *   4. Use GPU SDMA to fill the range in system memory
 *   5. Check that the data in system memory is correct
 *
 * The entry point below hands the MigrateAccessInPlaceTest worker to
 * KFDTest_Launch and requires the launch to report success.
 */
TEST_P(KFDSVMRangeTest, MigrateAccessInPlaceTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(MigrateAccessInPlaceTest));

    TEST_END
}

/*
 * The test changes migration granularity, then trigger CPU page fault to migrate
 * the svm range from vram to ram.
 * Check the dmesg driver output to confirm the number of CPU page fault is correct
 * based on granularity.
 *
 * For example, this is BufferPages = 5, while granularity change from 2 to 0
 * [  292.623498] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597ee000
 * [  292.623727] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f0000
 * [  292.724414] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597ee000
 * [  292.724824] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f0000
 * [  292.725094] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f2000
 * [  292.728186] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597ee000
 * [  292.729171] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597ef000
 * [  292.729576] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f0000
 * [  292.730010] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f1000
 * [  292.730931] amdgpu:svm_migrate_to_ram:744: CPU page fault address 0x7f22597f2000
 */

/*
 * Test body launched through KFDTest_Launch for MigrateGranularityTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Starting from the first power-of-two exponent whose page count exceeds
 * BufferPages and counting down to 0, each iteration prefetches the range to
 * the GPU node, sets that granularity on the range, uses SDMA to copy a
 * per-page pattern into it, and then reads one dword per page from the CPU to
 * check the pattern.
 */
static void MigrateGranularityTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        /* Node id in decimal, family id in hex, each next to its own label */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << m_FamilyId
              << "." << std::endl;
        return;
    }

    if (!pKFDSVMRangeTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found on gpuNode " << gpuNode << "." << std::endl;
        return;
    }

    HSAuint64 BufferPages = 16384;
    HSAuint64 BufferSize = BufferPages * PAGE_SIZE;

    /* Range under test */
    HsaSVMRange SysBuffer(BufferSize, gpuNode);
    HSAint32 *pBuf = SysBuffer.As<HSAint32*>();

    /* Source of the per-page pattern that SDMA copies into the range under test */
    HsaSVMRange SysBuffer2(BufferSize, gpuNode);
    HSAint32 *pBuf2 = SysBuffer2.As<HSAint32*>();

    HSAint32 Granularity;

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    /* Smallest exponent such that (1 << Granularity) pages exceeds BufferPages */
    for (Granularity = 0; (1ULL << Granularity) <= BufferPages; Granularity++);

    /* First dword of every page holds the page index */
    for (HSAuint32 i = 0; i < BufferPages; i++)
        pBuf2[i * PAGE_SIZE / 4] = i;

    while (Granularity--) {
        /* Prefetch the entire range to vram */
        EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(pBuf, BufferSize, gpuNode), gpuNode);
        EXPECT_SUCCESS_GPU(SVMRangSetGranularity(pBuf, BufferSize, Granularity), gpuNode);

        /* Change Buffer content in vram, then migrate it back to ram */
        sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                        pBuf, pBuf2, BufferSize));
        sdmaQueue.Wait4PacketConsumption();

        /* Migrate from vram to ram: the CPU reads one dword per page */
        for (HSAuint32 i = 0; i < BufferPages; i++)
            ASSERT_EQ_GPU(i, pBuf[i * PAGE_SIZE / 4], gpuNode);
    }

}

TEST_P(KFDSVMRangeTest, MigrateGranularityTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static MigrateGranularityTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(MigrateGranularityTest));

    TEST_END
}

/*
 * Test body launched through KFDTest_Launch for MigrateLargeBufTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Uses a buffer of up to 1GB (capped at 3/4 of VRAM) and copies it with SDMA
 * in chunks of at most maxSDMASize, checking the data after the range is
 * registered to the GPU node, after the CPU reads it back, and after the CPU
 * rewrites it.
 */
static void MigrateLargeBufTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned long BufferSize = 1L << 30;

    const unsigned long maxSDMASize = 128L << 20;  /* IB size is 4K */
    unsigned long i;

    HSAuint64 vramSize;
    vramSize = pKFDSVMRangeTest->GetVramSize(gpuNode);
    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    BufferSize = MIN(BufferSize, vramSize * 3 / 4);

    /* Check if the system memory size is sufficient
     * to register the system buffer and system buffer 2
     */
    if(BufferSize * 2 > pKFDSVMRangeTest->GetSysMemSize() / 2) {
        LOG() << "Skipping test: Not enough system memory." << std::endl;
        return;
    }
    HsaSVMRange SysBuffer(BufferSize, gpuNode);
    SysBuffer.Fill(0x1);

    HsaSVMRange SysBuffer2(BufferSize, gpuNode);
    SysBuffer2.Fill(0x2);

    /* Migrate from ram to vram
     * using same address to register to GPU to trigger migration
     * so LocalBuffer will have same value as SysBuffer
     */
    HsaSVMRange LocalBuffer(SysBuffer.As<void*>(), BufferSize, gpuNode, gpuNode);

    SDMAQueue sdmaQueue;

    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    /* Copy BufferSize bytes from pSrc to pDst, one SDMA packet of at most
     * maxSDMASize bytes at a time, waiting for each packet before the next.
     */
    auto sdmaCopyInChunks = [&](char *pDst, char *pSrc) {
        unsigned long Size;

        for (unsigned long offset = 0; offset < BufferSize; offset += Size) {
            Size = (BufferSize - offset) > maxSDMASize ? maxSDMASize : (BufferSize - offset);
            sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                        pDst + offset, pSrc + offset, Size));
            sdmaQueue.Wait4PacketConsumption();
        }
    };

    sdmaCopyInChunks(SysBuffer2.As<char*>(), LocalBuffer.As<char*>());

    /* Check content in migrated buffer in vram */
    for (i = 0; i < BufferSize / 4; i += 1024)
        ASSERT_EQ_GPU(0x1, SysBuffer2.As<unsigned int*>()[i], gpuNode);

    /* Change LocalBuffer content in vram, then migrate it back to ram */
    SysBuffer2.Fill(0x3);

    sdmaCopyInChunks(LocalBuffer.As<char*>(), SysBuffer2.As<char*>());

    /* Migrate from vram to ram
     * CPU access the buffer migrated to vram have page fault
     * page fault trigger migration from vram back to ram
     * so SysBuffer should have same value as in LocalBuffer
     */
    EXPECT_SUCCESS_GPU(SVMRangSetGranularity(SysBuffer.As<unsigned int*>(), BufferSize, 30),gpuNode);
    for (i = 0; i < BufferSize / 4; i += 1024)
        ASSERT_EQ_GPU(0x3, SysBuffer.As<unsigned int*>()[i], gpuNode);

    /* After migrating back to ram, GPU mapping should be updated to ram
     * test if the GPU can read from ram
     */
    SysBuffer.Fill(0x4);

    sdmaCopyInChunks(SysBuffer2.As<char*>(), LocalBuffer.As<char*>());

    for (i = 0; i < BufferSize / 4; i += 1024)
        ASSERT_EQ_GPU(0x4, SysBuffer2.As<unsigned int*>()[i],gpuNode);

}

TEST_P(KFDSVMRangeTest, MigrateLargeBufTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static MigrateLargeBufTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(MigrateLargeBufTest));

    TEST_END
}

/*
 * Test body launched through KFDTest_Launch for MigratePolicyTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Prefetches a 1MB range to the GPU node, fills it through SDMA, reads it
 * back from the CPU (which also rewrites every element), and finally has the
 * GPU copy the rewritten data out again so the result can be checked.
 */
static void MigratePolicyTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        /* Node id in decimal, family id in hex, each next to its own label */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << m_FamilyId
              << "." << std::endl;
        return;
    }

    if (!pKFDSVMRangeTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    unsigned long BufferSize = 1UL << 20;

    HsaSVMRange DataBuffer(BufferSize, gpuNode);
    HSAuint64 *pData = DataBuffer.As<HSAuint64 *>();

    HsaSVMRange SysBuffer(BufferSize, gpuNode);
    HSAuint64 *pBuf = SysBuffer.As<HSAuint64 *>();

    SDMAQueue sdmaQueue;
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode),gpuNode);

    /* Reference pattern: element i holds i */
    for (HSAuint64 i = 0; i < BufferSize / 8; i++)
        pData[i] = i;

    /* Prefetch to migrate from ram to vram */
    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(pBuf, BufferSize, gpuNode),gpuNode);

    /* Update content in migrated buffer in vram */
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                pBuf, pData, BufferSize));
    sdmaQueue.Wait4PacketConsumption(NULL, HSA_EVENTTIMEOUT_INFINITE);

    /* Migrate from vram to ram
     * CPU access the buffer migrated to vram have page fault
     * page fault trigger migration from vram back to ram
     * so SysBuffer should have same value as in vram
     */
    for (HSAuint64 i = 0; i < BufferSize / 8; i++) {
        ASSERT_EQ_GPU(i, pBuf[i],gpuNode);
        /* Update buf */
        pBuf[i] = i + 1;
    }

    /* Migrate from ram to vram if xnack on
     * If xnack off, after migrating back to ram, GPU mapping should be updated to ram
     * test if the GPU can read from ram
     * If xnack on, GPU mapping should be cleared, test if GPU vm fault can update
     * page table and the GPU can read from ram.
     */
    /* NOTE(review): the disabled PM4 path below references m_pAsm and CopyDwordIsa,
     * neither of which is declared in this static function - confirm they are
     * reachable before enabling it.
     */
//#define USE_PM4_QUEUE_TRIGGER_VM_FAULT
#ifdef USE_PM4_QUEUE_TRIGGER_VM_FAULT
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
    PM4Queue queue;

    ASSERT_SUCCESS_GPU(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()),gpuNode);

    ASSERT_SUCCESS_GPU(queue.Create(gpuNode),gpuNode);

    for (HSAuint64 i = 0; i < BufferSize / 8; i += 512) {
        Dispatch dispatch(isaBuffer);

        dispatch.SetArgs(pBuf + i, pData + i);
        dispatch.Submit(queue);
        dispatch.Sync(HSA_EVENTTIMEOUT_INFINITE);
    }
#else
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                pData, pBuf, BufferSize));
    sdmaQueue.Wait4PacketConsumption(NULL, HSA_EVENTTIMEOUT_INFINITE);
#endif

    /* pData must now hold the values the CPU wrote into pBuf above */
    for (HSAuint64 i = 0; i < BufferSize / 8; i += 512)
        ASSERT_EQ_GPU(i + 1, pData[i],gpuNode);

    ASSERT_SUCCESS_GPU(sdmaQueue.Destroy(),gpuNode);

}

TEST_P(KFDSVMRangeTest, MigratePolicyTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static MigratePolicyTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(MigratePolicyTest));

    TEST_END
}

/* Multiple GPU migration test
 *
 * Steps:
 *     1. Prefetch pBuf, pData to all GPUs, to test migration from GPU to GPU
 *     2. Use sdma queue on all GPUs, to copy data from pBuf to pData
 *     3. Check pData data
 *
 * Notes:
 *     With xnack on, step 2 will have retry fault on pBuf, to migrate from GPU to GPU,
 *     retry fault on pData, to migrate from CPU to GPU
 *
 *     With xnack off, pBuf and pData should prefetch to CPU to ensure multiple GPU access
 *
 *     step3 migrate pData from GPU to CPU
 *
 * Test will skip if only one GPU found
 */
TEST_P(KFDSVMRangeTest, MultiGPUMigrationTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    const int primaryNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(primaryNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    /* Keep only the GPU nodes whose properties report SVM API support */
    const std::vector<int> everyGpuNode = m_NodeInfo.GetNodesWithGPU();
    std::vector<int> svmNodes;

    for (size_t n = 0; n < everyGpuNode.size(); n++) {
        const HsaNodeProperties *pProps = m_NodeInfo.GetNodeProperties(everyGpuNode[n]);

        if (pProps->Capability.ui32.SVMAPISupported)
            svmNodes.push_back(everyGpuNode[n]);
    }
    if (svmNodes.size() < 2) {
        LOG() << "Skipping test: at least two SVM supported GPUs needed." << std::endl;
        return;
    }

    const unsigned long rangeBytes = 1UL << 20;
    const HSAuint64 rangeQwords = rangeBytes / 8;

    /* Source range (pSrc) and destination range (pDst), both 1MB */
    HsaSVMRange SrcRange(rangeBytes, primaryNode);
    HSAuint64 *pSrc = SrcRange.As<HSAuint64 *>();
    HsaSVMRange DstRange(rangeBytes, primaryNode);
    HSAuint64 *pDst = DstRange.As<HSAuint64 *>();

    SDMAQueue copyQueue;

    /* Source pattern: element i holds i */
    HSAuint64 idx = 0;
    while (idx < rangeQwords) {
        pSrc[idx] = idx;
        idx++;
    }

    /* Map and prefetch both ranges to each SVM-capable GPU in turn */
    for (size_t n = 0; n < svmNodes.size(); n++) {
        const int gpu = svmNodes[n];

        EXPECT_SUCCESS(SVMRangeMapToNode(pSrc, rangeBytes, gpu));
        EXPECT_SUCCESS(SVMRangePrefetchToNode(pSrc, rangeBytes, gpu));

        EXPECT_SUCCESS(SVMRangeMapToNode(pDst, rangeBytes, gpu));
        EXPECT_SUCCESS(SVMRangePrefetchToNode(pDst, rangeBytes, gpu));
    }

    /* On every GPU: SDMA copy source -> destination, then sample the result from the CPU */
    for (size_t n = 0; n < svmNodes.size(); n++) {
        ASSERT_SUCCESS(copyQueue.Create(svmNodes[n]));

        copyQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(copyQueue.GetFamilyId(),
                    pDst, pSrc, rangeBytes));
        copyQueue.Wait4PacketConsumption();

        for (HSAuint64 q = 0; q < rangeQwords; q += 512)
            ASSERT_EQ(q, pDst[q]);

        EXPECT_SUCCESS(copyQueue.Destroy());
    }

    TEST_END
}

/* Multiple GPU access in place test
 *
 * Steps:
 *     1. Prefetch pBuf, pData to all GPUs, with ACCESS_IN_PLACE on GPUs
 *     2. Use sdma queue on all GPUs, to copy data from pBuf to pData
 *     3. Prefetch pData to CPU, check pData data
 *
 * Notes:
 *     With xnack on, step 2 will have retry fault on pBuf, to migrate from GPU to GPU.
 *     If multiple GPU on xGMI same hive, there should not have retry fault on pBuf
 *     because mapping should update to another GPU vram through xGMI
 *
 *     With xnack off, pBuf and pData should prefetch to CPU to ensure multiple GPU access
 *
 *     step3 migrate pData from GPU to CPU, should not have retry fault on GPUs.
 *
 * Test will skip if only one GPU found
 */
TEST_P(KFDSVMRangeTest, MultiGPUAccessInPlaceTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    const int homeNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(homeNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    /* Filter the GPU node list down to the nodes that report SVM API support */
    const std::vector<int> candidateNodes = m_NodeInfo.GetNodesWithGPU();
    std::vector<int> usableNodes;

    for (std::vector<int>::const_iterator it = candidateNodes.begin();
         it != candidateNodes.end(); ++it) {
        const HsaNodeProperties *pInfo = m_NodeInfo.GetNodeProperties(*it);

        if (pInfo->Capability.ui32.SVMAPISupported)
            usableNodes.push_back(*it);
    }
    if (usableNodes.size() < 2) {
        LOG() << "Skipping test: at least two SVM supported GPUs needed." << std::endl;
        return;
    }

    const unsigned long totalBytes = 1UL << 20;
    const HSAuint64 totalQwords = totalBytes / 8;

    /* Input range (pIn) and output range (pOut), both 1MB */
    HsaSVMRange InRange(totalBytes, homeNode);
    HSAuint64 *pIn = InRange.As<HSAuint64 *>();
    HsaSVMRange OutRange(totalBytes, homeNode);
    HSAuint64 *pOut = OutRange.As<HSAuint64 *>();

    SDMAQueue dmaQueue;

    /* Input pattern: element i holds i */
    for (HSAuint64 k = 0; k != totalQwords; ++k)
        pIn[k] = k;

    /* Map in place and prefetch both ranges to each usable GPU in turn */
    for (std::vector<int>::const_iterator it = usableNodes.begin();
         it != usableNodes.end(); ++it) {
        EXPECT_SUCCESS(SVMRangeMapInPlaceToNode(pIn, totalBytes, *it));
        EXPECT_SUCCESS(SVMRangePrefetchToNode(pIn, totalBytes, *it));

        EXPECT_SUCCESS(SVMRangeMapInPlaceToNode(pOut, totalBytes, *it));
        EXPECT_SUCCESS(SVMRangePrefetchToNode(pOut, totalBytes, *it));
    }

    /* On every GPU: SDMA copy input -> output, then sample the output from the CPU */
    for (std::vector<int>::const_iterator it = usableNodes.begin();
         it != usableNodes.end(); ++it) {
        ASSERT_SUCCESS(dmaQueue.Create(*it));

        dmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(dmaQueue.GetFamilyId(),
                    pOut, pIn, totalBytes));
        dmaQueue.Wait4PacketConsumption();

        HSAuint64 probe = 0;
        while (probe < totalQwords) {
            ASSERT_EQ(probe, pOut[probe]);
            probe += 512;
        }

        EXPECT_SUCCESS(dmaQueue.Destroy());
    }

    TEST_END
}

/* Multiple thread migration test
 *
 * 2 threads do migration at the same time to test handling of range migration race conditions.
 *
 * Steps:
 * 1. register 128MB range on system memory, don't map to GPU, 128MB is max size to put in
 *    sdma queue 4KB IB buffer.
 * 2. one thread prefetch range to GPU, another thread use sdma queue to access range at same
 *    time to generate retry vm fault to migrate range to GPU
 * 3. one thread prefetch range to CPU, another thread read range to generate CPU page fault
 *    to migrate range to CPU at same time
 * 4. loop test step 2 and 3 twice, to random CPU/GPU fault and prefetch migration order
 */
/* Arguments passed (as void*) to CpuReadThread and GpuReadThread */
struct ReadThreadParams {
    HSAuint64* pBuf;        /* start of the range the thread reads or prefetches */
    HSAint64 BufferSize;    /* size of the range in bytes */
    int defaultGPUNode;     /* node id GpuReadThread prefetches the range to */
};

unsigned int CpuReadThread(void* p) {
    struct ReadThreadParams* pArgs = reinterpret_cast<struct ReadThreadParams*>(p);

    for (HSAuint64 i = 0; i < pArgs->BufferSize / 8; i += 512)
         EXPECT_EQ(i, pArgs->pBuf[i]);
    return 0;
}

unsigned int GpuReadThread(void* p) {
    struct ReadThreadParams* pArgs = reinterpret_cast<struct ReadThreadParams*>(p);

    EXPECT_SUCCESS(SVMRangePrefetchToNode(pArgs->pBuf, pArgs->BufferSize, pArgs->defaultGPUNode));
    return 0;
}

/*
 * Test body launched through KFDTest_Launch for MultiThreadMigrationTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Each of the test_loops iterations races an SDMA copy against a prefetch to
 * the GPU node (GpuReadThread), then races CPU reads (CpuReadThread) against a
 * prefetch to node 0.
 */
static void MultiThreadMigrationTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        /* Node id in decimal, family id in hex, each next to its own label */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << m_FamilyId
              << "." << std::endl;
        return;
    }

    unsigned long test_loops = 2;
    unsigned long BufferSize = 1UL << 27;
    HsaSVMRange SysBuffer(BufferSize, gpuNode);
    HSAuint64 *pBuf = SysBuffer.As<HSAuint64 *>();
    HsaSVMRange DataBuffer(BufferSize, gpuNode);
    HSAuint64 *pData = DataBuffer.As<HSAuint64 *>();
    SDMAQueue sdmaQueue;
    uint64_t threadId;
    struct ReadThreadParams params;

    params.pBuf = pBuf;
    params.BufferSize = BufferSize;
    params.defaultGPUNode = gpuNode;

    /* Every later step submits to this queue, so stop here if it cannot be created */
    ASSERT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    /* Pattern checked by CpuReadThread: element i holds i */
    for (HSAuint64 i = 0; i < BufferSize / 8; i++)
        pBuf[i] = i;

    for (HSAuint64 i = 0; i < test_loops; i++) {
        /* 2 threads migrate to GPU */
        sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                    pData, pBuf, BufferSize));
        ASSERT_EQ_GPU(true, StartThread(&GpuReadThread, &params, threadId), gpuNode);
        sdmaQueue.Wait4PacketConsumption();
        WaitForThread(threadId);

        /* 2 threads migrate to cpu */
        ASSERT_EQ_GPU(true, StartThread(&CpuReadThread, &params, threadId), gpuNode);
        EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(pBuf, BufferSize, 0), gpuNode);
        WaitForThread(threadId);
    }

    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);

}

TEST_P(KFDSVMRangeTest, MultiThreadMigrationTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static MultiThreadMigrationTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(MultiThreadMigrationTest));

    TEST_END
}

/*
 * Test SVM support file backed range
 *
 * Create temp file, mmap to alloc memory backed on file.
 * Create file backed svm range, to map to GPU for xnack on or off
 * Use sdma to write data to memory, should write to file
 * Close file, and then check if file data is updated correctly
 */
/*
 * Test body launched through KFDTest_Launch for MigrateFileBackedRangeTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Creates a one-page temp file filled with 0x30, maps it MAP_SHARED, registers
 * the mapping as an SVM range, fills it with 0x33 through SDMA, and then
 * re-opens the file to check that the first byte reads back as 0x33.
 */
static void MigrateFileBackedRangeTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        /* Node id in decimal, family id in hex, each next to its own label */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << m_FamilyId
              << "." << std::endl;
        return;
    }

    char tmpfname[] = "/tmp/kfdtest-XXXXXX";
    /* mkstemp takes no flags argument and already creates the file with mode
     * 0600; the second argument of mkostemp is open(2) flags, not a mode.
     */
    int fd = mkstemp(tmpfname);
    ASSERT_NE(-1, fd);

    size_t size = PAGE_SIZE;
    char *buf = reinterpret_cast<char *>(alloca(size));
    memset(buf, 0x30, size);

    ASSERT_EQ(size, write(fd, buf, size));

    void *MmapedFile = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    ASSERT_NE(MAP_FAILED, MmapedFile);

    HsaSVMRange filebackedRange(MmapedFile, size, gpuNode, gpuNode);

    SDMAQueue sdmaQueue;
    EXPECT_SUCCESS(sdmaQueue.Create(gpuNode));

    sdmaQueue.PlaceAndSubmitPacket(SDMAFillDataPacket(sdmaQueue.GetFamilyId(),
                    MmapedFile, 0x33333333, size));
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_SUCCESS(sdmaQueue.Destroy());
    munmap(MmapedFile, size);
    EXPECT_SUCCESS(close(fd));

    /* Re-open the file so the check goes through the file, not the old mapping */
    fd = open(tmpfname, O_RDONLY);
    ASSERT_NE(-1, fd);

    ASSERT_EQ(size, read(fd, buf, size));
    EXPECT_EQ(0x33, buf[0]);

    EXPECT_SUCCESS(close(fd));
    EXPECT_SUCCESS(remove(tmpfname));
}

TEST_P(KFDSVMRangeTest, MigrateFileBackedRangeTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static MigrateFileBackedRangeTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(MigrateFileBackedRangeTest));

    TEST_END
}

/*
 * Test SVM support read only range
 *
 * Map read only range to GPU, test sdma can read the range
 * write to range should trigger GPU vm fault for both xnack on and off
 */

TEST_P(KFDSVMRangeTest, ReadOnlyRangeTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /*
     * Use child process to run test because the test trigger GPU vm fault, KFD evict all user queues
     * of the process and no more test can run after vm fault on the process.
     */
    int pid = fork();
    /* Without this check a failed fork would fall into waitpid(-1, ...) below */
    ASSERT_GE(pid, 0) << "fork failed";
    if (pid == 0) {
        /* Child: rebuild the fixture state in the new process */
        TearDown();
        SetUp();
    } else {
        /* Parent: only evaluates how the child terminated */
        int childStatus;

        waitpid(pid, &childStatus, 0);
        if (hsakmt_is_dgpu()) {
            EXPECT_EQ(true, WIFEXITED(childStatus));
            EXPECT_EQ(0, WEXITSTATUS(childStatus));
        } else {
            EXPECT_EQ(true, WIFSIGNALED(childStatus));
            EXPECT_EQ(SIGSEGV, WTERMSIG(childStatus));
        }

        return;
    }

    /* Use child process to run test; ret becomes the child's exit status */
    int ret = 0;
    HsaSVMRange inBuffer(PAGE_SIZE * 2, defaultGPUNode);
    HSAuint8 *pinBuf = inBuffer.As<HSAuint8 *>();

    memset(pinBuf, 0x55, PAGE_SIZE);

    /* Map readonly pinBuf to GPU, sDMA should be able to read it */
    mprotect(pinBuf, PAGE_SIZE, PROT_READ);

    HsaSVMRange outputBuffer(PAGE_SIZE, defaultGPUNode);
    HSAuint8 *pBuf = outputBuffer.As<HSAuint8 *>();

    HsaEvent *vmFaultEvent;
    HSAuint64 faultAddress;
    HsaEventDescriptor eventDesc;
    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
    eventDesc.NodeId = defaultGPUNode;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.SyncVar.SyncVarSize = 0;

    ret = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
    if (ret != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Event create failed" << std::endl;
        exit(ret);
    }

    SDMAQueue sdmaQueue;

    ret = sdmaQueue.Create(defaultGPUNode);
    if (ret != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Queue create failed" << std::endl;
        goto queue_fail;
    }
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                    pBuf, reinterpret_cast<void *>(pinBuf), PAGE_SIZE));
    sdmaQueue.Wait4PacketConsumption();
    EXPECT_EQ(0x55, pBuf[0]);
    if (pBuf[0] != 0x55) {
        /* The parent only sees the exit status, so report the mismatch through it */
        ret = HSAKMT_STATUS_ERROR;
        goto event_fail;
    }

    /* sDMA write to readonly pinBuf should fail with GPU vm fault, check if pinBuf content is
     * not changed, and KFD send HSA_EVENTTYPE_MEMORY event back with fault address pinBuf.
     *
     * This must be the last step of test because all queues are evicted after vm fault.
     */

    memset(pBuf, 0xAA, PAGE_SIZE);
    sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(),
                    pinBuf, reinterpret_cast<void *>(pBuf), PAGE_SIZE));

    ret = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
    if (ret != HSAKMT_STATUS_SUCCESS) {
        WARN() << "Wait failed. No Exception triggered" << std::endl;
        goto event_fail;
    }
    if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
        WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType << std::endl;
        ret = HSAKMT_STATUS_ERROR;

        goto event_fail;
    }
    faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
    if (faultAddress != (HSAuint64)pinBuf) {
        WARN() << "Unexpected Fault Address " << faultAddress << std::endl;
        ret = HSAKMT_STATUS_ERROR;
    }

event_fail:
    EXPECT_SUCCESS(sdmaQueue.Destroy());
queue_fail:
    hsaKmtDestroyEvent(vmFaultEvent);
    /* Child process exit, otherwise it will continue to run remaining tests */
    exit(ret);

    TEST_END
}

/*
 * Test SMI HMM SVM profiling event
 * Use separate thread to read event the same way as ROCr and ROCProfiler
 */
/* Arguments passed (as void*) to ReadSMIEventThread */
struct ReadEventThreadParams {
    int nodeid;                  /* node id handed to hsaKmtOpenSMI */
    HSAuint64 *pBuf;             /* address the migrate-start event is expected to report */
    int BufSize;                 /* size in bytes the events are expected to report */
    pthread_barrier_t *barrier;  /* the thread waits on this once, after subscribing to events */
};

/*
 * Thread entry: open the SMI event file for the node, subscribe to events,
 * wait on the barrier, then read one event message and validate its fields
 * against the expected pid, buffer address/size and trigger.
 *
 * p points to a ReadEventThreadParams. Always returns 0; mismatches are
 * reported through the EXPECT_* macros.
 */
unsigned int ReadSMIEventThread(void* p) {
    struct ReadEventThreadParams *pArgs = (struct ReadEventThreadParams *)p;
    char msg[HSA_SMI_EVENT_MSG_SIZE];
    struct pollfd fds = {0};
    HSAuint64 events;
    int fd = -1;  /* stays invalid if hsaKmtOpenSMI fails without setting it */

    EXPECT_SUCCESS_GPU(hsaKmtOpenSMI(pArgs->nodeid, &fd), pArgs->nodeid);

    /* Event mask written to the SMI fd to select which events are delivered */
    events = HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_INDEX_MAX) - 1;
    EXPECT_EQ_GPU(write(fd, &events, sizeof(events)), sizeof(events), pArgs->nodeid);

    /* Always reach the barrier, even after a failure above, so the thread
     * waiting on the other side is not left blocked.
     */
    pthread_barrier_wait(pArgs->barrier);

    fds.fd = fd;
    fds.events = POLLIN;
    EXPECT_GE(poll(&fds, 1, 1000), 0);

    /* Read at most sizeof(msg) - 1 bytes so the zeroed buffer stays NUL terminated */
    memset(msg, 0, sizeof(msg));
    EXPECT_GE_GPU(read(fd, msg, sizeof(msg) - 1), 0, pArgs->nodeid);

    int event_id = 0, pid, size, trigger, unused;
    int idLen = 0;  /* number of characters consumed by the event id */
    unsigned int id;
    HSAuint64 timestamp;
    HSAuint64 addr;

    /* The message starts with the event id in hex; %n records where it ends */
    sscanf(msg, "%x%n", &event_id, &idLen);
    const char *payload = msg + idLen;

    /* check each possible response event message format */
    if (event_id == HSA_SMI_EVENT_MIGRATE_START) {
        /* the message is HSA_SMI_EVENT_MIGRATE_START */
        EXPECT_EQ_GPU(sscanf(payload, "%ld -%d @%lx(%d) %d->%x %x:%d %d\n", &timestamp, &pid,
                     &addr, &size, &unused, &unused, &unused, &unused, &trigger), 9, pArgs->nodeid);
        EXPECT_EQ_GPU((HSAuint64 *)(addr << PAGE_SHIFT), pArgs->pBuf, pArgs->nodeid);
        EXPECT_EQ_GPU(size << PAGE_SHIFT, pArgs->BufSize, pArgs->nodeid);
        EXPECT_EQ_GPU(pid, getpid(), pArgs->nodeid);
        EXPECT_EQ_GPU(trigger, HSA_MIGRATE_TRIGGER_PREFETCH, pArgs->nodeid);

    } else if (event_id == HSA_SMI_EVENT_QUEUE_EVICTION) {
        /* the message is HSA_SMI_EVENT_QUEUE_EVICTION */
        EXPECT_EQ_GPU(sscanf(payload, "%ld -%d %x %d\n",  &timestamp, &pid, &id, &trigger),
                      4, pArgs->nodeid);
        EXPECT_EQ_GPU(pid, getpid(), pArgs->nodeid);
        EXPECT_EQ_GPU(trigger, HSA_QUEUE_EVICTION_TRIGGER_SVM, pArgs->nodeid);

    } else if (event_id == HSA_SMI_EVENT_QUEUE_RESTORE) {
        /* the message is HSA_SMI_EVENT_QUEUE_RESTORE */
        EXPECT_EQ_GPU(sscanf(payload, "%ld -%d %x\n", &timestamp, &pid, &id), 3, pArgs->nodeid);
        EXPECT_EQ_GPU(pid, getpid(), pArgs->nodeid);

    } else if (event_id == HSA_SMI_EVENT_UNMAP_FROM_GPU) {
        /* the message is HSA_SMI_EVENT_UNMAP_FROM_GPU */
        EXPECT_EQ_GPU(sscanf(payload, "%ld -%d @%lx(%d) %x %d\n", &timestamp, &pid,
                      &addr, &size, &id, &trigger), 6, pArgs->nodeid);
        /* unmap address can be from different gpus */
        EXPECT_EQ_GPU(size << PAGE_SHIFT, pArgs->BufSize, pArgs->nodeid);
        EXPECT_EQ_GPU(pid, getpid(), pArgs->nodeid);
        EXPECT_EQ_GPU(trigger, HSA_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU, pArgs->nodeid);
    } else {
        WARN() << "HMMProfilingEvent failed on gpuNode: " <<  pArgs->nodeid << std::endl;
    }

    if (fd >= 0)
        close(fd);
    return 0;
}

/*
 * Test body launched through KFDTest_Launch for HMMProfilingEvent.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Starts ReadSMIEventThread, waits on a two-party barrier until that thread
 * has subscribed to SMI events, then prefetches a 16KB range to the GPU node
 * so that an event is generated for the thread to validate.
 */
static void HMMProfilingEvent(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    /* Skipped when the kernel interface minor version is below 10 */
    if (pKFDSVMRangeTest->Get_Version()->KernelInterfaceMinorVersion < 10)
        return;

    const HsaNodeProperties *pNodeProperties =
        pKFDSVMRangeTest->Get_NodeInfo()->GetNodeProperties(gpuNode);
    if (pNodeProperties->Integrated) {
        LOG() << "Skipping test on APU." << std::endl;
        return;
    }

    if (!pKFDSVMRangeTest->GetVramSize(gpuNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    if (pKFDSVMRangeTest->Get_NodeInfo()->IsAppAPU(gpuNode)) {
        LOG() << "Skipping test on AppAPU." << std::endl;
        return;
    }

    pthread_barrier_t barrier;
    ASSERT_SUCCESS(pthread_barrier_init(&barrier, NULL, 2));

    int BufSize = 16 << 10;
    HsaSVMRange SysBuffer(BufSize, gpuNode);
    HSAuint64 *pBuf = SysBuffer.As<HSAuint64 *>();

    struct ReadEventThreadParams pArgs = {gpuNode, pBuf, BufSize, &barrier};
    uint64_t threadId;
    const bool threadStarted = StartThread(&ReadSMIEventThread, &pArgs, threadId);
    if (!threadStarted)
        pthread_barrier_destroy(&barrier);  /* nobody else will ever use it */
    ASSERT_EQ(true, threadStarted);

    pthread_barrier_wait(&barrier);

    EXPECT_SUCCESS(SVMRangePrefetchToNode(pBuf, BufSize, gpuNode));

    WaitForThread(threadId);

    /* Release the barrier once the reader thread is done with it
     * (presumably WaitForThread joins the thread - confirm).
     */
    EXPECT_SUCCESS(pthread_barrier_destroy(&barrier));

}

TEST_P(KFDSVMRangeTest, HMMProfilingEvent) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static HMMProfilingEvent body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(HMMProfilingEvent));

    TEST_END
}

/*
 * Test SVM support VRAM overcommitment
 *
 * Prefetch total VRAM size plus overCommitSize SVM range to VRAM. after VRAM is full,
 * KFD should support VRAM overcommitment by evicting SVM ranges to system memory to alloc
 * VRAM for new ranges.
 */
/*
 * Test body launched through KFDTest_Launch for VramOvercommitTest.
 *
 * pTestParamters supplies the GPU node id (gpuNode) and the owning
 * KFDSVMRangeTest fixture (pTestObject).
 *
 * Maps 512MB anonymous buffers until their total reaches VRAM size plus
 * overCommitSize, registering each one as an SVM range on the GPU node, and
 * expects every registration to succeed.
 */
static void VramOvercommitTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int m_FamilyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (m_FamilyId < FAMILY_AI) {
        /* Node id in decimal, family id in hex, each next to its own label */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << m_FamilyId
              << "." << std::endl;
        return;
    }

    HSAuint64 vramSize = pKFDSVMRangeTest->GetVramSize(gpuNode);
    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    unsigned long overCommitSize = 1UL << 30;

    /* With XNACK off, KFD checks that all SVM memory will fit into system memory.
     * A g_TestGPUsNum of zero is budgeted like a single GPU.
     */
    const HSAuint64 numGPUs = g_TestGPUsNum ? g_TestGPUsNum : 1;
    if (numGPUs * (vramSize + overCommitSize) > pKFDSVMRangeTest->GetSysMemSize() / 2) {
        LOG() << "Skipping test: Not enough system memory." << std::endl;
        return;
    }

    const unsigned long BufSize = 512UL << 20;
    const unsigned long numBufs = (vramSize + overCommitSize) / BufSize;
    HSAKMT_STATUS ret;

    /* Every mapping created so far, including one whose registration failed */
    std::vector<void *> mappedBufs;
    mappedBufs.reserve(numBufs);

    /* Number of buffers that were both mapped and registered */
    unsigned long i;

    for (i = 0; i < numBufs; i++) {
        void *pBuf = mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (pBuf == MAP_FAILED) {
            /* Reported through the count check below; earlier mappings still get released */
            WARN() << "mmap failed for buffer " << std::dec << i << std::endl;
            break;
        }
        mappedBufs.push_back(pBuf);

        ret = RegisterSVMRange(gpuNode, pBuf, BufSize, gpuNode, 0);
        if (ret != HSAKMT_STATUS_SUCCESS)
            break;
    }

    EXPECT_EQ_GPU(numBufs, i, gpuNode);

    for (size_t n = mappedBufs.size(); n-- > 0; )
        munmap(mappedBufs[n], BufSize);

}

TEST_P(KFDSVMRangeTest, VramOvercommitTest) {
    /* Gate on the ENVCAPS_64BITLINUX capability and the TESTPROFILE_RUNALL profile */
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* Hand the static VramOvercommitTest body to KFDTest_Launch and require success */
    ASSERT_SUCCESS(KFDTest_Launch(VramOvercommitTest));

    TEST_END
}

/*
 * Test SVM support VRAM overcommitment
 *
 * Prefetch giant overcommit SVM range to VRAM, KFD should support VRAM overcommitment
 * by splitting giant range into smaller ranges, evicting SVM ranges to system memory to
 * alloc VRAM for overcommitment ranges.
 */
TEST_P(KFDSVMRangeTest, VramOvercommitGiantRangeTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    if (!SVMAPISupported())
        return;

    const int targetNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(targetNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId < FAMILY_AI) {
        LOG() << std::hex << "Skipping test: No svm range support for family "
                             "ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    const HSAuint64 localMemBytes = GetVramSize(targetNode);
    if (localMemBytes == 0) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /* One range covering all of VRAM plus 1GB of overcommit */
    const unsigned long extraBytes = 1UL << 30;
    const HSAuint64 requestedBytes = localMemBytes + extraBytes;

    /* With XNACK off, KFD checks that all SVM memory will fit into system memory */
    if (requestedBytes > GetSysMemSize() / 2) {
        LOG() << "Skipping test: no enough system memory." << std::endl;
        return;
    }

    const unsigned long giantBytes = requestedBytes;

    void *pGiant = mmap(0, giantBytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    ASSERT_NE(MAP_FAILED, pGiant);

    /* Registering the whole range on the GPU node is expected to succeed */
    const HSAKMT_STATUS status = RegisterSVMRange(targetNode, pGiant, giantBytes, targetNode, 0);
    EXPECT_EQ(HSAKMT_STATUS_SUCCESS, status);

    munmap(pGiant, giantBytes);
    TEST_END
}

/*
 * Test partial range prefault
 *
 * mmap a 4-page range and memset only the middle 2 pages from the CPU, then register the
 * range and prefetch the entire range to VRAM. SDMA is used to fill the remaining first and
 * last pages, so each page holds a different value: 0x1, 0x2, 0x3, 0x4.
 * Finally the CPU reads the first byte of every page and checks that it has the expected
 * value (per the original description, after migrating the 4 pages to system memory).
 *
 * @pTestParamters carries the GPU node to run on and the owning KFDSVMRangeTest object.
 */
static void PrefaultPartialRangeTest(KFDTEST_PARAMETERS* pTestParamters) {

    int gpuNode = pTestParamters->gpuNode;
    KFDSVMRangeTest* pKFDSVMRangeTest = (KFDSVMRangeTest*)pTestParamters->pTestObject;

    if (!pKFDSVMRangeTest->SVMAPISupported_GPU(gpuNode))
        return;

    unsigned int familyId = pKFDSVMRangeTest->GetFamilyIdFromNodeId(gpuNode);
    if (familyId < FAMILY_AI) {
        /* Print the node id in decimal next to "gpuNode" and the family id in hex after
         * the "0x" prefix, so the two numbers are not fused into one token.
         */
        LOG() << "Skipping test on gpuNode " << std::dec << gpuNode
              << ": No svm range support for family ID 0x" << std::hex << familyId
              << "." << std::endl;
        return;
    }

    unsigned long BufSize = 4 * PAGE_SIZE;
    char *pBuf;

    pBuf = (char *)mmap(0, BufSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    ASSERT_NE_GPU(MAP_FAILED, pBuf, gpuNode);

    /* CPU touches pages 1 and 2 only; pages 0 and 3 stay untouched until SDMA fills them */
    memset(pBuf + PAGE_SIZE, 0x2, PAGE_SIZE);
    memset(pBuf + 2 * PAGE_SIZE, 0x3, PAGE_SIZE);

    EXPECT_SUCCESS_GPU(RegisterSVMRange(gpuNode, pBuf, BufSize, 0, 0), gpuNode);
    EXPECT_SUCCESS_GPU(SVMRangePrefetchToNode(pBuf, BufSize, gpuNode), gpuNode);

    SDMAQueue sdmaQueue;
    EXPECT_SUCCESS_GPU(sdmaQueue.Create(gpuNode), gpuNode);

    /* GPU fills page 0 with 0x01 bytes and page 3 with 0x04 bytes */
    sdmaQueue.PlaceAndSubmitPacket(SDMAFillDataPacket(sdmaQueue.GetFamilyId(),
                       pBuf, 0x01010101, PAGE_SIZE));
    sdmaQueue.PlaceAndSubmitPacket(SDMAFillDataPacket(sdmaQueue.GetFamilyId(),
                       pBuf + 3 * PAGE_SIZE, 0x04040404, PAGE_SIZE));
    sdmaQueue.Wait4PacketConsumption();

    EXPECT_SUCCESS_GPU(sdmaQueue.Destroy(), gpuNode);

    /* Page i is expected to start with the byte value i + 1 */
    for (int i = 0; i < 4; i++)
        EXPECT_EQ_GPU(pBuf[i * PAGE_SIZE], i + 1, gpuNode);

    munmap(pBuf, BufSize);

}

/* gtest entry point: hands the static PrefaultPartialRangeTest routine to KFDTest_Launch
 * and fails the test if the launch does not report success.
 */
TEST_P(KFDSVMRangeTest, PrefaultPartialRangeTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    ASSERT_SUCCESS(KFDTest_Launch(PrefaultPartialRangeTest));

    TEST_END
}

/* Run every TEST_P of KFDSVMRangeTest once per listed parameter value (0 and 1).
 * The empty first argument is the instantiation name prefix.
 */
INSTANTIATE_TEST_CASE_P(, KFDSVMRangeTest,::testing::Values(0, 1));


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDSVMRangeTest.hpp
================================================
/*
 * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_SVMRANGE_TEST__H__
#define __KFD_SVMRANGE_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

/* Test fixture for the SVM range tests. It is value-parameterized with an int
 * (::testing::WithParamInterface<int>), so its tests are written with TEST_P.
 */
class KFDSVMRangeTest : public KFDBaseComponentTest,
                        public ::testing::WithParamInterface<int> {
 public:
    KFDSVMRangeTest() {}
    ~KFDSVMRangeTest() {}
    /* Defined outside this header.
     * NOTE(review): presumably runs the split-range scenario on @defaultGPUNode with
     * @prefetch_location as the prefetch target - confirm against the implementation.
     */
    void SplitRangeTest(int defaultGPUNode, int prefetch_location);

 protected:
    /* Per-test setup/teardown hooks; implemented outside this header */
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __KFD_SVMRANGE_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestFlags.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_TEST_FLAGS__H__
#define __KFD_TEST_FLAGS__H__

/* Global run parameters shared by all tests; this header only declares them. */
extern unsigned int g_TestRunProfile;  // TESTPROFILE value selected for this run
extern unsigned int g_TestENVCaps;     // bitmask of ENVCAPS flags
extern unsigned int g_TestTimeOut;     // timeout handed to event waits
extern int g_TestNodeId;               // node requested for the tests; negative when none was requested
extern int g_TestDstNodeId;            // NOTE(review): presumably the destination node for multi-node tests - confirm
extern bool g_IsChildProcess;          // set from the command-line arguments
extern bool g_IsEmuMode;               // true when emulation mode was detected

// Each test should call TEST_START with the test custom profile and HW scheduling.
// The values are bit flags; TESTPROFILE_RUNALL has every one of the low 16 bits set.
enum TESTPROFILE{
    TESTPROFILE_DEV =          0x1,
    TESTPROFILE_PROMO =    0x2,
    // 0x4 - 0x8000 - unused flags
    // Can add any flag that will mark only part of the tests to run
    TESTPROFILE_RUNALL = 0xFFFF
};

// Environment / hardware capability bit flags, combined into g_TestENVCaps.
enum ENVCAPS{
    ENVCAPS_NOADDEDCAPS    =  0x0,
    ENVCAPS_HWSCHEDULING   =  0x1,
    ENVCAPS_16BITPASID             =  0x2,
    ENVCAPS_32BITLINUX              =  0x4,
    ENVCAPS_64BITLINUX              =  0x8
    // 0x10 - 0x8000 - unused flags
    // Can add any flag that will mark specific hw limitation or capability
};

// GPU family identifiers used by the tests.
// The enumerator order is significant: tests compare family ids with relational
// operators (e.g. "m_FamilyId < FAMILY_AI"), so a new family must be inserted at the
// position that matches its generation, not simply appended.
enum KfdFamilyId {
    FAMILY_UNKNOWN = 0,
    FAMILY_CI,    // Sea Islands: Hawaii (P), Maui (P), Bonaire (M)
    FAMILY_KV,    // Fusion Kaveri: Spectre, Spooky; Fusion Kabini: Kalindi
    FAMILY_VI,    // Volcanic Islands: Iceland (V), Tonga (M)
    FAMILY_CZ,    // Carrizo, Nolan, Amur
    FAMILY_AI,    // Arctic Islands
    FAMILY_RV,    // Raven
    FAMILY_AR,    // Arcturus
    FAMILY_AL,    // Aldebaran
    FAMILY_AV,    // Aqua Vanjaram
    FAMILY_NV,    // Navi10
    FAMILY_GFX11, // GFX11
    FAMILY_GFX12, // GFX12
};

#endif  //  __KFD_TEST_FLAGS__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestMain.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "gtest/gtest.h"
#include "KFDTestFlags.hpp"
#include "KFDTestUtil.hpp"
#include "GoogleTestExtension.hpp"
#include "OSWrapper.hpp"
#include "Assemble.hpp"

/* Value of g_TestTimeOut unless a positive timeout is given on the command line.
 * NOTE(review): presumably milliseconds - confirm against the event-wait API.
 */
#define KFD_TEST_DEFAULT_TIMEOUT 60000

/* Stream a human readable name for @profile.
 * Any value other than the three named profiles is printed as "INVALID".
 */
std::ostream& operator << (std::ostream& out, TESTPROFILE profile) {
    const char *label = "INVALID";

    if (profile == TESTPROFILE_DEV)
        label = "Developer Test";
    else if (profile == TESTPROFILE_PROMO)
        label = "Promotion Test";
    else if (profile == TESTPROFILE_RUNALL)
        label = "Full Test";

    out << label;
    return out;
}

/* Definitions of the global run parameters; all of them are filled in by main(). */
unsigned int g_TestGPUsNum ;     // from HSA_TEST_GPUS_NUM (at least 1), or 1 when the variable is unset
unsigned int g_TestRunProfile;   // TESTPROFILE for this run
unsigned int g_TestENVCaps;      // ENVCAPS bitmask
unsigned int g_TestTimeOut;      // KFD_TEST_DEFAULT_TIMEOUT unless overridden on the command line
int g_TestNodeId;                // -1 when --node is not specified
int g_TestDstNodeId;             // copied from the parsed command-line arguments
bool g_IsChildProcess;           // copied from the parsed command-line arguments
bool g_IsEmuMode;                // result of CheckEmuModeEnabled()
unsigned int g_SleepTime;        // sleep time in seconds as specified by the user, 0 otherwise
unsigned int g_TestGPUFamilyId;  // not assigned in this file
class KFDBaseComponentTest *g_baseTest;  // not assigned in this file

/* kfdtest entry point.
 *
 * Sets the default run parameters, lets gtest consume its own options, parses the
 * remaining command line into the global run parameters and then runs all tests.
 * Returns the RUN_ALL_TESTS() result, or 0 when the command line could not be parsed.
 */
GTEST_API_ int main(int argc, char **argv) {
    // Default values for run parameters
    g_TestRunProfile = TESTPROFILE_RUNALL;
    g_TestENVCaps = ENVCAPS_NOADDEDCAPS | ENVCAPS_64BITLINUX;
    g_TestTimeOut = KFD_TEST_DEFAULT_TIMEOUT;

    testing::InitGoogleTest(&argc, argv);

    CommandLineArguments args;
    memset(&args, 0, sizeof(args));

    // Nothing to run when the arguments were rejected
    if (!GetCommandLineArguments(argc, argv, args))
        return 0;

    // HWS is on when the driver reports it or the user forces it, unless forced off
    const bool hwsWanted = GetHwCapabilityHWS() || args.HwsEnabled == HWCAP__FORCE_ENABLED;
    if (hwsWanted && args.HwsEnabled != HWCAP__FORCE_DISABLED)
        g_TestENVCaps |= ENVCAPS_HWSCHEDULING;

    g_TestRunProfile = args.TestProfile;
    g_IsChildProcess = args.ChildProcess;

    if (args.TimeOut > 0)
        g_TestTimeOut = args.TimeOut;

    if (args.SleepTime > 0)
        g_SleepTime = args.SleepTime;
    else
        g_SleepTime = 0x00;

    // If --node is not specified, then args.NodeId == -1
    g_TestNodeId = args.NodeId;
    g_TestDstNodeId = args.DstNodeId;

    g_IsEmuMode = CheckEmuModeEnabled();

    LOG() << "Profile: " << (TESTPROFILE)g_TestRunProfile << std::endl;
    LOG() << "HW capabilities: 0x" << std::hex << g_TestENVCaps << std::endl;
    if (g_IsEmuMode) {
        LOG() << "Emulation Mode Enabled" << std::endl;
    }

    if (g_SleepTime > 0) {
        LOG() << "Sleep time in seconds as specified by user: " << std::dec << g_SleepTime << std::endl;
    }

    /* if HSA_TEST_GPUS_NUM is defined use it, otherwise test on 1 gpu */
    const char *gpuCountEnv = getenv("HSA_TEST_GPUS_NUM");
    if (gpuCountEnv == NULL)
        g_TestGPUsNum = 1;
    else
        g_TestGPUsNum = std::max(1, atoi(gpuCountEnv));

    /* init LLVM one time*/
    Init_LLVM();

    const int r = RUN_ALL_TESTS();

    /* shutdown LLVM after tests finish */
    Shutdown_LLVM();

    LOG() << "kfdtest finished with return code: " << r << std::endl;
    return r;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestUtil.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDTestUtil.hpp"
#include <stdlib.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <algorithm>
#include <vector>
#include "BaseQueue.hpp"
#include "Dispatch.hpp"
#include "SDMAPacket.hpp"

/* Prompt on stdout and block until the user presses enter.
 *
 * Characters are consumed up to and including the first newline. The wait also ends
 * when stdin reaches end-of-file or reports an error, so the function cannot spin
 * forever (or test an uninitialized character) when no input is available.
 */
void WaitUntilInput() {
    printf("Press enter to continue: ");
    /* The prompt has no trailing newline, so flush it explicitly */
    fflush(stdout);

    int ch;
    do {
        ch = getchar();
    } while (ch != '\n' && ch != EOF);
}

/* fscanf_dec - parse the decimal number stored at the start of a file
 *      @file [IN ] path of the file to read
 *      @num [OUT] receives the parsed value; left untouched when parsing fails
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when the file cannot be opened,
 * HSAKMT_STATUS_ERROR when its content does not parse as an unsigned decimal,
 * HSAKMT_STATUS_SUCCESS otherwise.
 *
 * It is copied from the same function in libhsakmt
 */
HSAKMT_STATUS fscanf_dec(const char *file, uint32_t *num)
{
    FILE *stream = fopen(file, "r");
    if (stream == NULL) {
        LOG() << "Failed to open " << file << std::endl;
        return HSAKMT_STATUS_INVALID_PARAMETER;
    }

    const bool parsed = (fscanf(stream, "%u", num) == 1);
    if (!parsed)
        LOG() << "Failed to parse as a decimal: " << file << std::endl;

    fclose(stream);
    return parsed ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_ERROR;
}

/* Round @val up to the next power of two; a power of two is returned unchanged.
 *
 * Works by smearing the highest set bit of (val - 1) into every lower bit and adding
 * one. The shift amounts stop at 32, which stays below the 64-bit width of the value.
 * An input of 0 wraps around and yields 0.
 */
uint64_t RoundToPowerOf2(uint64_t val) {
    uint64_t smeared = val - 1;

    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;
    smeared |= smeared >> 16;
    smeared |= smeared >> 32;

    return smeared + 1;
}

/* Poll *buf until it equals @value or @timeOut polling rounds have elapsed.
 *
 * Each round calls Delay(1). A @timeOut of HSA_EVENTTIMEOUT_INFINITE is never counted
 * down. Returns whether *buf holds @value when polling stops.
 */
bool WaitOnValue(const volatile unsigned int *buf, unsigned int value, unsigned int timeOut) {
    for (;;) {
        if (timeOut == 0)
            break;
        if (*buf == value)
            break;

        Delay(1);

        if (timeOut != HSA_EVENTTIMEOUT_INFINITE)
            --timeOut;
    }

    return *buf == value;
}

void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart) {
    rLoPart = static_cast<unsigned int>(value);
    rHiPart = static_cast<unsigned int>(value >> 32);
}

/* Report whether the amdgpu module parameter emu_mode is non-zero.
 * If the parameter file cannot be read or parsed the value stays 0, i.e. "disabled".
 */
bool CheckEmuModeEnabled()
{
    uint32_t emuModeParam = 0;

    /* Best effort: a failed read simply leaves emuModeParam at 0 */
    (void)fscanf_dec("/sys/module/amdgpu/parameters/emu_mode", &emuModeParam);

    return emuModeParam != 0;
}

/* Report whether HW scheduling is enabled according to the CONFIG_HWS driver setting.
 * HWS is enabled by default, so a missing setting counts as enabled.
 */
bool GetHwCapabilityHWS() {
    unsigned int hwsSetting = 0;

    if (!ReadDriverConfigValue(CONFIG_HWS, hwsSetting))
        return true;

    return hwsSetting > 0;
}

/* Create an event of the kind used for queue completion signalling on @NodeId.
 *
 * The event type is HSA_EVENTTYPE_QUEUE_EVENT on Windows and HSA_EVENTTYPE_SIGNAL
 * elsewhere. @ManualReset and @IsSignaled are forwarded unchanged to hsaKmtCreateEvent,
 * whose status is returned; on success *@Event receives the new event.
 */
HSAKMT_STATUS CreateQueueTypeEvent(
    bool                ManualReset,            // IN
    bool                IsSignaled,             // IN
    unsigned int        NodeId,                 // IN
    HsaEvent**          Event                   // OUT
    ) {
    /* Value-initialize so that any descriptor member not assigned below is zero
     * instead of indeterminate stack content when it reaches hsaKmtCreateEvent.
     */
    HsaEventDescriptor Descriptor = {};

// TODO: Create per-OS header with this sort of definitions
#ifdef _WIN32
    Descriptor.EventType = HSA_EVENTTYPE_QUEUE_EVENT;
#else
    Descriptor.EventType = HSA_EVENTTYPE_SIGNAL;
#endif
    Descriptor.SyncVar.SyncVar.UserData = (void*)0xABCDABCD;
    Descriptor.NodeId = NodeId;

    return hsaKmtCreateEvent(&Descriptor, ManualReset, IsSignaled, Event);
}

/* Create an HSA_EVENTTYPE_HW_EXCEPTION event on @NodeId.
 *
 * @ManualReset and @IsSignaled are forwarded unchanged to hsaKmtCreateEvent, whose
 * status is returned; on success *@Event receives the new event.
 */
HSAKMT_STATUS CreateHWExceptionEvent(
    bool                ManualReset,            // IN
    bool                IsSignaled,             // IN
    unsigned int        NodeId,                 // IN
    HsaEvent**          Event                   // OUT
    ) {
    /* Value-initialize so that any descriptor member not assigned below is zero
     * instead of indeterminate stack content when it reaches hsaKmtCreateEvent.
     */
    HsaEventDescriptor Descriptor = {};

    Descriptor.EventType = HSA_EVENTTYPE_HW_EXCEPTION;
    Descriptor.SyncVar.SyncVar.UserData = (void*)0xABCDABCD;
    Descriptor.NodeId = NodeId;

    return hsaKmtCreateEvent(&Descriptor, ManualReset, IsSignaled, Event);
}

/* File-local flag: true when the most recently classified node is not an APU.
 * It starts out false and is rewritten by every call to FamilyIdFromNode(), so its
 * value reflects the node passed to the latest call.
 */
static bool hsakmt_is_dgpu_dev = false;

/* Return the flag above */
bool hsakmt_is_dgpu() {
    return hsakmt_is_dgpu_dev;
}

bool hasPciAtomicsSupport(int node) {
    /* If we can't get Node Properties, assume a lack of Atomics support */
    HsaNodeProperties *pNodeProperties = new HsaNodeProperties();
    if (hsaKmtGetNodeProperties(node, pNodeProperties)) {
        LOG() << "Unable to get Node Properties for node " << node << std::endl;
        return false;
    }

    /* APUs don't have IO Links, but support Atomic Ops by default */
    if (pNodeProperties->NumCPUCores && pNodeProperties->NumFComputeCores)
        return true;

    HsaIoLinkProperties *IolinkProperties = new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
    if (hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties)) {
        LOG() << "Unable to get Node IO Link Information for node " << node << std::endl;
        return false;
    }

    /* Make sure we're checking GPU-to-CPU connection here */
    for (int linkId = 0; linkId < pNodeProperties->NumIOLinks; linkId++) {
        /* Make sure it's a CPU */
        HsaNodeProperties *linkProps = new HsaNodeProperties();
        if (hsaKmtGetNodeProperties(IolinkProperties[linkId].NodeTo, linkProps)) {
            LOG() << "Unable to get connected device's IO Link information" << std::endl;
            return false;
        }
        if (linkProps->NumCPUCores) {
            /* IOLink flags are only valid if Override flag is set */
            return (IolinkProperties[linkId].Flags.ui32.Override &&
                   !IolinkProperties[linkId].Flags.ui32.NoAtomics32bit &&
                   !IolinkProperties[linkId].Flags.ui32.NoAtomics64bit);
        }
    }

    return false;
}

/* Map the engine id (major/minor/stepping) in @props to a KfdFamilyId value.
 * Unrecognized engine ids yield FAMILY_UNKNOWN.
 *
 * Side effect: updates the file-local dGPU flag returned by hsakmt_is_dgpu(). A node
 * with both CPU cores and compute cores is treated as an APU (flag false), anything
 * else as a dGPU (flag true).
 */
unsigned int FamilyIdFromNode(const HsaNodeProperties *props) {
    const unsigned int gfxMajor = props->EngineId.ui32.Major;
    const unsigned int gfxMinor = props->EngineId.ui32.Minor;
    const unsigned int gfxStepping = props->EngineId.ui32.Stepping;

    unsigned int familyId = FAMILY_UNKNOWN;

    if (gfxMajor == 7) {
        /* Only minor version 0 is recognized for major version 7 */
        if (gfxMinor == 0)
            familyId = (gfxStepping == 0) ? FAMILY_KV : FAMILY_CI;
    } else if (gfxMajor == 8) {
        familyId = (gfxStepping == 1) ? FAMILY_CZ : FAMILY_VI;
    } else if (gfxMajor == 9) {
        /* The minor version takes precedence over the stepping */
        if (gfxMinor >= 4)
            familyId = FAMILY_AV;
        else if (gfxStepping == 2)
            familyId = FAMILY_RV;
        else if (gfxStepping == 8)
            familyId = FAMILY_AR;
        else if (gfxStepping == 10)
            familyId = FAMILY_AL;
        else
            familyId = FAMILY_AI;
    } else if (gfxMajor == 10) {
        familyId = FAMILY_NV;
    } else if (gfxMajor == 11) {
        familyId = FAMILY_GFX11;
    } else if (gfxMajor == 12) {
        familyId = FAMILY_GFX12;
    }

    const bool isApu = props->NumCPUCores && props->NumFComputeCores;
    hsakmt_is_dgpu_dev = !isApu;

    return familyId;
}

/* Store @value through @dst unless the caller passed NULL for that output */
static void StoreIfRequested(unsigned int *dst, unsigned int value) {
    if (dst != NULL)
        *dst = value;
}

/* Copy the queue and engine counts from @props into the requested outputs.
 * Every output pointer is optional; NULL outputs are skipped.
 */
void GetHwQueueInfo(const HsaNodeProperties *props,
                 unsigned int *p_num_cp_queues,
                 unsigned int *p_num_sdma_engines,
                 unsigned int *p_num_sdma_xgmi_engines,
                 unsigned int *p_num_sdma_queues_per_engine) {
    StoreIfRequested(p_num_sdma_engines, props->NumSdmaEngines);
    StoreIfRequested(p_num_sdma_xgmi_engines, props->NumSdmaXgmiEngines);
    StoreIfRequested(p_num_sdma_queues_per_engine, props->NumSdmaQueuesPerEngine);
    StoreIfRequested(p_num_cp_queues, props->NumCpQueues);
}

/* Tonga has some workarounds in the thunk that cause certain failures, so tests need
 * to recognize it: engine id major version 8 with stepping 2.
 */
bool isTonga(const HsaNodeProperties *props) {
    const bool isMajor8 = (props->EngineId.ui32.Major == 8);

    return isMajor8 && props->EngineId.ui32.Stepping == 2;
}

/* Pack the engine id of @props into one number: major version shifted left by 16,
 * minor version shifted left by 8, stepping in the lowest bits.
 */
const uint32_t GetGfxVersion(const HsaNodeProperties *props) {
    const uint32_t majorPart = (props->EngineId.ui32.Major << 16);
    const uint32_t minorPart = (props->EngineId.ui32.Minor <<  8);
    const uint32_t steppingPart = (props->EngineId.ui32.Stepping);

    return majorPart | minorPart | steppingPart;
}

/* Return the current gettimeofday() time expressed in microseconds */
HSAuint64 GetSystemTickCountInMicroSec() {
    struct timeval now;

    gettimeofday(&now, NULL);

    const unsigned long long secondsAsMicros = now.tv_sec * 1000000ULL;
    return secondsAsMicros + now.tv_usec;
}

/* Shared empty buffer object, built with the default constructor (size 0, no memory) */
const HsaMemoryBuffer HsaMemoryBuffer::Null;

/* Allocate @size bytes on @node through hsaKmtAllocMemory and, on dGPU, map them.
 *
 * @zero       fill the buffer with 0 after allocation (needs host access; skipped for
 *             local memory)
 * @isLocal    allocate local memory: no host access, non-paged, coarse grained
 * @isExec     request execute access (ignored for scratch)
 * @isScratch  allocate scratch memory; only the Scratch and HostAccess flags are set
 * @isReadOnly request a read-only mapping
 * @isUncached request uncached memory; only meaningful for system/host memory
 * @NonPaged   request non-paged system memory
 *
 * On dGPU the buffer is mapped to @node only when @node is non-zero and the buffer is
 * not scratch; otherwise the default hsaKmtMapMemoryToGPU mapping is used.
 * Failures are reported as non-fatal gtest expectations.
 */
HsaMemoryBuffer::HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero, bool isLocal, bool isExec,
                                 bool isScratch, bool isReadOnly, bool isUncached, bool NonPaged)
    :m_Size(size),
    m_pUser(NULL),
    m_pBuf(NULL),
    m_Local(isLocal),
    m_Node(node) {
    m_Flags.Value = 0;
    /* Start from "mapped nowhere". The mask is only assigned on the dGPU path below,
     * but MapMemToNodes()/UnmapMemToNodes() update it with |= and &= on every platform.
     */
    m_MappedNodes = 0;

    HsaMemMapFlags mapFlags = {0};
    bool map_specific_gpu = (node && !isScratch);

    if (isScratch) {
        m_Flags.ui32.Scratch = 1;
        m_Flags.ui32.HostAccess = 1;
    } else {
        m_Flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;

        if (isLocal) {
            m_Flags.ui32.HostAccess = 0;
            m_Flags.ui32.NonPaged = 1;
            m_Flags.ui32.CoarseGrain = 1;
            EXPECT_EQ(isUncached, 0) << "Uncached flag is relevant only for system or host memory";
        } else {
            m_Flags.ui32.HostAccess = 1;
            m_Flags.ui32.NonPaged = NonPaged ? 1 : 0;
            m_Flags.ui32.CoarseGrain = 0;
            m_Flags.ui32.NoNUMABind = 1;
            m_Flags.ui32.Uncached = isUncached;
        }

        if (isExec)
            m_Flags.ui32.ExecuteAccess = 1;
    }
    if (isReadOnly)
        m_Flags.ui32.ReadOnly = 1;

    /* Zero-filling is done by the CPU, so it needs host access */
    if (zero)
        EXPECT_EQ(m_Flags.ui32.HostAccess, 1);

    EXPECT_SUCCESS(hsaKmtAllocMemory(m_Node, m_Size, m_Flags, &m_pBuf));
    if (hsakmt_is_dgpu()) {
        if (map_specific_gpu) {
            EXPECT_SUCCESS(hsaKmtMapMemoryToGPUNodes(m_pBuf, m_Size, NULL, mapFlags, 1, &m_Node));
        } else {
            EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pBuf, m_Size, NULL));
        }
        m_MappedNodes = 1 << m_Node;
    }

    if (zero && !isLocal)
        Fill(0);
}

/* Wrap existing user memory: register @addr/@size with the thunk and map it to the GPU.
 *
 * m_pBuf becomes the GPU address returned by the mapping, or @addr itself when the
 * mapping reports no separate GPU address. Failures are reported as non-fatal gtest
 * expectations.
 */
HsaMemoryBuffer::HsaMemoryBuffer(void *addr, HSAuint64 size):
    m_Size(size),
    m_pUser(addr),
    m_pBuf(NULL),
    m_Local(false),
    m_Node(0) {
    /* Give Flags() and the node-mapping helpers defined values; this constructor
     * does not set either member otherwise.
     */
    m_Flags.Value = 0;
    m_MappedNodes = 0;

    HSAuint64 gpuva = 0;
    EXPECT_SUCCESS(hsaKmtRegisterMemory(m_pUser, m_Size));
    EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(m_pUser, m_Size, &gpuva));
    m_pBuf = gpuva ? (void *)gpuva : m_pUser;
}

/* Construct an empty buffer that owns no memory.
 *
 * Every member is given a defined value. In particular m_pUser must be NULL, because
 * the destructor decides what to release by testing m_pUser first and m_pBuf second.
 */
HsaMemoryBuffer::HsaMemoryBuffer()
    :m_Size(0),
    m_pUser(NULL),
    m_pBuf(NULL),
    m_Local(false),
    m_Node(0) {
    m_Flags.Value = 0;
    m_MappedNodes = 0;
}

/* Fill CPU accessible buffer with the byte @value.
 *
 * @offset  byte offset of the first byte to write
 * @size    number of bytes to write; 0 selects m_Size bytes
 *
 * The range must lie inside the buffer, otherwise the call fails with a fatal gtest
 * assertion and writes nothing. The user pointer is written when the buffer wraps
 * user memory, the allocated pointer otherwise.
 */
void HsaMemoryBuffer::Fill(unsigned char value, HSAuint64 offset, HSAuint64 size) {
    EXPECT_EQ(m_Local, 0) << "Local Memory. Call Fill(HSAuint32 value, BaseQueue& baseQueue)";

    size = size ? size : m_Size;
    /* Phrased without "size + offset" so that huge arguments cannot wrap around and
     * slip past the bounds check.
     */
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    if (m_pUser != NULL)
        memset(reinterpret_cast<char *>(m_pUser) + offset, value, size);
    else if (m_pBuf != NULL)
        memset(reinterpret_cast<char *>(m_pBuf) + offset, value, size);
    else
        ASSERT_TRUE(0) << "Invalid HsaMemoryBuffer";
}

/* Fill CPU accessible buffer with the 32-bit @value.
 *
 * @offset  byte offset of the first word to write
 * @size    number of bytes to write; 0 selects m_Size bytes. It is expected to be a
 *          multiple of sizeof(HSAuint32); only whole words are written.
 *
 * The range must lie inside the buffer, otherwise the call fails with a fatal gtest
 * assertion and writes nothing.
 */
void HsaMemoryBuffer::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) {
    HSAuint64 i;
    HSAuint32 *ptr = NULL;

    EXPECT_EQ(m_Local, 0) << "Local Memory. Call Fill(HSAuint32 value, BaseQueue& baseQueue)";
    size = size ? size : m_Size;
    EXPECT_EQ((size & (sizeof(HSAuint32) - 1)), 0) << "Not word aligned. Call Fill(unsigned char)";
    /* Phrased without "size + offset" so that huge arguments cannot wrap around and
     * slip past the bounds check.
     */
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    /* Prefer the user pointer when the buffer wraps user memory */
    if (m_pUser != NULL)
        ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pUser) + offset);
    else if (m_pBuf != NULL)
        ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pBuf) + offset);

    ASSERT_NOTNULL(ptr);

    for (i = 0; i < size / sizeof(HSAuint32); i++)
        ptr[i] = value;
}

/* Fill GPU only accessible Local memory with @value using SDMA Constant Fill Command
 *
 * @baseQueue  queue used to submit the fill; must be an SDMA queue
 * @offset     byte offset of the first byte to fill
 * @size       number of bytes to fill; 0 selects m_Size bytes
 *
 * The fill is followed by a fence and a trap packet, and the call waits up to
 * g_TestTimeOut for the resulting event. All argument checks run before the event is
 * created, so a failed check cannot leak the event.
 */
void HsaMemoryBuffer::Fill(HSAuint32 value, BaseQueue& baseQueue, HSAuint64 offset, HSAuint64 size) {
    EXPECT_NE(m_Local, 0) << "Not Local Memory. Call Fill(HSAuint32 value)";

    ASSERT_EQ(baseQueue.GetQueueType(), HSA_QUEUE_SDMA) << "Only SDMA queues supported";

    size = size ? size : m_Size;
    /* Phrased without "size + offset" so that huge arguments cannot wrap around and
     * slip past the bounds check.
     */
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    HsaEvent* event = NULL;
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_Node, &event));

    baseQueue.PlacePacket(SDMAFillDataPacket(baseQueue.GetFamilyId(),
                                (reinterpret_cast<void *>(this->As<char*>() + offset)), value, size));
    baseQueue.PlacePacket(SDMAFencePacket(baseQueue.GetFamilyId(),
                                reinterpret_cast<void*>(event->EventData.HWData2), event->EventId));
    baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent(event, g_TestTimeOut));

    hsaKmtDestroyEvent(event);
}

/* Compare the 32-bit word that contains byte offset @location with @pattern.
 * Returns TRUE on a match. Returns FALSE on a mismatch, when @location is outside the
 * buffer, or when the buffer has no memory behind it.
 * HsaMemoryBuffer has to be CPU accessible
 */
bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern) {
    EXPECT_EQ(m_Local, 0) << "Local Memory. Call IsPattern(..baseQueue& baseQueue)";

    /* Out of bounds */
    if (location >= m_Size)
        return false;

    /* The user pointer wins when the buffer wraps user memory */
    void *base = m_pBuf;
    if (m_pUser != NULL)
        base = m_pUser;

    if (base == NULL)
        return false;

    const HSAuint32 *words = reinterpret_cast<HSAuint32 *>(base);
    return words[location/sizeof(HSAuint32)] == pattern;
}

/* Check if HsaMemoryBuffer[location] has the pattern specified, for a buffer that is
 * supposed to be only GPU accessible.
 *
 * @baseQueue copies the 32-bit word at byte offset @location into *@tmp, which is
 * primed with ~@pattern first so that a stale value cannot pass for a match.
 * Returns TRUE if the copied word equals @pattern. Returns FALSE on a mismatch, when
 * @location is out of bounds, or when event creation or the event wait fails.
 */

bool HsaMemoryBuffer::IsPattern(HSAuint64 location, HSAuint32 pattern, BaseQueue& baseQueue, volatile HSAuint32 *tmp) {
    EXPECT_NE(m_Local, 0) << "Not Local Memory. Call IsPattern(HSAuint64 location, HSAuint32 pattern)";
    EXPECT_EQ(baseQueue.GetQueueType(), HSA_QUEUE_SDMA) << "Only SDMA queues supported";

    /* Out of bounds */
    if (location >= m_Size)
        return false;

    HsaEvent* event = NULL;
    const int createStatus = CreateQueueTypeEvent(false, false, m_Node, &event);
    if (createStatus)
        return false;

    *tmp = ~pattern;

    /* Copy, then fence and trap so the event signals the copy has been processed */
    baseQueue.PlacePacket(SDMACopyDataPacket(baseQueue.GetFamilyId(), (void *)tmp,
            reinterpret_cast<void *>(this->As<HSAuint64>() + location),
            sizeof(HSAuint32)));
    baseQueue.PlacePacket(SDMAFencePacket(baseQueue.GetFamilyId(), reinterpret_cast<void*>(event->EventData.HWData2),
            event->EventId));
    baseQueue.PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));

    const int waitStatus = hsaKmtWaitOnEvent(event, g_TestTimeOut);
    hsaKmtDestroyEvent(event);

    return waitStatus ? false : WaitOnValue(tmp, pattern);
}

/* Buffer size in bytes.
 * NOTE(review): the constructors take the size as HSAuint64 while this returns
 * unsigned int, so the result looks truncated for very large buffers - confirm the
 * declared type of m_Size.
 */
unsigned int HsaMemoryBuffer::Size() {
    return m_Size;
}

/* Memory flags stored for this buffer */
HsaMemFlags HsaMemoryBuffer::Flags() {
    return m_Flags;
}

/* Node the buffer was created for */
unsigned int HsaMemoryBuffer::Node() const {
    return m_Node;
}

/* Map the buffer to the @nodes_num GPU nodes listed in @nodes.
 *
 * Returns the thunk status when the mapping fails. On success each listed node's bit
 * is set in the mapped-nodes mask and 0 is returned.
 */
int HsaMemoryBuffer::MapMemToNodes(unsigned int *nodes, unsigned int nodes_num) {
    HsaMemMapFlags mapFlags = {0};

    const int status = hsaKmtMapMemoryToGPUNodes(m_pBuf, m_Size, NULL, mapFlags, nodes_num, nodes);
    if (status != 0)
        return status;

    for (unsigned int idx = 0; idx != nodes_num; ++idx)
        m_MappedNodes |= (1 << nodes[idx]);

    return 0;
}

/* Unmap the buffer from the GPU and clear the bits of the @nodes_num nodes listed in
 * @nodes from the mapped-nodes mask.
 *
 * The unmap call itself takes only the buffer address; @nodes is used solely for the
 * mask update. Returns the thunk status when the unmap fails, 0 otherwise.
 */
int HsaMemoryBuffer::UnmapMemToNodes(unsigned int *nodes, unsigned int nodes_num) {
    const int status = hsaKmtUnmapMemoryToGPU(m_pBuf);
    if (status)
        return status;

    for (unsigned int idx = 0; idx != nodes_num; ++idx)
        m_MappedNodes &= ~(1 << nodes[idx]);

    return 0;
}

/* Unmap the buffer from the GPU and mark it as mapped to no node.
 *
 * The unmap call takes only the buffer address, so no per-node list is needed. Earlier
 * code built a scratch list of mapped nodes that was never used, and returned without
 * unmapping whenever that allocation came back NULL (which malloc may do for a
 * zero-sized request).
 */
void HsaMemoryBuffer::UnmapAllNodes() {
    /*
     * TODO: When thunk is updated, use hsaKmtRegisterToNodes. Then the set bits of
     * m_MappedNodes have to be turned into a node list here, before the mask is cleared.
     */
    hsaKmtUnmapMemoryToGPU(m_pBuf);

    m_MappedNodes = 0;
}

/* Release whatever the buffer holds.
 *
 * A buffer that wraps user memory is unmapped and deregistered. An allocated buffer is
 * unmapped first when it is still mapped on a dGPU, then freed.
 */
HsaMemoryBuffer::~HsaMemoryBuffer() {
    const bool wrapsUserMemory = (m_pUser != NULL);

    if (wrapsUserMemory) {
        hsaKmtUnmapMemoryToGPU(m_pUser);
        hsaKmtDeregisterMemory(m_pUser);
    } else if (m_pBuf != NULL) {
        if (hsakmt_is_dgpu() && m_MappedNodes)
            hsaKmtUnmapMemoryToGPU(m_pBuf);

        hsaKmtFreeMemory(m_pBuf, m_Size);
    }

    m_pBuf = NULL;
}

/* Map a graphics buffer (@device_handle / @buffer_handle, @size bytes) into the address
 * space of @node and keep the resulting flat address in m_pBuf.
 *
 * The mapped size is recorded in m_Size so that the destructor hands the same size back
 * to hsaKmtUnmapGraphicHandle. A failed mapping is reported as a non-fatal gtest
 * expectation and leaves m_pBuf NULL.
 */
HsaInteropMemoryBuffer::HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle,
                                               HSAuint64 size, unsigned int node)
    :m_Size(size),
     m_pBuf(NULL),
     m_graphic_handle(0),
     m_Node(node) {
    /* Start from 0 so a failed mapping cannot leave a garbage address in m_pBuf */
    HSAuint64 flat_address = 0;
    EXPECT_SUCCESS(hsaKmtMapGraphicHandle(m_Node, device_handle, buffer_handle, 0, size, &flat_address));
    m_pBuf = reinterpret_cast<void*>(flat_address);
}

/* Undo the graphics-handle mapping made by the constructor; the status is ignored */
HsaInteropMemoryBuffer::~HsaInteropMemoryBuffer() {
    hsaKmtUnmapGraphicHandle(m_Node, (HSAuint64)m_pBuf, m_Size);
}


/* Creates an empty node table; Init() has to be called to populate it */
HsaNodeInfo::HsaNodeInfo() {
}

/* Init - Get and store information about all the HSA nodes from the Thunk Library.
 * @NumOfNodes - Number to system nodes returned by hsaKmtAcquireSystemProperties
 * @Return - false: if no node information is available
 */
bool HsaNodeInfo::Init(int NumOfNodes) {
    HsaNodeProperties *nodeProperties;
    _HSAKMT_STATUS status;
    bool ret = false;

    for (int i = 0; i < NumOfNodes; i++) {
        nodeProperties = new HsaNodeProperties();

        status = hsaKmtGetNodeProperties(i, nodeProperties);
        /* This is not a fatal test (not using assert), since even when it fails for one node
         * we want to get information regarding others.
         */
        EXPECT_SUCCESS(status) << "Node index: " << i << "hsaKmtGetNodeProperties returned status " << status;

        if (status == HSAKMT_STATUS_SUCCESS) {
            m_HsaNodeProps.push_back(nodeProperties);
            ret = true;  // Return true if atleast one information is available

            if (nodeProperties->NumFComputeCores)
                m_NodesWithGPU.push_back(i);
            else
                m_NodesWithoutGPU.push_back(i);
        } else {
            delete nodeProperties;
        }
    }

    return ret;
}

/* Free every stored node property object and empty all three node lists */
void HsaNodeInfo::Delete() {
    const size_t storedCount = m_HsaNodeProps.size();

    for (size_t idx = 0; idx != storedCount; ++idx)
        delete m_HsaNodeProps.at(idx);

    m_HsaNodeProps.clear();
    m_NodesWithGPU.clear();
    m_NodesWithoutGPU.clear();
}

/* Releases the stored node properties via Delete() */
HsaNodeInfo::~HsaNodeInfo() {
    Delete();
}

/* Ids of the nodes that reported compute cores, in the order Init() found them */
const std::vector<int>& HsaNodeInfo::GetNodesWithGPU() const {
    return m_NodesWithGPU;
}

/* Properties stored at position @NodeNum of the property list.
 * NOTE(review): Init() skips nodes whose properties could not be read, so the position
 * matches the node id only when every query succeeded. The lookup uses at(); an
 * out-of-range @NodeNum presumably throws (std::vector semantics) - confirm.
 */
const HsaNodeProperties* HsaNodeInfo::GetNodeProperties(int NodeNum) const {
    return m_HsaNodeProps.at(NodeNum);
}

/* Return the position of @gpuNodeId within the list of GPU nodes,
 * or -1 when it is not a known GPU node.
 */
const int HsaNodeInfo::HsaGPUindexFromGpuNode(int gpuNodeId) const {
    const std::vector<int>::const_iterator hit =
            std::find(m_NodesWithGPU.begin(), m_NodesWithGPU.end(), gpuNodeId);

    if (hit == m_NodesWithGPU.end())
        return -1;

    return static_cast<int>(hit - m_NodesWithGPU.begin());
}

/* Properties of the default GPU node, or NULL when there is no GPU node */
const HsaNodeProperties* HsaNodeInfo::HsaDefaultGPUNodeProperties() const {
    const int defaultNode = HsaDefaultGPUNode();

    if (defaultNode >= 0)
        return GetNodeProperties(defaultNode);

    return NULL;
}

/* Pick the GPU node that tests use by default.
 *
 * Returns -1 when no GPU node is known. Returns g_TestNodeId when it is non-negative
 * and names a known GPU node, otherwise the first GPU node found.
 */
const int HsaNodeInfo::HsaDefaultGPUNode() const {
    if (m_NodesWithGPU.empty())
        return -1;

    // Check if the requested id is a valid GPU node, if so use it else use first available
    const bool requestedIsGpu = g_TestNodeId >= 0 &&
            std::find(m_NodesWithGPU.begin(), m_NodesWithGPU.end(), g_TestNodeId) != m_NodesWithGPU.end();
    if (requestedIsGpu)
        return g_TestNodeId;

    return m_NodesWithGPU.front();
}

void HsaNodeInfo::PrintNodeInfo() const {
    const HsaNodeProperties *nodeProperties;

    for (unsigned int i = 0; i < m_HsaNodeProps.size(); i++) {
        nodeProperties = m_HsaNodeProps.at(i);

        LOG() << "***********************************" << std::endl;
        LOG() << "Node " << i << std::endl;
        LOG() << "NumCPUCores=\t" << nodeProperties->NumCPUCores << std::endl;
        LOG() << "NumFComputeCores=\t" << nodeProperties->NumFComputeCores << std::endl;
        LOG() << "NumMemoryBanks=\t" << nodeProperties->NumMemoryBanks << std::endl;
        LOG() << "VendorId=\t" << nodeProperties->VendorId << std::endl;
        LOG() << "DeviceId=\t" << nodeProperties->DeviceId << std::endl;
        LOG() << "***********************************" << std::endl;
    }

    LOG() << "Default GPU NODE " << HsaDefaultGPUNode() << std::endl;
}

const bool HsaNodeInfo::IsGPUNodeLargeBar(int node) const {
    const HsaNodeProperties *pNodeProperties;

    pNodeProperties = GetNodeProperties(node);
    if (pNodeProperties) {
        HsaMemoryProperties *memoryProperties =
                new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
        EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(node,
                       pNodeProperties->NumMemoryBanks, memoryProperties));
        for (unsigned bank = 0; bank < pNodeProperties->NumMemoryBanks; bank++)
            if (memoryProperties[bank].HeapType ==
                                HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC) {
                delete [] memoryProperties;
                return true;
            }
        delete [] memoryProperties;
    }

    return false;
}

const bool HsaNodeInfo::IsAppAPU(int node) const {
    const HsaNodeProperties *pNodeProperties = GetNodeProperties(node);

    /*  CPU with compute cores is small APU, not AppAPU */
    if (pNodeProperties->NumCPUCores && pNodeProperties->NumFComputeCores)
        return false;

    HsaIoLinkProperties *IolinkProperties = new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
    if (hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties)) {
        LOG() << "Unable to get Node IO Link Information for node " << node << std::endl;
        delete [] IolinkProperties;
        return false;
    }

    /* Checking GPU-to-CPU connection weight */
    for (int linkId = 0; linkId < pNodeProperties->NumIOLinks; linkId++) {
        HsaNodeProperties linkProps;

        if (hsaKmtGetNodeProperties(IolinkProperties[linkId].NodeTo, &linkProps)) {
            LOG() << "Unable to get connected device's IO Link information" << std::endl;
            break;
        }

        /* If it's GPU-CPU link with connection weight KFD_CRAT_INTRA_SOCKET_WEIGHT 13 */
        if (linkProps.NumCPUCores && IolinkProperties[linkId].Weight == 13) {
            delete [] IolinkProperties;
            return true;
        }
    }
    delete [] IolinkProperties;
    return false;
}

const bool HsaNodeInfo::IsPeerAccessibleByNode(int peer, int node) const {
    const HsaNodeProperties *pNodeProperties;

    pNodeProperties = GetNodeProperties(node);
    if (pNodeProperties) {
        HsaIoLinkProperties p2pLinksProperties[pNodeProperties->NumIOLinks];
        EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node,
					pNodeProperties->NumIOLinks, p2pLinksProperties));

        for (unsigned link = 0; link < pNodeProperties->NumIOLinks; link++)
            if (p2pLinksProperties[link].NodeTo == peer)
                return true;
    }

    return false;
}

// Find the first GPU node for which IsGPUNodeLargeBar() reports true.
// @return the node ID, or -1 when no GPU node qualifies
const int HsaNodeInfo::FindLargeBarGPUNode() const {
    // Iterate the stored list through the const reference; no copy is needed
    for (int gpuNode : GetNodesWithGPU()) {
        if (IsGPUNodeLargeBar(gpuNode))
            return gpuNode;
    }

    return -1;
}

const bool HsaNodeInfo::AreGPUNodesXGMI(int node0, int node1) const {
    const HsaNodeProperties *pNodeProperties0 = GetNodeProperties(node0);
    const HsaNodeProperties *pNodeProperties1 = GetNodeProperties(node1);

    if ((pNodeProperties0->HiveID != 0) && (pNodeProperties1->HiveID != 0) &&
        (pNodeProperties0->HiveID == pNodeProperties1->HiveID))
        return true;

    return false;
}

// Append @node itself and then every other GPU node that
// IsPeerAccessibleByNode() reports as reachable from @node to @peers.
// @param peers output vector; entries are appended, existing ones are kept
// @param node  node ID whose peers are collected
// @return the size of @peers after the additions
int HsaNodeInfo::FindAccessiblePeers(std::vector<int> *peers,
                                     HSAuint32 node) const {
    peers->push_back(node);

    for (int gpuNode : m_NodesWithGPU) {
        // The node itself was already added above
        if (gpuNode != node && IsPeerAccessibleByNode(gpuNode, node))
            peers->push_back(gpuNode);
    }

    return peers->size();
}

const bool HsaNodeInfo::IsNodeXGMItoCPU(int node) const {
    const HsaNodeProperties *pNodeProperties;
    bool ret = false;

    pNodeProperties = GetNodeProperties(node);
    if (pNodeProperties && pNodeProperties->NumIOLinks) {
        HsaIoLinkProperties  *IolinkProperties =  new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
        EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties));

        for (int linkId = 0; linkId < pNodeProperties->NumIOLinks; linkId++) {
            EXPECT_EQ(node, IolinkProperties[linkId].NodeFrom);
            const HsaNodeProperties *pNodeProperties0 =
                    GetNodeProperties(IolinkProperties[linkId].NodeTo);
            if (pNodeProperties0->NumFComputeCores == 0 &&
                    IolinkProperties[linkId].IoLinkType == HSA_IOLINK_TYPE_XGMI)
                ret = true;
        }
        delete [] IolinkProperties;
    }

    return ret;
}

// Apply the standard set of SVM attributes to a memory range through a single
// hsaKmtSVMSetAttr() call.
// @param GPUNode       node given access to the range (HSA_SVM_ATTR_ACCESS)
// @param MemoryAddress start of the range
// @param SizeInBytes   length of the range
// @param PrefetchNode  used for both the prefetch and the preferred location
// @param SVMFlags      flags to set (HSA_SVM_ATTR_SET_FLAGS)
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR on any failure
HSAKMT_STATUS RegisterSVMRange(HSAuint32 GPUNode, void *MemoryAddress,
                               HSAuint64 SizeInBytes, HSAuint32 PrefetchNode,
                               HSAuint32 SVMFlags) {
    const HSAuint32 attrCount = 4;
    HSA_SVM_ATTRIBUTE attrList[attrCount];

    attrList[0].type = HSA_SVM_ATTR_PREFETCH_LOC;
    attrList[0].value = PrefetchNode;

    attrList[1].type = HSA_SVM_ATTR_PREFERRED_LOC;
    attrList[1].value = PrefetchNode;

    attrList[2].type = HSA_SVM_ATTR_SET_FLAGS;
    attrList[2].value = SVMFlags;

    attrList[3].type = HSA_SVM_ATTR_ACCESS;
    attrList[3].value = GPUNode;

    const HSAKMT_STATUS status =
            hsaKmtSVMSetAttr(MemoryAddress, SizeInBytes, attrCount, attrList);

    // Any non-zero status is collapsed into the generic error code
    return status ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

// Query the HSA_SVM_ATTR_PREFETCH_LOC attribute of a memory range.
// @param MemoryAddress start of the range
// @param SizeInBytes   length of the range
// @param PrefetchNode  receives the attribute value; left untouched on failure
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the query fails
HSAKMT_STATUS SVMRangeGetPrefetchNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                      HSAuint32 *PrefetchNode) {
    HSA_SVM_ATTRIBUTE prefetchAttr;
    prefetchAttr.type = HSA_SVM_ATTR_PREFETCH_LOC;
    prefetchAttr.value = 0;

    const int status = hsaKmtSVMGetAttr(MemoryAddress, SizeInBytes, 1, &prefetchAttr);
    if (status)
        return HSAKMT_STATUS_ERROR;

    *PrefetchNode = prefetchAttr.value;
    return HSAKMT_STATUS_SUCCESS;
}

// Set the HSA_SVM_ATTR_PREFETCH_LOC attribute of a memory range to @PrefetchNode.
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the call fails
HSAKMT_STATUS SVMRangePrefetchToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                     HSAuint32 PrefetchNode) {
    HSA_SVM_ATTRIBUTE prefetchAttr;
    prefetchAttr.type = HSA_SVM_ATTR_PREFETCH_LOC;
    prefetchAttr.value = PrefetchNode;

    const int status = hsaKmtSVMSetAttr(MemoryAddress, SizeInBytes, 1, &prefetchAttr);

    return status ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

// Set the HSA_SVM_ATTR_ACCESS attribute of a memory range to @NodeID.
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the call fails
HSAKMT_STATUS SVMRangeMapToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                HSAuint32 NodeID) {
    HSA_SVM_ATTRIBUTE accessAttr;
    accessAttr.type = HSA_SVM_ATTR_ACCESS;
    accessAttr.value = NodeID;

    const int status = hsaKmtSVMSetAttr(MemoryAddress, SizeInBytes, 1, &accessAttr);

    return status ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

// Set the HSA_SVM_ATTR_ACCESS_IN_PLACE attribute of a memory range to @NodeID.
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the call fails
HSAKMT_STATUS SVMRangeMapInPlaceToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                       HSAuint32 NodeID) {
    HSA_SVM_ATTRIBUTE inPlaceAttr;
    inPlaceAttr.type = HSA_SVM_ATTR_ACCESS_IN_PLACE;
    inPlaceAttr.value = NodeID;

    const int status = hsaKmtSVMSetAttr(MemoryAddress, SizeInBytes, 1, &inPlaceAttr);

    return status ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

// Set the HSA_SVM_ATTR_GRANULARITY attribute of a memory range to @Granularity.
// @return HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the call fails
HSAKMT_STATUS SVMRangSetGranularity(void *MemoryAddress, HSAuint64 SizeInBytes,
                                    HSAuint32 Granularity) {
    HSA_SVM_ATTRIBUTE granularityAttr;
    granularityAttr.type = HSA_SVM_ATTR_GRANULARITY;
    granularityAttr.value = Granularity;

    const int status = hsaKmtSVMSetAttr(MemoryAddress, SizeInBytes, 1, &granularityAttr);

    return status ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}

// Convenience constructors. Each one delegates to the full constructor with a
// NULL address, which makes that constructor mmap() the backing memory itself.

// Range of @size bytes registered for @GPUNode, with prefetch node 0.
HsaSVMRange::HsaSVMRange(HSAuint64 size, HSAuint32 GPUNode) :
    HsaSVMRange(NULL, size, GPUNode, 0) {}

// Range of @size bytes that is only mapped: noRegister is passed as true, so
// RegisterSVMRange() is not called.
HsaSVMRange::HsaSVMRange(HSAuint64 size) :
    HsaSVMRange(NULL, size, 0, 0, true) {}

// Range of @size bytes registered for @GPUNode with an explicit @PrefetchNode.
HsaSVMRange::HsaSVMRange(HSAuint64 size, HSAuint32 GPUNode, HSAuint32 PrefetchNode) :
    HsaSVMRange(NULL, size, GPUNode, PrefetchNode) {}

// Full constructor.
// @param addr         existing user memory, or NULL to have an anonymous
//                     private read/write mapping of @size bytes created here
// @param size         size of the range in bytes
// @param GPUNode      node passed to RegisterSVMRange() for access
// @param PrefetchNode node passed to RegisterSVMRange(); also stored in m_Node
// @param noRegister   when true, RegisterSVMRange() is skipped
// @param isLocal      when false, HSA_SVM_FLAG_COHERENT is added to the flags
// @param isExec       adds HSA_SVM_FLAG_GPU_EXEC
// @param isReadOnly   adds HSA_SVM_FLAG_GPU_RO
//
// If the internal mmap() fails, a gtest expectation failure is recorded, the
// object is left with a NULL buffer and nothing is registered.
HsaSVMRange::HsaSVMRange(void *addr, HSAuint64 size, HSAuint32 GPUNode, HSAuint32 PrefetchNode,
                         bool noRegister, bool isLocal, bool isExec, bool isReadOnly):
    // Initializers follow the member declaration order of the class
    m_Flags(HSA_SVM_FLAG_HOST_ACCESS),
    m_Size(size),
    m_pUser(addr),
    m_SelfAllocated(false),
    m_Local(isLocal),
    m_Node(PrefetchNode) {
    if (!m_pUser) {
        void *mapping = mmap(0, m_Size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        EXPECT_NE(MAP_FAILED, mapping);
        // Never keep MAP_FAILED as the buffer: it must not be registered,
        // written through, or handed to munmap()
        if (mapping == MAP_FAILED)
            return;
        m_pUser = mapping;
        m_SelfAllocated = true;
    }

    // Host access is always requested; non-local ranges are also coherent
    if (!m_Local)
        m_Flags |= HSA_SVM_FLAG_COHERENT;

    if (isReadOnly)
        m_Flags |= HSA_SVM_FLAG_GPU_RO;
    if (isExec)
        m_Flags |= HSA_SVM_FLAG_GPU_EXEC;

    if (!noRegister)
        EXPECT_SUCCESS(RegisterSVMRange(GPUNode, m_pUser, m_Size, PrefetchNode, m_Flags));
}

// Unmaps the buffer only when this object created the mapping itself;
// caller-provided memory is left alone.
HsaSVMRange::~HsaSVMRange() {
    if (m_pUser == NULL)
        return;

    if (m_SelfAllocated)
        munmap(m_pUser, m_Size);

    m_pUser = NULL;
}

// Fill @size bytes of the range with the 32-bit @value, starting at @offset.
// @param value  pattern written to every 32-bit word
// @param offset byte offset into the range where filling starts
// @param size   number of bytes to fill; 0 selects m_Size
// A gtest failure is raised (and nothing is written) when the request does not
// fit into the range or the range has no buffer.
void HsaSVMRange::Fill(HSAuint32 value, HSAuint64 offset, HSAuint64 size) {
    HSAuint64 i;
    HSAuint32 *ptr = NULL;

    size = size ? size : m_Size;
    EXPECT_EQ((size & (sizeof(HSAuint32) - 1)), 0) << "Not word aligned. Call Fill(unsigned char)";
    // Written as a subtraction so that a huge offset/size cannot wrap around
    // and slip past the bounds check
    ASSERT_TRUE(offset <= m_Size && size <= m_Size - offset) << "Buffer Overflow" << std::endl;

    if (m_pUser != NULL)
        ptr = reinterpret_cast<HSAuint32 *>(reinterpret_cast<char *>(m_pUser) + offset);

    ASSERT_NOTNULL(ptr);

    for (i = 0; i < size / sizeof(HSAuint32); i++)
        ptr[i] = value;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestUtil.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD__TEST__UTIL__H__
#define __KFD__TEST__UTIL__H__

#include <gtest/gtest.h>
#include <vector>
#include "OSWrapper.hpp"
#include "GoogleTestExtension.hpp"
#include "hsakmt/hsakmt.h"

class BaseQueue;
// Number of elements of a true array (not a pointer): total size / element size.
#define ARRAY_SIZE(_x) (sizeof(_x)/sizeof(_x[0]))
// Round x up to a multiple of align. The mask arithmetic is only correct when
// align is a power of two; the result has type uint64_t.
#define ALIGN_UP(x, align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
// Scale a counter value by 1000/27 on dGPU and by 1000/100 otherwise.
// NOTE(review): this presumably treats the counter as ticking at 27 MHz on
// dGPU and 100 MHz elsewhere to obtain nanoseconds - confirm.
#define CounterToNanoSec(x) ((x) * 1000 / (hsakmt_is_dgpu() ? 27 : 100))

// Free-standing test helpers. They are only declared in this header; their
// exact behaviour is defined by the implementation and is not restated here.
void WaitUntilInput();
HSAKMT_STATUS fscanf_dec(const char *file, uint32_t *num);
uint64_t RoundToPowerOf2(uint64_t val);

// @brief: waits until the value is written to the buffer or until time out if received through args
// timeOut defaults to the global g_TestTimeOut.
bool WaitOnValue(const volatile unsigned int *buf, unsigned int value, unsigned int timeOut = g_TestTimeOut);

// The two halves are returned through the reference parameters rLoPart / rHiPart.
void SplitU64(const HSAuint64 value, unsigned int& rLoPart, unsigned int& rHiPart);

bool CheckEmuModeEnabled();

bool GetHwCapabilityHWS();

// Event factories: the created event is returned through the Event out-parameter.
HSAKMT_STATUS CreateQueueTypeEvent(bool ManualReset, bool IsSignaled, unsigned int NodeId, HsaEvent** Event);
HSAKMT_STATUS CreateHWExceptionEvent(bool ManualReset, bool IsSignaled, unsigned int NodeId, HsaEvent** Event);

bool hsakmt_is_dgpu();
bool isTonga(const HsaNodeProperties *props);
bool hasPciAtomicsSupport(int node);
unsigned int FamilyIdFromNode(const HsaNodeProperties *props);
// Note: the top-level const on this by-value return type has no effect for callers.
const uint32_t GetGfxVersion(const HsaNodeProperties *props);

// NOTE(review): the four pointer parameters look like out-parameters that
// receive the queue/engine counts for the node described by props - confirm.
void GetHwQueueInfo(const HsaNodeProperties *props,
                 unsigned int *p_num_cp_queues,
                 unsigned int *p_num_sdma_engines,
                 unsigned int *p_num_sdma_xgmi_engines,
                 unsigned int *p_num_sdma_queues_per_engine);

HSAuint64 GetSystemTickCountInMicroSec();

// @class HsaMemoryBuffer - owning wrapper around a memory buffer used by the tests.
// Objects cannot be copied: the copy constructor and copy assignment are
// declared private. Only declarations live in this header.
class HsaMemoryBuffer {
 public:
    // Shared "null" buffer instance (defined outside this header).
    static const HsaMemoryBuffer Null;

 public:
    // Create a buffer of @size bytes for @node. The boolean parameters select
    // buffer properties; all but @zero default to false.
    HsaMemoryBuffer(HSAuint64 size, unsigned int node, bool zero = true, bool isLocal = false,
                    bool isExec = false, bool isScratch = false, bool isReadOnly = false, bool isUncached = false, bool NonPaged = false);
    // Wrap existing memory at @addr of @size bytes.
    HsaMemoryBuffer(void *addr, HSAuint64 size);
    // Reinterpret the buffer address (m_pBuf) as RetType, e.g. As<unsigned int*>().
    template<typename RetType>
    RetType As() {
        return reinterpret_cast<RetType>(m_pBuf);
    }

    // Const-object overload of As(); the const applies to RetType itself.
    template<typename RetType>
    const RetType As() const {
        return reinterpret_cast<const RetType>(m_pBuf);
    }

    /* Fill @size bytes of buffer with @value starting from @offset
     * If @size is 0, the whole buffer is filled with @value
     */
    void Fill(unsigned char value, HSAuint64 offset = 0, HSAuint64 size = 0);
    void Fill(HSAuint32 value, HSAuint64 offset = 0, HSAuint64 size = 0);
    // int overload: forwards to the HSAuint32 version after a cast.
    void Fill(int value, HSAuint64 offset = 0, HSAuint64 size = 0) {
              Fill((HSAuint32)value, offset, size);
    }
    // Overload taking a queue in addition to the value/offset/size.
    void Fill(HSAuint32 value, BaseQueue& baseQueue,
              HSAuint64 offset = 0, HSAuint64 size = 0);

    bool IsPattern(HSAuint64 location, HSAuint32 pattern);
    bool IsPattern(HSAuint64 location, HSAuint32 pattern,
                   BaseQueue& baseQueue, volatile HSAuint32 *tmp);

    // NOTE(review): returns unsigned int although m_Size is HSAuint64 - confirm
    // that buffers of 4 GiB or more are never queried through this accessor.
    unsigned int Size();
    HsaMemFlags Flags();
    unsigned int Node() const;

    int MapMemToNodes(unsigned int *nodes, unsigned int nodes_num);
    int UnmapMemToNodes(unsigned int *nodes, unsigned int nodes_num);

    void *GetUserPtr() { return m_pUser; }
    bool isLocal() { return m_Local; }
    ~HsaMemoryBuffer();

 private:
    // Disable copy
    HsaMemoryBuffer(const HsaMemoryBuffer&);
    const HsaMemoryBuffer& operator=(const HsaMemoryBuffer&);

    void UnmapAllNodes();
    // Default construction is private.
    HsaMemoryBuffer();

 private:
    HsaMemFlags m_Flags;
    HSAuint64 m_Size;
    void* m_pUser;       // returned by GetUserPtr()
    void* m_pBuf;        // address handed out by As<>()
    bool m_Local;        // returned by isLocal()
    unsigned int m_Node;
    HSAuint64 m_MappedNodes;
};
// SVM range helpers. Each wraps one hsaKmtSVMSetAttr()/hsaKmtSVMGetAttr() call
// and returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR on any failure.

// Sets four attributes at once: prefetch location and preferred location
// (both PrefetchNode), the SVM flags, and access for GPUNode.
HSAKMT_STATUS RegisterSVMRange(HSAuint32 GPUNode, void *MemoryAddress,
                               HSAuint64 SizeInBytes, HSAuint32 PrefetchNode,
                               HSAuint32 SVMFlags);
// Reads the prefetch location into *PrefetchNode (untouched on failure).
HSAKMT_STATUS SVMRangeGetPrefetchNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                      HSAuint32 *PrefetchNode);
// Sets the prefetch location to PrefetchNode.
HSAKMT_STATUS SVMRangePrefetchToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                     HSAuint32 PrefetchNode);
// Sets HSA_SVM_ATTR_ACCESS for NodeID.
HSAKMT_STATUS SVMRangeMapToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                     HSAuint32 NodeID);
// Sets HSA_SVM_ATTR_ACCESS_IN_PLACE for NodeID.
HSAKMT_STATUS SVMRangeMapInPlaceToNode(void *MemoryAddress, HSAuint64 SizeInBytes,
                                     HSAuint32 NodeID);
// Sets HSA_SVM_ATTR_GRANULARITY to Granularity.
HSAKMT_STATUS SVMRangSetGranularity(void *MemoryAddress, HSAuint64 SizeInBytes,
                                    HSAuint32 Granularity);

// @class HsaSVMRange - a range of user memory that is optionally registered
// as an SVM range through RegisterSVMRange().
class HsaSVMRange {
 public:
    // The three short constructors map the memory internally (NULL address).
    HsaSVMRange(HSAuint64 size, HSAuint32 GPUNode);
    HsaSVMRange(HSAuint64 size, HSAuint32 GPUNode, HSAuint32 PreferredNode);
    // Maps the memory but skips registration (noRegister = true).
    HsaSVMRange(HSAuint64 size);
    // Full constructor. A NULL addr makes the object mmap() its own anonymous
    // memory; PreferredNode is used as both prefetch and preferred location
    // when registering and is stored in m_Node.
    HsaSVMRange(void *addr, HSAuint64 size, HSAuint32 GPUNode, HSAuint32 PreferredNode = 0,
                bool noRegister = false, bool isLocal = false, bool isExec = false,
                bool isReadOnly = false);
    // Reinterpret the user address (m_pUser) as RetType.
    template<typename RetType>
    RetType As() {
        return reinterpret_cast<RetType>(m_pUser);
    }

    // Const-object overload of As(); the const applies to RetType itself.
    template<typename RetType>
    const RetType As() const {
        return reinterpret_cast<const RetType>(m_pUser);
    }
    // Unmaps the memory only if it was mapped by the constructor.
    ~HsaSVMRange();

    // Fill @size bytes (0 = whole range) with the 32-bit @value from @offset.
    void Fill(HSAuint32 value, HSAuint64 offset = 0, HSAuint64 size = 0);

 private:
    HSAuint32 m_Flags;       // HSA_SVM_FLAG_* bits passed at registration
    HSAuint64 m_Size;        // size of the range in bytes
    void* m_pUser;           // start of the range
    bool m_SelfAllocated;    // true when the constructor mmap()ed m_pUser
    bool m_Local;            // constructor's isLocal argument
    unsigned int m_Node;     // constructor's prefetch/preferred node argument
};

// @class HsaInteropMemoryBuffer - wrapper for a buffer identified by a device
// handle and a buffer handle. Objects cannot be copied: the copy constructor
// and copy assignment are declared private. Only declarations live here.
class HsaInteropMemoryBuffer {
 public:
    HsaInteropMemoryBuffer(HSAuint64 device_handle, HSAuint64 buffer_handle, HSAuint64 size, unsigned int node);

    // Reinterpret the buffer address (m_pBuf) as RetType.
    template<typename RetType>
    RetType As() {
        return reinterpret_cast<RetType>(m_pBuf);
    }

    // Const-object overload of As(); the const applies to RetType itself.
    template<typename RetType>
    const RetType As() const {
        return reinterpret_cast<const RetType>(m_pBuf);
    }

    // NOTE(review): returns unsigned int although m_Size is HSAuint64 - confirm
    // that large buffers are never queried through this accessor.
    unsigned int Size();

    ~HsaInteropMemoryBuffer();

 private:
    // Disable copy
    HsaInteropMemoryBuffer(const HsaInteropMemoryBuffer&);
    const HsaInteropMemoryBuffer& operator=(const HsaInteropMemoryBuffer&);

 private:
    HSAuint64 m_Size;
    void* m_pBuf;                 // address handed out by As<>()
    HSAuint64 m_graphic_handle;
    unsigned int m_Node;
};

// @class HsaNodeInfo - Gather and store all HSA node information from Thunk.
class HsaNodeInfo {
    // List containing HsaNodeProperties of all Nodes available
    std::vector<HsaNodeProperties*> m_HsaNodeProps;

    // List of HSA Nodes that contain a GPU. This includes both APU and dGPU
    std::vector<int> m_NodesWithGPU;

    // List of HSA Nodes with CPU only
    std::vector<int> m_NodesWithoutGPU;

 public:
    HsaNodeInfo();
    // Calls Delete().
    ~HsaNodeInfo();

    bool Init(int NumOfNodes);
    void Delete();

    /* This function should be deprecated soon. This is for transition purposes only.
     * Currently, KfdTest is designed to test only ONE node. This function acts
     * as transition.
     */
    // Returns NULL when there is no default GPU node.
    const HsaNodeProperties* HsaDefaultGPUNodeProperties() const;
    // Returns -1 when there is no GPU node; prefers g_TestNodeId if it is a
    // valid GPU node, otherwise the first GPU node.
    const int HsaDefaultGPUNode() const;

    /* TODO: Use the following two functions to support multi-GPU.
     * const std::vector<int>& GpuNodes = GetNodesWithGPU()
     * for (..GpuNodes.size()..) GetNodeProperties(GpuNodes.at(i))
     */
    const std::vector<int>& GetNodesWithGPU() const;

    /* get gpu index from gpuNodeID; -1 if gpuNodeId is not a known GPU node */
    const int HsaGPUindexFromGpuNode(int gpuNodeId) const;

    // @param NodeNum index of the node we are looking at
    // @return the HsaNodeProperties stored for that node; an out-of-range
    //         index throws std::out_of_range (std::vector::at)
    const HsaNodeProperties* GetNodeProperties(int NodeNum) const;

    // Logs a summary of every node and the default GPU node.
    void PrintNodeInfo() const;
    // True if the node has a HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC memory bank.
    const bool IsGPUNodeLargeBar(int node) const;
    // True if the node is not a CPU with compute cores and has a weight-13 IO
    // link to a node with CPU cores.
    const bool IsAppAPU(int node) const;
    // True if one of @node's IO links has @peer as its destination.
    const bool IsPeerAccessibleByNode(int peer, int node) const;
    // @brief Find the first available Large-BAR GPU node
    // @return: Node ID if successful or -1
    const int FindLargeBarGPUNode() const;
    // True if both nodes report the same non-zero HiveID.
    const bool AreGPUNodesXGMI(int node0, int node1) const;
    // Appends @node and every GPU node accessible from it to @peers and
    // returns the resulting size of @peers.
    int FindAccessiblePeers(std::vector<int> *peers,
                                        HSAuint32 node) const;
    /* @brief: to determine if the node is XGMI-linked to CPU
     * @param: node index of the node we are looking at
     * @return: bool true or false
     */
    const bool IsNodeXGMItoCPU(int node) const;
};

#endif  // __KFD__TEST__UTIL__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestUtilQueue.cpp
================================================
/*
 * Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <algorithm>
#include <memory>
#include <vector>
#include <list>
#include "SDMAQueue.hpp"
#include "PM4Queue.hpp"
#include "SDMAPacket.hpp"
#include "PM4Packet.hpp"
#include "KFDTestUtil.hpp"
#include "KFDTestUtilQueue.hpp"
#include "KFDBaseComponentTest.hpp"

/* Throughput helper: (size >> 20) * 10^9 / time, evaluated in unsigned 64-bit.
 * With size in bytes this yields MiB per second if time is in nanoseconds.
 * NOTE(review): the nanosecond unit is inferred from the 10^9 factor - confirm
 * against callers. size is truncated to whole MiB before scaling.
 */
#define MB_PER_SEC(size, time) ((((size) * 1ULL) >> 20) * 1000ULL * 1000ULL * 1000ULL / (time))

/* Forward declarations; both classes are defined further down in this file.*/
class AsyncMPSQ;
class AsyncMPMQ;

/* Shared-ownership handle to one AsyncMPSQ and a list of such handles.*/
typedef std::shared_ptr<AsyncMPSQ> sharedAsyncMPSQ;
typedef std::list<sharedAsyncMPSQ> AsyncMPSQList;

/* Shared-ownership handle to a packet (any BasePacket) and a list of packets.*/
typedef std::shared_ptr<BasePacket> sharedPacket;
typedef std::list<sharedPacket> PacketList;

/* AsyncMPSQ is short for Async multiple packet single queue.
 * It is allowed to place a list of packets to run on one queue of the specified GPU node.
 */
class AsyncMPSQ {
    public:
        AsyncMPSQ() : m_queue(NULL), m_buf(NULL), m_event(NULL) { /*do nothing*/}

        virtual ~AsyncMPSQ(void) { Destroy(); }

        /* It is the main function to deal with the packet and queue.*/
        void PlacePacketOnNode(PacketList &packetList, int node, TSPattern tsp);

        /* Run the packets placed on nodes and return immediately.*/
        void Submit(void) { ASSERT_NE(m_queue, nullptr); m_queue->SubmitPacket(); }

        /* Return only when all packets are consumed.
         * If there is any packet issues some IO operations, wait these IO to complete too.
         */
        void Wait(void) {
            ASSERT_NE(m_queue, nullptr);
            m_queue->Wait4PacketConsumption(m_event, std::max((unsigned int)6000, g_TestTimeOut));
        }

        /* Report the time used between packet [begin, end) in Global Counter on success.
         * Return 0 on failure.
         */
        HSAuint64 Report(int indexOfPacketBegin = 0, int indexOfPacketEnd = 0);
        /* Report the timestamp around the packet.
         * Return the time used on success.
         * Return 0 on failure.
         */
        HSAuint64 Report(int indexOfPacket, HSAuint64 &tsBegin, HSAuint64 &tsEnd);

    private:
        BaseQueue *m_queue;
        HSA_QUEUE_TYPE m_queueType;
        HsaEvent *m_event;
        /* m_ts points to m_buf's memory.*/
        HsaMemoryBuffer *m_buf;
        TimeStamp *m_ts;
        unsigned m_ts_count;
        TSPattern m_ts_pattern;

        void AllocTimeStampBuf(int packetCount);
        void Destroy();

        /* It determines which queue will be created.*/
        void InitQueueType(PACKETTYPE packetType) {
            if (packetType == PACKETTYPE_SDMA)
                m_queueType = HSA_QUEUE_SDMA;
            else if (packetType == PACKETTYPE_PM4)
                m_queueType = HSA_QUEUE_COMPUTE;
            else
                WARN() << "Unsupported queue type!" << std::endl;
        }

        unsigned int TimePacketSize(void) {
            if (m_queueType == HSA_QUEUE_SDMA)
                return SDMATimePacket(0).SizeInBytes();
            else if (m_queueType == HSA_QUEUE_COMPUTE)
                return PM4ReleaseMemoryPacket(m_queue->GetFamilyId(), 0, 0, 0, 0, 0).SizeInBytes();
            return 0;
        }

        void CreateNewQueue(int node, unsigned int queueSize) {
            if (m_queueType == HSA_QUEUE_SDMA)
                m_queue = new SDMAQueue();
            else if (m_queueType == HSA_QUEUE_COMPUTE)
                m_queue = new PM4Queue();
            else {
                m_queue = NULL;
                WARN() << "Unsupported queue type!" << std::endl;
            }

            if (m_queue)
                ASSERT_SUCCESS(m_queue->Create(node, queueSize));
        }

        void PlaceTimestampPacket(void *addr) {
            if (m_queueType == HSA_QUEUE_SDMA)
                PlacePacket(SDMATimePacket(addr));
            else if (m_queueType == HSA_QUEUE_COMPUTE)
                PlacePacket(
                        PM4ReleaseMemoryPacket(m_queue->GetFamilyId(), true, (HSAuint64)addr, 0, true, true));
            else
                WARN() << "Unsupported queue type!" << std::endl;
        }

        void PlacePacket(const BasePacket &packet) {
            m_queue->PlacePacket(packet);
        }
};

void AsyncMPSQ::Destroy(void) {
    /* Delete queue first.*/
    if (m_queue) {
        delete m_queue;
    }

    if (m_buf)
        delete m_buf;

    if (m_event)
        hsaKmtDestroyEvent(m_event);
}

void AsyncMPSQ::AllocTimeStampBuf(int packetCount) {
    if (m_ts_pattern == NOTS) {
        m_buf = NULL;
        m_ts = NULL;
        m_ts_count = 0;
        return;
    }

    if (m_ts_pattern == ALLTS)
        /* One extra timestamp packet.*/
        m_ts_count = packetCount + 1;
    else
        m_ts_count = 2;

    /* One more timestamp space to fit with alignment.*/
    HSAuint64 size = ALIGN_UP(sizeof(TimeStamp) * (m_ts_count + 1), PAGE_SIZE);

    m_buf = new HsaMemoryBuffer(size, 0, true, false);

    TimeStamp *array = m_buf->As<TimeStamp*>();

    /* SDMATimePacket need 32bytes aligned boundary dst address*/
    m_ts = reinterpret_cast<TimeStamp *>ALIGN_UP(array, sizeof(TimeStamp));
}

/* Build a fresh queue on @node and place @packets on it, interleaved with
 * timestamp packets as requested by @tsp:
 *  - NOTS:      no timestamp packets
 *  - ALLTS:     one before the first packet and one after every packet
 *  - HEAD_TAIL: one before the first and one after the last packet
 * Nothing is submitted here; call Submit() afterwards.
 * Any resources from a previous call are released first. Failures are
 * reported through gtest and abort the placement.
 */
void AsyncMPSQ::PlacePacketOnNode(PacketList &packets, int node, TSPattern tsp = ALLTS) {
    int nPacket = packets.size();

    if (nPacket == 0) {
        WARN() << "Empty packetList!" << std::endl;
        return;
    }

    /*1: All resources should be freed.*/
    Destroy();
    /* Forget the released handles so that an early exit below cannot leave
     * stale pointers behind for the destructor to release a second time.
     */
    m_queue = NULL;
    m_buf = NULL;
    m_ts = NULL;
    m_event = NULL;

    /*2: Must initialize queueType first.*/
    InitQueueType(packets.front()->PacketType());
    /*3: Initialize timestamp buf second with the pattern.*/
    m_ts_pattern = tsp;
    AllocTimeStampBuf(nPacket);
    /*4: Create a event for Wait().*/
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, node, &m_event));

    int i = -1;
    int packetSize = 0;
    /* Size of one timestamp packet. The queue does not exist yet at this
     * point, so the family id is taken from the node instead of from m_queue.
     */
    unsigned int timePacketBytes = 0;
    if (m_queueType == HSA_QUEUE_SDMA)
        timePacketBytes = SDMATimePacket(0).SizeInBytes();
    else if (m_queueType == HSA_QUEUE_COMPUTE)
        timePacketBytes = PM4ReleaseMemoryPacket(
                g_baseTest->GetFamilyIdFromNodeId(node), 0, 0, 0, 0, 0).SizeInBytes();
    /* Calculate the space to put all timestamp packet.*/
    int timePacketSize = timePacketBytes * m_ts_count;
    /* Another one page space to put fence, trap, etc*/
    int extraPacketSize = PAGE_SIZE + timePacketSize;

    /* To calculate the total packet size we will need to create the queue.
     * As the packet in the vector might be different with each other,
     * we have no other way to calculate the queuesize.
     */
    for (auto &packet : packets)
        packetSize += packet->SizeInBytes();

    /* queueSize need be power of 2.*/
    const int queueSize = RoundToPowerOf2(packetSize + extraPacketSize);

    /*5: Create a new queue on node for the packets.*/
    CreateNewQueue(node, queueSize);
    /* An unsupported packet type leaves no queue to place packets on.*/
    ASSERT_NE(m_queue, nullptr);

    if (tsp != NOTS) {
        i++;
        PlaceTimestampPacket(m_ts + i);
    }

    for (auto &packet : packets) {
        PlacePacket(*packet);
        if (tsp == ALLTS) {
            i++;
            PlaceTimestampPacket(m_ts + i);
        }
    }

    if (tsp == HEAD_TAIL) {
        i++;
        PlaceTimestampPacket(m_ts + i);
    }

    /* Every reserved timestamp slot must have been used exactly once.*/
    ASSERT_EQ(static_cast<unsigned>(i + 1), m_ts_count);
}

/* Fetch the two timestamps recorded around one packet.
 * @param indexOfPacket index of the packet inside its queue; ignored (forced
 *                      to 0) when the pattern is HEAD_TAIL
 * @param begin         receives the timestamp at slot indexOfPacket
 * @param end           receives the timestamp at slot indexOfPacket + 1
 * @return end - begin, or 0 if any of the sanity checks failed (begin/end are
 *         then left untouched)
 */
HSAuint64 AsyncMPSQ::Report(int indexOfPacket, HSAuint64 &begin, HSAuint64 &end) {
    /* Should not get any timestamp if NOTS is specified.*/
    /* 'error' counts failed checks: ++error only appears inside the streamed
     * failure messages, so this relies on gtest evaluating a streamed message
     * only when its expectation fails.
     */
    int error = 0;
    EXPECT_NE(m_ts_pattern, NOTS)
        << " Error " << ++error << ": No timestamp would be reported!" << std::endl;

    if (m_ts_pattern == HEAD_TAIL)
        indexOfPacket = 0;

    EXPECT_NE(m_ts, nullptr)
        << " Error " << ++error << ": No timestamp buf!" << std::endl;
    /* With ALLTS, m_ts_count is equal to packets count + 1, see PlacePacketOnNode().
     * So the max index of a packet is m_ts_count - 2.
     * make it unsigned to defend any minus values.
     * Note that m_ts_count - 2 is unsigned arithmetic and wraps around when
     * m_ts_count is below 2.
     */
    EXPECT_GE(m_ts_count - 2, (unsigned)indexOfPacket)
        << " Error " << ++error << ": Index overflow!" << std::endl;

    if (error)
        return 0;

    begin = m_ts[indexOfPacket].timestamp;
    end = m_ts[indexOfPacket + 1].timestamp;
    return end - begin;
}

/* Report the time spent on the packets in [indexOfPacketBegin, indexOfPacketEnd).
 * @param indexOfPacketBegin index of the first packet of the interval
 * @param indexOfPacketEnd   one past the last packet; 0 selects m_ts_count - 1
 * @return the timestamp after the last packet minus the timestamp before the
 *         first one, or 0 when the indices are inverted or a per-packet
 *         Report() call returns 0
 */
HSAuint64 AsyncMPSQ::Report(int indexOfPacketBegin, int indexOfPacketEnd) {
    /* ts[0]/ts[1] bracket the first packet, ts[2]/ts[3] bracket the last one.*/
    HSAuint64 ts[4];
    /* Incremented only inside the streamed failure message below; relies on
     * gtest evaluating that message only when the expectation fails.
     */
    int error = 0;

    if (indexOfPacketEnd == 0)
        indexOfPacketEnd = m_ts_count - 1;

    EXPECT_GT((unsigned)indexOfPacketEnd, (unsigned)indexOfPacketBegin)
        << " Error " << ++error << ": Index inverted!" << std::endl;

    if (error)
        return 0;
    /* Get the timestamps around the two packets.*/
    /* A zero return is treated as failure here, so a packet whose begin and
     * end timestamps are equal is also rejected.
     */
    if (!Report(indexOfPacketBegin, ts[0], ts[1]))
        return 0;
    /* [begin, end)*/
    if (!Report(indexOfPacketEnd - 1, ts[2], ts[3]))
        return 0;

    /* Only a non-fatal warning: the difference is returned regardless.*/
    EXPECT_GT(ts[3], ts[0])
        << " Waring: Might be wrong timestamp values!" << std::endl;

    return ts[3] - ts[0];
}

/* AsyncMPMQ is short for Async multiple packet multiple queue.
 * AsyncMPMQ manages a list of AsyncMPSQ.
 * So the packet can be running on multiple GPU nodes at same time.
 */

class AsyncMPMQ {
    public:
        AsyncMPMQ(void) { /* do nothing*/}

        virtual ~AsyncMPMQ(void) { /*do nothing*/}

        sharedAsyncMPSQ PlacePacketOnNode(PacketList &packetList, int node, TSPattern tsp = ALLTS) {
            /* Create a sharedAsyncMPSQ object and push it into the AsyncMPSQList.
             * As we might submit packet to same GPU nodes several times, AsyncMPSQ *
             * is returned to stand for the AsyncMPSQ it is created with
             */
            sharedAsyncMPSQ mpsq_ptr(new AsyncMPSQ);
            mpsq_ptr->PlacePacketOnNode(packetList, node, tsp);
            m_mpsqList.push_back(mpsq_ptr);
            return mpsq_ptr;
        }

        void Submit(void) {
            for (auto &mpsq : m_mpsqList)
                mpsq->Submit();
        }

        void Wait(void) {
            for (auto &mpsq : m_mpsqList)
                mpsq->Wait();
        }

    private:
        AsyncMPSQList m_mpsqList;
};


/*
 * SDMA queue helper functions.
 */

/* Ordering used to sort copies: by node first, then by group within a node.*/
bool sort_SDMACopyParams(const SDMACopyParams &a1, const SDMACopyParams &a2) {
    return (a1.node == a2.node) ? (a1.group < a2.group)
                                : (a1.node < a2.node);
}

/*
 * Copy from src to dst with corresponding sDMA.
 * It will try to merge copy on same node into one queue unless
 * caller forbid it by setting mashup to 0 and SDMACopyParams::group to different values.
 * On condition of mashup is 1, it will re-sort array into mergeable state.
 * All mergeable copy will be placed together.
 * On condition os mashup is 0, it keeps array in original order.
 * It will merge nearby copy if they have same group and node anyway.
 */
void sdma_multicopy(std::vector<SDMACopyParams> &array, int mashup, TSPattern tsp) {
    int i, packet_index = 0, queue_index = 0;
    PacketList packetList;
    AsyncMPMQ obj;
    std::vector<sharedAsyncMPSQ> handle;

    /* Sort it and then reduce the amount of queues if caller permits.
     * We might change the order of array only here.
     */
    if (mashup)
        std::sort(array.begin(), array.end(), sort_SDMACopyParams);

    for (i = 0; i < array.size(); i++) {
        sharedPacket packet(new
                SDMACopyDataPacket(g_baseTest->GetFamilyIdFromNodeId(array[i].node), array[i].dst, array[i].src, array[i].size));
        packetList.push_back(packet);

        /* We put the real queue_id in local handle[] to reduce some assignment.*/
        array[i].queue_id = queue_index;
        /* Every queue has its packets with the index starts from 0.*/
        array[i].packet_id = packet_index++;

        /* If next copy is on same node and group, try to merge it into same queue.*/
        if (i + 1 < array.size() && array[i].node == array[i + 1].node
                                    && array[i].group == array[i + 1].group)
                continue;

        /* Now we have prepare one packetList, place packet into the queue on GPU node.*/
        queue_index++;
        handle.push_back(obj.PlacePacketOnNode(packetList, array[i].node, tsp));

        /* Prepare a new(empty) packetList.*/
        packetList.clear();

        /* Prepare a new(zero) packet index for the packets in the new queue.*/
        packet_index = 0;
    }

    obj.Submit();
    obj.Wait();

    if (tsp == NOTS)
        return;

    /* Get the time used by packet.*/
    for (i = 0; i < array.size(); i++)
        array[i].timeConsumption = (handle[array[i].queue_id])->Report(
                array[i].packet_id, array[i].timeBegin, array[i].timeEnd);
}

static
void sdma_multicopy_report(std::vector<SDMACopyParams> &array, HSAuint64 countPerGroup, std::stringstream *msg,
                                HSAuint64 &timeConsumptionMin, HSAuint64 &timeConsumptionMax,
                                HSAuint64 &totalSizeMin, HSAuint64 &totalSizeMax) {
    HSAuint64 begin, end;
    /* There can be different count of copies in different groups in the future.
     * But assume they are same now.
     */
    HSAuint64 group = array.size() / countPerGroup;
    HSAuint64 interval = -1;
    timeConsumptionMin = -1;
    timeConsumptionMax = 0;
    totalSizeMin = totalSizeMax = 0;

    /* Try to find out
     * 1) The max/min timeConsumption of one copy in all copies.
     * 2) The minimal average of timeConsumption of one packet in all copies.
     * And one char # or - stands for one interval, aka minimal average.
     * Say, one copy use 10ns with 10 copy packets. the other copy use 20ns
     * with 10 copy packets. So the interval is 1ns, the timeConsumption is 20ns.
     * So the ouput msg will be like
     * ########## //copy1 10ns
     * #---##----####### //copy2 20ns
     */
    for (int i = 0; i < group; i++) {
        HSAuint64 begin, end, base = i * countPerGroup;

        begin = array[base].timeBegin;
        end = array[base + countPerGroup - 1].timeEnd;

        if (begin == 0 && end == 0)
            continue;

        if (timeConsumptionMax < end - begin)
            timeConsumptionMax = end - begin;

        if (timeConsumptionMin > end - begin)
            timeConsumptionMin = end - begin;
    }

    interval = timeConsumptionMin / countPerGroup;

    /* Draw the timestamp event for each copy list.
     * - means still doing copy.
     * # means just finish one copy.
     */
    if (msg)
        for (int i = 0; i < group; i++) {
            HSAuint64 base = i * countPerGroup;
            HSAuint64 last = array[base].timeBegin;
            HSAuint64 timeConsumption;

            *msg << "[" << array[base].node << " : " << array[base].group << "] ";

            for (int j = 0; j < countPerGroup; j++) {
                timeConsumption = array[base + j].timeEnd - last;

                while (timeConsumption >= interval) {
                    timeConsumption -= interval;
                    last += interval;

                    if (timeConsumption >= interval)
                        *msg << "-";
                    else
                        *msg << "#";
                };
            }

            *msg << std::endl;
        }

    /* Try to find out
     * 1) The size of all copies in all queues.
     * 2) The size of the copies running within the same period in all queues.
     * We assume all packets begin to run at same time.
     */
    for (int i = 0; i < group; i++) {
        HSAuint64 base = i * countPerGroup;
        HSAuint64 time = 0;

        for (int j = 0; j < countPerGroup; j++) {
            totalSizeMax += array[base + j].size;

            if (time < timeConsumptionMin) {
                time += array[base + j].timeConsumption;
                totalSizeMin += array[base + j].size;
            }
        }
    }
}

/*
 * Run every entry of copyArray on its own sDMA queue.
 *
 * When minSpeed or maxSpeed is requested, each copy is repeated 100 times in its
 * queue (otherwise once) and the results are converted with MB_PER_SEC().
 * msg, when not NULL, receives the per-queue timeline written by
 * sdma_multicopy_report().
 */
void
sdma_multicopy(SDMACopyParams *copyArray, int arrayCount,
                        HSAuint64 *minSpeed, HSAuint64 *maxSpeed, std::stringstream *msg) {
    HSAuint64 countPerGroup = 1;
    if (minSpeed || maxSpeed)
        countPerGroup = 100;

    std::vector<SDMACopyParams> array;
    HSAuint64 totalSizeMin, totalSizeMax, timeConsumptionMin, timeConsumptionMax;

    for (int copyIdx = 0; copyIdx < arrayCount; copyIdx++) {
        /* A distinct group per input copy keeps each copy on its own queue.*/
        copyArray[copyIdx].group = copyIdx;
        array.insert(array.end(), countPerGroup, copyArray[copyIdx]);
    }

    /* Keep the order as built above and timestamp every packet.*/
    sdma_multicopy(array, 0, ALLTS);

    sdma_multicopy_report(array, countPerGroup, msg,
            timeConsumptionMin, timeConsumptionMax,
            totalSizeMin, totalSizeMax);

    if (minSpeed)
        *minSpeed = MB_PER_SEC(totalSizeMin, CounterToNanoSec(timeConsumptionMin));

    if (maxSpeed)
        *maxSpeed = MB_PER_SEC(totalSizeMax, CounterToNanoSec(timeConsumptionMax));
}

/*
 * PM4 queue helper functions.
 */
// TODO


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTestUtilQueue.hpp
================================================
/*
 * Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD__TEST__UTIL__QUEUE__H__
#define __KFD__TEST__UTIL__QUEUE__H__

#include "hsakmt/hsakmt.h"
#include <vector>

/* Timing record made of four 64-bit counters.
 * NOTE(review): nothing in this header uses the type; the field names match the
 * timing outputs of SDMACopyParams, so it presumably describes one measured
 * packet - confirm against the queue implementation.
 */
typedef struct {
        HSAuint64 timestamp;
        HSAuint64 timeConsumption;
        HSAuint64 timeBegin;
        HSAuint64 timeEnd;
} TimeStamp;

/* There are three patterns for inserting timestamp packets:
 * NOTS: No timestamp packet is inserted.
 * ALLTS: Put a timestamp packet around every packet. This is the default behavior.
 *    It will look like |timestamp|packet|timestamp|...|packet|timestamp|
 * HEAD_TAIL: Put a timestamp packet at the head and the tail to measure the overhead of a bunch of packets.
 *    It will look like |timestamp|packet|...|packet|timestamp|
 */
typedef enum {
    NOTS = 0,
    ALLTS = 1,
    HEAD_TAIL = 2,
} TSPattern;

/* Description of one sDMA copy handed to sdma_multicopy(), together with the
 * timing results that sdma_multicopy() writes back.
 */
typedef struct {
    /* Input: node whose sDMA queue performs the copy, source, destination and copy size.*/
    HSAuint32 node;
    void *src;
    void *dst;
    HSAuint64 size;
    /* Input for internal use: adjacent copies with the same node and group share one queue.*/
    HSAuint64 group;
    /* Output: timing of this copy, filled in after the queues have completed
     * (left untouched when no timestamps are requested).
     */
    HSAuint64 timeConsumption;
    HSAuint64 timeBegin;
    HSAuint64 timeEnd;
    /* Private output for internal use: the queue the copy was placed on and
     * its packet index inside that queue (counted from 0 per queue).
     */
    HSAuint64 queue_id;
    HSAuint64 packet_id;
} SDMACopyParams;

/* Run n copies, each on its own queue. speedSmall / speedLarge, when given,
 * receive the measured speeds; s, when given, receives a textual timeline.
 * NOTE(review): std::stringstream is named here but this header only includes
 * <vector> - presumably the includer must pull in <sstream> first; confirm.
 */
void sdma_multicopy(SDMACopyParams *array, int n,
        HSAuint64 *speedSmall = 0, HSAuint64 *speedLarge = 0, std::stringstream *s = 0);
/* Run the copies in array. A non-zero mashup allows the array to be sorted so
 * that fewer queues are needed; tsp selects how timestamp packets are inserted.
 */
void sdma_multicopy(std::vector<SDMACopyParams> &array, int mashup = 1, TSPattern tsp = ALLTS);
#endif //__KFD__TEST__UTIL__QUEUE__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTopologyTest.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDTopologyTest.hpp"
#include <vector>
#include <string>

// Note: despite the name the value is 2^32 - 1, i.e. the last byte offset below 4 GiB.
const HSAuint64 KFDTopologyTest::c_4Gigabyte = (1ull << 32) - 1;
// 2^40, the size of a 40-bit address space.
const HSAuint64 KFDTopologyTest::c_40BitAddressSpace = (1ull << 40);

TEST_F(KFDTopologyTest , BasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    // Walk every node listed in the system properties and sanity-check its basic info.
    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(node);
        if (pNodeProperties == NULL)
            continue;

        const HSAuint64 uniqueid = pNodeProperties->UniqueID;
        LOG() << "UniqueID : " << std::dec << uniqueid <<
                 " Node index: " << node << std::endl;

        const bool hasGpu = (pNodeProperties->DeviceId != 0);

        // CPU cores are only required on CPU-only nodes and on Kaveri APUs.
        if (!hasGpu || FamilyIdFromNode(pNodeProperties) == FAMILY_KV) {
            EXPECT_GT(pNodeProperties->NumCPUCores, HSAuint32(0)) << "Node index: " << node
                                                                  << " No CPUs core are connected for node index";
        }

        // Anything that is not a CPU-only node must expose GPU cores.
        if (hasGpu) {
            EXPECT_GT(pNodeProperties->NumFComputeCores, HSAuint32(0)) << "Node index: " << node
                                                                       << "No GPUs core are connected.";
            // EngineId is meaningful for GPUs only, not for CPU-only nodes.
            EXPECT_GT(pNodeProperties->EngineId.ui32.uCode, 0) << "uCode version is 0";
            EXPECT_GE(pNodeProperties->EngineId.ui32.Major, 7) << "Major Version is less than 7";
            EXPECT_LT(pNodeProperties->EngineId.ui32.Minor, 10) << "Minor Version is greater than 9";
            EXPECT_GT(pNodeProperties->uCodeEngineVersions.uCodeSDMA, 0) << "sDMA firmware version is 0";

            LOG() << "VGPR Size is " << pNodeProperties->VGPRSizePerCU <<
                     "  SGPR Size is " << pNodeProperties->SGPRSizePerCU << std::endl;
        }

        EXPECT_GT(pNodeProperties->NumMemoryBanks, HSAuint32(0)) << "Node index: " << node << "No MemoryBanks.";

        // SWDEV-420270
        // On "Intel Meteor lake Mobile" the cache info is missing from sysfs
        // (/sys/devices/system/node/node%d/%s/cache does not exist), so a node
        // without caches is only logged, not treated as a failure.
        if (pNodeProperties->NumCaches == 0)
            LOG() <<  "Node index: " << node << "  No Caches or not available to read ." << std::endl;
    }

    TEST_END
}

// Verifies that hsaKmtGetNodeProperties reports HSAKMT_STATUS_INVALID_PARAMETER
// when it is given a NULL output pointer.
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidParams) {
    TEST_START(TESTPROFILE_RUNALL)

    EXPECT_EQ(HSAKMT_STATUS_INVALID_PARAMETER, hsaKmtGetNodeProperties(0, NULL));

    TEST_END
}

// Verifies that hsaKmtGetNodeProperties reports HSAKMT_STATUS_INVALID_NODE_UNIT
// for a node index that is out of range.
TEST_F(KFDTopologyTest, GetNodePropertiesInvalidNodeNum) {
    TEST_START(TESTPROFILE_RUNALL)

    HsaNodeProperties nodeProperties;
    memset(&nodeProperties, 0, sizeof(nodeProperties));
    // Valid indices are 0 .. NumNodes - 1, so NumNodes itself is the first invalid one.
    EXPECT_EQ(HSAKMT_STATUS_INVALID_NODE_UNIT, hsaKmtGetNodeProperties(m_SystemProperties.NumNodes, &nodeProperties));

    TEST_END
}

// Queries the memory bank properties of every node and expects the call to succeed.
// TODO: Check validity of values returned
TEST_F(KFDTopologyTest, GetNodeMemoryProperties) {
    TEST_START(TESTPROFILE_RUNALL)

    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(node);
        if (pNodeProperties == NULL)
            continue;

        // One entry per memory bank reported for this node.
        HsaMemoryProperties *memoryProperties = new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
        EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(node, pNodeProperties->NumMemoryBanks, memoryProperties));
        delete [] memoryProperties;
    }

    TEST_END
}


// Checks that every GPU node exposes a frame buffer heap (private or public).
TEST_F(KFDTopologyTest, GpuvmApertureValidate) {
    TEST_REQUIRE_NO_ENV_CAPABILITIES(ENVCAPS_32BITLINUX);

    TEST_START(TESTPROFILE_RUNALL)
    const std::vector<int> GpuNodes = m_NodeInfo.GetNodesWithGPU();

    for (unsigned i = 0; i < GpuNodes.size(); i++) {
        const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(GpuNodes.at(i));
        if (pNodeProperties == NULL)
            continue;

        // Only dGPUs and Kaveri expose the GPUVM frame buffer heap.
        if (!(hsakmt_is_dgpu() || FamilyIdFromNode(pNodeProperties) == FAMILY_KV)) {
            LOG() << "Skipping test: GPUVM framebuffer heap not exposed on APU except Kaveri." << std::endl;
            return;
        }

        HsaMemoryProperties *memoryProperties =  new HsaMemoryProperties[pNodeProperties->NumMemoryBanks];
        EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(GpuNodes.at(i), pNodeProperties->NumMemoryBanks,
                                                     memoryProperties));

        bool GpuVMHeapFound = false;
        for (unsigned int bank = 0 ; bank  < pNodeProperties->NumMemoryBanks ; bank++) {
            // Either private (small-bar/APU) or public (large-bar) frame buffer counts.
            const bool isFrameBuffer =
                (memoryProperties[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ||
                (memoryProperties[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC);
            if (isFrameBuffer)
                GpuVMHeapFound = true;
        }
        EXPECT_TRUE(GpuVMHeapFound);
        delete [] memoryProperties;
    }

    TEST_END
}

// Formats the indices of the non-zero entries among the first `count` elements
// of a sibling map as "i,j,k," (every index is followed by a comma).
template <typename T>
static std::string FormatSiblingMap(const T *siblingMap, unsigned count) {
    std::string list;
    for (unsigned i = 0; i < count; i++) {
        if (siblingMap[i]) {
            list += std::to_string(i);
            list += ',';
        }
    }
    return list;
}

// Test that we can get cache property successfully per node, and log what was returned.
// TODO: Check validity of values returned
TEST_F(KFDTopologyTest, GetNodeCacheProperties) {
    TEST_START(TESTPROFILE_RUNALL)

    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(node);
        if (pNodeProperties == NULL)
            continue;

        HsaCacheProperties *cacheProperties = new HsaCacheProperties[pNodeProperties->NumCaches];
        EXPECT_SUCCESS(hsaKmtGetNodeCacheProperties(node, pNodeProperties->CComputeIdLo,
                       pNodeProperties->NumCaches, cacheProperties));

        // A node that reports CPU cores is logged as a CPU node, anything else as a GPU node.
        const bool isCpuNode = pNodeProperties->NumCPUCores > 0;

        LOG() << (isCpuNode ? "CPU Node " : "GPU Node ") << std::dec << node << ": "
              << pNodeProperties->NumCaches << " caches" << std::endl;

        for (unsigned n = 0; n < pNodeProperties->NumCaches; n++) {
            // CPU cache sizes are shifted down by 10 bits before the "K" suffix is
            // printed; GPU cache sizes are printed as reported.
            // NOTE(review): presumably GPU sizes are already in KiB - confirm.
            const HSAuint64 cacheSizeK = isCpuNode ? (cacheProperties[n].CacheSize >> 10)
                                                   : cacheProperties[n].CacheSize;

            LOG()<< n << " - Level " << cacheProperties[n].CacheLevel <<
            " Type " << cacheProperties[n].CacheType.Value <<
            " Size " << cacheSizeK << "K " <<
            " Associativity " << cacheProperties[n].CacheAssociativity <<
            " LineSize " << cacheProperties[n].CacheLineSize <<
            " LinesPerTag " << cacheProperties[n].CacheLinesPerTag << std::endl;

            // Only the first 256 sibling entries are listed.
            LOG() << "     ProcIdLow " << cacheProperties[n].ProcessorIdLow <<
            " SiblingMap " << FormatSiblingMap(cacheProperties[n].SiblingMap, 256) << std::endl;
        }

        delete [] cacheProperties;
    }

    TEST_END
}

// Test that we can get NodeIoLink property successfully per node, and log the
// resulting topology as a small tree.
// TODO: Check validity of values returned
// NOTE(review): an older remark said this test was disabled because of a BIOS
// bug, but it is registered under its plain name here - confirm whether that
// remark still applies.
TEST_F(KFDTopologyTest, GetNodeIoLinkProperties) {
    TEST_START(TESTPROFILE_RUNALL)

    LOG() << "Topology. [FromNode]--(Weight)-->[ToNode]" << std::endl;

    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(node);
        if (pNodeProperties == NULL)
            continue;

        HsaIoLinkProperties  *IolinkProperties =  new HsaIoLinkProperties[pNodeProperties->NumIOLinks];
        EXPECT_SUCCESS(hsaKmtGetNodeIoLinkProperties(node, pNodeProperties->NumIOLinks, IolinkProperties));

        if (pNodeProperties->NumIOLinks == 0) {
            // No io_links. Just print the node
            LOG() << "[" << node << "]" << std::endl;
        } else {
            for (unsigned linkId = 0; linkId < pNodeProperties->NumIOLinks; linkId++) {
                if (linkId == 0) {
                    // First io_link. Print Parent Node and io_link Node
                    EXPECT_EQ(node, IolinkProperties[linkId].NodeFrom);
                    LOG() << "[" << IolinkProperties[linkId].NodeFrom << "]--(" <<
                        IolinkProperties[linkId].Weight << ")-->" <<
                        "[" << IolinkProperties[linkId].NodeTo << "]" << std::endl;
                    continue;
                }

                // '`' closes the tree on the last link, '|' continues it.
                const char c = (linkId == (pNodeProperties->NumIOLinks - 1)) ? '`' : '|';
                LOG() << "  " << c << "--(" << IolinkProperties[linkId].Weight << ")-->" <<
                    "[" << IolinkProperties[linkId].NodeTo << "]" << std::endl;
            }
            LOG() << std::endl;
        }

        // Released on every path, including nodes without any io_link.
        delete [] IolinkProperties;
    }

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/KFDTopologyTest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "KFDBaseComponentTest.hpp"

#ifndef __KFD_TOPOLOGY_TEST__H__
#define __KFD_TOPOLOGY_TEST__H__

/* @class KFDTopologyTest
 * Test fixture that groups all topology tests together. Apart from two shared
 * size constants it adds nothing on top of KFDBaseComponentTest.
 */
class KFDTopologyTest : public KFDBaseComponentTest {
 public:
    static const HSAuint64 c_4Gigabyte;
    static const HSAuint64 c_40BitAddressSpace;

    KFDTopologyTest() {}
    ~KFDTopologyTest() {}
};

#endif  // __KFD_TOPOLOGY_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/LinuxOSWrapper.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef WIN32

#include "OSWrapper.hpp"

#include <gtest/gtest.h>
#include <unistd.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <drm.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/ioctl.h>

// mmap() protection flags indexed by a MEM_* bitmask:
// bit 0 selects PROT_READ, bit 1 PROT_WRITE and bit 2 PROT_EXEC, so valid indices are 0..7.
static int protection_flags[8] = {PROT_NONE, PROT_READ, PROT_WRITE, PROT_READ | PROT_WRITE,
                                  PROT_EXEC, PROT_EXEC | PROT_READ, PROT_EXEC | PROT_WRITE,
                                  PROT_EXEC | PROT_WRITE | PROT_READ};

// Intended to change the console text color; not implemented here, so the
// requested color is ignored.
void SetConsoleTextColor(TEXTCOLOR color) {
    // TODO: Complete
}

// Suspends the calling thread for delayCount milliseconds.
// A negative delayCount is treated as "no delay".
void Delay(int delayCount) {
    // A negative value would otherwise wrap around to a huge unsigned sleep time.
    if (delayCount < 0)
        return;

    // Split the delay: usleep() takes microseconds and POSIX lets it reject values
    // of one second or more, and delayCount * 1000 could overflow an int.
    const unsigned int wholeSeconds = delayCount / 1000;
    const useconds_t remainderUs = (delayCount % 1000) * 1000;

    if (wholeSeconds > 0)
        sleep(wholeSeconds);
    usleep(remainderUs);
}

// Maps `size` bytes of private anonymous memory, using `address` as the mmap() hint.
// memProtection is a MEM_* bitmask (0..7) that selects the page protection.
// Returns the mapped address, or NULL if the mapping fails or memProtection is
// out of range (errno is set to EINVAL in the latter case).
void *VirtualAllocMemory(void *address, unsigned int size, int memProtection ) {
    const int flagCount = sizeof(protection_flags) / sizeof(protection_flags[0]);

    // Reject masks that would index outside the protection table.
    if (memProtection < 0 || memProtection >= flagCount) {
        errno = EINVAL;
        return NULL;
    }

    void *ptr = mmap(address, size, protection_flags[memProtection], MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

    // Callers test for NULL rather than MAP_FAILED.
    if (ptr == MAP_FAILED)
        ptr = NULL;
    return ptr;
}

// Unmaps `size` bytes starting at `address`; returns true when munmap() succeeds.
bool VirtualFreeMemory(void *address, unsigned int size) {
    const int unmapStatus = munmap(address, size);
    return unmapStatus == 0;
}

// Returns the current value of errno for the calling thread.
HSAuint64 GetLastErrorNo() {
    const int lastError = errno;
    return lastError;
}

// Not implemented here: no process is started and the function always returns false.
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess) {
    // TODO: Implement
    return false;
}

// Suspends the machine by running "sudo pm-suspend".
// Returns true when the command ran and exited with status 0, false otherwise.
bool SuspendAndWakeUp() {
    printf("Please press any key after the system suspends....\n");

    // "pm-suspend" comes from the pm-utils package ("sudo apt install pm-utils").
    int status = system("sudo pm-suspend");

    // system() itself failed.
    if (status == -1) {
        printf("The system linux command could not be run!\n");
        return false;
    }

    // The command ran but reported a non-zero exit status.
    if (WEXITSTATUS(status)) {
        printf("Use 'sudo apt install pm-utils' to install 'pm-suspend' on Ubuntu\n");
        return false;
    }

    return true;
}

// No driver configuration is read here: rValue is left untouched and the
// function always returns false.
bool ReadDriverConfigValue(CONFIG_VALUE config, unsigned int& rValue) {
    return false;
}

// Prints the list of supported command line options to stdout; called when an
// option value cannot be parsed.
void ComandLineArgumentsUsage() {
    printf("Invalid option value\n");
    printf("\t--hws arg\t - Force HW capability\n");
    printf("\t--profile arg\t - Test profile\n");
    printf("\t--child arg\t - Child Process\n");
    printf("\t--timeout arg\t - Time Out\n");
    // --node is accepted by GetCommandLineArguments() and therefore listed as well.
    printf("\t--node arg\t - Node id\n");
    // Every line ends with a newline so the entries do not run together.
    printf("\t--dst_node\t - For testing multiple nodes\n");
    printf("\t--sleep_time\t - For testing CRIU, etc\n");
}

// Parses the kfdtest-specific long options into rArgs.
// rArgs is first reset to its defaults. Parsing stops at the first getopt_long()
// result other than 0 (end of options, or an option it does not recognise) and
// the function then returns true. It returns false, after printing the usage
// text, when an option has no argument or an unsupported --hws / --profile value.
bool GetCommandLineArguments(int argc, char **argv, CommandLineArguments& rArgs) {
    /* Positions of the entries in long_options[]; getopt_long() reports the
     * matched entry through option_index.
     */
    enum {
        OPT_HWS = 0,
        OPT_PROFILE,
        OPT_CHILD,
        OPT_TIMEOUT,
        OPT_NODE,
        OPT_DST_NODE,
        OPT_SLEEP_TIME
    };

    static struct option long_options[] = {
        { "hws", required_argument, 0, 0 },
        { "profile", required_argument, 0, 0},
        { "child", required_argument, 0, 0},
        { "timeout", required_argument, 0, 0},
        { "node", required_argument, 0, 0 },
        { "dst_node", required_argument, 0, 0 },
        { "sleep_time", required_argument, 0, 0 },
        { 0, 0, 0, 0 }
    };

    int option_index = 0;

    /* Keep getopt quiet about options it does not know. */
    opterr = 0;

    /* Defaults used for every option that is not given. */
    rArgs.HwsEnabled = HWCAP__DEFAULT;
    rArgs.TestProfile = TESTPROFILE_RUNALL;
    rArgs.ChildProcess = false;
    rArgs.TimeOut = 0;
    rArgs.NodeId = -1;
    rArgs.DstNodeId = -1;
    rArgs.SleepTime = 0;

    /* Every long option makes getopt_long() return 0; anything else ends parsing. */
    while (getopt_long(argc, argv, "", long_options, &option_index) == 0) {
        /* Options that only set a flag need no further handling. */
        if (long_options[option_index].flag != 0)
            continue;

        if (optarg == NULL) {
            ComandLineArgumentsUsage();
            return false;
        }

        switch (option_index) {
        case OPT_HWS:
            if (strcmp(optarg, "disable") == 0) {
                rArgs.HwsEnabled = HWCAP__FORCE_DISABLED;
            } else if (strcmp(optarg, "enable") == 0) {
                rArgs.HwsEnabled = HWCAP__FORCE_ENABLED;
            } else {
                ComandLineArgumentsUsage();
                return false;
            }
            break;

        case OPT_PROFILE:
            if (strcmp(optarg, "dev") == 0) {
                rArgs.TestProfile = TESTPROFILE_DEV;
            } else if (strcmp(optarg, "promo") == 0) {
                rArgs.TestProfile = TESTPROFILE_PROMO;
            } else if (strcmp(optarg, "all") == 0) {
                rArgs.TestProfile = TESTPROFILE_RUNALL;
            } else {
                ComandLineArgumentsUsage();
                return false;
            }
            break;

        case OPT_CHILD:
            /* The argument value itself is not inspected. */
            rArgs.ChildProcess = true;
            break;

        case OPT_TIMEOUT: {
            /* Only strictly positive timeouts replace the default. */
            const int timeOut = atoi(optarg);
            if (timeOut > 0)
                rArgs.TimeOut = timeOut;
            break;
        }

        case OPT_NODE: {
            const int nodeId = atoi(optarg);
            if (nodeId >= 0)
                rArgs.NodeId = nodeId;
            break;
        }

        case OPT_DST_NODE: {
            const int dstNodeId = atoi(optarg);
            if (dstNodeId >= 0)
                rArgs.DstNodeId = dstNodeId;
            break;
        }

        /* Sleep time - used in testing CRIU */
        case OPT_SLEEP_TIME: {
            const int sleepTime = atoi(optarg);
            if (sleepTime >= 0)
                rArgs.SleepTime = sleepTime;
            break;
        }
        }
    }

    return true;
}

// Issues a full memory barrier through the compiler's __sync_synchronize() builtin.
void HWMemoryBarrier() {
    __sync_synchronize();
}

namespace {

// Carries the caller's entry point and its argument across pthread_create().
struct StartThreadContext {
    unsigned int (*func)(void*);
    void* param;
};

// Entry point with the signature pthread expects. It runs the caller's function
// and passes its unsigned result on as the thread's exit value.
void* StartThreadTrampoline(void* arg) {
    StartThreadContext* ctx = static_cast<StartThreadContext*>(arg);
    unsigned int (*func)(void*) = ctx->func;
    void* param = ctx->param;
    // The context is owned by the new thread once pthread_create() has succeeded.
    delete ctx;

    return reinterpret_cast<void*>(static_cast<uintptr_t>(func(param)));
}

}  // namespace

// Starts a thread that runs thread_func(param).
// On success the thread handle is stored in thread_id (for WaitForThread()) and
// true is returned; on failure thread_id is left untouched and false is returned.
bool StartThread(unsigned int (*thread_func)(void*), void* param, uint64_t& thread_id) {
    // thread_func does not have the void* (*)(void*) type pthread_create() calls
    // through, so it is wrapped instead of being cast to that type.
    StartThreadContext* ctx = new StartThreadContext;
    ctx->func = thread_func;
    ctx->param = param;

    pthread_t id;
    if (pthread_create(&id, NULL, StartThreadTrampoline, ctx) != 0) {
        // No thread was created, so nobody else will release the context.
        delete ctx;
        return false;
    }

    thread_id = (pthread_t)id;
    return true;
}

// Blocks until the thread identified by threadId has finished.
// Returns true when pthread_join() succeeds; the thread's exit value is discarded.
bool WaitForThread(uint64_t threadId) {
    const int joinStatus = pthread_join((pthread_t)threadId, NULL);
    return joinStatus == 0;
}

// Atomically adds 1 to *pValue and returns the new (incremented) value.
HSAint64 AtomicInc(volatile HSAint64* pValue) {
    return __sync_add_and_fetch(pValue, 1);
}

// Issues a full memory barrier through the compiler's __sync_synchronize() builtin.
void MemoryBarrier() {
    __sync_synchronize();
}

#endif  // !WIN32


================================================
FILE: libhsakmt/tests/kfdtest/src/OSWrapper.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include <sys/user.h>
#include <string>

#include "KFDTestFlags.hpp"
#include "hsakmt/hsakmt.h"

#ifndef __OS__WRAPPER__H__
#define __OS__WRAPPER__H__

// Fallback page geometry (4096-byte pages, shift of 12), used only when the
// headers included above did not already define these macros.
#ifndef PAGE_SIZE
#define PAGE_SIZE   (1<<12)
#endif
#ifndef PAGE_SHIFT
#define PAGE_SHIFT  (12)
#endif

// Colors accepted by SetConsoleTextColor().
enum TEXTCOLOR {
    TEXTCOLOR_WHITE,
    TEXTCOLOR_GREEN,
    TEXTCOLOR_YELLOW
};

// NOTE(review): no declaration in this header uses this enum; judging by the
// names it presumably identifies privileged operations (driver operations,
// suspend) - confirm against its users.
enum OS_PRIVILEGE {
    OS_DRIVER_OPERATIONS,
    OS_SUSPEND
};

// Selects which driver configuration value ReadDriverConfigValue() should read.
enum CONFIG_VALUE {
    CONFIG_HWS
};

// Tri-state override for a hardware capability: forced off, left at its
// default, or forced on (see CommandLineArguments::HwsEnabled).
enum HwCapabilityStatus {
    HWCAP__FORCE_DISABLED,
    HWCAP__DEFAULT,
    HWCAP__FORCE_ENABLED
};

// Options filled in by GetCommandLineArguments().
struct CommandLineArguments {
    // HWS override requested on the command line.
    HwCapabilityStatus HwsEnabled;
    // Selected test profile.
    TESTPROFILE TestProfile;
    // Whether the child-process option was given.
    bool ChildProcess;
    // Time out value. NOTE(review): the unit is not stated here - confirm with the users.
    unsigned int TimeOut;
    // Node ids taken from the command line.
    // NOTE(review): presumably negative when not specified - confirm with the parser.
    int NodeId;
    int DstNodeId;
    /* Time in units of seconds */
    unsigned int SleepTime;
};

// Memory protection for VirtualAllocMemory(): either MEM_NONE or the bitwise OR
// of one or more of the following flags
#define MEM_NONE 0x00
#define MEM_READ 0x01
#define MEM_WRITE 0x02
#define MEM_EXECUTE 0x4

// @brief Change console text color
void SetConsoleTextColor(TEXTCOLOR color);
// @brief Suspend the calling thread
// @params delayCount : delay time in milliseconds
void Delay(int delayCount);
// @brief Replacement for windows VirtualAlloc func
// @params memProtection : MEM_NONE or a bitwise OR of MEM_READ / MEM_WRITE / MEM_EXECUTE
// @return the allocated address, or NULL on failure
void *VirtualAllocMemory(void *address, unsigned int size, int memProtection = MEM_READ | MEM_WRITE);
// @brief Replacement for windows FreeVirtual func
// @return 'true' on success, 'false' on failure
bool VirtualFreeMemory(void *address, unsigned int size);
// @brief Retrieve the last error number
HSAuint64 GetLastErrorNo();

// @brief Atomically increment *pValue
// @return the incremented value
HSAint64 AtomicInc(volatile HSAint64* pValue);

// @brief Issue a memory barrier
void MemoryBarrier();

/* @brief: Runs the selected test case the required number of times, each in a separate process
 * @params testToRun : Can be a specific test like TestCase.TestName, or TestCase.* to
 *                     run all tests in a test case, and so on
 * @params numOfProcesses : How many processes to run in parallel
 * @params runsPerProcess : How many iterations a test should do per process, must be a positive number
 */
bool MultiProcessTest(const char *testToRun, int numOfProcesses, int runsPerProcess = 1);

/* Put the system to S3/S4 power state and bring it back to S0.
 * @return 'true' on success, 'false' on failure.
 */
bool SuspendAndWakeUp();

// @brief Read a driver configuration value into rValue
// @return 'true' when a value was read, 'false' otherwise
bool ReadDriverConfigValue(CONFIG_VALUE config, unsigned int& rValue);

// @brief Parse the test-specific command line options into rArgs
// @return 'false' when an option value is rejected, 'true' otherwise
bool GetCommandLineArguments(int argc, char **argv, CommandLineArguments& rArgs);

// @brief Issue a memory barrier
void HWMemoryBarrier();
// @brief Start a thread running the given function with pParam; the handle is returned in threadId
// @return 'true' when the thread was created
bool StartThread(unsigned int (*)(void*), void* pParam, uint64_t& threadId);
// @brief Wait for a thread started with StartThread() to finish
// @return 'true' when the thread was joined successfully
bool WaitForThread(uint64_t threadId);

#endif  // __OS__WRAPPER__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/PM4Packet.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "PM4Packet.hpp"
#include "hsakmt/hsakmttypes.h"
#include "KFDBaseComponentTest.hpp"

#include "asic_reg/gfx_7_2_enum.h"

// Computes the base value for the header COUNT field: the packet size in
// dwords, minus the header dwords, minus one.
unsigned int PM4Packet::CalcCountValue() const {
    const size_t headerDwords = sizeof(PM4_TYPE_3_HEADER) / sizeof(uint32_t);
    return SizeInDWords() - headerDwords - 1;
}

// Fills in a type-3 PM4 header for this packet.
// @param header  header structure to populate
// @param opCode  PM4 opcode to store in the header
void PM4Packet::InitPM4Header(PM4_TYPE_3_HEADER &header, it_opcode_type opCode) {
    header.type       = PM4_TYPE_3;
    header.opcode     = opCode;
    header.shaderType = 1;  // compute
    // COUNT comes from the packet size, adjusted by the per-packet offset.
    header.count      = CalcCountValue() + m_HeaderCountOffset;
    header.predicate  = 0;
    header.reserved1  = 0;
}

// @returns Packet size in bytes: everything up to the data field plus
// m_ndw payload dwords.
unsigned int PM4WriteDataPacket::SizeInBytes() const {
    const size_t payloadBytes = m_ndw * sizeof(uint32_t);
    return offsetof(PM4WRITE_DATA_CI, data) + payloadBytes;
}

void PM4WriteDataPacket::InitPacket(unsigned int *destBuf, void *data) {
    m_pPacketData = reinterpret_cast<PM4WRITE_DATA_CI *>(AllocPacket());

    InitPM4Header(m_pPacketData->header, IT_WRITE_DATA);

    m_pPacketData->bitfields2.dst_sel      = dst_sel_mec_write_data_MEMORY_5;  // memory-async
    m_pPacketData->bitfields2.addr_incr    = addr_incr_mec_write_data_INCREMENT_ADDR_0;  // increment addr
    m_pPacketData->bitfields2.wr_confirm   = wr_confirm_mec_write_data_WAIT_FOR_CONFIRMATION_1;
    m_pPacketData->bitfields2.atc          = hsakmt_is_dgpu() ?
        atc_write_data_NOT_USE_ATC_0 : atc_write_data_USE_ATC_1;
    m_pPacketData->bitfields2.cache_policy = cache_policy_mec_write_data_BYPASS_2;

    m_pPacketData->dst_addr_lo    = static_cast<uint32_t>(
        reinterpret_cast<uint64_t>(destBuf));  // byte addr
    m_pPacketData->dst_address_hi = static_cast<uint32_t>(
        reinterpret_cast<uint64_t>(destBuf) >> 32);

    memcpy(m_pPacketData->data, data, m_ndw * sizeof(uint32_t));
}

// Constructs and initialises a RELEASE_MEM packet, choosing the packet layout
// (CI, AI or NV) from the family id.
PM4ReleaseMemoryPacket::PM4ReleaseMemoryPacket(unsigned int familyId, bool isPolling,
                    uint64_t address, uint64_t data, bool is64bit, bool isTimeStamp,
                    int headerCountOffset):m_pPacketData(NULL) {
    m_FamilyId = familyId;
    m_HeaderCountOffset = headerCountOffset;

    const bool olderThanAI = familyId < FAMILY_AI;
    const bool olderThanNV = familyId < FAMILY_NV;

    if (olderThanAI) {
        InitPacketCI(isPolling, address, data, is64bit, isTimeStamp);
    } else if (olderThanNV) {
        InitPacketAI(isPolling, address, data, is64bit, isTimeStamp);
    } else {
        InitPacketNV(isPolling, address, data, is64bit, isTimeStamp);
    }
}

// Builds a RELEASE_MEM packet using the CI layout (PM4_RELEASE_MEM_CI).
// @param isPolling    when true, no interrupt is requested
// @param address      destination address; 0 means no data is written
// @param data         value stored in data_lo / data_hi
// @param is64bit      send 64-bit data instead of the low 32 bits
// @param isTimeStamp  together with is64bit, send the GPU clock counter
void PM4ReleaseMemoryPacket::InitPacketCI(bool isPolling, uint64_t address,
                                    uint64_t data, bool is64bit, bool isTimeStamp) {
    m_packetSize = sizeof(PM4_RELEASE_MEM_CI);
    PM4_RELEASE_MEM_CI *pkt = reinterpret_cast<PM4_RELEASE_MEM_CI *>(AllocPacket());
    m_pPacketData = pkt;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type = 0x14;
    // event_index possible values:
    //   0101(5): EVENT_WRITE_EOP event types
    //   0110(6): Reserved for EVENT_WRITE_EOS packet.
    //   0111(7): Reserved (previously) for EVENT_WRITE packet.
    pkt->bitfields2.event_index  = event_index_mec_release_mem_EVENT_WRITE_EOP_5;
    pkt->bitfields2.l2_wb        = 1;
    pkt->bitfields2.l2_inv       = 1;
    pkt->bitfields2.cache_policy = cache_policy_mec_release_mem_BYPASS_2;
    // ATC setting for fences and timestamps to the MC or TCL2.
    if (hsakmt_is_dgpu())
        pkt->bitfields2.atc = atc_mec_release_mem_ci_NOT_USE_ATC_0;
    else
        pkt->bitfields2.atc = atc_mec_release_mem_ci_USE_ATC_1;
    // dst_sel possible values: 0 - memory_controller, 1 - tc_l2.
    pkt->bitfields3.dst_sel = dst_sel_mec_release_mem_MEMORY_CONTROLLER_0;

    // int_sel possible values:
    //   0 - None (Do not send an interrupt).
    //   1 - Send Interrupt Only. Program DATA_SEL 0.
    //   2 - Send Interrupt when Write Confirm (WC) is received from the MC.
    //   3 - Wait for WC, but dont send interrupt (applicable to 7.3+) [g73_1]
    //   4 - Reserved for INTERRUPT packet
    // data_sel possible values:
    //   0 - None, i.e., Discard Data.
    //   1 - Send 32-bit Data Low (Discard Data High).
    //   2 - Send 64-bit Data.
    //   3 - Send current value of the 64 bit global GPU clock counter.
    //   4 - Send current value of the 64 bit system clock counter.
    //   5 - Store GDS Data to memory.
    //   6 - Reserved for use by the CP for Signal Semaphore.
    //   7 - Reserved for use by the CP for Wait Semaphore.
    if (!address) {
        // No destination: nothing to write, optionally raise an interrupt.
        if (isPolling)
            pkt->bitfields3.int_sel = int_sel_mec_release_mem_NONE_0;
        else
            pkt->bitfields3.int_sel = int_sel_mec_release_mem_SEND_INTERRUPT_ONLY_1;
        pkt->bitfields3.data_sel = data_sel_mec_release_mem_NONE_0;
    } else {
        if (isPolling)
            pkt->bitfields3.int_sel = int_sel_mec_release_mem_SEND_DATA_AFTER_WRITE_CONFIRM_3;
        else
            pkt->bitfields3.int_sel = int_sel_mec_release_mem_SEND_INTERRUPT_AFTER_WRITE_CONFIRM_2;

        if (!is64bit)
            pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_32_BIT_LOW_1;
        else if (isTimeStamp)
            pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3;
        else
            pkt->bitfields3.data_sel = data_sel_mec_release_mem_SEND_64_BIT_DATA_2;
    }

    // The low address field is stored in dword units.
    const uint64_t addrLow32 = address & 0xffffffff;
    pkt->bitfields4a.address_lo_dword_aligned = static_cast<uint32_t>(addrLow32 >> 2);
    pkt->addr_hi = static_cast<uint32_t>(address >> 32);

    pkt->data_lo = static_cast<uint32_t>(data);
    pkt->data_hi = static_cast<uint32_t>(data >> 32);
}
// Builds a RELEASE_MEM packet using the AI layout (PM4MEC_RELEASE_MEM_AI).
// @param isPolling    when true, no interrupt is requested
// @param address      destination address; 0 means no data is written
// @param data         value stored in data_lo / data_hi; its low dword is also
//                     stored in int_ctxid
// @param is64bit      send 64-bit data instead of the low 32 bits
// @param isTimeStamp  together with is64bit, send the GPU clock counter
void PM4ReleaseMemoryPacket::InitPacketAI(bool isPolling, uint64_t address,
                                        uint64_t data, bool is64bit, bool isTimeStamp) {
    m_packetSize = sizeof(PM4MEC_RELEASE_MEM_AI);
    PM4MEC_RELEASE_MEM_AI *pkt = reinterpret_cast<PM4MEC_RELEASE_MEM_AI *>(AllocPacket());
    m_pPacketData = pkt;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type       = 0x14;
    pkt->bitfields2.event_index      = event_index__mec_release_mem__end_of_pipe;
    pkt->bitfields2.tc_wb_action_ena = 1;
    pkt->bitfields2.tc_action_ena    = 1;
    pkt->bitfields2.cache_policy     = cache_policy__mec_release_mem__lru;

    pkt->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller;

    if (!address) {
        // No destination: nothing to write, optionally raise an interrupt.
        if (isPolling)
            pkt->bitfields3.int_sel = int_sel__mec_release_mem__none;
        else
            pkt->bitfields3.int_sel = int_sel__mec_release_mem__send_interrupt_only;
        pkt->bitfields3.data_sel = data_sel__mec_release_mem__none;
    } else {
        if (isPolling)
            pkt->bitfields3.int_sel = int_sel__mec_release_mem__send_data_after_write_confirm;
        else
            pkt->bitfields3.int_sel = int_sel__mec_release_mem__send_interrupt_after_write_confirm;

        if (!is64bit)
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
        else if (isTimeStamp)
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter;
        else
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_64_bit_data;
    }

    // The low address field is stored in dword units.
    const uint64_t addrLow32 = address & 0xffffffff;
    pkt->bitfields4a.address_lo_32b = static_cast<uint32_t>(addrLow32 >> 2);
    pkt->address_hi = static_cast<uint32_t>(address >> 32);

    const uint32_t dataLow = static_cast<uint32_t>(data);
    pkt->data_lo = dataLow;
    pkt->data_hi = static_cast<uint32_t>(data >> 32);

    pkt->int_ctxid = dataLow;
}

void PM4ReleaseMemoryPacket::InitPacketNV(bool isPolling, uint64_t address,
                                uint64_t data, bool is64bit, bool isTimeStamp) {
    PM4MEC_RELEASE_MEM_NV *pkt;

    m_packetSize = sizeof(PM4_MEC_RELEASE_MEM_NV);
    pkt = reinterpret_cast<PM4_MEC_RELEASE_MEM_NV *>(AllocPacket());
    m_pPacketData = pkt;

    InitPM4Header(pkt->header, IT_RELEASE_MEM);

    pkt->bitfields2.event_type       = 0x14;
    pkt->bitfields2.event_index      = event_index__mec_release_mem__end_of_pipe;
    pkt->bitfields2.gcr_cntl         = (1<<10) | (1<<9) | (1<<8) | (1<<3) | (1<<2);
    pkt->bitfields2.cache_policy     = cache_policy__mec_release_mem__lru;

    pkt->bitfields3.dst_sel          = dst_sel__mec_release_mem__memory_controller;

    if (address) {
        pkt->bitfields3.int_sel  = (isPolling ?
                int_sel__mec_release_mem__send_data_after_write_confirm:
                int_sel__mec_release_mem__send_interrupt_after_write_confirm);

        if (isTimeStamp && is64bit)
            pkt->bitfields3.data_sel = data_sel__mec_release_mem__send_gpu_clock_counter;
        else
            pkt->bitfields3.data_sel     = is64bit ?
                    data_sel__mec_release_mem__send_64_bit_data :
                    data_sel__mec_release_mem__send_32_bit_low;
    } else {
        pkt->bitfields3.int_sel  = (isPolling ?
                int_sel__mec_release_mem__none:
                int_sel__mec_release_mem__send_interrupt_only);
        pkt->bitfields3.data_sel     = data_sel__mec_release_mem__none;
    }

    pkt->bitfields4a.address_lo_32b = static_cast<uint32_t>((address&0xffffffff) >> 2);
    pkt->address_hi = static_cast<uint32_t>(address>>32);

    pkt->data_lo = static_cast<uint32_t>(data);
    pkt->data_hi = static_cast<uint32_t>(data >> 32);

    pkt->int_ctxid = static_cast<uint32_t>(data);
}

// Constructs the packet and initialises it from the given indirect buffer;
// no separate InitPacket() call is needed.
PM4IndirectBufPacket::PM4IndirectBufPacket(IndirectBuffer *pIb) {
    InitPacket(pIb);
}

// @returns Packet size in bytes (the size of the PM4MEC_INDIRECT_BUFFER struct)
unsigned int PM4IndirectBufPacket::SizeInBytes() const {
    return static_cast<unsigned int>(sizeof(PM4MEC_INDIRECT_BUFFER));
}

// Fills the INDIRECT_BUFFER packet so that it points at the given indirect buffer.
// @param pIb  indirect buffer whose address and size (in dwords) are stored
void PM4IndirectBufPacket::InitPacket(IndirectBuffer *pIb) {
    memset(&m_packetData, 0, SizeInBytes());
    InitPM4Header(m_packetData.header, IT_INDIRECT_BUFFER);

    const HSAuint64 ibAddr = reinterpret_cast<HSAuint64>(pIb->Addr());

    // Low part: low 32 bits of the address shifted right by 2; high part: upper 32 bits.
    m_packetData.bitfields2.ib_base_lo = static_cast<HSAuint32>(ibAddr) >> 2;
    m_packetData.bitfields3.ib_base_hi = ibAddr >> 32;

    m_packetData.bitfields4.ib_size          = pIb->SizeInDWord();
    m_packetData.bitfields4.valid            = 1;
    m_packetData.bitfields4.cache_policy     = cache_policy_indirect_buffer_BYPASS_2;
    m_packetData.bitfields4.chain            = 0;
    m_packetData.bitfields4.offload_polling  = 0;
    m_packetData.bitfields4.volatile_setting = 0;
    m_packetData.bitfields4.vmid             = 0;  // in iommutest:  vmid = queueParams.VMID;
}
// Constructs and initialises an ACQUIRE_MEM packet, choosing the NV layout for
// FAMILY_NV and later and the AI layout otherwise.
PM4AcquireMemoryPacket::PM4AcquireMemoryPacket(unsigned int familyId):m_pPacketData(NULL)
{
    m_FamilyId = familyId;

    if (familyId >= FAMILY_NV) {
        InitPacketNV();
    } else {
        InitPacketAI();
    }
}

// Builds an ACQUIRE_MEM packet using the PM4ACQUIRE_MEM layout.
void PM4AcquireMemoryPacket::InitPacketAI(void) {
    m_packetSize = sizeof(PM4ACQUIRE_MEM);
    PM4ACQUIRE_MEM *pkt = reinterpret_cast<PM4ACQUIRE_MEM*>(AllocPacket());
    m_pPacketData = pkt;

    InitPM4Header(pkt->header, IT_ACQUIRE_MEM);

    pkt->bitfields2.coher_cntl = 0x28c00000;  // copied from the way the HSART does this.
    pkt->bitfields2.engine     = engine_acquire_mem_PFP_0;

    // Coherency range: base 0, size 0xFFFFFFFF (high parts zero).
    pkt->coher_base_lo            = 0;
    pkt->bitfields4.coher_base_hi = 0;
    pkt->coher_size               = 0xFFFFFFFF;
    pkt->bitfields3.coher_size_hi = 0;

    pkt->bitfields5.poll_interval = 4;  // copied from the way the HSART does this.
}
// Builds an ACQUIRE_MEM packet using the PM4ACQUIRE_MEM_NV layout.
void PM4AcquireMemoryPacket::InitPacketNV(void) {
    m_packetSize = sizeof(PM4ACQUIRE_MEM_NV);
    PM4ACQUIRE_MEM_NV *pkt = reinterpret_cast<PM4ACQUIRE_MEM_NV*>(AllocPacket());
    m_pPacketData = pkt;

    InitPM4Header(pkt->header, IT_ACQUIRE_MEM);

    // Coherency range: base 0, size 0xFFFFFFFF (high parts zero).
    pkt->coher_base_lo            = 0;
    pkt->bitfields4.coher_base_hi = 0;
    pkt->coher_size               = 0xFFFFFFFF;
    pkt->bitfields3.coher_size_hi = 0;

    pkt->bitfields5.poll_interval = 4;  // copied from the way the HSART does this.

    /* Invalidate gL2, gL1 with range base
     * Invalidate GLV, GLK (L0$)
     * Invalidate all Icache (GLI)
     */
    pkt->bitfields6.gcr_cntl = (1<<14) | (1<<9) | (1<<8) | (1<<7) | 1;
}

// Default constructor: creates an empty packet; call InitPacket() before use.
// Both members are given defined values so that SizeInBytes() returns 0 and
// GetPacket() returns NULL until InitPacket() is called, instead of
// indeterminate values.
PM4SetShaderRegPacket::PM4SetShaderRegPacket(void)
    : m_packetSize(0), m_pPacketData(NULL) {
}

// Constructs the packet and initialises it via InitPacket(); no separate
// InitPacket() call is needed.
// @param baseOffset  offset of the first register to write
// @param regValues   numRegs register values
// @param numRegs     number of consecutive registers to write
PM4SetShaderRegPacket::PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[],
                                             unsigned int numRegs) {
    InitPacket(baseOffset, regValues, numRegs);
}

void PM4SetShaderRegPacket::InitPacket(unsigned int baseOffset, const unsigned int regValues[],
                                       unsigned int numRegs) {
    // 1st register is a part of the packet struct.
    m_packetSize = sizeof(PM4SET_SH_REG) + (numRegs-1)*sizeof(uint32_t);

    /* Allocating the size of the packet, since the packet is assembled from a struct
     * followed by an additional dword data
     */
    m_pPacketData = reinterpret_cast<PM4SET_SH_REG *>(AllocPacket());

    memset(m_pPacketData, 0, m_packetSize);

    InitPM4Header(m_pPacketData->header,  IT_SET_SH_REG);

    m_pPacketData->bitfields2.reg_offset = baseOffset - PERSISTENT_SPACE_START;

    memcpy(m_pPacketData->reg_data, regValues, numRegs*sizeof(uint32_t));
}

// Constructs the packet and initialises it via InitPacket(); no separate
// InitPacket() call is needed.
PM4DispatchDirectPacket::PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY,
                                                 unsigned int dimZ, unsigned int dispatchInit) {
    InitPacket(dimX, dimY, dimZ, dispatchInit);
}

// Fills the DISPATCH_DIRECT packet.
// @param dimX, dimY, dimZ  values stored in the dim_x / dim_y / dim_z fields
// @param dispatchInit      value stored in the dispatch_initiator field
void PM4DispatchDirectPacket::InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ,
                                         unsigned int dispatchInit) {
    // Start from an all-zero packet, then fill in the header and payload.
    memset(&m_packetData, 0, SizeInBytes());
    InitPM4Header(m_packetData.header, IT_DISPATCH_DIRECT);

    m_packetData.dispatch_initiator = dispatchInit;
    m_packetData.dim_x              = dimX;
    m_packetData.dim_y              = dimY;
    m_packetData.dim_z              = dimZ;
}

// @returns Packet size in bytes (the size of the PM4DISPATCH_DIRECT struct)
unsigned int PM4DispatchDirectPacket::SizeInBytes() const {
    return static_cast<unsigned int>(sizeof(PM4DISPATCH_DIRECT));
}

// Constructs an EVENT_WRITE packet carrying a CS_PARTIAL_FLUSH event.
PM4PartialFlushPacket::PM4PartialFlushPacket(void) {
    // Only the SizeInBytes() prefix of the struct is part of the packet.
    memset(&m_packetData, 0, SizeInBytes());
    InitPM4Header(m_packetData.header, IT_EVENT_WRITE);

    m_packetData.bitfields2.event_type  = CS_PARTIAL_FLUSH;
    m_packetData.bitfields2.event_index = event_index_event_write_CS_VS_PS_PARTIAL_FLUSH_4;
}

// @returns Packet size in bytes. For PARTIAL_FLUSH_CS packets the last 2
// dwords of PM4EVENT_WRITE don't exist, so they are excluded.
unsigned int PM4PartialFlushPacket::SizeInBytes() const {
    const size_t omittedBytes = sizeof(uint32_t)*2;
    return sizeof(PM4EVENT_WRITE) - omittedBytes;
}

// Constructs a NOP packet that is count dwords long.
// @param count  packet length in dwords
PM4NopPacket::PM4NopPacket(unsigned int count): m_packetSize(count * sizeof(uint32_t)) {
    PM4_TYPE_3_HEADER *header = reinterpret_cast<PM4_TYPE_3_HEADER *>(AllocPacket());
    m_packetData = header;
    InitPM4Header(*header, IT_NOP);
}

// Constructs a WAIT_REG_MEM packet that waits until the polled value equals
// ref, using a full 0xffffffff mask.
// @param memory        true to poll memory space, false to poll register space
// @param addr          address (or register) to poll
// @param ref           reference value to compare against
// @param pollInterval  value stored in the poll_interval field
PM4WaitRegMemPacket::PM4WaitRegMemPacket(bool memory, uint64_t addr,
                                         uint32_t ref, uint16_t pollInterval) {
    unsigned int space = mem_space__mec_wait_reg_mem__register_space;
    if (memory)
        space = mem_space__mec_wait_reg_mem__memory_space;

    InitPacket(function__mec_wait_reg_mem__equal_to_the_reference_value,
               space,
               operation__mec_wait_reg_mem__wait_reg_mem,
               addr, ref, 0xffffffff, pollInterval);
}
// Constructs a WAIT_REG_MEM packet with every field supplied by the caller;
// all arguments are forwarded unchanged to InitPacket().
PM4WaitRegMemPacket::PM4WaitRegMemPacket(unsigned int function,
                                         unsigned int space,
                                         unsigned int operation,
                                         uint64_t addr, uint32_t ref,
                                         uint32_t mask, uint16_t pollInterval) {
    InitPacket(function, space, operation, addr, ref, mask, pollInterval);
}

void PM4WaitRegMemPacket::InitPacket(unsigned int function,
                                     unsigned int space,
                                     unsigned int operation,
                                     uint64_t addr, uint32_t ref,
                                     uint32_t mask, uint16_t pollInterval) {
    memset(&m_packetData, 0, SizeInBytes());
    InitPM4Header(m_packetData.header, IT_WAIT_REG_MEM);

    m_packetData.bitfields2.function = (MEC_WAIT_REG_MEM_function_enum)function;
    m_packetData.bitfields2.mem_space = (MEC_WAIT_REG_MEM_mem_space_enum)space;
    m_packetData.bitfields2.operation = (MEC_WAIT_REG_MEM_operation_enum)operation;

    m_packetData.ordinal3 = addr;
    m_packetData.mem_poll_addr_hi = addr >> 32;

    m_packetData.reference = ref;
    m_packetData.mask = mask;

    m_packetData.bitfields7.poll_interval = pollInterval;
    m_packetData.bitfields7.optimize_ace_offload_mode = 1;
}

// @returns Packet size in bytes (the size of the embedded packet struct)
unsigned int PM4WaitRegMemPacket::SizeInBytes() const {
    return sizeof(m_packetData);
}


================================================
FILE: libhsakmt/tests/kfdtest/src/PM4Packet.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_PM4_PACKET__H__
#define __KFD_PM4_PACKET__H__

#include "BasePacket.hpp"
#include "kfd_pm4_opcodes.h"
#include "pm4_pkt_struct_common.h"
#include "pm4_pkt_struct_ci.h"
#include "pm4_pkt_struct_ai.h"
#include "pm4_pkt_struct_nv.h"
#include "IndirectBuffer.hpp"

// @class PM4Packet: Common base class of all PM4 packets. Provides the
// type-3 header initialisation shared by the concrete packet classes.
class PM4Packet : public BasePacket {
 public:
        PM4Packet(void): m_HeaderCountOffset(0) {}
        virtual ~PM4Packet(void) {}

        // @returns PACKETTYPE_PM4 for every PM4 packet
        virtual PACKETTYPE PacketType() const { return PACKETTYPE_PM4; }
        // @returns Base value for the header COUNT field, derived from the packet size
        unsigned int CalcCountValue() const;

 protected:
        // Offset added to CalcCountValue() when the header COUNT field is written
        int m_HeaderCountOffset;
        // @brief Fill in a type-3 header with the given opcode
        void InitPM4Header(PM4_TYPE_3_HEADER &header, it_opcode_type opCode);
};

// @class PM4WriteDataPacket: WRITE_DATA packet that writes one or two dwords
// (or a caller-supplied buffer of m_ndw dwords) to a destination address.
class PM4WriteDataPacket : public PM4Packet {
 public:
    // Empty constructor, before using the packet call one of the InitPacket functions
    PM4WriteDataPacket(void): m_ndw(0), m_pPacketData(NULL) {}
    // These constructors also init the packet, no need for additional calls
    PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1):
        m_ndw(1), m_pPacketData(NULL) {InitPacket(destBuf, &data1);}
    PM4WriteDataPacket(unsigned int *destBuf, unsigned int data1, unsigned int data2):
        m_ndw(2), m_pPacketData(NULL) {
        unsigned int data[2] = {data1, data2};
        InitPacket(destBuf, data);
    }

    virtual ~PM4WriteDataPacket(void) {}
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }
    // @brief Initialise the packet to write a single dword
    void InitPacket(unsigned int *destBuf, unsigned int data1) {
        m_ndw = 1;
        InitPacket(destBuf, &data1);
    }
    // @brief Initialise the packet to write two dwords
    void InitPacket(unsigned int *destBuf, unsigned int data1, unsigned int data2) {
        unsigned int data[2] = {data1, data2};
        m_ndw = 2;
        InitPacket(destBuf, data);
    }
    // @brief Initialise the packet to write m_ndw dwords read from data;
    //        m_ndw must be set before this overload is called
    void InitPacket(unsigned int *destBuf, void *data);

 protected:
    // Number of data dwords carried by the packet
    unsigned int m_ndw;
    // PM4WRITE_DATA_CI struct contains all the packet's data
    PM4WRITE_DATA_CI  *m_pPacketData;
};

// @class PM4ReleaseMemoryPacket: RELEASE_MEM packet. The packet layout
// (CI, AI or NV) is chosen from the family id given to the constructor.
class PM4ReleaseMemoryPacket : public PM4Packet {
 public:
    // Empty constructor: no packet is built. GetPacket() returns NULL and
    // SizeInBytes() returns 0 (m_packetSize is initialised here so the size
    // is never an indeterminate value).
    PM4ReleaseMemoryPacket(void): m_pPacketData(NULL), m_packetSize(0) {}
    // This constructor will also init the packet, no need for additional calls
    // @param familyId          selects the packet layout
    // @param isPolling         when true, no interrupt is requested
    // @param address           destination address; 0 means no data is written
    // @param data              value carried by the packet
    // @param is64bit           send 64-bit data instead of the low 32 bits
    // @param isTimeStamp       together with is64bit, send the GPU clock counter
    // @param headerCountOffset offset added to the header COUNT field
    PM4ReleaseMemoryPacket(unsigned int familyId, bool isPolling, uint64_t address, uint64_t data,
                           bool is64bit = false, bool isTimeStamp = false, int headerCountOffset = 0);

    virtual ~PM4ReleaseMemoryPacket(void) {}
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }

 private:
    // @brief Initialise the packet using the CI, AI or NV layout respectively
    void InitPacketCI(bool isPolling, uint64_t address, uint64_t data,
                 bool is64bit = false, bool isTimeStamp = false);
    void InitPacketAI(bool isPolling, uint64_t address, uint64_t data,
                 bool is64bit = false, bool isTimeStamp = false);
    void InitPacketNV(bool isPolling, uint64_t address, uint64_t data,
                 bool is64bit = false, bool isTimeStamp = false);

    // Points to the layout-specific packet struct; NULL until a packet is built
    void *m_pPacketData;
    // Size in bytes of the layout-specific packet struct
    unsigned int  m_packetSize;
};

// @class PM4IndirectBufPacket: INDIRECT_BUFFER packet that references an IndirectBuffer.
class PM4IndirectBufPacket : public PM4Packet {
 public:
    // Empty constructor, before using the packet call the init func
    PM4IndirectBufPacket(void) {}
    // This constructor will also init the packet, no need for additional calls
    explicit PM4IndirectBufPacket(IndirectBuffer *pIb);

    virtual ~PM4IndirectBufPacket(void) {}
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }
    // @brief Initialise the packet
    void InitPacket(IndirectBuffer *pIb);

 private:
    // PM4MEC_INDIRECT_BUFFER struct contains all the packet's data
    PM4MEC_INDIRECT_BUFFER  m_packetData;
};

// @class PM4AcquireMemoryPacket: ACQUIRE_MEM packet. The packet layout
// (AI or NV) is chosen from the family id given to the constructor.
class PM4AcquireMemoryPacket : public PM4Packet {
 public:
    // This constructor also inits the packet, no need for additional calls
    PM4AcquireMemoryPacket(unsigned int familyId);
    virtual ~PM4AcquireMemoryPacket(void) {}

    // @returns the packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }

 private:
    // @brief Initialise the packet using the AI or NV layout respectively
    void InitPacketAI(void);
    void InitPacketNV(void);
    // Points to the layout-specific packet struct
    void *m_pPacketData;
    // Size in bytes of the layout-specific packet struct
    unsigned int  m_packetSize;
};

// @class PM4SetShaderRegPacket Packet that writes to consecutive registers starting at baseOffset.
class PM4SetShaderRegPacket : public PM4Packet {
 public:
    // Empty constructor, before using the packet call InitPacket
    PM4SetShaderRegPacket(void);

    // This constructor also inits the packet, no need for additional calls
    PM4SetShaderRegPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs);

    virtual ~PM4SetShaderRegPacket(void) {}

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_pPacketData; }

    // @brief Initialise the packet to write numRegs values from regValues
    //        to consecutive registers starting at baseOffset
    void InitPacket(unsigned int baseOffset, const unsigned int regValues[], unsigned int numRegs);

 private:
    // Packet size in bytes, set by InitPacket
    unsigned int m_packetSize;
    // PM4SET_SH_REG struct contains all the packet's data
    PM4SET_SH_REG  *m_pPacketData;
};

// @class PM4DispatchDirectPacket: DISPATCH_DIRECT packet carrying the three
// dimension values and the dispatch initiator.
class PM4DispatchDirectPacket : public PM4Packet {
 public:
    // Empty constructor, before using the packet call InitPacket
    PM4DispatchDirectPacket(void) {}

    // This constructor also inits the packet, no need for additional calls
    PM4DispatchDirectPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit);

    virtual ~PM4DispatchDirectPacket(void) {}

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

    // @brief Initialise the packet
    void InitPacket(unsigned int dimX, unsigned int dimY, unsigned int dimZ, unsigned int dispatchInit);

 private:
    // PM4DISPATCH_DIRECT struct contains all the packet's data
    PM4DISPATCH_DIRECT  m_packetData;
};

// @class PM4PartialFlushPacket: EVENT_WRITE packet carrying a CS partial flush event.
class PM4PartialFlushPacket : public PM4Packet {
 public:
    // The constructor also inits the packet, no need for additional calls
    PM4PartialFlushPacket(void);
    virtual ~PM4PartialFlushPacket(void) {}

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

 private:
    // PM4EVENT_WRITE struct contains all the packet's data
    PM4EVENT_WRITE  m_packetData;
};

// @class PM4NopPacket: NOP packet of a caller-chosen length.
class PM4NopPacket : public PM4Packet {
 public:
    // @param count : packet length in dwords (the packet size is count * 4 bytes)
    PM4NopPacket(unsigned int count = 1);
    virtual ~PM4NopPacket(void) {}

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_packetSize; }
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_packetData; }

 private:
    // Packet size in bytes
    unsigned int m_packetSize;
    // Points to the packet, which begins with its type-3 header
    PM4_TYPE_3_HEADER *m_packetData;
};

// @class PM4WaitRegMemPacket: WAIT_REG_MEM packet that polls a memory
// location or register against a reference value.
class PM4WaitRegMemPacket : public PM4Packet {
 public:
    // Empty constructor, before using the packet call InitPacket
    PM4WaitRegMemPacket(void) {}
    // Waits for equality with ref using a full mask; memory selects memory vs. register space
    PM4WaitRegMemPacket(bool memory, uint64_t addr, uint32_t ref, uint16_t pollInterval);
    // Fully parameterised variant; all arguments are forwarded to InitPacket
    PM4WaitRegMemPacket(unsigned int function, unsigned int space, unsigned int operation,
                        uint64_t addr, uint32_t ref, uint32_t mask, uint16_t pollInterval);
    virtual ~PM4WaitRegMemPacket(void) {}

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const;
    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &m_packetData; }

    // @brief Initialise the packet
    void InitPacket(unsigned int function, unsigned int space, unsigned int operation,
                    uint64_t addr, uint32_t ref, uint32_t mask, uint16_t pollInterval);

 private:
    // PM4MEC_WAIT_REG_MEM struct contains all the packet's data
    PM4MEC_WAIT_REG_MEM m_packetData;
};

#endif  // __KFD_PM4_PACKET__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/PM4Queue.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "PM4Queue.hpp"
#include "pm4_pkt_struct_common.h"
#include "GoogleTestExtension.hpp"
#include "kfd_pm4_opcodes.h"


// Constructs a PM4 queue; the queue's NOP command is set to CMD_NOP_TYPE_3.
PM4Queue::PM4Queue(void) {
    CMD_NOP = CMD_NOP_TYPE_3;
}

// Nothing to release here; the body is intentionally empty.
PM4Queue::~PM4Queue(void) {
}

// @returns the write pointer in dwords, modulo the queue size in dwords
unsigned int PM4Queue::Wptr() {
    /* Write pointer in dwords. Simulate 32-bit wptr that wraps at
     * queue size even on Vega10 and later chips with 64-bit wptr.
     * The queue buffer size is divided by 4 to get the size in dwords.
     */
    return *m_Resources.Queue_write_ptr % (m_QueueBuf->Size() / 4);
}

// @returns the current value of the CP read pointer
unsigned int PM4Queue::Rptr() {
    /* CP read pointer in dwords. It's still 32-bit even on Vega10. */
    const unsigned int readPtr = *m_Resources.Queue_read_ptr;
    return readPtr;
}

// @returns the read pointer value expected once all packets are consumed
unsigned int PM4Queue::RptrWhenConsumed() {
    /* On PM4 queues Rptr is always 32-bit in dword units and wraps at
     * queue size, so once every packet has been consumed it is expected
     * to equal exactly what Wptr() returns.
     */
    const unsigned int expectedRptr = Wptr();
    return expectedRptr;
}

// Publishes the pending write pointer: first to the queue's write pointer,
// then to its doorbell. A MemoryBarrier() is issued before each of the two
// stores, so the order of the statements below must not be changed.
void PM4Queue::SubmitPacket() {
    // m_pending Wptr is in dwords
    if (m_FamilyId < FAMILY_AI) {
        // Pre-Vega10 uses 32-bit wptr and doorbell
        MemoryBarrier();
        *m_Resources.Queue_write_ptr = m_pendingWptr;
        MemoryBarrier();
        *(m_Resources.Queue_DoorBell) = m_pendingWptr;
    } else {
        // Vega10 and later uses 64-bit wptr and doorbell
        MemoryBarrier();
        *m_Resources.Queue_write_ptr_aql = m_pendingWptr64;
        MemoryBarrier();
        *(m_Resources.Queue_DoorBell_aql) = m_pendingWptr64;
    }
}

// Waits for the submitted packets. Without an event this defers to
// BaseQueue::Wait4PacketConsumption(). With an event, a RELEASE_MEM packet
// built from the event's HWData2 and EventId is submitted and the event is
// then waited on.
// @param event    event to wait on, or NULL to use the base-class wait
// @param timeOut  time-out passed to the wait
void PM4Queue::Wait4PacketConsumption(HsaEvent *event, unsigned int timeOut) {
    if (!event) {
        BaseQueue::Wait4PacketConsumption(NULL, timeOut);
        return;
    }

    // isPolling = false, is64bit = true
    PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
                event->EventData.HWData2,
                event->EventId,
                true));

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(event, timeOut));
}


================================================
FILE: libhsakmt/tests/kfdtest/src/PM4Queue.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_PM4_QUEUE__H__
#define __KFD_PM4_QUEUE__H__

#include "BaseQueue.hpp"
#include "PM4Packet.hpp"

// @class PM4Queue: BaseQueue specialisation that accepts PM4 packets and
// reports itself as an HSA_QUEUE_COMPUTE queue.
class PM4Queue : public BaseQueue {
 public:
    PM4Queue();
    virtual ~PM4Queue();

    // @brief Update the queue write pointer, then write the same value to the queue doorbell
    virtual void SubmitPacket();

    // @return read pointer modulo queue size in dwords
    virtual unsigned int Rptr();
    // @return write pointer modulo queue size in dwords
    virtual unsigned int Wptr();
    // @return expected m_Resources.Queue_read_ptr value once all packets are consumed
    virtual unsigned int RptrWhenConsumed();

    /** Wait until every packet submitted to the queue has been consumed
     *  (i.e. until RPTR == WPTR).
     *  "Consumed" is weaker than "processed": when an event is supplied the
     *  call waits for the packets to be processed instead, which has two
     *  advantages:
     *  1) Less CPU usage (the process can sleep, waiting for an interrupt).
     *  2) Lower latency (the GPU only updates RPTR in memory periodically).
     */
    virtual void Wait4PacketConsumption(HsaEvent *event = NULL, unsigned int timeOut = g_TestTimeOut);

 protected:
    // @return the only packet type this queue accepts
    virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_PM4; }

    // @return the HSA queue type used for this queue
    virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE; }
};

#endif  // __KFD_PM4_QUEUE__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/RDMATest.cpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "RDMATest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
#include "RDMAUtil.hpp"

// Per-test fixture setup. Adds nothing RDMA-specific: it only forwards to
// KFDBaseComponentTest::SetUp(). ROUTINE_START / ROUTINE_END are project
// macros defined outside this file that bracket the body.
void RDMATest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}

// Per-test fixture teardown. Adds nothing RDMA-specific: it only forwards to
// KFDBaseComponentTest::TearDown(). ROUTINE_START / ROUTINE_END are project
// macros defined outside this file that bracket the body.
void RDMATest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}

/* GPUDirect: check that data the GPU wrote into local memory can be read back
 * by the CPU through a mapping provided by the RDMA test driver.
 *
 * Steps:
 *  1. Allocate one page of system memory and one page of local memory and map
 *     both to the GPU.
 *  2. Fill the system buffer with 0xfe and run the CopyDwordIsa shader to copy
 *     from the system buffer into the local buffer.
 *  3. Ask the test driver (LocalMemoryAccess) for the pages backing the local
 *     buffer, mmap them, and compare the first 4 bytes with the system buffer.
 */
TEST_F(RDMATest, GPUDirect) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);
    HSAuint64 AlternateVAGPU;  // output of hsaKmtMapMemoryToGPU, not used further

    PM4Queue queue;
    unsigned int BufferSize = PAGE_SIZE;
    int ret;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer srcSysBuffer(BufferSize, defaultGPUNode, false);
    HsaMemoryBuffer srcLocalBuffer(BufferSize, defaultGPUNode, false, true);

    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(srcSysBuffer.As<void*>(),
                                        srcSysBuffer.Size(),
                                        &AlternateVAGPU));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(srcLocalBuffer.As<void*>(),
                                        srcLocalBuffer.Size(),
                                        &AlternateVAGPU));

    /* Fill up srcSysBuffer */
    srcSysBuffer.Fill(0xfe);

    /* Put 'copy dword' command to ISA buffer */
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));


    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    Dispatch dispatch(isaBuffer);

    /* Submit the command to GPU so GPU will copy from system memory
     * (srcSysBuffer) to local memory(srcLocalBuffer)
     */
    dispatch.SetArgs(srcSysBuffer.As<void*>(), srcLocalBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);  // GPU executed the command

    EXPECT_SUCCESS(queue.Destroy());

    LocalMemoryAccess Rdma;

    Rdma.Open();
    ASSERT_GE(Rdma.fd, 0) << "Failed to open RDMA";

    /* GetPages asks the test driver to convert GPU virtual memory to DMA/
     * Physical memory and save it in the list. rdma_mmap maps the memory to
     * user space memory.
     */
    ret = Rdma.GetPages((uint64_t)srcLocalBuffer.As<void*>(), PAGE_SIZE);
    ASSERT_EQ(ret, 0) << "Failed to get pages";

    void *gpuAddr = Rdma.MMap((uint64_t)srcLocalBuffer.As<void*>(), PAGE_SIZE);
    /* MMap() returns NULL when no device is open and mmap() itself returns
     * MAP_FAILED, i.e. (void *)-1, on error. Comparing as a signed value and
     * requiring it to be strictly positive rejects both; an unsigned ">= 0"
     * comparison can never fail.
     */
    ASSERT_GT((int64_t)gpuAddr, 0) << "Failed to map RDMA address.";

    /* Read the memory to confirm that application can read the local memory
     * correctly from the mapped address.
     */
    EXPECT_EQ(memcmp(gpuAddr, srcSysBuffer.As<void*>(), 4), 0);

    Rdma.UnMap(gpuAddr, PAGE_SIZE);
    Rdma.Close();

    TEST_END
}

/* ContiguousVRAMAllocation: allocate a 4 GiB buffer with the NonPaged and
 * Contiguous flags, have the GPU write into it, and (when the RDMA test driver
 * is available) map it through LocalMemoryAccess and read it back.
 *
 * The test is skipped when the default GPU node has less than
 * BufferSize + 1 GiB of VRAM or when the allocation reports
 * HSAKMT_STATUS_NOT_SUPPORTED.
 *
 * NOTE: this test is interactive. After mapping, it prints the buffer address
 * and blocks on getchar() so the page table can be inspected manually.
 */
TEST_F(RDMATest, ContiguousVRAMAllocation) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    HSAuint64 AlternateVAGPU;  // output of hsaKmtMapMemoryToGPU, not used further

    PM4Queue queue;
    unsigned long BufferSize = 4UL << 30;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (GetVramSize(defaultGPUNode) < BufferSize + (1UL << 30)) {
        LOG() << "no enough VRAM, skipping the test" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer srcSysBuffer(PAGE_SIZE, defaultGPUNode, false);
    void *LocalBuffer;
    HsaMemFlags memFlags = {0};
    int ret;

    memFlags.ui32.NonPaged = 1;
    memFlags.ui32.Contiguous = 1;
    ret = hsaKmtAllocMemory(defaultGPUNode, BufferSize, memFlags, &LocalBuffer);
    if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
        LOG() << "KFD does not support contiguous memory, skipping the test" << std::endl;
        return;
    }
    /* Any other failure leaves LocalBuffer unset, so stop before using it. */
    ASSERT_EQ(ret, HSAKMT_STATUS_SUCCESS) << "Failed to allocate contiguous VRAM";

    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(srcSysBuffer.As<void*>(),
                                        srcSysBuffer.Size(),
                                        &AlternateVAGPU));
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(LocalBuffer, BufferSize, &AlternateVAGPU));

    /* Fill up srcSysBuffer */
    srcSysBuffer.Fill(0xfe);

    /* Put 'copy dword' command to ISA buffer */
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    Dispatch dispatch(isaBuffer);

    /* Submit the command to GPU so GPU will copy from system memory
     * (srcSysBuffer) to local memory(LocalBuffer)
     */
    dispatch.SetArgs(srcSysBuffer.As<void*>(), LocalBuffer);
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);  // GPU executed the command

    EXPECT_SUCCESS(queue.Destroy());

    LocalMemoryAccess Rdma;
    void *gpuAddr;

    Rdma.Open();
    if (Rdma.fd < 0) {
        LOG() << "amdp2ptest.ko driver not loaded, skipping RDMA getpages" << std::endl;
        goto exit;
    }

    /* GetPages asks the test driver to convert GPU virtual memory to DMA/
     * Physical memory and save it in the list. rdma_mmap maps the memory to
     * user space memory.
     */
    ret = Rdma.GetPages((uint64_t)LocalBuffer, BufferSize);
    ASSERT_EQ(ret, 0) << "Failed to get pages";

    gpuAddr = Rdma.MMap((uint64_t)LocalBuffer, BufferSize);
    /* Reject both MAP_FAILED ((void *)-1) from mmap() and the NULL that
     * MMap() returns when no device is open.
     */
    ASSERT_GT((int64_t)gpuAddr, 0) << "Failed to map RDMA address.";

    printf("contiguous VRAM address %p size 0x%lx bytes\n", LocalBuffer, BufferSize);
    printf("Pause to dump page table to check if allocation is contiguous\n");
    printf("Press Enter key to continue\n");
    getchar();

    /* Read the memory to confirm that application can read the local memory
     * correctly from the mapped address.
     */
    EXPECT_EQ(memcmp(gpuAddr, srcSysBuffer.As<void*>(), 4), 0);

    /* Unmap the full length that was mapped above, not just the first page. */
    Rdma.UnMap(gpuAddr, BufferSize);
    Rdma.Close();

exit:
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(srcSysBuffer.As<void*>()));
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(LocalBuffer));
    EXPECT_SUCCESS(hsaKmtFreeMemory(LocalBuffer, BufferSize));

    TEST_END
}


================================================
FILE: libhsakmt/tests/kfdtest/src/RDMATest.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __RDMA_TEST__H__
#define __RDMA_TEST__H__

#include <gtest/gtest.h>

#include "KFDBaseComponentTest.hpp"

// @class RDMATest: gtest fixture for the RDMA tests. It carries no state of
// its own and relies on KFDBaseComponentTest for setup and teardown.
class RDMATest : public KFDBaseComponentTest {
 public:
    RDMATest() {}
    ~RDMATest() {}

 protected:
    // Both hooks are defined in RDMATest.cpp and forward to the base class.
    virtual void SetUp();
    virtual void TearDown();
};

#endif  // __RDMA_TEST__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/RDMAUtil.cpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <gtest/gtest.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <string>
#include "amdp2ptest.h"
#include "RDMAUtil.hpp"

// Open the test driver device node (AMDP2PTEST_DEVICE_PATH) read/write and
// store the descriptor in `fd`. On failure `fd` holds open()'s error return
// (-1); callers are expected to check `fd` themselves.
void LocalMemoryAccess::Open() {
    const int deviceFd = ::open(AMDP2PTEST_DEVICE_PATH, O_RDWR);
    fd = deviceFd;
}

// Close the device descriptor and mark this object as not open (fd = -1).
// The result of close() is not checked.
void LocalMemoryAccess::Close() {
    ::close(fd);
    fd = -1;
}

/* Ask the test driver to look up the pages behind a GPU virtual address range.
 *
 * Issues AMD2P2PTEST_IOCTL_GET_PAGES on the open device with the given
 * address and length.
 *
 * @param gpu_va_addr  start of the range (GPU virtual address)
 * @param size         length of the range in bytes
 * @return -1 when no device is open (fd < 0), otherwise the ioctl() result
 */
int LocalMemoryAccess::GetPages(uint64_t gpu_va_addr, uint64_t size) {
    struct AMDRDMA_IOCTL_GET_PAGES_PARAM param = {0};

    /* 0 is a valid file descriptor; only negative values mean "not open".
     * This matches the fd checks done by the callers of Open().
     */
    if (fd < 0)
        return -1;

    param.addr = gpu_va_addr;
    param.length = size;

    return ioctl(fd, AMD2P2PTEST_IOCTL_GET_PAGES, &param);
}

/* Map part of the test driver device into this process, read-only and shared.
 *
 * @param offset  value passed as the mmap() offset; the tests in this suite
 *                pass the GPU virtual address of the buffer
 * @param size    number of bytes to map
 * @return NULL when no device is open (fd < 0); otherwise the mmap() result,
 *         which is MAP_FAILED ((void *)-1) on error. Callers must check for
 *         both values.
 */
void *LocalMemoryAccess::MMap(uint64_t offset, size_t size) {
    void *gpuAddr;

    /* 0 is a valid file descriptor; only negative values mean "not open". */
    if (fd < 0)
        return NULL;

    gpuAddr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
    return gpuAddr;
}

// Release a mapping previously obtained from MMap(). `offset` is the mapped
// address (despite its name) and `size` the number of bytes to unmap. The
// result of munmap() is not checked.
void LocalMemoryAccess::UnMap(void *offset, size_t size) {
    ::munmap(offset, size);
}


================================================
FILE: libhsakmt/tests/kfdtest/src/RDMAUtil.hpp
================================================
/*
 * Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __RDMA__UTIL__H__
#define __RDMA__UTIL__H__

// @class LocalMemoryAccess: thin wrapper around the RDMA test driver device
// node, used by the tests to read GPU local memory from the CPU.
class LocalMemoryAccess {
 public:
    // Start in the "not open" state so that `fd` never holds an indeterminate
    // value before Open() is called.
    LocalMemoryAccess() : fd(-1) {}

    // Device file descriptor; negative while the device is not open.
    int fd;

    // Open the device node and store the descriptor in `fd`.
    void Open(void);
    // Close the descriptor and reset `fd` to -1.
    void Close(void);
    // Request the pages behind a GPU virtual address range (address, length
    // in bytes). Returns -1 if the device is not open, else the ioctl result.
    int GetPages(uint64_t, uint64_t);
    // mmap() the device at the given offset for the given number of bytes.
    void *MMap(uint64_t, size_t);
    // munmap() a mapping returned by MMap().
    void UnMap(void *, size_t);
};

#endif  // __RDMA__UTIL__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/SDMAPacket.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <vector>
#include "SDMAPacket.hpp"
#include "KFDTestUtil.hpp"

/* Encode a byte/dword count for an SDMA packet's count field. Families before
 * FAMILY_AI store the count unchanged; FAMILY_AI and later store count - 1,
 * so a count of 1 is encoded as 0. The macro reads m_FamilyId, so it can only
 * be used inside packet member functions after m_FamilyId has been set.
 */
#define SDMA_COUNT(c) (m_FamilyId < FAMILY_AI ? (c) : (c)-1)

/* Build a write packet that stores a single dword.
 *
 * @param familyId          ASIC family, used for the count encoding
 * @param destAddr          destination address of the write
 * @param data              the dword to write
 * @param packetSizeOffset  number of bytes subtracted from the computed packet size
 */
SDMAWriteDataPacket::SDMAWriteDataPacket(unsigned int familyId, void* destAddr, unsigned int data,
                                         unsigned int packetSizeOffset)
    : packetData(NULL) {
    m_FamilyId = familyId;

    // Exactly one dword of payload, taken from the by-value parameter.
    InitPacket(destAddr, 1, &data, packetSizeOffset);
}

/* Build a write packet that stores `ndw` dwords read from `data`.
 *
 * @param familyId  ASIC family, used for the count encoding
 * @param destAddr  destination address of the write
 * @param ndw       number of dwords to write
 * @param data      source buffer holding at least ndw dwords
 */
SDMAWriteDataPacket::SDMAWriteDataPacket(unsigned int familyId, void* destAddr, unsigned int ndw,
                                         void *data)
    : packetData(NULL) {
    m_FamilyId = familyId;

    // No size offset: InitPacket's default of 0 applies.
    InitPacket(destAddr, ndw, data);
}

void SDMAWriteDataPacket::InitPacket(void* destAddr, unsigned int ndw,
                                     void *data, unsigned int packetSizeOffset) {
    packetSize = sizeof(SDMA_PKT_WRITE_UNTILED) +
        (ndw - 1) * sizeof(unsigned int);
    packetSize -= packetSizeOffset;
    packetData = reinterpret_cast<SDMA_PKT_WRITE_UNTILED *>(AllocPacket());

    packetData->HEADER_UNION.op = SDMA_OP_WRITE;
    packetData->HEADER_UNION.sub_op = SDMA_SUBOP_WRITE_LINEAR;

    SplitU64(reinterpret_cast<HSAuint64>(destAddr),
             packetData->DST_ADDR_LO_UNION.DW_1_DATA,  // dst_addr_31_0
             packetData->DST_ADDR_HI_UNION.DW_2_DATA);  // dst_addr_63_32

    packetData->DW_3_UNION.count = SDMA_COUNT(ndw);
    memcpy(&packetData->DATA0_UNION.DW_4_DATA, data, ndw*sizeof(unsigned int));
}

#define BITS (21)
#define TWO_MEG (1 << BITS)
/* Build a chain of linear SDMA copy packets that copy `surfsize` bytes from
 * `src` to each of the `n` destinations in `dsts`.
 *
 * The copy is split into chunks of at most 2MB; one packet is emitted per
 * chunk and the source and destination addresses advance by the chunk size.
 * When n > 1 the broadcast bit is set in every packet header.
 *
 * @param familyId  ASIC family, used for the count encoding
 * @param dsts      array of n destination addresses (not modified)
 * @param src       source address
 * @param n         number of destinations; a warning is logged if n > 2
 * @param surfsize  total number of bytes to copy
 */
SDMACopyDataPacket::SDMACopyDataPacket(unsigned int familyId,
                        void *const dsts[], void *src, int n, unsigned int surfsize) {
    int32_t size = 0, i;
    /* Private, mutable copy of the destination list: the addresses are
     * advanced per chunk below and the caller's array must stay untouched.
     * std::vector replaces the former unchecked malloc()/free() pair.
     */
    std::vector<void*> dst(dsts, dsts + (n > 0 ? n : 0));
    const int singlePacketSize = sizeof(SDMA_PKT_COPY_LINEAR) +
                        sizeof(SDMA_PKT_COPY_LINEAR::DST_ADDR[0]) * n;

    if (n > 2)
        WARN() << "SDMACopyDataPacket does not support more than 2 dst addresses!" << std::endl;

    m_FamilyId = familyId;

    /* One packet per started 2MB chunk. The size must be set before
     * AllocPacket() is called.
     */
    packetSize = ((surfsize + TWO_MEG - 1) >> BITS) * singlePacketSize;

    SDMA_PKT_COPY_LINEAR *pSDMA = reinterpret_cast<SDMA_PKT_COPY_LINEAR *>(AllocPacket());
    packetData = pSDMA;

    while (surfsize > 0) {
        /* SDMA support maximum 0x3fffe0 byte in one copy, take 2M here */
        if (surfsize > TWO_MEG)
            size = TWO_MEG;
        else
            size = surfsize;

        memset(pSDMA, 0, singlePacketSize);
        pSDMA->HEADER_UNION.op           = SDMA_OP_COPY;
        pSDMA->HEADER_UNION.sub_op       = SDMA_SUBOP_COPY_LINEAR;
        pSDMA->HEADER_UNION.broadcast       = n > 1 ? 1 : 0;
        pSDMA->COUNT_UNION.count             = SDMA_COUNT(size);
        SplitU64(reinterpret_cast<HSAuint64>(src),
                 pSDMA->SRC_ADDR_LO_UNION.DW_3_DATA,  // src_addr_31_0
                 pSDMA->SRC_ADDR_HI_UNION.DW_4_DATA);  // src_addr_63_32

        for (i = 0; i < n; i++)
            SplitU64(reinterpret_cast<HSAuint64>(dst[i]),
                    pSDMA->DST_ADDR[i].DST_ADDR_LO_UNION.DW_5_DATA,  // dst_addr_31_0
                    pSDMA->DST_ADDR[i].DST_ADDR_HI_UNION.DW_6_DATA);  // dst_addr_63_32

        /* Step to the next packet slot and advance all addresses by the
         * amount just scheduled.
         */
        pSDMA = reinterpret_cast<SDMA_PKT_COPY_LINEAR *>(reinterpret_cast<char *>(pSDMA) + singlePacketSize);
        for (i = 0; i < n; i++)
            dst[i] = reinterpret_cast<char *>(dst[i]) + size;
        src = reinterpret_cast<char *>(src) + size;
        surfsize -= size;
    }
}

/* Single-destination convenience constructor: copy `surfsize` bytes from
 * `src` to `dst`.
 *
 * Delegates to the multi-destination constructor with a one-element
 * destination list. A delegating constructor is used instead of placement-new
 * on `this`, so the base class and members are constructed exactly once.
 */
SDMACopyDataPacket::SDMACopyDataPacket(unsigned int familyId, void* dst, void *src, unsigned int surfsize)
    : SDMACopyDataPacket(familyId, &dst, src, 1, surfsize) {
}

/* Build a chain of SDMA constant-fill packets that fill `size` bytes starting
 * at `dst` with the value `data`.
 *
 * The range is split into chunks of at most 2MB, one packet per chunk. A chunk
 * uses dword fill when both its size and its start address are 4-byte
 * aligned, otherwise byte fill.
 *
 * NOTE(review): unlike SDMACopyDataPacket, the packets are not memset before
 * the fields are written; whether AllocPacket() returns zeroed memory is not
 * visible here — confirm.
 *
 * @param familyId  ASIC family, used for the count encoding
 * @param dst       start address of the region to fill
 * @param data      fill value written into each packet
 * @param size      number of bytes to fill
 */
SDMAFillDataPacket::SDMAFillDataPacket(unsigned int familyId, void *dst, unsigned int data, unsigned int size) {
    m_FamilyId = familyId;

    /* SDMA support maximum 0x3fffe0 byte in one copy, so 2M chunks are used:
     * one packet per started chunk. The size must be set before AllocPacket().
     */
    m_PacketSize = ((size + TWO_MEG - 1) >> BITS) * sizeof(SDMA_PKT_CONSTANT_FILL);

    SDMA_PKT_CONSTANT_FILL *fill = reinterpret_cast<SDMA_PKT_CONSTANT_FILL *>(AllocPacket());
    m_PacketData = fill;

    char *cursor = reinterpret_cast<char *>(dst);
    unsigned int remaining = size;

    while (remaining > 0) {
        const unsigned int chunk = (remaining > TWO_MEG) ? TWO_MEG : remaining;
        const HSAuint64 chunkVa = reinterpret_cast<HSAuint64>(cursor);

        fill->HEADER_UNION.op = SDMA_OP_CONST_FILL;
        fill->HEADER_UNION.sub_op = 0;

        /* 2 selects dword fill, 0 selects byte fill */
        const bool dwordAligned = ((chunk & 0x3) == 0) && ((chunkVa & 0x3) == 0);
        fill->HEADER_UNION.fillsize = dwordAligned ? 2 : 0;

        fill->COUNT_UNION.count = SDMA_COUNT(chunk);

        SplitU64(chunkVa,
            fill->DST_ADDR_LO_UNION.DW_1_DATA,   /*dst_addr_31_0*/
            fill->DST_ADDR_HI_UNION.DW_2_DATA);  /*dst_addr_63_32*/

        fill->DATA_UNION.DW_3_DATA = data;

        // Move on to the next packet slot and the next chunk of the region.
        ++fill;
        cursor += chunk;
        remaining -= chunk;
    }
}

// Default constructor: leaves the packet untouched. InitPacketCI() or
// InitPacketNV() must be called before the packet is used.
SDMAFencePacket::SDMAFencePacket() {
}

/* Build a fence packet that writes `data` to `destAddr`.
 *
 * FAMILY_NV and later use the NV header layout (InitPacketNV); earlier
 * families use InitPacketCI.
 */
SDMAFencePacket::SDMAFencePacket(unsigned int familyId, void* destAddr, unsigned int data) {
    m_FamilyId = familyId;

    if (m_FamilyId >= FAMILY_NV) {
        InitPacketNV(destAddr, data);
    } else {
        InitPacketCI(destAddr, data);
    }
}

SDMAFencePacket::~SDMAFencePacket() {
}

void SDMAFencePacket::InitPacketCI(void* destAddr, unsigned int data) {
    memset(&packetData, 0, SizeInBytes());

    packetData.HEADER_UNION.op = SDMA_OP_FENCE;

    SplitU64(reinterpret_cast<HSAuint64>(destAddr),
             packetData.ADDR_LO_UNION.DW_1_DATA, /*dst_addr_31_0*/
             packetData.ADDR_HI_UNION.DW_2_DATA); /*dst_addr_63_32*/

    packetData.DATA_UNION.data = data;
}

/* Fill the fence packet using the NV layout: zero the packet, write the whole
 * header dword (opcode plus memory attribute bits), the destination address
 * and the value to write.
 *
 * @param destAddr  address the fence value is written to
 * @param data      fence value
 */
void SDMAFencePacket::InitPacketNV(void* destAddr, unsigned int data) {
    memset(&packetData, 0, SizeInBytes());

    /* GPA = 0 because we use a virtual address
     * Snoop = 1 because we want the write to be CPU coherent
     * System = 1 because the memory is system memory
     * mtype = uncached, for the purpose of CPU coherence; the L2 policy
     *         doesn't matter in this case
     * NOTE(review): the bit positions below are presumably listed in the same
     * order as the fields above — confirm against the SDMA packet spec.
     */
    const int gpaBits    = 0 << 23;
    const int snoopBits  = 1 << 22;
    const int systemBits = 1 << 20;
    const int mtypeBits  = 3 << 16;
    packetData.HEADER_UNION.DW_0_DATA = gpaBits | snoopBits | systemBits | mtypeBits | SDMA_OP_FENCE;

    const unsigned long long fenceVa = reinterpret_cast<unsigned long long>(destAddr);
    SplitU64(fenceVa,
             packetData.ADDR_LO_UNION.DW_1_DATA,   /*dst_addr_31_0*/
             packetData.ADDR_HI_UNION.DW_2_DATA);  /*dst_addr_63_32*/

    packetData.DATA_UNION.data = data;
}


// Build a trap packet for the given event ID; the packet is fully initialised
// by the constructor.
SDMATrapPacket::SDMATrapPacket(unsigned int eventID) {
    this->InitPacket(eventID);
}

SDMATrapPacket::~SDMATrapPacket() {
}

void SDMATrapPacket::InitPacket(unsigned int eventID) {
    memset(&packetData, 0, SizeInBytes());

    packetData.HEADER_UNION.op = SDMA_OP_TRAP;
    packetData.INT_CONTEXT_UNION.int_context = eventID;
}

// Build a poll packet that compares the memory at `addr` against `value`; the
// packet is fully initialised by the constructor.
SDMAPollRegMemPacket::SDMAPollRegMemPacket(void *addr, int value) {
    this->InitPacket(addr, value);
}

SDMAPollRegMemPacket::~SDMAPollRegMemPacket() {
}

void SDMAPollRegMemPacket::InitPacket(void *addr, int value) {
    memset(&packetData, 0, SizeInBytes());

    packetData.HEADER_UNION.op = SDMA_OP_POLL_REGMEM;
    packetData.HEADER_UNION.mem_poll = 1;
    packetData.HEADER_UNION.func = 0x3; // IsEqual.
    SplitU64(reinterpret_cast<unsigned long long>(addr),
             packetData.ADDR_LO_UNION.DW_1_DATA,
             packetData.ADDR_HI_UNION.DW_2_DATA);
    packetData.VALUE_UNION.value = value;
    packetData.MASK_UNION.mask = 0xffffffff; // Compare the whole content.
    packetData.DW5_UNION.interval = 0x04;
    packetData.DW5_UNION.retry_count = 0xfff;
}

// Build a timestamp packet that targets `destaddr`; the packet is fully
// initialised by the constructor.
SDMATimePacket::SDMATimePacket(void *destaddr) {
    this->InitPacket(destaddr);
}

SDMATimePacket::~SDMATimePacket() {
}

/* Fill a TIMESTAMP packet that stores the global GPU timestamp at `destaddr`.
 *
 * A warning is logged when the address is not 32-byte aligned; the packet is
 * still built in that case.
 *
 * @param destaddr  destination address for the timestamp
 */
void SDMATimePacket::InitPacket(void *destaddr) {
    memset(&packetData, 0, SizeInBytes());

    packetData.HEADER_UNION.op = SDMA_OP_TIMESTAMP;
    packetData.HEADER_UNION.sub_op = 1 << 1; /* Get Global GPU Timestamp */

    const unsigned long long stampVa = reinterpret_cast<unsigned long long>(destaddr);
    const bool misaligned = (stampVa & 0x1f) != 0;
    if (misaligned)
        WARN() << "SDMATimePacket dst address must aligned to 32bytes boundary" << std::endl;

    SplitU64(stampVa,
            packetData.ADDR_LO_UNION.DW_1_DATA,   /*dst_addr_31_0*/
            packetData.ADDR_HI_UNION.DW_2_DATA);  /*dst_addr_63_32*/
}

/* Build a NOP packet that occupies `count` dwords.
 *
 * The header's count field is written as count - 1.
 * NOTE(review): count == 0 yields a zero packet size and wraps count - 1;
 * callers presumably always pass count >= 1 — confirm.
 *
 * @param count  total packet length in dwords
 */
SDMANopPacket::SDMANopPacket(unsigned int count) {
    // The size must be set before AllocPacket() is called.
    packetSize = count * sizeof(unsigned int);

    SDMA_PKT_NOP *nop = reinterpret_cast<SDMA_PKT_NOP *>(AllocPacket());
    packetData = nop;

    nop->HEADER_UNION.op = SDMA_OP_NOP;
    nop->HEADER_UNION.sub_op = 0;
    nop->HEADER_UNION.count = count - 1;
}


================================================
FILE: libhsakmt/tests/kfdtest/src/SDMAPacket.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_SDMA_PACKET__H__
#define __KFD_SDMA_PACKET__H__

#include "BasePacket.hpp"
#include "sdma_pkt_struct.h"

// @class SDMAPacket: Marks a group of all SDMA packets
// @class SDMAPacket: common base class for all SDMA packets
class SDMAPacket : public BasePacket {
 public:
    SDMAPacket() {}
    virtual ~SDMAPacket() {}

    // @returns PACKETTYPE_SDMA for every packet derived from this class
    virtual PACKETTYPE PacketType() const { return PACKETTYPE_SDMA; }
};

// @class SDMAWriteDataPacket: linear SDMA write of one or more dwords
class SDMAWriteDataPacket : public SDMAPacket {
 public:
    // Single-dword write. The constructor also initialises the packet, no
    // additional calls are needed. packSizeOffset is subtracted from the
    // computed packet size.
    SDMAWriteDataPacket(unsigned int familyId, void* destAddr, unsigned int data,
                        unsigned int packSizeOffset = 0);
    // Multi-dword write of ndw dwords read from data. Also initialises the packet.
    SDMAWriteDataPacket(unsigned int familyId, void* destAddr, unsigned int ndw, void *data);

    virtual ~SDMAWriteDataPacket() {}

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return packetData; }
    // @brief Initialise the packet
    void InitPacket(void* destAddr, unsigned int ndw, void *data, unsigned int packetSizeOffset = 0);
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return packetSize; }

 protected:
    // Points to the SDMA_PKT_WRITE_UNTILED buffer holding the packet's data
    SDMA_PKT_WRITE_UNTILED *packetData;
    // Packet size in bytes, computed by InitPacket()
    unsigned int packetSize;
};

// @class SDMACopyDataPacket: linear SDMA copy to one or more destinations
class SDMACopyDataPacket : public SDMAPacket {
 public:
    // Copy `size` bytes from src to a single destination. The constructor
    // also initialises the packet, no additional calls are needed.
    SDMACopyDataPacket(unsigned int familyId, void *dest, void *src, unsigned int size);
    // Copy `surfsize` bytes from src to each of the n destinations in dst.
    // Also initialises the packet.
    SDMACopyDataPacket(unsigned int familyId, void *const dst[], void *src, int n, unsigned int surfsize);

    virtual ~SDMACopyDataPacket() {}

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return packetData; }

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return packetSize; }

 protected:
    // Points to the first SDMA_PKT_COPY_LINEAR of the packet chain
    SDMA_PKT_COPY_LINEAR *packetData;

    // Total size in bytes of all packets in the chain
    unsigned int packetSize;
};

// @class SDMAFillDataPacket: SDMA constant fill of a memory range
class SDMAFillDataPacket : public SDMAPacket {
 public:
    // Fill `size` bytes at dest with `data`. The constructor also initialises
    // the packet, no additional calls are needed.
    SDMAFillDataPacket(unsigned int familyId, void *dest, unsigned int data, unsigned int size);

    virtual ~SDMAFillDataPacket() {}

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return m_PacketData; }

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return m_PacketSize; }

 protected:
    // Points to the first SDMA_PKT_CONSTANT_FILL of the packet chain
    SDMA_PKT_CONSTANT_FILL *m_PacketData;

    // Total size in bytes of all packets in the chain
    unsigned int m_PacketSize;
};

// @class SDMAFencePacket: SDMA fence that writes a value to an address
class SDMAFencePacket : public SDMAPacket {
 public:
    // Empty constructor: call one of the InitPacket* functions before using
    // the packet.
    SDMAFencePacket();
    // This constructor also initialises the packet (choosing the CI or NV
    // layout from familyId), no additional calls are needed.
    SDMAFencePacket(unsigned int familyId, void* destAddr, unsigned int data);

    virtual ~SDMAFencePacket();

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &packetData; }
    // @brief Initialise the packet using the pre-NV layout
    void InitPacketCI(void* destAddr, unsigned int data);
    // @brief Initialise the packet using the NV layout
    void InitPacketNV(void* destAddr, unsigned int data);

    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_FENCE); }

 protected:
    // SDMA_PKT_FENCE struct contains all the packet's data
    SDMA_PKT_FENCE packetData;
};

// @class SDMATrapPacket: SDMA trap carrying an event ID
class SDMATrapPacket : public SDMAPacket {
 public:
    // The constructor initialises the packet with eventID (0 by default);
    // InitPacket() can be called again later to change the ID.
    explicit SDMATrapPacket(unsigned int eventID = 0);

    virtual ~SDMATrapPacket();

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &packetData; }
    // @brief Initialise the packet
    void InitPacket(unsigned int eventID);
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_TRAP); }

 protected:
    // SDMA_PKT_TRAP struct contains all the packet's data
    SDMA_PKT_TRAP packetData;
};

// @class SDMAPollRegMemPacket: SDMA poll on a memory location
class SDMAPollRegMemPacket : public SDMAPacket {
 public:
    // This constructor also initialises the packet, no additional calls are needed
    SDMAPollRegMemPacket(void* addr, int value);

    virtual ~SDMAPollRegMemPacket();

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &packetData; }
    // @brief Initialise the packet
    void InitPacket(void* addr, int value);
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_POLL_REGMEM); }

 protected:
    // SDMA_PKT_POLL_REGMEM struct contains all the packet's data
    SDMA_PKT_POLL_REGMEM packetData;
    // NOTE(review): not used by SizeInBytes() and never assigned in the
    // SDMAPollRegMemPacket code in SDMAPacket.cpp — possibly a leftover.
    unsigned int packetSize;
};

// @class SDMATimePacket: SDMA timestamp written to a destination address
class SDMATimePacket : public SDMAPacket {
 public:
    // The constructor also initialises the packet for the given destination
    // address, no additional calls are needed.
    SDMATimePacket(void*);

    virtual ~SDMATimePacket();

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return &packetData; }
    // @brief Initialise the packet
    void InitPacket(void*);
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return sizeof(SDMA_PKT_TIMESTAMP); }

 protected:
    // SDMA_PKT_TIMESTAMP struct contains all the packet's data
    SDMA_PKT_TIMESTAMP packetData;
};

// @class SDMANopPacket: SDMA NOP spanning a configurable number of dwords
class SDMANopPacket : public SDMAPacket {
 public:
    // Build a NOP packet that is `count` dwords long (one dword by default)
    SDMANopPacket(unsigned int count = 1);
    virtual ~SDMANopPacket() {}

    // @returns Pointer to the packet
    virtual const void *GetPacket() const { return packetData; }
    // @returns Packet size in bytes
    virtual unsigned int SizeInBytes() const { return packetSize; }

 private:
    // Points to the SDMA_PKT_NOP buffer holding the packet's data
    SDMA_PKT_NOP *packetData;
    // Packet size in bytes (count * sizeof(unsigned int))
    unsigned int packetSize;
};


#endif  // __KFD_SDMA_PACKET__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/SDMAQueue.cpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "SDMAQueue.hpp"
#include "SDMAPacket.hpp"

// Default constructor: the only SDMA-specific setup is setting CMD_NOP to 0.
SDMAQueue::SDMAQueue(void) {
    CMD_NOP = 0;
}

// Destructor with an empty body; SDMAQueue adds no cleanup of its own.
SDMAQueue::~SDMAQueue(void) {}

unsigned int SDMAQueue::Wptr() {
    /* SDMA queues keep their write pointer in bytes, whereas BaseQueue
     * works in dwords, so the value is converted here. On Vega10 the
     * write pointer is 64-bit; only the low 32 bits are read (assuming
     * the queue buffer is smaller than 4GB) and the value is wrapped to
     * the queue size to simulate a 32-bit write pointer.
     */
    const auto wrappedBytes = *m_Resources.Queue_write_ptr % m_QueueBuf->Size();
    return wrappedBytes / sizeof(unsigned int);
}

unsigned int SDMAQueue::Rptr() {
    /* SDMA queues keep their read pointer in bytes, whereas BaseQueue
     * works in dwords, so the value is converted here. On Vega10 the
     * read pointer is 64-bit; only the low 32 bits are read (assuming
     * the queue buffer is smaller than 4GB) and the value is wrapped to
     * the queue size to simulate a 32-bit read pointer.
     */
    const auto wrappedBytes = *m_Resources.Queue_read_ptr % m_QueueBuf->Size();
    return wrappedBytes / sizeof(unsigned int);
}

unsigned int SDMAQueue::RptrWhenConsumed() {
    /* The read pointer has the same width and byte units as the write
     * pointer, and only the low 32 bits matter here. Once every packet
     * has been consumed the two pointers hold the same value, so the
     * current (unwrapped, byte-based) write pointer is the expected
     * read pointer.
     */
    const auto expectedRptr = *m_Resources.Queue_write_ptr;
    return expectedRptr;
}

void SDMAQueue::SubmitPacket() {
    // m_pending Wptr is in dwords
    if (m_FamilyId < FAMILY_AI) {
        // Pre-Vega10 uses 32-bit wptr and doorbell
        unsigned int wPtrInBytes = m_pendingWptr * sizeof(unsigned int);
        MemoryBarrier();
        *m_Resources.Queue_write_ptr = wPtrInBytes;
        MemoryBarrier();
        *(m_Resources.Queue_DoorBell) = wPtrInBytes;
    } else {
        // Vega10 and later uses 64-bit wptr and doorbell
        HSAuint64 wPtrInBytes = m_pendingWptr64 * sizeof(unsigned int);
        MemoryBarrier();
        *m_Resources.Queue_write_ptr_aql = wPtrInBytes;
        MemoryBarrier();
        *(m_Resources.Queue_DoorBell_aql) = wPtrInBytes;
    }
}

void SDMAQueue::Wait4PacketConsumption(HsaEvent *event, unsigned int timeOut) {
    // Without an event, fall back to the generic BaseQueue wait.
    if (!event) {
        BaseQueue::Wait4PacketConsumption(NULL, timeOut);
        return;
    }

    // With an event: queue a fence packet built from the event's HWData2
    // and id, then queue and submit a trap packet for the same event id,
    // and finally wait on the event itself.
    PlacePacket(SDMAFencePacket(m_FamilyId, (void*)event->EventData.HWData2, event->EventId));

    PlaceAndSubmitPacket(SDMATrapPacket(event->EventId));

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(event, timeOut));
}


================================================
FILE: libhsakmt/tests/kfdtest/src/SDMAQueue.hpp
================================================
/*
 * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_SDMA_QUEUE__H__
#define __KFD_SDMA_QUEUE__H__

#include "BaseQueue.hpp"

/** BaseQueue specialization for SDMA queues: reports HSA_QUEUE_SDMA as its
 *  queue type and PACKETTYPE_SDMA as the packet type it supports.
 */
class SDMAQueue : public BaseQueue {
 public:
    SDMAQueue(void);
    virtual ~SDMAQueue(void);

    // @brief Update queue write pointer and set the queue doorbell to the queue write pointer
    virtual void SubmitPacket();

    /** Wait for all the packets submitted to the queue to be consumed (i.e. wait until RPTR=WPTR).
     *  Note that all packets being consumed is not the same as all packets being processed.
     *  If event is set, wait for all packets to be processed instead.
     *  Waiting on an event has two benefits:
     *  1) Less CPU usage (process can sleep, waiting for interrupt).
     *  2) Lower latency (GPU only updates RPTR in memory periodically).
     *
     *  @param event   Event to wait on; when NULL the BaseQueue wait is used
     *  @param timeOut Timeout forwarded to the wait; defaults to g_TestTimeOut
     */
    virtual void Wait4PacketConsumption(HsaEvent *event = NULL, unsigned int timeOut = g_TestTimeOut);

 protected:
    // @return Write pointer modulo queue size in dwords
    virtual unsigned int Wptr();
    // @return Read pointer modulo queue size in dwords
    virtual unsigned int Rptr();
    // @return Expected m_Resources.Queue_read_ptr when all packets are consumed
    virtual unsigned int RptrWhenConsumed();

    // @return Packet type accepted by this queue (always PACKETTYPE_SDMA)
    virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_SDMA; }

    // @return Queue type of this queue (always HSA_QUEUE_SDMA)
    virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_SDMA; }
};

#endif  // __KFD_SDMA_QUEUE__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/SDMAQueueByEngId.hpp
================================================
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_SDMA_QUEUE_BY_ENG_ID__H__
#define __KFD_SDMA_QUEUE_BY_ENG_ID__H__

#include "SDMAQueue.hpp"

/** SDMAQueue variant that records an SDMA engine id and reports
 *  HSA_QUEUE_SDMA_BY_ENG_ID as its queue type.
 */
class SDMAQueueByEngId : public SDMAQueue {
 public:
    // @param engineId Value stored in m_SdmaEngineId
    SDMAQueueByEngId(int engineId) {
        CMD_NOP = 0;
        m_SdmaEngineId = engineId;
    }
    virtual ~SDMAQueueByEngId(void) {}

 protected:
    // @return Queue type of this queue (always HSA_QUEUE_SDMA_BY_ENG_ID)
    virtual _HSA_QUEUE_TYPE GetQueueType() {
        return HSA_QUEUE_SDMA_BY_ENG_ID;
    }
};

#endif  // __KFD_SDMA_QUEUE_BY_ENG_ID__H__


================================================
FILE: libhsakmt/tests/kfdtest/src/ShaderStore.cpp
================================================
/*
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "ShaderStore.hpp"

/**
 * KFDASMTest List
 */

/* Table of shader sources, one pointer per shader.
 *
 * Not every shader defined in this file is listed: e.g. PollNCMemoryIsa,
 * FlushBufferForAcquireReleaseIsa and the WriteRelease / ReadAcquire
 * shaders are absent from this table.
 * NOTE(review): presumably this is the set assembled by KFDASMTest, going
 * by the section heading above - confirm against the test that consumes it.
 */
const std::vector<const char*> ShaderList = {
    NoopIsa,
    CopyDwordIsa,
    InfiniteLoopIsa,
    AtomicIncIsa,
    ScratchCopyDwordIsa,
    PollMemoryIsa,
    CopyOnSignalIsa,
    PollAndCopyIsa,
    WriteFlagAndValueIsa,
    WriteAndSignalIsa,
    LoopIsa,
    PersistentIterateIsa,
    ReadMemoryIsa,
    GwsInitIsa,
    GwsAtomicIncreaseIsa,
    CheckCuMaskIsa
};

/**
 * Macros
 */

/* Common prefix of the shader sources below: a single ".text" directive
 * line, pasted in front of each shader body by string-literal concatenation.
 */
#define SHADER_START ".text\n"

/* Assembler macros for portable v_add_co_u32, v_add_co_ci_u32,
 * v_cmp_lt_u32 and v_cmp_eq_u32. Each macro selects the instruction
 * spelling by .amdgcn.gfx_generation_number:
 *   V_ADD_CO_U32    - gfx10+: v_add_co_u32 with vcc_lo;
 *                     gfx9:   v_add_co_u32 with vcc;
 *                     older:  v_add_u32 with vcc
 *   V_ADD_CO_CI_U32 - gfx10+: v_add_co_ci_u32 with vcc_lo;
 *                     gfx9:   v_addc_co_u32 with vcc;
 *                     older:  v_addc_u32 with vcc
 *   V_CMP_LT_U32,
 *   V_CMP_EQ_U32    - gfx10+: result in vcc_lo; otherwise in vcc
 */
#define SHADER_MACROS_U32 \
    "   .text\n"\
    "   .macro V_ADD_CO_U32 vdst, src0, vsrc1\n"\
    "       .if (.amdgcn.gfx_generation_number >= 10)\n"\
    "           v_add_co_u32        \\vdst, vcc_lo, \\src0, \\vsrc1\n"\
    "       .elseif (.amdgcn.gfx_generation_number >= 9)\n"\
    "           v_add_co_u32        \\vdst, vcc, \\src0, \\vsrc1\n"\
    "       .else\n"\
    "           v_add_u32           \\vdst, vcc, \\src0, \\vsrc1\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro V_ADD_CO_CI_U32 vdst, src0, vsrc1\n"\
    "       .if (.amdgcn.gfx_generation_number >= 10)\n"\
    "           v_add_co_ci_u32     \\vdst, vcc_lo, \\src0, \\vsrc1, vcc_lo\n"\
    "       .elseif (.amdgcn.gfx_generation_number >= 9)\n"\
    "           v_addc_co_u32       \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\
    "       .else\n"\
    "           v_addc_u32          \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro V_CMP_LT_U32 src0, vsrc1\n"\
    "       .if (.amdgcn.gfx_generation_number >= 10)\n"\
    "           v_cmp_lt_u32        vcc_lo, \\src0, \\vsrc1\n"\
    "       .else\n"\
    "           v_cmp_lt_u32        vcc, \\src0, \\vsrc1\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro V_CMP_EQ_U32 src0, vsrc1\n"\
    "       .if (.amdgcn.gfx_generation_number >= 10)\n"\
    "           v_cmp_eq_u32        vcc_lo, \\src0, \\vsrc1\n"\
    "       .else\n"\
    "           v_cmp_eq_u32        vcc, \\src0, \\vsrc1\n"\
    "       .endif\n"\
    "   .endm\n"

/* Assembler macros for portable flat load/store/atomic instructions:
 * FLAT_LOAD_DWORD_NSS, FLAT_LOAD_DWORDX2_NSS, FLAT_STORE_DWORD_NSS and
 * FLAT_ATOMIC_ADD_NSS.
 *
 * gc943 (gfx94x) deprecates glc/slc in favour of nt/sc1/sc0.
 * When the target is generation 9 with minor version >= 4, each macro
 * always emits the instruction with the "nt sc1 sc0" modifiers and
 * ignores arg0/arg1. On every other target the macro appends arg0 and
 * arg1 verbatim, so callers use them to pass e.g. glc, slc, or glc slc.
 */
#define SHADER_MACROS_FLAT \
    "   .macro FLAT_LOAD_DWORD_NSS vdst, vaddr arg0 arg1\n"\
    "       .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)\n"\
    "           flat_load_dword \\vdst, \\vaddr nt sc1 sc0\n"\
    "       .else\n"\
    "           flat_load_dword \\vdst, \\vaddr \\arg0 \\arg1\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro FLAT_LOAD_DWORDX2_NSS vdst, vaddr arg0 arg1\n"\
    "       .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)\n"\
    "           flat_load_dwordx2 \\vdst, \\vaddr nt sc1 sc0\n"\
    "       .else\n"\
    "           flat_load_dwordx2 \\vdst, \\vaddr \\arg0 \\arg1\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro FLAT_STORE_DWORD_NSS vaddr, vsrc arg0 arg1\n"\
    "       .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)\n"\
    "           flat_store_dword \\vaddr, \\vsrc nt sc1 sc0\n"\
    "       .else\n"\
    "           flat_store_dword \\vaddr, \\vsrc \\arg0 \\arg1\n"\
    "       .endif\n"\
    "   .endm\n"\
    "   .macro FLAT_ATOMIC_ADD_NSS vdst, vaddr, vsrc arg0 arg1\n"\
    "       .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)\n"\
    "           flat_atomic_add \\vdst, \\vaddr, \\vsrc nt sc1 sc0\n"\
    "       .else\n"\
    "           flat_atomic_add \\vdst, \\vaddr, \\vsrc \\arg0 \\arg1\n"\
    "       .endif\n"\
    "   .endm\n"

/**
 * Common
 */

/* Shader that does nothing: its only instruction is s_endpgm. */
const char *NoopIsa =
    SHADER_START
    R"(
        s_endpgm
)";

/* Copies one dword from a source address to a destination address.
 *
 * Input:
 *   s[0:1] - source address
 *   s[2:3] - destination address
 *
 * The addresses are moved to v[0:1] / v[2:3], the dword is loaded into
 * v4, and after the load has been waited for it is stored to the
 * destination. GFX12 uses scope:SCOPE_SYS; other targets pass glc slc
 * (which the FLAT_*_NSS macros replace with nt sc1 sc0 on gfx94x).
 */
const char *CopyDwordIsa =
    SHADER_START
    SHADER_MACROS_FLAT
    R"(
        v_mov_b32 v0, s0
        v_mov_b32 v1, s1
        v_mov_b32 v2, s2
        v_mov_b32 v3, s3
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORD_NSS v4, v[0:1] scope:SCOPE_SYS
            s_wait_loadcnt 0
            FLAT_STORE_DWORD_NSS v[2:3], v4 scope:SCOPE_SYS
        .else
            FLAT_LOAD_DWORD_NSS v4, v[0:1] glc slc
            s_waitcnt 0
            FLAT_STORE_DWORD_NSS v[2:3], v4 glc slc
        .endif
        s_endpgm
)";

/* Shader that never terminates on its own: it executes s_nop 0x10 and
 * branches back to LOOP unconditionally, so the trailing s_endpgm is
 * never reached by falling through.
 */
const char *InfiniteLoopIsa =
    SHADER_START
    R"(
        .text
        LOOP:
        s_nop 0x10
        s_branch LOOP
        s_endpgm
)";

/* Performs one atomic update on the dword addressed by s[0:1].
 *
 * Input:
 *   s[0:1] - address of the dword (moved to v[0:1])
 *
 * Per target:
 *   gfx12+ - atomic add of 1 with scope:SCOPE_SYS th:TH_ATOMIC_RETURN
 *   gfx8+  - atomic add of 1 with glc slc (nt sc1 sc0 on gfx94x via the
 *            FLAT_ATOMIC_ADD_NSS macro)
 *   older  - flat_atomic_inc with an operand of -1, glc slc
 * In every path v3 is the destination register of the atomic.
 */
const char *AtomicIncIsa =
    SHADER_START
    SHADER_MACROS_FLAT
    R"(
        v_mov_b32 v0, s0
        v_mov_b32 v1, s1
        .if (.amdgcn.gfx_generation_number >= 12)
            v_mov_b32 v2, 1
            FLAT_ATOMIC_ADD_NSS v3, v[0:1], v2 scope:SCOPE_SYS th:TH_ATOMIC_RETURN
        .elseif (.amdgcn.gfx_generation_number >= 8)
            v_mov_b32 v2, 1
            FLAT_ATOMIC_ADD_NSS v3, v[0:1], v2 glc slc
        .else
            v_mov_b32 v2, -1
            flat_atomic_inc v3, v[0:1], v2 glc slc
        .endif
        s_endpgm
)";

/**
 * KFDMemoryTest
 */

/* Copies one dword between two addresses after programming the scratch
 * base registers.
 *
 * Input:
 *   s[0:1] - source address
 *   s[2:3] - destination address
 *   s[4:5] - scratch base, written to the scratch registers on gfx9+
 *            (pre-gfx9 writes the constants 8 and 0 instead)
 *
 * The scratch setup uses HW_REG_SCRATCH_BASE_LO/HI on gfx12+,
 * HW_REG_FLAT_SCR_LO/HI on gfx10/11 and flat_scratch_lo/hi on older
 * targets. The copy itself goes through the FLAT_*_NSS macros with
 * scope:SCOPE_SYS on gfx12+ and slc elsewhere.
 */
const char *ScratchCopyDwordIsa =
    SHADER_START
    SHADER_MACROS_FLAT
    R"(
        // Copy the parameters from scalar registers to vector registers
        .if (.amdgcn.gfx_generation_number >= 9)
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            v_mov_b32 v2, s2
            v_mov_b32 v3, s3
        .else
            v_mov_b32_e32 v0, s0
            v_mov_b32_e32 v1, s1
            v_mov_b32_e32 v2, s2
            v_mov_b32_e32 v3, s3
        .endif

        // Setup the scratch parameters. This assumes a single 16-reg block
        .if (.amdgcn.gfx_generation_number >= 12)
            s_setreg_b32 hwreg(HW_REG_SCRATCH_BASE_LO), s4
            s_setreg_b32 hwreg(HW_REG_SCRATCH_BASE_HI), s5
        .elseif (.amdgcn.gfx_generation_number >= 10)
            s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
            s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
        .elseif (.amdgcn.gfx_generation_number == 9)
            s_mov_b32 flat_scratch_lo, s4
            s_mov_b32 flat_scratch_hi, s5
        .else
            s_mov_b32 flat_scratch_lo, 8
            s_mov_b32 flat_scratch_hi, 0
        .endif

        // Copy a dword between the passed addresses
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORD_NSS v4, v[0:1] scope:SCOPE_SYS
            s_wait_loadcnt 0
            FLAT_STORE_DWORD_NSS v[2:3], v4 scope:SCOPE_SYS
        .else
            FLAT_LOAD_DWORD_NSS v4, v[0:1] slc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            FLAT_STORE_DWORD_NSS v[2:3], v4 slc
        .endif

        s_endpgm
)";

/* Continuously polls the first dword of the src buffer (s[0:1]) with a
 * scalar load and compares it against 0x5678.
 * Once the src buffer holds that value (written by the host program),
 * the shader writes 0x5678 to the dst buffer (s[2:3]) and quits.
 *
 * The final store is a flat store on gfx10+ (scope:SCOPE_SYS on gfx12+,
 * slc otherwise) and a scalar store with glc on older targets.
 */
const char *PollMemoryIsa =
    SHADER_START
    R"(
        // Assume src address in s0, s1, and dst address in s2, s3
        s_movk_i32 s18, 0x5678
        .if (.amdgcn.gfx_generation_number >= 10)
            v_mov_b32 v0, s2
            v_mov_b32 v1, s3
            v_mov_b32 v2, 0x5678
        .endif
        LOOP:
        .if (.amdgcn.gfx_generation_number >= 12)
            s_load_dword s16, s[0:1], 0x0 scope:SCOPE_SYS
        .else
            s_load_dword s16, s[0:1], 0x0 glc
        .endif
        s_cmp_eq_i32 s16, s18
        s_cbranch_scc0   LOOP
        .if (.amdgcn.gfx_generation_number >= 12)
            flat_store_dword v[0:1], v2 scope:SCOPE_SYS
        .elseif (.amdgcn.gfx_generation_number >= 10)
            flat_store_dword v[0:1], v2 slc
        .else
            s_store_dword s18, s[2:3], 0x0 glc
        .endif
        s_endpgm
)";

/* Similar to PollMemoryIsa, except that the polled buffer can be
 * non-coherent memory. SCC system-level cache coherence is not
 * supported in the scalar (smem) path, so this shader uses vector
 * memory (flat) operations with the scc modifier instead: it polls the
 * dword at s[0:1] until it equals 0x5678, then stores 0x5678 to s[2:3].
 */
const char *PollNCMemoryIsa =
    SHADER_START
    R"(
        // Assume src address in s0, s1, and dst address in s2, s3
        v_mov_b32 v6, 0x5678
        v_mov_b32 v0, s0
        v_mov_b32 v1, s1
        LOOP:
        flat_load_dword v4, v[0:1] scc
        v_cmp_eq_u32 vcc, v4, v6
        s_cbranch_vccz   LOOP
        v_mov_b32 v0, s2
        v_mov_b32 v1, s3
        flat_store_dword v[0:1], v6 scc
        s_endpgm
)";

/* Input: A buffer of at least 3 dwords, passed in s[0:1].
 * DW0: used as a signal. 0xcafe means it is signaled.
 * DW1: Input dword for the device to read.
 * DW2: Output dword for the device to write.
 *
 * The shader continuously polls the signal dword. Once it reads 0xcafe
 * it loads DW1 and copies it to DW2.
 *
 * The copy to DW2 is a flat store on gfx10+ (address s[0:1] + 8 is
 * computed into v[4:5] up front) and a scalar store at offset 0x8 on
 * older targets. GFX12 uses scope:SCOPE_SYS and s_wait_kmcnt; other
 * targets use glc and s_waitcnt.
 */
const char *CopyOnSignalIsa =
    SHADER_START
    R"(
        // Assume input buffer in s0, s1
        .if (.amdgcn.gfx_generation_number >= 10)
            s_add_u32 s2, s0, 0x8
            s_addc_u32 s3, s1, 0x0
            s_mov_b32 s18, 0xcafe
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            v_mov_b32 v4, s2
            v_mov_b32 v5, s3
        .else
            s_mov_b32 s18, 0xcafe
        .endif

        .if (.amdgcn.gfx_generation_number >= 12)

            POLLSIGNAL:
            s_load_dword s16, s[0:1], 0x0 scope:SCOPE_SYS
            s_cmp_eq_i32 s16, s18
            s_cbranch_scc0   POLLSIGNAL

            s_load_dword s17, s[0:1], 0x4 scope:SCOPE_SYS
            s_wait_kmcnt 0

            v_mov_b32 v2, s17
            flat_store_dword v[4:5], v2 scope:SCOPE_SYS
        .else

            POLLSIGNAL:
            s_load_dword s16, s[0:1], 0x0 glc
            s_cmp_eq_i32 s16, s18
            s_cbranch_scc0   POLLSIGNAL

            s_load_dword s17, s[0:1], 0x4 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            .if (.amdgcn.gfx_generation_number >= 10)
                v_mov_b32 v2, s17
                flat_store_dword v[4:5], v2 glc
            .else
                s_store_dword s17, s[0:1], 0x8 glc
            .endif
            s_waitcnt vmcnt(0) & lgkmcnt(0)

        .endif
        s_endpgm
)";

/* Continuously polls the flag in the first dword of the src buffer
 * (s[0:1]). Once the flag reads 1, the dword at s[0:1]+4 is copied to
 * the first dword of the dst buffer (s[2:3]).
 *
 * Two GFX9 paths exist:
 *  - minor version >= 4, or stepping 10 (the "Aldebaran, Aqua Vanjaram"
 *    path): polls with a flat load, invalidates caches after the flag
 *    is seen (buffer_inv sc1 sc0 or buffer_invl2), copies with scalar
 *    load/store and finishes with buffer_wbl2.
 *  - every other GFX9 target: polls and copies with scalar loads/stores.
 *
 * Note: Only works on GFX9 (only used in aldebaran tests). On other
 *       generations no polling or copy code is emitted; only the final
 *       s_waitcnt and s_endpgm remain.
 */
const char *PollAndCopyIsa =
    SHADER_START
    SHADER_MACROS_FLAT
    R"(
        // Assume src buffer in s[0:1] and dst buffer in s[2:3]
        // Path for Aldebaran, Aqua Vanjaram
        .if (.amdgcn.gfx_generation_number == 9 && (.amdgcn.gfx_generation_minor >= 4 || .amdgcn.gfx_generation_stepping == 10))
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            v_mov_b32 v18, 0x1
            LOOP0:
            FLAT_LOAD_DWORD_NSS v16, v[0:1] glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            v_cmp_eq_i32 vcc, v16, v18
            s_cbranch_vccz LOOP0
            .if (.amdgcn.gfx_generation_minor >= 4)
                buffer_inv sc1 sc0
            .else
                buffer_invl2
            .endif
            s_load_dword s17, s[0:1], 0x4 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            s_store_dword s17, s[2:3], 0x0 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            buffer_wbl2
        .elseif (.amdgcn.gfx_generation_number == 9)
            s_movk_i32 s18, 0x1
            LOOP1:
            s_load_dword s16, s[0:1], 0x0 glc
            s_cmp_eq_i32 s16, s18
            s_cbranch_scc0 LOOP1
            s_load_dword s17, s[0:1], 0x4 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            s_store_dword s17, s[2:3], 0x0 glc
        .endif
        s_waitcnt vmcnt(0) & lgkmcnt(0)
        s_endpgm
)";

/* Input0 (s[0:1]): A buffer of at least 2 dwords.
 * DW0: used as a signal. The shader writes 0x1 here to signal.
 * DW1: receives the value read from the 2nd input buffer,
 *      for another device to read.
 * Input1 (s[2:3]): A buffer whose first dword (DW0) holds the value
 *      to be written.
 *
 * Order of operations: the value is copied to Input0 DW1 with scalar
 * load/store, buffer_wbl2 is issued, and only then is the flag in
 * Input0 DW0 set to 1 with a flat store.
 *
 * Note: Only works on Aldebaran and Aqua Vanjaram; on any other target
 *       the shader consists of s_endpgm alone.
 */
const char *WriteFlagAndValueIsa =
    SHADER_START
    SHADER_MACROS_FLAT
    R"(
        // Assume two inputs buffer in s[0:1] and s[2:3]
        .if (.amdgcn.gfx_generation_number == 9 && (.amdgcn.gfx_generation_minor >= 4 || .amdgcn.gfx_generation_stepping == 10))
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            s_load_dword s18, s[2:3], 0x0 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            s_store_dword s18, s[0:1], 0x4 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            buffer_wbl2
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            v_mov_b32 v16, 0x1
            FLAT_STORE_DWORD_NSS v[0:1], v16 glc
        .endif
        s_endpgm
)";

/* Input0 (s[0:1]): A buffer of at least 2 dwords.
 * DW0: used as a signal. The shader writes 0xcafe here to signal.
 * DW1: the shader writes 0xbeef here for another device to read.
 * Input1 (s[2:3]): mmio base address; the shader writes 0x1 to it.
 *
 * The three stores are issued in this order: 0xbeef to Input0 DW1,
 * 0x1 to Input1, 0xcafe to Input0 DW0. gfx10+ uses flat stores
 * (scope:SCOPE_SYS on gfx12+, glc otherwise); older targets use scalar
 * stores with glc.
 */
const char *WriteAndSignalIsa =
    SHADER_START
    R"(
        // Assume input buffer in s0, s1
        .if (.amdgcn.gfx_generation_number >= 10)
            s_add_u32 s4, s0, 0x4
            s_addc_u32 s5, s1, 0x0
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            v_mov_b32 v2, s2
            v_mov_b32 v3, s3
            v_mov_b32 v4, s4
            v_mov_b32 v5, s5
            .if (.amdgcn.gfx_generation_number >= 12)
                v_mov_b32 v18, 0xbeef
                flat_store_dword v[4:5], v18 scope:SCOPE_SYS
                v_mov_b32 v18, 0x1
                flat_store_dword v[2:3], v18 scope:SCOPE_SYS
                v_mov_b32 v18, 0xcafe
                flat_store_dword v[0:1], v18 scope:SCOPE_SYS
            .else
                v_mov_b32 v18, 0xbeef
                flat_store_dword v[4:5], v18 glc
                v_mov_b32 v18, 0x1
                flat_store_dword v[2:3], v18 glc
                v_mov_b32 v18, 0xcafe
                flat_store_dword v[0:1], v18 glc
            .endif
        .else
            s_mov_b32 s18, 0xbeef
            s_store_dword s18, s[0:1], 0x4 glc
            s_mov_b32 s18, 0x1
            s_store_dword s18, s[2:3], 0 glc
            s_mov_b32 s18, 0xcafe
            s_store_dword s18, s[0:1], 0x0 glc
        .endif
        s_endpgm
)";

/* Input:
 * s[0:1], A buffer of at least 64 * 6 bytes
 *
 * Stores 0x0 at offset 0x0 of the buffer and the value 0x77 at the
 * 5 offsets 0x40, 0x80, ..., 0x140, all with scalar stores (glc),
 * then waits for them with s_waitcnt lgkmcnt(0).
 *
 * Aqua Vanjaram only; on any other target the shader consists of
 * s_endpgm alone.
 */
const char *FlushBufferForAcquireReleaseIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
            s_mov_b32 s11, 0x77
            s_mov_b32 s12, 0x0
            // Store some data on 5 different cache lines
            s_store_dword s12, s[0:1], 0x0 glc
            s_store_dword s11, s[0:1], 0x40 glc
            s_store_dword s11, s[0:1], 0x80 glc
            s_store_dword s11, s[0:1], 0xc0 glc
            s_store_dword s11, s[0:1], 0x100 glc
            s_store_dword s11, s[0:1], 0x140 glc
            s_waitcnt lgkmcnt(0)
        .endif
        s_endpgm
)";

/* Input:
 * s[0:1], A buffer of at least 64 * 6 bytes,
 * shared with the acquiring shader
 *
 * Stores the values 1 - 5 at the 5 offsets 0x40,
 * 0x80, ..., 0x140 in the buffer, then signals by
 * writing 1 to the flag at offset 0x0 in the buffer.
 *
 * Uses vector (global) stores for the data; only the last data store
 * carries the nt sc1 sc0 modifiers. The release sequence is
 * buffer_wbl2 sc1 sc0 followed by a wait, and the flag itself is
 * written with a scalar store.
 *
 * Aqua Vanjaram only; on any other target the shader consists of
 * s_endpgm alone.
 */
const char *WriteReleaseVectorIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
            v_mov_b32 v11, 0x1
            v_mov_b32 v12, 0x2
            v_mov_b32 v13, 0x3
            v_mov_b32 v14, 0x4
            v_mov_b32 v15, 0x5
            v_mov_b32 v21, 0x40
            v_mov_b32 v22, 0x80
            v_mov_b32 v23, 0xc0
            v_mov_b32 v24, 0x100
            v_mov_b32 v25, 0x140
            // Store some data on 5 different cache lines
            global_store_dword v21, v11, s[0:1]
            global_store_dword v22, v12, s[0:1]
            global_store_dword v23, v13, s[0:1]
            global_store_dword v24, v14, s[0:1]
            global_store_dword v25, v15, s[0:1] nt sc1 sc0
            s_waitcnt vmcnt(0)
            // Write-Release
            s_mov_b32 s16, 0x1
            buffer_wbl2 sc1 sc0
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            s_store_dword s16, s[0:1], 0x0 glc
        .endif
        s_endpgm
)";

/* Input:
 * s[0:1], A buffer of at least 64 * 6 bytes,
 * shared with the acquiring shader
 *
 * Stores the values 6 - 10 at the 5 offsets 0x40,
 * 0x80, ..., 0x140 in the buffer, then signals by
 * writing 1 to the flag at offset 0x0 in the buffer.
 *
 * Uses scalar stores for the data; only the last data store carries
 * glc. The release sequence is s_dcache_wb, buffer_wbl2 sc1 sc0 and a
 * wait, after which the flag is written with a scalar store.
 *
 * Aqua Vanjaram only; on any other target the shader consists of
 * s_endpgm alone.
 */
const char *WriteReleaseScalarIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
            s_mov_b32 s11, 0x6
            s_mov_b32 s12, 0x7
            s_mov_b32 s13, 0x8
            s_mov_b32 s14, 0x9
            s_mov_b32 s15, 0xa
            // Store some data on 5 different cache lines
            s_store_dword s11, s[0:1], 0x40
            s_store_dword s12, s[0:1], 0x80
            s_store_dword s13, s[0:1], 0xc0
            s_store_dword s14, s[0:1], 0x100
            s_store_dword s15, s[0:1], 0x140 glc
            s_waitcnt lgkmcnt(0)
            // Write-Release
            s_dcache_wb // WB Scalar L1 cache
            s_mov_b32 s16, 0x1
            buffer_wbl2 sc1 sc0
            s_waitcnt vmcnt(0) & lgkmcnt(0)
            s_store_dword s16, s[0:1], 0x0 glc
            s_waitcnt lgkmcnt(0)
        .endif
        s_endpgm
)";

/* Input:
 * s[0:1], A buffer of at least 64 * 6 bytes,
 * shared with the releasing shader
 * s[2:3], A buffer of at least 64 * 6 bytes,
 * accessible by the CPU, used for output
 *
 * Polls the flag at offset 0x0 in the shared buffer until it reads 1.
 * When the signal is received, issues buffer_inv sc1 sc0, reads the
 * values at the 5 offsets 0x40, 0x80, ... 0x140,
 * and stores them at the same offsets in
 * the output buffer.
 *
 * Uses vector (global) loads for the data and global stores with
 * nt sc1 sc0 for the output; the flag is polled with a scalar load.
 *
 * Aqua Vanjaram only; on any other target the shader consists of
 * s_endpgm alone.
 */
const char *ReadAcquireVectorIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
            // Read-Acquire
            s_mov_b32 s18, 0x1
            LOOP:
            s_load_dword s17, s[0:1], 0x0 glc
            s_waitcnt lgkmcnt(0)
            s_cmp_eq_i32 s17, s18
            s_cbranch_scc0 LOOP
            buffer_inv sc1 sc0
            // Load data
            v_mov_b32 v21, 0x40
            v_mov_b32 v22, 0x80
            v_mov_b32 v23, 0xc0
            v_mov_b32 v24, 0x100
            v_mov_b32 v25, 0x140
            global_load_dword v11, v21, s[0:1]
            global_load_dword v12, v22, s[0:1]
            global_load_dword v13, v23, s[0:1]
            global_load_dword v14, v24, s[0:1]
            global_load_dword v15, v25, s[0:1]
            s_waitcnt vmcnt(0)
            // Store data for output
            v_mov_b32 v21, 0x40
            v_mov_b32 v22, 0x80
            v_mov_b32 v23, 0xc0
            v_mov_b32 v24, 0x100
            v_mov_b32 v25, 0x140
            global_store_dword v21, v11, s[2:3] nt sc1 sc0
            global_store_dword v22, v12, s[2:3] nt sc1 sc0
            global_store_dword v23, v13, s[2:3] nt sc1 sc0
            global_store_dword v24, v14, s[2:3] nt sc1 sc0
            global_store_dword v25, v15, s[2:3] nt sc1 sc0
            s_waitcnt vmcnt(0)
        .endif
        s_endpgm
)";

/* Input:
 * s[0:1], A buffer of at least 64 * 6 bytes,
 * shared with the releasing shader
 * s[2:3], A buffer of at least 64 * 6 bytes,
 * accessible by the CPU, used for output
 *
 * Polls the flag at offset 0x0 in the shared buffer until it reads 1.
 * When the signal is received, issues buffer_inv sc1 sc0, reads the
 * values at the 5 offsets 0x40, 0x80, ... 0x140,
 * and stores them at the same offsets in
 * the output buffer.
 *
 * Uses scalar loads for the data and scalar stores (glc) for the
 * output.
 *
 * Aqua Vanjaram only; on any other target the shader consists of
 * s_endpgm alone.
 */
const char *ReadAcquireScalarIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
            // Read-Acquire
            s_mov_b32 s18, 0x1
            LOOP:
            s_load_dword s17, s[0:1], 0x0 glc
            s_waitcnt lgkmcnt(0)
            s_cmp_eq_i32 s17, s18
            s_cbranch_scc0 LOOP
            buffer_inv sc1 sc0
            // Load data
            s_load_dword s21, s[0:1], 0x40
            s_load_dword s22, s[0:1], 0x80
            s_load_dword s23, s[0:1], 0xc0
            s_load_dword s24, s[0:1], 0x100
            s_load_dword s25, s[0:1], 0x140
            s_waitcnt lgkmcnt(0)
            // Store data for output
            s_store_dword s21, s[2:3], 0x40 glc
            s_store_dword s22, s[2:3], 0x80 glc
            s_store_dword s23, s[2:3], 0xc0 glc
            s_store_dword s24, s[2:3], 0x100 glc
            s_store_dword s25, s[2:3], 0x140 glc
            s_waitcnt lgkmcnt(0)
        .endif
        s_endpgm
)";

/**
 * KFDQMTest
 */

/* A simple ISA loop program with dense mathematical operations
 * (float add, mul, log, exp, sqrt and rsq on vector registers).
 *
 * The iteration count is fixed by the constants loaded at the top:
 * s0 starts at 0x8 and is incremented once per iteration, and the loop
 * exits as soon as s1 (0xff) <= s0. The shader takes no inputs and
 * writes no memory.
 *
 * Originally noted as usable on GFX8, GFX9 and GFX10; the only
 * generation-specific part is the wait inside the loop, which uses
 * s_wait_dscnt / s_wait_kmcnt on gfx12+ and s_waitcnt lgkmcnt(0)
 * otherwise.
 */
const char *LoopIsa =
    SHADER_START
    R"(
        s_movk_i32    s0, 0x0008
        s_movk_i32    s1, 0x00ff
        s_mov_b32     s4, 0
        s_mov_b32     s5, 0
        s_mov_b32     s6, 0
        s_mov_b32     s7, 0
        s_mov_b32     s12, 0
        s_mov_b32     s13, 0
        s_mov_b32     s14, 0
        s_mov_b32     s15, 0
        v_mov_b32     v0, 0
        v_mov_b32     v1, 0
        v_mov_b32     v2, 0
        v_mov_b32     v3, 0
        v_mov_b32     v4, 0
        v_mov_b32     v5, 0
        v_mov_b32     v6, 0
        v_mov_b32     v7, 0
        v_mov_b32     v8, 0
        v_mov_b32     v9, 0
        v_mov_b32     v10, 0
        v_mov_b32     v11, 0
        v_mov_b32     v12, 0
        v_mov_b32     v13, 0
        v_mov_b32     v14, 0
        v_mov_b32     v15, 0
        v_mov_b32     v16, 0
        LOOP:
        s_mov_b32     s8, s4
        s_mov_b32     s9, s1
        s_mov_b32     s10, s6
        s_mov_b32     s11, s7
        s_cmp_le_i32  s1, s0
        s_cbranch_scc1  END_OF_PGM
        v_add_f32     v0, 2.0, v0
        v_cvt_f32_i32 v17, s1
        .if (.amdgcn.gfx_generation_number >= 12)
            s_wait_dscnt     0
            s_wait_kmcnt     0
        .else
            s_waitcnt lgkmcnt(0)
        .endif
        v_add_f32     v18, s8, v17
        v_add_f32     v19, s9, v17
        v_add_f32     v20, s10, v17
        v_add_f32     v21, s11, v17
        v_add_f32     v22, s12, v17
        v_add_f32     v23, s13, v17
        v_add_f32     v24, s14, v17
        v_add_f32     v17, s15, v17
        v_log_f32     v25, v18
        v_mul_f32     v25, v22, v25
        v_exp_f32     v25, v25
        v_log_f32     v26, v19
        v_mul_f32     v26, v23, v26
        v_exp_f32     v26, v26
        v_log_f32     v27, v20
        v_mul_f32     v27, v24, v27
        v_exp_f32     v27, v27
        v_log_f32     v28, v21
        v_mul_f32     v28, v17, v28
        v_exp_f32     v28, v28
        v_add_f32     v5, v5, v25
        v_add_f32     v6, v6, v26
        v_add_f32     v7, v7, v27
        v_add_f32     v8, v8, v28
        v_mul_f32     v18, 0x3fb8aa3b, v18
        v_exp_f32     v18, v18
        v_mul_f32     v19, 0x3fb8aa3b, v19
        v_exp_f32     v19, v19
        v_mul_f32     v20, 0x3fb8aa3b, v20
        v_exp_f32     v20, v20
        v_mul_f32     v21, 0x3fb8aa3b, v21
        v_exp_f32     v21, v21
        v_add_f32     v9, v9, v18
        v_add_f32     v10, v10, v19
        v_add_f32     v11, v11, v20
        v_add_f32     v12, v12, v21
        v_sqrt_f32    v18, v22
        v_sqrt_f32    v19, v23
        v_sqrt_f32    v20, v24
        v_sqrt_f32    v21, v17
        v_add_f32     v13, v13, v18
        v_add_f32     v14, v14, v19
        v_add_f32     v15, v15, v20
        v_add_f32     v16, v16, v21
        v_rsq_f32     v18, v22
        v_rsq_f32     v19, v23
        v_rsq_f32     v20, v24
        v_rsq_f32     v17, v17
        v_add_f32     v1, v1, v18
        v_add_f32     v2, v2, v19
        v_add_f32     v3, v3, v20
        v_add_f32     v4, v4, v17
        s_add_u32     s0, s0, 1
        s_branch      LOOP
        END_OF_PGM:
        s_endpgm
)";


/**
 * KFDCWSRTest
 */

/* Shader that keeps rewriting a known value to its output slot until the
 * host tells it to stop.
 *
 * Initial state:
 *   s[0:1] - input buffer base address
 *   s[2:3] - output buffer base address
 *   s4 - workgroup id (on gfx12+ the workgroup id is read from ttmp9)
 *   v0 - workitem id
 * Registers:
 *   v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4
 *   v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
 *   v6 - register storing known-value output for mangle testing
 *   v7 - counter
 *
 * Each iteration stores v6 to the output slot, increments v7 and reads
 * the first dword of the input buffer with a scalar load; the shader
 * quits once that dword equals 0x12345678.
 *
 * The scalar load must be waited for before s6 is compared. On gfx12+
 * scalar memory loads are tracked by the KM counter (the same wait
 * CopyOnSignalIsa uses after its scalar load), so the loop waits with
 * s_wait_kmcnt; s_wait_loadcnt only covers vector memory loads such as
 * the initial flat load of v6.
 */
const char *PersistentIterateIsa =
    SHADER_START
    SHADER_MACROS_U32
    SHADER_MACROS_FLAT
    R"(
        // Compute address of output buffer
        .if (.amdgcn.gfx_generation_number >= 12)
            v_mov_b32               v0, ttmp9   // use workgroup id as index
        .else
            v_mov_b32               v0, s4      // use workgroup id as index
        .endif
        v_lshlrev_b32           v0, 2, v0       // v0 *= 4
        V_ADD_CO_U32            v4, s2, v0      // v[4:5] = s[2:3] + v0 * 4
        v_mov_b32               v5, s3          // v[4:5] = s[2:3] + v0 * 4
        V_ADD_CO_CI_U32         v5, v5, 0       // v[4:5] = s[2:3] + v0 * 4

        // Store known-value output in register
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORD_NSS     v6, v[4:5] scope:SCOPE_SYS
            s_wait_loadcnt 0                        // wait for memory reads to finish
        .else
            FLAT_LOAD_DWORD_NSS     v6, v[4:5] glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)         // wait for memory reads to finish
        .endif

        // Initialize counter
        v_mov_b32               v7, 0

        LOOP:
        flat_store_dword        v[4:5], v6      // store known-val in output
        V_ADD_CO_U32            v7, 1, v7       // increment counter

        .if (.amdgcn.gfx_generation_number >= 12)
            s_load_dword            s6, s[0:1], 0 scope:SCOPE_SYS
            s_wait_kmcnt 0                          // wait for memory reads to finish
        .else
            s_load_dword            s6, s[0:1], 0 glc
            s_waitcnt vmcnt(0) & lgkmcnt(0)         // wait for memory reads to finish
        .endif
        s_cmp_eq_i32            s6, 0x12345678  // compare input buf to stopval
        s_cbranch_scc1          L_QUIT          // branch if notified to quit by host

        s_branch LOOP

        L_QUIT:
        s_endpgm
)";

/**
 * KFDEvictTest
 */

/* Shader to read local buffers using multiple wavefronts in parallel
 * until the first dword of the address buffer is set to the value 0x5678 by
 * the host program; each wavefront then writes 0x5678 to its slot in the
 * result buffer and quits.
 *
 * Flow, as written below:
 *   1. Compute the per-workgroup result slot (v[4:5]) and address slot (v[2:3]).
 *   2. Load v11 from the result slot and the 64-bit local buffer address
 *      (v[6:7]) from the address slot.
 *   3. L_REPEAT: poll the dword at s[0:1]; branch to L_QUIT when it equals 0x5678.
 *   4. L_LOOP_READ: issue one 64-bit read per 0x1000-byte step of the local
 *      buffer, then go back to L_REPEAT. The loaded data (v[14:15]) is not
 *      used; no wait is issued for these reads.
 *
 * Initial state:
 *   s[0:1]   - address buffer base address
 *   s[2:3]   - result buffer base address
 *   s4/ttmp9 - workgroup id (in s4 pre-GFX12, in ttmp9 on GFX12)
 *   v0       - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
 * Registers:
 *   v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
 *   v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
 *   v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
 *   v[6:7] - local buf address used for read test
 *   v8/s8 - quit marker 0x5678 (vector copy is stored, scalar copy is compared)
 *   v9 - running offset into the local buffer, advanced by v10 (0x1000)
 *   v11 - size of local buffer in MB
 *         NOTE(review): v11 is compared against v9, which advances in
 *         0x1000 steps, so the unit looks like bytes rather than MB -
 *         confirm against the host code that fills the result buffer.
 *   v[12:13] - current read address inside the local buffer
 */
const char *ReadMemoryIsa =
    SHADER_START
    SHADER_MACROS_U32
    SHADER_MACROS_FLAT
    R"(
        // Compute address of corresponding output buffer
        .if (.amdgcn.gfx_generation_number >= 12)
            v_mov_b32           v0, ttmp9       // use workgroup id as index
        .else
            v_mov_b32           v0, s4          // use workgroup id as index
        .endif
        v_lshlrev_b32           v0, 2, v0       // v0 *= 4
        V_ADD_CO_U32            v4, s2, v0      // v[4:5] = s[2:3] + v0 * 4
        v_mov_b32               v5, s3          // v[4:5] = s[2:3] + v0 * 4
        V_ADD_CO_CI_U32         v5, v5, 0       // v[4:5] = s[2:3] + v0 * 4

        // Compute input buffer offset used to store corresponding local buffer address
        v_lshlrev_b32           v0, 1, v0       // v0 *= 8
        V_ADD_CO_U32            v2, s0, v0      // v[2:3] = s[0:1] + v0 * 8
        v_mov_b32               v3, s1          // v[2:3] = s[0:1] + v0 * 8
        V_ADD_CO_CI_U32         v3, v3, 0       // v[2:3] = s[0:1] + v0 * 8

        // Load local buffer size from output buffer
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORD_NSS     v11, v[4:5] scope:SCOPE_DEV
        .else
            FLAT_LOAD_DWORD_NSS     v11, v[4:5] slc
        .endif

        // Load 64bit local buffer address stored at v[2:3] to v[6:7]
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORDX2_NSS   v[6:7], v[2:3] scope:SCOPE_DEV
            s_wait_loadcnt 0
        .else
            FLAT_LOAD_DWORDX2_NSS   v[6:7], v[2:3] slc
            s_waitcnt vmcnt(0) & lgkmcnt(0)         // wait for memory reads to finish
        .endif
        v_mov_b32               v8, 0x5678
        s_movk_i32              s8, 0x5678
        L_REPEAT:
        .if (.amdgcn.gfx_generation_number >= 12)
            s_load_dword        s16, s[0:1], 0x0 scope:SCOPE_SYS
            s_wait_kmcnt        0                      // wait for memory reads to finish
        .else
            s_load_dword        s16, s[0:1], 0x0 glc
            s_waitcnt           vmcnt(0) & lgkmcnt(0)  // wait for memory reads to finish
        .endif
        s_cmp_eq_i32            s16, s8
        s_cbranch_scc1          L_QUIT          // if notified to quit by host

        // Loop read local buffer starting at v[6:7]
        // every 4k page only read once
        v_mov_b32               v9, 0
        v_mov_b32               v10, 0x1000     // 4k page
        v_mov_b32               v12, v6
        v_mov_b32               v13, v7
        L_LOOP_READ:
        .if (.amdgcn.gfx_generation_number >= 12)
            FLAT_LOAD_DWORDX2_NSS   v[14:15], v[12:13] scope:SCOPE_DEV
        .else
            FLAT_LOAD_DWORDX2_NSS   v[14:15], v[12:13] slc
        .endif
        V_ADD_CO_U32            v9, v9, v10
        V_ADD_CO_U32            v12, v12, v10
        V_ADD_CO_CI_U32         v13, v13, 0
        V_CMP_LT_U32            v9, v11
        s_cbranch_vccnz         L_LOOP_READ
        s_branch                L_REPEAT
        L_QUIT:
        flat_store_dword        v[4:5], v8
        .if (.amdgcn.gfx_generation_number >= 12)
            s_wait_storecnt     0
        .else
            s_waitcnt vmcnt(0) & lgkmcnt(0)         // wait for memory writes to finish
        .endif
        s_endpgm
)";

/**
 * KFDGWSTest
 */

/* Shader to initialize the GWS counter.
 *
 * Pre-GFX12: loads one dword from the address in s[0:1], moves it to v0 and
 * passes it to ds_gws_init at offset 0 (m0 is cleared first).
 * The original comment says the counter is initialized to 1; the value is
 * whatever the host stored at s[0:1] - presumably 1, confirm with the caller.
 *
 * NOTE(review): the GFX12 branch is empty, so on GFX12 this assembles to no
 * instructions at all (not even s_endpgm) - presumably the GWS tests are
 * skipped there; confirm before dispatching this shader on GFX12.
 */
const char *GwsInitIsa =
    SHADER_START
    R"(
        .if (.amdgcn.gfx_generation_number >= 12)
        .else
            s_mov_b32 m0, 0
            s_nop 0
            s_load_dword s16, s[0:1], 0x0 glc
            s_waitcnt 0
            v_mov_b32 v0, s16
            s_waitcnt 0
            ds_gws_init v0 offset:0 gds
            s_waitcnt 0
            s_endpgm
        .endif
)";

/* Atomically increase a value in memory
 * This is expected to be executed from
 * multiple work groups simultaneously.
 * GWS semaphore is used to guarantee
 * the operation is atomic.
 *
 * Input: s[0:1] - address of the dword to increment.
 *
 * Both implemented variants bracket a load / add 1 / store sequence between
 * ds_gws_sema_p and ds_gws_sema_v at offset 0:
 *   - GFX10 and GFX11: exec_lo is set to 0x1 and the value is updated
 *     through flat_load_dword / flat_store_dword using v[0:1] as the address.
 *   - older generations: the value is updated with scalar
 *     s_load_dword / s_store_dword on s[0:1].
 * The GFX12 branch is empty, so on GFX12 the shader only executes the
 * trailing s_waitcnt / s_endpgm and does not touch memory.
 */
const char *GwsAtomicIncreaseIsa =
    SHADER_START
    R"(
        // Assume src address in s0, s1
        .if (.amdgcn.gfx_generation_number >= 12)
        .elseif (.amdgcn.gfx_generation_number >= 10)
            s_mov_b32 m0, 0
            s_mov_b32 exec_lo, 0x1
            v_mov_b32 v0, s0
            v_mov_b32 v1, s1
            ds_gws_sema_p offset:0 gds
            s_waitcnt 0
            flat_load_dword v2, v[0:1] glc dlc
            s_waitcnt 0
            v_add_nc_u32 v2, v2, 1
            flat_store_dword v[0:1], v2
            s_waitcnt_vscnt null, 0
            ds_gws_sema_v offset:0 gds
        .else
            s_mov_b32 m0, 0
            s_nop 0
            ds_gws_sema_p offset:0 gds
            s_waitcnt 0
            s_load_dword s16, s[0:1], 0x0 glc
            s_waitcnt 0
            s_add_u32 s16, s16, 1
            s_store_dword s16, s[0:1], 0x0 glc
            s_waitcnt lgkmcnt(0)
            ds_gws_sema_v offset:0 gds
        .endif
        s_waitcnt 0
        s_endpgm
)";


/*
 * Shader used by ExtendedCuMasking test case to check if CU mask is used correctly.
 *
 * Shader will write to output buffer the (SE, SA, WGP) used by the wave.
 * The test program will then analyse the data.
 *
 * Inputs
 * ------
 * s[2:3]  : output buffer base address
 * s4/ttmp9: workgroup id (s4 for pre-GFX12, ttmp9 for GFX12)
 *
 * Output
 * ------
 * Store the hardware-id register content in the output buffer at the dword
 * index corresponding to the workgroup id.
 * The register read is HW_REG_HW_ID1 on GFX12 and HW_REG_HW_ID on every
 * older generation, so the bit layout seen by the host depends on the
 * generation.
 *
 * Registers
 * ---------
 * v0     : workgroup id
 * v6     : byte offset into the output buffer (v0 * 4)
 * v[4:5] : address of the output element
 * s6/v1  : hardware-id value read with s_getreg_b32 and then stored
 *
 * No wait is issued after the store before s_endpgm.
 */
const char *CheckCuMaskIsa =
    SHADER_START
    SHADER_MACROS_U32
    SHADER_MACROS_FLAT
    R"(
        // Get workgroup id
        .if (.amdgcn.gfx_generation_number >= 12)
            v_mov_b32    v0, ttmp9
        .else
            v_mov_b32    v0, s4
        .endif

        // Address of output buffer element: v[4:5] = s[2:3] + v0 * 4
        v_lshlrev_b32    v6, 2, v0
        V_ADD_CO_U32     v4, s2, v6
        v_mov_b32        v5, s3
        V_ADD_CO_CI_U32  v5, v5, 0

        // Store HW_ID1 content
        .if (.amdgcn.gfx_generation_number >= 12)
            s_getreg_b32     s6, hwreg(HW_REG_HW_ID1)
        .else
            s_getreg_b32     s6, hwreg(HW_REG_HW_ID)
        .endif
        v_mov_b32        v1, s6
        flat_store_dword v[4:5], v1

        s_endpgm
)";


/* Shader that raises a software trap and reports what the trap handler left
 * in v4.
 *
 * Input: s[0:1] - address of the result dword (copied to v[0:1]).
 *
 * v4 is cleared, then "s_trap 1" is executed. After the trap returns, the
 * shader spins at EXIT_LOOP for as long as v4 compares equal to 0, then
 * stores v4 to the result address. TrapHandlerIsa writes ttmp1 into v4 on
 * its doorbell/interrupt path, which is what releases the loop.
 * NOTE(review): V_CMP_EQ_U32 comes from SHADER_MACROS_U32 (not visible
 * here); it is assumed to write its result to vcc - confirm.
 */
const char *JumpToTrapIsa =
    SHADER_START
    SHADER_MACROS_U32
    R"(
        /*copy the parameters from scalar registers to vector registers*/
        v_mov_b32 v4, 0
        v_mov_b32 v0, s0
        v_mov_b32 v1, s1
        s_trap 1
        EXIT_LOOP:
        V_CMP_EQ_U32 v4, 0
        s_cbranch_vccnz EXIT_LOOP
        flat_store_dword v[0:1], v4
        s_waitcnt vmcnt(0)&lgkmcnt(0)
        s_endpgm
)";

/* Trap handler used together with JumpToTrapIsa and the WATCH_* shaders.
 *
 * Control flow, in the order it appears below:
 *   CHECK_VMFAULT
 *     Read the trap status into ttmp14 (HW_REG_TRAPSTS before GFX12,
 *     HW_REG_EXCP_FLAG_PRIV on GFX12). If the masked bit (0x800 / 0x10) is
 *     set, go straight to RESTORE_AND_EXIT.
 *     If any of the address-watch bits (0x7080 / 0xf) is set, copy v4 to v5
 *     and the status word to v6, then go to RESTORE_AND_EXIT.
 *   GET_DOORBELL
 *     Obtain the doorbell id in ttmp3: before GFX11 via "s_sendmsg 10" and
 *     polling exec_lo bit 31 (exec is saved in ttmp2/ttmp3 and restored),
 *     from GFX11 on via s_sendmsg_rtn_b32 MSG_RTN_GET_DOORBELL.
 *     Send MSG_INTERRUPT with m0 = doorbell id | 0x800 (m0 is preserved in
 *     ttmp2 across the message), then copy ttmp1 into v4.
 *     Re-read the status word; if the generation-specific mask (0x300,
 *     0x30000 or 0x1800000) is set, skip the PC adjustment.
 *     Otherwise advance the return PC in ttmp[0:1] by 4 bytes so execution
 *     resumes after the trapping instruction.
 *   RESTORE_AND_EXIT
 *     Rebuild IB_STS from ttmp11, restore STATUS from ttmp12 and return
 *     with s_rfe_b64.
 *
 * Registers visible to the interrupted shader that this handler writes:
 *   v4 - ttmp1 (doorbell path); v5/v6 - watch checkpoint and status word
 *   (address-watch path).
 *
 * NOTE(review): the meaning of the individual mask bits is taken from the
 * inline comments only; confirm against the ISA documentation for each
 * generation before changing any of them.
 */
const char *TrapHandlerIsa =
    SHADER_START
    R"(
        CHECK_VMFAULT:
        /*if trap jumped to by vmfault, restore skip m0 signalling*/
        .if (.amdgcn.gfx_generation_number < 12)
            s_getreg_b32 ttmp14, hwreg(HW_REG_TRAPSTS)
            s_and_b32 ttmp2, ttmp14, 0x800
        .else
            s_getreg_b32 ttmp14, hwreg(HW_REG_EXCP_FLAG_PRIV)
            s_and_b32 ttmp2, ttmp14, 0x10
        .endif
        s_cbranch_scc1 RESTORE_AND_EXIT
        /*check for address watch event and record pc check point delta*/
        .if (.amdgcn.gfx_generation_number < 12)
            s_and_b32 ttmp2, ttmp14, 0x7080
        .else
            s_and_b32 ttmp2, ttmp14, 0xf
        .endif
        s_cbranch_scc0 GET_DOORBELL
        v_mov_b32 v5, v4 // capture watch checkpoint
        v_mov_b32 v6, ttmp14 // capture watch trapsts
        s_branch RESTORE_AND_EXIT
        GET_DOORBELL:
        .if .amdgcn.gfx_generation_number < 11
            s_mov_b32 ttmp2, exec_lo
            s_mov_b32 ttmp3, exec_hi
            s_mov_b32 exec_lo, 0x80000000
            s_sendmsg 10
            WAIT_SENDMSG:
            /*wait until msb is cleared (i.e. doorbell fetched)*/
            s_nop 7
            s_bitcmp0_b32 exec_lo, 0x1F
            s_cbranch_scc0 WAIT_SENDMSG
            /* restore exec */
            s_mov_b32 exec_hi, ttmp3
            s_and_b32 exec_lo, exec_lo, 0xfff
            s_mov_b32 ttmp3, exec_lo
            s_mov_b32 exec_lo, ttmp2
        .else
            s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
            s_waitcnt lgkmcnt(0)
            s_and_b32 ttmp3, ttmp3, 0x3ff
        .endif
        s_mov_b32 ttmp2, m0
        s_or_b32 ttmp3, ttmp3, 0x800
        /* set m0, send interrupt and restore m0 and exit trap*/
        s_mov_b32 m0, ttmp3
        s_nop 0x0
        s_sendmsg sendmsg(MSG_INTERRUPT)
        s_waitcnt lgkmcnt(0)
        s_mov_b32 m0, ttmp2
        v_mov_b32 v4, ttmp1
        .if (.amdgcn.gfx_generation_number >= 12)
            s_getreg_b32 ttmp14, hwreg(HW_REG_EXCP_FLAG_PRIV)
            s_and_b32 ttmp2, ttmp14, 0x300
            s_cbranch_scc1 RESTORE_AND_EXIT
         .elseif (.amdgcn.gfx_generation_number == 11)
            s_getreg_b32 ttmp14, hwreg(HW_REG_TRAPSTS)
            s_and_b32 ttmp2, ttmp14, 0x30000
            s_cbranch_scc1 RESTORE_AND_EXIT
         .else
            s_getreg_b32 ttmp14, hwreg(HW_REG_TRAPSTS)
            s_and_b32 ttmp2, ttmp14, 0x1800000
            s_cbranch_scc1 RESTORE_AND_EXIT
         .endif
        /* restore and increment program counter to skip shader trap jump*/
        s_add_u32 ttmp0, ttmp0, 4
        s_addc_u32 ttmp1, ttmp1, 0
        s_and_b32 ttmp1, ttmp1, 0xffff
        RESTORE_AND_EXIT:
        /* restore SQ_WAVE_IB_STS */
        s_lshr_b32 ttmp2, ttmp11, (26 - 15)
        s_and_b32 ttmp2, ttmp2, (0x8000 | 0x1F0000)
        s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
        /* restore SQ_WAVE_STATUS */
        s_and_b64 exec, exec, exec
        s_and_b64 vcc, vcc, vcc
        s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
        s_rfe_b64 [ttmp0, ttmp1]
)";

/* Common prologue of the watch-point shaders.
 *   s[0:1] -> v[0:1] : address that the shader-specific instruction accesses
 *   s[2:3] -> v[2:3] : address of the result dword
 *   v4               : dword loaded from the result address
 *   v5, v6           : cleared; TrapHandlerIsa overwrites them on an
 *                      address-watch event
 */
#define WATCH_START SHADER_START SHADER_MACROS_U32\
    "v_mov_b32 v0, s0\n"\
    "v_mov_b32 v1, s1\n"\
    "v_mov_b32 v2, s2\n"\
    "v_mov_b32 v3, s3\n"\
    "flat_load_dword v4, v[2:3]\n"\
    "s_waitcnt vmcnt(0) & lgkmcnt(0)\n"\
    "v_mov_b32 v5, 0\n"\
    "v_mov_b32 v6, 0\n"

/* Common epilogue of the watch-point shaders.
 * Starts with a newline so the shader-specific instruction placed between
 * WATCH_START and WATCH_END does not need its own line terminator.
 * Sets v4 to 2, spins at LOOP while v6 compares equal to 0, then stores
 * v6 + v5 to the result address in v[2:3] and ends the program.
 * NOTE(review): V_CMP_EQ_U32 / V_ADD_CO_U32 come from SHADER_MACROS_U32
 * (not visible here); the compare is assumed to write vcc - confirm.
 */
#define WATCH_END "\n"\
    "v_mov_b32 v4, 2\n"\
    "LOOP:\n"\
    "V_CMP_EQ_U32 v6, 0\n"\
    "s_cbranch_vccnz LOOP\n"\
    "V_ADD_CO_U32 v6, v6, v5\n"\
    "flat_store_dword v[2:3], v6\n"\
    "s_waitcnt vmcnt(0) & lgkmcnt(0)\n"\
    "s_endpgm\n"

/* Watch-point shader whose watched access is a dword load from v[0:1]
 * (the address passed in s[0:1]); the loaded value in v7 is not used. */
const char *WatchReadIsa =
    WATCH_START
    "flat_load_dword v7, v[0:1]"
    WATCH_END;

/* Watch-point shader whose watched access is a dword store of v4 to v[0:1]
 * (the address passed in s[0:1]). */
const char *WatchWriteIsa =
    WATCH_START
    "flat_store_dword v[0:1], v4"
    WATCH_END;


================================================
FILE: libhsakmt/tests/kfdtest/src/ShaderStore.hpp
================================================
/*
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef _SHADERSTORE_H_
#define _SHADERSTORE_H_

#include <vector>

/*
 * Declarations of the shader sources used by kfdtest.
 * Every *Isa symbol is a NUL-terminated assembly source string; the
 * definitions live in ShaderStore.cpp, grouped by the test that uses them.
 */

/* KFDASMTest List */
extern const std::vector<const char*> ShaderList;

/* Common */
extern const char *NoopIsa;
extern const char *CopyDwordIsa;
extern const char *InfiniteLoopIsa;
extern const char *AtomicIncIsa;

/* KFDMemoryTest */
extern const char *ScratchCopyDwordIsa;
extern const char *PollMemoryIsa;
extern const char *PollNCMemoryIsa;
extern const char *CopyOnSignalIsa;
extern const char *PollAndCopyIsa;
extern const char *WriteFlagAndValueIsa;
extern const char *WriteAndSignalIsa;
extern const char *WriteReleaseVectorIsa;
extern const char *WriteReleaseScalarIsa;
extern const char *ReadAcquireVectorIsa;
extern const char *ReadAcquireScalarIsa;
extern const char *FlushBufferForAcquireReleaseIsa;

/* KFDQMTest */
extern const char *LoopIsa;
extern const char *CheckCuMaskIsa;

/* KFDCWSRTest */
extern const char *PersistentIterateIsa;

/* KFDEvictTest */
extern const char *ReadMemoryIsa;

/* KFDGWSTest */
extern const char *GwsInitIsa;
extern const char *GwsAtomicIncreaseIsa;

/* HitTrapEvent */
extern const char *JumpToTrapIsa;
extern const char *TrapHandlerIsa;

/* HitWatchPointEvent */
extern const char *WatchReadIsa;
extern const char *WatchWriteIsa;
/* NOTE(review): ShaderStore.cpp defines WatchReadIsa and WatchWriteIsa next
 * to each other, but no WatchAtomicIsa definition follows them - confirm a
 * definition exists before referencing this symbol (otherwise link error). */
extern const char *WatchAtomicIsa;

#endif  // _SHADERSTORE_H_


================================================
FILE: libhsakmt/tests/kfdtest/src/XgmiOptimizedSDMAQueue.hpp
================================================
/*
 * Copyright (C) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __KFD_XGMI_OPTIMIZED_SDMA_QUEUE__H__
#define __KFD_XGMI_OPTIMIZED_SDMA_QUEUE__H__

#include "SDMAQueue.hpp"

/* SDMA queue variant that is created with the HSA_QUEUE_SDMA_XGMI queue type.
 * The only other difference from SDMAQueue visible here is that the inherited
 * CMD_NOP member is reset to 0 on construction. */
class XgmiOptimizedSDMAQueue : public SDMAQueue {
 public:
    XgmiOptimizedSDMAQueue(void) {
        CMD_NOP = 0;
    }

    virtual ~XgmiOptimizedSDMAQueue(void) {}

 protected:
    /* Queue type reported to the base class when the queue is created. */
    virtual _HSA_QUEUE_TYPE GetQueueType() {
        return HSA_QUEUE_SDMA_XGMI;
    }
};

#endif  // __KFD_XGMI_OPTIMIZED_SDMA_QUEUE__H__


================================================
FILE: libhsakmt/tests/rdma/simple/app/CMakeLists.txt
================================================
# Build script for the rdma_test user-space application.
#
# libhsakmt is located in this order:
#   1. $ENV{LIBHSAKMT_PATH}, if set;
#   2. pkg-config (libhsakmt.pc under ROCM_INSTALL_PATH, default /opt/rocm);
#   3. $ENV{OUT_DIR} as a last resort.
#
# NOTE(review): recent CMake releases deprecate (and the newest ones reject)
# compatibility with versions older than 3.5 - consider raising the minimum
# version once the policy changes have been checked.
cmake_minimum_required (VERSION 2.6)

project (rdma_test)

find_package(PkgConfig)
pkg_check_modules(DRM REQUIRED libdrm)
pkg_check_modules(DRM_AMDGPU REQUIRED libdrm_amdgpu)
include_directories(${DRM_AMDGPU_INCLUDE_DIRS})

if( DEFINED ENV{LIBHSAKMT_PATH} )
    set ( LIBHSAKMT_PATH $ENV{LIBHSAKMT_PATH} )
    message ( "LIBHSAKMT_PATH environment variable is set" )
else()
    # Test the variable by name: "${ROCM_INSTALL_PATH}" would expand to the
    # path itself, which if() then treats as an (undefined) variable name, so
    # a user-supplied install path was never honoured.
    if ( ROCM_INSTALL_PATH )
       set ( ENV{PKG_CONFIG_PATH} ${ROCM_INSTALL_PATH}/lib/pkgconfig )
    else()
       set ( ENV{PKG_CONFIG_PATH} /opt/rocm/lib/pkgconfig )
    endif()

    pkg_check_modules(HSAKMT libhsakmt)

    if( NOT HSAKMT_FOUND )
       set ( LIBHSAKMT_PATH $ENV{OUT_DIR} )
    endif()
endif()

if( DEFINED LIBHSAKMT_PATH )
    set ( HSAKMT_LIBRARY_DIRS ${LIBHSAKMT_PATH}/lib )
    set ( HSAKMT_LIBRARIES hsakmt )
endif()


link_directories(${HSAKMT_LIBRARY_DIRS})

include_directories($ENV{LIBHSAKMT_ROOT}/include)
# amdp2ptest.h (ioctl definitions shared with the kernel driver) lives in ../drv
include_directories(../drv)

add_executable(rdma_test rdma_test.cpp)
# Links the static libhsakmt archive by name; it is searched for in
# HSAKMT_LIBRARY_DIRS (HSAKMT_LIBRARIES is currently not used here).
target_link_libraries(rdma_test libhsakmt.a dl pthread numa drm drm_amdgpu)


================================================
FILE: libhsakmt/tests/rdma/simple/app/rdma_test.cpp
================================================
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "hsakmt/hsakmt.h"
#include "amdp2ptest.h"

/* File descriptor of the amdp2ptest device; -1 while the device is not open.
 * Set by rdma_open(), reset by rdma_close(), used for every ioctl/mmap. */
int rdma_fd = -1;

/* Open the amdp2ptest device node read/write and keep the descriptor in
 * rdma_fd. Prints the errno and terminates the process with EXIT_FAILURE
 * when the device cannot be opened. */
void rdma_open()
{
    rdma_fd = open(AMDP2PTEST_DEVICE_PATH, O_RDWR);
    if (rdma_fd != -1)
        return;

    int open_errno = errno;  /* capture before any other libc call */
    fprintf(stderr, "error opening driver (errno=%d/%s)\n", open_errno, strerror(open_errno));
    exit(EXIT_FAILURE);
}

/* Close the amdp2ptest device and mark rdma_fd as closed (-1).
 * Prints the errno and terminates the process with EXIT_FAILURE when
 * close() fails. */
void rdma_close()
{
    if (close(rdma_fd) == -1) {
        /* close() only returns -1; the actual reason is in errno. */
        int close_errno = errno;
        fprintf(stderr, "error closing driver (errno=%d/%s)\n",
                close_errno, strerror(close_errno));
        exit(EXIT_FAILURE);
    }

    rdma_fd = -1;
}

/* Map `size` bytes of GPU memory into the CPU address space through the
 * amdp2ptest device, using the GPU address as the mmap offset.
 *
 * gpu_ptr  GPU virtual address, passed to the driver as the file offset
 * size     number of bytes to map
 * cpu_ptr  out: CPU address of the mapping, or NULL on failure
 *
 * Returns 0 on success or the (positive) errno of the failed mmap().
 */
int rdma_map(uint64_t gpu_ptr, size_t size, void **cpu_ptr)
{
    void *mapping = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, rdma_fd, gpu_ptr);

    /* mmap() signals failure with MAP_FAILED, never with NULL. */
    if (mapping == MAP_FAILED) {
        int map_errno = errno;
        *cpu_ptr = NULL;
        fprintf(stderr, "Can't BAR, error=%s(%d) size=%zu offset=%llx\n",
                strerror(map_errno), map_errno, size, (long long unsigned)gpu_ptr);
        return map_errno;
    }

    *cpu_ptr = mapping;
    return 0;
}

/* Remove a mapping previously created with rdma_map().
 *
 * cpu_ptr  start of the mapping
 * size     length of the mapping in bytes
 *
 * Returns 0 on success or the (positive) errno of the failed munmap().
 */
int rdma_unmap(void *cpu_ptr, size_t size)
{
    if (munmap(cpu_ptr, size) != -1)
        return 0;

    int unmap_errno = errno;  /* capture before fprintf can change it */
    fprintf(stderr, "can't unmap BAR, error=%s(%d) size=%zu\n",
            strerror(unmap_errno), unmap_errno, size);
    return unmap_errno;
}

/*
 * Basic RDMA smoke test on one local-memory heap.
 *
 * Steps:
 * 1. Allocate one 4KB non-paged, coarse-grained buffer on `Node`.
 * 2. Query the GPU page size of that buffer through the amdp2ptest driver.
 * 3. Ask the driver to get (pin) the pages of the buffer.
 * 4. Map the buffer into the CPU address space, print the address, unmap it.
 * 5. Free the buffer.
 *
 * Any failure terminates the process with EXIT_FAILURE.
 *
 * Node            node to allocate the buffer on
 * MemoryProperty  properties of the heap; only printed
 */
void run_rdma_tests(HSAuint32 Node, HsaMemoryProperties *MemoryProperty)
{
    printf("Size 0x%lx (%ld MB)\n", MemoryProperty->SizeInBytes,
                                        MemoryProperty->SizeInBytes / (1024 * 1024));
    printf("VirtualBaseAddress 0x%lx\n", MemoryProperty->VirtualBaseAddress);


    void *cpu_ptr = NULL;
    int ret = 0;
    void *MemoryAddress = 0;
    HSAuint64 SizeInBytes = 4096;
    HsaMemFlags memFlags = {0};

    memFlags.ui32.NonPaged    = 1;
    memFlags.ui32.CachePolicy = HSA_CACHING_WRITECOMBINED;
    memFlags.ui32.NoSubstitute = 1;
    memFlags.ui32.PageSize     = HSA_PAGE_SIZE_4KB;
//    memFlags.ui32.HostAccess   = 1;
    memFlags.ui32.CoarseGrain  = 1;

    HSAKMT_STATUS status = hsaKmtAllocMemory(Node,
                                             SizeInBytes,
                                             memFlags,
                                             &MemoryAddress);

    if (status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "Failure to allocate memory. Status %d\n", status);
        exit(EXIT_FAILURE);
    }

    /* %p already prints the implementation's pointer prefix. */
    printf("Memory allocated. Address %p\n", MemoryAddress);

    struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM get_page_size = {0};
    get_page_size.addr   = (uint64_t) MemoryAddress;
    get_page_size.length = SizeInBytes;

    ret = ioctl(rdma_fd, AMD2P2PTEST_IOCTL_GET_PAGE_SIZE, &get_page_size);

    if (ret != 0)
    {
        /* ioctl() returns -1 on failure; the reason is in errno. */
        int ioctl_errno = errno;
        fprintf(stderr,
                "AMD2P2PTEST_IOCTL_GET_PAGE_SIZE error (errno=%d/%s)\n",
                ioctl_errno, strerror(ioctl_errno));
        exit(EXIT_FAILURE);
    }

    /* Hexadecimal value to match the "0x" prefix. */
    printf("GPU Page size: 0x%lx\n", (unsigned long)get_page_size.page_size);

    struct AMDRDMA_IOCTL_GET_PAGES_PARAM get_cpu_ptr = {0};
    get_cpu_ptr.addr    = (uint64_t) MemoryAddress;
    get_cpu_ptr.length  = SizeInBytes;

    ret = ioctl(rdma_fd, AMD2P2PTEST_IOCTL_GET_PAGES, &get_cpu_ptr);

    if (ret != 0)
    {
        int ioctl_errno = errno;
        fprintf(stderr, "AMD2P2PTEST_IOCTL_GET_PAGES error (errno=%d/%s)\n",
                         ioctl_errno, strerror(ioctl_errno));
        exit(EXIT_FAILURE);
    }


    ret = rdma_map((uint64_t)MemoryAddress, SizeInBytes, &cpu_ptr);

    /* rdma_map() returns 0 or a positive errno, so test for non-zero. */
    if (ret != 0)
    {
        exit(EXIT_FAILURE);
    }

    printf("CPU Virtual address %p\n", cpu_ptr);

    /* Drop the CPU mapping before the backing memory goes away;
     * rdma_unmap() reports its own errors. */
    rdma_unmap(cpu_ptr, SizeInBytes);

    hsaKmtFreeMemory(MemoryAddress, SizeInBytes);
}

/* Read the total amount of system memory from /proc/meminfo.
 *
 * memSize  out: size in bytes (the "MemTotal" value, given in kB, times 1024)
 *
 * Returns 0 on success; -1 when memSize is NULL, /proc/meminfo cannot be
 * opened, or it contains no parsable "MemTotal" line. *memSize is only
 * written on success.
 */
int getSysMemorySize(unsigned long *memSize)
{
    if (memSize == NULL)
        return -1;

    FILE *meminfo = fopen("/proc/meminfo", "r");

    if(meminfo == NULL)
        return -1;

    int found = 0;
    char buff[256];
    while (fgets(buff, sizeof(buff), meminfo))
    {
        unsigned long ramKB;
        if (sscanf(buff, "MemTotal: %lu kB", &ramKB) == 1)
        {
            *memSize = ramKB * 1024;
            found = 1;
            break;
        }
    }

    fclose(meminfo);

    /* Without a MemTotal line *memSize was never written; report failure
     * instead of printing and returning an indeterminate value. */
    if (!found)
        return -1;

    printf("Total system memory size 0x%lx\n", *memSize);
    return 0;
}

/*
 * RDMA contiguous memory allocation test
 *
 * Test steps:
 * 1. Fragment VRAM: allocate all available VRAM as 512MB buffers, then free
 *    every other buffer.
 * 2. Allocate a 1GB buffer with memFlags.ui32.Contiguous=1 for contiguous
 *    VRAM allocation.
 * 3. Call AMD2P2PTEST_IOCTL_GET_PAGES to get the contiguous VRAM buffer pages.
 * 4. The test fails (process exits with EXIT_FAILURE) if any step above fails.
 *
 * The whole process exits with status 0 when the system has less than 16GB
 * of system memory or less than 4GB of available VRAM.
 *
 * The test pauses on getchar() so the page table can be inspected manually.
 * On success the CPU mapping, the buffers still held from step 1 and the
 * bookkeeping array are released before returning.
 */
void run_rdma_contiguous_mem_tests(HSAuint32 Node, HsaMemoryProperties *MemoryProperty)
{
    const unsigned long bufSize = 512ULL << 20;
    unsigned long nBuf;

    HSAuint64 vramSize;
    unsigned long sysMemSize;
    HsaMemFlags memFlags = {0};
    HSAKMT_STATUS status;

    if (getSysMemorySize(&sysMemSize) < 0) {
        fprintf(stderr, "Failed to get system memory size\n");
        exit(EXIT_FAILURE);
    }
    status = hsaKmtAvailableMemory(Node, &vramSize);
    if (status != HSAKMT_STATUS_SUCCESS) {
        fprintf(stderr, "Failed %d to get VRAM size\n", status);
        exit(EXIT_FAILURE);
    }
    if (sysMemSize < (16UL << 30) || vramSize < (4UL << 30)) {
        fprintf(stderr, "No enough system memory or VRAM\n");
        exit(0);
    }
    nBuf = vramSize / bufSize;

    void **pBuf = (void **)malloc(sizeof(*pBuf) * nBuf);
    if (pBuf == NULL) {
        fprintf(stderr, "Failed to alloc array for %lu buffers\n", nBuf);
        exit(EXIT_FAILURE);
    }
    memFlags.ui32.NonPaged = 1;

    for (unsigned long i = 0; i < nBuf; i++) {
        status = hsaKmtAllocMemory(Node, bufSize, memFlags, &pBuf[i]);
        if (status != HSAKMT_STATUS_SUCCESS) {
            fprintf(stderr, "Failed %d to alloc buf %lu\n", status, i);
            exit(EXIT_FAILURE);
        }

        status = hsaKmtMapMemoryToGPU(pBuf[i], bufSize, NULL);
        if (status != HSAKMT_STATUS_SUCCESS) {
            fprintf(stderr, "Failed %d to map buf %lu\n", status, i);
            exit(EXIT_FAILURE);
        }
    }

    /* Free every other buffer (even indices) to fragment VRAM. */
    for (unsigned long i = 0; i < nBuf; i+=2) {
        status = hsaKmtUnmapMemoryToGPU(pBuf[i]);
        if (status != HSAKMT_STATUS_SUCCESS) {
            fprintf(stderr, "Failed %d to unmap buf %lu from GPU\n", status, i);
            exit(EXIT_FAILURE);
        }
        status = hsaKmtFreeMemory(pBuf[i], bufSize);
        if (status != HSAKMT_STATUS_SUCCESS) {
            fprintf(stderr, "Failed %d to free buf %lu\n", status, i);
            exit(EXIT_FAILURE);
        }
    }

    printf("Node %d Size 0x%lx (%ld MB)\n", Node, MemoryProperty->SizeInBytes,
                                        MemoryProperty->SizeInBytes / (1024 * 1024));

    void *cpu_ptr = NULL;
    int ret = 0;
    void *MemoryAddress = 0;
    HSAuint64 SizeInBytes = 1UL <<  30;

    memFlags.ui32.Contiguous = 1;

    status = hsaKmtAllocMemory(Node, SizeInBytes, memFlags, &MemoryAddress);
    if (status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "Failure to allocate memory 0x%lx. Status %d\n", SizeInBytes, status);
        exit(EXIT_FAILURE);
    }

    status = hsaKmtMapMemoryToGPU(MemoryAddress, SizeInBytes, NULL);
    if (status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "Failure to map memory. Status %d\n", status);
        exit(EXIT_FAILURE);
    }

    printf("VRAM allocated. Address %p size 0x%lx bytes\n", MemoryAddress, SizeInBytes);

    struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM get_page_size = {0};
    get_page_size.addr   = (uint64_t) MemoryAddress;
    get_page_size.length = SizeInBytes;

    ret = ioctl(rdma_fd, AMD2P2PTEST_IOCTL_GET_PAGE_SIZE, &get_page_size);
    if (ret != 0)
    {
        /* ioctl() returns -1 on failure; the reason is in errno. */
        int ioctl_errno = errno;
        fprintf(stderr, "AMD2P2PTEST_IOCTL_GET_PAGE_SIZE error (errno=%d/%s)\n",
                ioctl_errno, strerror(ioctl_errno));
        exit(EXIT_FAILURE);
    }

    /* Hexadecimal value to match the "0x" prefix. */
    printf("GPU Page size: 0x%lx\n", (unsigned long)get_page_size.page_size);

    struct AMDRDMA_IOCTL_GET_PAGES_PARAM get_cpu_ptr = {0};
    get_cpu_ptr.addr    = (uint64_t) MemoryAddress;
    get_cpu_ptr.length  = SizeInBytes;

    ret = ioctl(rdma_fd, AMD2P2PTEST_IOCTL_GET_PAGES, &get_cpu_ptr);
    if (ret != 0)
    {
        int ioctl_errno = errno;
        fprintf(stderr, "AMD2P2PTEST_IOCTL_GET_PAGES error (errno=%d/%s)\n",
                         ioctl_errno, strerror(ioctl_errno));
        exit(EXIT_FAILURE);
    }

    printf("IOCTL_GET_PAGES return contiguous VRAM address %p size 0x%lx bytes\n", MemoryAddress, SizeInBytes);
    printf("Pause to dump page table to check if allocation is contiguous\n");
    printf("Press Enter key to continue\n");
    getchar();

    ret = rdma_map((uint64_t)MemoryAddress, 4096, &cpu_ptr);
    /* rdma_map() returns 0 or a positive errno, so test for non-zero. */
    if (ret != 0)
    {
        exit(EXIT_FAILURE);
    }

    /* Drop the CPU mapping before the backing memory goes away;
     * rdma_unmap() reports its own errors. */
    rdma_unmap(cpu_ptr, 4096);

    hsaKmtFreeMemory(MemoryAddress, SizeInBytes);

    /* Release the buffers (odd indices) that were kept to hold VRAM
     * fragmented, so a following node starts from a clean state. Failures
     * are reported but do not fail the test, which has already passed. */
    for (unsigned long i = 1; i < nBuf; i+=2) {
        status = hsaKmtUnmapMemoryToGPU(pBuf[i]);
        if (status != HSAKMT_STATUS_SUCCESS)
            fprintf(stderr, "Failed %d to unmap buf %lu from GPU\n", status, i);

        status = hsaKmtFreeMemory(pBuf[i], bufSize);
        if (status != HSAKMT_STATUS_SUCCESS)
            fprintf(stderr, "Failed %d to free buf %lu\n", status, i);
    }

    free(pBuf);
}

int main(void)
{
    HsaVersionInfo      VersionInfo;

    HSAKMT_STATUS          status = hsaKmtOpenKFD();

    if( status == HSAKMT_STATUS_SUCCESS)
    {
        status = hsaKmtGetVersion(&VersionInfo);

        if(status == HSAKMT_STATUS_SUCCESS)
        {
            printf("Kernel Interface Major Version: %d\n", VersionInfo.KernelInterfaceMajorVersion);
            printf("Kernel Interface Minor Version: %d\n", VersionInfo.KernelInterfaceMinorVersion);
        }
    }

    rdma_open();

    HsaSystemProperties SystemProperties = {0};
    status = hsaKmtAcquireSystemProperties(&SystemProperties);

    if(status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "hsaKmtAcquireSystemProperties call failed. Error: %d\n", status);
        exit(EXIT_FAILURE);
    }

    printf("System properties: Number of nodes: %d\n", SystemProperties.NumNodes);

    for (HSAuint32 iNode = 0; iNode < SystemProperties.NumNodes; iNode++)
    {
        HsaNodeProperties  NodeProperties = {0};
        status = hsaKmtGetNodeProperties(iNode, &NodeProperties);

        if(status != HSAKMT_STATUS_SUCCESS)
        {
            fprintf(stderr, "hsaKmtGetNodeProperties (Node = %d) call failed. Error: %d\n",
                             iNode, status);
            exit(EXIT_FAILURE);
        }

        printf("Node %d -> Number of Memory Banks = %d\n", iNode,
                            NodeProperties.NumMemoryBanks);

        HsaMemoryProperties*  MemoryProperties =
                    new HsaMemoryProperties[NodeProperties.NumMemoryBanks];

        status = hsaKmtGetNodeMemoryProperties(iNode,
                                               NodeProperties.NumMemoryBanks,
                                               MemoryProperties);

        if(status != HSAKMT_STATUS_SUCCESS)
        {
            fprintf(stderr, "hsaKmtGetNodeMemoryProperties (Node = %d) call failed. Error: %d\n",
                             iNode, status);
            exit(EXIT_FAILURE);
        }

        for (HSAuint32 iMemBank = 0; iMemBank < NodeProperties.NumMemoryBanks; iMemBank++)
        {
            printf("Heap type: %d\n", MemoryProperties[iMemBank].HeapType);

            if (MemoryProperties[iMemBank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)
            {
                // We found local memory available for RDMA operation.
                // Run some tests on it.
                run_rdma_tests(iNode, &MemoryProperties[iMemBank]);
                run_rdma_contiguous_mem_tests(iNode, &MemoryProperties[iMemBank]);
            }
        }
    }


    status = hsaKmtReleaseSystemProperties();

    if(status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "hsaKmtReleaseSystemProperties call failed. Error: %d\n",
                status);
        exit(EXIT_FAILURE);
    }

    rdma_close();

    status = hsaKmtCloseKFD();

    if(status != HSAKMT_STATUS_SUCCESS)
    {
        fprintf(stderr, "hsaKmtCloseKFD call failed. Error: %d\n", status);
        exit(EXIT_FAILURE);
    }

    return EXIT_SUCCESS;
}


================================================
FILE: libhsakmt/tests/rdma/simple/drv/amdp2ptest.c
================================================
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */


#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/uaccess.h>

#include "drm/amd_rdma.h"
#include "amdp2ptest.h"


MODULE_AUTHOR("serguei.sagalovitch@amd.com");
MODULE_LICENSE("MIT");
MODULE_DESCRIPTION("AMD RDMA basic API test kernel-mode driver");
MODULE_VERSION("1.0");


const struct amd_rdma_interface *rdma_interface;


/* One pinned GPU range obtained through the RDMA interface, tracked per open file. */
struct va_pages_node {
	struct list_head node;	/* linkage into amdp2ptest_pages_list.head */
	struct amd_p2p_info *pages;	/* handle returned by rdma_interface->get_pages() */
};


/* Per-open-file state: stored in filp->private_data by amdp2ptest_open(). */
struct amdp2ptest_pages_list {
	struct list_head	head;	/* list of struct va_pages_node */
	struct mutex	lock;	/* taken around list_add()/list_del() on head */
};


/*
 * Logging helpers. Each one forwards to the matching pr_*() call and
 * prefixes the message with the device name.
 */
#define MSG_INFO(fmt, ...) \
	pr_info(AMDP2PTEST_DEVICE_NAME ": " fmt, ##__VA_ARGS__)
#define MSG_ERR(fmt, ...) \
	pr_err(AMDP2PTEST_DEVICE_NAME ": " fmt, ##__VA_ARGS__)
#define MSG_warn(fmt, ...) \
	pr_warn(AMDP2PTEST_DEVICE_NAME ": " fmt, ##__VA_ARGS__)

static int amdp2ptest_open(struct inode *inode, struct file *filp)
{
	struct amdp2ptest_pages_list *list;

	MSG_INFO("Open driver\n");

	list = kmalloc(sizeof(struct amdp2ptest_pages_list), GFP_KERNEL);

	if (!list) {
		MSG_ERR("Can't alloc kernel memory to store list stucture\n");
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&list->head);
	mutex_init(&list->lock);

	filp->private_data = list;

	return 0;
}


static int amdp2ptest_release(struct inode *inode, struct file *filp)
{
	struct va_pages_node	      *va_pages = NULL;
	int retcode;
	struct amdp2ptest_pages_list *list = filp->private_data;
	struct list_head *p, *n;

	MSG_INFO("Close driver\n");

	list_for_each_safe(p, n, &list->head) {
		va_pages = list_entry(p, struct va_pages_node, node);
		MSG_INFO("Free pages: VA 0x%llx\n", va_pages->pages->va);
		retcode = rdma_interface->put_pages(&va_pages->pages);

		if (retcode != 0)
			MSG_ERR("Could not put pages back: %d\n", retcode);

		mutex_lock(&list->lock);
		list_del(&va_pages->node);
		mutex_unlock(&list->lock);
		kfree(va_pages);
	}

	filp->private_data = NULL;
	kfree(list);
	return 0;
}


/*
 * AMD2P2PTEST_IOCTL_GET_PAGE_SIZE handler.
 *
 * Copies a struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM from user space, asks
 * the RDMA interface for the page size of [addr, addr + length) in the
 * calling process, stores it in params.page_size and copies the structure
 * back to user space.
 *
 * @filp: open file (unused here)
 * @arg:  user pointer to the parameter structure
 *
 * Returns 0 on success, -EFAULT if either user copy or the interface
 * query fails.
 */
static int ioctl_get_page_size(struct file *filp, unsigned long arg)
{
	struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM params = {0};
	unsigned long page_size;
	struct pid *pid;
	int result;

	MSG_INFO("AMD2P2PTEST_IOCTL_GET_PAGE_SIZE\n");

	if (copy_from_user(&params, (void *)arg, sizeof(params))) {
		MSG_ERR("copy_from_user failed on pointer %p\n",
							(void *)arg);
		return -EFAULT;
	}

	MSG_INFO("addr %llx, length %llx\n", params.addr,
					     params.length);

	/*
	 * get_task_pid() takes a reference on the struct pid, so it has to
	 * be dropped once the query is done.
	 * NOTE(review): assumes get_page_size() does not keep the pid
	 * pointer beyond the call - confirm against the amd_rdma interface.
	 */
	pid = get_task_pid(current, PIDTYPE_PID);
	result = rdma_interface->get_page_size(params.addr,
				params.length,
				pid,
				&page_size);
	put_pid(pid);

	if (result) {
		MSG_ERR("Could not get page size. %d\n", result);
		return -EFAULT;
	}

	params.page_size = page_size;
	MSG_INFO("Page size %llx\n", params.page_size);

	if (copy_to_user((void *)arg, &params, sizeof(params))) {
		MSG_ERR("copy_to_user failed on user pointer %p\n",
						(void *)arg);

		return -EFAULT;
	}

	return 0;
}

/*
 * AMD2P2PTEST_IOCTL_GET_PAGES handler.
 *
 * Copies a struct AMDRDMA_IOCTL_GET_PAGES_PARAM from user space, pins
 * [addr, addr + length) of the calling process through the RDMA interface
 * and records the returned handle in this file's list so that it can be
 * mapped (mmap) or released (PUT_PAGES / close) later.
 *
 * @filp: open file; private_data holds the struct amdp2ptest_pages_list
 * @arg:  user pointer to the parameter structure
 *
 * Returns 0 on success, -EFAULT if a user copy or get_pages() fails,
 * -ENOMEM if the tracking node cannot be allocated. On every failure
 * after get_pages() succeeded the pages are put back first.
 */
static int ioctl_get_pages(struct file *filp, unsigned long arg)
{
	struct va_pages_node	      *va_pages = NULL;
	struct amdp2ptest_pages_list *list = filp->private_data;
	struct AMDRDMA_IOCTL_GET_PAGES_PARAM params = {0};
	int result;
	struct amd_p2p_info  *pages;

	MSG_INFO("AMD2P2PTEST_IOCTL_GET_PAGES\n");

	if (copy_from_user(&params, (void *)arg, sizeof(params))) {
		MSG_ERR("copy_from_user failed on pointer %p\n",
							(void *)arg);
		return -EFAULT;
	}

	MSG_INFO("addr %llx, length %llx\n", params.addr, params.length);

	/*
	 * NOTE(review): the reference taken by get_task_pid() is never
	 * dropped on this path. Whether the RDMA interface keeps the pid
	 * pointer for later put_pages() calls is not visible in this file,
	 * so it is deliberately left alone here - confirm before adding a
	 * put_pid().
	 */
	result = rdma_interface->get_pages(params.addr, params.length,
					get_task_pid(current, PIDTYPE_PID),
					0, /* There is no dma_device for which
					      to get pages -> no IOMMU support
					      is needed */
					&pages,
					NULL,
					list /* Pointer to the list */
					);

	if (result) {
		MSG_ERR("Could not get pages table. %d\n", result);
		return -EFAULT;
	}

	/* params is copied back unchanged; no output field is filled in. */
	if (copy_to_user((void *)arg, &params, sizeof(params))) {
		MSG_ERR("copy_to_user failed on user pointer %p\n",
							(void *)arg);
		rdma_interface->put_pages(&pages);
		return -EFAULT;
	}

	/* kzalloc() replaces the former kmalloc() + memset() pair. */
	va_pages = kzalloc(sizeof(*va_pages), GFP_KERNEL);
	if (!va_pages) {
		MSG_ERR("Can't alloc kernel memory\n");
		rdma_interface->put_pages(&pages);
		return -ENOMEM;
	}

	va_pages->pages = pages;

	mutex_lock(&list->lock);
	list_add(&va_pages->node, &list->head);
	mutex_unlock(&list->lock);

	return 0;
}


static int ioctl_put_pages(struct file *filp, unsigned long arg)
{
	struct va_pages_node	      *va_pages = NULL;
	struct amdp2ptest_pages_list *list = filp->private_data;
	struct AMDRDMA_IOCTL_PUT_PAGES_PARAM params = {0};
	struct list_head *p, *n;
	int retcode;

	MSG_INFO("AMD2P2PTEST_IOCTL_PUT_PAGES");

	if (copy_from_user(&params, (void *)arg, sizeof(params))) {
		MSG_ERR("copy_from_user failed on pointer %p\n",
							(void *)arg);
		return -EFAULT;
	}

	MSG_INFO("addr %llx, length %llx\n", params.addr, params.length);


	list_for_each_safe(p, n, &list->head) {
		va_pages = list_entry(p, struct va_pages_node, node);

		if (va_pages->pages->va == params.addr &&
			va_pages->pages->size == params.length) {

			retcode = rdma_interface->put_pages(&va_pages->pages);

			if (retcode != 0) {
				MSG_ERR("Could not put pages back: %d\n",
						retcode);
			}

			mutex_lock(&list->lock);
			list_del(&va_pages->node);
			mutex_unlock(&list->lock);
			kfree(va_pages);
			/* Note: Do not break from loop to allow test
			 * situation when "get_pages" would be called
			 * on the same memory several times
			 **/
		}
	}

	return 0;
}


/*
 * ioctl dispatch table: maps a command number to its handler.
 * The entry with a NULL handler terminates the table.
 */
static const struct ioctl_handler_map {
	int (*handler)(struct file *filp, unsigned long arg);
	unsigned int cmd;
} handlers[] = {
	{
		.handler = ioctl_get_page_size,
		.cmd	 = AMD2P2PTEST_IOCTL_GET_PAGE_SIZE,
	},
	{
		.handler = ioctl_get_pages,
		.cmd	 = AMD2P2PTEST_IOCTL_GET_PAGES,
	},
	{
		.handler = ioctl_put_pages,
		.cmd	 = AMD2P2PTEST_IOCTL_PUT_PAGES,
	},
	{
		.handler = NULL,
		.cmd	 = 0,
	}
};


static long amdp2ptest_unlocked_ioctl(struct file *filp, unsigned int cmd,
							 unsigned long arg)
{
	int result = -EINVAL;
	int i;

	for (i = 0; handlers[i].handler != NULL; i++)
		if (cmd == handlers[i].cmd) {
			result = handlers[i].handler(filp, arg);
			break;
		}

	return result;
}


static int amdp2ptest_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int i;
	struct scatterlist *sg;
	struct va_pages_node	      *va_pages = NULL;
	struct amdp2ptest_pages_list *list = filp->private_data;
	struct list_head *p, *n;
	uint64_t gpu_va = vma->vm_pgoff << PAGE_SHIFT;

	MSG_INFO("Mapping to CPU user space\n");
	MSG_INFO("Begin vm_start 0x%lx, vm_end 0x%lx\n", vma->vm_start, vma->vm_end);
	MSG_INFO("vm_pgoff 0x%lx\n", vma->vm_pgoff);
	MSG_INFO("gpu_va address 0x%llx\n", gpu_va);

	list_for_each_safe(p, n, &list->head) {
		unsigned long addr = vma->vm_start;
		long mmap_size = vma->vm_end - vma->vm_start;
		long size;
		int ret;

		va_pages = list_entry(p, struct va_pages_node, node);

		MSG_INFO("node va 0x%llx size 0x%llx\n", va_pages->pages->va,
			va_pages->pages->size);

		if (gpu_va >= va_pages->pages->va  &&
		    gpu_va + size <= va_pages->pages->va + va_pages->pages->size) {
			MSG_INFO("Found node: va=0x%llx,size=0x%llx,nents %d\n",
					va_pages->pages->va,
					va_pages->pages->size,
					va_pages->pages->pages->nents);

			for_each_sg(va_pages->pages->pages->sgl, sg,
					va_pages->pages->pages->nents, i) {

				MSG_INFO("Found page[%d]: dma 0x%llx size 0x%x\n",
					i, sg->dma_address, sg->length);

				size = min_t(unsigned long, sg->length, mmap_size);
				MSG_INFO("remap_pfn range addr 0x%lx to dma_addr 0x%llx size 0x%lx\n",
					addr, sg->dma_address, size);
				ret = remap_pfn_range(vma,
						addr,
						sg->dma_address >> PAGE_SHIFT,
						size,
						vma->vm_page_prot);
				if (ret) {
					MSG_ERR("Failed remap_pfn() size 0x%lx ret %d\n",
						size, ret);
					return ret;
				}
				addr += size;
				mmap_size -= size;
				if (mmap_size <= 0)
					break;
			}
			return 0;
		}
	}

	return -EINVAL;
}


/*---------------------------------------------------------------------------*/

/* File operations exposed by the misc device. */
static const struct file_operations amdp2ptest_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = amdp2ptest_unlocked_ioctl,	/* GET_PAGE_SIZE / GET_PAGES / PUT_PAGES */
	.open = amdp2ptest_open,	/* allocates the per-file page list */
	.release = amdp2ptest_release,	/* puts back all pages still tracked */
	.mmap = amdp2ptest_mmap,	/* maps a tracked range into user space */
};


/* Misc device descriptor registered by amdp2ptest_init(). */
static struct miscdevice amdp2ptest_dev = {
	/*
	 * We don't care what minor number we end up with, so tell the
	 * kernel to just pick one.
	 */
	.minor = MISC_DYNAMIC_MINOR,
	/*
	 * Device node name (AMDP2PTEST_DEVICE_NAME).
	 */
	.name = AMDP2PTEST_DEVICE_NAME,
	/*
	 * What functions to call when a program performs file
	 * operations on the device.
	 */
	.fops = &amdp2ptest_fops,

	/* Security attribute / access: read/write/execute for user, group and others */
	.mode = S_IRWXU | S_IRWXG | S_IRWXO
};

/*
 * Address of amdkfd_query_rdma_interface, resolved with symbol_request()
 * in amdp2ptest_init(); a non-NULL value means a symbol reference is held.
 */
static int (*p2p_query_rdma_interface)(const struct amd_rdma_interface **);

/*
 * Module init.
 *
 * Resolves amdkfd_query_rdma_interface, fetches the RDMA interface table
 * through it and registers the misc device.
 *
 * The reference obtained by symbol_request() is dropped again on every
 * failure path that follows it; previously it was leaked when the
 * interface query or misc_register() failed, because the exit handler
 * does not run after a failed init.
 *
 * Returns 0 on success, -ENOENT if the symbol is unavailable, or the
 * negative error from the interface query / misc_register().
 */
static int __init amdp2ptest_init(void)
{
	int result;

	p2p_query_rdma_interface = (int (*)(const struct amd_rdma_interface **))
				   symbol_request(amdkfd_query_rdma_interface);
	if (!p2p_query_rdma_interface) {
		MSG_ERR("Can not get symbol amdkfd_query_rdma_interface, please load amdgpu driver\n");
		return -ENOENT;
	}

	result = p2p_query_rdma_interface(&rdma_interface);
	if (result < 0) {
		MSG_ERR("Can not get RDMA Interface (result = %d)\n", result);
		goto err_put_symbol;
	}

	MSG_INFO("RDMA Interface %p\n",		rdma_interface);
	MSG_INFO("     get_pages %p\n",		rdma_interface->get_pages);
	MSG_INFO("     put_pages %p\n",		rdma_interface->put_pages);
	MSG_INFO("     is_gpu_address %p\n",	rdma_interface->is_gpu_address);
	MSG_INFO("     get_page_size %p\n",	rdma_interface->get_page_size);

	/*
	 * Create the device in the /sys/class/misc directory.
	 * Udev will automatically create the /dev/xxxxx device using
	 * the default rules.
	 */
	result = misc_register(&amdp2ptest_dev);
	if (result < 0) {
		MSG_ERR("Can not register device (result = %d)\n", result);
		goto err_put_symbol;
	}

	return 0;

err_put_symbol:
	/* Undo symbol_request(). */
	symbol_put(amdkfd_query_rdma_interface);
	p2p_query_rdma_interface = NULL;
	return result;
}


/*
 * Module exit: unregisters the misc device and drops the reference on
 * amdkfd_query_rdma_interface if one was obtained.
 *
 * Note: cleanup_module is never called if registering failed.
 */
static void __exit amdp2ptest_cleanup(void)
{
	MSG_INFO("Unregistering\n");
	misc_deregister(&amdp2ptest_dev);

	if (p2p_query_rdma_interface != NULL)
		symbol_put(amdkfd_query_rdma_interface);
}


module_init(amdp2ptest_init);
module_exit(amdp2ptest_cleanup);


================================================
FILE: libhsakmt/tests/rdma/simple/drv/amdp2ptest.h
================================================
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef AMDP2PTEST_H_
#define AMDP2PTEST_H_

#include <linux/ioctl.h>

#define AMDP2PTEST_IOCTL_MAGIC 'A'


#define AMDP2PTEST_DEVICE_NAME "amdp2ptest"
#define AMDP2PTEST_DEVICE_PATH "/dev/amdp2ptest"

/* Argument block for AMD2P2PTEST_IOCTL_GET_PAGE_SIZE. */
struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM {
	/* Input parameters */
	uint64_t addr;		/* start address of the range to query */
	uint64_t length;	/* length of the range */

	/* Output parameters */
	uint64_t page_size;	/* page size reported for the range */
};

/* Argument block for AMD2P2PTEST_IOCTL_GET_PAGES. */
struct AMDRDMA_IOCTL_GET_PAGES_PARAM {
	/* Input parameters */
	uint64_t addr;		/* start address of the range to pin */
	uint64_t length;	/* length of the range */
	uint64_t is_local;	/* 1 if this is the pointer to local
				   allocation */

	/* Output parameters */
	uint64_t cpu_ptr;	/* NOTE(review): meaning not documented here -
				   confirm against the driver */
};


/* Argument block for AMD2P2PTEST_IOCTL_PUT_PAGES. */
struct AMDRDMA_IOCTL_PUT_PAGES_PARAM {
	/* Input parameters */
	uint64_t addr;		/* start address of the range to release */
	uint64_t length;	/* length of the range */
};


/*
 * ioctl command numbers.
 *
 * Note: the third macro argument is a pointer type, so the size encoded
 * in each command number is that of a pointer rather than of the
 * parameter structure.
 */

/* Query the page size of a range (read/write argument). */
#define AMD2P2PTEST_IOCTL_GET_PAGE_SIZE	\
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 1, struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM *)

/* Pin a range and start tracking it (read/write argument). */
#define AMD2P2PTEST_IOCTL_GET_PAGES \
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 2, struct AMDRDMA_IOCTL_GET_PAGES_PARAM *)

/* Release a previously pinned range (write-only argument). */
#define AMD2P2PTEST_IOCTL_PUT_PAGES	\
_IOW(AMDP2PTEST_IOCTL_MAGIC, 3, struct AMDRDMA_IOCTL_PUT_PAGES_PARAM *)


#endif  /* AMDP2PTEST_H */


================================================
FILE: libhsakmt/tests/reopen/CMakeLists.txt
================================================
cmake_minimum_required (VERSION 2.6)

project (kmtreopen)

# Location of the prebuilt libraries, taken from the build environment.
link_directories($ENV{ROOT_OF_ROOTS}/out/lib)

# hsakmt.h is taken from the libhsakmt source tree.
include_directories($ENV{LIBHSAKMT_ROOT}/include)

add_executable(kmtreopen kmtreopen.c)

# The test loads libhsakmt.so with dlopen(), so only the platform's dynamic
# loader library is linked. CMAKE_DL_LIBS names it portably instead of
# hard-coding "libdl.so".
target_link_libraries(kmtreopen ${CMAKE_DL_LIBS})


================================================
FILE: libhsakmt/tests/reopen/kmtreopen.c
================================================
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <hsakmt.h>

HSAKMT_STATUS HSAKMTAPI (*pfn_hsaKmtOpenKFD)(void);
HSAKMT_STATUS HSAKMTAPI (*pfn_hsaKmtCloseKFD)(void);
HSAKMT_STATUS HSAKMTAPI (*pfn_hsaKmtGetVersion)(HsaVersionInfo* VersionInfo);
HSAKMT_STATUS HSAKMTAPI (*pfn_hsaKmtAcquireSystemProperties)(HsaSystemProperties* SystemProperties);
HSAKMT_STATUS HSAKMTAPI (*pfn_hsaKmtReleaseSystemProperties)(void);

HsaVersionInfo g_versionInfo;
HsaSystemProperties g_systemProperties;

/*
 * Print "<s>: <description>" for an HSAKMT status code to stderr.
 *
 * The description table is built with designated initializers, so any
 * index that is not listed explicitly (for example if the HSAKMT_STATUS
 * values are not contiguous) holds NULL. Such entries, as well as values
 * outside the table, are reported as "Unknown error <n>" instead of
 * passing NULL to "%s".
 *
 * s:      prefix, typically the name of the failing call
 * status: status code to describe
 */
static void hsa_perror(const char *s, HSAKMT_STATUS status)
{
    static const char *errorStrings[] = {
        [HSAKMT_STATUS_SUCCESS] = "Success",
        [HSAKMT_STATUS_ERROR] = "General error",
        [HSAKMT_STATUS_DRIVER_MISMATCH] = "Driver mismatch",
        [HSAKMT_STATUS_INVALID_PARAMETER] = "Invalid parameter",
        [HSAKMT_STATUS_INVALID_HANDLE] = "Invalid handle",
        [HSAKMT_STATUS_INVALID_NODE_UNIT] = "Invalid node or unit",
        [HSAKMT_STATUS_NO_MEMORY] = "No memory",
        [HSAKMT_STATUS_BUFFER_TOO_SMALL] = "Buffer too small",
        [HSAKMT_STATUS_NOT_IMPLEMENTED] = "Not implemented",
        [HSAKMT_STATUS_NOT_SUPPORTED] = "Not supported",
        [HSAKMT_STATUS_UNAVAILABLE] = "Unavailable",
        [HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED] = "Kernel IO channel not opened",
        [HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR] = "Kernel communication error",
        [HSAKMT_STATUS_KERNEL_ALREADY_OPENED] = "Kernel already opened",
        [HSAKMT_STATUS_HSAMMU_UNAVAILABLE] = "HSA MMU unavailable",
        [HSAKMT_STATUS_WAIT_FAILURE] = "Wait failure",
        [HSAKMT_STATUS_WAIT_TIMEOUT] = "Wait timeout",
        [HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED] = "Memory already registered",
        [HSAKMT_STATUS_MEMORY_NOT_REGISTERED] = "Memory not registered",
        [HSAKMT_STATUS_MEMORY_ALIGNMENT] = "Memory alignment error"
    };
    const size_t numStrings = sizeof(errorStrings) / sizeof(errorStrings[0]);
    const char *description = NULL;

    if (status >= 0 && (size_t)status < numStrings)
        description = errorStrings[status];

    if (description != NULL)
        fprintf(stderr, "%s: %s\n", s, description);
    else
        fprintf(stderr, "%s: Unknown error %d\n", s, status);
}

/*
 * Announce `call`, invoke it through its pfn_-prefixed function pointer
 * and, if the result is not HSAKMT_STATUS_SUCCESS, report the failure and
 * return that status from the enclosing function.
 */
#define HSA_CHECK_RETURN(call)                                  \
    do {                                                        \
        HSAKMT_STATUS hsaCheckStatus_;                          \
        printf("  Calling %s\n", #call);                        \
        hsaCheckStatus_ = pfn_##call;                           \
        if (hsaCheckStatus_ != HSAKMT_STATUS_SUCCESS) {         \
            hsa_perror(#call, hsaCheckStatus_);                 \
            return hsaCheckStatus_;                             \
        }                                                       \
    } while(0)

/*
 * Resolve `func` in the library `handle` into the matching pfn_<func>
 * pointer. When the lookup yields NULL, print dlerror() and return
 * HSAKMT_STATUS_ERROR from the enclosing function.
 */
#define HSA_DLSYM(handle, func)                                 \
    do {                                                        \
        if ((pfn_##func = dlsym(handle, #func)) == NULL) {      \
            fprintf(stderr, "dlsym failed: %s\n", dlerror());   \
            return HSAKMT_STATUS_ERROR;                         \
        }                                                       \
    } while(0)

/*
 * Run one open/query/close cycle against the library behind `handle`.
 *
 * Resolves the five entry points used by the test, then opens KFD, reads
 * the version, acquires and releases the system properties and closes KFD
 * again. Each macro below returns early from this function on failure.
 *
 * Returns HSAKMT_STATUS_SUCCESS when every step succeeded, otherwise the
 * failing status (HSAKMT_STATUS_ERROR for a failed symbol lookup).
 */
static int runTest(void *handle)
{
    /* Symbol lookup must be redone for every freshly loaded library. */
    HSA_DLSYM(handle, hsaKmtOpenKFD);
    HSA_DLSYM(handle, hsaKmtCloseKFD);
    HSA_DLSYM(handle, hsaKmtGetVersion);
    HSA_DLSYM(handle, hsaKmtAcquireSystemProperties);
    HSA_DLSYM(handle, hsaKmtReleaseSystemProperties);

    HSA_CHECK_RETURN(hsaKmtOpenKFD());
    HSA_CHECK_RETURN(hsaKmtGetVersion(&g_versionInfo));
    HSA_CHECK_RETURN(hsaKmtAcquireSystemProperties(&g_systemProperties));

    /* Tear down in reverse order of setup. */
    HSA_CHECK_RETURN(hsaKmtReleaseSystemProperties());
    HSA_CHECK_RETURN(hsaKmtCloseKFD());

    return HSAKMT_STATUS_SUCCESS;
}

/*
 * Load libhsakmt.so, run the open/close test against it and unload it
 * again, five times in a row. Any failure terminates the process with
 * exit status 1.
 */
int main(int argc, char *argv[])
{
    int iteration;

    for (iteration = 1; iteration <= 5; iteration++) {
        void *library;

        printf("Iteration %d:\n  Loading libhsakmt.so\n", iteration);

        library = dlopen("libhsakmt.so", RTLD_LAZY);
        if (!library) {
            fprintf(stderr, "dlopen failed: %s\n", dlerror());
            exit(1);
        }

        if (runTest(library) != HSAKMT_STATUS_SUCCESS)
            exit(1);

        printf("  Unloading libhsakmt.so\n");
        if (dlclose(library)) {
            fprintf(stderr, "dlclose failed: %s\n", dlerror());
            exit(1);
        }
    }
}


================================================
FILE: rocrtst/.gitignore
================================================

*.o
*.bin
*.tar
*.hsaco
*.orig
*.obsol
*.bk
*.old
*.cmake
build


================================================
FILE: rocrtst/Kernels/CMakeLists.txt
================================================

cmake_minimum_required(VERSION 2.8.0)

#
#  Setup build environment
#
#  1) Set the env vars LLVM_DIR and OCL_BITCODE_DIR to point to the
#     folders containing the relevant tools and libraries, respectively
#
#     export LLVM_DIR="Path to Lightning build artifacts"
#
#     export OCL_BITCODE_DIR="Path containing AMDGCN Bitcode libraries"
#
#  2) Make a new folder called build under the root folder
#
#     mkdir build
#
#  3) Enter the build folder, run CMake to generate the makefile,
#     and run make
#
#     cd build; cmake ..; make
#

if(WIN32)
  message("Windows platform is not supported")
  return()
endif()

#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )

set(PROJECT_NAME "CompileKernels")
project (${PROJECT_NAME})

#
# Validate LLVM related resources are available
# (a missing LLVM_DIR only skips this directory; it is not a fatal error)
#
if (NOT DEFINED ENV{LLVM_DIR})
  message("LLVM_DIR define is not set. Kernels cannot be built.")
  return()
endif()

#
# Validate OpenCL related resources are available
# (unlike LLVM_DIR, a missing OCL_BITCODE_DIR aborts the configure step)
#
if (NOT DEFINED ENV{OCL_BITCODE_DIR})
  message(FATAL_ERROR "OCL_BITCODE_DIR define is not set. Kernels cannot be built.")
endif()

# LLVM_DIR is expected to be the directory that contains the clang executable
set(CLANG $ENV{LLVM_DIR}/clang)
if (NOT EXISTS ${CLANG})
  message("Path to clang (${CLANG}) is not valid. Is LLVM_DIR defined correctly?")
  return()
endif()

#
# Define the OpenCL version if it is not defined
#
if (DEFINED ENV{OPENCL_VER})
  set(OPENCL_VER $ENV{OPENCL_VER})
else()
  message("OPENCL_VER define is not set. Using default")
  set(OPENCL_VER "2.0")
endif()

#
# Define list of Target Device types for which to get code objects
#
set(DEV_LIST "gfx803" "gfx900" CACHE STRING "List of Gfx Devices")
set(TARGET_DEV_LIST ${DEV_LIST})
separate_arguments(TARGET_DEV_LIST)

# Maintains a global list of targets to build
# (reset to empty on every configure run, then appended to by CompileKernel)
set (ROCM_CODEOBJ_LIST "" CACHE INTERNAL ROCM_CODEOBJ_LIST)

#
# Options that are passed along to Clang to enable code object generation
#
set(KERN_SUFFIX "kernels.hsaco")
# Check if device-libs bitcode is following old or new layout
set(BITCODE_DIR "$ENV{OCL_BITCODE_DIR}")
if(EXISTS "${BITCODE_DIR}/opencl.amdgcn.bc")
  set(BITCODE_ARGS "-nogpulib
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/opencl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ockl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ocml.amdgcn.bc")
else()
  set(BITCODE_ARGS "--hip-device-lib-path=${BITCODE_DIR}")
endif()

#
# Compiles an OpenCL kernel into an AMDGCN code object (.hsaco) and an
# assembly listing (.asm) for one target device.
#
#   KRNL_NAME  - base name of the kernel; outputs are named
#                <KRNL_NAME>_<TARGET_DEV>.hsaco / .asm
#   TARGET_DEV - gfx target passed to clang via -mcpu
#
# The source file is taken from CL_FILE when the caller has set it,
# otherwise it defaults to <KRNL_NAME>_kernel.cl. Also reads BITCODE_ARGS,
# OPENCL_VER and CLANG from the enclosing scope and appends the created
# target name to the cached ROCM_CODEOBJ_LIST.
#
function(CompileKernel KRNL_NAME TARGET_DEV)

  if(NOT CL_FILE)
    set(CL_FILE "${KRNL_NAME}_kernel.cl")
  endif()

  #
  # Bind names for code object file and directory containing it
  set(KERNEL_DIR ${PROJECT_BINARY_DIR}/${TARGET_DEV})
  set(KERNEL_SRC "${PROJECT_SOURCE_DIR}/${CL_FILE}")
  set(CODEASM_FILE "${KRNL_NAME}_${TARGET_DEV}.asm")
  set(CODEOBJ_FILE "${KRNL_NAME}_${TARGET_DEV}.hsaco")

  #
  # Add target name to a global list of target names. This must
  # be executed before the add_custom_target rule
  set(TARGET_NAME "${KRNL_NAME}_${TARGET_DEV}")
  set(ROCM_CODEOBJ_LIST ${ROCM_CODEOBJ_LIST} ${TARGET_NAME}
                      CACHE INTERNAL ROCM_CODEOBJ_LIST)

  #
  # Build the clang arguments shared by both invocations as one string and
  # tokenize it. UNIX_COMMAND splits on any whitespace; the legacy
  # one-argument form only replaces spaces, which would leave the newlines
  # of a multi-line BITCODE_ARGS embedded in an argument.
  #
  # The target triple uses the "amd" vendor (amdgcn-amd-amdhsa).
  #
  string(CONCAT COMMON_ARG_STR "-Xclang -finclude-default-header "
                "-target amdgcn-amd-amdhsa -mcpu=${TARGET_DEV} "
                "${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} "
                "${KERNEL_SRC}")
  separate_arguments(COMMON_ARG_LIST UNIX_COMMAND "${COMMON_ARG_STR}")

  #
  # Create a custom command producing both artifacts and a target that
  # depends on them. Declaring the files that are really written as OUTPUT
  # and the .cl source as DEPENDS lets the build skip up-to-date kernels
  # and rebuild them when the source changes.
  #
  add_custom_command(OUTPUT ${KERNEL_DIR}/${CODEASM_FILE}
                            ${KERNEL_DIR}/${CODEOBJ_FILE}
                     COMMAND ${CMAKE_COMMAND} -E make_directory ${KERNEL_DIR}
                     COMMAND ${CLANG} -S ${COMMON_ARG_LIST}
                             -o ${KERNEL_DIR}/${CODEASM_FILE}
                     COMMAND ${CLANG} ${COMMON_ARG_LIST}
                             -o ${KERNEL_DIR}/${CODEOBJ_FILE}
                     DEPENDS ${KERNEL_SRC}
                     COMMENT "BUILDING KERNEL..." VERBATIM)
  add_custom_target("${TARGET_NAME}" ALL
                    DEPENDS "${KERNEL_DIR}/${CODEASM_FILE}"
                            "${KERNEL_DIR}/${CODEOBJ_FILE}")

endfunction(CompileKernel)

#
# Creates the build rules for one kernel on every device in
# TARGET_DEV_LIST.
#
#   kname - base name of the kernel; the source is <kname>_kernel.cl
#
function(buildCodeObjects kname)

  # Bind the name of CL file and associate
  # a name for the target. Both variables are visible to CompileKernel,
  # which runs in a child scope of this function.
  set(KNAME_EXE "${kname}")
  set(CL_FILE "${kname}_kernel.cl")

  # Iterate through list of target devices
  foreach(tdev ${TARGET_DEV_LIST})
    CompileKernel(${kname} ${tdev})
  endforeach(tdev)

endfunction(buildCodeObjects)

# Kernels to build; each name maps to <name>_kernel.cl in this directory.
buildCodeObjects("read")
buildCodeObjects("write")
buildCodeObjects("binary_search")

#
# Create a custom target which will build the full suite
# of code objects for all kernels and target device pairs
# (ROCM_CODEOBJ_LIST was filled in by the CompileKernel calls above)
#
add_custom_target(rocm_code_objs ALL DEPENDS ${ROCM_CODEOBJ_LIST})


================================================
FILE: rocrtst/Kernels/binary_search_kernel.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/**
 * One work-item handles one segment of the search space.
 *
 * sortedArray[tid] holds the segment's lower bound (.x) and upper bound
 * (.y). If findMe lies within [lower, upper], the work-item records its
 * own index in outputArray[0].x and raises the "found" flag in
 * outputArray[0].w, marking that segment as the search space for the
 * next pass. Work-items whose segment cannot contain findMe write nothing.
 */

__kernel void
binarySearch(__global uint4 * outputArray,
             __const __global uint2  * sortedArray,
             const   unsigned int findMe) {
  const unsigned int tid = get_global_id(0);

  // Bounds of the segment owned by this work-item
  const uint2 bounds = sortedArray[tid];

  // Only a segment that brackets the value reports itself
  if ((bounds.x <= findMe) && (findMe <= bounds.y)) {
    outputArray[0].x = tid;
    outputArray[0].w = 1;
  }
}


/**
 * Multi-key segment search.
 *
 * Each work-item owns a 256-element window of `input` starting at
 * gid * 256. For every key that falls between the first and last element
 * of that window (inclusive), the window's start index is stored in the
 * output slot of that key.
 */
__kernel void
binarySearch_mulkeys(__global int *keys,
                     __global uint *input,
                     const unsigned int numKeys,
                     __global int *output) {

  const int gid = get_global_id(0);
  const int first = gid * 256;
  const int last = first + 255;

  unsigned int k = 0;
  while (k < numKeys) {
    const int key = keys[k];

    // Same test as "first <= key <= last" on the window's end values
    if (!(key < input[first] || key > input[last])) {
      output[k] = first;
    }
    ++k;
  }

}


/**
 * Concurrent multi-key binary search.
 *
 * The input is split into numSubdivisions segments of
 * inputSize / numSubdivisions elements. numSubdivisions consecutive
 * work-items cooperate on one key, each running a binary search over its
 * own segment; the work-item that finds the key stores the index in
 * output[key index].
 *
 * The inclusive upper bound is one element past the segment, so a
 * segment also covers the first element of its neighbour. It is clamped
 * to the last valid index so that the last segment can never read
 * input[inputSize], and so that an empty input is not read at all.
 *
 * @param keys            keys to look up, one per group of work-items
 * @param input           array to search; assumed sorted ascending
 * @param inputSize       number of elements in input
 * @param numSubdivisions segments per key; must be non-zero
 * @param output          receives the index of each key that was found
 */
__kernel void
binarySearch_mulkeysConcurrent(__global uint *keys,
                               __global uint *input,
                               const unsigned int inputSize, // num. of inputs
                               const unsigned int numSubdivisions,
                               __global int *output) {

  const unsigned int segment = inputSize / numSubdivisions;
  const size_t keyIdx = get_global_id(0) / numSubdivisions;

  int lBound = (get_global_id(0) % numSubdivisions) * segment;
  // Clamp to the last valid element (yields -1, i.e. no search, if empty)
  int uBound = min(lBound + (int)segment, (int)inputSize - 1);
  int myKey = keys[keyIdx];
  int mid;

  while(uBound >= lBound) {
    mid = (lBound + uBound) / 2;
    if(input[mid] == myKey) {
      output[keyIdx] = mid;
      return;
    } else if(input[mid] > myKey) {
      uBound = mid - 1;
    } else {
      lBound = mid + 1;
    }
  }
}


================================================
FILE: rocrtst/Kernels/read_kernel.cl
================================================

/**
 * @brief OpenCL kernel that reads a buffer and adds the sum of its values
 * to a destination integer
 *
 * @param src Pointer to an array of uint16 vectors, i.e. each element
 * holds 16 * 32-bit = 64 bytes
 *
 * @param size Specifies number of uint16 elements in the array
 *
 * @param threads Number of threads running this kernel; used as the
 * stride between the elements one thread visits
 *
 * @param dst Output parameter; each thread atomically adds its partial
 * sum to it
 *
 * @note: The number of elements in 'src' is expected to be an integral
 * multiple of 'threads'. Otherwise the threads do unequal amounts of
 * work (some possibly none at all), which leads to an incorrect
 * benchmark computation
 *
 */

__kernel void
  read_kernel(__global uint16 *src,
              ulong size, uint threads, __global uint* dst) {

  const int idx = get_global_id(0);
  __global uint16 *const srcEnd = src + size;
  __global uint16 *cursor;
  uint partial = 0;

  // Visit elements idx, idx + threads, idx + 2 * threads, ...
  for (cursor = &src[idx]; cursor < srcEnd; cursor += threads) {
    const uint16 pval = *cursor;

    partial += pval.s0 + pval.s1 + pval.s2 + pval.s3 +
               pval.s4 + pval.s5 + pval.s6 + pval.s7 +
               pval.s8 + pval.s9 + pval.sa + pval.sb +
               pval.sc + pval.sd + pval.se + pval.sf;
  }

  atomic_add(dst, partial);
}


================================================
FILE: rocrtst/Kernels/write_kernel.cl
================================================

/**
 * @brief OpenCL kernel that fills a buffer with a constant pattern
 *
 * @param dst Pointer to an array of uint16 vectors, i.e. each element
 * holds 16 * 32-bit = 64 bytes
 *
 * @param size Specifies number of uint16 elements in the array
 *
 * @param threads Number of threads running this kernel; used as the
 * stride between the elements one thread writes
 *
 * @note: The number of elements in 'dst' is expected to be an integral
 * multiple of 'threads'. Otherwise the threads do unequal amounts of
 * work (some possibly none at all), which leads to an incorrect
 * benchmark computation. A thread whose first element already lies past
 * the end of the buffer writes nothing.
 *
 */

__kernel void
  write_kernel(__global uint16 *dst,
               ulong size, uint threads) {

  // Every component carries the same 32-bit pattern
  uint16 pval = (uint16)(0xabababab);

  int idx = get_global_id(0);
  __global uint16 *dstEnd = dst + size;

  // Check the bound before the first store as well (the former do/while
  // stored once unconditionally), matching the loop in read_kernel.
  dst = &dst[idx];
  while (dst < dstEnd) {
    *dst = pval;
    dst += threads;
  }

}


================================================
FILE: rocrtst/README.md
================================================
# Building rocrtst

## Library dependencies
rocrtst needs hwloc and libnuma to build and run. On Debian systems, for example, you would need to get them like so:
```sh
sudo apt-get install libhwloc-dev libnuma-dev
```
## CMake option values
When building rocrtst, several cmake command line options are available--some mandatory, some optional. These are described here:
  * TARGET_DEVICES=<string>
    * Optional
    * semi-colon separated list of gpus to build kernels for; e.g. "gfx908;gfx900;...".
    * Default: the list of devices that is used is specified in the CMakeLists.txt file, and includes all the currently supported targets.
  * ROCRTST_BLD_TYPE=<debug|release>
    * Optional
    * Build a debug or release build
    * Default: Build the debug version
  * CMAKE_PREFIX_PATH=<"ROCR root path; LLVM root path">
    * Required
    * Where to find ROCr and LLVM. The ROCr root path is typically something like /opt/rocm. The LLVM directory is typically something like /opt/rocm/llvm
  * CMAKE_INSTALL_PREFIX="<Root path where rocrtst should be installed>"
    * Optional
    * Where to install rocrtst
  * CPACK_PACKAGING_INSTALL_PREFIX="<path where to install>"
    * Optional
    * Where to install rocrtst within DEB/RPM packages
  * CPACK_GENERATOR=<list of package generators>
    * Optional
    * List of CPack build generators to use; e.g. "DEB;RPM"
  * ROCM_PATCH_VERSION=<string>
    * Optional
    * ROCm patch version used in package name
  * ROCM_DIR=<ROCm path>
    * Required
    * ROCm root directory
  * LLVM_DIR="<clang location>"
    * Required
    * Location of clang executable
  * OPENCL_DIR=<location of OpenCL root>
    * Required
    * Location where OpenCL root resides
  * EMULATOR_BUILD=<true|false>
    * Optional
    * If EMULATOR_BUILD is defined, rocrtst will avoid tests that typically run too long on an HW emulator, or use a scaled-down version of the test.

## Steps to build
```sh
mkdir build
cd build
# See description of these options above.
# The values for these options are examples. They should be tailored
# for your system.
cmake -DTARGET_DEVICES=$GPU_LIST \
  -DROCRTST_BLD_TYPE=$ROCRTST_BUILD_TYPE \
  -DCMAKE_PREFIX_PATH="$PACKAGE_ROOT;$PACKAGE_ROOT/llvm" \
  -DCMAKE_INSTALL_PREFIX="$ROCM_INSTALL_PATH" \
  -DCPACK_PACKAGING_INSTALL_PREFIX="$ROCM_INSTALL_PATH" \
  -DCPACK_GENERATOR="DEB;RPM" \
  -DROCM_PATCH_VERSION=$ROCM_LIBPATCH_VERSION \
  -DROCM_DIR=$PACKAGE_ROOT \
  -DLLVM_DIR="$PACKAGE_ROOT/llvm/bin" \
  -DOPENCL_DIR=$PACKAGE_ROOT \
  -DEMULATOR_BUILD=$EMULATOR_BUILD \
      ..
# Build rocrtst executable
make
# Build rocrtst kernels
make rocrtst_kernels
```
## Running rocrtst
rocrtst needs to be able to find the ROCr library. This can be done either through ldconfig or by setting LD_LIBRARY_PATH to include the ROCr library directory.
When rocrtst is built, there is one rocrtst executable, and several symlinks pointing to that executable, one from each asic sub-directory. For example, for gfx900, we would see the following:
```sh
cd <rocrtst bin root>/gfx900
ls -l rocrtst
lrwxrwxrwx 1 user user 12 Sep 28 17:23 rocrtst64 -> ../rocrtst64
```
To run rocrtst, we should call the ASIC specific symlink. This allows the asic-specific kernels to be found.

rocrtst is a Google Test ("gtest") based program and accepts gtest options. Additionally, there are some rocrtst-specific options. All of these options can be seen by using the "-h" option:
```sh
$ <rocrtst bin>/gfx900 $ ./rocrtst64 -h
<GTest option description>
Optional RocRTst Arguments:
--iterations, -i <number of iterations to execute>; override default, which varies for each test
--rocrtst_help, -r print this help message
--verbosity, -v <verbosity level>
  Verbosity levels:
   0    -- minimal; just summary information
   1    -- intermediate; show intermediate values such as intermediate perf. data
   2    -- progress; show progress displays
   >= 3 -- more debug output
--monitor_verbosity, -m <monitor verbosity level>
  Monitor Verbosity levels:
   0    -- don't read or print out any GPU monitor information;
   1    -- print out all available monitor information before the first test and after each test
   >= 2 -- print out even more monitor information (test specific)

```


================================================
FILE: rocrtst/common/base_rocr.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "common/base_rocr.h"
#include "common/base_rocr_utils.h"
#include "common/os.h"

namespace rocrtst {

/// Construct a test object with every data member in a known state.
///
/// Agent handles start at -1, pool handles, sizes and the kernel object at 0,
/// pointers at nullptr and the AQL packet zero-filled. No profile is required
/// (-1), interrupts are not requested, and the current value of the
/// HSA_ENABLE_INTERRUPT environment variable is remembered.
BaseRocR::BaseRocR(void) {
  num_iteration_ = 1;

  // Agents and memory pools.
  cpu_device_.handle = -1;
  gpu_device1_.handle = -1;
  device_pool_.handle = 0;
  cpu_pool_.handle = 0;
  kern_arg_pool_.handle = 0;

  // Queue, kernel argument buffer and kernel metadata.
  main_queue_ = nullptr;
  kernarg_buffer_ = nullptr;
  kernel_object_ = 0;
  memset(&aql_, 0, sizeof(aql_));

  // These members have getters and are read by the utility functions, so
  // they must not be left indeterminate until a setter happens to run.
  profile_ = HSA_PROFILE_BASE;
  group_size_ = 0;
  group_segment_size_ = 0;
  private_segment_size_ = 0;
  kernarg_size_ = 0;
  kernarg_align_ = 0;

  set_requires_profile(-1);
  set_enable_interrupt(false);
  orig_hsa_enable_interrupt_ = GetEnv("HSA_ENABLE_INTERRUPT");
  set_kernel_file_name("");
  set_verbosity(0);
  set_monitor_verbosity(0);
  set_title("unset_title");
}

/// Nothing is released here; members clean up through their own destructors.
/// Code objects registered with set_code_object() are only deleted by
/// clear_code_object().
BaseRocR::~BaseRocR() = default;

}  // namespace rocrtst


================================================
FILE: rocrtst/common/base_rocr.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/// \file
/// File containing the base class declaration needed for all RocR tests and samples
/// that allow derived classes to use utility functions.

#ifndef ROCRTST_COMMON_BASE_ROCR_H_
#define ROCRTST_COMMON_BASE_ROCR_H_
#include <stdint.h>
#include <stdio.h>
#include <string>
#include "common/common.h"
#include "common/hsatimer.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "common/rocr.h"

namespace rocrtst {

/// Common interface for RocR tests and samples, required for several
/// common functions
class BaseRocR {
 public:
  BaseRocR(void);

  virtual ~BaseRocR(void);

  ///< Setters and Getters

  void set_gpu_device1(hsa_agent_t in_dev) {
    gpu_device1_.handle = in_dev.handle;
  }
  hsa_agent_t* gpu_device1(void) {
    return &gpu_device1_;
  }

  void set_cpu_device(hsa_agent_t in_dev) {
    cpu_device_.handle = in_dev.handle;
  }
  hsa_agent_t* cpu_device(void) {
    return &cpu_device_;
  }

  void set_kernel_file_name(const char* in_file_name) {
    kernel_file_name_ = in_file_name;
  }
  std::string const kernel_file_name(void) const {
    return kernel_file_name_;
  }
  const

  void set_kernel_name(std::string in_kernel_name) {
    kernel_name_ = in_kernel_name;
  }
  std::string const kernel_name(void) const {
    return kernel_name_;
  }

  void set_agent_name(std::string in_agent_name) {
    agent_name_ = in_agent_name;
  }
  std::string const get_agent_name(void) const {
    return agent_name_;
  }

  void set_kernel_object(uint64_t in_kernel_object) {
    kernel_object_ = in_kernel_object;
  }
  uint64_t kernel_object(void) const {
    return kernel_object_;
  }

  void set_profile(hsa_profile_t in_prof) {
    profile_ = in_prof;
  }
  hsa_profile_t profile(void) const {
    return profile_;
  }

  uint32_t private_segment_size(void) const {
    return private_segment_size_;
  }
  void set_private_segment_size(uint32_t sz) {
    private_segment_size_ = sz;
  }

  void set_group_segment_size(uint32_t sz) {
    group_segment_size_ = sz;
  }
  uint32_t group_segment_size(void) const {
    return group_segment_size_;
  }

  void set_group_size(uint32_t sz) {
    group_size_ = sz;
  }
  uint32_t group_size(void) const {
    return group_size_;
  }

  void set_main_queue(hsa_queue_t* q) {
    main_queue_ = q;
  }
  hsa_queue_t* main_queue(void) const {
    return main_queue_;
  }

  void clear_code_object() {
    for(std::vector<CodeObject *>::iterator  it = objs_.begin(); it != objs_.end(); ++it) {
      delete *it;
    }
    objs_.clear();
  }
  void set_code_object(CodeObject* obj) {
    objs_.push_back(obj);
  }

  hsa_kernel_dispatch_packet_t& aql(void) {
    return aql_;
  }

  void set_num_iteration(int num) {
    num_iteration_ = num;
  }
  uint32_t num_iteration(void) const {
    return num_iteration_;
  }

  hsa_amd_memory_pool_t& device_pool(void) {
    return device_pool_;
  }

  hsa_amd_memory_pool_t& cpu_pool(void) {
    return cpu_pool_;
  }

  hsa_amd_memory_pool_t& kern_arg_pool(void) {
    return kern_arg_pool_;
  }

  void set_kernarg_size(uint32_t sz) {
    kernarg_size_ = sz;
  }
  uint32_t kernarg_size(void) const {
    return kernarg_size_;
  }

  void set_kernarg_align(uint32_t align) {
    kernarg_align_ = align;
  }
  uint32_t kernarg_align(void) const {
    return kernarg_align_;
  }

  void* kernarg_buffer(void) const {
    return kernarg_buffer_;
  }
  void set_kernarg_buffer(void* buffer) {
    kernarg_buffer_ = buffer;
  }

  int32_t requires_profile(void) const {
    return requires_profile_;
  }

  char* orig_hsa_enable_interrupt() const {
    return orig_hsa_enable_interrupt_;
  }

  bool enable_interrupt() const {
    return enable_interrupt_;
  }

  void set_title(std::string name) {
    title_ = name;
  }
  std::string title(void) const {
    return title_;
  }

  PerfTimer* hsa_timer(void) {
    return &hsa_timer_;
  }

  void set_verbosity(uint32_t v) {
    verbosity_ = v;
  }
  uint32_t verbosity(void) const {
    return verbosity_;
  }

  void set_monitor_verbosity(uint32_t m) {
    monitor_verbosity_ = m;
  }
  uint32_t monitor_verbosity(void) const {
    return monitor_verbosity_;
  }

 protected:
  void set_requires_profile(int32_t reqd_prof) {
    requires_profile_ = reqd_prof;
  }

  void set_enable_interrupt(bool doEnable) {
    enable_interrupt_ = doEnable;
  }

 private:
  uint64_t num_iteration_;   ///< Number of times to execute test

  hsa_queue_t* main_queue_;   ///< AQL queue used for packets

  std::vector<CodeObject*> objs_; ///< CodeObject vector

  hsa_agent_t gpu_device1_;   ///< Handle to first GPU found

  hsa_agent_t cpu_device_;   ///< Handle to CPU

  hsa_amd_memory_pool_t device_pool_;   ///< Memory pool on gpu pool list

  hsa_amd_memory_pool_t cpu_pool_;   ///< Memory pool on cpu pool list

  hsa_amd_memory_pool_t kern_arg_pool_;   ///< Memory pool suitable for args

  uint64_t kernel_object_;   ///< Handle to kernel code

  std::string kernel_file_name_;   ///< Code object file name

  std::string kernel_name_;   ///< Kernel name

  std::string agent_name_;   ///< Agent name

  hsa_kernel_dispatch_packet_t aql_;   ///< Kernel dispatch packet

  uint32_t group_segment_size_;   ///< Kernel group seg size

  uint32_t kernarg_size_;   ///< Kernarg memory size

  uint32_t kernarg_align_;   ///< Alignment for kern argument memory

  void* kernarg_buffer_;    ///< Unaligned allocated kernel arg. buffer

  hsa_profile_t profile_;   ///< Device profile.

  uint32_t group_size_;   ///< Number of work items in one group

  uint32_t private_segment_size_;   ///< Kernel private seg size

  int32_t requires_profile_;   ///< Profile required by test (-1 if no req.)

  char* orig_hsa_enable_interrupt_;   ///< Orig. value of HSA_ENABLE_INTERRUPT

  bool enable_interrupt_;   ///< Whether to enable/disable interrupts for test

  std::string title_;   ///< Displayed title of test

  uint32_t verbosity_;   ///< How much additional output to produce

  uint32_t monitor_verbosity_;   ///< How much additional output to produce

  PerfTimer hsa_timer_;   ///< Timer to be used for timing parts of test
};

}  // namespace rocrtst
#endif  // ROCRTST_COMMON_BASE_ROCR_H_


================================================
FILE: rocrtst/common/base_rocr_utils.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/// \file
/// Utility functions that act on BaseRocR objects.

#include "common/base_rocr_utils.h"
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string>
#include "common/base_rocr.h"
#include "common/helper_funcs.h"
#include "common/os.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

namespace rocrtst {


// If `err` is not HSA_STATUS_SUCCESS, record a gtest failure carrying the HSA
// status string and return that status from the enclosing function.
// The argument is evaluated exactly once, and the do/while(0) wrapper makes
// the expansion a single statement that must be terminated with ';'.
#define RET_IF_HSA_UTILS_ERR(err)                                                                  \
  do {                                                                                             \
    const hsa_status_t hsa_utils_status_ = (err);                                                  \
    if (hsa_utils_status_ != HSA_STATUS_SUCCESS) {                                                 \
      const char* msg = 0;                                                                         \
      hsa_status_string(hsa_utils_status_, &msg);                                                  \
      EXPECT_EQ(HSA_STATUS_SUCCESS, hsa_utils_status_) << msg;                                     \
      return hsa_utils_status_;                                                                    \
    }                                                                                              \
  } while (0)

// Variant for functions that do not return hsa_status_t: if `err` is not
// HSA_STATUS_SUCCESS, record a gtest failure carrying the HSA status string
// and return `ret` from the enclosing function.
// `err` is evaluated exactly once; `ret` is evaluated only on failure.
#define RET_IF_HSA_UTILS_ERR_RET(err, ret)                                                         \
  do {                                                                                             \
    const hsa_status_t hsa_utils_status_ = (err);                                                  \
    if (hsa_utils_status_ != HSA_STATUS_SUCCESS) {                                                 \
      const char* msg = 0;                                                                         \
      hsa_status_string(hsa_utils_status_, &msg);                                                  \
      EXPECT_EQ(HSA_STATUS_SUCCESS, hsa_utils_status_) << msg;                                     \
      return (ret);                                                                                \
    }                                                                                              \
  } while (0)
// Clean up some of the common handles and memory used by BaseRocR code, then
// shut down hsa. Restore HSA_ENABLE_INTERRUPT to original value, if necessary.
//
// Released, in order: the kernarg buffer, the main queue, the completion
// signal of the staging AQL packet and all registered code objects. Each
// released handle is cleared in `test` so that it is neither freed twice nor
// reused after it was destroyed.
//
// Returns the first failing HSA status (cleanup stops there), or
// HSA_STATUS_ERROR if HSA is still initialized after hsa_shut_down(), which
// indicates unbalanced hsa_init() calls. Returns HSA_STATUS_SUCCESS otherwise.
hsa_status_t CommonCleanUp(BaseRocR* test) {
  hsa_status_t err;

  assert(test != nullptr);

  if (nullptr != test->kernarg_buffer()) {
    err = hsa_amd_memory_pool_free(test->kernarg_buffer());
    RET_IF_HSA_UTILS_ERR(err);
    test->set_kernarg_buffer(nullptr);
  }

  if (nullptr != test->main_queue()) {
    err = hsa_queue_destroy(test->main_queue());
    RET_IF_HSA_UTILS_ERR(err);
    test->set_main_queue(nullptr);
  }

  if (test->aql().completion_signal.handle != 0) {
    err = hsa_signal_destroy(test->aql().completion_signal);
    RET_IF_HSA_UTILS_ERR(err);
    // InitializeAQLPacket() only creates a signal when the handle is 0, so a
    // stale handle would make a later run use the destroyed signal.
    test->aql().completion_signal.handle = 0;
  }

  test->clear_code_object();
  err = hsa_shut_down();
  RET_IF_HSA_UTILS_ERR(err);

  // Ensure that HSA is actually closed.
  hsa_status_t check = hsa_shut_down();
  if (check != HSA_STATUS_ERROR_NOT_INITIALIZED) {
    EXPECT_EQ(HSA_STATUS_ERROR_NOT_INITIALIZED, check) << "hsa_init reference count was too high.";
    return HSA_STATUS_ERROR;
  }

  // A variable that was not set originally is restored as an empty string.
  std::string intr_val;

  if (test->orig_hsa_enable_interrupt() == nullptr) {
    intr_val = "";
  } else {
    intr_val = test->orig_hsa_enable_interrupt();
  }

  SetEnv("HSA_ENABLE_INTERRUPT", intr_val.c_str());

  return err;
}

// Printable profile names; indexed with profile values when reporting.
static const char* PROFILE_STR[] = {
  "HSA_PROFILE_BASE",
  "HSA_PROFILE_FULL",
};

/// Check the target's profile against the profile the test requires and
/// report the outcome on stdout.
/// The target profile and the "any profile" note are printed only when
/// verbosity is above 0; the requirement and its verdict are always printed
/// when the test names a required profile.
/// \param[in] test Test that provides the actual and the required profile.
/// \returns bool
///          - true  No profile is required (-1), or the required profile
///                  equals the target's profile
///          - false The required profile differs from the target's profile
bool CheckProfileAndInform(BaseRocR* test) {
  const bool verbose = test->verbosity() > 0;

  if (verbose) {
    std::cout << "Target HW Profile is "
              << PROFILE_STR[test->profile()] << std::endl;
  }

  // No requirement: nothing to compare.
  if (test->requires_profile() == -1) {
    if (verbose) {
      std::cout << "Test can run on any profile. OK." << std::endl;
    }
    return true;
  }

  std::cout << "Test requires " << PROFILE_STR[test->requires_profile()]
            << ". ";

  const bool profile_matches = (test->requires_profile() == test->profile());
  std::cout << (profile_matches ? "OK." : "Not Running.") << std::endl;
  return profile_matches;
}

/// Translate the status of an iterate call (such as
/// hsa_amd_agent_iterate_memory_pools) into a found / not-found result.
/// \param[in] err Status returned by the iterate call
/// \returns HSA_STATUS_SUCCESS when the iteration was stopped early
///  (HSA_STATUS_INFO_BREAK), HSA_STATUS_ERROR when it ran to completion
///  (HSA_STATUS_SUCCESS, i.e. nothing was found), and any other status
///  unchanged
static hsa_status_t ProcessIterateError(hsa_status_t err) {
  switch (err) {
    case HSA_STATUS_INFO_BREAK:
      // The callback stopped the iteration: the item was found.
      return HSA_STATUS_SUCCESS;
    case HSA_STATUS_SUCCESS:
      // The iteration visited everything without a match.
      return HSA_STATUS_ERROR;
    default:
      return err;
  }
}

// Select the cpu, device and kernel-argument pools of `test` using the
// common search callbacks. On a full-profile target all three pools are
// searched on the CPU agent with FindAPUStandardPool; otherwise the cpu pool
// and kernarg pool come from the CPU agent (FindStandardPool and
// FindKernArgPool) and the device pool from the GPU agent (FindStandardPool).
// Tests with other needs should set cpu_pool(), device_pool() and/or
// kern_arg_pool() themselves instead of calling this function.
// Returns HSA_STATUS_SUCCESS when every pool was found, otherwise the status
// of the first search that failed.
hsa_status_t SetPoolsTypical(BaseRocR* test) {
  hsa_status_t status;

  if (test->profile() != HSA_PROFILE_FULL) {
    // CPU and GPU are searched separately.
    status = hsa_amd_agent_iterate_memory_pools(
        *test->cpu_device(), rocrtst::FindStandardPool, &test->cpu_pool());
    RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

    status = hsa_amd_agent_iterate_memory_pools(
        *test->gpu_device1(), rocrtst::FindStandardPool, &test->device_pool());
    RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

    status = hsa_amd_agent_iterate_memory_pools(
        *test->cpu_device(), rocrtst::FindKernArgPool, &test->kern_arg_pool());
    RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

    return HSA_STATUS_SUCCESS;
  }

  // Full profile: every pool is looked up on the CPU agent.
  status = hsa_amd_agent_iterate_memory_pools(
      *test->cpu_device(), rocrtst::FindAPUStandardPool, &test->cpu_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

  status = hsa_amd_agent_iterate_memory_pools(
      *test->cpu_device(), rocrtst::FindAPUStandardPool, &test->device_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

  status = hsa_amd_agent_iterate_memory_pools(
      *test->cpu_device(), rocrtst::FindAPUStandardPool,
      &test->kern_arg_pool());
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));

  return HSA_STATUS_SUCCESS;
}

// Initialize HSA for a test. When the test asks for interrupts,
// HSA_ENABLE_INTERRUPT is set to "1" before hsa_init() is called.
// Returns the hsa_init() status on failure, HSA_STATUS_SUCCESS otherwise.
hsa_status_t InitAndSetupHSA(BaseRocR* test) {
  const bool want_interrupts = test->enable_interrupt();
  if (want_interrupts) {
    SetEnv("HSA_ENABLE_INTERRUPT", "1");
  }

  hsa_status_t init_status = hsa_init();
  RET_IF_HSA_UTILS_ERR(init_status);

  return HSA_STATUS_SUCCESS;
}

// Find a GPU and a CPU agent with hsa_iterate_agents, store them in
// test->gpu_device1 and test->cpu_device, record the GPU's profile in the
// test and check it against the profile the test requires.
// Returns the failing HSA status if an agent or its info cannot be obtained,
// HSA_STATUS_ERROR if the profile requirement is not met, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t SetDefaultAgents(BaseRocR* test) {
  hsa_status_t status;

  hsa_agent_t gpu;
  hsa_agent_t cpu;
  gpu.handle = 0;
  cpu.handle = 0;

  status = hsa_iterate_agents(FindGPUDevice, &gpu);
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));
  test->set_gpu_device1(gpu);

  status = hsa_iterate_agents(FindCPUDevice, &cpu);
  RET_IF_HSA_UTILS_ERR(rocrtst::ProcessIterateError(status));
  test->set_cpu_device(cpu);

  // A zero handle means the search callback never filled the agent in.
  if (gpu.handle == 0) {
    std::cout << "GPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
  }

  if (cpu.handle == 0) {
    std::cout << "CPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_UTILS_ERR(HSA_STATUS_ERROR);
  }

  if (test->verbosity() > 0) {
    char gpu_name[64] = {0};
    status = hsa_agent_get_info(gpu, HSA_AGENT_INFO_NAME, gpu_name);
    RET_IF_HSA_UTILS_ERR(status);
    std::cout << "The gpu device name is " << gpu_name << std::endl;
  }

  hsa_profile_t gpu_profile;
  status = hsa_agent_get_info(gpu, HSA_AGENT_INFO_PROFILE, &gpu_profile);
  RET_IF_HSA_UTILS_ERR(status);
  test->set_profile(gpu_profile);

  return CheckProfileAndInform(test) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Silent profile check: true when the test requires no particular profile
// (-1) or when the required profile equals the profile stored in the test.
bool CheckProfile(BaseRocR const* test) {
  const bool any_profile_ok = (test->requires_profile() == -1);
  return any_profile_ok || (test->requires_profile() == test->profile());
}

/// Locate file using local and device named file paths.
/// The file is first looked up as "./<filename>" and, failing that, as
/// "./<agent name>/<filename>", where the agent name is queried from HSA.
/// \param[in] filename Name of the file to locate
/// \param[in] agent Agent whose name selects the fallback sub-directory
/// \returns The first of the two paths that can be opened for reading; an
///  empty string if the agent name cannot be queried (a gtest failure is
///  recorded in that case)
/// \throws std::runtime_error if the file cannot be opened at either path
std::string LocateKernelFile(std::string filename, hsa_agent_t agent) {
  char agent_name[64] = {0};
  std::string obj_file;
  hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
  RET_IF_HSA_UTILS_ERR_RET(err, obj_file);

  obj_file = "./" + filename;
  int file_handle = open(obj_file.c_str(), O_RDONLY);
  if (file_handle < 0) {
    obj_file = "./" + std::string(agent_name) + "/" + filename;
    file_handle = open(obj_file.c_str(), O_RDONLY);
    if (file_handle < 0) {
      // The exception used to be constructed but never thrown, which hid the
      // failure and passed an invalid descriptor to close().
      throw std::runtime_error("Could not open file.\n");
    }
  }

  // The descriptor only served as an existence/readability probe.
  close(file_handle);
  return obj_file;
}

// Load the specified kernel code from the specified file, inspect and fill
// in BaseRocR member variables related to the kernel and executable.
// Required Input BaseRocR member variables:
// - gpu_device1() (used when `agent` is null)
// - kernel_file_name()
// - kernel_name()
//
// Written BaseRocR member variables:
//  -kernel_object()
//  -private_segment_size()
//  -group_segment_size()
//  -kernarg_size()
//  -kernarg_align()
//
// The created CodeObject is registered with the test and is released by
// BaseRocR::clear_code_object().
// Returns HSA_STATUS_ERROR (and records a gtest failure) when the kernel
// "<kernel_name>.kd" is not found in the code object, HSA_STATUS_SUCCESS
// otherwise.
hsa_status_t LoadKernelFromObjFile(BaseRocR* test, hsa_agent_t* agent) {
  assert(test != nullptr);
  if (agent == nullptr) {
    agent = test->gpu_device1();  // Assume GPU agent for now
  }

  std::string obj_file = LocateKernelFile(test->kernel_file_name(), *agent);

  // Step back from the `agent` member to the enclosing Device. offsetof()
  // yields a byte count, so the subtraction must be done on a char pointer
  // rather than on the typed hsa_agent_t pointer.
  // NOTE(review): this assumes `agent` may be viewed as the `agent` member of
  // a Device - confirm against the Device definition.
  Device* gpu = reinterpret_cast<Device*>(
      reinterpret_cast<char*>(agent) - offsetof(Device, agent));
  CodeObject* obj = new CodeObject(obj_file, *gpu);
  test->set_code_object(obj);

  Kernel kern;
  std::string kern_name = test->kernel_name() + ".kd";
  if (!obj->GetKernel(kern_name, kern)) {
      ADD_FAILURE();
      return HSA_STATUS_ERROR;
  }

  test->set_kernel_object(kern.handle);
  test->set_private_segment_size(kern.scratch);
  test->set_group_segment_size(kern.group);
  test->set_kernarg_size(kern.kernarg_size);

  // The alignment comes from the reported kernarg alignment (not from the
  // kernarg size); a reported alignment of 0 falls back to 16.
  assert(kern.kernarg_align >= 16 && "Reported kernarg alignment is too small.");
  test->set_kernarg_align((kern.kernarg_align == 0) ? 16 : kern.kernarg_align);
  return HSA_STATUS_SUCCESS;
}

// Create a multi-producer queue on `device` and store it in `*queue`.
// A `num_pkts` of 0 selects the agent's maximum queue size
// (HSA_AGENT_INFO_QUEUE_MAX_SIZE). Returns the failing HSA status, or
// HSA_STATUS_SUCCESS.
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
                         uint32_t num_pkts) {
  uint32_t queue_size = num_pkts;

  if (0 == queue_size) {
    hsa_status_t info_status = hsa_agent_get_info(
        device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
    RET_IF_HSA_UTILS_ERR(info_status);
  }

  hsa_status_t create_status =
      hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                       UINT32_MAX, UINT32_MAX, queue);
  RET_IF_HSA_UTILS_ERR(create_status);

  return HSA_STATUS_SUCCESS;
}
// Fill `aql` with standard defaults (256x1x1 workgroup and grid) and with
// the segment sizes and kernel object stored in `test`. A completion signal
// is created only when the packet does not already carry one.
// Returns HSA_STATUS_ERROR for a null packet, otherwise the status of the
// signal creation (HSA_STATUS_SUCCESS when no signal had to be created).
hsa_status_t InitializeAQLPacket(const BaseRocR* test,
                         hsa_kernel_dispatch_packet_t* aql) {
  assert(aql != nullptr);

  if (nullptr == aql) {
    return HSA_STATUS_ERROR;
  }

  // The packet type starts out as Invalid; it is switched to Kernel Dispatch
  // right before the doorbell is rung.
  aql->header = 1;
  aql->setup = 1;

  aql->workgroup_size_x = 256;
  aql->workgroup_size_y = 1;
  aql->workgroup_size_z = 1;

  aql->grid_size_x = (uint64_t) 256;  // manual_input*group_input; workg max sz
  aql->grid_size_y = 1;
  aql->grid_size_z = 1;

  // Kernel-specific values come from the test object.
  aql->private_segment_size = test->private_segment_size();
  aql->group_segment_size = test->group_segment_size();
  aql->kernel_object = test->kernel_object();

  // aql->kernarg_address is deliberately left alone: AllocAndSetKernArgs()
  // may already have filled it in before this function is called.

  if (aql->completion_signal.handle) {
    return HSA_STATUS_SUCCESS;
  }
  return hsa_signal_create(1, 0, NULL, &aql->completion_signal);
}

// Reserve the next slot of the test's main queue and copy the test's staging
// AQL packet into it. The reserved write index is stored in `*ind`. The
// `header` and `setup` fields are not copied. Returns the queue slot that
// was written.
hsa_kernel_dispatch_packet_t * WriteAQLToQueue(BaseRocR* test, uint64_t *ind) {
  assert(test);
  assert(test->main_queue());

  hsa_queue_t* queue = test->main_queue();
  hsa_kernel_dispatch_packet_t* ring =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
  const uint32_t wrap_mask = queue->size - 1;

  // Claim one slot; the index wraps around the ring through the mask.
  const uint64_t write_idx = hsa_queue_add_write_index_relaxed(queue, 1);
  *ind = write_idx;

  const hsa_kernel_dispatch_packet_t& src = test->aql();
  hsa_kernel_dispatch_packet_t* dst = &ring[write_idx & wrap_mask];

  dst->workgroup_size_x = src.workgroup_size_x;
  dst->workgroup_size_y = src.workgroup_size_y;
  dst->workgroup_size_z = src.workgroup_size_z;
  dst->grid_size_x = src.grid_size_x;
  dst->grid_size_y = src.grid_size_y;
  dst->grid_size_z = src.grid_size_z;
  dst->private_segment_size = src.private_segment_size;
  dst->group_segment_size = src.group_segment_size;
  dst->kernel_object = src.kernel_object;
  dst->kernarg_address = src.kernarg_address;
  dst->completion_signal = src.completion_signal;

  return dst;
}

// Copy `aql_pkt` into the slot of `queue` selected by `indx` (wrapped with
// the queue size mask). The `header` and `setup` fields are not copied and
// the queue's write index is not touched.
void
WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx,
                                      hsa_kernel_dispatch_packet_t *aql_pkt) {
  assert(queue);
  assert(aql_pkt);

  hsa_kernel_dispatch_packet_t* ring =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
  const uint32_t wrap_mask = queue->size - 1;
  hsa_kernel_dispatch_packet_t* dst = &ring[indx & wrap_mask];

  dst->workgroup_size_x = aql_pkt->workgroup_size_x;
  dst->workgroup_size_y = aql_pkt->workgroup_size_y;
  dst->workgroup_size_z = aql_pkt->workgroup_size_z;
  dst->grid_size_x = aql_pkt->grid_size_x;
  dst->grid_size_y = aql_pkt->grid_size_y;
  dst->grid_size_z = aql_pkt->grid_size_z;
  dst->private_segment_size = aql_pkt->private_segment_size;
  dst->group_segment_size = aql_pkt->group_segment_size;
  dst->kernel_object = aql_pkt->kernel_object;
  dst->kernarg_address = aql_pkt->kernarg_address;
  dst->completion_signal = aql_pkt->completion_signal;
}

// Allocate a buffer from the test's kern_arg_pool, grant the GPU and CPU
// agents access to it, copy `arg_size` bytes of `args` to the aligned start
// of the buffer and point the test's AQL packet at that aligned address.
// The unaligned allocation is recorded with set_kernarg_buffer().
// Returns the failing HSA status, or HSA_STATUS_SUCCESS.
hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args, size_t arg_size) {
  assert(args != nullptr);
  assert(test != nullptr);

  size_t alignment = test->kernarg_align();
  // Over-allocate by twice the alignment so that the aligned start address
  // still leaves room for all of the arguments.
  size_t alloc_bytes = arg_size + (alignment * 2);

  void* raw_buf = nullptr;
  hsa_status_t status = hsa_amd_memory_pool_allocate(
      test->kern_arg_pool(), alloc_bytes, 0, &raw_buf);
  RET_IF_HSA_UTILS_ERR(status);

  // Remember the raw allocation, not the aligned address.
  test->set_kernarg_buffer(raw_buf);

  void* aligned_buf = rocrtst::AlignUp(raw_buf, alignment);

  assert(arg_size >= test->kernarg_size());
  assert(((uintptr_t)aligned_buf + arg_size) <
                                        ((uintptr_t)raw_buf + alloc_bytes));

  hsa_agent_t accessors[2] = {*test->gpu_device1(), *test->cpu_device()};
  status = hsa_amd_agents_allow_access(2, accessors, NULL, raw_buf);
  RET_IF_HSA_UTILS_ERR(status);

  status = hsa_memory_copy(aligned_buf, args, arg_size);
  RET_IF_HSA_UTILS_ERR(status);

  test->aql().kernarg_address = aligned_buf;

  return HSA_STATUS_SUCCESS;
}

#undef RET_IF_HSA_UTILS_ERR

}  // namespace rocrtst


================================================
FILE: rocrtst/common/base_rocr_utils.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_COMMON_BASE_ROCR_UTILS_H_
#define ROCRTST_COMMON_BASE_ROCR_UTILS_H_ 1

/// \file
/// Prototypes of utility functions that act on BaseRocR objects.

#include "common/base_rocr.h"
#include "hsa/hsa.h"

namespace rocrtst {

/// Locate kernel code object file and return path suitable for use with open().
std::string LocateKernelFile(std::string filename, hsa_agent_t agent);

/// Open binary kernel object file and set all member data related to the
/// kernel. Assumes that input test already has the kernel file name,
/// agent name and kernel function specified.
/// \param[in] test Test for which the kernel will be loaded.
/// \param[in] agent Agent for which the kernel will be loaded.
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t LoadKernelFromObjFile(BaseRocR* test, hsa_agent_t* agent);

/// Do initialization tasks for HSA test program.
/// \param[in] test Test to initialize
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t InitAndSetupHSA(BaseRocR* test);

/// Find and set the cpu and gpu agent member variables. Also checks that
/// gpu agent meets test requirements (e.g., FULL profile vs. BASE profile).
hsa_status_t SetDefaultAgents(BaseRocR* test);

/// For the provided device agent, create an AQL queue
/// \param[in] device Device for which a queue is to be created
/// \param[out] queue Address to which created queue pointer will be written
/// \param[in] num_pkts [Optional] Size of the queue to create (defaults to 0
///  when omitted)
/// \returns  HSA_STATUS_SUCCESS if no errors encountered
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
                         uint32_t num_pkts = 0);

/// This function sets some reasonable default values for an AQL packet.
/// Override any field as necessary after calling this function.
/// \param[in] test Test from which information to populate aql packet can
/// be drawn.
/// \param[inout] aql Caller provided pointer to aql packet that will be
/// populated
/// \returns Appropriate hsa_status_t
hsa_status_t InitializeAQLPacket(const BaseRocR* test,
                         hsa_kernel_dispatch_packet_t* aql);

/// This function writes all of the aql packet fields to the queue besides
/// "setup" and "header". This assumes all the aql fields have been set
/// appropriately.
/// \param[in] test Test containing the queue and aql packet to be written.
/// \returns Pointer to dispatch packet in queue that was written to
hsa_kernel_dispatch_packet_t* WriteAQLToQueue(BaseRocR* test, uint64_t *ind);

/// Write every field of \p aql_pkt besides "setup" and "header" into the
/// packet slot of \p queue selected by \p indx.
/// \param[in] queue Queue whose packet buffer is written
/// \param[in] indx Packet index; it is wrapped with (queue size - 1) to select
/// the slot
/// \param[in] aql_pkt Packet providing the field values to copy
/// \returns void
void WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx,
                                      hsa_kernel_dispatch_packet_t *aql_pkt);
/// This function writes the first 32 bits of an aql packet to the provided
/// aql packet: \p header occupies the low 16 bits and \p setup the high 16
/// bits. The store is a single atomic 32-bit write with release ordering.
/// This function is meant to be called immediately before ringing the
/// door_bell signal.
/// \param[in] header Value to be written to header field
/// \param[in] setup Value to be written to setup field
/// \param[in] queue_packet Start address in queue memory of the aql packet to
/// be written
/// \returns void
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                hsa_kernel_dispatch_packet_t* queue_packet) {
  // Widen to an unsigned 32-bit type before shifting. A bare uint16_t is
  // promoted to (signed) int, so shifting a setup value with bit 15 set would
  // push a one into the sign bit.
  const uint32_t first_dword =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);
  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet), first_dword,
                   __ATOMIC_RELEASE);
}

/// Perform common operations to clean up after executing a test. Specifically,
/// hsa_shut_down() is called and environment variables that were changed are
/// reset to their original values.
/// \param[in] test Test for which clean up will be performed
/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
///   error code otherwise.
hsa_status_t CommonCleanUp(BaseRocR* test);

///  Check to see if target machine has the necessary profile to run the
///  provided test.
///  \param[in] test The test that specifies the required profile.
bool CheckProfile(BaseRocR const* test);

/// Allocate memory from the kernel args pool and write the provided argument
/// data to the kernel arg memory. Assumes kern_arg memory pool has been
/// assigned. The amount of memory allocated will actually be \p arg_size
/// plus twice the alignment required by the kernel arguments. The argument
/// will be written with the proper alignment within the allocated buffer.
/// \p test kernarg_buffer() will point to the allocated buffer, and it should
/// be freed when the kernel is no longer being used.
/// \param test Test from which to find kern_arg pool to write arguments
/// \param args pointer to block of data containing kernel arguments to be
///  written. Arguments are assumed to be of the correct placement, length,
///  and with any padding that is expected by the OpenCL kernel
/// \param arg_size Size of the kernel arg data (including padding) to be
/// written
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t AllocAndSetKernArgs(BaseRocR* test, void* args,
                                 size_t arg_size);

/// Verify that the machine running the test has the required profile.
/// This function will verify that the execution machine meets any specific
/// test requirement for a profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL).
/// \param[in] test Test that provides profile requirements.
/// \returns bool
///          - true Machine meets test requirements
///          - false Machine does not meet test requirements
bool CheckProfileAndInform(BaseRocR* test);

/// This function will set the cpu and gpu memory pools to the type used in
/// many applications.
/// \param[in] test Test that provides profile requirements.
/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
///   error code otherwise.
hsa_status_t SetPoolsTypical(BaseRocR* test);

/// Work-around for hsa_amd_memory_fill, which is currently broken.
/// \param[in] ptr Pointer to start of memory location to be filled
/// \param[in] value Value to write to each byte of input buffer
/// \param[in] count Size of buffer to fill
/// \param[in] dst_ag Agent owning the buffer to be filled
/// \param[in] src_ag Agent wanting to do the fill
/// \param[in] test Test that has handles to cpu and gpu agents that can own
/// either source or destination of fill
/// \returns HSA_STATUS_SUCCESS if no errors
hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
      size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, BaseRocR* test);
}  // namespace rocrtst
#endif  // ROCRTST_COMMON_BASE_ROCR_UTILS_H_


================================================
FILE: rocrtst/common/common.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/// \file
/// Implementation of utility functions used by RocR applications
#include "common/common.h"
#include <assert.h>
#include <sstream>
#include <string>
#include <memory>

namespace rocrtst {


// Early-return helper for the functions in this file: when `err` is anything
// other than HSA_STATUS_SUCCESS, print the line, file and status value to
// std::cout and return `err` from the enclosing function. The argument is
// evaluated more than once, so pass a plain variable rather than a call.
// The macro is #undef'd at the end of this file.
#define RET_IF_HSA_COMMON_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
              __FILE__ << ". Call returned " << err << std::endl; \
    return (err); \
  } \
}

// Shared worker for the FindGPUDevice/FindCPUDevice callbacks.
// Queries the device type of `agent`; when it equals `dev_type` the agent is
// stored through `data` (treated as hsa_agent_t*) and HSA_STATUS_INFO_BREAK is
// returned. Otherwise HSA_STATUS_SUCCESS is returned. A null `data` yields
// HSA_STATUS_ERROR_INVALID_ARGUMENT and a failed query returns its status.
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
                              hsa_device_type_t dev_type) {
  assert(data != nullptr);

  // Run-time guard kept alongside the assert, which is removed when NDEBUG
  // is defined.
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t found_type;
  const hsa_status_t query_st =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &found_type);
  RET_IF_HSA_COMMON_ERR(query_st);

  if (found_type != dev_type) {
    return HSA_STATUS_SUCCESS;
  }

  *static_cast<hsa_agent_t*>(data) = agent;
  return HSA_STATUS_INFO_BREAK;
}

// Collect CPU agents. `data` must point to a std::vector<hsa_agent_t>; every
// agent whose device type is HSA_DEVICE_TYPE_CPU is appended to it. The
// signature matches the callback accepted by hsa_iterate_agents.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for null `data`, the query status
// on failure, and HSA_STATUS_SUCCESS otherwise.
hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data) {
  assert(data != nullptr);
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t kind;
  const hsa_status_t query_st =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &kind);
  RET_IF_HSA_COMMON_ERR(query_st);

  // Past the macro the query is known to have succeeded.
  if (kind == HSA_DEVICE_TYPE_CPU) {
    static_cast<std::vector<hsa_agent_t>*>(data)->push_back(agent);
  }
  return query_st;
}


// Collect GPU agents. `data` must point to a std::vector<hsa_agent_t>; every
// agent whose device type is HSA_DEVICE_TYPE_GPU is appended to it. The
// signature matches the callback accepted by hsa_iterate_agents.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for null `data`, the query status
// on failure, and HSA_STATUS_SUCCESS otherwise.
hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data) {
  assert(data != nullptr);
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t kind;
  const hsa_status_t query_st =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &kind);
  RET_IF_HSA_COMMON_ERR(query_st);

  // Past the macro the query is known to have succeeded.
  if (kind == HSA_DEVICE_TYPE_GPU) {
    static_cast<std::vector<hsa_agent_t>*>(data)->push_back(agent);
  }
  return query_st;
}

// Memory-pool callback: find coarse grained device memory if this exists,
// fine grained otherwise.
//
// `data` must point to an hsa_amd_memory_pool_t. A global-segment pool with
// the COARSE_GRAINED flag is always stored through it. A global-segment pool
// with the FINE_GRAINED flag is stored only while the output handle is still
// zero, so callers must zero-initialise the output pool before iterating for
// the fine-grained fallback to take effect.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for null `data`, the failing
// status of a pool query, and HSA_STATUS_SUCCESS otherwise (iteration is never
// cut short by this callback).
hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data) {
  if (nullptr == data) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  hsa_amd_memory_pool_t* ret = static_cast<hsa_amd_memory_pool_t*>(data);

  hsa_amd_segment_t segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  RET_IF_HSA_COMMON_ERR(err);
  if (HSA_AMD_SEGMENT_GLOBAL != segment)
    return HSA_STATUS_SUCCESS;

  hsa_amd_memory_pool_global_flag_t flags;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
  RET_IF_HSA_COMMON_ERR(err);

  // this is valid for dGPUs. But on APUs, it has to be FINE_GRAINED
  if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
    *ret = pool;
  } else if ((ret->handle == 0) &&
             (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED)) {
    // this is for APUs: take the fine-grained pool only when nothing has been
    // selected yet. The previous test compared the pointer itself against
    // nullptr, which made this branch unreachable for any valid `data`.
    *ret = pool;
  }
  return HSA_STATUS_SUCCESS;
}

// Memory-pool callback that looks for a pool usable for kernel arguments.
// When `pool` is in the global segment and carries the KERNARG_INIT flag it is
// stored through `data` (treated as hsa_amd_memory_pool_t*). The callback
// always lets iteration continue, so a later matching pool overwrites an
// earlier one.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for null `data`, the failing
// status of a pool query, and HSA_STATUS_SUCCESS otherwise.
hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_amd_segment_t pool_segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_COMMON_ERR(err);
  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  hsa_amd_memory_pool_global_flag_t pool_flags;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_flags);
  RET_IF_HSA_COMMON_ERR(err);

  const bool holds_kernargs =
      (pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
  if (holds_kernargs) {
    *static_cast<hsa_amd_memory_pool_t*>(data) = pool;
  }

  return HSA_STATUS_SUCCESS;
}

// Agent callback: stores `agent` through `data` and returns
// HSA_STATUS_INFO_BREAK when the agent is a GPU; see FindAgent for the other
// return values.
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
  const hsa_device_type_t wanted = HSA_DEVICE_TYPE_GPU;
  return FindAgent(agent, data, wanted);
}

// Agent callback: stores `agent` through `data` and returns
// HSA_STATUS_INFO_BREAK when the agent is a CPU; see FindAgent for the other
// return values.
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
  const hsa_device_type_t wanted = HSA_DEVICE_TYPE_CPU;
  return FindAgent(agent, data, wanted);
}

/// Enumeration that indicates whether a pool property must be present or not.
/// This is meant to be used by FindPool, which rejects a pool when a property
/// requested as ON is absent or a property requested as OFF is present.
typedef enum {
  POOL_PROP_OFF = 0,   ///< The property must not be present.
  POOL_PROP_ON,        ///< The property must be present.
  POOL_PROP_DONT_CARE  ///< We don't care if the property is present or not.
} pool_prop_t;

// Generic memory-pool filter used by the Find*Pool callbacks.
// A pool is accepted when it lives in `in_segment` and agrees with each
// requested property (accessible_by_all, kern_arg, fine_grain). The kern_arg
// and fine_grain requests are only evaluated for the global segment.
// On acceptance the pool is stored through `data` (hsa_amd_memory_pool_t*) and
// HSA_STATUS_INFO_BREAK is returned; a rejected pool yields
// HSA_STATUS_SUCCESS. Null `data` yields HSA_STATUS_ERROR_INVALID_ARGUMENT and
// a failed pool query returns its status.
static hsa_status_t
FindPool(hsa_amd_memory_pool_t pool, void* data, hsa_amd_segment_t in_segment,
         pool_prop_t accessible_by_all, pool_prop_t kern_arg,
         pool_prop_t fine_grain) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // True when the observed state of a property contradicts the request.
  const auto conflicts = [](bool present, pool_prop_t wanted) -> bool {
    switch (wanted) {
      case POOL_PROP_ON:
        return !present;
      case POOL_PROP_OFF:
        return present;
      default:
        return false;
    }
  };

  hsa_amd_segment_t pool_segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_COMMON_ERR(err);

  if (pool_segment != in_segment) {
    return HSA_STATUS_SUCCESS;
  }

  if (in_segment == HSA_AMD_SEGMENT_GLOBAL) {
    uint32_t global_bits;
    err = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_bits);
    RET_IF_HSA_COMMON_ERR(err);

    const bool is_kernarg =
        (global_bits & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
    if (conflicts(is_kernarg, kern_arg)) {
      return HSA_STATUS_SUCCESS;
    }

    const bool is_fine_grained =
        (global_bits & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;
    if (conflicts(is_fine_grained, fine_grain)) {
      return HSA_STATUS_SUCCESS;
    }
  }

  // The accessibility query is issued only when the caller cares about it.
  if (accessible_by_all != POOL_PROP_DONT_CARE) {
    bool reachable_by_all;
    err = hsa_amd_memory_pool_get_info(
        pool,
        static_cast<hsa_amd_memory_pool_info_t>(
            HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL),
        &reachable_by_all);
    RET_IF_HSA_COMMON_ERR(err);

    if (conflicts(reachable_by_all, accessible_by_all)) {
      return HSA_STATUS_SUCCESS;
    }
  }

  *static_cast<hsa_amd_memory_pool_t*>(data) = pool;
  return HSA_STATUS_INFO_BREAK;
}

// Memory-pool callback: accepts the first global-segment pool that is NOT a
// kernarg pool; accessibility and granularity are not considered.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
  const pool_prop_t any = POOL_PROP_DONT_CARE;
  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL,
                  /* accessible_by_all */ any,
                  /* kern_arg */ POOL_PROP_OFF,
                  /* fine_grain */ any);
}

// Memory-pool callback: accepts the first global-segment pool that IS a
// kernarg pool; accessibility and granularity are not considered.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
  const pool_prop_t any = POOL_PROP_DONT_CARE;
  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL,
                  /* accessible_by_all */ any,
                  /* kern_arg */ POOL_PROP_ON,
                  /* fine_grain */ any);
}
// Memory-pool callback: accepts the first global-segment pool that is
// accessible by all agents and is NOT a kernarg pool; granularity is not
// considered.
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL,
                  /* accessible_by_all */ POOL_PROP_ON,
                  /* kern_arg */ POOL_PROP_OFF,
                  /* fine_grain */ POOL_PROP_DONT_CARE);
}

// Memory-pool callback: accepts the first global-segment pool regardless of
// its kernarg, accessibility or granularity properties.
hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data) {
  const pool_prop_t any = POOL_PROP_DONT_CARE;
  return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, any, any, any);
}

// Fill `agent_pools` with one entry per agent reported by hsa_iterate_agents.
// Each entry holds the agent handle together with every memory pool reported
// for that agent. An agent's entry is appended even when enumerating its
// pools fails; the failing status is then returned to the agent iteration.
hsa_status_t
GetAgentPools(std::vector<std::shared_ptr<agent_pools_t>> *agent_pools) {
  assert(agent_pools != nullptr);

  using AgentPoolList = std::vector<std::shared_ptr<agent_pools_t>>;

  // Outer callback: invoked once per agent.
  auto record_agent = [](hsa_agent_t a, void *ctx) -> hsa_status_t {
    assert(ctx != nullptr);
    AgentPoolList *out = static_cast<AgentPoolList *>(ctx);

    std::shared_ptr<agent_pools_t> entry = std::make_shared<agent_pools_t>();
    entry->agent = a;

    // Inner callback: invoked once per pool of the current agent.
    auto record_pool = [](hsa_amd_memory_pool_t p,
                          void *pool_ctx) -> hsa_status_t {
      assert(pool_ctx != nullptr);
      static_cast<std::vector<hsa_amd_memory_pool_t> *>(pool_ctx)
          ->push_back(p);
      return HSA_STATUS_SUCCESS;
    };

    const hsa_status_t pool_st = hsa_amd_agent_iterate_memory_pools(
        a, record_pool, static_cast<void *>(&entry->pools));
    out->push_back(entry);
    return pool_st;
  };

  return hsa_iterate_agents(record_agent, static_cast<void *>(agent_pools));
}

// Build a comma-separated, human-readable list of the global-segment flags set
// in pool_i->global_flag and store it in *out_str (which is overwritten).
// The result is empty when none of the known flags is set.
// Always returns HSA_STATUS_SUCCESS.
static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i,
                                        std::string* out_str) {
  const uint32_t bits = pool_i->global_flag;

  assert(out_str != nullptr);

  struct FlagLabel {
    uint32_t mask;
    const char* label;
  };
  // Listed in the order the labels appear in the output.
  const FlagLabel known_flags[] = {
    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT, "KERNARG"},
    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED, "FINE GRAINED"},
    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED,
     "EXT-SCOPE FINE GRAINED"},
    {HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED, "COARSE GRAINED"},
  };

  out_str->clear();
  for (const FlagLabel& entry : known_flags) {
    if ((bits & entry.mask) == 0) {
      continue;
    }
    // Every label is non-empty, so a non-empty result means a label has
    // already been written and a separator is needed.
    if (!out_str->empty()) {
      *out_str += ", ";
    }
    *out_str += entry.label;
  }

  return HSA_STATUS_SUCCESS;
}
// Print the "Pool Segment:" line for `pool_i` to stdout, prefixed with the
// indentation string `ind_lvl`. For the global segment the set flags are
// listed as well. An unrecognised segment prints "Not Supported" through
// std::cout followed by an empty value column.
// Returns the status of MakeGlobalFlagsString if it fails, otherwise
// HSA_STATUS_SUCCESS.
static hsa_status_t DumpSegment(const pool_info_t *pool_i,
                                 std::string const *ind_lvl) {
  fprintf(stdout, "%s%-28s", ind_lvl->c_str(), "Pool Segment:");

  std::string description;

  switch (pool_i->segment) {
    case HSA_AMD_SEGMENT_GLOBAL: {
      std::string flag_names;
      const hsa_status_t err = MakeGlobalFlagsString(pool_i, &flag_names);
      RET_IF_HSA_COMMON_ERR(err);
      description = "GLOBAL; FLAGS: " + flag_names;
      break;
    }

    case HSA_AMD_SEGMENT_READONLY:
      description = "READONLY";
      break;

    case HSA_AMD_SEGMENT_PRIVATE:
      description = "PRIVATE";
      break;

    case HSA_AMD_SEGMENT_GROUP:
      description = "GROUP";
      break;

    default:
      std::cout << "Not Supported" << std::endl;
      break;
  }

  fprintf(stdout, "%-35s\n", description.c_str());

  return HSA_STATUS_SUCCESS;
}

// Query the runtime for the attributes of `pool` and store them in *pool_i.
// The queries run in a fixed order and the first failure is returned
// immediately, leaving the remaining fields of *pool_i untouched.
// Returns HSA_STATUS_SUCCESS when every query succeeds.
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
                             pool_info_t *pool_i) {
  hsa_status_t err;

  // Global-segment flags.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
  RET_IF_HSA_COMMON_ERR(err);

  // Segment the pool belongs to.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment);
  RET_IF_HSA_COMMON_ERR(err);

  // Size of the pool.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->size);
  RET_IF_HSA_COMMON_ERR(err);

#ifdef ROCRTST_EMULATOR_BUILD
  // Limit pool sizes to 2 GB on emulator
  const size_t max_pool_size = 2*1024*1024*1024UL;
  pool_i->size = std::min(pool_i->size, max_pool_size);
#endif

  // Whether run-time allocation from the pool is allowed.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
      &pool_i->alloc_allowed);
  RET_IF_HSA_COMMON_ERR(err);

  // Allocation granule.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
      &pool_i->alloc_granule);
  RET_IF_HSA_COMMON_ERR(err);

  // Recommended allocation granule.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE,
      &pool_i->alloc_rec_granule);
  RET_IF_HSA_COMMON_ERR(err);

  // Allocation alignment.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
      &pool_i->alloc_alignment);
  RET_IF_HSA_COMMON_ERR(err);

  // Whether the pool is accessible by all agents.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
      &pool_i->accessible_by_all);
  RET_IF_HSA_COMMON_ERR(err);

  // Maximum allocation size.
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
      &pool_i->aggregate_alloc_max);
  RET_IF_HSA_COMMON_ERR(err);

  return HSA_STATUS_SUCCESS;
}

// Print a human-readable summary of `pool_i` to stdout, one attribute per
// line, with every line prefixed by `indent` spaces. Byte quantities are
// printed in whole kilobytes (integer division by 1024).
// Returns the status of the segment dump if it fails, otherwise
// HSA_STATUS_SUCCESS.
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
                                uint32_t indent) {
  const std::string ind_lvl(indent, ' ');

  // Emits one "<indent><label><value>" row in the shared column layout.
  const auto print_row = [&ind_lvl](const char* label,
                                    const std::string& value) {
    fprintf(stdout, "%s%-28s%-36s\n", ind_lvl.c_str(), label, value.c_str());
  };
  const auto in_kb = [](uint64_t bytes) {
    return std::to_string(bytes / 1024) + "KB";
  };
  const auto as_bool = [](bool flag) {
    return std::string(flag ? "TRUE" : "FALSE");
  };

  // Propagate a failure instead of silently dropping it.
  const hsa_status_t seg_err = DumpSegment(pool_i, &ind_lvl);
  RET_IF_HSA_COMMON_ERR(seg_err);

  print_row("Pool Size:", in_kb(pool_i->size));
  print_row("Pool Allocatable:", as_bool(pool_i->alloc_allowed));
  print_row("Pool Alloc Granule:", in_kb(pool_i->alloc_granule));
  print_row("Pool Alloc Recommended Granule:",
            in_kb(pool_i->alloc_rec_granule));
  print_row("Pool Alloc Alignment:", in_kb(pool_i->alloc_alignment));
  // Label spelling corrected (was "Acessible").
  print_row("Pool Accessible by all:", as_bool(pool_i->accessible_by_all));
  print_row("Pool Aggregate Alloc Size:", in_kb(pool_i->aggregate_alloc_max));

  return HSA_STATUS_SUCCESS;
}

// Printable names for pointer types, indexed by the numeric value of the
// `type` field of hsa_amd_pointer_info_t (see DumpPointerInfo). The table has
// five entries, so only type values 0 through 4 have a name here.
static const char* Types[] = {"HSA_EXT_POINTER_TYPE_UNKNOWN",
                              "HSA_EXT_POINTER_TYPE_HSA",
                              "HSA_EXT_POINTER_TYPE_LOCKED",
                              "HSA_EXT_POINTER_TYPE_GRAPHICS",
                              "HSA_EXT_POINTER_TYPE_IPC"
                             };

// Query the runtime for information about `ptr` and print it to std::cout:
// host and agent base addresses, size, pointer type, user data and the handles
// of the agents that can access the allocation.
// Returns the status of hsa_amd_pointer_info if it fails, otherwise
// HSA_STATUS_SUCCESS.
hsa_status_t DumpPointerInfo(void* ptr) {
  // The runtime requires the caller to fill in `size` with the size of the
  // structure before the query; zero the rest so no field is left
  // indeterminate.
  hsa_amd_pointer_info_t info = {};
  info.size = sizeof(info);

  hsa_agent_t* agents = nullptr;
  uint32_t count = 0;

  // The accessibility list is allocated with malloc and released below.
  hsa_status_t err = hsa_amd_pointer_info(ptr, &info, malloc, &count, &agents);
  RET_IF_HSA_COMMON_ERR(err);

  std::cout << "Info for ptr: " << ptr << std::endl;
  std::cout << "CPU ptr: " << reinterpret_cast<void*>(info.hostBaseAddress) <<
                                                                     std::endl;
  std::cout << "GPU ptr: " << reinterpret_cast<void*>(info.agentBaseAddress)
                                                                  << std::endl;
  std::cout << "Size: " << info.sizeInBytes << std::endl;

  // Guard the name lookup: a type value outside the table must not index past
  // its end.
  const size_t type_count = sizeof(Types) / sizeof(Types[0]);
  const size_t type_index = static_cast<size_t>(info.type);
  std::cout << "Type: "
            << (type_index < type_count ? Types[type_index]
                                        : "UNRECOGNIZED POINTER TYPE")
            << std::endl;

  std::cout << "UsrPtr " << reinterpret_cast<void*>(info.userData) <<
                                                                     std::endl;
  std::cout << "Accessible by: ";

  for (uint32_t i = 0; i < count; i++) {
    std::cout << agents[i].handle << " ";
  }

  std::cout << " ;[EOM]" << std::endl;
  free(agents);  // free(nullptr) is a no-op when no list was returned
  return HSA_STATUS_SUCCESS;
}


/*! \brief Writes to the buffer and increments the write pointer to the
 *         buffer. Also, ensures that the argument is written to an
 *         aligned memory as specified. Return the new write pointer.
 *
 * NOTE: this helper is currently compiled out by the surrounding `#if 0`.
 * NOTE(review): `dst + size` is arithmetic on a `void *`, which standard C++
 * does not permit; that would need addressing before re-enabling the code.
 *
 * @param dst The write pointer to the buffer
 * @param src The source pointer
 * @param size The size in bytes to copy
 * @param alignment The alignment to follow while writing to the buffer
 */
#if 0
inline void *
addArg(void * dst, const void* src, size_t size, uint32_t alignment) {
    dst = rocrtst::AlignUp(dst, alignment);
    ::memcpy(dst, src, size);
    return dst + size;
}
#endif
#undef RET_IF_HSA_COMMON_ERR

}  // namespace rocrtst


================================================
FILE: rocrtst/common/common.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/// \file
/// RocR related helper functions for sequences that come up frequently

#ifndef ROCRTST_COMMON_COMMON_H_
#define ROCRTST_COMMON_COMMON_H_

#include <stdio.h>
#include <string.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <memory>

#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

namespace rocrtst {

#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif  // __GNUC__
#endif  // _MSC_VER

#define MULTILINE(...) # __VA_ARGS__

// define below should be deleted. Leaving in commented out until code that
// refers to it has been corrected
// #define HSA_ARGUMENT_ALIGN_BYTES 16

// This structure holds memory pool information acquired through hsa info
// related calls, and is later used for reference when displaying the
// information.
typedef struct pool_info_t_ {
    uint32_t segment;
    size_t size;
    bool alloc_allowed;
    size_t alloc_granule;
    size_t alloc_alignment;
    size_t alloc_rec_granule;
    bool accessible_by_all;
    uint32_t global_flag;
    uint64_t aggregate_alloc_max;

    /// Member-wise equality: two records are equal only when every field
    /// matches. Declared const so that const objects and const references
    /// can be compared as well.
    /// \param[in] a Record to compare against
    /// \returns true if all fields of both records are equal
    inline bool operator==(const pool_info_t_ &a) const {
      return a.segment == segment
          && a.size == size
          && a.alloc_allowed == alloc_allowed
          && a.alloc_granule == alloc_granule
          && a.alloc_rec_granule == alloc_rec_granule
          && a.alloc_alignment == alloc_alignment
          && a.accessible_by_all == accessible_by_all
          && a.aggregate_alloc_max == aggregate_alloc_max
          && a.global_flag == global_flag;
    }
} pool_info_t;


// Pairs an agent handle with a list of memory pool handles.
// NOTE(review): presumably `pools` holds the pools that belong to `agent`
// (see GetAgentPools) -- confirm against the implementation.
struct agent_pools_t{
    hsa_agent_t agent;
    std::vector<hsa_amd_memory_pool_t> pools;
};

/// Fill in the pool_info_t structure for the provided pool.
/// \param[in] pool Pool for which information will be retrieved
/// \param[out] pool_i Pointer to structure where pool info will be stored
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i);

/// If the provided agent is associated with a GPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find GPU agents.
/// \param[in] agent Agent to evaluate if GPU
/// \param[out] data If agent is associated with a GPU, this pointer will point
///  to the agent upon return
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data);

/// If the provided agent is associated with a CPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find CPU agents.
/// \param[in] agent Agent to evaluate if CPU
/// \param[out] data If agent is associated with a CPU, this pointer will point
///  to the agent upon return
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data);

// TODO(cfreehil): get rid of FindGlobalPool and replace with FindStandardPool
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data);

/// If the provided agent is associated with a CPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find all the CPU agents.
/// \param[in] agent Agent to evaluate if CPU
/// \param[out] data If agent is associated with a CPU, this pointer will point
///  to the agent upon return
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data);

/// If the provided agent is associated with a GPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find  all the GPU agents.
/// \param[in] agent Agent to evaluate if GPU
/// \param[out] data If agent is associated with a GPU, this pointer will point
///  to the agent upon return
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data);

/// Find a GLOBAL memory pool. By this, we mean not a kernel args pool.
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
///  to the pool upon return
/// \returns hsa_status_t
///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
///      -else return an appropriate error code for any error encountered
hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data);

/// Find a "kernel arg" pool.
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
///  to the pool upon return
/// \returns hsa_status_t
///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
///      -else return an appropriate error code for any error encountered
hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data);


/// Find a "standard" pool. By this, we mean not a kernel args pool.
/// The pool found will have the following properties:
///     HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off
///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
///  to the pool upon return
/// \returns hsa_status_t
///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
///      -else return an appropriate error code for any error encountered
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data);
hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data);

/// Find a "kernel arg" pool.
/// The pool found will have the following properties:
///     HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: On
///     HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
/// This function is meant to be the call-back function used
/// with hsa_amd_agent_iterate_memory_pools.
/// \param[in] pool Pool to evaluate for required properties
/// \param[out] data If pool meets criteria, this pointer will point
///  to the pool upon return
/// \returns hsa_status_t
///      -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
///      -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
///      -else return an appropriate error code for any error encountered
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);

/// Dump information about provided memory pool to STDOUT
/// \param[in] pool_i Pointer to the pool information to dump
/// \param[in] indent Number of spaces to indent output.
/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
                                                         uint32_t indent = 0);

/// Dump information about a provided pointer to STDOUT.
/// \param[in] ptr Pointer about which information is dumped.
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t DumpPointerInfo(void* ptr);

hsa_status_t GetAgentPools(
                    std::vector<std::shared_ptr<agent_pools_t>> *agent_pools);

}  // namespace rocrtst
#endif  // ROCRTST_COMMON_COMMON_H_


================================================
FILE: rocrtst/common/concurrent_utils.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <cstdlib>
#include "common/concurrent_utils.h"

namespace rocrtst {

/**
 * @brief Thread entry point: runs one test each time the group is started.
 *
 * The thread sleeps on the group's condition variable until either the shared
 * run_flag differs from the value this thread last acted on (a new run was
 * requested) or the shared exit_flag becomes non-zero.
 *  - On a run request the test status becomes TEST_RUNNING, the test function
 *    is called with its data pointer, the status becomes TEST_STOP and the
 *    shared running-test counter is decremented. The thread that brings the
 *    counter to zero broadcasts on the condition variable to wake the master.
 *  - On an exit request the test status becomes TEST_FINISHED and the thread
 *    terminates.
 *
 * @param input Pointer to the thread_aux describing this thread: the test to
 * run plus pointers to the group's run_flag, exit_flag, mutex, condition
 * variable and running-test counter.
 * @return Always NULL.
 */

static void *worker(void *input) {
  thread_aux* thread = reinterpret_cast<thread_aux*>(input);
  func_ptr fun_prt = reinterpret_cast<func_ptr>(thread->test->fun_prt);

  // Last run_flag value this thread has acted on. TestGroupStart toggles the
  // shared flag, so "shared != local" means a new run was requested.
  int run_flag_l = 0;

  while (1) {
    pthread_mutex_lock(thread->test_mutex);
    // Sleep until a new run is requested or the group is shutting down
    while (*thread->run_flag == run_flag_l && *thread->exit_flag == 0) {
      pthread_cond_wait(thread->test_cond, thread->test_mutex);
    }
    // Sample exit_flag while the mutex is still held so that the decision
    // below is based on the same state that ended the wait.
    const int exit_requested = *thread->exit_flag;
    pthread_mutex_unlock(thread->test_mutex);

    if (exit_requested != 0) {
      thread->test->status = TEST_FINISHED;
      return NULL;
    }

    // Remember that this run request has been consumed
    run_flag_l = run_flag_l ^ 1;

    thread->test->status = TEST_RUNNING;
    fun_prt(thread->test->data);

    pthread_mutex_lock(thread->test_mutex);
    // Publish TEST_STOP before the counter can reach zero: a master woken by
    // the broadcast below must never observe this test as still running.
    thread->test->status = TEST_STOP;
    (*(thread->num_running_t))--;

    // The last test to finish wakes up anyone blocked in TestGroupWait
    if ((*thread->num_running_t) == 0) {
      pthread_cond_broadcast(thread->test_cond);
    }
    pthread_mutex_unlock(thread->test_mutex);
  }
  return NULL;
}

/**
 * @brief Allocate a test_group, initialize its bookkeeping fields and
 * allocate a test_list with room for group_size entries.
 *
 * The thread-related pointers (tid, thread_list) are set to NULL so that the
 * cleanup paths, which pass them to free(), are safe even if threads were
 * never created for the group.
 *
 * @param group_size The size of test group, i.e., the size of test lists
 * @return Pointer to the new test_group, or NULL if memory allocation failed
 */
test_group *TestGroupCreate(size_t group_size) {
  test_group *new_group = static_cast<test_group *>(malloc(sizeof(test_group)));
  if (new_group == NULL) {
    return NULL;
  }

  // initialize variables in the data structure
  new_group->group_size = group_size;
  new_group->n_threads = 0;
  new_group->num_test = 0;
  new_group->run_flag = 0;
  new_group->exit_flag = 0;
  new_group->num_running_t = 0;
  new_group->tid = NULL;
  new_group->thread_list = NULL;

  // malloc test_list array with group_size
  new_group->test_list = static_cast<test_aux *>(malloc(sizeof(test_aux) * group_size));
  // malloc(0) may legitimately return NULL, so only a non-empty request that
  // yields NULL is treated as a failure.
  if (new_group->test_list == NULL && group_size != 0) {
    free(new_group);
    return NULL;
  }

  return new_group;
}

// Block the caller until the group's running-test counter has dropped to
// zero. The counter is examined under the group mutex, and the caller sleeps
// on the group condition variable between checks.
void TestGroupWait(test_group *t_group) {
  pthread_mutex_t *const lock = &t_group->test_mutex;
  pthread_cond_t *const wakeup = &t_group->test_cond;

  pthread_mutex_lock(lock);
  for (;;) {
    if (t_group->num_running_t == 0) {
      break;
    }
    pthread_cond_wait(wakeup, lock);
  }
  pthread_mutex_unlock(lock);
}

// Register num_copy copies of a test (function + data pointer) at the end of
// the group's test list. Each new entry starts in state TEST_NOT_STARTED.
// If the copies do not fit into the remaining capacity an error is printed
// and the group is left unchanged.
void TestGroupAdd(test_group *t_group, func_ptr fun_prt, void *data, size_t num_copy) {
  const size_t first_free = static_cast<size_t>(t_group->num_test);

  // Compare against the remaining capacity instead of adding the two counts,
  // so that a very large num_copy cannot wrap around and pass the check.
  if (first_free > t_group->group_size ||
      num_copy > t_group->group_size - first_free) {
    fprintf(stderr, "Error beyound group size: %zu, please resize the test_group\n", t_group->group_size);
    return;
  }

  test_aux *test_list = t_group->test_list;
  for (size_t ii = 0; ii < num_copy; ii++) {
    test_list[first_free + ii].fun_prt = reinterpret_cast<void*>(fun_prt);
    test_list[first_free + ii].data = data;
    test_list[first_free + ii].status = TEST_NOT_STARTED;
  }
  t_group->num_test = static_cast<int>(first_free + num_copy);

  return;
}

// Grow the group's test list so that it can hold new_group_size entries.
// Shrinking is rejected with an error message and leaves the group unchanged,
// because entries that are already registered could otherwise fall outside
// the list. If the reallocation fails, the existing list and size are kept.
void TestGroupResize(test_group *t_group, size_t new_group_size) {
  if (new_group_size < t_group->group_size) {
    fprintf(stderr, "Error new group_size is smaller than current group_size\n");
    return;
  }

  // Nothing to do when the capacity is already what was asked for
  if (new_group_size == t_group->group_size) {
    return;
  }

  test_aux *new_test_list =
      static_cast<test_aux *>(realloc(t_group->test_list, new_group_size * sizeof(test_aux)));
  if (new_test_list == NULL) {
    // realloc leaves the original block intact on failure; keep using it
    fprintf(stderr, "Error: unable to resize test_group to %zu entries\n", new_group_size);
    return;
  }

  t_group->group_size = new_group_size;
  t_group->test_list = new_test_list;

  return;
}

// Create one joinable worker thread per registered test.
// Initializes the group's mutex, condition variable and thread attributes,
// allocates the per-thread bookkeeping arrays and starts the threads.
// If a thread cannot be created, the loop stops and the group's num_test and
// n_threads are reduced to the number of threads that were started. If the
// bookkeeping arrays cannot be allocated, no thread is started and both
// counts are set to zero.
void TestGroupThreadCreate(test_group *t_group) {
  pthread_mutex_init(&(t_group->test_mutex), NULL);
  pthread_cond_init(&(t_group->test_cond), NULL);
  pthread_attr_init(&(t_group->attr));
  pthread_attr_setdetachstate(&(t_group->attr), PTHREAD_CREATE_JOINABLE);

  const int n_threads = t_group->num_test;
  t_group->n_threads = n_threads;
  int ii = 0;

  thread_aux *thread_list = t_group->thread_list =
              static_cast<thread_aux *>(malloc(sizeof(thread_aux) * n_threads));
  t_group->tid = static_cast<pthread_t*>(malloc(sizeof(pthread_t) * n_threads));

  // A NULL result is only an error when threads are actually requested
  // (malloc(0) may return NULL).
  if (n_threads > 0 && (thread_list == NULL || t_group->tid == NULL)) {
    fprintf(stderr, "Error: unable to allocate bookkeeping for %d threads\n", n_threads);
    free(t_group->thread_list);
    free(t_group->tid);
    t_group->thread_list = NULL;
    t_group->tid = NULL;
    t_group->num_test = 0;
    t_group->n_threads = 0;
    return;
  }

  for (ii = 0; ii < n_threads; ++ii) {
    // CPU_ZERO(&thread_list[ii].cpuset);
    thread_list[ii].tid = ii;
    thread_list[ii].test = t_group->test_list + ii;
    thread_list[ii].run_flag = &(t_group->run_flag);
    thread_list[ii].exit_flag = &(t_group->exit_flag);
    thread_list[ii].test_mutex = &(t_group->test_mutex);
    thread_list[ii].test_cond = &(t_group->test_cond);
    thread_list[ii].num_running_t = &(t_group->num_running_t);
    int status = pthread_create(t_group->tid + ii, &(t_group->attr), worker, thread_list + ii);

    // Print error statements and break
    if (status != 0) {
      printf("pthread_create return value %d\n", status);
      printf("pthread_create error at idx: %d of %d\n", ii, n_threads);
      // pthread_create reports failure through its return value and does not
      // set errno, so hand the code to perror explicitly.
      errno = status;
      perror("pthread_create failed");
      break;
    }
  }

  // Update test group properties to
  // accommodate thread creation error
  t_group->num_test = ii;
  t_group->n_threads = ii;
  return;
}

// Report how many tests are currently registered in the group.
int TestGroupNumTests(test_group *t_group) {
  const int registered = t_group->num_test;
  return registered;
}

// Set affinity of the specific test.
// Currently a no-op: the implementation below is commented out, so t_group,
// test_id and cpu_id are all ignored and no affinity is applied.
void TestGroupThreadAffinity(test_group *t_group, int test_id, int cpu_id) {
/*  Setting CPU affinity isn't currently supported.
 *  CPU_SET(cpu_id, &t_group->thread_list[test_id].cpuset);
 *  int status;
 *  status = pthread_setaffinity_np(t_group->tid[test_id],
 *          sizeof(cpu_set_t), &t_group->thread_list[test_id].cpuset);
 *  if (status != 0) {
 *      perror("pthread_setaffinity_np error");
 *  }
 */
  return;
}

// Start one run of every test in the group.
// Toggles the shared run_flag (workers react to a change of the flag, not to
// a particular value), sets the running-test counter to the number of tests
// and wakes all worker threads. If tests from a previous run are still
// running, an error is printed and nothing is started.
void TestGroupStart(test_group *t_group) {
  pthread_mutex_lock(&t_group->test_mutex);

  // The counter is modified by the workers under this mutex, so it has to be
  // inspected under the mutex as well.
  if (t_group->num_running_t != 0) {
    const unsigned int still_running = t_group->num_running_t;
    pthread_mutex_unlock(&t_group->test_mutex);
    fprintf(stderr, "Error: %u tests are not finished\n", still_running);
    return;
  }

  t_group->run_flag = t_group->run_flag ^ 1;
  t_group->num_running_t = t_group->num_test;
  pthread_cond_broadcast(&t_group->test_cond);
  pthread_mutex_unlock(&t_group->test_mutex);

  return;
}

// Shut the group's worker threads down cooperatively.
// Sets exit_flag to 1 and wakes all workers, joins every thread, then
// destroys the thread attributes, mutex and condition variable and releases
// the per-thread bookkeeping arrays. A test whose thread cannot be joined is
// marked TEST_ERROR.
void TestGroupExit(test_group *t_group) {
  int ii = 0;
  int status;

  pthread_mutex_lock(&t_group->test_mutex);
  t_group->exit_flag = 1;
  pthread_cond_broadcast(&t_group->test_cond);
  pthread_mutex_unlock(&t_group->test_mutex);

  for (ii = 0; ii < t_group->n_threads; ++ii) {
    status = pthread_join(t_group->tid[ii], 0);
    // pthread_join returns 0 on success and a positive error number on
    // failure; it does not set errno.
    if (status != 0) {
      errno = status;
      perror("pthread_join failed");
      t_group->test_list[ii].status = TEST_ERROR;
    }
  }

  pthread_attr_destroy(&(t_group->attr));
  pthread_mutex_destroy(&(t_group->test_mutex));
  pthread_cond_destroy(&(t_group->test_cond));

  free(t_group->tid);
  free(t_group->thread_list);
  // Do not leave dangling pointers behind in case cleanup runs again
  t_group->tid = NULL;
  t_group->thread_list = NULL;
  t_group->n_threads = 0;

  return;
}

// Forcefully stop the group's worker threads.
// Requests cancellation of every thread, then destroys the thread attributes,
// mutex and condition variable and releases the per-thread bookkeeping
// arrays. A test whose thread cannot be cancelled is marked TEST_ERROR.
// NOTE(review): the threads are not joined here, so a thread that has not yet
// acted on the cancellation request may still touch the resources released
// below -- confirm that callers only use this as a last resort.
void TestGroupKill(test_group *t_group) {
  int ii = 0;
  int status;
  for (ii = 0; ii < t_group->n_threads; ++ii) {
    status = pthread_cancel(t_group->tid[ii]);
    // pthread_cancel returns 0 on success and a positive error number on
    // failure; it does not set errno.
    if (status != 0) {
      errno = status;
      perror("pthread_cancel failed");
      t_group->test_list[ii].status = TEST_ERROR;
    }
  }

  pthread_attr_destroy(&(t_group->attr));
  pthread_mutex_destroy(&(t_group->test_mutex));
  pthread_cond_destroy(&(t_group->test_cond));

  free(t_group->tid);
  free(t_group->thread_list);
  // Do not leave dangling pointers behind in case cleanup runs again
  t_group->tid = NULL;
  t_group->thread_list = NULL;
  t_group->n_threads = 0;

  return;
}

// Release the test list and the group object itself.
// Passing NULL is allowed and does nothing, mirroring free(NULL).
void TestGroupDestroy(test_group *t_group) {
  if (t_group == NULL) {
    return;
  }

  free(t_group->test_list);
  free(t_group);

  return;
}

// Return the status (see enum TEST_STATUS) of test `test_id`.
// A test recorded as TEST_RUNNING whose thread no longer exists is downgraded
// to TEST_ERROR. An id outside [0, n_threads) is reported on stderr and
// yields TEST_ERROR without touching the test or thread arrays.
int TestGroupTestStatus(test_group *t_group, int test_id) {
  if (test_id < 0) {
    fprintf(stderr, "test_id: %d is negative\n", test_id);
    return TEST_ERROR;
  }

  if (test_id >= t_group->n_threads) {
    fprintf(stderr, "test_id: %d is larger than the number of test: %d\n", test_id, t_group->num_test);
    return TEST_ERROR;
  }

  if (t_group->test_list[test_id].status == TEST_RUNNING) {
    // Signal 0 performs error checking only: ESRCH means the thread is gone
    if (pthread_kill(t_group->tid[test_id], 0) == ESRCH) {
      t_group->test_list[test_id].status = TEST_ERROR;
    }
  }

  return t_group->test_list[test_id].status;
}

}  // namespace rocrtst


================================================
FILE: rocrtst/common/concurrent_utils.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_COMMON_CONCURRENT_UTILS_H_
#define ROCRTST_COMMON_CONCURRENT_UTILS_H_

#include <pthread.h>
#include <stdint.h>
#include <iostream>


namespace rocrtst {
/**
 * @enum TEST_STATUS
 * @brief States recorded in the status field of a test.
 */
enum TEST_STATUS {
  TEST_NOT_STARTED,
  TEST_RUNNING,
  TEST_STOP,
  TEST_FINISHED,
  TEST_ERROR
};


using func_ptr = void (*)(void *input);
/**
 * @struct test_aux
 * @brief Describes one test: the function to run, the argument passed to it
 * and its current status.
 */
struct test_aux{
    // Test function, stored as an untyped pointer; it is converted back to
    // func_ptr before being called
    void *fun_prt;
    // Argument handed to the test function when it is called
    void *data;
    // Current state of the test, one of the enum TEST_STATUS values
    uint16_t status;
};

/**
 * @struct thread_aux
 * @brief Per-thread context handed to a worker thread: the test it runs and
 * pointers to the synchronization state shared by the whole test group.
 */
struct thread_aux {
    // Index of this thread within its group (not a pthread_t handle)
    int tid;
    // Pointer to the test item this thread runs
    test_aux *test;
    // Pointer to the run_flag shared in the test group
    volatile int *run_flag;
    // Pointer to the exit_flag shared in the test group
    volatile int *exit_flag;
    // Pointer to the pthread mutex shared in the test group
    pthread_mutex_t *test_mutex;
    // Pointer to the pthread condition shared in the test group
    pthread_cond_t *test_cond;
    // Pointer to the group's count of tests that are still running
    volatile unsigned int *num_running_t;
};

/**
 * @struct test_group
 * @brief Holds the tests of a group, the threads that run them and the
 * synchronization state shared between the master and the worker threads.
 */
struct test_group {
    // test group size, i.e., capacity of the test_list array
    size_t group_size;
    // number of tests currently registered in test_list
    int num_test;
    // number of threads - since one test per thread, equal to num_test
    int n_threads;
    // run request flag: toggled between 0 and 1 each time the group is
    // started; a worker starts a run when the value differs from the one it
    // last acted on
    volatile int run_flag;
    // a flag for telling all threads to finish - 1: exit
    volatile int exit_flag;
    // array of pthread handles, one per worker thread
    pthread_t *tid;
    // pthread attr used when creating the worker threads
    pthread_attr_t attr;
    // pthread mutex shared in a group
    pthread_mutex_t test_mutex;
    // pthread condition signal shared in a group
    pthread_cond_t test_cond;
    // the list of test info
    test_aux *test_list;
    // the list of thread info, one entry per worker thread
    thread_aux *thread_list;
    // number of tests of the current run that have not finished yet
    volatile unsigned int num_running_t;
};

/**
 * @brief create a test group, and preallocate
 * test_list array with group_size
 * @return initialized struct test_group
 */
test_group* TestGroupCreate(size_t group_size);

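/**
 * @brief resize the test_list array of a test group
 * @param t_group Pointer to a test group
 * @param new_group_size New size of the test_list array
 */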
void TestGroupResize(test_group *t_group, size_t new_group_size);

/**
 * @brief add a new test into the specific test group
 * @param t_group Pointer to a test group
 * @param fun Pointer to the test function
 * @param data Pointer to data for the test function
 * @param num_copy Number of copies of the test
 */
void TestGroupAdd(test_group *t_group, func_ptr fun,
                    void *data, size_t num_copy);

/**
 * @brief create threads for tests in a test group
 * @param t_group Pointer to a test group
 */
void TestGroupThreadCreate(test_group *t_group);

/**
 * @brief return the number of tests in a test group
 * @param t_group Pointer to a test group
 */
int TestGroupNumTests(test_group *t_group);

/**
 * @brief run all threads/tests in a test group
 * @param t_group Pointer to a test group
 */
void TestGroupStart(test_group *t_group);

/**
 * @brief wait until all running tests in a test group have finished
 * The caller is blocked until the number of running tests reaches zero
 * @param t_group Pointer to a test group
 */
void TestGroupWait(test_group *t_group);

/**
 * @brief terminate all threads/tests in a test group by sending a signal
 * set exit_flag to 1, wait until all threads are finished
 * @param t_group Pointer to a test group
 */
void TestGroupExit(test_group *t_group);

/**
 * @brief destroy a test group, release all resources
 * @param t_group Pointer to a test group
 */
void TestGroupDestroy(test_group *t_group);

/**
 * @brief check the status of specific test in a test group
 * @param t_group Pointer to a test group
 * @param test_id Test No.
 * @return the status of the test listed in enum TEST_STATUS
 */
int TestGroupTestStatus(test_group *t_group, int test_id);

/**
 * @brief set affinity of the specific test
 * @param t_group Pointer to a test group
 * @param test_id Test No.
 * @param cpu_id CPU No. that the test is bound to
 */
void TestGroupThreadAffinity(test_group *t_group,
                                int test_id, int cpu_id);

/**
 * @brief force kill a test group
 * @param t_group Pointer to a test group
 */
void TestGroupKill(test_group *t_group);
}  // namespace rocrtst
#endif  // ROCRTST_COMMON_CONCURRENT_UTILS_H_


================================================
FILE: rocrtst/common/helper_funcs.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include "common/helper_funcs.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include <assert.h>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
#include <numeric>

namespace rocrtst {

// Print a header line followed by a width x height array stored row by row.
// Every element is followed by a single space, every row ends with a newline,
// and the output is framed by an empty line before the header and after the
// last row.
template<typename T>
void PrintArray(const std::string header, const T* data, const int width,
                const int height) {
  std::cout << std::endl;
  std::cout << header << std::endl;

  for (int row = 0; row < height; ++row) {
    const int row_start = row * width;
    for (int col = 0; col < width; ++col) {
      std::cout << data[row_start + col] << " ";
    }
    std::cout << std::endl;
  }

  std::cout << std::endl;
}

// Fill a width x height array (stored row by row) with pseudo-random values
// derived from rangeMin, rangeMax and the seed.
// A seed of 0 is replaced by the current time. The seed is also passed to
// srand(), and the values themselves are drawn with rand_r() on the local
// copy of the seed.
// Returns 1 if arrayPtr is null, 0 otherwise.
template<typename T>
int FillRandom(T* arrayPtr,
               const int width,
               const int height,
               const T rangeMin,
               const T rangeMax,
               unsigned int seed) {
  if (arrayPtr == NULL) {
    return 1;
  }

  if (seed == 0) {
    seed = (unsigned int)time(NULL);
  }
  srand(seed);

  const double span = static_cast<double>(rangeMax - rangeMin) + 1.0;

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int pos = row * width + col;
      // Scale the raw draw from [0, RAND_MAX] by span / (RAND_MAX + 1)
      const T offset = T(span * rand_r(&seed) / (RAND_MAX + 1.0));
      arrayPtr[pos] = rangeMin + offset;
    }
  }

  return 0;
}

// Round val up to the next power of two; a value that already is a power of
// two is returned unchanged. The result wraps to 0 for an input of 0 and for
// inputs above 2^63.
uint64_t RoundToPowerOf2(uint64_t val) {
  uint64_t bits = val - 1;

  // Smear the highest set bit into every lower position. The largest shift
  // is 32, which stays below the 64-bit width of the operand.
  bits |= bits >> 1;
  bits |= bits >> 2;
  bits |= bits >> 4;
  bits |= bits >> 8;
  bits |= bits >> 16;
  bits |= bits >> 32;

  return bits + 1;
}

// True when val has exactly one bit set, i.e. it is a power of two.
// Zero is not considered a power of two.
bool IsPowerOf2(uint64_t val) {
  if (val == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves zero only if it was the only bit
  return (val & (val - 1)) == 0;
}

// Compare two float arrays by relative L2 error.
// Computes sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)) over all `length`
// elements, including element 0, and returns true when that ratio is below
// epsilon.
// Returns false when the sum of squared reference values is smaller than
// 1e-7 (for example an all-zero reference), because the relative error is
// not meaningful in that case.
bool
Compare(const float* refData, const float* data,
        const int length, const float epsilon) {
  float error = 0.0f;
  float ref = 0.0f;

  for (int i = 0; i < length; ++i) {
    const float diff = refData[i] - data[i];
    error += diff * diff;
    ref += refData[i] * refData[i];
  }

  if (std::fabs(ref) < 1e-7f) {
    return false;
  }

  const float normRef = std::sqrt(ref);
  const float normError = std::sqrt(error);

  return (normError / normRef) < epsilon;
}

// Compare two double arrays by relative L2 error.
// Computes sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)) over all `length`
// elements, including element 0, and returns true when that ratio is below
// epsilon.
// Returns false when the sum of squared reference values is smaller than
// 1e-7 (for example an all-zero reference), because the relative error is
// not meaningful in that case.
bool
Compare(const double* refData, const double* data,
        const int length, const double epsilon) {
  double error = 0.0;
  double ref = 0.0;

  for (int i = 0; i < length; ++i) {
    const double diff = refData[i] - data[i];
    error += diff * diff;
    ref += refData[i] * refData[i];
  }

  if (std::fabs(ref) < 1e-7) {
    return false;
  }

  const double normRef = std::sqrt(ref);
  const double normError = std::sqrt(error);

  return (normError / normRef) < epsilon;
}

// Clear the low bits of value selected by (alignment - 1).
// For a power-of-two alignment this rounds value down to a multiple of
// alignment. A zero alignment trips the assertion.
intptr_t
AlignDown(intptr_t value, size_t alignment) {
    assert(alignment != 0 && "Zero alignment");
    const size_t low_bits = alignment - 1;
    return (intptr_t) (value & ~low_bits);
}

// Pointer flavour of AlignDown: converts the pointer to an integer, aligns
// it down with the integer overload and converts the result back.
void *
AlignDown(void* value, size_t alignment) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
    return reinterpret_cast<void*>(AlignDown(address, alignment));
}

// Round a pointer up to the next alignment boundary by adding
// (alignment - 1) to its address and aligning the sum down.
void *
AlignUp(void* value, size_t alignment) {
    const uintptr_t bumped =
        (uintptr_t)(reinterpret_cast<uintptr_t>(value) + alignment - 1);
    return reinterpret_cast<void*>(AlignDown(bumped, alignment));
}

// Return the middle element of scores, or the average of the two middle
// elements when the number of scores is even.
// The vector is not sorted here, so the result is the median only if the
// caller passes the scores in sorted order.
// Returns 0.0 for an empty vector.
double CalcMedian(const std::vector<double> &scores) {
  const size_t size = scores.size();

  // Without this guard an empty vector would index scores[size_t(-1)]
  if (size == 0) {
    return 0.0;
  }

  const size_t mid = size / 2;
  if (size % 2 == 0) {
    return (scores[mid - 1] + scores[mid]) / 2;
  }

  return scores[mid];
}

// Arithmetic mean of scores: the sum of all elements divided by their count.
double CalcMean(const std::vector<double> &scores) {
  double total = 0.0;

  for (std::vector<double>::const_iterator it = scores.begin();
       it != scores.end(); ++it) {
    total = total + *it;
  }

  return total / scores.size();
}

// Mean of the element-wise differences v2[i] - v1[i].
// Only the index range common to both vectors is used, so a v2 that is
// shorter than v1 no longer causes reads past its end. For vectors of equal
// length this is the mean over all elements.
double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2) {
  double mean = 0;
  const size_t size = (v2.size() < v1.size()) ? v2.size() : v1.size();

  for (size_t i = 0; i < size; i++) {
    mean += v2[i] - v1[i];
  }

  return mean / size;
}

// Population standard deviation of scores around score_mean: the square root
// of the average squared distance from score_mean.
// NOTE(review): score_mean is declared as int, so a fractional mean is
// truncated at the call site -- confirm whether that is intended.
double CalcStdDeviation(std::vector<double> scores, int score_mean) {
  double squared_sum = 0.0;

  for (const double score : scores) {
    const double distance = score - score_mean;
    squared_sum += distance * distance;
  }

  const double variance = squared_sum / scores.size();
  return sqrt(variance);
}

/////////////////////////////////////////////////////////////////
// Template Instantiations
/////////////////////////////////////////////////////////////////

// The templates above are defined in this translation unit, so the
// specializations used elsewhere are instantiated explicitly here.

// PrintArray for 32-bit unsigned elements
template
void PrintArray<uint32_t>(const std::string, const unsigned int*, int, int);

// PrintArray for float elements
template
void PrintArray<float>(const std::string, const float*, int, int);

// FillRandom for 32-bit unsigned elements
template
int FillRandom<uint32_t>(uint32_t* arrayPtr,
                         const int width, const int height,
                         uint32_t rangeMin, uint32_t rangeMax,
                                                           unsigned int seed);

// FillRandom for float elements
template
int FillRandom<float>(float* arrayPtr,
                      const int width, const int height,
                      float rangeMin, float rangeMax, unsigned int seed);

}  // namespace rocrtst


================================================
FILE: rocrtst/common/helper_funcs.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_COMMON_HELPER_FUNCS_H_
#define ROCRTST_COMMON_HELPER_FUNCS_H_

/// \file
/// General-purpose helper functions

#include <string>
#include <vector>
#include <stdint.h>

// On GNU-compatible compilers, provide __forceinline as an alias for an
// always-inlined inline function.
#if defined(__GNUC__)
#define __forceinline __inline__ __attribute__((always_inline))
#endif

// STRING(x) turns x into a string literal after macro-expanding it; the
// extra STRING2 level is what forces the expansion before stringification.
#define STRING2(x) #x
#define STRING(x) STRING2(x)

// PASTE(x, y) concatenates two tokens after macro-expanding them; the extra
// PASTE2 level is what forces the expansion before pasting.
#define PASTE2(x, y) x##y
#define PASTE(x, y) PASTE2(x, y)

namespace rocrtst {

/// Compare \p length floats in \p data against the reference \p refData.
/// NOTE(review): the implementation appears to use a relative error norm
/// checked against \p epsilon and to return false for a near-zero reference;
/// confirm against helper_funcs.cc.
bool Compare(const float* refData, const float* data,
             const int length, const float epsilon = 1e-6f);
/// Double-precision counterpart of Compare (same caveats as above).
bool Compare(const double* refData, const double* data,
             const int length, const double epsilon = 1e-6);

/// Calculate the mean number of the vector
double CalcMean(const std::vector<double> &scores);

/// Calculate the mean time of difference of the two vectors
/// (v2[i] - v1[i], averaged over v1.size() elements)
double CalcMean(const std::vector<double>& v1, const std::vector<double>& v2);

/// Return the median value of a vector of doubles
/// The vector is indexed as-is (not sorted by this function).
/// \param[in] scores Vector of doubles
/// \returns double Median value of provided vector
double CalcMedian(const std::vector<double> &scores);

/// Calculate the standard deviation of the vector
/// \param[in] scores Values to examine
/// \param[in] score_mean Mean to measure deviations from (an int, so any
/// fractional part of the mean is lost)
double CalcStdDeviation(std::vector<double> scores, int score_mean);

/// Display an array to std::out
template<typename T>
void PrintArray(
  const std::string header,
  const T* data,
  const int width,
  const int height);

/// Fill an array with random values
template<typename T>
int FillRandom(
  T* arrayPtr,
  const int width,
  const int height,
  const T rangeMin,
  const T rangeMax,
  unsigned int seed = 123);

/// Round \p value down to a multiple of \p alignment (non-zero; the mask
/// arithmetic used requires a power of two).
intptr_t AlignDown(intptr_t value, size_t alignment);
/// Pointer overload of AlignDown.
void* AlignDown(void* value, size_t alignment);
/// Round \p value up to a multiple of \p alignment (same requirements as
/// AlignDown).
void* AlignUp(void* value, size_t alignment);

/// Rounds to a power of 2
uint64_t RoundToPowerOf2(uint64_t val);

///  Checks if a value is a power of 2
bool IsPowerOf2(uint64_t val);

/// Number of bits set to 1 in \p value, computed with the compiler's
/// __builtin_popcount intrinsic.
static __forceinline uint32_t popcount(uint32_t value) {
  const int set_bits = __builtin_popcount(value);
  return set_bits;
}

/// Runs a callable when the guard goes out of scope unless Dismiss() was
/// called first.
///
/// Copying or assigning a guard hands the pending release over to the
/// destination and dismisses the source, so the callable runs at most once
/// no matter how many copies were made.
///
/// \tparam lambda Type of the callable to invoke; it is called with no
/// arguments.
template <typename lambda>
class ScopeGuard {
 public:
  /// Arm a guard that will call \p release on destruction.
  explicit __forceinline ScopeGuard(const lambda& release)
      : release_(release), dismiss_(false) {}

  /// Take over the release duty of \p rhs; \p rhs is left dismissed.
  /// Members are copy-constructed directly so that callables without a
  /// default constructor or assignment operator (e.g. closures) work.
  ScopeGuard(const ScopeGuard& rhs)
      : release_(rhs.release_), dismiss_(rhs.dismiss_) {
    rhs.dismiss_ = true;
  }

  /// Invoke the callable unless the guard was dismissed.
  __forceinline ~ScopeGuard() {
    if (!dismiss_) release_();
  }

  /// Take over the release duty of \p rhs; \p rhs is left dismissed.
  /// The callable previously held by this guard is replaced without being
  /// invoked. Requires \p lambda to be copy-assignable.
  __forceinline ScopeGuard& operator=(ScopeGuard& rhs) {
    // Self-assignment must not dismiss the only live guard.
    if (this != &rhs) {
      dismiss_ = rhs.dismiss_;
      release_ = rhs.release_;
      rhs.dismiss_ = true;
    }
    return *this;
  }

  /// Disarm the guard: the callable will not run on destruction.
  __forceinline void Dismiss() { dismiss_ = true; }

 private:
  lambda release_;
  // mutable so the copy constructor can dismiss its (const) source.
  mutable bool dismiss_;
};

/// Build a ScopeGuard around \p rel, deducing the callable type.
/// \param[in] rel Callable to run when the returned guard is destroyed
/// \returns An armed ScopeGuard<lambda> holding a copy of \p rel
template <typename lambda>
static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) {
  // Returned as a temporary constructed in the return statement.
  return ScopeGuard<lambda>(rel);
}

// MAKE_SCOPE_GUARD_HELPER(lname, sname, callable): stores the callable in a
// local named lname and declares a rocrtst::ScopeGuard named sname around it.
// Expands to two statements, so it must be used where declarations are
// allowed (not as the body of an unbraced if/for).
#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) \
  auto lname = __VA_ARGS__;                        \
  rocrtst::ScopeGuard<decltype(lname)> sname(lname);
// MAKE_SCOPE_GUARD(callable): anonymous guard; both the callable and the
// guard get unique names generated from __COUNTER__.
#define MAKE_SCOPE_GUARD(...)                                   \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \
                          PASTE(scopeGuard, __COUNTER__), __VA_ARGS__)
// MAKE_NAMED_SCOPE_GUARD(name, callable): guard variable is called `name`, so
// the caller can later invoke name.Dismiss().
#define MAKE_NAMED_SCOPE_GUARD(name, ...)                             \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \
                          __VA_ARGS__)

// gtest assertion that _val equals HSA_STATUS_SUCCESS.
#define ASSERT_SUCCESS(_val) ASSERT_EQ(HSA_STATUS_SUCCESS, (_val))

// Element count of a C array. Only valid for true arrays, not pointers.
#define ARRAY_SIZE(_x) (sizeof(_x) / sizeof(_x[0]))

}  // namespace rocrtst
#endif  //  ROCRTST_COMMON_HELPER_FUNCS_H_


================================================
FILE: rocrtst/common/hsatimer.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "common/hsatimer.h"
#include <x86intrin.h>

namespace rocrtst {

// Nanoseconds in one second. Used to flatten a timespec into a single
// nanosecond count and as the tick frequency assigned to new timers.
static const uint64_t kNanosecondsPerSecond = 1000000000;

// Construct a timer set. Measures the TSC frequency once up front (this
// busy-waits inside MeasureTSCFreqHz) and caches it in freq_in_100mhz.
PerfTimer::PerfTimer(void) : freq_in_100mhz(MeasureTSCFreqHz()) {}

// Destroy every Timer created through CreateTimer, newest first, and leave
// the handle table empty.
PerfTimer::~PerfTimer() {
  for (std::vector<Timer*>::reverse_iterator it = _timers.rbegin();
       it != _timers.rend(); ++it) {
    delete *it;
  }
  _timers.clear();
}

int PerfTimer::CreateTimer(void) {
  Timer* newTimer = new Timer;
  newTimer->_start = 0;
  newTimer->_clocks = 0;

  newTimer->_freq = kNanosecondsPerSecond;

  /* Push back the address of new Timer instance created */
  _timers.push_back(newTimer);
  return static_cast<int>(_timers.size() - 1);
}

int PerfTimer::StartTimer(int index) {
  if (index >= static_cast<int>(_timers.size())) {
    Error("Cannot reset timer. Invalid handle.");
    return 1;
  }

// General Linux timing method
#ifndef _AMD
  struct timespec s;
  clock_gettime(CLOCK_MONOTONIC, &s);
  _timers[index]->_start = (uint64_t) s.tv_sec * kNanosecondsPerSecond
                           + (uint64_t) s.tv_nsec;
#else

  // AMD timing method

  unsigned int unused;
  _timers[index]->_start = __rdtscp(&unused);

#endif

  return 0;
}

int PerfTimer::StopTimer(int index) {
  uint64_t n = 0;

  if (index >= static_cast<int>(_timers.size())) {
    Error("Cannot reset timer. Invalid handle.");
    return 1;
  }

  // General Linux timing method
#ifndef _AMD
  struct timespec s;
  clock_gettime(CLOCK_MONOTONIC, &s);
  n = (uint64_t) s.tv_sec * kNanosecondsPerSecond + (uint64_t) s.tv_nsec;
#else
  // AMD Linux timing

  unsigned int unused;
  n = __rdtscp(&unused);
#endif

  n -= _timers[index]->_start;
  _timers[index]->_start = 0;

#ifndef _AMD
  _timers[index]->_clocks += n;
#else
  // convert to ms
  _timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz;
  cout << "_AMD is enabled!!!" << endl;
#endif

  return 0;
}

// Report an error message on standard output, followed by a newline and a
// flush.
void PerfTimer::Error(std::string str) {
  std::cout << str;
  std::cout << std::endl;
}

// Return the accumulated total of timer `index` divided by the timer's
// frequency (seconds when the total is in nanoseconds).
// If `index` is not a valid handle an error is printed and 1 is returned,
// which callers cannot tell apart from a genuine one-second reading.
double PerfTimer::ReadTimer(int index) {
  // Reject negative handles as well as handles past the end of the table.
  if (index < 0 || index >= static_cast<int>(_timers.size())) {
    Error("Cannot read timer. Invalid handle.");
    return 1;
  }

  const double ticks = static_cast<double>(_timers[index]->_clocks);

  return ticks / _timers[index]->_freq;
}

// Clear both the accumulated total and the start point of timer `index`.
// An invalid handle is fatal: an error is printed and the process exits
// with status 1.
void PerfTimer::ResetTimer(int index) {
  // Reject negative handles as well as handles past the end of the table.
  if (index < 0 || index >= static_cast<int>(_timers.size())) {
    Error("Invalid index value\n");
    exit(1);
  }

  _timers[index]->_clocks = 0;
  _timers[index]->_start = 0;
}

// Current CLOCK_MONOTONIC_RAW time expressed in whole microseconds.
uint64_t PerfTimer::CoarseTimestampUs() {
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC_RAW, &now);

  const uint64_t seconds_as_us = uint64_t(now.tv_sec) * 1000000;
  return seconds_as_us + now.tv_nsec / 1000;
}

// Estimate the TSC frequency by spinning until 1e9 TSC ticks have elapsed
// and timing that interval with the coarse monotonic clock.
// Despite the name, the value returned is (ticks * 10) / nanoseconds rounded
// to nearest, i.e. the frequency expressed in units of 100 MHz.
uint64_t PerfTimer::MeasureTSCFreqHz() {
  unsigned int aux;

  // Take the wall-clock sample first so it brackets the TSC samples.
  const uint64_t wall_begin_us = CoarseTimestampUs();
  const uint64_t tsc_begin = __rdtscp(&aux);

  // Busy-wait for one gigacycle of TSC ticks.
  uint64_t tsc_end;
  for (;;) {
    tsc_end = __rdtscp(&aux);
    if (tsc_end - tsc_begin >= 1000000000) {
      break;
    }
  }

  const uint64_t wall_end_us = CoarseTimestampUs();

  const uint64_t elapsed_ticks = tsc_end - tsc_begin;
  const uint64_t elapsed_ns = (wall_end_us - wall_begin_us) * 1000;

  // Adding half the divisor rounds the integer division to nearest.
  return (elapsed_ticks * 10 + (elapsed_ns / 2)) / elapsed_ns;
}

}  // namespace rocrtst


================================================
FILE: rocrtst/common/hsatimer.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_COMMON_HSATIMER_H_
#define ROCRTST_COMMON_HSATIMER_H_

#include <stdint.h>
#include <iostream>
#include <vector>
#include <string>
/// \file
/// Timer related class.

namespace rocrtst {

/// A set of independent stopwatch-style timers addressed by integer handle.
/// Timers accumulate elapsed time across Start/Stop pairs until reset.
/// NOTE(review): the class owns raw Timer pointers but does not disable
/// copying; copying a PerfTimer would free the timers twice.
class PerfTimer {
 private:
  struct Timer {
    std::string name; /* name of the timer object */
    uint64_t _freq; /* ticks per second used to convert _clocks on read */
    uint64_t _clocks; /* accumulated elapsed ticks */
    uint64_t _start; /* tick value captured by the last StartTimer */
  };

  std::vector<Timer*> _timers; /* owned Timer objects, indexed by handle */
  double freq_in_100mhz; /* measured TSC frequency in units of 100 MHz */

 public:
  PerfTimer(void);
  ~PerfTimer(void);

  /// Create a new timer.
  /// \returns Index (handle) of the new timer instance
  int CreateTimer(void);

  /// Start the timer associated with the given index
  /// \param[in] index Index of the timer to start
  /// \returns int 0 for success, non-zero otherwise
  int StartTimer(int index);

  /// Stop the timer associated with the given index
  /// \param[in] index Index of the timer to stop
  /// \returns int 0 for success, non-zero otherwise
  int StopTimer(int index);

  /// Reset the timer to 0
  /// \param[in] index Index of the timer to reset
  /// \returns void
  void ResetTimer(int index);

  /// Read the time value of the timer associated with the provided index.
  /// Units are seconds
  /// \param[in] index Index of the timer to read
  /// \returns double Value of the timer
  double ReadTimer(int index);

 private:
  /// Print an error message to standard output.
  void Error(std::string str);
  /// Current raw monotonic clock time in microseconds.
  uint64_t CoarseTimestampUs();
  /// Measure the TSC frequency; the result is in units of 100 MHz.
  uint64_t MeasureTSCFreqHz();
};

}  // namespace rocrtst
#endif  // ROCRTST_COMMON_HSATIMER_H_


================================================
FILE: rocrtst/common/os.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "common/os.h"
#include <stdlib.h>

namespace rocrtst {

// Set (or overwrite) an environment variable for the current process.
// Failure is fatal: a message is printed and the process exits with
// status 1.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  const int overwrite_existing = 1;

  if (setenv(env_var_name, env_var_value, overwrite_existing) == 0) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}

// Look up an environment variable.
// Returns the value string owned by the C runtime, or a null pointer when
// the variable is not set.
char* GetEnv(const char* env_var_name) {
  char* value = getenv(env_var_name);
  return value;
}

}  // namespace rocrtst


================================================
FILE: rocrtst/common/os.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/// \file OS specific functionality

#ifndef ROCRTST_COMMON_OS_H_
#define ROCRTST_COMMON_OS_H_

#include <stdio.h>
namespace rocrtst {

/// Set an environment variable, overwriting any existing value.
/// On failure a message is printed and the process exits.
/// \param[in] env_var_name Environment variable to set.
/// \param[in] env_var_value Value to set environment variable to.
/// \returns void
void SetEnv(const char* env_var_name, const char* env_var_value);

/// Get an environment variable.
/// \param[in] env_var_name Environment variable to get.
/// \returns Pointer to string of characters that is the value of the
///  environment variable, as returned by getenv().
char* GetEnv(const char* env_var_name);

}  // namespace rocrtst
#endif  // ROCRTST_COMMON_OS_H_


================================================
FILE: rocrtst/common/rocr.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2021-2021, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "common/rocr.h"
// Definition of the single process-wide System instance that the static
// System accessors operate on.
System System::sys;

// Enumerate HSA agents and their global memory pools into `devices`.
//
// For every agent the global-segment pools are collected; agents with at
// least one such pool are appended to devices.cpu_ or devices.gpu_ (by
// device type) and to devices.all_devices_. Afterwards the first CPU pool
// that is both fine-grained and kernarg-capable is stored in
// devices.kernarg_.
//
// Returns true only if at least one CPU, at least one GPU and a kernarg pool
// (non-zero handle) were found.
//
// NOTE(review): the status values returned by hsa_iterate_agents and
// hsa_amd_agent_iterate_memory_pools are stored in `err` but never examined.
// NOTE(review): CHECK throws from inside callbacks invoked by the HSA
// runtime -- confirm that unwinding through the runtime is acceptable.
bool DeviceDiscovery(System& devices) {
  hsa_status_t err;

  err = hsa_iterate_agents([](hsa_agent_t agent, void* data) {
    hsa_status_t err;

    System* devices = (System*)data;

    Device dev;
    dev.agent = agent;

    // -1u marks "no pool of this kind found yet".
    dev.fine = -1u;
    dev.coarse = -1u;

    hsa_device_type_t type;
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
    CHECK(err);

    // Collect this agent's global-segment pools into dev.pools.
    err = hsa_amd_agent_iterate_memory_pools(agent, [](hsa_amd_memory_pool_t pool, void* data) {
      std::vector<Device::Memory>& pools = *reinterpret_cast<std::vector<Device::Memory>*>(data);
      hsa_status_t err;

      hsa_amd_segment_t segment;
      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
      CHECK(err);

      // Only global-segment pools are of interest; skip everything else.
      if(segment != HSA_AMD_SEGMENT_GLOBAL)
        return HSA_STATUS_SUCCESS;

      uint32_t flags;
      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
      CHECK(err);

      Device::Memory mem;
      mem.pool=pool;
      mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
      mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);

      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
      CHECK(err);

      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
      CHECK(err);

      pools.push_back(mem);
      return HSA_STATUS_SUCCESS;
    }, (void*)&dev.pools);

    // Agents without any global pool are ignored entirely.
    if(!dev.pools.empty()) {
      for(size_t i=0; i<dev.pools.size(); i++) {
        // A fine-grained kernarg pool is only used as the fine pool when
        // nothing has been chosen yet ...
        if(dev.pools[i].fine && dev.pools[i].kernarg && dev.fine==-1u)
          dev.fine = i;
        // ... whereas a fine-grained non-kernarg pool always takes over.
        if(dev.pools[i].fine && !dev.pools[i].kernarg)
          dev.fine = i;
        // The last non-fine pool seen becomes the coarse pool.
        if(!dev.pools[i].fine)
          dev.coarse = i;
      }

      if(type == HSA_DEVICE_TYPE_CPU)
        devices->cpu_.push_back(dev);
      else
        devices->gpu_.push_back(dev);

      devices->all_devices_.push_back(dev.agent);
    }

    return HSA_STATUS_SUCCESS;
  }, &devices);

  // Pick the first fine-grained kernarg pool of any CPU agent. Wrapped in an
  // immediately-invoked lambda so `return` exits both loops at once.
  [&]() {
    for(auto& dev : devices.cpu_) {
      for(auto& mem : dev.pools) {
        if(mem.fine && mem.kernarg) {
          devices.kernarg_ = mem;
          return;
        }
      }
    }
  }();

  if(devices.cpu_.empty() || devices.gpu_.empty() || devices.kernarg_.pool.handle == 0)
    return false;
  return true;
}

void System::Init() {
  hsa_status_t err = hsa_init();
  CHECK(err);

  DeviceDiscovery(sys);
}

void System::Shutdown() {
  sys.~System();
  new (&sys) System();
  hsa_status_t err = hsa_shut_down();
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  err = hsa_shut_down();
  EXPECT_EQ(HSA_STATUS_ERROR_NOT_INITIALIZED, err);
}

// Load the code object stored in `filename` into a new frozen executable for
// the given device.
//
// Throws std::runtime_error if the file cannot be opened or (via CHECK) if
// any HSA call fails. Each acquired resource is protected by a scope guard so
// that a throw part-way through releases what was already acquired; the
// guards are dismissed once construction has fully succeeded and ownership
// passes to the destructor.
//
// Note: the parameter `agent` (a Device) shadows the member `agent` inside
// the body; `agent.agent` below refers to the parameter's HSA agent handle.
CodeObject::CodeObject(std::string filename, Device& agent) : agent(agent.agent) {
  hsa_status_t err;

  file = open(filename.c_str(), O_RDONLY);
  if(file == -1) {
    throw std::runtime_error("Could not open file.\n");
  }
  MAKE_NAMED_SCOPE_GUARD(fileGuard, [&](){ close(file); });

  err = hsa_code_object_reader_create_from_file(file, &code_obj_rdr);
  CHECK(err);
  MAKE_NAMED_SCOPE_GUARD(readerGuard, [&](){ hsa_code_object_reader_destroy(code_obj_rdr); });
  
  err = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr, &executable);
  CHECK(err);
  MAKE_NAMED_SCOPE_GUARD(exeGuard, [&](){ hsa_executable_destroy(executable); });

  err = hsa_executable_load_agent_code_object(executable, agent.agent, code_obj_rdr, nullptr, nullptr);
  CHECK(err);

  err = hsa_executable_freeze(executable, nullptr);
  CHECK(err);

  // Success: keep the resources alive for the lifetime of this object.
  exeGuard.Dismiss();
  readerGuard.Dismiss();
  fileGuard.Dismiss();
}

// Release the executable, the code object reader and the file descriptor, in
// the reverse of the order in which the constructor acquired them. Return
// codes are ignored.
CodeObject::~CodeObject() {
  hsa_executable_destroy(executable);
  hsa_code_object_reader_destroy(code_obj_rdr);
  close(file);
}

bool CodeObject::GetKernel(std::string name, Kernel& kern) {
  hsa_executable_symbol_t symbol;
  hsa_status_t err = hsa_executable_get_symbol_by_name(executable, name.c_str(), &agent, &symbol);
  if(err != HSA_STATUS_SUCCESS) {
    err = hsa_executable_get_symbol_by_name(executable, (name+".kd").c_str(), &agent, &symbol);
    if(err != HSA_STATUS_SUCCESS) {
      return false;
    }
  }

  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle);
  CHECK(err);

  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kern.scratch);
  CHECK(err);
  //printf("Scratch: %d\n", kern.scratch);

  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kern.group);
  CHECK(err);
  //printf("LDS: %d\n", kern.group);
  
  // Remaining needs code object v2 or comgr.
  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kern.kernarg_size);
  CHECK(err);
  //printf("Kernarg Size: %d\n", kern.kernarg_size);

  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kern.kernarg_align);
  CHECK(err);
  //printf("Kernarg Align: %d\n", kern.kernarg_align);

  return true;
}

// Write one AQL packet into `queue` and ring the doorbell.
//
// Not for parallel insertion: the write index is read and later stored
// non-atomically with respect to other producers.
//
// The packet body is copied into the ring slot while the slot keeps its
// previous header; the real header is then published last with a release
// store. `pkt` is temporarily modified during the copy but is restored
// before returning.
//
// NOTE(review): the queue-full check is commented out, so `read` is unused
// and the function always returns true.
bool SubmitPacket(hsa_queue_t* queue, Aql& pkt) {
  // Index mask; relies on queue->size being a power of two.
  size_t mask = queue->size - 1;
  Aql* ring = (Aql*)queue->base_address;

  uint64_t write = hsa_queue_load_write_index_relaxed(queue);
  uint64_t read = hsa_queue_load_read_index_relaxed(queue);
  //if(write - read + 1 > queue->size)
  //  return false;
  
  Aql& dst = ring[write & mask];

  // Copy everything except the header: give pkt the slot's current header so
  // that the whole-packet assignment leaves the slot's header unchanged.
  uint16_t header = pkt.header.raw;
  pkt.header.raw = dst.header.raw;
  dst = pkt;
  // Publish the real header only after the body is in place.
  __atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
  pkt.header.raw = header;

  hsa_queue_store_write_index_release(queue, write+1);
  hsa_signal_store_screlease(queue->doorbell_signal, write);

  return true;
}

// Allocate `size` bytes from memory pool `mem` and grant access to every
// agent known to System.
//
// Returns the allocation. On failure the error is reported through CHECK
// (gtest failure plus a thrown std::runtime_error); if the allocation itself
// succeeded but granting access failed, the memory is returned to the pool
// before throwing so it does not leak.
void* hsaMalloc(size_t size, const Device::Memory& mem) {
  void* ret = nullptr;
  hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret);
  CHECK(err);

  err = hsa_amd_agents_allow_access(System::all_devices().size(), &System::all_devices()[0], nullptr, ret);
  if(err != HSA_STATUS_SUCCESS) {
    // Best-effort release; the original failure is the one reported below.
    hsa_amd_memory_pool_free(ret);
  }
  CHECK(err);

  return ret;
}

void* hsaMalloc(size_t size, const Device& dev, bool fine) {
  uint32_t index = fine ? dev.fine : dev.coarse;
  assert(index != -1u && "Memory type unavailable.");
  return hsaMalloc(size, dev.pools[index]);
}


================================================
FILE: rocrtst/common/rocr.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2021-2021, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_image.h"

#include "common/helper_funcs.h"

#include "gtest/gtest.h"

#include <fcntl.h>
#include <assert.h>
#include "string.h"

#include <vector>

// Verifies the status produced by an HSA call.
//
// `err` is evaluated exactly once and stored in a local, so passing a call
// expression (e.g. CHECK(hsa_init())) does not run the call a second time on
// the failure path. If the status differs from HSA_STATUS_SUCCESS, a non-fatal
// gtest failure is recorded via EXPECT_EQ and std::runtime_error is thrown.
//
// The expansion ends in ';' (kept so existing call sites written with or
// without a trailing semicolon keep compiling).
#define CHECK(err) [&](){                                      \
    const auto rocrtst_check_status_ = (err);                  \
    if(rocrtst_check_status_ != HSA_STATUS_SUCCESS) {          \
      EXPECT_EQ(HSA_STATUS_SUCCESS, rocrtst_check_status_);    \
      throw std::runtime_error("CHECK failure.");              \
    }                                                          \
  }();

// Groups an HSA agent handle with the list of memory pools recorded for it.
struct Device {
  // One memory pool entry together with a few cached attributes.
  // NOTE(review): the code that fills these fields is not visible here; the
  // meanings below are inferred from the names - confirm against System::Init.
  struct Memory {
    hsa_amd_memory_pool_t pool;  // Pool handle.
    bool fine;                   // presumably: pool is fine-grained
    bool kernarg;                // presumably: pool may hold kernel arguments
    size_t size;                 // presumably: pool size
    size_t granule;              // presumably: allocation granule of the pool
  };

  hsa_agent_t agent;          // Agent handle for this device.
  std::vector<Memory> pools;  // Pool entries recorded for this agent.
  // NOTE(review): `fine` and `coarse` are integers, not flags - possibly
  // indices into `pools`; verify against the code that populates them.
  uint32_t fine;
  uint32_t coarse;
};

// Plain record describing one kernel. CodeObject::GetKernel takes a Kernel by
// reference; presumably it fills these fields - confirm in its definition.
struct Kernel {
  uint64_t handle;         // presumably the kernel object handle used for dispatch
  uint32_t scratch;        // presumably private (scratch) segment size
  uint32_t group;          // presumably group segment size
  uint32_t kernarg_size;   // presumably kernel argument segment size
  uint32_t kernarg_align;  // presumably kernel argument segment alignment
};

// 16-bit packet header, viewable either as individual bit-fields or as one
// raw 16-bit value (`raw`) sharing the same storage.
// The bit-field widths add up to 16: 8 + 1 + 2 + 2 + 3.
// Assumes bitfield layout is little endian.
// Assumes std::atomic<uint16_t> is binary compatible with uint16_t and uses HW atomics.
union AqlHeader {
  struct {
    uint16_t type     : 8;
    uint16_t barrier  : 1;
    uint16_t acquire  : 2;
    uint16_t release  : 2;
    uint16_t reserved : 3;
  };
  uint16_t raw;
};

// Packet layout that starts with an AqlHeader and carries a signal, a value,
// a mask and a condition code, followed by a completion signal.
// NOTE(review): looks like the AMD vendor-specific "barrier value" AQL packet;
// confirm the field layout against hsa_ext_amd.h before relying on it.
// Member order defines the binary layout - do not reorder.
struct BarrierValue {
  AqlHeader header;
  uint8_t AmdFormat;
  uint8_t reserved;
  uint32_t reserved1;
  hsa_signal_t signal;
  hsa_signal_value_t value;
  hsa_signal_value_t mask;
  uint32_t cond;
  uint32_t reserved2;
  uint64_t reserved3;
  uint64_t reserved4;
  hsa_signal_t completion_signal;
};

// Overlay of the packet formats used by these tests. Every member shares the
// same storage, so `header` aliases the leading bytes of whichever packet
// view is in use.
union Aql {
  AqlHeader header;
  hsa_kernel_dispatch_packet_t dispatch;
  hsa_barrier_and_packet_t barrier_and;
  hsa_barrier_or_packet_t barrier_or;
  BarrierValue barrier_value;
};

// Seven 8-byte slots: three offsets followed by four pointers.
// NOTE(review): presumably mirrors the hidden kernel arguments appended for
// OpenCL-compiled kernels - confirm against the kernels used by the tests.
// Member order defines the layout - do not reorder.
struct OCLHiddenArgs {
  uint64_t offset_x;
  uint64_t offset_y;
  uint64_t offset_z;
  void* printf_buffer;
  void* enqueue;
  void* enqueue2;
  void* multi_grid;
};

// Seven 64-bit slots: three offsets, three unnamed placeholders (`_`, `_2`,
// `_3`) and `multi_grid_sync` in the last slot.
// NOTE(review): presumably mirrors the hidden kernel arguments of HIP-compiled
// kernels - confirm against the kernels used by the tests.
// Member order defines the layout - do not reorder.
struct hip_hiddens {
  uint64_t offset_x;
  uint64_t offset_y;
  uint64_t offset_z;
  uint64_t _;
  uint64_t _2;
  uint64_t _3;
  uint64_t multi_grid_sync;
};

// Holds the device lists and the kernarg pool shared by the tests.
// All state lives in the single static instance `sys`; the static accessors
// below return references into that instance.
class System {
public:
  std::vector<Device> cpu_, gpu_;
  std::vector<hsa_agent_t> all_devices_;
  Device::Memory kernarg_;

  // Defined elsewhere. NOTE(review): presumably Init() populates `sys` and
  // Shutdown() tears it down - confirm in the implementation file.
  static void Init();
  static void Shutdown();
  // Accessors returning mutable references to the fields of `sys`.
  static std::vector<Device>& cpu() { return sys.cpu_; }
  static std::vector<Device>& gpu() { return sys.gpu_; }
  static std::vector<hsa_agent_t>& all_devices() { return sys.all_devices_; }
  static Device::Memory& kernarg() { return sys.kernarg_; }
  // Declaration of the shared instance; its definition is not in this header.
  static System sys;
};

// Wraps a file handle, a code object reader, an executable and an agent.
// Only declarations are visible here; the member functions are defined
// elsewhere.
class CodeObject {
public:
  // NOTE(review): presumably opens `filename` and loads it for `agent` -
  // confirm in the implementation.
  CodeObject(std::string filename, Device& agent);
  ~CodeObject();
  // Takes a kernel name and a Kernel to write into; returns a bool.
  // NOTE(review): presumably true means the kernel was found - confirm.
  bool GetKernel(std::string name, Kernel& kernel);
private:
  hsa_file_t file;
  hsa_code_object_reader_t code_obj_rdr;
  hsa_executable_t executable;
  hsa_agent_t agent;
};

// Submits `pkt` to `queue`; defined elsewhere.
// Not for parallel insertion.
// NOTE(review): the meaning of the bool result is not visible here - confirm.
bool SubmitPacket(hsa_queue_t* queue, Aql& pkt);

// Allocation helpers, defined elsewhere. Two overloads: one takes the pool
// entry directly, the other takes a device plus a `fine` flag.
// NOTE(review): presumably `fine` selects the device's fine- or coarse-grained
// pool - confirm in the implementation.
void* hsaMalloc(size_t size, const Device::Memory& mem);
void* hsaMalloc(size_t size, const Device& dev, bool fine);


================================================
FILE: rocrtst/common/utils_test/CMakeLists.txt
================================================
# Sources of the executable that tests the rocrtst Utils library.
set(rocrtstUtilsTestSrcs
    utils_timer_gtest.cpp
    utils_timer_test.cpp
    utils_cpp11_gtest.cpp)

# Header search paths.
include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/utils
    ${PROJECT_SOURCE_DIR}/gtest/include)

# The test executable itself.
add_executable(${ROCRTST_UTIL_TEST_NAME} ${rocrtstUtilsTestSrcs})

# Libraries that resolve the executable's external symbols.
target_link_libraries(${ROCRTST_UTIL_TEST_NAME}
                      ${ROCRTST_LIBS} elf c stdc++ dl pthread rt)

# Collect the build artifacts in one common location.
install(TARGETS ${ROCRTST_UTIL_TEST_NAME}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)


================================================
FILE: rocrtst/common/utils_test/utils_cpp11_gtest.cpp
================================================
#include<iostream>
#include<thread>
#include"gtest/gtest.h"

using std::cout;
using std::endl;

// Entry point executed by thread #1: writes a launch message to standard
// output and flushes the stream.
static void ThreadEntry1() {
  const char* const launch_message = "The first thread is launched!";
  cout << launch_message << endl;
}
// Entry point executed by thread #2: writes a launch message to standard
// output and flushes the stream.
static void ThreadEntry2() {
  const char* const launch_message = "The second thread is launched!";
  cout << launch_message << endl;
}

// Google Test case covering basic C++11 thread usage.
// Two default-constructed thread objects are checked to be non-joinable,
// then each is assigned a running thread, checked to be joinable, and joined
// before the test returns.
TEST(rocrtstCpp11Feature, BasicThread) {
  // Default-constructed threads do not represent a thread of execution.
  std::thread thread1;
  std::thread thread2;

  // At this point, both must be non-joinable. A fatal assertion is safe
  // here because no thread has been started yet.
  ASSERT_EQ(false, thread1.joinable());
  ASSERT_EQ(false, thread2.joinable());

  // Assign execution code to the threads.
  thread1 = std::thread(ThreadEntry1);
  thread2 = std::thread(ThreadEntry2);

  // Now both threads should be joinable. These checks are deliberately
  // non-fatal: a fatal ASSERT_* would return from the test body while the
  // std::thread objects are still joinable, and destroying a joinable
  // std::thread calls std::terminate(), taking down the whole test binary.
  EXPECT_EQ(true, thread1.joinable());
  EXPECT_EQ(true, thread2.joinable());

  // Wait for the spawned threads to finish; join() requires joinable().
  if (thread1.joinable()) {
    thread1.join();
  }
  if (thread2.joinable()) {
    thread2.join();
  }

  // Reaching this point means both threads were launched and joined.
  cout << "Done!" << endl;
}


================================================
FILE: rocrtst/common/utils_test/utils_timer_gtest.cpp
================================================


#include <iostream>

#include "gtest/gtest.h"

#include "utils_timer_test.hpp"

using namespace std;

// Fixture for the rocrtst Utils timer tests. It carries no state of its own.
class rocrtstUtilsTimerGtest : public ::testing::Test {
 protected:
  // Argument-less constructor invoked by the Google Test framework.
  rocrtstUtilsTimerGtest() {}
};

// Runs the timer test driver for 108 iterations with a sleep argument of 3
// and prints the accumulated time.
TEST_F(rocrtstUtilsTimerGtest, TestingTimer101) {

  // Create the timer test driver with automatic storage so it is destroyed
  // when the test ends (it was previously allocated with `new` and never
  // deleted).
  // NOTE(review): the driver documents the second argument as milliseconds,
  // but run() hands it to usleep(), which takes microseconds - confirm the
  // intended unit.
  rocrtstUtilsTimerTest timer(108, 3);

  // Let the timer object collect data
  timer.run();

  // Print the statistics of timer object
  timer.print();
}


================================================
FILE: rocrtst/common/utils_test/utils_timer_test.cpp
================================================

#include <iostream>
#include "hsatimer.h"
#include <unistd.h>
#include "utils_timer_test.hpp"

using namespace std;


// Destructor of the test driver. The body is empty: nothing is released.
rocrtstUtilsTimerTest::~rocrtstUtilsTimerTest() {
}

// Constructor of the test driver. Stores both arguments and starts the
// accumulated time at zero.
//
// @param loopCnt number of times to call the sleep Api
//
// @param sleepTime value later passed to the sleep Api on each iteration
rocrtstUtilsTimerTest::rocrtstUtilsTimerTest(uint32_t loopCnt, uint32_t sleepTime)
    : loopCnt_(loopCnt),
      sleepTime_(sleepTime),
      total_time_(0) {
}

// Execute user defined number of sleep calls and collect the
// total time taken by such calls
void rocrtstUtilsTimerTest::run() {

  double time;
  PerfTimer timer;
  uint32_t index = timer.CreateTimer();

  for (uint32_t idx; idx < loopCnt_; idx++) {

    timer.StartTimer(index);
    usleep(sleepTime_);
    timer.StopTimer(index);
    time = timer.ReadTimer(index);
    total_time_ += time;
  }
}

// Writes the iteration count and the accumulated time reported by the
// Hsa Perf Utils Timer service to standard output, on a single line.
void rocrtstUtilsTimerTest::print() {
  std::cout << "Time taken by " << loopCnt_
            << " iterations of sleep is: " << total_time_ << std::endl;
}


================================================
FILE: rocrtst/common/utils_test/utils_timer_test.hpp
================================================
#ifndef ROCRTST_UTILS_TIMER_TEST_H_
#define ROCRTST_UTILS_TIMER_TEST_H_

// uint32_t is used below; include its header so this file does not depend on
// what the including translation unit happened to include first.
#include <stdint.h>

// Encapsulates Api's to access Timer service of rocrtst Utils library.
// The driver repeatedly calls a sleep Api, times each call and accumulates
// the measured time.
class rocrtstUtilsTimerTest {

 public:

  // Destructor method of test driver
  ~rocrtstUtilsTimerTest();

  // Constructor method of test driver
  //
  // @param loopCnt number of times to call sleep Api
  //
  // @param sleepTime time to sleep per call, documented as milliseconds
  rocrtstUtilsTimerTest(uint32_t loopCnt, uint32_t sleepTime);

  // Execute user defined number of sleep calls and collect the
  // total time taken by such calls
  void run();

  // Print time reported by rocrtst Utils Timer service
  void print();

 private:

  // Number of times to invoke sleep Api
  uint32_t loopCnt_;

  // Time to sleep per cycle, in milliseconds
  uint32_t sleepTime_;

  // Time taken by sleep Api, accumulated over all iterations
  double total_time_;
};

#endif


================================================
FILE: rocrtst/gtest/CMakeLists.txt
================================================
# Sources of the Google Test Framework.
set(gtFrwkSrcs
    src/gtest.cpp
    src/gtest-port.cpp
    src/gtest-printers.cpp
    src/gtest-filepath.cpp
    src/gtest-test-part.cpp
    src/gtest-typed-test.cpp
    src/gtest-death-test.cpp
    src/gtest_main.cpp)

# Header search paths.
include_directories(
    include
    include/gtest
    ${CMAKE_CURRENT_SOURCE_DIR})

# Google Test Framework is built as a static library.
add_library(${GOOGLE_TEST_FRWK_NAME} STATIC ${gtFrwkSrcs})

# Collect the build artifacts in one common location.
install(TARGETS ${GOOGLE_TEST_FRWK_NAME}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)


================================================
FILE: rocrtst/gtest/include/gtest/gtest-death-test.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for death tests.  It is
// #included by gtest.h so a user doesn't need to include this
// directly.

#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_

#include "gtest/internal/gtest-death-test-internal.h"

namespace testing {

// This flag controls the style of death tests.  Valid values are "threadsafe",
// meaning that the death test child process will re-execute the test binary
// from the start, running only a single death test, or "fast",
// meaning that the child process will execute the test logic immediately
// after forking.
// Declared here through GTEST_DECLARE_string_, which is defined elsewhere.
GTEST_DECLARE_string_(death_test_style);

#if GTEST_HAS_DEATH_TEST

namespace internal {

// Returns a Boolean value indicating whether the caller is currently
// executing in the context of the death test child process.  Tools such as
// Valgrind heap checkers may need this to modify their behavior in death
// tests.  IMPORTANT: This is an internal utility.  Using it may break the
// implementation of death tests.  User code MUST NOT use it.
// Declaration only; the definition is not in this header.
GTEST_API_ bool InDeathTestChild();

}  // namespace internal

// The following macros are useful for writing death tests.

// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
// executed:
//
//   1. It generates a warning if there is more than one active
//   thread.  This is because it's safe to fork() or clone() only
//   when there is a single thread.
//
//   2. The parent process clone()s a sub-process and runs the death
//   test in it; the sub-process exits with code 0 at the end of the
//   death test, if it hasn't exited already.
//
//   3. The parent process waits for the sub-process to terminate.
//
//   4. The parent process checks the exit code and error message of
//   the sub-process.
//
// Examples:
//
//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
//   for (int i = 0; i < 5; i++) {
//     EXPECT_DEATH(server.ProcessRequest(i),
//                  "Invalid request .* in ProcessRequest()")
//                  << "Failed to die on request " << i;
//   }
//
//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
//
//   bool KilledBySIGHUP(int exit_code) {
//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
//   }
//
//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
//
// On the regular expressions used in death tests:
//
//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
//   which uses the POSIX extended regex syntax.
//
//   On other platforms (e.g. Windows), we only support a simple regex
//   syntax implemented as part of Google Test.  This limited
//   implementation should be enough most of the time when writing
//   death tests; though it lacks many features you can find in PCRE
//   or POSIX extended regex syntax.  For example, we don't support
//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
//   repetition count ("x{5,7}"), among others.
//
//   Below is the syntax that we do support.  We chose it to be a
//   subset of both PCRE and POSIX extended regex, so it's easy to
//   learn wherever you come from.  In the following: 'A' denotes a
//   literal character, period (.), or a single \\ escape sequence;
//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
//   natural numbers.
//
//     c     matches any literal character c
//     \\d   matches any decimal digit
//     \\D   matches any character that's not a decimal digit
//     \\f   matches \f
//     \\n   matches \n
//     \\r   matches \r
//     \\s   matches any ASCII whitespace, including \n
//     \\S   matches any character that's not a whitespace
//     \\t   matches \t
//     \\v   matches \v
//     \\w   matches any letter, _, or decimal digit
//     \\W   matches any character that \\w doesn't match
//     \\c   matches any literal character c, which must be a punctuation
//     .     matches any single character except \n
//     A?    matches 0 or 1 occurrences of A
//     A*    matches 0 or many occurrences of A
//     A+    matches 1 or many occurrences of A
//     ^     matches the beginning of a string (not that of each line)
//     $     matches the end of a string (not that of each line)
//     xy    matches x followed by y
//
//   If you accidentally use PCRE or POSIX extended regex features
//   not implemented by us, you will get a run-time failure.  In that
//   case, please try to rewrite your regular expression within the
//   above syntax.
//
//   This implementation is *not* meant to be as highly tuned or robust
//   as a compiled regex library, but should perform well enough for a
//   death test, which already incurs significant overhead by launching
//   a child process.
//
// Known caveats:
//
//   A "threadsafe" style death test obtains the path to the test
//   program from argv[0] and re-executes it in the sub-process.  For
//   simplicity, the current implementation doesn't search the PATH
//   when launching the sub-process.  This means that the user must
//   invoke the test program via a path that contains at least one
//   path separator (e.g. path/to/foo_test and
//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
//   is rarely a problem as people usually don't put the test binary
//   directory in PATH.
//
// TODO(wan@google.com): make thread-safe death tests search the PATH.

// Asserts that a given statement causes the program to exit, with an
// integer exit status that satisfies predicate, and emitting error output
// that matches regex.
// Forwards to GTEST_DEATH_TEST_ with GTEST_FATAL_FAILURE_ as the failure
// handler.
# define ASSERT_EXIT(statement, predicate, regex) \
    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)

// Like ASSERT_EXIT, but continues on to successive tests in the
// test case, if any:
// Forwards to GTEST_DEATH_TEST_ with GTEST_NONFATAL_FAILURE_ as the failure
// handler.
# define EXPECT_EXIT(statement, predicate, regex) \
    GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)

// Asserts that a given statement causes the program to exit, either by
// explicitly exiting with a nonzero exit code or being killed by a
// signal, and emitting error output that matches regex.
// Implemented as ASSERT_EXIT with the predicate
// ::testing::internal::ExitedUnsuccessfully.
# define ASSERT_DEATH(statement, regex) \
    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Like ASSERT_DEATH, but continues on to successive tests in the
// test case, if any:
// Implemented as EXPECT_EXIT with the same predicate.
# define EXPECT_DEATH(statement, regex) \
    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)

// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:

// Tests that an exit code describes a normal exit with a given exit code.
// Predicate class usable as the `predicate` argument of
// {ASSERT,EXPECT}_EXIT. The member functions are defined elsewhere.
class GTEST_API_ ExitedWithCode {
 public:
  // Takes the exit code to test for; kept in the const member exit_code_.
  explicit ExitedWithCode(int exit_code);
  // Called with an exit status; returns whether it satisfies this predicate.
  bool operator()(int exit_status) const;
 private:
  // No implementation - assignment is unsupported.
  void operator=(const ExitedWithCode& other);

  const int exit_code_;
};

# if !GTEST_OS_WINDOWS
// Tests that an exit code describes an exit due to termination by a
// given signal.
// Predicate class usable as the `predicate` argument of
// {ASSERT,EXPECT}_EXIT; only declared when GTEST_OS_WINDOWS is not set.
// The member functions are defined elsewhere.
class GTEST_API_ KilledBySignal {
 public:
  // Takes the signal number to test for; kept in the const member signum_.
  explicit KilledBySignal(int signum);
  // Called with an exit status; returns whether it satisfies this predicate.
  bool operator()(int exit_status) const;
 private:
  const int signum_;
};
# endif  // !GTEST_OS_WINDOWS

// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
// The death testing framework causes this to have interesting semantics,
// since the sideeffects of the call are only visible in opt mode, and not
// in debug mode.
//
// In practice, this can be used to test functions that utilize the
// LOG(DFATAL) macro using the following style:
//
// int DieInDebugOr12(int* sideeffect) {
//   if (sideeffect) {
//     *sideeffect = 12;
//   }
//   LOG(DFATAL) << "death";
//   return 12;
// }
//
// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
//   int sideeffect = 0;
//   // Only asserts in dbg.
//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
//
// #ifdef NDEBUG
//   // opt-mode has sideeffect visible.
//   EXPECT_EQ(12, sideeffect);
// #else
//   // dbg-mode no visible sideeffect.
//   EXPECT_EQ(0, sideeffect);
// #endif
// }
//
// This will assert that DieInDebugOr12() crashes in debug
// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
// appropriate fallback value (12 in this case) in opt mode. If you
// need to test that a function has appropriate side-effects in opt
// mode, include assertions against the side-effects.  A general
// pattern for this is:
//
// EXPECT_DEBUG_DEATH({
//   // Side-effects here will have an effect after this statement in
//   // opt mode, but none in debug mode.
//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
// }, "death");
//
# ifdef NDEBUG

// NDEBUG (opt) builds: no death test is set up; both macros forward
// statement and regex to GTEST_EXECUTE_STATEMENT_ (defined elsewhere).

#  define EXPECT_DEBUG_DEATH(statement, regex) \
  GTEST_EXECUTE_STATEMENT_(statement, regex)

#  define ASSERT_DEBUG_DEATH(statement, regex) \
  GTEST_EXECUTE_STATEMENT_(statement, regex)

# else

// Builds without NDEBUG: the macros are plain aliases for EXPECT_DEATH and
// ASSERT_DEATH.

#  define EXPECT_DEBUG_DEATH(statement, regex) \
  EXPECT_DEATH(statement, regex)

#  define ASSERT_DEBUG_DEATH(statement, regex) \
  ASSERT_DEATH(statement, regex)

# endif  // NDEBUG for EXPECT_DEBUG_DEATH
#endif  // GTEST_HAS_DEATH_TEST

// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
// death tests are supported; otherwise they just issue a warning.  This is
// useful when you are combining death test assertions with normal test
// assertions in one test.
// In the unsupported branch the third argument of
// GTEST_UNSUPPORTED_DEATH_TEST_ is empty for the EXPECT_ form and `return`
// for the ASSERT_ form.
#if GTEST_HAS_DEATH_TEST
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
    EXPECT_DEATH(statement, regex)
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
    ASSERT_DEATH(statement, regex)
#else
# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
    GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
#endif

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-message.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the Message class.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
// program!

#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_

#include <limits>

#include "gtest/internal/gtest-port.h"

// Ensures that there is at least one operator<< in the global namespace.
// See Message& operator<<(...) below for why.
// Declaration only: it exists so that the `using ::operator <<;` declaration
// inside Message always has something to name.
void operator<<(const testing::internal::Secret&, int);

namespace testing {

// The Message class works like an ostream repeater.
//
// Typical usage:
//
//   1. You stream a bunch of values to a Message object.
//      It will remember the text in a stringstream.
//   2. Then you stream the Message object to an ostream.
//      This causes the text in the Message to be streamed
//      to the ostream.
//
// For example;
//
//   testing::Message foo;
//   foo << 1 << " != " << 2;
//   std::cout << foo;
//
// will print "1 != 2".
//
// Message is not intended to be inherited from.  In particular, its
// destructor is not virtual.
//
// Note that stringstream behaves differently in gcc and in MSVC.  You
// can stream a NULL char pointer to it in the former, but not in the
// latter (it causes an access violation if you do).  The Message
// class hides this difference by treating a NULL char pointer as
// "(null)".
class GTEST_API_ Message {
 private:
  // The type of basic IO manipulators (endl, ends, and flush) for
  // narrow streams.
  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);

 public:
  // Constructs an empty Message.
  Message();

  // Copy constructor.
  // Allocates a fresh stringstream and seeds it with the text of `msg`
  // (as returned by msg.GetString()), so the two objects do not share a
  // buffer.
  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
    *ss_ << msg.GetString();
  }

  // Constructs a Message from a C-string.
  explicit Message(const char* str) : ss_(new ::std::stringstream) {
    *ss_ << str;
  }

#if GTEST_OS_SYMBIAN
  // Streams a value (either a pointer or not) to this object.
  // Dispatches to one of the private StreamHelper overloads based on
  // internal::is_pointer<T>.
  template <typename T>
  inline Message& operator <<(const T& value) {
    StreamHelper(typename internal::is_pointer<T>::type(), value);
    return *this;
  }
#else
  // Streams a non-pointer value to this object.
  template <typename T>
  inline Message& operator <<(const T& val) {
    // Some libraries overload << for STL containers.  These
    // overloads are defined in the global namespace instead of ::std.
    //
    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
    // overloads are visible in either the std namespace or the global
    // namespace, but not other namespaces, including the testing
    // namespace which Google Test's Message class is in.
    //
    // To allow STL containers (and other types that has a << operator
    // defined in the global namespace) to be used in Google Test
    // assertions, testing::Message must access the custom << operator
    // from the global namespace.  With this using declaration,
    // overloads of << defined in the global namespace and those
    // visible via Koenig lookup are both exposed in this function.
    using ::operator <<;
    *ss_ << val;
    return *this;
  }

  // Streams a pointer value to this object.
  //
  // This function is an overload of the previous one.  When you
  // stream a pointer to a Message, this definition will be used as it
  // is more specialized.  (The C++ Standard, section
  // [temp.func.order].)  If you stream a non-pointer, then the
  // previous definition will be used.
  //
  // The reason for this overload is that streaming a NULL pointer to
  // ostream is undefined behavior.  Depending on the compiler, you
  // may get "0", "(nil)", "(null)", or an access violation.  To
  // ensure consistent result across compilers, we always treat NULL
  // as "(null)".
  template <typename T>
  inline Message& operator <<(T* const& pointer) {  // NOLINT
    if (pointer == NULL) {
      *ss_ << "(null)";
    }
    else {
      *ss_ << pointer;
    }

    return *this;
  }
#endif  // GTEST_OS_SYMBIAN

  // Since the basic IO manipulators are overloaded for both narrow
  // and wide streams, we have to provide this specialized definition
  // of operator <<, even though its body is the same as the
  // templatized version above.  Without this definition, streaming
  // endl or other basic IO manipulators to Message will confuse the
  // compiler.
  Message& operator <<(BasicNarrowIoManip val) {
    *ss_ << val;
    return *this;
  }

  // Instead of 1/0, we want to see true/false for bool values.
  // Streams the literal text "true" or "false" through the overloads above.
  Message& operator <<(bool b) {
    return *this << (b ? "true" : "false");
  }

  // These two overloads allow streaming a wide C string to a Message
  // using the UTF-8 encoding.
  Message& operator <<(const wchar_t* wide_c_str);
  Message& operator <<(wchar_t* wide_c_str);

#if GTEST_HAS_STD_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::std::wstring& wstr);
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
  // Converts the given wide string to a narrow string using the UTF-8
  // encoding, and streams the result to this Message object.
  Message& operator <<(const ::wstring& wstr);
#endif  // GTEST_HAS_GLOBAL_WSTRING

  // Gets the text streamed to this object so far as an std::string.
  // Each '\0' character in the buffer is replaced with "\\0".
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  std::string GetString() const;

 private:

#if GTEST_OS_SYMBIAN
  // These are needed as the Nokia Symbian Compiler cannot decide between
  // const T& and const T* in a function template. The Nokia compiler _can_
  // decide between class template specializations for T and T*, so a
  // tr1::type_traits-like is_pointer works, and we can overload on that.
  // Pointer case: a NULL pointer is written as "(null)".
  template <typename T>
  inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
    if (pointer == NULL) {
      *ss_ << "(null)";
    }
    else {
      *ss_ << pointer;
    }
  }
  // Non-pointer case: the value is streamed as is.
  template <typename T>
  inline void StreamHelper(internal::false_type /*is_pointer*/,
                           const T& value) {
    // See the comments in Message& operator <<(const T&) above for why
    // we need this using statement.
    using ::operator <<;
    *ss_ << value;
  }
#endif  // GTEST_OS_SYMBIAN

  // We'll hold the text streamed to this object here.
  // The scoped_ptr itself is const; the stringstream it points to is
  // still written to by the operators above.
  const internal::scoped_ptr< ::std::stringstream> ss_;

  // We declare (but don't implement) this to prevent the compiler
  // from implementing the assignment operator.
  void operator=(const Message&);
};

// Writes the text accumulated in a Message to an ostream and returns that
// stream so further insertions can be chained.
inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
  const std::string text = sb.GetString();
  os << text;
  return os;
}

namespace internal {

// Converts a streamable value to an std::string.  A NULL pointer is
// converted to "(null)".  When the input value is a ::string,
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
template <typename T>
std::string StreamableToString(const T& streamable) {
  return (Message() << streamable).GetString();
}

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-param-test.h
================================================
// This file was GENERATED by command:
//     pump.py gtest-param-test.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: vladl@google.com (Vlad Losev)
//
// Macros and functions for implementing parameterized tests
// in Google C++ Testing Framework (Google Test)
//
// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_


// Value-parameterized tests allow you to test your code with different
// parameters without writing multiple copies of the same test.
//
// Here is how you use value-parameterized tests:

#if 0

// To write value-parameterized tests, first you should define a fixture
// class. It is usually derived from testing::TestWithParam<T> (see below for
// another inheritance scheme that's sometimes useful in more complicated
// class hierarchies), where T is the type of your parameter values.
// TestWithParam<T> is itself derived from testing::Test. T can be any
// copyable type. If it's a raw pointer, you are responsible for managing the
// lifespan of the pointed values.

class FooTest : public ::testing::TestWithParam<const char*> {
  // You can implement all the usual class fixture members here.
};

// Then, use the TEST_P macro to define as many parameterized tests
// for this fixture as you want. The _P suffix is for "parameterized"
// or "pattern", whichever you prefer to think.

TEST_P(FooTest, DoesBlah) {
  // Inside a test, access the test parameter with the GetParam() method
  // of the TestWithParam<T> class:
  EXPECT_TRUE(foo.Blah(GetParam()));
  ...
}

TEST_P(FooTest, HasBlahBlah) {
  ...
}

// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
// case with any set of parameters you want. Google Test defines a number
// of functions for generating test parameters. They return what we call
// (surprise!) parameter generators. Here is a  summary of them, which
// are all in the testing namespace:
//
//
//  Range(begin, end [, step]) - Yields values {begin, begin+step,
//                               begin+step+step, ...}. The values do not
//                               include end. step defaults to 1.
//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
//  ValuesIn(container)        - Yields values from a C-style array, an STL
//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
//  Bool()                     - Yields sequence {false, true}.
//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
//                               for the math savvy) of the values generated
//                               by the N generators.
//
// For more details, see comments at the definitions of these functions below
// in this file.
//
// The following statement will instantiate tests from the FooTest test case
// each with parameter values "meeny", "miny", and "moe".

INSTANTIATE_TEST_CASE_P(InstantiationName,
                        FooTest,
                        Values("meeny", "miny", "moe"));

// To distinguish different instances of the pattern, (yes, you
// can instantiate it more than once) the first argument to the
// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
// actual test case name. Remember to pick unique prefixes for different
// instantiations. The tests from the instantiation above will have
// these names:
//
//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
//
// You can use these names in --gtest_filter.
//
// This statement will instantiate all tests from FooTest again, each
// with parameter values "cat" and "dog":

const char* pets[] = {"cat", "dog"};
INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));

// The tests from the instantiation above will have these names:
//
//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
//
// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
// in the given test case, whether their definitions come before or
// AFTER the INSTANTIATE_TEST_CASE_P statement.
//
// Please also note that generator expressions (including parameters to the
// generators) are evaluated in InitGoogleTest(), after main() has started.
// This allows the user on one hand, to adjust generator parameters in order
// to dynamically determine a set of tests to run and on the other hand,
// give the user a chance to inspect the generated tests with Google Test
// reflection API before RUN_ALL_TESTS() is executed.
//
// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
// for more examples.
//
// In the future, we plan to publish the API for defining new parameter
// generators. But for now this interface remains part of the internal
// implementation and is subject to change.
//
//
// A parameterized test fixture must be derived from testing::Test and from
// testing::WithParamInterface<T>, where T is the type of the parameter
// values. Inheriting from TestWithParam<T> satisfies that requirement because
// TestWithParam<T> inherits from both Test and WithParamInterface. In more
// complicated hierarchies, however, it is occasionally useful to inherit
// separately from Test and WithParamInterface. For example:

class BaseTest : public ::testing::Test {
  // You can inherit all the usual members for a non-parameterized test
  // fixture here.
};

class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
  // The usual test fixture members go here too.
};

TEST_F(BaseTest, HasFoo) {
  // This is an ordinary non-parameterized test.
}

TEST_P(DerivedTest, DoesBlah) {
  // GetParam works just the same here as if you inherit from TestWithParam.
  EXPECT_TRUE(foo.Blah(GetParam()));
}

#endif  // 0

#include "gtest/internal/gtest-port.h"

#if !GTEST_OS_SYMBIAN
# include <utility>
#endif

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-param-util.h"
#include "gtest/internal/gtest-param-util-generated.h"

#if GTEST_HAS_PARAM_TEST

namespace testing {

// Functions producing parameter generators.
//
// Google Test uses these generators to produce parameters for value-
// parameterized tests. When a parameterized test case is instantiated
// with a particular generator, Google Test creates and runs tests
// for each element in the sequence produced by the generator.
//
// In the following sample, tests from test case FooTest are instantiated
// each three times with parameter values 3, 5, and 8:
//
// class FooTest : public TestWithParam<int> { ... };
//
// TEST_P(FooTest, TestThis) {
// }
// TEST_P(FooTest, TestThat) {
// }
// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
//

// Range() returns generators providing sequences of values in a range.
//
// Synopsis:
// Range(start, end)
//   - returns a generator producing a sequence of values {start, start+1,
//     start+2, ..., }.
// Range(start, end, step)
//   - returns a generator producing a sequence of values {start, start+step,
//     start+step+step, ..., }.
// Notes:
//   * The generated sequences never include end. For example, Range(1, 5)
//     returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
//     returns a generator producing {1, 3, 5, 7}.
//   * start and end must have the same type. That type may be any integral or
//     floating-point type or a user defined type satisfying these conditions:
//     * It must be assignable (have operator=() defined).
//     * It must have operator+() (operator+(int-compatible type) for
//       two-operand version).
//     * It must have operator<() defined.
//     Elements in the resulting sequences will also have that type.
//   * Condition start < end must be satisfied in order for resulting sequences
//     to contain any elements.
//
template <typename T, typename IncrementT>
internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
  // The returned ParamGenerator is constructed from a newly allocated
  // RangeGenerator built from start, end and step.
  typedef internal::RangeGenerator<T, IncrementT> StepGenerator;
  return internal::ParamGenerator<T>(new StepGenerator(start, end, step));
}

template <typename T>
internal::ParamGenerator<T> Range(T start, T end) {
  // Two-argument form: forward to the three-argument overload with a step
  // of 1.
  const int kDefaultStep = 1;
  return Range(start, end, kDefaultStep);
}

// ValuesIn() function allows generation of tests with parameters coming from
// a container.
//
// Synopsis:
// ValuesIn(const T (&array)[N])
//   - returns a generator producing sequences with elements from
//     a C-style array.
// ValuesIn(const Container& container)
//   - returns a generator producing sequences with elements from
//     an STL-style container.
// ValuesIn(Iterator begin, Iterator end)
//   - returns a generator producing sequences with elements from
//     a range [begin, end) defined by a pair of STL-style iterators. These
//     iterators can also be plain C pointers.
//
// Please note that ValuesIn copies the values from the containers
// passed in and keeps them to generate tests in RUN_ALL_TESTS().
//
// Examples:
//
// This instantiates tests from test case StringTest
// each with C-string values of "foo", "bar", and "baz":
//
// const char* strings[] = {"foo", "bar", "baz"};
// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings));
//
// This instantiates tests from test case StlStringTest
// each with STL strings with values "a" and "b":
//
// ::std::vector< ::std::string> GetParameterStrings() {
//   ::std::vector< ::std::string> v;
//   v.push_back("a");
//   v.push_back("b");
//   return v;
// }
//
// INSTANTIATE_TEST_CASE_P(CharSequence,
//                         StlStringTest,
//                         ValuesIn(GetParameterStrings()));
//
//
// This will also instantiate tests from CharTest
// each with parameter values 'a' and 'b':
//
// ::std::list<char> GetParameterChars() {
//   ::std::list<char> list;
//   list.push_back('a');
//   list.push_back('b');
//   return list;
// }
// ::std::list<char> l = GetParameterChars();
// INSTANTIATE_TEST_CASE_P(CharSequence2,
//                         CharTest,
//                         ValuesIn(l.begin(), l.end()));
//
template <typename ForwardIterator>
internal::ParamGenerator<
    typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
ValuesIn(ForwardIterator begin, ForwardIterator end) {
  // The parameter type is the value type of the iterator.
  typedef typename ::testing::internal::IteratorTraits<
      ForwardIterator>::value_type ElementType;
  typedef internal::ValuesInIteratorRangeGenerator<ElementType> RangeSource;
  return internal::ParamGenerator<ElementType>(new RangeSource(begin, end));
}

template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
  // Treat the C-style array as the pointer range [first, last) and reuse
  // the iterator-range overload.
  const T* const first = array;
  const T* const last = first + N;
  return ValuesIn(first, last);
}

template <class Container>
internal::ParamGenerator<typename Container::value_type>
ValuesIn(const Container& container) {
  // Forward the container's full [begin(), end()) range to the
  // iterator-range overload.
  return ValuesIn(container.begin(), container.end());
}

// Values() allows generating tests from explicitly specified list of
// parameters.
//
// Synopsis:
// Values(T v1, T v2, ..., T vN)
//   - returns a generator producing sequences with elements v1, v2, ..., vN.
//
// For example, this instantiates tests from test case BarTest each
// with values "one", "two", and "three":
//
// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
//
// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
// The exact type of values will depend on the type of parameter in BazTest.
//
// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
//
// Currently, Values() supports from 1 to 50 parameters.
//
template <typename T1>
internal::ValueArray1<T1> Values(T1 v1) {
  // Wrap the single value in a ValueArray1.
  typedef internal::ValueArray1<T1> ParamValues;
  return ParamValues(v1);
}

template <typename T1, typename T2>
internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) {
  // Wrap the two values in a ValueArray2.
  typedef internal::ValueArray2<T1, T2> ParamValues;
  return ParamValues(v1, v2);
}

template <typename T1, typename T2, typename T3>
internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) {
  // Wrap the three values in a ValueArray3.
  typedef internal::ValueArray3<T1, T2, T3> ParamValues;
  return ParamValues(v1, v2, v3);
}

template <typename T1, typename T2, typename T3, typename T4>
internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) {
  // Wrap the four values in a ValueArray4.
  typedef internal::ValueArray4<T1, T2, T3, T4> ParamValues;
  return ParamValues(v1, v2, v3, v4);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5>
internal::ValueArray5<T1, T2, T3, T4, T5>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) {
  // Wrap the five values in a ValueArray5.
  typedef internal::ValueArray5<T1, T2, T3, T4, T5> ParamValues;
  return ParamValues(v1, v2, v3, v4, v5);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6>
internal::ValueArray6<T1, T2, T3, T4, T5, T6>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) {
  // Wrap the six values in a ValueArray6.
  typedef internal::ValueArray6<T1, T2, T3, T4, T5, T6> ParamValues;
  return ParamValues(v1, v2, v3, v4, v5, v6);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7>
internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) {
  // Wrap the seven values in a ValueArray7.
  typedef internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> ParamValues;
  return ParamValues(v1, v2, v3, v4, v5, v6, v7);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8>
internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) {
  // Wrap the eight values in a ValueArray8.
  typedef internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> ParamValues;
  return ParamValues(v1, v2, v3, v4, v5, v6, v7, v8);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9>
internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) {
  // Wrap the nine values in a ValueArray9.
  typedef internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>
      ParamValues;
  return ParamValues(v1, v2, v3, v4, v5, v6, v7, v8, v9);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10>
internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) {
  // Wrap the ten values in a ValueArray10.
  typedef internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>
      ParamValues;
  return ParamValues(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11>
internal::ValueArray11<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11) {
  // Wrap the eleven values in a ValueArray11.
  typedef internal::ValueArray11<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12>
internal::ValueArray12<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12) {
  // Wrap the twelve values in a ValueArray12.
  typedef internal::ValueArray12<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13>
internal::ValueArray13<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13) {
  // Wrap the thirteen values in a ValueArray13.
  typedef internal::ValueArray13<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14>
internal::ValueArray14<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14) {
  // Wrap the fourteen values in a ValueArray14.
  typedef internal::ValueArray14<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15>
internal::ValueArray15<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) {
  // Wrap the fifteen values in a ValueArray15.
  typedef internal::ValueArray15<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16>
internal::ValueArray16<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16) {
  // Wrap the sixteen values in a ValueArray16.
  typedef internal::ValueArray16<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17>
internal::ValueArray17<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17) {
  // Wrap the seventeen values in a ValueArray17.
  typedef internal::ValueArray17<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18>
internal::ValueArray18<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18) {
  // Wrap the eighteen values in a ValueArray18.
  typedef internal::ValueArray18<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19>
internal::ValueArray19<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19) {
  // Wrap the nineteen values in a ValueArray19.
  typedef internal::ValueArray19<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20>
internal::ValueArray20<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) {
  // Wrap the twenty values in a ValueArray20.
  typedef internal::ValueArray20<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21>
internal::ValueArray21<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21) {
  // Wrap the twenty-one values in a ValueArray21.
  typedef internal::ValueArray21<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22>
internal::ValueArray22<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22) {
  // Wrap the twenty-two values in a ValueArray22.
  typedef internal::ValueArray22<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23>
internal::ValueArray23<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23) {
  // Wrap the twenty-three values in a ValueArray23.
  typedef internal::ValueArray23<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24>
internal::ValueArray24<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24) {
  // Wrap the twenty-four values in a ValueArray24.
  typedef internal::ValueArray24<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25>
internal::ValueArray25<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) {
  // Wrap the twenty-five values in a ValueArray25.
  typedef internal::ValueArray25<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26>
internal::ValueArray26<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26) {
  // Wrap the twenty-six values in a ValueArray26.
  typedef internal::ValueArray26<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27>
internal::ValueArray27<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27) {
  // Wrap the twenty-seven values in a ValueArray27.
  typedef internal::ValueArray27<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27);
}

template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28>
internal::ValueArray28<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
    T6 v6, T7 v7, T8 v8, T9 v9, T10 v10,
    T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
    T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
    T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
    T26 v26, T27 v27, T28 v28) {
  // Wrap the twenty-eight values in a ValueArray28.
  typedef internal::ValueArray28<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28> ParamValues;
  return ParamValues(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28);
}

// Returns an internal::ValueArray29 constructed from the 29 arguments.
// Every argument is taken by value with its own type (T1..T29) and handed to
// the ValueArray29 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29>
internal::ValueArray29<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray29<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29);
}

// Returns an internal::ValueArray30 constructed from the 30 arguments.
// Every argument is taken by value with its own type (T1..T30) and handed to
// the ValueArray30 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30>
internal::ValueArray30<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray30<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30);
}

// Returns an internal::ValueArray31 constructed from the 31 arguments.
// Every argument is taken by value with its own type (T1..T31) and handed to
// the ValueArray31 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31>
internal::ValueArray31<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray31<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31);
}

// Returns an internal::ValueArray32 constructed from the 32 arguments.
// Every argument is taken by value with its own type (T1..T32) and handed to
// the ValueArray32 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32>
internal::ValueArray32<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray32<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32);
}

// Returns an internal::ValueArray33 constructed from the 33 arguments.
// Every argument is taken by value with its own type (T1..T33) and handed to
// the ValueArray33 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33>
internal::ValueArray33<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray33<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33);
}

// Returns an internal::ValueArray34 constructed from the 34 arguments.
// Every argument is taken by value with its own type (T1..T34) and handed to
// the ValueArray34 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34>
internal::ValueArray34<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray34<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34);
}

// Returns an internal::ValueArray35 constructed from the 35 arguments.
// Every argument is taken by value with its own type (T1..T35) and handed to
// the ValueArray35 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35>
internal::ValueArray35<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray35<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35);
}

// Returns an internal::ValueArray36 constructed from the 36 arguments.
// Every argument is taken by value with its own type (T1..T36) and handed to
// the ValueArray36 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36>
internal::ValueArray36<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray36<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36);
}

// Returns an internal::ValueArray37 constructed from the 37 arguments.
// Every argument is taken by value with its own type (T1..T37) and handed to
// the ValueArray37 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37>
internal::ValueArray37<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray37<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37);
}

// Returns an internal::ValueArray38 constructed from the 38 arguments.
// Every argument is taken by value with its own type (T1..T38) and handed to
// the ValueArray38 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38>
internal::ValueArray38<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray38<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38);
}

// Returns an internal::ValueArray39 constructed from the 39 arguments.
// Every argument is taken by value with its own type (T1..T39) and handed to
// the ValueArray39 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39>
internal::ValueArray39<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray39<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39);
}

// Returns an internal::ValueArray40 constructed from the 40 arguments.
// Every argument is taken by value with its own type (T1..T40) and handed to
// the ValueArray40 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40>
internal::ValueArray40<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray40<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40);
}

// Returns an internal::ValueArray41 constructed from the 41 arguments.
// Every argument is taken by value with its own type (T1..T41) and handed to
// the ValueArray41 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41>
internal::ValueArray41<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray41<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41);
}

// Returns an internal::ValueArray42 constructed from the 42 arguments.
// Every argument is taken by value with its own type (T1..T42) and handed to
// the ValueArray42 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42>
internal::ValueArray42<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray42<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42);
}

// Returns an internal::ValueArray43 constructed from the 43 arguments.
// Every argument is taken by value with its own type (T1..T43) and handed to
// the ValueArray43 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43>
internal::ValueArray43<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray43<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43);
}

// Returns an internal::ValueArray44 constructed from the 44 arguments.
// Every argument is taken by value with its own type (T1..T44) and handed to
// the ValueArray44 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44>
internal::ValueArray44<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray44<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44);
}

// Returns an internal::ValueArray45 constructed from the 45 arguments.
// Every argument is taken by value with its own type (T1..T45) and handed to
// the ValueArray45 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45>
internal::ValueArray45<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray45<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45);
}

// Returns an internal::ValueArray46 constructed from the 46 arguments.
// Every argument is taken by value with its own type (T1..T46) and handed to
// the ValueArray46 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46>
internal::ValueArray46<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray46<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46);
}

// Returns an internal::ValueArray47 constructed from the 47 arguments.
// Every argument is taken by value with its own type (T1..T47) and handed to
// the ValueArray47 constructor in the same order it was passed here.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47>
internal::ValueArray47<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47>
Values(
    T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
    T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
    T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
    T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
    T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
    T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) {
  // Name the result type once so the return statement stays readable.
  typedef internal::ValueArray47<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47> ResultArray;
  return ResultArray(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47);
}

// Values() overload for exactly 48 arguments: bundles v1..v48, in order, into
// an internal::ValueArray48 and returns that holder by value.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48>
internal::ValueArray48<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
       T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
       T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
       T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
       T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) {
  // Name the holder type once so the return statement stays readable.
  typedef internal::ValueArray48<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48);
}

// Values() overload for exactly 49 arguments: bundles v1..v49, in order, into
// an internal::ValueArray49 and returns that holder by value.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48, typename T49>
internal::ValueArray49<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
       T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
       T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
       T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
       T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
       T49 v49) {
  // Name the holder type once so the return statement stays readable.
  typedef internal::ValueArray49<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48, v49);
}

// Values() overload for exactly 50 arguments: bundles v1..v50, in order, into
// an internal::ValueArray50 and returns that holder by value.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48, typename T49, typename T50>
internal::ValueArray50<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>
Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
       T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
       T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
       T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
       T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
       T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
       T49 v49, T50 v50) {
  // Name the holder type once so the return statement stays readable.
  typedef internal::ValueArray50<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> ArrayType;
  return ArrayType(
      v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
      v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
      v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
      v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
      v41, v42, v43, v44, v45, v46, v47, v48, v49, v50);
}

// Bool() allows generating tests with parameters in a set of (false, true).
//
// Synopsis:
// Bool()
//   - returns a generator producing sequences with elements {false, true}.
//
// It is useful when testing code that depends on Boolean flags. Combinations
// of multiple flags can be tested when several Bool()'s are combined using
// the Combine() function.
//
// In the following example all tests in the test case FlagDependentTest
// will be instantiated twice, once with the parameter false and once with
// the parameter true.
//
// class FlagDependentTest : public testing::TestWithParam<bool> {
//   virtual void SetUp() {
//     external_flag = GetParam();
//   }
// };
// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
//
inline internal::ParamGenerator<bool> Bool() {
  // Delegates to the two-argument Values() overload; its result is converted
  // to the declared ParamGenerator<bool> return type.
  return Values(false, true);
}

# if GTEST_HAS_COMBINE
// Combine() allows the user to combine two or more sequences to produce
// values of a Cartesian product of those sequences' elements.
//
// Synopsis:
// Combine(gen1, gen2, ..., genN)
//   - returns a generator producing sequences with elements coming from
//     the Cartesian product of elements from the sequences generated by
//     gen1, gen2, ..., genN. The sequence elements will have a type of
//     tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
//     of elements from sequences produced by gen1, gen2, ..., genN.
//
// Combine can have up to 10 arguments. This number is currently limited
// by the maximum number of elements in the tuple implementation used by Google
// Test.
//
// Example:
//
// This will instantiate tests in test case AnimalTest each one with
// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
// tuple("dog", BLACK), and tuple("dog", WHITE):
//
// enum Color { BLACK, GRAY, WHITE };
// class AnimalTest
//     : public testing::TestWithParam<tuple<const char*, Color> > {...};
//
// TEST_P(AnimalTest, AnimalLooksNice) {...}
//
// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
//                         Combine(Values("cat", "dog"),
//                                 Values(BLACK, WHITE)));
//
// This will instantiate tests in FlagDependentTest with all variations of two
// Boolean flags:
//
// class FlagDependentTest
//     : public testing::TestWithParam<tuple<bool, bool> > {
//   virtual void SetUp() {
//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
//     tie(external_flag_1, external_flag_2) = GetParam();
//   }
// };
//
// TEST_P(FlagDependentTest, TestFeature1) {
//   // Test your code using external_flag_1 and external_flag_2 here.
// }
// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
//                         Combine(Bool(), Bool()));
//
// Combine() overload for two generators: stores g1 and g2 in an
// internal::CartesianProductHolder2 and returns the holder by value.
template <typename Generator1, typename Generator2>
internal::CartesianProductHolder2<Generator1, Generator2>
Combine(const Generator1& g1, const Generator2& g2) {
  typedef internal::CartesianProductHolder2<Generator1, Generator2> Holder;
  return Holder(g1, g2);
}

// Combine() overload for three generators: stores g1..g3 in an
// internal::CartesianProductHolder3 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3>
internal::CartesianProductHolder3<Generator1, Generator2, Generator3>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3) {
  typedef internal::CartesianProductHolder3<
      Generator1, Generator2, Generator3> Holder;
  return Holder(g1, g2, g3);
}

// Combine() overload for four generators: stores g1..g4 in an
// internal::CartesianProductHolder4 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4>
internal::CartesianProductHolder4<
    Generator1, Generator2, Generator3, Generator4>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4) {
  typedef internal::CartesianProductHolder4<
      Generator1, Generator2, Generator3, Generator4> Holder;
  return Holder(g1, g2, g3, g4);
}

// Combine() overload for five generators: stores g1..g5 in an
// internal::CartesianProductHolder5 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5>
internal::CartesianProductHolder5<
    Generator1, Generator2, Generator3, Generator4, Generator5>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5) {
  typedef internal::CartesianProductHolder5<
      Generator1, Generator2, Generator3, Generator4, Generator5> Holder;
  return Holder(g1, g2, g3, g4, g5);
}

// Combine() overload for six generators: stores g1..g6 in an
// internal::CartesianProductHolder6 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6>
internal::CartesianProductHolder6<
    Generator1, Generator2, Generator3, Generator4, Generator5, Generator6>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6) {
  typedef internal::CartesianProductHolder6<
      Generator1, Generator2, Generator3,
      Generator4, Generator5, Generator6> Holder;
  return Holder(g1, g2, g3, g4, g5, g6);
}

// Combine() overload for seven generators: stores g1..g7 in an
// internal::CartesianProductHolder7 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7>
internal::CartesianProductHolder7<
    Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
    Generator7>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7) {
  typedef internal::CartesianProductHolder7<
      Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
      Generator7> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7);
}

// Combine() overload for eight generators: stores g1..g8 in an
// internal::CartesianProductHolder8 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8>
internal::CartesianProductHolder8<
    Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
    Generator7, Generator8>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8) {
  typedef internal::CartesianProductHolder8<
      Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
      Generator7, Generator8> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8);
}

// Combine() overload for nine generators: stores g1..g9 in an
// internal::CartesianProductHolder9 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8, typename Generator9>
internal::CartesianProductHolder9<
    Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
    Generator7, Generator8, Generator9>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8, const Generator9& g9) {
  typedef internal::CartesianProductHolder9<
      Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
      Generator7, Generator8, Generator9> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8, g9);
}

// Combine() overload for ten generators: stores g1..g10 in an
// internal::CartesianProductHolder10 and returns the holder by value.
template <typename Generator1, typename Generator2, typename Generator3,
          typename Generator4, typename Generator5, typename Generator6,
          typename Generator7, typename Generator8, typename Generator9,
          typename Generator10>
internal::CartesianProductHolder10<
    Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
    Generator7, Generator8, Generator9, Generator10>
Combine(const Generator1& g1, const Generator2& g2, const Generator3& g3,
        const Generator4& g4, const Generator5& g5, const Generator6& g6,
        const Generator7& g7, const Generator8& g8, const Generator9& g9,
        const Generator10& g10) {
  typedef internal::CartesianProductHolder10<
      Generator1, Generator2, Generator3, Generator4, Generator5, Generator6,
      Generator7, Generator8, Generator9, Generator10> Holder;
  return Holder(g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
}
# endif  // GTEST_HAS_COMBINE


// TEST_P(test_case_name, test_name) defines one parameterized test.
//
// The macro expands to:
//   - a class, named by GTEST_TEST_CLASS_NAME_(test_case_name, test_name),
//     that publicly derives from the fixture class test_case_name, has an
//     empty default constructor and declares a virtual TestBody();
//   - a private static AddToRegistry() that looks up the pattern holder for
//     test_case_name in the UnitTest singleton's parameterized test registry
//     (passing the stringized fixture name plus __FILE__/__LINE__) and adds a
//     test pattern consisting of the stringized fixture name, the stringized
//     test name and a newly allocated TestMetaFactory for the class;
//   - a static int member gtest_registering_dummy_ whose initializer calls
//     AddToRegistry(), so the registration happens when that static member
//     is initialized;
//   - the opening of the out-of-class TestBody() definition.  The macro ends
//     right after the function header, so the braces written after
//     TEST_P(...) become the test body.
# define TEST_P(test_case_name, test_name) \
  class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
      : public test_case_name { \
   public: \
    GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
    virtual void TestBody(); \
   private: \
    static int AddToRegistry() { \
      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
          GetTestCasePatternHolder<test_case_name>(\
              #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
                  #test_case_name, \
                  #test_name, \
                  new ::testing::internal::TestMetaFactory< \
                      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
      return 0; \
    } \
    static int gtest_registering_dummy_; \
    GTEST_DISALLOW_COPY_AND_ASSIGN_(\
        GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
  }; \
  int GTEST_TEST_CLASS_NAME_(test_case_name, \
                             test_name)::gtest_registering_dummy_ = \
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
  void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

// INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) registers one
// instantiation of the parameterized test case test_case_name.
//
// The macro expands to:
//   - a function gtest_<prefix><test_case_name>_EvalGenerator_() that returns
//     `generator` as a ParamGenerator<test_case_name::ParamType>, so the
//     generator expression is evaluated only when that function is called;
//   - an int variable gtest_<prefix><test_case_name>_dummy_ whose initializer
//     looks up the pattern holder for test_case_name in the UnitTest
//     singleton's parameterized test registry and calls
//     AddTestCaseInstantiation() with the stringized prefix, the address of
//     the function above, and __FILE__/__LINE__.
//
// The expansion has no trailing semicolon; the user supplies it after the
// macro invocation.
# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
  ::testing::internal::ParamGenerator<test_case_name::ParamType> \
      gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
  int gtest_##prefix##test_case_name##_dummy_ = \
      ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
          GetTestCasePatternHolder<test_case_name>(\
              #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
                  #prefix, \
                  &gtest_##prefix##test_case_name##_EvalGenerator_, \
                  __FILE__, __LINE__)

}  // namespace testing

#endif  // GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-printers.h
================================================
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// A user can teach this function how to print a class type T by
// defining either operator<<() or PrintTo() in the namespace that
// defines T.  More specifically, the FIRST defined function in the
// following list will be used (assuming T is defined in namespace
// foo):
//
//   1. foo::PrintTo(const T&, ostream*)
//   2. operator<<(ostream&, const T&) defined in either foo or the
//      global namespace.
//
// If none of the above is defined, it will print the debug string of
// the value if it is a protocol buffer, or print the raw bytes in the
// value otherwise.
//
// To aid debugging: when T is a reference type, the address of the
// value is also printed; when T is a (const) char pointer, both the
// pointer value and the NUL-terminated string it points to are
// printed.
//
// We also provide some convenient wrappers:
//
//   // Prints a value to a string.  For a (const or not) char
//   // pointer, the NUL-terminated string (but not the pointer) is
//   // printed.
//   std::string ::testing::PrintToString(const T& value);
//
//   // Prints a value tersely: for a reference type, the referenced
//   // value (but not the address) is printed; for a (const or not) char
//   // pointer, the NUL-terminated string (but not the pointer) is
//   // printed.
//   void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
//
//   // Prints value using the type inferred by the compiler.  The difference
//   // from UniversalTersePrint() is that this function prints both the
//   // pointer and the NUL-terminated string for a (const or not) char pointer.
//   void ::testing::internal::UniversalPrint(const T& value, ostream*);
//
//   // Prints the fields of a tuple tersely to a string vector, one
//   // element for each field. Tuple support must be enabled in
//   // gtest-port.h.
//   std::vector<string> UniversalTersePrintTupleFieldsToStrings(
//       const Tuple& value);
//
// Known limitation:
//
// The print primitives print the elements of an STL-style container
// using the compiler-inferred type of *iter where iter is a
// const_iterator of the container.  When const_iterator is an input
// iterator but not a forward iterator, this inferred type may not
// match value_type, and the print output may be incorrect.  In
// practice, this is rarely a problem as for most containers
// const_iterator is a forward iterator.  We'll fix this if there's an
// actual need for it.  Note that this fix cannot rely on value_type
// being defined as many user-defined container types don't have
// value_type.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_

#include <ostream>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "gtest/internal/gtest-port.h"
#include "gtest/internal/gtest-internal.h"

namespace testing {

// Definitions in the 'internal' and 'internal2' name spaces are
// subject to change without notice.  DO NOT USE THEM IN USER CODE!
namespace internal2 {

// Prints the given number of bytes in the given object to the given
// ostream.
//
//   obj_bytes - pointer to the first byte of the object to dump.
//   count     - number of bytes to print, starting at obj_bytes.
//   os        - destination stream.
//
// Only the declaration appears here.
GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
                                     size_t count,
                                     ::std::ostream* os);

// For selecting which printer to use when a given type has neither <<
// nor PrintTo().  The enumerator is used as the second template argument
// of TypeWithoutFormatter.
enum TypeKind {
  kProtobuf,              // a protobuf type
  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
  // (e.g. a named or unnamed enum type)
  kOtherType              // anything else
};

// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is what the
// universal printer calls for a value of type T that has neither an
// operator<< nor a PrintTo().  kTypeKind is the "kind" of T, as described
// by enum TypeKind, and selects the specialization that does the work.
template <typename T, TypeKind kTypeKind>
class TypeWithoutFormatter {
 public:
  // Primary template, used when kTypeKind is kOtherType: hands the address
  // and size of `value` to PrintBytesInObjectTo() so its bytes are printed.
  static void PrintValue(const T& value, ::std::ostream* os) {
    const unsigned char* const raw_bytes =
        reinterpret_cast<const unsigned char*>(&value);
    PrintBytesInObjectTo(raw_bytes, sizeof(value), os);
  }
};

// Length threshold, in characters, for printing a protobuf on one line.
// We print a protobuf using its ShortDebugString() when that string
// doesn't exceed this many characters; otherwise we print it using
// DebugString() for better readability.
const size_t kProtobufOneLinerMaxLength = 50;

template <typename T>
class TypeWithoutFormatter<T, kProtobuf> {
 public:
  static void PrintValue(const T& value, ::std::ostream* os) {
    const ::testing::internal::string short_str = value.ShortDebugString();
    const ::testing::internal::string pretty_str =
      short_str.length() <= kProtobufOneLinerMaxLength ?
      short_str : ("\n" + value.DebugString());
    *os << ("<" + pretty_str + ">");
  }
};

template <typename T>
class TypeWithoutFormatter<T, kConvertibleToInteger> {
 public:
  // Since T has no << operator or PrintTo() but can be implicitly
  // converted to BiggestInt, we print it as a BiggestInt.
  //
  // Most likely T is an enum type (either named or unnamed), in which
  // case printing it as an integer is the desired behavior.  In case
  // T is not an enum, printing it as an integer is the best we can do
  // given that it has no user-defined printer.
  static void PrintValue(const T& value, ::std::ostream* os) {
    const internal::BiggestInt kBigInt = value;
    *os << kBigInt;
  }
};

// Prints the given value to the given ostream.  If the value is a
// protocol message, its debug string is printed; if it's an enum or
// of a type implicitly convertible to BiggestInt, it's printed as an
// integer; otherwise the bytes in the value are printed.  This is
// what UniversalPrinter<T>::Print() does when it knows nothing about
// type T and T has neither << operator nor PrintTo().
//
// A user can override this behavior for a class type Foo by defining
// a << operator in the namespace where Foo is defined.
//
// We put this operator in namespace 'internal2' instead of 'internal'
// to simplify the implementation, as much code in 'internal' needs to
// use << in STL, which would conflict with our own << were it defined
// in 'internal'.
//
// Note that this operator<< takes a generic std::basic_ostream<Char,
// CharTraits> type instead of the more restricted std::ostream.  If
// we define it to take an std::ostream instead, we'll get an
// "ambiguous overloads" compiler error when trying to print a type
// Foo that supports streaming to std::basic_ostream<Char,
// CharTraits>, as the compiler cannot tell whether
// operator<<(std::ostream&, const T&) or
// operator<<(std::basic_ostream<Char, CharTraits>&, const Foo&) is more
// specific.
template <typename Char, typename CharTraits, typename T>
::std::basic_ostream<Char, CharTraits>& operator<<(
  ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
  TypeWithoutFormatter < T,
                       (internal::IsAProtocolMessage<T>::value ? kProtobuf :
                        internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
                        kConvertibleToInteger : kOtherType) >::PrintValue(x, &os);
  return os;
}

}  // namespace internal2
}  // namespace testing

// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
// magic needed for implementing UniversalPrinter won't work.
namespace testing_internal {

// Used to print a value that is not an STL-style container when the
// user doesn't define PrintTo() for it.
//
//   value - the object to print.
//   os    - destination stream; it is dereferenced unconditionally.
template <typename T>
void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
  // With the following statement, during unqualified name lookup,
  // testing::internal2::operator<< appears as if it was declared in
  // the nearest enclosing namespace that contains both
  // ::testing_internal and ::testing::internal2, i.e. the global
  // namespace.  For more details, refer to the C++ Standard section
  // 7.3.4-1 [namespace.udir].  This allows us to fall back onto
  // testing::internal2::operator<< in case T doesn't come with a <<
  // operator.
  //
  // We cannot write 'using ::testing::internal2::operator<<;', which
  // gcc 3.3 fails to compile due to a compiler bug.
  using namespace ::testing::internal2;  // NOLINT

  // Assuming T is defined in namespace foo, in the next statement,
  // the compiler will consider all of:
  //
  //   1. foo::operator<< (thanks to Koenig look-up),
  //   2. ::operator<< (as the current namespace is enclosed in ::),
  //   3. testing::internal2::operator<< (thanks to the using statement above).
  //
  // The operator<< whose type matches T best will be picked.
  //
  // We deliberately allow #2 to be a candidate, as sometimes it's
  // impossible to define #1 (e.g. when foo is ::std, defining
  // anything in it is undefined behavior unless you are a compiler
  // vendor).
  *os << value;
}

}  // namespace testing_internal

namespace testing {
namespace internal {

// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
// value to the given ostream.  The caller must ensure that
// 'ostream_ptr' is not NULL, or the behavior is undefined.
//
// We define UniversalPrinter as a class template (as opposed to a
// function template), as we need to partially specialize it for
// reference types, which cannot be done with function templates.
//
// This is only the forward declaration of the class template.
template <typename T>
class UniversalPrinter;

// Forward declaration of UniversalPrint().  It is needed here because the
// container overload of DefaultPrintTo() below calls it to print each
// element.
template <typename T>
void UniversalPrint(const T& value, ::std::ostream* os);

// Prints an STL-style container for which the user has not supplied a
// PrintTo().  The output is a brace-enclosed, comma-separated list of the
// elements; once kMaxCount elements have been written, any remaining ones
// are summarized as " ...".
template <typename C>
void DefaultPrintTo(IsContainer /* dummy */,
                    false_type /* is not a pointer */,
                    const C& container, ::std::ostream* os) {
  const size_t kMaxCount = 32;  // Upper bound on the elements printed.
  size_t printed = 0;

  *os << '{';

  typename C::const_iterator cur = container.begin();

  while (cur != container.end()) {
    if (printed != 0) {
      *os << ',';

      if (printed == kMaxCount) {  // Limit reached: elide the rest.
        *os << " ...";
        break;
      }
    }

    *os << ' ';
    // PrintTo(*cur, os) is not usable here because PrintTo() does not
    // cope with *cur being a native array.
    internal::UniversalPrint(*cur, os);

    ++cur;
    ++printed;
  }

  // A non-empty list gets a space before the closing brace.
  if (printed != 0) {
    *os << ' ';
  }

  *os << '}';
}

// Prints a pointer that is neither a char pointer nor a member pointer,
// for which the user has not supplied a PrintTo().  (Pointers to member
// variables or member functions do not designate a location in the address
// space and their representation is implementation-defined, so those are
// printed as raw bytes instead.)
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
                    true_type /* is a pointer */,
                    T* p, ::std::ostream* os) {
  if (p == NULL) {
    *os << "NULL";
    return;
  }

  // C++ forbids casting a function pointer to any object pointer, so the
  // two kinds of pointer are handled separately.
  //
  // IsTrue() is there to silence "Condition is always true" and
  // "unreachable code" warnings.
  if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
    // T is not a function type.  Stream p with <<, letting ADL find a
    // user-defined << for the pointer type if one exists.
    *os << p;
    return;
  }

  // T is a function type, for which '*os << p' would merely print p as a
  // bool.  What we want is p printed as a const void*.  A direct cast to
  // const void* is not possible, not even with reinterpret_cast, because
  // earlier versions of gcc (e.g. 3.4.5) cannot compile it for a function
  // pointer.  Going through UInt64 first avoids the problem.
  *os << reinterpret_cast<const void*>(
        reinterpret_cast<internal::UInt64>(p));
}

// Used to print a non-container, non-pointer value when the user
// doesn't define PrintTo() for it.
//
// The two unnamed tag parameters only take part in overload selection.
template <typename T>
void DefaultPrintTo(IsNotContainer /* dummy */,
                    false_type /* is not a pointer */,
                    const T& value, ::std::ostream* os) {
  // Forward to the helper in ::testing_internal, which is deliberately
  // kept outside ::testing.
  ::testing_internal::DefaultPrintNonContainerTo(value, os);
}

// Prints the given value using the << operator if it has one;
// otherwise prints the bytes in it.  This is what
// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
// or overloaded for type T.
//
// A user can override this behavior for a class type Foo by defining
// an overload of PrintTo() in the namespace where Foo is defined.  We
// give the user this option as sometimes defining a << operator for
// Foo is not desirable (e.g. the coding style may prevent doing it,
// or there is already a << operator but it doesn't do what the user
// wants).
template <typename T>
void PrintTo(const T& value, ::std::ostream* os) {
  // DefaultPrintTo() is overloaded.  The types of its first two
  // arguments determine which version will be picked.  If T is an
  // STL-style container, the version for containers will be called; if
  // T is a pointer, the pointer version will be called; otherwise the
  // generic version will be called.
  //
  // Note that we check for container types here, before we check
  // for protocol message types in our operator<<.  The rationale is:
  //
  // For protocol messages, we want to give people a chance to
  // override Google Mock's format by defining a PrintTo() or
  // operator<<.  For STL containers, other formats can be
  // incompatible with Google Mock's format for the container
  // elements; therefore we check for container types here to ensure
  // that our format is used.
  //
  // The second argument of DefaultPrintTo() is needed to bypass a bug
  // in Symbian's C++ compiler that prevents it from picking the right
  // overload between:
  //
  //   PrintTo(const T& x, ...);
  //   PrintTo(T* x, ...);
  DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
}

// The following list of PrintTo() overloads tells
// UniversalPrinter<T>::Print() how to print standard types (built-in
// types, strings, plain arrays, and pointers).

// Overloads for various char types.
GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
inline void PrintTo(char c, ::std::ostream* os) {
  // Plain char is always printed through the unsigned char overload so
  // that the output does not depend on whether the compiler treats
  // char as signed or unsigned.
  const unsigned char as_unsigned = static_cast<unsigned char>(c);
  PrintTo(as_unsigned, os);
}

// Overloads for other simple built-in types.

// Prints a bool as the word "true" or "false".
inline void PrintTo(bool x, ::std::ostream* os) {
  if (x) {
    *os << "true";
  }
  else {
    *os << "false";
  }
}

// Overload for wchar_t type.
// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its decimal code (except for L'\0').
// The L'\0' char is printed as "L'\\0'". The decimal code is printed
// as signed integer when wchar_t is implemented by the compiler
// as a signed type and is printed as an unsigned integer when wchar_t
// is implemented as an unsigned type.
GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);

// Overloads for C strings.
GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
inline void PrintTo(char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const char*>(s), os);
}

// signed/unsigned char is often used for representing binary data, so
// we print pointers to it as void* to be safe.
inline void PrintTo(const signed char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(signed char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}
inline void PrintTo(unsigned char* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const void*>(s), os);
}

// MSVC can be configured to define wchar_t as a typedef of unsigned
// short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
// type.  When wchar_t is a typedef, defining an overload for const
// wchar_t* would cause unsigned short* be printed as a wide string,
// possibly causing invalid memory accesses.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Overloads for wide C strings
GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
inline void PrintTo(wchar_t* s, ::std::ostream* os) {
  PrintTo(ImplicitCast_<const wchar_t*>(s), os);
}
#endif

// Overload for C arrays.  Multi-dimensional arrays are printed
// properly.

// Prints the first 'count' elements of the array 'a' to '*os',
// separated by ", " and without the surrounding curly braces.  Each
// element is printed with UniversalPrint().  Nothing is printed when
// 'count' is 0.
template <typename T>
void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
  // An empty range must be handled up front: a[0] does not exist, and
  // the loop below starts at index 1 with an 'i != count' condition, so
  // with count == 0 it would walk far past the end of the array.
  if (count == 0) {
    return;
  }

  UniversalPrint(a[0], os);

  for (size_t i = 1; i != count; i++) {
    *os << ", ";
    UniversalPrint(a[i], os);
  }
}

// Overloads for ::string and ::std::string.
#if GTEST_HAS_GLOBAL_STRING
GTEST_API_ void PrintStringTo(const ::string& s, ::std::ostream* os);
inline void PrintTo(const ::string& s,
                    ::std::ostream* os) {
  // Global ::string values are handled by PrintStringTo().
  PrintStringTo(s, os);
}
#endif  // GTEST_HAS_GLOBAL_STRING

GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
inline void PrintTo(const ::std::string& s,
                    ::std::ostream* os) {
  // ::std::string values are handled by PrintStringTo().
  PrintStringTo(s, os);
}

// Overloads for ::wstring and ::std::wstring.
#if GTEST_HAS_GLOBAL_WSTRING
GTEST_API_ void PrintWideStringTo(const ::wstring& s, ::std::ostream* os);
inline void PrintTo(const ::wstring& s,
                    ::std::ostream* os) {
  // Global ::wstring values are handled by PrintWideStringTo().
  PrintWideStringTo(s, os);
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_WSTRING
GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
inline void PrintTo(const ::std::wstring& s,
                    ::std::ostream* os) {
  // ::std::wstring values are handled by PrintWideStringTo().
  PrintWideStringTo(s, os);
}
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_TR1_TUPLE
// Overload for ::std::tr1::tuple.  Needed for printing function arguments,
// which are packed as tuples.

// Helper function for printing a tuple.  T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os);

// PrintTo() overloads for tuples of every supported arity (up to 10
// fields).  One overload is spelled out per arity so that the code
// works whether or not tr1::tuple is implemented with the non-standard
// variadic template feature.  Every overload simply forwards to
// PrintTupleTo().

// Arity 0.
inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 1.
template <class T1>
void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 2.
template <class T1, class T2>
void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 3.
template <class T1, class T2, class T3>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 4.
template <class T1, class T2, class T3, class T4>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 5.
template <class T1, class T2, class T3, class T4, class T5>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 6.
template <class T1, class T2, class T3, class T4, class T5, class T6>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 7.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 8.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 9.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9>
void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
             ::std::ostream* os) {
  PrintTupleTo(t, os);
}

// Arity 10.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10>
void PrintTo(
  const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
  ::std::ostream* os) {
  PrintTupleTo(t, os);
}
#endif  // GTEST_HAS_TR1_TUPLE

// Overload for std::pair: prints "(first, second)".
template <class T1, class T2>
void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
  ::std::ostream& out = *os;

  out << '(';
  // UniversalPrint(value.first, os) cannot be used here because T1 may
  // be a reference type; naming UniversalPrinter<T1> explicitly keeps
  // that information.  The same holds for T2 / value.second.
  UniversalPrinter<T1>::Print(value.first, os);
  out << ", ";
  UniversalPrinter<T2>::Print(value.second, os);
  out << ')';
}

// Primary UniversalPrinter template: prints a value of non-reference
// type T by letting the compiler choose the matching PrintTo()
// overload for T.
template <class T>
class UniversalPrinter {
 public:
  // Adding const to a function type makes MSVC emit warning 4180, so
  // the warning is switched off around the declaration below.
#if defined(_MSC_VER)
# pragma warning(push)          // Remembers the current warning state.
# pragma warning(disable:4180)  // Turns warning 4180 off for now.
#endif  // defined(_MSC_VER)

  // Deliberately not named PrintTo(): that name would conflict with
  // ::testing::internal::PrintTo inside the function body.
  static void Print(const T& value, ::std::ostream* os) {
    // ::testing::internal::PrintTo() is the default printer.
    //
    // Because the call is unqualified, Koenig look-up also makes
    // visible any PrintTo() defined in the namespace of a class type
    // T.  Such a function is more specific than the generic ones in
    // ::testing::internal, so the compiler picks it - which is exactly
    // the customization behavior we want.
    PrintTo(value, os);
  }

#if defined(_MSC_VER)
# pragma warning(pop)           // Brings back the saved warning state.
#endif  // defined(_MSC_VER)
};

// UniversalPrintArray(begin, len, os) prints the 'len' elements that
// start at address 'begin', wrapped in curly braces.
template <class T>
void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
  // An empty array has a dedicated, compact representation.
  if (len == 0) {
    *os << "{}";
    return;
  }

  // Arrays longer than kThreshold elements are abbreviated: only the
  // first and the last kChunkSize elements are printed.
  // TODO(wan@google.com): let the user control the threshold using a flag.
  const size_t kThreshold = 18;
  const size_t kChunkSize = 8;

  *os << "{ ";
  if (len > kThreshold) {
    PrintRawArrayTo(begin, kChunkSize, os);
    *os << ", ..., ";
    PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
  }
  else {
    PrintRawArrayTo(begin, len, os);
  }
  *os << " }";
}
// This overload prints a (const) char array compactly.
GTEST_API_ void UniversalPrintArray(
  const char* begin, size_t len, ::std::ostream* os);

// This overload prints a (const) wchar_t array compactly.
GTEST_API_ void UniversalPrintArray(
  const wchar_t* begin, size_t len, ::std::ostream* os);

// Specialization of UniversalPrinter for an array type T[N].
template <class T, size_t N>
class UniversalPrinter<T[N]> {
 public:
  // Prints all N elements through UniversalPrintArray(), which leaves
  // out some of them when the array is too long.
  static void Print(const T (&a)[N], ::std::ostream* os) {
    UniversalPrintArray(a, N, os);
  }
};

// Specialization of UniversalPrinter for a reference type T&: prints
// "@<address> " followed by the referenced value.
template <class T>
class UniversalPrinter<T&> {
 public:
  // Adding const to a function type makes MSVC emit warning 4180, so
  // the warning is switched off around the declaration below.
#if defined(_MSC_VER)
# pragma warning(push)          // Remembers the current warning state.
# pragma warning(disable:4180)  // Turns warning 4180 off for now.
#endif  // defined(_MSC_VER)

  static void Print(const T& value, ::std::ostream* os) {
    // First the address of the referenced object.  reinterpret_cast is
    // required because static_cast does not compile when T is a
    // function type.
    const void* const address = reinterpret_cast<const void*>(&value);
    *os << "@" << address << " ";

    // Then the referenced value itself.
    UniversalPrint(value, os);
  }

#if defined(_MSC_VER)
# pragma warning(pop)           // Brings back the saved warning state.
#endif  // defined(_MSC_VER)
};

// Prints a value tersely: for a reference type, the referenced value
// (but not the address) is printed; for a (const) char pointer, the
// NUL-terminated string (but not the pointer) is printed.

// Primary template: for a type with no dedicated terse form, terse
// printing is the same as UniversalPrint().
template <class T>
class UniversalTersePrinter {
 public:
  static void Print(const T& value, ::std::ostream* os) {
    UniversalPrint(value, os);
  }
};
// Reference types: the value is handed to UniversalPrint() as a plain
// T, so only the referenced value is printed, not its address.
template <class T>
class UniversalTersePrinter<T&> {
 public:
  static void Print(const T& value, ::std::ostream* os) {
    UniversalPrint(value, os);
  }
};
// Array types: terse printing reuses the array specialization of
// UniversalPrinter.
template <class T, size_t N>
class UniversalTersePrinter<T[N]> {
 public:
  static void Print(const T (&value)[N], ::std::ostream* os) {
    UniversalPrinter<T[N]>::Print(value, os);
  }
};
// const char*: a non-null pointer is copied into a string object and
// printed as that string; a null pointer is printed as "NULL".
template <>
class UniversalTersePrinter<const char*> {
 public:
  static void Print(const char* str, ::std::ostream* os) {
    if (str != NULL) {
      UniversalPrint(string(str), os);
      return;
    }
    *os << "NULL";
  }
};
// char*: handled exactly like const char*.
template <>
class UniversalTersePrinter<char*> {
 public:
  static void Print(char* str, ::std::ostream* os) {
    typedef UniversalTersePrinter<const char*> ConstPrinter;
    ConstPrinter::Print(str, os);
  }
};

#if GTEST_HAS_STD_WSTRING
// const wchar_t*: a non-null pointer is copied into a ::std::wstring
// and printed as that string; a null pointer is printed as "NULL".
template <>
class UniversalTersePrinter<const wchar_t*> {
 public:
  static void Print(const wchar_t* str, ::std::ostream* os) {
    if (str != NULL) {
      UniversalPrint(::std::wstring(str), os);
      return;
    }
    *os << "NULL";
  }
};
#endif

// wchar_t*: forwarded to UniversalTersePrinter<const wchar_t*>.
template <>
class UniversalTersePrinter<wchar_t*> {
 public:
  static void Print(wchar_t* str, ::std::ostream* os) {
    typedef UniversalTersePrinter<const wchar_t*> ConstPrinter;
    ConstPrinter::Print(str, os);
  }
};

template <typename T>
void UniversalTersePrint(const T& value, ::std::ostream* os) {
  UniversalTersePrinter<T>::Print(value, os);
}

// Prints a value using the type inferred by the compiler.  Unlike
// UniversalTersePrint(), for a (const) char pointer this prints both
// the pointer and the NUL-terminated string.
template <class T>
void UniversalPrint(const T& value, ::std::ostream* os) {
  // VC++ 7.1 has a bug that prevents instantiating UniversalPrinter
  // with T directly; going through the alias T1 is the workaround.
  typedef T T1;
  UniversalPrinter<T1>::Print(value, os);
}

#if GTEST_HAS_TR1_TUPLE
typedef ::std::vector<string> Strings;

// This helper template allows PrintTo() for tuples and
// UniversalTersePrintTupleFieldsToStrings() to be defined by
// induction on the number of tuple fields.  The idea is that
// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
// fields in tuple t, and can be defined in terms of
// TuplePrefixPrinter<N - 1>.

// The inductive case.
template <size_t N>
struct TuplePrefixPrinter {
  // Prints the first N fields of a tuple.
  template <typename Tuple>
  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
    TuplePrefixPrinter < N - 1 >::PrintPrefixTo(t, os);
    *os << ", ";
    UniversalPrinter < typename ::std::tr1::tuple_element < N - 1, Tuple >::type >
    ::Print(::std::tr1::get < N - 1 > (t), os);
  }

  // Tersely prints the first N fields of a tuple to a string vector,
  // one element for each field.
  template <typename Tuple>
  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
    TuplePrefixPrinter < N - 1 >::TersePrintPrefixToStrings(t, strings);
    ::std::stringstream ss;
    UniversalTersePrint(::std::tr1::get < N - 1 > (t), &ss);
    strings->push_back(ss.str());
  }
};

// Base cases.
// N == 0 ends the recursion: there are no fields, so both operations
// do nothing.
template <>
struct TuplePrefixPrinter<0> {
  template <class Tuple>
  static void PrintPrefixTo(const Tuple& /* t */, ::std::ostream* /* os */) {}

  template <class Tuple>
  static void TersePrintPrefixToStrings(const Tuple& /* t */,
                                        Strings* /* strings */) {}
};
// We have to specialize the entire TuplePrefixPrinter<> class
// template here, even though the definition of
// TersePrintPrefixToStrings() is the same as the generic version, as
// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
// support specializing a method template of a class template.
template <>
struct TuplePrefixPrinter<1> {
  template <typename Tuple>
  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
    UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
    Print(::std::tr1::get<0>(t), os);
  }

  template <typename Tuple>
  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
    ::std::stringstream ss;
    UniversalTersePrint(::std::tr1::get<0>(t), &ss);
    strings->push_back(ss.str());
  }
};

// Helper function for printing a tuple.  T must be instantiated with
// a tuple type.
template <typename T>
void PrintTupleTo(const T& t, ::std::ostream* os) {
  *os << "(";
  TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
  PrintPrefixTo(t, os);
  *os << ")";
}

// Returns a string vector holding the terse representation of each
// field of the tuple 'value', one element per field.  See the comment
// before UniversalTersePrint() for how "tersely" is defined.
template <class Tuple>
Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
  // The printer that covers every field of Tuple.
  typedef TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value> AllFields;

  Strings fields;
  AllFields::TersePrintPrefixToStrings(value, &fields);
  return fields;
}
#endif  // GTEST_HAS_TR1_TUPLE

}  // namespace internal

// Returns the terse printed form of 'value' as a ::std::string.
template <class T>
::std::string PrintToString(const T& value) {
  ::std::stringstream buffer;
  internal::UniversalTersePrinter<T>::Print(value, &buffer);
  return buffer.str();
}

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-spi.h
================================================
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Utilities for testing Google Test itself and code that uses Google Test
// (e.g. frameworks built on top of Google Test).

#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_

#include "gtest/gtest.h"

namespace testing {

// This helper class can be used to mock out Google Test failure reporting
// so that we can test Google Test or code that builds on Google Test.
//
// An object of this class appends a TestPartResult object to the
// TestPartResultArray object given in the constructor whenever a Google Test
// failure is reported. It can either intercept only failures that are
// generated in the same thread that created this object or it can intercept
// all generated failures. The scope of this mock object can be controlled with
// the first argument (the InterceptMode) of the two-argument constructor.
//
// Copying and assignment are disabled via GTEST_DISALLOW_COPY_AND_ASSIGN_.
class GTEST_API_ ScopedFakeTestPartResultReporter
  : public TestPartResultReporterInterface {
 public:
  // The two possible mocking modes of this object.
  enum InterceptMode {
    INTERCEPT_ONLY_CURRENT_THREAD,  // Intercepts only thread local failures.
    INTERCEPT_ALL_THREADS           // Intercepts all failures.
  };

  // The c'tor sets this object as the test part result reporter used
  // by Google Test.  The 'result' parameter specifies where to report the
  // results. This reporter will only catch failures generated in the current
  // thread. DEPRECATED
  explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);

  // Same as above, but you can choose the interception scope of this object.
  ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
                                   TestPartResultArray* result);

  // The d'tor restores the previous test part result reporter.
  virtual ~ScopedFakeTestPartResultReporter();

  // Appends the TestPartResult object to the TestPartResultArray
  // received in the constructor.
  //
  // This method is from the TestPartResultReporterInterface
  // interface.
  virtual void ReportTestPartResult(const TestPartResult& result);
 private:
  // NOTE(review): defined out of line; presumably the set-up shared by
  // both constructors - confirm against the implementation.
  void Init();

  // The interception scope; const, so fixed at construction.
  const InterceptMode intercept_mode_;
  // NOTE(review): presumably the reporter that was active before this
  // object installed itself, kept so the d'tor can restore it - confirm.
  TestPartResultReporterInterface* old_reporter_;
  // NOTE(review): presumably the array passed to the constructor, to
  // which intercepted results are appended - confirm.
  TestPartResultArray* const result_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
};

namespace internal {

// A helper class for implementing EXPECT_FATAL_FAILURE() and
// EXPECT_NONFATAL_FAILURE().  Its destructor verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring.  If that's not the case, a
// non-fatal failure will be generated.
//
// Because the check runs in the destructor, the point at which the
// object goes out of scope determines when the verification happens.
// Copying and assignment are disabled via GTEST_DISALLOW_COPY_AND_ASSIGN_.
class GTEST_API_ SingleFailureChecker {
 public:
  // The constructor remembers the arguments.
  //   results - the array that will be inspected by the destructor.
  //   type    - the failure type the single expected failure must have.
  //   substr  - text the failure must contain.
  SingleFailureChecker(const TestPartResultArray* results,
                       TestPartResult::Type type,
                       const string& substr);
  ~SingleFailureChecker();
 private:
  // The remembered constructor arguments.  'results_' is a pointer, so
  // the array is not copied; 'substr_' is held by value.
  const TestPartResultArray* const results_;
  const TestPartResult::Type type_;
  const string substr_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
};

}  // namespace internal

}  // namespace testing

// A set of macros for testing Google Test assertions or code that's expected
// to generate Google Test fatal failures.  It verifies that the given
// statement will cause exactly one fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
//   - 'statement' cannot reference local non-static variables or
//     non-static members of the current object.
//   - 'statement' cannot return a value.
//   - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works.  The AcceptsMacroThatExpandsToUnprotectedComma test in
// gtest_unittest.cc will fail to compile if we do that.
// Current-thread variant (INTERCEPT_ONLY_CURRENT_THREAD).
//
// 'statement' is compiled as the body of a static member function of a
// local class, which is why it cannot reference local non-static
// variables or non-static members of the current object.  gtest_checker
// is declared before the inner scope that holds gtest_reporter, so the
// checker is destroyed only after the reporter has gone out of scope.
#define EXPECT_FATAL_FAILURE(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

// All-threads variant: identical to EXPECT_FATAL_FAILURE except that the
// reporter is created with INTERCEPT_ALL_THREADS.  'statement' is again
// the body of a static member function of a local class, so the same
// restrictions on what it may reference apply.
#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ALL_THREADS, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

// A macro for testing Google Test assertions or code that's expected to
// generate Google Test non-fatal failures.  It asserts that the given
// statement will cause exactly one non-fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// 'statement' is allowed to reference local variables and members of
// the current object.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
//   - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works.  If we do that, the code won't compile when the user gives
// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
// expands to code containing an unprotected comma.  The
// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
// catches that.
//
// For the same reason, we have to write
//   if (::testing::internal::AlwaysTrue()) { statement; }
// instead of
//   GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
// to avoid an MSVC warning on unreachable code.
// Current-thread variant (INTERCEPT_ONLY_CURRENT_THREAD).
//
// 'statement' is expanded in place, inside the scope that owns
// gtest_reporter, so it can use local variables and members of the
// current object.  gtest_checker is declared before that inner scope and
// is therefore destroyed only after the reporter has gone out of scope.
#define EXPECT_NONFATAL_FAILURE(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

// All-threads variant: identical to EXPECT_NONFATAL_FAILURE except that
// the reporter is created with INTERCEPT_ALL_THREADS.  'statement' is
// expanded in place, inside the scope that owns gtest_reporter.
#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
          &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

#endif  // GTEST_INCLUDE_GTEST_GTEST_SPI_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-test-part.h
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//

#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_

#include <iosfwd>
#include <vector>
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"

namespace testing {

// A copyable object representing the result of a test part (i.e. an
// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
//
// Don't inherit from TestPartResult as its destructor is not virtual.
class GTEST_API_ TestPartResult {
 public:
  // The possible outcomes of a test part (i.e. an assertion or an
  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
  enum Type {
    kSuccess,          // Succeeded.
    kNonFatalFailure,  // Failed but the test can continue.
    kFatalFailure      // Failed and the test should be terminated.
  };

  // C'tor.  TestPartResult does NOT have a default constructor.
  // Always use this constructor (with parameters) to create a
  // TestPartResult object.
  //
  //   a_type        - the outcome of the test part.
  //   a_file_name   - the source file where the test part took place;
  //                   NULL means unknown and is stored as "".
  //   a_line_number - the line in that file, or -1 if unknown.
  //   a_message     - the message associated with the test part; NULL is
  //                   treated as an empty message.
  TestPartResult(Type a_type,
                 const char* a_file_name,
                 int a_line_number,
                 const char* a_message)
    : type_(a_type),
      file_name_(a_file_name == NULL ? "" : a_file_name),
      line_number_(a_line_number),
      // A NULL message is replaced by "" before it reaches either
      // ExtractSummary() or the std::string constructor: constructing a
      // std::string from a null pointer is undefined behavior.  This
      // mirrors the handling of a NULL file name above.
      summary_(ExtractSummary(a_message == NULL ? "" : a_message)),
      message_(a_message == NULL ? "" : a_message) {
  }

  // Gets the outcome of the test part.
  Type type() const {
    return type_;
  }

  // Gets the name of the source file where the test part took place, or
  // NULL if it's unknown.
  const char* file_name() const {
    return file_name_.empty() ? NULL : file_name_.c_str();
  }

  // Gets the line in the source file where the test part took place,
  // or -1 if it's unknown.
  int line_number() const {
    return line_number_;
  }

  // Gets the summary of the failure message.
  const char* summary() const {
    return summary_.c_str();
  }

  // Gets the message associated with the test part.
  const char* message() const {
    return message_.c_str();
  }

  // Returns true iff the test part passed.
  bool passed() const {
    return type_ == kSuccess;
  }

  // Returns true iff the test part failed.
  bool failed() const {
    return type_ != kSuccess;
  }

  // Returns true iff the test part non-fatally failed.
  bool nonfatally_failed() const {
    return type_ == kNonFatalFailure;
  }

  // Returns true iff the test part fatally failed.
  bool fatally_failed() const {
    return type_ == kFatalFailure;
  }

 private:
  // The outcome of the test part.
  Type type_;

  // Gets the summary of the failure message by omitting the stack
  // trace in it.
  static std::string ExtractSummary(const char* message);

  // The name of the source file where the test part took place, or
  // "" if the source file is unknown.
  std::string file_name_;
  // The line in the source file where the test part took place, or -1
  // if the line number is unknown.
  int line_number_;
  std::string summary_;  // The test failure summary.
  std::string message_;  // The test failure message.
};

// Prints a TestPartResult object to the output stream `os`.  Only the
// declaration is given in this header; the definition lives elsewhere.
// NOTE(review): presumably returns `os` so that insertions can be chained,
// as is conventional for stream inserters -- confirm against the definition.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result);

// An array of TestPartResult objects.
//
// The results are stored in a std::vector<TestPartResult>.  Copying and
// assigning the array itself is disabled through
// GTEST_DISALLOW_COPY_AND_ASSIGN_.
//
// Don't inherit from TestPartResultArray as its destructor is not
// virtual.
class GTEST_API_ TestPartResultArray {
 public:
  // Creates an empty array.
  TestPartResultArray() {}

  // Appends the given TestPartResult to the array.
  void Append(const TestPartResult& result);

  // Returns the TestPartResult at the given index (0-based).
  // NOTE(review): the handling of an out-of-range index is decided by the
  // out-of-line definition, which is not visible in this header.
  const TestPartResult& GetTestPartResult(int index) const;

  // Returns the number of TestPartResult objects in the array.
  int size() const;

 private:
  // Storage for the collected results.
  std::vector<TestPartResult> array_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
};

// This interface knows how to report a test part result.
//
// Implementations override ReportTestPartResult(); the destructor is
// virtual so that implementations can be destroyed through a pointer to
// this interface.
class TestPartResultReporterInterface {
 public:
  virtual ~TestPartResultReporterInterface() {}

  // Reports the given test part result.  Pure virtual: every concrete
  // reporter must provide it.
  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
};

namespace internal {

// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
// statement generates new fatal failures. To do so it registers itself as the
// current test part result reporter. Besides checking if fatal failures were
// reported, it only delegates the reporting to the former result reporter.
// The original result reporter is restored in the destructor.
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
class GTEST_API_ HasNewFatalFailureHelper
  : public TestPartResultReporterInterface {
 public:
  // Constructor and destructor are defined out of line; per the class
  // comment they install this object as the current reporter and restore
  // the previous one, respectively.
  HasNewFatalFailureHelper();
  virtual ~HasNewFatalFailureHelper();
  // Implements TestPartResultReporterInterface.
  virtual void ReportTestPartResult(const TestPartResult& result);
  // Returns the recorded flag telling whether a new fatal failure was seen.
  bool has_new_fatal_failure() const {
    return has_new_fatal_failure_;
  }
 private:
  // Flag returned by has_new_fatal_failure().
  bool has_new_fatal_failure_;
  // The reporter that was current before this helper took over; not owned
  // as far as this header shows (it is a plain pointer).
  TestPartResultReporterInterface* original_reporter_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
};

}  // namespace internal

}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest-typed-test.h
================================================
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_

// This header implements typed tests and type-parameterized tests.

// Typed (aka type-driven) tests repeat the same test for types in a
// list.  You must know which types you want to test with when writing
// typed tests. Here's how you do it:

#if 0

// First, define a fixture class template.  It should be parameterized
// by a type.  Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
 public:
  ...
  typedef std::list<T> List;
  static T shared_;
  T value_;
};

// Next, associate a list of types with the test case, which will be
// repeated for each type in the list.  The typedef is necessary for
// the macro to parse correctly.
typedef testing::Types<char, int, unsigned int> MyTypes;
TYPED_TEST_CASE(FooTest, MyTypes);

// If the type list contains only one type, you can write that type
// directly without Types<...>:
//   TYPED_TEST_CASE(FooTest, int);

// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
// tests for this test case as you want.
TYPED_TEST(FooTest, DoesBlah) {
  // Inside a test, refer to TypeParam to get the type parameter.
  // Since we are inside a derived class template, C++ requires us to
  // visit the members of FooTest via 'this'.
  TypeParam n = this->value_;

  // To visit static members of the fixture, add the TestFixture::
  // prefix.
  n += TestFixture::shared_;

  // To refer to typedefs in the fixture, add the "typename
  // TestFixture::" prefix.
  typename TestFixture::List values;
  values.push_back(n);
  ...
}

TYPED_TEST(FooTest, HasPropertyA) {
  ...
}

#endif  // 0

// Type-parameterized tests are abstract test patterns parameterized
// by a type.  Compared with typed tests, type-parameterized tests
// allow you to define the test pattern without knowing what the type
// parameters are.  The defined pattern can be instantiated with
// different types any number of times, in any number of translation
// units.
//
// If you are designing an interface or concept, you can define a
// suite of type-parameterized tests to verify properties that any
// valid implementation of the interface/concept should have.  Then,
// each implementation can easily instantiate the test suite to verify
// that it conforms to the requirements, without having to write
// similar tests repeatedly.  Here's an example:

#if 0

// First, define a fixture class template.  It should be parameterized
// by a type.  Remember to derive it from testing::Test.
template <typename T>
class FooTest : public testing::Test {
  ...
};

// Next, declare that you will define a type-parameterized test case
// (the _P suffix is for "parameterized" or "pattern", whichever you
// prefer):
TYPED_TEST_CASE_P(FooTest);

// Then, use TYPED_TEST_P() to define as many type-parameterized tests
// for this type-parameterized test case as you want.
TYPED_TEST_P(FooTest, DoesBlah) {
  // Inside a test, refer to TypeParam to get the type parameter.
  TypeParam n = 0;
  ...
}

TYPED_TEST_P(FooTest, HasPropertyA) {
  ...
}

// Now the tricky part: you need to register all test patterns before
// you can instantiate them.  The first argument of the macro is the
// test case name; the rest are the names of the tests in this test
// case.
REGISTER_TYPED_TEST_CASE_P(FooTest,
                           DoesBlah, HasPropertyA);

// Finally, you are free to instantiate the pattern with the types you
// want.  If you put the above code in a header file, you can #include
// it in multiple C++ source files and instantiate it multiple times.
//
// To distinguish different instances of the pattern, the first
// argument to the INSTANTIATE_* macro is a prefix that will be added
// to the actual test case name.  Remember to pick unique prefixes for
// different instances.
typedef testing::Types<char, int, unsigned int> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);

// If the type list contains only one type, you can write that type
// directly without Types<...>:
//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);

#endif  // 0

#include "gtest/internal/gtest-port.h"
#include "gtest/internal/gtest-type-util.h"

// Implements typed tests.

#if GTEST_HAS_TYPED_TEST

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the typedef for the type parameters of the
// given test case.  For example, GTEST_TYPE_PARAMS_(FooTest) expands to
// gtest_type_params_FooTest_.  TYPED_TEST_CASE defines a typedef with this
// name and TYPED_TEST refers to it.
# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_

// Associates the type list 'Types' with the typed test case 'CaseName' by
// defining the typedef GTEST_TYPE_PARAMS_(CaseName) as
// ::testing::internal::TypeList< Types >::type.  The expansion does not
// end with a semicolon, so the invocation must be followed by one.
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define TYPED_TEST_CASE(CaseName, Types) \
  typedef ::testing::internal::TypeList< Types >::type \
      GTEST_TYPE_PARAMS_(CaseName)

// Defines the typed test 'TestName' of the typed test case 'CaseName'.
// The expansion consists of three parts:
//
//   1. A class template, named by GTEST_TEST_CLASS_NAME_(CaseName,
//      TestName), that derives from CaseName<gtest_TypeParam_>.  It
//      provides the private typedefs TestFixture (the fixture
//      instantiation) and TypeParam (the current type parameter) and
//      declares a virtual TestBody().
//   2. A bool variable gtest_<CaseName>_<TestName>_registered_ that is
//      initialized with the result of
//      ::testing::internal::TypeParameterizedTest<...>::Register(), called
//      with an empty string, the stringized case and test names, and 0.
//      The type list comes from GTEST_TYPE_PARAMS_(CaseName), so
//      TYPED_TEST_CASE(CaseName, ...) must have been used beforehand.
//   3. The header of the out-of-line definition of TestBody(); the braces
//      written after the macro invocation become its body.
# define TYPED_TEST(CaseName, TestName) \
  template <typename gtest_TypeParam_> \
  class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
      : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTest< \
          CaseName, \
          ::testing::internal::TemplateSel< \
              GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
          GTEST_TYPE_PARAMS_(CaseName)>::Register(\
              "", #CaseName, #TestName, 0); \
  template <typename gtest_TypeParam_> \
  void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()

#endif  // GTEST_HAS_TYPED_TEST

// Implements type-parameterized tests.

#if GTEST_HAS_TYPED_TEST_P

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the namespace name that the type-parameterized tests for
// the given type-parameterized test case are defined in.  The exact
// name of the namespace is subject to change without notice.
// Currently GTEST_CASE_NAMESPACE_(FooTest) expands to gtest_case_FooTest_.
# define GTEST_CASE_NAMESPACE_(TestCaseName) \
  gtest_case_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the variable used to remember the names of
// the defined tests in the given test case.  For example,
// GTEST_TYPED_TEST_CASE_P_STATE_(FooTest) expands to
// gtest_typed_test_case_p_state_FooTest_.  The variable itself is defined
// by TYPED_TEST_CASE_P.
# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
  gtest_typed_test_case_p_state_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
//
// Expands to the name of the variable used to remember the names of
// the registered tests in the given test case.  For example,
// GTEST_REGISTERED_TEST_NAMES_(FooTest) expands to
// gtest_registered_test_names_FooTest_.  The variable itself is defined
// by REGISTER_TYPED_TEST_CASE_P.
# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
  gtest_registered_test_names_##TestCaseName##_

// Declares that type-parameterized tests will be defined for the fixture
// 'CaseName'.  The expansion defines a static
// ::testing::internal::TypedTestCasePState variable whose name is given by
// GTEST_TYPED_TEST_CASE_P_STATE_(CaseName); TYPED_TEST_P and
// REGISTER_TYPED_TEST_CASE_P call member functions on that variable.  The
// expansion does not end with a semicolon, so the invocation must be
// followed by one.
//
// The variables defined in the type-parameterized test macros are
// static as typically these macros are used in a .h file that can be
// #included in multiple translation units linked together.
# define TYPED_TEST_CASE_P(CaseName) \
  static ::testing::internal::TypedTestCasePState \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)

// Defines the type-parameterized test 'TestName' of the test case
// 'CaseName'.  The expansion consists of two parts:
//
//   1. Inside namespace GTEST_CASE_NAMESPACE_(CaseName): a class template
//      named TestName that derives from CaseName<gtest_TypeParam_>, with
//      the private typedefs TestFixture and TypeParam and a virtual
//      TestBody(); plus a static bool gtest_<TestName>_defined_ that is
//      initialized with the result of calling AddTestName(__FILE__,
//      __LINE__, "CaseName", "TestName") on the state variable defined by
//      TYPED_TEST_CASE_P(CaseName).
//   2. The header of the out-of-line definition of TestBody(); the braces
//      written after the macro invocation become its body.
# define TYPED_TEST_P(CaseName, TestName) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  template <typename gtest_TypeParam_> \
  class TestName : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
          __FILE__, __LINE__, #CaseName, #TestName); \
  } \
  template <typename gtest_TypeParam_> \
  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()

// Registers the type-parameterized tests of 'CaseName'; the variadic
// arguments are the names of the tests.  The expansion:
//
//   - defines, inside namespace GTEST_CASE_NAMESPACE_(CaseName), the
//     typedef gtest_AllTests_ as
//     ::testing::internal::Templates<__VA_ARGS__>::type, so each argument
//     must name a class template visible in that namespace (TYPED_TEST_P
//     defines them there);
//   - defines the static variable GTEST_REGISTERED_TEST_NAMES_(CaseName),
//     a const char* const initialized with the result of calling
//     VerifyRegisteredTestNames(__FILE__, __LINE__, "<stringized
//     arguments>") on the state variable defined by TYPED_TEST_CASE_P.
//
// The expansion does not end with a semicolon, so the invocation must be
// followed by one.
# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
  } \
  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
          __FILE__, __LINE__, #__VA_ARGS__)

// Instantiates the type-parameterized test case 'CaseName' for the types
// in 'Types'.  The expansion defines a bool variable named
// gtest_<Prefix>_<CaseName> that is initialized with the result of
// ::testing::internal::TypeParameterizedTestCase<...>::Register(), called
// with the stringized Prefix and CaseName and the variable
// GTEST_REGISTERED_TEST_NAMES_(CaseName).  It refers to gtest_AllTests_
// and to that variable, both of which REGISTER_TYPED_TEST_CASE_P defines,
// so the test case must be registered first.  The expansion does not end
// with a semicolon, so the invocation must be followed by one.
//
// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
  bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
      ::testing::internal::TypeParameterizedTestCase<CaseName, \
          GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
          ::testing::internal::TypeList< Types >::type>::Register(\
              #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))

#endif  // GTEST_HAS_TYPED_TEST_P

#endif  // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines the public API for Google Test.  It should be
// included by any test program that uses Google Test.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
// program!
//
// Acknowledgment: Google Test borrowed the idea of automatic test
// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
// easyUnit framework.

#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_H_

#include <limits>
#include <ostream>
#include <vector>

#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
#include "gtest/gtest-death-test.h"
#include "gtest/gtest-message.h"
#include "gtest/gtest-param-test.h"
#include "gtest/gtest-printers.h"
#include "gtest/gtest_prod.h"
#include "gtest/gtest-test-part.h"
#include "gtest/gtest-typed-test.h"

// Depending on the platform, different string classes are available.
// On Linux, in addition to ::std::string, Google also makes use of
// class ::string, which has the same interface as ::std::string, but
// has a different implementation.
//
// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
// ::string is available AND is a distinct type to ::std::string, or
// define it to 0 to indicate otherwise.
//
// If the user's ::std::string and ::string are the same class due to
// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0.
//
// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined
// heuristically.

namespace testing {

// Declares the flags.  Each GTEST_DECLARE_<type>_ invocation below declares
// one Google Test flag of the given type; the macros themselves are defined
// outside this file.

// This flag temporarily enables the disabled tests.
GTEST_DECLARE_bool_(also_run_disabled_tests);

// This flag brings the debugger on an assertion failure.
GTEST_DECLARE_bool_(break_on_failure);

// This flag controls whether Google Test catches all test-thrown exceptions
// and logs them as failures.
GTEST_DECLARE_bool_(catch_exceptions);

// This flag enables using colors in terminal output. Available values are
// "yes" to enable colors, "no" (disable colors), or "auto" (the default)
// to let Google Test decide.
GTEST_DECLARE_string_(color);

// This flag sets up the filter to select by name using a glob pattern
// the tests to run. If the filter is not given all tests are executed.
GTEST_DECLARE_string_(filter);

// This flag causes Google Test to list tests. None of the tests listed
// are actually run if the flag is provided.
GTEST_DECLARE_bool_(list_tests);

// This flag controls whether Google Test emits a detailed XML report to a file
// in addition to its normal textual output.
GTEST_DECLARE_string_(output);

// This flag controls whether Google Test prints the elapsed time for each
// test.
GTEST_DECLARE_bool_(print_time);

// This flag specifies the random number seed.
GTEST_DECLARE_int32_(random_seed);

// This flag sets how many times the tests are repeated. The default value
// is 1. If the value is -1 the tests are repeating forever.
GTEST_DECLARE_int32_(repeat);

// This flag controls whether Google Test includes Google Test internal
// stack frames in failure stack traces.
GTEST_DECLARE_bool_(show_internal_stack_frames);

// When this flag is specified, tests' order is randomized on every iteration.
GTEST_DECLARE_bool_(shuffle);

// This flag specifies the maximum number of stack frames to be
// printed in a failure message.
GTEST_DECLARE_int32_(stack_trace_depth);

// When this flag is specified, a failed assertion will throw an
// exception if exceptions are enabled, or exit the program with a
// non-zero code otherwise.
GTEST_DECLARE_bool_(throw_on_failure);

// When this flag is set with a "host:port" string, on supported
// platforms test results are streamed to the specified port on
// the specified host machine.
GTEST_DECLARE_string_(stream_result_to);

// The upper limit for valid stack trace depths.
// NOTE(review): presumably this bounds the stack_trace_depth flag declared
// above -- confirm where the flag is validated.
const int kMaxStackTraceDepth = 100;

namespace internal {

// Forward declarations of internal helper classes.  They let the public
// classes declared later in this header mention these types by name;
// several of them appear in friend declarations further down (for example
// in TestResult).
class AssertHelper;
class DefaultGlobalTestPartResultReporter;
class ExecDeathTest;
class NoExecDeathTest;
class FinalSuccessChecker;
class GTestFlagSaver;
class StreamingListenerTest;
class TestResultAccessor;
class TestEventListenersAccessor;
class TestEventRepeater;
class UnitTestRecordPropertyTestHelper;
class WindowsDeathTest;
// The elaborated type specifier both forward-declares internal::UnitTestImpl
// and declares a function returning a pointer to it.
class UnitTestImpl* GetUnitTestImpl();
// NOTE(review): judging by the name, this reports a test part result of the
// given type when no source location is available -- confirm against the
// definition, which is not part of this header.
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                    const std::string& message);

}  // namespace internal

// The friend relationships among some of these classes are cyclic.
// If we don't forward declare them, the compiler might confuse the classes
// named in friend declarations with same-named classes in another scope.
class Test;
class TestCase;
class TestInfo;
class UnitTest;

// A class for indicating whether an assertion was successful.  When
// the assertion wasn't successful, the AssertionResult object
// remembers a non-empty message that describes how it failed.
//
// To create an instance of this class, use one of the factory functions
// (AssertionSuccess() and AssertionFailure()).
//
// This class is useful for two purposes:
//   1. Defining predicate functions to be used with Boolean test assertions
//      EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
//   2. Defining predicate-format functions to be
//      used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
//
// For example, if you define IsEven predicate:
//
//   testing::AssertionResult IsEven(int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess();
//     else
//       return testing::AssertionFailure() << n << " is odd";
//   }
//
// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
// will print the message
//
//   Value of: IsEven(Fib(5))
//     Actual: false (5 is odd)
//   Expected: true
//
// instead of a more opaque
//
//   Value of: IsEven(Fib(5))
//     Actual: false
//   Expected: true
//
// in case IsEven is a simple Boolean predicate.
//
// If you expect your predicate to be reused and want to support informative
// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
// about half as often as positive ones in our tests), supply messages for
// both success and failure cases:
//
//   testing::AssertionResult IsEven(int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess() << n << " is even";
//     else
//       return testing::AssertionFailure() << n << " is odd";
//   }
//
// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
//
//   Value of: IsEven(Fib(6))
//     Actual: true (8 is even)
//   Expected: false
//
// NB: Predicates that support negative Boolean assertions have reduced
// performance in positive ones so be careful not to use them in tests
// that have lots (tens of thousands) of positive Boolean assertions.
//
// To use this class with EXPECT_PRED_FORMAT assertions such as:
//
//   // Verifies that Foo() returns an even number.
//   EXPECT_PRED_FORMAT1(IsEven, Foo());
//
// you need to define:
//
//   testing::AssertionResult IsEven(const char* expr, int n) {
//     if ((n % 2) == 0)
//       return testing::AssertionSuccess();
//     else
//       return testing::AssertionFailure()
//         << "Expected: " << expr << " is even\n  Actual: it's " << n;
//   }
//
// If Foo() returns 5, you will see the following message:
//
//   Expected: Foo() is even
//     Actual: it's 5
//
class GTEST_API_ AssertionResult {
 public:
  // Copy constructor; EXPECT_TRUE/FALSE(assertion_result) relies on it.
  AssertionResult(const AssertionResult& other);
  // Wraps a plain Boolean outcome, as produced by
  // EXPECT_TRUE/FALSE(bool_expression).
  explicit AssertionResult(bool success) : success_(success) {}

  // Converts to true iff the assertion succeeded.
  operator bool() const {
    return success_;  // NOLINT
  }

  // Produces the negated assertion result. EXPECT/ASSERT_FALSE use this.
  AssertionResult operator!() const;

  // Gives access to the text that has been streamed into this object.
  // Test assertions print it when they fail, i.e. when the outcome of the
  // predicate differs from what the assertion expected. If nothing has
  // been streamed in so far, the result is an empty string.
  const char* message() const {
    if (message_.get() == NULL) {
      return "";
    }
    return message_->c_str();
  }
  // TODO(vladl@google.com): Remove this after making sure no clients use it.
  // Deprecated alias of message(); prefer message() in new code.
  const char* failure_message() const { return message(); }

  // Appends a custom failure message fragment to this object and returns
  // the object itself so that insertions can be chained.
  template <typename T>
  AssertionResult& operator<<(const T& value) {
    AppendMessage(Message() << value);
    return *this;
  }

  // Overload that accepts basic output manipulators such as endl or
  // flush.
  AssertionResult& operator<<(
      ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
    AppendMessage(Message() << basic_manipulator);
    return *this;
  }

 private:
  // Adds the text of a_message to the end of message_, allocating the
  // string on first use.
  void AppendMessage(const Message& a_message) {
    if (message_.get() == NULL) {
      message_.reset(new ::std::string);
    }

    message_->append(a_message.GetString().c_str());
  }

  // Outcome of the assertion predicate.
  bool success_;
  // Text describing the condition, used when the expectation construct is
  // not satisfied with the outcome of the predicate. Kept behind a pointer
  // so that test assertions do not consume too much stack frame space.
  internal::scoped_ptr< ::std::string> message_;

  GTEST_DISALLOW_ASSIGN_(AssertionResult);
};

// Factory functions for AssertionResult.  Only the declarations appear
// here; the definitions are not part of this header.

// Makes a successful assertion result.
GTEST_API_ AssertionResult AssertionSuccess();

// Makes a failed assertion result.
GTEST_API_ AssertionResult AssertionFailure();

// Makes a failed assertion result with the given failure message.
// Deprecated; use AssertionFailure() << msg.
GTEST_API_ AssertionResult AssertionFailure(const Message& msg);

// The abstract base class of every test.
//
// A Google Test program is made of one or many TestCases, and each
// TestCase holds one or many Tests.
//
// Tests defined with the TEST macro derive from Test automatically, so no
// explicit derivation is needed there.
//
// You derive from Test yourself only when writing a test fixture to be
// used in a TEST_F.  For example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { ... }
//     virtual void TearDown() { ... }
//     ...
//   };
//
//   TEST_F(FooTest, Bar) { ... }
//   TEST_F(FooTest, Baz) { ... }
//
// Test is not copyable.
class GTEST_API_ Test {
 public:
  friend class TestInfo;

  // Function-pointer types for the routines that set up and tear down a
  // test case.
  typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
  typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;

  // Virtual, because Test is meant to be inherited from.
  virtual ~Test();

  // Prepares whatever is shared by all tests in this test case.
  //
  // Before the first test of test case Foo runs, Google Test calls
  // Foo::SetUpTestCase().  A sub-class can therefore shadow the version
  // defined in the super class by defining its own SetUpTestCase().
  static void SetUpTestCase() {}

  // Releases whatever is shared by all tests in this test case.
  //
  // After the last test of test case Foo has run, Google Test calls
  // Foo::TearDownTestCase().  A sub-class can therefore shadow the version
  // defined in the super class by defining its own TearDownTestCase().
  static void TearDownTestCase() {}

  // True iff the current test has a fatal failure.
  static bool HasFatalFailure();

  // True iff the current test has a non-fatal failure.
  static bool HasNonfatalFailure();

  // True iff the current test has any failure, fatal or non-fatal.
  static bool HasFailure() {
    if (HasFatalFailure()) {
      return true;
    }
    return HasNonfatalFailure();
  }

  // Records a property for the current test, the current test case, or --
  // when called outside the context of a test case -- the whole invocation
  // of the test program.  For a given key only the last value is kept.
  // The functions are public and static so that utility functions that
  // are not members of the test fixture can call them.
  //
  // Where the property ends up in the XML output depends on when it is
  // recorded:
  //   - during the lifespan of the test (from the start of its constructor
  //     to the end of its destructor): attribute of the <testcase>
  //     element;
  //   - from the fixture's SetUpTestCase or TearDownTestCase: attribute of
  //     the corresponding <testsuite> element;
  //   - in the global context (before or after RUN_ALL_TESTS is invoked,
  //     and from the SetUp/TearDown methods of Environment objects
  //     registered with Google Test): attribute of the <testsuites>
  //     element.
  static void RecordProperty(const std::string& key, const std::string& value);
  static void RecordProperty(const std::string& key, int value);

 protected:
  // Constructs a Test object.
  Test();

  // Prepares the test fixture.
  virtual void SetUp();

  // Cleans up the test fixture.
  virtual void TearDown();

 private:
  // True iff the fixture class of the current test is the same as that of
  // the first test in the current test case.
  static bool HasSameFixtureClass();

  // The test logic itself; it runs once the fixture has been set up, and
  // every sub-class has to implement it.
  //
  // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
  // Instead, use the TEST or TEST_F macro.
  virtual void TestBody() = 0;

  // Performs set-up, execution and tear-down of the test.
  void Run();

  // Destroys this object.  The unusual name is deliberate: it keeps this
  // internal method from clashing with names used in user TESTs.
  void DeleteSelf_() { delete this; }

  // A GTestFlagSaver used to save and restore all Google Test flags.
  const internal::GTestFlagSaver* const gtest_flag_saver_;

  // A common mistake is to spell SetUp() as Setup() and then wonder for a
  // long time why Google Test never calls it.  The method declared below
  // exists only to turn that mistake into a compile-time error:
  //
  //   - Its return type is deliberately not void, so a void Setup()
  //   declared in a user's test fixture conflicts with it.
  //
  //   - It is private, so calling it from a test fixture is another
  //   compiler error.
  //
  // DO NOT OVERRIDE THIS FUNCTION.
  //
  // An error about overriding the following function, or about it being
  // private, means that SetUp() has been mis-spelled as Setup().
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }

  // Tests must not be copied.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
};

// Makes internal::TimeInMillis available under the enclosing namespace.
// TestResult::elapsed_time() uses it for a duration in milliseconds.
typedef internal::TimeInMillis TimeInMillis;

// A user-specified test property: a copyable key/value pair of strings
// that can be written out as such.
//
// The destructor is not virtual, so do not derive from TestProperty.
class TestProperty {
 public:
  // The only constructor; there is deliberately no default constructor,
  // so a TestProperty is always created from a key and a value.
  TestProperty(const std::string& a_key, const std::string& a_value)
      : key_(a_key),
        value_(a_value) {}

  // Returns the key supplied by the user, as a C string.
  const char* key() const { return key_.c_str(); }

  // Returns the value supplied by the user, as a C string.
  const char* value() const { return value_.c_str(); }

  // Replaces the value given at construction time with new_value.
  void SetValue(const std::string& new_value) { value_.assign(new_value); }

 private:
  // User-supplied key.
  std::string key_;
  // User-supplied value.
  std::string value_;
};

// The result of a single Test.  This includes a list of
// TestPartResults, a list of TestProperties, a count of how many
// death tests there are in the Test, and how much time it took to run
// the Test.
//
// The public interface is read-only; all mutators are private and are
// reachable only through the friend declarations below.
//
// TestResult is not copyable.
class GTEST_API_ TestResult {
 public:
  // Creates an empty TestResult.
  TestResult();

  // D'tor.  Do not inherit from TestResult.
  ~TestResult();

  // Gets the number of all test parts.  This is the sum of the number
  // of successful test parts and the number of failed test parts.
  int total_part_count() const;

  // Returns the number of the test properties.
  int test_property_count() const;

  // Returns true iff the test passed (i.e. no test part failed).
  bool Passed() const {
    return !Failed();
  }

  // Returns true iff the test failed.
  bool Failed() const;

  // Returns true iff the test fatally failed.
  bool HasFatalFailure() const;

  // Returns true iff the test has a non-fatal failure.
  bool HasNonfatalFailure() const;

  // Returns the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const {
    return elapsed_time_;
  }

  // Returns the i-th test part result among all the results. i can range
  // from 0 to total_part_count() - 1. If i is not in that range, aborts
  // the program.
  const TestPartResult& GetTestPartResult(int i) const;

  // Returns the i-th test property. i can range from 0 to
  // test_property_count() - 1. If i is not in that range, aborts the
  // program.
  const TestProperty& GetTestProperty(int i) const;

 private:
  // Friends granted access to the private interface below.
  friend class TestInfo;
  friend class TestCase;
  friend class UnitTest;
  friend class internal::DefaultGlobalTestPartResultReporter;
  friend class internal::ExecDeathTest;
  friend class internal::TestResultAccessor;
  friend class internal::UnitTestImpl;
  friend class internal::WindowsDeathTest;

  // Gets the vector of TestPartResults.
  const std::vector<TestPartResult>& test_part_results() const {
    return test_part_results_;
  }

  // Gets the vector of TestProperties.
  const std::vector<TestProperty>& test_properties() const {
    return test_properties_;
  }

  // Sets the elapsed time.
  void set_elapsed_time(TimeInMillis elapsed) {
    elapsed_time_ = elapsed;
  }

  // Adds a test property to the list. The property is validated and may add
  // a non-fatal failure if invalid (e.g., if it conflicts with reserved
  // key names). If a property is already recorded for the same key, the
  // value will be updated, rather than storing multiple values for the same
  // key.  xml_element specifies the element for which the property is being
  // recorded and is used for validation.
  void RecordProperty(const std::string& xml_element,
                      const TestProperty& test_property);

  // Adds a failure if the key is a reserved attribute of Google Test
  // testcase tags.  Returns true if the property is valid.
  // TODO(russr): Validate attribute names are legal and human readable.
  static bool ValidateTestProperty(const std::string& xml_element,
                                   const TestProperty& test_property);

  // Adds a test part result to the list.
  void AddTestPartResult(const TestPartResult& test_part_result);

  // Returns the death test count.
  int death_test_count() const {
    return death_test_count_;
  }

  // Increments the death test count, returning the new count.
  int increment_death_test_count() {
    return ++death_test_count_;
  }

  // Clears the test part results.
  void ClearTestPartResults();

  // Clears the object.
  void Clear();

  // Protects mutable state of the property vector and of owned
  // properties, whose values may be updated.
  // NOTE(review): the identifier is misspelled ("properites").  It is left
  // unchanged here because renaming it would also require updating any
  // out-of-line code that refers to it.
  internal::Mutex test_properites_mutex_;

  // The vector of TestPartResults
  std::vector<TestPartResult> test_part_results_;
  // The vector of TestProperties
  std::vector<TestProperty> test_properties_;
  // Running count of death tests.
  int death_test_count_;
  // The elapsed time, in milliseconds.
  TimeInMillis elapsed_time_;

  // We disallow copying TestResult.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
};  // class TestResult

// A TestInfo object holds the following information about one test:
//
//   - the test case name
//   - the test name
//   - whether the test should be run
//   - a factory that creates the test object when invoked
//   - the test result
//
// The constructor of TestInfo registers itself with the UnitTest
// singleton such that the RUN_ALL_TESTS() macro knows which tests to
// run.
class GTEST_API_ TestInfo {
 public:
  // The destructor is not virtual, so TestInfo must not be used as a base
  // class.
  ~TestInfo();

  // Name of the test case this test belongs to.
  const char* test_case_name() const { return test_case_name_.c_str(); }

  // Name of the test itself.
  const char* name() const { return name_.c_str(); }

  // Name of the parameter type, or NULL when this is neither a typed nor a
  // type-parameterized test.
  const char* type_param() const {
    if (type_param_.get() == NULL) {
      return NULL;
    }

    return type_param_->c_str();
  }

  // Text representation of the value parameter, or NULL when this is not a
  // value-parameterized test.
  const char* value_param() const {
    if (value_param_.get() == NULL) {
      return NULL;
    }

    return value_param_->c_str();
  }

  // Tells whether this test should run: it must not be disabled (unless the
  // also_run_disabled_tests flag has been specified) and its full name must
  // match the user-specified filter.
  //
  // Tests are filtered by full name.  The full name of a test Bar in test
  // case Foo is "Foo.Bar"; only tests matching the filter are run.
  //
  // A filter is a colon-separated list of glob (not regex) patterns,
  // optionally followed by a '-' and a colon-separated list of negative
  // patterns (tests to exclude).  A test runs if it matches one of the
  // positive patterns and none of the negative ones.
  //
  // For example, *A*:Foo.* matches any string that contains the character
  // 'A' or starts with "Foo.".
  bool should_run() const { return should_run_; }

  // Tells whether this test will appear in the XML report.
  bool is_reportable() const {
    // Currently every test matching the filter is reported.  Tests excluded
    // because of sharding may be trimmed in the future.
    return matches_filter_;
  }

  // Read-only access to the result of the test.
  const TestResult* result() const { return &result_; }

 private:
#if GTEST_HAS_DEATH_TEST
  friend class internal::DefaultDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST
  friend class Test;
  friend class TestCase;
  friend class internal::UnitTestImpl;
  friend class internal::StreamingListenerTest;
  friend TestInfo* internal::MakeAndRegisterTestInfo(
    const char* test_case_name,
    const char* name,
    const char* type_param,
    const char* value_param,
    internal::TypeId fixture_class_id,
    Test::SetUpTestCaseFunc set_up_tc,
    Test::TearDownTestCaseFunc tear_down_tc,
    internal::TestFactoryBase* factory);

  // Constructs a TestInfo object, which takes ownership of the factory
  // object.  a_type_param is NULL for tests that are not type-parameterized;
  // a_value_param is NULL for tests that are not value-parameterized.
  TestInfo(const std::string& test_case_name,
           const std::string& name,
           const char* a_type_param,
           const char* a_value_param,
           internal::TypeId fixture_class_id,
           internal::TestFactoryBase* factory);

  // Bumps the number of death tests encountered in this test so far and
  // forwards the value returned by the underlying TestResult.
  int increment_death_test_count() {
    return result_.increment_death_test_count();
  }

  // Creates the test object, runs it, records its result, and then
  // deletes it.
  void Run();

  // Resets the result stored in the given TestInfo.
  static void ClearTestResult(TestInfo* test_info) {
    TestResult& stored_result = test_info->result_;
    stored_result.Clear();
  }

  // Immutable properties of the test.

  // Test case name.
  const std::string test_case_name_;
  // Test name.
  const std::string name_;
  // Name of the parameter type, or NULL if this is not a typed or a
  // type-parameterized test.
  const internal::scoped_ptr<const ::std::string> type_param_;
  // Text representation of the value parameter, or NULL if this is not a
  // value-parameterized test.
  const internal::scoped_ptr<const ::std::string> value_param_;
  // ID of the test fixture class.
  const internal::TypeId fixture_class_id_;
  // True iff this test should run.
  bool should_run_;
  // True iff this test is disabled.
  bool is_disabled_;
  // True if this test matches the user-specified filter.
  bool matches_filter_;
  // The factory that creates the test object.
  internal::TestFactoryBase* const factory_;

  // Mutable state: must be reset before the test is run a second time.
  TestResult result_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
};

// A test case, which consists of a vector of TestInfos.
//
// TestCase is not copyable.
class GTEST_API_ TestCase {
 public:
  // Creates a TestCase with the given name.
  //
  // TestCase does NOT have a default constructor.  Always use this
  // constructor to create a TestCase object.
  //
  // Arguments:
  //
  //   name:         name of the test case
  //   a_type_param: the name of the test's type parameter, or NULL if
  //                 this is not a type-parameterized test.
  //   set_up_tc:    pointer to the function that sets up the test case
  //   tear_down_tc: pointer to the function that tears down the test case
  TestCase(const char* name, const char* a_type_param,
           Test::SetUpTestCaseFunc set_up_tc,
           Test::TearDownTestCaseFunc tear_down_tc);

  // Destructor of TestCase.
  virtual ~TestCase();

  // Gets the name of the TestCase.
  const char* name() const {
    return name_.c_str();
  }

  // Returns the name of the parameter type, or NULL if this is not a
  // type-parameterized test case.
  const char* type_param() const {
    if (type_param_.get() != NULL) {
      return type_param_->c_str();
    }

    return NULL;
  }

  // Returns true if any test in this test case should run.
  bool should_run() const {
    return should_run_;
  }

  // Gets the number of successful tests in this test case.
  int successful_test_count() const;

  // Gets the number of failed tests in this test case.
  int failed_test_count() const;

  // Gets the number of disabled tests that will be reported in the XML report.
  int reportable_disabled_test_count() const;

  // Gets the number of disabled tests in this test case.
  int disabled_test_count() const;

  // Gets the number of tests to be printed in the XML report.
  int reportable_test_count() const;

  // Get the number of tests in this test case that should run.
  int test_to_run_count() const;

  // Gets the number of all tests in this test case.
  int total_test_count() const;

  // Returns true iff the test case passed.
  bool Passed() const {
    return !Failed();
  }

  // Returns true iff the test case failed.
  bool Failed() const {
    return failed_test_count() > 0;
  }

  // Returns the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const {
    return elapsed_time_;
  }

  // Returns the i-th test among all the tests. i can range from 0 to
  // total_test_count() - 1. If i is not in that range, returns NULL.
  const TestInfo* GetTestInfo(int i) const;

  // Returns the TestResult that holds test properties recorded during
  // execution of SetUpTestCase and TearDownTestCase.
  const TestResult& ad_hoc_test_result() const {
    return ad_hoc_test_result_;
  }

 private:
  friend class Test;
  friend class internal::UnitTestImpl;

  // Gets the (mutable) vector of TestInfos in this TestCase.
  std::vector<TestInfo*>& test_info_list() {
    return test_info_list_;
  }

  // Gets the (immutable) vector of TestInfos in this TestCase.
  const std::vector<TestInfo*>& test_info_list() const {
    return test_info_list_;
  }

  // Returns the i-th test among all the tests. i can range from 0 to
  // total_test_count() - 1. If i is not in that range, returns NULL.
  TestInfo* GetMutableTestInfo(int i);

  // Sets the should_run member.
  void set_should_run(bool should) {
    should_run_ = should;
  }

  // Adds a TestInfo to this test case.  Will delete the TestInfo upon
  // destruction of the TestCase object.
  void AddTestInfo(TestInfo* test_info);

  // Clears the results of all tests in this test case.
  void ClearResult();

  // Clears the results of all tests in the given test case.
  static void ClearTestCaseResult(TestCase* test_case) {
    test_case->ClearResult();
  }

  // Runs every test in this TestCase.
  void Run();

  // Runs SetUpTestCase() for this TestCase.  This wrapper is needed
  // for catching exceptions thrown from SetUpTestCase().
  void RunSetUpTestCase() {
    (*set_up_tc_)();
  }

  // Runs TearDownTestCase() for this TestCase.  This wrapper is
  // needed for catching exceptions thrown from TearDownTestCase().
  void RunTearDownTestCase() {
    (*tear_down_tc_)();
  }

  // Returns true iff test passed.
  static bool TestPassed(const TestInfo* test_info) {
    return test_info->should_run() && test_info->result()->Passed();
  }

  // Returns true iff test failed.
  static bool TestFailed(const TestInfo* test_info) {
    return test_info->should_run() && test_info->result()->Failed();
  }

  // Returns true iff the test is disabled and will be reported in the XML
  // report.
  static bool TestReportableDisabled(const TestInfo* test_info) {
    return test_info->is_reportable() && test_info->is_disabled_;
  }

  // Returns true iff test is disabled.
  static bool TestDisabled(const TestInfo* test_info) {
    return test_info->is_disabled_;
  }

  // Returns true iff this test will appear in the XML report.
  static bool TestReportable(const TestInfo* test_info) {
    return test_info->is_reportable();
  }

  // Returns true if the given test should run.
  static bool ShouldRunTest(const TestInfo* test_info) {
    return test_info->should_run();
  }

  // Shuffles the tests in this test case.
  void ShuffleTests(internal::Random* random);

  // Restores the test order to before the first shuffle.
  void UnshuffleTests();

  // Name of the test case.
  std::string name_;
  // Name of the parameter type, or NULL if this is not a typed or a
  // type-parameterized test.
  const internal::scoped_ptr<const ::std::string> type_param_;
  // The vector of TestInfos in their original order.  It owns the
  // elements in the vector.
  std::vector<TestInfo*> test_info_list_;
  // Provides a level of indirection for the test list to allow easy
  // shuffling and restoring the test order.  The i-th element in this
  // vector is the index of the i-th test in the shuffled test list.
  std::vector<int> test_indices_;
  // Pointer to the function that sets up the test case.
  Test::SetUpTestCaseFunc set_up_tc_;
  // Pointer to the function that tears down the test case.
  Test::TearDownTestCaseFunc tear_down_tc_;
  // True iff any test in this test case should run.
  bool should_run_;
  // Elapsed time, in milliseconds.
  TimeInMillis elapsed_time_;
  // Holds test properties recorded during execution of SetUpTestCase and
  // TearDownTestCase.
  TestResult ad_hoc_test_result_;

  // We disallow copying TestCases.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
};

// Base class for objects that set up and tear down an environment.
// Subclass it to define your own environment(s).
//
// Set-up and tear-down happen in the virtual methods SetUp() and
// TearDown() rather than in the constructor and the destructor, because:
//
//   1. You cannot safely throw from a destructor.  This matters because
//      Google Test is sometimes used with exceptions enabled, and ASSERT_*
//      may be implemented using exceptions where they are available.
//   2. You cannot use ASSERT_* directly in a constructor or
//      destructor.
class Environment {
 public:
  // Virtual because Environment is meant to be subclassed.
  virtual ~Environment() {}

  // Hook for setting up the environment; does nothing unless overridden.
  virtual void SetUp() {}

  // Hook for tearing down the environment; does nothing unless overridden.
  virtual void TearDown() {}

 private:
  // Trap for the mis-spelling Setup() (lower-case 'u').  If the compiler
  // complains about overriding this function or about it being private,
  // you have written Setup() where SetUp() was intended.
  struct Setup_should_be_spelled_SetUp {};
  virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
};

// The interface for tracing execution of tests. The methods are organized in
// the order the corresponding events are fired.
//
// Every method is pure virtual, so a concrete listener must implement all of
// them.
class TestEventListener {
 public:
  // Virtual so that listeners can be destroyed through a TestEventListener*.
  virtual ~TestEventListener() {}

  // Fired before any test activity starts.
  virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;

  // Fired before each iteration of tests starts.  There may be more than
  // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
  // index, starting from 0.
  virtual void OnTestIterationStart(const UnitTest& unit_test,
                                    int iteration) = 0;

  // Fired before environment set-up for each iteration of tests starts.
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;

  // Fired after environment set-up for each iteration of tests ends.
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;

  // Fired before the test case starts.
  virtual void OnTestCaseStart(const TestCase& test_case) = 0;

  // Fired before the test starts.
  virtual void OnTestStart(const TestInfo& test_info) = 0;

  // Fired after a failed assertion or a SUCCEED() invocation.
  virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;

  // Fired after the test ends.
  virtual void OnTestEnd(const TestInfo& test_info) = 0;

  // Fired after the test case ends.
  virtual void OnTestCaseEnd(const TestCase& test_case) = 0;

  // Fired before environment tear-down for each iteration of tests starts.
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;

  // Fired after environment tear-down for each iteration of tests ends.
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;

  // Fired after each iteration of tests finishes.  iteration is the
  // iteration index passed to the matching OnTestIterationStart() call.
  virtual void OnTestIterationEnd(const UnitTest& unit_test,
                                  int iteration) = 0;

  // Fired after all test activities have ended.
  virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
};

// The convenience class for users who need to override just one or two
// methods and are not concerned that a possible change to a signature of
// the methods they override will not be caught during the build.  For
// comments about each method please see the definition of TestEventListener
// above.
//
// Every override below has an empty body, so a subclass only needs to
// implement the events it cares about.
class EmptyTestEventListener : public TestEventListener {
 public:
  // Parameter names are commented out because the empty bodies do not use
  // them.
  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
                                    int /*iteration*/) {}
  virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
  virtual void OnTestStart(const TestInfo& /*test_info*/) {}
  virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
  virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
  virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
  virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
                                  int /*iteration*/) {}
  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
};

// TestEventListeners lets users add listeners to track events in Google Test.
//
// Besides user-appended listeners it tracks two standard ones: the default
// console result printer and the default XML generator.
//
// TestEventListeners is not copyable.
class GTEST_API_ TestEventListeners {
 public:
  TestEventListeners();
  ~TestEventListeners();

  // Appends an event listener to the end of the list. Google Test assumes
  // the ownership of the listener (i.e. it will delete the listener when
  // the test program finishes).
  void Append(TestEventListener* listener);

  // Removes the given event listener from the list and returns it.  It then
  // becomes the caller's responsibility to delete the listener. Returns
  // NULL if the listener is not found in the list.
  TestEventListener* Release(TestEventListener* listener);

  // Returns the standard listener responsible for the default console
  // output.  Can be removed from the listeners list to shut down default
  // console output.  Note that removing this object from the listener list
  // with Release transfers its ownership to the caller and makes this
  // function return NULL the next time.
  TestEventListener* default_result_printer() const {
    return default_result_printer_;
  }

  // Returns the standard listener responsible for the default XML output
  // controlled by the --gtest_output=xml flag.  Can be removed from the
  // listeners list by users who want to shut down the default XML output
  // controlled by this flag and substitute it with custom one.  Note that
  // removing this object from the listener list with Release transfers its
  // ownership to the caller and makes this function return NULL the next
  // time.
  TestEventListener* default_xml_generator() const {
    return default_xml_generator_;
  }

 private:
  // Friends granted access to the private interface below.
  friend class TestCase;
  friend class TestInfo;
  friend class internal::DefaultGlobalTestPartResultReporter;
  friend class internal::NoExecDeathTest;
  friend class internal::TestEventListenersAccessor;
  friend class internal::UnitTestImpl;

  // Returns repeater that broadcasts the TestEventListener events to all
  // subscribers.
  TestEventListener* repeater();

  // Sets the default_result_printer attribute to the provided listener.
  // The listener is also added to the listener list and previous
  // default_result_printer is removed from it and deleted. The listener can
  // also be NULL in which case it will not be added to the list. Does
  // nothing if the previous and the current listener objects are the same.
  void SetDefaultResultPrinter(TestEventListener* listener);

  // Sets the default_xml_generator attribute to the provided listener.  The
  // listener is also added to the listener list and previous
  // default_xml_generator is removed from it and deleted. The listener can
  // also be NULL in which case it will not be added to the list. Does
  // nothing if the previous and the current listener objects are the same.
  void SetDefaultXmlGenerator(TestEventListener* listener);

  // Controls whether events will be forwarded by the repeater to the
  // listeners in the list.
  bool EventForwardingEnabled() const;
  void SuppressEventForwarding();

  // The actual list of listeners.
  internal::TestEventRepeater* repeater_;
  // Listener responsible for the standard result output.
  TestEventListener* default_result_printer_;
  // Listener responsible for the creation of the XML output file.
  TestEventListener* default_xml_generator_;

  // We disallow copying TestEventListeners.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
};

// A UnitTest consists of a vector of TestCases.
//
// This is a singleton class.  The only instance of UnitTest is
// created when UnitTest::GetInstance() is first called.  This
// instance is never deleted.
//
// UnitTest is not copyable.
//
// This class is thread-safe as long as the methods are called
// according to their specification.
//
// NOTE(review): GTEST_LOCK_EXCLUDED_(mutex_) on a method presumably marks
// that the caller must not already hold mutex_ -- confirm against the
// macro's definition.
class GTEST_API_ UnitTest {
 public:
  // Gets the singleton UnitTest object.  The first time this method
  // is called, a UnitTest object is constructed and returned.
  // Consecutive calls will return the same object.
  static UnitTest* GetInstance();

  // Runs all tests in this UnitTest object and prints the result.
  // Returns 0 if successful, or 1 otherwise.
  //
  // This method can only be called from the main thread.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  int Run() GTEST_MUST_USE_RESULT_;

  // Returns the working directory when the first TEST() or TEST_F()
  // was executed.  The UnitTest object owns the string.
  const char* original_working_dir() const;

  // Returns the TestCase object for the test that's currently running,
  // or NULL if no test is running.
  const TestCase* current_test_case() const
  GTEST_LOCK_EXCLUDED_(mutex_);

  // Returns the TestInfo object for the test that's currently running,
  // or NULL if no test is running.
  const TestInfo* current_test_info() const
  GTEST_LOCK_EXCLUDED_(mutex_);

  // Returns the random seed used at the start of the current test run.
  int random_seed() const;

#if GTEST_HAS_PARAM_TEST
  // Returns the ParameterizedTestCaseRegistry object used to keep track of
  // value-parameterized tests and instantiate and register them.
  //
  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
  internal::ParameterizedTestCaseRegistry& parameterized_test_registry()
  GTEST_LOCK_EXCLUDED_(mutex_);
#endif  // GTEST_HAS_PARAM_TEST

  // Gets the number of successful test cases.
  int successful_test_case_count() const;

  // Gets the number of failed test cases.
  int failed_test_case_count() const;

  // Gets the number of all test cases.
  int total_test_case_count() const;

  // Gets the number of all test cases that contain at least one test
  // that should run.
  int test_case_to_run_count() const;

  // Gets the number of successful tests.
  int successful_test_count() const;

  // Gets the number of failed tests.
  int failed_test_count() const;

  // Gets the number of disabled tests that will be reported in the XML report.
  int reportable_disabled_test_count() const;

  // Gets the number of disabled tests.
  int disabled_test_count() const;

  // Gets the number of tests to be printed in the XML report.
  int reportable_test_count() const;

  // Gets the number of all tests.
  int total_test_count() const;

  // Gets the number of tests that should run.
  int test_to_run_count() const;

  // Gets the time of the test program start, in ms from the start of the
  // UNIX epoch.
  TimeInMillis start_timestamp() const;

  // Gets the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const;

  // Returns true iff the unit test passed (i.e. all test cases passed).
  bool Passed() const;

  // Returns true iff the unit test failed (i.e. some test case failed
  // or something outside of all tests failed).
  bool Failed() const;

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  const TestCase* GetTestCase(int i) const;

  // Returns the TestResult containing information on test failures and
  // properties logged outside of individual test cases.
  const TestResult& ad_hoc_test_result() const;

  // Returns the list of event listeners that can be used to track events
  // inside Google Test.
  TestEventListeners& listeners();

 private:
  // Registers and returns a global test environment.  When a test
  // program is run, all global test environments will be set-up in
  // the order they were registered.  After all tests in the program
  // have finished, all global test environments will be torn-down in
  // the *reverse* order they were registered.
  //
  // The UnitTest object takes ownership of the given environment.
  //
  // This method can only be called from the main thread.
  Environment* AddEnvironment(Environment* env);

  // Adds a TestPartResult to the current TestResult object.  All
  // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
  // eventually call this to report their results.  The user code
  // should use the assertion macros instead of calling this directly.
  void AddTestPartResult(TestPartResult::Type result_type,
                         const char* file_name,
                         int line_number,
                         const std::string& message,
                         const std::string& os_stack_trace)
  GTEST_LOCK_EXCLUDED_(mutex_);

  // Adds a TestProperty to the current TestResult object when invoked from
  // inside a test, to current TestCase's ad_hoc_test_result_ when invoked
  // from SetUpTestCase or TearDownTestCase, or to the global property set
  // when invoked elsewhere.  If the result already contains a property with
  // the same key, the value will be updated.
  void RecordProperty(const std::string& key, const std::string& value);

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  TestCase* GetMutableTestCase(int i);

  // Accessors for the implementation object.
  internal::UnitTestImpl* impl() {
    return impl_;
  }
  const internal::UnitTestImpl* impl() const {
    return impl_;
  }

  // These classes and functions are friends as they need to access private
  // members of UnitTest.
  friend class Test;
  friend class internal::AssertHelper;
  friend class internal::ScopedTrace;
  friend class internal::StreamingListenerTest;
  friend class internal::UnitTestRecordPropertyTestHelper;
  friend Environment* AddGlobalTestEnvironment(Environment* env);
  friend internal::UnitTestImpl* internal::GetUnitTestImpl();
  friend void internal::ReportFailureInUnknownLocation(
    TestPartResult::Type result_type,
    const std::string& message);

  // Creates an empty UnitTest.  Private: instances are obtained through
  // GetInstance().
  UnitTest();

  // D'tor
  virtual ~UnitTest();

  // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
  // Google Test trace stack.
  void PushGTestTrace(const internal::TraceInfo& trace)
  GTEST_LOCK_EXCLUDED_(mutex_);

  // Pops a trace from the per-thread Google Test trace stack.
  void PopGTestTrace()
  GTEST_LOCK_EXCLUDED_(mutex_);

  // Protects mutable state in *impl_.  This is mutable as some const
  // methods need to lock it too.
  mutable internal::Mutex mutex_;

  // Opaque implementation object.  This field is never changed once
  // the object is constructed.  We don't mark it as const here, as
  // doing so will cause a warning in the constructor of UnitTest.
  // Mutable state in *impl_ is protected by mutex_.
  internal::UnitTestImpl* impl_;

  // We disallow copying UnitTest.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
};

// A convenient wrapper for adding an environment for the test
// program.
//
// You should call this before RUN_ALL_TESTS() is called, probably in
// main().  If you use gtest_main, you need to call this before main()
// starts for it to take effect.  For example, you can define a global
// variable like this:
//
//   testing::Environment* const foo_env =
//       testing::AddGlobalTestEnvironment(new FooEnvironment);
//
// However, we strongly recommend you to write your own main() and
// call AddGlobalTestEnvironment() there, as relying on initialization
// of global variables makes the code harder to read and may cause
// problems when you register multiple environments from different
// translation units and the environments have dependencies among them
// (remember that the compiler doesn't guarantee the order in which
// global variables from different translation units are initialized).
inline Environment* AddGlobalTestEnvironment(Environment* env) {
  // Hand the environment to the UnitTest singleton and return whatever
  // its AddEnvironment() returns.
  UnitTest* const unit_test = UnitTest::GetInstance();
  return unit_test->AddEnvironment(env);
}

// Initializes Google Test.  This must be called before calling
// RUN_ALL_TESTS().  In particular, it parses a command line for the
// flags that Google Test recognizes.  Whenever a Google Test flag is
// seen, it is removed from argv, and *argc is decremented.
//
// No value is returned.  Instead, the Google Test flag variables are
// updated.
//
// Calling the function for the second time has no user-visible effect.
GTEST_API_ void InitGoogleTest(int* argc, char** argv);

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);

namespace internal {

// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
// value of type ToPrint that is an operand of a comparison assertion
// (e.g. ASSERT_EQ).  OtherOperand is the type of the other operand in
// the comparison, and is used to help determine the best way to
// format the value.  In particular, when the value is a C string
// (char pointer) and the other operand is an STL string object, we
// want to format the C string as a string, since we know it is
// compared by value with the string object.  If the value is a char
// pointer but the other operand is not an STL string object, we don't
// know whether the pointer is supposed to point to a NUL-terminated
// string, and thus want to print it as a pointer to be safe.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.

// The default case.
template <typename ToPrint, typename OtherOperand>
class FormatForComparison {
 public:
  // Generic fallback: the operand is rendered by PrintToString() and the
  // type of the other operand plays no role.
  static ::std::string Format(const ToPrint& value) {
    ::std::string printed = ::testing::PrintToString(value);
    return printed;
  }
};

// Array.
template <typename ToPrint, size_t N, typename OtherOperand>
class FormatForComparison<ToPrint[N], OtherOperand> {
 public:
  static ::std::string Format(const ToPrint* value) {
    return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
  }
};

// By default, print C string as pointers to be safe, as we don't know
// whether they actually point to a NUL-terminated string.

#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType)                \
  template <typename OtherOperand>                                      \
  class FormatForComparison<CharType*, OtherOperand> {                  \
   public:                                                              \
    static ::std::string Format(CharType* value) {                      \
      const void* const address = static_cast<const void*>(value);      \
      return ::testing::PrintToString(address);                         \
    }                                                                   \
  }

// One partial specialization per character-pointer type.  Each of them
// hands PrintToString() a const void* instead of the character pointer.
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);

// The generator macro is an implementation detail of this header.
#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_

// If a C string is compared with an STL string object, we know it's meant
// to point to a NUL-terminated string, and thus can print it as a string.

// Generates a full specialization of FormatForComparison for the operand
// pair (CharType*, OtherStringType).  Its Format() passes the pointer to
// PrintToString() with its character-pointer type intact, i.e. without
// the cast to const void* used by the pointer-printing specializations.
#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
  template <>                                                           \
  class FormatForComparison<CharType*, OtherStringType> {               \
   public:                                                              \
    static ::std::string Format(CharType* value) {                      \
      return ::testing::PrintToString(value);                           \
    }                                                                   \
  }

// Narrow C strings compared with ::std::string.
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);

// Narrow C strings compared with the global ::string type, when enabled.
#if GTEST_HAS_GLOBAL_STRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
#endif

// Wide C strings compared with the global ::wstring type, when enabled.
#if GTEST_HAS_GLOBAL_WSTRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
#endif

// Wide C strings compared with ::std::wstring, when enabled.
#if GTEST_HAS_STD_WSTRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
#endif

// The generator macro is not needed past this point.
#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_

// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, etc.)
// operand to be used in a failure message.  The type (but not value)
// of the other operand may affect the format.  This allows us to
// print a char* as a raw pointer when it is compared against another
// char* or void*, and print it as a C string when it is compared
// against an std::string object, for example.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename T1, typename T2>
std::string FormatForComparisonFailureMessage(
  const T1& value, const T2& /* other_operand */) {
  return FormatForComparison<T1, T2>::Format(value);
}

// The helper function for {ASSERT|EXPECT}_EQ.
template <typename T1, typename T2>
AssertionResult CmpHelperEQ(const char* expected_expression,
                            const char* actual_expression,
                            const T1& expected,
                            const T2& actual) {
#ifdef _MSC_VER
# pragma warning(push)          // Remember the current warning state.
# pragma warning(disable:4389)  // Mute the signed/unsigned mismatch
  // warning for the comparison that follows.
#endif

  if (expected == actual) {
    return AssertionSuccess();
  }

#ifdef _MSC_VER
# pragma warning(pop)          // Back to the remembered warning state.
#endif

  // The operands differ: format each one (the type of the opposite
  // operand steers the formatting) and build the failure from the text.
  const std::string expected_text =
      FormatForComparisonFailureMessage(expected, actual);
  const std::string actual_text =
      FormatForComparisonFailureMessage(actual, expected);
  return EqFailure(expected_expression, actual_expression,
                   expected_text, actual_text, false);
}

// With this overloaded version, we allow anonymous enums to be used
// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
// can be implicitly cast to BiggestInt.
GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression,
                                       const char* actual_expression,
                                       BiggestInt expected,
                                       BiggestInt actual);

// The helper class for {ASSERT|EXPECT}_EQ.  The template argument
// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
// is a null pointer literal.  The following default implementation is
// for lhs_is_null_literal being false.
template <bool lhs_is_null_literal>
class EqHelper {
 public:
  // This templatized version is for the general case.  It forwards all
  // four arguments unchanged to CmpHelperEQ() and returns its result.
  template <typename T1, typename T2>
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 const T1& expected,
                                 const T2& actual) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // With this overloaded version, we allow anonymous enums to be used
  // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
  // enums can be implicitly cast to BiggestInt.
  //
  // Even though its body looks the same as the above version, we
  // cannot merge the two, as it will make anonymous enums unhappy.
  static AssertionResult Compare(const char* expected_expression,
                                 const char* actual_expression,
                                 BiggestInt expected,
                                 BiggestInt actual) {
    // Both operands are BiggestInt at this point; they are passed on to
    // CmpHelperEQ() as such.
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }
};

// This specialization is used when the first argument to ASSERT_EQ()
// is a null pointer literal, like NULL, false, or 0.
template <>
class EqHelper<true> {
 public:
  // We define two overloaded versions of Compare().  The first
  // version will be picked when the second argument to ASSERT_EQ() is
  // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
  // EXPECT_EQ(false, a_bool).
  //
  // Its fifth parameter is never named or read; it exists only so that
  // EnableIf can remove this overload when T2 is a pointer type.
  template <typename T1, typename T2>
  static AssertionResult Compare(
    const char* expected_expression,
    const char* actual_expression,
    const T1& expected,
    const T2& actual,
    // The following line prevents this overload from being considered if T2
    // is not a pointer type.  We need this because ASSERT_EQ(NULL, my_ptr)
    // expands to Compare("", "", NULL, my_ptr), which requires a conversion
    // to match the Secret* in the other overload, which would otherwise make
    // this template match better.
    typename EnableIf < !is_pointer<T2>::value >::type* = 0) {
    return CmpHelperEQ(expected_expression, actual_expression, expected,
                       actual);
  }

  // This version will be picked when the second argument to ASSERT_EQ() is a
  // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
  template <typename T>
  static AssertionResult Compare(
    const char* expected_expression,
    const char* actual_expression,
    // We used to have a second template parameter instead of Secret*.  That
    // template parameter would deduce to 'long', making this a better match
    // than the first overload even without the first overload's EnableIf.
    // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
    // non-pointer argument" (even a deduced integral argument), so the old
    // implementation caused warnings in user code.
    Secret* /* expected (NULL) */,
    T* actual) {
    // We already know that 'expected' is a null pointer.
    // The cast gives the null operand the same pointer type as `actual`,
    // so CmpHelperEQ() receives two T* values.
    return CmpHelperEQ(expected_expression, actual_expression,
                       static_cast<T*>(NULL), actual);
  }
};

// A macro for implementing the helper functions needed to implement
// ASSERT_?? and EXPECT_??.  It is here just to avoid copy-and-paste
// of similar code.
//
// For each templatized helper function, we also define an overloaded
// version for BiggestInt in order to reduce code bloat and allow
// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
// with gcc 4.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
template <typename T1, typename T2>\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   const T1& val1, const T2& val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  }\
  return AssertionFailure() \
      << "Expected: (" << expr1 << ") " #op " (" << expr2\
      << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
      << " vs " << FormatForComparisonFailureMessage(val2, val1);\
}\
GTEST_API_ AssertionResult CmpHelper##op_name(\
    const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)

// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.

// Each expansion below defines the templated CmpHelper<op_name>() and
// declares its BiggestInt overload.

// Helper behind {ASSERT|EXPECT}_NE.
GTEST_IMPL_CMP_HELPER_(NE, != );
// Helper behind {ASSERT|EXPECT}_LE.
GTEST_IMPL_CMP_HELPER_(LE, <= );
// Helper behind {ASSERT|EXPECT}_LT.
GTEST_IMPL_CMP_HELPER_(LT, < );
// Helper behind {ASSERT|EXPECT}_GE.
GTEST_IMPL_CMP_HELPER_(GE, >= );
// Helper behind {ASSERT|EXPECT}_GT.
GTEST_IMPL_CMP_HELPER_(GT, > );

#undef GTEST_IMPL_CMP_HELPER_

// The helper function for {ASSERT|EXPECT}_STREQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
    const char* actual_expression,
    const char* expected,
    const char* actual);

// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
    const char* actual_expression,
    const char* expected,
    const char* actual);

// The helper function for {ASSERT|EXPECT}_STRNE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
    const char* s2_expression,
    const char* s1,
    const char* s2);

// The helper function for {ASSERT|EXPECT}_STRCASENE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
    const char* s2_expression,
    const char* s1,
    const char* s2);


// Helper function for *_STREQ on wide strings.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
    const char* actual_expression,
    const wchar_t* expected,
    const wchar_t* actual);

// Helper function for *_STRNE on wide strings.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
    const char* s2_expression,
    const wchar_t* s1,
    const wchar_t* s2);

}  // namespace internal

// IsSubstring() and IsNotSubstring() are intended to be used as the
// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
// themselves.  They check whether needle is a substring of haystack
// (NULL is considered a substring of itself only), and return an
// appropriate error message when they fail.
//
// The {needle,haystack}_expr arguments are the stringified
// expressions that generated the two real arguments.
GTEST_API_ AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const wchar_t* needle, const wchar_t* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const char* needle, const char* haystack);
GTEST_API_ AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const wchar_t* needle, const wchar_t* haystack);
GTEST_API_ AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::string& needle, const ::std::string& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::string& needle, const ::std::string& haystack);

#if GTEST_HAS_STD_WSTRING
GTEST_API_ AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::wstring& needle, const ::std::wstring& haystack);
GTEST_API_ AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::wstring& needle, const ::std::wstring& haystack);
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

// Helper template function for comparing floating-points.
//
// Template parameter:
//
//   RawType: the raw floating-point type (either float or double)
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename RawType>
AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression,
    const char* actual_expression,
    RawType expected,
    RawType actual) {
  // Wrap both raw values so that AlmostEquals() decides the outcome.
  const FloatingPoint<RawType> expected_fp(expected);
  const FloatingPoint<RawType> actual_fp(actual);
  if (expected_fp.AlmostEquals(actual_fp)) {
    return AssertionSuccess();
  }

  // Both values are rendered with digits10 + 2 significant digits.
  const int precision = std::numeric_limits<RawType>::digits10 + 2;

  ::std::stringstream expected_ss;
  expected_ss << std::setprecision(precision) << expected;

  ::std::stringstream actual_ss;
  actual_ss << std::setprecision(precision) << actual;

  return EqFailure(expected_expression, actual_expression,
                   StringStreamToString(&expected_ss),
                   StringStreamToString(&actual_ss),
                   false);
}

// Helper function for implementing ASSERT_NEAR.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
    const char* expr2,
    const char* abs_error_expr,
    double val1,
    double val2,
    double abs_error);

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
// A class that enables one to stream messages to assertion macros
class GTEST_API_ AssertHelper {
 public:
  // Takes the result type, the source location (file and line) and the
  // base message of one assertion.
  AssertHelper(TestPartResult::Type type,
               const char* file,
               int line,
               const char* message);
  ~AssertHelper();

  // Assigning a Message is the syntactic hook that lets text be streamed
  // into an assertion; see the GTEST_MESSAGE_ macro below.
  void operator=(const Message& message) const;

 private:
  // The state is kept in a struct reached through a single pointer so
  // that AssertHelper itself stays as small as possible.  That matters
  // because gcc cannot reuse stack space even for temporaries, so every
  // EXPECT_EQ reserves stack space for another AssertHelper.
  struct AssertHelperData {
    AssertHelperData(TestPartResult::Type result_type,
                     const char* file_name,
                     int line_number,
                     const char* text)
      : type(result_type),
        file(file_name),
        line(line_number),
        message(text) { }

    const TestPartResult::Type type;
    const char* const file;
    const int line;
    const std::string message;

   private:
    GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
  };

  // The pointer itself is const and cannot be reseated after construction.
  AssertHelperData* const data_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
};

}  // namespace internal

#if GTEST_HAS_PARAM_TEST
// The pure interface class that all value-parameterized tests inherit from.
// A value-parameterized class must inherit from both ::testing::Test and
// ::testing::WithParamInterface. In most cases that just means inheriting
// from ::testing::TestWithParam, but more complicated test hierarchies
// may need to inherit from Test and WithParamInterface at different levels.
//
// This interface has support for accessing the test parameter value via
// the GetParam() method.
//
// Use it with one of the parameter generator defining functions, like Range(),
// Values(), ValuesIn(), Bool(), and Combine().
//
// class FooTest : public ::testing::TestWithParam<int> {
//  protected:
//   FooTest() {
//     // Can use GetParam() here.
//   }
//   virtual ~FooTest() {
//     // Can use GetParam() here.
//   }
//   virtual void SetUp() {
//     // Can use GetParam() here.
//   }
//   virtual void TearDown() {
//     // Can use GetParam() here.
//   }
// };
// TEST_P(FooTest, DoesBar) {
//   // Can use GetParam() method here.
//   Foo foo;
//   ASSERT_TRUE(foo.DoesBar(GetParam()));
// }
// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));

template <typename T>
class WithParamInterface {
 public:
  // The type of the test parameter, exposed for use by other code.
  typedef T ParamType;
  virtual ~WithParamInterface() {}

  // The current parameter value. Is also available in the test fixture's
  // constructor. This member function is non-static, even though it only
  // references static data, to reduce the opportunity for incorrect uses
  // like writing 'WithParamInterface<bool>::GetParam()' for a test that
  // uses a fixture whose parameter type is int.
  //
  // GTEST_CHECK_ guards the dereference: parameter_ is NULL until
  // SetParam() has been called with a non-NULL pointer.
  const ParamType& GetParam() const {
    GTEST_CHECK_(parameter_ != NULL)
        << "GetParam() can only be called inside a value-parameterized test "
        << "-- did you intend to write TEST_P instead of TEST_F?";
    return *parameter_;
  }

 private:
  // Sets parameter value. The caller is responsible for making sure the value
  // remains alive and unchanged throughout the current test.
  // Only the pointer is stored; the pointee is not copied.  Being private,
  // this is reachable only through the friend declaration below.
  static void SetParam(const ParamType* parameter) {
    parameter_ = parameter;
  }

  // Static value used for accessing parameter during a test lifetime.
  // There is one such pointer per instantiation of WithParamInterface<T>.
  static const ParamType* parameter_;

  // TestClass must be a subclass of WithParamInterface<T> and Test.
  template <class TestClass> friend class internal::ParameterizedTestFactory;
};

// Out-of-class definition of the static member; it starts out as NULL.
template <typename T>
const T* WithParamInterface<T>::parameter_ = NULL;

// Most value-parameterized classes can ignore the existence of
// WithParamInterface, and can just inherit from ::testing::TestWithParam.

template <typename T>
class TestWithParam : public Test, public WithParamInterface<T> {
  // Empty on purpose: the class only combines its two base classes.
};

#endif  // GTEST_HAS_PARAM_TEST

// Macros for indicating success/failure in test code.

// ADD_FAILURE unconditionally adds a failure to the current test.
// SUCCEED generates a success - it doesn't automatically make the
// current test successful, as a test is only successful when it has
// no failure.
//
// EXPECT_* verifies that a certain condition is satisfied.  If not,
// it behaves like ADD_FAILURE.  In particular:
//
//   EXPECT_TRUE  verifies that a Boolean condition is true.
//   EXPECT_FALSE verifies that a Boolean condition is false.
//
// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
// that they will also abort the current function on failure.  People
// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
// writing data-driven tests often find themselves using ADD_FAILURE
// and EXPECT_* more.

// Generates a nonfatal failure with a generic message ("Failed").
#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")

// Generates a nonfatal failure at the given source file location with
// a generic message.  `file` and `line` are passed through unchanged to
// GTEST_MESSAGE_AT_.
#define ADD_FAILURE_AT(file, line) \
  GTEST_MESSAGE_AT_(file, line, "Failed", \
                    ::testing::TestPartResult::kNonFatalFailure)

// Generates a fatal failure with a generic message ("Failed").
#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")

// Define GTEST_DONT_DEFINE_FAIL to 1 to omit the definition of FAIL(),
// which is a generic name and clashes with some other libraries.
// FAIL() is only an alias; GTEST_FAIL() above is defined unconditionally.
#if !GTEST_DONT_DEFINE_FAIL
# define FAIL() GTEST_FAIL()
#endif

// Generates a success with a generic message ("Succeeded").
#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")

// Define GTEST_DONT_DEFINE_SUCCEED to 1 to omit the definition of
// SUCCEED(), which is a generic name and clashes with some other
// libraries.  SUCCEED() is only an alias; GTEST_SUCCEED() above is
// defined unconditionally.
#if !GTEST_DONT_DEFINE_SUCCEED
# define SUCCEED() GTEST_SUCCEED()
#endif

// Macros for testing exceptions.
//
//    * {ASSERT|EXPECT}_THROW(statement, expected_exception):
//         Tests that the statement throws the expected exception.
//    * {ASSERT|EXPECT}_NO_THROW(statement):
//         Tests that the statement doesn't throw any exception.
//    * {ASSERT|EXPECT}_ANY_THROW(statement):
//         Tests that the statement throws an exception.

// The EXPECT_* forms pass GTEST_NONFATAL_FAILURE_ as the failure reporter
// to the shared GTEST_TEST_*THROW_ implementations; the ASSERT_* forms
// pass GTEST_FATAL_FAILURE_ instead.  Nothing else differs between them.
#define EXPECT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
#define EXPECT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define EXPECT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
#define ASSERT_THROW(statement, expected_exception) \
  GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
#define ASSERT_NO_THROW(statement) \
  GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
#define ASSERT_ANY_THROW(statement) \
  GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)

// Boolean assertions. Condition can be either a Boolean expression or an
// AssertionResult. For more information on how to use AssertionResult with
// these macros see comments on that class.
//
// The *_FALSE forms test !(condition) but still stringify the original,
// un-negated condition, and they swap the two Boolean literals relative
// to the *_TRUE forms.
// NOTE(review): the literals are presumably the actual/expected values
// shown in the failure message -- confirm against GTEST_TEST_BOOLEAN_.
#define EXPECT_TRUE(condition) \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
                      GTEST_NONFATAL_FAILURE_)
#define EXPECT_FALSE(condition) \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
                      GTEST_NONFATAL_FAILURE_)
#define ASSERT_TRUE(condition) \
  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
                      GTEST_FATAL_FAILURE_)
#define ASSERT_FALSE(condition) \
  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
                      GTEST_FATAL_FAILURE_)

// Includes the auto-generated header that implements a family of
// generic predicate assertion macros.
#include "gtest/gtest_pred_impl.h"

// Macros for testing equalities and inequalities.
//
//    * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual
//    * {ASSERT|EXPECT}_NE(v1, v2):           Tests that v1 != v2
//    * {ASSERT|EXPECT}_LT(v1, v2):           Tests that v1 < v2
//    * {ASSERT|EXPECT}_LE(v1, v2):           Tests that v1 <= v2
//    * {ASSERT|EXPECT}_GT(v1, v2):           Tests that v1 > v2
//    * {ASSERT|EXPECT}_GE(v1, v2):           Tests that v1 >= v2
//
// When they are not, Google Test prints both the tested expressions and
// their actual values.  The values must be compatible built-in types,
// or you will get a compiler error.  By "compatible" we mean that the
// values can be compared by the respective operator.
//
// Note:
//
//   1. It is possible to make a user-defined type work with
//   {ASSERT|EXPECT}_??(), but that requires overloading the
//   comparison operators and is thus discouraged by the Google C++
//   Usage Guide.  Therefore, you are advised to use the
//   {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
//   equal.
//
//   2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
//   pointers (in particular, C strings).  Therefore, if you use it
//   with two C strings, you are testing how their locations in memory
//   are related, not how their content is related.  To compare two C
//   strings by content, use {ASSERT|EXPECT}_STR*().
//
//   3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to
//   {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you
//   what the actual value is when it fails, and similarly for the
//   other comparisons.
//
//   4. Do not depend on the order in which {ASSERT|EXPECT}_??()
//   evaluate their arguments, which is undefined.
//
//   5. These macros evaluate their arguments exactly once.
//
// Examples:
//
//   EXPECT_NE(5, Foo());
//   EXPECT_EQ(NULL, a_pointer);
//   ASSERT_LT(i, array_size);
//   ASSERT_GT(records.size(), 0) << "There is no record left.";

// EXPECT_EQ selects the EqHelper specialization at compile time:
// GTEST_IS_NULL_LITERAL_(expected) supplies the lhs_is_null_literal
// template argument, so EqHelper<true> handles a null pointer literal
// as the first argument.
#define EXPECT_EQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)
// The remaining comparisons assign no expected/actual roles to their
// operands, so the parameters are named val1/val2, matching the
// GTEST_ASSERT_* counterparts.  Each forwards to the CmpHelper function
// of the same suffix.
#define EXPECT_NE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
#define EXPECT_LE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define EXPECT_LT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define EXPECT_GE(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define EXPECT_GT(val1, val2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// The GTEST_ASSERT_* macros are always defined.  They mirror the
// EXPECT_* comparison macros but expand to ASSERT_PRED_FORMAT2 instead of
// EXPECT_PRED_FORMAT2.
#define GTEST_ASSERT_EQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal:: \
                      EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
                      expected, actual)
#define GTEST_ASSERT_NE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
#define GTEST_ASSERT_LE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
#define GTEST_ASSERT_LT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
#define GTEST_ASSERT_GE(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
#define GTEST_ASSERT_GT(val1, val2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)

// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
// ASSERT_XY(), which clashes with some users' own code.
// Each short name below is a plain alias for its GTEST_ASSERT_XY macro and
// can be suppressed independently of the others.

#if !GTEST_DONT_DEFINE_ASSERT_EQ
# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_NE
# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LE
# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_LT
# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GE
# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
#endif

#if !GTEST_DONT_DEFINE_ASSERT_GT
# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
#endif

// C-string Comparisons.  All tests treat NULL and any non-NULL string
// as different.  Two NULLs are equal.
//
//    * {ASSERT|EXPECT}_STREQ(s1, s2):     Tests that s1 == s2
//    * {ASSERT|EXPECT}_STRNE(s1, s2):     Tests that s1 != s2
//    * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
//    * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
//
// For wide or narrow string objects, you can use the
// {ASSERT|EXPECT}_??() macros.
//
// Don't depend on the order in which the arguments are evaluated,
// which is undefined.
//
// These macros evaluate their arguments exactly once.

// Nonfatal forms: each forwards to the CmpHelperSTR* function of the same
// suffix through EXPECT_PRED_FORMAT2.
#define EXPECT_STREQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define EXPECT_STRNE(s1, s2) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define EXPECT_STRCASEEQ(expected, actual) \
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define EXPECT_STRCASENE(s1, s2)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Fatal forms: same helpers, routed through ASSERT_PRED_FORMAT2.
#define ASSERT_STREQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
#define ASSERT_STRNE(s1, s2) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
#define ASSERT_STRCASEEQ(expected, actual) \
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
#define ASSERT_STRCASENE(s1, s2)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)

// Macros for comparing floating-point numbers.
//
//    * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual):
//         Tests that two float values are almost equal.
//    * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual):
//         Tests that two double values are almost equal.
//    * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
//         Tests that v1 and v2 are within the given distance to each other.
//
// Google Test uses ULP-based comparison to automatically pick a default
// error bound that is appropriate for the operands.  See the
// FloatingPoint template class in gtest-internal.h if you are
// interested in the implementation details.

#define EXPECT_FLOAT_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

#define EXPECT_DOUBLE_EQ(expected, actual)\
  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

#define ASSERT_FLOAT_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
                      expected, actual)

#define ASSERT_DOUBLE_EQ(expected, actual)\
  ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
                      expected, actual)

// Non-fatal check that val1 and val2 lie within abs_error of each other.
// All three operands are forwarded to EXPECT_PRED_FORMAT3 with
// ::testing::internal::DoubleNearPredFormat as the formatter (so the
// comparison is presumably carried out in double precision).
#define EXPECT_NEAR(val1, val2, abs_error)\
  EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

// Fatal counterpart of EXPECT_NEAR (goes through ASSERT_PRED_FORMAT3).
#define ASSERT_NEAR(val1, val2, abs_error)\
  ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
                      val1, val2, abs_error)

// These predicate format functions work on floating-point values, and
// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
//
//   EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);

// Asserts that val1 is less than, or almost equal to, val2.  Fails
// otherwise.  In particular, it fails if either val1 or val2 is NaN.
//
// Both functions have the shape of a two-value predicate-format function:
// expr1 and expr2 receive the source text of the two operands (this is what
// the *_PRED_FORMAT2 macros pass ahead of the values), while val1 and val2
// are the evaluated operands.  FloatLE works on float operands, DoubleLE on
// double operands.
GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
                                   float val1, float val2);
GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
                                    double val1, double val2);


#if GTEST_OS_WINDOWS

// Macros that test for HRESULT failure and success, these are only useful
// on Windows, and rely on Windows SDK macros and APIs to compile.
//
//    * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
//
// When expr unexpectedly fails or succeeds, Google Test prints the
// expected result and the actual result with both a human-readable
// string representation of the error, if available, as well as the
// hex result code.
//
// Implementation note: every macro wraps expr in parentheses and passes it
// to the unary *_PRED_FORMAT1 machinery, with IsHRESULTSuccess or
// IsHRESULTFailure as the formatter.  Because the parentheses are added
// before the argument is stringified, the expression text handed to the
// formatter reads "(expr)".  EXPECT_* variants are non-fatal, ASSERT_*
// variants are fatal.
# define EXPECT_HRESULT_SUCCEEDED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define ASSERT_HRESULT_SUCCEEDED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))

# define EXPECT_HRESULT_FAILED(expr) \
    EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

# define ASSERT_HRESULT_FAILED(expr) \
    ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))

#endif  // GTEST_OS_WINDOWS

// Macros that execute statement and check that it doesn't generate new fatal
// failures in the current thread.
//
//   * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
//
// Examples:
//
//   EXPECT_NO_FATAL_FAILURE(Process());
//   ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
//
// Both macros delegate to GTEST_TEST_NO_FATAL_FAILURE_ and differ only in
// the failure reporter they pass along: GTEST_FATAL_FAILURE_ for the ASSERT_
// flavor, GTEST_NONFATAL_FAILURE_ for the EXPECT_ flavor.
#define ASSERT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
#define EXPECT_NO_FATAL_FAILURE(statement) \
    GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)

// Causes a trace (including the source file path, the current line
// number, and the given message) to be included in every test failure
// message generated by code in the current scope.  The effect is
// undone when the control leaves the current scope.
//
// The message argument can be anything streamable to std::ostream.
//
// In the implementation, we include the current line number as part
// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
// to appear in the same block - as long as they are on different
// lines.
// Expands to the declaration of a ::testing::internal::ScopedTrace object
// whose name is built from gtest_trace_ and __LINE__.  The object is
// constructed from __FILE__, __LINE__ and a ::testing::Message into which
// the (parenthesized) message argument has been streamed.
#define SCOPED_TRACE(message) \
  ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
    __FILE__, __LINE__, ::testing::Message() << (message))

// Compile-time assertion for type equality.
// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
// the same type.  The value it returns is not interesting.
//
// Instead of making StaticAssertTypeEq a class template, we make it a
// function template that invokes a helper class template.  This
// prevents a user from misusing StaticAssertTypeEq<T1, T2> by
// defining objects of that type.
//
// CAVEAT:
//
// When used inside a method of a class template,
// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
// instantiated.  For example, given:
//
//   template <typename T> class Foo {
//    public:
//     void Bar() { testing::StaticAssertTypeEq<int, T>(); }
//   };
//
// the code:
//
//   void Test1() { Foo<bool> foo; }
//
// will NOT generate a compiler error, as Foo<bool>::Bar() is never
// actually instantiated.  Instead, you need:
//
//   void Test2() { Foo<bool> foo; foo.Bar(); }
//
// to cause a compiler error.
// Compile-time check that T1 and T2 name the same type.  The return value
// carries no information; it is always true.
template <typename T1, typename T2>
bool StaticAssertTypeEq() {
  // Creating (and immediately discarding) the helper is what forces the
  // compiler to instantiate StaticAssertTypeEqHelper<T1, T2>.
  static_cast<void>(internal::StaticAssertTypeEqHelper<T1, T2>());
  return true;
}

// Defines a test.
//
// The first parameter is the name of the test case, and the second
// parameter is the name of the test within the test case.
//
// The convention is to end the test case name with "Test".  For
// example, a test case for the Foo class can be named FooTest.
//
// The user should put the test code between braces after using this
// macro.  Example:
//
//   TEST(FooTest, InitializesCorrectly) {
//     Foo foo;
//     EXPECT_TRUE(foo.StatusIsOK());
//   }

// Note that we call GetTestTypeId() instead of GetTypeId<
// ::testing::Test>() here to get the type ID of testing::Test.  This
// is to work around a suspected linker bug when using Google Test as
// a framework on Mac OS X.  The bug causes GetTypeId<
// ::testing::Test>() to return different values depending on whether
// the call is from the Google Test framework itself or from user test
// code.  GetTestTypeId() is guaranteed to always return the same
// value, as it always calls GetTypeId<>() from the Google Test
// framework.
// Expands to GTEST_TEST_ with ::testing::Test passed as the fixture class
// and the value of ::testing::internal::GetTestTypeId() passed as its type
// id.
#define GTEST_TEST(test_case_name, test_name)\
  GTEST_TEST_(test_case_name, test_name, \
              ::testing::Test, ::testing::internal::GetTestTypeId())

// Define this macro to 1 to omit the definition of TEST(), which
// is a generic name and clashes with some other libraries.
#if !GTEST_DONT_DEFINE_TEST
// Short spelling of GTEST_TEST; both arguments are forwarded unchanged.
// Only defined when GTEST_DONT_DEFINE_TEST is unset or 0, so GTEST_TEST
// remains available when this alias is suppressed.
# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
#endif

// Defines a test that uses a test fixture.
//
// The first parameter is the name of the test fixture class, which
// also doubles as the test case name.  The second parameter is the
// name of the test within the test case.
//
// A test fixture class must be declared earlier.  The user should put
// the test code between braces after using this macro.  Example:
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { b_.AddElement(3); }
//
//     Foo a_;
//     Foo b_;
//   };
//
//   TEST_F(FooTest, InitializesCorrectly) {
//     EXPECT_TRUE(a_.StatusIsOK());
//   }
//
//   TEST_F(FooTest, ReturnsElementCountCorrectly) {
//     EXPECT_EQ(0, a_.size());
//     EXPECT_EQ(1, b_.size());
//   }

// Expands to GTEST_TEST_ with test_fixture used twice: once as the test
// case name and once as the fixture class.  The type id argument is
// ::testing::internal::GetTypeId<test_fixture>().
#define TEST_F(test_fixture, test_name)\
  GTEST_TEST_(test_fixture, test_name, test_fixture, \
              ::testing::internal::GetTypeId<test_fixture>())

}  // namespace testing

// Use this function in main() to run all tests.  It returns 0 if all
// tests are successful, or 1 otherwise.
//
// RUN_ALL_TESTS() should be invoked after the command line has been
// parsed by InitGoogleTest().
//
// This function was formerly a macro; thus, it is in the global
// namespace and has an all-caps name.
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;

inline int RUN_ALL_TESTS() {
  return ::testing::UnitTest::GetInstance()->Run();
}

#endif  // GTEST_INCLUDE_GTEST_GTEST_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest_pred_impl.h
================================================
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
//
// Implements a family of generic predicate assertion macros.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

// Makes sure this header is not included before gtest.h.
#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
# error Do not include gtest_pred_impl.h directly.  Include gtest.h instead.
#endif  // GTEST_INCLUDE_GTEST_GTEST_H_

// This header implements a family of generic predicate assertion
// macros:
//
//   ASSERT_PRED_FORMAT1(pred_format, v1)
//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
//   ...
//
// where pred_format is a function or functor that takes n (in the
// case of ASSERT_PRED_FORMATn) values and their source expression
// text, and returns a testing::AssertionResult.  See the definition
// of ASSERT_EQ in gtest.h for an example.
//
// If you don't care about formatting, you can use the more
// restrictive version:
//
//   ASSERT_PRED1(pred, v1)
//   ASSERT_PRED2(pred, v1, v2)
//   ...
//
// where pred is an n-ary function or functor that returns bool,
// and the values v1, v2, ..., must support the << operator for
// streaming to std::ostream.
//
// We also define the EXPECT_* variations.
//
// For now we only support predicates whose arity is at most 5.
// Please email googletestframework@googlegroups.com if you need
// support for higher arities.

// GTEST_ASSERT_ is the basic statement to which all of the assertions
// in this file reduce.  Don't use this in your code.
//
// expression  - evaluated once and used to initialize a const
//               ::testing::AssertionResult named gtest_ar.
// on_failure  - invoked as on_failure(gtest_ar.failure_message()) when
//               gtest_ar tests false; nothing happens when it tests true
//               (the if branch is an empty statement).
//
// The expansion ends without a trailing semicolon, so whatever follows the
// macro invocation becomes part of the else branch's statement.
// GTEST_AMBIGUOUS_ELSE_BLOCKER_ is emitted first; presumably it keeps this
// if/else from pairing with a surrounding if - defined elsewhere, confirm
// there.
#define GTEST_ASSERT_(expression, on_failure) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar = (expression)) \
    ; \
  else \
    on_failure(gtest_ar.failure_message())


// Backs {EXPECT|ASSERT}_PRED1.  Not intended to be called from user code.
//
// Applies the unary predicate pred to v1.  A predicate that holds yields a
// success result; otherwise the returned failure names the predicate
// (pred_text), the argument's source expression (e1) and its value (v1).
template <typename Pred, typename T1>
AssertionResult AssertPred1Helper(const char* pred_text,
                                  const char* e1,
                                  Pred pred,
                                  const T1& v1) {
  if (pred(v1)) {
    return AssertionSuccess();
  }

  // First the call as written in the source, then one
  // "<expression> evaluates to <value>" line for the argument.
  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  return failure;
}

// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
// Don't use this in your code.
//
// Calls pred_format with the stringified argument (#v1) followed by the
// argument itself and hands the result to GTEST_ASSERT_.
#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
  GTEST_ASSERT_(pred_format(#v1, v1), \
                on_failure)

// Internal macro for implementing {EXPECT|ASSERT}_PRED1.  Don't use
// this in your code.
//
// Routes through AssertPred1Helper, which additionally receives the
// predicate's own source text (#pred) for use in the failure message.
#define GTEST_PRED1_(pred, v1, on_failure)\
  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
                                             #v1, \
                                             pred, \
                                             v1), on_failure)

// Unary predicate assertion macros.
// The EXPECT_* forms report through GTEST_NONFATAL_FAILURE_, the ASSERT_*
// forms through GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT1(pred_format, v1) \
  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED1(pred, v1) \
  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT1(pred_format, v1) \
  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED1(pred, v1) \
  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)


// Backs {EXPECT|ASSERT}_PRED2.  Not intended to be called from user code.
//
// Applies the binary predicate pred to (v1, v2).  A predicate that holds
// yields a success result; otherwise the returned failure names the
// predicate (pred_text) and lists each argument's source expression (e1,
// e2) next to its value.
template <typename Pred, typename T1, typename T2>
AssertionResult AssertPred2Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2) {
  if (pred(v1, v2)) {
    return AssertionSuccess();
  }

  // First the call as written in the source, then one
  // "<expression> evaluates to <value>" line per argument.
  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  return failure;
}

// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
// Don't use this in your code.
//
// Calls pred_format with the stringified arguments (#v1, #v2) followed by
// the arguments themselves and hands the result to GTEST_ASSERT_.
#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
                on_failure)

// Internal macro for implementing {EXPECT|ASSERT}_PRED2.  Don't use
// this in your code.
//
// Routes through AssertPred2Helper, which additionally receives the
// predicate's own source text (#pred) for use in the failure message.
#define GTEST_PRED2_(pred, v1, v2, on_failure)\
  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
                                             #v1, \
                                             #v2, \
                                             pred, \
                                             v1, \
                                             v2), on_failure)

// Binary predicate assertion macros.
// The EXPECT_* forms report through GTEST_NONFATAL_FAILURE_, the ASSERT_*
// forms through GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED2(pred, v1, v2) \
  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED2(pred, v1, v2) \
  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)


// Backs {EXPECT|ASSERT}_PRED3.  Not intended to be called from user code.
//
// Applies the ternary predicate pred to (v1, v2, v3).  A predicate that
// holds yields a success result; otherwise the returned failure names the
// predicate (pred_text) and lists each argument's source expression (e1..e3)
// next to its value.
template <typename Pred, typename T1, typename T2, typename T3>
AssertionResult AssertPred3Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3) {
  if (pred(v1, v2, v3)) {
    return AssertionSuccess();
  }

  // First the call as written in the source, then one
  // "<expression> evaluates to <value>" line per argument.
  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  return failure;
}

// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
// Don't use this in your code.
//
// Calls pred_format with the stringified arguments (#v1..#v3) followed by
// the arguments themselves and hands the result to GTEST_ASSERT_.
#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
                on_failure)

// Internal macro for implementing {EXPECT|ASSERT}_PRED3.  Don't use
// this in your code.
//
// Routes through AssertPred3Helper, which additionally receives the
// predicate's own source text (#pred) for use in the failure message.
#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
  GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
                                             #v1, \
                                             #v2, \
                                             #v3, \
                                             pred, \
                                             v1, \
                                             v2, \
                                             v3), on_failure)

// Ternary predicate assertion macros.
// The EXPECT_* forms report through GTEST_NONFATAL_FAILURE_, the ASSERT_*
// forms through GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED3(pred, v1, v2, v3) \
  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED3(pred, v1, v2, v3) \
  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)


// Backs {EXPECT|ASSERT}_PRED4.  Not intended to be called from user code.
//
// Applies the 4-ary predicate pred to (v1, v2, v3, v4).  A predicate that
// holds yields a success result; otherwise the returned failure names the
// predicate (pred_text) and lists each argument's source expression (e1..e4)
// next to its value.
template <typename Pred, typename T1, typename T2, typename T3, typename T4>
AssertionResult AssertPred4Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4) {
  if (pred(v1, v2, v3, v4)) {
    return AssertionSuccess();
  }

  // First the call as written in the source, then one
  // "<expression> evaluates to <value>" line per argument.
  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
          << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  return failure;
}

// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
// Don't use this in your code.
//
// Calls pred_format with the stringified arguments (#v1..#v4) followed by
// the arguments themselves and hands the result to GTEST_ASSERT_.
#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
                on_failure)

// Internal macro for implementing {EXPECT|ASSERT}_PRED4.  Don't use
// this in your code.
//
// Routes through AssertPred4Helper, which additionally receives the
// predicate's own source text (#pred) for use in the failure message.
#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
                                             #v1, \
                                             #v2, \
                                             #v3, \
                                             #v4, \
                                             pred, \
                                             v1, \
                                             v2, \
                                             v3, \
                                             v4), on_failure)

// 4-ary predicate assertion macros.
// The EXPECT_* forms report through GTEST_NONFATAL_FAILURE_, the ASSERT_*
// forms through GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)


// Backs {EXPECT|ASSERT}_PRED5.  Not intended to be called from user code.
//
// Applies the 5-ary predicate pred to (v1, v2, v3, v4, v5).  A predicate
// that holds yields a success result; otherwise the returned failure names
// the predicate (pred_text) and lists each argument's source expression
// (e1..e5) next to its value.
template <typename Pred,
          typename T1, typename T2, typename T3, typename T4, typename T5>
AssertionResult AssertPred5Helper(const char* pred_text,
                                  const char* e1,
                                  const char* e2,
                                  const char* e3,
                                  const char* e4,
                                  const char* e5,
                                  Pred pred,
                                  const T1& v1,
                                  const T2& v2,
                                  const T3& v3,
                                  const T4& v4,
                                  const T5& v5) {
  if (pred(v1, v2, v3, v4, v5)) {
    return AssertionSuccess();
  }

  // First the call as written in the source, then one
  // "<expression> evaluates to <value>" line per argument.
  AssertionResult failure = AssertionFailure();
  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
          << ", " << e5 << ") evaluates to false, where";
  failure << "\n" << e1 << " evaluates to " << v1;
  failure << "\n" << e2 << " evaluates to " << v2;
  failure << "\n" << e3 << " evaluates to " << v3;
  failure << "\n" << e4 << " evaluates to " << v4;
  failure << "\n" << e5 << " evaluates to " << v5;
  return failure;
}

// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
// Don't use this in your code.
//
// Calls pred_format with the stringified arguments (#v1..#v5) followed by
// the arguments themselves and hands the result to GTEST_ASSERT_.
#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
                on_failure)

// Internal macro for implementing {EXPECT|ASSERT}_PRED5.  Don't use
// this in your code.
//
// Routes through AssertPred5Helper, which additionally receives the
// predicate's own source text (#pred) for use in the failure message.
#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
                                             #v1, \
                                             #v2, \
                                             #v3, \
                                             #v4, \
                                             #v5, \
                                             pred, \
                                             v1, \
                                             v2, \
                                             v3, \
                                             v4, \
                                             v5), on_failure)

// 5-ary predicate assertion macros.
// The EXPECT_* forms report through GTEST_NONFATAL_FAILURE_, the ASSERT_*
// forms through GTEST_FATAL_FAILURE_.
#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)


#endif  // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_


================================================
FILE: rocrtst/gtest/include/gtest/gtest_prod.h
================================================
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// Google C++ Testing Framework definitions useful in production code.

#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_

// When you need to test the private or protected members of a class,
// use the FRIEND_TEST macro to declare your tests as friends of the
// class.  For example:
//
// class MyClass {
//  private:
//   void MyMethod();
//   FRIEND_TEST(MyClassTest, MyMethod);
// };
//
// class MyClassTest : public testing::Test {
//   // ...
// };
//
// TEST_F(MyClassTest, MyMethod) {
//   // Can call MyClass::MyMethod() here.
// }

#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test

#endif  // GTEST_INCLUDE_GTEST_GTEST_PROD_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-death-test-internal.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file defines internal utilities needed for implementing
// death tests.  They are subject to change without notice.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_

#include "gtest/internal/gtest-internal.h"

#include <stdio.h>

namespace testing {
namespace internal {

// Declares the internal_run_death_test string flag through
// GTEST_DECLARE_string_ (the flag's definition is not in this header).
GTEST_DECLARE_string_(internal_run_death_test);

// Names of the flags (needed for parsing Google Test flags).
// These are bare names: in the comments of this header the style and
// internal-run flags are written as --gtest_death_test_style and
// --gtest_internal_run_death_test, i.e. with a "gtest_" prefix that the
// constants below do not include.
const char kDeathTestStyleFlag[] = "death_test_style";
const char kDeathTestUseFork[] = "death_test_use_fork";
const char kInternalRunDeathTestFlag[] = "internal_run_death_test";

#if GTEST_HAS_DEATH_TEST

// DeathTest is a class that hides much of the complexity of the
// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
// returns a concrete class that depends on the prevailing death test
// style, as defined by the --gtest_death_test_style and/or
// --gtest_internal_run_death_test flags.

// In describing the results of death tests, these terms are used with
// the corresponding definitions:
//
// exit status:  The integer exit information in the format specified
//               by wait(2)
// exit code:    The integer code passed to exit(3), _exit(2), or
//               returned from main()
class GTEST_API_ DeathTest {
 public:
  // Create returns false if there was an error determining the
  // appropriate action to take for the current death test; for example,
  // if the gtest_death_test_style flag is set to an invalid value.
  // The LastMessage method will return a more detailed message in that
  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
  // argument is set.  If the death test should be skipped, the pointer
  // is set to NULL; otherwise, it is set to the address of a new concrete
  // DeathTest object that controls the execution of the current test.
  static bool Create(const char* statement, const RE* regex,
                     const char* file, int line, DeathTest** test);
  // Default constructor; only declared here, defined out of line.
  DeathTest();
  // Virtual because concrete death tests are held and destroyed through
  // DeathTest pointers.
  virtual ~DeathTest() { }

  // A helper class that aborts a death test when it's deleted.
  class ReturnSentinel {
   public:
    // Remembers the death test to abort.  The pointer is not owned: the
    // destructor below calls Abort() on it but never deletes it.
    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
    // Unconditionally reports TEST_ENCOUNTERED_RETURN_STATEMENT through the
    // remembered death test's Abort().
    ~ReturnSentinel() {
      test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT);
    }
   private:
    // The death test passed to the constructor; fixed for the sentinel's
    // lifetime.
    DeathTest* const test_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
  } GTEST_ATTRIBUTE_UNUSED_;

  // An enumeration of possible roles that may be taken when a death
  // test is encountered.  EXECUTE means that the death test logic should
  // be executed immediately.  OVERSEE means that the program should prepare
  // the appropriate environment for a child process to execute the death
  // test, then wait for it to complete.
  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };

  // An enumeration of the three reasons that a test might be aborted.
  enum AbortReason {
    TEST_ENCOUNTERED_RETURN_STATEMENT,
    TEST_THREW_EXCEPTION,
    TEST_DID_NOT_DIE
  };

  // Assumes one of the above roles.
  virtual TestRole AssumeRole() = 0;

  // Waits for the death test to finish and returns its status.
  virtual int Wait() = 0;

  // Returns true if the death test passed; that is, the test process
  // exited during the test, its exit status matches a user-supplied
  // predicate, and its stderr output matches a user-supplied regular
  // expression.
  // The user-supplied predicate may be a macro expression rather
  // than a function pointer or functor, or else Wait and Passed could
  // be combined.
  virtual bool Passed(bool exit_status_ok) = 0;

  // Signals that the death test did not die as expected.
  virtual void Abort(AbortReason reason) = 0;

  // Returns a human-readable outcome message regarding the outcome of
  // the last death test.
  static const char* LastMessage();

  // Stores a new outcome message (presumably into last_death_test_message_
  // below; the definition is not in this header).
  static void set_last_death_test_message(const std::string& message);

 private:
  // A string containing a description of the outcome of the last death test.
  static std::string last_death_test_message_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
};

// Factory interface for death tests.  May be mocked out for testing.
class DeathTestFactory {
 public:
  // Virtual so implementations can be destroyed through this interface.
  virtual ~DeathTestFactory() { }
  // Takes the same arguments as DeathTest::Create (statement text, regex,
  // source file and line, and an out-parameter for the created test) and
  // returns bool in the same way; see the contract documented there.
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test) = 0;
};

// A concrete DeathTestFactory implementation for normal use.
class DefaultDeathTestFactory : public DeathTestFactory {
 public:
  // Overrides DeathTestFactory::Create; only declared here, the definition
  // lives outside this header.
  virtual bool Create(const char* statement, const RE* regex,
                      const char* file, int line, DeathTest** test);
};

// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
// Following the terminology used in this header, exit_status is the
// integer exit information in the format specified by wait(2), not the
// code passed to exit(3).
GTEST_API_ bool ExitedUnsuccessfully(int exit_status);

// Traps C++ exceptions escaping statement and reports them as test
// failures. Note that trapping SEH exceptions is not implemented here.
//
//   statement  - the code under test; it is wrapped in
//                GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_.
//   death_test - pointer-like handle to the running death test; Abort() is
//                called on it when an exception escapes statement.
# if GTEST_HAS_EXCEPTIONS
// With exceptions enabled: a std::exception-derived exception has its
// source location and what() text printed to stderr (which is then
// flushed) before Abort(TEST_THREW_EXCEPTION) is called; any other
// exception triggers Abort(TEST_THREW_EXCEPTION) without a message.
#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
  try { \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
  } catch (const ::std::exception& gtest_exception) { \
    fprintf(\
        stderr, \
        "\n%s: Caught std::exception-derived exception escaping the " \
        "death test statement. Exception message: %s\n", \
        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
        gtest_exception.what()); \
    fflush(stderr); \
    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
  } catch (...) { \
    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
  }

# else
// Without exceptions there is nothing to trap: the statement is simply
// executed and death_test is unused.
#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)

# endif

// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
// ASSERT_EXIT*, and EXPECT_EXIT*.
//
// Parameters:
//   statement - the code under test; it is also stringified and passed to
//               DeathTest::Create().
//   predicate - applied to the value returned by DeathTest::Wait(); its
//               result is handed to DeathTest::Passed().
//   regex     - bound to a const RE& whose address is passed to
//               DeathTest::Create().
//   fail      - invoked with DeathTest::LastMessage() on the failure path.
//
// Control flow of the expansion:
//   * If DeathTest::Create() returns false, jump to the failure label.
//   * If Create() returns true but leaves the DeathTest pointer NULL,
//     nothing further is done.
//   * Otherwise the DeathTest is handed to a scoped_ptr and AssumeRole()
//     selects the behavior:
//       - OVERSEE_TEST: call Wait(), apply predicate to its result, and jump
//         to the failure label unless Passed() returns true.
//       - EXECUTE_TEST: run statement with a ReturnSentinel in scope; if
//         control gets past the statement, Abort(TEST_DID_NOT_DIE) is
//         called.
//   * The failure label sits in the `else` of `if (AlwaysTrue())`, so it is
//     presumably reached only through the gotos above.  The label name
//     embeds __LINE__ via GTEST_CONCAT_TOKEN_.
# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    const ::testing::internal::RE& gtest_regex = (regex); \
    ::testing::internal::DeathTest* gtest_dt; \
    if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
        __FILE__, __LINE__, &gtest_dt)) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
    } \
    if (gtest_dt != NULL) { \
      ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
          gtest_dt_ptr(gtest_dt); \
      switch (gtest_dt->AssumeRole()) { \
        case ::testing::internal::DeathTest::OVERSEE_TEST: \
          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
          } \
          break; \
        case ::testing::internal::DeathTest::EXECUTE_TEST: { \
          ::testing::internal::DeathTest::ReturnSentinel \
              gtest_sentinel(gtest_dt); \
          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
          break; \
        } \
        default: \
          break; \
      } \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
      fail(::testing::internal::DeathTest::LastMessage())
// The symbol "fail" here expands to something into which a message
// can be streamed.

// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
// NDEBUG mode. In this case we need the statements to be executed, the regex is
// ignored, and the macro must accept a streamed message even though the message
// is never printed.
//
// The statement runs in the `if (AlwaysTrue())` branch; the trailing
// ::testing::Message() in the `else` branch exists only so that callers can
// append `<< ...` to the macro invocation.  The regex argument is not
// referenced by the expansion at all.
# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
     GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
  } else \
    ::testing::Message()

// Holds the parsed pieces of the --gtest_internal_run_death_test flag,
// captured as the flag stood when RUN_ALL_TESTS was called.
class InternalRunDeathTestFlag {
 public:
  // Stores the four parsed fields verbatim.
  InternalRunDeathTestFlag(const std::string& a_file,
                           int a_line,
                           int an_index,
                           int a_write_fd)
    : file_(a_file),
      line_(a_line),
      index_(an_index),
      write_fd_(a_write_fd) {}

  // Closes the write descriptor on destruction; a negative descriptor is
  // left alone.
  ~InternalRunDeathTestFlag() {
    if (write_fd_ < 0) {
      return;
    }
    posix::Close(write_fd_);
  }

  // Read-only accessors for the stored fields.
  const std::string& file() const { return file_; }
  int line() const { return line_; }
  int index() const { return index_; }
  int write_fd() const { return write_fd_; }

 private:
  std::string file_;  // File component of the flag.
  int line_;          // Line component of the flag.
  int index_;         // Index component of the flag.
  int write_fd_;      // Descriptor closed by the destructor when >= 0.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
};

// Returns a newly created InternalRunDeathTestFlag object with fields
// initialized from the GTEST_FLAG(internal_run_death_test) flag if
// the flag is specified; otherwise returns NULL.
// NOTE(review): "newly created" suggests the caller is responsible for
// deleting the returned object -- confirm against the call sites.
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();

#else  // GTEST_HAS_DEATH_TEST

// This macro is used for implementing macros such as
// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
// death tests are not supported. Those macros must compile on such systems
// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
// systems that support death tests. This allows one to write such a macro
// on a system that does not support death tests and be sure that it will
// compile on a death-test supporting system.
//
// Parameters:
//   statement -  A statement that a macro such as EXPECT_DEATH would test
//                for program termination. This macro has to make sure this
//                statement is compiled but not executed, to ensure that
//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
//                parameter iff EXPECT_DEATH compiles with it.
//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
//                the output of statement.  This parameter has to be
//                compiled but not evaluated by this macro, to ensure that
//                this macro only accepts expressions that a macro such as
//                EXPECT_DEATH would accept.
//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
//                compile inside functions where ASSERT_DEATH doesn't
//                compile.
//
//  The branch that has an always false condition is used to ensure that
//  statement and regex are compiled (and thus syntactically correct) but
//  never executed. The unreachable code macro protects the terminator
//  statement from generating an 'unreachable code' warning in case
//  statement unconditionally returns or throws. The Message constructor at
//  the end allows the syntax of streaming additional messages into the
//  macro, for compile-time compatibility with EXPECT_DEATH/ASSERT_DEATH.
//  The AlwaysTrue() branch only logs a warning that names the statement
//  which could not be verified.
# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
    if (::testing::internal::AlwaysTrue()) { \
      GTEST_LOG_(WARNING) \
          << "Death tests are not supported on this platform.\n" \
          << "Statement '" #statement "' cannot be verified."; \
    } else if (::testing::internal::AlwaysFalse()) { \
      ::testing::internal::RE::PartialMatch(".*", (regex)); \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
      terminator; \
    } else \
      ::testing::Message()

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-filepath.h
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: keith.ray@gmail.com (Keith Ray)
//
// Google Test filepath utilities
//
// This header file declares classes and functions used internally by
// Google Test.  They are subject to change without notice.
//
// This file is #included in <gtest/internal/gtest-internal.h>.
// Do not include this header file separately!

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_

#include "gtest/internal/gtest-string.h"

namespace testing {
namespace internal {

// FilePath - a class for file and directory pathname manipulation which
// handles platform-specific conventions (like the pathname separator).
// Used for helper functions for naming files in a directory for xml output.
// Except for Set methods, all methods are const or static, which provides an
// "immutable value object" -- useful for peace of mind.
// A FilePath with a value ending in a path separator ("like/this/") represents
// a directory, otherwise it is assumed to represent a file. In either case,
// it may or may not represent an actual file or directory in the file system.
// Names are NOT checked for syntax correctness -- no checking for illegal
// characters, malformed paths, etc.

class GTEST_API_ FilePath {
 public:
  FilePath() : pathname_("") { }
  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }

  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
    Normalize();
  }

  FilePath& operator=(const FilePath& rhs) {
    Set(rhs);
    return *this;
  }

  void Set(const FilePath& rhs) {
    pathname_ = rhs.pathname_;
  }

  const std::string& string() const {
    return pathname_;
  }
  const char* c_str() const {
    return pathname_.c_str();
  }

  // Returns the current working directory, or "" if unsuccessful.
  static FilePath GetCurrentDir();

  // Given directory = "dir", base_name = "test", number = 0,
  // extension = "xml", returns "dir/test.xml". If number is greater
  // than zero (e.g., 12), returns "dir/test_12.xml".
  // On Windows platform, uses \ as the separator rather than /.
  static FilePath MakeFileName(const FilePath& directory,
                               const FilePath& base_name,
                               int number,
                               const char* extension);

  // Given directory = "dir", relative_path = "test.xml",
  // returns "dir/test.xml".
  // On Windows, uses \ as the separator rather than /.
  static FilePath ConcatPaths(const FilePath& directory,
                              const FilePath& relative_path);

  // Returns a pathname for a file that does not currently exist. The pathname
  // will be directory/base_name.extension or
  // directory/base_name_<number>.extension if directory/base_name.extension
  // already exists. The number will be incremented until a pathname is found
  // that does not already exist.
  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
  // There could be a race condition if two or more processes are calling this
  // function at the same time -- they could both pick the same filename.
  static FilePath GenerateUniqueFileName(const FilePath& directory,
                                         const FilePath& base_name,
                                         const char* extension);

  // Returns true iff the path is "".
  bool IsEmpty() const {
    return pathname_.empty();
  }

  // If input name has a trailing separator character, removes it and returns
  // the name, otherwise return the name string unmodified.
  // On Windows platform, uses \ as the separator, other platforms use /.
  FilePath RemoveTrailingPathSeparator() const;

  // Returns a copy of the FilePath with the directory part removed.
  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
  // FilePath("file"). If there is no directory part ("just_a_file"), it returns
  // the FilePath unmodified. If there is no file part ("just_a_dir/") it
  // returns an empty FilePath ("").
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveDirectoryName() const;

  // RemoveFileName returns the directory path with the filename removed.
  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
  // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
  // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
  // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
  // On Windows platform, '\' is the path separator, otherwise it is '/'.
  FilePath RemoveFileName() const;

  // Returns a copy of the FilePath with the case-insensitive extension removed.
  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
  // FilePath("dir/file"). If a case-insensitive extension is not
  // found, returns a copy of the original FilePath.
  FilePath RemoveExtension(const char* extension) const;

  // Creates directories so that path exists. Returns true if successful or if
  // the directories already exist; returns false if unable to create
  // directories for any reason. Will also return false if the FilePath does
  // not represent a directory (that is, it doesn't end with a path separator).
  bool CreateDirectoriesRecursively() const;

  // Create the directory so that path exists. Returns true if successful or
  // if the directory already exists; returns false if unable to create the
  // directory for any reason, including if the parent directory does not
  // exist. Not named "CreateDirectory" because that's a macro on Windows.
  bool CreateFolder() const;

  // Returns true if FilePath describes something in the file-system,
  // either a file, directory, or whatever, and that something exists.
  bool FileOrDirectoryExists() const;

  // Returns true if pathname describes a directory in the file-system
  // that exists.
  bool DirectoryExists() const;

  // Returns true if FilePath ends with a path separator, which indicates that
  // it is intended to represent a directory. Returns false otherwise.
  // This does NOT check that a directory (or file) actually exists.
  bool IsDirectory() const;

  // Returns true if pathname describes a root directory. (Windows has one
  // root directory per disk drive.)
  bool IsRootDirectory() const;

  // Returns true if pathname describes an absolute path.
  bool IsAbsolutePath() const;

 private:
  // Replaces multiple consecutive separators with a single separator.
  // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
  // redundancies that might be in a pathname involving "." or "..".
  //
  // A pathname with multiple consecutive separators may occur either through
  // user error or as a result of some scripts or APIs that generate a pathname
  // with a trailing separator. On other platforms the same API or script
  // may NOT generate a pathname with a trailing "/". Then elsewhere that
  // pathname may have another "/" and pathname components added to it,
  // without checking for the separator already being there.
  // The script language and operating system may allow paths like "foo//bar"
  // but some of the functions in FilePath will not handle that correctly. In
  // particular, RemoveTrailingPathSeparator() only removes one separator, and
  // it is called in CreateDirectoriesRecursively() assuming that it will change
  // a pathname from directory syntax (trailing separator) to filename syntax.
  //
  // On Windows this method also replaces the alternate path separator '/' with
  // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
  // "bar\\foo".

  void Normalize();

  // Returns a pointer to the last occurence of a valid path separator in
  // the FilePath. On Windows, for example, both '/' and '\' are valid path
  // separators. Returns NULL if no path separator was found.
  const char* FindLastPathSeparator() const;

  std::string pathname_;
};  // class FilePath

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-internal.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares functions and macros used internally by
// Google Test.  They are subject to change without notice.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_

#include "gtest/internal/gtest-port.h"

#if GTEST_OS_LINUX
# include <stdlib.h>
# include <sys/types.h>
# include <sys/wait.h>
# include <unistd.h>
#endif  // GTEST_OS_LINUX

#if GTEST_HAS_EXCEPTIONS
# include <stdexcept>
#endif

#include <ctype.h>
#include <float.h>
#include <string.h>
#include <iomanip>
#include <limits>
#include <set>

#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-string.h"
#include "gtest/internal/gtest-filepath.h"
#include "gtest/internal/gtest-type-util.h"

// Due to C++ preprocessor weirdness, we need double indirection to
// concatenate two tokens when one of them is __LINE__.  Writing
//
//   foo ## __LINE__
//
// will result in the token foo__LINE__, instead of foo followed by
// the current line number.  For more details, see
// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
//
// GTEST_CONCAT_TOKEN_ is the macro to use; it forwards to
// GTEST_CONCAT_TOKEN_IMPL_, which performs the actual ## pasting after the
// arguments have been macro-expanded.
#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar

// Forward declarations only; neither type is defined in this header.
// NOTE(review): presumably protocol buffer message base classes that other
// parts of Google Test special-case -- confirm where they are used.
class ProtocolMessage;
namespace proto2 {
class Message;
}

namespace testing {

// Forward declarations.

class AssertionResult;                 // Result of an assertion.
class Message;                         // Represents a failure message.
class Test;                            // Represents a test.
class TestInfo;                        // Information about a test.
class TestPartResult;                  // Result of a test part.
class UnitTest;                        // A collection of test cases.

// Declared here so that code in this header can refer to it; the definition
// is provided elsewhere.
template <typename T>
::std::string PrintToString(const T& value);

namespace internal {

struct TraceInfo;                      // Information about a trace point.
class ScopedTrace;                     // Implements scoped trace.
class TestInfoImpl;                    // Opaque implementation of TestInfo
class UnitTestImpl;                    // Opaque implementation of UnitTest

// How many times InitGoogleTest() has been called.
// (extern declaration; the variable is defined outside this header.)
GTEST_API_ extern int g_init_gtest_count;

// The text used in failure messages to indicate the start of the
// stack trace.
// (extern declaration; the array is defined outside this header.)
GTEST_API_ extern const char kStackTraceMarker[];

// Two overloaded helpers for checking at compile time whether an
// expression is a null pointer literal (i.e. NULL or any 0-valued
// compile-time integral constant).  Their return values have
// different sizes, so we can use sizeof() to test which version is
// picked by the compiler.  These helpers have no implementations, as
// we only need their signatures.
//
// Given IsNullLiteralHelper(x), the compiler will pick the first
// version if x can be implicitly converted to Secret*, and pick the
// second version otherwise.  Since Secret is a secret and incomplete
// type, the only expression a user can write that has type Secret* is
// a null pointer literal.  Therefore, we know that x is a null
// pointer literal if and only if the first version is picked by the
// compiler.
//
// Secret itself is not declared in this part of the header; it must already
// be visible when these declarations are compiled.
char IsNullLiteralHelper(Secret* p);      // sizeof(result) == 1
char (&IsNullLiteralHelper(...))[2];  // NOLINT

// A compile-time bool constant that is true if and only if x is a
// null pointer literal (i.e. NULL or any 0-valued compile-time
// integral constant).
// x appears only as an operand of sizeof, so it is never evaluated.
#ifdef GTEST_ELLIPSIS_NEEDS_POD_
// We lose support for NULL detection where the compiler doesn't like
// passing non-POD classes through ellipsis (...).
# define GTEST_IS_NULL_LITERAL_(x) false
#else
# define GTEST_IS_NULL_LITERAL_(x) \
    (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
#endif  // GTEST_ELLIPSIS_NEEDS_POD_

// Appends the user-supplied message to the Google-Test-generated message.
//
//   gtest_msg - the message generated by Google Test.
//   user_msg  - the message supplied by the user.
//
// Returns the combined text as a new string; both arguments are taken by
// const reference.
GTEST_API_ std::string AppendUserMessage(
  const std::string& gtest_msg, const Message& user_msg);

#if GTEST_HAS_EXCEPTIONS

// This exception is thrown by (and only by) a failed Google Test
// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
// are enabled).  We derive it from std::runtime_error, which is for
// errors presumably detectable only at run time.  Since
// std::runtime_error inherits from std::exception, many testing
// frameworks know how to extract and print the message inside it.
class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
 public:
  // Built from the TestPartResult that describes the failure.  The
  // constructor is explicit, so a TestPartResult never converts silently.
  explicit GoogleTestFailureException(const TestPartResult& failure);
};

#endif  // GTEST_HAS_EXCEPTIONS

// A helper class for creating scoped traces in user programs.
class GTEST_API_ ScopedTrace {
 public:
  // The c'tor pushes the given source file location and message onto
  // a trace stack maintained by Google Test.
  ScopedTrace(const char* file, int line, const Message& message);

  // The d'tor pops the info pushed by the c'tor.
  //
  // Note that the d'tor is not virtual in order to be efficient.
  // Don't inherit from ScopedTrace!
  ~ScopedTrace();

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
} GTEST_ATTRIBUTE_UNUSED_;  // A ScopedTrace object does its job in its
                            // c'tor and d'tor.  Therefore it doesn't
                            // need to be used otherwise.

// Constructs and returns the message for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
//
// The first four parameters are the expressions used in the assertion
// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
// where foo is 5 and bar is 6, we have:
//
//   expected_expression: "foo"
//   actual_expression:   "bar"
//   expected_value:      "5"
//   actual_value:        "6"
//
// The ignoring_case parameter is true iff the assertion is a
// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
// be inserted into the message.
//
// The message is handed back wrapped in an AssertionResult.
GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
                                     const char* actual_expression,
                                     const std::string& expected_value,
                                     const std::string& actual_value,
                                     bool ignoring_case);

// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
//
//   assertion_result         - the AssertionResult of the failed assertion.
//   expression_text          - source text of the asserted expression.
//   actual_predicate_value   - text for the value the expression had.
//   expected_predicate_value - text for the value that was expected.
//
// NOTE(review): the parameter descriptions above are read off the parameter
// names; the exact message layout is defined by the implementation.
GTEST_API_ std::string GetBoolAssertionFailureMessage(
  const AssertionResult& assertion_result,
  const char* expression_text,
  const char* actual_predicate_value,
  const char* expected_predicate_value);

// This template class represents an IEEE floating-point number
// (either single-precision or double-precision, depending on the
// template parameters).
//
// The purpose of this class is to do more sophisticated number
// comparison.  (Due to round-off error, etc, it's very unlikely that
// two floating-points will be equal exactly.  Hence a naive
// comparison by the == operation often doesn't work.)
//
// Format of IEEE floating-point:
//
//   The most-significant bit being the leftmost, an IEEE
//   floating-point looks like
//
//     sign_bit exponent_bits fraction_bits
//
//   Here, sign_bit is a single bit that designates the sign of the
//   number.
//
//   For float, there are 8 exponent bits and 23 fraction bits.
//
//   For double, there are 11 exponent bits and 52 fraction bits.
//
//   More details can be found at
//   http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
//
// Template parameter:
//
//   RawType: the raw floating-point type (either float or double)
template <typename RawType>
class FloatingPoint {
 public:
  // Defines the unsigned integer type that has the same size as the
  // floating point number.
  typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;

  // Constants.

  // # of bits in a number.
  static const size_t kBitCount = 8 * sizeof(RawType);

  // # of fraction bits in a number.
  static const size_t kFractionBitCount =
    std::numeric_limits<RawType>::digits - 1;

  // # of exponent bits in a number.
  static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;

  // The mask for the sign bit.
  static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);

  // The mask for the fraction bits.
  static const Bits kFractionBitMask =
    ~static_cast<Bits>(0) >> (kExponentBitCount + 1);

  // The mask for the exponent bits.
  static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);

  // How many ULP's (Units in the Last Place) we want to tolerate when
  // comparing two numbers.  The larger the value, the more error we
  // allow.  A 0 value means that two numbers must be exactly the same
  // to be considered equal.
  //
  // The maximum error of a single floating-point operation is 0.5
  // units in the last place.  On Intel CPU's, all floating-point
  // calculations are done with 80-bit precision, while double has 64
  // bits.  Therefore, 4 should be enough for ordinary use.
  //
  // See the following article for more details on ULP:
  // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
  static const size_t kMaxUlps = 4;

  // Constructs a FloatingPoint from a raw floating-point number.
  //
  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
  // around may change its bits, although the new value is guaranteed
  // to be also a NAN.  Therefore, don't expect this constructor to
  // preserve the bits in x when x is a NAN.
  explicit FloatingPoint(const RawType& x) {
    u_.value_ = x;
  }

  // Static methods

  // Reinterprets a bit pattern as a floating-point number.
  //
  // This function is needed to test the AlmostEquals() method.
  static RawType ReinterpretBits(const Bits bits) {
    FloatingPoint fp(0);
    fp.u_.bits_ = bits;
    return fp.u_.value_;
  }

  // Returns the floating-point number that represent positive infinity.
  static RawType Infinity() {
    return ReinterpretBits(kExponentBitMask);
  }

  // Returns the maximum representable finite floating-point number.
  static RawType Max();

  // Non-static methods

  // Returns the bits that represents this number.
  const Bits& bits() const {
    return u_.bits_;
  }

  // Returns the exponent bits of this number.
  Bits exponent_bits() const {
    return kExponentBitMask & u_.bits_;
  }

  // Returns the fraction bits of this number.
  Bits fraction_bits() const {
    return kFractionBitMask & u_.bits_;
  }

  // Returns the sign bit of this number.
  Bits sign_bit() const {
    return kSignBitMask & u_.bits_;
  }

  // Returns true iff this is NAN (not a number).
  bool is_nan() const {
    // It's a NAN if the exponent bits are all ones and the fraction
    // bits are not entirely zeros.
    return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
  }

  // Returns true iff this number is at most kMaxUlps ULP's away from
  // rhs.  In particular, this function:
  //
  //   - returns false if either number is (or both are) NAN.
  //   - treats really large numbers as almost equal to infinity.
  //   - thinks +0.0 and -0.0 are 0 DLP's apart.
  bool AlmostEquals(const FloatingPoint& rhs) const {
    // The IEEE standard says that any comparison operation involving
    // a NAN must return false.
    if (is_nan() || rhs.is_nan()) {
      return false;
    }

    return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
           <= kMaxUlps;
  }

 private:
  // The data type used to store the actual floating-point number.
  union FloatingPointUnion {
    RawType value_;  // The raw floating-point number.
    Bits bits_;      // The bits that represent the number.
  };

  // Converts an integer from the sign-and-magnitude representation to
  // the biased representation.  More precisely, let N be 2 to the
  // power of (kBitCount - 1), an integer x is represented by the
  // unsigned number x + N.
  //
  // For instance,
  //
  //   -N + 1 (the most negative number representable using
  //          sign-and-magnitude) is represented by 1;
  //   0      is represented by N; and
  //   N - 1  (the biggest number representable using
  //          sign-and-magnitude) is represented by 2N - 1.
  //
  // Read http://en.wikipedia.org/wiki/Signed_number_representations
  // for more details on signed number representations.
  static Bits SignAndMagnitudeToBiased(const Bits& sam) {
    if (kSignBitMask & sam) {
      // sam represents a negative number.
      return ~sam + 1;
    }
    else {
      // sam represents a positive number.
      return kSignBitMask | sam;
    }
  }

  // Given two numbers in the sign-and-magnitude representation,
  // returns the distance between them as an unsigned number.
  static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits& sam1,
      const Bits& sam2) {
    const Bits biased1 = SignAndMagnitudeToBiased(sam1);
    const Bits biased2 = SignAndMagnitudeToBiased(sam2);
    return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
  }

  FloatingPointUnion u_;
};

// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
// macro defined by <windows.h>.
// Explicit specialization of Max() for float: returns FLT_MAX.
template <>
inline float FloatingPoint<float>::Max() {
  return FLT_MAX;
}
// Explicit specialization of Max() for double: returns DBL_MAX.
template <>
inline double FloatingPoint<double>::Max() {
  return DBL_MAX;
}

// Typedefs the instances of the FloatingPoint template class that we
// care to use.
typedef FloatingPoint<float> Float;    // FloatingPoint instantiated for float.
typedef FloatingPoint<double> Double;  // FloatingPoint instantiated for double.

// In order to catch the mistake of putting tests that use different
// test fixture classes in the same test case, we need to assign
// unique IDs to fixture classes and compare them.  The TypeId type is
// used to hold such IDs.  The user should treat TypeId as an opaque
// type: the only operation allowed on TypeId values is to compare
// them for equality using the == operator.
typedef const void* TypeId;  // GetTypeId<T>() produces these from an address.

// Provides one static bool per type T.  GetTypeId<T>() uses the address
// of TypeIdHelper<T>::dummy_ as the ID of T.
template <typename T>
class TypeIdHelper {
 public:
  // dummy_ must not have a const type.  Otherwise an overly eager
  // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
  // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
  static bool dummy_;
};

// Out-of-class definition of the static member; one object exists per
// instantiation of the template.
template <typename T>
bool TypeIdHelper<T>::dummy_ = false;

// GetTypeId<T>() returns the ID of type T.  Different values will be
// returned for different types.  Calling the function twice with the
// same type argument is guaranteed to return the same ID.
template <typename T>
TypeId GetTypeId() {
  // Each instantiation of TypeIdHelper must get its own dummy_ object,
  // so the address of that object is an ID that is unique to T and
  // stays the same across calls.
  const TypeId type_id = &TypeIdHelper<T>::dummy_;
  return type_id;
}

// Returns the type ID of ::testing::Test.  Always call this instead
// of GetTypeId< ::testing::Test>() to get the type ID of
// ::testing::Test, as the latter may give the wrong result due to a
// suspected linker bug when compiling Google Test as a Mac OS X
// framework.
GTEST_API_ TypeId GetTestTypeId();

// Defines the abstract factory interface that creates instances
// of a Test object.
class TestFactoryBase {
 public:
  // Virtual so that a factory can be destroyed through a
  // TestFactoryBase pointer.
  virtual ~TestFactoryBase() {}

  // Creates a test instance to run. The instance is both created and destroyed
  // within TestInfoImpl::Run()
  virtual Test* CreateTest() = 0;

 protected:
  // Protected: only derived factories can construct the base.
  TestFactoryBase() {}

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
};

// This class provides implementation of TestFactoryBase interface.
// It is used in TEST and TEST_F macros.
// Concrete factory whose CreateTest() produces TestClass objects.
template <class TestClass>
class TestFactoryImpl : public TestFactoryBase {
 public:
  // Allocates a TestClass with 'new' and hands the raw pointer to the
  // caller as a Test*.
  virtual Test* CreateTest() {
    return new TestClass;
  }
};

#if GTEST_OS_WINDOWS

// Predicate-formatters for implementing the HRESULT checking macros
// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
// We pass a long instead of HRESULT to avoid causing an
// include dependency for the HRESULT type.
GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
    long hr);  // NOLINT
GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
    long hr);  // NOLINT

#endif  // GTEST_OS_WINDOWS

// Types of SetUpTestCase() and TearDownTestCase() functions.
// Both are pointers to functions that take no arguments and return void.
typedef void (*SetUpTestCaseFunc)();
typedef void (*TearDownTestCaseFunc)();

// Creates a new TestInfo object and registers it with Google Test;
// returns the created object.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param        the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
//   value_param       text representation of the test's value parameter,
//                     or NULL if this is not a value-parameterized test.
//   fixture_class_id: ID of the test fixture class
//   set_up_tc:        pointer to the function that sets up the test case
//   tear_down_tc:     pointer to the function that tears down the test case
//   factory:          pointer to the factory that creates a test object.
//                     The newly created TestInfo instance will assume
//                     ownership of the factory object.
GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
  const char* test_case_name,
  const char* name,
  const char* type_param,
  const char* value_param,
  TypeId fixture_class_id,
  SetUpTestCaseFunc set_up_tc,
  TearDownTestCaseFunc tear_down_tc,
  TestFactoryBase* factory);

// If *pstr starts with the given prefix, modifies *pstr to be right
// past the prefix and returns true; otherwise leaves *pstr unchanged
// and returns false.  None of pstr, *pstr, and prefix can be NULL.
GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// State of the definition of a type-parameterized test case.
class GTEST_API_ TypedTestCasePState {
 public:
  // Starts in the "not yet registered" state with no test names defined.
  TypedTestCasePState() : registered_(false) {}

  // Adds the given test name to defined_test_names_ and return true
  // if the test case hasn't been registered; otherwise aborts the
  // program.
  //
  // 'file' and 'line' are only used to format the location shown in the
  // error message; 'case_name' likewise only appears in that message.
  bool AddTestName(const char* file, int line, const char* case_name,
                   const char* test_name) {
    if (registered_) {
      // A test defined after registration would be missed, so report the
      // offending location on stderr and abort.
      fprintf(stderr, "%s Test %s must be defined before "
              "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
              FormatFileLocation(file, line).c_str(), test_name, case_name);
      fflush(stderr);
      posix::Abort();
    }

    defined_test_names_.insert(test_name);
    return true;
  }

  // Verifies that registered_tests match the test names in
  // defined_test_names_; returns registered_tests if successful, or
  // aborts the program otherwise.
  const char* VerifyRegisteredTestNames(
    const char* file, int line, const char* registered_tests);

 private:
  // True once the test case has been registered.  Nothing in this class
  // body sets it to true; presumably VerifyRegisteredTestNames() does --
  // TODO confirm against its definition.
  bool registered_;
  // NOTE(review): the set stores the pointers themselves, so entries are
  // ordered and de-duplicated by address, not by string contents.
  ::std::set<const char*> defined_test_names_;
};

// Skips to the first non-space char after the first comma in 'str';
// returns NULL if no comma is found in 'str'.
inline const char* SkipComma(const char* str) {
  const char* cursor = strchr(str, ',');

  if (cursor != NULL) {
    // Step past the comma itself, then past every space that follows it.
    do {
      ++cursor;
    } while (IsSpace(*cursor));
  }

  // Still NULL here when 'str' holds no comma at all.
  return cursor;
}

// Returns the prefix of 'str' before the first comma in it; returns
// the entire string if it contains no comma.
inline std::string GetPrefixUntilComma(const char* str) {
  const char* const first_comma = strchr(str, ',');

  // Without a comma the whole string is the prefix.
  if (first_comma == NULL) {
    return std::string(str);
  }

  // Otherwise copy the characters in [str, first_comma).
  return std::string(str, first_comma);
}

// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
// registers a list of type-parameterized tests with Google Test.  The
// return value is insignificant - we just need to return something
// such that we can call this function in a namespace scope.
//
// Implementation note: The GTEST_TEMPLATE_ macro declares a template
// template parameter.  It's defined in gtest-type-util.h.
template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
class TypeParameterizedTest {
 public:
  // Registers the test selected by TestSel for the first type in
  // 'Types' and then, through compile-time recursion, for every
  // remaining type.  'index' is the position of that first type in the
  // type list given to INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase,
  // Types); valid values are [0, N - 1] where N is the length of the
  // list.
  static bool Register(const char* prefix, const char* case_name,
                       const char* test_names, int index) {
    typedef typename Types::Head Type;
    typedef Fixture<Type> FixtureClass;
    typedef typename GTEST_BIND_(TestSel, Type) TestClass;

    // The registered test case name is "[<prefix>/]<case_name>/<index>";
    // the prefix and its slash are left out when the prefix is empty.
    std::string full_case_name(prefix);
    if (prefix[0] != '\0') {
      full_case_name += "/";
    }
    full_case_name += case_name;
    full_case_name += "/";
    full_case_name += StreamableToString(index);

    // The test being registered is the first one named in 'test_names'.
    const std::string test_name = GetPrefixUntilComma(test_names);
    const std::string type_name = GetTypeName<Type>();

    MakeAndRegisterTestInfo(
      full_case_name.c_str(),
      test_name.c_str(),
      type_name.c_str(),
      NULL,  // No value parameter.
      GetTypeId<FixtureClass>(),
      TestClass::SetUpTestCase,
      TestClass::TearDownTestCase,
      new TestFactoryImpl<TestClass>);

    // Continue with the tail of the type list.
    return TypeParameterizedTest<Fixture, TestSel, typename Types::Tail>
           ::Register(prefix, case_name, test_names, index + 1);
  }
};

// The base case for the compile time recursion.
// Selected when the type list is Types0; it registers nothing.
template <GTEST_TEMPLATE_ Fixture, class TestSel>
class TypeParameterizedTest<Fixture, TestSel, Types0> {
 public:
  // Ignores all of its arguments and reports success, which ends the
  // recursion.
  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
                       const char* /*test_names*/, int /*index*/) {
    return true;
  }
};

// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
// registers *all combinations* of 'Tests' and 'Types' with Google
// Test.  The return value is insignificant - we just need to return
// something such that we can call this function in a namespace scope.
template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
class TypeParameterizedTestCase {
 public:
  // 'test_names' is a comma-separated list whose first entry names the
  // test at the head of 'Tests'.
  static bool Register(const char* prefix, const char* case_name,
                       const char* test_names) {
    typedef typename Tests::Head FirstTest;
    typedef typename Tests::Tail RemainingTests;

    // Register the head of the test list once for each type in 'Types'.
    TypeParameterizedTest<Fixture, FirstTest, Types>::Register(
      prefix, case_name, test_names, 0);

    // Drop the name that was just handled and recurse (at compile time)
    // on the rest of the test list.
    const char* const remaining_names = SkipComma(test_names);
    return TypeParameterizedTestCase<Fixture, RemainingTests, Types>
           ::Register(prefix, case_name, remaining_names);
  }
};

// The base case for the compile time recursion.
// Selected when the test list is Templates0; it registers nothing.
template <GTEST_TEMPLATE_ Fixture, typename Types>
class TypeParameterizedTestCase<Fixture, Templates0, Types> {
 public:
  // Ignores all of its arguments and reports success, which ends the
  // recursion.
  static bool Register(const char* /*prefix*/, const char* /*case_name*/,
                       const char* /*test_names*/) {
    return true;
  }
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// Returns the current OS stack trace as an std::string.
//
// The maximum number of stack frames to be included is specified by
// the gtest_stack_trace_depth flag.  The skip_count parameter
// specifies the number of top frames to be skipped, which doesn't
// count against the number of frames to be included.
//
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
  UnitTest* unit_test, int skip_count);

// Helpers for suppressing warnings on unreachable code or constant
// condition.

// Always returns true.
GTEST_API_ bool AlwaysTrue();

// Always returns false.
// Implemented as the negation of AlwaysTrue(), which is declared above
// but defined elsewhere.
inline bool AlwaysFalse() {
  return !AlwaysTrue();
}

// Helper for suppressing false warning from Clang on a const char*
// variable declared in a conditional expression always being NULL in
// the else branch.
struct GTEST_API_ ConstCharPtr {
  // Deliberately not 'explicit': GTEST_TEST_THROW_ declares
  // 'ConstCharPtr gtest_msg = ""' inside an if-condition, which needs the
  // implicit conversion from a string literal.
  ConstCharPtr(const char* str) : value(str) {}
  // Always true, whatever 'value' points to, so an 'if' whose condition
  // declares a ConstCharPtr always takes its first branch.
  operator bool() const {
    return true;
  }
  // The wrapped string; public and assignable (GTEST_TEST_THROW_ stores
  // its failure message here).
  const char* value;
};

// A simple Linear Congruential Generator for generating random
// numbers with a uniform distribution.  Unlike rand() and srand(), it
// doesn't use global state (and therefore can't interfere with user
// code).  Unlike rand_r(), it's portable.  An LCG isn't very random,
// but it's good enough for our purposes.
class GTEST_API_ Random {
 public:
  // Largest 'range' that Generate() accepts: 2^31.
  static const UInt32 kMaxRange = 1u << 31;

  // Starts the generator from the given seed.
  explicit Random(UInt32 seed) : state_(seed) {}

  // Restarts the generator from the given seed by overwriting its state.
  void Reseed(UInt32 seed) {
    state_ = seed;
  }

  // Generates a random number from [0, range).  Crashes if 'range' is
  // 0 or greater than kMaxRange.
  UInt32 Generate(UInt32 range);

 private:
  // Current generator state; initially the seed.
  UInt32 state_;
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
};

// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
// compiler error iff T1 and T2 are different types.
// Primary template: declared but never defined, so it is an incomplete
// type whenever T1 and T2 differ.
template <typename T1, typename T2>
struct CompileAssertTypesEqual;

// Specialization for identical types: the only case that is a complete
// type and can therefore be used to define a variable.
template <typename T>
struct CompileAssertTypesEqual<T, T> {
};

// Removes the reference from a type if it is a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::remove_reference, which is not widely available yet.
// Primary template: T is not a reference, so it is passed through.
template <typename T>
struct RemoveReference {
  typedef T type;
};  // NOLINT
// Specialization for T&: strips the reference.
template <typename T>
struct RemoveReference<T&> {
  typedef T type;
};  // NOLINT

// A handy wrapper around RemoveReference that works when the argument
// T depends on template parameters.
#define GTEST_REMOVE_REFERENCE_(T) \
    typename ::testing::internal::RemoveReference<T>::type

// Removes const from a type if it is a const type, otherwise leaves
// it unchanged.  This is the same as tr1::remove_const, which is not
// widely available yet.
// Primary template: T is not const-qualified, so it is passed through.
template <typename T>
struct RemoveConst {
  typedef T type;
};  // NOLINT
// Specialization for const T: strips the top-level const.
template <typename T>
struct RemoveConst<const T> {
  typedef T type;
};  // NOLINT

// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
// definition to fail to remove the const in 'const int[3]' and 'const
// char[3][4]'.  The following specialization works around the bug.
// It applies RemoveConst to the element type and rebuilds the array,
// which also handles multi-dimensional arrays.
template <typename T, size_t N>
struct RemoveConst<const T[N]> {
  typedef typename RemoveConst<T>::type type[N];
};

#if defined(_MSC_VER) && _MSC_VER < 1400
// This is the only specialization that allows VC++ 7.1 to remove const in
// 'const int[3] and 'const int[3][4]'.  However, it causes trouble with GCC
// and thus needs to be conditionally compiled.
template <typename T, size_t N>
struct RemoveConst<T[N]> {
  typedef typename RemoveConst<T>::type type[N];
};
#endif

// A handy wrapper around RemoveConst that works when the argument
// T depends on template parameters.
#define GTEST_REMOVE_CONST_(T) \
    typename ::testing::internal::RemoveConst<T>::type

// Turns const U&, U&, const U, and U all into U.
// The reference is removed first and the const second.
#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))

// Adds reference to a type if it is not a reference type,
// otherwise leaves it unchanged.  This is the same as
// tr1::add_reference, which is not widely available yet.
// Primary template: T is not a reference, so one is added.
template <typename T>
struct AddReference {
  typedef T& type;
};  // NOLINT
// Specialization for T&: already a reference, so it is left as T&.
template <typename T>
struct AddReference<T&> {
  typedef T& type;
};  // NOLINT

// A handy wrapper around AddReference that works when the argument T
// depends on template parameters.
#define GTEST_ADD_REFERENCE_(T) \
    typename ::testing::internal::AddReference<T>::type

// Adds a reference to const on top of T as necessary.  For example,
// it transforms
//
//   char         ==> const char&
//   const char   ==> const char&
//   char&        ==> const char&
//   const char&  ==> const char&
//
// The argument T must depend on some template parameters.
// Any existing reference is removed, const is applied, and a reference
// is then added back.
#define GTEST_REFERENCE_TO_CONST_(T) \
    GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))

// ImplicitlyConvertible<From, To>::value is a compile-time bool
// constant that's true iff type From can be implicitly converted to
// type To.
template <typename From, typename To>
class ImplicitlyConvertible {
 private:
  // We need the following helper functions only for their types.
  // They have no implementations.

  // MakeFrom() is an expression whose type is From.  We cannot simply
  // use From(), as the type From may not have a public default
  // constructor.
  static From MakeFrom();

  // These two functions are overloaded.  Given an expression
  // Helper(x), the compiler will pick the first version if x can be
  // implicitly converted to type To; otherwise it will pick the
  // second version.
  //
  // The first version returns a value of size 1, and the second
  // version returns a value of size 2.  Therefore, by checking the
  // size of Helper(x), which can be done at compile time, we can tell
  // which version of Helper() is used, and hence whether x can be
  // implicitly converted to type To.
  static char Helper(To);
  static char (&Helper(...))[2];  // NOLINT

  // We have to put the 'public' section after the 'private' section,
  // or MSVC refuses to compile the code.
 public:
  // MSVC warns about implicitly converting from double to int for
  // possible loss of data, so we need to temporarily disable the
  // warning.
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4244)  // Temporarily disables warning 4244.

  static const bool value =
    sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
# pragma warning(pop)           // Restores the warning state.
#elif defined(__BORLANDC__)
  // C++Builder cannot use member overload resolution during template
  // instantiation.  The simplest workaround is to use its C++0x type traits
  // functions (C++Builder 2009 and above only).
  static const bool value = __is_convertible(From, To);
#else
  // All other compilers: the same sizeof-based test as in the MSVC
  // branch, without the warning pragmas.
  static const bool value =
    sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
#endif  // _MSC_VER
};
// Out-of-class definition of the static data member declared above.
template <typename From, typename To>
const bool ImplicitlyConvertible<From, To>::value;

// IsAProtocolMessage<T>::value is a compile-time bool constant that's
// true iff T is type ProtocolMessage, proto2::Message, or a subclass
// of those.
// The test is done on pointers: it asks whether const T* converts
// implicitly to a pointer to either of the two message base types.
template <typename T>
struct IsAProtocolMessage
  : public bool_constant <
    ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
    ImplicitlyConvertible<const T*, const ::proto2::Message*>::value > {
};

// When the compiler sees expression IsContainerTest<C>(0), if C is an
// STL-style container class, the first overload of IsContainerTest
// will be viable (since both C::iterator* and C::const_iterator* are
// valid types and NULL can be implicitly converted to them).  It will
// be picked over the second overload as 'int' is a perfect match for
// the type of argument 0.  If C::iterator or C::const_iterator is not
// a valid type, the first overload is not viable, and the second
// overload will be picked.  Therefore, we can determine whether C is
// a container class by checking the type of IsContainerTest<C>(0).
// The value of the expression is insignificant.
//
// Note that we look for both C::iterator and C::const_iterator.  The
// reason is that C++ injects the name of a class as a member of the
// class itself (e.g. you can refer to class iterator as either
// 'iterator' or 'iterator::iterator').  If we look for C::iterator
// only, for example, we would mistakenly think that a class named
// iterator is an STL container.
//
// Also note that the simpler approach of overloading
// IsContainerTest(typename C::const_iterator*) and
// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
// Result type of the overload chosen for container classes.
typedef int IsContainer;
// Viable only when both C::iterator and C::const_iterator name types;
// the 'int' parameter makes it the better match for the argument 0.
template <class C>
IsContainer IsContainerTest(int /* dummy */,
                            typename C::iterator* /* it */ = NULL,
                            typename C::const_iterator* /* const_it */ = NULL) {
  return static_cast<IsContainer>(0);
}

// Result type of the fallback overload.
typedef char IsNotContainer;
// Always viable, but needs an int-to-long conversion for the argument 0,
// so it is picked only when the overload above drops out.
template <class C>
IsNotContainer IsContainerTest(long /* dummy */) {
  return static_cast<IsNotContainer>(0);
}

// EnableIf<condition>::type is void when 'Cond' is true, and
// undefined when 'Cond' is false.  To use SFINAE to make a function
// overload only apply when a particular expression is true, add
// "typename EnableIf<expression>::type* = 0" as the last parameter.
// Primary template is declared only, so EnableIf<false> has no 'type'.
template<bool> struct EnableIf;
// Only the 'true' specialization is defined and provides 'type'.
template<> struct EnableIf<true> {
  typedef void type;
};  // NOLINT

// Utilities for native arrays.

// ArrayEq() compares two k-dimensional native arrays using the
// elements' operator==, where k can be any integer >= 0.  When k is
// 0, ArrayEq() degenerates into comparing a single pair of values.

// Declared ahead of its definition because the array overload below
// calls it.
template <typename T, typename U>
bool ArrayEq(const T* lhs, size_t size, const U* rhs);

// Base case (k == 0): two plain values, compared with operator==.
template <typename T, typename U>
inline bool ArrayEq(const T& lhs, const U& rhs) {
  return (lhs == rhs);
}

// Case k >= 1: two native arrays with the same extent N.  Forwards to
// the pointer-and-size helper.
template <typename T, typename U, size_t N>
inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
  return internal::ArrayEq(lhs, N, rhs);
}

// Shared element-by-element comparison.  Keeping the loop here rather
// than in the array overload means it is not instantiated again for
// every distinct array size.
template <typename T, typename U>
bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
  // Walk forward while the elements agree; the walk covers the whole
  // range exactly when no mismatch was found.
  size_t index = 0;

  while (index != size && internal::ArrayEq(lhs[index], rhs[index])) {
    index++;
  }

  return index == size;
}

// Finds the first element in the iterator range [begin, end) that
// equals elem.  Element may be a native array type itself.
template <typename Iter, typename Element>
Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
  Iter cursor = begin;

  while (cursor != end) {
    // ArrayEq() is used instead of a bare ==, so elements that are
    // themselves native arrays compare correctly.
    if (internal::ArrayEq(*cursor, elem)) {
      return cursor;
    }

    ++cursor;
  }

  // No element matched.
  return end;
}

// CopyArray() copies a k-dimensional native array using the elements'
// operator=, where k can be any integer >= 0.  When k is 0,
// CopyArray() degenerates into copying a single value.

// Declared ahead of its definition because the array overload below
// calls it.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to);

// Base case (k == 0): a single value, copied with operator=.
template <typename T, typename U>
inline void CopyArray(const T& from, U* to) {
  (*to) = from;
}

// Case k >= 1: a native array of extent N copied into another array of
// the same extent.  Forwards to the pointer-and-size helper.
template <typename T, typename U, size_t N>
inline void CopyArray(const T(&from)[N], U(*to)[N]) {
  internal::CopyArray(from, N, *to);
}

// Shared element-by-element copy.  Keeping the loop here rather than in
// the array overload means it is not instantiated again for every
// distinct array size.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to) {
  size_t index = 0;

  while (index != size) {
    // Each element goes through CopyArray() again, so elements that are
    // themselves arrays are copied member by member.
    internal::CopyArray(from[index], to + index);
    index++;
  }
}

// The relation between an NativeArray object (see below) and the
// native array it represents.
enum RelationToSource {
  kReference,  // The NativeArray references the native array.
  kCopy        // The NativeArray makes a copy of the native array and
               // owns the copy.
};

// Adapts a native array to a read-only STL-style container.  Instead
// of the complete STL container concept, this adaptor only implements
// members useful for Google Mock's container matchers.  New members
// should be added as needed.  To simplify the implementation, we only
// support Element being a raw type (i.e. having no top-level const or
// reference modifier).  It's the client's responsibility to satisfy
// this requirement.  Element can be an array type itself (hence
// multi-dimensional arrays are supported).
template <typename Element>
class NativeArray {
 public:
  // STL-style container typedefs.
  typedef Element value_type;
  typedef Element* iterator;
  typedef const Element* const_iterator;

  // Wraps the 'count' elements starting at 'array'.  'relation' selects
  // whether the elements are referenced in place or copied.
  NativeArray(const Element* array, size_t count, RelationToSource relation) {
    Init(array, count, relation);
  }

  // Copy constructor.  The relation of 'rhs' is kept, so copying a
  // NativeArray that owns its elements allocates a new set of elements.
  NativeArray(const NativeArray& rhs) {
    Init(rhs.array_, rhs.size_, rhs.relation_to_source_);
  }

  ~NativeArray() {
    // Compile-time check that Element carries no top-level const or
    // reference modifier.
    static_cast<void>(StaticAssertTypeEqHelper<Element,
                      GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>());

    // Only a NativeArray that made its own copy has storage to release.
    if (relation_to_source_ != kCopy) {
      return;
    }

    delete[] array_;
  }

  // STL-style container methods.

  // Number of elements.
  size_t size() const {
    return size_;
  }
  // Pointer to the first element.
  const_iterator begin() const {
    return array_;
  }
  // Pointer one past the last element.
  const_iterator end() const {
    return array_ + size_;
  }
  // Two NativeArrays are equal when they have the same length and
  // ArrayEq() finds every pair of elements equal.
  bool operator==(const NativeArray& rhs) const {
    if (size() != rhs.size()) {
      return false;
    }

    return ArrayEq(begin(), size(), rhs.begin());
  }

 private:
  // Sets up all three members.  For any relation other than kReference
  // the elements are first copied into newly allocated storage.
  void Init(const Element* array, size_t a_size, RelationToSource relation) {
    const Element* storage = array;

    if (relation != kReference) {
      Element* const owned = new Element[a_size];
      CopyArray(array, a_size, owned);
      storage = owned;
    }

    array_ = storage;
    size_ = a_size;
    relation_to_source_ = relation;
  }

  const Element* array_;
  size_t size_;
  RelationToSource relation_to_source_;

  GTEST_DISALLOW_ASSIGN_(NativeArray);
};

}  // namespace internal
}  // namespace testing

// Builds an AssertHelper for the given result type, location and message
// and assigns a fresh ::testing::Message to it.  The expansion ends with
// that Message object, presumably so that callers can stream extra text
// after the macro with '<<'.
#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
  ::testing::internal::AssertHelper(result_type, file, line, message) \
    = ::testing::Message()

// Same as GTEST_MESSAGE_AT_, using the location where the macro is
// expanded.
#define GTEST_MESSAGE_(message, result_type) \
  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)

// Reports a fatal failure.  The expansion begins with 'return', so the
// enclosing function is left once the message expression is evaluated.
#define GTEST_FATAL_FAILURE_(message) \
  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)

// Reports a non-fatal failure; the enclosing function keeps running.
#define GTEST_NONFATAL_FAILURE_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)

// Reports a success result.
#define GTEST_SUCCESS_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)

// Suppresses MSVC warning C4702 (unreachable code) for the code following
// statement if it returns or throws (or doesn't return or throw in some
// situations).
// Runs 'statement' under an 'if' whose condition is the AlwaysTrue()
// function call rather than a compile-time constant.
#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
  if (::testing::internal::AlwaysTrue()) { statement; }

// Checks that 'statement' throws an exception of type
// 'expected_exception'.
//
// The condition of the outer 'if' declares a ConstCharPtr, which always
// converts to true, so the first branch always runs.  On the two failing
// paths (a different exception type, or no exception at all) the failure
// text is stored in gtest_msg.value and control jumps to a label in the
// 'else' branch, where 'fail' is invoked with that text.  The label name
// includes __LINE__ so that uses on different lines get distinct labels.
#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::ConstCharPtr gtest_msg = "") { \
    bool gtest_caught_expected = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (expected_exception const&) { \
      gtest_caught_expected = true; \
    } \
    catch (...) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws a different type."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
    if (!gtest_caught_expected) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws nothing."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
      fail(gtest_msg.value)

// Checks that 'statement' does not throw.  Any exception is caught and
// control jumps to the label in the 'else' branch, where 'fail' is
// invoked; that branch is reached only through the goto.
#define GTEST_TEST_NO_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
      fail("Expected: " #statement " doesn't throw an exception.\n" \
           "  Actual: it throws.")

// Checks that 'statement' throws an exception of any type.  If nothing
// was caught, control jumps to the label in the 'else' branch, where
// 'fail' is invoked; that branch is reached only through the goto.
#define GTEST_TEST_ANY_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    bool gtest_caught_any = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      gtest_caught_any = true; \
    } \
    if (!gtest_caught_any) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
      fail("Expected: " #statement " throws an exception.\n" \
           "  Actual: it doesn't.")


// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
// either a boolean expression or an AssertionResult. text is a textual
// representation of expression as it was passed into the EXPECT_TRUE.
// 'expression' is wrapped in an AssertionResult that is declared in the
// 'if' condition.  When the result tests true nothing happens (empty
// statement); otherwise 'fail' is invoked with the message built by
// GetBoolAssertionFailureMessage() from the result, 'text' and the
// stringized 'actual' and 'expected' arguments.
#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar_ = \
      ::testing::AssertionResult(expression)) \
    ; \
  else \
    fail(::testing::internal::GetBoolAssertionFailureMessage(\
        gtest_ar_, text, #actual, #expected).c_str())

// Checks that running 'statement' produces no new fatal failure.  A
// HasNewFatalFailureHelper is created before the statement runs and is
// queried afterwards; if it reports a new fatal failure, control jumps to
// the label in the 'else' branch, where 'fail' is invoked.
#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
      fail("Expected: " #statement " doesn't generate new fatal " \
           "failures in the current thread.\n" \
           "  Actual: it does.")

// Expands to the name of the class that implements the given test.
// The name is formed by token pasting:
// <test_case_name>_<test_name>_Test.
#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
  test_case_name##_##test_name##_Test

// Helper macro for defining tests.
// The expansion has three parts:
//   1. a class named by GTEST_TEST_CLASS_NAME_ that derives from
//      parent_class and declares a private TestBody() and a static
//      test_info_ pointer;
//   2. the definition of test_info_, initialized with the result of
//      MakeAndRegisterTestInfo() (NULL is passed for both the type and
//      the value parameter, and the factory is a TestFactoryImpl for the
//      new class);
//   3. the header of the TestBody() definition, so that the braces the
//      user writes after the macro become the test body.
#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
 public:\
  GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
 private:\
  virtual void TestBody();\
  static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
  GTEST_DISALLOW_COPY_AND_ASSIGN_(\
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
};\
\
::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
  ::test_info_ =\
    ::testing::internal::MakeAndRegisterTestInfo(\
        #test_case_name, #test_name, NULL, NULL, \
        (parent_id), \
        parent_class::SetUpTestCase, \
        parent_class::TearDownTestCase, \
        new ::testing::internal::TestFactoryImpl<\
            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-linked_ptr.h
================================================
// Copyright 2003 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Dan Egnor (egnor@google.com)
//
// A "smart" pointer type with reference tracking.  Every pointer to a
// particular object is kept on a circular linked list.  When the last pointer
// to an object is destroyed or reassigned, the object is deleted.
//
// Used properly, this deletes the object when the last reference goes away.
// There are several caveats:
// - Like all reference counting schemes, cycles lead to leaks.
// - Each smart pointer is actually two pointers (8 bytes instead of 4).
// - Every time a pointer is assigned, the entire list of pointers to that
//   object is traversed.  This class is therefore NOT SUITABLE when there
//   will often be more than two or three pointers to a particular object.
// - References are only tracked as long as linked_ptr<> objects are copied.
//   If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
//   will happen (double deletion).
//
// A good use of this class is storing object references in STL containers.
// You can safely put linked_ptr<> in a vector<>.
// Other uses may not be as good.
//
// Note: If you use an incomplete type with linked_ptr<>, the class
// *containing* linked_ptr<> must have a constructor and destructor (even
// if they do nothing!).
//
// Bill Gibbons suggested we use something like this.
//
// Thread Safety:
//   Unlike other linked_ptr implementations, in this implementation
//   a linked_ptr object is thread-safe in the sense that:
//     - it's safe to copy linked_ptr objects concurrently,
//     - it's safe to copy *from* a linked_ptr and read its underlying
//       raw pointer (e.g. via get()) concurrently, and
//     - it's safe to write to two linked_ptrs that point to the same
//       shared object concurrently.
// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
// confusion with normal linked_ptr.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_

#include <stdlib.h>
#include <assert.h>

#include "gtest/internal/gtest-port.h"

namespace testing {
namespace internal {

// Protects copying of all linked_ptr objects.  linked_ptr_internal::join()
// and linked_ptr_internal::depart() both hold this mutex while they walk and
// relink a circle of pointers.
GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);

// This is used internally by all instances of linked_ptr<>.  It needs to be
// a non-template class because different types of linked_ptr<> can refer to
// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
// So, it needs to be possible for different types of linked_ptr to participate
// in the same circular linked list, so we need a single class type here.
//
// DO NOT USE THIS CLASS DIRECTLY YOURSELF.  Use linked_ptr<T>.
class linked_ptr_internal {
 public:
  // Create a new circle that includes only this instance.
  void join_new() {
    next_ = this;
  }

  // join() and depart() rewrite the next_ link of another node in the same
  // circle, so two such operations must never run concurrently.
  //
  // Different types of linked_ptr objects can coexist in a circle (e.g.
  // linked_ptr<Base>, linked_ptr<Derived1>, and linked_ptr<Derived2>), so a
  // single mutex has to protect all linked_ptr objects.  This can create
  // serious contention in production code, but is acceptable in a testing
  // framework.

  // Join an existing circle: this node is linked in between ptr's
  // predecessor and ptr itself.  This node must not already be a member of
  // that circle.
  void join(linked_ptr_internal const* ptr)
  GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
    MutexLock lock(&g_linked_ptr_mutex);

    // Walk the circle until p is the node whose successor is ptr.
    linked_ptr_internal const* p = ptr;

    while (p->next_ != ptr) {
      // Meeting ourselves here means we are already linked in; without this
      // check the relinking below would silently corrupt the circle.
      assert(p->next_ != this &&
             "Trying to join() a linked_ptr_internal that is already "
             "in a circle.");
      p = p->next_;
    }

    p->next_ = this;
    next_ = ptr;
  }

  // Leave whatever circle we're part of.  Returns true if we were the
  // last member of the circle.  Once this is done, you can join() another.
  bool depart()
  GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
    MutexLock lock(&g_linked_ptr_mutex);

    // A node that points at itself is the only member of its circle.
    if (next_ == this) {
      return true;
    }

    // Walk the circle until p is the node whose successor is this node.
    linked_ptr_internal const* p = next_;

    while (p->next_ != this) {
      // Coming back to our own successor without having met ourselves means
      // this node is not actually in the circle; the walk would never end.
      assert(p->next_ != next_ &&
             "Trying to depart() a linked_ptr_internal not in its circle.");
      p = p->next_;
    }

    // Unlink this node by making its predecessor skip over it.
    p->next_ = next_;
    return false;
  }

 private:
  // Successor in the circle.  Mutable so that a node reached through a
  // pointer-to-const can still be relinked by join() and depart().
  mutable linked_ptr_internal const* next_;
};

// Smart pointer with shared ownership.  Every linked_ptr<> that refers to the
// same object is chained through link_; whichever instance leaves the chain
// last deletes the object.
template <typename T>
class linked_ptr {
 public:
  typedef T element_type;

  // Assumes ownership of a raw pointer (NULL by default).  Wrap the object as
  // soon as possible after creating it.
  explicit linked_ptr(T* ptr = NULL) { capture(ptr); }

  // Drops this reference; the object is deleted if this was the last one.
  ~linked_ptr() { depart(); }

  // Copy construction: become one more reference to the object ptr refers to.
  template <typename U>
  linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }

  linked_ptr(linked_ptr const& ptr) {  // NOLINT
    assert(&ptr != this);
    copy(&ptr);
  }

  // Assignment: release the current value, then share the one ptr refers to.
  template <typename U>
  linked_ptr& operator=(linked_ptr<U> const& ptr) {
    depart();
    copy(&ptr);
    return *this;
  }

  linked_ptr& operator=(linked_ptr const& ptr) {
    if (&ptr == this) {
      return *this;  // Self-assignment leaves everything untouched.
    }

    depart();
    copy(&ptr);
    return *this;
  }

  // Releases the current value and takes ownership of ptr instead.
  void reset(T* ptr = NULL) {
    depart();
    capture(ptr);
  }

  // Raw access to the managed object.
  T* get() const { return value_; }
  T* operator->() const { return value_; }
  T& operator*() const { return *value_; }

  // Comparisons look only at the raw pointer value.
  bool operator==(T* p) const { return value_ == p; }
  bool operator!=(T* p) const { return value_ != p; }

  template <typename U>
  bool operator==(linked_ptr<U> const& ptr) const {
    return value_ == ptr.get();
  }

  template <typename U>
  bool operator!=(linked_ptr<U> const& ptr) const {
    return value_ != ptr.get();
  }

 private:
  // Other instantiations need access to link_ in order to share a circle.
  template <typename U>
  friend class linked_ptr;

  T* value_;
  linked_ptr_internal link_;

  // Leaves the current circle and deletes the object if nobody else holds it.
  void depart() {
    const bool was_last_owner = link_.depart();

    if (was_last_owner) {
      delete value_;
    }
  }

  // Starts a brand-new circle that owns ptr.
  void capture(T* ptr) {
    value_ = ptr;
    link_.join_new();
  }

  // Shares ptr's object.  A null pointer is never shared: it gets a circle of
  // its own instead of joining ptr's.
  template <typename U>
  void copy(linked_ptr<U> const* ptr) {
    value_ = ptr->get();

    if (!value_) {
      link_.join_new();
      return;
    }

    link_.join(&ptr->link_);
  }
};

// Equality with the raw pointer on the left-hand side; compares against the
// pointer held by x.
template<typename T> inline
bool operator==(T* ptr, const linked_ptr<T>& x) {
  return x.get() == ptr;
}

// Inequality with the raw pointer on the left-hand side; compares against the
// pointer held by x.
template<typename T> inline
bool operator!=(T* ptr, const linked_ptr<T>& x) {
  return x.get() != ptr;
}

// Wraps a raw T* in a linked_ptr<T>, letting the compiler deduce T.
// make_linked_ptr(new FooBarBaz<type>(arg)) is shorthand for
// linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg)).
template <typename T>
linked_ptr<T> make_linked_ptr(T* ptr) {
  linked_ptr<T> owner(ptr);
  return owner;
}

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-param-util-generated.h
================================================
// This file was GENERATED by command:
//     pump.py gtest-param-util-generated.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)

// Type and function utilities for implementing parameterized tests.
// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently Google Test supports at most 50 arguments in Values,
// and at most 10 arguments in Combine. Please contact
// googletestframework@googlegroups.com if you need more.
// Please note that the number of arguments to Combine is limited
// by the maximum arity of the implementation of tr1::tuple which is
// currently set at 10.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-param-util.h"
#include "gtest/internal/gtest-port.h"

#if GTEST_HAS_PARAM_TEST

namespace testing {

// Forward declarations of ValuesIn(), which is implemented in
// include/gtest/gtest-param-test.h.  They are declared here because the
// ValueArrayN conversion operators in this file call ValuesIn().

// Overload taking a pair of iterators.
template <typename ForwardIterator>
internal::ParamGenerator <
typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type >
ValuesIn(ForwardIterator begin, ForwardIterator end);

// Overload taking a reference to a C-style array of N elements.
template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);

// Overload taking a container that exposes a value_type member type.
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
  const Container& container);

namespace internal {

// Used in the Values() function to provide polymorphic capabilities.
template <typename T1>
class ValueArray1 {
 public:
  explicit ValueArray1(T1 v1) : v1_(v1) {}

  // Builds a one-element generator holding the stored value converted to T.
  //
  // The value is cast to T explicitly, exactly as the multi-value ValueArrayN
  // classes do.  Passing pointers to the T1 member straight to ValuesIn()
  // would instead produce a generator over T1, which is a different type from
  // the requested ParamGenerator<T> whenever T and T1 differ.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {static_cast<T>(v1_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray1& other);

  const T1 v1_;
};

// Value holder used by Values() for two arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2>
class ValueArray2 {
 public:
  ValueArray2(T1 v1, T2 v2)
      : v1_(v1), v2_(v2) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray2& other);

  const T1 v1_;
  const T2 v2_;
};

// Value holder used by Values() for three arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3>
class ValueArray3 {
 public:
  ValueArray3(T1 v1, T2 v2, T3 v3)
      : v1_(v1), v2_(v2), v3_(v3) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray3& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
};

// Value holder used by Values() for four arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4>
class ValueArray4 {
 public:
  ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray4& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
};

// Value holder used by Values() for five arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5>
class ValueArray5 {
 public:
  ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray5& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
};

// Value holder used by Values() for six arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6>
class ValueArray6 {
 public:
  ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray6& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
};

// Value holder used by Values() for seven arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7>
class ValueArray7 {
 public:
  ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray7& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
};

// Value holder used by Values() for eight arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8>
class ValueArray8 {
 public:
  ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray8& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
};

// Value holder used by Values() for nine arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9>
class ValueArray9 {
 public:
  ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
              T9 v9)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray9& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
};

// Value holder used by Values() for ten arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10>
class ValueArray10 {
 public:
  ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray10& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
};

// Value holder used by Values() for eleven arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11>
class ValueArray11 {
 public:
  ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray11& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
};

// Value holder used by Values() for twelve arguments.  The stored values are
// cast to the requested parameter type T when a ParamGenerator<T> is needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12>
class ValueArray12 {
 public:
  ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray12& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
};

// Value holder used by Values() for thirteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13>
class ValueArray13 {
 public:
  ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray13& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
};

// Value holder used by Values() for fourteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14>
class ValueArray14 {
 public:
  ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray14& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
};

// Value holder used by Values() for fifteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15>
class ValueArray15 {
 public:
  ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray15& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
};

// Value holder used by Values() for sixteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16>
class ValueArray16 {
 public:
  ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray16& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
};

// Value holder used by Values() for seventeen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17>
class ValueArray17 {
 public:
  ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray17& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
};

// Value holder used by Values() for eighteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18>
class ValueArray18 {
 public:
  ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray18& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
};

// Value holder used by Values() for nineteen arguments.  The stored values
// are cast to the requested parameter type T when a ParamGenerator<T> is
// needed.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19>
class ValueArray19 {
 public:
  ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
               T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19) {}

  // Builds a generator over the stored values, each converted to T.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_)
    };
    return ValuesIn(converted);
  }

 private:
  // Declared but not defined: assignment is unsupported.
  void operator=(const ValueArray19& other);

  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
};

// ValueArray20 stores 20 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20>
class ValueArray20 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20) {}

  // static_casts each stored value to T (v1_ first, v20_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray20&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
};

// ValueArray21 stores 21 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21>
class ValueArray21 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21) {}

  // static_casts each stored value to T (v1_ first, v21_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray21&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
};

// ValueArray22 stores 22 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22>
class ValueArray22 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22) {}

  // static_casts each stored value to T (v1_ first, v22_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray22&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
};

// ValueArray23 stores 23 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23>
class ValueArray23 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23) {}

  // static_casts each stored value to T (v1_ first, v23_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray23&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
};

// ValueArray24 stores 24 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24>
class ValueArray24 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24) {}

  // static_casts each stored value to T (v1_ first, v24_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray24&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
};

// ValueArray25 stores 25 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25>
class ValueArray25 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25) {}

  // static_casts each stored value to T (v1_ first, v25_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray25&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
};

// ValueArray26 stores 26 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26>
class ValueArray26 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26) {}

  // static_casts each stored value to T (v1_ first, v26_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray26&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
};

// ValueArray27 stores 27 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27>
class ValueArray27 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27) {}

  // static_casts each stored value to T (v1_ first, v27_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray27&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
};

// ValueArray28 stores 28 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28>
class ValueArray28 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28) {}

  // static_casts each stored value to T (v1_ first, v28_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray28&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
};

// ValueArray29 stores 29 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29>
class ValueArray29 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
               T29 v29)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {}

  // static_casts each stored value to T (v1_ first, v29_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray29&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
};

// ValueArray30 stores 30 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30>
class ValueArray30 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
               T29 v29, T30 v30)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30) {}

  // static_casts each stored value to T (v1_ first, v30_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray30&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
};

// ValueArray31 stores 31 values, each of its own type, and converts them on
// request into a ParamGenerator<T> built from those values.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31>
class ValueArray31 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
               T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
               T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
               T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
               T29 v29, T30 v30, T31 v31)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31) {}

  // static_casts each stored value to T (v1_ first, v31_ last) and passes the
  // resulting array to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared only, never defined: instances are not assignable.
  void operator=(const ValueArray31&);

  // The stored values, in constructor-argument order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
};

// Bundles 32 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32>
class ValueArray32 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32) {}

  // Casts all 32 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray32&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
};

// Bundles 33 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33>
class ValueArray33 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33) {}

  // Casts all 33 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray33&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;
};

// Bundles 34 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34>
class ValueArray34 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34) {}

  // Casts all 34 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray34&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;
};

// Bundles 35 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35>
class ValueArray35 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35) {}

  // Casts all 35 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray35&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;
};

// Bundles 36 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36>
class ValueArray36 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {}

  // Casts all 36 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray36&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;  const T36 v36_;
};

// Bundles 37 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37>
class ValueArray37 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
               T37 v37)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37) {}

  // Casts all 37 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray37&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;  const T36 v36_;
  const T37 v37_;
};

// Bundles 38 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38>
class ValueArray38 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
               T37 v37, T38 v38)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38) {}

  // Casts all 38 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray38&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;  const T36 v36_;
  const T37 v37_;  const T38 v38_;
};

// Bundles 39 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39>
class ValueArray39 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
               T37 v37, T38 v38, T39 v39)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39) {}

  // Casts all 39 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray39&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;  const T36 v36_;
  const T37 v37_;  const T38 v38_;  const T39 v39_;
};

// Bundles 40 values of independent types. The bundle converts implicitly to
// ParamGenerator<T>: every stored value is static_cast to T and the resulting
// array is passed to ValuesIn.
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40>
class ValueArray40 {
 public:
  // Copies each argument into the member of the same index.
  ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
               T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
               T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
               T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
               T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
               T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
               T37 v37, T38 v38, T39 v39, T40 v40)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40) {}

  // Casts all 40 stored values to T, in index order, and returns the
  // generator that ValuesIn builds from that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T converted[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_)
    };
    return ValuesIn(converted);
  }

 private:
  // Assignment is unsupported: declared here and deliberately left undefined.
  void operator=(const ValueArray40&);

  // Stored values, declared in constructor-argument order (four per row).
  const T1 v1_;  const T2 v2_;  const T3 v3_;  const T4 v4_;
  const T5 v5_;  const T6 v6_;  const T7 v7_;  const T8 v8_;
  const T9 v9_;  const T10 v10_;  const T11 v11_;  const T12 v12_;
  const T13 v13_;  const T14 v14_;  const T15 v15_;  const T16 v16_;
  const T17 v17_;  const T18 v18_;  const T19 v19_;  const T20 v20_;
  const T21 v21_;  const T22 v22_;  const T23 v23_;  const T24 v24_;
  const T25 v25_;  const T26 v26_;  const T27 v27_;  const T28 v28_;
  const T29 v29_;  const T30 v30_;  const T31 v31_;  const T32 v32_;
  const T33 v33_;  const T34 v34_;  const T35 v35_;  const T36 v36_;
  const T37 v37_;  const T38 v38_;  const T39 v39_;  const T40 v40_;
};

// Holder for 41 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41>
class ValueArray41 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray41(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray41& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
};

// Holder for 42 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42>
class ValueArray42 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray42(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray42& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
};

// Holder for 43 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43>
class ValueArray43 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray43(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray43& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
};

// Holder for 44 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44>
class ValueArray44 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray44(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray44& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
};

// Holder for 45 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45>
class ValueArray45 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray45(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray45& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
};

// Holder for 46 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46>
class ValueArray46 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray46(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray46& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
};

// Holder for 47 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47>
class ValueArray47 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray47(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_), static_cast<T>(v47_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray47& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
};

// Holder for 48 individually typed values. Converting an instance to
// ParamGenerator<T> casts every stored value to T and forwards them, in
// member order, to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48>
class ValueArray48 {
 public:
  // Copies each argument into the member carrying the same index.
  ValueArray48(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
      T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
      T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18,
      T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
      T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
      T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
      T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
        v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
        v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
        v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
        v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
        v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
        v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48) {}

  // Builds a local array of T by static_cast-ing each stored value, then
  // returns whatever ValuesIn() produces for that array.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_), static_cast<T>(v47_), static_cast<T>(v48_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared without a definition on purpose - assignment is unsupported.
  void operator=(const ValueArray48& other);

  // Stored values; declaration order fixes the initialization order.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;
};

// Holds 49 values of independent types and converts them, on request, into
// a ParamGenerator<T>: each stored value is static_cast to T, collected in
// a local array, and that array is handed to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48,
          typename T49>
class ValueArray49 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray49(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
      T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
      T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
      T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
      T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
      T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
        v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
        v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45),
        v46_(v46), v47_(v47), v48_(v48), v49_(v49) {}

  // Implicit conversion to a generator of T. The values appear in the
  // generated sequence in argument order (v1 first, v49 last).
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_), static_cast<T>(v47_), static_cast<T>(v48_),
        static_cast<T>(v49_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared but intentionally left undefined: instances are not assignable.
  void operator=(const ValueArray49&);

  // The stored values, one member per constructor argument.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;
  const T49 v49_;
};

// Holds 50 values of independent types and converts them, on request, into
// a ParamGenerator<T>: each stored value is static_cast to T, collected in
// a local array, and that array is handed to ValuesIn().
template <typename T1, typename T2, typename T3, typename T4,
          typename T5, typename T6, typename T7, typename T8,
          typename T9, typename T10, typename T11, typename T12,
          typename T13, typename T14, typename T15, typename T16,
          typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24,
          typename T25, typename T26, typename T27, typename T28,
          typename T29, typename T30, typename T31, typename T32,
          typename T33, typename T34, typename T35, typename T36,
          typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44,
          typename T45, typename T46, typename T47, typename T48,
          typename T49, typename T50>
class ValueArray50 {
 public:
  // Copies every argument into the member with the matching index.
  ValueArray50(
      T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
      T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
      T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
      T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
      T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
      T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42,
      T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49,
      T50 v50)
      : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
        v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
        v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
        v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
        v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
        v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
        v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
        v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
        v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45),
        v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {}

  // Implicit conversion to a generator of T. The values appear in the
  // generated sequence in argument order (v1 first, v50 last).
  template <typename T>
  operator ParamGenerator<T>() const {
    const T values[] = {
        static_cast<T>(v1_), static_cast<T>(v2_), static_cast<T>(v3_),
        static_cast<T>(v4_), static_cast<T>(v5_), static_cast<T>(v6_),
        static_cast<T>(v7_), static_cast<T>(v8_), static_cast<T>(v9_),
        static_cast<T>(v10_), static_cast<T>(v11_), static_cast<T>(v12_),
        static_cast<T>(v13_), static_cast<T>(v14_), static_cast<T>(v15_),
        static_cast<T>(v16_), static_cast<T>(v17_), static_cast<T>(v18_),
        static_cast<T>(v19_), static_cast<T>(v20_), static_cast<T>(v21_),
        static_cast<T>(v22_), static_cast<T>(v23_), static_cast<T>(v24_),
        static_cast<T>(v25_), static_cast<T>(v26_), static_cast<T>(v27_),
        static_cast<T>(v28_), static_cast<T>(v29_), static_cast<T>(v30_),
        static_cast<T>(v31_), static_cast<T>(v32_), static_cast<T>(v33_),
        static_cast<T>(v34_), static_cast<T>(v35_), static_cast<T>(v36_),
        static_cast<T>(v37_), static_cast<T>(v38_), static_cast<T>(v39_),
        static_cast<T>(v40_), static_cast<T>(v41_), static_cast<T>(v42_),
        static_cast<T>(v43_), static_cast<T>(v44_), static_cast<T>(v45_),
        static_cast<T>(v46_), static_cast<T>(v47_), static_cast<T>(v48_),
        static_cast<T>(v49_), static_cast<T>(v50_)
    };
    return ValuesIn(values);
  }

 private:
  // Declared but intentionally left undefined: instances are not assignable.
  void operator=(const ValueArray50&);

  // The stored values, one member per constructor argument.
  const T1 v1_;
  const T2 v2_;
  const T3 v3_;
  const T4 v4_;
  const T5 v5_;
  const T6 v6_;
  const T7 v7_;
  const T8 v8_;
  const T9 v9_;
  const T10 v10_;
  const T11 v11_;
  const T12 v12_;
  const T13 v13_;
  const T14 v14_;
  const T15 v15_;
  const T16 v16_;
  const T17 v17_;
  const T18 v18_;
  const T19 v19_;
  const T20 v20_;
  const T21 v21_;
  const T22 v22_;
  const T23 v23_;
  const T24 v24_;
  const T25 v25_;
  const T26 v26_;
  const T27 v27_;
  const T28 v28_;
  const T29 v29_;
  const T30 v30_;
  const T31 v31_;
  const T32 v32_;
  const T33 v33_;
  const T34 v34_;
  const T35 v35_;
  const T36 v36_;
  const T37 v37_;
  const T38 v38_;
  const T39 v39_;
  const T40 v40_;
  const T41 v41_;
  const T42 v42_;
  const T43 v43_;
  const T44 v44_;
  const T45 v45_;
  const T46 v46_;
  const T47 v47_;
  const T48 v48_;
  const T49 v49_;
  const T50 v50_;
};

# if GTEST_HAS_COMBINE
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Parameter generator that yields every (T1, T2) tuple in the Cartesian
// product of the values produced by two component generators. The second
// component varies fastest.
//
template <typename T1, typename T2>
class CartesianProductGenerator2
  : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2> > {
 public:
  typedef ::std::tr1::tuple<T1, T2> ParamType;

  // Keeps a copy of each component generator.
  CartesianProductGenerator2(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2)
    : g1_(g1),
      g2_(g2) {}
  virtual ~CartesianProductGenerator2() {}

  // Allocates (with new) an iterator whose components all sit at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin());
  }
  // Allocates (with new) an iterator whose components all sit at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end());
  }

 private:
  // Walks the product by stepping the component iterators like the digits
  // of a counter, caching the tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each component, g<i> supplies the range bounds and current<i>
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // The generator this iterator was created from.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next tuple. Must only be called while the iterator is
    // not at the end, which implies that no component is at its end.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-varying component ...
      ++current2_;

      // ... and carry into the slower one when it wraps around.
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }

      ComputeCurrentValue();
    }
    // Returns a new copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Points at the cached tuple for the current position.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means `other` has this exact type, which
      // makes the downcast below valid.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // positions they hold, since the end state can be reached in several
      // ways.
      if (AtEnd() && that->AtEnd()) {
        return true;
      }
      return current1_ == that->current1_ &&
             current2_ == that->current2_;
    }

   private:
    // Copies the positions and range bounds, then rebuilds the cached tuple
    // rather than copying it.
    Iterator(const Iterator& src)
      : base_(src.base_),
        begin1_(src.begin1_), end1_(src.end1_), current1_(src.current1_),
        begin2_(src.begin2_), end2_(src.end2_), current2_(src.current2_) {
      ComputeCurrentValue();
    }

    // Refreshes current_value_ from the component positions; leaves it
    // untouched when the iterator is at the end.
    void ComputeCurrentValue() {
      if (AtEnd()) {
        return;
      }
      current_value_ = ParamType(*current1_, *current2_);
    }
    // The iterator is past the end as soon as any single component
    // iterator has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      return current2_ == end2_;
    }

    // Declared but left undefined: iterators are not assignable.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ / end<i>_ bound the i-th component range and current<i>_
    // is the position within it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    // Tuple for the current position; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator2::Iterator

  // Declared but left undefined: generators are not assignable.
  void operator=(const CartesianProductGenerator2& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
};  // class CartesianProductGenerator2


// Parameter generator that yields every (T1, T2, T3) tuple in the Cartesian
// product of the values produced by three component generators. The last
// component varies fastest.
template <typename T1, typename T2, typename T3>
class CartesianProductGenerator3
  : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3> ParamType;

  // Keeps a copy of each component generator.
  CartesianProductGenerator3(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3)
    : g1_(g1),
      g2_(g2),
      g3_(g3) {}
  virtual ~CartesianProductGenerator3() {}

  // Allocates (with new) an iterator whose components all sit at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin());
  }
  // Allocates (with new) an iterator whose components all sit at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end());
  }

 private:
  // Walks the product by stepping the component iterators like the digits
  // of a counter, caching the tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each component, g<i> supplies the range bounds and current<i>
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // The generator this iterator was created from.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next tuple. Must only be called while the iterator is
    // not at the end, which implies that no component is at its end.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-varying component ...
      ++current3_;

      // ... then carry into each slower component whose faster neighbour
      // wrapped around.
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }

      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }

      ComputeCurrentValue();
    }
    // Returns a new copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Points at the cached tuple for the current position.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means `other` has this exact type, which
      // makes the downcast below valid.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // positions they hold, since the end state can be reached in several
      // ways.
      if (AtEnd() && that->AtEnd()) {
        return true;
      }
      return current1_ == that->current1_ &&
             current2_ == that->current2_ &&
             current3_ == that->current3_;
    }

   private:
    // Copies the positions and range bounds, then rebuilds the cached tuple
    // rather than copying it.
    Iterator(const Iterator& src)
      : base_(src.base_),
        begin1_(src.begin1_), end1_(src.end1_), current1_(src.current1_),
        begin2_(src.begin2_), end2_(src.end2_), current2_(src.current2_),
        begin3_(src.begin3_), end3_(src.end3_), current3_(src.current3_) {
      ComputeCurrentValue();
    }

    // Refreshes current_value_ from the component positions; leaves it
    // untouched when the iterator is at the end.
    void ComputeCurrentValue() {
      if (AtEnd()) {
        return;
      }
      current_value_ = ParamType(*current1_, *current2_, *current3_);
    }
    // The iterator is past the end as soon as any single component
    // iterator has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      return current3_ == end3_;
    }

    // Declared but left undefined: iterators are not assignable.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ / end<i>_ bound the i-th component range and current<i>_
    // is the position within it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    // Tuple for the current position; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator3::Iterator

  // Declared but left undefined: generators are not assignable.
  void operator=(const CartesianProductGenerator3& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
};  // class CartesianProductGenerator3


// Parameter generator that yields every (T1, T2, T3, T4) tuple in the
// Cartesian product of the values produced by four component generators.
// The last component varies fastest.
template <typename T1, typename T2, typename T3, typename T4>
class CartesianProductGenerator4
  : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4> ParamType;

  // Keeps a copy of each component generator.
  CartesianProductGenerator4(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4) {}
  virtual ~CartesianProductGenerator4() {}

  // Allocates (with new) an iterator whose components all sit at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin());
  }
  // Allocates (with new) an iterator whose components all sit at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end());
  }

 private:
  // Walks the product by stepping the component iterators like the digits
  // of a counter, caching the tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each component, g<i> supplies the range bounds and current<i>
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // The generator this iterator was created from.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next tuple. Must only be called while the iterator is
    // not at the end, which implies that no component is at its end.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-varying component ...
      ++current4_;

      // ... then carry into each slower component whose faster neighbour
      // wrapped around.
      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }

      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }

      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }

      ComputeCurrentValue();
    }
    // Returns a new copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Points at the cached tuple for the current position.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means `other` has this exact type, which
      // makes the downcast below valid.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // positions they hold, since the end state can be reached in several
      // ways.
      if (AtEnd() && that->AtEnd()) {
        return true;
      }
      return current1_ == that->current1_ &&
             current2_ == that->current2_ &&
             current3_ == that->current3_ &&
             current4_ == that->current4_;
    }

   private:
    // Copies the positions and range bounds, then rebuilds the cached tuple
    // rather than copying it.
    Iterator(const Iterator& src)
      : base_(src.base_),
        begin1_(src.begin1_), end1_(src.end1_), current1_(src.current1_),
        begin2_(src.begin2_), end2_(src.end2_), current2_(src.current2_),
        begin3_(src.begin3_), end3_(src.end3_), current3_(src.current3_),
        begin4_(src.begin4_), end4_(src.end4_), current4_(src.current4_) {
      ComputeCurrentValue();
    }

    // Refreshes current_value_ from the component positions; leaves it
    // untouched when the iterator is at the end.
    void ComputeCurrentValue() {
      if (AtEnd()) {
        return;
      }
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_);
    }
    // The iterator is past the end as soon as any single component
    // iterator has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      return current4_ == end4_;
    }

    // Declared but left undefined: iterators are not assignable.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ / end<i>_ bound the i-th component range and current<i>_
    // is the position within it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    // Tuple for the current position; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator4::Iterator

  // Declared but left undefined: generators are not assignable.
  void operator=(const CartesianProductGenerator4& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
};  // class CartesianProductGenerator4


// Parameter generator that yields every (T1, T2, T3, T4, T5) tuple in the
// Cartesian product of the values produced by five component generators.
// The last component varies fastest.
template <typename T1, typename T2, typename T3, typename T4, typename T5>
class CartesianProductGenerator5
  : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> ParamType;

  // Keeps a copy of each component generator.
  CartesianProductGenerator5(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5) {}
  virtual ~CartesianProductGenerator5() {}

  // Allocates (with new) an iterator whose components all sit at begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin());
  }
  // Allocates (with new) an iterator whose components all sit at end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end());
  }

 private:
  // Walks the product by stepping the component iterators like the digits
  // of a counter, caching the tuple for the current position.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each component, g<i> supplies the range bounds and current<i>
    // the starting position inside that range.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // The generator this iterator was created from.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next tuple. Must only be called while the iterator is
    // not at the end, which implies that no component is at its end.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-varying component ...
      ++current5_;

      // ... then carry into each slower component whose faster neighbour
      // wrapped around.
      if (current5_ == end5_) {
        current5_ = begin5_;
        ++current4_;
      }

      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }

      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }

      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }

      ComputeCurrentValue();
    }
    // Returns a new copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Points at the cached tuple for the current position.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means `other` has this exact type, which
      // makes the downcast below valid.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const that =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Two past-the-end iterators compare equal no matter which component
      // positions they hold, since the end state can be reached in several
      // ways.
      if (AtEnd() && that->AtEnd()) {
        return true;
      }
      return current1_ == that->current1_ &&
             current2_ == that->current2_ &&
             current3_ == that->current3_ &&
             current4_ == that->current4_ &&
             current5_ == that->current5_;
    }

   private:
    // Copies the positions and range bounds, then rebuilds the cached tuple
    // rather than copying it.
    Iterator(const Iterator& src)
      : base_(src.base_),
        begin1_(src.begin1_), end1_(src.end1_), current1_(src.current1_),
        begin2_(src.begin2_), end2_(src.end2_), current2_(src.current2_),
        begin3_(src.begin3_), end3_(src.end3_), current3_(src.current3_),
        begin4_(src.begin4_), end4_(src.end4_), current4_(src.current4_),
        begin5_(src.begin5_), end5_(src.end5_), current5_(src.current5_) {
      ComputeCurrentValue();
    }

    // Refreshes current_value_ from the component positions; leaves it
    // untouched when the iterator is at the end.
    void ComputeCurrentValue() {
      if (AtEnd()) {
        return;
      }
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_);
    }
    // The iterator is past the end as soon as any single component
    // iterator has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      return current5_ == end5_;
    }

    // Declared but left undefined: iterators are not assignable.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ / end<i>_ bound the i-th component range and current<i>_
    // is the position within it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    // Tuple for the current position; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator5::Iterator

  // Declared but left undefined: generators are not assignable.
  void operator=(const CartesianProductGenerator5& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
};  // class CartesianProductGenerator5


// Combines six parameter generators into one generator of 6-tuples. Each
// tuple holds one value from every component generator; iteration steps
// the sixth component fastest and the first component slowest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6>
class CartesianProductGenerator6
  : public ParamGeneratorInterface<
        ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> ParamType;

  // Keeps its own copy of every component generator.
  CartesianProductGenerator6(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5,
                             const ParamGenerator<T6>& g6)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6) {}
  virtual ~CartesianProductGenerator6() {}

  // Returns a new Iterator whose components all sit at the beginning of
  // their respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin());
  }
  // Returns a new Iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end());
  }

 private:
  // Walks the product. For every component i it holds the fixed bounds
  // begin<i>_/end<i>_ of the range and the moving position current<i>_,
  // and it caches the tuple the current positions designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // |base| is the generator this iterator reports from BaseGenerator().
    // Range bounds come from g1..g6; current1..current6 are the starting
    // positions inside those ranges.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5,
             const ParamGenerator<T6>& g6,
             const typename ParamGenerator<T6>::iterator& current6)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5),
        begin6_(g6.begin()),
        end6_(g6.end()),
        current6_(current6) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves on to the next tuple. Must not be invoked on an iterator that
    // is already past the end (asserted), so no component may be at the
    // end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // The last component moves on every call. Each component that ran
      // off its range is rewound and passes a carry to its left neighbour.
      // current1_ is never rewound, so once it hits end1_ AtEnd() holds.
      ++current6_;
      CarryIfExhausted(current6_, begin6_, end6_, current5_);
      CarryIfExhausted(current5_, begin5_, end5_, current4_);
      CarryIfExhausted(current4_, begin4_, end4_, current3_);
      CarryIfExhausted(current3_, begin3_, end3_, current2_);
      CarryIfExhausted(current2_, begin2_, end2_, current1_);
      ComputeCurrentValue();
    }
    // Returns a new Iterator copy-constructed from this one.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns the address of the cached tuple.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees |other| has this very type,
      // which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Past-the-end can be reached with many different component
      // positions, so two past-the-end iterators compare equal no matter
      // where their components are.
      if (AtEnd() && rhs->AtEnd()) return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_ &&
             current6_ == rhs->current6_;
    }

   private:
    // Copies bounds and positions from |other| and then recomputes the
    // cached tuple instead of copying it.
    Iterator(const Iterator& other)
      : base_(other.base_),
        begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_),
        begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_),
        begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_),
        begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_),
        begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_),
        begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_) {
      ComputeCurrentValue();
    }

    // If |inner| has reached |last|, rewinds it to |first| and steps
    // |outer|, the neighbouring slower-varying component.
    template <typename Inner, typename Outer>
    static void CarryIfExhausted(Inner& inner, const Inner& first,
                                 const Inner& last, Outer& outer) {
      if (inner == last) {
        inner = first;
        ++outer;
      }
    }

    // Refreshes the cached tuple from the component positions. Leaves the
    // cache untouched when the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_);
    }
    // The iterator is past the end as soon as any one component iterator
    // has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      return current6_ == end6_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    typedef typename ParamGenerator<T1>::iterator Iter1;
    typedef typename ParamGenerator<T2>::iterator Iter2;
    typedef typename ParamGenerator<T3>::iterator Iter3;
    typedef typename ParamGenerator<T4>::iterator Iter4;
    typedef typename ParamGenerator<T5>::iterator Iter5;
    typedef typename ParamGenerator<T6>::iterator Iter6;

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ and end<i>_ delimit the i-th range; current<i>_ is the
    // position inside it.
    const Iter1 begin1_;
    const Iter1 end1_;
    Iter1 current1_;
    const Iter2 begin2_;
    const Iter2 end2_;
    Iter2 current2_;
    const Iter3 begin3_;
    const Iter3 end3_;
    Iter3 current3_;
    const Iter4 begin4_;
    const Iter4 end4_;
    Iter4 current4_;
    const Iter5 begin5_;
    const Iter5 end5_;
    Iter5 current5_;
    const Iter6 begin6_;
    const Iter6 end6_;
    Iter6 current6_;
    ParamType current_value_;
  };  // class CartesianProductGenerator6::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator6& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
};  // class CartesianProductGenerator6


// Combines seven parameter generators into one generator of 7-tuples. Each
// tuple holds one value from every component generator; iteration steps
// the seventh component fastest and the first component slowest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7>
class CartesianProductGenerator7
  : public ParamGeneratorInterface<
        ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType;

  // Keeps its own copy of every component generator.
  CartesianProductGenerator7(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5,
                             const ParamGenerator<T6>& g6,
                             const ParamGenerator<T7>& g7)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7) {}
  virtual ~CartesianProductGenerator7() {}

  // Returns a new Iterator whose components all sit at the beginning of
  // their respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin());
  }
  // Returns a new Iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end());
  }

 private:
  // Walks the product. For every component i it holds the fixed bounds
  // begin<i>_/end<i>_ of the range and the moving position current<i>_,
  // and it caches the tuple the current positions designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // |base| is the generator this iterator reports from BaseGenerator().
    // Range bounds come from g1..g7; current1..current7 are the starting
    // positions inside those ranges.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5,
             const ParamGenerator<T6>& g6,
             const typename ParamGenerator<T6>::iterator& current6,
             const ParamGenerator<T7>& g7,
             const typename ParamGenerator<T7>::iterator& current7)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5),
        begin6_(g6.begin()),
        end6_(g6.end()),
        current6_(current6),
        begin7_(g7.begin()),
        end7_(g7.end()),
        current7_(current7) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves on to the next tuple. Must not be invoked on an iterator that
    // is already past the end (asserted), so no component may be at the
    // end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // The last component moves on every call. Each component that ran
      // off its range is rewound and passes a carry to its left neighbour.
      // current1_ is never rewound, so once it hits end1_ AtEnd() holds.
      ++current7_;
      CarryIfExhausted(current7_, begin7_, end7_, current6_);
      CarryIfExhausted(current6_, begin6_, end6_, current5_);
      CarryIfExhausted(current5_, begin5_, end5_, current4_);
      CarryIfExhausted(current4_, begin4_, end4_, current3_);
      CarryIfExhausted(current3_, begin3_, end3_, current2_);
      CarryIfExhausted(current2_, begin2_, end2_, current1_);
      ComputeCurrentValue();
    }
    // Returns a new Iterator copy-constructed from this one.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns the address of the cached tuple.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees |other| has this very type,
      // which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Past-the-end can be reached with many different component
      // positions, so two past-the-end iterators compare equal no matter
      // where their components are.
      if (AtEnd() && rhs->AtEnd()) return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_ &&
             current6_ == rhs->current6_ &&
             current7_ == rhs->current7_;
    }

   private:
    // Copies bounds and positions from |other| and then recomputes the
    // cached tuple instead of copying it.
    Iterator(const Iterator& other)
      : base_(other.base_),
        begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_),
        begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_),
        begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_),
        begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_),
        begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_),
        begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_),
        begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_) {
      ComputeCurrentValue();
    }

    // If |inner| has reached |last|, rewinds it to |first| and steps
    // |outer|, the neighbouring slower-varying component.
    template <typename Inner, typename Outer>
    static void CarryIfExhausted(Inner& inner, const Inner& first,
                                 const Inner& last, Outer& outer) {
      if (inner == last) {
        inner = first;
        ++outer;
      }
    }

    // Refreshes the cached tuple from the component positions. Leaves the
    // cache untouched when the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_);
    }
    // The iterator is past the end as soon as any one component iterator
    // has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      return current7_ == end7_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    typedef typename ParamGenerator<T1>::iterator Iter1;
    typedef typename ParamGenerator<T2>::iterator Iter2;
    typedef typename ParamGenerator<T3>::iterator Iter3;
    typedef typename ParamGenerator<T4>::iterator Iter4;
    typedef typename ParamGenerator<T5>::iterator Iter5;
    typedef typename ParamGenerator<T6>::iterator Iter6;
    typedef typename ParamGenerator<T7>::iterator Iter7;

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ and end<i>_ delimit the i-th range; current<i>_ is the
    // position inside it.
    const Iter1 begin1_;
    const Iter1 end1_;
    Iter1 current1_;
    const Iter2 begin2_;
    const Iter2 end2_;
    Iter2 current2_;
    const Iter3 begin3_;
    const Iter3 end3_;
    Iter3 current3_;
    const Iter4 begin4_;
    const Iter4 end4_;
    Iter4 current4_;
    const Iter5 begin5_;
    const Iter5 end5_;
    Iter5 current5_;
    const Iter6 begin6_;
    const Iter6 end6_;
    Iter6 current6_;
    const Iter7 begin7_;
    const Iter7 end7_;
    Iter7 current7_;
    ParamType current_value_;
  };  // class CartesianProductGenerator7::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator7& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
};  // class CartesianProductGenerator7


// Combines eight parameter generators into one generator of 8-tuples. Each
// tuple holds one value from every component generator; iteration steps
// the eighth component fastest and the first component slowest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8>
class CartesianProductGenerator8
  : public ParamGeneratorInterface<
        ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType;

  // Keeps its own copy of every component generator.
  CartesianProductGenerator8(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5,
                             const ParamGenerator<T6>& g6,
                             const ParamGenerator<T7>& g7,
                             const ParamGenerator<T8>& g8)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8) {}
  virtual ~CartesianProductGenerator8() {}

  // Returns a new Iterator whose components all sit at the beginning of
  // their respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin(),
                        g8_, g8_.begin());
  }
  // Returns a new Iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end(),
                        g8_, g8_.end());
  }

 private:
  // Walks the product. For every component i it holds the fixed bounds
  // begin<i>_/end<i>_ of the range and the moving position current<i>_,
  // and it caches the tuple the current positions designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // |base| is the generator this iterator reports from BaseGenerator().
    // Range bounds come from g1..g8; current1..current8 are the starting
    // positions inside those ranges.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5,
             const ParamGenerator<T6>& g6,
             const typename ParamGenerator<T6>::iterator& current6,
             const ParamGenerator<T7>& g7,
             const typename ParamGenerator<T7>::iterator& current7,
             const ParamGenerator<T8>& g8,
             const typename ParamGenerator<T8>::iterator& current8)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5),
        begin6_(g6.begin()),
        end6_(g6.end()),
        current6_(current6),
        begin7_(g7.begin()),
        end7_(g7.end()),
        current7_(current7),
        begin8_(g8.begin()),
        end8_(g8.end()),
        current8_(current8) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves on to the next tuple. Must not be invoked on an iterator that
    // is already past the end (asserted), so no component may be at the
    // end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // The last component moves on every call. Each component that ran
      // off its range is rewound and passes a carry to its left neighbour.
      // current1_ is never rewound, so once it hits end1_ AtEnd() holds.
      ++current8_;
      CarryIfExhausted(current8_, begin8_, end8_, current7_);
      CarryIfExhausted(current7_, begin7_, end7_, current6_);
      CarryIfExhausted(current6_, begin6_, end6_, current5_);
      CarryIfExhausted(current5_, begin5_, end5_, current4_);
      CarryIfExhausted(current4_, begin4_, end4_, current3_);
      CarryIfExhausted(current3_, begin3_, end3_, current2_);
      CarryIfExhausted(current2_, begin2_, end2_, current1_);
      ComputeCurrentValue();
    }
    // Returns a new Iterator copy-constructed from this one.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns the address of the cached tuple.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees |other| has this very type,
      // which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Past-the-end can be reached with many different component
      // positions, so two past-the-end iterators compare equal no matter
      // where their components are.
      if (AtEnd() && rhs->AtEnd()) return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_ &&
             current6_ == rhs->current6_ &&
             current7_ == rhs->current7_ &&
             current8_ == rhs->current8_;
    }

   private:
    // Copies bounds and positions from |other| and then recomputes the
    // cached tuple instead of copying it.
    Iterator(const Iterator& other)
      : base_(other.base_),
        begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_),
        begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_),
        begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_),
        begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_),
        begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_),
        begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_),
        begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_),
        begin8_(other.begin8_), end8_(other.end8_), current8_(other.current8_) {
      ComputeCurrentValue();
    }

    // If |inner| has reached |last|, rewinds it to |first| and steps
    // |outer|, the neighbouring slower-varying component.
    template <typename Inner, typename Outer>
    static void CarryIfExhausted(Inner& inner, const Inner& first,
                                 const Inner& last, Outer& outer) {
      if (inner == last) {
        inner = first;
        ++outer;
      }
    }

    // Refreshes the cached tuple from the component positions. Leaves the
    // cache untouched when the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_, *current8_);
    }
    // The iterator is past the end as soon as any one component iterator
    // has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      if (current7_ == end7_) return true;
      return current8_ == end8_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    typedef typename ParamGenerator<T1>::iterator Iter1;
    typedef typename ParamGenerator<T2>::iterator Iter2;
    typedef typename ParamGenerator<T3>::iterator Iter3;
    typedef typename ParamGenerator<T4>::iterator Iter4;
    typedef typename ParamGenerator<T5>::iterator Iter5;
    typedef typename ParamGenerator<T6>::iterator Iter6;
    typedef typename ParamGenerator<T7>::iterator Iter7;
    typedef typename ParamGenerator<T8>::iterator Iter8;

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ and end<i>_ delimit the i-th range; current<i>_ is the
    // position inside it.
    const Iter1 begin1_;
    const Iter1 end1_;
    Iter1 current1_;
    const Iter2 begin2_;
    const Iter2 end2_;
    Iter2 current2_;
    const Iter3 begin3_;
    const Iter3 end3_;
    Iter3 current3_;
    const Iter4 begin4_;
    const Iter4 end4_;
    Iter4 current4_;
    const Iter5 begin5_;
    const Iter5 end5_;
    Iter5 current5_;
    const Iter6 begin6_;
    const Iter6 end6_;
    Iter6 current6_;
    const Iter7 begin7_;
    const Iter7 end7_;
    Iter7 current7_;
    const Iter8 begin8_;
    const Iter8 end8_;
    Iter8 current8_;
    ParamType current_value_;
  };  // class CartesianProductGenerator8::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator8& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
};  // class CartesianProductGenerator8


// Combines nine parameter generators into one generator of 9-tuples. Each
// tuple holds one value from every component generator; iteration steps
// the ninth component fastest and the first component slowest.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9>
class CartesianProductGenerator9
  : public ParamGeneratorInterface<
        ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType;

  // Keeps its own copy of every component generator.
  CartesianProductGenerator9(const ParamGenerator<T1>& g1,
                             const ParamGenerator<T2>& g2,
                             const ParamGenerator<T3>& g3,
                             const ParamGenerator<T4>& g4,
                             const ParamGenerator<T5>& g5,
                             const ParamGenerator<T6>& g6,
                             const ParamGenerator<T7>& g7,
                             const ParamGenerator<T8>& g8,
                             const ParamGenerator<T9>& g9)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8),
      g9_(g9) {}
  virtual ~CartesianProductGenerator9() {}

  // Returns a new Iterator whose components all sit at the beginning of
  // their respective ranges.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin(),
                        g8_, g8_.begin(),
                        g9_, g9_.begin());
  }
  // Returns a new Iterator whose components all sit at the end of their
  // respective ranges.
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end(),
                        g8_, g8_.end(),
                        g9_, g9_.end());
  }

 private:
  // Walks the product. For every component i it holds the fixed bounds
  // begin<i>_/end<i>_ of the range and the moving position current<i>_,
  // and it caches the tuple the current positions designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // |base| is the generator this iterator reports from BaseGenerator().
    // Range bounds come from g1..g9; current1..current9 are the starting
    // positions inside those ranges.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5,
             const ParamGenerator<T6>& g6,
             const typename ParamGenerator<T6>::iterator& current6,
             const ParamGenerator<T7>& g7,
             const typename ParamGenerator<T7>::iterator& current7,
             const ParamGenerator<T8>& g8,
             const typename ParamGenerator<T8>::iterator& current8,
             const ParamGenerator<T9>& g9,
             const typename ParamGenerator<T9>::iterator& current9)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5),
        begin6_(g6.begin()),
        end6_(g6.end()),
        current6_(current6),
        begin7_(g7.begin()),
        end7_(g7.end()),
        current7_(current7),
        begin8_(g8.begin()),
        end8_(g8.end()),
        current8_(current8),
        begin9_(g9.begin()),
        end9_(g9.end()),
        current9_(current9) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves on to the next tuple. Must not be invoked on an iterator that
    // is already past the end (asserted), so no component may be at the
    // end of its range on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // The last component moves on every call. Each component that ran
      // off its range is rewound and passes a carry to its left neighbour.
      // current1_ is never rewound, so once it hits end1_ AtEnd() holds.
      ++current9_;
      CarryIfExhausted(current9_, begin9_, end9_, current8_);
      CarryIfExhausted(current8_, begin8_, end8_, current7_);
      CarryIfExhausted(current7_, begin7_, end7_, current6_);
      CarryIfExhausted(current6_, begin6_, end6_, current5_);
      CarryIfExhausted(current5_, begin5_, end5_, current4_);
      CarryIfExhausted(current4_, begin4_, end4_, current3_);
      CarryIfExhausted(current3_, begin3_, end3_, current2_);
      CarryIfExhausted(current2_, begin2_, end2_, current1_);
      ComputeCurrentValue();
    }
    // Returns a new Iterator copy-constructed from this one.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns the address of the cached tuple.
    virtual const ParamType* Current() const {
      return &current_value_;
    }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing the base generator guarantees |other| has this very type,
      // which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
        CheckedDowncastToActualType<const Iterator>(&other);
      // Past-the-end can be reached with many different component
      // positions, so two past-the-end iterators compare equal no matter
      // where their components are.
      if (AtEnd() && rhs->AtEnd()) return true;
      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_ &&
             current6_ == rhs->current6_ &&
             current7_ == rhs->current7_ &&
             current8_ == rhs->current8_ &&
             current9_ == rhs->current9_;
    }

   private:
    // Copies bounds and positions from |other| and then recomputes the
    // cached tuple instead of copying it.
    Iterator(const Iterator& other)
      : base_(other.base_),
        begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_),
        begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_),
        begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_),
        begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_),
        begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_),
        begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_),
        begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_),
        begin8_(other.begin8_), end8_(other.end8_), current8_(other.current8_),
        begin9_(other.begin9_), end9_(other.end9_), current9_(other.current9_) {
      ComputeCurrentValue();
    }

    // If |inner| has reached |last|, rewinds it to |first| and steps
    // |outer|, the neighbouring slower-varying component.
    template <typename Inner, typename Outer>
    static void CarryIfExhausted(Inner& inner, const Inner& first,
                                 const Inner& last, Outer& outer) {
      if (inner == last) {
        inner = first;
        ++outer;
      }
    }

    // Refreshes the cached tuple from the component positions. Leaves the
    // cache untouched when the iterator is past the end.
    void ComputeCurrentValue() {
      if (AtEnd()) return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_, *current8_, *current9_);
    }
    // The iterator is past the end as soon as any one component iterator
    // has reached the end of its range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      if (current7_ == end7_) return true;
      if (current8_ == end8_) return true;
      return current9_ == end9_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    typedef typename ParamGenerator<T1>::iterator Iter1;
    typedef typename ParamGenerator<T2>::iterator Iter2;
    typedef typename ParamGenerator<T3>::iterator Iter3;
    typedef typename ParamGenerator<T4>::iterator Iter4;
    typedef typename ParamGenerator<T5>::iterator Iter5;
    typedef typename ParamGenerator<T6>::iterator Iter6;
    typedef typename ParamGenerator<T7>::iterator Iter7;
    typedef typename ParamGenerator<T8>::iterator Iter8;
    typedef typename ParamGenerator<T9>::iterator Iter9;

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin<i>_ and end<i>_ delimit the i-th range; current<i>_ is the
    // position inside it.
    const Iter1 begin1_;
    const Iter1 end1_;
    Iter1 current1_;
    const Iter2 begin2_;
    const Iter2 end2_;
    Iter2 current2_;
    const Iter3 begin3_;
    const Iter3 end3_;
    Iter3 current3_;
    const Iter4 begin4_;
    const Iter4 end4_;
    Iter4 current4_;
    const Iter5 begin5_;
    const Iter5 end5_;
    Iter5 current5_;
    const Iter6 begin6_;
    const Iter6 end6_;
    Iter6 current6_;
    const Iter7 begin7_;
    const Iter7 end7_;
    Iter7 current7_;
    const Iter8 begin8_;
    const Iter8 end8_;
    Iter8 current8_;
    const Iter9 begin9_;
    const Iter9 end9_;
    Iter9 current9_;
    ParamType current_value_;
  };  // class CartesianProductGenerator9::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator9& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
  const ParamGenerator<T9> g9_;
};  // class CartesianProductGenerator9


// Produces every element of the Cartesian product of ten component
// generators as a 10-tuple. Iteration is odometer-like: the tenth component
// moves on every step and wrap-arounds carry toward the first component.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10>
class CartesianProductGenerator10
  : public ParamGeneratorInterface<
        ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> > {
 public:
  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;

  // Keeps a copy of each component generator.
  CartesianProductGenerator10(const ParamGenerator<T1>& g1,
                              const ParamGenerator<T2>& g2,
                              const ParamGenerator<T3>& g3,
                              const ParamGenerator<T4>& g4,
                              const ParamGenerator<T5>& g5,
                              const ParamGenerator<T6>& g6,
                              const ParamGenerator<T7>& g7,
                              const ParamGenerator<T8>& g8,
                              const ParamGenerator<T9>& g9,
                              const ParamGenerator<T10>& g10)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8),
      g9_(g9),
      g10_(g10) {}
  virtual ~CartesianProductGenerator10() {}

  // Returns a newly allocated iterator with every component at its begin().
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this,
                        g1_, g1_.begin(),
                        g2_, g2_.begin(),
                        g3_, g3_.begin(),
                        g4_, g4_.begin(),
                        g5_, g5_.begin(),
                        g6_, g6_.begin(),
                        g7_, g7_.begin(),
                        g8_, g8_.begin(),
                        g9_, g9_.begin(),
                        g10_, g10_.begin());
  }
  // Returns a newly allocated iterator with every component at its end().
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this,
                        g1_, g1_.end(),
                        g2_, g2_.end(),
                        g3_, g3_.end(),
                        g4_, g4_.end(),
                        g5_, g5_.end(),
                        g6_, g6_.end(),
                        g7_, g7_.end(),
                        g8_, g8_.end(),
                        g9_, g9_.end(),
                        g10_, g10_.end());
  }

 private:
  // Walks the product by tracking one position per component generator and
  // caching the tuple those positions currently designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    // For each component i, records the [begin, end) range of g<i> and the
    // starting position current<i>, then caches the corresponding tuple.
    Iterator(const ParamGeneratorInterface<ParamType>* base,
             const ParamGenerator<T1>& g1,
             const typename ParamGenerator<T1>::iterator& current1,
             const ParamGenerator<T2>& g2,
             const typename ParamGenerator<T2>::iterator& current2,
             const ParamGenerator<T3>& g3,
             const typename ParamGenerator<T3>::iterator& current3,
             const ParamGenerator<T4>& g4,
             const typename ParamGenerator<T4>::iterator& current4,
             const ParamGenerator<T5>& g5,
             const typename ParamGenerator<T5>::iterator& current5,
             const ParamGenerator<T6>& g6,
             const typename ParamGenerator<T6>::iterator& current6,
             const ParamGenerator<T7>& g7,
             const typename ParamGenerator<T7>::iterator& current7,
             const ParamGenerator<T8>& g8,
             const typename ParamGenerator<T8>::iterator& current8,
             const ParamGenerator<T9>& g9,
             const typename ParamGenerator<T9>::iterator& current9,
             const ParamGenerator<T10>& g10,
             const typename ParamGenerator<T10>::iterator& current10)
      : base_(base),
        begin1_(g1.begin()),
        end1_(g1.end()),
        current1_(current1),
        begin2_(g2.begin()),
        end2_(g2.end()),
        current2_(current2),
        begin3_(g3.begin()),
        end3_(g3.end()),
        current3_(current3),
        begin4_(g4.begin()),
        end4_(g4.end()),
        current4_(current4),
        begin5_(g5.begin()),
        end5_(g5.end()),
        current5_(current5),
        begin6_(g6.begin()),
        end6_(g6.end()),
        current6_(current6),
        begin7_(g7.begin()),
        end7_(g7.end()),
        current7_(current7),
        begin8_(g8.begin()),
        end8_(g8.end()),
        current8_(current8),
        begin9_(g9.begin()),
        end9_(g9.end()),
        current9_(current9),
        begin10_(g10.begin()),
        end10_(g10.end()),
        current10_(current10) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    // The generator this iterator was created by.
    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }

    // Moves to the next tuple. Must not be called on an iterator that is
    // already past the end, so every component is expected to be in range
    // on entry.
    virtual void Advance() {
      assert(!AtEnd());
      // Step the fastest-moving component first.
      ++current10_;

      // Carry pass, from the tenth component down to the second: a component
      // that reached its end restarts from its beginning and bumps its
      // slower neighbour. The first component is never reset here.
      if (current10_ == end10_) {
        current10_ = begin10_;
        ++current9_;
      }
      if (current9_ == end9_) {
        current9_ = begin9_;
        ++current8_;
      }
      if (current8_ == end8_) {
        current8_ = begin8_;
        ++current7_;
      }
      if (current7_ == end7_) {
        current7_ = begin7_;
        ++current6_;
      }
      if (current6_ == end6_) {
        current6_ = begin6_;
        ++current5_;
      }
      if (current5_ == end5_) {
        current5_ = begin5_;
        ++current4_;
      }
      if (current4_ == end4_) {
        current4_ = begin4_;
        ++current3_;
      }
      if (current3_ == end3_) {
        current3_ = begin3_;
        ++current2_;
      }
      if (current2_ == end2_) {
        current2_ = begin2_;
        ++current1_;
      }

      ComputeCurrentValue();
    }

    // Returns a newly allocated copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }

    // Address of the cached tuple for the current position.
    virtual const ParamType* Current() const {
      return &current_value_;
    }

    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Sharing a base generator means the other iterator has this exact
      // type, which makes the downcast below legitimate.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const rhs =
        CheckedDowncastToActualType<const Iterator>(&other);

      // Two past-the-end iterators compare equal regardless of which
      // component positions got them there.
      if (AtEnd() && rhs->AtEnd())
        return true;

      return current1_ == rhs->current1_ &&
             current2_ == rhs->current2_ &&
             current3_ == rhs->current3_ &&
             current4_ == rhs->current4_ &&
             current5_ == rhs->current5_ &&
             current6_ == rhs->current6_ &&
             current7_ == rhs->current7_ &&
             current8_ == rhs->current8_ &&
             current9_ == rhs->current9_ &&
             current10_ == rhs->current10_;
    }

   private:
    // Copying is reserved for Clone().
    Iterator(const Iterator& other)
      : base_(other.base_),
        begin1_(other.begin1_), end1_(other.end1_),
        current1_(other.current1_),
        begin2_(other.begin2_), end2_(other.end2_),
        current2_(other.current2_),
        begin3_(other.begin3_), end3_(other.end3_),
        current3_(other.current3_),
        begin4_(other.begin4_), end4_(other.end4_),
        current4_(other.current4_),
        begin5_(other.begin5_), end5_(other.end5_),
        current5_(other.current5_),
        begin6_(other.begin6_), end6_(other.end6_),
        current6_(other.current6_),
        begin7_(other.begin7_), end7_(other.end7_),
        current7_(other.current7_),
        begin8_(other.begin8_), end8_(other.end8_),
        current8_(other.current8_),
        begin9_(other.begin9_), end9_(other.end9_),
        current9_(other.current9_),
        begin10_(other.begin10_), end10_(other.end10_),
        current10_(other.current10_) {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple. Nothing is dereferenced once the iterator
    // is past the end; the previously cached value is left in place.
    void ComputeCurrentValue() {
      if (AtEnd())
        return;
      current_value_ = ParamType(*current1_, *current2_, *current3_,
                                 *current4_, *current5_, *current6_,
                                 *current7_, *current8_, *current9_,
                                 *current10_);
    }

    // The iterator is past the end of the product as soon as any single
    // component has reached the end of its own range.
    bool AtEnd() const {
      if (current1_ == end1_) return true;
      if (current2_ == end2_) return true;
      if (current3_ == end3_) return true;
      if (current4_ == end4_) return true;
      if (current5_ == end5_) return true;
      if (current6_ == end6_) return true;
      if (current7_ == end7_) return true;
      if (current8_ == end8_) return true;
      if (current9_ == end9_) return true;
      return current10_ == end10_;
    }

    // Declared but never defined: iterators cannot be assigned.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // For component i: begin<i>_ / end<i>_ bound the range being traversed
    // and current<i>_ is the live position inside it.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    const typename ParamGenerator<T3>::iterator begin3_;
    const typename ParamGenerator<T3>::iterator end3_;
    typename ParamGenerator<T3>::iterator current3_;
    const typename ParamGenerator<T4>::iterator begin4_;
    const typename ParamGenerator<T4>::iterator end4_;
    typename ParamGenerator<T4>::iterator current4_;
    const typename ParamGenerator<T5>::iterator begin5_;
    const typename ParamGenerator<T5>::iterator end5_;
    typename ParamGenerator<T5>::iterator current5_;
    const typename ParamGenerator<T6>::iterator begin6_;
    const typename ParamGenerator<T6>::iterator end6_;
    typename ParamGenerator<T6>::iterator current6_;
    const typename ParamGenerator<T7>::iterator begin7_;
    const typename ParamGenerator<T7>::iterator end7_;
    typename ParamGenerator<T7>::iterator current7_;
    const typename ParamGenerator<T8>::iterator begin8_;
    const typename ParamGenerator<T8>::iterator end8_;
    typename ParamGenerator<T8>::iterator current8_;
    const typename ParamGenerator<T9>::iterator begin9_;
    const typename ParamGenerator<T9>::iterator end9_;
    typename ParamGenerator<T9>::iterator current9_;
    const typename ParamGenerator<T10>::iterator begin10_;
    const typename ParamGenerator<T10>::iterator end10_;
    typename ParamGenerator<T10>::iterator current10_;
    // Tuple designated by the current positions; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator10::Iterator

  // Declared but never defined: generators cannot be assigned.
  void operator=(const CartesianProductGenerator10& other);

  // Copies of the component generators, in tuple order.
  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
  const ParamGenerator<T3> g3_;
  const ParamGenerator<T4> g4_;
  const ParamGenerator<T5> g5_;
  const ParamGenerator<T6> g6_;
  const ParamGenerator<T7> g7_;
  const ParamGenerator<T8> g8_;
  const ParamGenerator<T9> g9_;
  const ParamGenerator<T10> g10_;
};  // class CartesianProductGenerator10


// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Helper classes providing Combine() with polymorphic features. They allow
// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
// convertible to U.
//
// Keeps copies of two generator-like objects and converts, on request, into
// a ParamGenerator over 2-tuples backed by a CartesianProductGenerator2.
template <class Generator1, class Generator2>
class CartesianProductHolder2 {
 public:
  CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
    : g1_(g1),
      g2_(g2) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator2.
  template <typename T1, typename T2>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const {
    typedef ::std::tr1::tuple<T1, T2> TupleType;
    typedef CartesianProductGenerator2<T1, T2> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder2& other);

  const Generator1 g1_;
  const Generator2 g2_;
};  // class CartesianProductHolder2

// Keeps copies of three generator-like objects and converts, on request,
// into a ParamGenerator over 3-tuples backed by a CartesianProductGenerator3.
template <class Generator1, class Generator2, class Generator3>
class CartesianProductHolder3 {
 public:
  CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3)
    : g1_(g1),
      g2_(g2),
      g3_(g3) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator3.
  template <typename T1, typename T2, typename T3>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3> TupleType;
    typedef CartesianProductGenerator3<T1, T2, T3> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder3& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
};  // class CartesianProductHolder3

// Keeps copies of four generator-like objects and converts, on request,
// into a ParamGenerator over 4-tuples backed by a CartesianProductGenerator4.
template <class Generator1, class Generator2, class Generator3,
          class Generator4>
class CartesianProductHolder4 {
 public:
  CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator4.
  template <typename T1, typename T2, typename T3, typename T4>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4> TupleType;
    typedef CartesianProductGenerator4<T1, T2, T3, T4> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder4& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
};  // class CartesianProductHolder4

// Keeps copies of five generator-like objects and converts, on request,
// into a ParamGenerator over 5-tuples backed by a CartesianProductGenerator5.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5>
class CartesianProductHolder5 {
 public:
  CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator5.
  template <typename T1, typename T2, typename T3, typename T4, typename T5>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> TupleType;
    typedef CartesianProductGenerator5<T1, T2, T3, T4, T5> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder5& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
};  // class CartesianProductHolder5

// Keeps copies of six generator-like objects and converts, on request,
// into a ParamGenerator over 6-tuples backed by a CartesianProductGenerator6.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5, class Generator6>
class CartesianProductHolder6 {
 public:
  CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator6.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
            typename T6>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> TupleType;
    typedef CartesianProductGenerator6<T1, T2, T3, T4, T5, T6> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder6& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
};  // class CartesianProductHolder6

// Keeps copies of seven generator-like objects and converts, on request,
// into a ParamGenerator over 7-tuples backed by a CartesianProductGenerator7.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5, class Generator6, class Generator7>
class CartesianProductHolder7 {
 public:
  CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator7.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
            typename T6, typename T7>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
  T7> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> TupleType;
    typedef CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder7& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
};  // class CartesianProductHolder7

// Keeps copies of eight generator-like objects and converts, on request,
// into a ParamGenerator over 8-tuples backed by a CartesianProductGenerator8.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5, class Generator6, class Generator7,
          class Generator8>
class CartesianProductHolder8 {
 public:
  CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7, const Generator8& g8)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator8.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
            typename T6, typename T7, typename T8>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7,
  T8> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> TupleType;
    typedef CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>
        ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder8& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
};  // class CartesianProductHolder8

// Keeps copies of nine generator-like objects and converts, on request,
// into a ParamGenerator over 9-tuples backed by a CartesianProductGenerator9.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5, class Generator6, class Generator7,
          class Generator8, class Generator9>
class CartesianProductHolder9 {
 public:
  CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
                          const Generator3& g3, const Generator4& g4,
                          const Generator5& g5, const Generator6& g6,
                          const Generator7& g7, const Generator8& g8,
                          const Generator9& g9)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8),
      g9_(g9) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator9.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
            typename T6, typename T7, typename T8, typename T9>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
  T9> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> TupleType;
    typedef CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>
        ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_),
        static_cast<ParamGenerator<T9> >(g9_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder9& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
  const Generator9 g9_;
};  // class CartesianProductHolder9

// Keeps copies of ten generator-like objects and converts, on request, into
// a ParamGenerator over 10-tuples backed by a CartesianProductGenerator10.
template <class Generator1, class Generator2, class Generator3,
          class Generator4, class Generator5, class Generator6, class Generator7,
          class Generator8, class Generator9, class Generator10>
class CartesianProductHolder10 {
 public:
  CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
                           const Generator3& g3, const Generator4& g4,
                           const Generator5& g5, const Generator6& g6,
                           const Generator7& g7, const Generator8& g8,
                           const Generator9& g9, const Generator10& g10)
    : g1_(g1),
      g2_(g2),
      g3_(g3),
      g4_(g4),
      g5_(g5),
      g6_(g6),
      g7_(g7),
      g8_(g8),
      g9_(g9),
      g10_(g10) {}

  // Casts each held object to ParamGenerator<Ti> and hands the resulting
  // generators to a newly allocated CartesianProductGenerator10.
  template <typename T1, typename T2, typename T3, typename T4, typename T5,
            typename T6, typename T7, typename T8, typename T9, typename T10>
  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
  T9, T10> >() const {
    typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>
        TupleType;
    typedef CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
                                        T10> ProductType;
    ProductType* const product = new ProductType(
        static_cast<ParamGenerator<T1> >(g1_),
        static_cast<ParamGenerator<T2> >(g2_),
        static_cast<ParamGenerator<T3> >(g3_),
        static_cast<ParamGenerator<T4> >(g4_),
        static_cast<ParamGenerator<T5> >(g5_),
        static_cast<ParamGenerator<T6> >(g6_),
        static_cast<ParamGenerator<T7> >(g7_),
        static_cast<ParamGenerator<T8> >(g8_),
        static_cast<ParamGenerator<T9> >(g9_),
        static_cast<ParamGenerator<T10> >(g10_));
    return ParamGenerator<TupleType>(product);
  }

 private:
  // Declared but never defined: holders cannot be assigned.
  void operator=(const CartesianProductHolder10& other);

  const Generator1 g1_;
  const Generator2 g2_;
  const Generator3 g3_;
  const Generator4 g4_;
  const Generator5 g5_;
  const Generator6 g6_;
  const Generator7 g7_;
  const Generator8 g8_;
  const Generator9 g9_;
  const Generator10 g10_;
};  // class CartesianProductHolder10

# endif  // GTEST_HAS_COMBINE

}  // namespace internal
}  // namespace testing

#endif  //  GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-param-util-generated.h.pump
================================================
$$ -*- mode: c++; -*-
$var n = 50  $$ Maximum length of Values arguments we want to support.
$var maxtuple = 10  $$ Maximum number of Combine arguments we want to support.
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)

// Type and function utilities for implementing parameterized tests.
// This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently Google Test supports at most $n arguments in Values,
// and at most $maxtuple arguments in Combine. Please contact
// googletestframework@googlegroups.com if you need more.
// Please note that the number of arguments to Combine is limited
// by the maximum arity of the implementation of tr1::tuple which is
// currently set at $maxtuple.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-param-util.h"
#include "gtest/internal/gtest-port.h"

#if GTEST_HAS_PARAM_TEST

namespace testing {

// Forward declarations of ValuesIn(), which is implemented in
// include/gtest/gtest-param-test.h.
// They are needed here because the ValueArray conversion operators defined
// further down in this file call ValuesIn().

// Iterator-pair overload: takes a begin/end pair; the generated parameter
// type is the iterator's value_type.
template <typename ForwardIterator>
internal::ParamGenerator<
  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
ValuesIn(ForwardIterator begin, ForwardIterator end);

// Array overload: takes a reference to a built-in array of N elements of T.
template <typename T, size_t N>
internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);

// Container overload: the generated parameter type is Container::value_type.
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
    const Container& container);

namespace internal {

// Used in the Values() function to provide polymorphic capabilities.
//
// Holds the single value passed in and converts, on request, to a
// ParamGenerator<T> for any T the stored value can be static_cast to.
template <typename T1>
class ValueArray1 {
 public:
  explicit ValueArray1(T1 v1) : v1_(v1) {}

  // The stored value is converted to T before the generator is built, the
  // same way the multi-value ValueArray classes in this file do it. Passing
  // the address range of v1_ to ValuesIn() instead would yield a
  // ParamGenerator<T1>, which only converts to ParamGenerator<T> when T and
  // T1 are the same type.
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {static_cast<T>(v1_)};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray1& other);

  const T1 v1_;
};

$range i 2..n
$for i [[
$range j 1..i

// Stores the values it is constructed with, each with its own type, and
// converts them into a ParamGenerator<T> on request.
template <$for j, [[typename T$j]]>
class ValueArray$i {
 public:
  ValueArray$i($for j, [[T$j v$j]]) : $for j, [[v$(j)_(v$j)]] {}

  // Every stored value is static_cast to T, collected into a local array,
  // and that array is handed to ValuesIn().
  template <typename T>
  operator ParamGenerator<T>() const {
    const T array[] = {$for j, [[static_cast<T>(v$(j)_)]]};
    return ValuesIn(array);
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const ValueArray$i& other);

$for j [[

  const T$j v$(j)_;
]]

};

]]

# if GTEST_HAS_COMBINE
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Generates values from the Cartesian product of values produced
// by the argument generators.
//
$range i 2..maxtuple
$for i [[
$range j 1..i
$range k 2..i

// Generator whose parameter type is a tuple with one element per component
// generator. It keeps a copy of every component generator and hands out
// iterators that walk all combinations of their values.
template <$for j, [[typename T$j]]>
class CartesianProductGenerator$i
    : public ParamGeneratorInterface< ::std::tr1::tuple<$for j, [[T$j]]> > {
 public:
  typedef ::std::tr1::tuple<$for j, [[T$j]]> ParamType;

  CartesianProductGenerator$i($for j, [[const ParamGenerator<T$j>& g$j]])
      : $for j, [[g$(j)_(g$j)]] {}
  virtual ~CartesianProductGenerator$i() {}

  // Begin() and End() each return a newly allocated Iterator; its component
  // positions all start at begin() or all at end(), respectively.
  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this, $for j, [[g$(j)_, g$(j)_.begin()]]);
  }
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this, $for j, [[g$(j)_, g$(j)_.end()]]);
  }

 private:
  // Tracks one position per component generator and caches the tuple those
  // positions currently designate.
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base, $for j, [[

      const ParamGenerator<T$j>& g$j,
      const typename ParamGenerator<T$j>::iterator& current$(j)]])
        : base_(base),
$for j, [[

          begin$(j)_(g$j.begin()), end$(j)_(g$j.end()), current$(j)_(current$j)
]]    {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Advance should not be called on beyond-of-range iterators
    // so no component iterators must be beyond end of range, either.
    virtual void Advance() {
      assert(!AtEnd());
      // The last component moves on every step. The checks that follow run
      // from the last component down to the second: a component that reached
      // its end restarts from its beginning and bumps the component before
      // it. The first component is never reset.
      ++current$(i)_;

$for k [[
      if (current$(i+2-k)_ == end$(i+2-k)_) {
        current$(i+2-k)_ = begin$(i+2-k)_;
        ++current$(i+2-k-1)_;
      }

]]
      ComputeCurrentValue();
    }
    // Returns a newly allocated copy of this iterator.
    virtual ParamIteratorInterface<ParamType>* Clone() const {
      return new Iterator(*this);
    }
    // Returns the address of the cached tuple.
    virtual const ParamType* Current() const { return &current_value_; }
    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
      // Having the same base generator guarantees that the other
      // iterator is of the same type and we can downcast.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* typed_other =
          CheckedDowncastToActualType<const Iterator>(&other);
      // We must report iterators equal if they both point beyond their
      // respective ranges. That can happen in a variety of fashions,
      // so we have to consult AtEnd().
      return (AtEnd() && typed_other->AtEnd()) ||
         ($for j  && [[

          current$(j)_ == typed_other->current$(j)_
]]);
    }

   private:
    // Private copy constructor; Clone() is the caller visible in this class.
    Iterator(const Iterator& other)
        : base_(other.base_), $for j, [[

        begin$(j)_(other.begin$(j)_),
        end$(j)_(other.end$(j)_),
        current$(j)_(other.current$(j)_)
]] {
      ComputeCurrentValue();
    }

    // Refreshes the cached tuple from the component positions. Nothing is
    // dereferenced when the iterator is past the end; the previously cached
    // value is left in place.
    void ComputeCurrentValue() {
      if (!AtEnd())
        current_value_ = ParamType($for j, [[*current$(j)_]]);
    }
    bool AtEnd() const {
      // We must report iterator past the end of the range when either of the
      // component iterators has reached the end of its range.
      return
$for j  || [[

          current$(j)_ == end$(j)_
]];
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
    // current[i]_ is the actual traversing iterator.
$for j [[

    const typename ParamGenerator<T$j>::iterator begin$(j)_;
    const typename ParamGenerator<T$j>::iterator end$(j)_;
    typename ParamGenerator<T$j>::iterator current$(j)_;
]]

    // Tuple designated by the current positions; see ComputeCurrentValue().
    ParamType current_value_;
  };  // class CartesianProductGenerator$i::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator$i& other);


$for j [[
  const ParamGenerator<T$j> g$(j)_;

]]
};  // class CartesianProductGenerator$i


]]

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Helper classes providing Combine() with polymorphic features. They allow
// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
// convertible to U.
//
$range i 2..maxtuple
$for i [[
$range j 1..i

// Holds a copy of every generator argument and defers construction of the
// Cartesian product until the tuple element types are known, that is,
// until the holder is converted to a ParamGenerator of tuples.
template <$for j, [[class Generator$j]]>
class CartesianProductHolder$i {
 public:
  // Copies each generator argument into the matching member.
CartesianProductHolder$i($for j, [[const Generator$j& g$j]])
      : $for j, [[g$(j)_(g$j)]] {}
  // Conversion to a generator of tuples. Every stored generator is
  // static_cast to a ParamGenerator of the requested element type, the
  // results are handed to a newly allocated CartesianProductGenerator, and
  // that object is wrapped in the returned ParamGenerator.
  template <$for j, [[typename T$j]]>
  operator ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >() const {
    return ParamGenerator< ::std::tr1::tuple<$for j, [[T$j]]> >(
        new CartesianProductGenerator$i<$for j, [[T$j]]>(
$for j,[[

        static_cast<ParamGenerator<T$j> >(g$(j)_)
]]));
  }

 private:
  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductHolder$i& other);


  // One const copy per generator argument, in argument order.
$for j [[
  const Generator$j g$(j)_;

]]
};  // class CartesianProductHolder$i

]]

# endif  // GTEST_HAS_COMBINE

}  // namespace internal
}  // namespace testing

#endif  //  GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-param-util.h
================================================
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: vladl@google.com (Vlad Losev)

// Type and function utilities for implementing parameterized tests.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_

#include <iterator>
#include <utility>
#include <vector>

// scripts/fuse_gtest.py depends on gtest's own header being #included
// *unconditionally*.  Therefore these #includes cannot be moved
// inside #if GTEST_HAS_PARAM_TEST.
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-linked_ptr.h"
#include "gtest/internal/gtest-port.h"
#include "gtest/gtest-printers.h"

#if GTEST_HAS_PARAM_TEST

namespace testing {
namespace internal {

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Outputs a message explaining an invalid registration of a different
// fixture class for the same test case. This may happen when the
// TEST_P macro is used to define two tests with the same name
// but in different namespaces.
//
// test_case_name is the test case name that was registered with
// conflicting fixture types. file and line are forwarded unchanged from
// the registry lookup that detected the conflict (presumably the source
// location of the offending registration - confirm at the definition).
// This is a declaration only; the definition is not part of this header.
GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
    const char* file, int line);

template <typename> class ParamGeneratorInterface;
template <typename> class ParamGenerator;

// Interface for iterating over elements provided by an implementation
// of ParamGeneratorInterface<T>. ParamIterator<T> owns one object of this
// type and forwards all of its operations to it.
template <typename T>
class ParamIteratorInterface {
 public:
  // Virtual so that implementations can be destroyed through a pointer
  // to this interface.
  virtual ~ParamIteratorInterface() {}
  // A pointer to the base generator instance.
  // Used only for the purposes of iterator comparison
  // to make sure that two iterators belong to the same generator.
  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
  // Advances iterator to point to the next element
  // provided by the generator. The caller is responsible
  // for not calling Advance() on an iterator equal to
  // BaseGenerator()->End().
  // Used for implementing ParamIterator<T>::operator++().
  virtual void Advance() = 0;
  // Clones the iterator object. Used for implementing copy semantics
  // of ParamIterator<T>, which takes ownership of the returned pointer.
  virtual ParamIteratorInterface* Clone() const = 0;
  // Dereferences the current iterator and provides (read-only) access
  // to the pointed value. It is the caller's responsibility not to call
  // Current() on an iterator equal to BaseGenerator()->End().
  // Used for implementing ParamIterator<T>::operator*() and operator->().
  virtual const T* Current() const = 0;
  // Determines whether the given iterator and other point to the same
  // element in the sequence generated by the generator.
  // Used for implementing ParamIterator<T>::operator==().
  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
};

// Class iterating over elements provided by an implementation of
// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
// and implements the const forward iterator concept.
//
// Every ParamIterator exclusively owns its implementation object; copying
// a ParamIterator clones that object rather than sharing it.
template <typename T>
class ParamIterator {
 public:
  typedef T value_type;
  typedef const T& reference;
  typedef ptrdiff_t difference_type;

  // Deep copy: the new iterator owns a fresh clone of other's
  // implementation.
  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
  // Deep assignment. Self-assignment is a no-op so that the implementation
  // is not needlessly cloned and replaced.
  ParamIterator& operator=(const ParamIterator& other) {
    if (this != &other) {
      impl_.reset(other.impl_->Clone());
    }

    return *this;
  }

  // Read-only access to the current element. Must not be used on an
  // iterator that is equal to the generator's end().
  const T& operator*() const {
    return *impl_->Current();
  }
  const T* operator->() const {
    return impl_->Current();
  }
  // Prefix version of operator++.
  ParamIterator& operator++() {
    impl_->Advance();
    return *this;
  }
  // Postfix version of operator++. Returns an iterator designating the
  // element this iterator referred to before it was advanced.
  ParamIterator operator++(int /*unused*/) {
    // Give the clone an owner *before* advancing: if Advance() throws,
    // the clone is released by `previous` instead of being leaked.
    ParamIterator previous(impl_->Clone());
    impl_->Advance();
    return previous;
  }
  // Two iterators are equal when they share the same implementation object
  // or when the implementation reports them as equal.
  bool operator==(const ParamIterator& other) const {
    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
  }
  bool operator!=(const ParamIterator& other) const {
    return !(*this == other);
  }

 private:
  friend class ParamGenerator<T>;
  // Takes ownership of impl. Reserved for ParamGenerator<T> and for
  // internal use.
  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
  scoped_ptr<ParamIteratorInterface<T> > impl_;
};

// ParamGeneratorInterface<T> is the binary interface to access generators
// defined in other translation units. Concrete generators derive from it
// and are used through ParamGenerator<T>.
template <typename T>
class ParamGeneratorInterface {
 public:
  // The type of the values produced by the generator.
  typedef T ParamType;

  // Virtual so that generators can be destroyed through this interface.
  virtual ~ParamGeneratorInterface() {}

  // Generator interface definition
  // Begin() returns an iterator to the first generated element and End()
  // an iterator designating the position past the last one.
  // ParamGenerator<T>::begin()/end() wrap the returned pointer in a
  // ParamIterator<T>, which takes ownership of it.
  virtual ParamIteratorInterface<T>* Begin() const = 0;
  virtual ParamIteratorInterface<T>* End() const = 0;
};

// Value-semantic handle around a ParamGeneratorInterface<T> that exposes the
// begin()/end() syntax of the STL Container concept.
// Copying or assigning a ParamGenerator does not duplicate the wrapped
// ParamGeneratorInterface<T> instance: all copies share it. Sharing is safe
// because that instance is immutable.
template<typename T>
class ParamGenerator {
 public:
  typedef ParamIterator<T> iterator;

  // Starts managing impl through the shared pointer member.
  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
  // Shares other's implementation instance.
  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}

  // Drops the current implementation reference and shares other's instead.
  ParamGenerator& operator=(const ParamGenerator& other) {
    this->impl_ = other.impl_;
    return *this;
  }

  // Iterator to the first generated value.
  iterator begin() const {
    ParamIteratorInterface<T>* const first = impl_->Begin();
    return iterator(first);
  }
  // Iterator designating the position past the last generated value.
  iterator end() const {
    ParamIteratorInterface<T>* const past_last = impl_->End();
    return iterator(past_last);
  }

 private:
  // Shared, read-only implementation.
  linked_ptr<const ParamGeneratorInterface<T> > impl_;
};

// Generates values from a range of two comparable values. Can be used to
// generate sequences of user-defined types that implement operator+() and
// operator<().
// This class is used in the Range() function.
//
// The generated sequence is begin, begin + step, begin + 2 * step, ... for
// as long as the value compares less than end. The number of elements is
// computed once, in the constructor, by stepping through the whole range;
// a step that never makes "value < end" false therefore never terminates.
template <typename T, typename IncrementT>
class RangeGenerator : public ParamGeneratorInterface<T> {
 public:
  RangeGenerator(T begin, T end, IncrementT step)
    : begin_(begin), end_(end),
      step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
  virtual ~RangeGenerator() {}

  // Iterator positioned at index 0 with value begin_.
  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, begin_, 0, step_);
  }
  // Iterator positioned at end_index_. It carries end_ as its value, but
  // only the index takes part in comparisons.
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, end_, end_index_, step_);
  }

 private:
  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
             IncrementT step)
      : base_(base), value_(value), index_(index), step_(step) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }
    virtual void Advance() {
      // T + IncrementT may have a wider type than T (for example, integral
      // promotion turns char + int into int). Convert back explicitly
      // rather than relying on an implicit narrowing conversion.
      value_ = static_cast<T>(value_ + step_);
      index_++;
    }
    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }
    virtual const T* Current() const {
      return &value_;
    }
    // Iterators of the same generator are equal when they are at the same
    // index of the sequence; the values themselves are not compared.
    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // Having the same base generator guarantees that the other
      // iterator is of the same type and we can downcast.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const int other_index =
        CheckedDowncastToActualType<const Iterator>(&other)->index_;
      return index_ == other_index;
    }

   private:
    // Used by Clone() only.
    Iterator(const Iterator& other)
      : ParamIteratorInterface<T>(),
        base_(other.base_), value_(other.value_), index_(other.index_),
        step_(other.step_) {}

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<T>* const base_;
    T value_;
    int index_;
    const IncrementT step_;
  };  // class RangeGenerator::Iterator

  // Counts the elements of the sequence by walking it from begin until the
  // value is no longer less than end. The count doubles as the index of
  // the end() iterator.
  static int CalculateEndIndex(const T& begin,
                               const T& end,
                               const IncrementT& step) {
    int end_index = 0;

    // Same explicit conversion as in Iterator::Advance(), so that both walk
    // exactly the same sequence of values.
    for (T i = begin; i < end; i = static_cast<T>(i + step)) {
      end_index++;
    }

    return end_index;
  }

  // No implementation - assignment is unsupported.
  void operator=(const RangeGenerator& other);

  const T begin_;
  const T end_;
  const IncrementT step_;
  // The index for the end() iterator. All the elements in the generated
  // sequence are indexed (0-based) to aid iterator comparison.
  const int end_index_;
};  // class RangeGenerator


// Generator backing the ValuesIn() function: it yields the elements found
// between a pair of STL-style iterators. The elements are copied into an
// internal container at construction time, because the source range may
// live on the stack while the generator is likely to outlive that frame.
template <typename T>
class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
 public:
  template <typename ForwardIterator>
  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
    : container_(begin, end) {}
  virtual ~ValuesInIteratorRangeGenerator() {}

  // Iterator over the first copied element.
  virtual ParamIteratorInterface<T>* Begin() const {
    return new Iterator(this, container_.begin());
  }
  // Iterator past the last copied element.
  virtual ParamIteratorInterface<T>* End() const {
    return new Iterator(this, container_.end());
  }

 private:
  typedef typename ::std::vector<T> ContainerType;

  class Iterator : public ParamIteratorInterface<T> {
   public:
    Iterator(const ParamGeneratorInterface<T>* base,
             typename ContainerType::const_iterator position)
      : base_(base), iterator_(position) {}
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
      return base_;
    }
    // Moves to the next element and drops the cached copy of the previous
    // one; the cache is refilled on demand by Current().
    virtual void Advance() {
      ++iterator_;
      value_.reset();
    }
    virtual ParamIteratorInterface<T>* Clone() const {
      return new Iterator(*this);
    }
    // Returns a pointer to a cached copy of *iterator_. A cache is required
    // because *iterator_ may yield a temporary (possibly of a type other
    // than T), which rules out "return &*iterator_;".
    // The cache is filled here rather than in Advance(): Advance() may move
    // iterator_ past the end of the range without being able to notice,
    // whereas callers are responsible for never calling Current() on an
    // out-of-range iterator.
    virtual const T* Current() const {
      if (value_.get() != NULL) {
        return value_.get();
      }

      value_.reset(new T(*iterator_));
      return value_.get();
    }
    // Iterators of the same generator are equal when they designate the
    // same position in the container.
    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
      // Sharing the base generator implies that other has this very type,
      // which makes the downcast below valid.
      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
          << "The program attempted to compare iterators "
          << "from different generators." << std::endl;
      const Iterator* const typed_other =
          CheckedDowncastToActualType<const Iterator>(&other);
      return iterator_ == typed_other->iterator_;
    }

   private:
    // Used by Clone(). The cached value is intentionally not copied; the
    // clone rebuilds it lazily.
    // Spelling out the base-class constructor call silences a spurious
    // warning gcc emits under -Wextra.
    Iterator(const Iterator& other)
      : ParamIteratorInterface<T>(),
        base_(other.base_),
        iterator_(other.iterator_) {}

    const ParamGeneratorInterface<T>* const base_;
    typename ContainerType::const_iterator iterator_;
    // Lazily created copy of *iterator_, kept so that the wrapping
    // iterator's operator->() has an object to point at. It is mutable
    // because the const method Current() fills it, and it is held in a
    // scoped_ptr so that its lifetime is bounded by the iterator's.
    mutable scoped_ptr<const T> value_;
  };  // class ValuesInIteratorRangeGenerator::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const ValuesInIteratorRangeGenerator& other);

  const ContainerType container_;
};  // class ValuesInIteratorRangeGenerator

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Test factory bound to one parameter value: it keeps a private copy of
// the value and hands its address to TestClass before every test object
// it creates.
template <class TestClass>
class ParameterizedTestFactory : public TestFactoryBase {
 public:
  typedef typename TestClass::ParamType ParamType;
  explicit ParameterizedTestFactory(ParamType parameter)
    : parameter_(parameter) {}
  // Publishes the stored parameter through TestClass::SetParam() and then
  // instantiates the test, in that order.
  virtual Test* CreateTest() {
    TestClass::SetParam(&parameter_);
    Test* const test_instance = new TestClass();
    return test_instance;
  }

 private:
  // The parameter value all tests created by this factory run with.
  const ParamType parameter_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// TestMetaFactoryBase is a base class for meta-factories that create
// test factories for passing into MakeAndRegisterTestInfo function.
// ParamType is the type of the parameter value each created factory is
// bound to.
template <class ParamType>
class TestMetaFactoryBase {
 public:
  // Virtual so that meta-factories can be destroyed through this base.
  virtual ~TestMetaFactoryBase() {}

  // Creates a test factory for the given parameter value.
  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Factory of test factories. MakeAndRegisterTestInfo takes ownership of the
// test factory pointer it is given, so one factory object cannot be passed
// to it twice. ParameterizedTestCaseInfo, however, has to call it once per
// Test/Parameter value combination, and therefore needs an object able to
// produce a new factory for each call: that is TestMetaFactory.
template <class TestCase>
class TestMetaFactory
  : public TestMetaFactoryBase<typename TestCase::ParamType> {
 public:
  typedef typename TestCase::ParamType ParamType;

  TestMetaFactory() {}

  // Allocates a new ParameterizedTestFactory bound to the given parameter.
  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
    TestFactoryBase* const factory =
        new ParameterizedTestFactory<TestCase>(parameter);
    return factory;
  }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfoBase is a generic interface
// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
// accumulates test information provided by TEST_P macro invocations
// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
// and uses that information to register all resulting test instances
// in RegisterTests method. The ParameterizedTestCaseRegistry class holds
// a collection of pointers to the ParameterizedTestCaseInfo objects
// and calls RegisterTests() on each of them when asked.
class ParameterizedTestCaseInfoBase {
 public:
  // Virtual because the registry deletes the objects through pointers to
  // this base class.
  virtual ~ParameterizedTestCaseInfoBase() {}

  // Base part of test case name for display purposes.
  virtual const string& GetTestCaseName() const = 0;
  // Test case id to verify identity.
  virtual TypeId GetTestCaseTypeId() const = 0;
  // UnitTest class invokes this method to register tests in this
  // test case right before running them in RUN_ALL_TESTS macro.
  // This method should not be called more than once on any single
  // instance of a ParameterizedTestCaseInfoBase derived class.
  virtual void RegisterTests() = 0;

 protected:
  // Only derived classes may construct the base.
  ParameterizedTestCaseInfoBase() {}

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
};

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
// macro invocations for a particular test case and generators
// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
// test case. It registers tests with all values generated by all
// generators when asked.
template <class TestCase>
class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
 public:
  // ParamType and GeneratorCreationFunc are private types but are required
  // for declarations of public methods AddTestPattern() and
  // AddTestCaseInstantiation().
  typedef typename TestCase::ParamType ParamType;
  // A function that returns an instance of appropriate generator type.
  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();

  explicit ParameterizedTestCaseInfo(const char* name)
    : test_case_name_(name) {}

  // Test case base name for display purposes.
  virtual const string& GetTestCaseName() const {
    return test_case_name_;
  }
  // Test case id to verify identity.
  virtual TypeId GetTestCaseTypeId() const {
    return GetTypeId<TestCase>();
  }
  // TEST_P macro uses AddTestPattern() to record information
  // about a single test in a LocalTestInfo structure.
  // test_case_name is the base name of the test case (without invocation
  // prefix). test_base_name is the name of an individual test without
  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
  // test case base name and DoBar is test base name.
  void AddTestPattern(const char* test_case_name,
                      const char* test_base_name,
                      TestMetaFactoryBase<ParamType>* meta_factory) {
    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
                                          test_base_name,
                                          meta_factory)));
  }
  // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
  // about a generator.
  int AddTestCaseInstantiation(const string& instantiation_name,
                               GeneratorCreationFunc* func,
                               const char* /* file */,
                               int /* line */) {
    instantiations_.push_back(::std::make_pair(instantiation_name, func));
    return 0;  // Return value used only to run this method in namespace scope.
  }
  // UnitTest class invokes this method to register tests in this test case
  // test cases right before running tests in RUN_ALL_TESTS macro.
  // This method should not be called more then once on any single
  // instance of a ParameterizedTestCaseInfoBase derived class.
  // UnitTest has a guard to prevent from calling this method more then once.
  virtual void RegisterTests() {
    for (typename TestInfoContainer::iterator test_it = tests_.begin();
         test_it != tests_.end(); ++test_it) {
      linked_ptr<TestInfo> test_info = *test_it;

      for (typename InstantiationContainer::iterator gen_it =
             instantiations_.begin(); gen_it != instantiations_.end();
           ++gen_it) {
        const string& instantiation_name = gen_it->first;
        ParamGenerator<ParamType> generator((*gen_it->second)());

        string test_case_name;

        if ( !instantiation_name.empty() ) {
          test_case_name = instantiation_name + "/";
        }

        test_case_name += test_info->test_case_base_name;

        int i = 0;

        for (typename ParamGenerator<ParamType>::iterator param_it =
               generator.begin();
             param_it != generator.end(); ++param_it, ++i) {
          Message test_name_stream;
          test_name_stream << test_info->test_base_name << "/" << i;
          MakeAndRegisterTestInfo(
            test_case_name.c_str(),
            test_name_stream.GetString().c_str(),
            NULL,  // No type parameter.
            PrintToString(*param_it).c_str(),
            GetTestCaseTypeId(),
            TestCase::SetUpTestCase,
            TestCase::TearDownTestCase,
            test_info->test_meta_factory->CreateTestFactory(*param_it));
        }  // for param_it
      }  // for gen_it
    }  // for test_it
  }  // RegisterTests

 private:
  // LocalTestInfo structure keeps information about a single test registered
  // with TEST_P macro.
  struct TestInfo {
    TestInfo(const char* a_test_case_base_name,
             const char* a_test_base_name,
             TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
      test_case_base_name(a_test_case_base_name),
      test_base_name(a_test_base_name),
      test_meta_factory(a_test_meta_factory) {}

    const string test_case_base_name;
    const string test_base_name;
    const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
  };
  typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
  // Keeps pairs of <Instantiation name, Sequence generator creation function>
  // received from INSTANTIATE_TEST_CASE_P macros.
  typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
  InstantiationContainer;

  const string test_case_name_;
  TestInfoContainer tests_;
  InstantiationContainer instantiations_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
};  // class ParameterizedTestCaseInfo

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
// macros use it to locate their corresponding ParameterizedTestCaseInfo
// descriptors.
class ParameterizedTestCaseRegistry {
 public:
  ParameterizedTestCaseRegistry() {}
  ~ParameterizedTestCaseRegistry() {
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      delete *it;
    }
  }

  // Looks up or creates and returns a structure containing information about
  // tests and instantiations of a particular test case.
  template <class TestCase>
  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
    const char* test_case_name,
    const char* file,
    int line) {
    ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;

    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      if ((*it)->GetTestCaseName() == test_case_name) {
        if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
          // Complain about incorrect usage of Google Test facilities
          // and terminate the program since we cannot guaranty correct
          // test case setup and tear-down in this case.
          ReportInvalidTestCaseType(test_case_name,  file, line);
          posix::Abort();
        }
        else {
          // At this point we are sure that the object we found is of the same
          // type we are looking for, so we downcast it to that type
          // without further checks.
          typed_test_info = CheckedDowncastToActualType <
                            ParameterizedTestCaseInfo<TestCase> > (*it);
        }

        break;
      }
    }

    if (typed_test_info == NULL) {
      typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
      test_case_infos_.push_back(typed_test_info);
    }

    return typed_test_info;
  }
  void RegisterTests() {
    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
         it != test_case_infos_.end(); ++it) {
      (*it)->RegisterTests();
    }
  }

 private:
  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;

  TestCaseInfoContainer test_case_infos_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
};

}  // namespace internal
}  // namespace testing

#endif  //  GTEST_HAS_PARAM_TEST

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-port.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan)
//
// Low-level types and utilities for porting Google Test to various
// platforms.  They are subject to change without notice.  DO NOT USE
// THEM IN USER CODE.
//
// This file is fundamental to Google Test.  All other Google Test source
// files are expected to #include this.  Therefore, it cannot #include
// any other Google Test header.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_

// The user can define the following macros in the build script to
// control Google Test's behavior.  If the user doesn't define a macro
// in this list, Google Test will define it.
//
//   GTEST_HAS_CLONE          - Define it to 1/0 to indicate that clone(2)
//                              is/isn't available.
//   GTEST_HAS_EXCEPTIONS     - Define it to 1/0 to indicate that exceptions
//                              are enabled.
//   GTEST_HAS_GLOBAL_STRING  - Define it to 1/0 to indicate that ::string
//                              is/isn't available (some systems define
//                              ::string, which is different to std::string).
//   GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring
//                              is/isn't available (some systems define
//                              ::wstring, which is different to std::wstring).
//   GTEST_HAS_POSIX_RE       - Define it to 1/0 to indicate that POSIX regular
//                              expressions are/aren't available.
//   GTEST_HAS_PTHREAD        - Define it to 1/0 to indicate that <pthread.h>
//                              is/isn't available.
//   GTEST_HAS_RTTI           - Define it to 1/0 to indicate that RTTI is/isn't
//                              enabled.
//   GTEST_HAS_STD_WSTRING    - Define it to 1/0 to indicate that
//                              std::wstring does/doesn't work (Google Test can
//                              be used where std::wstring is unavailable).
//   GTEST_HAS_TR1_TUPLE      - Define it to 1/0 to indicate tr1::tuple
//                              is/isn't available.
//   GTEST_HAS_SEH            - Define it to 1/0 to indicate whether the
//                              compiler supports Microsoft's "Structured
//                              Exception Handling".
//   GTEST_HAS_STREAM_REDIRECTION
//                            - Define it to 1/0 to indicate whether the
//                              platform supports I/O stream redirection using
//                              dup() and dup2().
//   GTEST_USE_OWN_TR1_TUPLE  - Define it to 1/0 to indicate whether Google
//                              Test's own tr1 tuple implementation should be
//                              used.  Unused when the user sets
//                              GTEST_HAS_TR1_TUPLE to 0.
//   GTEST_LANG_CXX11         - Define it to 1/0 to indicate that Google Test
//                              is building in C++11/C++98 mode.
//   GTEST_LINKED_AS_SHARED_LIBRARY
//                            - Define to 1 when compiling tests that use
//                              Google Test as a shared library (known as
//                              DLL on Windows).
//   GTEST_CREATE_SHARED_LIBRARY
//                            - Define to 1 when compiling Google Test itself
//                              as a shared library.

// This header defines the following utilities:
//
// Macros indicating the current platform (defined to 1 if compiled on
// the given platform; otherwise undefined):
//   GTEST_OS_AIX      - IBM AIX
//   GTEST_OS_CYGWIN   - Cygwin
//   GTEST_OS_HPUX     - HP-UX
//   GTEST_OS_LINUX    - Linux
//     GTEST_OS_LINUX_ANDROID - Google Android
//   GTEST_OS_MAC      - Mac OS X
//     GTEST_OS_IOS    - iOS
//       GTEST_OS_IOS_SIMULATOR - iOS simulator
//   GTEST_OS_NACL     - Google Native Client (NaCl)
//   GTEST_OS_OPENBSD  - OpenBSD
//   GTEST_OS_QNX      - QNX
//   GTEST_OS_SOLARIS  - Sun Solaris
//   GTEST_OS_SYMBIAN  - Symbian
//   GTEST_OS_WINDOWS  - Windows (Desktop, MinGW, or Mobile)
//     GTEST_OS_WINDOWS_DESKTOP  - Windows Desktop
//     GTEST_OS_WINDOWS_MINGW    - MinGW
//     GTEST_OS_WINDOWS_MOBILE   - Windows Mobile
//   GTEST_OS_ZOS      - z/OS
//
// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
// most stable support.  Since core members of the Google Test project
// don't have access to other platforms, support for them may be less
// stable.  If you notice any problems on your platform, please notify
// googletestframework@googlegroups.com (patches for fixing them are
// even more welcome!).
//
// Note that it is possible that none of the GTEST_OS_* macros are defined.
//
// Macros indicating available Google Test features (defined to 1 if
// the corresponding feature is supported; otherwise undefined):
//   GTEST_HAS_COMBINE      - the Combine() function (for value-parameterized
//                            tests)
//   GTEST_HAS_DEATH_TEST   - death tests
//   GTEST_HAS_PARAM_TEST   - value-parameterized tests
//   GTEST_HAS_TYPED_TEST   - typed tests
//   GTEST_HAS_TYPED_TEST_P - type-parameterized tests
//   GTEST_USES_POSIX_RE    - enhanced POSIX regex is used. Do not confuse with
//                            GTEST_HAS_POSIX_RE (see above) which users can
//                            define themselves.
//   GTEST_USES_SIMPLE_RE   - our own simple regex is used;
//                            the above two are mutually exclusive.
//   GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
//
// Macros for basic C++ coding:
//   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
//   GTEST_ATTRIBUTE_UNUSED_  - declares that a class' instances or a
//                              variable don't have to be used.
//   GTEST_DISALLOW_ASSIGN_   - disables operator=.
//   GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
//   GTEST_MUST_USE_RESULT_   - declares that a function's result must be used.
//
// Synchronization:
//   Mutex, MutexLock, ThreadLocal, GetThreadCount()
//                  - synchronization primitives.
//   GTEST_IS_THREADSAFE - defined to 1 to indicate that the above
//                         synchronization primitives have real implementations
//                         and Google Test is thread-safe; or 0 otherwise.
//
// Template meta programming:
//   is_pointer     - as in TR1; needed on Symbian and IBM XL C/C++ only.
//   IteratorTraits - partial implementation of std::iterator_traits, which
//                    is not available in libCstd when compiled with Sun C++.
//
// Smart pointers:
//   scoped_ptr     - as in TR2.
//
// Regular expressions:
//   RE             - a simple regular expression class using the POSIX
//                    Extended Regular Expression syntax on UNIX-like
//                    platforms, or a reduced regular expression syntax on
//                    other platforms, including Windows.
//
// Logging:
//   GTEST_LOG_()   - logs messages at the specified severity level.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.
//
// Stdout and stderr capturing:
//   CaptureStdout()     - starts capturing stdout.
//   GetCapturedStdout() - stops capturing stdout and returns the captured
//                         string.
//   CaptureStderr()     - starts capturing stderr.
//   GetCapturedStderr() - stops capturing stderr and returns the captured
//                         string.
//
// Integer types:
//   TypeWithSize   - maps an integer to an int type.
//   Int32, UInt32, Int64, UInt64, TimeInMillis
//                  - integers of known sizes.
//   BiggestInt     - the biggest signed integer type.
//
// Command-line utilities:
//   GTEST_FLAG()       - references a flag.
//   GTEST_DECLARE_*()  - declares a flag.
//   GTEST_DEFINE_*()   - defines a flag.
//   GetInjectableArgvs() - returns the command line as a vector of strings.
//
// Environment variable utilities:
//   GetEnv()             - gets the value of an environment variable.
//   BoolFromGTestEnv()   - parses a bool environment variable.
//   Int32FromGTestEnv()  - parses an Int32 environment variable.
//   StringFromGTestEnv() - parses a string environment variable.

#include <ctype.h>   // for isspace, etc
#include <stddef.h>  // for ptrdiff_t
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef _WIN32_WCE
# include <sys/types.h>
# include <sys/stat.h>
#endif  // !_WIN32_WCE

#if defined __APPLE__
# include <AvailabilityMacros.h>
# include <TargetConditionals.h>
#endif

#include <iostream>  // NOLINT
#include <sstream>  // NOLINT
#include <string>  // NOLINT

// String constants shared across Google Test: the developer contact address,
// the three spellings of the flag prefix (lower-case, dashed, upper-case),
// the product name, and the project URL.
//
// GTEST_DEV_EMAIL_ is a plain e-mail address and therefore contains exactly
// one '@' (it matches the address given in the header comment above).
#define GTEST_DEV_EMAIL_ "googletestframework@googlegroups.com"
#define GTEST_FLAG_PREFIX_ "gtest_"
#define GTEST_FLAG_PREFIX_DASH_ "gtest-"
#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
#define GTEST_NAME_ "Google Test"
#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/"

// Determines the version of gcc that is used to compile this.
#ifdef __GNUC__
// 40302 means version 4.3.2.
# define GTEST_GCC_VER_ \
    (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
#endif  // __GNUC__

// Determines the platform on which Google Test is compiled.
#ifdef __CYGWIN__
# define GTEST_OS_CYGWIN 1
#elif defined __SYMBIAN32__
# define GTEST_OS_SYMBIAN 1
#elif defined _WIN32
# define GTEST_OS_WINDOWS 1
# ifdef _WIN32_WCE
#  define GTEST_OS_WINDOWS_MOBILE 1
# elif defined(__MINGW__) || defined(__MINGW32__)
#  define GTEST_OS_WINDOWS_MINGW 1
# else
#  define GTEST_OS_WINDOWS_DESKTOP 1
# endif  // _WIN32_WCE
#elif defined __APPLE__
# define GTEST_OS_MAC 1
# if TARGET_OS_IPHONE
#  define GTEST_OS_IOS 1
#  if TARGET_IPHONE_SIMULATOR
#   define GTEST_OS_IOS_SIMULATOR 1
#  endif
# endif
#elif defined __linux__
# define GTEST_OS_LINUX 1
# if defined __ANDROID__
#  define GTEST_OS_LINUX_ANDROID 1
# endif
#elif defined __MVS__
# define GTEST_OS_ZOS 1
#elif defined(__sun) && defined(__SVR4)
# define GTEST_OS_SOLARIS 1
#elif defined(_AIX)
# define GTEST_OS_AIX 1
#elif defined(__hpux)
# define GTEST_OS_HPUX 1
#elif defined __native_client__
# define GTEST_OS_NACL 1
#elif defined __OpenBSD__
# define GTEST_OS_OPENBSD 1
#elif defined __QNX__
# define GTEST_OS_QNX 1
#endif  // __CYGWIN__

#ifndef GTEST_LANG_CXX11
// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
// -std={c,gnu}++{0x,11} is passed.  The C++11 standard specifies a
// value for __cplusplus, and recent versions of clang, gcc, and
// probably other compilers set that too in C++11 mode.
# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L
// Compiling in at least C++11 mode.
#  define GTEST_LANG_CXX11 1
# else
#  define GTEST_LANG_CXX11 0
# endif
#endif

// Brings in definitions for functions used in the testing::internal::posix
// namespace (read, write, close, chdir, isatty, stat). We do not currently
// use them on Windows Mobile.
#if !GTEST_OS_WINDOWS
// This assumes that non-Windows OSes provide unistd.h. For OSes where this
// is not the case, we need to include headers that provide the functions
// mentioned above.
# include <unistd.h>
# include <strings.h>
#elif !GTEST_OS_WINDOWS_MOBILE
# include <direct.h>
# include <io.h>
#endif

#if GTEST_OS_LINUX_ANDROID
// Used to define __ANDROID_API__ matching the target NDK API level.
#  include <android/api-level.h>  // NOLINT
#endif

// Code Added by Ramesh based on changes by Mario
#if defined(_MSC_VER)
# include <windows.h>
#endif

// Defines this to true iff Google Test can use POSIX regular expressions.
#ifndef GTEST_HAS_POSIX_RE
# if GTEST_OS_LINUX_ANDROID
// On Android, <regex.h> is only available starting with Gingerbread.
#  define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
# else
#  define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
# endif
#endif

#if GTEST_HAS_POSIX_RE

// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise.  We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
# include <regex.h>  // NOLINT

# define GTEST_USES_POSIX_RE 1

#elif GTEST_OS_WINDOWS

// <regex.h> is not available on Windows.  Use our own simple regex
// implementation instead.
# define GTEST_USES_SIMPLE_RE 1

#else

// <regex.h> may not be available on this platform.  Use our own
// simple regex implementation instead.
# define GTEST_USES_SIMPLE_RE 1

#endif  // GTEST_HAS_POSIX_RE

#ifndef GTEST_HAS_EXCEPTIONS
// The user didn't tell us whether exceptions are enabled, so we need
// to figure it out.
# if defined(_MSC_VER) || defined(__BORLANDC__)
// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS
// macro to enable exceptions, so we'll do the same.
// Assumes that exceptions are enabled by default.
#  ifndef _HAS_EXCEPTIONS
#   define _HAS_EXCEPTIONS 1
#  endif  // _HAS_EXCEPTIONS
#  define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
# elif defined(__GNUC__) && __EXCEPTIONS
// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
#  define GTEST_HAS_EXCEPTIONS 1
# elif defined(__SUNPRO_CC)
// Sun Pro CC supports exceptions.  However, there is no compile-time way of
// detecting whether they are enabled or not.  Therefore, we assume that
// they are enabled unless the user tells us otherwise.
#  define GTEST_HAS_EXCEPTIONS 1
# elif defined(__IBMCPP__) && __EXCEPTIONS
// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
#  define GTEST_HAS_EXCEPTIONS 1
# elif defined(__HP_aCC)
// Exception handling is in effect by default in HP aCC compiler. It has to
// be turned off by +noeh compiler option if desired.
#  define GTEST_HAS_EXCEPTIONS 1
# else
// For other compilers, we assume exceptions are disabled to be
// conservative.
#  define GTEST_HAS_EXCEPTIONS 0
# endif  // defined(_MSC_VER) || defined(__BORLANDC__)
#endif  // GTEST_HAS_EXCEPTIONS

#if !defined(GTEST_HAS_STD_STRING)
// Even though we don't use this macro any longer, we keep it in case
// some clients still depend on it.
# define GTEST_HAS_STD_STRING 1
#elif !GTEST_HAS_STD_STRING
// The user told us that ::std::string isn't available.
# error "Google Test cannot be used where ::std::string isn't available."
#endif  // !defined(GTEST_HAS_STD_STRING)

#ifndef GTEST_HAS_GLOBAL_STRING
// The user didn't tell us whether ::string is available, so we need
// to figure it out.

# define GTEST_HAS_GLOBAL_STRING 0

#endif  // GTEST_HAS_GLOBAL_STRING

#ifndef GTEST_HAS_STD_WSTRING
// The user didn't tell us whether ::std::wstring is available, so we need
// to figure it out.
// TODO(wan@google.com): use autoconf to detect whether ::std::wstring
//   is available.

// Cygwin 1.7 and below doesn't support ::std::wstring.
// Solaris' libc++ doesn't support it either.  Android has
// no support for it at least as recent as Froyo (2.2).
# define GTEST_HAS_STD_WSTRING \
    (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))

#endif  // GTEST_HAS_STD_WSTRING

#ifndef GTEST_HAS_GLOBAL_WSTRING
// The user didn't tell us whether ::wstring is available, so we need
// to figure it out.
# define GTEST_HAS_GLOBAL_WSTRING \
    (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING)
#endif  // GTEST_HAS_GLOBAL_WSTRING

// Determines whether RTTI is available.
#ifndef GTEST_HAS_RTTI
// The user didn't tell us whether RTTI is enabled, so we need to
// figure it out.

# ifdef _MSC_VER

#  ifdef _CPPRTTI  // MSVC defines this macro iff RTTI is enabled.
#   define GTEST_HAS_RTTI 1
#  else
#   define GTEST_HAS_RTTI 0
#  endif

// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)

#  ifdef __GXX_RTTI
// When building against STLport with the Android NDK and with
// -frtti -fno-exceptions, the build fails at link time with undefined
// references to __cxa_bad_typeid. Not sure if STL or toolchain bug,
// so disable RTTI when detected.
#   if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \
       !defined(__EXCEPTIONS)
#    define GTEST_HAS_RTTI 0
#   else
#    define GTEST_HAS_RTTI 1
#   endif  // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS
#  else
#   define GTEST_HAS_RTTI 0
#  endif  // __GXX_RTTI

// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends
// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the
// first version with C++ support.
# elif defined(__clang__)

#  define GTEST_HAS_RTTI __has_feature(cxx_rtti)

// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if
// both the typeid and dynamic_cast features are present.
# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)

#  ifdef __RTTI_ALL__
#   define GTEST_HAS_RTTI 1
#  else
#   define GTEST_HAS_RTTI 0
#  endif

# else

// For all other compilers, we assume RTTI is enabled.
#  define GTEST_HAS_RTTI 1

# endif  // _MSC_VER

#endif  // GTEST_HAS_RTTI

// It's this header's responsibility to #include <typeinfo> when RTTI
// is enabled.
#if GTEST_HAS_RTTI
# include <typeinfo>
#endif

// Determines whether Google Test can use the pthreads library.
#ifndef GTEST_HAS_PTHREAD
// The user didn't tell us explicitly, so we assume pthreads support is
// available on Linux and Mac.
//
// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
// to your compiler flags.
# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \
    || GTEST_OS_QNX)
#endif  // GTEST_HAS_PTHREAD

#if GTEST_HAS_PTHREAD
// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is
// true.
# include <pthread.h>  // NOLINT

// For timespec and nanosleep, used below.
# include <time.h>  // NOLINT
#endif

// Determines whether Google Test can use tr1/tuple.  You can define
// this macro to 0 to prevent Google Test from using tuple (any
// feature depending on tuple will be disabled in this mode).
//
// NOTE: both branches below define the macro to 0, so unless the user
// defines GTEST_HAS_TR1_TUPLE beforehand, tuple support is off and
// GTEST_HAS_COMBINE (which requires GTEST_HAS_TR1_TUPLE) is not defined.
#ifndef GTEST_HAS_TR1_TUPLE
# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR)
// STLport, provided with the Android NDK, has neither <tr1/tuple> nor <tuple>.
#  define GTEST_HAS_TR1_TUPLE 0
# else
// Tuple support is disabled by default here as well; the user has to define
// GTEST_HAS_TR1_TUPLE to 1 explicitly to enable it.
#  define GTEST_HAS_TR1_TUPLE 0
# endif
#endif  // GTEST_HAS_TR1_TUPLE

// Determines whether Google Test's own tr1 tuple implementation
// should be used.
#ifndef GTEST_USE_OWN_TR1_TUPLE
// The user didn't tell us, so we need to figure it out.

// We use our own TR1 tuple if we aren't sure the user has an
// implementation of it already.  At this time, libstdc++ 4.0.0+ and
// MSVC 2010 are the only mainstream standard libraries that come
// with a TR1 tuple implementation.  NVIDIA's CUDA NVCC compiler
// pretends to be GCC by defining __GNUC__ and friends, but cannot
// compile GCC's tuple implementation.  MSVC 2008 (9.0) provides TR1
// tuple in a 323 MB Feature Pack download, which we cannot assume the
// user has.  QNX's QCC compiler is a modified GCC but it doesn't
// support TR1 tuple.  libc++ only provides std::tuple, in C++11 mode,
// and it can be used with some compilers that define __GNUC__.
# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \
      && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600
#  define GTEST_ENV_HAS_TR1_TUPLE_ 1
# endif

// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used
// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6
// can build with clang but need to use gcc4.2's libstdc++).
# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325)
#  define GTEST_ENV_HAS_STD_TUPLE_ 1
# endif

# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_
#  define GTEST_USE_OWN_TR1_TUPLE 0
# else
#  define GTEST_USE_OWN_TR1_TUPLE 1
# endif

#endif  // GTEST_USE_OWN_TR1_TUPLE

// To avoid conditional compilation everywhere, we make it
// gtest-port.h's responsibility to #include the header implementing
// tr1/tuple.
#if GTEST_HAS_TR1_TUPLE

# if GTEST_USE_OWN_TR1_TUPLE
#  include "gtest/internal/gtest-tuple.h"
# elif GTEST_ENV_HAS_STD_TUPLE_
#  include <tuple>
// C++11 puts its tuple into the ::std namespace rather than
// ::std::tr1.  gtest expects tuple to live in ::std::tr1, so put it there.
// This causes undefined behavior, but supported compilers react in
// the way we intend.
namespace std {
namespace tr1 {
using ::std::get;
using ::std::make_tuple;
using ::std::tuple;
using ::std::tuple_element;
using ::std::tuple_size;
}
}

# elif GTEST_OS_SYMBIAN

// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
// use STLport's tuple implementation, which unfortunately doesn't
// work as the copy of STLport distributed with Symbian is incomplete.
// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
// use its own tuple implementation.
#  ifdef BOOST_HAS_TR1_TUPLE
#   undef BOOST_HAS_TR1_TUPLE
#  endif  // BOOST_HAS_TR1_TUPLE

// This prevents <boost/tr1/detail/config.hpp>, which defines
// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
#  define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
#  include <tuple>

# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header.  This does
// not conform to the TR1 spec, which requires the header to be <tuple>.

#  if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
// which is #included by <tr1/tuple>, to not compile when RTTI is
// disabled.  _TR1_FUNCTIONAL is the header guard for
// <tr1/functional>.  Hence the following #define is a hack to prevent
// <tr1/functional> from being included.
#   define _TR1_FUNCTIONAL 1
#   include <tr1/tuple>
#   undef _TR1_FUNCTIONAL  // Allows the user to #include
// <tr1/functional> if he chooses to.
#  else
#   include <tr1/tuple>  // NOLINT
#  endif  // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302

# else
// If the compiler is not GCC 4.0+, we assume the user is using a
// spec-conforming TR1 implementation.
#  include <tuple>  // NOLINT
# endif  // GTEST_USE_OWN_TR1_TUPLE

#endif  // GTEST_HAS_TR1_TUPLE

// Determines whether clone(2) is supported.
// Usually it will only be available on Linux, excluding
// Linux on the Itanium architecture.
// Also see http://linux.die.net/man/2/clone.
#ifndef GTEST_HAS_CLONE
// The user didn't tell us, so we need to figure it out.

# if GTEST_OS_LINUX && !defined(__ia64__)
#  if GTEST_OS_LINUX_ANDROID
// On Android, clone() is only available on ARM starting with Gingerbread.
#    if defined(__arm__) && __ANDROID_API__ >= 9
#     define GTEST_HAS_CLONE 1
#    else
#     define GTEST_HAS_CLONE 0
#    endif
#  else
#   define GTEST_HAS_CLONE 1
#  endif
# else
#  define GTEST_HAS_CLONE 0
# endif  // GTEST_OS_LINUX && !defined(__ia64__)

#endif  // GTEST_HAS_CLONE

// Determines whether to support stream redirection. This is used to test
// output correctness and to implement death tests.
#ifndef GTEST_HAS_STREAM_REDIRECTION
// By default, we assume that stream redirection is supported on all
// platforms except known mobile ones.
# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
#  define GTEST_HAS_STREAM_REDIRECTION 0
# else
#  define GTEST_HAS_STREAM_REDIRECTION 1
# endif  // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN
#endif  // GTEST_HAS_STREAM_REDIRECTION

// Determines whether to support death tests.
// Google Test does not support death tests for VC 7.1 and earlier as
// abort() in a VC 7.1 application compiled as GUI in debug config
// pops up a dialog window that cannot be suppressed programmatically.
#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
     (GTEST_OS_MAC && !GTEST_OS_IOS) || GTEST_OS_IOS_SIMULATOR || \
     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
     GTEST_OS_OPENBSD || GTEST_OS_QNX)
# define GTEST_HAS_DEATH_TEST 1
# include <vector>  // NOLINT
#endif

// We don't support MSVC 7.1 with exceptions disabled now.  Therefore
// all the compilers we care about are adequate for supporting
// value-parameterized tests.
#define GTEST_HAS_PARAM_TEST 1

// Determines whether to support type-driven tests.

// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
// Sun Pro CC, IBM Visual Age, and HP aCC support.
#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
    defined(__IBMCPP__) || defined(__HP_aCC)
# define GTEST_HAS_TYPED_TEST 1
# define GTEST_HAS_TYPED_TEST_P 1
#endif

// Determines whether to support Combine(). This only makes sense when
// value-parameterized tests are enabled.  The implementation doesn't
// work on Sun Studio since it doesn't understand templated conversion
// operators.
#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
# define GTEST_HAS_COMBINE 1
#endif

// Determines whether the system compiler uses UTF-16 for encoding wide strings.
#define GTEST_WIDE_STRING_USES_UTF16_ \
    (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)

// Determines whether test results can be streamed to a socket.
#if GTEST_OS_LINUX
# define GTEST_CAN_STREAM_RESULTS_ 1
#endif

// Defines some utility macros.

// The GNU compiler emits a warning if nested "if" statements are followed by
// an "else" statement and braces are not used to explicitly disambiguate the
// "else" binding.  This leads to problems with code like:
//
//   if (gate)
//     ASSERT_*(condition) << "Some message";
//
// The "switch (0) case 0:" idiom is used to suppress this.
#ifdef __INTEL_COMPILER
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
#else
# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default:  // NOLINT
#endif

// Use this annotation at the end of a struct/class definition to
// prevent the compiler from optimizing away instances that are never
// used.  This is useful when all interesting logic happens inside the
// c'tor and / or d'tor.  Example:
//
//   struct Foo {
//     Foo() { ... }
//   } GTEST_ATTRIBUTE_UNUSED_;
//
// Also use it after a variable or parameter declaration to tell the
// compiler the variable/parameter does not have to be used.
#if defined(__GNUC__) && !defined(COMPILER_ICC)
# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
#else
# define GTEST_ATTRIBUTE_UNUSED_
#endif

// A macro to disallow operator=
// This should be used in the private: declarations for a class.
#define GTEST_DISALLOW_ASSIGN_(type)\
  void operator=(type const &)

// A macro to disallow copy constructor and operator=
// This should be used in the private: declarations for a class.
#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
  type(type const &);\
  GTEST_DISALLOW_ASSIGN_(type)

// Tell the compiler to warn about unused return values for functions declared
// with this macro.  The macro should be used on function declarations
// following the argument list:
//
//   Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
#else
# define GTEST_MUST_USE_RESULT_
#endif  // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC

// Determine whether the compiler supports Microsoft's Structured Exception
// Handling.  This is supported by several Windows compilers but generally
// does not exist on any other system.
#ifndef GTEST_HAS_SEH
// The user didn't tell us, so we need to figure it out.

# if defined(_MSC_VER) || defined(__BORLANDC__)
// These two compilers are known to support SEH.
#  define GTEST_HAS_SEH 1
# else
// Assume no SEH.
#  define GTEST_HAS_SEH 0
# endif

#endif  // GTEST_HAS_SEH

#ifdef _MSC_VER

# if GTEST_LINKED_AS_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllimport)
# elif GTEST_CREATE_SHARED_LIBRARY
#  define GTEST_API_ __declspec(dllexport)
# endif

#endif  // _MSC_VER

#ifndef GTEST_API_
# define GTEST_API_
#endif

#ifdef __GNUC__
// Ask the compiler to never inline a given function.
# define GTEST_NO_INLINE_ __attribute__((noinline))
#else
# define GTEST_NO_INLINE_
#endif

// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
# define GTEST_HAS_CXXABI_H_ 1
#else
# define GTEST_HAS_CXXABI_H_ 0
#endif

namespace testing {

class Message;

namespace internal {

// A secret type that Google Test users don't know about.  It has no
// definition on purpose.  Therefore it's impossible to create a
// Secret object, which is what we want.
class Secret;

// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
//   GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
//                         content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.

// Empty class template that exists only to receive a bool template argument;
// naming CompileAssert<X> requires X to be a compile-time constant.
template <bool>
struct CompileAssert {
};

// Declares a typedef called 'msg' for an array of CompileAssert<expr>.  The
// array has 1 element when 'expr' is true and -1 elements (which is invalid)
// when 'expr' is false.  The implementation notes that follow explain why
// the macro is spelled exactly this way.
#define GTEST_COMPILE_ASSERT_(expr, msg) \
  typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
      msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_

// Implementation details of GTEST_COMPILE_ASSERT_:
//
// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
//   elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
//    #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
//   does not work, as gcc supports variable-length arrays whose sizes
//   are determined at run-time (this is gcc's extension and not part
//   of the C++ standard).  As a result, gcc fails to reject the
//   following code with the simple definition:
//
//     int foo;
//     GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
//                                      // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
//   expr is a compile-time constant.  (Template arguments must be
//   determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
//
//     CompileAssert<bool(expr)>
//
//   instead, these compilers will refuse to compile
//
//     GTEST_COMPILE_ASSERT_(5 > 0, some_message);
//
//   (They seem to think the ">" in "5 > 0" marks the end of the
//   template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
//     ((expr) ? 1 : -1).
//
//   This is to avoid running into a bug in MS VC 7.1, which
//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.

// Helper behind StaticAssertTypeEq, which is defined in gtest.h.
//
// The primary template is declared but intentionally left undefined, so
// StaticAssertTypeEqHelper<A, B> is an incomplete type unless the partial
// specialization for two identical types is selected.
template <class Lhs, class Rhs>
struct StaticAssertTypeEqHelper;

// Selected only when both template arguments are the same type.
template <class Same>
struct StaticAssertTypeEqHelper<Same, Same> {
};

// Aliases for the string types used inside this namespace.  The global
// ::string / ::wstring are picked only when the matching GTEST_HAS_GLOBAL_*
// macro is non-zero; otherwise the ::std versions are used.  No wstring alias
// is declared when neither wide-string macro is set.
#if !GTEST_HAS_GLOBAL_STRING
typedef ::std::string string;
#else
typedef ::string string;
#endif  // !GTEST_HAS_GLOBAL_STRING

#if !GTEST_HAS_GLOBAL_WSTRING
# if GTEST_HAS_STD_WSTRING
typedef ::std::wstring wstring;
# endif  // GTEST_HAS_STD_WSTRING
#else
typedef ::wstring wstring;
#endif  // !GTEST_HAS_GLOBAL_WSTRING

// A helper for suppressing warnings on constant condition: wrap the
// condition in a call to IsTrue() instead of testing it directly.  It just
// returns 'condition'.  Only the declaration appears here.
GTEST_API_ bool IsTrue(bool condition);

// Defines scoped_ptr.

// A deliberately PARTIAL scoped_ptr: it offers only the operations Google
// Test itself needs.  The wrapper deletes the pointer it holds when it is
// destroyed or when a different pointer is installed through reset().
template <typename T>
class scoped_ptr {
 public:
  typedef T element_type;

  // Starts out holding p, which may be NULL.
  explicit scoped_ptr(T* p = NULL) : ptr_(p) {}

  // Deletes the held object, if any, by resetting to NULL.
  ~scoped_ptr() { reset(); }

  // Accessors; none of them changes which pointer is held.
  T& operator*() const { return *ptr_; }
  T* operator->() const { return ptr_; }
  T* get() const { return ptr_; }

  // Hands the held pointer back to the caller and leaves this object
  // holding NULL.  Nothing is deleted.
  T* release() {
    T* const released = ptr_;
    ptr_ = NULL;
    return released;
  }

  // Installs p in place of the held pointer and deletes the old one.
  // Resetting to the pointer that is already held does nothing.
  void reset(T* p = NULL) {
    if (p == ptr_) {
      return;
    }
    if (IsTrue(sizeof(T) > 0)) {  // Makes sure T is a complete type.
      delete ptr_;
    }
    ptr_ = p;
  }

 private:
  T* ptr_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
};

// Defines RE.

// Thin C++ wrapper around <regex.h>; patterns use the POSIX Extended
// Regular Expression syntax.
class GTEST_API_ RE {
 public:
  // Copying re-runs Init() on the other object's pattern text.  The Standard
  // requires a copy constructor in order to initialize object references
  // from r-values.
  RE(const RE& other) { Init(other.pattern()); }

  // Builds an RE from a ::std::string.
  RE(const ::std::string& regex) { Init(regex.c_str()); }  // NOLINT

#if GTEST_HAS_GLOBAL_STRING

  // Builds an RE from a global ::string.
  RE(const ::string& regex) { Init(regex.c_str()); }  // NOLINT

#endif  // GTEST_HAS_GLOBAL_STRING

  // Builds an RE from a C string.
  RE(const char* regex) { Init(regex); }  // NOLINT
  ~RE();

  // The regex as text.
  const char* pattern() const { return pattern_; }

  // FullMatch(str, re) is true iff re matches all of str.
  // PartialMatch(str, re) is true iff re matches some substring of str
  // (str itself counts as a substring).
  //
  // TODO(wan@google.com): make FullMatch() and PartialMatch() work
  // when str contains NUL characters.
  static bool FullMatch(const ::std::string& str, const RE& re) { return FullMatch(str.c_str(), re); }
  static bool PartialMatch(const ::std::string& str, const RE& re) { return PartialMatch(str.c_str(), re); }

#if GTEST_HAS_GLOBAL_STRING

  static bool FullMatch(const ::string& str, const RE& re) { return FullMatch(str.c_str(), re); }
  static bool PartialMatch(const ::string& str, const RE& re) { return PartialMatch(str.c_str(), re); }

#endif  // GTEST_HAS_GLOBAL_STRING

  // The C-string overloads that the string overloads above forward to.
  static bool FullMatch(const char* str, const RE& re);
  static bool PartialMatch(const char* str, const RE& re);

 private:
  // Shared initialization routine called by every constructor.
  void Init(const char* regex);

  // Kept as a const char* rather than an std::string because Google Test
  // used to be used where std::string is not available.
  // TODO(wan@google.com): change to std::string.
  const char* pattern_;
  bool is_valid_;

#if GTEST_USES_POSIX_RE

  regex_t full_regex_;     // For FullMatch().
  regex_t partial_regex_;  // For PartialMatch().

#else  // GTEST_USES_SIMPLE_RE

  const char* full_pattern_;  // For FullMatch();

#endif

  GTEST_DISALLOW_ASSIGN_(RE);
};

// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
//   file - the source file path.
//   line - the line number within that file.
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);

// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
// Takes the same (file, line) pair as FormatFileLocation.
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
    int line);

// Defines logging utilities:
//   GTEST_LOG_(severity) - logs messages at the specified severity level. The
//                          message itself is streamed into the macro.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.

// Severity levels accepted by GTEST_LOG_().  The macro pastes its argument
// onto the GTEST_ prefix, so GTEST_LOG_(WARNING) selects GTEST_WARNING.
enum GTestLogSeverity {
  GTEST_INFO,
  GTEST_WARNING,
  GTEST_ERROR,
  GTEST_FATAL  // ~GTestLog() is documented to abort the program at this level.
};

// Formats log entry severity, provides a stream object for streaming the
// log message, and terminates the message with a newline when going out of
// scope.
class GTEST_API_ GTestLog {
 public:
  // Begins a log entry of the given severity for the given source location.
  // Defined out of line; GTEST_LOG_() passes __FILE__ and __LINE__ here.
  GTestLog(GTestLogSeverity severity, const char* file, int line);

  // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
  ~GTestLog();

  // Returns the stream the message text is written to.  This is always
  // std::cerr, regardless of severity.
  ::std::ostream& GetStream() {
    return ::std::cerr;
  }

 private:
  // Severity of this entry (presumably the constructor argument; the
  // constructor body is not defined in this header).
  const GTestLogSeverity severity_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
};

#define GTEST_LOG_(severity) \
    ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
                                  __FILE__, __LINE__).GetStream()

// Intentionally a no-op: GTestLog::GetStream() already returns std::cerr,
// so there is nothing to redirect.
inline void LogToStderr() {}
// Flushes buffered C stdio output.
inline void FlushInfoLog() {
  // A NULL stream argument makes fflush() flush every open output stream;
  // the return value is deliberately ignored.
  static_cast<void>(fflush(NULL));
}

// INTERNAL IMPLEMENTATION - DO NOT USE.
//
// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
// is not satisfied.
//  Synopsis:
//    GTEST_CHECK_(boolean_condition);
//     or
//    GTEST_CHECK_(boolean_condition) << "Additional message";
//
//    This checks the condition and if the condition is not satisfied
//    it prints message about the condition violation, including the
//    condition itself, plus additional message streamed into it, if any,
//    and then it aborts the program. It aborts the program irrespective of
//    whether it is built in the debug mode or not.
#define GTEST_CHECK_(condition) \
    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
    if (::testing::internal::IsTrue(condition)) \
      ; \
    else \
      GTEST_LOG_(FATAL) << "Condition " #condition " failed. "

// An all-mode assert to verify that the given POSIX-style function
// call returns 0 (indicating success).  Known limitation: this
// doesn't expand to a balanced 'if' statement, so enclose the macro
// in {} if you need to use it as the only statement in an 'if'
// branch.
// Evaluates posix_call exactly once.  A non-zero result is logged at FATAL
// severity as "<call text> failed with error <code>"; further text can be
// streamed after the macro.
#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
  if (const int gtest_error = (posix_call)) \
    GTEST_LOG_(FATAL) << #posix_call << " failed with error " \
                      << gtest_error

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Use ImplicitCast_ as a safe version of static_cast for upcasting in
// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
// const Foo*).  When you use ImplicitCast_, the compiler checks that
// the cast is safe.  Such explicit ImplicitCast_s are necessary in
// surprisingly many situations where C++ demands an exact type match
// instead of an argument type convertible to a target type.
//
// The syntax for using ImplicitCast_ is the same as for static_cast:
//
//   ImplicitCast_<ToType>(expr)
//
// ImplicitCast_ would have been part of the C++ standard library,
// but the proposal was submitted too late.  It will probably make
// its way into the language in the future.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., implicit_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
// Returns the argument as a To.  The conversion happens implicitly when the
// argument is bound to the by-value parameter, so any conversion the
// compiler would not perform on its own is rejected at compile time.
template <typename To>
inline To ImplicitCast_(To x) { return x; }

// When you upcast (that is, cast a pointer from type Foo to type
// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
// always succeed.  When you downcast (that is, cast a pointer from
// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
// how do you know the pointer is really of type SubclassOfFoo?  It
// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
// when you downcast, you should use this function template.  When RTTI
// is available (GTEST_HAS_RTTI), we use dynamic_cast<> to double-check
// the downcast is legal (we die if it's not).  Otherwise, we do only the
// efficient static_cast<>.  Thus, it's important to test in a build with
// RTTI enabled to make sure the cast is legal!
//    This is the only place in the code we should use dynamic_cast<>.
// In particular, you SHOULDN'T be using dynamic_cast<> in order to
// do RTTI, e.g. code like this:
//    if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
//    if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
// You should design the code some other way not to need this.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., down_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
// Casts f down to To with static_cast<>.  To must be a pointer type that
// converts implicitly back to From*; this is enforced at compile time.
template<typename To, typename From>  // use like this: DownCast_<T*>(foo);
inline To DownCast_(From* f) {  // so we only accept pointers
  // Ensures that To is a sub-type of From *.  This test is here only
  // for compile-time type checking, and has no overhead in an
  // optimized build at run-time, as it will be optimized away
  // completely.
  if (false) {
    const To to = NULL;
    ::testing::internal::ImplicitCast_<From*>(to);
  }

#if GTEST_HAS_RTTI
  // Runs whenever RTTI is available (it is not limited to debug builds):
  // a non-NULL f must really point to a To, otherwise GTEST_CHECK_ fires.
  GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
#endif
  return static_cast<To>(f);
}

// Downcasts the pointer of type Base to Derived.
// Derived must be a subclass of Base. The parameter MUST
// point to a class of type Derived, not any subclass of it.
// When RTTI is available, the function performs a runtime
// check to enforce this.
template <class Derived, class Base>
Derived* CheckedDowncastToActualType(Base* base) {
#if GTEST_HAS_RTTI
  GTEST_CHECK_(typeid(*base) == typeid(Derived));
  return dynamic_cast<Derived*>(base);  // NOLINT
#else
  return static_cast<Derived*>(base);  // Poor man's downcast.
#endif
}

#if GTEST_HAS_STREAM_REDIRECTION

// Defines the stdout and stderr capturers:
//   CaptureStdout     - starts capturing stdout.
//   GetCapturedStdout - stops capturing stdout and returns the captured string.
//   CaptureStderr     - starts capturing stderr.
//   GetCapturedStderr - stops capturing stderr and returns the captured string.
//
GTEST_API_ void CaptureStdout();
GTEST_API_ std::string GetCapturedStdout();
GTEST_API_ void CaptureStderr();
GTEST_API_ std::string GetCapturedStderr();

#endif  // GTEST_HAS_STREAM_REDIRECTION


#if GTEST_HAS_DEATH_TEST

const ::std::vector<testing::internal::string>& GetInjectableArgvs();
void SetInjectableArgvs(const ::std::vector<testing::internal::string>*
                        new_argvs);

// A copy of all command line arguments.  Set by InitGoogleTest().
extern ::std::vector<testing::internal::string> g_argvs;

#endif  // GTEST_HAS_DEATH_TEST

// Defines synchronization primitives.

#if GTEST_HAS_PTHREAD

// Sleeps for (roughly) n milli-seconds.  This function is only for
// testing Google Test's own constructs.  Don't use it in user tests,
// either directly or indirectly.
// Converts a duration of n milliseconds into a timespec, splitting it into
// whole seconds and the remaining nanoseconds.  nanosleep() only accepts
// tv_nsec values below one second (it fails with EINVAL otherwise), so the
// whole duration must not be stored in tv_nsec alone.
inline timespec MillisecondsToTimespec(int n) {
  timespec time = timespec();
  time.tv_sec = n / 1000;                     // Whole seconds.
  time.tv_nsec = (n % 1000) * 1000L * 1000L;  // Remaining ms, in ns.
  return time;
}

// Sleeps for (roughly) n milliseconds, for any non-negative n.  A negative n
// yields an invalid timespec that nanosleep() rejects, so the call returns
// immediately.  The return value of nanosleep() is ignored; an interrupted
// sleep is not resumed.
inline void SleepMilliseconds(int n) {
  const timespec time = MillisecondsToTimespec(n);
  nanosleep(&time, NULL);
}

// Allows a controller thread to pause execution of newly created
// threads until notified.  Instances of this class must be created
// and destroyed in the controller thread.
//
// This class is only for testing Google Test's own constructs. Do not
// use it in user tests, either directly or indirectly.
class Notification {
 public:
  Notification() : notified_(false) {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
  }
  ~Notification() {
    pthread_mutex_destroy(&mutex_);
  }

  // Sets the flag that waiting threads poll for.  Must be called from the
  // controller thread.
  void Notify() {
    pthread_mutex_lock(&mutex_);
    notified_ = true;
    pthread_mutex_unlock(&mutex_);
  }

  // Polls the flag every 10 ms and returns once Notify() has been called.
  // Must be called from a test thread.
  void WaitForNotification() {
    while (!IsNotified()) {
      SleepMilliseconds(10);
    }
  }

 private:
  // Reads notified_ under the mutex and returns the value that was seen.
  bool IsNotified() {
    pthread_mutex_lock(&mutex_);
    const bool snapshot = notified_;
    pthread_mutex_unlock(&mutex_);
    return snapshot;
  }

  pthread_mutex_t mutex_;  // Guards notified_.
  bool notified_;          // True once Notify() has run.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
};

// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
// Consequently, it cannot select a correct instantiation of ThreadWithParam
// in order to call its Run(). Introducing ThreadWithParamBase as a
// non-templated base class for ThreadWithParam allows us to bypass this
// problem.
// Non-templated interface through which ThreadFuncWithCLinkage() runs a
// thread body.
class ThreadWithParamBase {
 public:
  virtual ~ThreadWithParamBase() {}
  // Executes the thread body; implemented by ThreadWithParam<T>.
  virtual void Run() = 0;
};

// pthread_create() accepts a pointer to a function type with the C linkage.
// According to the Standard (7.5/1), function types with different linkages
// are different even if they are otherwise identical.  Some compilers (for
// example, SunStudio) treat them as different types.  Since class methods
// cannot be defined with C-linkage we need to define a free C-function to
// pass into pthread_create().
extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
  static_cast<ThreadWithParamBase*>(thread)->Run();
  return NULL;
}

// Helper class for testing Google Test's multi-threading constructs.
// To use it, write:
//
//   void ThreadFunc(int param) { /* Do things with param */ }
//   Notification thread_can_start;
//   ...
//   // The thread_can_start parameter is optional; you can supply NULL.
//   ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
//   thread_can_start.Notify();
//
// These classes are only for testing Google Test's own constructs. Do
// not use them in user tests, either directly or indirectly.
template <typename T>
class ThreadWithParam : public ThreadWithParamBase {
 public:
  typedef void (*UserThreadFunc)(T);

  ThreadWithParam(
    UserThreadFunc func, T param, Notification* thread_can_start)
    : func_(func),
      param_(param),
      thread_can_start_(thread_can_start),
      finished_(false) {
    ThreadWithParamBase* const base = this;
    // The thread can be created only after all fields except thread_
    // have been initialized.
    GTEST_CHECK_POSIX_SUCCESS_(
      pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
  }
  ~ThreadWithParam() {
    Join();
  }

  void Join() {
    if (!finished_) {
      GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
      finished_ = true;
    }
  }

  virtual void Run() {
    if (thread_can_start_ != NULL) {
      thread_can_start_->WaitForNotification();
    }

    func_(param_);
  }

 private:
  const UserThreadFunc func_;  // User-supplied thread function.
  const T param_;  // User-supplied parameter to the thread function.
  // When non-NULL, used to block execution until the controller thread
  // notifies.
  Notification* const thread_can_start_;
  bool finished_;  // true iff we know that the thread function has finished.
  pthread_t thread_;  // The native thread object.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
};

// MutexBase and Mutex implement mutex on pthreads-based platforms. They
// are used in conjunction with class MutexLock:
//
//   Mutex mutex;
//   ...
//   MutexLock lock(&mutex);  // Acquires the mutex and releases it at the end
//                            // of the current scope.
//
// MutexBase implements behavior for both statically and dynamically
// allocated mutexes.  Do not use MutexBase directly.  Instead, write
// the following to define a static mutex:
//
//   GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
//
// You can forward declare a static mutex like this:
//
//   GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
//
// To create a dynamic mutex, just define an object of type Mutex.
// POD wrapper around a pthread mutex that also records which thread holds
// it, so that AssertHeld() can verify ownership.
class MutexBase {
 public:
  // Acquires this mutex.
  void Lock() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
    // The ownership fields are written only after the lock is acquired.
    owner_ = pthread_self();
    has_owner_ = true;
  }

  // Releases this mutex.
  void Unlock() {
    // Since the lock is being released the owner_ field should no longer be
    // considered valid. We don't protect writing to has_owner_ here, as it's
    // the caller's responsibility to ensure that the current thread holds the
    // mutex when this is called.
    has_owner_ = false;
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
  }

  // Does nothing if the current thread holds the mutex. Otherwise, crashes
  // with high probability.
  void AssertHeld() const {
    // has_owner_ is tested first so that owner_ is only inspected when it
    // holds a valid thread ID.
    GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
        << "The current thread is not holding the mutex @" << this;
  }

  // A static mutex may be used before main() is entered.  It may even
  // be used before the dynamic initialization stage.  Therefore we
  // must be able to initialize a static mutex object at link time.
  // This means MutexBase has to be a POD and its member variables
  // have to be public.
 public:
  // NOTE: the member order below is relied upon by the brace initializer in
  // GTEST_DEFINE_STATIC_MUTEX_ (mutex_ first, then has_owner_).
  pthread_mutex_t mutex_;  // The underlying pthread mutex.
  // has_owner_ indicates whether the owner_ field below contains a valid thread
  // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
  // accesses to the owner_ field should be protected by a check of this field.
  // An alternative might be to memset() owner_ to all zeros, but there's no
  // guarantee that a zero'd pthread_t is necessarily invalid or even different
  // from pthread_self().
  bool has_owner_;
  pthread_t owner_;  // The thread holding the mutex.
};

// Forward-declares a static mutex.
# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
    extern ::testing::internal::MutexBase mutex

// Defines and statically (i.e. at link time) initializes a static mutex.
// The initialization list here does not explicitly initialize each field,
// instead relying on default initialization for the unspecified fields. In
// particular, the owner_ field (a pthread_t) is not explicitly initialized.
// This allows initialization to work whether pthread_t is a scalar or struct.
// The flag -Wmissing-field-initializers must not be specified for this to work.
# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
    ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false }

// The Mutex class can only be used for mutexes created at runtime. It
// shares its API with MutexBase otherwise.
// Mutex whose pthread mutex is initialized at construction and destroyed at
// destruction.
class Mutex : public MutexBase {
 public:
  // Initializes the inherited pthread mutex with default attributes and
  // marks it as not owned by any thread.
  Mutex() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
    has_owner_ = false;
  }
  // Destroys the inherited pthread mutex.
  ~Mutex() {
    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
  }

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
};

// We cannot name this class MutexLock as the ctor declaration would
// conflict with a macro named MutexLock, which is defined on some
// platforms.  Hence the typedef trick below.
// Scoped lock: acquires the given mutex on construction and releases it on
// destruction.
class GTestMutexLock {
 public:
  // Locks *mutex.  The pointer is dereferenced without a NULL check.
  explicit GTestMutexLock(MutexBase* mutex)
    : mutex_(mutex) {
    mutex_->Lock();
  }

  // Unlocks the mutex that was locked by the constructor.
  ~GTestMutexLock() {
    mutex_->Unlock();
  }

 private:
  MutexBase* const mutex_;  // The mutex held for the lifetime of this object.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
};

typedef GTestMutexLock MutexLock;

// Helpers for ThreadLocal.

// pthread_key_create() requires DeleteThreadLocalValue() to have
// C-linkage.  Therefore it cannot be templatized to access
// ThreadLocal<T>.  Hence the need for class
// ThreadLocalValueHolderBase.
// Non-templated base with a virtual destructor, so that
// DeleteThreadLocalValue() can delete any ThreadLocal<T>::ValueHolder
// through a base pointer.
class ThreadLocalValueHolderBase {
 public:
  virtual ~ThreadLocalValueHolderBase() {}
};

// Called by pthread to delete thread-local data stored by
// pthread_setspecific().
extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
}

// Implements thread-local storage on pthreads-based systems.
//
//   // Thread 1
//   ThreadLocal<int> tl(100);  // 100 is the default value for each thread.
//
//   // Thread 2
//   tl.set(150);  // Changes the value for thread 2 only.
//   EXPECT_EQ(150, tl.get());
//
//   // Thread 1
//   EXPECT_EQ(100, tl.get());  // In thread 1, tl has the original value.
//   tl.set(200);
//   EXPECT_EQ(200, tl.get());
//
// The template type argument T must have a public copy constructor.
// In addition, the default ThreadLocal constructor requires T to have
// a public default constructor.
//
// An object managed for a thread by a ThreadLocal instance is deleted
// when the thread exits.  Or, if the ThreadLocal instance dies in
// that thread, when the ThreadLocal dies.  It's the user's
// responsibility to ensure that all other threads using a ThreadLocal
// have exited when it dies, or the per-thread objects for those
// threads will not be deleted.
//
// Google Test only uses global ThreadLocal objects.  That means they
// will die after main() has returned.  Therefore, no per-thread
// object managed by Google Test will be leaked as long as all threads
// using Google Test have exited when main() returns.
template <typename T>
class ThreadLocal {
 public:
  ThreadLocal() : key_(CreateKey()),
    default_() {}
  explicit ThreadLocal(const T& value) : key_(CreateKey()),
    default_(value) {}

  ~ThreadLocal() {
    // Destroys the managed object for the current thread, if any.
    DeleteThreadLocalValue(pthread_getspecific(key_));

    // Releases resources associated with the key.  This will *not*
    // delete managed objects for other threads.
    GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
  }

  T* pointer() {
    return GetOrCreateValue();
  }
  const T* pointer() const {
    return GetOrCreateValue();
  }
  const T& get() const {
    return *pointer();
  }
  void set(const T& value) {
    *pointer() = value;
  }

 private:
  // Holds a value of type T.
  class ValueHolder : public ThreadLocalValueHolderBase {
   public:
    explicit ValueHolder(const T& value) : value_(value) {}

    T* pointer() {
      return &value_;
    }

   private:
    T value_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
  };

  static pthread_key_t CreateKey() {
    pthread_key_t key;
    // When a thread exits, DeleteThreadLocalValue() will be called on
    // the object managed for that thread.
    GTEST_CHECK_POSIX_SUCCESS_(
      pthread_key_create(&key, &DeleteThreadLocalValue));
    return key;
  }

  T* GetOrCreateValue() const {
    ThreadLocalValueHolderBase* const holder =
      static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));

    if (holder != NULL) {
      return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
    }

    ValueHolder* const new_holder = new ValueHolder(default_);
    ThreadLocalValueHolderBase* const holder_base = new_holder;
    GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
    return new_holder->pointer();
  }

  // A key pthreads uses for looking up per-thread values.
  const pthread_key_t key_;
  const T default_;  // The default value for each thread.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
};

# define GTEST_IS_THREADSAFE 1

#else  // GTEST_HAS_PTHREAD

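// Synchronization primitives (mutex, lock, and thread-local variable) for
// builds without pthreads.  Google Test originally provided dummy (no-op)
// versions here because using Google Test in multiple threads is not
// supported on such platforms; the definitions below replace them with
// implementations based on the Windows API (CRITICAL_SECTION and the Tls*
// functions).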

class Mutex {
 public:

  /**
   * Original Code being commented out and replaced
   * with changes made by Mario.
   *

  Mutex() {}
  void Lock() {}
  void Unlock() {}
  void AssertHeld() const {}

  *
  */
  Mutex(): owner_(0), handle_() {
    ::InitializeCriticalSection(&handle_);
  }

  ~Mutex() {
    ::DeleteCriticalSection(&handle_);
  }

  void Lock() {
    ::EnterCriticalSection(&handle_);
    owner_ = ::GetCurrentThreadId();
  }

  void Unlock() {
    ::LeaveCriticalSection(&handle_);
    owner_ = 0;
  }

  // Does nothing if the current thread holds the mutex.
  // Otherwise, crashes with high probability.
  void AssertHeld() const {
    GTEST_CHECK_(owner_ == ::GetCurrentThreadId())
        << "The current thread is not holding the mutex @" << this;
  }

 private:
  DWORD              owner_;
  CRITICAL_SECTION   handle_;
};

// Changing the extern to static in the following statement
// per changes by Mario
# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
  static ::testing::internal::Mutex mutex

// Intentionally expands to nothing (per changes by Mario): the declaration
// macro above already defines a static Mutex object, so no separate
// definition is needed.
# define GTEST_DEFINE_STATIC_MUTEX_(mutex)

class GTestMutexLock {
 public:
  /**
   * Replacing original code with changes made by Mario
  explicit GTestMutexLock(Mutex*) {}  // NOLINT
  */

  explicit GTestMutexLock(Mutex* inMutex) : mutex_(inMutex) {
    mutex_->Lock();
  }

  ~GTestMutexLock() {
    mutex_->Unlock();
  }

 private:
  Mutex* mutex_;
};

typedef GTestMutexLock MutexLock;

// Non-templated base with a virtual destructor, so that
// DeleteThreadLocalValue() can delete any ThreadLocal<T>::ValueHolder
// through a base pointer.
class ThreadLocalValueHolderBase {
 public:
  virtual ~ThreadLocalValueHolderBase() {}
};

extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
  delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
}

// Implements thread-local storage on windows system.
template <typename T>
class ThreadLocal {
 public:

  /**
   * Original single-value implementation, replaced with the TLS-based
   * implementation below per changes from Mario:
   *
  ThreadLocal() : value_() {}
  explicit ThreadLocal(const T& value) : value_(value) {}
  T* pointer() { return &value_; }
  const T* pointer() const { return &value_; }
  const T& get() const { return value_; }
  void set(const T& value) { value_ = value; }
  */

  // Each thread's value starts out as a default-constructed T.
  ThreadLocal() : key_(CreateKey()),
    default_() {}
  // Each thread's value starts out as a copy of value.
  explicit ThreadLocal(const T& value) : key_(CreateKey()),
    default_(value) {}

  ~ThreadLocal() {
    // Destroys the managed object for the current thread, if any.
    DeleteThreadLocalValue(TlsGetValue(key_));

    // Releases resources associated with the key.  This will *not*
    // delete managed objects for other threads.
    GTEST_CHECK_(TlsFree(key_) > 0);
  }

  // Accessors for the calling thread's value, which is created from the
  // default on first use.
  T* pointer() {
    return GetOrCreateValue();
  }
  const T* pointer() const {
    return GetOrCreateValue();
  }
  const T& get() const {
    return *pointer();
  }
  void set(const T& value) {
    *pointer() = value;
  }

 private:
  // Holds a value of type T.
  class ValueHolder : public ThreadLocalValueHolderBase {
   public:
    explicit ValueHolder(const T& value) : value_(value) {}

    T* pointer() {
      return &value_;
    }

   private:
    T value_;
    GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
  };

  // Allocates a TLS index for this object.
  static DWORD CreateKey() {
    DWORD key;
    // NOTE: unlike pthread_key_create(), TlsAlloc() is given no cleanup
    // callback, so nothing in this class deletes a thread's value when that
    // thread exits; only ~ThreadLocal() deletes the value belonging to the
    // thread that runs it.
    GTEST_CHECK_((key = TlsAlloc()) != TLS_OUT_OF_INDEXES);
    return key;
  }

  // Returns the calling thread's value, allocating it on first access.
  T* GetOrCreateValue() const {
    ThreadLocalValueHolderBase* const holder =
      static_cast<ThreadLocalValueHolderBase*>(TlsGetValue(key_));

    if (holder != NULL) {
      return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
    }

    // First access from this thread: create a holder seeded with the default
    // value and store it in this thread's TLS slot.
    ValueHolder* const new_holder = new ValueHolder(default_);
    ThreadLocalValueHolderBase* const holder_base = new_holder;
    GTEST_CHECK_(TlsSetValue(key_, holder_base) != 0);
    return new_holder->pointer();
  }

  // The TLS index (from TlsAlloc()) used for looking up per-thread values.
  const DWORD key_;
  const T default_;  // The default value for each thread.

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
};

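// Upstream Google Test has only dummy synchronization primitives in this
// branch and therefore reports itself as not thread-safe.  The value is
// still 0 here, even though the primitives above were replaced with
// implementations based on the Windows API.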
# define GTEST_IS_THREADSAFE 0

#endif  // GTEST_HAS_PTHREAD

// Returns the number of threads running in the process, or 0 to indicate that
// we cannot detect it.
GTEST_API_ size_t GetThreadCount();

// Passing non-POD classes through ellipsis (...) crashes the ARM
// compiler and generates a warning in Sun Studio.  The Nokia Symbian
// and the IBM XL C/C++ compiler try to instantiate a copy constructor
// for objects passed through ellipsis (...), failing for uncopyable
// objects.  We define this to ensure that only POD is passed through
// ellipsis on these systems.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC)
// We lose support for NULL detection where the compiler doesn't like
// passing non-POD classes through ellipsis (...).
# define GTEST_ELLIPSIS_NEEDS_POD_ 1
#else
# define GTEST_CAN_COMPARE_NULL 1
#endif

// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
// const T& and const T* in a function template.  These compilers
// _can_ decide between class template specializations for T and T*,
// so a tr1::type_traits-like is_pointer works.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
# define GTEST_NEEDS_IS_POINTER_ 1
#endif

// Wraps a compile-time boolean in a type: `value` is the boolean and `type`
// names the wrapper itself.
template <bool kValue>
struct bool_constant {
  typedef bool_constant<kValue> type;
  static const bool value = kValue;
};
// Out-of-class definition of the static member, so that it can be odr-used
// (for example, bound to a reference).
template <bool kValue>
const bool bool_constant<kValue>::value;

typedef bool_constant<false> false_type;
typedef bool_constant<true> true_type;

// is_pointer<T>::value is true when T has the form U* and false otherwise.
// Primary template: T is not a pointer.
template <class Type>
struct is_pointer : false_type {};

// Partial specialization selected for every pointer type.
template <class Pointee>
struct is_pointer<Pointee*> : true_type {};

// Maps an iterator type to the type of the elements it refers to.
// Class-type iterators are expected to expose a nested value_type.
template <class Iter>
struct IteratorTraits {
  typedef typename Iter::value_type value_type;
};

// Raw pointers have no nested typedef; the element type is the pointee.
template <class Element>
struct IteratorTraits<Element*> {
  typedef Element value_type;
};

// For pointers to const, the const qualifier is dropped.
template <class Element>
struct IteratorTraits<const Element*> {
  typedef Element value_type;
};

#if GTEST_OS_WINDOWS
# define GTEST_PATH_SEP_ "\\"
# define GTEST_HAS_ALT_PATH_SEP_ 1
// The biggest signed integer type the compiler supports.
typedef __int64 BiggestInt;
#else
# define GTEST_PATH_SEP_ "/"
# define GTEST_HAS_ALT_PATH_SEP_ 0
typedef long long BiggestInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS

// Utilities for char.

// isspace(int ch) and friends accept an unsigned char or EOF.  char
// may be signed, depending on the compiler (or compiler flags).
// Therefore we need to cast a char to unsigned char before calling
// isspace(), etc.

// Returns true iff ch is alphabetic according to isalpha().
inline bool IsAlpha(char ch) {
  // Go through unsigned char so that a negative char never reaches isalpha().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isalpha(code) != 0;
}
// Returns true iff ch is a letter or a digit according to isalnum().
inline bool IsAlNum(char ch) {
  // Go through unsigned char so that a negative char never reaches isalnum().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isalnum(code) != 0;
}
// Returns true iff ch is a decimal digit according to isdigit().
inline bool IsDigit(char ch) {
  // Go through unsigned char so that a negative char never reaches isdigit().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isdigit(code) != 0;
}
// Returns true iff ch is a lowercase letter according to islower().
inline bool IsLower(char ch) {
  // Go through unsigned char so that a negative char never reaches islower().
  const unsigned char code = static_cast<unsigned char>(ch);
  return islower(code) != 0;
}
// Returns true iff ch is whitespace according to isspace().
inline bool IsSpace(char ch) {
  // Go through unsigned char so that a negative char never reaches isspace().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isspace(code) != 0;
}
// Returns true iff ch is an uppercase letter according to isupper().
inline bool IsUpper(char ch) {
  // Go through unsigned char so that a negative char never reaches isupper().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isupper(code) != 0;
}
// Returns true iff ch is a hexadecimal digit according to isxdigit().
inline bool IsXDigit(char ch) {
  // Go through unsigned char so that a negative char never reaches
  // isxdigit().
  const unsigned char code = static_cast<unsigned char>(ch);
  return isxdigit(code) != 0;
}
// Wide-character overload: true iff ch fits in a single byte and that byte
// is a hexadecimal digit according to isxdigit().
inline bool IsXDigit(wchar_t ch) {
  const unsigned char low_byte = static_cast<unsigned char>(ch);
  if (ch != low_byte) {
    // The value does not survive truncation to one byte, so it cannot be
    // one of the single-byte hex digits.
    return false;
  }
  return isxdigit(low_byte) != 0;
}

// Returns the tolower() mapping of ch; characters without one come back
// unchanged.
inline char ToLower(char ch) {
  // Go through unsigned char so that a negative char never reaches tolower().
  const unsigned char code = static_cast<unsigned char>(ch);
  return static_cast<char>(tolower(code));
}
// Returns the toupper() mapping of ch; characters without one come back
// unchanged.
inline char ToUpper(char ch) {
  // Go through unsigned char so that a negative char never reaches toupper().
  const unsigned char code = static_cast<unsigned char>(ch);
  return static_cast<char>(toupper(code));
}

// The testing::internal::posix namespace holds wrappers for common
// POSIX functions.  These wrappers hide the differences between
// Windows/MSVC and POSIX systems.  Since some compilers define these
// standard functions as macros, the wrapper cannot have the same name
// as the wrapped function.

// Thin inline wrappers around C library and POSIX functions.  Each wrapper
// forwards its arguments unchanged to the platform's spelling of the call.
namespace posix {

// Functions with a different name on Windows.

#if GTEST_OS_WINDOWS

// File status structure filled in by Stat().
typedef struct _stat StatStruct;

# ifdef __BORLANDC__
// Borland spells these functions without a leading underscore.
inline int IsATTY(int fd) {
  return isatty(fd);
}
inline int StrCaseCmp(const char* s1, const char* s2) {
  return stricmp(s1, s2);
}
inline char* StrDup(const char* src) {
  return strdup(src);
}
# else  // !__BORLANDC__
#  if GTEST_OS_WINDOWS_MOBILE
// On Windows Mobile no descriptor is ever reported as a terminal.
inline int IsATTY(int /* fd */) {
  return 0;
}
#  else
inline int IsATTY(int fd) {
  return _isatty(fd);
}
#  endif  // GTEST_OS_WINDOWS_MOBILE
inline int StrCaseCmp(const char* s1, const char* s2) {
  return _stricmp(s1, s2);
}
inline char* StrDup(const char* src) {
  return _strdup(src);
}
# endif  // __BORLANDC__

# if GTEST_OS_WINDOWS_MOBILE
// The reinterpret_cast suggests _fileno() does not return an int on this
// platform (assumption; its declaration is not visible here).
inline int FileNo(FILE* file) {
  return reinterpret_cast<int>(_fileno(file));
}
// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
// time and thus not defined there.
# else
inline int FileNo(FILE* file) {
  return _fileno(file);
}
inline int Stat(const char* path, StatStruct* buf) {
  return _stat(path, buf);
}
inline int RmDir(const char* dir) {
  return _rmdir(dir);
}
// Returns true iff the _S_IFDIR bit is set in the mode of st.
inline bool IsDir(const StatStruct& st) {
  return (_S_IFDIR & st.st_mode) != 0;
}
# endif  // GTEST_OS_WINDOWS_MOBILE

#else

// File status structure filled in by Stat().
typedef struct stat StatStruct;

inline int FileNo(FILE* file) {
  return fileno(file);
}
inline int IsATTY(int fd) {
  return isatty(fd);
}
inline int Stat(const char* path, StatStruct* buf) {
  return stat(path, buf);
}
inline int StrCaseCmp(const char* s1, const char* s2) {
  return strcasecmp(s1, s2);
}
inline char* StrDup(const char* src) {
  return strdup(src);
}
inline int RmDir(const char* dir) {
  return rmdir(dir);
}
// Returns true iff S_ISDIR() reports the mode of st as a directory.
inline bool IsDir(const StatStruct& st) {
  return S_ISDIR(st.st_mode);
}

#endif  // GTEST_OS_WINDOWS

// Functions deprecated by MSVC 8.0.

#ifdef _MSC_VER
// Temporarily disable warning 4996 (deprecated function).
# pragma warning(push)
# pragma warning(disable:4996)
#endif

// Forwards to strncpy(); its char* result is returned as const char*.
inline const char* StrNCpy(char* dest, const char* src, size_t n) {
  return strncpy(dest, src, n);
}

// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
// StrError() aren't needed on Windows CE at this time and thus not
// defined there.

#if !GTEST_OS_WINDOWS_MOBILE
inline int ChDir(const char* dir) {
  return chdir(dir);
}
#endif
inline FILE* FOpen(const char* path, const char* mode) {
  return fopen(path, mode);
}
#if !GTEST_OS_WINDOWS_MOBILE
inline FILE* FReopen(const char* path, const char* mode, FILE* stream) {
  return freopen(path, mode, stream);
}
inline FILE* FDOpen(int fd, const char* mode) {
  return fdopen(fd, mode);
}
#endif
inline int FClose(FILE* fp) {
  return fclose(fp);
}
#if !GTEST_OS_WINDOWS_MOBILE
// Read() and Write() convert the result of read()/write() to int.
inline int Read(int fd, void* buf, unsigned int count) {
  return static_cast<int>(read(fd, buf, count));
}
inline int Write(int fd, const void* buf, unsigned int count) {
  return static_cast<int>(write(fd, buf, count));
}
inline int Close(int fd) {
  return close(fd);
}
inline const char* StrError(int errnum) {
  return strerror(errnum);
}
#endif
// Returns the value of the environment variable name, or NULL when it is
// unavailable.  On the platforms singled out below, an empty value is also
// reported as NULL.
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE
  // We are on Windows CE, which has no environment variables.
  return NULL;
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
  // Environment variables which we programmatically clear will be set to the
  // empty string rather than unset (NULL).  Handle that case.
  const char* const env = getenv(name);
  return (env != NULL && env[0] != '\0') ? env : NULL;
#else
  return getenv(name);
#endif
}

#ifdef _MSC_VER
# pragma warning(pop)  // Restores the warning state.
#endif

#if GTEST_OS_WINDOWS_MOBILE
// Windows CE has no C library. The abort() function is used in
// several places in Google Test. This implementation provides a reasonable
// imitation of standard behaviour.
void Abort();
#else
inline void Abort() {
  abort();
}
#endif  // GTEST_OS_WINDOWS_MOBILE

}  // namespace posix

// MSVC "deprecates" snprintf and issues warnings wherever it is used.  In
// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
// MSVC-based platforms.  We map the GTEST_SNPRINTF_ macro to the appropriate
// function in order to achieve that.  We use macro definition here because
// snprintf is a variadic function.
#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
// MSVC 2005 and above support variadic macros.
// Function-like form: the size argument is forwarded as both the second and
// the third argument of _snprintf_s.
# define GTEST_SNPRINTF_(buffer, size, format, ...) \
     _snprintf_s(buffer, size, size, format, __VA_ARGS__)
#elif defined(_MSC_VER)
// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
// complain about _snprintf.
// Object-like form: GTEST_SNPRINTF_ is a plain alias for _snprintf.
# define GTEST_SNPRINTF_ _snprintf
#else
// Any non-MSVC compiler: GTEST_SNPRINTF_ is a plain alias for snprintf.
# define GTEST_SNPRINTF_ snprintf
#endif

// The maximum number a BiggestInt can represent.  This definition
// works regardless of whether BiggestInt is represented in one's
// complement or two's complement.
//
// We cannot rely on numeric_limits in STL, as __int64 and long long
// are not part of standard C++ and numeric_limits doesn't need to be
// defined for them.
//
// The expression sets only the highest-order bit of a BiggestInt and then
// inverts every bit, leaving all bits set except the highest-order one.
const BiggestInt kMaxBiggestInt =
  ~(static_cast<BiggestInt>(1) << (8 * sizeof(BiggestInt) - 1));

// This template class serves as a compile-time function from size to
// type.  It maps a size in bytes to a primitive type with that
// size. e.g.
//
//   TypeWithSize<4>::UInt
//
// is typedef-ed to be unsigned int (unsigned integer made up of 4
// bytes).
//
// Such functionality should belong to STL, but I cannot find it
// there.
//
// Google Test uses this class in the implementation of floating-point
// comparison.
//
// For now it only provides Int and UInt for sizes 4 and 8, as that's all
// Google Test needs.  Other types can easily be added in the future if the
// need arises.
template <size_t kBytes>
class TypeWithSize {
 public:
  // For every size without a specialization UInt is void, which stops the
  // user from using TypeWithSize<N> with an unsupported N.
  typedef void UInt;
};

// 4-byte integers.
template <>
class TypeWithSize<4> {
 public:
  // unsigned int is 4 bytes wide under both gcc and MSVC.
  //
  // base/basictypes.h doesn't compile on Windows, so uint32, uint64,
  // etc. cannot be used here.
  typedef unsigned int UInt;
  typedef int Int;
};

// 8-byte integers.
template <>
class TypeWithSize<8> {
 public:
#if !GTEST_OS_WINDOWS
  typedef unsigned long long UInt;  // NOLINT
  typedef long long Int;  // NOLINT
#else
  typedef unsigned __int64 UInt;
  typedef __int64 Int;
#endif  // !GTEST_OS_WINDOWS
};

// Fixed-width integer names built on top of TypeWithSize.
typedef TypeWithSize<8>::UInt UInt64;
typedef TypeWithSize<8>::Int Int64;
typedef TypeWithSize<4>::UInt UInt32;
typedef TypeWithSize<4>::Int Int32;
typedef Int64 TimeInMillis;  // Represents time in milliseconds.

// Utilities for command line flags and environment variables.

// Macro for referencing flags.  GTEST_FLAG(foo) expands to the identifier
// FLAGS_gtest_foo.
#define GTEST_FLAG(name) FLAGS_gtest_##name

// Macros for declaring flags.  Each expands to an extern declaration of the
// variable named by GTEST_FLAG(name), with type bool, Int32 or ::std::string
// respectively.
#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
#define GTEST_DECLARE_int32_(name) \
    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
#define GTEST_DECLARE_string_(name) \
    GTEST_API_ extern ::std::string GTEST_FLAG(name)

// Macros for defining flags.  Each expands to a definition of the variable
// named by GTEST_FLAG(name), initialized with default_val.  The doc argument
// is accepted but does not appear in the expansion.
#define GTEST_DEFINE_bool_(name, default_val, doc) \
    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_int32_(name, default_val, doc) \
    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_string_(name, default_val, doc) \
    GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)

// Thread annotations.  Both macros take a locks argument and expand to
// nothing here.
#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
#define GTEST_LOCK_EXCLUDED_(locks)

// Parses 'str' for a 32-bit signed integer.  If successful, writes the result
// to *value and returns true; otherwise leaves *value unchanged and returns
// false.
// TODO(chandlerc): Find a better way to refactor flag and environment parsing
// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
// function.
bool ParseInt32(const Message& src_text, const char* str, Int32* value);

// Parses a bool/Int32/string from the environment variable
// corresponding to the given Google Test flag.
bool BoolFromGTestEnv(const char* flag, bool default_val);
GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
const char* StringFromGTestEnv(const char* flag, const char* default_val);

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-string.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
//
// The Google C++ Testing Framework (Google Test)
//
// This header file declares the String class and functions used internally by
// Google Test.  They are subject to change without notice.  They should not be
// used by code external to Google Test.
//
// This header file is #included by <gtest/internal/gtest-internal.h>.
// It should not be #included by other files.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_

#ifdef __BORLANDC__
// string.h is not guaranteed to provide strcpy on C++ Builder.
# include <mem.h>
#endif

#include <string.h>
#include <string>

#include "gtest/internal/gtest-port.h"

namespace testing {
namespace internal {

// String - a holder class for static string utilities.  Every member
// function is static and the only constructor is private, so the class is
// not meant to be instantiated.
class GTEST_API_ String {
 public:
  // Static utility methods

  // Clones a 0-terminated C string, allocating memory using new.  The
  // caller is responsible for deleting the return value using
  // delete[].  Returns the cloned string, or NULL if the input is
  // NULL.
  //
  // This is different from strdup() in string.h, which allocates
  // memory using malloc().
  static const char* CloneCString(const char* c_str);

#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
  // able to pass strings to Win32 APIs on CE we need to convert them
  // to 'Unicode', UTF-16.

  // Creates a UTF-16 wide string from the given ANSI string, allocating
  // memory using new. The caller is responsible for deleting the return
  // value using delete[]. Returns the wide string, or NULL if the
  // input is NULL.
  //
  // The wide string is created using the ANSI codepage (CP_ACP) to
  // match the behaviour of the ANSI versions of Win32 calls and the
  // C runtime.
  static LPCWSTR AnsiToUtf16(const char* c_str);

  // Creates an ANSI string from the given wide string, allocating
  // memory using new. The caller is responsible for deleting the return
  // value using delete[]. Returns the ANSI string, or NULL if the
  // input is NULL.
  //
  // The returned string is created using the ANSI codepage (CP_ACP) to
  // match the behaviour of the ANSI versions of Win32 calls and the
  // C runtime.
  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
#endif

  // Compares two C strings.  Returns true iff they have the same content.
  //
  // Unlike strcmp(), this function can handle NULL argument(s).  A
  // NULL C string is considered different to any non-NULL C string,
  // including the empty string.
  static bool CStringEquals(const char* lhs, const char* rhs);

  // Converts a wide C string to a std::string using the UTF-8 encoding.
  // NULL will be converted to "(null)".  If an error occurred during
  // the conversion, "(failed to convert from wide string)" is
  // returned.
  static std::string ShowWideCString(const wchar_t* wide_c_str);

  // Compares two wide C strings.  Returns true iff they have the same
  // content.
  //
  // Unlike wcscmp(), this function can handle NULL argument(s).  A
  // NULL wide C string is considered different to any non-NULL wide C
  // string, including the empty string.
  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);

  // Compares two C strings, ignoring case.  Returns true iff they
  // have the same content.
  //
  // Unlike strcasecmp(), this function can handle NULL argument(s).
  // A NULL C string is considered different to any non-NULL C string,
  // including the empty string.
  static bool CaseInsensitiveCStringEquals(const char* lhs,
      const char* rhs);

  // Compares two wide C strings, ignoring case.  Returns true iff they
  // have the same content.
  //
  // Unlike wcscasecmp(), this function can handle NULL argument(s).
  // A NULL wide C string is considered different to any non-NULL wide C
  // string, including the empty string.
  // NB: The implementations on different platforms slightly differ.
  // On Windows, this method uses _wcsicmp which compares according to LC_CTYPE
  // environment variable. On GNU platform this method uses wcscasecmp
  // which compares according to LC_CTYPE category of the current locale.
  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
  // current locale.
  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
      const wchar_t* rhs);

  // Returns true iff the given string ends with the given suffix, ignoring
  // case. Any string is considered to end with an empty suffix.
  static bool EndsWithCaseInsensitive(
    const std::string& str, const std::string& suffix);

  // Formats an int value as "%02d".
  static std::string FormatIntWidth2(int value);  // "%02d" for width == 2

  // Formats an int value as "%X".
  static std::string FormatHexInt(int value);

  // Formats a byte as "%02X".
  static std::string FormatByte(unsigned char value);

 private:
  // Private so that client code cannot create String objects.
  String();  // Not meant to be instantiated.
};  // class String

// Gets the content of the stringstream's buffer as an std::string.  Each '\0'
// character in the buffer is replaced with "\\0".
GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-tuple.h
================================================
// This file was GENERATED by command:
//     pump.py gtest-tuple.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2009 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Implements a subset of TR1 tuple needed by Google Test and Google Mock.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_

#include <utility>  // For ::std::pair.

// The compiler used in Symbian has a bug that prevents us from declaring the
// tuple template as a friend (it complains that tuple is redefined).  This
// hack bypasses the bug by declaring the members that should otherwise be
// private as public.
// Sun Studio versions < 12 also have the above bug.
#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
// Affected compilers: no friend declaration; the members that follow the
// macro are simply made public.
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
#else
// All other compilers: befriend every instantiation of the tuple template,
// then open a private section for the members that follow the macro.
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
    template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
   private:
#endif

// GTEST_n_TUPLE_(T) is the type of an n-tuple.  Its element types are
// T##0 .. T##(n-1), where T is the macro argument used as a name prefix;
// for n from 1 to 9 the remaining slots of the 10-parameter tuple template
// are filled with void.  GTEST_0_TUPLE_ ignores its argument and is tuple<>.
#define GTEST_0_TUPLE_(T) tuple<>
#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \
    void, void, void>
#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \
    void, void, void>
#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \
    void, void, void>
#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \
    void, void, void>
#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
    void, void, void>
#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
    void, void, void>
#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    void, void, void>
#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, void, void>
#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, void>
#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
    T##7, T##8, T##9>

// GTEST_n_TYPENAMES_(T) declares a list of n typenames, named T##0 through
// T##(n-1) where T is the macro argument used as a name prefix.  It is meant
// to be used inside a template parameter list; GTEST_0_TYPENAMES_ expands to
// nothing.
#define GTEST_0_TYPENAMES_(T)
#define GTEST_1_TYPENAMES_(T) typename T##0
#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3
#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4
#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5
#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6
#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, \
    typename T##7, typename T##8
#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
    typename T##3, typename T##4, typename T##5, typename T##6, \
    typename T##7, typename T##8, typename T##9

// In theory, defining stuff in the ::std namespace is undefined
// behavior.  We can do this as we are playing the role of a standard
// library vendor.
namespace std {
namespace tr1 {

template <typename T0 = void, typename T1 = void, typename T2 = void,
          typename T3 = void, typename T4 = void, typename T5 = void,
          typename T6 = void, typename T7 = void, typename T8 = void,
          typename T9 = void>
class tuple;

// Anything in namespace gtest_internal is Google Test's INTERNAL
// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
namespace gtest_internal {

// ByRef<T>::type leaves a reference type T untouched and maps every other
// T to const T&.
template <typename Value>
struct ByRef {
  typedef Value const& type;
};  // NOLINT
template <typename Referent>
struct ByRef<Referent&> {
  typedef Referent& type;
};  // NOLINT

// A handy wrapper for ByRef.
#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type

// AddRef<T>::type leaves a reference type T untouched and maps every other
// T to T&.  This is the same as tr1::add_reference<T>::type.
template <typename Value>
struct AddRef {
  typedef Value& type;
};  // NOLINT
template <typename Referent>
struct AddRef<Referent&> {
  typedef Referent& type;
};  // NOLINT

// A handy wrapper for AddRef.
#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type

// A helper for implementing get<k>().
template <int k> class Get;

// A helper for implementing tuple_element<k, T>.  kIndexValid is true
// iff k < the number of fields in tuple type T.
//
// Only the kIndexValid == true case is specialized below, once for each
// index 0 through 9; the specialization for index k defines the member
// typedef `type` as Tk, the k-th type argument of the tuple.
template <bool kIndexValid, int kIndex, class Tuple>
struct TupleElement;

// Index 0 -> T0.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 0, GTEST_10_TUPLE_(T) > {
  typedef T0 type;
};

// Index 1 -> T1.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 1, GTEST_10_TUPLE_(T) > {
  typedef T1 type;
};

// Index 2 -> T2.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 2, GTEST_10_TUPLE_(T) > {
  typedef T2 type;
};

// Index 3 -> T3.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 3, GTEST_10_TUPLE_(T) > {
  typedef T3 type;
};

// Index 4 -> T4.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 4, GTEST_10_TUPLE_(T) > {
  typedef T4 type;
};

// Index 5 -> T5.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 5, GTEST_10_TUPLE_(T) > {
  typedef T5 type;
};

// Index 6 -> T6.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 6, GTEST_10_TUPLE_(T) > {
  typedef T6 type;
};

// Index 7 -> T7.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 7, GTEST_10_TUPLE_(T) > {
  typedef T7 type;
};

// Index 8 -> T8.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 8, GTEST_10_TUPLE_(T) > {
  typedef T8 type;
};

// Index 9 -> T9.
template <GTEST_10_TYPENAMES_(T)>
struct TupleElement<true, 9, GTEST_10_TUPLE_(T) > {
  typedef T9 type;
};

}  // namespace gtest_internal

// The empty tuple.  It stores nothing, so copy construction and copy
// assignment have no fields to transfer.
template <>
class tuple<> {
 public:
  tuple() {}
  tuple(const tuple&) {}
  tuple& operator=(const tuple&) { return *this; }
};

// 1-field tuple: the partial specialization of tuple whose last nine type
// arguments are void (see GTEST_1_TUPLE_).  The field is stored in f0_.
template <GTEST_1_TYPENAMES_(T)>
class GTEST_1_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private field.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes the field.
  tuple() : f0_() {}

  // Initializes the field from the argument.
  explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {}

  // Copy constructor.
  tuple(const tuple & t) : f0_(t.f0_) {}

  // Converting constructor from a 1-tuple with another field type.
  template <GTEST_1_TYPENAMES_(U)>
  tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_1_TYPENAMES_(U)>
  tuple& operator=(const GTEST_1_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns the field from the matching field of t and returns *this.
  template <GTEST_1_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) {
    f0_ = t.f0_;
    return *this;
  }

  T0 f0_;
};

// 2-field tuple: the partial specialization of tuple whose last eight type
// arguments are void (see GTEST_2_TUPLE_).  Fields are stored in f0_ and f1_.
// It can additionally be constructed from and assigned a ::std::pair.
template <GTEST_2_TYPENAMES_(T)>
class GTEST_2_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0),
    f1_(f1) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_) {}

  // Converting constructor from a 2-tuple with other field types.
  template <GTEST_2_TYPENAMES_(U)>
  tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {}
  // Converting constructor from a pair: first -> f0_, second -> f1_.
  template <typename U0, typename U1>
  tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}

  // The tuple-to-tuple assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_2_TYPENAMES_(U)>
  tuple& operator=(const GTEST_2_TUPLE_(U)& t) {
    return CopyFrom(t);
  }
  // Assignment from a pair: first -> f0_, second -> f1_.
  template <typename U0, typename U1>
  tuple& operator=(const ::std::pair<U0, U1>& p) {
    f0_ = p.first;
    f1_ = p.second;
    return *this;
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_2_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
};

// 3-field tuple: the partial specialization of tuple whose last seven type
// arguments are void (see GTEST_3_TUPLE_).  Fields are stored in f0_..f2_.
template <GTEST_3_TYPENAMES_(T)>
class GTEST_3_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}

  // Converting constructor from a 3-tuple with other field types.
  template <GTEST_3_TYPENAMES_(U)>
  tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_3_TYPENAMES_(U)>
  tuple& operator=(const GTEST_3_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_3_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
};

// 4-field tuple: the partial specialization of tuple whose last six type
// arguments are void (see GTEST_4_TUPLE_).  Fields are stored in f0_..f3_.
template <GTEST_4_TYPENAMES_(T)>
class GTEST_4_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2),
    f3_(f3) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {}

  // Converting constructor from a 4-tuple with other field types.
  template <GTEST_4_TYPENAMES_(U)>
  tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_4_TYPENAMES_(U)>
  tuple& operator=(const GTEST_4_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_4_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
};

// 5-field tuple: the partial specialization of tuple whose last five type
// arguments are void (see GTEST_5_TUPLE_).  Fields are stored in f0_..f4_.
template <GTEST_5_TYPENAMES_(T)>
class GTEST_5_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3,
                 GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_) {}

  // Converting constructor from a 5-tuple with other field types.
  template <GTEST_5_TYPENAMES_(U)>
  tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_5_TYPENAMES_(U)>
  tuple& operator=(const GTEST_5_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_5_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
};

// 6-field tuple: the partial specialization of tuple whose last four type
// arguments are void (see GTEST_6_TUPLE_).  Fields are stored in f0_..f5_.
template <GTEST_6_TYPENAMES_(T)>
class GTEST_6_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
                 GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
    f5_(f5) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_), f5_(t.f5_) {}

  // Converting constructor from a 6-tuple with other field types.
  template <GTEST_6_TYPENAMES_(U)>
  tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_6_TYPENAMES_(U)>
  tuple& operator=(const GTEST_6_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_6_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
};

// 7-field tuple: the partial specialization of tuple whose last three type
// arguments are void (see GTEST_7_TUPLE_).  Fields are stored in f0_..f6_.
template <GTEST_7_TYPENAMES_(T)>
class GTEST_7_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
                 GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2),
    f3_(f3), f4_(f4), f5_(f5), f6_(f6) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}

  // Converting constructor from a 7-tuple with other field types.
  template <GTEST_7_TYPENAMES_(U)>
  tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_7_TYPENAMES_(U)>
  tuple& operator=(const GTEST_7_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_7_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
};

// 8-field tuple: the partial specialization of tuple whose last two type
// arguments are void (see GTEST_8_TUPLE_).  Fields are stored in f0_..f7_.
template <GTEST_8_TYPENAMES_(T)>
class GTEST_8_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
                 GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6,
                 GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
    f5_(f5), f6_(f6), f7_(f7) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}

  // Converting constructor from an 8-tuple with other field types.
  template <GTEST_8_TYPENAMES_(U)>
  tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_8_TYPENAMES_(U)>
  tuple& operator=(const GTEST_8_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_8_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
};

// 9-field tuple: the partial specialization of tuple whose last type
// argument is void (see GTEST_9_TUPLE_).  Fields are stored in f0_..f8_.
template <GTEST_9_TYPENAMES_(T)>
class GTEST_9_TUPLE_(T) {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
                 GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
                 GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
    f5_(f5), f6_(f6), f7_(f7), f8_(f8) {}

  // Copy constructor: copies field by field.
  tuple(const tuple & t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}

  // Converting constructor from a 9-tuple with other field types.
  template <GTEST_9_TYPENAMES_(U)>
  tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple & t) {
    return CopyFrom(t);
  }

  template <GTEST_9_TYPENAMES_(U)>
  tuple& operator=(const GTEST_9_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_9_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    f8_ = t.f8_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
  T8 f8_;
};

// 10-field tuple: the definition of the primary tuple template, used when
// no specialization for a smaller field count matches.  Fields are stored in
// f0_..f9_.
template <GTEST_10_TYPENAMES_(T)>
class tuple {
 public:
  // Grants every gtest_internal::Get<k> access to the private fields.
  template <int k> friend class gtest_internal::Get;

  // Value-initializes every field.
  tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(),
    f9_() {}

  // Initializes each field from the matching argument.
  explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
                 GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
                 GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
                 GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2),
    f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {}

  // Copy constructor: copies field by field.
  tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
    f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {}

  // Converting constructor from a 10-tuple with other field types.
  template <GTEST_10_TYPENAMES_(U)>
  tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
    f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_),
    f9_(t.f9_) {}

  // Both assignment operators delegate to CopyFrom().
  tuple& operator=(const tuple& t) {
    return CopyFrom(t);
  }

  template <GTEST_10_TYPENAMES_(U)>
  tuple& operator=(const GTEST_10_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

  // Befriends all tuple instantiations; what follows is private, except on
  // the compilers for which this macro expands to "public:".
  GTEST_DECLARE_TUPLE_AS_FRIEND_

  // Assigns each field from the matching field of t and returns *this.
  template <GTEST_10_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) {
    f0_ = t.f0_;
    f1_ = t.f1_;
    f2_ = t.f2_;
    f3_ = t.f3_;
    f4_ = t.f4_;
    f5_ = t.f5_;
    f6_ = t.f6_;
    f7_ = t.f7_;
    f8_ = t.f8_;
    f9_ = t.f9_;
    return *this;
  }

  T0 f0_;
  T1 f1_;
  T2 f2_;
  T3 f3_;
  T4 f4_;
  T5 f5_;
  T6 f6_;
  T7 f7_;
  T8 f8_;
  T9 f9_;
};

// 6.1.3.2 Tuple creation functions.

// Known limitations: we don't support passing an
// std::tr1::reference_wrapper<T> to make_tuple().  And we don't
// implement tie().

// Returns the tuple that has no fields.
inline tuple<> make_tuple() {
  typedef tuple<> Result;
  return Result();
}

// Returns a 1-field tuple constructed from the given value.
template <GTEST_1_TYPENAMES_(T)>
inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) {
  typedef GTEST_1_TUPLE_(T) Result;
  return Result(f0);
}

// Returns a 2-field tuple constructed from the given values.
template <GTEST_2_TYPENAMES_(T)>
inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) {
  typedef GTEST_2_TUPLE_(T) Result;
  return Result(f0, f1);
}

// Returns a 3-field tuple constructed from the given values.
template <GTEST_3_TYPENAMES_(T)>
inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2) {
  typedef GTEST_3_TUPLE_(T) Result;
  return Result(f0, f1, f2);
}

// Returns a 4-field tuple constructed from the given values.
template <GTEST_4_TYPENAMES_(T)>
inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3) {
  typedef GTEST_4_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3);
}

// Returns a 5-field tuple constructed from the given values.
template <GTEST_5_TYPENAMES_(T)>
inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3,
                                    const T4& f4) {
  typedef GTEST_5_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4);
}

// Returns a 6-field tuple constructed from the given values.
template <GTEST_6_TYPENAMES_(T)>
inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3,
                                    const T4& f4, const T5& f5) {
  typedef GTEST_6_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4, f5);
}

// Returns a 7-field tuple constructed from the given values.
template <GTEST_7_TYPENAMES_(T)>
inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3,
                                    const T4& f4, const T5& f5,
                                    const T6& f6) {
  typedef GTEST_7_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4, f5, f6);
}

// Returns an 8-field tuple constructed from the given values.
template <GTEST_8_TYPENAMES_(T)>
inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3,
                                    const T4& f4, const T5& f5,
                                    const T6& f6, const T7& f7) {
  typedef GTEST_8_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4, f5, f6, f7);
}

// Returns a 9-field tuple constructed from the given values.
template <GTEST_9_TYPENAMES_(T)>
inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                    const T2& f2, const T3& f3,
                                    const T4& f4, const T5& f5,
                                    const T6& f6, const T7& f7,
                                    const T8& f8) {
  typedef GTEST_9_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4, f5, f6, f7, f8);
}

// Returns a 10-field tuple constructed from the given values.
template <GTEST_10_TYPENAMES_(T)>
inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1,
                                     const T2& f2, const T3& f3,
                                     const T4& f4, const T5& f5,
                                     const T6& f6, const T7& f7,
                                     const T8& f8, const T9& f9) {
  typedef GTEST_10_TUPLE_(T) Result;
  return Result(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
}

// 6.1.3.3 Tuple helper classes.

template <typename Tuple> struct tuple_size;

// Size of the tuple with no fields.
template <GTEST_0_TYPENAMES_(T)>
struct tuple_size<GTEST_0_TUPLE_(T) > { static const int value = 0; };

// Size of a 1-field tuple.
template <GTEST_1_TYPENAMES_(T)>
struct tuple_size<GTEST_1_TUPLE_(T) > { static const int value = 1; };

// Size of a 2-field tuple.
template <GTEST_2_TYPENAMES_(T)>
struct tuple_size<GTEST_2_TUPLE_(T) > { static const int value = 2; };

// Size of a 3-field tuple.
template <GTEST_3_TYPENAMES_(T)>
struct tuple_size<GTEST_3_TUPLE_(T) > { static const int value = 3; };

// Size of a 4-field tuple.
template <GTEST_4_TYPENAMES_(T)>
struct tuple_size<GTEST_4_TUPLE_(T) > { static const int value = 4; };

// Size of a 5-field tuple.
template <GTEST_5_TYPENAMES_(T)>
struct tuple_size<GTEST_5_TUPLE_(T) > { static const int value = 5; };

// Size of a 6-field tuple.
template <GTEST_6_TYPENAMES_(T)>
struct tuple_size<GTEST_6_TUPLE_(T) > { static const int value = 6; };

// Size of a 7-field tuple.
template <GTEST_7_TYPENAMES_(T)>
struct tuple_size<GTEST_7_TUPLE_(T) > { static const int value = 7; };

// Size of an 8-field tuple.
template <GTEST_8_TYPENAMES_(T)>
struct tuple_size<GTEST_8_TUPLE_(T) > { static const int value = 8; };

// Size of a 9-field tuple.
template <GTEST_9_TYPENAMES_(T)>
struct tuple_size<GTEST_9_TUPLE_(T) > { static const int value = 9; };

// Size of a 10-field tuple.
template <GTEST_10_TYPENAMES_(T)>
struct tuple_size<GTEST_10_TUPLE_(T) > { static const int value = 10; };

// tuple_element<k, Tuple>::type is looked up through
// gtest_internal::TupleElement, whose first argument records whether k is
// below tuple_size<Tuple>::value.
template <int k, class Tuple>
struct tuple_element {
  typedef typename gtest_internal::TupleElement<
      (k < tuple_size<Tuple>::value), k, Tuple>::type type;
};

#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type

// 6.1.3.4 Element access.

namespace gtest_internal {

// Get<0> reaches member f0_ of a tuple.
template <>
class Get<0> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, TupleT))
  Field(TupleT& tup) { return tup.f0_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, TupleT))
  ConstField(const TupleT& tup) { return tup.f0_; }
};

// Get<1> reaches member f1_ of a tuple.
template <>
class Get<1> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, TupleT))
  Field(TupleT& tup) { return tup.f1_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, TupleT))
  ConstField(const TupleT& tup) { return tup.f1_; }
};

// Get<2> reaches member f2_ of a tuple.
template <>
class Get<2> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, TupleT))
  Field(TupleT& tup) { return tup.f2_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, TupleT))
  ConstField(const TupleT& tup) { return tup.f2_; }
};

// Get<3> reaches member f3_ of a tuple.
template <>
class Get<3> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, TupleT))
  Field(TupleT& tup) { return tup.f3_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, TupleT))
  ConstField(const TupleT& tup) { return tup.f3_; }
};

// Get<4> reaches member f4_ of a tuple.
template <>
class Get<4> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, TupleT))
  Field(TupleT& tup) { return tup.f4_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, TupleT))
  ConstField(const TupleT& tup) { return tup.f4_; }
};

// Get<5> reaches member f5_ of a tuple.
template <>
class Get<5> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, TupleT))
  Field(TupleT& tup) { return tup.f5_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, TupleT))
  ConstField(const TupleT& tup) { return tup.f5_; }
};

// Get<6> reaches member f6_ of a tuple.
template <>
class Get<6> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, TupleT))
  Field(TupleT& tup) { return tup.f6_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, TupleT))
  ConstField(const TupleT& tup) { return tup.f6_; }
};

// Get<7> reaches member f7_ of a tuple.
template <>
class Get<7> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, TupleT))
  Field(TupleT& tup) { return tup.f7_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, TupleT))
  ConstField(const TupleT& tup) { return tup.f7_; }
};

// Get<8> reaches member f8_ of a tuple.
template <>
class Get<8> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, TupleT))
  Field(TupleT& tup) { return tup.f8_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, TupleT))
  ConstField(const TupleT& tup) { return tup.f8_; }
};

// Get<9> reaches member f9_ of a tuple.
template <>
class Get<9> {
 public:
  // Access through a non-const tuple.
  template <class TupleT>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, TupleT))
  Field(TupleT& tup) { return tup.f9_; }  // NOLINT

  // Access through a const tuple.
  template <class TupleT>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, TupleT))
  ConstField(const TupleT& tup) { return tup.f9_; }
};

}  // namespace gtest_internal

// get<k>(t) on a non-const tuple: forwards to Get<k>::Field().
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
get(GTEST_10_TUPLE_(T)& t) {
  typedef gtest_internal::Get<k> Accessor;
  return Accessor::Field(t);
}

// get<k>(t) on a const tuple: forwards to Get<k>::ConstField().
template <int k, GTEST_10_TYPENAMES_(T)>
GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k,  GTEST_10_TUPLE_(T)))
get(const GTEST_10_TUPLE_(T)& t) {
  typedef gtest_internal::Get<k> Accessor;
  return Accessor::ConstField(t);
}

// 6.1.3.5 Relational operators

// We only implement == and !=, as we don't have a need for the rest yet.

namespace gtest_internal {

// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) tells whether the first
// k fields of t1 equal the first k fields of t2.  The primary template is
// only declared, so SameSizeTuplePrefixComparator<k1, k2> with k1 != k2
// fails to compile.
template <int kSize1, int kSize2>
struct SameSizeTuplePrefixComparator;

// Base case: two empty prefixes are always equal.
template <>
struct SameSizeTuplePrefixComparator<0, 0> {
  template <class Lhs, class Rhs>
  static bool Eq(const Lhs& /* lhs */, const Rhs& /* rhs */) { return true; }
};

template <int k>
struct SameSizeTuplePrefixComparator<k, k> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
    return SameSizeTuplePrefixComparator < k - 1, k - 1 >::Eq(t1, t2) &&
           ::std::tr1::get < k - 1 > (t1) == ::std::tr1::get < k - 1 > (t2);
  }
};

}  // namespace gtest_internal

// Field-by-field equality.  The comparator is only defined for equal
// sizes, so comparing tuples of different sizes does not compile.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator==(const GTEST_10_TUPLE_(T)& t,
                       const GTEST_10_TUPLE_(U)& u) {
  typedef gtest_internal::SameSizeTuplePrefixComparator<
      tuple_size<GTEST_10_TUPLE_(T) >::value,
      tuple_size<GTEST_10_TUPLE_(U) >::value> Comparator;
  return Comparator::Eq(t, u);
}

// Inequality is the negation of operator==.
template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
                       const GTEST_10_TUPLE_(U)& u) {
  const bool equal = (t == u);
  return !equal;
}

// 6.1.4 Pairs.
// Unimplemented.

}  // namespace tr1
}  // namespace std

#undef GTEST_0_TUPLE_
#undef GTEST_1_TUPLE_
#undef GTEST_2_TUPLE_
#undef GTEST_3_TUPLE_
#undef GTEST_4_TUPLE_
#undef GTEST_5_TUPLE_
#undef GTEST_6_TUPLE_
#undef GTEST_7_TUPLE_
#undef GTEST_8_TUPLE_
#undef GTEST_9_TUPLE_
#undef GTEST_10_TUPLE_

#undef GTEST_0_TYPENAMES_
#undef GTEST_1_TYPENAMES_
#undef GTEST_2_TYPENAMES_
#undef GTEST_3_TYPENAMES_
#undef GTEST_4_TYPENAMES_
#undef GTEST_5_TYPENAMES_
#undef GTEST_6_TYPENAMES_
#undef GTEST_7_TYPENAMES_
#undef GTEST_8_TYPENAMES_
#undef GTEST_9_TYPENAMES_
#undef GTEST_10_TYPENAMES_

#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
#undef GTEST_BY_REF_
#undef GTEST_ADD_REF_
#undef GTEST_TUPLE_ELEMENT_

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-tuple.h.pump
================================================
$$ -*- mode: c++; -*-
$var n = 10  $$ Maximum number of tuple fields we want to support.
$$ This meta comment fixes auto-indentation in Emacs. }}
// Copyright 2009 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Implements a subset of TR1 tuple needed by Google Test and Google Mock.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_

#include <utility>  // For ::std::pair.

// The compiler used in Symbian has a bug that prevents us from declaring the
// tuple template as a friend (it complains that tuple is redefined).  This
// hack bypasses the bug by declaring the members that should otherwise be
// private as public.
// Sun Studio versions < 12 also have the above bug.
#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
#else
# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
    template <GTEST_$(n)_TYPENAMES_(U)> friend class tuple; \
   private:
#endif


$range i 0..n-1
$range j 0..n
$range k 1..n
// GTEST_n_TUPLE_(T) is the type of an n-tuple.
#define GTEST_0_TUPLE_(T) tuple<>

$for k [[
$range m 0..k-1
$range m2 k..n-1
#define GTEST_$(k)_TUPLE_(T) tuple<$for m, [[T##$m]]$for m2 [[, void]]>

]]

// GTEST_n_TYPENAMES_(T) declares a list of n typenames.

$for j [[
$range m 0..j-1
#define GTEST_$(j)_TYPENAMES_(T) $for m, [[typename T##$m]]


]]

// In theory, defining stuff in the ::std namespace is undefined
// behavior.  We can do this as we are playing the role of a standard
// library vendor.
namespace std {
namespace tr1 {

template <$for i, [[typename T$i = void]]>
class tuple;

// Anything in namespace gtest_internal is Google Test's INTERNAL
// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
namespace gtest_internal {

// ByRef<T>::type leaves a reference type T unchanged and turns any other
// T into const T&.
template <class T>
struct ByRef {
  typedef T const& type;  // NOLINT
};
template <class T>
struct ByRef<T&> {
  typedef T& type;  // NOLINT
};

// A handy wrapper for ByRef.
#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type

// AddRef<T>::type leaves a reference type T unchanged and turns any other
// T into T&.  This is the same as tr1::add_reference<T>::type.
template <class T>
struct AddRef {
  typedef T& type;  // NOLINT
};
template <class T>
struct AddRef<T&> {
  typedef T& type;  // NOLINT
};

// A handy wrapper for AddRef.
#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type

// A helper for implementing get<k>().
template <int k> class Get;

// A helper for implementing tuple_element<k, T>.  kIndexValid is true
// iff k < the number of fields in tuple type T.
template <bool kIndexValid, int kIndex, class Tuple>
struct TupleElement;


// TupleElement<true, k, Tuple>::type is the type of field k of Tuple.
// One specialization is provided for every supported field index.
$for i [[
template <GTEST_$(n)_TYPENAMES_(T)>
struct TupleElement<true, $i, GTEST_$(n)_TUPLE_(T) > {
  typedef T$i type;
};


]]
}  // namespace gtest_internal

// The tuple with no fields.  It stores nothing, so construction and copy
// construction have empty bodies and assignment only returns *this.
template <>
class tuple<> {
 public:
  tuple() {
  }

  tuple(const tuple& /* other */) {
  }

  tuple& operator=(const tuple& /* other */) {
    return *this;
  }
};


// The tuple class templates with one or more fields.  Each offers default,
// field-wise, copy and converting construction plus assignment; the
// two-field tuple additionally converts from ::std::pair.
$for k [[
$range m 0..k-1
template <GTEST_$(k)_TYPENAMES_(T)>
class $if k < n [[GTEST_$(k)_TUPLE_(T)]] $else [[tuple]] {
 public:
  template <int k> friend class gtest_internal::Get;

  tuple() : $for m, [[f$(m)_()]] {}

  explicit tuple($for m, [[GTEST_BY_REF_(T$m) f$m]]) : [[]]
$for m, [[f$(m)_(f$m)]] {}

  tuple(const tuple& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}

  template <GTEST_$(k)_TYPENAMES_(U)>
  tuple(const GTEST_$(k)_TUPLE_(U)& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}

$if k == 2 [[
  template <typename U0, typename U1>
  tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}

]]

  tuple& operator=(const tuple& t) { return CopyFrom(t); }

  template <GTEST_$(k)_TYPENAMES_(U)>
  tuple& operator=(const GTEST_$(k)_TUPLE_(U)& t) {
    return CopyFrom(t);
  }

$if k == 2 [[
  template <typename U0, typename U1>
  tuple& operator=(const ::std::pair<U0, U1>& p) {
    f0_ = p.first;
    f1_ = p.second;
    return *this;
  }

]]

  GTEST_DECLARE_TUPLE_AS_FRIEND_

  template <GTEST_$(k)_TYPENAMES_(U)>
  tuple& CopyFrom(const GTEST_$(k)_TUPLE_(U)& t) {

$for m [[
    f$(m)_ = t.f$(m)_;

]]
    return *this;
  }


$for m [[
  T$m f$(m)_;

]]
};


]]
// 6.1.3.2 Tuple creation functions.

// Known limitations: we don't support passing an
// std::tr1::reference_wrapper<T> to make_tuple().  And we don't
// implement tie().

// Returns the tuple that has no fields.
inline tuple<> make_tuple() {
  typedef tuple<> Result;
  return Result();
}

// make_tuple() overloads for one or more values.  Each returns a tuple
// constructed from its arguments, in order.
$for k [[
$range m 0..k-1

template <GTEST_$(k)_TYPENAMES_(T)>
inline GTEST_$(k)_TUPLE_(T) make_tuple($for m, [[const T$m& f$m]]) {
  return GTEST_$(k)_TUPLE_(T)($for m, [[f$m]]);
}

]]

// 6.1.3.3 Tuple helper classes.

// tuple_size<Tuple>::value is the number of fields in Tuple.  Only the
// specializations below are defined; the primary template is declared
// without a definition.
template <typename Tuple> struct tuple_size;


$for j [[
template <GTEST_$(j)_TYPENAMES_(T)>
struct tuple_size<GTEST_$(j)_TUPLE_(T) > {
  static const int value = $j;
};


]]
// tuple_element<k, Tuple>::type is looked up through
// gtest_internal::TupleElement, whose first argument records whether k is
// below tuple_size<Tuple>::value.
template <int k, class Tuple>
struct tuple_element {
  typedef typename gtest_internal::TupleElement<
      (k < tuple_size<Tuple>::value), k, Tuple>::type type;
};

#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type

// 6.1.3.4 Element access.

namespace gtest_internal {


// Get<k> gives access to field k of a tuple: Field() takes a non-const
// tuple and ConstField() takes a const one.
$for i [[
template <>
class Get<$i> {
 public:
  template <class Tuple>
  static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
  Field(Tuple& t) { return t.f$(i)_; }  // NOLINT

  template <class Tuple>
  static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
  ConstField(const Tuple& t) { return t.f$(i)_; }
};


]]
}  // namespace gtest_internal

// get<k>(t) on a non-const tuple: returns what Get<k>::Field(t) returns.
template <int k, GTEST_$(n)_TYPENAMES_(T)>
GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
get(GTEST_$(n)_TUPLE_(T)& t) {
  return gtest_internal::Get<k>::Field(t);
}

// get<k>(t) on a const tuple: returns what Get<k>::ConstField(t) returns.
template <int k, GTEST_$(n)_TYPENAMES_(T)>
GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k,  GTEST_$(n)_TUPLE_(T)))
get(const GTEST_$(n)_TUPLE_(T)& t) {
  return gtest_internal::Get<k>::ConstField(t);
}

// 6.1.3.5 Relational operators

// We only implement == and !=, as we don't have a need for the rest yet.

namespace gtest_internal {

// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) tells whether the first
// k fields of t1 equal the first k fields of t2.  The primary template is
// only declared, so SameSizeTuplePrefixComparator<k1, k2> with k1 != k2
// fails to compile.
template <int kSize1, int kSize2>
struct SameSizeTuplePrefixComparator;

// Base case: two empty prefixes are always equal.
template <>
struct SameSizeTuplePrefixComparator<0, 0> {
  template <class Lhs, class Rhs>
  static bool Eq(const Lhs& /* lhs */, const Rhs& /* rhs */) { return true; }
};

template <int k>
struct SameSizeTuplePrefixComparator<k, k> {
  template <class Tuple1, class Tuple2>
  static bool Eq(const Tuple1& t1, const Tuple2& t2) {
    return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
        ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
  }
};

}  // namespace gtest_internal

// Field-by-field equality.  The comparator is only defined for equal
// sizes, so comparing tuples of different sizes does not compile.
template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
inline bool operator==(const GTEST_$(n)_TUPLE_(T)& t,
                       const GTEST_$(n)_TUPLE_(U)& u) {
  return gtest_internal::SameSizeTuplePrefixComparator<
      tuple_size<GTEST_$(n)_TUPLE_(T) >::value,
      tuple_size<GTEST_$(n)_TUPLE_(U) >::value>::Eq(t, u);
}

// Inequality is the negation of operator==.
template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
inline bool operator!=(const GTEST_$(n)_TUPLE_(T)& t,
                       const GTEST_$(n)_TUPLE_(U)& u) { return !(t == u); }

// 6.1.4 Pairs.
// Unimplemented.

}  // namespace tr1
}  // namespace std


$for j [[
#undef GTEST_$(j)_TUPLE_

]]


$for j [[
#undef GTEST_$(j)_TYPENAMES_

]]

#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
#undef GTEST_BY_REF_
#undef GTEST_ADD_REF_
#undef GTEST_TUPLE_ELEMENT_

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-type-util.h
================================================
// This file was GENERATED by command:
//     pump.py gtest-type-util.h.pump
// DO NOT EDIT BY HAND!!!

// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Type utilities needed for implementing typed and type-parameterized
// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently we support at most 50 types in a list, and at most 50
// type-parameterized tests in one type-parameterized test case.
// Please contact googletestframework@googlegroups.com if you need
// more.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_

#include "gtest/internal/gtest-port.h"

// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
// libstdc++ (which is where cxxabi.h comes from).
# if GTEST_HAS_CXXABI_H_
#  include <cxxabi.h>
# elif defined(__HP_aCC)
#  include <acxx_demangle.h>
# endif  // GTEST_HAS_CXXABI_H_

namespace testing {
namespace internal {

// GetTypeName<T>() produces a printable name for type T.
// NB: Google Mock calls this function too, so it must stay outside the
// typed-test-only section below.
template <typename T>
std::string GetTypeName() {
# if !GTEST_HAS_RTTI

  // typeid is not used in this configuration; return a fixed placeholder.
  return "<type>";

# else

  const char* const mangled = typeid(T).name();

#  if !(GTEST_HAS_CXXABI_H_ || defined(__HP_aCC))
  // No demangler is available: hand back whatever typeid reported.
  return mangled;
#  else
  // gcc's typeid(T).name() yields a mangled name, so demangle it first.
#   if GTEST_HAS_CXXABI_H_
  using abi::__cxa_demangle;
#   endif  // GTEST_HAS_CXXABI_H_
  int status = 0;
  char* const demangled = __cxa_demangle(mangled, 0, 0, &status);
  // Fall back to the raw name when the demangler reports a failure.
  const char* const chosen = (status == 0) ? demangled : mangled;
  const std::string result(chosen);
  free(demangled);
  return result;
#  endif  // !(GTEST_HAS_CXXABI_H_ || __HP_aCC)

# endif  // !GTEST_HAS_RTTI
}

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// AssertTypeEq<T1, T2>::type exists only when T1 and T2 name the same
// type, so mentioning it works as a compile-time check that two types are
// equal.

// The general case is declared but never defined.
template <class T1, class T2>
struct AssertTypeEq;

// Matching types: ::type is available.
template <class T>
struct AssertTypeEq<T, T> { typedef bool type; };

// A unique type used as the default value for the arguments of class
// template Types.  This allows us to simulate variadic templates
// (e.g. Types<int>, Types<int, double>, etc.), which C++ doesn't
// support directly.
struct None {};

// The following family of struct and struct templates are used to
// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
// represents a type list with N types (T1, T2, ..., and TN) in it.
// Except for Types0, every struct in the family has two member types:
// Head for the first type in the list, and Tail for the rest of the
// list.

// The empty type list.  Unlike the other TypesN structs it has neither a
// Head nor a Tail member type.
struct Types0 {};

// Type lists of length 1, 2, 3, and so on.

// Type list of 1 type: Head is T1, Tail is the empty list.
template <class T1>
struct Types1 {
  typedef Types0 Tail;
  typedef T1 Head;
};
// Type list of 2 types: Head is T1, Tail lists the remaining 1.
template <class T1, class T2>
struct Types2 {
  typedef Types1<T2> Tail;
  typedef T1 Head;
};

// Type list of 3 types: Head is T1, Tail lists the remaining 2.
template <class T1, class T2, class T3>
struct Types3 {
  typedef Types2<T2, T3> Tail;
  typedef T1 Head;
};

// Type list of 4 types: Head is T1, Tail lists the remaining 3.
template <class T1, class T2, class T3, class T4>
struct Types4 {
  typedef Types3<T2, T3, T4> Tail;
  typedef T1 Head;
};

// Type list of 5 types: Head is T1, Tail lists the remaining 4.
template <class T1, class T2, class T3, class T4, class T5>
struct Types5 {
  typedef Types4<T2, T3, T4, T5> Tail;
  typedef T1 Head;
};

// Type list of 6 types: Head is T1, Tail lists the remaining 5.
template <class T1, class T2, class T3, class T4, class T5, class T6>
struct Types6 {
  typedef Types5<T2, T3, T4, T5, T6> Tail;
  typedef T1 Head;
};

// Type list of 7 types: Head is T1, Tail lists the remaining 6.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7>
struct Types7 {
  typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
  typedef T1 Head;
};

// Type list of 8 types: Head is T1, Tail lists the remaining 7.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8>
struct Types8 {
  typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
  typedef T1 Head;
};

// Type list of 9 types: Head is T1, Tail lists the remaining 8.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9>
struct Types9 {
  typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
  typedef T1 Head;
};

// Type list of 10 types: Head is T1, Tail lists the remaining 9.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10>
struct Types10 {
  typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
  typedef T1 Head;
};

// Type list of 11 types: Head is T1, Tail lists the remaining 10.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11>
struct Types11 {
  typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
  typedef T1 Head;
};

// Type list of 12 types: Head is T1, Tail lists the remaining 11.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12>
struct Types12 {
  typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12> Tail;
  typedef T1 Head;
};

// Type list of 13 types: Head is T1, Tail lists the remaining 12.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13>
struct Types13 {
  typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13> Tail;
  typedef T1 Head;
};

// Type list of 14 types: Head is T1, Tail lists the remaining 13.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14>
struct Types14 {
  typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14> Tail;
  typedef T1 Head;
};

// Type list of 15 types: Head is T1, Tail lists the remaining 14.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15>
struct Types15 {
  typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15> Tail;
  typedef T1 Head;
};

// Type list of 16 types: Head is T1, Tail lists the remaining 15.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16>
struct Types16 {
  typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16> Tail;
  typedef T1 Head;
};

// Type list of 17 types: Head is T1, Tail lists the remaining 16.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17>
struct Types17 {
  typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17> Tail;
  typedef T1 Head;
};

// Type list of 18 types: Head is T1, Tail lists the remaining 17.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18>
struct Types18 {
  typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18> Tail;
  typedef T1 Head;
};

// Type list of 19 types: Head is T1, Tail lists the remaining 18.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19>
struct Types19 {
  typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19> Tail;
  typedef T1 Head;
};

// Type list of 20 types: Head is T1, Tail lists the remaining 19.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20>
struct Types20 {
  typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19, T20> Tail;
  typedef T1 Head;
};

// Type list of 21 types: Head is T1, Tail lists the remaining 20.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21>
struct Types21 {
  typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21> Tail;
  typedef T1 Head;
};

// Type list of 22 types: Head is T1, Tail lists the remaining 21.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22>
struct Types22 {
  typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21,
      T22> Tail;
  typedef T1 Head;
};

// Type list of 23 types: Head is T1, Tail lists the remaining 22.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23>
struct Types23 {
  typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21,
      T22, T23> Tail;
  typedef T1 Head;
};

// Type list of 24 types: Head is T1, Tail lists the remaining 23.
template <class T1, class T2, class T3, class T4, class T5, class T6,
          class T7, class T8, class T9, class T10, class T11, class T12,
          class T13, class T14, class T15, class T16, class T17, class T18,
          class T19, class T20, class T21, class T22, class T23, class T24>
struct Types24 {
  typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
      T12, T13, T14, T15, T16, T17, T18, T19, T20, T21,
      T22, T23, T24> Tail;
  typedef T1 Head;
};

// Type list of length 25.  Head is the first type; Tail is Types24
// instantiated with the remaining types T2 through T25.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25>
struct Types25 {
  typedef T1 Head;
  typedef Types24<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25> Tail;
};

// Type list of length 26.  Head is the first type; Tail is Types25
// instantiated with the remaining types T2 through T26.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26>
struct Types26 {
  typedef T1 Head;
  typedef Types25<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26> Tail;
};

// Type list of length 27.  Head is the first type; Tail is Types26
// instantiated with the remaining types T2 through T27.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27>
struct Types27 {
  typedef T1 Head;
  typedef Types26<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27> Tail;
};

// Type list of length 28.  Head is the first type; Tail is Types27
// instantiated with the remaining types T2 through T28.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28>
struct Types28 {
  typedef T1 Head;
  typedef Types27<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28> Tail;
};

// Type list of length 29.  Head is the first type; Tail is Types28
// instantiated with the remaining types T2 through T29.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29>
struct Types29 {
  typedef T1 Head;
  typedef Types28<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29> Tail;
};

// Type list of length 30.  Head is the first type; Tail is Types29
// instantiated with the remaining types T2 through T30.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30>
struct Types30 {
  typedef T1 Head;
  typedef Types29<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30> Tail;
};

// Type list of length 31.  Head is the first type; Tail is Types30
// instantiated with the remaining types T2 through T31.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31>
struct Types31 {
  typedef T1 Head;
  typedef Types30<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31> Tail;
};

// Type list of length 32.  Head is the first type; Tail is Types31
// instantiated with the remaining types T2 through T32.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32>
struct Types32 {
  typedef T1 Head;
  typedef Types31<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32> Tail;
};

// Type list of length 33.  Head is the first type; Tail is Types32
// instantiated with the remaining types T2 through T33.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33>
struct Types33 {
  typedef T1 Head;
  typedef Types32<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33> Tail;
};

// Type list of length 34.  Head is the first type; Tail is Types33
// instantiated with the remaining types T2 through T34.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34>
struct Types34 {
  typedef T1 Head;
  typedef Types33<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34> Tail;
};

// Type list of length 35.  Head is the first type; Tail is Types34
// instantiated with the remaining types T2 through T35.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35>
struct Types35 {
  typedef T1 Head;
  typedef Types34<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35> Tail;
};

// Type list of length 36.  Head is the first type; Tail is Types35
// instantiated with the remaining types T2 through T36.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36>
struct Types36 {
  typedef T1 Head;
  typedef Types35<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36> Tail;
};

// Type list of length 37.  Head is the first type; Tail is Types36
// instantiated with the remaining types T2 through T37.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37>
struct Types37 {
  typedef T1 Head;
  typedef Types36<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37> Tail;
};

// Type list of length 38.  Head is the first type; Tail is Types37
// instantiated with the remaining types T2 through T38.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38>
struct Types38 {
  typedef T1 Head;
  typedef Types37<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38> Tail;
};

// Type list of length 39.  Head is the first type; Tail is Types38
// instantiated with the remaining types T2 through T39.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39>
struct Types39 {
  typedef T1 Head;
  typedef Types38<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
};

// Type list of length 40.  Head is the first type; Tail is Types39
// instantiated with the remaining types T2 through T40.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40>
struct Types40 {
  typedef T1 Head;
  typedef Types39<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
};

// Type list of length 41.  Head is the first type; Tail is Types40
// instantiated with the remaining types T2 through T41.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41>
struct Types41 {
  typedef T1 Head;
  typedef Types40<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41> Tail;
};

// Type list of length 42.  Head is the first type; Tail is Types41
// instantiated with the remaining types T2 through T42.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42>
struct Types42 {
  typedef T1 Head;
  typedef Types41<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42> Tail;
};

// Type list of length 43.  Head is the first type; Tail is Types42
// instantiated with the remaining types T2 through T43.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43>
struct Types43 {
  typedef T1 Head;
  typedef Types42<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43> Tail;
};

// Type list of length 44.  Head is the first type; Tail is Types43
// instantiated with the remaining types T2 through T44.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44>
struct Types44 {
  typedef T1 Head;
  typedef Types43<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44> Tail;
};

// Type list of length 45.  Head is the first type; Tail is Types44
// instantiated with the remaining types T2 through T45.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45>
struct Types45 {
  typedef T1 Head;
  typedef Types44<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45> Tail;
};

// Type list of length 46.  Head is the first type; Tail is Types45
// instantiated with the remaining types T2 through T46.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46>
struct Types46 {
  typedef T1 Head;
  typedef Types45<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46> Tail;
};

// Type list of length 47.  Head is the first type; Tail is Types46
// instantiated with the remaining types T2 through T47.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47>
struct Types47 {
  typedef T1 Head;
  typedef Types46<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47> Tail;
};

// Type list of length 48.  Head is the first type; Tail is Types47
// instantiated with the remaining types T2 through T48.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48>
struct Types48 {
  typedef T1 Head;
  typedef Types47<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48> Tail;
};

// Type list of length 49.  Head is the first type; Tail is Types48
// instantiated with the remaining types T2 through T49.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48, typename T49>
struct Types49 {
  typedef T1 Head;
  typedef Types48<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49> Tail;
};

// Type list of length 50.  Head is the first type; Tail is Types49
// instantiated with the remaining types T2 through T50.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48, typename T49, typename T50>
struct Types50 {
  typedef T1 Head;
  typedef Types49<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> Tail;
};


}  // namespace internal

// Users should not have to spell TypesN<...> themselves, since that
// would force them to count the number of types in the list.  Writing
// Types<...> is far more convenient, but on its own it produces dreadful
// diagnostics: gcc prints every template argument, defaulted or not, so
// Types<int> shows up as Types<int, None, None, ..., None> in compiler
// errors.
//
// To get the benefits of both, a user writes Types<T1, ..., TN> and
// Google Test maps it to TypesN<T1, ..., TN> internally, which keeps the
// error messages readable.  The mapping is exposed through the 'type'
// member of the Types template.
//
// This primary template simply forwards all 50 parameters to
// internal::Types50.
template <
    typename T1 = internal::None, typename T2 = internal::None,
    typename T3 = internal::None, typename T4 = internal::None,
    typename T5 = internal::None, typename T6 = internal::None,
    typename T7 = internal::None, typename T8 = internal::None,
    typename T9 = internal::None, typename T10 = internal::None,
    typename T11 = internal::None, typename T12 = internal::None,
    typename T13 = internal::None, typename T14 = internal::None,
    typename T15 = internal::None, typename T16 = internal::None,
    typename T17 = internal::None, typename T18 = internal::None,
    typename T19 = internal::None, typename T20 = internal::None,
    typename T21 = internal::None, typename T22 = internal::None,
    typename T23 = internal::None, typename T24 = internal::None,
    typename T25 = internal::None, typename T26 = internal::None,
    typename T27 = internal::None, typename T28 = internal::None,
    typename T29 = internal::None, typename T30 = internal::None,
    typename T31 = internal::None, typename T32 = internal::None,
    typename T33 = internal::None, typename T34 = internal::None,
    typename T35 = internal::None, typename T36 = internal::None,
    typename T37 = internal::None, typename T38 = internal::None,
    typename T39 = internal::None, typename T40 = internal::None,
    typename T41 = internal::None, typename T42 = internal::None,
    typename T43 = internal::None, typename T44 = internal::None,
    typename T45 = internal::None, typename T46 = internal::None,
    typename T47 = internal::None, typename T48 = internal::None,
    typename T49 = internal::None, typename T50 = internal::None>
struct Types {
  typedef internal::Types50<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
};

// Specialization for the empty list: all 50 arguments are internal::None,
// so 'type' is internal::Types0.
template <>
struct Types<
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types0 type;
};
// Specialization for one leading type followed by 49 internal::None
// arguments: 'type' is internal::Types1<T1>.
template <typename T1>
struct Types<
    T1,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types1<T1> type;
};
// Specialization for two leading types followed by 48 internal::None
// arguments: 'type' is internal::Types2<T1, T2>.
template <typename T1, typename T2>
struct Types<
    T1, T2,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types2<T1, T2> type;
};
// Specialization for three leading types followed by 47 internal::None
// arguments: 'type' is internal::Types3<T1, T2, T3>.
template <typename T1, typename T2, typename T3>
struct Types<
    T1, T2, T3,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types3<T1, T2, T3> type;
};
// Specialization for four leading types followed by 46 internal::None
// arguments: 'type' is internal::Types4<T1, T2, T3, T4>.
template <typename T1, typename T2, typename T3, typename T4>
struct Types<
    T1, T2, T3, T4,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types4<T1, T2, T3, T4> type;
};
// Specialization for five leading types followed by 45 internal::None
// arguments: 'type' is internal::Types5<T1, ..., T5>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5>
struct Types<
    T1, T2, T3, T4, T5,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types5<T1, T2, T3, T4, T5> type;
};
// Specialization for six leading types followed by 44 internal::None
// arguments: 'type' is internal::Types6<T1, ..., T6>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6>
struct Types<
    T1, T2, T3, T4, T5, T6,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
};
// Partial specialization of Types for a 50-argument list whose first 7
// entries are T1..T7 and whose remaining 43 entries are all internal::None.
// The nested typedef 'type' names internal::Types7 instantiated with those
// 7 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7>
struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
};
// Partial specialization of Types for a 50-argument list whose first 8
// entries are T1..T8 and whose remaining 42 entries are all internal::None.
// The nested typedef 'type' names internal::Types8 instantiated with those
// 8 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
};
// Partial specialization of Types for a 50-argument list whose first 9
// entries are T1..T9 and whose remaining 41 entries are all internal::None.
// The nested typedef 'type' names internal::Types9 instantiated with those
// 9 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
};
// Partial specialization of Types for a 50-argument list whose first 10
// entries are T1..T10 and whose remaining 40 entries are all internal::None.
// The nested typedef 'type' names internal::Types10 instantiated with those
// 10 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
};
// Partial specialization of Types for a 50-argument list whose first 11
// entries are T1..T11 and whose remaining 39 entries are all internal::None.
// The nested typedef 'type' names internal::Types11 instantiated with those
// 11 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
};
// Partial specialization of Types for a 50-argument list whose first 12
// entries are T1..T12 and whose remaining 38 entries are all internal::None.
// The nested typedef 'type' names internal::Types12 instantiated with those
// 12 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
          T12> type;
};
// Partial specialization of Types for a 50-argument list whose first 13
// entries are T1..T13 and whose remaining 37 entries are all internal::None.
// The nested typedef 'type' names internal::Types13 instantiated with those
// 13 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13> type;
};
// Partial specialization of Types for a 50-argument list whose first 14
// entries are T1..T14 and whose remaining 36 entries are all internal::None.
// The nested typedef 'type' names internal::Types14 instantiated with those
// 14 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14> type;
};
// Partial specialization of Types for a 50-argument list whose first 15
// entries are T1..T15 and whose remaining 35 entries are all internal::None.
// The nested typedef 'type' names internal::Types15 instantiated with those
// 15 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15> type;
};
// Partial specialization of Types for a 50-argument list whose first 16
// entries are T1..T16 and whose remaining 34 entries are all internal::None.
// The nested typedef 'type' names internal::Types16 instantiated with those
// 16 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16> type;
};
// Partial specialization of Types for a 50-argument list whose first 17
// entries are T1..T17 and whose remaining 33 entries are all internal::None.
// The nested typedef 'type' names internal::Types17 instantiated with those
// 17 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17> type;
};
// Partial specialization of Types for a 50-argument list whose first 18
// entries are T1..T18 and whose remaining 32 entries are all internal::None.
// The nested typedef 'type' names internal::Types18 instantiated with those
// 18 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18> type;
};
// Partial specialization of Types for a 50-argument list whose first 19
// entries are T1..T19 and whose remaining 31 entries are all internal::None.
// The nested typedef 'type' names internal::Types19 instantiated with those
// 19 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19> type;
};
// Partial specialization of Types for a 50-argument list whose first 20
// entries are T1..T20 and whose remaining 30 entries are all internal::None.
// The nested typedef 'type' names internal::Types20 instantiated with those
// 20 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20> type;
};
// Partial specialization of Types for a 50-argument list whose first 21
// entries are T1..T21 and whose remaining 29 entries are all internal::None.
// The nested typedef 'type' names internal::Types21 instantiated with those
// 21 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
};
// Partial specialization of Types for a 50-argument list whose first 22
// entries are T1..T22 and whose remaining 28 entries are all internal::None.
// The nested typedef 'type' names internal::Types22 instantiated with those
// 22 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
};
// Partial specialization of Types for a 50-argument list whose first 23
// entries are T1..T23 and whose remaining 27 entries are all internal::None.
// The nested typedef 'type' names internal::Types23 instantiated with those
// 23 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
};
// Partial specialization of Types for a 50-argument list whose first 24
// entries are T1..T24 and whose remaining 26 entries are all internal::None.
// The nested typedef 'type' names internal::Types24 instantiated with those
// 24 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
};
// Partial specialization of Types for a 50-argument list whose first 25
// entries are T1..T25 and whose remaining 25 entries are all internal::None.
// The nested typedef 'type' names internal::Types25 instantiated with those
// 25 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
};
// Partial specialization of Types for a 50-argument list whose first 26
// entries are T1..T26 and whose remaining 24 entries are all internal::None.
// The nested typedef 'type' names internal::Types26 instantiated with those
// 26 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
          T26> type;
};
// Partial specialization of Types for a 50-argument list whose first 27
// entries are T1..T27 and whose remaining 23 entries are all internal::None.
// The nested typedef 'type' names internal::Types27 instantiated with those
// 27 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27> type;
};
// Partial specialization of Types for a 50-argument list whose first 28
// entries are T1..T28 and whose remaining 22 entries are all internal::None.
// The nested typedef 'type' names internal::Types28 instantiated with those
// 28 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28> type;
};
// Partial specialization of Types for a 50-argument list whose first 29
// entries are T1..T29 and whose remaining 21 entries are all internal::None.
// The nested typedef 'type' names internal::Types29 instantiated with those
// 29 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29> type;
};
// Partial specialization of Types for a 50-argument list whose first 30
// entries are T1..T30 and whose remaining 20 entries are all internal::None.
// The nested typedef 'type' names internal::Types30 instantiated with those
// 30 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30> type;
};
// Partial specialization of Types for a 50-argument list whose first 31
// entries are T1..T31 and whose remaining 19 entries are all internal::None.
// The nested typedef 'type' names internal::Types31 instantiated with those
// 31 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
         T31, internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None> {
  typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30, T31> type;
};
// Partial specialization of Types for a 50-argument list whose first 32
// entries are T1..T32 and whose remaining 18 entries are all internal::None.
// The nested typedef 'type' names internal::Types32 instantiated with those
// 32 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
         T31, T32, internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30, T31, T32> type;
};
// Partial specialization of Types for a 50-argument list whose first 33
// entries are T1..T33 and whose remaining 17 entries are all internal::None.
// The nested typedef 'type' names internal::Types33 instantiated with those
// 33 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
         T31, T32, T33, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None> {
  typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30, T31, T32, T33> type;
};
// Partial specialization of Types for a 50-argument list whose first 34
// entries are T1..T34 and whose remaining 16 entries are all internal::None.
// The nested typedef 'type' names internal::Types34 instantiated with those
// 34 leading types.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34>
struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
         T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
         T31, T32, T33, T34, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None, internal::None, internal::None, internal::None,
         internal::None> {
  typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30, T31, T32, T33, T34> type;
};
// Partial specialization of Types matching 35 leading types (T1..T35)
// followed by 15 internal::None placeholders.  Its member typedef `type` is
// internal::Types35<T1, ..., T35>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types35<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35> type;
};
// Partial specialization of Types matching 36 leading types (T1..T36)
// followed by 14 internal::None placeholders.  Its member typedef `type` is
// internal::Types36<T1, ..., T36>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types36<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36> type;
};
// Partial specialization of Types matching 37 leading types (T1..T37)
// followed by 13 internal::None placeholders.  Its member typedef `type` is
// internal::Types37<T1, ..., T37>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types37<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37> type;
};
// Partial specialization of Types matching 38 leading types (T1..T38)
// followed by 12 internal::None placeholders.  Its member typedef `type` is
// internal::Types38<T1, ..., T38>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types38<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38> type;
};
// Partial specialization of Types matching 39 leading types (T1..T39)
// followed by 11 internal::None placeholders.  Its member typedef `type` is
// internal::Types39<T1, ..., T39>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types39<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
};
// Partial specialization of Types matching 40 leading types (T1..T40)
// followed by 10 internal::None placeholders.  Its member typedef `type` is
// internal::Types40<T1, ..., T40>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types40<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
};
// Partial specialization of Types matching 41 leading types (T1..T41)
// followed by 9 internal::None placeholders.  Its member typedef `type` is
// internal::Types41<T1, ..., T41>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types41<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41> type;
};
// Partial specialization of Types matching 42 leading types (T1..T42)
// followed by 8 internal::None placeholders.  Its member typedef `type` is
// internal::Types42<T1, ..., T42>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types42<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42> type;
};
// Partial specialization of Types matching 43 leading types (T1..T43)
// followed by 7 internal::None placeholders.  Its member typedef `type` is
// internal::Types43<T1, ..., T43>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None, internal::None> {
  typedef internal::Types43<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43> type;
};
// Partial specialization of Types matching 44 leading types (T1..T44)
// followed by 6 internal::None placeholders.  Its member typedef `type` is
// internal::Types44<T1, ..., T44>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44,
    internal::None, internal::None, internal::None, internal::None,
    internal::None, internal::None> {
  typedef internal::Types44<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44> type;
};
// Partial specialization of Types matching 45 leading types (T1..T45)
// followed by 5 internal::None placeholders.  Its member typedef `type` is
// internal::Types45<T1, ..., T45>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45,
    internal::None, internal::None, internal::None, internal::None,
    internal::None> {
  typedef internal::Types45<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45> type;
};
// Partial specialization of Types matching 46 leading types (T1..T46)
// followed by 4 internal::None placeholders.  Its member typedef `type` is
// internal::Types46<T1, ..., T46>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46,
    internal::None, internal::None, internal::None, internal::None> {
  typedef internal::Types46<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46> type;
};
// Partial specialization of Types matching 47 leading types (T1..T47)
// followed by 3 internal::None placeholders.  Its member typedef `type` is
// internal::Types47<T1, ..., T47>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47,
    internal::None, internal::None, internal::None> {
  typedef internal::Types47<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47> type;
};
// Partial specialization of Types matching 48 leading types (T1..T48)
// followed by 2 internal::None placeholders.  Its member typedef `type` is
// internal::Types48<T1, ..., T48>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48,
    internal::None, internal::None> {
  typedef internal::Types48<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48> type;
};
// Partial specialization of Types matching 49 leading types (T1..T49)
// followed by 1 internal::None placeholder.  Its member typedef `type` is
// internal::Types49<T1, ..., T49>.
template <
    typename T1, typename T2, typename T3, typename T4, typename T5,
    typename T6, typename T7, typename T8, typename T9, typename T10,
    typename T11, typename T12, typename T13, typename T14, typename T15,
    typename T16, typename T17, typename T18, typename T19, typename T20,
    typename T21, typename T22, typename T23, typename T24, typename T25,
    typename T26, typename T27, typename T28, typename T29, typename T30,
    typename T31, typename T32, typename T33, typename T34, typename T35,
    typename T36, typename T37, typename T38, typename T39, typename T40,
    typename T41, typename T42, typename T43, typename T44, typename T45,
    typename T46, typename T47, typename T48, typename T49>
struct Types<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
    T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
    T41, T42, T43, T44, T45, T46, T47, T48, T49,
    internal::None> {
  typedef internal::Types49<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
};

namespace internal {

// Shorthand for the declarator of a template template parameter that
// accepts a single type argument, e.g. `template <GTEST_TEMPLATE_ Tmpl>`.
# define GTEST_TEMPLATE_ template <typename T> class

// TemplateSel<Tmpl> is a "selector" that represents Tmpl -- a class
// template taking exactly one type parameter -- as an ordinary type.
// The selected template can be instantiated again through the nested
// member template: TemplateSel<Tmpl>::Bind<T>::type is a typedef for
// Tmpl<T>.
//
// C++ offers no direct way to typedef a class template; this selector
// is the workaround.
template <GTEST_TEMPLATE_ Tmpl>
struct TemplateSel {
  // Bind<Arg>::type names the instantiation Tmpl<Arg>.
  template <typename Arg>
  struct Bind { typedef Tmpl<Arg> type; };
};

// Expands to TmplSel::template Bind<T>::type, i.e. the nested `type` of the
// Bind<T> member template of the selector TmplSel.  For a selector of the
// form TemplateSel<Tmpl>, that type is Tmpl<T>.
# define GTEST_BIND_(TmplSel, T) \
  TmplSel::template Bind<T>::type

// NoneT is a distinct, empty struct template.  It is used as the default
// value for the arguments of class template Templates, which lets that
// template simulate a variadic parameter list (one argument, two
// arguments, and so on) -- something C++ doesn't support directly.
template <typename>
struct NoneT {};

// The following family of struct and struct templates are used to
// represent template lists.  In particular, TemplatesN<T1, T2, ...,
// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
// for Templates0, every struct in the family has two member types:
// Head for the selector of the first template in the list, and Tail
// for the rest of the list.

// The empty template list.  Unlike the other members of the family it has
// neither a Head nor a Tail member; Templates1 uses it as its Tail.
struct Templates0 {};

// Template lists of length 1, 2, 3, and so on.

// Template list of length 1.  Head wraps T1 in a TemplateSel; Tail is the
// empty list Templates0.
template <
    GTEST_TEMPLATE_ T1>
struct Templates1 {
  typedef TemplateSel<T1> Head;
  typedef Templates0 Tail;
};
// Template list of length 2.  Head wraps T1 in a TemplateSel; Tail is the
// Templates1 list holding T2.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
struct Templates2 {
  typedef TemplateSel<T1> Head;
  typedef Templates1<T2> Tail;
};

// Template list of length 3.  Head wraps T1 in a TemplateSel; Tail is the
// Templates2 list of the remaining templates T2..T3.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
struct Templates3 {
  typedef TemplateSel<T1> Head;
  typedef Templates2<T2, T3> Tail;
};

// Template list of length 4.  Head wraps T1 in a TemplateSel; Tail is the
// Templates3 list of the remaining templates T2..T4.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4>
struct Templates4 {
  typedef TemplateSel<T1> Head;
  typedef Templates3<T2, T3, T4> Tail;
};

// Template list of length 5.  Head wraps T1 in a TemplateSel; Tail is the
// Templates4 list of the remaining templates T2..T5.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
struct Templates5 {
  typedef TemplateSel<T1> Head;
  typedef Templates4<T2, T3, T4, T5> Tail;
};

// Template list of length 6.  Head wraps T1 in a TemplateSel; Tail is the
// Templates5 list of the remaining templates T2..T6.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
struct Templates6 {
  typedef TemplateSel<T1> Head;
  typedef Templates5<T2, T3, T4, T5, T6> Tail;
};

// Template list of length 7.  Head wraps T1 in a TemplateSel; Tail is the
// Templates6 list of the remaining templates T2..T7.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7>
struct Templates7 {
  typedef TemplateSel<T1> Head;
  typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
};

// Template list of length 8.  Head wraps T1 in a TemplateSel; Tail is the
// Templates7 list of the remaining templates T2..T8.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
struct Templates8 {
  typedef TemplateSel<T1> Head;
  typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
};

// Template list of length 9.  Head wraps T1 in a TemplateSel; Tail is the
// Templates8 list of the remaining templates T2..T9.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
struct Templates9 {
  typedef TemplateSel<T1> Head;
  typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
};

// Template list of length 10.  Head wraps T1 in a TemplateSel; Tail is the
// Templates9 list of the remaining templates T2..T10.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10>
struct Templates10 {
  typedef TemplateSel<T1> Head;
  typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
};

// Template list of length 11.  Head wraps T1 in a TemplateSel; Tail is the
// Templates10 list of the remaining templates T2..T11.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
struct Templates11 {
  typedef TemplateSel<T1> Head;
  typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
};

// Template list of length 12.  Head wraps T1 in a TemplateSel; Tail is the
// Templates11 list of the remaining templates T2..T12.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
struct Templates12 {
  typedef TemplateSel<T1> Head;
  typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
};

// Template list of length 13.  Head wraps T1 in a TemplateSel; Tail is the
// Templates12 list of the remaining templates T2..T13.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13>
struct Templates13 {
  typedef TemplateSel<T1> Head;
  typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
};

// Template list of length 14.  Head wraps T1 in a TemplateSel; Tail is the
// Templates13 list of the remaining templates T2..T14.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
struct Templates14 {
  typedef TemplateSel<T1> Head;
  typedef Templates13<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14> Tail;
};

// Template list of length 15.  Head wraps T1 in a TemplateSel; Tail is the
// Templates14 list of the remaining templates T2..T15.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
struct Templates15 {
  typedef TemplateSel<T1> Head;
  typedef Templates14<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15> Tail;
};

// Template list of length 16.  Head wraps T1 in a TemplateSel; Tail is the
// Templates15 list of the remaining templates T2..T16.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16>
struct Templates16 {
  typedef TemplateSel<T1> Head;
  typedef Templates15<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16> Tail;
};

// Template list of length 17.  Head wraps T1 in a TemplateSel; Tail is the
// Templates16 list of the remaining templates T2..T17.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
struct Templates17 {
  typedef TemplateSel<T1> Head;
  typedef Templates16<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17> Tail;
};

// Template list of length 18.  Head wraps T1 in a TemplateSel; Tail is the
// Templates17 list of the remaining templates T2..T18.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
struct Templates18 {
  typedef TemplateSel<T1> Head;
  typedef Templates17<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18> Tail;
};

// Template list of length 19.  Head wraps T1 in a TemplateSel; Tail is the
// Templates18 list of the remaining templates T2..T19.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19>
struct Templates19 {
  typedef TemplateSel<T1> Head;
  typedef Templates18<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19> Tail;
};

// Template list of length 20.  Head wraps T1 in a TemplateSel; Tail is the
// Templates19 list of the remaining templates T2..T20.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
struct Templates20 {
  typedef TemplateSel<T1> Head;
  typedef Templates19<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20> Tail;
};

// Template list of length 21.  Head wraps T1 in a TemplateSel; Tail is the
// Templates20 list of the remaining templates T2..T21.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
struct Templates21 {
  typedef TemplateSel<T1> Head;
  typedef Templates20<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21> Tail;
};

// Template list of length 22.  Head wraps T1 in a TemplateSel; Tail is the
// Templates21 list of the remaining templates T2..T22.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22>
struct Templates22 {
  typedef TemplateSel<T1> Head;
  typedef Templates21<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22> Tail;
};

// Template list of length 23.  Head wraps T1 in a TemplateSel; Tail is the
// Templates22 list of the remaining templates T2..T23.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
struct Templates23 {
  typedef TemplateSel<T1> Head;
  typedef Templates22<
      T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23> Tail;
};

// Templates24 is a head/tail list of 24 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates23 formed from
// T2 through T24.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
struct Templates24 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 23 entries.
  typedef Templates23<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24> Tail;
};

// Templates25 is a head/tail list of 25 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates24 formed from
// T2 through T25.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25>
struct Templates25 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 24 entries.
  typedef Templates24<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25> Tail;
};

// Templates26 is a head/tail list of 26 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates25 formed from
// T2 through T26.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
struct Templates26 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 25 entries.
  typedef Templates25<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26> Tail;
};

// Templates27 is a head/tail list of 27 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates26 formed from
// T2 through T27.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
struct Templates27 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 26 entries.
  typedef Templates26<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27> Tail;
};

// Templates28 is a head/tail list of 28 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates27 formed from
// T2 through T28.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28>
struct Templates28 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 27 entries.
  typedef Templates27<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
};

// Templates29 is a head/tail list of 29 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates28 formed from
// T2 through T29.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
struct Templates29 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 28 entries.
  typedef Templates28<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29> Tail;
};

// Templates30 is a head/tail list of 30 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates29 formed from
// T2 through T30.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
struct Templates30 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 29 entries.
  typedef Templates29<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30> Tail;
};

// Templates31 is a head/tail list of 31 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates30 formed from
// T2 through T31.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31>
struct Templates31 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 30 entries.
  typedef Templates30<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31> Tail;
};

// Templates32 is a head/tail list of 32 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates31 formed from
// T2 through T32.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
struct Templates32 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 31 entries.
  typedef Templates31<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32> Tail;
};

// Templates33 is a head/tail list of 33 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates32 formed from
// T2 through T33.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
struct Templates33 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 32 entries.
  typedef Templates32<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33> Tail;
};

// Templates34 is a head/tail list of 34 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates33 formed from
// T2 through T34.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34>
struct Templates34 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 33 entries.
  typedef Templates33<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34> Tail;
};

// Templates35 is a head/tail list of 35 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates34 formed from
// T2 through T35.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
struct Templates35 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 34 entries.
  typedef Templates34<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35> Tail;
};

// Templates36 is a head/tail list of 36 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates35 formed from
// T2 through T36.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
struct Templates36 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 35 entries.
  typedef Templates35<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36> Tail;
};

// Templates37 is a head/tail list of 37 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates36 formed from
// T2 through T37.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37>
struct Templates37 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 36 entries.
  typedef Templates36<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37> Tail;
};

// Templates38 is a head/tail list of 38 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates37 formed from
// T2 through T38.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
struct Templates38 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 37 entries.
  typedef Templates37<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
struct Templates39 {
  typedef TemplateSel<T1> Head;
  typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
          T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
          T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40>
struct Templates40 {
  typedef TemplateSel<T1> Head;
  typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
          T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
          T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
struct Templates41 {
  typedef TemplateSel<T1> Head;
  typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
          T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
          T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
};

template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
struct Templates42 {
  typedef TemplateSel<T1> Head;
  typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
          T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
          T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42> Tail;
};

// Templates43 is a head/tail list of 43 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates42 formed from
// T2 through T43.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43>
struct Templates43 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 42 entries.
  typedef Templates42<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43> Tail;
};

// Templates44 is a head/tail list of 44 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates43 formed from
// T2 through T44.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
struct Templates44 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 43 entries.
  typedef Templates43<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44> Tail;
};

// Templates45 is a head/tail list of 45 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates44 formed from
// T2 through T45.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
struct Templates45 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 44 entries.
  typedef Templates44<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45> Tail;
};

// Templates46 is a head/tail list of 46 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates45 formed from
// T2 through T46.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46>
struct Templates46 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 45 entries.
  typedef Templates45<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45, T46> Tail;
};

// Templates47 is a head/tail list of 47 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates46 formed from
// T2 through T47.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
struct Templates47 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 46 entries.
  typedef Templates46<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45, T46, T47> Tail;
};

// Templates48 is a head/tail list of 48 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates47 formed from
// T2 through T48.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
struct Templates48 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 47 entries.
  typedef Templates47<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45, T46, T47, T48> Tail;
};

// Templates49 is a head/tail list of 49 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates48 formed from
// T2 through T49.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
    GTEST_TEMPLATE_ T49>
struct Templates49 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 48 entries.
  typedef Templates48<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45, T46, T47, T48, T49> Tail;
};

// Templates50 is a head/tail list of 50 GTEST_TEMPLATE_ parameters:
// Head wraps T1 in TemplateSel, and Tail is the Templates49 formed from
// T2 through T50.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
    GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
struct Templates50 {
  // First entry of the list.
  typedef TemplateSel<T1> Head;
  // Remaining 49 entries.
  typedef Templates49<
      T2, T3, T4, T5, T6, T7, T8, T9,
      T10, T11, T12, T13, T14, T15, T16, T17, T18, T19,
      T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
      T40, T41, T42, T43, T44, T45, T46, T47, T48, T49,
      T50> Tail;
};


// Users should not have to spell out TemplatesN<...> themselves, since
// that would force them to count the length of the list.  Templates<...>
// is far more convenient to write, but it produces dreadful diagnostics
// on a compiler error: gcc prints every template argument, defaulted or
// not, so Templates<list> shows up as
// Templates<list, NoneT, NoneT, ..., NoneT> in the error text.
//
// Google Test therefore takes the best of both approaches: the user
// writes Templates<T1, ..., TN>, and it is translated internally to
// TemplatesN<T1, ..., TN> so that error messages stay readable.  The
// 'type' member of the Templates template performs that translation.
//
// The primary template below maps the full fifty-argument list to
// Templates50.
template <
    GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
    GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
    GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
    GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
    GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
    GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
    GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
    GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
    GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
    GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
    GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
    GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
    GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
    GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
    GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
    GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
    GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
    GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
    GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
    GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
    GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
    GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
    GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
    GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
    GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
struct Templates {
  using type = Templates50<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18, T19, T20,
      T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
      T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>;
};

// Empty list: when all fifty arguments are NoneT, 'type' is Templates0.
template <>
struct Templates<
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates0;
};
// One template followed by 49 NoneT fillers: 'type' is Templates1.
template <GTEST_TEMPLATE_ T1>
struct Templates<
    T1,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates1<T1>;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT> {
  typedef Templates2<T1, T2> type;
};
// Three templates followed by 47 NoneT fillers: 'type' is Templates3.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
struct Templates<
    T1, T2, T3,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates3<T1, T2, T3>;
};
// Four templates followed by 46 NoneT fillers: 'type' is Templates4.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4>
struct Templates<
    T1, T2, T3, T4,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates4<T1, T2, T3, T4>;
};
// Five templates followed by 45 NoneT fillers: 'type' is Templates5.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
struct Templates<
    T1, T2, T3, T4, T5,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates5<T1, T2, T3, T4, T5>;
};
// Six templates followed by 44 NoneT fillers: 'type' is Templates6.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
struct Templates<
    T1, T2, T3, T4, T5, T6,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  using type = Templates6<T1, T2, T3, T4, T5, T6>;
};
// Seven templates followed by 43 NoneT fillers: 'type' is Templates7.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT> {
  using type = Templates7<T1, T2, T3, T4, T5, T6, T7>;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
};
// Nine templates followed by 41 NoneT fillers: 'type' is Templates9.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT> {
  using type = Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9>;
};
// Ten templates followed by 40 NoneT fillers: 'type' is Templates10.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>;
};
// Eleven templates followed by 39 NoneT fillers: 'type' is Templates11.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates11<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11>;
};
// Twelve templates followed by 38 NoneT fillers: 'type' is Templates12.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates12<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12>;
};
// Thirteen templates followed by 37 NoneT fillers: 'type' is Templates13.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates13<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13>;
};
// Fourteen templates followed by 36 NoneT fillers: 'type' is Templates14.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates14<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14>;
};
// Fifteen templates followed by 35 NoneT fillers: 'type' is Templates15.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT> {
  using type = Templates15<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15>;
};
// Sixteen templates followed by 34 NoneT fillers: 'type' is Templates16.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT> {
  using type = Templates16<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16>;
};
// Seventeen templates followed by 33 NoneT fillers: 'type' is Templates17.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT> {
  using type = Templates17<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17>;
};
// Eighteen templates followed by 32 NoneT fillers: 'type' is Templates18.
template <
    GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
struct Templates<
    T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
    T11, T12, T13, T14, T15, T16, T17, T18,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
    NoneT, NoneT> {
  using type = Templates18<
      T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
      T11, T12, T13, T14, T15, T16, T17, T18>;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT> {
  typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT> {
  typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT> {
  typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT> {
  typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT> {
  typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT> {
  typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT> {
  typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT> {
  typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT> {
  typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT> {
  typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT> {
  typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31> type;
};
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32> type;
};
// Partial specialization for 33 template arguments: matches
// Templates<T1, ..., T33> whose remaining 17 slots are all NoneT and
// exposes the fixed-length list Templates33<T1, ..., T33> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33> type;
};
// Partial specialization for 34 template arguments: matches
// Templates<T1, ..., T34> whose remaining 16 slots are all NoneT and
// exposes the fixed-length list Templates34<T1, ..., T34> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34> type;
};
// Partial specialization for 35 template arguments: matches
// Templates<T1, ..., T35> whose remaining 15 slots are all NoneT and
// exposes the fixed-length list Templates35<T1, ..., T35> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35> type;
};
// Partial specialization for 36 template arguments: matches
// Templates<T1, ..., T36> whose remaining 14 slots are all NoneT and
// exposes the fixed-length list Templates36<T1, ..., T36> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
};
// Partial specialization for 37 template arguments: matches
// Templates<T1, ..., T37> whose remaining 13 slots are all NoneT and
// exposes the fixed-length list Templates37<T1, ..., T37> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
};
// Partial specialization for 38 template arguments: matches
// Templates<T1, ..., T38> whose remaining 12 slots are all NoneT and
// exposes the fixed-length list Templates38<T1, ..., T38> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
};
// Partial specialization for 39 template arguments: matches
// Templates<T1, ..., T39> whose remaining 11 slots are all NoneT and
// exposes the fixed-length list Templates39<T1, ..., T39> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
};
// Partial specialization for 40 template arguments: matches
// Templates<T1, ..., T40> whose remaining 10 slots are all NoneT and
// exposes the fixed-length list Templates40<T1, ..., T40> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
};
// Partial specialization for 41 template arguments: matches
// Templates<T1, ..., T41> whose remaining 9 slots are all NoneT and
// exposes the fixed-length list Templates41<T1, ..., T41> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
          T41> type;
};
// Partial specialization for 42 template arguments: matches
// Templates<T1, ..., T42> whose remaining 8 slots are all NoneT and
// exposes the fixed-length list Templates42<T1, ..., T42> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42> type;
};
// Partial specialization for 43 template arguments: matches
// Templates<T1, ..., T43> whose remaining 7 slots are all NoneT and
// exposes the fixed-length list Templates43<T1, ..., T43> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43> type;
};
// Partial specialization for 44 template arguments: matches
// Templates<T1, ..., T44> whose remaining 6 slots are all NoneT and
// exposes the fixed-length list Templates44<T1, ..., T44> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44> type;
};
// Partial specialization for 45 template arguments: matches
// Templates<T1, ..., T45> whose remaining 5 slots are all NoneT and
// exposes the fixed-length list Templates45<T1, ..., T45> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44, T45> type;
};
// Partial specialization for 46 template arguments: matches
// Templates<T1, ..., T46> whose remaining 4 slots are all NoneT and
// exposes the fixed-length list Templates46<T1, ..., T46> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
          GTEST_TEMPLATE_ T46>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         T45, T46, NoneT, NoneT, NoneT, NoneT> {
  typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44, T45, T46> type;
};
// Partial specialization for 47 template arguments: matches
// Templates<T1, ..., T47> whose remaining 3 slots are all NoneT and
// exposes the fixed-length list Templates47<T1, ..., T47> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
          GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         T45, T46, T47, NoneT, NoneT, NoneT> {
  typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44, T45, T46, T47> type;
};
// Partial specialization for 48 template arguments: matches
// Templates<T1, ..., T48> whose remaining 2 slots are both NoneT and
// exposes the fixed-length list Templates48<T1, ..., T48> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
          GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         T45, T46, T47, T48, NoneT, NoneT> {
  typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44, T45, T46, T47, T48> type;
};
// Partial specialization for 49 template arguments: matches
// Templates<T1, ..., T49> whose one remaining slot is NoneT and
// exposes the fixed-length list Templates49<T1, ..., T49> as `type`.
template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
          GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
          GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
          GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
          GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
          GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
          GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
          GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
          GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
          GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
          GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
          GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
          GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
          GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
          GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
          GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
          GTEST_TEMPLATE_ T49>
struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
         T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
         T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
         T45, T46, T47, T48, T49, NoneT> {
  typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
          T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
          T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
          T42, T43, T44, T45, T46, T47, T48, T49> type;
};

// The TypeList template makes it possible to use either a single type
// or a Types<...> list in TYPED_TEST_CASE() and
// INSTANTIATE_TYPED_TEST_CASE_P().

template <typename SingleT>
struct TypeList {
  // A lone type is presented as the one-element list Types1<SingleT>.
  typedef Types1<SingleT> type;
};

// Specialization for an argument that is itself a Types<T1, ..., T50>
// list: rather than wrapping it again, `type` is taken from the nested
// Types<...>::type of that list.
template <typename T1, typename T2, typename T3, typename T4, typename T5,
          typename T6, typename T7, typename T8, typename T9, typename T10,
          typename T11, typename T12, typename T13, typename T14, typename T15,
          typename T16, typename T17, typename T18, typename T19, typename T20,
          typename T21, typename T22, typename T23, typename T24, typename T25,
          typename T26, typename T27, typename T28, typename T29, typename T30,
          typename T31, typename T32, typename T33, typename T34, typename T35,
          typename T36, typename T37, typename T38, typename T39, typename T40,
          typename T41, typename T42, typename T43, typename T44, typename T45,
          typename T46, typename T47, typename T48, typename T49, typename T50>
struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
         T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
         T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
         T44, T45, T46, T47, T48, T49, T50> > {
  typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
          T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
          T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
          T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_


================================================
FILE: rocrtst/gtest/include/gtest/internal/gtest-type-util.h.pump
================================================
$$ -*- mode: c++; -*-
$var n = 50  $$ Maximum length of type lists we want to support.
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Type utilities needed for implementing typed and type-parameterized
// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
//
// Currently we support at most $n types in a list, and at most $n
// type-parameterized tests in one type-parameterized test case.
// Please contact googletestframework@googlegroups.com if you need
// more.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_

#include "gtest/internal/gtest-port.h"

// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
// libstdc++ (which is where cxxabi.h comes from).
# if GTEST_HAS_CXXABI_H_
#  include <cxxabi.h>
# elif defined(__HP_aCC)
#  include <acxx_demangle.h>
# endif  // GTEST_HAS_CXXABI_H_

namespace testing {
namespace internal {

// GetTypeName<T>() returns a human-readable name of type T.
// NB: This function is also used in Google Mock, so don't move it inside of
// the typed-test-only section below.
template <typename T>
std::string GetTypeName() {
# if !GTEST_HAS_RTTI

  // Without RTTI the type cannot be queried; fall back to a placeholder.
  return "<type>";

# else

  const char* const mangled = typeid(T).name();
#  if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
  // typeid(T).name() yields a mangled name under gcc, so run it through
  // the demangler before handing it back.
#   if GTEST_HAS_CXXABI_H_
  using abi::__cxa_demangle;
#   endif  // GTEST_HAS_CXXABI_H_
  int demangle_status = 0;
  char* const demangled = __cxa_demangle(mangled, 0, 0, &demangle_status);
  // A non-zero status means demangling failed; keep the raw name then.
  const char* const printable = (demangle_status != 0) ? mangled : demangled;
  const std::string result(printable);
  // The demangled buffer is released with free() once it has been copied.
  free(demangled);
  return result;
#  else
  return mangled;
#  endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC

# endif  // !GTEST_HAS_RTTI
}

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
// type.  This can be used as a compile-time assertion to ensure that
// two types are equal.

// Primary template: only declared here, so naming
// AssertTypeEq<T1, T2>::type for two different types does not compile.
template <typename T1, typename T2>
struct AssertTypeEq;

// Specialization chosen when both arguments are the same type; it is the
// only form that provides the nested 'type'.
template <typename T>
struct AssertTypeEq<T, T> {
  typedef bool type;
};

// A unique type used as the default value for the arguments of class
// template Types.  This allows us to simulate variadic templates
// (e.g. Types<int>, Types<int, double>, and etc), which C++ doesn't
// support directly.
struct None {};

// The following family of struct and struct templates are used to
// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
// represents a type list with N types (T1, T2, ..., and TN) in it.
// Except for Types0, every struct in the family has two member types:
// Head for the first type in the list, and Tail for the rest of the
// list.

// The empty type list.  It has neither Head nor Tail and is what the Tail
// of a one-element list refers to.
struct Types0 {};

// Type lists of length 1, 2, 3, and so on.

// A list holding exactly one type: Head is that type and Tail is the
// empty list.
template <typename T1>
struct Types1 {
  typedef T1 Head;
  typedef Types0 Tail;
};

// Generator template for the type lists of length 2 and up.  The directive
// lines below are not C++; they are presumably expanded by the script
// mentioned in the file header, producing one struct per value of i.
// NOTE(review): n is defined outside this excerpt -- confirm its value.
// Each generated list exposes T1 as Head and the next-shorter list of the
// remaining types as Tail.
$range i 2..n

$for i [[
$range j 1..i
$range k 2..i
template <$for j, [[typename T$j]]>
struct Types$i {
  typedef T1 Head;
  typedef Types$(i-1)<$for k, [[T$k]]> Tail;
};


]]

}  // namespace internal

// We don't want to require the users to write TypesN<...> directly,
// as that would require them to count the length.  Types<...> is much
// easier to write, but generates horrible messages when there is a
// compiler error, as gcc insists on printing out each template
// argument, even if it has the default value (this means Types<int>
// will appear as Types<int, None, None, ..., None> in the compiler
// errors).
//
// Our solution is to combine the best part of the two approaches: a
// user would write Types<T1, ..., TN>, and Google Test will translate
// that to TypesN<T1, ..., TN> internally to make error messages
// readable.  The translation is done by the 'type' member of the
// Types template.

// Primary template: every argument defaults to internal::None.  When it is
// selected (no specialization below matches) all n arguments are treated
// as real types and 'type' names the longest internal list.
$range i 1..n
template <$for i, [[typename T$i = internal::None]]>
struct Types {
  typedef internal::Types$n<$for i, [[T$i]]> type;
};

// Full specialization for the case where every argument is internal::None,
// i.e. no types were given: 'type' is the empty list.
template <>
struct Types<$for i, [[internal::None]]> {
  typedef internal::Types0 type;
};

// Partial specializations, one per length i below n: the first i arguments
// are real types and all the remaining ones are internal::None, so 'type'
// names the internal list of exactly i types.
$range i 1..n-1
$for i [[
$range j 1..i
$range k i+1..n
template <$for j, [[typename T$j]]>
struct Types<$for j, [[T$j]]$for k[[, internal::None]]> {
  typedef internal::Types$i<$for j, [[T$j]]> type;
};

]]

]]

namespace internal {

// Shorthand for declaring a template template parameter that takes one
// type argument, e.g. template <GTEST_TEMPLATE_ Tmpl>.
# define GTEST_TEMPLATE_ template <typename T> class

// The template "selector" struct TemplateSel<Tmpl> is used to
// represent Tmpl, which must be a class template with one type
// parameter, as a type.  TemplateSel<Tmpl>::Bind<T>::type is defined
// as the type Tmpl<T>.  This allows us to actually instantiate the
// template "selected" by TemplateSel<Tmpl>.
//
// This trick is necessary for simulating typedef for class templates,
// which C++ doesn't support directly.
template <GTEST_TEMPLATE_ Tmpl>
struct TemplateSel {
  // Applies the selected template to T; the result is Bind<T>::type.
  template <typename T>
  struct Bind {
    typedef Tmpl<T> type;
  };
};

// Expands to the type produced by applying the template selected by
// TmplSel to T, i.e. TmplSel::Bind<T>::type.  The 'template' keyword is
// spelled out because Bind is a member template reached through TmplSel.
// NOTE(review): the expansion carries no leading 'typename', so callers in
// dependent contexts presumably add it themselves -- confirm at use sites.
# define GTEST_BIND_(TmplSel, T) \
  TmplSel::template Bind<T>::type

// A unique struct template used as the default value for the
// arguments of class template Templates.  This allows us to simulate
// variadic templates (e.g. Templates<int>, Templates<int, double>,
// and etc), which C++ doesn't support directly.
// It plays the same role for template lists that None plays for type
// lists: a placeholder marking an argument position as unused.
template <typename T>
struct NoneT {};

// The following family of struct and struct templates are used to
// represent template lists.  In particular, TemplatesN<T1, T2, ...,
// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
// for Templates0, every struct in the family has two member types:
// Head for the selector of the first template in the list, and Tail
// for the rest of the list.

// The empty template list.  It has neither Head nor Tail and is what the
// Tail of a one-element template list refers to.
struct Templates0 {};

// Template lists of length 1, 2, 3, and so on.

// A list holding exactly one template: Head is the selector wrapping it
// and Tail is the empty template list.
template <GTEST_TEMPLATE_ T1>
struct Templates1 {
  typedef TemplateSel<T1> Head;
  typedef Templates0 Tail;
};

// Generator template for the template lists of length 2 and up.  The
// directive lines below are not C++; they are presumably expanded by the
// script mentioned in the file header, producing one struct per value of i.
// NOTE(review): n is defined outside this excerpt -- confirm its value.
// Each generated list exposes the selector for T1 as Head and the
// next-shorter list of the remaining templates as Tail.
$range i 2..n

$for i [[
$range j 1..i
$range k 2..i
template <$for j, [[GTEST_TEMPLATE_ T$j]]>
struct Templates$i {
  typedef TemplateSel<T1> Head;
  typedef Templates$(i-1)<$for k, [[T$k]]> Tail;
};


]]

// We don't want to require the users to write TemplatesN<...> directly,
// as that would require them to count the length.  Templates<...> is much
// easier to write, but generates horrible messages when there is a
// compiler error, as gcc insists on printing out each template
// argument, even if it has the default value (this means Templates<list>
// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
// errors).
//
// Our solution is to combine the best part of the two approaches: a
// user would write Templates<T1, ..., TN>, and Google Test will translate
// that to TemplatesN<T1, ..., TN> internally to make error messages
// readable.  The translation is done by the 'type' member of the
// Templates template.

// Primary template: every argument defaults to NoneT.  When it is selected
// (no specialization below matches) all n arguments are treated as real
// templates and 'type' names the longest template list.
$range i 1..n
template <$for i, [[GTEST_TEMPLATE_ T$i = NoneT]]>
struct Templates {
  typedef Templates$n<$for i, [[T$i]]> type;
};

// Full specialization for the case where every argument is NoneT, i.e. no
// templates were given: 'type' is the empty template list.
template <>
struct Templates<$for i, [[NoneT]]> {
  typedef Templates0 type;
};

// Partial specializations, one per length i below n: the first i arguments
// are real templates and all the remaining ones are NoneT, so 'type' names
// the template list of exactly i entries.
$range i 1..n-1
$for i [[
$range j 1..i
$range k i+1..n
template <$for j, [[GTEST_TEMPLATE_ T$j]]>
struct Templates<$for j, [[T$j]]$for k[[, NoneT]]> {
  typedef Templates$i<$for j, [[T$j]]> type;
};

]]

// The TypeList template makes it possible to use either a single type
// or a Types<...> list in TYPED_TEST_CASE() and
// INSTANTIATE_TYPED_TEST_CASE_P().

// General case: T is a single type, so it is wrapped in a one-element
// list.
template <typename T>
struct TypeList {
  typedef Types1<T> type;
};


// Specialization for an argument that is already a Types<...> list: the
// result is that list's own translated 'type'.
$range i 1..n
template <$for i, [[typename T$i]]>
struct TypeList<Types<$for i, [[T$i]]> > {
  typedef typename Types<$for i, [[T$i]]>::type type;
};

#endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

}  // namespace internal
}  // namespace testing

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_


================================================
FILE: rocrtst/gtest/src/gtest-all.cpp
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// Google C++ Testing Framework (Google Test)
//
// Sometimes it's desirable to build Google Test by compiling a single file.
// This file serves this purpose.

// This line ensures that gtest.h can be compiled on its own, even
// when it's fused.
#include "gtest/gtest.h"


// The following lines pull in the real gtest *.cc files.
/**
#include "src/gtest.cc"
#include "src/gtest-death-test.cc"
#include "src/gtest-filepath.cc"
#include "src/gtest-port.cc"
#include "src/gtest-printers.cc"
#include "src/gtest-test-part.cc"
#include "src/gtest-typed-test.cc"
**/


================================================
FILE: rocrtst/gtest/src/gtest-death-test.cpp
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
//
// This file implements death tests.

#include "gtest/gtest-death-test.h"
#include "gtest/internal/gtest-port.h"

#if GTEST_HAS_DEATH_TEST

# if GTEST_OS_MAC
#  include <crt_externs.h>
# endif  // GTEST_OS_MAC

# include <errno.h>
# include <fcntl.h>
# include <limits.h>

# if GTEST_OS_LINUX
#  include <signal.h>
# endif  // GTEST_OS_LINUX

# include <stdarg.h>

# if GTEST_OS_WINDOWS
#  include <windows.h>
# else
#  include <sys/mman.h>
#  include <sys/wait.h>
# endif  // GTEST_OS_WINDOWS

# if GTEST_OS_QNX
#  include <spawn.h>
# endif  // GTEST_OS_QNX

#endif  // GTEST_HAS_DEATH_TEST

#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-string.h"

// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_

namespace testing {

// Constants.

// The default death test style.  It is handed to StringFromGTestEnv() as
// the default value for the death_test_style flag defined below.
static const char kDefaultDeathTestStyle[] = "fast";

// Flag selecting how the death test child process is run; the accepted
// values are described in the help text.  InDeathTestChild() compares it
// against "threadsafe".
GTEST_DEFINE_string_(
  death_test_style,
  internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
  "Indicates how to run a death test in a forked child process: "
  "\"threadsafe\" (child process re-executes the test binary "
  "from the beginning, running only the specific death test) or "
  "\"fast\" (child process runs the death test immediately "
  "after forking).");

// Flag requesting fork()/_exit() instead of clone(); BoolFromGTestEnv() is
// given false as its default value.
GTEST_DEFINE_bool_(
  death_test_use_fork,
  internal::BoolFromGTestEnv("death_test_use_fork", false),
  "Instructs to use fork()/_exit() instead of clone() in death tests. "
  "Ignored and always uses fork() on POSIX systems where clone() is not "
  "implemented. Useful when running under valgrind or similar tools if "
  "those do not support clone(). Valgrind 3.3.1 will just fail if "
  "it sees an unsupported combination of clone() flags. "
  "It is not recommended to use this flag w/o valgrind though it will "
  "work in 99% of the cases. Once valgrind is fixed, this flag will "
  "most likely be removed.");

namespace internal {
// Internal flag, empty by default.  InDeathTestChild() treats a non-empty
// value as "this process is a death test child".
GTEST_DEFINE_string_(
  internal_run_death_test, "",
  "Indicates the file, line number, temporal index of "
  "the single death test to run, and a file descriptor to "
  "which a success code may be sent, all separated by "
  "the '|' characters.  This flag is specified if and only if the current "
  "process is a sub-process launched for running a thread-safe "
  "death test.  FOR INTERNAL USE ONLY.");
}  // namespace internal

#if GTEST_HAS_DEATH_TEST

namespace internal {

// Meaningful for fast-style death tests only: true while the code runs in
// the child process of such a test.
static bool g_in_fast_death_test_child = false;

// Tells the caller whether it is currently executing inside a death test
// child process.  Tools such as Valgrind heap checkers may need this to
// adjust their behavior in death tests.  IMPORTANT: this is an internal
// utility.  Using it may break the implementation of death tests.  User
// code MUST NOT use it.
bool InDeathTestChild() {
# if GTEST_OS_WINDOWS

  // Death tests on Windows are always thread-safe, whatever the
  // death_test_style flag says, so only the internal flag matters.
  return !GTEST_FLAG(internal_run_death_test).empty();

# else

  // Any style other than "threadsafe" is answered by the fast-style marker.
  if (GTEST_FLAG(death_test_style) != "threadsafe") {
    return g_in_fast_death_test_child;
  }

  return !GTEST_FLAG(internal_run_death_test).empty();

#endif  // GTEST_OS_WINDOWS
}

}  // namespace internal

// ExitedWithCode constructor.  Stores the exit code the predicate accepts.
ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {}

// ExitedWithCode function-call operator.  Returns true when exit_status
// describes a normal exit with the stored exit code.
bool ExitedWithCode::operator()(int exit_status) const {
# if GTEST_OS_WINDOWS

  // The status is compared with the expected code directly.
  return exit_status == exit_code_;

# else

  // A wait()-style status must first report a normal exit.
  if (!WIFEXITED(exit_status)) {
    return false;
  }

  return WEXITSTATUS(exit_status) == exit_code_;

# endif  // GTEST_OS_WINDOWS
}

# if !GTEST_OS_WINDOWS
// KilledBySignal constructor.  Stores the signal number the predicate
// accepts.
KilledBySignal::KilledBySignal(int signum) : signum_(signum) {}

// KilledBySignal function-call operator.  Returns true when exit_status
// describes termination by the stored signal.
bool KilledBySignal::operator()(int exit_status) const {
  if (!WIFSIGNALED(exit_status)) {
    return false;
  }

  return WTERMSIG(exit_status) == signum_;
}
# endif  // !GTEST_OS_WINDOWS

namespace internal {

// Utilities needed for death tests.

// Generates a textual description of a given exit code, in the format
// specified by wait(2).
static std::string ExitSummary(int exit_code) {
  Message m;

# if GTEST_OS_WINDOWS

  m << "Exited with exit status " << exit_code;

# else

  if (WIFEXITED(exit_code)) {
    m << "Exited with exit status " << WEXITSTATUS(exit_code);
  }
  else if (WIFSIGNALED(exit_code)) {
    m << "Terminated by signal " << WTERMSIG(exit_code);
  }

#  ifdef WCOREDUMP

  if (WCOREDUMP(exit_code)) {
    m << " (core dumped)";
  }

#  endif
# endif  // GTEST_OS_WINDOWS

  return m.GetString();
}

// Returns true if exit_status describes a process that was terminated
// by a signal, or exited normally with a nonzero exit code.
bool ExitedUnsuccessfully(int exit_status) {
  return !ExitedWithCode(0)(exit_status);
}

# if !GTEST_OS_WINDOWS
// Generates a textual failure message when a death test finds more than
// one thread running, or cannot determine the number of threads, prior
// to executing the given statement.  It is the responsibility of the
// caller not to pass a thread_count of 1.
static std::string DeathTestThreadWarning(size_t thread_count) {
  Message msg;
  msg << "Death tests use fork(), which is unsafe particularly"
      << " in a threaded context. For this test, " << GTEST_NAME_ << " ";

  if (thread_count == 0) {
    msg << "couldn't detect the number of threads.";
  }
  else {
    msg << "detected " << thread_count << " threads.";
  }

  return msg.GetString();
}
# endif  // !GTEST_OS_WINDOWS

// Flag characters for reporting a death test that did not die.
// The child writes one of these bytes to its pipe and the parent decodes
// it in DeathTestImpl::ReadAndInterpretStatusByte().

// Written by DeathTestImpl::Abort() for TEST_DID_NOT_DIE; decoded as LIVED.
static const char kDeathTestLived = 'L';
// Written by DeathTestImpl::Abort() for every reason other than
// TEST_DID_NOT_DIE and TEST_THREW_EXCEPTION; decoded as RETURNED.
static const char kDeathTestReturned = 'R';
// Written by DeathTestImpl::Abort() for TEST_THREW_EXCEPTION; decoded as
// THREW.
static const char kDeathTestThrew = 'T';
// Written by DeathTestAbort(), followed by the text of the error message;
// the parent hands the rest of the pipe to FailFromInternalError().
static const char kDeathTestInternalError = 'I';

// An enumeration describing all of the possible ways that a death test can
// conclude.  DIED means that the process died while executing the test
// code; LIVED means that process lived beyond the end of the test code;
// RETURNED means that the test statement attempted to execute a return
// statement, which is not allowed; THREW means that the test statement
// returned control by throwing an exception.  IN_PROGRESS means the test
// has not yet concluded.
// IN_PROGRESS is the value DeathTestImpl starts with; the other values are
// assigned by DeathTestImpl::ReadAndInterpretStatusByte(), with DIED
// recorded when the pipe yields no status byte at all.
// TODO(vladl@google.com): Unify names and possibly values for
// AbortReason, DeathTestOutcome, and flag characters above.
enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };

// Routine for aborting the program which is safe to call from an
// exec-style death test child process, in which case the error
// message is propagated back to the parent process.  Otherwise, the
// message is simply printed to stderr.
//
// When the internal-run-death-test flag is present, the internal-error
// status byte followed by the message is written to the flag's write
// descriptor and the process leaves through _exit(1).  If that descriptor
// cannot be wrapped in a stream, the message goes to stderr instead so it
// is not lost, and the process still leaves through _exit(1).  Without the
// flag, the message is printed to stderr and posix::Abort() is called.
void DeathTestAbort(const std::string& message) {
  // On a POSIX system, this function may be called from a threadsafe-style
  // death test child process, which operates on a very small stack.  Use
  // the heap for any additional non-minuscule memory requirements.
  const InternalRunDeathTestFlag* const flag =
    GetUnitTestImpl()->internal_run_death_test_flag();

  if (flag != NULL) {
    FILE* parent = posix::FDOpen(flag->write_fd(), "w");

    if (parent != NULL) {
      fputc(kDeathTestInternalError, parent);
      fprintf(parent, "%s", message.c_str());
      fflush(parent);
    }
    else {
      // Opening the stream failed, so there is no FILE to write to; passing
      // NULL to the stdio calls above would be undefined behavior.
      fprintf(stderr, "%s", message.c_str());
      fflush(stderr);
    }

    _exit(1);
  }
  else {
    fprintf(stderr, "%s", message.c_str());
    fflush(stderr);
    posix::Abort();
  }
}

// A replacement for CHECK that calls DeathTestAbort if the assertion
// fails.
// The abort message is "CHECK failed: File <file>, line <line>: " followed
// by the literal source text of the expression, so rewording the argument
// at a call site also rewords the message.  The body is wrapped in
// do { ... } while so that the macro expands to a single statement.
# define GTEST_DEATH_TEST_CHECK_(expression) \
  do { \
    if (!::testing::internal::IsTrue(expression)) { \
      DeathTestAbort( \
          ::std::string("CHECK failed: File ") + __FILE__ +  ", line " \
          + ::testing::internal::StreamableToString(__LINE__) + ": " \
          + #expression); \
    } \
  } while (::testing::internal::AlwaysFalse())

// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
// evaluating any system call that fulfills two conditions: it must return
// -1 on failure, and set errno to EINTR when it is interrupted and
// should be tried again.  The macro expands to a loop that repeatedly
// evaluates the expression as long as it evaluates to -1 and sets
// errno to EINTR.  If the expression evaluates to -1 but errno is
// something other than EINTR, DeathTestAbort is called.
// The expression may therefore be evaluated several times, and its result
// is stored in an int.  The abort message contains the literal source text
// of the expression followed by " != -1".
# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
  do { \
    int gtest_retval; \
    do { \
      gtest_retval = (expression); \
    } while (gtest_retval == -1 && errno == EINTR); \
    if (gtest_retval == -1) { \
      DeathTestAbort( \
          ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
          + ::testing::internal::StreamableToString(__LINE__) + ": " \
          + #expression + " != -1"); \
    } \
  } while (::testing::internal::AlwaysFalse())

// Describes the error currently stored in errno.  An errno of 0 yields an
// empty string; any other value is translated by posix::StrError().
std::string GetLastErrnoDescription() {
  if (errno == 0) {
    return "";
  }

  return posix::StrError(errno);
}

// This is called from a death test parent process to read a failure
// message from the death test child process and log it with the FATAL
// severity. On Windows, the message is read from a pipe handle. On other
// platforms, it is read from a file descriptor.
//
// fd is read until end-of-file or an error.  On end-of-file the collected
// text is logged; on a read error the errno description and value are
// logged instead.
static void FailFromInternalError(int fd) {
  Message error;
  char buffer[256];
  int num_read;

  // The inner loop drains the descriptor in chunks that leave room for the
  // terminating NUL; the outer loop restarts it when a read is interrupted.
  do {
    while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
      buffer[num_read] = '\0';
      error << buffer;
    }
  }
  while (num_read == -1 && errno == EINTR);

  if (num_read == 0) {
    GTEST_LOG_(FATAL) << error.GetString();
  }
  else {
    // Snapshot both the number and its description right away.  Producing
    // the description inside the logging expression would read errno
    // again, after the logging machinery may already have changed it, and
    // the text could then disagree with last_error.
    const int last_error = errno;
    const std::string last_error_description = GetLastErrnoDescription();
    GTEST_LOG_(FATAL) << "Error while reading death test internal: "
                      << last_error_description << " [" << last_error << "]";
  }
}

// Death test constructor.  Checks that a test is currently running and
// calls DeathTestAbort() when there is none.
DeathTest::DeathTest() {
  if (GetUnitTestImpl()->current_test_info() == NULL) {
    DeathTestAbort("Cannot run a death test outside of a TEST or "
                   "TEST_F construct");
  }
}

// Creates and returns a death test by dispatching to the current
// death test factory.
// All arguments are forwarded unchanged to the factory's Create(), and its
// bool result is returned as is.  'test' is the output parameter handed to
// the factory.
bool DeathTest::Create(const char* statement, const RE* regex,
                       const char* file, int line, DeathTest** test) {
  return GetUnitTestImpl()->death_test_factory()->Create(
           statement, regex, file, line, test);
}

// Returns the most recently stored death test message.  The pointer refers
// to the internal buffer of the static string below, so it should be
// consumed before the message is set again.
const char* DeathTest::LastMessage() {
  return last_death_test_message_.c_str();
}

// Replaces the stored death test message with a copy of 'message'.
void DeathTest::set_last_death_test_message(const std::string& message) {
  last_death_test_message_ = message;
}

// Storage for the last death test message; a single instance shared by all
// DeathTest objects.
std::string DeathTest::last_death_test_message_;

// Provides cross platform implementation for some death functionality.
// It keeps the state common to the platform-specific death tests: the
// statement and regex being tested, whether and how the child concluded,
// and the descriptors of the pipe between child and parent.
class DeathTestImpl : public DeathTest {
 protected:
  DeathTestImpl(const char* a_statement, const RE* a_regex)
    : statement_(a_statement),
      regex_(a_regex),
      spawned_(false),
      status_(-1),
      outcome_(IN_PROGRESS),
      read_fd_(-1),
      write_fd_(-1) {}

  // A derived class is expected to have closed read_fd_ and reset it to -1
  // before the object is destroyed.
  ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }

  void Abort(AbortReason reason);
  virtual bool Passed(bool status_ok);

  // What is being tested.
  const char* statement() const { return statement_; }
  const RE* regex() const { return regex_; }

  // Whether the child process has been started.
  bool spawned() const { return spawned_; }
  void set_spawned(bool is_spawned) { spawned_ = is_spawned; }

  // Exit status of the child process.
  int status() const { return status_; }
  void set_status(int a_status) { status_ = a_status; }

  // How the death test concluded.
  DeathTestOutcome outcome() const { return outcome_; }
  void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }

  // Parent's (read) end of the pipe.
  int read_fd() const { return read_fd_; }
  void set_read_fd(int fd) { read_fd_ = fd; }

  // Child's (write) end of the pipe.
  int write_fd() const { return write_fd_; }
  void set_write_fd(int fd) { write_fd_ = fd; }

  // Parent process only.  Reads the child's result code from the pipe,
  // translates it into outcome_, and closes read_fd_.  Unexpected codes
  // produce diagnostics and terminate.
  void ReadAndInterpretStatusByte();

 private:
  // Text of the code under test.  Not owned by this class; never deleted
  // here.
  const char* const statement_;
  // Regular expression the test output has to match.  Not owned by this
  // class; never deleted here.
  const RE* const regex_;
  // True once the death test child process has been spawned successfully.
  bool spawned_;
  // Exit status of the child process.
  int status_;
  // The way the death test concluded.
  DeathTestOutcome outcome_;
  // Read end of the pipe to the child process.  Always -1 in the child,
  // which keeps its write end in write_fd_.
  int read_fd_;
  // The child's write end of the pipe to the parent process.  Always -1 in
  // the parent, which keeps its end in read_fd_.
  int write_fd_;
};

// Called in the parent process only. Reads the result code of the death
// test child process via a pipe, interprets it to set the outcome_
// member, and closes read_fd_.  Outputs diagnostics and terminates in
// case of unexpected codes.
//
// No byte on the pipe (end-of-file) means the child died; a single byte is
// one of the kDeathTest* flag characters; any other read result is logged
// as a fatal read failure.
void DeathTestImpl::ReadAndInterpretStatusByte() {
  char flag;
  int bytes_read;

  // The read() here blocks until data is available (signifying the
  // failure of the death test) or until the pipe is closed (signifying
  // its success), so it's okay to call this in the parent before
  // the child process has exited.  Interrupted reads are retried.
  do {
    bytes_read = posix::Read(read_fd(), &flag, 1);
  }
  while (bytes_read == -1 && errno == EINTR);

  if (bytes_read == 0) {
    set_outcome(DIED);
  }
  else if (bytes_read == 1) {
    switch (flag) {
      case kDeathTestReturned:
        set_outcome(RETURNED);
        break;

      case kDeathTestThrew:
        set_outcome(THREW);
        break;

      case kDeathTestLived:
        set_outcome(LIVED);
        break;

      case kDeathTestInternalError:
        FailFromInternalError(read_fd());  // Does not return.
        break;

      default:
        GTEST_LOG_(FATAL) << "Death test child process reported "
                          << "unexpected status byte ("
                          << static_cast<unsigned int>(flag) << ")";
    }
  }
  else {
    // Take the errno description before the logging expression starts:
    // the logging machinery may change errno before the operands of the
    // message are evaluated, which would make the reported reason wrong.
    const std::string read_error_description = GetLastErrnoDescription();
    GTEST_LOG_(FATAL) << "Read from death test child process failed: "
                      << read_error_description;
  }

  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
  set_read_fd(-1);
}

// Reports that the death test statement, which was expected to terminate
// the process, did not.  Must only be called in a death test child
// process.  Sends one status byte through the child's status descriptor
// and then calls _exit(1).
void DeathTestImpl::Abort(AbortReason reason) {
  // Any data found in the pipe makes the parent treat the death test as
  // failed, so a single flag byte describing the reason is enough.
  char status_ch = kDeathTestReturned;

  if (reason == TEST_DID_NOT_DIE) {
    status_ch = kDeathTestLived;
  } else if (reason == TEST_THREW_EXCEPTION) {
    status_ch = kDeathTestThrew;
  }

  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
  // The descriptor is deliberately left open.  On some platforms (i.e.,
  // when built as Windows DLL) destructors of global objects still run
  // after _exit(), and write_fd_ then gets closed indirectly from the
  // destructor of UnitTestImpl; closing it here as well would be a double
  // close, which may assert in debug configurations.  Nothing is buffered
  // in-process, so where destructors do not run the OS closes the
  // descriptor when the process terminates.
  _exit(1);  // Exits w/o any normal exit hooks (we were supposed to crash)
}

// Returns a copy of a death test's stderr output in which every line is
// prefixed with "[  DEATH   ] ", so that death test output is easy to tell
// apart from regular log lines.  The text after the last newline gets a
// prefix as well, even when it is empty.
static ::std::string FormatDeathTestOutput(const ::std::string& output) {
  const char* const line_prefix = "[  DEATH   ] ";
  ::std::string indented;
  size_t line_start = 0;

  for (;;) {
    indented += line_prefix;
    const size_t newline_pos = output.find('\n', line_start);

    if (newline_pos == ::std::string::npos) {
      // Last (possibly empty) line: copy whatever is left and finish.
      indented.append(output, line_start, ::std::string::npos);
      return indented;
    }

    // Copy the line together with its terminating newline.
    const size_t next_line_start = newline_pos + 1;
    indented.append(output, line_start, next_line_start - line_start);
    line_start = next_line_start;
  }
}

// Assesses the success or failure of a death test, using both private
// members which have previously been set, and one argument:
//
// Private data members:
//   outcome:  An enumeration describing how the death test
//             concluded: DIED, LIVED, THREW, or RETURNED.  The death test
//             fails in the latter three cases.
//   status:   The exit status of the child process. On *nix, it is in
//             the format specified by wait(2). On Windows, this is the
//             value supplied to the ExitProcess() API or a numeric code
//             of the exception that terminated the program.
//   regex:    A regular expression object to be applied to
//             the test's captured standard error output; the death test
//             fails if it does not match.
//
// Argument:
//   status_ok: true if exit_status is acceptable in the context of
//              this particular death test, which fails if it is false
//
// Returns true iff all of the above conditions are met.  Otherwise, the
// first failing condition, in the order given above, is the one that is
// reported. Also sets the last death test message string.
bool DeathTestImpl::Passed(bool status_ok) {
  if (!spawned()) {
    return false;
  }

  const std::string error_message = GetCapturedStderr();

  bool success = false;
  Message buffer;

  buffer << "Death test: " << statement() << "\n";

  switch (outcome()) {
    case LIVED:
      buffer << "    Result: failed to die.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;

    case THREW:
      buffer << "    Result: threw an exception.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;

    case RETURNED:
      buffer << "    Result: illegal return in test statement.\n"
             << " Error msg:\n" << FormatDeathTestOutput(error_message);
      break;

    case DIED:
      if (status_ok) {
        const bool matched = RE::PartialMatch(error_message.c_str(), *regex());

        if (matched) {
          success = true;
        }
        else {
          buffer << "    Result: died but not with expected error.\n"
                 << "  Expected: " << regex()->pattern() << "\n"
                 << "Actual msg:\n" << FormatDeathTestOutput(error_message);
        }
      }
      else {
        buffer << "    Result: died but not with expected exit code:\n"
               << "            " << ExitSummary(status()) << "\n"
               << "Actual msg:\n" << FormatDeathTestOutput(error_message);
      }

      break;

    case IN_PROGRESS:
    default:
      GTEST_LOG_(FATAL)
          << "DeathTest::Passed somehow called before conclusion of test";
  }

  DeathTest::set_last_death_test_message(buffer.GetString());
  return success;
}

# if GTEST_OS_WINDOWS
// WindowsDeathTest implements death tests on Windows. Due to the
// specifics of starting new processes on Windows, death tests there are
// always threadsafe, and Google Test considers the
// --gtest_death_test_style=fast setting to be equivalent to
// --gtest_death_test_style=threadsafe there.
//
// A few implementation notes:  Like the Linux version, the Windows
// implementation uses pipes for child-to-parent communication. But due to
// the specifics of pipes on Windows, some extra steps are required:
//
// 1. The parent creates a communication pipe and stores handles to both
//    ends of it.
// 2. The parent starts the child and provides it with the information
//    necessary to acquire the handle to the write end of the pipe.
// 3. The child acquires the write end of the pipe and signals the parent
//    using a Windows event.
// 4. Now the parent can release the write end of the pipe on its side. If
//    this is done before step 3, the object's reference count goes down to
//    0 and it is destroyed, preventing the child from acquiring it. The
//    parent now has to release it, or read operations on the read end of
//    the pipe will not return when the child terminates.
// 5. The parent reads child's output through the pipe (outcome code and
//    any possible error messages) from the pipe, and its stderr and then
//    determines whether to fail the test.
//
// Note: to distinguish Win32 API calls from the local method and function
// calls, the former are explicitly resolved in the global namespace.
//
class WindowsDeathTest : public DeathTestImpl {
 public:
  // Stores the statement/regex pair in the base class and remembers the
  // source location of the death test.  The constructor body is empty; the
  // handle members start out default-constructed.
  WindowsDeathTest(const char* a_statement, const RE* a_regex,
                   const char* file, int line)
    : DeathTestImpl(a_statement, a_regex),
      file_(file),
      line_(line) {}

  // Overrides of the DeathTest interface.
  virtual int Wait();
  virtual TestRole AssumeRole();

 private:
  // Source file that contains the death test.
  const char* const file_;
  // Source line on which the death test appears.
  const int line_;
  // Parent-side handle to the write end of the pipe shared with the child.
  AutoHandle write_handle_;
  // Handle to the spawned child process.
  AutoHandle child_handle_;
  // Event the child sets once it has acquired its own handle to the write
  // end of the pipe.  After observing it, the parent may drop its handles so
  // that its ReadFile() calls return when the child terminates.
  AutoHandle event_handle_;
};

// Waits for the child in a death test to exit, returning its exit
// status, or 0 if no child process exists.  As a side effect, sets the
// outcome data member.
int WindowsDeathTest::Wait() {
  if (!spawned()) {
    return 0;
  }

  // Wait until the child either signals that it has acquired the write end
  // of the pipe or it dies.
  const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() };

  switch (::WaitForMultipleObjects(2,
                                   wait_handles,
                                   FALSE,  // Waits for any of the handles.
                                   INFINITE)) {
    case WAIT_OBJECT_0:
    case WAIT_OBJECT_0 + 1:
      break;

    default:
      GTEST_DEATH_TEST_CHECK_(false);  // Should not get here.
  }

  // The child has acquired the write end of the pipe or exited.
  // We release the handle on our side and continue.
  write_handle_.Reset();
  event_handle_.Reset();

  ReadAndInterpretStatusByte();

  // Waits for the child process to exit if it haven't already. This
  // returns immediately if the child has already exited, regardless of
  // whether previous calls to WaitForMultipleObjects synchronized on this
  // handle or not.
  GTEST_DEATH_TEST_CHECK_(
    WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(),
        INFINITE));
  DWORD status_code;
  GTEST_DEATH_TEST_CHECK_(
    ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE);
  child_handle_.Reset();
  set_status(static_cast<int>(status_code));
  return status();
}

// The AssumeRole process for a Windows death test.  In the parent it creates
// a child process running the same executable, passing --gtest_filter and
// --gtest_internal_run_death_test so that the child runs only the current
// death test, and returns OVERSEE_TEST.  In the child (recognized by a
// non-NULL internal-run flag) it adopts the write descriptor stored in the
// flag and returns EXECUTE_TEST.
DeathTest::TestRole WindowsDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
    impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  if (flag != NULL) {
    // ParseInternalRunDeathTestFlag() has performed all the necessary
    // processing.
    set_write_fd(flag->write_fd());
    return EXECUTE_TEST;
  }

  // WindowsDeathTest uses an anonymous pipe to communicate results of
  // a death test.
  SECURITY_ATTRIBUTES handles_are_inheritable = {
    sizeof(SECURITY_ATTRIBUTES), NULL, TRUE
  };
  HANDLE read_handle, write_handle;
  GTEST_DEATH_TEST_CHECK_(
    ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable,
                 0)  // Default buffer size.
    != FALSE);
  set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
                                O_RDONLY));
  write_handle_.Reset(write_handle);
  event_handle_.Reset(::CreateEvent(
                        &handles_are_inheritable,
                        TRUE,    // Manual-reset event: stays signaled until explicitly reset.
                        FALSE,   // The initial state is non-signaled.
                        NULL));  // The event is unnamed.
  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
  const std::string filter_flag =
    std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
    info->test_case_name() + "." + info->name();
  const std::string internal_flag =
    std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
    "=" + file_ + "|" + StreamableToString(line_) + "|" +
    StreamableToString(death_test_index) + "|" +
    StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
    // size_t has the same width as pointers on both 32-bit and 64-bit
    // Windows platforms.
    // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
    "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
    "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));

  // GetModuleFileNameA() returns 0 on failure and the buffer size it was
  // given when the path had to be truncated; both cases leave us without a
  // usable executable path, so reject them explicitly.
  char executable_path[_MAX_PATH + 1];  // NOLINT
  const DWORD executable_path_length =
    ::GetModuleFileNameA(NULL, executable_path, _MAX_PATH);
  GTEST_DEATH_TEST_CHECK_(executable_path_length != 0 &&
                          executable_path_length < _MAX_PATH);

  std::string command_line =
    std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
    internal_flag + "\"";

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // Flush the log buffers since the log streams are shared with the child.
  FlushInfoLog();

  // The child process will share the standard handles with the parent.
  STARTUPINFOA startup_info;
  memset(&startup_info, 0, sizeof(startup_info));
  startup_info.cb = sizeof(startup_info);  // Size field documented for STARTUPINFO.
  startup_info.dwFlags = STARTF_USESTDHANDLES;
  startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
  startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
  startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);

  PROCESS_INFORMATION process_info;
  GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
                            executable_path,
                            const_cast<char*>(command_line.c_str()),
                            NULL,   // Returned process handle is not inheritable.
                            NULL,   // Returned thread handle is not inheritable.
                            TRUE,   // Child inherits all inheritable handles (for write_handle_).
                            0x0,    // Default creation flags.
                            NULL,   // Inherit the parent's environment.
                            UnitTest::GetInstance()->original_working_dir(),
                            &startup_info,
                            &process_info) != FALSE);
  child_handle_.Reset(process_info.hProcess);
  // Only the process handle is kept; the primary thread handle is not needed.
  ::CloseHandle(process_info.hThread);
  set_spawned(true);
  return OVERSEE_TEST;
}
# else  // We are not on Windows.

// ForkingDeathTest is the shared base of the death test flavors used when
// not on Windows.  It implements Wait() on top of a child PID; AssumeRole()
// is left for the derived classes to supply.
class ForkingDeathTest : public DeathTestImpl {
 public:
  ForkingDeathTest(const char* statement, const RE* regex);

  // Override of the DeathTest interface.
  virtual int Wait();

 protected:
  // Records the PID of the child that the derived class created.
  void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }

 private:
  // PID of the child process while a death test runs; 0 inside the child
  // process itself.
  pid_t child_pid_;
};

// Forwards the statement and regex to DeathTestImpl and starts with
// child_pid_ set to -1.
ForkingDeathTest::ForkingDeathTest(const char* a_statement,
                                   const RE* a_regex)
  : DeathTestImpl(a_statement, a_regex), child_pid_(-1) {
}

// Blocks until the death test child has exited and returns the raw status
// reported by waitpid(); returns 0 when no child was spawned.  As a side
// effect the outcome and status members are updated.
int ForkingDeathTest::Wait() {
  if (spawned()) {
    // Read the status byte from the pipe before reaping the child.
    ReadAndInterpretStatusByte();

    int status_value;
    GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0));
    set_status(status_value);
    return status_value;
  }

  return 0;
}

// Concrete death test that forks and lets the child run the test statement
// directly, without re-executing the program.
class NoExecDeathTest : public ForkingDeathTest {
 public:
  NoExecDeathTest(const char* a_statement, const RE* a_regex)
    : ForkingDeathTest(a_statement, a_regex) {}

  virtual TestRole AssumeRole();
};

// The AssumeRole process for a fork-and-run death test: a plain fork() with
// a pipe over which the child transmits the status byte.  Returns
// OVERSEE_TEST in the parent and EXECUTE_TEST in the child.
DeathTest::TestRole NoExecDeathTest::AssumeRole() {
  const size_t num_threads = GetThreadCount();

  if (num_threads != 1) {
    GTEST_LOG_(WARNING) << DeathTestThreadWarning(num_threads);
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);

  DeathTest::set_last_death_test_message("");
  CaptureStderr();
  // fork() copies the log file buffers but shares the file descriptors.
  // Flushing all log files now keeps the parent's descriptors and buffers
  // in sync even after the child closes its descriptors.  The flush sits as
  // close to the fork as possible to narrow the race with other threads
  // that may write to the log file before the death test.
  FlushInfoLog();

  const pid_t child_pid = fork();
  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  set_child_pid(child_pid);

  if (child_pid != 0) {
    // Parent: keep only the read end of the pipe.
    GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
    set_read_fd(pipe_fd[0]);
    set_spawned(true);
    return OVERSEE_TEST;
  }

  // Child: keep only the write end of the pipe.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
  set_write_fd(pipe_fd[1]);
  // Send all logging to stderr in the child so that it does not write to
  // the log files concurrently with the parent.  The parent captures stderr
  // and appends the child's output to a log.
  LogToStderr();
  // Event forwarding to the listeners of the event listener API must be
  // shut down in death test subprocesses.
  GetUnitTestImpl()->listeners()->SuppressEventForwarding();
  g_in_fast_death_test_child = true;
  return EXECUTE_TEST;
}

// Concrete death test that spawns a child which re-executes the main
// program from the start, with command-line flags that make the child run
// only this particular death test.
class ExecDeathTest : public ForkingDeathTest {
 public:
  ExecDeathTest(const char* a_statement, const RE* a_regex,
                const char* file, int line)
    : ForkingDeathTest(a_statement, a_regex),
      file_(file),
      line_(line) {}

  virtual TestRole AssumeRole();

 private:
  // Returns a copy of the argument vector obtained from GetInjectableArgvs();
  // it forms the start of the child's command line.
  static ::std::vector<testing::internal::string>
  GetArgvsForDeathTestChildProcess() {
    return GetInjectableArgvs();
  }

  // Source file that contains the death test.
  const char* const file_;
  // Source line on which the death test appears.
  const int line_;
};

// Utility class for accumulating command-line arguments in the form expected
// by exec-style calls: an array of C strings terminated by a NULL entry.
// Every stored string is a heap copy made with posix::StrDup() and is
// released with free() in the destructor.
class Arguments {
 public:
  // Starts with just the NULL terminator.
  Arguments() {
    args_.push_back(NULL);
  }

  ~Arguments() {
    // free(NULL) is a no-op, so the terminator needs no special handling.
    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
         ++i) {
      free(*i);
    }
  }

  // Appends a copy of |argument| just before the NULL terminator.
  void AddArgument(const char* argument) {
    args_.insert(args_.end() - 1, posix::StrDup(argument));
  }

  // Appends a copy of every element of |arguments|, preserving their order.
  // Str must provide c_str().
  template <typename Str>
  void AddArguments(const ::std::vector<Str>& arguments) {
    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
         i != arguments.end();
         ++i) {
      AddArgument(i->c_str());
    }
  }

  // Returns the NULL-terminated argument array.  The pointer is invalidated
  // by any later AddArgument()/AddArguments() call.
  char* const* Argv() {
    return &args_[0];
  }

 private:
  // Copying would leave two objects owning (and freeing) the same strings,
  // so copy construction and assignment are declared but never defined.
  Arguments(const Arguments&);
  void operator=(const Arguments&);

  std::vector<char*> args_;
};

// Bundle of values handed to the child process of a threadsafe-style death
// test.  The field order matters: the struct is filled by aggregate
// initialization.
struct ExecDeathTestArgs {
  // Command-line arguments for the child's call to exec.
  char* const* argv;
  // File descriptor the child closes first; the read end of a pipe.
  int close_fd;
};

#  if !GTEST_OS_MAC
// Some POSIX platforms expect you to declare environ. extern "C" makes
// it reside in the global namespace.
extern "C" char** environ;
#  endif  // !GTEST_OS_MAC

// Returns the environment block of the current process.
inline char** GetEnviron() {
#  if GTEST_OS_MAC
  // When Google Test is built as a framework on MacOS X, the environ variable
  // is unavailable. Apple's documentation (man environ) recommends using
  // _NSGetEnviron() instead.
  return *_NSGetEnviron();
#  else
  return environ;
#  endif  // GTEST_OS_MAC
}

#  if !GTEST_OS_QNX
// Entry point of the child process of a threadsafe-style death test.
// child_arg points at an ExecDeathTestArgs.  The function closes
// args->close_fd, changes to the original working directory and replaces
// the process image with execve().  It runs in a clone()-ed process and
// therefore must avoid potentially unsafe operations such as malloc or
// libc functions.  It only returns (with EXIT_FAILURE) if a step failed.
static int ExecDeathTestChildMain(void* child_arg) {
  ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));

  // The test program has to run in the environment it was originally
  // invoked in, so switch back to the original working directory first.
  // chdir() is a direct system call and hence safe to use here.
  const char* const start_dir =
    UnitTest::GetInstance()->original_working_dir();

  if (chdir(start_dir) != 0) {
    const std::string reason = GetLastErrnoDescription();
    DeathTestAbort(std::string("chdir(\"") + start_dir + "\") failed: " +
                   reason);
    return EXIT_FAILURE;
  }

  // execve() is a direct system call and hence safe; execvp() is a libc
  // function and thus potentially unsafe.  Because execve() does not search
  // the PATH, the user must invoke the test program via a valid path that
  // contains at least one path separator.
  char* const* const child_argv = args->argv;
  execve(child_argv[0], child_argv, GetEnviron());

  // Control only gets here when execve() failed.
  const std::string reason = GetLastErrnoDescription();
  DeathTestAbort(std::string("execve(") + child_argv[0] + ", ...) in " +
                 start_dir + " failed: " +
                 reason);
  return EXIT_FAILURE;
}
#  endif  // !GTEST_OS_QNX

// Two utility routines that together determine the direction the stack
// grows.
// This could be accomplished more elegantly by a single recursive
// function, but we want to guard against the unlikely possibility of
// a smart compiler optimizing the recursion away.
//
// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
// StackLowerThanAddress into StackGrowsDown, which then doesn't give
// correct answer.
void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
// Writes to *result whether a local variable of this call is located at a
// lower address than ptr.  The answer is delivered through the out-parameter
// rather than a return value.
void StackLowerThanAddress(const void* ptr, bool* result) {
  // Never read; only its address is of interest.
  int dummy;
  *result = (&dummy < ptr);
}

// Returns what StackLowerThanAddress() reports when handed the address of
// one of this function's own locals, i.e. whether the callee's local sits at
// a lower address than the caller's.
bool StackGrowsDown() {
  int caller_local;
  bool callee_is_lower;
  StackLowerThanAddress(&caller_local, &callee_is_lower);
  return callee_is_lower;
}

// Spawns a child process with the same executable as the current process in
// a thread-safe manner and instructs it to run the death test.  The
// implementation uses fork(2) + exec.  On systems where clone(2) is
// available, it is used instead, being slightly more thread-safe.  On QNX,
// fork supports only single-threaded environments, so this function uses
// spawn(2) there instead.  The function dies with an error message if
// anything goes wrong.
static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
  // argv:     NULL-terminated argument vector for the child; argv[0] is used
  //           as the program path.
  // close_fd: descriptor the child must not keep open (closed explicitly in
  //           ExecDeathTestChildMain, or marked close-on-exec on QNX).
  // Returns the PID of the new child; a PID of -1 trips the check at the end.
  ExecDeathTestArgs args = { argv, close_fd };
  // Stays -1 unless one of the spawn paths below assigns a real PID.
  pid_t child_pid = -1;

#  if GTEST_OS_QNX
  // Obtains the current directory and sets it to be closed in the child
  // process.
  const int cwd_fd = open(".", O_RDONLY);
  GTEST_DEATH_TEST_CHECK_(cwd_fd != -1);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC));
  // We need to execute the test program in the same environment where
  // it was originally invoked.  Therefore we change to the original
  // working directory first.
  const char* const original_dir =
    UnitTest::GetInstance()->original_working_dir();

  // We can safely call chdir() as it's a direct system call.
  if (chdir(original_dir) != 0) {
    DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
                   GetLastErrnoDescription());
    return EXIT_FAILURE;
  }

  int fd_flags;
  // Set close_fd to be closed after spawn.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
  GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD,
                                        fd_flags | FD_CLOEXEC));
  struct inheritance inherit = {0};
  // spawn is a system call.
  child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron());
  // Restores the current working directory.
  GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));

#  else   // GTEST_OS_QNX
#   if GTEST_OS_LINUX
  // When a SIGPROF signal is received while fork() or clone() are executing,
  // the process may hang. To avoid this, we ignore SIGPROF here and re-enable
  // it after the call to fork()/clone() is complete.
  struct sigaction saved_sigprof_action;
  struct sigaction ignore_sigprof_action;
  memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action));
  sigemptyset(&ignore_sigprof_action.sa_mask);
  ignore_sigprof_action.sa_handler = SIG_IGN;
  GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction(
                                    SIGPROF, &ignore_sigprof_action, &saved_sigprof_action));
#   endif  // GTEST_OS_LINUX

#   if GTEST_HAS_CLONE
  // The death_test_use_fork flag selects fork() even where clone() exists.
  const bool use_fork = GTEST_FLAG(death_test_use_fork);

  if (!use_fork) {
    // Function-local static: the stack direction is determined on the first
    // call that reaches this point.
    static const bool stack_grows_down = StackGrowsDown();
    // The child gets a one-page stack.
    const size_t stack_size = getpagesize();
    // MAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
    void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                             MAP_ANON | MAP_PRIVATE, -1, 0);
    GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);

    // Maximum stack alignment in bytes:  For a downward-growing stack, this
    // amount is subtracted from size of the stack space to get an address
    // that is within the stack space and is aligned on all systems we care
    // about.  As far as I know there is no ABI with stack alignment greater
    // than 64.  We assume stack and stack_size already have alignment of
    // kMaxStackAlignment.
    const size_t kMaxStackAlignment = 64;
    void* const stack_top =
      static_cast<char*>(stack) +
      (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
    // Verifies the assumptions stated above before handing the stack over.
    GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
                            reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);

    // SIGCHLD is the only value passed in the clone flags argument.
    child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);

    // The parent unmaps the region as soon as clone() has returned.
    // NOTE(review): CLONE_VM is not requested above, so presumably the child
    // works on its own copy of this mapping -- confirm.
    GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
  }

#   else
  // Without clone() support fork() is the only option.
  const bool use_fork = true;
#   endif  // GTEST_HAS_CLONE

  if (use_fork && (child_pid = fork()) == 0) {
    // Child side of fork().  ExecDeathTestChildMain() only comes back when a
    // step before or including execve() failed.
    ExecDeathTestChildMain(&args);
    _exit(0);
  }

#  endif  // GTEST_OS_QNX
#  if GTEST_OS_LINUX
  // Reinstates the SIGPROF disposition saved before the fork()/clone().
  GTEST_DEATH_TEST_CHECK_SYSCALL_(
    sigaction(SIGPROF, &saved_sigprof_action, NULL));
#  endif  // GTEST_OS_LINUX

  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  return child_pid;
}

// The AssumeRole process for a fork-and-exec death test.  The parent spawns
// a child that re-executes the main program from the beginning with the
// --gtest_filter and --gtest_internal_run_death_test flags set so that only
// the current death test is re-run, and returns OVERSEE_TEST.  The
// re-executed child (recognized by a non-NULL internal-run flag) adopts the
// write descriptor from the flag and returns EXECUTE_TEST.
DeathTest::TestRole ExecDeathTest::AssumeRole() {
  const UnitTestImpl* const unit_test = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const run_flag =
    unit_test->internal_run_death_test_flag();
  const TestInfo* const info = unit_test->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  if (run_flag != NULL) {
    set_write_fd(run_flag->write_fd());
    return EXECUTE_TEST;
  }

  int pipe_fd[2];
  GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
  // The write end must survive the child's exec, so make sure its
  // close-on-exec flag is cleared.
  GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);

  // Both extra flags share the "--<prefix>" lead-in.
  const std::string flag_lead = std::string("--") + GTEST_FLAG_PREFIX_;

  // Restricts the child to the currently running test.
  const std::string filter_flag =
    flag_lead + kFilterFlag + "=" + info->test_case_name() + "." +
    info->name();

  // Format: file|line|death test index|write descriptor.
  const std::string internal_flag =
    flag_lead + kInternalRunDeathTestFlag + "=" + file_ + "|" +
    StreamableToString(line_) + "|" +
    StreamableToString(death_test_index) + "|" +
    StreamableToString(pipe_fd[1]);

  Arguments args;
  args.AddArguments(GetArgvsForDeathTestChildProcess());
  args.AddArgument(filter_flag.c_str());
  args.AddArgument(internal_flag.c_str());

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // The comment in NoExecDeathTest::AssumeRole explains why the log has to
  // be flushed at this point.
  FlushInfoLog();

  const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]);
  // The parent keeps only the read end of the pipe.
  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
  set_child_pid(child_pid);
  set_read_fd(pipe_fd[0]);
  set_spawned(true);
  return OVERSEE_TEST;
}

# endif  // !GTEST_OS_WINDOWS

// Creates a concrete DeathTest-derived class that depends on the
// --gtest_death_test_style flag, and sets the pointer pointed to
// by the "test" argument to its address.  If the test should be
// skipped, sets that pointer to NULL.  Returns true, unless the
// flag is set to an invalid value.
bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
                                     const char* file, int line,
                                     DeathTest** test) {
  // statement, regex: forwarded unchanged to the concrete death test.
  // file, line:       source location of the death test; compared against
  //                   the internal-run flag and passed on to the classes
  //                   that take them.
  // test:             out-parameter; receives the new object, or NULL when
  //                   this death test is to be skipped.  It is left untouched
  //                   on the two paths that return false.
  UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
    impl->internal_run_death_test_flag();
  // Note that this bumps the per-test death test counter on every call.
  const int death_test_index = impl->current_test_info()
                               ->increment_death_test_count();

  // NOTE(review): a non-NULL flag presumably means this process was started
  // with --gtest_internal_run_death_test, i.e. it is a death test child --
  // confirm against where the flag object is installed.
  if (flag != NULL) {
    if (death_test_index > flag->index()) {
      DeathTest::set_last_death_test_message(
        "Death test count (" + StreamableToString(death_test_index)
        + ") somehow exceeded expected maximum ("
        + StreamableToString(flag->index()) + ")");
      return false;
    }

    // Any death test other than the one named by the flag (same file, line
    // and index) is skipped.
    if (!(flag->file() == file && flag->line() == line &&
          flag->index() == death_test_index)) {
      *test = NULL;
      return true;
    }
  }

# if GTEST_OS_WINDOWS

  // On Windows both styles map to the same implementation.
  if (GTEST_FLAG(death_test_style) == "threadsafe" ||
      GTEST_FLAG(death_test_style) == "fast") {
    *test = new WindowsDeathTest(statement, regex, file, line);
  }

# else

  if (GTEST_FLAG(death_test_style) == "threadsafe") {
    *test = new ExecDeathTest(statement, regex, file, line);
  }
  else if (GTEST_FLAG(death_test_style) == "fast") {
    *test = new NoExecDeathTest(statement, regex);
  }

# endif  // GTEST_OS_WINDOWS

  // This else completes whichever if/else-if chain the preprocessor kept
  // above.
  else {  // NOLINT - this is more readable than unbalanced brackets inside #if.
    DeathTest::set_last_death_test_message(
      "Unknown death test style \"" + GTEST_FLAG(death_test_style)
      + "\" encountered");
    return false;
  }

  return true;
}

// Splits |str| at every occurrence of |delimiter| and replaces the contents
// of |*dest| with the resulting fields, in order.  Empty fields are kept, so
// the result always holds one more element than there are delimiters; an
// empty input yields a single empty field.  GTEST_HAS_DEATH_TEST implies
// that we have ::std::string, so we can use it here.
static void SplitString(const ::std::string& str, char delimiter,
                        ::std::vector< ::std::string>* dest) {
  ::std::vector< ::std::string> parsed;
  ::std::string::size_type field_start = 0;

  // Each iteration emits the field that ends at the delimiter just found.
  for (::std::string::size_type delimiter_pos = str.find(delimiter);
       delimiter_pos != ::std::string::npos;
       delimiter_pos = str.find(delimiter, field_start)) {
    parsed.push_back(str.substr(field_start, delimiter_pos - field_start));
    field_start = delimiter_pos + 1;
  }

  // Whatever follows the last delimiter (possibly nothing) is the final
  // field.
  parsed.push_back(str.substr(field_start));

  // Fields are collected locally and swapped in, so |*dest| is only changed
  // once splitting has finished.
  dest->swap(parsed);
}

# if GTEST_OS_WINDOWS
// Recreates the pipe and event handles from the provided parameters,
// signals the event, and returns a file descriptor wrapped around the pipe
// handle. This function is called in the child process only.
int GetStatusFileDescriptor(unsigned int parent_process_id,
                            size_t write_handle_as_size_t,
                            size_t event_handle_as_size_t) {
  AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
                                   FALSE,  // Non-inheritable.
                                   parent_process_id));

  if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
    DeathTestAbort("Unable to open parent process " +
                   StreamableToString(parent_process_id));
  }

  // TODO(vladl@google.com): Replace the following check with a
  // compile-time assertion when available.
  GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));

  const HANDLE write_handle =
    reinterpret_cast<HANDLE>(write_handle_as_size_t);
  HANDLE dup_write_handle;

  // The newly initialized handle is accessible only in in the parent
  // process. To obtain one accessible within the child, we need to use
  // DuplicateHandle.
  if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
                         ::GetCurrentProcess(), &dup_write_handle,
                         0x0,    // Requested privileges ignored since
                         // DUPLICATE_SAME_ACCESS is used.
                         FALSE,  // Request non-inheritable handler.
                         DUPLICATE_SAME_ACCESS)) {
    DeathTestAbort("Unable to duplicate the pipe handle " +
                   StreamableToString(write_handle_as_size_t) +
                   " from the parent process " +
                   StreamableToString(parent_process_id));
  }

  const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
  HANDLE dup_event_handle;

  if (!::DuplicateHandle(parent_process_handle.Get(), event_handle,
                         ::GetCurrentProcess(), &dup_event_handle,
                         0x0,
                         FALSE,
                         DUPLICATE_SAME_ACCESS)) {
    DeathTestAbort("Unable to duplicate the event handle " +
                   StreamableToString(event_handle_as_size_t) +
                   " from the parent process " +
                   StreamableToString(parent_process_id));
  }

  const int write_fd =
    ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);

  if (write_fd == -1) {
    DeathTestAbort("Unable to convert pipe handle " +
                   StreamableToString(write_handle_as_size_t) +
                   " to a file descriptor");
  }

  // Signals the parent that the write end of the pipe has been acquired
  // so the parent can release its own write end.
  ::SetEvent(dup_event_handle);

  return write_fd;
}
# endif  // GTEST_OS_WINDOWS

// Builds an InternalRunDeathTestFlag from the value of
// GTEST_FLAG(internal_run_death_test).  Returns NULL when the flag is empty;
// otherwise returns a heap-allocated object.  A malformed flag value is
// reported through DeathTestAbort().
InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
  if (GTEST_FLAG(internal_run_death_test).empty()) {
    return NULL;
  }

  // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
  // can use it here.
  ::std::vector< ::std::string> fields;
  SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields);

  // fields[0] is the file name; the remaining fields are numeric.
  int line = -1;
  int index = -1;
  int write_fd = -1;

# if GTEST_OS_WINDOWS

  // Windows layout: file|line|index|parent pid|write handle|event handle.
  unsigned int parent_process_id = 0;
  size_t write_handle_as_size_t = 0;
  size_t event_handle_as_size_t = 0;

  const bool well_formed =
    fields.size() == 6
    && ParseNaturalNumber(fields[1], &line)
    && ParseNaturalNumber(fields[2], &index)
    && ParseNaturalNumber(fields[3], &parent_process_id)
    && ParseNaturalNumber(fields[4], &write_handle_as_size_t)
    && ParseNaturalNumber(fields[5], &event_handle_as_size_t);

  if (!well_formed) {
    DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
                   GTEST_FLAG(internal_run_death_test));
  }

  // The descriptor is derived from the parent's handles rather than read
  // from the flag directly.
  write_fd = GetStatusFileDescriptor(parent_process_id,
                                     write_handle_as_size_t,
                                     event_handle_as_size_t);
# else

  // Layout elsewhere: file|line|index|write descriptor.
  const bool well_formed =
    fields.size() == 4
    && ParseNaturalNumber(fields[1], &line)
    && ParseNaturalNumber(fields[2], &index)
    && ParseNaturalNumber(fields[3], &write_fd);

  if (!well_formed) {
    DeathTestAbort("Bad --gtest_internal_run_death_test flag: "
                   + GTEST_FLAG(internal_run_death_test));
  }

# endif  // GTEST_OS_WINDOWS

  return new InternalRunDeathTestFlag(fields[0], line, index, write_fd);
}

}  // namespace internal

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest-filepath.cpp
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: keith.ray@gmail.com (Keith Ray)

#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-filepath.h"
#include "gtest/internal/gtest-port.h"

#include <stdlib.h>

#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h>
#elif GTEST_OS_WINDOWS
# include <direct.h>
# include <io.h>
#elif GTEST_OS_SYMBIAN
// Symbian OpenC has PATH_MAX in sys/syslimits.h
# include <sys/syslimits.h>
#else
# include <limits.h>
# include <climits>  // Some Linux distributions define PATH_MAX here.
#endif  // GTEST_OS_WINDOWS_MOBILE

#if GTEST_OS_WINDOWS
# define GTEST_PATH_MAX_ _MAX_PATH
#elif defined(PATH_MAX)
# define GTEST_PATH_MAX_ PATH_MAX
#elif defined(_XOPEN_PATH_MAX)
# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
#else
# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
#endif  // GTEST_OS_WINDOWS

#include "gtest/internal/gtest-string.h"

namespace testing {
namespace internal {

#if GTEST_OS_WINDOWS
// On Windows, '\\' is the standard path separator, but many tools and the
// Windows API also accept '/' as an alternate path separator. Unless otherwise
// noted, a file path can contain either kind of path separators, or a mixture
// of them.
const char kPathSeparator = '\\';
const char kAlternatePathSeparator = '/';
const char kPathSeparatorString[] = "\\";
const char kAlternatePathSeparatorString[] = "/";
# if GTEST_OS_WINDOWS_MOBILE
// Windows CE doesn't have a current directory. You should not use
// the current directory in tests on Windows CE, but this at least
// provides a reasonable fallback.
const char kCurrentDirectoryString[] = "\\";
// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
const DWORD kInvalidFileAttributes = 0xffffffff;
# else
// Desktop Windows: the current directory, spelled with the standard
// separator.
const char kCurrentDirectoryString[] = ".\\";
# endif  // GTEST_OS_WINDOWS_MOBILE
#else
// Everywhere else '/' is the only separator; no alternate-separator
// constants are defined in this branch.
const char kPathSeparator = '/';
const char kPathSeparatorString[] = "/";
// Directory that FilePath::RemoveFileName() reports when a path contains no
// separator at all.
const char kCurrentDirectoryString[] = "./";
#endif  // GTEST_OS_WINDOWS

// Tells whether |c| is accepted as a path separator on this platform:
// kPathSeparator always, plus kAlternatePathSeparator when
// GTEST_HAS_ALT_PATH_SEP_ is enabled.
static bool IsPathSeparator(char c) {
  if (c == kPathSeparator) {
    return true;
  }

#if GTEST_HAS_ALT_PATH_SEP_

  if (c == kAlternatePathSeparator) {
    return true;
  }

#endif
  return false;
}

// Produces the process's current working directory as a FilePath. When the
// directory cannot be queried, the result is built from "".
FilePath FilePath::GetCurrentDir() {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE has no notion of a current directory; hand back the
  // fallback string instead.
  return FilePath(kCurrentDirectoryString);
#else
  // Zero-initialized buffer large enough for the longest supported path
  // plus the terminating NUL.
  char buffer[GTEST_PATH_MAX_ + 1] = { '\0' };
# if GTEST_OS_WINDOWS
  const char* const current = _getcwd(buffer, sizeof(buffer));
# else
  const char* const current = getcwd(buffer, sizeof(buffer));
# endif  // GTEST_OS_WINDOWS
  // A NULL result signals failure; otherwise the call returned |buffer|.
  return FilePath(current == NULL ? "" : buffer);
#endif  // GTEST_OS_WINDOWS_MOBILE
}

// Strips "." + |extension| from the end of the path, comparing without
// regard to case, and returns the result as a new FilePath.
// Example: FilePath("dir/file.exe").RemoveExtension("EXE") yields
// FilePath("dir/file"). When the path does not end with that extension,
// an unchanged copy is returned.
FilePath FilePath::RemoveExtension(const char* extension) const {
  std::string suffix(".");
  suffix += extension;

  if (!String::EndsWithCaseInsensitive(pathname_, suffix)) {
    return *this;
  }

  const std::string::size_type stem_length =
    pathname_.length() - suffix.length();
  return FilePath(pathname_.substr(0, stem_length));
}

// Locates the last occurrence of a path separator in the FilePath and
// returns a pointer to it, or NULL when the path contains none. When
// GTEST_HAS_ALT_PATH_SEP_ is enabled, the alternate separator is considered
// as well and whichever of the two appears later wins.
const char* FilePath::FindLastPathSeparator() const {
  const char* const path = c_str();
  const char* const primary = strrchr(path, kPathSeparator);
#if GTEST_HAS_ALT_PATH_SEP_
  const char* const alternate = strrchr(path, kAlternatePathSeparator);

  if (alternate == NULL) {
    return primary;
  }

  // Only order the two pointers once both are known to be non-NULL;
  // comparing against a NULL pointer would be undefined.
  if (primary == NULL || alternate > primary) {
    return alternate;
  }

#endif
  return primary;
}

// Drops the directory portion and returns what follows the last path
// separator as a new FilePath.
// Example: FilePath("path/to/file").RemoveDirectoryName() yields
// FilePath("file"). A path without any separator ("just_a_file") comes back
// unchanged, and a path ending in a separator ("just_a_dir/") yields an
// empty FilePath ("").
FilePath FilePath::RemoveDirectoryName() const {
  const char* const separator = FindLastPathSeparator();

  if (separator == NULL) {
    return *this;
  }

  return FilePath(separator + 1);
}

// Returns the directory part of the path, i.e. everything up to and
// including the last path separator.
// Example: FilePath("path/to/file").RemoveFileName() yields "path/to/".
// A path that contains no separator at all ("a_file") yields
// kCurrentDirectoryString ("./", or ".\\" on Windows). A path that already
// ends in a separator ("just/a/dir/") is returned with the same text.
FilePath FilePath::RemoveFileName() const {
  const char* const separator = FindLastPathSeparator();

  if (separator == NULL) {
    return FilePath(std::string(kCurrentDirectoryString));
  }

  // Keep the separator itself, hence the "+ 1".
  const char* const begin = c_str();
  return FilePath(std::string(begin, separator + 1 - begin));
}

// Helper functions for naming files in a directory for xml output.

// Builds "<directory>/<base_name>.<extension>" when |number| is zero, and
// "<directory>/<base_name>_<number>.<extension>" otherwise.
// E.g. directory = "dir", base_name = "test", number = 0, extension = "xml"
// gives "dir/test.xml"; number = 12 gives "dir/test_12.xml".
// The directory and file parts are joined by ConcatPaths().
FilePath FilePath::MakeFileName(const FilePath& directory,
                                const FilePath& base_name,
                                int number,
                                const char* extension) {
  std::string leaf = base_name.string();

  if (number != 0) {
    leaf += "_";
    leaf += StreamableToString(number);
  }

  leaf += ".";
  leaf += extension;

  return ConcatPaths(directory, FilePath(leaf));
}

// Joins |directory| and |relative_path| with exactly one kPathSeparator
// between them, e.g. "dir" + "test.xml" -> "dir/test.xml". An empty
// |directory| yields |relative_path| unchanged.
FilePath FilePath::ConcatPaths(const FilePath& directory,
                               const FilePath& relative_path) {
  if (!directory.IsEmpty()) {
    // Strip a trailing separator first so that one is not doubled up.
    std::string joined = directory.RemoveTrailingPathSeparator().string();
    joined += kPathSeparator;
    joined += relative_path.string();
    return FilePath(joined);
  }

  return relative_path;
}

// Returns true if pathname describes something findable in the file-system,
// either a file, directory, or whatever.
bool FilePath::FileOrDirectoryExists() const {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE: query the file attributes through a converted copy of the
  // path, which is released with delete[] right after the query.
  LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
  const DWORD attributes = GetFileAttributes(unicode);
  delete [] unicode;
  return attributes != kInvalidFileAttributes;
#else
  // Everywhere else: the path exists iff posix::Stat() reports success (0).
  // The stat data itself is not inspected.
  posix::StatStruct file_stat;
  return posix::Stat(pathname_.c_str(), &file_stat) == 0;
#endif  // GTEST_OS_WINDOWS_MOBILE
}

// Returns true if pathname describes a directory in the file-system
// that exists.
bool FilePath::DirectoryExists() const {
  bool result = false;
#if GTEST_OS_WINDOWS
  // Don't strip off trailing separator if path is a root directory on
  // Windows (like "C:\\").
  // NOTE(review): the conditional mixes *this with a by-value result, so
  // |path| presumably binds to a temporary copy kept alive by the const
  // reference -- confirm.
  const FilePath& path(IsRootDirectory() ? *this :
                       RemoveTrailingPathSeparator());
#else
  // Other platforms use the path as-is, trailing separator included.
  const FilePath& path(*this);
#endif

#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE: look at the attribute bits of a converted copy of the path,
  // which is released with delete[] right after the query.
  LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
  const DWORD attributes = GetFileAttributes(unicode);
  delete [] unicode;

  // The path must both resolve and carry the directory attribute.
  if ((attributes != kInvalidFileAttributes) &&
      (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
    result = true;
  }

#else
  // Everywhere else: posix::Stat() must succeed and the result must be
  // classified as a directory by posix::IsDir().
  posix::StatStruct file_stat;
  result = posix::Stat(path.c_str(), &file_stat) == 0 &&
           posix::IsDir(file_stat);
#endif  // GTEST_OS_WINDOWS_MOBILE

  return result;
}

// Reports whether the path names a root directory. On Windows (one root
// per disk drive) that means a three-character absolute path; elsewhere it
// means a path consisting of a single separator character.
bool FilePath::IsRootDirectory() const {
#if GTEST_OS_WINDOWS

  // TODO(wan@google.com): on Windows a network share like
  // \\server\share can be a root directory, although it cannot be the
  // current directory.  Handle this properly.
  if (pathname_.length() != 3) {
    return false;
  }

  return IsAbsolutePath();
#else

  if (pathname_.length() != 1) {
    return false;
  }

  return IsPathSeparator(pathname_.c_str()[0]);
#endif
}

// Reports whether the path is absolute. On Windows this requires an ASCII
// drive letter, a ':' and a path separator as the first three characters;
// elsewhere the path merely has to start with a separator.
bool FilePath::IsAbsolutePath() const {
  const char* const path = pathname_.c_str();
#if GTEST_OS_WINDOWS

  if (pathname_.length() < 3) {
    return false;
  }

  const char drive = path[0];
  const bool drive_is_letter = (drive >= 'a' && drive <= 'z') ||
                               (drive >= 'A' && drive <= 'Z');
  return drive_is_letter && path[1] == ':' && IsPathSeparator(path[2]);
#else
  // For an empty path, path[0] is the terminating NUL, which is not a
  // separator, so the answer is false.
  return IsPathSeparator(path[0]);
#endif
}

// Finds a pathname under |directory| that does not exist yet. The first
// candidate is directory/base_name.extension; if that is taken, the
// candidates directory/base_name_<number>.extension are tried with
// <number> = 1, 2, ... until one is free.
// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
// The existence check and the eventual creation of the file are separate
// steps, so two processes running this at the same time could both settle
// on the same name.
FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
    const FilePath& base_name,
    const char* extension) {
  FilePath candidate;
  int attempt = 0;

  for (;;) {
    candidate.Set(MakeFileName(directory, base_name, attempt, extension));
    ++attempt;

    if (!candidate.FileOrDirectoryExists()) {
      break;
    }
  }

  return candidate;
}

// Reports whether the path text ends with a path separator, which marks it
// as intended to name a directory. This is a purely textual test: nothing
// is looked up in the file system, and an empty path yields false.
bool FilePath::IsDirectory() const {
  if (pathname_.empty()) {
    return false;
  }

  const char last = pathname_.c_str()[pathname_.length() - 1];
  return IsPathSeparator(last);
}

// Create directories so that path exists. Returns true if successful or if
// the directories already exist; returns false if unable to create directories
// for any reason.
bool FilePath::CreateDirectoriesRecursively() const {
  if (!this->IsDirectory()) {
    return false;
  }

  if (pathname_.length() == 0 || this->DirectoryExists()) {
    return true;
  }

  const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
  return parent.CreateDirectoriesRecursively() && this->CreateFolder();
}

// Create the directory so that path exists. Returns true if successful or
// if the directory already exists; returns false if unable to create the
// directory for any reason, including if the parent directory does not
// exist. Not named "CreateDirectory" because that's a macro on Windows.
bool FilePath::CreateFolder() const {
#if GTEST_OS_WINDOWS_MOBILE
  // Windows CE: call CreateDirectory() on a converted copy of the path with
  // the trailing separator removed, mapping its result onto the 0 / -1
  // convention used by the other branches.
  FilePath removed_sep(this->RemoveTrailingPathSeparator());
  LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str());
  int result = CreateDirectory(unicode, NULL) ? 0 : -1;
  delete [] unicode;
#elif GTEST_OS_WINDOWS
  int result = _mkdir(pathname_.c_str());
#else
  // Request mode 0777 for the new directory.
  int result = mkdir(pathname_.c_str(), 0777);
#endif  // GTEST_OS_WINDOWS_MOBILE

  // A failed creation still counts as success when the directory is there.
  if (result == -1) {
    return this->DirectoryExists();  // An error is OK if the directory exists.
  }

  return true;  // No error.
}

// Returns the path with a single trailing separator removed. A path that
// does not end in a separator is returned as an unchanged copy.
FilePath FilePath::RemoveTrailingPathSeparator() const {
  if (!IsDirectory()) {
    return *this;
  }

  return FilePath(pathname_.substr(0, pathname_.length() - 1));
}

// Removes any redundant separators that might be in the pathname.
// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
// redundancies that might be in a pathname involving "." or "..".
// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
void FilePath::Normalize() {
  if (pathname_.c_str() == NULL) {
    pathname_ = "";
    return;
  }

  const char* src = pathname_.c_str();
  char* const dest = new char[pathname_.length() + 1];
  char* dest_ptr = dest;
  memset(dest_ptr, 0, pathname_.length() + 1);

  while (*src != '\0') {
    *dest_ptr = *src;

    if (!IsPathSeparator(*src)) {
      src++;
    }
    else {
#if GTEST_HAS_ALT_PATH_SEP_

      if (*dest_ptr == kAlternatePathSeparator) {
        *dest_ptr = kPathSeparator;
      }

#endif

      while (IsPathSeparator(*src)) {
        src++;
      }
    }

    dest_ptr++;
  }

  *dest_ptr = '\0';
  pathname_ = dest;
  delete[] dest;
}

}  // namespace internal
}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest-internal-inl.h
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Utility functions and classes used by the Google C++ testing framework.
//
// Author: wan@google.com (Zhanyong Wan)
//
// This file contains purely Google Test's internal implementation.  Please
// DO NOT #INCLUDE IT IN A USER PROGRAM.

#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
#define GTEST_SRC_GTEST_INTERNAL_INL_H_

// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is
// part of Google Test's implementation; otherwise it's undefined.
#if !GTEST_IMPLEMENTATION_
// A user is trying to include this from his code - just say no.
# error "gtest-internal-inl.h is part of Google Test's internal implementation."
# error "It must not be included except by Google Test itself."
#endif  // GTEST_IMPLEMENTATION_

#ifndef _WIN32_WCE
# include <errno.h>
#endif  // !_WIN32_WCE
#include <stddef.h>
#include <stdlib.h>  // For strtoll/_strtoul64/malloc/free.
#include <string.h>  // For memmove.

#include <algorithm>
#include <string>
#include <vector>

#include "gtest/internal/gtest-port.h"

#if GTEST_CAN_STREAM_RESULTS_
# include <arpa/inet.h>  // NOLINT
# include <netdb.h>  // NOLINT
#endif

#if GTEST_OS_WINDOWS
# include <windows.h>  // NOLINT
#endif  // GTEST_OS_WINDOWS

#include "gtest/gtest.h"  // NOLINT
#include "gtest/gtest-spi.h"

namespace testing {

// Declares the flags.
//
// We don't want the users to modify this flag in the code, but want
// Google Test's own unit tests to be able to access it. Therefore we
// declare it here as opposed to in gtest.h.
GTEST_DECLARE_bool_(death_test_use_fork);

namespace internal {

// The value of GetTestTypeId() as seen from within the Google Test
// library.  This is solely for testing GetTestTypeId().
GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;

// Names of the flags (needed for parsing Google Test flags).
// Each constant holds one flag name as a plain C string.
const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
const char kBreakOnFailureFlag[] = "break_on_failure";
const char kCatchExceptionsFlag[] = "catch_exceptions";
const char kColorFlag[] = "color";
const char kFilterFlag[] = "filter";
const char kListTestsFlag[] = "list_tests";
const char kOutputFlag[] = "output";
const char kPrintTimeFlag[] = "print_time";
const char kRandomSeedFlag[] = "random_seed";
const char kRepeatFlag[] = "repeat";
const char kShuffleFlag[] = "shuffle";
const char kStackTraceDepthFlag[] = "stack_trace_depth";
const char kStreamResultToFlag[] = "stream_result_to";
const char kThrowOnFailureFlag[] = "throw_on_failure";

// A valid random seed must be in [1, kMaxRandomSeed].
// GetRandomSeedFromFlag() folds arbitrary values into this range, and
// GetNextRandomSeed() wraps from kMaxRandomSeed back to 1.
const int kMaxRandomSeed = 99999;

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.
GTEST_API_ extern bool g_help_flag;

// Returns the current time in milliseconds.
GTEST_API_ TimeInMillis GetTimeInMillis();

// Returns true iff Google Test should use colors in the output.
GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);

// Formats the given time in milliseconds as seconds.
GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);

// Converts the given time in milliseconds to a date string in the ISO 8601
// format, without the timezone information.  N.B.: due to the use of the
// non-reentrant localtime() function, this function is not thread safe.  Do
// not use it in any code that can be called from multiple threads.
GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);

// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
GTEST_API_ bool ParseInt32Flag(
  const char* str, const char* flag, Int32* value);

// Maps the value of the --gtest_random_seed flag onto a seed in
// [1, kMaxRandomSeed]. A flag value of 0 means "pick one for me": the
// current time in milliseconds is used as the raw seed instead.
inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
  unsigned int raw_seed;

  if (random_seed_flag == 0) {
    raw_seed = static_cast<unsigned int>(GetTimeInMillis());
  }
  else {
    raw_seed = static_cast<unsigned int>(random_seed_flag);
  }

  // Fold the raw value into [1, kMaxRandomSeed] so that the seed stays
  // short enough to type by hand.
  const unsigned int modulus = static_cast<unsigned int>(kMaxRandomSeed);
  return static_cast<int>((raw_seed - 1U) % modulus) + 1;
}

// Returns the valid seed that follows 'seed', wrapping from kMaxRandomSeed
// back to 1. 'seed' itself must be valid, i.e. within [1, kMaxRandomSeed];
// this is verified with GTEST_CHECK_.
inline int GetNextRandomSeed(int seed) {
  GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed)
      << "Invalid random seed " << seed << " - must be in [1, "
      << kMaxRandomSeed << "].";

  if (seed >= kMaxRandomSeed) {
    return 1;
  }

  return seed + 1;
}

// This class saves the values of all Google Test flags in its c'tor, and
// restores them in its d'tor.
class GTestFlagSaver {
 public:
  // The c'tor.
  GTestFlagSaver() {
    also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests);
    break_on_failure_ = GTEST_FLAG(break_on_failure);
    catch_exceptions_ = GTEST_FLAG(catch_exceptions);
    color_ = GTEST_FLAG(color);
    death_test_style_ = GTEST_FLAG(death_test_style);
    death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
    filter_ = GTEST_FLAG(filter);
    internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
    list_tests_ = GTEST_FLAG(list_tests);
    output_ = GTEST_FLAG(output);
    print_time_ = GTEST_FLAG(print_time);
    random_seed_ = GTEST_FLAG(random_seed);
    repeat_ = GTEST_FLAG(repeat);
    shuffle_ = GTEST_FLAG(shuffle);
    stack_trace_depth_ = GTEST_FLAG(stack_trace_depth);
    stream_result_to_ = GTEST_FLAG(stream_result_to);
    throw_on_failure_ = GTEST_FLAG(throw_on_failure);
  }

  // The d'tor is not virtual.  DO NOT INHERIT FROM THIS CLASS.
  ~GTestFlagSaver() {
    GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_;
    GTEST_FLAG(break_on_failure) = break_on_failure_;
    GTEST_FLAG(catch_exceptions) = catch_exceptions_;
    GTEST_FLAG(color) = color_;
    GTEST_FLAG(death_test_style) = death_test_style_;
    GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
    GTEST_FLAG(filter) = filter_;
    GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
    GTEST_FLAG(list_tests) = list_tests_;
    GTEST_FLAG(output) = output_;
    GTEST_FLAG(print_time) = print_time_;
    GTEST_FLAG(random_seed) = random_seed_;
    GTEST_FLAG(repeat) = repeat_;
    GTEST_FLAG(shuffle) = shuffle_;
    GTEST_FLAG(stack_trace_depth) = stack_trace_depth_;
    GTEST_FLAG(stream_result_to) = stream_result_to_;
    GTEST_FLAG(throw_on_failure) = throw_on_failure_;
  }

 private:
  // Fields for saving the original values of flags.
  bool also_run_disabled_tests_;
  bool break_on_failure_;
  bool catch_exceptions_;
  std::string color_;
  std::string death_test_style_;
  bool death_test_use_fork_;
  std::string filter_;
  std::string internal_run_death_test_;
  bool list_tests_;
  std::string output_;
  bool print_time_;
  internal::Int32 random_seed_;
  internal::Int32 repeat_;
  bool shuffle_;
  internal::Int32 stack_trace_depth_;
  std::string stream_result_to_;
  bool throw_on_failure_;
} GTEST_ATTRIBUTE_UNUSED_;

// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
// to "(Invalid Unicode 0xXXXXXXXX)".
GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);

// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Multilingual
// Plane.
GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);

// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
// if the variable is present. If a file already exists at this location, this
// function will write over it. If the variable is present, but the file cannot
// be created, prints an error and exits.
void WriteToShardStatusFileIfNeeded();

// Checks whether sharding is enabled by examining the relevant
// environment variable values. If the variables are present,
// but inconsistent (e.g., shard_index >= total_shards), prints
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
GTEST_API_ bool ShouldShard(const char* total_shards_str,
                            const char* shard_index_str,
                            bool in_subprocess_for_death_test);

// Parses the environment variable var as an Int32. If it is unset,
// returns default_val. If it is not an Int32, prints an error
// and aborts.
GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);

// Given the total number of shards, the shard index, and the test id,
// returns true iff the test should be run on this shard. The test id is
// some arbitrary but unique non-negative integer assigned to each test
// method. Assumes that 0 <= shard_index < total_shards.
GTEST_API_ bool ShouldRunTestOnShard(
  int total_shards, int shard_index, int test_id);

// STL container utilities.

// Counts the elements of the container for which the predicate returns
// true.
template <class Container, typename Predicate>
inline int CountIf(const Container& c, Predicate predicate) {
  // Deliberately a hand-written loop: std::count_if() in libCstd on
  // Solaris has a non-standard signature.
  int matches = 0;
  typename Container::const_iterator cursor = c.begin();

  while (cursor != c.end()) {
    if (predicate(*cursor)) {
      matches += 1;
    }

    ++cursor;
  }

  return matches;
}

// Invokes the function/functor once on every element of the container, in
// iteration order. The functor is taken by value.
template <class Container, typename Functor>
void ForEach(const Container& c, Functor functor) {
  const typename Container::const_iterator first = c.begin();
  const typename Container::const_iterator last = c.end();
  std::for_each(first, last, functor);
}

// Bounds-checked element access: yields v[i] when i lies in
// [0, v.size()), and default_value for any other index.
template <typename E>
inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
  if (i < 0) {
    return default_value;
  }

  if (i >= static_cast<int>(v.size())) {
    return default_value;
  }

  return v[i];
}

// Shuffles, in place, the elements of *v whose indices fall in the
// STL-style half-open range [begin, end); passing 'end' == size() shuffles
// through the end of the vector. Both bounds are validated with
// GTEST_CHECK_. Random numbers are drawn from *random.
template <typename E>
void ShuffleRange(internal::Random* random, int begin, int end,
                  std::vector<E>* v) {
  const int size = static_cast<int>(v->size());
  GTEST_CHECK_(0 <= begin && begin <= size)
      << "Invalid shuffle range start " << begin << ": must be in range [0, "
      << size << "].";
  GTEST_CHECK_(begin <= end && end <= size)
      << "Invalid shuffle range finish " << end << ": must be in range ["
      << begin << ", " << size << "].";

  // Fisher-Yates shuffle, from
  // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
  // Walk the last slot of the still-unshuffled part from the back towards
  // 'begin', each time swapping in a randomly chosen element of that part.
  for (int last = end - 1; last > begin; --last) {
    const int width = last - begin + 1;
    const int chosen = begin + random->Generate(width);
    std::swap((*v)[chosen], (*v)[last]);
  }
}

// Shuffles every element of *v in place by delegating to ShuffleRange()
// over the full index range [0, size).
template <typename E>
inline void Shuffle(internal::Random* random, std::vector<E>* v) {
  const int element_count = static_cast<int>(v->size());
  ShuffleRange(random, 0, element_count, v);
}

// Applies 'delete' to the pointed-to object. Exists as a named function so
// that it can be handed to algorithms as a functor.
template <typename T>
static void Delete(T* x) { delete x; }

// Predicate functor that matches TestProperty objects by key: it is
// constructed with the key to look for and answers, for each property it
// is applied to, whether that property's key equals it.
//
// TestPropertyKeyIs is copyable.
class TestPropertyKeyIs {
 public:
  // Stores a copy of the key to match against.
  //
  // TestPropertyKeyIs has NO default constructor.
  explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}

  // True iff the key of test_property is equal to the stored key.
  bool operator()(const TestProperty& test_property) const {
    return key_ == test_property.key();
  }

 private:
  // The key being searched for.
  std::string key_;
};

// Class UnitTestOptions.
//
// This class contains functions for processing options the user
// specifies when running the tests.  It has only static members.
//
// In most cases, the user can specify an option using either an
// environment variable or a command line flag.  E.g. you can set the
// test filter using either GTEST_FILTER or --gtest_filter.  If both
// the variable and the flag are present, the latter overrides the
// former.
//
// Only declarations appear here; the member functions are defined
// elsewhere.
class GTEST_API_ UnitTestOptions {
 public:
  // Functions for processing the gtest_output flag.

  // Returns the output format, or "" for normal printed output.
  static std::string GetOutputFormat();

  // Returns the absolute path of the requested output file, or the
  // default (test_detail.xml in the original working directory) if
  // none was explicitly specified.
  static std::string GetAbsolutePathToOutputFile();

  // Functions for processing the gtest_filter flag.

  // Returns true iff the wildcard pattern matches the string.  The
  // first ':' or '\0' character in pattern marks the end of it.
  //
  // This recursive algorithm isn't very efficient, but is clear and
  // works well enough for matching test names, which are short.
  static bool PatternMatchesString(const char* pattern, const char* str);

  // Returns true iff the user-specified filter matches the test case
  // name and the test name.
  static bool FilterMatchesTest(const std::string& test_case_name,
                                const std::string& test_name);

#if GTEST_OS_WINDOWS
  // Function for supporting the gtest_catch_exception flag.
  // Declared on Windows builds only.

  // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
  // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
  // This function is useful as an __except condition.
  static int GTestShouldProcessSEH(DWORD exception_code);
#endif  // GTEST_OS_WINDOWS

  // Returns true if "name" matches the ':' separated list of glob-style
  // filters in "filter".
  static bool MatchesFilter(const std::string& name, const char* filter);
};

// Returns the current application's name, removing directory path if that
// is present.  Used by UnitTestOptions::GetOutputFile.
GTEST_API_ FilePath GetCurrentExecutableName();

// The role interface for getting the OS stack trace as a string.
// Both operations are pure virtual, so a concrete subclass must supply
// them; copying and assignment are disallowed.
class OsStackTraceGetterInterface {
 public:
  OsStackTraceGetterInterface() {}
  // Virtual so that implementations can be destroyed through a pointer to
  // this interface.
  virtual ~OsStackTraceGetterInterface() {}

  // Returns the current OS stack trace as an std::string.  Parameters:
  //
  //   max_depth  - the maximum number of stack frames to be included
  //                in the trace.
  //   skip_count - the number of top frames to be skipped; doesn't count
  //                against max_depth.
  virtual string CurrentStackTrace(int max_depth, int skip_count) = 0;

  // UponLeavingGTest() should be called immediately before Google Test calls
  // user code. It saves some information about the current stack that
  // CurrentStackTrace() will use to find and hide Google Test stack frames.
  virtual void UponLeavingGTest() = 0;

 private:
  GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
};

// A working implementation of the OsStackTraceGetterInterface interface.
// Only the declaration appears here; the member functions are defined
// elsewhere.
class OsStackTraceGetter : public OsStackTraceGetterInterface {
 public:
  // Starts out with no caller frame recorded.
  OsStackTraceGetter() : caller_frame_(NULL) {}

  // See OsStackTraceGetterInterface::CurrentStackTrace(). Annotated as
  // lock-excluded with respect to mutex_.
  virtual string CurrentStackTrace(int max_depth, int skip_count)
  GTEST_LOCK_EXCLUDED_(mutex_);

  // See OsStackTraceGetterInterface::UponLeavingGTest(). Annotated as
  // lock-excluded with respect to mutex_.
  virtual void UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_);

  // This string is inserted in place of stack frames that are part of
  // Google Test's implementation.
  static const char* const kElidedFramesMarker;

 private:
  Mutex mutex_;  // protects all internal state

  // We save the stack frame below the frame that calls user code.
  // We do this because the address of the frame immediately below
  // the user code changes between the call to UponLeavingGTest()
  // and any calls to CurrentStackTrace() from within the user code.
  void* caller_frame_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
};

// Information about a Google Test trace point.
struct TraceInfo {
  // Source file name; stored as a raw pointer, not copied.
  const char* file;
  // Line number within |file|.
  int line;
  // Message text attached to the trace point.
  std::string message;
};

// This is the default global test part result reporter used in UnitTestImpl.
// This class should only be used by UnitTestImpl.
class DefaultGlobalTestPartResultReporter
  : public TestPartResultReporterInterface {
 public:
  // Takes the UnitTestImpl that this reporter works on behalf of; the
  // pointer is kept in unit_test_.
  explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
  // Implements the TestPartResultReporterInterface. Reports the test part
  // result in the current test.
  virtual void ReportTestPartResult(const TestPartResult& result);

 private:
  // Fixed at construction; the pointer itself is const.
  UnitTestImpl* const unit_test_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter);
};

// This is the default per thread test part result reporter used in
// UnitTestImpl. This class should only be used by UnitTestImpl.
class DefaultPerThreadTestPartResultReporter
  : public TestPartResultReporterInterface {
 public:
  // unit_test: the UnitTestImpl this reporter works for.  The constructor is
  // defined elsewhere.
  // NOTE(review): presumably just stores the pointer in unit_test_ -- confirm.
  explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
  // Implements the TestPartResultReporterInterface. The implementation just
  // delegates to the current global test part result reporter of *unit_test_.
  virtual void ReportTestPartResult(const TestPartResult& result);

 private:
  // Pointer to the UnitTestImpl whose global reporter receives the results;
  // the pointer itself is const, so it cannot be re-seated after
  // construction.
  UnitTestImpl* const unit_test_;

  // Copying and assignment are disabled for this class.
  GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter);
};

// The private implementation of the UnitTest class.  We don't protect
// the methods under a mutex, as this class is not accessible by a
// user and the UnitTest class that delegates work to this class does
// proper locking.
class GTEST_API_ UnitTestImpl {
 public:
  // parent: the UnitTest object that owns this implementation object (see
  // parent_ below).  Constructor and destructor are defined elsewhere.
  explicit UnitTestImpl(UnitTest* parent);
  virtual ~UnitTestImpl();

  // There are two different ways to register your own TestPartResultReporter.
  // You can register your own reporter to listen either only for test results
  // from the current thread or for results from all threads.
  // By default, each per-thread test result reporter just passes a new
  // TestPartResult to the global test result reporter, which registers the
  // test part result for the currently running test.

  // Returns the global test part result reporter.
  TestPartResultReporterInterface* GetGlobalTestPartResultReporter();

  // Sets the global test part result reporter.
  void SetGlobalTestPartResultReporter(
    TestPartResultReporterInterface* reporter);

  // Returns the test part result reporter for the current thread.
  TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();

  // Sets the test part result reporter for the current thread.
  void SetTestPartResultReporterForCurrentThread(
    TestPartResultReporterInterface* reporter);

  // Gets the number of successful test cases.
  int successful_test_case_count() const;

  // Gets the number of failed test cases.
  int failed_test_case_count() const;

  // Gets the number of all test cases.
  int total_test_case_count() const;

  // Gets the number of all test cases that contain at least one test
  // that should run.
  int test_case_to_run_count() const;

  // Gets the number of successful tests.
  int successful_test_count() const;

  // Gets the number of failed tests.
  int failed_test_count() const;

  // Gets the number of disabled tests that will be reported in the XML report.
  int reportable_disabled_test_count() const;

  // Gets the number of disabled tests.
  int disabled_test_count() const;

  // Gets the number of tests to be printed in the XML report.
  int reportable_test_count() const;

  // Gets the number of all tests.
  int total_test_count() const;

  // Gets the number of tests that should run.
  int test_to_run_count() const;

  // Gets the time of the test program start, in ms from the start of the
  // UNIX epoch.
  TimeInMillis start_timestamp() const {
    return start_timestamp_;
  }

  // Gets the elapsed time, in milliseconds.
  TimeInMillis elapsed_time() const {
    return elapsed_time_;
  }

  // Returns true iff the unit test passed (i.e. all test cases passed).
  bool Passed() const {
    return !Failed();
  }

  // Returns true iff the unit test failed (i.e. some test case failed
  // or something outside of all tests failed).
  bool Failed() const {
    return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed();
  }

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  const TestCase* GetTestCase(int i) const {
    // test_case_indices_ maps a position in the (possibly shuffled) order to
    // a slot in test_cases_.  The lookup result must be used for the actual
    // access -- indexing with the raw i would silently ignore shuffling and
    // disagree with GetMutableTestCase().
    const int index = GetElementOr(test_case_indices_, i, -1);
    return index < 0 ? NULL : test_cases_[index];
  }

  // Gets the i-th test case among all the test cases. i can range from 0 to
  // total_test_case_count() - 1. If i is not in that range, returns NULL.
  TestCase* GetMutableTestCase(int i) {
    const int index = GetElementOr(test_case_indices_, i, -1);
    return index < 0 ? NULL : test_cases_[index];
  }

  // Provides access to the event listener list.
  TestEventListeners* listeners() {
    return &listeners_;
  }

  // Returns the TestResult for the test that's currently running, or
  // the TestResult for the ad hoc test if no test is running.
  TestResult* current_test_result();

  // Returns the TestResult for the ad hoc test.
  const TestResult* ad_hoc_test_result() const {
    return &ad_hoc_test_result_;
  }

  // Sets the OS stack trace getter.
  //
  // Does nothing if the input and the current OS stack trace getter
  // are the same; otherwise, deletes the old getter and makes the
  // input the current getter.
  void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);

  // Returns the current OS stack trace getter if it is not NULL;
  // otherwise, creates an OsStackTraceGetter, makes it the current
  // getter, and returns it.
  OsStackTraceGetterInterface* os_stack_trace_getter();

  // Returns the current OS stack trace as an std::string.
  //
  // The maximum number of stack frames to be included is specified by
  // the gtest_stack_trace_depth flag.  The skip_count parameter
  // specifies the number of top frames to be skipped, which doesn't
  // count against the number of frames to be included.
  //
  // For example, if Foo() calls Bar(), which in turn calls
  // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
  // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
  std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;

  // Finds and returns a TestCase with the given name.  If one doesn't
  // exist, creates one and returns it.
  //
  // Arguments:
  //
  //   test_case_name: name of the test case
  //   type_param:     the name of the test's type parameter, or NULL if
  //                   this is not a typed or a type-parameterized test.
  //   set_up_tc:      pointer to the function that sets up the test case
  //   tear_down_tc:   pointer to the function that tears down the test case
  TestCase* GetTestCase(const char* test_case_name,
                        const char* type_param,
                        Test::SetUpTestCaseFunc set_up_tc,
                        Test::TearDownTestCaseFunc tear_down_tc);

  // Adds a TestInfo to the unit test.
  //
  // Arguments:
  //
  //   set_up_tc:    pointer to the function that sets up the test case
  //   tear_down_tc: pointer to the function that tears down the test case
  //   test_info:    the TestInfo object
  void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc,
                   TestInfo* test_info) {
    // In order to support thread-safe death tests, we need to
    // remember the original working directory when the test program
    // was first invoked.  We cannot do this in RUN_ALL_TESTS(), as
    // the user may have changed the current directory before calling
    // RUN_ALL_TESTS().  Therefore we capture the current directory in
    // AddTestInfo(), which is called to register a TEST or TEST_F
    // before main() is reached.
    if (original_working_dir_.IsEmpty()) {
      original_working_dir_.Set(FilePath::GetCurrentDir());
      GTEST_CHECK_(!original_working_dir_.IsEmpty())
          << "Failed to get the current working directory.";
    }

    // Look up (or create) the owning test case, then hand the test to it.
    GetTestCase(test_info->test_case_name(),
                test_info->type_param(),
                set_up_tc,
                tear_down_tc)->AddTestInfo(test_info);
  }

#if GTEST_HAS_PARAM_TEST
  // Returns ParameterizedTestCaseRegistry object used to keep track of
  // value-parameterized tests and instantiate and register them.
  internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
    return parameterized_test_registry_;
  }
#endif  // GTEST_HAS_PARAM_TEST

  // Sets the TestCase object for the test that's currently running.
  void set_current_test_case(TestCase* a_current_test_case) {
    current_test_case_ = a_current_test_case;
  }

  // Sets the TestInfo object for the test that's currently running.  If
  // current_test_info is NULL, the assertion results will be stored in
  // ad_hoc_test_result_.
  void set_current_test_info(TestInfo* a_current_test_info) {
    current_test_info_ = a_current_test_info;
  }

  // Registers all parameterized tests defined using TEST_P and
  // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
  // combination. This method can be called more than once; it has guards
  // protecting from registering the tests more than once.  If
  // value-parameterized tests are disabled, RegisterParameterizedTests is
  // present but does nothing.
  void RegisterParameterizedTests();

  // Runs all tests in this UnitTest object, prints the result, and
  // returns true if all tests are successful.  If any exception is
  // thrown during a test, this test is considered to be failed, but
  // the rest of the tests will still be run.
  bool RunAllTests();

  // Clears the results of all tests, except the ad hoc tests.
  void ClearNonAdHocTestResult() {
    ForEach(test_cases_, TestCase::ClearTestCaseResult);
  }

  // Clears the results of ad-hoc test assertions.
  void ClearAdHocTestResult() {
    ad_hoc_test_result_.Clear();
  }

  // Adds a TestProperty to the current TestResult object when invoked in a
  // context of a test or a test case, or to the global property set. If the
  // result already contains a property with the same key, the value will be
  // updated.
  void RecordProperty(const TestProperty& test_property);

  // Tells FilterTests() whether the sharding variables in the environment
  // should be taken into account.
  enum ReactionToSharding {
    HONOR_SHARDING_PROTOCOL,
    IGNORE_SHARDING_PROTOCOL
  };

  // Matches the full name of each test against the user-specified
  // filter to decide whether the test should run, then records the
  // result in each TestCase and TestInfo object.
  // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
  // based on sharding variables in the environment.
  // Returns the number of tests that should run.
  int FilterTests(ReactionToSharding shard_tests);

  // Prints the names of the tests matching the user-specified filter flag.
  void ListTestsMatchingFilter();

  // Returns the TestCase last stored by set_current_test_case() (NULL when
  // no test is running, per the comment on current_test_case_).
  const TestCase* current_test_case() const {
    return current_test_case_;
  }
  // Returns the TestInfo last stored by set_current_test_info() (NULL when
  // no test is running, per the comment on current_test_info_).
  TestInfo* current_test_info() {
    return current_test_info_;
  }
  const TestInfo* current_test_info() const {
    return current_test_info_;
  }

  // Returns the vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*>& environments() {
    return environments_;
  }

  // Getters for the per-thread Google Test trace stack.
  std::vector<TraceInfo>& gtest_trace_stack() {
    return *(gtest_trace_stack_.pointer());
  }
  const std::vector<TraceInfo>& gtest_trace_stack() const {
    return gtest_trace_stack_.get();
  }

#if GTEST_HAS_DEATH_TEST
  // Parses the internal death-test flag via ParseInternalRunDeathTestFlag()
  // and stores the result in internal_run_death_test_flag_, replacing any
  // previously stored value.
  void InitDeathTestSubprocessControlInfo() {
    internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
  }
  // Returns a pointer to the parsed --gtest_internal_run_death_test
  // flag, or NULL if that flag was not specified.
  // This information is useful only in a death test child process.
  // Must not be called before a call to InitGoogleTest.
  const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
    return internal_run_death_test_flag_.get();
  }

  // Returns a pointer to the current death test factory.
  internal::DeathTestFactory* death_test_factory() {
    return death_test_factory_.get();
  }

  // Defined elsewhere.
  // NOTE(review): judging by the name, silences test event output when this
  // process is a death test child -- confirm against the definition.
  void SuppressTestEventsIfInSubprocess();

  friend class ReplaceDeathTestFactory;
#endif  // GTEST_HAS_DEATH_TEST

  // Initializes the event listener performing XML output as specified by
  // UnitTestOptions. Must not be called before InitGoogleTest.
  void ConfigureXmlOutput();

#if GTEST_CAN_STREAM_RESULTS_
  // Initializes the event listener for streaming test results to a socket.
  // Must not be called before InitGoogleTest.
  void ConfigureStreamingOutput();
#endif

  // Performs initialization dependent upon flag values obtained in
  // ParseGoogleTestFlagsOnly.  Is called from InitGoogleTest after the call to
  // ParseGoogleTestFlagsOnly.  In case a user neglects to call InitGoogleTest
  // this function is also called from RunAllTests.  Since this function can be
  // called more than once, it has to be idempotent.
  void PostFlagParsingInit();

  // Gets the random seed used at the start of the current test iteration.
  int random_seed() const {
    return random_seed_;
  }

  // Gets the random number generator.
  internal::Random* random() {
    return &random_;
  }

  // Shuffles all test cases, and the tests within each test case,
  // making sure that death tests are still run first.
  void ShuffleTests();

  // Restores the test cases and tests to their order before the first shuffle.
  void UnshuffleTests();

  // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
  // UnitTest::Run() starts.
  bool catch_exceptions() const {
    return catch_exceptions_;
  }

 private:
  friend class ::testing::UnitTest;

  // Used by UnitTest::Run() to capture the state of
  // GTEST_FLAG(catch_exceptions) at the moment it starts.
  void set_catch_exceptions(bool value) {
    catch_exceptions_ = value;
  }

  // The UnitTest object that owns this implementation object.
  UnitTest* const parent_;

  // The working directory when the first TEST() or TEST_F() was
  // executed.
  internal::FilePath original_working_dir_;

  // The default test part result reporters.
  DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
  DefaultPerThreadTestPartResultReporter
  default_per_thread_test_part_result_reporter_;

  // Points to (but doesn't own) the global test part result reporter.
  // (The misspelling "repoter" in the member name is kept because the name
  // is referenced from code outside this header.)
  TestPartResultReporterInterface* global_test_part_result_repoter_;

  // Protects read and write access to global_test_part_result_repoter_.
  internal::Mutex global_test_part_result_reporter_mutex_;

  // Points to (but doesn't own) the per-thread test part result reporter.
  internal::ThreadLocal<TestPartResultReporterInterface*>
  per_thread_test_part_result_reporter_;

  // The vector of environments that need to be set-up/torn-down
  // before/after the tests are run.
  std::vector<Environment*> environments_;

  // The vector of TestCases in their original order.  It owns the
  // elements in the vector.
  std::vector<TestCase*> test_cases_;

  // Provides a level of indirection for the test case list to allow
  // easy shuffling and restoring the test case order.  The i-th
  // element of this vector is the index of the i-th test case in the
  // shuffled order.
  std::vector<int> test_case_indices_;

#if GTEST_HAS_PARAM_TEST
  // ParameterizedTestRegistry object used to register value-parameterized
  // tests.
  internal::ParameterizedTestCaseRegistry parameterized_test_registry_;

  // Indicates whether RegisterParameterizedTests() has been called already.
  bool parameterized_tests_registered_;
#endif  // GTEST_HAS_PARAM_TEST

  // Index of the last death test case registered.  Initially -1.
  int last_death_test_case_;

  // This points to the TestCase for the currently running test.  It
  // changes as Google Test goes through one test case after another.
  // When no test is running, this is set to NULL and Google Test
  // stores assertion results in ad_hoc_test_result_.  Initially NULL.
  TestCase* current_test_case_;

  // This points to the TestInfo for the currently running test.  It
  // changes as Google Test goes through one test after another.  When
  // no test is running, this is set to NULL and Google Test stores
  // assertion results in ad_hoc_test_result_.  Initially NULL.
  TestInfo* current_test_info_;

  // Normally, a user only writes assertions inside a TEST or TEST_F,
  // or inside a function called by a TEST or TEST_F.  Since Google
  // Test keeps track of which test is currently running, it can
  // associate such an assertion with the test it belongs to.
  //
  // If an assertion is encountered when no TEST or TEST_F is running,
  // Google Test attributes the assertion result to an imaginary "ad hoc"
  // test, and records the result in ad_hoc_test_result_.
  TestResult ad_hoc_test_result_;

  // The list of event listeners that can be used to track events inside
  // Google Test.
  TestEventListeners listeners_;

  // The OS stack trace getter.  Will be deleted when the UnitTest
  // object is destructed.  By default, an OsStackTraceGetter is used,
  // but the user can set this field to use a custom getter if that is
  // desired.
  OsStackTraceGetterInterface* os_stack_trace_getter_;

  // True iff PostFlagParsingInit() has been called.
  bool post_flag_parse_init_performed_;

  // The random number seed used at the beginning of the test run.
  int random_seed_;

  // Our random number generator.
  internal::Random random_;

  // The time of the test program start, in ms from the start of the
  // UNIX epoch.
  TimeInMillis start_timestamp_;

  // How long the test took to run, in milliseconds.
  TimeInMillis elapsed_time_;

#if GTEST_HAS_DEATH_TEST
  // The decomposed components of the gtest_internal_run_death_test flag,
  // parsed when RUN_ALL_TESTS is called.
  internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
  internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
#endif  // GTEST_HAS_DEATH_TEST

  // A per-thread stack of traces created by the SCOPED_TRACE() macro.
  internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;

  // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
  // starts.
  bool catch_exceptions_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
};  // class UnitTestImpl

// Convenience function for accessing the global UnitTest
// implementation object.
inline UnitTestImpl* GetUnitTestImpl() {
  // Fetch the UnitTest instance first, then ask it for its private
  // implementation object.
  UnitTest* const unit_test = UnitTest::GetInstance();
  return unit_test->impl();
}

#if GTEST_USES_SIMPLE_RE

// Internal helper functions for implementing the simple regular
// expression matcher.
//
// Only prototypes appear here, and only when GTEST_USES_SIMPLE_RE is
// non-zero (see the enclosing #if); the definitions live elsewhere.
// NOTE(review): the GTEST_API_ marker presumably exports these symbols so
// that code outside the library can call them -- confirm in gtest-port.h.

// Single-character predicates.
// NOTE(review): judging by the names these classify one ASCII character
// (membership in a set, digit, punctuation, repetition operator,
// whitespace, word character, valid escape letter) -- confirm against the
// definitions before relying on exact character sets.
GTEST_API_ bool IsInSet(char ch, const char* str);
GTEST_API_ bool IsAsciiDigit(char ch);
GTEST_API_ bool IsAsciiPunct(char ch);
GTEST_API_ bool IsRepeat(char ch);
GTEST_API_ bool IsAsciiWhiteSpace(char ch);
GTEST_API_ bool IsAsciiWordChar(char ch);
GTEST_API_ bool IsValidEscape(char ch);

// Pattern-level helpers.  All take NUL-terminated C strings.
// NOTE(review): `escaped` presumably says whether the pattern character was
// preceded by a backslash -- confirm against the definitions.
GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
GTEST_API_ bool ValidateRegex(const char* regex);
GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
GTEST_API_ bool MatchRepetitionAndRegexAtHead(
  bool escaped, char ch, char repeat, const char* regex, const char* str);
GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);

#endif  // GTEST_USES_SIMPLE_RE

// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
//
// Two overloads are declared: one for narrow (char**) and one for wide
// (wchar_t**) argument vectors.  Both are defined elsewhere.
// NOTE(review): argc is taken by pointer, presumably so recognized flags can
// be removed and the count updated in place -- confirm against the
// definition.
GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);

#if GTEST_HAS_DEATH_TEST

// Returns the message describing the last system error, regardless of the
// platform.
GTEST_API_ std::string GetLastErrnoDescription();

# if GTEST_OS_WINDOWS
// Provides leak-safe Windows kernel handle ownership.
class AutoHandle {
 public:
  // Starts out holding no handle.
  AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
  // Takes over responsibility for closing |handle|.
  explicit AutoHandle(HANDLE handle) : handle_(handle) {}

  // Closes the held handle, if any.
  ~AutoHandle() {
    Reset(INVALID_HANDLE_VALUE);
  }

  // Returns the held handle without giving it up.
  HANDLE Get() const {
    return handle_;
  }

  // Closes the held handle (if any) and holds nothing afterwards.
  void Reset() {
    Reset(INVALID_HANDLE_VALUE);
  }

  // Replaces the held handle with |handle|, closing the previous one first.
  // Passing the handle that is already held is a no-op.
  void Reset(HANDLE handle) {
    if (handle == handle_) {
      return;
    }

    const bool holds_open_handle = handle_ != INVALID_HANDLE_VALUE;
    if (holds_open_handle) {
      ::CloseHandle(handle_);
    }

    handle_ = handle;
  }

 private:
  HANDLE handle_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
};
# endif  // GTEST_OS_WINDOWS

// Attempts to parse a string into a positive integer pointed to by the
// number parameter.  Returns true if that is possible.
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
// it here.
template <typename Integer>
bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
  // The conversion routines below accept leading whitespace and a sign.
  // Neither is wanted here, so anything that does not start with a digit
  // is rejected up front.
  const bool starts_with_digit = !str.empty() && IsDigit(str[0]);
  if (!starts_with_digit) {
    return false;
  }

  // errno is the only way the conversion reports overflow, so clear it
  // before converting.
  char* end;
  errno = 0;

  // BiggestConvertible is the widest integer type the system-provided
  // string-to-number routine can produce.
# if GTEST_OS_WINDOWS && !defined(__GNUC__)

  // MSVC and C++ Builder spell the 64-bit type __int64 rather than
  // long long.
  typedef unsigned __int64 BiggestConvertible;
  const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);

# else

  typedef unsigned long long BiggestConvertible;  // NOLINT
  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);

# endif  // GTEST_OS_WINDOWS && !defined(__GNUC__)

  // Success requires that the whole string was consumed and that the
  // conversion itself raised no error.
  const bool parse_success = *end == '\0' && errno == 0;

  // TODO(vladl@google.com): Convert this to compile time assertion when it is
  // available.
  GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));

  // Narrow to the caller's type and make sure no bits were lost on the way.
  const Integer narrowed = static_cast<Integer>(parsed);
  const bool round_trips = static_cast<BiggestConvertible>(narrowed) == parsed;

  if (!parse_success || !round_trips) {
    return false;
  }

  *number = narrowed;
  return true;
}
#endif  // GTEST_HAS_DEATH_TEST

// TestResult contains some private methods that should be hidden from
// Google Test user but are required for testing. This class allow our tests
// to access them.
//
// This class is supplied only for the purpose of testing Google Test's own
// constructs. Do not use it in user tests, either directly or indirectly.
class TestResultAccessor {
 public:
  static void RecordProperty(TestResult* test_result,
                             const std::string& xml_element,
                             const TestProperty& property) {
    test_result->RecordProperty(xml_element, property);
  }

  static void ClearTestPartResults(TestResult* test_result) {
    test_result->ClearTestPartResults();
  }

  static const std::vector<testing::TestPartResult>& test_part_results(
    const TestResult& test_result) {
    return test_result.test_part_results();
  }
};

#if GTEST_CAN_STREAM_RESULTS_

// Streams test results to the given port on the given host machine.
class StreamingListener : public EmptyTestEventListener {
 public:
  // Abstract base class for writing strings to a socket.
  class AbstractSocketWriter {
   public:
    virtual ~AbstractSocketWriter() {}

    // Sends a string to the socket.
    virtual void Send(const string& message) = 0;

    // Closes the socket.
    virtual void CloseConnection() {}

    // Sends a string and a newline to the socket.
    void SendLn(const string& message) {
      Send(message + "\n");
    }
  };

  // Concrete class for actually writing strings to a socket.
  class SocketWriter : public AbstractSocketWriter {
   public:
    SocketWriter(const string& host, const string& port)
      : sockfd_(-1), host_name_(host), port_num_(port) {
      MakeConnection();
    }

    virtual ~SocketWriter() {
      if (sockfd_ != -1) {
        CloseConnection();
      }
    }

    // Sends a string to the socket.
    virtual void Send(const string& message) {
      GTEST_CHECK_(sockfd_ != -1)
          << "Send() can be called only when there is a connection.";

      const int len = static_cast<int>(message.length());

      if (write(sockfd_, message.c_str(), len) != len) {
        GTEST_LOG_(WARNING)
            << "stream_result_to: failed to stream to "
            << host_name_ << ":" << port_num_;
      }
    }

   private:
    // Creates a client socket and connects to the server.
    void MakeConnection();

    // Closes the socket.
    void CloseConnection() {
      GTEST_CHECK_(sockfd_ != -1)
          << "CloseConnection() can be called only when there is a connection.";

      close(sockfd_);
      sockfd_ = -1;
    }

    int sockfd_;  // socket file descriptor
    const string host_name_;
    const string port_num_;

    GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
  };  // class SocketWriter

  // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
  static string UrlEncode(const char* str);

  StreamingListener(const string& host, const string& port)
    : socket_writer_(new SocketWriter(host, port)) {
    Start();
  }

  explicit StreamingListener(AbstractSocketWriter* socket_writer)
    : socket_writer_(socket_writer) {
    Start();
  }

  void OnTestProgramStart(const UnitTest& /* unit_test */) {
    SendLn("event=TestProgramStart");
  }

  void OnTestProgramEnd(const UnitTest& unit_test) {
    // Note that Google Test current only report elapsed time for each
    // test iteration, not for the entire test program.
    SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));

    // Notify the streaming server to stop.
    socket_writer_->CloseConnection();
  }

  void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
    SendLn("event=TestIterationStart&iteration=" +
           StreamableToString(iteration));
  }

  void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
    SendLn("event=TestIterationEnd&passed=" +
           FormatBool(unit_test.Passed()) + "&elapsed_time=" +
           StreamableToString(unit_test.elapsed_time()) + "ms");
  }

  void OnTestCaseStart(const TestCase& test_case) {
    SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
  }

  void OnTestCaseEnd(const TestCase& test_case) {
    SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
           + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
           + "ms");
  }

  void OnTestStart(const TestInfo& test_info) {
    SendLn(std::string("event=TestStart&name=") + test_info.name());
  }

  void OnTestEnd(const TestInfo& test_info) {
    SendLn("event=TestEnd&passed=" +
           FormatBool((test_info.result())->Passed()) +
           "&elapsed_time=" +
           StreamableToString((test_info.result())->elapsed_time()) + "ms");
  }

  void OnTestPartResult(const TestPartResult& test_part_result) {
    const char* file_name = test_part_result.file_name();

    if (file_name == NULL) {
      file_name = "";
    }

    SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
           "&line=" + StreamableToString(test_part_result.line_number()) +
           "&message=" + UrlEncode(test_part_result.message()));
  }

 private:
  // Sends the given message and a newline to the socket.
  void SendLn(const string& message) {
    socket_writer_->SendLn(message);
  }

  // Called at the start of streaming to notify the receiver what
  // protocol we are using.
  void Start() {
    SendLn("gtest_streaming_protocol_version=1.0");
  }

  string FormatBool(bool value) {
    return value ? "1" : "0";
  }

  const scoped_ptr<AbstractSocketWriter> socket_writer_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
};  // class StreamingListener

#endif  // GTEST_CAN_STREAM_RESULTS_

}  // namespace internal
}  // namespace testing

#endif  // GTEST_SRC_GTEST_INTERNAL_INL_H_


================================================
FILE: rocrtst/gtest/src/gtest-port.cpp
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

#include "gtest/internal/gtest-port.h"

#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#if GTEST_OS_WINDOWS_MOBILE
# include <windows.h>  // For TerminateProcess()
#elif GTEST_OS_WINDOWS
# include <io.h>
# include <sys/stat.h>
#else
# include <unistd.h>
#endif  // GTEST_OS_WINDOWS_MOBILE

#if GTEST_OS_MAC
# include <mach/mach_init.h>
# include <mach/task.h>
# include <mach/vm_map.h>
#endif  // GTEST_OS_MAC

#if GTEST_OS_QNX
# include <devctl.h>
# include <sys/procfs.h>
#endif  // GTEST_OS_QNX

#include "gtest/gtest-spi.h"
#include "gtest/gtest-message.h"
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"

// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_

namespace testing {
namespace internal {

// File descriptor numbers for the standard output and standard error
// streams.
#if defined(_MSC_VER) || defined(__BORLANDC__)
// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
// The numeric values 1 and 2 are therefore spelled out directly.
const int kStdOutFileno = 1;
const int kStdErrFileno = 2;
#else
// Everywhere else, take the values from the STDOUT_FILENO / STDERR_FILENO
// macros.
const int kStdOutFileno = STDOUT_FILENO;
const int kStdErrFileno = STDERR_FILENO;
#endif  // _MSC_VER

#if GTEST_OS_MAC

// Returns the number of threads running in the process, or 0 to indicate that
// we cannot detect it.
size_t GetThreadCount() {
  const task_t task = mach_task_self();
  mach_msg_type_number_t thread_count;
  thread_act_array_t thread_list;
  const kern_return_t status = task_threads(task, &thread_list, &thread_count);

  if (status == KERN_SUCCESS) {
    // task_threads allocates resources in thread_list and we need to free them
    // to avoid leaks.
    vm_deallocate(task,
                  reinterpret_cast<vm_address_t>(thread_list),
                  sizeof(thread_t) * thread_count);
    return static_cast<size_t>(thread_count);
  }
  else {
    return 0;
  }
}

#elif GTEST_OS_QNX

// Returns the number of threads running in the process, or 0 to indicate that
// we cannot detect it.
size_t GetThreadCount() {
  const int fd = open("/proc/self/as", O_RDONLY);

  if (fd < 0) {
    return 0;
  }

  procfs_info process_info;
  const int status =
    devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL);
  close(fd);

  if (status == EOK) {
    return static_cast<size_t>(process_info.num_threads);
  }
  else {
    return 0;
  }
}

#else

size_t GetThreadCount() {
  // No portable API exposes the thread count, so this fallback always
  // reports 0, which callers treat as "cannot detect".
  const size_t kUnknownThreadCount = 0;
  return kUnknownThreadCount;
}

#endif  // GTEST_OS_MAC

#if GTEST_USES_POSIX_RE

// Implements RE.  Currently only needed for death tests.

RE::~RE() {
  // The pattern copy is released unconditionally.
  free(const_cast<char*>(pattern_));

  if (!is_valid_) {
    // Calling regfree() on a regex that was never successfully compiled
    // might crash because its content is undefined.  The two regexes are
    // essentially the same, so one cannot be valid without the other.
    return;
  }

  regfree(&full_regex_);
  regfree(&partial_regex_);
}

// Returns true iff regular expression re matches the entire str.
// An invalid re never matches.
bool RE::FullMatch(const char* str, const RE& re) {
  regmatch_t match;
  return re.is_valid_ &&
         regexec(&re.full_regex_, str, 1, &match, 0) == 0;
}

// Returns true iff regular expression re matches a substring of str
// (including str itself).  An invalid re never matches.
bool RE::PartialMatch(const char* str, const RE& re) {
  regmatch_t match;
  return re.is_valid_ &&
         regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
}

// Initializes an RE from its string representation.
void RE::Init(const char* regex) {
  pattern_ = posix::StrDup(regex);

  // Reserves enough bytes to hold the regular expression used for a
  // full match.
  const size_t full_regex_len = strlen(regex) + 10;
  char* const full_pattern = new char[full_regex_len];

  snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
  is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;

  // We want to call regcomp(&partial_regex_, ...) even if the
  // previous expression returns false.  Otherwise partial_regex_ may
  // not be properly initialized can may cause trouble when it's
  // freed.
  //
  // Some implementation of POSIX regex (e.g. on at least some
  // versions of Cygwin) doesn't accept the empty string as a valid
  // regex.  We change it to an equivalent form "()" to be safe.
  if (is_valid_) {
    const char* const partial_regex = (*regex == '\0') ? "()" : regex;
    is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
  }

  EXPECT_TRUE(is_valid_)
      << "Regular expression \"" << regex
      << "\" is not a valid POSIX Extended regular expression.";

  delete[] full_pattern;
}

#elif GTEST_USES_SIMPLE_RE

// Returns true iff ch is one of the characters of str.  The terminating
// '\0' of str is not considered a member, so ch == '\0' always yields
// false.
bool IsInSet(char ch, const char* str) {
  if (ch == '\0') {
    return false;
  }

  return strchr(str, ch) != NULL;
}

// Returns true iff ch belongs to the given classification.  Unlike
// similar functions in <ctype.h>, these aren't affected by the
// current locale.
bool IsAsciiDigit(char ch) {
  // True exactly for the ten characters '0' through '9'.
  return ch >= '0' && ch <= '9';
}
bool IsAsciiPunct(char ch) {
  // The complete set of characters treated as punctuation.
  const char* const kPunctuation = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  return IsInSet(ch, kPunctuation);
}
bool IsRepeat(char ch) {
  // The three repetition meta characters.
  const char* const kRepeatChars = "?*+";
  return IsInSet(ch, kRepeatChars);
}
bool IsAsciiWhiteSpace(char ch) {
  // Space, form feed, newline, carriage return, tab and vertical tab.
  const char* const kWhiteSpace = " \f\n\r\t\v";
  return IsInSet(ch, kWhiteSpace);
}
bool IsAsciiWordChar(char ch) {
  // A word character is an ASCII letter, an ASCII digit, or '_'.
  if (ch == '_') {
    return true;
  }

  const bool is_lower = ch >= 'a' && ch <= 'z';
  const bool is_upper = ch >= 'A' && ch <= 'Z';
  const bool is_digit = ch >= '0' && ch <= '9';
  return is_lower || is_upper || is_digit;
}

// Returns true iff "\\c" is a supported escape sequence: c is either a
// punctuation character or one of the letters naming a character class
// or control character.
bool IsValidEscape(char c) {
  if (IsAsciiPunct(c)) {
    return true;
  }

  return IsInSet(c, "dDfnrsStvwW");
}

// Returns true iff the given atom (specified by escaped and
// pattern_char) matches ch.  The result is undefined if the atom is
// invalid.
bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
  if (!escaped) {
    // An unescaped '.' matches anything but a newline; every other
    // unescaped character matches only itself.
    return pattern_char == ch || (pattern_char == '.' && ch != '\n');
  }

  // From here on the atom is "\\p" where p is pattern_char.
  switch (pattern_char) {
    case 'd': return IsAsciiDigit(ch);
    case 'D': return !IsAsciiDigit(ch);
    case 'f': return ch == '\f';
    case 'n': return ch == '\n';
    case 'r': return ch == '\r';
    case 's': return IsAsciiWhiteSpace(ch);
    case 'S': return !IsAsciiWhiteSpace(ch);
    case 't': return ch == '\t';
    case 'v': return ch == '\v';
    case 'w': return IsAsciiWordChar(ch);
    case 'W': return !IsAsciiWordChar(ch);

    default:
      // An escaped punctuation character stands for itself.
      return IsAsciiPunct(pattern_char) && pattern_char == ch;
  }
}

// Helper function used by ValidateRegex() to format error messages.
std::string FormatRegexSyntaxError(const char* regex, int index) {
  return (Message() << "Syntax error at index " << index
          << " in simple regular expression \"" << regex << "\": ").GetString();
}

// Checks that regex is a valid simple regular expression.  Every problem
// found is reported as a non-fatal failure; returns false if there was
// any, true otherwise.
bool ValidateRegex(const char* regex) {
  if (regex == NULL) {
    // TODO(wan@google.com): fix the source file location in the
    // assertion failures to match where the regex is used in user
    // code.
    ADD_FAILURE() << "NULL is not a valid simple regular expression.";
    return false;
  }

  bool ok = true;

  // True iff ?, *, or + can follow the previous atom.
  bool can_repeat_prev = false;

  int pos = 0;

  while (regex[pos] != '\0') {
    const char ch = regex[pos];

    if (ch == '\\') {  // An escape sequence occupying two characters.
      const char escapee = regex[pos + 1];

      if (escapee == '\0') {
        // Nothing can follow a dangling backslash, so stop right away.
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "'\\' cannot appear at the end.";
        return false;
      }

      if (!IsValidEscape(escapee)) {
        ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                      << "invalid escape sequence \"\\" << escapee << "\".";
        ok = false;
      }

      can_repeat_prev = true;
      pos += 2;
      continue;
    }

    // Not an escape sequence.  At most one failure is reported per
    // character.
    if (ch == '^' && pos > 0) {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                    << "'^' can only appear at the beginning.";
      ok = false;
    }
    else if (ch == '$' && regex[pos + 1] != '\0') {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                    << "'$' can only appear at the end.";
      ok = false;
    }
    else if (IsInSet(ch, "()[]{}|")) {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                    << "'" << ch << "' is unsupported.";
      ok = false;
    }
    else if (IsRepeat(ch) && !can_repeat_prev) {
      ADD_FAILURE() << FormatRegexSyntaxError(regex, pos)
                    << "'" << ch << "' can only follow a repeatable token.";
      ok = false;
    }

    // Anchors and repetition characters cannot themselves be repeated.
    can_repeat_prev = !IsInSet(ch, "^$?*+");
    ++pos;
  }

  return ok;
}

// Matches a repeated regex atom followed by a valid simple regular
// expression.  The atom is c if escaped is false, or \c otherwise;
// repeat is the repetition meta character (?, *, or +) and regex is
// what follows it.  The behavior is undefined if str contains too many
// characters to be indexable by size_t, in which case the test will
// probably time out anyway.  We are fine with this limitation as
// std::string has it too.
bool MatchRepetitionAndRegexAtHead(
  bool escaped, char c, char repeat, const char* regex,
  const char* str) {
  // '+' needs at least one occurrence; '?' allows at most one.
  // We cannot call numeric_limits::max() as it conflicts with the
  // max() macro on Windows.
  const size_t fewest = (repeat == '+') ? 1 : 0;
  const size_t most = (repeat == '?') ? 1 :
                      static_cast<size_t>(-1) - 1;

  size_t consumed = 0;

  while (consumed <= most) {
    // Invariant: the atom matches each of the first consumed characters
    // of str.  Since only *whether* the pattern matches matters (not
    // *how*), the first tail match wins and no greedy search is needed.
    if (consumed >= fewest && MatchRegexAtHead(regex, str + consumed)) {
      return true;
    }

    const char next = str[consumed];

    if (next == '\0') {
      return false;
    }

    if (!AtomMatchesChar(escaped, c, next)) {
      return false;
    }

    ++consumed;
  }

  return false;
}

// Returns true iff regex matches a prefix of str.  regex must be a
// valid simple regular expression and not start with "^", or the
// result is undefined.
bool MatchRegexAtHead(const char* regex, const char* str) {
  const char first = *regex;

  if (first == '\0') {  // An empty regex matches a prefix of anything.
    return true;
  }

  // "$" only matches the end of a string.  Note that regex being
  // valid guarantees that there's nothing after "$" in it.
  if (first == '$') {
    return *str == '\0';
  }

  // When the regex starts with an escape sequence, the atom is the
  // character after the backslash.
  const bool escaped = (first == '\\');
  const char* const atom = escaped ? regex + 1 : regex;
  const char after_atom = atom[1];

  if (IsRepeat(after_atom)) {
    // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
    // here's an indirect recursion.  It terminates as the regex gets
    // shorter in each recursion.
    return MatchRepetitionAndRegexAtHead(
             escaped, atom[0], after_atom, atom + 2, str);
  }

  // regex isn't empty, isn't "$", and doesn't start with a repetition:
  // match its first atom against the first character of str, then
  // recurse on the remainders.
  if (*str == '\0') {
    return false;
  }

  if (!AtomMatchesChar(escaped, *atom, *str)) {
    return false;
  }

  return MatchRegexAtHead(atom + 1, str + 1);
}

// Returns true iff regex matches any substring of str.  regex must be
// a valid simple regular expression, or the result is undefined.
// A NULL regex or a NULL str never matches.
//
// The algorithm is recursive, but the recursion depth doesn't exceed
// the regex length, so we won't need to worry about running out of
// stack space normally.  In rare cases the time complexity can be
// exponential with respect to the regex length + the string length,
// but usually it's much faster (often close to linear).
bool MatchRegexAnywhere(const char* regex, const char* str) {
  if (regex == NULL || str == NULL) {
    return false;
  }

  // An anchored regex can only match at the very start of str.
  if (*regex == '^') {
    return MatchRegexAtHead(regex + 1, str);
  }

  // Otherwise try every starting position, including the position of
  // the terminating '\0' (where only an empty match is possible).
  for (const char* start = str; ; ++start) {
    if (MatchRegexAtHead(regex, start)) {
      return true;
    }

    if (*start == '\0') {
      break;
    }
  }

  return false;
}

// Implements the RE class.

RE::~RE() {
  // Both strings are released with free(); either may be NULL, which
  // free() accepts.
  free(const_cast<char*>(full_pattern_));
  free(const_cast<char*>(pattern_));
}

// Returns true iff regular expression re matches the entire str.
// An invalid re never matches.
bool RE::FullMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }

  return MatchRegexAnywhere(re.full_pattern_, str);
}

// Returns true iff regular expression re matches a substring of str
// (including str itself).  An invalid re never matches.
bool RE::PartialMatch(const char* str, const RE& re) {
  if (!re.is_valid_) {
    return false;
  }

  return MatchRegexAnywhere(re.pattern_, str);
}

// Initializes an RE from its string representation.
//
// pattern_ receives a copy of regex (or stays NULL for a NULL regex).
// When the regex validates, full_pattern_ receives the same pattern
// anchored with a leading '^' and a trailing '$'; otherwise it stays
// NULL.
void RE::Init(const char* regex) {
  pattern_ = full_pattern_ = NULL;

  if (regex != NULL) {
    pattern_ = posix::StrDup(regex);
  }

  is_valid_ = ValidateRegex(regex);

  if (is_valid_) {
    const size_t regex_len = strlen(regex);

    // Decide which anchors are missing from the pattern as written.
    // NOTE(review): a pattern ending in an escaped dollar ("\\$") also
    // has '$' as its last character, so no anchor is appended for it --
    // confirm whether that is intended.
    const bool needs_caret = (*regex != '^');
    const bool needs_dollar =
      (regex_len == 0 || regex[regex_len - 1] != '$');

    // Room for the pattern plus a prepended '^', an appended '$', and
    // the terminating '\0'.
    char* const anchored = static_cast<char*>(malloc(regex_len + 3));
    size_t out = 0;

    if (needs_caret) {
      anchored[out++] = '^';
    }

    // We don't use snprintf or strncpy, as they trigger a warning when
    // compiled with VC++ 8.0.
    memcpy(anchored + out, regex, regex_len);
    out += regex_len;

    if (needs_dollar) {
      anchored[out++] = '$';
    }

    anchored[out] = '\0';
    full_pattern_ = anchored;
  }
}

#endif  // GTEST_USES_POSIX_RE

const char kUnknownFile[] = "unknown file";

// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
// A NULL file is shown as kUnknownFile; a negative line is left out.
// The result always ends with a colon.
GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
  std::string location(file == NULL ? kUnknownFile : file);

  if (line >= 0) {
#ifdef _MSC_VER
    location += "(" + StreamableToString(line) + ")";
#else
    location += ":" + StreamableToString(line);
#endif  // _MSC_VER
  }

  location += ":";
  return location;
}

// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
// Note that FormatCompilerIndependentFileLocation() does NOT append colon
// to the file location it produces, unlike FormatFileLocation().
// A NULL file is shown as kUnknownFile; a negative line is left out.
GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
  const char* file, int line) {
  std::string location(file == NULL ? kUnknownFile : file);

  if (line >= 0) {
    location += ":" + StreamableToString(line);
  }

  return location;
}


GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
  : severity_(severity) {
  // Pick the tag that starts the log line; any severity other than
  // info, warning or error is tagged as fatal.
  const char* marker = "[ FATAL ]";

  switch (severity) {
    case GTEST_INFO:
      marker = "[  INFO ]";
      break;

    case GTEST_WARNING:
      marker = "[WARNING]";
      break;

    case GTEST_ERROR:
      marker = "[ ERROR ]";
      break;

    default:
      break;
  }

  GetStream() << ::std::endl << marker << " "
              << FormatFileLocation(file, line).c_str() << ": ";
}

// Ends the log line and flushes the stream; if severity is GTEST_FATAL,
// additionally flushes stderr and aborts the program.
GTestLog::~GTestLog() {
  GetStream() << ::std::endl;

  if (severity_ != GTEST_FATAL) {
    return;
  }

  fflush(stderr);
  posix::Abort();
}
// Disable Microsoft deprecation warnings for POSIX functions called from
// this class (creat, dup, dup2, and close)
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4996)
#endif  // _MSC_VER

#if GTEST_HAS_STREAM_REDIRECTION

// Object that captures an output stream (stdout/stderr).
class CapturedStream {
 public:
  // The ctor redirects the stream to a temporary file.
  explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
# if GTEST_OS_WINDOWS
    char temp_dir_path[MAX_PATH + 1] = { '\0' };  // NOLINT
    char temp_file_path[MAX_PATH + 1] = { '\0' };  // NOLINT

    ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
    const UINT success = ::GetTempFileNameA(temp_dir_path,
                                            "gtest_redir",
                                            0,  // Generate unique file name.
                                            temp_file_path);
    GTEST_CHECK_(success != 0)
        << "Unable to create a temporary file in " << temp_dir_path;
    const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
    GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
                                    << temp_file_path;
    filename_ = temp_file_path;
# else
    // There's no guarantee that a test has write access to the current
    // directory, so we create the temporary file in the /tmp directory
    // instead. We use /tmp on most systems, and /sdcard on Android.
    // That's because Android doesn't have /tmp.
#  if GTEST_OS_LINUX_ANDROID
    // Note: Android applications are expected to call the framework's
    // Context.getExternalStorageDirectory() method through JNI to get
    // the location of the world-writable SD Card directory. However,
    // this requires a Context handle, which cannot be retrieved
    // globally from native code. Doing so also precludes running the
    // code as part of a regular standalone executable, which doesn't
    // run in a Dalvik process (e.g. when running it through 'adb shell').
    //
    // The location /sdcard is directly accessible from native code
    // and is the only location (unofficially) supported by the Android
    // team. It's generally a symlink to the real SD Card mount point
    // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
    // other OEM-customized locations. Never rely on these, and always
    // use /sdcard.
    char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
#  else
    char name_template[] = "/tmp/captured_stream.XXXXXX";
#  endif  // GTEST_OS_LINUX_ANDROID
    const int captured_fd = mkstemp(name_template);
    filename_ = name_template;
# endif  // GTEST_OS_WINDOWS
    fflush(NULL);
    dup2(captured_fd, fd_);
    close(captured_fd);
  }

  ~CapturedStream() {
    remove(filename_.c_str());
  }

  std::string GetCapturedString() {
    if (uncaptured_fd_ != -1) {
      // Restores the original stream.
      fflush(NULL);
      dup2(uncaptured_fd_, fd_);
      close(uncaptured_fd_);
      uncaptured_fd_ = -1;
    }

    FILE* const file = posix::FOpen(filename_.c_str(), "r");
    const std::string content = ReadEntireFile(file);
    posix::FClose(file);
    return content;
  }

 private:
  // Reads the entire content of a file as an std::string.
  static std::string ReadEntireFile(FILE* file);

  // Returns the size (in bytes) of a file.
  static size_t GetFileSize(FILE* file);

  const int fd_;  // A stream to capture.
  int uncaptured_fd_;
  // Name of the temporary file holding the stderr output.
  ::std::string filename_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
};

// Returns the size (in bytes) of a file, measured as the offset of its
// end.  The file position is left at the end of the file.
size_t CapturedStream::GetFileSize(FILE* file) {
  fseek(file, 0, SEEK_END);
  const long end_offset = ftell(file);  // NOLINT
  return static_cast<size_t>(end_offset);
}

// Reads the entire content of a file as a string.
std::string CapturedStream::ReadEntireFile(FILE* file) {
  const size_t file_size = GetFileSize(file);
  char* const buffer = new char[file_size];

  // GetFileSize() moved the position to the end; go back to the start.
  fseek(file, 0, SEEK_SET);

  size_t total_read = 0;  // # of bytes read so far

  // Keeps reading the file until we cannot read further or the
  // pre-determined file size is reached.
  for (;;) {
    const size_t chunk =
      fread(buffer + total_read, 1, file_size - total_read, file);
    total_read += chunk;

    if (chunk == 0 || total_read >= file_size) {
      break;
    }
  }

  const std::string content(buffer, total_read);
  delete[] buffer;

  return content;
}

# ifdef _MSC_VER
#  pragma warning(pop)
# endif  // _MSC_VER

static CapturedStream* g_captured_stderr = NULL;
static CapturedStream* g_captured_stdout = NULL;

// Starts capturing an output stream (stdout/stderr) by storing a new
// capturer in *stream.  Logs a fatal error if *stream already holds
// one; stream_name is only used in that message.
void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
  const bool already_capturing = (*stream != NULL);

  if (already_capturing) {
    GTEST_LOG_(FATAL) << "Only one " << stream_name
                      << " capturer can exist at a time.";
  }

  *stream = new CapturedStream(fd);
}

// Stops capturing the output stream and returns the captured string.
// The capturer is destroyed and *captured_stream is reset to NULL.
std::string GetCapturedStream(CapturedStream** captured_stream) {
  CapturedStream* const capturer = *captured_stream;
  const std::string content = capturer->GetCapturedString();

  *captured_stream = NULL;
  delete capturer;

  return content;
}

// Starts capturing stdout.
void CaptureStdout() {
  CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
}

// Starts capturing stderr.
void CaptureStderr() {
  CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
}

// Stops capturing stdout and returns the captured string.
std::string GetCapturedStdout() {
  return GetCapturedStream(&g_captured_stdout);
}

// Stops capturing stderr and returns the captured string.
std::string GetCapturedStderr() {
  return GetCapturedStream(&g_captured_stderr);
}

#endif  // GTEST_HAS_STREAM_REDIRECTION

#if GTEST_HAS_DEATH_TEST

// A copy of all command line arguments.  Set by InitGoogleTest().
::std::vector<testing::internal::string> g_argvs;

static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
  NULL;  // Owned.

void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
  // Re-injecting the vector that is already installed is a no-op; in
  // particular it must not be deleted.
  if (argvs == g_injected_test_argvs) {
    return;
  }

  // The previously injected vector is owned here, so release it before
  // installing the replacement.
  delete g_injected_test_argvs;
  g_injected_test_argvs = argvs;
}

const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
  // Injected arguments, when present, take precedence over the copy of
  // the real command line.
  return (g_injected_test_argvs != NULL) ? *g_injected_test_argvs : g_argvs;
}
#endif  // GTEST_HAS_DEATH_TEST

#if GTEST_OS_WINDOWS_MOBILE
namespace posix {
void Abort() {
  DebugBreak();
  TerminateProcess(GetCurrentProcess(), 1);
}
}  // namespace posix
#endif  // GTEST_OS_WINDOWS_MOBILE

// Returns the name of the environment variable corresponding to the
// given flag.  For example, FlagToEnvVar("foo") will return
// "GTEST_FOO" in the open-source version.
static std::string FlagToEnvVar(const char* flag) {
  const std::string full_flag =
    (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();

  Message env_var;

  for (size_t i = 0; i != full_flag.length(); i++) {
    env_var << ToUpper(full_flag.c_str()[i]);
  }

  return env_var.GetString();
}

// Parses 'str' for a 32-bit signed integer.  If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
  // Parses the environment variable as a decimal integer.
  char* end = NULL;
  const long long_value = strtol(str, &end, 10);  // NOLINT

  // Has strtol() consumed all characters in the string?
  if (*end != '\0') {
    // No - an invalid character was encountered.
    Message msg;
    msg << "WARNING: " << src_text
        << " is expected to be a 32-bit integer, but actually"
        << " has value \"" << str << "\".\n";
    printf("%s", msg.GetString().c_str());
    fflush(stdout);
    return false;
  }

  // Is the parsed value in the range of an Int32?
  const Int32 result = static_cast<Int32>(long_value);

  if (long_value == LONG_MAX || long_value == LONG_MIN ||
      // The parsed value overflows as a long.  (strtol() returns
      // LONG_MAX or LONG_MIN when the input overflows.)
      result != long_value
      // The parsed value overflows as an Int32.
     ) {
    Message msg;
    msg << "WARNING: " << src_text
        << " is expected to be a 32-bit integer, but actually"
        << " has value " << str << ", which overflows.\n";
    printf("%s", msg.GetString().c_str());
    fflush(stdout);
    return false;
  }

  *value = result;
  return true;
}

// Reads and returns the Boolean environment variable corresponding to
// the given flag; if it's not set, returns default_value.
//
// The value is considered true iff it's not "0".
bool BoolFromGTestEnv(const char* flag, bool default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const raw = posix::GetEnv(env_var.c_str());

  if (raw == NULL) {
    return default_value;
  }

  return strcmp(raw, "0") != 0;
}

// Reads and returns a 32-bit integer stored in the environment
// variable corresponding to the given flag; if it isn't set or
// doesn't represent a valid 32-bit integer, returns default_value.
// In the latter case a note about the fallback is printed to stdout.
Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const raw = posix::GetEnv(env_var.c_str());

  if (raw == NULL) {
    // The environment variable is not set.
    return default_value;
  }

  Int32 parsed = default_value;
  const bool parsed_ok =
    ParseInt32(Message() << "Environment variable " << env_var,
               raw, &parsed);

  if (parsed_ok) {
    return parsed;
  }

  printf("The default value %s is used.\n",
         (Message() << default_value).GetString().c_str());
  fflush(stdout);
  return default_value;
}

// Reads and returns the string environment variable corresponding to
// the given flag; if it's not set, returns default_value.
const char* StringFromGTestEnv(const char* flag, const char* default_value) {
  const std::string env_var = FlagToEnvVar(flag);
  const char* const raw = posix::GetEnv(env_var.c_str());

  if (raw != NULL) {
    return raw;
  }

  return default_value;
}

}  // namespace internal
}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest-printers.cpp
================================================
// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

// Google Test - The Google C++ Testing Framework
//
// This file implements a universal value printer that can print a
// value of any type T:
//
//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
//
// It uses the << operator when possible, and prints the bytes in the
// object otherwise.  A user can override its behavior for a class
// type Foo by defining either operator<<(::std::ostream&, const Foo&)
// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
// defines Foo.

#include "gtest/gtest-printers.h"
#include <ctype.h>
#include <stdio.h>
#include <ostream>  // NOLINT
#include <string>
#include "gtest/internal/gtest-port.h"

namespace testing {

namespace {

using ::std::ostream;

// Prints count bytes of the given object, beginning at offset start, as
// two-digit upper-case hex values.
void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
                                size_t count, ostream* os) {
  char hex[5] = "";

  for (size_t offset = 0; offset < count; ++offset) {
    const size_t pos = start + offset;

    if (offset > 0) {
      // Organizes the bytes into groups of 2 for easy parsing by
      // human: a space precedes bytes at even positions in the object,
      // a dash precedes bytes at odd positions.
      *os << ((pos % 2 == 0) ? ' ' : '-');
    }

    GTEST_SNPRINTF_(hex, sizeof(hex), "%02X", obj_bytes[pos]);
    *os << hex;
  }
}

// Prints the bytes in the given value to the given ostream, preceded by
// the object's size and enclosed in angle brackets.
void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
                              ostream* os) {
  // Tells the user how big the object is.
  *os << count << "-byte object <";

  const size_t kThreshold = 132;
  const size_t kChunkSize = 64;

  // Objects of kThreshold bytes or more are abbreviated: only the first
  // kChunkSize bytes and roughly the last kChunkSize bytes are printed.
  // TODO(wan): let the user control the threshold using a flag.
  if (count >= kThreshold) {
    PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
    *os << " ... ";
    // Rounds up to 2-byte boundary.
    const size_t tail_start = (count - kChunkSize + 1) / 2 * 2;
    PrintByteSegmentInObjectTo(obj_bytes, tail_start, count - tail_start, os);
  }
  else {
    PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
  }

  *os << ">";
}

}  // namespace

namespace internal2 {

// Prints the bytes in the given object by forwarding to
// PrintBytesInObjectToImpl().  The delegation simplifies the
// implementation, which uses the << operator and thus is easier done
// outside of the ::testing::internal namespace, which contains a <<
// operator that sometimes conflicts with the one in STL.
void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
                          ostream* os) {
  const unsigned char* const bytes = obj_bytes;
  PrintBytesInObjectToImpl(bytes, count, os);
}

}  // namespace internal2

namespace internal {

// Depending on the value of a char (or wchar_t), we print it in one
// of three formats:
//   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
//   - as a special escape sequence (e.g. '\r', '\n').
enum CharFormat {
  kAsIs,          // Printed unchanged.
  kHexEscape,     // Printed as "\x" followed by hex digits.
  kSpecialEscape  // Printed as a backslash escape such as "\n".
};

// Returns true if c is a printable ASCII character, i.e. lies in the
// range 0x20 (space) through 0x7E ('~').  We test the value of c
// directly instead of calling isprint(), which is buggy on Windows
// Mobile.
inline bool IsPrintableAscii(wchar_t c) {
  return c >= 0x20 && c <= 0x7E;
}

// Prints a wide or narrow char c as a character literal without the
// quotes, escaping it when necessary; returns how c was formatted.
// The template argument UnsignedChar is the unsigned version of Char,
// which is the type of c.
template <typename UnsignedChar, typename Char>
static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
  // Look up the dedicated escape sequence for c, if it has one.
  const char* special = NULL;

  switch (static_cast<wchar_t>(c)) {
    case L'\0': special = "\\0";  break;
    case L'\'': special = "\\'";  break;
    case L'\\': special = "\\\\"; break;
    case L'\a': special = "\\a";  break;
    case L'\b': special = "\\b";  break;
    case L'\f': special = "\\f";  break;
    case L'\n': special = "\\n";  break;
    case L'\r': special = "\\r";  break;
    case L'\t': special = "\\t";  break;
    case L'\v': special = "\\v";  break;

    default:
      break;
  }

  if (special != NULL) {
    *os << special;
    return kSpecialEscape;
  }

  if (IsPrintableAscii(c)) {
    *os << static_cast<char>(c);
    return kAsIs;
  }

  // Everything else is shown as a hex escape of its unsigned value.
  *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
  return kHexEscape;
}

// Prints a wchar_t c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.  Compared with a character
// literal, a single quote needs no escaping but a double quote does.
static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
  if (c == L'\'') {
    *os << "'";
    return kAsIs;
  }

  if (c == L'"') {
    *os << "\\\"";
    return kSpecialEscape;
  }

  return PrintAsCharLiteralTo<wchar_t>(c, os);
}

// Prints a char c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.
static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
  // Go through unsigned char first so that the value handed to the
  // wchar_t overload is never negative.
  const unsigned char as_unsigned = static_cast<unsigned char>(c);
  const wchar_t widened = static_cast<wchar_t>(as_unsigned);
  return PrintAsStringLiteralTo(widened, os);
}

// Prints a wide or narrow character c and its code.  '\0' is printed
// as "'\\0'", other unprintable characters are also properly escaped
// using the standard C++ escape sequence.  The template argument
// UnsignedChar is the unsigned version of Char, which is the type of c.
template <typename UnsignedChar, typename Char>
void PrintCharAndCodeTo(Char c, ostream* os) {
  // First, print c as a literal in the most readable form we can find.
  *os << ((sizeof(c) > 1) ? "L'" : "'");
  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
  *os << "'";

  // To aid user debugging, we also print c's code in decimal, unless
  // it's 0 (in which case c was printed as '\\0', making the code
  // obvious).
  if (c == 0) {
    return;
  }

  *os << " (" << static_cast<int>(c);

  // For more convenience, we print c's code again in hexidecimal,
  // unless c was already printed in the form '\x##' or the code is in
  // [1, 9].
  if (format == kHexEscape || (1 <= c && c <= 9)) {
    // Do nothing.
  }
  else {
    *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
  }

  *os << ")";
}

// Prints an unsigned char as a quoted character literal plus its code.
void PrintTo(unsigned char c, ::std::ostream* os) {
  PrintCharAndCodeTo<unsigned char, unsigned char>(c, os);
}
// Prints a signed char as a quoted character literal plus its code; hex
// output is produced from the value converted to unsigned char.
void PrintTo(signed char c, ::std::ostream* os) {
  PrintCharAndCodeTo<unsigned char, signed char>(c, os);
}

// Prints a wchar_t as an L'...' literal (the character itself when it is
// printable, an escape otherwise) followed by its code.  L'\0' is printed
// as "L'\\0'".
void PrintTo(wchar_t wc, ostream* os) {
  PrintCharAndCodeTo<wchar_t, wchar_t>(wc, os);
}

// Prints the given array of characters to the ostream.  CharType must be either
// char or wchar_t.
// The array starts at begin, the length is len, it may include '\0' characters
// and may not be NUL-terminated.
template <typename CharType>
static void PrintCharsAsStringTo(
  const CharType* begin, size_t len, ostream* os) {
  const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
  *os << kQuoteBegin;
  bool is_previous_hex = false;

  for (size_t index = 0; index < len; ++index) {
    const CharType cur = begin[index];

    if (is_previous_hex && IsXDigit(cur)) {
      // Previous character is of '\x..' form and this character can be
      // interpreted as another hexadecimal digit in its number. Break string to
      // disambiguate.
      *os << "\" " << kQuoteBegin;
    }

    is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
  }

  *os << "\"";
}

// Prints the (const) char/wchar_t array of len elements starting at
// begin.  CharType must be either char or wchar_t.
template <typename CharType>
static void UniversalPrintCharArray(
  const CharType* begin, size_t len, ostream* os) {
  // An array such as
  //   const char kFoo[] = "foo";
  // holds 4 elements, the last being '\0'.  That trailing '\0' is left
  // out so the output matches the literal as written in the source.
  const bool ends_with_nul = len > 0 && begin[len - 1] == '\0';

  PrintCharsAsStringTo(begin, ends_with_nul ? len - 1 : len, os);

  // An array such as
  //   const char kFoo[] = { 'f', 'o', 'o' };
  // is printed in full, with a note that it is not NUL-terminated.
  if (!ends_with_nul) {
    *os << " (no terminating NUL)";
  }
}

// Prints a (const) char array of 'len' elements, starting at address 'begin'.
void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
  UniversalPrintCharArray(begin, len, os);
}

// Prints a (const) wchar_t array of 'len' elements, starting at address
// 'begin'.
void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
  UniversalPrintCharArray(begin, len, os);
}

// Prints a C string to *os: "NULL" for a null pointer, otherwise the
// pointer value followed by the quoted, escaped string it points to.
void PrintTo(const char* s, ostream* os) {
  if (s == NULL) {
    *os << "NULL";
    return;
  }

  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  PrintCharsAsStringTo(s, strlen(s), os);
}

// MSVC compiler can be configured to define wchar_t as a typedef
// of unsigned short. Defining an overload for const wchar_t* in that case
// would cause pointers to unsigned shorts to be printed as wide strings,
// possibly accessing more memory than intended and causing invalid
// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
// wchar_t is implemented as a native type.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Prints a wide C string to *os: "NULL" for a null pointer, otherwise the
// pointer value followed by the quoted, escaped wide string it points to.
void PrintTo(const wchar_t* s, ostream* os) {
  if (s == NULL) {
    *os << "NULL";
    return;
  }

  *os << ImplicitCast_<const void*>(s) << " pointing to ";
  PrintCharsAsStringTo(s, wcslen(s), os);
}
#endif  // wchar_t is native

// Prints a ::string object.
#if GTEST_HAS_GLOBAL_STRING
void PrintStringTo(const ::string& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_GLOBAL_STRING

void PrintStringTo(const ::std::string& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}

// Prints a ::wstring object.
#if GTEST_HAS_GLOBAL_WSTRING
void PrintWideStringTo(const ::wstring& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

#if GTEST_HAS_STD_WSTRING
void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
  PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif  // GTEST_HAS_STD_WSTRING

}  // namespace internal

}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest-test-part.cpp
================================================
// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: mheule@google.com (Markus Heule)
//
// The Google C++ Testing Framework (Google Test)

#include "gtest/gtest-test-part.h"

// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_

namespace testing {

using internal::GetUnitTestImpl;

// Returns the summary part of a failure message: everything before the
// first occurrence of internal::kStackTraceMarker, or the whole message
// when it contains no stack trace marker.
std::string TestPartResult::ExtractSummary(const char* message) {
  const char* const trace_begin =
    strstr(message, internal::kStackTraceMarker);

  if (trace_begin == NULL) {
    return std::string(message);
  }

  return std::string(message, trace_begin);
}

// Prints a TestPartResult object in the form
//   <file>:<line>: <Success|Fatal failure|Non-fatal failure>:
//   <message>
// followed by std::endl (which also flushes os).
//
// NOTE(review): TestPartResult::file_name() is declared outside this
// file; upstream Google Test documents it as returning NULL when the
// source file is unknown.  Inserting a null const char* into an ostream
// is undefined behaviour, so a placeholder is printed in that case.
// Output for a non-null file name is unchanged.
std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
  const char* const file_name = result.file_name();

  return os
         << (file_name == NULL ? "unknown file" : file_name)
         << ":" << result.line_number() << ": "
         << (result.type() == TestPartResult::kSuccess ? "Success" :
             result.type() == TestPartResult::kFatalFailure ? "Fatal failure" :
             "Non-fatal failure") << ":\n"
         << result.message() << std::endl;
}

// Appends a copy of the given TestPartResult to the end of the array
// (array_.push_back stores the result by value).
void TestPartResultArray::Append(const TestPartResult& result) {
  array_.push_back(result);
}

// Returns the TestPartResult at the given index (0-based).
//
// An index outside [0, size()) is treated as a programming error: a
// diagnostic is printed to stdout and the process is terminated through
// internal::posix::Abort().
const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
  if (index < 0 || index >= size()) {
    printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
    // Aborting is not required to flush stdio buffers, so flush
    // explicitly; otherwise the diagnostic can be lost when stdout is
    // fully buffered (e.g. redirected to a file or a pipe).
    fflush(stdout);
    internal::posix::Abort();
  }

  return array_[index];
}

// Returns the number of TestPartResult objects in the array.  The
// container's size is narrowed to int, the index type used by
// GetTestPartResult().
int TestPartResultArray::size() const {
  return static_cast<int>(array_.size());
}

namespace internal {

// Remembers the test part result reporter currently installed for this
// thread and then installs this object in its place, so that results
// reported while the helper is alive pass through ReportTestPartResult().
HasNewFatalFailureHelper::HasNewFatalFailureHelper()
  : has_new_fatal_failure_(false),
    // Captured before the body replaces the current reporter with `this`.
    original_reporter_(GetUnitTestImpl()->
                       GetTestPartResultReporterForCurrentThread()) {
  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
}

// Re-installs the reporter that was active for this thread when the
// helper was constructed.
HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
    original_reporter_);
}

// Records whether any fatally failed result has been reported, then
// forwards the result unchanged to the reporter that was installed
// before this helper.
void HasNewFatalFailureHelper::ReportTestPartResult(
  const TestPartResult& result) {
  const bool is_fatal = result.fatally_failed();

  if (is_fatal) {
    has_new_fatal_failure_ = true;
  }

  original_reporter_->ReportTestPartResult(result);
}

}  // namespace internal

}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest-typed-test.cpp
================================================
// Copyright 2008 Google Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)

#include "gtest/gtest-typed-test.h"
#include "gtest/gtest.h"

namespace testing {
namespace internal {

#if GTEST_HAS_TYPED_TEST_P

// Returns a pointer to the first character of str for which IsSpace() is
// false.  If str consists only of whitespace, the result points at the
// terminating '\0', i.e. at an empty string.
static const char* SkipSpaces(const char* str) {
  const char* cursor = str;

  for (; IsSpace(*cursor); ++cursor) {
    // Nothing to do: the loop only advances past whitespace.
  }

  return cursor;
}

// Verifies that registered_tests match the test names in
// defined_test_names_; returns registered_tests if successful, or
// aborts the program otherwise.
const char* TypedTestCasePState::VerifyRegisteredTestNames(
  const char* file, int line, const char* registered_tests) {
  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
  registered_ = true;

  // Skip initial whitespace in registered_tests since some
  // preprocessors prefix stringizied literals with whitespace.
  registered_tests = SkipSpaces(registered_tests);

  Message errors;
  ::std::set<std::string> tests;

  for (const char* names = registered_tests; names != NULL;
       names = SkipComma(names)) {
    const std::string name = GetPrefixUntilComma(names);

    if (tests.count(name) != 0) {
      errors << "Test " << name << " is listed more than once.\n";
      continue;
    }

    bool found = false;

    for (DefinedTestIter it = defined_test_names_.begin();
         it != defined_test_names_.end();
         ++it) {
      if (name == *it) {
        found = true;
        break;
      }
    }

    if (found) {
      tests.insert(name);
    }
    else {
      errors << "No test named " << name
             << " can be found in this test case.\n";
    }
  }

  for (DefinedTestIter it = defined_test_names_.begin();
       it != defined_test_names_.end();
       ++it) {
    if (tests.count(*it) == 0) {
      errors << "You forgot to list test " << *it << ".\n";
    }
  }

  const std::string& errors_str = errors.GetString();

  if (errors_str != "") {
    fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
            errors_str.c_str());
    fflush(stderr);
    posix::Abort();
  }

  return registered_tests;
}

#endif  // GTEST_HAS_TYPED_TEST_P

}  // namespace internal
}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest.cpp
================================================
// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// The Google C++ Testing Framework (Google Test)

#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"

#include <ctype.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <iomanip>
#include <limits>
#include <ostream>  // NOLINT
#include <sstream>
#include <vector>

#if GTEST_OS_LINUX

// TODO(kenton@google.com): Use autoconf to detect availability of
// gettimeofday().
# define GTEST_HAS_GETTIMEOFDAY_ 1

# include <fcntl.h>  // NOLINT
# include <limits.h>  // NOLINT
# include <sched.h>  // NOLINT
// Declares vsnprintf().  This header is not available on Windows.
# include <strings.h>  // NOLINT
# include <sys/mman.h>  // NOLINT
# include <sys/time.h>  // NOLINT
# include <unistd.h>  // NOLINT
# include <string>

#elif GTEST_OS_SYMBIAN
# define GTEST_HAS_GETTIMEOFDAY_ 1
# include <sys/time.h>  // NOLINT

#elif GTEST_OS_ZOS
# define GTEST_HAS_GETTIMEOFDAY_ 1
# include <sys/time.h>  // NOLINT

// On z/OS we additionally need strings.h for strcasecmp.
# include <strings.h>  // NOLINT

#elif GTEST_OS_WINDOWS_MOBILE  // We are on Windows CE.

# include <windows.h>  // NOLINT

#elif GTEST_OS_WINDOWS  // We are on Windows proper.

# include <io.h>  // NOLINT
# include <sys/timeb.h>  // NOLINT
# include <sys/types.h>  // NOLINT
# include <sys/stat.h>  // NOLINT

# if GTEST_OS_WINDOWS_MINGW
// MinGW has gettimeofday() but not _ftime64().
// TODO(kenton@google.com): Use autoconf to detect availability of
//   gettimeofday().
// TODO(kenton@google.com): There are other ways to get the time on
//   Windows, like GetTickCount() or GetSystemTimeAsFileTime().  MinGW
//   supports these.  consider using them instead.
#  define GTEST_HAS_GETTIMEOFDAY_ 1
#  include <sys/time.h>  // NOLINT
# endif  // GTEST_OS_WINDOWS_MINGW

// cpplint thinks that the header is already included, so we want to
// silence it.
# include <windows.h>  // NOLINT

#else

// Assume other platforms have gettimeofday().
// TODO(kenton@google.com): Use autoconf to detect availability of
//   gettimeofday().
# define GTEST_HAS_GETTIMEOFDAY_ 1

// cpplint thinks that the header is already included, so we want to
// silence it.
# include <sys/time.h>  // NOLINT
# include <unistd.h>  // NOLINT

#endif  // GTEST_OS_LINUX

#if GTEST_HAS_EXCEPTIONS
# include <stdexcept>
#endif

#if GTEST_CAN_STREAM_RESULTS_
# include <arpa/inet.h>  // NOLINT
# include <netdb.h>  // NOLINT
#endif

// Indicates that this translation unit is part of Google Test's
// implementation.  It must come before gtest-internal-inl.h is
// included, or there will be a compiler error.  This trick is to
// prevent a user from accidentally including gtest-internal-inl.h in
// his code.
#define GTEST_IMPLEMENTATION_ 1
#include "src/gtest-internal-inl.h"
#undef GTEST_IMPLEMENTATION_

#if GTEST_OS_WINDOWS
# define vsnprintf _vsnprintf
#endif  // GTEST_OS_WINDOWS

namespace testing {

using internal::CountIf;
using internal::ForEach;
using internal::GetElementOr;
using internal::Shuffle;

// Constants.

// A test whose test case name or test name matches this filter is
// disabled and not run.  Like the other filters below, this is a
// colon-separated list of wildcard patterns.
static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";

// A test case whose name matches this filter is considered a death
// test case and will be run before test cases whose name doesn't
// match this filter.
static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";

// A test filter that matches everything.  Returned by GetDefaultFilter()
// and used by UnitTestOptions::FilterMatchesTest() as the positive filter
// when the user supplies only a negative one.
static const char kUniversalFilter[] = "*";

// The default output file for XML output.  Used by
// UnitTestOptions::GetAbsolutePathToOutputFile() when the output flag
// names no file.
static const char kDefaultOutputFile[] = "test_detail.xml";

// The environment variable name for the test shard index.
static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
// The environment variable name for the total number of test shards.
static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
// The environment variable name for the test shard status file.
static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";

namespace internal {

// The text used in failure messages to indicate the start of the
// stack trace.  TestPartResult::ExtractSummary() cuts a message at the
// first occurrence of this marker.
const char kStackTraceMarker[] = "\nStack trace:\n";

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.  It starts out false.
bool g_help_flag = false;

}  // namespace internal

// Returns the fallback value passed to StringFromGTestEnv() when the
// filter flag is defined: the universal filter "*", which matches every
// test.
static const char* GetDefaultFilter() {
  return kUniversalFilter;
}

// Definitions of the Google Test flags.
//
// Each flag is defined through a GTEST_DEFINE_<type>_ macro that takes
// the flag name, its default value and a help string.  Most defaults are
// obtained from internal::BoolFromGTestEnv / StringFromGTestEnv /
// Int32FromGTestEnv, called with the flag name and a fallback value;
// list_tests and show_internal_stack_frames use a hard-coded default of
// false instead.
// NOTE(review): the *FromGTestEnv helpers are defined elsewhere;
// presumably they consult an environment variable derived from the flag
// name and return the fallback when it is unset - confirm.
GTEST_DEFINE_bool_(
  also_run_disabled_tests,
  internal::BoolFromGTestEnv("also_run_disabled_tests", false),
  "Run disabled tests too, in addition to the tests normally being run.");

GTEST_DEFINE_bool_(
  break_on_failure,
  internal::BoolFromGTestEnv("break_on_failure", false),
  "True iff a failed assertion should be a debugger break-point.");

GTEST_DEFINE_bool_(
  catch_exceptions,
  internal::BoolFromGTestEnv("catch_exceptions", true),
  "True iff " GTEST_NAME_
  " should catch exceptions and treat them as test failures.");

GTEST_DEFINE_string_(
  color,
  internal::StringFromGTestEnv("color", "auto"),
  "Whether to use colors in the output.  Valid values: yes, no, "
  "and auto.  'auto' means to use colors if the output is "
  "being sent to a terminal and the TERM environment variable "
  "is set to a terminal type that supports colors.");

// The fallback is GetDefaultFilter(), i.e. a filter matching every test.
GTEST_DEFINE_string_(
  filter,
  internal::StringFromGTestEnv("filter", GetDefaultFilter()),
  "A colon-separated list of glob (not regex) patterns "
  "for filtering the tests to run, optionally followed by a "
  "'-' and a : separated list of negative patterns (tests to "
  "exclude).  A test is run if it matches one of the positive "
  "patterns and does not match any of the negative patterns.");

GTEST_DEFINE_bool_(list_tests, false,
                   "List all tests without running them.");

// Parsed by UnitTestOptions::GetOutputFormat() (text before the first
// ':') and UnitTestOptions::GetAbsolutePathToOutputFile() (text after it).
GTEST_DEFINE_string_(
  output,
  internal::StringFromGTestEnv("output", ""),
  "A format (currently must be \"xml\"), optionally followed "
  "by a colon and an output file name or directory. A directory "
  "is indicated by a trailing pathname separator. "
  "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
  "If a directory is specified, output files will be created "
  "within that directory, with file-names based on the test "
  "executable's name and, if necessary, made unique by adding "
  "digits.");

GTEST_DEFINE_bool_(
  print_time,
  internal::BoolFromGTestEnv("print_time", true),
  "True iff " GTEST_NAME_
  " should display elapsed time in text output.");

GTEST_DEFINE_int32_(
  random_seed,
  internal::Int32FromGTestEnv("random_seed", 0),
  "Random number seed to use when shuffling test orders.  Must be in range "
  "[1, 99999], or 0 to use a seed based on the current time.");

GTEST_DEFINE_int32_(
  repeat,
  internal::Int32FromGTestEnv("repeat", 1),
  "How many times to repeat each test.  Specify a negative number "
  "for repeating forever.  Useful for shaking out flaky tests.");

GTEST_DEFINE_bool_(
  show_internal_stack_frames, false,
  "True iff " GTEST_NAME_ " should include internal stack frames when "
  "printing test failure stack traces.");

GTEST_DEFINE_bool_(
  shuffle,
  internal::BoolFromGTestEnv("shuffle", false),
  "True iff " GTEST_NAME_
  " should randomize tests' order on every run.");

// The fallback is kMaxStackTraceDepth, a constant defined elsewhere.
GTEST_DEFINE_int32_(
  stack_trace_depth,
  internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
  "The maximum number of stack frames to print when an "
  "assertion fails.  The valid range is 0 through 100, inclusive.");

GTEST_DEFINE_string_(
  stream_result_to,
  internal::StringFromGTestEnv("stream_result_to", ""),
  "This flag specifies the host name and the port number on which to stream "
  "test results. Example: \"localhost:555\". The flag is effective only on "
  "Linux.");

GTEST_DEFINE_bool_(
  throw_on_failure,
  internal::BoolFromGTestEnv("throw_on_failure", false),
  "When this flag is specified, a failed assertion will throw an exception "
  "if exceptions are enabled or exit the program with a non-zero code "
  "otherwise.");

namespace internal {

// Returns a pseudo-random number in [0, range) produced by a Linear
// Congruential Generator (LCG).  The generator state is advanced on
// every call, before the argument is validated.  Crashes (through
// GTEST_CHECK_) if 'range' is 0 or greater than kMaxRange.
UInt32 Random::Generate(UInt32 range) {
  // Multiplier and increment are the same as in glibc's rand(3).
  const UInt32 advanced = 1103515245U * state_ + 12345U;
  state_ = advanced % kMaxRange;

  GTEST_CHECK_(range > 0)
      << "Cannot generate a number in the range [0, 0).";
  GTEST_CHECK_(range <= kMaxRange)
      << "Generation of a number in [0, " << range << ") was requested, "
      << "but this can only generate numbers in [0, " << kMaxRange << ").";

  // Reducing with a modulus biases the result slightly downward, which
  // is acceptable given how weak an LCG is to begin with.
  const UInt32 result = state_ % range;
  return result;
}

// GTestIsInitialized() returns true iff the user has initialized
// Google Test.  Useful for catching the user mistake of not initializing
// Google Test before calling RUN_ALL_TESTS().
//
// A user must call testing::InitGoogleTest() to initialize Google
// Test.  g_init_gtest_count is set to the number of times
// InitGoogleTest() has been called.  We don't protect this variable
// under a mutex as it is only accessed in the main thread.
GTEST_API_ int g_init_gtest_count = 0;

// Any non-zero call count is treated as "initialized".
static bool GTestIsInitialized() {
  return g_init_gtest_count != 0;
}

// Calls the given int-returning const member function on every TestCase
// in case_list, in order, and returns the sum of the results (0 for an
// empty list).
static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
                               int (TestCase::*method)() const) {
  typedef std::vector<TestCase*>::const_iterator CaseIter;

  int total = 0;

  for (CaseIter it = case_list.begin(); it != case_list.end(); ++it) {
    const TestCase* const test_case = *it;
    total += (test_case->*method)();
  }

  return total;
}

// Returns true iff the test case is selected to run and passed.
static bool TestCasePassed(const TestCase* test_case) {
  if (!test_case->should_run()) {
    return false;
  }

  return test_case->Passed();
}

// Returns true iff the test case is selected to run and failed.
static bool TestCaseFailed(const TestCase* test_case) {
  if (!test_case->should_run()) {
    return false;
  }

  return test_case->Failed();
}

// Returns true iff test_case contains at least one test that should
// run.  This is a free-function wrapper around TestCase::should_run().
static bool ShouldRunTestCase(const TestCase* test_case) {
  return test_case->should_run();
}

// AssertHelper constructor.  Bundles the result type, source location
// and failure message into a heap-allocated AssertHelperData, which the
// destructor deletes.
AssertHelper::AssertHelper(TestPartResult::Type type,
                           const char* file,
                           int line,
                           const char* message)
  : data_(new AssertHelperData(type, file, line, message)) {
}

// Releases the AssertHelperData allocated by the constructor.
AssertHelper::~AssertHelper() {
  delete data_;
}

// Message assignment, for assertion streaming support.  Reports a test
// part result built from the stored type/file/line, the stored message
// with the streamed user message appended, and the current OS stack
// trace.
void AssertHelper::operator=(const Message& message) const {
  UnitTest* const unit_test = UnitTest::GetInstance();

  unit_test->AddTestPartResult(
    data_->type, data_->file, data_->line,
    AppendUserMessage(data_->message, message),
    // The argument 1 skips the stack frame for this function itself.
    unit_test->impl()->CurrentOsStackTraceExceptTop(1));  // NOLINT
}

// Mutex for linked pointers.
GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);

// Application pathname gotten in InitGoogleTest.  Read by
// GetCurrentExecutableName() to derive the executable's base name.
std::string g_executable_path;

// Returns the name of the current executable, taken from
// g_executable_path with any directory part removed.  On Windows the
// "exe" extension is removed as well.
FilePath GetCurrentExecutableName() {
  const FilePath executable(g_executable_path);
  FilePath name;

#if GTEST_OS_WINDOWS
  name.Set(executable.RemoveExtension("exe"));
#else
  name.Set(executable);
#endif  // GTEST_OS_WINDOWS

  return name.RemoveDirectoryName();
}

// Functions for processing the gtest_output flag.

// Returns the output format named by the output flag, i.e. the text
// before its first ':' (or the whole flag when it has no ':').  The
// result is "" for normal printed output.
std::string UnitTestOptions::GetOutputFormat() {
  const char* const flag_value = GTEST_FLAG(output).c_str();

  if (flag_value == NULL) {
    return std::string("");
  }

  const char* const separator = strchr(flag_value, ':');

  if (separator == NULL) {
    return std::string(flag_value);
  }

  return std::string(flag_value, separator - flag_value);
}

// Returns the name of the requested output file, or the default if none
// was explicitly specified.
std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
  const char* const gtest_output_flag = GTEST_FLAG(output).c_str();

  if (gtest_output_flag == NULL) {
    return "";
  }

  const char* const colon = strchr(gtest_output_flag, ':');

  if (colon == NULL)
    return internal::FilePath::ConcatPaths(
             internal::FilePath(
               UnitTest::GetInstance()->original_working_dir()),
             internal::FilePath(kDefaultOutputFile)).string();

  internal::FilePath output_name(colon + 1);

  if (!output_name.IsAbsolutePath())
    // TODO(wan@google.com): on Windows \some\path is not an absolute
    // path (as its meaning depends on the current drive), yet the
    // following logic for turning it into an absolute path is wrong.
    // Fix it.
    output_name = internal::FilePath::ConcatPaths(
                    internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
                    internal::FilePath(colon + 1));

  if (!output_name.IsDirectory()) {
    return output_name.string();
  }

  internal::FilePath result(internal::FilePath::GenerateUniqueFileName(
                              output_name, internal::GetCurrentExecutableName(),
                              GetOutputFormat().c_str()));
  return result.string();
}

// Returns true iff the wildcard pattern matches the whole of str.  The
// pattern ends at its first ':' or '\0'.  '?' matches any single
// character, '*' matches any (possibly empty) run of characters, and
// every other character matches only itself.
//
// The algorithm is recursive and not very efficient, but it is clear and
// good enough for test names, which are short.
bool UnitTestOptions::PatternMatchesString(const char* pattern,
    const char* str) {
  const char head = *pattern;

  // End of pattern: matches only if the string is exhausted as well.
  if (head == '\0' || head == ':') {
    return *str == '\0';
  }

  // Any single character.
  if (head == '?') {
    return *str != '\0' && PatternMatchesString(pattern + 1, str + 1);
  }

  // Any run of characters: first let '*' absorb one more character, then
  // fall back to matching the rest of the pattern at this position.
  if (head == '*') {
    if (*str != '\0' && PatternMatchesString(pattern, str + 1)) {
      return true;
    }

    return PatternMatchesString(pattern + 1, str);
  }

  // Ordinary character: must match exactly.
  return head == *str && PatternMatchesString(pattern + 1, str + 1);
}

// Returns true iff name matches at least one of the colon-separated
// wildcard patterns in filter.  Patterns are tried from left to right.
bool UnitTestOptions::MatchesFilter(
  const std::string& name, const char* filter) {
  const char* pattern = filter;

  do {
    if (PatternMatchesString(pattern, name.c_str())) {
      return true;
    }

    // Move to the pattern after the next separator (the ':' character),
    // or stop when there is none.
    const char* const separator = strchr(pattern, ':');
    pattern = (separator == NULL) ? NULL : separator + 1;
  } while (pattern != NULL);

  return false;
}

// Returns true iff the user-specified filter selects the test named
// "<test_case_name>.<test_name>".  The filter flag is split at its first
// '-': the part before it is the positive filter and the part after it
// the negative filter.  A test is selected when it matches the positive
// filter and does not match the negative one.
bool UnitTestOptions::FilterMatchesTest(const std::string& test_case_name,
                                        const std::string& test_name) {
  std::string full_name(test_case_name);
  full_name += ".";
  full_name += test_name.c_str();

  const char* const filter_text = GTEST_FLAG(filter).c_str();
  const char* const dash = strchr(filter_text, '-');

  std::string positive;
  std::string negative;

  if (dash != NULL) {
    // Everything up to the dash is positive, everything after it negative.
    positive.assign(filter_text, static_cast<size_t>(dash - filter_text));
    negative.assign(dash + 1);

    if (positive.empty()) {
      // Treat '-test1' as the same as '*-test1'.
      positive = kUniversalFilter;
    }
  }
  else {
    // No dash: the whole flag is a positive filter and nothing is
    // excluded.
    positive.assign(filter_text);
  }

  // Each filter is a colon-separated list of patterns and matches a test
  // if any of its patterns does.
  if (!MatchesFilter(full_name, positive.c_str())) {
    return false;
  }

  return !MatchesFilter(full_name, negative.c_str());
}

#if GTEST_HAS_SEH
// Decides whether Google Test should handle the given SEH exception.
// Returns EXCEPTION_EXECUTE_HANDLER if it should and
// EXCEPTION_CONTINUE_SEARCH otherwise, so the result can be used
// directly as an __except condition.
int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
  // SEH exception code for C++ exceptions (VC++ apparently implements
  // them via SEH; see http://support.microsoft.com/kb/185294).
  const DWORD kCxxExceptionCode = 0xe06d7363;

  // Handle the exception only if the user asked for exceptions to be
  // caught, and it is neither a breakpoint nor a C++ exception.
  const bool should_handle = GTEST_FLAG(catch_exceptions) &&
                             exception_code != EXCEPTION_BREAKPOINT &&
                             exception_code != kCxxExceptionCode;

  return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH;
}
#endif  // GTEST_HAS_SEH

}  // namespace internal

// Constructs a reporter that intercepts only the failures reported from
// the current thread (INTERCEPT_ONLY_CURRENT_THREAD).
//
// 'result' is the array that intercepted test part results are appended
// to; the pointer is stored, not copied.  Init() then installs this
// object as the active reporter and remembers the one it replaces.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
  TestPartResultArray* result)
  : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD),
    result_(result) {
  Init();
}

// Constructs a reporter with an explicit interception scope.
//
// 'intercept_mode' selects whether the global reporter
// (INTERCEPT_ALL_THREADS) or the current thread's reporter is replaced.
// 'result' is the array that intercepted test part results are appended
// to; the pointer is stored, not copied.  Init() performs the actual
// installation.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
  InterceptMode intercept_mode, TestPartResultArray* result)
  : intercept_mode_(intercept_mode),
    result_(result) {
  Init();
}

// Installs this object as the active test part result reporter for the
// scope selected by intercept_mode_, saving the reporter it replaces in
// old_reporter_ so that the destructor can put it back.
void ScopedFakeTestPartResultReporter::Init() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();

  if (intercept_mode_ != INTERCEPT_ALL_THREADS) {
    // Replace only the reporter of the calling thread.
    old_reporter_ = impl->GetTestPartResultReporterForCurrentThread();
    impl->SetTestPartResultReporterForCurrentThread(this);
    return;
  }

  // Replace the process-wide (global) reporter.
  old_reporter_ = impl->GetGlobalTestPartResultReporter();
  impl->SetGlobalTestPartResultReporter(this);
}

// The d'tor reinstalls the reporter that was active before this object
// was constructed, in the same scope (global or current thread) that
// Init() replaced.
ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();

  if (intercept_mode_ != INTERCEPT_ALL_THREADS) {
    impl->SetTestPartResultReporterForCurrentThread(old_reporter_);
    return;
  }

  impl->SetGlobalTestPartResultReporter(old_reporter_);
}

// Increments the test part result count and remembers the result.
// This method is from the TestPartResultReporterInterface interface.
void ScopedFakeTestPartResultReporter::ReportTestPartResult(
  const TestPartResult& result) {
  result_->Append(result);
}

namespace internal {

// Returns the type ID of ::testing::Test.  Always call this function
// rather than GetTypeId< ::testing::Test>() directly.  It works around a
// suspected linker bug seen when Google Test is used as a framework on
// Mac OS X, where GetTypeId< ::testing::Test>() yields different values
// depending on whether it is called from the framework or from user
// test code.  Because this function lives in the Google Test library
// itself, GetTypeId<>() is always instantiated from the same place and
// the value is stable.
TypeId GetTestTypeId() {
  const TypeId test_type_id = GetTypeId<Test>();
  return test_type_id;
}

// The value of GetTestTypeId() as computed from within the Google Test
// library, captured once when this object is initialized.  It exists
// solely so that tests of GetTestTypeId() can compare against it.
extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();

// Predicate-formatter verifying that 'results' holds exactly one test
// part result, that it has the given type, and that its message
// contains 'substr'.  The three expression-text parameters are unused.
// On failure the returned AssertionResult describes what was expected
// and what was actually found.
AssertionResult HasOneFailure(const char* /* results_expr */,
                              const char* /* type_expr */,
                              const char* /* substr_expr */,
                              const TestPartResultArray& results,
                              TestPartResult::Type type,
                              const string& substr) {
  const bool want_fatal = (type == TestPartResult::kFatalFailure);
  const std::string expected(want_fatal ? "1 fatal failure" :
                             "1 non-fatal failure");

  // Wrong number of results: list every one of them in the message.
  if (results.size() != 1) {
    Message msg;
    msg << "Expected: " << expected << "\n"
        << "  Actual: " << results.size() << " failures";

    for (int index = 0; index < results.size(); ++index) {
      msg << "\n" << results.GetTestPartResult(index);
    }

    return AssertionFailure() << msg;
  }

  const TestPartResult& only_result = results.GetTestPartResult(0);

  // Exactly one result, but of the wrong kind.
  if (only_result.type() != type) {
    return AssertionFailure() << "Expected: " << expected << "\n"
           << "  Actual:\n"
           << only_result;
  }

  // Right kind, but the message lacks the expected substring.
  const bool has_substr =
    strstr(only_result.message(), substr.c_str()) != NULL;

  if (!has_substr) {
    return AssertionFailure() << "Expected: " << expected << " containing \""
           << substr << "\"\n"
           << "  Actual:\n"
           << only_result;
  }

  return AssertionSuccess();
}

// The constructor of SingleFailureChecker only records its arguments;
// the actual check happens in the destructor.
//
//   results: where to look up the test part results (pointer is stored).
//   type:    the kind of failure that is expected.
//   substr:  the substring the failure message should contain (copied).
SingleFailureChecker:: SingleFailureChecker(
  const TestPartResultArray* results,
  TestPartResult::Type type,
  const string& substr)
  : results_(results),
    type_(type),
    substr_(substr) {}

// The destructor of SingleFailureChecker verifies that the given
// TestPartResultArray contains exactly one failure that has the given
// type and contains the given substring.  If that's not the case, a
// non-fatal failure will be generated.
SingleFailureChecker::~SingleFailureChecker() {
  // HasOneFailure() is the predicate-formatter that performs the check
  // and builds the failure message.
  EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
}

// Stores the UnitTestImpl that reported results are forwarded to.  The
// pointer is kept as-is; no ownership transfer is visible here.
DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
  UnitTestImpl* unit_test) : unit_test_(unit_test) {}

void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
  const TestPartResult& result) {
  unit_test_->current_test_result()->AddTestPartResult(result);
  unit_test_->listeners()->repeater()->OnTestPartResult(result);
}

// Stores the UnitTestImpl whose global reporter receives the results
// reported through this per-thread reporter.
DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
  UnitTestImpl* unit_test) : unit_test_(unit_test) {}

void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
  const TestPartResult& result) {
  unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
}

// Returns the global test part result reporter.  The pointer is read
// while holding global_test_part_result_reporter_mutex_.
TestPartResultReporterInterface*
UnitTestImpl::GetGlobalTestPartResultReporter() {
  internal::MutexLock guard(&global_test_part_result_reporter_mutex_);
  TestPartResultReporterInterface* const current =
    global_test_part_result_repoter_;
  return current;
}

// Installs 'reporter' as the global test part result reporter.  The
// pointer is written while holding
// global_test_part_result_reporter_mutex_.
void UnitTestImpl::SetGlobalTestPartResultReporter(
  TestPartResultReporterInterface* reporter) {
  internal::MutexLock guard(&global_test_part_result_reporter_mutex_);
  global_test_part_result_repoter_ = reporter;
}

// Returns the test part result reporter installed for the calling
// thread, as held by per_thread_test_part_result_reporter_.
TestPartResultReporterInterface*
UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
  TestPartResultReporterInterface* const thread_reporter =
    per_thread_test_part_result_reporter_.get();
  return thread_reporter;
}

// Sets the test part result reporter for the current thread by storing
// 'reporter' in per_thread_test_part_result_reporter_.
void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
  TestPartResultReporterInterface* reporter) {
  per_thread_test_part_result_reporter_.set(reporter);
}

// Counts the test cases for which TestCasePassed holds.
int UnitTestImpl::successful_test_case_count() const {
  const int passed_cases = CountIf(test_cases_, TestCasePassed);
  return passed_cases;
}

// Counts the test cases for which TestCaseFailed holds.
int UnitTestImpl::failed_test_case_count() const {
  const int failed_cases = CountIf(test_cases_, TestCaseFailed);
  return failed_cases;
}

// Returns how many test cases are registered, whether or not they run.
int UnitTestImpl::total_test_case_count() const {
  const int all_cases = static_cast<int>(test_cases_.size());
  return all_cases;
}

// Counts the test cases for which ShouldRunTestCase holds, i.e. those
// containing at least one test that should run.
int UnitTestImpl::test_case_to_run_count() const {
  const int runnable_cases = CountIf(test_cases_, ShouldRunTestCase);
  return runnable_cases;
}

// Sums TestCase::successful_test_count() over all test cases.
int UnitTestImpl::successful_test_count() const {
  const int passed_tests =
    SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
  return passed_tests;
}

// Sums TestCase::failed_test_count() over all test cases.
int UnitTestImpl::failed_test_count() const {
  const int failed_tests =
    SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
  return failed_tests;
}

// Sums TestCase::reportable_disabled_test_count() over all test cases:
// the disabled tests that will be reported in the XML report.
int UnitTestImpl::reportable_disabled_test_count() const {
  const int reportable_disabled = SumOverTestCaseList(
                                    test_cases_,
                                    &TestCase::reportable_disabled_test_count);
  return reportable_disabled;
}

// Sums TestCase::disabled_test_count() over all test cases.
int UnitTestImpl::disabled_test_count() const {
  const int disabled_tests =
    SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
  return disabled_tests;
}

// Sums TestCase::reportable_test_count() over all test cases: the tests
// that will be printed in the XML report.
int UnitTestImpl::reportable_test_count() const {
  const int reportable_tests =
    SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count);
  return reportable_tests;
}

// Sums TestCase::total_test_count() over all test cases.
int UnitTestImpl::total_test_count() const {
  const int all_tests =
    SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
  return all_tests;
}

// Sums TestCase::test_to_run_count() over all test cases.
int UnitTestImpl::test_to_run_count() const {
  const int runnable_tests =
    SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
  return runnable_tests;
}

// Intended to return the current OS stack trace as an std::string.
//
// By contract, the maximum number of stack frames to include is given by
// the gtest_stack_trace_depth flag, and skip_count is the number of top
// frames to leave out (they do not count against that maximum).  For
// example, if Foo() calls Bar(), which in turn calls
// CurrentOsStackTraceExceptTop(1), Foo() would be included in the trace
// but Bar() and CurrentOsStackTraceExceptTop() would not.
//
// This implementation is a stub: it ignores skip_count and always
// returns an empty string.
std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
  static_cast<void>(skip_count);  // Deliberately unused.
  return std::string();
}

// Returns the current time in milliseconds.
//
// Exactly one of three platform-specific implementations is compiled in:
//   * Windows Mobile / Borland: GetSystemTime() + SystemTimeToFileTime(),
//     rebased from the 1601 FILETIME origin to 1970.  Returns 0 if the
//     conversion to FILETIME fails.
//   * Other Windows builds without gettimeofday(): _ftime64().
//   * Everything else that has gettimeofday(): gettimeofday().
// Compilation fails with an #error if none of these is available.
TimeInMillis GetTimeInMillis() {
#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
  // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
  // http://analogous.blogspot.com/2005/04/epoch.html
  const TimeInMillis kJavaEpochToWinFileTimeDelta =
    static_cast<TimeInMillis>(116444736UL) * 100000UL;
  // The FILETIME value is divided by this constant to obtain milliseconds.
  const DWORD kTenthMicrosInMilliSecond = 10000;

  SYSTEMTIME now_systime;
  FILETIME now_filetime;
  ULARGE_INTEGER now_int64;
  // TODO(kenton@google.com): Shouldn't this just use
  //   GetSystemTimeAsFileTime()?
  GetSystemTime(&now_systime);

  if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
    // Reassemble the two 32-bit halves into one 64-bit value, scale it
    // to milliseconds, then shift the origin from 1601 to 1970.
    now_int64.LowPart = now_filetime.dwLowDateTime;
    now_int64.HighPart = now_filetime.dwHighDateTime;
    now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
                         kJavaEpochToWinFileTimeDelta;
    return now_int64.QuadPart;
  }

  // The conversion to FILETIME failed.
  return 0;
#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
  __timeb64 now;

# ifdef _MSC_VER

  // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
  // (deprecated function) there.
  // TODO(kenton@google.com): Use GetTickCount()?  Or use
  //   SystemTimeToFileTime()
#  pragma warning(push)          // Saves the current warning state.
#  pragma warning(disable:4996)  // Temporarily disables warning 4996.
  _ftime64(&now);
#  pragma warning(pop)           // Restores the warning state.
# else

  _ftime64(&now);

# endif  // _MSC_VER

  // Combine whole seconds and the millisecond remainder.
  return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
#elif GTEST_HAS_GETTIMEOFDAY_
  struct timeval now;
  gettimeofday(&now, NULL);
  // Combine whole seconds and the microsecond remainder (truncated to
  // milliseconds).
  return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
#else
# error "Don't know how to get the current time on your system."
#endif
}

// Utilities

// class String.

#if GTEST_OS_WINDOWS_MOBILE
// Converts the given ANSI string to a newly allocated UTF-16 wide
// string.  The result is allocated with new[] and must be released by
// the caller with delete[].  Returns NULL if the input is NULL.
LPCWSTR String::AnsiToUtf16(const char* ansi) {
  if (ansi == NULL) {
    return NULL;
  }

  const int ansi_length = static_cast<int>(strlen(ansi));

  // The first call passes no output buffer and is used only to obtain
  // the number of wide characters needed; the second call converts.
  const int wide_length =
    MultiByteToWideChar(CP_ACP, 0, ansi, ansi_length, NULL, 0);
  WCHAR* const wide = new WCHAR[wide_length + 1];
  MultiByteToWideChar(CP_ACP, 0, ansi, ansi_length, wide, wide_length);

  // The input length excluded the terminator, so add it explicitly.
  wide[wide_length] = 0;
  return wide;
}

// Converts the given wide string to a newly allocated ANSI string.  The
// result is allocated with new[] and must be released by the caller
// with delete[].  Returns NULL if the input is NULL.
const char* String::Utf16ToAnsi(LPCWSTR utf16_str)  {
  if (utf16_str == NULL) {
    return NULL;
  }

  // The first call passes no output buffer and is used only to obtain
  // the required size; an input length of -1 asks the API to treat
  // utf16_str as null-terminated.
  const int required =
    WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, NULL, 0, NULL, NULL);
  char* const narrow = new char[required + 1];
  WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, narrow, required, NULL, NULL);

  // Always terminate, whatever the conversion wrote.
  narrow[required] = 0;
  return narrow;
}

#endif  // GTEST_OS_WINDOWS_MOBILE

// Returns true iff the two C strings have the same content.
//
// Unlike strcmp(), NULL arguments are accepted: NULL equals only NULL
// and differs from every non-NULL string, the empty string included.
bool String::CStringEquals(const char* lhs, const char* rhs) {
  // When at least one side is NULL the strings are equal only if both
  // are NULL, which is exactly a pointer comparison.
  if (lhs == NULL || rhs == NULL) {
    return lhs == rhs;
  }

  return strcmp(lhs, rhs) == 0;
}

#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

// Streams 'length' wide characters starting at wstr to msg, converted to
// UTF-8.  Embedded L'\0' characters are streamed as '\0' rather than
// ending the output.
static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
                                     Message* msg) {
  size_t pos = 0;

  while (pos != length) {
    if (wstr[pos] == L'\0') {
      // An embedded NUL is emitted on its own.
      *msg << '\0';
      ++pos;
      continue;
    }

    // Convert the text starting here, then advance past the run of
    // non-NUL characters (up to the next NUL or the end of the input).
    *msg << WideStringToUtf8(wstr + pos, static_cast<int>(length - pos));

    do {
      ++pos;
    }
    while (pos != length && wstr[pos] != L'\0');
  }
}

#endif  // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

}  // namespace internal

// Constructs an empty Message.
// The stringstream is heap-allocated on purpose: keeping it inline would
// add over 200 bytes to the stack frame of every procedure using
// ASSERT/EXPECT, and gcc does not reuse that stack space, which leads to
// huge frames in some cases.
Message::Message() : ss_(new ::std::stringstream) {
  // Use enough digits that a double streamed to a Message is printed
  // with sufficient precision by default.
  const int kDoublePrecision = std::numeric_limits<double>::digits10 + 2;
  *ss_ << std::setprecision(kDoublePrecision);
}

// Streams a const wide C string to this Message; the text to append is
// produced by internal::String::ShowWideCString().
Message& Message::operator <<(const wchar_t* wide_c_str) {
  const std::string narrow = internal::String::ShowWideCString(wide_c_str);
  return *this << narrow;
}
// Same as the const overload, for a mutable wide C string.
Message& Message::operator <<(wchar_t* wide_c_str) {
  const std::string narrow = internal::String::ShowWideCString(wide_c_str);
  return *this << narrow;
}

#if GTEST_HAS_STD_WSTRING
// Streams the given ::std::wstring to this Message object, converted to
// a narrow string using the UTF-8 encoding.
Message& Message::operator <<(const ::std::wstring& wstr) {
  const wchar_t* const chars = wstr.c_str();
  const size_t count = wstr.length();
  internal::StreamWideCharsToMessage(chars, count, this);
  return *this;
}
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
// Streams the given ::wstring to this Message object, converted to a
// narrow string using the UTF-8 encoding.
Message& Message::operator <<(const ::wstring& wstr) {
  const wchar_t* const chars = wstr.c_str();
  const size_t count = wstr.length();
  internal::StreamWideCharsToMessage(chars, count, this);
  return *this;
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

// Returns everything streamed to this object so far as an std::string.
// The conversion is done by internal::StringStreamToString(); per this
// method's contract, each '\0' character in the buffer is replaced with
// "\\0".
std::string Message::GetString() const {
  ::std::stringstream* const buffer = ss_.get();
  return internal::StringStreamToString(buffer);
}

// AssertionResult constructors.
// Used in EXPECT_TRUE/FALSE(assertion_result).
//
// Copy constructor: copies the success flag and, when 'other' carries a
// message, makes a deep copy of it (a new std::string).  When 'other'
// has no message, this object has none either.
AssertionResult::AssertionResult(const AssertionResult& other)
  : success_(other.success_),
    message_(other.message_.get() != NULL ?
             new ::std::string(*other.message_) :
             static_cast< ::std::string*>(NULL)) {
}

// Returns the negation of this assertion result: the success flag is
// flipped and the message, if there is one, is carried over unchanged.
// Used with EXPECT/ASSERT_FALSE.
AssertionResult AssertionResult::operator!() const {
  AssertionResult inverted(!success_);
  const ::std::string* const text = message_.get();

  if (text != NULL) {
    inverted << *text;
  }

  return inverted;
}

// Creates an AssertionResult that represents success.
AssertionResult AssertionSuccess() {
  AssertionResult success(true);
  return success;
}

// Creates an AssertionResult that represents failure, with no message.
AssertionResult AssertionFailure() {
  AssertionResult failure(false);
  return failure;
}

// Creates a failed AssertionResult carrying the given failure message.
// Deprecated; use AssertionFailure() << message.
AssertionResult AssertionFailure(const Message& message) {
  AssertionResult failure = AssertionFailure();
  failure << message;
  return failure;
}

namespace internal {

// Builds the failure result for an equality assertion (e.g. ASSERT_EQ,
// EXPECT_STREQ, etc).
//
// The first four parameters are the two expressions of the assertion
// and their values, all as strings.  For ASSERT_EQ(foo, bar) with foo
// being 5 and bar being 6 they are:
//
//   expected_expression: "foo"
//   actual_expression:   "bar"
//   expected_value:      "5"
//   actual_value:        "6"
//
// ignoring_case is true iff the assertion is a *_STRCASEEQ*; in that
// case " (ignoring case)" is inserted into the message.
//
// A value line is omitted when the value's text is identical to its
// expression text, since it would add no information.
AssertionResult EqFailure(const char* expected_expression,
                          const char* actual_expression,
                          const std::string& expected_value,
                          const std::string& actual_value,
                          bool ignoring_case) {
  const bool show_actual_value = (actual_value != actual_expression);
  const bool show_expected_value = (expected_value != expected_expression);

  Message text;
  text << "Value of: " << actual_expression;

  if (show_actual_value) {
    text << "\n  Actual: " << actual_value;
  }

  text << "\nExpected: " << expected_expression;

  if (ignoring_case) {
    text << " (ignoring case)";
  }

  if (show_expected_value) {
    text << "\nWhich is: " << expected_value;
  }

  return AssertionFailure() << text;
}

// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
std::string GetBoolAssertionFailureMessage(
  const AssertionResult& assertion_result,
  const char* expression_text,
  const char* actual_predicate_value,
  const char* expected_predicate_value) {
  const char* actual_message = assertion_result.message();
  Message msg;
  msg << "Value of: " << expression_text
      << "\n  Actual: " << actual_predicate_value;

  if (actual_message[0] != '\0') {
    msg << " (" << actual_message << ")";
  }

  msg << "\nExpected: " << expected_predicate_value;
  return msg.GetString();
}

// Helper function for implementing ASSERT_NEAR.
//
// Succeeds when |val1 - val2| <= abs_error.  expr1, expr2 and
// abs_error_expr are the source texts of the three arguments and are
// used only to build the failure message.
//
// The comparison fails whenever val1, val2 or abs_error is NaN, because
// every ordered comparison involving NaN is false.
AssertionResult DoubleNearPredFormat(const char* expr1,
                                     const char* expr2,
                                     const char* abs_error_expr,
                                     double val1,
                                     double val2,
                                     double abs_error) {
  // Two same-signed infinities are exactly equal, yet their difference
  // (inf - inf) is NaN, which would make the check below fail.  Accept
  // exactly-equal values up front.  Requiring abs_error >= 0 keeps a
  // negative or NaN tolerance failing as before; for finite equal values
  // with such a tolerance the result is unchanged (diff is 0).
  if (val1 == val2 && abs_error >= 0) {
    return AssertionSuccess();
  }

  const double diff = fabs(val1 - val2);

  if (diff <= abs_error) {
    return AssertionSuccess();
  }

  // TODO(wan): do not print the value of an expression if it's
  // already a literal.
  return AssertionFailure()
         << "The difference between " << expr1 << " and " << expr2
         << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
         << expr1 << " evaluates to " << val1 << ",\n"
         << expr2 << " evaluates to " << val2 << ", and\n"
         << abs_error_expr << " evaluates to " << abs_error << ".";
}


// Shared implementation of FloatLE() and DoubleLE(); RawType is the
// floating-point type being compared.
template <typename RawType>
AssertionResult FloatingPointLE(const char* expr1,
                                const char* expr2,
                                RawType val1,
                                RawType val2) {
  // Succeed when val1 is strictly less than val2, or when the two are
  // almost equal.  Both tests fail if either value is NaN, as the IEEE
  // floating-point standard requires every predicate involving a NaN to
  // be false, so NaN operands always reach the failure path below.
  const bool strictly_less = val1 < val2;

  if (strictly_less ||
      FloatingPoint<RawType>(val1).AlmostEquals(
        FloatingPoint<RawType>(val2))) {
    return AssertionSuccess();
  }

  // Print both values with enough digits to tell them apart.
  const int kPrecision = std::numeric_limits<RawType>::digits10 + 2;

  ::std::stringstream lhs_text;
  lhs_text << std::setprecision(kPrecision) << val1;

  ::std::stringstream rhs_text;
  rhs_text << std::setprecision(kPrecision) << val2;

  return AssertionFailure()
         << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
         << "  Actual: " << StringStreamToString(&lhs_text) << " vs "
         << StringStreamToString(&rhs_text);
}

}  // namespace internal

// Checks that the float val1 is less than, or almost equal to, val2.
// The check fails otherwise, and in particular whenever val1 or val2 is
// NaN.
AssertionResult FloatLE(const char* expr1, const char* expr2,
                        float val1, float val2) {
  const AssertionResult outcome =
    internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
  return outcome;
}

// Checks that the double val1 is less than, or almost equal to, val2.
// The check fails otherwise, and in particular whenever val1 or val2 is
// NaN.
AssertionResult DoubleLE(const char* expr1, const char* expr2,
                         double val1, double val2) {
  const AssertionResult outcome =
    internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
  return outcome;
}

namespace internal {

// Implements {ASSERT|EXPECT}_EQ for int or enum arguments.  Returns
// success when the two values are equal; otherwise returns the standard
// equality-failure result built by EqFailure().
AssertionResult CmpHelperEQ(const char* expected_expression,
                            const char* actual_expression,
                            BiggestInt expected,
                            BiggestInt actual) {
  if (expected != actual) {
    return EqFailure(expected_expression,
                     actual_expression,
                     FormatForComparisonFailureMessage(expected, actual),
                     FormatForComparisonFailureMessage(actual, expected),
                     false);
  }

  return AssertionSuccess();
}

// A macro for implementing the helper functions needed to implement
// ASSERT_?? and EXPECT_?? with integer or enum arguments.  It is here
// just to avoid copy-and-paste of similar code.
//
// GTEST_IMPL_CMP_HELPER_(op_name, op) defines CmpHelper<op_name>(),
// which returns AssertionSuccess() when (val1 op val2) holds and
// otherwise an AssertionFailure() whose message shows both expression
// texts, the operator, and both values as formatted by
// FormatForComparisonFailureMessage().
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   BiggestInt val1, BiggestInt val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  } else {\
    return AssertionFailure() \
        << "Expected: (" << expr1 << ") " #op " (" << expr2\
        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
  }\
}

// Implements the helper function for {ASSERT|EXPECT}_NE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(NE, != )
// Implements the helper function for {ASSERT|EXPECT}_LE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(LE, <= )
// Implements the helper function for {ASSERT|EXPECT}_LT with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(LT, < )
// Implements the helper function for {ASSERT|EXPECT}_GE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(GE, >= )
// Implements the helper function for {ASSERT|EXPECT}_GT with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(GT, > )

// The macro is an implementation detail of this file; do not leak it.
#undef GTEST_IMPL_CMP_HELPER_

// Implements {ASSERT|EXPECT}_STREQ.  The strings are compared with
// String::CStringEquals(); on mismatch the standard equality-failure
// result is returned, showing both strings as printed by
// PrintToString().
AssertionResult CmpHelperSTREQ(const char* expected_expression,
                               const char* actual_expression,
                               const char* expected,
                               const char* actual) {
  if (!String::CStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     PrintToString(expected),
                     PrintToString(actual),
                     false);
  }

  return AssertionSuccess();
}

// Implements {ASSERT|EXPECT}_STRCASEEQ.  The strings are compared with
// String::CaseInsensitiveCStringEquals(); on mismatch the standard
// equality-failure result is returned, marked as "(ignoring case)".
AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
                                   const char* actual_expression,
                                   const char* expected,
                                   const char* actual) {
  if (!String::CaseInsensitiveCStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     PrintToString(expected),
                     PrintToString(actual),
                     true);
  }

  return AssertionSuccess();
}

// Implements {ASSERT|EXPECT}_STRNE.  Succeeds when the two strings
// differ according to String::CStringEquals(); otherwise the failure
// message shows both expressions and both string values.
AssertionResult CmpHelperSTRNE(const char* s1_expression,
                               const char* s2_expression,
                               const char* s1,
                               const char* s2) {
  const bool same = String::CStringEquals(s1, s2);

  if (same) {
    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
           << s2_expression << "), actual: \""
           << s1 << "\" vs \"" << s2 << "\"";
  }

  return AssertionSuccess();
}

// Implements {ASSERT|EXPECT}_STRCASENE.  Succeeds when the two strings
// differ according to String::CaseInsensitiveCStringEquals(); otherwise
// the failure message shows both expressions and both string values.
AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                   const char* s2_expression,
                                   const char* s1,
                                   const char* s2) {
  const bool same_ignoring_case =
    String::CaseInsensitiveCStringEquals(s1, s2);

  if (same_ignoring_case) {
    return AssertionFailure()
           << "Expected: (" << s1_expression << ") != ("
           << s2_expression << ") (ignoring case), actual: \""
           << s1 << "\" vs \"" << s2 << "\"";
  }

  return AssertionSuccess();
}

}  // namespace internal

namespace {

// Helper functions for implementing IsSubstring() and IsNotSubstring().

// Each function in this group of overloads returns true iff needle is a
// substring of haystack.  NULL is considered a substring of itself
// only.

// Narrow C-string overload.  NULL is a substring of NULL only.
bool IsSubstringPred(const char* needle, const char* haystack) {
  if (needle != NULL && haystack != NULL) {
    return strstr(haystack, needle) != NULL;
  }

  // At least one side is NULL: true only when both are.
  return needle == haystack;
}

// Wide C-string overload.  NULL is a substring of NULL only.
bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
  if (needle != NULL && haystack != NULL) {
    return wcsstr(haystack, needle) != NULL;
  }

  // At least one side is NULL: true only when both are.
  return needle == haystack;
}

// String-object overload; StringType is either ::std::string or
// ::std::wstring.
template <typename StringType>
bool IsSubstringPred(const StringType& needle,
                     const StringType& haystack) {
  const typename StringType::size_type where = haystack.find(needle);
  return where != StringType::npos;
}

// Common implementation of IsSubstring() and IsNotSubstring(); which of
// the two is selected by expected_to_be_substring.  StringType can be
// const char*, const wchar_t*, ::std::string, or ::std::wstring.
template <typename StringType>
AssertionResult IsSubstringImpl(
  bool expected_to_be_substring,
  const char* needle_expr, const char* haystack_expr,
  const StringType& needle, const StringType& haystack) {
  const bool is_substring = IsSubstringPred(needle, haystack);

  if (is_substring == expected_to_be_substring) {
    return AssertionSuccess();
  }

  // Wide strings are shown with an L prefix, as in source code.  The
  // character width is taken from the element type; sizeof does not
  // evaluate needle[0].
  const char* const open_quote = (sizeof(needle[0]) > 1) ? "L\"" : "\"";
  const char* const negation = expected_to_be_substring ? "" : "not ";

  return AssertionFailure()
         << "Value of: " << needle_expr << "\n"
         << "  Actual: " << open_quote << needle << "\"\n"
         << "Expected: " << negation
         << "a substring of " << haystack_expr << "\n"
         << "Which is: " << open_quote << haystack << "\"";
}

}  // namespace

// IsSubstring() and IsNotSubstring() check whether needle is a
// substring of haystack (NULL is considered a substring of itself
// only), and return an appropriate error message when they fail.

// Narrow C-string version: succeeds iff needle is a substring of
// haystack.
AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const char* needle, const char* haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Wide C-string version: succeeds iff needle is a substring of haystack.
AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const wchar_t* needle, const wchar_t* haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Narrow C-string version: succeeds iff needle is NOT a substring of
// haystack.
AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const char* needle, const char* haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// Wide C-string version: succeeds iff needle is NOT a substring of
// haystack.
AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const wchar_t* needle, const wchar_t* haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::string version: succeeds iff needle is a substring of haystack.
AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::string& needle, const ::std::string& haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::string version: succeeds iff needle is NOT a substring of
// haystack.
AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::string& needle, const ::std::string& haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

#if GTEST_HAS_STD_WSTRING
// ::std::wstring version: succeeds iff needle is a substring of
// haystack.
AssertionResult IsSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::wstring& needle, const ::std::wstring& haystack) {
  const bool kExpectSubstring = true;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}

// ::std::wstring version: succeeds iff needle is NOT a substring of
// haystack.
AssertionResult IsNotSubstring(
  const char* needle_expr, const char* haystack_expr,
  const ::std::wstring& needle, const ::std::wstring& haystack) {
  const bool kExpectSubstring = false;
  return IsSubstringImpl(kExpectSubstring, needle_expr, haystack_expr,
                         needle, haystack);
}
#endif  // GTEST_HAS_STD_WSTRING

namespace internal {

#if GTEST_OS_WINDOWS

namespace {

// Shared failure builder for the IsHRESULT{Success,Failure} predicates.
//
//   expr:     source text of the expression that produced the HRESULT
//   expected: what the assertion wanted ("succeeds" / "fails")
//   hr:       the HRESULT value that was actually observed
//
// The returned failure shows the expectation followed by the HRESULT in
// hexadecimal and, where available, the system's message text for it.
AssertionResult HRESULTFailureHelper(const char* expr,
                                     const char* expected,
                                     long hr) {  // NOLINT
# if GTEST_OS_WINDOWS_MOBILE

  // FormatMessage is unavailable on Windows CE, so no text is reported.
  const char error_text[] = "";

# else

  const DWORD kBufSize = 4096;
  // Ask the system message table for the text; no insert arguments are
  // supplied, so insert sequences must be left unexpanded.
  const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
                       FORMAT_MESSAGE_IGNORE_INSERTS;
  char error_text[kBufSize] = { '\0' };
  DWORD message_length = ::FormatMessageA(kFlags,
                                          0,  // no source, we're asking system
                                          hr,  // the error
                                          0,  // no line width restrictions
                                          error_text,  // output buffer
                                          kBufSize,  // buf size
                                          NULL);  // no arguments for inserts

  // Strip trailing white space (FormatMessage leaves a trailing CR-LF).
  while (message_length != 0 && IsSpace(error_text[message_length - 1])) {
    error_text[message_length - 1] = '\0';
    --message_length;
  }

# endif  // GTEST_OS_WINDOWS_MOBILE

  const std::string error_hex("0x" + String::FormatHexInt(hr));
  return ::testing::AssertionFailure()
         << "Expected: " << expr << " " << expected << ".\n"
         << "  Actual: " << error_hex << " " << error_text << "\n";
}

}  // namespace

// Returns AssertionSuccess() when SUCCEEDED(hr) holds; otherwise returns the
// failure produced by HRESULTFailureHelper() with the expectation "succeeds".
AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
  return SUCCEEDED(hr) ? AssertionSuccess()
                       : HRESULTFailureHelper(expr, "succeeds", hr);
}

// Returns AssertionSuccess() when FAILED(hr) holds; otherwise returns the
// failure produced by HRESULTFailureHelper() with the expectation "fails".
AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
  return FAILED(hr) ? AssertionSuccess()
                    : HRESULTFailureHelper(expr, "fails", hr);
}

#endif  // GTEST_OS_WINDOWS

// Utility functions for encoding Unicode text (wide strings) in
// UTF-8.

// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
// like this:
//
// Code-point length   Encoding
//   0 -  7 bits       0xxxxxxx
//   8 - 11 bits       110xxxxx 10xxxxxx
//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

// The maximum code-point a one-byte UTF-8 sequence can represent
// (7 payload bits, i.e. 0x7F).
const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;

// The maximum code-point a two-byte UTF-8 sequence can represent
// (5 + 6 payload bits, i.e. 0x7FF).
const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;

// The maximum code-point a three-byte UTF-8 sequence can represent
// (4 + 2 * 6 payload bits, i.e. 0xFFFF).
const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2 * 6)) - 1;

// The maximum code-point a four-byte UTF-8 sequence can represent
// (3 + 3 * 6 payload bits, i.e. 0x1FFFFF).
const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3 * 6)) - 1;

// Splits the n least significant bits off *bits.
//
// Returns those n bits; as a side effect *bits is shifted right by n so the
// next call sees the following group of bits.
inline UInt32 ChopLowBits(UInt32* bits, int n) {
  const UInt32 mask = (static_cast<UInt32>(1) << n) - 1;
  const UInt32 chopped = *bits & mask;
  *bits = *bits >> n;
  return chopped;
}

// Encodes a single code point as a UTF-8 narrow string.
//
// The parameter is a UInt32 because wchar_t may be too narrow to hold a
// code point.  Values larger than kMaxCodePoint4 (more than 21 bits) cannot
// be encoded and are rendered as "(Invalid Unicode 0xXXXXXXXX)"; everything
// else is emitted as a one- to four-byte sequence.
std::string CodePointToUtf8(UInt32 code_point) {
  if (code_point > kMaxCodePoint4) {
    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
  }

  // Choose the sequence length and the marker bits of the leading byte.
  int length;
  UInt32 lead_marker;

  if (code_point <= kMaxCodePoint1) {
    length = 1;
    lead_marker = 0x00;  // 0xxxxxxx
  }
  else if (code_point <= kMaxCodePoint2) {
    length = 2;
    lead_marker = 0xC0;  // 110xxxxx
  }
  else if (code_point <= kMaxCodePoint3) {
    length = 3;
    lead_marker = 0xE0;  // 1110xxxx
  }
  else {    // code_point <= kMaxCodePoint4
    length = 4;
    lead_marker = 0xF0;  // 11110xxx
  }

  char utf8[5];  // Four bytes for the longest sequence plus the terminator.
  utf8[length] = '\0';

  // Continuation bytes (10xxxxxx) are filled from the last one backwards,
  // each consuming the six lowest remaining bits.
  for (int pos = length - 1; pos > 0; --pos) {
    utf8[pos] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));
  }

  // Whatever bits remain belong in the leading byte.
  utf8[0] = static_cast<char>(lead_marker | code_point);

  return utf8;
}

// The following two functions only make sense if the system
// uses UTF-16 for wide string encoding. All supported systems
// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.

// Tells whether (first, second) form a UTF-16 surrogate pair, i.e. whether
// they should be merged into one code point by
// CreateCodePointFromUtf16SurrogatePair.  Always false when wchar_t is not
// two bytes wide.
inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    return false;
  }

  const bool first_is_lead = (first & 0xFC00) == 0xD800;
  const bool second_is_trail = (second & 0xFC00) == 0xDC00;
  return first_is_lead && second_is_trail;
}

// Combines a UTF-16 surrogate pair into the code point it encodes: the low
// ten bits of each unit are concatenated and 0x10000 is added.
inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
    wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    // This function should not be called in this configuration, but a
    // sensible default is returned in case it is.
    return static_cast<UInt32>(first);
  }

  const UInt32 mask = (1 << 10) - 1;
  const UInt32 high_bits = (first & mask) << 10;
  const UInt32 low_bits = second & mask;
  return (high_bits | low_bits) + 0x10000;
}

// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Normal Plane.
std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
  if (num_chars == -1) {
    num_chars = static_cast<int>(wcslen(str));
  }

  ::std::stringstream stream;

  for (int i = 0; i < num_chars; ++i) {
    UInt32 unicode_code_point;

    if (str[i] == L'\0') {
      break;
    }
    else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
      unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
                           str[i + 1]);
      i++;
    }
    else {
      unicode_code_point = static_cast<UInt32>(str[i]);
    }

    stream << CodePointToUtf8(unicode_code_point);
  }

  return StringStreamToString(&stream);
}

// Renders a wide C string as a UTF-8 std::string.  A NULL pointer is shown
// as the literal text "(null)".
std::string String::ShowWideCString(const wchar_t* wide_c_str) {
  return (wide_c_str == NULL)
         ? std::string("(null)")
         : internal::WideStringToUtf8(wide_c_str, -1);
}

// NULL-tolerant equality test for wide C strings.
//
// Two NULL pointers compare equal; a NULL pointer never equals a non-NULL
// string (not even the empty string).  Otherwise the result is that of
// wcscmp().
bool String::WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs) {
  if (lhs == NULL || rhs == NULL) {
    // Equal only when both sides are NULL.
    return lhs == rhs;
  }

  return wcscmp(lhs, rhs) == 0;
}

// Backs the *_STREQ assertions for wide strings: succeeds when
// String::WideCStringEquals() accepts the two values, otherwise reports an
// EqFailure() built from the expression texts and the printed values.
AssertionResult CmpHelperSTREQ(const char* expected_expression,
                               const char* actual_expression,
                               const wchar_t* expected,
                               const wchar_t* actual) {
  if (!String::WideCStringEquals(expected, actual)) {
    return EqFailure(expected_expression,
                     actual_expression,
                     PrintToString(expected),
                     PrintToString(actual),
                     false);
  }

  return AssertionSuccess();
}

// Backs the *_STRNE assertions for wide strings: succeeds when
// String::WideCStringEquals() rejects the two values, otherwise reports a
// failure quoting both expressions and both printed values.
AssertionResult CmpHelperSTRNE(const char* s1_expression,
                               const char* s2_expression,
                               const wchar_t* s1,
                               const wchar_t* s2) {
  if (String::WideCStringEquals(s1, s2)) {
    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
           << s2_expression << "), actual: "
           << PrintToString(s1)
           << " vs " << PrintToString(s2);
  }

  return AssertionSuccess();
}

// NULL-tolerant, case-insensitive equality test for C strings.
//
// Two NULL pointers compare equal; a NULL pointer never equals a non-NULL
// string (not even the empty string).  Otherwise the result is that of
// posix::StrCaseCmp().
bool String::CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) {
  if (lhs == NULL || rhs == NULL) {
    // Equal only when both sides are NULL.
    return lhs == rhs;
  }

  return posix::StrCaseCmp(lhs, rhs) == 0;
}

// NULL-tolerant, case-insensitive equality test for wide C strings.
//
// Two NULL pointers compare equal; a NULL pointer never equals a non-NULL
// wide string (not even the empty string).
//
// NB: the implementation differs slightly between platforms.
//   * Windows uses _wcsicmp, which compares according to the LC_CTYPE
//     environment variable.
//   * GNU platforms use wcscasecmp, which compares according to the LC_CTYPE
//     category of the current locale.
//   * Elsewhere (e.g. Mac OS X) the characters are folded with towlower,
//     which also uses the LC_CTYPE category of the current locale.
bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
    const wchar_t* rhs) {
  if (lhs == NULL || rhs == NULL) {
    // Equal only when both sides are NULL.
    return lhs == rhs;
  }

#if GTEST_OS_WINDOWS
  return _wcsicmp(lhs, rhs) == 0;
#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID
  return wcscasecmp(lhs, rhs) == 0;
#else

  // Android, Mac OS X and Cygwin don't define wcscasecmp.
  // Other unknown OSes may not define it either.
  for (;;) {
    const wint_t left = towlower(*lhs++);
    const wint_t right = towlower(*rhs++);

    if (left != right) {
      return false;
    }

    if (left == 0) {
      // Both strings ended at the same position.
      return true;
    }
  }

#endif  // OS selector
}

// Returns true iff the tail of str equals suffix, ignoring case.
// Every string ends with the empty suffix.
bool String::EndsWithCaseInsensitive(
  const std::string& str, const std::string& suffix) {
  const size_t str_len = str.length();
  const size_t suffix_len = suffix.length();

  if (suffix_len > str_len) {
    return false;
  }

  // Compare only the last suffix_len characters of str.
  const char* const tail = str.c_str() + (str_len - suffix_len);
  return CaseInsensitiveCStringEquals(tail, suffix.c_str());
}

// Formats an int value as "%02d": minimum width two, padded with '0'.
std::string String::FormatIntWidth2(int value) {
  std::stringstream formatted;
  formatted << std::setw(2) << std::setfill('0') << value;
  return formatted.str();
}

// Formats an int value as "%X": upper-case hexadecimal, no prefix or padding.
std::string String::FormatHexInt(int value) {
  std::stringstream formatted;
  formatted << std::uppercase << std::hex << value;
  return formatted.str();
}

// Formats a byte as "%02X": two upper-case hexadecimal digits.
std::string String::FormatByte(unsigned char value) {
  // Widen first so the stream prints a number rather than a character.
  const unsigned int numeric = static_cast<unsigned int>(value);

  std::stringstream formatted;
  formatted << std::hex << std::uppercase
            << std::setw(2) << std::setfill('0') << numeric;
  return formatted.str();
}

// Returns the contents of *ss as an std::string in which every NUL byte has
// been replaced by the two characters "\\0".
std::string StringStreamToString(::std::stringstream* ss) {
  const ::std::string raw = ss->str();

  std::string escaped;
  // Worst case: every byte is NUL and doubles in size.
  escaped.reserve(2 * raw.length());

  for (::std::string::size_type i = 0; i != raw.length(); ++i) {
    const char ch = raw[i];

    if (ch == '\0') {
      escaped += "\\0";
    }
    else {
      escaped += ch;
    }
  }

  return escaped;
}

// Joins the Google-Test-generated message with the user-supplied one.
//
// Returns gtest_msg unchanged when the user message is empty; otherwise
// returns gtest_msg, a newline, and the user message.
std::string AppendUserMessage(const std::string& gtest_msg,
                              const Message& user_msg) {
  const std::string user_text = user_msg.GetString();
  return user_text.empty() ? gtest_msg : gtest_msg + "\n" + user_text;
}

}  // namespace internal

// class TestResult

// Creates an empty TestResult: the death test count and the elapsed time
// both start at zero; the remaining members are default-constructed.
TestResult::TestResult()
  : death_test_count_(0),
    elapsed_time_(0) {
}

// D'tor.  Intentionally empty; members clean up through their own
// destructors.
TestResult::~TestResult() {
}

// Returns the i-th recorded test part result.  Valid indices are
// 0 .. total_part_count() - 1; any other index aborts the program.
const TestPartResult& TestResult::GetTestPartResult(int i) const {
  const bool in_range = (i >= 0) && (i < total_part_count());

  if (!in_range) {
    internal::posix::Abort();
  }

  return test_part_results_.at(i);
}

// Returns the i-th recorded test property.  Valid indices are
// 0 .. test_property_count() - 1; any other index aborts the program.
const TestProperty& TestResult::GetTestProperty(int i) const {
  const bool in_range = (i >= 0) && (i < test_property_count());

  if (!in_range) {
    internal::posix::Abort();
  }

  return test_properties_.at(i);
}

// Clears the test part results.  Only test_part_results_ is touched here;
// use Clear() to reset the whole object.
void TestResult::ClearTestPartResults() {
  test_part_results_.clear();
}

// Adds a test part result to the list by appending a copy of
// test_part_result to test_part_results_.
void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
  test_part_results_.push_back(test_part_result);
}

// Adds a test property to the list. If a property with the same key as the
// supplied property is already represented, the value of this test_property
// replaces the old value for that key.
void TestResult::RecordProperty(const std::string& xml_element,
                                const TestProperty& test_property) {
  if (!ValidateTestProperty(xml_element, test_property)) {
    return;
  }

  internal::MutexLock lock(&test_properites_mutex_);
  const std::vector<TestProperty>::iterator property_with_matching_key =
    std::find_if(test_properties_.begin(), test_properties_.end(),
                 internal::TestPropertyKeyIs(test_property.key()));

  if (property_with_matching_key == test_properties_.end()) {
    test_properties_.push_back(test_property);
    return;
  }

  property_with_matching_key->SetValue(test_property.value());
}

// The list of reserved attributes used in the <testsuites> element of XML
// output.  Returned by GetReservedAttributesForElement("testsuites").
static const char* const kReservedTestSuitesAttributes[] = {
  "disabled",
  "errors",
  "failures",
  "name",
  "random_seed",
  "tests",
  "time",
  "timestamp"
};

// The list of reserved attributes used in the <testsuite> element of XML
// output.  Returned by GetReservedAttributesForElement("testsuite").
static const char* const kReservedTestSuiteAttributes[] = {
  "disabled",
  "errors",
  "failures",
  "name",
  "tests",
  "time"
};

// The list of reserved attributes used in the <testcase> element of XML output.
// Returned by GetReservedAttributesForElement("testcase").
static const char* const kReservedTestCaseAttributes[] = {
  "classname",
  "name",
  "status",
  "time",
  "type_param",
  "value_param"
};

// Copies a fixed-size array of C strings into a vector of std::string,
// preserving element order.  kSize is deduced from the array argument.
template <int kSize>
std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
  const char* const* const first = array;
  return std::vector<std::string>(first, first + kSize);
}

// Returns the reserved attribute names for the given XML element.
// Recognized elements are "testsuites", "testsuite" and "testcase"; any
// other name fails a GTEST_CHECK_.
static std::vector<std::string> GetReservedAttributesForElement(
  const std::string& xml_element) {
  if (xml_element == "testsuites") {
    return ArrayAsVector(kReservedTestSuitesAttributes);
  }

  if (xml_element == "testsuite") {
    return ArrayAsVector(kReservedTestSuiteAttributes);
  }

  if (xml_element == "testcase") {
    return ArrayAsVector(kReservedTestCaseAttributes);
  }

  GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;

  // This code is unreachable but some compilers may not realize that.
  return std::vector<std::string>();
}

// Formats a list of words as a quoted, human-readable enumeration:
//
//   {}              -> ""
//   {"a"}           -> "'a'"
//   {"a", "b"}      -> "'a' and 'b'"
//   {"a", "b", "c"} -> "'a', 'b', and 'c'"
//
// Lists of three or more words use comma separators with "and" before the
// last word.  A two-word list is joined by " and " (previously the space
// before "and" was missing), and a single word is no longer prefixed with
// "and ".
static std::string FormatWordList(const std::vector<std::string>& words) {
  const size_t count = words.size();
  std::string word_list;

  for (size_t i = 0; i < count; ++i) {
    if (i > 0) {
      // Commas are only used when there are more than two words.
      word_list += (count > 2) ? ", " : " ";
    }

    // "and" introduces the last word, but only if something precedes it.
    if (count > 1 && i == count - 1) {
      word_list += "and ";
    }

    word_list += "'";
    word_list += words[i];
    word_list += "'";
  }

  return word_list;
}

// Checks property_name against reserved_names.
//
// Returns true when the name is not reserved.  When it is reserved, adds a
// non-fatal failure that names the offending key and lists all reserved
// names, then returns false.
bool ValidateTestPropertyName(const std::string& property_name,
                              const std::vector<std::string>& reserved_names) {
  const bool is_reserved =
    std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
    reserved_names.end();

  if (!is_reserved) {
    return true;
  }

  ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
                << " (" << FormatWordList(reserved_names)
                << " are reserved by " << GTEST_NAME_ << ")";
  return false;
}

// Validates test_property for use on the XML element named xml_element.
// Adds a failure if the key is one of that element's reserved attributes.
// Returns true if the property is valid.
bool TestResult::ValidateTestProperty(const std::string& xml_element,
                                      const TestProperty& test_property) {
  const std::vector<std::string> reserved =
    GetReservedAttributesForElement(xml_element);
  return ValidateTestPropertyName(test_property.key(), reserved);
}

// Resets the object to its freshly constructed state: both counters go back
// to zero and the recorded properties and part results are discarded.
void TestResult::Clear() {
  death_test_count_ = 0;
  elapsed_time_ = 0;
  test_properties_.clear();
  test_part_results_.clear();
}

// Returns true iff the test failed.
bool TestResult::Failed() const {
  for (int i = 0; i < total_part_count(); ++i) {
    if (GetTestPartResult(i).failed()) {
      return true;
    }
  }

  return false;
}

// Returns true iff the test part fatally failed.  Used as the predicate
// passed to CountIf() in TestResult::HasFatalFailure().
static bool TestPartFatallyFailed(const TestPartResult& result) {
  return result.fatally_failed();
}

// Returns true iff the test fatally failed, i.e. at least one recorded test
// part satisfies TestPartFatallyFailed().
bool TestResult::HasFatalFailure() const {
  return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
}

// Returns true iff the test part non-fatally failed.  Used as the predicate
// passed to CountIf() in TestResult::HasNonfatalFailure().
static bool TestPartNonfatallyFailed(const TestPartResult& result) {
  return result.nonfatally_failed();
}

// Returns true iff the test has a non-fatal failure, i.e. at least one
// recorded test part satisfies TestPartNonfatallyFailed().
bool TestResult::HasNonfatalFailure() const {
  return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
}

// Gets the number of all test parts.  This is the sum of the number
// of successful test parts and the number of failed test parts.
// The container size is narrowed to int to match the int-based index API
// (see GetTestPartResult()).
int TestResult::total_part_count() const {
  return static_cast<int>(test_part_results_.size());
}

// Returns the number of the test properties.  The container size is
// narrowed to int to match the int-based index API (see GetTestProperty()).
int TestResult::test_property_count() const {
  return static_cast<int>(test_properties_.size());
}

// class Test

// Creates a Test object.

// The c'tor saves the values of all Google Test flags by allocating an
// internal::GTestFlagSaver, which is released again in the d'tor.
Test::Test()
  : gtest_flag_saver_(new internal::GTestFlagSaver) {
}

// The d'tor restores the values of all Google Test flags; this happens as a
// result of deleting the flag saver allocated in the c'tor.
Test::~Test() {
  delete gtest_flag_saver_;
}

// Sets up the test fixture.  The default implementation does nothing.
//
// A sub-class may override this.
void Test::SetUp() {
}

// Tears down the test fixture.  The default implementation does nothing.
//
// A sub-class may override this.
void Test::TearDown() {
}

// Allows user supplied key value pairs to be recorded for later output.
// The pair is handed to the UnitTest singleton, which does the recording.
void Test::RecordProperty(const std::string& key, const std::string& value) {
  UnitTest::GetInstance()->RecordProperty(key, value);
}

// Allows user supplied key value pairs to be recorded for later output.
void Test::RecordProperty(const std::string& key, int value) {
  Message value_message;
  value_message << value;
  RecordProperty(key, value_message.GetString().c_str());
}

namespace internal {

// Reports a test part result of the given type whose source location is not
// known.  The result is added to the UnitTest singleton with a NULL file
// name, line -1 and an empty stack trace.
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                    const std::string& message) {
  // This function is a friend of UnitTest and as such has access to
  // AddTestPartResult.
  UnitTest::GetInstance()->AddTestPartResult(
    result_type,
    NULL,  // No info about the source file where the exception occurred.
    -1,    // We have no info on which line caused the exception.
    message,
    "");   // No stack trace, either.
}

}  // namespace internal

// Google Test requires all tests in the same test case to use the same test
// fixture class.  This function checks if the current test has the
// same fixture class as the first test in the current test case.  If
// yes, it returns true; otherwise it generates a Google Test failure and
// returns false.
bool Test::HasSameFixtureClass() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  const TestCase* const test_case = impl->current_test_case();

  // Info about the first test in the current test case.
  const TestInfo* const first_test_info = test_case->test_info_list()[0];
  const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
  const char* const first_test_name = first_test_info->name();

  // Info about the current test.
  const TestInfo* const this_test_info = impl->current_test_info();
  const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
  const char* const this_test_name = this_test_info->name();

  if (this_fixture_id != first_fixture_id) {
    // Is the first test defined using TEST?
    const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
    // Is this test defined using TEST?
    const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();

    if (first_is_TEST || this_is_TEST) {
      // The user mixed TEST and TEST_F in this test case - we'll tell
      // him/her how to fix it.

      // Gets the name of the TEST and the name of the TEST_F.  Note
      // that first_is_TEST and this_is_TEST cannot both be true, as
      // the fixture IDs are different for the two tests.
      const char* const TEST_name =
        first_is_TEST ? first_test_name : this_test_name;
      const char* const TEST_F_name =
        first_is_TEST ? this_test_name : first_test_name;

      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class, so mixing TEST_F and TEST in the same test case is\n"
          << "illegal.  In test case " << this_test_info->test_case_name()
          << ",\n"
          << "test " << TEST_F_name << " is defined using TEST_F but\n"
          << "test " << TEST_name << " is defined using TEST.  You probably\n"
          << "want to change the TEST to TEST_F or move it to another test\n"
          << "case.";
    }
    else {
      // The user defined two fixture classes with the same name in
      // two namespaces - we'll tell him/her how to fix it.
      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class.  However, in test case "
          << this_test_info->test_case_name() << ",\n"
          << "you defined test " << first_test_name
          << " and test " << this_test_name << "\n"
          << "using two different test fixture classes.  This can happen if\n"
          << "the two classes are from different namespaces or translation\n"
          << "units and have the same name.  You should probably rename one\n"
          << "of the classes to put the tests into different test cases.";
    }

    return false;
  }

  return true;
}

#if GTEST_HAS_SEH

// Builds the text of the "exception thrown" failure reported for an SEH
// exception: the exception code in hexadecimal followed by the location.
//
// The message is returned as a heap-allocated std::string that the caller
// must delete.  A pointer is used because VC++ prohibits creation of objects
// with destructors on stack in functions using __try (see error C2712).
static std::string* FormatSehExceptionMessage(DWORD exception_code,
    const char* location) {
  Message message;
  message << "SEH exception with code 0x";
  // Switch to base 16 for the code only, then back to base 10.
  message << std::setbase(16) << exception_code << std::setbase(10);
  message << " thrown in " << location << ".";

  return new std::string(message.GetString());
}

#endif  // GTEST_HAS_SEH

namespace internal {

#if GTEST_HAS_EXCEPTIONS

// Builds the text of the "exception thrown" failure reported for a C++
// exception.
//
//   description: the exception's description, or NULL when it is unknown
//   location:    where the exception was thrown (e.g. "SetUp()")
static std::string FormatCxxExceptionMessage(const char* description,
    const char* location) {
  Message message;

  if (description == NULL) {
    message << "Unknown C++ exception";
  }
  else {
    message << "C++ exception with description \"" << description << "\"";
  }

  message << " thrown in " << location << ".";

  return message.GetString();
}

// Forward declaration; the definition is not in this part of the file.
static std::string PrintTestPartResultToString(
  const TestPartResult& test_part_result);

// Constructs the exception from a failed test part.  The text passed to
// ::std::runtime_error (and hence returned by what()) is the failure as
// rendered by PrintTestPartResultToString().
GoogleTestFailureException::GoogleTestFailureException(
  const TestPartResult& failure)
  : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}

#endif  // GTEST_HAS_EXCEPTIONS

// We put these helper functions in the internal namespace as IBM's xlC
// compiler rejects the code if they were declared static.

// Runs the given method and handles SEH exceptions it throws, when
// SEH is supported; returns the 0-value for type Result in case of an
// SEH exception.  (Microsoft compilers cannot handle SEH and C++
// exceptions in the same function.  Therefore, we provide a separate
// wrapper function for handling SEH exceptions.)
//
//   object:   the object to invoke the method on
//   method:   pointer to the parameterless member function to run
//   location: description of the method, used in the failure message
//             (unused when SEH is not supported)
template <class T, typename Result>
Result HandleSehExceptionsInMethodIfSupported(
  T* object, Result (T::*method)(), const char* location) {
#if GTEST_HAS_SEH

  __try {
    return (object->*method)();
  }
  // The filter expression decides whether this handler processes the
  // exception, based on its exception code.
  __except (internal::UnitTestOptions::GTestShouldProcessSEH(    // NOLINT
              GetExceptionCode())) {
    // We create the exception message on the heap because VC++ prohibits
    // creation of objects with destructors on stack in functions using __try
    // (see error C2712).
    std::string* exception_message = FormatSehExceptionMessage(
                                       GetExceptionCode(), location);
    // An SEH exception is reported as a fatal failure without a source
    // location.
    internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
        *exception_message);
    delete exception_message;
    return static_cast<Result>(0);
  }

#else
  // Without SEH support the method is simply invoked.
  (void)location;
  return (object->*method)();
#endif  // GTEST_HAS_SEH
}

// Runs the given method, catching and reporting C++ and/or SEH-style
// exceptions where those are supported.  When an exception is caught and
// reported, the 0-value for type Result is returned.
//
//   object:   the object to invoke the method on
//   method:   pointer to the parameterless member function to run
//   location: description of the method, used in failure messages
template <class T, typename Result>
Result HandleExceptionsInMethodIfSupported(
  T* object, Result (T::*method)(), const char* location) {
  // NOTE: User code can change how Google Test handles exceptions through
  // GTEST_FLAG(catch_exceptions), but only before RUN_ALL_TESTS() starts.
  // It would be technically possible to consult the flag after catching the
  // exception and then either report or re-throw it:
  //
  // try {
  //   // Perform the test method.
  // } catch (...) {
  //   if (GTEST_FLAG(catch_exceptions))
  //     // Report the exception as failure.
  //   else
  //     throw;  // Re-throws the original exception.
  // }
  //
  // However, the point of the flag is to let the program drop into the
  // debugger when the exception is thrown.  On most platforms the origin of
  // the exception is lost once control enters the catch block, so the
  // debugger would stop at the re-throw in this function instead of at the
  // original throw statement in the code under test.  The check is therefore
  // made up front, at the cost of not being able to influence exception
  // handling from within the method that throws.
  if (!internal::GetUnitTestImpl()->catch_exceptions()) {
    return (object->*method)();
  }

#if GTEST_HAS_EXCEPTIONS

  try {
    return HandleSehExceptionsInMethodIfSupported(object, method, location);
  }
  catch (const internal::GoogleTestFailureException&) {    // NOLINT
    // Only a failed Google Test assertion throws this type, and it does so
    // in order to let another testing framework catch it.  Pass it on.
    throw;
  }
  catch (const std::exception& e) {    // NOLINT
    internal::ReportFailureInUnknownLocation(
      TestPartResult::kFatalFailure,
      FormatCxxExceptionMessage(e.what(), location));
  }
  catch (...) {    // NOLINT
    internal::ReportFailureInUnknownLocation(
      TestPartResult::kFatalFailure,
      FormatCxxExceptionMessage(NULL, location));
  }

  // Reached only after an exception has been reported above.
  return static_cast<Result>(0);
#else
  return HandleSehExceptionsInMethodIfSupported(object, method, location);
#endif  // GTEST_HAS_EXCEPTIONS
}

}  // namespace internal

// Runs the test and updates the test result.
void Test::Run() {
  if (!HasSameFixtureClass()) {
    return;
  }

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");

  // We will run the test only if SetUp() was successful.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    internal::HandleExceptionsInMethodIfSupported(
      this, &Test::TestBody, "the test body");
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
    this, &Test::TearDown, "TearDown()");
}

// Returns true iff the current test has a fatal failure.  The answer comes
// from the current test result held by the UnitTestImpl singleton.
bool Test::HasFatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
}

// Returns true iff the current test has a non-fatal failure.  The answer
// comes from the current test result held by the UnitTestImpl singleton.
bool Test::HasNonfatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->
         HasNonfatalFailure();
}

// class TestInfo

// Constructs a TestInfo object. It assumes ownership of the test factory
// object.
//
// a_type_param and a_value_param may be NULL; a non-NULL value is copied
// into a newly allocated std::string, a NULL value is stored as NULL.
// should_run_, is_disabled_ and matches_filter_ all start out false and
// the result starts out default-constructed.
TestInfo::TestInfo(const std::string& a_test_case_name,
                   const std::string& a_name,
                   const char* a_type_param,
                   const char* a_value_param,
                   internal::TypeId fixture_class_id,
                   internal::TestFactoryBase* factory)
  : test_case_name_(a_test_case_name),
    name_(a_name),
    type_param_(a_type_param ? new std::string(a_type_param) : NULL),
    value_param_(a_value_param ? new std::string(a_value_param) : NULL),
    fixture_class_id_(fixture_class_id),
    should_run_(false),
    is_disabled_(false),
    matches_filter_(false),
    factory_(factory),
    result_() {}

// Destructs a TestInfo object.  The test factory handed to the
// constructor is owned by this object and is released here.
TestInfo::~TestInfo() {
  delete factory_;
}

namespace internal {

// Allocates a TestInfo describing one test, registers it with the
// UnitTestImpl singleton and hands the new object back to the caller.
//
// Arguments:
//
//   test_case_name:   name of the test case
//   name:             name of the test
//   type_param:       the name of the test's type parameter, or NULL if
//                     this is not a typed or a type-parameterized test.
//   value_param:      text representation of the test's value parameter,
//                     or NULL if this is not a value-parameterized test.
//   fixture_class_id: ID of the test fixture class
//   set_up_tc:        pointer to the function that sets up the test case
//   tear_down_tc:     pointer to the function that tears down the test case
//   factory:          pointer to the factory that creates a test object.
//                     Ownership of the factory passes to the new TestInfo.
TestInfo* MakeAndRegisterTestInfo(
  const char* test_case_name,
  const char* name,
  const char* type_param,
  const char* value_param,
  TypeId fixture_class_id,
  SetUpTestCaseFunc set_up_tc,
  TearDownTestCaseFunc tear_down_tc,
  TestFactoryBase* factory) {
  TestInfo* const registered_info = new TestInfo(
    test_case_name, name, type_param, value_param, fixture_class_id,
    factory);

  UnitTestImpl* const unit_test_impl = GetUnitTestImpl();
  unit_test_impl->AddTestInfo(set_up_tc, tear_down_tc, registered_info);

  return registered_info;
}

#if GTEST_HAS_PARAM_TEST
// Writes a diagnostic to stderr, prefixed with the formatted file/line
// location, explaining that a test case was defined with two different
// fixture classes.
void ReportInvalidTestCaseType(const char* test_case_name,
                               const char* file, int line) {
  Message explanation;
  explanation << "Attempted redefinition of test case " << test_case_name
              << ".\n";
  explanation
      << "All tests in the same test case must use the same test fixture\n";
  explanation << "class.  However, in test case " << test_case_name
              << ", you tried\n";
  explanation
      << "to define a test using a fixture class different from the one\n";
  explanation
      << "used earlier. This can happen if the two fixture classes are\n";
  explanation
      << "from different namespaces and have the same name. You should\n";
  explanation
      << "probably rename one of the classes to put the tests into different\n";
  explanation << "test cases.";

  const std::string location = FormatFileLocation(file, line);
  const std::string text = explanation.GetString();
  fprintf(stderr, "%s %s", location.c_str(), text.c_str());
}
#endif  // GTEST_HAS_PARAM_TEST

}  // namespace internal

namespace {

// Unary predicate over TestInfo pointers: true when the pointed-to test
// carries a given name.
//
// It exists only to support the TestCase implementation and lives in the
// anonymous namespace so that it does not leak into the enclosing one.
//
// TestNameIs is copyable.
class TestNameIs {
 public:
  // Stores a copy of the name to look for.  There is deliberately no
  // default constructor.
  explicit TestNameIs(const char* name)
    : name_(name) {}

  // A NULL test_info never matches; otherwise the test's name is compared
  // with the stored one.
  bool operator()(const TestInfo* test_info) const {
    if (test_info == NULL) {
      return false;
    }

    return test_info->name() == name_;
  }

 private:
  std::string name_;
};

}  // namespace

namespace internal {

// Turns every parameterized test registered through TEST_P and
// INSTANTIATE_TEST_CASE_P into regular tests and registers them.  The
// expansion happens at most once per program run; subsequent calls are
// no-ops.  Without GTEST_HAS_PARAM_TEST the function does nothing.
void UnitTestImpl::RegisterParameterizedTests() {
#if GTEST_HAS_PARAM_TEST

  if (parameterized_tests_registered_) {
    return;
  }

  parameterized_test_registry_.RegisterTests();
  parameterized_tests_registered_ = true;

#endif
}

}  // namespace internal

// Creates the test object, runs it, records its result, and then
// deletes it.
//
// Does nothing unless this test has been selected to run.  Listeners are
// notified through the repeater before the test object is created and
// after it has been destroyed; the elapsed time covers construction,
// execution and destruction of the test object.
void TestInfo::Run() {
  if (!should_run_) {
    return;
  }

  // Tells UnitTest where to store test result.
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_info(this);

  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();

  // Notifies the unit test event listeners that a test is about to start.
  repeater->OnTestStart(*this);

  const TimeInMillis start = internal::GetTimeInMillis();

  impl->os_stack_trace_getter()->UponLeavingGTest();

  // Creates the test object.  The result may be NULL, which is why every
  // later use of |test| is guarded.
  Test* const test = internal::HandleExceptionsInMethodIfSupported(
                       factory_, &internal::TestFactoryBase::CreateTest,
                       "the test fixture's constructor");

  // Runs the test only if the test object was created and its
  // constructor didn't generate a fatal failure.
  if ((test != NULL) && !Test::HasFatalFailure()) {
    // This doesn't throw as all user code that can throw are wrapped into
    // exception handling code.
    test->Run();
  }

  // Deletes the test object.  When creation failed there is no object to
  // destroy, and invoking a member function through a NULL pointer would
  // be undefined behaviour, so the call is skipped in that case.
  impl->os_stack_trace_getter()->UponLeavingGTest();

  if (test != NULL) {
    internal::HandleExceptionsInMethodIfSupported(
      test, &Test::DeleteSelf_, "the test fixture's destructor");
  }

  result_.set_elapsed_time(internal::GetTimeInMillis() - start);

  // Notifies the unit test event listener that a test has just finished.
  repeater->OnTestEnd(*this);

  // Tells UnitTest to stop associating assertion results to this
  // test.
  impl->set_current_test_info(NULL);
}

// class TestCase

// Counts the tests of this test case for which TestPassed holds.
int TestCase::successful_test_count() const {
  const int passed = CountIf(test_info_list_, TestPassed);
  return passed;
}

// Counts the tests of this test case for which TestFailed holds.
int TestCase::failed_test_count() const {
  const int failed = CountIf(test_info_list_, TestFailed);
  return failed;
}

// Gets the number of disabled tests that will be reported in the XML report.
int TestCase::reportable_disabled_test_count() const {
  return CountIf(test_info_list_, TestReportableDisabled);
}

// Gets the number of disabled tests in this test case.
int TestCase::disabled_test_count() const {
  return CountIf(test_info_list_, TestDisabled);
}

// Gets the number of tests to be printed in the XML report.
int TestCase::reportable_test_count() const {
  return CountIf(test_info_list_, TestReportable);
}

// Counts the tests of this test case that should run.
int TestCase::test_to_run_count() const {
  const int runnable = CountIf(test_info_list_, ShouldRunTest);
  return runnable;
}

// Returns how many tests are registered in this test case, whether or
// not they will run.
int TestCase::total_test_count() const {
  const size_t registered = test_info_list_.size();
  return static_cast<int>(registered);
}

// Constructs a test case that initially is not scheduled to run and has
// zero elapsed time.
//
// Arguments:
//
//   a_name:       name of the test case
//   a_type_param: the name of the test case's type parameter, or NULL if
//                 this is not a typed or a type-parameterized test case.
//                 A non-NULL value is copied.
//   set_up_tc:    pointer to the function that sets up the test case
//   tear_down_tc: pointer to the function that tears down the test case
TestCase::TestCase(const char* a_name, const char* a_type_param,
                   Test::SetUpTestCaseFunc set_up_tc,
                   Test::TearDownTestCaseFunc tear_down_tc)
  : name_(a_name),
    type_param_(a_type_param != NULL ? new std::string(a_type_param) : NULL),
    set_up_tc_(set_up_tc),
    tear_down_tc_(tear_down_tc),
    should_run_(false),
    elapsed_time_(0) {}

// Destroys the test case together with every TestInfo that was added
// to it.
TestCase::~TestCase() {
  const size_t owned_count = test_info_list_.size();

  for (size_t i = 0; i < owned_count; ++i) {
    internal::Delete<TestInfo>(test_info_list_[i]);
  }
}

// Looks up the i-th test in the current (possibly shuffled) order.
// Valid values of i are 0 .. total_test_count() - 1; any other value
// yields NULL.
const TestInfo* TestCase::GetTestInfo(int i) const {
  const int slot = GetElementOr(test_indices_, i, -1);

  if (slot < 0) {
    return NULL;
  }

  return test_info_list_[slot];
}

// Mutable counterpart of GetTestInfo(): looks up the i-th test in the
// current (possibly shuffled) order.  Valid values of i are
// 0 .. total_test_count() - 1; any other value yields NULL.
TestInfo* TestCase::GetMutableTestInfo(int i) {
  const int slot = GetElementOr(test_indices_, i, -1);

  if (slot < 0) {
    return NULL;
  }

  return test_info_list_[slot];
}

// Appends test_info to this test case and gives it the next position in
// the run order.  The TestCase deletes the test when it is itself
// destroyed.
void TestCase::AddTestInfo(TestInfo* test_info) {
  const int next_position = static_cast<int>(test_indices_.size());

  test_info_list_.push_back(test_info);
  test_indices_.push_back(next_position);
}

// Runs every test in this TestCase.
void TestCase::Run() {
  if (!should_run_) {
    return;
  }

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->set_current_test_case(this);

  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();

  repeater->OnTestCaseStart(*this);
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
    this, &TestCase::RunSetUpTestCase, "SetUpTestCase()");

  const internal::TimeInMillis start = internal::GetTimeInMillis();

  for (int i = 0; i < total_test_count(); i++) {
    GetMutableTestInfo(i)->Run();
  }

  elapsed_time_ = internal::GetTimeInMillis() - start;

  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
    this, &TestCase::RunTearDownTestCase, "TearDownTestCase()");

  repeater->OnTestCaseEnd(*this);
  impl->set_current_test_case(NULL);
}

// Resets the ad hoc test result of this test case and then the result
// of every test it contains.
void TestCase::ClearResult() {
  ad_hoc_test_result_.Clear();

  const size_t test_total = test_info_list_.size();

  for (size_t i = 0; i < test_total; ++i) {
    TestInfo::ClearTestResult(test_info_list_[i]);
  }
}

// Shuffles the tests in this test case.  Only the index table
// (test_indices_) is permuted, using the supplied random generator; the
// list of TestInfo objects itself is left untouched.
void TestCase::ShuffleTests(internal::Random* random) {
  Shuffle(random, &test_indices_);
}

// Puts the tests back into registration order by resetting the index
// table to the identity mapping (entry i refers to test i).
void TestCase::UnshuffleTests() {
  size_t position = 0;

  while (position < test_indices_.size()) {
    test_indices_[position] = static_cast<int>(position);
    ++position;
  }
}

// Renders "<count> <noun>", choosing the singular form when count is
// exactly 1 and the plural form otherwise.  For example:
//
// FormatCountableNoun(1, "formula", "formulae") returns "1 formula".
// FormatCountableNoun(5, "book", "books") returns "5 books".
static std::string FormatCountableNoun(int count,
                                       const char* singular_form,
                                       const char* plural_form) {
  const char* noun = plural_form;

  if (count == 1) {
    noun = singular_form;
  }

  std::string formatted = internal::StreamableToString(count);
  formatted += " ";
  formatted += noun;
  return formatted;
}

// Renders a number of tests as text, e.g. "1 test" or "3 tests".
static std::string FormatTestCount(int test_count) {
  static const char kSingular[] = "test";
  static const char kPlural[] = "tests";
  return FormatCountableNoun(test_count, kSingular, kPlural);
}

// Renders a number of test cases as text, e.g. "1 test case" or
// "3 test cases".
static std::string FormatTestCaseCount(int test_case_count) {
  static const char kSingular[] = "test case";
  static const char kPlural[] = "test cases";
  return FormatCountableNoun(test_case_count, kSingular, kPlural);
}

// Maps a TestPartResult::Type to the label used in the text output.
// Fatal and non-fatal failures share one label because the distinction
// rarely matters to someone reading the results; the label differs
// between MSVC builds ("error: ") and all other builds ("Failure\n").
static const char* TestPartResultTypeToString(TestPartResult::Type type) {
  if (type == TestPartResult::kSuccess) {
    return "Success";
  }

  if (type == TestPartResult::kNonFatalFailure ||
      type == TestPartResult::kFatalFailure) {
#ifdef _MSC_VER
    return "error: ";
#else
    return "Failure\n";
#endif
  }

  return "Unknown result type";
}

namespace internal {

// Prints a TestPartResult to an std::string.
static std::string PrintTestPartResultToString(
  const TestPartResult& test_part_result) {
  return (Message()
          << internal::FormatFileLocation(test_part_result.file_name(),
                                          test_part_result.line_number())
          << " " << TestPartResultTypeToString(test_part_result.type())
          << test_part_result.message()).GetString();
}

// Writes a TestPartResult, followed by a newline, to stdout and flushes
// the stream.
static void PrintTestPartResult(const TestPartResult& test_part_result) {
  const std::string text = PrintTestPartResultToString(test_part_result);
  printf("%s\n", text.c_str());
  fflush(stdout);
#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  // Under Visual Studio or a debugger the message is also sent to the
  // Output window, where double-clicking it jumps to the source
  // location; otherwise these calls do nothing.
  //
  // Windows Mobile is excluded because printing to stdout already goes
  // through OutputDebugString() there and the message would show up
  // twice.
  ::OutputDebugStringA(text.c_str());
  ::OutputDebugStringA("\n");
#endif
}

// class PrettyUnitTestResultPrinter

// Colors available to the text output.  COLOR_DEFAULT stands for "no
// explicit color".
enum GTestColor {
  COLOR_DEFAULT,
  COLOR_RED,
  COLOR_GREEN,
  COLOR_YELLOW
};

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Translates a GTestColor into a Windows console character attribute.
// Yellow is composed of red and green; any other value, including
// COLOR_DEFAULT, maps to 0.
WORD GetColorAttribute(GTestColor color) {
  if (color == COLOR_RED) {
    return FOREGROUND_RED;
  }

  if (color == COLOR_GREEN) {
    return FOREGROUND_GREEN;
  }

  if (color == COLOR_YELLOW) {
    return FOREGROUND_RED | FOREGROUND_GREEN;
  }

  return 0;
}

#else

// Translates a GTestColor into the digit used in the ANSI color escape
// sequence.  COLOR_DEFAULT is not a valid input; it and any other
// unknown value yield NULL.
const char* GetAnsiColorCode(GTestColor color) {
  if (color == COLOR_RED) {
    return "1";
  }

  if (color == COLOR_GREEN) {
    return "2";
  }

  if (color == COLOR_YELLOW) {
    return "3";
  }

  return NULL;
}

#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE

// Decides whether the text output should be colored, based on the
// "color" flag:
//
//   - "auto" (case-insensitive): on Windows, color is used whenever
//     stdout is a terminal; elsewhere stdout must be a terminal and the
//     TERM environment variable must name a terminal from a fixed list.
//   - "yes", "true", "t" (case-insensitive) or "1": always use color.
//   - anything else: never use color, to be conservative.
bool ShouldUseColor(bool stdout_is_tty) {
  const char* const color_flag = GTEST_FLAG(color).c_str();

  if (!String::CaseInsensitiveCStringEquals(color_flag, "auto")) {
    // An explicit setting overrides terminal detection.
    if (String::CaseInsensitiveCStringEquals(color_flag, "yes")) {
      return true;
    }

    if (String::CaseInsensitiveCStringEquals(color_flag, "true")) {
      return true;
    }

    if (String::CaseInsensitiveCStringEquals(color_flag, "t")) {
      return true;
    }

    return String::CStringEquals(color_flag, "1");
  }

#if GTEST_OS_WINDOWS
  // TERM is usually not set on Windows, but the console there does
  // support colors.
  return stdout_is_tty;
#else
  // Everywhere else the TERM variable decides.
  const char* const kColorTerminals[] = {
    "xterm",
    "xterm-color",
    "xterm-256color",
    "screen",
    "screen-256color",
    "linux",
    "cygwin"
  };
  const size_t terminal_count =
    sizeof(kColorTerminals) / sizeof(kColorTerminals[0]);

  const char* const term = posix::GetEnv("TERM");
  bool term_supports_color = false;

  for (size_t i = 0; i < terminal_count && !term_supports_color; ++i) {
    term_supports_color = String::CStringEquals(term, kColorTerminals[i]);
  }

  return stdout_is_tty && term_supports_color;
#endif  // GTEST_OS_WINDOWS
}

// Helpers for printing colored strings to stdout. Note that on Windows, we
// cannot simply emit special characters and have the terminal change colors.
// This routine must actually emit the characters rather than return a string
// that would be colored when printed, as can be done on Linux.
//
// Formats fmt and the variadic arguments printf-style and writes the
// result to stdout in the requested color.  When coloring is disabled,
// or color is COLOR_DEFAULT, the text is printed without any color
// handling.
void ColoredPrintf(GTestColor color, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);

#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
  // Color is never used on these platforms.
  const bool use_color = false;
#else
  // The color decision is made once, on the first call, and then reused.
  static const bool in_color_mode =
    ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
  const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
#endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
  // The '!= 0' comparison is necessary to satisfy MSVC 7.1.

  // Plain path: print the text and release the argument list.
  if (!use_color) {
    vprintf(fmt, args);
    va_end(args);
    return;
  }

#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);

  // Gets the current text color.
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
  const WORD old_color_attrs = buffer_info.wAttributes;

  // We need to flush the stream buffers into the console before each
  // SetConsoleTextAttribute call lest it affect the text that is already
  // printed but has not yet reached the console.
  fflush(stdout);
  SetConsoleTextAttribute(stdout_handle,
                          GetColorAttribute(color) | FOREGROUND_INTENSITY);
  vprintf(fmt, args);

  fflush(stdout);
  // Restores the text color.
  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
  // ANSI terminals: select the foreground color, print, then reset.
  printf("\033[0;3%sm", GetAnsiColorCode(color));
  vprintf(fmt, args);
  printf("\033[m");  // Resets the terminal to default.
#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
  va_end(args);
}

// Text printed in Google Test's text output and --gtest_list_tests
// output to label the type parameter and value parameter for a test.
static const char kTypeParamLabel[] = "TypeParam";
static const char kValueParamLabel[] = "GetParam()";

// Prints the parameter description of a test to stdout, in the form
// ", where TypeParam = <type> and GetParam() = <value>".  Each part is
// emitted only when the corresponding parameter is present; nothing is
// printed for a test that has neither.
void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
  const char* const type_text = test_info.type_param();
  const char* const value_text = test_info.value_param();
  const bool has_type = type_text != NULL;
  const bool has_value = value_text != NULL;

  if (!has_type && !has_value) {
    return;
  }

  printf(", where ");

  if (has_type) {
    printf("%s = %s", kTypeParamLabel, type_text);
  }

  if (has_type && has_value) {
    printf(" and ");
  }

  if (has_value) {
    printf("%s = %s", kValueParamLabel, value_text);
  }
}

// This class implements the TestEventListener interface.  It is the
// listener that writes the human-readable progress and summary lines
// ("[ RUN      ]", "[       OK ]", "[  FAILED  ]", ...) to stdout.
//
// Class PrettyUnitTestResultPrinter is copyable.
class PrettyUnitTestResultPrinter : public TestEventListener {
 public:
  PrettyUnitTestResultPrinter() {}
  // Prints "<test_case>.<test>" to stdout, without a trailing newline.
  static void PrintTestName(const char* test_case, const char* test) {
    printf("%s.%s", test_case, test);
  }

  // The following methods override what's in the TestEventListener class.
  // Events with an empty inline body produce no output.
  virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestCaseStart(const TestCase& test_case);
  virtual void OnTestStart(const TestInfo& test_info);
  virtual void OnTestPartResult(const TestPartResult& result);
  virtual void OnTestEnd(const TestInfo& test_info);
  virtual void OnTestCaseEnd(const TestCase& test_case);
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
  virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}

 private:
  // Prints one "[  FAILED  ]" line for every test that ran and did not
  // pass.
  static void PrintFailedTests(const UnitTest& unit_test);
};

// Fired before each iteration of tests starts.
void PrettyUnitTestResultPrinter::OnTestIterationStart(
  const UnitTest& unit_test, int iteration) {
  if (GTEST_FLAG(repeat) != 1) {
    printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);
  }

  const char* const filter = GTEST_FLAG(filter).c_str();

  // Prints the filter if it's not *.  This reminds the user that some
  // tests may be skipped.
  if (!String::CStringEquals(filter, kUniversalFilter)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: %s filter = %s\n", GTEST_NAME_, filter);
  }

  if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
    const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
    ColoredPrintf(COLOR_YELLOW,
                  "Note: This is test shard %d of %s.\n",
                  static_cast<int>(shard_index) + 1,
                  internal::posix::GetEnv(kTestTotalShards));
  }

  if (GTEST_FLAG(shuffle)) {
    ColoredPrintf(COLOR_YELLOW,
                  "Note: Randomizing tests' orders with a seed of %d .\n",
                  unit_test.random_seed());
  }

  ColoredPrintf(COLOR_GREEN,  "[==========] ");
  printf("Running %s from %s.\n",
         FormatTestCount(unit_test.test_to_run_count()).c_str(),
         FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
  fflush(stdout);
}

// Announces on stdout that the global test environments are about to
// be set up.
void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
  const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN,  "[----------] ");
  fputs("Global test environment set-up.\n", stdout);
  fflush(stdout);
}

// Prints the "[----------] N tests from <case>" line that opens a test
// case, followed by the type parameter when the test case has one.
void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
  const std::string run_summary =
    FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
  ColoredPrintf(COLOR_GREEN, "[----------] ");
  printf("%s from %s", run_summary.c_str(), test_case.name());

  if (test_case.type_param() != NULL) {
    printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
  }
  else {
    printf("\n");
  }

  fflush(stdout);
}

// Prints the "[ RUN      ] <case>.<test>" line for a test that is about
// to start.
void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
  const char* const case_name = test_info.test_case_name();
  const char* const test_name = test_info.name();

  ColoredPrintf(COLOR_GREEN,  "[ RUN      ] ");
  PrintTestName(case_name, test_name);
  printf("\n");
  fflush(stdout);
}

// Called for every test part result.  Successful parts are silent; for
// anything else the assertion's message (e.g. expected this and got
// that) is printed and stdout is flushed.
void PrettyUnitTestResultPrinter::OnTestPartResult(
  const TestPartResult& result) {
  const bool is_failure = result.type() != TestPartResult::kSuccess;

  if (is_failure) {
    PrintTestPartResult(result);
    fflush(stdout);
  }
}

void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
  if (test_info.result()->Passed()) {
    ColoredPrintf(COLOR_GREEN, "[       OK ] ");
  }
  else {
    ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
  }

  PrintTestName(test_info.test_case_name(), test_info.name());

  if (test_info.result()->Failed()) {
    PrintFullTestCommentIfPresent(test_info);
  }

  if (GTEST_FLAG(print_time)) {
    printf(" (%s ms)\n", internal::StreamableToString(
             test_info.result()->elapsed_time()).c_str());
  }
  else {
    printf("\n");
  }

  fflush(stdout);
}

// Prints the closing line of a test case, including its total time.
// Nothing is printed unless the print_time flag is set.
void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
  if (GTEST_FLAG(print_time)) {
    const std::string run_summary =
      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
    ColoredPrintf(COLOR_GREEN, "[----------] ");

    const std::string elapsed_ms =
      internal::StreamableToString(test_case.elapsed_time());
    printf("%s from %s (%s ms total)\n\n",
           run_summary.c_str(), test_case.name(), elapsed_ms.c_str());
    fflush(stdout);
  }
}

// Announces on stdout that the global test environments are about to
// be torn down.
void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
  const UnitTest& /*unit_test*/) {
  ColoredPrintf(COLOR_GREEN,  "[----------] ");
  fputs("Global test environment tear-down\n", stdout);
  fflush(stdout);
}

// Internal helper for printing the list of failed tests.
void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
  const int failed_test_count = unit_test.failed_test_count();

  if (failed_test_count == 0) {
    return;
  }

  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
    const TestCase& test_case = *unit_test.GetTestCase(i);

    if (!test_case.should_run() || (test_case.failed_test_count() == 0)) {
      continue;
    }

    for (int j = 0; j < test_case.total_test_count(); ++j) {
      const TestInfo& test_info = *test_case.GetTestInfo(j);

      if (!test_info.should_run() || test_info.result()->Passed()) {
        continue;
      }

      ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
      printf("%s.%s", test_case.name(), test_info.name());
      PrintFullTestCommentIfPresent(test_info);
      printf("\n");
    }
  }
}

// Prints the summary of one iteration: how many tests ran (with the
// total time when the print_time flag is set), how many passed, the
// list of failed tests when the run did not pass, and a reminder about
// disabled tests unless they were run as well.
void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
    int /*iteration*/) {
  ColoredPrintf(COLOR_GREEN,  "[==========] ");
  const std::string tests_ran = FormatTestCount(unit_test.test_to_run_count());
  const std::string cases_ran =
    FormatTestCaseCount(unit_test.test_case_to_run_count());
  printf("%s from %s ran.", tests_ran.c_str(), cases_ran.c_str());

  if (GTEST_FLAG(print_time)) {
    const std::string total_ms =
      internal::StreamableToString(unit_test.elapsed_time());
    printf(" (%s ms total)", total_ms.c_str());
  }

  printf("\n");
  ColoredPrintf(COLOR_GREEN,  "[  PASSED  ] ");
  const std::string tests_passed =
    FormatTestCount(unit_test.successful_test_count());
  printf("%s.\n", tests_passed.c_str());

  const int failure_total = unit_test.failed_test_count();

  if (!unit_test.Passed()) {
    ColoredPrintf(COLOR_RED,  "[  FAILED  ] ");
    printf("%s, listed below:\n", FormatTestCount(failure_total).c_str());
    PrintFailedTests(unit_test);
    printf("\n%2d FAILED %s\n", failure_total,
           failure_total == 1 ? "TEST" : "TESTS");
  }

  const int disabled_total = unit_test.reportable_disabled_test_count();

  if (disabled_total != 0 && !GTEST_FLAG(also_run_disabled_tests)) {
    if (failure_total == 0) {
      printf("\n");  // Add a spacer if no FAILURE banner is displayed.
    }

    ColoredPrintf(COLOR_YELLOW,
                  "  YOU HAVE %d DISABLED %s\n\n",
                  disabled_total,
                  disabled_total == 1 ? "TEST" : "TESTS");
  }

  // Ensure that Google Test output is printed before, e.g., heapchecker output.
  fflush(stdout);
}

// End PrettyUnitTestResultPrinter

// class TestEventRepeater
//
// This class forwards events to other event listeners.  Listeners that
// are still registered when the repeater is destroyed are deleted by it.
class TestEventRepeater : public TestEventListener {
 public:
  // Forwarding is enabled by default.
  TestEventRepeater() : forwarding_enabled_(true) {}
  virtual ~TestEventRepeater();
  // Adds listener to the end of the list of event receivers.
  void Append(TestEventListener* listener);
  // Removes listener from the list and returns it, or returns NULL when
  // it is not in the list.
  TestEventListener* Release(TestEventListener* listener);

  // Controls whether events will be forwarded to listeners_. Set to false
  // in death test child processes.
  bool forwarding_enabled() const {
    return forwarding_enabled_;
  }
  void set_forwarding_enabled(bool enable) {
    forwarding_enabled_ = enable;
  }

  // TestEventListener interface.  Each handler passes the event on to
  // the registered listeners while forwarding is enabled.
  virtual void OnTestProgramStart(const UnitTest& unit_test);
  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
  virtual void OnTestCaseStart(const TestCase& test_case);
  virtual void OnTestStart(const TestInfo& test_info);
  virtual void OnTestPartResult(const TestPartResult& result);
  virtual void OnTestEnd(const TestInfo& test_info);
  virtual void OnTestCaseEnd(const TestCase& test_case);
  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
  virtual void OnTestProgramEnd(const UnitTest& unit_test);

 private:
  // Controls whether events will be forwarded to listeners_. Set to false
  // in death test child processes.
  bool forwarding_enabled_;
  // The list of listeners that receive events.
  std::vector<TestEventListener*> listeners_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
};

// Deletes every listener that is still registered with the repeater.
TestEventRepeater::~TestEventRepeater() {
  const std::vector<TestEventListener*>::iterator last = listeners_.end();

  for (std::vector<TestEventListener*>::iterator it = listeners_.begin();
       it != last; ++it) {
    Delete<TestEventListener>(*it);
  }
}

// Registers listener at the end of the list of event receivers.  The
// pointer is stored as-is; no check for NULL or duplicates is made.
void TestEventRepeater::Append(TestEventListener* listener) {
  listeners_.push_back(listener);
}

// Unregisters the first occurrence of listener and returns it without
// deleting it.  Returns NULL when the listener is not registered.
//
// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
TestEventListener* TestEventRepeater::Release(TestEventListener* listener) {
  typedef std::vector<TestEventListener*>::iterator ListenerIterator;

  for (ListenerIterator it = listeners_.begin(); it != listeners_.end();
       ++it) {
    if (*it == listener) {
      listeners_.erase(it);
      return listener;
    }
  }

  return NULL;
}

// Since most methods are very similar, use macros to reduce boilerplate.
// This defines a member that forwards the call to all listeners, in the
// order in which they were appended.  Nothing is forwarded while
// forwarding_enabled_ is false.
#define GTEST_REPEATER_METHOD_(Name, Type) \
void TestEventRepeater::Name(const Type& parameter) { \
  if (forwarding_enabled_) { \
    for (size_t i = 0; i < listeners_.size(); i++) { \
      listeners_[i]->Name(parameter); \
    } \
  } \
}
// This defines a member that forwards the call to all listeners in reverse
// order.  The index is a signed int so that the loop can terminate once
// it drops below zero.
#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
void TestEventRepeater::Name(const Type& parameter) { \
  if (forwarding_enabled_) { \
    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
      listeners_[i]->Name(parameter); \
    } \
  } \
}

// "Start" events and test part results are delivered in registration
// order ...
GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
// ... while "End" events are delivered in reverse registration order.
GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)

#undef GTEST_REPEATER_METHOD_
#undef GTEST_REVERSE_REPEATER_METHOD_

void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
    int iteration) {
  if (forwarding_enabled_) {
    for (size_t i = 0; i < listeners_.size(); i++) {
      listeners_[i]->OnTestIterationStart(unit_test, iteration);
    }
  }
}

// Forwards the iteration-end event to every listener in reverse
// registration order, provided that forwarding is enabled.
void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
    int iteration) {
  if (!forwarding_enabled_) {
    return;
  }

  int position = static_cast<int>(listeners_.size()) - 1;

  while (position >= 0) {
    listeners_[position]->OnTestIterationEnd(unit_test, iteration);
    --position;
  }
}

// End TestEventRepeater

// This class generates an XML output file.  The report is written at
// the end of each test iteration to the path given to the constructor.
class XmlUnitTestResultPrinter : public EmptyTestEventListener {
 public:
  explicit XmlUnitTestResultPrinter(const char* output_file);

  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);

 private:
  // Is c a whitespace character that is normalized to a space character
  // when it appears in an XML attribute value?  (Tab, line feed or
  // carriage return.)
  static bool IsNormalizableWhitespace(char c) {
    return c == 0x9 || c == 0xA || c == 0xD;
  }

  // May c appear in a well-formed XML document?
  //
  // The comparison is made on the unsigned value of the byte.  With a
  // plain char, bytes >= 0x80 (such as those of UTF-8 multi-byte
  // sequences) are negative where char is signed and would wrongly be
  // classified as invalid, making the result platform-dependent.
  static bool IsValidXmlCharacter(char c) {
    return IsNormalizableWhitespace(c) ||
           static_cast<unsigned char>(c) >= 0x20;
  }

  // Returns an XML-escaped copy of the input string str.  If
  // is_attribute is true, the text is meant to appear as an attribute
  // value, and normalizable whitespace is preserved by replacing it
  // with character references.
  static std::string EscapeXml(const std::string& str, bool is_attribute);

  // Returns the given string with all characters invalid in XML removed.
  static std::string RemoveInvalidXmlCharacters(const std::string& str);

  // Convenience wrapper around EscapeXml when str is an attribute value.
  static std::string EscapeXmlAttribute(const std::string& str) {
    return EscapeXml(str, true);
  }

  // Convenience wrapper around EscapeXml when str is not an attribute value.
  static std::string EscapeXmlText(const char* str) {
    return EscapeXml(str, false);
  }

  // Verifies that the given attribute belongs to the given element and
  // streams the attribute as XML.
  static void OutputXmlAttribute(std::ostream* stream,
                                 const std::string& element_name,
                                 const std::string& name,
                                 const std::string& value);

  // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
  static void OutputXmlCDataSection(::std::ostream* stream, const char* data);

  // Streams an XML representation of a TestInfo object.
  static void OutputXmlTestInfo(::std::ostream* stream,
                                const char* test_case_name,
                                const TestInfo& test_info);

  // Prints an XML representation of a TestCase object
  static void PrintXmlTestCase(::std::ostream* stream,
                               const TestCase& test_case);

  // Prints an XML summary of unit_test to output stream out.
  static void PrintXmlUnitTest(::std::ostream* stream,
                               const UnitTest& unit_test);

  // Produces a string representing the test properties in a result as space
  // delimited XML attributes based on the property key="value" pairs.
  // When the std::string is not empty, it includes a space at the beginning,
  // to delimit this attribute from prior attributes.
  static std::string TestPropertiesAsXmlAttributes(const TestResult& result);

  // The output file.
  const std::string output_file_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter);
};

// Creates a new XmlUnitTestResultPrinter that will write its report to
// output_file.  Prints an error to stderr and terminates the process with
// EXIT_FAILURE when output_file is NULL or empty.
XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
  // Constructing a std::string from a NULL pointer is undefined behavior, so
  // NULL is mapped to the empty string and rejected by the check below.
  : output_file_(output_file != NULL ? output_file : "") {
  if (output_file_.empty()) {
    fprintf(stderr, "XML output file may not be null\n");
    fflush(stderr);
    exit(EXIT_FAILURE);
  }
}

// Called after the unit test ends.
void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
    int /*iteration*/) {
  FILE* xmlout = NULL;
  FilePath output_file(output_file_);
  FilePath output_dir(output_file.RemoveFileName());

  if (output_dir.CreateDirectoriesRecursively()) {
    xmlout = posix::FOpen(output_file_.c_str(), "w");
  }

  if (xmlout == NULL) {
    // TODO(wan): report the reason of the failure.
    //
    // We don't do it for now as:
    //
    //   1. There is no urgent need for it.
    //   2. It's a bit involved to make the errno variable thread-safe on
    //      all three operating systems (Linux, Windows, and Mac OS).
    //   3. To interpret the meaning of errno in a thread-safe way,
    //      we need the strerror_r() function, which is not available on
    //      Windows.
    fprintf(stderr,
            "Unable to open file \"%s\"\n",
            output_file_.c_str());
    fflush(stderr);
    exit(EXIT_FAILURE);
  }

  std::stringstream stream;
  PrintXmlUnitTest(&stream, unit_test);
  fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
  fclose(xmlout);
}

// Returns an XML-escaped copy of the input string str.  If is_attribute
// is true, the text is meant to appear as an attribute value, and
// normalizable whitespace is preserved by replacing it with character
// references.
//
// Invalid XML characters in str, if any, are stripped from the output.
// It is expected that most, if not all, of the text processed by this
// module will consist of ordinary English text.
// If this module is ever modified to produce version 1.1 XML output,
// most invalid characters can be retained using character references.
// TODO(wan): It might be nice to have a minimally invasive, human-readable
// escaping scheme for invalid characters, rather than dropping them.
std::string XmlUnitTestResultPrinter::EscapeXml(
  const std::string& str, bool is_attribute) {
  Message m;

  for (size_t i = 0; i < str.size(); ++i) {
    const char ch = str[i];

    switch (ch) {
      case '<':
        m << "&lt;";
        break;

      case '>':
        m << "&gt;";
        break;

      case '&':
        m << "&amp;";
        break;

      case '\'':
        if (is_attribute) {
          m << "&apos;";
        }
        else {
          m << '\'';
        }

        break;

      case '"':
        if (is_attribute) {
          m << "&quot;";
        }
        else {
          m << '"';
        }

        break;

      default:
        if (IsValidXmlCharacter(ch)) {
          if (is_attribute && IsNormalizableWhitespace(ch))
            m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
              << ";";
          else {
            m << ch;
          }
        }

        break;
    }
  }

  return m.GetString();
}

// Returns a copy of str that keeps only the characters accepted by
// IsValidXmlCharacter.  Rejected characters are dropped; an alternative
// would be to replace them with certain characters such as . or ?.
std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
  const std::string& str) {
  std::string cleaned;
  cleaned.reserve(str.size());

  for (size_t i = 0; i < str.size(); ++i) {
    const char c = str[i];

    if (!IsValidXmlCharacter(c)) {
      continue;
    }

    cleaned.push_back(c);
  }

  return cleaned;
}

// The following routines generate an XML representation of a UnitTest
// object.
//
// This is how Google Test concepts map to the DTD:
//
// <testsuites name="AllTests">        <-- corresponds to a UnitTest object
//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
//     <testcase name="test-name">     <-- corresponds to a TestInfo object
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//                                     <-- individual assertion failures
//     </testcase>
//   </testsuite>
// </testsuites>

// Formats the given time in milliseconds as seconds, using the default
// floating-point formatting of a std::stringstream.
std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
  const double seconds = ms / 1000.0;
  ::std::stringstream formatted;
  formatted << seconds;
  return formatted.str();
}

// Converts the given epoch time in milliseconds to a local-time date string
// in the ISO 8601 format (YYYY-MM-DDThh:mm:ss), without the timezone
// information.  Returns an empty string when localtime() cannot convert the
// value.
std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
  // Using non-reentrant version as localtime_r is not portable.
  time_t epoch_seconds = static_cast<time_t>(ms / 1000);
#ifdef _MSC_VER
# pragma warning(push)          // Saves the current warning state.
# pragma warning(disable:4996)  // Temporarily disables warning 4996
  // (function or variable may be unsafe).
  const struct tm* const local = localtime(&epoch_seconds);  // NOLINT
# pragma warning(pop)           // Restores the warning state again.
#else
  const struct tm* const local = localtime(&epoch_seconds);  // NOLINT
#endif

  if (local == NULL) {
    return "";  // Invalid ms value
  }

  // Assemble YYYY-MM-DDThh:mm:ss piece by piece.
  std::string iso8601 = StreamableToString(local->tm_year + 1900);
  iso8601 += "-";
  iso8601 += String::FormatIntWidth2(local->tm_mon + 1);
  iso8601 += "-";
  iso8601 += String::FormatIntWidth2(local->tm_mday);
  iso8601 += "T";
  iso8601 += String::FormatIntWidth2(local->tm_hour);
  iso8601 += ":";
  iso8601 += String::FormatIntWidth2(local->tm_min);
  iso8601 += ":";
  iso8601 += String::FormatIntWidth2(local->tm_sec);
  return iso8601;
}

// Streams data as an XML CDATA section.  Every "]]>" inside data would end
// the section early, so each occurrence closes the current section, emits
// "]]&gt;" as ordinary text and opens a new section for the remainder.
void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
    const char* data) {
  *stream << "<![CDATA[";

  const char* cursor = data;
  const char* terminator = strstr(cursor, "]]>");

  while (terminator != NULL) {
    // Copy everything up to the terminator, then step over it.
    stream->write(
      cursor, static_cast<std::streamsize>(terminator - cursor));
    *stream << "]]>]]&gt;<![CDATA[";
    cursor = terminator + strlen("]]>");
    terminator = strstr(cursor, "]]>");
  }

  // No terminator is left in the tail, so it can be streamed verbatim.
  *stream << cursor;
  *stream << "]]>";
}

// Verifies that the attribute called name belongs to the element called
// element_name and streams it as ` name="value"` (with a leading space),
// escaping value with EscapeXmlAttribute.  name itself is streamed unescaped.
void XmlUnitTestResultPrinter::OutputXmlAttribute(
  std::ostream* stream,
  const std::string& element_name,
  const std::string& name,
  const std::string& value) {
  // The set of attribute names permitted for this element.
  const std::vector<std::string>& allowed_names =
    GetReservedAttributesForElement(element_name);

  // NOTE(review): GTEST_CHECK_ presumably embeds the condition text in its
  // failure output, so keep the expression as written - confirm against the
  // macro definition before refactoring.
  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
               allowed_names.end())
      << "Attribute " << name << " is not allowed for element <" << element_name
      << ">.";

  *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
}

// Streams a <testcase> element describing test_info.  The element carries
// the test's name, optional value/type parameters, run status, elapsed time,
// owning test case name and recorded properties as attributes.  Each failed
// test part becomes a nested <failure> element; when there are none the
// element is written in self-closing form.
// TODO(wan): There is also value in printing properties with the plain printer.
void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
    const char* test_case_name,
    const TestInfo& test_info) {
  const std::string kTestcase = "testcase";
  const TestResult& result = *test_info.result();

  *stream << "    <testcase";
  OutputXmlAttribute(stream, kTestcase, "name", test_info.name());

  // Parameter attributes are only present for parameterized tests.
  if (test_info.value_param() != NULL) {
    OutputXmlAttribute(stream, kTestcase, "value_param",
                       test_info.value_param());
  }

  if (test_info.type_param() != NULL) {
    OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
  }

  OutputXmlAttribute(stream, kTestcase, "status",
                     test_info.should_run() ? "run" : "notrun");
  OutputXmlAttribute(stream, kTestcase, "time",
                     FormatTimeInMillisAsSeconds(result.elapsed_time()));
  OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
  *stream << TestPropertiesAsXmlAttributes(result);

  // The start tag is left open until the first failure is seen, so that a
  // test without failures can be closed with " />".
  bool start_tag_closed = false;
  const int part_count = result.total_part_count();

  for (int index = 0; index < part_count; ++index) {
    const TestPartResult& part = result.GetTestPartResult(index);

    if (!part.failed()) {
      continue;
    }

    if (!start_tag_closed) {
      *stream << ">\n";
      start_tag_closed = true;
    }

    const string location = internal::FormatCompilerIndependentFileLocation(
                              part.file_name(), part.line_number());
    const string summary = location + "\n" + part.summary();
    const string detail = location + "\n" + part.message();

    *stream << "      <failure message=\""
            << EscapeXmlAttribute(summary.c_str())
            << "\" type=\"\">";
    OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
    *stream << "</failure>\n";
  }

  if (start_tag_closed) {
    *stream << "    </testcase>\n";
  }
  else {
    *stream << " />\n";
  }
}

// Prints an XML representation of a TestCase object
void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
    const TestCase& test_case) {
  const std::string kTestsuite = "testsuite";
  *stream << "  <" << kTestsuite;
  OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
  OutputXmlAttribute(stream, kTestsuite, "tests",
                     StreamableToString(test_case.reportable_test_count()));
  OutputXmlAttribute(stream, kTestsuite, "failures",
                     StreamableToString(test_case.failed_test_count()));
  OutputXmlAttribute(
    stream, kTestsuite, "disabled",
    StreamableToString(test_case.reportable_disabled_test_count()));
  OutputXmlAttribute(stream, kTestsuite, "errors", "0");
  OutputXmlAttribute(stream, kTestsuite, "time",
                     FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
  *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
          << ">\n";

  for (int i = 0; i < test_case.total_test_count(); ++i) {
    if (test_case.GetTestInfo(i)->is_reportable()) {
      OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
    }
  }

  *stream << "  </" << kTestsuite << ">\n";
}

// Prints an XML summary of unit_test to output stream out.
void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
    const UnitTest& unit_test) {
  const std::string kTestsuites = "testsuites";

  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  *stream << "<" << kTestsuites;

  OutputXmlAttribute(stream, kTestsuites, "tests",
                     StreamableToString(unit_test.reportable_test_count()));
  OutputXmlAttribute(stream, kTestsuites, "failures",
                     StreamableToString(unit_test.failed_test_count()));
  OutputXmlAttribute(
    stream, kTestsuites, "disabled",
    StreamableToString(unit_test.reportable_disabled_test_count()));
  OutputXmlAttribute(stream, kTestsuites, "errors", "0");
  OutputXmlAttribute(
    stream, kTestsuites, "timestamp",
    FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
  OutputXmlAttribute(stream, kTestsuites, "time",
                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));

  if (GTEST_FLAG(shuffle)) {
    OutputXmlAttribute(stream, kTestsuites, "random_seed",
                       StreamableToString(unit_test.random_seed()));
  }

  *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());

  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
  *stream << ">\n";

  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
    if (unit_test.GetTestCase(i)->reportable_test_count() > 0) {
      PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
    }
  }

  *stream << "</" << kTestsuites << ">\n";
}

// Renders the test properties recorded in result as XML attributes of the
// form ` key="value"`.  Each attribute is preceded by a space, so a
// non-empty return value starts with a space; values are escaped with
// EscapeXmlAttribute, keys are emitted as they are.
std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
  const TestResult& result) {
  Message attributes;
  const int property_count = result.test_property_count();

  for (int index = 0; index < property_count; ++index) {
    const TestProperty& property = result.GetTestProperty(index);
    attributes << " " << property.key() << "=";
    attributes << "\"" << EscapeXmlAttribute(property.value()) << "\"";
  }

  return attributes.GetString();
}

// End XmlUnitTestResultPrinter

#if GTEST_CAN_STREAM_RESULTS_

// Returns a copy of str in which every '=', '&', '%' and '\n' character is
// replaced by "%xx", where xx is the hexadecimal value of the character.
// For example, "=" becomes "%3D".  The algorithm is O(strlen(str)) in both
// time and space -- important as the input str may contain an arbitrarily
// long test failure message and stack trace.
string StreamingListener::UrlEncode(const char* str) {
  string encoded;
  encoded.reserve(strlen(str) + 1);

  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    const char c = *cursor;
    const bool needs_escape =
      c == '%' || c == '=' || c == '&' || c == '\n';

    if (needs_escape) {
      encoded.append("%" + String::FormatByte(static_cast<unsigned char>(c)));
    }
    else {
      encoded.push_back(c);
    }
  }

  return encoded;
}

// Resolves host_name_/port_num_ and connects sockfd_ to the first resolved
// address that accepts a connection.  Must only be called while no
// connection exists (sockfd_ == -1).  Failures are reported as warnings and
// leave sockfd_ at -1.
void StreamingListener::SocketWriter::MakeConnection() {
  GTEST_CHECK_(sockfd_ == -1)
      << "MakeConnection() can't be called when there is already a connection.";

  addrinfo hints;
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;    // To allow both IPv4 and IPv6 addresses.
  hints.ai_socktype = SOCK_STREAM;
  addrinfo* servinfo = NULL;

  // Use the getaddrinfo() to get a linked list of IP addresses for
  // the given host name.
  const int error_num = getaddrinfo(
                          host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);

  if (error_num != 0) {
    GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
                        << gai_strerror(error_num);
    // No result list is produced on failure; make sure the pointer is not
    // walked or freed below.
    servinfo = NULL;
  }

  // Loop through all the results and connect to the first we can.
  for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL;
       cur_addr = cur_addr->ai_next) {
    sockfd_ = socket(
                cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol);

    if (sockfd_ != -1) {
      // Connect the client socket to the server socket.
      if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) {
        close(sockfd_);
        sockfd_ = -1;
      }
    }
  }

  // freeaddrinfo() is only specified for lists returned by getaddrinfo();
  // passing NULL crashes on some C libraries.
  if (servinfo != NULL) {
    freeaddrinfo(servinfo);  // all done with this structure
  }

  if (sockfd_ == -1) {
    GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to "
                        << host_name_ << ":" << port_num_;
  }
}

// End of class Streaming Listener
#endif  // GTEST_CAN_STREAM_RESULTS_

// Class ScopedTrace

// Records the given source file location and message as a TraceInfo and
// pushes it onto the trace stack maintained by Google Test, via
// UnitTest::PushGTestTrace().
ScopedTrace::ScopedTrace(const char* file, int line, const Message& message)
GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
  TraceInfo entry;
  entry.message = message.GetString();
  entry.line = line;
  entry.file = file;

  UnitTest* const unit_test = UnitTest::GetInstance();
  unit_test->PushGTestTrace(entry);
}

// Pops the info pushed by the c'tor by calling PopGTestTrace() on the
// UnitTest singleton.
ScopedTrace::~ScopedTrace()
GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
  UnitTest::GetInstance()->PopGTestTrace();
}


// class OsStackTraceGetter

// Returns the current OS stack trace as an std::string.  Parameters:
//
//   max_depth  - the maximum number of stack frames to be included
//                in the trace.
//   skip_count - the number of top frames to be skipped; doesn't count
//                against max_depth.
//
// This implementation is a stub: both parameters are ignored and an empty
// string is always returned.
string OsStackTraceGetter::CurrentStackTrace(int /* max_depth */,
    int /* skip_count */)
GTEST_LOCK_EXCLUDED_(mutex_) {
  return "";
}

// Intentionally empty in this implementation: nothing is recorded here.
void OsStackTraceGetter::UponLeavingGTest()
GTEST_LOCK_EXCLUDED_(mutex_) {
}

// String constant formed by placing GTEST_NAME_ between "... " and
// " internal frames ...".  Judging by its name, it stands in for stack
// frames that were elided from a trace.
const char* const
OsStackTraceGetter::kElidedFramesMarker =
  "... " GTEST_NAME_ " internal frames ...";

// A helper class that creates the premature-exit file in its
// constructor and deletes the file in its destructor.
//
// A NULL or empty path disables the helper: no file is created or removed.
class ScopedPrematureExitFile {
 public:
  // Creates the file named by premature_exit_filepath (if any).  The path is
  // copied, so the caller's buffer (e.g. the result of getenv()) does not
  // have to stay valid for the lifetime of this object.
  explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
    : premature_exit_filepath_(
        premature_exit_filepath != NULL ? premature_exit_filepath : "") {
    // If a path to the premature-exit file is specified...
    if (!premature_exit_filepath_.empty()) {
      // create the file with a single "0" character in it.  I/O
      // errors are ignored as there's nothing better we can do and we
      // don't want to fail the test because of this.
      FILE* pfile = posix::FOpen(premature_exit_filepath_.c_str(), "w");

      // A failed open yields NULL, which must not reach fwrite()/fclose().
      if (pfile != NULL) {
        fwrite("0", 1, 1, pfile);
        fclose(pfile);
      }
    }
  }

  // Deletes the file created by the constructor, if a path was given.
  ~ScopedPrematureExitFile() {
    if (!premature_exit_filepath_.empty()) {
      remove(premature_exit_filepath_.c_str());
    }
  }

 private:
  // Own copy of the path; empty when the helper is disabled.
  const std::string premature_exit_filepath_;

  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile);
};

}  // namespace internal

// class TestEventListeners

// Creates the listener list: allocates the repeater and starts with no
// default result printer and no default XML generator.
TestEventListeners::TestEventListeners()
  : repeater_(new internal::TestEventRepeater()),
    default_result_printer_(NULL),
    default_xml_generator_(NULL) {
}

// Destroys the repeater allocated by the constructor.
TestEventListeners::~TestEventListeners() {
  delete repeater_;
}

// Adds the given event listener to the listener list by forwarding it to
// the repeater.
void TestEventListeners::Append(TestEventListener* listener) {
  repeater_->Append(listener);
}

// Removes the given event listener from the list and returns it.  It then
// becomes the caller's responsibility to delete the listener. Returns
// NULL if the listener is not found in the list.
//
// If the listener is one of the two default listeners, the corresponding
// default pointer is reset to NULL first.
TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
  const bool releasing_result_printer = (listener == default_result_printer_);

  if (releasing_result_printer) {
    default_result_printer_ = NULL;
  }
  else if (listener == default_xml_generator_) {
    default_xml_generator_ = NULL;
  }

  TestEventListener* const released = repeater_->Release(listener);
  return released;
}

// Returns repeater that broadcasts the TestEventListener events to all
// subscribers.  The returned pointer is the repeater_ member itself, which
// remains owned by this object.
TestEventListener* TestEventListeners::repeater() {
  return repeater_;
}

// Makes listener the default result printer.  The previous default result
// printer is removed from the listener list and deleted, and listener is
// appended to the list unless it is NULL.  Does nothing if listener already
// is the default result printer.
void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
  if (default_result_printer_ == listener) {
    return;
  }

  // It is an error to pass this method a listener that is already in the
  // list.
  TestEventListener* const previous = Release(default_result_printer_);
  delete previous;
  default_result_printer_ = listener;

  if (listener != NULL) {
    Append(listener);
  }
}

// Makes listener the default XML generator.  The previous default XML
// generator is removed from the listener list and deleted, and listener is
// appended to the list unless it is NULL.  Does nothing if listener already
// is the default XML generator.
void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
  if (default_xml_generator_ == listener) {
    return;
  }

  // It is an error to pass this method a listener that is already in the
  // list.
  TestEventListener* const previous = Release(default_xml_generator_);
  delete previous;
  default_xml_generator_ = listener;

  if (listener != NULL) {
    Append(listener);
  }
}

// Returns whether events will be forwarded by the repeater to the
// listeners in the list, as reported by the repeater's forwarding_enabled().
bool TestEventListeners::EventForwardingEnabled() const {
  return repeater_->forwarding_enabled();
}

// Turns event forwarding off by clearing the repeater's forwarding_enabled
// flag.
void TestEventListeners::SuppressEventForwarding() {
  repeater_->set_forwarding_enabled(false);
}

// class UnitTest

// Gets the singleton UnitTest object.  The first time this method is
// called, a UnitTest object is constructed and returned.  Consecutive
// calls will return the same object.
//
// We don't protect this under mutex_ as a user is not supposed to
// call this before main() starts, from which point on the return
// value will never change.
UnitTest* UnitTest::GetInstance() {
  // When compiled with MSVC 7.1 in optimized mode, destroying the
  // UnitTest object upon exiting the program messes up the exit code,
  // causing successful tests to appear failed.  We have to use a
  // different implementation in this case to bypass the compiler bug.
  // This implementation makes the compiler happy, at the cost of
  // leaking the UnitTest object.

  // CodeGear C++Builder insists on a public destructor for the
  // default implementation.  Use this implementation to keep good OO
  // design with private destructor.

#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
  // Heap-allocated and never deleted, so the destructor is never run.
  static UnitTest* const instance = new UnitTest;
  return instance;
#else
  // Default implementation: a function-local static object.
  static UnitTest instance;
  return &instance;
#endif  // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
}

// Gets the number of successful test cases.  Forwards to the implementation
// object returned by impl().
int UnitTest::successful_test_case_count() const {
  return impl()->successful_test_case_count();
}

// Gets the number of failed test cases.  Forwards to the implementation
// object returned by impl().
int UnitTest::failed_test_case_count() const {
  return impl()->failed_test_case_count();
}

// Gets the number of all test cases.  Forwards to the implementation
// object returned by impl().
int UnitTest::total_test_case_count() const {
  return impl()->total_test_case_count();
}

// Gets the number of all test cases that contain at least one test
// that should run.  Forwards to the implementation object returned by
// impl().
int UnitTest::test_case_to_run_count() const {
  return impl()->test_case_to_run_count();
}

// Gets the number of successful tests.  Forwards to the implementation
// object returned by impl().
int UnitTest::successful_test_count() const {
  return impl()->successful_test_count();
}

// Gets the number of failed tests.  Forwards to the implementation object
// returned by impl().
int UnitTest::failed_test_count() const {
  return impl()->failed_test_count();
}

// Gets the number of disabled tests that will be reported in the XML report.
// Forwards to the implementation object returned by impl().
int UnitTest::reportable_disabled_test_count() const {
  return impl()->reportable_disabled_test_count();
}

// Gets the number of disabled tests.  Forwards to the implementation object
// returned by impl().
int UnitTest::disabled_test_count() const {
  return impl()->disabled_test_count();
}

// Gets the number of tests to be printed in the XML report.  Forwards to
// the implementation object returned by impl().
int UnitTest::reportable_test_count() const {
  return impl()->reportable_test_count();
}

// Gets the number of all tests.  Forwards to the implementation object
// returned by impl().
int UnitTest::total_test_count() const {
  return impl()->total_test_count();
}

// Gets the number of tests that should run.  Forwards to the implementation
// object returned by impl().
int UnitTest::test_to_run_count() const {
  return impl()->test_to_run_count();
}

// Gets the time of the test program start, in ms from the start of the
// UNIX epoch.  Forwards to the implementation object returned by impl().
internal::TimeInMillis UnitTest::start_timestamp() const {
  return impl()->start_timestamp();
}

// Gets the elapsed time, in milliseconds.  Forwards to the implementation
// object returned by impl().
internal::TimeInMillis UnitTest::elapsed_time() const {
  return impl()->elapsed_time();
}

// Returns true iff the unit test passed (i.e. all test cases passed).
// Forwards to the implementation object returned by impl().
bool UnitTest::Passed() const {
  return impl()->Passed();
}

// Returns true iff the unit test failed (i.e. some test case failed
// or something outside of all tests failed).  Forwards to the
// implementation object returned by impl().
bool UnitTest::Failed() const {
  return impl()->Failed();
}

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
// The returned test case is read-only; the lookup itself is performed by
// the implementation object returned by impl().
const TestCase* UnitTest::GetTestCase(int i) const {
  return impl()->GetTestCase(i);
}

// Returns the TestResult containing information on test failures and
// properties logged outside of individual test cases.  The reference is
// obtained by dereferencing the pointer returned by the implementation
// object.
const TestResult& UnitTest::ad_hoc_test_result() const {
  return *impl()->ad_hoc_test_result();
}

// Gets the i-th test case among all the test cases. i can range from 0 to
// total_test_case_count() - 1. If i is not in that range, returns NULL.
// Unlike GetTestCase(), the returned test case is not const.
TestCase* UnitTest::GetMutableTestCase(int i) {
  return impl()->GetMutableTestCase(i);
}

// Returns the list of event listeners that can be used to track events
// inside Google Test.  The reference is obtained by dereferencing the
// pointer returned by the implementation object.
TestEventListeners& UnitTest::listeners() {
  return *impl()->listeners();
}

// Registers and returns a global test environment.  When a test
// program is run, all global test environments will be set-up in the
// order they were registered.  After all tests in the program have
// finished, all global test environments will be torn-down in the
// *reverse* order they were registered.
//
// The UnitTest object takes ownership of the given environment.
// A NULL env is not registered and NULL is returned.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
Environment* UnitTest::AddEnvironment(Environment* env) {
  if (env != NULL) {
    impl_->environments().push_back(env);
  }

  return env;
}

// Adds a TestPartResult to the current TestResult object.  All Google Test
// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
// this to report their results.  The user code should use the
// assertion macros instead of calling this directly.
//
// The reported message consists of message, followed by the active
// SCOPED_TRACE entries (most recent first) and, if non-empty, the OS stack
// trace.  For non-success results the break_on_failure and throw_on_failure
// flags are honored after the result has been reported.
void UnitTest::AddTestPartResult(
  TestPartResult::Type result_type,
  const char* file_name,
  int line_number,
  const std::string& message,
  const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
  Message msg;
  msg << message;

  internal::MutexLock lock(&mutex_);

  if (!impl_->gtest_trace_stack().empty()) {
    msg << "\n" << GTEST_NAME_ << " trace:";

    // Walk the trace stack from the most recently pushed entry down to the
    // oldest one.
    const int depth = static_cast<int>(impl_->gtest_trace_stack().size());

    for (int index = depth - 1; index >= 0; --index) {
      const internal::TraceInfo& trace = impl_->gtest_trace_stack()[index];
      msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
          << " " << trace.message;
    }
  }

  if (!os_stack_trace.empty()) {
    msg << internal::kStackTraceMarker << os_stack_trace;
  }

  const TestPartResult result(result_type, file_name, line_number,
                              msg.GetString().c_str());
  impl_->GetTestPartResultReporterForCurrentThread()->
  ReportTestPartResult(result);

  if (result_type == TestPartResult::kSuccess) {
    return;
  }

  // gtest_break_on_failure takes precedence over
  // gtest_throw_on_failure.  This allows a user to set the latter
  // in the code (perhaps in order to use Google Test assertions
  // with another testing framework) and specify the former on the
  // command line for debugging.
  if (GTEST_FLAG(break_on_failure)) {
#if GTEST_OS_WINDOWS
    // Using DebugBreak on Windows allows gtest to still break into a debugger
    // when a failure happens and both the --gtest_break_on_failure and
    // the --gtest_catch_exceptions flags are specified.
    DebugBreak();
#else
    // Dereference NULL through a volatile pointer to prevent the compiler
    // from removing. We use this rather than abort() or __builtin_trap() for
    // portability: Symbian doesn't implement abort() well, and some debuggers
    // don't correctly trap abort().
    *static_cast<volatile int*>(NULL) = 1;
#endif  // GTEST_OS_WINDOWS
  }
  else if (GTEST_FLAG(throw_on_failure)) {
#if GTEST_HAS_EXCEPTIONS
    throw internal::GoogleTestFailureException(result);
#else
    // We cannot call abort() as it generates a pop-up in debug mode
    // that cannot be suppressed in VC 7.1 or below.
    exit(1);
#endif
  }
}

// Adds a TestProperty to the current TestResult object when invoked from
// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
// from SetUpTestCase or TearDownTestCase, or to the global property set
// when invoked elsewhere.  If the result already contains a property with
// the same key, the value will be updated.
//
// The key/value pair is wrapped in a TestProperty and handed to the
// implementation object, which selects the destination.
void UnitTest::RecordProperty(const std::string& key,
                              const std::string& value) {
  impl_->RecordProperty(TestProperty(key, value));
}

// Runs all tests in this UnitTest object and prints the result.
// Returns 0 if successful, or 1 otherwise.
//
// Before the tests are run, the premature-exit file is set up (unless this
// is a death test child process), the catch_exceptions flag is captured and,
// on SEH-enabled builds, crash/abort dialogs are suppressed.
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
int UnitTest::Run() {
  // A non-empty internal_run_death_test flag marks a death test child.
  const bool in_death_test_child_process =
    internal::GTEST_FLAG(internal_run_death_test).length() > 0;

  // Google Test implements this protocol for catching that a test
  // program exits before returning control to Google Test:
  //
  //   1. Upon start, Google Test creates a file whose absolute path
  //      is specified by the environment variable
  //      TEST_PREMATURE_EXIT_FILE.
  //   2. When Google Test has finished its work, it deletes the file.
  //
  // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
  // running a Google-Test-based test program and check the existence
  // of the file at the end of the test execution to see if it has
  // exited prematurely.

  // If we are in the child process of a death test, don't
  // create/delete the premature exit file, as doing so is unnecessary
  // and will confuse the parent process.  Otherwise, create/delete
  // the file upon entering/leaving this function.  If the program
  // somehow exits before this function has a chance to return, the
  // premature-exit file will be left undeleted, causing a test runner
  // that understands the premature-exit-file protocol to report the
  // test as having failed.
  const internal::ScopedPrematureExitFile premature_exit_file(
    in_death_test_child_process ?
    NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));

  // Captures the value of GTEST_FLAG(catch_exceptions).  This value will be
  // used for the duration of the program.
  impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));

#if GTEST_HAS_SEH

  // Either the user wants Google Test to catch exceptions thrown by the
  // tests or this is executing in the context of death test child
  // process. In either case the user does not want to see pop-up dialogs
  // about crashes - they are expected.
  if (impl()->catch_exceptions() || in_death_test_child_process) {
# if !GTEST_OS_WINDOWS_MOBILE
    // SetErrorMode doesn't exist on CE.
    SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
                 SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
# endif  // !GTEST_OS_WINDOWS_MOBILE

# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
    // Death test children can be terminated with _abort().  On Windows,
    // _abort() can show a dialog with a warning message.  This forces the
    // abort message to go to stderr instead.
    _set_error_mode(_OUT_TO_STDERR);
# endif

# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE

    // In the debug version, Visual Studio pops up a separate dialog
    // offering a choice to debug the aborted program. We need to suppress
    // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
    // executed. Google Test will notify the user of any unexpected
    // failure via stderr.
    //
    // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
    // Users of prior VC versions shall suffer the agony and pain of
    // clicking through the countless debug dialogs.
    // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
    // debug mode when compiled with VC 7.1 or lower.
    if (!GTEST_FLAG(break_on_failure))
      _set_abort_behavior(
        0x0,                                    // Clear the following flags:
        _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.

# endif
  }

#endif  // GTEST_HAS_SEH

  // RunAllTests is invoked through the exception-handling wrapper; a true
  // result maps to exit code 0, false to 1.
  return internal::HandleExceptionsInMethodIfSupported(
           impl(),
           &internal::UnitTestImpl::RunAllTests,
           "auxiliary test code (environments or event listeners)") ? 0 : 1;
}

// Returns the working directory when the first TEST() or TEST_F() was
// executed.
const char* UnitTest::original_working_dir() const {
  return impl_->original_working_dir_.c_str();
}

// Reports which TestCase is executing right now; yields NULL when no
// test is in progress.  The query is made while holding mutex_.
const TestCase* UnitTest::current_test_case() const
GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock guard(&mutex_);
  const TestCase* const running_case = impl_->current_test_case();
  return running_case;
}

// Reports which TestInfo belongs to the test executing right now; yields
// NULL when no test is in progress.  The query is made while holding mutex_.
const TestInfo* UnitTest::current_test_info() const
GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock guard(&mutex_);
  const TestInfo* const running_test = impl_->current_test_info();
  return running_test;
}

// Returns the random seed that was in effect at the start of the current
// test run, as recorded by the implementation object.
int UnitTest::random_seed() const {
  const int seed = impl_->random_seed();
  return seed;
}

#if GTEST_HAS_PARAM_TEST
// Gives access to the ParameterizedTestCaseRegistry that tracks
// value-parameterized tests so they can be instantiated and registered.
// The registry itself lives in the implementation object.
internal::ParameterizedTestCaseRegistry&
UnitTest::parameterized_test_registry()
GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::ParameterizedTestCaseRegistry& registry =
    impl_->parameterized_test_registry();
  return registry;
}
#endif  // GTEST_HAS_PARAM_TEST

// Builds an empty UnitTest by allocating its implementation object, which
// receives a back-pointer to this UnitTest.
UnitTest::UnitTest() {
  internal::UnitTestImpl* const created = new internal::UnitTestImpl(this);
  impl_ = created;
}

// Tears down the UnitTest by releasing the implementation object that the
// constructor allocated.
UnitTest::~UnitTest() {
  internal::UnitTestImpl* const owned = impl_;
  delete owned;
}

// Appends a trace produced by SCOPED_TRACE() to the per-thread Google Test
// trace stack.  The stack is modified while holding mutex_.
void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock guard(&mutex_);
  (*impl_).gtest_trace_stack().push_back(trace);
}

// Removes the most recently pushed trace from the per-thread Google Test
// trace stack.  The stack is modified while holding mutex_.
void UnitTest::PopGTestTrace()
GTEST_LOCK_EXCLUDED_(mutex_) {
  internal::MutexLock guard(&mutex_);
  (*impl_).gtest_trace_stack().pop_back();
}

namespace internal {

// Constructs the implementation object for the given UnitTest.
//
// Every member receives a neutral starting value here.  Several of them
// (the random seed, the random generator and catch_exceptions_) are only
// placeholders that get overwritten before first use, as noted inline.
// The constructor body installs a PrettyUnitTestResultPrinter as the
// default result printer on the listener list.
//
// Arguments:
//
//   parent: the UnitTest this implementation belongs to; stored in parent_.
UnitTestImpl::UnitTestImpl(UnitTest* parent)
  : parent_(parent),
#ifdef _MSC_VER
# pragma warning(push)                    // Saves the current warning state.
# pragma warning(disable:4355)            // Temporarily disables warning 4355
    // (using this in initializer).
    default_global_test_part_result_reporter_(this),
    default_per_thread_test_part_result_reporter_(this),
# pragma warning(pop)                     // Restores the warning state again.
#else
    default_global_test_part_result_reporter_(this),
    default_per_thread_test_part_result_reporter_(this),
#endif  // _MSC_VER
    // The active reporters start out pointing at the two default reporters
    // constructed just above.
    global_test_part_result_repoter_(
      &default_global_test_part_result_reporter_),
    per_thread_test_part_result_reporter_(
      &default_per_thread_test_part_result_reporter_),
#if GTEST_HAS_PARAM_TEST
    parameterized_test_registry_(),
    parameterized_tests_registered_(false),
#endif  // GTEST_HAS_PARAM_TEST
    // -1 means no death test case has been registered yet; GetTestCase()
    // pre-increments this value before using it as an insertion index.
    last_death_test_case_(-1),
    // No test case or test is running at construction time.
    current_test_case_(NULL),
    current_test_info_(NULL),
    ad_hoc_test_result_(),
    // Created lazily by os_stack_trace_getter() on first request.
    os_stack_trace_getter_(NULL),
    post_flag_parse_init_performed_(false),
    random_seed_(0),  // Will be overridden by the flag before first use.
    random_(0),  // Will be reseeded before first use.
    start_timestamp_(0),
    elapsed_time_(0),
#if GTEST_HAS_DEATH_TEST
    death_test_factory_(new DefaultDeathTestFactory),
#endif
    // Will be overridden by the flag before first use.
    catch_exceptions_(false) {
  listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
}

// Releases everything this object owns: each registered TestCase, each
// registered Environment, and finally the OS stack trace getter (deleting
// a NULL getter is a no-op).
UnitTestImpl::~UnitTestImpl() {
  // Registered test cases.
  std::for_each(test_cases_.begin(), test_cases_.end(),
                internal::Delete<TestCase>);

  // Registered global environments.
  std::for_each(environments_.begin(), environments_.end(),
                internal::Delete<Environment>);

  delete os_stack_trace_getter_;
}

// Adds a TestProperty to the current TestResult object when invoked in a
// context of a test, to current test case's ad_hoc_test_result when invoke
// from SetUpTestCase/TearDownTestCase, or to the global property set
// otherwise.  If the result already contains a property with the same key,
// the value will be updated.
void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
  std::string xml_element;
  TestResult* test_result;  // TestResult appropriate for property recording.

  if (current_test_info_ != NULL) {
    xml_element = "testcase";
    test_result = &(current_test_info_->result_);
  }
  else if (current_test_case_ != NULL) {
    xml_element = "testsuite";
    test_result = &(current_test_case_->ad_hoc_test_result_);
  }
  else {
    xml_element = "testsuites";
    test_result = &ad_hoc_test_result_;
  }

  test_result->RecordProperty(xml_element, test_property);
}

#if GTEST_HAS_DEATH_TEST
// Turns off event forwarding when this process is a death test subprocess,
// i.e. when the internal run-death-test flag object exists.  Must not be
// called before InitGoogleTest.
void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
  const bool in_death_test_subprocess =
    (internal_run_death_test_flag_.get() != NULL);

  if (!in_death_test_subprocess) {
    return;
  }

  listeners()->SuppressEventForwarding();
}
#endif  // GTEST_HAS_DEATH_TEST

// Sets up the XML report listener according to UnitTestOptions.  An output
// format of "xml" installs an XmlUnitTestResultPrinter writing to the
// configured output file; an empty format does nothing; any other format
// is reported on stdout and ignored.  Must not be called before
// InitGoogleTest.
void UnitTestImpl::ConfigureXmlOutput() {
  const std::string requested_format = UnitTestOptions::GetOutputFormat();

  // No output requested at all.
  if (requested_format == "") {
    return;
  }

  if (requested_format == "xml") {
    const std::string report_path =
      UnitTestOptions::GetAbsolutePathToOutputFile();
    listeners()->SetDefaultXmlGenerator(
      new XmlUnitTestResultPrinter(report_path.c_str()));
    return;
  }

  printf("WARNING: unrecognized output format \"%s\" ignored.\n",
         requested_format.c_str());
  fflush(stdout);
}

#if GTEST_CAN_STREAM_RESULTS_
// Sets up the listener that streams test results in string form.  The
// stream_result_to flag is split at its first ':' and the two halves are
// handed to a new StreamingListener; a non-empty flag without a ':' is
// reported on stdout and ignored.  Must not be called before
// InitGoogleTest.
void UnitTestImpl::ConfigureStreamingOutput() {
  const std::string& target = GTEST_FLAG(stream_result_to);

  // Streaming was not requested.
  if (target.empty()) {
    return;
  }

  const size_t separator = target.find(':');

  if (separator == std::string::npos) {
    printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
           target.c_str());
    fflush(stdout);
    return;
  }

  const std::string before_colon = target.substr(0, separator);
  const std::string after_colon = target.substr(separator + 1);
  listeners()->Append(new StreamingListener(before_colon, after_colon));
}
#endif  // GTEST_CAN_STREAM_RESULTS_

// Runs the initialization steps that depend on flag values obtained in
// ParseGoogleTestFlagsOnly.  InitGoogleTest calls this right after parsing
// the flags, and RunAllTests calls it again in case the user never called
// InitGoogleTest.  A guard flag makes every call after the first a no-op,
// which keeps the function idempotent.
void UnitTestImpl::PostFlagParsingInit() {
  // Everything below has already been done by an earlier call.
  if (post_flag_parse_init_performed_) {
    return;
  }

  post_flag_parse_init_performed_ = true;

#if GTEST_HAS_DEATH_TEST
  InitDeathTestSubprocessControlInfo();
  SuppressTestEventsIfInSubprocess();
#endif  // GTEST_HAS_DEATH_TEST

  // Parameterized tests are registered now so that the UnitTest reflection
  // API can see them without RUN_ALL_TESTS having been run.
  RegisterParameterizedTests();

  // The XML listener is configured now so that users can still shut down
  // the default XML output before invoking RUN_ALL_TESTS.
  ConfigureXmlOutput();

#if GTEST_CAN_STREAM_RESULTS_
  // Hooks up the listener that streams results to the specified server.
  ConfigureStreamingOutput();
#endif  // GTEST_CAN_STREAM_RESULTS_
}

// Unary predicate that tells whether a TestCase has a particular name.
//
// It exists to support the implementation of the UnitTest class (see
// UnitTestImpl::GetTestCase) and lives in namespace internal.
//
// TestCaseNameIs is copyable.
class TestCaseNameIs {
 public:
  // Remembers the name to look for.
  explicit TestCaseNameIs(const std::string& name)
    : name_(name) {}

  // Returns true iff test_case is non-NULL and its name equals the
  // remembered name.
  bool operator()(const TestCase* test_case) const {
    if (test_case == NULL) {
      return false;
    }

    const char* const wanted = name_.c_str();
    return strcmp(test_case->name(), wanted) == 0;
  }

 private:
  std::string name_;
};

// Looks up the TestCase with the given name, creating and registering it
// when it does not exist yet.  It's the CALLER'S RESPONSIBILITY to ensure
// that this function is only called WHEN THE TESTS ARE NOT SHUFFLED.
//
// Arguments:
//
//   test_case_name: name of the test case
//   type_param:     the name of the test case's type parameter, or NULL if
//                   this is not a typed or a type-parameterized test case.
//   set_up_tc:      pointer to the function that sets up the test case
//   tear_down_tc:   pointer to the function that tears down the test case
TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
                                    const char* type_param,
                                    Test::SetUpTestCaseFunc set_up_tc,
                                    Test::TearDownTestCaseFunc tear_down_tc) {
  // Reuse an already registered test case of that name, if any.
  const TestCaseNameIs has_requested_name(test_case_name);
  const std::vector<TestCase*>::const_iterator existing =
    std::find_if(test_cases_.begin(), test_cases_.end(), has_requested_name);

  if (existing != test_cases_.end()) {
    return *existing;
  }

  // Nothing registered under that name yet, so make a new test case.
  TestCase* const created =
    new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);

  const bool is_death_test_case =
    internal::UnitTestOptions::MatchesFilter(test_case_name,
        kDeathTestCaseFilter);

  if (!is_death_test_case) {
    // Ordinary test cases simply go to the end of the list.
    test_cases_.push_back(created);
  }
  else {
    // Death test cases are kept together at the front: the new one goes
    // right after the last death test case defined so far.  This only
    // works when the test cases haven't been shuffled.  Otherwise we may
    // end up running a death test after a non-death test.
    ++last_death_test_case_;
    test_cases_.insert(test_cases_.begin() + last_death_test_case_, created);
  }

  // The new slot initially maps to itself in the index table.
  const int next_index = static_cast<int>(test_case_indices_.size());
  test_case_indices_.push_back(next_index);
  return created;
}

// Adapters that let RunAllTests() apply Environment::SetUp() and
// Environment::TearDown() to every registered environment through
// ForEach() / std::for_each().

// Sets up one environment.
static void SetUpEnvironment(Environment* env) {
  Environment& environment = *env;
  environment.SetUp();
}

// Tears down one environment.
static void TearDownEnvironment(Environment* env) {
  Environment& environment = *env;
  environment.TearDown();
}

// Runs all tests in this UnitTest object, prints the result, and
// returns true if all tests are successful.  If any exception is
// thrown during a test, the test is considered to be failed, but the
// rest of the tests will still be run.
//
// When parameterized tests are enabled, it expands and registers
// parameterized tests first in RegisterParameterizedTests().
// All other functions called from RunAllTests() may safely assume that
// parameterized tests are ready to be counted and run.
//
// Early exits (no test is run in any of them):
//   * Google Test was not initialized: prints a message, returns false.
//   * The --help flag was given: returns true.
//   * The list_tests flag is set: lists the matching tests, returns true.
//
// Otherwise the selected tests are run once per iteration, as many times
// as the repeat flag asks for (forever when it is negative), and the
// return value is false iff at least one iteration did not pass.
bool UnitTestImpl::RunAllTests() {
  // Makes sure InitGoogleTest() was called.
  if (!GTestIsInitialized()) {
    printf("%s",
           "\nThis test program did NOT call ::testing::InitGoogleTest "
           "before calling RUN_ALL_TESTS().  Please fix it.\n");
    return false;
  }

  // Do not run any test if the --help flag was specified.
  if (g_help_flag) {
    return true;
  }

  // Repeats the call to the post-flag parsing initialization in case the
  // user didn't call InitGoogleTest.
  PostFlagParsingInit();

  // Even if sharding is not on, test runners may want to use the
  // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
  // protocol.
  internal::WriteToShardStatusFileIfNeeded();

  // True iff we are in a subprocess for running a thread-safe-style
  // death test.
  bool in_subprocess_for_death_test = false;

#if GTEST_HAS_DEATH_TEST
  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
#endif  // GTEST_HAS_DEATH_TEST

  // ShouldShard() returns false unconditionally inside a death test
  // subprocess, so sharding is only ever applied in the original process.
  const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
                                        in_subprocess_for_death_test);

  // Compares the full test names with the filter to decide which
  // tests to run.  FilterTests() returns the number of tests selected for
  // this shard.
  const bool has_tests_to_run = FilterTests(should_shard
                                ? HONOR_SHARDING_PROTOCOL
                                : IGNORE_SHARDING_PROTOCOL) > 0;

  // Lists the tests and exits if the --gtest_list_tests flag was specified.
  if (GTEST_FLAG(list_tests)) {
    // This must be called *after* FilterTests() has been called.
    ListTestsMatchingFilter();
    return true;
  }

  // The seed is only derived from the flag when shuffling is requested;
  // otherwise it stays 0.
  random_seed_ = GTEST_FLAG(shuffle) ?
                 GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;

  // True iff at least one test has failed.
  bool failed = false;

  TestEventListener* repeater = listeners()->repeater();

  start_timestamp_ = GetTimeInMillis();
  repeater->OnTestProgramStart(*parent_);

  // How many times to repeat the tests?  We don't want to repeat them
  // when we are inside the subprocess of a death test.
  const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
  // Repeats forever if the repeat count is negative.
  const bool forever = repeat < 0;

  // Note that a repeat count of 0 makes the loop body run zero times.
  for (int i = 0; forever || i != repeat; i++) {
    // We want to preserve failures generated by ad-hoc test
    // assertions executed before RUN_ALL_TESTS().
    ClearNonAdHocTestResult();

    const TimeInMillis start = GetTimeInMillis();

    // Shuffles test cases and tests if requested.
    if (has_tests_to_run && GTEST_FLAG(shuffle)) {
      random()->Reseed(random_seed_);
      // This should be done before calling OnTestIterationStart(),
      // such that a test event listener can see the actual test order
      // in the event.
      ShuffleTests();
    }

    // Tells the unit test event listeners that the tests are about to start.
    repeater->OnTestIterationStart(*parent_, i);

    // Runs each test case if there is at least one test to run.
    if (has_tests_to_run) {
      // Sets up all environments beforehand.
      repeater->OnEnvironmentsSetUpStart(*parent_);
      ForEach(environments_, SetUpEnvironment);
      repeater->OnEnvironmentsSetUpEnd(*parent_);

      // Runs the tests only if there was no fatal failure during global
      // set-up.
      if (!Test::HasFatalFailure()) {
        for (int test_index = 0; test_index < total_test_case_count();
             test_index++) {
          GetMutableTestCase(test_index)->Run();
        }
      }

      // Tears down all environments in reverse order afterwards.
      repeater->OnEnvironmentsTearDownStart(*parent_);
      std::for_each(environments_.rbegin(), environments_.rend(),
                    TearDownEnvironment);
      repeater->OnEnvironmentsTearDownEnd(*parent_);
    }

    // Duration of this iteration only (shuffling through tear-down).
    elapsed_time_ = GetTimeInMillis() - start;

    // Tells the unit test event listener that the tests have just finished.
    repeater->OnTestIterationEnd(*parent_, i);

    // Remembers whether this iteration failed.  Nothing is cleared here;
    // ClearNonAdHocTestResult() is called at the top of the next iteration.
    if (!Passed()) {
      failed = true;
    }

    // Restores the original test order after the iteration.  This
    // allows the user to quickly repro a failure that happens in the
    // N-th iteration without repeating the first (N - 1) iterations.
    // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
    // case the user somehow changes the value of the flag somewhere
    // (it's always safe to unshuffle the tests).
    UnshuffleTests();

    if (GTEST_FLAG(shuffle)) {
      // Picks a new random seed for each iteration.
      random_seed_ = GetNextRandomSeed(random_seed_);
    }
  }

  repeater->OnTestProgramEnd(*parent_);

  return !failed;
}

// Creates the file named by the GTEST_SHARD_STATUS_FILE environment
// variable when that variable is present.  The file is opened for writing
// and closed again right away, so an existing file at that location is
// written over.  If the variable is present but the file cannot be
// created, prints an error and exits.
void WriteToShardStatusFileIfNeeded() {
  const char* const status_path = posix::GetEnv(kTestShardStatusFile);

  // Variable not set: nothing to do.
  if (status_path == NULL) {
    return;
  }

  FILE* const status_file = posix::FOpen(status_path, "w");

  if (status_file != NULL) {
    fclose(status_file);
    return;
  }

  ColoredPrintf(COLOR_RED,
                "Could not write to the test shard status file \"%s\" "
                "specified by the %s environment variable.\n",
                status_path, kTestShardStatusFile);
  fflush(stdout);
  exit(EXIT_FAILURE);
}

// Checks whether sharding is enabled by examining the relevant
// environment variable values.
//
// Arguments:
//
//   total_shards_env:             name of the environment variable holding
//                                 the total number of shards.
//   shard_index_env:              name of the environment variable holding
//                                 the index of this shard.
//   in_subprocess_for_death_test: true when running inside a death test
//                                 subprocess.
//
// Returns true iff both variables are set consistently and the total
// number of shards is greater than 1.  Returns false when neither
// variable is set.  If in_subprocess_for_death_test, sharding is disabled
// (false is returned without reading the environment) because it must
// only be applied to the original test process.  Otherwise, we could
// filter out death tests we intended to execute.
//
// If only one of the two variables is set, or the values are inconsistent
// (i.e., shard_index < 0 or shard_index >= total_shards), prints an error
// and exits.
bool ShouldShard(const char* total_shards_env,
                 const char* shard_index_env,
                 bool in_subprocess_for_death_test) {
  if (in_subprocess_for_death_test) {
    return false;
  }

  // -1 stands for "variable not set".
  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);

  if (total_shards == -1 && shard_index == -1) {
    return false;
  }
  else if (total_shards == -1 && shard_index != -1) {
    const Message msg = Message()
                        << "Invalid environment variables: you have "
                        << kTestShardIndex << " = " << shard_index
                        << ", but have left " << kTestTotalShards << " unset.\n";
    // The message is built at run time, so it is passed as an argument to
    // "%s" and never used as the format string itself.
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  }
  else if (total_shards != -1 && shard_index == -1) {
    const Message msg = Message()
                        << "Invalid environment variables: you have "
                        << kTestTotalShards << " = " << total_shards
                        << ", but have left " << kTestShardIndex << " unset.\n";
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  }
  else if (shard_index < 0 || shard_index >= total_shards) {
    const Message msg = Message()
                        << "Invalid environment variables: we require 0 <= "
                        << kTestShardIndex << " < " << kTestTotalShards
                        << ", but you have " << kTestShardIndex << "=" << shard_index
                        << ", " << kTestTotalShards << "=" << total_shards << ".\n";
    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
    fflush(stdout);
    exit(EXIT_FAILURE);
  }

  // A single shard is equivalent to not sharding at all.
  return total_shards > 1;
}

// Reads the environment variable var and interprets it as an Int32.
// Returns default_val when the variable is unset.  When the variable is
// set but ParseInt32 rejects its value, the process exits with
// EXIT_FAILURE.
Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
  const char* const raw_text = posix::GetEnv(var);

  if (raw_text != NULL) {
    Int32 parsed;
    const bool parsed_ok =
      ParseInt32(Message() << "The value of environment variable " << var,
                 raw_text, &parsed);

    if (!parsed_ok) {
      exit(EXIT_FAILURE);
    }

    return parsed;
  }

  return default_val;
}

// Tells whether the test with the given id belongs to the given shard.
// Tests are dealt out round-robin: a test runs on the shard whose index
// equals test_id modulo total_shards.  The test id is some arbitrary but
// unique non-negative integer assigned to each test method.  Assumes that
// 0 <= shard_index < total_shards.
bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
  const int owning_shard = test_id % total_shards;
  return owning_shard == shard_index;
}

// Decides, for every registered test, whether it should run, and stores
// the outcome in each TestCase and TestInfo object.  A test is runnable
// when it matches the user-specified filter and is either not disabled or
// disabled tests were explicitly requested.
// If shard_tests == HONOR_SHARDING_PROTOCOL, runnable tests are further
// restricted to the ones assigned to this shard by the sharding variables
// in the environment - see
// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
// Returns the number of tests that should run.
int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
  // The sharding variables are only consulted when sharding is honored.
  Int32 total_shards = -1;
  Int32 shard_index = -1;

  if (shard_tests == HONOR_SHARDING_PROTOCOL) {
    total_shards = Int32FromEnvOrDie(kTestTotalShards, -1);
    shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
  }

  // runnable_count: tests that will run across all shards (i.e., match
  // the filter and are not disabled).  It also serves as the id used to
  // assign a test to a shard.
  // selected_count: tests to be run on this shard.
  int runnable_count = 0;
  int selected_count = 0;

  for (size_t case_pos = 0; case_pos < test_cases_.size(); ++case_pos) {
    TestCase* const test_case = test_cases_[case_pos];
    const std::string test_case_name(test_case->name());

    // A test case runs iff at least one of its tests is selected.
    test_case->set_should_run(false);

    for (size_t test_pos = 0; test_pos < test_case->test_info_list().size();
         ++test_pos) {
      TestInfo* const test_info = test_case->test_info_list()[test_pos];
      const std::string test_name(test_info->name());

      // A test is disabled if test case name or test name matches
      // kDisableTestFilter.
      bool is_disabled =
        internal::UnitTestOptions::MatchesFilter(test_case_name,
            kDisableTestFilter);

      if (!is_disabled) {
        is_disabled =
          internal::UnitTestOptions::MatchesFilter(test_name,
              kDisableTestFilter);
      }

      test_info->is_disabled_ = is_disabled;

      const bool matches_filter =
        internal::UnitTestOptions::FilterMatchesTest(test_case_name,
            test_name);
      test_info->matches_filter_ = matches_filter;

      const bool is_runnable =
        matches_filter &&
        (GTEST_FLAG(also_run_disabled_tests) || !is_disabled);

      // The shard check uses the count of runnable tests seen so far,
      // i.e. before this test is counted.
      bool is_selected = false;

      if (is_runnable) {
        is_selected =
          shard_tests == IGNORE_SHARDING_PROTOCOL ||
          ShouldRunTestOnShard(total_shards, shard_index, runnable_count);
        ++runnable_count;
      }

      test_info->should_run_ = is_selected;

      if (is_selected) {
        ++selected_count;
        test_case->set_should_run(true);
      }
    }
  }

  return selected_count;
}

// Writes the given C-string to stdout on a single line: every '\n'
// character is printed as the two characters "\\n".  Once max_length
// output characters have been used up and input remains, "..." is printed
// and the rest of the string is dropped.  A NULL string prints nothing.
static void PrintOnOneLine(const char* str, int max_length) {
  if (str == NULL) {
    return;
  }

  // Number of output characters produced so far (an escaped newline
  // counts as two).
  int columns_used = 0;

  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    if (columns_used >= max_length) {
      printf("...");
      return;
    }

    if (*cursor != '\n') {
      printf("%c", *cursor);
      columns_used += 1;
    }
    else {
      printf("\\n");
      columns_used += 2;
    }
  }
}

// Prints the names of the tests matching the user-specified filter flag.
void UnitTestImpl::ListTestsMatchingFilter() {
  // Print at most this many characters for each type/value parameter.
  const int kMaxParamLength = 250;

  for (size_t i = 0; i < test_cases_.size(); i++) {
    const TestCase* const test_case = test_cases_[i];
    bool printed_test_case_name = false;

    for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
      const TestInfo* const test_info =
        test_case->test_info_list()[j];

      if (test_info->matches_filter_) {
        if (!printed_test_case_name) {
          printed_test_case_name = true;
          printf("%s.", test_case->name());

          if (test_case->type_param() != NULL) {
            printf("  # %s = ", kTypeParamLabel);
            // We print the type parameter on a single line to make
            // the output easy to parse by a program.
            PrintOnOneLine(test_case->type_param(), kMaxParamLength);
          }

          printf("\n");
        }

        printf("  %s", test_info->name());

        if (test_info->value_param() != NULL) {
          printf("  # %s = ", kValueParamLabel);
          // We print the value parameter on a single line to make the
          // output easy to parse by a program.
          PrintOnOneLine(test_info->value_param(), kMaxParamLength);
        }

        printf("\n");
      }
    }
  }

  fflush(stdout);
}

// Installs a new OS stack trace getter.
//
// When the given getter is the one already installed, nothing happens.
// Otherwise the previously installed getter is deleted and the given one
// takes its place.
void UnitTestImpl::set_os_stack_trace_getter(
  OsStackTraceGetterInterface* getter) {
  // Re-installing the current getter must not delete it.
  if (os_stack_trace_getter_ == getter) {
    return;
  }

  delete os_stack_trace_getter_;
  os_stack_trace_getter_ = getter;
}

// Returns the installed OS stack trace getter.  If none is installed yet,
// an OsStackTraceGetter is created first, installed as the current
// getter, and then returned.
OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
  if (os_stack_trace_getter_ != NULL) {
    return os_stack_trace_getter_;
  }

  os_stack_trace_getter_ = new OsStackTraceGetter;
  return os_stack_trace_getter_;
}

// Returns the TestResult that assertions should currently report to: the
// result of the running test, or the ad hoc test result when no test is
// running.
TestResult* UnitTestImpl::current_test_result() {
  if (current_test_info_ != NULL) {
    return &(current_test_info_->result_);
  }

  return &ad_hoc_test_result_;
}

// Randomizes the order of the test cases and of the tests inside each
// test case.  Death test cases and non-death test cases are shuffled as
// two separate ranges, so death tests are still run first.
void UnitTestImpl::ShuffleTests() {
  // Index of the first non-death test case; everything before it is a
  // death test case.
  const int first_non_death_case = last_death_test_case_ + 1;
  const int case_count = static_cast<int>(test_cases_.size());

  // Death test cases.
  ShuffleRange(random(), 0, first_non_death_case, &test_case_indices_);

  // Non-death test cases.
  ShuffleRange(random(), first_non_death_case, case_count,
               &test_case_indices_);

  // Tests within each test case.
  for (size_t case_pos = 0; case_pos < test_cases_.size(); ++case_pos) {
    test_cases_[case_pos]->ShuffleTests(random());
  }
}

// Puts the test cases, and the tests inside each of them, back into the
// order they had before the first shuffle.
void UnitTestImpl::UnshuffleTests() {
  const size_t case_count = test_cases_.size();

  for (size_t case_pos = 0; case_pos < case_count; ++case_pos) {
    // Restores the order of the tests in this test case.
    test_cases_[case_pos]->UnshuffleTests();
    // Makes the index table entry map to itself again.
    test_case_indices_[case_pos] = static_cast<int>(case_pos);
  }
}

// Returns the current OS stack trace as an std::string.
//
// The gtest_stack_trace_depth flag caps how many stack frames are
// included.  skip_count is the number of top frames to leave out; skipped
// frames do not count against that cap.
//
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/,
    int skip_count) {
  // One extra frame is skipped so that this wrapper itself does not show
  // up in addition to what the user really wants to skip.
  const int frames_to_skip = skip_count + 1;
  return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(frames_to_skip);
}

// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to
// suppress unreachable code warnings.
//
// ClassUniqueToAlwaysTrue is an empty type that serves as the exception
// type named in the throw statement of AlwaysTrue() below; that statement
// sits behind a condition that is always false.  The anonymous namespace
// keeps the type local to this translation unit.
namespace {
class ClassUniqueToAlwaysTrue {};
}

// Returns its argument unchanged.
bool IsTrue(bool condition) {
  const bool unchanged = condition;
  return unchanged;
}

// Always returns true.  When exceptions are enabled the body also
// contains a throw statement that can never execute.
bool AlwaysTrue() {
#if GTEST_HAS_EXCEPTIONS

  // IsTrue(false) never holds, so nothing is ever thrown here; the
  // statement only makes the compiler think that this function may throw.
  const bool never = IsTrue(false);

  if (never) {
    throw ClassUniqueToAlwaysTrue();
  }

#endif  // GTEST_HAS_EXCEPTIONS
  return true;
}

// Consumes a prefix from the front of a C string.
//
// If *pstr starts with prefix, *pstr is advanced to the first character
// after the prefix and true is returned.  Otherwise *pstr is left
// untouched and false is returned.  None of pstr, *pstr, and prefix can
// be NULL.
bool SkipPrefix(const char* prefix, const char** pstr) {
  const size_t prefix_length = strlen(prefix);
  const bool has_prefix = (strncmp(*pstr, prefix, prefix_length) == 0);

  if (!has_prefix) {
    return false;
  }

  *pstr += prefix_length;
  return true;
}

// Extracts the value part of a command line flag.  The string is expected
// to look like "--<GTEST_FLAG_PREFIX_><flag>=value".  When def_optional is
// true, the "=value" part can be omitted.
//
// Returns a pointer into str at the start of the value (at the
// terminating '\0' when the value was legitimately omitted), or NULL if
// the parsing failed.
const char* ParseFlagValue(const char* str,
                           const char* flag,
                           bool def_optional) {
  // Neither str nor flag may be NULL.
  if (str == NULL || flag == NULL) {
    return NULL;
  }

  // The full switch text: "--", then GTEST_FLAG_PREFIX_, then the flag name.
  std::string expected_switch("--");
  expected_switch += GTEST_FLAG_PREFIX_;
  expected_switch += flag;
  const size_t switch_length = expected_switch.length();

  if (strncmp(str, expected_switch.c_str(), switch_length) != 0) {
    return NULL;
  }

  // What follows the flag name.
  const char* const remainder = str + switch_length;

  // An explicit value: hand back the text after the '='.
  if (remainder[0] == '=') {
    return remainder + 1;
  }

  // A bare flag is only acceptable when the value is optional.
  if (def_optional && remainder[0] == '\0') {
    return remainder;
  }

  // Anything else after the flag name is a parse failure.
  return NULL;
}

// Parses a string for a bool flag, written either as "--flag=value" or
// as a bare "--flag".
//
// With an explicit value, the flag is true unless the value starts with
// '0', 'f', or 'F'.  A bare flag is taken as true.
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
  // The value is optional for bool flags.
  const char* const value_text = ParseFlagValue(str, flag, true);

  if (value_text == NULL) {
    return false;
  }

  // Only the first character of the value decides the outcome.
  const char first = *value_text;
  *value = (first != '0' && first != 'f' && first != 'F');
  return true;
}

// Parses a string for an Int32 flag written as "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
  // The "=value" part is mandatory for Int32 flags.
  const char* const value_text = ParseFlagValue(str, flag, false);

  if (value_text != NULL) {
    // ParseInt32 converts the text and stores the result in *value.
    return ParseInt32(Message() << "The value of flag --" << flag,
                      value_text, value);
  }

  return false;
}

// Parses a string for a string flag written as "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
  // The "=value" part is mandatory for string flags.
  const char* const value_text = ParseFlagValue(str, flag, false);

  if (value_text == NULL) {
    return false;
  }

  // Copies the text after '=' into the caller's string.
  value->assign(value_text);
  return true;
}

// Tells whether str looks like a Google Test flag: a switch introducer
// ("--", "-", or "/") followed by GTEST_FLAG_PREFIX_ or
// GTEST_FLAG_PREFIX_DASH_.  An unrecognized argument that passes this check
// makes Google Test print its help message.  Arguments whose name starts
// with GTEST_FLAG_PREFIX_ "internal_" are internal flags; they are reported
// as not matching so they never trigger the help message.
static bool HasGoogleTestFlagPrefix(const char* str) {
  // Consume the switch introducer; the longest form is tried first.
  const bool has_introducer = SkipPrefix("--", &str) ||
                              SkipPrefix("-", &str) ||
                              SkipPrefix("/", &str);
  if (!has_introducer) {
    return false;
  }

  // Internal flags are excluded.
  if (SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str)) {
    return false;
  }

  return SkipPrefix(GTEST_FLAG_PREFIX_, &str) ||
         SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str);
}

// Prints text that carries embedded color codes.  A two-character escape
// starting with '@' controls the output:
//
//   @@    emits one literal '@'.
//   @R    switches to red.
//   @G    switches to green.
//   @Y    switches to yellow.
//   @D    switches back to the default terminal color.
//
// An '@' followed by any other character is dropped and that character is
// printed as ordinary text.
//
// TODO(wan@google.com): Write tests for this once we add stdout
// capturing to Google Test.
static void PrintColorEncoded(const char* str) {
  GTestColor color = COLOR_DEFAULT;  // Color applied to the next segment.

  // Each pass prints the plain text up to the next '@', interprets the
  // escape, and moves str to the start of the following segment.
  const char* at = strchr(str, '@');
  while (at != NULL) {
    ColoredPrintf(color, "%s", std::string(str, at).c_str());

    // Assume a two-character escape; the default case backs up by one.
    str = at + 2;
    switch (at[1]) {
      case '@':
        ColoredPrintf(color, "@");
        break;
      case 'D':
        color = COLOR_DEFAULT;
        break;
      case 'R':
        color = COLOR_RED;
        break;
      case 'G':
        color = COLOR_GREEN;
        break;
      case 'Y':
        color = COLOR_YELLOW;
        break;
      default:
        // Not an escape we know: resume right after the '@'.
        --str;
        break;
    }

    at = strchr(str, '@');
  }

  // No more escapes: print whatever is left in the current color.
  ColoredPrintf(color, "%s", str);
}

// Help text shown for --help and for unrecognized Google Test flags.  The
// string uses the '@' color escapes understood by PrintColorEncoded():
// @G (green) marks literal flag text, @Y (yellow) marks placeholders, and
// @D returns to the default color.  Every flag entry starts with "  @G--"
// so that both leading dashes are colored along with the flag name.
static const char kColorEncodedHelpMessage[] =
  "This program contains tests written using " GTEST_NAME_ ". You can use the\n"
  "following command line flags to control its behavior:\n"
  "\n"
  "Test Selection:\n"
  "  @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
  "      List the names of all tests instead of running them. The name of\n"
  "      TEST(Foo, Bar) is \"Foo.Bar\".\n"
  "  @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSITIVE_PATTERNS"
  "[@G-@YNEGATIVE_PATTERNS]@D\n"
  "      Run only the tests whose name matches one of the positive patterns but\n"
  "      none of the negative patterns. '?' matches any single character; '*'\n"
  "      matches any substring; ':' separates two patterns.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
  "      Run all disabled tests too.\n"
  "\n"
  "Test Execution:\n"
  "  @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
  "      Run the tests repeatedly; use a negative count to repeat forever.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
  "      Randomize tests' orders on every iteration.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
  "      Random number seed to use for shuffling test orders (between 1 and\n"
  "      99999, or 0 to use a seed based on the current time).\n"
  "\n"
  "Test Output:\n"
  "  @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
  "      Enable/disable colored output. The default is @Gauto@D.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
  "      Don't print the elapsed time of each test.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
  GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
  "      Generate an XML report in the given directory or with the given file\n"
  "      name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
#if GTEST_CAN_STREAM_RESULTS_
  "  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
  "      Stream test results to the given server.\n"
#endif  // GTEST_CAN_STREAM_RESULTS_
  "\n"
  "Assertion Behavior:\n"
#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
  "  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
  "      Set the default death test style.\n"
#endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
  "  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
  "      Turn assertion failures into debugger break-points.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
  "      Turn assertion failures into C++ exceptions.\n"
  "  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
  "      Do not report exceptions as test failures. Instead, allow them\n"
  "      to crash the program or throw a pop-up (on Windows).\n"
  "\n"
  "Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
  "the corresponding\n"
  "environment variable of a flag (all letters in upper-case). For example, to\n"
  "disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
  "color=no@D or set\n"
  "the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
  "\n"
  "For more information, please read the " GTEST_NAME_ " documentation at\n"
  "@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
  "(not one in your own code or tests), please report it to\n"
  "@G<" GTEST_DEV_EMAIL_ ">@D.\n";

// Scans the command line for Google Test flags and strips every recognized
// one out of argv, without initializing any other part of Google Test.
// CharType may be instantiated as either char or wchar_t.
template <typename CharType>
void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
  using internal::ParseBoolFlag;
  using internal::ParseInt32Flag;
  using internal::ParseStringFlag;

  // Scanning starts at index 1; argv[0] is never examined.
  int pos = 1;
  while (pos < *argc) {
    const std::string arg_string = StreamableToString(argv[pos]);
    const char* const arg = arg_string.c_str();

    // Try every known flag in turn; evaluation stops at the first match.
    const bool is_gtest_flag =
        ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
                      &GTEST_FLAG(also_run_disabled_tests)) ||
        ParseBoolFlag(arg, kBreakOnFailureFlag,
                      &GTEST_FLAG(break_on_failure)) ||
        ParseBoolFlag(arg, kCatchExceptionsFlag,
                      &GTEST_FLAG(catch_exceptions)) ||
        ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
        ParseStringFlag(arg, kDeathTestStyleFlag,
                        &GTEST_FLAG(death_test_style)) ||
        ParseBoolFlag(arg, kDeathTestUseFork,
                      &GTEST_FLAG(death_test_use_fork)) ||
        ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
        ParseStringFlag(arg, kInternalRunDeathTestFlag,
                        &GTEST_FLAG(internal_run_death_test)) ||
        ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
        ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
        ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
        ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
        ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
        ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
        ParseInt32Flag(arg, kStackTraceDepthFlag,
                       &GTEST_FLAG(stack_trace_depth)) ||
        ParseStringFlag(arg, kStreamResultToFlag,
                        &GTEST_FLAG(stream_result_to)) ||
        ParseBoolFlag(arg, kThrowOnFailureFlag,
                      &GTEST_FLAG(throw_on_failure));

    if (is_gtest_flag) {
      // Drop argv[pos] by sliding the tail one slot to the left.  argv
      // holds (*argc + 1) elements with a trailing NULL, and the bound
      // below moves that NULL as well.
      for (int k = pos + 1; k <= *argc; ++k) {
        argv[k - 1] = argv[k];
      }
      --(*argc);

      // Stay on the same index: a different argument now lives there.
      continue;
    }

    // Explicit help requests and unrecognized Google Test flags (internal
    // ones excluded) both ask for the help text.
    const bool wants_help =
        arg_string == "--help" || arg_string == "-h" ||
        arg_string == "-?" || arg_string == "/?" ||
        HasGoogleTestFlagPrefix(arg);
    if (wants_help) {
      g_help_flag = true;
    }

    ++pos;
  }

  // The help is printed here rather than in RUN_ALL_TESTS(), because the
  // latter might never run when Google Test is combined with another
  // testing framework.
  if (g_help_flag) {
    PrintColorEncoded(kColorEncodedHelpMessage);
  }
}

// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
//
// Narrow-character overload; forwards to ParseGoogleTestFlagsOnlyImpl, which
// removes recognized flags from argv and decrements *argc accordingly.
void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
  ParseGoogleTestFlagsOnlyImpl(argc, argv);
}
// Wide-character overload of the function above; same forwarding.
void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
  ParseGoogleTestFlagsOnlyImpl(argc, argv);
}

// Shared implementation behind both InitGoogleTest() overloads.
//
// CharType may be instantiated as either char or wchar_t.  Every call
// increments g_init_gtest_count, but only the first call does any work.
template <typename CharType>
void InitGoogleTestImpl(int* argc, CharType** argv) {
  // Count the call; initialization must not run a second time.
  const bool first_call = (++g_init_gtest_count == 1);
  if (!first_call) {
    return;
  }

  // Nothing to record or parse without at least argv[0].
  if (*argc <= 0) {
    return;
  }

  internal::g_executable_path = internal::StreamableToString(argv[0]);

#if GTEST_HAS_DEATH_TEST

  // Keep a copy of the original arguments, taken before flag parsing
  // rewrites argv.
  g_argvs.clear();

  for (int idx = 0; idx != *argc; ++idx) {
    g_argvs.push_back(StreamableToString(argv[idx]));
  }

#endif  // GTEST_HAS_DEATH_TEST

  ParseGoogleTestFlagsOnly(argc, argv);
  GetUnitTestImpl()->PostFlagParsingInit();
}

}  // namespace internal

// Initializes Google Test.  This must be called before calling
// RUN_ALL_TESTS().  In particular, it parses a command line for the
// flags that Google Test recognizes.  Whenever a Google Test flag is
// seen, it is removed from argv, and *argc is decremented.
//
// No value is returned.  Instead, the Google Test flag variables are
// updated.
//
// Calling the function for the second time has no user-visible effect.
//
// This is the narrow-character overload; it forwards directly to
// internal::InitGoogleTestImpl.
void InitGoogleTest(int* argc, char** argv) {
  internal::InitGoogleTestImpl(argc, argv);
}

// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.  It forwards to the same internal::InitGoogleTestImpl
// template, instantiated for wchar_t.
void InitGoogleTest(int* argc, wchar_t** argv) {
  internal::InitGoogleTestImpl(argc, argv);
}

}  // namespace testing


================================================
FILE: rocrtst/gtest/src/gtest_main.cpp
================================================
// Copyright 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <stdio.h>

// Number of command-line arguments that main() stored in hsa_arg_list.
int hsa_arg_cnt;
// Argument pointers copied from argv by main() after Google Test has parsed
// the command line.  The array has a fixed capacity of 32 entries.
char* hsa_arg_list[32];

#include "gtest/gtest.h"

// Test program entry point.
//
// Lets Google Test consume its own flags from the command line, publishes
// the remaining arguments through the globals hsa_arg_cnt / hsa_arg_list,
// and then runs every registered test.  Returns the value of
// RUN_ALL_TESTS().
GTEST_API_ int main(int argc, char** argv) {

  printf("Running main() from gtest_main.cc\n");
  testing::InitGoogleTest(&argc, argv);

  // Copy cmdline args for Hsa into a globally visible symbol.
  // hsa_arg_list has a fixed capacity, so never copy more pointers than it
  // can hold; writing past the end would corrupt adjacent memory.
  const int max_hsa_args =
      static_cast<int>(sizeof(hsa_arg_list) / sizeof(hsa_arg_list[0]));

  if (argc > max_hsa_args) {
    fprintf(stderr,
            "Warning: %d command line arguments given, only the first %d "
            "are passed on to the tests\n", argc, max_hsa_args);
    hsa_arg_cnt = max_hsa_args;
  } else {
    hsa_arg_cnt = argc;
  }

  for (int jdx = 0; jdx < hsa_arg_cnt; jdx++) {
    hsa_arg_list[jdx] = argv[jdx];
  }

  return RUN_ALL_TESTS();
}


================================================
FILE: rocrtst/samples/CMakeLists.txt
================================================
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 2.8.0)

#
# GCC 4.8 or higher compiler required.
#

#
#   Required Defines on cmake command line
#
#   1) Set location of ROCR header files
#
#      ROCM_DIR="Root for RocM install"
#
#   2) Set ROCRTST_BLD_TYPE to either "Debug" or "Release".
#      If not set, the default value of "Debug" is bound.
#
#      ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
#
#   3) Set ROCRTST_BLD_BITS to either "32" or "64"
#      If not set, the default value of "64" is bound.
#
#       ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
#
#   4) Set TARGET_DEVICES to indicate gpu types for kernel
#      builds (e.g., "gfx803;gfx900; ...")
#
#   Building rocrtst Suite
#
#   1) Create build folder e.g. "rocrtst/build" - any name will do
#   2) Cd into build folder
#   3) Run "cmake .."
#   4) Run "make"
#

cmake_minimum_required(VERSION 3.5.0)

# Set Name for Samples Project
#

set(PROJECT_NAME "sample64")
project (${PROJECT_NAME})

set(DEFAULT_TARGET "gfx803")

#############################
# COMMON AREA
#############################
#
# Currently support for Windows platform is not present
#
if(WIN32)
  message("This sample is not supported on Windows platform")
  return()
endif()

#
# Process input variables
#

# Required Defines first:
find_package(hsa-runtime64 REQUIRED )
message(STATUS "HSA Runtime found at ${hsa-runtime64_DIR} ")

if (DEFINED LLVM_DIR)
  set(CLANG ${LLVM_DIR}/clang)
  if (NOT EXISTS ${CLANG})
    # SPK temp until Jenkins script input is corrected.
    set (CLANG ${OPENCL_DIR}/bin/clang)
    if (NOT EXISTS ${CLANG})
    message("ERROR: path to clang (${CLANG}) is not valid. Is define LLVM_DIR correct?")
    return()
    endif()
  endif()
else()
    message("WARNING: LLVM_DIR define is not set. Kernels will not be built.")
endif()

if (DEFINED OPENCL_DIR)
  set(OPENCL_INC_DIR ${OPENCL_DIR}/include)
  set(OPENCL_LIB_DIR ${OPENCL_DIR}/lib)
else()
    message("WARNING: OPENCL_DIR define is not set. Kernels will not be built.")
endif()

if (DEFINED OPENCL_VER)
  set(OPENCL_VER ${OPENCL_VER})
else()
  message("OPENCL_VER define is not set. Using default")
  set(OPENCL_VER "2.0")
endif()

if(NOT EXISTS "${OPENCL_INC_DIR}/opencl-c.h")
  set(OPENCL_INC_DIR "${OPENCL_DIR}/../../../external/llvm-project/clang/lib/Headers/")
  if(NOT EXISTS "${OPENCL_INC_DIR}/opencl-c.h")
    message(WARNING "opencl-c.h not found.")
  endif()
endif()

# Fall back to DEFAULT_TARGET when the caller did not pass -DTARGET_DEVICES.
# The variable must be written as ${DEFAULT_TARGET}; a bare $DEFAULT_TARGET is
# not expanded by CMake and would be printed literally.
if (NOT DEFINED TARGET_DEVICES)
  message("WARNING: No targets devices provided on command line")
  message("  e.g., cmake -DTARGET_DEVICES=\"gfx803;gfx900;gfx...\" ..")
  message("  Using default target of ${DEFAULT_TARGET}")
  list(APPEND TARGET_DEVICES "${DEFAULT_TARGET}")
endif()

string(TOLOWER "${ROCRTST_BLD_TYPE}" tmp)
if("${tmp}" STREQUAL release)
  set(BUILD_TYPE "Release")
  set(ISDEBUG 0)
else()
  set(BUILD_TYPE "Debug")
  set(ISDEBUG 1)
endif()

if(${EMULATOR_BUILD})
add_definitions(-DROCRTST_EMULATOR_BUILD=1)
endif()

find_path(BITCODE_DIR NAMES "opencl.bc" "opencl.amdgcn.bc"
  PATHS
    "${ROCM_DIR}/amdgcn/bitcode"
    "${ROCM_DIR}/lib/bitcode"
    "${ROCM_DIR}/lib"
    "${ROCM_DIR}/lib/x86_64/bitcode"
    "${OPENCL_DIR}/amdgcn/bitcode"
    "${OPENCL_DIR}/lib/x86_64/bitcode"
    "${LLVM_DIR}/../lib/bitcode"
    "${CMAKE_PREFIX_PATH}/amdgcn/bitcode"
    "${CMAKE_PREFIX_PATH}/lib/bitcode"
    "${CMAKE_PREFIX_PATH}/lib/x86_64/bitcode")


#
# Print out the build configuration being used:
#
#   Build Src directory
#   Build Binary directory
#   Build Type: Debug Vs Release, 32 Vs 64
#   Compiler Version, etc
#
message("")
message("Build Configuration:")
message("-------------IS64BIT: " ${IS64BIT})
message("-----------BuildType: " ${BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("------Target Devices: ${TARGET_DEVICES}")
message("----------Clang path: " ${CLANG})
message("----------OpenCL Dir: " ${OPENCL_DIR})
message("-------OpenCL version " ${OPENCL_VER})
message("")

#
# Set the build type based on user input
#
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )
#
# Compiler pre-processor definitions.
#
# Define MACRO "DEBUG" if build type is "Debug"
if(${BUILD_TYPE} STREQUAL "Debug")
add_definitions(-DDEBUG)
endif()

#add_definitions(-D__linux__)
add_definitions(-DLITTLEENDIAN_CPU=1)

#
# Linux Compiler options
#
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")

#
# Add compiler flags to include symbol information for debug builds
#
if(ISDEBUG)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0")
endif()
message("ISDEBUG STEP:Done")

include_directories("${OPENCL_DIR}/include")

# Use this function to build any samples that have kernels to be built
#
# Creates one custom target named "<S_NAME>_hsaco.<TARG_DEV>" and appends that
# name to the cached HSACO_TARG_LIST.
#
# Arguments:
#   S_NAME     - sample name; prefix of the "<S_NAME>_kernels.hsaco" output
#   TARG_DEV   - GPU target; passed to clang as -mcpu and used as the name of
#                the per-device subdirectory of PROJECT_BINARY_DIR
#   HAS_KERNEL - when true the target runs ${CLANG} on ${CL_FILE_LIST} and
#                then creates the symlink; otherwise it only creates the
#                symlink
#
# Besides its arguments, the function reads CLANG, BITCODE_ARGS, OPENCL_VER,
# CL_FILE_LIST and SNAME_EXE, none of which it sets itself.
function(process_sample S_NAME TARG_DEV HAS_KERNEL)
  set(KERNEL_DIR ${PROJECT_BINARY_DIR}/${TARG_DEV})
  set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")

  # Record the new target name in the cache so it survives function scope.
  set(TARG_NAME "${S_NAME}_hsaco.${TARG_DEV}")
  set (HSACO_TARG_LIST ${HSACO_TARG_LIST} ${TARG_NAME}
                                               CACHE INTERNAL HSACO_TARG_LIST)

  if (${HAS_KERNEL})
    # Build the kernel
    # The clang command line is assembled as one string and then split into a
    # proper argument list.
    separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
     "-x cl -target amdgcn-amd-amdhsa -Xclang -finclude-default-header -mcpu=${TARG_DEV} ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${KERNEL_DIR}/${SNAME_KERNEL}")
    add_custom_target("${TARG_NAME}" ${CLANG} ${CLANG_ARG_LIST} COMMAND
      ${CMAKE_COMMAND} -E create_symlink
        "../${SNAME_EXE}" "${KERNEL_DIR}/${SNAME_EXE}"
       COMMENT "BUILDING KERNEL..."
      VERBATIM)
   else()
    # No kernel to build, but we need to set up symlinks; we'll use the hsaco
    # targ name, even though we aren't building an hsaco
    add_custom_target("${TARG_NAME}"
       ${CMAKE_COMMAND} -E create_symlink
             "../${SNAME_EXE}" "${KERNEL_DIR}/${SNAME_EXE}"
       COMMENT "NO KERNEL TO BUILD; SYMLINK ONLY..."
      VERBATIM)
   endif()
endfunction(process_sample)

# Declares the executable for one sample and, for every entry in
# TARGET_DEVICES, the per-device target created by process_sample().
#
# Arguments:
#   S_NAME     - sample name; both the source subdirectory under
#                CMAKE_CURRENT_SOURCE_DIR and the executable name
#   HAS_KERNEL - forwarded unchanged to process_sample()
function(build_sample_for_devices S_NAME HAS_KERNEL)
  # SNAME_EXE is also read inside process_sample(), which is called below.
  set(SNAME_EXE "${S_NAME}")

  # Every source file found in the sample's directory goes into the executable.
  aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/${S_NAME} S_NAME_SOURCES)
  add_executable(${SNAME_EXE} ${S_NAME_SOURCES})
  target_link_libraries(${SNAME_EXE} hsa-runtime64::hsa-runtime64 c stdc++ dl pthread rt)
  # NOTE(review): set() with no value and PARENT_SCOPE appears to unset the
  # normal variable HSACO_TARG_LIST in the caller's scope, not the cache entry
  # that process_sample() appends to - confirm this is the intent.
  set(HSACO_TARG_LIST PARENT_SCOPE)

  foreach(t ${TARGET_DEVICES})
    process_sample(${S_NAME} ${t} ${HAS_KERNEL})
  endforeach(t)
endfunction(build_sample_for_devices)

# Currently a no-op: the loop over TARGET_DEVICES has an empty body and the
# DST argument is never used.
function(add_symlink_to_exe DST)
  foreach(td ${TARGET_DEVICES})
  endforeach(td)
endfunction(add_symlink_to_exe)


# Make directories for each possible target device
foreach(td ${TARGET_DEVICES})
  file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/${td})
endforeach(td)

###########################
# SAMPLE SPECIFIC SECTION
###########################

set (HSACO_TARG_LIST "" CACHE INTERNAL HSACO_TARG_LIST)

set(KERN_SUFFIX "kernels.hsaco")

# Check if device-libs bitcode is following old or new layout
if(EXISTS "${BITCODE_DIR}/opencl.amdgcn.bc")
  set(BITCODE_ARGS "-nogpulib
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/opencl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ockl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ocml.amdgcn.bc")
else()
  set(BITCODE_ARGS "--hip-device-lib-path=${BITCODE_DIR}")
endif()

set(CL_FILE_LIST
               "${PROJECT_SOURCE_DIR}/binary_search/binary_search_kernels.cl")
build_sample_for_devices("binary_search" TRUE)

# RocR Info
build_sample_for_devices("rocrinfo" FALSE)

# IPC
build_sample_for_devices("ipc" FALSE)

# Async Mem. Copy
build_sample_for_devices("async_mem_copy" FALSE)

# Umbrella target: "make sample_kernels" builds every per-device target whose
# name was collected in HSACO_TARG_LIST.
add_custom_target(sample_kernels DEPENDS ${HSACO_TARG_LIST})
# NOTE(review): SAMPLE_EXE is not set anywhere in the visible part of this
# file (the functions above use SNAME_EXE, which is local to them), so this
# install() rule may end up with an empty target list - confirm.
install(TARGETS ${SAMPLE_EXE}
        ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
        LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
        RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)


================================================
FILE: rocrtst/samples/README.txt
================================================


To build the sample, first export the following environment variables:

export ROCR_DIR=<root of RocR install; for RocR includes and libraries>
export OPENCL_DIR=<root of OpenCL install; for required clang and bitcode libs>
export OPENCL_VER=<OpenCL version; e.g., "2.0">
export TARGET_DEVICE=<GPU type; e.g., "gfx803" or "gfx900">

Next, do the following:
mkdir build
cd build
cmake ..

Finally, do the following to build the application and respective kernels:

make
make sample_kernels


================================================
FILE: rocrtst/samples/async_mem_copy/async_mem_copy.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#include <cassert>
#include <iostream>

#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// Error-check helper for HSA calls.
//
// If err is anything other than HSA_STATUS_SUCCESS, prints the source line,
// file name and numeric status to std::cout, followed by the text obtained
// from hsa_status_string(), and then returns err from the ENCLOSING function.
// Otherwise it does nothing.
//
// The argument is evaluated more than once, so pass a variable holding the
// status rather than the call expression itself.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}

static const uint32_t kTestFillValue1 = 0xabcdef12;
static const uint32_t kTestFillValue2 = 0xba5eba11;
static const uint32_t kTestFillValue3 = 0xfeed5a1e;
static const uint32_t kTestInitValue = 0xbaadf00d;

// This structure holds an agent pointer and associated memory pool to be used
// for this test program.
struct async_mem_cpy_agent {
    // The HSA agent. The discovery callbacks reset it to {0} when no usable
    // pool was found for the agent.
    hsa_agent_t dev;
    // Memory pool selected for this agent by FindPool().
    hsa_amd_memory_pool_t pool;
    // Value of HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE for `pool`,
    // filled in by FindPool().
    size_t granule;
    // Buffer used by the copy test; not set by the discovery callbacks.
    void *ptr;
};

// Argument block passed (as void*) to FindPool() through
// hsa_amd_agent_iterate_memory_pools().
struct async_mem_cpy_pool_query {
  // Receives the chosen pool and its allocation granule.
  async_mem_cpy_agent* pool_info;
  // When its handle is non-zero, FindPool() skips any pool that this agent
  // is never allowed to access. A zero handle disables that check.
  hsa_agent_t peer_device;
};

// The set of agents the test works with: one CPU and up to two GPUs.
// FindGPUs() receives a pointer to this struct and fills gpu1 first, then
// gpu2; a zero gpu1.dev.handle is how it tells that gpu1 is still unset.
struct callback_args {
    struct async_mem_cpy_agent cpu;
    struct async_mem_cpy_agent gpu1;
    struct async_mem_cpy_agent gpu2;
};

// Find the least common multiple of 2 numbers.
//
// Returns 0 when either input is 0. Otherwise the result is a / gcd(a, b) * b,
// with the greatest common divisor obtained by Euclid's algorithm, so the
// cost does not grow with the size of the result. The product is formed in
// 64 bits and truncated to 32 bits on return; a result that does not fit in
// uint32_t therefore wraps modulo 2^32.
static uint32_t lcm(uint32_t a, uint32_t  b) {
    // A zero operand has no positive multiple, and stepping by zero would
    // never make progress, so handle it up front.
    if (a == 0 || b == 0) {
      return 0;
    }

    // Euclid's algorithm: when the loop ends, `divisor` holds gcd(a, b).
    uint32_t divisor = a;
    uint32_t rest = b;

    while (rest != 0) {
      const uint32_t next = divisor % rest;
      divisor = rest;
      rest = next;
    }

    // Divide before multiplying to keep the intermediate value small.
    const uint64_t multiple = (static_cast<uint64_t>(a) / divisor) * b;
    return static_cast<uint32_t>(multiple);
}

// Callback for hsa_amd_agent_iterate_memory_pools().
//
// "data" must point to a struct async_mem_cpy_pool_query. A pool is accepted
// when it 1) lives in the GLOBAL segment, 2) allows runtime allocation and
// 3) is not marked NEVER_ALLOWED for query->peer_device (this last check is
// skipped when the peer handle is 0).
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- pool accepted; query->pool_info->pool and
//    ->granule have been filled in
//  HSA_STATUS_SUCCESS -- pool rejected; the iterator should keep going
//  Other -- data was null or an HSA query failed
static hsa_status_t
FindPool(hsa_amd_memory_pool_t in_pool, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  async_mem_cpy_pool_query* query =
                            static_cast<async_mem_cpy_pool_query*>(data);

  // 1) Only pools in the global segment are of interest.
  hsa_amd_segment_t pool_segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(in_pool,
                             HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_ERR(status);
  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // 2) The pool must permit allocation.
  bool alloc_allowed;
  status = hsa_amd_memory_pool_get_info(in_pool,
              HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  RET_IF_HSA_ERR(status);
  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  // 3) If a peer was named, it must not be barred from the pool.
  if (query->peer_device.handle != 0) {
    hsa_amd_memory_pool_access_t peer_access =
                                     HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
    status = hsa_amd_agent_memory_pool_get_info(query->peer_device, in_pool,
                         HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &peer_access);
    RET_IF_HSA_ERR(status);

    if (peer_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      return HSA_STATUS_SUCCESS;
    }
  }

  // The pool qualifies: record its allocation granule and the pool itself.
  async_mem_cpy_agent* chosen = query->pool_info;
  status = hsa_amd_memory_pool_get_info(in_pool,
            HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &chosen->granule);
  RET_IF_HSA_ERR(status);

  chosen->pool = in_pool;

  return HSA_STATUS_INFO_BREAK;
}

// Callback for hsa_iterate_agents that looks for a CPU agent.
//
// "data" must point to a struct async_mem_cpy_agent. Non-CPU agents are
// skipped. For a CPU agent the struct's dev field is set and FindPool is run
// over the agent's memory pools; if no pool is accepted, dev is reset to {0}.
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- CPU agent has been found and stored. Iterator
//    should stop iterating
//  HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator
//    should keep iterating
//  Other -- Some error occurred
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void *data) {
  if (data == NULL) {
     return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t dev_type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
                                                                   &dev_type);
  RET_IF_HSA_ERR(status);

  if (dev_type != HSA_DEVICE_TYPE_CPU) {
    // Not a CPU: tell the calling iterator to keep iterating.
    return HSA_STATUS_SUCCESS;
  }

  async_mem_cpy_agent* cpu = static_cast<async_mem_cpy_agent*>(data);
  cpu->dev = agent;

  // No peer restriction applies when picking the CPU pool.
  async_mem_cpy_pool_query query;
  query.pool_info = cpu;
  query.peer_device.handle = 0;

  status = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &query);

  if (status != HSA_STATUS_INFO_BREAK) {
    // No usable pool on this agent (or an error): forget the agent.
    cpu->dev = {0};
  }

  // INFO_BREAK stops the agent iteration; anything else is passed through.
  return status;
}

// Callback for hsa_iterate_agents that collects up to two GPU agents.
//
// "data" must point to a struct callback_args. The first suitable GPU goes
// into gpu1 and the next into gpu2. For the second GPU, the chosen pool must
// additionally not be marked NEVER_ALLOWED for gpu1 (see FindPool).
// Return values:
//  HSA_STATUS_INFO_BREAK -- 2 GPU agents have been found and stored. Iterator
//    should stop iterating
//  HSA_STATUS_SUCCESS -- 2 GPU agents have not yet been found; 0 or 1 may
//    have been found; iterator function should keep iterating
//  Other -- Some error occurred
static hsa_status_t FindGPUs(hsa_agent_t agent, void *data) {
  if (data == NULL) {
     return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t dev_type;
  hsa_status_t status = hsa_agent_get_info(agent,
                                            HSA_AGENT_INFO_DEVICE, &dev_type);
  RET_IF_HSA_ERR(status);

  if (dev_type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_SUCCESS;
  }

  callback_args* found = static_cast<callback_args*>(data);

  // gpu1 is filled first; once it holds an agent we are filling gpu2.
  const bool filling_second = (found->gpu1.dev.handle != 0);
  async_mem_cpy_agent* slot = filling_second ? &found->gpu2 : &found->gpu1;

  async_mem_cpy_pool_query query = {0,0};
  if (filling_second) {
    // Check that gpu1 has peer access into the selected pool.
    query.peer_device = found->gpu1.dev;
  }

  // Tentatively claim the slot, then look for a suitable pool.
  slot->dev = agent;
  query.pool_info = slot;

  status = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &query);

  if (status != HSA_STATUS_INFO_BREAK) {
    // No pool accepted: release the slot, report real errors, and otherwise
    // let the calling iterator keep iterating.
    slot->dev = {0};
    RET_IF_HSA_ERR(status);
    return HSA_STATUS_SUCCESS;
  }

  // A pool was found. Stop once both GPUs are known, else keep looking.
  return filling_second ? HSA_STATUS_INFO_BREAK : HSA_STATUS_SUCCESS;
}

// This is the main test, showing various paths of async. copy. Source and
// destination agents and their respective pools should already be discovered.
// Additionally, buffers from the pools should already be allocated and
// available from the input parameters.
static hsa_status_t
AsyncCpyTest(async_mem_cpy_agent *dst, async_mem_cpy_agent *src,
                               callback_args *args, size_t sz, uint32_t val) {
  hsa_status_t err;
  hsa_signal_t copy_signal;

  // Initialize the system and destination buffers with a value so we can later validate it has
  // been overwritten
  void *sysPtr = args->cpu.ptr;

  err = hsa_amd_memory_fill(sysPtr, kTestInitValue, sz/sizeof(uint32_t));
  RET_IF_HSA_ERR(err);

  if(dst->ptr != sysPtr) {
    err = hsa_amd_memory_fill(dst->ptr, kTestInitValue, sz/sizeof(uint32_t));
    RET_IF_HSA_ERR(err);
  }

  // Fill the source buffer with the provided uint32_t value
  err = hsa_amd_memory_fill(src->ptr, val, sz/sizeof(uint32_t));
  RET_IF_HSA_ERR(err);

  // Make sure the target and destination agents have access to the buffer.
  hsa_agent_t ag_list[2] = {dst->dev, src->dev};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst->ptr);
  RET_IF_HSA_ERR(err);

  // Create a signal that will be used to inform us when the copy is done
  err = hsa_signal_create(1, 0, NULL, &copy_signal);
  RET_IF_HSA_ERR(err);

  // Do the copy...
  err = hsa_amd_memory_async_copy(dst->ptr, dst->dev, src->ptr, src->dev,
                                                    sz, 0, NULL, copy_signal);
  RET_IF_HSA_ERR(err);

  // Here we do a blocking wait. Alternatively, we could also use a
  // non-blocking wait in a loop, and do other work while waiting.
  if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
                                       1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
    printf("Async copy returned error value.\n");
    return HSA_STATUS_ERROR;
  }

  // Verify the copy was successful; copy from the dst buffer to the sysBuf,
  // (if the result is not already in sys. mem.) and check the sysBuf values
  if (dst->ptr != sysPtr) {
    if (src->ptr != sysPtr) {
      // In this case, we need to give the gpu dev that owns dst->ptr access
      // to the system memory we are going to copy to.
      hsa_agent_t ag_list_ck[2] = {dst->dev, args->cpu.dev};
      err = hsa_amd_agents_allow_access(2, ag_list_ck, NULL, sysPtr);
      RET_IF_HSA_ERR(err);
    }

    // Reset signal to 1
    hsa_signal_store_screlease(copy_signal, 1);
    err = hsa_amd_memory_async_copy(sysPtr, args->cpu.dev, dst->ptr, dst->dev,
                                                    sz, 0, NULL, copy_signal);
    RET_IF_HSA_ERR(err);

    if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
                                       1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
      printf("Async copy returned error value.\n");
      return HSA_STATUS_ERROR;
    }
  }

  // Check that the contents of the buffer are what is expected.
  for (uint32_t i = 0; i < sz/sizeof(uint32_t); ++i) {
    if (reinterpret_cast<uint32_t *>(sysPtr)[i] != val) {
      fprintf(stdout, "Expected 0x%x but got 0x%x in buffer at index %d.\n",
                             val, reinterpret_cast<uint32_t *>(sysPtr)[i], i);
      return HSA_STATUS_ERROR;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// This program illustrates the usage of the asynchronous copy capability of
// the RocR runtime library. The program will create a system memory buffer and
// a local buffer for each GPU, up to 2 GPUs, if the system has at least 2
// GPUs. The program will copy data to/from the host from/to the GPU. If 2
// GPUs are available, the program will also copy data from one to the other.
int main() {
  hsa_status_t err;
  struct callback_args args;
  bool twoGPUs = false;

  err = hsa_init();
  RET_IF_HSA_ERR(err);

  // First, find the cpu agent and associated pool
  args.cpu = {0, 0, 0};
  err = hsa_iterate_agents(FindCPUDevice, reinterpret_cast<void *>(&args.cpu));
  assert(err == HSA_STATUS_INFO_BREAK);
  if (err != HSA_STATUS_INFO_BREAK) {
    return -1;
  }

  // Now, find 1 or 2 (if possible) GPUs and associated pool(s) for our test
  args.gpu1 = {0, 0, 0};
  args.gpu2 = {0, 0, 0};
  err = hsa_iterate_agents(FindGPUs, &args);

  if (err == HSA_STATUS_INFO_BREAK) {
    twoGPUs = true;
  } else {
    // See if we at least have 1 GPU
    if (args.gpu1.dev.handle == 0) {
      fprintf(stdout,
       "GPU with accessible VRAM not found; at least 1 required. Exiting\n");
      return -1;
    }
    fprintf(stdout, "Only 1 GPU found with required VRAM. "
                                      "Peer-to-Peer copy will be skipped.\n");
  }

  // We will use the smallest amount of allocatable memory that works for all
  // potential sources and destinations of the copy
  size_t sz = lcm(args.cpu.granule, args.gpu1.granule);

  // Allocate memory on each source/destination
  if (twoGPUs) {
    sz = lcm(sz, args.gpu2.granule);

    err = hsa_amd_memory_pool_allocate(args.gpu2.pool, sz, 0,
                                    reinterpret_cast<void**>(&args.gpu2.ptr));
    RET_IF_HSA_ERR(err);
  }

  err = hsa_amd_memory_pool_allocate(args.cpu.pool, sz, 0,
                                     reinterpret_cast<void**>(&args.cpu.ptr));
  RET_IF_HSA_ERR(err);

  err = hsa_amd_memory_pool_allocate(args.gpu1.pool, sz, 0,
                                    reinterpret_cast<void**>(&args.gpu1.ptr));
  RET_IF_HSA_ERR(err);

  char name[64];
  err = hsa_agent_get_info(args.cpu.dev, HSA_AGENT_INFO_NAME, &name);
  fprintf(stdout, "CPU is \"%s\"\n", name);

  err = hsa_agent_get_info(args.gpu1.dev, HSA_AGENT_INFO_NAME, &name);
  fprintf(stdout, "GPU1 is \"%s\"\n", name);

  if (twoGPUs) {
    err = hsa_agent_get_info(args.gpu2.dev, HSA_AGENT_INFO_NAME, &name);
    fprintf(stdout, "GPU2 is \"%s\"\n", name);
  }

  fprintf(stdout,
              "Copying %lu bytes from gpu1 memory to system memory...\n", sz);
  err = AsyncCpyTest(&args.cpu, &args.gpu1, &args, sz, kTestFillValue1);
  RET_IF_HSA_ERR(err);
  fprintf(stdout, "Success!\n");

  fprintf(stdout,
              "Copying %lu bytes from system memory to gpu1 memory...\n", sz);
  err = AsyncCpyTest(&args.gpu1, &args.cpu, &args, sz, kTestFillValue2);
  RET_IF_HSA_ERR(err);
  fprintf(stdout, "Success!\n");

  if (twoGPUs) {
    fprintf(stdout,
                "Copying %lu bytes from gpu1 memory to gpu2 memory...\n", sz);
    err = AsyncCpyTest(&args.gpu2, &args.gpu1, &args, sz, kTestFillValue3);
    RET_IF_HSA_ERR(err);
    fprintf(stdout, "Success!\n");
  }

  // Clean up
  err = hsa_amd_memory_pool_free(args.cpu.ptr);
  RET_IF_HSA_ERR(err);

  err = hsa_amd_memory_pool_free(args.gpu1.ptr);
  RET_IF_HSA_ERR(err);

  if (twoGPUs) {
    err = hsa_amd_memory_pool_free(args.gpu2.ptr);
    RET_IF_HSA_ERR(err);
  }
}


================================================
FILE: rocrtst/samples/binary_search/binary_search.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <string>
#include <iostream>
#include <climits>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// Returns from the enclosing function with the given HSA status when that
// status is anything other than HSA_STATUS_SUCCESS, after printing the line,
// the file and the numeric status to std::cout.
//
// The argument is evaluated exactly once and captured in a local, so it is
// safe to pass an expression with side effects (e.g. a direct API call).
// Note that HSA_STATUS_INFO_BREAK is also treated as "not success" here.
#define RET_IF_HSA_ERR(err) { \
  const auto ret_if_hsa_err_status_ = (err); \
  if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) { \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
              __FILE__ << ". Call returned " << ret_if_hsa_err_status_ << \
              std::endl; \
    return ret_if_hsa_err_status_; \
  } \
}

// Problem dimensions used by InitializeBinarySearch(): the number of elements
// in the searched array, the value to look for, and the work-group size.
// Emulator builds (ROCRTST_EMULATOR_BUILD defined) use a much smaller problem.
#ifdef ROCRTST_EMULATOR_BUILD
static constexpr uint32_t kBinarySearchLength = 16;
static constexpr uint32_t kBinarySearchFindMe = 6;
static constexpr uint32_t kWorkGroupSize = 8;
#else
static constexpr uint32_t kBinarySearchLength = 512;
static constexpr uint32_t kBinarySearchFindMe = 108;
static constexpr uint32_t kWorkGroupSize = 256;
#endif

// Aggregates all of the state this sample needs: the search parameters, the
// host-visible buffers, the kernel description and the HSA handles.
struct BinarySearch {
  // ---- Binary search parameters ----
  uint32_t length;             // Number of elements in the searched array
  uint32_t work_group_size;    // Work-group size used for the dispatch
  uint32_t work_grid_size;     // Grid size used for the dispatch
  uint32_t num_sub_divisions;  // Number of segments examined per pass
  uint32_t find_me;            // Value being searched for

  // ---- Buffers needed for this application ----
  uint32_t* input;
  uint32_t* input_arr;
  uint32_t* input_arr_local;
  uint32_t* output;

  // ---- Kernel argument buffer and address ----
  // Start of the allocated kernarg memory; this is the pointer to deallocate.
  void* kern_arg_buffer;
  // Properly aligned address inside kern_arg_buffer to be used in the AQL
  // packet (don't use for deallocation).
  void* kern_arg_address;

  // ---- Kernel code ----
  std::string kernel_file_name;
  std::string kernel_name;
  uint32_t kernarg_size;
  uint32_t kernarg_align;

  // ---- HSA/RocR objects needed for this application ----
  hsa_agent_t gpu_dev;
  hsa_agent_t cpu_dev;
  hsa_signal_t signal;
  hsa_queue_t* queue;
  hsa_amd_memory_pool_t cpu_pool;
  hsa_amd_memory_pool_t gpu_pool;
  hsa_amd_memory_pool_t kern_arg_pool;

  // ---- Other items we need to populate the AQL packet ----
  uint64_t kernel_object;
  uint32_t group_segment_size;     ///< Kernel group seg size
  uint32_t private_segment_size;   ///< Kernel private seg size
};

// Seeds the BinarySearch structure with the compile-time problem dimensions
// and the name and location of the kernel to load. The number of
// sub-divisions is derived as length / work_group_size.
void InitializeBinarySearch(BinarySearch* bs) {
  // Problem dimensions
  bs->length = kBinarySearchLength;
  bs->find_me = kBinarySearchFindMe;
  bs->work_group_size = kWorkGroupSize;
  bs->num_sub_divisions = bs->length / bs->work_group_size;

  // Kernel code object and symbol
  bs->kernel_name = "binarySearch.kd";
  bs->kernel_file_name = "./binary_search_kernels.hsaco";
}

// Shared implementation behind the agent call-backs below. It takes one more
// argument than hsa_iterate_agents() passes to its call-back, so it has to be
// wrapped by a function with the call-back prototype.
//
// "data" must point to an hsa_agent_t; it receives the agent on a match.
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- "agent" is of the requested type (dev_type) and
//    has been stored through "data"
//  HSA_STATUS_SUCCESS -- "agent" is of some other type
//  Other -- "data" was null or querying the agent failed
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
                              hsa_device_type_t dev_type) {
  if (nullptr == data) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Ask the runtime what kind of device this agent is.
  hsa_device_type_t agent_type;
  hsa_status_t status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type);
  RET_IF_HSA_ERR(status);

  if (agent_type != dev_type) {
    return HSA_STATUS_SUCCESS;
  }

  hsa_agent_t* found = reinterpret_cast<hsa_agent_t*>(data);
  *found = agent;
  return HSA_STATUS_INFO_BREAK;
}

// Call-back with the prototype hsa_iterate_agents() expects; it stops the
// iteration at the first agent whose device type is GPU and stores that agent
// through "data".
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
  const hsa_device_type_t wanted = HSA_DEVICE_TYPE_GPU;
  return FindAgent(agent, data, wanted);
}

// Call-back with the prototype hsa_iterate_agents() expects; it stops the
// iteration at the first agent whose device type is CPU and stores that agent
// through "data".
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
  const hsa_device_type_t wanted = HSA_DEVICE_TYPE_CPU;
  return FindAgent(agent, data, wanted);
}

// Locates one GPU agent and one CPU agent and records them in bs->gpu_dev and
// bs->cpu_dev. Returns HSA_STATUS_SUCCESS when both were found and
// HSA_STATUS_ERROR otherwise.
hsa_status_t FindDevices(BinarySearch* bs) {
  // hsa_iterate_agents keeps calling the call-back while it returns
  // HSA_STATUS_SUCCESS and hands back the first other value it sees. Our
  // call-backs return HSA_STATUS_INFO_BREAK once an agent of the requested
  // type has been stored, so anything else means "not found".
  bs->gpu_dev.handle = 0;
  if (hsa_iterate_agents(FindGPUDevice, &bs->gpu_dev) !=
      HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  bs->cpu_dev.handle = 0;
  if (hsa_iterate_agents(FindCPUDevice, &bs->cpu_dev) !=
      HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  // Defensive check: a handle that is still zero was never filled in.
  if (bs->gpu_dev.handle == 0) {
    std::cout << "GPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
  }

  if (bs->cpu_dev.handle == 0) {
    std::cout << "CPU Device is not Created properly!" << std::endl;
    RET_IF_HSA_ERR(HSA_STATUS_ERROR);
  }

  return HSA_STATUS_SUCCESS;
}

// Decides whether "pool" is the kind of global pool the caller is after.
//
// The pool must belong to HSA_AMD_SEGMENT_GLOBAL. In addition, its
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag must match kern_arg:
// set when kern_arg is true, clear when kern_arg is false.
//
// "data" must point to an hsa_amd_memory_pool_t; it receives the pool on a
// match.
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- the pool qualifies and was stored through "data"
//  HSA_STATUS_SUCCESS -- no error, but the pool does not qualify
//  Other -- "data" was null or a pool query failed
//
// This function does not have the prototype required of a call-back for
// hsa_amd_agent_iterate_memory_pools, so it must be wrapped by a function
// that does.
static hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Requirement 1: the pool lives in the global segment.
  hsa_amd_segment_t pool_segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_ERR(status);

  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Requirement 2: the kernarg flag agrees with what was asked for.
  uint32_t global_flags;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  RET_IF_HSA_ERR(status);

  const bool is_kernarg_pool =
      (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;

  if (is_kernarg_pool != kern_arg) {
    return HSA_STATUS_SUCCESS;
  }

  hsa_amd_memory_pool_t* found =
      reinterpret_cast<hsa_amd_memory_pool_t*>(data);
  *found = pool;
  return HSA_STATUS_INFO_BREAK;
}

// Call-back for hsa_amd_agent_iterate_memory_pools() that accepts a pool in
// HSA_AMD_SEGMENT_GLOBAL whose HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
// flag is NOT set.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool want_kernarg_pool = false;
  return FindGlobalPool(pool, data, want_kernarg_pool);
}

// Call-back for hsa_amd_agent_iterate_memory_pools() that accepts a pool in
// HSA_AMD_SEGMENT_GLOBAL whose HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
// flag IS set.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool want_kernarg_pool = true;
  return FindGlobalPool(pool, data, want_kernarg_pool);
}

// Looks up the three memory pools this sample allocates from and stores them
// in the BinarySearch structure:
//   bs->cpu_pool      -- non-kernarg global pool of the CPU agent
//   bs->gpu_pool      -- non-kernarg global pool of the GPU agent
//   bs->kern_arg_pool -- kernarg global pool of the CPU agent
// Returns HSA_STATUS_SUCCESS when all three were found, HSA_STATUS_ERROR as
// soon as one of the searches does not end with HSA_STATUS_INFO_BREAK.
hsa_status_t FindPools(BinarySearch* bs) {
  hsa_status_t status = hsa_amd_agent_iterate_memory_pools(
      bs->cpu_dev, FindStandardPool, &bs->cpu_pool);
  if (status != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  status = hsa_amd_agent_iterate_memory_pools(
      bs->gpu_dev, FindStandardPool, &bs->gpu_pool);
  if (status != HSA_STATUS_INFO_BREAK) {
    return HSA_STATUS_ERROR;
  }

  status = hsa_amd_agent_iterate_memory_pools(
      bs->cpu_dev, FindKernArgPool, &bs->kern_arg_pool);

  return (status == HSA_STATUS_INFO_BREAK) ? HSA_STATUS_SUCCESS
                                           : HSA_STATUS_ERROR;
}

// Once the needed memory pools have been found and the BinarySearch structure
// has been updated with these handles, this function is then used to allocate
// memory from those pools.
// Devices with which a pool is associated already have access to the pool.
// However, other devices may also need to read or write to that memory. Below,
// we see how we can grant access to other devices to address this issue.
//
// On success bs->input holds bs->length non-decreasing values and bs->output
// and bs->input_arr are zero-filled. Returns HSA_STATUS_SUCCESS, or the
// status of the first HSA call that failed.
hsa_status_t AllocateAndInitBuffers(BinarySearch* bs) {
  hsa_status_t err;
  const size_t out_length = 4 * sizeof(uint32_t);
  const size_t in_length =
      static_cast<size_t>(bs->num_sub_divisions) * 2 * sizeof(uint32_t);

  // bs->input is filled with bs->length elements further down, so it has to
  // be at least that large; in_length alone only covers
  // 2 * num_sub_divisions elements.
  const size_t search_bytes =
      static_cast<size_t>(bs->length) * sizeof(uint32_t);
  const size_t input_length =
      (search_bytes > in_length) ? search_bytes : in_length;

  // In all of these examples, we want both the cpu and gpu to have access to
  // the buffer in question. We use the array of agents below in the subsequent
  // calls to hsa_amd_agents_allow_access() for this purpose.
  hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, input_length, 0,
                                     reinterpret_cast<void**>(&bs->input));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input);
  RET_IF_HSA_ERR(err);
  (void)memset(bs->input, 0, input_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_length, 0,
                                     reinterpret_cast<void**>(&bs->output));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output);
  RET_IF_HSA_ERR(err);
  // Clear the buffer that was just allocated (not bs->input again).
  (void)memset(bs->output, 0, out_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
                                     reinterpret_cast<void**>(&bs->input_arr));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr);
  RET_IF_HSA_ERR(err);
  // Clear the buffer that was just allocated (not bs->input again).
  (void)memset(bs->input_arr, 0, in_length);

  err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
                               reinterpret_cast<void**>(&bs->input_arr_local));
  RET_IF_HSA_ERR(err);
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local);
  RET_IF_HSA_ERR(err);

  // Binary-search application specific code...
  // Initialize input buffer with random values in an increasing order
  uint32_t max = bs->length * 20;
  bs->input[0] = 0;

  uint32_t seed = (unsigned int)time(NULL);
  srand(seed);

  // NOTE(review): "max * rand_r(&seed)" is evaluated in 32-bit unsigned
  // arithmetic and can wrap before the division, so the step added here is
  // not spread over [0, max]. The expression is left untouched because the
  // resulting value range may be what the rest of the sample relies on --
  // confirm the intent before changing it.
  for (uint32_t i = 1; i < bs->length; ++i) {
    bs->input[i] = bs->input[i - 1] +
     static_cast<uint32_t>(max * rand_r(&seed) / static_cast<float>(RAND_MAX));
  }

// #define VERBOSE 1
#ifdef VERBOSE
  std::cout << "Input array values:" << std::endl;

  for (uint32_t i = 0; i < bs->length; ++i) {
    std::cout << "input[" << i << "] = " << bs->input[i] << " ";

    if (i % 4 == 0) {
      std::cout << std::endl;
    }
  }

  std::cout << std::endl;
#endif

  return err;
}

// The code in this function illustrates how to load a kernel from
// pre-compiled code. The goal is to get a handle that can be later
// used in an AQL packet and also to extract information about the kernel
// that we will need. All of the information and the kernel handle will
// be saved to the BinarySearch structure. It will be used when we
// populate the AQL packet.
//
// The code object is first looked up at bs->kernel_file_name; if that cannot
// be opened, a sub-directory named after the GPU agent is tried instead.
//
// Returns HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR when the code
// object file cannot be opened, or the status of the first failing HSA call.
hsa_status_t LoadKernelFromObjFile(BinarySearch* bs) {
  hsa_status_t err;
  hsa_code_object_reader_t code_obj_rdr = {0};
  hsa_executable_t executable = {0};

  hsa_file_t file_handle = open(bs->kernel_file_name.c_str(), O_RDONLY);

  if (file_handle == -1) {
    // Fall back to "./<agent name>/<kernel file>". The result must go into
    // the outer file_handle; declaring a new local here would hide it and
    // the fallback would never take effect.
    char agent_name[64];
    err = hsa_agent_get_info(bs->gpu_dev, HSA_AGENT_INFO_NAME, agent_name);
    RET_IF_HSA_ERR(err);
    const std::string fallback_name =
        std::string("./") + agent_name + "/" + bs->kernel_file_name;
    file_handle = open(fallback_name.c_str(), O_RDONLY);
  }

  if (file_handle == -1) {
    std::cout << "failed to open " << bs->kernel_file_name.c_str() <<
              " at line " << __LINE__ << ", errno: " << errno << std::endl;
    return HSA_STATUS_ERROR;
  }

  err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
  close(file_handle);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_create_alt(HSA_PROFILE_FULL,
                HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev,
        code_obj_rdr, NULL, NULL);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_freeze(executable, NULL);
  RET_IF_HSA_ERR(err);

  // Look up the kernel symbol and pull out everything the AQL packet needs.
  hsa_executable_symbol_t kern_sym;
  err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(),
                                  bs->gpu_dev, 0, &kern_sym);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                                    HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                                          &bs->kernel_object);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                      HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
                                                   &bs->private_segment_size);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                        HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
                                                     &bs->group_segment_size);
  RET_IF_HSA_ERR(err);

  // Remaining queries not supported on code object v3.
  err = hsa_executable_symbol_get_info(kern_sym,
                      HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
                                                           &bs->kernarg_size);
  RET_IF_HSA_ERR(err);

  err = hsa_executable_symbol_get_info(kern_sym,
                 HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
                                                          &bs->kernarg_align);
  RET_IF_HSA_ERR(err);

  // A reported alignment of 0 is replaced by 16 first, so that the sanity
  // check below does not fire on exactly the case the fallback handles.
  bs->kernarg_align = (bs->kernarg_align == 0) ? 16 : bs->kernarg_align;
  assert(bs->kernarg_align >= 16 &&
         "Reported kernarg alignment is too small.");

  return err;
}

// This function shows how to do an asynchronous copy. We have to create a
// signal and use the signal to notify us when the copy has completed.
//
// Copies "size" bytes from "src" (owned by src_ag) to "dst" (owned by dst_ag)
// and blocks until the copy has finished. The signal created for the copy is
// destroyed on every path, including the failure paths.
//
// Returns HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR when the wait ends
// with a non-zero signal value, or the status of the first failing HSA call.
hsa_status_t AgentMemcpy(void* dst, const void* src,
                         size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
  hsa_signal_t s;
  hsa_status_t err;

  err = hsa_signal_create(1, 0, NULL, &s);
  RET_IF_HSA_ERR(err);

  err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);
  if (err != HSA_STATUS_SUCCESS) {
    // The copy was never queued; release the signal before reporting.
    (void)hsa_signal_destroy(s);
    RET_IF_HSA_ERR(err);
  }

  if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
    err = HSA_STATUS_ERROR;
    std::cout << "Async copy signal error" << std::endl;

    // Release the signal before reporting the failure.
    (void)hsa_signal_destroy(s);
    RET_IF_HSA_ERR(err);
  }

  err = hsa_signal_destroy(s);

  RET_IF_HSA_ERR(err);

  return err;
}

// AlignDown and AlignUp round a value (address) to an alignment boundary:
// AlignDown returns the nearest boundary at or below the value, AlignUp the
// nearest boundary at or above it. Both work by masking off the low bits, so
// the result is only meaningful when "alignment" is a power of two. A zero
// alignment trips the assertion in AlignDown.
static intptr_t
AlignDown(intptr_t value, size_t alignment) {
  assert(alignment != 0 && "Zero alignment");
  const size_t low_bits = alignment - 1;
  return static_cast<intptr_t>(value & ~low_bits);
}
static void*
AlignUp(void* value, size_t alignment) {
  // Step just short of the next boundary, then round down.
  const uintptr_t bumped =
      reinterpret_cast<uintptr_t>(value) + alignment - 1;
  return reinterpret_cast<void*>(AlignDown(bumped, alignment));
}

// Fills in the caller's AQL dispatch packet from the information collected
// and stored in the BinarySearch structure thus far. The dispatch is
// one-dimensional: only the x sizes come from "bs", y and z are fixed at 1.
void PopulateAQLPacket(BinarySearch const* bs,
                       hsa_kernel_dispatch_packet_t* aql) {
  // Header is a dummy value for now; it is set right before the doorbell
  // ring.
  aql->header = 0;
  aql->setup = 1;

  // Work-group shape
  aql->workgroup_size_x = bs->work_group_size;
  aql->workgroup_size_y = 1;
  aql->workgroup_size_z = 1;

  // Grid shape
  aql->grid_size_x = bs->work_grid_size;
  aql->grid_size_y = 1;
  aql->grid_size_z = 1;

  // Kernel memory requirements
  aql->private_segment_size = bs->private_segment_size;
  aql->group_segment_size = bs->group_segment_size;

  // Kernel code, its arguments and the completion signal
  aql->kernel_object = bs->kernel_object;
  aql->kernarg_address = bs->kern_arg_address;
  aql->completion_signal = bs->signal;
}
/*
 * Reserve the next packet slot in the queue and copy the provided AQL packet
 * into it, leaving out the first 32 bits (the header and setup fields).
 * Those are written last, separately from this function.
 */
void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
                     hsa_queue_t* q) {
  hsa_kernel_dispatch_packet_t* const ring =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(q->base_address);
  const uint32_t wrap_mask = q->size - 1;

  // Claim one slot by advancing the write index; the previous index,
  // wrapped with the mask, selects the slot to fill.
  const uint64_t write_idx = hsa_queue_add_write_index_relaxed(q, 1);
  hsa_kernel_dispatch_packet_t* slot = &ring[write_idx & wrap_mask];

  slot->workgroup_size_x = in_aql->workgroup_size_x;
  slot->workgroup_size_y = in_aql->workgroup_size_y;
  slot->workgroup_size_z = in_aql->workgroup_size_z;

  slot->grid_size_x = in_aql->grid_size_x;
  slot->grid_size_y = in_aql->grid_size_y;
  slot->grid_size_z = in_aql->grid_size_z;

  slot->private_segment_size = in_aql->private_segment_size;
  slot->group_segment_size = in_aql->group_segment_size;

  slot->kernel_object = in_aql->kernel_object;
  slot->kernarg_address = in_aql->kernarg_address;
  slot->completion_signal = in_aql->completion_signal;
}

// Allocates a buffer from the kern_arg pool found earlier and copies the
// kernel argument values into it.
//
//  bs          -- supplies the pool, agents and required alignment; receives
//                 kern_arg_buffer (start of the allocation, to be freed
//                 later) and kern_arg_address (aligned start of the
//                 arguments)
//  args        -- argument values to copy
//  arg_size    -- number of bytes to copy from "args"
//  aql_buf_ptr -- receives the aligned address to place in the AQL packet
//
// Returns HSA_STATUS_SUCCESS, or the status of the first failing HSA call.
hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args,
                                 size_t arg_size, void** aql_buf_ptr) {
  // The arguments have to start at the alignment reported for the kernel
  // (bs->kernarg_align). The allocation may not come back aligned that way,
  // so ask for extra room -- twice the alignment -- to be able to adjust.
  const size_t alignment = bs->kernarg_align;
  const size_t padded_size = arg_size + (alignment << 1);

  void* raw_buf = nullptr;
  hsa_status_t status = hsa_amd_memory_pool_allocate(bs->kern_arg_pool,
                                                     padded_size, 0, &raw_buf);
  RET_IF_HSA_ERR(status);

  // Remember both the start of the allocation and the aligned start of the
  // arguments inside it.
  bs->kern_arg_buffer = raw_buf;
  bs->kern_arg_address = AlignUp(raw_buf, alignment);

  assert(arg_size >= bs->kernarg_size);
  assert(((uintptr_t)bs->kern_arg_address + arg_size) <
         ((uintptr_t)bs->kern_arg_buffer + padded_size));

  (void)memcpy(bs->kern_arg_address, args, arg_size);

  // Make sure both the CPU and GPU can access the kernel arguments
  hsa_agent_t sharers[2] = {bs->gpu_dev, bs->cpu_dev};
  status = hsa_amd_agents_allow_access(2, sharers, NULL, bs->kern_arg_buffer);
  RET_IF_HSA_ERR(status);

  // Hand the aligned address back for use in the AQL packet.
  *aql_buf_ptr = bs->kern_arg_address;

  return HSA_STATUS_SUCCESS;
}

// This wrapper atomically writes the provided header and setup to the
// provided AQL packet with a single 32-bit store using release ordering:
// "header" goes into the low 16 bits and "setup" into the high 16 bits. The
// provided AQL packet address should be in the queue memory space.
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                  hsa_kernel_dispatch_packet_t* queue_packet) {
  // Widen to an unsigned 32-bit type before shifting. Left as uint16_t, the
  // operands are promoted to (signed) int and a setup value with bit 15 set
  // would be shifted into the sign bit.
  const uint32_t header_and_setup =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);
  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
                   header_and_setup, __ATOMIC_RELEASE);
}

// Once all the required data for kernel execution is collected (in this
// application it is stored in the BinarySearch structure) we can put it in
// an AQL packet and ring the queue door bell to tell the command processor to
// execute it.
//
// The kernel is dispatched once per pass; each pass narrows the range
// [global_lower_bound, global_upper_bound] of bs->input that can contain
// bs->find_me. A final linear scan on the host over the remaining range
// decides whether the element is present.
//
// Results are reported through bs->output[0..2] and on stdout. The function
// returns HSA_STATUS_SUCCESS whether or not the element was found; a failing
// HSA call, or a non-zero completion signal value after the wait, is returned
// as an error.
hsa_status_t Run(BinarySearch* bs) {
  hsa_status_t err;

  std::cout << "Executing kernel " << bs->kernel_name << std::endl;

  // Adjust the size of workgroup
  // This is mostly application specific.
  if (bs->work_group_size > 64) {
    bs->work_group_size = 64;
    bs->num_sub_divisions = bs->length / bs->work_group_size;
  }
  if (bs->num_sub_divisions < bs->work_group_size) {
    bs->num_sub_divisions = bs->work_group_size;
  }

  bs->work_grid_size = bs->num_sub_divisions;

  // Explanation of BinarySearch algorithm.
  /*
   * Since a plain binary search on the GPU would not achieve much benefit
   * over the CPU we are doing an N'ary search. We split the array into N
   * segments every pass and therefore get log (base N) passes instead of log
   * (base 2) passes.
   *
   * In every pass, only the thread that can potentially have the element we
   * are looking for writes to the output array. For ex: if we are looking to
   * find 4567 in the array and every thread is searching over a segment of
   * 1000 values and the input array is 1, 2, 3, 4,... then the first thread
   * is searching in 1 to 1000, the second one from 1001 to 2000, etc. The
   * first one does not write to the output. The second one doesn't either.
   * The fifth one however is from 4001 to 5000. So it can potentially have
   * the element 4567 which lies between them.
   *
   * This particular thread writes its segment number to output[0] and sets
   * output[3] to 1 to flag that some segment matched. The host turns the
   * segment number into the lower and upper bound of the next search range.
   *
   * The next pass would subdivide 4001 to 5000 into smaller segments and
   * continue the same process from there.
   *
   * The passes stop when no segment matched in a pass, or when the segment
   * size has shrunk to 1 or less. The remaining range is then scanned on the
   * host.
   */

  uint32_t global_lower_bound = 0;
  uint32_t global_upper_bound = bs->length - 1;
  uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) /
                          bs->num_sub_divisions;

  // The input is sorted, so a value outside [first, last] cannot be present.
  if ((bs->input[0] > bs->find_me) ||
      (bs->input[bs->length - 1] < bs->find_me)) {
    bs->output[0] = 0;
    bs->output[1] = bs->length - 1;
    bs->output[2] = 0;
    std::cout << "Returning too early" << std::endl;
    return HSA_STATUS_SUCCESS;
  }

  // output[3] doubles as the "a segment matched in the last pass" flag; start
  // it at 1 so that the first pass runs.
  bs->output[3] = 1;

  // Setup the kernel args
  // See the meta-data for the compiled OpenCL kernel code to ascertain
  // the sizes, padding and alignment required for kernel arguments.
  // This can be seen by executing
  // $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
  // The kernel code will expect the following arguments aligned as shown.
  typedef uint32_t uint2[2];
  typedef uint32_t uint4[4];
  struct __attribute__((aligned(16))) local_args_t {
    uint4* outputArray;
    uint2*  sortedArray;
    uint32_t findMe;
    uint32_t pad;
    uint64_t global_offset_x;
    uint64_t global_offset_y;
    uint64_t global_offset_z;
    uint64_t printf_buffer;
    uint64_t default_queue;
    uint64_t completion_action;
  } local_args;

  local_args.outputArray = reinterpret_cast<uint4*>(bs->output);
  local_args.sortedArray = reinterpret_cast<uint2*>(bs->input_arr_local);
  local_args.findMe = bs->find_me;
  // The whole structure is copied into kernel argument memory below, so give
  // the padding word a defined value too.
  local_args.pad = 0;
  local_args.global_offset_x = 0;
  local_args.global_offset_y = 0;
  local_args.global_offset_z = 0;
  local_args.printf_buffer = 0;
  local_args.default_queue = 0;
  local_args.completion_action = 0;

  // Copy the kernel args structure into kernel arg memory
  err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args),
                            &bs->kern_arg_address);
  RET_IF_HSA_ERR(err);

  // Populate an AQL packet with the info we've gathered
  hsa_kernel_dispatch_packet_t aql;
  PopulateAQLPacket(bs, &aql);

  // Size in bytes of the (lower, upper) value pairs handed to the kernel:
  // one pair per sub-division.
  uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);

  while ((sub_div_size > 1) && (bs->output[3] != 0)) {
    // Build the (first value, last value) pair of every segment of the
    // current search range. The indices are offset by global_lower_bound so
    // that passes after the first subdivide the range selected by the
    // previous pass rather than the start of the array.
    for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) {
      const uint32_t idx1 = global_lower_bound + i * sub_div_size;
      const uint32_t idx2 = idx1 + sub_div_size - 1;
      bs->input_arr[2 * i] = bs->input[idx1];
      bs->input_arr[2 * i + 1] = bs->input[idx2];
    }

    // Copy kernel parameter from system memory to local memory
    err = AgentMemcpy(reinterpret_cast<uint8_t*>(bs->input_arr_local),
                      reinterpret_cast<uint8_t*>(bs->input_arr),
                                        in_length, bs->gpu_dev, bs->cpu_dev);

    RET_IF_HSA_ERR(err);

    // Reset the "segment matched" flag; the kernel sets it again on a hit
    bs->output[3] = 0;

    // Dispatch kernel with global work size, work group size with ONE
    // dimension and wait for kernel to complete

    // Compute the write index of queue and copy Aql packet into it
    uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue);

    const uint32_t mask = bs->queue->size - 1;

    // This function simply copies the data we've collected so far in our
    // local AQL packet to the queue, except the setup and header fields.
    WriteAQLToQueue(&aql, bs->queue);

    uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    // Set the packet's type, acquire and release fences. This should be done
    // atomically after all the other fields have been set, using release
    // memory ordering to ensure all the fields are set when the door bell
    // signal is activated.
    void* q_base = bs->queue->base_address;

    AtomicSetPacketHeader(aql_header, aql.setup,
                      &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                                                   (q_base))[que_idx & mask]);

    // Increment the write index and ring the doorbell to dispatch kernel.
    hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1));
    hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx);

    // Wait on the dispatch signal until the kernel is finished.
    // Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of
    // HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we
    // have below.
    // The call below will block until the condition is met. Below we have said
    // the condition is that the signal value (initialized to 1) associated with
    // the queue is less than 1. When the kernel associated with the queued AQL
    // packet has completed execution, the signal value is automatically
    // decremented by the packet processor.
    hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal,
                               HSA_SIGNAL_CONDITION_LT, 1,
                               UINT64_MAX, HSA_WAIT_STATE_BLOCKED);

    // value should be 0, or we timed-out
    if (value) {
      std::cout << "Timed out waiting for kernel to complete?" << std::endl;
      RET_IF_HSA_ERR(HSA_STATUS_ERROR);
    }

    // Reset the signal to its initial value for the next iteration
    hsa_signal_store_screlease(bs->signal, 1);

    // Binary search algorithm stuff...
    // If no segment matched, output[0] was not written in this pass; keep the
    // current bounds instead of narrowing from a stale segment number.
    if (bs->output[3] == 0) {
      break;
    }

    // output[0] is a segment number relative to the current range, so the
    // new lower bound is an offset from the current lower bound.
    global_lower_bound += bs->output[0] * sub_div_size;
    global_upper_bound = global_lower_bound + sub_div_size - 1;
    sub_div_size = (global_upper_bound - global_lower_bound + 1) /
                   bs->num_sub_divisions;
  }

  uint32_t element_index = UINT_MAX;

  // Scan what is left of the search range on the host.
  for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) {
    if (bs->input[i] == bs->find_me) {
      element_index = i;
      bs->output[0] = i;
      bs->output[1] = i + 1;
      bs->output[2] = 1;
      break;
    }

    // Element is not found in region specified
    // by global lower bound to global upper bound
    bs->output[2] = 0;
  }

  uint32_t is_elem_found = bs->output[2];

  std::cout << "Lower bound = " << global_lower_bound << std::endl;
  std::cout << "Upper bound = " << global_upper_bound << std::endl;
  std::cout << "Element search for = " << bs->find_me << std::endl;


  if (is_elem_found == 1) {
    std::cout << "Element found at index " << element_index << std::endl;
  } else {
    std::cout << "Element value " << bs->find_me << " not found" << std::endl;
  }

  return HSA_STATUS_SUCCESS;
}

// Release all the RocR resources we have acquired in this application.
// The memory pool buffers are freed first (input, output, input_arr and the
// kernel argument buffer, in that order), then the queue and the completion
// signal are destroyed, and finally the runtime is shut down. The first
// failing call is reported and its status returned; the remaining steps are
// then skipped.
// NOTE(review): bs->input_arr_local is not released here -- confirm whether
// it is owned and freed elsewhere.
hsa_status_t CleanUp(BinarySearch* bs) {
  hsa_status_t status;

  void* const pool_buffers[] = {bs->input, bs->output, bs->input_arr,
                                bs->kern_arg_buffer};
  for (void* buffer : pool_buffers) {
    status = hsa_amd_memory_pool_free(buffer);
    RET_IF_HSA_ERR(status);
  }

  status = hsa_queue_destroy(bs->queue);
  RET_IF_HSA_ERR(status);

  status = hsa_signal_destroy(bs->signal);
  RET_IF_HSA_ERR(status);

  status = hsa_shut_down();
  RET_IF_HSA_ERR(status);

  return HSA_STATUS_SUCCESS;
}

// Entry point of the binary search sample. Command line arguments are not
// used. Returns 0 on success; if any step fails, the failing HSA status is
// reported by RET_IF_HSA_ERR and returned as the exit code.
int main(int argc, char* argv[]) {
  // This BinarySearch structure (bs) below holds all of the appl. specific
  // info we need to run the sample. This includes algorithm specific
  // information as well as handles to RocR/HSA objects.

  // The basic structure of this sample is to fill in this structure with the
  // required RocR/HSA handles to RocR resources (e.g., agents, memory pools,
  // queues, etc.) and then dispatch the packets to the queue, and examine the
  // output.

  BinarySearch bs;
  hsa_status_t err;

  // Set some working values specific to this application
  InitializeBinarySearch(&bs);

  // hsa_init() initializes internal data structures and causes devices
  // (agents), memory pools and other resources to be discovered.
  err = hsa_init();
  RET_IF_HSA_ERR(err);

  // Find the agents needed for the sample
  err = FindDevices(&bs);
  RET_IF_HSA_ERR(err);

  // Create the completion signal used when dispatching a packet
  err = hsa_signal_create(1, 0, NULL, &bs.signal);
  RET_IF_HSA_ERR(err);

  // Create a queue to submit our binary search AQL packets
  err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                         UINT32_MAX, UINT32_MAX, &bs.queue);
  RET_IF_HSA_ERR(err);

  // Find the HSA memory pools we need to run this sample
  err = FindPools(&bs);
  RET_IF_HSA_ERR(err);

  // Allocate memory from the correct memory pool, and initialize them as
  // needed for the algorithm.
  err = AllocateAndInitBuffers(&bs);
  RET_IF_HSA_ERR(err);

  // Create a kernel object from the pre-compiled kernel, and read some
  // attributes associated with the kernel that we will need.
  err = LoadKernelFromObjFile(&bs);
  RET_IF_HSA_ERR(err);

  // Fill in the AQL packet, assign the kernel arguments, enqueue the packet,
  // "ring" the doorbell, and wait for completion.
  err = Run(&bs);
  RET_IF_HSA_ERR(err);

  // Release all the RocR resources we've acquired and shutdown HSA. A failure
  // here is reported like any other step instead of being silently dropped.
  err = CleanUp(&bs);
  RET_IF_HSA_ERR(err);

  return 0;
}


#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/samples/binary_search/binary_search_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/**
 * One instance of this kernel call is a thread, and each thread owns one
 * segment of the search space. The segment is described by the pair
 * sortedArray[tid]: .x holds the value at the segment's lower end and .y the
 * value at its upper end.
 *
 * A thread whose segment can contain findMe (x <= findMe <= y) publishes its
 * own index in outputArray[0].x and raises the "hit" flag in outputArray[0].w,
 * so that this segment can become the search space of the next pass. Threads
 * whose segment cannot contain findMe write nothing.
 */
__kernel void
binarySearch(__global uint4 * outputArray,
             __const __global uint2  * sortedArray,
             const   unsigned int findMe) {
  const unsigned int segment = get_global_id(0);

  // Values at the two ends of this thread's segment
  const uint2 bounds = sortedArray[segment];

  // Only the thread whose segment brackets findMe reports back
  if ((bounds.x <= findMe) && (findMe <= bounds.y)) {
    outputArray[0].x = segment;
    outputArray[0].w = 1;
  }
}


// Multi-key variant: each work-item owns a fixed window of 256 consecutive
// elements of "input" starting at get_global_id(0) * 256. For every key that
// lies between the first and the last element of that window (inclusive),
// the window's first index is stored in output[<key index>].
__kernel void
binarySearch_mulkeys(__global int *keys,
                     __global uint *input,
                     const unsigned int numKeys,
                     __global int *output) {

  const int windowFirst = (int)get_global_id(0) * 256;
  const int windowLast = windowFirst + 255;

  for (int k = 0; k < numKeys; ++k) {
    // Key lies below this window
    if (keys[k] < input[windowFirst])
      continue;

    // Key lies above this window
    if (keys[k] > input[windowLast])
      continue;

    output[k] = windowFirst;
  }

}


// Multi-key variant in which numSubdivisions work-items cooperate on each
// key: work-item g searches for keys[g / numSubdivisions] inside subdivision
// (g % numSubdivisions) of "input" with a classic binary search. When the key
// is found, its index in "input" is stored in output[g / numSubdivisions];
// otherwise nothing is written.
__kernel void
binarySearch_mulkeysConcurrent(__global uint *keys,
                               __global uint *input,
                               const unsigned int inputSize, // num. of inputs
                               const unsigned int numSubdivisions,
                               __global int *output) {

  int lBound = (get_global_id(0) % numSubdivisions) * (inputSize / numSubdivisions);
  int uBound = lBound + inputSize / numSubdivisions;
  int myKey = keys[get_global_id(0) / numSubdivisions];
  int mid;

  // uBound is an inclusive bound that reaches one element past this
  // subdivision. For the last subdivision that element can be
  // input[inputSize], which is outside the array, so clamp the bound to the
  // last valid index.
  int lastIndex = (int)inputSize - 1;
  if (uBound > lastIndex) {
    uBound = lastIndex;
  }

  while(uBound >= lBound) {
    mid = (lBound + uBound) / 2;
    if(input[mid] == myKey) {
      output[get_global_id(0) / numSubdivisions] = mid;
      return;
    } else if(input[mid] > myKey) {
      uBound = mid - 1;
    } else {
      lBound = mid + 1;
    }
  }
}


================================================
FILE: rocrtst/samples/ipc/ipc.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/wait.h>

#include <cassert>
#include <iostream>

#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

static const uint32_t kShmemID = 1594685;

// Checks the status of an HSA call. For anything other than
// HSA_STATUS_SUCCESS the source location, the numeric status and its textual
// description are printed to stdout and the status is returned from the
// enclosing function.
// The argument is evaluated exactly once, and a missing description (msg is
// left null when hsa_status_string() does not provide one) is never streamed
// as a null pointer.
#define RET_IF_HSA_ERR(err) { \
  const auto ret_if_hsa_err_status = (err); \
  if (ret_if_hsa_err_status != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(ret_if_hsa_err_status, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << \
                          ret_if_hsa_err_status << std::endl; \
    std::cout << (msg ? msg : "(no description available)") << std::endl; \
    return ret_if_hsa_err_status; \
  } \
}

// State shared between main() and the agent / memory pool iterator callbacks
// in this file. A pointer to it is passed to the callbacks through their
// "data" argument, and the callbacks fill in the fields as they find suitable
// agents and pools.
struct callback_args {
  // CPU agent; stored by FindCPUDevice()
  hsa_agent_t host;
  // GPU agent; stored by FindGpu()
  hsa_agent_t device;
  // Pool of "host" accepted by FindPool(); stored by FindCPUPool()
  hsa_amd_memory_pool_t cpu_pool;
  // Pool of "device" accepted by FindPool(); stored by FindDevicePool()
  hsa_amd_memory_pool_t gpu_pool;
  // Allocation granule reported for gpu_pool (or 4 in emulator builds);
  // stored by FindDevicePool() and used as the size of the test buffers
  size_t gpu_mem_granule;
};

// Decides whether a memory pool is usable by this sample. A pool qualifies
// when it 1) lives in the GLOBAL segment, 2) permits runtime allocation and
// 3) is not marked "never allowed" for access by the given agent.
// Returns HSA_STATUS_INFO_BREAK for a qualifying pool, HSA_STATUS_SUCCESS for
// a pool that fails one of the tests, or the failing status if one of the
// queries itself fails.
static hsa_status_t
FindPool(hsa_amd_memory_pool_t in_pool, hsa_agent_t agent) {
  hsa_status_t status;

  hsa_amd_segment_t pool_segment;
  status = hsa_amd_memory_pool_get_info(in_pool,
                             HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_ERR(status);

  if (pool_segment == HSA_AMD_SEGMENT_GLOBAL) {
    bool alloc_allowed;
    status = hsa_amd_memory_pool_get_info(in_pool,
              HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
    RET_IF_HSA_ERR(status);

    if (alloc_allowed) {
      hsa_amd_memory_pool_access_t agent_access =
                                     HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
      status = hsa_amd_agent_memory_pool_get_info(agent, in_pool,
                        HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &agent_access);
      RET_IF_HSA_ERR(status);

      if (agent_access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
        // All three criteria are met
        return HSA_STATUS_INFO_BREAK;
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Callback for hsa_amd_agent_iterate_memory_pools() used on the GPU agent.
// "data" must point to a "struct callback_args" whose "device" field is
// already set. When FindPool() accepts the pool for that device, the pool is
// recorded in "gpu_pool", its allocation granule in "gpu_mem_granule" (a
// fixed value of 4 in emulator builds), and HSA_STATUS_INFO_BREAK is returned
// to stop the iteration. For any other FindPool() result,
// HSA_STATUS_SUCCESS is returned so the iteration continues.
static hsa_status_t FindDevicePool(hsa_amd_memory_pool_t pool, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  struct callback_args *cb = static_cast<struct callback_args *>(data);

  hsa_status_t status = FindPool(pool, cb->device);
  if (status != HSA_STATUS_INFO_BREAK) {
    // Not the pool we are after; let the iterator move on
    return HSA_STATUS_SUCCESS;
  }

  cb->gpu_pool = pool;

#ifdef ROCRTST_EMULATOR_BUILD
  cb->gpu_mem_granule = 4;
#else
  status = hsa_amd_memory_pool_get_info(cb->gpu_pool,
         HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &cb->gpu_mem_granule);
  RET_IF_HSA_ERR(status);
#endif

  // We found what we were looking for, so stop the iteration
  return HSA_STATUS_INFO_BREAK;
}

// Callback for hsa_amd_agent_iterate_memory_pools() used on the CPU agent.
// "data" must point to a "struct callback_args" whose "host" field is already
// set. The result of FindPool() for that host is passed straight back to the
// iterator; when it is HSA_STATUS_INFO_BREAK the pool is also recorded in
// "cpu_pool".
static hsa_status_t FindCPUPool(hsa_amd_memory_pool_t pool, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  struct callback_args *cb = static_cast<struct callback_args *>(data);

  const hsa_status_t result = FindPool(pool, cb->host);
  if (result == HSA_STATUS_INFO_BREAK) {
    cb->cpu_pool = pool;
  }

  return result;
}


// This function is meant to be a call-back to hsa_iterate_agents. It looks
// for the first GPU agent that owns a memory pool accepted by
// FindDevicePool(), and records that agent in the "device" field of the
// "struct callback_args" pointed to by "data".
// Return values:
//  HSA_STATUS_INFO_BREAK -- a suitable GPU agent has been found and stored.
//    Iterator should stop iterating
//  HSA_STATUS_SUCCESS -- the agent is not a GPU, or none of its pools is
//    suitable; iterator should keep iterating
//  Other -- Some error occurred
static hsa_status_t FindGpu(hsa_agent_t agent, void *data) {
  if (data == NULL) {
     return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t agent_type;
  hsa_status_t status = hsa_agent_get_info(agent,
                                          HSA_AGENT_INFO_DEVICE, &agent_type);
  RET_IF_HSA_ERR(status);

  const bool is_gpu = (agent_type == HSA_DEVICE_TYPE_GPU);
  if (!is_gpu) {
    return HSA_STATUS_SUCCESS;
  }

  struct callback_args *cb = static_cast<struct callback_args *>(data);

  // Tentatively record this GPU: FindDevicePool() reads cb->device while the
  // pools are being iterated.
  cb->device = agent;
  status = hsa_amd_agent_iterate_memory_pools(agent, FindDevicePool, cb);

  if (status == HSA_STATUS_INFO_BREAK) {
    // A suitable pool was found on this GPU; stop iterating over agents
    return status;
  }

  // No suitable pool on this GPU; forget it again
  cb->device = {0};

  RET_IF_HSA_ERR(status);

  // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
  return HSA_STATUS_SUCCESS;
}

// This function is meant to be a call-back to hsa_iterate_agents. Agents that
// are not CPUs are skipped. For a CPU agent, the agent is stored in the
// "host" field of the "struct callback_args" pointed to by "data" and its
// memory pools are searched with FindCPUPool().
//
// Return values:
//  HSA_STATUS_INFO_BREAK -- a CPU agent with a suitable pool has been found
//    and stored. Iterator should stop iterating
//  HSA_STATUS_SUCCESS -- no such CPU agent has been found yet; iterator
//    should keep iterating
//  Other -- Some error occurred
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void *data) {
  if (data == NULL) {
     return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t agent_type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
                                                                 &agent_type);
  RET_IF_HSA_ERR(status);

  if (agent_type != HSA_DEVICE_TYPE_CPU) {
    // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep
    // iterating
    return HSA_STATUS_SUCCESS;
  }

  struct callback_args *cb = static_cast<struct callback_args *>(data);

  // FindCPUPool() reads cb->host while the pools are being iterated
  cb->host = agent;
  status = hsa_amd_agent_iterate_memory_pools(agent, FindCPUPool, cb);

  if (status != HSA_STATUS_INFO_BREAK) {
    // No suitable pool on this CPU (or an error occurred); forget the agent
    cb->host = {0};
  }

  return status;
}

// This function will test whether the gpu-local buffer has been filled
// with an expected value and return an error if not. The expected value is
// also replaced with a new value.
//
// Parameters:
//  args -- supplies the CPU/GPU agents, the CPU pool and the buffer size
//    (gpu_mem_granule bytes)
//  gpu_src_ptr -- gpu-local buffer to check and refill
//  exp_cur_val -- value every 32-bit word of the buffer is expected to hold
//  new_val -- value written over each word that matched
// Returns HSA_STATUS_SUCCESS when every word matched, HSA_STATUS_ERROR when a
// mismatch was found or a copy did not complete, or the status of a failing
// HSA call.
//
// Implementation notes: We create a buffer in system memory and copy
// the gpu-local data buffer to be tested to this system memory buffer.
// We also write the system memory buffer with the new value, and then copy
// it back the gpu-local buffer. On a mismatch the words checked so far have
// already been replaced and are still copied back; the mismatch is reported
// through the return value once the temporary resources are released.
// The temporary signal and system buffer are not released when one of the
// HSA calls fails part way through.
static hsa_status_t
CheckAndFillBuffer(struct callback_args *args, void *gpu_src_ptr,
                                     uint32_t exp_cur_val, uint32_t new_val) {
  hsa_signal_t copy_signal;
  size_t sz = args->gpu_mem_granule;
  hsa_agent_t cpu_ag = args->host;
  hsa_agent_t gpu_ag = args->device;
  hsa_status_t err;
  // Outcome of the content check. It is kept separate from "err" so that the
  // HSA calls made after the check cannot overwrite it.
  hsa_status_t check_result = HSA_STATUS_SUCCESS;

  err = hsa_signal_create(1, 0, NULL, &copy_signal);
  RET_IF_HSA_ERR(err);

  uint32_t *sysBuf;

  err = hsa_amd_memory_pool_allocate(args->cpu_pool, sz, 0,
                                          reinterpret_cast<void **>(&sysBuf));
  RET_IF_HSA_ERR(err);

  // Both agents take part in the copies below
  hsa_agent_t ag_list[2] = {args->device, args->host};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, sysBuf);
  RET_IF_HSA_ERR(err);

  // GPU -> system memory
  err = hsa_amd_memory_async_copy(sysBuf, cpu_ag, gpu_src_ptr, gpu_ag,
                                                    sz, 0, NULL, copy_signal);
  RET_IF_HSA_ERR(err);

  if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
                                       1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
    printf("Async copy returned error value.\n");
    return HSA_STATUS_ERROR;
  }

  uint32_t count = sz/sizeof(uint32_t);

  for (uint32_t i = 0; i < count; ++i) {
    if (sysBuf[i] != exp_cur_val) {
      fprintf(stdout, "Expected %u but got %u in buffer.\n",
                                                      exp_cur_val, sysBuf[i]);
      check_result = HSA_STATUS_ERROR;
      break;
    }
    sysBuf[i] = new_val;
  }

  // Re-arm the signal for the copy back to the GPU
  hsa_signal_store_relaxed(copy_signal, 1);

  // System memory -> GPU
  err = hsa_amd_memory_async_copy(gpu_src_ptr, gpu_ag, sysBuf, cpu_ag,
                                                    sz, 0, NULL, copy_signal);
  RET_IF_HSA_ERR(err);

  if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
                                       1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
    printf("Async copy returned error value.\n");
    return HSA_STATUS_ERROR;
  }

  err = hsa_signal_destroy(copy_signal);
  RET_IF_HSA_ERR(err);

  err = hsa_amd_memory_pool_free(sysBuf);
  RET_IF_HSA_ERR(err);

  return check_result;
}

// Advances the shared handshake token. A token value of -1 means the other
// process reported an error: in that case a message is printed and this
// process exits with status -1. Otherwise newVal is written to the token.
static void CheckAndSetToken(volatile int *token, int newVal) {
  const bool peer_failed = (*token == -1);

  if (!peer_failed) {
    *token = newVal;
    return;
  }

  printf("Error in other process. Exiting.\n");
  exit(-1);
}

// Summary of this IPC Sample:
// This program demonstrates the IPC apis. Run it by executing 2 instances
// of the program.
// The first process will allocate some gpu-local memory and fill it with
// 1's. This HSA buffer will be made shareable with hsa_amd_ipc_memory_create()
// The 2nd process will access this shared buffer with
// hsa_amd_ipc_memory_attach(), verify that 1's were written, and then fill
// the buffer with 2's. Finally, the first process will then read the
// gpu-local buffer and verify that the 2's were indeed written. The main
// point is to show how hsa memory buffer handles can be shared among
// processes.
//
// Implementation Notes:
// -Standard linux shared memory is used in this sample program as a way
// of sharing info and  synchronizing the 2 processes. This is independent
// of RocR IPC and should not be confused with it.
int main(int argc, char** argv) {
  // IPC test
  struct Shared {
    volatile int token;
    volatile int count;
    volatile size_t size;
    volatile hsa_amd_ipc_memory_t handle;
    volatile hsa_amd_ipc_signal_t signal_handle;
  };

  // Allocate linux shared memory.
  Shared* shared = (Shared*)mmap(nullptr, sizeof(Shared), PROT_READ | PROT_WRITE,
                                 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  if (shared == MAP_FAILED) {
    fprintf(stdout, "Unable to allocate shared memory. Exiting.\n");
    return -1;
  }

  // "token" is used to signal state changes between the 2 processes.
  volatile int* token = &shared->token;
  *token = 0;
  bool processOne;

  // Spawn second process and verify communication
  int child = fork();
  if (child == -1) {
    printf("fork failed.  Exiting.\n");
    return -1;
  }
  if (child != 0) {
    processOne = true;

    // Signal to other process we are waiting, and then wait...
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }

    fprintf(stdout, "Second process observed, handshake...\n");
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }
  } else {
    processOne = false;
    fprintf(stdout, "Second process running.\n");

    while (*token == 0) {
      sched_yield();
    }

    CheckAndSetToken(token, 0);
    // Wait for handshake
    while (*token == 0) {
      sched_yield();
    }
    CheckAndSetToken(token, 0);
    fprintf(stdout, "Handshake complete.\n");
  }

  hsa_status_t err;

  err = hsa_init();
  RET_IF_HSA_ERR(err);

  struct callback_args args = {0, 0, 0};

  err = hsa_iterate_agents(FindCPUDevice, &args);
  assert(err == HSA_STATUS_INFO_BREAK);
  if (err != HSA_STATUS_INFO_BREAK) {
    return -1;
  }

  err = hsa_iterate_agents(FindGpu, &args);

  if (err != HSA_STATUS_INFO_BREAK) {
    printf(
     "No GPU with accessible VRAM required for this program found. Exiting\n");
    return -1;
  }

  // Print out name of the device.
  char name1[64] = {0};
  char name2[64] = {0};
  err = hsa_agent_get_info(args.host, HSA_AGENT_INFO_NAME, name1);
  RET_IF_HSA_ERR(err);
  err = hsa_agent_get_info(args.device, HSA_AGENT_INFO_NAME, name2);
  RET_IF_HSA_ERR(err);
  uint16_t loc1, loc2;
  err = hsa_agent_get_info(args.host,
                           (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc1);
  RET_IF_HSA_ERR(err);
  err = hsa_agent_get_info(args.device,
                           (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc2);
  RET_IF_HSA_ERR(err);
  fprintf(stdout, "Using: %s (%d) and %s (%d)\n", name1, loc1, name2, loc2);

  // Get signal for async copy
  hsa_signal_t copy_signal;
  err = hsa_signal_create(1, 0, NULL, &copy_signal);
  RET_IF_HSA_ERR(err);

// Wrap printf to add first or second process indicator
#define PROCESS_LOG(format, ...) \
    fprintf(stdout, "line:%d P%u: " format, \
                      __LINE__, static_cast<int>(!processOne), ##__VA_ARGS__);

  hsa_agent_t ag_list[2] = {args.device, args.host};

  if (processOne) {
    // Allocate some VRAM and fill it with 1's
    uint32_t* gpuBuf = NULL;
    err = hsa_amd_memory_pool_allocate(args.gpu_pool, args.gpu_mem_granule, 0,
                                            reinterpret_cast<void**>(&gpuBuf));
    RET_IF_HSA_ERR(err);

    PROCESS_LOG("Allocated local memory buffer at %p\n", gpuBuf);

    err = hsa_amd_agents_allow_access(2, ag_list, NULL, gpuBuf);
    RET_IF_HSA_ERR(err);

    err = hsa_amd_ipc_memory_create(gpuBuf, args.gpu_mem_granule,
                          const_cast<hsa_amd_ipc_memory_t*>(&shared->handle));
    PROCESS_LOG(
    "Created IPC handle associated with gpu-local buffer at P0 address %p\n",
                                                                      gpuBuf);

    RET_IF_HSA_ERR(err);

    uint32_t count = args.gpu_mem_granule/sizeof(uint32_t);
    shared->size = args.gpu_mem_granule;
    shared->count = count;

    err = hsa_amd_memory_fill(gpuBuf, 1, count);
    RET_IF_HSA_ERR(err);

    // Get IPC capable signal
    hsa_signal_t ipc_signal;
    err = hsa_amd_signal_create(1, 0, NULL, HSA_AMD_SIGNAL_IPC, &ipc_signal);
    RET_IF_HSA_ERR(err);

    err = hsa_amd_ipc_signal_create(ipc_signal,
                                    const_cast<hsa_amd_ipc_signal_t*>(&shared->signal_handle));
    PROCESS_LOG("Created IPC handle associated with ipc_signal\n");
    RET_IF_HSA_ERR(err);

    // Signal Process 2 that the gpu buffer is ready to read.
    CheckAndSetToken(token, 1);

    PROCESS_LOG("Allocated buffer and filled it with 1's. Wait for P1...\n");
    hsa_signal_value_t ret =
        hsa_signal_wait_acquire(ipc_signal, HSA_SIGNAL_CONDITION_NE, 1, -1, HSA_WAIT_STATE_BLOCKED);

    if (ret != 2) {
      hsa_signal_store_release(ipc_signal, -1);
      return -1;
    }

    err = CheckAndFillBuffer(&args, gpuBuf, 2, 0);
    RET_IF_HSA_ERR(err);
    PROCESS_LOG("Confirmed P1 filled buffer with 2\n")
    PROCESS_LOG("PASSED on P0\n");

    hsa_signal_store_relaxed(ipc_signal, 0);
    
    err = hsa_signal_destroy(ipc_signal);
    RET_IF_HSA_ERR(err);

    err = hsa_amd_memory_pool_free(gpuBuf);
    RET_IF_HSA_ERR(err);

    waitpid(child, nullptr, 0);

  } else {  // "ProcessTwo"
    PROCESS_LOG("Waiting for process 0 to write 1 to token...\n");
    while (*token == 0) {
      sched_yield();
    }
    if (*token != 1) {
      *token = -1;
      return -1;
    }

    // Attach shared VRAM
    void* ptr;
    err = hsa_amd_ipc_memory_attach(
      const_cast<hsa_amd_ipc_memory_t*>(&shared->handle), shared->size, 1,
                                                               ag_list, &ptr);
    RET_IF_HSA_ERR(err);

    PROCESS_LOG(
     "Attached to IPC handle; P1 buffer address gpu-local memory is %p\n",
                                                                         ptr);

    // Attach shared signal
    hsa_signal_t ipc_signal;
    err = hsa_amd_ipc_signal_attach(const_cast<hsa_amd_ipc_signal_t*>(&shared->signal_handle),
                                    &ipc_signal);
    RET_IF_HSA_ERR(err);

    PROCESS_LOG("Attached to signal IPC handle\n");

    err = CheckAndFillBuffer(&args, reinterpret_cast<uint32_t *>(ptr), 1, 2);
    RET_IF_HSA_ERR(err);

    PROCESS_LOG(
      "Confirmed P0 filled buffer with 1; P1 re-filled buffer with 2\n");
    PROCESS_LOG("PASSED on P1\n");

    hsa_signal_store_release(ipc_signal, 2);

    err = hsa_amd_ipc_memory_detach(ptr);
    RET_IF_HSA_ERR(err);

    hsa_signal_wait_relaxed(ipc_signal, HSA_SIGNAL_CONDITION_NE, 2, -1, HSA_WAIT_STATE_BLOCKED);

    err = hsa_signal_destroy(ipc_signal);
    RET_IF_HSA_ERR(err);
  }

  err = hsa_signal_destroy(copy_signal);
  RET_IF_HSA_ERR(err);

  munmap(shared, sizeof(Shared));

  err = hsa_shut_down();
  RET_IF_HSA_ERR(err);

#undef PROCESS_LOG
  return 0;
}


================================================
FILE: rocrtst/samples/rocm_async/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.8.0)

#
#  Setup build environment
#
#  1) Set the environment variables ROCR_INC_DIR, ROCR_LIB_DIR and
#     ROCT_LIB_DIR to point to the ROC Runtime headers, the ROC Runtime
#     libraries and the ROC Thunk libraries respectively
#     
#     export ROCR_INC_DIR="Path to ROC Runtime header"
#
#     export ROCR_LIB_DIR="Path to ROC Runtime libraries"
#
#     export ROCT_LIB_DIR="Path to ROC Thunk libraries"
#
#  2) Make a new folder called build under the root folder
#
#     mkdir build
#
#  3) Enter into folder of build, and run CMAKE to generate makefile
#     and make it
#
#     cd build; cmake ..; make
#

# This sample is not supported on Windows: print a notice and stop
# processing this CMakeLists.txt before any target is defined.
# (The command is RETURN(); the former spelling "RRETURN" is not a CMake
# command and would abort configuration with an unknown-command error.)
if(WIN32)
  MESSAGE("Windows platfomr is not supported")
  RETURN()
endif()

# Stop processing when hsa/hsa.h is not present under the directory named
# by the ROCR_INC_DIR environment variable.
if(NOT EXISTS $ENV{ROCR_INC_DIR}/hsa/hsa.h)
  MESSAGE("ERROR: ROC Runtime headers can't be found under specified path")
  RETURN()
endif()

#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )

#
# Set core runtime module name
#
# Library base names; the *_LIBRARY variables carry the "lib" prefix and are
# combined with the ".so" suffix in the existence checks below.
set ( ROC_THUNK_NAME "hsakmt" )
set ( ROC_THUNK_LIBRARY "lib${ROC_THUNK_NAME}" )
set ( CORE_RUNTIME_NAME "hsa-runtime" )
set ( CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}64" )
set ( CORE_RUNTIME_LIBRARY "lib${CORE_RUNTIME_TARGET}" )

# Stop processing when the core runtime shared library is not present under
# the directory named by the ROCR_LIB_DIR environment variable.
if(NOT EXISTS $ENV{ROCR_LIB_DIR}/${CORE_RUNTIME_LIBRARY}.so)
  MESSAGE("ERROR: ROC Runtime libraries can't be found under sprcified path")
  RETURN()
endif()

# Stop processing when the thunk shared library is not present under the
# directory named by the ROCT_LIB_DIR environment variable.
if(NOT EXISTS $ENV{ROCT_LIB_DIR}/${ROC_THUNK_LIBRARY}.so)
  MESSAGE("ERROR: ROC Thunk libraries can't be found under sprcified path")
  RETURN()
endif()

# Name the project after the sample executable.
set(PROJECT_NAME "rocm_async")
set(TEST_NAME "${PROJECT_NAME}")
project (${PROJECT_NAME})

# Detect a Debug build case-insensitively and define DEBUG for the sources.
string(TOLOWER "${CMAKE_BUILD_TYPE}" tmp)
if("${tmp}" STREQUAL "debug")
  set(ISDEBUG "1")
  add_definitions(-DDEBUG)
endif()

# Base compiler flags: unoptimized with gdb symbols for Debug, -O2 otherwise.
# Note that CMAKE_CXX_FLAGS is overwritten here, not appended to.
if(ISDEBUG)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O0")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
else()
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2")
endif()

#
# Set the remaining compiler flags
#
# Warnings are enabled and treated as errors.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")

# Header and library search paths come from the environment variables
# validated earlier in this file.
INCLUDE_DIRECTORIES($ENV{ROCR_INC_DIR})

LINK_DIRECTORIES($ENV{ROCR_LIB_DIR})

# Add sources that belong to the project
# (every source file found in this directory is collected into Src)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} Src)

# Build the rocm_async executable and link it against the core runtime.
add_executable(rocm_async  ${Src})
target_link_libraries(rocm_async hsa-runtime64)


================================================
FILE: rocrtst/samples/rocm_async/Readme.txt
================================================

Introduction:
#############

RocmAsync is designed to capture the performance characteristics of buffer
copying and kernel read/write operations. The help screen of the benchmark
shows the various options one can use to initiate copy/read/write operations.
In addition, one can also query the topology of the system in terms of memory
pools and their agents.

Build Environment:
##################

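To be able to build RocmAsync, users must ensure that the build platform
satisfies the following conditions (these are the checks performed by the
accompanying CMakeLists.txt):

--- The platform is not Windows
--- hsa/hsa.h is present under ROCR_INC_DIR
--- libhsa-runtime64.so is present under ROCR_LIB_DIR
--- libhsakmt.so is present under ROCT_LIB_DIR

Build Procedure: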
################

The following simply lists the steps to build RocmAsync
  
--- Define the following environment variables to specify the location of
    header and library files

    // Contains header files exported by ROC Runtime
    ROCR_INC_DIR="Path of ROC Runtime Header Files"

    // Contains library files exported by ROC Runtime
    ROCR_LIB_DIR="Path of ROC Runtime Library Files"

    // Contains library files exported by ROC Thunk
    ROCT_LIB_DIR="Path of ROC Thunk Library Files"

--- Create a build directory. The location of build directory can be anywhere
    in the file system as long as it has read / write / execute permissions for
    the user invoking the commands. User can choose any valid filename for the
    build directory as the examples below illustrate

        e.g. mkdir rocm_async/perfBuild
        e.g. mkdir <parent_Of_rocm_async>/rocm_async-build
        e.g. mkdir <user_home>/rocmAsyncBuild

--- Set working directory to be the new build directory

        e.g. cd rocm_async/perfBuild
        e.g. cd <parent_Of_rocm_async>/rocm_async-build
        e.g. cd <user_home>/rocmAsyncBuild

--- Invoke Cmake to interpret build rules and generate native build files
    The argument for cmake should be the root folder of RocmAsync test suite

        // Builds Release version (default)
        e.g. cmake .../rocm_async

        // Builds Debug version
        e.g. cmake -DCMAKE_BUILD_TYPE:STRING=Debug .../rocm_async

--- Invoke the native build rules generated by cmake to build the various
    object, library and executable files

        e.g. make

--- Invoke the install command to copy build artifacts to pre-defined folders
    of RocmAsync suite. Upon completion artifacts will be copied to the bin and
    lib directories of build directory
    
        e.g. make install
    
    @note: All executables will be found in <build_directory>/bin folder


================================================
FILE: rocrtst/samples/rocm_async/base_test.cpp
================================================
#include "base_test.hpp"

// Constructs the test with the requested iteration count.
//
// @param num Number of iterations to run; the declaration in base_test.hpp
//            supplies the default value used when no argument is given.
//
// The status member is given a defined starting value as well, so that it
// is never read while still indeterminate.
BaseTest::BaseTest(size_t num)
    : num_iteration_(num), err_(HSA_STATUS_SUCCESS) {}

// Nothing to release at this level; defined out of line for the virtual
// destructor declared in base_test.hpp.
BaseTest::~BaseTest() = default;


================================================
FILE: rocrtst/samples/rocm_async/base_test.hpp
================================================

#ifndef ROCM_ASYNC_BW_BASE_TEST_H_
#define ROCM_ASYNC_BW_BASE_TEST_H_

#include "hsa/hsa.h"
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// @Brief: Abstract interface shared by the tests: a set-up / run / close /
// display life cycle plus a configurable iteration count.

class BaseTest {

 public:

  BaseTest(size_t num = 10);

  virtual ~BaseTest();

  // @Brief: Hook for the setup procedures that must complete
  // before the benchmark test case runs
  virtual void SetUp() = 0;

  // @Brief: Hook that launches the procedures of the test scenario
  virtual void Run() = 0;

  // @Brief: Hook for the clean-up procedures
  virtual void Close() = 0;

  // @Brief: Hook that displays the results
  virtual void Display() const = 0;

  // @Brief: Override the number of iterations to run
  void set_num_iteration(size_t num) { num_iteration_ = num; }

  // @Brief: State made available to derived classes, which may
  // declare more of their own if needed
 protected:

  // @Brief: Iteration count, as given to the constructor or to
  // set_num_iteration
  uint64_t num_iteration_;

  // @Brief: Status code
  hsa_status_t err_;
};

#endif  //  ROCM_ASYNC_BW_BASE_TEST_H_


================================================
FILE: rocrtst/samples/rocm_async/common.cpp
================================================
#include "common.hpp"

// @Brief: Abort the program when an HSA call did not succeed.
//
// HSA_STATUS_SUCCESS and HSA_STATUS_INFO_BREAK are both treated as success.
// For any other status the call-site location and a description are printed
// to standard output and the process exits with EXIT_FAILURE.
//
// @param hsa_error_code Status returned by the HSA API call
// @param line_num       Line of the call site (ErrorCheck passes __LINE__)
// @param str            File of the call site (ErrorCheck passes __FILE__)
void error_check(hsa_status_t hsa_error_code, int line_num, const char* str) {
  if (hsa_error_code == HSA_STATUS_SUCCESS ||
      hsa_error_code == HSA_STATUS_INFO_BREAK) {
    return;
  }

  printf("HSA Error Found!  In file: %s;   At line: %d\n", str, line_num);

  // The description lookup can itself fail and leave the output pointer
  // untouched; never hand a null pointer to the "%s" conversion.
  const char* string = nullptr;
  const hsa_status_t lookup = hsa_status_string(hsa_error_code, &string);
  if (lookup != HSA_STATUS_SUCCESS || string == nullptr) {
    string = "unknown HSA status code";
  }
  printf("Error: %s\n", string);
  exit(EXIT_FAILURE);
}

// Stores agent in the hsa_agent_t that data points to and returns
// HSA_STATUS_INFO_BREAK when agent is a GPU; returns HSA_STATUS_SUCCESS for
// any other device type so that a caller walking over agents keeps going.
// A null data pointer or a failed device-type query is reported through the
// returned status.
hsa_status_t FindGpuDevice(hsa_agent_t agent, void* data) {
  if (!data) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  const hsa_status_t status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  if (device_type != HSA_DEVICE_TYPE_GPU) {
    // Not a GPU: nothing recorded
    return HSA_STATUS_SUCCESS;
  }

  *static_cast<hsa_agent_t*>(data) = agent;
  return HSA_STATUS_INFO_BREAK;
}

// Stores agent in the hsa_agent_t that data points to and returns
// HSA_STATUS_INFO_BREAK when agent is a CPU; returns HSA_STATUS_SUCCESS for
// any other device type so that a caller walking over agents keeps going.
// A null data pointer or a failed device-type query is reported through the
// returned status.
hsa_status_t FindCpuDevice(hsa_agent_t agent, void* data) {
  if (!data) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  const hsa_status_t status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  if (device_type != HSA_DEVICE_TYPE_CPU) {
    // Not a CPU: nothing recorded
    return HSA_STATUS_SUCCESS;
  }

  *static_cast<hsa_agent_t*>(data) = agent;
  return HSA_STATUS_INFO_BREAK;
}

// Stores region in the hsa_amd_memory_pool_t that data points to when the
// pool belongs to the global segment and has the fine-grained flag set.
// Always returns HSA_STATUS_SUCCESS for a non-null data pointer, so a
// caller walking over pools is not stopped by a match; failed attribute
// queries terminate the program through ErrorCheck.
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t region, void* data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_amd_segment_t pool_segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  ErrorCheck(status);

  uint32_t global_flags;
  status = hsa_amd_memory_pool_get_info(
      region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  ErrorCheck(status);

  const bool in_global_segment = (pool_segment == HSA_AMD_SEGMENT_GLOBAL);
  const bool fine_grained =
      (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;
  if (in_global_segment && fine_grained) {
    *static_cast<hsa_amd_memory_pool_t*>(data) = region;
  }

  return HSA_STATUS_SUCCESS;
}

// @Brief: Return the middle value of scores.
//
// The elements are used in the order given - no sorting is performed here -
// so the result is the statistical median only when the caller passes
// sorted data.
//
// @param scores Values to inspect
// @return The middle element for an odd count, the average of the two
//         middle elements for an even count, and 0.0 for an empty vector
//         (which previously indexed before the start of the vector).
double CalcMedian(std::vector<double> scores) {
  const size_t size = scores.size();
  if (size == 0) {
    return 0.0;
  }

  const size_t mid = size / 2;
  if (size % 2 == 0) {
    return (scores[mid - 1] + scores[mid]) / 2;
  }
  return scores[mid];
}

// @Brief: Arithmetic mean of scores: the sum of all elements, accumulated
// front to back, divided by the element count.
double CalcMean(std::vector<double> scores) {
  double total = 0;
  for (double score : scores) {
    total += score;
  }
  return total / scores.size();
}

// @Brief: Population standard deviation of scores around score_mean: the
// square root of the average squared distance from score_mean.
//
// NOTE(review): score_mean is an int, so a fractional mean is truncated
// when it is passed in - confirm whether that is intended.
double CalcStdDeviation(std::vector<double> scores, int score_mean) {
  double squared_sum = 0.0;
  for (double score : scores) {
    const double delta = score - score_mean;
    squared_sum += delta * delta;
  }
  return sqrt(squared_sum / scores.size());
}

// Counts the entries (from the second one on) whose score is within 10% of,
// or above, its expected time. The expected time for entry i is the first
// score divided by 2^i. Every expected time is also printed to standard
// output.
int CalcConcurrentQueues(std::vector<double> scores) {
  const size_t count = scores.size();

  // Expected time per entry: the first score halved once per position
  std::vector<double> ideal_times;
  for (size_t idx = 0; idx < count; ++idx) {
    ideal_times.push_back(scores[0] / (1 << idx));
  }

  for (size_t idx = 0; idx < count; ++idx) {
    cout << "expected exe time = " << ideal_times[idx] << endl;
  }

  int matching = 0;
  for (size_t idx = 1; idx < count; ++idx) {
    const double shortfall = ideal_times[idx] - scores[idx];
    if (shortfall < 0.1 * ideal_times[idx]) {
      ++matching;
    }
  }

  return matching;
}

/**  hsa_status_t FindHostRegion(hsa_region_t region, void *data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  bool is_host_region = false;
  hsa_status_t hsa_error_code = hsa_region_get_info(
    region, (hsa_region_info_t)HSA_EXT_REGION_INFO_HOST_ACCESS, &is_host_region
  );
  if (hsa_error_code != HSA_STATUS_SUCCESS) {
    return hsa_error_code;
  }

  if (is_host_region) {
    *((hsa_region_t*)data) = region;
  }

  return HSA_STATUS_SUCCESS;
} */


================================================
FILE: rocrtst/samples/rocm_async/common.hpp
================================================
#ifndef ROCM_ASYNC_BW_COMMON_HPP
#define ROCM_ASYNC_BW_COMMON_HPP

#include <cstdlib>
#include <iostream>
#include <vector>
#include <cmath>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

using namespace std;

#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED_(x) __attribute__((aligned(x)))
#endif  // __GNUC__
#endif  // _MSC_VER

#define MULTILINE(...) #__VA_ARGS__

#define HSA_ARGUMENT_ALIGN_BYTES 16

#define ErrorCheck(x) error_check(x, __LINE__, __FILE__)

// @Brief: Check HSA API return value
void error_check(hsa_status_t hsa_error_code, int line_num, const char* str);

// @Brief: Find the first available GPU device
hsa_status_t FindGpuDevice(hsa_agent_t agent, void* data);

// @Brief: Find the first available CPU device
hsa_status_t FindCpuDevice(hsa_agent_t agent, void* data);

// @Brief: Find the agent's global region / pool
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t region, void* data);

// @Brief: Calculate the mean number of the vector
double CalcMean(vector<double> scores);

// @Brief: Calculate the median value of the vector
double CalcMedian(vector<double> scores);

// @Brief: Calculate the standard deviation of the vector
double CalcStdDeviation(vector<double> scores, int score_mean);

#endif  // ROCM_ASYNC_BW_COMMON_HPP


================================================
FILE: rocrtst/samples/rocm_async/hsatimer.cpp
================================================
#include "hsatimer.hpp"

#define NANOSECONDS_PER_SECOND 1000000000

// Measures the TSC frequency once at construction and caches the result
// (see MeasureTSCFreqHz for the unit of the cached value).
PerfTimer::PerfTimer() : freq_in_100mhz(MeasureTSCFreqHz()) {}

// Frees every Timer created by CreateTimer, last one first, and leaves the
// list empty.
PerfTimer::~PerfTimer() {
  for (size_t remaining = _timers.size(); remaining > 0; --remaining) {
    delete _timers[remaining - 1];
  }
  _timers.clear();
}

// Create a new timer instance and return its index
int PerfTimer::CreateTimer() {

  Timer *newTimer = new Timer;
  newTimer->_start = 0.0;
  newTimer->_clocks = 0.0;

  #ifdef _WIN32
  QueryPerformanceFrequency((LARGE_INTEGER *)&newTimer->_freq);
  #endif

  #ifdef  __linux__
  newTimer->_freq = NANOSECONDS_PER_SECOND;
  #endif

  // Save the timer object in timer list
  _timers.push_back(newTimer);
  return (int)(_timers.size() - 1);
}

int PerfTimer::StartTimer(int index) {

  if (index >= (int)_timers.size()) {
    Error("Cannot reset timer. Invalid handle.");
    return HSA_FAILURE;
  }

  #ifdef _WIN32
    // General Windows timing method
    #ifndef _AMD
      long long tmpStart;
      QueryPerformanceCounter((LARGE_INTEGER *)&(tmpStart));
  _   timers[index]->_start = (double)tmpStart;
    // AMD Windows timing method
    #else
    #endif
  #endif

  #ifdef  __linux__
    // General Linux timing method
    #ifndef _AMD
      struct timespec s;
      clock_gettime(CLOCK_MONOTONIC, &s);
      _timers[index]->_start =
      (long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
    // AMD Linux timing method
    #else
      unsigned int unused;
    _timers[index]->_start = __rdtscp(&unused);
    #endif
  #endif

  return HSA_SUCCESS;
}

int PerfTimer::StopTimer(int index) {

  long long n = 0;
  if (index >= (int)_timers.size()) {
    Error("Cannot reset timer. Invalid handle.");
    return HSA_FAILURE;
  }
  
  #ifdef _WIN32
    #ifndef _AMD
      long long n1;
      QueryPerformanceCounter((LARGE_INTEGER *)&(n1));
      n = n1;
    // AMD Window Timing
    #else
    #endif
  #endif

  #ifdef  __linux__
    // General Linux timing method
    #ifndef _AMD
      struct timespec s;
      clock_gettime(CLOCK_MONOTONIC, &s);
      n = (long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
    // AMD Linux timing
    #else
      unsigned int unused;
      n = __rdtscp(&unused);
    #endif
  #endif

  n -= _timers[index]->_start;
  _timers[index]->_start = 0;

  #ifndef _AMD
    _timers[index]->_clocks += n;
  #endif

  #ifdef  __linux__
    //_timers[index]->_clocks += 10 * n /freq_in_100mhz;      // unit is ns
    _timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz;  // convert to ms
    // cout << "_AMD is enabled!!!" << endl;
  #endif

  return HSA_SUCCESS;
}

void PerfTimer::Error(string str) { cout << str << endl; }

// Returns the total accumulated by a timer, divided by that timer's
// frequency.
//
// @param index Handle returned by CreateTimer
// @return The accumulated reading; when index does not name an existing
//         timer a message is printed and HSA_FAILURE is returned instead
//         (note that this value is indistinguishable from a reading of 1).
double PerfTimer::ReadTimer(int index) {

  // Reject handles outside [0, size): a negative index would otherwise pass
  // the upper-bound test and index before the start of the vector
  if (index < 0 || index >= (int)_timers.size()) {
    Error("Cannot read timer. Invalid handle.");
    return HSA_FAILURE;
  }

  const double clocks = double(_timers[index]->_clocks);
  return double(clocks / _timers[index]->_freq);
}

// Clears both the accumulated total and the start point of a timer.
//
// @param index Handle returned by CreateTimer; an index that does not name
//              an existing timer prints a message and exits the process
//              with status 1.
void PerfTimer::ResetTimer(int index) {

  // Reject handles outside [0, size): a negative index would otherwise pass
  // the upper-bound test and index before the start of the vector
  if (index < 0 || index >= (int)_timers.size()) {
    Error("Invalid index value\n");
    exit(1);
  }

  _timers[index]->_clocks = 0.0;
  _timers[index]->_start = 0.0;
}

// Returns a time stamp in microseconds: derived from the performance
// counter on Windows and from CLOCK_MONOTONIC_RAW on Linux.
uint64_t PerfTimer::CoarseTimestampUs() {

  #ifdef _WIN32
    uint64_t counterHz, counter;
    QueryPerformanceFrequency((LARGE_INTEGER *)&counterHz);
    QueryPerformanceCounter((LARGE_INTEGER *)&counter);

    // Shrink counter and frequency together until (counter * 1000000)
    // fits in uint64_t.
    while (counter > (1ULL << 44)) {
      counter /= 16;
      counterHz /= 16;
    }

    return (counter * 1000000) / counterHz;
  #endif

  #ifdef  __linux__
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    // Whole seconds scaled to microseconds, plus the nanosecond remainder
    // reduced to microseconds
    const uint64_t seconds_as_us = uint64_t(now.tv_sec) * 1000000;
    return seconds_as_us + now.tv_nsec / 1000;
  #endif
}

// Spins until the TSC has advanced by at least 1e9 ticks, timing the spin
// with CoarseTimestampUs, and returns ten times the measured ticks per
// nanosecond, rounded to the nearest integer - i.e. the TSC frequency
// expressed in units of 100 MHz.
uint64_t PerfTimer::MeasureTSCFreqHz() {

  unsigned int aux;

  const uint64_t wallBeginUs = CoarseTimestampUs();
  const uint64_t tscBegin = __rdtscp(&aux);

  // Busy-wait for one gigacycle of TSC ticks
  uint64_t tscEnd = __rdtscp(&aux);
  while (tscEnd - tscBegin < 1000000000) {
    tscEnd = __rdtscp(&aux);
  }

  const uint64_t wallEndUs = CoarseTimestampUs();

  // Adding half the divisor before dividing rounds to nearest
  const uint64_t wallNs = (wallEndUs - wallBeginUs) * 1000;
  const uint64_t tscTicks = tscEnd - tscBegin;
  return (tscTicks * 10 + (wallNs / 2)) / wallNs;
}


================================================
FILE: rocrtst/samples/rocm_async/hsatimer.hpp
================================================

#ifndef ROCM_ASYNC_BW_MYTIME_H_
#define ROCM_ASYNC_BW_MYTIME_H_

// Will use AMD timer and general Linux timer based on users'
// need --> compilation flag. Support for windows platform is
// not currently available

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <x86intrin.h>
#include <string.h>

#include <iostream>
#include <vector>
#include <string>

using namespace std;

#include <sys/time.h>

#define HSA_FAILURE 1
#define HSA_SUCCESS 0

// A collection of independent stop-watch style timers addressed by the
// integer handle that CreateTimer returns.
class PerfTimer {

 public:

  PerfTimer();
  ~PerfTimer();

  // Timer life cycle; index is the handle returned by CreateTimer
  int CreateTimer();
  int StartTimer(int index);
  int StopTimer(int index);
  void ResetTimer(int index);

  // retrieve time
  double ReadTimer(int index);

  // write into a file
  double WriteTimer(int index);

  // Report a message to the user
  void Error(string str);

 private:

  struct Timer {
    string name;       /* name of the timer object */
    long long _freq;   /* tick frequency used to scale a reading */
    long long _clocks; /* ticks accumulated over all start/stop pairs */
    long long _start;  /* tick count recorded at the last start */
  };

  std::vector<Timer*> _timers; /* Timer objects, indexed by handle */
  double freq_in_100mhz;       /* value cached from MeasureTSCFreqHz */

  // AMD timing method
  uint64_t CoarseTimestampUs();
  uint64_t MeasureTSCFreqHz();

  // General Linux timing method
};

#endif    //  ROCM_ASYNC_BW_MYTIME_H_


================================================
FILE: rocrtst/samples/rocm_async/main.cpp
================================================
#include <unistd.h>
#include <iostream>
#include "hsatimer.hpp"
#include "rocm_async.hpp"

using namespace std;

// Entry point of the sample: builds a RocmAsync object from the command
// line and drives it through its SetUp / Run / Display / Close sequence.
// Always returns 0 once that sequence has completed.
int main(int argc, char** argv) {

  // Create the Bandwidth test object
  RocmAsync bw_test(argc, argv);

  // Initialize the Bandwidth test object
  bw_test.SetUp();

  // Run the Bandwidth tests requested by user
  bw_test.Run();

  // Display the time taken by various tests
  bw_test.Display();

  // Release the Bandwidth test object resources
  bw_test.Close();
  return 0;
}


================================================
FILE: rocrtst/samples/rocm_async/os.cpp
================================================

// Compiling for Windows Platform
#ifdef _WIN32

#include "os.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>

// Windows: sets env_var_name to env_var_value for the current process.
// On failure a message is printed and the process exits with status 1.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  const bool succeeded = SetEnvironmentVariable(env_var_name, env_var_value);
  if (succeeded) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}

// Windows: returns the value of env_var_name in a freshly malloc'ed,
// NUL-terminated buffer, or NULL when the size query reports 0 or the
// buffer cannot be allocated.
char* GetEnv(const char* env_var_name) {
  // Size query: no buffer is supplied, the return value is the number of
  // characters the buffer must hold
  DWORD char_count = GetEnvironmentVariable(env_var_name, NULL, 0);
  if (char_count == 0) return NULL;

  char* buff = (char*)malloc(sizeof(char) * char_count);
  if (buff == NULL) {
    // Out of memory: report "no value" instead of writing through NULL
    return NULL;
  }

  GetEnvironmentVariable(env_var_name, buff, char_count);
  buff[char_count - 1] = '\0';
  return buff;
}

#endif    // End of Windows Code

// Compiling for Linux Platform
#ifdef  __linux__

#include "os.hpp"
#include <stdlib.h>

// Linux: sets env_var_name to env_var_value, replacing any existing value.
// On failure a message is printed and the process exits with status 1.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  if (setenv(env_var_name, env_var_value, 1) == 0) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}

// Linux: returns whatever getenv reports for env_var_name.
char* GetEnv(const char* env_var_name) {
  char* value = getenv(env_var_name);
  return value;
}

#endif    // End of Linux Code


================================================
FILE: rocrtst/samples/rocm_async/os.hpp
================================================

#ifndef ROCM_ASYNC_BW_UTILS_OS_H_
#define ROCM_ASYNC_BW_UTILS_OS_H_

#include <stdio.h>

// Set an environment variable
void SetEnv(const char* env_var_name, const char* env_var_value);

// Get the value of an environment variable
char* GetEnv(const char* env_var_name);


#endif    //  ROCM_ASYNC_BW_UTILS_OS_H_


================================================
FILE: rocrtst/samples/rocm_async/rocm_async.cpp
================================================

#include "common.hpp"
#include "rocm_async.hpp"

#include <stdlib.h>
#include <assert.h>
#include <algorithm>
#include <unistd.h>
#include <cctype>
#include <sstream>

// The values are in megabytes at allocation time
const uint32_t RocmAsync::SIZE_LIST[] = { 64, 128, 256, 512 };
//const uint32_t RocmAsync::SIZE_LIST[] = { 2, 4, 8, 16, 32, 64, 128, 256, 512 };

// Returns the configured iteration count scaled by 1.2 plus one, truncated
// to an unsigned 32-bit value.
uint32_t RocmAsync::GetIterationNum() {
  const double padded = num_iteration_ * 1.2 + 1;
  return padded;
}

// Grants the single given agent access to the buffer at ptr; a failure
// terminates the program through ErrorCheck.
void RocmAsync::AcquireAccess(hsa_agent_t agent, void* ptr) {
  const uint32_t agent_count = 1;
  err_ = hsa_amd_agents_allow_access(agent_count, &agent, NULL, ptr);
  ErrorCheck(err_);
}

// Allocates the host-side buffers and signals used alongside a copy
// transaction.
//
// For the forward direction a source and a destination buffer of `size`
// bytes are allocated from sys_pool_ and returned through src_fwd and
// dst_fwd; the source is filled with 0x23 and the destination with 0x00,
// and signal_fwd is created with an initial value of 1. When bidir is true
// the same is done for the reverse direction (src_rev, dst_rev,
// signal_rev); otherwise the reverse outputs are left untouched.
//
// Each host buffer is made accessible to the matching agent argument, and
// cpu_agent_ is given access to the corresponding buf_* buffer passed in.
// Any HSA failure terminates the program through ErrorCheck.
void RocmAsync::AllocateHostBuffers(bool bidir, uint32_t size,
                                    void*& src_fwd, void*& dst_fwd,
                                    void* buf_src_fwd, void* buf_dst_fwd,
                                    hsa_agent_t src_agent_fwd, hsa_agent_t dst_agent_fwd,
                                    void*& src_rev, void*& dst_rev,
                                    void* buf_src_rev, void* buf_dst_rev,
                                    hsa_agent_t src_agent_rev, hsa_agent_t dst_agent_rev,
                                    hsa_signal_t& signal_fwd, hsa_signal_t& signal_rev) {

  // Allocate host buffers and setup accessibility for copy operation
  err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&src_fwd);
  ErrorCheck(err_);
  AcquireAccess(src_agent_fwd, src_fwd);
  AcquireAccess(cpu_agent_, buf_src_fwd);

  err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&dst_fwd);
  ErrorCheck(err_);
  AcquireAccess(dst_agent_fwd, dst_fwd);
  AcquireAccess(cpu_agent_, buf_dst_fwd);

  // Initialize host buffers to a determinate value
  memset(src_fwd, 0x23, size);
  memset(dst_fwd, 0x00, size);
  
  // Create a signal to wait on copy operation
  // @TODO: replace it with a signal pool call
  err_ = hsa_signal_create(1, 0, NULL, &signal_fwd);
  ErrorCheck(err_);

  // Unidirectional transaction: the reverse-direction resources are not
  // created
  if (bidir == false) {
    return;
  }

  // Same sequence as above, for the reverse direction
  err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&src_rev);
  ErrorCheck(err_);
  AcquireAccess(src_agent_rev, src_rev);
  AcquireAccess(cpu_agent_, buf_src_rev);

  err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&dst_rev);
  ErrorCheck(err_);
  AcquireAccess(dst_agent_rev, dst_rev);
  AcquireAccess(cpu_agent_, buf_dst_rev);

  // Initialize host buffers to a determinate value
  memset(src_rev, 0x23, size);
  memset(dst_rev, 0x00, size);
  
  err_ = hsa_signal_create(1, 0, NULL, &signal_rev);
  ErrorCheck(err_);
}

// Allocates the source and destination buffers of a copy transaction and
// the signal(s) used to wait on it.
//
// The forward buffers (`size` bytes each) are allocated from src_pool_fwd
// and dst_pool_fwd and returned through src_fwd and dst_fwd; signal_fwd is
// created with an initial value of 1. When bidir is true the reverse
// buffers and signal_rev are set up the same way from the *_rev arguments;
// otherwise the reverse outputs are left untouched.
//
// Access is granted crosswise: the source agent gets access to the
// destination buffer and the destination agent to the source buffer.
// Any HSA failure terminates the program through ErrorCheck.
void RocmAsync::AllocateCopyBuffers(bool bidir, uint32_t size,
                        void*& src_fwd, hsa_amd_memory_pool_t src_pool_fwd,
                        void*& dst_fwd, hsa_amd_memory_pool_t dst_pool_fwd,
                        hsa_agent_t src_agent_fwd, hsa_agent_t dst_agent_fwd,
                        void*& src_rev, hsa_amd_memory_pool_t src_pool_rev,
                        void*& dst_rev, hsa_amd_memory_pool_t dst_pool_rev,
                        hsa_agent_t src_agent_rev, hsa_agent_t dst_agent_rev,
                        hsa_signal_t& signal_fwd, hsa_signal_t& signal_rev) {

  // Allocate buffers in src and dst pools for forward copy
  err_ = hsa_amd_memory_pool_allocate(src_pool_fwd, size, 0, &src_fwd);
  ErrorCheck(err_);
  err_ = hsa_amd_memory_pool_allocate(dst_pool_fwd, size, 0, &dst_fwd);
  ErrorCheck(err_);

  // Allocate buffers in src and dst pools for reverse copy
  if (bidir) {
    err_ = hsa_amd_memory_pool_allocate(src_pool_rev, size, 0, &src_rev);
    ErrorCheck(err_);
    err_ = hsa_amd_memory_pool_allocate(dst_pool_rev, size, 0, &dst_rev);
    ErrorCheck(err_);
  }

  // Acquire access to src and dst buffers for forward copy
  // (each agent is granted access to the buffer on the opposite side)
  AcquireAccess(src_agent_fwd, dst_fwd);
  AcquireAccess(dst_agent_fwd, src_fwd);

  // Acquire access to src and dst buffers for reverse copy
  if (bidir) {
    AcquireAccess(src_agent_rev, dst_rev);
    AcquireAccess(dst_agent_rev, src_rev);
  }
  
  // Create a signal to wait on copy operation
  // @TODO: replace it with a signal pool call
  err_ = hsa_signal_create(1, 0, NULL, &signal_fwd);
  ErrorCheck(err_);
  if (bidir) {
    err_ = hsa_signal_create(1, 0, NULL, &signal_rev);
    ErrorCheck(err_);
  }
}

// Frees the buffers and destroys the signal of the forward direction and,
// when bidir is true, those of the reverse direction as well. The reverse
// arguments are not touched for a unidirectional transaction.
// Note the parameter order: both source buffers come before both
// destination buffers. Any HSA failure terminates the program through
// ErrorCheck.
void RocmAsync::ReleaseBuffers(bool bidir,
                               void* src_fwd, void* src_rev,
                               void* dst_fwd, void* dst_rev,
                               hsa_signal_t signal_fwd,
                               hsa_signal_t signal_rev) {

  // Free the src and dst buffers used in forward copy
  // including the signal used to wait
  err_ = hsa_amd_memory_pool_free(src_fwd);
  ErrorCheck(err_);
  err_ = hsa_amd_memory_pool_free(dst_fwd);
  ErrorCheck(err_);
  err_ = hsa_signal_destroy(signal_fwd);
  ErrorCheck(err_);

  // Free the src and dst buffers used in reverse copy
  // including the signal used to wait
  if (bidir) {
    err_ = hsa_amd_memory_pool_free(src_rev);
    ErrorCheck(err_);
    err_ = hsa_amd_memory_pool_free(dst_rev);
    ErrorCheck(err_);
    err_ = hsa_signal_destroy(signal_rev);
    ErrorCheck(err_);
  }
}

double RocmAsync::GetGpuCopyTime(bool bidir,
                                 hsa_signal_t signal_fwd,
                                 hsa_signal_t signal_rev) {

  // Obtain time taken for forward copy
  hsa_amd_profiling_async_copy_time_t async_time_fwd = {0};
  err_= hsa_amd_profiling_get_async_copy_time(signal_fwd, &async_time_fwd);
  ErrorCheck(err_);
  if (bidir == false) {
    return(async_time_fwd.end - async_time_fwd.start);
  }

  hsa_amd_profiling_async_copy_time_t async_time_rev = {0};
  err_= hsa_amd_profiling_get_async_copy_time(signal_rev, &async_time_rev);
  ErrorCheck(err_);
  double start = min(async_time_fwd.start, async_time_rev.start);
  double end = max(async_time_fwd.end, async_time_rev.end);
  return(end - start);
}

void RocmAsync::copy_buffer(void* dst, hsa_agent_t dst_agent,
                            void* src, hsa_agent_t src_agent,
                            size_t size, hsa_signal_t signal) {

  // Copy from src into dst buffer
  err_ = hsa_amd_memory_async_copy(dst, dst_agent,
                                   src, src_agent,
                                   size, 0, NULL, signal);
  ErrorCheck(err_);
  
  // Wait for the forward copy operation to complete
  while (hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1,
                                     uint64_t(-1), HSA_WAIT_STATE_ACTIVE));
}

// @brief: Run the copy benchmark described by a transaction.
//
// For every size in size_list_ the copy is repeated GetIterationNum()
// times. Each repetition is timed on the Cpu with a PerfTimer and, when
// trans.copy.uses_gpu_ is set, also through the profiling data attached
// to the completion signal(s). The min and mean of those samples are
// appended to the transaction, one entry per size.
//
// When verify_ is set the source buffers are seeded from host buffers
// and the destination buffers are copied back to the host after each
// repetition and compared against the seed with memcmp.
//
// @param trans Transaction to execute; its cpu_*/gpu_* time vectors
// are extended by this call
void RocmAsync::RunCopyBenchmark(async_trans_t& trans) {

  // Bind if this transaction is bidirectional
  bool bidir = trans.copy.bidir_;

  // Buffers are allocated once, sized for the last entry of the size
  // list (ParseArguments sorts that list in ascending order)
  uint32_t size_len = size_list_.size();
  uint32_t max_size = size_list_.back() * 1024 * 1024;

  // Buffers and signals used by forward and reverse copy operations.
  // They are initialized because the reverse and host handles are
  // passed by value to ReleaseBuffers even when nothing in this
  // function has assigned them (unidirectional and/or non-verify runs)
  void* buf_src_fwd = NULL;
  void* buf_dst_fwd = NULL;
  void* buf_src_rev = NULL;
  void* buf_dst_rev = NULL;
  void* host_src_fwd = NULL;
  void* host_dst_fwd = NULL;
  void* host_src_rev = NULL;
  void* host_dst_rev = NULL;
  hsa_signal_t signal_fwd = {0};
  hsa_signal_t signal_rev = {0};
  hsa_signal_t host_signal_fwd = {0};
  hsa_signal_t host_signal_rev = {0};

  // Bind to pools and agents involved in the copy. The reverse copy
  // simply swaps the roles of source and destination
  hsa_amd_memory_pool_t src_pool_fwd = trans.copy.src_pool_;
  hsa_amd_memory_pool_t dst_pool_fwd = trans.copy.dst_pool_;
  hsa_amd_memory_pool_t src_pool_rev = dst_pool_fwd;
  hsa_amd_memory_pool_t dst_pool_rev = src_pool_fwd;
  hsa_agent_t src_agent_fwd = pool_list_[trans.copy.src_idx_].owner_agent_;
  hsa_agent_t dst_agent_fwd = pool_list_[trans.copy.dst_idx_].owner_agent_;
  hsa_agent_t src_agent_rev = dst_agent_fwd;
  hsa_agent_t dst_agent_rev = src_agent_fwd;

  // Allocate buffers and signal objects
  AllocateCopyBuffers(bidir, max_size,
                      buf_src_fwd, src_pool_fwd,
                      buf_dst_fwd, dst_pool_fwd,
                      src_agent_fwd, dst_agent_fwd,
                      buf_src_rev, src_pool_rev,
                      buf_dst_rev, dst_pool_rev,
                      src_agent_rev, dst_agent_rev,
                      signal_fwd, signal_rev);

  if (verify_) {
    AllocateHostBuffers(bidir, max_size,
                        host_src_fwd, host_dst_fwd,
                        buf_src_fwd, buf_dst_fwd,
                        src_agent_fwd, dst_agent_fwd,
                        host_src_rev, host_dst_rev,
                        buf_src_rev, buf_dst_rev,
                        src_agent_rev, dst_agent_rev,
                        host_signal_fwd, host_signal_rev);

    // Initialize source buffer with values from verification buffer
    copy_buffer(buf_src_fwd, src_agent_fwd,
                host_src_fwd, cpu_agent_,
                max_size, host_signal_fwd);
    ErrorCheck(err_);
    if (bidir) {
      copy_buffer(buf_src_rev, src_agent_rev,
                  host_src_rev, cpu_agent_,
                  max_size, host_signal_rev);
      ErrorCheck(err_);
    }
  }

  // Bind the number of iterations
  uint32_t iterations = GetIterationNum();

  // Iterate through the different buffer sizes to
  // compute the bandwidth as determined by copy
  for (uint32_t idx = 0; idx < size_len; idx++) {

    // Guard against a size larger than the allocated buffers.
    // This should not be happening
    uint32_t curr_size = size_list_[idx] * 1024 * 1024;
    if (curr_size > max_size) {
      break;
    }

    std::vector<double> cpu_time;
    std::vector<double> gpu_time;
    for (uint32_t it = 0; it < iterations; it++) {
      #if DEBUG
      printf(".");
      fflush(stdout);
      #endif

      // Re-arm the completion signals; the waits below return once
      // the signal value is less than 1
      hsa_signal_store_relaxed(signal_fwd, 1);
      if (bidir) {
        hsa_signal_store_relaxed(signal_rev, 1);
      }

      // The verification step below hands access of the destination
      // buffer to the Cpu agent, so access for the copy agents is
      // requested again before every repetition
      if (verify_) {
        AcquireAccess(src_agent_fwd, buf_dst_fwd);
        AcquireAccess(dst_agent_fwd, buf_src_fwd);
        if (bidir) {
          AcquireAccess(src_agent_rev, buf_dst_rev);
          AcquireAccess(dst_agent_rev, buf_src_rev);
        }
      }

      // Create a timer object
      PerfTimer timer;
      uint32_t index = timer.CreateTimer();

      // Start the timer and launch forward copy operation
      timer.StartTimer(index);
      err_ = hsa_amd_memory_async_copy(buf_dst_fwd, dst_agent_fwd,
                                       buf_src_fwd, src_agent_fwd,
                                       curr_size, 0, NULL, signal_fwd);
      ErrorCheck(err_);

      // Launch reverse copy operation if it is bidirectional
      if (bidir) {
        err_ = hsa_amd_memory_async_copy(buf_dst_rev, dst_agent_rev,
                                         buf_src_rev, src_agent_rev,
                                         curr_size, 0, NULL, signal_rev);
        ErrorCheck(err_);
      }

      // Wait for the forward copy operation to complete
      while (hsa_signal_wait_acquire(signal_fwd, HSA_SIGNAL_CONDITION_LT, 1,
                                     uint64_t(-1), HSA_WAIT_STATE_ACTIVE));

      // Wait for the reverse copy operation to complete
      if (bidir) {
        while (hsa_signal_wait_acquire(signal_rev, HSA_SIGNAL_CONDITION_LT, 1,
                                       uint64_t(-1), HSA_WAIT_STATE_ACTIVE));
      }

      // Stop the timer object
      timer.StopTimer(index);

      // Push the time taken for copy into a vector of copy times
      cpu_time.push_back(timer.ReadTimer(index));

      // Collect time from the signal(s)
      if (trans.copy.uses_gpu_) {
        double temp = GetGpuCopyTime(bidir, signal_fwd, signal_rev);
        gpu_time.push_back(temp);
      }

      if (verify_) {

        // Re-Establish access to destination buffer and host buffer
        AcquireAccess(cpu_agent_, buf_dst_fwd);
        AcquireAccess(dst_agent_fwd, host_dst_fwd);

        // Init dst buffer with values from outbuffer of copy operation
        hsa_signal_store_relaxed(host_signal_fwd, 1);
        copy_buffer(host_dst_fwd, cpu_agent_,
                    buf_dst_fwd, dst_agent_fwd,
                    curr_size, host_signal_fwd);
        ErrorCheck(err_);

        // Compare output equals input. A non-zero memcmp result is
        // turned into a non-success status for ErrorCheck
        err_ = (hsa_status_t)memcmp(host_src_fwd, host_dst_fwd, curr_size);
        ErrorCheck(err_);

        if (bidir) {

          // Re-Establish access to destination buffer and host buffer
          AcquireAccess(cpu_agent_, buf_dst_rev);
          AcquireAccess(dst_agent_rev, host_dst_rev);

          hsa_signal_store_relaxed(host_signal_rev, 1);
          copy_buffer(host_dst_rev, cpu_agent_,
                      buf_dst_rev, dst_agent_rev,
                      curr_size, host_signal_rev);
          ErrorCheck(err_);

          // Compare output equals input
          err_ = (hsa_status_t)memcmp(host_src_rev, host_dst_rev, curr_size);
          ErrorCheck(err_);
        }
      }
    }
    #if DEBUG
    std::cout << std::endl;
    #endif

    // Get Cpu min copy time
    trans.cpu_min_time_.push_back(GetMinTime(cpu_time));
    // Get Cpu mean copy time and store to the array
    trans.cpu_avg_time_.push_back(GetMeanTime(cpu_time));

    if (trans.copy.uses_gpu_) {
      // Get Gpu min copy time
      trans.gpu_min_time_.push_back(GetMinTime(gpu_time));
      // Get Gpu mean copy time and store to the array
      trans.gpu_avg_time_.push_back(GetMeanTime(gpu_time));
    }

    // Clear the samples collected for this size
    cpu_time.clear();
    gpu_time.clear();
  }

  // Free up buffers and signal objects used in copy operation
  ReleaseBuffers(bidir, buf_src_fwd, buf_src_rev,
                 buf_dst_fwd, buf_dst_rev, signal_fwd, signal_rev);

  if (verify_) {
    ReleaseBuffers(bidir, host_src_fwd, host_src_rev,
                   host_dst_fwd, host_dst_rev, host_signal_fwd, host_signal_rev);
  }
}

// @brief: Execute every transaction in trans_list_ with profiling of
// async copy activity enabled for the duration of the run
void RocmAsync::Run() {

  // Turn on profiling of Async Copy Activity
  err_ = hsa_amd_profiling_async_copy_enable(true);
  ErrorCheck(err_);

  // Dispatch each transaction according to its request type
  const uint32_t count = trans_list_.size();
  for (uint32_t pos = 0; pos < count; pos++) {
    async_trans_t& trans = trans_list_[pos];
    switch (trans.req_type_) {

      // Copy requests are benchmarked and then post-processed
      case REQ_COPY_BIDIR:
      case REQ_COPY_UNIDIR:
      case REQ_COPY_ALL_BIDIR:
      case REQ_COPY_ALL_UNIDIR:
        RunCopyBenchmark(trans);
        ComputeCopyTime(trans);
        break;

      // Read and Write requests share one entry point
      case REQ_READ:
      case REQ_WRITE:
        RunIOBenchmark(trans);
        break;

      // Any other request type is ignored
      default:
        break;
    }
  }

  // Turn off profiling of Async Copy Activity
  err_ = hsa_amd_profiling_async_copy_enable(false);
  ErrorCheck(err_);
}

void RocmAsync::Close() {
  hsa_status_t status = hsa_shut_down();
  ErrorCheck(status);
  return;
}

// Prepares the bandwidth test object for running the test
// scenarios requested by the user. The steps are, in order:
//
//    Parse user arguments
//    Validate the parsed arguments
//    Build the list of transactions to execute
//
// If validation or building the transaction list fails, the
// help screen is printed and the process exits with status 1
//
void RocmAsync::SetUp() {

  // Parse user arguments
  ParseArguments();

  // Reject requests whose parameters do not validate
  if (!ValidateArguments()) {
    PrintHelpScreen();
    exit(1);
  }

  // Build list of transactions (copy, read, write) to execute
  if (!BuildTransList()) {
    PrintHelpScreen();
    exit(1);
  }
}

// @brief: Capture the command line and reset the request state.
// Every request type starts out as REQ_INVALID and verification
// is off until ParseArguments() sees the matching options
//
// @param argc Number of entries in argv
// @param argv Argument strings as handed to the program
RocmAsync::RocmAsync(int argc, char** argv)
    : BaseTest(),
      agent_index_(0),
      pool_index_(0),
      req_read_(REQ_INVALID),
      req_write_(REQ_INVALID),
      req_copy_bidir_(REQ_INVALID),
      req_copy_unidir_(REQ_INVALID),
      req_copy_all_bidir_(REQ_INVALID),
      req_copy_all_unidir_(REQ_INVALID),
      usr_argc_(argc),
      usr_argv_(argv),
      verify_(false) {
}

// @brief: Destructor; performs no work of its own
RocmAsync::~RocmAsync() {}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async.hpp
================================================
#ifndef __ROCM_ASYNC_BW_H__
#define __ROCM_ASYNC_BW_H__

#include "hsa/hsa.h"
#include "base_test.hpp"
#include "hsatimer.hpp"
#include "common.hpp"
#include <vector>

using namespace std;

// Structure to encapsulate a RocR agent and its index in a list
typedef struct agent_info {

  agent_info(hsa_agent_t agent,
             uint32_t index, hsa_device_type_t device_type) {
    agent_ = agent;
    index_ = index;
    device_type_ = device_type;
  }

  agent_info() {}
  
  uint32_t index_;
  hsa_agent_t agent_;
  hsa_device_type_t device_type_;

} agent_info_t;

typedef struct pool_info {

  pool_info(hsa_agent_t agent, uint32_t agent_index,
            hsa_amd_memory_pool_t pool, hsa_amd_segment_t segment,
            size_t size, size_t alloc_max_size, uint32_t index,
            bool is_fine_grained, bool is_kernarg, bool access_to_all,
            hsa_amd_memory_pool_access_t owner_access) {

    pool_ = pool;
    index_ = index;
    segment_ = segment;
    owner_agent_ = agent;
    agent_index_ = agent_index;
    size_ = size;
    allocable_size_ = alloc_max_size;
    is_kernarg_ = is_kernarg;
    owner_access_ = owner_access;
    access_to_all_ = access_to_all;
    is_fine_grained_ = is_fine_grained;
  }

  pool_info() {}

  uint32_t index_;
  bool is_kernarg_;
  bool access_to_all_;
  bool is_fine_grained_;
  size_t size_;
  size_t allocable_size_;
  uint32_t agent_index_;
  hsa_agent_t owner_agent_;
  hsa_amd_segment_t segment_;
  hsa_amd_memory_pool_t pool_;
  hsa_amd_memory_pool_access_t owner_access_;

} pool_info_t;

// Used to print out topology info
typedef struct agent_pool_info {

  agent_pool_info() {}
  
  agent_info agent;
  
  vector<pool_info_t> pool_list;

} agent_pool_info_t;

// Describes one transaction to execute together with the
// timing results collected for it
typedef struct async_trans {

  // Type of request, one of the Request_Type values
  uint32_t req_type_;

  // Parameters of the request; which arm is meaningful
  // depends on the type of request
  union {

    // Parameters of a copy request
    struct {
      bool bidir_;
      bool uses_gpu_;
      uint32_t src_idx_;
      uint32_t dst_idx_;
      hsa_amd_memory_pool_t src_pool_;
      hsa_amd_memory_pool_t dst_pool_;
    } copy;

    // Parameters of a read / write request
    struct {
      void* code_;
      uint32_t agent_idx_;
      hsa_agent_t agent_;
      uint32_t pool_idx_;
      hsa_amd_memory_pool_t pool_;
    } kernel;
  };

  // Average copy time as measured on Cpu
  vector<double> cpu_avg_time_;

  // Min copy time as measured on Cpu
  vector<double> cpu_min_time_;

  // Average copy time as measured on Gpu
  vector<double> gpu_avg_time_;

  // Min copy time as measured on Gpu
  vector<double> gpu_min_time_;

  // BenchMark's average copy time and average bandwidth
  vector<double> avg_time_;
  vector<double> avg_bandwidth_;

  // BenchMark's min copy time and peak bandwidth
  vector<double> min_time_;
  vector<double> peak_bandwidth_;

  // Only the request type is set; the union is left untouched
  async_trans(uint32_t req_type) : req_type_(req_type) {}

} async_trans_t;

// Types of request a transaction can carry. REQ_INVALID is
// the value used for a request type that has not been asked for
typedef enum Request_Type {

  // Read / Write of a buffer
  REQ_READ = 1,
  REQ_WRITE = 2,

  // Copy between the pools listed by user
  REQ_COPY_BIDIR = 3,
  REQ_COPY_UNIDIR = 4,

  // Copy among all pools
  REQ_COPY_ALL_BIDIR = 5,
  REQ_COPY_ALL_UNIDIR = 6,

  // No request
  REQ_INVALID = 7

} Request_Type;

class RocmAsync : public BaseTest {

 public:

  // @brief: Constructor for test case of RocmAsync
  RocmAsync(int argc, char** argv);

  // @brief: Destructor for test case of RocmAsync
  virtual ~RocmAsync();

  // @brief: Setup the environment for measurement
  virtual void SetUp();

  // @brief: Core measurement execution
  virtual void Run();

  // @brief: Clean up and retrive the resource
  virtual void Close();

  // @brief: Display the results
  virtual void Display() const;

 private:

  // @brief: Print Help Menu Screen
  void PrintHelpScreen();

  // @brief: Discover the topology of pools on Rocm Platform
  void DiscoverTopology();

  // @brief: Print topology info
  void PrintTopology();

  // @brief: Print info on agents in system
  void PrintAgentsList();

  // @brief: Print info on memory pools in system
  void PrintPoolsList();

  // @brief: Parse the arguments provided by user to
  // build list of transactions
  void ParseArguments();
  
  // @brief: Print the list of transactions
  void PrintTransList();

  // @brief: Run read/write requests of users
  void RunIOBenchmark(async_trans_t& trans);

  // @brief: Run copy requests of users
  void RunCopyBenchmark(async_trans_t& trans);

  // @brief: Get iteration number
  uint32_t GetIterationNum();

  // @brief: Get the mean copy time
  double GetMeanTime(std::vector<double>& vec);

  // @brief: Get the min copy time
  double GetMinTime(std::vector<double>& vec);

  // @brief: Dispaly Benchmark result
  void DisplayIOTime(async_trans_t& trans) const;
  void DisplayCopyTime(async_trans_t& trans) const;
  void DisplayCopyTimeMatrix() const;

  private:

  // @brief: Validate the arguments passed in by user
  bool ValidateArguments();
  bool ValidateReadReq();
  bool ValidateWriteReq();
  bool ValidateReadOrWriteReq(vector<uint32_t>& in_list);
  
  bool ValidateBidirCopyReq();
  bool ValidateUnidirCopyReq();
  bool ValidateCopyReq(vector<uint32_t>& in_list);
  void PrintIOAccessError(uint32_t agent_idx, uint32_t pool_idx);
  void PrintCopyAccessError(uint32_t src_pool_idx, uint32_t dst_pool_idx);
  
  bool PoolIsPresent(vector<uint32_t>& in_list);
  bool PoolIsDuplicated(vector<uint32_t>& in_list);

  // @brief: Builds a list of transaction per user request
  void ComputeCopyTime(async_trans_t& trans);
  bool BuildTransList();
  bool BuildReadTrans();
  bool BuildWriteTrans();
  bool BuildBidirCopyTrans();
  bool BuildUnidirCopyTrans();
  bool BuildAllPoolsBidirCopyTrans();
  bool BuildAllPoolsUnidirCopyTrans();
  bool BuildReadOrWriteTrans(uint32_t req_type,
                             vector<uint32_t>& in_list);
  bool BuildCopyTrans(uint32_t req_type,
                      vector<uint32_t>& src_list,
                      vector<uint32_t>& dst_list);

  void AllocateCopyBuffers(bool bidir, uint32_t size,
                           void*& src_fwd, hsa_amd_memory_pool_t src_pool_fwd,
                           void*& dst_fwd, hsa_amd_memory_pool_t dst_pool_fwd,
                           hsa_agent_t src_agent_fwd, hsa_agent_t dst_agent_fwd,
                           void*& src_rev, hsa_amd_memory_pool_t src_pool_rev,
                           void*& dst_rev, hsa_amd_memory_pool_t dst_pool_rev,
                           hsa_agent_t src_agent_rev, hsa_agent_t dst_agent_rev,
                           hsa_signal_t& signal_fwd, hsa_signal_t& signal_rev);
  void ReleaseBuffers(bool bidir,
                      void* src_fwd, void* src_rev,
                      void* dst_fwd, void* dst_rev,
                      hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
  double GetGpuCopyTime(bool bidir, hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
  void AllocateHostBuffers(bool bidir, uint32_t size,
                                    void*& src_fwd, void*& dst_fwd,
                                    void* buf_src_fwd, void* buf_dst_fwd,
                                    hsa_agent_t src_agent_fwd, hsa_agent_t dst_agent_fwd,
                                    void*& src_rev, void*& dst_rev,
                                    void* buf_src_rev, void* buf_dst_rev,
                                    hsa_agent_t src_agent_rev, hsa_agent_t dst_agent_rev,
                                    hsa_signal_t& signal_fwd, hsa_signal_t& signal_rev);
  void copy_buffer(void* dst, hsa_agent_t dst_agent,
                   void* src, hsa_agent_t src_agent,
                   size_t size, hsa_signal_t signal);

  // @brief: Check if agent and access memory pool, if so, set 
  // access to the agent, if not, exit
  void AcquireAccess(hsa_agent_t agent, void* ptr);

  // Functions to find agents and memory pools and udpate
  // relevant data structures used to maintain system topology
  friend hsa_status_t AgentInfo(hsa_agent_t agent, void* data);
  friend hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data);

 protected:
  
  // More variables declared for testing
  // vector<transaction> tran_;

  // Used to help count agent_info
  uint32_t agent_index_;

  // List used to store agent info, indexed by agent_index_
  vector<agent_info_t> agent_list_;

  // Used to help count pool_info_t
  uint32_t pool_index_;

  // List used to store pool_info_t, indexed by pool_index_
  vector<pool_info_t> pool_list_;

  // List used to store agent_pool_info_t
  vector<agent_pool_info_t> agent_pool_list_;

  // List of agents involved in a bidrectional copy operation
  // Size of the list cannot exceed the number of agents
  // reported by the system
  vector<uint32_t> bidir_list_;

  // List of source agents in a unidrectional copy operation
  // Size of the list cannot exceed the number of agents
  // reported by the system
  vector<uint32_t> src_list_;

  // List of destination agents in a unidrectional copy operation
  // Size of the list cannot exceed the number of agents
  // reported by the system
  vector<uint32_t> dst_list_;

  // List of agents involved in read operation. Has
  // two agents, the first agent hosts the memory pool
  // while the second agent executes the read operation
  vector<uint32_t> read_list_;
  
  // List of agents involved in write operation. Has
  // two agents, the first agent hosts the memory pool
  // while the second agent executes the write operation
  vector<uint32_t> write_list_;
  
  // List of sizes to use in copy and read/write transactions
  // Size is specified in terms of Megabytes
  vector<uint32_t> size_list_;

  // Type of service requested by user
  uint32_t req_read_;
  uint32_t req_write_;
  uint32_t req_copy_bidir_;
  uint32_t req_copy_unidir_;
  uint32_t req_copy_all_bidir_;
  uint32_t req_copy_all_unidir_;

  // List used to store transactions per user request
  vector<async_trans_t> trans_list_;

  // List used to store transactions involving Cpu-Gpu pools
  vector<async_trans_t> matrix_trans_list_;

  // Variable to store argument number

  // Variable to store argument number

  // Variable to store argument number
  uint32_t usr_argc_;

  // Pointer to store address of argument text
  char** usr_argv_;

  // BenchMark copy time
  vector<double> op_time_;

  // Min time
  vector<double> min_time_;

  // Determines if user has requested verification
  bool verify_;

  // CPU agent used for verification
  hsa_agent_t cpu_agent_;

  // System region
  hsa_amd_memory_pool_t sys_pool_;
 
  static const uint32_t SIZE_LIST[4];
  //static const uint32_t SIZE_LIST[9];

};

#endif


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_io.cpp
================================================

#include "common.hpp"
#include "rocm_async.hpp"

#include <stdlib.h>
#include <assert.h>
#include <algorithm>
#include <unistd.h>
#include <cctype>
#include <sstream>

// @brief: Entry point for read / write requests. The requests are
// not supported: a message is printed and the process exits
// with status 1
void RocmAsync::RunIOBenchmark(async_trans_t& trans) {

  // Report the unsupported request and terminate
  std::cout << "Unsupported Request - Read / Write" << std::endl;
  exit(1);
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_parse.cpp
================================================
#include "common.hpp"
#include "rocm_async.hpp"

#include <algorithm>
#include <sstream>
#include <unistd.h>

// Parse option value string. The string has one or more decimal
// values separated by comma - "3,6,9,12,15".
//
// @param value Option value string as given by user
// @param value_list Output list; every value that is parsed is
// appended to it, including values parsed before an error is found
//
// @return true if the whole string is a comma separated list of
// numbers. false for a null or empty string, for an element that is
// not a number (which covers a trailing comma) and for any character
// other than a comma following a number
static bool ParseOptionValue(char* value, std::vector<uint32_t>& value_list) {

  // There is nothing to parse
  if (!value) {
    return false;
  }

  // Capture the option value string
  std::stringstream stream;
  stream << value;

  uint32_t token = 0;
  while (true) {

    // Read the option value. Extraction fails for an empty element,
    // in which case nothing must be added to the output list
    if (!(stream >> token)) {
      return false;
    }

    // Update output list with values
    value_list.push_back(token);

    // Reading the last number runs into the end of the string
    if (stream.eof()) {
      break;
    }

    // A number must be followed by the delimiter
    if (stream.peek() != ',') {
      return false;
    }
    stream.ignore();
  }

  return true;
}

// @brief: Parse the arguments provided by user.
//
// Walks the command line with getopt and records the requests and
// lists given by user. If help is requested, or an option is invalid,
// the help screen is printed and the process exits with status 0
// before the runtime is initialized. Otherwise the Roc runtime is
// initialized and the topology is discovered, after which:
//
//    "-t" prints the topology and exits with status 0
//    "-a" together with "-A" prints the help screen and exits
//    "-a" / "-A" fill the copy lists with every pool index
//    The list of buffer sizes is given its default, if user has
//    not listed any sizes, and is sorted in ascending order
//
void RocmAsync::ParseArguments() {

  bool print_help = false;
  bool copy_all_bi = false;
  bool copy_all_uni = false;
  bool print_topology = false;

  // This will suppress prints from getopt implementation
  // In case of error, it will return the character '?' as
  // return value.
  opterr = 0;

  int opt;
  bool status;
  while ((opt = getopt(usr_argc_, usr_argv_, "hvtaAb:s:d:r:w:m:")) != -1) {
    switch (opt) {

      // Print help screen
      case 'h':
        print_help = true;
        break;

      // Print system topology
      case 't':
        print_topology = true;
        break;

      // Set verification flag to true
      case 'v':
        verify_ = true;
        break;

      // Collect list of agents involved in bidirectional copy operation
      case 'b':
        status = ParseOptionValue(optarg, bidir_list_);
        if (status) {
          req_copy_bidir_ = REQ_COPY_BIDIR;
        } else {
          print_help = true;
        }
        break;

      // Collect list of source pools involved in unidirectional copy operation
      case 's':
        status = ParseOptionValue(optarg, src_list_);
        if (status) {
          req_copy_unidir_ = REQ_COPY_UNIDIR;
        } else {
          print_help = true;
        }
        break;

      // Collect list of destination pools involved in unidirectional copy operation
      case 'd':
        status = ParseOptionValue(optarg, dst_list_);
        if (status) {
          req_copy_unidir_ = REQ_COPY_UNIDIR;
        } else {
          print_help = true;
        }
        break;

      // Collect request to read a buffer
      case 'r':
        req_read_ = REQ_READ;
        status = ParseOptionValue(optarg, read_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Collect request to write a buffer
      case 'w':
        req_write_ = REQ_WRITE;
        status = ParseOptionValue(optarg, write_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Size of buffers to use in copy and read/write operations
      case 'm':
        status = ParseOptionValue(optarg, size_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Enable Unidirectional copy among all valid pools
      case 'a':
        copy_all_uni = true;
        req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
        break;

      // Enable Bidirectional copy among all valid pools
      case 'A':
        copy_all_bi = true;
        req_copy_all_bidir_ = REQ_COPY_ALL_BIDIR;
        break;

      // getopt implementation returns the value of the unknown
      // option or an option with missing operand in the variable
      // optopt. Report that character, and point out the missing
      // operand if it is one of the options that take an operand
      case '?':
        std::cout << "Value of optopt is: "
                  << static_cast<char>(optopt) << std::endl;
        if ((optopt == 'b') || (optopt == 's') || (optopt == 'd') ||
            (optopt == 'r') || (optopt == 'w') || (optopt == 'm')) {
          std::cout << "Error: Options -b -s -d -r -w and -m require argument" << std::endl;
        }
        print_help = true;
        break;
      default:
        print_help = true;
        break;
    }
  }

  // Print help screen if user option has "-h" or is invalid
  if (print_help) {
    PrintHelpScreen();
    exit(0);
  }

  // Initialize Roc Runtime
  err_ = hsa_init();
  ErrorCheck(err_);

  // Discover the topology of RocR agent in system
  DiscoverTopology();

  // Print system topology if user option has "-t"
  if (print_topology) {
    PrintTopology();
    exit(0);
  }

  // Invalidate request if user has requested full
  // copying for both unidirectional and bidirectional
  if ((copy_all_bi) && (copy_all_uni)) {
    PrintHelpScreen();
    exit(0);
  }

  // Initialize pool list if full copying in unidirectional mode is enabled
  if (copy_all_uni) {
    uint32_t size = pool_list_.size();
    for (uint32_t idx = 0; idx < size; idx++) {
      src_list_.push_back(idx);
      dst_list_.push_back(idx);
    }
  }

  // Initialize pool list if full copying in bidirectional mode is enabled
  if (copy_all_bi) {
    uint32_t size = pool_list_.size();
    for (uint32_t idx = 0; idx < size; idx++) {
      bidir_list_.push_back(idx);
    }
  }

  // Initialize the list of buffer sizes to use in copy/read/write operations
  // For All Copy operations use only one buffer size, the first default
  if (size_list_.size() == 0) {
    uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t);
    if ((copy_all_bi) || (copy_all_uni)) {
      size_len = 1;
    }
    for (uint32_t idx = 0; idx < size_len; idx++) {
      size_list_.push_back(SIZE_LIST[idx]);
    }
  }
  std::sort(size_list_.begin(), size_list_.end());
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_print.cpp
================================================
#include "common.hpp"
#include "rocm_async.hpp"

// @brief: Print Help Menu Screen. The options listed are the ones
// accepted by the option string used in ParseArguments()
void RocmAsync::PrintHelpScreen() {

  std::cout << std::endl;
  std::cout << "Runs with following options:" << std::endl;
  std::cout << std::endl;
  std::cout << "\t -h Prints the help screen" << std::endl;
  std::cout << "\t -v Verifies the data copied by copy operations" << std::endl;
  std::cout << "\t -t Prints system topology and its memory pools" << std::endl;
  std::cout << "\t -m List of buffer sizes to use, specified in Megabytes" << std::endl;
  std::cout << "\t -r List of pool,agent pairs engaged in Read operation" << std::endl;
  std::cout << "\t -w List of pool,agent pairs engaged in Write operation" << std::endl;
  std::cout << "\t -b List pools to use in bidirectional copy operations" << std::endl;
  std::cout << "\t -s List of source pools to use in copy unidirectional operations" << std::endl;
  std::cout << "\t -d List of destination pools to use in unidirectional copy operations" << std::endl;
  std::cout << "\t -a Perform Unidirectional Copy involving all pool combinations" << std::endl;
  std::cout << "\t -A Perform Bidirectional Copy involving all pool combinations" << std::endl;
  std::cout << std::endl;

  std::cout << std::endl;
  std::cout << "\t @note 1: Removes copyReq(srcI, dstJ) - where either Src or Dst Pool is fine-grained" << std::endl;
  std::cout << std::endl;
  std::cout << "\t @note 2: Treats copyReq(dstI, srcJ) as NOT EQUAL to copyReq(dstJ, srcI) " << std::endl;
  std::cout << "\t            Underlying copy engine could be different " << std::endl;
  std::cout << std::endl;

  // Earlier set of notes, kept for reference
  /*
  std::cout << "\t @note 1: Removes copyReq(srcI, dstI) - where Src & Dst Pools are same" << std::endl;
  std::cout << std::endl;
  std::cout << "\t @note 2: Removes copyReq(srcI, dstJ) - where Src & Dst Pools are Cpu bound" << std::endl;
  std::cout << std::endl;
  std::cout << "\t @note 3: Removes copyReq(srcI, dstJ) - where either Src or Dst Pool is fine-grained" << std::endl;
  std::cout << std::endl;
  std::cout << "\t @note 4: Treats copyReq(dstI, srcJ) as NOT EQUAL to copyReq(dstJ, srcI) " << std::endl;
  std::cout << "\t            Underlying copy engine could be different " << std::endl;
  std::cout << std::endl;
  */
}

// @brief: Print the topology of Memory Pools and Agents present in system
void RocmAsync::PrintTopology() {

  size_t count = agent_pool_list_.size();
  std::cout << std::endl;
  for (uint32_t idx = 0; idx < count; idx++) {
    agent_pool_info_t node = agent_pool_list_.at(idx);

    // Print agent info
    std::cout << "Agent: " << node.agent.index_ << std::endl;
    if (HSA_DEVICE_TYPE_CPU == node.agent.device_type_)
      std::cout << "  Agent Device Type:                            CPU" << std::endl;
    else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type_)
      std::cout << "  Agent Device Type:                            GPU" << std::endl;

    // Print pool info
    size_t pool_count = node.pool_list.size();
    for (uint32_t jdx = 0; jdx < pool_count; jdx++) {
      std::cout << "    Memory Pool:                                "
           << node.pool_list.at(jdx).index_ << std::endl;
      std::cout << "        max allocable size in KB:               "
           << node.pool_list.at(jdx).allocable_size_ / 1024 << std::endl;
      std::cout << "        segment id:                             "
           << node.pool_list.at(jdx).segment_ << std::endl;
      std::cout << "        is kernarg:                             "
           << node.pool_list.at(jdx).is_kernarg_ << std::endl;
      std::cout << "        is fine-grained:                        "
           << node.pool_list.at(jdx).is_fine_grained_ << std::endl;
      std::cout << "        accessible to owner:                    "
           << node.pool_list.at(jdx).owner_access_ << std::endl;
      std::cout << "        accessible to all by default:           "
           << node.pool_list.at(jdx).access_to_all_ << std::endl;
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}

// @brief: Print info on agents in system
void RocmAsync::PrintAgentsList() {

  size_t count = agent_pool_list_.size();
  for (uint32_t idx = 0; idx < count; idx++) {
    std::cout << std::endl;
    agent_pool_info_t node = agent_pool_list_.at(idx);
    std::cout << "Agent: " << node.agent.index_ << std::endl;
    if (HSA_DEVICE_TYPE_CPU == node.agent.device_type_)
      std::cout << "  Agent Device Type:            CPU" << std::endl;
    else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type_)
      std::cout << "  Agent Device Type:           GPU" << std::endl;
  }
  std::cout << std::endl;
}

// @brief: Print info on memory pools in system
void RocmAsync::PrintPoolsList() {

  size_t pool_count = pool_list_.size();
  for (uint32_t jdx = 0; jdx < pool_count; jdx++) {
    std::cout << std::endl;
    std::cout << "Memory Pool Idx:                          "
         << pool_list_.at(jdx).index_ << std::endl;
    std::cout << "  max allocable size in KB:               "
         << pool_list_.at(jdx).allocable_size_ / 1024 << std::endl;
    std::cout << "  segment id:                             "
         << pool_list_.at(jdx).segment_ << std::endl;
    std::cout << "  is kernarg:                             "
         << pool_list_.at(jdx).is_kernarg_ << std::endl;
    std::cout << "  is fine-grained:                        "
         << pool_list_.at(jdx).is_fine_grained_ << std::endl;
    std::cout << "  accessible to owner:                    "
         << pool_list_.at(jdx).owner_access_ << std::endl;
    std::cout << "  accessible to all by default:           "
         << pool_list_.at(jdx).access_to_all_ << std::endl;
  }
  std::cout << std::endl;

}

// @brief: Print the list of transactions that will be executed
void RocmAsync::PrintTransList() {

  size_t count = trans_list_.size();
  for (uint32_t idx = 0; idx < count; idx++) {
    async_trans_t trans = trans_list_.at(idx);
    std::cout << std::endl;
    std::cout << "                 Transaction Id: " << idx << std::endl;
    std::cout << "               Transaction Type: " << trans.req_type_ << std::endl;
    if ((trans.req_type_ == REQ_READ) || (trans.req_type_ == REQ_WRITE)) {
      std::cout << "Rocm Kernel used by Transaction: " << trans.kernel.code_ << std::endl;
      std::cout << "Rocm Memory Pool Used by Kernel: " << trans.kernel.pool_idx_ << std::endl;
      std::cout << "  Rocm Agent used for Execution: " << trans.kernel.agent_idx_ << std::endl;
    }
    if ((trans.req_type_ == REQ_COPY_BIDIR) || (trans.req_type_ == REQ_COPY_UNIDIR)) {
      std::cout << "   Src Memory Pool used in Copy: " << trans.copy.src_idx_ << std::endl;
      std::cout << "   Dst Memory Pool used in Copy: " << trans.copy.dst_idx_ << std::endl;
    }

  }
  std::cout << std::endl;
}

// @brief: Prints error message when a request to copy between
// source pool and destination pool is not possible
void RocmAsync::PrintCopyAccessError(uint32_t src_idx, uint32_t dst_idx) {

  // Retrieve Roc runtime handles for Src memory pool and agents
  uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
  hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
    
  // Retrieve Roc runtime handles for Dst memory pool and agents
  uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
  hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;

  std::cout << std::endl;
  std::cout << "Index of Src Pool: " << src_idx << std::endl;
  std::cout << "Index of Dst Pool: " << dst_idx << std::endl;
  std::cout << "Index of Src Pool's Agent: " << src_dev_idx << std::endl;
  std::cout << "Index of Dst Pool's Agent: " << dst_dev_idx << std::endl;
  std::cout << "Device Type of Src Pool's Agent: " << src_dev_type << std::endl;
  std::cout << "Device Type of Dst Pool's Agent: " << dst_dev_type << std::endl;
  std::cout << "Rocm Agent hosting Src Pool cannot ACCESS Dst Pool" << std::endl;
  std::cout << std::endl;
}

// @brief: Prints error message when a request to read / write from
// a pool by an agent is not possible
void RocmAsync::PrintIOAccessError(uint32_t exec_idx, uint32_t pool_idx) {

  // Retrieve device type of executing agent
  hsa_device_type_t exec_dev_type = agent_list_[exec_idx].device_type_;
    
  // Retrieve device type of memory pool's agent
  uint32_t pool_dev_idx = pool_list_[pool_idx].agent_index_;
  hsa_device_type_t pool_dev_type = agent_list_[pool_dev_idx].device_type_;

  std::cout << std::endl;
  std::cout << "Index of Executing Agent: " << exec_idx << std::endl;
  std::cout << "Device Type of Executing Agent: " << exec_dev_type << std::endl;
  
  std::cout << "Index of Buffer's Memory Pool: " << pool_idx << std::endl;
  std::cout << "Index of Buffer Memory Pool's Agent: " << pool_dev_idx << std::endl;
  std::cout << "Device Type of Buffer Memory Pool's Agent: " << pool_dev_type << std::endl;
  std::cout << "Rocm Agent executing Read / Write request cannot ACCESS Buffer's Memory Pool" << std::endl;
  std::cout << std::endl;
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_report.cpp
================================================
#include "common.hpp"
#include "rocm_async.hpp"

#include <iomanip>
#include <sstream>
#include <algorithm>

// @brief: Print one row of the benchmark table: data size, average
// time, average bandwidth, minimum time and peak bandwidth. Times are
// given in seconds and printed in microseconds. Every column is 15
// characters wide; the precision of std::cout is left at 3
static void printRecord(uint32_t size, double avg_time,
                        double bandwidth, double min_time,
                        double peak_bandwidth) {

  const int column_width = 15;

  // Size column carries its unit as part of the text
  std::ostringstream size_label;
  size_label << size << " MB";

  std::cout.precision(3);
  std::cout << std::setw(column_width) << size_label.str()
            << std::setw(column_width) << (avg_time * 1e6)
            << std::setw(column_width) << bandwidth
            << std::setw(column_width) << (min_time * 1e6)
            << std::setw(column_width) << peak_bandwidth
            << std::endl;
}

// @brief: Print the banner that precedes the timing rows of one copy
// benchmark: a title block naming the source and destination pools
// and their agent types, followed by the column headings. An agent
// type of zero is labelled Cpu, any other value Gpu. The left
// justification flag stays set on std::cout after the call
static void printCopyBanner(uint32_t src_pool_id, uint32_t src_agent_type,
                            uint32_t dst_pool_id, uint32_t dst_agent_type) {

  const char* const src_label = (src_agent_type == 0) ? "Cpu" : "Gpu";
  const char* const dst_label = (dst_agent_type == 0) ? "Cpu" : "Gpu";
  const char* const rule = "================";

  // Title block
  std::cout << std::endl;
  std::cout << rule << "           Benchmark Result"
            << "         ================" << std::endl;
  std::cout << rule << " Src Pool Id: " << src_pool_id
            << " Src Agent Type: " << src_label
            << " ================" << std::endl;
  std::cout << rule << " Dst Pool Id: " << dst_pool_id
            << " Dst Agent Type: " << dst_label
            << " ================" << std::endl;
  std::cout << std::endl;

  // Column headings, each padded to the width used by the data rows
  const uint32_t column_width = 15;
  const char* const headings[] = {
    "Data Size", "Avg Time(us)", "Avg BW(GB/s)", "Min Time(us)", "Peak BW(GB/s)"
  };
  std::cout.setf(std::ios::left);
  for (const char* heading : headings) {
    std::cout.width(column_width);
    std::cout << heading;
  }
  std::cout << std::endl;
}

// @brief: Return the smallest sample in vec. The vector is sorted in
// ascending order as a side effect; at() throws std::out_of_range
// when vec holds no samples
double RocmAsync::GetMinTime(std::vector<double>& vec) {

  std::sort(vec.begin(), vec.end());
  const double fastest = vec.at(0);
  return fastest;
}

// @brief: Return the mean of the samples in vec after trimming.
// The vector is sorted ascending and then trimmed in place:
//   1. the single smallest sample is dropped,
//   2. the next (num_iteration_ * 0.1, truncated) smallest are dropped,
//   3. everything beyond the first num_iteration_ survivors is dropped.
// Each trim is clamped to the samples actually present, so a short or
// empty vector no longer forms iterators past end(). The result is
// NaN when no sample survives the trimming
double RocmAsync::GetMeanTime(std::vector<double>& vec) {

  std::sort(vec.begin(), vec.end());

  // Drop the smallest sample, if there is one
  if (!vec.empty()) {
    vec.erase(vec.begin());
  }

  // Drop the next smallest tenth of the iteration count
  const size_t low_cut = std::min<size_t>(vec.size(),
                             static_cast<size_t>(num_iteration_ * 0.1));
  vec.erase(vec.begin(), vec.begin() + low_cut);

  // Keep at most num_iteration_ samples, discarding the largest ones
  const size_t keep = static_cast<size_t>(num_iteration_);
  if (vec.size() > keep) {
    vec.erase(vec.begin() + keep, vec.end());
  }

  double mean = 0.0;
  const int num = vec.size();
  for (int it = 0; it < num; it++) {
    mean += vec[it];
  }
  mean /= num;
  return mean;
}

void RocmAsync::Display() const {

  // Iterate through list of transactions and display its timing data
  uint32_t trans_size = trans_list_.size();
  if (trans_size == 0) {
    std::cout << std::endl;
    std::cout << "  One or more of the requests wered filtered out " << std::endl;
    std::cout << "      i.e. No Valid Requests were Made or Remain" << std::endl;
    std::cout << std::endl;
    return;
  }

  if ((req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) ||
      (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
    DisplayCopyTimeMatrix();
    std::cout << std::endl;
    return;
  }

  for (uint32_t idx = 0; idx < trans_size; idx++) {
    async_trans_t trans = trans_list_[idx];
    if ((trans.req_type_ == REQ_COPY_BIDIR) ||
        (trans.req_type_ == REQ_COPY_UNIDIR)) {
      DisplayCopyTime(trans);
    }
    if ((trans.req_type_ == REQ_READ) ||
        (trans.req_type_ == REQ_WRITE)) {
      DisplayIOTime(trans);
    }
  }
  std::cout << std::endl;
}

// @brief: Placeholder for reporting the timing data of a Read / Write
// transaction. The body is empty, so nothing is printed for such
// transactions
void RocmAsync::DisplayIOTime(async_trans_t& trans) const {

}

// @brief: Print the results of one copy transaction: a banner that
// names the source / destination pools and the device type of their
// agents, then one row per entry of size_list_
void RocmAsync::DisplayCopyTime(async_trans_t& trans) const {

  // Resolve each pool to its owning agent's device type for the banner
  const uint32_t src_pool = trans.copy.src_idx_;
  const uint32_t dst_pool = trans.copy.dst_idx_;
  const uint32_t src_owner = pool_list_[src_pool].agent_index_;
  const uint32_t dst_owner = pool_list_[dst_pool].agent_index_;
  const hsa_device_type_t src_kind = agent_list_[src_owner].device_type_;
  const hsa_device_type_t dst_kind = agent_list_[dst_owner].device_type_;
  printCopyBanner(src_pool, src_kind, dst_pool, dst_kind);

  // Result vectors of the transaction are indexed like size_list_
  const uint32_t rows = size_list_.size();
  for (uint32_t row = 0; row < rows; ++row) {
    printRecord(size_list_[row], trans.avg_time_[row],
                trans.avg_bandwidth_[row], trans.min_time_[row],
                trans.peak_bandwidth_[row]);
  }
}

void RocmAsync::DisplayCopyTimeMatrix() const {
  
  double* avg_matrix = new double[agent_index_ * agent_index_]();
  double* peak_matrix = new double[agent_index_ * agent_index_]();
  uint32_t trans_size = trans_list_.size();
  for (uint32_t idx = 0; idx < trans_size; idx++) {
    async_trans_t trans = trans_list_[idx];
    uint32_t src_idx = trans.copy.src_idx_;
    uint32_t dst_idx = trans.copy.dst_idx_;
    uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
    uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
    avg_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.avg_bandwidth_[0];
    peak_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
  }

  uint32_t format = 12;
  std::cout.setf(ios::left);

  std::cout << std::endl;
  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
    std::cout << "Peak Bandwidth For Unidirectional Copies GB/sec";
  } else {
    std::cout << "Peak Bandwidth For Bidirectional Copies GB/sec";
  }
  std::cout << std::endl;
  std::cout << std::endl;

  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  std::cout << "";
  for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
    std::cout.width(format);
    std::stringstream agent_id;
    agent_id << "Dev-" << idx0;
    std::cout << agent_id.str();
  }
  std::cout << std::endl;
  std::cout << std::endl;
  for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
    std::cout.width(format);
    std::cout << "";
    std::stringstream agent_id;
    agent_id << "Dev-" << idx0;
    std::cout.width(format);
    std::cout << agent_id.str();
    for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
      std::cout.width(format);
      std::cout << peak_matrix[(idx0 * agent_index_) + idx1];
    }
    std::cout << std::endl;
    std::cout << std::endl;
  }
  std::cout << std::endl;

  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
    std::cout << "Average Bandwidth For Unidirectional Copies GB/sec";
  } else {
    std::cout << "Average Bandwidth For Bidirectional Copies GB/sec";
  }
  std::cout << std::endl;
  std::cout << std::endl;

  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  std::cout << "";
  for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
    std::cout.width(format);
    std::stringstream agent_id;
    agent_id << "Dev-" << idx0;
    std::cout << agent_id.str();
  }
  std::cout << std::endl;
  std::cout << std::endl;
  for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
    std::cout.width(format);
    std::cout << "";
    std::stringstream agent_id;
    agent_id << "Dev-" << idx0;
    std::cout.width(format);
    std::cout << agent_id.str();
    for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
      std::cout.width(format);
      std::cout << avg_matrix[(idx0 * agent_index_) + idx1];
    }
    std::cout << std::endl;
    std::cout << std::endl;
  }
  std::cout << std::endl;

  /*
  std::cout.width(format);
  std::cout << "";
  std::cout << "@note-1: ZERO in Dev-i != Dev-j means DIRECT PATH doesn't exist";
  std::cout << std::endl;
  std::cout.width(format);
  std::cout << "";
  std::cout << "@note-2: ZERO in Dev-i == Dev-j means COPY operation is filtered out";
  std::cout << std::endl;
  std::cout << std::endl;
  */
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_topology.cpp
================================================
#include "common.hpp"
#include "rocm_async.hpp"

// @brief: Callback invoked for each memory pool of the agent that is
// currently being discovered. Pools outside the global segment, and
// pools that do not allow runtime allocation, are skipped. For every
// other pool its properties are queried and recorded both in the
// driver's flat pool list and in the pool list of the current agent.
// @param data Pointer to the RocmAsync instance driving discovery
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {

  RocmAsync* const driver = static_cast<RocmAsync*>(data);
  hsa_status_t rc;

  // Skip pools that are not part of the global segment
  hsa_amd_segment_t segment;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  ErrorCheck(rc);
  if (segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Skip pools from which a user cannot allocate
  bool alloc_allowed = false;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  ErrorCheck(rc);
  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  // Total size of the pool
  size_t pool_size = 0;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_size);
  ErrorCheck(rc);

  // Largest single allocation the pool supports
  size_t alloc_max = 0;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, &alloc_max);
  ErrorCheck(rc);

  // Whether every agent can access the pool by default
  bool open_to_all = false;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &open_to_all);
  ErrorCheck(rc);

  // Kind of access the owning agent has; the owner is the agent most
  // recently appended to the driver's agent list
  hsa_amd_memory_pool_access_t owner_access;
  hsa_agent_t owner = driver->agent_list_.back().agent_;
  rc = hsa_amd_agent_memory_pool_get_info(owner, pool,
               HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
  ErrorCheck(rc);

  // Global flags tell kernarg and fine-grained pools apart
  uint32_t global_flags = 0;
  rc = hsa_amd_memory_pool_get_info(pool,
               HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  ErrorCheck(rc);
  const bool kernarg = (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
  const bool fine_grained = (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;

  // A kernarg pool becomes the driver's system memory pool
  if (kernarg) {
    driver->sys_pool_ = pool;
  }

  // Record the pool in the flat list and under its owning agent
  pool_info_t record(owner, driver->agent_index_, pool,
                     segment, pool_size, alloc_max, driver->pool_index_,
                     fine_grained, kernarg,
                     open_to_all, owner_access);
  driver->pool_list_.push_back(record);
  driver->agent_pool_list_[driver->agent_index_].pool_list.push_back(record);
  driver->pool_index_++;

  return HSA_STATUS_SUCCESS;
}

// @brief: Callback invoked for each agent of the system. Records the
// agent and its device type in the driver, remembers the handle of a
// Cpu agent, and then walks the agent's memory pools with MemPoolInfo.
// @param data Pointer to the RocmAsync instance driving discovery
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {

  RocmAsync* asyncDrvr = reinterpret_cast<RocmAsync*>(data);

  // Get the name of the agent; the name is queried but not stored
  char agent_name[64];
  hsa_status_t status;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
  ErrorCheck(status);

  // Get device type
  hsa_device_type_t device_type;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  ErrorCheck(status);

  // Capture the handle of Cpu agent
  if (device_type == HSA_DEVICE_TYPE_CPU) {
    asyncDrvr->cpu_agent_ = agent;
  }

  asyncDrvr->agent_list_.push_back(agent_info(agent, asyncDrvr->agent_index_, device_type));

  // Construct a new agent_pool_info structure and add it to the list.
  // This must happen before the pools are walked, because MemPoolInfo
  // appends to the entry at agent_index_
  agent_pool_info node;
  node.agent = asyncDrvr->agent_list_.back();
  asyncDrvr->agent_pool_list_.push_back(node);

  // Walk the pools of this agent. The result was previously dropped;
  // it is now checked like every other runtime call in this file
  status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, asyncDrvr);
  ErrorCheck(status);

  // Advance only after the pools were recorded under the current index
  asyncDrvr->agent_index_++;

  return HSA_STATUS_SUCCESS;
}

// @brief: Walk all agents via hsa_iterate_agents, with AgentInfo as
// the per-agent callback and this object as its user data. The
// returned status is stored in err_ and is not examined here
void RocmAsync::DiscoverTopology() {
  err_ = hsa_iterate_agents(AgentInfo, this);
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_trans.cpp
================================================
#include "common.hpp"
#include "rocm_async.hpp"

// @brief: Build one Read or Write transaction for every (pool, agent)
// pair of in_list and append it to trans_list_. The list is consumed
// two entries at a time: a pool index followed by an agent index.
// @return false, after printing a diagnostic, as soon as an agent is
// never allowed to access its pool; true when all pairs were added
bool RocmAsync::BuildReadOrWriteTrans(uint32_t req_type,
                                      vector<uint32_t>& in_list) {

  const uint32_t entries = in_list.size();
  for (uint32_t at = 0; at < entries; at += 2) {

    const uint32_t pool_idx = in_list[at];
    const uint32_t exec_idx = in_list[at + 1];

    // Runtime handles of the executing agent and the buffer's pool
    hsa_agent_t exec_agent = agent_list_[exec_idx].agent_;
    hsa_amd_memory_pool_t pool = pool_list_[pool_idx].pool_;

    // Ask the runtime what access the agent has to the pool
    hsa_amd_memory_pool_access_t access;
    hsa_status_t status = hsa_amd_agent_memory_pool_get_info(exec_agent, pool,
                               HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
    ErrorCheck(status);

    // Give up on the whole request when access is denied
    if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      PrintIOAccessError(exec_idx, pool_idx);
      return false;
    }

    // Access is possible: queue the transaction
    async_trans_t trans(req_type);
    trans.kernel.code_ = nullptr;
    trans.kernel.pool_ = pool;
    trans.kernel.pool_idx_ = pool_idx;
    trans.kernel.agent_ = exec_agent;
    trans.kernel.agent_idx_ = exec_idx;
    trans_list_.push_back(trans);
  }
  return true;
}

// @brief: Build Read transactions from the pairs in read_list_
bool RocmAsync::BuildReadTrans() {
  const bool built = BuildReadOrWriteTrans(REQ_READ, read_list_);
  return built;
}

// @brief: Build Write transactions from the pairs in write_list_
bool RocmAsync::BuildWriteTrans() {
  const bool built = BuildReadOrWriteTrans(REQ_WRITE, write_list_);
  return built;
}

bool RocmAsync::BuildCopyTrans(uint32_t req_type,
                               vector<uint32_t>& src_list,
                               vector<uint32_t>& dst_list) {

  uint32_t src_size = src_list.size();
  uint32_t dst_size = dst_list.size();
  
  hsa_status_t status;
  hsa_amd_memory_pool_access_t access;
  for (uint32_t idx = 0; idx < src_size; idx++) {
    
    // Retrieve Roc runtime handles for Src memory pool and agents
    uint32_t src_idx = src_list[idx];
    hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
    hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
    uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
    hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;

    // Determine if dst pool is fine grained, if so filter out
    // the transaction
    if ((req_type == REQ_COPY_ALL_BIDIR) ||
        (req_type == REQ_COPY_ALL_UNIDIR)) {
      bool src_fine_grained =  pool_list_[src_idx].is_fine_grained_;
      if (src_fine_grained) {
        continue;
      }
    }

    for (uint32_t jdx = 0; jdx < dst_size; jdx++) {
    
      // Retrieve Roc runtime handles for Dst memory pool and agents
      uint32_t dst_idx = dst_list[jdx];
      hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
      hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
      uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
      hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;

      // Determine if dst pool is fine grained, if so filter out
      // the transaction
      if ((req_type == REQ_COPY_ALL_BIDIR) ||
          (req_type == REQ_COPY_ALL_UNIDIR)) {
        bool dst_fine_grained =  pool_list_[dst_idx].is_fine_grained_;
        if (dst_fine_grained) {
          continue;
        }
      }

      // Filter out transaction when Src & Dst pools belong to Cpu
      /*
      if ((src_dev_type == HSA_DEVICE_TYPE_CPU) &&
          (dst_dev_type == HSA_DEVICE_TYPE_CPU)) {
        continue;
      }
      */

      // Filter out transaction with same Src & Dst pools
      /*
      if (src_idx == dst_idx) {
        continue;
      }
      */
      
      // Determine if accessibility to src pool for dst agent is not denied
      status = hsa_amd_agent_memory_pool_get_info(dst_agent, src_pool,
                             HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
      ErrorCheck(status);
      if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
        PrintCopyAccessError(src_idx, dst_idx);
        return false;
      }

      // Determine if accessibility to dst pool for src agent is not denied
      status = hsa_amd_agent_memory_pool_get_info(src_agent, dst_pool,
                             HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
      ErrorCheck(status);
      if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
        return false;
      }

      // Agents have access, build an instance of transaction
      // and add it to the list of transactions
      async_trans_t trans(req_type);
      trans.copy.src_idx_ = src_idx;
      trans.copy.dst_idx_ = dst_idx;
      trans.copy.src_pool_ = src_pool;
      trans.copy.dst_pool_ = dst_pool;
      trans.copy.bidir_ = ((req_type == REQ_COPY_BIDIR) ||
                           (req_type == REQ_COPY_ALL_BIDIR));
      trans.copy.uses_gpu_ = ((src_dev_type == HSA_DEVICE_TYPE_GPU) ||
                              (dst_dev_type == HSA_DEVICE_TYPE_GPU));
      trans_list_.push_back(trans);
    }
  }
  return true;
}

// @brief: Build bidirectional copy transactions, using bidir_list_
// as both the source and the destination pool list
bool RocmAsync::BuildBidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_BIDIR, bidir_list_, bidir_list_);
  return built;
}

// @brief: Build unidirectional copy transactions from src_list_ to
// dst_list_
bool RocmAsync::BuildUnidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_UNIDIR, src_list_, dst_list_);
  return built;
}

// @brief: Build the all-pools bidirectional copy transactions, using
// bidir_list_ as both the source and the destination pool list
bool RocmAsync::BuildAllPoolsBidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_ALL_BIDIR, bidir_list_, bidir_list_);
  return built;
}

// @brief: Build the all-pools unidirectional copy transactions from
// src_list_ to dst_list_
bool RocmAsync::BuildAllPoolsUnidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_ALL_UNIDIR, src_list_, dst_list_);
  return built;
}

// @brief: Build the transactions of every request type the user asked
// for, in the order Read, Write, Bidir Copy, Unidir Copy, All-pools
// Bidir Copy, All-pools Unidir Copy.
// @return false as soon as one builder fails, leaving the later
// request types unbuilt; true otherwise
bool RocmAsync::BuildTransList() {

  // Read transactions
  if ((req_read_ == REQ_READ) && !BuildReadTrans()) {
    return false;
  }

  // Write transactions
  if ((req_write_ == REQ_WRITE) && !BuildWriteTrans()) {
    return false;
  }

  // Bidirectional Copy transactions
  if ((req_copy_bidir_ == REQ_COPY_BIDIR) && !BuildBidirCopyTrans()) {
    return false;
  }

  // Unidirectional Copy transactions
  if ((req_copy_unidir_ == REQ_COPY_UNIDIR) && !BuildUnidirCopyTrans()) {
    return false;
  }

  // All-pools Bidirectional Copy transactions
  if ((req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) &&
      !BuildAllPoolsBidirCopyTrans()) {
    return false;
  }

  // All-pools Unidirectional Copy transactions
  if ((req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) &&
      !BuildAllPoolsUnidirCopyTrans()) {
    return false;
  }

  // Every requested transaction type was built
  return true;
}

// @brief: Derive the reported timing figures of a copy transaction.
// For every entry of size_list_ one value is appended to each of
// min_time_, avg_time_, avg_bandwidth_ and peak_bandwidth_ of trans.
// Cpu-only copies use the cpu_* timings as they are; copies that
// involve a Gpu divide the gpu_* timings by the system timestamp
// frequency. Bandwidth is bytes / time / 1000^3
void RocmAsync::ComputeCopyTime(async_trans_t& trans) {

  // Get the frequency of Gpu Timestamping
  uint64_t sys_freq = 0;
  hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_freq);

  const uint32_t size_len = size_list_.size();
  for (uint32_t idx = 0; idx < size_len; idx++) {

    // Number of bytes moved. A bidirectional copy moves the payload
    // twice. The value is held in 64 bits because scaling the list
    // entry by 1024 * 1024 wraps a 32-bit counter for large payloads
    uint64_t data_size = size_list_[idx];
    if (trans.copy.bidir_ == true) {
      data_size += size_list_[idx];
    }
    data_size = data_size * 1024 * 1024;

    // Pick the timing source according to the devices involved
    double avg_time = 0;
    double min_time = 0;
    if (trans.copy.uses_gpu_ != true) {
      avg_time = trans.cpu_avg_time_[idx];
      min_time = trans.cpu_min_time_[idx];
    } else {
      avg_time = trans.gpu_avg_time_[idx] / sys_freq;
      min_time = trans.gpu_min_time_[idx] / sys_freq;
    }

    const double bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
    const double peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;

    trans.min_time_.push_back(min_time);
    trans.avg_time_.push_back(avg_time);
    trans.avg_bandwidth_.push_back(bandwidth);
    trans.peak_bandwidth_.push_back(peak_bandwidth);
  }
}


================================================
FILE: rocrtst/samples/rocm_async/rocm_async_validate.cpp
================================================

#include "common.hpp"
#include "rocm_async.hpp"

#include <assert.h>
#include <algorithm>
#include <unistd.h>
#include <cctype>
#include <sstream>

// @brief: Check that every pool index of in_list matches the index_
// of some entry in pool_list_.
// @return false at the first index without a match, true otherwise
// (including for an empty list)
bool RocmAsync::PoolIsPresent(vector<uint32_t>& in_list) {

  for (const uint32_t wanted : in_list) {
    bool found = false;
    for (const auto& pool : pool_list_) {
      if (wanted == pool.index_) {
        found = true;
        break;
      }
    }
    if (!found) {
      return false;
    }
  }

  return true;
}

// @brief: Check in_list for repeated pool indices.
// NOTE: the return value is inverted relative to the name: the method
// returns false when some value occurs more than once and true when
// all values are distinct. ValidateCopyReq depends on this sense
bool RocmAsync::PoolIsDuplicated(vector<uint32_t>& in_list) {

  const uint32_t total = in_list.size();

  // Comparing each element with the ones after it covers every pair
  for (uint32_t first = 0; first < total; ++first) {
    for (uint32_t second = first + 1; second < total; ++second) {
      if (in_list[first] == in_list[second]) {
        return false;
      }
    }
  }
  return true;
}

// @brief: Validate a Read / Write request. The request is a flat list
// of (memory pool index, agent index) pairs, so its length must be
// even, every pool index must be below pool_index_ and every agent
// index must be below agent_index_.
// @return true when the list is well formed, false otherwise
bool RocmAsync::ValidateReadOrWriteReq(vector<uint32_t>& in_list) {

  // A list of pairs cannot have an odd number of entries
  const uint32_t entries = in_list.size();
  if ((entries % 2) != 0) {
    return false;
  }

  // Both members of every pair must refer to something discovered
  for (uint32_t at = 0; at < entries; at += 2) {
    const bool pool_known = (in_list[at] < pool_index_);
    const bool agent_known = (in_list[at + 1] < agent_index_);
    if (!pool_known || !agent_known) {
      return false;
    }
  }
  return true;
}

// @brief: Validate the Read request held in read_list_
bool RocmAsync::ValidateReadReq() {
  const bool valid = ValidateReadOrWriteReq(read_list_);
  return valid;
}

// @brief: Validate the Write request held in write_list_
bool RocmAsync::ValidateWriteReq() {
  const bool valid = ValidateReadOrWriteReq(write_list_);
  return valid;
}

// @brief: Validate a list of pool indices given for a copy request.
// The list may not be longer than pool_list_, may not name a pool
// twice, and may only name pools that exist.
// @return true when all three conditions hold, false otherwise
bool RocmAsync::ValidateCopyReq(vector<uint32_t>& in_list) {

  // More entries than pools cannot be valid
  const uint32_t requested = in_list.size();
  const uint32_t available = pool_list_.size();
  if (requested > available) {
    return false;
  }

  // PoolIsDuplicated returns false when a pool is named twice
  if (!PoolIsDuplicated(in_list)) {
    return false;
  }

  // Finally every named pool must be present in the system
  return PoolIsPresent(in_list);
}

// @brief: Validate the pool list of a bidirectional copy request
bool RocmAsync::ValidateBidirCopyReq() {
  const bool valid = ValidateCopyReq(bidir_list_);
  return valid;
}

// @brief: Validate the source and destination pool lists of a
// unidirectional copy request. The destination list is examined only
// when the source list is valid
bool RocmAsync::ValidateUnidirCopyReq() {
  if (!ValidateCopyReq(src_list_)) {
    return false;
  }
  return ValidateCopyReq(dst_list_);
}

// @brief: Validate the inputs of every request type the user asked
// for. Request types that were not asked for are not examined.
// @return false as soon as one requested type has invalid inputs,
// true when all requested types are well formed
bool RocmAsync::ValidateArguments() {

  // READ request
  if ((req_read_ == REQ_READ) && !ValidateReadReq()) {
    return false;
  }

  // WRITE request
  if ((req_write_ == REQ_WRITE) && !ValidateWriteReq()) {
    return false;
  }

  // Bidirectional copy; the all-to-all bidirectional request shares
  // the same validation
  const bool wants_bidir = (req_copy_bidir_ == REQ_COPY_BIDIR) ||
                           (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR);
  if (wants_bidir && !ValidateBidirCopyReq()) {
    return false;
  }

  // Unidirectional copy; the all-to-all unidirectional request shares
  // the same validation
  const bool wants_unidir = (req_copy_unidir_ == REQ_COPY_UNIDIR) ||
                            (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR);
  if (wants_unidir && !ValidateUnidirCopyReq()) {
    return false;
  }

  // All of the requests are well formed
  return true;
}


================================================
FILE: rocrtst/samples/rocrinfo/rocrinfo.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#include <stdio.h>
#include <vector>
#include <string>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// When `err` differs from HSA_STATUS_SUCCESS, prints the line, file and
// status value and returns `err` from the enclosing function. The
// argument is expanded more than once (test, printf, return), so pass a
// variable rather than a call expression.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    printf("hsa api call failure at line %d, file: %s. Call returned %d\n", \
                                                   __LINE__, __FILE__, err); \
    return (err); \
  } \
}

// This structure holds system information acquired through hsa info related
// calls, and is later used for reference when displaying the information.
struct system_info_t {
    // Version pair; presumably the HSA runtime major / minor version,
    // confirm against the code that fills this structure
    uint16_t major, minor;
    // Defaults to 0 until filled in; units are not visible here (TODO confirm)
    uint64_t timestamp_frequency = 0;
    // Defaults to 0 until filled in; units are not visible here (TODO confirm)
    uint64_t max_wait = 0;
    // No default initializer: indeterminate until assigned
    hsa_endianness_t endianness;
    // No default initializer: indeterminate until assigned
    hsa_machine_model_t machine_model;
};

// This structure holds agent information acquired through hsa info related
// calls, and is later used for reference when displaying the information.
// Every field must be exactly as wide as the attribute that
// hsa_agent_get_info() writes into it, because the runtime stores through an
// untyped pointer.
struct agent_info_t {
  char name[64];
  char vendor_name[64];
  hsa_agent_feature_t agent_feature;
  hsa_profile_t agent_profile;
  hsa_default_float_rounding_mode_t float_rounding_mode;
  uint32_t max_queue;
  uint32_t queue_min_size;
  uint32_t queue_max_size;
  hsa_queue_type_t queue_type;
  uint32_t node;
  hsa_device_type_t device_type;
  uint32_t cache_size[4];          // One entry per cache level, in bytes
  uint32_t chip_id;
  uint32_t cacheline_size;
  uint32_t max_clock_freq;         // Displayed in MHz
  uint32_t compute_unit;
  uint32_t wavefront_size;
  uint32_t workgroup_max_size;
  uint32_t grid_max_size;
  uint32_t fbarrier_max_size;
  uint32_t waves_per_cu;
  hsa_isa_t agent_isa;             // Only queried for GPU agents
  hsa_dim3_t grid_max_dim;
  uint16_t workgroup_max_dim[3];
  // HSA_AMD_AGENT_INFO_BDFID is a uint32_t attribute; a 16-bit field here
  // would let the query write past the member into the fields that follow.
  uint32_t bdf_id;
  bool fast_f16;
};

// Attributes of a single memory pool, gathered through
// hsa_amd_memory_pool_get_info() and kept around until they are printed.
struct pool_info_t {
  uint32_t segment;                  // Segment the pool belongs to
  size_t pool_size;                  // Pool size; displayed in KB
  bool alloc_allowed;                // Runtime allocation permitted?
  size_t alloc_granule;              // Allocation granule; displayed in KB
  size_t alloc_recommended_granule;  // Recommended allocation granule
  size_t pool_alloc_alignment;       // Allocation alignment; displayed in KB
  bool pl_access;                    // Accessible by all agents?
  uint32_t global_flag;              // Global-segment flag bits
};

// Attributes of a single ISA, gathered through hsa_isa_get_info_alt() and
// kept around until they are printed.
struct isa_info_t {
  char *name_str;                // Heap buffer allocated by AcquireISAInfo()
  uint32_t workgroup_max_size;
  hsa_dim3_t grid_max_dim;
  uint64_t grid_max_size;
  uint32_t fbarrier_max_size;
  uint16_t workgroup_max_dim[3];
  bool def_rounding_modes[3];    // Indexed by rounding-mode enum value
  bool base_rounding_modes[3];   // Indexed by rounding-mode enum value
  bool mach_models[2];           // Indexed by machine-model enum value
  bool profiles[2];              // Indexed by profile enum value
  bool fast_f16;
};

// Attributes of a single cache, gathered through hsa_cache_get_info() and
// kept around until they are printed.
struct cache_info_t {
  char *name_str;  // Cache name
  uint8_t level;   // Cache level
  uint32_t size;   // Cache size
};

// Layout constants used by the print helpers in this file.
// Width, in characters, of the left-justified label column.
static const uint32_t kLabelFieldSize = 25;
// Width, in characters, of the left-justified value column.
static const uint32_t kValueFieldSize = 35;
// Number of spaces emitted per indentation level.
static const uint32_t kIndentSize = 2;

// Prints one "label value" row terminated by a newline. The row is indented
// by indent_lvl levels, the label is left-justified in the label column and
// the number is formatted as unsigned, left-justified in the value column.
static void printLabelInt(char const *l, int d, uint32_t indent_lvl = 0) {
  const std::string pad(kIndentSize * indent_lvl, ' ');
  printf("%s%-*s%-*u\n",
         pad.c_str(),
         kLabelFieldSize, l,
         kValueFieldSize, d);
}
// Prints one "label value" row terminated by a newline. The row is indented
// by indent_lvl levels; label and string value are each left-justified in
// their own fixed-width column.
static void printLabelStr(char const *l, char const *s,
                          uint32_t indent_lvl = 0) {
  const std::string pad(kIndentSize * indent_lvl, ' ');
  printf("%s%-*s%-*s\n",
         pad.c_str(),
         kLabelFieldSize, l,
         kValueFieldSize, s);
}
// Prints only the label column, indented by indent_lvl levels. When newline
// is false the cursor stays on the row so a value can be printed after it.
static void printLabel(char const *l, bool newline = false,
                       uint32_t indent_lvl = 0) {
  const std::string pad(kIndentSize * indent_lvl, ' ');

  printf("%s%-*s", pad.c_str(), kLabelFieldSize, l);
  if (!newline) {
    return;
  }
  printf("\n");
}
// Prints a string left-justified in the value column. A trailing newline is
// emitted unless the caller passes newline = false (previously the flag was
// accepted but ignored and a newline was always written).
static void printValueStr(char const *s, bool newline = true) {
  printf("%-*s", kValueFieldSize, s);

  if (newline) {
    printf("\n");
  }
}

// Fills *sys_info with the system-wide attributes exposed by
// hsa_system_get_info(). Stops at the first failing query and returns its
// status; otherwise returns the status of the final query.
static hsa_status_t AcquireSystemInfo(system_info_t *sys_info) {
  hsa_status_t status;

  // Runtime version: major part, then minor part
  status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR,
                               &sys_info->major);
  RET_IF_HSA_ERR(status);
  status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR,
                               &sys_info->minor);
  RET_IF_HSA_ERR(status);

  // Timestamp frequency
  status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY,
                               &sys_info->timestamp_frequency);
  RET_IF_HSA_ERR(status);

  // Maximum duration of a signal wait operation
  status = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT,
                               &sys_info->max_wait);
  RET_IF_HSA_ERR(status);

  // Endianness of the system
  status = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS,
                               &sys_info->endianness);
  RET_IF_HSA_ERR(status);

  // Machine model
  status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL,
                               &sys_info->machine_model);
  RET_IF_HSA_ERR(status);

  return status;
}

// Prints the system attributes previously gathered by AcquireSystemInfo(),
// one labelled row per attribute, followed by a blank line.
static void DisplaySystemInfo(system_info_t const *sys_info) {
  printLabel("Runtime Version:");
  printf("%d.%d\n", sys_info->major, sys_info->minor);
  printLabel("System Timestamp Freq.:");
  printf("%fMHz\n", sys_info->timestamp_frequency / 1e6);
  printLabel("Sig. Max Wait Duration:");
  // uint64_t is not `unsigned long` on every platform, so widen explicitly
  // and use the matching conversion specifier.
  printf("%llu (number of timestamp)\n",
         static_cast<unsigned long long>(sys_info->max_wait));

  printLabel("Machine Model:");
  if (HSA_MACHINE_MODEL_SMALL == sys_info->machine_model) {
    printValueStr("SMALL");
  } else if (HSA_MACHINE_MODEL_LARGE == sys_info->machine_model) {
    printValueStr("LARGE");
  } else {
    // Always finish the row so the next label starts on its own line.
    printValueStr("Unknown");
  }

  printLabel("System Endianness:");
  if (HSA_ENDIANNESS_LITTLE == sys_info->endianness) {
    printValueStr("LITTLE");
  } else if (HSA_ENDIANNESS_BIG == sys_info->endianness) {
    printValueStr("BIG");
  } else {
    printValueStr("Unknown");
  }
  printf("\n");
}

// Queries every attribute of `agent` that this tool reports and stores the
// results in *agent_i. The ISA handle is only queried for GPU devices, and
// the dispatch-related limits only for agents that advertise
// HSA_AGENT_FEATURE_KERNEL_DISPATCH; for other agents those fields are left
// untouched. Stops at the first failing query and returns its status.
static hsa_status_t
AcquireAgentInfo(hsa_agent_t agent, agent_info_t *agent_i) {
  hsa_status_t status;

  // Agent name and vendor
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name);
  RET_IF_HSA_ERR(status);
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME,
                              &agent_i->vendor_name);
  RET_IF_HSA_ERR(status);

  // Agent feature bits
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE,
                              &agent_i->agent_feature);
  RET_IF_HSA_ERR(status);

  // Profile supported by the agent
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE,
                              &agent_i->agent_profile);
  RET_IF_HSA_ERR(status);

  // Default floating-point rounding mode
  status = hsa_agent_get_info(agent,
                              HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
                              &agent_i->float_rounding_mode);
  RET_IF_HSA_ERR(status);

  // Maximum number of queues
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX,
                              &agent_i->max_queue);
  RET_IF_HSA_ERR(status);

  // Queue minimum size
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE,
                              &agent_i->queue_min_size);
  RET_IF_HSA_ERR(status);

  // Queue maximum size
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
                              &agent_i->queue_max_size);
  RET_IF_HSA_ERR(status);

  // Queue type
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE,
                              &agent_i->queue_type);
  RET_IF_HSA_ERR(status);

  // Agent node
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node);
  RET_IF_HSA_ERR(status);

  // Device type
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
                              &agent_i->device_type);
  RET_IF_HSA_ERR(status);

  // The ISA handle is only requested for GPU devices
  if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) {
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA,
                                &agent_i->agent_isa);
    RET_IF_HSA_ERR(status);
  }

  // Cache sizes
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE,
                              agent_i->cache_size);
  RET_IF_HSA_ERR(status);

  // Chip id
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID,
      &agent_i->chip_id);
  RET_IF_HSA_ERR(status);

  // Cacheline size
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
      &agent_i->cacheline_size);
  RET_IF_HSA_ERR(status);

  // Maximum clock frequency
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,
      &agent_i->max_clock_freq);
  RET_IF_HSA_ERR(status);

  // Agent BDFID
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID,
      &agent_i->bdf_id);
  RET_IF_HSA_ERR(status);

  // Number of compute units
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
      &agent_i->compute_unit);
  RET_IF_HSA_ERR(status);

  // Everything below only applies to kernel-dispatch agents
  if (!(agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
    return status;
  }

  // Flag of fast_f16 operation
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION,
                              &agent_i->fast_f16);
  RET_IF_HSA_ERR(status);

  // Wavefront size
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
                              &agent_i->wavefront_size);
  RET_IF_HSA_ERR(status);

  // Maximum total number of work-items in a workgroup
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
                              &agent_i->workgroup_max_size);
  RET_IF_HSA_ERR(status);

  // Maximum number of work-items in each dimension of a workgroup
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
                              &agent_i->workgroup_max_dim);
  RET_IF_HSA_ERR(status);

  // Maximum grid size per dimension
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM,
                              &agent_i->grid_max_dim);
  RET_IF_HSA_ERR(status);

  // Maximum total number of work-items in a grid
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE,
                              &agent_i->grid_max_size);
  RET_IF_HSA_ERR(status);

  // Maximum number of fbarriers per workgroup
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE,
                              &agent_i->fbarrier_max_size);
  RET_IF_HSA_ERR(status);

  // Maximum number of waves per compute unit
  status = hsa_agent_get_info(
      agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
      &agent_i->waves_per_cu);
  RET_IF_HSA_ERR(status);

  return status;
}

// Prints the attributes previously gathered by AcquireAgentInfo(), one
// labelled row per attribute. The dispatch-related limits are only printed
// for agents that advertise HSA_AGENT_FEATURE_KERNEL_DISPATCH, mirroring the
// condition under which they were acquired.
static void DisplayAgentInfo(agent_info_t *agent_i) {
  printLabelStr("Name:", agent_i->name, 1);
  printLabelStr("Vendor Name:", agent_i->vendor_name, 1);

  printLabel("Feature:", false, 1);
  if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH
      && agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) {
    printValueStr("KERNEL_DISPATCH & AGENT_DISPATCH");
  } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
    printValueStr("KERNEL_DISPATCH");
  } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) {
    printValueStr("AGENT_DISPATCH");
  } else {
    printValueStr("None specified");
  }

  printLabel("Profile:", false, 1);
  if (HSA_PROFILE_BASE == agent_i->agent_profile) {
    printValueStr("BASE_PROFILE");
  } else if (HSA_PROFILE_FULL == agent_i->agent_profile) {
    printValueStr("FULL_PROFILE");
  } else {
    printValueStr("Unknown");
  }

  printLabel("Float Round Mode:", false, 1);
  if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO == agent_i->float_rounding_mode) {
    printValueStr("ZERO");
  } else if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR ==
                                               agent_i->float_rounding_mode) {
    printValueStr("NEAR");
  } else {
    printValueStr("Not Supported");
  }

  printLabelInt("Max Queue Number:", agent_i->max_queue, 1);
  printLabelInt("Queue Min Size:", agent_i->queue_min_size, 1);
  printLabelInt("Queue Max Size:", agent_i->queue_max_size, 1);

  if (HSA_QUEUE_TYPE_MULTI == agent_i->queue_type) {
    printLabelStr("Queue Type:", "MULTI", 1);
  } else if (HSA_QUEUE_TYPE_SINGLE == agent_i->queue_type) {
    printLabelStr("Queue Type:", "SINGLE", 1);
  } else {
    printLabelStr("Queue Type:", "Unknown", 1);
  }

  printLabelInt("Node:", agent_i->node, 1);

  printLabel("Device Type:", false, 1);
  if (HSA_DEVICE_TYPE_CPU == agent_i->device_type) {
    printValueStr("CPU");
  } else if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) {
    printValueStr("GPU");
  } else {
    printValueStr("DSP");
  }

  printLabel("Cache Info:", true, 1);

  // Only cache levels that report a non-zero size are listed.
  for (int i = 0; i < 4; i++) {
    if (agent_i->cache_size[i]) {
      std::string tmp_str("L");
      tmp_str += std::to_string(i+1);
      tmp_str += ":";
      printLabel(tmp_str.c_str(), false, 2);

      tmp_str = std::to_string(agent_i->cache_size[i]/1024);
      tmp_str += "KB";
      printValueStr(tmp_str.c_str());
    }
  }

  printLabelInt("Chip ID:", agent_i->chip_id, 1);
  printLabelInt("Cacheline Size:", agent_i->cacheline_size, 1);
  printLabelInt("Max Clock Frequency (MHz):", agent_i->max_clock_freq, 1);
  printLabelInt("BDFID:", agent_i->bdf_id, 1);
  printLabelInt("Compute Unit:", agent_i->compute_unit, 1);

  printLabel("Features:", false, 1);
  if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
    printf("%s", "KERNEL_DISPATCH ");
  }
  if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) {
    printf("%s", "AGENT_DISPATCH");
  }
  if (agent_i->agent_feature == 0) {
    printf("None");
  }
  printf("\n");

  if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) {
    printLabelStr("Fast F16 Operation:", agent_i->fast_f16 ? "TRUE":"FALSE", 1);

    printLabelInt("Wavefront Size:", agent_i->wavefront_size, 1);
    printLabelInt("Workgroup Max Size:", agent_i->workgroup_max_size, 1);

    printLabel("Workgroup Max Size Per Dimension:", true, 1);
    std::string dim;
    for (int i = 0; i < 3; i++) {
      dim = "Dim[" + std::to_string(i) + "]:";
      // workgroup_max_dim holds 16-bit elements; index it directly instead
      // of viewing it as a uint32_t array, which would merge neighbouring
      // elements and read past the end of the array.
      printLabelInt(dim.c_str(), agent_i->workgroup_max_dim[i], 2);
    }
    printLabelInt("Grid Max Size:", agent_i->grid_max_size, 1);
    printLabelInt("Waves Per CU:", agent_i->waves_per_cu, 1);
    printLabelInt("Max Work-item Per CU:",
                            agent_i->wavefront_size*agent_i->waves_per_cu, 1);
    printLabel("Grid Max Size per Dimension:", true, 1);
    const uint32_t grid_dims[3] = {agent_i->grid_max_dim.x,
                                   agent_i->grid_max_dim.y,
                                   agent_i->grid_max_dim.z};
    for (int i = 0; i < 3; i++) {
      dim = "Dim[" + std::to_string(i) + "]:";
      printLabelInt(dim.c_str(), grid_dims[i], 2);
    }

    printLabelInt("Max number Of fbarriers Per Workgroup:",
                                             agent_i->fbarrier_max_size, 1);
  }
}

// Fills *pool_i with the attributes of `pool` reported by
// hsa_amd_memory_pool_get_info(). Stops at the first failing query and
// returns its status; returns HSA_STATUS_SUCCESS when every query succeeds.
static hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
                                    pool_info_t *pool_i) {
  hsa_status_t status;

  // Global-segment flags
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
  RET_IF_HSA_ERR(status);

  // Segment the pool belongs to
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment);
  RET_IF_HSA_ERR(status);

  // Size of the pool
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->pool_size);
  RET_IF_HSA_ERR(status);

  // Whether runtime allocation is allowed
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
      &pool_i->alloc_allowed);
  RET_IF_HSA_ERR(status);

  // Allocation granule
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
      &pool_i->alloc_granule);
  RET_IF_HSA_ERR(status);

  // Allocation alignment
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
      &pool_i->pool_alloc_alignment);
  RET_IF_HSA_ERR(status);

  // Recommended allocation granule
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE,
      &pool_i->alloc_recommended_granule);
  RET_IF_HSA_ERR(status);

  // Whether the pool is accessible by all agents
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pool_i->pl_access);
  RET_IF_HSA_ERR(status);

  return HSA_STATUS_SUCCESS;
}

// Writes into *out_str a comma-separated list naming the global-segment flag
// bits that are set in global_flag, in the fixed order KERNARG,
// FINE GRAINED, COARSE GRAINED. *out_str is left empty when none is set.
static void MakeGlobalFlagsString(uint32_t global_flag, std::string* out_str) {
  *out_str = "";

  // Appends `label` when `bit` is set, inserting ", " between entries.
  auto append_if_set = [&](uint32_t bit, const char* label) {
    if (!(bit & global_flag)) {
      return;
    }
    if (!out_str->empty()) {
      *out_str += ", ";
    }
    *out_str += label;
  };

  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT, "KERNARG");
  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED, "FINE GRAINED");
  append_if_set(HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED,
                "COARSE GRAINED");
}

// Prints the "Segment:" row for a memory pool at indentation level ind_lvl.
// For the global segment the row also lists the global flags that are set.
static void DumpSegment(pool_info_t *pool_i, uint32_t ind_lvl) {
  std::string seg_str;
  std::string tmp_str;

  printLabel("Segment:", false, ind_lvl);

  switch (pool_i->segment) {
    case HSA_AMD_SEGMENT_GLOBAL:
      MakeGlobalFlagsString(pool_i->global_flag, &tmp_str);
      seg_str += "GLOBAL; FLAGS: " + tmp_str;
      break;

    case HSA_AMD_SEGMENT_READONLY:
      seg_str += "READONLY";
      break;

    case HSA_AMD_SEGMENT_PRIVATE:
      seg_str += "PRIVATE";
      break;

    case HSA_AMD_SEGMENT_GROUP:
      seg_str += "GROUP";
      break;

    default:
      // Route the text through the value column like every other case, so
      // the row is terminated once instead of being followed by a second,
      // blank padded line.
      seg_str += "Not Supported";
      break;
  }
  printValueStr(seg_str.c_str());
}

// Prints the attributes previously gathered by AcquirePoolInfo(), one
// labelled row per attribute, at the given indentation level. Sizes are
// shown in KB (integer division by 1024).
static void DisplayPoolInfo(pool_info_t *pool_i, uint32_t indent) {
  DumpSegment(pool_i, indent);

  std::string sz_str = std::to_string(pool_i->pool_size/1024) + "KB";
  printLabelStr("Size:", sz_str.c_str(), indent);
  printLabelStr("Allocatable:", (pool_i->alloc_allowed ? "TRUE" : "FALSE"),
                                                                      indent);
  std::string gr_str = std::to_string(pool_i->alloc_granule/1024)+"KB";
  printLabelStr("Alloc Granule:", gr_str.c_str(), indent);

  // The recommended granule is acquired alongside the other attributes, so
  // report it as well rather than silently dropping it.
  std::string rec_str =
      std::to_string(pool_i->alloc_recommended_granule/1024)+"KB";
  printLabelStr("Alloc Rec. Granule:", rec_str.c_str(), indent);

  std::string al_str = std::to_string(pool_i->pool_alloc_alignment/1024)+"KB";
  printLabelStr("Alloc Alignment:", al_str.c_str(), indent);

  printLabelStr("Accessible by all:", (pool_i->pl_access ? "TRUE" : "FALSE"),
                                                                      indent);
}

// Gathers the attributes of `pool` and prints them at indentation level
// `indent`. Returns the status of the acquisition; nothing is printed for
// the pool when it fails.
static hsa_status_t
AcquireAndDisplayMemPoolInfo(const hsa_amd_memory_pool_t pool,
                                                            uint32_t indent) {
  hsa_status_t err;
  pool_info_t pool_i;

  err = AcquirePoolInfo(pool, &pool_i);
  RET_IF_HSA_ERR(err);

  // Honour the caller's indentation instead of a hard-coded level.
  DisplayPoolInfo(&pool_i, indent);

  return err;
}

// Memory-pool iteration callback. `data` points to an int counter that is
// incremented for every pool visited and used to number the "Pool N"
// heading; the pool's attributes are then acquired and printed beneath it.
static hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) {
  int* pool_counter = reinterpret_cast<int*>(data);
  ++(*pool_counter);

  const std::string heading = "Pool " + std::to_string(*pool_counter);
  printLabel(heading.c_str(), true, 2);

  hsa_status_t status = AcquireAndDisplayMemPoolInfo(pool, 3);
  RET_IF_HSA_ERR(status);

  return status;
}

// Reads every ISA attribute except the name length into *isa_i. The name
// buffer must already be allocated. Stops at the first failing query and
// returns its status.
static hsa_status_t QueryISAAttributes(hsa_isa_t isa, isa_info_t *isa_i) {
  hsa_status_t err;

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS,
                                                          isa_i->mach_models);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES,
                                                   isa_i->def_rounding_modes);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa,
                    HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES,
                                                  isa_i->base_rounding_modes);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION,
                                                            &isa_i->fast_f16);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM,
                                                   &isa_i->workgroup_max_dim);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE,
                                                  &isa_i->workgroup_max_size);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM,
                                                        &isa_i->grid_max_dim);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE,
                                                        &isa_i->grid_max_size);
  RET_IF_HSA_ERR(err);

  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE,
                                                    &isa_i->fbarrier_max_size);
  RET_IF_HSA_ERR(err);

  return err;
}

// Gathers the attributes of `isa` into *isa_i. On success isa_i->name_str
// owns a heap buffer that the caller must release with delete[]. On failure
// after the buffer was allocated, the buffer is released here and
// isa_i->name_str is reset to nullptr, so nothing leaks.
static hsa_status_t AcquireISAInfo(hsa_isa_t isa, isa_info_t *isa_i) {
  hsa_status_t err;
  uint32_t name_len = 0;
  err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len);
  RET_IF_HSA_ERR(err);

  // One extra zero-initialised byte guarantees the name is NUL terminated
  // for printing whether or not the reported length counts the terminator.
  // A failed `new` throws, so no null check is needed.
  isa_i->name_str = new char[static_cast<size_t>(name_len) + 1]();

  err = QueryISAAttributes(isa, isa_i);
  if (err != HSA_STATUS_SUCCESS) {
    delete[] isa_i->name_str;
    isa_i->name_str = nullptr;
  }

  return err;
}

// Prints the attributes previously gathered by AcquireISAInfo(), one
// labelled row per attribute, at the given indentation level.
static void DisplayISAInfo(isa_info_t *isa_i, uint32_t indent) {
  printLabelStr("Name:", isa_i->name_str, indent);

  std::string models("");
  if (isa_i->mach_models[HSA_MACHINE_MODEL_SMALL]) {
    models = "HSA_MACHINE_MODEL_SMALL ";
  }
  if (isa_i->mach_models[HSA_MACHINE_MODEL_LARGE]) {
    models += "HSA_MACHINE_MODEL_LARGE";
  }
  printLabelStr("Machine Models:", models.c_str(), indent);

  std::string profiles("");
  if (isa_i->profiles[HSA_PROFILE_BASE]) {
    profiles = "HSA_PROFILE_BASE ";
  }
  if (isa_i->profiles[HSA_PROFILE_FULL]) {
    profiles += "HSA_PROFILE_FULL";
  }
  printLabelStr("Profiles:", profiles.c_str(), indent);

  std::string rounding_modes("");
  if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) {
    rounding_modes = "DEFAULT ";
  }
  if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) {
    rounding_modes += "ZERO ";
  }
  if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) {
    rounding_modes += "NEAR";
  }
  printLabelStr("Default Rounding Mode:", rounding_modes.c_str(), indent);

  // Base-profile rounding modes get their own label so this row can be told
  // apart from the default rounding modes printed just above.
  rounding_modes = "";
  if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) {
    rounding_modes = "DEFAULT ";
  }
  if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) {
    rounding_modes += "ZERO ";
  }
  if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) {
    rounding_modes += "NEAR";
  }
  printLabelStr("Base Rounding Mode:", rounding_modes.c_str(), indent);

  printLabelStr("Fast f16:", (isa_i->fast_f16 ? "TRUE" : "FALSE"), indent);

  printLabel("Workgroup Max Dimension:", true, indent);
  std::string dim;
  for (int i = 0; i < 3; i++) {
    dim = "Dim[" + std::to_string(i) + "]:";
    // workgroup_max_dim holds 16-bit elements; index it directly instead of
    // viewing it as a uint32_t array, which would merge neighbouring
    // elements and read past the end of the array.
    printLabelInt(dim.c_str(), isa_i->workgroup_max_dim[i], indent+1);
  }

  printLabelInt("Workgroup Max Size:", isa_i->workgroup_max_size, indent);

  printLabel("Grid Max Dimension:", true, indent);
  printLabelInt("x", isa_i->grid_max_dim.x, indent+1);
  printLabelInt("y", isa_i->grid_max_dim.y, indent+1);
  printLabelInt("z", isa_i->grid_max_dim.z, indent+1);

  // grid_max_size is 64 bits wide; format it as text so large values are not
  // truncated by the int parameter of printLabelInt().
  const std::string grid_size_str = std::to_string(isa_i->grid_max_size);
  printLabelStr("Grid Max Size:", grid_size_str.c_str(), indent);
  printLabelInt("FBarrier Max Size:", isa_i->fbarrier_max_size, indent);
}

// Gathers the attributes of `isa` and, when that succeeds, prints them at
// indentation level `indent`. The ISA name buffer is released on every path,
// including when the acquisition fails part-way through. Returns the status
// of the acquisition.
static hsa_status_t
AcquireAndDisplayISAInfo(const hsa_isa_t isa, uint32_t indent) {
  hsa_status_t err;
  isa_info_t isa_i;

  isa_i.name_str = nullptr;
  err = AcquireISAInfo(isa, &isa_i);

  if (err == HSA_STATUS_SUCCESS) {
    // Honour the caller's indentation instead of a hard-coded level.
    DisplayISAInfo(&isa_i, indent);
  }

  // delete[] on a null pointer is a no-op, so no guard is required.
  delete []isa_i.name_str;
  isa_i.name_str = nullptr;

  RET_IF_HSA_ERR(err);
  return err;
}
// ISA iteration callback. `data` points to an int counter that is
// incremented for every ISA visited and used to number the "ISA N" heading;
// the ISA's attributes are then acquired and printed beneath it.
static hsa_status_t get_isa_info(hsa_isa_t isa, void* data) {
  int* isa_counter = reinterpret_cast<int*>(data);
  ++(*isa_counter);

  const std::string heading = "ISA " + std::to_string(*isa_counter);
  printLabel(heading.c_str(), true, 2);

  hsa_status_t status = AcquireAndDisplayISAInfo(isa, 3);
  RET_IF_HSA_ERR(status);

  return status;
}
// Cache info dump is ifdef'd out as it generates a lot of output that is
// not that interesting. Define ENABLE_CACHE_DUMP if this is of interest.
#ifdef ENABLE_CACHE_DUMP
// Prints the name, level and size of one cache, each on its own labelled
// row at the given indentation level.
static void DisplayCacheInfo(cache_info_t *cache_i, uint32_t indent) {
  const cache_info_t &info = *cache_i;

  printLabelStr("Name:", info.name_str, indent);
  printLabelInt("Level:", info.level, indent);
  printLabelInt("Size:", info.size, indent);
}

// Reads the cache name, level and size into *cache_i. The name buffer must
// already be allocated. Stops at the first failing query and returns its
// status.
static hsa_status_t QueryCacheAttributes(hsa_cache_t cache,
                                         cache_info_t *cache_i) {
  hsa_status_t err;

  err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME, cache_i->name_str);
  RET_IF_HSA_ERR(err);

  err = hsa_cache_get_info(cache, HSA_CACHE_INFO_LEVEL, &cache_i->level);
  RET_IF_HSA_ERR(err);

  err = hsa_cache_get_info(cache, HSA_CACHE_INFO_SIZE, &cache_i->size);
  RET_IF_HSA_ERR(err);
  return err;
}

// Gathers the attributes of `cache` into *cache_i. On success
// cache_i->name_str owns a heap buffer that the caller must release with
// delete[]. On failure after the buffer was allocated, the buffer is
// released here and cache_i->name_str is reset to nullptr.
static hsa_status_t AcquireCacheInfo(hsa_cache_t cache, cache_info_t *cache_i) {
  hsa_status_t err;
  uint32_t name_len = 0;
  err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME_LENGTH, &name_len);
  RET_IF_HSA_ERR(err);

  // One extra zero-initialised byte guarantees the name is NUL terminated
  // for printing whether or not the reported length counts the terminator.
  // A failed `new` throws, so no null check is needed.
  cache_i->name_str = new char[static_cast<size_t>(name_len) + 1]();

  err = QueryCacheAttributes(cache, cache_i);
  if (err != HSA_STATUS_SUCCESS) {
    delete[] cache_i->name_str;
    cache_i->name_str = nullptr;
  }
  return err;
}

// Gathers the attributes of `cache` and, when that succeeds, prints them at
// indentation level `indent`. The cache name buffer is released on every
// path, including when the acquisition fails part-way through. Returns the
// status of the acquisition.
static hsa_status_t
AcquireAndDisplayCacheInfo(const hsa_cache_t cache, uint32_t indent) {
  hsa_status_t err;
  cache_info_t cache_i;

  // Start from a known state so the cleanup below is safe even when the
  // acquisition fails before a buffer is allocated.
  cache_i.name_str = nullptr;
  err = AcquireCacheInfo(cache, &cache_i);

  if (err == HSA_STATUS_SUCCESS) {
    // Honour the caller's indentation instead of a hard-coded level.
    DisplayCacheInfo(&cache_i, indent);
  }

  // delete[] on a null pointer is a no-op, so no guard is required.
  delete []cache_i.name_str;
  cache_i.name_str = nullptr;

  RET_IF_HSA_ERR(err);
  return err;
}

// Cache iteration callback. `data` points to an int counter that is
// incremented for every cache visited and used to number the "Cache LN"
// heading; the cache's attributes are then acquired and printed beneath it.
static hsa_status_t get_cache_info(hsa_cache_t cache, void* data) {
  int* cache_counter = reinterpret_cast<int*>(data);
  ++(*cache_counter);

  const std::string heading = "Cache L" + std::to_string(*cache_counter);
  printLabel(heading.c_str(), true, 2);

  hsa_status_t status = AcquireAndDisplayCacheInfo(cache, 3);
  RET_IF_HSA_ERR(status);

  return status;
}
#endif  // ENABLE_CACHE_DUMP
// Agent iteration callback. `data` points to an integer counter that is
// incremented for every agent visited and used to number the "Agent N"
// banner. The agent's attributes are acquired and printed, followed by its
// memory pools and its ISAs. When ISA iteration reports
// HSA_STATUS_ERROR_INVALID_AGENT, "N/A" is printed and the agent is treated
// as successfully handled. Returns the first failing status otherwise.
static hsa_status_t
AcquireAndDisplayAgentInfo(hsa_agent_t agent, void* data) {
  int pool_number = 0;
  int isa_number = 0;

  hsa_status_t err;
  agent_info_t agent_i;

  int *agent_number = reinterpret_cast<int*>(data);
  (*agent_number)++;

  err = AcquireAgentInfo(agent, &agent_i);
  RET_IF_HSA_ERR(err);

  printLabel("*******", true);
  std::string agent_ind("Agent ");
  agent_ind += std::to_string(*agent_number);
  printLabel(agent_ind.c_str(), true);
  printLabel("*******", true);

  DisplayAgentInfo(&agent_i);

  printLabel("Pool Info:", true, 1);
  err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, &pool_number);
  RET_IF_HSA_ERR(err);

  printLabel("ISA Info:", true, 1);
  err = hsa_agent_iterate_isas(agent, get_isa_info, &isa_number);
  if (err == HSA_STATUS_ERROR_INVALID_AGENT) {
    printLabel("N/A", true, 2);
    return HSA_STATUS_SUCCESS;
  }
  RET_IF_HSA_ERR(err);

  // Use the same #ifdef test that guards the cache helper definitions, so
  // the helpers and this call site are always enabled together and the file
  // still compiles when ENABLE_CACHE_DUMP is defined without a value.
#ifdef ENABLE_CACHE_DUMP
  int cache_number = 0;
  printLabel("Cache Info:", true, 1);
  err = hsa_agent_iterate_caches(agent, get_cache_info, &cache_number);
  if (err == HSA_STATUS_ERROR_INVALID_AGENT) {
    printLabel("N/A", true, 2);
    return HSA_STATUS_SUCCESS;
  }
  RET_IF_HSA_ERR(err);
#endif

  return HSA_STATUS_SUCCESS;
}

// Print out all static information known to HSA about the target system.
// Throughout this program, the Acquire-type functions make HSA calls to
// interate through HSA objects and then perform HSA get_info calls to
// acccumulate information about those objects. Corresponding to each
// Acquire-type function is a Display* function which display the
// accumulated data in a formatted way.
int main(int argc, char* argv[]) {
  hsa_status_t err;

  err = hsa_init();
  RET_IF_HSA_ERR(err);

  // Acquire and display system information
  system_info_t sys_info;

  // This function will call HSA get_info functions to gather information
  // about the system.
  err = AcquireSystemInfo(&sys_info);
  RET_IF_HSA_ERR(err);

  printLabel("=====================", true);
  printLabel("HSA System Attributes", true);
  printLabel("=====================", true);
  DisplaySystemInfo(&sys_info);

  // Iterate through every agent and get and display their info
  printLabel("==========", true);
  printLabel("HSA Agents", true);
  printLabel("==========", true);
  uint32_t agent_ind = 0;
  err = hsa_iterate_agents(AcquireAndDisplayAgentInfo, &agent_ind);
  RET_IF_HSA_ERR(err);

  printLabel("*** Done ***", true);

  err = hsa_shut_down();
  RET_IF_HSA_ERR(err);
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/agent_props.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>
#include <vector>

#include "suites/functional/agent_props.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// NOTE(review): presumably an element count for a test buffer; no use of
// this constant is visible in this part of the file, so confirm before
// relying on it.
static const uint32_t kNumBufferElements = 256;

// Bail out of the enclosing function when an HSA call did not succeed:
// report the failing source location, the status code and its textual
// description on stdout, then return the status to the caller.
// hsa_status_string() may fail and leave the message pointer unset, so a
// placeholder is printed in that case instead of streaming a null pointer.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = nullptr; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << (msg != nullptr ? msg : "Unknown HSA status") << std::endl; \
    return (err); \
  } \
}

// Indented row of asterisks. NOTE(review): presumably printed between
// sub-tests; no use is visible in this part of the file.
static const char kSubTestSeparator[] = "  **************************";

// Writes `header` to stdout framed as "  *** <header> ***", followed by a
// newline and a stream flush.
static void PrintAgentPropsSubtestHeader(const char *header) {
  std::cout << "  *** ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Sets the test's default iteration count, title and description.
AgentPropTest::AgentPropTest() : TestBase() {
  // Default number of iterations of the main test; it can be overridden on
  // the command line.
  set_num_iteration(10);

  set_title("  *** Query RocR Agent Properties ***");
  set_description("  *** Checks properties of Agent's on a system ***");
}

// Nothing to release in the destructor body.
AgentPropTest::~AgentPropTest() {}

// One-time setup of member variables used by the rest of the test belongs
// here. Currently this runs the base-class setup and prints a banner.
void AgentPropTest::SetUp() {
  TestBase::SetUp();

  std::cout << "  *** Initialize ROCr Runtime and ";
  std::cout << "acquire handles of agents" << " ***" << std::endl;
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void AgentPropTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void AgentPropTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

void AgentPropTest::DisplayResults(void) const {
  TestBase::DisplayResults();
  std::cout << std::endl;
  for (uint32_t idx = 0 ; idx < this->propList_.size(); ++idx) {
    std::cout << this->propList_[idx] << std::endl;
  }
  return;
}

// Tears the test down through the base class.
void AgentPropTest::Close() {
  // TestBase::Close() closes the handles opened within rocrtst utility calls
  // and calls hsa_shut_down(); any other hsa cleanup must therefore come
  // before this line.
  TestBase::Close();
}

// Queries one property of `agent` and appends a printable form of it to
// propList_ (printed later by DisplayResults()).
//
// Supported values of `prop`:
//   HSA_AMD_AGENT_INFO_UUID           - recorded as "  Agent (<type>) : <uuid>"
//   HSA_AMD_AGENT_INFO_CLOCK_COUNTERS - recorded as a multi-line block; every
//                                       counter is then asserted non-zero
// Any other property fails the test. Extend this method to query for agent
// properties that are currently not tested.
//
// The ASSERT_* macros used here only return from this function, so a caller
// that needs to stop on failure has to check for a fatal failure itself.
void AgentPropTest::QueryAgentProp(hsa_agent_t agent,
                                   hsa_agent_info_t prop) {
  hsa_status_t err;
  hsa_device_type_t agType;
  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agType);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  std::stringstream ss;
  ss << "  Agent " << "(";
  switch (agType) {
    case HSA_DEVICE_TYPE_CPU:
      ss << "CPU) : ";
      break;
    case HSA_DEVICE_TYPE_GPU:
      ss << "GPU) : ";
      break;
    case HSA_DEVICE_TYPE_DSP:
      ss << "DSP) : ";
      break;
    case HSA_DEVICE_TYPE_AIE:
      ss << "AIE) : ";
      break;
    default:
      // Keep the label well formed for device types not listed above.
      ss << "Unknown) : ";
      break;
  }

  // Print the agent property
  uint32_t key = uint32_t(prop);
  switch (key) {
  // Retrieves UUID property value of the agent
  case HSA_AMD_AGENT_INFO_UUID: {
    // Zero-filled so that streaming the buffer below never reads
    // indeterminate bytes, whatever the runtime writes into it.
    char uuid[32] = {0};
    err = hsa_agent_get_info(agent, prop, static_cast<void*>(uuid));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    ss << uuid;
    propList_.push_back(ss.str());
    break;
  }
  case HSA_AMD_AGENT_INFO_CLOCK_COUNTERS: {
    std::stringstream str_s;

    hsa_amd_clock_counters_t counters = {0};

    err = hsa_agent_get_info(agent, prop, &counters);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Record the values first so they are reported even if a check below
    // fails.
    str_s << "\n\n  Clock Counters";
    str_s << "\n  Clock Frequency: "  << counters.system_clock_frequency << "\n";
    str_s << "  GPU Clock counter: "  << counters.gpu_clock_counter << "\n";
    str_s << "  System system_clock_counter: "  << counters.system_clock_counter << "\n";
    str_s << "  CPU Clock counter: "  << counters.cpu_clock_counter << "\n";
    propList_.push_back(str_s.str());

    // Unsigned literals: the counters are unsigned, so this avoids a
    // signed/unsigned comparison inside the assertion.
    ASSERT_NE(0u, counters.system_clock_frequency);
    ASSERT_NE(0u, counters.gpu_clock_counter);
    ASSERT_NE(0u, counters.system_clock_counter);
    ASSERT_NE(0u, counters.cpu_clock_counter);

    break;
  }
  default:
    FAIL();
  }

}

void AgentPropTest::QueryAgentUUID() {
  hsa_status_t err;
  if (verbosity() > 0) {
    PrintAgentPropsSubtestHeader("Query GPU and CPU Agent's UUID");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (uint32_t idx = 0 ; idx < cpus.size(); ++idx) {
    QueryAgentProp(cpus[idx], (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UUID);
  }

  for (uint32_t idx = 0 ; idx < gpus.size(); ++idx) {
    QueryAgentProp(gpus[idx], (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UUID);
  }

  if (verbosity() > 0) {
    std::cout << "  *** Execution completed - subtest Passed " << " ***" << std::endl;
  }
}

void AgentPropTest::QueryAgentClockCounters() {
  hsa_status_t err;
  if (verbosity() > 0) {
    PrintAgentPropsSubtestHeader("Query Agent's Clock Counters");
  }

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (uint32_t idx = 0 ; idx < gpus.size(); ++idx) {
    QueryAgentProp(gpus[idx], (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CLOCK_COUNTERS);
  }

  if (verbosity() > 0) {
    std::cout << "  *** Execution completed - subtest Passed " << " ***" << std::endl;
  }
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/agent_props.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_AGENT_PROPS_H_
#define ROCRTST_SUITES_FUNCTIONAL_AGENT_PROPS_H_

#include <string>
#include <vector>

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test that queries properties of the agents on the system and
// keeps a printable record of each queried value.
class AgentPropTest : public TestBase {
 public:
  // @Brief: Constructor for test case of AgentPropTest
  AgentPropTest();

  // @Brief: Destructor for test case of AgentPropTest
  virtual ~AgentPropTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results, including every recorded property string
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();

  // @Brief: Query the UUID property of the CPU and GPU agents
  void QueryAgentUUID();

  // @Brief: Query the Clock Counter property of the GPU agents
  void QueryAgentClockCounters();

 private:
  // Queries a single property of one agent and records its printable form
  void QueryAgentProp(hsa_agent_t agent, hsa_agent_info_t prop);

  // Printable property values captured for the agents on the system
  std::vector<std::string> propList_;
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_AGENT_PROPS_H_


================================================
FILE: rocrtst/suites/functional/aql_barrier_bit.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>
#include <vector>
#include "suites/functional/aql_barrier_bit.h"
#include "common/base_rocr_utils.h"
#include "common/concurrent_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Number of "signal_wait_kernel" packets each subtest queues ahead of the set
// kernel; also the size of the wait-kernel completion signal array.
static const int NUM_WAIT_KERNELS = 8;

// Stores the first 32 bits of `queue_packet` in one release store: `header`
// occupies the low 16 bits and `setup` the high 16 bits.
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                  hsa_kernel_dispatch_packet_t* queue_packet) {
  // Widen before shifting: a uint16_t operand is promoted to (signed) int, and
  // shifting a value that has bit 15 set left by 16 would overflow it.
  const uint32_t header_and_setup =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(setup) << 16);
  __atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
                   header_and_setup, __ATOMIC_RELEASE);
}

// Selects the title and description for one of the two barrier-bit variants.
//   set    - true for the variant that dispatches with the barrier bit set
//   notSet - true for the variant that leaves the barrier bit clear; only
//            consulted when `set` is false
// When both are false no title or description is installed here.
AqlBarrierBitTest::AqlBarrierBitTest(bool set, bool notSet) : TestBase() {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.
  if (set) {
    set_title("RocR Aql Barrier Bit Set Test");
    set_description("This test checks the barrier bit functionality, set");
  } else if (notSet) {
    // The title used to read "RocR Concurrent Shutdown Test", which names a
    // different test.
    set_title("RocR Aql Barrier Bit Not Set Test");
    set_description("This test checks the barrier bit functionality, un set");
  }
}

// Nothing to release here.
AqlBarrierBitTest::~AqlBarrierBitTest(void) {}

// Runs the base-class setup, then selects the default agents and the typical
// memory pools through the rocrtst helpers. Either helper failing is fatal.
void AqlBarrierBitTest::SetUp(void) {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body only when the profile check passes.
void AqlBarrierBitTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void AqlBarrierBitTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to print. The required profile is still
// compared with the one we are running on, but the outcome of that check does
// not change anything here.
void AqlBarrierBitTest::DisplayResults(void) const {
  (void)rocrtst::CheckProfile(this);
}

void AqlBarrierBitTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
}

void AqlBarrierBitTest::BarrierBitSet(void) {
  hsa_status_t status;

  // The kernarg data structure
  typedef struct __attribute__ ((aligned(16))) signal_args_s {
    void *signal_values;
  } signal_args_t;
  signal_args_t signal_args;

  // Get the GPU agents into a vector
  std::vector<hsa_agent_t> agent_list;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Get CPU agent to get the kern_arg pool
  std::vector<hsa_agent_t> cpu_agent;
  status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Repeat the test for each agent
  unsigned int ii;
  for (ii = 0; ii < agent_list.size(); ++ii) {
  // Check if the queue supports dispatch
  uint32_t features = 0;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
    continue;
  }

  // Find a memory pool that supports fine grained memory
  hsa_amd_memory_pool_t global_pool;
  global_pool.handle = (uint64_t)-1;
  status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool, &global_pool);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Obtain the agent's machine model
  hsa_machine_model_t machine_model;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Obtain the agent's profile
  hsa_profile_t profile;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_PROFILE, &profile);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);


  // Find a memory pool that supports kernel arguments
  hsa_amd_memory_pool_t kernarg_pool;
  kernarg_pool.handle = (uint64_t)-1;
  status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool, &kernarg_pool);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create a queue
  hsa_queue_t* queue;
  status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  set_kernel_file_name("signal_operations_kernels.hsaco");
  set_kernel_name("signal_wait_kernel");
  status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Allocate the kernel argument buffer from the correct pool
  signal_args_t* kernarg_buffer = NULL;
  status = hsa_amd_memory_pool_allocate(kernarg_pool,
           sizeof(signal_args_t), 0,
           reinterpret_cast<void**>(&kernarg_buffer));
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create the completion signal
  hsa_signal_t completion_signal;
  status = hsa_signal_create(1, 0, NULL, &completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  hsa_amd_memory_pool_access_t access;
  status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
                                              global_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  hsa_signal_t* kernel_signal;
  hsa_signal_value_t* set_value;


  hsa_signal_t s;
  status = hsa_signal_create(1, 0, NULL, &s);

  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    // Create the kernel signal
    status = hsa_amd_memory_pool_allocate(global_pool,
                                          sizeof(hsa_signal_t), 0, reinterpret_cast<void**>(&kernel_signal));
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_signal_create(1, 0, NULL, kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);

    status = hsa_amd_memory_pool_allocate(global_pool,
                                          sizeof(hsa_signal_value_t), 0, reinterpret_cast<void**>(&set_value));
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    memset(set_value, 0, sizeof(hsa_signal_value_t));

    // Set the signal_args with kernel_signal, will be accessed from Kernel side
    signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
  }

  memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));

  // Create the set kernel completion signal
  hsa_signal_t set_kernel_completion_signal;
  status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create the wait kernel completion signals
  hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS];
  int jj;
  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }

  // Setup the dispatch packet
  hsa_kernel_dispatch_packet_t dispatch_packet;
  memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));

  dispatch_packet.workgroup_size_x = 1;
  dispatch_packet.workgroup_size_y = 1;
  dispatch_packet.workgroup_size_z = 1;
  dispatch_packet.grid_size_x = 1;
  dispatch_packet.grid_size_y = 1;
  dispatch_packet.grid_size_z = 1;
  dispatch_packet.kernel_object = kernel_object();
  dispatch_packet.group_segment_size = group_segment_size();
  dispatch_packet.private_segment_size = private_segment_size();
  dispatch_packet.kernarg_address = kernarg_buffer;


  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    // Set the appropriate completion signal
    dispatch_packet.completion_signal = wait_kernel_completion_signal[jj];
    // Dispatch the kernel
    // const uint32_t queue_size = queue->size;
    const uint32_t queue_mask = queue->size - 1;
    // write to command queue
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                 (queue->base_address)[index & queue_mask] = dispatch_packet;
    hsa_queue_store_write_index_relaxed(queue, index + 1);

    dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
    dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER;
    dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
    void* q_base = queue->base_address;
    // Set the Aql packet header
    AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);

    // ringdoor bell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);
  }

  // Dispatch the set kernel, setting the barrier bit to 1
  dispatch_packet.header |= 1 == HSA_PACKET_HEADER_BARRIER;

  set_kernel_file_name("signal_operations_kernels.hsaco");
  set_kernel_name("signal_st_rlx_kernel");
  status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  // Set the appropriate completion signal and code descriptor values
  dispatch_packet.kernel_object = kernel_object();
  dispatch_packet.group_segment_size = group_segment_size();
  dispatch_packet.private_segment_size = private_segment_size();
  dispatch_packet.kernarg_address = kernarg_buffer;

  // Dispatch the kernel
  // const uint32_t queue_size = queue->size;
  const uint32_t queue_mask = queue->size - 1;
  // write to command queue
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                 (queue->base_address)[index & queue_mask] = dispatch_packet;
  hsa_queue_store_write_index_relaxed(queue, index + 1);
  // ringdoor bell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);

  // Query the systems timestamp frequency for wait timeout
  uint16_t freq;
  status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, reinterpret_cast<void*>(&freq));

  // Wait on the completion signal of the set kernel, but
  // timeout after 1 second
  uint64_t wait_time = (uint64_t) freq;
  hsa_signal_value_t signal_value;
  signal_value = hsa_signal_wait_relaxed(set_kernel_completion_signal,
                                         HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
  ASSERT_EQ(1, signal_value);

  // Wait on the completion signals of each of the wait kernels, again timing out after 1 second
  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    signal_value = hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj],
                                           HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
    ASSERT_EQ(1, signal_value);
  }


  // destroy the signal created for async copy
  status = hsa_signal_destroy(completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    status = hsa_amd_memory_pool_free(kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_memory_pool_free(set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  } else {
    status = hsa_amd_memory_unlock(kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);

    status = hsa_amd_memory_unlock(set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
  // Destroy the queue
  status = hsa_queue_destroy(queue);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
}

void AqlBarrierBitTest::BarrierBitNotSet(void) {
  hsa_status_t status;

  // The kernarg data structure
  typedef struct __attribute__ ((aligned(16))) signal_args_s {
    void *signal_values;
  } signal_args_t;
  signal_args_t signal_args;

  // Get the GPU agents into a vector
  std::vector<hsa_agent_t> agent_list;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);


  // Get CPU agent to get the kern_arg pool
  std::vector<hsa_agent_t> cpu_agent;
  status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Repeat the test for each agent
  unsigned int ii;
  for (ii = 0; ii < agent_list.size(); ++ii) {
  // Check if the queue supports dispatch
  uint32_t features = 0;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
    continue;
  }

  // Find a memory pool that supports fine grained memory
  hsa_amd_memory_pool_t global_pool;
  global_pool.handle = (uint64_t)-1;
  status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool, &global_pool);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Obtain the agent's machine model
  hsa_machine_model_t machine_model;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Obtain the agent's profile
  hsa_profile_t profile;
  status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_PROFILE, &profile);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);


  // Find a memory pool that supports kernel arguments
  hsa_amd_memory_pool_t kernarg_pool;
  kernarg_pool.handle = (uint64_t)-1;
  status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool, &kernarg_pool);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create a queue
  hsa_queue_t* queue;
  status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  set_kernel_file_name("signal_operations_kernels.hsaco");
  set_kernel_name("signal_wait_kernel");
  status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Allocate the kernel argument buffer from the correct pool
  signal_args_t* kernarg_buffer = NULL;
  status = hsa_amd_memory_pool_allocate(kernarg_pool,
           sizeof(signal_args_t), 0,
           reinterpret_cast<void**>(&kernarg_buffer));
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create the completion signal
  hsa_signal_t completion_signal;
  status = hsa_signal_create(1, 0, NULL, &completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  hsa_amd_memory_pool_access_t access;
  status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
                                              global_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  hsa_signal_t* kernel_signal;
  hsa_signal_value_t* set_value;


  hsa_signal_t s;
  status = hsa_signal_create(1, 0, NULL, &s);

  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    // Create the kernel signal
    status = hsa_amd_memory_pool_allocate(global_pool,
                                          sizeof(hsa_signal_t), 0, reinterpret_cast<void**>(&kernel_signal));
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_signal_create(1, 0, NULL, kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);

    status = hsa_amd_memory_pool_allocate(global_pool,
                                          sizeof(hsa_signal_value_t), 0, reinterpret_cast<void**>(&set_value));
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    memset(set_value, 0, sizeof(hsa_signal_value_t));

    // Set the signal_args with kernel_signal, will be accessed from Kernel side
    signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
  }

  memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));

  // Create the set kernel completion signal
  hsa_signal_t set_kernel_completion_signal;
  status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // Create the wait kernel completion signals
  hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS];
  int jj;
  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }

  // Setup the dispatch packet
  hsa_kernel_dispatch_packet_t dispatch_packet;
  memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));

  dispatch_packet.workgroup_size_x = 1;
  dispatch_packet.workgroup_size_y = 1;
  dispatch_packet.workgroup_size_z = 1;
  dispatch_packet.grid_size_x = 1;
  dispatch_packet.grid_size_y = 1;
  dispatch_packet.grid_size_z = 1;
  dispatch_packet.kernel_object = kernel_object();
  dispatch_packet.group_segment_size = group_segment_size();
  dispatch_packet.private_segment_size = private_segment_size();
  dispatch_packet.kernarg_address = kernarg_buffer;


  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    // Set the appropriate completion signal
    dispatch_packet.completion_signal = wait_kernel_completion_signal[jj];
    // Dispatch the kernel
    // const uint32_t queue_size = queue->size;
    const uint32_t queue_mask = queue->size - 1;
    // write to command queue
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                 (queue->base_address)[index & queue_mask] = dispatch_packet;
    hsa_queue_store_write_index_relaxed(queue, index + 1);
    dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
    dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER;
    dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;

    void* q_base = queue->base_address;
    // Set the Aql packet header
    AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);

    // ringdoor bell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);
  }

  // Dispatch the set kernel, NOT setting the barrier bit
  set_kernel_file_name("signal_operations_kernels.hsaco");
  set_kernel_name("signal_st_rlx_kernel");
  status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  // Set the appropriate completion signal and code descriptor values
  dispatch_packet.kernel_object = kernel_object();
  dispatch_packet.group_segment_size = group_segment_size();
  dispatch_packet.private_segment_size = private_segment_size();
  dispatch_packet.kernarg_address = kernarg_buffer;

  // Set the appropriate completion signal
  dispatch_packet.completion_signal = set_kernel_completion_signal;
  // Dispatch the kernel
  // const uint32_t queue_size = queue->size;
  const uint32_t queue_mask = queue->size - 1;
  // write to command queue
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                 (queue->base_address)[index & queue_mask] = dispatch_packet;
  hsa_queue_store_write_index_relaxed(queue, index + 1);
  // ringdoor bell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);

  // Query the systems timestamp frequency for wait timeout
  uint16_t freq;
  status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, reinterpret_cast<void*>(&freq));

  // Wait on the completion signal of the set kernel, but
  // timeout after 1 second
  uint64_t wait_time = (uint64_t) freq;
  hsa_signal_value_t signal_value;
  signal_value = hsa_signal_wait_relaxed(set_kernel_completion_signal,
                                         HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
  ASSERT_EQ(1, signal_value);

  // Wait on the completion signals of each of the wait kernels, again timing out after 1 second
  for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
    signal_value = hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj],
                                           HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
    ASSERT_EQ(1, signal_value);
  }

  // Check kernel signal
  std::cout << "Kernel_signal Value after package execustion(should be 0) = " << (kernel_signal->handle) << std::endl;

  // destroy the signal created for async copy
  status = hsa_signal_destroy(s);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  status = hsa_signal_destroy(completion_signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    status = hsa_amd_memory_pool_free(kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    status = hsa_amd_memory_pool_free(set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  } else {
    status = hsa_amd_memory_unlock(kernel_signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);

    status = hsa_amd_memory_unlock(set_value);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
  // Destroy the queue
  status = hsa_queue_destroy(queue);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
}


================================================
FILE: rocrtst/suites/functional/aql_barrier_bit.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_
#define ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_

#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test fixture declaration for the AQL barrier-bit functional test.
// Derives from TestBase and declares the usual SetUp/Run/Close/Display hooks
// plus two test entry points (BarrierBitSet / BarrierBitNotSet).
class AqlBarrierBitTest : public TestBase {
 public:
    // @Brief: Constructor taking two boolean flags.
    // NOTE(review): the meaning of the two flags is not visible from this
    // header -- confirm against the implementation file.
    AqlBarrierBitTest(bool, bool);

    // @Brief: Destructor for the AqlBarrierBitTest class
    virtual ~AqlBarrierBitTest();

    // @Brief: Setup the environment for measurement
    virtual void SetUp();

    // @Brief: Core measurement execution
    virtual void Run();

    // @Brief: Clean up and retrieve the resources
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Test entry point; presumably exercises dispatches with the AQL
    // barrier bit set -- confirm against the implementation file.
    void BarrierBitSet(void);

    // @Brief: Test entry point; presumably exercises dispatches with the AQL
    // barrier bit cleared -- confirm against the implementation file.
    void BarrierBitNotSet(void);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_


================================================
FILE: rocrtst/suites/functional/concurrent_init.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>


#include "suites/functional/concurrent_init.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

static void* TestHSAInitFunction(void* args) {
  // This function called for each thread
  // This will initialize the HSA runtime.
  hsa_status_t status;
  // const char* err_str; // Local variable

  // Initialize hsa runtime
  status = hsa_init();
  EXPECT_EQ(HSA_STATUS_SUCCESS, status) << "hsa_init failed in worker thread.";
  pthread_exit(nullptr);
  return nullptr;
}

// Number of worker threads spawned by TestConcurrentInit(); each one calls
// hsa_init() once.
static const int NumOfThreads = 100;  // Number of threads to be created

// Helper macro: if `err` is not HSA_STATUS_SUCCESS, look up the status string,
// record a non-fatal gtest failure carrying that string, and return `err`
// from the enclosing function. It is #undef'd at the end of this file.
#define RET_IF_HSA_ERR(err)                                                                        \
  {                                                                                                \
    if ((err) != HSA_STATUS_SUCCESS) {                                                             \
      const char* msg = 0;                                                                         \
      hsa_status_string(err, &msg);                                                                \
      EXPECT_EQ(HSA_STATUS_SUCCESS, err) << msg;                                                   \
      return (err);                                                                                \
    }                                                                                              \
  \
}

ConcurrentInitTest::ConcurrentInitTest() : TestBase() {
  // Default iteration count for the main test; the value passed here is only
  // a default and can be overridden on the command line.
  set_num_iteration(10);

  // Human-readable identification of the test.
  set_title("RocR Concurrent Init Test");
  set_description("This test initializes HSA runtime concurrently");
}

// Destructor: the test owns no resources of its own, so there is nothing to
// release here.
ConcurrentInitTest::~ConcurrentInitTest() {}

// Intentionally performs no setup: the HSA runtime is initialized inside the
// pthread callback function rather than here.
void ConcurrentInitTest::SetUp() {
}


// Runs the common TestBase::Run() logic, but only when the profile required
// by this test case matches the one we are actually running on.
void ConcurrentInitTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Compare required profile for this test case with what we're actually
// running on
void ConcurrentInitTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

void ConcurrentInitTest::DisplayResults() const {
  // The required profile is still compared with the one we are running on,
  // but this test has nothing to display in either case, so the outcome of
  // the check is deliberately ignored.
  static_cast<void>(rocrtst::CheckProfile(this));
}

void ConcurrentInitTest::Close(void) {
  // Intentionally empty: TestBase::SetUp() is not used by this test, so there
  // is no matching teardown to perform.
}

void ConcurrentInitTest::TestConcurrentInit(void) {
  pthread_t ThreadId[NumOfThreads];
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);  // Setting the attribute to PTHREAD_CREATE_JOINABLE

  // This is to create threads concurrently
  // HSA runtime will be initialized for each thread
  for (int Id = 0; Id < NumOfThreads; ++Id) {
    int ThreadStatus = pthread_create(&ThreadId[Id], &attr, TestHSAInitFunction, nullptr);
    // Check if the thread is created successfully
    // Might want to switch to non-fatal EXPECT_EQ and
    // handle not being able to create so many threads.
    ASSERT_EQ(0, ThreadStatus) << "pthead_create failed.";
  }

  // Wait for workers.
  for (int Id = 0; Id < NumOfThreads; ++Id) {
    int err = pthread_join(ThreadId[Id], nullptr);
    ASSERT_EQ(0, err) << "pthread_join failed.";
  }

  // Invoke hsa_shut_down and verify that all the hsa_init's were counted.
  // HSA should be exactly closed after NumOfThreads calls.
  for (int Id = 0; Id < NumOfThreads; ++Id) {
    hsa_status_t err = hsa_shut_down();
    ASSERT_EQ(HSA_STATUS_SUCCESS, err) << "An hsa_init was missed.";
  }

  hsa_status_t err = hsa_shut_down();
  ASSERT_EQ(HSA_STATUS_ERROR_NOT_INITIALIZED, err) << "hsa_init reference count was too high.";
}
#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/concurrent_init.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_H_
#define ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_H_

#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test fixture declaration for the concurrent hsa_init() functional test.
// Derives from TestBase; TestConcurrentInit() is the actual test body.
class ConcurrentInitTest : public TestBase {
 public:
    // @Brief: Constructor; sets the default iteration count, title and
    // description of the test
    ConcurrentInitTest();

    // @Brief: Destructor for the ConcurrentInitTest class
    virtual ~ConcurrentInitTest();

    // @Brief: Setup hook; a no-op for this test because the runtime is
    // initialized from the worker threads
    virtual void SetUp();

    // @Brief: Runs TestBase::Run() when the required profile matches
    virtual void Run();

    // @Brief: Clean up hook; a no-op for this test
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Calls hsa_init() from many threads concurrently and verifies
    // the resulting init reference count via hsa_shut_down()
    void TestConcurrentInit(void);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_H_


================================================
FILE: rocrtst/suites/functional/concurrent_init_shutdown.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>
#include "suites/functional/concurrent_init_shutdown.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

static void* TestHSAInitShutdownFunction(void* args) {
  // This is callback function for each thread
  // This will initialize the HSA runtime and shutdown
  hsa_status_t status;

  // Initialize hsa runtime
  status = hsa_init();
  EXPECT_EQ(HSA_STATUS_SUCCESS, status) << "hsa_init failed in worker thread.";

  // Shutdown hsa runtime
  status = hsa_shut_down();
  EXPECT_EQ(HSA_STATUS_SUCCESS, status) << "hsa_shut_down failed in worker thread.";

  pthread_exit(nullptr);
  return nullptr;
}

// Number of worker threads spawned by TestConcurrentInitShutdown(); each one
// calls hsa_init() followed by hsa_shut_down().
static const int NumOfThreads = 100;  // Number of threads to be created

// Helper macro: if `err` is not HSA_STATUS_SUCCESS, look up the status string,
// record a non-fatal gtest failure carrying that string, and return `err`
// from the enclosing function. It is #undef'd at the end of this file.
#define RET_IF_HSA_ERR(err)                                                                        \
  {                                                                                                \
    if ((err) != HSA_STATUS_SUCCESS) {                                                             \
      const char* msg = 0;                                                                         \
      hsa_status_string(err, &msg);                                                                \
      EXPECT_EQ(HSA_STATUS_SUCCESS, err) << msg;                                                   \
      return (err);                                                                                \
    }                                                                                              \
  \
}

// Constructor: sets the default iteration count and the title/description
// reported for this test. The title and description name the init+shutdown
// test specifically so its output can be told apart from the plain
// concurrent-init test.
ConcurrentInitShutdownTest::ConcurrentInitShutdownTest(void) : TestBase() {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.
  set_title("RocR Concurrent Init Shutdown Test");
  set_description("This test initializes and shuts down HSA runtime concurrently");
}

// Destructor: the test owns no resources of its own, so there is nothing to
// release here.
ConcurrentInitShutdownTest::~ConcurrentInitShutdownTest() {}

// Intentionally performs no setup: the HSA runtime is initialized inside the
// pthread callback function rather than here.
void ConcurrentInitShutdownTest::SetUp() {
}


// Runs the common TestBase::Run() logic, but only when the profile required
// by this test case matches the one we are actually running on.
void ConcurrentInitShutdownTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Compare required profile for this test case with what we're actually
// running on
void ConcurrentInitShutdownTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

void ConcurrentInitShutdownTest::DisplayResults() const {
  // The required profile is still compared with the one we are running on,
  // but this test has nothing to display in either case, so the outcome of
  // the check is deliberately ignored.
  static_cast<void>(rocrtst::CheckProfile(this));
}

void ConcurrentInitShutdownTest::Close(void) {
  // Intentionally empty: TestBase::SetUp() is not used by this test, so there
  // is no matching teardown to perform.
}

// Spawns NumOfThreads joinable worker threads that each call hsa_init()
// followed by hsa_shut_down(), waits for all of them, and then verifies that
// the runtime is fully shut down: one more hsa_shut_down() must report
// HSA_STATUS_ERROR_NOT_INITIALIZED.
//
// Thread-creation and join errors are collected first and asserted only after
// every successfully created thread has been joined, so a failure never
// leaves worker threads running past the end of the test.
void ConcurrentInitShutdownTest::TestConcurrentInitShutdown(void) {
  pthread_t ThreadId[NumOfThreads];

  pthread_attr_t attr;
  ASSERT_EQ(0, pthread_attr_init(&attr)) << "pthread_attr_init failed.";
  // Workers must be joinable so that we can wait for them below.
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  // Create the worker threads; each one initializes and shuts down the HSA
  // runtime. Stop at the first creation failure but remember how many were
  // started.
  int Created = 0;
  int CreateStatus = 0;
  while (Created < NumOfThreads) {
    CreateStatus =
        pthread_create(&ThreadId[Created], &attr, TestHSAInitShutdownFunction, nullptr);
    if (CreateStatus != 0) {
      break;
    }
    ++Created;
  }

  // The attribute object is no longer needed once the threads exist.
  pthread_attr_destroy(&attr);

  // Wait for every worker that was actually started.
  int JoinStatus = 0;
  for (int Id = 0; Id < Created; ++Id) {
    int err = pthread_join(ThreadId[Id], nullptr);
    if (err != 0 && JoinStatus == 0) {
      JoinStatus = err;
    }
  }

  ASSERT_EQ(0, CreateStatus) << "pthread_create failed.";
  ASSERT_EQ(0, JoinStatus) << "pthread_join failed.";

  // Check that HSA refcount is exact.
  hsa_status_t err = hsa_shut_down();
  ASSERT_EQ(HSA_STATUS_ERROR_NOT_INITIALIZED, err) << "hsa_init reference count was too high.";
}
#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/concurrent_init_shutdown.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_SHUTDOWN_H_
#define ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_SHUTDOWN_H_
#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test fixture declaration for the concurrent hsa_init()/hsa_shut_down()
// functional test. Derives from TestBase; TestConcurrentInitShutdown() is the
// actual test body.
class ConcurrentInitShutdownTest : public TestBase {
 public:
    // @Brief: Constructor; sets the default iteration count, title and
    // description of the test
    ConcurrentInitShutdownTest();

    // @Brief: Destructor for the ConcurrentInitShutdownTest class
    virtual ~ConcurrentInitShutdownTest();

    // @Brief: Setup hook; a no-op for this test because the runtime is
    // initialized from the worker threads
    virtual void SetUp();

    // @Brief: Runs TestBase::Run() when the required profile matches
    virtual void Run();

    // @Brief: Clean up hook; a no-op for this test
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Calls hsa_init() and hsa_shut_down() from many threads
    // concurrently and verifies the runtime ends up not initialized
    void TestConcurrentInitShutdown(void);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_INIT_SHUTDOWN_H_


================================================
FILE: rocrtst/suites/functional/concurrent_shutdown.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>


#include "suites/functional/concurrent_shutdown.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

void* TestHSAShutdownFunction(void* args) {
  // This function called for each thread
  // This will shutdown the HSA runtime concurrently.
  hsa_status_t status;

  // Shutdown the hsa runtime concurrently
  status = hsa_shut_down();
  if (status != HSA_STATUS_SUCCESS) {
    std::cout << "Failed" << std::endl;
  }
  pthread_exit(NULL);
}

// Number of worker threads spawned by TestConcurrentShutdown(); each one calls
// hsa_shut_down() once.
static const int NumOfThreads = 1000;  // Number of threads to be created
// Number of sequential hsa_init() calls issued by SetUp().
static const int NumTimesInitalize = 1000;  // Number of times the hsa runtime will be initialized

// Helper macro: if `err` is not HSA_STATUS_SUCCESS, print the failing
// line/file, the raw status value and its status string to stdout, then
// return `err` from the enclosing function. It is #undef'd at the end of this
// file.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}

ConcurrentShutdownTest::ConcurrentShutdownTest() : TestBase() {
  // Default iteration count for the main test; the value passed here is only
  // a default and can be overridden on the command line.
  set_num_iteration(10);

  // Human-readable identification of the test.
  set_title("RocR Concurrent Shutdown Test");
  set_description("This test initializes HSA runtime sequentially, shutdown concurrently");
}

// Destructor: the test owns no resources of its own, so there is nothing to
// release here.
ConcurrentShutdownTest::~ConcurrentShutdownTest() {}

// Initializes the HSA runtime NumTimesInitalize times in a row, sequentially,
// on the calling thread. A failed hsa_init() is only reported on stdout; the
// remaining initializations are still attempted.
void ConcurrentShutdownTest::SetUp() {
  int remaining = NumTimesInitalize;
  while (remaining-- > 0) {
    if (hsa_init() != HSA_STATUS_SUCCESS) {
      std::cout << "Failed" << std::endl;
    }
  }
}

// Runs the common TestBase::Run() logic, but only when the profile required
// by this test case matches the one we are actually running on.
void ConcurrentShutdownTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void ConcurrentShutdownTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

void ConcurrentShutdownTest::DisplayResults() const {
  // The required profile is still compared with the one we are running on,
  // but this test has nothing to display in either case, so the outcome of
  // the check is deliberately ignored.
  static_cast<void>(rocrtst::CheckProfile(this));
}

void ConcurrentShutdownTest::Close(void) {
  // Intentionally empty. The usual close path would release handles opened
  // by rocrtst utility calls and call hsa_shut_down(); here the runtime
  // references are dropped by the test itself instead.
  // NOTE(review): the previous comment referred to
  // ConcurrentShutdownTest::SequentiallyInitializeRuntime() -- confirm which
  // code path is expected to balance the hsa_init() calls made in SetUp().
}

// Spawns up to NumOfThreads joinable worker threads that each call
// hsa_shut_down() once, then waits for all successfully created workers
// before returning, so that no shutdown call is still in flight when the
// caller continues.
//
// A thread that cannot be created is reported on stdout and skipped.
void ConcurrentShutdownTest::TestConcurrentShutdown(void) {
  pthread_t ThreadId[NumOfThreads];
  pthread_attr_t attr;
  pthread_attr_init(&attr);

  // Setting the attribute to PTHREAD_CREATE_JOINABLE
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  // Handles of successfully created threads are stored contiguously in
  // ThreadId[0 .. Created).
  int Created = 0;
  for (int Id = 0; Id < NumOfThreads; ++Id) {
    // The worker ignores its argument, so nothing is passed; in particular
    // the address of the loop counter must not escape to another thread.
    int ThreadStatus = pthread_create(&ThreadId[Created], &attr,
                                      TestHSAShutdownFunction, nullptr);
    // pthread_create returns 0 on success and a positive error number on
    // failure, so any non-zero value means the thread was not created.
    if (ThreadStatus != 0) {
      std::cout << "Thread " << Id << " creation failed " << std::endl;
      continue;
    }
    ++Created;
  }

  // The attribute object is no longer needed once the threads exist.
  pthread_attr_destroy(&attr);

  // Wait for the workers so the concurrent shutdowns have completed before
  // this function returns.
  for (int Id = 0; Id < Created; ++Id) {
    if (pthread_join(ThreadId[Id], nullptr) != 0) {
      std::cout << "Thread join failed " << std::endl;
    }
  }
}
#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/concurrent_shutdown.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_SHUTDOWN_H_
#define ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_SHUTDOWN_H_

#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test fixture declaration for the concurrent hsa_shut_down() functional
// test. Derives from TestBase; TestConcurrentShutdown() is the actual test
// body.
class ConcurrentShutdownTest : public TestBase {
 public:
    // @Brief: Constructor; sets the default iteration count, title and
    // description of the test
    ConcurrentShutdownTest();

    // @Brief: Destructor for the ConcurrentShutdownTest class
    virtual ~ConcurrentShutdownTest();

    // @Brief: Initializes the HSA runtime repeatedly and sequentially
    virtual void SetUp();

    // @Brief: Runs TestBase::Run() when the required profile matches
    virtual void Run();

    // @Brief: Clean up hook; a no-op for this test
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Runtime will be initialized Num_Times
    // NOTE(review): no definition of this method appears in
    // concurrent_shutdown.cc, where SetUp() performs the repeated hsa_init()
    // calls -- confirm whether this declaration is still needed.
    void SequentiallyInitializeRuntime(void);

    // @Brief: Calls hsa_shut_down() from many threads concurrently
    void TestConcurrentShutdown(void);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_CONCURRENT_SHUTDOWN_H_


================================================
FILE: rocrtst/suites/functional/cu_masking.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2021-2021, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include "suites/functional/cu_masking.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/os.h"
#include "common/helper_funcs.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

#include <string>
#include <stdlib.h>
#include <algorithm>
#include <random>
#include <chrono>

// Registers the test's title, description and the code object that holds its kernel.
CU_Masking::CU_Masking() : TestBase() {
  std::string title = "CU Masking";
  std::string summary =
      "This test checks CU masking functionality via hsa_amd_queue_cu_get(set)_mask and HSA_CU_MASK.";

  set_title(title);
  set_description(summary);

  // Code object that Run() loads for each GPU it tests.
  set_kernel_file_name("cu_mask_kernels.hsaco");
}

// Runs the CU masking checks on every GPU reported by System::gpu().
//
// The caller's HSA_CU_MASK and HSA_CU_MASK_SKIP_INIT values are saved up front, both variables
// are cleared, and the saved values are put back once all GPUs have been visited.
//
// For each GPU the runtime is brought up and torn down repeatedly (init/fini lambdas) so that a
// different environment is seen by each System::Init():
//   1. HSA_CU_MASK_SKIP_INIT=1: the number of distinct ids written by the "get_hw_id" kernel
//      must equal the agent's reported compute unit count.
//   2. HSA_CU_MASK="<idx>:0-<cu_count-1>": same expectation.
//   3. No environment variables: same expectation.
//   4. RealIterationNum() iterations of randomized checks of hsa_amd_queue_cu_set_mask /
//      hsa_amd_queue_cu_get_mask and of a randomly generated HSA_CU_MASK.
//
// NOTE: the ASSERT_* macros return from this function on failure, in which case fini() and the
// environment restore at the end are skipped.
void CU_Masking::Run() {
  hsa_status_t err;
  TestBase::Run();

  printf("Running %lu iterations\n", RealIterationNum());

  // Random source
  std::mt19937 rand(std::chrono::system_clock::now().time_since_epoch().count());

  // Store cu masking variable
  std::string mask_var;
  char* temp = getenv("HSA_CU_MASK");
  if(temp!=nullptr)
    mask_var = temp;
  unsetenv("HSA_CU_MASK");

  std::string mask_init_var;
  temp = getenv("HSA_CU_MASK_SKIP_INIT");
  if(temp!=nullptr)
    mask_init_var = temp;
  unsetenv("HSA_CU_MASK_SKIP_INIT");

  // Loop over and test all GPUs
  uint32_t idx = 0;
  while(true) {
    Device* gpu;
    CodeObject* obj;
    Kernel kern;

    // Kernel argument block: output buffer pointer followed by the hidden arguments.
    struct args_t {
      uint32_t* hw_ids;
      OCLHiddenArgs _;
    };
    args_t* args;

    hsa_signal_t signal;
    hsa_queue_t* q;

    uint32_t cu_count;
    uint32_t group_size;
    uint32_t max_grid_size;
    uint32_t threads;

    // Initializes the runtime and every per-GPU resource used by dispatch().
    // Returns false when idx is past the last GPU or the kernel cannot be found.
    auto init = [&]() {
      System::Init();
      if(idx == System::gpu().size())
        return false;

      gpu = &System::gpu()[idx];
      std::string filename = rocrtst::LocateKernelFile(kernel_file_name(), gpu->agent);

      obj = new CodeObject(filename, *gpu);

      err = hsa_agent_get_info(gpu->agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
      CHECK(err);

      err = hsa_agent_get_info(gpu->agent, (hsa_agent_info_t)HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &group_size);
      CHECK(err);

      err = hsa_agent_get_info(gpu->agent, (hsa_agent_info_t)HSA_AGENT_INFO_GRID_MAX_SIZE, &max_grid_size);
      CHECK(err);

      // Ten maximum-size workgroups per CU, clamped to the largest grid and rounded down to a
      // whole number of workgroups.
      uint64_t max_threads = uint64_t(cu_count)*group_size*10;
      threads = max_threads < max_grid_size ? max_threads : max_grid_size;
      threads = (threads / group_size) * group_size;

      // All CU enabled check
      if(!obj->GetKernel("get_hw_id", kern)) {
        ADD_FAILURE();
        return false;
      }

      args = (args_t*)hsaMalloc(sizeof(args_t), System::kernarg());
      memset(args, 0, sizeof(args_t));

      // One output slot per work-item.
      args->hw_ids = (uint32_t*)hsaMalloc(sizeof(uint32_t)*threads, System::kernarg());

      err = hsa_signal_create(1, 0, nullptr, &signal);
      CHECK(err);

      err = hsa_queue_create(gpu->agent, 4096, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr, 0, 0, &q);
      CHECK(err);

      return true;
    };

    // Releases everything a successful init() created and shuts the runtime down.
    auto fini = [&]() {
      err = hsa_queue_destroy(q);
      CHECK(err);
      err = hsa_signal_destroy(signal);
      CHECK(err);
      err = hsa_memory_free(args->hw_ids);
      CHECK(err);
      err = hsa_memory_free(args);
      CHECK(err);
      delete obj;
      gpu = nullptr;
      System::Shutdown();
    };

    // Clears the output buffer, launches the kernel over `threads` work-items and blocks until
    // the completion signal reaches 0, then re-arms the signal for the next dispatch.
    auto dispatch = [&]() {
      memset(args->hw_ids, 0, sizeof(uint32_t)*threads);

      Aql pkt = { };
      pkt.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
      pkt.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
      pkt.header.release = HSA_FENCE_SCOPE_SYSTEM;
      pkt.dispatch.kernel_object = kern.handle;
      pkt.dispatch.private_segment_size = kern.scratch;
      pkt.dispatch.group_segment_size = kern.group;
      pkt.dispatch.setup = 1;
      pkt.dispatch.workgroup_size_x = group_size;
      pkt.dispatch.workgroup_size_y = 1;
      pkt.dispatch.workgroup_size_z = 1;
      pkt.dispatch.grid_size_x = threads;
      pkt.dispatch.grid_size_y = 1;
      pkt.dispatch.grid_size_z = 1;
      pkt.dispatch.kernarg_address = args;
      pkt.dispatch.completion_signal = signal;

      SubmitPacket(q, pkt);

      hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, -1ull, HSA_WAIT_STATE_BLOCKED);
      hsa_signal_store_relaxed(signal, 1);
    };

    // Dispatches the kernel and stores the sorted set of distinct values it wrote into `ids`.
    auto getHwIds = [&](std::vector<uint32_t>& ids){
      dispatch();
      std::sort(&args->hw_ids[0], &args->hw_ids[threads]);
      uint32_t* end = std::unique(&args->hw_ids[0], &args->hw_ids[threads]);
      ids.clear();
      ids.insert(ids.begin(), &args->hw_ids[0], end);
    };

    // Check fully unconstrained.
    unsetenv("HSA_CU_MASK_SKIP_INIT");
    setenv("HSA_CU_MASK_SKIP_INIT", "1", 1);

    // The first init() of each pass doubles as the loop exit test: it fails once idx has moved
    // past the last GPU.
    // NOTE(review): on this exit path System::Init() has run without a matching
    // System::Shutdown() - confirm whether that is intended.
    if(!init())
      break;

    {
      char name[64];
      hsa_agent_get_info(gpu->agent, HSA_AGENT_INFO_NAME, name);
      name[63]='\0';
      printf("Testing gpu index %u, %s\n", idx, name);
    }

    std::vector<uint32_t> left, right, isect;

    // Check unconstrained cu set.
    getHwIds(left);
    printf("Expecting %u CUs, found %lu with HSA_CU_MASK_SKIP_INIT.\n", cu_count, left.size());
    ASSERT_EQ(cu_count, left.size());
    fini();
    unsetenv("HSA_CU_MASK_SKIP_INIT");

    // Check fully enabled, but mask used, set.
    setenv("HSA_CU_MASK", (std::to_string(idx)+":0-"+std::to_string(cu_count-1)).c_str(), 1);
    ASSERT_TRUE(init());
    getHwIds(right);
    printf("Expecting %u CUs, found %lu with HSA_CU_MASK.\n", cu_count, right.size());
    if(cu_count != right.size()) {
      // Report the ids seen in the unconstrained run that are absent now.
      isect.resize(left.size());
      auto isect_end = std::set_difference(left.begin(), left.end(), right.begin(), right.end(), isect.begin());
      isect.resize(isect_end - isect.begin());
      printf("Missing CUs: ");
      for(auto cu : isect)
        printf("%u ", cu);
      printf("\n");
    }
    ASSERT_EQ(cu_count, right.size());
    fini();
    unsetenv("HSA_CU_MASK");

    // Check rocr default mask.
    ASSERT_TRUE(init());
    getHwIds(right);
    printf("Expecting %u CUs, found %lu.\n", cu_count, right.size());
    if(cu_count != right.size()) {
      isect.resize(left.size());
      auto isect_end = std::set_difference(left.begin(), left.end(), right.begin(), right.end(), isect.begin());
      isect.resize(isect_end - isect.begin());
      printf("Missing CUs: ");
      for(auto cu : isect)
        printf("%u ", cu);
      printf("\n");
    }
    ASSERT_EQ(cu_count, right.size());
    fini();

    // The randomized checks below pick split points with rand() % (cu_count - 2), which needs
    // at least 3 CUs; with fewer the modulus would be zero or wrap around.
    if(cu_count < 3) {
      printf("Skipping randomized CU mask checks, %u CUs is too few.\n", cu_count);
      idx++;
      continue;
    }

    // Bit positions 0..cu_count-1; shuffled before every random mask selection.
    std::vector<uint32_t> bits;
    for(uint32_t i=0; i<cu_count; i++)
      bits.push_back(i);

    std::vector<uint32_t> bitmask, resultmask;
    uint32_t dwords = (cu_count + 31) / 32;

    bitmask.resize(dwords);
    resultmask.resize(dwords);

    for(size_t iteration=0; iteration<RealIterationNum(); iteration++) {

      // Zeroes `array`, then sets the bits listed in bits[start, stop).
      auto setBits = [&](uint32_t start, uint32_t stop, std::vector<uint32_t>& array) {
        assert(array.size() == dwords && "Bitmask array has incorrect size.");
        for(uint32_t i=0; i<dwords; i++)
          array[i] = 0;
        for(uint32_t i=start; i<stop; i++) {
          int dword = bits[i] / 32;
          int offset = bits[i] % 32;
          // Unsigned literal: shifting a signed 1 into bit 31 would overflow.
          array[dword] |= (1u << offset);
        }
      };

      // Applies the mask built from bits[start, stop) to the queue, reads the effective mask
      // back into resultmask and collects the distinct ids observed under that mask.
      auto getMasks = [&](uint32_t start, uint32_t stop, std::vector<uint32_t>& hw_ids) {
        setBits(start, stop, bitmask);
        err = hsa_amd_queue_cu_set_mask(q, dwords*32, &bitmask[0]);
        // HSA_STATUS_CU_MASK_REDUCED is tolerated here; any other non-success goes to CHECK.
        if((err!=HSA_STATUS_SUCCESS) && (err!=(hsa_status_t)HSA_STATUS_CU_MASK_REDUCED))
          CHECK(err);
        err = hsa_amd_queue_cu_get_mask(q, dwords*32, &resultmask[0]);
        CHECK(err);
        getHwIds(hw_ids);
      };

      // isect = left intersect right (both are sorted by getHwIds).
      auto getIsect = [&]() {
        isect.resize(left.size());
        auto isect_end = std::set_intersection(left.begin(), left.end(), right.begin(), right.end(), isect.begin());
        isect.resize(isect_end - isect.begin());
      };

      // Prints the mask as one hex number, most significant dword first.
      auto printMask = [](std::vector<uint32_t>& mask) {
        printf("0x");
        for(size_t i=1; i<mask.size()+1; i++)
          printf("%08X", mask[mask.size()-i]);
      };

      auto printMasks = [&]() {
        printf("Set mask: ");
        printMask(bitmask);
        printf("\n");
        printf("Get mask: ");
        printMask(resultmask);
        printf("\n");
      };

      // CU set API check, no overlap
      std::shuffle(bits.begin(), bits.end(), rand);
      uint32_t split_index = (rand() % (cu_count - 2)) + 1;

      ASSERT_TRUE(init());

      getMasks(0, split_index, left);
      printMasks();
      printf("Observed %lu CUs.\n", left.size());
      for(uint32_t i=0; i<dwords; i++)
        ASSERT_EQ(bitmask[i], resultmask[i]);
      ASSERT_EQ(split_index, left.size());

      getMasks(split_index, cu_count, right);
      printMasks();
      printf("Observed %lu CUs.\n", right.size());
      for(uint32_t i=0; i<dwords; i++)
        ASSERT_EQ(bitmask[i], resultmask[i]);
      ASSERT_EQ(cu_count-split_index, right.size());

      getIsect();
      printf("Overlap of %lu CUs.\n", isect.size());
      ASSERT_EQ(0u, isect.size());

      // CU set API check, overlap possible
      uint32_t high_split_index = (rand() % (cu_count - 2)) + 1;

      if(high_split_index < split_index)
        std::swap(high_split_index, split_index);

      getMasks(0, high_split_index, left);
      printMasks();
      printf("Observed %lu CUs.\n", left.size());
      for(uint32_t i=0; i<dwords; i++)
        ASSERT_EQ(bitmask[i], resultmask[i]);
      ASSERT_EQ(high_split_index, left.size());

      getMasks(split_index, cu_count, right);
      printMasks();
      printf("Observed %lu CUs.\n", right.size());
      for(uint32_t i=0; i<dwords; i++)
        ASSERT_EQ(bitmask[i], resultmask[i]);
      ASSERT_EQ(cu_count-split_index, right.size());

      getIsect();
      printf("Overlap of %lu CUs.\n", isect.size());
      ASSERT_EQ(high_split_index - split_index, isect.size());

      // HSA_CU_MASK check, default
      fini();

      // Pick masking bits for env var
      std::shuffle(bits.begin(), bits.end(), rand);
      uint32_t mask_index = (rand() % (cu_count - 2)) + 1;
      std::vector<uint32_t> env_mask(&bits[0], &bits[mask_index]);

      // Convert to string range syntax
      std::sort(env_mask.begin(), env_mask.end());
      uint32_t start, stop;
      start=stop=env_mask[0];
      std::vector<std::string> ranges;
      // Append invalid bit so that final loop will emit the last range.
      env_mask.push_back(-1);
      for(size_t j=1; j<env_mask.size(); j++) {
        uint32_t index = env_mask[j];
        if(index != stop+1) {
          // Run of consecutive bits ended: emit it as "n" or "first-last".
          if(start==stop)
            ranges.push_back(std::to_string(start));
          else
            ranges.push_back(std::to_string(start)+"-"+std::to_string(stop));
          start=stop=index;
        } else {
          stop = index;
        }
      }
      env_mask.pop_back();
      // Shuffle ranges
      std::shuffle(ranges.begin(), ranges.end(), rand);
      // Assemble final env var string.
      std::string env_var = std::to_string(idx) + ":";
      env_var += ranges[0];
      for(uint32_t i=1; i<ranges.size(); i++)
        env_var += ", " + ranges[i];

      // Set env var and check that default queues are masked.
      //env_var = "0:41-44, 104-107, 47-50, 67-68, 77-100, 61, 102, 19-24, 109, 70-75, 52-59, 63-65, 0-17, 27-39";
      setenv("HSA_CU_MASK", env_var.c_str(), 1);
      printf("HSA_CU_MASK = %s\n", env_var.c_str());
      // From here on env_mask holds the same selection as a dword bitmask.
      env_mask.clear();
      env_mask.resize(dwords);
      setBits(0, mask_index, env_mask);
      printf("  HSA_CU_MASK => ");
      printMask(env_mask);
      printf("\n");

      ASSERT_TRUE(init());

      getHwIds(left);
      printf("Expecting %u CUs, found %lu\n", mask_index, left.size());
      ASSERT_EQ(left.size(), mask_index);

      // Check that HSA_CU_MASK constrains the API
      // Find at least partially enabled CU mask.
      [&]() {
        while(true) {
          std::shuffle(bits.begin(), bits.end(), rand);
          split_index = (rand() % (cu_count - 2)) + 1;
          setBits(0, split_index, bitmask);
          for(uint32_t i=0; i<dwords; i++) {
            if((bitmask[i] & env_mask[i]) != 0)
              return;
          }
        }
      }();

      getMasks(0, split_index, left);
      printMasks();
      printf("Observed %lu CUs.\n", left.size());
      uint32_t enabledCus = 0;
      for(uint32_t i=0; i<dwords; i++) {
        // Expected effective mask is the requested mask limited to the env var's bits.
        bitmask[i] &= env_mask[i];
        enabledCus += rocrtst::popcount(bitmask[i]);
        ASSERT_EQ(bitmask[i], resultmask[i]);
      }
      ASSERT_EQ(enabledCus, left.size());
      ASSERT_LE(enabledCus, mask_index);

      fini();
      unsetenv("HSA_CU_MASK");

      // Todo: Hex syntax.  Syntax errors.  Above hw limit bits.

    }
    idx++;
  }

  // Drop whatever the test left behind (the loop exits with HSA_CU_MASK_SKIP_INIT=1 still set),
  // then put back the values captured on entry.
  unsetenv("HSA_CU_MASK");
  unsetenv("HSA_CU_MASK_SKIP_INIT");
  if(!mask_var.empty())
    setenv("HSA_CU_MASK", mask_var.c_str(), 1);
  if(!mask_init_var.empty())
    setenv("HSA_CU_MASK_SKIP_INIT", mask_init_var.c_str(), 1);
}


================================================
FILE: rocrtst/suites/functional/cu_masking.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2021-2021, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_CU_MASKING_H_
#define ROCRTST_SUITES_FUNCTIONAL_CU_MASKING_H_
#include <vector>

#include "suites/test_common/test_base.h"
#include "common/base_rocr.h"
#include "common/common.h"

// @Brief: This class checks CU masking functionality, both through the
//  hsa_amd_queue_cu_set_mask / hsa_amd_queue_cu_get_mask APIs and through
//  the HSA_CU_MASK environment variable

class CU_Masking : public TestBase {
 public:
  // @Brief: Constructor; the title, description and kernel file name are
  // set in the out-of-line definition
  explicit CU_Masking();

  // @Brief: Destructor; this class adds no members that need releasing
  virtual ~CU_Masking() {}

  // @Brief: Only prints the setup banner via TestBase
  virtual void SetUp() {
    TestBase::SetupPrint();
  }

  // @Brief: Run the test case (defined in cu_masking.cc)
  virtual void Run();

  // @Brief: Only prints the close banner via TestBase
  virtual void Close() {
    TestBase::ClosePrint();
  }

 private:
  // @Brief: Number of iterations Run() performs; forwards num_iteration()
  virtual size_t RealIterationNum() {
    return num_iteration();
  }
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_CU_MASKING_H_


================================================
FILE: rocrtst/suites/functional/deallocation_notifier.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/* Test Name: deallocation_notifier
 *
 * Purpose: Verifies that deallocation callbacks are invoked prior to destruction,
 * are not retained between successive allocations, may be registered on non-base
 * addresses, are invoked exactly once, run concurrently with other APIs, and other
 * callbacks.
 *
 * Test Description:
 * Various interleavings of allocate, register callback, deregister callback, and deallocate.
 *
 * Expected Results: Callbacks should run before free returns.  Callbacks should trigger when
 * their allocation is released.  Free should deregister invoked callbacks.  Callbacks should not
 * be able to double free the allocation they monitor.  Callbacks should be able to execute
 * ROCr APIs including hsa_amd_memory_pool_allocate and hsa_amd_memory_pool_free, possibly
 * triggering other callbacks.
 *
 */
#include "suites/functional/deallocation_notifier.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// Bookkeeping for one registered deallocation callback.
struct callback_status {
  // Reset to 0 by REGISTER and set to 1 by call() once the callback has run.
  int callback_status{0};
  // Address that call() expects to receive when the callback fires.
  void* released_ptr{nullptr};
};

// Two bookkeeping slots; the slot index is passed to the callbacks as their user data
// (see REGISTER and call()).
static callback_status notifiers[2];
// Pool used for every allocation in this file; assigned from device_pool() in SetUp().
static hsa_amd_memory_pool_t pool;

// Resets notifiers[i], records `ptr` as the address the callback is expected to report, and
// registers `callback` on `ptr` with the slot index `i` as its user data.
//
// Requirements on the expansion site:
//  - a variable named `status` of type hsa_status_t must be in scope (it is assigned);
//  - the enclosing function must return void, because ASSERT_EQ returns on failure.
//
// `ptr` and `i` are each evaluated exactly once, and the index is widened to size_t before
// being converted to a pointer so the cast never changes width.
#define REGISTER(ptr, callback, i)                                                                 \
  do {                                                                                             \
    void* const register_ptr_ = (ptr);                                                             \
    const size_t register_slot_ = (i);                                                             \
    notifiers[register_slot_].callback_status = 0;                                                 \
    notifiers[register_slot_].released_ptr = register_ptr_;                                        \
    status = hsa_amd_register_deallocation_callback(                                               \
        register_ptr_, (callback), reinterpret_cast<void*>(register_slot_));                       \
    ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Register deallocation callback error.";              \
  } while (false)

// Plain deallocation callback: checks the reported address against the one recorded for the
// slot and then marks the slot as fired.
static void call(void* ptr, void* user) {
  // The user data carries the notifiers[] slot index, not a real pointer.
  const auto index = reinterpret_cast<size_t>(user);
  ASSERT_EQ(ptr, notifiers[index].released_ptr) << "Bad deallocation callback address";

  // Only reached when the address matched (ASSERT_EQ returns on failure).
  notifiers[index].callback_status = 1;
}

static void doublefree(void* ptr, void* user) {
  call(ptr, user);

  hsa_status_t status = hsa_amd_memory_pool_free(ptr);
  ASSERT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, status) << "Double free did not return an error.";
}

// Deallocation callback that itself allocates, registers a callback on, and frees a second
// buffer, checking that the nested callback (slot 1) runs during the nested free.
// Must be registered on slot 0.
static void recursive(void* ptr, void* user) {
  ASSERT_EQ(0, user) << "Wrong index.";
  call(ptr, user);

  // Keep the nested allocation in its own variable instead of overwriting the address of the
  // allocation that triggered this callback.
  void* nested = nullptr;
  hsa_status_t status = hsa_amd_memory_pool_allocate(pool, 4096, 0, &nested);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(nested, call, 1);
  // The nested free has to succeed for the callback check below to be meaningful.
  status = hsa_amd_memory_pool_free(nested);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[1].callback_status) << "Callback not executed.";
}

// Sets the default iteration count, title and description of the test.
DeallocationNotifierTest::DeallocationNotifierTest() : TestBase() {
  // Default number of iterations of the main test; can be overridden on the command line.
  constexpr int kDefaultIterations = 10;
  set_num_iteration(kDefaultIterations);

  set_title("RocR Deallocation Notifier Test");
  set_description("Tests deallocation notification callbacks");
}

// Nothing to release here.
DeallocationNotifierTest::~DeallocationNotifierTest() {
}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void DeallocationNotifierTest::SetUp(void) {
  TestBase::SetUp();

  // Select the default agents, then the typical pools, bailing out on the first failure.
  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Publish the device pool to the file-scope callbacks.
  pool = device_pool();
}

// Runs the deallocation notifier checks, unless the profile check rejects this system.
void DeallocationNotifierTest::Run(void) {
  // Compare the profile this test case requires with the one we are actually running on.
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
    TestDeallocationNotifier();
  }
}

// Defers entirely to the base class.
void DeallocationNotifierTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test prints no results of its own; only the profile check is performed.
void DeallocationNotifierTest::DisplayResults(void) const {
  // Compare the profile this test case requires with the one we are actually running on.
  if (rocrtst::CheckProfile(this)) {
    // Nothing to display for this test.
  }
}

void DeallocationNotifierTest::Close() {
  // TestBase::Close() closes the handles opened by the rocrtst utility calls and calls
  // hsa_shut_down(), so any other HSA cleanup has to happen before this point.
  TestBase::Close();
}

// Walks through the register / deregister / free scenarios one after another. Every step is
// guarded by ASSERT_EQ, so the first failing step ends the test.
void DeallocationNotifierTest::TestDeallocationNotifier(void) {
  const size_t kAllocBytes = 4096;       // size of every allocation made here
  const size_t kInteriorOffset = 1024;   // offset used for the non-base-address cases
  // User data for registrations that are expected to be rejected.
  void* const unused_cookie = reinterpret_cast<void*>(0xDEADBEEF);

  hsa_status_t status;  // the REGISTER macro assigns to this name
  void* mem = nullptr;
  void* interior = nullptr;

  // 1. Registering on a null address must be rejected.
  status = hsa_amd_register_deallocation_callback(mem, call, unused_cookie);
  ASSERT_EQ(HSA_STATUS_ERROR_INVALID_ARGUMENT, status) << "Register deallocation callback error.";

  // 2. Registering on an address ROCr does not know about (plain malloc) must be rejected.
  mem = malloc(kAllocBytes);
  status = hsa_amd_register_deallocation_callback(mem, call, unused_cookie);
  free(mem);
  ASSERT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, status) << "Register deallocation callback error.";

  // 3. Allocate, register, free: the callback must have completed when free returns.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(mem, call, 0);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";

  // 4. Allocate again and free without registering: no callback may fire.
  notifiers[0].callback_status = 0;
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(0, notifiers[0].callback_status) << "Callback reused.";

  // 5. Register on an address inside the allocation rather than its base, then free.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  interior = static_cast<char*>(mem) + kInteriorOffset;
  REGISTER(interior, call, 0);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";

  // 6. Register, deregister, free: the removed callback must not fire.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  interior = static_cast<char*>(mem) + kInteriorOffset;
  REGISTER(interior, call, 0);
  status = hsa_amd_deregister_deallocation_callback(interior, call);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Deregister deallocation callback error.";
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(0, notifiers[0].callback_status) << "Callback reused.";

  // 7. Two callbacks on one allocation (base and interior address): both must have run when
  //    free returns.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  interior = static_cast<char*>(mem) + kInteriorOffset;
  REGISTER(mem, call, 0);
  REGISTER(interior, call, 1);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";
  ASSERT_EQ(1, notifiers[1].callback_status) << "Callback not executed.";

  // 8. Deregistering on the address of the allocation that was just freed must fail.
  status = hsa_amd_deregister_deallocation_callback(interior, call);
  ASSERT_EQ(HSA_STATUS_ERROR_INVALID_ARGUMENT, status) << "Deregister deallocation callback error.";

  // 9. Deregistering from null must fail.
  status = hsa_amd_deregister_deallocation_callback(nullptr, call);
  ASSERT_EQ(HSA_STATUS_ERROR_INVALID_ARGUMENT, status) << "Deregister deallocation callback error.";

  // 10. Fragment case: with a first small allocation still live, allocate a second one
  //     (second <2MB vram allocation), register on it and free it.
  void* first_fragment;
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &first_fragment);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(mem, call, 0);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";

  // 11. Several fragments, each with its own callback: callbacks must follow the order of the
  //     frees. first_fragment from the previous step is reused and released here.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(mem, call, 0);
  REGISTER(first_fragment, call, 1);
  status = hsa_amd_memory_pool_free(first_fragment);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[1].callback_status) << "Callback not executed.";
  ASSERT_EQ(0, notifiers[0].callback_status) << "Callback executed improperly.";
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";

  // 12. The callback attempts to free the triggering address again; that inner free is
  //     checked inside doublefree(), the outer free must still succeed.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(mem, doublefree, 0);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";

  // 13. The callback allocates, registers and frees on its own: callbacks must nest and be
  //     able to use HSA APIs.
  status = hsa_amd_memory_pool_allocate(pool, kAllocBytes, 0, &mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory allocation failure.";
  REGISTER(mem, recursive, 0);
  status = hsa_amd_memory_pool_free(mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status) << "Memory free failure.";
  ASSERT_EQ(1, notifiers[0].callback_status) << "Callback not executed.";
}


================================================
FILE: rocrtst/suites/functional/deallocation_notifier.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_DEALLOCATION_NOTIFIER_H_
#define ROCRTST_SUITES_FUNCTIONAL_DEALLOCATION_NOTIFIER_H_

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

class DeallocationNotifierTest : public TestBase {
 public:
  // @Brief: Constructor for the DeallocationNotifierTest class
  DeallocationNotifierTest();

  // @Brief: Destructor for the DeallocationNotifierTest class
  virtual ~DeallocationNotifierTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Run the test
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();

  // @Brief: Tests deallocation notifier callbacks.
  void TestDeallocationNotifier();
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_DEALLOCATION_NOTIFIER_H_


================================================
FILE: rocrtst/suites/functional/debug_basic.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>

#include "suites/functional/debug_basic.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Matrices used by this test are M_ORDER x M_ORDER, stored row-major in a
// flat array.
#define M_ORDER 64
// Element (I, J) of flat matrix M. Every argument is parenthesized so that
// expression arguments (e.g. "i + 1") index the intended element.
#define M_GET(M, I, J) ((M)[(I) * M_ORDER + (J)])
// Stores V into element (I, J) of flat matrix M; usable as a statement or as
// an expression.
#define M_SET(M, I, J, V) ((M)[(I) * M_ORDER + (J)] = (V))

// Element-count constant; it is not referenced by the code visible in this
// translation unit.
static const uint32_t kNumBufferElements = 256;

// State handed to the queue error callback as its user-data pointer.
struct test_debug_data_t {
  // Set to true by TestDebugTrap once the callback has run.
  bool trap_triggered;
  // Address of the test's queue pointer, so the callback can compare the
  // queue it was invoked for against the queue the test created.
  hsa_queue_t** queue_pointer;
};
typedef struct test_debug_data_t test_debug_data;

// Queue error callback passed to hsa_queue_create(); defined at the end of
// this file.
static void TestDebugTrap(hsa_status_t status, hsa_queue_t* source, void* data);

// Early-return helper for functions that return hsa_status_t: if `err` is not
// HSA_STATUS_SUCCESS, log the call site plus the status string and return the
// status to the caller.
//
// `err` is evaluated exactly once (so a function call may be passed directly),
// and the do/while(0) wrapper makes the macro behave as a single statement,
// including in an unbraced if/else. A null status string is not streamed.
#define RET_IF_HSA_ERR(err) do { \
  const hsa_status_t ret_if_hsa_err_status_ = (err); \
  if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(ret_if_hsa_err_status_, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << \
                          ret_if_hsa_err_status_ << std::endl; \
    std::cout << (msg ? msg : "") << std::endl; \
    return ret_if_hsa_err_status_; \
  } \
} while (0)

// Configures the test's defaults: iteration count, title/description and the
// code object + kernel symbol that the debug-trap subtest loads.
DebugBasicTest::DebugBasicTest() : TestBase() {
  // Default number of iterations of the main test; it can be overridden on
  // the command line.
  set_num_iteration(10);

  set_title("RocR Debug Function Tests");
  set_description("This series of tests check debug related functions.");
  set_kernel_file_name("vector_add_debug_trap_kernels.hsaco");
  set_kernel_name("vector_add_debug_trap");
}

// Nothing to release here; cleanup is performed in Close().
DebugBasicTest::~DebugBasicTest() {}

// One-time initialization of the member state used by the rest of the test:
// runs the base-class setup, then selects the default agents and the typical
// memory pools. A failed step raises a fatal gtest assertion and returns.
void DebugBasicTest::SetUp() {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches what we are actually running on.
void DebugBasicTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// No test-specific information to add; defer to the base class.
void DebugBasicTest::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to print. The profile check is still
// performed (as in Run()), but its outcome does not change what happens here.
void DebugBasicTest::DisplayResults() const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

// TestBase::Close() closes the handles opened within rocrtst utility calls
// and calls hsa_shut_down(), so any other HSA cleanup must come before it.
void DebugBasicTest::Close() {
  TestBase::Close();
}

// Kernel-argument block handed to the dispatched kernel through
// aql.kernarg_address. The struct is declared with 16-byte alignment.
// NOTE(review): the field order/layout presumably has to match the argument
// list of the vector_add_debug_trap kernel — confirm against the kernel source.
typedef struct __attribute__((aligned(16))) arguments_t {
  const int *a;  // set to the first input buffer (M_IN0)
  const int *b;  // set to the second input buffer (M_IN1)
  const int *c;  // set to the device result buffer (M_RESULT_DEVICE)
  int *d;        // not assigned anywhere in this file
  int *e;        // not assigned anywhere in this file
} arguments;

// Kernarg buffer shared at file scope; VectorAddDebugTrapTest allocates it
// from the kernarg pool and frees it again for each GPU it tests.
arguments *vectorAddKernArgs = NULL;

// Banner line printed once a subtest has finished.
static const char kSubTestSeparator[] = "  **************************";

// Announces the subtest named `header` on stdout (the line is flushed).
static void PrintDebugSubtestHeader(const char *header) {
  std::cout << "  *** Debug Basic Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Per-GPU body of the debug-trap subtest.
//
// Creates a queue on gpuAgent with TestDebugTrap registered as its error
// callback, dispatches the kernel configured in the constructor
// (vector_add_debug_trap) over two randomly filled M_ORDER x M_ORDER input
// matrices, and then verifies that
//   * the completion signal still holds its initial value 1 after the wait, and
//   * the callback ran (user_data.trap_triggered is true).
//
// cpuAgent: agent whose memory pools supply the kernarg and host buffers.
// gpuAgent: agent the queue is created on and the kernel is dispatched to.
//
// NOTE(review): every check is a fatal gtest assertion, so a failure returns
// immediately and skips the frees/destroys at the bottom of the function.
void DebugBasicTest::VectorAddDebugTrapTest(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  hsa_status_t err;
  hsa_queue_t *queue = NULL;  // command queue
  hsa_signal_t signal = {0};  // completion signal

  int *M_IN0 = NULL;
  int *M_IN1 = NULL;
  int *M_RESULT_DEVICE = NULL;
  // Host-side reference sum; it is filled in below but not compared against
  // anything in this function.
  int M_RESULT_HOST[M_ORDER * M_ORDER];

  // get queue size
  uint32_t queue_size = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Callback state: the callback sets trap_triggered and reads the queue
  // pointer through queue_pointer, which points at the local `queue` above.
  test_debug_data user_data{.trap_triggered = false,
                            .queue_pointer = &queue};

  // create queue, registering TestDebugTrap as the queue's error callback
  err = hsa_queue_create(gpuAgent,
                         queue_size, HSA_QUEUE_TYPE_MULTI,
                         TestDebugTrap, &user_data, 0, 0, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Find a memory pool that supports kernel arguments.
  hsa_amd_memory_pool_t kernarg_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                           rocrtst::GetKernArgMemoryPool,
                                           &kernarg_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Get System Memory Pool on the cpuAgent to allocate host side buffers
  hsa_amd_memory_pool_t global_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                           rocrtst::GetGlobalMemoryPool,
                                           &global_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // allocate the two input buffers and the output buffer
  err = hsa_amd_memory_pool_allocate(global_pool,
                                     M_ORDER * M_ORDER * sizeof(int), 0,
                                     reinterpret_cast<void**>(&M_IN0));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                     M_ORDER * M_ORDER * sizeof(int), 0,
                                     reinterpret_cast<void**>(&M_IN1));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                     M_ORDER * M_ORDER * sizeof(int), 0,
                                     reinterpret_cast<void**>(&M_RESULT_DEVICE));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // allocate the kernel-argument block (file-scope vectorAddKernArgs)
  err = hsa_amd_memory_pool_allocate(kernarg_pool,
                                     sizeof(arguments), 0,
                                     reinterpret_cast<void**>(&vectorAddKernArgs));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Allow gpuAgent access to all allocated system memory.
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, M_IN0);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, M_IN1);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, M_RESULT_DEVICE);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, vectorAddKernArgs);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  memset(M_RESULT_HOST, 0, M_ORDER * M_ORDER * sizeof(int));
  memset(M_RESULT_DEVICE, 0, M_ORDER * M_ORDER * sizeof(int));

  // Only a, b and c are populated; d and e of the kernarg block are left as
  // allocated.
  vectorAddKernArgs->a = M_IN0;
  vectorAddKernArgs->b = M_IN1;
  vectorAddKernArgs->c = M_RESULT_DEVICE;

  // initialize input with values in [1, 10] and run on host
  srand(time(NULL));
  for (int i = 0; i < M_ORDER; ++i) {
    for (int j = 0; j < M_ORDER; ++j) {
      M_SET(M_IN0, i, j, (1 + rand() % 10));
      M_SET(M_IN1, i, j, (1 + rand() % 10));
    }
  }

  // host-side element-wise sum of the two inputs
  for (int i = 0; i < M_ORDER; ++i) {
    for (int j = 0; j < M_ORDER; ++j) {
      int s = M_GET(M_IN0, i, j) + M_GET(M_IN1, i, j);
      M_SET(M_RESULT_HOST, i, j, s);
    }
  }

  // Create the executable, get symbol by name and load the code object
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill the dispatch packet with
  // workgroup_size, grid_size, kernelArgs and completion signal
  // Put it on the queue and launch the kernel by ringing the doorbell

  // create completion signal with initial value 1
  err = hsa_signal_create(1, 0, NULL, &signal);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // create aql packet
  hsa_kernel_dispatch_packet_t aql;
  memset(&aql, 0, sizeof(aql));

  // initialize aql packet: one work-item per matrix element, in
  // work-groups of 64. The header is left at 0 here; the real header is
  // written separately below via AtomicSetPacketHeader.
  aql.header = 0;
  aql.setup = 1;
  aql.workgroup_size_x = 64;
  aql.workgroup_size_y = 1;
  aql.workgroup_size_z = 1;
  aql.grid_size_x = M_ORDER * M_ORDER;
  aql.grid_size_y = 1;
  aql.grid_size_z = 1;
  aql.private_segment_size = 0;
  aql.group_segment_size = 0;
  aql.kernel_object = kernel_object();  // kernel_code;
  aql.kernarg_address = vectorAddKernArgs;
  aql.completion_signal = signal;

  // const uint32_t queue_size = queue->size;
  // Mask used to wrap the write index into the queue's packet array.
  const uint32_t queue_mask = queue->size - 1;

  // write to command queue: reserve one slot by bumping the write index
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);

  hsa_queue_store_write_index_relaxed(queue, index + 1);

  rocrtst::WriteAQLToQueueLoc(queue, index, &aql);

  // Kernel-dispatch header with system-scope acquire and release fences.
  uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
  aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
  aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

  // Write the header into the reserved slot after the packet body has been
  // written.
  void* q_base = queue->base_address;
  rocrtst::AtomicSetPacketHeader(aql_header, aql.setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);

  // ring the doorbell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);

  // wait for the signal long enough for the debug trap event to happen
  // (the wait is bounded by the 0xffffff timeout argument)
  hsa_signal_value_t completion;
  completion = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1,
                                         0xffffff, HSA_WAIT_STATE_ACTIVE);

  // completion signal should not be changed.
  ASSERT_EQ(completion, 1);

  // trap should be triggered
  // NOTE(review): trap_triggered is a plain bool written by the callback;
  // presumably the callback runs on a runtime thread — confirm whether any
  // synchronization is needed here.
  ASSERT_EQ(user_data.trap_triggered, true);

  // Put the completion signal back to 1 before tearing everything down.
  hsa_signal_store_relaxed(signal, 1);

  // Release buffers, signal and queue.
  if (M_IN0) { hsa_memory_free(M_IN0); }
  if (M_IN1) { hsa_memory_free(M_IN1); }
  if (M_RESULT_DEVICE) {hsa_memory_free(M_RESULT_DEVICE); }
  if (vectorAddKernArgs) { hsa_memory_free(vectorAddKernArgs); }
  if (signal.handle) { hsa_signal_destroy(signal); }
  if (queue) { hsa_queue_destroy(queue); }
  std::cout << kSubTestSeparator << std::endl;
}

void DebugBasicTest::VectorAddDebugTrapTest(void) {
  hsa_status_t err;

  PrintDebugSubtestHeader("VectorAddDebugTrapTest");

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    VectorAddDebugTrapTest(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Queue error callback registered through hsa_queue_create().
//
// status: status the callback was invoked with; expected to be
//         HSA_STATUS_ERROR_EXCEPTION.
// source: queue the callback was invoked for.
// data:   the test_debug_data registered with the queue.
//
// Marks the trap as triggered and checks that `source` is the queue the test
// created. Each check is a fatal gtest assertion that returns on failure.
void TestDebugTrap(hsa_status_t status, hsa_queue_t *source, void *data) {
  std::cout<< "runtime catched trap instruction successfully"<< std::endl;
  ASSERT_NE(source, nullptr);
  ASSERT_NE(data, nullptr);

  test_debug_data *trap_state = static_cast<test_debug_data*>(data);
  hsa_queue_t *expected_queue = *(trap_state->queue_pointer);
  // Recorded before the remaining checks, so the flag is set even when one
  // of them fails.
  trap_state->trap_triggered = true;

  // the callback must report an exception status
  ASSERT_EQ(status, HSA_STATUS_ERROR_EXCEPTION);

  // the reporting queue must be the one referenced by the user data
  ASSERT_EQ(source->id, expected_queue->id);
  std::cout<< "custom queue error handler completed successfully"<< std::endl;
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/debug_basic.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_DEBUG_BASIC_H_
#define ROCRTST_SUITES_FUNCTIONAL_DEBUG_BASIC_H_

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test case for debug support: dispatches a kernel that raises a
// debug trap and checks that the queue error callback is invoked.
class DebugBasicTest : public TestBase {
 public:
  DebugBasicTest();

  // @Brief: Destructor for the DebugBasicTest class
  virtual ~DebugBasicTest();

  // @Brief: Prepare the environment the test runs in
  virtual void SetUp();

  // @Brief: Execute the core of the test
  virtual void Run();

  // @Brief: Clean up and retrieve the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();

  // @Brief: Runs the vector-add debug-trap subtest on every GPU agent and
  // verifies that the queue error callback fires for each of them
  void VectorAddDebugTrapTest();

 private:
  // @Brief: Debug-trap subtest body for a single CPU/GPU agent pair
  void VectorAddDebugTrapTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_DEBUG_BASIC_H_


================================================
FILE: rocrtst/suites/functional/ipc.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

//
//        Parent Process
//  Allocate a block of gpu-local memory
//  Print log message about allocation
//  Acquire access to gpu-local memory
//    This step may not be needed
//  Obtain a IPC handle for gpu-local memory
//  Print log message about getting IPC handle
//  Initialize DWords of gpu-local memory with 0x01
//  Print log message about updating gpu-local memory
//  Create a Signal that is capable of IPC
//  Obtain a IPC handle to signal
//  Print log message about signalling Child process
//  Signal Child process that it can proceed
//  Print log message about waiting for signal from Child process
//  Wait for Child processes signal
//  Verify Child has updated DWords of gpu-local memory to 0x02
//  Print log message about validation of gpu-local memory
//  Set the DWords of gpu-local memory with 0x03
//  Signal Child process that it can proceed  by setting signal to 3
//  Wait for Child processes signal
//  Verify Child has updated DWords of gpu-local memory to 0x04
//  Print log message that IPC test passed
//
//        Child Process
//  Print log message about waiting for signal from Parent process
//  Wait/Yield for Parent process signal
//  Validate Parent process signal is per expectation
//  Attach to IPC memory handle shared by Parent process
//  Print log message about successful acquisition of IPC memory handle
//  Print log message about successful acquisition of IPC signal handle
//  Verify Parent process has updated every DWord of Gpu buffer to 0x01
//  Update every DWord of Gpu buffer with 0x02 value
//  Print log message about validation of Gpu buffer state i.e every DWord has 0x01
//  Register a callback using hsa_amd_signal_async_handler on the ipc signal
//    - the callback function will update gpu-local memory DWords to 0x04
//    - and update a local token to indicate that the callback happened.
//  Signal the parent process that it can proceed by setting signal to 2
//  Wait for callback function to update the local token.
//  Signal the parent process that it can proceed by setting signal to 4
//  Wait for parent to set signal to a negative value (-1) to indicate that it can clean-up and exit.
//
// The comments provided below are focused more on the use of common rocrtst
// utilities and boilerplate code, rather than the example app. itself.
//

#include <sys/mman.h>

#include <algorithm>
#include <vector>
#include <atomic>

#include "suites/functional/ipc.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Element-count constant; not referenced in the portion of this file shown
// here.
static const uint32_t kNumBufferElements = 256;

// Bundle of agent handles, memory-pool handles and a GPU allocation granule.
// NOTE(review): no use of this struct appears in the visible part of the
// file — confirm it is still needed.
struct callback_args {
  hsa_agent_t host;                // CPU-side agent handle
  hsa_agent_t device;              // GPU-side agent handle
  hsa_amd_memory_pool_t cpu_pool;  // memory pool associated with `host`
  hsa_amd_memory_pool_t gpu_pool;  // memory pool associated with `device`
  size_t gpu_mem_granule;          // allocation size granule for the GPU pool
};

// Wrap printf to add first or second process indicator.
//
// Prints "line:<line> P<n>: " followed by the formatted message, where <n> is
// 0 for the parent process and 1 for the second (child) process. The parent
// only logs at VERBOSE_STANDARD verbosity or higher; the child always logs.
// Relies on verbosity() and parentProcess_ being in scope at the call site.
// The process indicator is passed as unsigned to match the %u conversion.
#define PROCESS_LOG(format, ...)  { \
    if (verbosity() >= VERBOSE_STANDARD || !parentProcess_) { \
      fprintf(stdout, "line:%d P%u: " format, \
                   __LINE__, static_cast<unsigned int>(!parentProcess_), \
                   ##__VA_ARGS__); \
    } \
}

// Fork safe ASSERT_EQ.
//
// Usage: FORK_ASSERT_EQ(x, y) or FORK_ASSERT_EQ(x, y, msg).
//
// Helper selectors over the variadic tail:
//   MSG(...) yields its second argument. It is invoked as
//            MSG(__VA_ARGS__, ""), so it produces `msg` when one was given
//            and "" otherwise.
//   Y(...)   yields its first argument, i.e. `y`.
#define MSG(y, msg, ...) msg
#define Y(y, ...) y

// When x != y: streams the optional message to std::cout, records the failure
// for the other process by writing -1 to shared_->parent_status (parent) or
// shared_->child_status (child), and then raises the regular ASSERT_EQ, which
// returns from the enclosing function.
//
// Relies on parentProcess_ and shared_ being in scope at the call site.
// NOTE(review): the inequality is tested twice in nested, identical `if`s, and
// `x`/`y` are evaluated again by ASSERT_EQ — confirm whether the repeated
// check is intentional.
// NOTE(review): the macro expands to a bare `if`, so it must not be followed
// by an `else` at the call site.
#define FORK_ASSERT_EQ(x, ...)                                                    \
  if ((x) != (Y(__VA_ARGS__))) {                                                  \
    if ((x) != (Y(__VA_ARGS__))) {                                                \
      std::cout << MSG(__VA_ARGS__, "");                                          \
      if (parentProcess_) {                                                       \
        shared_->parent_status = -1;                                              \
      } else {                                                                    \
        shared_->child_status = -1;                                               \
      }                                                                           \
      ASSERT_EQ(x, Y(__VA_ARGS__));                                               \
    }                                                                             \
  }

// Fault injection hook: when the user-selected failure key (usr_fail_val_,
// read from the ROCR_IPC_FAIL_KEY environment variable in SetUp) equals `z`,
// log the key and the old/new values and overwrite `x` with `y`, so that the
// check following the macro fails.
//
//   x: lvalue holding the result to be overwritten
//   y: value to inject
//   z: key identifying this injection point
//
// All arguments are parenthesized where they are streamed so that expression
// arguments (e.g. "a | b") compile and print as intended. The macro is kept
// as a bare `if` for compatibility with existing call sites, so it must not
// be followed by an `else`.
#define USR_TRIGGERED_FAILURE(x, y, z)                                            \
  if (usr_fail_val_ == (z)) {                                                     \
    std::cout << "Env value is: " << (z) << std::endl;                            \
    std::cout << "Return value before: " << (x) << std::endl;                     \
    std::cout << "Return value  after: " << (y) << std::endl << std::flush;       \
    (x) = (y);                                                                    \
  }

// Configures the test's defaults: iteration count, title and description.
IPCTest::IPCTest() : TestBase() {
  // Default number of iterations of the main test; it can be overridden on
  // the command line.
  set_num_iteration(10);

  set_title("IPC Test");
  set_description("IPCTest verifies that the IPC feature of RocR is "
      "functioning as expected. The test first forks off second process. The "
      "2 processes share pointers to RocR allocated memory and also share "
      "signal handles");
}

// Nothing to release in the destructor.
IPCTest::~IPCTest() {}

// Handshake helper for the shared token.
// If the token currently holds the error marker (-1), leave it untouched and
// return -1. Otherwise store newVal into the token and return 0.
static int CheckAndSetToken(std::atomic<int> *token, int newVal) {
  const bool error_reported = (token->load() == -1);
  if (error_reported) {
    return -1;
  }

  token->store(newVal);
  return 0;
}

// Resets every field of the shared control block: the token, the buffer
// count/size, both per-process status fields, and the IPC memory and signal
// handles (zero-filled).
static void ClearShared(Shared *s) {
  Shared &block = *s;

  block.token = 0;
  block.count = 0;
  block.size = 0;
  block.child_status = 0;
  block.parent_status = 0;

  memset(&block.handle.handle, 0, sizeof(hsa_amd_ipc_memory_t));
  memset(&block.signal_handle, 0, sizeof(hsa_amd_ipc_signal_t));
}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
//
// Sequence:
//   1. Read the optional ROCR_IPC_FAIL_KEY fault-injection key.
//   2. Map an anonymous shared control block and clear it.
//   3. fork(); parent and child perform a two-step handshake through
//      shared_->token before either of them touches HSA.
//   4. In both processes: TestBase::SetUp() (which calls hsa_init()), default
//      agents, typical pools, and the GPU pool's allocation granule.
//
// Failures are reported through fatal assertions, which return immediately.
void IPCTest::SetUp(void) {
  hsa_status_t err;

  // Allow user to trigger a failure
  const char* env_val = getenv("ROCR_IPC_FAIL_KEY");
  if (env_val != NULL) {
    usr_fail_val_ = atoi(env_val);
  }

  // We must fork process before doing HSA stuff, specifically, hsa_init, as
  // each process needs to do this.
  // Allocate linux shared_ memory.
  shared_ = reinterpret_cast<Shared*>(
      mmap(nullptr, sizeof(Shared), PROT_READ | PROT_WRITE,
                                          MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  ASSERT_NE(shared_, MAP_FAILED) << "mmap failed to allocated shared_ memory";

  // Initialize shared control block to zeros. The field "token"
  // is used to signal state changes between the 2 processes.
  ClearShared(shared_);

  // Spawn second process and verify communication
  child_ = fork();
  ASSERT_NE(-1, child_) << "fork failed";
  std::atomic<int> * token = &shared_->token;
  if (child_ != 0) {
    parentProcess_ = true;

    // Signal to other process we are waiting, and then wait for it to change
    // the token...
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }

    // ...then repeat once more to complete the handshake.
    PROCESS_LOG("Second process observed, handshake...\n");
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }

  } else {
    parentProcess_ = false;
    set_verbosity(0);
    PROCESS_LOG("Second process running.\n");

    // Wait for the parent to announce itself.
    while (*token == 0) {
      sched_yield();
    }

    // Acknowledge by resetting the token, unless it holds the error marker.
    int ret;
    ret = CheckAndSetToken(token, 0);
    ASSERT_EQ(0, ret) << "Error detected in child process\n";
    // Wait for handshake
    while (*token == 0) {
      sched_yield();
    }
    ret = CheckAndSetToken(token, 0);
    ASSERT_EQ(0, ret) << "Error detected in child process\n";
  }
  // TestBase::SetUp() will set HSA_ENABLE_INTERRUPT if enable_interrupt() is
  // true, and call hsa_init(). It also prints the SetUp header.
  TestBase::SetUp();

  // SetDefaultAgents(this) will assign the first CPU and GPU found on
  // iterating through the agents and assign them to cpu_device_ and
  // gpu_device1_, respectively (cpu_device() and gpu_device1()). These
  // BaseRocR member variables are used in some utilities. Additionally,
  // SetDefaultAgents() checks the profile of the gpu and compares this
  // to any required profile.
  err = rocrtst::SetDefaultAgents(this);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Find and assign HSA_AMD_SEGMENT_GLOBAL pools for cpu, gpu and a kern_arg
  // pool
  err = rocrtst::SetPoolsTypical(this);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

// Update the size granularity for allocations
#ifdef ROCRTST_EMULATOR_BUILD
  gpu_mem_granule = 4;
#else
  err = hsa_amd_memory_pool_get_info(device_pool(), HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
                                     &gpu_mem_granule);
  // gpu_mem_granule sizes every allocation in the test, so do not continue
  // with an unset value if the query failed.
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Failed to query gpu memory allocation granule\n");
#endif

  return;
}

// The statistics discard some of the initial and final iterations, so run a
// few more than requested: 1.2 times the configured count, plus one.
uint32_t IPCTest::RealIterationNum(void) {
  const double padded_iterations = num_iteration() * 1.2 + 1;
  return padded_iterations;
}

/*
 * Argument block for SignalCallbackHandler.
 *
 * When the handler is invoked with a signal value equal to exp_sig_value, it
 * calls obj->CheckAndFillBuffer(destination, exp_value, new_value) and then
 * increments token so that the registering thread can see that the callback
 * ran.
 */
struct signal_cb_handler_data {
  IPCTest *obj;                      // test object whose CheckAndFillBuffer is called
  hsa_signal_value_t exp_sig_value;  // signal value the handler acts on
  uint32_t exp_value;                // passed to CheckAndFillBuffer as the expected value
  uint32_t *destination;             // buffer passed to CheckAndFillBuffer
  uint32_t new_value;                // passed to CheckAndFillBuffer as the new value
  std::atomic<int> token;            // incremented once the handler has done its work
};

// Async signal handler registered via hsa_amd_signal_async_handler().
//
// value: signal value the handler was invoked with.
// arg:   the signal_cb_handler_data supplied at registration.
//
// If `value` equals the expected signal value, checks/updates the destination
// buffer through IPCTest::CheckAndFillBuffer and bumps the token. Always
// returns false, which stops further monitoring of this callback.
bool SignalCallbackHandler(hsa_signal_value_t value, void* arg) {
  signal_cb_handler_data* handler_state = static_cast<signal_cb_handler_data*>(arg);

  if (value == handler_state->exp_sig_value) {
    handler_state->obj->CheckAndFillBuffer(handler_state->destination,
                                           handler_state->exp_value,
                                           handler_state->new_value);
    handler_state->token++;
  }

  /* return false to stop monitoring this callback */
  return false;
}

// Child-process half of the IPC test.
//
// Steps:
//   1. Wait for the parent to set the shared token to 1.
//   2. Attach to the IPC memory and IPC signal handles published in shared_.
//   3. Verify the buffer holds first_val_ and overwrite it with second_val_.
//   4. Register SignalCallbackHandler on the IPC signal (condition >= 3); the
//      handler checks for third_val_ and writes fourth_val_.
//   5. Store 2 to the signal, wait for the handler to run, then store 4.
//   6. Wait until the signal turns negative (expected value -1), then detach
//      the memory and destroy the signal.
//
// Each USR_TRIGGERED_FAILURE line is a fault-injection point keyed by the
// ROCR_IPC_FAIL_KEY value (200-205); FORK_ASSERT_EQ records a failure in
// shared_ and returns from this function.
void IPCTest::ChildProcessImpl() {

  // Yield until shared token value changes i.e. is updated by parent.
  // Validate parent's update is per expectation
  PROCESS_LOG("Child: Waiting for parent process to signal\n");
  while (shared_->token == 0) {
    sched_yield();
  }
  // Any value other than 1 is unexpected: write the error marker to the
  // token before asserting.
  if (shared_->token != 1) {
    shared_->token = -1;
  }
  FORK_ASSERT_EQ(1, shared_->token, "Child: Error detected in signaling token\n");
  PROCESS_LOG("Child: Waking upon signal from parent process\n");

  // List of devices involved in test. Gpu device is used
  // to allocate buffer and signal that are part of an IPC
  // transaction. Cpu is used in support of initialization
  // of Gpu buffer
  hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};

  // Attach to IPC memory handle shared by parent process
  // NOTE(review): the count argument is 1 although ag_list has two entries,
  // so presumably only the GPU agent is passed — confirm this is intended.
  void* ipc_ptr;
  hsa_status_t err;
  err = hsa_amd_ipc_memory_attach(const_cast<hsa_amd_ipc_memory_t*>(&shared_->handle),
                                  shared_->size, 1, ag_list, &ipc_ptr);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 200);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Child: Failure in attaching to IPC memory handle\n");
  PROCESS_LOG("Child: Attached to IPC buffer shared by parent process\n");
  PROCESS_LOG("Child: Address of buffer enabled for IPC: %p\n", ipc_ptr);

  // Attach to IPC signal handle shared by parent process
  hsa_signal_t ipc_signal;
  err = hsa_amd_ipc_signal_attach(const_cast<hsa_amd_ipc_signal_t*>(&shared_->signal_handle),
                                  &ipc_signal);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 201);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Child: Failure in attaching to IPC signal handle\n");
  PROCESS_LOG("Child: Attached to IPC signal shared by parent process\n");

  // Validate Gpu buffer is filled per expectation i.e. if so update
  // per previously agreed upon value (first_val_ and second_val_)
  CheckAndFillBuffer(reinterpret_cast<uint32_t*>(ipc_ptr), first_val_, second_val_);
  PROCESS_LOG("Child: Confirmed DWord's of IPC buffer has: %d\n", first_val_);
  PROCESS_LOG("Child: Updated DWord's of IPC buffer to: %d\n", second_val_);

  // Register an async handler, we wait for parent process to set buffer value to
  // third_val_. During the callback, SignalCallbackHandler will update the
  // buffer to fourth_val_ and increment child_cb_data.token
  struct signal_cb_handler_data child_cb_data;
  child_cb_data.obj = this;
  child_cb_data.exp_sig_value = 3;
  child_cb_data.exp_value = third_val_;
  child_cb_data.destination = reinterpret_cast<uint32_t*>(ipc_ptr);
  child_cb_data.new_value = fourth_val_;
  child_cb_data.token = 0;

  err = hsa_amd_signal_async_handler(ipc_signal, HSA_SIGNAL_CONDITION_GTE, 3, &SignalCallbackHandler, &child_cb_data);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 202);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Child: Failure registering async_handler to ipc_signal\n");
  PROCESS_LOG("Child: [pid:%d] Attached async handler to IPC signal shared by parent process\n", getpid());

  // Signal parent process to wake up and continue.
  // The next time parent process updates ipc_signal, SignalCallbackHandler will
  // be called
  hsa_signal_store_release(ipc_signal, 2);

  // Wait for SignalCallbackHandler to be called
  while (child_cb_data.token <= 0)
    sched_yield();

  PROCESS_LOG("Child: Confirmed DWord's of IPC buffer has: %d\n", third_val_);
  PROCESS_LOG("Child: Updated DWord's of IPC buffer to: %d\n", fourth_val_);

  // Signal parent process to wake up and continue
  hsa_signal_store_release(ipc_signal, 4);

  // Wait (in timeout_-bounded slices) until the signal becomes negative.
  // If a failure has been recorded for the child in the meantime, exit the
  // process instead of waiting further.
  hsa_signal_value_t ret = 1;
  while(true) {
    ret = hsa_signal_wait_acquire(ipc_signal, HSA_SIGNAL_CONDITION_LT, 0, timeout_, HSA_WAIT_STATE_BLOCKED);
    if (shared_->child_status == -1) {
      exit(0);
    }
    if (ret < 0) {
      break;
    }
  }
  USR_TRIGGERED_FAILURE(ret, HSA_STATUS_ERROR, 203);
  // The value asserted is -1, so the message reports -1 as the expectation.
  FORK_ASSERT_EQ(-1, ret, "Child: Expected signal value of -1, but got " << ret << "\n");

  // Detach IPC memory that was used to test
  err = hsa_amd_ipc_memory_detach(ipc_ptr);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 204);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Child: Failure in detaching IPC memory handle\n");
  PROCESS_LOG("Child: Detached IPC memory handle\n");

  // Reset the signal object and release acquired resources
  err = hsa_signal_destroy(ipc_signal);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 205);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Child: Failure in destroying IPC signal handle\n");
  PROCESS_LOG("Child: IPC test PASSED\n");
}

// Parent-process half of the IPC test.
//
// Steps performed here:
//   1. Allocate a throw-away device buffer and then the real IPC buffer, so
//      that the IPC buffer is not the first allocation of the pool.
//   2. Fill the IPC buffer with first_val_ and publish IPC handles for the
//      buffer and for an IPC-capable signal through the shared_ structure.
//   3. Wait for the signal to reach 2, verify the buffer holds second_val_
//      and overwrite it with third_val_, then store 3 into the signal.
//   4. Wait for the signal to reach 4, verify the buffer holds fourth_val_,
//      store -1 into the signal and release all resources.
//   5. Wait for the child process to terminate and unmap shared_.
//
// While waiting, a child_status of -1 in shared_ makes the parent exit(0).
void IPCTest::ParentProcessImpl() {

  // Ignoring the first allocation to exercise fragment allocation.
  hsa_status_t err;
  uint32_t* discard = NULL;
  err = hsa_amd_memory_pool_allocate(device_pool(), gpu_mem_granule, 0,
                                     reinterpret_cast<void**>(&discard));
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 100);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to allocate gpu memory\n");

  // Allocate some VRAM that is used to test IPC
  uint32_t* gpuBuf = NULL;
  err = hsa_amd_memory_pool_allocate(device_pool(), gpu_mem_granule, 0,
                                     reinterpret_cast<void**>(&gpuBuf));
  // The status must be checked here: it is overwritten by the free below, and
  // every later step operates on gpuBuf.
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to allocate gpu memory\n");
  PROCESS_LOG("Parent: Allocated framebuffer of size: %zu\n", gpu_mem_granule);
  PROCESS_LOG("Parent: Address of allocated framebuffer: %p\n", gpuBuf);

  // Free the test allocation of memory block
  err = hsa_amd_memory_pool_free(discard);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 101);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to free gpu memory\n");

  // List of devices involved in test. Gpu device is used
  // to allocate buffer and signal that are part of an IPC
  // transaction. Cpu is used in support of initialization
  // of Gpu buffer
  hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};

  // Grant access to buffer to participating devices
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, gpuBuf);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 102);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to get access to gpu memory\n");

  // Update shared data structure's buffer related parameters
  shared_->size = gpu_mem_granule;
  shared_->count = gpu_mem_granule / sizeof(uint32_t);

  // Initialize every DWord of IPC buffer with a value per previous
  // agreement i.e. first_val_
  err = hsa_amd_memory_fill(gpuBuf, first_val_, shared_->count);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 103);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to initialize gpu memory\n");
  PROCESS_LOG("Parent: Initialized Dword's of framebuffer with: %d\n", first_val_);

  // Create an IPC memory handle. IPC handle value is shared with
  // child process via a shared data structure
  err = hsa_amd_ipc_memory_create(gpuBuf, gpu_mem_granule,
                                  const_cast<hsa_amd_ipc_memory_t*>(&shared_->handle));
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 104);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to create IPC memory handle\n");
  PROCESS_LOG("Parent: Created IPC handle for framebuffer: %p\n", gpuBuf);

  // Create a signal that is capable of IPC. Also obtain a IPC handle
  // which is shared with child process via a shared data structure
  hsa_signal_t ipc_signal;
  err = hsa_amd_signal_create(1, 0, NULL, HSA_AMD_SIGNAL_IPC, &ipc_signal);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 105);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to create IPC signal\n");
  err = hsa_amd_ipc_signal_create(ipc_signal,
                                  const_cast<hsa_amd_ipc_signal_t*>(&shared_->signal_handle));
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 106);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to create IPC signal handle\n");
  PROCESS_LOG("Parent: Created IPC handle associated with ipc_signal\n");

  // Signal child process that the gpu buffer is ready to read.
  PROCESS_LOG("Parent: Signalling child proces process\n");
  CheckAndSetToken(&shared_->token, 1);
  PROCESS_LOG("Parent: Waiting for signal from child process\n");

  // Wait for child process to signal. Child will update signal object
  // value to TWO (2). Check signal value is per expectation.
  // The wait is bounded by timeout_, so it is retried until the condition
  // holds; each wake-up also checks whether the child reported a failure.
  hsa_signal_value_t ret = 1;
  while(true) {
    ret = hsa_signal_wait_acquire(ipc_signal, HSA_SIGNAL_CONDITION_GTE, 2, timeout_, HSA_WAIT_STATE_BLOCKED);
    if (shared_->child_status == -1) {
      exit(0);
    }
    if (ret >= 2) {
      break;
    }
  }
  USR_TRIGGERED_FAILURE(ret, HSA_STATUS_ERROR, 107);
  FORK_ASSERT_EQ(2, ret, "Parent: Expected signal value of 2, but got " << ret << "\n");

  // Verify child process has updated all DWords of buffer per
  // previously agreed upon values (second_val_ and third_val_)
  CheckAndFillBuffer(gpuBuf, second_val_, third_val_);
  PROCESS_LOG("Parent: Confirmed DWord's of frambuffer has: %d\n", second_val_);
  PROCESS_LOG("Parent: Updated DWord's of framebuffer to: %d\n", third_val_);

  // Tell the child that the buffer now holds third_val_
  hsa_signal_store_relaxed(ipc_signal, 3);

  // Wait for the child to acknowledge by raising the signal to FOUR (4)
  while(true) {
    ret = hsa_signal_wait_acquire(ipc_signal, HSA_SIGNAL_CONDITION_GTE, 4, timeout_, HSA_WAIT_STATE_BLOCKED);
    if (shared_->child_status == -1) {
      exit(0);
    }
    if (ret >= 4) {
      break;
    }
  }

  // Verify the buffer now holds fourth_val_ and clear it
  CheckAndFillBuffer(gpuBuf, fourth_val_, 0);
  PROCESS_LOG("Parent: Confirmed DWord's of frambuffer has: %d\n", fourth_val_);

  USR_TRIGGERED_FAILURE(ret, HSA_STATUS_ERROR, 108);
  FORK_ASSERT_EQ(4, ret, "Parent: Expected signal value of 4, but got " << ret << "\n");

  // Reset the signal object and release acquired resources
  hsa_signal_store_relaxed(ipc_signal, -1);
  err = hsa_signal_destroy(ipc_signal);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 109);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failure in destroying IPC signal\n");
  err = hsa_amd_memory_pool_free(gpuBuf);
  USR_TRIGGERED_FAILURE(err, HSA_STATUS_ERROR, 110);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "Parent: Failed to free gpu memory\n");
  PROCESS_LOG("Parent: IPC test PASSED\n");

  // Wait for child process to terminate before exiting
  int exit_status = 0;
  waitpid(child_, &exit_status, 0);
  munmap(shared_, sizeof(Shared));
}

// Prints one line to stdout naming the two agents used by the test, the CPU
// agent and the GPU agent, each followed by its BDF id in parentheses.
void IPCTest::PrintVerboseMesg(void) {
  // Collect names of the CPU and GPU agents
  hsa_status_t err;
  char name1[64] = {0};
  char name2[64] = {0};
  err = hsa_agent_get_info(*cpu_device(), HSA_AGENT_INFO_NAME, name1);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "hsa_agent_get_info() failed\n");
  err = hsa_agent_get_info(*gpu_device1(), HSA_AGENT_INFO_NAME, name2);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err, "hsa_agent_get_info() failed\n");

  // Collect BDF information of both agents. Zero-initialized so that nothing
  // indeterminate can ever be printed.
  uint32_t loc1 = 0;
  uint32_t loc2 = 0;
  err = hsa_agent_get_info(*cpu_device(), (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc1);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  err = hsa_agent_get_info(*gpu_device1(), (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc2);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Print the name and BDF info about the devices. The BDF ids are unsigned,
  // so they are printed with %u rather than %d.
  fprintf(stdout, "Using: %s (%u) and %s (%u)\n", name1, loc1, name2, loc2);
}

void IPCTest::CheckAndFillBuffer(void* gpu_src_ptr, uint32_t exp_cur_val, uint32_t new_val) {
  uint32_t* sysBuf;
  hsa_status_t err;
  hsa_signal_value_t sig;
  hsa_signal_t copy_signal;

  // Bind the size granularity of allocation
  size_t sz = gpu_mem_granule;

  // Allocate a signal to track copy progress
  err = hsa_signal_create(1, 0, NULL, &copy_signal);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Allocate buffer in system memory to validate
  err = hsa_amd_memory_pool_allocate(cpu_pool(), sz, 0, reinterpret_cast<void**>(&sysBuf));
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Enable access to buffer in system memory
  hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, sysBuf);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Copy data to buffer in system memory
  err = hsa_amd_memory_async_copy(sysBuf, *cpu_device(), gpu_src_ptr, *gpu_device1(), sz, 0, NULL,
                                  copy_signal);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Wait for copy to complete
  sig = hsa_signal_wait_relaxed(copy_signal,
                   HSA_SIGNAL_CONDITION_LT, 1, -1, HSA_WAIT_STATE_BLOCKED);
  FORK_ASSERT_EQ(0, sig, "Expected signal 0, but got " << sig << "\n");

  // Validate buffer has expected data
  uint32_t count = sz / sizeof(uint32_t);
  for (uint32_t idx = 0; idx < count; idx++) {
    if (exp_cur_val != sysBuf[idx]) {
      PROCESS_LOG("Validation failed: expected: %d observed: %d at index: %d\n",
                  exp_cur_val, sysBuf[idx], idx);
      FORK_ASSERT_EQ(exp_cur_val, sysBuf[idx]);
    }
    sysBuf[idx] = new_val;
  }

  // Reset copy signal and update buffer in Gpu with new value
  hsa_signal_store_relaxed(copy_signal, 1);
  err = hsa_amd_memory_async_copy(gpu_src_ptr, *gpu_device1(), sysBuf, *cpu_device(), sz, 0, NULL,
                                  copy_signal);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Wait for copy to complete
  sig = hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1, HSA_WAIT_STATE_BLOCKED);
  FORK_ASSERT_EQ(sig, 0, "Expected signal 0, but got " << sig << "\n");

  // Release resources allocated by this method
  err = hsa_signal_destroy(copy_signal);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  err = hsa_amd_memory_pool_free(sysBuf);
  FORK_ASSERT_EQ(HSA_STATUS_SUCCESS, err);
}

// Runs the test body for the current process: the parent executes
// ParentProcessImpl() and returns, the child executes ChildProcessImpl() and
// then terminates with exit(0).
void IPCTest::Run(void) {
  TestBase::Run();

  // Debug information is only printed at standard verbosity or above
  if (verbosity() >= VERBOSE_STANDARD) {
    PrintVerboseMesg();
  }

  // Note: Close() (and hsa_shut_down()) will be called from main()
  // parentProcess_ is true for the parent process, false for the child process
  if (!parentProcess_) {
    ChildProcessImpl();
    exit(0);
  }

  ParentProcessImpl();
}

// Delegates to TestBase::DisplayTestInfo(); no IPC-specific information is
// added here.
void IPCTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// Delegates to TestBase::DisplayResults(); this test has no results of its
// own to report.
void IPCTest::DisplayResults(void) const {
  TestBase::DisplayResults();
}

// Releases the test's resources by delegating to TestBase::Close().
void IPCTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}

#undef PROCESS_LOG
#undef FORK_ASSERT_EQ
#undef MSG
#undef Y


================================================
FILE: rocrtst/suites/functional/ipc.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_IPC_H_
#define ROCRTST_SUITES_FUNCTIONAL_IPC_H_

#include <sys/types.h>
#include <unistd.h>
#include <atomic>

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Control block through which the parent and child test processes exchange
// state. IPCTest holds it as shared_ and the parent releases it with
// munmap(shared_, sizeof(Shared)).
struct Shared {
  // Handshake token; the parent passes it to CheckAndSetToken(..., 1) once the
  // IPC handles below have been created.
  std::atomic<int> token;
  // Number of uint32_t elements in the shared GPU buffer
  // (the parent stores gpu_mem_granule / sizeof(uint32_t)).
  std::atomic<int> count;
  // Size of the shared GPU buffer in bytes (the parent stores gpu_mem_granule).
  std::atomic<size_t> size;
  // Checked after every signal wait; a value of -1 makes the waiting process
  // call exit(0).
  std::atomic<int> child_status;
  // NOTE(review): presumably the parent-side counterpart of child_status -
  // confirm against ipc.cc.
  std::atomic<int> parent_status;
  // IPC handle of the GPU buffer, written by hsa_amd_ipc_memory_create().
  hsa_amd_ipc_memory_t handle;
  // IPC handle of the signal, written by hsa_amd_ipc_signal_create().
  hsa_amd_ipc_signal_t signal_handle;
};

// Test case that shares a GPU buffer and a signal between a parent and a
// child process through HSA IPC handles. Each process runs its own half of
// the handshake (ParentProcessImpl / ChildProcessImpl).
class IPCTest : public TestBase {
 public:
    IPCTest();

  // @Brief: Destructor for the IPC test case
  virtual ~IPCTest();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Implements child process exclusive logic
  void ChildProcessImpl();

  // @Brief: Implements parent process exclusive logic
  void ParentProcessImpl();

  // @Brief: Checks that every DWord of the GPU buffer holds exp_cur_val
  // and then overwrites the buffer with new_val
  void CheckAndFillBuffer(void* gpu_src_ptr, uint32_t exp_cur_val, uint32_t new_val);

 private:
  // @Brief: Bind number of iterations to run per user specification
  uint32_t RealIterationNum(void);

  // @Brief: Collect and print verbose messages to enable debugging
  void PrintVerboseMesg(void);

  // @Brief: Values written to the shared framebuffer at the successive
  // steps of the parent/child handshake
  uint32_t first_val_ = 0x01;
  uint32_t second_val_ = 0x02;
  uint32_t third_val_ = 0x03;
  uint32_t fourth_val_ = 0x04;
  uint32_t fifth_val_ = 0x05;

  // Passed to waitpid() by the parent to wait for the child process
  int child_;
  // Control block shared by parent and child (see struct Shared)
  Shared* shared_;
  // True in the parent process, false in the child process
  bool parentProcess_;
  // Size in bytes of the GPU buffer that is shared through IPC
  size_t gpu_mem_granule;

  // Supports user triggered failure
  // NOTE(review): 0xFFFFFFFF does not fit in int32_t; presumably meant as -1,
  // i.e. "no failure requested" - confirm against USR_TRIGGERED_FAILURE.
  int32_t usr_fail_val_ = 0xFFFFFFFF;

  // Specifies timeout period for parent/child processes; passed as the
  // timeout hint of hsa_signal_wait_acquire()
  int32_t timeout_ = 0x20000;
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_IPC_H_


================================================
FILE: rocrtst/suites/functional/memory_access.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>

#include "suites/functional/memory_access.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"


// If `err` is not HSA_STATUS_SUCCESS, prints the source location, the status
// value and its hsa_status_string() text to std::cout, then returns `err`
// from the enclosing function. Only usable in functions whose return type is
// compatible with the status value. Note that `err` is evaluated more than
// once. The macro is not invoked anywhere in this file and is #undef'd at the
// end of it.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}


// Sets the default iteration count, title and description of the memory
// access test.
MemoryAccessTest::MemoryAccessTest(void) :
    TestBase() {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.

  set_title("RocR Memory Access Tests");
  // Adjacent string literals are concatenated verbatim, so each fragment
  // needs its own trailing space.
  set_description("This series of tests check memory allocation "
    "on GPU and CPU, i.e. GPU access to system memory "
    "and CPU access to GPU memory.");
}

// Nothing to release here; cleanup happens in Close().
MemoryAccessTest::~MemoryAccessTest() {}

// One-time setup of the member variables used by the rest of the test:
// runs the base-class setup, then rocrtst::SetDefaultAgents() and
// rocrtst::SetPoolsTypical() on this test object.
void MemoryAccessTest::SetUp(void) {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryAccessTest::Run(void) {
  const bool profile_ok = rocrtst::CheckProfile(this);
  if (profile_ok) {
    TestBase::Run();
  }
}

// Delegates to TestBase::DisplayTestInfo(); nothing test-specific is added.
void MemoryAccessTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test prints no results of its own. The required profile is still
// compared with the one we are running on, but the outcome does not change
// what happens here.
void MemoryAccessTest::DisplayResults(void) const {
  static_cast<void>(!rocrtst::CheckProfile(this));
}

// Releases the test's resources by delegating to TestBase::Close().
void MemoryAccessTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Kernel argument block for the "gpuReadWrite" kernel; it is passed to the
// dispatch through aql.kernarg_address. Declared with 16-byte alignment.
typedef struct  __attribute__ ((aligned(16)))  args_t {
     int *a;  // system-memory input buffer
     int *b;  // system-memory buffer the GPU writes to
     int *c;  // GPU-memory buffer used to verify what the GPU read
  } args;

  // Kernarg buffer allocated and freed by GPUAccessToCPUMemoryTest().
  // Note that this is a namespace-scope variable with external linkage.
  args *kernArgs = NULL;

// Line printed after a subtest to visually separate it from the next one.
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner "  *** Memory Subtest: <header> ***" followed by a
// newline (and flush) to std::cout.
static void PrintMemorySubtestHeader(const char *header) {
  std::cout << "  *** Memory Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Number of int elements in each buffer used by GPUAccessToCPUMemoryTest();
// the same value is used as the dispatch grid size in x. Emulator builds use
// a much smaller value.
#if ROCRTST_EMULATOR_BUILD
static const int kMemoryAllocSize = 8;
#else
static const int kMemoryAllocSize = 1024;
#endif


// Test to check GPU can read & write to system memory.
//
// Dispatches the "gpuReadWrite" kernel on gpuAgent with three buffers:
// a (system memory, input), b (system memory, written by the GPU) and
// c (GPU memory). After the dispatch the test expects c[i] == a[i] and
// b[i] == i for every element.
//
// The results in GPU memory are read directly by the CPU, so the subtest is
// skipped when cpuAgent can never access the GPU's global pool (system is
// not large BAR).
//
// cpuAgent: CPU agent whose pools provide the host and kernarg buffers.
// gpuAgent: GPU agent that runs the kernel and owns the GPU buffer.
void MemoryAccessTest::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent,
                                                   hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Get Global Memory Pool on the gpuAgent to allocate gpu buffers.
  // Zero-initialized so that "no pool found" can be detected instead of
  // reading an indeterminate handle.
  hsa_amd_memory_pool_t gpu_pool;
  memset(&gpu_pool, 0, sizeof(gpu_pool));
  err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
                                            rocrtst::GetGlobalMemoryPool,
                                            &gpu_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if (gpu_pool.handle == 0) {
    std::cout << "no global mempool in gpu agent" << std::endl;
    return;
  }

  // Default to "never allowed" and check the query, so that a failed query
  // can not leave `access` indeterminate.
  hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  err = hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
                                       HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                       &access);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    // hsa objects
    hsa_queue_t *queue = NULL;  // command queue
    hsa_signal_t signal = {0};  // completion signal


    // get queue size
    uint32_t queue_size = 0;
    err = hsa_agent_get_info(gpuAgent,
                                HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // create queue
    err = hsa_queue_create(gpuAgent,
                              queue_size, HSA_QUEUE_TYPE_MULTI,
                              NULL, NULL, 0, 0, &queue);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Get System Memory Pool on the cpuAgent to allocate host side buffers
    hsa_amd_memory_pool_t global_pool;
    err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                              rocrtst::GetGlobalMemoryPool,
                                              &global_pool);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);


    // Find a memory pool that supports kernel arguments.
    hsa_amd_memory_pool_t kernarg_pool;
    err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                              rocrtst::GetKernArgMemoryPool,
                                              &kernarg_pool);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Allocate the host side buffers
    // (sys_data,dup_sys_data,cpuResult,kernArg) on system memory
    int *sys_data = NULL;
    int *dup_sys_data = NULL;
    int *cpuResult = NULL;
    int *gpuResult = NULL;

    err = hsa_amd_memory_pool_allocate(global_pool,
                                      kMemoryAllocSize*sizeof(int), 0,
                                      reinterpret_cast<void **>(&cpuResult));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    err = hsa_amd_memory_pool_allocate(global_pool,
                                      kMemoryAllocSize*sizeof(int), 0,
                                      reinterpret_cast<void **>(&sys_data));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    err = hsa_amd_memory_pool_allocate(global_pool,
                                      kMemoryAllocSize*sizeof(int), 0,
                                      reinterpret_cast<void **>(&dup_sys_data));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);


    // Allocate the kernel argument buffer from the kernarg_pool.
    err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
                                        reinterpret_cast<void **>(&kernArgs));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // initialize the host buffers
    // NOTE(review): `rand_r(&seed) % 1` is always 0, so every element is
    // initialized to 1; kept as is because the expected kernel input range
    // is not visible here.
    for (int i = 0; i < kMemoryAllocSize; ++i) {
      unsigned int seed = time(NULL);
      sys_data[i] = 1 + rand_r(&seed) % 1;
      dup_sys_data[i] = sys_data[i];
    }

    memset(cpuResult, 0, kMemoryAllocSize * sizeof(int));

    // for the dGPU, we have coarse grained local memory,
    // so allocate memory for it on the GPU's GLOBAL segment .

    // Get local memory of GPU to allocate device side buffers

    err = hsa_amd_memory_pool_allocate(gpu_pool,
      kMemoryAllocSize*sizeof(int), 0, reinterpret_cast<void **>(&gpuResult));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);


    // Allow cpuAgent access to all allocated GPU memory.
    err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuResult);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    memset(gpuResult, 0, kMemoryAllocSize * sizeof(int));

    // Allow gpuAgent access to all allocated system memory.
    err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, cpuResult);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, dup_sys_data);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgs);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    kernArgs->a = sys_data;
    kernArgs->b = cpuResult;  // system memory passed to gpu for write
    kernArgs->c = gpuResult;  // gpu memory to verify that gpu read system data


    // Create the executable, get symbol by name and load the code object
    set_kernel_file_name("gpuReadWrite_kernels.hsaco");
    set_kernel_name("gpuReadWrite");
    err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Fill the dispatch packet with
    // workgroup_size, grid_size, kernelArgs and completion signal
    // Put it on the queue and launch the kernel by ringing the doorbell

    // create completion signal
    err = hsa_signal_create(1, 0, NULL, &signal);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // create aql packet
    hsa_kernel_dispatch_packet_t aql;
    memset(&aql, 0, sizeof(aql));

    // initialize aql packet
    aql.workgroup_size_x = 256;
    aql.workgroup_size_y = 1;
    aql.workgroup_size_z = 1;
    aql.grid_size_x = kMemoryAllocSize;
    aql.grid_size_y = 1;
    aql.grid_size_z = 1;
    aql.private_segment_size = 0;
    aql.group_segment_size = 0;
    aql.kernel_object = kernel_object();  // kernel_code;
    aql.kernarg_address = kernArgs;
    aql.completion_signal = signal;

    // const uint32_t queue_size = queue->size;
    const uint32_t queue_mask = queue->size - 1;

    // write to command queue
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    hsa_queue_store_write_index_relaxed(queue, index + 1);

    rocrtst::WriteAQLToQueueLoc(queue, index, &aql);

    // The header is written last, after the packet body is in the queue
    hsa_kernel_dispatch_packet_t *q_base_addr =
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>(queue->base_address);
    rocrtst::AtomicSetPacketHeader(
        (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
           (1 << HSA_PACKET_HEADER_BARRIER) |
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
           (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
                  (1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                          (&q_base_addr[index & queue_mask]));

    // ring the doorbell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);
    // wait for the signal and reset it for future use
    while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1,
                                      (uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }
    hsa_signal_store_relaxed(signal, 1);

    // compare device and host side results
    if (verbosity() > 0) {
      std::cout<< "check gpu has read the system memory"<< std::endl;
    }
    for (int i = 0; i < kMemoryAllocSize; ++i) {
      ASSERT_EQ(gpuResult[i], dup_sys_data[i]);
    }

    if (verbosity() > 0) {
      std::cout<< "gpu has read the system memory successfully"<< std::endl;
      std::cout<< "check gpu has written to system memory"<< std::endl;
    }
    for (int i = 0; i < kMemoryAllocSize; ++i) {
      ASSERT_EQ(cpuResult[i], i);
    }

    if (verbosity() > 0) {
      std::cout<< "gpu has written to system memory successfully"<< std::endl;
    }

    if (sys_data) { hsa_amd_memory_pool_free(sys_data); }
    if (dup_sys_data) { hsa_amd_memory_pool_free(dup_sys_data); }
    if (cpuResult) {hsa_amd_memory_pool_free(cpuResult); }
    if (gpuResult) {hsa_amd_memory_pool_free(gpuResult); }
    if (kernArgs) {
      hsa_amd_memory_pool_free(kernArgs);
      // kernArgs is a global; do not leave it pointing at freed memory
      kernArgs = NULL;
    }
    if (signal.handle) { hsa_signal_destroy(signal); }
    if (queue) { hsa_queue_destroy(queue); }
  } else {
    if (verbosity() > 0) {
      std::cout<< "Test not applicable as system is not large bar."
                   "Skipping."<< std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }
}

// Test to check cpu can read & write to GPU memory.
//
// Allocates a buffer from `pool`, grants cpuAgent access to it, writes the
// element index into every element directly from the CPU, and reads the
// buffer back to compare it with a host-side reference. Any mismatch is
// printed and fails the test.
//
// The subtest does nothing for pools that are not coarse-grained global
// pools, and is skipped when the CPU can never access the pool (system is
// not large BAR), when the pool does not allow allocation, or when the
// computed buffer would be empty.
//
// cpuAgent: CPU agent that is granted access to the GPU buffer.
// gpuAgent: GPU agent owning `pool` (not used by this function).
// pool:     GPU memory pool to allocate the test buffer from.
void MemoryAccessTest::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
                                                 hsa_agent_t gpuAgent,
                                                 hsa_amd_memory_pool_t pool) {
  hsa_status_t err;

  rocrtst::pool_info_t pool_i;
  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Only coarse-grained global pools are exercised
  if (pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
        pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
    return;
  }

  // Default to "never allowed" and check the query, so that a failed query
  // can not leave `access` indeterminate.
  hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  err = hsa_amd_agent_memory_pool_get_info(cpuAgent, pool,
                                       HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                       &access);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout<< "Test not applicable as system is not large bar."
                     "Skipping."<< std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
                                       pool_i.alloc_alignment == 0) {
    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // Size of the test buffer: half the number of allocation granules in the
  // pool, interpreted as a byte count (unchanged from the original sizing).
  auto gran_sz = pool_i.alloc_granule;
  auto pool_sz = pool_i.size / gran_sz;
  auto max_alloc_size = pool_sz/2;
  unsigned int max_element = max_alloc_size/sizeof(unsigned int);
  if (max_element == 0) {
    // Pool too small to hold even one element; nothing to verify
    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // Host-side reference data. A vector releases itself on every exit path,
  // including the early returns taken by failed assertions.
  std::vector<unsigned int> sys_data(max_element);
  for (unsigned int i = 0; i < max_element; ++i) {
    sys_data[i] = i;
  }

  unsigned int *gpu_data = NULL;
  err = hsa_amd_memory_pool_allocate(pool, max_alloc_size, 0,
                                      reinterpret_cast<void**>(&gpu_data));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpu_data);
  if (err != HSA_STATUS_SUCCESS) {
    // Do not leak the GPU buffer when the assertion below returns
    hsa_amd_memory_pool_free(gpu_data);
  }
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Verify CPU can read & write to GPU memory
  std::cout<< "Verify CPU can read & write to GPU memory"<< std::endl;
  for (unsigned int i = 0; i < max_element; ++i) {
    gpu_data[i] = i;  // Write to gpu memory directly
  }

  unsigned int mismatches = 0;
  for (unsigned int i = 0; i < max_element; ++i) {
    if (sys_data[i] != gpu_data[i]) {  // Reading GPU memory
      ++mismatches;
      fprintf(stdout, "Values not matching !! sys_data[%u]:%u, "
              "gpu_data[%u]:%u\n", i, sys_data[i], i, gpu_data[i]);
    }
  }

  // Release the GPU buffer before asserting on the comparison result
  err = hsa_amd_memory_pool_free(gpu_data);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // A mismatch is a test failure, not just a log line
  ASSERT_EQ(mismatches, 0u);
  std::cout<< "CPU have read & write to GPU memory successfully"<< std::endl;
}


void MemoryAccessTest::CPUAccessToGPUMemoryTest(void) {
  hsa_status_t err;

  PrintMemorySubtestHeader("CPUAccessToGPUMemoryTest in Memory Pools");
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    hsa_amd_memory_pool_t gpu_pool;
    memset(&gpu_pool, 0, sizeof(gpu_pool));
    err = hsa_amd_agent_iterate_memory_pools(gpus[i],
                                              rocrtst::GetGlobalMemoryPool,
                                              &gpu_pool);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    if (gpu_pool.handle == 0) {
      std::cout << "no global mempool in gpu agent" << std::endl;
      return;
    }
    CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
  }
  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryAccessTest::GPUAccessToCPUMemoryTest(void) {
  hsa_status_t err;

  PrintMemorySubtestHeader("GPUAccessToCPUMemoryTest in Memory Pools");
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    GPUAccessToCPUMemoryTest(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/memory_access.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test verifying memory access between CPU and GPU agents.
// The public no-argument subtests enumerate the agents and forward each
// CPU/GPU pairing to the private per-agent overloads.
class MemoryAccessTest : public TestBase {
 public:
    MemoryAccessTest();

  // @Brief: Destructor for the MemoryAccessTest test case
  virtual ~MemoryAccessTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);


  // @Brief: Verifies that the CPU is able to read & write GPU memory.
  // Runs once per GPU agent, using the first CPU agent found.
  void CPUAccessToGPUMemoryTest(void);

  // @Brief: Verifies that the GPU is able to read & write CPU memory.
  // Runs once per GPU agent, using the first CPU agent found.
  void GPUAccessToCPUMemoryTest(void);


 private:
  // Per-GPU worker for the public CPUAccessToGPUMemoryTest(); `pool` is the
  // GPU memory pool selected by the caller.
  void CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
                                                   hsa_agent_t gpuAgent,
                                                   hsa_amd_memory_pool_t pool);
  // Per-GPU worker for the public GPUAccessToCPUMemoryTest().
  void GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_


================================================
FILE: rocrtst/suites/functional/memory_alignment.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>

#include "suites/functional/memory_alignment.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"


// Number of callback entries added to the concurrent test group (and the
// number of control blocks allocated) in MemoryPoolAlignment.
static const uint32_t kNumThreads = 4096;

// Control block handed to CallbackVerifyPoolAlignmendFunc through its
// void* argument.
typedef struct control_block {
    hsa_amd_memory_pool_t* pool;  // pool whose alignment attribute is verified
} cb_t;

// Test-group callback: queries the pool referenced by the cb_t passed in
// `data` and, when runtime allocation is allowed on that pool, checks that
// its allocation alignment is non-zero and (for non-empty pools) a power of 2.
static void CallbackVerifyPoolAlignmendFunc(void *data) {
  cb_t *cb = reinterpret_cast<cb_t*>(data);

  rocrtst::pool_info_t info;
  memset(&info, 0, sizeof(info));
  hsa_status_t err = rocrtst::AcquirePoolInfo(*(cb->pool), &info);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Nothing to verify for pools that do not allow allocation.
  if (!info.alloc_allowed) {
    return;
  }

  // The reported allocation alignment must be non-zero.
  size_t alignment_size = info.alloc_alignment;
  EXPECT_TRUE(alignment_size);

  // Zero-sized pools are exempt from the power-of-2 check.
  if (info.size == 0) {
    return;
  }

  // x & (x - 1) clears the lowest set bit; the result is 0 only for a
  // power of 2 (given x != 0).
  EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1)))));
}


// Constructs the test with its default iteration count, title and
// description.
MemoryAlignmentTest::MemoryAlignmentTest(void) : TestBase() {
  // Default number of iterations of the main test; can be overridden on the
  // command line.
  set_num_iteration(10);

  set_title("RocR Memory Alignment Test");

  set_description(
      " This test verifies that each memory pool of the agent that"
      " has HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED alloc memory, It is "
      " aligned as specified by the HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT"
      " and has the alignment attribute is a power of 2.");
}

// Nothing to release here; cleanup happens in Close().
MemoryAlignmentTest::~MemoryAlignmentTest() = default;

// One-time setup of state used by the rest of the test: runs the base-class
// setup, then selects the default agents and the typical memory pools.
void MemoryAlignmentTest::SetUp(void) {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryAlignmentTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void MemoryAlignmentTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to display. The required profile is
// still compared with the one we are running on, as for the other phases.
void MemoryAlignmentTest::DisplayResults(void) const {
  const bool profile_matches = rocrtst::CheckProfile(this);
  if (!profile_matches) {
    return;
  }
}

void MemoryAlignmentTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Line printed after each subtest to separate it from the next one.
static const char kSubTestSeparator[] = "  **************************";

// Prints the banner that introduces a subtest, then flushes stdout.
static void PrintMemorySubtestHeader(const char *header) {
  std::cout << "  *** Memory Functional Subtest: ";
  std::cout << header;
  std::cout << " ***" << '\n' << std::flush;
}

// Prints "  Agent: <name> (<type>)" for the given agent, followed by a
// newline. The name and device type are queried with hsa_agent_get_info; a
// failed query raises a fatal gtest failure and nothing is printed.
static void PrintAgentNameAndType(hsa_agent_t agent) {
  hsa_status_t err;

  char ag_name[64];
  hsa_device_type_t ag_type;

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Pick the label first so that the parenthesis opened below is always
  // closed, even for a device type this switch does not list.
  const char* type_label;
  switch (ag_type) {
    case HSA_DEVICE_TYPE_CPU:
      type_label = "CPU)";
      break;
    case HSA_DEVICE_TYPE_GPU:
      type_label = "GPU)";
      break;
    case HSA_DEVICE_TYPE_DSP:
      type_label = "DSP)";
      break;
    case HSA_DEVICE_TYPE_AIE:
      type_label = "AIE)";
      break;
    default:
      type_label = "Unknown)";
      break;
  }

  std::cout << "  Agent: " << ag_name << " (" << type_label << std::endl;
}


// Verifies the allocation-alignment attribute of one pool.
//
// agent: agent owning the pool; only used for the verbose printout.
// pool:  pool to verify.
//
// When the pool allows allocation, the alignment is checked once on the
// calling thread and then again from kNumThreads test-group callbacks.
void MemoryAlignmentTest::MemoryPoolAlignment(hsa_agent_t agent,
                                                hsa_amd_memory_pool_t pool) {
  rocrtst::pool_info_t pool_i;
  hsa_status_t err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Pools that do not allow allocation are not checked.
  if (!pool_i.alloc_allowed) {
    return;
  }

  // The reported allocation alignment must be non-zero ...
  size_t alignment_size = pool_i.alloc_alignment;
  EXPECT_TRUE(alignment_size);
  // ... and, for non-empty pools, a power of 2.
  if (pool_i.size != 0) {
    EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1)))));
  }

  // Repeat the verification from the test group's callbacks.
  rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);

  // One control block per callback; all of them refer to the same pool.
  cb_t cb[kNumThreads];
  for (uint32_t slot = 0; slot < kNumThreads; ++slot) {
    cb[slot].pool = &pool;
    rocrtst::TestGroupAdd(tg_concurrent, &CallbackVerifyPoolAlignmendFunc,
                          &cb[slot], 1);
  }

  rocrtst::TestGroupThreadCreate(tg_concurrent);  // create the threads
  rocrtst::TestGroupStart(tg_concurrent);         // start running the tests
  rocrtst::TestGroupWait(tg_concurrent);          // wait until all finish
  rocrtst::TestGroupExit(tg_concurrent);          // exit all tests
  rocrtst::TestGroupDestroy(tg_concurrent);       // destroy group, clean up
}


void MemoryAlignmentTest::MemoryPoolAlignment(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryPoolAlignment in Basic func & Stress Test");
  }

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryPoolAlignment(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


================================================
FILE: rocrtst/suites/functional/memory_alignment.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"


// Functional test verifying the allocation-alignment attribute reported by
// memory pools that allow allocation.
class MemoryAlignmentTest : public TestBase {
 public:
    MemoryAlignmentTest();

  // @Brief: Destructor for the MemoryAlignmentTest test case
  virtual ~MemoryAlignmentTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Verifies the alignment attribute of every pool of every agent
  void MemoryPoolAlignment(void);


 private:
  // Per-pool worker for the public MemoryPoolAlignment(); `agent` is the
  // agent that owns `pool`.
  void MemoryPoolAlignment(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_


================================================
FILE: rocrtst/suites/functional/memory_allocation.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <string>
#include <memory>

#include "suites/functional/memory_allocation.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// NOTE(review): neither constant is referenced in the part of this file
// reviewed here - confirm they are still needed.
static const uint32_t kNumBufferElements = 256;
static const int kValue = 5;


// Constructs the test and picks the title/description for the selected
// variant. launch_GroupMemory takes precedence over launch_BasicAllocateFree;
// when neither is set only the generic title prefix is used and the
// description stays empty.
MemoryAllocationTest::MemoryAllocationTest(bool launch_GroupMemory,
                                           bool launch_BasicAllocateFree) : TestBase() {
  // Default number of iterations of the main test; can be overridden on the
  // command line.
  set_num_iteration(10);

  std::string name("RocR Memory Test ");
  std::string desc;

  if (launch_GroupMemory) {
    name.append(" For Kernel Dynamic Memory Alocation");
    desc.append(" This test Allocate group memory in kernel dynamically.");
  } else if (launch_BasicAllocateFree) {
    name.append(" For BasicAllocateFree");
    desc.append(" This test Allocate And free Memory on all the availble pool "
                " on which allocation is allowed on RocR Agents.");
  }

  set_title(name);
  set_description(desc);

  // Start from an all-zero dispatch packet.
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
}

// Nothing to release here; cleanup happens in Close().
MemoryAllocationTest::~MemoryAllocationTest() = default;

// One-time setup of state used by the rest of the test: runs the base-class
// setup, selects the default agents and the typical memory pools, and fills
// in the AQL dispatch packet (everything except the header).
void MemoryAllocationTest::SetUp(void) {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryAllocationTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void MemoryAllocationTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to display. The required profile is
// still compared with the one we are running on, as for the other phases.
void MemoryAllocationTest::DisplayResults(void) const {
  const bool profile_matches = rocrtst::CheckProfile(this);
  if (!profile_matches) {
    return;
  }
}

void MemoryAllocationTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}

// Reserves the next write slot of queue `q` and copies this test's AQL
// dispatch packet into it. The slot is the reserved write index masked with
// (q->size - 1).
void MemoryAllocationTest::WriteAQLPktToQueue(hsa_queue_t* q) {
  hsa_kernel_dispatch_packet_t* packets =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(q->base_address);
  const uint32_t slot_mask = q->size - 1;

  // Bump the queue's write index by one; the returned value is used as the
  // slot for this packet.
  const uint64_t write_index = hsa_queue_add_write_index_relaxed(q, 1);

  packets[write_index & slot_mask] = aql();
}


// Kernel-argument block for the "group_memory_dynamic" kernel, filled in by
// GroupMemoryDynamicAllocation. Declared with 16-byte alignment.
typedef struct  __attribute__ ((aligned(16)))  args_t {
     uint32_t *a;          // input buffer (host-side Indata)
     uint32_t *b;          // output buffer (GPU-side OutData)
     uint32_t grp_offset;  // set from group_segment_size()
     uint32_t count;       // number of elements (kMemoryAllocSize)
  } args;


// Line printed after each subtest to separate it from the next one.
static const char kSubTestSeparator[] = "  **************************";

// Prints the banner that introduces a subtest, then flushes stdout.
static void PrintMemorySubtestHeader(const char *header) {
  std::cout << "  *** Memory Allocation  Test: ";
  std::cout << header;
  std::cout << " ***" << '\n' << std::flush;
}

// Element count of the dispatch test buffers; also the byte cap applied to
// the basic allocate/free test.
static const int kMemoryAllocSize = 1024;

// Dispatches the "group_memory_dynamic" kernel on gpuAgent with a group
// segment enlarged by kMemoryAllocSize * sizeof(uint32_t) bytes, then checks
// that the GPU output buffer equals the host input buffer.
//
// cpuAgent: CPU agent that owns the host-side and kernarg buffers and reads
//           back the result.
// gpuAgent: GPU agent on which the queue is created and the kernel runs.
//
// The subtest is skipped (with a message when verbose) if cpuAgent can never
// access the GPU pool. Any HSA error raises a fatal gtest failure and returns
// immediately.
void MemoryAllocationTest::GroupMemoryDynamicAllocation(hsa_agent_t cpuAgent,
                                                   hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Get Global Memory Pool on the gpuAgent to allocate gpu buffers
  hsa_amd_memory_pool_t gpu_pool;
  err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
                                            rocrtst::GetGlobalMemoryPool,
                                            &gpu_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Ask whether the CPU may access the GPU pool. The status is checked so
  // that `access` is never read after a failed query.
  hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  err = hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
                                       HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                       &access);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout<< "Test not applicable as system is not large bar."
                   "Skipping."<< std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // hsa objects
  hsa_queue_t *queue = NULL;  // command queue

  // get queue size
  uint32_t queue_size = 0;
  err = hsa_agent_get_info(gpuAgent,
                              HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // create queue
  err = hsa_queue_create(gpuAgent,
                            queue_size, HSA_QUEUE_TYPE_MULTI,
                            NULL, NULL, 0, 0, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Get System Memory Pool on the cpuAgent to allocate host side buffers
  hsa_amd_memory_pool_t global_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                            rocrtst::GetGlobalMemoryPool,
                                            &global_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Find a memory pool that supports kernel arguments.
  hsa_amd_memory_pool_t kernarg_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                            rocrtst::GetKernArgMemoryPool,
                                            &kernarg_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Allocate the host side buffers
  // (Indata,kernArg) on system memory
  uint32_t *Indata = NULL;
  args *kernArgs = NULL;

  err = hsa_amd_memory_pool_allocate(global_pool,
                                    kMemoryAllocSize*sizeof(uint32_t), 0,
                                    reinterpret_cast<void **>(&Indata));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Allocate the kernel argument buffer from the kernarg_pool.
  err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
                                      reinterpret_cast<void **>(&kernArgs));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // initialize the host buffer with its own indices
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    Indata[i] = i;
  }

  // for the dGPU, we have coarse grained local memory,
  // so allocate memory for it on the GPU's GLOBAL segment .

  // Get local memory of GPU to allocate device side buffers
  uint32_t *OutData = NULL;
  err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize*sizeof(uint32_t), 0,
                                      reinterpret_cast<void **>(&OutData));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Allow cpuAgent access to all allocated GPU memory.
  err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, OutData);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  memset(OutData, 0, kMemoryAllocSize * sizeof(uint32_t));

  // Allow gpuAgent access to all allocated system memory.
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, Indata);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgs);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // NOTE(review): group_segment_size() is read before the kernel is loaded
  // below - confirm it already holds the intended value at this point.
  uint32_t grp_offset = group_segment_size();
  kernArgs->a = Indata;
  // gpu memory where data will be copied from dynamically group memory
  kernArgs->b = OutData;
  kernArgs->grp_offset = grp_offset;
  kernArgs->count = kMemoryAllocSize;

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Create the executable, get symbol by name and load the code object
  set_kernel_file_name("groupMemoryDynamic_kernels.hsaco");
  set_kernel_name("group_memory_dynamic");
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // The total byte size of group memory, static + dynamic
  uint32_t total_grp_byte_size = group_segment_size() + kMemoryAllocSize * sizeof(uint32_t);
  if (verbosity() > 0) {
    std::cout << "aql.total_grp_byte_size" << total_grp_byte_size << std::endl;
  }

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // One work-item per element, in workgroups of 256.
  aql().workgroup_size_x = 256;
  aql().workgroup_size_y = 1;
  aql().workgroup_size_z = 1;
  aql().grid_size_y = 1;
  aql().grid_size_z = 1;
  aql().private_segment_size = 0;
  aql().grid_size_x = kMemoryAllocSize;
  aql().group_segment_size = total_grp_byte_size;
  aql().kernel_object = kernel_object();
  aql().kernarg_address = kernArgs;

  const uint32_t queue_mask = queue->size - 1;

  // Load index for writing header later to command queue at same index
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  hsa_queue_store_write_index_relaxed(queue, index + 1);

  // This function simply copies the data we've collected so far into our
  // local AQL packet, except the setup and header fields.
  rocrtst::WriteAQLToQueueLoc(queue, index, &aql());

  aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
  aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
               HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
  aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
               HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

  void* q_base = queue->base_address;
  // Set the Aql packet header
  rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                      &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                          (q_base))[index & queue_mask]);

  // ring the doorbell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);

  // wait for the signal and reset it for future use
  while (hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                    (uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }

  hsa_signal_store_relaxed(aql().completion_signal, 1);

  // compare results: the output buffer must equal the input buffer
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    ASSERT_EQ(OutData[i], Indata[i]);
  }

  if (Indata) { hsa_amd_memory_pool_free(Indata); }
  if (OutData) { hsa_amd_memory_pool_free(OutData); }
  if (kernArgs) { hsa_amd_memory_pool_free(kernArgs); }
  if (queue) { hsa_queue_destroy(queue); }
}


void MemoryAllocationTest::GroupMemoryDynamicAllocation(void) {
  hsa_status_t err;
  if (verbosity() > 0) {
    PrintMemorySubtestHeader("Memory Group dynamic allocation");
  }
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    GroupMemoryDynamicAllocation(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


// Prints "  Agent: <name> (<type>)" for the given agent, followed by a
// newline. The name and device type are queried with hsa_agent_get_info; a
// failed query raises a fatal gtest failure and nothing is printed.
static void PrintAgentNameAndType(hsa_agent_t agent) {
  hsa_status_t err;

  char ag_name[64];
  hsa_device_type_t ag_type;

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Pick the label first so that the parenthesis opened below is always
  // closed, even for a device type this switch does not list (matching the
  // default case in PrintSegmentNameAndType).
  const char* type_label;
  switch (ag_type) {
    case HSA_DEVICE_TYPE_CPU:
      type_label = "CPU)";
      break;
    case HSA_DEVICE_TYPE_GPU:
      type_label = "GPU)";
      break;
    case HSA_DEVICE_TYPE_DSP:
      type_label = "DSP)";
      break;
    case HSA_DEVICE_TYPE_AIE:
      type_label = "AIE)";
      break;
    default:
      type_label = "Unknown)";
      break;
  }

  std::cout << "  Agent: " << ag_name << " (" << type_label << std::endl;
}

// Prints the name of the given memory-pool segment on its own line; values
// that match none of the listed segments print "  no segment".
static void PrintSegmentNameAndType(uint32_t segment) {
  const char* segment_label = "  no segment";

  switch (segment) {
    case HSA_AMD_SEGMENT_GLOBAL:
      segment_label = "  GLOBAL SEGMENT";
      break;
    case HSA_AMD_SEGMENT_GROUP:
      segment_label = "  GROUP SEGMENT";
      break;
    case HSA_AMD_SEGMENT_PRIVATE:
      segment_label = "  PRIVATE SEGMENT";
      break;
    case HSA_AMD_SEGMENT_READONLY:
      segment_label = "  READONLY SEGMENT";
      break;
    default:
      break;
  }

  std::cout << segment_label << std::endl;
}

// Allocates and frees one small buffer from `pool`, if the pool allows
// allocation.
//
// agent: agent owning the pool; only used for the verbose printout.
// pool:  pool to allocate from.
//
// The request size is the pool's maximum allocation size, capped at
// kMemoryAllocSize bytes.
void MemoryAllocationTest::MemoryBasicAllocationAndFree(hsa_agent_t agent,
                                               hsa_amd_memory_pool_t pool) {
  rocrtst::pool_info_t pool_i;
  hsa_status_t err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Pools that do not allow allocation are skipped.
  if (!pool_i.alloc_allowed) {
    return;
  }

  if (verbosity() > 0) {
    PrintSegmentNameAndType(pool_i.segment);
  }

  size_t max_size;
  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                    &max_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  // Never request more than kMemoryAllocSize bytes.
  if (max_size > kMemoryAllocSize) {
    max_size = kMemoryAllocSize;
  }

  char *memoryPtr;
  err = hsa_amd_memory_pool_allocate(pool, max_size , 0,
                                     reinterpret_cast<void**>(&memoryPtr));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (!memoryPtr) {
    return;
  }
  err = hsa_amd_memory_pool_free(memoryPtr);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}


void MemoryAllocationTest::MemoryBasicAllocationAndFree(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryBasicAllocationAndFree");
  }

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryBasicAllocationAndFree(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Per-pool body of the contiguous-allocation subtest.
//
// Only pools owned by a GPU agent that allow allocation and report a non-zero
// allocation granule and alignment are exercised; everything else is skipped.
// For an eligible pool the test:
//   1. allocates (granule * 1024) bytes with HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG,
//   2. exports the allocation as a dmabuf file descriptor,
//   3. maps that dmabuf through the interop API for every GPU agent whose
//      access to the pool is not NEVER_ALLOWED and checks the mapped size,
//   4. closes the descriptor, unmaps the import and frees the allocation.
//
// agent: agent that owns the pool.
// pool:  memory pool to exercise.
void MemoryAllocationTest::MemoryAllocateContiguousTest(hsa_agent_t agent,
                                                        hsa_amd_memory_pool_t pool) {
  rocrtst::pool_info_t pool_i;
  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  hsa_device_type_t ag_type;
  ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));

  // Skip pools that are not GPU-owned or cannot be allocated from.
  const bool is_gpu_pool = (ag_type == HSA_DEVICE_TYPE_GPU);
  const bool can_allocate =
      pool_i.alloc_allowed && pool_i.alloc_granule && pool_i.alloc_alignment;
  if (!(is_gpu_pool && can_allocate)) {
    return;
  }

  if (verbosity() > 0) {
    PrintSegmentNameAndType(pool_i.segment);
  }

  // Collect every GPU agent in the system.
  std::vector<hsa_agent_t> gpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));

  const size_t alloc_size = pool_i.alloc_granule * 1024;

  void* memoryPtr = nullptr;
  ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(pool, alloc_size, HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG,
                                              &memoryPtr));
  if (memoryPtr == nullptr) {
    return;
  }

  // Export the allocation so it can be re-imported through the interop path.
  int dmabuf = -1;
  uint64_t offset;
  ASSERT_SUCCESS(hsa_amd_portable_export_dmabuf(memoryPtr, alloc_size, &dmabuf, &offset));

  // Keep only the GPUs that are allowed to access this pool at all.
  std::vector<hsa_agent_t> accessible_gpus;
  for (const hsa_agent_t& gpu : gpus) {
    hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
    ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(gpu, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
    if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      continue;
    }
    accessible_gpus.push_back(gpu);
  }

  size_t importedSz;
  void* importedPtr = nullptr;
  ASSERT_SUCCESS(hsa_amd_interop_map_buffer(accessible_gpus.size(), accessible_gpus.data(), dmabuf, 0, &importedSz,
                                            &importedPtr, 0, NULL));

  ASSERT_NE(importedPtr, nullptr);
  ASSERT_EQ(importedSz, alloc_size);

  // The descriptor is closed before the import is unmapped, as in the
  // original sequence.
  close(dmabuf);

  ASSERT_SUCCESS(hsa_amd_interop_unmap_buffer(importedPtr));

  ASSERT_SUCCESS(hsa_amd_memory_pool_free(memoryPtr));
}

void MemoryAllocationTest::MemoryAllocateContiguousTest(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryAllocateContiguousTest");
  }

  ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryAllocateContiguousTest(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


================================================
FILE: rocrtst/suites/functional/memory_allocation.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALLOCATION_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALLOCATION_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test fixture for memory-allocation subtests. The public
// no-argument subtest methods are the entry points; each has a private
// overload taking the agent(s)/pool that performs the work for one target.
class MemoryAllocationTest : public TestBase {
 public:
    // NOTE(review): the constructor body is not visible here; the two flags
    // presumably select which subtest the instance describes - confirm
    // against the definition in memory_allocation.cc.
    MemoryAllocationTest(bool launch_GroupMemory,
                         bool launch_BasicAllocateFree);

  // @Brief: Destructor for test case of MemoryAllocationTest
  virtual ~MemoryAllocationTest();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Subtest entry point for dynamic group-memory allocation
  void GroupMemoryDynamicAllocation(void);

  // @Brief: Subtest entry point; allocates and frees memory from every pool
  // of every agent that allows allocation
  void MemoryBasicAllocationAndFree(void);
  // @Brief: Subtest entry point; exercises contiguous allocation plus dmabuf
  // export/import on eligible GPU pools
  void MemoryAllocateContiguousTest(void);


 private:
  // Per-target overloads invoked by the public entry points above.
  void GroupMemoryDynamicAllocation(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void MemoryBasicAllocationAndFree(hsa_agent_t agent,
                                               hsa_amd_memory_pool_t pool);
  void MemoryAllocateContiguousTest(hsa_agent_t agent, hsa_amd_memory_pool_t pool);

  // NOTE(review): usage not visible in this portion of the file.
  void WriteAQLPktToQueue(hsa_queue_t* q);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALLOCATION_H_


================================================
FILE: rocrtst/suites/functional/memory_atomics.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>

#include "suites/functional/memory_atomics.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// NOTE(review): kNumBufferElements is not referenced anywhere else in
// memory_atomics.cc - candidate for removal.
static const uint32_t kNumBufferElements = 256;
// Operand handed to the kernel (kernel argument `d`) for every operation
// except INC and DEC, and used on the host to compute the expected results.
static const int kValue = 5;

// Builds a test instance for one atomic operation.
//
// Stores the requested operation, sets the default iteration count, derives
// the test title and description from the operation, and zeroes the AQL
// dispatch packet. For values without a dedicated case (e.g. NO_TEST) the
// title is left as the bare base name and the description stays empty.
//
// testtype: atomic operation this instance will exercise.
MemoryAtomic::MemoryAtomic(AtomicTest testtype) :
    TestBase() {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.
  testtype_ = testtype;

  // Per-operation title suffix and the operation word used in the
  // description; both stay null when the operation is not recognised.
  const char* title_suffix = nullptr;
  const char* op_word = nullptr;

  switch (testtype_) {
    case ADD:
      title_suffix = " For ADD";
      op_word = "Add";
      break;
    case SUB:
      title_suffix = " For Sub";
      op_word = "Sub";
      break;
    case AND:
      title_suffix = " For And";
      op_word = "AND";
      break;
    case OR:
      title_suffix = " For Or";
      op_word = "OR";
      break;
    case XOR:
      title_suffix = " For Xor";
      op_word = "XOR";
      break;
    case MIN:
      title_suffix = " For Minimum";
      op_word = "Minimum";
      break;
    case MAX:
      title_suffix = " For Maximum";
      op_word = "Maximum";
      break;
    case XCHG:
      title_suffix = " For Exchange";
      op_word = "Xchg";
      break;
    case INC:
      title_suffix = " For Increment";
      op_word = "Increment";
      break;
    case DEC:
      // Title previously read " For Decremnet" (typo).
      title_suffix = " For Decrement";
      op_word = "decrement";
      break;
    default:
      break;
  }

  std::string name = "RocR Memory Atomic Test";
  std::string desc;
  if (title_suffix != nullptr) {
    name += title_suffix;
    desc += " This test will do ";
    desc += op_word;
    desc += " kernel atomic operation on GPU and system memory.";
  }

  set_title(name);
  set_description(desc);
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
}

// Nothing to release here; the destructor body is intentionally empty.
MemoryAtomic::~MemoryAtomic(void) {
}

// One-time setup of the member state used by the rest of the test: runs the
// base-class setup, selects the default agents and the typical memory pools,
// and pre-fills the AQL dispatch packet (everything except its header).
void MemoryAtomic::SetUp(void) {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryAtomic::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Displays the test information; simply forwards to the base class.
void MemoryAtomic::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to display. The profile check is still
// performed, exactly as before, but its outcome does not change anything:
// the function returns without printing in either case.
void MemoryAtomic::DisplayResults(void) const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

// Tears the test down by delegating to the base class.
void MemoryAtomic::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}

// Kernel-argument block handed to the atomic-operation kernels; allocated
// from the kernarg pool and filled in by MemoryAtomicTest(). The struct is
// 16-byte aligned.
// NOTE(review): the field layout presumably has to match the kernel
// signature in atomicOperations_kernels.hsaco - confirm before changing it.
typedef struct  __attribute__ ((aligned(16)))  args_t {
  int *a;  // system-memory buffer the atomic operation is applied to
  int *b;  // GPU-memory buffer the atomic operation is applied to
  int *c;  // system-memory buffer receiving the returned old values
  int d;   // operand (kValue); not written by the host for INC and DEC
  int n;   // NOTE(review): never written by the host code in this file
  } args;

// NOTE(review): kSubTestSeparator is not referenced anywhere else in
// memory_atomics.cc.
static const char kSubTestSeparator[] = "  **************************";


// Size in bytes of every data buffer allocated by MemoryAtomicTest(); the
// element count is kMemoryAllocSize / sizeof(int).
static const int kMemoryAllocSize = 4096;

// Runs the atomic operation selected by testtype_ on one GPU agent and
// verifies the result.
//
// The kernel is dispatched once over kMemoryAllocSize / sizeof(int) elements
// and operates on two buffers: one in system memory and one in the GPU's
// global pool. When the CPU agent has no access to the GPU pool, a
// system-memory staging buffer is used instead and its contents are copied
// to and from a separate GPU allocation with hsa_amd_memory_async_copy().
// Afterwards both buffers are compared against the expected values computed
// on the host, and the old values returned by the atomics are compared
// against the initial data.
//
// cpuAgent: CPU agent providing the system-memory and kernarg pools.
// gpuAgent: GPU agent the kernel is dispatched on.
//
// Failures are reported through fatal gtest assertions; resources allocated
// before a failing assertion are not released.
void MemoryAtomic::MemoryAtomicTest(hsa_agent_t cpuAgent,
                                                   hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Get Global Memory Pool on the gpuAgent to allocate gpu buffers
  hsa_amd_memory_pool_t gpu_pool;
  err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
                                            rocrtst::GetGlobalMemoryPool,
                                            &gpu_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Determine whether the CPU may access the GPU pool directly. The whole
  // data path below is chosen from this value, so it is given a defined
  // default and a failed query is treated as a test failure.
  hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  err = hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
                                       HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                       &access);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // hsa objects
  hsa_queue_t *queue = NULL;  // command queue
  // get queue size
  uint32_t queue_size = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // create queue
  err = hsa_queue_create(gpuAgent,
                         queue_size, HSA_QUEUE_TYPE_MULTI,
                         NULL, NULL, 0, 0, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Find a memory pool that supports kernel arguments.
  hsa_amd_memory_pool_t kernarg_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                            rocrtst::GetKernArgMemoryPool,
                                            &kernarg_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Allocate the host side buffers
  // (refSysdata,oldValues,oldrefdata,expecteddata) on system memory

  // System-memory data the atomic operation is applied to
  int *refSysdata = NULL;
  // Copy of the initial data, used to check the old values returned by the
  // atomic operation
  int *oldrefdata = NULL;
  // Old values returned by the atomic operation
  int *oldValues = NULL;
  // Expected contents of the data buffers after the kernel has run
  int *expecteddata = NULL;
  // Number of int elements in each data buffer
  int arraySize = kMemoryAllocSize/sizeof(int);

  // Get System Memory Pool on the cpuAgent to allocate host side buffers
  hsa_amd_memory_pool_t global_pool;
  err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
                                            rocrtst::GetGlobalMemoryPool,
                                            &global_pool);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                    kMemoryAllocSize, 0,
                                    reinterpret_cast<void **>(&oldValues));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                    kMemoryAllocSize, 0,
                                    reinterpret_cast<void **>(&refSysdata));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                    kMemoryAllocSize, 0,
                                    reinterpret_cast<void **>(&oldrefdata));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_allocate(global_pool,
                                    kMemoryAllocSize, 0,
                                    reinterpret_cast<void **>(&expecteddata));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);


  // Allocate the kernel argument buffer from the kernarg_pool.
  args *kernArguments = NULL;
  err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
                                     reinterpret_cast<void **>(&kernArguments));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);


  memset(oldValues, 0, kMemoryAllocSize);
  memset(expecteddata, 0, kMemoryAllocSize);
  // This signal is used for copying the data to and from the GPU
  // on a non-large-BAR system. It is only created and used on that path.
  hsa_signal_t copy_signal;

  // for the dGPU, we have coarse grained local memory,
  // so allocate memory for it on the GPU's GLOBAL segment .

  // Host-visible buffer holding the GPU-side data the atomic operation is
  // applied to (the GPU allocation itself when the CPU can access the pool)
  int *gpuRefData = NULL;

  // On a non-large-BAR system the CPU cannot access the GPU pool directly:
  // g_gpuRefData then points to the GPU allocation, while gpuRefData points
  // to a system-memory staging buffer.
  int *g_gpuRefData = NULL;
  // Address of the staging buffer used as the host side of the async copies
  int *device_ptr = NULL;

  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0,
                                       reinterpret_cast<void **>(&gpuRefData));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Allow cpuAgent access to all allocated GPU memory.
    err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuRefData);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    memset(gpuRefData, 0, kMemoryAllocSize);
  } else {
    err = hsa_signal_create(1, 0, NULL, &copy_signal);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    // Allocate the system memory and get pointer gpuRefData
    err = hsa_amd_memory_pool_allocate(global_pool, kMemoryAllocSize, 0,
                                        reinterpret_cast<void **>(&gpuRefData));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    memset(gpuRefData, 0, kMemoryAllocSize);
    // Allocate the GPU memory and get pointer g_gpuRefData
    err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0,
                                        reinterpret_cast<void **>(&g_gpuRefData));
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    // Make the host staging buffer accessible to the GPU agent
    err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, gpuRefData);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    device_ptr = gpuRefData;
  }


  // initialize the host buffers & gpuRefData buffer
  // Note: `rand_r(&seed) % 1` is always 0, so every element starts at 6.
  // refSysdata and gpuRefData have to start out equal because both are
  // compared against the same expecteddata after the kernel has run.
  for (int i = 0; i < arraySize; ++i) {
    unsigned int seed = time(NULL);
    refSysdata[i] = 6 + rand_r(&seed) % 1;
    gpuRefData[i] = 6 + rand_r(&seed) % 1;
    oldrefdata[i] = refSysdata[i];
  }

  // Sync the data from system memory to GPU memory on non-largebar
  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    hsa_signal_store_relaxed(copy_signal, 1);
    err = hsa_amd_memory_async_copy(g_gpuRefData, gpuAgent, device_ptr,
                                    gpuAgent, kMemoryAllocSize, 0, NULL, copy_signal);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    while (hsa_signal_wait_acquire(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)(-1), HSA_WAIT_STATE_ACTIVE)) {}
  }


  // Allow gpuAgent access to all allocated system memory.
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldValues);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, refSysdata);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, oldrefdata);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArguments);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill in the kernel arguments.
  // NOTE(review): kernArguments->n is never written, and ->d is left
  // unwritten for INC/DEC - confirm the kernels do not read them.
  kernArguments->a = refSysdata;
  if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    kernArguments->b = gpuRefData;
  } else {
    kernArguments->b = g_gpuRefData;
  }
  kernArguments->c = oldValues;

  if (testtype_ != INC && testtype_ != DEC) {
    kernArguments->d = kValue;
  }

  // Create the executable, get symbol by name and load the code object
  set_kernel_file_name("atomicOperations_kernels.hsaco");

  if (testtype_ == ADD) {
    set_kernel_name("test_atomic_add");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] + kValue;
    }
  } else if (testtype_ == SUB) {
    set_kernel_name("test_atomic_sub");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] - kValue;
    }
  } else if (testtype_ == AND) {
    set_kernel_name("test_atomic_and");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] & kValue;
    }
  } else if (testtype_ == OR) {
    set_kernel_name("test_atomic_or");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] | kValue;
    }
  } else if (testtype_ == XOR) {
    set_kernel_name("test_atomic_xor");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] ^ kValue;
    }
  } else if (testtype_ == MIN) {
    set_kernel_name("test_atomic_min");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = std::min(oldrefdata[i], kValue);
    }
  } else if (testtype_ == MAX) {
    set_kernel_name("test_atomic_max");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = std::max(oldrefdata[i], kValue);
    }
  } else if (testtype_ == INC) {
    set_kernel_name("test_atomic_inc");
    // set the expected data result set from kernel
    // NOTE(review): the +4 presumably mirrors the number of increments the
    // kernel applies per element - confirm against the kernel source.
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] + 4;
    }
  } else if (testtype_ == DEC) {
    set_kernel_name("test_atomic_dec");
    // set the expected data result set from kernel
    // NOTE(review): the -4 presumably mirrors the number of decrements the
    // kernel applies per element - confirm against the kernel source.
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = oldrefdata[i] - 4;
    }
  } else if (testtype_ == XCHG) {
    set_kernel_name("test_atomic_xchg");
    // set the expected data result set from kernel
    for (int i = 0; i < arraySize; ++i) {
      expecteddata[i] = kValue;
    }
  } else {
    if (verbosity() > 0) {
      std::cout<< "No test specified" <<std::endl;
    }
  }

  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  aql().workgroup_size_x = 256;
  aql().workgroup_size_y = 1;
  aql().workgroup_size_z = 1;
  aql().grid_size_x = arraySize;
  aql().kernarg_address = kernArguments;
  aql().kernel_object = kernel_object();

  const uint32_t queue_mask = queue->size - 1;

  // Load index for writing header later to command queue at same index
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  hsa_queue_store_write_index_relaxed(queue, index + 1);

  rocrtst::WriteAQLToQueueLoc(queue, index, &aql());

  aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
  aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
               HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
  aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
               HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

  void* q_base = queue->base_address;
  // Set the Aql packet header
  rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                      &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                          (q_base))[index & queue_mask]);


  // ring the doorbell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);

  // wait for the signal and reset it for future use
  while (hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                      (uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }

  hsa_signal_store_relaxed(aql().completion_signal, 1);

  // Sync the data from GPU memory to system memory on non-largebar
  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    hsa_signal_store_relaxed(copy_signal, 1);
    err = hsa_amd_memory_async_copy(device_ptr, gpuAgent, g_gpuRefData,
                                    gpuAgent, kMemoryAllocSize, 0, NULL, copy_signal);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    while (hsa_signal_wait_acquire(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)(-1), HSA_WAIT_STATE_ACTIVE)) { }
  }

  // compare results with expected results
  for (int i = 0; i < arraySize; ++i) {
    ASSERT_EQ(refSysdata[i], expecteddata[i]);
    ASSERT_EQ(gpuRefData[i], expecteddata[i]);
    ASSERT_EQ(oldValues[i], oldrefdata[i]);
  }

  if (refSysdata) {
    err = hsa_memory_free(refSysdata);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  if (oldrefdata) {
    err = hsa_memory_free(oldrefdata);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  if (oldValues) {
    err = hsa_memory_free(oldValues);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  // The expected-data buffer was previously never released.
  if (expecteddata) {
    err = hsa_memory_free(expecteddata);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    // NOTE(review): no matching hsa_amd_memory_lock() is visible in this
    // function - confirm this unlock is intended.
    err = hsa_amd_memory_unlock(gpuRefData);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    // Destroy the copy signal
    err = hsa_signal_destroy(copy_signal);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    if (g_gpuRefData) {
      err = hsa_memory_free(g_gpuRefData);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    }
  }
  if (gpuRefData) {
    err = hsa_memory_free(gpuRefData);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  if (kernArguments) {
    err = hsa_memory_free(kernArguments);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
  if (queue) {
    err = hsa_queue_destroy(queue);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }
}

void MemoryAtomic::MemoryAtomicTest(void) {
  hsa_status_t err;
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    MemoryAtomicTest(cpus[0], gpus[i]);
  }
}


================================================
FILE: rocrtst/suites/functional/memory_atomics.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Atomic operation exercised by a MemoryAtomic test instance.
enum AtomicTest {
  ADD,  // For add atomic operation
  SUB,  // For sub atomic operation
  AND,  // For and atomic operation
  OR,   // For or atomic operation
  XOR,  // For xor atomic operation
  INC,  // For inc atomic operation
  DEC,  // For dec atomic operation
  MAX,  // For max atomic operation
  MIN,  // For min atomic operation
  XCHG,  // For xchg atomic operation
  // No operation selected; MemoryAtomicTest() then sets no kernel name and
  // prints "No test specified" in verbose mode.
  NO_TEST};

// Functional test fixture that runs one kernel atomic operation (chosen at
// construction time) on system memory and GPU memory and verifies the result.
class MemoryAtomic : public TestBase {
 public:
  // @Brief: Create a test for the given atomic operation; the test title and
  // description are derived from it
  explicit MemoryAtomic(AtomicTest testtype);

  // @Brief: Destructor for test case of MemoryAtomic
  virtual ~MemoryAtomic();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Test entry point; runs the selected atomic operation on every
  // GPU agent, paired with the first CPU agent
  void MemoryAtomicTest(void);


 private:
  // Runs and verifies the selected atomic operation on a single GPU agent.
  void MemoryAtomicTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);

  // NOTE(review): no definition or call of this method appears in
  // memory_atomics.cc.
  void WriteAQLPktToQueue(hsa_queue_t* q);

  // Atomic operation this instance exercises.
  AtomicTest testtype_;
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ATOMICS_H_


================================================
FILE: rocrtst/suites/functional/memory_basic.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <sys/sysinfo.h>

#include "suites/functional/memory_basic.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Element count for a test buffer.
// NOTE(review): not referenced by any code visible in this part of the file;
// confirm whether it is still needed.
static const uint32_t kNumBufferElements = 256;

// Error-propagation helper for functions that return hsa_status_t.
// If |err| is not HSA_STATUS_SUCCESS, prints the call site (line and file),
// the numeric status and the text obtained from hsa_status_string(), then
// returns |err| from the enclosing function.
// Note: |err| is evaluated more than once, so pass a variable rather than a
// call expression.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}


// Constructs the memory test case: sets the default iteration count and the
// title/description strings stored through the TestBase setters.
MemoryTest::MemoryTest() : TestBase() {
  // Default number of iterations of the main test. This value can be
  // overridden on the command line.
  set_num_iteration(10);

  set_title("RocR Memory Tests");
  set_description(
      "This series of tests check memory allocation limits, extent"
      " of GPU access to system memory and other memory related functionality.");
}

// Destructor. Nothing to release here: the body is intentionally empty.
MemoryTest::~MemoryTest(void) {
}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void MemoryTest::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  return;
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Displays information about this test by delegating to the base class; no
// test-specific information is added.
void MemoryTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to print. The required-profile check is
// still performed, exactly as before, but both outcomes simply return.
void MemoryTest::DisplayResults(void) const {
  static_cast<void>(!rocrtst::CheckProfile(this));
}

// Tears the test down by delegating to the base class.
void MemoryTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}

// Attempts to allocate |sz| bytes from |pool| and, when that succeeds, frees
// the buffer again right away.
// Returns the allocation status if the allocation failed, otherwise the
// status of the free.
hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) {
  void *buffer;

  const hsa_status_t alloc_status =
      hsa_amd_memory_pool_allocate(pool, sz, 0, &buffer);
  if (alloc_status != HSA_STATUS_SUCCESS) {
    return alloc_status;
  }

  return hsa_memory_free(buffer);
}

// Line of asterisks printed at the end of each memory sub-test's output.
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner line that introduces a memory sub-test to std::cout.
// |header| is the human-readable sub-test name placed inside the banner.
static void PrintMemorySubtestHeader(const char *header) {
  std::cout << "  *** Memory Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Test Fixtures
// Finds, by repeatedly shrinking the request, the largest single allocation
// that succeeds in |pool| of agent |ag|, and checks it against expectations.
//
// Steps:
//  1. Query agent name/type/node and the pool properties; skip pools that do
//     not allow allocation, report a zero granule/alignment, or are
//     extended-scope fine-grained. On CPU nodes other than node 0 only
//     coarse-grained pools are tested.
//  2. Negative test: a request one granule larger than the pool must fail with
//     HSA_STATUS_ERROR_INVALID_ALLOCATION.
//  3. Starting from the pool size (capped for CPU agents, see below), retry
//     with a 1% smaller size until an allocation succeeds.
//  4. For GPU agents, expect the found size to be a minimum fraction of the
//     pool size.
//
// All sizes below are in units of the pool's allocation granule unless a
// variable name says otherwise.
void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
                                                 hsa_amd_memory_pool_t pool) {
  hsa_status_t err;
  struct sysinfo info;

  rocrtst::pool_info_t pool_i;
  char ag_name[64];
  hsa_device_type_t ag_type;

  err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  uint32_t node_id;
  err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NODE, &node_id);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Do not continue with an unfilled struct if sysinfo() fails.
  ASSERT_EQ(0, sysinfo(&info));

  // sysinfo() reports memory sizes as multiples of mem_unit bytes, so scale
  // them before treating them as byte counts.
  const uint64_t mem_unit = info.mem_unit;
  const uint64_t total_ram_bytes = info.totalram * mem_unit;
  const uint64_t free_ram_bytes = info.freeram * mem_unit;

  if (verbosity() > 0) {
    time_t t = time(nullptr);

    std::cout << "  Current date and time: " << ctime(&t);
    std::cout << "  Agent: " << ag_name << " (";
    switch (ag_type) {
      case HSA_DEVICE_TYPE_CPU:
        std::cout << "CPU)" << std::endl;
        std::cout << "  System Total Memory:        "
                  << total_ram_bytes / 1024 << " KB" << std::endl;
        std::cout << "  System Free Memory:         "
                  << free_ram_bytes / 1024 << " KB";
        break;
      case HSA_DEVICE_TYPE_GPU:
        std::cout << "GPU)";
        break;
      case HSA_DEVICE_TYPE_DSP:
        std::cout << "DSP)";
        break;
      case HSA_DEVICE_TYPE_AIE:
        std::cout << "AIE)";
        break;

    }
    std::cout << std::endl;
  }

  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
      rocrtst::DumpMemoryPoolInfo(&pool_i, 2);
  }

  if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || pool_i.alloc_alignment == 0 ||
      (pool_i.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED)
      ) {
    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // To speed up the test, test all pools on CPU-0 but only the coarse-grained
  // pools on the remaining CPU agents.
  if (ag_type == HSA_DEVICE_TYPE_CPU && node_id > 0 &&
      !(pool_i.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {

    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // Do everything in "granule" units
  auto gran_sz = pool_i.alloc_granule;
  auto pool_sz = pool_i.aggregate_alloc_max / gran_sz;

  // Neg. test: Try to allocate more than the pool size
  err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
  EXPECT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, err);

  // A CPU pool cannot usefully be larger than the installed system RAM.
  if (ag_type == HSA_DEVICE_TYPE_CPU) {
    pool_sz = std::min<decltype(pool_sz)>(pool_sz, total_ram_bytes / gran_sz);
  }

  // Reduce upper_bound by 10% for system-RAM. Otherwise Linux OOM-Killer app can be triggered,
  // if system has allocated all available physical memory and swap space, and so killing this
  // process.
  uint64_t upper_bound = pool_sz;
  if (ag_type == HSA_DEVICE_TYPE_CPU) {
    upper_bound = static_cast<uint64_t>(pool_sz * 0.90);
  }
  uint64_t lower_bound = 0;
  auto max_alloc_size = upper_bound;

  while (true) {
    err = TestAllocate(pool, max_alloc_size * gran_sz);
    ASSERT_TRUE(err == HSA_STATUS_SUCCESS ||
                err == HSA_STATUS_ERROR_OUT_OF_RESOURCES ||
                err == HSA_STATUS_ERROR_INVALID_ALLOCATION);
    if (err == HSA_STATUS_SUCCESS) {
      break;
    }

    // The request was too large: remember it as the new upper bound and retry
    // with a size reduced by 1%.
    upper_bound = max_alloc_size;
    max_alloc_size = static_cast<uint64_t>(max_alloc_size * 0.99);

    // Stops the search once the failing size has dropped to zero.
    ASSERT_GT(upper_bound, lower_bound);
  }

  if (verbosity() > 0) {
    std::cout << "  Biggest single allocation size for this pool is " <<
                        (max_alloc_size * gran_sz)/1024 << "KB." << std::endl;
    std::cout << "  This is " <<
                  static_cast<float>(max_alloc_size)/pool_sz*100 <<
                                               "% of the total." << std::endl;
  }

  if (ag_type == HSA_DEVICE_TYPE_GPU) {
    // NOTE(review): pool_sz is a granule count here, whereas MemAvailableTest
    // compares the same 536870912 constant against a byte count
    // (aggregate_alloc_max). Confirm which unit is intended before changing
    // the threshold; behaviour is left as-is.
    if (pool_sz <= 536870912) {
      EXPECT_GE(static_cast<float>(max_alloc_size)/pool_sz, static_cast<float>(6)/10);
    } else {
      EXPECT_GE(static_cast<float>(max_alloc_size)/pool_sz, static_cast<float>(3)/4);
    }
  }
  if (verbosity() > 0) {
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryTest::MaxSingleAllocationTest(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools");

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      MaxSingleAllocationTest(a->agent, p);
    }
  }
}

// Checks that HSA_AMD_AGENT_INFO_MEMORY_AVAIL tracks allocations made from
// |pool| on agent |ag|, and exercises hsa_amd_pointer_info() on the way.
//
// Only GPU agents with an allocatable pool are tested. The test:
//  1. records the available memory, allocates half of the pool and verifies
//     the pointer info reported for that allocation (valid pointer, pointer
//     past the allocation, and a caller struct smaller than the full one);
//  2. verifies that available memory dropped by at least the allocated size;
//  3. allocates 30% (pools up to 512 MiB) or 80% of what remains and repeats
//     the check;
//  4. frees both buffers and expects the available memory to return to its
//     initial value.
void MemoryTest::MemAvailableTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool) {
  hsa_status_t err;
  rocrtst::pool_info_t pool_i;
  char ag_name[64];
  hsa_device_type_t ag_type;
  uint64_t ag_avail_memory_before, ag_avail_memory_after;

  // Releases an HSA allocation when its guard leaves scope. A failing ASSERT_*
  // returns from this function, so without the guards every early exit after
  // an allocation would leak device memory.
  struct HsaMemoryFree {
    void operator()(void* ptr) const { hsa_memory_free(ptr); }
  };
  using ScopedHsaAllocation = std::unique_ptr<void, HsaMemoryFree>;

  err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (verbosity() > 0) {
    std::cout << "  Agent: " << ag_name << " (";
    switch (ag_type) {
      case HSA_DEVICE_TYPE_CPU:
        std::cout << "CPU)";
        break;
      case HSA_DEVICE_TYPE_GPU:
        std::cout << "GPU)";
        break;
      case HSA_DEVICE_TYPE_DSP:
        std::cout << "DSP)";
        break;
      case HSA_DEVICE_TYPE_AIE:
        std::cout << "AIE)";
        break;
    }
    std::cout << std::endl;
  }

  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (ag_type != HSA_DEVICE_TYPE_GPU ||
      !pool_i.alloc_allowed || !pool_i.alloc_granule || !pool_i.alloc_alignment) {
    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // Do everything in "granule" units
  auto gran_sz = pool_i.alloc_granule;
  auto pool_sz = pool_i.aggregate_alloc_max / gran_sz;

  err = hsa_agent_get_info(ag, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                            &ag_avail_memory_before);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Try to allocate half
  uint64_t allocate_sz1 = (pool_sz / 2) * gran_sz;

  void* memPtr1 = nullptr;
  err = hsa_amd_memory_pool_allocate(pool, allocate_sz1, 0, &memPtr1);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  ScopedHsaAllocation guard1(memPtr1);

  hsa_amd_pointer_info_t info = {};
  info.size = sizeof(info);

  // Check pointer info for valid pointer
  ASSERT_SUCCESS(hsa_amd_pointer_info(memPtr1, &info, NULL, 0, NULL));

  ASSERT_EQ(info.type, HSA_EXT_POINTER_TYPE_HSA);
  ASSERT_EQ(info.sizeInBytes, allocate_sz1);
  ASSERT_EQ(info.agentOwner.handle, ag.handle);
  // ROCR may return a smaller size of info if it is an older version of ROCr and ROCr's
  // internal definition hsa_amd_pointer_info_t is smaller than the users. But ROCr cannot
  // return a bigger size
  ASSERT_LE(info.size, sizeof(info));

  // Check pointer info for invalid pointer
  hsa_amd_pointer_info_t info2 = {};
  info2.size = sizeof(info2);
  uint8_t* past_allocation = reinterpret_cast<uint8_t *>(memPtr1) + allocate_sz1 + 1;
  ASSERT_SUCCESS(hsa_amd_pointer_info(past_allocation, &info2, NULL, 0, NULL));
  ASSERT_EQ(info2.type, HSA_EXT_POINTER_TYPE_UNKNOWN);

  // Simulate case where ROCr has added extra parameters to hsa_amd_pointer_info.
  // i.e ROCr's hsa_amd_pointer_info is bigger than user's hsa_amd_pointer_info
  // ROCr should still return info.size same as user's size
  hsa_amd_pointer_info_t info3 = {};
  info3.size = sizeof(info3) - 2;
  ASSERT_SUCCESS(hsa_amd_pointer_info(memPtr1, &info3, NULL, 0, NULL));
  // Check the struct that was just queried (info3), not the earlier one.
  ASSERT_EQ(info3.type, HSA_EXT_POINTER_TYPE_HSA);
  ASSERT_EQ(info3.size, sizeof(info3) - 2);

  err = hsa_agent_get_info(ag, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                            &ag_avail_memory_after);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Memory available after could be smaller because of fragmentation
  ASSERT_GE(ag_avail_memory_before - allocate_sz1, ag_avail_memory_after);

  // Try to allocate 30%/80% of remaining, rounded down to a whole number of
  // granules so the request is expressed in the same units as allocate_sz1.
  const double remaining_fraction =
      (pool_i.aggregate_alloc_max <= 536870912) ? 0.3 : 0.8;
  const uint64_t allocate_sz2 =
      static_cast<uint64_t>(remaining_fraction * ag_avail_memory_after / gran_sz) * gran_sz;

  void* memPtr2 = nullptr;
  err = hsa_amd_memory_pool_allocate(pool, allocate_sz2, 0, &memPtr2);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  ScopedHsaAllocation guard2(memPtr2);

  err = hsa_agent_get_info(ag, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                            &ag_avail_memory_after);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  ASSERT_GE(ag_avail_memory_before - (allocate_sz1 + allocate_sz2),
                            ag_avail_memory_after);

  if (verbosity() > 0) {
    std::cout << "  Available memory before: " << ag_avail_memory_before << std::endl;
    std::cout << "         Memory allocated: " << allocate_sz1
                  << " + " << allocate_sz2 << std::endl;
    std::cout << "   Available memory after: " << ag_avail_memory_after << std::endl;
  }

  // Free explicitly (taking ownership back from the guards) so that the status
  // of each free can be asserted.
  err = hsa_memory_free(guard1.release());
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_memory_free(guard2.release());
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(ag, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                            &ag_avail_memory_after);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  ASSERT_EQ(ag_avail_memory_before, ag_avail_memory_after);

  if (verbosity() > 0) {
    std::cout << "     Available memory end: " << ag_avail_memory_after << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryTest::MemAvailableTest(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  PrintMemorySubtestHeader("Memory Available Allocation in Memory Pools");

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      MemAvailableTest(a->agent, p);
    }
  }
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/memory_basic.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test case covering memory-pool allocation behaviour: the largest
// single allocation a pool accepts, and the accounting of available memory
// reported for an agent.
class MemoryTest : public TestBase {
 public:
    // @Brief: Constructor; sets the default iteration count, title and
    // description of the test
    MemoryTest();

  // @Brief: Destructor for test case of MemoryTest
  virtual ~MemoryTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Runs the maximum-single-allocation sub-test on every pool of
  // every agent
  void MaxSingleAllocationTest(void);

  // @Brief: Runs the memory-available sub-test on every pool of every agent
  void MemAvailableTest(void);

  // @Brief: Allocates sz bytes from pool and frees them again immediately.
  // Returns the allocation status on failure, otherwise the status of the free
  hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz);

 private:
  // @Brief: Maximum-single-allocation sub-test for one pool of agent ag
  void MaxSingleAllocationTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool);

  // @Brief: Memory-available sub-test for one pool of agent ag
  void MemAvailableTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_


================================================
FILE: rocrtst/suites/functional/reference_count.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/* Test Name: reference_count
 *
 * Purpose: Verifies that the hsa_init and hsa_shut_down APIs properly increment
 * and decrement the runtime's reference count.
 *
 * Test Description:
 * 1) Initialize the ROC runtime with hsa_init by calling that API N times, (N
 * should be large).
 * 2) Verify that the runtime is operational by querying the agent list.
 * 3) Call hsa_shut_down N-1 times.
 * 4) Again, verify the runtime is operational by querying the agent list.
 *
 * Expected Results: The runtime should remain operational when the reference
 * count is positive. Repeated calls to hsa_init should not cause undefined behavior.
 *
 */
#include <algorithm>
#include <iostream>


#include "suites/functional/reference_count.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Number of times the hsa runtime is initialized (and then shut down) by
// TestReferenceCount().
static const int NumOfTimes = 1000;  // No of times the hsa runtime will be initialized
// Upper limit on the number of hsa_init() calls attempted by
// TestMaxReferenceCount(). The value is 2^31 + 1, i.e. it does not fit in a
// 32-bit signed int, which is why it is not declared as int.
static const double MaxRefCount = 2147483649;  // Setting to max value to test to INIT_MAX+2 as defined in hsa runtime

// Logging helper for HSA status codes.
// If |err| is not HSA_STATUS_SUCCESS, prints the call site (line and file),
// the numeric status and the text obtained from hsa_status_string().
// Despite its name, this variant does NOT return from the enclosing function:
// execution continues after the message is printed.
// Note: |err| is evaluated more than once, so pass a variable rather than a
// call expression.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
  } \
}

// Constructs the test case. The two flags select which title/description is
// stored; |referenceCount_| takes precedence when both are set, and nothing is
// stored when both are false.
ReferenceCountTest::ReferenceCountTest(bool referenceCount_, bool maxReferenceCount_) : TestBase() {
  // Default number of iterations of the main test. This value can be
  // overridden on the command line.
  set_num_iteration(10);

  if (referenceCount_) {
    set_title("RocR Reference Count Test");
    set_description("Initializes HSA runtime N times and shutdown N-1 times, again call shutdown");
    return;
  }

  if (maxReferenceCount_) {
    set_title("RocR Max Reference Count Test");
    set_description("This test initializes HSA runtime to maximum allowed reference count");
  }
}

// Destructor. Nothing to release here: the body is intentionally empty.
ReferenceCountTest::~ReferenceCountTest(void) {
}

// Intentionally performs no setup and does not call TestBase::SetUp():
// the hsa runtime is initialized in ReferenceCountTest::TestReferenceCount().
void ReferenceCountTest::SetUp(void) {
}


// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void ReferenceCountTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Displays information about this test by delegating to the base class; no
// test-specific information is added.
void ReferenceCountTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to print. The required-profile check is
// still performed, exactly as before, but both outcomes simply return.
void ReferenceCountTest::DisplayResults(void) const {
  static_cast<void>(!rocrtst::CheckProfile(this));
}

// Intentionally empty: unlike the other tests, this one does not call
// TestBase::Close() here.
void ReferenceCountTest::Close() {
  // TestBase::Close() would close handles opened within rocrtst utility calls
  // and call hsa_shut_down(). It is not needed here because every reference
  // taken on the runtime is released again inside the test functions
  // themselves (see ReferenceCountTest::TestReferenceCount(void)).
}

void ReferenceCountTest::TestReferenceCount(void) {
  hsa_status_t status;
  // Initialize hsa runtime N times
  for (int i = 0; i < NumOfTimes; ++i) {
    status = hsa_init();
    RET_IF_HSA_ERR(status);
  }

  // Shutdown hsa runtime N - 1 times
  for (int i = 0; i < NumOfTimes-1; ++i) {
    status = hsa_shut_down();
    RET_IF_HSA_ERR(status);
  }

  status = hsa_shut_down();
  RET_IF_HSA_ERR(status);
}

// Calls hsa_init() repeatedly until the runtime reports
// HSA_STATUS_ERROR_REFCOUNT_OVERFLOW (or MaxRefCount attempts have been made),
// prints how many initializations were accepted, and then releases exactly
// the references that were successfully taken.
//
// Any other hsa_init() failure is logged and ends the initialization phase
// instead of being silently retried.
void ReferenceCountTest::TestMaxReferenceCount(void) {
  hsa_status_t status;

  // Number of hsa_init() calls that returned success. A 64-bit counter is
  // used because MaxRefCount is larger than INT_MAX.
  long long successful_inits = 0;

  // Initialize hsa runtime the maximum allowed number of times
  for (long long i = 0; i < MaxRefCount; ++i) {
    status = hsa_init();
    if (status == HSA_STATUS_ERROR_REFCOUNT_OVERFLOW) {
      std::cout << "Max allowed reference count is = " << i << std::endl;
      // Graceful exit after reaching the INIT_MAX as defined in hsa runtime.
      break;
    }
    if (status != HSA_STATUS_SUCCESS) {
      // Unexpected failure: report it and stop taking references.
      RET_IF_HSA_ERR(status);
      break;
    }
    ++successful_inits;
  }

  // Release one reference per successful initialization so the runtime's
  // reference count ends where it started.
  for (long long i = 0; i < successful_inits; ++i) {
    status = hsa_shut_down();
    RET_IF_HSA_ERR(status);
  }
}
#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/reference_count.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_REFERENCE_COUNT_H_
#define ROCRTST_SUITES_FUNCTIONAL_REFERENCE_COUNT_H_

#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Functional test case for the reference counting performed by hsa_init() and
// hsa_shut_down().
class ReferenceCountTest:public TestBase {
 public:
    // @Brief: Constructor; the flags select which title/description is set
    // (_referenceCount takes precedence over _maxReferenceCount)
    ReferenceCountTest(bool _referenceCount, bool _maxReferenceCount);

    // @Brief: Destructor for the ReferenceCountTest class
    virtual ~ReferenceCountTest();

    // @Brief: Set up the environment for the test (no-op for this test)
    virtual void SetUp();

    // @Brief: Core test execution
    virtual void Run();

    // @Brief: Clean up and release the resources (no-op for this test)
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Initializes the runtime N times, shuts it down N-1 times and
    // then shuts it down once more
    void TestReferenceCount(void);

    // @Brief: Initializes the runtime repeatedly until the runtime reports a
    // reference count overflow, then shuts it down again
    void TestMaxReferenceCount(void);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_REFERENCE_COUNT_H_


================================================
FILE: rocrtst/suites/functional/signal_concurrent.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <iostream>
#include <vector>
#include "suites/functional/signal_concurrent.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Number of worker entries added to the signal create test group.
static const int N = 8;
// Number of signals handled by each worker (so N * M signals in total).
static const int M = 32;
// Initial value passed to hsa_signal_create().
static const int INI_VAL = 0;
// Value the wait functions wait for (condition HSA_SIGNAL_CONDITION_EQ).
static const int CMP_VAL = 1;
// Array of N * M signals shared between the worker functions below; it is
// allocated by the test that uses it.
hsa_signal_t *signals;

// Prints |err| to std::cout, followed by a newline, when the condition |C|
// evaluates to 1. |C| is parenthesized so that expressions built from
// operators with lower precedence than == (for example `a & b`) are compared
// as a whole. |err| is deliberately left unparenthesized so that callers can
// pass a stream expression such as `"text" << value`.
#define ASSERT_MSG(C, err) { \
  if ((C) == 1) { \
    std::cout << err << std::endl; \
  } \
}

static void TestSignalCreateFunction(void *data) {
  hsa_status_t status;
  int* offset = reinterpret_cast<int *>(data);
  int i;
  for (i = 0; i < M; ++i) {
    status = hsa_signal_create(INI_VAL, 0, NULL, &signals[*offset + i]);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
  return;
}

// Worker: waits, one signal after another, until each of the M * N signals in
// the global |signals| array equals CMP_VAL. Each wait blocks with no
// effective timeout (UINT64_MAX). |data| is unused.
static void signals_wait_host_func(void *data) {
  const int total_signals = M * N;
  int idx = 0;
  while (idx < total_signals) {
    hsa_signal_wait_scacquire(signals[idx], HSA_SIGNAL_CONDITION_EQ, CMP_VAL,
                              UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    ++idx;
  }
}

// Worker: waits, one signal after another, until each of the M * N signals in
// the global |signals| array equals CMP_VAL. The wait is performed from the
// host with hsa_signal_wait_scacquire(). |data| is unused.
static void signals_wait_component_func(void *data) {
  const int total_signals = M * N;
  int idx = 0;
  while (idx < total_signals) {
    // Launch a kernel with signal_wait_func
    hsa_signal_wait_scacquire(signals[idx], HSA_SIGNAL_CONDITION_EQ, CMP_VAL,
                              UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    ++idx;
  }
}

static void TestSignalDestroyFunction(void* data) {
  hsa_status_t status;
  int *offset = reinterpret_cast<int*>(data);
  int i;
  for (i = 0; i < M; i++) {
    status = hsa_signal_destroy(signals[*offset + i]);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }
}

static void signal_wait_host_func(void *data) {
  hsa_signal_t *signal_ptr = reinterpret_cast<hsa_signal_t*>(data);
  hsa_signal_wait_scacquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
  return;
}

static void signal_wait_component_func(void *data) {
  hsa_signal_t *signal_ptr = reinterpret_cast<hsa_signal_t*>(data);
  hsa_signal_wait_scacquire(*signal_ptr, HSA_SIGNAL_CONDITION_EQ, CMP_VAL, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
  return;
}
// Constructs the test case. The flags select which title/description is
// stored; they are checked in the order destroy, max_consumer, create, cpu and
// the first one that is set wins. Nothing is stored when all are false.
SignalConcurrentTest::SignalConcurrentTest(bool destroy, bool max_consumer, bool cpu, bool create)
    : TestBase() {
  // Default number of iterations of the main test. This value can be
  // overridden on the command line.
  set_num_iteration(10);

  if (destroy) {
    set_title("RocR Signal Destroy Concurrent Test");
    set_description("This test destroy signals concurrently");
    return;
  }

  if (max_consumer) {
    set_title("RocR Signal Max Consumers Test");
    set_description("This verify signal is created with num_consumers and signal can wait on all");
    return;
  }

  if (create) {
    set_title("RocR Signal Create Concurrent Test");
    set_description("This test create signals concurrently");
    return;
  }

  if (cpu) {
    set_title("RocR CPU Signal Completion Test");
    set_description("This test checks whether CPU signals completed");
  }
}

// Destructor. Nothing to release here: the body is intentionally empty.
SignalConcurrentTest::~SignalConcurrentTest(void) {
}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void SignalConcurrentTest::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  return;
}


// Runs the base-class test body, but only when the profile check passes.
void SignalConcurrentTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void SignalConcurrentTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results to print. The required profile is still compared
// with the one we are running on, but the outcome does not change what
// happens next: the function returns either way.
void SignalConcurrentTest::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    // Profile matches; nothing to display.
  }
}


void SignalConcurrentTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}

// Creates signals from N worker threads, has one waiter per enumerated agent
// wait on them, releases the waiters from this thread and finally destroys
// every signal.
//
// Steps:
//  1) Allocate the global `signals` array (N * M entries) and one start
//     offset per worker (i * M).
//  2) Run N workers through TestSignalCreateFunction, each receiving a
//     pointer to its own offset (presumably it creates M signals from that
//     offset on - the helper is defined earlier in this file).
//  3) Register one waiter per enumerated agent, selected by device type.
//  4) Store CMP_VAL into all N * M signals so the waiters can finish.
//  5) Destroy all signals and release both buffers.
void SignalConcurrentTest::TestSignalCreateConcurrent(void) {
  unsigned int i;
  hsa_status_t status;
  signals = reinterpret_cast<hsa_signal_t*>(malloc(sizeof(hsa_signal_t) * N * M));

  ASSERT_NE(signals, nullptr);

  // Allocate the offsets before the test group exists, so that the
  // allocation-failure path below has nothing but `signals` to release.
  int* offset = reinterpret_cast<int*>(malloc(sizeof(int) * N));

  EXPECT_NE(offset, nullptr);
  if (!offset) {
    free(signals);
    return;
  }

  struct rocrtst::test_group* tg_sg_create = rocrtst::TestGroupCreate(N);

  for (i = 0; i < N; ++i) {
    offset[i] = i * M;
    rocrtst::TestGroupAdd(tg_sg_create, &TestSignalCreateFunction, offset + i, 1);
  }
  rocrtst::TestGroupThreadCreate(tg_sg_create);
  rocrtst::TestGroupStart(tg_sg_create);
  rocrtst::TestGroupWait(tg_sg_create);
  rocrtst::TestGroupExit(tg_sg_create);
  rocrtst::TestGroupDestroy(tg_sg_create);

  std::vector<hsa_agent_t> gpus;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  // One waiter per enumerated agent; the routine depends on the device type.
  struct rocrtst::test_group* tg_sg_wait = rocrtst::TestGroupCreate(gpus.size());
  for (i = 0; i < gpus.size(); ++i) {
    hsa_device_type_t device_type;
    status = hsa_agent_get_info(gpus[i], HSA_AGENT_INFO_DEVICE, &device_type);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    if (device_type == HSA_DEVICE_TYPE_CPU) {
      rocrtst::TestGroupAdd(tg_sg_wait, &signals_wait_host_func, &(gpus[i]), 1);
    } else if (device_type == HSA_DEVICE_TYPE_GPU) {
      rocrtst::TestGroupAdd(tg_sg_wait, &signals_wait_component_func, &(gpus[i]), 1);
    } else if (device_type == HSA_DEVICE_TYPE_DSP) {
      ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n");
    } else {
      ASSERT_MSG(1, "ERROR: UNKNOWN DEVICE\n");
    }
  }

  rocrtst::TestGroupThreadCreate(tg_sg_wait);
  rocrtst::TestGroupStart(tg_sg_wait);

  // Release the waiters: every signal is set to the value they compare with.
  for (i = 0; i < N * M; ++i) {
    hsa_signal_store_relaxed(signals[i], CMP_VAL);
  }
  rocrtst::TestGroupWait(tg_sg_wait);
  rocrtst::TestGroupExit(tg_sg_wait);
  rocrtst::TestGroupDestroy(tg_sg_wait);

  for (i = 0; i < N * M; ++i) {
    status = hsa_signal_destroy(signals[i]);
    ASSERT_EQ(HSA_STATUS_SUCCESS, status);
  }

  free(signals);
  free(offset);
}

 /*
 * Test Name: TestSignalDestroyConcurrent
 * Scope: Conformance
 *
 * Purpose: Verifies that signals can be destroyed concurrently from
 * different threads.
 *
 * Test Description:
 * 1) In the main thread, create N*M signals that are maintained in a
 *    global list. The signals are created with num_consumers = 0 and a
 *    NULL consumer list.
 * 2) Start N threads that each destroy their own slice of M signals,
 *    starting at offset i*M.
 * 3) Wait for all threads to finish and release the bookkeeping buffers.
 *
 *   Expected Results: All of the signals should be created successfully
 *   and every concurrent hsa_signal_destroy call should return success.
 */
void SignalConcurrentTest::TestSignalDestroyConcurrent(void) {
  int i;

  signals = reinterpret_cast<hsa_signal_t *>(malloc(sizeof(hsa_signal_t) * N * M));

  ASSERT_NE(signals, nullptr);

  // Allocate the per-thread offsets before anything else is created so the
  // failure path only has `signals` to give back (it used to be leaked).
  int *offset = reinterpret_cast<int *>(malloc(sizeof(int) * N));

  EXPECT_NE(offset, nullptr);
  if (!offset) {
    free(signals);
    return;
  }

  struct rocrtst::test_group *tg_sg_destroy = rocrtst::TestGroupCreate(N);

  // Create every signal up front; only the destruction runs concurrently.
  for (i = 0; i < N; ++i) {
    int j;
    offset[i] = i * M;
    for (j = 0; j < M; ++j) {
      hsa_status_t status = hsa_signal_create(INI_VAL, 0, NULL, &signals[i * M + j]);
      ASSERT_EQ(HSA_STATUS_SUCCESS, status);
    }
  }

  // Each worker gets a pointer to the first index of its slice.
  for (i = 0; i < N; ++i) {
    rocrtst::TestGroupAdd(tg_sg_destroy, &TestSignalDestroyFunction, &offset[i], 1);
  }

  rocrtst::TestGroupThreadCreate(tg_sg_destroy);
  rocrtst::TestGroupStart(tg_sg_destroy);
  rocrtst::TestGroupWait(tg_sg_destroy);
  rocrtst::TestGroupExit(tg_sg_destroy);
  rocrtst::TestGroupDestroy(tg_sg_destroy);

  free(signals);
  free(offset);
}

/*
 * Test Name: TestSignalCreateMaxConsumers
 * Scope: Conformance
 *
 * Purpose: Verifies that when a signal is created with the num_consumers
 * parameter set to the total number of agents and a consumers list
 * that contains all agents, the signal can be waited on by all agent_list.
 *
 * Test Description:
 * 1) Create a signal using the following parameters,
 *    a) A num_consumers value equal to the total number
 *       of agents on the system.
 *    b) A consumers list containing all of the agents
 *       in the system.
 * 2) After the signal is created, have all of the agents in
 * the system wait on the signal one at a time,
 * either using the appropriate hsa_signal_wait API or a
 * HSAIL instruction executed in a kernel.
 * 3) Set the signal on another thread such that the waiting
 * threads wait condition is satisfied.
 *
 * Expected Results: All of the agents should be able to properly wait
 * on the signal.
 */
void SignalConcurrentTest::TestSignalCreateMaxConsumers(void) {
  unsigned int i;
  hsa_status_t status;

  std::vector<hsa_agent_t> gpus;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);


  hsa_signal_t signal;
  status = hsa_signal_create(INI_VAL, 0, NULL, &signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  struct rocrtst::test_group *tg_sg_wait = rocrtst::TestGroupCreate(gpus.size());
  for (i = 0; i < gpus.size(); ++i) {
    hsa_device_type_t device_type;
    hsa_agent_get_info(gpus[i], HSA_AGENT_INFO_DEVICE, &device_type);
    if (device_type == HSA_DEVICE_TYPE_CPU) {
      rocrtst::TestGroupAdd(tg_sg_wait, &signal_wait_host_func, &signal, 1);
    } else if (device_type == HSA_DEVICE_TYPE_GPU) {
      rocrtst::TestGroupAdd(tg_sg_wait, &signal_wait_component_func, &signal, 1);
    } else if (device_type == HSA_DEVICE_TYPE_DSP) {
      ASSERT_MSG(1, "ERROR: DSP_AGENT NOT SUPPORTED\n");
    } else {
      ASSERT_MSG(1, "ERROR: UNKOWN DEIVCE TYPE");
    }
  }

  rocrtst::TestGroupThreadCreate(tg_sg_wait);
  rocrtst::TestGroupStart(tg_sg_wait);

  hsa_signal_store_relaxed(signal, CMP_VAL);

  rocrtst::TestGroupWait(tg_sg_wait);
  rocrtst::TestGroupExit(tg_sg_wait);
  rocrtst::TestGroupDestroy(tg_sg_wait);

  status = hsa_signal_destroy(signal);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}

// Placeholder: the requirements for this test are not settled yet (to be
// clarified with the runtime team), so nothing is exercised. The function
// only prints a notice, which means the test is reported as passing.
void SignalConcurrentTest::TestSignalCPUCompletion() {
  const char* const notice = "The test skipped siliently and reports as pass";
  std::cout << notice << std::endl;
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/functional/signal_concurrent.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_SIGNAL_CONCURRENT_H_
#define ROCRTST_SUITES_FUNCTIONAL_SIGNAL_CONCURRENT_H_
#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test fixture for the concurrent signal tests (create, destroy,
// max-consumers and the CPU-completion placeholder).
class SignalConcurrentTest : public TestBase {
 public:
    // @Brief: Constructor; the flags select the title and description that
    // the test reports
    SignalConcurrentTest(bool destroy, bool max_consumer, bool cpu, bool create);

    // @Brief: Destructor for the SignalConcurrentTest class
    virtual ~SignalConcurrentTest();

    // @Brief: Prepare the environment (default agents and memory pools)
    virtual void SetUp();

    // @Brief: Execute the core of the test
    virtual void Run();

    // @Brief: Clean up and release the resources
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo();

    // @Brief: Create signals from several threads at once
    void TestSignalCreateConcurrent();

    // @Brief: Destroy signals from several threads at once
    void TestSignalDestroyConcurrent();

    // @Brief: Have every enumerated agent wait on one shared signal
    void TestSignalCreateMaxConsumers();

    // @Brief: Not implemented yet; kept as a member function for future
    // reference
    void TestSignalCPUCompletion();
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_SIGNAL_CONCURRENT_H_


================================================
FILE: rocrtst/suites/functional/signal_kernel.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *         AMD Research and AMD ROC Software Development
 *
 *         Advanced Micro Devices, Inc.
 *
 *         www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *  this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *  notice, this list of conditions and the following disclaimers in
 *  the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *  nor the names of its contributors may be used to endorse or promote
 *  products derived from this Software without specific prior written
 *  permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <inttypes.h>
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include "suites/functional/signal_kernel.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Number of work-items per dispatch; the dispatch code in this file uses it
// as the workgroup/grid size in x and as the number of kernel signals.
// Starts at 1; KernelSetFunction raises it to 16 for MULTISET and MULTIWAIT.
// NOTE(review): nothing visible in this file sets it back to 1, so tests that
// run later in the same process appear to inherit 16 - confirm intent.
static unsigned int NumOfKernels = 1;

// Prints `err` (followed by a newline) to std::cout when `C` equals 1.
// Despite the name this does not abort or fail the test; it only logs.
// `C` is parenthesised so expressions such as `a & b` are compared as a
// whole, and the do/while(0) wrapper makes the macro a single statement
// that is safe in an unbraced if/else.
#define ASSERT_MSG(C, err) \
  do { \
    if ((C) == 1) { \
      std::cout << err << std::endl; \
    } \
  } while (0)

// Builds the test object and selects the title/description matching the
// requested kernel test type. Types without a dedicated entry (e.g. NOTEST)
// leave the title and description untouched.
SignalKernelTest::SignalKernelTest(SignalKernelType type_) : TestBase() {
  // Default iteration count for the main test; it can be overridden on the
  // command line.
  set_num_iteration(10);

  switch (type_) {
    case SET:
      set_title("RocR Signal Kernel Set Test");
      set_description("This test verifies that the signal is set from kernel");
      break;
    case WAIT:
      set_title("RocR Signal Wait Test");
      set_description("This test verifies that the signal is re-set from system side");
      break;
    case MULTISET:
      set_title("RocR Signal Kernel Multi Set Test");
      set_description("This test verifies that the signal is set on multiple work-items");
      break;
    case MULTIWAIT:
      // The title used to be a copy of the MULTISET one, which made the two
      // tests indistinguishable in the output.
      set_title("RocR Signal Kernel Multi Wait Test");
      set_description("This tset verifies that re-set signal from system side, multiple work-items");
      break;
    default:
      break;
  }
}

// Nothing to release here: the destructor body is intentionally empty.
SignalKernelTest::~SignalKernelTest() {}

void SignalKernelTest::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  return;
}


// Compares the profile required by this test with the one we are running
// on, and executes the base-class run only when they are compatible.
void SignalKernelTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void SignalKernelTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results to display; the body is intentionally empty.
void SignalKernelTest::DisplayResults() const {}

void SignalKernelTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Kernel-argument layout shared with the signal kernels: a single pointer,
// with the structure aligned to 16 bytes.
struct __attribute__ ((aligned(16))) signal_args_s {
  void* signal_values;  // address of the kernel-signal array
};
typedef struct signal_args_s signal_args_t;

// Host-side staging copy; it is memcpy'd into the kernarg buffer before a
// dispatch.
signal_args_t signal_args;

// Dispatches one of the signal kernels from signal_operations_kernels.hsaco
// on every GPU agent that supports kernel dispatch and verifies that each
// kernel-signal slot reads back as 0 once the dispatch has completed.
//
// type_ selects the kernel and the number of work-items:
//   SET       -> signal_st_rlx_kernel,        1 work-item
//   MULTISET  -> signal_st_rlx_kernel_multi, 16 work-items
//   WAIT      -> signal_wait_kernel,          1 work-item (the host zeroes
//                the slots after ringing the doorbell)
//   MULTIWAIT -> signal_wait_kernel_multi,   16 work-items
// NOTE(review): unlike WAIT, the MULTIWAIT branch never zeroes the slots
// from the host here; TestSignalKernelMultiWait has its own implementation.
//
// Agents whose global pool cannot be accessed by the first CPU agent are
// skipped, because the host could neither create nor inspect the signals.
void SignalKernelTest::KernelSetFunction(SignalKernelType type_) {
  hsa_status_t status;

  // Get the GPU agents into a vector
  std::vector<hsa_agent_t> agent_list;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  // Get the CPU agents; the first one supplies the kernarg pool
  std::vector<hsa_agent_t> cpu_agent;
  status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
  ASSERT_FALSE(cpu_agent.empty());

  // Derive the work-item count from the requested type instead of reading
  // the file-level NumOfKernels, which may still hold 16 from an earlier
  // multi test run in the same process.
  const bool multi_item = (type_ == MULTISET) || (type_ == MULTIWAIT);
  const unsigned int num_kernels = multi_item ? 16 : 1;

  // Repeat the test for each agent
  for (unsigned int ii = 0; ii < agent_list.size(); ++ii) {
    // Check if the agent supports kernel dispatch
    uint32_t features = 0;
    status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
      continue;
    }

    // Find the agent's global memory pool (holds the kernel signals)
    hsa_amd_memory_pool_t global_pool;
    global_pool.handle = (uint64_t)-1;
    status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool,
                                                &global_pool);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // The host creates, writes and checks the kernel signals in that pool.
    // Without host access the signal pointer would never be initialised, so
    // skip the agent before any per-agent resource is created.
    hsa_amd_memory_pool_access_t access;
    status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
                                                global_pool,
                                                HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                                &access);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      std::cout << "Skipping agent: host cannot access its global memory pool" << std::endl;
      continue;
    }

    // Obtain the agent's machine model (only the query itself is checked)
    hsa_machine_model_t machine_model;
    status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Find a memory pool that supports kernel arguments
    hsa_amd_memory_pool_t kernarg_pool;
    kernarg_pool.handle = (uint64_t)-1;
    status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool,
                                                &kernarg_pool);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Create a queue
    hsa_queue_t* queue;
    status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL,
                              UINT32_MAX, UINT32_MAX, &queue);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    set_kernel_file_name("signal_operations_kernels.hsaco");
    switch (type_) {
      case SET:
        set_kernel_name("signal_st_rlx_kernel");
        break;
      case MULTISET:
        set_kernel_name("signal_st_rlx_kernel_multi");
        NumOfKernels = 16;  // kept so other code reading the global sees the same state
        break;
      case WAIT:
        set_kernel_name("signal_wait_kernel");
        break;
      case MULTIWAIT:
        set_kernel_name("signal_wait_kernel_multi");
        NumOfKernels = 16;  // kept so other code reading the global sees the same state
        break;
      default:
        break;
    }

    status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Allocate the kernel argument buffer from the correct pool
    signal_args_t* kernarg_buffer = NULL;
    status = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(signal_args_t), 0,
                                          reinterpret_cast<void**>(&kernarg_buffer));
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Create the completion signal
    hsa_signal_t completion_signal;
    status = hsa_signal_create(1, 0, NULL, &completion_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Allocate one signal slot per work-item in the agent's global pool and
    // make it accessible from the host.
    hsa_signal_t* kernel_signal = NULL;
    status = hsa_amd_memory_pool_allocate(global_pool, num_kernels * sizeof(hsa_signal_t), 0,
                                          reinterpret_cast<void**>(&kernel_signal));
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Create one signal per slot (slot k, not always slot 0). The slots are
    // overwritten later in the test, so remember the handles in order to
    // destroy the signals afterwards.
    std::vector<hsa_signal_t> created_signals(num_kernels);
    for (unsigned int k = 0; k < num_kernels; ++k) {
      status = hsa_signal_create(1, 0, NULL, kernel_signal + k);
      ASSERT_EQ(status, HSA_STATUS_SUCCESS);
      created_signals[k] = kernel_signal[k];
    }

    // Set the signal_args with kernel_signal, will be accessed from Kernel side
    signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
    memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));

    // Setup the dispatch packet
    hsa_kernel_dispatch_packet_t dispatch_packet;
    memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
    dispatch_packet.workgroup_size_x = num_kernels;
    dispatch_packet.workgroup_size_y = 1;
    dispatch_packet.workgroup_size_z = 1;
    dispatch_packet.grid_size_x = num_kernels;
    dispatch_packet.grid_size_y = 1;
    dispatch_packet.grid_size_z = 1;
    dispatch_packet.kernel_object = kernel_object();
    dispatch_packet.group_segment_size = group_segment_size();
    dispatch_packet.private_segment_size = private_segment_size();
    dispatch_packet.kernarg_address = kernarg_buffer;
    dispatch_packet.completion_signal = completion_signal;

    const uint32_t queue_mask = queue->size - 1;

    // Reserve a slot in the command queue and write the packet body
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    hsa_queue_store_write_index_relaxed(queue, index + 1);

    rocrtst::WriteAQLToQueueLoc(queue, index, &dispatch_packet);

    dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
    dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER;
    dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;

    // Set the AQL packet header last, after the packet body is in place
    void* q_base = queue->base_address;
    rocrtst::AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);

    // Ring the doorbell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);

    if (type_ == WAIT) {
      for (unsigned int k = 0; k < num_kernels; ++k) {
        // setting the kernel_signal to 0 from system side.
        kernel_signal[k].handle = 0;
      }
    }

    // Wait on the completion signal
    hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
                            HSA_WAIT_STATE_BLOCKED);

    // Check kernel signal: every slot must read back as 0
    for (unsigned int k = 0; k < num_kernels; ++k) {
      ASSERT_EQ(0, static_cast<int>(kernel_signal[k].handle));
    }

    status = hsa_signal_destroy(completion_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Destroy the per-slot signals through the handles saved at creation
    for (unsigned int k = 0; k < num_kernels; ++k) {
      status = hsa_signal_destroy(created_signals[k]);
      ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    }

    status = hsa_amd_memory_pool_free(kernel_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    status = hsa_amd_memory_pool_free(kernarg_buffer);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Destroy the queue
    status = hsa_queue_destroy(queue);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
  }
}
void SignalKernelTest::TestSignalKernelSet(void) {
  KernelSetFunction(SET);
}


void SignalKernelTest::TestSignalKernelMultiSet(void) {
  KernelSetFunction(MULTISET);
}


void SignalKernelTest::TestSignalKernelWait(void) {
  KernelSetFunction(WAIT);
}


void SignalKernelTest::TestSignalKernelMultiWait(void) {
  hsa_status_t status;

  // Get the GPU agents into a vector
  std::vector<hsa_agent_t> agent_list;
  status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);


  // Get CPU agent to get the kern_arg pool
  std::vector<hsa_agent_t> cpu_agent;
  status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  // Repeat the test for each agent
  unsigned int ii;
  for (ii = 0; ii < agent_list.size(); ++ii) {
    // Check if the queue supports dispatch
    uint32_t features = 0;
    status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
      continue;
    }

    // Find a memory pool that supports fine grained memory
    hsa_amd_memory_pool_t global_pool;
    global_pool.handle = (uint64_t)-1;
    status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool, &global_pool);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Obtain the agent's machine model
    hsa_machine_model_t machine_model;
    status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);


    // Find a memory pool that supports kernel arguments
    hsa_amd_memory_pool_t kernarg_pool;
    kernarg_pool.handle = (uint64_t)-1;
    status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool, &kernarg_pool);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Create a queue
    hsa_queue_t* queue;
    status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);


    set_kernel_file_name("signal_operations_kernels.hsaco");
    set_kernel_name("signal_wait_kernel_multi");
    status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Allocate the kernel argument buffer from the correct pool
    signal_args_t* kernarg_buffer = NULL;
    status = hsa_amd_memory_pool_allocate(kernarg_pool,
           sizeof(signal_args_t), 0,
           reinterpret_cast<void**>(&kernarg_buffer));
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    // Create the completion signal
    hsa_signal_t completion_signal;
    status = hsa_signal_create(1, 0, NULL, &completion_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    hsa_amd_memory_pool_access_t access;
    status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
                                              global_pool,
                                              HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                              &access);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    hsa_signal_t* kernel_signal;

    if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      // Create the kernel signal
      status = hsa_amd_memory_pool_allocate(global_pool,
                                          NumOfKernels*sizeof(hsa_signal_t), 0,
                                          reinterpret_cast<void**>(&kernel_signal));
      ASSERT_EQ(status, HSA_STATUS_SUCCESS);
      status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
      ASSERT_EQ(status, HSA_STATUS_SUCCESS);

      for (unsigned int k = 0; k < NumOfKernels; ++k) {
       status = hsa_signal_create(1, 0, NULL, kernel_signal + k);
       ASSERT_EQ(status, HSA_STATUS_SUCCESS);
      }

      // Set the signal_args with kernel_signal, will be accessed from Kernel side
      signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
    }

    memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));

    // Setup the dispatch packet
    hsa_kernel_dispatch_packet_t dispatch_packet;
    memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));

    dispatch_packet.workgroup_size_x = NumOfKernels;
    dispatch_packet.workgroup_size_y = 1;
    dispatch_packet.workgroup_size_z = 1;
    dispatch_packet.grid_size_x = NumOfKernels;
    dispatch_packet.grid_size_y = 1;
    dispatch_packet.grid_size_z = 1;
    dispatch_packet.kernel_object = kernel_object();
    dispatch_packet.group_segment_size = group_segment_size();
    dispatch_packet.private_segment_size = private_segment_size();
    dispatch_packet.kernarg_address = kernarg_buffer;
    dispatch_packet.completion_signal = completion_signal;

    // const uint32_t queue_size = queue->size;
    const uint32_t queue_mask = queue->size - 1;
    // write to command queue
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    hsa_queue_store_write_index_relaxed(queue, index + 1);

    rocrtst::WriteAQLToQueueLoc(queue, index, &dispatch_packet);


    dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
    dispatch_packet.header |= 1 << HSA_PACKET_HEADER_BARRIER;
    dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;

    void* q_base = queue->base_address;
    // Set the Aql packet header
    rocrtst::AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);


    // ringdoor bell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);

    // setting the kernel_signal to 0 from system side.
    for (unsigned int k = 0; k < NumOfKernels; ++k) {
      kernel_signal[k].handle = 0;
    }
    // Wait on the completion signal
    hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);

    // Check kernel signal
    ASSERT_EQ(0, (int)kernel_signal->handle);

    // destroy the signal created
    status = hsa_signal_destroy(completion_signal);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    status = hsa_amd_memory_pool_free(kernarg_buffer);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    // Destroy the queue
    status = hsa_queue_destroy(queue);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
  }
}


================================================
FILE: rocrtst/suites/functional/signal_kernel.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_FUNCTIONAL_SIGNAL_KERNEL_H_
#define ROCRTST_SUITES_FUNCTIONAL_SIGNAL_KERNEL_H_
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Identifies the signal-kernel operation associated with a SignalKernelTest.
// Enumerator values are implicit and start at zero, in declaration order.
enum SignalKernelType {
  SET,        // Signal kernel set operation
  MULTISET,   // Multiple kernel set operation
  WAIT,       // Single wait operation
  MULTIWAIT,  // Multiple wait operation
  NOTEST      // No operation
};
// Functional test class for signal operations performed by kernels.
// Derives from TestBase and overrides its virtual SetUp/Run/Close/Display hooks.
// Only declarations are visible here; the definitions live in the matching source file.
class SignalKernelTest : public TestBase {
 public:
    // @Brief: Constructor; takes the SignalKernelType for this instance.
    // NOTE(review): presumably the type selects which Test* scenario is run - confirm
    // against the implementation.
    SignalKernelTest(SignalKernelType);

    // @Brief: Destructor for the SignalKernelTest class
    virtual ~SignalKernelTest();

    // @Brief: Set up the environment for measurement
    virtual void SetUp();

    // @Brief: Core measurement execution
    virtual void Run();

    // @Brief: Clean up and retrieve the resources
    virtual void Close();

    // @Brief: Display results
    virtual void DisplayResults() const;

    // @Brief: Display information about what this test does
    virtual void DisplayTestInfo(void);

    // @Brief: Scenario entry point named after the SET operation type
    void TestSignalKernelSet(void);

    // @Brief: Scenario entry point named after the WAIT operation type
    void TestSignalKernelWait(void);

    // @Brief: Scenario entry point named after the MULTISET operation type
    void TestSignalKernelMultiSet(void);

    // @Brief: Scenario entry point named after the MULTIWAIT operation type
    void TestSignalKernelMultiWait(void);

    // @Brief: Helper taking a SignalKernelType.
    // NOTE(review): presumably the shared kernel-launch routine used by the scenarios
    // above - confirm against the implementation.
    void KernelSetFunction(SignalKernelType);
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_SIGNAL_KERNEL_H_


================================================
FILE: rocrtst/suites/functional/virtual_memory.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <sys/mman.h>
#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <sys/socket.h>

#include "suites/functional/virtual_memory.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Wrap printf to add first or second process indicator.
//
// Writes "line:<__LINE__> P<n>: " followed by the formatted message to stdout, where <n> is 0
// when parentProcess_ is true and 1 otherwise. The message is emitted when verbosity() is at
// least VERBOSE_STANDARD, or unconditionally when parentProcess_ is false.
//
// The macro expands at the call site, so verbosity(), VERBOSE_STANDARD and parentProcess_ must
// all be visible there. `format` must be a string literal because it is concatenated with the
// prefix. The indicator is cast to unsigned int so the argument type matches the %u conversion.
#define PROCESS_LOG(format, ...)                                                                   \
  {                                                                                                \
    if (verbosity() >= VERBOSE_STANDARD || !parentProcess_) {                                      \
      fprintf(stdout, "line:%d P%u: " format, __LINE__,                                            \
              static_cast<unsigned int>(!parentProcess_), ##__VA_ARGS__);                          \
    }                                                                                              \
  }

// Fork safe ASSERT_EQ.
//
// Usage: FORK_ASSERT_EQ(actual, expected) or FORK_ASSERT_EQ(actual, expected, message).
// On mismatch the macro streams the optional message to std::cout, records the failure by
// writing -1 to shared_->parent_status (when parentProcess_ is true) or shared_->child_status
// (otherwise), and then invokes ASSERT_EQ on the same operands.
//
// Caveats visible from the expansion:
//  - `actual` and `expected` are evaluated more than once (both `if` conditions plus the
//    ASSERT_EQ), so arguments with side effects are repeated.
//  - The inner `if` repeats the outer condition verbatim.
//  - The macro expands to a bare `if` statement, so it must not be followed by an `else`
//    that is meant to pair with an enclosing `if`.
//  - parentProcess_ and shared_ must be visible at the call site.

// MSG selects its second argument: the caller's message when one was supplied, otherwise the
// trailing "" that FORK_ASSERT_EQ appends.
#define MSG(y, msg, ...) msg
// Y selects its first argument: the expected value.
#define Y(y, ...) y

#define FORK_ASSERT_EQ(x, ...)                                                                     \
  if ((x) != (Y(__VA_ARGS__))) {                                                                   \
    if ((x) != (Y(__VA_ARGS__))) {                                                                 \
      std::cout << MSG(__VA_ARGS__, "");                                                           \
      if (parentProcess_) {                                                                        \
        shared_->parent_status = -1;                                                               \
      } else {                                                                                     \
        shared_->child_status = -1;                                                                \
      }                                                                                            \
      ASSERT_EQ(x, Y(__VA_ARGS__));                                                                \
    }                                                                                              \
  }

// Line printed after each subtest to visually separate it from the next one.
static const char kSubTestSeparator[] = "  **************************";

// Prints the banner announcing a virtual-memory subtest.
// `header` is the human-readable subtest name inserted into the banner.
static void PrintMemorySubtestHeader(const char* header) {
  std::ostream& out = std::cout;
  out << "  *** Virtual Memory Functional Subtest: ";
  out << header;
  out << " ***" << std::endl;
}

// Constructs the test object and sets its title and description strings.
VirtMemoryTestBasic::VirtMemoryTestBasic() : TestBase() {
  set_title("ROCr Virtual Memory Basic Tests");
  set_description(" Tests virtual memory API functions");
}

// Nothing to release here; the body is intentionally empty.
VirtMemoryTestBasic::~VirtMemoryTestBasic() {}

// Per-pool body of the CreateDestroy subtest.
//
// Pools whose agent is not a GPU, or that do not allow allocation, are skipped silently.
// For an eligible pool the test:
//   1. reserves two virtual address ranges (one is deliberately never mapped),
//   2. creates a MEMORY_TYPE_NONE and a MEMORY_TYPE_PINNED handle and checks that
//      hsa_amd_vmem_get_alloc_properties_from_handle reports the pool and type back,
//   3. maps the first handle and checks access is NONE for every GPU,
//   4. grants RO to every GPU and verifies it, and checks that querying the unmapped range
//      fails with HSA_STATUS_ERROR_INVALID_ALLOCATION,
//   5. when more than one GPU exists, upgrades only gpus[1] to RW and verifies the others
//      still report RO,
//   6. unmaps, releases both handles and frees both address ranges.
//
// @param agent Agent that owns `pool`.
// @param pool  Memory pool the handles are created from.
void VirtMemoryTestBasic::TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
  std::vector<hsa_agent_t> gpus;
  rocrtst::pool_info_t pool_i;
  hsa_device_type_t ag_type;
  void* addrRangeUnmapped;
  hsa_status_t err;
  void* addrRange;

  ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));

  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));

  if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;

  size_t granule_size = pool_i.alloc_granule;

  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * granule_size, 0, 0));
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRangeUnmapped, 10 * granule_size, 0, 0));

  hsa_amd_vmem_alloc_handle_t mem_handle;
  ASSERT_SUCCESS(
      hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handle));

  /* Test alloc properties returns correct memory type and pool handle */
  hsa_amd_memory_pool_t poolRet;
  hsa_amd_memory_type_t memTypeRet;
  ASSERT_SUCCESS(hsa_amd_vmem_get_alloc_properties_from_handle(mem_handle, &poolRet, &memTypeRet));

  ASSERT_EQ(poolRet.handle, pool.handle);
  ASSERT_EQ(memTypeRet, MEMORY_TYPE_NONE);

  hsa_amd_vmem_alloc_handle_t mem_handleTypePinned;
  ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, 10 * granule_size, MEMORY_TYPE_PINNED, 0,
                                            &mem_handleTypePinned));

  ASSERT_SUCCESS(
      hsa_amd_vmem_get_alloc_properties_from_handle(mem_handleTypePinned, &poolRet, &memTypeRet));
  ASSERT_EQ(poolRet.handle, pool.handle);
  ASSERT_EQ(memTypeRet, MEMORY_TYPE_PINNED);

  ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * granule_size, 0, mem_handle, 0));

  // Access to each GPU should be None
  for (const hsa_agent_t& gpu : gpus) {
    // Start from a value that differs from the expected one so a no-op query is detected.
    hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_RW;

    ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, gpu));
    ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_NONE);
  }

  /* Set RO Access to all GPUs */
  {
    // A std::vector replaces the former variable-length array, which is not standard C++.
    std::vector<hsa_amd_memory_access_desc_t> desc;
    desc.reserve(gpus.size());
    for (const hsa_agent_t& gpu : gpus) {
      desc.push_back({HSA_ACCESS_PERMISSION_RO, gpu});
    }

    ASSERT_SUCCESS(
        hsa_amd_vmem_set_access(addrRange, 10 * granule_size, desc.data(), desc.size()));
  }

  for (const hsa_agent_t& gpu : gpus) {
    hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE;

    ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, gpu));
    ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RO);

    /* addrRangeUnmapped was never mapped, so this is an invalid mapping */
    err = hsa_amd_vmem_get_access(addrRangeUnmapped, &perm, gpu);
    ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION);
  }

  if (gpus.size() > 1) {
    /* Call set_access with a smaller list of agents, this should leave access to
     * the other GPUs unchanged */
    hsa_amd_memory_access_desc_t desc = {HSA_ACCESS_PERMISSION_RW, gpus[1]};
    ASSERT_SUCCESS(hsa_amd_vmem_set_access(addrRange, 10 * granule_size, &desc, 1));

    for (size_t i = 0; i < gpus.size(); i++) {
      hsa_access_permission_t perm = HSA_ACCESS_PERMISSION_NONE;

      /* Only 2nd GPU should have RW access */
      ASSERT_SUCCESS(hsa_amd_vmem_get_access(addrRange, &perm, gpus[i]));
      if (i == 1) {
        ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RW);
      } else {
        ASSERT_EQ(perm, HSA_ACCESS_PERMISSION_RO);
      }
    }
  }

  ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * granule_size));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));
  // The pinned handle was never mapped but still owns its allocation; release it as well so
  // the test does not leak memory for every pool it visits.
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handleTypePinned));
  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 20 * granule_size));
  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRangeUnmapped, 10 * granule_size));
}

// Driver for the CreateDestroy subtest.
//
// Prints the subtest banner (when verbosity() > 0), skips with a message if the runtime
// reports that the virtual memory API is unsupported, and otherwise runs the per-pool
// overload for every (agent, pool) pair returned by rocrtst::GetAgentPools.
void VirtMemoryTestBasic::TestCreateDestroy(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("CreateDestroy Test");
  }

  bool vmem_supported = false;
  ASSERT_SUCCESS(
      hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&vmem_supported));
  if (!vmem_supported) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
  ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));

  for (const auto& entry : agent_pools) {
    for (const auto& pool : entry->pools) {
      TestCreateDestroy(entry->agent, pool);
    }
  }

  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Per-pool body of the Reference Count subtest.
//
// Pools whose agent is not a GPU, or that do not allow allocation, are skipped silently.
// The test maps one handle, obtains a second handle to the same allocation through
// hsa_amd_vmem_retain_alloc_handle, and then checks that:
//   - unmapping or freeing with a size that does not match the mapping/reservation fails,
//   - each handle can be released exactly once (a further release fails),
//   - unmapping still succeeds after both handles were released.
//
// @param agent Agent that owns `pool`.
// @param pool  Memory pool the handle is created from.
void VirtMemoryTestBasic::TestRefCount(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
  hsa_status_t err;
  char ag_name[64];
  hsa_device_type_t ag_type;
  rocrtst::pool_info_t pool_i;

  ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name));
  ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));
  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));

  const bool is_gpu_agent = (ag_type == HSA_DEVICE_TYPE_GPU);
  if (!is_gpu_agent || !pool_i.alloc_allowed) return;

  const size_t granule_size = pool_i.alloc_granule;
  const size_t full_size = 10 * granule_size;
  const size_t wrong_size = 5 * granule_size;

  void* addrRange;
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, full_size, 0, 0));

  hsa_amd_vmem_alloc_handle_t primary_handle;
  ASSERT_SUCCESS(hsa_amd_vmem_handle_create(pool, full_size, MEMORY_TYPE_NONE, 0, &primary_handle));
  ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, full_size, 0, primary_handle, 0));

  // Obtain a duplicate handle for the allocation mapped at addrRange.
  hsa_amd_vmem_alloc_handle_t duplicate_handle;
  ASSERT_SUCCESS(hsa_amd_vmem_retain_alloc_handle(&duplicate_handle, addrRange));

  // Unmapping with a size that differs from the mapped size must be rejected.
  err = hsa_amd_vmem_unmap(addrRange, wrong_size);
  ASSERT_NE(err, HSA_STATUS_SUCCESS);

  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(primary_handle));

  // The duplicate can be released once ...
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(duplicate_handle));

  // ... but a second release of the same handle must fail.
  err = hsa_amd_vmem_handle_release(duplicate_handle);
  ASSERT_NE(err, HSA_STATUS_SUCCESS);

  // Unmapping with the correct size is still valid after the handles were released.
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, full_size));

  // Freeing with a size that differs from the reserved size must be rejected.
  err = hsa_amd_vmem_address_free(addrRange, wrong_size);
  ASSERT_NE(err, HSA_STATUS_SUCCESS);

  // Freeing with the reserved size succeeds.
  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, full_size));
}

// Driver for the Reference Count subtest.
//
// Prints the subtest banner (when verbosity() > 0), skips with a message if the runtime
// reports that the virtual memory API is unsupported, and otherwise runs the per-pool
// overload for every (agent, pool) pair returned by rocrtst::GetAgentPools.
void VirtMemoryTestBasic::TestRefCount(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("Reference Count Test");
  }

  bool vmem_supported = false;
  ASSERT_SUCCESS(
      hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&vmem_supported));
  if (!vmem_supported) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
  ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));

  for (const auto& entry : agent_pools) {
    for (const auto& pool : entry->pools) {
      TestRefCount(entry->agent, pool);
    }
  }

  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Per-pool body of the Partial Mapping subtest.
//
// Pools whose agent is not a GPU, or that do not allow allocation, are skipped silently.
// The test reserves a 15-granule address range, maps sub-ranges of it from two handles and
// checks that mappings which run past the end of the reservation, or which overlap an
// existing mapping, are rejected. All mappings are then removed, both handles are released
// and the reservation is freed.
//
// @param agent Agent that owns `pool`.
// @param pool  Memory pool the handles are created from.
void VirtMemoryTestBasic::TestPartialMapping(hsa_agent_t agent, hsa_amd_memory_pool_t pool) {
  rocrtst::pool_info_t pool_i;
  hsa_device_type_t ag_type;
  hsa_status_t err;
  void* addrRange;

  ASSERT_SUCCESS(hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type));

  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(pool, &pool_i));

  if (ag_type != HSA_DEVICE_TYPE_GPU || !pool_i.alloc_allowed) return;

  size_t granule_size = pool_i.alloc_granule;

  /************************************************************************************************
    Map partial chunks within the address range and confirm what overlaps fail.
    Units below are in multiples of granule_size.

              ------------------------------------------------------------------
              | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
              ------------------------------------------------------------------
    Step 1:             A   A   A   A   A   A
    Step 2:                                                  B    B    B
    Step 3:                                                                 B
    Step 4:                                     B   B    B

   ***********************************************************************************************/

  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 15 * granule_size, 0, 0));

  // Address of the granule with the given index inside the reserved range.
  auto granule_at = [&addrRange, granule_size](size_t index) -> void* {
    return static_cast<char*>(addrRange) + index * granule_size;
  };

  hsa_amd_vmem_alloc_handle_t mem_handleA;

  // Step 1
  ASSERT_SUCCESS(
      hsa_amd_vmem_handle_create(pool, 8 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handleA));

  ASSERT_SUCCESS(hsa_amd_vmem_map(granule_at(2), 6 * granule_size, 0, mem_handleA, 0));

  // Step 2
  hsa_amd_vmem_alloc_handle_t mem_handleB;
  ASSERT_SUCCESS(
      hsa_amd_vmem_handle_create(pool, 8 * granule_size, MEMORY_TYPE_NONE, 0, &mem_handleB));

  ASSERT_SUCCESS(hsa_amd_vmem_map(granule_at(11), 3 * granule_size, 0, mem_handleB, 0));

  // Step 3
  // Should fail as this is exceeding size of address range
  err = hsa_amd_vmem_map(granule_at(14), 2 * granule_size, 0, mem_handleB, 0);
  ASSERT_NE(err, HSA_STATUS_SUCCESS);

  ASSERT_SUCCESS(hsa_amd_vmem_map(granule_at(14), 1 * granule_size, 0, mem_handleB, 0));

  // Step 4
  // Should fail as this is overlapping with AddressRange[11] already mapped
  err = hsa_amd_vmem_map(granule_at(8), 4 * granule_size, 0, mem_handleB, 0);
  ASSERT_NE(err, HSA_STATUS_SUCCESS);

  ASSERT_SUCCESS(hsa_amd_vmem_map(granule_at(8), 3 * granule_size, 0, mem_handleB, 0));

  // Done, unmap all
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(granule_at(2), 6 * granule_size));
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(granule_at(8), 3 * granule_size));
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(granule_at(11), 3 * granule_size));
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(granule_at(14), 1 * granule_size));

  // Release both handles; without this each visited pool leaks two 8-granule allocations.
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handleA));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handleB));

  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 15 * granule_size));
}

// Driver for the Partial Mapping subtest.
//
// Prints the subtest banner (when verbosity() > 0), skips with a message if the runtime
// reports that the virtual memory API is unsupported, and otherwise runs the per-pool
// overload for every (agent, pool) pair returned by rocrtst::GetAgentPools.
void VirtMemoryTestBasic::TestPartialMapping(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("Partial Mapping Test");
  }

  bool vmem_supported = false;
  ASSERT_SUCCESS(
      hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&vmem_supported));
  if (!vmem_supported) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
  ASSERT_SUCCESS(rocrtst::GetAgentPools(&agent_pools));

  for (const auto& entry : agent_pools) {
    for (const auto& pool : entry->pools) {
      TestPartialMapping(entry->agent, pool);
    }
  }

  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Kernel argument block handed to the dispatched kernel: three int pointers, with the
// struct aligned to 16 bytes.
struct __attribute__((aligned(16))) args_t {
  int* a;
  int* b;
  int* c;
};
typedef args_t args;

// Kernel argument buffer shared by the dispatching tests in this file; null until a test
// allocates it.
args* kernArgsVirt = nullptr;

// Test to check CPU can read & write to GPU memory.
//
// Only coarse-grained global pools are exercised; other pools return silently. The test is
// skipped with a message when the CPU agent can never access the pool (no large BAR) or when
// the pool does not allow allocation / reports a zero granule or alignment.
//
// A range of 100 granules is reserved, backed by a handle from `device_pool`, mapped and
// made RW for both agents. The CPU then writes an index pattern straight into the mapping
// and reads it back; any mismatch is reported and fails the test after cleanup.
//
// @param cpuAgent    CPU agent that performs the direct reads and writes.
// @param gpuAgent    GPU agent that is also granted RW access to the mapping.
// @param device_pool GPU memory pool that backs the mapping.
void VirtMemoryTestBasic::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
                                                   hsa_amd_memory_pool_t device_pool) {
  hsa_status_t err;

  rocrtst::pool_info_t pool_i;
  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));

  if (!(pool_i.segment == HSA_AMD_SEGMENT_GLOBAL &&
        pool_i.global_flag == HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED))
    return;

  // Check the query result so `access` is never read uninitialized.
  hsa_amd_memory_pool_access_t access;
  ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
      cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));
  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout << "    Test not applicable as system is not large bar - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }
  if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || pool_i.alloc_alignment == 0) {
    if (verbosity() > 0) {
      std::cout << "    Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  auto max_alloc_size = pool_i.alloc_granule * 100;
  unsigned int max_element = max_alloc_size / sizeof(unsigned int);
  unsigned int* dev_data = NULL;

  // Reference pattern kept in host memory; the vector frees itself on every exit path,
  // including early returns from failed assertions.
  std::vector<unsigned int> host_data(max_element);
  for (unsigned int i = 0; i < max_element; ++i) {
    host_data[i] = i;
  }

  hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, cpuAgent},
                                                {HSA_ACCESS_PERMISSION_RW, gpuAgent}};

  hsa_amd_vmem_alloc_handle_t mem_handle_dev;
  ASSERT_SUCCESS(
      hsa_amd_vmem_address_reserve(reinterpret_cast<void**>(&dev_data), max_alloc_size, 0, 0));

  ASSERT_NE(dev_data, nullptr);

  ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, max_alloc_size, MEMORY_TYPE_NONE, 0,
                                            &mem_handle_dev));
  ASSERT_SUCCESS(
      hsa_amd_vmem_map(reinterpret_cast<void*>(dev_data), max_alloc_size, 0, mem_handle_dev, 0));

  // Give both the CPU and the GPU agent RW access to the device mapping
  ASSERT_SUCCESS(hsa_amd_vmem_set_access(dev_data, max_alloc_size, permsAccess, 2));

  // Verify CPU can read & write to GPU memory
  std::cout << "    Verify CPU can read & write to GPU memory" << std::endl;
  for (unsigned int i = 0; i < max_element; ++i) {
    dev_data[i] = i;  // Write to gpu memory directly
  }

  size_t mismatches = 0;
  for (unsigned int i = 0; i < max_element; ++i) {
    const unsigned int read_back = dev_data[i];  // Reading GPU memory
    if (host_data[i] != read_back) {
      fprintf(stdout,
              "    Values not matching !! host_data[%u]:%u ,"
              "dev_data[%u]:%u\n",
              i, host_data[i], i, read_back);
      ++mismatches;
    }
  }
  if (mismatches == 0) {
    std::cout << "    CPU have read & write to GPU memory successfully" << std::endl;
  }

  ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, max_alloc_size));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle_dev));
  ASSERT_SUCCESS(hsa_amd_vmem_address_free(reinterpret_cast<void*>(dev_data), max_alloc_size));

  // Fail only after the device resources were returned, so a data mismatch does not also
  // leak the mapping.
  ASSERT_EQ(mismatches, static_cast<size_t>(0));
}

void VirtMemoryTestBasic::CPUAccessToGPUMemoryTest(void) {
  hsa_status_t err;
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));

  if (verbosity() > 0) PrintMemorySubtestHeader("CPU To GPU Access test");

  bool supp = false;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  for (unsigned int i = 0; i < gpus.size(); ++i) {
    hsa_amd_memory_pool_t gpu_pool;
    memset(&gpu_pool, 0, sizeof(gpu_pool));
    ASSERT_SUCCESS(
        hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
    if (gpu_pool.handle == 0) {
      std::cout << "    No global mempool in gpu agent" << std::endl;
      return;
    }
    CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
  }
  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Test to check GPU can read & write to CPU memory.
//
// Only coarse-grained global pools that allow allocation are exercised; other pools return
// silently. The test is skipped when the CPU agent can never access the pool (no large BAR),
// regardless of verbosity.
//
// Host buffers are allocated from the CPU agent's global pool and made accessible to the
// GPU. A device buffer is created through the virtual memory API (reserve, handle create,
// map, set access). The "gpuReadWrite" kernel is dispatched with:
//   a = host input data, b = host result buffer, c = device result buffer.
// Afterwards the test checks that the device result equals the host input (GPU read system
// memory) and that host result[i] == i (GPU wrote system memory), then frees everything.
//
// @param cpuAgent    CPU agent supplying the kernarg and host pools.
// @param gpuAgent    GPU agent that runs the kernel.
// @param device_pool GPU memory pool that backs the device buffer.
void VirtMemoryTestBasic::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
                                                   hsa_amd_memory_pool_t device_pool) {
  rocrtst::pool_info_t pool_i;
  hsa_status_t err;

  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));

  if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
      pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
    return;

  hsa_amd_memory_pool_access_t access;
  ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
      cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));

  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout << "    Test not applicable as system is not large bar - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    // Skip regardless of verbosity: the CPU touches the device buffer directly below.
    return;
  }

  hsa_queue_t* queue = NULL;  // command queue
  hsa_signal_t signal = {0};  // completion signal

  static const int kMemoryAllocSize = 1024;

  // get queue size
  uint32_t queue_size = 0;
  ASSERT_SUCCESS(hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size));

  // create queue
  ASSERT_SUCCESS(
      hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue));

  // Find a memory pool that supports kernel arguments.
  hsa_amd_memory_pool_t kernarg_pool;
  ASSERT_SUCCESS(
      hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetKernArgMemoryPool, &kernarg_pool));

  // Get System Memory Pool on the cpuAgent to allocate host side buffers
  hsa_amd_memory_pool_t global_pool;
  ASSERT_SUCCESS(
      hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetGlobalMemoryPool, &global_pool));

  struct host_data_t {
    int data[kMemoryAllocSize * 4];
    int dup_data[kMemoryAllocSize * 4];
    int result[kMemoryAllocSize * 4];
  };

  struct dev_data_t {
    int result[kMemoryAllocSize * 4];
  };

  struct host_data_t* host_data;
  struct dev_data_t* dev_data;

  ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(global_pool, sizeof(*host_data), 0,
                                              reinterpret_cast<void**>(&host_data)));

  // Allow gpuAgent access to all allocated system memory.
  ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, host_data));
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(*dev_data), 0, 0));

  hsa_amd_vmem_alloc_handle_t mem_handle;

  ASSERT_SUCCESS(
      hsa_amd_vmem_handle_create(device_pool, sizeof(*dev_data), MEMORY_TYPE_NONE, 0, &mem_handle));
  ASSERT_SUCCESS(hsa_amd_vmem_map(dev_data, sizeof(*dev_data), 0, mem_handle, 0));

  // Give host and device access to device data
  hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent},
                                                {HSA_ACCESS_PERMISSION_RW, cpuAgent}};

  ASSERT_SUCCESS(hsa_amd_vmem_set_access(dev_data, sizeof(*dev_data), permsAccess, 2));

  // Allocate the kernel argument buffer from the kernarg_pool.
  ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
                                              reinterpret_cast<void**>(&kernArgsVirt)));

  // initialize the host buffers
  // NOTE(review): `rand_r(&seed) % 1` is always 0, so every element is set to 1. The
  // verification below only compares against dup_data, so it holds either way - confirm
  // whether a wider random range was intended.
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    unsigned int seed = time(NULL);
    host_data->data[i] = 1 + rand_r(&seed) % 1;
    host_data->dup_data[i] = host_data->data[i];
  }

  memset(host_data->result, 0, sizeof(host_data->result));
  memset(dev_data->result, 0, sizeof(dev_data->result));

  ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgsVirt));

  kernArgsVirt->a = host_data->data;
  kernArgsVirt->b = host_data->result;  // system memory passed to gpu for write
  kernArgsVirt->c = dev_data->result;   // gpu memory to verify that gpu read system data

  // Create the executable, get symbol by name and load the code object
  set_kernel_file_name("gpuReadWrite_kernels.hsaco");
  set_kernel_name("gpuReadWrite");
  ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &gpuAgent));

  // Fill the dispatch packet with
  // workgroup_size, grid_size, kernelArgs and completion signal
  // Put it on the queue and launch the kernel by ringing the doorbell

  // create completion signal
  ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal));

  // create aql packet
  hsa_kernel_dispatch_packet_t aql;
  memset(&aql, 0, sizeof(aql));

  // initialize aql packet
  aql.workgroup_size_x = 256;
  aql.workgroup_size_y = 1;
  aql.workgroup_size_z = 1;
  aql.grid_size_x = kMemoryAllocSize;
  aql.grid_size_y = 1;
  aql.grid_size_z = 1;
  aql.private_segment_size = 0;
  aql.group_segment_size = 0;
  aql.kernel_object = kernel_object();  // kernel_code;
  aql.kernarg_address = kernArgsVirt;
  aql.completion_signal = signal;

  // Mask used to wrap the write index into the queue's packet ring.
  const uint32_t queue_mask = queue->size - 1;

  // write to command queue
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  hsa_queue_store_write_index_relaxed(queue, index + 1);

  rocrtst::WriteAQLToQueueLoc(queue, index, &aql);

  // The header is written last, after the rest of the packet has been copied into its slot.
  hsa_kernel_dispatch_packet_t* q_base_addr =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
  rocrtst::AtomicSetPacketHeader(
      (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
          (1 << HSA_PACKET_HEADER_BARRIER) |
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
      (1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(&q_base_addr[index & queue_mask]));

  // ring doorbell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);
  // wait for the signal and reset it for future use
  while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
                                   HSA_WAIT_STATE_ACTIVE)) {
  }
  hsa_signal_store_relaxed(signal, 1);

  // compare device and host side results
  if (verbosity() > 0) {
    std::cout << "    Check GPU has read the system memory" << std::endl;
  }
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    ASSERT_EQ(dev_data->result[i], host_data->dup_data[i]);
  }

  if (verbosity() > 0) {
    std::cout << "    GPU has read the system memory successfully" << std::endl;
    std::cout << "    Check GPU has written to system memory" << std::endl;
  }
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    ASSERT_EQ(host_data->result[i], i);
  }

  if (verbosity() > 0) {
    std::cout << "    GPU has written to system memory successfully" << std::endl;
  }

  ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, sizeof(*dev_data)));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));

  if (dev_data) {
    ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data)));
  }

  if (host_data) hsa_memory_free(host_data);
  if (kernArgsVirt) {
    hsa_memory_free(kernArgsVirt);
    // kernArgsVirt is a file-level global; clear it so no stale pointer survives this test.
    kernArgsVirt = NULL;
  }
  if (signal.handle) {
    hsa_signal_destroy(signal);
  }
  if (queue) {
    hsa_queue_destroy(queue);
  }
}

// Driver for the "GPU To CPU Access" subtest.
//
// Collects the CPU and GPU agents, skips with a message if the runtime reports that the
// virtual memory API is unsupported, and otherwise runs the per-agent overload for each GPU
// using the first CPU agent and the pool selected by rocrtst::GetGlobalMemoryPool. The
// subtest stops early if a GPU yields no such pool.
void VirtMemoryTestBasic::GPUAccessToCPUMemoryTest(void) {
  hsa_status_t err;
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));

  // The banner previously said "CPU To GPU Access test" (copied from the sibling subtest),
  // which made the two subtests indistinguishable in the log.
  if (verbosity() > 0) PrintMemorySubtestHeader("GPU To CPU Access test");

  bool supp = false;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  for (unsigned int i = 0; i < gpus.size(); ++i) {
    // A zero handle after the iteration means the callback did not select a pool.
    hsa_amd_memory_pool_t gpu_pool;
    memset(&gpu_pool, 0, sizeof(gpu_pool));
    ASSERT_SUCCESS(
        hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
    if (gpu_pool.handle == 0) {
      std::cout << "no global mempool in GPU agent" << std::endl;
      return;
    }
    GPUAccessToCPUMemoryTest(cpus[0], gpus[i], gpu_pool);
  }
  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Test to check GPU can read & write to GPU memory.
//
// Flow: reserve a virtual address range, back it with a pinned handle created from
// `device_pool`, map it and grant `gpuAgent` RW access, copy host data into it, dispatch the
// "gpuReadWrite" kernel, copy the device result back and verify it on the host.
//
// @param cpuAgent    CPU agent owning the kernarg/system pools and used as the copy peer.
// @param gpuAgent    GPU agent that runs the kernel and gets access to the mapping.
// @param device_pool GPU memory pool the virtual memory handle is created from. Pools that
//                    are not allocatable, global and coarse-grained are silently ignored.
void VirtMemoryTestBasic::GPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
                                                   hsa_amd_memory_pool_t device_pool) {
  rocrtst::pool_info_t pool_i;
  hsa_status_t err;

  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));

  // Only allocatable, coarse-grained global pools are exercised.
  if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
      pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
    return;

  hsa_amd_memory_pool_access_t access;
  ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
      cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));

  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout << "    Test not applicable as system is not large bar - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    // Skip regardless of verbosity; the log level must not decide whether the test runs.
    return;
  }

  hsa_queue_t* queue = NULL;  // command queue
  hsa_signal_t signal = {0};  // completion signal

  static const int kMemoryAllocSize = 4096;

  // get queue size
  uint32_t queue_size = 0;
  ASSERT_SUCCESS(hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size));

  // create queue
  ASSERT_SUCCESS(
      hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue));

  // Find a memory pool that supports kernel arguments.
  hsa_amd_memory_pool_t kernarg_pool;
  ASSERT_SUCCESS(
      hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetKernArgMemoryPool, &kernarg_pool));

  // Get System Memory Pool on the cpuAgent to allocate host side buffers
  hsa_amd_memory_pool_t global_pool;
  ASSERT_SUCCESS(
      hsa_amd_agent_iterate_memory_pools(cpuAgent, rocrtst::GetGlobalMemoryPool, &global_pool));

  // Host-side staging buffers (system memory).
  struct host_data_t {
    int data[kMemoryAllocSize * 4];
    int gpuWrite[kMemoryAllocSize * 4];
    int result[kMemoryAllocSize * 4];
  };

  // Device-side buffers living in the virtual memory mapping.
  struct dev_data_t {
    int data[kMemoryAllocSize * 4];
    int result[kMemoryAllocSize * 4];
  };

  struct host_data_t* host_data;
  struct dev_data_t* dev_data;

  ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(global_pool, sizeof(*host_data), 0,
                                              reinterpret_cast<void**>(&host_data)));

  // Allow gpuAgent access to all allocated system memory.
  ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, host_data));
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve((void**)&dev_data, sizeof(*dev_data), 0, 0));

  hsa_amd_vmem_alloc_handle_t mem_handle;

  ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, sizeof(*dev_data), MEMORY_TYPE_PINNED, 0,
                                            &mem_handle));

  ASSERT_SUCCESS(hsa_amd_vmem_map(dev_data, sizeof(*dev_data), 0, mem_handle, 0));

  // Grant the GPU agent read/write access to the mapped device data.
  hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent}};

  ASSERT_SUCCESS(
      hsa_amd_vmem_set_access(dev_data, sizeof(*dev_data), permsAccess, ARRAY_SIZE(permsAccess)));

  // Allocate the kernel argument buffer from the kernarg_pool.
  ASSERT_SUCCESS(hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
                                              reinterpret_cast<void**>(&kernArgsVirt)));

  // create completion signal
  ASSERT_SUCCESS(hsa_signal_create(1, 0, NULL, &signal));

  // initialize the host buffers
  // NOTE: `rand_r(&seed) % 1` is always 0, so every element is initialized to 1.
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    unsigned int seed = time(NULL);
    host_data->data[i] = 1 + rand_r(&seed) % 1;
  }

  // Stage the host input into device memory (kMemoryAllocSize * 4 bytes).
  ASSERT_SUCCESS(hsa_amd_memory_async_copy(dev_data->data, gpuAgent, host_data->data, cpuAgent,
                                           kMemoryAllocSize * 4, 0, NULL, signal));

  // Wait for the copy, then re-arm the signal for the kernel dispatch.
  while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
                                   HSA_WAIT_STATE_ACTIVE)) {
  }
  hsa_signal_store_relaxed(signal, 1);

  memset(host_data->result, 0, sizeof(host_data->result));

  ASSERT_SUCCESS(hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgsVirt));

  kernArgsVirt->a = dev_data->data;
  kernArgsVirt->b = host_data->gpuWrite;  // system memory passed to gpu for write
  kernArgsVirt->c = dev_data->result;     // gpu memory to verify that gpu read system data

  // Create the executable, get symbol by name and load the code object
  set_kernel_file_name("gpuReadWrite_kernels.hsaco");
  set_kernel_name("gpuReadWrite");
  ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &gpuAgent));

  // Fill the dispatch packet with
  // workgroup_size, grid_size, kernelArgs and completion signal
  // Put it on the queue and launch the kernel by ringing the doorbell

  // create aql packet
  hsa_kernel_dispatch_packet_t aql;
  memset(&aql, 0, sizeof(aql));

  // initialize aql packet
  aql.workgroup_size_x = 256;
  aql.workgroup_size_y = 1;
  aql.workgroup_size_z = 1;
  aql.grid_size_x = kMemoryAllocSize;
  aql.grid_size_y = 1;
  aql.grid_size_z = 1;
  aql.private_segment_size = 0;
  aql.group_segment_size = 0;
  aql.kernel_object = kernel_object();  // kernel_code;
  aql.kernarg_address = kernArgsVirt;
  aql.completion_signal = signal;

  const uint32_t queue_mask = queue->size - 1;

  // write to command queue
  uint64_t index = hsa_queue_load_write_index_relaxed(queue);
  hsa_queue_store_write_index_relaxed(queue, index + 1);

  rocrtst::WriteAQLToQueueLoc(queue, index, &aql);

  // Publish the packet header last so the packet processor sees a complete packet.
  hsa_kernel_dispatch_packet_t* q_base_addr =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
  rocrtst::AtomicSetPacketHeader(
      (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
          (1 << HSA_PACKET_HEADER_BARRIER) |
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
      (1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(&q_base_addr[index & queue_mask]));

  // ring the doorbell
  hsa_signal_store_relaxed(queue->doorbell_signal, index);
  // wait for the signal and reset it for future use
  while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
                                   HSA_WAIT_STATE_ACTIVE)) {
  }
  hsa_signal_store_relaxed(signal, 1);

  // Bring the device-side result back to the host for verification.
  ASSERT_SUCCESS(hsa_amd_memory_async_copy(host_data->result, cpuAgent, dev_data->result, gpuAgent,
                                           kMemoryAllocSize * 4, 0, NULL, signal));

  while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
                                   HSA_WAIT_STATE_ACTIVE)) {
  }
  // compare device and host side results
  if (verbosity() > 0) {
    std::cout << "    Check GPU has read the system memory" << std::endl;
  }
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    // printf("Verifying data at index[%d]\n", i);
    ASSERT_EQ(host_data->result[i], host_data->data[i]);
  }

  if (verbosity() > 0) {
    std::cout << "    GPU has read the system memory successfully" << std::endl;
    std::cout << "    Check GPU has written to system memory" << std::endl;
  }
  // The kernel is expected to have stored each element's index into gpuWrite.
  for (int i = 0; i < kMemoryAllocSize; ++i) {
    ASSERT_EQ(host_data->gpuWrite[i], i);
  }

  if (verbosity() > 0) {
    std::cout << "    GPU has written to system memory successfully" << std::endl;
  }

  // Tear down in reverse order: unmap, release the handle, free the reservation.
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, sizeof(*dev_data)));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle));

  if (dev_data) {
    ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data)));
  }

  if (host_data) hsa_memory_free(host_data);
  if (kernArgsVirt) {
    hsa_memory_free(kernArgsVirt);
  }
  if (signal.handle) {
    hsa_signal_destroy(signal);
  }
  if (queue) {
    hsa_queue_destroy(queue);
  }
}

void VirtMemoryTestBasic::GPUAccessToGPUMemoryTest(void) {
  hsa_status_t err;
  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));

  if (verbosity() > 0) PrintMemorySubtestHeader("GPU To GPU Access test");

  bool supp = false;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  for (unsigned int i = 0; i < gpus.size(); ++i) {
    hsa_amd_memory_pool_t gpu_pool;
    memset(&gpu_pool, 0, sizeof(gpu_pool));
    ASSERT_SUCCESS(
        hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
    if (gpu_pool.handle == 0) {
      std::cout << "no global mempool in GPU agent" << std::endl;
      return;
    }
    GPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
  }
  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Maps several independently created memory handles into one reserved virtual address range
// in reverse order (last handle at the lowest address), grants the GPU access to the whole
// range, then unmaps everything and releases all resources.
//
// @param cpuAgent    CPU agent used to query whether it may access `device_pool`.
// @param gpuAgent    GPU agent that is granted RW access to the mapped range.
// @param device_pool GPU memory pool the handles are created from. Pools that are not
//                    allocatable, global and coarse-grained are silently ignored.
void VirtMemoryTestBasic::NonContiguousChunks(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent,
                                              hsa_amd_memory_pool_t device_pool) {
  rocrtst::pool_info_t pool_i;
  hsa_status_t err;

  ASSERT_SUCCESS(rocrtst::AcquirePoolInfo(device_pool, &pool_i));

  // Only allocatable, coarse-grained global pools are exercised.
  if (!pool_i.alloc_allowed || pool_i.segment != HSA_AMD_SEGMENT_GLOBAL ||
      pool_i.global_flag != HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)
    return;

  hsa_amd_memory_pool_access_t access;
  ASSERT_SUCCESS(hsa_amd_agent_memory_pool_get_info(
      cpuAgent, device_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access));

  if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
    if (verbosity() > 0) {
      std::cout << "    Test not applicable as system is not large bar - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    // Skip regardless of verbosity; the log level must not decide whether the test runs.
    return;
  }

  const size_t alloc_size = pool_i.alloc_granule * 512;
  const unsigned NUM_BUFFERS = 6;

  void* addr = NULL;
  void* addr_chunks[NUM_BUFFERS];
  hsa_amd_vmem_alloc_handle_t mem_handles[NUM_BUFFERS];

  // One contiguous reservation large enough for all chunks.
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addr, NUM_BUFFERS * alloc_size, 0, 0));

  for (unsigned i = 0; i < NUM_BUFFERS; i++) {
    // Allocate 6 separate memory handles and compute each chunk's address.
    ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool, alloc_size, MEMORY_TYPE_PINNED, 0,
                                              &(mem_handles[i])));
    addr_chunks[i] = ((uint8_t*)addr) + (i * alloc_size);
  }

  for (unsigned i = 0; i < NUM_BUFFERS; i++) {
    // Map each chunk in reverse order. The last argument is `flags`, which must be 0
    // (the original code passed alloc_size here by mistake).
    ASSERT_SUCCESS(
        hsa_amd_vmem_map(addr_chunks[i], alloc_size, 0, mem_handles[NUM_BUFFERS - i - 1], 0));
  }

  hsa_amd_memory_access_desc_t permsAccess[] = {{HSA_ACCESS_PERMISSION_RW, gpuAgent}};

  ASSERT_SUCCESS(hsa_amd_vmem_set_access(addr, NUM_BUFFERS * alloc_size, permsAccess,
                                         ARRAY_SIZE(permsAccess)));

  for (unsigned i = 0; i < NUM_BUFFERS; i++) {
    // TODO Unmap them in opposite order
    ASSERT_SUCCESS(hsa_amd_vmem_unmap(addr_chunks[i], alloc_size));
  }

  // Release the backing handles; previously they were leaked.
  for (unsigned i = 0; i < NUM_BUFFERS; i++) {
    ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handles[i]));
  }

  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addr, NUM_BUFFERS * alloc_size));
}

// Driver for the non-contiguous-chunks subtest: when the virtual memory API is supported, runs
// the per-agent overload for every GPU agent using the first CPU agent and the GPU's global
// memory pool.
void VirtMemoryTestBasic::NonContiguousChunks(void) {
  hsa_status_t err;

  // Header previously read "GPU To GPU Access test" (copy-paste from the sibling subtest).
  if (verbosity() > 0) PrintMemorySubtestHeader("Non Contiguous Chunks test");

  bool supp = false;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus));

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  ASSERT_SUCCESS(hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus));

  for (unsigned int i = 0; i < gpus.size(); ++i) {
    // A zero handle after iteration means the callback found no global pool.
    hsa_amd_memory_pool_t gpu_pool;
    memset(&gpu_pool, 0, sizeof(gpu_pool));
    ASSERT_SUCCESS(
        hsa_amd_agent_iterate_memory_pools(gpus[i], rocrtst::GetGlobalMemoryPool, &gpu_pool));
    if (gpu_pool.handle == 0) {
      std::cout << "no global mempool in GPU agent" << std::endl;
      return;
    }
    NonContiguousChunks(cpus[0], gpus[i], gpu_pool);
  }
  if (verbosity() > 0) {
    std::cout << "    Subtest finished" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// One-time setup: runs the base-class setup, then selects the default agents and the
// typical memory pools for this test instance.
void VirtMemoryTestBasic::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  // Agents must be chosen before pools are resolved.
  ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this));
  ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this));
}

// Runs the base-class test body, but only when the profile required by this test case
// matches the one we are actually running on.
void VirtMemoryTestBasic::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Prints the test's descriptive information by delegating to the base class.
void VirtMemoryTestBasic::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results to print; the profile comparison is still performed exactly as
// before and its outcome is intentionally discarded.
void VirtMemoryTestBasic::DisplayResults(void) const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

// Final teardown. The base-class Close() closes handles opened within rocrtst utility calls
// and calls hsa_shut_down(), so any other HSA cleanup has to happen before this point.
void VirtMemoryTestBasic::Close() {
  TestBase::Close();
}

// Registers the title and description reported for the inter-process virtual memory test.
VirtMemoryTestInterProcess::VirtMemoryTestInterProcess(void) : TestBase() {
  static const char kTitle[] = "ROCr Virtual Memory Test - InterProcess ";
  static const char kDescription[] =
      " Tests Virtual Memory API with memory shared between two processes";

  set_title(kTitle);
  set_description(kDescription);
}

// Nothing to release here; resources are handled in Close().
VirtMemoryTestInterProcess::~VirtMemoryTestInterProcess(void) = default;

// See if the other process wrote an error value (-1) to the token; if not, write
// newVal to the token.
//
// The check and the store are performed as a single atomic compare-exchange so an error
// value published by the other process between the check and the write is never overwritten.
//
// @param token   Shared state token.
// @param newVal  Value to publish when no error has been signalled.
// @return 0 when newVal was stored, -1 when the token already held the error value.
static int CheckAndSetToken(std::atomic<int>* token, int newVal) {
  int observed = token->load();
  do {
    if (observed == -1) {
      return -1;
    }
    // On failure compare_exchange_weak reloads `observed`, so the error check is repeated.
  } while (!token->compare_exchange_weak(observed, newVal));

  return 0;
}

// Resets every field of the shared control block to zero, including both socket descriptors.
static void ClearShared(SharedVirtMem* s) {
  s->token.store(0);
  s->count.store(0);
  s->size.store(0);
  s->child_status.store(0);
  s->parent_status.store(0);

  s->sv[0] = 0;
  s->sv[1] = 0;
}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
//
// Creates the shared control block and a Unix-domain socket pair, forks the
// second process, performs a two-step token handshake between parent and child,
// and only then runs the HSA setup (in both processes).
void VirtMemoryTestInterProcess::SetUp(void) {
  hsa_status_t err;

  // We must fork process before doing HSA stuff, specifically, hsa_init, as
  // each process needs to do this.
  // Allocate linux shared_ memory (anonymous, MAP_SHARED so that it stays
  // common to both processes after fork()).
  shared_ = reinterpret_cast<SharedVirtMem*>(mmap(
      nullptr, sizeof(SharedVirtMem), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
  ASSERT_NE(shared_, MAP_FAILED) << "mmap failed to allocated shared_ memory";

  // Initialize shared control block to zeros. The field "token"
  // is used to signal state changes between the 2 processes.
  ClearShared(shared_);

  // Socket pair used later to pass the dmabuf file descriptor between processes.
  if (socketpair(AF_UNIX, SOCK_DGRAM, 0, shared_->sv) != 0) {
    std::cout << "Failed to create Unix-domain socket pair" << std::endl;
    ASSERT_EQ(0, 1);
  }

  // Spawn second process and verify communication
  child_ = 0;
  child_ = fork();
  ASSERT_NE(-1, child_) << "fork failed";
  std::atomic<int>* token = &shared_->token;
  if (child_ != 0) {
    // Parent: fork() returned the child's pid.
    parentProcess_ = true;

    // Signal to other process we are waiting, and then wait...
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }

    // Second round of the handshake: raise the token again and wait for the
    // child to change it.
    PROCESS_LOG("Second process observed, handshake...\n");
    *token = 1;
    while (*token == 1) {
      sched_yield();
    }

  } else {
    // Child: fork() returned 0. Its verbosity is set to 0.
    parentProcess_ = false;
    set_verbosity(0);
    PROCESS_LOG("Second process running.\n");

    // Wait until the parent raises the token for the first time.
    while (*token == 0) {
      sched_yield();
    }

    // Acknowledge by resetting the token to 0, unless it holds the error value (-1).
    int ret;
    ret = CheckAndSetToken(token, 0);
    ASSERT_EQ(0, ret) << "Error detected in child process\n";
    // Wait for handshake
    while (*token == 0) {
      sched_yield();
    }
    ret = CheckAndSetToken(token, 0);
    ASSERT_EQ(0, ret) << "Error detected in child process\n";
  }

  // From here on both processes perform their own HSA setup.
  TestBase::SetUp();

  ASSERT_SUCCESS(rocrtst::SetDefaultAgents(this));
  ASSERT_SUCCESS(rocrtst::SetPoolsTypical(this));

  // Cache the minimum and recommended allocation granules of the device pool.
  ASSERT_SUCCESS(hsa_amd_memory_pool_get_info(
      device_pool(), HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &min_gpu_mem_granule));

  ASSERT_SUCCESS(hsa_amd_memory_pool_get_info(
      device_pool(), HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE, &rec_gpu_mem_granule));

  return;
}

// Runs the base-class test body and then the process-specific half of the test, provided the
// profile required by this test case matches the one we are actually running on.
void VirtMemoryTestInterProcess::Run(void) {
  if (!rocrtst::CheckProfile(this)) return;

  TestBase::Run();

  // Note: Close() (and hsa_shut_down()) will be called from main().
  // parentProcess_ is true for the parent process, false for the child process.
  if (!parentProcess_) {
    // The child terminates here once its half of the test has run.
    ChildProcessImpl();
    exit(0);
  }

  ParentProcessImpl();
}

// Prints the test's descriptive information by delegating to the base class.
void VirtMemoryTestInterProcess::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results to print; the profile comparison is still performed exactly as
// before and its outcome is intentionally discarded.
void VirtMemoryTestInterProcess::DisplayResults(void) const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

// Final teardown. The base-class Close() closes handles opened within rocrtst utility calls
// and calls hsa_shut_down(), so any other HSA cleanup has to happen before this point.
void VirtMemoryTestInterProcess::Close() {
  TestBase::Close();
}

/* Send the dmabuf_fd to another process via Unix socket.
 *
 * The descriptor travels as SCM_RIGHTS ancillary data next to a small dummy payload.
 *
 * @param socket     Connected Unix-domain socket to send on.
 * @param dmabuf_fd  File descriptor to pass to the peer.
 * @return 0 on success, -1 if sendmsg() failed.
 */
int VirtMemoryTestInterProcess::SendDmaBufFd(int socket, int dmabuf_fd) {
  char payload[] = "rocrtst";
  char buf[CMSG_SPACE(sizeof(dmabuf_fd))];
  memset(buf, '\0', sizeof(buf));

  struct iovec io;
  io.iov_base = payload;
  io.iov_len = strlen(payload);

  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &io;
  msg.msg_iovlen = 1;
  msg.msg_control = buf;
  msg.msg_controllen = sizeof(buf);

  // Single control message carrying the file descriptor.
  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(dmabuf_fd));
  memcpy(CMSG_DATA(cmsg), &dmabuf_fd, sizeof(dmabuf_fd));

  msg.msg_controllen = CMSG_SPACE(sizeof(dmabuf_fd));

  // sendmsg() returns ssize_t; storing it in an unsigned type would hide the -1 error value.
  const ssize_t sent = sendmsg(socket, &msg, 0);

  return (sent < 0) ? -1 : 0;
}

/* Receive the dmabuf_fd from another process via Unix socket.
 *
 * @param socket  Connected Unix-domain socket to receive on.
 * @return the received file descriptor, or -1 if recvmsg() failed or the message did not
 *         carry an SCM_RIGHTS control message with a descriptor.
 */
int VirtMemoryTestInterProcess::ReceiveDmaBufFd(int socket) {
  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));

  /* On Mac OS X, the struct iovec is needed, even if it points to minimal data */
  char m_buffer[1];
  struct iovec io;
  io.iov_base = m_buffer;
  io.iov_len = sizeof(m_buffer);
  msg.msg_iov = &io;
  msg.msg_iovlen = 1;

  char c_buffer[256];
  msg.msg_control = c_buffer;
  msg.msg_controllen = sizeof(c_buffer);

  // recvmsg() returns ssize_t; storing it in an unsigned type would hide the -1 error value.
  const ssize_t rcv = recvmsg(socket, &msg, 0);
  if (rcv < 0) return -1;

  // CMSG_FIRSTHDR yields NULL when no ancillary data arrived; also make sure the control
  // message really is a descriptor transfer before reading its payload.
  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
  if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS ||
      cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
    return -1;
  }

  int fd;
  memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));

  return fd;
}

// Parent half of the inter-process test: creates a memory handle, exports it as a dmabuf
// file descriptor, hands the descriptor to the child over the Unix socket, verifies that
// the parent itself can import/map/unmap the exported handle (twice), then waits for the
// child to report completion before releasing everything.
void VirtMemoryTestInterProcess::ParentProcessImpl() {
  hsa_status_t err;

  void* addrRange = NULL;

  bool supp = false;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * rec_gpu_mem_granule, 0, 0));

  hsa_amd_vmem_alloc_handle_t exported_handle;
  ASSERT_SUCCESS(hsa_amd_vmem_handle_create(device_pool(), 20 * rec_gpu_mem_granule,
                                            MEMORY_TYPE_NONE, 0, &exported_handle));

  int dmabuf_fd;
  ASSERT_SUCCESS(hsa_amd_vmem_export_shareable_handle(&dmabuf_fd, exported_handle, 0));
  ASSERT_GE(dmabuf_fd, 0);

  // Signal child process that the gpu buffer is ready to read.
  PROCESS_LOG("Parent: Signalling child proces process\n");
  CheckAndSetToken(&shared_->token, 1);

  // The parent only uses sv[0]; sv[1] belongs to the child.
  close(shared_->sv[1]);
  ASSERT_EQ(SendDmaBufFd(shared_->sv[0], dmabuf_fd), 0);

  hsa_amd_vmem_alloc_handle_t imported_handle;
  ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle));

  /* Test importing same handle twice */
  hsa_amd_vmem_alloc_handle_t imported_handle2;
  ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle2));
  ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * rec_gpu_mem_granule, 0, imported_handle, 0));
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * rec_gpu_mem_granule));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle));
  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle2));

  // The child sets the token to 2 when done; any other value is turned into the error
  // value so both sides observe the failure.
  PROCESS_LOG("Parent: Waiting for child process to signal\n");
  while (shared_->token == 1) {
    sched_yield();
  }
  if (shared_->token != 2) {
    shared_->token = -1;
  }
  FORK_ASSERT_EQ(2, shared_->token, "Parent: Error detected in signaling token\n");
  PROCESS_LOG("Parent: Waking upon signal from child process\n");

  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(exported_handle));

  ASSERT_SUCCESS(hsa_amd_vmem_address_free(addrRange, 20 * rec_gpu_mem_granule));

  // Close the descriptors this process still owns; they were previously leaked. The child
  // has already received its own copy of the dmabuf descriptor at this point.
  close(dmabuf_fd);
  close(shared_->sv[0]);

  PROCESS_LOG("Parent: Virtual Memory test PASSED\n");
}

void VirtMemoryTestInterProcess::ChildProcessImpl() {
  int dmabuf_fd = -1;
  bool supp = false;
  hsa_status_t err;
  ASSERT_SUCCESS(hsa_system_get_info(HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED, (void*)&supp));
  if (!supp) {
    if (verbosity() > 0) {
      std::cout << "    Virtual Memory API not supported on this system - Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  void* addrRange = NULL;
  ASSERT_SUCCESS(hsa_amd_vmem_address_reserve(&addrRange, 20 * rec_gpu_mem_granule, 0, 0));

  // Yield until shared token value changes i.e. is updated by parent.
  // Validate parent's update is per expectation
  PROCESS_LOG("Child: Waiting for parent process to signal\n");
  while (shared_->token == 0) {
    sched_yield();
  }
  if (shared_->token != 1) {
    shared_->token = -1;
  }
  FORK_ASSERT_EQ(1, shared_->token, "Child: Error detected in signaling token\n");
  PROCESS_LOG("Child: Waking upon signal from parent process\n");

  close(shared_->sv[0]);
  dmabuf_fd = ReceiveDmaBufFd(shared_->sv[1]);

  hsa_amd_vmem_alloc_handle_t imported_handle;
  ASSERT_SUCCESS(hsa_amd_vmem_import_shareable_handle(dmabuf_fd, &imported_handle));
  ASSERT_SUCCESS(hsa_amd_vmem_map(addrRange, 10 * rec_gpu_mem_granule, 0, imported_handle, 0));
  ASSERT_SUCCESS(hsa_amd_vmem_unmap(addrRange, 10 * rec_gpu_mem_granule));

  PROCESS_LOG("Child: Signalling parent process\n");
  CheckAndSetToken(&shared_->token, 2);

  ASSERT_SUCCESS(hsa_amd_vmem_handle_release(imported_handle));

  PROCESS_LOG("Child: Virtual Memory test PASSED\n");
}


================================================
FILE: rocrtst/suites/functional/virtual_memory.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2022, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_FUNCTIONAL_VIRTUAL_MEMORY_H_
#define ROCRTST_SUITES_FUNCTIONAL_VIRTUAL_MEMORY_H_

#include <atomic>

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Test case grouping the virtual memory API subtests declared below. Each public
// subtest has a private overload taking the agent(s) and memory pool to run against.
class VirtMemoryTestBasic : public TestBase {
 public:
  VirtMemoryTestBasic();

  // @Brief: Destructor for test case of VirtMemoryTestBasic
  virtual ~VirtMemoryTestBasic();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // Subtest entry points (no arguments).
  void TestCreateDestroy(void);
  void TestRefCount(void);
  void TestPartialMapping(void);
  void NonContiguousChunks(void);
  void GPUAccessToCPUMemoryTest(void);
  void CPUAccessToGPUMemoryTest(void);
  void GPUAccessToGPUMemoryTest(void);

 private:
  // Per-agent / per-pool overloads of the subtests above.
  void TestCreateDestroy(hsa_agent_t agent, hsa_amd_memory_pool_t pool);
  void TestRefCount(hsa_agent_t agent, hsa_amd_memory_pool_t pool);
  void TestPartialMapping(hsa_agent_t agent, hsa_amd_memory_pool_t pool);
  void NonContiguousChunks(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
                           hsa_amd_memory_pool_t pool);

  void GPUAccessToCPUMemoryTest(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
                                hsa_amd_memory_pool_t pool);
  void CPUAccessToGPUMemoryTest(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
                                hsa_amd_memory_pool_t pool);
  void GPUAccessToGPUMemoryTest(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
                                hsa_amd_memory_pool_t pool);
};

// Control block shared between the two processes of VirtMemoryTestInterProcess.
struct SharedVirtMem {
  // Handshake/state token exchanged between the two processes; -1 marks an error.
  std::atomic<int> token;
  // NOTE(review): count, size, child_status and parent_status are presumably
  // bookkeeping for the two processes — confirm against their users.
  std::atomic<int> count;
  std::atomic<size_t> size;
  std::atomic<int> child_status;
  std::atomic<int> parent_status;

  // Socket pair descriptors used to pass the dmabuf file descriptor between processes.
  int sv[2];
};

// Test case exercising the virtual memory API with memory shared between a
// parent and a child process.
class VirtMemoryTestInterProcess : public TestBase {
 public:
  VirtMemoryTestInterProcess();

  // @Brief: Destructor for test case of VirtMemoryTestInterProcess
  virtual ~VirtMemoryTestInterProcess();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // Test bodies executed by the parent and the child process respectively.
  void ParentProcessImpl();
  void ChildProcessImpl();


 private:
  // Pass a dmabuf file descriptor over a Unix-domain socket.
  int SendDmaBufFd(int socket, int dmabuf_fd);
  int ReceiveDmaBufFd(int socket);

  int child_;             /* Return value of fork(): 0 in the child process */
  SharedVirtMem* shared_; /* Control block shared by both processes */
  bool parentProcess_;    /* True in the parent process, false in the child */
  size_t min_gpu_mem_granule; /* Minimum granularity */
  size_t rec_gpu_mem_granule; /* Recommended granularity */
};

#endif  // ROCRTST_SUITES_FUNCTIONAL_VIRTUAL_MEMORY_H_


================================================
FILE: rocrtst/suites/negative/memory_allocate_negative_tests.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>

#include "suites/negative/memory_allocate_negative_tests.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// NOTE(review): presumably the element count of a test buffer — confirm against its users.
static const uint32_t kNumBufferElements = 256;


// If `err` is not HSA_STATUS_SUCCESS, prints the failing line/file, the numeric status and
// its status string, then returns `err` from the enclosing function.
// Note that `err` is evaluated more than once, so pass a variable rather than a call.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}


// Registers the default iteration count, title and description of the negative
// memory-allocation test.
MemoryAllocateNegativeTest::MemoryAllocateNegativeTest(void) : TestBase() {
  // Default number of iterations of the main test; it can be overridden on the
  // command line.
  static const int kDefaultIterations = 10;
  set_num_iteration(kDefaultIterations);

  set_title("RocR Memory Allocate Negative Test");

  set_description(
      "This series of tests are Negative tests "
      "that do check memory allocation on GPU and CPU, "
      "i.e. requesting an allocation of more than max "
      "pool size or 0 size.");
}

// Nothing to release here; resources are handled in Close().
MemoryAllocateNegativeTest::~MemoryAllocateNegativeTest(void) = default;

// One-time setup of the member state used by the rest of the test: base-class setup,
// then default agent selection, then typical memory pool selection.
void MemoryAllocateNegativeTest::SetUp(void) {
  TestBase::SetUp();

  // Agents must be chosen before pools are resolved.
  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void MemoryAllocateNegativeTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void MemoryAllocateNegativeTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to display. The required profile is
// still compared with the one we are running on, but the outcome does not
// change what happens next: the function returns either way.
void MemoryAllocateNegativeTest::DisplayResults(void) const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

void MemoryAllocateNegativeTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Separator line printed after a subtest's output.
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner line that introduces a memory subtest to stdout:
// "  *** Memory Subtest: <header> ***" followed by a newline and a flush.
static void PrintMemorySubtestHeader(const char *header) {
  std::cout << "  *** Memory Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Prints "  Agent: <name> (<type>)" for the given agent on stdout, where
// <type> is CPU, GPU, DSP or AIE. A device type that is not one of those
// is reported as "Unknown" so the parenthesis is always closed.
// A failing hsa_agent_get_info() query raises a fatal gtest assertion and
// returns from this function before anything is printed for that query.
static void PrintAgentNameAndType(hsa_agent_t agent) {
  hsa_status_t err;

  // Zero-filled so the buffer is a valid C string whatever the query writes.
  char ag_name[64] = {0};
  hsa_device_type_t ag_type;

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  std::cout << "  Agent: " << ag_name << " (";
  switch (ag_type) {
    case HSA_DEVICE_TYPE_CPU:
      std::cout << "CPU)";
      break;
    case HSA_DEVICE_TYPE_GPU:
      std::cout << "GPU)";
      break;
    case HSA_DEVICE_TYPE_DSP:
      std::cout << "DSP)";
      break;
    case HSA_DEVICE_TYPE_AIE:
      std::cout << "AIE)";
      break;
    default:
      // Keep the output well-formed for device types added later.
      std::cout << "Unknown)";
      break;
  }
  std::cout << std::endl;
}

// Allocation size constant. NOTE(review): no code in this file references
// this constant.
static const int kMemoryAllocSize = 1024;

// Verifies that a pool refuses an allocation larger than its aggregate
// maximum allocation size.
//
// The request is (pool_i.aggregate_alloc_max + pool_i.alloc_granule) bytes
// and hsa_amd_memory_pool_allocate() is expected to return
// HSA_STATUS_ERROR_INVALID_ALLOCATION. Pools that do not allow allocation, or
// that report a zero granule, are skipped.
//
// @param agent Agent the pool belongs to; only used for verbose output.
// @param pool  Memory pool under test.
void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(hsa_agent_t agent,
                                               hsa_amd_memory_pool_t pool) {
  hsa_status_t err;

  rocrtst::pool_info_t pool_i;
  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Determine if allocation is allowed in this pool
  if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0) {
    if (verbosity() > 0) {
      std::cout << "  Test not applicable. Skipping." << std::endl;
      std::cout << kSubTestSeparator << std::endl;
    }
    return;
  }

  void* memoryPtr = nullptr;
  auto gran_sz = pool_i.alloc_granule;
  size_t max_size = pool_i.aggregate_alloc_max;
  err = hsa_amd_memory_pool_allocate(pool, (max_size + gran_sz), 0,
                                     &memoryPtr);

  // The allocation is expected to fail. If it unexpectedly succeeds, release
  // the memory before the assertion below returns, so the failing test does
  // not also leak an oversized buffer.
  if (err == HSA_STATUS_SUCCESS && memoryPtr != nullptr) {
    hsa_amd_memory_pool_free(memoryPtr);
  }
  ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ALLOCATION);
}


// Verifies that a zero-byte allocation request on a pool is rejected.
//
// For pools that report HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
// hsa_amd_memory_pool_allocate() with size 0 is expected to return
// HSA_STATUS_ERROR_INVALID_ARGUMENT. Other pools are skipped.
//
// @param agent Agent the pool belongs to; only used for verbose output.
// @param pool  Memory pool under test.
void MemoryAllocateNegativeTest::ZeroMemoryAllocateTest(hsa_agent_t agent,
                                                hsa_amd_memory_pool_t pool) {
  hsa_status_t err;

  rocrtst::pool_info_t pool_i;
  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Determine if allocation is allowed in this pool
  bool alloc = false;
  err = hsa_amd_memory_pool_get_info(pool,
                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
  // A failed query must not be mistaken for "allocation not allowed", which
  // would silently skip the test.
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (!alloc) {
    return;
  }

  void* memoryPtr = nullptr;
  err = hsa_amd_memory_pool_allocate(pool, 0, 0, &memoryPtr);

  // The request is expected to fail. If it unexpectedly succeeds, release
  // whatever was returned before the assertion below returns.
  if (err == HSA_STATUS_SUCCESS && memoryPtr != nullptr) {
    hsa_amd_memory_pool_free(memoryPtr);
  }
  ASSERT_EQ(err, HSA_STATUS_ERROR_INVALID_ARGUMENT);
}


void MemoryAllocateNegativeTest::MaxMemoryAllocateTest(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  PrintMemorySubtestHeader("MaxMemoryAllocateTest in Memory Pools");

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      MaxMemoryAllocateTest(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryAllocateNegativeTest::ZeroMemoryAllocateTest(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  PrintMemorySubtestHeader("ZeroMemoryAllocateTest in Memory Pools");

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      ZeroMemoryAllocateTest(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Upper bound applied to the queue size used by the ring-buffer test.
static const uint32_t kMaxQueueSizeForAgent = 1024;
// Number of queues created (one after another) per GPU agent by the
// ring-buffer test.
static const uint32_t kMaxQueue = 64;

// State shared between FreeQueueRingBufferTest and the system event
// callback.
//
// Kept in an unnamed namespace: queue_validation.cc defines a struct with
// the same name but a different layout, and two different definitions of one
// externally visible class name in a program violate the one-definition rule.
namespace {
typedef struct test_validation_data_t {
  bool cb_triggered;          // set by the callback once a matching event is seen
  uint64_t expected_address;  // virtual address the memory error must report
} test_validation_data;
}  // namespace

// System event callback used by FreeQueueRingBufferTest.
//
// `data` points at a test_validation_data. The cb_triggered flag is set only
// when the event is a GPU memory error event, reports the expected virtual
// address and has HSA_AMD_MEMORY_ERROR_MEMORY_IN_USE set in its reason mask;
// any mismatch is logged to stdout and leaves the flag untouched.
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t CallbackSystemErrorHandling(const hsa_amd_event_t* event, void* data) {
  test_validation_data* validation = static_cast<test_validation_data*>(data);

  if (event->event_type == HSA_AMD_GPU_MEMORY_ERROR_EVENT) {
    const hsa_amd_gpu_memory_error_info_t& mem_error =
        reinterpret_cast<const hsa_amd_gpu_memory_error_info_t&>(event->memory_error);

    if (mem_error.virtual_address != validation->expected_address) {
      std::cout << "ERROR: Invalid virtual address" << std::endl;
    } else if (!(mem_error.error_reason_mask & HSA_AMD_MEMORY_ERROR_MEMORY_IN_USE)) {
      std::cout << "ERROR: HSA_AMD_MEMORY_ERROR_MEMORY_IN_USE flag not set" << std::endl;
    } else {
      validation->cb_triggered = true;
    }
  } else {
    std::cout << "ERROR: Invalid error type" << std::endl;
  }

  return HSA_STATUS_SUCCESS;
}


void MemoryAllocateNegativeTest::FreeQueueRingBufferTest(void) {
  hsa_status_t err;

  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
  set_kernel_file_name("dispatch_time_kernels.hsaco");
  set_kernel_name("empty_kernel");

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("RingBufferFree");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0; i < gpus.size(); ++i) {
    FreeQueueRingBufferTest(gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

// Verifies, for one GPU agent, that freeing the ring buffer of a live queue
// is rejected and reported through the system event handler, and that the
// queue keeps working afterwards.
//
// For each of kMaxQueue queues (created and destroyed one at a time):
//   1. dispatch the empty kernel and expect the completion signal to reach 0;
//   2. call hsa_memory_free() on the queue's base address and expect failure;
//   3. dispatch again and expect completion;
//   4. expect CallbackSystemErrorHandling to have accepted a memory event for
//      that base address.
//
// @param gpuAgent GPU agent on which queues are created and the kernel runs.
void MemoryAllocateNegativeTest::FreeQueueRingBufferTest(hsa_agent_t gpuAgent) {
  // Submits one kernel dispatch on `queue` and returns the value observed on
  // the completion signal once the wait ends (0 when the dispatch completed).
  auto enqueue_dispatch = [&](hsa_queue_t* queue) {
    hsa_signal_store_relaxed(aql().completion_signal, 1);

    aql().setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
    aql().workgroup_size_x = 1;
    aql().workgroup_size_y = 1;
    aql().workgroup_size_z = 1;

    aql().kernel_object = kernel_object();

    const uint32_t queue_mask = queue->size - 1;

    // Load index for writing header later to command queue at same index
    uint64_t index = hsa_queue_load_write_index_relaxed(queue);
    hsa_queue_store_write_index_relaxed(queue, index + 1);

    rocrtst::WriteAQLToQueueLoc(queue, index, &aql());
    aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    // Set the Aql packet header
    rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                                   &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
                                       queue->base_address))[index & queue_mask]);

    // ring the doorbell
    hsa_signal_store_relaxed(queue->doorbell_signal, index);

    // wait for the signal long enough for the queue error handling callback to happen
    hsa_signal_value_t completion;
    completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                           0xffffff, HSA_WAIT_STATE_ACTIVE);
    // completion signal should be 0.
    return completion;
  };

  // Create the executable, get symbol by name and load the code object
  ASSERT_SUCCESS(rocrtst::LoadKernelFromObjFile(this, &gpuAgent));

  // Fill up the kernel packet except header
  ASSERT_SUCCESS(rocrtst::InitializeAQLPacket(this, &aql()));

  // get queue size
  uint32_t queue_max = 0;
  ASSERT_SUCCESS(hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max));

  // Adjust the size to the max of 1024
  queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max : kMaxQueueSizeForAgent;

  // The handler stays registered after this function returns (it is called
  // once per GPU agent and nothing here unregisters it), so the data it
  // points at must outlive the call: use static storage rather than a stack
  // local that would dangle.
  static test_validation_data user_data;
  user_data = {};
  ASSERT_SUCCESS(hsa_amd_register_system_event_handler(CallbackSystemErrorHandling, &user_data));

  for (uint32_t i = 0; i < kMaxQueue; ++i) {
    // create queue, using the size clamped to what the agent supports
    hsa_queue_t* queue = nullptr;  // command queue
    ASSERT_SUCCESS(hsa_queue_create(gpuAgent, queue_max, HSA_QUEUE_TYPE_SINGLE, NULL,
                                    NULL, 0, 0, &queue));

    user_data.cb_triggered = false;
    user_data.expected_address = reinterpret_cast<uint64_t>(queue->base_address);

    // Enqueue a dispatch and make sure completion signal is 0.
    ASSERT_EQ(enqueue_dispatch(queue), 0);

    // Try to delete the Queue ring buffer, this should return error.
    // Note: This will leave the hsa-runtime internal allocation table in an inconsistent state
    // because hsa-runtime cleans up its internal allocation table before calling libhsakmt to try
    // to do the actual free. So when compiled in debug mode, this will trigger a "Can't find
    // address in allocation map" warning when hsa_queue_destroy is called afterwards. This is the
    // expected behavior because trying to re-organise hsa-runtime hsa_memory_free function to
    // handle this negative use-case is not worth it and the caller is expected to call abort in
    // their system error handler.

    ASSERT_NE(hsa_memory_free(queue->base_address), HSA_STATUS_SUCCESS);

    // Make sure queue is still in a working state. Enqueue a second dispatch and make sure
    // completion signal is 0.
    ASSERT_EQ(enqueue_dispatch(queue), 0);

    // Make sure CallbackSystemErrorHandling was called and memory event has valid info
    ASSERT_TRUE(user_data.cb_triggered);

    if (queue) hsa_queue_destroy(queue);
  }

  clear_code_object();
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/negative/memory_allocate_negative_tests.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_NEGATIVE_MEMORY_ALLOCATE_NEGATIVE_TESTS_H_
#define ROCRTST_SUITES_NEGATIVE_MEMORY_ALLOCATE_NEGATIVE_TESTS_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Negative tests for memory allocation: each public subtest issues a request
// that is expected to be rejected and checks the reported status.
class MemoryAllocateNegativeTest : public TestBase {
 public:
    MemoryAllocateNegativeTest();

  // @Brief: Destructor for the MemoryAllocateNegativeTest test case
  virtual ~MemoryAllocateNegativeTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);


  // @Brief: This test verifies that a memory pool allocation larger than
  // the pool's maximum allocation size is rejected
  void MaxMemoryAllocateTest(void);

  // @Brief: This test verifies that requesting an allocation
  // of 0 size on a memory pool is rejected
  void ZeroMemoryAllocateTest(void);

  // @Brief: This test verifies that freeing a ring buffer used by a queue
  // will trigger an error
  void FreeQueueRingBufferTest(void);

 private:
  // Per-pool / per-agent workers invoked by the public subtests above.
  void MaxMemoryAllocateTest(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);
  void ZeroMemoryAllocateTest(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);

  void FreeQueueRingBufferTest(hsa_agent_t agent);
};

#endif  // ROCRTST_SUITES_NEGATIVE_MEMORY_ALLOCATE_NEGATIVE_TESTS_H_


================================================
FILE: rocrtst/suites/negative/queue_validation.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>

#include "suites/negative/queue_validation.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Upper bound applied to the agent's reported maximum queue size.
static const uint32_t kMaxQueueSizeForAgent = 1024;
// Number of queues created per validation subtest.
static const uint32_t kMaxQueue = 64;

// Per-queue state handed to the queue error callback as its user data.
//
// Kept in an unnamed namespace: memory_allocate_negative_tests.cc defines a
// struct with the same name but a different layout, and two different
// definitions of one externally visible class name in a program violate the
// one-definition rule.
namespace {
typedef struct test_validation_data_t {
  bool cb_triggered;             // cleared before queue creation, checked after the run
  hsa_queue_t** queue_pointer;   // address of the slot holding the queue handle
  hsa_status_t  expected_status; // status the test expects for this queue
} test_validation_data;
}  // namespace

// Queue error callback passed to hsa_queue_create() by the subtests below,
// with a test_validation_data entry as `data`. Forward declaration only; the
// definition is not part of the code shown here.
static void CallbackQueueErrorHandling(hsa_status_t status, hsa_queue_t *source, void *data);

// Constructs the test object and builds its title and description from the
// selected variant. The flags are examined in parameter order and only the
// first one that is set contributes to the title/description. Also clears
// the AQL packet and selects the empty kernel used by all variants.
//
// @param launch_InvalidDimension      describe the invalid-dimension variant
// @param launch_InvalidGroupMemory    describe the invalid-group-memory variant
// @param launch_InvalidKernelObject   describe the invalid-kernel-object variant
// @param launch_InvalidPacket         describe the invalid-packet variant
// @param launch_InvalidWorkGroupSize  describe the invalid-workgroup-size variant
QueueValidation::QueueValidation(bool launch_InvalidDimension,
                                 bool launch_InvalidGroupMemory,
                                 bool launch_InvalidKernelObject,
                                 bool launch_InvalidPacket,
                                 bool launch_InvalidWorkGroupSize) :TestBase() {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.
  std::string name;
  std::string desc;

  name = "RocR Queue Validation";
  desc = "This series of tests submit different negative aql packet into the queue"
         " and verifies that queue error handling callback called with proper exception.";

  if (launch_InvalidDimension) {
    name += " For InvalidDimension";
    desc += " This test verifies that if an aql packet specifies a dimension"
            " value above 3, the queue's error handling callback will trigger.";
  } else if (launch_InvalidGroupMemory) {
    name += " For InvalidGroupMemory";
    desc += " This test verifies that if an aql packet specifies an invalid group"
            " memory size, the queue's error handling callback will trigger.";
  } else if (launch_InvalidKernelObject) {
    name += " For InvalidKernelObject";
    desc += " This test verifies that if an aql packet specifies an invalid"
            " kernel object, the queue's error handling callback will trigger.";
  } else if (launch_InvalidPacket) {
    name += " For InvalidPacket";
    desc += " This test verifies that if an aql packet is invalid (bad packet type),"
            " the queue's error handling callback will trigger.";
  } else if (launch_InvalidWorkGroupSize) {
    name += " For InvalidWorkGroupSize";
    desc += " This test verifies that if an aql packet specifies an invalid"
            " workgroup size, the queue's error handling callback will trigger.";
  }
  set_title(name);
  set_description(desc);

  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
  set_kernel_file_name("dispatch_time_kernels.hsaco");
  set_kernel_name("empty_kernel");
}

// Nothing to release here; cleanup is performed by Close().
QueueValidation::~QueueValidation(void) = default;

// One-time setup of the member state used by the rest of the test.
//
// Besides the common agent/pool/packet initialisation, core dumps are
// disabled for the duration of the test because the queue exceptions it
// provokes would otherwise trigger one. The previous RLIMIT_CORE value is
// saved in rlimit_ so that Close() can restore it.
void QueueValidation::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  /* The queue exceptions will trigger a coredump. Set the limit to 0 to disable  */
  if (getrlimit(RLIMIT_CORE, &rlimit_)) {
    perror("Could not get system rlimit");
  } else {
    // Lower only the soft limit and keep the hard limit as it was. An
    // unprivileged process cannot raise a hard limit again once it has been
    // lowered, so zeroing rlim_max would make the restore in Close()
    // impossible.
    struct rlimit rlimit_set = rlimit_;
    rlimit_set.rlim_cur = 0;

    /* Do not error if system does not allow disabling limit */
    if (setrlimit(RLIMIT_CORE, &rlimit_set))
      perror("Could not set core file size");
  }

  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void QueueValidation::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void QueueValidation::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to display. The required profile is
// still compared with the one we are running on, but the outcome does not
// change what happens next: the function returns either way.
void QueueValidation::DisplayResults(void) const {
  static_cast<void>(rocrtst::CheckProfile(this));
}

// Tears the test down: restores the core-file limit saved by SetUp() and
// then closes the base test.
void QueueValidation::Close() {
  /* Restore rlimit to initial value before test - do not error if fails */
  // perror() appends ": <error text>" and a newline itself, so the message
  // must not end in '\n' or the diagnostic is split across two lines.
  if (setrlimit(RLIMIT_CORE, &rlimit_))
      perror("Could not set core file size");

  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Separator line for subtest output.
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner line that introduces a queue validation subtest to
// stdout: "  *** QueueValidation Subtest: <header> ***" followed by a
// newline and a flush.
static void PrintDebugSubtestHeader(const char *header) {
  std::cout << "  *** QueueValidation Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}

// Submits, on kMaxQueue freshly created queues of `gpuAgent`, a kernel
// dispatch packet whose setup field is 4 (a dimension count above 3) and
// checks that:
//   - the completion signal still reads 1 after the wait, and
//   - the error callback flag of every queue has been set by the time the
//     queues are destroyed.
// Each queue is created with CallbackQueueErrorHandling as its callback and a
// user_data entry whose expected_status is
// HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS.
// `cpuAgent` is not used by this function.
void QueueValidation::QueueValidationForInvalidDimension(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Create the executable, get symbol by name and load the code object
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // get queue size
  uint32_t queue_max = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Adjust the size to the max of 1024
  queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;

  hsa_queue_t *queue[kMaxQueue];  // command queue
  uint32_t ii;
  test_validation_data user_data[kMaxQueue];
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // clear the callback flag; the test later expects it to have become true
    user_data[ii].cb_triggered = false;
    // set the queue pointer
    user_data[ii].queue_pointer = &queue[ii];
    // set the status expected in the queue error callback handling
    user_data[ii].expected_status = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;

    // create queue
    err = hsa_queue_create(gpuAgent,
                       queue_max, HSA_QUEUE_TYPE_SINGLE,
                       CallbackQueueErrorHandling, &user_data[ii], 0, 0, &queue[ii]);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // setting the dimension to more than 3
    aql().setup = 4;
    aql().kernel_object = kernel_object();
    const uint32_t queue_mask = queue[ii]->size - 1;

    // Load index for writing header later to command queue at same index
    uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
    hsa_queue_store_write_index_relaxed(queue[ii], index + 1);

    rocrtst::WriteAQLToQueueLoc(queue[ii], index, &aql());

    aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    void* q_base = queue[ii]->base_address;
    // Set the Aql packet header
    rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);


    // ring the doorbell
    hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);

    // wait for the signal long enough for the queue error handling callback to happen
    hsa_signal_value_t completion;
    completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                           0xffffff, HSA_WAIT_STATE_ACTIVE);
    // completion signal should not be changed.
    ASSERT_EQ(completion, 1);

    // re-arm the completion signal for the next queue
    hsa_signal_store_relaxed(aql().completion_signal, 1);
  }
  // give the callbacks time to run before their flags are inspected
  sleep(1);
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // queue error handling callback should have been triggered
    ASSERT_EQ(user_data[ii].cb_triggered, true);
    if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
  }

  clear_code_object();
}


// Submits, on kMaxQueue freshly created queues of `gpuAgent`, a kernel
// dispatch packet whose group_segment_size is (uint32_t)-1 and checks that:
//   - the completion signal still reads 1 after the wait, and
//   - the error callback flag of every queue has been set by the time the
//     queues are destroyed.
// Each queue is created with CallbackQueueErrorHandling as its callback and a
// user_data entry whose expected_status is
// HSA_STATUS_ERROR_INVALID_ALLOCATION.
// `cpuAgent` is not used by this function.
void QueueValidation::QueueValidationInvalidGroupMemory(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Create the executable, get symbol by name and load the code object
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // get queue size
  uint32_t queue_max = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Adjust the size to the max of 1024
  queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;

  hsa_queue_t *queue[kMaxQueue];  // command queue
  test_validation_data user_data[kMaxQueue];

  uint32_t ii;
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // clear the callback flag; the test later expects it to have become true
    user_data[ii].cb_triggered = false;
    // set the queue pointer
    user_data[ii].queue_pointer = &queue[ii];
    // set the status expected in the queue error callback handling
    user_data[ii].expected_status = HSA_STATUS_ERROR_INVALID_ALLOCATION;

    // create queue
    err = hsa_queue_create(gpuAgent,
                       queue_max, HSA_QUEUE_TYPE_SINGLE,
                       CallbackQueueErrorHandling, &user_data[ii], 0, 0, &queue[ii]);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    aql().kernel_object = kernel_object();
    // Request a large group memory segment size
    aql().group_segment_size = (uint32_t)-1;

    const uint32_t queue_mask = queue[ii]->size - 1;

    // Load index for writing header later to command queue at same index
    uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
    hsa_queue_store_write_index_relaxed(queue[ii], index + 1);

    rocrtst::WriteAQLToQueueLoc(queue[ii], index, &aql());

    aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    void* q_base = queue[ii]->base_address;
    // Set the Aql packet header
    rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);


    // ring the doorbell
    hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);

    // wait for the signal long enough for the queue error handling callback to happen
    hsa_signal_value_t completion;
    completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                           0xffffff, HSA_WAIT_STATE_ACTIVE);
    // completion signal should not be changed.
    ASSERT_EQ(completion, 1);

    // re-arm the completion signal for the next queue
    hsa_signal_store_relaxed(aql().completion_signal, 1);
  }
  // give the callbacks time to run before their flags are inspected
  sleep(1);
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // queue error handling callback should have been triggered
    ASSERT_EQ(user_data[ii].cb_triggered, true);
    if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
  }

  clear_code_object();
}

// Submits, on kMaxQueue freshly created queues of `gpuAgent`, a kernel
// dispatch packet whose kernel_object is 0 and checks that:
//   - the completion signal still reads 1 after the wait, and
//   - the error callback flag of every queue has been set by the time the
//     queues are destroyed.
// Each queue is created with CallbackQueueErrorHandling as its callback and a
// user_data entry whose expected_status is
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT.
// `cpuAgent` is not used by this function.
void QueueValidation::QueueValidationForInvalidKernelObject(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Create the executable, get symbol by name and load the code object
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);


  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // get queue size
  uint32_t queue_max = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Adjust the size to the max of 1024
  queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;

  hsa_queue_t *queue[kMaxQueue];  // command queue
  test_validation_data user_data[kMaxQueue];
  uint32_t ii;
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // clear the callback flag; the test later expects it to have become true
    user_data[ii].cb_triggered = false;
    // set the queue pointer
    user_data[ii].queue_pointer = &queue[ii];
    // set the status expected in the queue error callback handling
    user_data[ii].expected_status = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;

    // create queue with the size clamped to what the agent reports, as the
    // sibling validation tests do (not the unclamped constant)
    err = hsa_queue_create(gpuAgent,
                           queue_max, HSA_QUEUE_TYPE_SINGLE,
                           CallbackQueueErrorHandling, &user_data[ii], 0, 0, &queue[ii]);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // setting the null code object
    aql().kernel_object = 0;

    const uint32_t queue_mask = queue[ii]->size - 1;

    // Load index for writing header later to command queue at same index
    uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
    hsa_queue_store_write_index_relaxed(queue[ii], index + 1);

    rocrtst::WriteAQLToQueueLoc(queue[ii], index, &aql());

    aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                 HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    void* q_base = queue[ii]->base_address;
    // Set the Aql packet header
    rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                        &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                            (q_base))[index & queue_mask]);


    // ring the doorbell
    hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);

    // wait for the signal long enough for the queue error handling callback to happen
    hsa_signal_value_t completion;
    completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                           0xffffff, HSA_WAIT_STATE_ACTIVE);
    // completion signal should not be changed.
    ASSERT_EQ(completion, 1);

    // re-arm the completion signal for the next queue
    hsa_signal_store_relaxed(aql().completion_signal, 1);
  }
  // give the callbacks time to run before their flags are inspected
  sleep(1);
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // queue error handling callback should have been triggered
    ASSERT_EQ(user_data[ii].cb_triggered, true);
    if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
  }

  clear_code_object();
}

// Negative test helper: on each of kMaxQueue freshly created queues of
// gpuAgent, submit one AQL packet whose header carries an invalid packet
// type. The queue error callback is expected to be invoked with
// HSA_STATUS_ERROR_INVALID_PACKET_FORMAT while the packet's completion signal
// keeps its value of 1. cpuAgent is not used by this routine.
void QueueValidation::QueueValidationForInvalidPacket(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  // Build the executable and resolve the kernel symbol for this agent
  hsa_status_t err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Populate every dispatch packet field apart from the header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Ask the agent for its largest supported queue size ...
  uint32_t queue_max = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // ... but never request more than kMaxQueueSizeForAgent entries
  if (!(queue_max < kMaxQueueSizeForAgent)) {
    queue_max = kMaxQueueSizeForAgent;
  }

  hsa_queue_t *queue[kMaxQueue];  // one command queue per iteration
  test_validation_data user_data[kMaxQueue];
  uint32_t ii;
  for (ii = 0; ii < kMaxQueue; ++ii) {
    // Per-queue state handed to the error callback: the flag starts out
    // false and is flipped by the callback, which also compares the reported
    // status and queue against the values stored here.
    test_validation_data& cb_state = user_data[ii];
    cb_state.cb_triggered = false;
    cb_state.queue_pointer = &queue[ii];
    cb_state.expected_status = HSA_STATUS_ERROR_INVALID_PACKET_FORMAT;

    err = hsa_queue_create(gpuAgent,
                           queue_max, HSA_QUEUE_TYPE_SINGLE,
                           CallbackQueueErrorHandling, &user_data[ii],
                           0, 0, &queue[ii]);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    hsa_queue_t* const cmd_queue = queue[ii];
    const uint32_t wrap_mask = cmd_queue->size - 1;

    // Reserve one slot; its header is published last, at the same index
    const uint64_t slot = hsa_queue_load_write_index_relaxed(cmd_queue);
    hsa_queue_store_write_index_relaxed(cmd_queue, slot + 1);

    rocrtst::WriteAQLToQueueLoc(cmd_queue, slot, &aql());

    // Deliberately corrupt the packet type field of the header
    aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
    aql().header |=  0xFFFF << HSA_PACKET_HEADER_TYPE;
    aql().kernel_object = kernel_object();

    // Publish header + setup into the reserved ring slot
    hsa_kernel_dispatch_packet_t* const ring =
        reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
            cmd_queue->base_address);
    rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                                   &ring[slot & wrap_mask]);

    // Ring the doorbell
    hsa_signal_store_relaxed(cmd_queue->doorbell_signal, slot);

    // Wait (bounded by the timeout) so the error callback has time to fire;
    // the completion signal itself must still read 1 afterwards.
    const hsa_signal_value_t completion =
        hsa_signal_wait_scacquire(aql().completion_signal,
                                  HSA_SIGNAL_CONDITION_LT, 1,
                                  0xffffff, HSA_WAIT_STATE_ACTIVE);
    ASSERT_EQ(completion, 1);

    hsa_signal_store_relaxed(aql().completion_signal, 1);
  }

  sleep(1);

  for (ii = 0; ii < kMaxQueue; ++ii) {
    // Every queue must have reported the error through its callback
    ASSERT_EQ(user_data[ii].cb_triggered, true);
    if (queue[ii] != nullptr) {
      hsa_queue_destroy(queue[ii]);
    }
  }

  clear_code_object();
}

// Negative test helper: for each of kMaxQueue rounds and for jj = 1..3,
// create a queue on gpuAgent and dispatch a kernel packet whose workgroup
// size along dimension jj (x, y or z) is 0xFFFF while the other two are 1.
// The queue error callback is expected to be invoked with
// HSA_STATUS_ERROR_INVALID_ARGUMENT and the packet's completion signal is
// expected to keep its value of 1. Each queue is destroyed before the next
// one is created. cpuAgent is not used by this routine.
void QueueValidation::QueueValidationForInvalidWorkGroupSize(hsa_agent_t cpuAgent,
                                            hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // Create the executable, get symbol by name and load the code object
  err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill up the kernel packet except header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // get queue size
  uint32_t queue_max = 0;
  err = hsa_agent_get_info(gpuAgent,
                           HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Never request more than kMaxQueueSizeForAgent entries, and never more
  // than the agent itself supports.
  queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;

  hsa_queue_t *queue[kMaxQueue];  // command queue
  test_validation_data user_data[kMaxQueue][3];
  uint32_t ii;
  for (ii = 0; ii < kMaxQueue; ++ii) {
    uint32_t jj;
    for (jj = 1; jj <= 3; ++jj) {
      // set callback flag to false if callback called then it will change to true
      user_data[ii][jj - 1].cb_triggered = false;
      // set the queue pointer
      user_data[ii][jj - 1].queue_pointer = &queue[ii];
      // set the expected status in queue error callback handling
      user_data[ii][jj - 1].expected_status = HSA_STATUS_ERROR_INVALID_ARGUMENT;

      // Create the queue with the clamped size computed above, so the
      // request stays within what the agent reported as its maximum.
      err = hsa_queue_create(gpuAgent,
              queue_max, HSA_QUEUE_TYPE_SINGLE,
              CallbackQueueErrorHandling, &user_data[ii][jj - 1], 0, 0, &queue[ii]);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);

      // NOTE(review): this ORs jj into the existing setup value, so dimension
      // bits left by earlier iterations (or by InitializeAQLPacket) are never
      // cleared. Confirm whether the dimension field should be overwritten
      // instead.
      aql().setup |= jj << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
      // Exactly one dimension gets the out-of-range workgroup size
      aql().workgroup_size_x = (jj == 1) ? static_cast<uint16_t>(-1) : 1;
      aql().workgroup_size_y = (jj == 2) ? static_cast<uint16_t>(-1) : 1;
      aql().workgroup_size_z = (jj == 3) ? static_cast<uint16_t>(-1) : 1;

      aql().kernel_object = kernel_object();

      const uint32_t queue_mask = queue[ii]->size - 1;

      // Load index for writing header later to command queue at same index
      uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
      hsa_queue_store_write_index_relaxed(queue[ii], index + 1);

      rocrtst::WriteAQLToQueueLoc(queue[ii], index, &aql());
      aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
      aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                    HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
      aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
                    HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

      void* q_base = queue[ii]->base_address;
      // Set the Aql packet header
      rocrtst::AtomicSetPacketHeader(aql().header, aql().setup,
                          &(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
                          (q_base))[index & queue_mask]);


      // ring doorbell
      hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);

      // wait for the signal long enough for the queue error handling callback to happen
      hsa_signal_value_t completion;
      completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
                                             0xffffff, HSA_WAIT_STATE_ACTIVE);
      // completion signal should not be changed.
      ASSERT_EQ(completion, 1);

      hsa_signal_store_relaxed(aql().completion_signal, 1);
      // The queue slot is reused by the next jj iteration, so release it now
      if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
    }
  }
  sleep(1);
  for (uint32_t ii = 0; ii < kMaxQueue; ++ii) {
    for (uint32_t jj = 0; jj < 3; ++jj) {
      // queue error handling callback  should be triggered
      ASSERT_EQ(user_data[ii][jj].cb_triggered, true);
    }
  }

  clear_code_object();
}


void QueueValidation::QueueValidationForInvalidDimension(void) {
  hsa_status_t err;
  if (verbosity() > 0) {
    PrintDebugSubtestHeader("InvalidDimensionTest");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueValidationForInvalidDimension(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueValidation::QueueValidationInvalidGroupMemory(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("InvalidGroupMemory");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueValidationInvalidGroupMemory(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueValidation::QueueValidationForInvalidKernelObject(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("InvalidKernelObject");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueValidationForInvalidKernelObject(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueValidation::QueueValidationForInvalidPacket(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("InvalidPacket");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueValidationForInvalidPacket(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueValidation::QueueValidationForInvalidWorkGroupSize(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("InvalidWorkGroupSize");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueValidationForInvalidWorkGroupSize(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


// Queue error callback registered through hsa_queue_create by the queue
// validation subtests. `data` is the test_validation_data record that was
// passed at queue creation: the callback marks it as triggered, then checks
// that the reported status matches the expected one and that `source` is the
// queue the record points at.
void CallbackQueueErrorHandling(hsa_status_t status, hsa_queue_t* source, void* data) {
  ASSERT_NE(source, nullptr);
  ASSERT_NE(data, nullptr);

  test_validation_data* const debug_data =
      static_cast<test_validation_data*>(data);
  hsa_queue_t* const queue = *(debug_data->queue_pointer);

  // Record the invocation first so the flag is set even when a check below
  // fails.
  debug_data->cb_triggered = true;

  // The runtime must report the status this subtest provoked
  ASSERT_EQ(status, debug_data->expected_status);
  // ... and it must report it for the queue the record belongs to
  ASSERT_EQ(source->id, queue->id);
}


================================================
FILE: rocrtst/suites/negative/queue_validation.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_
#define ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
#include <sys/resource.h>


// @Brief: Negative test suite that submits malformed AQL packets to GPU
// queues and verifies that the queue error handling callback fires.
class QueueValidation : public TestBase {
 public:
    // @Brief: Constructor. Takes one flag per subtest.
    // NOTE(review): each flag presumably selects whether the corresponding
    // subtest is launched by Run(); confirm against the constructor and
    // Run() definitions.
    QueueValidation(bool launch_InvalidDimension,
                    bool launch_InvalidGroupMemory,
                    bool launch_InvalidKernelObject,
                    bool launch_InvalidPacket,
                    bool launch_InvalidWorkGroupSize);

  // @Brief: Destructor for the QueueValidation test case
  virtual ~QueueValidation();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display  results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Verifies that if an aql packet specifies a dimension
  // value above 3, the queue's error handling callback will trigger
  void QueueValidationForInvalidDimension(void);
  // @Brief: Verifies that if an aql packet specifies an invalid group
  // memory size, the queue's error handling callback will trigger.
  void QueueValidationInvalidGroupMemory(void);
  // @Brief: Verifies that if an aql packet specifies an invalid
  // kernel object, the queue's error handling callback will trigger.
  void QueueValidationForInvalidKernelObject(void);
  // @Brief: Verifies that if an aql packet is invalid (bad packet type),
  // the queue's error handling callback will trigger
  void QueueValidationForInvalidPacket(void);
  // @Brief: Verifies that if an aql packet specifies an invalid
  // workgroup size, the queue's error handling callback will trigger.
  void QueueValidationForInvalidWorkGroupSize(void);


 private:
  struct rlimit rlimit_; // value of rlimit before test starts

  // Per-agent implementations behind the public subtests above; each public
  // overload calls its counterpart once per GPU agent.
  void QueueValidationForInvalidDimension(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueValidationInvalidGroupMemory(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueValidationForInvalidKernelObject(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueValidationForInvalidPacket(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueValidationForInvalidWorkGroupSize(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
};

#endif  // ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_


================================================
FILE: rocrtst/suites/performance/dispatch_time.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <algorithm>
#include <string>

#include "suites/performance/dispatch_time.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/os.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Constructs the dispatch-latency test.
//   defaultInterrupt   - true: leave interrupt behaviour to the
//                        HSA_ENABLE_INTERRUPT environment variable;
//                        false: interrupts are enabled by the test.
//   launchSingleKernel - true: time one kernel per measurement;
//                        false: time a whole batch of kernels at once.
// Also selects the kernel object/name, the batch and iteration counts
// (reduced for emulator builds) and composes the test title/description.
DispatchTime::
DispatchTime(bool defaultInterrupt, bool launchSingleKernel) : TestBase(),
              use_default_interupt_(defaultInterrupt),
              launch_single_(launchSingleKernel),
              queue_size_(0),
              dispatch_time_mean_(0.0),
              // Previously left uninitialised; give it a defined value.
              orig_iterrupt_env_(nullptr) {
#ifdef ROCRTST_EMULATOR_BUILD
  num_batch_ = 2;
  set_num_iteration(1);
#else
  num_batch_ = 100000;
  set_num_iteration(100);
#endif

  // Start from an all-zero dispatch packet; SetUp() fills in the fields.
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));

  set_kernel_file_name("dispatch_time_kernels.hsaco");
  set_kernel_name("empty_kernel");

  std::string name;
  std::string desc;

  name = "Average Dispatch Time";
  desc = "This test measures the time to handle AQL packets that "
      "do no work. Time is measured from when the packet is made available to"
      " the Command Processor to when the target agent notifies the host that "
      "the packet has been executed.  ";

  if (defaultInterrupt) {
    name += ", Default Interrupts";
    desc += "Interrupts are controlled by HSA_ENABLE_INTERRUPT environment "
                                                                "variable. ";
  } else {
    name += ", Interrupts Enabled";
    desc += "Interrupts are enabled. ";
  }

  if (launchSingleKernel) {
    name += ", Single Kernel";
    // Fixed garbled wording ("is and executed")
    desc += " One kernel at a time is dispatched and executed.";
  } else {
    name += ", Multiple Kernels";
    desc += " Enough kernels to fill the queue are dispatched at one time";
  }

  set_title(name);
  set_description(desc);
}

// Nothing to release here; resources are handled in Close().
DispatchTime::~DispatchTime() = default;

// Prepares the test: configures the interrupt setting, runs the common
// TestBase set-up, picks default agents, creates the main queue on the GPU,
// caps the batch size to the agent's maximum queue size (batched mode only),
// loads the kernel and initialises a 1x1 dispatch packet.
void DispatchTime::SetUp() {
  // The interrupt choice has to be in place before TestBase::SetUp()
  set_enable_interrupt(!use_default_interupt_);

  TestBase::SetUp();

  hsa_status_t err = SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  hsa_agent_t* gpu_dev = gpu_device1();

  // Create the queue all dispatches will go through
  hsa_queue_t* q = nullptr;
  rocrtst::CreateQueue(*gpu_dev, &q);
  ASSERT_NE(q, nullptr);
  set_main_queue(q);

  // In batched mode a batch may not exceed the agent's maximum queue size
  if (!launch_single_) {
    uint32_t size = 0;
    err = hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    if (num_batch_ > size) {
      num_batch_ = size;
    }
  }

  err = rocrtst::LoadKernelFromObjFile(this, gpu_dev);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Populate every dispatch packet field apart from the header
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // A single work-item is enough for a latency measurement
  aql().workgroup_size_x = 1;
  aql().grid_size_x = 1;
}

// Runs the measurement, unless the profile check rejects this test: first
// the common TestBase::Run(), then the single-packet or batched variant
// depending on how the test was constructed.
void DispatchTime::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();

    if (!launch_single_) {
      RunMulti();
    } else {
      RunSingle();
    }
  }
}

// Number of measurements actually taken: the requested iteration count
// scaled by 1.2 plus one, truncated to an integer. The surplus samples are
// discarded by the Run* routines.
size_t DispatchTime::RealIterationNum() {
  const double padded = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded);
}

// Measures single-packet dispatch latency. For each measurement one AQL
// packet is written to the main queue; the timer covers publishing the
// packet header, ringing the doorbell and waiting for the completion signal
// to drop below 1. The first sample is discarded, the rest are sorted and
// at most num_iteration() of the fastest samples are averaged into
// dispatch_time_mean_.
void DispatchTime::RunSingle() {
  std::vector<double> timer;

  uint32_t it = RealIterationNum();
  const uint32_t queue_mask = main_queue()->size - 1;

  // queue should be empty
  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
            hsa_queue_load_write_index_scacquire(main_queue()));

  hsa_kernel_dispatch_packet_t *q_base_addr =
      reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                                 (main_queue()->base_address);

  // Never take more measurements than the queue has slots
  if (it > main_queue()->size) {
    it = main_queue()->size;
  }
  for (uint32_t i = 0; i < it; i++) {
    // Obtain the current queue write index.
    uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);

    // Write the aql packet at the calculated queue index address.
    rocrtst::WriteAQLToQueueLoc(main_queue(), index, &aql());

    // Get timing stamp and ring the doorbell to dispatch the kernel.
    rocrtst::PerfTimer p_timer;
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);

    rocrtst::AtomicSetPacketHeader(
        HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE,
        aql().setup,
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                        (&(q_base_addr)[index & queue_mask]));

    hsa_signal_store_screlease(main_queue()->doorbell_signal, index);

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(aql().completion_signal,
         HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE)) {
    }

    p_timer.StopTimer(id);

    timer.push_back(p_timer.ReadTimer(id));
    // Re-arm the completion signal for the next dispatch
    hsa_signal_store_screlease(aql().completion_signal, 1);

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << ".";
      fflush(stdout);
    }
  }

  if (verbosity() >= VERBOSE_PROGRESS) {
    std::cout << std::endl;
  }

  // Drop the first (warm-up) sample, sort, and keep only the fastest
  // num_iteration() samples. Because `it` may have been clamped to the queue
  // size above, fewer than num_iteration() samples can remain, so bound the
  // erase position to avoid forming an iterator past end().
  if (!timer.empty()) {
    timer.erase(timer.begin());
  }
  std::sort(timer.begin(), timer.end());

  const size_t keep = std::min<size_t>(num_iteration(), timer.size());
  timer.erase(timer.begin() + keep, timer.end());

  dispatch_time_mean_ = rocrtst::CalcMean(timer);
}

void DispatchTime::RunMulti() {
  std::vector<double> timer;
  int it = RealIterationNum();
  const uint32_t queue_mask = main_queue()->size - 1;
  hsa_kernel_dispatch_packet_t *q_base_addr =
      reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                                 (main_queue()->base_address);

  // queue should be empty
  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
            hsa_queue_load_write_index_scacquire(main_queue()));

  rocrtst::PerfTimer p_timer;

  for (int i = 0; i < it; i++) {
    uint64_t* index =
           reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * num_batch_));

    ASSERT_NE(index, nullptr);

    hsa_signal_store_screlease(aql().completion_signal, num_batch_);

    for (uint32_t j = 0; j < num_batch_; j++) {
      // index[j] = hsa_queue_add_write_index_scacq_screl(main_queue(), 1);
      index[j] = hsa_queue_add_write_index_relaxed(main_queue(), 1);

      // Write the aql packet at the calculated queue index address.
      rocrtst::WriteAQLToQueueLoc(main_queue(), index[j], &aql());
    }

    rocrtst::AtomicSetPacketHeader(
        (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
        (1 << HSA_PACKET_HEADER_BARRIER),
        aql().setup,
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                          (&q_base_addr[index[num_batch_ - 1] & queue_mask]));

    // Set packet header reversly; set all headers except the very first
    // one, for now.
    for (uint32_t j = num_batch_ - 1; j > 0; j--) {
      rocrtst::AtomicSetPacketHeader(
          HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE,
          aql().setup,
          reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                      (&q_base_addr[index[j] & queue_mask]));
    }

    // Get timing stamp and ring the doorbell to dispatch the kernel.
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);
    // Set the very first header...
    rocrtst::AtomicSetPacketHeader(
        HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE,
        aql().setup,
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                     (&(q_base_addr)[index[0] & queue_mask]));

    hsa_signal_store_screlease(main_queue()->doorbell_signal, index[num_batch_ - 1]);

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(aql().completion_signal,
        HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {
    }

    p_timer.StopTimer(id);

    timer.push_back(p_timer.ReadTimer(id));
    hsa_signal_store_screlease(aql().completion_signal, 1);

    free(index);

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << ".";
      fflush(stdout);
    }
  }

  std::cout << std::endl;

  // Abandon the first result and after sort, delete the last 2% value
  timer.erase(timer.begin());
  std::sort(timer.begin(), timer.end());

  timer.erase(timer.begin() + num_iteration(), timer.end());

  dispatch_time_mean_ = rocrtst::CalcMean(timer);

  return;
}

void DispatchTime::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// Prints the measured mean time scaled by 1e6 and labelled "uS". In batched
// mode the mean is additionally divided by the batch size. Nothing is
// printed when the profile check rejects this test.
void DispatchTime::DisplayResults(void) const {
  if (rocrtst::CheckProfile(this)) {
    TestBase::DisplayResults();

    const double scaled_mean = dispatch_time_mean_ * 1e6;

    std::cout << "Average Time to Completion: ";
    if (!launch_single_) {
      std::cout << scaled_mean / num_batch_;
    } else {
      std::cout << scaled_mean;
    }
    std::cout << " uS" << std::endl;
  }
}

void DispatchTime::Close() {
  TestBase::Close();
  return;
}


================================================
FILE: rocrtst/suites/performance/dispatch_time.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_PERFORMANCE_DISPATCH_TIME_H_
#define ROCRTST_SUITES_PERFORMANCE_DISPATCH_TIME_H_
#include <vector>

#include "suites/test_common/test_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "hsa/hsa.h"

// @Brief: This class is defined to measure the mean latency of launching
// an empty kernel

class DispatchTime : public TestBase {
 public:
  // @Brief: Constructor
  // defaultInterrupt: when true, interrupt behaviour is left to the
  // HSA_ENABLE_INTERRUPT environment variable; otherwise interrupts are
  // enabled by the test.
  // launchSingleKernel: when true, one kernel is timed per measurement;
  // otherwise a batch of kernels is timed at once.
  DispatchTime(bool defaultInterrupt, bool launchSingleKernel);

  // @Brief: Destructor
  virtual ~DispatchTime(void);

  // @Brief: Set up the environment for the test
  virtual void SetUp(void);

  // @Brief: Run the test case
  virtual void Run(void);

  // @Brief: Display  results we got
  virtual void DisplayResults(void) const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // @Brief: Clean up and close the runtime
  virtual void Close(void);

 private:
  // @Brief: Get actual iteration number (the requested iteration count
  // padded with extra samples that are discarded afterwards)
  virtual size_t RealIterationNum(void);

  // @Brief: Launch single packet each time
  virtual void RunSingle(void);

  // @Brief: Launch multiple packets each time
  virtual void RunMulti(void);

  // @Brief: Indicate if the default interrupt setting is used or not
  bool use_default_interupt_;

  // @Brief: Indicate if launch single kernel or not
  bool launch_single_;

  // @Brief: Store the size of queue
  uint32_t queue_size_;

  // @Brief: Number of packets in a batch
  uint32_t num_batch_;

  // @Brief: Ave. dispatch time
  double dispatch_time_mean_;

  // NOTE(review): presumably intended to hold the original value of the
  // interrupt environment variable; confirm it is still needed.
  char* orig_iterrupt_env_;
};

#endif  // ROCRTST_SUITES_PERFORMANCE_DISPATCH_TIME_H_


================================================
FILE: rocrtst/suites/performance/enqueueLatency.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#include <fcntl.h>
#include <algorithm>
#include <string>

#include "suites/performance/enqueueLatency.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/os.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Early-return helper for functions that return hsa_status_t.
// When `err` is not HSA_STATUS_SUCCESS it prints the line, file and numeric
// status to stdout, followed by the text obtained from hsa_status_string(),
// and then returns `err` from the enclosing function. On success it does
// nothing.
// Note: `err` is expanded several times, so pass a variable rather than an
// expression with side effects.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = 0; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << msg << std::endl; \
    return (err); \
  } \
}

// Constructs the enqueue-latency test.
//
// enqueueSinglePacket: true  -> one packet is placed in the queue per timed
//                               iteration;
//                      false -> a whole batch (num_of_pkts_) is placed in the
//                               queue per timed iteration.
// The packet count and iteration count are reduced for emulator builds.
EnqueueLatency::EnqueueLatency(bool enqueueSinglePacket)
    : TestBase(), enqueue_single_(enqueueSinglePacket) {
  queue_size_ = 0;

#if ROCRTST_EMULATOR_BUILD
  num_of_pkts_ = 2;
  set_num_iteration(1);
#else
  num_of_pkts_ = 100000;
  set_num_iteration(100);
#endif

  // Start from an all-zero dispatch packet; Run() fills in the real fields.
  memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
  enqueue_time_mean_ = 0.0;

  std::string name("Average Enqueue Time");
  std::string desc(
      "This test measures the time when the packet enqueue to the"
      " queue and before the door bell is ring to notify the command processor "
      "to execute the packet");

  // Title and description get a suffix that names the selected variant.
  name += enqueueSinglePacket ? ", Single Packet" : ", Multiple Packets";
  desc += enqueueSinglePacket
              ? " One Packet at a time in queue."
              : " Multiple i.e. maximum Packets equeued to queue at one time";

  set_title(name);
  set_description(desc);
}

// Nothing to release here: the class owns no resources of its own.
EnqueueLatency::~EnqueueLatency() = default;

void EnqueueLatency::SetUp() {
  hsa_status_t err;
  TestBase::SetUp();
  // If it indicates to use default signal, set env var properly

  err = SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
}

void EnqueueLatency::Run() {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }
  hsa_status_t err;
  TestBase::Run();

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    hsa_agent_t* gpu_dev = &gpus[i];
    char agent_name[64];
    err = hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_NAME, agent_name);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    set_agent_name(agent_name);

    // Create a queue
    hsa_queue_t* q = nullptr;
    rocrtst::CreateQueue(*gpu_dev, &q);
    ASSERT_NE(q, nullptr);
    set_main_queue(q);

    set_kernel_file_name("dispatch_time_kernels.hsaco");
    set_kernel_name("empty_kernel");
    err = rocrtst::LoadKernelFromObjFile(this, gpu_dev);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // Fill up the kernel packet except header
    err = rocrtst::InitializeAQLPacket(this, &aql());
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    aql().workgroup_size_x = 1;
    aql().grid_size_x = 1;

    // Here, modify the batch size if it is larger than the queue size
    if (enqueue_single_) {
      EnqueueSinglePacket();
    } else {
      hsa_status_t err;
      uint32_t size = 0;
      err = hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &size);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);

      num_of_pkts_ = num_of_pkts_ > size ? size : num_of_pkts_;
      EnqueueMultiPackets();
    }
    hsa_queue_destroy(q);
    set_main_queue(nullptr);
  }
}


// Number of iterations actually executed: the configured iteration count
// plus 20% and one extra run, truncated to an integer. The surplus samples
// are discarded again when the mean is computed.
size_t EnqueueLatency::RealIterationNum() {
  const double padded = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded);
}

void EnqueueLatency::EnqueueSinglePacket() {
  std::vector<double> timer;

  int it = RealIterationNum();
  const uint32_t queue_mask = main_queue()->size - 1;

  // queue should be empty
  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
            hsa_queue_load_write_index_scacquire(main_queue()));

  hsa_kernel_dispatch_packet_t *q_base_addr =
                      reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
                                                  main_queue()->base_address);
  rocrtst::PerfTimer p_timer;
  for (int i = 0; i < it; i++) {
    // Get timing stamp and ring the doorbell to dispatch the kernel.
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);
    // Obtain the current queue write index.
    uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);

    ASSERT_LT(index, main_queue()->size + index);

    // Write the aql packet at the calculated queue index address.
    rocrtst::WriteAQLToQueueLoc(main_queue(), index, &aql());

    rocrtst::AtomicSetPacketHeader(
        HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE,
        aql().setup,
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                     (&(q_base_addr)[index & queue_mask]));

    p_timer.StopTimer(id);

    timer.push_back(p_timer.ReadTimer(id));
    hsa_signal_store_screlease(main_queue()->doorbell_signal, index);

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(aql().completion_signal,
         HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) {
    }

    hsa_signal_store_screlease(aql().completion_signal, 1);

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << ".";
      fflush(stdout);
    }
  }

  if (verbosity() >= VERBOSE_PROGRESS) {
    std::cout << std::endl;
  }

  // Abandon the first result and after sort, delete the last 2% value
  timer.erase(timer.begin());
  std::sort(timer.begin(), timer.end());

  timer.erase(timer.begin() + num_iteration(), timer.end());

  enqueue_time_mean_ = rocrtst::CalcMean(timer);

  return;
}

void EnqueueLatency::EnqueueMultiPackets() {
  std::vector<double> timer;
  int it = RealIterationNum();
  const uint32_t queue_mask = main_queue()->size - 1;

  // queue should be empty
  ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
            hsa_queue_load_write_index_scacquire(main_queue()));

  rocrtst::PerfTimer p_timer;

  hsa_kernel_dispatch_packet_t *q_base_addr =
                      reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
                                                  main_queue()->base_address);

  for (int i = 0; i < it; i++) {
    // Get timing stamp and ring the doorbell to dispatch the kernel.
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);
    uint64_t* index =
           reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * num_of_pkts_));

    ASSERT_NE(index, nullptr);

    hsa_signal_store_screlease(aql().completion_signal, num_of_pkts_);

    for (uint32_t j = 0; j < num_of_pkts_; j++) {
      // index[j] = hsa_queue_add_write_index_scacq_screl(main_queue(), 1);
      index[j] = hsa_queue_add_write_index_relaxed(main_queue(), 1);

      // Write the aql packet at the calculated queue index address.
      rocrtst::WriteAQLToQueueLoc(main_queue(), index[j], &aql());
    }
    // Write the aql packet at the calculated queue index address.

    rocrtst::AtomicSetPacketHeader(
        (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
        (1 << HSA_PACKET_HEADER_BARRIER),
        aql().setup,
        reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                      (&(q_base_addr)[index[num_of_pkts_ - 1] & queue_mask]));


    // Set packet header reversly; set all headers except the very first
    // one, for now.
    for (int32_t j = num_of_pkts_ - 1; j >= 0; j--) {
      rocrtst::AtomicSetPacketHeader(
          HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE,
          aql().setup,
          reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                     (&(q_base_addr)[index[j] & queue_mask]));
    }

    p_timer.StopTimer(id);

    timer.push_back(p_timer.ReadTimer(id));

    for (uint32_t j = 0; j < num_of_pkts_; j++) {
      hsa_signal_store_screlease(main_queue()->doorbell_signal, index[j]);
    }

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(aql().completion_signal,
        HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {
    }


    hsa_signal_store_screlease(aql().completion_signal, 1);

    free(index);

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << ".";
      fflush(stdout);
    }
  }

  std::cout << std::endl;

  // Abandon the first result and after sort, delete the last 2% value
  timer.erase(timer.begin());
  std::sort(timer.begin(), timer.end());

  timer.erase(timer.begin() + num_iteration(), timer.end());

  enqueue_time_mean_ = rocrtst::CalcMean(timer);

  return;
}


// Prints the test description; this class adds nothing to the base output.
void EnqueueLatency::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}

// Prints the measured mean in microseconds. For the multi-packet variant the
// per-batch mean is divided by the batch size to give a per-packet figure.
// Prints nothing when the profile check fails.
void EnqueueLatency::DisplayResults(void) const {
  if (rocrtst::CheckProfile(this)) {
    TestBase::DisplayResults();

    const double mean_usec = enqueue_time_mean_ * 1e6;
    const double reported_usec =
        enqueue_single_ ? mean_usec : mean_usec / num_of_pkts_;

    std::cout << "Average Time to Completion: " << reported_usec << " uS"
              << std::endl;
  }
}

// Tear-down is handled entirely by the base class.
void EnqueueLatency::Close() { TestBase::Close(); }


================================================
FILE: rocrtst/suites/performance/enqueueLatency.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_
#define ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_
#include <vector>

#include "suites/test_common/test_base.h"
#include "common/base_rocr.h"
#include "common/common.h"
#include "hsa/hsa.h"

// @Brief: Performance test that measures the mean time needed to enqueue
//  dispatch packets for an empty kernel, either one packet at a time or a
//  whole batch at a time.

class EnqueueLatency : public TestBase {
 public:
  // @Brief: Constructor; true selects the one-packet-at-a-time variant,
  //  false the batch variant
  explicit EnqueueLatency(bool launchSingleKernel);

  // @Brief: Destructor
  virtual ~EnqueueLatency();

  // @Brief: Prepare the environment the test runs in
  virtual void SetUp();

  // @Brief: Execute the measurement
  virtual void Run();

  // @Brief: Print the measured result
  virtual void DisplayResults() const;

  // @Brief: Print a description of what this test does
  virtual void DisplayTestInfo();

  // @Brief: Clean up and close the runtime
  virtual void Close();

  // @Brief: Create the executable, get symbol by name and load the code object
  // virtual void LoadCodeObject(hsa_agent_t gpuAgent,uint64_t &kernel_code);

 private:
  // @Brief: Iteration count actually executed (configured count plus margin)
  virtual size_t RealIterationNum();

  // @Brief: Timed loop that enqueues a single packet per iteration
  virtual void EnqueueSinglePacket();

  // @Brief: Timed loop that enqueues a batch of packets per iteration
  virtual void EnqueueMultiPackets();


  // @Brief: True when the single-packet variant was selected
  bool enqueue_single_;

  // @Brief: Size of the queue
  uint32_t queue_size_;

  // @Brief: Number of packets in one batch
  uint32_t num_of_pkts_;

  // @Brief: Mean enqueue time of the retained samples
  double enqueue_time_mean_;
};

#endif  // ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_


================================================
FILE: rocrtst/suites/performance/memory_async_copy.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <hwloc.h>
#include <hwloc/linux-libnuma.h>
#include <numa.h>

#include <vector>
#include <algorithm>

#include "common/base_rocr.h"
#include "suites/test_common/test_base.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "suites/performance/memory_async_copy.h"
#include "common/base_rocr_utils.h"
#include "common/helper_funcs.h"
#include "gtest/gtest.h"

// Early-return helper for functions that return hsa_status_t.
// When `err` is not HSA_STATUS_SUCCESS it looks up the status text with
// hsa_status_string(), records a non-fatal gtest failure (EXPECT_EQ) that
// carries that text, and returns `err` from the enclosing function. On
// success it does nothing.
// Note: `err` is expanded several times, so pass a variable rather than an
// expression with side effects.
#define RET_IF_HSA_ERR(err)                                                                        \
  {                                                                                                \
    if ((err) != HSA_STATUS_SUCCESS) {                                                             \
      const char* msg = 0;                                                                         \
      hsa_status_string(err, &msg);                                                                \
      EXPECT_EQ(HSA_STATUS_SUCCESS, err) << msg;                                                   \
      return (err);                                                                                \
    }                                                                                              \
  }

/* PCIE BDF ID: 0xC81407 is specific to DTIF platform */
static const uint32_t kDtifBdfId = 0xC81407;

// Out-of-class definitions for the class's constexpr static data members
// (block sizes, their printable labels and the maximum copy size), so that
// they have storage when they are odr-used.
constexpr const size_t MemoryAsyncCopy::Size[kNumGranularity];
constexpr const char* MemoryAsyncCopy::Str[kNumGranularity];
constexpr const int MemoryAsyncCopy::kMaxCopySize;

// Puts every member into a known initial state and sets the default
// iteration count, title and description of the test.
MemoryAsyncCopy::MemoryAsyncCopy(void) : TestBase() {
  static_assert(sizeof(Size) / sizeof(size_t) == kNumGranularity,
                "kNumGranularity does not match size of arrays");

  // Agent handles: zero means "not selected yet" and overrides any previous
  // initialization.
  cpu_agent_.handle = 0;
  gpu_local_agent1_.handle = 0;
  gpu_local_agent2_.handle = 0;
  gpu_remote_agent_.handle = 0;

  // hwloc state is created later, in SetUp().
  topology_ = nullptr;
  cpu_hwl_numa_nodeset_ = nullptr;

  // Discovery bookkeeping starts out empty.
  agent_index_ = 0;
  pool_index_ = 0;
  tran_.clear();
  agent_info()->clear();
  pool_info()->clear();
  node_info()->clear();

  // Run-time flags and pool selection.
  verified_ = true;
  do_p2p_ = true;
  src_pool_id_ = -1;
  dst_pool_id_ = -1;

  set_num_iteration(10);  // Default value
  set_title("Asynchronous Memory Copy Bandwidth");
  set_description(
      "This test measures bandwidth to/from Host from/to GPU "
      "and Peer to Peer using hsa_amd_memory_async_copy() to copy buffers "
      "of various length from memory pool to another.");
}

// Frees the PoolInfo and AgentInfo objects referenced by pool_info_ and
// agent_info_.
MemoryAsyncCopy::~MemoryAsyncCopy(void) {
  for (auto pool_it = pool_info_.begin(); pool_it != pool_info_.end();
       ++pool_it) {
    delete *pool_it;
  }

  for (auto agent_it = agent_info_.begin(); agent_it != agent_info_.end();
       ++agent_it) {
    delete *agent_it;
  }
}

// Runs the base set-up, initializes the hwloc topology object, discovers the
// system topology (printing it at standard verbosity and above) and builds
// the list of copy transactions to benchmark.
void MemoryAsyncCopy::SetUp(void) {
  TestBase::SetUp();

  hwloc_topology_init(&topology_);
  FindTopology();

  const bool show_topology = verbosity() >= VERBOSE_STANDARD;
  if (show_topology) {
    PrintTopology();
  }

  ConstructTransactionList();
}

void MemoryAsyncCopy::Run(void) {
  TestBase::Run();

  for (Transaction t : tran_) {
    this->RunBenchmarkWithVerification(&t);
  }
}

void MemoryAsyncCopy::FindSystemPool(void) {
  hsa_status_t err;

//  err = hsa_iterate_agents(rocrtst::FindCPUDevice, &cpu_agent_);
//  ASSERT_EQ(HSA_STATUS_INFO_BREAK, err);

  err = hsa_amd_agent_iterate_memory_pools(cpu_agent_, rocrtst::FindGlobalPool,
        &sys_pool_);
  ASSERT_EQ(HSA_STATUS_INFO_BREAK, err);
}

// Makes sure `agent` may access the buffer `ptr` that lives in `pool`.
//
// Returns HSA_STATUS_ERROR when access to the pool is never allowed for the
// agent. When access is disallowed by default, it is requested explicitly
// for `ptr`. Errors from the HSA calls are reported through RET_IF_HSA_ERR
// and returned. Otherwise returns HSA_STATUS_SUCCESS.
hsa_status_t AcquireAccess(hsa_agent_t agent,
                           hsa_amd_memory_pool_t pool, void* ptr) {
  hsa_amd_memory_pool_access_t access;
  hsa_status_t err = hsa_amd_agent_memory_pool_get_info(
      agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
  RET_IF_HSA_ERR(err);

  switch (access) {
    case HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED:
      return HSA_STATUS_ERROR;

    case HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT:
      err = hsa_amd_agents_allow_access(1, &agent, nullptr, ptr);
      RET_IF_HSA_ERR(err);
      break;

    default:
      // Access is already available; nothing to do.
      break;
  }

  return err;
}

// Given a destination (pointer, pool, owning agent) and a source (pointer,
// pool, owning agent), obtain access for one of the two agents to the other
// side's buffer and return the agent to use for the copy.
//
// Order of preference:
//  1. the source agent, if it can access the destination buffer and is a GPU;
//  2. the destination agent, if it can access the source buffer;
//  3. the source agent, if it can access the destination buffer (non-GPU).
// Returns NULL when neither agent can gain access, or when the device type
// of the source agent cannot be queried.
// GPU agents are preferred over CPU agents to avoid poor copy performance
// due to reading of uncached device memory by the CPU.
hsa_agent_t *
MemoryAsyncCopy::AcquireAsyncCopyAccess(
         void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
         void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag) {
  const bool src_agent_usable =
      AcquireAccess(*src_ag, dst_pool, dst_ptr) == HSA_STATUS_SUCCESS;

  if (src_agent_usable) {
    hsa_device_type_t src_type = HSA_DEVICE_TYPE_CPU;
    const hsa_status_t query =
        hsa_agent_get_info(*src_ag, HSA_AGENT_INFO_DEVICE, &src_type);
    if (query != HSA_STATUS_SUCCESS) {
      return nullptr;
    }

    // A GPU source agent is the best choice; anything else falls through so
    // that the destination agent gets a chance first.
    if (src_type == HSA_DEVICE_TYPE_GPU) {
      return src_ag;
    }
  }

  if (AcquireAccess(*dst_ag, src_pool, src_ptr) == HSA_STATUS_SUCCESS) {
    return dst_ag;
  }

  return src_agent_usable ? src_ag : nullptr;
}

// At standard verbosity and above, prints which copy path (source pool,
// destination pool and transaction kind) is about to be executed.
void MemoryAsyncCopy::PrintTransactionType(Transaction *t) {
  if (verbosity() < VERBOSE_STANDARD) {
    return;
  }

  printf("Executing Copy Path: From Pool %d To Pool %d ", t->src, t->dst);

  // Human-readable name of the transaction kind, including the newline.
  const char* path_label = nullptr;
  switch (t->type) {
    case H2D:
      path_label = "(Host-To-Device)\n";
      break;

    case D2H:
      path_label = "(Device-To-Host)\n";
      break;

    case P2P:
      path_label = "(Peer-To-Peer)\n";
      break;

    case H2DRemote:
      path_label = "(Host To Remote Device)\n";
      break;

    case D2HRemote:
      path_label = "(Remote Device To Host)\n";
      break;

    case P2PRemote:
      path_label = "(Peer To Remote Peer)\n";
      break;

    default:
      path_label = "**Unexpected path**\n";
      break;
  }

  printf("%s", path_label);
}
// Benchmarks one copy path (transaction `t`) and verifies the copied data.
//
// Outline:
//  1. Choose a buffer size bounded by t->max_size * 1024 and by a fraction of
//     the smaller of the source/destination pool allocation limits.
//  2. Allocate a buffer in the source pool, one in the destination pool and
//     two host staging buffers in sys_pool_; fill the staging buffers.
//  3. Copy the host source buffer into the source pool.
//  4. For every block size in Size[] that fits: time RealIterationNum()
//     copies from source to destination pool, copy each result back to the
//     host and compare it with the host source buffer (a mismatch clears
//     verified_).
//  5. Append the minimum and the trimmed mean copy time for that block size
//     to t->min_time and t->benchmark_copy_time.
//
// The path is skipped, with a message on stdout, when no agent can be given
// access to both ends of a copy. Failing HSA calls are fatal gtest failures.
void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) {
  // Pools reporting an allocation limit at or below 512 MiB get special
  // treatment on GPU agents: the agent's available memory is used instead.
  constexpr size_t kSmallPoolLimit = 536870912;

  hsa_status_t err;
  void* ptr_src;
  void* ptr_dst;
  size_t src_alloc_size;
  size_t dst_alloc_size;
  size_t max_alloc_size;
  size_t size;
  hsa_device_type_t ag_type;

  size_t max_trans_size = t->max_size * 1024;

  hsa_amd_memory_pool_t src_pool =  pool_info_[t->src]->pool_;
  hsa_agent_t dst_agent = pool_info_[t->dst]->owner_agent_info()->agent();
  hsa_amd_memory_pool_t dst_pool = pool_info_[t->dst]->pool_;
  hsa_agent_t src_agent = pool_info_[t->src]->owner_agent_info()->agent();

  PrintTransactionType(t);

  // Allocation limit on the source side.
  err = hsa_amd_memory_pool_get_info(src_pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                      &src_alloc_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(src_agent, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (src_alloc_size <= kSmallPoolLimit && ag_type == HSA_DEVICE_TYPE_GPU) {
    err = hsa_agent_get_info(src_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                              &src_alloc_size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }

  // Allocation limit on the destination side.
  err = hsa_amd_memory_pool_get_info(dst_pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                      &dst_alloc_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_agent_get_info(dst_agent, HSA_AGENT_INFO_DEVICE, &ag_type);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (dst_alloc_size <= kSmallPoolLimit && ag_type == HSA_DEVICE_TYPE_GPU) {
    err = hsa_agent_get_info(dst_agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                              &dst_alloc_size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  }

  max_alloc_size = (src_alloc_size < dst_alloc_size) ? src_alloc_size: dst_alloc_size;

  // Use at most a third of the limit for small GPU destination pools and at
  // most half otherwise; never more than the transaction's own maximum.
  if (dst_alloc_size <= kSmallPoolLimit && ag_type == HSA_DEVICE_TYPE_GPU)
    size = (max_alloc_size/3 <= max_trans_size) ? max_alloc_size/3: max_trans_size;
  else
    size = (max_alloc_size/2 <= max_trans_size) ? max_alloc_size/2: max_trans_size;

  err = hsa_amd_memory_pool_allocate(src_pool, size, 0,
                                     &ptr_src);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = hsa_amd_memory_pool_allocate(dst_pool, size, 0,
                                     &ptr_dst);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Host staging buffers: the reference data and the read-back target.
  void* host_ptr_src = nullptr;
  void* host_ptr_dst = nullptr;
  err = hsa_amd_memory_pool_allocate(sys_pool_, size, 0,
                                     reinterpret_cast<void**>(&host_ptr_src));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  err = hsa_amd_memory_pool_allocate(sys_pool_, size, 0,
                                     reinterpret_cast<void**>(&host_ptr_dst));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = hsa_amd_memory_fill(host_ptr_src, 1, size/sizeof(uint32_t));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = hsa_amd_memory_fill(host_ptr_dst, 0, size/sizeof(uint32_t));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Signal used for the staging and read-back copies.
  hsa_signal_t s;
  err = hsa_signal_create(1, 0, nullptr, &s);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Deallocate resources...
  // NOTE(review): the guard is registered only here, so an assertion failure
  // above returns without releasing what was already allocated.
  MAKE_SCOPE_GUARD([&]() {
    err = hsa_amd_memory_pool_free(ptr_src);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);
    err = hsa_amd_memory_pool_free(ptr_dst);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    err = hsa_amd_memory_pool_free(host_ptr_src);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);
    err = hsa_amd_memory_pool_free(host_ptr_dst);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    err = hsa_signal_destroy(s);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  });

  // **** First copy from the system buffer source to the test source pool
  // Acquire the appropriate access; prefer GPU agent over CPU where there
  // is a choice.
  hsa_agent_t *cpy_ag = nullptr;
  cpy_ag = AcquireAsyncCopyAccess(ptr_src, src_pool, &src_agent, host_ptr_src,
                                                     sys_pool_, &cpu_agent_);
  if (cpy_ag == nullptr) {
    std::cout << "Agents " << t->src << " and " << t->dst <<
                              " cannot access each other's pool." << std::endl;
    std::cout << "Skipping..." << std::endl;
    return;
  }

  err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag,
                                                            size, 0, nullptr, s);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1),
                                   HSA_WAIT_STATE_ACTIVE))
  {}

  int iterations = RealIterationNum();

  // **** Next, copy from the test source pool to the test destination pool
  // Prefer a gpu agent to a cpu agent
  cpy_ag = AcquireAsyncCopyAccess(ptr_dst, dst_pool, &dst_agent, ptr_src,
                                                        src_pool, &src_agent);
  if (cpy_ag == nullptr) {
    std::cout << "Owner agents for pools " << t->src << " and " <<
                   t->dst << " cannot access each other's pool." << std::endl;
    std::cout << "Skipping..." << std::endl;
    return;
  }

  for (int i = 0; i < kNumGranularity; i++) {
    // Block sizes beyond the allocated buffer cannot be tested; stop at the
    // first one that does not fit.
    if (Size[i] > size) {
      printf("Skip test with block size %s\n", Str[i]);
      break;
    }
    printf("Start test with block size %s\n",Str[i]);

    std::vector<double> time;

    for (int it = 0; it < iterations; it++) {
      if (verbosity() >= VERBOSE_PROGRESS) {
        std::cout << ".";
        std::cout.flush();
      }

      hsa_signal_store_relaxed(t->signal, 1);

      rocrtst::PerfTimer copy_timer;
      int index = copy_timer.CreateTimer();

      // Timed region: submit the copy and wait for its completion signal.
      copy_timer.StartTimer(index);
      err = hsa_amd_memory_async_copy(ptr_dst, *cpy_ag, ptr_src, *cpy_ag,
                                                 Size[i], 0, nullptr, t->signal);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      while (hsa_signal_wait_scacquire(t->signal, HSA_SIGNAL_CONDITION_LT, 1,
                                         uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
      {}

      copy_timer.StopTimer(index);

      // Untimed: read the result back to the host for verification.
      hsa_signal_store_relaxed(s, 1);

      err = AcquireAccess(dst_agent, sys_pool_,
                    host_ptr_dst);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      err = hsa_amd_memory_async_copy(host_ptr_dst, cpu_agent_, ptr_dst,
                                                 dst_agent, Size[i], 0, nullptr, s);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                       uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
      {}

      err = AcquireAccess(cpu_agent_, sys_pool_, host_ptr_dst);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      if (memcmp(host_ptr_src, host_ptr_dst, Size[i])) {
        verified_ = false;
      }

      // Push the result back to vector time
      time.push_back(copy_timer.ReadTimer(index));
    }

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << std::endl;
    }

    // Get Min copy time
    t->min_time->push_back(*std::min_element(time.begin(), time.end()));
    // Get mean copy time and store to the array
    t->benchmark_copy_time->push_back(GetMeanTime(&time));
  }
}

// Number of copies actually executed per block size: the configured
// iteration count plus 20% and one extra run, truncated to an integer.
// GetMeanTime() discards the surplus samples again.
size_t MemoryAsyncCopy::RealIterationNum(void) {
  const double padded = num_iteration() * 1.2 + 1;
  return static_cast<size_t>(padded);
}

// Returns the trimmed mean of the samples in `vec`.
//
// `vec` is modified in place: it is sorted ascending, the smallest sample is
// removed, then the next num_iteration() * 0.1 (truncated) smallest samples
// are removed, and finally everything beyond the first num_iteration()
// remaining samples is removed. The mean of what is left is returned.
// The caller must supply at least as many samples as RealIterationNum()
// yields; shorter vectors are not checked for.
double MemoryAsyncCopy::GetMeanTime(std::vector<double> *vec) {
  std::vector<double>& samples = *vec;

  std::sort(samples.begin(), samples.end());

  samples.erase(samples.begin());
  samples.erase(samples.begin(), samples.begin() + num_iteration() * 0.1);
  samples.erase(samples.begin() + num_iteration(), samples.end());

  double total = 0.0;
  for (const double sample : samples) {
    total += sample;
  }

  const int count = samples.size();
  return total / count;
}

void MemoryAsyncCopy::DisplayResults(void) const {
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  TestBase::DisplayResults();
  hsa_status_t err;
  for (Transaction t : tran_) {
    DisplayBenchmark(&t);
    err = hsa_signal_destroy(t.signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    delete t.benchmark_copy_time;
    delete t.min_time;
  }

  return;
}

// Prints the benchmark results of one transaction.
//
// Output: a banner naming the copy path, "Skipped..." when nothing was
// measured, otherwise the verification verdict and, at standard verbosity
// and above, a table with average/minimum time and the derived bandwidth
// per block size.
//
// The table never reads more rows than were recorded for the transaction,
// even if the size limit recomputed here differs from the one used while
// benchmarking.
void MemoryAsyncCopy::DisplayBenchmark(Transaction *t) const {
  hsa_status_t err;
  size_t src_alloc_size;
  size_t dst_alloc_size;
  size_t max_alloc_size;
  size_t size;

  size_t max_trans_size = t->max_size * 1024;
  hsa_amd_memory_pool_t src_pool =  pool_info_[t->src]->pool_;
  hsa_amd_memory_pool_t dst_pool = pool_info_[t->dst]->pool_;

  err = hsa_amd_memory_pool_get_info(src_pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                    &src_alloc_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_memory_pool_get_info(dst_pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                    &dst_alloc_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  max_alloc_size = (src_alloc_size < dst_alloc_size) ? src_alloc_size: dst_alloc_size;

  size = (max_alloc_size/2 <= max_trans_size) ? max_alloc_size/2: max_trans_size;

  // The banner opens the parenthesis; every case below only closes it.
  printf("=========================== PATH: From Pool %d To Pool %d (",
                                                              t->src, t->dst);

  switch (t->type) {
    case H2D:
      printf("Host-To-Device) ===========================\n");
      break;

    case D2H:
      printf("Device-To-Host) ===========================\n");
      break;

    case P2P:
      printf("Peer-To-Peer) =============================\n");
      break;

    case P2PRemote:
      printf("Peer-To-Remote-Peer) ======================\n");
      break;

    case H2DRemote:
      printf("Host-To-Remote-Device) ====================\n");
      break;

    case D2HRemote:
      printf("Device-To-Remote-Host) ====================\n");
      break;

    default:
      ASSERT_TRUE(false) << "Unexpected Transaction value:" << t->type <<
                                                                    std::endl;
  }

  if ((*t->benchmark_copy_time).size() == 0) {
    printf("Skipped...\n");
    return;
  }
  if (verified_) {
    std::cout << "Verification: Pass" << std::endl;
  } else {
    std::cout << "Verification: Fail" << std::endl;
  }

  if (verbosity() < VERBOSE_STANDARD) {
    return;
  }

  printf("Data Size             Avg Time(us)         Avg BW(GB/s)"
                           "          Min Time(us)          Peak BW(GB/s)\n");

  // Number of block sizes for which both a mean and a minimum were recorded.
  const size_t mean_rows = t->benchmark_copy_time->size();
  const size_t min_rows = t->min_time->size();
  const size_t recorded_rows = (mean_rows < min_rows) ? mean_rows : min_rows;

  for (int i = 0; i < kNumGranularity; i++) {
    // Stop at the first block size that was not (or could not be) measured.
    if (Size[i] > size || static_cast<size_t>(i) >= recorded_rows) {
      printf(
         "Notice: Data Size >= %s is skipped due to hard limit of 1/2 vram size \n\n",
         Str[i]
      );
      break;
    }

    // Bandwidth in GiB/s: bytes divided by seconds, scaled by 1024^3.
    double band_width =
    static_cast<double>(Size[i]/(*(t->benchmark_copy_time))[i]/1024/1024/1024);
    double peak_band_width =
       static_cast<double>(Size[i] / (*(t->min_time))[i]/ 1024 / 1024 / 1024);
    printf(
        "  %4s            %14lf        %14lf         %14lf         %14lf\n",
       Str[i], (*(t->benchmark_copy_time))[i] * 1e6, band_width,
                                  (*(t->min_time))[i] * 1e6, peak_band_width);
  }

  return;
}

void MemoryAsyncCopy::Close() {
  if (cpu_hwl_numa_nodeset_ != nullptr) {
    hwloc_bitmap_free(cpu_hwl_numa_nodeset_);
    cpu_hwl_numa_nodeset_ = nullptr;
  }
  hwloc_topology_destroy(topology_);

  // hwloc hack - hwloc uses OpenCL which loads ROCr.  As OpenCL does not have a shutdown routine it
  // can not free HSA state.  This will leak resources but is the only option short of isolating
  // hwloc in it's own process.
  while (hsa_shut_down() == HSA_STATUS_SUCCESS)
    ;
  hsa_init();

  TestBase::Close();
}

// hsa_amd_agent_iterate_memory_pools() callback.
//
// `data` is the MemoryAsyncCopy instance collecting the topology. A pool is
// recorded only when it lives in the global segment and allows runtime
// allocation; every other pool is skipped with HSA_STATUS_SUCCESS so that the
// iteration continues. A recorded pool is appended to pool_info() and copied
// into the NodeInfo at the current agent index, and the pool index counter is
// advanced by one. Any failing HSA query is reported and returned through
// RET_IF_HSA_ERR.
static hsa_status_t GetPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
  MemoryAsyncCopy* const test = reinterpret_cast<MemoryAsyncCopy*>(data);
  hsa_status_t err;

  // Only pools in the global segment are of interest.
  hsa_amd_segment_t segment;
  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
                                     &segment);
  RET_IF_HSA_ERR(err);
  if (segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Pools that cannot be allocated from at runtime are discarded.
  bool can_alloc = false;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &can_alloc);
  RET_IF_HSA_ERR(err);
  if (!can_alloc) {
    return HSA_STATUS_SUCCESS;
  }

  // Total pool size.
  size_t pool_size = 0;
  err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
                                     &pool_size);
  RET_IF_HSA_ERR(err);

  // Largest single allocation the pool supports.
  size_t max_alloc = 0;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, &max_alloc);
  RET_IF_HSA_ERR(err);

  // Fine-grained vs. coarse-grained is encoded in the global flags.
  uint32_t flags = 0;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
  RET_IF_HSA_ERR(err);
  const bool fine_grained =
      (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;

  const int pool_idx = test->pool_index();
  const int agent_idx = test->agent_index();

  // The new pool is owned by the most recently registered agent.
  PoolInfo* const entry =
      new PoolInfo(pool, pool_idx, segment, fine_grained, pool_size,
                   max_alloc, test->agent_info()->back());
  test->pool_info()->push_back(entry);

  // Mirror the pool into the per-node view used for topology printing.
  (*test->node_info())[agent_idx].pool.push_back(*entry);
  test->set_pool_index(pool_idx + 1);

  return HSA_STATUS_SUCCESS;
}

// hsa_iterate_agents() callback that classifies GPU agents relative to the
// CPU NUMA nodeset currently stored in the MemoryAsyncCopy passed via `data`.
//
// Non-GPU agents are ignored. For a GPU whose NUMA node can be resolved
// through hwloc:
//   - if its nodeset equals the CPU nodeset it fills gpu_local_agent1, then
//     gpu_local_agent2;
//   - otherwise it fills gpu_remote_agent (first one wins).
// When no NUMA node is resolved (the hwloc lookup is skipped for kDtifBdfId
// and when /dev/dxg can be opened, or the PCI device has no NUMA ancestor),
// the GPU is recorded as the first or second local agent.
//
// Returns HSA_STATUS_SUCCESS to continue the iteration, HSA_STATUS_INFO_BREAK
// once no further agents are needed, or an error status on failure.
static hsa_status_t GetGPUAgents(hsa_agent_t agent, void* data) {
  hsa_status_t err;
  MemoryAsyncCopy* ptr = reinterpret_cast<MemoryAsyncCopy*>(data);

  hsa_device_type_t device_type;
  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  RET_IF_HSA_ERR(err);

  if (device_type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_SUCCESS;
  }

  uint32_t agent_bdf_id;
  err = hsa_agent_get_info(agent,
                (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &agent_bdf_id);
  RET_IF_HSA_ERR(err);

  uint8_t bus = (agent_bdf_id & (0xFF << 8)) >> 8;
  uint8_t device = (agent_bdf_id & (0x1F << 3)) >> 3;

  // The function part of the location_id hasn't been used yet
  // and may not contain a valid function number.
  uint8_t function = 0; //(agent_bdf_id & 0x07);

  if (ptr->verbosity() >  MemoryAsyncCopy::VERBOSE_STANDARD) {
    char name[64];
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
    RET_IF_HSA_ERR(err);

    // Only GPU agents reach this point, so the device kind is always "GPU".
    printf("The %s agent name located at PCIe Bus %x, Device %x, "
                                                     "Function %x, is %s.\n",
                                          "GPU", bus, device, function, name);
  }

  uint32_t pci_domain_id = 0;
  err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_DOMAIN, &pci_domain_id);
  RET_IF_HSA_ERR(err);

  // The hwloc PCI lookup is skipped when /dev/dxg can be opened.
  bool is_dxg = false;
  int fd = open("/dev/dxg", O_RDWR);
  if (fd >= 0) {
    close(fd);
    is_dxg = true;
  }
  hwloc_obj_t gpu_numa_node = nullptr;
  if ((agent_bdf_id != kDtifBdfId) && !is_dxg) {
    hwloc_obj_t gpu_hwl_dev;
    gpu_hwl_dev = hwloc_get_pcidev_by_busid(ptr->topology(), pci_domain_id, bus, device,
                                                                      function);

    if (gpu_hwl_dev == nullptr) {
      return HSA_STATUS_ERROR;
    }

    gpu_numa_node = hwloc_get_ancestor_obj_by_type(ptr->topology(),
                                              HWLOC_OBJ_NUMANODE, gpu_hwl_dev);
  }

  if (gpu_numa_node != nullptr) {
    char s1[256], s2[256];
    hwloc_bitmap_snprintf(s1, sizeof(s1), gpu_numa_node->nodeset);
    hwloc_bitmap_snprintf(s2, sizeof(s2), ptr->cpu_hwl_numa_nodeset());
    printf("gpu nodeset: %s\n", s1);
    printf("cpu nodeset: %s\n", s2);

    if (!hwloc_bitmap_isequal(gpu_numa_node->nodeset,
                                              ptr->cpu_hwl_numa_nodeset())) {
      // GPU sits on a different NUMA node than the CPU: remember the first
      // such GPU as the remote agent.
      if (ptr->gpu_remote_agent().handle == 0) {
        ptr->set_gpu_remote_agent(agent);
      }

      // A remote agent is known now; stop once both local slots are filled.
      if (ptr->gpu_local_agent1().handle != 0 &&
                                          ptr->gpu_local_agent2().handle != 0) {
        return HSA_STATUS_INFO_BREAK;
      }
      return HSA_STATUS_SUCCESS;
    }

    // GPU shares the CPU's NUMA node: fill the next free local slot.
    if (ptr->gpu_local_agent1().handle == 0) {
      ptr->set_gpu_local_agent1(agent);
    } else if (ptr->gpu_local_agent2().handle == 0) {
      ptr->set_gpu_local_agent2(agent);
    }

    // Stop only when both local agents and the remote agent are known.
    if (ptr->gpu_local_agent1().handle != 0 &&
                                     ptr->gpu_local_agent2().handle != 0 &&
                                        ptr->gpu_remote_agent().handle != 0) {
      return HSA_STATUS_INFO_BREAK;
    }
    return HSA_STATUS_SUCCESS;
  }

  // From here on no NUMA node was resolved for this GPU (every path with a
  // resolved node has already returned above).
  if (ptr->verbosity() >= MemoryAsyncCopy::VERBOSE_STANDARD) {
    std::cout << "Only 1 NUMA node found.\n" << std::endl;
  }

  // First GPU seen: record it and keep looking for a second one.
  if (ptr->gpu_local_agent1().handle == 0) {
    ptr->set_gpu_local_agent1(agent);
    return HSA_STATUS_SUCCESS;
  }

  // A first local agent already exists: take the second slot if it is free.
  // Without NUMA information no remote agent can be found through this path,
  // so the iteration stops here in either case.
  if (ptr->gpu_local_agent2().handle == 0) {
    ptr->set_gpu_local_agent2(agent);
  }
  return HSA_STATUS_INFO_BREAK;
}

// hsa_iterate_agents() callback that looks for a CPU agent which has at least
// one GPU agent on the same NUMA node, and then records the chosen agents and
// their memory pools in the MemoryAsyncCopy passed via `data`.
//
// For each CPU agent visited, its HSA node id is converted into an hwloc
// nodeset, which GetGPUAgents() then uses to classify the GPUs. If no local
// GPU was found for this CPU, the selection state is cleared and
// HSA_STATUS_SUCCESS is returned so the next CPU agent can be tried.
//
// Returns HSA_STATUS_INFO_BREAK once the agents and pools have been recorded,
// HSA_STATUS_SUCCESS to continue with the next agent, or an error status.
// A failure while enumerating the memory pools of a selected agent is
// reported and returned instead of being silently ignored.
static hsa_status_t GetAgentInfo(hsa_agent_t agent, void* data) {
  MemoryAsyncCopy* ptr = reinterpret_cast<MemoryAsyncCopy*>(data);

  hsa_status_t err;
  int ret;

  // A CPU agent must not already be selected when this callback runs.
  if (ptr->cpu_agent().handle != 0) {
    return HSA_STATUS_ERROR;
  }

  // Get device type
  hsa_device_type_t device_type;
  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  RET_IF_HSA_ERR(err);

  // First thing is to find CPU agent
  if (device_type != HSA_DEVICE_TYPE_CPU) {
    return HSA_STATUS_SUCCESS;
  }

  ptr->set_cpu_agent(agent);
  uint32_t cpu_numa_node_id;
  hwloc_nodeset_t cpu_nodeset;

  err = hsa_agent_get_info(ptr->cpu_agent(), HSA_AGENT_INFO_NODE,
                                                           &cpu_numa_node_id);
  RET_IF_HSA_ERR(err);

  // Translate the HSA node id into an hwloc nodeset via a libnuma bitmask.
  struct bitmask *numa_node_mask = numa_allocate_nodemask();
  cpu_nodeset = hwloc_bitmap_alloc();

  numa_bitmask_setbit(numa_node_mask, cpu_numa_node_id);

  ret = hwloc_nodeset_from_linux_libnuma_bitmask(ptr->topology(),
      cpu_nodeset, numa_node_mask);
  numa_free_nodemask(numa_node_mask);

  if (ret == -1) {
    hwloc_bitmap_free(cpu_nodeset);
    return HSA_STATUS_ERROR;
  }

  ptr->set_cpu_hwl_numa_nodeset(cpu_nodeset);

  // Classify the GPUs against the nodeset that was just stored.
  err = hsa_iterate_agents(GetGPUAgents, data);

  if (err != HSA_STATUS_INFO_BREAK && err != HSA_STATUS_SUCCESS) {
    return err;
  }

  if (ptr->gpu_local_agent1().handle == 0) {
    hwloc_bitmap_free(ptr->cpu_hwl_numa_nodeset());
    ptr->set_cpu_hwl_numa_nodeset(nullptr);

    if (ptr->gpu_local_agent2().handle != 0) {
      std::cout << "Unexpected value set for gpu_local_agent2" << std::endl;
      return HSA_STATUS_ERROR;
    }
    // In this case, the CPU and at least 1 GPU are not on the same NUMA node;
    // try another CPU
    hsa_agent_t t;
    t.handle = 0;
    ptr->set_gpu_local_agent1(t);
    ptr->set_cpu_agent(t);
    ptr->set_gpu_remote_agent(t);
    return HSA_STATUS_SUCCESS;
  }

  // Registers one selected agent (skipping unset handles), creates its
  // NodeInfo entry and enumerates its memory pools. Returns the status of the
  // pool enumeration so the caller can react to failures.
  auto add_agent = [&](hsa_agent_t ag, hsa_device_type_t dev_type,
                       bool remote) -> hsa_status_t {
    if (ag.handle == 0) {
      return HSA_STATUS_SUCCESS;
    }
    ptr->agent_info()->push_back(
            new AgentInfo(ag, ptr->agent_index(), dev_type, remote));

    // Construct a new NodeInfo structure and push it back to node_info_
    NodeInfo node;
    node.agent = *ptr->agent_info()->back();
    ptr->node_info()->push_back(node);

    // GetPoolInfo() indexes node_info_ with the current agent index, so the
    // index is advanced only after the pools have been enumerated.
    const hsa_status_t pool_status =
        hsa_amd_agent_iterate_memory_pools(ag, GetPoolInfo, data);
    ptr->set_agent_index(ptr->agent_index() + 1);
    return pool_status;
  };

  err = add_agent(ptr->cpu_agent(), HSA_DEVICE_TYPE_CPU, false);
  RET_IF_HSA_ERR(err);
  err = add_agent(ptr->gpu_local_agent1(), HSA_DEVICE_TYPE_GPU, false);
  RET_IF_HSA_ERR(err);
  err = add_agent(ptr->gpu_local_agent2(), HSA_DEVICE_TYPE_GPU, false);
  RET_IF_HSA_ERR(err);
  err = add_agent(ptr->gpu_remote_agent(), HSA_DEVICE_TYPE_GPU, true);
  RET_IF_HSA_ERR(err);

  return HSA_STATUS_INFO_BREAK;
}

void MemoryAsyncCopy::FindTopology() {
  hsa_status_t err;

  hwloc_topology_set_flags(topology_, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                         HWLOC_TOPOLOGY_FLAG_IO_DEVICES);

  hwloc_topology_load(topology_);

  err = hsa_iterate_agents(GetAgentInfo, this);

  if (gpu_local_agent1_.handle == 0) {
    std::cout << "**** No GPU found in same NUMA node as a CPU ****"
                                                                 << std::endl;
  }
  ASSERT_EQ(HSA_STATUS_INFO_BREAK, err);

  FindSystemPool();
}

void MemoryAsyncCopy::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

void MemoryAsyncCopy::ConstructTransactionList(void) {
  hsa_status_t err;

  tran_.clear();

  int cpu_pool_indx = -1;
  int gpu_local1_pool_indx = -1;
  int gpu_local2_pool_indx = -1;
  int gpu_remote_pool_indx = -1;

  auto push_trans = [&](int from_indx, int to_indx, TransType type) {
    Transaction t;
    t.src = from_indx;
    t.dst = to_indx;
    t.max_size = kMaxCopySize/1024;
    t.type = type;
    t.benchmark_copy_time = new  std::vector<double>;
    t.min_time = new std::vector<double>;
    err = hsa_signal_create(1, 0, NULL, &t.signal);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    tran_.push_back(t);
  };

  // Find the CPU Node and pool
  for (NodeInfo n : *node_info()) {
    if (cpu_pool_indx == -1 && n.agent.device_type() == HSA_DEVICE_TYPE_CPU) {
      cpu_pool_indx = n.pool[0].index_;
      continue;
    }

    if (n.agent.device_type() == HSA_DEVICE_TYPE_GPU) {
      if (!n.agent.is_remote()) {
        if (gpu_local1_pool_indx == -1) {
          gpu_local1_pool_indx = n.pool[0].index_;
          continue;
        }
        if (gpu_local2_pool_indx == -1) {
          gpu_local2_pool_indx = n.pool[0].index_;
        }
      } else if (gpu_remote_pool_indx == -1) {
        gpu_remote_pool_indx = n.pool[0].index_;
      }
    }
  }

  ASSERT_NE(cpu_pool_indx, -1);
  ASSERT_NE(gpu_local1_pool_indx, -1);

  push_trans(cpu_pool_indx, gpu_local1_pool_indx, H2D);
  push_trans(gpu_local1_pool_indx, cpu_pool_indx, D2H);

  if (do_p2p_ && gpu_local2_pool_indx != -1) {
    push_trans(gpu_local1_pool_indx, gpu_local2_pool_indx, P2P);
    push_trans(gpu_local2_pool_indx, gpu_local1_pool_indx, P2P);
  }

  if (gpu_remote_pool_indx != -1) {
    push_trans(cpu_pool_indx, gpu_remote_pool_indx, H2DRemote);
    push_trans(gpu_remote_pool_indx, cpu_pool_indx, D2HRemote);
    if (do_p2p_) {
      push_trans(gpu_local1_pool_indx, gpu_remote_pool_indx, P2PRemote);
      push_trans(gpu_remote_pool_indx, gpu_local1_pool_indx, P2PRemote);
    }
  }
}

void MemoryAsyncCopy::PrintTopology(void) {
  size_t node_num = node_info()->size();

  for (uint32_t i = 0; i < node_num; i++) {
    NodeInfo node = node_info()->at(i);
    // Print agent info
    std::cout << std::endl;
    std::cout << "Agent #" << node.agent.index_ << ":" << std::endl;

    if (HSA_DEVICE_TYPE_CPU == node.agent.device_type())
      std::cout << "Agent Device Type:                             CPU"
                << std::endl;
    else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type())
      std::cout << "Agent Device Type:                             GPU"
                << std::endl;

    // Print pool info
    size_t pool_num = node.pool.size();

    for (uint32_t j = 0; j < pool_num; j++) {
      std::cout << "    Memory Pool#" << node.pool.at(j).index_ << ":"
                << std::endl;
      std::cout << "        max allocable size in KB: \t\t"
                << node.pool.at(j).allocable_size_ / 1024 << std::endl;
      std::cout << "        is fine-grained: \t\t\t"
                << node.pool.at(j).is_fine_grained_ << std::endl;
    }
  }
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/performance/memory_async_copy.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_H_
#define ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_H_

#include <hwloc.h>

#include <vector>
#include <algorithm>

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "suites/test_common/test_base.h"

// Declaration only; the definition lives outside this header.
// NOTE(review): presumably makes the allocation `ptr` (from `pool`)
// accessible to `agent` and returns the resulting HSA status - confirm
// against the definition.
hsa_status_t AcquireAccess(hsa_agent_t agent,
                                    hsa_amd_memory_pool_t pool, void* ptr);
// Direction/kind of a copy transaction. The numeric values are fixed by the
// declaration order, starting at 0.
enum TransType {
  H2D = 0,        // Host-To-Device
  D2H = 1,        // Device-To-Host
  P2P = 2,        // Peer-To-Peer
  H2DRemote = 3,  // Host-To-Remote-Device
  D2HRemote = 4,  // Device-To-Remote-Host
  P2PRemote = 5   // Peer-To-Remote-Peer
};

// One copy to benchmark: source and destination pool indices, the completion
// signal, the size limit and the vectors that collect the timing results.
// Members are not initialized here; the code creating a Transaction is
// expected to fill in every field.
struct Transaction {
  // Index of the source pool
  int src;
  // Index of the destination pool
  int dst;
  // Signal used for the copy
  hsa_signal_t signal;
  // Max. amount of kBytes to copy
  size_t max_size;
  // Kind of transfer (see TransType)
  TransType type;
  // BenchMark copy time, one entry per measured data size
  std::vector<double> *benchmark_copy_time;
  // Min time, one entry per measured data size
  std::vector<double> *min_time;
};

class AgentInfo {
 public:
    AgentInfo(hsa_agent_t agent, int index, hsa_device_type_t device_type,
                                                        bool remote = false) {
      agent_ = agent;
      index_ = index;
      device_type_ = device_type;
      remote_ = remote;
    }
    AgentInfo() {}

    ~AgentInfo() {}
    hsa_agent_t agent(void) const {return agent_;}
    hsa_device_type_t device_type(void) const {return device_type_;}
    bool is_remote(void) const {return remote_;}
    void set_remote(bool r) {remote_ = r;}
    hsa_agent_t agent_;
    int index_;

 private:
    hsa_device_type_t device_type_;
    bool remote_;
};

class PoolInfo {
 public:
    PoolInfo(hsa_amd_memory_pool_t pool, int index,
               hsa_amd_segment_t segment, bool is_fine_grained, size_t size,
               size_t max_alloc_size, AgentInfo *agent_info) {
      pool_ = pool;
      index_ = index;
      segment_ = segment;
      is_fine_grained_ = is_fine_grained;
      size_ = size;
      allocable_size_ = max_alloc_size;
      owner_agent_info_ = agent_info;
    }
    PoolInfo() {}
    ~PoolInfo() {}
    AgentInfo* owner_agent_info(void) const {return owner_agent_info_;}
    hsa_amd_memory_pool_t pool_;
    int index_;
    hsa_amd_segment_t segment_;
    bool is_fine_grained_;
    size_t size_;
    size_t allocable_size_;
 private:
    AgentInfo *owner_agent_info_;
};


// Per-agent view of the topology: one agent together with copies of the
// pools recorded for it. Used to print out topology info.
struct NodeInfo {
  // The agent this node stands for
  AgentInfo agent;
  // Pools recorded for that agent
  std::vector<PoolInfo> pool;
};


// Test case built on TestBase that records the selected CPU/GPU agents and
// their memory pools, builds a list of copy transactions between those pools
// and reports time/bandwidth figures for a fixed table of data sizes.
class MemoryAsyncCopy : public TestBase {
 public:
  MemoryAsyncCopy();

  // @Brief: Destructor for test case of MemoryAsyncCopy
  virtual ~MemoryAsyncCopy();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resource
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

  // There are 3 levels of testing, from quickest/very specific to
  // longest/most complete:
  // 1. to and from a specified source to a specified target
  // 2. to and from the cpu to 1 gpu, and to/from a gpu to another gpu
  //    (if available)
  // 3. to and from the cpu to 1 gpu and, to/from every gpu to every
  //    other gpu
  // The default is #2 above. If *both* a source and dest. are set for #1
  // above, then that overrides both #2 and #3
  void set_src_pool(int pool_id) {src_pool_id_ = pool_id;}
  void set_dst_pool(int pool_id) {dst_pool_id_ = pool_id;}

  // Running counters used while agents and pools are being enumerated
  int pool_index(void) const {return pool_index_;}
  void set_pool_index(int i) {pool_index_ = i;}
  int agent_index(void) const {return agent_index_;}
  void set_agent_index(int i) {agent_index_ = i;}

  // Direct (mutable) access to the recorded pools, agents and nodes
  std::vector<PoolInfo *> *pool_info(void) {return &pool_info_;}
  std::vector<AgentInfo *> *agent_info(void) {return &agent_info_;}
  std::vector<NodeInfo> *node_info(void) {return &node_info_;}

  // hwloc topology handle used for the NUMA lookups
  hwloc_topology_t topology(void) const {return topology_;}
  void set_topology(hwloc_topology_t t) {topology_ = t;}

  // hwloc nodeset of the selected CPU agent (may be null)
  hwloc_nodeset_t cpu_hwl_numa_nodeset(void) const {
                                                return cpu_hwl_numa_nodeset_;}
  void set_cpu_hwl_numa_nodeset(hwloc_nodeset_t ns) {
                                                  cpu_hwl_numa_nodeset_ = ns;}

  // Selected agents; a handle of 0 means "not set"
  hsa_agent_t gpu_local_agent1() const {return gpu_local_agent1_;}
  void set_gpu_local_agent1(hsa_agent_t a) {gpu_local_agent1_ = a;}
  hsa_agent_t gpu_local_agent2() const {return gpu_local_agent2_;}
  void set_gpu_local_agent2(hsa_agent_t a) {gpu_local_agent2_ = a;}

  hsa_agent_t gpu_remote_agent() const {return gpu_remote_agent_;}
  void set_gpu_remote_agent(hsa_agent_t a) {gpu_remote_agent_ = a;}

  hsa_agent_t cpu_agent() const {return cpu_agent_;}
  void set_cpu_agent(hsa_agent_t a) {cpu_agent_ = a;}

  // NOTE(review): judging by the name and parameters this arranges access
  // for an async copy between the two allocations and returns a pointer to
  // one of the agents - confirm against the definition.
  hsa_agent_t *
  AcquireAsyncCopyAccess(
         void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
         void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag);

 protected:
  void PrintTransactionType(Transaction *t);

  // Table of measured data sizes. Str[i] is the label printed for Size[i]
  // (in bytes). Emulator builds use a single small size.
#if ROCRTST_EMULATOR_BUILD
  static const int kNumGranularity = 1;
  static constexpr const char* Str[kNumGranularity] = {"1k"};

  static constexpr const size_t Size[kNumGranularity] = {1024};
#else

  static const int kNumGranularity = 20;
  static constexpr const char* Str[kNumGranularity] = {
      "1k", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K",
      "1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M"};

  static constexpr const size_t Size[kNumGranularity] = {
      1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024, 64*1024, 128*1024,
      256*1024, 512*1024, 1024*1024, 2048*1024, 4096*1024, 8*1024*1024,
      16*1024*1024, 32*1024*1024, 64*1024*1024, 128*1024*1024, 256*1024*1024,
      512*1024*1024};
#endif
  // Largest entry of Size[], i.e. the biggest copy performed (in bytes)
  static constexpr const int kMaxCopySize = Size[kNumGranularity - 1];

  // @Brief: Get real iteration number
  virtual size_t RealIterationNum(void);

  // @Brief: Get the mean copy time
  double GetMeanTime(std::vector<double>* vec);

  // @Brief: Find and print out the needed topology info
  virtual void FindTopology(void);

  // @Brief: Run for Benchmark mode with verification
  virtual void RunBenchmarkWithVerification(Transaction *t);

  // @Brief: Display Benchmark result
  void DisplayBenchmark(Transaction *t) const;

  // @Brief: Print topology info
  void PrintTopology(void);

  // @Brief: Build the list of transactions (tran_) to benchmark
  virtual void ConstructTransactionList(void);

  // @Brief: Find system region
  void FindSystemPool(void);

  // More variables declared for testing
  std::vector<Transaction> tran_;

  // Variable used to store agent info, indexed by agent_index_
  std::vector<AgentInfo *> agent_info_;

  // Variable used to store region info, indexed by pool_index_
  std::vector<PoolInfo *> pool_info_;

  // To store node info
  std::vector<NodeInfo> node_info_;

  // Variable to help count agent index
  int agent_index_;

  // Variable to help count region index
  int pool_index_;

  // Verification result
  bool verified_;

  // Should we test p2p copying?
  bool do_p2p_;

  // Store the testing level
  int src_pool_id_;
  int dst_pool_id_;
  // System region
  hsa_amd_memory_pool_t sys_pool_;

  // CPU agent used for verification
  hsa_agent_t cpu_agent_;

  rocrtst::PerfTimer copy_timer_;

  hwloc_topology_t topology_;
  hwloc_nodeset_t cpu_hwl_numa_nodeset_;

  // NOTE(review): an earlier remark here said to use the cpu_agent_ of the
  // base class, yet this class declares its own cpu_agent_ above - confirm
  // which one is intended.
  hsa_agent_t gpu_local_agent1_;
  hsa_agent_t gpu_local_agent2_;
  hsa_agent_t gpu_remote_agent_;  // Not associated with cpu_agent_
};


#endif  // ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_H_


================================================
FILE: rocrtst/suites/performance/memory_async_copy_numa.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <hwloc.h>
#include <hwloc/linux-libnuma.h>
#include <numa.h>
#include <stdlib.h>

#include <algorithm>
#include <vector>

#include "common/base_rocr.h"
#include "suites/test_common/test_base.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "suites/performance/memory_async_copy_numa.h"
#include "common/base_rocr_utils.h"
#include "common/helper_funcs.h"
#include "gtest/gtest.h"

// If `err` is not HSA_STATUS_SUCCESS: prints the failing source location, the
// status value and its textual description to stdout, then returns `err` from
// the enclosing function. Must therefore only be used inside functions that
// return hsa_status_t.
//
// hsa_status_string() may fail to provide a description, in which case `msg`
// stays null; a placeholder is printed then, because streaming a null
// `const char*` is undefined and would leave std::cout in a failed state.
#define RET_IF_HSA_ERR(err) { \
  if ((err) != HSA_STATUS_SUCCESS) { \
    const char* msg = nullptr; \
    hsa_status_string(err, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << err << std::endl; \
    std::cout << (msg != nullptr ? msg : "(no status description available)") \
              << std::endl; \
    return (err); \
  } \
}

// Sets the title and description shown for the NUMA variant of the async
// copy test and turns peer-to-peer copies off for it.
MemoryAsyncCopyNUMA::MemoryAsyncCopyNUMA() : MemoryAsyncCopy() {
  const char* const title =
      "Asynchronous Memory Copy Bandwidth Using NUMA aware allocation";
  const char* const description =
      "This test measures bandwidth to/from Host from/to GPU "
      "using hsa_amd_memory_async_copy() to copy buffers of various length "
      "from memory pool to another. Host memory is allocated using NUMA "
      "aware allocators. Bandwidth performance using NUMA should, at worst, "
      "be as good as using the standard hsa allocator.";

  set_title(title);
  set_description(description);

  // The NUMA test only exercises host<->device copies.
  do_p2p_ = false;
}

// Nothing to release here beyond what the members and the base class do.
MemoryAsyncCopyNUMA::~MemoryAsyncCopyNUMA() = default;

// Runs the NUMA variant of the benchmark.
//
// The CPU nodeset selected during topology discovery is converted to a CPU
// set. Unless that set covers all CPUs, the process is bound to a single CPU
// of the set and memory allocation is bound to the nodeset; the binding is
// read back and verified. Afterwards every transaction in tran_ is
// benchmarked.
void MemoryAsyncCopyNUMA::Run(void) {
  int ret;
  TestBase::Run();

  hwloc_bitmap_t cpu_bind_set = nullptr;

  // Bind CPU
  cpu_bind_set = hwloc_bitmap_alloc();

  hwloc_cpuset_from_nodeset(topology_, cpu_bind_set, cpu_hwl_numa_nodeset_);

  ASSERT_FALSE((bool)hwloc_bitmap_iszero(cpu_bind_set));

  if (hwloc_bitmap_isfull(cpu_bind_set)) {
    std::cout <<
     "All cpus associated with NUMA node. No hwloc cpu binding will be done."
                                                                 << std::endl;
  } else {
    hwloc_bitmap_t cpu_bind_set_chk = nullptr;
    cpu_bind_set_chk = hwloc_bitmap_alloc();

    // Reduce the set to a single CPU before binding the process to it.
    hwloc_bitmap_singlify(cpu_bind_set);
    ret = hwloc_set_cpubind(topology_, cpu_bind_set, HWLOC_CPUBIND_PROCESS);
    ASSERT_TRUE(ret == 0 &&
          "hwloc: cpubind not supported or cannot be enforced. Check errno.");

    // Read the binding back so it can be compared with what was requested.
    hwloc_get_cpubind(topology_, cpu_bind_set_chk, 0);

    if (verbosity() >= VERBOSE_STANDARD) {
      // hwloc_bitmap_asprintf() allocates the string it returns; each one has
      // to be released with free() once it has been printed.
      char* mask_str = nullptr;

      hwloc_bitmap_asprintf(&mask_str, cpu_bind_set);
      printf("write hwloc cpubind mask: %s\n",
                                     mask_str != nullptr ? mask_str : "(null)");
      free(mask_str);
      mask_str = nullptr;

      hwloc_bitmap_asprintf(&mask_str, cpu_bind_set_chk);
      printf("read hwloc cpubind mask: %s\n",
                                     mask_str != nullptr ? mask_str : "(null)");
      free(mask_str);
    }
    ASSERT_TRUE(hwloc_bitmap_isequal(cpu_bind_set, cpu_bind_set_chk) &&
                                              "Unexpected hwloc cpubind set");
    hwloc_bitmap_free(cpu_bind_set_chk);

    // Bind Memory
    ret = hwloc_set_membind_nodeset(topology_, cpu_hwl_numa_nodeset_,
                                     HWLOC_MEMBIND_BIND, 0);
    ASSERT_TRUE(ret == 0 &&
          "hwloc: membind not supported or cannot be enforced. Check errno.");
  }

  // Each transaction is handed over as a copy; the result vectors are reached
  // through the pointers stored in it.
  for (Transaction t : tran_) {
    RunBenchmarkWithVerification(&t);
  }

  hwloc_bitmap_free(cpu_bind_set);
}

// Benchmarks a single host<->device transaction and verifies the copied data.
//
// The host side of the transfer is a buffer obtained from hwloc_alloc() and
// locked for the GPU agent with hsa_amd_memory_lock(); the device side is
// allocated from the transaction's source or destination pool. Two further
// system-pool buffers (host_ptr_src / host_ptr_dst) hold the reference
// pattern and the data read back for comparison. For every entry of Size[]
// that does not exceed the transaction size, the copy is timed over
// RealIterationNum() iterations and the minimum and mean times are appended
// to the transaction's result vectors. P2P transactions are skipped.
//
// @param t Transaction to benchmark; results are pushed to t->min_time and
//          t->benchmark_copy_time.
void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) {
  hsa_status_t err;
  void* ptr_src;
  void* ptr_dst;

  size_t size = t->max_size * 1024;

  hsa_amd_memory_pool_t src_pool =  pool_info_[t->src]->pool_;
  hsa_agent_t dst_agent = pool_info_[t->dst]->owner_agent_info()->agent();
  hsa_amd_memory_pool_t dst_pool = pool_info_[t->dst]->pool_;

  hsa_agent_t src_agent = pool_info_[t->src]->owner_agent_info()->agent();

  PrintTransactionType(t);

  const bool dev_to_host = (t->type == D2H || t->type == D2HRemote);
  const bool host_to_dev = (t->type == H2D || t->type == H2DRemote);

  // Only host<->device transfers are exercised. Leave before any resource is
  // acquired so that nothing has to be released on this path.
  if (!dev_to_host && !host_to_dev) {
    ASSERT_EQ(t->type, P2P);
    std::cout << "Skipping P2P for NUMA test" << std::endl;
    return;
  }

  // Allocate resources...
  void *locked_mem;

  // We are relying on a previous call to hwloc_set_membind_nodeset() to set
  // the memory policy
  void *local_alloc = hwloc_alloc(topology_, size);
  ASSERT_TRUE(local_alloc != nullptr && "hwloc_alloc() failed");
  hsa_agent_t gpu_agent = host_to_dev ? dst_agent : src_agent;

  // 1. We should specify the gpu agent here as the cpu already has
  // access to the system memory.
  // 2. The host can only use the pointer assigned from the system mem.
  // alloc. call (e.g., "local_alloc" below). The gpu agent can only use the
  // pointer returned by the lock call (e.g., "locked_mem" below). This is
  // a current (as of August 2017) limitation of KFD.
  err = hsa_amd_memory_lock(local_alloc, size, &gpu_agent, 1, &locked_mem);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (dev_to_host) {
    err = hsa_amd_memory_pool_allocate(src_pool, size, 0, &ptr_src);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    ptr_dst = locked_mem;
  } else {
    err = hsa_amd_memory_pool_allocate(dst_pool, size, 0, &ptr_dst);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    ptr_src = locked_mem;
  }

  void* host_ptr_src = NULL;
  void* host_ptr_dst = NULL;
  err = hsa_amd_memory_pool_allocate(sys_pool_, size, 0,
                                     reinterpret_cast<void**>(&host_ptr_src));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  err = hsa_amd_memory_pool_allocate(sys_pool_, size, 0,
                                     reinterpret_cast<void**>(&host_ptr_dst));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  hsa_signal_t s;
  err = hsa_signal_create(1, 0, NULL, &s);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // Deallocate resources...
  MAKE_SCOPE_GUARD([&]() {
    // NOTE that the host memory pointer (local_alloc) must be used below
    err = hsa_amd_memory_unlock(local_alloc);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    // Free whichever side was taken from a memory pool above; the other side
    // is the locked host buffer and is released through hwloc_free().
    if (dev_to_host) {
      err = hsa_amd_memory_pool_free(ptr_src);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);
    } else {
      err = hsa_amd_memory_pool_free(ptr_dst);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);
    }

    // numa_free(local_alloc, size);
    hwloc_free(topology_, local_alloc, size);
    err = hsa_amd_memory_pool_free(host_ptr_src);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);
    err = hsa_amd_memory_pool_free(host_ptr_dst);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    err = hsa_signal_destroy(s);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);
  });

  hsa_agent_t *cpy_ag = nullptr;
  // **** First copy from the system buffer source to the test source pool
  // Acquire the appropriate access; prefer GPU agent over CPU where there
  // is a choice. We don't need to do this if the test source happens to
  // be the host pool

  err = hsa_amd_memory_fill(host_ptr_src, 1, size/sizeof(uint32_t));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = hsa_amd_memory_fill(host_ptr_dst, 0, size/sizeof(uint32_t));
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // NOTE(review): the staging and read-back steps below test for D2H only,
  // so D2HRemote follows the same path as the H2D types. This is kept as in
  // the original; confirm that it is intended.
  if (t->type == D2H) {
    cpy_ag = AcquireAsyncCopyAccess(ptr_src, src_pool, &src_agent,
                                        host_ptr_src, sys_pool_, &cpu_agent_);
    if (cpy_ag == nullptr) {
      std::cout << "Agents " << t->src << " and " << t->dst <<
                              " cannot access each other's pool." << std::endl;
      std::cout << "Skipping..." << std::endl;
      return;
    }

    err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag,
                                                            size, 0, NULL, s);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                         uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
    {}

    memset(local_alloc, 0, size);
  } else {  // H2D
    cpy_ag = AcquireAsyncCopyAccess(ptr_dst, dst_pool, &dst_agent,
                                        host_ptr_dst, sys_pool_, &cpu_agent_);
    if (cpy_ag == nullptr) {
      std::cout << "Agents " << t->src << " and " << t->dst <<
                              " cannot access each other's pool." << std::endl;
      std::cout << "Skipping..." << std::endl;
      return;
    }

    err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag,
                                                            size, 0, NULL, s);
    ASSERT_EQ(HSA_STATUS_SUCCESS, err);

    while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                         uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
    {}

    memset(local_alloc, 1, size);
  }

  int iterations = RealIterationNum();

  // **** Next, copy from the test source pool to the test destination pool
  // Prefer a gpu agent to a cpu agent
  cpy_ag = AcquireAsyncCopyAccess(ptr_dst, dst_pool, &dst_agent,
              ptr_src, src_pool, &src_agent);
  if (cpy_ag == nullptr) {
    std::cout << "Agents " << t->src << " and " << t->dst <<
                            " cannot access each other's pool." << std::endl;
    std::cout << "Skipping..." << std::endl;
    return;
  }

  for (int i = 0; i < kNumGranularity; i++) {
    if (Size[i] > size) {
      break;
    }

    std::vector<double> time;

    for (int it = 0; it < iterations; it++) {
      if (verbosity() >= VERBOSE_PROGRESS) {
        std::cout << ".";
        std::cout.flush();
      }

      hsa_signal_store_relaxed(t->signal, 1);

      rocrtst::PerfTimer copy_timer;
      int index = copy_timer.CreateTimer();

      // Only the copy itself and the wait for its completion are timed.
      copy_timer.StartTimer(index);
      err = hsa_amd_memory_async_copy(ptr_dst, *cpy_ag, ptr_src, *cpy_ag,
                                                 Size[i], 0, NULL, t->signal);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      while (hsa_signal_wait_scacquire(t->signal, HSA_SIGNAL_CONDITION_LT, 1,
                                         uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
      {}

      copy_timer.StopTimer(index);

      hsa_signal_store_relaxed(s, 1);

      err = AcquireAccess(dst_agent, sys_pool_, host_ptr_dst);
      ASSERT_EQ(HSA_STATUS_SUCCESS, err);

      // Bring the destination contents into host_ptr_dst for comparison.
      if (t->type == D2H) {
        memcpy(host_ptr_dst, local_alloc, size);
      } else {
        err = hsa_amd_memory_async_copy(host_ptr_dst, dst_agent, ptr_dst,
                                                 dst_agent, size, 0, NULL, s);
        ASSERT_EQ(HSA_STATUS_SUCCESS, err);

        while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
                                       uint64_t(-1), HSA_WAIT_STATE_ACTIVE))
          {}
      }

      if (memcmp(host_ptr_src, host_ptr_dst, Size[i])) {
        verified_ = false;
      }
      // Push the result back to vector time
      time.push_back(copy_timer.ReadTimer(index));
    }

    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << std::endl;
    }

    // Get Min copy time
    t->min_time->push_back(*std::min_element(time.begin(), time.end()));
    // Get mean copy time and store to the array
    t->benchmark_copy_time->push_back(GetMeanTime(&time));
  }
}

#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/performance/memory_async_copy_numa.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_NUMA_H_
#define ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_NUMA_H_

#include <hwloc.h>

#include <vector>
#include <algorithm>

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "suites/test_common/test_base.h"
#include "suites/performance/memory_async_copy.h"

// Variant of the MemoryAsyncCopy performance test that declares its own
// Run() and RunBenchmarkWithVerification().
class MemoryAsyncCopyNUMA : public MemoryAsyncCopy {
 public:
  // @Brief: Constructor for test case of MemoryAsyncCopyNUMA
  MemoryAsyncCopyNUMA();

  // @Brief: Destructor for test case of MemoryAsyncCopyNUMA
  virtual ~MemoryAsyncCopyNUMA();

  // @Brief: Execute the test
  virtual void Run();

 protected:
  // @Brief: Run for Benchmark mode with verification
  // @param t the transaction (source, destination, type, size) to benchmark
  virtual void RunBenchmarkWithVerification(Transaction *t);
};

#endif  // ROCRTST_SUITES_PERFORMANCE_MEMORY_ASYNC_COPY_NUMA_H_


================================================
FILE: rocrtst/suites/stress/memory_concurrent_tests.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>

#include "suites/stress/memory_concurrent_tests.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"


// Number of worker threads (and per-thread control blocks) used by each
// concurrent subtest in this file.
static const uint32_t kNumThreads = 1024;
// Upper bound (1024 * 1024) applied to the per-thread allocation size.
static const uint32_t kMaxAllocSize = 1024 * 1024;


// Per-thread control block handed to the allocate / free callbacks.
struct control_block {
    // Pool the callback allocates from
    hsa_amd_memory_pool_t* pool;
    // Size passed to the allocation call
    size_t alloc_size;
    // Pointer produced by the allocation (and later passed to free)
    void* alloc_pointer;
};
typedef control_block cb_t;


// Callback function which will call upon when need
// to allocate memory from the pool in the thread.
static void CallbackHSAMemoryAllocateFunc(void *data) {
  hsa_status_t err;
  cb_t *cb = static_cast<cb_t*>(data);

  err = hsa_amd_memory_pool_allocate(*(cb->pool),
                               cb->alloc_size, 0,
                               reinterpret_cast<void**>(&(cb->alloc_pointer)));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  return;
}

// Callback function which will call upon when need
// to Free memory from the pool in the thread.
static void CallbackHSAMemoryFreeFunc(void *data) {
  hsa_status_t err;
  cb_t *cb = static_cast<cb_t*>(data);

  err = hsa_memory_free(cb->alloc_pointer);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  return;
}

// Per-thread data for the concurrent pool-info consistency check.
struct thread_data_get_pool_info_s {
    // Pool queried by the thread
    hsa_amd_memory_pool_t pool;
    // Reference pool info, obtained by the main thread
    rocrtst::pool_info_t* info;
    // Set to 1 by the thread when its query matches `info`, otherwise 0
    int consistency;
};
typedef thread_data_get_pool_info_s thread_data_get_pool_info_t;

// Callback function which will call upon when need
// to Fetch different info for the pool in the thread.
static void CallbackGetPoolInfo(void* data) {
  hsa_status_t err;

  thread_data_get_pool_info_t* thread_data =
              static_cast<thread_data_get_pool_info_t*>(data);

  rocrtst::pool_info_t info;
  memset(&info, 0, sizeof(rocrtst::pool_info_t));
  err = rocrtst::AcquirePoolInfo(thread_data->pool, &info);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (*(thread_data->info) == info) {
    // The pool info is consistent with the one got from the main thread
    thread_data->consistency = 1;
  } else {
    thread_data->consistency = 0;
  }
}

// Builds the test object and derives its title and description from the
// selected subtest. The first flag that is true (Allocate, then Free, then
// PoolGetInfo) decides the suffix of the title and the description text.
//
// The three flags are also stored in the members of the same name. The
// parameters shadow those members, so without the initialisers below the
// members would be left uninitialised.
MemoryConcurrentTest::MemoryConcurrentTest(bool launch_Concurrent_Allocate_,
                      bool launch_Concurrent_Free_ ,
                      bool launch_Concurrent_PoolGetInfo_) :
    TestBase(),
    launch_Concurrent_Allocate_(launch_Concurrent_Allocate_),
    launch_Concurrent_Free_(launch_Concurrent_Free_),
    launch_Concurrent_PoolGetInfo_(launch_Concurrent_PoolGetInfo_) {
  set_num_iteration(10);  // Number of iterations to execute of the main test;
                          // This is a default value which can be overridden
                          // on the command line.

  std::string name;
  std::string desc;

  name = "RocR Memory Concurrent";
  desc = "These series of tests are Stress tests which contains different subtests ";

  // Inside the body the unqualified names refer to the parameters.
  if (launch_Concurrent_Allocate_) {
    name += " Allocate";
    desc += " This test Verify that memory can be concurrently allocated from pool"
            " and thread safety while allocating memory from different threads"
            " on ROCR agents";
  } else if (launch_Concurrent_Free_) {
    name += " Free";
    desc += " This test thet memory Verify can be concurrently freed from pool"
            " and thread safety while memory free from different threads"
            " on ROCR agents";
  } else if (launch_Concurrent_PoolGetInfo_) {
    name += " PoolGetInfo";
    desc += " This test Verify that memory pool info can be concurrently "
            " get from different threads on ROCR agents";
  }
  set_title(name);
  set_description(desc);
}

// Destructor: the body is empty.
MemoryConcurrentTest::~MemoryConcurrentTest() {}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void MemoryConcurrentTest::SetUp(void) {
  hsa_status_t err;

  TestBase::SetUp();

  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  return;
}

// Runs the base-class Run() only when the profile required by this test case
// matches the one we are actually running on.
void MemoryConcurrentTest::Run(void) {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

void MemoryConcurrentTest::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// Performs the profile check for this test case. Nothing is displayed by
// this function itself, whichever way the check turns out.
void MemoryConcurrentTest::DisplayResults(void) const {
  static_cast<void>(!rocrtst::CheckProfile(this));
}

void MemoryConcurrentTest::Close() {
  // This will close handles opened within rocrtst utility calls and call
  // hsa_shut_down(), so it should be done after other hsa cleanup
  TestBase::Close();
}


// Line printed after each subtest's "subtest Passed" message.
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner line that introduces a memory stress subtest.
// `header` is the subtest name placed inside the banner.
static void PrintMemorySubtestHeader(const char *header) {
  std::ostream& out = std::cout;
  out << "  *** Memory Stress Subtest: ";
  out << header;
  out << " ***" << std::endl;
}

// Prints "  Agent: <name> (<type>)" for the given agent, where <type> is one
// of CPU, GPU, DSP or AIE. For any other device type nothing is printed after
// the opening parenthesis.
static void PrintAgentNameAndType(hsa_agent_t agent) {
  char name_buf[64];
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME,
                                           name_buf);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  hsa_device_type_t dev_type;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &dev_type);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  // Text that closes the parenthesis; stays empty for unlisted types.
  const char* type_tag = "";
  switch (dev_type) {
    case HSA_DEVICE_TYPE_CPU:
      type_tag = "CPU)";
      break;
    case HSA_DEVICE_TYPE_GPU:
      type_tag = "GPU)";
      break;
    case HSA_DEVICE_TYPE_DSP:
      type_tag = "DSP)";
      break;
    case HSA_DEVICE_TYPE_AIE:
      type_tag = "AIE)";
      break;
  }

  std::cout << "  Agent: " << name_buf << " (";
  std::cout << type_tag;
  std::cout << std::endl;
}

// Returns true when two allocations of `len` bytes that start at `a` and `b`
// share at least one byte, or start at the very same address.
static bool ConcurrentAllocationsOverlap(const void* a, const void* b,
                                         size_t len) {
  const uintptr_t lo_a = reinterpret_cast<uintptr_t>(a);
  const uintptr_t lo_b = reinterpret_cast<uintptr_t>(b);
  return lo_a == lo_b || (lo_a < lo_b + len && lo_b < lo_a + len);
}

// Verifies that memory can be allocated concurrently from `pool`.
//
// kNumThreads threads each allocate one page-aligned chunk from the pool.
// Afterwards every returned pointer must be non-null and no two chunks may
// overlap. The chunks are then freed from the calling thread. Pools that do
// not allow runtime allocation are skipped.
//
// @param agent agent owning the pool (used for reporting and size queries)
// @param pool  pool to allocate from
void MemoryConcurrentTest::MemoryConcurrentAllocate(hsa_agent_t agent,
                                               hsa_amd_memory_pool_t pool) {
  hsa_status_t err;

  rocrtst::pool_info_t pool_i;
  err = rocrtst::AcquirePoolInfo(pool, &pool_i);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Determine if allocation is allowed in this memory pool
  bool alloc = false;
  err = hsa_amd_memory_pool_get_info(pool,
                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  if (alloc) {
    size_t alloc_size;
    size_t total_vram_size;
    hsa_device_type_t ag_type;

    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
                                      &total_vram_size);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
    ASSERT_EQ(err, HSA_STATUS_SUCCESS);

    // If VRAM size <= 512MB, it should be APU whose VRAM is carved from system memory
    // and much smaller than dGPU. Change the threshold accordingly.
    if (total_vram_size <= 536870912 && ag_type == HSA_DEVICE_TYPE_GPU) {
      // Make sure do not allocate more than 1/4 of the available vram size
      err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL,
                                &total_vram_size);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
      alloc_size = (total_vram_size*1/4 <= kMaxAllocSize*kNumThreads) ? total_vram_size*1/(4*kNumThreads): kMaxAllocSize;
    } else {
      // Make sure do not allocate more than 3/4 of the vram size
      alloc_size = (total_vram_size*3/4 <= kMaxAllocSize*kNumThreads) ? total_vram_size*3/(4*kNumThreads): kMaxAllocSize;
    }

    // Page align the alloc_size
    alloc_size = alloc_size - (alloc_size & ((1 << 12) - 1));

    // Create a test group
    rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);

    // The control blocks are used to pass data to the threads
    uint32_t kk;
    cb_t cb[kNumThreads];
    for (kk = 0; kk < kNumThreads; kk++) {
      cb[kk].pool = &pool;
      cb[kk].alloc_size = alloc_size;
      // Start from a known value so that a thread whose allocation failed is
      // detected by the null check below instead of leaving garbage behind.
      cb[kk].alloc_pointer = nullptr;
      rocrtst::TestGroupAdd(tg_concurrent, &CallbackHSAMemoryAllocateFunc, &cb[kk], 1);
    }

    // Create threads for each test
    rocrtst::TestGroupThreadCreate(tg_concurrent);

    // Start to run tests
    rocrtst::TestGroupStart(tg_concurrent);

    // Wait all tests finish
    rocrtst::TestGroupWait(tg_concurrent);

    // Exit all tests
    rocrtst::TestGroupExit(tg_concurrent);

    // Destroy thread group and cleanup resources
    rocrtst::TestGroupDestroy(tg_concurrent);

    // Check for missing or overlapping allocations. Every pair is compared
    // once; two chunks starting at the same address count as overlapping.
    for (kk = 0; kk < kNumThreads; ++kk) {
      ASSERT_NE(cb[kk].alloc_pointer, nullptr);
      for (uint32_t ll = kk + 1; ll < kNumThreads; ++ll) {
        ASSERT_FALSE(ConcurrentAllocationsOverlap(cb[kk].alloc_pointer,
                                                  cb[ll].alloc_pointer,
                                                  alloc_size))
            << "allocations " << kk << " and " << ll << " overlap";
      }
    }

    for (uint32_t ii = 0; ii < kNumThreads; ii++) {
      err = hsa_memory_free(cb[ii].alloc_pointer);
      ASSERT_EQ(err, HSA_STATUS_SUCCESS);
    }
  }
  return;
}


// Verifies that memory can be freed concurrently: kNumThreads chunks are
// allocated from `pool` by the calling thread, then each chunk is freed by
// its own thread. Pools that do not allow runtime allocation are skipped.
void MemoryConcurrentTest::MemoryConcurrentFree(hsa_agent_t agent,
                                                hsa_amd_memory_pool_t pool) {
  rocrtst::pool_info_t info;
  hsa_status_t status = rocrtst::AcquirePoolInfo(pool, &info);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  // Nothing to do when the pool does not permit runtime allocation
  bool can_alloc = false;
  status = hsa_amd_memory_pool_get_info(pool,
                   HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &can_alloc);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);
  if (!can_alloc) {
    return;
  }

  // Maximum allocation size reported for the pool
  size_t mem_size;
  status = hsa_amd_memory_pool_get_info(pool,
                   HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, &mem_size);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  hsa_device_type_t dev_type;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &dev_type);
  ASSERT_EQ(status, HSA_STATUS_SUCCESS);

  size_t chunk;
  if (mem_size <= 536870912 && dev_type == HSA_DEVICE_TYPE_GPU) {
    // A GPU pool of at most 512MB is taken to be an APU carve-out: base the
    // budget on the memory currently available and use at most 1/4 of it.
    status = hsa_agent_get_info(agent,
                 (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_AVAIL, &mem_size);
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);
    if (mem_size*1/4 <= kMaxAllocSize*kNumThreads) {
      chunk = mem_size*1/(4*kNumThreads);
    } else {
      chunk = kMaxAllocSize;
    }
  } else {
    // Otherwise use at most 3/4 of the pool size
    if (mem_size*3/4 <= kMaxAllocSize*kNumThreads) {
      chunk = mem_size*3/(4*kNumThreads);
    } else {
      chunk = kMaxAllocSize;
    }
  }

  // Round the chunk size down to a multiple of 4096
  chunk -= (chunk & ((1 << 12) - 1));

  rocrtst::test_group* group = rocrtst::TestGroupCreate(kNumThreads);

  // One control block per thread; the memory is allocated here and the
  // thread's only job is to free it.
  cb_t blocks[kNumThreads];
  for (uint32_t idx = 0; idx < kNumThreads; idx++) {
    cb_t* blk = &blocks[idx];
    blk->pool = &pool;
    blk->alloc_size = chunk;
    status = hsa_amd_memory_pool_allocate(*(blk->pool), blk->alloc_size, 0,
                                          &(blk->alloc_pointer));
    ASSERT_EQ(status, HSA_STATUS_SUCCESS);

    rocrtst::TestGroupAdd(group, &CallbackHSAMemoryFreeFunc, blk, 1);
  }

  // Create the threads, run them, wait for completion, then tear down
  rocrtst::TestGroupThreadCreate(group);
  rocrtst::TestGroupStart(group);
  rocrtst::TestGroupWait(group);
  rocrtst::TestGroupExit(group);
  rocrtst::TestGroupDestroy(group);
}


// Verifies that the attribute information of `pool` is the same when it is
// queried concurrently from kNumThreads threads as when it is queried from
// the calling thread.
void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
                                                hsa_amd_memory_pool_t pool) {
  // Reference info, taken from the calling thread
  rocrtst::pool_info_t reference;
  const hsa_status_t status = rocrtst::AcquirePoolInfo(pool, &reference);
  ASSERT_EQ(HSA_STATUS_SUCCESS, status);

  if (verbosity() > 0) {
    PrintAgentNameAndType(agent);
  }

  thread_data_get_pool_info_t per_thread[kNumThreads];

  rocrtst::test_group* group = rocrtst::TestGroupCreate(kNumThreads);

  for (uint32_t idx = 0; idx < kNumThreads; idx++) {
    thread_data_get_pool_info_t* slot = per_thread + idx;
    slot->pool = pool;
    slot->info = &reference;
    slot->consistency = 0;
    rocrtst::TestGroupAdd(group, &CallbackGetPoolInfo, slot, 1);
  }

  // Create the threads, run them, wait for completion, then tear down
  rocrtst::TestGroupThreadCreate(group);
  rocrtst::TestGroupStart(group);
  rocrtst::TestGroupWait(group);
  rocrtst::TestGroupExit(group);
  rocrtst::TestGroupDestroy(group);

  // Every thread must have seen the same pool info as the reference
  for (uint32_t idx = 0; idx < kNumThreads; idx++) {
    ASSERT_EQ(per_thread[idx].consistency, 1);
  }
}


void MemoryConcurrentTest::MemoryConcurrentAllocate(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryConcurrentAllocate in Stress Test");
  }
  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryConcurrentAllocate(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryConcurrentTest::MemoryConcurrentFree(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryConcurrentFree in Stress Test");
  }

  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryConcurrentFree(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void MemoryConcurrentTest::MemoryConcurrentPoolGetInfo(void) {
  hsa_status_t err;
  std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;

  if (verbosity() > 0) {
    PrintMemorySubtestHeader("MemoryConcurrentPoolGetInfo in Stress Test");
  }
  err = rocrtst::GetAgentPools(&agent_pools);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  auto pool_idx = 0;
  for (auto a : agent_pools) {
    for (auto p : a->pools) {
      if (verbosity() > 0) {
        std::cout << "  Pool " << pool_idx++ << ":" << std::endl;
      }
      MemoryConcurrentPoolGetInfo(a->agent, p);
    }
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


================================================
FILE: rocrtst/suites/stress/memory_concurrent_tests.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_
#define ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"


// Test case declaring three concurrency subtests on ROCr memory pools:
// concurrent allocate, concurrent free and concurrent pool-info query.
class MemoryConcurrentTest : public TestBase {
 public:
    // @Brief: Constructor. There is one flag per subtest; presumably each
    // flag is kept in the same-named member declared below -- confirm
    // against the implementation file.
    MemoryConcurrentTest(bool launch_Concurrent_Allocate_,
                         bool launch_Concurrent_Free_ ,
                         bool launch_Concurrent_PoolGetInfo_);

  // @Brief: Destructor for MemoryConcurrentTest
  virtual ~MemoryConcurrentTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);


  // @Brief: This test verifies that memory can be
  // concurrently allocated from a pool on ROCR agents
  void MemoryConcurrentAllocate(void);

  // @Brief: This test verifies that memory can be
  // concurrently freed from a pool on ROCR agents
  void MemoryConcurrentFree(void);

  // @Brief: This test verifies that each agent pool's attribute information
  // is consistent across multiple threads.
  void MemoryConcurrentPoolGetInfo(void);

 private:
  // Overloads of the three subtests above that take a single agent and one
  // memory pool.
  void MemoryConcurrentAllocate(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);
  void MemoryConcurrentFree(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);
  void MemoryConcurrentPoolGetInfo(hsa_agent_t agent,
                             hsa_amd_memory_pool_t pool);

  // @Brief: Indicates whether to launch the concurrent allocate test
  bool launch_Concurrent_Allocate_;

  // @Brief: Indicates whether to launch the concurrent free test
  bool launch_Concurrent_Free_;

  // @Brief: Indicates whether to launch the concurrent pool get info test
  bool launch_Concurrent_PoolGetInfo_;
};

#endif  // ROCRTST_SUITES_STRESS_MEMORY_CONCURRENT_TESTS_H_


================================================
FILE: rocrtst/suites/stress/queue_write_index_concurrent_tests.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>

#include "suites/stress/queue_write_index_concurrent_tests.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

// Selects which memory-ordering flavour of the write-index API a worker
// thread calls. The numeric order is relied upon: the subtests iterate over
// integer ranges of these values that end at MEM_ORDERING_END.
enum memoryOrdering {
  SCACQ_SCREL = 0,
  SCACQUIRE = 1,
  RELAXED = 2,
  SCRELEASE = 3,
  MEM_ORDERING_END = 4  // one past the last ordering; loop bound only
};

// Number of worker threads used by the add subtest
static const uint32_t kNumThreadsForAdd = 10;

// Number of atomic adds each of those threads performs
static const uint32_t kNumOfAddAtomic = 1024 * 1024;

// Argument block handed to the add-subtest worker threads
struct write_index_add_atomic_data_s {
  hsa_queue_t* queue;        // queue whose write index is incremented
  int memory_ordering_type;  // a memoryOrdering value
};
typedef struct write_index_add_atomic_data_s write_index_add_atomic_data_t;


static void thread_proc_write_index_add_atomic(void* data) {
  write_index_add_atomic_data_t* thread_data = reinterpret_cast<write_index_add_atomic_data_t*> (data);
  uint64_t ii;
  for (ii = 0; ii < kNumOfAddAtomic; ++ii) {
    switch (thread_data->memory_ordering_type) {
      case SCACQ_SCREL:
        hsa_queue_add_write_index_scacq_screl(thread_data->queue, 1);
        break;
      case SCACQUIRE:
        hsa_queue_add_write_index_scacquire(thread_data->queue, 1);
        break;
      case RELAXED:
        hsa_queue_add_write_index_relaxed(thread_data->queue, 1);
        break;
      case SCRELEASE:
        hsa_queue_add_write_index_screlease(thread_data->queue, 1);
        break;
      default:
        break;
    }
  }
}

// Number of worker threads used by the compare-and-swap subtest
static const uint32_t kNumThreadsForCas = 4;
// Write-index value at which the compare-and-swap workers stop
static const uint32_t kNumOfCasAtomic = 1024 * 1024;

// Per-thread argument block for the compare-and-swap workers
struct write_index_cas_thread_data_s {
  hsa_queue_t* queue;          // queue whose write index is advanced
  int thread_index;            // first index value this thread handles
  int num_threads;             // stride between the values this thread handles
  uint64_t termination_value;  // exclusive upper bound of the handled values
  int memory_ordering_type;    // a memoryOrdering value
};
typedef struct write_index_cas_thread_data_s write_index_cas_thread_data_t;

static void thread_proc_write_index_cas_atomic(void* data) {
  write_index_cas_thread_data_t* thread_data = reinterpret_cast<write_index_cas_thread_data_t*>(data);

  uint64_t ii;
  for (ii = thread_data->thread_index; ii < thread_data->termination_value; ii += thread_data->num_threads) {
    switch (thread_data->memory_ordering_type) {
      case SCACQ_SCREL:
        while ((uint64_t)ii !=
          hsa_queue_cas_write_index_scacq_screl(thread_data->queue, ii, ii + 1)) {}
          break;
     case SCACQUIRE:
        while ((uint64_t)ii !=
          hsa_queue_cas_write_index_scacquire(thread_data->queue, ii, ii + 1)) {}
          break;
     case RELAXED:
        while ((uint64_t)ii !=
          hsa_queue_cas_write_index_relaxed(thread_data->queue, ii, ii + 1)) {}
          break;
     case SCRELEASE:
        while ((uint64_t)ii !=
          hsa_queue_cas_write_index_screlease(thread_data->queue, ii, ii + 1)) {}
          break;
        }
    }
}

// Iterations performed by every loader and every storer thread
static const uint32_t kNumOfLoadStoreAtomic = 1024 * 1024;
// Non-zero pattern written by one of the storers; every bit of the 64-bit
// value is set so that a partially written index is neither 0 nor this value
static uint64_t kStoreValue = UINT64_MAX;

// Arguments for a loader thread
struct write_index_load_atomic_thread_data_s {
  hsa_queue_t* queue;        // queue whose write index is read
  uint64_t num_iterations;   // number of loads to perform
  int memory_ordering_type;  // a memoryOrdering value
};
typedef struct write_index_load_atomic_thread_data_s write_index_load_atomic_thread_data_t;

// Arguments for a storer thread
struct write_index_store_atomic_thread_data_s {
  hsa_queue_t* queue;        // queue whose write index is written
  uint64_t kStoreValue;      // value this thread keeps storing
  uint64_t num_iterations;   // number of stores to perform
  int memory_ordering_type;  // a memoryOrdering value
};
typedef struct write_index_store_atomic_thread_data_s write_index_store_atomic_thread_data_t;

// Starting value of a loader's local result. It is neither 0 nor kStoreValue,
// so the loader's check fails when no load replaced it.
static uint64_t const WRITE_INDEX_FAILURE = 2;
// Loader thread body for the load/store subtest.
// Reads the queue's write index num_iterations times and expects every value
// read to be either 0 or kStoreValue.
//   SCRELEASE -> hsa_queue_load_write_index_scacquire
//   RELAXED   -> hsa_queue_load_write_index_relaxed
// For any other ordering no load is issued, the local value stays
// WRITE_INDEX_FAILURE and the expectation fails.
void thread_proc_write_index_load_atomic(void* data) {
  write_index_load_atomic_thread_data_t* thread_data =
              reinterpret_cast<write_index_load_atomic_thread_data_t*>(data);
  // The counter is 64-bit like num_iterations; a 32-bit counter would wrap
  // around and never reach a bound larger than UINT32_MAX.
  for (uint64_t ii = 0; ii < thread_data->num_iterations; ++ii) {
    uint64_t write_index = WRITE_INDEX_FAILURE;  // initialized with a value other than kStoreValue
    if (SCRELEASE == thread_data->memory_ordering_type) {
      write_index = hsa_queue_load_write_index_scacquire(thread_data->queue);
    } else if (RELAXED == thread_data->memory_ordering_type) {
      write_index = hsa_queue_load_write_index_relaxed(thread_data->queue);
    }
    // The only two possible values
    EXPECT_TRUE(0 == write_index || kStoreValue == write_index);
  }
}

// Storer thread body for the load/store subtest.
// Writes thread_data->kStoreValue to the queue's write index num_iterations
// times, with the screlease store for SCRELEASE and the relaxed store for
// RELAXED. Any other ordering value stores nothing.
void thread_proc_write_index_store_atomic(void* data) {
  write_index_store_atomic_thread_data_t* thread_data =
              reinterpret_cast<write_index_store_atomic_thread_data_t*>(data);
  // The counter is 64-bit like num_iterations; a 32-bit counter would wrap
  // around and never reach a bound larger than UINT32_MAX.
  for (uint64_t ii = 0; ii < thread_data->num_iterations; ++ii) {
    if (SCRELEASE == thread_data->memory_ordering_type) {
      hsa_queue_store_write_index_screlease(thread_data->queue, thread_data->kStoreValue);
    } else if (RELAXED == thread_data->memory_ordering_type) {
      hsa_queue_store_write_index_relaxed(thread_data->queue, thread_data->kStoreValue);
    }
  }
}


// Builds the test's title and description. The flags are examined in order
// (add, cas, load/store) and only the first one that is set contributes its
// suffix; with no flag set the generic title and description are used.
QueueWriteIndexConcurrentTest::QueueWriteIndexConcurrentTest(bool launch_Concurrent_AddWriteIndex,
                      bool launch_Concurrent_CasWriteIndex ,
                      bool launch_Concurrent_LoadStoreWriteIndex) :TestBase() {
  // Default number of iterations of the main test; it can be overridden on
  // the command line.
  set_num_iteration(10);

  std::string title("RocR Queue write Index Tests");
  std::string summary("These series of tests are Stress tests which contains different subtests ");

  if (launch_Concurrent_AddWriteIndex) {
    title.append(" AddWriteIndex");
    summary.append(" This test Verifies that the hsa_queue_write_index_add operations is atomic"
                   " and 'torn' adds do not occur when this API is executed concurrently.");
  } else if (launch_Concurrent_CasWriteIndex) {
    title.append(" CasWriteIndex");
    summary.append(" This test Verifies that the hsa_queue_cas_write_index operations is atomic,"
                   " and 'torn' compare and swaps do not occur when this API is executed"
                   " concurrently.");
  } else if (launch_Concurrent_LoadStoreWriteIndex) {
    title.append(" LoadStoreWriteIndex");
    summary.append(" This test Verifies that the hsa_queue_write_index_load and store operations"
                   " are atomic, and 'torn' loads or stores do not occur when these APIs are executed"
                   " concurrently.");
  }

  set_title(title);
  set_description(summary);
}

// The destructor has no work of its own.
QueueWriteIndexConcurrentTest::~QueueWriteIndexConcurrentTest() {}

// One-time setup of the member state used by the rest of the test: runs the
// base-class setup, then rocrtst::SetDefaultAgents and
// rocrtst::SetPoolsTypical, stopping at the first step that reports an error.
void QueueWriteIndexConcurrentTest::SetUp() {
  TestBase::SetUp();

  hsa_status_t err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}

// Runs the base-class test body, but only when the profile required by this
// test case matches the one we are actually running on.
void QueueWriteIndexConcurrentTest::Run() {
  if (rocrtst::CheckProfile(this)) {
    TestBase::Run();
  }
}

// Defers entirely to the base class for the test information output.
void QueueWriteIndexConcurrentTest::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}

// This test has no results of its own to print. The required profile is
// still compared with the one we are running on, as in Run().
void QueueWriteIndexConcurrentTest::DisplayResults() const {
  if (rocrtst::CheckProfile(this)) {
    // Profile matches; nothing to display.
  }
}

// Final teardown. TestBase::Close() closes the handles opened inside the
// rocrtst utility calls and calls hsa_shut_down(), so any other hsa cleanup
// has to happen before it.
void QueueWriteIndexConcurrentTest::Close() {
  TestBase::Close();
}


// Line printed after a subtest's result when verbose output is enabled
static const char kSubTestSeparator[] = "  **************************";

// Writes the banner announcing the subtest named by `header` to stdout.
static void PrintDebugSubtestHeader(const char *header) {
  std::cout << "  *** QueueWriteIndexConcurrent Subtest: ";
  std::cout << header;
  std::cout << " ***" << std::endl;
}


// Checks that concurrent hsa_queue_add_write_index_* calls are atomic.
// For every memory-ordering variant, kNumThreadsForAdd threads each add 1 to
// the write index of a single queue kNumOfAddAtomic times; afterwards the
// index must equal the total number of adds. Agents that do not support
// kernel dispatch are skipped. cpuAgent is not used.
void QueueWriteIndexConcurrentTest::QueueAddWriteIndexAtomic(hsa_agent_t cpuAgent,
                                    hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // check if the gpuAgent supports kernel dispatch
  uint32_t features = 0;
  err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_FEATURE, &features);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
    return;
  }

  // Get the maximum queue size supported by the agent
  uint32_t queue_size;
  err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Create a queue
  hsa_queue_t* queue;
  err = hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Widen before multiplying so the product cannot wrap in 32 bits
  const uint64_t expected = static_cast<uint64_t>(kNumOfAddAtomic) * kNumThreadsForAdd;
  // Starts equal to expected so that the check below passes if no ordering ran
  uint64_t write_index = expected;

  for (int memory_ordering_type = SCACQ_SCREL; memory_ordering_type < MEM_ORDERING_END;
       ++memory_ordering_type) {
    // Thread data, shared read-only by all worker threads
    write_index_add_atomic_data_t thread_data;
    thread_data.queue = queue;
    thread_data.memory_ordering_type = memory_ordering_type;

    // Create a test group
    rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreadsForAdd);

    for (uint32_t kk = 0; kk < kNumThreadsForAdd; kk++) {
      rocrtst::TestGroupAdd(tg_concurrent, &thread_proc_write_index_add_atomic, &thread_data, 1);
    }

    // Create threads for each test
    rocrtst::TestGroupThreadCreate(tg_concurrent);

    // Start to run tests
    rocrtst::TestGroupStart(tg_concurrent);

    // Wait all tests finish
    rocrtst::TestGroupWait(tg_concurrent);

    // Exit all tests
    rocrtst::TestGroupExit(tg_concurrent);

    // Destroy thread group and cleanup resources
    rocrtst::TestGroupDestroy(tg_concurrent);

    // Read back the write_index. A mismatch is reported after the loop, once
    // the queue has been destroyed: asserting here would return from the
    // function and leak the queue.
    write_index = hsa_queue_load_write_index_relaxed(queue);
    if (write_index != expected) {
      break;
    }

    // Restore the write_index of the queue
    hsa_queue_store_write_index_screlease(queue, 0);
  }

  // Destroy queue
  err = hsa_queue_destroy(queue);

  // Verify the write_index
  ASSERT_EQ(write_index, expected);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}


// Checks that concurrent hsa_queue_cas_write_index_* calls are atomic.
// For every memory-ordering variant, kNumThreadsForCas threads take turns
// advancing the write index of a single queue by compare-and-swap, thread k
// handling the values k, k + kNumThreadsForCas, ... below kNumOfCasAtomic;
// afterwards the index must equal kNumOfCasAtomic. Agents that do not support
// kernel dispatch are skipped. cpuAgent is not used.
void QueueWriteIndexConcurrentTest::QueueCasWriteIndexAtomic(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent) {
  hsa_status_t err;

  // check if the gpuAgent supports kernel dispatch
  uint32_t features = 0;
  err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_FEATURE, &features);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
    return;
  }

  // Get the maximum queue size supported by the agent
  uint32_t queue_size;
  err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Create a queue
  hsa_queue_t* queue;
  err = hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  const uint64_t expected = static_cast<uint64_t>(kNumOfCasAtomic);
  // Starts equal to expected so that the check below passes if no ordering ran
  uint64_t write_index = expected;

  for (int memory_ordering_type = SCACQ_SCREL; memory_ordering_type < MEM_ORDERING_END;
       ++memory_ordering_type) {
    // Thread data, one entry per worker thread
    write_index_cas_thread_data_t thread_data[kNumThreadsForCas];

    // Create a test group
    rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreadsForCas);

    for (uint32_t kk = 0; kk < kNumThreadsForCas; ++kk) {
      thread_data[kk].queue = queue;
      thread_data[kk].thread_index = kk;
      thread_data[kk].num_threads = kNumThreadsForCas;
      thread_data[kk].memory_ordering_type = memory_ordering_type;
      thread_data[kk].termination_value = kNumOfCasAtomic;
      rocrtst::TestGroupAdd(tg_concurrent, &thread_proc_write_index_cas_atomic, thread_data + kk, 1);
    }

    // Create threads for each test
    rocrtst::TestGroupThreadCreate(tg_concurrent);

    // Start to run tests
    rocrtst::TestGroupStart(tg_concurrent);

    // Wait all tests finish
    rocrtst::TestGroupWait(tg_concurrent);

    // Exit all tests
    rocrtst::TestGroupExit(tg_concurrent);

    // Destroy thread group and cleanup resources
    rocrtst::TestGroupDestroy(tg_concurrent);

    // Read back the write_index. A mismatch is reported after the loop, once
    // the queue has been destroyed: asserting here would return from the
    // function and leak the queue.
    write_index = hsa_queue_load_write_index_relaxed(queue);
    if (write_index != expected) {
      break;
    }

    // Restore the write_index of the queue
    hsa_queue_store_write_index_screlease(queue, 0);
  }

  // Destroy queue
  err = hsa_queue_destroy(queue);

  // Verify the write_index
  ASSERT_EQ(write_index, expected);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}


// Checks that write-index loads and stores are atomic. For the RELAXED and
// SCRELEASE orderings, two loader threads read the write index of a single
// queue while two storer threads overwrite it, one with 0 and one with
// kStoreValue; the loaders themselves check every value they read. Agents
// that do not support kernel dispatch are skipped. cpuAgent is not used.
void QueueWriteIndexConcurrentTest::QueueLoadStoreWriteIndexAtomic(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent) {
  // Skip agents that cannot dispatch kernels
  uint32_t features = 0;
  hsa_status_t err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_FEATURE, &features);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
  if ((features & HSA_AGENT_FEATURE_KERNEL_DISPATCH) == 0) {
    return;
  }

  // Maximum queue size supported by the agent
  uint32_t queue_size;
  err = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // The queue whose write index all four threads operate on
  hsa_queue_t* queue;
  err = hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // A 64-bit pattern, so the whole width of the index is exercised
  kStoreValue = UINT64_MAX;

  const int kThreadsPerRole = 2;
  for (int ordering = RELAXED; ordering < MEM_ORDERING_END; ++ordering) {
    write_index_load_atomic_thread_data_t  loaders[kThreadsPerRole];
    write_index_store_atomic_thread_data_t storers[kThreadsPerRole];

    for (int slot = 0; slot < kThreadsPerRole; ++slot) {
      loaders[slot].queue = queue;
      loaders[slot].num_iterations = kNumOfLoadStoreAtomic;
      loaders[slot].memory_ordering_type = ordering;

      storers[slot].queue = queue;
      // The first storer writes 0, the second writes kStoreValue
      storers[slot].kStoreValue = (slot == 0) ? 0 : kStoreValue;
      storers[slot].num_iterations = kNumOfLoadStoreAtomic;
      storers[slot].memory_ordering_type = ordering;
    }

    // Register both loaders first, then both storers
    rocrtst::test_group* group = rocrtst::TestGroupCreate(4);
    for (int slot = 0; slot < kThreadsPerRole; ++slot) {
      rocrtst::TestGroupAdd(group, &thread_proc_write_index_load_atomic, loaders + slot, 1);
    }
    for (int slot = 0; slot < kThreadsPerRole; ++slot) {
      rocrtst::TestGroupAdd(group, &thread_proc_write_index_store_atomic, storers + slot, 1);
    }

    // Create the threads, run them to completion, then tear the group down
    rocrtst::TestGroupThreadCreate(group);
    rocrtst::TestGroupStart(group);
    rocrtst::TestGroupWait(group);
    rocrtst::TestGroupExit(group);
    rocrtst::TestGroupDestroy(group);
  }

  // Release the queue
  err = hsa_queue_destroy(queue);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}


void QueueWriteIndexConcurrentTest::QueueAddWriteIndexAtomic(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("QueueAddWriteIndexAtomic");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueAddWriteIndexAtomic(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueWriteIndexConcurrentTest::QueueCasWriteIndexAtomic(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("QueueCasWriteIndexAtomic");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueCasWriteIndexAtomic(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}

void QueueWriteIndexConcurrentTest::QueueLoadStoreWriteIndexAtomic(void) {
  hsa_status_t err;

  if (verbosity() > 0) {
    PrintDebugSubtestHeader("QueueLoadStoreWriteIndexAtomic");
  }

  // find all cpu agents
  std::vector<hsa_agent_t> cpus;
  err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // find all gpu agents
  std::vector<hsa_agent_t> gpus;
  err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  for (unsigned int i = 0 ; i< gpus.size(); ++i) {
    QueueLoadStoreWriteIndexAtomic(cpus[0], gpus[i]);
  }

  if (verbosity() > 0) {
    std::cout << "subtest Passed" << std::endl;
    std::cout << kSubTestSeparator << std::endl;
  }
}


================================================
FILE: rocrtst/suites/stress/queue_write_index_concurrent_tests.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_STRESS_QUEUE_WRITE_INDEX_CONCURRENT_TESTS_H_
#define ROCRTST_SUITES_STRESS_QUEUE_WRITE_INDEX_CONCURRENT_TESTS_H_


#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"


// Stress test case covering the atomicity of the queue write-index
// operations (add, compare-and-swap, load/store) under concurrent use.
class QueueWriteIndexConcurrentTest : public TestBase {
 public:
    // @Brief: Constructor. There is one flag per subtest (add,
    // compare-and-swap, load/store).
    QueueWriteIndexConcurrentTest(bool launch_Concurrent_AddWriteIndex,
                         bool launch_Concurrent_CasWriteIndex ,
                         bool launch_Concurrent_LoadStoreWriteIndex);

  // @Brief: Destructor for QueueWriteIndexConcurrentTest
  virtual ~QueueWriteIndexConcurrentTest();

  // @Brief: Set up the environment for the test
  virtual void SetUp();

  // @Brief: Core test execution
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);


  // @Brief: Verifies that the hsa_queue_write_index_add operations are atomic,
  // and 'torn' adds do not occur when this API is executed concurrently.
  void QueueAddWriteIndexAtomic(void);

  // @Brief: Verifies that the hsa_queue_cas_write_index operations are atomic,
  // and 'torn' compare and swaps do not occur when this API is executed
  // concurrently.
  void QueueCasWriteIndexAtomic(void);

  // @Brief: Verifies that the hsa_queue_write_index_load and store operations
  // are atomic, and 'torn' loads or stores do not occur when these APIs are executed
  // concurrently.
  void QueueLoadStoreWriteIndexAtomic(void);

 private:
  // Overloads of the three subtests above that take one CPU agent and one
  // GPU agent.
  void QueueAddWriteIndexAtomic(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueCasWriteIndexAtomic(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
  void QueueLoadStoreWriteIndexAtomic(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
};

#endif  // ROCRTST_SUITES_STRESS_QUEUE_WRITE_INDEX_CONCURRENT_TESTS_H_


================================================
FILE: rocrtst/suites/test_common/CMakeLists.txt
================================================
#
#   Required Defines on cmake command line
#
#   1) Set location of ROCR header files
#
#      ROCM_DIR="Root for RocM install"
#
#   2) Set ROCRTST_BLD_TYPE to either "Debug" or "Release".
#      If not set, the default value of "Debug" is used.
#
#      ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
#
#   3) Set ROCRTST_BLD_BITS to either "32" or "64"
#      If not set, the default value of "64" is bound.
#
#       ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
#
#   4) Set TARGET_DEVICES to indicate gpu types for kernel
#      builds (e.g., "gfx803;gfx900; ...")
#
#   Building rocrtst Suite
#
#
#   1) Create build folder e.g. "rocrtst/build" - any name will do
#   2) Cd into build folder
#   3) Run "cmake .."
#   4) Run "make"
#

cmake_minimum_required(VERSION 3.5.0)

# Set Name for Samples Project
#

set(PROJECT_NAME "rocrtst64")
project (${PROJECT_NAME})

if ( NOT DEFINED BUILD_SHARED_LIBS )
  set ( BUILD_SHARED_LIBS ON )
endif()

# For DEB/RPM generation
if(BUILD_SHARED_LIBS)
  set ( CPACK_PACKAGE_NAME "rocrtst" )
else()
  set ( CPACK_RPM_PACKAGE_NAME "rocrtst-static" )
  set ( CPACK_DEBIAN_PACKAGE_NAME "rocrtst-static" )
endif()

set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION "This package includes rocrtst and a convenience script to run the test suite" )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "Test suite for ROCr" )
set ( CPACK_PACKAGE_VERSION_MAJOR "1" )
set ( CPACK_PACKAGE_VERSION_MINOR "0" )
set ( CPACK_PACKAGE_VERSION_PATCH "0" )

# Make proper version for appending
# Default Value is 99999, setting it first
set(ROCM_VERSION_FOR_PACKAGE "99999")
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
endif()
set (PACKAGE_VERSION_STR "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}")
set ( CPACK_PACKAGE_VERSION "${PACKAGE_VERSION_STR}")

set ( CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}"  CACHE STRING "Default packaging prefix." )
set ( CPACK_GENERATOR "DEB;RPM"  CACHE STRING "Default packaging generators." )
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_RPM_PACKAGE_AUTOREQ 0 )
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
## Debian package values
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
if( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
  set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
endif()

if(BUILD_SHARED_LIBS)
  set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core,hsa-rocr")
else()
  set (CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core,hsa-rocr-static-dev")
endif()

## RPM package variables
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
if( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
  set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
endif()
## Add os details to rpm package name. For deb packages, its set from build environment
# The %{?dist} tag is appended only when the DISTRO_NAME environment variable
# names centos, rhel or sles and `rpm --eval %{?dist}` yields a non-empty value.
# if() does not dereference environment variables on its own: a bare
# ENV{DISTRO_NAME} is compared as a literal string and never matches, so the
# value must be written as "$ENV{DISTRO_NAME}".
if (DEFINED ENV{DISTRO_NAME} AND
  ("$ENV{DISTRO_NAME}" MATCHES "centos" OR
   "$ENV{DISTRO_NAME}" MATCHES "rhel" OR
   "$ENV{DISTRO_NAME}" MATCHES "sles"))
  execute_process(COMMAND rpm --eval %{?dist}
          RESULT_VARIABLE PROC_RESULT
          OUTPUT_VARIABLE EVAL_RESULT
          OUTPUT_STRIP_TRAILING_WHITESPACE)
  if (PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
    string (APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
  endif()
endif()


if(BUILD_SHARED_LIBS)
  set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core,hsa-rocr")
else()
  set (CPACK_RPM_PACKAGE_REQUIRES "rocm-core,hsa-rocr-static-devel")
endif()

set(DEFAULT_TARGETS "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx805;gfx810"
                    "gfx900;gfx902;gfx904;gfx906;gfx908;gfx909;gfx90a;gfx90c;gfx942;gfx950"
                    "gfx1010;gfx1011;gfx1012;gfx1013;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
                    "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")

#
# Currently support for Windows platform is not present
#

#############################
# COMMON AREA
#############################
if(WIN32)
  message("rocrtst Suite is not supported on Windows platform")
  return()
endif()

#
# Process input variables
#

# Required Defines first:
find_package(hsa-runtime64 REQUIRED)
find_package(amd_smi REQUIRED)

set (ONLY64STR "64")

if (DEFINED LLVM_DIR)
  set(CLANG ${LLVM_DIR}/clang)
  if (NOT EXISTS ${CLANG})
    message("ERROR: path to clang (${CLANG}) is not valid. Is define LLVM_DIR correct?")
    return()
  endif()
else()
    message("WARNING: LLVM_DIR define is not set. Kernels will not be built.")
endif()

if (DEFINED OPENCL_DIR)
  set(OPENCL_INC_DIR ${OPENCL_DIR}/include)
  set(OPENCL_LIB_DIR ${OPENCL_DIR}/lib)
else()
    message("WARNING: OPENCL_DIR define is not set. Kernels will not be built.")
endif()

if (DEFINED OPENCL_VER)
  set(OPENCL_VER ${OPENCL_VER})
else()
  message("OPENCL_VER define is not set. Using default")
  set(OPENCL_VER "2.0")
endif()

if(NOT EXISTS "${OPENCL_INC_DIR}/opencl-c.h")
  if(DEFINED ENV{LLVM_PROJECT_ROOT})
    set(OPENCL_INC_DIR "$ENV{LLVM_PROJECT_ROOT}/clang/lib/Headers/")
  else()
    set(OPENCL_INC_DIR "${OPENCL_DIR}/../../../external/llvm-project/clang/lib/Headers/")
  endif()
  if(NOT EXISTS "${OPENCL_INC_DIR}/opencl-c.h")
    message(WARNING "opencl-c.h not found.")
  endif()
endif()

if (NOT DEFINED TARGET_DEVICES)
  message("No targets devices provided on command line")
  message("  e.g., cmake -DTARGET_DEVICES=\"gfx803;gfx900;gfx...\" ..")
  message("  Using default target of ${DEFAULT_TARGETS}")
  list(APPEND TARGET_DEVICES ${DEFAULT_TARGETS})
endif()

string(TOLOWER "${ROCRTST_BLD_TYPE}" tmp)
if("${tmp}" STREQUAL release)
  set(BUILD_TYPE "Release")
  set(ISDEBUG 0)
else()
  set(BUILD_TYPE "Debug")
  set(ISDEBUG 1)
endif()

find_path(BITCODE_DIR NAMES "opencl.bc" "opencl.amdgcn.bc"
  PATHS
    "${ROCM_DIR}/amdgcn/bitcode"
    "${ROCM_DIR}/lib/bitcode"
    "${ROCM_DIR}/lib"
    "${ROCM_DIR}/lib/x86_64/bitcode"
    "${OPENCL_DIR}/amdgcn/bitcode"
    "${OPENCL_DIR}/lib/x86_64/bitcode"
    "${LLVM_DIR}/../lib/bitcode"
    "${CMAKE_PREFIX_PATH}/amdgcn/bitcode"
    "${CMAKE_PREFIX_PATH}/lib/bitcode"
    "${CMAKE_PREFIX_PATH}/lib/x86_64/bitcode")

#
# Print out the build configuration being used:
#
#   Build Src directory
#   Build Binary directory
#   Build Type: Debug Vs Release, 32 Vs 64
#   Compiler Version, etc
#
message("")
message("Build Configuration:")
message("-------------IS64BIT: " ${IS64BIT})
message("-----------BuildType: " ${BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("------Target Devices: ${TARGET_DEVICES}")
message("----------Clang path: " ${CLANG})
message("----------OpenCL Dir: " ${OPENCL_DIR})
message("-------OpenCL version " ${OPENCL_VER})
message("")

# Directory containing the OpenCL kernel sources (*.cl) compiled further down.
set(KERNELS_DIR ${PROJECT_SOURCE_DIR}/kernels)
#
# Set the build type based on user input
#
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
#
# Compiler pre-processor definitions.
#
# Define MACRO "DEBUG" if build type is "Debug".
# The variable is referenced by name (not as ${BUILD_TYPE}) so that if()
# performs the dereference itself; this stays well-formed even when the value
# is empty and avoids evaluating the expanded text a second time.
if(BUILD_TYPE STREQUAL "Debug")
  add_definitions(-DDEBUG)
endif()

# Define ROCRTST_EMULATOR_BUILD=1 when EMULATOR_BUILD is set to a true value.
# An unset or empty EMULATOR_BUILD is simply treated as false.
if(EMULATOR_BUILD)
  add_definitions(-DROCRTST_EMULATOR_BUILD=1)
endif()


#add_definitions(-D__linux__)
add_definitions(-DLITTLEENDIAN_CPU=1)

#
# Linux Compiler options
#
set(CMAKE_CXX_STANDARD 17)
# Append each option to CMAKE_CXX_FLAGS, one at a time and in this order.
# (-pedantic is deliberately not part of the list; it is kept disabled.)
foreach(rocrtst_cxx_flag
    -fexceptions
    -fno-rtti
    -fno-math-errno
    -fno-threadsafe-statics
    -fmerge-all-constants
    -fms-extensions
    -Wall
    -Wno-unused)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${rocrtst_cxx_flag}")
endforeach()

#
# Debug builds additionally get gdb symbol information and no optimization
#
if(ISDEBUG)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0")
endif()
MESSAGE("ISDEBUG STEP:Done")

# Root of the rocrtst tree, two levels above this CMakeLists.txt; the gtest,
# common, suites and thirdparty paths used below are all relative to it.
set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)

# Set Name for Google Test Framework and build it as a
# static library to be linked with user test programs
#
# EXCLUDE_FROM_ALL keeps the gtest targets out of the default "all" target;
# the framework library name is added to ROCRTST_LIBS so the rocrtst
# executable links against it.
set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}")
add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest" EXCLUDE_FROM_ALL)
set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME})

MESSAGE("ROCRTST_LIBS SET STEP:Done")
#
#
# Other source directories
# Collect every source file found in <root>/common into common_srcs.
aux_source_directory(${ROCRTST_ROOT}/common common_srcs)

#
# Extend the list of libraries to be used for linking ROC Perf Apps
#
set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime64::hsa-runtime64)
set(ROCRTST_LIBS ${ROCRTST_LIBS} amd_smi)


# Set Name for rocrtst
# The MESSAGE call prints the accumulated library list for diagnostics.
MESSAGE(${ROCRTST_LIBS})
set(ROCRTST "rocrtst${ONLY64STR}")

#
# Gather the source files of each test suite that make up rocrtst
#
aux_source_directory(${ROCRTST_ROOT}/suites/performance performanceSources)
aux_source_directory(${ROCRTST_ROOT}/suites/functional functionalSources)
aux_source_directory(${ROCRTST_ROOT}/suites/negative negativeSources)
aux_source_directory(${ROCRTST_ROOT}/suites/stress stressSources)
aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources)

# Header search path: rocrtst root, bundled gtest headers, third-party headers
include_directories(
  ${ROCRTST_ROOT}
  ${ROCRTST_ROOT}/gtest/include
  ${ROCRTST_ROOT}/thirdparty/include/)

# Custom command set for code objects.
set (HSACO_TARG_LIST "")

# Use this function to build any samples that have kernels to be built.
#
# Registers a custom command that compiles the OpenCL sources in CL_FILE_LIST
# into "<binary dir>/<TARG_DEV>/<S_NAME>_kernels.hsaco" and appends that output
# path to HSACO_TARG_LIST in the caller's scope.
#
# Arguments:
#   S_NAME    Sample name, used as the prefix of the generated code object.
#   TARG_DEV  Target device name; the text after its first three characters
#             (e.g. the "900" of "gfx900") is passed to the kernels as the
#             hexadecimal constant ROCRTST_GPU.
#
# Variables read from the calling scope: CL_FILE_LIST, BITCODE_ARGS,
# OPENCL_INC_DIR, OPENCL_VER, CLANG and HSACO_TARG_LIST.
function(build_kernel S_NAME TARG_DEV)
  set(KERNEL_DIR ${PROJECT_BINARY_DIR}/${TARG_DEV})
  set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")

  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} "${KERNEL_DIR}/${SNAME_KERNEL}" PARENT_SCOPE)
  # Strip the leading three characters of the device name to get the number.
  string(SUBSTRING "${TARG_DEV}" 3 -1 gfxNum)
  # Split the option string into a proper argument list for the command line.
  separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
   "-D ROCRTST_GPU=0x${gfxNum} -x cl -target amdgcn-amd-amdhsa -include ${OPENCL_INC_DIR}/opencl-c.h -mcpu=${TARG_DEV} ${BITCODE_ARGS} -cl-std=CL${OPENCL_VER} -mcode-object-version=4 ${CL_FILE_LIST} -o ${KERNEL_DIR}/${SNAME_KERNEL}")
  # The code object is rebuilt when either the kernel sources or the compiler change.
  add_custom_command(OUTPUT "${KERNEL_DIR}/${SNAME_KERNEL}" COMMAND ${CLANG} ${CLANG_ARG_LIST} DEPENDS ${CL_FILE_LIST} ${CLANG} COMMENT "BUILDING ${KERNEL_DIR}/${SNAME_KERNEL}" VERBATIM)
endfunction(build_kernel)

# Registers a code-object build rule for sample S_NAME for every entry of
# TARGET_DEVICES, then republishes the accumulated HSACO_TARG_LIST to the
# caller's scope.
function(build_sample_for_devices S_NAME)
  foreach(target_dev ${TARGET_DEVICES})
    build_kernel(${S_NAME} ${target_dev})
  endforeach()
  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)
endfunction()

# Create one sub-directory per target device in the build tree.  Each one gets
# a rule that symlinks the rocrtst executable from its parent directory, the
# link path is recorded in ROCRTST_LINKS_LIST, and the whole sub-directory is
# installed under bin.
set(ROCRTST_LINKS_LIST "")
foreach(target_dev ${TARGET_DEVICES})
  file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/${target_dev})
  add_custom_command(
    OUTPUT "${PROJECT_BINARY_DIR}/${target_dev}/${ROCRTST}"
    COMMAND ${CMAKE_COMMAND} -E create_symlink "../${ROCRTST}" "${target_dev}/${ROCRTST}"
    COMMENT "BUILDING ${target_dev}/${ROCRTST}"
    VERBATIM)
  list(APPEND ROCRTST_LINKS_LIST "${PROJECT_BINARY_DIR}/${target_dev}/${ROCRTST}")
  install ( DIRECTORY ${PROJECT_BINARY_DIR}/${target_dev} DESTINATION bin )
endforeach()

######################
# Kernel Build Section
######################
set(KERN_SUFFIX "kernels.hsaco")

# Check if device-libs bitcode is following old or new layout
#
# When "opencl.amdgcn.bc" exists in BITCODE_DIR, the opencl/ockl/ocml bitcode
# files are linked explicitly and -nogpulib is passed; otherwise the compiler
# is only pointed at BITCODE_DIR via --hip-device-lib-path.  BITCODE_ARGS is
# later spliced into the clang command line assembled by build_kernel().
if(EXISTS "${BITCODE_DIR}/opencl.amdgcn.bc")
  set(BITCODE_ARGS "-nogpulib
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/opencl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ockl.amdgcn.bc
    -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/ocml.amdgcn.bc")
else()
  set(BITCODE_ARGS "--hip-device-lib-path=${BITCODE_DIR}")
endif()

# Each section below registers one kernel sample: CL_FILE_LIST names the
# OpenCL source to compile and build_sample_for_devices() adds a code-object
# build rule for it for every target device.
# NOTE(review): BITCODE_LIBS is assigned for every sample but is not referenced
# by build_kernel() as visible here — confirm whether it is still needed.

# Test Case Template example
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/test_case_template_kernels.cl")
build_sample_for_devices("test_case_template")

# P2P Memory Access (currently disabled)
#set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
#set(CL_FILE_LIST "${KERNELS_DIR}/p2p_mem_access_kernels.cl")
#build_sample_for_devices("p2p_mem_access")

# Dispatch Time
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/dispatch_time_kernels.cl")
build_sample_for_devices("dispatch_time")

# gpuReadWrite
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl")
build_sample_for_devices("gpuReadWrite")


# Vector Add Debug Trap
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_debug_trap_kernel.cl")
build_sample_for_devices("vector_add_debug_trap")

# Vector Add Memory Fault
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/vector_add_memory_fault_kernel.cl")
build_sample_for_devices("vector_add_memory_fault")

# Atomic operations
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/atomicOperations_kernels.cl")
build_sample_for_devices("atomicOperations")

# Signal Operations
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/signal_operations.cl")
build_sample_for_devices("signal_operations")

# groupMemoryDynamic
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/groupMemoryDynamic_kernels.cl")
build_sample_for_devices("groupMemoryDynamic")

# CU masking
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/cu_mask_kernels.cl")
build_sample_for_devices("cu_mask")

# Link the executable with its install RPATH already in the build tree.
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)

# Build rules
# The rocrtst executable is built from all suite sources plus the common and
# test_common sources collected above.
add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources}
                                           ${common_srcs} ${testCommonSources})

# Besides ROCRTST_LIBS and the system libraries, link the hwloc shared library
# that ships in the thirdparty/lib directory of the source tree.
target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt numa ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/lib/libhwloc.so.5)

#Build kernels
# Part of "all": depends on every code object registered via build_kernel().
add_custom_target(rocrtst_kernels ALL DEPENDS ${HSACO_TARG_LIST})

#Build symlinks
# Part of "all": depends on the per-device symlinks to the executable.
add_custom_target(rocrtst_links ALL DEPENDS ${ROCRTST_LINKS_LIST} )

## Set RUNPATH to pickup local copy of hwloc
## (--enable-new-dtags is passed to the linker together with the RPATH entries)
set_property(TARGET ${ROCRTST} PROPERTY INSTALL_RPATH "$ORIGIN;$ORIGIN/thirdparty/lib;$ORIGIN/../lib/rocrtst/thirdparty/lib" )
set_property(TARGET ${ROCRTST} PROPERTY LINK_FLAGS "-Wl,--enable-new-dtags")

# Install the executable into bin (the lib destinations cover archive/library outputs).
install(TARGETS ${ROCRTST}
        ARCHIVE DESTINATION lib
        LIBRARY DESTINATION lib
        RUNTIME DESTINATION bin)

# Ship the bundled third-party libraries under lib/rocrtst, which matches the
# last INSTALL_RPATH entry above.
install ( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/lib DESTINATION lib/rocrtst )

include ( CPack )


================================================
FILE: rocrtst/suites/test_common/kernels/atomicOperations_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
/* Atomically adds `value` to this work-item's element of both buffers and
 * records the value sysMemory held before the add in oldValues. */
__kernel void test_atomic_add(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_add(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_add(&gpuMemory[gid], value);
}

/* Atomically subtracts `value` from this work-item's element of both buffers
 * and records the value sysMemory held before the subtraction in oldValues. */
__kernel void test_atomic_sub(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_sub(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_sub(&gpuMemory[gid], value);
}

/* Atomically ANDs `value` into this work-item's element of both buffers and
 * records the value sysMemory held before the operation in oldValues. */
__kernel void test_atomic_and(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_and(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_and(&gpuMemory[gid], value);
}

/* Atomically ORs `value` into this work-item's element of both buffers and
 * records the value sysMemory held before the operation in oldValues. */
__kernel void test_atomic_or(volatile __global int *sysMemory,
                             volatile __global int *gpuMemory,
                             __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_or(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_or(&gpuMemory[gid], value);
}

/* Atomically XORs `value` into this work-item's element of both buffers and
 * records the value sysMemory held before the operation in oldValues. */
__kernel void test_atomic_xor(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_xor(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_xor(&gpuMemory[gid], value);
}

/* Atomically replaces this work-item's element of both buffers with `value`
 * and records the value sysMemory held before the exchange in oldValues. */
__kernel void test_atomic_xchg(volatile __global int *sysMemory,
                               volatile __global int *gpuMemory,
                               __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_xchg(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_xchg(&gpuMemory[gid], value);
}

/* Atomically increments this work-item's element of each buffer four times.
 * The value sysMemory held before its first increment is stored in oldValues. */
__kernel void test_atomic_inc(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues) {
    const int gid = get_global_id(0);

    // First increment also captures the pre-increment value.
    oldValues[gid] = atomic_inc(&sysMemory[gid]);
    // Three further increments bring the total on sysMemory to four.
    for (int rep = 0; rep < 3; ++rep) {
        atomic_inc(&sysMemory[gid]);
    }

    // Four increments on gpuMemory; the returned values are not needed.
    for (int rep = 0; rep < 4; ++rep) {
        atomic_inc(&gpuMemory[gid]);
    }
}

/* Atomically decrements this work-item's element of each buffer four times.
 * The value sysMemory held before its first decrement is stored in oldValues. */
__kernel void test_atomic_dec(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues) {
    const int gid = get_global_id(0);

    // First decrement also captures the pre-decrement value.
    oldValues[gid] = atomic_dec(&sysMemory[gid]);
    // Three further decrements bring the total on sysMemory to four.
    for (int rep = 0; rep < 3; ++rep) {
        atomic_dec(&sysMemory[gid]);
    }

    // Four decrements on gpuMemory; the returned values are not needed.
    for (int rep = 0; rep < 4; ++rep) {
        atomic_dec(&gpuMemory[gid]);
    }
}

/* Atomically stores max(current, value) into this work-item's element of both
 * buffers and records the value sysMemory held beforehand in oldValues. */
__kernel void test_atomic_max(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_max(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_max(&gpuMemory[gid], value);
}

/* Atomically stores min(current, value) into this work-item's element of both
 * buffers and records the value sysMemory held beforehand in oldValues. */
__kernel void test_atomic_min(volatile __global int *sysMemory,
                              volatile __global int *gpuMemory,
                              __global int *oldValues, int value) {
    const int gid = get_global_id(0);
    const int previous = atomic_min(&sysMemory[gid], value);
    oldValues[gid] = previous;
    atomic_min(&gpuMemory[gid], value);
}


================================================
FILE: rocrtst/suites/test_common/kernels/cu_mask_kernels.cl
================================================
/* Packs a register-read descriptor: (SIZE - 1) is placed at bit 11, OFFSET at
 * bit 6 and REG in the low bits.  Every argument and the whole expansion are
 * parenthesized so the macro can be used safely inside larger expressions. */
#define GETREG_IMMED(SIZE, OFFSET, REG) ((((SIZE)-1)<<11)|((OFFSET)<<6)|(REG))

// Select the register number (HW_ID) and the bit-field (offset and size) that
// get_hw_id() reads, based on the ROCRTST_GPU value the kernel is compiled with.
// Targets below 0x1000 use register 4 with the field starting at bit 8; all
// other targets use register 23 with a 10-bit field starting at bit 9.
// NOTE(review): only 0x908, 0x90a and 0x940 get the 8-bit field size — confirm
// whether other targets in the same family need it as well.
#if ROCRTST_GPU < 0x1000
  #define HW_ID_CU_ID_OFFSET 8
  #define HW_ID 4
  #if (ROCRTST_GPU == 0x908) || (ROCRTST_GPU == 0x90a) || (ROCRTST_GPU == 0x940)
    #define HW_ID_CU_ID_SIZE 8
  #else
    #define HW_ID_CU_ID_SIZE 7
  #endif
#else
  #define HW_ID_CU_ID_OFFSET 9 //Skips first bit of SIMD ID, could be wrong.
  #define HW_ID 23
  #define HW_ID_CU_ID_SIZE 10
#endif

/* Each work-item reads the selected hardware-ID bit-field with s_getreg and
 * stores it in hw_ids at its global index. */
__kernel void get_hw_id(__global uint* hw_ids) {
  const uint gid = get_global_id(0);
  const uint hw_id_bits =
      __builtin_amdgcn_s_getreg(GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID));
  hw_ids[gid] = hw_id_bits;
}


================================================
FILE: rocrtst/suites/test_common/kernels/dispatch_time_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/* Kernel with an empty body: it takes no arguments and does no work. */
__kernel void empty_kernel(void) {
}
 

================================================
FILE: rocrtst/suites/test_common/kernels/gpuReadWrite_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

/* Per work-item: copies a[gid] into c[gid] and writes the work-item's own
 * global index into b[gid]. */
__kernel void gpuReadWrite(__global const int * a,
                           __global int * b, __global int * c) {
  const int gid = get_global_id(0);
  // Read from a (system memory) and store the value into c (gpu memory).
  const int value_from_a = a[gid];
  c[gid] = value_from_a;
  // Write the index back to b (system memory).
  b[gid] = gid;
}


================================================
FILE: rocrtst/suites/test_common/kernels/groupMemoryDynamic_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2018, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


/*
 * Copies one uint per work-item from `in` to `out`, staging the value through
 * the caller-supplied __local buffer `grp_offset`.  Work-items whose global id
 * compares greater than `count` return without touching memory.
 */
__kernel void group_memory_dynamic(__global uint * in,
                            __global uint * out,__local uint *grp_offset,__private uint count) {
  // NOTE(review): `grp` is declared but never referenced in this kernel —
  // confirm whether it is intentionally reserving static group memory.
  __local uint grp[256];
  __private int gid = get_global_id(0);
  // NOTE(review): the guard lets gid == count through and compares a signed
  // id with an unsigned count — confirm against the host-side launch size.
  if( gid > count)
    return;
  // Round-trip the value through local memory before writing it out.
  grp_offset[gid] = in[gid];
  out[gid] = grp_offset[gid];
  return;
}


================================================
FILE: rocrtst/suites/test_common/kernels/signal_operations.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */


#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

/*
 * Stores 0 into the 64-bit value addressed by dumy_signal using an atomic
 * exchange.  Every work-item writes the same location.
 */
__kernel void signal_st_rlx_kernel(__global void* dumy_signal)
{
 volatile __global long* p = (volatile __global long* )(dumy_signal);
 atom_xchg(p,0);
}

/*
 * Treats dumy_signal as an array of 64-bit values and atomically stores 0 into
 * the element selected by the work-item's global id.
 */
__kernel void signal_st_rlx_kernel_multi(__global void* dumy_signal)
{
 int  tid = get_global_id(0);
 // Index through a typed pointer instead of doing byte arithmetic on void*:
 // each handle is a long (8 bytes), so element tid lies 8*tid bytes in.
 volatile __global long* p = (volatile __global long* )(dumy_signal) + tid;
 atom_xchg(p,0);
}

/*
 * Spins until the 64-bit value addressed by dumy_signal reads as 0.
 */
__kernel void signal_wait_kernel(__global void* dumy_signal)
{
 volatile __global long* p = (volatile __global long* )(dumy_signal);
 // The pointer is volatile so that every loop iteration re-reads memory;
 // there is no atom_cmp() function in the Khronos spec to use instead.
 while(!(*p == 0)) {  }
}

/*
 * Treats dumy_signal as an array of 64-bit values; each work-item spins until
 * the element selected by its global id reads as 0.
 */
__kernel void signal_wait_kernel_multi(__global void* dumy_signal)
{
 int  tid = get_global_id(0);
 // Index through a typed pointer instead of doing byte arithmetic on void*:
 // each handle is a long (8 bytes), so element tid lies 8*tid bytes in.
 volatile __global long* p = (volatile __global long* )(dumy_signal) + tid;
 // The pointer is volatile so that every loop iteration re-reads memory;
 // there is no atom_cmp() function in the Khronos spec to use instead.
 while(!(*p == 0)) {  }
}


================================================
FILE: rocrtst/suites/test_common/kernels/test_case_template_kernels.cl
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

 /* Writes the square of srcArray[i] to dstArray[i] for every work-item whose
  * global id i is below sz; other work-items do nothing. */
 __kernel void
square(__global int *dstArray,  __global const int *srcArray, const int sz) {
  const unsigned int gid = get_global_id(0);
  if (!(gid < sz)) {
    return;
  }
  const int element = srcArray[gid];
  dstArray[gid] = element * element;
}
 

================================================
FILE: rocrtst/suites/test_common/kernels/vector_add_debug_trap_kernel.cl
================================================
/* Stores a[i] + b[i] into c[i] for the work-item's global id i and then
 * executes __builtin_trap(). */
__kernel void vector_add_debug_trap(__global int *a,
                                    __global int *b,
                                    __global int *c)
{
  const int idx = get_global_id(0);
  const int sum = a[idx] + b[idx];
  c[idx] = sum;
  __builtin_trap();
}


================================================
FILE: rocrtst/suites/test_common/kernels/vector_add_memory_fault_kernel.cl
================================================
// Program-scope table in global memory holding the consecutive values 3..35.
// The kernel below indexes it with (gid & 31), so only the first 32 of the 33
// entries are ever read.
static __global int ga[] = { 3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35 };

// Each work-item writes ga[gid & 31] to d[gid*10].  The parameters a, b, c and
// e are accepted but not used by the kernel body.
// NOTE(review): the kernel name and the stride of 10 suggest the store is meant
// to run outside the buffer bound to d and provoke a memory fault — confirm
// against the host-side test.
__kernel void
vector_add_memory_fault(
  __global const int *a,
  __global const int *b,
  __global const int *c,
  __global int *d,
  __global int *e)
{
    int gid = get_global_id(0);
    d[gid*10] = ga[gid & 31];
}


================================================
FILE: rocrtst/suites/test_common/main.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <string>
#include <vector>
#include <memory>

#include "gtest/gtest.h"
#include "suites/functional/agent_props.h"
#include "suites/functional/debug_basic.h"
#include "suites/functional/memory_basic.h"
#include "suites/functional/memory_access.h"
#include "suites/functional/ipc.h"
#include "suites/functional/memory_alignment.h"
#include "suites/functional/memory_atomics.h"
#include "suites/functional/memory_allocation.h"
#include "suites/functional/deallocation_notifier.h"
#include "suites/functional/virtual_memory.h"
#include "suites/performance/dispatch_time.h"
#include "suites/performance/memory_async_copy.h"
#include "suites/performance/memory_async_copy_numa.h"
#include "suites/performance/enqueueLatency.h"
#include "suites/negative/memory_allocate_negative_tests.h"
#include "suites/negative/queue_validation.h"
#include "suites/stress/memory_concurrent_tests.h"
#include "suites/stress/queue_write_index_concurrent_tests.h"
#include "suites/test_common/test_case_template.h"
#include "suites/test_common/main.h"
#include "suites/test_common/test_common.h"
#include "suites/functional/concurrent_init.h"
#include "suites/functional/concurrent_init_shutdown.h"
#include "suites/functional/concurrent_shutdown.h"
#include "suites/functional/reference_count.h"
#include "suites/functional/signal_concurrent.h"
#include "suites/functional/aql_barrier_bit.h"
#include "suites/functional/signal_kernel.h"
#include "suites/functional/cu_masking.h"
#include "amd_smi/amdsmi.h"

// File-local pointer to the global test settings read by SetFlags(); it starts
// out null and must be assigned before any test is run.
static RocrTstGlobals *sRocrtstGlvalues = nullptr;

static void SetFlags(TestBase *test) {
  assert(sRocrtstGlvalues != nullptr);

  test->set_num_iteration(sRocrtstGlvalues->num_iterations);
  test->set_verbosity(sRocrtstGlvalues->verbosity);
  test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity);
}

// Applies the global flags to |test|, prints its banner and then executes its
// SetUp() and Run() phases.
//
// TestBase::SetUp() reports errors with gtest ASSERT_* macros, which only
// return from the function that contains them. Run() is therefore skipped
// when SetUp() recorded a fatal failure, so that it is not executed against
// a runtime that failed to initialize.
static void RunCustomTestProlog(TestBase *test) {
  SetFlags(test);

  test->DisplayTestInfo();
  test->SetUp();
  if (::testing::Test::HasFatalFailure()) {
    return;
  }
  test->Run();
}
// Finishes a test started with RunCustomTestProlog(): prints the results
// section and then closes the test.
static void RunCustomTestEpilog(TestBase *test) {
  test->DisplayResults();
  test->Close();
}

// If the test case is one big test, you should use RunGenericTest()
// to run the test case. OTOH, if the test case consists of multiple
// functions to be run as separate tests, follow this pattern:
//   * RunCustomTestProlog(test)  // Run() should contain minimal code
//   * <insert call to actual test function within test case>
//   * RunCustomTestEpilog(test)
// Runs the whole life cycle of |test|: the custom prolog immediately followed
// by the custom epilog, with no extra test function in between.
static void RunGenericTest(TestBase *test) {
  RunCustomTestProlog(test);
  RunCustomTestEpilog(test);
}

// TEST ENTRY TEMPLATE:
// TEST(rocrtst, Perf_<test name>) {
//  <Test Implementation class> <test_obj>;
//
//  // Copy and modify implementation of RunGenericTest() if you need to deviate
//  // from the standard pattern implemented there.
//  RunGenericTest(&<test_obj>);
// }

// Runs the template test case (TestExample) through the generic
// prolog/epilog flow.
TEST(rocrtst, Test_Example) {
  TestExample tst;

  RunGenericTest(&tst);
}

// Runs both memory access sub-tests (CPU access to GPU memory, then GPU
// access to CPU memory) on one MemoryAccessTest instance.
TEST(rocrtstFunc, MemoryAccessTests) {
  MemoryAccessTest mt;
  RunCustomTestProlog(&mt);
  mt.CPUAccessToGPUMemoryTest();
  mt.GPUAccessToCPUMemoryTest();
  RunCustomTestEpilog(&mt);
}

// Runs the dynamic group memory allocation sub-test.
// NOTE(review): the two constructor flags presumably select which sub-test
// the instance is configured for -- confirm against MemoryAllocationTest.
TEST(rocrtstFunc, GroupMemoryAllocationTest) {
  MemoryAllocationTest ma(true, false);
  RunCustomTestProlog(&ma);
  ma.GroupMemoryDynamicAllocation();
  RunCustomTestEpilog(&ma);
}

// Runs the basic allocate-and-free sub-test.
TEST(rocrtstFunc, MemoryAllocateAndFreeTest) {
  MemoryAllocationTest ma(false, true);
  RunCustomTestProlog(&ma);
  ma.MemoryBasicAllocationAndFree();
  RunCustomTestEpilog(&ma);
}

// Runs the contiguous allocation sub-test; it uses the same constructor flags
// as MemoryAllocateAndFreeTest.
TEST(rocrtstFunc, MemoryAllocateContiguousTest) {
  MemoryAllocationTest ma(false, true);
  RunCustomTestProlog(&ma);
  ma.MemoryAllocateContiguousTest();
  RunCustomTestEpilog(&ma);
}

// Runs ConcurrentInitTest::TestConcurrentInit() between the custom prolog and
// epilog.
TEST(rocrtstFunc, Concurrent_Init_Test) {
  ConcurrentInitTest ci;
  RunCustomTestProlog(&ci);
  ci.TestConcurrentInit();
  RunCustomTestEpilog(&ci);
}

// Runs ConcurrentInitShutdownTest::TestConcurrentInitShutdown() between the
// custom prolog and epilog.
TEST(rocrtstFunc, Concurrent_Init_Shutdown_Test) {
  ConcurrentInitShutdownTest ci;
  RunCustomTestProlog(&ci);
  ci.TestConcurrentInitShutdown();
  RunCustomTestEpilog(&ci);
}
// Runs ConcurrentShutdownTest::TestConcurrentShutdown() between the custom
// prolog and epilog.
TEST(rocrtstFunc, Concurrent_Shutdown) {
  ConcurrentShutdownTest cs;
  RunCustomTestProlog(&cs);
  cs.TestConcurrentShutdown();
  RunCustomTestEpilog(&cs);
}

// Runs the reference count sub-test.
// NOTE(review): the two constructor flags presumably select which of the two
// ReferenceCountTest sub-tests the instance is configured for -- confirm.
TEST(rocrtstFunc, Reference_Count) {
  ReferenceCountTest rc(true, false);
  RunCustomTestProlog(&rc);
  rc.TestReferenceCount();
  RunCustomTestEpilog(&rc);
}

// Runs the maximum reference count sub-test (constructor flags swapped
// relative to Reference_Count).
TEST(rocrtstFunc, Max_Reference_Count) {
  ReferenceCountTest rc(false, true);
  RunCustomTestProlog(&rc);
  rc.TestMaxReferenceCount();
  RunCustomTestEpilog(&rc);
}

// Runs the concurrent signal destroy sub-test.
// NOTE(review): exactly one of the four constructor flags is set in each
// SignalConcurrentTest below; presumably it selects the sub-test -- confirm
// against SignalConcurrentTest.
TEST(rocrtstFunc, Signal_Destroy_Concurrently) {
  SignalConcurrentTest sd(true, false, false, false);
  RunCustomTestProlog(&sd);
  sd.TestSignalDestroyConcurrent();
  RunCustomTestEpilog(&sd);
}

// Runs the "create signal with max consumers" sub-test.
TEST(rocrtstFunc, Signal_Max_Consumer) {
  SignalConcurrentTest sd(false, true, false, false);
  RunCustomTestProlog(&sd);
  sd.TestSignalCreateMaxConsumers();
  RunCustomTestEpilog(&sd);
}

// Runs the concurrent signal create sub-test.
TEST(rocrtstFunc, Signal_Create_Concurrently) {
  SignalConcurrentTest sd(false, false, false, true);
  RunCustomTestProlog(&sd);
  sd.TestSignalCreateConcurrent();
  RunCustomTestEpilog(&sd);
}

/* Temporary: Disable CU Masking until it is fixed */
// The DISABLED_ prefix makes googletest skip this test unless disabled tests
// are explicitly requested.
TEST(rocrtstFunc, DISABLED_CU_Masking) {
  CU_Masking sd;
  RunGenericTest(&sd);
}

#ifndef ROCRTST_EMULATOR_BUILD
// Runs IPCTest through the generic prolog/epilog flow. Like every test up to
// the matching #endif, it is compiled out of emulator builds.
TEST(rocrtstFunc, IPC) {
  IPCTest ipc;
  RunGenericTest(&ipc);
}

// The Signal_Kernel_* and Aql_Barrier_Bit_* tests below carry the DISABLED_
// prefix, so googletest skips them unless disabled tests are explicitly
// requested.

// Runs the kernel signal "set" sub-test on a SignalKernelTest built with SET.
TEST(rocrtstFunc, DISABLED_Signal_Kernel_Set) {
  SignalKernelTest sk(SET);
  RunCustomTestProlog(&sk);
  sk.TestSignalKernelSet();
  RunCustomTestEpilog(&sk);
}

// Runs the kernel signal "multi set" sub-test (constructed with MULTISET).
TEST(rocrtstFunc, DISABLED_Signal_Kernel_Multi_Set) {
  SignalKernelTest sk(MULTISET);
  RunCustomTestProlog(&sk);
  sk.TestSignalKernelMultiSet();
  RunCustomTestEpilog(&sk);
}

// Runs the kernel signal "wait" sub-test (constructed with WAIT).
TEST(rocrtstFunc, DISABLED_Signal_Kernel_Wait) {
  SignalKernelTest sw(WAIT);
  RunCustomTestProlog(&sw);
  sw.TestSignalKernelWait();
  RunCustomTestEpilog(&sw);
}

// Runs the kernel signal "multi wait" sub-test (constructed with MULTIWAIT).
TEST(rocrtstFunc, DISABLED_Signal_Kernel_Multi_Wait) {
  SignalKernelTest sw(MULTIWAIT);
  RunCustomTestProlog(&sw);
  sw.TestSignalKernelMultiWait();
  RunCustomTestEpilog(&sw);
}

// Runs the AQL barrier bit "set" sub-test.
// NOTE(review): the two constructor flags presumably select the set / not-set
// variant -- confirm against AqlBarrierBitTest.
TEST(rocrtstFunc, DISABLED_Aql_Barrier_Bit_Set) {
  AqlBarrierBitTest ab(true, false);
  RunCustomTestProlog(&ab);
  ab.BarrierBitSet();
  RunCustomTestEpilog(&ab);
}

// Runs the AQL barrier bit "not set" sub-test.
TEST(rocrtstFunc, DISABLED_Aql_Barrier_Bit_Not_Set) {
  AqlBarrierBitTest ab(false, true);
  RunCustomTestProlog(&ab);
  ab.BarrierBitNotSet();
  RunCustomTestEpilog(&ab);
}

// Runs MemoryTest::MaxSingleAllocationTest() between the custom prolog and
// epilog.
TEST(rocrtstFunc, Memory_Max_Mem) {
  MemoryTest mt;

  RunCustomTestProlog(&mt);
  mt.MaxSingleAllocationTest();
  RunCustomTestEpilog(&mt);
}

// Runs MemoryTest::MemAvailableTest() between the custom prolog and epilog.
TEST(rocrtstFunc, Memory_Available) {
  MemoryTest mt;

  RunCustomTestProlog(&mt);
  mt.MemAvailableTest();
  RunCustomTestEpilog(&mt);
}


// The Memory_Atomic_* tests below all call MemoryAtomic::MemoryAtomicTest();
// they differ only in the operation constant passed to the constructor.

// Atomic test constructed with ADD.
TEST(rocrtstFunc, Memory_Atomic_Add_Test) {
  MemoryAtomic ma(ADD);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with SUB.
TEST(rocrtstFunc, Memory_Atomic_Sub_Test) {
  MemoryAtomic ma(SUB);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with AND.
TEST(rocrtstFunc, Memory_Atomic_And_Test) {
  MemoryAtomic ma(AND);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with OR.
TEST(rocrtstFunc, Memory_Atomic_Or_Test) {
  MemoryAtomic ma(OR);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with XOR.
TEST(rocrtstFunc, Memory_Atomic_Xor_Test) {
  MemoryAtomic ma(XOR);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with MIN.
TEST(rocrtstFunc, Memory_Atomic_Min_Test) {
  MemoryAtomic ma(MIN);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with MAX.
TEST(rocrtstFunc, Memory_Atomic_Max_Test) {
  MemoryAtomic ma(MAX);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with INC.
TEST(rocrtstFunc, Memory_Atomic_Inc_Test) {
  MemoryAtomic ma(INC);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with DEC.
TEST(rocrtstFunc, Memory_Atomic_Dec_Test) {
  MemoryAtomic ma(DEC);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Atomic test constructed with XCHG.
TEST(rocrtstFunc, Memory_Atomic_Xchg_Test) {
  MemoryAtomic ma(XCHG);
  RunCustomTestProlog(&ma);
  ma.MemoryAtomicTest();
  RunCustomTestEpilog(&ma);
}

// Runs DebugBasicTest::VectorAddDebugTrapTest(). The DISABLED_ prefix makes
// googletest skip it unless disabled tests are explicitly requested.
TEST(rocrtstFunc, DISABLED_DebugBasicTests) {
  DebugBasicTest mt;
  RunCustomTestProlog(&mt);
  mt.VectorAddDebugTrapTest();
  RunCustomTestEpilog(&mt);
}

// Runs MemoryAlignmentTest::MemoryPoolAlignment() between the custom prolog
// and epilog.
TEST(rocrtstFunc, Memory_Alignment_Test) {
  MemoryAlignmentTest ma;
  RunCustomTestProlog(&ma);
  ma.MemoryPoolAlignment();
  RunCustomTestEpilog(&ma);
}

// Runs DeallocationNotifierTest through the generic prolog/epilog flow.
TEST(rocrtstFunc, Deallocation_Notifier_Test) {
  DeallocationNotifierTest notifier;
  RunGenericTest(&notifier);
}

// Runs the agent UUID query followed by the agent clock counter query on one
// AgentPropTest instance.
TEST(rocrtstFunc, AgentPropertiesTests) {
  AgentPropTest propTest;
  RunCustomTestProlog(&propTest);
  propTest.QueryAgentUUID();
  propTest.QueryAgentClockCounters();
  RunCustomTestEpilog(&propTest);
}

// Runs the create/destroy, ref-count and partial-mapping virtual memory
// sub-tests, in that order, on one VirtMemoryTestBasic instance.
TEST(rocrtstFunc, VirtMemory_Basic_Test) {
  VirtMemoryTestBasic vmt;

  RunCustomTestProlog(&vmt);
  vmt.TestCreateDestroy();
  vmt.TestRefCount();
  vmt.TestPartialMapping();
  RunCustomTestEpilog(&vmt);
}

// Runs the CPU->GPU, GPU->CPU and GPU->GPU virtual memory access sub-tests,
// in that order, on one VirtMemoryTestBasic instance.
TEST(rocrtstFunc, VirtMemory_Access_Test) {
  VirtMemoryTestBasic vmt;

  RunCustomTestProlog(&vmt);
  vmt.CPUAccessToGPUMemoryTest();
  vmt.GPUAccessToCPUMemoryTest();
  vmt.GPUAccessToGPUMemoryTest();
  RunCustomTestEpilog(&vmt);
}

// No separate test function is called here; only the prolog (which invokes
// SetUp() and Run()) and the epilog are executed.
TEST(rocrtstFunc, VirtMemory_Interprocess_Test) {
  VirtMemoryTestInterProcess vmt;
  RunCustomTestProlog(&vmt);
  RunCustomTestEpilog(&vmt);
}

// Runs the zero-size and max-size negative allocation sub-tests on one
// MemoryAllocateNegativeTest instance.
TEST(rocrtstNeg, Memory_Negative_Tests) {
  MemoryAllocateNegativeTest mt;
  RunCustomTestProlog(&mt);
  mt.ZeroMemoryAllocateTest();
  mt.MaxMemoryAllocateTest();

  // Disabled temporarily - Re-enable this test only
  // on recent GPUs - gfx94x+
  // mt.FreeQueueRingBufferTest();

  RunCustomTestEpilog(&mt);
}

// The Queue_Validation_* tests below each set exactly one of the five
// constructor flags and call the matching validation function.
// NOTE(review): the flag presumably selects the sub-test -- confirm against
// QueueValidation.

// Queue validation with an invalid dimension.
TEST(rocrtstNeg, Queue_Validation_InvalidDimension) {
  QueueValidation qv(true, false, false, false, false);
  RunCustomTestProlog(&qv);
  qv.QueueValidationForInvalidDimension();
  RunCustomTestEpilog(&qv);
}

// Queue validation with invalid group memory.
TEST(rocrtstNeg, Queue_Validation_InvalidGroupMemory) {
  QueueValidation qv(false, true, false, false, false);
  RunCustomTestProlog(&qv);
  qv.QueueValidationInvalidGroupMemory();
  RunCustomTestEpilog(&qv);
}

// Queue validation with an invalid kernel object.
TEST(rocrtstNeg, Queue_Validation_InvalidKernelObject) {
  QueueValidation qv(false, false, true, false, false);
  RunCustomTestProlog(&qv);
  qv.QueueValidationForInvalidKernelObject();
  RunCustomTestEpilog(&qv);
}

// Queue validation with an invalid packet.
TEST(rocrtstNeg, Queue_Validation_InvalidPacket) {
  QueueValidation qv(false, false, false, true, false);
  RunCustomTestProlog(&qv);
  qv.QueueValidationForInvalidPacket();
  RunCustomTestEpilog(&qv);
}

// Queue validation with an invalid work-group size. The DISABLED_ prefix
// makes googletest skip it unless disabled tests are explicitly requested.
TEST(rocrtstNeg, DISABLED_Queue_Validation_InvalidWorkGroupSize) {
  QueueValidation qv(false, false, false, false, true);
  RunCustomTestProlog(&qv);
  qv.QueueValidationForInvalidWorkGroupSize();
  RunCustomTestEpilog(&qv);
}

// The Memory_Concurrent_* tests each set exactly one of the three constructor
// flags and call the matching MemoryConcurrentTest function.
// NOTE(review): the flag presumably selects the sub-test -- confirm against
// MemoryConcurrentTest.

// Concurrent allocation stress test.
TEST(rocrtstStress, Memory_Concurrent_Allocate_Test) {
  MemoryConcurrentTest mt(true, false, false);
  RunCustomTestProlog(&mt);
  mt.MemoryConcurrentAllocate();
  RunCustomTestEpilog(&mt);
}

// Concurrent free stress test.
TEST(rocrtstStress, Memory_Concurrent_Free_Test) {
  MemoryConcurrentTest mt(false, true, false);
  RunCustomTestProlog(&mt);
  mt.MemoryConcurrentFree();
  RunCustomTestEpilog(&mt);
}

// Concurrent pool-info query stress test.
TEST(rocrtstStress, Memory_Concurrent_Pool_Info_Test) {
  MemoryConcurrentTest mt(false, false, true);
  RunCustomTestProlog(&mt);
  mt.MemoryConcurrentPoolGetInfo();
  RunCustomTestEpilog(&mt);
}

// The Queue_*_Write_Index_ConcurrentTest tests follow the same pattern with
// QueueWriteIndexConcurrentTest: one flag set, one matching function called.

// Write-index stress test using QueueAddWriteIndexAtomic().
TEST(rocrtstStress, Queue_Add_Write_Index_ConcurrentTest) {
  QueueWriteIndexConcurrentTest Qw(true, false, false);
  RunCustomTestProlog(&Qw);
  Qw.QueueAddWriteIndexAtomic();
  RunCustomTestEpilog(&Qw);
}

// Write-index stress test using QueueCasWriteIndexAtomic().
TEST(rocrtstStress, Queue_CAS_Write_Index_ConcurrentTest) {
  QueueWriteIndexConcurrentTest Qw(false, true, false);
  RunCustomTestProlog(&Qw);
  Qw.QueueCasWriteIndexAtomic();
  RunCustomTestEpilog(&Qw);
}

// Write-index stress test using QueueLoadStoreWriteIndexAtomic().
TEST(rocrtstStress, Queue_LoadStore_Write_Index_ConcurrentTest) {
  QueueWriteIndexConcurrentTest Qw(false, false, true);
  RunCustomTestProlog(&Qw);
  Qw.QueueLoadStoreWriteIndexAtomic();
  RunCustomTestEpilog(&Qw);
}

// Runs MemoryAsyncCopy with its default configuration through the generic
// prolog/epilog flow. This is the last test excluded from emulator builds.
TEST(rocrtstPerf, Memory_Async_Copy) {
  MemoryAsyncCopy mac;
  // To do full test, uncomment this:
  //  mac.set_full_test(true);
  // To test only 1 path, add lines like this:
  //  mac.set_src_pool(<src pool id>);
  //  mac.set_dst_pool(<dst pool id>);
  // The default is to and from the cpu to 1 gpu, and to/from a gpu to
  // another gpu
  RunGenericTest(&mac);
}
#endif  // ROCRTST_EMULATOR_BUILD

// Runs two EnqueueLatency instances back to back, one constructed with true
// and one with false.
// NOTE(review): judging by the variable names the flag selects single- vs
// multi-packet enqueue -- confirm against EnqueueLatency.
TEST(rocrtstPerf, ENQUEUE_LATENCY) {
  EnqueueLatency singlePacketequeue(true);
  EnqueueLatency multiPacketequeue(false);
  RunGenericTest(&singlePacketequeue);
  RunGenericTest(&multiPacketequeue);
}

// Runs MemoryAsyncCopyNUMA through the generic flow. The DISABLED_ prefix
// makes googletest skip it unless disabled tests are explicitly requested.
TEST(rocrtstPerf, DISABLED_Memory_Async_Copy_NUMA) {
  MemoryAsyncCopyNUMA numa;
  RunGenericTest(&numa);
}

// The four AQL_Dispatch_Time_* tests cover every combination of the two
// DispatchTime constructor flags.
// NOTE(review): judging by the test names the first flag selects spin-wait
// (true) vs interrupt (false) and the second single (true) vs multi (false)
// dispatch -- confirm against DispatchTime.

// DispatchTime(true, true).
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_SpinWait) {
  DispatchTime dt(true, true);
  RunGenericTest(&dt);
}

// DispatchTime(false, true).
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_Interrupt) {
  DispatchTime dt(false, true);
  RunGenericTest(&dt);
}

// DispatchTime(true, false).
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_SpinWait) {
  DispatchTime dt(true, false);
  RunGenericTest(&dt);
}

// DispatchTime(false, false).
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_Interrupt) {
  DispatchTime dt(false, false);
  RunGenericTest(&dt);
}

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);

  #ifdef ROCRTST_EMULATOR_BUILD
  std::cout << "--- Emulation build ---" << std::endl;
  #endif

  RocrTstGlobals settings;

  // Set some default values
  settings.verbosity = 1;
  settings.monitor_verbosity = 0;
  settings.num_iterations = 5;

  if (ProcessCmdline(&settings, argc, argv)) {
    return 1;
  }
  sRocrtstGlvalues = &settings;

  if (settings.monitor_verbosity > 0) {
    amdsmi_status_t amdsmi_ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
    if (amdsmi_ret != AMDSMI_STATUS_SUCCESS) {
      std::cout << "Failed to initialize AMD smi" << std::endl;
      return 1;
    }
    DumpMonitorInfo();
  }
  return RUN_ALL_TESTS();
}


================================================
FILE: rocrtst/suites/test_common/main.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_TEST_COMMON_MAIN_H_
#define ROCRTST_SUITES_TEST_COMMON_MAIN_H_

#endif  // ROCRTST_SUITES_TEST_COMMON_MAIN_H_


================================================
FILE: rocrtst/suites/test_common/test_base.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <assert.h>

#include "suites/test_common/test_base.h"
#include "suites/test_common/test_common.h"
#include "common/base_rocr_utils.h"
#include "gtest/gtest.h"

// Width, in characters, that test output is formatted for. set_description()
// wraps the description text at kOutputLineLength - 4.
static const int kOutputLineLength = 80;
// Marker that MakeHeaderStr() places on both sides of every section label.
static const char kLabelDelimiter[] = "####";
// Section labels printed by the TestBase display methods.
static const char kDescriptionLabel[] = "TEST DESCRIPTION";
static const char kTitleLabel[] = "TEST NAME";
static const char kSetupLabel[] = "TEST SETUP";
static const char kRunLabel[] = "TEST EXECUTION";
static const char kCloseLabel[] = "TEST CLEAN UP";
static const char kResultsLabel[] = "TEST RESULTS";


// Starts out with an empty description; a test supplies the real text through
// set_description().
TestBase::TestBase() : description_() {}

TestBase::~TestBase() {}

// Builds a section header of the form "<delimiter> <inStr> <delimiter>" and
// stores it in |*outStr|, replacing any previous content. Both pointers must
// be non-null.
static void MakeHeaderStr(const char *inStr, std::string *outStr) {
  assert(outStr != nullptr);
  assert(inStr != nullptr);

  outStr->assign(kLabelDelimiter);
  outStr->append(" ").append(inStr).append(" ").append(kLabelDelimiter);
}

// Prints the "TEST SETUP" section header.
void TestBase::SetupPrint() {
  std::string setup_header;
  MakeHeaderStr(kSetupLabel, &setup_header);

  printf("\n\t%s\n", setup_header.c_str());
}

void TestBase::SetUp(void) {
  hsa_status_t err;
  SetupPrint();
  err = rocrtst::InitAndSetupHSA(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  return;
}

// Prints the "TEST EXECUTION" section header. The base implementation does
// nothing else.
void TestBase::Run(void) {
  std::string run_header;
  MakeHeaderStr(kRunLabel, &run_header);

  printf("\n\t%s\n", run_header.c_str());
}

// Prints the "TEST CLEAN UP" section header.
void TestBase::ClosePrint() {
  std::string close_header;
  MakeHeaderStr(kCloseLabel, &close_header);

  printf("\n\t%s\n", close_header.c_str());
}

void TestBase::Close(void) {
  hsa_status_t err;
  ClosePrint();
  if (monitor_verbosity() > 0) {
    DumpMonitorInfo();
  }

  err = rocrtst::CommonCleanUp(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);
}


// Prints the "TEST RESULTS" section header. The base implementation prints
// no results of its own.
void TestBase::DisplayResults(void) const {
  std::string results_header;
  MakeHeaderStr(kResultsLabel, &results_header);

  printf("\n\t%s\n", results_header.c_str());
}

// Prints a separator line and the test title. The description is printed as
// well when the verbosity is at least VERBOSE_STANDARD.
void TestBase::DisplayTestInfo(void) {
  printf("#########################################"
                                  "######################################\n");

  std::string header;
  MakeHeaderStr(kTitleLabel, &header);
  printf("\n\t%s\n%s\n", header.c_str(), title().c_str());

  // Below standard verbosity only the title is shown.
  if (verbosity() < VERBOSE_STANDARD) {
    return;
  }

  MakeHeaderStr(kDescriptionLabel, &header);
  printf("\n\t%s\n%s\n", header.c_str(), description().c_str());
}

// Stores |d| as the test description, word-wrapped for output.
//
// Starting from the beginning of each output line, the last space at or
// before the wrap column (kOutputLineLength - 4 characters further on) is
// replaced by a newline. When the current line holds no space at all (a
// single word longer than the wrap width), the line is broken at the next
// space after the wrap column instead; if no such space exists the rest of
// the text is left untouched. Previously this case rewrote spaces on earlier
// lines and eventually threw std::out_of_range from std::string::replace().
void TestBase::set_description(std::string d) {
  const size_t max_line_len = kOutputLineLength - 4;

  description_ = d;

  // Index of the first character of the line currently being wrapped.
  size_t line_start = 0;

  for (size_t i = max_line_len; i < description_.size(); i += max_line_len) {
    size_t break_pos = description_.find_last_of(' ', i);

    // A hit before |line_start| belongs to a line that is already wrapped.
    if (break_pos == std::string::npos || break_pos < line_start) {
      break_pos = description_.find(' ', i);
      if (break_pos == std::string::npos) {
        break;
      }
    }

    description_[break_pos] = '\n';
    line_start = break_pos + 1;
    i = break_pos;
  }
}


================================================
FILE: rocrtst/suites/test_common/test_base.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */
#ifndef ROCRTST_SUITES_TEST_COMMON_TEST_BASE_H_
#define ROCRTST_SUITES_TEST_COMMON_TEST_BASE_H_

#include <string>
#include <memory>
#include <vector>

#include "common/base_rocr.h"

// Base class for the tests under the suites directory. It adds a common test
// life cycle (SetUp / Run / Close), section header printing and a
// word-wrapped description on top of rocrtst::BaseRocR.
class TestBase : public rocrtst::BaseRocR {
 public:
  TestBase(void);

  virtual ~TestBase(void);

  // Verbosity thresholds. DisplayTestInfo() prints the description only at
  // VERBOSE_STANDARD or above.
  enum VerboseLevel {VERBOSE_MIN = 0, VERBOSE_STANDARD, VERBOSE_PROGRESS};

  // @Brief: Set-up performed before the core test code runs, i.e. init
  // runtime, prepare packet... The base implementation prints the setup
  // header and initializes HSA.
  virtual void SetUp(void);

  // @Brief: The core test/measurement code executes here. The base
  // implementation only prints the execution header.
  virtual void Run(void);

  // @Brief: Clean up after the test. The base implementation prints the
  // clean-up header, optionally dumps monitor information and performs the
  // common clean-up.
  virtual void Close(void);

  // @Brief: Display the results. The base implementation only prints the
  // results header.
  virtual void DisplayResults(void) const;

  // @Brief: Display information about the test (title and, depending on
  // verbosity, description)
  virtual void DisplayTestInfo(void);

  // @Brief: The description as stored by set_description(), i.e. already
  // word-wrapped.
  const std::string & description(void) const {return description_;}

  // @Brief: Store the description, inserting line breaks so that it fits the
  // output width.
  void set_description(std::string d);

  // @Brief: Emit setup output string only.  For tests with custom setup.
  void SetupPrint(void);

  // @Brief: Emit close output string only.  For tests with custom close.
  void ClosePrint(void);

 private:
  // Word-wrapped description text.
  std::string description_;
};

#endif  // ROCRTST_SUITES_TEST_COMMON_TEST_BASE_H_


================================================
FILE: rocrtst/suites/test_common/test_case_template.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

// The purpose of this test is to provide an example of the use of the
// common RocrTest classes and utilities that are used in many examples.
// It can be used as a template to start off with when writing new tests.
// In many cases, the existing boilerplate code will be sufficient as is.
// Otherwise, the boilerplate code can be either supplemented or replaced
// by your own code in your example, as necessary.
//
// The comments provided are focused more on the use of the common rocrtst
// utilities and boilerplate code, rather than the example app. itself.
//
// The boilerplate code includes code for:
// * hsa initialization and clean up
// * code to load pre-built kernels
// * creating queues
// * populating AQL packets
// * checking for required profiles
// * finding cpu and gpu agents (callbacks for common use cases)
// * finding pools (having common requirements)
// * allocating and setting kernel arguments
// * somewhat standardized output
// * handling additional command line arguments, beyond google-test arguments
// * support for various level of verbosity, controlled from command line arg
// * support for building OpenCL kernels
// * timer support
//
// Overview of RocrTst code organization:
// Classes:
// * class BaseRocR (base_rocr.h) -- base class for all rocrtst examples and
//   tests. Most of the rocrtst common utilities act on BaseRocR objects
//
// * TestBase (test_base.h)  -- derives from BaseRocR and is the base class
//   for all tests under <rocrtst root>/suites. The implementations of the
//   TestBase methods are typically actions that are required for most/all
//   tests and should therefore be called from the derived implementations
//   of the methods.
//
// Utilities:
// * <rocrtst root>/common/base_rocr_utils.<cc/h> contains a set of utilities
//   that act on BaseRocR objects.
//
// * <rocrtst root>/common/common.<cc/h> contain other non-BaseRocR utilities
//
// Special Files:
// * main.cc -- The main google test file from which the tests are invoked.
//     There should be an entry for each test to be run there.
//
// * kernels -- OpenCL kernel source files should go in the kernels directory
//
// * CMakeLists.txt -- Host code (*.cc and *.h files) should build without
//     modifying the CMakeLists.txt file, if the files are placed in the
//     "performance" directory. However, an entry is required for OpenCL
//     kernels. For each kernel to be built, the bitcode libraries must be
//     indicated before the call to "build_kernel()" is made. See existing
//     code for examples.

#include <algorithm>
#include <iostream>
#include <vector>

#include "suites/test_common/test_case_template.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"

#ifdef ROCRTST_EMULATOR_BUILD
static const uint32_t kNumBufferElements = 4;
#else
static const uint32_t kNumBufferElements = 256;
#endif

// Returns from the enclosing function with the given status when it is not
// HSA_STATUS_SUCCESS, after printing the failing location and, when
// available, the status description.
//
// The argument is evaluated exactly once, so an API call can be passed
// directly without being re-executed. The description line is skipped when
// hsa_status_string() cannot provide one, instead of streaming a null pointer.
// The enclosing function must return hsa_status_t.
#define RET_IF_HSA_ERR(err) { \
  const hsa_status_t ret_if_hsa_err_status_ = (err); \
  if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) { \
    const char* msg = nullptr; \
    const hsa_status_t ret_if_hsa_err_str_status_ = \
                        hsa_status_string(ret_if_hsa_err_status_, &msg); \
    std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
                          __FILE__ << ". Call returned " << \
                          ret_if_hsa_err_status_ << std::endl; \
    if (ret_if_hsa_err_str_status_ == HSA_STATUS_SUCCESS && msg != nullptr) { \
      std::cout << msg << std::endl; \
    } \
    return ret_if_hsa_err_status_; \
  } \
}

// Many test cases want to perform an operation on memory sizes of various
// granularities.
#if 0
static const int kNumGranularity = 20;
const char* Str[kNumGranularity] = {"1k", "2K", "4K", "8K", "16K", "32K",
    "64K", "128K", "256K", "512K", "1M", "2M", "4M", "8M", "16M", "32M",
                                               "64M", "128M", "256M", "512M"};

const size_t Size[kNumGranularity] = {
    1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024, 64*1024, 128*1024,
    256*1024, 512*1024, 1024*1024, 2048*1024, 4096*1024, 8*1024*1024,
    16*1024*1024, 32*1024*1024, 64*1024*1024, 128*1024*1024, 256*1024*1024,
    512*1024*1024};

static const int kMaxCopySize = Size[kNumGranularity - 1];
#endif
// Configures the example test: default iteration count, title, description
// and the kernel (code object file and function name) to load.
TestExample::TestExample(void) : TestBase() {
  // Default number of iterations of the main test. The value can be
  // overridden on the command line.
  set_num_iteration(10);

  set_title("Test Case Example");
  set_description("Put a description of the test case here. Line breaks "
      "will be taken care of on output, not here.");

  // Code object file and the name of the kernel function inside it.
  set_kernel_file_name("test_case_template_kernels.hsaco");
  set_kernel_name("square");

#if 0
  // Set required profile to HSA_PROFILE_FULL or HSA_PROFILE_BASE if it
  // matters for this test. If either profile is fine, then leave with
  // default
  set_requires_profile(<value>);
#endif
}

TestExample::~TestExample(void) {}

// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void TestExample::SetUp(void) {
  hsa_status_t err;

  // TestBase::SetUp() will set HSA_ENABLE_INTERRUPT if enable_interrupt() is
  // true, and call hsa_init(). It also prints the SetUp header.
  TestBase::SetUp();

  // SetDefaultAgents(this) will assign the first CPU and GPU found on
  // iterating through the agents and assign them to cpu_device_ and
  // gpu_device1_, respectively (cpu_device() and gpu_device1()). These
  // BaseRocR member variables are used in some utilities. Additionally,
  // SetDefaultAgents() checks the profile of the gpu and compares this
  // to any required profile.
  //
  // If SetDefaultAgents() is not used, if the profile of the target GPU
  // matters for this test, it should be set with set_profile() and
  // CheckProfileAndInform() should be called to check if it is the
  // required profile
  err = rocrtst::SetDefaultAgents(this);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  hsa_agent_t* gpu_dev = gpu_device1();

  // Find and assign HSA_AMD_SEGMENT_GLOBAL pools for cpu, gpu and a kern_arg
  // pool
  err = rocrtst::SetPoolsTypical(this);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Create a queue
  hsa_queue_t* q = nullptr;
  rocrtst::CreateQueue(*gpu_dev, &q);
  ASSERT_NE(q, nullptr);
  set_main_queue(q);

  err = rocrtst::LoadKernelFromObjFile(this, gpu_dev);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Fill up the kernel packet (except header) with some values we've
  // collected so far, and some reasonable default values; this should be after
  // LoadKernelFromObjFile(). AllocAndSetKernArgs() will fill in the kern_args
  err = rocrtst::InitializeAQLPacket(this, &aql());
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};

  // Allocate a few buffers for our example
  err = hsa_amd_memory_pool_allocate(cpu_pool(),
                                   kNumBufferElements*sizeof(uint32_t),
                                   0, reinterpret_cast<void**>(&src_buffer_));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_agents_allow_access(2, ag_list, NULL, src_buffer_);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Initialize the source buffer
  for (uint32_t i = 0; i < kNumBufferElements; ++i) {
    reinterpret_cast<uint32_t *>(src_buffer_)[i] = i;
  }

  err = hsa_amd_memory_pool_allocate(cpu_pool(),
                                   kNumBufferElements*sizeof(uint32_t),
                                   0, reinterpret_cast<void**>(&dst_buffer_));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst_buffer_);
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  // Set up Kernel arguments
  // See the meta-data for the compiled OpenCL kernel code to ascertain
  // the sizes, padding and alignment required for kernel arguments.
  // This can be seen by executing
  // $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
  // The kernel code will expect the following arguments aligned as shown.
//  typedef uint32_t uint4[4];
  struct __attribute__((aligned(16))) local_args_t {
    uint32_t* dstArray;
    uint32_t* srcArray;
    uint32_t size;
    uint32_t pad;
    uint64_t global_offset_x;
    uint64_t global_offset_y;
    uint64_t global_offset_z;
    uint64_t printf_buffer;
    uint64_t default_queue;
    uint64_t completion_action;
  } local_args;

  local_args.dstArray = reinterpret_cast<uint32_t *>(dst_buffer_);
  local_args.srcArray = reinterpret_cast<uint32_t *>(src_buffer_);
  local_args.size = kNumBufferElements;
  local_args.global_offset_x = 0;
  local_args.global_offset_y = 0;
  local_args.global_offset_z = 0;
  local_args.printf_buffer = 0;
  local_args.default_queue = 0;
  local_args.completion_action = 0;

  err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
  ASSERT_EQ(err, HSA_STATUS_SUCCESS);

  return;
}

// Writes the first 32 bits of an AQL packet with a single atomic store that
// uses release ordering: the 16-bit header lands in the low half and the
// 16-bit setup word in the high half. The packet address passed in should be
// inside the queue's memory.
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
                                  hsa_kernel_dispatch_packet_t* queue_packet) {
  uint32_t* const first_dword = reinterpret_cast<uint32_t*>(queue_packet);
  const auto header_and_setup = header | (setup << 16);
  __atomic_store_n(first_dword, header_and_setup, __ATOMIC_RELEASE);
}

// Number of dispatches Run() performs: 1.2 times the requested iteration
// count plus one. The surplus exists because some of the initial and final
// samples are thrown away when the statistics are calculated.
uint32_t TestExample::RealIterationNum(void) {
  const auto padded_count = num_iteration() * 1.2 + 1;
  return padded_count;
}

// Returns true when every element of ar[0..sz) equals the square of its own
// index, and false as soon as one element does not.
static bool VerifyResult(uint32_t *ar, size_t sz) {
  size_t idx = 0;
  while (idx < sz) {
    const size_t expected = idx * idx;
    if (ar[idx] != expected) {
      return false;
    }
    ++idx;
  }
  return true;
}
// Dispatches the kernel RealIterationNum() times, timing each dispatch from
// just before the packet header is published until the completion signal
// drops below 1, and verifying dst_buffer_ after every dispatch. The mean of
// the retained samples is stored in time_mean_.
void TestExample::Run(void) {
  // Compare required profile for this test case with what we're actually
  // running on
  if (!rocrtst::CheckProfile(this)) {
    return;
  }

  TestBase::Run();

  // Override whatever we need to...
  aql().workgroup_size_x = kNumBufferElements;
  aql().grid_size_x = kNumBufferElements;

  // One elapsed-time sample per dispatch
  std::vector<double> timer;

  int it = RealIterationNum();
  hsa_kernel_dispatch_packet_t *queue_aql_packet;

  rocrtst::PerfTimer p_timer;
  uint64_t index;

  for (int i = 0; i < it; i++) {
    // This function simply copies the data we've collected so far into our
    // local AQL packet, except the setup and header fields.
    queue_aql_packet = WriteAQLToQueue(this, &index);
    // The returned packet must be the queue slot at the returned index
    ASSERT_EQ(queue_aql_packet,
              reinterpret_cast<hsa_kernel_dispatch_packet_t *>
                                      (main_queue()->base_address) + index);
    uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;

    // System-scope acquire and release fences
    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
    aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
                  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;

    // Create and start a timer for this iteration
    int id = p_timer.CreateTimer();
    p_timer.StartTimer(id);

    // Publish header + setup in one atomic store, then ring the doorbell
    ::AtomicSetPacketHeader(aql_header, aql().setup, queue_aql_packet);

    hsa_signal_store_screlease(main_queue()->doorbell_signal, index);

    // Wait on the dispatch signal until the kernel is finished.
    while (hsa_signal_wait_scacquire(aql().completion_signal,
         HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) {
    }

    // Stop the timer
    p_timer.StopTimer(id);

    // Store time for later analysis
    timer.push_back(p_timer.ReadTimer(id));
    // Set the completion signal back to 1 so the next wait (value < 1) blocks
    // again
    hsa_signal_store_screlease(aql().completion_signal, 1);

    ASSERT_TRUE(VerifyResult(reinterpret_cast<uint32_t *>(dst_buffer_),
                                                         kNumBufferElements));

    // Pay attention to verbosity level for things like progress output
    if (verbosity() >= VERBOSE_PROGRESS) {
      std::cout << ".";
      fflush(stdout);
    }
  }

  if (verbosity() >= VERBOSE_PROGRESS) {
    std::cout << std::endl;
  }

  // Discard the first sample, sort the rest in ascending order, then keep
  // only the num_iteration() smallest samples; the surplus samples added by
  // RealIterationNum() are the largest ones and are dropped here.
  timer.erase(timer.begin());
  std::sort(timer.begin(), timer.end());
  timer.erase(timer.begin() + num_iteration(), timer.end());

  time_mean_ = rocrtst::CalcMean(timer);
}

void TestExample::DisplayTestInfo(void) {
  TestBase::DisplayTestInfo();
}

// Prints the TestBase results followed by the mean time computed by Run(),
// scaled by 1e6 and labelled "uS". Nothing is printed when CheckProfile()
// rejects the profile we are running on.
void TestExample::DisplayResults(void) const {
  if (rocrtst::CheckProfile(this)) {
    TestBase::DisplayResults();

    const auto mean_scaled = time_mean_ * 1e6;
    std::cout << "The average time was: " << mean_scaled << " uS" << std::endl;
  }
}

// Frees the two buffers owned by this test and then runs the base-class
// cleanup.
void TestExample::Close() {
  // Source buffer first, then the destination buffer. Each free is checked
  // with ASSERT_EQ before moving on.
  hsa_status_t err = hsa_amd_memory_pool_free(src_buffer_);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  err = hsa_amd_memory_pool_free(dst_buffer_);
  ASSERT_EQ(HSA_STATUS_SUCCESS, err);

  // TestBase::Close() closes handles opened within rocrtst utility calls and
  // calls hsa_shut_down(), so it has to come after the other hsa cleanup
  TestBase::Close();
}


#undef RET_IF_HSA_ERR


================================================
FILE: rocrtst/suites/test_common/test_case_template.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_TEST_COMMON_TEST_CASE_TEMPLATE_H_
#define ROCRTST_SUITES_TEST_COMMON_TEST_CASE_TEMPLATE_H_

#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"

// Example test case built on TestBase. SetUp() allocates a source and a
// destination buffer, Run() dispatches a kernel repeatedly while timing each
// dispatch and checking the destination buffer, and Close() frees the
// buffers.
class TestExample : public TestBase {
 public:
  TestExample();

  // @Brief: Destructor for test case of TestExample
  virtual ~TestExample();

  // @Brief: Setup the environment for measurement
  virtual void SetUp();

  // @Brief: Core measurement execution
  virtual void Run();

  // @Brief: Clean up and retrieve the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo(void);

 private:
  // @Brief: Number of dispatches Run() performs (more than the requested
  // iteration count, since some samples are discarded)
  uint32_t RealIterationNum(void);

  // Mean of the retained per-dispatch times; written by Run() and printed by
  // DisplayResults() after scaling by 1e6 (presumably seconds -- confirm
  // against rocrtst::PerfTimer)
  double time_mean_;
  // Buffers allocated in SetUp() and freed in Close()
  void *src_buffer_;
  void *dst_buffer_;
};

#endif  // ROCRTST_SUITES_TEST_COMMON_TEST_CASE_TEMPLATE_H_


================================================
FILE: rocrtst/suites/test_common/test_common.cc
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#include <assert.h>
#include <getopt.h>
#include <stdint.h>

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include "suites/test_common/test_base.h"
#include "suites/test_common/test_common.h"
#include "amd_smi/amdsmi.h"

// Long-option table handed to getopt_long(). Every entry returns the same
// value as the matching short option. The usage text printed by PrintHelp()
// advertises --verbosity, --monitor_verbosity and --rocrtst_help, so those
// spellings are accepted here in addition to the original --verbose and
// --monitor_verbose.
static const struct option long_options[] = {
  {"iterations", required_argument, nullptr, 'i'},
  {"verbose", required_argument, nullptr, 'v'},
  {"verbosity", required_argument, nullptr, 'v'},
  {"monitor_verbose", required_argument, nullptr, 'm'},
  {"monitor_verbosity", required_argument, nullptr, 'm'},
  {"rocrtst_help", no_argument, nullptr, 'r'},

  {nullptr, 0, nullptr, 0}
};
// Short options: -i, -v and -m take an argument; -r takes none.
static const char* short_options = "i:v:m:r";

// Writes the rocrtst-specific command-line usage summary to std::cout.
static void PrintHelp(void) {
  static const char kUsage[] =
     "Optional RocRTst Arguments:\n"
     "--iterations, -i <number of iterations to execute>; override default, "
         "which varies for each test\n"
     "--rocrtst_help, -r print this help message\n"
     "--verbosity, -v <verbosity level>\n"
     "  Verbosity levels:\n"
     "   0    -- minimal; just summary information\n"
     "   1    -- intermediate; show intermediate values such as intermediate "
                  "perf. data\n"
     "   2    -- progress; show progress displays\n"
     "   >= 3 -- more debug output\n"
     "--monitor_verbosity, -m <monitor verbosity level>\n"
     "  Monitor Verbosity levels:\n"
     "   0    -- don't read or print out any GPU monitor information;\n"
     "   1    -- print out all available monitor information before the first "
                 "test and after each test\n"
     "   >= 2 -- print out even more monitor information (test specific)\n";

  std::cout << kUsage;
}

// Parses the rocrtst-specific command-line options into *test.
//
//   test     - receives the iteration count and the two verbosity levels;
//              must not be null
//   arg_cnt  - number of entries in arg_list
//   arg_list - argument vector to scan with getopt_long()
//
// Returns 0 when every option was consumed successfully. Returns 1, after
// printing the usage text, when help was requested (-r), an option was not
// recognized, or an option value was not a non-negative integer that fits in
// an int.
uint32_t ProcessCmdline(RocrTstGlobals* test, int arg_cnt, char** arg_list) {
  assert(test != nullptr);

  // Converts an option value to uint32_t. std::stoi() throws on non-numeric
  // or out-of-range text; report that as a failed parse instead of letting
  // the exception terminate the program. Negative values are rejected because
  // the destination fields are unsigned.
  auto parse_value = [](const char* text, uint32_t* out) -> bool {
    if (text == nullptr) {
      return false;
    }
    try {
      const int parsed = std::stoi(text);
      if (parsed < 0) {
        return false;
      }
      *out = static_cast<uint32_t>(parsed);
    } catch (const std::invalid_argument&) {
      return false;
    } catch (const std::out_of_range&) {
      return false;
    }
    return true;
  };

  int ind = -1;

  while (true) {
    const int a = getopt_long(arg_cnt, arg_list, short_options, long_options,
                                                                        &ind);
    if (a == -1) {
      break;
    }

    uint32_t* target = nullptr;

    switch (a) {
      case 'i':
        target = &test->num_iterations;
        break;

      case 'v':
        target = &test->verbosity;
        break;

      case 'm':
        target = &test->monitor_verbosity;
        break;

      // 'r' (explicit help request) and anything getopt_long() did not
      // recognize are handled the same way
      case 'r':
      default:
        PrintHelp();
        return 1;
    }

    if (!parse_value(optarg, target)) {
      std::cerr << "Invalid value \"" << (optarg != nullptr ? optarg : "")
                << "\" for option -" << static_cast<char>(a) << std::endl;
      PrintHelp();
      return 1;
    }
  }
  return 0;
}

// Formats an integer as text: "0x"-prefixed hexadecimal by default, plain
// decimal when hex is false.
template<typename T>
static std::string IntegerToString(T intVal, bool hex = true) {
  std::ostringstream formatted;

  if (!hex) {
    formatted << std::dec << intVal;
    return formatted.str();
  }

  formatted << "0x" << std::hex << intVal;
  return formatted.str();
}

int DumpMonitorInfo() {
  int ret = 0;
  uint64_t value_u64;
  uint16_t value_u16;
  uint32_t value_u32;
  int64_t value_i64;
  std::string val_str;
  std::vector<std::string> val_vec;
  amdsmi_status_t amdsmi_ret;
  int dump_ret = 0;

  auto print_attr_label =
      [&](std::string attrib) -> bool {
          std::cout << "\t** " << attrib;
          if (ret == -1) {
            std::cout << "not available" << std::endl;
            return false;
          }
          return true;
  };

  auto delim = "\t***********************************";

  std::cout << "\t***** Hardware monitor values *****" << std::endl;
  std::cout << delim << std::endl;
  std::cout.setf(std::ios::dec, std::ios::basefield);

  // Get socket handles
  uint32_t socket_count = AMDSMI_MAX_DEVICES;
  amdsmi_socket_handle socket_handles[AMDSMI_MAX_DEVICES];
  amdsmi_ret = amdsmi_get_socket_handles(&socket_count, socket_handles);
  if (amdsmi_ret != AMDSMI_STATUS_SUCCESS) {
      std::cout << "Failed to get socket count. Error: " << 
                                                      amdsmi_ret << std::endl;
      amdsmi_shut_down();
      return 1;
  }

  uint32_t socket_processors = AMDSMI_MAX_DEVICES;
  uint32_t total_num_processors = 0;

  amdsmi_processor_handle processor_handles[AMDSMI_MAX_DEVICES];
  amdsmi_processor_handle socket_processor_handles[AMDSMI_MAX_DEVICES];

  // Collect devices from sockets
  for (uint32_t socket_idx = 0; socket_idx < socket_count; ++socket_idx) {
    amdsmi_ret = amdsmi_get_processor_handles(socket_handles[socket_idx], 
      &socket_processors, socket_processor_handles);
    if (amdsmi_ret != AMDSMI_STATUS_SUCCESS) {
        std::cout << "amdsmi_get_processor_handles() for socket " << 
                        socket_idx << " returned " << amdsmi_ret << std::endl;
        amdsmi_shut_down();
        return 1;
    }

    for (uint32_t i = 0; i < socket_processors && 
                        total_num_processors + i < AMDSMI_MAX_DEVICES; ++i) {
      processor_handles[total_num_processors + i] = socket_processor_handles[i];
    }
    total_num_processors += socket_processors;
  }

  // Filter for GPU processors
  uint32_t gpu_count = 0;
  for (uint32_t i = 0; i < total_num_processors; ++i) {
      processor_type_t processor_type;
      amdsmi_ret = amdsmi_get_processor_type(processor_handles[i], 
                                                              &processor_type);
      if (amdsmi_ret == AMDSMI_STATUS_SUCCESS && 
                              processor_type == AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
          gpu_count++;
      }
  }

  for (uint32_t dindx = 0; dindx < gpu_count; ++dindx) {
    auto print_frequencies = [&](amdsmi_frequencies_t *freqs, 
                                                            std::string label) {
      if (amdsmi_ret != AMDSMI_STATUS_SUCCESS) {
        std::cout << "get frequency call  returned " << amdsmi_ret << std::endl;
        dump_ret = 1;
        return;
      }

      if (print_attr_label(label)) {
        for (uint32_t i = 0; i < freqs->num_supported; ++i) {
          std::cout << "\t**  " << i << ": " <<
                                         freqs->frequency[i]/1000000 << "Mhz";
          if (i == freqs->current) {
            std::cout << " *";
          }

          std::cout << std::endl;
        }
      }
    };
    auto print_val_str = [&](std::string val, std::string label) {
      std::cout << "\t** " << label;
      if (ret != AMDSMI_STATUS_SUCCESS) {
        std::cout << "not available; amdsmi call returned" << amdsmi_ret;
        dump_ret = 1;
      } else {
        std::cout << val;
      }
      std::cout << std:: endl;
    };

    amdsmi_ret = amdsmi_get_gpu_id(processor_handles[dindx], &value_u16);
    print_val_str(IntegerToString(value_u16), "Device ID: ");

    amdsmi_dev_perf_level_t perf;
    std::string perf_str;
    amdsmi_ret = amdsmi_get_gpu_perf_level(processor_handles[dindx], &perf);
    switch (perf) {
      case AMDSMI_DEV_PERF_LEVEL_AUTO:
        perf_str = "auto";
        break;
      default:
        perf_str = "unknown";
    }
    print_val_str(perf_str, "Performance Level: ");

    uint32_t overdrive_level;
    amdsmi_ret = amdsmi_get_gpu_overdrive_level(processor_handles[dindx], 
                                                            &overdrive_level);

    print_val_str(IntegerToString(value_u32, false) + "%", "OverDrive Level: ");

    amdsmi_frequencies_t freqs;
    amdsmi_ret = amdsmi_get_clk_freq(processor_handles[dindx], 
                                                AMDSMI_CLK_TYPE_SYS, &freqs);

    print_frequencies(&freqs, "Supported GPU clock frequencies:\n");

    amdsmi_ret = amdsmi_get_clk_freq(processor_handles[dindx], 
                                                AMDSMI_CLK_TYPE_MEM, &freqs);
    print_frequencies(&freqs, "Supported GPU Memory clock frequencies:\n");

    amdsmi_board_info_t board_info;
    amdsmi_get_gpu_board_info(processor_handles[dindx], &board_info);
    print_val_str(board_info.product_name, "Monitor name: ");
    
    amdsmi_ret = amdsmi_get_temp_metric(processor_handles[dindx], 
                AMDSMI_TEMPERATURE_TYPE_EDGE, AMDSMI_TEMP_CURRENT, &value_i64);
    print_val_str(IntegerToString(value_i64/1000, false) + "C",
                                                            "Temperature: ");

    amdsmi_ret = amdsmi_get_gpu_fan_speed(processor_handles[dindx], 
                                                                0, &value_i64);
    if (ret != AMDSMI_STATUS_SUCCESS) {
        std::cout << "not available; amdsmi call returned" << amdsmi_ret;
        dump_ret = 1;
    }
    amdsmi_ret = amdsmi_get_gpu_fan_speed_max(processor_handles[dindx], 
                                                                0, &value_u64);
    if (ret != AMDSMI_STATUS_SUCCESS) {
        std::cout << "not available; amdsmi call returned" << amdsmi_ret;
        dump_ret = 1;
    }
    if (print_attr_label("Current Fan Speed: ")) {
      std::cout << static_cast<float>(value_i64)/value_u64 * 100 << "% (" <<
          value_i64 << "/" << value_u64 << ")" << std::endl;
    }

    std::cout << "\t=======" << std::endl;
  }
  std::cout << delim << std::endl;
  return dump_ret;
}


================================================
FILE: rocrtst/suites/test_common/test_common.h
================================================
/*
 * =============================================================================
 *   ROC Runtime Conformance Release License
 * =============================================================================
 * The University of Illinois/NCSA
 * Open Source License (NCSA)
 *
 * Copyright (c) 2017, Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Developed by:
 *
 *                 AMD Research and AMD ROC Software Development
 *
 *                 Advanced Micro Devices, Inc.
 *
 *                 www.amd.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal with the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimers.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimers in
 *    the documentation and/or other materials provided with the distribution.
 *  - Neither the names of <Name of Development Group, Name of Institution>,
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this Software without specific prior written
 *    permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS WITH THE SOFTWARE.
 *
 */

#ifndef ROCRTST_SUITES_TEST_COMMON_TEST_COMMON_H_
#define ROCRTST_SUITES_TEST_COMMON_TEST_COMMON_H_

#include <memory>
#include <vector>

// Settings that ProcessCmdline() fills in from the command line.
struct RocrTstGlobals {
  // Set from the -v option
  uint32_t verbosity;
  // Set from the -m option
  uint32_t monitor_verbosity;
  // Set from the -i option
  uint32_t num_iterations;
};

// Parses the rocrtst command-line options in arg_list (arg_cnt entries) into
// *test. Returns 0 on success and 1 when the usage text was printed instead
// (help requested or unrecognized option).
uint32_t ProcessCmdline(RocrTstGlobals* test, int arg_cnt, char** arg_list);

// Prints hardware-monitor information for the GPUs to standard output.
// Returns 0 on success and non-zero when a monitor query failed.
int DumpMonitorInfo(void);

#endif  // ROCRTST_SUITES_TEST_COMMON_TEST_COMMON_H_


================================================
FILE: rocrtst/thirdparty/include/LICENSE
================================================
Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation.  All rights reserved.
Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation.  All rights reserved.
Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart.  All rights reserved.
Copyright © 2004-2005 The Regents of the University of California. All rights reserved.
Copyright © 2009      CNRS
Copyright © 2009-2016 Inria.  All rights reserved.
Copyright © 2009-2015 Université Bordeaux
Copyright © 2009-2015 Cisco Systems, Inc.  All rights reserved.
Copyright © 2009-2012 Oracle and/or its affiliates.  All rights reserved.
Copyright © 2010      IBM
Copyright © 2010      Jirka Hladky
Copyright © 2012      Aleksej Saushev, The NetBSD Foundation
Copyright © 2012      Blue Brain Project, EPFL. All rights reserved.
Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved.
Copyright © 2015      Research Organization for Information Science and Technology (RIST). All rights reserved.
Copyright © 2015-2016 Intel, Inc.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 


================================================
FILE: rocrtst/thirdparty/include/hwloc/autogen/config.h
================================================
/* include/hwloc/autogen/config.h.  Generated from config.h.in by configure.  */
/* -*- c -*-
 * Copyright © 2009 CNRS
 * Copyright © 2009-2014 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/* The configuration file */

#ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H

/* Select the spelling of the "restrict" qualifier: __restrict for GCC 2.95
   and newer, restrict for other C99 compilers, and nothing otherwise. */
#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
# define __hwloc_restrict __restrict
#else
# if __STDC_VERSION__ >= 199901L
#  define __hwloc_restrict restrict
# else
#  define __hwloc_restrict
# endif
#endif

/* Note that if we're compiling C++, then just use the "inline"
   keyword, since it's part of C++ */
#if defined(c_plusplus) || defined(__cplusplus)
#  define __hwloc_inline inline
#elif defined(_MSC_VER) || defined(__HP_cc)
#  define __hwloc_inline __inline
#else
#  define __hwloc_inline __inline__
#endif

/*
 * Note: this is public.  We can not assume anything from the compiler used
 * by the application and thus the HWLOC_HAVE_* macros below are not
 * fetched from the autoconf result here. We only automatically use a few
 * well-known easy cases.
 */

/* Some handy constants to make the logic below a little more readable */
#if defined(__cplusplus) && \
    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR >= 4))
#define GXX_ABOVE_3_4 1
#else
#define GXX_ABOVE_3_4 0
#endif

/* GCC_ABOVE_x_y is 1 when compiling C (not C++) with a compiler that reports
   a GNU version of x.y or newer, and 0 otherwise. */
#if !defined(__cplusplus) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
#define GCC_ABOVE_2_95 1
#else
#define GCC_ABOVE_2_95 0
#endif

#if !defined(__cplusplus) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
#define GCC_ABOVE_2_96 1
#else
#define GCC_ABOVE_2_96 0
#endif

#if !defined(__cplusplus) && \
    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#define GCC_ABOVE_3_3 1
#else
#define GCC_ABOVE_3_3 0
#endif

/* Each group below follows the same pattern: __HWLOC_HAVE_ATTRIBUTE_X takes
   the value of HWLOC_HAVE_ATTRIBUTE_X when the application defines it,
   otherwise it is derived from the GNU compiler version, otherwise it is 0.
   __hwloc_attribute_x then expands to the attribute or to nothing. */

/* Maybe before gcc 2.95 too */
#ifdef HWLOC_HAVE_ATTRIBUTE_UNUSED
#define __HWLOC_HAVE_ATTRIBUTE_UNUSED HWLOC_HAVE_ATTRIBUTE_UNUSED 
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
#else
# define __HWLOC_HAVE_ATTRIBUTE_UNUSED 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
# define __hwloc_attribute_unused __attribute__((__unused__))
#else
# define __hwloc_attribute_unused
#endif

/* __attribute__((__malloc__)) */
#ifdef HWLOC_HAVE_ATTRIBUTE_MALLOC
#define __HWLOC_HAVE_ATTRIBUTE_MALLOC HWLOC_HAVE_ATTRIBUTE_MALLOC 
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
#else
# define __HWLOC_HAVE_ATTRIBUTE_MALLOC 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_MALLOC
# define __hwloc_attribute_malloc __attribute__((__malloc__))
#else
# define __hwloc_attribute_malloc
#endif

/* __attribute__((__const__)) */
#ifdef HWLOC_HAVE_ATTRIBUTE_CONST
#define __HWLOC_HAVE_ATTRIBUTE_CONST HWLOC_HAVE_ATTRIBUTE_CONST 
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
#else
# define __HWLOC_HAVE_ATTRIBUTE_CONST 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_CONST
# define __hwloc_attribute_const __attribute__((__const__))
#else
# define __hwloc_attribute_const
#endif

/* __attribute__((__pure__)) */
#ifdef HWLOC_HAVE_ATTRIBUTE_PURE
#define __HWLOC_HAVE_ATTRIBUTE_PURE HWLOC_HAVE_ATTRIBUTE_PURE 
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
#else
# define __HWLOC_HAVE_ATTRIBUTE_PURE 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_PURE
# define __hwloc_attribute_pure __attribute__((__pure__))
#else
# define __hwloc_attribute_pure
#endif

/* __attribute__((__deprecated__)) */
#ifdef HWLOC_HAVE_ATTRIBUTE_DEPRECATED
#define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED HWLOC_HAVE_ATTRIBUTE_DEPRECATED 
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
#else
# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_DEPRECATED
# define __hwloc_attribute_deprecated __attribute__((__deprecated__))
#else
# define __hwloc_attribute_deprecated
#endif

/* __attribute__((__may_alias__)) */
#ifdef HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
#define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
#elif defined(__GNUC__)
# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
#else
# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 0
#endif
#if __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
# define __hwloc_attribute_may_alias __attribute__((__may_alias__))
#else
# define __hwloc_attribute_may_alias
#endif

/* HWLOC_DECLSPEC marks public declarations with default symbol visibility,
   but only when HWLOC_C_HAVE_VISIBILITY is defined to a non-zero value;
   otherwise it expands to nothing. */
#ifdef HWLOC_C_HAVE_VISIBILITY
# if HWLOC_C_HAVE_VISIBILITY
#  define HWLOC_DECLSPEC __attribute__((__visibility__("default")))
# else
#  define HWLOC_DECLSPEC
# endif
#else
# define HWLOC_DECLSPEC
#endif

/* Defined to 1 on Linux */
#define HWLOC_LINUX_SYS 1

/* Defined to 1 if the CPU_SET macro works */
#define HWLOC_HAVE_CPU_SET 1

/* Defined to 1 if you have the `windows.h' header. */
/* #undef HWLOC_HAVE_WINDOWS_H */
/* Process and thread identifier types used by the hwloc API */
#define hwloc_pid_t pid_t
#define hwloc_thread_t pthread_t

#ifdef HWLOC_HAVE_WINDOWS_H

#  include <windows.h>
typedef DWORDLONG hwloc_uint64_t;

#else /* HWLOC_HAVE_WINDOWS_H */

/* pthread.h is needed because hwloc_thread_t is defined as pthread_t above */
#  ifdef hwloc_thread_t
#    include <pthread.h>
#  endif /* hwloc_thread_t */

/* Defined to 1 if you have the <stdint.h> header file. */
#  define HWLOC_HAVE_STDINT_H 1

#  include <unistd.h>
#  ifdef HWLOC_HAVE_STDINT_H
#    include <stdint.h>
#  endif
typedef uint64_t hwloc_uint64_t;

#endif /* HWLOC_HAVE_WINDOWS_H */

/* Whether we need to re-define all the hwloc public symbols or not */
#define HWLOC_SYM_TRANSFORM 0

/* The hwloc symbol prefix */
#define HWLOC_SYM_PREFIX hwloc_

/* The hwloc symbol prefix in all caps */
#define HWLOC_SYM_PREFIX_CAPS HWLOC_

#endif /* HWLOC_CONFIG_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/bitmap.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2017 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief The bitmap API, for use in hwloc itself.
 */

#ifndef HWLOC_BITMAP_H
#define HWLOC_BITMAP_H

#include <hwloc/autogen/config.h>
#include <assert.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_bitmap The bitmap API
 *
 * The ::hwloc_bitmap_t type represents a set of objects, typically OS
 * processors -- which may actually be hardware threads (represented
 * by ::hwloc_cpuset_t, which is a typedef for ::hwloc_bitmap_t) -- or
 * memory nodes (represented by ::hwloc_nodeset_t, which is also a
 * typedef for ::hwloc_bitmap_t).
 *
 * <em>Both CPU and node sets are always indexed by OS physical number.</em>
 *
 * \note CPU sets and nodesets are described in \ref hwlocality_object_sets.
 *
 * A bitmap may be of infinite size (all bits are set after some point).
 * A bitmap may even be full if all bits are set.
 *
 * \note Several examples of using the bitmap API are available under the
 * doc/examples/ directory in the source tree.
 * Regression tests such as tests/hwloc/hwloc_bitmap*.c also make intensive use
 * of this API.
 * @{
 */


/** \brief
 * Set of bits represented as an opaque pointer to an internal bitmap.
 */
typedef struct hwloc_bitmap_s * hwloc_bitmap_t;
/** \brief a non-modifiable ::hwloc_bitmap_t */
typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;


/*
 * Bitmap allocation, freeing and copying.
 */

/** \brief Allocate a new empty bitmap.
 *
 * \returns A valid bitmap or \c NULL.
 *
 * The bitmap should be freed by a corresponding call to
 * hwloc_bitmap_free().
 */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;

/** \brief Allocate a new full bitmap. */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;

/** \brief Free bitmap \p bitmap.
 *
 * If \p bitmap is \c NULL, no operation is performed.
 */
HWLOC_DECLSPEC void hwloc_bitmap_free(hwloc_bitmap_t bitmap);

/** \brief Duplicate bitmap \p bitmap by allocating a new bitmap and copying \p bitmap contents.
 *
 * If \p bitmap is \c NULL, \c NULL is returned.
 */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_dup(hwloc_const_bitmap_t bitmap) __hwloc_attribute_malloc;

/** \brief Copy the contents of bitmap \p src into the already allocated bitmap \p dst */
HWLOC_DECLSPEC void hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src);


/*
 * Bitmap/String Conversion
 */

/** \brief Stringify a bitmap.
 *
 * Up to \p buflen characters may be written in buffer \p buf.
 *
 * If \p buflen is 0, \p buf may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);

/** \brief Stringify a bitmap into a newly allocated string.
 *
 * \return -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a bitmap string and store it in bitmap \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);

/** \brief Stringify a bitmap in the list format.
 *
 * Lists are comma-separated indexes or ranges.
 * Ranges are dash separated indexes.
 * The last range may not have an ending index if the bitmap is infinitely set.
 *
 * Up to \p buflen characters may be written in buffer \p buf.
 *
 * If \p buflen is 0, \p buf may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);

/** \brief Stringify a bitmap into a newly allocated list string.
 *
 * \return -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a list string and store it in bitmap \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);

/** \brief Stringify a bitmap in the taskset-specific format.
 *
 * The taskset command manipulates bitmap strings that contain a single
 * (possibly very long) hexadecimal number starting with 0x.
 *
 * Up to \p buflen characters may be written in buffer \p buf.
 *
 * If \p buflen is 0, \p buf may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);

/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
 *
 * \return -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a taskset-specific bitmap string and store it in bitmap \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);


/*
 * Building bitmaps.
 */

/** \brief Empty the bitmap \p bitmap */
HWLOC_DECLSPEC void hwloc_bitmap_zero(hwloc_bitmap_t bitmap);

/** \brief Fill bitmap \p bitmap with all possible indexes (even if those objects don't exist or are otherwise unavailable) */
HWLOC_DECLSPEC void hwloc_bitmap_fill(hwloc_bitmap_t bitmap);

/** \brief Empty the bitmap \p bitmap and add bit \p id */
HWLOC_DECLSPEC void hwloc_bitmap_only(hwloc_bitmap_t bitmap, unsigned id);

/** \brief Fill the bitmap \p bitmap and clear the index \p id */
HWLOC_DECLSPEC void hwloc_bitmap_allbut(hwloc_bitmap_t bitmap, unsigned id);

/** \brief Setup bitmap \p bitmap from unsigned long \p mask */
HWLOC_DECLSPEC void hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask);

/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
HWLOC_DECLSPEC void hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);


/*
 * Modifying bitmaps.
 */

/** \brief Add index \p id in bitmap \p bitmap */
HWLOC_DECLSPEC void hwloc_bitmap_set(hwloc_bitmap_t bitmap, unsigned id);

/** \brief Add indexes from \p begin to \p end in bitmap \p bitmap.
 *
 * If \p end is \c -1, the range is infinite.
 */
HWLOC_DECLSPEC void hwloc_bitmap_set_range(hwloc_bitmap_t bitmap, unsigned begin, int end);

/** \brief Replace \p i -th subset of bitmap \p bitmap with unsigned long \p mask */
HWLOC_DECLSPEC void hwloc_bitmap_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);

/** \brief Remove index \p id from bitmap \p bitmap */
HWLOC_DECLSPEC void hwloc_bitmap_clr(hwloc_bitmap_t bitmap, unsigned id);

/** \brief Remove indexes from \p begin to \p end in bitmap \p bitmap.
 *
 * If \p end is \c -1, the range is infinite.
 */
HWLOC_DECLSPEC void hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, int end);

/** \brief Keep a single index among those set in bitmap \p bitmap
 *
 * May be useful before binding so that the process does not
 * have a chance of migrating between multiple logical CPUs
 * in the original mask.
 */
HWLOC_DECLSPEC void hwloc_bitmap_singlify(hwloc_bitmap_t bitmap);


/*
 * Consulting bitmaps.
 */

/** \brief Convert the beginning part of bitmap \p bitmap into unsigned long \p mask */
HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */
HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure;

/** \brief Test whether index \p id is part of bitmap \p bitmap */
HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) __hwloc_attribute_pure;

/** \brief Test whether bitmap \p bitmap is empty */
HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Test whether bitmap \p bitmap is completely full
 *
 * \note A full bitmap is always infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is set in \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
 *
 * If \p prev is -1, the first index is returned.
 *
 * \return -1 if no index with higher index is set in \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;

/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the "weight" of bitmap \p bitmap (i.e., number of
 * indexes that are in the bitmap).
 *
 * \return the number of indexes that are in the bitmap.
 *
 * \return -1 if \p bitmap is infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Loop macro iterating on bitmap \p bitmap
 *
 * The loop must start with hwloc_bitmap_foreach_begin() and end
 * with hwloc_bitmap_foreach_end() followed by a terminating ';'.
 * The begin macro opens a do-block and a for-block that only the
 * end macro closes, so the two must always be used as a pair.
 *
 * \p id is the loop variable; it should be an unsigned int.  The
 * first iteration will set \p id to the lowest index in the bitmap.
 * Successive iterations will iterate through, in order, all remaining
 * indexes set in the bitmap.  To be specific: each iteration will return a
 * value for \p id such that hwloc_bitmap_isset(bitmap, id) is true.
 *
 * The loop stops when \p id, converted to unsigned, equals (unsigned) -1,
 * i.e. when hwloc_bitmap_first() or hwloc_bitmap_next() returned -1.
 *
 * The assert prevents the loop from being infinite if the bitmap is infinitely set.
 *
 * \note \p bitmap is expanded several times by the macro (in the assert,
 * in the loop initializer and in the loop increment).
 *
 * \hideinitializer
 */
#define hwloc_bitmap_foreach_begin(id, bitmap) \
do { \
        assert(hwloc_bitmap_weight(bitmap) != -1); \
        for (id = hwloc_bitmap_first(bitmap); \
             (unsigned) id != (unsigned) -1; \
             id = hwloc_bitmap_next(bitmap, id)) {

/** \brief End of loop macro iterating on a bitmap.
 *
 * Closes the for-block and the do-block opened by
 * hwloc_bitmap_foreach_begin().
 *
 * Needs a terminating ';'.
 *
 * \sa hwloc_bitmap_foreach_begin()
 * \hideinitializer
 */
#define hwloc_bitmap_foreach_end()		\
        } \
} while (0)


/*
 * Combining bitmaps.
 */

/** \brief Or bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
 *
 * \p res can be the same as \p bitmap1 or \p bitmap2
 */
HWLOC_DECLSPEC void hwloc_bitmap_or (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);

/** \brief And bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
 *
 * \p res can be the same as \p bitmap1 or \p bitmap2
 */
HWLOC_DECLSPEC void hwloc_bitmap_and (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);

/** \brief And bitmap \p bitmap1 and the negation of \p bitmap2 and store the result in bitmap \p res
 *
 * \p res can be the same as \p bitmap1 or \p bitmap2
 */
HWLOC_DECLSPEC void hwloc_bitmap_andnot (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);

/** \brief Xor bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
 *
 * \p res can be the same as \p bitmap1 or \p bitmap2
 */
HWLOC_DECLSPEC void hwloc_bitmap_xor (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);

/** \brief Negate bitmap \p bitmap and store the result in bitmap \p res
 *
 * \p res can be the same as \p bitmap
 */
HWLOC_DECLSPEC void hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap);


/*
 * Comparing bitmaps.
 */

/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect */
HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;

/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap.
 *
 * \note The empty bitmap is considered included in any other bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure;

/** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2 */
HWLOC_DECLSPEC int hwloc_bitmap_isequal (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;

/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their lowest index.
 *
 * Smaller least significant bit is smaller.
 * The empty bitmap is considered higher than anything.
 */
HWLOC_DECLSPEC int hwloc_bitmap_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;

/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 in lexicographic order.
 *
 * Lexicographic comparison of bitmaps, starting from their highest indexes.
 * Compare last indexes first, then second, etc.
 * The empty bitmap is considered lower than anything.
 *
 * \note This is different from the non-existing hwloc_bitmap_compare_last()
 * which would only compare the highest index of each bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_BITMAP_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/cpuset.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2010 inria.  All rights reserved.
 * Copyright © 2009-2010, 2013 Université Bordeaux 1
 * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief The old deprecated Cpuset API.
 * This interface should not be used anymore, it will be dropped in a later release.
 *
 * hwloc/bitmap.h should be used instead. Most hwloc_cpuset_foo functions are
 * replaced with hwloc_bitmap_foo. The only exceptions are:
 * - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf
 * - hwloc_cpuset_cpu -> hwloc_bitmap_only
 * - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut
 */

#ifndef HWLOC_CPUSET_H
#define HWLOC_CPUSET_H

#ifdef __cplusplus
extern "C" {
#endif

#include "hwloc/bitmap.h"

/*
 * Deprecated hwloc_cpuset_* compatibility wrappers.
 *
 * Each function below is a static inline one-liner that forwards its
 * arguments unchanged to a hwloc_bitmap_* function and returns its result
 * (if any).  The target has the same suffix as the wrapper, except for
 * hwloc_cpuset_from_string, hwloc_cpuset_cpu and hwloc_cpuset_all_but_cpu,
 * which forward to hwloc_bitmap_sscanf, hwloc_bitmap_only and
 * hwloc_bitmap_allbut respectively.
 */

/* Allocation, freeing and copying */
static __hwloc_inline hwloc_bitmap_t __hwloc_attribute_deprecated hwloc_cpuset_alloc(void) { return hwloc_bitmap_alloc(); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_free(hwloc_bitmap_t bitmap) { hwloc_bitmap_free(bitmap); }
static __hwloc_inline hwloc_bitmap_t __hwloc_attribute_deprecated hwloc_cpuset_dup(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_dup(bitmap); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src) { hwloc_bitmap_copy(dst, src); }
/* String conversion */
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_snprintf(buf, buflen, bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_asprintf(strp, bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_from_string(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string) { return hwloc_bitmap_sscanf(bitmap, string); }
/* Building and converting from/to unsigned long masks */
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_zero(hwloc_bitmap_t bitmap) { hwloc_bitmap_zero(bitmap); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_fill(hwloc_bitmap_t bitmap) { hwloc_bitmap_fill(bitmap); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask) { hwloc_bitmap_from_ulong(bitmap, mask); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask) { hwloc_bitmap_from_ith_ulong(bitmap, i, mask); }
static __hwloc_inline unsigned __hwloc_attribute_deprecated long hwloc_cpuset_to_ulong(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_to_ulong(bitmap); }
static __hwloc_inline unsigned __hwloc_attribute_deprecated long hwloc_cpuset_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) { return hwloc_bitmap_to_ith_ulong(bitmap, i); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_cpu(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_only(bitmap, index_); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_all_but_cpu(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_allbut(bitmap, index_); }
/* Setting and clearing bits.
 * NOTE: the *_range wrappers declare \p end as unsigned, whereas the
 * hwloc_bitmap_*_range functions they call take an int \p end, so the
 * value is implicitly converted at the call. */
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_set(bitmap, index_); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set_range(hwloc_bitmap_t bitmap, unsigned begin, unsigned end) { hwloc_bitmap_set_range(bitmap, begin, end); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask) { hwloc_bitmap_set_ith_ulong(bitmap, i, mask); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_clr(hwloc_bitmap_t bitmap, unsigned index_) { hwloc_bitmap_clr(bitmap, index_); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_clr_range(hwloc_bitmap_t bitmap, unsigned begin, unsigned end) { hwloc_bitmap_clr_range(bitmap, begin, end); }
/* Tests and comparisons */
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isset(hwloc_const_bitmap_t bitmap, unsigned index_) { return hwloc_bitmap_isset(bitmap, index_); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_iszero(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_iszero(bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isfull(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_isfull(bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isequal(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_isequal(bitmap1, bitmap2); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_intersects(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_intersects(bitmap1, bitmap2); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_isincluded(hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) { return hwloc_bitmap_isincluded(sub_bitmap, super_bitmap); }
/* Combining bitmaps */
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_or(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_or(res, bitmap1, bitmap2); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_and(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_and(res, bitmap1, bitmap2); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_andnot(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_andnot(res, bitmap1, bitmap2); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_xor(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { hwloc_bitmap_xor(res, bitmap1, bitmap2); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_not(hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap) { hwloc_bitmap_not(res, bitmap); }
/* Index queries.
 * NOTE: hwloc_cpuset_next() declares \p prev as unsigned, whereas
 * hwloc_bitmap_next() takes an int \p prev; the value is implicitly
 * converted at the call. */
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_first(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_first(bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_last(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_last(bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_next(hwloc_const_bitmap_t bitmap, unsigned prev) { return hwloc_bitmap_next(bitmap, prev); }
static __hwloc_inline void __hwloc_attribute_deprecated hwloc_cpuset_singlify(hwloc_bitmap_t bitmap) { hwloc_bitmap_singlify(bitmap); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_compare_first(bitmap1, bitmap2); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) { return hwloc_bitmap_compare(bitmap1, bitmap2); }
static __hwloc_inline int __hwloc_attribute_deprecated hwloc_cpuset_weight(hwloc_const_bitmap_t bitmap) { return hwloc_bitmap_weight(bitmap); }

/* The loop macros are plain aliases of their hwloc_bitmap_* counterparts */
#define hwloc_cpuset_foreach_begin hwloc_bitmap_foreach_begin
#define hwloc_cpuset_foreach_end hwloc_bitmap_foreach_end

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* HWLOC_CPUSET_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/cuda.h
================================================
/*
 * Copyright © 2010-2017 Inria.  All rights reserved.
 * Copyright © 2010-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and the CUDA Driver API.
 *
 * Applications that use both hwloc and the CUDA Driver API may want to
 * include this file so as to get topology information for CUDA devices.
 *
 */

#ifndef HWLOC_CUDA_H
#define HWLOC_CUDA_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#include <hwloc/helper.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#endif

#include <cuda.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
 *
 * This interface offers ways to retrieve topology information about
 * CUDA devices when using the CUDA Driver API.
 *
 * @{
 */

/** \brief Query the PCI domain, bus and device IDs of the CUDA device \p cudevice.
 *
 * The IDs are stored in \p domain, \p bus and \p dev.  When \c CUDA_VERSION
 * is below 4000 the domain is not queried and is reported as 0.
 *
 * Device \p cudevice must match the local machine.
 *
 * \return 0 on success, or -1 with errno set to \c ENOSYS as soon as one
 * of the cuDeviceGetAttribute() queries does not return \c CUDA_SUCCESS.
 */
static __hwloc_inline int
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
			      CUdevice cudevice, int *domain, int *bus, int *dev)
{
  /* Queries are issued in domain, bus, device order; the first failure aborts. */
#if CUDA_VERSION >= 4000
  if (cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice) != CUDA_SUCCESS)
    goto query_failed;
#else
  *domain = 0;
#endif

  if (cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice) != CUDA_SUCCESS)
    goto query_failed;

  if (cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice) != CUDA_SUCCESS)
    goto query_failed;

  return 0;

 query_failed:
  /* Single error exit shared by all three queries */
  errno = ENOSYS;
  return -1;
}

/** \brief Get the CPU set of logical processors that are physically
 * close to device \p cudevice.
 *
 * Return the CPU set describing the locality of the CUDA device \p cudevice.
 *
 * Topology \p topology and device \p cudevice must match the local machine.
 * I/O devices detection and the CUDA component are not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_cuda_get_device_osdev()
 * and hwloc_cuda_get_device_osdev_by_index().
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success (\p set is filled).  On Linux, -1 is returned if the
 * PCI IDs of the device cannot be obtained, if \p topology does not describe
 * the current system (errno is set to \c EINVAL), or if the sysfs
 * \c local_cpus file of the device cannot be opened.
 */
static __hwloc_inline int
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
			     CUdevice cudevice, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
  char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
  FILE *sysfile = NULL;
  int domainid, busid, deviceid;

  if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
    return -1;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  /* %x takes unsigned arguments, hence the casts (as done in
   * hwloc_cudart_get_device_cpuset()); the write is bounded by the buffer size. */
  snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus",
	   (unsigned) domainid, (unsigned) busid, (unsigned) deviceid);
  sysfile = fopen(path, "r");
  if (!sysfile)
    return -1;

  /* An unparsable or empty cpumap falls back to the complete cpuset */
  if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0
      || hwloc_bitmap_iszero(set))
    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

  fclose(sysfile);
#else
  /* Non-Linux systems simply get a full cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
  return 0;
}

/** \brief Look up the hwloc PCI device object that matches the
 * CUDA device \p cudevice.
 *
 * The PCI domain, bus and device IDs of \p cudevice are queried and the
 * PCI object with these IDs and function 0 is searched in \p topology.
 * Return NULL if the IDs cannot be obtained or if there is no such object.
 *
 * Topology \p topology and device \p cudevice must match the local machine.
 * I/O devices detection must be enabled in topology \p topology.
 * The CUDA component is not needed in the topology.
 */
static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
{
  int pci_domain, pci_bus, pci_dev;
  int status;

  status = hwloc_cuda_get_device_pci_ids(topology, cudevice, &pci_domain, &pci_bus, &pci_dev);
  if (status != 0)
    return NULL;

  /* The lookup always uses PCI function 0 */
  return hwloc_get_pcidev_by_busid(topology, pci_domain, pci_bus, pci_dev, 0);
}

/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
 *
 * Return the hwloc OS device object that describes the given
 * CUDA device \p cudevice. Return NULL if there is none, or if the
 * PCI IDs of \p cudevice cannot be obtained.
 *
 * An OS device matches when its name starts with "cuda" and its parent is
 * a PCI device whose domain, bus and device IDs are those of \p cudevice,
 * with function 0.  OS devices without a name are skipped.
 *
 * Topology \p topology and device \p cudevice must match the local machine.
 * I/O devices detection and the CUDA component must be enabled in the topology.
 * If not, the locality of the object may still be found using
 * hwloc_cuda_get_device_cpuset().
 *
 * \note The corresponding hwloc PCI device may be found by looking
 * at the result parent pointer.
 */
static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
{
	hwloc_obj_t osdev = NULL;
	int domain, bus, dev;

	if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
		return NULL;

	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
		hwloc_obj_t pcidev = osdev->parent;
		/* Skip unnamed OS devices instead of handing a NULL pointer to
		 * strncmp(), as hwloc_cuda_get_device_osdev_by_index() does. */
		if (!osdev->name || strncmp(osdev->name, "cuda", 4))
			continue;
		if (pcidev
		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
		    && (int) pcidev->attr->pcidev.domain == domain
		    && (int) pcidev->attr->pcidev.bus == bus
		    && (int) pcidev->attr->pcidev.dev == dev
		    && pcidev->attr->pcidev.func == 0)
			return osdev;
	}

	return NULL;
}

/** \brief Find the hwloc OS device object of the CUDA device whose
 * index is \p idx.
 *
 * The OS devices of \p topology are walked in order; the first co-processor
 * OS device whose name is "cuda" followed by the decimal index \p idx is
 * returned. Return NULL if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the CUDA component must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 *
 * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
 */
static __hwloc_inline hwloc_obj_t
hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
{
	hwloc_obj_t candidate;

	for (candidate = hwloc_get_next_osdev(topology, NULL);
	     candidate != NULL;
	     candidate = hwloc_get_next_osdev(topology, candidate)) {
		/* Only co-processor OS devices are considered */
		if (candidate->attr->osdev.type != HWLOC_OBJ_OSDEV_COPROC)
			continue;
		/* The name must exist and begin with "cuda" */
		if (!candidate->name)
			continue;
		if (strncmp("cuda", candidate->name, 4) != 0)
			continue;
		/* What follows the prefix is parsed as the device index */
		if (atoi(candidate->name + 4) == (int) idx)
			return candidate;
	}

	return NULL;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_CUDA_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/cudart.h
================================================
/*
 * Copyright © 2010-2017 Inria.  All rights reserved.
 * Copyright © 2010-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and the CUDA Runtime API.
 *
 * Applications that use both hwloc and the CUDA Runtime API may want to
 * include this file so as to get topology information for CUDA devices.
 *
 */

#ifndef HWLOC_CUDART_H
#define HWLOC_CUDART_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#include <hwloc/helper.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#endif

#include <cuda.h> /* for CUDA_VERSION */
#include <cuda_runtime_api.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API
 *
 * This interface offers ways to retrieve topology information about
 * CUDA devices when using the CUDA Runtime API.
 *
 * @{
 */

/** \brief Query the PCI domain, bus and device IDs of the CUDA device whose index is \p idx.
 *
 * The IDs are read from the properties returned by cudaGetDeviceProperties()
 * and stored in \p domain, \p bus and \p dev.  When \c CUDA_VERSION is below
 * 4000 the domain is reported as 0.
 *
 * Device index \p idx must match the local machine.
 *
 * \return 0 on success, or -1 with errno set to \c ENOSYS if the device
 * properties cannot be retrieved (no output is written in that case).
 */
static __hwloc_inline int
hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
				int idx, int *domain, int *bus, int *dev)
{
  struct cudaDeviceProp props;

  /* Any non-zero status from the runtime is treated as a failure */
  if (cudaGetDeviceProperties(&props, idx)) {
    errno = ENOSYS;
    return -1;
  }

  *bus = props.pciBusID;
  *dev = props.pciDeviceID;
#if CUDA_VERSION >= 4000
  *domain = props.pciDomainID;
#else
  *domain = 0;
#endif

  return 0;
}

/** \brief Fill \p set with the logical processors that are physically
 * close to the CUDA device whose index is \p idx.
 *
 * On Linux the locality is read from the sysfs \c local_cpus file of the
 * PCI device; if that file cannot be parsed or yields an empty set, the
 * complete cpuset of \p topology is used instead.  Other systems simply
 * get the complete cpuset.
 *
 * Topology \p topology and device \p idx must match the local machine.
 * I/O devices detection and the CUDA component are not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_cudart_get_device_osdev_by_index().
 *
 * \return 0 on success.  On Linux, -1 is returned if the PCI IDs of the
 * device cannot be obtained, if \p topology does not describe the current
 * system (errno is set to \c EINVAL), or if the sysfs file cannot be opened.
 */
static __hwloc_inline int
hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
			       int idx, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
  /* Linux: the kernel exposes the device locality through sysfs */
#define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128
  char sysfs_path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX];
  FILE *cpumap = NULL;
  int pci_domain, pci_bus, pci_dev;
  int parse_failed;

  if (hwloc_cudart_get_device_pci_ids(topology, idx, &pci_domain, &pci_bus, &pci_dev) != 0)
    return -1;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  sprintf(sysfs_path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) pci_domain, (unsigned) pci_bus, (unsigned) pci_dev);
  cpumap = fopen(sysfs_path, "r");
  if (cpumap == NULL)
    return -1;

  /* Fall back to the complete cpuset when nothing usable was read;
   * the emptiness test only runs after a successful parse. */
  parse_failed = (hwloc_linux_parse_cpumap_file(cpumap, set) < 0);
  if (parse_failed || hwloc_bitmap_iszero(set))
    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

  fclose(cpumap);
#else
  /* Non-Linux systems simply get a full cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
  return 0;
}

/** \brief Look up the hwloc PCI device object that matches the
 * CUDA device whose index is \p idx.
 *
 * The PCI domain, bus and device IDs of the device are queried and the
 * PCI object with these IDs and function 0 is searched in \p topology.
 * Return NULL if the IDs cannot be obtained or if there is no such object.
 *
 * Topology \p topology and device \p idx must match the local machine.
 * I/O devices detection must be enabled in topology \p topology.
 * The CUDA component is not needed in the topology.
 */
static __hwloc_inline hwloc_obj_t
hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
{
  int pci_domain, pci_bus, pci_dev;
  int status;

  status = hwloc_cudart_get_device_pci_ids(topology, idx, &pci_domain, &pci_bus, &pci_dev);
  if (status != 0)
    return NULL;

  /* The lookup always uses PCI function 0 */
  return hwloc_get_pcidev_by_busid(topology, pci_domain, pci_bus, pci_dev, 0);
}

/** \brief Find the hwloc OS device object of the CUDA device whose
 * index is \p idx.
 *
 * The OS devices of \p topology are walked in order; the first co-processor
 * OS device whose name is "cuda" followed by the decimal index \p idx is
 * returned. Return NULL if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the CUDA component must be enabled in the topology.
 * If not, the locality of the object may still be found using
 * hwloc_cudart_get_device_cpuset().
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 *
 * \note This function is identical to hwloc_cuda_get_device_osdev_by_index().
 */
static __hwloc_inline hwloc_obj_t
hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
{
	hwloc_obj_t cursor = hwloc_get_next_osdev(topology, NULL);

	while (cursor != NULL) {
		/* Checks run in this order: device type, name presence,
		 * "cuda" prefix, then the numeric suffix. */
		int is_coproc = (HWLOC_OBJ_OSDEV_COPROC == cursor->attr->osdev.type);

		if (is_coproc && cursor->name) {
			if (!strncmp("cuda", cursor->name, 4)
			    && atoi(cursor->name + 4) == (int) idx)
				return cursor;
		}

		cursor = hwloc_get_next_osdev(topology, cursor);
	}

	return NULL;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_CUDART_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/deprecated.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2014 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/**
 * This file contains the inline code of functions declared in hwloc.h
 */

#ifndef HWLOC_DEPRECATED_H
#define HWLOC_DEPRECATED_H

#ifndef HWLOC_H
#error Please include the main hwloc.h instead
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* backward compat with v1.10 before Socket->Package renaming */
#define HWLOC_OBJ_SOCKET HWLOC_OBJ_PACKAGE
/* backward compat with v1.10 before Node->NUMANode clarification */
#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE

/** \brief Return an object type from the string
 *
 * \return -1 if unrecognized.
 */
HWLOC_DECLSPEC hwloc_obj_type_t hwloc_obj_type_of_string (const char * string) __hwloc_attribute_pure __hwloc_attribute_deprecated;

/** \brief Stringify a given topology object into a human-readable form.
 *
 * \note This function is deprecated in favor of hwloc_obj_type_snprintf()
 * and hwloc_obj_attr_snprintf() since it is not very flexible and
 * only prints physical/OS indexes.
 *
 * Fill string \p string up to \p size characters with the description
 * of topology object \p obj in topology \p topology.
 *
 * If \p verbose is set, a longer description is used. Otherwise a
 * short description is used.
 *
 * \p indexprefix is used to prefix the \p os_index attribute number of
 * the object in the description. If \c NULL, the \c # character is used.
 *
 * If \p size is 0, \p string may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written otherwise (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_obj_snprintf(char * __hwloc_restrict string, size_t size,
				      hwloc_topology_t topology, hwloc_obj_t obj,
				      const char * __hwloc_restrict indexprefix, int verbose) __hwloc_attribute_deprecated;

/** \brief Distribute \p n items over the topology under \p root
 *
 * Array \p set will be filled with \p n cpusets recursively distributed
 * linearly over the topology under \p root, down to depth \p until (which can
 * be INT_MAX to distribute down to the finest level).
 *
 * This is typically useful when an application wants to distribute \p n
 * threads over a machine, giving each of them as much private cache as
 * possible and keeping them locally in number order.
 *
 * The caller may typically want to also call hwloc_bitmap_singlify()
 * before binding a thread so that it does not move at all.
 *
 * This deprecated wrapper simply forwards to hwloc_distrib() with
 * \p root as the only root and 0 as last argument.
 *
 * \note This function requires the \p root object to have a CPU set.
 */
static __hwloc_inline void
hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *set, unsigned n, unsigned until) __hwloc_attribute_deprecated;
static __hwloc_inline void
hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *set, unsigned n, unsigned until)
{
  /* view the single root as a one-element array of roots */
  hwloc_obj_t *single_root = &root;

  (void) hwloc_distrib(topology, single_root, 1, set, n, until, 0);
}

/** \brief Distribute \p n items over the topology under \p roots
 *
 * Same as hwloc_distribute(), except that an array of \p n_roots roots
 * is given instead of a single root.
 *
 * This deprecated wrapper simply forwards all its arguments to
 * hwloc_distrib(), with 0 as last argument.
 *
 * \note This function requires the \p roots objects to have a CPU set.
 */
static __hwloc_inline void
hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *set, unsigned n, unsigned until) __hwloc_attribute_deprecated;
static __hwloc_inline void
hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *set, unsigned n, unsigned until)
{
  /* plain forwarding call; whatever hwloc_distrib() returns is dropped */
  (void) hwloc_distrib(topology,
                       roots, n_roots,
                       set, n,
                       until,
                       0);
}

#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_DEPRECATED_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/diff.h
================================================
/*
 * Copyright © 2013-2016 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Topology differences.
 */

#ifndef HWLOC_DIFF_H
#define HWLOC_DIFF_H

#ifndef HWLOC_H
#error Please include the main hwloc.h instead
#endif


#ifdef __cplusplus
extern "C" {
#elif 0
}
#endif


/** \defgroup hwlocality_diff Topology differences
 *
 * Applications that manipulate many similar topologies, for instance
 * one for each node of a homogeneous cluster, may want to compress
 * topologies to reduce the memory footprint.
 *
 * This file offers a way to manipulate the difference between topologies
 * and export/import it to/from XML.
 * Compression may therefore be achieved by storing one topology
 * entirely while the others are only described by their differences
 * with the former.
 * The actual topology can be reconstructed when actually needed by
 * applying the precomputed difference to the reference topology.
 *
 * This interface targets very similar nodes.
 * Only very simple differences between topologies are actually
 * supported, for instance a change in the memory size, the name
 * of the object, or some info attribute.
 * More complex differences such as adding or removing objects cannot
 * be represented in the difference structures and therefore return
 * errors.
 *
 * It means that there is no need to apply the difference when
 * looking at the tree organization (how many levels, how many
 * objects per level, what kind of objects, CPU and node sets, etc)
 * and when binding to objects.
 * However the difference must be applied when looking at object
 * attributes such as the name, the memory size or info attributes.
 *
 * @{
 */


/** \brief Type of one object attribute difference.
 *
 * This is the type of the \c type field that starts every member
 * of union hwloc_topology_diff_obj_attr_u.
 */
typedef enum hwloc_topology_diff_obj_attr_type_e {
  /** \brief The object local memory is modified.
   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_uint64_s
   * (and the index field is ignored).
   */
  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE,

  /** \brief The object name is modified.
   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s
   * (and the name field is ignored).
   */
  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME,

  /** \brief The value of an info attribute is modified.
   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s.
   */
  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO
} hwloc_topology_diff_obj_attr_type_t;

/** \brief One object attribute difference.
 *
 * Every member starts with the same \c type field, so the kind of
 * difference can be read through \c generic before accessing
 * the \c uint64 or \c string member.
 */
union hwloc_topology_diff_obj_attr_u {
  /** \brief Leading field common to all members of the union. */
  struct hwloc_topology_diff_obj_attr_generic_s {
    /* each part of the union must start with these */
    hwloc_topology_diff_obj_attr_type_t type;
  } generic;

  /** \brief Integer attribute modification with an optional index. */
  struct hwloc_topology_diff_obj_attr_uint64_s {
    /* used for storing integer attributes */
    hwloc_topology_diff_obj_attr_type_t type;
    hwloc_uint64_t index; /* not used for SIZE */
    hwloc_uint64_t oldvalue;
    hwloc_uint64_t newvalue;
  } uint64;

  /** \brief String attribute modification with an optional name. */
  struct hwloc_topology_diff_obj_attr_string_s {
    /* used for storing name and info pairs */
    hwloc_topology_diff_obj_attr_type_t type;
    char *name; /* not used for NAME */
    char *oldvalue;
    char *newvalue;
  } string;
};


/** \brief Type of one element of a difference list.
 *
 * This is the type of the \c type field that starts every member
 * of union hwloc_topology_diff_u.
 */
typedef enum hwloc_topology_diff_type_e {
  /** \brief An object attribute was changed.
   * The union is a hwloc_topology_diff_u::hwloc_topology_diff_obj_attr_s.
   */
  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR,

  /** \brief The difference is too complex,
   * it cannot be represented. The difference below
   * this object has not been checked.
   * hwloc_topology_diff_build() will return 1.
   *
   * The union is a hwloc_topology_diff_u::hwloc_topology_diff_too_complex_s.
   */
  HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
} hwloc_topology_diff_type_t;

/** \brief One element of a difference list between two topologies.
 *
 * Elements are chained through their \c next field.
 * Note that ::hwloc_topology_diff_t is a pointer to this union,
 * not the union itself.
 */
typedef union hwloc_topology_diff_u {
  /* Leading fields common to all members of the union. */
  struct hwloc_topology_diff_generic_s {
    /* each part of the union must start with these */
    hwloc_topology_diff_type_t type;
    union hwloc_topology_diff_u * next; /* pointer to the next element of the list, or NULL */
  } generic;

  /* A difference in an object attribute. */
  struct hwloc_topology_diff_obj_attr_s {
    hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */
    union hwloc_topology_diff_u * next;
    /* Depth and index of the object, followed by its attribute difference */
    unsigned obj_depth;
    unsigned obj_index;
    union hwloc_topology_diff_obj_attr_u diff;
  } obj_attr;

  /* A difference that is too complex. */
  struct hwloc_topology_diff_too_complex_s {
    hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */
    union hwloc_topology_diff_u * next;
    /* Where we had to stop computing the diff in the first topology */
    unsigned obj_depth;
    unsigned obj_index;
  } too_complex;
} * hwloc_topology_diff_t;


/** \brief Compute the difference between 2 topologies.
 *
 * The difference is stored as a list of ::hwloc_topology_diff_t entries
 * starting at \p diff.
 * It is computed by doing a depth-first traversal of both topology trees
 * simultaneously.
 *
 * If the difference between 2 objects is too complex to be represented
 * (for instance if some objects have different types, or different numbers
 * of children), a special diff entry of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
 * is queued.
 * The computation of the diff does not continue below these objects.
 * So each such diff entry means that the difference between two subtrees
 * could not be computed.
 *
 * \return 0 if the difference can be represented properly.
 *
 * \return 0 with \p diff pointing to NULL if there is no difference
 * between the topologies.
 *
 * \return 1 if the difference is too complex (see above). Some entries in
 * the list will be of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX.
 *
 * \return -1 on any other error.
 *
 * \note \p flags is currently not used. It should be 0.
 *
 * \note The output diff has to be freed with hwloc_topology_diff_destroy().
 *
 * \note The output diff can only be exported to XML or passed to
 * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type
 * ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed.
 *
 * \note The output diff may be modified by removing some entries from
 * the list. The removed entries should be freed by passing them
 * to hwloc_topology_diff_destroy() (possibly as another list).
*/
HWLOC_DECLSPEC int hwloc_topology_diff_build(hwloc_topology_t topology, hwloc_topology_t newtopology, unsigned long flags, hwloc_topology_diff_t *diff);

/** \brief Flags to be given to hwloc_topology_diff_apply().
 *
 * Each flag is a distinct bit so that flags may be OR'ed together.
 */
enum hwloc_topology_diff_apply_flags_e {
  /** \brief Apply topology diff in reverse direction.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE = (1UL<<0)
};

/** \brief Apply a topology diff to an existing topology.
 *
 * \p flags is an OR'ed set of ::hwloc_topology_diff_apply_flags_e.
 *
 * The new topology is modified in place. hwloc_topology_dup()
 * may be used to duplicate it before patching.
 *
 * If the difference cannot be applied entirely, all previous applied
 * elements are unapplied before returning.
 *
 * \return 0 on success.
 *
 * \return -N if applying the difference failed while trying
 * to apply the N-th part of the difference. For instance -1
 * is returned if the very first difference element could not
 * be applied.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);

/** \brief Destroy a list of topology differences.
 *
 * \note The \p topology parameter must be a valid topology
 * but it is not required that it is related to \p diff.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_t topology, hwloc_topology_diff_t diff);

/** \brief Load a list of topology differences from a XML file.
 *
 * If not \c NULL, \p refname will be filled with the identifier
 * string of the reference topology for the difference file,
 * if any was specified in the XML file.
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 *
 * \note The \p topology parameter must be a valid topology
 * but it is not required that it is related to \p diff.
 *
 * \note the pointer returned in refname should later be freed
 * by the caller.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(hwloc_topology_t topology, const char *xmlpath, hwloc_topology_diff_t *diff, char **refname);

/** \brief Export a list of topology differences to a XML file.
 *
 * If not \c NULL, \p refname defines an identifier string
 * for the reference topology which was used as a base when
 * computing this difference.
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 * This attribute is given back when reading the diff from XML.
 *
 * \note The \p topology parameter must be a valid topology
 * but it is not required that it is related to \p diff.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);

/** \brief Load a list of topology differences from a XML buffer.
 *
 * If not \c NULL, \p refname will be filled with the identifier
 * string of the reference topology for the difference file,
 * if any was specified in the XML file.
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 *
 * \note The \p topology parameter must be a valid topology
 * but it is not required that it is related to \p diff.
 *
 * \note the pointer returned in refname should later be freed
 * by the caller.
  */
HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(hwloc_topology_t topology, const char *xmlbuffer, int buflen, hwloc_topology_diff_t *diff, char **refname);

/** \brief Export a list of topology differences to a XML buffer.
 *
 * If not \c NULL, \p refname defines an identifier string
 * for the reference topology which was used as a base when
 * computing this difference.
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 * This attribute is given back when reading the diff from XML.
 *
 * \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
 *
 * \note The \p topology parameter must be a valid topology
 * but it is not required that it is related to \p diff.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_DIFF_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/gl.h
================================================
/*
 * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
 * Copyright © 2012-2013 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and OpenGL displays.
 *
 * Applications that use both hwloc and OpenGL may want to include
 * this file so as to get topology information for OpenGL displays.
 */

#ifndef HWLOC_GL_H
#define HWLOC_GL_H

#include <hwloc.h>

#include <stdio.h>
#include <string.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_gl Interoperability with OpenGL displays
 *
 * This interface offers ways to retrieve topology information about
 * OpenGL displays.
 *
 * Only the NVIDIA display locality information is currently available,
 * using the NV-CONTROL X11 extension and the NVCtrl library.
 *
 * @{
 */

/** \brief Look up the hwloc OS device object of the OpenGL display
 * given by port and device index.
 *
 * All OS devices of \p topology are walked; the first GPU OS device
 * whose name parses as ":port.device" with port (server) \p port and
 * device (screen) \p device is returned.
 *
 * \return the OS device object, or \c NULL with errno set to \c EINVAL
 * if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the GL component must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology,
					  unsigned port, unsigned device)
{
	unsigned parsed_port = (unsigned) -1;
	unsigned parsed_device = (unsigned) -1;
	hwloc_obj_t cur;

	for (cur = hwloc_get_next_osdev(topology, NULL);
	     cur != NULL;
	     cur = hwloc_get_next_osdev(topology, cur)) {
		/* skip anything that is not a named GPU OS device */
		if (cur->attr->osdev.type != HWLOC_OBJ_OSDEV_GPU || !cur->name)
			continue;
		/* both numbers must be parsed for the name to be usable */
		if (sscanf(cur->name, ":%u.%u", &parsed_port, &parsed_device) != 2)
			continue;
		if (parsed_port == port && parsed_device == device)
			return cur;
	}

	errno = EINVAL;
	return NULL;
}

/** \brief Look up the hwloc OS device object of the OpenGL display
 * given by name.
 *
 * All OS devices of \p topology are walked; the first GPU OS device
 * whose name is exactly \p name is returned. Display names are built
 * as ":port.device" such as ":0.0" .
 *
 * \return the OS device object, or \c NULL with errno set to \c EINVAL
 * if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the GL component must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
				   const char *name)
{
	hwloc_obj_t cur;

	for (cur = hwloc_get_next_osdev(topology, NULL);
	     cur != NULL;
	     cur = hwloc_get_next_osdev(topology, cur)) {
		/* skip anything that is not a named GPU OS device */
		if (cur->attr->osdev.type != HWLOC_OBJ_OSDEV_GPU || !cur->name)
			continue;
		if (strcmp(name, cur->name) == 0)
			return cur;
	}

	errno = EINVAL;
	return NULL;
}

/** \brief Get the OpenGL display port and device corresponding
 * to the given hwloc OS object.
 *
 * Store the OpenGL display port (server) in \p port and device (screen)
 * in \p device that correspond to the given hwloc OS device object \p osdev.
 *
 * \return 0 on success.
 *
 * \return -1 with errno set to \c EINVAL if \p osdev is not a GPU OS device,
 * or if its name is missing or is not of the form ":port.device".
 * \p port and \p device are left untouched in that case.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the GL component must be enabled in the topology.
 */
static __hwloc_inline int
hwloc_gl_get_display_by_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
			      hwloc_obj_t osdev,
			      unsigned *port, unsigned *device)
{
	unsigned x = (unsigned) -1, y = (unsigned) -1;
	if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
	    /* a nameless device cannot be parsed, and passing NULL to sscanf() is undefined */
	    && osdev->name
	    && sscanf(osdev->name, ":%u.%u", &x, &y) == 2) {
		/* only write the outputs once both numbers were parsed */
		*port = x;
		*device = y;
		return 0;
	}
	errno = EINVAL;
	return -1;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_GL_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/glibc-sched.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2013 inria.  All rights reserved.
 * Copyright © 2009-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and glibc scheduling routines.
 *
 * Applications that use both hwloc and glibc scheduling routines such as
 * sched_getaffinity() or pthread_attr_setaffinity_np() may want to include
 * this file so as to ease conversion between their respective types.
 */

#ifndef HWLOC_GLIBC_SCHED_H
#define HWLOC_GLIBC_SCHED_H

#include <hwloc.h>
#include <hwloc/helper.h>
#include <assert.h>

#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)
#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h
#endif


#ifdef __cplusplus
extern "C" {
#endif


#ifdef HWLOC_HAVE_CPU_SET


/** \defgroup hwlocality_glibc_sched Interoperability with glibc sched affinity
 *
 * This interface offers ways to convert between hwloc cpusets and glibc cpusets
 * such as those manipulated by sched_getaffinity() or pthread_attr_setaffinity_np().
 *
 * \note Topology \p topology must match the current machine.
 *
 * @{
 */


/** \brief Convert hwloc CPU set \p hwlocset into glibc sched affinity CPU set \p schedset
 *
 * \p schedset is first cleared, then one bit is set in it for each index
 * set in \p hwlocset.
 *
 * This function may be used before calling sched_setaffinity or any other function
 * that takes a cpu_set_t as input parameter.
 *
 * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
 *
 * \return 0 in all cases.
 */
static __hwloc_inline int
hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
				    cpu_set_t *schedset, size_t schedsetsize)
{
  unsigned idx;

#ifdef CPU_ZERO_S
  /* sized macros are available: honor the caller-given set size */
  CPU_ZERO_S(schedsetsize, schedset);
  hwloc_bitmap_foreach_begin(idx, hwlocset)
    CPU_SET_S(idx, schedsetsize, schedset);
  hwloc_bitmap_foreach_end();
#else /* !CPU_ZERO_S */
  /* only fixed-size sets are supported here, so the size must be the static one */
  CPU_ZERO(schedset);
  assert(schedsetsize == sizeof(cpu_set_t));
  hwloc_bitmap_foreach_begin(idx, hwlocset)
    CPU_SET(idx, schedset);
  hwloc_bitmap_foreach_end();
#endif /* !CPU_ZERO_S */

  return 0;
}

/** \brief Convert glibc sched affinity CPU set \p schedset into hwloc CPU set \p hwlocset
 *
 * \p hwlocset is first emptied, then one index is set in it for each CPU
 * set in \p schedset.
 *
 * This function may be used after calling sched_getaffinity or any other function
 * that fills a cpu_set_t as output parameter.
 *
 * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
 *
 * \return 0 in all cases.
 */
static __hwloc_inline int
hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
                                       const cpu_set_t *schedset, size_t schedsetsize)
{
  int cpu;
#ifdef CPU_ZERO_S
  int remaining;
#endif

  hwloc_bitmap_zero(hwlocset);

#ifdef CPU_ZERO_S
  /* walk CPU numbers upwards until every set bit has been seen */
  remaining = CPU_COUNT_S(schedsetsize, schedset);
  for (cpu = 0; remaining != 0; cpu++) {
    if (!CPU_ISSET_S(cpu, schedsetsize, schedset))
      continue;
    hwloc_bitmap_set(hwlocset, cpu);
    remaining--;
  }
#else /* !CPU_ZERO_S */
  /* sched.h does not support dynamic cpu_set_t (introduced in glibc 2.7),
   * assume we have a very old interface without CPU_COUNT (added in 2.6),
   * hence test every possible CPU number
   */
  assert(schedsetsize == sizeof(cpu_set_t));
  cpu = 0;
  while (cpu < CPU_SETSIZE) {
    if (CPU_ISSET(cpu, schedset))
      hwloc_bitmap_set(hwlocset, cpu);
    cpu++;
  }
#endif /* !CPU_ZERO_S */

  return 0;
}

/** @} */


#endif /* CPU_SET */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_GLIBC_SCHED_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/helper.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2016 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief High-level hwloc traversal helpers.
 */

#ifndef HWLOC_HELPER_H
#define HWLOC_HELPER_H

#ifndef HWLOC_H
#error Please include the main hwloc.h instead
#endif

#include <stdlib.h>
#include <errno.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set
 * @{
 */

/** \brief Get the first largest object included in the given cpuset \p set.
 *
 * The tree is descended from the root object, always moving to the first
 * child whose cpuset intersects \p set, until an object entirely included
 * in \p set is reached.
 *
 * \return the first object that is included in \p set and whose parent is not.
 * \return \c NULL if the root object has no cpuset or does not intersect \p set.
 *
 * This is convenient for iterating over all largest objects within a CPU set
 * by doing a loop getting the first largest object and clearing its CPU set
 * from the remaining CPU set.
 *
 * \note This function cannot work if the root object does not have a CPU set,
 * e.g. if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  hwloc_obj_t cur = hwloc_get_root_obj(topology);
  hwloc_obj_t kid;

  if (!cur->cpuset || !hwloc_bitmap_intersects(cur->cpuset, set))
    return NULL;

  /* cur intersects the set; go down as long as it is not entirely included */
  while (!hwloc_bitmap_isincluded(cur->cpuset, set)) {
    /* pick the first child that intersects the set */
    for (kid = cur->first_child; kid != NULL; kid = kid->next_sibling)
      if (kid->cpuset && hwloc_bitmap_intersects(kid->cpuset, set))
        break;
    if (kid == NULL)
      /* no child intersects, stop at their father */
      break;
    cur = kid;
  }

  /* cur is either included in the set, or has no intersecting child */
  return cur;
}

/** \brief Get the set of largest objects covering exactly a given cpuset \p set
 *
 * \return the number of objects returned in \p objs.
 *
 * \note This function cannot work if the root object does not have a CPU set,
 * e.g. if the topology is made of different machines.
 */
HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set,
						 hwloc_obj_t * __hwloc_restrict objs, int max);

/** \brief Return the next object at depth \p depth included in CPU set \p set.
 *
 * If \p prev is \c NULL, return the first object at depth \p depth
 * included in \p set.  The next invocation should pass the previous
 * return value in \p prev so as to obtain the next object in \p set.
 *
 * \return \c NULL when no further object matches, or when the object
 * following \p prev at this depth has no CPU set.
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects at the given depth do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					   unsigned depth, hwloc_obj_t prev)
{
  hwloc_obj_t cand = hwloc_get_next_obj_by_depth(topology, depth, prev);

  if (cand == NULL || cand->cpuset == NULL)
    return NULL;

  /* walk the level until a non-empty object included in the set shows up */
  for (; cand != NULL; cand = cand->next_cousin) {
    if (hwloc_bitmap_iszero(cand->cpuset))
      continue;
    if (hwloc_bitmap_isincluded(cand->cpuset, set))
      return cand;
  }
  return NULL;
}

/** \brief Return the next object of type \p type included in CPU set \p set.
 *
 * The depth of \p type is looked up, then the work is delegated to
 * hwloc_get_next_obj_inside_cpuset_by_depth().
 *
 * If there are multiple or no depth for given type, return \c NULL
 * and let the caller fallback to
 * hwloc_get_next_obj_inside_cpuset_by_depth().
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects of the given type do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					  hwloc_obj_type_t type, hwloc_obj_t prev)
{
  int depth = hwloc_get_type_depth(topology, type);

  /* only a single, known depth can be iterated */
  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN && depth != HWLOC_TYPE_DEPTH_MULTIPLE)
    return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev);

  return NULL;
}

/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set.
 *
 * The level is walked from its first object, and only non-empty objects
 * included in \p set are counted, starting from index 0.
 *
 * \return \c NULL if the level is empty, if its first object has no CPU set,
 * or if fewer than \p idx + 1 objects match.
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects at the given depth do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
				      unsigned depth, unsigned idx) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
				      unsigned depth, unsigned idx)
{
  hwloc_obj_t cur = hwloc_get_obj_by_depth (topology, depth, 0);
  unsigned to_skip = idx; /* matching objects still to pass before the wanted one */

  if (cur == NULL || cur->cpuset == NULL)
    return NULL;

  for (; cur != NULL; cur = cur->next_cousin) {
    if (hwloc_bitmap_iszero(cur->cpuset) || !hwloc_bitmap_isincluded(cur->cpuset, set))
      continue;
    if (to_skip == 0)
      return cur;
    to_skip--;
  }
  return NULL;
}

/** \brief Return the \p idx -th object of type \p type included in CPU set \p set.
 *
 * The depth of \p type is looked up, then the work is delegated to
 * hwloc_get_obj_inside_cpuset_by_depth().
 *
 * If there are multiple or no depth for given type, return \c NULL
 * and let the caller fallback to
 * hwloc_get_obj_inside_cpuset_by_depth().
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects of the given type do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
				     hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
				     hwloc_obj_type_t type, unsigned idx)
{
  int depth = hwloc_get_type_depth(topology, type);

  /* only a single, known depth can be searched */
  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN && depth != HWLOC_TYPE_DEPTH_MULTIPLE)
    return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx);

  return NULL;
}

/** \brief Return the number of objects at depth \p depth included in CPU set \p set.
 *
 * \return 0 if the level is empty or if its first object has no CPU set.
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects at the given depth do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline unsigned
hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					 unsigned depth) __hwloc_attribute_pure;
static __hwloc_inline unsigned
hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					 unsigned depth)
{
  unsigned total = 0;
  hwloc_obj_t cur = hwloc_get_obj_by_depth (topology, depth, 0);

  if (cur == NULL || cur->cpuset == NULL)
    return 0;

  /* cur is known to be non-NULL here, so test it before moving on */
  do {
    if (!hwloc_bitmap_iszero(cur->cpuset) && hwloc_bitmap_isincluded(cur->cpuset, set))
      total++;
    cur = cur->next_cousin;
  } while (cur != NULL);

  return total;
}

/** \brief Return the number of objects of type \p type included in CPU set \p set.
 *
 * The depth of \p type is looked up, then the counting is delegated to
 * hwloc_get_nbobjs_inside_cpuset_by_depth().
 *
 * If no object for that type exists inside CPU set \p set, 0 is
 * returned.  If there are several levels with objects of that type
 * inside CPU set \p set, -1 is returned.
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 *
 * \note This function cannot work if objects of the given type do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline int
hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					hwloc_obj_type_t type) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
					hwloc_obj_type_t type)
{
  int depth = hwloc_get_type_depth(topology, type);

  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
    return -1; /* FIXME: aggregate nbobjs from different levels? */
  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
    return 0; /* no level of that type, hence no object */

  return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth);
}

/** \brief Return the logical index of \p obj among the objects included in CPU set \p set.
 *
 * Only objects in the same level as \p obj are considered: the level is
 * walked backwards from \p obj through the \c prev_cousin links, and every
 * preceding object that lies inside \p set is counted.
 * If \p set covers the entire topology, this is the logical index of \p obj.
 * Otherwise, this is similar to a logical index within the part of the
 * topology defined by CPU set \p set.
 *
 * \return -1 if the CPU set of \p obj is not included in \p set.
 *
 * \note Objects with empty CPU sets are ignored
 * (otherwise they would be considered included in any given set).
 */
static __hwloc_inline int
hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
				   hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
				   hwloc_obj_t obj)
{
  hwloc_obj_t before;
  int rank = 0;

  if (!hwloc_bitmap_isincluded(obj->cpuset, set))
    return -1;

  /* every earlier cousin that lies inside the set shifts our index by one */
  for (before = obj->prev_cousin; before != NULL; before = before->prev_cousin) {
    if (hwloc_bitmap_iszero(before->cpuset))
      continue;
    if (hwloc_bitmap_isincluded(before->cpuset, set))
      rank++;
  }
  return rank;
}

/** @} */


/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set
 * @{
 */

/** \brief Get the first child of \p parent whose CPU set includes all of \p set.
 *
 * Children are examined in sibling order; children without a CPU set are skipped.
 *
 * \return \c NULL if no child matches or if \p set is empty.
 *
 * \note This function cannot work if parent does not have a CPU set.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
				hwloc_obj_t parent) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
				hwloc_obj_t parent)
{
  hwloc_obj_t kid;

  if (!parent->cpuset || hwloc_bitmap_iszero(set))
    return NULL;

  for (kid = parent->first_child; kid; kid = kid->next_sibling)
    if (kid->cpuset && hwloc_bitmap_isincluded(set, kid->cpuset))
      return kid;

  return NULL;
}

/** \brief Get the lowest object covering at least CPU set \p set
 *
 * Starting from the root object, descend into the child reported by
 * hwloc_get_child_covering_cpuset() until no child covers \p set anymore.
 *
 * \return \c NULL if no object matches or if \p set is empty.
 *
 * \note This function cannot work if the root object does not have a CPU set,
 * e.g. if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  hwloc_obj_t lowest = hwloc_get_root_obj(topology);
  hwloc_obj_t deeper;

  if (hwloc_bitmap_iszero(set))
    return NULL;
  if (!lowest->cpuset)
    return NULL;
  if (!hwloc_bitmap_isincluded(set, lowest->cpuset))
    return NULL;

  /* keep going down as long as one child still covers the whole set */
  while ((deeper = hwloc_get_child_covering_cpuset(topology, set, lowest)) != NULL)
    lowest = deeper;

  return lowest;
}

/** \brief Iterate through same-depth objects covering at least CPU set \p set
 *
 * If object \p prev is \c NULL, return the first object at depth \p
 * depth covering at least part of CPU set \p set.  The next
 * invocation should pass the previous return value in \p prev so as
 * to obtain the next object covering at least another part of \p set.
 *
 * \return \c NULL when the level is exhausted, or when the object following
 * \p prev has no CPU set.
 *
 * \note This function cannot work if objects at the given depth do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set,
					    unsigned depth, hwloc_obj_t prev)
{
  hwloc_obj_t cand = hwloc_get_next_obj_by_depth(topology, depth, prev);

  if (!cand || !cand->cpuset)
    return NULL;

  /* skip cousins that share no bit with the requested set */
  for (; cand; cand = cand->next_cousin) {
    if (hwloc_bitmap_intersects(set, cand->cpuset))
      break;
  }
  return cand;
}

/** \brief Iterate through same-type objects covering at least CPU set \p set
 *
 * If object \p prev is \c NULL, return the first object of type \p
 * type covering at least part of CPU set \p set.  The next invocation
 * should pass the previous return value in \p prev so as to obtain
 * the next object of type \p type covering at least another part of
 * \p set.
 *
 * If there are no or multiple depths for type \p type, \c NULL is returned.
 * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth()
 * for each depth.
 *
 * \note This function cannot work if objects of the given type do
 * not have CPU sets or if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set,
					   hwloc_obj_type_t type, hwloc_obj_t prev)
{
  const int level = hwloc_get_type_depth(topology, type);

  switch (level) {
  case HWLOC_TYPE_DEPTH_UNKNOWN:
  case HWLOC_TYPE_DEPTH_MULTIPLE:
    /* no single level to iterate over */
    return NULL;
  default:
    return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, level, prev);
  }
}

/** @} */


/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects
 * @{
 *
 * Be sure to see the figure in \ref termsanddefs that shows a
 * complete topology tree, including depths, child/sibling/cousin
 * relationships, and an example of an asymmetric topology where one
 * package has fewer caches than its peers.
 */

/** \brief Returns the ancestor object of \p obj at depth \p depth.
 *
 * The parent chain is followed upwards from \p obj until an object whose
 * depth is not greater than \p depth is reached; \p obj itself is returned
 * when it already satisfies that.
 *
 * \return \c NULL if \p obj is located above \p depth, or if the parent
 * chain ends before such an object is found.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj)
{
  hwloc_obj_t up;

  if (obj->depth < depth)
    return NULL;

  for (up = obj; up != NULL; up = up->parent)
    if (up->depth <= depth)
      break;

  return up;
}

/** \brief Returns the ancestor object of \p obj with type \p type.
 *
 * The search starts at the parent of \p obj, so \p obj itself is never
 * returned. The result is the nearest ancestor of the requested type, or
 * a null pointer if the parent chain ends without a match.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
{
  hwloc_obj_t up;

  for (up = obj->parent; up; up = up->parent)
    if (up->type == type)
      break;

  return up;
}

/** \brief Returns the common parent object to objects \p obj1 and \p obj2
 *
 * If \p obj1 and \p obj2 are the same object, that object is returned
 * without looking at any parent.
 *
 * \note Parent pointers are followed without any \c NULL check, so this
 * presumably requires both objects to belong to the same topology tree
 * (to be confirmed against callers).
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
{
  /* the loop isn't so easy since intermediate ancestors may have
   * different depth, causing us to alternate between using obj1->parent
   * and obj2->parent. Also, even if at some point we find ancestors
   * of the same depth, their ancestors may have different depth again.
   */
  while (obj1 != obj2) {
    /* lift whichever object is deeper until it is no deeper than the other */
    while (obj1->depth > obj2->depth)
      obj1 = obj1->parent;
    while (obj2->depth > obj1->depth)
      obj2 = obj2->parent;
    /* same depth but still distinct objects: move both one level up */
    if (obj1 != obj2 && obj1->depth == obj2->depth) {
      obj1 = obj1->parent;
      obj2 = obj2->parent;
    }
  }
  return obj1;
}

/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root.
 *
 * The test is purely a CPU set inclusion check: the result is that of
 * hwloc_bitmap_isincluded() applied to the CPU set of \p obj and the
 * CPU set of \p subtree_root.
 *
 * \note This function assumes that both \p obj and \p subtree_root have a \p cpuset.
 */
static __hwloc_inline int
hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
{
  hwloc_const_cpuset_t inner = obj->cpuset;
  hwloc_const_cpuset_t outer = subtree_root->cpuset;

  return hwloc_bitmap_isincluded(inner, outer);
}

/** \brief Return the next child of \p parent after \p prev.
 *
 * If \p prev is \c NULL, return the first child.
 *
 * \return \c NULL if \p prev is not a child of \p parent, or if \p prev
 * has no next sibling.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
{
  hwloc_obj_t following;

  if (prev == NULL)
    following = parent->first_child;
  else if (prev->parent == parent)
    following = prev->next_sibling;
  else
    following = NULL;

  return following;
}

/** @} */


/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects
 * @{
 */

/** \brief Find the depth of cache objects matching cache level and type.
 *
 * Return the depth of the topology level that contains cache objects
 * whose attributes match \p cachelevel and \p cachetype. This function
 * intends to disambiguate the case where hwloc_get_type_depth() returns
 * ::HWLOC_TYPE_DEPTH_MULTIPLE.
 *
 * Only the first object of each topology level is inspected.
 *
 * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned.
 *
 * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the
 * unique matching unified cache level is returned.
 *
 * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION,
 * either a matching cache, or a unified cache is returned.
 *
 * If \p cachetype is \c -1, it is ignored and multiple levels may
 * match. The function returns either the depth of a uniquely matching
 * level or ::HWLOC_TYPE_DEPTH_MULTIPLE.
 */
static __hwloc_inline int
hwloc_get_cache_type_depth (hwloc_topology_t topology,
			    unsigned cachelevel, hwloc_obj_cache_type_t cachetype)
{
  const int any_type = (cachetype == (hwloc_obj_cache_type_t) -1);
  int match = HWLOC_TYPE_DEPTH_UNKNOWN;
  int level = 0;
  hwloc_obj_t first;

  /* walk the levels from the top until we fall off the bottom */
  while ((first = hwloc_get_obj_by_depth(topology, level, 0)) != NULL) {
    if (first->type == HWLOC_OBJ_CACHE && first->attr->cache.depth == cachelevel) {
      if (any_type) {
        /* type is ignored: remember the first hit, report MULTIPLE on a second one */
        if (match != HWLOC_TYPE_DEPTH_UNKNOWN)
          return HWLOC_TYPE_DEPTH_MULTIPLE;
        match = level;
      } else if (first->attr->cache.type == cachetype
                 || first->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED) {
        /* requested type, or a unified cache standing in for it */
        return level;
      }
    }
    level++;
  }

  /* went to the bottom, return what we found (if anything) */
  return match;
}

/** \brief Get the first cache covering a cpuset \p set
 *
 * The lowest object covering \p set is looked up with
 * hwloc_get_obj_covering_cpuset(), then the parent chain is followed
 * upwards (starting with that object itself) until a cache object is met.
 *
 * \return \c NULL if no cache matches.
 *
 * \note This function cannot work if the root object does not have a CPU set,
 * e.g. if the topology is made of different machines.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  hwloc_obj_t walker;

  for (walker = hwloc_get_obj_covering_cpuset(topology, set); walker; walker = walker->parent)
    if (walker->type == HWLOC_OBJ_CACHE)
      break;

  /* null here when the chain ended without meeting a cache */
  return walker;
}

/** \brief Get the first cache shared between an object and somebody else.
 *
 * Ancestors of \p obj are visited from its parent upwards, stopping at the
 * first ancestor without a CPU set. The first cache ancestor whose CPU set
 * differs from the one of \p obj is returned.
 *
 * \return \c NULL if no cache matches or if an invalid object is given
 * (an object without a CPU set).
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj)
{
  hwloc_obj_t up;

  if (!obj->cpuset)
    return NULL;

  for (up = obj->parent; up && up->cpuset; up = up->parent) {
    if (up->type != HWLOC_OBJ_CACHE)
      continue;
    /* a cache with the very same cpuset is private to obj, keep climbing */
    if (!hwloc_bitmap_isequal(up->cpuset, obj->cpuset))
      return up;
  }
  return NULL;
}

/** @} */


/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers
 * @{
 *
 * Be sure to see the figure in \ref termsanddefs that shows a
 * complete topology tree, including depths, child/sibling/cousin
 * relationships, and an example of an asymmetric topology where one
 * package has fewer caches than its peers.
 */

/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index.
 *
 * This function is useful for converting a CPU set into the PU
 * objects it contains.
 * When retrieving the current binding (e.g. with hwloc_get_cpubind()),
 * one may iterate over the bits of the resulting CPU set with
 * hwloc_bitmap_foreach_begin(), and find the corresponding PUs
 * with this function.
 *
 * PU objects are scanned one by one with hwloc_get_next_obj_by_type().
 *
 * \return \c NULL if no PU object has this \p os_index.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
{
  hwloc_obj_t pu;

  for (pu = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, NULL);
       pu != NULL;
       pu = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, pu)) {
    if (pu->os_index == os_index)
      break;
  }
  return pu;
}

/** \brief Returns the object of type ::HWLOC_OBJ_NUMANODE with \p os_index.
 *
 * This function is useful for converting a nodeset into the NUMA node
 * objects it contains.
 * When retrieving the current binding (e.g. with hwloc_get_membind_nodeset()),
 * one may iterate over the bits of the resulting nodeset with
 * hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes
 * with this function.
 *
 * NUMA node objects are scanned one by one with hwloc_get_next_obj_by_type().
 *
 * \return \c NULL if no NUMA node object has this \p os_index.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
{
  hwloc_obj_t node;

  for (node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, NULL);
       node != NULL;
       node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) {
    if (node->os_index == os_index)
      break;
  }
  return node;
}

/** \brief Do a depth-first traversal of the topology to find and sort
 * all objects that are at the same depth as \p src.
 *
 * Report in \p objs up to \p max physically closest ones to \p src.
 *
 * \return the number of objects returned in \p objs.
 *
 * \return 0 if \p src is an I/O object.
 *
 * \note This function requires the \p src object to have a CPU set.
 */
/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);

/** \brief Find an object below another object, both specified by types and indexes.
 *
 * Start from the top system object and find object of type \p type1
 * and logical index \p idx1.  Then look below this object and find another
 * object of type \p type2 and logical index \p idx2.  Indexes are specified
 * within the parent, not within the entire system.
 *
 * For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2
 * is 3, return the fourth core object below the third package.
 *
 * \return \c NULL if the first object does not exist or has no CPU set.
 *
 * \note This function requires these objects to have a CPU set.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_by_type (hwloc_topology_t topology,
			     hwloc_obj_type_t type1, unsigned idx1,
			     hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_by_type (hwloc_topology_t topology,
			     hwloc_obj_type_t type1, unsigned idx1,
			     hwloc_obj_type_t type2, unsigned idx2)
{
  hwloc_obj_t upper = hwloc_get_obj_by_type (topology, type1, idx1);

  /* "below" is expressed as "inside the CPU set of the upper object" */
  if (upper && upper->cpuset)
    return hwloc_get_obj_inside_cpuset_by_type(topology, upper->cpuset, type2, idx2);

  return NULL;
}

/** \brief Find an object below a chain of objects specified by types and indexes.
 *
 * This is a generalized version of hwloc_get_obj_below_by_type().
 *
 * Arrays \p typev and \p idxv must contain \p nr types and indexes.
 *
 * Start from the top system object and walk the arrays \p typev and \p idxv.
 * For each type and logical index couple in the arrays, look under the previously found
 * object to find the index-th object of the given type.
 * Indexes are specified within the parent, not within the entire system.
 *
 * For instance, if nr is 3, typev contains NODE, PACKAGE and CORE,
 * and idxv contains 0, 1 and 2, return the third core object below
 * the second package below the first NUMA node.
 *
 * \return \c NULL as soon as one step of the chain cannot be resolved or
 * yields an object without a CPU set. If \p nr is not positive, the root
 * object is returned.
 *
 * \note This function requires all these objects and the root object
 * to have a CPU set.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv)
{
  hwloc_obj_t cur = hwloc_get_root_obj(topology);
  int step = 0;

  while (step < nr) {
    if (cur == NULL || cur->cpuset == NULL)
      return NULL;
    /* narrow the search to what lies inside the object found so far */
    cur = hwloc_get_obj_inside_cpuset_by_type(topology, cur->cpuset, typev[step], idxv[step]);
    step++;
  }
  return cur;
}

/** @} */


/** \defgroup hwlocality_helper_distribute Distributing items over a topology
 * @{
 */

/** \brief Flags to be given to hwloc_distrib().
 *
 * Values are bit flags that may be OR'ed together in the \p flags argument.
 */
enum hwloc_distrib_flags_e {
  /** \brief Distribute in reverse order, starting from the last objects.
   * \hideinitializer
   */
  HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0)
};

/** \brief Distribute \p n items over the topology under \p roots
 *
 * Array \p set will be filled with \p n cpusets recursively distributed
 * linearly over the topology under objects \p roots, down to depth \p until
 * (which can be INT_MAX to distribute down to the finest level).
 *
 * \p n_roots is usually 1 and \p roots only contains the topology root object
 * so as to distribute over the entire topology.
 *
 * This is typically useful when an application wants to distribute \p n
 * threads over a machine, giving each of them as much private cache as
 * possible and keeping them locally in number order.
 *
 * The caller may typically want to also call hwloc_bitmap_singlify()
 * before binding a thread so that it does not move at all.
 *
 * \p flags should be 0 or an OR'ed set of ::hwloc_distrib_flags_e.
 *
 * \note This function requires the \p roots objects to have a CPU set.
 *
 * \note This function replaces the now deprecated hwloc_distribute()
 * and hwloc_distributev() functions.
 */
static __hwloc_inline int
hwloc_distrib(hwloc_topology_t topology,
	      hwloc_obj_t *roots, unsigned n_roots,
	      hwloc_cpuset_t *set,
	      unsigned n,
	      unsigned until, unsigned long flags)
{
  unsigned i;
  unsigned tot_weight;
  unsigned given, givenweight;
  /* write cursor into the output array, advanced by each root's chunk */
  hwloc_cpuset_t *cpusetp = set;

  /* Reject any flag bit other than HWLOC_DISTRIB_FLAG_REVERSE. */
  if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) {
    errno = EINVAL;
    return -1;
  }

  /* Sum the hwloc_bitmap_weight() of every root that has a cpuset. */
  tot_weight = 0;
  for (i = 0; i < n_roots; i++)
    if (roots[i]->cpuset)
      tot_weight += hwloc_bitmap_weight(roots[i]->cpuset);

  /* given: number of output slots consumed so far;
   * givenweight: cumulated weight of the roots already processed. */
  for (i = 0, given = 0, givenweight = 0; i < n_roots; i++) {
    unsigned chunk, weight;
    /* with the REVERSE flag, roots are visited from the last one to the first one */
    hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i];
    hwloc_cpuset_t cpuset = root->cpuset;
    /* roots without a cpuset, or with a zero weight, receive nothing */
    if (!cpuset)
      continue;
    weight = hwloc_bitmap_weight(cpuset);
    if (!weight)
      continue;
    /* Give to root a chunk proportional to its weight.
     * If previous chunks got rounded-up, we may get a bit less.
     * (tot_weight cannot be 0 here since it includes this root's non-zero weight.) */
    chunk = (( (givenweight+weight) * n  + tot_weight-1) / tot_weight)
          - ((  givenweight         * n  + tot_weight-1) / tot_weight);
    if (!root->arity || chunk <= 1 || root->depth >= until) {
      /* We can't split any more, put everything there.  */
      if (chunk) {
	/* Fill cpusets with ours */
	/* NOTE(review): the result of hwloc_bitmap_dup() is stored without being checked */
	unsigned j;
	for (j=0; j < chunk; j++)
	  cpusetp[j] = hwloc_bitmap_dup(cpuset);
      } else {
	/* We got no chunk, just merge our cpuset to a previous one
	 * (the first chunk cannot be empty)
	 * so that this root doesn't get ignored.
	 */
	assert(given);
	hwloc_bitmap_or(cpusetp[-1], cpusetp[-1], cpuset);
      }
    } else {
      /* Still more to distribute, recurse into children */
      /* the return value is ignored; the flags passed down were already validated above */
      hwloc_distrib(topology, root->children, root->arity, cpusetp, chunk, until, flags);
    }
    cpusetp += chunk;
    given += chunk;
    givenweight += weight;
  }

  return 0;
}

/** @} */


/** \defgroup hwlocality_helper_topology_sets CPU and node sets of entire topologies
 * @{
 */
/** \brief Get complete CPU set
 *
 * \return the complete CPU set of logical processors of the system. If the
 * topology is the result of a combination of several systems, NULL is
 * returned.
 *
 * \note The returned cpuset is not newly allocated and should thus not be
 * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_complete_cpuset(hwloc_topology_t topology)
{
  /* the set is stored in the root object; hand it out without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->complete_cpuset;
}

/** \brief Get topology CPU set
 *
 * \return the CPU set of logical processors of the system for which hwloc
 * provides topology information. This is equivalent to the cpuset of the
 * system object. If the topology is the result of a combination of several
 * systems, NULL is returned.
 *
 * \note The returned cpuset is not newly allocated and should thus not be
 * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_topology_cpuset(hwloc_topology_t topology)
{
  /* this is simply the cpuset of the root object, returned without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->cpuset;
}

/** \brief Get online CPU set
 *
 * \return the CPU set of online logical processors of the system. If the
 * topology is the result of a combination of several systems, NULL is
 * returned.
 *
 * \note The returned cpuset is not newly allocated and should thus not be
 * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_online_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_online_cpuset(hwloc_topology_t topology)
{
  /* the set is stored in the root object; hand it out without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->online_cpuset;
}

/** \brief Get allowed CPU set
 *
 * \return the CPU set of allowed logical processors of the system. If the
 * topology is the result of a combination of several systems, NULL is
 * returned.
 *
 * \note The returned cpuset is not newly allocated and should thus not be
 * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_cpuset_t
hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology)
{
  /* the set is stored in the root object; hand it out without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->allowed_cpuset;
}

/** \brief Get complete node set
 *
 * \return the complete node set of memory of the system. If the
 * topology is the result of a combination of several systems, NULL is
 * returned.
 *
 * \note The returned nodeset is not newly allocated and should thus not be
 * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_complete_nodeset(hwloc_topology_t topology)
{
  /* the set is stored in the root object; hand it out without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->complete_nodeset;
}

/** \brief Get topology node set
 *
 * \return the node set of memory of the system for which hwloc
 * provides topology information. This is equivalent to the nodeset of the
 * system object. If the topology is the result of a combination of several
 * systems, NULL is returned.
 *
 * \note The returned nodeset is not newly allocated and should thus not be
 * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_topology_nodeset(hwloc_topology_t topology)
{
  /* this is simply the nodeset of the root object, returned without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->nodeset;
}

/** \brief Get allowed node set
 *
 * \return the node set of allowed memory of the system. If the
 * topology is the result of a combination of several systems, NULL is
 * returned.
 *
 * \note The returned nodeset is not newly allocated and should thus not be
 * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy.
 */
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
static __hwloc_inline hwloc_const_nodeset_t
hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology)
{
  /* the set is stored in the root object; hand it out without copying */
  hwloc_obj_t root = hwloc_get_root_obj(topology);
  return root->allowed_nodeset;
}

/** @} */


/** \defgroup hwlocality_helper_nodeset_convert Converting between CPU sets and node sets
 *
 * There are two semantics for converting cpusets to nodesets depending on how
 * non-NUMA machines are handled.
 *
 * When manipulating nodesets for memory binding, non-NUMA machines should be
 * considered as having a single NUMA node. The standard conversion routines
 * below should be used so that marking the first bit of the nodeset means
 * that memory should be bound to a non-NUMA whole machine.
 *
 * When manipulating nodesets as an actual list of NUMA nodes without any
 * need to handle memory binding on non-NUMA machines, the strict conversion
 * routines may be used instead.
 * @{
 */

/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases
 *
 * \p nodeset is first emptied, then the \c os_index of every NUMA node
 * whose CPU set intersects \p _cpuset is set in it.
 *
 * If some NUMA nodes have no CPUs at all, this function never sets their
 * indexes in the output node set, even if a full CPU set is given in input.
 *
 * If the topology contains no NUMA nodes, the machine is considered
 * as a single memory node, and the following behavior is used:
 * If \p cpuset is empty, \p nodeset will be emptied as well.
 * Otherwise \p nodeset will be entirely filled.
 */
static __hwloc_inline void
hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
{
	const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
	hwloc_obj_t node;

	if (numa_depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		/* regular case: collect the NUMA nodes touching the cpuset */
		hwloc_bitmap_zero(nodeset);
		for (node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, numa_depth, NULL);
		     node != NULL;
		     node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, numa_depth, node))
			hwloc_bitmap_set(nodeset, node->os_index);
		return;
	}

	/* no NUMA level: all or nothing, assuming the whole system is one node */
	if (hwloc_bitmap_iszero(_cpuset))
		hwloc_bitmap_zero(nodeset);
	else
		hwloc_bitmap_fill(nodeset);
}

/** \brief Convert a CPU set into a NUMA node set without handling non-NUMA cases
 *
 * This is the strict variant of hwloc_cpuset_to_nodeset(). It does not fix
 * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
 * the same. However, if the topology contains no NUMA nodes, return an empty
 * nodeset.
 *
 * \p nodeset is always emptied first, so that previous contents never leak
 * into the result, including when the topology has no NUMA node level.
 */
static __hwloc_inline void
hwloc_cpuset_to_nodeset_strict(struct hwloc_topology *topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
{
	int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
	hwloc_obj_t obj;

	/* Clear the output before the early return: the documented result for a
	 * topology without NUMA nodes is an empty nodeset, not an untouched one. */
	hwloc_bitmap_zero(nodeset);
	if (depth == HWLOC_TYPE_DEPTH_UNKNOWN )
		return;

	/* set the os_index of every NUMA node whose cpuset intersects the input */
	obj = NULL;
	while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL)
		hwloc_bitmap_set(nodeset, obj->os_index);
}

/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases
 *
 * \p _cpuset is first emptied, then the CPU set of every NUMA node whose
 * \c os_index is set in \p nodeset is OR'ed into it.
 *
 * If the topology contains no NUMA nodes, the machine is considered
 * as a single memory node, and the following behavior is used:
 * If \p nodeset is empty, \p cpuset will be emptied as well.
 * Otherwise \p cpuset will be entirely filled.
 * This is useful for manipulating memory binding sets.
 */
static __hwloc_inline void
hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
{
	const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
	hwloc_obj_t node;

	if (numa_depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		hwloc_bitmap_zero(_cpuset);
		for (node = hwloc_get_next_obj_by_depth(topology, numa_depth, NULL);
		     node != NULL;
		     node = hwloc_get_next_obj_by_depth(topology, numa_depth, node)) {
			if (!hwloc_bitmap_isset(nodeset, node->os_index))
				continue;
			/* no need to check node->cpuset because objects in levels always have a cpuset */
			hwloc_bitmap_or(_cpuset, _cpuset, node->cpuset);
		}
		return;
	}

	/* no NUMA level: all or nothing, assuming the whole system is one node */
	if (hwloc_bitmap_iszero(nodeset))
		hwloc_bitmap_zero(_cpuset);
	else
		hwloc_bitmap_fill(_cpuset);
}

/** \brief Convert a NUMA node set into a CPU set without handling non-NUMA cases
 *
 * This is the strict variant of hwloc_cpuset_from_nodeset(). It does not fix
 * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
 * the same. However, if the topology contains no NUMA nodes, return an empty
 * cpuset.
 *
 * \p _cpuset is always emptied first, so that previous contents never leak
 * into the result, including when the topology has no NUMA node level.
 */
static __hwloc_inline void
hwloc_cpuset_from_nodeset_strict(struct hwloc_topology *topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
{
	int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
	hwloc_obj_t obj;

	/* Clear the output before the early return: the documented result for a
	 * topology without NUMA nodes is an empty cpuset, not an untouched one. */
	hwloc_bitmap_zero(_cpuset);
	if (depth == HWLOC_TYPE_DEPTH_UNKNOWN )
		return;

	/* merge the cpuset of every NUMA node selected by the nodeset */
	obj = NULL;
	while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL)
		if (hwloc_bitmap_isset(nodeset, obj->os_index))
			/* no need to check obj->cpuset because objects in levels always have a cpuset */
			hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset);
}

/** @} */


/** \defgroup hwlocality_distances Manipulating Distances
 * @{
 */

/** \brief Get the distances between all objects at the given depth.
 *
 * \return a distances structure containing a matrix with all distances
 * between all objects at the given depth.
 *
 * Slot i+nbobjs*j contains the distance from the object of logical index i
 * to the object of logical index j.
 *
 * \note This function only returns matrices covering the whole topology,
 * without any unknown distance value. Those matrices are available in
 * top-level object of the hierarchy. Matrices of lower objects are not
 * reported here since they cover only part of the machine.
 *
 * The returned structure belongs to the hwloc library. The caller should
 * not modify or free it.
 *
 * \return \c NULL if no such distance matrix exists.
 */

static __hwloc_inline const struct hwloc_distances_s *
hwloc_get_whole_distance_matrix_by_depth(hwloc_topology_t topology, unsigned depth)
{
  const hwloc_obj_t top = hwloc_get_root_obj(topology);
  unsigned slot = 0;

  /* return the first matrix attached to the root whose relative depth matches */
  while (slot < top->distances_count) {
    const struct hwloc_distances_s *matrix = top->distances[slot];
    if (matrix->relative_depth == depth)
      return matrix;
    slot++;
  }
  return NULL;
}

/** \brief Get the distances between all objects of a given type.
 *
 * \return a distances structure containing a matrix with all distances
 * between all objects of the given type.
 *
 * Slot i+nbobjs*j contains the distance from the object of logical index i
 * the object of logical index j.
 *
 * \note This function only returns matrices covering the whole topology,
 * without any unknown distance value. Those matrices are available in
 * top-level object of the hierarchy. Matrices of lower objects are not
 * reported here since they cover only part of the machine.
 *
 * The returned structure belongs to the hwloc library. The caller should
 * not modify or free it.
 *
 * \return \c NULL if no such distance matrix exists.
 */

static __hwloc_inline const struct hwloc_distances_s *
hwloc_get_whole_distance_matrix_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
{
  const int type_depth = hwloc_get_type_depth(topology, type);

  /* A non-negative depth designates a usable level; anything negative is
   * rejected (presumably the special HWLOC_TYPE_DEPTH_* values). */
  if (type_depth >= 0)
    return hwloc_get_whole_distance_matrix_by_depth(topology, type_depth);

  return NULL;
}

/** \brief Get distances for the given depth and covering some objects
 *
 * Return a distance matrix that describes depth \p depth and covers at
 * least object \p obj and all its children.
 *
 * When looking for the distance between some objects, a common ancestor should
 * be passed in \p obj.
 *
 * \p firstp is set to logical index of the first object described by the matrix.
 *
 * The returned structure belongs to the hwloc library. The caller should
 * not modify or free it.
 */
static __hwloc_inline const struct hwloc_distances_s *
hwloc_get_distance_matrix_covering_obj_by_depth(hwloc_topology_t topology,
						hwloc_obj_t obj, unsigned depth,
						unsigned *firstp)
{
  hwloc_obj_t cur;

  /* Climb from obj towards the root, stopping at the first object
   * that has no cpuset (or when running out of parents). */
  for (cur = obj; cur && cur->cpuset; cur = cur->parent) {
    /* Matrices store a depth relative to the object holding them. */
    const unsigned wanted = depth - cur->depth;
    unsigned k;

    for (k = 0; k < cur->distances_count; k++) {
      const struct hwloc_distances_s *mat = cur->distances[k];

      /* Skip matrices for another depth as well as empty ones. */
      if (mat->relative_depth != wanted || !mat->nbobjs)
	continue;

      /* The matrix starts at the first object of that depth below cur. */
      *firstp = hwloc_get_next_obj_inside_cpuset_by_depth(topology, cur->cpuset, depth, NULL)->logical_index;
      return mat;
    }
  }

  return NULL;
}

/** \brief Get the latency in both directions between two objects.
 *
 * Look at ancestor objects from the bottom to the top until one of them
 * contains a distance matrix that matches the objects exactly.
 *
 * \p latency gets the value from object \p obj1 to \p obj2, while
 * \p reverse_latency gets the reverse-direction value, which
 * may be different on some architectures.
 *
 * \return -1 if no ancestor contains a matching latency matrix.
 */
static __hwloc_inline int
hwloc_get_latency(hwloc_topology_t topology,
		   hwloc_obj_t obj1, hwloc_obj_t obj2,
		   float *latency, float *reverse_latency)
{
  const struct hwloc_distances_s *matrix;
  hwloc_obj_t common;
  unsigned base; /* logical index of the first object described by the matrix */
  unsigned row, col, width;

  /* Both objects must live at the same depth to share a matrix. */
  if (obj1->depth != obj2->depth) {
    errno = EINVAL;
    return -1;
  }

  common = hwloc_get_common_ancestor_obj(topology, obj1, obj2);
  matrix = hwloc_get_distance_matrix_covering_obj_by_depth(topology, common, obj1->depth, &base);

  /* No covering matrix, or a matrix without latency values. */
  if (!matrix || !matrix->latency) {
    errno = ENOSYS;
    return -1;
  }

  /* Translate logical indexes into positions inside the square matrix. */
  width = matrix->nbobjs;
  row = obj1->logical_index - base;
  col = obj2->logical_index - base;

  *latency = matrix->latency[row*width+col];
  *reverse_latency = matrix->latency[col*width+row];
  return 0;
}

/** @} */


/** \defgroup hwlocality_advanced_io Finding I/O objects
 * @{
 */

/** \brief Get the first non-I/O ancestor object.
 *
 * Given the I/O object \p ioobj, find the smallest non-I/O ancestor
 * object. This regular object may then be used for binding because
 * its locality is the same as \p ioobj.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused,
			      hwloc_obj_t ioobj)
{
  hwloc_obj_t cur;

  /* Walk up the parent chain until an object carrying a cpuset is met;
   * yields NULL when the chain ends without finding one. */
  for (cur = ioobj; cur != NULL; cur = cur->parent)
    if (cur->cpuset)
      break;

  return cur;
}

/** \brief Get the next PCI device in the system.
 *
 * \return the first PCI device if \p prev is \c NULL.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev)
{
  /* Thin wrapper: iterate over objects of type HWLOC_OBJ_PCI_DEVICE. */
  hwloc_obj_t following = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev);
  return following;
}

/** \brief Find the PCI device object matching the PCI bus id
 * given as domain, bus, device and function numbers.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_pcidev_by_busid(hwloc_topology_t topology,
			  unsigned domain, unsigned bus, unsigned dev, unsigned func)
{
  hwloc_obj_t pci;

  /* Linear scan over every PCI device of the topology. */
  for (pci = hwloc_get_next_pcidev(topology, NULL);
       pci != NULL;
       pci = hwloc_get_next_pcidev(topology, pci)) {
    /* Reject as soon as one component of the bus id differs. */
    if (pci->attr->pcidev.domain != domain)
      continue;
    if (pci->attr->pcidev.bus != bus)
      continue;
    if (pci->attr->pcidev.dev != dev)
      continue;
    if (pci->attr->pcidev.func != func)
      continue;
    return pci;
  }

  return NULL;
}

/** \brief Find the PCI device object matching the PCI bus id
 * given as a string xxxx:yy:zz.t or yy:zz.t.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid)
{
  unsigned domain = 0; /* used when the string carries no domain */
  unsigned bus, dev, func;
  int parsed;

  /* Try the short "yy:zz.t" form first ... */
  parsed = (sscanf(busid, "%x:%x.%x", &bus, &dev, &func) == 3);

  /* ... then fall back to the full "xxxx:yy:zz.t" form. */
  if (!parsed)
    parsed = (sscanf(busid, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4);

  if (!parsed) {
    errno = EINVAL;
    return NULL;
  }

  return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func);
}

/** \brief Get the next OS device in the system.
 *
 * \return the first OS device if \p prev is \c NULL.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev)
{
  /* Thin wrapper: iterate over objects of type HWLOC_OBJ_OS_DEVICE. */
  hwloc_obj_t following = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev);
  return following;
}

/** \brief Get the next bridge in the system.
 *
 * \return the first bridge if \p prev is \c NULL.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev)
{
  /* Thin wrapper: iterate over objects of type HWLOC_OBJ_BRIDGE. */
  hwloc_obj_t following = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_BRIDGE, prev);
  return following;
}

/** \brief Checks whether a given bridge covers a given PCI bus.
 */
static __hwloc_inline int
hwloc_bridge_covers_pcibus(hwloc_obj_t bridge,
			   unsigned domain, unsigned bus)
{
  /* The object must be a bridge whose downstream side is PCI ... */
  if (bridge->type != HWLOC_OBJ_BRIDGE)
    return 0;
  if (bridge->attr->bridge.downstream_type != HWLOC_OBJ_BRIDGE_PCI)
    return 0;

  /* ... in the requested PCI domain ... */
  if (bridge->attr->bridge.downstream.pci.domain != domain)
    return 0;

  /* ... and the bus must fall in [secondary_bus, subordinate_bus]. */
  return bridge->attr->bridge.downstream.pci.secondary_bus <= bus
    && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus;
}

/** \brief Find the hostbridge that covers the given PCI bus.
 *
 * This is useful for finding the locality of a bus because
 * it is the hostbridge parent cpuset.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_hostbridge_by_pcibus(hwloc_topology_t topology,
			       unsigned domain, unsigned bus)
{
  hwloc_obj_t candidate;

  for (candidate = hwloc_get_next_bridge(topology, NULL);
       candidate != NULL;
       candidate = hwloc_get_next_bridge(topology, candidate)) {
    if (!hwloc_bridge_covers_pcibus(candidate, domain, bus))
      continue;

    /* The first covering bridge is expected to be a hostbridge attached
     * to a non-bridge parent that has a cpuset. */
    assert(candidate->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST);
    assert(candidate->parent->type != HWLOC_OBJ_BRIDGE);
    assert(candidate->parent->cpuset);
    return candidate;
  }

  return NULL;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_HELPER_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/inlines.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2013 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/**
 * This file contains the inline code of functions declared in hwloc.h
 */

#ifndef HWLOC_INLINES_H
#define HWLOC_INLINES_H

#ifndef HWLOC_H
#error Please include the main hwloc.h instead
#endif

#include <stdlib.h>
#include <errno.h>


#ifdef __cplusplus
extern "C" {
#endif

static __hwloc_inline int
hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
{
  int level = hwloc_get_type_depth(topology, type);

  if (level == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* The type has no level of its own: start from the PU level and move
     * upwards until the level type compares strictly before the requested
     * type, then step back down by one level.
     * The loop has no explicit bound; it relies on some upper level
     * comparing before the requested type. */
    level = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
    while (hwloc_compare_types(hwloc_get_depth_type(topology, level), type) >= 0)
      level--;
    level++;
  }

  return level;
}

static __hwloc_inline int
hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
{
  int level = hwloc_get_type_depth(topology, type);

  if (level == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* The type has no level of its own: start from the top level and move
     * downwards until the level type compares strictly after the requested
     * type, then step back up by one level.
     * The loop has no explicit bound; it relies on some lower level
     * comparing after the requested type. */
    level = 0;
    while (hwloc_compare_types(hwloc_get_depth_type(topology, level), type) <= 0)
      level++;
    level--;
  }

  return level;
}

static __hwloc_inline int
hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type)
{
  const int level = hwloc_get_type_depth(topology, type);

  /* Several levels hold this type: reported as -1.
   * FIXME: aggregate nbobjs from different levels? */
  if (level == HWLOC_TYPE_DEPTH_MULTIPLE)
    return -1;

  /* No level holds this type: there are no such objects. */
  if (level == HWLOC_TYPE_DEPTH_UNKNOWN)
    return 0;

  return hwloc_get_nbobjs_by_depth(topology, level);
}

static __hwloc_inline hwloc_obj_t
hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx)
{
  const int level = hwloc_get_type_depth(topology, type);

  /* Without exactly one level for this type, no object can be designated. */
  if (level == HWLOC_TYPE_DEPTH_UNKNOWN || level == HWLOC_TYPE_DEPTH_MULTIPLE)
    return NULL;

  return hwloc_get_obj_by_depth(topology, level, idx);
}

static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev)
{
  /* No previous object: start the iteration at the first one of the level. */
  if (prev == NULL)
    return hwloc_get_obj_by_depth (topology, depth, 0);

  /* Otherwise follow the cousin link, provided prev belongs to that level. */
  if (prev->depth == depth)
    return prev->next_cousin;

  return NULL;
}

static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
			    hwloc_obj_t prev)
{
  const int level = hwloc_get_type_depth(topology, type);

  /* Iteration is only possible when the type maps to a single level. */
  if (level == HWLOC_TYPE_DEPTH_UNKNOWN)
    return NULL;
  if (level == HWLOC_TYPE_DEPTH_MULTIPLE)
    return NULL;

  return hwloc_get_next_obj_by_depth (topology, level, prev);
}

static __hwloc_inline hwloc_obj_t
hwloc_get_root_obj (hwloc_topology_t topology)
{
  /* The root is the object of index 0 at depth 0. */
  hwloc_obj_t top = hwloc_get_obj_by_depth (topology, 0, 0);
  return top;
}

static __hwloc_inline const char *
hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name)
{
  unsigned k = 0;

  /* Return the value of the first info entry whose name matches exactly. */
  while (k < obj->infos_count) {
    if (strcmp(obj->infos[k].name, name) == 0)
      return obj->infos[k].value;
    k++;
  }

  return NULL;
}

static __hwloc_inline void *
hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  void *buf = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags);

  if (!buf) {
    /* Bound allocation failed: set the binding policy (its result is not
     * checked), then fall back to a plain allocation. */
    hwloc_set_membind_nodeset(topology, nodeset, policy, flags);
    buf = hwloc_alloc(topology, len);

    /* Enforce the binding by touching the data */
    if (buf != NULL && policy != HWLOC_MEMBIND_FIRSTTOUCH)
      memset(buf, 0, len);
  }

  return buf;
}

static __hwloc_inline void *
hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
{
  void *buf = hwloc_alloc_membind(topology, len, set, policy, flags);

  if (!buf) {
    /* Bound allocation failed: set the binding policy (its result is not
     * checked), then fall back to a plain allocation. */
    hwloc_set_membind(topology, set, policy, flags);
    buf = hwloc_alloc(topology, len);

    /* Enforce the binding by touching the data */
    if (buf != NULL && policy != HWLOC_MEMBIND_FIRSTTOUCH)
      memset(buf, 0, len);
  }

  return buf;
}


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_INLINES_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/intel-mic.h
================================================
/*
 * Copyright © 2013-2016 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC).
 *
 * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to
 * include this file so as to get topology information for MIC devices.
 */

#ifndef HWLOC_INTEL_MIC_H
#define HWLOC_INTEL_MIC_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#include <hwloc/helper.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#include <dirent.h>
#include <string.h>
#endif

#include <stdio.h>
#include <stdlib.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC)
 *
 * This interface offers ways to retrieve topology information about
 * Intel Xeon Phi (MIC) devices.
 *
 * @{
 */

/** \brief Get the CPU set of logical processors that are physically
 * close to MIC device whose index is \p idx.
 *
 * Return the CPU set describing the locality of the MIC device whose index is \p idx.
 *
 * Topology \p topology and device index \p idx must match the local machine.
 * I/O devices detection is not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_intel_mic_get_device_osdev_by_index().
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * On Linux, \p set also receives the full cpuset when the local_cpus file
 * cannot be parsed, is empty, or when the device directory contains no
 * pci_* entry at all.
 *
 * \return 0 on success, -1 on error: \p topology is not the local system
 * (errno is set to EINVAL), or the sysfs directory or local_cpus file
 * cannot be opened.
 */
static __hwloc_inline int
hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
				  int idx __hwloc_attribute_unused,
				  hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
	/* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128
	char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX];
	DIR *sysdir = NULL;
	FILE *sysfile = NULL;
	struct dirent *dirent;
	unsigned pcibus, pcidev, pcifunc;
	int found = 0; /* set once a pci_* entry has been processed */

	if (!hwloc_topology_is_thissystem(topology)) {
		errno = EINVAL;
		return -1;
	}

	/* bounded formatting: never write past the end of path[] */
	snprintf(path, sizeof(path), "/sys/class/mic/mic%d", idx);
	sysdir = opendir(path);
	if (!sysdir)
		return -1;

	/* look for the pci_<bus>:<dev>.<func> entry of the device directory */
	while ((dirent = readdir(sysdir)) != NULL) {
		if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) {
			snprintf(path, sizeof(path), "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc);
			sysfile = fopen(path, "r");
			if (!sysfile) {
				closedir(sysdir);
				return -1;
			}

			/* unusable or empty locality information: report the whole machine */
			if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0
			    || hwloc_bitmap_iszero(set))
				hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

			fclose(sysfile);
			found = 1;
			break;
		}
	}

	closedir(sysdir);

	/* no pci_* entry at all: do not return success with an untouched set */
	if (!found)
		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#else
	/* Non-Linux systems simply get a full cpuset */
	hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
	return 0;
}

/** \brief Get the hwloc OS device object corresponding to the
 * MIC device for the given index.
 *
 * Return the OS device object describing the MIC device whose index is \p idx.
 * Return NULL if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology,
					  unsigned idx)
{
	hwloc_obj_t cur;

	for (cur = hwloc_get_next_osdev(topology, NULL);
	     cur != NULL;
	     cur = hwloc_get_next_osdev(topology, cur)) {
		/* only co-processor OS devices are candidates */
		if (cur->attr->osdev.type != HWLOC_OBJ_OSDEV_COPROC)
			continue;
		/* the name must exist and start with "mic" */
		if (!cur->name)
			continue;
		if (strncmp("mic", cur->name, 3) != 0)
			continue;
		/* the number following "mic" must be the requested index */
		if (atoi(cur->name + 3) == (int) idx)
			return cur;
	}

	return NULL;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_INTEL_MIC_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/linux-libnuma.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2014 Inria.  All rights reserved.
 * Copyright © 2009-2010, 2012 Université Bordeaux
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and Linux libnuma.
 *
 * Applications that use both Linux libnuma and hwloc may want to
 * include this file so as to ease conversion between their respective types.
*/

#ifndef HWLOC_LINUX_LIBNUMA_H
#define HWLOC_LINUX_LIBNUMA_H

#include <hwloc.h>
#include <numa.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_linux_libnuma_ulongs Interoperability with Linux libnuma unsigned long masks
 *
 * This interface helps converting between Linux libnuma unsigned long masks
 * and hwloc cpusets and nodesets.
 *
 * It also offers a consistent behavior on non-NUMA machines
 * or non-NUMA-aware kernels by assuming that the machines have a single
 * NUMA node.
 *
 * \note Topology \p topology must match the current machine.
 *
 * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
 * (when CONFIG_NUMA is not set in the kernel configuration).
 * This helper and libnuma may thus not be strictly compatible in this case,
 * which may be detected by checking whether numa_available() returns -1.
 *
 * @{
 */


/** \brief Convert hwloc CPU set \p cpuset into the array of unsigned long \p mask
 *
 * \p mask is the array of unsigned long that will be filled.
 * \p maxnode contains the maximal node number that may be stored in \p mask.
 * \p maxnode will be set to the maximal node number that was found, plus one.
 *
 * On input, \p maxnode is rounded up to a whole number of unsigned longs and
 * that many bits of \p mask are cleared. If \p maxnode is 0 on input,
 * \p mask is never written and \p maxnode stays 0.
 *
 * This function may be used before calling set_mempolicy, mbind, migrate_pages
 * or any other function that takes an array of unsigned long and a maximal
 * node number as input parameter.
 *
 * \return 0 (this function always reports success).
 */
static __hwloc_inline int
hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
				    unsigned long *mask, unsigned long *maxnode)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  /* (unsigned long) -1 means "no node found yet"; adding one wraps to 0 */
  unsigned long outmaxnode = -1;

  /* round-up to the next ulong and clear all bytes */
  *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
  memset(mask, 0, *maxnode/8);

  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
    hwloc_obj_t node = NULL;
    while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) {
      /* ignore nodes that do not fit in the caller's mask */
      if (node->os_index >= *maxnode)
	continue;
      mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
      if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
	outmaxnode = node->os_index;
    }

  } else {
    /* if no numa, libnuma assumes we have a single node.
     * Same bound as above: node 0 only fits if the mask is not empty. */
    if (*maxnode > 0 && !hwloc_bitmap_iszero(cpuset)) {
      mask[0] = 1;
      outmaxnode = 0;
    }
  }

  *maxnode = outmaxnode+1;
  return 0;
}

/** \brief Convert hwloc NUMA node set \p nodeset into the array of unsigned long \p mask
 *
 * \p mask is the array of unsigned long that will be filled.
 * \p maxnode contains the maximal node number that may be stored in \p mask.
 * \p maxnode will be set to the maximal node number that was found, plus one.
 *
 * On input, \p maxnode is rounded up to a whole number of unsigned longs and
 * that many bits of \p mask are cleared. If \p maxnode is 0 on input,
 * \p mask is never written and \p maxnode stays 0.
 *
 * This function may be used before calling set_mempolicy, mbind, migrate_pages
 * or any other function that takes an array of unsigned long and a maximal
 * node number as input parameter.
 *
 * \return 0 (this function always reports success).
 */
static __hwloc_inline int
hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
				      unsigned long *mask, unsigned long *maxnode)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  /* (unsigned long) -1 means "no node found yet"; adding one wraps to 0 */
  unsigned long outmaxnode = -1;

  /* round-up to the next ulong and clear all bytes */
  *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
  memset(mask, 0, *maxnode/8);

  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
    hwloc_obj_t node = NULL;
    while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) {
      /* ignore nodes that do not fit in the caller's mask */
      if (node->os_index >= *maxnode)
	continue;
      /* only nodes listed in the input nodeset are reported */
      if (!hwloc_bitmap_isset(nodeset, node->os_index))
	continue;
      mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
      if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
	outmaxnode = node->os_index;
    }

  } else {
    /* if no numa, libnuma assumes we have a single node.
     * Same bound as above: node 0 only fits if the mask is not empty. */
    if (*maxnode > 0 && !hwloc_bitmap_iszero(nodeset)) {
      mask[0] = 1;
      outmaxnode = 0;
    }
  }

  *maxnode = outmaxnode+1;
  return 0;
}

/** \brief Convert the array of unsigned long \p mask into hwloc CPU set
 *
 * \p mask is an array of unsigned long that will be read.
 * \p maxnode contains the maximal node number that may be read in \p mask.
 * If \p maxnode is 0, \p mask is never read and \p cpuset is emptied.
 *
 * This function may be used after calling get_mempolicy or any other function
 * that takes an array of unsigned long as output parameter (and possibly
 * a maximal node number as input parameter).
 *
 * \return 0 (this function always reports success).
 */
static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
				      const unsigned long *mask, unsigned long maxnode)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);

  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
    hwloc_obj_t node = NULL;
    hwloc_bitmap_zero(cpuset);
    /* accumulate the cpuset of every NUMA node whose bit is set in mask */
    while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
      if (node->os_index < maxnode
	  && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
	hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
  } else {
    /* if no numa, libnuma assumes we have a single node.
     * Bit 0 may only be read when the mask holds at least one node. */
    if (maxnode > 0 && (mask[0] & 1))
      hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology));
    else
      hwloc_bitmap_zero(cpuset);
  }

  return 0;
}

/** \brief Convert the array of unsigned long \p mask into hwloc NUMA node set
 *
 * \p mask is an array of unsigned long that will be read.
 * \p maxnode contains the maximal node number that may be read in \p mask.
 * If \p maxnode is 0, \p mask is never read and \p nodeset is emptied.
 *
 * This function may be used after calling get_mempolicy or any other function
 * that takes an array of unsigned long as output parameter (and possibly
 * a maximal node number as input parameter).
 *
 * \return 0 (this function always reports success).
 */
static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
					const unsigned long *mask, unsigned long maxnode)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);

  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
    hwloc_obj_t node = NULL;
    hwloc_bitmap_zero(nodeset);
    /* copy the bit of every NUMA node of the topology that is set in mask */
    while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
      if (node->os_index < maxnode
	  && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
	hwloc_bitmap_set(nodeset, node->os_index);
  } else {
    /* if no numa, libnuma assumes we have a single node.
     * Bit 0 may only be read when the mask holds at least one node. */
    if (maxnode > 0 && (mask[0] & 1))
      hwloc_bitmap_fill(nodeset);
    else
      hwloc_bitmap_zero(nodeset);
  }

  return 0;
}

/** @} */


/** \defgroup hwlocality_linux_libnuma_bitmask Interoperability with Linux libnuma bitmask
 *
 * This interface helps converting between Linux libnuma bitmasks
 * and hwloc cpusets and nodesets.
 *
 * It also offers a consistent behavior on non-NUMA machines
 * or non-NUMA-aware kernels by assuming that the machines have a single
 * NUMA node.
 *
 * \note Topology \p topology must match the current machine.
 *
 * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
 * (when CONFIG_NUMA is not set in the kernel configuration).
 * This helper and libnuma may thus not be strictly compatible in this case,
 * which may be detected by checking whether numa_available() returns -1.
 *
 * @{
 */


/** \brief Convert hwloc CPU set \p cpuset into the returned libnuma bitmask
 *
 * The returned bitmask should later be freed with numa_bitmask_free.
 *
 * This function may be used before calling many numa_ functions
 * that use a struct bitmask as an input parameter.
 *
 * \return newly allocated struct bitmask.
 */
static __hwloc_inline struct bitmask *
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
static __hwloc_inline struct bitmask *
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset)
{
  const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  struct bitmask *result = numa_allocate_cpumask();
  hwloc_obj_t cur = NULL;

  if (result == NULL)
    return NULL;

  if (numa_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* if no numa, libnuma assumes we have a single node */
    if (!hwloc_bitmap_iszero(cpuset))
      numa_bitmask_setbit(result, 0);
    return result;
  }

  /* Set the bit of every NUMA node covering part of cpuset, as long as
   * the node has local memory. */
  for (;;) {
    cur = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, numa_depth, cur);
    if (cur == NULL)
      break;
    if (cur->memory.local_memory)
      numa_bitmask_setbit(result, cur->os_index);
  }

  return result;
}

/** \brief Convert hwloc NUMA node set \p nodeset into the returned libnuma bitmask
 *
 * The returned bitmask should later be freed with numa_bitmask_free.
 *
 * This function may be used before calling many numa_ functions
 * that use a struct bitmask as an input parameter.
 *
 * \return newly allocated struct bitmask.
 */
static __hwloc_inline struct bitmask *
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
static __hwloc_inline struct bitmask *
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset)
{
  const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  struct bitmask *result = numa_allocate_cpumask();
  hwloc_obj_t cur = NULL;

  if (result == NULL)
    return NULL;

  if (numa_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* if no numa, libnuma assumes we have a single node */
    if (!hwloc_bitmap_iszero(nodeset))
      numa_bitmask_setbit(result, 0);
    return result;
  }

  /* Set the bit of every NUMA node that is listed in nodeset and has
   * local memory. */
  for (;;) {
    cur = hwloc_get_next_obj_by_depth(topology, numa_depth, cur);
    if (cur == NULL)
      break;
    if (hwloc_bitmap_isset(nodeset, cur->os_index) && cur->memory.local_memory)
      numa_bitmask_setbit(result, cur->os_index);
  }

  return result;
}

/** \brief Convert libnuma bitmask \p bitmask into hwloc CPU set \p cpuset
 *
 * This function may be used after calling many numa_ functions
 * that use a struct bitmask as an output parameter.
 */
static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
					const struct bitmask *bitmask)
{
  const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  hwloc_obj_t cur = NULL;

  if (numa_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* if no numa, libnuma assumes we have a single node */
    if (numa_bitmask_isbitset(bitmask, 0))
      hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology));
    else
      hwloc_bitmap_zero(cpuset);
    return 0;
  }

  /* Start from an empty set and add the cpuset of every selected node. */
  hwloc_bitmap_zero(cpuset);
  while ((cur = hwloc_get_next_obj_by_depth(topology, numa_depth, cur)) != NULL) {
    if (!numa_bitmask_isbitset(bitmask, cur->os_index))
      continue;
    hwloc_bitmap_or(cpuset, cpuset, cur->cpuset);
  }

  return 0;
}

/** \brief Convert libnuma bitmask \p bitmask into hwloc NUMA node set \p nodeset
 *
 * This function may be used after calling many numa_ functions
 * that use a struct bitmask as an output parameter.
 */
static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
					 const struct bitmask *bitmask)
{
  const int numa_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  hwloc_obj_t cur = NULL;

  if (numa_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
    /* if no numa, libnuma assumes we have a single node */
    if (numa_bitmask_isbitset(bitmask, 0))
      hwloc_bitmap_fill(nodeset);
    else
      hwloc_bitmap_zero(nodeset);
    return 0;
  }

  /* Start from an empty set and add the index of every selected node. */
  hwloc_bitmap_zero(nodeset);
  while ((cur = hwloc_get_next_obj_by_depth(topology, numa_depth, cur)) != NULL) {
    if (!numa_bitmask_isbitset(bitmask, cur->os_index))
      continue;
    hwloc_bitmap_set(nodeset, cur->os_index);
  }

  return 0;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_LINUX_LIBNUMA_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/linux.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2016 Inria.  All rights reserved.
 * Copyright © 2009-2011 Université Bordeaux
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and Linux.
 *
 * Applications that use hwloc on Linux may want to include this file
 * if using some low-level Linux features.
 */

#ifndef HWLOC_LINUX_H
#define HWLOC_LINUX_H

#include <hwloc.h>
#include <stdio.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_linux Linux-specific helpers
 *
 * This includes helpers for manipulating Linux kernel cpumap files, and hwloc
 * equivalents of the Linux sched_setaffinity and sched_getaffinity system calls.
 *
 * @{
 */

/** \brief Convert a linux kernel cpumap file \p file into hwloc CPU set \p set.
 *
 * Might be used when reading CPU set from sysfs attributes such as topology
 * and caches for processors, or local_cpus for devices.
 *
 * \p file is a stdio stream supplied by the caller; \p set receives the
 * parsed CPU set.
 */
HWLOC_DECLSPEC int hwloc_linux_parse_cpumap_file(FILE *file, hwloc_cpuset_t set);

/** \brief Bind a thread \p tid on cpus given in cpuset \p set
 *
 * The behavior is exactly the same as the Linux sched_setaffinity system call,
 * but uses a hwloc cpuset.
 *
 * \note This is equivalent to calling hwloc_set_proc_cpubind() with
 * ::HWLOC_CPUBIND_THREAD as flags.
 */
HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);

/** \brief Get the current binding of thread \p tid into cpuset \p set
 *
 * The behavior is exactly the same as the Linux sched_getaffinity system call,
 * but uses a hwloc cpuset.
 *
 * \note This is equivalent to calling hwloc_get_proc_cpubind() with
 * ::HWLOC_CPUBIND_THREAD as flags.
 */
HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);

/** \brief Get the last physical CPU where thread \p tid ran, into \p set.
 *
 * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
 * ::HWLOC_CPUBIND_THREAD as flags.
 */
HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology, pid_t tid, hwloc_bitmap_t set);

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_LINUX_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/myriexpress.h
================================================
/*
 * Copyright © 2010-2014 Inria.  All rights reserved.
 * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and Myrinet Express.
 *
 * Applications that use both hwloc and Myrinet Express verbs may want to
 * include this file so as to get topology information for Myrinet hardware.
 *
 */

#ifndef HWLOC_MYRIEXPRESS_H
#define HWLOC_MYRIEXPRESS_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>

#include <myriexpress.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_myriexpress Interoperability with Myrinet Express
 *
 * This interface offers ways to retrieve topology information about
 * Myrinet Express hardware.
 *
 * @{
 */

/** \brief Get the CPU set of logical processors that are physically
 * close to the MX board \p id.
 *
 * Return the CPU set describing the locality of the Myrinet Express
 * board whose index is \p id.
 *
 * Topology \p topology and device \p id must match the local machine.
 * I/O devices detection is not needed in the topology.
 *
 * The function only returns the locality of the device.
 * No additional information about the device is available.
 */
static __hwloc_inline int
hwloc_mx_board_get_device_cpuset(hwloc_topology_t topology,
				 unsigned id, hwloc_cpuset_t set)
{
  uint32_t board, numa_os_index;
  hwloc_obj_t node = NULL;

  /* MX can only be queried about the machine we are running on. */
  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  /* Ask MX for the NUMA node of this board. */
  board = id;
  if (mx_get_info(NULL, MX_NUMA_NODE, &board, sizeof(board), &numa_os_index, sizeof(numa_os_index)) != MX_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  /* (uint32_t) -1 is skipped; otherwise look for the NUMA node with
   * that OS index. */
  if (numa_os_index != (uint32_t) -1) {
    while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
      if (node->os_index == numa_os_index) {
	hwloc_bitmap_copy(set, node->cpuset);
	return 0;
      }
    }
  }

  /* fallback to the full topology cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  return 0;
}

/** \brief Get the CPU set of logical processors that are physically
 * close to the MX endpoint \p endpoint.
 *
 * Return the CPU set describing the locality of the Myrinet Express
 * board that runs the MX endpoint \p endpoint.
 *
 * Topology \p topology and endpoint \p endpoint must match the local machine.
 * I/O devices detection is not needed in the topology.
 *
 * The function only returns the locality of the endpoint.
 * No additional information about the endpoint or device is available.
 */
static __hwloc_inline int
hwloc_mx_endpoint_get_device_cpuset(hwloc_topology_t topology,
				    mx_endpoint_t endpoint, hwloc_cpuset_t set)
{
  mx_endpoint_addr_t addr;
  uint64_t nic_id;
  uint32_t endpoint_id;
  uint32_t board;

  /* endpoint -> address -> NIC id -> board number; the calls run in this
   * order and the first one that fails aborts the chain with EINVAL */
  if (mx_get_endpoint_addr(endpoint, &addr) != MX_SUCCESS
      || mx_decompose_endpoint_addr(addr, &nic_id, &endpoint_id) != MX_SUCCESS
      || mx_nic_id_to_board_number(nic_id, &board) != MX_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  /* the locality of the endpoint is the locality of its board */
  return hwloc_mx_board_get_device_cpuset(topology, board, set);
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_MYRIEXPRESS_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/nvml.h
================================================
/*
 * Copyright © 2012-2016 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and the NVIDIA Management Library.
 *
 * Applications that use both hwloc and the NVIDIA Management Library may want to
 * include this file so as to get topology information for NVML devices.
 */

#ifndef HWLOC_NVML_H
#define HWLOC_NVML_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#include <hwloc/helper.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#endif

#include <nvml.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library
 *
 * This interface offers ways to retrieve topology information about
 * devices managed by the NVIDIA Management Library (NVML).
 *
 * @{
 */

/** \brief Get the CPU set of logical processors that are physically
 * close to NVML device \p device.
 *
 * Return the CPU set describing the locality of the NVML device \p device.
 *
 * Topology \p topology and device \p device must match the local machine.
 * I/O devices detection and the NVML component are not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_nvml_get_device_osdev()
 * and hwloc_nvml_get_device_osdev_by_index().
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 */
static __hwloc_inline int
hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
			     nvmlDevice_t device, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
  /* Linux: read the local_cpus sysfs attribute of the device's PCI slot */
#define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128
  char sysfs_path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX];
  nvmlPciInfo_t pciinfo;
  FILE *fp;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  if (nvmlDeviceGetPciInfo(device, &pciinfo) != NVML_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  /* the PCI function is always taken as 0 */
  sprintf(sysfs_path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pciinfo.domain, pciinfo.bus, pciinfo.device);
  fp = fopen(sysfs_path, "r");
  if (fp == NULL)
    return -1;

  /* an unparsable or empty mask falls back to the complete cpuset */
  if (hwloc_linux_parse_cpumap_file(fp, set) < 0 || hwloc_bitmap_iszero(set))
    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

  fclose(fp);
  return 0;
#else
  /* Non-Linux systems simply get a full cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  return 0;
#endif
}

/** \brief Get the hwloc OS device object corresponding to the
 * NVML device whose index is \p idx.
 *
 * Return the OS device object describing the NVML device whose
 * index is \p idx. Returns NULL if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the NVML component must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
{
	hwloc_obj_t dev;

	/* walk all OS devices, looking for a GPU named "nvml<idx>" */
	for (dev = hwloc_get_next_osdev(topology, NULL);
	     dev != NULL;
	     dev = hwloc_get_next_osdev(topology, dev)) {
		if (dev->attr->osdev.type != HWLOC_OBJ_OSDEV_GPU)
			continue;
		if (!dev->name || strncmp("nvml", dev->name, 4) != 0)
			continue;
		/* the index is the number following the "nvml" prefix */
		if (atoi(dev->name + 4) == (int) idx)
			return dev;
	}

	return NULL;
}

/** \brief Get the hwloc OS device object corresponding to NVML device \p device.
 *
 * Return the hwloc OS device object that describes the given
 * NVML device \p device. Return NULL if there is none.
 *
 * Topology \p topology and device \p device must match the local machine.
 * I/O devices detection and the NVML component must be enabled in the topology.
 * If not, the locality of the object may still be found using
 * hwloc_nvml_get_device_cpuset().
 *
 * \note The corresponding hwloc PCI device may be found by looking
 * at the result parent pointer.
 */
static __hwloc_inline hwloc_obj_t
hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device)
{
	hwloc_obj_t osdev;
	nvmlReturn_t nvres;
	nvmlPciInfo_t pci;

	/* the NVML handle can only be matched against a topology of this machine */
	if (!hwloc_topology_is_thissystem(topology)) {
		errno = EINVAL;
		return NULL;
	}

	nvres = nvmlDeviceGetPciInfo(device, &pci);
	if (NVML_SUCCESS != nvres) {
		/* report the failure like the other error path above */
		errno = EINVAL;
		return NULL;
	}

	/* look for an "nvml*" OS device whose PCI parent has the same bus ID */
	osdev = NULL;
	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
		hwloc_obj_t pcidev = osdev->parent;
		/* unnamed OS devices cannot be NVML ones, and must not reach strncmp() */
		if (!osdev->name || strncmp(osdev->name, "nvml", 4))
			continue;
		if (pcidev
		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
		    && pcidev->attr->pcidev.domain == pci.domain
		    && pcidev->attr->pcidev.bus == pci.bus
		    && pcidev->attr->pcidev.dev == pci.device
		    && pcidev->attr->pcidev.func == 0)
			return osdev;
	}

	return NULL;
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_NVML_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/opencl.h
================================================
/*
 * Copyright © 2012-2017 Inria.  All rights reserved.
 * Copyright © 2013 Université Bordeaux.  All right reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and the OpenCL interface.
 *
 * Applications that use both hwloc and OpenCL may want to
 * include this file so as to get topology information for OpenCL devices.
 */

#ifndef HWLOC_OPENCL_H
#define HWLOC_OPENCL_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#include <hwloc/helper.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#endif

#include <CL/cl.h>
#include <CL/cl_ext.h>

#include <stdio.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_opencl Interoperability with OpenCL
 *
 * This interface offers ways to retrieve topology information about
 * OpenCL devices.
 *
 * Only the AMD OpenCL interface currently offers useful locality information
 * about its devices.
 *
 * @{
 */

/** \brief Get the CPU set of logical processors that are physically
 * close to OpenCL device \p device.
 *
 * Return the CPU set describing the locality of the OpenCL device \p device.
 *
 * Topology \p topology and device \p device must match the local machine.
 * I/O devices detection and the OpenCL component are not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_opencl_get_device_osdev()
 * and hwloc_opencl_get_device_osdev_by_index().
 *
 * This function is currently only implemented in a meaningful way for
 * Linux with the AMD OpenCL implementation; other systems will simply
 * get a full cpuset.
 */
static __hwloc_inline int
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
			       cl_device_id device __hwloc_attribute_unused,
			       hwloc_cpuset_t set)
{
#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD)
	/* Linux + AMD OpenCL: get the PCI bus ID from the AMD extension,
	 * then read the local_cpus sysfs attribute of that PCI device */
#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
	char sysfs_path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
	cl_device_topology_amd topo;
	FILE *fp;

	if (!hwloc_topology_is_thissystem(topology)) {
		errno = EINVAL;
		return -1;
	}

	/* without a PCIe topology from the extension, report the complete cpuset;
	 * topo is only read when the query succeeded */
	if (clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(topo), &topo, NULL) != CL_SUCCESS
	    || topo.raw.type != CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD) {
		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
		return 0;
	}

	/* the PCI domain is always taken as 0000 */
	sprintf(sysfs_path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus",
		(unsigned) topo.pcie.bus, (unsigned) topo.pcie.device, (unsigned) topo.pcie.function);
	fp = fopen(sysfs_path, "r");
	if (fp == NULL)
		return -1;

	/* an unparsable or empty mask falls back to the complete cpuset */
	if (hwloc_linux_parse_cpumap_file(fp, set) < 0 || hwloc_bitmap_iszero(set))
		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

	fclose(fp);
	return 0;
#else
	/* Non-Linux + AMD OpenCL systems simply get a full cpuset */
	hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
	return 0;
#endif
}

/** \brief Get the hwloc OS device object corresponding to the
 * OpenCL device for the given indexes.
 *
 * Return the OS device object describing the OpenCL device
 * whose platform index is \p platform_index,
 * and whose device index within this platform is \p device_index.
 * Return NULL if there is none.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection and the OpenCL component must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
				       unsigned platform_index, unsigned device_index)
{
	unsigned plat = (unsigned) -1, dev = (unsigned) -1;
	hwloc_obj_t cur;

	/* walk all OS devices, looking for a co-processor named
	 * "opencl<platform>d<device>" with the requested indexes */
	for (cur = hwloc_get_next_osdev(topology, NULL);
	     cur != NULL;
	     cur = hwloc_get_next_osdev(topology, cur)) {
		if (cur->attr->osdev.type != HWLOC_OBJ_OSDEV_COPROC)
			continue;
		if (!cur->name)
			continue;
		/* both indexes must be parsed from the name */
		if (sscanf(cur->name, "opencl%ud%u", &plat, &dev) != 2)
			continue;
		if (plat == platform_index && dev == device_index)
			return cur;
	}

	return NULL;
}

/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
 *
 * Return the hwloc OS device object that describes the given
 * OpenCL device \p device. Return NULL if there is none.
 *
 * Topology \p topology and device \p device must match the local machine.
 * I/O devices detection and the OpenCL component must be enabled in the topology.
 * If not, the locality of the object may still be found using
 * hwloc_opencl_get_device_cpuset().
 *
 * \note The corresponding hwloc PCI device may be found by looking
 * at the result parent pointer.
 */
static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
			      cl_device_id device __hwloc_attribute_unused)
{
#ifdef CL_DEVICE_TOPOLOGY_AMD
	hwloc_obj_t osdev;
	cl_device_topology_amd amdtopo;
	cl_int clret;

	/* the AMD extension is the only source of the device's PCI bus ID here */
	clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
	if (CL_SUCCESS != clret) {
		errno = EINVAL;
		return NULL;
	}
	if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
		errno = EINVAL;
		return NULL;
	}

	/* look for an "opencl*" OS device whose PCI parent has the same bus ID
	 * (the PCI domain is always compared against 0) */
	osdev = NULL;
	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
		hwloc_obj_t pcidev = osdev->parent;
		/* unnamed OS devices cannot be OpenCL ones, and must not reach strncmp() */
		if (!osdev->name || strncmp(osdev->name, "opencl", 6))
			continue;
		if (pcidev
		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
		    && pcidev->attr->pcidev.domain == 0
		    && pcidev->attr->pcidev.bus == amdtopo.pcie.bus
		    && pcidev->attr->pcidev.dev == amdtopo.pcie.device
		    && pcidev->attr->pcidev.func == amdtopo.pcie.function)
			return osdev;
	}

	return NULL;
#else
	/* no way to get the PCI bus ID without the AMD extension */
	return NULL;
#endif
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_OPENCL_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/openfabrics-verbs.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2016 Inria.  All rights reserved.
 * Copyright © 2009-2010 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/** \file
 * \brief Macros to help interaction between hwloc and OpenFabrics
 * verbs.
 *
 * Applications that use both hwloc and OpenFabrics verbs may want to
 * include this file so as to get topology information for OpenFabrics
 * hardware (InfiniBand, etc).
 *
 */

#ifndef HWLOC_OPENFABRICS_VERBS_H
#define HWLOC_OPENFABRICS_VERBS_H

#include <hwloc.h>
#include <hwloc/autogen/config.h>
#ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h>
#endif

#include <infiniband/verbs.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_openfabrics Interoperability with OpenFabrics
 *
 * This interface offers ways to retrieve topology information about
 * OpenFabrics devices (InfiniBand, Omni-Path, usNIC, etc).
 *
 * @{
 */

/** \brief Get the CPU set of logical processors that are physically
 * close to device \p ibdev.
 *
 * Return the CPU set describing the locality of the OpenFabrics
 * device \p ibdev (InfiniBand, etc).
 *
 * Topology \p topology and device \p ibdev must match the local machine.
 * I/O devices detection is not needed in the topology.
 *
 * The function only returns the locality of the device.
 * If more information about the device is needed, OS objects should
 * be used instead, see hwloc_ibv_get_device_osdev()
 * and hwloc_ibv_get_device_osdev_by_name().
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 */
static __hwloc_inline int
hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
			    struct ibv_device *ibdev, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
  /* If we're on Linux, use the verbs-provided sysfs mechanism to
     get the local cpus */
#define HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX 128
  char path[HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX];
  const char *ibname;
  FILE *sysfile = NULL;
  int len;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  /* never hand a NULL device name to the "%s" conversion below */
  ibname = ibv_get_device_name(ibdev);
  if (!ibname) {
    errno = EINVAL;
    return -1;
  }

  /* bounded formatting: a name that does not fit is rejected instead of
   * overflowing the stack buffer */
  len = snprintf(path, sizeof(path), "/sys/class/infiniband/%s/device/local_cpus",
		 ibname);
  if (len < 0 || (size_t) len >= sizeof(path)) {
    errno = EINVAL;
    return -1;
  }

  sysfile = fopen(path, "r");
  if (!sysfile)
    return -1;

  /* an unparsable or empty mask falls back to the complete cpuset */
  if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0
      || hwloc_bitmap_iszero(set))
    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));

  fclose(sysfile);
#else
  /* Non-Linux systems simply get a full cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
  return 0;
}

/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
 * device named \p ibname.
 *
 * Return the OS device object describing the OpenFabrics device
 * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname
 * (mlx5_0, hfi1_0, usnic_0, qib0, etc).
 * Returns NULL if there is none.
 * The name \p ibname is usually obtained from ibv_get_device_name().
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
 * I/O devices detection must be enabled in the topology.
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology,
				   const char *ibname)
{
	hwloc_obj_t dev;

	/* walk all OS devices, looking for an OpenFabrics one with that exact name */
	for (dev = hwloc_get_next_osdev(topology, NULL);
	     dev != NULL;
	     dev = hwloc_get_next_osdev(topology, dev)) {
		if (dev->attr->osdev.type != HWLOC_OBJ_OSDEV_OPENFABRICS)
			continue;
		if (!dev->name)
			continue;
		if (strcmp(ibname, dev->name) == 0)
			return dev;
	}

	return NULL;
}

/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
 * device \p ibdev.
 *
 * Return the OS device object describing the OpenFabrics device \p ibdev
 * (InfiniBand, etc). Returns NULL if there is none.
 *
 * Topology \p topology and device \p ibdev must match the local machine.
 * I/O devices detection must be enabled in the topology.
 * If not, the locality of the object may still be found using
 * hwloc_ibv_get_device_cpuset().
 *
 * \note The corresponding PCI device object can be obtained by looking
 * at the OS device parent object.
 */
static __hwloc_inline hwloc_obj_t
hwloc_ibv_get_device_osdev(hwloc_topology_t topology,
			   struct ibv_device *ibdev)
{
	const char *ibname;

	/* the verbs device can only be matched against a topology of this machine */
	if (!hwloc_topology_is_thissystem(topology)) {
		errno = EINVAL;
		return NULL;
	}

	/* the by-name lookup passes the name to strcmp(), so reject a NULL name here */
	ibname = ibv_get_device_name(ibdev);
	if (!ibname) {
		errno = EINVAL;
		return NULL;
	}

	return hwloc_ibv_get_device_osdev_by_name(topology, ibname);
}

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_OPENFABRICS_VERBS_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/plugins.h
================================================
/*
 * Copyright © 2013-2015 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

#ifndef HWLOC_PLUGINS_H
#define HWLOC_PLUGINS_H

/** \file
 * \brief Public interface for building hwloc plugins.
 */

struct hwloc_backend;

#include <hwloc.h>
#ifdef HWLOC_INSIDE_PLUGIN
/* needed for hwloc_plugin_check_namespace() */
#include <ltdl.h>
#endif


/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components
 * @{
 */

/** \brief Discovery component type
 *
 * Each value is a distinct bit, so several types may be OR'ed together
 * (see the excludes field of struct hwloc_disc_component).
 */
typedef enum hwloc_disc_component_type_e {
  /** \brief CPU-only discovery through the OS, or generic no-OS support.
   * \hideinitializer */
  HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0),

  /** \brief xml, synthetic or custom,
   * platform-specific components such as bgq.
   * Anything that discovers CPU and everything else.
   * No misc backend is expected to complement a global component.
   * \hideinitializer */
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),

  /** \brief OpenCL, Cuda, etc.
   * \hideinitializer */
  HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
} hwloc_disc_component_type_t;

/** \brief Discovery component structure
 *
 * This is the major kind of components, taking care of the discovery.
 * They are registered by generic components, either statically-built or as plugins.
 */
struct hwloc_disc_component {
  /** \brief Discovery component type, one of ::hwloc_disc_component_type_e */
  hwloc_disc_component_type_t type;

  /** \brief Name.
   * If this component is built as a plugin, this name does not have to match the plugin filename.
   */
  const char *name;

  /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e.
   *
   * For a GLOBAL component, this usually includes all other types (~0).
   *
   * Other components only exclude types that may bring conflicting
   * topology information. MISC components should likely not be excluded
   * since they usually bring non-primary additional information.
   */
  unsigned excludes;

  /** \brief Instantiate callback to create a backend from the component.
   * Parameters data1, data2, data3 are NULL except for components
   * that have special enabling routines such as hwloc_topology_set_xml(). */
  struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3);

  /** \brief Component priority.
   * Used to sort topology->components, higher priority first.
   * Also used to decide between two components with the same name.
   *
   * Usual values are
   * 50 for native OS (or platform) components,
   * 45 for x86,
   * 40 for no-OS fallback,
   * 30 for global components (xml/synthetic/custom),
   * 20 for pci,
   * 10 for other misc components (opencl etc.).
   */
  unsigned priority;

  /** \private Used internally to list components by priority on topology->components
   * (the component structure is usually read-only,
   *  the core copies it before using this field for queueing)
   */
  struct hwloc_disc_component * next;
};

/** @} */


/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
 * @{
 */

/** \brief Discovery backend structure
 *
 * A backend is the instantiation of a discovery component.
 * When a component gets enabled for a topology,
 * its instantiate() callback creates a backend.
 *
 * hwloc_backend_alloc() initializes all fields to default values
 * that the component may change (except "component" and "next")
 * before enabling the backend with hwloc_backend_enable().
 */
struct hwloc_backend {
  /** \private Reserved for the core, set by hwloc_backend_alloc() */
  struct hwloc_disc_component * component;
  /** \private Reserved for the core, set by hwloc_backend_enable() */
  struct hwloc_topology * topology;
  /** \private Reserved for the core. Set to 1 if forced through envvar, 0 otherwise. */
  int envvar_forced;
  /** \private Reserved for the core. Used internally to list backends on topology->backends. */
  struct hwloc_backend * next;

  /** \brief Backend flags, as an OR'ed set of ::hwloc_backend_flag_e */
  unsigned long flags;

  /** \brief Backend-specific 'is_custom' property.
   * Shortcut on !strcmp(..->component->name, "custom").
   * Only the custom component should touch this. */
  int is_custom;

  /** \brief Backend-specific 'is_thissystem' property.
   * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled.
   * Set to -1 if the backend doesn't care (default). */
  int is_thissystem;

  /** \brief Backend private data, or NULL if none. */
  void * private_data;
  /** \brief Callback for freeing the private_data.
   * May be NULL.
   */
  void (*disable)(struct hwloc_backend *backend);

  /** \brief Main discovery callback.
   * Returns > 0 if it modified the topology tree, -1 on error, 0 otherwise.
   * May be NULL if type is ::HWLOC_DISC_COMPONENT_TYPE_MISC. */
  int (*discover)(struct hwloc_backend *backend);

  /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend.
   * May be NULL. */
  int (*get_obj_cpuset)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset);

  /** \brief Callback called by backends to notify this backend that a new object was added.
   * Returns > 0 if it modified the topology tree, 0 otherwise.
   * May be NULL. */
  int (*notify_new_object)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj);
};

/** \brief Backend flags
 *
 * Values to be OR'ed into the flags field of struct hwloc_backend.
 */
enum hwloc_backend_flag_e {
  /** \brief Levels should be reconnected before this backend discover() is used.
   * \hideinitializer */
  HWLOC_BACKEND_FLAG_NEED_LEVELS = (1UL<<0)
};

/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
 * The caller will then modify whatever needed, and call hwloc_backend_enable().
 */
HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component);

/** \brief Enable a previously allocated and setup backend. */
HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend);

/** \brief Used by backends discovery callbacks to request locality information from others.
 *
 * Traverse the list of enabled backends until one has a
 * get_obj_cpuset() method, and call it.
 */
HWLOC_DECLSPEC int hwloc_backends_get_obj_cpuset(struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset);

/** \brief Used by backends discovery callbacks to notify other
 * backends of new objects.
 *
 * Traverse the list of enabled backends (all but caller) and invoke
 * their notify_new_object() method to notify them that a new object
 * just got added to the topology.
 *
 * Currently only used for notifying of new PCI device objects.
 */
HWLOC_DECLSPEC int hwloc_backends_notify_new_object(struct hwloc_backend *caller, struct hwloc_obj *obj);

/** @} */


/** \defgroup hwlocality_generic_components Components and Plugins: Generic components
 * @{
 */

/** \brief Generic component type
 *
 * Stored in the type field of struct hwloc_component; it tells which
 * structure the data field of that component points to.
 */
typedef enum hwloc_component_type_e {
  /** \brief The data field must point to a struct hwloc_disc_component. */
  HWLOC_COMPONENT_TYPE_DISC,

  /** \brief The data field must point to a struct hwloc_xml_component. */
  HWLOC_COMPONENT_TYPE_XML
} hwloc_component_type_t;

/** \brief Generic component structure
 *
 * Generic components structure, either statically listed by configure in static-components.h
 * or dynamically loaded as a plugin.
 */
struct hwloc_component {
  /** \brief Component ABI version, set to ::HWLOC_COMPONENT_ABI */
  unsigned abi;

  /** \brief Process-wide component initialization callback.
   *
   * This optional callback is called when the component is registered
   * to the hwloc core (after loading the plugin).
   *
   * When the component is built as a plugin, this callback
   * should call hwloc_plugin_check_namespace()
   * and return a negative error code on error.
   *
   * \p flags is always 0 for now.
   *
   * \return 0 on success, or a negative code on error.
   *
   * \note If the component uses ltdl for loading its own plugins,
   * it should load/unload them only in init() and finalize(),
   * to avoid race conditions with hwloc's use of ltdl.
   */
  int (*init)(unsigned long flags);

  /** \brief Process-wide component termination callback.
   *
   * This optional callback is called after unregistering the component
   * from the hwloc core (before unloading the plugin).
   *
   * \p flags is always 0 for now.
   *
   * \note If the component uses ltdl for loading its own plugins,
   * it should load/unload them only in init() and finalize(),
   * to avoid race conditions with hwloc's use of ltdl.
   */
  void (*finalize)(unsigned long flags);

  /** \brief Component type, one of ::hwloc_component_type_e */
  hwloc_component_type_t type;

  /** \brief Component flags, unused for now */
  unsigned long flags;

  /** \brief Component data, pointing to a struct hwloc_disc_component or struct hwloc_xml_component. */
  void * data;
};

/** @} */


/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
 * @{
 */

/** \brief Add an object to the topology.
 *
 * It is sorted along the tree of other objects according to the inclusion of
 * cpusets, to eventually be added as a child of the smallest object including
 * this object.
 *
 * If the cpuset is empty, the type of the object (and maybe some attributes)
 * must be enough to find where to insert the object. This is especially true
 * for NUMA nodes with memory and no CPUs.
 *
 * The given object should not have children.
 *
 * This shall only be called before levels are built.
 *
 * In case of error, hwloc_report_os_error() is called.
 *
 * Returns the object on success.
 * Returns NULL and frees obj on error.
 * Returns another object and frees obj if it was merged with an identical pre-existing object.
 */
HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj);

/** \brief Type of error callbacks during object insertion */
typedef void (*hwloc_report_error_t)(const char * msg, int line);
/** \brief Report an insertion error from a backend */
HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line);
/** \brief Check whether insertion errors are hidden */
HWLOC_DECLSPEC int hwloc_hide_errors(void);

/** \brief Add an object to the topology and specify which error callback to use.
 *
 * Aside from the error callback selection, this function is identical to hwloc_insert_object_by_cpuset()
 */
HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj, hwloc_report_error_t report_error);

/** \brief Insert an object somewhere in the topology.
 *
 * It is added as the last child of the given parent.
 * The cpuset is completely ignored, so strange objects such as I/O devices should
 * preferably be inserted with this.
 *
 * When used for "normal" children with cpusets (when importing from XML
 * or when duplicating a topology), the caller should make sure children are inserted
 * in order.
 *
 * The given object may have children.
 *
 * Remember to call topology_connect() afterwards to fix handy pointers.
 */
HWLOC_DECLSPEC void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj);

/** \brief Allocate and initialize an object of the given type and physical index
 *
 * The object and its attribute union are zero-filled, then \p type and
 * \p os_index are stored and os_level is set to -1.
 * The cpuset is not allocated here, the caller must do it.
 *
 * Returns the new object on success.
 * Returns NULL if memory allocation failed, in which case nothing is leaked.
 */
static __hwloc_inline struct hwloc_obj *
hwloc_alloc_setup_object(hwloc_obj_type_t type, signed os_index)
{
  struct hwloc_obj *obj = malloc(sizeof(*obj));
  if (!obj)
    return NULL;
  memset(obj, 0, sizeof(*obj));
  obj->type = type;
  obj->os_index = os_index;
  obj->os_level = -1;
  obj->attr = malloc(sizeof(*obj->attr));
  if (!obj->attr) {
    /* do not return a half-initialized object */
    free(obj);
    return NULL;
  }
  memset(obj->attr, 0, sizeof(*obj->attr));
  /* do not allocate the cpuset here, let the caller do it */
  return obj;
}

/** \brief Setup object cpusets/nodesets by OR'ing its children.
 *
 * Used when adding an object late in the topology, after propagating sets up and down.
 * The caller should use this after inserting by cpuset (which means the cpusets is already OK).
 * Typical case: PCI backend adding a hostbridge parent.
 */
HWLOC_DECLSPEC int hwloc_fill_object_sets(hwloc_obj_t obj);

/** \brief Make sure that plugins can lookup core symbols.
 *
 * This is a sanity check to avoid lazy-lookup failures when libhwloc
 * is loaded within a plugin, and later tries to load its own plugins.
 * This may fail (and abort the program) if libhwloc symbols are in a
 * private namespace.
 *
 * \return 0 on success.
 * \return -1 if the plugin cannot be successfully loaded. The caller
 * plugin init() callback should return a negative error code as well.
 *
 * Plugins should call this function in their init() callback to avoid
 * later crashes if lazy symbol resolution is used by the upper layer that
 * loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
 *
 * \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
 * building the caller as a plugin.
 *
 * \note This function should remain inline so plugins can call it even
 * when they cannot find libhwloc symbols.
 */
static __hwloc_inline int
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
{
#ifdef HWLOC_INSIDE_PLUGIN
  /* the verbose setting is read from the environment once and then cached */
  static int verbose_known = 0;
  static int verbose = 0;
  lt_dlhandle self;
  void *found;

  /* open the running program itself to look the core symbol up */
  self = lt_dlopen(NULL);
  if (self == NULL)
    /* cannot check, assume things will work */
    return 0;

  found = lt_dlsym(self, symbol);
  lt_dlclose(self);
  if (found != NULL)
    return 0;

  /* the symbol is not visible: optionally explain why the plugin gives up */
  if (!verbose_known) {
    const char *env = getenv("HWLOC_PLUGINS_VERBOSE");
    verbose = (env != NULL) ? atoi(env) : 0;
    verbose_known = 1;
  }
  if (verbose)
    fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
	    pluginname, symbol);
  return -1;
#else /* !HWLOC_INSIDE_PLUGIN */
  /* nothing to check when not built as a plugin */
  return 0;
#endif /* HWLOC_INSIDE_PLUGIN */
}

/** @} */


/** \defgroup hwlocality_components_pci_funcs Components and Plugins: PCI functions to be used by components
 * @{
 */

/** \brief Insert a list of PCI devices and bridges in the backend topology.
 *
 * Insert a list of objects (either PCI device or bridges) starting at first_obj
 * (linked by next_sibling in the topology, and ending with NULL).
 * Objects are placed under the right bridges, and the remaining upstream bridges
 * are then inserted in the topology by calling the get_obj_cpuset() callback to
 * find their locality.
 *
 * \param backend The backend whose topology receives the objects.
 * \param first_obj Head of the NULL-terminated list of objects linked by next_sibling.
 *
 * \note NOTE(review): the meaning of the returned int is not documented in
 * this header; confirm against the implementation before relying on it.
 */
HWLOC_DECLSPEC int hwloc_insert_pci_device_list(struct hwloc_backend *backend, struct hwloc_obj *first_obj);

/** \brief Return the offset of the given capability in the PCI config space buffer
 *
 * This function requires a 256-bytes config space. Unknown/unavailable bytes should be set to 0xff.
 *
 * \param config The PCI config space buffer to search (256 bytes, see above).
 * \param cap The capability to look for.
 *
 * \note NOTE(review): the value returned when the capability is absent is
 * not documented in this header; confirm against the implementation.
 */
HWLOC_DECLSPEC unsigned hwloc_pci_find_cap(const unsigned char *config, unsigned cap);

/** \brief Fill linkspeed by reading the PCI config space where PCI_CAP_ID_EXP is at position offset.
 *
 * Needs 20 bytes of EXP capability block starting at offset in the config space
 * for registers up to link status.
 *
 * \param config The PCI config space buffer.
 * \param offset Position of the PCI_CAP_ID_EXP capability block inside \p config.
 * \param linkspeed Output location that receives the link speed.
 *
 * \note NOTE(review): the unit of \p linkspeed and the meaning of the returned
 * int are not documented in this header; confirm against the implementation.
 */
HWLOC_DECLSPEC int hwloc_pci_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed);

/** \brief Modify the PCI device object into a bridge and fill its attribute if a bridge is found in the PCI config space.
 *
 * This function requires 64 bytes of common configuration header at the beginning of config.
 *
 * \param obj The PCI device object to inspect and possibly convert into a bridge.
 * \param config The PCI config space buffer (at least the 64-byte common header).
 *
 * \return -1 if bridge fields are invalid, in which case \p obj is destroyed
 * and must not be used by the caller anymore.
 */
HWLOC_DECLSPEC int hwloc_pci_prepare_bridge(hwloc_obj_t obj, const unsigned char *config);

/** @} */


#endif /* HWLOC_PLUGINS_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc/rename.h
================================================
/*
 * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
 * Copyright © 2010-2017 Inria.  All rights reserved.
 * See COPYING in top-level directory.
 */

#ifndef HWLOC_RENAME_H
#define HWLOC_RENAME_H

#include <hwloc/autogen/config.h>


#ifdef __cplusplus
extern "C" {
#endif


/* Only enact these defines if we're actually renaming the symbols
   (i.e., avoid trying to have no-op defines if we're *not*
   renaming). */

#if HWLOC_SYM_TRANSFORM

/* Use a preprocessor two-step in order to get the prefixing right.
   Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming
   things. */

/* HWLOC_MUNGE_NAME only forwards to HWLOC_MUNGE_NAME2: the extra level lets
   its arguments (notably HWLOC_SYM_PREFIX / HWLOC_SYM_PREFIX_CAPS) be
   macro-expanded first, since operands written directly next to ## are
   pasted without expansion. */
#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b)
/* Performs the actual token pasting of the two pieces. */
#define HWLOC_MUNGE_NAME2(a, b) a ## b
/* Renamed identifier: HWLOC_SYM_PREFIX followed by hwloc_<name>. */
#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name)
/* Same with HWLOC_SYM_PREFIX_CAPS; note that the pasted middle part is
   still the lowercase "hwloc_" even for upper-case names. */
#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name)

/* Now define all the "real" names to be the prefixed names.  This
   allows us to use the real names throughout the code base (i.e.,
   "hwloc_<foo>"); the preprocessor will adjust to have the prefixed
   name under the covers. */

/* Names from hwloc.h */

#define hwloc_get_api_version HWLOC_NAME(get_api_version)

#define hwloc_topology HWLOC_NAME(topology)
#define hwloc_topology_t HWLOC_NAME(topology_t)

#define hwloc_cpuset_t HWLOC_NAME(cpuset_t)
#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t)
#define hwloc_nodeset_t HWLOC_NAME(nodeset_t)
#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t)

#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM)
#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE)
#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE)
#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE)
#define HWLOC_OBJ_CACHE HWLOC_NAME_CAPS(OBJ_CACHE)
#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE)
#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU)
#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC)
#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP)
#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE)
#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE)
#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE)
#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX)
#define hwloc_obj_type_t HWLOC_NAME(obj_type_t)

#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e)
#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t)
#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED)
#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA)
#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION)

#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e)
#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t)
#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST)
#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI)

#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e)
#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t)
#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK)
#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU)
#define HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK)
#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS)
#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA)
#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC)

#define hwloc_compare_types HWLOC_NAME(compare_types)

#define hwloc_compare_types_e HWLOC_NAME(compare_types_e)
#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED)

#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s)
#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s)

#define hwloc_obj HWLOC_NAME(obj)
#define hwloc_obj_t HWLOC_NAME(obj_t)

#define hwloc_distances_s HWLOC_NAME(distances_s)
#define hwloc_obj_info_s HWLOC_NAME(obj_info_s)

#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u)
#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s)
#define hwloc_group_attr_s HWLOC_NAME(group_attr_s)
#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s)
#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s)
#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s)

#define hwloc_topology_init HWLOC_NAME(topology_init)
#define hwloc_topology_load HWLOC_NAME(topology_load)
#define hwloc_topology_destroy HWLOC_NAME(topology_destroy)
#define hwloc_topology_dup HWLOC_NAME(topology_dup)
#define hwloc_topology_check HWLOC_NAME(topology_check)
#define hwloc_topology_ignore_type HWLOC_NAME(topology_ignore_type)
#define hwloc_topology_ignore_type_keep_structure HWLOC_NAME(topology_ignore_type_keep_structure)
#define hwloc_topology_ignore_all_keep_structure HWLOC_NAME(topology_ignore_all_keep_structure)

#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e)

#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM)
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
#define HWLOC_TOPOLOGY_FLAG_IO_DEVICES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_DEVICES)
#define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES)
#define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO)
#define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES)
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)

#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags)
#define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot)
#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml)
#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer)
#define hwloc_topology_set_custom HWLOC_NAME(topology_set_custom)
#define hwloc_topology_set_distance_matrix HWLOC_NAME(topology_set_distance_matrix)

#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support)
#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support)
#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support)
#define hwloc_topology_support HWLOC_NAME(topology_support)
#define hwloc_topology_get_support HWLOC_NAME(topology_get_support)
#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata)
#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata)

#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml)
#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer)
#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer)
#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback)
#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata)
#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64)
#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback)

#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e)
#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES)
#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)
#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic)

#define hwloc_topology_insert_misc_object_by_cpuset HWLOC_NAME(topology_insert_misc_object_by_cpuset)
#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent)

#define hwloc_custom_insert_topology HWLOC_NAME(custom_insert_topology)
#define hwloc_custom_insert_group_object_by_parent HWLOC_NAME(custom_insert_group_object_by_parent)

#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e)
#define HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_DISTANCES)
#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC)
#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO)
#define hwloc_topology_restrict HWLOC_NAME(topology_restrict)

#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth)
#define hwloc_get_type_depth HWLOC_NAME(get_type_depth)

#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e)
#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN)
#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE)
#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE)
#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE)
#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE)

#define hwloc_get_depth_type HWLOC_NAME(get_depth_type)
#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth)
#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type)

#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem)
#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags)

#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth)
#define hwloc_get_obj_by_type HWLOC_NAME(get_obj_by_type)

#define hwloc_obj_type_string HWLOC_NAME(obj_type_string)
#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf)
#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf)
#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf)
#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf)

#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name)
#define hwloc_obj_add_info HWLOC_NAME(obj_add_info)

#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS)
#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD)
#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT)
#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND)

#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t)

#define hwloc_set_cpubind HWLOC_NAME(set_cpubind)
#define hwloc_get_cpubind HWLOC_NAME(get_cpubind)
#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind)
#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind)
#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind)
#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind)

#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location)
#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location)

#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT)
#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH)
#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND)
#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE)
#define HWLOC_MEMBIND_REPLICATE HWLOC_NAME_CAPS(MEMBIND_REPLICATE)
#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH)
#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED)

#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t)

#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS)
#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD)
#define HWLOC_MEMBIND_STRICT HWLOC_NAME_CAPS(MEMBIND_STRICT)
#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE)
#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND)
#define HWLOC_MEMBIND_BYNODESET HWLOC_NAME_CAPS(MEMBIND_BYNODESET)

#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t)

#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset)
#define hwloc_set_membind HWLOC_NAME(set_membind)
#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset)
#define hwloc_get_membind HWLOC_NAME(get_membind)
#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset)
#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind)
#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset)
#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind)
#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset)
#define hwloc_set_area_membind HWLOC_NAME(set_area_membind)
#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset)
#define hwloc_get_area_membind HWLOC_NAME(get_area_membind)
#define hwloc_get_area_memlocation HWLOC_NAME(get_area_memlocation)
#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset)
#define hwloc_alloc_membind HWLOC_NAME(alloc_membind)
#define hwloc_alloc HWLOC_NAME(alloc)
#define hwloc_free HWLOC_NAME(free)

#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj)
#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev)
#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid)
#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring)
#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev)
#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge)
#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus)
#define hwloc_get_hostbridge_by_pcibus HWLOC_NAME(get_hostbridge_by_pcibus)

/* hwloc/bitmap.h */

#define hwloc_bitmap_s HWLOC_NAME(bitmap_s)
#define hwloc_bitmap_t HWLOC_NAME(bitmap_t)
#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t)

#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc)
#define hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full)
#define hwloc_bitmap_free HWLOC_NAME(bitmap_free)
#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup)
#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy)
#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf)
#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf)
#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf)
#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf)
#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf)
#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf)
#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf)
#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf)
#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf)
#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero)
#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill)
#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong)

#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong)
#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong)
#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong)
#define hwloc_bitmap_only HWLOC_NAME(bitmap_only)
#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut)
#define hwloc_bitmap_set HWLOC_NAME(bitmap_set)
#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range)
#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong)
#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr)
#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range)
#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset)
#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero)
#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull)
#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal)
#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects)
#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded)
#define hwloc_bitmap_or HWLOC_NAME(bitmap_or)
#define hwloc_bitmap_and HWLOC_NAME(bitmap_and)
#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot)
#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor)
#define hwloc_bitmap_not HWLOC_NAME(bitmap_not)
#define hwloc_bitmap_first HWLOC_NAME(bitmap_first)
#define hwloc_bitmap_last HWLOC_NAME(bitmap_last)
#define hwloc_bitmap_next HWLOC_NAME(bitmap_next)
#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify)
#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first)
#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare)
#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight)

/* hwloc/cpuset.h -- deprecated but still available */

#define hwloc_cpuset_alloc HWLOC_NAME(cpuset_alloc)
#define hwloc_cpuset_free HWLOC_NAME(cpuset_free)
#define hwloc_cpuset_dup HWLOC_NAME(cpuset_dup)
#define hwloc_cpuset_copy HWLOC_NAME(cpuset_copy)
#define hwloc_cpuset_snprintf HWLOC_NAME(cpuset_snprintf)
#define hwloc_cpuset_asprintf HWLOC_NAME(cpuset_asprintf)
#define hwloc_cpuset_from_string HWLOC_NAME(cpuset_from_string)
#define hwloc_cpuset_zero HWLOC_NAME(cpuset_zero)
#define hwloc_cpuset_fill HWLOC_NAME(cpuset_fill)
#define hwloc_cpuset_from_ulong HWLOC_NAME(cpuset_from_ulong)
#define hwloc_cpuset_taskset_snprintf HWLOC_NAME(cpuset_taskset_snprintf)
#define hwloc_cpuset_taskset_asprintf HWLOC_NAME(cpuset_taskset_asprintf)
#define hwloc_cpuset_taskset_sscanf HWLOC_NAME(cpuset_taskset_sscanf)

#define hwloc_cpuset_from_ith_ulong HWLOC_NAME(cpuset_from_ith_ulong)
#define hwloc_cpuset_to_ulong HWLOC_NAME(cpuset_to_ulong)
#define hwloc_cpuset_to_ith_ulong HWLOC_NAME(cpuset_to_ith_ulong)
#define hwloc_cpuset_cpu HWLOC_NAME(cpuset_cpu)
#define hwloc_cpuset_all_but_cpu HWLOC_NAME(cpuset_all_but_cpu)
#define hwloc_cpuset_set HWLOC_NAME(cpuset_set)
#define hwloc_cpuset_set_range HWLOC_NAME(cpuset_set_range)
#define hwloc_cpuset_set_ith_ulong HWLOC_NAME(cpuset_set_ith_ulong)
#define hwloc_cpuset_clr HWLOC_NAME(cpuset_clr)
#define hwloc_cpuset_clr_range HWLOC_NAME(cpuset_clr_range)
#define hwloc_cpuset_isset HWLOC_NAME(cpuset_isset)
#define hwloc_cpuset_iszero HWLOC_NAME(cpuset_iszero)
#define hwloc_cpuset_isfull HWLOC_NAME(cpuset_isfull)
#define hwloc_cpuset_isequal HWLOC_NAME(cpuset_isequal)
#define hwloc_cpuset_intersects HWLOC_NAME(cpuset_intersects)
#define hwloc_cpuset_isincluded HWLOC_NAME(cpuset_isincluded)
#define hwloc_cpuset_or HWLOC_NAME(cpuset_or)
#define hwloc_cpuset_and HWLOC_NAME(cpuset_and)
#define hwloc_cpuset_andnot HWLOC_NAME(cpuset_andnot)
#define hwloc_cpuset_xor HWLOC_NAME(cpuset_xor)
#define hwloc_cpuset_not HWLOC_NAME(cpuset_not)
#define hwloc_cpuset_first HWLOC_NAME(cpuset_first)
#define hwloc_cpuset_last HWLOC_NAME(cpuset_last)
#define hwloc_cpuset_next HWLOC_NAME(cpuset_next)
#define hwloc_cpuset_singlify HWLOC_NAME(cpuset_singlify)
#define hwloc_cpuset_compare_first HWLOC_NAME(cpuset_compare_first)
#define hwloc_cpuset_compare HWLOC_NAME(cpuset_compare)
#define hwloc_cpuset_weight HWLOC_NAME(cpuset_weight)

/* hwloc/helper.h */

#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth)
#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth)
#define hwloc_get_root_obj HWLOC_NAME(get_root_obj)
#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth)
#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type)
#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth)
#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type)
#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index)
#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index)
#define hwloc_get_next_child HWLOC_NAME(get_next_child)
#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj)
#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree)
#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset)
#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset)
#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth)
#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type)
#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth)
#define hwloc_get_obj_inside_cpuset_by_type HWLOC_NAME(get_obj_inside_cpuset_by_type)
#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth)
#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type)
#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset)
#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset)
#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset)
#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth)
#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type)
#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth)
#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset)
#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj)
#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs)
#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type)
#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type)
#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e)
#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE)
#define hwloc_distrib HWLOC_NAME(distrib)
#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy)
#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset)
#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset)
#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset)
#define hwloc_topology_get_online_cpuset HWLOC_NAME(topology_get_online_cpuset)
#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset)
#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset)
#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset)
#define hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset)
#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset)
#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict)
#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset)
#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict)
#define hwloc_get_whole_distance_matrix_by_depth HWLOC_NAME(get_whole_distance_matrix_by_depth)
#define hwloc_get_whole_distance_matrix_by_type HWLOC_NAME(get_whole_distance_matrix_by_type)
#define hwloc_get_distance_matrix_covering_obj_by_depth HWLOC_NAME(get_distance_matrix_covering_obj_by_depth)
#define hwloc_get_latency HWLOC_NAME(get_latency)

/* diff.h */

#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e)
#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t)
#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE)
#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME)
#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO)
#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u)
#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s)
#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s)
#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s)
#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e)
#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t)
#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR)
#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX)
#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u)
#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t)
#define hwloc_topology_diff_generic_s HWLOC_NAME(topology_diff_generic_s)
#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s)
#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s)
#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build)
#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e)
#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE)
#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply)
#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy)
#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml)
#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml)
#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer)
#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer)

/* glibc-sched.h */

#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity)
#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity)

/* linux-libnuma.h */

#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs)
#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs)
#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs)
#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs)
#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask)
#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask)
#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask)
#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask)

/* linux.h */

#define hwloc_linux_parse_cpumap_file HWLOC_NAME(linux_parse_cpumap_file)
#define hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind)
#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind)
#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location)

/* openfabrics-verbs.h */

#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset)
#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev)
#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name)

/* myriexpress.h */

#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset)
#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset)

/* intel-mic.h */

#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset)
#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index)

/* opencl.h */

#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev)
#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index)

/* cuda.h */

#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids)
#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset)
#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev)
#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev)
#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index)

/* cudart.h */

#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids)
#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset)
#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev)
#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index)

/* nvml.h */

#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset)
#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev)
#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index)

/* gl.h */

#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name)
#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev)

/* hwloc/plugins.h */

#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e)
#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU)
#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL)
#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC)
#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t)
#define hwloc_disc_component HWLOC_NAME(disc_component)

#define hwloc_backend HWLOC_NAME(backend)
#define hwloc_backend_flag_e HWLOC_NAME(backend_flag_e)
#define HWLOC_BACKEND_FLAG_NEED_LEVELS HWLOC_NAME_CAPS(BACKEND_FLAG_NEED_LEVELS)

#define hwloc_backend_alloc HWLOC_NAME(backend_alloc)
#define hwloc_backend_enable HWLOC_NAME(backend_enable)
#define hwloc_backends_get_obj_cpuset HWLOC_NAME(backends_get_obj_cpuset)
#define hwloc_backends_notify_new_object HWLOC_NAME(backends_notify_new_object)

#define hwloc_component_type_e HWLOC_NAME(component_type_e)
#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC)
#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML)
#define hwloc_component_type_t HWLOC_NAME(component_type_t)
#define hwloc_component HWLOC_NAME(component)

#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace)

#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset)
#define hwloc_report_error_t HWLOC_NAME(report_error_t)
#define hwloc_report_os_error HWLOC_NAME(report_os_error)
#define hwloc_hide_errors HWLOC_NAME(hide_errors)
#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset)
#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent)
#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object)
#define hwloc_fill_object_sets HWLOC_NAME(fill_object_sets)

#define hwloc_insert_pci_device_list HWLOC_NAME(insert_pci_device_list)
#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap)
#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed)
#define hwloc_pci_prepare_bridge HWLOC_NAME(pci_prepare_bridge)

/* hwloc/deprecated.h */

#define hwloc_obj_type_of_string HWLOC_NAME(obj_type_of_string)
#define hwloc_obj_snprintf HWLOC_NAME(obj_snprintf)
#define hwloc_distributev HWLOC_NAME(distributev)
#define hwloc_distribute HWLOC_NAME(distribute)

/* private/debug.h */

#define hwloc_debug_enabled HWLOC_NAME(debug_enabled)
#define hwloc_debug HWLOC_NAME(debug)

/* private/misc.h */

#define hwloc_snprintf HWLOC_NAME(snprintf)
#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp)
#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual)
#define hwloc_ffs32 HWLOC_NAME(ffs32)
#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32)
#define hwloc_flsl_manual HWLOC_NAME(flsl_manual)
#define hwloc_fls32 HWLOC_NAME(fls32)
#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32)
#define hwloc_weight_long HWLOC_NAME(weight_long)
#define hwloc_strncasecmp HWLOC_NAME(strncasecmp)

/* private/cpuid-x86.h */

#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid)
#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid)

/* private/xml.h */

/* Identifiers spelled with a double underscore (hwloc__xml_*) keep a single
 * leading underscore in the HWLOC_NAME() argument, because only the "hwloc_"
 * part is dropped.
 */
#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose)

#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s)
#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t)
#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff)
#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s)
#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s)
#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t)
#define hwloc__xml_export_object HWLOC_NAME(_xml_export_object)
#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff)

#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks)
#define hwloc_xml_component HWLOC_NAME(xml_component)
#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register)
#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset)

/* private/components.h */

#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable)
/* NOTE(review): unlike the neighboring entries, the alias suffix below
 * ("instantiate_others") does not match the macro name ("enable_others").
 * The mapping is still applied consistently wherever the macro is used, and
 * changing it would alter the transformed symbol name, so confirm against
 * the built library before touching it.
 */
#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others)

#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all)
#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem)

#define hwloc_components_init HWLOC_NAME(components_init)
#define hwloc_components_destroy_all HWLOC_NAME(components_destroy_all)

/* private/private.h */

/* Same aliasing scheme as the sections above: lowercase identifiers go
 * through HWLOC_NAME(), uppercase constants through HWLOC_NAME_CAPS().
 */
#define hwloc_ignore_type_e HWLOC_NAME(ignore_type_e)

#define HWLOC_IGNORE_TYPE_NEVER HWLOC_NAME_CAPS(IGNORE_TYPE_NEVER)
#define HWLOC_IGNORE_TYPE_KEEP_STRUCTURE HWLOC_NAME_CAPS(IGNORE_TYPE_KEEP_STRUCTURE)
#define HWLOC_IGNORE_TYPE_ALWAYS HWLOC_NAME_CAPS(IGNORE_TYPE_ALWAYS)

#define hwloc_os_distances_s HWLOC_NAME(os_distances_s)

#define hwloc_xml_imported_distances_s HWLOC_NAME(xml_imported_distances_s)

#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets)
#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level)
#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname)
#define hwloc_get_sysctl HWLOC_NAME(get_sysctl)
#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors)
#define hwloc_connect_children HWLOC_NAME(connect_children)
#define hwloc_connect_levels HWLOC_NAME(connect_levels)

#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first)

#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
#define hwloc_topology_clear HWLOC_NAME(topology_clear)

#define hwloc__add_info HWLOC_NAME(_add_info)
#define hwloc__find_info_slot HWLOC_NAME(_find_info_slot)
#define hwloc__move_infos HWLOC_NAME(_move_infos)
#define hwloc__free_infos HWLOC_NAME(_free_infos)

#define hwloc_binding_hooks HWLOC_NAME(binding_hooks)
#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks)
#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks)

#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks)
#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks)
#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks)
#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks)
#define hwloc_set_osf_hooks HWLOC_NAME(set_osf_hooks)
#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks)
#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks)
#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks)
#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks)
#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks)

#define hwloc_look_hardwired_fujitsu_k HWLOC_NAME(look_hardwired_fujitsu_k)
#define hwloc_look_hardwired_fujitsu_fx10 HWLOC_NAME(look_hardwired_fujitsu_fx10)
#define hwloc_look_hardwired_fujitsu_fx100 HWLOC_NAME(look_hardwired_fujitsu_fx100)

#define hwloc_add_uname_info HWLOC_NAME(add_uname_info)
#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object)
#define hwloc__duplicate_objects HWLOC_NAME(_duplicate_objects)

#define hwloc_alloc_heap HWLOC_NAME(alloc_heap)
#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap)
#define hwloc_free_heap HWLOC_NAME(free_heap)
#define hwloc_free_mmap HWLOC_NAME(free_mmap)
#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail)

#define hwloc_distances_init HWLOC_NAME(distances_init)
#define hwloc_distances_destroy HWLOC_NAME(distances_destroy)
#define hwloc_distances_set HWLOC_NAME(distances_set)
#define hwloc_distances_set_from_env HWLOC_NAME(distances_set_from_env)
#define hwloc_distances_restrict_os HWLOC_NAME(distances_restrict_os)
#define hwloc_distances_restrict HWLOC_NAME(distances_restrict)
#define hwloc_distances_finalize_os HWLOC_NAME(distances_finalize_os)
#define hwloc_distances_finalize_logical HWLOC_NAME(distances_finalize_logical)
#define hwloc_clear_object_distances HWLOC_NAME(clear_object_distances)
#define hwloc_clear_object_distances_one HWLOC_NAME(clear_object_distances_one)
#define hwloc_group_by_distances HWLOC_NAME(group_by_distances)

#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64)
#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64)

#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup)

#define hwloc_progname HWLOC_NAME(progname)

#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion)

/* private/solaris-chiptype.h */

#define hwloc_solaris_chip_info_s HWLOC_NAME(solaris_chip_info_s)
#define hwloc_solaris_get_chip_info HWLOC_NAME(solaris_get_chip_info)

#endif /* HWLOC_SYM_TRANSFORM */


#ifdef __cplusplus
} /* extern "C" */
#endif


#endif /* HWLOC_RENAME_H */


================================================
FILE: rocrtst/thirdparty/include/hwloc.h
================================================
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2017 Inria.  All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
 * See COPYING in top-level directory.
 */

/*=====================================================================
 *                 PLEASE GO READ THE DOCUMENTATION!
 *         ------------------------------------------------
 *               $tarball_directory/doc/doxygen-doc/
 *                                or
 *           http://www.open-mpi.org/projects/hwloc/doc/
 *=====================================================================
 *
 * FAIR WARNING: Do NOT expect to be able to figure out all the
 * subtleties of hwloc by simply reading function prototypes and
 * constant descriptions here in this file.
 *
 * Hwloc has wonderful documentation in both PDF and HTML formats for
 * your reading pleasure.  The formal documentation explains a LOT of
 * hwloc-specific concepts, provides definitions, and discusses the
 * "big picture" for many of the things that you'll find here in this
 * header file.
 *
 * The PDF/HTML documentation was generated via Doxygen; much of what
 * you'll see in there is also here in this file.  BUT THERE IS A LOT
 * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h!
 *
 * There are entire paragraph-length descriptions, discussions, and
 * pretty pictures to explain subtle corner cases, provide concrete
 * examples, etc.
 *
 * Please, go read the documentation.  :-)
 *
 * Moreover there are several examples of hwloc use under doc/examples
 * in the source tree.
 *
 *=====================================================================*/

/** \file
 * \brief The hwloc API.
 *
 * See hwloc/bitmap.h for bitmap specific macros.
 * See hwloc/helper.h for high-level topology traversal helpers.
 * See hwloc/inlines.h for the actual inline code of some functions below.
 */

#ifndef HWLOC_H
#define HWLOC_H

#include <hwloc/autogen/config.h>
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>

/*
 * Symbol transforms
 */
#include <hwloc/rename.h>

/*
 * Bitmap definitions
 */

#include <hwloc/bitmap.h>
#include <hwloc/cpuset.h>


#ifdef __cplusplus
extern "C" {
#endif


/** \defgroup hwlocality_api_version API version
 * @{
 */

/** \brief Indicate at build time which hwloc API version is being used.
 *
 * This number is updated to (X<<16)+(Y<<8)+Z when a new release X.Y.Z
 * actually modifies the API.  For instance the value below, 0x00010b06,
 * encodes X=1, Y=11 (0x0b), Z=6.
 *
 * Users may check for available features at build time using this number
 * (see \ref faq_upgrade).
 */
#define HWLOC_API_VERSION 0x00010b06

/** \brief Indicate at runtime which hwloc API version was used at build time.
 *
 * Should be ::HWLOC_API_VERSION if running on the same version.
 */
HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);

/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
#define HWLOC_COMPONENT_ABI 4

/** @} */


/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t)
 *
 * Hwloc uses bitmaps to represent two distinct kinds of object sets:
 * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t).
 * These types are both typedefs to a common back end type
 * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions
 * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see
 * \ref hwlocality_bitmap).
 *
 * The rationale for having two different types is that even though
 * the actions one wants to perform on these types are the same (e.g.,
 * enable and disable individual items in the set/mask), they're used
 * in very different contexts: one for specifying which processors to
 * use and one for specifying which NUMA nodes to use.  Hence, the
 * name difference is really just to reflect the intent of where the
 * type is used.
 *
 * @{
 */

/** \brief A CPU set is a bitmap whose bits are set according to CPU
 * physical OS indexes.
 *
 * It is a plain alias of ::hwloc_bitmap_t, so it may be consulted and
 * modified with the bitmap API as any ::hwloc_bitmap_t (see hwloc/bitmap.h).
 *
 * Each bit may be converted into a PU object using
 * hwloc_get_pu_obj_by_os_index().
 */
typedef hwloc_bitmap_t hwloc_cpuset_t;
/** \brief A non-modifiable ::hwloc_cpuset_t (alias of ::hwloc_const_bitmap_t). */
typedef hwloc_const_bitmap_t hwloc_const_cpuset_t;

/** \brief A node set is a bitmap whose bits are set according to NUMA
 * memory node physical OS indexes.
 *
 * It is a plain alias of ::hwloc_bitmap_t, so it may be consulted and
 * modified with the bitmap API as any ::hwloc_bitmap_t (see hwloc/bitmap.h).
 * Each bit may be converted into a NUMA node object using
 * hwloc_get_numanode_obj_by_os_index().
 *
 * When binding memory on a system without any NUMA node
 * (when the whole memory is considered as a single memory bank),
 * the nodeset may be either empty (no memory selected)
 * or full (whole system memory selected).
 *
 * See also \ref hwlocality_helper_nodeset_convert.
 */
typedef hwloc_bitmap_t hwloc_nodeset_t;
/** \brief A non-modifiable ::hwloc_nodeset_t (alias of ::hwloc_const_bitmap_t).
 */
typedef hwloc_const_bitmap_t hwloc_const_nodeset_t;

/** @} */


/** \defgroup hwlocality_object_types Object Types
 * @{
 */

/** \brief Type of topology object.
 *
 * \note Do not rely on the ordering or completeness of the values as new ones
 * may be defined in the future!  If you need to compare types, use
 * hwloc_compare_types() instead.
 */
typedef enum {
    /* ***************************************************************
       WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

       If new enum values are added here, you MUST also go update the
       obj_type_order[] and obj_order_type[] arrays in src/topology.c.

       WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
       *************************************************************** */

  HWLOC_OBJ_SYSTEM,	/**< \brief Whole system (may be a cluster of machines).
  			  * The whole system that is accessible to hwloc.
			  * That may comprise several machines in SSI systems
			  * like Kerrighed.
			  */
  HWLOC_OBJ_MACHINE,	/**< \brief Machine.
			  * The typical root object type.
			  * A set of processors and memory with cache
			  * coherency.
			  */
  HWLOC_OBJ_NUMANODE,	/**< \brief NUMA node.
			  * An object that contains memory that is directly
			  * and byte-accessible to the host processors.
			  * It is usually close to some cores (the corresponding objects
			  * are descendants of the NUMA node object in the hwloc tree).
			  *
			  * There is always at least one such object in the topology
			  * even if the machine is not NUMA.
			  */
  HWLOC_OBJ_PACKAGE,	/**< \brief Physical package.
			  * The physical package that usually gets inserted
			  * into a socket on the motherboard.
			  * A processor package usually contains multiple cores.
			  */
  HWLOC_OBJ_CACHE,	/**< \brief Cache.
			  * Can be L1i, L1d, L2, L3, ...
			  */
  HWLOC_OBJ_CORE,	/**< \brief Core.
			  * A computation unit (may be shared by several
			  * logical processors).
			  */
  HWLOC_OBJ_PU,		/**< \brief Processing Unit, or (Logical) Processor.
			  * An execution unit (may share a core with some
			  * other logical processors, e.g. in the case of
			  * an SMT core).
			  *
			  * Objects of this kind are always reported and can
			  * thus be used as fallback when others are not.
			  */

  HWLOC_OBJ_GROUP,	/**< \brief Group objects.
			  * Objects which do not fit in the above but are
			  * detected by hwloc and are useful to take into
			  * account for affinity. For instance, some operating systems
			  * expose their arbitrary processors aggregation this
			  * way.  And hwloc may insert such objects to group
			  * NUMA nodes according to their distances.
			  * See also \ref faq_groups.
			  *
			  * These objects are ignored when they do not bring
			  * any structure.
			  */

  HWLOC_OBJ_MISC,	/**< \brief Miscellaneous objects.
			  * Objects without particular meaning, that can e.g. be
			  * added by the application for its own use, or by hwloc
			  * for miscellaneous objects such as MemoryModule (DIMMs).
			  */

  HWLOC_OBJ_BRIDGE,	/**< \brief Bridge.
			  * Any bridge that connects the host or an I/O bus,
			  * to another I/O bus.
			  * Bridge objects have neither CPU sets nor node sets.
			  * They are not added to the topology unless I/O discovery
			  * is enabled with hwloc_topology_set_flags().
			  */
  HWLOC_OBJ_PCI_DEVICE,	/**< \brief PCI device.
			  * These objects have neither CPU sets nor node sets.
			  * They are not added to the topology unless I/O discovery
			  * is enabled with hwloc_topology_set_flags().
			  */
  HWLOC_OBJ_OS_DEVICE,	/**< \brief Operating system device.
			  * These objects have neither CPU sets nor node sets.
			  * They are not added to the topology unless I/O discovery
			  * is enabled with hwloc_topology_set_flags().
			  */

  HWLOC_OBJ_TYPE_MAX    /**< \private Sentinel value */

    /* ***************************************************************
       WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

       If new enum values are added here, you MUST also go update the
       obj_type_order[] and obj_order_type[] arrays in src/topology.c.

       WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
       *************************************************************** */
} hwloc_obj_type_t;

/** \brief Cache type.
 *
 * Stored in the \p type field of struct hwloc_cache_attr_s.
 */
typedef enum hwloc_obj_cache_type_e {
  HWLOC_OBJ_CACHE_UNIFIED,      /**< \brief Unified cache. */
  HWLOC_OBJ_CACHE_DATA,         /**< \brief Data cache. */
  HWLOC_OBJ_CACHE_INSTRUCTION   /**< \brief Instruction cache.
				  * Only used when the ::HWLOC_TOPOLOGY_FLAG_ICACHES topology flag is set. */
} hwloc_obj_cache_type_t;

/** \brief Type of one side (upstream or downstream) of an I/O bridge.
 *
 * Used for the \p upstream_type and \p downstream_type fields of
 * struct hwloc_bridge_attr_s.
 */
typedef enum hwloc_obj_bridge_type_e {
  HWLOC_OBJ_BRIDGE_HOST,	/**< \brief Host-side of a bridge, only possible upstream. */
  HWLOC_OBJ_BRIDGE_PCI		/**< \brief PCI-side of a bridge. */
} hwloc_obj_bridge_type_t;

/** \brief Type of an OS device.
 *
 * Stored in the \p type field of struct hwloc_osdev_attr_s.
 */
typedef enum hwloc_obj_osdev_type_e {
  HWLOC_OBJ_OSDEV_BLOCK,	/**< \brief Operating system block device.
				  * For instance "sda" on Linux. */
  HWLOC_OBJ_OSDEV_GPU,		/**< \brief Operating system GPU device.
				  * For instance ":0.0" for a GL display,
				  * "card0" for a Linux DRM device. */
  HWLOC_OBJ_OSDEV_NETWORK,	/**< \brief Operating system network device.
				  * For instance the "eth0" interface on Linux. */
  HWLOC_OBJ_OSDEV_OPENFABRICS,	/**< \brief Operating system openfabrics device.
				  * For instance the "mlx4_0" InfiniBand HCA,
				  * or "hfi1_0" Omni-Path interface on Linux. */
  HWLOC_OBJ_OSDEV_DMA,		/**< \brief Operating system DMA engine device.
				  * For instance the "dma0chan0" DMA channel on Linux. */
  HWLOC_OBJ_OSDEV_COPROC	/**< \brief Operating system co-processor device.
				  * For instance "mic0" for a Xeon Phi (MIC) on Linux,
				  * "opencl0d0" for an OpenCL device,
				  * "cuda0" for a CUDA device. */
} hwloc_obj_osdev_type_t;

/** \brief Compare the depth of two object types
 *
 * Types shouldn't be compared as they are, since newer ones may be added in
 * the future.  This function returns less than, equal to, or greater than zero
 * respectively if \p type1 objects usually include \p type2 objects, are the
 * same as \p type2 objects, or are included in \p type2 objects. If the types
 * cannot be compared (because neither is usually contained in the other),
 * ::HWLOC_TYPE_UNORDERED is returned.  Object types containing CPUs can always
 * be compared (usually, a system contains machines which contain nodes which
 * contain packages which contain caches, which contain cores, which contain
 * processors).
 *
 * \note ::HWLOC_OBJ_PU will always be the deepest.
 * \note This does not mean that the actual topology will respect that order:
 * e.g. as of today cores may also contain caches, and packages may also contain
 * nodes. This is thus just to be seen as a fallback comparison method.
 * \note Since ::HWLOC_TYPE_UNORDERED is a positive value (INT_MAX), callers
 * testing for "greater than zero" must check for it explicitly first.
 */
HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const;

/* INT_MAX comes from <limits.h>, which this header includes. */
enum hwloc_compare_types_e {
    HWLOC_TYPE_UNORDERED = INT_MAX	/**< \brief Value returned by hwloc_compare_types() when types cannot be compared. \hideinitializer */
};

/** @} */


/** \defgroup hwlocality_objects Object Structure and Attributes
 * @{
 */

/* Forward declaration: struct hwloc_obj below only holds a pointer to this
 * union, whose full definition follows the struct. */
union hwloc_obj_attr_u;

/** \brief Object memory */
struct hwloc_obj_memory_s {
  hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */
  hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */

  /** \brief Size of array \p page_types */
  unsigned page_types_len;
  /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types_len is 0.
   *
   * The array is sorted by increasing \p size fields.
   * It contains \p page_types_len slots.
   */
  struct hwloc_obj_memory_page_type_s {
    hwloc_uint64_t size;	/**< \brief Size of pages */
    hwloc_uint64_t count;	/**< \brief Number of pages of this size */
  } * page_types;
};

/** \brief Structure of a topology object
 *
 * Applications must not modify any field except hwloc_obj.userdata.
 */
struct hwloc_obj {
  /* physical information */
  hwloc_obj_type_t type;		/**< \brief Type of object */

  unsigned os_index;			/**< \brief OS-provided physical index number.
					 * It is not guaranteed unique across the entire machine,
					 * except for PUs and NUMA nodes.
					 */
  char *name;				/**< \brief Object-specific name if any.
					 * Mostly used for identifying OS devices and Misc objects where
					 * a name string is more useful than numerical indexes.
					 */

  struct hwloc_obj_memory_s memory;	/**< \brief Memory attributes */

  union hwloc_obj_attr_u *attr;		/**< \brief Object type-specific Attributes,
					 * may be \c NULL if no attribute value was found */

  /* global position */
  unsigned depth;			/**< \brief Vertical index in the hierarchy.
					 *
					 * For normal objects, this is the depth of the horizontal level
					 * that contains this object and its cousins of the same type.
					 * If the topology is symmetric, this is equal to the parent depth
					 * plus one, and also equal to the number of parent/child links
					 * from the root object to here.
					 *
					 * For special objects (I/O and Misc) that are not
					 * in the main tree, this is a special negative value that
					 * corresponds to their dedicated level,
					 * see hwloc_get_type_depth() and ::hwloc_get_type_depth_e.
					 * Those special values can be passed to hwloc functions such
					 * as hwloc_get_nbobjs_by_depth() as usual.
					 *
					 * NOTE(review): the field is declared \c unsigned, so such
					 * negative values are held after conversion to unsigned;
					 * compare against the (converted) enum constants rather
					 * than testing for "< 0".
					 */
  unsigned logical_index;		/**< \brief Horizontal index in the whole list of similar objects,
					 * hence guaranteed unique across the entire machine.
					 * Could be a "cousin_rank" since it's the rank within the "cousin" list below
					 */
  signed os_level;			/**< \brief OS-provided physical level, -1 if unknown or meaningless */

  /* cousins are all objects of the same type (and depth) across the entire topology */
  struct hwloc_obj *next_cousin;	/**< \brief Next object of same type and depth */
  struct hwloc_obj *prev_cousin;	/**< \brief Previous object of same type and depth */

  /* children of the same parent are siblings, even if they may have different type and depth */
  struct hwloc_obj *parent;		/**< \brief Parent, \c NULL if root (system object) */
  unsigned sibling_rank;		/**< \brief Index in parent's \c children[] array */
  struct hwloc_obj *next_sibling;	/**< \brief Next object below the same parent */
  struct hwloc_obj *prev_sibling;	/**< \brief Previous object below the same parent */

  /* children array below this object */
  unsigned arity;			/**< \brief Number of children */
  struct hwloc_obj **children;		/**< \brief Children, \c children[0 .. arity -1] */
  struct hwloc_obj *first_child;	/**< \brief First child */
  struct hwloc_obj *last_child;		/**< \brief Last child */

  /* misc */
  void *userdata;			/**< \brief Application-given private data pointer,
					 * initialized to \c NULL, use it as you wish.
					 * See hwloc_topology_set_userdata_export_callback()
					 * if you wish to export this field to XML. */

  /* cpusets and nodesets */
  hwloc_cpuset_t cpuset;		/**< \brief CPUs covered by this object
                                          *
                                          * This is the set of CPUs for which there are PU objects in the topology
                                          * under this object, i.e. which are known to be physically contained in this
                                          * object and known how (the children path between this object and the PU
                                          * objects).
                                          *
                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of
                                          * these CPUs may be offline, or not allowed for binding, see online_cpuset
                                          * and allowed_cpuset.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */
  hwloc_cpuset_t complete_cpuset;       /**< \brief The complete CPU set of logical processors of this object,
                                          *
                                          * This includes not only the same as the cpuset field, but also some CPUs for
                                          * which topology information is unknown or incomplete, and the CPUs that are
                                          * ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set.
                                          * Thus no corresponding PU object may be found in the topology, because the
                                          * precise position is undefined. It is however known that it would be somewhere
                                          * under this object.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */
  hwloc_cpuset_t online_cpuset;         /**< \brief The CPU set of online logical processors
                                          *
                                          * This includes the CPUs contained in this object that are online, i.e. draw
                                          * power and can execute threads.  It may however not be allowed to bind to
                                          * them due to administration rules, see allowed_cpuset.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */
  hwloc_cpuset_t allowed_cpuset;        /**< \brief The CPU set of allowed logical processors
                                          *
                                          * This includes the CPUs contained in this object which are allowed for
                                          * binding, i.e. passing them to the hwloc binding functions should not return
                                          * permission errors.  This is usually restricted by administration rules.
                                          * Some of them may however be offline so binding to them may still not be
                                          * possible, see online_cpuset.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */

  hwloc_nodeset_t nodeset;              /**< \brief NUMA nodes covered by this object or containing this object
                                          *
                                          * This is the set of NUMA nodes for which there are NUMA node objects in the
                                          * topology under or above this object, i.e. which are known to be physically
                                          * contained in this object or containing it and known how (the children path
                                          * between this object and the NUMA node objects).
                                          *
                                          * In the end, these nodes are those that are close to the current object.
                                          *
                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of
                                          * these nodes may not be allowed for allocation, see allowed_nodeset.
                                          *
                                          * If there are no NUMA nodes in the machine, all the memory is close to this
                                          * object, so \p nodeset is full.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */
  hwloc_nodeset_t complete_nodeset;     /**< \brief The complete NUMA node set of this object,
                                          *
                                          * This includes not only the same as the nodeset field, but also some NUMA
                                          * nodes for which topology information is unknown or incomplete, and the nodes
                                          * that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set.
                                          * Thus no corresponding NUMA node object may be found in the topology, because the
                                          * precise position is undefined. It is however known that it would be
                                          * somewhere under this object.
                                          *
                                          * If there are no NUMA nodes in the machine, all the memory is close to this
                                          * object, so \p complete_nodeset is full.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */
  hwloc_nodeset_t allowed_nodeset;      /**< \brief The set of allowed NUMA memory nodes
                                          *
                                          * This includes the NUMA memory nodes contained in this object which are
                                          * allowed for memory allocation, i.e. passing them to NUMA node-directed
                                          * memory allocation should not return permission errors. This is usually
                                          * restricted by administration rules.
                                          *
                                          * If there are no NUMA nodes in the machine, all the memory is close to this
                                          * object, so \p allowed_nodeset is full.
                                          *
                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
                                          */

  struct hwloc_distances_s **distances;	/**< \brief Distances between all objects at same depth below this object */
  unsigned distances_count;		/**< \brief Presumably the number of entries in the \p distances array
					 * (mirrors the infos/infos_count pair below) -- TODO confirm. */

  struct hwloc_obj_info_s *infos;	/**< \brief Array of stringified info name/value pairs
					 * (see struct hwloc_obj_info_s). */
  unsigned infos_count;			/**< \brief Size of infos array. */

  int symmetric_subtree;		/**< \brief Set if the subtree of objects below this object is symmetric,
					  * which means all children and their children have identical subtrees.
					  * If set in the topology root object, lstopo may export the topology
					  * as a synthetic string.
					  */
};
/**
 * \brief Convenience typedef; a pointer to a struct hwloc_obj.
 */
typedef struct hwloc_obj * hwloc_obj_t;

/** \brief Object type-specific Attributes
 *
 * Referenced through the \p attr pointer of struct hwloc_obj.
 */
union hwloc_obj_attr_u {
  /** \brief Cache-specific Object Attributes */
  struct hwloc_cache_attr_s {
    hwloc_uint64_t size;		  /**< \brief Size of cache in bytes */
    unsigned depth;			  /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */
    unsigned linesize;			  /**< \brief Cache-line size in bytes. 0 if unknown */
    int associativity;			  /**< \brief Ways of associativity,
    					    *  -1 if fully associative, 0 if unknown */
    hwloc_obj_cache_type_t type;          /**< \brief Cache type */
  } cache;
  /** \brief Group-specific Object Attributes */
  struct hwloc_group_attr_s {
    unsigned depth;			  /**< \brief Depth of group object */
  } group;
  /** \brief PCI Device specific Object Attributes */
  struct hwloc_pcidev_attr_s {
    unsigned short domain;
    unsigned char bus, dev, func;
    unsigned short class_id;
    unsigned short vendor_id, device_id, subvendor_id, subdevice_id;
    unsigned char revision;
    float linkspeed; /* in GB/s */
  } pcidev;
  /** \brief Bridge specific Object Attributes */
  struct hwloc_bridge_attr_s {
    /* Only a PCI description exists for the upstream side. */
    union {
      struct hwloc_pcidev_attr_s pci;
    } upstream;
    hwloc_obj_bridge_type_t upstream_type;
    /* Only a PCI description exists for the downstream side. */
    union {
      struct {
	unsigned short domain;
	unsigned char secondary_bus, subordinate_bus;
      } pci;
    } downstream;
    hwloc_obj_bridge_type_t downstream_type;
    unsigned depth;
  } bridge;
  /** \brief OS Device specific Object Attributes */
  struct hwloc_osdev_attr_s {
    hwloc_obj_osdev_type_t type;
  } osdev;
};

/** \brief Distances between objects
 *
 * One object may contain a distance structure describing distances
 * between all its descendants at a given relative depth. If the
 * containing object is the root object of the topology, then the
 * distances are available for all objects in the machine.
 *
 * If the \p latency pointer is not \c NULL, the pointed array contains
 * memory latencies (non-zero values), see below.
 *
 * In the future, some other types of distances may be considered.
 * In these cases, \p latency may be \c NULL.
 */
struct hwloc_distances_s {
  unsigned relative_depth;	/**< \brief Relative depth of the considered objects
				 * below the object containing this distance information. */
  unsigned nbobjs;		/**< \brief Number of objects considered in the matrix.
				 * It is the number of descendant objects at \p relative_depth
				 * below the containing object.
				 * It corresponds to the result of hwloc_get_nbobjs_inside_cpuset_by_depth(). */

  float *latency;		/**< \brief Matrix of latencies between objects, stored as a one-dimensional array.
				 * May be \c NULL if the distances considered here are not latencies.
				 *
				 * Unless defined by the user, this currently contains latencies
				 * between NUMA nodes (as reported in the System Locality Distance Information Table
				 * (SLIT) in the ACPI specification), which may or may not be accurate.
				 * It corresponds to the latency for accessing the memory of one node
				 * from a core in another node.
				 *
				 * Values are normalized to get 1.0 as the minimal value in the matrix.
				 * Latency from i-th to j-th object is stored in slot i*nbobjs+j.
				 */
  float latency_max;		/**< \brief The maximal value in the latency matrix. */
  float latency_base;		/**< \brief The multiplier that should be applied to latency matrix
				 * to retrieve the original OS-provided latencies.
				 * Usually 10 on Linux since ACPI SLIT uses 10 for local latency.
				 */
};

/** \brief Object info
 *
 * A name/value pair of strings.
 *
 * \sa hwlocality_info_attr
 */
struct hwloc_obj_info_s {
  /** \brief Info name */
  char *name;
  /** \brief Info value */
  char *value;
};

/** @} */


/** \defgroup hwlocality_creation Topology Creation and Destruction
 * @{
 */

struct hwloc_topology;
/** \brief Topology context
 *
 * To be initialized with hwloc_topology_init() and built with hwloc_topology_load().
 */
typedef struct hwloc_topology * hwloc_topology_t;

/** \brief Allocate a topology context.
 *
 * \param[out] topologyp is assigned a pointer to the new allocated context.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp);

/** \brief Build the actual topology
 *
 * Build the actual topology once initialized with hwloc_topology_init() and
 * tuned with \ref hwlocality_configuration routines.
 * No other routine may be called earlier using this topology context.
 *
 * \param topology is the topology to be loaded with objects.
 *
 * \return 0 on success, -1 on error.
 *
 * \note On failure, the topology is reinitialized. It should be either
 * destroyed with hwloc_topology_destroy() or configured and loaded again.
 *
 * \note This function may be called only once per topology.
 *
 * \note The binding of the current thread or process may temporarily change
 * during this call but it will be restored before it returns.
 *
 * \sa hwlocality_configuration
 */
HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology);

/** \brief Terminate and free a topology context
 *
 * \param topology is the topology to be freed
 */
HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology);

/** \brief Duplicate a topology.
 *
 * The entire topology structure as well as its objects
 * are duplicated into a new one.
 *
 * This is useful for keeping a backup while modifying a topology.
 *
 * \note Object userdata is not duplicated since hwloc does not know what it points to.
 * The objects of both old and new topologies will point to the same userdata.
 */
HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology);

/** \brief Run internal checks on a topology structure
 *
 * The program aborts if an inconsistency is detected in the given topology.
 *
 * \param topology is the topology to be checked
 *
 * \note This routine is only useful to developers.
 *
 * \note The input topology should have been previously loaded with
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology);

/** @} */


/** \defgroup hwlocality_configuration Topology Detection Configuration and Query
 *
 * Several functions can optionally be called between hwloc_topology_init() and
 * hwloc_topology_load() to configure how the detection should be performed,
 * e.g. to ignore some objects types, define a synthetic topology, etc.
 *
 * If none of them is called, the default is to detect all the objects of the
 * machine that the caller is allowed to access.
 *
 * This default behavior may also be modified through environment variables
 * if the application did not modify it already.
 * Setting HWLOC_XMLFILE in the environment enforces the discovery from an XML
 * file as if hwloc_topology_set_xml() had been called.
 * HWLOC_FSROOT switches to reading the topology from the specified Linux
 * filesystem root as if hwloc_topology_set_fsroot() had been called.
 * Finally, HWLOC_THISSYSTEM enforces the return value of
 * hwloc_topology_is_thissystem().
 *
 * @{
 */

/** \brief Ignore an object type.
 *
 * Ignore all objects from the given type.
 * The bottom-level type ::HWLOC_OBJ_PU may not be ignored.
 * The top-level object of the hierarchy will never be ignored, even if this function
 * succeeds.
 * Group objects are always ignored if they do not bring any structure
 * since they are designed to add structure to the topology.
 * I/O objects may not be ignored, topology flags should be used to configure
 * their discovery instead.
 */
HWLOC_DECLSPEC int hwloc_topology_ignore_type(hwloc_topology_t topology, hwloc_obj_type_t type);

/** \brief Ignore an object type if it does not bring any structure.
 *
 * Ignore all objects from the given type as long as they do not bring any structure:
 * Each ignored object should have a single child or be the only child of its parent.
 * The bottom-level type ::HWLOC_OBJ_PU may not be ignored.
 * I/O objects may not be ignored, topology flags should be used to configure
 * their discovery instead.
 */
HWLOC_DECLSPEC int hwloc_topology_ignore_type_keep_structure(hwloc_topology_t topology, hwloc_obj_type_t type);

/** \brief Ignore all objects that do not bring any structure.
 *
 * Ignore all objects that do not bring any structure:
 * This is equivalent to calling hwloc_topology_ignore_type_keep_structure()
 * for all object types.
 */
HWLOC_DECLSPEC int hwloc_topology_ignore_all_keep_structure(hwloc_topology_t topology);

/** \brief Flags to be set onto a topology context before load.
 *
 * These flags are meant to be passed to hwloc_topology_set_flags().
 * They are also what hwloc_topology_get_flags() may return.
 */
enum hwloc_topology_flags_e {
  /** \brief Detect the whole system, ignore reservations and offline settings.
   *
   * All resources are gathered, even those disabled by the administrator.
   * For instance, Linux Cgroup/Cpusets are ignored and all processors and
   * memory nodes are gathered, and the fact that some resources may be
   * offline is ignored as well.
   *
   * Without this flag, disallowed PUs are not added to the topology.
   * Parent objects (package, core, cache, etc.) are only added if some of
   * their children are allowed.
   * NUMA nodes are always added, but their available memory is set to 0
   * when they are disallowed.
   *
   * If the current topology is exported to XML and reimported later, this
   * flag should be set again in the reimported topology so that disallowed
   * resources are reimported as well.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL << 0),

  /** \brief Assume that the selected backend provides the topology for the
   * system on which we are running.
   *
   * hwloc_topology_is_thissystem() is forced to return 1, i.e. hwloc assumes
   * that the selected backend provides the topology for the system on which
   * we are running, even if it is not the OS-specific backend but the XML
   * backend for instance.
   * As a consequence, the binding functions actually call the OS-specific
   * system calls and really do binding, whereas the XML backend would
   * otherwise provide empty hooks just returning success.
   *
   * Setting the environment variable HWLOC_THISSYSTEM may also result in
   * the same behavior.
   *
   * For efficiency reasons, this may be used to detect the topology once,
   * save it to an XML file, and quickly reload it later through the XML
   * backend, while still having binding functions actually do bind.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL << 1),

  /** \brief Detect PCI devices.
   *
   * I/O devices are ignored by default. This flag enables I/O device
   * detection using the pci backend. Only the common PCI devices (GPUs,
   * NICs, block devices, ...) and host bridges (objects that connect the
   * host objects to an I/O subsystem) will be added to the topology.
   * Additionally it also enables MemoryModule misc objects.
   * Uncommon devices and other bridges (such as PCI-to-PCI bridges) will
   * be ignored.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_IO_DEVICES = (1UL << 2),

  /** \brief Detect PCI bridges.
   *
   * Combine this flag with ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES to enable the
   * detection of both common devices and of all useful bridges (bridges
   * that have at least one device behind them).
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_IO_BRIDGES = (1UL << 3),

  /** \brief Detect the whole PCI hierarchy.
   *
   * All I/O devices (even the uncommon ones such as DMA channels) and all
   * bridges (even those that have no device behind them) are detected
   * using the pci backend.
   * This implies ::HWLOC_TOPOLOGY_FLAG_IO_DEVICES.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_WHOLE_IO = (1UL << 4),

  /** \brief Detect instruction caches.
   *
   * Instruction caches are detected as well,
   * instead of only Data and Unified caches.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL << 5),

  /** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description.
   *
   * When the topology was loaded from XML or from a synthetic string,
   * it is restricted by applying the current process restrictions such as
   * Linux Cgroup/Cpuset.
   *
   * This helps when the topology is not loaded directly from the local
   * machine (e.g. for performance reason) and it comes with all resources,
   * while the running process is restricted to only parts of the machine.
   *
   * Unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is also set, this flag is
   * ignored, since the loaded topology must match the underlying machine
   * where restrictions will be gathered from.
   *
   * Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES
   * would result in the same behavior.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL << 6)
};

/** \brief Set OR'ed flags to non-yet-loaded topology.
 *
 * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded.
 *
 * If this function is called multiple times, the last invocation will erase
 * and replace the set of flags that was previously set.
 *
 * The flags set in a topology may be retrieved with hwloc_topology_get_flags().
 */
HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags);

/** \brief Get OR'ed flags of a topology.
 *
 * Get the OR'ed set of ::hwloc_topology_flags_e of a topology.
 *
 * \return the flags previously set with hwloc_topology_set_flags().
 */
HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology);

/** \brief Change which process the topology is viewed from
 *
 * On some systems, processes may have different views of the machine, for
 * instance the set of allowed CPUs. By default, hwloc exposes the view from
 * the current process. Calling hwloc_topology_set_pid() makes it
 * expose the topology of the machine from the point of view of another
 * process.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note -1 is returned and errno is set to ENOSYS on platforms that do not
 * support this feature.
 */
HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid);

/** \brief Change the file-system root path when building the topology from sysfs/procfs.
 *
 * On Linux system, use sysfs and procfs files as if they were mounted on the given
 * \p fsroot_path instead of the main file-system root. Setting the environment
 * variable HWLOC_FSROOT may also result in this behavior.
 * Not using the main file-system root causes hwloc_topology_is_thissystem()
 * to return 0.
 *
 * Note that this function does not actually load topology
 * information; it just tells hwloc where to load it from.  You'll
 * still need to invoke hwloc_topology_load() to actually load the
 * topology information.
 *
 * \return -1 with errno set to ENOSYS on non-Linux and on Linux systems that
 * do not support it.
 * \return -1 with the appropriate errno if \p fsroot_path cannot be used.
 *
 * \note For convenience, this backend provides empty binding hooks which just
 * return success.  To have hwloc still actually call OS-specific hooks, the
 * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
 * file is really the underlying system.
 *
 * \note On success, the Linux component replaces the previously enabled
 * component (if any), but the topology is not actually modified until
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC int hwloc_topology_set_fsroot(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict fsroot_path);

/** \brief Enable synthetic topology.
 *
 * Gather topology information from the given \p description,
 * a space-separated string of numbers describing
 * the arity of each level.
 * Each number may be prefixed with a type and a colon to enforce the type
 * of a level.  If only some level types are enforced, hwloc will try to
 * choose the other types according to usual topologies, but it may fail
 * and you may have to specify more level types manually.
 * See also the \ref synthetic.
 *
 * If \p description was properly parsed and describes a valid topology
 * configuration, this function returns 0.
 * Otherwise -1 is returned and errno is set to EINVAL.
 *
 * Note that this function does not actually load topology
 * information; it just tells hwloc where to load it from.  You'll
 * still need to invoke hwloc_topology_load() to actually load the
 * topology information.
 *
 * \note For convenience, this backend provides empty binding hooks which just
 * return success.
 *
 * \note On success, the synthetic component replaces the previously enabled
 * component (if any), but the topology is not actually modified until
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description);

/** \brief Enable XML-file based topology.
 *
 * Gather topology information from the XML file given at \p xmlpath.
 * Setting the environment variable HWLOC_XMLFILE may also result in this behavior.
 * This file may have been generated earlier with hwloc_topology_export_xml()
 * or lstopo file.xml.
 *
 * Note that this function does not actually load topology
 * information; it just tells hwloc where to load it from.  You'll
 * still need to invoke hwloc_topology_load() to actually load the
 * topology information.
 *
 * \return -1 with errno set to EINVAL on failure to read the XML file.
 *
 * \note See also hwloc_topology_set_userdata_import_callback()
 * for importing application-specific object userdata.
 *
 * \note For convenience, this backend provides empty binding hooks which just
 * return success.  To have hwloc still actually call OS-specific hooks, the
 * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
 * file is really the underlying system.
 *
 * \note On success, the XML component replaces the previously enabled
 * component (if any), but the topology is not actually modified until
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath);

/** \brief Enable XML based topology using a memory buffer (instead of
 * a file, as with hwloc_topology_set_xml()).
 *
 * Gather topology information from the XML memory buffer given at \p
 * buffer and of length \p size.  This buffer may have been filled
 * earlier with hwloc_topology_export_xmlbuffer().
 *
 * Note that this function does not actually load topology
 * information; it just tells hwloc where to load it from.  You'll
 * still need to invoke hwloc_topology_load() to actually load the
 * topology information.
 *
 * \return -1 with errno set to EINVAL on failure to read the XML buffer.
 *
 * \note See also hwloc_topology_set_userdata_import_callback()
 * for importing application-specific object userdata.
 *
 * \note For convenience, this backend provides empty binding hooks which just
 * return success.  To have hwloc still actually call OS-specific hooks, the
 * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
 * file is really the underlying system.
 *
 * \note On success, the XML component replaces the previously enabled
 * component (if any), but the topology is not actually modified until
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size);

/** \brief Prepare the topology for custom assembly.
 *
 * The topology then contains a single root object.
 * It must then be built by inserting other topologies with
 * hwloc_custom_insert_topology() or single objects with
 * hwloc_custom_insert_group_object_by_parent().
 * hwloc_topology_load() must be called to finalize the new
 * topology as usual.
 *
 * \note If nothing is inserted in the topology,
 * hwloc_topology_load() will fail with errno set to EINVAL.
 *
 * \note The cpuset and nodeset of the root object are NULL because
 * these sets are meaningless when assembling multiple topologies.
 *
 * \note On success, the custom component replaces the previously enabled
 * component (if any), but the topology is not actually modified until
 * hwloc_topology_load().
 */
HWLOC_DECLSPEC int hwloc_topology_set_custom(hwloc_topology_t topology);

/** \brief Provide a distance matrix.
 *
 * Provide the matrix of distances between a set of objects of the given type.
 * \p nbobjs must be at least 2.
 * The set may or may not contain all the existing objects of this type.
 * The objects are specified by their OS/physical index in the \p os_index
 * array. The \p distances matrix follows the same order.
 * The distance from object i to object j is stored in slot i*nbobjs+j.
 *
 * A single latency matrix may be defined for each type.
 * If another distance matrix already exists for the given type,
 * either because the user specified it or because the OS offers it,
 * it will be replaced by the given one.
 * If \p nbobjs is \c 0, \p os_index is \c NULL and \p distances is \c NULL,
 * the existing distance matrix for the given type is removed.
 *
 * \note Distance matrices are ignored in multi-node topologies.
 */
HWLOC_DECLSPEC int hwloc_topology_set_distance_matrix(hwloc_topology_t __hwloc_restrict topology,
						      hwloc_obj_type_t type, unsigned nbobjs,
						      unsigned *os_index, float *distances);

/** \brief Does the topology context come from this system?
 *
 * \return 1 if this topology context was built using the system
 * running this program.
 * \return 0 instead (for instance if using another file-system root,
 * an XML topology file, or a synthetic topology).
 */
HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t  __hwloc_restrict topology) __hwloc_attribute_pure;

/** \brief Flags describing actual discovery support for this topology. */
struct hwloc_topology_discovery_support {
  unsigned char pu; /**< \brief Detecting the number of PU objects is supported. */
};

/** \brief Flags describing actual PU binding support for this topology.
 *
 * A flag being set does not guarantee that the feature is supported in
 * all cases (e.g. binding to random sets of non-contiguous objects).
 */
struct hwloc_topology_cpubind_support {
  unsigned char set_thisproc_cpubind;   /**< Binding the whole current process is supported. */
  unsigned char get_thisproc_cpubind;   /**< Getting the binding of the whole current process is supported. */
  unsigned char set_proc_cpubind;       /**< Binding a whole given process is supported. */
  unsigned char get_proc_cpubind;       /**< Getting the binding of a whole given process is supported. */
  unsigned char set_thisthread_cpubind; /**< Binding the current thread only is supported. */
  unsigned char get_thisthread_cpubind; /**< Getting the binding of the current thread only is supported. */
  unsigned char set_thread_cpubind;     /**< Binding a given thread only is supported. */
  unsigned char get_thread_cpubind;     /**< Getting the binding of a given thread only is supported. */
  unsigned char get_thisproc_last_cpu_location;   /**< Getting the last processors where the whole current process ran is supported. */
  unsigned char get_proc_last_cpu_location;       /**< Getting the last processors where a whole process ran is supported. */
  unsigned char get_thisthread_last_cpu_location; /**< Getting the last processors where the current thread ran is supported. */
};

/** \brief Flags describing actual memory binding support for this topology.
 *
 * A flag being set does not guarantee that the feature is supported in
 * all cases (e.g. binding to random sets of non-contiguous objects).
 */
struct hwloc_topology_membind_support {
  unsigned char set_thisproc_membind;   /**< Binding the whole current process is supported. */
  unsigned char get_thisproc_membind;   /**< Getting the binding of the whole current process is supported. */
  unsigned char set_proc_membind;       /**< Binding a whole given process is supported. */
  unsigned char get_proc_membind;       /**< Getting the binding of a whole given process is supported. */
  unsigned char set_thisthread_membind; /**< Binding the current thread only is supported. */
  unsigned char get_thisthread_membind; /**< Getting the binding of the current thread only is supported. */
  unsigned char set_area_membind;       /**< Binding a given memory area is supported. */
  unsigned char get_area_membind;       /**< Getting the binding of a given memory area is supported. */
  unsigned char alloc_membind;          /**< Allocating a bound memory area is supported. */
  unsigned char firsttouch_membind;     /**< First-touch policy is supported. */
  unsigned char bind_membind;           /**< Bind policy is supported. */
  unsigned char interleave_membind;     /**< Interleave policy is supported. */
  unsigned char replicate_membind;      /**< Replication policy is supported. */
  unsigned char nexttouch_membind;      /**< Next-touch migration policy is supported. */
  unsigned char migrate_membind;        /**< Migration flag is supported. */
  unsigned char get_area_memlocation;   /**< Getting the last NUMA nodes where a memory area was allocated is supported. */
};

/** \brief Set of flags describing actual support for this topology.
 *
 * It is obtained through hwloc_topology_get_support() and remains valid
 * until the topology object is destroyed.
 * Note: the values are correct only after discovery.
 */
struct hwloc_topology_support {
  /* One pointer per family of support flags. */
  struct hwloc_topology_discovery_support *discovery;
  struct hwloc_topology_cpubind_support   *cpubind;
  struct hwloc_topology_membind_support   *membind;
};

/** \brief Retrieve the topology support.
 *
 * Each flag indicates whether a feature is supported.
 * If set to 0, the feature is not supported.
 * If set to 1, the feature is supported, but the corresponding
 * call may still fail in some corner cases.
 *
 * These features are also listed by hwloc-info \--support
 */
HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology);

/** \brief Set the topology-specific userdata pointer.
 *
 * Each topology may store one application-given private data pointer.
 * It is initialized to \c NULL.
 * hwloc will never modify it.
 *
 * Use it as you wish, after hwloc_topology_init() and until hwloc_topology_destroy().
 *
 * This pointer is not exported to XML.
 */
HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata);

/** \brief Retrieve the topology-specific userdata pointer.
 *
 * Retrieve the application-given private data pointer that was
 * previously set with hwloc_topology_set_userdata().
 */
HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology);

/** @} */


/** \defgroup hwlocality_levels Object levels, depths and types
 * @{
 *
 * Be sure to see the figure in \ref termsanddefs that shows a
 * complete topology tree, including depths, child/sibling/cousin
 * relationships, and an example of an asymmetric topology where one
 * package has fewer caches than its peers.
 */

/** \brief Get the depth of the hierarchical tree of objects.
 *
 * This is the depth of ::HWLOC_OBJ_PU objects plus one.
 *
 * \note I/O and Misc objects are ignored when computing the depth
 * of the tree (they are placed on special levels, or none).
 */
HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure;

/** \brief Returns the depth of objects of type \p type.
 *
 * If no object of this type is present on the underlying architecture, or if
 * the OS doesn't provide this kind of information, the function returns
 * ::HWLOC_TYPE_DEPTH_UNKNOWN.
 *
 * If type is absent but a similar type is acceptable, see also
 * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth().
 *
 * If some objects of the given type exist in different levels,
 * for instance L1 and L2 caches, or L1i and L1d caches,
 * the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE.
 * See hwloc_get_cache_type_depth() in hwloc/helper.h to better handle this
 * case.
 *
 * If an I/O object type is given, the function returns a virtual value
 * because I/O objects are stored in special levels that are not CPU-related.
 * This virtual depth may be passed to other hwloc functions such as
 * hwloc_get_obj_by_depth() but it should not be considered as an actual
 * depth by the application. In particular, it should not be compared with
 * any other object depth or with the entire topology depth.
 *
 * If ::HWLOC_OBJ_MISC is given, the function returns ::HWLOC_TYPE_DEPTH_UNKNOWN.
 */
HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type);

enum hwloc_get_type_depth_e {
    /** \brief No object of given type exists in the topology. \hideinitializer */
    HWLOC_TYPE_DEPTH_UNKNOWN = -1,
    /** \brief Objects of given type exist at different depth in the topology. \hideinitializer */
    HWLOC_TYPE_DEPTH_MULTIPLE = -2,
    /** \brief Virtual depth for bridge object level. \hideinitializer */
    HWLOC_TYPE_DEPTH_BRIDGE = -3,
    /** \brief Virtual depth for PCI device object level. \hideinitializer */
    HWLOC_TYPE_DEPTH_PCI_DEVICE = -4,
    /** \brief Virtual depth for software device object level. \hideinitializer */
    HWLOC_TYPE_DEPTH_OS_DEVICE = -5
};

/** \brief Returns the depth of objects of type \p type or below
 *
 * If no object of this type is present on the underlying architecture, the
 * function returns the depth of the first "present" object typically found
 * inside \p type.
 *
 * If some objects of the given type exist in different levels, for instance
 * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE.
 */
static __hwloc_inline int
hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;

/** \brief Returns the depth of objects of type \p type or above
 *
 * If no object of this type is present on the underlying architecture, the
 * function returns the depth of the first "present" object typically
 * containing \p type.
 *
 * If some objects of the given type exist in different levels, for instance
 * L1 and L2 caches, the function returns ::HWLOC_TYPE_DEPTH_MULTIPLE.
 */
static __hwloc_inline int
hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;

/** \brief Returns the type of objects at depth \p depth.
 *
 * \p depth should be between 0 and hwloc_topology_get_depth()-1.
 *
 * \return -1 if depth \p depth does not exist.
 */
HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;

/** \brief Returns the width of level at depth \p depth.
 */
HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;

/** \brief Returns the width of level type \p type
 *
 * If no object for that type exists, 0 is returned.
 * If there are several levels with objects of that type, -1 is returned.
 */
static __hwloc_inline int
hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;

/** \brief Returns the top-object of the topology-tree.
 *
 * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different
 * for complex topologies.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure;

/** \brief Returns the topology object at logical index \p idx from depth \p depth */
HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure;

/** \brief Returns the topology object at logical index \p idx with type \p type
 *
 * If no object for that type exists, \c NULL is returned.
 * If there are several levels with objects of that type, \c NULL is returned
 * and the caller may fall back to hwloc_get_obj_by_depth().
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;

/** \brief Returns the next object at depth \p depth.
 *
 * If \p prev is \c NULL, return the first object at depth \p depth.
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev);

/** \brief Returns the next object of type \p type.
 *
 * If \p prev is \c NULL, return the first object of type \p type.  If
 * there are multiple or no depths for the given type, return \c NULL and
 * let the caller fall back to hwloc_get_next_obj_by_depth().
 */
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
			    hwloc_obj_t prev);

/** @} */


/** \defgroup hwlocality_object_strings Converting between Object Types, Sets and Attributes, and Strings
 * @{
 */

/** \brief Return a constant stringified object type.
 *
 * This function is the basic way to convert a generic type into a string.
 *
 * hwloc_obj_type_snprintf() may return a more precise output for a specific
 * object, but it requires the caller to provide the output buffer.
 */
HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const;

/** \brief Stringify the type of a given topology object into a human-readable form.
 *
 * Contrary to hwloc_obj_type_string(), this function includes object-specific
 * attributes (such as the Group depth, the Bridge type, or OS device type)
 * in the output, and it requires the caller to provide the output buffer.
 *
 * The output is guaranteed to be the same for all objects of the same topology level.
 *
 * If \p size is 0, \p string may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj,
				   int verbose);

/** \brief Stringify the attributes of a given topology object into a human-readable form.
 *
 * Attribute values are separated by \p separator.
 *
 * Only the major attributes are printed in non-verbose mode.
 *
 * If \p size is 0, \p string may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * __hwloc_restrict separator,
				   int verbose);

/** \brief Stringify the cpuset containing a set of objects.
 *
 * If \p size is 0, \p string may safely be \c NULL.
 *
 * \return the number of characters that were actually written if not truncating,
 * or that would have been written (not including the ending \\0).
 */
HWLOC_DECLSPEC int hwloc_obj_cpuset_snprintf(char * __hwloc_restrict str, size_t size, size_t nobj, const hwloc_obj_t * __hwloc_restrict objs);

/** \brief Return an object type and attributes from a type string.
 *
 * Convert strings such as "Package" or "Cache" into the corresponding types.
 * Matching is case-insensitive, and only the first letters are actually
 * required to match.
 *
 * This function is guaranteed to match any string returned by hwloc_obj_type_string()
 * or hwloc_obj_type_snprintf().
 *
 * Types that have specific attributes, for instance caches and groups,
 * may be returned in \p depthattrp and \p typeattrp. They are ignored
 * when these pointers are \c NULL.
 *
 * For instance "L2i" or "L2iCache" would return
 * type HWLOC_OBJ_CACHE in \p typep, 2 in \p depthattrp,
 * and HWLOC_OBJ_CACHE_TYPE_INSTRUCTION in \p typeattrp
 * (this last pointer should point to a hwloc_obj_cache_type_t).
 * "Group3" would return type HWLOC_OBJ_GROUP in \p typep and 3 in \p depthattrp.
 * Attributes that are not specified in the string (for instance "Group"
 * without a depth, or "L2Cache" without a cache type) are set to -1.
 *
 * \p typeattrp is only filled if the size specified in \p typeattrsize
 * is large enough. It is currently only used for caches, and the required
 * size is at least the size of hwloc_obj_cache_type_t.
 *
 * \return 0 if a type was correctly identified, otherwise -1.
 *
 * \note This is an extended version of the now deprecated hwloc_obj_type_of_string()
 */
HWLOC_DECLSPEC int hwloc_obj_type_sscanf(const char *string,
					 hwloc_obj_type_t *typep,
					 int *depthattrp,
					 void *typeattrp, size_t typeattrsize);

/** @} */


/** \defgroup hwlocality_info_attr Consulting and Adding Key-Value Info Attributes
 *
 * @{
 */

/** \brief Search the given key name in object infos and return the corresponding value.
 *
 * If multiple keys match the given name, only the first one is returned.
 *
 * \return \c NULL if no such key exists.
 */
static __hwloc_inline const char *
hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure;

/** \brief Add the given info name and value pair to the given object.
 *
 * The info is appended to the existing info array even if another key
 * with the same name already exists.
 *
 * The input strings are copied before being added in the object infos.
 *
 * \note This function may be used to enforce object colors in the lstopo
 * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb"
 * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details.
 *
 * \note If \p value contains some non-printable characters, they will
 * be dropped when exporting to XML, see hwloc_topology_export_xml().
 */
HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value);

/** @} */


/** \defgroup hwlocality_cpubinding CPU binding
 *
 * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU
 * remains in the set. This way, the process will not even migrate between
 * different CPUs inside the given set.
 * Some operating systems also only support that kind of binding.
 *
 * Some operating systems do not provide all hwloc-supported
 * mechanisms to bind processes, threads, etc.
 * hwloc_topology_get_support() may be used to query about the actual CPU
 * binding support in the currently used operating system.
 *
 * When the requested binding operation is not available and the
 * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1.
 * \p errno is set to \c ENOSYS when it is not possible to bind the
 * requested kind of object (processes/threads). \p errno is set to
 * \c EXDEV when the requested cpuset cannot be enforced (e.g. some
 * systems only allow one CPU, and some other systems only allow one
 * NUMA node).
 *
 * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well,
 * or the operating system may use a slightly different operation
 * (with side-effects, smaller binding set, etc.)
 * when the requested operation is not exactly supported.
 *
 * The most portable version that should be preferred over the others,
 * whenever possible, is the following one which just binds the current program,
 * assuming it is single-threaded:
 *
 * \code
 * hwloc_set_cpubind(topology, set, 0);
 * \endcode
 *
 * If the program may be multithreaded, the following one should be preferred
 * to only bind the current thread:
 *
 * \code
 * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
 * \endcode
 *
 * \sa Some example codes are available under doc/examples/ in the source tree.
 *
 * \note To unbind, just call the binding function with either a full cpuset or
 * a cpuset equal to the system cpuset.
 *
 * \note On some operating systems, CPU binding may have effects on memory binding, see
 * ::HWLOC_CPUBIND_NOMEMBIND
 *
 * \note Running lstopo \--top or hwloc-ps can be a very convenient tool to check
 * how binding actually happened.
 * @{
 */

/** \brief Process/Thread binding flags.
 *
 * Bit flags that refine the CPU binding policy.
 *
 * Passing no flag (0) binds the current process, assumed to be
 * single-threaded, in a non-strict way.  This is the most portable
 * way to bind as all operating systems usually provide it.
 *
 * \note Not all systems support all kinds of binding.  See the
 * "Detailed Description" section of \ref hwlocality_cpubinding for a
 * description of errors that can occur.
 */
typedef enum {
  /** \brief Bind all threads of the current (possibly) multithreaded process.
   * \hideinitializer */
  HWLOC_CPUBIND_PROCESS = (1 << 0),

  /** \brief Bind current thread of current process.
   * \hideinitializer */
  HWLOC_CPUBIND_THREAD = (1 << 1),

  /** \brief Request for strict binding from the OS.
   *
   * Without this flag, an operating system may run the thread/process
   * on other, idle CPUs when all the designated CPUs are busy, so that
   * it makes progress anyway.  With strict binding, the thread/process
   * will _never_ execute on CPUs other than the designated ones, even
   * when those are busy with other tasks and other CPUs are idle.
   *
   * \note Depending on the operating system, strict binding may not
   * be possible (e.g., the OS does not implement it) or not allowed
   * (e.g., for administrative reasons), and the function will fail
   * in that case.
   *
   * When retrieving the binding of a process, this flag checks
   * whether all its threads actually have the same binding. If the
   * flag is not given, the binding of each thread will be
   * accumulated.
   *
   * \note This flag is meaningless when retrieving the binding of a
   * thread.
   * \hideinitializer
   */
  HWLOC_CPUBIND_STRICT = (1 << 2),

  /** \brief Avoid any effect on memory binding
   *
   * On some operating systems, some CPU binding functions also bind
   * the memory on the corresponding NUMA node.  This is often not a
   * problem for the application, but if it is, setting this flag
   * makes hwloc avoid using OS functions that would also bind
   * memory.  This will however reduce the support of CPU bindings,
   * i.e. potentially return -1 with errno set to ENOSYS in some
   * cases.
   *
   * This flag is only meaningful when used with functions that set
   * the CPU binding.  It is ignored when used with functions that get
   * CPU binding information.
   * \hideinitializer
   */
  HWLOC_CPUBIND_NOMEMBIND = (1 << 3)
} hwloc_cpubind_flags_t;

/** \brief Bind current process or thread on cpus given in physical bitmap \p set.
 *
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 */
HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);

/** \brief Get current process or thread binding.
 *
 * Writes into \p set the physical cpuset which the process or thread (according to \e
 * flags) was last bound to.
 */
HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);

/** \brief Bind a process \p pid on cpus given in physical bitmap \p set.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note As a special case on Linux, if a tid (thread ID) is supplied
 * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
 * the binding is applied to that specific thread.
 *
 * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
 */
HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);

/** \brief Get the current physical binding of process \p pid.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note As a special case on Linux, if a tid (thread ID) is supplied
 * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
 * the binding for that specific thread is returned.
 *
 * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
 */
HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);

#ifdef hwloc_thread_t
/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set.
 *
 * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags.
 */
HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags);
#endif

#ifdef hwloc_thread_t
/** \brief Get the current physical binding of thread \p thread.
 *
 * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags.
 */
HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags);
#endif

/** \brief Get the last physical CPU where the current process or thread ran.
 *
 * The operating system may move some tasks from one processor
 * to another at any time according to their binding,
 * so this function may return something that is already
 * outdated.
 *
 * \p flags can include either ::HWLOC_CPUBIND_PROCESS or ::HWLOC_CPUBIND_THREAD to
 * specify whether the query should be for the whole process (union of all CPUs
 * on which all threads are running), or only the current thread. If the
 * process is single-threaded, flags can be set to zero to let hwloc use
 * whichever method is available on the underlying OS.
 */
HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);

/** \brief Get the last physical CPU where a process ran.
 *
 * The operating system may move some tasks from one processor
 * to another at any time according to their binding,
 * so this function may return something that is already
 * outdated.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 *
 * \note As a special case on Linux, if a tid (thread ID) is supplied
 * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
 * the last CPU location of that specific thread is returned.
 *
 * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
 */
HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);

/** @} */


/** \defgroup hwlocality_membinding Memory binding
 *
 * Memory binding can be done three ways:
 *
 * - explicit memory allocation thanks to hwloc_alloc_membind() and friends:
 *   the binding will have effect on the memory allocated by these functions.
 * - implicit memory binding through binding policy: hwloc_set_membind() and
 *   friends only define the current policy of the process, which will be
 *   applied to the subsequent calls to malloc() and friends.
 * - migration of existing memory ranges, thanks to hwloc_set_area_membind()
 *   and friends, which move already-allocated data.
 *
 * Not all operating systems support all three ways.
 * hwloc_topology_get_support() may be used to query about the actual memory
 * binding support in the currently used operating system.
 *
 * When the requested binding operation is not available and the
 * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1.
 * \p errno will be set to \c ENOSYS when the system does not support
 * the specified action or policy
 * (e.g., some systems only allow binding memory on a per-thread
 * basis, whereas other systems only allow binding memory for all
 * threads in a process).
 * \p errno will be set to \c EXDEV when the requested set cannot be enforced
 * (e.g., some systems only allow binding memory to a single NUMA node).
 *
 * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well,
 * or the operating system may use a slightly different operation
 * (with side-effects, smaller binding set, etc.)
 * when the requested operation is not exactly supported.
 *
 * The most portable form that should be preferred over the others
 * whenever possible is as follows.
 * It allocates some memory hopefully bound to the specified set.
 * To do so, hwloc will possibly have to change the current memory
 * binding policy in order to actually get the memory bound, if the OS
 * does not provide any other way to simply allocate bound memory
 * without changing the policy for all allocations. That is the
 * difference with hwloc_alloc_membind(), which will never change the
 * current memory binding policy.
 *
 * \code
 * hwloc_alloc_membind_policy(topology, size, set,
 *                            HWLOC_MEMBIND_BIND, 0);
 * \endcode
 *
 * Each hwloc memory binding function is available in two forms: one
 * that takes a bitmap argument (a CPU set by default, or a NUMA memory
 * node set if the flag ::HWLOC_MEMBIND_BYNODESET is specified),
 * and another one (whose name ends with _nodeset) that always takes
 * a NUMA memory node set.
 * See \ref hwlocality_object_sets and \ref hwlocality_bitmap for a
 * discussion of CPU sets and NUMA memory node sets.
 * It is also possible to convert between CPU set and node set using
 * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset().
 *
 * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
 * Binding by nodeset should therefore be preferred whenever possible.
 *
 * \sa Some example codes are available under doc/examples/ in the source tree.
 *
 * \note On some operating systems, memory binding affects the CPU
 * binding; see ::HWLOC_MEMBIND_NOCPUBIND
 * @{
 */

/** \brief Memory binding policy.
 *
 * Constants that select the binding policy.  Exactly one policy is
 * used at a time (i.e., the values cannot be OR'ed together).
 *
 * Not all systems support all kinds of binding.
 * hwloc_topology_get_support() may be used to query about the actual memory
 * binding policy support in the currently used operating system.
 * See the "Detailed Description" section of \ref hwlocality_membinding
 * for a description of errors that can occur.
 */
typedef enum {
  /** \brief Reset the memory allocation policy to the system default.
   * Depending on the operating system, this may correspond to
   * ::HWLOC_MEMBIND_FIRSTTOUCH (Linux),
   * or ::HWLOC_MEMBIND_BIND (AIX, HP-UX, OSF, Solaris, Windows).
   * This policy is never returned by get membind functions when running
   * on normal machines.
   * It is only returned when binding hooks are empty because the topology
   * was loaded from XML, or HWLOC_THISSYSTEM=0, etc.
   * \hideinitializer */
  HWLOC_MEMBIND_DEFAULT = 0,

  /** \brief Allocate memory
   * but do not immediately bind it to a specific locality. Instead,
   * each page in the allocation is bound only when it is first
   * touched. Pages are individually bound to the local NUMA node of
   * the first thread that touches them. If there is not enough memory
   * on the node, allocation may be done in the specified nodes
   * before allocating on other nodes.
   * \hideinitializer */
  HWLOC_MEMBIND_FIRSTTOUCH = 1,

  /** \brief Allocate memory on the specified nodes.
   * \hideinitializer */
  HWLOC_MEMBIND_BIND = 2,

  /** \brief Allocate memory on the given nodes in an interleaved
   * / round-robin manner.  The precise layout of the memory across
   * multiple NUMA nodes is OS/system specific. Interleaving can be
   * useful when threads distributed across the specified NUMA nodes
   * will all be accessing the whole memory range concurrently, since
   * the interleave will then balance the memory references.
   * \hideinitializer */
  HWLOC_MEMBIND_INTERLEAVE = 3,

  /** \brief Replicate memory on the given nodes; reads from this
   * memory will attempt to be serviced from the NUMA node local to
   * the reading thread. Replicating can be useful when multiple
   * threads from the specified NUMA nodes will be sharing the same
   * read-only data.
   *
   * This policy can only be used with existing memory allocations
   * (i.e., the hwloc_set_*membind*() functions); it cannot be used
   * with functions that allocate new memory (i.e., the hwloc_alloc*()
   * functions).
   * \hideinitializer */
  HWLOC_MEMBIND_REPLICATE = 4,

  /** \brief For each page bound with this policy, the next time
   * it is touched (and next time only), it is moved from its current
   * location to the local NUMA node of the thread where the memory
   * reference occurred (if it needs to be moved at all).
   * \hideinitializer */
  HWLOC_MEMBIND_NEXTTOUCH = 5,

  /** \brief Returned by get_membind() functions when multiple
   * threads or parts of a memory area have differing memory binding
   * policies.
   * \hideinitializer */
  HWLOC_MEMBIND_MIXED = -1
} hwloc_membind_policy_t;

/** \brief Memory binding flags.
 *
 * Bit flags that refine the memory binding policy.
 * All flags can be logically OR'ed together with the exception of
 * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD;
 * these two flags are mutually exclusive.
 *
 * Not all systems support all kinds of binding.
 * hwloc_topology_get_support() may be used to query about the actual memory
 * binding support in the currently used operating system.
 * See the "Detailed Description" section of \ref hwlocality_membinding
 * for a description of errors that can occur.
 */
typedef enum {
  /** \brief Set policy for all threads of the specified (possibly
   * multithreaded) process.  This flag is mutually exclusive with
   * ::HWLOC_MEMBIND_THREAD.
   * \hideinitializer */
  HWLOC_MEMBIND_PROCESS = (1 << 0),

  /** \brief Set policy for a specific thread of the current process.
   * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS.
   * \hideinitializer */
  HWLOC_MEMBIND_THREAD = (1 << 1),

  /** \brief Request strict binding from the OS.  The function will
   * fail if the binding can not be guaranteed / completely enforced.
   *
   * This flag has slightly different meanings depending on which
   * function it is used with.
   * \hideinitializer */
  HWLOC_MEMBIND_STRICT = (1 << 2),

  /** \brief Migrate existing allocated memory.  If the memory cannot
   * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error
   * will be returned.
   * \hideinitializer */
  HWLOC_MEMBIND_MIGRATE = (1 << 3),

  /** \brief Avoid any effect on CPU binding.
   *
   * On some operating systems, some underlying memory binding
   * functions also bind the application to the corresponding CPU(s).
   * Using this flag will cause hwloc to avoid using OS functions that
   * could potentially affect CPU bindings.  Note, however, that using
   * NOCPUBIND may reduce hwloc's overall memory binding
   * support. Specifically: some of hwloc's memory binding functions
   * may fail with errno set to ENOSYS when used with NOCPUBIND.
   * \hideinitializer
   */
  HWLOC_MEMBIND_NOCPUBIND = (1 << 4),

  /** \brief Consider the bitmap argument as a nodeset.
   *
   * Functions whose name ends with _nodeset() take a nodeset argument.
   * Other functions take a bitmap argument that is considered a nodeset
   * if this flag is given, or a cpuset otherwise.
   *
   * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
   * Binding by nodeset should therefore be preferred whenever possible.
   * \hideinitializer
   */
  HWLOC_MEMBIND_BYNODESET = (1 << 5)
} hwloc_membind_flags_t;

/** \brief Set the default memory binding policy of the current
 * process or thread to prefer the NUMA node(s) specified by \p nodeset
 *
 * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is
 * specified, the current process is assumed to be single-threaded.
 * This is the most portable form as it permits hwloc to use either
 * process-based OS functions or thread-based OS functions, depending
 * on which are available.
 *
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 */
HWLOC_DECLSPEC int hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);

/** \brief Set the default memory binding policy of the current
 * process or thread to prefer the NUMA node(s) specified by \p set
 *
 * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is
 * specified, the current process is assumed to be single-threaded.
 * This is the most portable form as it permits hwloc to use either
 * process-based OS functions or thread-based OS functions, depending
 * on which are available.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 */
HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);

/** \brief Query the default memory binding policy and physical locality of the
 * current process or thread.
 *
 * This function has two output parameters: \p nodeset and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the current memory binding policies and nodesets in
 * the queried target.
 *
 * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
 * target is the current policies and nodesets for all the threads in
 * the current process.  Passing ::HWLOC_MEMBIND_THREAD specifies that
 * the query target is the current policy and nodeset for only the
 * thread invoking this function.
 *
 * If neither of these flags are passed (which is the most portable
 * method), the process is assumed to be single threaded.  This allows
 * hwloc to use either process-based OS functions or thread-based OS
 * functions, depending on which are available.
 *
 * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS
 * is also specified.  In this case, hwloc will check the default
 * memory policies and nodesets for all threads in the process.  If
 * they are not identical, -1 is returned and errno is set to EXDEV.
 * If they are identical, the values are returned in \p nodeset and \p
 * policy.
 *
 * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and
 * ::HWLOC_MEMBIND_STRICT is \em not specified), \p nodeset is set to
 * the logical OR of all threads' default nodeset.
 * If all threads' default policies are the same, \p policy is set to
 * that policy.  If they are different, \p policy is set to
 * ::HWLOC_MEMBIND_MIXED.
 *
 * In the ::HWLOC_MEMBIND_THREAD case (or when neither
 * ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is specified), there
 * is only one nodeset and policy; they are returned in \p nodeset and
 * \p policy, respectively.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 */
HWLOC_DECLSPEC int hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);

/** \brief Query the default memory binding policy and physical locality of the
 * current process or thread.
 *
 * This function has two output parameters: \p set and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the current memory binding policies and nodesets in
 * the queried target.
 *
 * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
 * target is the current policies and nodesets for all the threads in
 * the current process.  Passing ::HWLOC_MEMBIND_THREAD specifies that
 * the query target is the current policy and nodeset for only the
 * thread invoking this function.
 *
 * If neither of these flags are passed (which is the most portable
 * method), the process is assumed to be single threaded.  This allows
 * hwloc to use either process-based OS functions or thread-based OS
 * functions, depending on which are available.
 *
 * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS
 * is also specified.  In this case, hwloc will check the default
 * memory policies and nodesets for all threads in the process.  If
 * they are not identical, -1 is returned and errno is set to EXDEV.
 * If they are identical, the values are returned in \p set and \p
 * policy.
 *
 * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and
 * ::HWLOC_MEMBIND_STRICT is \em not specified), the default set
 * from each thread is logically OR'ed together.
 * If all threads' default policies are the same, \p policy is set to
 * that policy.  If they are different, \p policy is set to
 * ::HWLOC_MEMBIND_MIXED.
 *
 * In the ::HWLOC_MEMBIND_THREAD case (or when neither
 * ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is specified), there
 * is only one set and policy; they are returned in \p set and
 * \p policy, respectively.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 */
HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);

/** \brief Set the default memory binding policy of the specified
 * process to prefer the NUMA node(s) specified by \p nodeset
 *
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 */
HWLOC_DECLSPEC int hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);

/** \brief Set the default memory binding policy of the specified
 * process to prefer the NUMA node(s) specified by \p set
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 */
HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);

/** \brief Query the default memory binding policy and physical locality of the
 * specified process.
 *
 * This function has two output parameters: \p nodeset and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the current memory binding policies and nodesets in
 * the queried target.
 *
 * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
 * target is the current policies and nodesets for all the threads in
 * the specified process.  If ::HWLOC_MEMBIND_PROCESS is not specified
 * (which is the most portable method), the process is assumed to be
 * single threaded.  This allows hwloc to use either process-based OS
 * functions or thread-based OS functions, depending on which are
 * available.
 *
 * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to
 * this function.
 *
 * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default
 * memory policies and nodesets for all threads in the specified
 * process.  If they are not identical, -1 is returned and errno is
 * set to EXDEV.  If they are identical, the values are returned in \p
 * nodeset and \p policy.
 *
 * Otherwise, \p nodeset is set to the logical OR of all threads'
 * default nodeset.  If all threads' default policies are the same, \p
 * policy is set to that policy.  If they are different, \p policy is
 * set to ::HWLOC_MEMBIND_MIXED.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 */
HWLOC_DECLSPEC int hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);

/** \brief Query the default memory binding policy and physical locality of the
 * specified process.
 *
 * This function has two output parameters: \p set and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the current memory binding policies and nodesets in
 * the queried target.
 *
 * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
 * target is the current policies and nodesets for all the threads in
 * the specified process.  If ::HWLOC_MEMBIND_PROCESS is not specified
 * (which is the most portable method), the process is assumed to be
 * single threaded.  This allows hwloc to use either process-based OS
 * functions or thread-based OS functions, depending on which are
 * available.
 *
 * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to
 * this function.
 *
 * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default
 * memory policies and nodesets for all threads in the specified
 * process.  If they are not identical, -1 is returned and errno is
 * set to EXDEV.  If they are identical, the values are returned in \p
 * set and \p policy.
 *
 * Otherwise, \p set is set to the logical OR of all threads'
 * default set.  If all threads' default policies
 * are the same, \p policy is set to that policy.  If they are
 * different, \p policy is set to ::HWLOC_MEMBIND_MIXED.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 *
 * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
 * and \p HANDLE on native Windows platforms.
 */
HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);

/** \brief Bind the already-allocated memory identified by (addr, len)
 * to the NUMA node(s) specified by \p nodeset.
 *
 * \return 0 if \p len is 0.
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 */
HWLOC_DECLSPEC int hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);

/** \brief Bind the already-allocated memory identified by (addr, len)
 * to the NUMA node(s) specified by \p set.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * \return 0 if \p len is 0.
 * \return -1 with errno set to ENOSYS if the action is not supported
 * \return -1 with errno set to EXDEV if the binding cannot be enforced
 */
HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);

/** \brief Query the physical NUMA node(s) and binding policy of the memory
 * identified by (\p addr, \p len ).
 *
 * This function has two output parameters: \p nodeset and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the memory binding policies and nodesets of the pages
 * in the address range.
 *
 * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first
 * checked to see if they all have the same memory binding policy and
 * nodeset.  If they do not, -1 is returned and errno is set to EXDEV.
 * If they are identical across all pages, the nodeset and policy are
 * returned in \p nodeset and \p policy, respectively.
 *
 * If ::HWLOC_MEMBIND_STRICT is not specified, \p nodeset is set to the
 * union of all NUMA node(s) containing pages in the address range.
 * If all pages in the target have the same policy, it is returned in
 * \p policy.  Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED.
 *
 * If \p len is 0, -1 is returned and errno is set to EINVAL.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 */
HWLOC_DECLSPEC int hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);

/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of
 * the memory identified by (\p addr, \p len ).
 *
 * This function has two output parameters: \p set and \p policy.
 * The values returned in these parameters depend on both the \p flags
 * passed in and the memory binding policies and nodesets of the pages
 * in the address range.
 *
 * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first
 * checked to see if they all have the same memory binding policy and
 * nodeset.  If they do not, -1 is returned and errno is set to EXDEV.
 * If they are identical across all pages, the set and policy are
 * returned in \p set and \p policy, respectively.
 *
 * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA
 * node(s) containing pages in the address range is calculated.
 * If all pages in the target have the same policy, it is returned in
 * \p policy.  Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * If \p len is 0, -1 is returned and errno is set to EINVAL.
 *
 * If any other flags are specified, -1 is returned and errno is set
 * to EINVAL.
 */
HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);

/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated.
 *
 * Fills \p set according to the NUMA nodes where the memory area pages
 * are physically allocated. If no page is actually allocated yet,
 * \p set may be empty.
 *
 * If pages spread to multiple nodes, it is not specified whether they spread
 * equitably, or whether most of them are on a single node, etc.
 *
 * The operating system may move memory pages from one processor
 * to another at any time according to their binding,
 * so this function may return something that is already
 * outdated.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified in \p flags, set is
 * considered a nodeset. Otherwise it's a cpuset.
 *
 * If \p len is 0, \p set is emptied.
 */
HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, int flags);

/** \brief Allocate some memory
 *
 * This is equivalent to malloc(), except that it tries to allocate
 * page-aligned memory from the OS.
 *
 * \note The allocated memory should be freed with hwloc_free().
 */
HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len);

/** \brief Allocate some memory on NUMA memory nodes specified by \p nodeset
 *
 * \return NULL with errno set to ENOSYS if the action is not supported
 * and ::HWLOC_MEMBIND_STRICT is given
 * \return NULL with errno set to EXDEV if the binding cannot be enforced
 * and ::HWLOC_MEMBIND_STRICT is given
 * \return NULL with errno set to ENOMEM if the memory allocation failed
 * even before trying to bind.
 *
 * \note The allocated memory should be freed with hwloc_free().
 */
HWLOC_DECLSPEC void *hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;

/** \brief Allocate some memory on NUMA memory nodes specified by \p set
 *
 * \return NULL with errno set to ENOSYS if the action is not supported
 * and ::HWLOC_MEMBIND_STRICT is given
 * \return NULL with errno set to EXDEV if the binding cannot be enforced
 * and ::HWLOC_MEMBIND_STRICT is given
 * \return NULL with errno set to ENOMEM if the memory allocation failed
 * even before trying to bind.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 *
 * \note The allocated memory should be freed with hwloc_free().
 */
HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;

/** \brief Allocate some memory on NUMA memory nodes specified by \p nodeset
 *
 * This is similar to hwloc_alloc_membind_nodeset() except that it is allowed to change
 * the current memory binding policy, thus providing more binding support, at
 * the expense of changing the current state.
 */
static __hwloc_inline void *
hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;

/** \brief Allocate some memory on NUMA memory nodes specified by \p set
 *
 * This is similar to hwloc_alloc_membind() except that it is allowed to change
 * the current memory binding policy, thus providing more binding support, at
 * the expense of changing the current state.
 *
 * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
 * Otherwise it's a cpuset.
 */
static __hwloc_inline void *
hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;

/** \brief Free memory that was previously allocated by hwloc_alloc()
 * or hwloc_alloc_membind().
 */
HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len);

/** @} */


/** \defgroup hwlocality_tinker Modifying a loaded Topology
 * @{
 */

/** \brief Add a MISC object to the topology
 *
 * A new MISC object will be created and inserted into the topology at the
 * position given by bitmap \p cpuset. This offers a way to add new
 * intermediate levels to the topology hierarchy.
 *
 * \p cpuset and \p name will be copied to setup the new object attributes.
 *
 * \return the newly-created object.
 * \return \c NULL if the insertion conflicts with the existing topology tree.
 *
 * \note If \p name contains some non-printable characters, they will
 * be dropped when exporting to XML, see hwloc_topology_export_xml().
 */
HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, const char *name);

/** \brief Add a MISC object as a leaf of the topology
 *
 * A new MISC object will be created and inserted into the topology at the
 * position given by parent. It is appended to the list of existing children,
 * without ever adding any intermediate hierarchy level. This is useful for
 * annotating the topology without actually changing the hierarchy.
 *
 * \p name will be copied to setup the new object attributes.
 * However, the new leaf object will not have any \p cpuset.
 *
 * \return the newly-created object
 *
 * \note If \p name contains some non-printable characters, they will
 * be dropped when exporting to XML, see hwloc_topology_export_xml().
 */
HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name);

/** \brief Bit flags accepted by hwloc_topology_restrict(). */
enum hwloc_restrict_flags_e {
  /** \brief Keep distance matrices, adapting them to the objects that get removed
   * by the restriction.
   * Without this flag, distance matrices are removed.
   * \hideinitializer
   */
  HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES = 1 << 0,

  /** \brief Re-attach Misc objects to an ancestor when the restriction removes
   * their parent.
   * Without this flag, Misc objects are removed together with their parent.
   * \hideinitializer
   */
  HWLOC_RESTRICT_FLAG_ADAPT_MISC = 1 << 1,

  /** \brief Re-attach I/O objects to an ancestor when the restriction removes
   * their parent.
   * Without this flag, I/O devices and bridges are removed together with their parent.
   * \hideinitializer
   */
  HWLOC_RESTRICT_FLAG_ADAPT_IO = 1 << 2
};

/** \brief Restrict the topology to the given CPU set.
 *
 * Topology \p topology is modified so as to remove all objects that
 * are not included (or partially included) in the CPU set \p cpuset.
 * All objects CPU and node sets are restricted accordingly.
 *
 * \p flags is an OR'ed set of ::hwloc_restrict_flags_e.
 *
 * \note This call may not be reverted by restricting back to a larger
 * cpuset. Once dropped during restriction, objects may not be brought
 * back, except by loading another topology with hwloc_topology_load().
 *
 * \return 0 on success.
 *
 * \return -1 with errno set to EINVAL if the input cpuset is invalid.
 * The topology is not modified in this case.
 *
 * \return -1 with errno set to ENOMEM on failure to allocate internal data.
 * The topology is reinitialized in this case. It should be either
 * destroyed with hwloc_topology_destroy() or configured and loaded again.
 */
HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);

/** @} */


/** \defgroup hwlocality_custom Building Custom Topologies
 *
 * A custom topology may be initialized by calling hwloc_topology_set_custom()
 * after hwloc_topology_init(). It may then be modified by inserting objects
 * or entire topologies. Once done assembling, hwloc_topology_load() should
 * be invoked as usual to finalize the topology.
 * @{
 */

/** \brief Insert an existing topology inside a custom topology
 *
 * Duplicate the existing topology \p oldtopology inside a new
 * custom topology \p newtopology as a leaf of object \p newparent.
 *
 * If \p oldroot is not \c NULL, duplicate \p oldroot and all its
 * children instead of the entire \p oldtopology. Passing the root
 * object of \p oldtopology in \p oldroot is equivalent to passing
 * \c NULL.
 *
 * The custom topology \p newtopology must have been prepared with
 * hwloc_topology_set_custom() and not loaded with hwloc_topology_load()
 * yet.
 *
 * \p newparent may be either the root of \p newtopology or an object
 * that was added through hwloc_custom_insert_group_object_by_parent().
 *
 * \note The cpuset and nodeset of the \p newparent object are not
 * modified based on the contents of \p oldtopology.
 */
HWLOC_DECLSPEC int hwloc_custom_insert_topology(hwloc_topology_t newtopology, hwloc_obj_t newparent, hwloc_topology_t oldtopology, hwloc_obj_t oldroot);

/** \brief Insert a new group object inside a custom topology
 *
 * An object with type ::HWLOC_OBJ_GROUP is inserted as a new child
 * of object \p parent.
 *
 * \p groupdepth is the depth attribute to be given to the new object.
 * It may for instance be 0 for top-level groups, 1 for their children,
 * and so on.
 *
 * The custom topology \p topology must have been prepared with
 * hwloc_topology_set_custom() and not loaded with hwloc_topology_load()
 * yet.
 *
 * \p parent may be either the root of \p topology or an object that
 * was added earlier through hwloc_custom_insert_group_object_by_parent().
 *
 * \note The cpuset and nodeset of the new group object are NULL because
 * these sets are meaningless when assembling multiple topologies.
 *
 * \note The cpuset and nodeset of the \p parent object are not modified.
 */
HWLOC_DECLSPEC hwloc_obj_t hwloc_custom_insert_group_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, int groupdepth);

/** @} */


/** \defgroup hwlocality_xmlexport Exporting Topologies to XML
 * @{
 */

/** \brief Export the topology into an XML file.
 *
 * This file may be loaded later through hwloc_topology_set_xml().
 *
 * \return -1 if a failure occurred.
 *
 * \note See also hwloc_topology_set_userdata_export_callback()
 * for exporting application-specific object userdata.
 *
 * \note The topology-specific userdata pointer is ignored when exporting to XML.
 *
 * \note Only printable characters may be exported to XML string attributes.
 * Any other character, especially any non-ASCII character, will be silently
 * dropped.
 *
 * \note If \p xmlpath is "-", the XML output is sent to the standard output.
 */
HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath);

/** \brief Export the topology into a newly-allocated XML memory buffer.
 *
 * \p xmlbuffer is allocated by the callee and should be freed with
 * hwloc_free_xmlbuffer() later in the caller.
 *
 * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer().
 *
 * \return -1 if a failure occurred.
 *
 * \note See also hwloc_topology_set_userdata_export_callback()
 * for exporting application-specific object userdata.
 *
 * \note The topology-specific userdata pointer is ignored when exporting to XML.
 *
 * \note Only printable characters may be exported to XML string attributes.
 * Any other character, especially any non-ASCII character, will be silently
 * dropped.
 */
HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen);

/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */
HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer);

/** \brief Set the application-specific callback for exporting object userdata
 *
 * The object userdata pointer is not exported to XML by default because hwloc
 * does not know what it contains.
 *
 * This function lets applications set \p export_cb to a callback function
 * that converts this opaque userdata into an exportable string.
 *
 * \p export_cb is invoked during XML export for each object whose
 * \p userdata pointer is not \c NULL.
 * The callback should use hwloc_export_obj_userdata() or
 * hwloc_export_obj_userdata_base64() to actually export
 * something to XML (possibly multiple times per object).
 *
 * \p export_cb may be set to \c NULL if userdata should not be exported to XML.
 *
 * \note The topology-specific userdata pointer is ignored when exporting to XML.
 */
HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology,
								void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj));

/** \brief Export some object userdata to XML
 *
 * This function may only be called from within the export() callback passed
 * to hwloc_topology_set_userdata_export_callback().
 * It may be invoked one or multiple times to export some userdata to XML.
 * The \p buffer content of length \p length is stored with optional name
 * \p name.
 *
 * When importing this XML file, the import() callback (if set) will be
 * called exactly as many times as hwloc_export_obj_userdata() was called
 * during export(). It will receive the corresponding \p name, \p buffer
 * and \p length arguments.
 *
 * \p reserved, \p topology and \p obj must be the first three parameters
 * that were given to the export callback.
 *
 * Only printable characters may be exported to XML string attributes.
 * If a non-printable character is passed in \p name or \p buffer,
 * the function returns -1 with errno set to EINVAL.
 *
 * If exporting binary data, the application should first encode into
 * printable characters only (or use hwloc_export_obj_userdata_base64()).
 * It should also take care of portability issues if the export may
 * be reimported on a different architecture.
 */
HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);

/** \brief Encode and export some object userdata to XML
 *
 * This function is similar to hwloc_export_obj_userdata() but it encodes
 * the input buffer into printable characters before exporting.
 * On import, decoding is automatically performed before the data is given
 * to the import() callback if any.
 *
 * This function may only be called from within the export() callback passed
 * to hwloc_topology_set_userdata_export_callback().
 *
 * The function does not take care of portability issues if the export
 * may be reimported on a different architecture.
 */
HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);

/** \brief Set the application-specific callback for importing userdata
 *
 * On XML import, userdata is ignored by default because hwloc does not know
 * how to store it in memory.
 *
 * This function lets applications set \p import_cb to a callback function
 * that will get the XML-stored userdata and store it in the object as expected
 * by the application.
 *
 * \p import_cb is called during hwloc_topology_load() as many times as
 * hwloc_export_obj_userdata() was called during export. The topology
 * is not entirely setup yet. Object attributes are ready to consult,
 * but links between objects are not.
 *
 * \p import_cb may be \c NULL if userdata should be ignored during import.
 *
 * \note \p buffer contains \p length characters followed by a null byte ('\0').
 *
 * \note This function should be called before hwloc_topology_load().
 *
 * \note The topology-specific userdata pointer is ignored when importing from XML.
 */
HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology,
								void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length));

/** @} */


/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic
 * @{
 */

/** \brief Flags controlling the export of synthetic topologies.
 *
 * These flags may be OR'ed together and passed to
 * hwloc_topology_export_synthetic().
 */
enum hwloc_topology_export_synthetic_flags_e {
  /** \brief Emit basic types (such as Cache) in place of extended types
   * (such as L2dcache).
   *
   * Needed when the synthetic description will be loaded by hwloc < 1.9.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = 1UL << 0,

  /** \brief Leave level attributes out of the export.
   *
   * Level attributes such as memory/cache sizes or PU indexes are ignored.
   * Needed when the synthetic description will be loaded by hwloc < 1.10.
   * \hideinitializer
   */
  HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = 1UL << 1
};

/** \brief Export the topology as a synthetic string.
 *
 * At most \p buflen characters will be written in \p buffer,
 * including the terminating \0.
 *
 * This exported string may be given back to hwloc_topology_set_synthetic().
 *
 * \p flags is an OR'ed set of ::hwloc_topology_export_synthetic_flags_e.
 *
 * \return The number of characters that were written,
 * not including the terminating \0.
 *
 * \return -1 if the topology could not be exported,
 * for instance if it is not symmetric.
 *
 * \note A 1024-byte buffer should be large enough for exporting
 * topologies in the vast majority of cases.
 */
  HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags);

/** @} */


#ifdef __cplusplus
} /* extern "C" */
#endif


/* high-level helpers */
#include <hwloc/helper.h>

/* inline code of some functions above */
#include <hwloc/inlines.h>

/* topology diffs */
#include <hwloc/diff.h>

/* deprecated headers */
#include <hwloc/deprecated.h>

#endif /* HWLOC_H */


================================================
FILE: rocrtst/thirdparty/lib/LICENSE
================================================
Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation.  All rights reserved.
Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation.  All rights reserved.
Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart.  All rights reserved.
Copyright © 2004-2005 The Regents of the University of California. All rights reserved.
Copyright © 2009      CNRS
Copyright © 2009-2016 Inria.  All rights reserved.
Copyright © 2009-2015 Université Bordeaux
Copyright © 2009-2015 Cisco Systems, Inc.  All rights reserved.
Copyright © 2009-2012 Oracle and/or its affiliates.  All rights reserved.
Copyright © 2010      IBM
Copyright © 2010      Jirka Hladky
Copyright © 2012      Aleksej Saushev, The NetBSD Foundation
Copyright © 2012      Blue Brain Project, EPFL. All rights reserved.
Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved.
Copyright © 2015      Research Organization for Information Science and Technology (RIST). All rights reserved.
Copyright © 2015-2016 Intel, Inc.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 


================================================
FILE: runtime/cmake_modules/COPYING-CMAKE-SCRIPTS
================================================
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products 
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: runtime/cmake_modules/FindLibElf.cmake
================================================
# - Try to find libelf
# Once done this will define
#
#  LIBELF_FOUND - system has libelf
#  LIBELF_INCLUDE_DIRS - the libelf include directory
#  LIBELF_LIBRARIES - Link these to use libelf
#  LIBELF_DEFINITIONS - Compiler switches required for using libelf
#
#  Copyright (c) 2008 Bernhard Walle <bernhard.walle@gmx.de>
#
#  Redistribution and use is allowed according to the terms of the New
#  BSD license.
#  For details see the accompanying COPYING-CMAKE-SCRIPTS file.
#

# Skip the whole search when libelf was already located in this scope.
if (LIBELF_FOUND)
  return()
endif (LIBELF_FOUND)

# Directory that contains libelf.h; CPATH is searched in addition to the
# hard-coded locations below.
find_path (LIBELF_INCLUDE_DIRS
  NAMES
    libelf.h
  PATHS
    /usr/include
    /usr/include/libelf
    /usr/local/include
    /usr/local/include/libelf
    /opt/local/include
    /opt/local/include/libelf
    /sw/include
    /sw/include/libelf
    ENV CPATH)

# Full path of the libelf library; LIBRARY_PATH and LD_LIBRARY_PATH are
# searched in addition to the hard-coded locations below.
find_library (LIBELF_LIBRARIES
  NAMES
    elf
  PATHS
    /usr/lib
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    ENV LIBRARY_PATH
    ENV LD_LIBRARY_PATH)

include (FindPackageHandleStandardArgs)


# handle the QUIETLY and REQUIRED arguments and set LIBELF_FOUND to TRUE if all listed variables are TRUE
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibElf DEFAULT_MSG
  LIBELF_LIBRARIES
  LIBELF_INCLUDE_DIRS)

# Probe whether the installed libelf provides elf_getshdrstrndx(); the result
# is stored in ELF_GETSHDRSTRNDX.
# The CMAKE_REQUIRED_* state is saved and restored around the probe so that
# linking against "elf" does not leak into unrelated check_* calls made by the
# including project afterwards.
include (CMakePushCheckState)
INCLUDE(CheckCXXSourceCompiles)
cmake_push_check_state()
SET(CMAKE_REQUIRED_LIBRARIES elf)
CHECK_CXX_SOURCE_COMPILES("#include <libelf.h>
int main() {
  Elf *e = (Elf*)0;
  size_t sz;
  elf_getshdrstrndx(e, &sz);
  return 0;
}" ELF_GETSHDRSTRNDX)
cmake_pop_check_state()

mark_as_advanced(LIBELF_INCLUDE_DIRS LIBELF_LIBRARIES ELF_GETSHDRSTRNDX)

# Expose libelf as the imported target "elf". The TARGET guard avoids a hard
# configure error when a target of that name has already been created.
if(LIBELF_FOUND AND NOT TARGET elf)
  add_library(elf UNKNOWN IMPORTED)
  set_property(TARGET elf PROPERTY IMPORTED_LOCATION "${LIBELF_LIBRARIES}")
  set_property(TARGET elf PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${LIBELF_INCLUDE_DIRS}")
endif()


================================================
FILE: runtime/docs/api-reference/api.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _rocr-api:

API
===

:ref:`genindex`

:ref:`search`

Architected Queuing Language
############################
.. doxygengroup:: aql
   :content-only:
   :inner:

Code objects [**DEPRECATED**]
#############################
.. doxygengroup:: code-object
   :content-only:
   :inner:

Executable
##########
.. doxygengroup:: executable
   :content-only:
   :inner:

Finalization extensions
#######################
.. doxygengroup:: ext-alt-finalizer-extensions
   :content-only:
   :inner:

Finalization program
####################
.. doxygengroup:: ext-alt-finalizer-program
   :content-only:
   :inner:

Images and samplers
###################
.. doxygengroup:: ext-images
   :content-only:
   :inner:

Instruction set architecture
############################
.. doxygengroup:: instruction-set-architecture
   :content-only:
   :inner:

Memory
######
.. doxygengroup:: memory
   :content-only:
   :inner:

Queues
######
.. doxygengroup:: queue
   :content-only:
   :inner:

Runtime notifications
#####################
.. doxygengroup:: status
   :content-only:
   :inner:

Signals
#######
.. doxygengroup:: signals
   :content-only:
   :inner:

System and agent information
############################
.. doxygengroup:: agentinfo
   :content-only:
   :inner:

Profiling
###############
.. doxygengroup:: profile
   :content-only:
   :inner:

Error codes
#############
.. doxygengroup:: error-codes
   :content-only:
   :inner:


================================================
FILE: runtime/docs/api-reference/c-interface-adaptors.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _c-interface-adaptors:

C interface adaptors
=====================

The C interface layer is the :ref:`top layer in ROCR <runtime-design>` that provides C++ APIs as defined in the `HSA Runtime Specification 1.2 <https://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf>`_. The C interface layer also includes the interfaces and default definitions for the standard extensions. The interface functions simply forward to a function pointer table defined here. The table is initialized to point to default definitions, which simply return an appropriate error code. If available, the extension library is loaded as part of runtime initialization and the table is updated to point to the extension library.

Files present in this layer:

- ``hsa.h`` (cpp)

- ``hsa_ext_interface.h`` (cpp)

================================================
FILE: runtime/docs/api-reference/environment_variables.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _environment-variables:

Environment variables
========================

The following table lists the most often used environment variables.

.. include:: ../data/env_variables.rst


================================================
FILE: runtime/docs/conf.py
================================================
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import re

from rocm_docs import ROCmDocs

# Derive the documentation version from the get_version(...) call in the
# CMakeLists.txt two directories up. The path is relative, so it is resolved
# against the current working directory at the time this file is executed.
with open('../../CMakeLists.txt', encoding='utf-8') as f:
    # Group 1 captures the run of digits and dots that follows
    # "get_version(" and an optional double quote.
    match = re.search(r'get_version\(\"?([0-9.]+)[^0-9.]+', f.read())
    if not match:
        # Fail the docs build loudly rather than publishing without a version.
        raise ValueError("VERSION not found!")
    version_number = match[1]
# Title string handed to ROCmDocs below.
left_nav_title = f"ROCR {version_number} Documentation"

# for PDF output on Read the Docs
project = "ROCR Documentation"
author = "Advanced Micro Devices, Inc."
copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved."
version = version_number
release = version_number

# Table-of-contents file, relative to this directory.
# NOTE(review): only sphinx/_toc.yml.in is visible in the tree; presumably
# _toc.yml is generated from it by rocm-docs-core -- confirm.
external_toc_path = "./sphinx/_toc.yml"

external_projects_current_project = "rocr-runtime"

# Let rocm-docs-core run Doxygen and prepare its Sphinx settings.
docs_core = ROCmDocs(left_nav_title)
docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/xml")
docs_core.setup()

# Copy every attribute named in ROCmDocs.SPHINX_VARS from docs_core into this
# module's globals so that it becomes a module-level name of this config file.
for sphinx_var in ROCmDocs.SPHINX_VARS:
    globals()[sphinx_var] = getattr(docs_core, sphinx_var)


================================================
FILE: runtime/docs/contribution/contributing-to-rocr.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _contributing-to-rocr:

Contributing to ROCR
========================

This document contains useful information required to contribute to ROCR.

.. _runtime-design:

Runtime design
-----------------

ROCR consists of the following primary layers:

1. :ref:`C interface adaptors <c-interface-adaptors>`

2. C++ interface classes and common functions

3. Device-specific implementations

The first layer provides interfaces to make ROCR APIs available to the user applications.
The second and third layers comprise the internal ROCR implementation, which is available for contribution.

Additionally, the runtime is dependent on a small utility library that provides simple common functions, limited operating system, compiler abstraction, and atomic operation interfaces.

The following sections list the important files present in the second and third layer.

C++ interface classes and common functions
----------------------------------------------

The C++ interface layer provides abstract interface classes encapsulating commands to HSA signals, agents, and queues. This layer also contains the implementation of device-independent commands, such as ``hsa_init``, ``hsa_system_get_info``, and a default signal and queue implementation.

Files present in this layer:

- ``runtime.h`` (cpp)

- ``agent.h``

- ``queue.h``

- ``signal.h``

- ``memory_region.h`` (cpp)

- ``checked.h``

- ``memory_database.h`` (cpp)

- ``default_signal.h`` (cpp)

Device-specific implementations
----------------------------------

The device-specific layer contains implementations of the C++ interface classes that implement HSA functionality for ROCm supported devices.

Files present in this layer:

- ``amd_cpu_agent.h`` (cpp)

- ``amd_gpu_agent.h`` (cpp)

- ``amd_hw_aql_command_processor.h`` (cpp)

- ``amd_memory_region.h`` (cpp)

- ``amd_memory_registration.h`` (cpp)

- ``amd_topology.h`` (cpp)

- ``host_queue.h`` (cpp)

- ``interrupt_signal.h`` (cpp)

- ``hsa_ext_private_amd.h`` (cpp)

Source and include directories
--------------------------------

- ``core``: Source code for AMD’s implementation of the core HSA Runtime APIs

- ``cmake_modules``: CMake support modules and files

- ``inc``: Public and AMD-specific header files exposing the HSA Runtime’s interfaces

- ``libamdhsacode``: Code object definitions and interfaces

- ``loader``: Loads code objects

- ``utils``: Utilities required to build the core runtime

================================================
FILE: runtime/docs/data/env_variables.rst
================================================
.. meta::
    :description: ROCR-Runtime environment variables
    :keywords: AMD, ROCR, environment variables, environment

.. _rocr-env:
.. list-table::
    :header-rows: 1
    :widths: 35,14,51

    * - Environment variable
      - Default value
      - Value

    * - | ``ROCR_VISIBLE_DEVICES``
        | Specifies a list of device indices or UUIDs to be exposed to the applications.
      - None
      - ``0,GPU-DEADBEEFDEADBEEF``

    * - | ``HSA_NO_SCRATCH_RECLAIM``
        | Controls whether scratch memory allocations are permanently assigned to queues or can be reclaimed based on usage thresholds.
      - ``0``
      - | 0: Disable.
        | When dispatches need less scratch memory than the threshold, the memory is permanently assigned to the queue. For dispatches that exceed the threshold, a scratch-use-once mechanism is used, so the memory is unassigned after the dispatch.
        | 1: Enable.
        | If a kernel dispatch needs scratch memory, runtime will allocate and permanently assign device memory to the queue handling the dispatch, even if the amount of scratch memory exceeds the default threshold. This memory will not be available to other queues or processes until this process exits.

    * - | ``HSA_SCRATCH_SINGLE_LIMIT``
        | Specifies the threshold for the amount of scratch memory allocated and reclaimed in kernel dispatches.
        | Enabling ``HSA_NO_SCRATCH_RECLAIM`` circumvents ``HSA_SCRATCH_SINGLE_LIMIT``, and treats ``HSA_SCRATCH_SINGLE_LIMIT`` as the maximum value.
      - ``146800640``
      - 0 to 4GB per XCC

    * - | ``HSA_SCRATCH_SINGLE_LIMIT_ASYNC``
        | On GPUs that support asynchronous scratch reclaim, this variable is used instead of ``HSA_SCRATCH_SINGLE_LIMIT`` to specify the threshold for scratch memory allocation.
      - ``3221225472`` (3GB)
      - 0 to 4GB per XCC

    * - | ``HSA_ENABLE_SCRATCH_ASYNC_RECLAIM``
        | Controls asynchronous scratch memory reclamation on supported GPUs.
        | When enabled, if a device memory allocation fails, ROCr will attempt to reclaim scratch memory assigned to all queues and retry the allocation.
      - ``1``
      - | 0: Disable asynchronous scratch reclaim.
        | 1: Enable asynchronous scratch reclaim on supported GPUs.

    * - | ``HSA_XNACK``
        | Enables XNACK.
      - None
      - 1: Enable

    * - | ``HSA_CU_MASK``
        | Sets the mask on a lower level of queue creation in the driver.
        | This mask is also applied to the queues being profiled.
      - None
      - ``1:0-8``

    * - | ``HSA_ENABLE_SDMA``
        | Enables the use of direct memory access (DMA) engines in all copy directions (Host-to-Device, Device-to-Host, Device-to-Device), when using any of the following APIs:
        | ``hsa_memory_copy``,
        | ``hsa_amd_memory_fill``,
        | ``hsa_amd_memory_async_copy``,
        | ``hsa_amd_memory_async_copy_on_engine``.
      - ``1``
      - | 0: Disable
        | 1: Enable

    * - | ``HSA_ENABLE_PEER_SDMA``
        | **Note**: This environment variable is ignored if ``HSA_ENABLE_SDMA`` is set to 0.
        | Enables the use of DMA engines for Device-to-Device copies, when using any of the following APIs:
        | ``hsa_memory_copy``,
        | ``hsa_amd_memory_async_copy``,
        | ``hsa_amd_memory_async_copy_on_engine``.
      - ``1``
      - | 0: Disable
        | 1: Enable

    * - | ``HSA_ENABLE_MWAITX``
        | When mwaitx is enabled, on AMD CPUs, runtime will hint to the CPU to go into lower power-states when doing busy loops by using the mwaitx instruction.
      - ``0``
      - | 0: Disable
        | 1: Enable

    * - | ``HSA_OVERRIDE_CPU_AFFINITY_DEBUG``
        | Controls whether ROCm helper threads inherit the parent process's CPU affinity mask.
      - ``1``
      - | 0: Enable inheritance. Helper threads use the parent process's core affinity mask, which should be set with enough cores for all threads.
        | 1: Disable inheritance. Helper threads spawn on all available cores, ignoring the parent's affinity settings, which may affect performance in certain environments.

    * - | ``HSA_ENABLE_DEBUG``
        | Enables additional debug information and validation in the runtime.
      - ``0``
      - | 0: Disable debug mode.
        | 1: Enable debug mode with additional validation and logging.


Hardware Debugging Environment Variables
----------------------------------------

The following environment variables are intended for experienced users who are debugging hardware-specific issues.
These settings may impact performance and stability and should only be used when troubleshooting specific hardware problems.

.. _rocr-debug-env:
.. list-table::
    :header-rows: 1
    :widths: 35,14,51

    * - Environment variable
      - Default value
      - Value

    * - | ``HSA_DISABLE_FRAGMENT_ALLOCATOR``
        | Disables internal memory fragment caching to help debug memory faults.
      - ``0``
      - | 0: Fragment allocator enabled (normal operation).
        | 1: Fragment allocator disabled. Helps debug tools identify memory faults at their origin by preventing cached memory blocks from masking out-of-bounds writes.

    * - | ``HSAKMT_DEBUG_LEVEL``
        | Controls the verbosity level of debug messages from the ``libhsakmt.so`` driver layer.
      - ``3``
      - | 3: Only error messages (``pr_err``) are printed.
        | 4: Error and warning messages (``pr_err``, ``pr_warn``) are printed.
        | 5: Same as level 4 (notice level not implemented).
        | 6: Error, warning, and info messages (``pr_err``, ``pr_warn``, ``pr_info``) are printed.
        | 7: All debug messages including ``pr_debug`` are printed.

    * - | ``HSA_ENABLE_INTERRUPT``
        | Controls how completion signals are detected, useful for diagnosing interrupt storm issues.
      - ``1``
      - | 0: Disable hardware interrupts. Uses memory-based polling for completion signals instead of interrupts.
        | 1: Enable hardware interrupts (normal operation).

    * - | ``HSA_SVM_GUARD_PAGES``
        | Controls the use of guard pages in Shared Virtual Memory (SVM) allocations.
      - ``1``
      - | 0: Disable SVM guard pages (for debugging memory access patterns).
        | 1: Enable SVM guard pages (normal operation).

    * - | ``HSA_DISABLE_CACHE``
        | Controls GPU L2 cache utilization for all memory regions.
      - ``0``
      - | 0: Normal caching behavior (L2 cache enabled).
        | 1: Disables L2 cache entirely. Sets all memory regions as uncacheable (MTYPE=UC) in the GPU, bypassing the L2 cache. Useful for diagnosing cache-related performance or correctness issues.


================================================
FILE: runtime/docs/index.rst
================================================
.. meta::
    :description: HSA runtime implementation
    :keywords: ROCm runtime, HSA runtime

.. _index:

=====================
ROCR documentation
=====================

The ROCm runtime (ROCR) is AMD's implementation of HSA runtime, which is a thin, user-mode API that exposes the necessary interfaces to access and interact with graphics hardware driven by the AMDGPU driver set and the ROCK kernel driver. To learn more, see :ref:`what-is-rocr-runtime`.

You can access ROCR code on our `GitHub repository <https://github.com/ROCm/ROCR-Runtime>`_.

The documentation is structured as follows:

.. grid:: 2
  :gutter: 3

  .. grid-item-card:: Install

    * :ref:`installation`
    
  .. grid-item-card:: API reference

    * :ref:`c-interface-adaptors`
    * :ref:`environment-variables`
    * :ref:`rocr-api`

  .. grid-item-card:: Contribution

    * :ref:`contributing-to-rocr`

To contribute to the documentation, refer to
`Contributing to ROCm  <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_.

You can find licensing information on the `Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page.


================================================
FILE: runtime/docs/install/installation.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _installation:

====================
Installation
====================

This document provides information required to build and install ROCR using prebuilt binaries or from source.

Build and install using prebuilt binaries
-------------------------------------------

Here is how you can install ROCR using prebuilt binaries.

Prerequisites
*******************

- A system supporting ROCm. See the `supported operating systems <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#supported-operating-systems>`_.

- Install ROCm. See `how to install ROCm <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/>`_.

- Install ``libdrm`` package.

.. code-block:: shell
    
    sudo apt install libdrm-dev

The ROCR prebuilt binaries include:

**Core runtime package:**

- HSA include files to support application development on the HSA runtime for the ROCR runtime

- A 64-bit version of AMD’s HSA core runtime for the ROCR runtime

**Runtime extension package:**

- A 64-bit version of AMD’s runtime tools library

- A 64-bit version of AMD’s runtime image library

The contents of these packages are installed in ``/opt/rocm/hsa`` and ``/opt/rocm`` by default. The core runtime package depends on the ``hsakmt-roct-dev`` package.

Build and install from source
--------------------------------

Here is how you can build ROCR from source.

Prerequisites
***************

- CMake 3.7 or later. Export CMake bin into your PATH.

- Support packages ``libelf-dev`` and ``g++``.

.. code-block:: shell

    sudo apt install libelf-dev g++

- A compatible version of the ``libhsakmt`` library and the ``hsakmt.h`` header file. Obtain the latest version of these files from the `ROCT-Thunk-Interface repository <https://github.com/ROCm/ROCT-Thunk-Interface>`_.

- Install ``xxd``.

.. code-block:: shell

    sudo apt install xxd
    
Building the runtime
----------------------

The ``libhsakmt`` development packages include a CMake package config file. The runtime locates ``libhsakmt`` via ``find_package`` if ``libhsakmt`` is installed in a standard location. For installations that don't use standard ROCm paths, set CMake variables ``CMAKE_PREFIX_PATH`` or ``hsakmt_DIR`` to override ``find_package`` search paths.
The runtime includes an optional image support module (previously ``hsa-ext-rocr-dev``). By default this module is included in the runtime builds. To exclude the image module from the runtime, set the CMake variable ``IMAGE_SUPPORT`` to OFF.
To build the optional image module, install AMDGCN-compatible clang and device library. You can find the latest version of these additional build dependencies in the `ROCm package repository <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/package-manager-integration.html#packages-in-rocm-programming-models>`_.
The latest source for these projects is available in the `llvm project <https://github.com/ROCm/llvm-project>`_ and `ROCm device libs <https://github.com/ROCm/ROCm-Device-Libs>`_ repositories.

The runtime optionally supports use of the CMake user package registry. By default the registry is not modified. Set CMake variable ``EXPORT_TO_USER_PACKAGE_REGISTRY`` to ON to enable updating the package registry.

To build, install, and produce packages on a system with standard ROCm packages installed, clone your copy of ROCR and run the following from ``src/``:

.. code-block:: shell

    mkdir build
    cd build
    cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm ..
    make
    make install
    make package

Example with a custom installation path, build dependency path, and options:

.. code-block:: shell

    cmake -DIMAGE_SUPPORT=OFF \
          -DEXPORT_TO_USER_PACKAGE_REGISTRY=ON \
          -DCMAKE_VERBOSE_MAKEFILE=1 \
          -DCMAKE_PREFIX_PATH=<alternate path(s) to build dependencies> \
          -DCMAKE_INSTALL_PREFIX=<custom install path for this build> \
          ..

Alternatively, use ``ccmake`` or ``cmake-gui``:

.. code-block:: shell

    mkdir build
    cd build
    ccmake ..
    press c to configure
    populate variables as desired
    press c again
    press g to generate and exit
    make

Building against the runtime
---------------------------------

The runtime provides a CMake package config file, installed by default to ``/opt/rocm/lib/cmake/hsa-runtime64``. The runtime exports CMake target ``hsa-runtime64`` in namespace ``hsa-runtime64``. A CMake project (``Foo``) using the runtime may locate, include, and link the runtime using the following template:

.. code-block:: cmake

    # Add /opt/rocm to CMAKE_PREFIX_PATH.

    find_package(hsa-runtime64 1.0 REQUIRED)
    ...
    add_library(Foo ...)
    ...
    target_link_libraries(Foo PRIVATE hsa-runtime64::hsa-runtime64)


================================================
FILE: runtime/docs/license.rst
================================================
License
=======

.. include:: ../../LICENSE.txt


================================================
FILE: runtime/docs/sphinx/_toc.yml.in
================================================
# Anywhere {branch} is used, the branch name will be substituted.
# These comments will also be removed.
root: index
subtrees:
  - caption: Install
    entries:
    - file: install/installation

  - caption: API reference
    entries:
    - file: api-reference/c-interface-adaptors
    - file: api-reference/environment_variables
    - file: api-reference/api
    
  - caption: Contribution
    entries:
    - file: contribution/contributing-to-rocr

  - caption: About
    entries:
    - file: license


================================================
FILE: runtime/docs/sphinx/requirements.in
================================================
rocm-docs-core==1.8.0


================================================
FILE: runtime/docs/sphinx/requirements.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
#    pip-compile sphinx/requirements.in
#
accessible-pygments==0.0.5
    # via pydata-sphinx-theme
alabaster==1.0.0
    # via sphinx
babel==2.16.0
    # via
    #   pydata-sphinx-theme
    #   sphinx
beautifulsoup4==4.12.3
    # via pydata-sphinx-theme
breathe==4.35.0
    # via rocm-docs-core
certifi==2024.8.30
    # via requests
cffi==1.17.1
    # via
    #   cryptography
    #   pynacl
charset-normalizer==3.3.2
    # via requests
click==8.1.7
    # via sphinx-external-toc
cryptography==43.0.1
    # via pyjwt
deprecated==1.2.14
    # via pygithub
docutils==0.21.2
    # via
    #   breathe
    #   myst-parser
    #   pydata-sphinx-theme
    #   sphinx
fastjsonschema==2.20.0
    # via rocm-docs-core
gitdb==4.0.11
    # via gitpython
gitpython==3.1.43
    # via rocm-docs-core
idna==3.10
    # via requests
imagesize==1.4.1
    # via sphinx
jinja2==3.1.4
    # via
    #   myst-parser
    #   sphinx
markdown-it-py==3.0.0
    # via
    #   mdit-py-plugins
    #   myst-parser
markupsafe==2.1.5
    # via jinja2
mdit-py-plugins==0.4.2
    # via myst-parser
mdurl==0.1.2
    # via markdown-it-py
myst-parser==4.0.0
    # via rocm-docs-core
packaging==24.1
    # via
    #   pydata-sphinx-theme
    #   sphinx
pycparser==2.22
    # via cffi
pydata-sphinx-theme==0.15.4
    # via
    #   rocm-docs-core
    #   sphinx-book-theme
pygithub==2.4.0
    # via rocm-docs-core
pygments==2.18.0
    # via
    #   accessible-pygments
    #   pydata-sphinx-theme
    #   sphinx
pyjwt[crypto]==2.9.0
    # via pygithub
pynacl==1.5.0
    # via pygithub
pyyaml==6.0.2
    # via
    #   myst-parser
    #   rocm-docs-core
    #   sphinx-external-toc
requests==2.32.3
    # via
    #   pygithub
    #   sphinx
rocm-docs-core==1.8.0
    # via -r requirements.in
smmap==5.0.1
    # via gitdb
snowballstemmer==2.2.0
    # via sphinx
soupsieve==2.6
    # via beautifulsoup4
sphinx==8.0.2
    # via
    #   breathe
    #   myst-parser
    #   pydata-sphinx-theme
    #   rocm-docs-core
    #   sphinx-book-theme
    #   sphinx-copybutton
    #   sphinx-design
    #   sphinx-external-toc
    #   sphinx-notfound-page
sphinx-book-theme==1.1.3
    # via rocm-docs-core
sphinx-copybutton==0.5.2
    # via rocm-docs-core
sphinx-design==0.6.1
    # via rocm-docs-core
sphinx-external-toc==1.0.1
    # via rocm-docs-core
sphinx-notfound-page==1.0.4
    # via rocm-docs-core
sphinxcontrib-applehelp==2.0.0
    # via sphinx
sphinxcontrib-devhelp==2.0.0
    # via sphinx
sphinxcontrib-htmlhelp==2.1.0
    # via sphinx
sphinxcontrib-jsmath==1.0.1
    # via sphinx
sphinxcontrib-qthelp==2.0.0
    # via sphinx
sphinxcontrib-serializinghtml==2.0.0
    # via sphinx
tomli==2.0.1
    # via sphinx
typing-extensions==4.12.2
    # via
    #   pydata-sphinx-theme
    #   pygithub
urllib3==2.2.3
    # via
    #   pygithub
    #   requests
wrapt==1.16.0
    # via deprecated


================================================
FILE: runtime/docs/what-is-rocr-runtime.rst
================================================
.. meta::
   :description: HSA runtime implementation
   :keywords: ROCR, ROCm, library, tool, runtime

.. _what-is-rocr-runtime:

What is ROCR?
========================

The ROCm runtime (ROCR) is AMD's implementation of HSA runtime, which is a thin, user-mode API that exposes the necessary interfaces to access and interact with graphics hardware driven by the AMDGPU driver set and the ROCK kernel driver. Together they enable you to directly harness the power of discrete AMD graphics devices by allowing host applications to launch compute kernels directly to the graphics hardware.

The ROCR APIs are capable of the following:

- Error handling

- Runtime initialization and shutdown

- System and agent information

- Signals and synchronization

- Architected dispatch

- Memory management

- Fitting into a typical software architecture stack

ROCR provides direct access to the graphics hardware, allowing you more control over execution. An example of low-level hardware access is the support for one or more user-mode queues, which provides a low-latency kernel dispatch interface, allowing you to develop customized dispatch algorithms specific to your application.
The HSA Architected Queuing Language (AQL) is an open standard defined by the HSA Foundation, which specifies the packet syntax used to control supported AMD or ATI Radeon © graphics devices. The AQL language supports several packet types, including packets that can command the hardware to automatically resolve inter-packet dependencies (barrier AND and barrier OR packet), kernel dispatch packets, and agent dispatch packets.
In addition to user-mode queues and AQL, the HSA runtime exposes various virtual address ranges that can be accessed by one or more of the system’s graphics devices and also possibly by the host. The exposed virtual address ranges support either a fine-grained or a coarse-grained access. Updates to memory in a fine-grained region are immediately visible to all devices that can access it, but only one device can have access to a coarse-grained allocation at a time. You can change the ownership of a coarse-grained region using the HSA runtime memory APIs, but this transfer of ownership must be explicitly done by the host application.

For a complete description of the HSA Runtime APIs, AQL, and the HSA memory policy, refer to the `HSA Runtime Programmer’s Reference Manual <https://hsafoundation.com/wp-content/uploads/2021/02/HSA-Runtime-1.2.pdf>`_.


================================================
FILE: runtime/hsa-ext-finalize/CMakeLists.txt
================================================
## Build script for the HSA finalizer extension library (hsa-ext-finalize64).
cmake_minimum_required ( VERSION 3.5.0 )

## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE on )

## Determine external build folder.
## Any build type other than Release (including an unset one) selects the
## debug folder.
if( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    set( BUILD_FOLDER "lnx64a/B_rel" )
else()
    set( BUILD_FOLDER "lnx64a/B_dbg" )
endif()

## Default the libhsail build folder to BUILD_FOLDER unless the caller has
## already defined LIBHSAIL_BUILD_FOLDER.
if ( NOT DEFINED LIBHSAIL_BUILD_FOLDER )
    set ( LIBHSAIL_BUILD_FOLDER ${BUILD_FOLDER} )
endif()

## Set ext runtime module name and project name.
set ( FINALIZE_NAME "hsa-ext-finalize" )
set ( FINALIZE_TARGET "${FINALIZE_NAME}64" )
project ( ${FINALIZE_TARGET} )

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules" )
include ( utils )

## Compiler preproc definitions.
## These apply to every target created in this directory.
#add_definitions ( -D__linux__ )
add_definitions ( -DUNIX_OS )
add_definitions ( -DLINUX )
add_definitions ( -D__AMD64__ )
add_definitions ( -D__x86_64__ )
add_definitions ( -DAMD_INTERNAL_BUILD )
add_definitions ( -DLITTLEENDIAN_CPU=1 )
add_definitions ( -D HSA_DEPRECATED= )

## Get the package version; "1.0.0" is passed as the default.
## NOTE(review): get_version is presumably defined in utils.cmake and is
## expected to set VERSION_MAJOR/MINOR/PATCH (and optionally VERSION_BUILD) --
## confirm against cmake_modules/utils.cmake.
get_version ( "1.0.0" )

set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
## LIB_VERSION_STRING is captured before the build suffix is appended to the
## patch component, so it stays a plain major.minor.patch triple.
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
if ( DEFINED VERSION_BUILD )
    set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )

## Locate the headers of the thunk (hsakmt) and of the core HSA runtime, and
## add their directories to the include path (thunk first, then runtime).
find_file ( HSAKMT_INC NAMES "hsakmt.h" "libhsakmt/hsakmt.h" )
find_file ( HSA_INC "hsa/hsa.h" )
get_filename_component ( HSAKMT_INC_PATH ${HSAKMT_INC} DIRECTORY )
get_filename_component ( HSA_INC_PATH ${HSA_INC} DIRECTORY )
include_directories ( ${HSAKMT_INC_PATH} ${HSA_INC_PATH} )

## Locate the matching shared libraries and add their directories to the
## linker search path (thunk first, then runtime).
find_library ( HSAKMT_LIB libhsakmt.so )
find_library ( HSA_LIB libhsa-runtime64.so )
get_filename_component ( HSAKMT_LIB_PATH ${HSAKMT_LIB} DIRECTORY )
get_filename_component ( HSA_LIB_PATH ${HSA_LIB} DIRECTORY )
link_directories ( ${HSAKMT_LIB_PATH} ${HSA_LIB_PATH} )

## Locate the static archives the finalizer is linked against.
find_library ( HSAIL_LIB libhsail.a )
find_library ( HSAIL_AMD_LIB libhsail-amd.a )
find_library ( AMDHSAFIN_LIB amdhsafin64.a )
find_library ( LIBAMDHSACODE libamdhsacode.a )
find_library ( LIBCACHING libcaching.a )
find_library ( LIBSCP3 scSP3_R1000.a )
find_library ( LIBDWARF libdwarf.a )
find_library ( LIBELF libelf.a )

## Link-line fragments for the archives above; each is prefixed with
## --no-whole-archive before being handed to target_link_libraries.
set ( LIBHSAIL     -Wl,--no-whole-archive PRIVATE ${HSAIL_LIB} )
set ( LIBHSAILAMD  -Wl,--no-whole-archive PRIVATE ${HSAIL_AMD_LIB} )
set ( LIBAMDHSAFIN -Wl,--no-whole-archive ${AMDHSAFIN_LIB} )

## External dependencies and directories
## REG_INCLUDE is a CMake variable (pass -DREG_INCLUDE=<dir> to override). When
## it is not defined it defaults to the asic_reg folder under
## HSA_CLOSED_SOURCE_DIR.
if ( NOT DEFINED REG_INCLUDE )
  set ( REG_INCLUDE ${HSA_CLOSED_SOURCE_DIR}/drivers/inc/asic_reg )
endif()

## si_id.h is used as the marker file proving REG_INCLUDE points at the
## register headers; abort configuration when it is missing.
if ( NOT EXISTS ${REG_INCLUDE}/si_id.h )
    message ( FATAL_ERROR "CMake variable REG_INCLUDE is not set appropriately (si_id.h not found). REG_INCLUDE=${REG_INCLUDE}" )
endif ()

## Root of this extension's sources; defaults to this directory.
if( NOT DEFINED EXT_SOURCE_DIR )
  set ( EXT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
endif()

## Root of the open-source runtime tree; defaults to the parent directory.
if( NOT DEFINED OPEN_SOURCE_DIR )
  set ( OPEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.." )
endif()

## Check for _GNU_SOURCE pthread extensions
## CheckSymbolExists provides check_symbol_exists(); include it explicitly so
## this file does not depend on another module having loaded it.
include ( CheckSymbolExists )

## The *_np functions are only declared when _GNU_SOURCE is defined, so the
## probes are compiled with it and the setting is cleared again afterwards.
set ( CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE )
check_symbol_exists ( "pthread_attr_setaffinity_np" "pthread.h" HAVE_PTHREAD_ATTR_SETAFFINITY_NP )
check_symbol_exists ( "pthread_rwlockattr_setkind_np" "pthread.h" HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP )
unset ( CMAKE_REQUIRED_DEFINITIONS )

## Publish the results as directory-level definitions. No library target
## exists yet at this point in the script, so a target-scoped command cannot
## be used here; add_definitions applies to targets created later in this
## directory as well.
if ( HAVE_PTHREAD_ATTR_SETAFFINITY_NP )
  add_definitions ( -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP )
endif()
if ( HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP )
  add_definitions ( -DHAVE_PTHREAD_RWLOCKATTR_SETKIND_NP )
endif()

## ------------------------- Linux Compiler and Linker options -------------------------
## The C++ flags are rebuilt from scratch here: start from the language
## standard, then append the common flags in groups.
set ( CMAKE_CXX_FLAGS "-std=c++11 " )

string ( APPEND CMAKE_CXX_FLAGS
         " -Werror -fexceptions -fno-rtti -fvisibility=hidden"
         " -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings"
         " -Wno-deprecated-declarations -Wno-conversion-null"
         " -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions"
         " -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith"
         " -fPIC" )

## Architecture-specific code generation flags.
if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
    string ( APPEND CMAKE_CXX_FLAGS " -m64  -msse -msse2" )
elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" )
    string ( APPEND CMAKE_CXX_FLAGS " -m32" )
endif ()

## Debug builds additionally carry gdb debug information.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug )
    string ( APPEND CMAKE_CXX_FLAGS " -ggdb" )
endif ()

## Linker version script passed via --version-script below.
set ( DRVDEF "${EXT_SOURCE_DIR}/make/finalize.so.def" )

set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF}" )

## Do not embed build-tree RPATHs in the library.
set ( CMAKE_SKIP_BUILD_RPATH TRUE )

## Header search path(s), in lookup order: register headers, this extension,
## the open-source runtime, then the closed-source finalizer/HSAIL trees.
include_directories (
    ${REG_INCLUDE}
    ${EXT_SOURCE_DIR}/..
    ${EXT_SOURCE_DIR}/inc
    ${OPEN_SOURCE_DIR}/hsa-runtime
    ${OPEN_SOURCE_DIR}/hsa-runtime/inc
    ${OPEN_SOURCE_DIR}/hsa-runtime/core/inc
    ${OPEN_SOURCE_DIR}/hsa-runtime/core/common
    ${HSA_CLOSED_SOURCE_DIR}/drivers/hsa/compiler/ext_finalize/amdhsafin/Interface
    ${HSA_CLOSED_SOURCE_DIR}/drivers/hsa/compiler/finalizer/HSAIL/include
    ${HSA_CLOSED_SOURCE_DIR}/drivers/hsa/compiler/finalizer/HSAIL/hsail-tools/libHSAIL-AMD
    ${HSA_CLOSED_SOURCE_DIR}/drivers/hsa/compiler/finalizer/HSAIL/hsail-tools/libHSAIL
    ${HSA_CLOSED_SOURCE_DIR}/drivers/hsa/compiler/finalizer/HSAIL/hsail-tools/libHSAIL/build/${LIBHSAIL_BUILD_FOLDER}
)

## Translation units of the finalizer library: the extension's own sources
## followed by the pieces reused from the open-source core runtime.
set ( FINALIZE_SRCS
    ${EXT_SOURCE_DIR}/finalize/hsa_ext_finalize.cpp
    ${EXT_SOURCE_DIR}/finalize/program_context.cpp
    ${EXT_SOURCE_DIR}/finalize/finalizer_manager.cpp
    ${EXT_SOURCE_DIR}/runtime/amd_ext.cpp
    ${EXT_SOURCE_DIR}/runtime/device_info.cpp
    ${OPEN_SOURCE_DIR}/hsa-runtime/core/common/hsa_table_interface.cpp
    ${OPEN_SOURCE_DIR}/hsa-runtime/core/common/shared.cpp
    ${OPEN_SOURCE_DIR}/hsa-runtime/core/util/lnx/os_linux.cpp
)

## Build the finalizer extension as a shared library.
add_library ( ${FINALIZE_TARGET} SHARED ${FINALIZE_SRCS} )

## Set the VERSION and SOVERSION values
set_property ( TARGET ${FINALIZE_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${FINALIZE_TARGET} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )

## Add the core runtime in the link
## The trailing system libraries follow the last PRIVATE keyword and are
## therefore linked privately as well.
target_link_libraries (
    ${FINALIZE_TARGET}
    PRIVATE ${LIBAMDHSAFIN}
    PRIVATE ${LIBAMDHSACODE}
    PRIVATE ${LIBCACHING}
    PRIVATE ${LIBSCP3}
    PRIVATE ${LIBHSAILAMD}
    PRIVATE ${LIBHSAIL}
    PRIVATE ${LIBDWARF}
    PRIVATE ${LIBELF}
    PRIVATE hsa-runtime64
    PRIVATE hsakmt
    c stdc++ dl pthread rt
)

## If the build is Release, strip the target library.
## $<TARGET_FILE:...> resolves to the library file this target produces, so
## the step does not depend on the working directory or on shell globbing and
## never touches unrelated .so files.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    add_custom_command ( TARGET ${FINALIZE_TARGET} POST_BUILD
                         COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${FINALIZE_TARGET}> )
endif ()

## Set install information
install ( TARGETS ${FINALIZE_TARGET} LIBRARY DESTINATION hsa/lib )


================================================
FILE: runtime/hsa-ext-image/CMakeLists.txt
================================================
## Build script for the HSA image extension library (hsa-ext-image64).
cmake_minimum_required ( VERSION 3.5.0 )

## Set ext runtime module name and project name.
set ( IMAGE_NAME "hsa-ext-image" )
set ( IMAGE_TARGET "${IMAGE_NAME}64" )
set ( IMAGE_LIBRARY "lib${IMAGE_TARGET}" )
project ( ${IMAGE_TARGET} )

# Optionally, build with ccache.
# ROCM_CCACHE_BUILD is a cache option defaulting to OFF. When it is ON and a
# ccache executable is found, ccache is installed as the global compile
# launcher; otherwise a warning is printed and the build continues without it.
set(ROCM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if (ROCM_CCACHE_BUILD)
  find_program(CCACHE_PROGRAM ccache)
  if (CCACHE_PROGRAM)
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM})
  else()
    message(WARNING "Unable to find ccache. Falling back to real compiler")
  endif() # if (CCACHE_PROGRAM)
endif() # if (ROCM_CCACHE_BUILD)

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules" )
include ( utils )

## Compiler preproc definitions.
#add_definitions ( -D__linux__ )
add_definitions ( -DUNIX_OS )
add_definitions ( -DLINUX )
add_definitions ( -D__AMD64__ )
add_definitions ( -D__x86_64__ )
add_definitions ( -DAMD_INTERNAL_BUILD )
add_definitions ( -DLITTLEENDIAN_CPU=1 )
add_definitions ( -D HSA_DEPRECATED= )
add_definitions ( -D BRAHMA_BUILD=1 )

## Get the package version.
## get_version() comes from the included utils module (not visible in this
## file); presumably it populates the VERSION_* variables consumed below, with
## "1.1.9" as the fallback version -- TODO confirm against utils.cmake.
get_version( "1.1.9")
## Shared-object version components: major and minor are fixed here.
set(SO_MAJOR 1)
set(SO_MINOR 1)
## An externally supplied ROCM_PATCH_VERSION overrides both the shared-object
## patch level and VERSION_PATCH; without it the shared-object patch level is 9
## and VERSION_PATCH is left as get_version() set it.
if ( ${ROCM_PATCH_VERSION} )
    set ( SO_PATCH ${ROCM_PATCH_VERSION})
    set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
else ()
    set(SO_PATCH 9)
endif ()

## SO_VERSION_STRING becomes the library's VERSION property further down;
## PACKAGE_VERSION_STRING additionally carries commit count, job and hash.
set( SO_VERSION_STRING "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}" )
set( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_HASH}" )

## Find self
if( "${EXT_SOURCE_DIR}" STREQUAL "" )
    get_include_path( EXT_SOURCE_FILE null NAMES "image/hsa_ext_image.cpp" HINTS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../../ext/" )
    get_filename_component( EXT_SOURCE_DIR "${EXT_SOURCE_FILE}/.." ABSOLUTE )
    unset( EXT_SOURCE_FILE CACHE )
endif()
set( EXT_SOURCE_DIR ${EXT_SOURCE_DIR} CACHE PATH "Image lib source dir" FORCE )

get_filename_component( OPEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE CACHE )
set( OPEN_SOURCE_DIR ${OPEN_SOURCE_DIR} CACHE PATH "Open source root dir" FORCE )

## Set RUNPATH - ../../lib covers use of the legacy symlink in /hsa/lib/
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64")

## ------------------------- Linux Compiler and Linker options -------------------------
set ( CMAKE_CXX_FLAGS "-std=c++11 " )

set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -fPIC" )

if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
    set  ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64  -msse -msse2" )
elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
endif ()

if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug )
    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
endif ()

set ( DRVDEF "${EXT_SOURCE_DIR}/image/image.so.def" )

set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF} -Wl,--enable-new-dtags" )

## Library path(s).
include_directories(${REG_INCLUDE})
include_directories(${EXT_SOURCE_DIR}/..)
include_directories(${EXT_SOURCE_DIR}/inc)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime/inc)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime/core/inc)
set ( IMAGE_SRCS ${EXT_SOURCE_DIR}/image/hsa_ext_image.cpp)

add_library ( ${IMAGE_TARGET} SHARED ${IMAGE_SRCS} )

## Set the VERSION and SOVERSION values
set_property ( TARGET ${IMAGE_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}" )
set_property ( TARGET ${IMAGE_TARGET} PROPERTY SOVERSION "${SO_MAJOR}" )

## Add the core runtime in the link
target_link_libraries (
    ${IMAGE_TARGET}
    c dl pthread rt
)

## If the build is Release, strip the target library
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    add_custom_command ( TARGET ${IMAGE_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE_NAME:${IMAGE_TARGET}> )
endif ()

## Create symlinks for legacy packaging and install
add_custom_target ( hsa_images_lib_link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E create_symlink ../hsa/lib/${IMAGE_LIBRARY}.so ${IMAGE_LIBRARY}-link.so )

## Set install information
install ( TARGETS ${IMAGE_TARGET} LIBRARY DESTINATION hsa/lib )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/${IMAGE_LIBRARY}-link.so DESTINATION lib PERMISSIONS OWNER_WRITE OWNER_READ RENAME ${IMAGE_LIBRARY}.so )


================================================
FILE: runtime/hsa-runtime/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.7 )

## Clear target dependency data.
## Needed to allow UI transitions between static and dynamic builds.
## Need an update to CMake 3.12 to remove this hack. See CMake policy change CMP0073.
unset ( hsa-runtime64_LIB_DEPENDS CACHE )

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_CXX_STANDARD 17)

## Set core runtime module name and project name.
set ( CORE_RUNTIME_NAME "hsa-runtime64" )
set ( CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}" )
set ( CORE_RUNTIME_LIBRARY "lib${CORE_RUNTIME_TARGET}" )

## Set project name
project( ${CORE_RUNTIME_TARGET} )

## Utility functions
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
include ( utils )
include ( hsa_common )
include ( GNUInstallDirs )

## Adjust target name for static builds
## Original name will be an interface target that adds --whole-archive linker options around the target.
## BUILD_SHARED_LIBS is tested by name (not via ${...} expansion) so that an
## unset variable simply reads as false, i.e. a static build, instead of
## collapsing the condition to a bare "if( NOT )".
if( NOT BUILD_SHARED_LIBS )
  set ( CORE_RUNTIME_TARGET "${CORE_RUNTIME_TARGET}_static" )
endif()

# Opt-in compiler caching for the HSA Runtime.  When ROCM_CCACHE_BUILD is ON and
# a ccache binary can be located, every compile rule is launched through it;
# otherwise the build warns and carries on with the plain compiler.
set(ROCM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if (ROCM_CCACHE_BUILD)
  find_program(CCACHE_PROGRAM ccache)
  if (NOT CCACHE_PROGRAM)
    message(WARNING "Unable to find ccache. Falling back to real compiler")
  else()
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM})
  endif() # if (NOT CCACHE_PROGRAM)
endif() # if (ROCM_CCACHE_BUILD)

## Find external dependencies.
## PkgConfig provides pkg_check_modules(), which is used below to locate libdrm.
## LibElf is resolved through the FindLibElf module on CMAKE_MODULE_PATH and
## supplies the elf::elf target linked into the runtime later in this file.
find_package(PkgConfig)
find_package(LibElf REQUIRED)

## IMPORTED_TARGET makes libdrm available as the PkgConfig::drm target, which
## the shared-library build links against.
pkg_check_modules(drm REQUIRED IMPORTED_TARGET libdrm)

## Create the rocr target.
add_library( ${CORE_RUNTIME_TARGET} "" )

## Enforce uniform output file naming.
set_property(TARGET  ${CORE_RUNTIME_TARGET} PROPERTY OUTPUT_NAME ${CORE_RUNTIME_NAME} )

## Compiler preproc definitions.
target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE "${HSA_COMMON_DEFS}" __linux__ HSA_EXPORT=1 HSA_EXPORT_FINALIZER=1 HSA_EXPORT_IMAGES=1 HSA_DEPRECATED=
ROCR_BUILD_ID="${PACKAGE_VERSION_STRING}-${VERSION_JOB}-${VERSION_HASH}" )

## Probe the toolchain for optional platform features: the memfd_create syscall
## number and two pthread extensions that are checked with _GNU_SOURCE defined.
include(CheckSymbolExists)
check_symbol_exists ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE )

## _GNU_SOURCE is defined only for the two pthread probes and removed again
## right after them.
set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
check_symbol_exists ( "pthread_attr_setaffinity_np" "pthread.h" HAVE_PTHREAD_ATTR_SETAFFINITY_NP )
check_symbol_exists ( "pthread_rwlockattr_setkind_np" "pthread.h" HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP )
unset(CMAKE_REQUIRED_DEFINITIONS)

## Forward each detected feature to the runtime sources as a same-named macro.
foreach( _rocr_feature HAVE_MEMFD_CREATE HAVE_PTHREAD_ATTR_SETAFFINITY_NP HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP )
  if ( ${_rocr_feature} )
    target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE ${_rocr_feature} )
  endif()
endforeach()
unset(_rocr_feature)

## Set include directories for ROCr runtime
target_include_directories( ${CORE_RUNTIME_TARGET}
  PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/inc>
  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
  PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/libamdhsacode
  ${CMAKE_CURRENT_BINARY_DIR}/core/runtime/trap_handler
  ${CMAKE_CURRENT_BINARY_DIR}/core/runtime/blit_shaders)


## ------------------------- Linux Compiler and Linker options -------------------------
set ( HSA_CXX_FLAGS ${HSA_COMMON_CXX_FLAGS} -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=missing-braces -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function )

## x86-family processors additionally get -mmwaitx.
if ( CMAKE_SYSTEM_PROCESSOR MATCHES "i?86|x86_64|amd64|AMD64" )
  list ( APPEND HSA_CXX_FLAGS -mmwaitx )
endif()

## Extra image settings - audit!
list ( APPEND HSA_CXX_FLAGS -Wno-deprecated-declarations )

## Per-compiler demotion of selected warnings from errors back to warnings.
if ( CMAKE_COMPILER_IS_GNUCXX )
  list ( APPEND HSA_CXX_FLAGS -Wno-error=maybe-uninitialized -Wno-error=unused-but-set-variable )
endif ()
if ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
  list ( APPEND HSA_CXX_FLAGS -Wno-error=self-assign )
  ## The unused-but-set-variable switch is only passed to Clang 13 and newer.
  if( ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 13)
    list ( APPEND HSA_CXX_FLAGS -Wno-error=unused-but-set-variable )
  endif()
endif()

set ( DRVDEF "${CMAKE_CURRENT_SOURCE_DIR}/hsacore.so.def" )
set ( LNKSCR "hsacore.so.link" )
set ( HSA_SHARED_LINK_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,${CMAKE_CURRENT_SOURCE_DIR}/${LNKSCR} -Wl,--version-script=${DRVDEF} -Wl,--enable-new-dtags" )

target_compile_options(${CORE_RUNTIME_TARGET} PRIVATE ${HSA_CXX_FLAGS})
#target_link_options not available prior to CMake 3.13
set_property(TARGET ${CORE_RUNTIME_TARGET} PROPERTY LINK_FLAGS ${HSA_SHARED_LINK_FLAGS})
##  -------------------------  End Compiler and Linker options ----------------------------

## Source files.
set ( SRCS core/driver/driver.cpp
           core/driver/kfd/amd_kfd_driver.cpp
           core/driver/xdna/amd_xdna_driver.cpp
           core/util/lnx/os_linux.cpp
           core/util/small_heap.cpp
           core/util/timer.cpp
           core/util/flag.cpp
           core/runtime/amd_aie_agent.cpp
           core/runtime/amd_aie_aql_queue.cpp
           core/runtime/amd_blit_kernel.cpp
           core/runtime/amd_blit_sdma.cpp
           core/runtime/amd_cpu_agent.cpp
           core/runtime/amd_gpu_agent.cpp
           core/runtime/amd_hsa_loader.cpp
           core/runtime/amd_aql_queue.cpp
           core/runtime/amd_loader_context.cpp
           core/runtime/hsa_ven_amd_loader.cpp
           core/runtime/amd_memory_region.cpp
           core/runtime/amd_filter_device.cpp
           core/runtime/amd_topology.cpp
           core/runtime/default_signal.cpp
           core/runtime/host_queue.cpp
           core/runtime/hsa.cpp
           core/runtime/hsa_api_trace.cpp
           core/runtime/hsa_ext_amd.cpp
           core/runtime/hsa_ext_interface.cpp
           core/runtime/interrupt_signal.cpp
           core/runtime/intercept_queue.cpp
           core/runtime/ipc_signal.cpp
           core/runtime/isa.cpp
           core/runtime/runtime.cpp
           core/runtime/signal.cpp
           core/runtime/queue.cpp
           core/runtime/cache.cpp
           core/runtime/svm_profiler.cpp
           core/runtime/thunk_loader.cpp
           core/common/hsa_table_interface.cpp
           loader/executable.cpp
           libamdhsacode/amd_elf_image.cpp
           libamdhsacode/amd_hsa_code_util.cpp
           libamdhsacode/amd_hsa_locks.cpp
           libamdhsacode/amd_options.cpp
           libamdhsacode/amd_hsa_code.cpp
           libamdhsacode/amd_core_dump.cpp )

## Optional virtio driver backend.
## The definition is attached through ${CORE_RUNTIME_TARGET} rather than the
## literal name "hsa-runtime64": in static builds the library target is renamed
## to "<name>_static" and the plain name is only created later as an INTERFACE
## target, so the literal would reference a target that does not exist yet.
if ( BUILD_THUNK_VIRTIO )
  list(APPEND SRCS core/driver/virtio/amd_kfd_virtio_driver.cpp)
  target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HSAKMT_VIRTIO_ENABLED=1)
endif()

target_sources( ${CORE_RUNTIME_TARGET} PRIVATE ${SRCS} )

## Depend on trap handler target.
add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/core/runtime/trap_handler )
add_dependencies( ${CORE_RUNTIME_TARGET} amd_trap_handler_v2 )

## Depend on blit shader target.
add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/core/runtime/blit_shaders )
add_dependencies( ${CORE_RUNTIME_TARGET} amd_blit_shaders_v2)

option(PC_SAMPLING_SUPPORT "Enable PC Sampling Support" ON)

if (${PC_SAMPLING_SUPPORT})
  target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HSA_PC_SAMPLING_SUPPORT)

  set( PCS_SRCS pcs/hsa_ven_amd_pc_sampling.cpp pcs/pcs_runtime.cpp )

  target_sources( ${CORE_RUNTIME_TARGET} PRIVATE ${PCS_SRCS} )
endif()

if ( NOT DEFINED IMAGE_SUPPORT AND CMAKE_SYSTEM_PROCESSOR MATCHES "i?86|x86_64|amd64|AMD64|loongarch64" )
  set ( IMAGE_SUPPORT ON )
endif()
set ( IMAGE_SUPPORT ${IMAGE_SUPPORT} CACHE BOOL "Build with image support (default: ON for x86, OFF elsewise)." )

## Optional image module definitions.
if(${IMAGE_SUPPORT})
  ## Image definitions - audit!
  target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE
    HSA_IMAGE_SUPPORT
    UNIX_OS
    LINUX
    AMD_INTERNAL_BUILD
    BRAHMA_BUILD=1 )

  set ( IMAGE_SRCS image/addrlib/src/addrinterface.cpp
                   image/addrlib/src/core/coord.cpp
                   image/addrlib/src/core/addrlib.cpp
                   image/addrlib/src/core/addrlib1.cpp
                   image/addrlib/src/core/addrlib2.cpp
                   image/addrlib/src/core/addrlib3.cpp
                   image/addrlib/src/core/addrobject.cpp
                   image/addrlib/src/core/addrelemlib.cpp
                   image/addrlib/src/gfx9/gfx9addrlib.cpp
                   image/addrlib/src/gfx10/gfx10addrlib.cpp
                   image/addrlib/src/gfx11/gfx11addrlib.cpp
                   image/addrlib/src/gfx12/gfx12addrlib.cpp
                   image/device_info.cpp
                   image/hsa_ext_image.cpp
                   image/image_runtime.cpp
                   image/image_manager.cpp
                   image/image_manager_kv.cpp
                   image/image_manager_ai.cpp
                   image/image_manager_nv.cpp
                   image/image_manager_gfx11.cpp
                   image/image_manager_gfx12.cpp
                   image/image_lut_kv.cpp
                   image/image_lut_gfx11.cpp
                   image/blit_object_gfx7xx.cpp
                   image/blit_object_gfx8xx.cpp
                   image/blit_object_gfx9xx.cpp
                   image/blit_kernel.cpp
                   ${CMAKE_CURRENT_BINARY_DIR}/image/blit_src/opencl_blit_objects.cpp )

  set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/image/blit_src/opencl_blit_objects.cpp PROPERTIES GENERATED TRUE)

  target_include_directories( ${CORE_RUNTIME_TARGET}
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/image
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/inc
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/core
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/r800
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx9
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx10
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx11
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/gfx12
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/r800
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx9
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx10
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx11
    ${CMAKE_CURRENT_SOURCE_DIR}/image/addrlib/src/chip/gfx12 )

  target_sources( ${CORE_RUNTIME_TARGET} PRIVATE ${IMAGE_SRCS} )

  ## Depend on blit kernel target.
  add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/image/blit_src )
  add_dependencies( ${CORE_RUNTIME_TARGET} opencl_blit_objects )

endif()

## Libraries every build flavour links against.
target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE elf::elf dl pthread rt )
# Shared library builds link to the hsakmt target (plus libdrm via pkg-config)
# and optionally pick up rocprofiler-register.
# Static library builds link to the hsakmt-staticdrm target instead and do not
# look for rocprofiler-register at all.
if( BUILD_SHARED_LIBS )
  target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt::hsakmt PkgConfig::drm)
  if( BUILD_THUNK_VIRTIO )
    message(STATUS "Building with virtio support")
    target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt_virtio)
  endif()
  # rocprofiler-register is optional: when found, the runtime is compiled with
  # HSA_ROCPROFILER_REGISTER plus the HSA version macros and linked against it.
  find_package(rocprofiler-register)
  if(rocprofiler-register_FOUND)
    target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HSA_ROCPROFILER_REGISTER=1
                                                              HSA_VERSION_MAJOR=${VERSION_MAJOR}
                                                              HSA_VERSION_MINOR=${VERSION_MINOR}
                                                              HSA_VERSION_PATCH=${VERSION_PATCH})
    target_link_libraries(${CORE_RUNTIME_TARGET} PRIVATE rocprofiler-register::rocprofiler-register)
    # Record the outcome in the cache as HSA_DEP_ROCPROFILER_REGISTER
    # (presumably read by packaging logic outside this file -- TODO confirm).
    set(HSA_DEP_ROCPROFILER_REGISTER ON CACHE INTERNAL "")
  else()
    set(HSA_DEP_ROCPROFILER_REGISTER OFF CACHE INTERNAL "")
  endif() # end rocprofiler-register_FOUND
else()
  # No PkgConfig::drm target is linked in this branch, so the libdrm include
  # directories reported by pkg-config are added directory-wide instead.
  include_directories(${drm_INCLUDE_DIRS})
  target_link_libraries ( ${CORE_RUNTIME_TARGET} PRIVATE hsakmt-staticdrm::hsakmt-staticdrm)
endif()#end BUILD_SHARED_LIBS

## Set the VERSION and SOVERSION values
set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}" )
set_property ( TARGET ${CORE_RUNTIME_TARGET} PROPERTY SOVERSION "${VERSION_MAJOR}" )

## Add the public interface export target if doing a static build.
## Bind ROCr dependencies to the interface target rather than to the source build
## target so that -Wl,--whole-archive is tightly applied.  Requires binding
## indirectly to the source build target.
## BUILD_SHARED_LIBS is tested by name (not via ${...} expansion) so that an
## unset variable reads as false instead of collapsing to a bare "if( NOT )".
if( NOT BUILD_SHARED_LIBS )
  add_library(${CORE_RUNTIME_NAME} INTERFACE)

  ## Bind to source build target interface but not its link requirements.
  ## The archive is referenced through its exported (namespaced) name, and the
  ## linker script through its installed location under the install prefix.
  target_include_directories( ${CORE_RUNTIME_NAME} INTERFACE $<TARGET_PROPERTY:${CORE_RUNTIME_NAME}::${CORE_RUNTIME_TARGET},INTERFACE_INCLUDE_DIRECTORIES> )
  target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE -Wl,$<INSTALL_PREFIX>/${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME}/${LNKSCR}
    -Wl,--whole-archive $<TARGET_FILE:${CORE_RUNTIME_NAME}::${CORE_RUNTIME_TARGET}> -Wl,--no-whole-archive)
  add_dependencies( ${CORE_RUNTIME_NAME} ${CORE_RUNTIME_TARGET} )

  ## Add external link requirements.
  target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE hsakmt-staticdrm::hsakmt-staticdrm )
  target_link_libraries ( ${CORE_RUNTIME_NAME} INTERFACE elf::elf dl pthread rt )

  install ( TARGETS ${CORE_RUNTIME_NAME} EXPORT ${CORE_RUNTIME_NAME}Targets )
endif()

## Set install information
# Installs binaries and exports the library usage data to ${CORE_RUNTIME_NAME}Targets
install ( TARGETS ${CORE_RUNTIME_TARGET} EXPORT ${CORE_RUNTIME_NAME}Targets
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary )
install ( TARGETS ${CORE_RUNTIME_TARGET}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan )

# Install license
if(ENABLE_ASAN_PACKAGING)
   install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan )
endif()
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT binary )

# Install public headers
install ( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/inc/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hsa COMPONENT dev )

## Configure and install package config file
# Record our usage data for clients find_package calls.
install ( EXPORT ${CORE_RUNTIME_NAME}Targets
  FILE ${CORE_RUNTIME_NAME}Targets.cmake
  NAMESPACE ${CORE_RUNTIME_NAME}::
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME}
  COMPONENT dev)

# Adds the target alias hsa-runtime64::hsa-runtime64 to the local cmake cache.
# This isn't necessary today.  It's harmless preparation for some
# hypothetical future in which we might be included by add_subdirectory()
# in some other project's cmake file.  It allows uniform use of find_package()
# and target_link_libraries() without regard to whether a target is external or
# a subdirectory of the current build.
add_library( ${CORE_RUNTIME_NAME}::${CORE_RUNTIME_NAME} ALIAS ${CORE_RUNTIME_NAME} )

# Create cmake configuration files
include(CMakePackageConfigHelpers)

configure_package_config_file(${CORE_RUNTIME_NAME}-config.cmake.in
  ${CORE_RUNTIME_NAME}-config.cmake
  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME} )

write_basic_package_version_file(${CORE_RUNTIME_NAME}-config-version.cmake
  VERSION ${SO_VERSION_STRING} COMPATIBILITY AnyNewerVersion )

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${CORE_RUNTIME_NAME}-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/${CORE_RUNTIME_NAME}-config-version.cmake
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME}
  COMPONENT dev)

# Install build files needed only when using a static build.
# BUILD_SHARED_LIBS is tested by name (not via ${...} expansion) so that an
# unset variable reads as false instead of collapsing to a bare "if( NOT )".
if( NOT BUILD_SHARED_LIBS )
  # libelf find package module (and the license text it refers to), so that
  # consumers of the static package can resolve elf::elf themselves.
  install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/FindLibElf.cmake ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/COPYING-CMAKE-SCRIPTS
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME}
    COMPONENT dev)
  # Linker script (defines function aliases); the static interface target's
  # link line points at this installed copy.
  install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${LNKSCR}
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CORE_RUNTIME_NAME}
    COMPONENT dev)
endif()


================================================
FILE: runtime/hsa-runtime/LICENSE.md
================================================
ROCR-Runtime LICENSE

The University of Illinois/NCSA
Open Source License (NCSA)

Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.

Developed by:

                AMD Research and AMD HSA Software Development

                Advanced Micro Devices, Inc.

                www.amd.com

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal with the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

 - Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimers.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimers in
   the documentation and/or other materials provided with the distribution.
 - Neither the names of Advanced Micro Devices, Inc,
   nor the names of its contributors may be used to endorse or promote
   products derived from this Software without specific prior written
   permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.


================================================
FILE: runtime/hsa-runtime/cmake_modules/COPYING-CMAKE-SCRIPTS
================================================
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products 
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: runtime/hsa-runtime/cmake_modules/FindLibElf.cmake
================================================
# - Try to find libelf
# Once done this will define
#
#  LIBELF_FOUND - system has libelf
#  LIBELF_INCLUDE_DIRS - the libelf include directory
#  LIBELF_LIBRARIES - Link these to use libelf
#  LIBELF_DEFINITIONS - Compiler switches required for using libelf
#
#  Copyright (c) 2008 Bernhard Walle <bernhard.walle@gmx.de>
#
#  Redistribution and use is allowed according to the terms of the New
#  BSD license.
#  For details see the accompanying COPYING-CMAKE-SCRIPTS file.
#

if (LIBELF_FOUND)
  return()
endif (LIBELF_FOUND)

find_path (LIBELF_INCLUDE_DIRS
  NAMES
    libelf.h
  PATHS
    /usr/include
    /usr/include/libelf
    /usr/local/include
    /usr/local/include/libelf
    /opt/local/include
    /opt/local/include/libelf
    ENV CPATH)

find_library (LIBELF_LIBRARIES
  NAMES
    elf
  PATHS
    /usr/lib
    /usr/lib64
    /usr/local/lib
    /usr/local/lib64
    /opt/local/lib
    /opt/local/lib64
    ENV LIBRARY_PATH
    ENV LD_LIBRARY_PATH)

include (FindPackageHandleStandardArgs)


# handle the QUIETLY and REQUIRED arguments and set LIBELF_FOUND to TRUE if all listed variables are TRUE
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibElf DEFAULT_MSG
  LIBELF_LIBRARIES
  LIBELF_INCLUDE_DIRS)

# Probe whether libelf provides elf_getshdrstrndx(); the result is stored in
# ELF_GETSHDRSTRNDX.  Without a loaded C++ compiler the probe cannot run and the
# function is assumed to be available.
#
# A find module executes in the scope of its caller, so CMAKE_REQUIRED_LIBRARIES
# is saved before the probe and put back afterwards; otherwise "elf" would be
# linked into every later check_*() call made by the including project.
if (CMAKE_CXX_COMPILER_LOADED)
  if (DEFINED CMAKE_REQUIRED_LIBRARIES)
    set(_libelf_saved_required_libraries "${CMAKE_REQUIRED_LIBRARIES}")
  endif()
  set(CMAKE_REQUIRED_LIBRARIES elf)

  include(CheckCXXSourceCompiles)
  check_cxx_source_compiles("#include <libelf.h>
  int main() {
    Elf *e = (Elf*)0;
    size_t sz;
    elf_getshdrstrndx(e, &sz);
    return 0;
  }" ELF_GETSHDRSTRNDX)

  # Restore the caller's state exactly: previous value if there was one,
  # undefined otherwise.
  if (DEFINED _libelf_saved_required_libraries)
    set(CMAKE_REQUIRED_LIBRARIES "${_libelf_saved_required_libraries}")
    unset(_libelf_saved_required_libraries)
  else()
    unset(CMAKE_REQUIRED_LIBRARIES)
  endif()
else()
  set ( ELF_GETSHDRSTRNDX "TRUE" )
endif()

mark_as_advanced(LIBELF_INCLUDE_DIRS LIBELF_LIBRARIES ELF_GETSHDRSTRNDX)

# Publish the result as the imported target elf::elf.
# The NOT TARGET guard keeps the module safe to run when an elf::elf target is
# already visible (for example created by another find module), where a second
# add_library() for the same name would be a hard error.
if(LIBELF_FOUND AND NOT TARGET elf::elf)
  add_library(elf::elf UNKNOWN IMPORTED)
  set_property(TARGET elf::elf PROPERTY IMPORTED_LOCATION ${LIBELF_LIBRARIES})
  set_property(TARGET elf::elf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LIBELF_INCLUDE_DIRS})
endif()


================================================
FILE: runtime/hsa-runtime/cmake_modules/hsa_common.cmake
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

#
# HSA Build compiler definitions common between components.
#

# Pointer width of the target: IS64BIT is 1 or 0, ONLY64STR the matching
# "64" / "32" string.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(IS64BIT 1)
  set(ONLY64STR "64")
else()
  set(IS64BIT 0)
  set(ONLY64STR "32")
endif()

# Compiler flags shared between components, accumulated as a CMake list.
set(HSA_COMMON_CXX_FLAGS "-Wall" "-fPIC")
if(CMAKE_COMPILER_IS_GNUCXX)
  list(APPEND HSA_COMMON_CXX_FLAGS "-Wl,--unresolved-symbols=ignore-in-shared-libs")
endif()
list(APPEND HSA_COMMON_CXX_FLAGS "-fno-strict-aliasing")

# Word-size / SIMD switches for the x86 family.
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  list(APPEND HSA_COMMON_CXX_FLAGS "-m64" "-msse" "-msse2")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")
  list(APPEND HSA_COMMON_CXX_FLAGS "-m32")
endif()

# Debug builds disable optimisation and emit gdb-flavoured debug info.
if("${CMAKE_BUILD_TYPE}" STREQUAL Debug)
  list(APPEND HSA_COMMON_CXX_FLAGS "-O0" "-ggdb")
endif()

# Preprocessor definitions shared between components.
set(HSA_COMMON_DEFS
  "__STDC_LIMIT_MACROS"
  "__STDC_CONSTANT_MACROS"
  "__STDC_FORMAT_MACROS"
  "LITTLEENDIAN_CPU=1")


================================================
FILE: runtime/hsa-runtime/cmake_modules/utils.cmake
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

## Locates a canary file or library and stores the directory that contains it in a
## cache variable.
##   LIB          - true: search with find_library(); false: search with find_file().
##   CACHED_PATH  - name of the cache variable. Its current value (if any) is used as a
##                  search hint and is kept when it disagrees with the search result.
##   HELP         - docstring attached to the cache variable.
##   NAMES ...    - candidate file/library names.
##   HINTS ...    - additional search hints.
##   RESULT <var> - optional; <var> is set to TRUE/FALSE in the caller's scope depending
##                  on whether the search located one of NAMES.
function( get_path LIB CACHED_PATH HELP )

    set( options "" )
    set( oneValueArgs RESULT )
    set( multiValueArgs HINTS NAMES )
    cmake_parse_arguments( ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )

    # Search for canary file.
    if( ${LIB} )
        find_library( FULLPATH NAMES ${ARGS_NAMES} HINTS ${${CACHED_PATH}} ${ARGS_HINTS} )
    else()
        find_file( FULLPATH NAMES ${ARGS_NAMES} HINTS ${${CACHED_PATH}} ${ARGS_HINTS} )
    endif()

    # if(<var>) is false for "<var>-NOTFOUND", so this yields a real boolean instead of
    # an unevaluated expression that callers would have to re-expand themselves.
    if( FULLPATH )
        set( FOUND TRUE )
        # Extract path
        get_filename_component( DIRPATH "${FULLPATH}" DIRECTORY )
    else()
        set( FOUND FALSE )
        set( DIRPATH "" )
    endif()

    # Check path against cache
    if( NOT "${${CACHED_PATH}}" STREQUAL "" )
        if( NOT "${${CACHED_PATH}}" STREQUAL "${DIRPATH}" )
            message(WARNING "${CACHED_PATH} may be incorrect." )
            # A value already present in the cache wins over the search result.
            set( DIRPATH "${${CACHED_PATH}}" )
        endif()
    elseif( NOT FOUND )
        message(WARNING "${CACHED_PATH} not located during path search.")
    endif()

    # Set cache variable and help text
    set( ${CACHED_PATH} "${DIRPATH}" CACHE PATH "${HELP}" FORCE )
    # Drop the scratch cache entry so the next call performs a fresh search.
    unset( FULLPATH CACHE )

    # Return success flag (only when the caller asked for it).
    if( NOT "${ARGS_RESULT}" STREQUAL "" )
        set( ${ARGS_RESULT} ${FOUND} PARENT_SCOPE )
    endif()

endfunction()

## Searches for a file using include paths (find_file) and stores the directory containing
## that file in the cache variable <VAR>, using the cached value as a hint if set.
## <HELP> is the docstring for the cache entry.  Search paths are optional.
## Returns success in RESULT.
## get_include_path(<VAR> <HELP> NAMES name1 [name2...] [HINTS path1 [path2 ... ENV var]] [RESULT <var>])
macro( get_include_path CACHED_PATH HELP )
    get_path( 0 ${ARGV} )
endmacro()

## Searches for a file using library paths (find_library) and stores the directory containing
## that file in the cache variable <VAR>, using the cached value as a hint if set.
## <HELP> is the docstring for the cache entry.  Search paths are optional.
## Returns success in RESULT.
## get_library_path(<VAR> <HELP> NAMES name1 [name2...] [HINTS path1 [path2 ... ENV var]] [RESULT <var>])
macro( get_library_path CACHED_PATH HELP )
    get_path( 1 ${ARGV} )
endmacro()

## Parses VERSION_STRING and places the first, second and third runs of decimal
## digits found in it into VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH in the
## caller's scope.  A variable is left untouched when the string does not contain
## that many numbers.  Digits are matched anywhere in the string, including after
## a "-" separator.
function( parse_version VERSION_STRING )

    # Quote the input so an empty or list-like string does not break the command.
    string ( REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}" )
    list ( LENGTH VERSIONS VERSION_COUNT )

    if ( VERSION_COUNT GREATER 0 )
        list ( GET VERSIONS 0 MAJOR )
        set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
    endif ()

    if ( VERSION_COUNT GREATER 1 )
        list ( GET VERSIONS 1 MINOR )
        set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
    endif ()

    if ( VERSION_COUNT GREATER 2 )
        list ( GET VERSIONS 2 PATCH )
        set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
    endif ()

endfunction ()

## Derives version components for the build and passes them back to the caller's scope:
##   VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH
##                        - parsed from DEFAULT_VERSION_STRING via parse_version().
##   VERSION_COMMIT_COUNT - number of commits between the merge-base of HEAD with
##                          origin/HEAD and HEAD; 0 when git is not found or a git
##                          command fails.
##   VERSION_HASH         - short hash of HEAD, with "-dirty" appended when
##                          `git diff --quiet` exits with 1; "unknown" otherwise.
##   VERSION_JOB          - $ENV{ROCM_BUILD_ID} when defined, else "local-build".
function ( get_version DEFAULT_VERSION_STRING )

    set( VERSION_JOB "local-build" )
    set( VERSION_COMMIT_COUNT 0 )
    set( VERSION_HASH "unknown" )

    find_program( GIT NAMES git )

    if( GIT )

        #execute_process ( COMMAND git describe --tags --dirty --long
        #                  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
        #                  OUTPUT_VARIABLE GIT_TAG_STRING
        #                  OUTPUT_STRIP_TRAILING_WHITESPACE
        #                  RESULT_VARIABLE RESULT )

        # Run the executable located by find_program() rather than whatever "git"
        # resolves to on PATH at execution time.

        # Get branch commit (common ancestor) of current branch and master branch.
        execute_process(COMMAND "${GIT}" merge-base HEAD origin/HEAD
                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                        OUTPUT_VARIABLE GIT_MERGE_BASE
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE RESULT )

        # RESULT is referenced by name (not expanded) so that a textual launch error
        # cannot turn the condition into a syntax error.
        if( RESULT EQUAL 0 )
            # Count commits from branch point.
            execute_process(COMMAND "${GIT}" rev-list --count ${GIT_MERGE_BASE}..HEAD
                            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            OUTPUT_VARIABLE VERSION_COMMIT_COUNT
                            OUTPUT_STRIP_TRAILING_WHITESPACE
                            RESULT_VARIABLE RESULT )
            if( NOT RESULT EQUAL 0 )
                set( VERSION_COMMIT_COUNT 0 )
            endif()
        endif()

        # Get current short hash.
        execute_process(COMMAND "${GIT}" rev-parse --short HEAD
                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                        OUTPUT_VARIABLE VERSION_HASH
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE RESULT )
        if( RESULT EQUAL 0 )
            # Check for dirty workspace.
            execute_process(COMMAND "${GIT}" diff --quiet
                            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            RESULT_VARIABLE RESULT )
            if( RESULT EQUAL 1 )
                set(VERSION_HASH "${VERSION_HASH}-dirty")
            endif()
        else()
            set( VERSION_HASH "unknown" )
        endif()
    endif()

    # Build automation IDs
    if(DEFINED ENV{ROCM_BUILD_ID})
        set( VERSION_JOB $ENV{ROCM_BUILD_ID} )
    endif()

    # parse_version() writes VERSION_MAJOR/MINOR/PATCH into this function's scope;
    # they are re-exported to the caller below.
    parse_version( "${DEFAULT_VERSION_STRING}" )

    set( VERSION_MAJOR  "${VERSION_MAJOR}" PARENT_SCOPE )
    set( VERSION_MINOR  "${VERSION_MINOR}" PARENT_SCOPE )
    set( VERSION_PATCH  "${VERSION_PATCH}" PARENT_SCOPE )
    set( VERSION_COMMIT_COUNT "${VERSION_COMMIT_COUNT}" PARENT_SCOPE )
    set( VERSION_HASH "${VERSION_HASH}" PARENT_SCOPE )
    set( VERSION_JOB "${VERSION_JOB}" PARENT_SCOPE )

    #message("${VERSION_MAJOR}" )
    #message("${VERSION_MINOR}" )
    #message("${VERSION_PATCH}" )
    #message("${VERSION_COMMIT_COUNT}")
    #message("${VERSION_HASH}")
    #message("${VERSION_JOB}")

endfunction()


================================================
FILE: runtime/hsa-runtime/core/common/hsa_table_interface.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "inc/hsa_api_trace.h"
#include "core/inc/hsa_api_trace_int.h"

// File-local pointers to the API dispatch tables. They are assigned by
// hsa_table_interface_init() and dereferenced by the pass-through stubs below.
// hsaApiTable is the root table; the other three are the sub-tables it points to
// (core_, amd_ext_ and tools_ respectively).
static const HsaApiTable* hsaApiTable;
static const CoreApiTable* coreApiTable;
static const AmdExtTable* amdExtTable;
static const ToolsApiTable* toolsApiTable;

void hsa_table_interface_init(const HsaApiTable* apiTable) {
    hsaApiTable = apiTable;
    coreApiTable = apiTable->core_;
    amdExtTable = apiTable->amd_ext_;
    toolsApiTable = apiTable->tools_;
}

// Returns the cached root API table pointer (hsaApiTable).
const HsaApiTable* hsa_table_interface_get_table() {
  const HsaApiTable* const table = hsaApiTable;
  return table;
}

// Pass through stub functions
// Pass through stub functions
hsa_status_t HSA_API hsa_init() {
  // Load the API tables once more before dispatching. Initialization of the tables at
  // static-init time is subject to a link-time ordering condition: the globals of this
  // compilation unit may be initialized before the global objects of other compilation
  // units. In particular, Init::Init may run before the underlying hsa_api_table_ object
  // in hsa_api_trace.cpp has been initialized.
  rocr::core::LoadInitialHsaApiTable();
  const hsa_status_t status = coreApiTable->hsa_init_fn();
  return status;
}

// Runtime shutdown and basic system queries. Each stub dispatches through coreApiTable
// and returns whatever status the table entry produced.
hsa_status_t HSA_API hsa_shut_down() {
  const hsa_status_t status = coreApiTable->hsa_shut_down_fn();
  return status;
}

hsa_status_t HSA_API hsa_system_get_info(hsa_system_info_t attribute, void* value) {
  const hsa_status_t status = coreApiTable->hsa_system_get_info_fn(attribute, value);
  return status;
}

hsa_status_t HSA_API hsa_extension_get_name(uint16_t extension, const char** name) {
  const hsa_status_t status = coreApiTable->hsa_extension_get_name_fn(extension, name);
  return status;
}

// System-level extension queries. Arguments are forwarded unchanged to the matching
// coreApiTable entry.
hsa_status_t HSA_API hsa_system_extension_supported(uint16_t extension, uint16_t version_major,
                                                    uint16_t version_minor, bool* result) {
  const hsa_status_t status = coreApiTable->hsa_system_extension_supported_fn(
      extension, version_major, version_minor, result);
  return status;
}

hsa_status_t HSA_API hsa_system_major_extension_supported(uint16_t extension,
                                                          uint16_t version_major,
                                                          uint16_t* version_minor, bool* result) {
  const hsa_status_t status = coreApiTable->hsa_system_major_extension_supported_fn(
      extension, version_major, version_minor, result);
  return status;
}

hsa_status_t HSA_API hsa_system_get_extension_table(uint16_t extension, uint16_t version_major,
                                                    uint16_t version_minor, void* table) {
  const hsa_status_t status = coreApiTable->hsa_system_get_extension_table_fn(
      extension, version_major, version_minor, table);
  return status;
}

hsa_status_t HSA_API hsa_system_get_major_extension_table(uint16_t extension,
                                                          uint16_t version_major,
                                                          size_t table_length, void* table) {
  const hsa_status_t status = coreApiTable->hsa_system_get_major_extension_table_fn(
      extension, version_major, table_length, table);
  return status;
}

// Agent and cache query stubs. Each one dispatches through coreApiTable.
hsa_status_t HSA_API hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void* data),
                                        void* data) {
  const hsa_status_t status = coreApiTable->hsa_iterate_agents_fn(callback, data);
  return status;
}

hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent, hsa_agent_info_t attribute,
                                        void* value) {
  const hsa_status_t status = coreApiTable->hsa_agent_get_info_fn(agent, attribute, value);
  return status;
}

hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent, hsa_profile_t profile,
                                                      uint16_t* mask) {
  const hsa_status_t status =
      coreApiTable->hsa_agent_get_exception_policies_fn(agent, profile, mask);
  return status;
}

hsa_status_t HSA_API hsa_cache_get_info(hsa_cache_t cache, hsa_cache_info_t attribute,
                                        void* value) {
  const hsa_status_t status = coreApiTable->hsa_cache_get_info_fn(cache, attribute, value);
  return status;
}

hsa_status_t HSA_API hsa_agent_iterate_caches(
    hsa_agent_t agent, hsa_status_t (*callback)(hsa_cache_t cache, void* data), void* value) {
  const hsa_status_t status = coreApiTable->hsa_agent_iterate_caches_fn(agent, callback, value);
  return status;
}

// Per-agent extension queries, dispatched through coreApiTable.
hsa_status_t HSA_API hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent,
                                                   uint16_t version_major,
                                                   uint16_t version_minor, bool* result) {
  const hsa_status_t status = coreApiTable->hsa_agent_extension_supported_fn(
      extension, agent, version_major, version_minor, result);
  return status;
}

hsa_status_t HSA_API hsa_agent_major_extension_supported(uint16_t extension, hsa_agent_t agent,
                                                         uint16_t version_major,
                                                         uint16_t* version_minor, bool* result) {
  const hsa_status_t status = coreApiTable->hsa_agent_major_extension_supported_fn(
      extension, agent, version_major, version_minor, result);
  return status;
}

// Queue lifetime stubs (create / soft create / destroy / inactivate), dispatched through
// coreApiTable.
hsa_status_t HSA_API hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
                                      void (*callback)(hsa_status_t status, hsa_queue_t* source,
                                                       void* data),
                                      void* data, uint32_t private_segment_size,
                                      uint32_t group_segment_size, hsa_queue_t** queue) {
  const hsa_status_t result = coreApiTable->hsa_queue_create_fn(
      agent, size, type, callback, data, private_segment_size, group_segment_size, queue);
  return result;
}

hsa_status_t HSA_API hsa_soft_queue_create(hsa_region_t region, uint32_t size,
                                           hsa_queue_type32_t type, uint32_t features,
                                           hsa_signal_t completion_signal, hsa_queue_t** queue) {
  const hsa_status_t result = coreApiTable->hsa_soft_queue_create_fn(
      region, size, type, features, completion_signal, queue);
  return result;
}

hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t* queue) {
  const hsa_status_t result = coreApiTable->hsa_queue_destroy_fn(queue);
  return result;
}

hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t* queue) {
  const hsa_status_t result = coreApiTable->hsa_queue_inactivate_fn(queue);
  return result;
}

// Queue read/write index load stubs. Each returns the value produced by the matching
// coreApiTable entry.
uint64_t HSA_API hsa_queue_load_read_index_scacquire(const hsa_queue_t* queue) {
  const uint64_t result = coreApiTable->hsa_queue_load_read_index_scacquire_fn(queue);
  return result;
}

uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t* queue) {
  const uint64_t result = coreApiTable->hsa_queue_load_read_index_relaxed_fn(queue);
  return result;
}

uint64_t HSA_API hsa_queue_load_write_index_scacquire(const hsa_queue_t* queue) {
  const uint64_t result = coreApiTable->hsa_queue_load_write_index_scacquire_fn(queue);
  return result;
}

uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t* queue) {
  const uint64_t result = coreApiTable->hsa_queue_load_write_index_relaxed_fn(queue);
  return result;
}

// Queue write index store stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t* queue, uint64_t value) {
  coreApiTable->hsa_queue_store_write_index_relaxed_fn(queue, value);
}

void HSA_API hsa_queue_store_write_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  coreApiTable->hsa_queue_store_write_index_screlease_fn(queue, value);
}

// Queue write index compare-and-swap stubs. Each returns the value produced by the
// matching coreApiTable entry.
uint64_t HSA_API hsa_queue_cas_write_index_scacq_screl(const hsa_queue_t* queue, uint64_t expected,
                                                       uint64_t value) {
  const uint64_t result =
      coreApiTable->hsa_queue_cas_write_index_scacq_screl_fn(queue, expected, value);
  return result;
}

uint64_t HSA_API hsa_queue_cas_write_index_scacquire(const hsa_queue_t* queue, uint64_t expected,
                                                     uint64_t value) {
  const uint64_t result =
      coreApiTable->hsa_queue_cas_write_index_scacquire_fn(queue, expected, value);
  return result;
}

uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t* queue, uint64_t expected,
                                                   uint64_t value) {
  const uint64_t result =
      coreApiTable->hsa_queue_cas_write_index_relaxed_fn(queue, expected, value);
  return result;
}

uint64_t HSA_API hsa_queue_cas_write_index_screlease(const hsa_queue_t* queue, uint64_t expected,
                                                     uint64_t value) {
  const uint64_t result =
      coreApiTable->hsa_queue_cas_write_index_screlease_fn(queue, expected, value);
  return result;
}

// Queue write index add stubs. Each returns the value produced by the matching
// coreApiTable entry.
uint64_t HSA_API hsa_queue_add_write_index_scacq_screl(const hsa_queue_t* queue, uint64_t value) {
  const uint64_t result = coreApiTable->hsa_queue_add_write_index_scacq_screl_fn(queue, value);
  return result;
}

uint64_t HSA_API hsa_queue_add_write_index_scacquire(const hsa_queue_t* queue, uint64_t value) {
  const uint64_t result = coreApiTable->hsa_queue_add_write_index_scacquire_fn(queue, value);
  return result;
}

uint64_t HSA_API hsa_queue_add_write_index_relaxed(const hsa_queue_t* queue, uint64_t value) {
  const uint64_t result = coreApiTable->hsa_queue_add_write_index_relaxed_fn(queue, value);
  return result;
}

uint64_t HSA_API hsa_queue_add_write_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  const uint64_t result = coreApiTable->hsa_queue_add_write_index_screlease_fn(queue, value);
  return result;
}

// Queue read index store stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t* queue, uint64_t value) {
  coreApiTable->hsa_queue_store_read_index_relaxed_fn(queue, value);
}

void HSA_API hsa_queue_store_read_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  coreApiTable->hsa_queue_store_read_index_screlease_fn(queue, value);
}

// Region query stubs, dispatched through coreApiTable.
hsa_status_t HSA_API hsa_agent_iterate_regions(hsa_agent_t agent,
                                               hsa_status_t (*callback)(hsa_region_t region,
                                                                        void* data),
                                               void* data) {
  const hsa_status_t status = coreApiTable->hsa_agent_iterate_regions_fn(agent, callback, data);
  return status;
}

hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region, hsa_region_info_t attribute,
                                         void* value) {
  const hsa_status_t status = coreApiTable->hsa_region_get_info_fn(region, attribute, value);
  return status;
}

// Memory management stubs. Arguments are forwarded unchanged to the matching
// coreApiTable entry and its status is returned.
hsa_status_t HSA_API hsa_memory_register(void* address, size_t size) {
  const hsa_status_t status = coreApiTable->hsa_memory_register_fn(address, size);
  return status;
}

hsa_status_t HSA_API hsa_memory_deregister(void* address, size_t size) {
  const hsa_status_t status = coreApiTable->hsa_memory_deregister_fn(address, size);
  return status;
}

hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region, size_t size, void** ptr) {
  const hsa_status_t status = coreApiTable->hsa_memory_allocate_fn(region, size, ptr);
  return status;
}

hsa_status_t HSA_API hsa_memory_free(void* ptr) {
  const hsa_status_t status = coreApiTable->hsa_memory_free_fn(ptr);
  return status;
}

hsa_status_t HSA_API hsa_memory_copy(void* dst, const void* src, size_t size) {
  const hsa_status_t status = coreApiTable->hsa_memory_copy_fn(dst, src, size);
  return status;
}

hsa_status_t HSA_API hsa_memory_assign_agent(void* ptr, hsa_agent_t agent,
                                             hsa_access_permission_t access) {
  const hsa_status_t status = coreApiTable->hsa_memory_assign_agent_fn(ptr, agent, access);
  return status;
}

// Signal create / destroy / load stubs, dispatched through coreApiTable.
hsa_status_t HSA_API hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
                                       const hsa_agent_t* consumers, hsa_signal_t* signal) {
  const hsa_status_t status =
      coreApiTable->hsa_signal_create_fn(initial_value, num_consumers, consumers, signal);
  return status;
}

hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal) {
  const hsa_status_t status = coreApiTable->hsa_signal_destroy_fn(signal);
  return status;
}

hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_load_relaxed_fn(signal);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_load_scacquire(hsa_signal_t signal) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_load_scacquire_fn(signal);
  return result;
}

// Signal store and silent-store stubs, dispatched through coreApiTable. Nothing is
// returned.
void HSA_API hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_store_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_store_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_store_screlease_fn(signal, value);
}

void HSA_API hsa_signal_silent_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_silent_store_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_silent_store_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_silent_store_screlease_fn(signal, value);
}

// Signal wait stubs. Each returns the value produced by the matching coreApiTable entry.
hsa_signal_value_t HSA_API hsa_signal_wait_relaxed(hsa_signal_t signal,
                                                   hsa_signal_condition_t condition,
                                                   hsa_signal_value_t compare_value,
                                                   uint64_t timeout_hint,
                                                   hsa_wait_state_t wait_expectancy_hint) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_wait_relaxed_fn(
      signal, condition, compare_value, timeout_hint, wait_expectancy_hint);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_wait_scacquire(hsa_signal_t signal,
                                                     hsa_signal_condition_t condition,
                                                     hsa_signal_value_t compare_value,
                                                     uint64_t timeout_hint,
                                                     hsa_wait_state_t wait_expectancy_hint) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_wait_scacquire_fn(
      signal, condition, compare_value, timeout_hint, wait_expectancy_hint);
  return result;
}

// Signal group stubs (create / destroy / wait-any), dispatched through coreApiTable.
hsa_status_t HSA_API hsa_signal_group_create(uint32_t num_signals, const hsa_signal_t* signals,
                                             uint32_t num_consumers, const hsa_agent_t* consumers,
                                             hsa_signal_group_t* signal_group) {
  const hsa_status_t status = coreApiTable->hsa_signal_group_create_fn(
      num_signals, signals, num_consumers, consumers, signal_group);
  return status;
}

hsa_status_t HSA_API hsa_signal_group_destroy(hsa_signal_group_t signal_group) {
  const hsa_status_t status = coreApiTable->hsa_signal_group_destroy_fn(signal_group);
  return status;
}

hsa_status_t HSA_API hsa_signal_group_wait_any_relaxed(hsa_signal_group_t signal_group,
                                                       const hsa_signal_condition_t* conditions,
                                                       const hsa_signal_value_t* compare_values,
                                                       hsa_wait_state_t wait_state_hint,
                                                       hsa_signal_t* signal,
                                                       hsa_signal_value_t* value) {
  const hsa_status_t status = coreApiTable->hsa_signal_group_wait_any_relaxed_fn(
      signal_group, conditions, compare_values, wait_state_hint, signal, value);
  return status;
}

hsa_status_t HSA_API hsa_signal_group_wait_any_scacquire(hsa_signal_group_t signal_group,
                                                         const hsa_signal_condition_t* conditions,
                                                         const hsa_signal_value_t* compare_values,
                                                         hsa_wait_state_t wait_state_hint,
                                                         hsa_signal_t* signal,
                                                         hsa_signal_value_t* value) {
  const hsa_status_t status = coreApiTable->hsa_signal_group_wait_any_scacquire_fn(
      signal_group, conditions, compare_values, wait_state_hint, signal, value);
  return status;
}

// hsa_signal_and_* stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_and_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_and_scacquire(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_and_scacquire_fn(signal, value);
}

void HSA_API hsa_signal_and_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_and_screlease_fn(signal, value);
}

void HSA_API hsa_signal_and_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_and_scacq_screl_fn(signal, value);
}

// hsa_signal_or_* stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_or_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_or_scacquire(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_or_scacquire_fn(signal, value);
}

void HSA_API hsa_signal_or_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_or_screlease_fn(signal, value);
}

void HSA_API hsa_signal_or_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_or_scacq_screl_fn(signal, value);
}

// hsa_signal_xor_* stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_xor_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_xor_scacquire(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_xor_scacquire_fn(signal, value);
}

void HSA_API hsa_signal_xor_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_xor_screlease_fn(signal, value);
}

void HSA_API hsa_signal_xor_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_xor_scacq_screl_fn(signal, value);
}

// hsa_signal_add_* stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_add_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_add_scacquire(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_add_scacquire_fn(signal, value);
}

void HSA_API hsa_signal_add_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_add_screlease_fn(signal, value);
}

void HSA_API hsa_signal_add_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_add_scacq_screl_fn(signal, value);
}

// hsa_signal_subtract_* stubs, dispatched through coreApiTable. Nothing is returned.
void HSA_API hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_subtract_relaxed_fn(signal, value);
}

void HSA_API hsa_signal_subtract_scacquire(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_subtract_scacquire_fn(signal, value);
}

void HSA_API hsa_signal_subtract_screlease(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_subtract_screlease_fn(signal, value);
}

void HSA_API hsa_signal_subtract_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value) {
  coreApiTable->hsa_signal_subtract_scacq_screl_fn(signal, value);
}

// hsa_signal_exchange_* stubs. Each returns the value produced by the matching
// coreApiTable entry.
hsa_signal_value_t HSA_API hsa_signal_exchange_relaxed(hsa_signal_t signal,
                                                       hsa_signal_value_t value) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_exchange_relaxed_fn(signal, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_exchange_scacquire(hsa_signal_t signal,
                                                         hsa_signal_value_t value) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_exchange_scacquire_fn(signal, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_exchange_screlease(hsa_signal_t signal,
                                                         hsa_signal_value_t value) {
  const hsa_signal_value_t result = coreApiTable->hsa_signal_exchange_screlease_fn(signal, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_exchange_scacq_screl(hsa_signal_t signal,
                                                           hsa_signal_value_t value) {
  const hsa_signal_value_t result =
      coreApiTable->hsa_signal_exchange_scacq_screl_fn(signal, value);
  return result;
}

// hsa_signal_cas_* stubs. Each returns the value produced by the matching coreApiTable
// entry.
hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal,
                                                  hsa_signal_value_t expected,
                                                  hsa_signal_value_t value) {
  const hsa_signal_value_t result =
      coreApiTable->hsa_signal_cas_relaxed_fn(signal, expected, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_cas_scacquire(hsa_signal_t signal,
                                                    hsa_signal_value_t expected,
                                                    hsa_signal_value_t value) {
  const hsa_signal_value_t result =
      coreApiTable->hsa_signal_cas_scacquire_fn(signal, expected, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_cas_screlease(hsa_signal_t signal,
                                                    hsa_signal_value_t expected,
                                                    hsa_signal_value_t value) {
  const hsa_signal_value_t result =
      coreApiTable->hsa_signal_cas_screlease_fn(signal, expected, value);
  return result;
}

hsa_signal_value_t HSA_API hsa_signal_cas_scacq_screl(hsa_signal_t signal,
                                                      hsa_signal_value_t expected,
                                                      hsa_signal_value_t value) {
  const hsa_signal_value_t result =
      coreApiTable->hsa_signal_cas_scacq_screl_fn(signal, expected, value);
  return result;
}

//===--- Instruction Set Architecture -------------------------------------===//

// ISA lookup and info stubs, dispatched through coreApiTable.
hsa_status_t HSA_API hsa_isa_from_name(const char* name, hsa_isa_t* isa) {
  const hsa_status_t status = coreApiTable->hsa_isa_from_name_fn(name, isa);
  return status;
}

hsa_status_t HSA_API hsa_agent_iterate_isas(hsa_agent_t agent,
                                            hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                            void* data) {
  const hsa_status_t status = coreApiTable->hsa_agent_iterate_isas_fn(agent, callback, data);
  return status;
}

/* deprecated */ hsa_status_t HSA_API hsa_isa_get_info(hsa_isa_t isa, hsa_isa_info_t attribute,
                                                       uint32_t index, void* value) {
  const hsa_status_t status = coreApiTable->hsa_isa_get_info_fn(isa, attribute, index, value);
  return status;
}

hsa_status_t HSA_API hsa_isa_get_info_alt(hsa_isa_t isa, hsa_isa_info_t attribute,
                                          void* value) {
  const hsa_status_t status = coreApiTable->hsa_isa_get_info_alt_fn(isa, attribute, value);
  return status;
}

// ISA policy, wavefront and compatibility stubs, dispatched through coreApiTable.
hsa_status_t HSA_API hsa_isa_get_exception_policies(hsa_isa_t isa, hsa_profile_t profile,
                                                    uint16_t* mask) {
  const hsa_status_t status = coreApiTable->hsa_isa_get_exception_policies_fn(isa, profile, mask);
  return status;
}

hsa_status_t HSA_API hsa_isa_get_round_method(hsa_isa_t isa, hsa_fp_type_t fp_type,
                                              hsa_flush_mode_t flush_mode,
                                              hsa_round_method_t* round_method) {
  const hsa_status_t status =
      coreApiTable->hsa_isa_get_round_method_fn(isa, fp_type, flush_mode, round_method);
  return status;
}

hsa_status_t HSA_API hsa_wavefront_get_info(hsa_wavefront_t wavefront,
                                            hsa_wavefront_info_t attribute, void* value) {
  const hsa_status_t status =
      coreApiTable->hsa_wavefront_get_info_fn(wavefront, attribute, value);
  return status;
}

hsa_status_t HSA_API hsa_isa_iterate_wavefronts(
    hsa_isa_t isa, hsa_status_t (*callback)(hsa_wavefront_t wavefront, void* data), void* data) {
  const hsa_status_t status = coreApiTable->hsa_isa_iterate_wavefronts_fn(isa, callback, data);
  return status;
}

/* deprecated */ hsa_status_t HSA_API hsa_isa_compatible(hsa_isa_t code_object_isa,
                                                         hsa_isa_t agent_isa, bool* result) {
  const hsa_status_t status =
      coreApiTable->hsa_isa_compatible_fn(code_object_isa, agent_isa, result);
  return status;
}

//===--- Code Objects (deprecated) ----------------------------------------===//

// Deprecated hsa_code_object_serialize entry point: forwards the code object,
// the allocation callback with its callback data, the options string and both
// output pointers to coreApiTable->hsa_code_object_serialize_fn.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_serialize(
    hsa_code_object_t code_object,
    hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data,
                                   void **address),
    hsa_callback_data_t callback_data, const char *options,
    void **serialized_code_object, size_t *serialized_code_object_size) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_serialize_fn(
      code_object, alloc_callback, callback_data, options,
      serialized_code_object, serialized_code_object_size);
  return rc;
}

// Deprecated hsa_code_object_deserialize entry point: forwards to
// coreApiTable->hsa_code_object_deserialize_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_deserialize(
    void *serialized_code_object, size_t serialized_code_object_size,
    const char *options, hsa_code_object_t *code_object) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_deserialize_fn(
      serialized_code_object, serialized_code_object_size, options,
      code_object);
  return rc;
}

// Deprecated hsa_code_object_destroy entry point: forwards the handle to
// coreApiTable->hsa_code_object_destroy_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API
hsa_code_object_destroy(hsa_code_object_t code_object) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_destroy_fn(code_object);
  return rc;
}

// Deprecated hsa_code_object_get_info entry point: forwards to
// coreApiTable->hsa_code_object_get_info_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_get_info(
    hsa_code_object_t code_object, hsa_code_object_info_t attribute,
    void *value) {
  const hsa_status_t rc =
      coreApiTable->hsa_code_object_get_info_fn(code_object, attribute, value);
  return rc;
}

// Deprecated hsa_code_object_get_symbol entry point: forwards to
// coreApiTable->hsa_code_object_get_symbol_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_get_symbol(
    hsa_code_object_t code_object, const char *symbol_name,
    hsa_code_symbol_t *symbol) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_get_symbol_fn(
      code_object, symbol_name, symbol);
  return rc;
}

// Deprecated hsa_code_object_get_symbol_from_name entry point: forwards the
// code object, module name, symbol name and output pointer to the matching
// coreApiTable slot.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_get_symbol_from_name(
    hsa_code_object_t code_object, const char *module_name,
    const char *symbol_name, hsa_code_symbol_t *symbol) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_get_symbol_from_name_fn(
      code_object, module_name, symbol_name, symbol);
  return rc;
}

// Deprecated hsa_code_symbol_get_info entry point: forwards to
// coreApiTable->hsa_code_symbol_get_info_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_code_symbol_get_info(
    hsa_code_symbol_t code_symbol, hsa_code_symbol_info_t attribute,
    void *value) {
  const hsa_status_t rc =
      coreApiTable->hsa_code_symbol_get_info_fn(code_symbol, attribute, value);
  return rc;
}

// Deprecated hsa_code_object_iterate_symbols entry point: passes the code
// object, the caller's callback and its void* data argument to the
// coreApiTable slot.
/* deprecated */ hsa_status_t HSA_API hsa_code_object_iterate_symbols(
    hsa_code_object_t code_object,
    hsa_status_t (*callback)(hsa_code_object_t code_object,
                             hsa_code_symbol_t symbol, void *data),
    void *data) {
  const hsa_status_t rc = coreApiTable->hsa_code_object_iterate_symbols_fn(
      code_object, callback, data);
  return rc;
}

//===--- Executable -------------------------------------------------------===//

// Public hsa_code_object_reader_create_from_file entry point: forwards the
// file handle and output pointer to the matching coreApiTable slot.
hsa_status_t HSA_API hsa_code_object_reader_create_from_file(
    hsa_file_t file, hsa_code_object_reader_t *code_object_reader) {
  const hsa_status_t rc =
      coreApiTable->hsa_code_object_reader_create_from_file_fn(
          file, code_object_reader);
  return rc;
}

// Public hsa_code_object_reader_create_from_memory entry point: forwards the
// buffer pointer, its size and the output pointer to the matching
// coreApiTable slot.
hsa_status_t HSA_API hsa_code_object_reader_create_from_memory(
    const void *code_object, size_t size,
    hsa_code_object_reader_t *code_object_reader) {
  const hsa_status_t rc =
      coreApiTable->hsa_code_object_reader_create_from_memory_fn(
          code_object, size, code_object_reader);
  return rc;
}

// Public hsa_code_object_reader_destroy entry point: forwards the reader
// handle to coreApiTable->hsa_code_object_reader_destroy_fn.
hsa_status_t HSA_API
hsa_code_object_reader_destroy(hsa_code_object_reader_t code_object_reader) {
  const hsa_status_t rc =
      coreApiTable->hsa_code_object_reader_destroy_fn(code_object_reader);
  return rc;
}

// Deprecated hsa_executable_create entry point: forwards to
// coreApiTable->hsa_executable_create_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_executable_create(
    hsa_profile_t profile, hsa_executable_state_t executable_state,
    const char *options, hsa_executable_t *executable) {
  const hsa_status_t rc = coreApiTable->hsa_executable_create_fn(
      profile, executable_state, options, executable);
  return rc;
}

// Public hsa_executable_create_alt entry point: forwards to
// coreApiTable->hsa_executable_create_alt_fn and returns its status.
hsa_status_t HSA_API hsa_executable_create_alt(
    hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char *options, hsa_executable_t *executable) {
  const hsa_status_t rc = coreApiTable->hsa_executable_create_alt_fn(
      profile, default_float_rounding_mode, options, executable);
  return rc;
}

// Public hsa_executable_destroy entry point: forwards the executable handle
// to coreApiTable->hsa_executable_destroy_fn and returns its status.
hsa_status_t HSA_API hsa_executable_destroy(hsa_executable_t executable) {
  const hsa_status_t rc = coreApiTable->hsa_executable_destroy_fn(executable);
  return rc;
}

// Deprecated hsa_executable_load_code_object entry point: forwards to
// coreApiTable->hsa_executable_load_code_object_fn and returns its status.
/* deprecated */ hsa_status_t HSA_API hsa_executable_load_code_object(
    hsa_executable_t executable, hsa_agent_t agent,
    hsa_code_object_t code_object, const char *options) {
  const hsa_status_t rc = coreApiTable->hsa_executable_load_code_object_fn(
      executable, agent, code_object, options);
  return rc;
}

// Public hsa_executable_load_program_code_object entry point: forwards the
// executable, reader, options and output pointer to the matching
// coreApiTable slot.
hsa_status_t HSA_API hsa_executable_load_program_code_object(
    hsa_executable_t executable, hsa_code_object_reader_t code_object_reader,
    const char *options, hsa_loaded_code_object_t *loaded_code_object) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_load_program_code_object_fn(
          executable, code_object_reader, options, loaded_code_object);
  return rc;
}

// Public hsa_executable_load_agent_code_object entry point: forwards the
// executable, agent, reader, options and output pointer to the matching
// coreApiTable slot.
hsa_status_t HSA_API hsa_executable_load_agent_code_object(
    hsa_executable_t executable, hsa_agent_t agent,
    hsa_code_object_reader_t code_object_reader, const char *options,
    hsa_loaded_code_object_t *loaded_code_object) {
  const hsa_status_t rc = coreApiTable->hsa_executable_load_agent_code_object_fn(
      executable, agent, code_object_reader, options, loaded_code_object);
  return rc;
}

// Public hsa_executable_freeze entry point: forwards to
// coreApiTable->hsa_executable_freeze_fn and returns its status.
hsa_status_t HSA_API hsa_executable_freeze(hsa_executable_t executable,
                                           const char *options) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_freeze_fn(executable, options);
  return rc;
}

// Public hsa_executable_get_info entry point: forwards to
// coreApiTable->hsa_executable_get_info_fn and returns its status.
hsa_status_t HSA_API hsa_executable_get_info(
    hsa_executable_t executable, hsa_executable_info_t attribute, void *value) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_get_info_fn(executable, attribute, value);
  return rc;
}

// Public hsa_executable_global_variable_define entry point: forwards the
// executable, variable name and address to the matching coreApiTable slot.
hsa_status_t HSA_API hsa_executable_global_variable_define(
    hsa_executable_t executable, const char *variable_name, void *address) {
  const hsa_status_t rc = coreApiTable->hsa_executable_global_variable_define_fn(
      executable, variable_name, address);
  return rc;
}

// Public hsa_executable_agent_global_variable_define entry point: forwards
// the executable, agent, variable name and address to the matching
// coreApiTable slot.
hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
    hsa_executable_t executable, hsa_agent_t agent, const char *variable_name,
    void *address) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_agent_global_variable_define_fn(
          executable, agent, variable_name, address);
  return rc;
}

// Public hsa_executable_readonly_variable_define entry point: forwards the
// executable, agent, variable name and address to the matching coreApiTable
// slot.
hsa_status_t HSA_API hsa_executable_readonly_variable_define(
    hsa_executable_t executable, hsa_agent_t agent, const char *variable_name,
    void *address) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_readonly_variable_define_fn(
          executable, agent, variable_name, address);
  return rc;
}

// Public hsa_executable_validate entry point: forwards the executable and
// the `result` out-pointer to coreApiTable->hsa_executable_validate_fn.
hsa_status_t HSA_API hsa_executable_validate(hsa_executable_t executable,
                                             uint32_t *result) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_validate_fn(executable, result);
  return rc;
}

// Public hsa_executable_validate_alt entry point: forwards to
// coreApiTable->hsa_executable_validate_alt_fn and returns its status.
hsa_status_t HSA_API hsa_executable_validate_alt(
    hsa_executable_t executable, const char *options, uint32_t *result) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_validate_alt_fn(executable, options, result);
  return rc;
}

// Deprecated hsa_executable_get_symbol entry point: forwards all six
// arguments to coreApiTable->hsa_executable_get_symbol_fn.
/* deprecated */ hsa_status_t HSA_API hsa_executable_get_symbol(
    hsa_executable_t executable, const char *module_name,
    const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
    hsa_executable_symbol_t *symbol) {
  const hsa_status_t rc = coreApiTable->hsa_executable_get_symbol_fn(
      executable, module_name, symbol_name, agent, call_convention, symbol);
  return rc;
}

// Public hsa_executable_get_symbol_by_name entry point: forwards to
// coreApiTable->hsa_executable_get_symbol_by_name_fn; `agent` is passed
// through as the pointer the caller supplied.
hsa_status_t HSA_API hsa_executable_get_symbol_by_name(
    hsa_executable_t executable, const char *symbol_name,
    const hsa_agent_t *agent, hsa_executable_symbol_t *symbol) {
  const hsa_status_t rc = coreApiTable->hsa_executable_get_symbol_by_name_fn(
      executable, symbol_name, agent, symbol);
  return rc;
}

// Public hsa_executable_symbol_get_info entry point: forwards to
// coreApiTable->hsa_executable_symbol_get_info_fn and returns its status.
hsa_status_t HSA_API hsa_executable_symbol_get_info(
    hsa_executable_symbol_t executable_symbol,
    hsa_executable_symbol_info_t attribute, void *value) {
  const hsa_status_t rc = coreApiTable->hsa_executable_symbol_get_info_fn(
      executable_symbol, attribute, value);
  return rc;
}

// Deprecated hsa_executable_iterate_symbols entry point: passes the
// executable, the caller's callback and its void* data argument to the
// coreApiTable slot.
/* deprecated */ hsa_status_t HSA_API hsa_executable_iterate_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t executable,
                             hsa_executable_symbol_t symbol, void *data),
    void *data) {
  const hsa_status_t rc = coreApiTable->hsa_executable_iterate_symbols_fn(
      executable, callback, data);
  return rc;
}

// Public hsa_executable_iterate_agent_symbols entry point: passes the
// executable, agent, the caller's callback and its void* data argument to the
// coreApiTable slot.
hsa_status_t HSA_API hsa_executable_iterate_agent_symbols(
    hsa_executable_t executable, hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_executable_t exec, hsa_agent_t agent,
                             hsa_executable_symbol_t symbol, void *data),
    void *data) {
  const hsa_status_t rc = coreApiTable->hsa_executable_iterate_agent_symbols_fn(
      executable, agent, callback, data);
  return rc;
}

// Public hsa_executable_iterate_program_symbols entry point: passes the
// executable, the caller's callback and its void* data argument to the
// coreApiTable slot.
hsa_status_t HSA_API hsa_executable_iterate_program_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol, void *data),
    void *data) {
  const hsa_status_t rc =
      coreApiTable->hsa_executable_iterate_program_symbols_fn(
          executable, callback, data);
  return rc;
}

//===--- Runtime Notifications --------------------------------------------===//

// Public hsa_status_string entry point: forwards the status code and the
// output string pointer to coreApiTable->hsa_status_string_fn.
hsa_status_t HSA_API hsa_status_string(hsa_status_t status,
                                       const char **status_string) {
  const hsa_status_t rc =
      coreApiTable->hsa_status_string_fn(status, status_string);
  return rc;
}

/*
 * The following functions are bundled as the AMD extension APIs.
 */

// Pass-through stub: forwards to amdExtTable->hsa_amd_coherency_get_type_fn
// and returns its status.
hsa_status_t HSA_API hsa_amd_coherency_get_type(
    hsa_agent_t agent,
    hsa_amd_coherency_type_t* type) {
  const hsa_status_t rc = amdExtTable->hsa_amd_coherency_get_type_fn(agent, type);
  return rc;
}

// Pass-through stub: forwards to amdExtTable->hsa_amd_coherency_set_type_fn
// and returns its status.
hsa_status_t HSA_API hsa_amd_coherency_set_type(
    hsa_agent_t agent,
    hsa_amd_coherency_type_t type) {
  const hsa_status_t rc = amdExtTable->hsa_amd_coherency_set_type_fn(agent, type);
  return rc;
}

// AMD extension trampoline: forwards the queue and the enable flag to
// amdExtTable->hsa_amd_profiling_set_profiler_enabled_fn.
hsa_status_t HSA_API hsa_amd_profiling_set_profiler_enabled(
    hsa_queue_t* queue,
    int enable) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_profiling_set_profiler_enabled_fn(queue, enable);
  return rc;
}

// AMD extension trampoline: forwards the enable flag to
// amdExtTable->hsa_amd_profiling_async_copy_enable_fn.
hsa_status_t HSA_API hsa_amd_profiling_async_copy_enable(bool enable) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_profiling_async_copy_enable_fn(enable);
  return rc;
}

// AMD extension trampoline: forwards the agent, signal and output struct
// pointer to amdExtTable->hsa_amd_profiling_get_dispatch_time_fn.
hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent,
    hsa_signal_t signal,
    hsa_amd_profiling_dispatch_time_t* time) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_profiling_get_dispatch_time_fn(agent, signal, time);
  return rc;
}

// AMD extension trampoline: forwards the signal and output struct pointer to
// amdExtTable->hsa_amd_profiling_get_async_copy_time_fn.
hsa_status_t HSA_API hsa_amd_profiling_get_async_copy_time(
    hsa_signal_t hsa_signal,
    hsa_amd_profiling_async_copy_time_t* time) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_profiling_get_async_copy_time_fn(hsa_signal, time);
  return rc;
}

// AMD extension trampoline: forwards the agent, the agent tick value and the
// output pointer to
// amdExtTable->hsa_amd_profiling_convert_tick_to_system_domain_fn.
hsa_status_t HSA_API hsa_amd_profiling_convert_tick_to_system_domain(
    hsa_agent_t agent,
    uint64_t agent_tick,
    uint64_t* system_tick) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_profiling_convert_tick_to_system_domain_fn(
          agent, agent_tick, system_tick);
  return rc;
}

// AMD extension trampoline: forwards the signal, condition, compare value,
// handler and handler argument to
// amdExtTable->hsa_amd_signal_async_handler_fn.
hsa_status_t HSA_API hsa_amd_signal_async_handler(
    hsa_signal_t signal,
    hsa_signal_condition_t cond,
    hsa_signal_value_t value,
    hsa_amd_signal_handler handler,
    void* arg) {
  const hsa_status_t rc = amdExtTable->hsa_amd_signal_async_handler_fn(
      signal, cond, value, handler, arg);
  return rc;
}

// AMD extension trampoline: forwards the callback and its argument to
// amdExtTable->hsa_amd_async_function_fn.
hsa_status_t HSA_API hsa_amd_async_function(
    void (*callback)(void* arg),
    void* arg) {
  const hsa_status_t rc = amdExtTable->hsa_amd_async_function_fn(callback, arg);
  return rc;
}

// AMD extension trampoline: forwards every argument to
// amdExtTable->hsa_amd_signal_wait_all_fn and returns the uint32_t it yields.
uint32_t HSA_API hsa_amd_signal_wait_all(
    uint32_t signal_count,
    hsa_signal_t* signals,
    hsa_signal_condition_t* conds,
    hsa_signal_value_t* values,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_hint,
    hsa_signal_value_t* satisfying_values) {
  const uint32_t rc = amdExtTable->hsa_amd_signal_wait_all_fn(
      signal_count, signals, conds, values, timeout_hint, wait_hint,
      satisfying_values);
  return rc;
}

// AMD extension trampoline: forwards every argument to
// amdExtTable->hsa_amd_signal_wait_any_fn and returns the uint32_t it yields.
uint32_t HSA_API hsa_amd_signal_wait_any(
    uint32_t signal_count,
    hsa_signal_t* signals,
    hsa_signal_condition_t* conds,
    hsa_signal_value_t* values,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_hint,
    hsa_signal_value_t* satisfying_value) {
  const uint32_t rc = amdExtTable->hsa_amd_signal_wait_any_fn(
      signal_count, signals, conds, values, timeout_hint, wait_hint,
      satisfying_value);
  return rc;
}

// AMD extension trampoline: forwards the queue, the mask count and the mask
// array pointer to amdExtTable->hsa_amd_queue_cu_set_mask_fn.
hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(
    const hsa_queue_t* queue,
    uint32_t num_cu_mask_count,
    const uint32_t* cu_mask) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_queue_cu_set_mask_fn(queue, num_cu_mask_count, cu_mask);
  return rc;
}

// AMD extension trampoline: forwards the queue, the mask count and the mask
// array pointer to amdExtTable->hsa_amd_queue_cu_get_mask_fn.
hsa_status_t HSA_API hsa_amd_queue_cu_get_mask(
    const hsa_queue_t* queue,
    uint32_t num_cu_mask_count,
    uint32_t* cu_mask) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_queue_cu_get_mask_fn(queue, num_cu_mask_count, cu_mask);
  return rc;
}

// AMD extension trampoline: forwards the pool, attribute and value pointer to
// amdExtTable->hsa_amd_memory_pool_get_info_fn.
hsa_status_t HSA_API hsa_amd_memory_pool_get_info(
    hsa_amd_memory_pool_t memory_pool,
    hsa_amd_memory_pool_info_t attribute,
    void* value) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_memory_pool_get_info_fn(memory_pool, attribute, value);
  return rc;
}

// AMD extension trampoline: passes the agent, the caller's callback and its
// void* data argument to amdExtTable->hsa_amd_agent_iterate_memory_pools_fn.
hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
    void* data) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_agent_iterate_memory_pools_fn(agent, callback, data);
  return rc;
}

// AMD extension trampoline: forwards the pool, size, flags and output pointer
// to amdExtTable->hsa_amd_memory_pool_allocate_fn.
hsa_status_t HSA_API hsa_amd_memory_pool_allocate(
    hsa_amd_memory_pool_t memory_pool,
    size_t size,
    uint32_t flags,
    void** ptr) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_memory_pool_allocate_fn(memory_pool, size, flags, ptr);
  return rc;
}

// AMD extension trampoline: forwards the pointer to
// amdExtTable->hsa_amd_memory_pool_free_fn and returns its status.
hsa_status_t HSA_API hsa_amd_memory_pool_free(
    void* ptr) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_pool_free_fn(ptr);
  return rc;
}

// AMD extension trampoline: forwards destination, source, size, dependency
// signals and completion signal to amdExtTable->hsa_amd_memory_async_copy_fn.
hsa_status_t HSA_API hsa_amd_memory_async_copy(
    void* dst,
    hsa_agent_t dst_agent,
    const void* src,
    hsa_agent_t src_agent,
    size_t size,
    uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals,
      completion_signal);
  return rc;
}

// AMD extension trampoline: forwards the copy description plus the engine id
// and force_copy_on_sdma flag to
// amdExtTable->hsa_amd_memory_async_copy_on_engine_fn.
hsa_status_t HSA_API hsa_amd_memory_async_copy_on_engine(
    void* dst,
    hsa_agent_t dst_agent,
    const void* src,
    hsa_agent_t src_agent,
    size_t size,
    uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal,
    hsa_amd_sdma_engine_id_t engine_id,
    bool force_copy_on_sdma) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_async_copy_on_engine_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals,
      completion_signal, engine_id, force_copy_on_sdma);
  return rc;
}

// AMD extension trampoline: forwards both agents and the mask output pointer
// to amdExtTable->hsa_amd_memory_copy_engine_status_fn.
hsa_status_t HSA_API hsa_amd_memory_copy_engine_status(
    hsa_agent_t dst_agent,
    hsa_agent_t src_agent,
    uint32_t *engine_ids_mask) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_copy_engine_status_fn(
      dst_agent, src_agent, engine_ids_mask);
  return rc;
}

// AMD extension trampoline: forwards both agents and the mask output pointer
// to amdExtTable->hsa_amd_memory_get_preferred_copy_engine_fn.
hsa_status_t HSA_API hsa_amd_memory_get_preferred_copy_engine(
    hsa_agent_t dst_agent,
    hsa_agent_t src_agent,
    uint32_t* recommended_ids_mask) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_memory_get_preferred_copy_engine_fn(
          dst_agent, src_agent, recommended_ids_mask);
  return rc;
}

// AMD extension trampoline: forwards the pitched pointers, offsets, range,
// copy agent, direction and signals to
// amdExtTable->hsa_amd_memory_async_copy_rect_fn.
hsa_status_t HSA_API hsa_amd_memory_async_copy_rect(
    const hsa_pitched_ptr_t* dst,
    const hsa_dim3_t* dst_offset,
    const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset,
    const hsa_dim3_t* range,
    hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir,
    uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir,
      num_dep_signals, dep_signals, completion_signal);
  return rc;
}

// AMD extension trampoline: forwards the agent, pool, attribute and value
// pointer to amdExtTable->hsa_amd_agent_memory_pool_get_info_fn.
hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info(
    hsa_agent_t agent,
    hsa_amd_memory_pool_t memory_pool,
    hsa_amd_agent_memory_pool_info_t attribute,
    void* value) {
  const hsa_status_t rc = amdExtTable->hsa_amd_agent_memory_pool_get_info_fn(
      agent, memory_pool, attribute, value);
  return rc;
}

// AMD extension trampoline: forwards the agent list, flags pointer and target
// pointer to amdExtTable->hsa_amd_agents_allow_access_fn.
hsa_status_t HSA_API hsa_amd_agents_allow_access(
    uint32_t num_agents,
    const hsa_agent_t* agents,
    const uint32_t* flags,
    const void* ptr) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr);
  return rc;
}

// AMD extension trampoline: forwards both pools and the `result` out-pointer
// to amdExtTable->hsa_amd_memory_pool_can_migrate_fn.
hsa_status_t HSA_API hsa_amd_memory_pool_can_migrate(
    hsa_amd_memory_pool_t src_memory_pool,
    hsa_amd_memory_pool_t dst_memory_pool,
    bool* result) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_pool_can_migrate_fn(
      src_memory_pool, dst_memory_pool, result);
  return rc;
}

// AMD extension trampoline: forwards the pointer, target pool and flags to
// amdExtTable->hsa_amd_memory_migrate_fn.
hsa_status_t HSA_API hsa_amd_memory_migrate(
    const void* ptr,
    hsa_amd_memory_pool_t memory_pool,
    uint32_t flags) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_memory_migrate_fn(ptr, memory_pool, flags);
  return rc;
}

// AMD extension trampoline: forwards the host pointer, size, agent list and
// output pointer to amdExtTable->hsa_amd_memory_lock_fn.
hsa_status_t HSA_API hsa_amd_memory_lock(
    void* host_ptr,
    size_t size,
    hsa_agent_t* agents,
    int num_agent,
    void** agent_ptr) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_lock_fn(
      host_ptr, size, agents, num_agent, agent_ptr);
  return rc;
}

// AMD extension trampoline: forwards the host pointer, size, agent list, pool,
// flags and output pointer to amdExtTable->hsa_amd_memory_lock_to_pool_fn.
hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(
    void* host_ptr,
    size_t size,
    hsa_agent_t* agents,
    int num_agent,
    hsa_amd_memory_pool_t pool,
    uint32_t flags,
    void** agent_ptr) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_lock_to_pool_fn(
      host_ptr, size, agents, num_agent, pool, flags, agent_ptr);
  return rc;
}

// AMD extension trampoline: forwards the host pointer to
// amdExtTable->hsa_amd_memory_unlock_fn and returns its status.
hsa_status_t HSA_API hsa_amd_memory_unlock(
    void* host_ptr) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_unlock_fn(host_ptr);
  return rc;
}

// AMD extension trampoline: forwards the pointer, fill value and count to
// amdExtTable->hsa_amd_memory_fill_fn.
hsa_status_t HSA_API hsa_amd_memory_fill(
    void* ptr,
    uint32_t value,
    size_t count) {
  const hsa_status_t rc = amdExtTable->hsa_amd_memory_fill_fn(ptr, value, count);
  return rc;
}

// AMD extension trampoline: forwards the agent list, interop handle, flags and
// all four output pointers to amdExtTable->hsa_amd_interop_map_buffer_fn.
hsa_status_t HSA_API hsa_amd_interop_map_buffer(
    uint32_t num_agents,
    hsa_agent_t* agents,
    int interop_handle,
    uint32_t flags,
    size_t* size,
    void** ptr,
    size_t* metadata_size,
    const void** metadata) {
  const hsa_status_t rc = amdExtTable->hsa_amd_interop_map_buffer_fn(
      num_agents, agents, interop_handle, flags, size, ptr, metadata_size,
      metadata);
  return rc;
}

// AMD extension trampoline: forwards the pointer to
// amdExtTable->hsa_amd_interop_unmap_buffer_fn and returns its status.
hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(
    void* ptr) {
  const hsa_status_t rc = amdExtTable->hsa_amd_interop_unmap_buffer_fn(ptr);
  return rc;
}

// AMD extension trampoline: forwards the agent, both descriptors, the image
// data pointer, the access permission and the output image pointer to
// amdExtTable->hsa_amd_image_create_fn.
hsa_status_t HSA_API hsa_amd_image_create(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
    const hsa_amd_image_descriptor_t *image_layout, const void *image_data,
    hsa_access_permission_t access_permission, hsa_ext_image_t *image) {
  const hsa_status_t rc = amdExtTable->hsa_amd_image_create_fn(
      agent, image_descriptor, image_layout, image_data, access_permission,
      image);
  return rc;
}

// AMD extension trampoline: forwards the pointer, info struct, allocator
// callback and both accessibility outputs to
// amdExtTable->hsa_amd_pointer_info_fn and returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_pointer_info(const void* ptr, hsa_amd_pointer_info_t* info,
                                          void* (*alloc)(size_t),
                                          uint32_t* num_agents_accessible,
                                          hsa_agent_t** accessible) {
  return amdExtTable->hsa_amd_pointer_info_fn(ptr, info, alloc, num_agents_accessible, accessible);
}

// AMD extension trampoline: forwards the pointer and the user pointer to
// amdExtTable->hsa_amd_pointer_info_set_userdata_fn and returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(const void* ptr, void* userptr) {
  return amdExtTable->hsa_amd_pointer_info_set_userdata_fn(ptr, userptr);
}

// AMD extension trampoline: forwards the pointer, length and output handle to
// amdExtTable->hsa_amd_ipc_memory_create_fn and returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_ipc_memory_create(void* ptr, size_t len,
                                               hsa_amd_ipc_memory_t* handle) {
  return amdExtTable->hsa_amd_ipc_memory_create_fn(ptr, len, handle);
}

// AMD extension trampoline: forwards the IPC handle, length, mapping agent
// list and output pointer to amdExtTable->hsa_amd_ipc_memory_attach_fn and
// returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* ipc, size_t len,
                                               uint32_t num_agents,
                                               const hsa_agent_t* mapping_agents,
                                               void** mapped_ptr) {
  return amdExtTable->hsa_amd_ipc_memory_attach_fn(ipc, len, num_agents, mapping_agents,
                                                   mapped_ptr);
}

// AMD extension trampoline: forwards the mapped pointer to
// amdExtTable->hsa_amd_ipc_memory_detach_fn and returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_ipc_memory_detach(void* mapped_ptr) {
  return amdExtTable->hsa_amd_ipc_memory_detach_fn(mapped_ptr);
}

// AMD extension trampoline: forwards the initial value, consumer list,
// attributes and output signal pointer to
// amdExtTable->hsa_amd_signal_create_fn and returns its status.
// HSA_API is spelled out here so this definition carries the same decoration
// as the other public forwarders in this file.
hsa_status_t HSA_API hsa_amd_signal_create(hsa_signal_value_t initial_value,
                                           uint32_t num_consumers,
                                           const hsa_agent_t* consumers, uint64_t attributes,
                                           hsa_signal_t* signal) {
  return amdExtTable->hsa_amd_signal_create_fn(initial_value, num_consumers, consumers, attributes,
                                               signal);
}

// AMD extension trampoline: forwards the signal and output handle pointer to
// amdExtTable->hsa_amd_ipc_signal_create_fn.
hsa_status_t HSA_API hsa_amd_ipc_signal_create(
    hsa_signal_t signal,
    hsa_amd_ipc_signal_t* handle) {
  const hsa_status_t rc = amdExtTable->hsa_amd_ipc_signal_create_fn(signal, handle);
  return rc;
}

// AMD extension trampoline: forwards the IPC handle and output signal pointer
// to amdExtTable->hsa_amd_ipc_signal_attach_fn.
hsa_status_t HSA_API hsa_amd_ipc_signal_attach(
    const hsa_amd_ipc_signal_t* handle,
    hsa_signal_t* signal) {
  const hsa_status_t rc = amdExtTable->hsa_amd_ipc_signal_attach_fn(handle, signal);
  return rc;
}

// AMD extension trampoline: forwards the callback and its data pointer to
// amdExtTable->hsa_amd_register_system_event_handler_fn.
hsa_status_t HSA_API hsa_amd_register_system_event_handler(
    hsa_amd_system_event_callback_t callback,
    void* data) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_register_system_event_handler_fn(callback, data);
  return rc;
}

// AMD extension trampoline: forwards the queue and priority to
// amdExtTable->hsa_amd_queue_set_priority_fn.
hsa_status_t HSA_API hsa_amd_queue_set_priority(
    hsa_queue_t* queue,
    hsa_amd_queue_priority_t priority) {
  const hsa_status_t rc = amdExtTable->hsa_amd_queue_set_priority_fn(queue, priority);
  return rc;
}

// AMD extension trampoline: forwards the pointer, callback and user data to
// amdExtTable->hsa_amd_register_deallocation_callback_fn.
hsa_status_t HSA_API hsa_amd_register_deallocation_callback(
    void* ptr,
    hsa_amd_deallocation_callback_t callback,
    void* user_data) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_register_deallocation_callback_fn(ptr, callback, user_data);
  return rc;
}

// AMD extension trampoline: forwards the pointer and callback to
// amdExtTable->hsa_amd_deregister_deallocation_callback_fn.
hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(
    void* ptr,
    hsa_amd_deallocation_callback_t callback) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_deregister_deallocation_callback_fn(ptr, callback);
  return rc;
}

// AMD extension trampoline: forwards the signal and the output pointer to
// amdExtTable->hsa_amd_signal_value_pointer_fn.
hsa_status_t HSA_API hsa_amd_signal_value_pointer(
    hsa_signal_t signal,
    volatile hsa_signal_value_t** value_ptr) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_signal_value_pointer_fn(signal, value_ptr);
  return rc;
}

// AMD extension trampoline: forwards the range (ptr, size) and the attribute
// list with its count to amdExtTable->hsa_amd_svm_attributes_set_fn.
hsa_status_t HSA_API hsa_amd_svm_attributes_set(
    void* ptr,
    size_t size,
    hsa_amd_svm_attribute_pair_t* attribute_list,
    size_t attribute_count) {
  const hsa_status_t rc = amdExtTable->hsa_amd_svm_attributes_set_fn(
      ptr, size, attribute_list, attribute_count);
  return rc;
}

// AMD extension trampoline: forwards the range (ptr, size) and the attribute
// list with its count to amdExtTable->hsa_amd_svm_attributes_get_fn.
hsa_status_t HSA_API hsa_amd_svm_attributes_get(
    void* ptr,
    size_t size,
    hsa_amd_svm_attribute_pair_t* attribute_list,
    size_t attribute_count) {
  const hsa_status_t rc = amdExtTable->hsa_amd_svm_attributes_get_fn(
      ptr, size, attribute_list, attribute_count);
  return rc;
}

// AMD extension trampoline: forwards the range, agent, dependency signals and
// completion signal to amdExtTable->hsa_amd_svm_prefetch_async_fn.
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(
    void* ptr,
    size_t size,
    hsa_agent_t agent,
    uint32_t num_dep_signals,
    const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal) {
  const hsa_status_t rc = amdExtTable->hsa_amd_svm_prefetch_async_fn(
      ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
  return rc;
}

// AMD extension trampoline: forwards the agent to
// amdExtTable->hsa_amd_spm_acquire_fn and returns its status.
hsa_status_t HSA_API hsa_amd_spm_acquire(
    hsa_agent_t agent) {
  const hsa_status_t rc = amdExtTable->hsa_amd_spm_acquire_fn(agent);
  return rc;
}

// AMD extension trampoline: forwards the agent to
// amdExtTable->hsa_amd_spm_release_fn and returns its status.
hsa_status_t HSA_API hsa_amd_spm_release(
    hsa_agent_t agent) {
  const hsa_status_t rc = amdExtTable->hsa_amd_spm_release_fn(agent);
  return rc;
}

// AMD extension trampoline: forwards the agent, size, timeout pointer,
// size_copied pointer, destination buffer and is_data_loss pointer to
// amdExtTable->hsa_amd_spm_set_dest_buffer_fn.
hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(
    hsa_agent_t agent,
    size_t size,
    uint32_t* timeout,
    uint32_t* size_copied,
    void* dest,
    bool* is_data_loss) {
  const hsa_status_t rc = amdExtTable->hsa_amd_spm_set_dest_buffer_fn(
      agent, size, timeout, size_copied, dest, is_data_loss);
  return rc;
}

// AMD extension trampoline: forwards the range (ptr, size) and both output
// pointers to amdExtTable->hsa_amd_portable_export_dmabuf_fn.
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(
    const void* ptr,
    size_t size,
    int* dmabuf,
    uint64_t* offset) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_portable_export_dmabuf_fn(ptr, size, dmabuf, offset);
  return rc;
}

// AMD extension trampoline: forwards the range (ptr, size), both output
// pointers and the flags to amdExtTable->hsa_amd_portable_export_dmabuf_v2_fn.
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(
    const void* ptr,
    size_t size,
    int* dmabuf,
    uint64_t* offset,
    uint64_t flags) {
  const hsa_status_t rc = amdExtTable->hsa_amd_portable_export_dmabuf_v2_fn(
      ptr, size, dmabuf, offset, flags);
  return rc;
}

// AMD extension trampoline: forwards the dmabuf value to
// amdExtTable->hsa_amd_portable_close_dmabuf_fn and returns its status.
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(
    int dmabuf) {
  const hsa_status_t rc = amdExtTable->hsa_amd_portable_close_dmabuf_fn(dmabuf);
  return rc;
}

// AMD extension trampoline: forwards the output pointer, size, requested
// address and flags to amdExtTable->hsa_amd_vmem_address_reserve_fn.
hsa_status_t HSA_API hsa_amd_vmem_address_reserve(
    void** ptr,
    size_t size,
    uint64_t address,
    uint64_t flags) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_address_reserve_fn(ptr, size, address, flags);
  return rc;
}

// AMD extension trampoline: forwards the output pointer, size, requested
// address, alignment and flags to
// amdExtTable->hsa_amd_vmem_address_reserve_align_fn.
hsa_status_t HSA_API hsa_amd_vmem_address_reserve_align(
    void** ptr,
    size_t size,
    uint64_t address,
    uint64_t alignment,
    uint64_t flags) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_address_reserve_align_fn(
      ptr, size, address, alignment, flags);
  return rc;
}

// AMD extension trampoline: forwards the pointer and size to
// amdExtTable->hsa_amd_vmem_address_free_fn.
hsa_status_t HSA_API hsa_amd_vmem_address_free(
    void* ptr,
    size_t size) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_address_free_fn(ptr, size);
  return rc;
}

// AMD extension trampoline: forwards the pool, size, memory type, flags and
// output handle pointer to amdExtTable->hsa_amd_vmem_handle_create_fn.
hsa_status_t HSA_API hsa_amd_vmem_handle_create(
    hsa_amd_memory_pool_t pool,
    size_t size,
    hsa_amd_memory_type_t type,
    uint64_t flags,
    hsa_amd_vmem_alloc_handle_t* memory_handle) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_handle_create_fn(
      pool, size, type, flags, memory_handle);
  return rc;
}

// AMD extension trampoline: forwards the allocation handle to
// amdExtTable->hsa_amd_vmem_handle_release_fn.
hsa_status_t HSA_API hsa_amd_vmem_handle_release(
    hsa_amd_vmem_alloc_handle_t memory_handle) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_handle_release_fn(memory_handle);
  return rc;
}

// AMD extension trampoline: forwards the virtual address, size, offset,
// allocation handle and flags to amdExtTable->hsa_amd_vmem_map_fn.
hsa_status_t HSA_API hsa_amd_vmem_map(
    void* va,
    size_t size,
    size_t in_offset,
    hsa_amd_vmem_alloc_handle_t memory_handle,
    uint64_t flags) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_map_fn(va, size, in_offset, memory_handle, flags);
  return rc;
}

// AMD extension trampoline: forwards the virtual address and size to
// amdExtTable->hsa_amd_vmem_unmap_fn.
hsa_status_t HSA_API hsa_amd_vmem_unmap(
    void* va,
    size_t size) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_unmap_fn(va, size);
  return rc;
}

// AMD extension trampoline: forwards the virtual address, size and the
// access-descriptor array with its count to
// amdExtTable->hsa_amd_vmem_set_access_fn.
hsa_status_t HSA_API hsa_amd_vmem_set_access(
    void* va,
    size_t size,
    const hsa_amd_memory_access_desc_t* desc,
    const size_t desc_cnt) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_set_access_fn(va, size, desc, desc_cnt);
  return rc;
}

// AMD extension trampoline: forwards the virtual address, permission output
// pointer and agent handle to amdExtTable->hsa_amd_vmem_get_access_fn.
hsa_status_t HSA_API hsa_amd_vmem_get_access(
    void* va,
    hsa_access_permission_t* perms,
    const hsa_agent_t agent_handle) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_get_access_fn(va, perms, agent_handle);
  return rc;
}

// AMD extension trampoline: forwards the dmabuf fd output pointer, the
// allocation handle and flags to
// amdExtTable->hsa_amd_vmem_export_shareable_handle_fn.
hsa_status_t HSA_API hsa_amd_vmem_export_shareable_handle(
    int* dmabuf_fd,
    hsa_amd_vmem_alloc_handle_t handle,
    uint64_t flags) {
  const hsa_status_t rc = amdExtTable->hsa_amd_vmem_export_shareable_handle_fn(
      dmabuf_fd, handle, flags);
  return rc;
}

// AMD extension trampoline: forwards the dmabuf fd and the output handle
// pointer to amdExtTable->hsa_amd_vmem_import_shareable_handle_fn.
hsa_status_t HSA_API hsa_amd_vmem_import_shareable_handle(
    int dmabuf_fd,
    hsa_amd_vmem_alloc_handle_t* handle) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_import_shareable_handle_fn(dmabuf_fd, handle);
  return rc;
}

// AMD extension trampoline: forwards the output handle pointer and the
// address to amdExtTable->hsa_amd_vmem_retain_alloc_handle_fn.
hsa_status_t HSA_API hsa_amd_vmem_retain_alloc_handle(
    hsa_amd_vmem_alloc_handle_t* handle,
    void* addr) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_retain_alloc_handle_fn(handle, addr);
  return rc;
}

// AMD extension trampoline: forwards the allocation handle and both output
// pointers to amdExtTable->hsa_amd_vmem_get_alloc_properties_from_handle_fn.
hsa_status_t HSA_API hsa_amd_vmem_get_alloc_properties_from_handle(
    hsa_amd_vmem_alloc_handle_t alloc_handle,
    hsa_amd_memory_pool_t* pool,
    hsa_amd_memory_type_t* type) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_vmem_get_alloc_properties_from_handle_fn(
          alloc_handle, pool, type);
  return rc;
}

// AMD extension trampoline: forwards the agent and threshold to
// amdExtTable->hsa_amd_agent_set_async_scratch_limit_fn.
hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(
    hsa_agent_t agent,
    size_t threshold) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_agent_set_async_scratch_limit_fn(agent, threshold);
  return rc;
}

// AMD extension trampoline: forwards the queue, attribute and value pointer
// to amdExtTable->hsa_amd_queue_get_info_fn.
hsa_status_t HSA_API hsa_amd_queue_get_info(
    hsa_queue_t* queue,
    hsa_queue_info_attribute_t attribute,
    void* value) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_queue_get_info_fn(queue, attribute, value);
  return rc;
}

// AMD extension trampoline: forwards the flags pointer and the file pointer
// to amdExtTable->hsa_amd_enable_logging_fn.
hsa_status_t HSA_API hsa_amd_enable_logging(
    uint8_t* flags,
    void* file) {
  const hsa_status_t rc = amdExtTable->hsa_amd_enable_logging_fn(flags, file);
  return rc;
}

// Tools only table interfaces.
namespace rocr {

// Forwards every argument to amdExtTable->hsa_amd_queue_intercept_create_fn
// and returns its status. Defined inside namespace rocr, without HSA_API.
hsa_status_t hsa_amd_queue_intercept_create(
    hsa_agent_t agent_handle,
    uint32_t size,
    hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
    void* data,
    uint32_t private_segment_size,
    uint32_t group_segment_size,
    hsa_queue_t** queue) {
  const hsa_status_t rc = amdExtTable->hsa_amd_queue_intercept_create_fn(
      agent_handle, size, type, callback, data, private_segment_size,
      group_segment_size, queue);
  return rc;
}

// Forwards the queue, handler and user data to
// amdExtTable->hsa_amd_queue_intercept_register_fn and returns its status.
hsa_status_t hsa_amd_queue_intercept_register(
    hsa_queue_t* queue,
    hsa_amd_queue_intercept_handler callback,
    void* user_data) {
  const hsa_status_t rc =
      amdExtTable->hsa_amd_queue_intercept_register_fn(queue, callback, user_data);
  return rc;
}

}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/common/shared.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_SHARED_H_
#define HSA_RUNTME_CORE_INC_SHARED_H_

#include <assert.h>
#include <cstring>
#include <functional>
#include <memory>

#include "core/util/utils.h"

namespace rocr {
namespace core {
/// @brief Base class encapsulating the allocator and deallocator used for
/// shared objects.  As used, this will allocate GPU visible host
/// memory mapped to all GPUs.
class BaseShared {
 public:
  /// @brief Installs the process-wide callbacks used by the shared containers.
  /// @param alloc Callback stored as the allocator; it is invoked by the
  /// containers in this header as (size, alignment, flags, node id).
  /// @param fr Callback stored as the matching deallocator.
  static void SetAllocateAndFree(
      const std::function<void*(size_t, size_t, uint32_t, int)>& alloc,
      const std::function<void(void*)>& fr) {
    allocate_() = alloc;
    free_() = fr;
  }

 protected:
  /// @brief Accessor for the stored allocation callback.  The callback lives
  /// in a function-local static and is empty until SetAllocateAndFree is called.
  static __forceinline std::function<void*(size_t, size_t, uint32_t, int)>& allocate_() {
    static std::function<void*(size_t, size_t, uint32_t, int)> stored_allocator;
    return stored_allocator;
  }

  /// @brief Accessor for the stored release callback.  The callback lives in a
  /// function-local static and is empty until SetAllocateAndFree is called.
  static __forceinline std::function<void(void*)>& free_() {
    static std::function<void(void*)> stored_deallocator;
    return stored_deallocator;
  }
};

/// @brief Default Allocator for Shared.  Ensures allocations are whole pages.
template <typename T> class PageAllocator : private BaseShared {
 public:
  /// @brief Allocates and default-constructs a T with node id 0.
  /// @param flags Passed through to the installed allocation callback.
  /// @throws std::bad_alloc if the allocation callback returns nullptr.
  __forceinline static T* alloc(int flags = 0) { return alloc(0, flags); }

  /// @brief Allocates and default-constructs a T for the given node id.
  /// The request size is sizeof(T) rounded up to a multiple of 4096 and the
  /// requested alignment is 4096.
  /// @param agent_node_id Passed as the last argument of the allocation callback.
  /// @param flags Passed through to the installed allocation callback.
  /// @throws std::bad_alloc if the allocation callback returns nullptr.
  __forceinline static T* alloc(int agent_node_id, int flags) {
    void* raw = allocate_()(AlignUp(sizeof(T), 4096), 4096, flags, agent_node_id);
    T* object = reinterpret_cast<T*>(raw);
    if (object == nullptr) throw std::bad_alloc();

    // Give the pages back if T's constructor throws.
    MAKE_NAMED_SCOPE_GUARD(releaseOnThrow, [&]() { free_()(object); });

    new (object) T;

    releaseOnThrow.Dismiss();
    return object;
  }

  /// @brief Destroys *ptr and returns its storage to the release callback.
  /// A null pointer is ignored.
  __forceinline static void free(T* ptr) {
    if (ptr == nullptr) return;
    ptr->~T();
    free_()(ptr);
  }
};

/// @brief Container for object located in GPU visible host memory.
/// If a custom allocator is not given then data will be placed in dedicated pages.
template <typename T, typename Allocator = PageAllocator<T>>
class Shared final : private BaseShared {
 public:
  /// @brief Allocates and constructs the shared object.
  /// @param pool Optional allocator instance; when non-null the object comes
  /// from pool->alloc(), otherwise from PageAllocator<T>::alloc(flags).
  /// @param flags Allocation flags, only used when no pool is given.
  explicit Shared(Allocator* pool = nullptr, int flags = 0)
      : shared_object_(nullptr), pool_(pool) {
    assert(allocate_() != nullptr && free_() != nullptr &&
           "Shared object allocator is not set");

    if (pool_)
      shared_object_ = pool_->alloc();
    else
      shared_object_ = PageAllocator<T>::alloc(flags);
  }

  /// @brief Same as above, but forwards @p agent_node_id to
  /// PageAllocator<T>::alloc when no pool is given.
  explicit Shared(int agent_node_id, Allocator* pool = nullptr, int flags = 0)
      : shared_object_(nullptr), pool_(pool) {
    assert(allocate_() != nullptr && free_() != nullptr &&
           "Shared object allocator is not set");

    if (pool_)
      shared_object_ = pool_->alloc();
    else
      shared_object_ = PageAllocator<T>::alloc(agent_node_id, flags);
  }

  ~Shared() { destroy_object(); }

  /// @brief Takes over rhs's object and pool; rhs is left empty.
  /// The members are initialised straight from rhs.  Running the destructor
  /// here first (as the previous code did) would read members that have not
  /// been initialised yet and release a garbage pointer.
  Shared(Shared&& rhs) noexcept : shared_object_(rhs.shared_object_), pool_(rhs.pool_) {
    rhs.shared_object_ = nullptr;
    rhs.pool_ = nullptr;
  }

  /// @brief Releases the currently held object, then takes over rhs's object
  /// and pool.  Self-assignment is a no-op.
  Shared& operator=(Shared&& rhs) noexcept {
    if (this != &rhs) {
      destroy_object();
      shared_object_ = rhs.shared_object_;
      pool_ = rhs.pool_;
      rhs.shared_object_ = nullptr;
      rhs.pool_ = nullptr;
    }
    return *this;
  }

  /// @brief Returns the held object, or nullptr after being moved from.
  T* shared_object() const { return shared_object_; }

 private:
  // Returns the held object to whichever allocator produced it.  A moved-from
  // instance has a null pool and a null object, which PageAllocator<T>::free
  // ignores.
  void destroy_object() {
    assert(allocate_() != nullptr && free_() != nullptr &&
           "Shared object allocator is not set");

    if (pool_)
      pool_->free(shared_object_);
    else
      PageAllocator<T>::free(shared_object_);
  }

  T* shared_object_;
  Allocator* pool_;
};

/// @brief Specialisation used when no custom allocator type is supplied: the
/// object always comes from PageAllocator<T>.
template <typename T> class Shared<T, PageAllocator<T>> final : private BaseShared {
 public:
  /// @brief Allocates and constructs the object via PageAllocator<T>::alloc(flags).
  Shared(int flags = 0) : shared_object_(nullptr) {
    assert(allocate_() != nullptr && free_() != nullptr &&
                                        "Shared object allocator is not set");

    shared_object_ = PageAllocator<T>::alloc(flags);
  }

  /// @brief Allocates and constructs the object via
  /// PageAllocator<T>::alloc(agent_node_id, flags).
  Shared(int agent_node_id, int flags) : shared_object_(nullptr) {
    assert(allocate_() != nullptr && free_() != nullptr && "Shared object allocator is not set");

    shared_object_ = PageAllocator<T>::alloc(agent_node_id, flags);
  }

  ~Shared() { destroy_object(); }

  /// @brief Takes over rhs's object; rhs is left holding nullptr.
  /// The member is initialised straight from rhs.  Running the destructor
  /// here first (as the previous code did) would read an uninitialised
  /// pointer and hand it to the deallocator.
  Shared(Shared&& rhs) noexcept : shared_object_(rhs.shared_object_) {
    rhs.shared_object_ = nullptr;
  }

  /// @brief Releases the currently held object, then takes over rhs's object.
  /// Self-assignment is a no-op.
  Shared& operator=(Shared&& rhs) noexcept {
    if (this != &rhs) {
      destroy_object();
      shared_object_ = rhs.shared_object_;
      rhs.shared_object_ = nullptr;
    }
    return *this;
  }

  /// @brief Returns the held object, or nullptr after being moved from.
  T* shared_object() const { return shared_object_; }

 private:
  // Destroys and frees the held object; PageAllocator<T>::free ignores nullptr.
  void destroy_object() {
    assert(allocate_() != nullptr && free_() != nullptr &&
           "Shared object allocator is not set");

    PageAllocator<T>::free(shared_object_);
  }

  T* shared_object_;
};

/// @brief Container for array located in GPU visible host memory.
/// Alignment defaults to __alignof(T) but may be increased.
template <typename T, size_t Align> class SharedArray final : private BaseShared {
 public:
  /// @brief Creates an empty array that owns no storage.
  SharedArray() : shared_object_(nullptr), len(0) {}

  /// @brief Allocates storage for @p length elements and default-constructs
  /// each of them.
  /// @throws std::bad_alloc if the allocation callback returns nullptr.
  /// Any exception from T's constructor is propagated after the elements
  /// built so far are destroyed and the storage is released.
  explicit SharedArray(size_t length) : shared_object_(nullptr), len(length) {
    assert(allocate_() != nullptr && free_() != nullptr &&
                                        "Shared object allocator is not set");
    static_assert((__alignof(T) <= Align) || (Align == 0), "Align is less than alignof(T)");

    shared_object_ =
        reinterpret_cast<T*>(allocate_()(sizeof(T) * length, Max(__alignof(T), Align), 0, 0));
    if (shared_object_ == nullptr) throw std::bad_alloc();

    size_t i = 0;

    // When the constructor of element i throws, exactly elements [0, i) exist.
    // The bound must be i: "i - 1" skipped the last built element and wrapped
    // around to SIZE_MAX when the very first construction failed.
    MAKE_NAMED_SCOPE_GUARD(loopGuard, [&]() {
      for (size_t t = 0; t < i; t++) shared_object_[t].~T();
      free_()(shared_object_);
    });

    for (; i < length; i++) new (&shared_object_[i]) T;

    loopGuard.Dismiss();
  }

  ~SharedArray() { destroy_elements(); }

  /// @brief Takes over rhs's storage; rhs is left empty.
  /// Members are initialised straight from rhs.  Running the destructor here
  /// first (as the previous code did) would read uninitialised members.
  SharedArray(SharedArray&& rhs) noexcept : shared_object_(rhs.shared_object_), len(rhs.len) {
    rhs.shared_object_ = nullptr;
    rhs.len = 0;
  }

  /// @brief Destroys the current contents, then takes over rhs's storage.
  /// Self-assignment is a no-op; rhs is left empty.
  SharedArray& operator=(SharedArray&& rhs) noexcept {
    if (this != &rhs) {
      destroy_elements();
      shared_object_ = rhs.shared_object_;
      len = rhs.len;
      rhs.shared_object_ = nullptr;
      rhs.len = 0;
    }
    return *this;
  }

  /// @brief Element access; the index is range-checked by assert only.
  T& operator[](size_t index) {
    assert(index < len && "Index out of bounds.");
    return shared_object_[index];
  }
  const T& operator[](size_t index) const {
    assert(index < len && "Index out of bounds.");
    return shared_object_[index];
  }

 private:
  // Destroys every element and releases the storage, if any is held.
  void destroy_elements() {
    assert(allocate_() != nullptr && free_() != nullptr &&
                                        "Shared object allocator is not set");

    if (shared_object_ != nullptr) {
      for (size_t i = 0; i < len; i++) shared_object_[i].~T();
      free_()(shared_object_);
    }
  }

  T* shared_object_;
  size_t len;
};

}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/driver/driver.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/driver.h"

#include "inc/hsa.h"

namespace rocr {
namespace core {

Driver::Driver(DriverType kernel_driver_type, std::string devnode_name)
    : kernel_driver_type_(std::move(kernel_driver_type)),
      devnode_name_(std::move(devnode_name)) {}

} // namespace core
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_kfd_driver.h"

#include <memory>
#include <string>

#include <amdgpu_drm.h>
#include <link.h>
#include <sys/ioctl.h>

#include "hsakmt/hsakmt.h"

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"

extern r_debug _amdgpu_r_debug;

namespace rocr {
namespace AMD {

// KfdDriver stores an amdgpu_bo_handle inside core::ShareableHandle::handle
// by reinterpret_cast (see ImportDMABuf/Map/Unmap/ReleaseShareableHandle), so
// the field must be at least as large and as strictly aligned as that type.
static_assert(
    (sizeof(core::ShareableHandle::handle) >= sizeof(amdgpu_bo_handle)) &&
        (alignof(core::ShareableHandle::handle) >= alignof(amdgpu_bo_handle)),
    "ShareableHandle cannot store a amdgpu_bo_handle");

namespace {

// Translates an HSA access permission into the AMDGPU_VM_PAGE_* bits passed to
// amdgpu_bo_va_op. Read-only and read-write set the readable bit, write-only
// and read-write set the writeable bit; any other value yields 0.
__forceinline uint64_t drm_perm(hsa_access_permission_t perm) {
  const bool wants_read =
      (perm == HSA_ACCESS_PERMISSION_RO) || (perm == HSA_ACCESS_PERMISSION_RW);
  const bool wants_write =
      (perm == HSA_ACCESS_PERMISSION_WO) || (perm == HSA_ACCESS_PERMISSION_RW);

  uint64_t vm_bits = 0;
  if (wants_read) vm_bits |= AMDGPU_VM_PAGE_READABLE;
  if (wants_write) vm_bits |= AMDGPU_VM_PAGE_WRITEABLE;
  return vm_bits;
}

} // namespace

KfdDriver::KfdDriver(std::string devnode_name)
    : core::Driver(core::DriverType::KFD, std::move(devnode_name)) {}

// Enables the KFD runtime and publishes what was learned about the kernel
// driver to the runtime singleton.
//
// Steps, in order: enable the runtime (passing the debug flag), query the
// runtime capability mask, record debugger/core-dump support, read and
// validate the kernel interface version, record that version, disable
// interrupt waits for interface version 1.0, and finally bind the XNACK mode.
//
// Returns HSA_STATUS_ERROR if any of the libhsakmt calls fails (other than
// hsaKmtRuntimeEnable reporting NOT_SUPPORTED) or if the version check
// rejects the kernel interface; HSA_STATUS_SUCCESS otherwise.
hsa_status_t KfdDriver::Init() {
  HSAKMT_STATUS ret =
      HSAKMT_CALL(hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()));

  // NOT_SUPPORTED is tolerated here; it is reported to the runtime below.
  if (ret != HSAKMT_STATUS_SUCCESS && ret != HSAKMT_STATUS_NOT_SUPPORTED) return HSA_STATUS_ERROR;

  uint32_t caps_mask = 0;
  if (HSAKMT_CALL(hsaKmtGetRuntimeCapabilities(&caps_mask)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;

  // First argument: whether hsaKmtRuntimeEnable was supported.
  // Second argument: whether the capability mask has the core-dump bit set.
  core::Runtime::runtime_singleton_->KfdVersion(
      ret != HSAKMT_STATUS_NOT_SUPPORTED,
      !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK));

  if (HSAKMT_CALL(hsaKmtGetVersion(&version_)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;

  // NOTE(review): the minor version is compared against kfd_version_major_min.
  // This looks like it was meant to be a minimum *minor* version constant;
  // confirm against the declarations in amd_kfd_driver.h before changing.
  if (version_.KernelInterfaceMajorVersion == kfd_version_major_min &&
      version_.KernelInterfaceMinorVersion < kfd_version_major_min)
    return HSA_STATUS_ERROR;

  core::Runtime::runtime_singleton_->KfdVersion(version_);

  // Kernel interface 1.0 runs with interrupt-based waits turned off.
  if (version_.KernelInterfaceMajorVersion == 1 && version_.KernelInterfaceMinorVersion == 0)
    core::g_use_interrupt_wait = false;

  bool xnack_mode = BindXnackMode();
  core::Runtime::runtime_singleton_->XnackEnabled(xnack_mode);

  return HSA_STATUS_SUCCESS;
}

// Disables the KFD runtime, releases the system properties and closes the
// driver. Stops at the first failing step and returns HSA_STATUS_ERROR;
// otherwise returns the result of Close().
hsa_status_t KfdDriver::ShutDown() {
  if (HSAKMT_CALL(hsaKmtRuntimeDisable()) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  if (HSAKMT_CALL(hsaKmtReleaseSystemProperties()) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  return Close();
}

// Creates a KfdDriver for "/dev/kfd" and tries to open it. On success the new
// driver is moved into `driver`; on failure `driver` is left untouched and
// HSA_STATUS_ERROR is returned.
hsa_status_t KfdDriver::DiscoverDriver(std::unique_ptr<core::Driver>& driver) {
  std::unique_ptr<core::Driver> candidate(new KfdDriver("/dev/kfd"));

  if (candidate->Open() != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  driver = std::move(candidate);
  return HSA_STATUS_SUCCESS;
}

// Always reports success; the query argument is not consulted.
hsa_status_t KfdDriver::QueryKernelModeDriver(core::DriverQuery query) {
  (void)query;
  return HSA_STATUS_SUCCESS;
}

// Opens the KFD device through libhsakmt; any non-success result is reported
// as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::Open() {
  if (HSAKMT_CALL(hsaKmtOpenKFD()) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Closes the KFD device through libhsakmt; any non-success result is reported
// as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::Close() {
  if (HSAKMT_CALL(hsaKmtCloseKFD()) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Releases any previously acquired system properties and then acquires them
// again into `sys_props`. Returns HSA_STATUS_ERROR if either step fails.
hsa_status_t KfdDriver::GetSystemProperties(HsaSystemProperties& sys_props) const {
  const HSAKMT_STATUS release_status = HSAKMT_CALL(hsaKmtReleaseSystemProperties());
  if (release_status != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  const HSAKMT_STATUS acquire_status = HSAKMT_CALL(hsaKmtAcquireSystemProperties(&sys_props));
  if (acquire_status != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}

// Fills `node_props` with the properties libhsakmt reports for `node_id`.
// Returns HSA_STATUS_ERROR when the query fails.
hsa_status_t KfdDriver::GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const {
  const HSAKMT_STATUS query_status = HSAKMT_CALL(hsaKmtGetNodeProperties(node_id, &node_props));
  return (query_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Fills the caller-sized vector `io_link_props` with the IO link properties
// libhsakmt reports for `node_id`; the vector's current size is passed as the
// number of entries. Returns HSA_STATUS_ERROR when the query fails.
hsa_status_t KfdDriver::GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                          uint32_t node_id) const {
  const HSAKMT_STATUS query_status = HSAKMT_CALL(
      hsaKmtGetNodeIoLinkProperties(node_id, io_link_props.size(), io_link_props.data()));
  return (query_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Fills the caller-sized vector `mem_props` with the memory bank properties
// libhsakmt reports for `node_id`. Returns
// HSA_STATUS_ERROR_INVALID_ARGUMENT when the vector has no storage and
// HSA_STATUS_ERROR when the query fails.
hsa_status_t KfdDriver::GetMemoryProperties(uint32_t node_id,
                                            std::vector<HsaMemoryProperties>& mem_props) const {
  if (mem_props.data() == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const HSAKMT_STATUS query_status =
      HSAKMT_CALL(hsaKmtGetNodeMemoryProperties(node_id, mem_props.size(), mem_props.data()));
  return (query_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Fills the caller-sized vector `cache_props` with the cache properties
// libhsakmt reports for `processor_id` on `node_id`. Returns
// HSA_STATUS_ERROR_INVALID_ARGUMENT when the vector has no storage and
// HSA_STATUS_ERROR when the query fails.
hsa_status_t KfdDriver::GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                           std::vector<HsaCacheProperties>& cache_props) const {
  if (cache_props.data() == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const HSAKMT_STATUS query_status = HSAKMT_CALL(hsaKmtGetNodeCacheProperties(
      node_id, processor_id, cache_props.size(), cache_props.data()));
  return (query_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Allocates `size` bytes from `mem_region` and stores the address in `*mem`.
//
// The KFD allocation flags start from the region's own flags and are then
// adjusted according to `alloc_flags`. Local-memory requests that carry no
// flag other than AllocateRestrict are served from the region's fragment
// allocator (unless disabled by the runtime flag); everything else goes to
// KFD directly and is then mapped to the relevant GPU nodes.
//
// Parameters:
//   mem_region    - region to allocate from; must actually be an AMD
//                   MemoryRegion (it is static_cast below).
//   alloc_flags   - bit mask of core::MemoryRegion::Allocate* flags.
//   mem           - receives the allocation; set to nullptr on the failure
//                   paths that free the allocation again.
//   size          - number of bytes requested.
//   agent_node_id - node used for the allocation when AllocateGTTAccess is
//                   set; otherwise the region owner's node is used.
//
// Returns:
//   HSA_STATUS_SUCCESS                 - allocation (and mapping, if needed) succeeded.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT  - AllocateMemoryOnly on a non-local region.
//   HSA_STATUS_ERROR_OUT_OF_RESOURCES  - KFD allocation failed even after a
//                                        trim, a required mapping failed, or
//                                        the ASAN header page replacement failed.
hsa_status_t
KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
                          core::MemoryRegion::AllocateFlags alloc_flags,
                          void **mem, size_t size, uint32_t agent_node_id) {
  const MemoryRegion &m_region(static_cast<const MemoryRegion &>(mem_region));
  // Start from the region's default flags; the assignments below override
  // individual bits based on the caller's request.
  HsaMemFlags kmt_alloc_flags(m_region.mem_flags());

  kmt_alloc_flags.ui32.ExecuteAccess =
      (alloc_flags & core::MemoryRegion::AllocateExecutable ? 1 : 0);

  // NonPaged is only honoured for system memory.
  if (m_region.IsSystem() &&
      (alloc_flags & core::MemoryRegion::AllocateNonPaged)) {
    kmt_alloc_flags.ui32.NonPaged = 1;
  }

  // Handle-only (no address) allocations are restricted to local memory.
  if (!m_region.IsLocalMemory() &&
      (alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Allocating a memory handle for virtual memory
  kmt_alloc_flags.ui32.NoAddress =
      !!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly);

  // Allocate pseudo fine grain memory
  // (AllocatePCIeRW clears CoarseGrain; otherwise the region default is kept.)
  kmt_alloc_flags.ui32.CoarseGrain =
      (alloc_flags & core::MemoryRegion::AllocatePCIeRW
           ? 0
           : kmt_alloc_flags.ui32.CoarseGrain);

  // The next three flags can only be switched on by the caller; when the
  // corresponding Allocate* bit is absent the region default is kept.
  kmt_alloc_flags.ui32.NoSubstitute =
      (alloc_flags & core::MemoryRegion::AllocatePinned
           ? 1
           : kmt_alloc_flags.ui32.NoSubstitute);

  kmt_alloc_flags.ui32.GTTAccess =
      (alloc_flags & core::MemoryRegion::AllocateGTTAccess
           ? 1
           : kmt_alloc_flags.ui32.GTTAccess);

  kmt_alloc_flags.ui32.Uncached =
      (alloc_flags & core::MemoryRegion::AllocateUncached
            ? 1
            : kmt_alloc_flags.ui32.Uncached);

  kmt_alloc_flags.ui32.ExecuteBlit =
    !!(alloc_flags & core::MemoryRegion::AllocateExecutableBlitKernelObject);

  if (m_region.IsLocalMemory()) {
    // Allocate physically contiguous memory. AllocateKfdMemory function call
    // will fail if this flag is not supported in KFD.
    kmt_alloc_flags.ui32.Contiguous =
        (alloc_flags & core::MemoryRegion::AllocateContiguous
             ? 1
             : kmt_alloc_flags.ui32.Contiguous);
  }

  //// Only allow using the suballocator for ordinary VRAM.
  if (m_region.IsLocalMemory() && !kmt_alloc_flags.ui32.NoAddress) {
    bool subAllocEnabled =
        !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
    // Avoid modifying executable or queue allocations.
    // The suballocator is used only when no flag other than AllocateRestrict
    // is set in alloc_flags.
    bool useSubAlloc = subAllocEnabled;
    useSubAlloc &=
        ((alloc_flags & (~core::MemoryRegion::AllocateRestrict)) == 0);

    if (useSubAlloc) {
      *mem = m_region.fragment_alloc(size);

      // NOTE(review): useSubAlloc requires every flag except AllocateRestrict
      // to be clear, so this AllocateAsan branch looks unreachable unless the
      // two flags share bits - confirm.
      if ((alloc_flags & core::MemoryRegion::AllocateAsan) &&
          HSAKMT_CALL(hsaKmtReplaceAsanHeaderPage(*mem)) != HSAKMT_STATUS_SUCCESS) {
        m_region.fragment_free(*mem);
        *mem = nullptr;
        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      }

      return HSA_STATUS_SUCCESS;
    }
  }

  // GTT-access allocations are placed on the requesting agent's node; all
  // others are placed on the node that owns the region.
  const uint32_t node_id =
      (alloc_flags & core::MemoryRegion::AllocateGTTAccess)
          ? agent_node_id
          : m_region.owner()->node_id();

  //// Allocate memory.
  //// If it fails attempt to release memory from the block allocator and retry.
  *mem = AllocateKfdMemory(kmt_alloc_flags, node_id, size);
  if (*mem == nullptr) {
    m_region.owner()->Trim();
    *mem = AllocateKfdMemory(kmt_alloc_flags, node_id, size);
  }

  if (*mem != nullptr) {
    // Handle-only allocations have no address to map, so we are done.
    if (kmt_alloc_flags.ui32.NoAddress)
      return HSA_STATUS_SUCCESS;

    // Commit the memory.
    // For system memory, on non-restricted allocation, map it to all GPUs. On
    // restricted allocation, only CPU is allowed to access by default, so
    // no need to map
    // For local memory, only map it to the owning GPU. Mapping to other GPU,
    // if the access is allowed, is performed on AllowAccess.
    HsaMemMapFlags map_flag = m_region.map_flags();
    size_t map_node_count = 1;
    const uint32_t owner_node_id = m_region.owner()->node_id();
    const uint32_t *map_node_id = &owner_node_id;

    if (m_region.IsSystem()) {
      if ((alloc_flags & core::MemoryRegion::AllocateRestrict) == 0) {
        // Map to all GPU agents.
        map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size();

        if (map_node_count == 0) {
          // No need to pin since no GPU in the platform.
          return HSA_STATUS_SUCCESS;
        }

        map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0];
      } else {
        // No need to pin it for CPU exclusive access.
        return HSA_STATUS_SUCCESS;
      }
    }

    uint64_t alternate_va = 0;
    const bool is_resident = MakeKfdMemoryResident(
        map_node_count, map_node_id, *mem, size, &alternate_va, map_flag);

    // A failed mapping is only fatal for non-full-profile regions, local
    // memory and scratch; otherwise the allocation is returned unmapped.
    const bool require_pinning =
        (!m_region.full_profile() || m_region.IsLocalMemory() ||
         m_region.IsScratch());

    if (require_pinning && !is_resident) {
      FreeKfdMemory(*mem, size);
      *mem = nullptr;
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    // For ASAN allocations the header page is replaced after mapping; a
    // failure releases the allocation again.
    if ((alloc_flags & core::MemoryRegion::AllocateAsan) &&
        HSAKMT_CALL(hsaKmtReplaceAsanHeaderPage(*mem)) != HSAKMT_STATUS_SUCCESS) {
      FreeKfdMemory(*mem, size);
      *mem = nullptr;
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
    return HSA_STATUS_SUCCESS;
  }

  // Both allocation attempts failed.
  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Unmaps `mem` from the GPU and then frees it through KFD. The unmap result
// is not checked; only a failed free is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::FreeMemory(void *mem, size_t size) {
  MakeKfdMemoryUnresident(mem);

  if (!FreeKfdMemory(mem, size)) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Creates a queue through hsaKmtCreateQueueExt, passing every argument
// through unchanged and writing the result into `queue_resource`. Any failure
// is reported as HSA_STATUS_ERROR_OUT_OF_RESOURCES.
hsa_status_t KfdDriver::CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                                    HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id,
                                    void* queue_addr, uint64_t queue_size_bytes, HsaEvent* event,
                                    HsaQueueResource& queue_resource) const {
  const HSAKMT_STATUS create_status = HSAKMT_CALL(
      hsaKmtCreateQueueExt(node_id, type, queue_pct, priority, sdma_engine_id, queue_addr,
                           queue_size_bytes, event, &queue_resource));

  return (create_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS
                                                  : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Destroys the queue identified by `queue_id` through libhsakmt; any failure
// is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::DestroyQueue(HSA_QUEUEID queue_id) const {
  const HSAKMT_STATUS destroy_status = HSAKMT_CALL(hsaKmtDestroyQueue(queue_id));
  return (destroy_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the new queue settings through unchanged to hsaKmtUpdateQueue; any
// failure is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_pct,
                                    HSA_QUEUE_PRIORITY priority, void* queue_addr,
                                    uint64_t queue_size, HsaEvent* event) const {
  const HSAKMT_STATUS update_status = HSAKMT_CALL(
      hsaKmtUpdateQueue(queue_id, queue_pct, priority, queue_addr, queue_size, event));
  return (update_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the CU mask through unchanged to hsaKmtSetQueueCUMask; any failure
// is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t cu_mask_count,
                                       uint32_t* queue_cu_mask) const {
  const HSAKMT_STATUS mask_status =
      HSAKMT_CALL(hsaKmtSetQueueCUMask(queue_id, cu_mask_count, queue_cu_mask));
  return (mask_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the GWS request through unchanged to hsaKmtAllocQueueGWS; any
// failure is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
                                      uint32_t* first_gws) const {
  const HSAKMT_STATUS gws_status = HSAKMT_CALL(hsaKmtAllocQueueGWS(queue_id, num_gws, first_gws));
  return (gws_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Exports the range [mem, mem + size) as a dma-buf through
// hsaKmtExportDMABufHandle. On success the file descriptor and offset are
// written to *dmabuf_fd and *offset; on failure both outputs are left
// untouched. An INVALID_PARAMETER result maps to
// HSA_STATUS_ERROR_INVALID_ARGUMENT, any other failure to
// HSA_STATUS_ERROR_OUT_OF_RESOURCES.
hsa_status_t KfdDriver::ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
                                     size_t *offset) {
  int exported_fd = -1;
  size_t exported_offset = 0;

  const HSAKMT_STATUS export_status =
      HSAKMT_CALL(hsaKmtExportDMABufHandle(mem, size, &exported_fd, &exported_offset));

  switch (export_status) {
    case HSAKMT_STATUS_SUCCESS:
      break;
    case HSAKMT_STATUS_INVALID_PARAMETER:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    default:
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  *dmabuf_fd = exported_fd;
  *offset = exported_offset;
  return HSA_STATUS_SUCCESS;
}

// Imports `dmabuf_fd` as a buffer object on the libdrm device of `agent`
// (which must be a GpuAgent) and stores the resulting amdgpu_bo_handle in
// `handle.handle`. A failed import returns HSA_STATUS_ERROR and leaves
// `handle` untouched.
hsa_status_t KfdDriver::ImportDMABuf(int dmabuf_fd, core::Agent &agent,
                                     core::ShareableHandle &handle) {
  GpuAgent &gpu = static_cast<GpuAgent &>(agent);

  amdgpu_bo_import_result import_result;
  const auto drm_status = DRM_CALL(amdgpu_bo_import(
      gpu.libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &import_result));
  if (drm_status) {
    return HSA_STATUS_ERROR;
  }

  handle.handle = reinterpret_cast<uint64_t>(import_result.buf_handle);
  return HSA_STATUS_SUCCESS;
}

// Maps `size` bytes of the buffer object stored in `handle`, starting at
// `offset`, to the virtual address `mem` with the page permissions derived
// from `perms`. Returns HSA_STATUS_ERROR for a null handle or a failed
// amdgpu_bo_va_op call.
hsa_status_t KfdDriver::Map(core::ShareableHandle handle, void *mem,
                            size_t offset, size_t size,
                            hsa_access_permission_t perms) {
  const auto buffer_object = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
  if (buffer_object == nullptr) {
    return HSA_STATUS_ERROR;
  }

  const auto map_status =
      DRM_CALL(amdgpu_bo_va_op(buffer_object, offset, size, reinterpret_cast<uint64_t>(mem),
                               drm_perm(perms), AMDGPU_VA_OP_MAP));
  return (map_status != 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

// Removes the mapping of the buffer object stored in `handle` at virtual
// address `mem` (range described by `offset` and `size`). Returns
// HSA_STATUS_ERROR for a null handle or a failed amdgpu_bo_va_op call.
hsa_status_t KfdDriver::Unmap(core::ShareableHandle handle, void *mem,
                              size_t offset, size_t size) {
  const auto buffer_object = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
  if (buffer_object == nullptr) {
    return HSA_STATUS_ERROR;
  }

  const auto unmap_status =
      DRM_CALL(amdgpu_bo_va_op(buffer_object, offset, size, reinterpret_cast<uint64_t>(mem), 0,
                               AMDGPU_VA_OP_UNMAP));
  return (unmap_status != 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

// Frees the buffer object stored in `handle` and, on success, resets `handle`
// to its value-initialised state. Returns HSA_STATUS_ERROR for a null handle
// or a failed amdgpu_bo_free call; in both cases `handle` is left unchanged.
hsa_status_t KfdDriver::ReleaseShareableHandle(core::ShareableHandle &handle) {
  const auto buffer_object = reinterpret_cast<amdgpu_bo_handle>(handle.handle);
  if (buffer_object == nullptr) {
    return HSA_STATUS_ERROR;
  }

  if (DRM_CALL(amdgpu_bo_free(buffer_object))) {
    return HSA_STATUS_ERROR;
  }

  handle = {};
  return HSA_STATUS_SUCCESS;
}

// Calls hsaKmtSPMAcquire for `preferred_node_id`; any failure is reported as
// HSA_STATUS_ERROR.
hsa_status_t KfdDriver::SPMAcquire(uint32_t preferred_node_id) const {
  const HSAKMT_STATUS spm_status = HSAKMT_CALL(hsaKmtSPMAcquire(preferred_node_id));
  return (spm_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Calls hsaKmtSPMRelease for `preferred_node_id`; any failure is reported as
// HSA_STATUS_ERROR.
hsa_status_t KfdDriver::SPMRelease(uint32_t preferred_node_id) const {
  const HSAKMT_STATUS spm_status = HSAKMT_CALL(hsaKmtSPMRelease(preferred_node_id));
  return (spm_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes every argument through unchanged to hsaKmtSPMSetDestBuffer; any
// failure is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes,
                                         uint32_t* timeout, uint32_t* size_copied,
                                         void* dest_mem_addr, bool* is_spm_data_loss) const {
  const HSAKMT_STATUS spm_status = HSAKMT_CALL(hsaKmtSPMSetDestBuffer(
      preferred_node_id, size_bytes, timeout, size_copied, dest_mem_addr, is_spm_data_loss));
  return (spm_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Calls hsaKmtOpenSMI for `node_id`, which writes its result through `fd`;
// any failure is reported as HSA_STATUS_ERROR.
hsa_status_t KfdDriver::OpenSMI(uint32_t node_id, int* fd) const {
  const HSAKMT_STATUS smi_status = HSAKMT_CALL(hsaKmtOpenSMI(node_id, fd));
  return (smi_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

void *KfdDriver::AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id,
                                   size_t size) {
  void *mem = nullptr;
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtAllocMemory(node_id, size, flags, &mem));
  return (status == HSAKMT_STATUS_SUCCESS) ? mem : nullptr;
}

// Frees an allocation previously obtained from KFD.
//
// Returns false (after emitting a debug message) when `mem` is null, `size`
// is zero, or hsaKmtFreeMemory reports a failure; true otherwise.
bool KfdDriver::FreeKfdMemory(void *mem, size_t size) {
  if (mem == nullptr || size == 0) {
    // %zu is the portable conversion for size_t; %lu is only correct where
    // size_t happens to be unsigned long.
    debug_print("Invalid free ptr:%p size:%zu\n", mem, size);
    return false;
  }

  if (HSAKMT_CALL(hsaKmtFreeMemory(mem, size)) != HSAKMT_STATUS_SUCCESS) {
    debug_print("Failed to free ptr:%p size:%zu\n", mem, size);
    return false;
  }
  return true;
}

bool KfdDriver::MakeKfdMemoryResident(size_t num_node, const uint32_t *nodes,
                                      const void *mem, size_t size,
                                      uint64_t *alternate_va,
                                      HsaMemMapFlags map_flag) {
  assert(num_node > 0);
  assert(nodes);

  *alternate_va = 0;

  HSAKMT_STATUS kmt_status(HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(
      const_cast<void *>(mem), size, alternate_va, map_flag, num_node,
      const_cast<uint32_t *>(nodes))));

  return (kmt_status == HSAKMT_STATUS_SUCCESS);
}

// Unmaps `mem` from the GPU through hsaKmtUnmapMemoryToGPU. The result of the
// call is discarded.
void KfdDriver::MakeKfdMemoryUnresident(const void *mem) {
  void *const mutable_mem = const_cast<void *>(mem);
  HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(mutable_mem));
}

bool KfdDriver::BindXnackMode() {
  // Get users' preference for Xnack mode of ROCm platform.
  HSAint32 mode = core::Runtime::runtime_singleton_->flag().xnack();
  bool config_xnack = (mode != Flag::XNACK_REQUEST::XNACK_UNCHANGED);

  // Indicate to driver users' preference for Xnack mode
  // Call to driver can fail and is a supported feature
  HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
  if (config_xnack) {
    status = HSAKMT_CALL(hsaKmtSetXNACKMode(mode));
    if (status == HSAKMT_STATUS_SUCCESS) {
      return (mode != Flag::XNACK_DISABLE);
    }
  }

  // Get Xnack mode of devices bound by driver. This could happen
  // when a call to SET Xnack mode fails or user has no particular
  // preference
  status = HSAKMT_CALL(hsaKmtGetXNACKMode(&mode));
  if (status != HSAKMT_STATUS_SUCCESS) {
    debug_print(
        "KFD does not support xnack mode query.\nROCr must assume "
        "xnack is disabled.\n");
    return false;
  }
  return (mode != Flag::XNACK_DISABLE);
}

// Hands the trap handler code range (`base`, `base_size`) and its buffer
// range (`buffer_base`, `buffer_base_size`) for `node_id` to
// hsaKmtSetTrapHandler. Returns HSA_STATUS_ERROR unless the thunk succeeds.
hsa_status_t KfdDriver::SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                                       const void* buffer_base, uint64_t buffer_base_size) const {
  // The thunk entry point takes non-const pointers.
  void* const handler_base = const_cast<void*>(base);
  void* const handler_buffer = const_cast<void*>(buffer_base);

  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtSetTrapHandler(
      node_id, handler_base, base_size, handler_buffer, buffer_base_size));

  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Allocates `size` bytes on `node_id` with the Scratch and HostAccess memory
// flags set, using AllocateKfdMemory.
//
// On success the pointer is stored in `*mem`. When the allocation yields
// nullptr, `*mem` is left untouched and HSA_STATUS_ERROR_OUT_OF_RESOURCES is
// returned.
hsa_status_t KfdDriver::AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const {
  assert(mem);
  assert(size > 0);

  HsaMemFlags scratch_flags = {};
  scratch_flags.ui32.Scratch = 1;
  scratch_flags.ui32.HostAccess = 1;

  void* const scratch = AllocateKfdMemory(scratch_flags, node_id, size);
  if (scratch != nullptr) {
    *mem = scratch;
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Queries hsaKmtGetAMDGPUDeviceHandle for `node_id`, with `device_handle`
// reinterpreted as the thunk's handle pointer type. Returns HSA_STATUS_ERROR
// unless the thunk succeeds.
hsa_status_t KfdDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
  assert(device_handle);

  HsaAMDGPUDeviceHandle* const kmt_handle =
      reinterpret_cast<HsaAMDGPUDeviceHandle*>(device_handle);
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtGetAMDGPUDeviceHandle(node_id, kmt_handle));

  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `clock_counter` to hsaKmtGetClockCounters for `node_id` and maps the
// thunk result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const {
  assert(clock_counter);

  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtGetClockCounters(node_id, clock_counter));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `config` to hsaKmtGetTileConfig for `node_id` and maps the thunk
// result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const {
  assert(config);

  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtGetTileConfig(node_id, config));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `available_size` to hsaKmtAvailableMemory for `node_id` and maps the
// thunk result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::AvailableMemory(uint32_t node_id, uint64_t* available_size) const {
  assert(available_size);

  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtAvailableMemory(node_id, available_size));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Registers the range [`ptr`, `ptr` + `size`) with the thunk through
// hsaKmtRegisterMemoryWithFlags, using `mem_flags`. Returns HSA_STATUS_ERROR
// unless the thunk succeeds.
hsa_status_t KfdDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const {
  assert(ptr);
  assert(size > 0);

  const HSAKMT_STATUS kmt_status =
      HSAKMT_CALL(hsaKmtRegisterMemoryWithFlags(ptr, size, mem_flags));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `ptr` to hsaKmtDeregisterMemory and maps the thunk result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const {
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtDeregisterMemory(ptr));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Maps `mem` to the GPU.
//
// `mem_flags` and `nodes` must either both be null or both be non-null:
//  - both null:     hsaKmtMapMemoryToGPU is used.
//  - both non-null: MakeKfdMemoryResident is used with the given node list
//                   and flags.
// Any other combination yields HSA_STATUS_ERROR_INVALID_ARGUMENT. A failed
// mapping yields HSA_STATUS_ERROR.
hsa_status_t KfdDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                           const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                           const uint32_t* nodes) const {
  const bool has_flags = (mem_flags != nullptr);
  const bool has_nodes = (nodes != nullptr);

  if (has_flags != has_nodes) {
    debug_print("Invalid memory flags ptr:%p nodes ptr:%p\n", mem_flags, nodes);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  bool mapped = false;
  if (has_flags) {
    mapped = MakeKfdMemoryResident(num_nodes, nodes, mem, size, alternate_va, *mem_flags);
  } else {
    mapped = (HSAKMT_CALL(hsaKmtMapMemoryToGPU(const_cast<void*>(mem), size, alternate_va)) ==
              HSAKMT_STATUS_SUCCESS);
  }

  return mapped ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Unmaps `mem` from the GPU through hsaKmtUnmapMemoryToGPU. The thunk status
// is not inspected; HSA_STATUS_SUCCESS is always returned.
hsa_status_t KfdDriver::MakeMemoryUnresident(const void* mem) const {
  void* const target = const_cast<void*>(mem);
  HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(target));
  return HSA_STATUS_SUCCESS;
}

// Passes `enable` to hsaKmtModelEnabled and maps the thunk result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtModelEnabled(enable));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Reports the GPU counter frequency of `node_id` in `*frequency`.
//
// The device handle is obtained with GetDeviceHandle and then queried with
// amdgpu_query_gpu_info. Returns HSA_STATUS_ERROR if either step fails, in
// which case `*frequency` is not written.
hsa_status_t KfdDriver::GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const {
  assert(frequency);

  amdgpu_device_handle dev_handle;
  if (GetDeviceHandle(node_id, reinterpret_cast<void**>(&dev_handle)) != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  amdgpu_gpu_info gpu_info;
  if (DRM_CALL(amdgpu_query_gpu_info(dev_handle, &gpu_info)) < 0) {
    return HSA_STATUS_ERROR;
  }

  // libdrm reports the counter frequency in KHz; scale it by 1000.
  *frequency = static_cast<uint64_t>(gpu_info.gpu_counter_freq) * 1000ull;
  return HSA_STATUS_SUCCESS;
}

// Passes the range (`mem`, `size`) and `share_mem` to hsaKmtShareMemory and
// maps the thunk result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::ShareMemory(void* mem, size_t size,
                                    HsaSharedMemoryHandle* share_mem) const {
  assert(share_mem);

  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtShareMemory(mem, size, share_mem));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `share_mem`, `mem` and `size` to hsaKmtRegisterSharedHandle and maps
// the thunk result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::RegisterSharedHandle(const HsaSharedMemoryHandle* share_mem, void** mem,
                                             uint64_t* size) const {
  assert(share_mem);
  assert(mem);
  assert(size);

  const HSAKMT_STATUS kmt_status =
      HSAKMT_CALL(hsaKmtRegisterSharedHandle(share_mem, mem, size));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `mem` to hsaKmtReplaceAsanHeaderPage and maps the thunk result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::ReplaceAsanHeaderPage(void* mem) const {
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtReplaceAsanHeaderPage(mem));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `mem` to hsaKmtReturnAsanHeaderPage and maps the thunk result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::ReturnAsanHeaderPage(void* mem) const {
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtReturnAsanHeaderPage(mem));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Forwards a PC-sampling capability query to
// hsaKmtPcSamplingQueryCapabilities.
//
// Status mapping:
//  - HSAKMT_STATUS_SUCCESS               -> HSA_STATUS_SUCCESS
//  - HSAKMT_STATUS_KERNEL_ALREADY_OPENED -> HSA_STATUS_ERROR_RESOURCE_BUSY
//  - anything else                       -> HSA_STATUS_ERROR
hsa_status_t KfdDriver::PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
                                                    uint32_t sample_info_sz,
                                                    uint32_t* sz_needed) const {
  switch (HSAKMT_CALL(
      hsaKmtPcSamplingQueryCapabilities(node_id, sample_info, sample_info_sz, sz_needed))) {
    case HSAKMT_STATUS_SUCCESS:
      return HSA_STATUS_SUCCESS;
    case HSAKMT_STATUS_KERNEL_ALREADY_OPENED:
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    default:
      return HSA_STATUS_ERROR;
  }
}

// Forwards a PC-sampling session creation request to hsaKmtPcSamplingCreate.
//
// Status mapping:
//  - HSAKMT_STATUS_SUCCESS               -> HSA_STATUS_SUCCESS
//  - HSAKMT_STATUS_KERNEL_ALREADY_OPENED -> HSA_STATUS_ERROR_RESOURCE_BUSY
//  - anything else                       -> HSA_STATUS_ERROR
hsa_status_t KfdDriver::PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
                                         uint32_t* trace_id) const {
  switch (HSAKMT_CALL(hsaKmtPcSamplingCreate(node_id, sample_info, trace_id))) {
    case HSAKMT_STATUS_SUCCESS:
      return HSA_STATUS_SUCCESS;
    case HSAKMT_STATUS_KERNEL_ALREADY_OPENED:
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    default:
      return HSA_STATUS_ERROR;
  }
}

// Forwards a PC-sampling session teardown request to hsaKmtPcSamplingDestroy.
//
// Status mapping:
//  - HSAKMT_STATUS_SUCCESS               -> HSA_STATUS_SUCCESS
//  - HSAKMT_STATUS_KERNEL_ALREADY_OPENED -> HSA_STATUS_ERROR_RESOURCE_BUSY
//  - anything else                       -> HSA_STATUS_ERROR
hsa_status_t KfdDriver::PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
  switch (HSAKMT_CALL(hsaKmtPcSamplingDestroy(node_id, trace_id))) {
    case HSAKMT_STATUS_SUCCESS:
      return HSA_STATUS_SUCCESS;
    case HSAKMT_STATUS_KERNEL_ALREADY_OPENED:
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    default:
      return HSA_STATUS_ERROR;
  }
}

// Forwards a PC-sampling start request to hsaKmtPcSamplingStart.
//
// Status mapping:
//  - HSAKMT_STATUS_SUCCESS               -> HSA_STATUS_SUCCESS
//  - HSAKMT_STATUS_KERNEL_ALREADY_OPENED -> HSA_STATUS_ERROR_RESOURCE_BUSY
//  - anything else                       -> HSA_STATUS_ERROR
hsa_status_t KfdDriver::PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
  switch (HSAKMT_CALL(hsaKmtPcSamplingStart(node_id, trace_id))) {
    case HSAKMT_STATUS_SUCCESS:
      return HSA_STATUS_SUCCESS;
    case HSAKMT_STATUS_KERNEL_ALREADY_OPENED:
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    default:
      return HSA_STATUS_ERROR;
  }
}

// Forwards a PC-sampling stop request to hsaKmtPcSamplingStop.
//
// Status mapping:
//  - HSAKMT_STATUS_SUCCESS               -> HSA_STATUS_SUCCESS
//  - HSAKMT_STATUS_KERNEL_ALREADY_OPENED -> HSA_STATUS_ERROR_RESOURCE_BUSY
//  - anything else                       -> HSA_STATUS_ERROR
hsa_status_t KfdDriver::PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
  switch (HSAKMT_CALL(hsaKmtPcSamplingStop(node_id, trace_id))) {
    case HSAKMT_STATUS_SUCCESS:
      return HSA_STATUS_SUCCESS;
    case HSAKMT_STATUS_KERNEL_ALREADY_OPENED:
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
    default:
      return HSA_STATUS_ERROR;
  }
}

} // namespace AMD
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_virtio_driver.h"
#include "hsakmt/hsakmt_virtio.h"

#include <link.h>
#include <vector>

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"

extern r_debug _amdgpu_r_debug;

namespace rocr {
namespace AMD {

// Constructs a driver of type core::DriverType::KFD_VIRTIO, handing the
// device node name on to the core::Driver base.
KfdVirtioDriver::KfdVirtioDriver(std::string devnode_name)
    : core::Driver(core::DriverType::KFD_VIRTIO, std::move(devnode_name)) {
}

// Creates a KfdVirtioDriver with an empty device node name and tries to open
// it. On success ownership is moved into `driver`; otherwise `driver` is left
// unchanged and HSA_STATUS_ERROR is returned.
hsa_status_t KfdVirtioDriver::DiscoverDriver(std::unique_ptr<core::Driver>& driver) {
  std::unique_ptr<core::Driver> candidate(new KfdVirtioDriver(""));

  if (candidate->Open() != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  driver = std::move(candidate);
  return HSA_STATUS_SUCCESS;
}

// Opens the device through vhsaKmtOpenKFD. Returns HSA_STATUS_ERROR unless
// the call reports HSAKMT_STATUS_SUCCESS.
hsa_status_t KfdVirtioDriver::Open() {
  if (vhsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Closes the device through vhsaKmtCloseKFD. Returns HSA_STATUS_ERROR unless
// the call reports HSAKMT_STATUS_SUCCESS.
hsa_status_t KfdVirtioDriver::Close() {
  if (vhsaKmtCloseKFD() != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Initializes the VIRTIO driver.
//
// Steps, in order:
//  1. Enable the runtime with vhsaKmtRuntimeEnable; HSAKMT_STATUS_NOT_SUPPORTED
//     is tolerated, any other failure aborts.
//  2. Read the runtime capability mask and record, on the runtime singleton,
//     whether runtime-enable is supported and whether the core-dump
//     capability bit is set.
//  3. Read the interface version into `version_` and publish it to the
//     runtime singleton.
//  4. Disable interrupt wait and XNACK.
// Returns HSA_STATUS_ERROR as soon as one of the vhsaKmt* queries fails.
hsa_status_t KfdVirtioDriver::Init() {
  const HSAKMT_STATUS enable_status =
      vhsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug());
  const bool enable_unsupported = (enable_status == HSAKMT_STATUS_NOT_SUPPORTED);

  if (enable_status != HSAKMT_STATUS_SUCCESS && !enable_unsupported) {
    return HSA_STATUS_ERROR;
  }

  uint32_t caps_mask = 0;
  if (vhsaKmtGetRuntimeCapabilities(&caps_mask) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  const bool supports_core_dump =
      (caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK) != 0;
  core::Runtime::runtime_singleton_->KfdVersion(!enable_unsupported, supports_core_dump);

  if (vhsaKmtGetVersion(&version_) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  core::Runtime::runtime_singleton_->KfdVersion(version_);

  const bool is_interface_1_0 =
      (version_.KernelInterfaceMajorVersion == 1) && (version_.KernelInterfaceMinorVersion == 0);
  if (is_interface_1_0) {
    core::g_use_interrupt_wait = false;
  }

  /* Force disable interrupt wait in VIRTIO driver temporarily */
  core::g_use_interrupt_wait = false;

  /* Force disable XNACK in VIRTIO driver temporarily */
  core::Runtime::runtime_singleton_->XnackEnabled(false);

  return HSA_STATUS_SUCCESS;
}

// Shuts the driver down: disables the runtime, releases the system
// properties and finally closes the device. The sequence stops with
// HSA_STATUS_ERROR at the first step that fails; otherwise the result of
// Close() is returned.
hsa_status_t KfdVirtioDriver::ShutDown() {
  if (vhsaKmtRuntimeDisable() != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  if (vhsaKmtReleaseSystemProperties() != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  return Close();
}

// Performs no query: `query` is ignored and HSA_STATUS_SUCCESS is always
// returned.
hsa_status_t KfdVirtioDriver::QueryKernelModeDriver(core::DriverQuery query) {
  return HSA_STATUS_SUCCESS;
}

// Passes `sys_props` to vhsaKmtAcquireSystemProperties and maps the result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetSystemProperties(HsaSystemProperties& sys_props) const {
  const HSAKMT_STATUS kmt_status = vhsaKmtAcquireSystemProperties(&sys_props);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `node_props` to vhsaKmtGetNodeProperties for `node_id` and maps the
// result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetNodeProperties(HsaNodeProperties& node_props,
                                                uint32_t node_id) const {
  const HSAKMT_STATUS kmt_status = vhsaKmtGetNodeProperties(node_id, &node_props);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the storage of `io_link_props` (its current size and data pointer)
// to vhsaKmtGetNodeIoLinkProperties for `node_id`. The vector is not resized
// here. Returns HSA_STATUS_ERROR unless the call succeeds.
hsa_status_t KfdVirtioDriver::GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                                uint32_t node_id) const {
  const HSAKMT_STATUS kmt_status =
      vhsaKmtGetNodeIoLinkProperties(node_id, io_link_props.size(), io_link_props.data());
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the storage of `mem_props` (its current size and data pointer) to
// vhsaKmtGetNodeMemoryProperties for `node_id`.
//
// An empty vector is rejected with HSA_STATUS_ERROR_INVALID_ARGUMENT; a
// failed query yields HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetMemoryProperties(
    uint32_t node_id, std::vector<HsaMemoryProperties>& mem_props) const {
  if (mem_props.empty()) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const HSAKMT_STATUS kmt_status =
      vhsaKmtGetNodeMemoryProperties(node_id, mem_props.size(), mem_props.data());
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes the storage of `cache_props` (its current size and data pointer) to
// vhsaKmtGetNodeCacheProperties for `node_id` / `processor_id`.
//
// An empty vector is rejected with HSA_STATUS_ERROR_INVALID_ARGUMENT; a
// failed query yields HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetCacheProperties(
    uint32_t node_id, uint32_t processor_id, std::vector<HsaCacheProperties>& cache_props) const {
  if (cache_props.empty()) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Forward the caller's processor id rather than a hard-coded 0 so the
  // parameter is honoured.
  if (vhsaKmtGetNodeCacheProperties(node_id, processor_id, cache_props.size(),
                                    cache_props.data()) != HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  return HSA_STATUS_SUCCESS;
}

// Passes `device_handle` to vhsaKmtGetAMDGPUDeviceHandle for `node_id` and
// maps the result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
  assert(device_handle != nullptr);

  const HSAKMT_STATUS kmt_status = vhsaKmtGetAMDGPUDeviceHandle(node_id, device_handle);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `clock_counter` to vhsaKmtGetClockCounters for `node_id` and maps
// the result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetClockCounters(uint32_t node_id,
                                               HsaClockCounters* clock_counter) const {
  assert(clock_counter != nullptr);

  const HSAKMT_STATUS kmt_status = vhsaKmtGetClockCounters(node_id, clock_counter);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Hands the trap handler code range (`base`, `base_size`) and its buffer
// range (`buffer_base`, `buffer_base_size`) for `node_id` to
// vhsaKmtSetTrapHandler. Returns HSA_STATUS_ERROR unless the call succeeds.
hsa_status_t KfdVirtioDriver::SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                                             const void* buffer_base,
                                             uint64_t buffer_base_size) const {
  // The backend entry point takes non-const pointers.
  void* const handler_base = const_cast<void*>(base);
  void* const handler_buffer = const_cast<void*>(buffer_base);

  const HSAKMT_STATUS kmt_status =
      vhsaKmtSetTrapHandler(node_id, handler_base, base_size, handler_buffer, buffer_base_size);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Allocates `size` bytes from `mem_region` through the VIRTIO backend and
// stores the result in `*mem`.
//
// The region's base memory flags are adjusted according to `alloc_flags`.
// Plain local-memory requests (no flags other than AllocateRestrict) are
// served from the region's fragment allocator unless that is disabled by the
// runtime flag. All other requests go to vhsaKmtAllocMemory; if that fails
// (error status or null pointer) the owner's cached blocks are trimmed and
// the allocation is retried once.
//
// After allocation the memory is mapped: local memory to the owning GPU,
// unrestricted system memory to every GPU. Handle-only (NoAddress)
// allocations, restricted system memory, and platforms without GPUs are not
// mapped.
//
// Returns:
//  - HSA_STATUS_SUCCESS on success.
//  - HSA_STATUS_ERROR_INVALID_ARGUMENT when AllocateMemoryOnly is requested
//    for a non-local region.
//  - HSA_STATUS_ERROR_OUT_OF_RESOURCES when allocation or required mapping
//    fails, or when ASAN support is requested (not implemented here). In
//    these cases any memory obtained from the backend is released and `*mem`
//    is reset to nullptr.
hsa_status_t KfdVirtioDriver::AllocateMemory(const core::MemoryRegion& mem_region,
                                             core::MemoryRegion::AllocateFlags alloc_flags,
                                             void** mem, size_t size, uint32_t agent_node_id) {
  const MemoryRegion& m_region(static_cast<const MemoryRegion&>(mem_region));
  HsaMemFlags kmt_alloc_flags(m_region.mem_flags());
  HSAKMT_STATUS ret;

  kmt_alloc_flags.ui32.ExecuteAccess =
      (alloc_flags & core::MemoryRegion::AllocateExecutable ? 1 : 0);
  kmt_alloc_flags.ui32.AQLQueueMemory =
      (alloc_flags & core::MemoryRegion::AllocateDoubleMap ? 1 : 0);

  if (m_region.IsSystem() && (alloc_flags & core::MemoryRegion::AllocateNonPaged)) {
    kmt_alloc_flags.ui32.NonPaged = 1;
  }

  if (!m_region.IsLocalMemory() && (alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Allocating a memory handle for virtual memory
  kmt_alloc_flags.ui32.NoAddress = !!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly);

  // Allocate pseudo fine grain memory
  kmt_alloc_flags.ui32.CoarseGrain =
      (alloc_flags & core::MemoryRegion::AllocatePCIeRW ? 0 : kmt_alloc_flags.ui32.CoarseGrain);

  kmt_alloc_flags.ui32.NoSubstitute =
      (alloc_flags & core::MemoryRegion::AllocatePinned ? 1 : kmt_alloc_flags.ui32.NoSubstitute);

  kmt_alloc_flags.ui32.GTTAccess =
      (alloc_flags & core::MemoryRegion::AllocateGTTAccess ? 1 : kmt_alloc_flags.ui32.GTTAccess);

  kmt_alloc_flags.ui32.Uncached =
      (alloc_flags & core::MemoryRegion::AllocateUncached ? 1 : kmt_alloc_flags.ui32.Uncached);

  if (m_region.IsLocalMemory()) {
    // Allocate physically contiguous memory. The allocation call
    // will fail if this flag is not supported in KFD.
    kmt_alloc_flags.ui32.Contiguous =
        (alloc_flags & core::MemoryRegion::AllocateContiguous ? 1
                                                              : kmt_alloc_flags.ui32.Contiguous);
  }

  // Only allow using the suballocator for ordinary VRAM.
  if (m_region.IsLocalMemory() && !kmt_alloc_flags.ui32.NoAddress) {
    bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
    // Avoid modifying executable or queue allocations.
    bool useSubAlloc = subAllocEnabled;
    useSubAlloc &= ((alloc_flags & (~core::MemoryRegion::AllocateRestrict)) == 0);

    if (useSubAlloc) {
      *mem = m_region.fragment_alloc(size);

      if ((alloc_flags & core::MemoryRegion::AllocateAsan)) {
        // TODO: Implement ASAN support for VIRTIO driver
        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      }

      return HSA_STATUS_SUCCESS;
    }
  }

  const uint32_t node_id = (alloc_flags & core::MemoryRegion::AllocateGTTAccess)
      ? agent_node_id
      : m_region.owner()->node_id();

  // Allocate memory.
  // If it fails (error status or no pointer), release memory held by the
  // block allocator and retry once.
  ret = vhsaKmtAllocMemory(node_id, size, kmt_alloc_flags, mem);
  if (ret != HSAKMT_STATUS_SUCCESS || *mem == nullptr) {
    *mem = nullptr;
    m_region.owner()->Trim();
    ret = vhsaKmtAllocMemory(node_id, size, kmt_alloc_flags, mem);
    if (ret != HSAKMT_STATUS_SUCCESS || *mem == nullptr) {
      // Never hand a stale or undefined pointer back on failure.
      *mem = nullptr;
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  // Handle-only allocations have no address to map.
  if (kmt_alloc_flags.ui32.NoAddress) return HSA_STATUS_SUCCESS;

  // Commit the memory.
  // For system memory, on non-restricted allocation, map it to all GPUs. On
  // restricted allocation, only CPU is allowed to access by default, so
  // no need to map
  // For local memory, only map it to the owning GPU. Mapping to other GPU,
  // if the access is allowed, is performed on AllowAccess.
  HsaMemMapFlags map_flag = m_region.map_flags();
  size_t map_node_count = 1;
  const uint32_t owner_node_id = m_region.owner()->node_id();
  const uint32_t* map_node_id = &owner_node_id;

  if (m_region.IsSystem()) {
    if ((alloc_flags & core::MemoryRegion::AllocateRestrict) == 0) {
      // Map to all GPU agents.
      map_node_count = core::Runtime::runtime_singleton_->gpu_ids().size();

      if (map_node_count == 0) {
        // No need to pin since no GPU in the platform.
        return HSA_STATUS_SUCCESS;
      }

      map_node_id = &core::Runtime::runtime_singleton_->gpu_ids()[0];
    } else {
      // No need to pin it for CPU exclusive access.
      return HSA_STATUS_SUCCESS;
    }
  }

  uint64_t alternate_va = 0;
  const bool is_resident =
      (MakeMemoryResident(*mem, size, &alternate_va, &map_flag, map_node_count, map_node_id) ==
       HSA_STATUS_SUCCESS);

  const bool require_pinning =
      (!m_region.full_profile() || m_region.IsLocalMemory() || m_region.IsScratch());

  if (require_pinning && !is_resident) {
    vhsaKmtFreeMemory(*mem, size);
    *mem = nullptr;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  if ((alloc_flags & core::MemoryRegion::AllocateAsan)) {
    // TODO: Implement ASAN support for VIRTIO driver
    // The request cannot be honoured; unmap and free what was just allocated
    // so the failure does not leak it.
    FreeMemory(*mem, size);
    *mem = nullptr;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  return HSA_STATUS_SUCCESS;
}

// Unmaps `mem` from the GPU and then releases it with vhsaKmtFreeMemory.
// Returns HSA_STATUS_ERROR unless the free call succeeds.
hsa_status_t KfdVirtioDriver::FreeMemory(void* mem, size_t size) {
  MakeMemoryUnresident(mem);

  if (vhsaKmtFreeMemory(mem, size) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

// Allocates `size` bytes on `node_id` with the Scratch and HostAccess memory
// flags set, using vhsaKmtAllocMemory.
//
// On success the pointer is stored in `*mem`. When the call fails or yields
// nullptr, `*mem` is left untouched and HSA_STATUS_ERROR_OUT_OF_RESOURCES is
// returned.
hsa_status_t KfdVirtioDriver::AllocateScratchMemory(uint32_t node_id, uint64_t size,
                                                    void** mem) const {
  assert(mem != nullptr);
  assert(size != 0);

  HsaMemFlags scratch_flags = {};
  scratch_flags.ui32.Scratch = 1;
  scratch_flags.ui32.HostAccess = 1;

  void* scratch = nullptr;
  const HSAKMT_STATUS alloc_status = vhsaKmtAllocMemory(node_id, size, scratch_flags, &scratch);

  if (alloc_status == HSAKMT_STATUS_SUCCESS && scratch != nullptr) {
    *mem = scratch;
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Registers the range [`ptr`, `ptr` + `size`) through
// vhsaKmtRegisterMemoryWithFlags, using `mem_flags`. Returns HSA_STATUS_ERROR
// unless the call succeeds.
hsa_status_t KfdVirtioDriver::RegisterMemory(void* ptr, uint64_t size,
                                             HsaMemFlags mem_flags) const {
  assert(ptr != nullptr);
  assert(size != 0);

  const HSAKMT_STATUS kmt_status = vhsaKmtRegisterMemoryWithFlags(ptr, size, mem_flags);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `ptr` to vhsaKmtDeregisterMemory and maps the result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::DeregisterMemory(void* ptr) const {
  const HSAKMT_STATUS kmt_status = vhsaKmtDeregisterMemory(ptr);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Passes `available_size` to vhsaKmtAvailableMemory for `node_id` and maps
// the result to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::AvailableMemory(uint32_t node_id, uint64_t* available_size) const {
  assert(available_size != nullptr);

  const HSAKMT_STATUS kmt_status = vhsaKmtAvailableMemory(node_id, available_size);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Maps `mem` to the GPU.
//
// `mem_flags` and `nodes` must either both be null or both be non-null:
//  - both null:     vhsaKmtMapMemoryToGPU is used.
//  - both non-null: vhsaKmtMapMemoryToGPUNodes is used with the given node
//                   list and flags.
// Any other combination yields HSA_STATUS_ERROR_INVALID_ARGUMENT. A failed
// mapping yields HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::MakeMemoryResident(const void* mem, size_t size,
                                                 uint64_t* alternate_va,
                                                 const HsaMemMapFlags* mem_flags,
                                                 uint32_t num_nodes, const uint32_t* nodes) const {
  assert(mem != nullptr);
  assert(size != 0);

  const bool has_flags = (mem_flags != nullptr);
  const bool has_nodes = (nodes != nullptr);

  if (has_flags != has_nodes) {
    debug_print("Invalid memory flags ptr:%p nodes ptr:%p\n", mem_flags, nodes);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The backend entry points take a non-const pointer.
  void* const target = const_cast<void*>(mem);

  HSAKMT_STATUS map_status;
  if (has_flags) {
    map_status = vhsaKmtMapMemoryToGPUNodes(target, size, alternate_va, *mem_flags, num_nodes,
                                            const_cast<uint32_t*>(nodes));
  } else {
    map_status = vhsaKmtMapMemoryToGPU(target, size, alternate_va);
  }

  return (map_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Unmaps `mem` from the GPU through vhsaKmtUnmapMemoryToGPU. The backend
// status is not inspected; HSA_STATUS_SUCCESS is always returned.
hsa_status_t KfdVirtioDriver::MakeMemoryUnresident(const void* mem) const {
  void* const target = const_cast<void*>(mem);
  vhsaKmtUnmapMemoryToGPU(target);
  return HSA_STATUS_SUCCESS;
}

// Forwards a queue creation request to vhsaKmtCreateQueueExt, with
// `queue_resource` passed by address. Returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES unless the call succeeds.
hsa_status_t KfdVirtioDriver::CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                                          HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id,
                                          void* queue_addr, uint64_t queue_size_bytes,
                                          HsaEvent* event, HsaQueueResource& queue_resource) const {
  const HSAKMT_STATUS create_status =
      vhsaKmtCreateQueueExt(node_id, type, queue_pct, priority, sdma_engine_id, queue_addr,
                            queue_size_bytes, event, &queue_resource);

  return (create_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS
                                                  : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Passes `queue_id` to vhsaKmtDestroyQueue and maps the result to
// HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::DestroyQueue(HSA_QUEUEID queue_id) const {
  const HSAKMT_STATUS kmt_status = vhsaKmtDestroyQueue(queue_id);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_percentage,
                                          HSA_QUEUE_PRIORITY priority, void* queue_mem,
                                          uint64_t queue_size, HsaEvent* event) const {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t num_cu_mask,
                                             uint32_t* cu_mask) const {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_GWS,
                                            uint32_t* GWS) const {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::ExportDMABuf(void* mem, size_t size, int* dmabuf_fd,
                                           size_t* offset) {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::ImportDMABuf(int dmabuf_fd, core::Agent& agent,
                                           core::ShareableHandle& handle) {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::Map(core::ShareableHandle handle, void* mem, size_t offset,
                                  size_t size, hsa_access_permission_t perms) {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::Unmap(core::ShareableHandle handle, void* mem, size_t offset,
                                    size_t size) {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: `handle` is ignored and HSA_STATUS_ERROR
// is returned unconditionally.
hsa_status_t KfdVirtioDriver::ReleaseShareableHandle(core::ShareableHandle& handle) {
  return HSA_STATUS_ERROR;
}

// Passes `config` to vhsaKmtGetTileConfig for `node_id` and maps the result
// to HSA_STATUS_SUCCESS or HSA_STATUS_ERROR.
hsa_status_t KfdVirtioDriver::GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const {
  const HSAKMT_STATUS kmt_status = vhsaKmtGetTileConfig(node_id, config);
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Performs no work in this driver: `node_id` is ignored and HSA_STATUS_ERROR
// is returned unconditionally.
hsa_status_t KfdVirtioDriver::SPMAcquire(uint32_t node_id) const {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: `node_id` is ignored and HSA_STATUS_ERROR
// is returned unconditionally.
hsa_status_t KfdVirtioDriver::SPMRelease(uint32_t node_id) const {
  return HSA_STATUS_ERROR;
}

// Performs no work in this driver: all arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::SPMSetDestBuffer(uint32_t node_id, uint32_t size, uint32_t* timeout,
                                               uint32_t* size_copied, void* dest,
                                               bool* is_data_loss) const {
  return HSA_STATUS_ERROR;
}


// Performs no work in this driver: both arguments are ignored and
// HSA_STATUS_ERROR is returned unconditionally.
hsa_status_t KfdVirtioDriver::OpenSMI(uint32_t node_id, int* fd) const {
  return HSA_STATUS_ERROR;
}

// Reports the GPU counter frequency of `node_id` in `*frequency`.
//
// The device handle is obtained with GetDeviceHandle and then queried with
// vamdgpu_query_gpu_info. Returns HSA_STATUS_ERROR if either step fails, in
// which case `*frequency` is not written.
hsa_status_t KfdVirtioDriver::GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const {
  assert(frequency != nullptr);

  amdgpu_device_handle dev_handle;
  if (GetDeviceHandle(node_id, reinterpret_cast<void**>(&dev_handle)) != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  amdgpu_gpu_info gpu_info;
  if (vamdgpu_query_gpu_info(dev_handle, &gpu_info) < 0) {
    return HSA_STATUS_ERROR;
  }

  // libdrm reports the counter frequency in KHz; scale it by 1000.
  *frequency = static_cast<uint64_t>(gpu_info.gpu_counter_freq) * 1000ull;
  return HSA_STATUS_SUCCESS;
}

// Always reports the model as disabled: writes false to `*enable` and returns
// HSA_STATUS_SUCCESS.
hsa_status_t KfdVirtioDriver::IsModelEnabled(bool* enable) const {
  constexpr bool model_enabled = false;
  *enable = model_enabled;
  return HSA_STATUS_SUCCESS;
}

}  // namespace AMD
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_xdna_driver.h"

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <memory>
#include <string>

#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"
#include "core/util/memory.h"
#include "core/util/utils.h"
#include "uapi/amdxdna_accel.h"

namespace rocr {
namespace AMD {

// Compile-time check that core::ShareableHandle::handle is at least as large
// and as strictly aligned as a uint32_t.
static_assert(sizeof(core::ShareableHandle::handle) >= sizeof(uint32_t) &&
                  alignof(core::ShareableHandle::handle) >= alignof(uint32_t),
              "ShareableHandle cannot store a XDNA handle");

/// @brief Index of the first operand in a command.
///
/// The operands are preceded by a fixed header of:
/// - 2 dwords for the transaction op code
/// - 2 dwords for the instructions BO address
/// - 1 dword for the instructions BO size
constexpr uint32_t operand_starting_index = 2 + 2 + 1;

/// @brief Value used for amdxdna_cu_config::cu_func by default when a CU is configured.
constexpr uint32_t default_cu_func{0};

/// @brief Calculates the number of operands in a packet given the number of arguments in the
///        packet.
///
/// Each operand is 3 dwords (hi, lo address, and size). The op code is not counted in @p arg_count
/// but the instructions are, so one 3-dword group is subtracted from the total.
///
/// An @p arg_count that does not even cover the instructions group (fewer than 3 dwords) yields 0
/// rather than wrapping around to a huge unsigned value.
///
/// @param arg_count number of arguments in the packet
/// @return number of operands in the packet
constexpr uint32_t GetOperandCount(uint32_t arg_count) {
  return (arg_count < 3) ? 0 : (arg_count / 3) - 1;
}

/// @brief Flushes the CPU cache for every operand referenced by a command payload.
///
/// The operand addresses are read as 2-dword entries starting at operand_starting_index; the
/// operand sizes are read as 1-dword entries that start right after the last address entry.
///
/// @param count argument count of the packet; converted to an operand count with
///        GetOperandCount()
/// @param cmd_pkt_payload payload whose data dwords are read
static void FlushOperands(uint32_t count, hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload) {
  const uint32_t operand_total = GetOperandCount(count);
  // First dword of the size table; it follows the 2-dword address entries.
  const uint32_t first_size_index = operand_starting_index + 2 * operand_total;

  for (uint32_t i = 0; i < operand_total; ++i) {
    const uint32_t addr_index = operand_starting_index + 2 * i;
    // The higher-indexed dword is passed as the first Concat argument.
    const uint64_t bo_addr = Concat<uint64_t>(cmd_pkt_payload->data[addr_index + 1],
                                              cmd_pkt_payload->data[addr_index]);
    const uint32_t bo_size = cmd_pkt_payload->data[first_size_index + i];

    FlushCpuCache(reinterpret_cast<void*>(bo_addr), 0, bo_size);
  }
}

/// @brief Constructs an XDNA driver object for a device node.
///
/// Only forwards the XDNA driver type and the device node name to the core::Driver base class.
///
/// @param devnode_name path of the device node this driver object refers to
XdnaDriver::XdnaDriver(std::string devnode_name)
    : core::Driver(core::DriverType::XDNA, std::move(devnode_name)) {}

/// @brief Probes the accel device nodes for a usable XDNA kernel driver.
///
/// Tries /dev/accel/accel0 .. /dev/accel/accel63 in order. The first node that can be opened and
/// answers the driver-version query is handed back; nodes that open but fail the query are closed
/// again.
///
/// @param[out] driver receives the opened driver on success, left untouched otherwise
/// @return HSA_STATUS_SUCCESS if a driver was found, HSA_STATUS_ERROR otherwise
hsa_status_t XdnaDriver::DiscoverDriver(std::unique_ptr<core::Driver>& driver) {
  static const std::string devnode_prefix("/dev/accel/accel");
  constexpr int max_minor_num = 64;

  for (int minor = 0; minor < max_minor_num; ++minor) {
    std::unique_ptr<Driver> candidate(new XdnaDriver(devnode_prefix + std::to_string(minor)));

    if (candidate->Open() != HSA_STATUS_SUCCESS) {
      continue;
    }

    if (candidate->QueryKernelModeDriver(core::DriverQuery::GET_DRIVER_VERSION) !=
        HSA_STATUS_SUCCESS) {
      // The node exists but does not answer like an XDNA device.
      candidate->Close();
      continue;
    }

    driver = std::move(candidate);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR;
}

/// @brief Returns the amount of physical system memory in bytes.
///
/// The value is the product of the page size and the number of physical pages reported by
/// sysconf().
///
/// @return physical memory size in bytes, or 0 if either value cannot be queried
uint64_t XdnaDriver::GetSystemMemoryByteSize() {
  const long pagesize = sysconf(_SC_PAGESIZE);
  const long page_count = sysconf(_SC_PHYS_PAGES);

  // sysconf() reports failure with -1; multiplying that would yield a bogus, huge size.
  if (pagesize <= 0 || page_count <= 0) {
    return 0;
  }

  // Widen before multiplying so the product cannot overflow a 32-bit long.
  return static_cast<uint64_t>(pagesize) * static_cast<uint64_t>(page_count);
}

/// @brief Returns the size in bytes of the device heap (dev_heap_size).
uint64_t XdnaDriver::GetDevHeapByteSize() {
  return dev_heap_size;
}

/// @brief Initializes the driver by creating and mapping the device heap.
/// @return status of InitDeviceHeap()
hsa_status_t XdnaDriver::Init() { return InitDeviceHeap(); }

/// @brief Shuts the driver down by releasing the device heap.
/// @return status of FreeDeviceHeap()
hsa_status_t XdnaDriver::ShutDown() { return FreeDeviceHeap(); }

/// @brief Dispatches a query to the kernel-mode driver.
///
/// Only core::DriverQuery::GET_DRIVER_VERSION is handled.
///
/// @param query the query to perform
/// @return result of the query, or HSA_STATUS_ERROR_INVALID_ARGUMENT for any other query
hsa_status_t XdnaDriver::QueryKernelModeDriver(core::DriverQuery query) {
  if (query == core::DriverQuery::GET_DRIVER_VERSION) {
    return QueryDriverVersion();
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// @brief Opens the device node read/write with close-on-exec set.
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_OUT_OF_RESOURCES if open() fails
hsa_status_t XdnaDriver::Open() {
  fd_ = open(devnode_name_.c_str(), O_RDWR | O_CLOEXEC);
  return (fd_ < 0) ? HSA_STATUS_ERROR_OUT_OF_RESOURCES : HSA_STATUS_SUCCESS;
}

/// @brief Closes the device node if it is open.
///
/// A descriptor value of 0 is a valid file descriptor (open() returns it when stdin has been
/// closed), so only negative values are treated as "not open". This matches the -1 that is stored
/// after closing and that open() leaves in fd_ on failure.
/// NOTE(review): assumes fd_ is initialized to a negative value before Open() is first called;
/// confirm against the core::Driver declaration.
///
/// @return HSA_STATUS_SUCCESS if nothing was open or close() succeeded, HSA_STATUS_ERROR otherwise
hsa_status_t XdnaDriver::Close() {
  if (fd_ < 0) {
    return HSA_STATUS_SUCCESS;
  }

  const int ret = close(fd_);
  // The descriptor must not be reused, whatever close() reported.
  fd_ = -1;

  return (ret == 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

/// @brief Reports system-level properties for the XDNA driver.
///
/// Only NumNodes is written, and it is always set to 1.
///
/// @param[out] sys_props structure whose NumNodes field is filled in
/// @return HSA_STATUS_SUCCESS
hsa_status_t XdnaDriver::GetSystemProperties(HsaSystemProperties& sys_props) const {
  sys_props.NumNodes = 1;
  return HSA_STATUS_SUCCESS;
}

/// @brief Fills in the node properties from the AIE metadata reported by the kernel driver.
///
/// Only NumNeuralCores and NumIOLinks are written.
///
/// @param[out] node_props properties to fill in
/// @param node_id unused
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if the metadata query fails
hsa_status_t XdnaDriver::GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const {
  amdxdna_drm_query_aie_metadata metadata = {};

  amdxdna_drm_get_info query = {};
  query.param = DRM_AMDXDNA_QUERY_AIE_METADATA;
  query.buffer_size = sizeof(metadata);
  query.buffer = reinterpret_cast<uintptr_t>(&metadata);
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_INFO, &query) < 0) {
    return HSA_STATUS_ERROR;
  }

  // Right now can only target N-1 columns as that is the number of shim DMAs
  // in NPU1 devices.
  const auto usable_columns = metadata.cols - 1;
  node_props.NumNeuralCores = usable_columns * metadata.core.row_count;

  /// @todo XDNA driver currently only supports single-node AIE
  /// devices over PCIe. Update this once we can get topology
  /// information dynamically from the sysfs.
  node_props.NumIOLinks = 0;

  return HSA_STATUS_SUCCESS;
}

/// @brief Reports IO link properties for a node.
///
/// Nothing is written to @p io_link_props; GetNodeProperties() reports zero IO links.
///
/// @param io_link_props left untouched
/// @param node_id unused
/// @return HSA_STATUS_SUCCESS
hsa_status_t XdnaDriver::GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                           uint32_t node_id) const {
  return HSA_STATUS_SUCCESS;
}

/// @brief Reports memory properties for a node.
///
/// Currently a no-op: @p mem_props is left untouched.
///
/// @param node_id unused
/// @param mem_props left untouched
/// @return HSA_STATUS_SUCCESS
hsa_status_t XdnaDriver::GetMemoryProperties(uint32_t node_id,
                                             std::vector<HsaMemoryProperties>& mem_props) const {
  return HSA_STATUS_SUCCESS;
}

/// @brief Reports cache properties for a processor of a node.
///
/// @param node_id unused
/// @param processor_id unused
/// @param cache_props left untouched
/// @return always HSA_STATUS_ERROR_INVALID_CACHE
hsa_status_t XdnaDriver::GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                            std::vector<HsaCacheProperties>& cache_props) const {
  // AIE currently has no caches.
  return HSA_STATUS_ERROR_INVALID_CACHE;
}

/// @brief Allocates a buffer object (BO) from a system memory region and records its mapping.
///
/// Device-SVM regions get an AMDXDNA_BO_DEV BO whose address is reported by the kernel driver;
/// every other system region gets an AMDXDNA_BO_SHMEM BO that is mmap'ed here. The BO is
/// registered in both the handle-to-address and the address-to-BO maps.
///
/// @param mem_region region to allocate from; must be a system region
/// @param alloc_flags if AllocateMemoryOnly is set, the BO handle is returned in @p mem instead of
///        the virtual address
/// @param[out] mem receives the virtual address (or the handle, see @p alloc_flags)
/// @param size allocation size in bytes
/// @param node_id unused
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_REGION for non-system regions,
///         HSA_STATUS_ERROR_OUT_OF_RESOURCES if BO creation or mapping fails, HSA_STATUS_ERROR if
///         the BO info query fails
hsa_status_t
XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
                           core::MemoryRegion::AllocateFlags alloc_flags,
                           void **mem, size_t size, uint32_t node_id) {
  const auto& region = static_cast<const MemoryRegion&>(mem_region);
  if (!region.IsSystem()) {
    return HSA_STATUS_ERROR_INVALID_REGION;
  }

  const bool is_shmem_bo = !region.IsDeviceSVM();

  amdxdna_drm_create_bo create_args = {};
  create_args.size = size;
  create_args.type = is_shmem_bo ? AMDXDNA_BO_SHMEM : AMDXDNA_BO_DEV;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_args) < 0) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  BOHandle new_bo;
  new_bo.handle = create_args.handle;
  new_bo.size = size;

  // From here on, any early return must release the BO again.
  MAKE_NAMED_SCOPE_GUARD(release_on_error, [&] { DestroyBOHandle(new_bo); });

  amdxdna_drm_get_bo_info info_args = {};
  info_args.handle = create_args.handle;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &info_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  /// TODO: For now we always map the memory and keep a mapping from handles
  /// to VA memory addresses. Once we can support the separate VMEM call to
  /// map handles we can fix this.
  if (!is_shmem_bo) {
    new_bo.vaddr = reinterpret_cast<void*>(info_args.vaddr);
  } else {
    new_bo.vaddr =
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, info_args.map_offset);
    if (new_bo.vaddr == MAP_FAILED) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  *mem = (alloc_flags & core::MemoryRegion::AllocateMemoryOnly)
      ? reinterpret_cast<void *>(create_args.handle)
      : new_bo.vaddr;

  vmem_handle_mappings.emplace(new_bo.handle, new_bo.vaddr);
  vmem_addr_mappings.emplace(new_bo.vaddr, new_bo);

  release_on_error.Dismiss();

  return HSA_STATUS_SUCCESS;
}

/// @brief Releases a buffer object previously returned by AllocateMemory().
///
/// The BO is looked up by its exact base address, its GEM handle is closed and both bookkeeping
/// maps are updated. No munmap() is performed here.
///
/// @param mem base virtual address of the allocation
/// @param size unused
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ALLOCATION if @p mem is not a
///         known base address, HSA_STATUS_ERROR if the GEM handle cannot be closed
hsa_status_t XdnaDriver::FreeMemory(void *mem, size_t size) {
  const auto entry = vmem_addr_mappings.find(mem);
  if (entry == vmem_addr_mappings.end()) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  const auto gem_handle = entry->second.handle;

  drm_gem_close gem_close_args = {};
  gem_close_args.handle = gem_handle;
  if (ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &gem_close_args) < 0) {
    // Keep the bookkeeping intact if the kernel still owns the BO.
    return HSA_STATUS_ERROR;
  }

  vmem_handle_mappings.erase(gem_handle);
  vmem_addr_mappings.erase(entry);

  return HSA_STATUS_SUCCESS;
}

/// @brief Creates a queue.
///
/// No kernel object is created here: the queue ID is set to AMDXDNA_INVALID_CTX_HANDLE and all
/// other parameters are ignored. A hardware context handle is assigned to the queue ID later by
/// ConfigHwCtx().
///
/// @param[out] queue_resource its QueueId field is set to AMDXDNA_INVALID_CTX_HANDLE
/// @return HSA_STATUS_SUCCESS
hsa_status_t XdnaDriver::CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                                     HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id,
                                     void* queue_addr, uint64_t queue_size_bytes, HsaEvent* event,
                                     HsaQueueResource& queue_resource) const {
  queue_resource.QueueId = AMDXDNA_INVALID_CTX_HANDLE;
  return HSA_STATUS_SUCCESS;
}

/// @brief Destroys the hardware context that backs a queue.
///
/// @param queue_id hardware context handle of the queue
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_QUEUE if the queue has no
///         hardware context, HSA_STATUS_ERROR if the destroy ioctl fails
hsa_status_t XdnaDriver::DestroyQueue(HSA_QUEUEID queue_id) const {
  if (queue_id == AMDXDNA_INVALID_CTX_HANDLE) {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  amdxdna_drm_destroy_hwctx args = {};
  args.handle = static_cast<uint32_t>(queue_id);

  const int rc = ioctl(fd_, DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &args);
  return (rc < 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

/// @brief Updates the properties of a queue. Not supported; all parameters are ignored.
/// @return always HSA_STATUS_ERROR_INVALID_QUEUE
hsa_status_t XdnaDriver::UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_pct,
                                     HSA_QUEUE_PRIORITY priority, void* queue_addr,
                                     uint64_t queue_size, HsaEvent* event) const {
  // AIE doesn't support queue updates.
  return HSA_STATUS_ERROR_INVALID_QUEUE;
}

/// @brief Sets the CU mask of a queue. Not supported; all parameters are ignored.
/// @return always HSA_STATUS_ERROR_INVALID_QUEUE
hsa_status_t XdnaDriver::SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t cu_mask_count,
                                        uint32_t* queue_cu_mask) const {
  // AIE doesn't support queue CU masks.
  return HSA_STATUS_ERROR_INVALID_QUEUE;
}

/// @brief Allocates GWS resources for a queue. Not supported; all parameters are ignored.
/// @return always HSA_STATUS_ERROR_INVALID_QUEUE
hsa_status_t XdnaDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
                                       uint32_t* first_gws) const {
  // AIE doesn't support GWS.
  return HSA_STATUS_ERROR_INVALID_QUEUE;
}

/// @brief Exports the buffer object containing @p mem as a dma-buf file descriptor.
///
/// @param mem address inside a tracked allocation
/// @param size unused
/// @param[out] dmabuf_fd receives the exported read/write file descriptor
/// @param[out] offset receives the byte offset of @p mem from the start of its buffer object
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ALLOCATION if @p mem does not
///         belong to a tracked allocation, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the export fails
hsa_status_t XdnaDriver::ExportDMABuf(void* mem, size_t size, int* dmabuf_fd, size_t* offset) {
  auto owning_bo = FindBOHandle(mem);
  if (!owning_bo.IsValid()) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  drm_prime_handle prime_args = {};
  prime_args.handle = owning_bo.handle;
  prime_args.flags = DRM_RDWR;
  prime_args.fd = -1;
  if (ioctl(fd_, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_args) < 0) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const auto mem_addr = reinterpret_cast<uintptr_t>(mem);
  const auto bo_base_addr = reinterpret_cast<uintptr_t>(owning_bo.vaddr);

  *dmabuf_fd = prime_args.fd;
  *offset = mem_addr - bo_base_addr;

  return HSA_STATUS_SUCCESS;
}

/// @brief Imports a dma-buf file descriptor and returns the resulting buffer-object handle.
///
/// @param dmabuf_fd dma-buf file descriptor to import
/// @param agent unused
/// @param[out] handle its handle field receives the imported BO handle
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if the import ioctl fails
hsa_status_t XdnaDriver::ImportDMABuf(int dmabuf_fd, core::Agent &agent,
                                      core::ShareableHandle &handle) {
  drm_prime_handle prime_args = {};
  prime_args.handle = AMDXDNA_INVALID_BO_HANDLE;
  prime_args.fd = dmabuf_fd;

  const int rc = ioctl(fd_, DRM_IOCTL_PRIME_FD_TO_HANDLE, &prime_args);
  if (rc < 0) {
    return HSA_STATUS_ERROR;
  }

  handle.handle = prime_args.handle;
  return HSA_STATUS_SUCCESS;
}

/// @brief Maps the buffer object referred to by @p handle at a fixed virtual address.
///
/// The handle is first exported as a dma-buf file descriptor, which is then mmap'ed with
/// MAP_FIXED | MAP_SHARED at @p mem. The temporary descriptor is closed again before returning;
/// an established mapping stays valid after its descriptor has been closed, so keeping it open
/// would only leak one descriptor per call.
///
/// @param handle shareable handle holding the BO handle
/// @param mem virtual address at which to map
/// @param offset offset into the buffer object
/// @param size number of bytes to map
/// @param perms access permissions, translated with PermissionsToMmapFlags()
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if the export or the mapping fails
hsa_status_t XdnaDriver::Map(core::ShareableHandle handle, void *mem,
                             size_t offset, size_t size,
                             hsa_access_permission_t perms) {
  // Get fd associated with the handle.
  drm_prime_handle params = {};
  params.handle = handle.handle;
  params.fd = -1;
  if (ioctl(fd_, DRM_IOCTL_PRIME_HANDLE_TO_FD, &params) < 0)
    return HSA_STATUS_ERROR;

  // Change permissions.
  void *mapped_ptr = mmap(mem, size, PermissionsToMmapFlags(perms),
                          MAP_FIXED | MAP_SHARED, params.fd, offset);

  // The descriptor was only needed to establish the mapping; release it on
  // both the success and the failure path.
  close(params.fd);

  if (mapped_ptr == MAP_FAILED)
    return HSA_STATUS_ERROR;

  return HSA_STATUS_SUCCESS;
}

/// @brief Removes a mapping created by Map().
///
/// @param handle unused
/// @param mem start address of the mapping
/// @param offset unused
/// @param size number of bytes to unmap
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if munmap() fails
hsa_status_t XdnaDriver::Unmap(core::ShareableHandle handle, void *mem,
                               size_t offset, size_t size) {
  const int rc = munmap(mem, size);
  return (rc != 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

/// @brief Closes the GEM handle stored in a shareable handle and resets the handle.
///
/// @param[in,out] handle handle to release; value-initialized on success, untouched on failure
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if the GEM handle cannot be closed
hsa_status_t XdnaDriver::ReleaseShareableHandle(core::ShareableHandle &handle) {
  drm_gem_close gem_close_args = {};
  gem_close_args.handle = handle.handle;

  const int rc = ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &gem_close_args);
  if (rc < 0) {
    return HSA_STATUS_ERROR;
  }

  handle = {};
  return HSA_STATUS_SUCCESS;
}

/// @brief Queries the AIE version from the kernel driver and stores it as the kernel interface
///        version of this driver object.
///
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if the query ioctl fails
hsa_status_t XdnaDriver::QueryDriverVersion() {
  amdxdna_drm_query_aie_version reported_version = {};

  amdxdna_drm_get_info query = {};
  query.param = DRM_AMDXDNA_QUERY_AIE_VERSION;
  query.buffer_size = sizeof(reported_version);
  query.buffer = reinterpret_cast<uintptr_t>(&reported_version);

  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_INFO, &query) < 0) {
    return HSA_STATUS_ERROR;
  }

  version_.KernelInterfaceMajorVersion = reported_version.major;
  version_.KernelInterfaceMinorVersion = reported_version.minor;

  return HSA_STATUS_SUCCESS;
}

/// @brief Creates the device heap buffer object and maps it at an address aligned to
///        dev_heap_align.
///
/// The alignment is obtained by first reserving an anonymous mapping of 2 * dev_heap_align - 1
/// bytes and then mapping the heap BO with MAP_FIXED at the first aligned address inside that
/// reservation. On success dev_heap_handle describes the BO together with the reservation, and
/// dev_heap_aligned points to the aligned heap mapping.
///
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_OUT_OF_RESOURCES if BO creation or
///         either mapping fails, HSA_STATUS_ERROR if the BO info query fails
hsa_status_t XdnaDriver::InitDeviceHeap() {
  amdxdna_drm_create_bo create_bo_args = {};
  create_bo_args.size = dev_heap_size;
  create_bo_args.type = AMDXDNA_BO_DEV_HEAP;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo_args) < 0) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  dev_heap_handle.handle = create_bo_args.handle;

  // Unmap memory and close the BO in case of error.
  MAKE_NAMED_SCOPE_GUARD(dev_heap_handle_guard, [&] { DestroyBOHandle(dev_heap_handle); });

  amdxdna_drm_get_bo_info get_bo_info_args = {};
  get_bo_info_args.handle = dev_heap_handle.handle;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  // Reserve address space large enough to contain an address aligned to dev_heap_align.
  // NOTE(review): the fixed mapping below stays inside this reservation only if
  // dev_heap_size <= dev_heap_align; confirm against the definitions of both constants.
  const size_t size = dev_heap_align * 2 - 1;
  dev_heap_handle.vaddr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (dev_heap_handle.vaddr == MAP_FAILED) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  // The handle records the size of the reservation, not the size of the heap BO.
  dev_heap_handle.size = size;

  void* addr_aligned = reinterpret_cast<void*>(
      AlignUp(reinterpret_cast<uintptr_t>(dev_heap_handle.vaddr), dev_heap_align));

  // Replace the aligned part of the reservation with the actual heap BO mapping.
  dev_heap_aligned =
      mmap(addr_aligned, dev_heap_size, PROT_READ | PROT_WRITE,
           MAP_SHARED | MAP_FIXED, fd_, get_bo_info_args.map_offset);
  if (dev_heap_aligned == MAP_FAILED) {
    // Keep the member null so FreeDeviceHeap() does not try to unmap MAP_FAILED.
    dev_heap_aligned = nullptr;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  dev_heap_handle_guard.Dismiss();

  return HSA_STATUS_SUCCESS;
}

/// @brief Unmaps the device heap and closes its buffer object.
///
/// Both the aligned heap mapping and the reservation recorded in dev_heap_handle are unmapped.
/// Cleanup continues after an unmap failure; the result of closing the GEM handle is ignored.
///
/// @return HSA_STATUS_SUCCESS if every munmap() succeeded, HSA_STATUS_ERROR otherwise
hsa_status_t XdnaDriver::FreeDeviceHeap() {
  bool unmap_failed = false;

  if (dev_heap_aligned != nullptr) {
    unmap_failed |= (munmap(dev_heap_aligned, dev_heap_size) != 0);
    dev_heap_aligned = nullptr;
  }

  if (dev_heap_handle.IsValid()) {
    unmap_failed |= (munmap(dev_heap_handle.vaddr, dev_heap_handle.size) != 0);

    drm_gem_close gem_close_args = {};
    gem_close_args.handle = dev_heap_handle.handle;
    ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &gem_close_args);

    dev_heap_handle = BOHandle{};
  }

  return unmap_failed ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

/// @brief Submits a command chain to a hardware context and blocks until it has finished.
///
/// @param cmd_chain_bo_handle buffer object holding the command chain
/// @param bo_handles handles of all buffer objects referenced by the chain
/// @param queue_id hardware context handle of the queue
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_QUEUE if the queue has no
///         hardware context, HSA_STATUS_ERROR if the submit or the wait ioctl fails
hsa_status_t XdnaDriver::ExecCmdAndWait(const BOHandle& cmd_chain_bo_handle,
                                        const std::vector<uint32_t>& bo_handles,
                                        HSA_QUEUEID queue_id) {
  if (queue_id == AMDXDNA_INVALID_CTX_HANDLE) {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  const auto hwctx = static_cast<uint32_t>(queue_id);

  // Submit the chain as a single command; the referenced BOs are passed as arguments.
  amdxdna_drm_exec_cmd submit_args = {};
  submit_args.hwctx = hwctx;
  submit_args.type = AMDXDNA_CMD_SUBMIT_EXEC_BUF;
  submit_args.cmd_handles = cmd_chain_bo_handle.handle;
  submit_args.args = reinterpret_cast<uint64_t>(bo_handles.data());
  submit_args.cmd_count = 1;
  submit_args.arg_count = bo_handles.size();
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_EXEC_CMD, &submit_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  // Wait for the sequence number the submission returned.
  amdxdna_drm_wait_cmd wait_args = {};
  wait_args.hwctx = hwctx;
  wait_args.timeout = DEFAULT_TIMEOUT_VAL;
  wait_args.seq = submit_args.seq;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_WAIT_CMD, &wait_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}

/// @brief Collects the buffer-object handles used by a command packet and flushes their contents.
///
/// The instruction sequence and every operand are translated from their address to the owning
/// BO handle (appended to @p bo_handles) and flushed from the CPU cache. Finally the low dword of
/// the instruction sequence address in the payload is rewritten into a device address.
///
/// @param count number of arguments in the packet
/// @param[in,out] cmd_pkt_payload payload of the packet; its instruction address is rewritten
/// @param[in,out] bo_handles receives the handles of the instruction and operand BOs
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if any address does not belong to a
///         tracked allocation
hsa_status_t XdnaDriver::PrepareBOs(uint32_t count,
                                    hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload,
                                    std::vector<uint32_t>& bo_handles) {
  // Instruction sequence: look up the owning BO first.
  const uint64_t instr_addr =
      Concat<uint64_t>(cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX + 1],
                       cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX]);
  void* const instr_ptr = reinterpret_cast<void*>(instr_addr);

  auto instr_bo = FindBOHandle(instr_ptr);
  if (!instr_bo.IsValid()) {
    return HSA_STATUS_ERROR;
  }
  bo_handles.push_back(instr_bo.handle);

  // The packet stores the number of instructions, not the size in bytes.
  const uint32_t instr_bytes =
      cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_SIZE_IDX] * INSTR_SIZE_BYTES;
  FlushCpuCache(instr_ptr, 0, instr_bytes);

  // Operands: the two-dword addresses start at `operand_starting_index`; the per-operand sizes
  // follow after the addresses of all operands.
  const uint32_t operand_total = GetOperandCount(count);
  const uint32_t first_size_slot = operand_starting_index + 2 * operand_total;
  bo_handles.reserve(operand_total);

  uint32_t addr_slot = operand_starting_index;
  for (uint32_t i = 0; i < operand_total; ++i, addr_slot += 2) {
    const uint64_t addr = Concat<uint64_t>(cmd_pkt_payload->data[addr_slot + 1],
                                           cmd_pkt_payload->data[addr_slot]);
    void* const operand_ptr = reinterpret_cast<void*>(addr);

    auto operand_bo = FindBOHandle(operand_ptr);
    if (!operand_bo.IsValid()) {
      return HSA_STATUS_ERROR;
    }
    bo_handles.push_back(operand_bo.handle);

    const uint32_t operand_bytes = cmd_pkt_payload->data[first_size_slot + i];
    FlushCpuCache(operand_ptr, 0, operand_bytes);
  }

  // Transform the instruction sequence address into device address
  cmd_pkt_payload->data[CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX] =
      DEV_ADDR_BASE | (instr_addr & DEV_ADDR_OFFSET_MASK);

  return HSA_STATUS_SUCCESS;
}

/// @brief Creates a command buffer object and maps it into the process.
///
/// @param size requested size of the command BO in bytes
/// @param[out] cmd_bo_handle receives the mapping address, the BO handle and @p size on success
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if creating, querying or mapping the
///         BO fails
hsa_status_t XdnaDriver::CreateCmdBO(uint32_t size, BOHandle& cmd_bo_handle) {
  amdxdna_drm_create_bo create_args = {};
  create_args.type = AMDXDNA_BO_CMD;
  create_args.size = size;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  // Nothing is mapped yet when this guard fires, so closing the GEM handle is sufficient.
  MAKE_NAMED_SCOPE_GUARD(close_on_error, [&] {
    drm_gem_close gem_close_args = {};
    gem_close_args.handle = create_args.handle;
    ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &gem_close_args);
  });

  amdxdna_drm_get_bo_info info_args = {};
  info_args.handle = create_args.handle;
  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &info_args) < 0) {
    return HSA_STATUS_ERROR;
  }

  void* mapping = mmap(nullptr, create_args.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
                       info_args.map_offset);
  if (mapping == MAP_FAILED) {
    return HSA_STATUS_ERROR;
  }

  cmd_bo_handle = BOHandle{mapping, create_args.handle, size};
  close_on_error.Dismiss();

  return HSA_STATUS_SUCCESS;
}

/// @brief Builds a command chain from contiguous packets, executes it and waits for completion.
///
/// For every packet a command BO is created and filled from the packet payload. PDIs referenced
/// by the packets are looked up in the PDI cache of the queue's hardware context; if any PDI is
/// missing, the hardware context is recreated with the updated PDI set (see ConfigHwCtx) and the
/// cache is stored under the handle of the new hardware context. All command BOs are then
/// referenced from one command-chain BO, which is submitted synchronously. Command BOs and the
/// chain BO are released before returning, on both the success and the error paths.
///
/// @param first_pkt first packet of the chain
/// @param num_pkts number of contiguous packets starting at @p first_pkt
/// @param[in,out] queue_id hardware context handle of the queue; replaced by the handle of the
///        new hardware context if the context had to be reconfigured
/// @param num_core_tiles number of tiles requested when a hardware context is created
/// @return HSA_STATUS_SUCCESS on success, otherwise the status of the step that failed
hsa_status_t XdnaDriver::SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uint32_t num_pkts,
                                        HSA_QUEUEID& queue_id, uint32_t num_core_tiles) {
  // Stores instruction and operand BOs.
  std::vector<uint32_t> bo_handles;

  // Stores commands that we are going to submit and the corresponding metadata.
  std::vector<BOHandle> cmd_bo_handles;
  cmd_bo_handles.reserve(num_pkts);
  // Unmap and close the command BOs in case of an error.
  MAKE_NAMED_SCOPE_GUARD(cmd_bo_handles_guard, [&] {
    for (auto& bo_handle : cmd_bo_handles) {
      DestroyBOHandle(bo_handle);
    }
  });

  auto hw_ctx_handle = static_cast<uint32_t>(queue_id);
  // PDI cache. If the cache is updated, a new hardware context will be created for the queue.
  auto pdi_cache_it = hw_ctx_pdi_cache_map.find(hw_ctx_handle);
  auto pdi_cache = (pdi_cache_it != hw_ctx_pdi_cache_map.end()) ? pdi_cache_it->second : PDICache{};
  bool reconfigure_queue = false;

  // Iterating over all the contiguous HSA_AMD_AIE_ERT_CMD_CHAIN packets
  for (uint32_t pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
    // Getting the current command packet
    hsa_amd_aie_ert_packet_t* pkt = first_pkt + pkt_iter;
    hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload =
        reinterpret_cast<hsa_amd_aie_ert_start_kernel_data_t*>(pkt->payload_data);

    // Add the handles for all of the BOs to bo_handles as well as rewrite
    // the instruction handle to contain the device address
    hsa_status_t status = PrepareBOs(pkt->count, cmd_pkt_payload, bo_handles);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }

    // Creating a packet that contains the command to execute the kernel
    const uint32_t cmd_size = sizeof(amdxdna_cmd) + pkt->count * sizeof(uint32_t);
    BOHandle cmd_bo_handle;
    status = CreateCmdBO(cmd_size, cmd_bo_handle);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
    // Unmap and close the command BO in case of an error.
    MAKE_NAMED_SCOPE_GUARD(cmd_bo_handle_guard, [&] { DestroyBOHandle(cmd_bo_handle); });

    auto* cmd = static_cast<amdxdna_cmd*>(cmd_bo_handle.vaddr);

    // Filling in the fields of the command
    cmd->state = pkt->state;
    cmd->extra_cu_masks = 0;

    // The driver places a structure before each command in a command chain.
    // Need to increase the size of the command by the size of this structure.
    cmd->count = pkt->count + CMD_COUNT_SIZE_INCREASE;
    cmd->opcode = pkt->opcode;

    // Find if the PDI is cached in the queues PDI cache. If even one PDI is not found, the hardware
    // context will need to be reconfigured and the cache updated.
    auto pdi_bo_handle = FindBOHandle(cmd_pkt_payload->pdi_addr);
    if (!pdi_bo_handle.IsValid()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

    // Determine if the PDI is cached, if not it will be added to the PDI cache.
    auto cached_pdi_index = pdi_cache.GetIndex(pdi_bo_handle.handle);
    if (cached_pdi_index == PDICache::NotFound) {
      FlushCpuCache(pdi_bo_handle.vaddr, 0, pdi_bo_handle.size);
      status = pdi_cache.SetNext(pdi_bo_handle, cached_pdi_index);
      if (status != HSA_STATUS_SUCCESS) {
        return status;
      }
      reconfigure_queue = true;
    }

    // The first dword is a bit mask selecting the PDI by its cache index; the packet payload
    // follows it.
    cmd->data[0] = 0x1 << static_cast<uint32_t>(cached_pdi_index);
    memcpy((cmd->data + 1), cmd_pkt_payload->data, 4 * pkt->count);

    // Keeping track of the command
    cmd_bo_handles.push_back(cmd_bo_handle);
    cmd_bo_handle_guard.Dismiss();
  }

  // If there were PDIs that were not cached, the hardware context needs to be reconfigured.
  // The cache map will be updated with the new hardware context.
  if (reconfigure_queue) {
    if (pdi_cache_it != hw_ctx_pdi_cache_map.end()) {
      hw_ctx_pdi_cache_map.erase(pdi_cache_it);
    }

    hsa_status_t status = ConfigHwCtx(pdi_cache, queue_id, num_core_tiles);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }

    // ConfigHwCtx stored the handle of the new hardware context in queue_id. The cache has to be
    // keyed by that new handle: the next submission looks it up with the updated queue_id, and a
    // cache stored under the old handle would never be found again, forcing a reconfiguration
    // on every submission.
    hw_ctx_handle = static_cast<uint32_t>(queue_id);
    // Drop any stale entry left behind by an earlier context that used the same handle value,
    // since emplace() would otherwise keep the stale cache.
    hw_ctx_pdi_cache_map.erase(hw_ctx_handle);
    hw_ctx_pdi_cache_map.emplace(hw_ctx_handle, pdi_cache);
  }

  // Creating a packet that contains the command chain
  const uint32_t cmd_chain_size = (cmd_bo_handles.size() + 1) * sizeof(uint32_t);
  BOHandle cmd_chain_bo_handle;
  hsa_status_t status = CreateCmdBO(cmd_chain_size, cmd_chain_bo_handle);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  // Unmap and close the command chain BO in case of an error.
  MAKE_NAMED_SCOPE_GUARD(cmd_chain_bo_handle_guard, [&] { DestroyBOHandle(cmd_chain_bo_handle); });

  auto* cmd_chain = static_cast<amdxdna_cmd*>(cmd_chain_bo_handle.vaddr);

  // Writing information to the command buffer
  amdxdna_cmd_chain* cmd_chain_payload = reinterpret_cast<amdxdna_cmd_chain*>(cmd_chain->data);

  // Creating a command chain
  cmd_chain->state = HSA_AMD_AIE_ERT_STATE_NEW;
  cmd_chain->extra_cu_masks = 0;
  cmd_chain->count = sizeof(amdxdna_cmd_chain) + cmd_bo_handles.size() * sizeof(uint64_t);
  cmd_chain->opcode = HSA_AMD_AIE_ERT_CMD_CHAIN;
  cmd_chain_payload->command_count = cmd_bo_handles.size();
  cmd_chain_payload->submit_index = 0;
  cmd_chain_payload->error_index = 0;
  for (size_t i = 0; i < cmd_bo_handles.size(); i++) {
    cmd_chain_payload->data[i] = cmd_bo_handles[i].handle;
  }

  // Removing duplicates in the bo container. The driver will report
  // an error if we provide the same BO handle multiple times.
  // This can happen if any of the BOs are the same across jobs
  std::sort(bo_handles.begin(), bo_handles.end());
  bo_handles.erase(std::unique(bo_handles.begin(), bo_handles.end()), bo_handles.end());

  // Executing all commands in the command chain
  status = ExecCmdAndWait(cmd_chain_bo_handle, bo_handles, queue_id);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Flush the operand buffers of every packet again now that the commands have completed.
  for (uint32_t pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
    hsa_amd_aie_ert_packet_t* pkt = first_pkt + pkt_iter;
    auto* cmd_pkt_payload =
        reinterpret_cast<hsa_amd_aie_ert_start_kernel_data_t*>(pkt->payload_data);
    FlushOperands(pkt->count, cmd_pkt_payload);
  }

  // Unmapping and closing the cmd BOs
  cmd_bo_handles_guard.Dismiss();
  for (auto& command_bo_handle : cmd_bo_handles) {
    if (munmap(command_bo_handle.vaddr, command_bo_handle.size) != 0) {
      status = HSA_STATUS_ERROR;
    }
    drm_gem_close close_bo_args = {};
    close_bo_args.handle = command_bo_handle.handle;
    ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
  }

  // Unmapping and closing the cmd_chain BO
  cmd_chain_bo_handle_guard.Dismiss();
  if (munmap(cmd_chain, cmd_chain_size) != 0) {
    status = HSA_STATUS_ERROR;
  }
  drm_gem_close close_bo_args = {};
  close_bo_args.handle = cmd_chain_bo_handle.handle;
  ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);

  return status;
}

/// @brief Acquires the streaming performance monitor. Not supported; the parameter is ignored.
/// @return always HSA_STATUS_ERROR_INVALID_AGENT
hsa_status_t XdnaDriver::SPMAcquire(uint32_t preferred_node_id) const {
  // AIE does not support streaming performance monitor.
  return HSA_STATUS_ERROR_INVALID_AGENT;
}

/// @brief Releases the streaming performance monitor. Not supported; the parameter is ignored.
/// @return always HSA_STATUS_ERROR_INVALID_AGENT
hsa_status_t XdnaDriver::SPMRelease(uint32_t preferred_node_id) const {
  // AIE does not support streaming performance monitor.
  return HSA_STATUS_ERROR_INVALID_AGENT;
}

/// @brief Sets the destination buffer of the streaming performance monitor. Not supported; all
///        parameters are ignored and no output parameter is written.
/// @return always HSA_STATUS_ERROR_INVALID_AGENT
hsa_status_t XdnaDriver::SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes,
                                          uint32_t* timeout, uint32_t* size_copied,
                                          void* dest_mem_addr, bool* is_spm_data_loss) const {
  // AIE does not support streaming performance monitor.
  return HSA_STATUS_ERROR_INVALID_AGENT;
}

/// @brief Reports whether a driver model is enabled.
/// @param[out] enable always set to false; must not be null since it is written unconditionally
/// @return HSA_STATUS_SUCCESS
hsa_status_t XdnaDriver::IsModelEnabled(bool* enable) const {
  // AIE does not support a driver model.
  *enable = false;
  return HSA_STATUS_SUCCESS;
}

/// @brief Unmaps a buffer object, closes its GEM handle and resets the handle structure.
///
/// This is best-effort cleanup: the results of munmap() and of the close ioctl are ignored.
///
/// @param[in,out] handle buffer object to destroy; value-initialized on return
void XdnaDriver::DestroyBOHandle(BOHandle& handle) {
  munmap(handle.vaddr, handle.size);
  drm_gem_close close_bo_args = {};
  close_bo_args.handle = handle.handle;
  ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
  handle = {};
}

/// @brief Finds the tracked buffer object that contains an address.
///
/// The address may be the base address of an allocation or any address inside it, including
/// addresses inside the allocation with the highest base address.
///
/// @param mem address to look up
/// @return the BO whose range [base, base + size) contains @p mem, or a default-constructed
///         BOHandle if no tracked allocation contains it
XdnaDriver::BOHandle XdnaDriver::FindBOHandle(void* mem) const {
  // First allocation whose base address is equal to or greater than mem.
  auto it = vmem_addr_mappings.lower_bound(mem);

  if (it != vmem_addr_mappings.cend() && it->first == mem) {
    // Exact address found.
    return it->second;
  }

  if (it == vmem_addr_mappings.cbegin()) {
    // No allocations are tracked, or the address is smaller than the smallest registered address.
    return BOHandle{};
  }

  // The only allocation that can contain mem is the one just before `it`. This also covers
  // `it == cend()`, where mem is above every base address and may still lie inside the last
  // allocation.
  --it;

  assert(it->first < mem);
  if (mem >= (static_cast<char*>(it->first) + it->second.size)) {
    // Address is not from this allocation.
    return BOHandle{};
  }

  return it->second;
}

/// @brief Replaces the hardware context of a queue with a new one configured for a set of PDIs.
///
/// An existing hardware context is destroyed first. A new context is then created and configured
/// with one CU per cached PDI. If the configuration fails, the newly created context is destroyed
/// again so that it does not leak.
///
/// @param pdi_bo_handles PDI cache listing the PDI buffer objects to configure as CUs
/// @param[in,out] queue_id handle of the current hardware context on entry (may be
///        AMDXDNA_INVALID_CTX_HANDLE); handle of the new context on success. If the old context
///        was destroyed and a later step fails, it is left as AMDXDNA_INVALID_CTX_HANDLE.
/// @param num_core_tiles number of tiles requested for the new context
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_OUT_OF_RESOURCES if memory or the new
///         context cannot be allocated, HSA_STATUS_ERROR if destroying the old context or
///         configuring the new one fails
hsa_status_t XdnaDriver::ConfigHwCtx(const PDICache& pdi_bo_handles, HSA_QUEUEID& queue_id,
                                     uint32_t num_core_tiles) {
  const size_t config_cu_param_size =
      sizeof(amdxdna_hwctx_param_config_cu) + pdi_bo_handles.size() * sizeof(amdxdna_cu_config);

  // Zero-initialize the block: only num_cus, cu_bo and cu_func are assigned below, and the
  // remaining (padding) bytes must not be handed to the kernel uninitialized.
  auto* xdna_config_cu_param =
      static_cast<amdxdna_hwctx_param_config_cu*>(calloc(1, config_cu_param_size));
  if (xdna_config_cu_param == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  MAKE_SCOPE_GUARD([xdna_config_cu_param] { free(xdna_config_cu_param); });

  xdna_config_cu_param->num_cus = pdi_bo_handles.size();

  for (size_t i = 0; i < pdi_bo_handles.size(); i++) {
    xdna_config_cu_param->cu_configs[i].cu_bo = pdi_bo_handles[i].handle;
    xdna_config_cu_param->cu_configs[i].cu_func = default_cu_func;
  }

  auto hw_ctx_handle = static_cast<uint32_t>(queue_id);

  if (hw_ctx_handle != AMDXDNA_INVALID_CTX_HANDLE) {
    // Destroy the hardware context
    // Note: we can do this because we have forced synchronization between
    // command chains. If we move to a more asynchronous model, we will need to
    // figure out how hardware context destruction works while applications
    // are running
    amdxdna_drm_destroy_hwctx destroy_hwctx_args = {};
    destroy_hwctx_args.handle = hw_ctx_handle;
    if (ioctl(fd_, DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &destroy_hwctx_args) < 0) {
      return HSA_STATUS_ERROR;
    }
    queue_id = AMDXDNA_INVALID_CTX_HANDLE;
  }

  // Create the new hardware context
  // Currently we do not leverage QoS information.
  amdxdna_qos_info qos_info = {};
  amdxdna_drm_create_hwctx create_hwctx_args = {};
  create_hwctx_args.qos_p = reinterpret_cast<uintptr_t>(&qos_info);
  create_hwctx_args.max_opc = 0x800;
  create_hwctx_args.num_tiles = num_core_tiles;

  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CREATE_HWCTX, &create_hwctx_args) < 0) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Configure the new hardware context
  amdxdna_drm_config_hwctx config_hw_ctx_args = {};
  config_hw_ctx_args.handle = create_hwctx_args.handle;
  config_hw_ctx_args.param_type = DRM_AMDXDNA_HWCTX_CONFIG_CU;
  config_hw_ctx_args.param_val = reinterpret_cast<uint64_t>(xdna_config_cu_param);
  config_hw_ctx_args.param_val_size = static_cast<uint32_t>(config_cu_param_size);

  if (ioctl(fd_, DRM_IOCTL_AMDXDNA_CONFIG_HWCTX, &config_hw_ctx_args) < 0) {
    // The caller never learns about the new context, so release it here (best effort).
    amdxdna_drm_destroy_hwctx destroy_new_hwctx_args = {};
    destroy_new_hwctx_args.handle = create_hwctx_args.handle;
    ioctl(fd_, DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &destroy_new_hwctx_args);
    return HSA_STATUS_ERROR;
  }

  queue_id = create_hwctx_args.handle;

  return HSA_STATUS_SUCCESS;
}

/// @brief Sets a trap handler. Not implemented for the XDNA driver; all parameters are ignored.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                                        const void* buffer_base, uint64_t buffer_base_size) const {
  return HSA_STATUS_ERROR;
}

/// @brief Allocates scratch memory. Not implemented for the XDNA driver; @p mem is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const {
  return HSA_STATUS_ERROR;
}

/// @brief Returns a device handle for a node. Not implemented for the XDNA driver;
///        @p device_handle is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
  return HSA_STATUS_ERROR;
}

/// @brief Reads the clock counters of a node. Not implemented for the XDNA driver;
///        @p clock_counter is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const {
  return HSA_STATUS_ERROR;
}


/// @brief Returns the tile configuration of a node. Not implemented for the XDNA driver;
///        @p config is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const {
  return HSA_STATUS_ERROR;
}

/// @brief Returns the wall-clock frequency of a node. Not implemented for the XDNA driver;
///        @p frequency is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const {
  return HSA_STATUS_ERROR;
}

/// @brief Returns the available memory of a node. Not implemented for the XDNA driver;
///        @p available_size is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::AvailableMemory(uint32_t node_id, uint64_t* available_size) const {
  return HSA_STATUS_ERROR;
}

/// @brief Registers host memory. Not implemented for the XDNA driver; all parameters are ignored.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const {
  return HSA_STATUS_ERROR;
}

/// @brief Deregisters host memory. Not implemented for the XDNA driver; the parameter is ignored.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const { return HSA_STATUS_ERROR; }

/// @brief Makes memory resident. Not implemented for the XDNA driver; all parameters are ignored
///        and @p alternate_va is not written.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                            const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                            const uint32_t* nodes) const {
  return HSA_STATUS_ERROR;
}

/// @brief Makes memory unresident. Not implemented for the XDNA driver; the parameter is ignored.
/// @return always HSA_STATUS_ERROR
hsa_status_t XdnaDriver::MakeMemoryUnresident(const void* mem) const { return HSA_STATUS_ERROR; }

} // namespace AMD
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/driver/xdna/uapi/amdxdna_accel.h
================================================
/* SPDX-License-Identifier: NCSA */
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#ifndef AMDXDNA_ACCEL_H_
#define AMDXDNA_ACCEL_H_

#include <libdrm/drm.h>
#include <linux/const.h>
#include <linux/stddef.h>

#if defined(__cplusplus)
extern "C" {
#endif

#ifndef __counted_by
#define __counted_by(cnt)
#endif

/* Driver interface version described by this header. */
#define AMDXDNA_DRIVER_MAJOR 1
#define AMDXDNA_DRIVER_MINOR 0

/* Sentinel values that mark an address or a handle as invalid. */
#define AMDXDNA_INVALID_ADDR (~0UL)
#define AMDXDNA_INVALID_CTX_HANDLE 0
#define AMDXDNA_INVALID_BO_HANDLE 0
#define AMDXDNA_INVALID_FENCE_HANDLE 0

/*
 * The interface can grow/extend over time.
 * On each struct amdxdna_drm_*, to support potential extension, we defined it
 * like this.
 *
 * Example code:
 *
 * struct amdxdna_drm_example_data {
 *	.ext = (uintptr_t)&example_data_ext;
 *	...
 * };
 *
 * There are no extensions yet. Extension structs will be defined in the future.
 */

/*
 * Driver-private ioctl command indices. Each value is added to
 * DRM_COMMAND_BASE by the matching DRM_IOCTL_AMDXDNA_* macro defined at the
 * end of this header. Values are assigned implicitly starting at 0, so the
 * enumerator order determines the ioctl numbers; DRM_AMDXDNA_NUM_IOCTLS is
 * the number of commands.
 */
enum amdxdna_drm_ioctl_id {
  DRM_AMDXDNA_CREATE_HWCTX,
  DRM_AMDXDNA_DESTROY_HWCTX,
  DRM_AMDXDNA_CONFIG_HWCTX,
  DRM_AMDXDNA_CREATE_BO,
  DRM_AMDXDNA_GET_BO_INFO,
  DRM_AMDXDNA_SYNC_BO,
  DRM_AMDXDNA_EXEC_CMD,
  DRM_AMDXDNA_GET_INFO,
  DRM_AMDXDNA_SET_STATE,
  DRM_AMDXDNA_WAIT_CMD,
  DRM_AMDXDNA_NUM_IOCTLS
};

/*
 * Device type. UNKNOWN is explicitly -1, which makes KMQ 0 and UMQ 1.
 * UMQ refers to the user mode queue (see umq_bo in struct
 * amdxdna_drm_create_hwctx); KMQ presumably stands for kernel mode queue --
 * confirm against the driver.
 */
enum amdxdna_device_type {
  AMDXDNA_DEV_TYPE_UNKNOWN = -1,
  AMDXDNA_DEV_TYPE_KMQ,
  AMDXDNA_DEV_TYPE_UMQ,
};

/**
 * struct amdxdna_qos_info - QoS information for driver.
 * @gops: Giga operations per second.
 * @fps: Frames per second.
 * @dma_bandwidth: DMA bandwidth.
 * @latency: Frame response latency.
 * @frame_exec_time: Frame execution time.
 * @priority: Request priority.
 *
 * User program can provide QoS hints to driver.
 */
struct amdxdna_qos_info {
  __u32 gops;
  __u32 fps;
  __u32 dma_bandwidth;
  __u32 latency;
  __u32 frame_exec_time;
  __u32 priority;
};

/**
 * struct amdxdna_drm_create_hwctx - Create hardware context.
 * @ext: MBZ.
 * @ext_flags: MBZ.
 * @qos_p: Address of QoS info (presumably a struct amdxdna_qos_info; confirm
 *         against the driver).
 * @umq_bo: BO handle for user mode queue(UMQ).
 * @log_buf_bo: BO handle for log buffer.
 * @max_opc: Maximum operations per cycle.
 * @num_tiles: Number of AIE tiles.
 * @mem_size: Size of AIE tile memory.
 * @umq_doorbell: Returned offset of doorbell associated with UMQ.
 * @handle: Returned hardware context handle.
 * @pad: Structure padding.
 *
 * Argument of DRM_IOCTL_AMDXDNA_CREATE_HWCTX.
 */
struct amdxdna_drm_create_hwctx {
  __u64 ext;
  __u64 ext_flags;
  __u64 qos_p;
  __u32 umq_bo;
  __u32 log_buf_bo;
  __u32 max_opc;
  __u32 num_tiles;
  __u32 mem_size;
  __u32 umq_doorbell; /* out */
  __u32 handle;       /* out */
  __u32 pad;
};

/**
 * struct amdxdna_drm_destroy_hwctx - Destroy hardware context.
 * @handle: Hardware context handle (presumably the value returned in
 *          struct amdxdna_drm_create_hwctx.handle; confirm against the driver).
 * @pad: Structure padding.
 *
 * Argument of DRM_IOCTL_AMDXDNA_DESTROY_HWCTX.
 */
struct amdxdna_drm_destroy_hwctx {
  __u32 handle;
  __u32 pad;
};

/**
 * struct amdxdna_cu_config - configuration for one CU
 * @cu_bo: CU configuration buffer bo handle.
 * @cu_func: Function of a CU.
 * @pad: Structure padding (brings the struct to 8 bytes).
 *
 * Element type of amdxdna_hwctx_param_config_cu.cu_configs.
 */
struct amdxdna_cu_config {
  __u32 cu_bo;
  __u8 cu_func;
  __u8 pad[3];
};

/**
 * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware
 * context
 * @num_cus: Number of CUs to configure.
 * @pad: Structure padding.
 * @cu_configs: Array of CU configurations of struct amdxdna_cu_config.
 *
 * @cu_configs is a flexible array member annotated as holding @num_cus
 * elements, so the buffer is an 8-byte header followed by @num_cus entries.
 * Presumably this is the buffer passed through
 * amdxdna_drm_config_hwctx.param_val for DRM_AMDXDNA_HWCTX_CONFIG_CU --
 * confirm against the driver.
 */
struct amdxdna_hwctx_param_config_cu {
  __u16 num_cus;
  __u16 pad[3];
  struct amdxdna_cu_config cu_configs[] __counted_by(num_cus);
};

/*
 * Values for amdxdna_drm_config_hwctx.param_type; they select what
 * param_val carries. Values are assigned implicitly starting at 0, and
 * DRM_AMDXDNA_HWCTX_CONFIG_NUM is the number of parameter types.
 */
enum amdxdna_drm_config_hwctx_param {
  DRM_AMDXDNA_HWCTX_CONFIG_CU,
  DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF,
  DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF,
  DRM_AMDXDNA_HWCTX_CONFIG_NUM
};

/**
 * struct amdxdna_drm_config_hwctx - Configure hardware context.
 * @handle: Hardware context handle.
 * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the
 *              structure passed in via param_val.
 * @param_val: A structure specified by the param_type struct member.
 * @param_val_size: Size of the parameter buffer pointed to by the param_val.
 *                  If param_val is not a pointer, driver can ignore this.
 * @pad: Structure padding.
 *
 * Argument of DRM_IOCTL_AMDXDNA_CONFIG_HWCTX.
 *
 * Note: if the param_val is a pointer pointing to a buffer, the maximum size
 * of the buffer is 4 KiB (PAGE_SIZE).
 */
struct amdxdna_drm_config_hwctx {
  __u32 handle;
  __u32 param_type;
  __u64 param_val;
  __u32 param_val_size;
  __u32 pad;
};

/*
 * Buffer object types, used in amdxdna_drm_create_bo.type.
 *
 * AMDXDNA_BO_INVALID:  Zero value; not a valid buffer type
 * AMDXDNA_BO_SHMEM:    DRM GEM SHMEM bo
 * AMDXDNA_BO_DEV_HEAP: Shared host memory to device as heap memory
 * AMDXDNA_BO_DEV:      Allocated from BO_DEV_HEAP
 * AMDXDNA_BO_CMD:      User and driver accessible bo
 * AMDXDNA_BO_DMA:      DRM GEM DMA bo
 */
enum amdxdna_bo_type {
  AMDXDNA_BO_INVALID = 0,
  AMDXDNA_BO_SHMEM,
  AMDXDNA_BO_DEV_HEAP,
  AMDXDNA_BO_DEV,
  AMDXDNA_BO_CMD,
  AMDXDNA_BO_DMA,
};

/**
 * struct amdxdna_drm_create_bo - Create a buffer object.
 * @flags: Buffer flags. MBZ.
 * @vaddr: User VA of buffer if applied. MBZ.
 * @size: Size in bytes.
 * @type: Buffer type (presumably a value from enum amdxdna_bo_type).
 * @handle: Returned DRM buffer object handle.
 *
 * Argument of DRM_IOCTL_AMDXDNA_CREATE_BO.
 */
struct amdxdna_drm_create_bo {
  __u64 flags;
  __u64 vaddr;
  __u64 size;
  __u32 type;
  __u32 handle; /* out */
};

/**
 * struct amdxdna_drm_get_bo_info - Get buffer object information.
 * @ext: MBZ.
 * @ext_flags: MBZ.
 * @handle: DRM buffer object handle.
 * @pad: Structure padding.
 * @map_offset: Returned DRM fake offset for mmap().
 * @vaddr: Returned user VA of buffer. 0 in case user needs mmap().
 * @xdna_addr: Returned XDNA device virtual address.
 *
 * Argument of DRM_IOCTL_AMDXDNA_GET_BO_INFO.
 */
struct amdxdna_drm_get_bo_info {
  __u64 ext;
  __u64 ext_flags;
  __u32 handle;
  __u32 pad;
  __u64 map_offset; /* out */
  __u64 vaddr;      /* out */
  __u64 xdna_addr;  /* out */
};

/**
 * struct amdxdna_drm_sync_bo - Sync buffer object.
 * @handle: Buffer object handle.
 * @direction: Direction of sync: SYNC_DIRECT_TO_DEVICE (0) or
 *             SYNC_DIRECT_FROM_DEVICE (1).
 * @offset: Offset in the buffer to sync.
 * @size: Size in bytes.
 *
 * Argument of DRM_IOCTL_AMDXDNA_SYNC_BO.
 */
struct amdxdna_drm_sync_bo {
  __u32 handle;
#define SYNC_DIRECT_TO_DEVICE 0U
#define SYNC_DIRECT_FROM_DEVICE 1U
  __u32 direction;
  __u64 offset;
  __u64 size;
};

/* Command types accepted in amdxdna_drm_exec_cmd.type. */
enum amdxdna_cmd_type {
  AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0,
  AMDXDNA_CMD_SUBMIT_DEPENDENCY,
  AMDXDNA_CMD_SUBMIT_SIGNAL,
};

/**
 * struct amdxdna_drm_exec_cmd - Execute command.
 * @ext: MBZ.
 * @ext_flags: MBZ.
 * @hwctx: Hardware context handle.
 * @type: One of the command types in enum amdxdna_cmd_type.
 * @cmd_handles: Array of command handles, or the command handle itself when
 *               there is just one.
 * @args: Array of arguments for all command handles.
 * @cmd_count: Number of command handles in the cmd_handles array.
 * @arg_count: Number of arguments in the args array.
 * @seq: Returned sequence number for this command.
 *
 * Argument of DRM_IOCTL_AMDXDNA_EXEC_CMD. The returned @seq is what
 * struct amdxdna_drm_wait_cmd.seq refers to.
 */
struct amdxdna_drm_exec_cmd {
  __u64 ext;
  __u64 ext_flags;
  __u32 hwctx;
  __u32 type;
  __u64 cmd_handles;
  __u64 args;
  __u32 cmd_count;
  __u32 arg_count;
  __u64 seq; /* out */
};

/**
 * struct amdxdna_drm_wait_cmd - Wait for an executed command.
 *
 * @hwctx: Hardware context handle.
 * @timeout: Timeout in ms, 0 implies infinite wait.
 * @seq: Sequence number of the command returned by execute command.
 *
 * Wait for the command specified by seq to be completed.
 * Argument of DRM_IOCTL_AMDXDNA_WAIT_CMD.
 */
struct amdxdna_drm_wait_cmd {
  __u32 hwctx;
  __u32 timeout;
  __u64 seq;
};

/**
 * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
 * @buffer: The user space buffer that will return the AIE status.
 * @buffer_size: The size of the user space buffer.
 * @cols_filled: A bitmap of AIE columns whose data has been returned in the buffer.
 *
 * NOTE(review): presumably the payload for DRM_AMDXDNA_QUERY_AIE_STATUS in
 * enum amdxdna_drm_get_param -- confirm against the driver.
 */
struct amdxdna_drm_query_aie_status {
  __u64 buffer;      /* out */
  __u32 buffer_size; /* in */
  __u32 cols_filled; /* out */
};

/**
 * struct amdxdna_drm_query_aie_version - Query the version of the AIE hardware
 * @major: The major version number.
 * @minor: The minor version number.
 *
 * Also embedded as the version member of struct
 * amdxdna_drm_query_aie_metadata.
 */
struct amdxdna_drm_query_aie_version {
  __u32 major; /* out */
  __u32 minor; /* out */
};

/**
 * struct amdxdna_drm_query_aie_tile_metadata - Query the metadata of AIE tile
 * (core, mem, shim)
 * @row_count: The number of rows.
 * @row_start: The starting row number.
 * @dma_channel_count: The number of dma channels.
 * @lock_count: The number of locks.
 * @event_reg_count: The number of events (event registers, per the field
 *                   name).
 * @pad: Structure padding.
 *
 * Used for the core, mem and shim members of struct
 * amdxdna_drm_query_aie_metadata.
 */
struct amdxdna_drm_query_aie_tile_metadata {
  __u16 row_count;
  __u16 row_start;
  __u16 dma_channel_count;
  __u16 lock_count;
  __u16 event_reg_count;
  __u16 pad[3];
};

/**
 * struct amdxdna_drm_query_aie_metadata - Query the metadata of the AIE hardware
 * @col_size: The size of a column in bytes.
 * @cols: The total number of columns.
 * @rows: The total number of rows.
 * @version: The version of the AIE hardware.
 * @core: The metadata for all core tiles.
 * @mem: The metadata for all mem tiles.
 * @shim: The metadata for all shim tiles.
 *
 * NOTE(review): presumably the payload for DRM_AMDXDNA_QUERY_AIE_METADATA in
 * enum amdxdna_drm_get_param -- confirm against the driver.
 */
struct amdxdna_drm_query_aie_metadata {
  __u32 col_size;
  __u16 cols;
  __u16 rows;
  struct amdxdna_drm_query_aie_version version;
  struct amdxdna_drm_query_aie_tile_metadata core;
  struct amdxdna_drm_query_aie_tile_metadata mem;
  struct amdxdna_drm_query_aie_tile_metadata shim;
};

/**
 * struct amdxdna_drm_query_clock - Metadata for a clock
 * @name: The clock name (16-byte field).
 * @freq_mhz: The clock frequency, in MHz per the field name.
 * @pad: Structure padding.
 *
 * Used for both members of struct amdxdna_drm_query_clock_metadata.
 */
struct amdxdna_drm_query_clock {
  __u8 name[16];
  __u32 freq_mhz;
  __u32 pad;
};

/**
 * struct amdxdna_drm_query_clock_metadata - Query metadata for clocks
 * @mp_npu_clock: The metadata for MP-NPU clock.
 * @h_clock: The metadata for H clock.
 *
 * NOTE(review): presumably the payload for DRM_AMDXDNA_QUERY_CLOCK_METADATA in
 * enum amdxdna_drm_get_param -- confirm against the driver.
 */
struct amdxdna_drm_query_clock_metadata {
  struct amdxdna_drm_query_clock mp_npu_clock;
  struct amdxdna_drm_query_clock h_clock;
};

/*
 * Sensor kinds reported in amdxdna_drm_query_sensor.type. Only a power
 * sensor is defined.
 */
enum amdxdna_sensor_type { AMDXDNA_SENSOR_TYPE_POWER };

/**
 * struct amdxdna_drm_query_sensor - The data for single sensor.
 * @label: The name for a sensor.
 * @input: The current value of the sensor.
 * @max: The maximum value possible for the sensor.
 * @average: The average value of the sensor.
 * @highest: The highest recorded sensor value for this driver load for the sensor.
 * @status: The sensor status.
 * @units: The sensor units.
 * @unitm: Translates value member variables into the correct unit via (pow(10, unitm) * value).
 *         Signed, so the exponent may be negative.
 * @type: The sensor type from enum amdxdna_sensor_type.
 * @pad: Structure padding.
 *
 * NOTE(review): presumably the element type returned for
 * DRM_AMDXDNA_QUERY_SENSORS -- confirm against the driver.
 */
struct amdxdna_drm_query_sensor {
  __u8 label[64];
  __u32 input;
  __u32 max;
  __u32 average;
  __u32 highest;
  __u8 status[64];
  __u8 units[16];
  __s8 unitm;
  __u8 type;
  __u8 pad[6];
};

/**
 * struct amdxdna_drm_query_hwctx - The data for single context.
 * @context_id: The ID for this context.
 * @start_col: The starting column for the partition assigned to this context.
 * @num_col: The number of columns in the partition assigned to this context.
 * @pad: Structure padding.
 * @pid: The Process ID of the process that created this context.
 * @command_submissions: The number of commands submitted to this context.
 * @command_completions: The number of commands completed by this context.
 * @migrations: The number of times this context has been moved to a different partition.
 * @preemptions: The number of times this context has been preempted by another context in the
 *               same partition.
 * @errors: The errors for this context.
 *
 * NOTE(review): presumably the element type returned for
 * DRM_AMDXDNA_QUERY_HW_CONTEXTS -- confirm against the driver.
 */
struct amdxdna_drm_query_hwctx {
  __u32 context_id;
  __u32 start_col;
  __u32 num_col;
  __u32 pad;
  __s64 pid;
  __u64 command_submissions;
  __u64 command_completions;
  __u64 migrations;
  __u64 preemptions;
  __u64 errors;
};

/**
 * struct amdxdna_drm_aie_mem - The data for AIE memory read/write
 * @col:   The AIE column index
 * @row:   The AIE row index
 * @addr:  The AIE memory address to read/write
 * @size:  The number of bytes to read/write
 * @buf_p: Address of the buffer that receives the data read or supplies the
 *         data to write
 *
 * This is used for DRM_AMDXDNA_READ_AIE_MEM (a value of enum
 * amdxdna_drm_get_param) and DRM_AMDXDNA_WRITE_AIE_MEM (a value of enum
 * amdxdna_drm_set_param) parameters.
 */
struct amdxdna_drm_aie_mem {
  __u32 col;
  __u32 row;
  __u32 addr;
  __u32 size;
  __u64 buf_p;
};

/**
 * struct amdxdna_drm_aie_reg - The data for AIE register read/write
 * @col: The AIE column index
 * @row: The AIE row index
 * @addr: The AIE register address to read/write
 * @val: The value to write or returned value from AIE
 *
 * This is used for DRM_AMDXDNA_READ_AIE_REG (a value of enum
 * amdxdna_drm_get_param) and DRM_AMDXDNA_WRITE_AIE_REG (a value of enum
 * amdxdna_drm_set_param) parameters.
 */
struct amdxdna_drm_aie_reg {
  __u32 col;
  __u32 row;
  __u32 addr;
  __u32 val;
};

/*
 * Power modes carried in the power_mode member of struct
 * amdxdna_drm_get_power_mode and struct amdxdna_drm_set_power_mode.
 * Values are assigned implicitly starting at 0.
 */
enum amdxdna_power_mode_type {
  POWER_MODE_DEFAULT, /**< Fallback to calculated DPM */
  POWER_MODE_LOW,     /**< Set frequency to lowest DPM */
  POWER_MODE_MEDIUM,  /**< Set frequency to medium DPM */
  POWER_MODE_HIGH,    /**< Set frequency to highest DPM */
  POWER_MODE_TURBO,   /**< More power, more performance */
};

/**
 * struct amdxdna_drm_get_power_mode - Get the power mode of the AIE hardware
 * @power_mode: The power mode, a value from enum amdxdna_power_mode_type
 * @pad: MBZ.
 */
struct amdxdna_drm_get_power_mode {
  __u8 power_mode;
  __u8 pad[7];
};

/**
 * struct amdxdna_drm_query_firmware_version - Query the version of the firmware
 * @major: The major version number
 * @minor: The minor version number
 * @patch: The patch level version number
 * @build: The build ID
 *
 * NOTE(review): presumably the payload for DRM_AMDXDNA_QUERY_FIRMWARE_VERSION
 * in enum amdxdna_drm_get_param -- confirm against the driver.
 */
struct amdxdna_drm_query_firmware_version {
  __u32 major; /* out */
  __u32 minor; /* out */
  __u32 patch; /* out */
  __u32 build; /* out */
};

/*
 * Values for amdxdna_drm_get_info.param; they select which structure the
 * caller's buffer holds. Values are assigned implicitly starting at 0, and
 * DRM_AMDXDNA_NUM_GET_PARAM is the number of parameters.
 * NOTE(review): no payload structure for DRM_AMDXDNA_QUERY_TELEMETRY is
 * declared in this header.
 */
enum amdxdna_drm_get_param {
  DRM_AMDXDNA_QUERY_AIE_STATUS,
  DRM_AMDXDNA_QUERY_AIE_METADATA,
  DRM_AMDXDNA_QUERY_AIE_VERSION,
  DRM_AMDXDNA_QUERY_CLOCK_METADATA,
  DRM_AMDXDNA_QUERY_SENSORS,
  DRM_AMDXDNA_QUERY_HW_CONTEXTS,
  DRM_AMDXDNA_READ_AIE_MEM,
  DRM_AMDXDNA_READ_AIE_REG,
  DRM_AMDXDNA_QUERY_FIRMWARE_VERSION,
  DRM_AMDXDNA_GET_POWER_MODE,
  DRM_AMDXDNA_QUERY_TELEMETRY,
  DRM_AMDXDNA_NUM_GET_PARAM,
};

/**
 * struct amdxdna_drm_get_info - Get some information from the AIE hardware.
 * @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed in the buffer.
 * @buffer_size: Size of the input buffer. Size needed/written by the kernel.
 * @buffer: A structure specified by the param struct member.
 *
 * Argument of DRM_IOCTL_AMDXDNA_GET_INFO.
 */
struct amdxdna_drm_get_info {
  __u32 param;       /* in */
  __u32 buffer_size; /* in/out */
  __u64 buffer;      /* in/out */
};

/**
 * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
 * @power_mode: The power mode, a value from enum amdxdna_power_mode_type
 * @pad: MBZ.
 */
struct amdxdna_drm_set_power_mode {
  __u8 power_mode;
  __u8 pad[7];
};

/*
 * Values for amdxdna_drm_set_state.param; they select which structure the
 * caller's buffer holds. Values are assigned implicitly starting at 0, and
 * DRM_AMDXDNA_NUM_SET_PARAM is the number of parameters.
 */
enum amdxdna_drm_set_param {
  DRM_AMDXDNA_SET_POWER_MODE,
  DRM_AMDXDNA_WRITE_AIE_MEM,
  DRM_AMDXDNA_WRITE_AIE_REG,
  DRM_AMDXDNA_NUM_SET_PARAM,
};

/**
 * struct amdxdna_drm_set_state - Set the state of some component within the AIE hardware.
 * @param: Value in enum amdxdna_drm_set_param. Specifies the structure passed in the buffer.
 * @buffer_size: Size of the input buffer.
 * @buffer: A structure specified by the param struct member.
 *
 * Argument of DRM_IOCTL_AMDXDNA_SET_STATE.
 */
struct amdxdna_drm_set_state {
  __u32 param;       /* in */
  __u32 buffer_size; /* in */
  __u64 buffer;      /* in */
};

/*
 * ioctl request codes. Every request is built with DRM_IOWR from
 * DRM_COMMAND_BASE plus the matching enum amdxdna_drm_ioctl_id value and the
 * argument structure for that command.
 */
#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX                                                             \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, struct amdxdna_drm_create_hwctx)

#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX                                                            \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, struct amdxdna_drm_destroy_hwctx)

#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX                                                             \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, struct amdxdna_drm_config_hwctx)

#define DRM_IOCTL_AMDXDNA_CREATE_BO                                                                \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_BO, struct amdxdna_drm_create_bo)

#define DRM_IOCTL_AMDXDNA_GET_BO_INFO                                                              \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_BO_INFO, struct amdxdna_drm_get_bo_info)

#define DRM_IOCTL_AMDXDNA_SYNC_BO                                                                  \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, struct amdxdna_drm_sync_bo)

#define DRM_IOCTL_AMDXDNA_EXEC_CMD                                                                 \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, struct amdxdna_drm_exec_cmd)

#define DRM_IOCTL_AMDXDNA_WAIT_CMD                                                                 \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, struct amdxdna_drm_wait_cmd)

#define DRM_IOCTL_AMDXDNA_GET_INFO                                                                 \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, struct amdxdna_drm_get_info)

#define DRM_IOCTL_AMDXDNA_SET_STATE                                                                \
  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, struct amdxdna_drm_set_state)

#if defined(__cplusplus)
} /* extern c end */
#endif

#endif /* AMDXDNA_ACCEL_H_ */


================================================
FILE: runtime/hsa-runtime/core/inc/agent.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_AGENT_H_
#define HSA_RUNTME_CORE_INC_AGENT_H_

#include <assert.h>
#include <vector>

#include "core/inc/checked.h"
#include "core/inc/isa.h"
#include "core/inc/memory_region.h"
#include "core/inc/queue.h"
#include "core/util/locks.h"
#include "core/util/utils.h"

namespace rocr {

// Forward declare AMD::MemoryRegion
namespace AMD {
class MemoryRegion;
}

namespace core {
class Driver;
class Signal;

// Signature of the callback supplied to Agent::QueueCreate for asynchronous
// queue events: it receives a status code identifying the event, the queue the
// event originated from, and the application data pointer.
typedef void (*HsaEventCallback)(hsa_status_t status, hsa_queue_t* source,
                                 void* data);

// Agent is intended to be a pure interface class and may be wrapped or
// replaced by tools libraries. All functions other than Convert, node_id,
// device_type, and public_handle must be virtual.
class Agent : public Checked<0xF6BC25EB17E6F917> {
  friend class rocr::AMD::MemoryRegion;

 public:
  // @brief Convert agent object into hsa_agent_t.
  //
  // @details The handle value is the address of the agent object.
  //
  // @param [in] agent Pointer to an agent.
  //
  // @retval hsa_agent_t
  static __forceinline hsa_agent_t Convert(Agent* agent) {
    const hsa_agent_t agent_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(agent))};
    return agent_handle;
  }

  // @brief Convert agent object into const hsa_agent_t.
  //
  // @param [in] agent Pointer to an agent.
  //
  // @retval const hsa_agent_t
  static __forceinline const hsa_agent_t Convert(const Agent* agent) {
    const hsa_agent_t agent_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(agent))};
    return agent_handle;
  }

  // @brief Convert hsa_agent_t handle into Agent*.
  //
  // @details The handle value is reinterpreted as a pointer; no validity
  // check is performed here.
  //
  // @param [in] agent An hsa_agent_t handle.
  //
  // @retval Agent*
  static __forceinline Agent* Convert(hsa_agent_t agent) {
    return reinterpret_cast<Agent*>(agent.handle);
  }

  // Lightweight RTTI for vendor specific implementations.
  enum DeviceType {
    kAmdGpuDevice = 0,
    kAmdCpuDevice = 1,
    kAmdAieDevice = 2,
    kUnknownDevice = 3
  };

  // @brief Agent class constructor.
  //
  // @details The public handle is initialized to the handle of this object;
  // profiling and the enabled flag both start out false.
  //
  // @param [in] driver Driver associated with this agent. Only its address is
  // stored, so it must outlive the agent.
  // @param [in] node_id Node id associated with this agent.
  // @param [in] type CPU or GPU or other.
  explicit Agent(Driver &driver, uint32_t node_id, DeviceType type)
      : node_id_(node_id), device_type_(uint32_t(type)), driver_(&driver),
        profiling_enabled_(false), enabled_(false) {
    public_handle_ = Convert(this);
  }

  // @brief Agent class destructor.
  virtual ~Agent() {}

  // @brief Submit DMA copy command to move data from src to dst and wait
  // until it is finished.
  //
  // @details The agent must be able to access @p dst and @p src. The base
  // implementation does nothing and returns HSA_STATUS_ERROR.
  //
  // @param [in] dst Memory address of the destination.
  // @param [in] src Memory address of the source.
  // @param [in] size Copy size in bytes.
  //
  // @retval HSA_STATUS_SUCCESS The memory copy is finished and successful.
  virtual hsa_status_t DmaCopy(void* dst, const void* src, size_t size) {
    return HSA_STATUS_ERROR;
  }

  // @brief Submit DMA copy command to move data from src to dst. This call
  // does not wait until the copy is finished
  //
  // @details The agent must be able to access @p dst and @p src. Memory copy
  // will be performed after all signals in @p dep_signals have value of 0.
  // On memory copy completion, the value of out_signal is decremented.
  // The base implementation does nothing and returns HSA_STATUS_ERROR.
  //
  // @param [in] dst Memory address of the destination.
  // @param [in] dst_agent Agent that owns the memory pool associated with @p
  // dst.
  // @param [in] src Memory address of the source.
  // @param [in] src_agent Agent that owns the memory pool associated with @p
  // src.
  // @param [in] size Copy size in bytes.
  // @param [in] dep_signals Array of signal dependency.
  // @param [in] out_signal Completion signal.
  //
  // @retval HSA_STATUS_SUCCESS The memory copy is finished and successful.
  virtual hsa_status_t DmaCopy(void* dst, core::Agent& dst_agent,
                               const void* src, core::Agent& src_agent,
                               size_t size,
                               std::vector<core::Signal*>& dep_signals,
                               core::Signal& out_signal) {
    return HSA_STATUS_ERROR;
  }

  // @brief Submit DMA copy command to move data from src to dst on the engine
  // selected by engine_offset. This call does not wait until the copy is
  // finished
  //
  // @details All semantics and params are identical to the asynchronous
  // DmaCopy except for engine_offset and force_copy_on_sdma. The base
  // implementation does nothing and returns HSA_STATUS_ERROR.
  //
  // @param [in] engine_offset Target engine
  // @param [in] force_copy_on_sdma By default, blit kernel copies are used if
  // dst_agent == src_agent.  Setting this true forces the copy over SDMA1.
  //
  //
  // @retval HSA_STATUS_SUCCESS The memory copy is finished and successful.
  virtual hsa_status_t DmaCopyOnEngine(void* dst, core::Agent& dst_agent,
                               const void* src, core::Agent& src_agent,
                               size_t size,
                               std::vector<core::Signal*>& dep_signals,
                               core::Signal& out_signal,
                               int engine_offset,
                               bool force_copy_on_sdma) {
    return HSA_STATUS_ERROR;
  }

  // @brief Return DMA availability status for copy direction.
  //
  // @details The base implementation leaves @p engine_ids_mask untouched and
  // returns HSA_STATUS_ERROR.
  //
  // @param [in] dst_agent Destination agent.
  // @param [in] src_agent Source agent.
  // @param [out] engine_ids_mask Mask of engine ids.
  //
  // @retval HSA_STATUS_SUCCESS DMA engines are available
  // @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES DMA engines are not available
  virtual hsa_status_t DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent,
                                     uint32_t *engine_ids_mask) {
    return HSA_STATUS_ERROR;
  }

  // @brief Return the recommended DMA engines for copy direction.
  //
  // @details The base implementation leaves @p recommended_ids_mask untouched
  // and returns HSA_STATUS_ERROR.
  //
  // @param [in] dst_agent Destination agent.
  // @param [in] src_agent Source agent.
  // @param [out] recommended_ids_mask Mask of recommended engine ids.
  //
  // @retval HSA_STATUS_SUCCESS For mask returned
  virtual hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t* recommended_ids_mask) {
    return HSA_STATUS_ERROR;
  }

  // @brief Submit DMA command to set the content of a pointer and wait
  // until it is finished.
  //
  // @details The agent must be able to access @p ptr. The base implementation
  // does nothing and returns HSA_STATUS_ERROR.
  //
  // @param [in] ptr Address of the memory to be set.
  // @param [in] value The value/pattern that will be used to set @p ptr.
  // @param [in] count Number of uint32_t element to be set.
  //
  // @retval HSA_STATUS_SUCCESS The memory fill is finished and successful.
  virtual hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) {
    return HSA_STATUS_ERROR;
  }

  // @brief Invoke the user provided callback for each region accessible by
  // this agent.
  //
  // @param [in] callback User provided callback function.
  // @param [in] data User provided pointer as input for @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
  // region returns ::HSA_STATUS_SUCCESS.
  virtual hsa_status_t IterateRegion(
      hsa_status_t (*callback)(hsa_region_t region, void* data),
      void* data) const = 0;

  // @brief Invoke the user provided callback for each isa supported by
  // this agent.
  //
  // @param [in] callback User provided callback function.
  // @param [in] data User provided pointer as input for @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
  // isa returns ::HSA_STATUS_SUCCESS.
  virtual hsa_status_t IterateSupportedIsas(
      hsa_status_t (*callback)(hsa_isa_t isa, void* data),
      void* data) const = 0;

  // @brief Invoke the callback for each cache usable by this agent.
  //
  // @param [in] callback User provided callback function.
  // @param [in] data User provided pointer as input for @p callback.
  virtual hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                                    void* data) const = 0;

  /// @brief Create queue.
  ///
  /// @param [in] size Number of packets the queue is expected to hold. Must be a
  /// power of 2 greater than 0.
  /// @param [in] queue_type Queue type.
  /// @param [in] flags Flags to specify queue attributes on creation.
  /// @param [in] event_callback Callback invoked for every
  /// asynchronous event related to the newly created queue. May be NULL. The HSA
  /// runtime passes three arguments to the callback: a code identifying the
  /// event that triggered the invocation, a pointer to the queue where the event
  /// originated, and the application data.
  /// @param [in] data Application data that is passed to @p event_callback.
  /// @param [in] private_segment_size A hint to indicate the maximum expected
  /// private segment usage per work-item, in bytes.
  /// @param [in] group_segment_size A hint to indicate the maximum expected
  /// group segment usage per work-group, in bytes.
  /// @param[out] queue Memory location where the HSA runtime stores a pointer
  /// to the newly created queue.
  ///
  /// @retval HSA_STATUS_SUCCESS The queue has been created successfully.
  virtual hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                                   HsaEventCallback event_callback, void* data,
                                   uint32_t private_segment_size, uint32_t group_segment_size,
                                   Queue** queue) = 0;

  // @brief Query the value of an attribute.
  //
  // @param [in] attribute Attribute to query.
  // @param [out] value Pointer to store the value of the attribute.
  //
  // @retval HSA_STATUS_SUCCESS @p value has been filled with the value of the
  // attribute.
  virtual hsa_status_t GetInfo(hsa_agent_info_t attribute,
                               void* value) const = 0;

  // @brief Returns an array of regions owned by the agent.
  virtual const std::vector<const core::MemoryRegion*>& regions() const = 0;

  // @brief Returns the ISAs supported by the agent.
  // @details The returned vector is a list of pointers to the supported ISA,
  // ordered from most specific (and performant) to most generic. For CPU
  // and AIE agents, this list will be empty.
  virtual const std::vector<const core::Isa *>& supported_isas() const = 0;

  // @brief Returns the hive id of the agent. The base implementation
  // returns 0.
  virtual uint64_t HiveId() const { return 0; }

  // @brief Returns the device type (CPU/GPU/Others), i.e. the DeviceType
  // passed to the constructor stored as uint32_t.
  __forceinline uint32_t device_type() const { return device_type_; }

  // @brief Returns hsa_agent_t handle exposed to end user.
  //
  // @details Only matters when tools library need to intercept HSA calls.
  __forceinline hsa_agent_t public_handle() const { return public_handle_; }

  // @brief Returns node id associated with this agent.
  __forceinline uint32_t node_id() const { return node_id_; }

  // @brief Returns the driver associated with this agent.
  __forceinline Driver& driver() { return *driver_; }
  __forceinline const Driver& driver() const { return *driver_; }

  // @brief Getter for profiling_enabled_.
  __forceinline bool profiling_enabled() const { return profiling_enabled_; }

  // @brief Setter for profiling_enabled_.
  //
  // @details Calls EnableDmaProfiling first; profiling_enabled_ is updated
  // only if that call succeeds.
  //
  // @param [in] enable True to enable profiling, false to disable it.
  //
  // @retval The status returned by EnableDmaProfiling.
  virtual hsa_status_t profiling_enabled(bool enable) {
    const hsa_status_t stat = EnableDmaProfiling(enable);
    if (HSA_STATUS_SUCCESS == stat) {
      profiling_enabled_ = enable;
    }

    return stat;
  }

  // @brief Returns the enabled flag (false after construction).
  __forceinline bool Enabled() const { return enabled_; }

  // @brief Sets the enabled flag.
  __forceinline void Enable() { enabled_ = true; }

  // @brief Clears the enabled flag.
  __forceinline void Disable() { enabled_ = false; }

  // @brief Calls Trim() on every region returned by regions().
  virtual void Trim() {
    for (auto region : regions()) region->Trim();
  }

  // @brief Hook for releasing agent resources. The base implementation does
  // nothing.
  virtual void ReleaseResources() { }

protected:
  // Intention here is to have a polymorphic update procedure for public_handle_
  // which is callable on any Agent* but only from some class derived from
  // Agent*.  do_set_public_handle should remain protected or private in all
  // derived types.
  static __forceinline void set_public_handle(Agent* agent,
                                              hsa_agent_t handle) {
    agent->do_set_public_handle(handle);
  }

  // Virtual implementation behind set_public_handle. The base version simply
  // stores @p handle in public_handle_.
  virtual void do_set_public_handle(hsa_agent_t handle) {
    public_handle_ = handle;
  }

  // @brief Enable profiling of the asynchronous DMA copy. The timestamp
  // of each copy request will be stored in the completion signal structure.
  //
  // @details The base implementation does nothing and returns
  // HSA_STATUS_SUCCESS.
  //
  // @param enable True to enable profiling. False to disable profiling.
  //
  // @retval HSA_STATUS_SUCCESS The profiling is enabled and the
  // timing of subsequent async copy will be measured.
  virtual hsa_status_t EnableDmaProfiling(bool enable) {
    return HSA_STATUS_SUCCESS;
  }

  // Handle exposed to the end user; initialized by the constructor to the
  // handle of this object and replaceable through set_public_handle.
  hsa_agent_t public_handle_;

  // Storage available to derived classes for the supported ISA list. The base
  // class itself never fills or reads it.
  std::vector<const core::Isa *> supported_isas_;

 private:
  // @brief Node id.
  const uint32_t node_id_;

  // DeviceType passed to the constructor, stored as uint32_t.
  const uint32_t device_type_;

  // Non-owning pointer to the driver passed to the constructor.
  Driver *driver_;

  // Updated by profiling_enabled(bool) when EnableDmaProfiling succeeds.
  bool profiling_enabled_;

  // Toggled by Enable() / Disable().
  bool enabled_;

  // Used by an Agent's MemoryRegions to ensure serial memory operation on the device.
  // Serial memory operations are needed to ensure, among other things, that allocation failures are
  // due to true OOM conditions and per region caching (Trim and Allocate must be serial and
  // exclusive to ensure this).
  KernelMutex agent_memory_lock_;

  // Forbid copying and moving of this object
  DISALLOW_COPY_AND_ASSIGN(Agent);
};
}  // namespace core
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_aie_agent.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// AMD specific HSA backend.

#ifndef HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_
#define HSA_RUNTIME_CORE_INC_AMD_AIE_AGENT_H_

#include "core/inc/agent.h"
#include "core/inc/runtime.h"

namespace rocr {
namespace AMD {

class AieAgent : public core::Agent {
 public:
  /// @brief Construct an AIE agent.
  /// @param [in] node Node id.
  /// @param [in] node_props Node properties.
  AieAgent(uint32_t node, const HsaNodeProperties& node_props);

  ~AieAgent();

  /// @brief Region visitor driven by a per-region callback.
  /// @note Unlike the Iterate* members below, this one is not declared as an override.
  /// @param [in] include_peer NOTE(review): semantics are defined by the implementation, which
  /// is not visible from this header.
  /// @param [in] callback Function taking a region handle and an opaque data pointer.
  /// @param [in] data Opaque pointer supplied by the caller alongside @p callback.
  hsa_status_t VisitRegion(bool include_peer,
                           hsa_status_t (*callback)(hsa_region_t region, void* data),
                           void* data) const;

  /// @name Overrides of the core::Agent interface.
  /// @{
  hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, void* data),
                             void* data) const override;

  hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                            void* value) const override;

  hsa_status_t IterateSupportedIsas(hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                    void* data) const override;

  hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override;

  hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                           core::HsaEventCallback event_callback, void* data,
                           uint32_t private_segment_size, uint32_t group_segment_size,
                           core::Queue** queue) override;

  /// @brief ISAs supported by this agent.
  const std::vector<const core::Isa*>& supported_isas() const override { return supported_isas_; }

  /// @brief Memory regions recorded for this agent.
  const std::vector<const core::MemoryRegion*>& regions() const override { return regions_; }
  /// @}

  /// @brief Accessor for the AIE system allocator callback.
  const std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>&
  system_allocator() const {
    return system_allocator_;
  }

  /// @brief Accessor for the AIE system deallocator callback.
  const std::function<void(void*)>& system_deallocator() const { return system_deallocator_; }

  /// @brief Node properties stored in this agent.
  const HsaNodeProperties& properties() const { return node_props_; }

 private:
  /// @brief Query the driver to get the region list owned by this agent.
  void InitRegionList();

  /// @brief Set up the memory allocators used by this agent.
  void InitAllocators();

  /// Backing storage for regions().
  std::vector<const core::MemoryRegion*> regions_;

  /// Backing storage for system_allocator().
  std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>
      system_allocator_;

  /// Backing storage for system_deallocator().
  std::function<void(void*)> system_deallocator_;

  // Fixed per-agent constants. NOTE(review): how each of these is consumed (presumably by
  // GetInfo/QueueCreate) lives in the implementation file - confirm there.
  const hsa_profile_t profile_ = HSA_PROFILE_BASE;
  const uint32_t min_aql_size_ = 0x40;
  const uint32_t max_aql_size_ = 0x40;
  const uint32_t max_queues_ = 1;

  /// Backing storage for properties().
  const HsaNodeProperties node_props_;
};

} // namespace AMD
} // namespace rocr

#endif // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_

#include <limits>

#include "core/inc/amd_aie_agent.h"
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"

namespace rocr {
namespace AMD {

/// @brief Wraps the hardware AQL command processor of an AIE device. The
/// class exposes the queue's doorbell, its read and write indices, and the
/// packet buffer behind it.
class AieAqlQueue : public core::Queue,
                    private core::LocalSignal,
                    private core::DoorbellSignal {
 public:
  /// @brief True when @p signal carries this class's RTTI id.
  static __forceinline bool IsType(core::Signal* signal) { return signal->IsType(&rtti_id()); }

  /// @brief True when @p queue carries this class's RTTI id.
  static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }

  AieAqlQueue(core::SharedQueue* shared_queue, AieAgent* agent, size_t req_size_pkts,
              uint32_t node_id, uint64_t flags);
  ~AieAqlQueue();

  // Queue life cycle and scheduling.
  hsa_status_t Inactivate() override;
  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;
  void Destroy() override;

  // Read index accessors. Writing the read index is not a valid operation on
  // this queue type, so both stores trip an assertion.
  uint64_t LoadReadIndexRelaxed() override;
  uint64_t LoadReadIndexAcquire() override;
  void StoreReadIndexRelaxed(uint64_t value) override { assert(false); }
  void StoreReadIndexRelease(uint64_t value) override { assert(false); }

  // Write index accessors: load, store, compare-and-swap and add, one
  // overload per memory ordering.
  uint64_t LoadWriteIndexRelaxed() override;
  uint64_t LoadWriteIndexAcquire() override;
  void StoreWriteIndexRelaxed(uint64_t value) override;
  void StoreWriteIndexRelease(uint64_t value) override;
  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override;
  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override;
  uint64_t AddWriteIndexRelaxed(uint64_t value) override;
  uint64_t AddWriteIndexAcquire(uint64_t value) override;
  uint64_t AddWriteIndexRelease(uint64_t value) override;
  uint64_t AddWriteIndexAcqRel(uint64_t value) override;

  // Signal value stores.
  void StoreRelaxed(hsa_signal_value_t value) override;
  void StoreRelease(hsa_signal_value_t value) override;

  /// @brief Report a queue attribute.
  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) override;

  // AIE-specific API

  /// @brief Agent this queue is bound to.
  AieAgent& GetAgent() { return agent_; }

  // The remaining overrides are GPU-specific queue functions, which are
  // unsupported on AIE queues.

  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) override;
  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) override;
  void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                  hsa_signal_t* signal = nullptr) override;

 protected:
  /// @brief RTTI hook: matches only this class's id.
  bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }

 private:
  /// @brief Called when the doorbell is rung to submit all queued packets.
  void SubmitPackets();

  /// @brief Address of a function-local static that serves as the unique
  /// type tag compared by IsType() and _IsA().
  static __forceinline int& rtti_id() {
    static int rtti_id_ = 0;
    return rtti_id_;
  }

  /// @brief Queue id; starts out as INVALID_QUEUEID.
  HSA_QUEUEID queue_id_ = INVALID_QUEUEID;

  /// @brief ID of AIE device on which this queue has been mapped.
  uint32_t node_id_ = std::numeric_limits<uint32_t>::max();

  /// @brief Queue size in bytes.
  uint32_t queue_size_bytes_ = std::numeric_limits<uint32_t>::max();

  /// @brief Agent returned by GetAgent().
  AieAgent& agent_;

  /// @brief Base of the queue's ring buffer storage.
  void* ring_buf_ = nullptr;

  /// @brief Indicates if queue is active.
  std::atomic<bool> active_;
};

} // namespace AMD
} // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_aql_queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_
#define HSA_RUNTIME_CORE_INC_AMD_HW_AQL_COMMAND_PROCESSOR_H_

#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/inc/queue.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/util/locks.h"

namespace rocr {
namespace AMD {
/// @brief Encapsulates HW AQL command processor functionality. It
/// provides the interface for things such as the doorbell register, the read
/// and write pointers, and the packet buffer.
class AqlQueue : public core::Queue, private core::LocalSignal, public core::DoorbellSignal {
 public:
  /// @brief Returns true if @p signal carries this class's RTTI id.
  static __forceinline bool IsType(core::Signal* signal) {
    return signal->IsType(&rtti_id());
  }

  /// @brief Returns true if @p queue carries this class's RTTI id.
  static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }

  // Acquires/releases queue resources and requests HW schedule/deschedule.
  AqlQueue(core::SharedQueue* shared_queue, GpuAgent* agent, size_t req_size_pkts,
           HSAuint32 node_id, ScratchInfo& scratch, core::HsaEventCallback callback, void* err_data,
           uint64_t flags);

  ~AqlQueue();

  /// @brief Queue interfaces
  hsa_status_t Inactivate() override;

  /// @brief Change the scheduling priority of the queue
  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;

  /// @brief Destroy ref counted queue
  void Destroy() override;

  /// @brief Atomically reads the read index with Acquire semantics
  ///
  /// @return uint64_t Value of read index
  uint64_t LoadReadIndexAcquire() override;

  /// @brief Atomically reads the read index with Relaxed semantics
  ///
  /// @return uint64_t Value of read index
  uint64_t LoadReadIndexRelaxed() override;

  /// @brief Atomically reads the write index with Acquire semantics
  ///
  /// @return uint64_t Value of write index
  uint64_t LoadWriteIndexAcquire() override;

  /// @brief Atomically reads the write index with Relaxed semantics
  ///
  /// @return uint64_t Value of write index
  uint64_t LoadWriteIndexRelaxed() override;

  /// @brief This operation is illegal; it asserts.
  void StoreReadIndexRelaxed(uint64_t value) override { assert(false); }

  /// @brief This operation is illegal; it asserts.
  void StoreReadIndexRelease(uint64_t value) override { assert(false); }

  /// @brief Atomically writes the write index with Relaxed semantics
  ///
  /// @param value New value of write index to update with
  void StoreWriteIndexRelaxed(uint64_t value) override;

  /// @brief Atomically writes the write index with Release semantics
  ///
  /// @param value New value of write index to update with
  void StoreWriteIndexRelease(uint64_t value) override;

  /// @brief Compares and swaps the write index using Acquire and Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override;

  /// @brief Compares and swaps the write index using Acquire semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override;

  /// @brief Compares and swaps the write index using Relaxed semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override;

  /// @brief Compares and swaps the write index using Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override;

  /// @brief Updates the write index using Acquire and Release semantics
  ///
  /// @param value Value to add to the write index (cf. hsa_queue_add_write_index_*)
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t AddWriteIndexAcqRel(uint64_t value) override;

  /// @brief Updates the write index using Acquire semantics
  ///
  /// @param value Value to add to the write index (cf. hsa_queue_add_write_index_*)
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t AddWriteIndexAcquire(uint64_t value) override;

  /// @brief Updates the write index using Relaxed semantics
  ///
  /// @param value Value to add to the write index (cf. hsa_queue_add_write_index_*)
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t AddWriteIndexRelaxed(uint64_t value) override;

  /// @brief Updates the write index using Release semantics
  ///
  /// @param value Value to add to the write index (cf. hsa_queue_add_write_index_*)
  ///
  /// @return uint64_t Value of write index before the update
  uint64_t AddWriteIndexRelease(uint64_t value) override;

  /// @brief Set CU Masking
  ///
  /// @param num_cu_mask_count size of mask bit array
  ///
  /// @param cu_mask pointer to cu mask
  ///
  /// @return hsa_status_t
  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) override;

  /// @brief Get CU Masking
  ///
  /// @param num_cu_mask_count size of mask bit array
  ///
  /// @param cu_mask pointer to cu mask
  ///
  /// @return hsa_status_t
  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) override;

  /// @brief Submits a block of PM4 and waits until it has been executed.
  void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                  hsa_signal_t* signal = nullptr) override;

  /// @brief Enables/disables profiling; overrides SetProfiling from core::Queue
  void SetProfiling(bool enabled) override;

  /// @brief Update signal value using Relaxed semantics
  void StoreRelaxed(hsa_signal_value_t value) override;

  /// @brief Update signal value using Release semantics
  void StoreRelease(hsa_signal_value_t value) override;

  /// @brief Provide information about the queue
  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) override;

  /// @brief Enable use of GWS from this queue.
  hsa_status_t EnableGWS(int gws_slot_count);

  /// @brief Update internal scratch limits based on agent limits. If the currently allocated
  /// scratch is larger than the new limits, perform async-reclaim.
  void CheckScratchLimits();

  /// @brief Async reclaim main scratch memory
  void AsyncReclaimMainScratch();

  /// @brief Async reclaim alternate scratch memory
  void AsyncReclaimAltScratch();

 protected:
  /// @brief RTTI hook: matches only this class's id.
  bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }

 private:
  uint32_t ComputeRingBufferMinPkts();
  uint32_t ComputeRingBufferMaxPkts();

  // (De)allocates and (de)registers ring_buf_.
  void AllocRegisteredRingBuffer(uint32_t queue_size_pkts);

  /// @brief Frees the queue's packet ring buffer and its queue struct.
  void FreeQueueMemory();

  /// @brief Abstracts the file handle use for double mapping queues.
  void CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const;
  int CreateRingBufferFD(const char* ring_buf_shm_path, uint32_t ring_buf_phys_size_bytes) const;

  /// @brief Define the Scratch Buffer Descriptor and related parameters
  /// that enable kernel access to scratch memory
  void InitScratchSRD();
  void FillBufRsrcWord0();
  void FillBufRsrcWord1();
  void FillBufRsrcWord1_Gfx11();
  void FillBufRsrcWord2();
  void FillBufRsrcWord3();
  void FillBufRsrcWord3_Gfx10();
  void FillBufRsrcWord3_Gfx11();
  void FillBufRsrcWord3_Gfx12();
  void FillComputeTmpRingSize();
  void FillAltComputeTmpRingSize();
  void FillComputeTmpRingSize_Gfx11();
  void FillComputeTmpRingSize_Gfx12();

  void FreeMainScratchSpace();
  void FreeAltScratchSpace();

  /// @brief Halt the queue without destroying it or fencing memory.
  void Suspend();

  /// @brief Resume the queue.
  void Resume();

  /// @brief Handle insufficient scratch
  void HandleInsufficientScratch(hsa_signal_value_t& error_code, hsa_signal_value_t& waitVal,
                                 bool& changeWait);

  /// @brief Handler for hardware queue events.
  template <bool HandleExceptions>
  static bool DynamicQueueEventsHandler(hsa_signal_value_t error_code, void* arg);

  /// @brief Handler for KFD exceptions.
  static bool ExceptionHandler(hsa_signal_value_t error_code, void* arg);

  // AQL packet ring buffer
  void* ring_buf_;

  // Size of ring_buf_ allocation.
  // This may be larger than (amd_queue_.hsa_queue.size * sizeof(AqlPacket)).
  uint32_t ring_buf_alloc_bytes_;

  // Id of the Queue used in communication with thunk
  HSA_QUEUEID queue_id_;

  // Indicates if queue is active
  std::atomic<bool> active_;

  // Handle of agent, which queue is attached to
  GpuAgent* agent_;

  // Handle of scratch memory descriptor
  ScratchInfo queue_scratch_;

  AMD::callback_t<core::HsaEventCallback> errors_callback_;

  void* errors_data_;

  // GPU-visible indirect buffer holding PM4 commands.
  void* pm4_ib_buf_;
  uint32_t pm4_ib_size_b_;
  KernelMutex pm4_ib_mutex_;

  // Error handler control variable.
  std::atomic<uint32_t> dynamicScratchState, exceptionState;
  enum { ERROR_HANDLER_DONE = 1, ERROR_HANDLER_TERMINATE = 2, ERROR_HANDLER_SCRATCH_RETRY = 4 };

  // Queue currently suspended or scheduled
  bool suspended_;

  // Thunk dispatch and wavefront scheduling priority
  HSA_QUEUE_PRIORITY priority_;

  // Exception notification signal
  Signal* exception_signal_;

  // CU mask lock
  KernelMutex mask_lock_;

  // Mutex to prevent AsyncReclaimScratch and HandleInsufficientScratch from
  // happening at the same time.
  KernelMutex scratch_lock_;

  // Current CU mask
  std::vector<uint32_t> cu_mask_;

  // Shared event used for queue errors
  static __forceinline HsaEvent*& queue_event() {
    static HsaEvent* queue_event_ = nullptr;
    return queue_event_;
  }

  // Queue count - used to ref count queue_event_
  static __forceinline std::atomic<uint32_t>& queue_count() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::atomic<uint32_t>* queue_count_ = new std::atomic<uint32_t>(0);
    return *queue_count_;
  }

  // Mutex for queue_event_ manipulation. Static like queue_event() and
  // queue_count(): the mutex is shared by every queue and does not depend on
  // any instance state, so it must also be reachable from static members.
  static __forceinline KernelMutex& queue_lock() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static KernelMutex* queue_lock_ = new KernelMutex();
    return *queue_lock_;
  }

  // Unique per-class type tag: the address of this function-local static is
  // what IsType() and _IsA() compare against.
  static __forceinline int& rtti_id() {
    static int rtti_id_ = 0;
    return rtti_id_;
  }

  // Forbid copying and moving of this object
  DISALLOW_COPY_AND_ASSIGN(AqlQueue);
};

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_available_drivers.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_AMD_AVAILABLE_DRIVERS_H_
#define HSA_RUNTME_CORE_INC_AMD_AVAILABLE_DRIVERS_H_

#ifdef __linux__

#include "core/inc/amd_kfd_driver.h"
#include "core/inc/amd_xdna_driver.h"

#endif

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_blit_kernel.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_

#include <map>
#include <mutex>
#include <vector>
#include <atomic>
#include <stdint.h>

#include "core/inc/blit.h"

namespace rocr {
namespace AMD {
class BlitKernel : public core::Blit {
 public:
  explicit BlitKernel(core::Queue* queue);
  virtual ~BlitKernel() override;

  /// @brief Initialize a blit kernel object.
  ///
  /// @param agent Pointer to the agent that will execute the AQL packets.
  ///
  /// @return hsa_status_t
  hsa_status_t Initialize(const core::Agent& agent);

  /// @brief Marks the blit kernel object as invalid and uncouples its link with
  /// the underlying AQL kernel queue. Use of the blit object
  /// once it has been release is illegal and any behavior is indeterminate
  ///
  /// @note: The call will block until all AQL packets have been executed.
  ///
  /// @param agent Agent passed to Initialize.
  ///
  /// @return hsa_status_t
  virtual hsa_status_t Destroy(const core::Agent& agent) override;

  /// @brief Submit an AQL packet to perform vector copy. The call is blocking
  /// until the command execution is finished.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
                                               size_t size) override;

  /// @brief Submit a linear copy command to the the underlying compute device's
  /// control block. The call is non blocking. The memory transfer will start
  /// after all dependent signals are satisfied. After the transfer is
  /// completed, the out signal will be decremented.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  /// @param dep_signals Arrays of dependent signal.
  /// @param out_signal Output signal.
  /// @param gang_signals Array of gang signals.
  virtual hsa_status_t SubmitLinearCopyCommand(
      void* dst, const void* src, size_t size,
      std::vector<core::Signal*>& dep_signals,
      core::Signal& out_signal, std::vector<core::Signal*>& gang_signals) override;

  /// @brief Submit an AQL packet to perform memory fill. The call is blocking
  /// until the command execution is finished.
  ///
  /// @param ptr Memory address of the fill destination.
  /// @param value Value to be set.
  /// @param count Number of uint32_t element to be set to the value.
  virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value,
                                               size_t count) override;

  virtual hsa_status_t EnableProfiling(bool enable) override;

  virtual uint64_t PendingBytes() override;

  virtual void GangLeader(bool gang_leader) override {}
  virtual bool GangLeader() const override { return false; }

  const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;
 private:
  union KernelArgs {
    struct __ALIGNED__(16) {
      uint64_t phase1_src_start;
      uint64_t phase1_dst_start;
      uint64_t phase2_src_start;
      uint64_t phase2_dst_start;
      uint64_t phase3_src_start;
      uint64_t phase3_dst_start;
      uint64_t phase4_src_start;
      uint64_t phase4_dst_start;
      uint64_t phase4_src_end;
      uint64_t phase4_dst_end;
      uint32_t num_workitems;
    } copy_aligned;

    struct __ALIGNED__(16) {
      uint64_t phase1_src_start;
      uint64_t phase1_dst_start;
      uint64_t phase2_src_start;
      uint64_t phase2_dst_start;
      uint64_t phase2_src_end;
      uint64_t phase2_dst_end;
      uint32_t num_workitems;
    } copy_misaligned;

    struct __ALIGNED__(16) {
      uint64_t phase1_dst_start;
      uint64_t phase2_dst_start;
      uint64_t phase2_dst_end;
      uint32_t fill_value;
      uint32_t num_workitems;
    } fill;
  };

  // Index after which bytes will have been written.
  struct BytesWritten {
    uint64_t index;
    uint64_t bytes;
  };

  /// Reserve a slot in the queue buffer. The call will wait until the queue
  /// buffer has a room.
  uint64_t AcquireWriteIndex(uint32_t num_packet);

  /// Update the queue doorbell register with ::write_index. This
  /// function also serializes concurrent doorbell update to ensure that the
  /// packet processor doesn't get invalid packet.
  void ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet);

  void PopulateQueue(uint64_t index, uint64_t code_handle, void* args,
                     uint32_t grid_size_x, hsa_signal_t completion_signal);

  KernelArgs* ObtainAsyncKernelCopyArg();

  void RecordBlitHistory(uint64_t size, uint64_t index);

  /// AQL code object and size for each kernel.
  enum class KernelType {
    CopyAligned,
    CopyMisaligned,
    Fill,
  };

  struct KernelCode {
    void* code_buf_;
    size_t code_buf_size_;
  };

  std::map<KernelType, KernelCode> kernels_;

  /// AQL queue for submitting the vector copy kernel.
  core::Queue* queue_;
  uint32_t queue_bitmask_;

  /// Pointer to the kernel argument buffer.
  KernelArgs* kernarg_async_;
  uint32_t kernarg_async_mask_;
  volatile uint32_t kernarg_async_counter_;

  /// Completion signal for every kernel dispatched.
  hsa_signal_t completion_signal_;

  /// Bytes moved by commands < index.
  /// Any record's byte value may be inexact by the size of concurrently issued operations.
  std::vector<BytesWritten> bytes_written_;

  /// Total bytes written by all commands issued.
  uint64_t bytes_queued_;

  /// Index where most recent blit operation queued.
  uint64_t last_queued_;

  /// Orders command indices and bytes_queued_ updates
  std::mutex reservation_lock_;

  /// Search resume index
  std::atomic<uint64_t> pending_search_index_;

  /// Lock to synchronize access to kernarg_ and completion_signal_
  std::mutex lock_;

  /// Number of CUs on the underlying agent.
  int num_cus_;
};
}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_blit_sdma.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_
#define HSA_RUNTIME_CORE_INC_AMD_BLIT_SDMA_H_

#include <functional>
#include <mutex>
#include <stdint.h>
#include <vector>

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/blit.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/utils.h"

namespace rocr {
namespace AMD {

/// @brief Abstract interface common to all SDMA based blit objects. It
/// extends core::Blit with SDMA specific limits, an initialization entry
/// point and a rectangular copy command.
class BlitSdmaBase : public core::Blit {
 public:
  // SDMA limits; these are only declared here and defined elsewhere.
  static const size_t kQueueSize;
  static const size_t kCopyPacketSize;
  static const size_t kMaxSingleCopySize;
  static const size_t kMaxSingleFillSize;

  /// @brief Every blit object derived from this class reports itself as SDMA.
  bool isSDMA() const override { return true; }

  /// @brief Initialize the blit object for @p agent. Must be provided by the
  /// concrete SDMA implementation.
  virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi,
                                  size_t linear_copy_size_override, int rec_engine) = 0;

  /// @brief Submit a rectangular copy described by pitched pointers, offsets
  /// and a range. Must be provided by the concrete SDMA implementation.
  ///
  /// @param dep_signals Dependent signals.
  /// @param out_signal Output signal.
  virtual hsa_status_t SubmitCopyRectCommand(
      const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
      const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
      std::vector<core::Signal*>& dep_signals, core::Signal& out_signal) = 0;
};

/// @brief Blit object that submits copy/fill commands through a user mode
/// SDMA queue.
///
/// @tparam useGCR Compile time switch selecting the SDMA flavour. The header
/// instantiates it with `false` (BlitSdmaV4) and with `true` (BlitSdmaV5,
/// "SDMA is connected to gL2").
template <bool useGCR> class BlitSdma : public BlitSdmaBase {
 public:
  BlitSdma();

  virtual ~BlitSdma() override;

  /// @brief Initialize a User Mode SDMA Queue object. Input parameters specify
  /// properties of queue being created.
  ///
  /// @param agent Agent that will execute the SDMA commands.
  /// @param use_xgmi NOTE(review): presumably selects an xGMI SDMA engine —
  /// confirm against the implementation.
  /// @param linear_copy_size_override NOTE(review): presumably overrides the
  /// maximum size of a single linear copy packet — confirm.
  /// @param rec_eng NOTE(review): presumably the recommended SDMA engine
  /// (named rec_engine in BlitSdmaBase::Initialize) — confirm.
  ///
  /// @return hsa_status_t
  virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi,
                                  size_t linear_copy_size_override, int rec_eng) override;

  /// @brief Marks the queue object as invalid and uncouples its link with
  /// the underlying compute device's control block. Use of the queue object
  /// once it has been released is illegal and any behavior is indeterminate.
  ///
  /// @note: The call will block until all packets have executed.
  ///
  /// @param agent Agent passed to Initialize.
  ///
  /// @return hsa_status_t
  virtual hsa_status_t Destroy(const core::Agent& agent) override;

  /// @brief Submit a linear copy command to the queue buffer.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src,
                                               size_t size) override;

  /// @brief Submit a linear copy command to the underlying compute device's
  /// control block. The call is non blocking. The memory transfer will start
  /// after all dependent signals are satisfied. After the transfer is
  /// completed, the out signal will be decremented.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  /// @param dep_signals Arrays of dependent signal.
  /// @param out_signal Output signal.
  /// @param gang_signals Array of gang signals.
  virtual hsa_status_t SubmitLinearCopyCommand(
      void* dst, const void* src, size_t size,
      std::vector<core::Signal*>& dep_signals,
      core::Signal& out_signal, std::vector<core::Signal*>& gang_signals) override;

  /// @brief Submit a rectangular copy command described by pitched pointers,
  /// offsets and a range.
  ///
  /// @param dep_signals Arrays of dependent signal.
  /// @param out_signal Output signal.
  virtual hsa_status_t SubmitCopyRectCommand(const hsa_pitched_ptr_t* dst,
                                             const hsa_dim3_t* dst_offset,
                                             const hsa_pitched_ptr_t* src,
                                             const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
                                             std::vector<core::Signal*>& dep_signals,
                                             core::Signal& out_signal) override;

  /// @brief Submit a linear fill command to the queue buffer
  ///
  /// @param ptr Memory address of the fill destination.
  /// @param value Value to be set.
  /// @param count Number of uint32_t element to be set to the value.
  virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value,
                                               size_t count) override;

  virtual hsa_status_t EnableProfiling(bool enable) override;

  virtual uint64_t PendingBytes() override;

  /// @brief Sets / queries whether this SDMA blit is the gang leader.
  virtual void GangLeader(bool gang_leader) override { gang_leader_ = gang_leader; }
  virtual bool GangLeader() const override { return gang_leader_; }

 private:
  /// @brief Acquires the address into queue buffer where a new command
  /// packet of specified size could be written. The address that is
  /// returned is guaranteed to be unique even in a multi-threaded access
  /// scenario. This function is guaranteed to return a pointer for writing
  /// data into the queue buffer.
  ///
  /// @param cmd_size Command packet size in bytes.
  ///
  /// @param curr_index (output) Index to pass to ReleaseWriteAddress.
  ///
  /// @return pointer into the queue buffer where a PM4 packet of specified size
  /// could be written. NULL if input size is greater than the size of queue
  /// buffer.
  char* AcquireWriteAddress(uint32_t cmd_size, uint64_t& curr_index);

  void UpdateWriteAndDoorbellRegister(uint64_t curr_index, uint64_t new_index);

  /// @brief Updates the Write Register of compute device to the end of
  /// SDMA packet written into queue buffer. The update to Write Register
  /// will be safe under multi-threaded usage scenario. Furthermore, updates
  /// to Write Register are blocking until all prior updates are completed
  /// i.e. if two threads T1 & T2 were to call release, then updates by T2
  /// will block until T1 has completed its update (assumes T1 acquired the
  /// write address first).
  ///
  /// @param curr_index Index passed back from AcquireWriteAddress.
  ///
  /// @param cmd_size Command packet size in bytes.
  void ReleaseWriteAddress(uint64_t curr_index, uint32_t cmd_size);

  /// @brief Writes NO-OP words into queue buffer in case writing a command
  /// causes the queue buffer to wrap.
  ///
  /// @param curr_index Index to begin padding from.
  void PadRingToEnd(uint64_t curr_index);

  uint32_t WrapIntoRing(uint64_t index);
  bool CanWriteUpto(uint64_t upto_index);

  /// @brief Build fence command
  void BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
                         uint32_t fence_value);

  /// @brief Build Hdp Flush command
  void BuildHdpFlushCommand(char* cmd_addr);

  void BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, void* dst,
                        const void* src, size_t size);

  void BuildCopyRectCommand(const std::function<void*(size_t)>& append,
                            const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
                            const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
                            const hsa_dim3_t* range);

  void BuildFillCommand(char* cmd_addr, uint32_t num_fill_command, void* ptr, uint32_t value,
                        size_t count);

  void BuildPollCommand(char* cmd_addr, void* addr, uint32_t reference);

  void BuildAtomicDecrementCommand(char* cmd_addr, void* addr);

  void BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address);

  void BuildTrapCommand(char* cmd_addr, uint32_t event_id);

  void BuildGCRCommand(char* cmd_addr, bool invalidate);

  hsa_status_t SubmitCommand(const void* cmds, size_t cmd_size, uint64_t size,
                             const std::vector<core::Signal*>& dep_signals,
                             core::Signal& out_signal, std::vector<core::Signal*>& gang_signals);

  hsa_status_t SubmitBlockingCommand(const void* cmds, size_t cmd_size, uint64_t size);

  // Agent object owning the SDMA engine.
  GpuAgent* agent_;

  /// Base address of the Queue buffer at construction time.
  char* queue_start_addr_;

  // Pending bytes tracking
  // bytes_written_ is indexed with wrapped command queue indices (which are in bytes).
  // The data_ index corresponding to a command queue index is the first uint64_t index which begins
  // in the packet area.  All packets have a header & at least one address so must be larger than 12
  // bytes, thus this index always exists.
  std::mutex reservation_lock_;
  uint64_t bytes_queued_;
  class {
   public:
    // Indexed by wrapped command queue indices (offsets).
    uint64_t& operator[](uint32_t index) { return data_[convert(index)]; }

    // Sizes the table for a queue of `size` bytes. The conversion is done in
    // size_t so the byte count is not narrowed before it is scaled down.
    void resize(size_t size) { data_.resize(convert(size)); }

    // Stores `value` in every slot from the one mapped to byte offset `start`
    // up to (excluding) the one mapped to byte offset `stop`.
    void fill(uint32_t start, uint32_t stop, uint64_t value) {
      // The end slot does not change while filling; compute it once.
      const size_t last = convert(stop);
      for (size_t slot = convert(start); slot < last; slot++) {
        data_[slot] = value;
      }
    }

   private:
    // Maps a byte offset to the index of the first uint64_t slot that begins
    // at or after it (i.e. rounds up to a multiple of sizeof(uint64_t)).
    size_t convert(size_t index) const {
      return (index + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    }

    std::vector<uint64_t> data_;
  } bytes_written_;

  // Internal signals for blocking APIs
  core::unique_signal_ptr signals_[2];
  KernelMutex lock_;
  bool parity_;

  /// Queue resource descriptor for doorbell, read
  /// and write indices
  HsaQueueResource queue_resource_;

  // Monotonic ring indices, in bytes, tracking written and submitted commands.
  uint64_t cached_reserve_index_;
  uint64_t cached_commit_index_;

  // Sizes of the individual SDMA command packets; only declared here and
  // defined elsewhere.
  static const uint32_t linear_copy_command_size_;

  static const uint32_t fill_command_size_;

  static const uint32_t fence_command_size_;

  static const uint32_t poll_command_size_;

  static const uint32_t flush_command_size_;

  static const uint32_t atomic_command_size_;

  static const uint32_t timestamp_command_size_;

  static const uint32_t trap_command_size_;

  static const uint32_t gcr_command_size_;

  // Max copy size of a single linear copy command packet.
  size_t max_single_linear_copy_size_;

  /// Max total copy size supported by the queue.
  size_t max_total_linear_copy_size_;

  /// Max count of uint32_t of a single fill command packet.
  size_t max_single_fill_size_;

  /// Max total fill count supported by the queue.
  size_t max_total_fill_size_;

  /// True if platform atomic is supported.
  bool platform_atomic_support_;

  /// True if sDMA supports HDP flush
  bool hdp_flush_support_;

  /// True if SDMA blit is gang leader
  bool gang_leader_;

  /// True if SDMA blit is ganged
  bool is_ganged_;

  /// Minimum submission size in bytes.
  size_t min_submission_size_;
};


// SDMA blit instantiated without GCR support.
using BlitSdmaV4 = BlitSdma<false>;

// SDMA is connected to gL2; instantiated with GCR support.
using BlitSdmaV5 = BlitSdma<true>;

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_blit_shaders.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef OPENSRC_HSA_RUNTIME_CORE_INC_AMD_BLIT_SHADERS_H_ 
#define OPENSRC_HSA_RUNTIME_CORE_INC_AMD_BLIT_SHADERS_H_

namespace rocr {
namespace AMD {

// Table of 32-bit words for the "copy aligned" blit shader, variant 7.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables. Being `static const` in a header, every translation
// unit that includes this file gets its own copy.
static const unsigned int kCodeCopyAligned7[] = {
    0xC0820100, 0xC0840104, 0xC0860108, 0xC088010C, 0xC08A0110, 0xC00C0114,
    0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900,
    0xD2506A03, 0x01A90103, 0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05,
    0x01A90105, 0xD1C2006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
    0x01000002, 0xBF8C0F70, 0xD24A6A02, 0x00003102, 0xD2506A03, 0x01A90103,
    0xDC600000, 0x00000104, 0xD24A6A04, 0x00003104, 0xD2506A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, 0xD24A6A02,
    0x00001101, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001501,
    0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000E, 0xDC380000,
    0x08000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
    0xDC780000, 0x00000804, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
    0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD24A6A02, 0x00001901,
    0xD2506A03, 0x01A90103, 0x7E0A020F, 0xD24A6A04, 0x00001D01, 0xD2506A05,
    0x01A90105, 0xD1C2006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, 0xDC300000,
    0x01000002, 0xD24A6A02, 0x00003302, 0xD2506A03, 0x01A90103, 0xBF8C0F70,
    0xDC700000, 0x00000104, 0xD24A6A04, 0x00003304, 0xD2506A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD24A6A02, 0x00002100, 0xD2506A03,
    0x01A90103, 0x7E0A0213, 0xD24A6A04, 0x00002500, 0xD2506A05, 0x01A90105,
    0xD1C2006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, 0x01000002,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};

// Table of 32-bit words for the "copy misaligned" blit shader, variant 7.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables.
static const unsigned int kCodeCopyMisaligned7[] = {
    0xC0820100, 0xC0840104, 0xC0860108, 0xC008010C, 0xBF8C007F, 0x8F028602,
    0x4A000002, 0x7E060205, 0xD24A6A02, 0x00000900, 0xD2506A03, 0x01A90103,
    0x7E0A0207, 0xD24A6A04, 0x00000D00, 0xD2506A05, 0x01A90105, 0xD1C2006A,
    0x00001102, 0xBF860032, 0xDC200000, 0x06000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x07000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x08000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xDC200000, 0x09000002, 0xD24A6A02, 0x00002102,
    0xD2506A03, 0x01A90103, 0xBF8C0F70, 0xDC600000, 0x00000604, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000704, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000804, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xDC600000, 0x00000904, 0xD24A6A04,
    0x00002104, 0xD2506A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD24A6A02,
    0x00001100, 0xD2506A03, 0x01A90103, 0x7E0A020B, 0xD24A6A04, 0x00001500,
    0xD2506A05, 0x01A90105, 0xD1C2006A, 0x00001902, 0xBF86000F, 0x87FE6A7E,
    0xDC200000, 0x01000002, 0xD24A6A02, 0x00002102, 0xD2506A03, 0x01A90103,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xD24A6A04, 0x00002104, 0xD2506A05,
    0x01A90105, 0xBF82FFEE, 0xBF810000,
};

// Table of 32-bit words for the "fill" blit shader, variant 7.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables.
static const unsigned int kCodeFill7[] = {
    0xC0820100, 0xC0840104, 0xBF8C007F, 0x8F028602, 0x4A000002, 0x7E08020A,
    0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8F0C840B, 0x34020084, 0x7E060205,
    0xD24A6A02, 0x00000901, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00000D02,
    0xBF860007, 0xDC780000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
    0x01A90103, 0xBF82FFF6, 0x8F0C820B, 0x34020082, 0x7E060207, 0xD24A6A02,
    0x00000D01, 0xD2506A03, 0x01A90103, 0xD1C2006A, 0x00001102, 0xBF860008,
    0x87FE6A7E, 0xDC700000, 0x00000402, 0xD24A6A02, 0x00001902, 0xD2506A03,
    0x01A90103, 0xBF82FFF5, 0xBF810000,
};

// Table of 32-bit words for the "copy aligned" blit shader, variant 8.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables.
static const unsigned int kCodeCopyAligned8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
    0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
    0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205, 0xD1196A02, 0x00000900,
    0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04, 0x00000D00, 0xD11C6A05,
    0x01A90105, 0xD0E9006A, 0x00001102, 0xBF86000F, 0x86FE6A7E, 0xDC400000,
    0x01000002, 0xBF8C0F70, 0xD1196A02, 0x00003102, 0xD11C6A03, 0x01A90103,
    0xDC600000, 0x00000104, 0xD1196A04, 0x00003104, 0xD11C6A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE01C1, 0x8E198418, 0x24020084, 0x7E060209, 0xD1196A02,
    0x00001101, 0xD11C6A03, 0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001501,
    0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001902, 0xBF86000E, 0xDC5C0000,
    0x08000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
    0xDC7C0000, 0x00000804, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
    0xBF82FFEF, 0x8E198218, 0x24020082, 0x7E06020D, 0xD1196A02, 0x00001901,
    0xD11C6A03, 0x01A90103, 0x7E0A020F, 0xD1196A04, 0x00001D01, 0xD11C6A05,
    0x01A90105, 0xD0E9006A, 0x00002102, 0xBF86000F, 0x86FE6A7E, 0xDC500000,
    0x01000002, 0xD1196A02, 0x00003302, 0xD11C6A03, 0x01A90103, 0xBF8C0F70,
    0xDC700000, 0x00000104, 0xD1196A04, 0x00003304, 0xD11C6A05, 0x01A90105,
    0xBF82FFEE, 0xBEFE01C1, 0x7E060211, 0xD1196A02, 0x00002100, 0xD11C6A03,
    0x01A90103, 0x7E0A0213, 0xD1196A04, 0x00002500, 0xD11C6A05, 0x01A90105,
    0xD0E9006A, 0x00002902, 0xBF860006, 0x86FE6A7E, 0xDC400000, 0x01000002,
    0xBF8C0F70, 0xDC600000, 0x00000104, 0xBF810000,
};

// Table of 32-bit words for the "copy misaligned" blit shader, variant 8.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables.
static const unsigned int kCodeCopyMisaligned8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
    0xC0020400, 0x00000030, 0xBF8C007F, 0x8E028602, 0x32000002, 0x7E060205,
    0xD1196A02, 0x00000900, 0xD11C6A03, 0x01A90103, 0x7E0A0207, 0xD1196A04,
    0x00000D00, 0xD11C6A05, 0x01A90105, 0xD0E9006A, 0x00001102, 0xBF860032,
    0xDC400000, 0x06000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x07000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x08000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xDC400000, 0x09000002, 0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103,
    0xBF8C0F70, 0xDC600000, 0x00000604, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000704, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000804, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xDC600000, 0x00000904, 0xD1196A04, 0x00002104, 0xD11C6A05,
    0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD1196A02, 0x00001100, 0xD11C6A03,
    0x01A90103, 0x7E0A020B, 0xD1196A04, 0x00001500, 0xD11C6A05, 0x01A90105,
    0xD0E9006A, 0x00001902, 0xBF86000F, 0x86FE6A7E, 0xDC400000, 0x01000002,
    0xD1196A02, 0x00002102, 0xD11C6A03, 0x01A90103, 0xBF8C0F70, 0xDC600000,
    0x00000104, 0xD1196A04, 0x00002104, 0xD11C6A05, 0x01A90105, 0xBF82FFEE,
    0xBF810000,
};

// Table of 32-bit words for the "fill" blit shader, variant 8.
// NOTE(review): presumably pre-assembled GPU machine code, with the numeric
// suffix denoting the target ISA generation — confirm against the code that
// selects these tables.
static const unsigned int kCodeFill8[] = {
    0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xBF8C007F, 0x8E028602,
    0x32000002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, 0x8E0C840B,
    0x24020084, 0x7E060205, 0xD1196A02, 0x00000901, 0xD11C6A03, 0x01A90103,
    0xD0E9006A, 0x00000D02, 0xBF860007, 0xDC7C0000, 0x00000402, 0xD1196A02,
    0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF6, 0x8E0C820B, 0x24020082,
    0x7E060207, 0xD1196A02, 0x00000D01, 0xD11C6A03, 0x01A90103, 0xD0E9006A,
    0x00001102, 0xBF860008, 0x86FE6A7E, 0xDC700000, 0x00000402, 0xD1196A02,
    0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
};

}  // namespace AMD
}  // namespace rocr

#endif // OPENSRC_HSA_RUNTIME_CORE_INC_AMD_BLIT_SHADERS_H_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_core_dump.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_
#define OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_

namespace rocr {
namespace amd {
namespace coredump {
// Entry point of the GPU core dump facility; reports its outcome as an
// hsa_status_t.
// NOTE(review): what exactly is dumped, and where, is not visible from this
// declaration — see the implementation.
// NOTE(review): this header includes nothing itself, so hsa_status_t must
// already be declared by whoever includes it.
hsa_status_t dump_gpu_core();
}   //  namespace coredump
}   //  namespace amd
}   //  namespace rocr

#endif // OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_cpu_agent.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// AMD specific HSA backend.

#ifndef HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_
#define HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_

#include <vector>

#include "core/inc/runtime.h"
#include "core/inc/agent.h"
#include "core/inc/queue.h"
#include "core/inc/cache.h"
#include "core/inc/driver.h"

namespace rocr {
namespace AMD {
// @brief Class to represent a CPU device.
// @brief Agent implementation that represents a CPU device.
class CpuAgent : public core::Agent {
 public:
  // @brief Construct a CPU agent.
  //
  // @param [in] node Node id; CPUs located in different sockets receive
  // distinct ids.
  // @param [in] node_props Properties of the node.
  // @param [in] driver_type Type of the driver; KFD when not specified.
  CpuAgent(HSAuint32 node,
           const HsaNodeProperties& node_props,
           core::DriverType driver_type = core::DriverType::KFD);

  // @brief Destroy the CPU agent.
  ~CpuAgent();

  // @brief Call the user supplied callback once per region this agent can
  // access.
  //
  // @param [in] include_peer When true, peer memory regions accessible by this
  // agent are visited as well; when false only the regions owned by this
  // agent are visited.
  // @param [in] callback User supplied callback function.
  // @param [in] data User supplied pointer handed to @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback returned
  // ::HSA_STATUS_SUCCESS for every region traversed.
  hsa_status_t VisitRegion(bool include_peer,
                           hsa_status_t (*callback)(hsa_region_t region, void* data),
                           void* data) const;

  // @brief Override from core::Agent.
  hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, void* data),
                             void* data) const override;

  // @brief Override from core::Agent.
  hsa_status_t IterateSupportedIsas(hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                    void* data) const override;

  // @brief Override from core::Agent.
  hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                            void* value) const override;

  // @brief Override from core::Agent.
  hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override;

  // @brief Override from core::Agent.
  hsa_status_t QueueCreate(size_t size,
                           hsa_queue_type32_t queue_type,
                           uint64_t flags,
                           core::HsaEventCallback event_callback,
                           void* data,
                           uint32_t private_segment_size,
                           uint32_t group_segment_size,
                           core::Queue** queue) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopy(void* dst,
                       core::Agent& dst_agent,
                       const void* src,
                       core::Agent& src_agent,
                       size_t size,
                       std::vector<core::Signal*>& dep_signals,
                       core::Signal& out_signal) override;

  // @brief Number of data caches known to this agent.
  __forceinline size_t num_cache() const {
    return cache_props_.size();
  }

  // @brief Hive ID taken from the node properties.
  __forceinline uint64_t HiveId() const override {
    return properties_.HiveID;
  }

  // @brief Property of the data cache at the given level.
  //
  // @param [in] idx Cache level. The index is not range checked.
  __forceinline const HsaCacheProperties& cache_prop(int idx) const {
    return cache_props_[idx];
  }

  // @brief Override from core::Agent.
  const std::vector<const core::MemoryRegion*>& regions() const override {
    return regions_;
  }

  // @brief Override from core::Agent.
  const std::vector<const core::Isa*>& supported_isas() const override {
    return supported_isas_;
  }

 private:
  // @brief Ask the driver for the list of regions owned by this agent.
  void InitRegionList();

  // @brief Ask the driver for the cache properties.
  void InitCacheList();

  // @brief Call the user supplied callback once per entry of @p regions.
  //
  // @param [in] regions Array of region objects.
  // @param [in] callback User supplied callback function.
  // @param [in] data User supplied pointer handed to @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback returned
  // ::HSA_STATUS_SUCCESS for every region traversed.
  hsa_status_t VisitRegion(const std::vector<const core::MemoryRegion*>& regions,
                           hsa_status_t (*callback)(hsa_region_t region, void* data),
                           void* data) const;

  // @brief Properties of the node.
  const HsaNodeProperties properties_;

  // @brief Data cache properties, indexed by cache level.
  std::vector<HsaCacheProperties> cache_props_;

  // @brief HSA cache objects.
  std::vector<std::unique_ptr<core::Cache>> caches_;

  // @brief Regions owned by this agent.
  std::vector<const core::MemoryRegion*> regions_;

  DISALLOW_COPY_AND_ASSIGN(CpuAgent);
};

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_elf_image.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_ELF_IMAGE_HPP_
#define AMD_ELF_IMAGE_HPP_

#include <string>
#include <sstream>
#include <vector>
#include <memory>
#include <cstdint>

namespace rocr {
namespace amd {
namespace elf {
    class Symbol;
    class SymbolTable;
    class Section;
    class RelocationSection;

    // Abstract interface for one segment of an ELF image. Every operation is
    // pure virtual; the concrete behaviour is supplied by an implementing class
    // that is not part of this header.
    // NOTE(review): the accessor names suggest a mapping onto the ELF program
    // header fields (type, memory size, alignment, file image size, virtual
    // address, flags, file offset) - confirm against the implementation.
    class Segment {
    public:
      virtual ~Segment() = default;

      // Read-only attribute queries.
      virtual uint64_t type() const = 0;
      virtual uint64_t memSize() const = 0;
      virtual uint64_t align() const = 0;
      virtual uint64_t imageSize() const = 0;
      virtual uint64_t vaddr() const = 0;
      virtual uint64_t flags() const = 0;
      virtual uint64_t offset() const = 0;
      virtual const char* data() const = 0;

      // Index of this segment. Unlike the queries above it is not
      // const-qualified.
      virtual uint16_t getSegmentIndex() = 0;

      // Takes a section and reports the outcome as a bool.
      // NOTE(review): presumably attaches @p section to this segment and
      // returns true on success - confirm against the implementation.
      virtual bool updateAddSection(Section* section) = 0;
    };

    // Abstract interface for one section of an ELF image. It is the common
    // base of the specialised section interfaces declared in this header
    // (RelocationSection, StringTable, SymbolTable, NoteSection). Every
    // operation is pure virtual.
    // NOTE(review): semantics below are inferred from names only where marked;
    // confirm against the implementation.
    class Section {
    public:
      virtual ~Section() = default;

      // Identity and header-style attributes.
      virtual uint16_t getSectionIndex() const = 0;
      virtual uint32_t type() const = 0;
      virtual std::string Name() const = 0;
      virtual uint64_t offset() const = 0;
      virtual uint64_t addr() const = 0;

      // Sets a new address; the bool result presumably signals success.
      virtual bool updateAddr(uint64_t addr) = 0;

      virtual uint64_t addralign() const = 0;
      virtual uint64_t flags() const = 0;
      virtual uint64_t size() const = 0;

      // Section payload access. addData takes a source buffer, its size and an
      // alignment and returns a 64-bit value; nextDataOffset takes only an
      // alignment.
      // NOTE(review): presumably both return the offset at which data is (or
      // would be) placed - confirm.
      virtual uint64_t nextDataOffset(uint64_t align) const = 0;
      virtual uint64_t addData(const void* src, uint64_t size, uint64_t align) = 0;

      // Reads @p size bytes starting at @p offset into @p dest; the bool result
      // presumably signals success.
      virtual bool getData(uint64_t offset, void* dest, uint64_t size) = 0;

      // Navigation to related objects.
      virtual Segment* segment() = 0;
      virtual RelocationSection* asRelocationSection() = 0;
      virtual bool hasRelocationSection() const = 0;

      // @p symtab is optional and defaults to a null pointer.
      virtual RelocationSection* relocationSection(SymbolTable* symtab = nullptr) = 0;

      // In-memory size and alignment, with bool-returning setters.
      virtual bool setMemSize(uint64_t s) = 0;
      virtual uint64_t memSize() const = 0;
      virtual bool setAlign(uint64_t a) = 0;
      virtual uint64_t memAlign() const = 0;
    };

    // Abstract interface for a single relocation entry. Every accessor is pure
    // virtual and non-const.
    class Relocation {
    public:
      virtual ~Relocation() = default;

      // Relocation section associated with this entry.
      virtual RelocationSection* section() = 0;

      // Relocation type value.
      virtual uint32_t type() = 0;

      // The referenced symbol, available both as an index and as an object.
      virtual uint32_t symbolIndex() = 0;
      virtual Symbol* symbol() = 0;

      // Offset (unsigned) and addend (signed) of the relocation.
      virtual uint64_t offset() = 0;
      virtual int64_t addend() = 0;
    };

    // Section interface that additionally manages Relocation entries. Inherits
    // Section virtually, so an implementation may combine it with other Section
    // based interfaces while sharing a single Section sub-object.
    class RelocationSection : public virtual Section {
    public:
      // Creates a relocation from its type, symbol, offset and addend and
      // returns a pointer to it.
      virtual Relocation* addRelocation(uint32_t type,
                                        Symbol* symbol,
                                        uint64_t offset,
                                        int64_t addend) = 0;

      // Number of relocations, and access to the i-th one.
      virtual size_t relocationCount() const = 0;
      virtual Relocation* relocation(size_t i) = 0;

      // NOTE(review): presumably the section these relocations apply to -
      // confirm against the implementation.
      virtual Section* targetSection() = 0;
    };

    // Section interface for a string table. Inherits Section virtually.
    class StringTable : public virtual Section {
    public:
      // Two ways of adding a string: addString yields a C string pointer,
      // addString1 yields a size_t.
      // NOTE(review): presumably the pointer refers to the stored copy and the
      // size_t is the index of the string within the table - confirm.
      virtual const char* addString(const std::string& s) = 0;
      virtual size_t addString1(const std::string& s) = 0;

      // Conversion between a table index and the string stored there.
      virtual const char* getString(size_t ndx) = 0;
      virtual size_t getStringIndex(const char* name) = 0;
    };

    // Abstract interface for one symbol. Accessors are pure virtual and
    // non-const; only the value and the size can be modified through this
    // interface.
    class Symbol {
    public:
      virtual ~Symbol() = default;

      // Attribute queries.
      virtual uint32_t index() = 0;
      virtual uint32_t type() = 0;
      virtual uint32_t binding() = 0;
      virtual uint64_t size() = 0;
      virtual uint64_t value() = 0;
      virtual unsigned char other() = 0;
      virtual std::string name() = 0;

      // Section associated with the symbol.
      virtual Section* section() = 0;

      // Mutators.
      virtual void setValue(uint64_t value) = 0;
      virtual void setSize(uint64_t size) = 0;
    };

    // Section interface for a symbol table. Inherits Section virtually.
    class SymbolTable : public virtual Section {
    public:
      // Creates a symbol from the given attributes and returns a pointer to
      // it. @p other is optional and defaults to 0.
      virtual Symbol* addSymbol(Section* section,
                                const std::string& name,
                                uint64_t value,
                                uint64_t size,
                                unsigned char type,
                                unsigned char binding,
                                unsigned char other = 0) = 0;

      // Number of symbols, and access to the i-th one.
      virtual size_t symbolCount() = 0;
      virtual Symbol* symbol(size_t i) = 0;
    };

    // Section interface for notes, each addressed by a (name, type) pair.
    // Inherits Section virtually.
    class NoteSection : public virtual Section {
    public:
      // Adds a note. The descriptor is optional: @p desc defaults to a null
      // pointer and @p desc_size to 0. The bool result presumably signals
      // success.
      virtual bool addNote(const std::string& name,
                           uint32_t type,
                           const void* desc = nullptr,
                           uint32_t desc_size = 0) = 0;

      // Looks up a note by name and type, reporting its descriptor through the
      // @p desc and @p desc_size out-parameters.
      // NOTE(review): who owns the memory returned through @p desc is not
      // visible here - confirm against the implementation.
      virtual bool getNote(const std::string& name,
                           uint32_t type,
                           void** desc,
                           uint32_t* desc_size) = 0;
    };

    // Abstract interface for a whole ELF image: creation and loading,
    // serialisation, header queries, and access to the segments, sections and
    // tables it contains. All operations except output() are pure virtual.
    // NOTE(review): the bool results presumably signal success, with
    // diagnostics collected in `out` - confirm against the implementation.
    class Image {
    public:
      virtual ~Image() = default;

      // Initialisation of the image: from scratch, from a file, or from a
      // memory buffer. The os_abi, abi_version and e_flags arguments of
      // initNew are optional and default to 0.
      virtual bool initNew(uint16_t machine,
                           uint16_t type,
                           uint8_t os_abi = 0,
                           uint8_t abi_version = 0,
                           uint32_t e_flags = 0) = 0;
      virtual bool loadFromFile(const std::string& filename) = 0;
      virtual bool saveToFile(const std::string& filename) = 0;
      virtual bool initFromBuffer(const void* buffer, size_t size) = 0;
      virtual bool initAsBuffer(const void* buffer, size_t size) = 0;
      virtual bool writeTo(const std::string& filename) = 0;

      // Copy to a new buffer allocated with malloc. @p size is optional and
      // defaults to a null pointer.
      virtual bool copyToBuffer(void** buf, size_t* size = nullptr) = 0;

      // Copy to an existing buffer of the given size.
      virtual bool copyToBuffer(void* buf, size_t size) = 0;

      // Raw image bytes and their length.
      virtual const char* data() = 0;
      virtual uint64_t size() = 0;

      // ELF header queries.
      virtual uint16_t Machine() = 0;
      virtual uint16_t Type() = 0;
      virtual uint32_t EFlags() = 0;
      virtual uint32_t ABIVersion() = 0;
      virtual uint32_t EClass() = 0;
      virtual uint32_t OsAbi() = 0;

      // Returns a copy of the text accumulated so far in the `out` stream.
      std::string output() {
        return out.str();
      }

      virtual bool Freeze() = 0;
      virtual bool Validate() = 0;

      // String table and symbol table access.
      virtual StringTable* shstrtab() = 0;
      virtual StringTable* strtab() = 0;
      virtual SymbolTable* symtab() = 0;
      virtual SymbolTable* getSymtab(uint16_t index) = 0;
      virtual SymbolTable* dynsym() = 0;
      virtual SymbolTable* getDynsym(uint16_t index) = 0;
      virtual SymbolTable* getSymbolTable() = 0;
      virtual SymbolTable* getSymbolTable(uint16_t index) = 0;

      virtual StringTable* addStringTable(const std::string& name) = 0;
      virtual StringTable* getStringTable(uint16_t index) = 0;

      // @p stab is optional and defaults to a null pointer.
      virtual SymbolTable* addSymbolTable(const std::string& name,
                                          StringTable* stab = nullptr) = 0;

      // Segment enumeration and lookup by virtual address.
      virtual size_t segmentCount() = 0;
      virtual Segment* segment(size_t i) = 0;
      virtual Segment* segmentByVAddr(uint64_t vaddr) = 0;

      // Section enumeration and lookup by virtual address.
      virtual size_t sectionCount() = 0;
      virtual Section* section(size_t i) = 0;
      virtual Section* sectionByVAddr(uint64_t vaddr) = 0;

      // Note section access and creation.
      virtual NoteSection* note() = 0;
      virtual NoteSection* addNoteSection(const std::string& name) = 0;

      // Segment construction. @p paddr is optional and defaults to 0.
      virtual Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) = 0;
      virtual bool addSegments() = 0;

      // Section construction. flags and entsize default to 0, segment defaults
      // to a null pointer.
      virtual Section* addSection(const std::string& name,
                                  uint32_t type,
                                  uint64_t flags = 0,
                                  uint64_t entsize = 0,
                                  Segment* segment = nullptr) = 0;

      // Relocation section associated with @p sec. @p symtab is optional and
      // defaults to a null pointer.
      virtual RelocationSection* relocationSection(Section* sec,
                                                   SymbolTable* symtab = nullptr) = 0;

    protected:
      // Text stream available to implementations; its contents are exposed
      // through output().
      std::ostringstream out;
    };

    // Factory functions that hand back an Image through a raw pointer.
    // NOTE(review): the names indicate 32-bit and 64-bit ELF class variants,
    // and the caller presumably owns (and must delete) the returned object -
    // confirm against the implementation.
    Image* NewElf32Image();
    Image* NewElf64Image();

    // Computes a 64-bit size from the memory that @p buffer points to.
    // NOTE(review): presumably the total size in bytes of the ELF image that
    // starts at @p buffer; no buffer length is passed in, so the buffer must
    // already be trusted to hold a complete header - confirm.
    uint64_t ElfSize(const void* buffer);

    // Builds a std::string from a character buffer @p s and a length @p s_size.
    // NOTE(review): how terminating NUL characters inside the first @p s_size
    // characters are treated is not visible here - confirm against the
    // implementation.
    std::string GetNoteString(uint32_t s_size, const char* s);

}   //  namespace elf
}   //  namespace amd
}   //  namespace rocr

#endif // AMD_ELF_IMAGE_HPP_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_filter_device.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_
#define HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>
#include <map>
#include <string>
#include <sstream>

// Forward declaration of the HsaNodeProperties.
struct _HsaNodeProperties;
using HsaNodeProperties = _HsaNodeProperties;

namespace rocr {
namespace AMD {

// ROCr allows users to filter and reorder various Gpu devices that are
// present on ROCm system. This ability is made available via environment
// variable ROCR_VISIBLE_DEVICES (RVD). Users are allowed to specify a list
// of Gpu Identifiers separated by comma delimiter as the value of this env
// variable.
//
// On a ROCm platform instance, a Gpu device could be identified by its:
//
//    Index - Position at which ROCr reports it upon device enumeration
//    UUID  - A string that is unique and is immutable i.e. tags Gpu
//            instance across systems and power cycles. UUID values
//            are defined to begin with "GPU-" prefix
//
//    @note: Not all Gpu devices will report valid UUID's. For example,
//    Only devices from Gfx9 and later will encode valid UUID's. To account
//    for this and other reasons, the UUID string "GPU-XX" is defined as
//    indicating those devices. Users can still select those Gpu devices
//    by using their enumeration index
//
//  Users are allowed to select a device by specifying its UUID string in
//  full or part. A UUID string that does not uniquely match an agent's
//  valid UUID prefix is interpreted as terminating. The UUID string
//  "GPU-XX" will not match and therefore will terminate
//
//  RVD interpreter treats an empty token list as filtering all devices.
//    Users can use this mode to report ZERO Gpu devices
//
//  RVD interpreter treats a token as Illegal if it can't be evaluated into an
//    instance of Device UUID or Enumeration Index
//
//  RVD interpreter treats a Legal instance of Enumeration Index as Terminating
//    if any ONE of the following conditions apply:
//      Value of index lies outside the interval [0 - (numGpuDevices - 1)]
//      Value of index maps to a device that has been previously selected
//
//  RVD interpreter treats a Legal instance of Device UUID as Terminating
//    if any ONE of the following conditions apply:
//      Value of UUID is the literal "GPU-XX"
//      Value of UUID matches ZERO devices on system
//      Value of UUID matches TWO or more devices on system
//      Value of UUID maps to a device that has been previously selected
//
//  RVD interpreter builds the list of Gpu devices to surface using tokens
//    that are Legal and NOT Terminating
//
//  Following are some examples of RVD value strings and their interpretation
//  on a ROCm system with four Gpu devices. Assume for now the UUID's of the
//  four Gpu devices are:
//    Gpu-0: "GPU-BABABABABABABABA"
//    Gpu-1: "GPU-ABBAABBAABBAABBA"
//    Gpu-2: "GPU-BABAABBAABBABABA"
//    Gpu-3: "GPU-ABBABABABABAABBA"
//
//    Surface ZERO devices
//    A1) ROCR_VISIBLE_DEVICES=""
//    A2) ROCR_VISIBLE_DEVICES="-1"
//    A3) ROCR_VISIBLE_DEVICES="GPU-XX"
//
//    Surface Gpu-3 and Gpu-0 devices in that order
//    B) ROCR_VISIBLE_DEVICES="3,GPU-BABABABABABABABA,4"
//
//    Surface Gpu-1 and Gpu-2 devices in that order
//    C) ROCR_VISIBLE_DEVICES="1,GPU-BABAABBAABBABABA,GPU-XX"
//
//    Surface Gpu-3 and Gpu-2 devices in that order
//    D) ROCR_VISIBLE_DEVICES="3,GPU-BABAABBA,GPU-XX"
//
class RvdFilter {
 public:
  /// @brief Constructor. The token list, the UUID list and the user device
  /// list all start out empty.
  RvdFilter() = default;

  /// @brief Destructor.
  ~RvdFilter() = default;

  /// @brief Determine if user has specified environment variable
  /// ROCR_VISIBLE_DEVICES (RVD) to filter and reorder Gpu devices
  ///
  /// @return TRUE if user has defined the env RVD
  static bool FilterDevices();

  /// @brief Determine if user has specified environment variable
  /// ROCR_VISIBLE_DEVICES (RVD) to filter out all Gpu devices i.e.
  /// surface ZERO devices
  ///
  /// @return TRUE if user has specified ZERO devices to be surfaced
  bool SelectZeroDevices();

  /// @brief Builds the list of tokens specified by user to filter
  /// and reorder Gpu devices. A token represents either a Gpu's
  /// enumeration index or its UUID value. It is possible for the
  /// list to have no tokens i.e. user has selected zero devices
  void BuildRvdTokenList();

  /// @brief Build the list of Gpu device UUIDs as enumerated by ROCt
  ///
  /// @param node_props Node properties of the ROCm devices present on
  /// the system. NOTE(review): presumably one entry per node, covering
  /// both Cpu and Gpu devices - confirm against the caller
  void BuildDeviceUuidList(const std::vector<HsaNodeProperties>& node_props);

  /// @brief Build the list of Gpu devices that will be enumerated to user
  ///
  /// @return Number of Gpu devices to surface upon device enumeration
  uint32_t BuildUsrDeviceList();

  /// @brief Processes UUID token and returns its enumeration index
  ///
  /// @param token RVD token encoding a device's UUID value
  /// @return Enumeration index of the device if the token is valid,
  /// -1 otherwise
  int32_t ProcessUuidToken(const std::string& token);

  /// @brief Get the number of Gpu devices that will be surfaced
  /// upon device enumeration
  ///
  /// @return Number of devices to enumerate, including possibly
  /// ZERO devices
  uint32_t GetUsrDeviceListSize();

  /// @brief Return the rank of the queried Gpu device, i.e. its position
  /// among the Gpu devices that will be surfaced upon device enumeration
  ///
  /// @param roctIdx Identifies the queried Gpu device. NOTE(review):
  /// presumably its index in ROCt enumeration order - confirm
  /// @return -1 if queried device is not surfaced, else a value in
  /// the range [0 - (numGpus - 1)]
  int32_t GetUsrDeviceRank(uint32_t roctIdx);

#ifndef NDEBUG
  /// @brief Set debug UUID values to Gpu devices. This is intended to
  /// help debug and test RVD module functionality. Only declared when
  /// NDEBUG is not defined
  void SetDeviceUuidList();

  /// @brief Print the list of UUIDs of Gpu devices present on system.
  /// Only declared when NDEBUG is not defined
  void PrintDeviceUuidList();

  /// @brief Print the list of Gpu devices per their enumeration order.
  /// Only declared when NDEBUG is not defined
  void PrintUsrDeviceList();

  /// @brief Print the list of tokens specified by user to filter
  /// and reorder Gpu devices. Only declared when NDEBUG is not defined
  void PrintRvdTokenList();
#endif

 private:
  /// @brief List of tokens specified by user to select and reorder
  /// Gpu devices
  std::vector<std::string> rvdTokenList_;

  /// @brief Ordered list of ROCt enumerated Gpu device's UUID values
  std::vector<std::string> devUuidList_;

  /// @brief Ordered list of ROCr enumerated Gpu devices, stored as a map
  /// from an unsigned key to a signed value. NOTE(review): presumably
  /// ROCt index to user-visible rank - confirm against the implementation
  std::map<uint32_t, int32_t> usrDeviceList_;

};  // End of class RvdFilter

}  // namespace AMD
}  // namespace rocr

#endif  // header guard - HSA_RUNTIME_CORE_INC_AMD_FILTER_DEVICE_H_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_gpu_agent.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// AMD specific HSA backend.

#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_
#define HSA_RUNTIME_CORE_INC_AMD_GPU_AGENT_H_

#include <vector>
#include <list>
#include <map>

#include "hsakmt/hsakmt.h"

#include "core/inc/agent.h"
#include "core/inc/blit.h"
#include "core/inc/cache.h"
#include "core/inc/driver.h"
#include "core/inc/runtime.h"
#include "core/inc/scratch_cache.h"
#include "core/inc/signal.h"
#include "core/util/lazy_ptr.h"
#include "core/util/locks.h"
#include "core/util/small_heap.h"
#include "pcs/pcs_runtime.h"

namespace rocr {
namespace AMD {
class MemoryRegion;

typedef ScratchCache::ScratchInfo ScratchInfo;

// @brief Interface to represent a GPU agent. Apart from the constructor and
// PreloadBlits, every member is pure virtual and must be supplied by the
// concrete agent class.
class GpuAgentInt : public core::Agent {
 public:
  // @brief Constructor. Forwards to core::Agent the driver that the runtime
  // singleton reports for @p driver_type, the node id, and the
  // kAmdGpuDevice device type.
  //
  // @param [in] node_id Node id.
  // @param [in] driver_type Driver type.
  GpuAgentInt(uint32_t node_id, core::DriverType driver_type)
      : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type),
                    node_id,
                    core::Agent::DeviceType::kAmdGpuDevice) {}

  // @brief Ensure blits are ready (performance hint). The default
  // implementation does nothing.
  virtual void PreloadBlits() {}

  // @brief Initialization hook invoked after tools library has loaded,
  // to allow tools interception of interface functions.
  //
  // @retval HSA_STATUS_SUCCESS if initialization is successful.
  virtual hsa_status_t PostToolsInit() = 0;

  // @brief Release the resources held by the agent.
  virtual void ReleaseResources() = 0;

  // @brief Invoke the user provided callback for each region accessible by
  // this agent.
  //
  // @param [in] include_peer If true, the callback will be also invoked on
  // each peer memory region accessible by this agent. If false, only invoke
  // the callback on memory region owned by this agent.
  // @param [in] callback User provided callback function.
  // @param [in] data User provided pointer as input for @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
  // region returns ::HSA_STATUS_SUCCESS.
  virtual hsa_status_t VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region, void* data),
                                   void* data) const = 0;

  // @brief Carve scratch memory for main from scratch pool.
  //
  // @param [in,out] scratch Structure to be populated with the carved memory
  // information.
  virtual void AcquireQueueMainScratch(ScratchInfo& scratch) = 0;

  // @brief Carve scratch memory for alt from scratch pool.
  //
  // @param [in,out] scratch Structure to be populated with the carved memory
  // information.
  virtual void AcquireQueueAltScratch(ScratchInfo& scratch) = 0;

  // @brief Release scratch memory from main back to scratch pool.
  //
  // @param [in,out] base Scratch memory previously acquired with call to
  // ::AcquireQueueMainScratch.
  virtual void ReleaseQueueMainScratch(ScratchInfo& base) = 0;

  // @brief Release scratch memory from alternate back to scratch pool.
  //
  // @param [in,out] base Scratch memory previously acquired with call to
  // ::AcquireQueueAltScratch.
  virtual void ReleaseQueueAltScratch(ScratchInfo& base) = 0;

  // @brief Translate the kernel start and end dispatch timestamp from agent
  // domain to host domain.
  //
  // @param [in] signal Pointer to signal that provides the dispatch timing.
  // @param [out] time Structure to be populated with the host domain value.
  virtual void TranslateTime(core::Signal* signal,
                             hsa_amd_profiling_dispatch_time_t& time) = 0;

  // @brief Translate the async copy start and end timestamp from agent
  // domain to host domain.
  //
  // @param [in] signal Pointer to signal that provides the async copy timing.
  // @param [out] time Structure to be populated with the host domain value.
  virtual void TranslateTime(core::Signal* signal,
                             hsa_amd_profiling_async_copy_time_t& time) = 0;

  // @brief Translate timestamp from agent domain to host domain.
  //
  // @param [in] tick Timestamp in agent domain.
  //
  // @retval The translated timestamp.
  virtual uint64_t TranslateTime(uint64_t tick) = 0;

  // @brief Invalidate caches on the agent which may hold code object data.
  //
  // @param [in] ptr Start of the affected memory range.
  // @param [in] size Size of the affected memory range.
  virtual void InvalidateCodeCaches(void* ptr, size_t size) = 0;

  // @brief Sets the coherency type of this agent.
  //
  // @param [in] type New coherency type.
  //
  // @retval true The new coherency type is set successfully.
  virtual bool current_coherency_type(hsa_amd_coherency_type_t type) = 0;

  // @brief Returns the current coherency type of this agent.
  //
  // @retval Coherency type.
  virtual hsa_amd_coherency_type_t current_coherency_type() const = 0;

  // @brief Register @p gang_peer together with a bandwidth factor.
  virtual void RegisterGangPeer(core::Agent& gang_peer, unsigned int bandwidth_factor) = 0;

  // @brief Register @p gang_peer together with a recommended SDMA engine id
  // mask.
  virtual void RegisterRecSdmaEngIdMaskPeer(core::Agent& gang_peer,
                                            uint32_t rec_sdma_eng_id_mask) = 0;

  // @brief Set the recommended SDMA engine override flag.
  virtual void SetRecSdmaEngOverride(bool flag) = 0;

  // @brief Query the agent HSA profile.
  //
  // @retval HSA profile.
  virtual hsa_profile_t profile() const = 0;

  // @brief Query the agent memory bus width in bit.
  //
  // @retval Bus width in bit.
  virtual uint32_t memory_bus_width() const = 0;

  // @brief Query the agent memory maximum frequency in MHz.
  //
  // @retval Maximum frequency in MHz.
  virtual uint32_t memory_max_frequency() const = 0;

  // @brief Whether agent supports asynchronous scratch reclaim. Depends on CP
  // FW.
  virtual bool AsyncScratchReclaimEnabled() const = 0;

  // @brief Update the agent's scratch use-once threshold.
  // Only valid when async scratch reclaim is supported.
  //
  // @retval HSA_STATUS_SUCCESS if successful.
  virtual hsa_status_t SetAsyncScratchThresholds(size_t use_once_limit) = 0;

  // @brief Iterate through supported PC Sampling configurations.
  //
  // @retval HSA_STATUS_SUCCESS if successful.
  virtual hsa_status_t PcSamplingIterateConfig(
      hsa_ven_amd_pcs_iterate_configuration_callback_t cb, void* cb_data) = 0;

  // PC Sampling session control. Each call operates on the session object
  // passed by reference and reports an hsa_status_t.
  virtual hsa_status_t PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session) = 0;

  virtual hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
                                              pcs::PcsRuntime::PcSamplingSession& session) = 0;

  virtual hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) = 0;

  virtual hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) = 0;

  virtual hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) = 0;

  virtual hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) = 0;
};

class GpuAgent : public GpuAgentInt {
 public:
  // @brief GPU agent constructor.
  //
  // @param [in] node Node id. Each CPU in different socket will get distinct
  // id.
  // @param [in] node_props Node property.
  // @param [in] xnack_mode XNACK mode of device.
  // @param [in] index Index of the GPU device.
  // @param [in] driver_type Driver type. Default is KFD.
  GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index,
           core::DriverType driver_type = core::DriverType::KFD);

  // @brief GPU agent destructor.
  ~GpuAgent();

  // @brief Release allocated resources and disables agent
  void ReleaseResources() override;

  // @brief Ensure blits are ready (performance hint).
  void PreloadBlits() override;

  // @brief Override from core::Agent.
  hsa_status_t PostToolsInit() override;

  uint16_t GetMicrocodeVersion() const;

  uint16_t GetSdmaMicrocodeVersion() const;

  // @brief Assembles SP3 shader source into ISA or AQL code object.
  //
  // @param [in] src_sp3 SP3 shader source text representation.
  // @param [in] func_name Name of the SP3 function to assemble.
  // @param [in] assemble_target ISA or AQL assembly target.
  // @param [out] code_buf Code object buffer.
  // @param [out] code_buf_size Size of code object buffer in bytes.
  enum class AssembleTarget { ISA, AQL };

  void AssembleShader(const char* func_name, AssembleTarget assemble_target, void*& code_buf,
                      size_t& code_buf_size) const;

  // @brief Frees code object created by AssembleShader.
  //
  // @param [in] code_buf Code object buffer.
  // @param [in] code_buf_size Size of code object buffer in bytes.
  void ReleaseShader(void* code_buf, size_t code_buf_size) const;

  // @brief Override from core::Agent.
  hsa_status_t VisitRegion(bool include_peer,
                           hsa_status_t (*callback)(hsa_region_t region,
                                                    void* data),
                           void* data) const override;

  // @brief Override from core::Agent.
  hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region,
                                                      void* data),
                             void* data) const override;

  hsa_status_t IterateSupportedIsas(
                    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                                  void* data) const override;

  // @brief Override from core::Agent.
  hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                            void* value) const override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopy(void* dst, const void* src, size_t size) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopy(void* dst, core::Agent& dst_agent, const void* src,
                       core::Agent& src_agent, size_t size,
                       std::vector<core::Signal*>& dep_signals,
                       core::Signal& out_signal) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopyOnEngine(void* dst, core::Agent& dst_agent, const void* src,
                       core::Agent& src_agent, size_t size,
                       std::vector<core::Signal*>& dep_signals,
                       core::Signal& out_signal, int engine_offset,
                       bool force_copy_on_sdma) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent,
                             uint32_t *engine_ids_mask) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                  uint32_t* recommended_ids_mask) override;

  // @brief Override from core::Agent.
  hsa_status_t DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
                           const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
                           const hsa_dim3_t* range, hsa_amd_copy_direction_t dir,
                           std::vector<core::Signal*>& dep_signals, core::Signal& out_signal);

  // @brief Override from core::Agent.
  hsa_status_t DmaFill(void* ptr, uint32_t value, size_t count) override;

  // @brief Override from core::Agent.
  hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override;

  // @brief Override from core::Agent.
  hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                           core::HsaEventCallback event_callback, void* data,
                           uint32_t private_segment_size, uint32_t group_segment_size,
                           core::Queue** queue) override;

  // @brief Decrement GWS ref count.
  void GWSRelease();

  // @brief Override from AMD::GpuAgentInt.
  void AcquireQueueMainScratch(ScratchInfo& scratch) override;
  void ReleaseQueueMainScratch(ScratchInfo& scratch) override;

  void AcquireQueueAltScratch(ScratchInfo& scratch) override;
  void ReleaseQueueAltScratch(ScratchInfo& scratch) override;

  // @brief Override from AMD::GpuAgentInt.
  void TranslateTime(core::Signal* signal, hsa_amd_profiling_dispatch_time_t& time) override;

  // @brief Override from AMD::GpuAgentInt.
  void TranslateTime(core::Signal* signal, hsa_amd_profiling_async_copy_time_t& time) override;

  // @brief Override from AMD::GpuAgentInt.
  uint64_t TranslateTime(uint64_t tick) override;

  // @brief Override from AMD::GpuAgentInt.
  void InvalidateCodeCaches(void* ptr, size_t size) override;

  // @brief Override from AMD::GpuAgentInt.
  bool current_coherency_type(hsa_amd_coherency_type_t type) override;

  hsa_amd_coherency_type_t current_coherency_type() const override {
    return current_coherency_type_;
  }

  core::Agent* GetNearestCpuAgent(void) const;

  void RegisterGangPeer(core::Agent& gang_peer, unsigned int bandwidth_factor) override;

  void RegisterRecSdmaEngIdMaskPeer(core::Agent& gang_peer, uint32_t rec_sdma_eng_id_mask) override;

  // Getter & setters.

  // @brief Returns Hive ID
  __forceinline uint64_t HiveId() const override { return  properties_.HiveID; }

  // @brief Returns KFD's GPU id which is a hash used internally.
  __forceinline uint64_t KfdGpuID() const { return properties_.KFDGpuID; }

  // @brief Returns node property.
  __forceinline const HsaNodeProperties& properties() const {
    return properties_;
  }

  // @brief set rec_sdma_eng_override_
  __forceinline void SetRecSdmaEngOverride(bool flag) override { rec_sdma_eng_override_ = flag; }

  // @brief Returns number of data caches.
  __forceinline size_t num_cache() const { return cache_props_.size(); }

  // @brief Returns data cache property.
  //
  // @param [in] idx Cache level.
  __forceinline const HsaCacheProperties& cache_prop(int idx) const {
    return cache_props_[idx];
  }

  // @brief Override from core::Agent.
  const std::vector<const core::MemoryRegion*>& regions() const override {
    return regions_;
  }

  const std::vector<const core::Isa *>& supported_isas() const override {
                                                      return supported_isas_;}

  // @brief Override from AMD::GpuAgentInt.
  __forceinline hsa_profile_t profile() const override { return profile_; }

  // @brief Override from AMD::GpuAgentInt.
  __forceinline uint32_t memory_bus_width() const override {
    return memory_bus_width_;
  }

  // @brief Override from AMD::GpuAgentInt.
  __forceinline uint32_t memory_max_frequency() const override {
    return memory_max_frequency_;
  }

  // @brief Order the device is surfaced in hsa_iterate_agents counting only
  // GPU devices.
  __forceinline uint32_t enumeration_index() const { return enum_index_; }

  // @brief returns true if agent uses MES scheduler, which this check equates with an
  // ISA major version of 11 or newer.
  __forceinline const bool isMES() const {
    return isa_->GetMajorVersion() >= 11;
  }

  // @brief returns the libdrm device handle
  __forceinline amdgpu_device_handle libDrmDev() const { return ldrm_dev_; }

  __forceinline void CheckClockTicks() {
    // Identical GPU counters in t0_ and t1_ mean t1_ was never refreshed after agent
    // initialization. TranslateTime(tick) divides by the counter delta when computing the
    // SystemClockCounter to GPUClockCounter ratio, so force a resync before that can happen.
    const bool t1_never_updated = (t1_.GPUClockCounter == t0_.GPUClockCounter);
    if (!t1_never_updated) return;
    SyncClocks();
  }

  /// @brief Override from AMD::GpuAgentInt.
  __forceinline bool is_xgmi_cpu_gpu() const { return xgmi_cpu_gpu_; }
  /// @brief Is large BAR support enabled for this GPU.
  __forceinline bool LargeBarEnabled() const { return large_bar_enabled_; }

  /// @brief Force a WC flush on PCIe devices by doing a write and then read-back
  /// of the last byte of the range.
  ///
  /// Does nothing when xgmi_cpu_gpu_ is set, when @p ptr is null, or when the range is empty.
  ///
  /// @param [in] ptr Start of the range.
  /// @param [in] size Size of the range in bytes.
  __forceinline void PcieWcFlush(void *ptr, size_t size) const {
    // An empty range has no last byte: size - 1 would index before ptr.
    if (xgmi_cpu_gpu_ || ptr == nullptr || size == 0) return;

    // Go through a volatile pointer. A plain "*p = *p" has no observable effect, so the
    // optimizer is allowed to delete it and the intended write would never be issued.
    volatile uint8_t* last_byte = reinterpret_cast<volatile uint8_t*>(ptr) + size - 1;

    _mm_sfence();
    *last_byte = *last_byte;
    _mm_mfence();
    auto readback = *last_byte;
    UNUSED(readback);
  }

  const size_t MAX_SCRATCH_APERTURE_PER_XCC = (1ULL << 32);
  size_t MaxScratchDevice() const { return properties_.NumXcc * MAX_SCRATCH_APERTURE_PER_XCC; }

  void ReserveScratch();

  // @brief If agent supports it, release scratch memory for all AQL queues on this agent.
  void AsyncReclaimScratchQueues();

  // @brief Returns true if scratch reclaim is enabled
  //
  // All of the following must hold: the runtime flag enable_scratch_async_reclaim is set,
  // the first supported ISA has major version 9 with minor version 4 or 5, and
  // properties_.EngineId.ui32.uCode is at least the minimum listed below for that minor
  // version.
  __forceinline bool AsyncScratchReclaimEnabled() const override {
    const uint32_t GFX94X_MIN_CP_FW_VERSION_REQUIRED = 177;
    const uint32_t GFX95X_MIN_CP_FW_VERSION_REQUIRED = 24;

    // Checked first so the ISA list is not touched when the feature is switched off.
    if (!core::Runtime::runtime_singleton_->flag().enable_scratch_async_reclaim()) return false;

    const auto* primary_isa = supported_isas()[0];
    if (primary_isa->GetMajorVersion() != 9) return false;

    const auto minor = primary_isa->GetMinorVersion();
    if (minor == 4) return properties_.EngineId.ui32.uCode >= GFX94X_MIN_CP_FW_VERSION_REQUIRED;
    if (minor == 5) return properties_.EngineId.ui32.uCode >= GFX95X_MIN_CP_FW_VERSION_REQUIRED;
    return false;
  }

  hsa_status_t SetAsyncScratchThresholds(size_t use_once_limit) override;

  __forceinline size_t ScratchSingleLimitAsyncThreshold() const {
    return scratch_limit_async_threshold_;
  }

  void Trim() override;

  const std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>&
  system_allocator() const {
    return system_allocator_;
  }

  const std::function<void(void*)>& system_deallocator() const { return system_deallocator_; }

  const std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)>&
  finegrain_allocator() const {
    return finegrain_allocator_;
  }

  const std::function<void(void*)>& finegrain_deallocator() const { return finegrain_deallocator_; }

  /// @brief Allocate coarse grain device memory on this GPU agent.
  const std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)>&
  coarsegrain_allocator() const {
    return coarsegrain_allocator_;
  }

  /// @brief Deallocate memory allocated from the coarsegrain_allocator
  /// on this GPU agent.
  const std::function<void(void*)>& coarsegrain_deallocator() const {
    return coarsegrain_deallocator_;
  }

 protected:
  // Sizes are in packets.
  const uint32_t minAqlSize_ = 0x40;     // 4KB min
  const uint32_t maxAqlSize_ = 0x20000;  // 8MB max

  // @brief Create an internal queue allowing tools to be notified.
  core::Queue* CreateInterceptibleQueue(const uint32_t size = 0) {
    return CreateInterceptibleQueue(core::Queue::DefaultErrorHandler, nullptr, size);
  }

  // @brief Create an internal queue, with a custom error handler, allowing tools to be
  // notified.
  core::Queue* CreateInterceptibleQueue(void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
                                        void* data, const uint32_t size);

  // @brief Create SDMA blit object.
  //
  // @retval NULL if SDMA blit creation and initialization failed.
  core::Blit* CreateBlitSdma(bool use_xgmi, int rec_eng);

  // @brief Create Kernel blit object using provided compute queue.
  //
  // @retval NULL if Kernel blit creation and initialization failed.
  core::Blit* CreateBlitKernel(core::Queue* queue);

  // @brief Invoke the user provided callback for every region in @p regions.
  //
  // @param [in] regions Array of region object.
  // @param [in] callback User provided callback function.
  // @param [in] data User provided pointer as input for @p callback.
  //
  // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
  // region returns ::HSA_STATUS_SUCCESS.
  hsa_status_t VisitRegion(
      const std::vector<const core::MemoryRegion*>& regions,
      hsa_status_t (*callback)(hsa_region_t region, void* data),
      void* data) const;

  // @brief Update ::t1_ tick count.
  void SyncClocks();

  // @brief Binds the second-level trap handler to this node.
  void BindTrapHandler();

  // @brief Override from core::Agent.
  hsa_status_t EnableDmaProfiling(bool enable) override;

  hsa_status_t PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configuration_callback_t cb,
                                       void* cb_data) override;
  hsa_status_t PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId,
                            pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) override;
  hsa_status_t PcSamplingFlushDeviceBuffers(pcs::PcsRuntime::PcSamplingSession& session);

  // @brief Node properties.
  const HsaNodeProperties properties_;

  // @brief Current coherency type.
  hsa_amd_coherency_type_t current_coherency_type_;

  // @brief Maximum number of queues that can be created.
  uint32_t max_queues_;

  // @brief Object to manage scratch memory.
  SmallHeap scratch_pool_;

  // @brief Current short duration scratch memory size.
  size_t scratch_used_large_;

  // @brief Notifications for scratch release.
  std::map<hsa_signal_t, hsa_signal_value_t> scratch_notifiers_;

  // @brief Default scratch size per queue.
  size_t queue_scratch_len_;

  // @brief Default scratch size per work item.
  size_t scratch_per_thread_;

  // @brief Blit interfaces for each data path.
  enum BlitEnum { BlitDevToDev, BlitHostToDev, BlitDevToHost, DefaultBlitCount };

  // Blit objects managed by an instance of GpuAgent
  std::vector<lazy_ptr<core::Blit>> blits_;

  // List of agents connected via xGMI
  std::vector<const core::Agent*> xgmi_peer_list_;

  // Protects xgmi_peer_list_
  KernelMutex xgmi_peer_list_lock_;

  // @brief AQL queues for cache management and blit compute usage.
  enum QueueEnum {
    QueueUtility,     // Cache management and device to {host,device} blit compute
    QueueBlitOnly,    // Host to device blit
    QueuePCSampling,  // Dedicated high priority queue for PC Sampling
    QueueCount
  };

  lazy_ptr<core::Queue> queues_[QueueCount];

  // @brief Mutex to protect the update to coherency type.
  KernelMutex coherency_lock_;

  // @brief Mutex to protect access to scratch pool.
  KernelMutex scratch_lock_;

  // @brief Mutex to protect access to ::t1_.
  KernelMutex t1_lock_;

  // @brief Mutex to protect access to blit objects.
  KernelMutex blit_lock_;

  // @brief Mutex to protect sdma gang submissions.
  KernelMutex sdma_gang_lock_;

  // @brief GPU tick on initialization.
  HsaClockCounters t0_;

  HsaClockCounters t1_;

  double historical_clock_ratio_;

  // @brief s_memrealtime nominal clock frequency
  uint64_t wallclock_frequency_;

  // @brief Array of GPU cache property.
  std::vector<HsaCacheProperties> cache_props_;

  // @brief Array of HSA cache objects.
  std::vector<std::unique_ptr<core::Cache>> caches_;

  // @brief Array of regions owned by this agent.
  std::vector<const core::MemoryRegion*> regions_;

  core::Isa* isa_;

  // @brief HSA profile.
  hsa_profile_t profile_;

  void* trap_code_buf_;

  size_t trap_code_buf_size_;

  // @brief Mappings from doorbell index to queue, for trap handler.
  // Correlates with output of s_sendmsg(MSG_GET_DOORBELL) for queue identification.
  amd_queue_v2_t** doorbell_queue_map_;

  // @brief The GPU memory bus width in bit.
  uint32_t memory_bus_width_;

  // @brief The GPU memory maximum frequency in MHz.
  uint32_t memory_max_frequency_;

  // @brief Enumeration index
  uint32_t enum_index_;

  // @brief HDP flush registers
  hsa_amd_hdp_flush_t HDP_flush_ = {nullptr, nullptr};

 private:
  // @brief Query the driver to get the region list owned by this agent.
  void InitRegionList();

  // @brief Reserve memory for scratch pool to be used by AQL queue of this
  // agent.
  void InitScratchPool();

  // @brief Query the driver to get the cache properties.
  void InitCacheList();

  // @brief Create internal queues and blits.
  void InitDma();

  // @brief Setup GWS accessing queue.
  void InitGWS();

  // @brief Set-up memory allocators
  void InitAllocators();

  // @brief Initialize scratch handler thresholds
  void InitAsyncScratchThresholds();

  // @brief Register signal for notification when scratch may become available.
  // @p signal is notified by OR'ing with @p value.
  //
  // @param [in] signal Signal to register. A null signal (handle 0) is rejected.
  // @param [in] value Value stored for @p signal; replaces any value stored earlier
  // for the same signal.
  //
  // @retval true if @p signal was recorded in scratch_notifiers_, false if it was null.
  bool AddScratchNotifier(hsa_signal_t signal, hsa_signal_value_t value) {
    // Only a real signal can be notified later, so the null handle is the one to refuse.
    if (signal.handle == 0) return false;
    scratch_notifiers_[signal] = value;
    return true;
  }

  // @brief Deregister scratch notification signals.
  void ClearScratchNotifiers() { scratch_notifiers_.clear(); }

  // @brief Releases scratch back to the driver.
  // caller must hold scratch_lock_.
  void ReleaseScratch(void* base, size_t size, bool large);

  // Bind index of peer device that is connected via xGMI links
  lazy_ptr<core::Blit>& GetXgmiBlit(const core::Agent& peer_agent);

  // Bind the Blit object that will drive the copy operation
  // across PCIe links (H2D or D2H) or is within same device D2D
  lazy_ptr<core::Blit>& GetPcieBlit(const core::Agent& dst_agent, const core::Agent& src_agent);

  // Bind the Blit object that will drive the copy operation
  lazy_ptr<core::Blit>& GetBlitObject(const core::Agent& dst_agent, const core::Agent& src_agent,
                                      const size_t size);

  // Bind the Blit object that will drive the copy operation by engine ID
  lazy_ptr<core::Blit>& GetBlitObject(uint32_t engine_id);

  // @brief initialize libdrm handle
  void InitLibDrm();

  void GetInfoMemoryProperties(uint8_t value[8]) const;

  // @brief Alternative aperture base address. Only on KV.
  uintptr_t ape1_base_;

  // @brief Queue with GWS access.
  struct {
    lazy_ptr<core::Queue> queue_;
    int ref_ct_;
    KernelMutex lock_;
  } gws_queue_;

  // @brief list of AQL queues owned by this agent. Indexed by queue pointer
  std::vector<core::Queue*> aql_queues_;

  // Sets and Tracks pending SDMA status check or request counts
  void SetCopyRequestRefCount(bool set);
  void SetCopyStatusCheckRefCount(bool set);
  int pending_copy_req_ref_;
  int pending_copy_stat_check_ref_;

  // Tracks what SDMA blits have been used since initialization.
  uint32_t sdma_blit_used_mask_;

  // Scratch limit thresholds when async scratch is enabled.
  uint64_t scratch_limit_async_threshold_;

  ScratchCache scratch_cache_;

  /// @brief System memory allocator in the nearest NUMA node.
  std::function<void*(size_t size, size_t align, core::MemoryRegion::AllocateFlags flags)>
      system_allocator_;
  /// @brief System memory deallocator in the nearest NUMA node.
  std::function<void(void*)> system_deallocator_;
  /// @brief Fine-grain allocator on this GPU.
  std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> finegrain_allocator_;
  /// @brief Fine-grain deallocator on this GPU.
  std::function<void(void*)> finegrain_deallocator_;
  /// @brief Coarse-grain allocator on this GPU.
  std::function<void*(size_t size, core::MemoryRegion::AllocateFlags flags)> coarsegrain_allocator_;
  /// @brief Coarse-grain deallocator on this GPU.
  std::function<void(void*)> coarsegrain_deallocator_;

  void* trap_handler_tma_region_;

  /* PC Sampling fields - begin */
  /* 2nd level Trap handler code is based on the offsets within this structure,
   * so members must keep their current order, types and sizes unless the trap
   * handler is updated to match. */
  typedef struct {
    uint64_t buf_write_val;
    uint32_t buf_size;
    uint32_t reserved0;
    /* NOTE(review): the 0/1 suffixes below presumably pair each group of
     * fields with buffer0/buffer1 sketched at the end of the struct - confirm
     * against the trap handler source. */
    uint32_t buf_written_val0;
    uint32_t buf_watermark0;
    hsa_signal_t done_sig0;
    uint32_t buf_written_val1;
    uint32_t buf_watermark1;
    hsa_signal_t done_sig1;
    uint8_t reserved1[16];
    /* Layout sketch only: the two arrays below are not declared as members. */
    /* pc_sample_t buffer0[buf_size]; */
    /* pc_sample_t buffer1[buf_size]; */
  } pcs_sampling_data_t;

  typedef struct {
    /* Sampling data - stored on device for trap handler access */
    pcs_sampling_data_t* device_data;

    /* Sampling host buffer - stored on host */
    uint8_t* host_buffer;
    size_t host_buffer_size;
    uint8_t* host_buffer_wrap_pos;
    uint8_t* host_write_ptr;
    uint8_t* host_read_ptr;
    size_t lost_sample_count;
    std::mutex host_buffer_mutex;

    uint32_t which_buffer;
    uint64_t* old_val;
    uint32_t* cmd_data;
    size_t cmd_data_sz;
    // signal to pass into ExecutePM4() so that we do not need to re-allocate a
    // new signal on each call
    hsa_signal_t exec_pm4_signal;

    os::Thread thread;
    pcs::PcsRuntime::PcSamplingSession* session;
  } pcs_data_t;
  /* PC Sampling fields - end */

  hsa_status_t UpdateTrapHandlerWithPCS(pcs_sampling_data_t* pcs_hosttrap_buffers,
                                        pcs_sampling_data_t* pcs_stochastic_buffers);

  // @brief Thread function to process PC sampling data collected via host-trap
  // or Stochastic sampling.
  void PcSamplingThread(pcs_data_t& pcs_data, const char* thread_name);

  // @brief device handle
  amdgpu_device_handle ldrm_dev_;

  DISALLOW_COPY_AND_ASSIGN(GpuAgent);

  // Check if SDMA engine by ID is free
  bool DmaEngineIsFree(uint32_t engine_id);

  std::map<uint64_t,unsigned int> gang_peers_info_;

  std::map<uint64_t, uint32_t> rec_sdma_eng_id_peers_info_;

  bool uses_rec_sdma_eng_id_mask_;
  bool rec_sdma_eng_override_;

  // structure for host trap sampling
  pcs_data_t pcs_hosttrap_data_;

  // structure for stochastic sampling
  pcs_data_t pcs_stochastic_data_;

  /// @brief XGMI CPU<->GPU
  bool xgmi_cpu_gpu_;
  /// @brief Is PCIe large BAR enabled.
  bool large_bar_enabled_;
};

}  // namespace amd
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_gpu_pm4.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_GPU_PM4_H_
#define HSA_RUNTIME_CORE_INC_AMD_GPU_PM4_H_

 // clang-format off

// Packet opcodes. PM4_HDR_IT_OPCODE() keeps the low 8 bits of one of these values
// and places them in header bits [15:8].
#define PM4_HDR_IT_OPCODE_NOP                             0x10
#define PM4_HDR_IT_OPCODE_INDIRECT_BUFFER                 0x3F
#define PM4_HDR_IT_OPCODE_RELEASE_MEM                     0x49
#define PM4_HDR_IT_OPCODE_ACQUIRE_MEM                     0x58

#define PM4_HDR_IT_OPCODE_ATOMIC_MEM                      0x1E
#define PM4_HDR_IT_OPCODE_PRED_EXEC                       0x23
#define PM4_HDR_IT_OPCODE_WRITE_DATA                      0x37
#define PM4_HDR_IT_OPCODE_WAIT_REG_MEM                    0x3C
#define PM4_HDR_IT_OPCODE_COPY_DATA                       0x40
#define PM4_HDR_IT_OPCODE_DMA_DATA                        0x50

// Header field encoders: each masks its argument to the field width and shifts it
// into place (shader type: bit 1, opcode: bits [15:8], count: bits [29:16],
// packet type: bits [31:30]).
#define PM4_HDR_SHADER_TYPE(x)                            (((x) & 0x1) << 1)
#define PM4_HDR_IT_OPCODE(x)                              (((x) & 0xFF) << 8)
#define PM4_HDR_COUNT(x)                                  (((x) & 0x3FFF) << 16)
// Unsigned mask: the field reaches bit 31, which a signed int shift cannot represent.
#define PM4_HDR_TYPE(x)                                   (((x) & 0x3u) << 30)

// Builds a packet header dword.
//   it_opcode   - one of the PM4_HDR_IT_OPCODE_* values.
//   pkt_size_dw - packet size in dwords; the count field stores pkt_size_dw - 2.
//   gfxip_ver   - the shader-type bit is set only when this equals 7.
// pkt_size_dw is parenthesized so that expression arguments such as "a | b" are
// evaluated before the "- 2" adjustment.
#define PM4_HDR(it_opcode, pkt_size_dw, gfxip_ver) (  \
  PM4_HDR_SHADER_TYPE((gfxip_ver) == 7 ? 1 : 0)    |  \
  PM4_HDR_IT_OPCODE(it_opcode)                     |  \
  PM4_HDR_COUNT((pkt_size_dw) - 2)                 |  \
  PM4_HDR_TYPE(3)                                     \
)

// INDIRECT_BUFFER field encoders. IB_BASE_LO keeps the low 30 bits of its argument
// and places them in bits [31:2]; IB_VALID is a single bit at position 23.
// NOTE(review): IB_BASE_LO shifts up to bit 31, so arguments should be unsigned -
// confirm that callers never pass a plain int.
#define PM4_INDIRECT_BUFFER_DW1_IB_BASE_LO(x)              (((x) & 0x3FFFFFFF) << 2)
#define PM4_INDIRECT_BUFFER_DW2_IB_BASE_HI(x)              (((x) & 0xFFFF) << 0)
#define PM4_INDIRECT_BUFFER_DW3_IB_SIZE(x)                 (((x) & 0xFFFFF) << 0)
#define PM4_INDIRECT_BUFFER_DW3_IB_VALID(x)                (((x) & 0x1) << 23)

// ACQUIRE_MEM field encoders. The indented defines are flag values meant to be
// OR'ed together and passed through the encoder listed above them.
#define PM4_ACQUIRE_MEM_DW1_COHER_CNTL(x)                  (((x) & 0x7FFFFFFF) << 0)
#  define PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA      (1 << 18)
#  define PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA         (1 << 23)
#  define PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA  (1 << 27)
#  define PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA  (1 << 29)
#define PM4_ACQUIRE_MEM_DW2_COHER_SIZE(x)                  (((x) & 0xFFFFFFFF) << 0)
#define PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(x)               (((x) & 0xFF) << 0)
// The two COHER_BASE encoders take the full address: the first yields address bits
// [39:8], the second bits [63:40]. The argument is parenthesized so that expressions
// such as "base | offset" are evaluated before the shift.
#define PM4_ACQUIRE_MEM_DW4_COHER_BASE(x)                  (((x) >> 8) & 0xFFFFFFFF)
#define PM4_ACQUIRE_MEM_DW4_COHER_BASE_HI(x)               (((x) >> 40) & 0xFFFFFF)
#define PM4_ACQUIRE_MEM_DW7_GCR_CNTL(x)                    (((x) & 0x7FFFF) << 0)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x)              (((x) & 0x3) << 0)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV                 (1 << 7)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV                 (1 << 8)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV                 (1 << 9)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV                 (1 << 14)
#  define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_WB                  (1 << 15)
// RELEASE_MEM: 4-bit event index placed in bits [11:8].
#define PM4_RELEASE_MEM_DW1_EVENT_INDEX(x)                 (((x) & 0xF) << 8)
#  define PM4_RELEASE_MEM_EVENT_INDEX_AQL                  0x7

// ATOMIC_MEM field encoders. ADDR_LO masks off bits [2:0] of the low address dword.
#define PM4_ATOMIC_MEM_DW1_ATOMIC(x)                       (((x) & 0x7F) << 0)
#  define PM4_ATOMIC_MEM_GL2_OP_ATOMIC_SWAP_RTN_64         (39 << 0)
#define PM4_ATOMIC_MEM_DW2_ADDR_LO(x)                      (((x) & 0xFFFFFFF8) << 0)
#define PM4_ATOMIC_MEM_DW3_ADDR_HI(x)                      (((x) & 0xFFFFFFFF) << 0)
#define PM4_ATOMIC_MEM_DW4_SRC_DATA_LO(x)                  (((x) & 0xFFFFFFFF) << 0)
#define PM4_ATOMIC_MEM_DW5_SRC_DATA_HI(x)                  (((x) & 0xFFFFFFFF) << 0)

// PRED_EXEC field encoders. EXEC_COUNT is 14 bits wide; VIRTUALXCCID_SELECT is an
// 8-bit field placed in bits [31:24].
// NOTE(review): VIRTUALXCCID_SELECT shifts up to bit 31, so arguments should be
// unsigned - confirm that callers never pass a plain int with bit 7 set.
#define PM4_PRED_EXEC_DW1_HEADER(x)                        (((x) & 0xFFFFFFFF) << 0)
#define PM4_PRED_EXEC_DW2_EXEC_COUNT(x)                    (((x) & 0x3FFF) << 0)
#define PM4_PRED_EXEC_DW2_VIRTUALXCCID_SELECT(x)           (((x) & 0xFF) << 24)

// COPY_DATA field encoders. The indented defines are DW1 flag values.
// DST_ADDR_LO masks off bits [2:0] of the low address dword.
#define PM4_COPY_DATA_DW1(x)                               (((x) & 0xFFFFFFFF) << 0)
#  define PM4_COPY_DATA_SRC_SEL_ATOMIC_RETURN_DATA         (6 << 0)
#  define PM4_COPY_DATA_DST_SEL_TC_12                      (2 << 8)
#  define PM4_COPY_DATA_COUNT_SEL                          (1 << 16)
#  define PM4_COPY_DATA_WR_CONFIRM                         (1 << 20)
#define PM4_COPY_DATA_DW4_DST_ADDR_LO(x)                   (((x) & 0xFFFFFFF8) << 0)
#define PM4_COPY_DATA_DW5_DST_ADDR_HI(x)                   (((x) & 0xFFFFFFFF) << 0)

// WAIT_REG_MEM field encoders. The indented defines are flag values for the encoder
// listed above them. MEM_POLL_ADDR_LO masks off bits [1:0] of the low address dword.
#define PM4_WAIT_REG_MEM_DW1(x)                            (((x) & 0xFFFFFFFF) << 0)
#  define PM4_WAIT_REG_MEM_FUNCTION_EQUAL_TO_REFERENCE     (3 << 0)
#  define PM4_WAIT_REG_MEM_MEM_SPACE_MEMORY_SPACE          (1 << 4)
#  define PM4_WAIT_REG_MEM_OPERATION_WAIT_REG_MEM          (0 << 6)
#define PM4_WAIT_REG_MEM_DW2_MEM_POLL_ADDR_LO(x)           (((x) & 0xFFFFFFFC) << 0)
#define PM4_WAIT_REG_MEM_DW3_MEM_POLL_ADDR_HI(x)           (((x) & 0xFFFFFFFF) << 0)
#define PM4_WAIT_REG_MEM_DW4_REFERENCE(x)                  (((x) & 0xFFFFFFFF) << 0)
#define PM4_WAIT_REG_MEM_DW6(x)                            (((x) & 0x8000FFFF) << 0)
#  define PM4_WAIT_REG_MEM_POLL_INTERVAL(x)                (((x) & 0xFFFF) << 0)
// Bit 31 must be built from an unsigned 1: shifting a signed 1 into the sign bit
// produces a negative int that sign-extends when widened to 64 bits.
#  define PM4_WAIT_REG_MEM_OPTIMIZE_ACE_OFFLOAD_MODE       (1u << 31)

// DMA_DATA field encoders. The indented defines are flag values for the encoder
// listed above them. BYTE_COUNT is 26 bits wide.
#define PM4_DMA_DATA_DW1(x)                            (((x) & 0xFFFFFFFF) << 0)
#  define PM4_DMA_DATA_DST_SEL_DST_ADDR_USING_L2       (3 << 20)
#  define PM4_DMA_DATA_SRC_SEL_SRC_ADDR_USING_L2       (3 << 29)
#define PM4_DMA_DATA_DW2_SRC_ADDR_LO(x)                (((x) & 0xFFFFFFFF) << 0)
#define PM4_DMA_DATA_DW3_SRC_ADDR_HI(x)                (((x) & 0xFFFFFFFF) << 0)
#define PM4_DMA_DATA_DW4_DST_ADDR_LO(x)                (((x) & 0xFFFFFFFF) << 0)
#define PM4_DMA_DATA_DW5_DST_ADDR_HI(x)                (((x) & 0xFFFFFFFF) << 0)
#define PM4_DMA_DATA_DW6(x)                            (((x) & 0xFFFFFFFF) << 0)
#  define PM4_DMA_DATA_BYTE_COUNT(x)                   (((x) & 0x3FFFFFF) << 0)
// Bit 31 values are built from unsigned operands: shifting a signed 1 into the sign
// bit produces a negative int that sign-extends when widened to 64 bits.
#  define PM4_DMA_DATA_DIS_WC                          (1u << 31)
#  define PM4_DMA_DATA_DIS_WC_LAST                     (0u << 31)

// WRITE_DATA field encoders. DW1 clears bits [7:0] of its argument; the indented
// defines are DW1 flag values. DST_MEM_ADDR_LO masks off bits [1:0] of the low
// address dword.
#define PM4_WRITE_DATA_DW1(x)                          (((x) & 0xFFFFFF00) << 0)
#  define PM4_WRITE_DATA_DST_SEL_TC_L2                 (2 << 8)
#  define PM4_WRITE_DATA_WR_CONFIRM_WAIT_CONFIRMATION  (1 << 20)
#define PM4_WRITE_DATA_DW2_DST_MEM_ADDR_LO(x)          (((x) & 0xFFFFFFFC) << 0)
#define PM4_WRITE_DATA_DW3_DST_MEM_ADDR_HI(x)          (((x) & 0xFFFFFFFF) << 0)
#define PM4_WRITE_DATA_DW4_DATA(x)                     (((x) & 0xFFFFFFFF) << 0)

// clang-format on

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_hsa_code.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_CODE_HPP_
#define AMD_HSA_CODE_HPP_

#include "core/inc/amd_elf_image.hpp"
#include "inc/amd_hsa_elf.h"
#include "inc/amd_hsa_kernel_code.h"
#include "inc/hsa.h"
#include "inc/hsa_ext_finalize.h"
#include <memory>
#include <sstream>
#include <cassert>
#include <unordered_map>

namespace rocr {
namespace amd {
namespace hsa {
namespace common {

/// @brief Base class that stamps every instance with a 64-bit signature.
///
/// The signature is available both as a per-type constant (CT_SIGNATURE) and
/// as a value stored inside each object (RT_SIGNATURE), so that code holding
/// only a raw address can compare the two. Construction and destruction are
/// protected: the class is meant to be used as a base only.
template <uint64_t signature>
class Signed {
 public:
  /// Signature of this instantiation, shared by all instances.
  static const uint64_t CT_SIGNATURE;

  /// Signature stored in the object itself; set once at construction.
  const uint64_t RT_SIGNATURE;

 protected:
  Signed() : RT_SIGNATURE{signature} {}
  virtual ~Signed() = default;
};

/// Out-of-class definition of the per-instantiation signature constant.
template <uint64_t signature>
const uint64_t Signed<signature>::CT_SIGNATURE{signature};

/// @brief Reports whether @p address is considered accessible.
///
/// Only declared in this header; the definition lives elsewhere. ObjectAt()
/// calls it before reading a signature through a caller-supplied address and
/// treats a false result as "no object here".
bool IsAccessibleMemoryAddress(uint64_t address);

/// @brief Computes the byte offset of a data member within its class.
///
/// @tparam class_type  Class the member pointer refers to.
/// @tparam member_type Type of the data member (may be cv-qualified).
/// @param[in] member   Pointer to the data member.
/// @returns Distance in bytes from the start of a @c class_type object to
/// the member.
///
/// The offset is measured against suitably sized and aligned scratch storage
/// instead of a null pointer: applying `->*` to a null pointer and
/// subtracting `(char*)nullptr` are both undefined and are diagnosed by
/// current compilers. No @c class_type object is constructed and the storage
/// is never read or written; only addresses inside it are formed.
template<typename class_type, typename member_type>
size_t OffsetOf(member_type class_type::*member)
{
  alignas(class_type) unsigned char storage[sizeof(class_type)];
  class_type* const base = reinterpret_cast<class_type*>(storage);

  // cv-qualified char pointers so that const/volatile members are accepted.
  const volatile char* const field =
    reinterpret_cast<const volatile char*>(&(base->*member));
  const volatile char* const start =
    reinterpret_cast<const volatile char*>(base);

  return static_cast<size_t>(field - start);
}

/// @brief Interprets @p address as a @c class_type object after validating
/// its embedded signature.
///
/// @tparam class_type Type exposing a static @c CT_SIGNATURE and a
/// per-object @c RT_SIGNATURE (see Signed).
/// @param[in] address Candidate object address.
/// @returns @p address as a @c class_type pointer, or null when the address
/// is not accessible according to IsAccessibleMemoryAddress() or when the
/// stored signature does not equal @c class_type::CT_SIGNATURE.
template<typename class_type>
class_type* ObjectAt(uint64_t address)
{
  if (!IsAccessibleMemoryAddress(address)) {
    return nullptr;
  }

  // `&class_type::RT_SIGNATURE` names a member of the base class that
  // declares it, so passing it straight to OffsetOf() would yield the offset
  // inside that base sub-object. Converting it to a pointer-to-member of
  // class_type first makes the offset relative to the start of the complete
  // class_type object, which is what `address` refers to.
  const uint64_t class_type::*signature_member = &class_type::RT_SIGNATURE;

  const uint64_t* rt_signature =
    reinterpret_cast<const uint64_t*>(address + OffsetOf(signature_member));
  if (nullptr == rt_signature) {
    return nullptr;
  }
  if (class_type::CT_SIGNATURE != *rt_signature) {
    return nullptr;
  }

  return reinterpret_cast<class_type*>(address);
}

}   //  namespace common

namespace code {

    // Short names for the ELF image types used throughout this namespace.
    using Segment = amd::elf::Segment;
    using Section = amd::elf::Section;
    using RelocationSection = amd::elf::RelocationSection;
    using Relocation = amd::elf::Relocation;

    // Forward declarations: Symbol's As*() accessors return these types
    // before their definitions appear.
    class KernelSymbol;
    class VariableSymbol;

    /// Base class for code-object symbols. Wraps an amd::elf::Symbol and
    /// layers HSA-level queries on top of it; KernelSymbol and
    /// VariableSymbol are the concrete kinds.
    class Symbol {
    protected:
      /// Wrapped ELF symbol. Name() and Index() tolerate a null pointer; the
      /// other inline accessors dereference it unconditionally.
      amd::elf::Symbol* elfsym;

    public:
      explicit Symbol(amd::elf::Symbol* elfsym_) : elfsym(elfsym_) {}
      virtual ~Symbol() {}

      /// Kind test; the base implementation answers false.
      virtual bool IsKernelSymbol() const { return false; }

      /// Downcast to KernelSymbol. The base implementation asserts and, when
      /// assertions are compiled out, returns null.
      virtual KernelSymbol* AsKernelSymbol() {
        assert(false);
        return nullptr;
      }

      /// Kind test; the base implementation answers false.
      virtual bool IsVariableSymbol() const { return false; }

      /// Downcast to VariableSymbol. The base implementation asserts and,
      /// when assertions are compiled out, returns null.
      virtual VariableSymbol* AsVariableSymbol() {
        assert(false);
        return nullptr;
      }

      /// Returns the wrapped ELF symbol.
      amd::elf::Symbol* elfSym() { return elfsym; }

      /// Returns the ELF symbol name, or an empty string when no ELF symbol
      /// is attached.
      std::string Name() const {
        if (!elfsym) {
          return "";
        }
        return elfsym->name();
      }

      /// Returns the section the ELF symbol belongs to.
      Section* GetSection() { return elfsym->section(); }

      /// Returns the ELF symbol value, used here as the offset in its section.
      virtual uint64_t SectionOffset() const { return elfsym->value(); }

      /// Returns the section address plus the ELF symbol value.
      virtual uint64_t VAddr() const {
        return elfsym->section()->addr() + elfsym->value();
      }

      /// Returns the ELF symbol index, or 0 when no ELF symbol is attached.
      uint32_t Index() const {
        if (!elfsym) {
          return 0;
        }
        return elfsym->index();
      }

      // Attribute queries; implemented out of line.
      bool IsDeclaration() const;
      bool IsDefinition() const;
      virtual bool IsAgent() const;
      virtual hsa_symbol_kind_t Kind() const = 0;
      hsa_symbol_linkage_t Linkage() const;
      hsa_variable_allocation_t Allocation() const;
      hsa_variable_segment_t Segment() const;
      uint64_t Size() const;
      uint32_t Size32() const;
      uint32_t Alignment() const;
      bool IsConst() const;

      /// Writes the requested @p attribute into @p value; implemented out of
      /// line and overridden by the concrete symbol kinds.
      virtual hsa_status_t GetInfo(hsa_code_symbol_info_t attribute,
                                   void *value);

      // Conversions between Symbol pointers and opaque HSA handles.
      static hsa_code_symbol_t ToHandle(Symbol* sym);
      static Symbol* FromHandle(hsa_code_symbol_t handle);

      /// Forwards to the ELF symbol's setValue().
      void setValue(uint64_t value) { elfsym->setValue(value); }

      /// Forwards to the ELF symbol's setSize().
      void setSize(uint32_t size) { elfsym->setSize(size); }

      // Name decomposition helpers; implemented out of line.
      std::string GetModuleName() const;
      std::string GetSymbolName() const;
    };

    /// Symbol that describes a kernel. Reports HSA_SYMBOL_KIND_KERNEL and
    /// provides its own GetInfo() implementation.
    class KernelSymbol : public Symbol {
    private:
      // Kernel properties cached in the symbol. They are not initialised
      // inline; presumably the out-of-line constructor fills them from the
      // amd_kernel_code_t it receives -- confirm against the definition.
      uint32_t kernarg_segment_size;
      uint32_t kernarg_segment_alignment;
      uint32_t group_segment_size;
      uint32_t private_segment_size;
      bool is_dynamic_callstack;

    public:
      /// @param elfsym_ ELF symbol to wrap.
      /// @param akc     Kernel code descriptor associated with the symbol.
      explicit KernelSymbol(amd::elf::Symbol* elfsym_,
                            const amd_kernel_code_t* akc);

      bool IsKernelSymbol() const override { return true; }
      KernelSymbol* AsKernelSymbol() override { return this; }
      hsa_symbol_kind_t Kind() const override {
        return HSA_SYMBOL_KIND_KERNEL;
      }
      hsa_status_t GetInfo(hsa_code_symbol_info_t attribute,
                           void *value) override;
    };

    /// Symbol that describes a variable. Reports HSA_SYMBOL_KIND_VARIABLE
    /// and provides its own GetInfo() implementation.
    class VariableSymbol : public Symbol {
    public:
      /// @param elfsym_ ELF symbol to wrap.
      explicit VariableSymbol(amd::elf::Symbol* elfsym_) : Symbol(elfsym_) {}

      bool IsVariableSymbol() const override { return true; }
      VariableSymbol* AsVariableSymbol() override { return this; }
      hsa_symbol_kind_t Kind() const override {
        return HSA_SYMBOL_KIND_VARIABLE;
      }
      hsa_status_t GetInfo(hsa_code_symbol_info_t attribute,
                           void *value) override;
    };

    class AmdHsaCode {
    private:
      std::ostringstream out;
      std::unique_ptr<amd::elf::Image> img;
      std::vector<Segment*> dataSegments;
      std::vector<Section*> dataSections;
      std::vector<RelocationSection*> relocationSections;
      std::vector<Symbol*> symbols;
      bool combineDataSegments;
      Segment* hsaSegments[AMDGPU_HSA_SEGMENT_LAST][2];
      Section* hsaSections[AMDGPU_HSA_SECTION_LAST];

      amd::elf::Section* hsatext;
      amd::elf::Section* imageInit;
      amd::elf::Section* samplerInit;
      amd::elf::Section* debugInfo;
      amd::elf::Section* debugLine;
      amd::elf::Section* debugAbbrev;

      bool PullElf();
      bool PullElfV1();
      bool PullElfV2();

      void AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size);
      template <typename S>
      bool GetAmdNote(uint32_t type, S** desc)
      {
        uint32_t desc_size;
        if (!img->note()->getNote("AMD", type, (void**) desc, &desc_size)) {
          out << "Failed to find note, type: " << type << std::endl;
          return false;
        }
        if (desc_size < sizeof(S)) {
          out << "Note size mismatch, type: " << type << " size: " << desc_size << " expected at least " << sizeof(S) << std::endl;
          return false;
        }
        return true;
      }

      void PrintSegment(std::ostream& out, Segment* segment);
      void PrintSection(std::ostream& out, Section* section);
      void PrintRawData(std::ostream& out, Section* section);
      void PrintRawData(std::ostream& out, const unsigned char *data, size_t size);
      void PrintRelocationData(std::ostream& out, RelocationSection* section);
      void PrintSymbol(std::ostream& out, Symbol* sym);
      void PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset = 0);
      std::string MangleSymbolName(const std::string& module_name, const std::string& symbol_name);
      bool ElfImageError();

    public:
      bool HasHsaText() const { return hsatext != 0; }
      amd::elf::Section* HsaText() { assert(hsatext); return hsatext; }
      const amd::elf::Section* HsaText() const { assert(hsatext); return hsatext; }
      amd::elf::SymbolTable* Symtab() { assert(img); return img->symtab(); }
      uint16_t Machine() const { return img->Machine(); }
      uint32_t EFlags() const { return img->EFlags(); }
      uint32_t EClass() const { return img->EClass(); }
      uint32_t OsAbi() const { return img->OsAbi(); }

      AmdHsaCode(bool combineDataSegments = true);
      virtual ~AmdHsaCode();

      std::string output() { return out.str(); }
      bool LoadFromFile(const std::string& filename);
      bool SaveToFile(const std::string& filename);
      bool WriteToBuffer(void* buffer);
      bool InitFromBuffer(const void* buffer, size_t size);
      bool InitAsBuffer(const void* buffer, size_t size);
      bool InitAsHandle(hsa_code_object_t code_handle);
      bool InitNew(bool xnack = false);
      bool Freeze();
      hsa_code_object_t GetHandle();
      const char* ElfData();
      uint64_t ElfSize();
      bool Validate();
      void Print(std::ostream& out);
      void PrintNotes(std::ostream& out);
      void PrintSegments(std::ostream& out);
      void PrintSections(std::ostream& out);
      void PrintSymbols(std::ostream& out);
      void PrintMachineCode(std::ostream& out);
      void PrintMachineCode(std::ostream& out, KernelSymbol* sym);
      bool PrintToFile(const std::string& filename);

      void AddNoteCodeObjectVersion(uint32_t major, uint32_t minor);
      bool GetNoteCodeObjectVersion(std::string& version);
      void AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode);
      bool GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round);
      void AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping);
      bool GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping);
      void AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer);
      bool GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name);
      void AddNoteProducerOptions(const std::string& options);
      void AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options);
      bool GetNoteProducerOptions(std::string& options);

      bool GetIsa(std::string& isaName, unsigned *genericVersion = nullptr);
      bool GetCodeObjectVersion(uint32_t* major, uint32_t* minor);
      hsa_status_t GetInfo(hsa_code_object_info_t attribute, void *value);
      hsa_status_t GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *sym);
      hsa_status_t IterateSymbols(hsa_code_object_t code_object,
                                  hsa_status_t (*callback)(
                                    hsa_code_object_t code_object,
                                    hsa_code_symbol_t symbol,
                                    void* data),
                                  void* data);

      void AddHsaTextData(const void* buffer, size_t size);
      uint64_t NextKernelCodeOffset() const;
      bool AddKernelCode(KernelSymbol* sym, const void* code, size_t size);

      Symbol* AddKernelDefinition(const std::string& name, const void* isa, size_t isa_size);

      size_t DataSegmentCount() const { return dataSegments.size(); }
      Segment* DataSegment(size_t i) const { return dataSegments[i]; }

      size_t DataSectionCount() { return dataSections.size(); }
      Section* DataSection(size_t i) { return dataSections[i]; }

      Section* AddEmptySection();
      Section* AddCodeSection(Segment* segment);
      Section* AddDataSection(const std::string &name,
                              uint32_t type,
                              uint64_t flags,
                              Segment* segment);

      bool HasImageInitSection() const { return imageInit != 0; }
      Section* ImageInitSection();
      void AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& init);
      void AddImageInitializer(Symbol* image, uint64_t destOffset,
        amdgpu_hsa_metadata_kind16_t kind,
        amdgpu_hsa_image_geometry8_t geometry,
        amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
        uint64_t width, uint64_t height, uint64_t depth, uint64_t array);


      bool HasSamplerInitSection() const { return samplerInit != 0; }
      amd::elf::Section* SamplerInitSection();
      amd::elf::Section* AddSamplerInit();
      void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& init);
      void AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
        amdgpu_hsa_sampler_coord8_t coord,
        amdgpu_hsa_sampler_filter8_t filter,
        amdgpu_hsa_sampler_addressing8_t addressing);

      void AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend);

      void InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);
      bool AddHsaSegments();
      Segment* HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable);

      void InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments = true);
      Section* HsaDataSection(amdgpu_hsa_elf_section_t section, bool combineSegments = true);

      Symbol* AddExecutableSymbol(const std::string &name,
                                  unsigned char type,
                                  unsigned char binding,
                                  unsigned char other,
                                  Section *section = 0);

      Symbol* AddVariableSymbol(const std::string &name,
                                unsigned char type,
                                unsigned char binding,
                                unsigned char other,
                                Section *section,
                                uint64_t value,
                                uint64_t size);
      void AddSectionSymbols();

      size_t RelocationSectionCount() { return relocationSections.size(); }
      RelocationSection* GetRelocationSection(size_t i) { return relocationSections[i]; }

      size_t SymbolCount() { return symbols.size(); }
      Symbol* GetSymbol(size_t i) { return symbols[i]; }
      Symbol* GetSymbolByElfIndex(size_t index);
      Symbol* FindSymbol(const std::string &n);

      void AddData(amdgpu_hsa_elf_section_t section, const void* data = 0, size_t size = 0);

      Section* DebugInfo();
      Section* DebugLine();
      Section* DebugAbbrev();

      Section* AddHsaHlDebug(const std::string& name, const void* data, size_t size);
    };

    class AmdHsaCodeManager {
    private:
      typedef std::unordered_map<uint64_t, AmdHsaCode*> CodeMap;
      CodeMap codeMap;

    public:
      AmdHsaCode* FromHandle(hsa_code_object_t handle);
      bool Destroy(hsa_code_object_t handle);
    };

    /// Kernel symbol variant in which the ELF symbol value is already a
    /// virtual address: VAddr() returns the value unchanged and
    /// SectionOffset() subtracts the section address from it.
    class KernelSymbolV2 : public KernelSymbol {
    public:
      /// @param elfsym_ ELF symbol to wrap.
      /// @param akc     Kernel code descriptor associated with the symbol.
      explicit KernelSymbolV2(amd::elf::Symbol* elfsym_,
                              const amd_kernel_code_t* akc);

      /// Always reports an agent symbol.
      bool IsAgent() const override { return true; }

      uint64_t SectionOffset() const override {
        return elfsym->value() - elfsym->section()->addr();
      }

      uint64_t VAddr() const override {
        return elfsym->value();
      }
    };

    /// Variable symbol variant in which the ELF symbol value is already a
    /// virtual address: VAddr() returns the value unchanged and
    /// SectionOffset() subtracts the section address from it.
    class VariableSymbolV2 : public VariableSymbol {
    public:
      /// @param elfsym_ ELF symbol to wrap.
      explicit VariableSymbolV2(amd::elf::Symbol* elfsym_)
        : VariableSymbol(elfsym_) {}

      /// Always reports a non-agent symbol.
      bool IsAgent() const override { return false; }

      uint64_t SectionOffset() const override {
        return elfsym->value() - elfsym->section()->addr();
      }

      uint64_t VAddr() const override {
        return elfsym->value();
      }
    };
}   //  namespace code
}   //  namespace hsa
}   //  namespace amd
}   //  namespace rocr

#endif // AMD_HSA_CODE_HPP_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_hsa_loader.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_LOADER_HPP
#define AMD_HSA_LOADER_HPP

#include <cstddef>
#include <cstdint>
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ven_amd_loader.h"
#include "inc/amd_hsa_elf.h"
#include <string>
#include <memory>
#include <mutex>
#include <vector>

// Portable names for low-level file I/O: on Windows they map to the
// underscore-prefixed functions from <io.h>, elsewhere to the functions
// from <unistd.h>.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; renaming would require touching every user of
// these macros, so they are left unchanged here.
#if defined(_WIN32) || defined(_WIN64)
#include <io.h>
#define __read__  _read
#define __lseek__ _lseek
#else
#include <unistd.h>
#define __read__  read
#define __lseek__ lseek
#endif  // _WIN32 || _WIN64

/// @brief Major version of the AMD HSA Loader. Major versions are not backwards
/// compatible.
#define AMD_HSA_LOADER_VERSION_MAJOR 0

/// @brief Minor version of the AMD HSA Loader. Minor versions are backwards
/// compatible.
#define AMD_HSA_LOADER_VERSION_MINOR 5

/// @brief Descriptive version of the AMD HSA Loader.
/// @note The "v0.05" text is a separate literal and is not derived from the
/// MAJOR/MINOR macros above; update it by hand when either of them changes.
#define AMD_HSA_LOADER_VERSION "AMD HSA Loader v0.05 (June 16, 2015)"

/// @brief Additional symbol attributes defined by this header. The values
/// start at 100 (presumably to stay clear of the standard
/// hsa_executable_symbol_info_t enumerators -- confirm against hsa.h).
enum hsa_ext_symbol_info_t {
  HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE = 100,
  HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN = 101,
};

/// @brief Plain 32-bit attribute selector; this is the type that
/// loader::Symbol::GetInfo() takes.
typedef uint32_t hsa_symbol_info32_t;
/// @brief Short alias for the HSA executable symbol handle.
typedef hsa_executable_symbol_t hsa_symbol_t;
/// @brief Short alias for the HSA executable symbol attribute enumeration.
typedef hsa_executable_symbol_info_t hsa_symbol_info_t;

/// @brief Loaded code object attributes.
/// Selector passed to loader::LoadedCodeObject::GetInfo().
enum amd_loaded_code_object_info_t {
  AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE = 0,
  AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE = 1
};

/// @brief Loaded segment handle.
/// loader::LoadedSegment::Handle()/Object() store and recover the object's
/// address in @c handle.
typedef struct amd_loaded_segment_s {
  uint64_t handle;
} amd_loaded_segment_t;

/// @brief Loaded segment attributes.
/// Selector passed to loader::LoadedSegment::GetInfo().
enum amd_loaded_segment_info_t {
  AMD_LOADED_SEGMENT_INFO_TYPE = 0,
  AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS = 1,
  AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS = 2,
  AMD_LOADED_SEGMENT_INFO_SIZE = 3
};

namespace rocr {
namespace amd {
namespace hsa {
namespace loader {

/// @class CodeObjectReaderImpl.
/// @brief Code object reader wrapper: remembers where a code object lives
/// (memory pointer, size and URI) and converts to and from the opaque
/// hsa_code_object_reader_t handle.
///
/// NOTE(review): the class declares a destructor and tracks an `is_mmap`
/// flag but leaves the implicit copy operations enabled. If the destructor
/// releases the memory, copying an instance would be unsafe -- confirm and
/// consider deleting the copy operations.
struct CodeObjectReaderImpl final {
 public:
  /// @returns Handle equivalent of @p object (the object's address).
  static hsa_code_object_reader_t Handle(const CodeObjectReaderImpl *object) {
    return {reinterpret_cast<uint64_t>(object)};
  }

  /// @returns Object equivalent of @p handle.
  static CodeObjectReaderImpl *Object(const hsa_code_object_reader_t &handle) {
    return reinterpret_cast<CodeObjectReaderImpl*>(handle.handle);
  }

  /// @brief Default constructor; all state comes from the member
  /// initialisers below.
  CodeObjectReaderImpl() {}

  /// @brief Destructor; defined out of line.
  ~CodeObjectReaderImpl();

  /// @brief Uses a file as the code object source; implemented out of line.
  ///
  /// @param[in] _code_object_file_descriptor File to read from.
  /// @param[in] _code_object_offset Offset within the file; defaults to 0.
  /// @param[in] _code_object_size Size of the code object; defaults to 0.
  hsa_status_t SetFile(hsa_file_t _code_object_file_descriptor,
                       size_t _code_object_offset = 0,
                       size_t _code_object_size = 0);

  /// @brief Uses a memory range as the code object source; implemented out
  /// of line.
  ///
  /// @param[in] _code_object_memory Start of the code object.
  /// @param[in] _code_object_size Size of the code object in bytes.
  hsa_status_t SetMemory(const void *_code_object_memory,
                         size_t _code_object_size);

  /// @returns The stored code object pointer (null until a source is set).
  const void *GetCodeObjectMemory() const { return code_object_memory; }

  /// @returns A copy of the stored URI (empty until one is set).
  std::string GetUri() const { return uri; }

 private:
  const void *code_object_memory{nullptr};
  size_t code_object_size{0};
  std::string uri{};
  bool is_mmap{false};
};

//===----------------------------------------------------------------------===//
// Context.                                                                   //
//===----------------------------------------------------------------------===//

/// @class Context
/// @brief Abstract set of services the loader obtains from its host: ISA
/// lookup, segment memory management and image/sampler creation. Every
/// operation is pure virtual; instances are created through derived classes
/// only and cannot be copied.
class Context {
public:
  virtual ~Context() = default;

  // ISA queries.
  virtual hsa_isa_t IsaFromName(const char *name) = 0;

  virtual bool IsaSupportedByAgent(
    hsa_agent_t agent,
    hsa_isa_t isa,
    unsigned genericVersion) = 0;

  // Segment memory hooks. `seg` is the value previously obtained from
  // SegmentAlloc() (presumably -- confirm with the implementations).
  virtual void* SegmentAlloc(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    size_t size,
    size_t align,
    bool zero) = 0;

  virtual bool SegmentCopy(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    void* dst,
    size_t offset,
    const void* src,
    size_t size) = 0;

  virtual void SegmentFree(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    void* seg,
    size_t size) = 0;

  virtual void* SegmentAddress(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    void* seg,
    size_t offset) = 0;

  virtual void* SegmentHostAddress(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    void* seg,
    size_t offset) = 0;

  virtual bool SegmentFreeze(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent,
    void* seg,
    size_t size) = 0;

  // Image extension hooks.
  virtual bool ImageExtensionSupported() = 0;

  virtual hsa_status_t ImageCreate(
    hsa_agent_t agent,
    hsa_access_permission_t image_permission,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_ext_image_t *image_handle) = 0;

  virtual hsa_status_t ImageDestroy(
    hsa_agent_t agent,
    hsa_ext_image_t image_handle) = 0;

  virtual hsa_status_t SamplerCreate(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler_handle) = 0;

  virtual hsa_status_t SamplerDestroy(
    hsa_agent_t agent,
    hsa_ext_sampler_t sampler_handle) = 0;

protected:
  Context() = default;

private:
  // Copying is disabled: both operations are private and are only declared
  // in this header.
  Context(const Context &c);
  Context& operator=(const Context &c);
};

//===----------------------------------------------------------------------===//
// Symbol.                                                                    //
//===----------------------------------------------------------------------===//

/// @class Symbol
/// @brief Abstract executable symbol. Converts between object pointers and
/// hsa_symbol_t handles and exposes attribute queries. Not copyable.
class Symbol {
public:
  /// @returns Handle holding the address of @p symbol.
  static hsa_symbol_t Handle(Symbol *symbol) {
    return {reinterpret_cast<uint64_t>(symbol)};
  }

  /// @returns Object whose address is stored in @p symbol_handle.
  static Symbol* Object(hsa_symbol_t symbol_handle) {
    return reinterpret_cast<Symbol*>(symbol_handle.handle);
  }

  virtual ~Symbol() = default;

  /// @brief Writes the attribute selected by @p symbol_info into @p value.
  virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void *value) = 0;

  /// @returns Agent associated with this symbol.
  virtual hsa_agent_t GetAgent() = 0;

protected:
  Symbol() = default;

private:
  // Copying is disabled: both operations are private and are only declared
  // in this header.
  Symbol(const Symbol &s);
  Symbol& operator=(const Symbol &s);
};

//===----------------------------------------------------------------------===//
// LoadedCodeObject.                                                          //
//===----------------------------------------------------------------------===//

/// @class LoadedCodeObject
/// @brief Abstract view of a code object after it has been loaded. Converts
/// between object pointers and hsa_loaded_code_object_t handles and exposes
/// attribute queries. Not copyable.
class LoadedCodeObject {
public:
  /// @returns Handle holding the address of @p object.
  static hsa_loaded_code_object_t Handle(LoadedCodeObject *object) {
    return {reinterpret_cast<uint64_t>(object)};
  }

  /// @returns Object whose address is stored in @p handle.
  static LoadedCodeObject* Object(hsa_loaded_code_object_t handle) {
    return reinterpret_cast<LoadedCodeObject*>(handle.handle);
  }

  virtual ~LoadedCodeObject() = default;

  /// @brief Writes the attribute selected by @p attribute into @p value.
  virtual bool GetInfo(amd_loaded_code_object_info_t attribute,
                       void *value) = 0;

  /// @brief Invokes @p callback for each loaded segment, passing @p data
  /// through unchanged.
  virtual hsa_status_t IterateLoadedSegments(
    hsa_status_t (*callback)(amd_loaded_segment_t loaded_segment, void *data),
    void *data) = 0;

  // Property accessors.
  virtual hsa_agent_t getAgent() const = 0;
  virtual hsa_executable_t getExecutable() const = 0;
  virtual uint64_t getElfData() const = 0;
  virtual uint64_t getElfSize() const = 0;
  virtual uint64_t getStorageOffset() const = 0;
  virtual uint64_t getLoadBase() const = 0;
  virtual uint64_t getLoadSize() const = 0;
  virtual int64_t getDelta() const = 0;
  virtual std::string getUri() const = 0;

protected:
  LoadedCodeObject() = default;

private:
  // Copying is disabled: both operations are private and are only declared
  // in this header.
  LoadedCodeObject(const LoadedCodeObject&);
  LoadedCodeObject& operator=(const LoadedCodeObject&);
};

//===----------------------------------------------------------------------===//
// LoadedSegment.                                                             //
//===----------------------------------------------------------------------===//

/// @class LoadedSegment
/// @brief Abstract view of a loaded segment. Converts between object
/// pointers and amd_loaded_segment_t handles and exposes attribute queries.
/// Not copyable.
class LoadedSegment {
public:
  /// @returns Handle holding the address of @p object.
  static amd_loaded_segment_t Handle(LoadedSegment *object) {
    return {reinterpret_cast<uint64_t>(object)};
  }

  /// @returns Object whose address is stored in @p handle.
  static LoadedSegment* Object(amd_loaded_segment_t handle) {
    return reinterpret_cast<LoadedSegment*>(handle.handle);
  }

  virtual ~LoadedSegment() = default;

  /// @brief Writes the attribute selected by @p attribute into @p value.
  virtual bool GetInfo(amd_loaded_segment_info_t attribute, void *value) = 0;

protected:
  LoadedSegment() = default;

private:
  // Copying is disabled: both operations are private and are only declared
  // in this header.
  LoadedSegment(const LoadedSegment&);
  LoadedSegment& operator=(const LoadedSegment&);
};

//===----------------------------------------------------------------------===//
// Executable.                                                                //
//===----------------------------------------------------------------------===//

class Executable {
public:
  static hsa_executable_t Handle(Executable *executable) {
    hsa_executable_t executable_handle =
      {reinterpret_cast<uint64_t>(executable)};
    return executable_handle;
  }

  static Executable* Object(hsa_executable_t executable_handle) {
    Executable *executable =
      reinterpret_cast<Executable*>(executable_handle.handle);
    return executable;
  }

  virtual ~Executable() {}

  virtual hsa_status_t GetInfo(
    hsa_executable_info_t executable_info, void *value) = 0;

  virtual hsa_status_t DefineProgramExternalVariable(
    const char *name, void *address) = 0;

  virtual hsa_status_t DefineAgentExternalVariable(
    const char *name,
    hsa_agent_t agent,
    hsa_variable_segment_t segment,
    void *address) = 0;

  virtual hsa_status_t LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options,
    const std::string &uri,
    hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;

  virtual hsa_status_t LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    size_t code_object_size,
    const char *options,
    const std::string &uri,
    hsa_loaded_code_object_t *loaded_code_object = nullptr) = 0;

  virtual hsa_status_t Freeze(const char *options) = 0;

  virtual hsa_status_t Validate(uint32_t *result) = 0;

  /// @note needed for hsa v1.0.
  /// @todo remove during loader refactoring.
  virtual bool IsProgramSymbol(const char *symbol_name) = 0;

  virtual Symbol* GetSymbol(
    const char *symbol_name,
    const hsa_agent_t *agent) = 0;

  typedef hsa_status_t (*iterate_symbols_f)(
    hsa_executable_t executable,
    hsa_symbol_t symbol_handle,
    void *data);

  virtual hsa_status_t IterateSymbols(
    iterate_symbols_f callback, void *data) = 0;

  /// @since hsa v1.1.
  virtual hsa_status_t IterateAgentSymbols(
      hsa_agent_t agent,
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_agent_t agent,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data) = 0;

  /// @since hsa v1.1.
  virtual hsa_status_t IterateProgramSymbols(
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data) = 0;

  virtual hsa_status_t IterateLoadedCodeObjects(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      hsa_loaded_code_object_t loaded_code_object,
      void *data),
    void *data) = 0;

  virtual size_t GetNumSegmentDescriptors() = 0;

  virtual size_t QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t total_num_segment_descriptors,
    size_t first_empty_segment_descriptor) = 0;

  virtual uint64_t FindHostAddress(uint64_t device_address) = 0;

  virtual void Print(std::ostream& out) = 0;
  virtual bool PrintToFile(const std::string& filename) = 0;

protected:
  Executable() {}

private:
  Executable(const Executable &e);
  Executable& operator=(const Executable &e);

  static std::vector<Executable*> executables;
  static std::mutex executables_mutex;
};

/// @class Loader
/// @brief Abstract AMD HSA Loader: creates, freezes, enumerates and destroys
/// executables on behalf of a Context. Instances are obtained with Create()
/// and released with Destroy(); the class is not copyable.
class Loader {
public:
  /// @brief Destructor.
  virtual ~Loader() = default;

  /// @brief Creates AMD HSA Loader with specified @p context.
  ///
  /// @param[in] context Context. Must not be null.
  ///
  /// @returns AMD HSA Loader on success, null on failure.
  static Loader* Create(Context* context);

  /// @brief Destroys AMD HSA Loader @p loader.
  ///
  /// @param[in] loader AMD HSA Loader to destroy. Must not be null.
  static void Destroy(Loader *loader);

  /// @returns Context associated with Loader.
  virtual Context* GetContext() const = 0;

  /// @brief Creates empty AMD HSA Executable with specified @p profile,
  /// @p options and @p default_float_rounding_mode.
  virtual Executable* CreateExecutable(
    hsa_profile_t profile,
    const char *options,
    hsa_default_float_rounding_mode_t default_float_rounding_mode =
      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;

  /// @brief Creates empty AMD HSA Executable with specified @p profile,
  /// @p options and @p isolated_context that is isolated from the runtime.
  /// Ownership of @p isolated_context is transferred to the callee.
  virtual Executable* CreateExecutable(
    std::unique_ptr<Context> isolated_context,
    hsa_profile_t profile,
    const char *options,
    hsa_default_float_rounding_mode_t default_float_rounding_mode =
      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;

  /// @brief Freezes @p executable.
  virtual hsa_status_t FreezeExecutable(Executable *executable,
                                        const char *options) = 0;

  /// @brief Destroys @p executable.
  virtual void DestroyExecutable(Executable *executable) = 0;

  /// @brief Invokes @p callback for each created executable, passing
  /// @p data through unchanged.
  virtual hsa_status_t IterateExecutables(
    hsa_status_t (*callback)(hsa_executable_t executable, void *data),
    void *data) = 0;

  /// @brief Same as hsa_ven_amd_loader_query_segment_descriptors.
  virtual hsa_status_t QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors) = 0;

  /// @brief Finds the handle of the executable to which @p device_address
  /// belongs. Returns a null handle if the device address is invalid.
  virtual hsa_executable_t FindExecutable(uint64_t device_address) = 0;

  /// @brief Returns the host address for @p device_address. Returns 0 when
  /// @p device_address is already a host address or is invalid.
  virtual uint64_t FindHostAddress(uint64_t device_address) = 0;

  /// @brief Prints loader help to @p out.
  virtual void PrintHelp(std::ostream& out) = 0;

protected:
  /// @brief Default constructor.
  Loader() = default;

private:
  /// @brief Copy constructor - not available.
  Loader(const Loader&);

  /// @brief Assignment operator - not available.
  Loader& operator=(const Loader&);
};


} // namespace loader
} // namespace hsa
} // namespace amd
} // namespace rocr

#endif // AMD_HSA_LOADER_HPP


================================================
FILE: runtime/hsa-runtime/core/inc/amd_kfd_driver.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_
#define HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_

#include <memory>
#include <string>

#include "hsakmt/hsakmt.h"

#include "core/inc/driver.h"
#include "core/inc/memory_region.h"

namespace rocr {

namespace core {

class Queue;

}

namespace AMD {

/// @brief AMD Kernel Fusion Driver (KFD) for AMD GPU and CPU agents.
///
/// @details The user-mode driver into the Linux KFD for AMD GPU and CPU HSA
/// agents. Provides APIs for the ROCr core to discover the topology produced
/// by the KFD, allocate memory out of the KFD, manage DMA bufs, allocate queues,
/// and more.
///
/// The class is final. Apart from the constructor and the static
/// DiscoverDriver(), every public member function below is declared as an
/// override of the core::Driver interface; see that interface for the
/// per-function contracts.
class KfdDriver final : public core::Driver {
public:
  /// @param devnode_name Name taken by value; presumably the KFD device node
  /// name - confirm against the constructor definition.
  KfdDriver(std::string devnode_name);

  /// @brief Determine if the KFD is present on the system and attempt to open it if found.
  ///
  /// @param[out] driver Driver object for the KFD.
  /// @return HSA_STATUS_SUCCESS if driver found and opened.
  /// @return HSA_STATUS_ERROR if unable to find or open the KFD.
  static hsa_status_t DiscoverDriver(std::unique_ptr<core::Driver>& driver);

  // Driver lifecycle and kernel-mode driver query.
  hsa_status_t Init() override;
  hsa_status_t ShutDown() override;
  hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;
  hsa_status_t Open() override;
  hsa_status_t Close() override;

  // Property queries (system, node, edge/IO-link, memory, cache). The vector
  // and reference parameters are non-const, i.e. they are outputs.
  hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override;
  hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override;
  hsa_status_t GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                 uint32_t node_id) const override;
  hsa_status_t GetMemoryProperties(uint32_t node_id,
                                   std::vector<HsaMemoryProperties>& mem_props) const override;
  hsa_status_t GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                  std::vector<HsaCacheProperties>& cache_props) const override;

  // Memory allocation and release.
  hsa_status_t AllocateMemory(const core::MemoryRegion &mem_region,
                              core::MemoryRegion::AllocateFlags alloc_flags,
                              void **mem, size_t size,
                              uint32_t node_id) override;
  hsa_status_t FreeMemory(void *mem, size_t size) override;

  // Queue creation, update, destruction and per-queue configuration.
  hsa_status_t CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                           HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id, void* queue_addr,
                           uint64_t queue_size_bytes, HsaEvent* event,
                           HsaQueueResource& queue_resource) const override;
  hsa_status_t UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_pct, HSA_QUEUE_PRIORITY priority,
                           void* queue_addr, uint64_t queue_size, HsaEvent* event) const override;
  hsa_status_t DestroyQueue(HSA_QUEUEID queue_id) const override;
  hsa_status_t SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t cu_mask_count,
                              uint32_t* queue_cu_mask) const override;
  hsa_status_t AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
                             uint32_t* first_gws) const override;

  // DMA-buf export/import and mapping of shareable handles.
  hsa_status_t ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
                            size_t *offset) override;
  hsa_status_t ImportDMABuf(int dmabuf_fd, core::Agent &agent,
                            core::ShareableHandle &handle) override;
  hsa_status_t Map(core::ShareableHandle handle, void *mem, size_t offset,
                   size_t size, hsa_access_permission_t perms) override;
  hsa_status_t Unmap(core::ShareableHandle handle, void *mem, size_t offset,
                     size_t size) override;
  hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override;

  // SPM, trap handler, per-node queries and memory registration/residency.
  hsa_status_t SPMAcquire(uint32_t preferred_node_id) const override;
  hsa_status_t SPMRelease(uint32_t preferred_node_id) const override;
  hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, uint32_t* timeout,
                                uint32_t* size_copied, void* dest_mem_addr,
                                bool* is_spm_data_loss) const override;
  hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                              const void* buffer_base, uint64_t buffer_base_size) const override;
  hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
  hsa_status_t GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const override;
  hsa_status_t GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const override;
  hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override;
  hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override;
  hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
  hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
  hsa_status_t DeregisterMemory(void* ptr) const override;
  hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                  const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                  const uint32_t* nodes) const override;
  hsa_status_t MakeMemoryUnresident(const void* mem) const override;
  hsa_status_t ShareMemory(void* mem, size_t size, HsaSharedMemoryHandle* share_mem) const override;
  hsa_status_t RegisterSharedHandle(const HsaSharedMemoryHandle* share_mem, void** mem,
                                    uint64_t* size) const override;
  hsa_status_t ReplaceAsanHeaderPage(void* mem) const override;
  hsa_status_t ReturnAsanHeaderPage(void* mem) const override;

  // PC sampling.
  hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
                                           uint32_t sample_info_sz,
                                           uint32_t* sz_needed) const override;
  hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
                                uint32_t* trace_id) const override;
  hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const override;
  hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const override;
  hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const override;

  hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;

  hsa_status_t IsModelEnabled(bool* enable) const override;

 private:
  /// @brief Allocate agent accessible memory (system / local memory).
  /// @return Pointer to the allocation. NOTE(review): the failure value is not
  /// visible here - presumably a null pointer; confirm against the definition.
  static void *AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id,
                                 size_t size);

  /// @brief Free agent accessible memory (system / local memory).
  /// @return Boolean status; presumably true on success - confirm against the
  /// definition.
  static bool FreeKfdMemory(void *mem, size_t size);

  /// @brief Pin memory.
  /// @return Boolean status; presumably true on success - confirm against the
  /// definition.
  static bool MakeKfdMemoryResident(size_t num_node, const uint32_t *nodes,
                                    const void *mem, size_t size,
                                    uint64_t *alternate_va,
                                    HsaMemMapFlags map_flag);

  /// @brief Unpin memory.
  static void MakeKfdMemoryUnresident(const void *mem);

  /// @brief Query for user preference and use that to determine Xnack mode
  /// of ROCm system. Return true if Xnack mode is ON or false if OFF. Xnack
  /// mode of a system is orthogonal to devices that do not support Xnack mode.
  /// It is legal for a system with Xnack ON to have devices that do not support
  /// Xnack functionality.
  static bool BindXnackMode();

  // Minimum acceptable KFD version numbers (major 0, minor 99).
  static const uint32_t kfd_version_major_min = 0;
  static const uint32_t kfd_version_minor_min = 99;
};

} // namespace AMD
} // namespace rocr

#endif // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_loader_context.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP
#define HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP

#include "core/inc/amd_hsa_loader.hpp"

namespace rocr {
namespace amd {

class LoaderContext final : public rocr::amd::hsa::loader::Context {
 public:
  LoaderContext() : rocr::amd::hsa::loader::Context() {}

  ~LoaderContext() {}

  hsa_isa_t IsaFromName(const char *name) override;

  bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa, unsigned codeGenericVersion) override;

  void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override;

  bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override;

  void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override;

  void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;

  void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override;

  bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override;

  bool ImageExtensionSupported() override;

  hsa_status_t ImageCreate(hsa_agent_t agent, hsa_access_permission_t image_permission,
                           const hsa_ext_image_descriptor_t* image_descriptor,
                           const void* image_data, hsa_ext_image_t* image_handle) override;

  hsa_status_t ImageDestroy(hsa_agent_t agent, hsa_ext_image_t image_handle) override;

  hsa_status_t SamplerCreate(hsa_agent_t agent,
                             const hsa_ext_sampler_descriptor_t* sampler_descriptor,
                             hsa_ext_sampler_t* sampler_handle) override;

  hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle) override;

private:
  LoaderContext(const LoaderContext&);
  LoaderContext& operator=(const LoaderContext&);
};

} // namespace amd
} // namespace rocr

#endif // HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP


================================================
FILE: runtime/hsa-runtime/core/inc/amd_memory_region.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// AMD specific HSA backend.

#ifndef HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_
#define HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_

#include "hsakmt/hsakmt.h"

#include "core/inc/agent.h"
#include "core/inc/runtime.h"
#include "core/inc/memory_region.h"
#include "core/util/simple_heap.h"
#include "core/util/locks.h"

#include "inc/hsa_ext_amd.h"

namespace rocr {
namespace AMD {
class MemoryRegion : public core::MemoryRegion {
 public:
  /// @brief Convert this object into hsa_region_t.
  static __forceinline hsa_region_t Convert(MemoryRegion* region) {
    const hsa_region_t region_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
    return region_handle;
  }

  static __forceinline const hsa_region_t Convert(const MemoryRegion* region) {
    const hsa_region_t region_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
    return region_handle;
  }

  /// @brief  Convert hsa_region_t into AMD::MemoryRegion *.
  static __forceinline MemoryRegion* Convert(hsa_region_t region) {
    return reinterpret_cast<MemoryRegion*>(region.handle);
  }

  MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
               bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props);

  ~MemoryRegion();

  hsa_status_t Allocate(size_t& size, AllocateFlags alloc_flags, void** address, int agent_node_id = 0) const;

  hsa_status_t Free(void* address, size_t size) const;

  hsa_status_t IPCFragmentExport(void* address) const;

  hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const;

  hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
                           void* value) const;

  hsa_status_t GetAgentPoolInfo(const core::Agent& agent,
                                hsa_amd_agent_memory_pool_info_t attribute,
                                void* value) const;

  hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
                           const void* ptr, size_t size) const;

  hsa_status_t CanMigrate(const MemoryRegion& dst, bool& result) const;

  hsa_status_t Migrate(uint32_t flag, const void* ptr) const;

  hsa_status_t Lock(uint32_t num_agents, const hsa_agent_t* agents,
                    void* host_ptr, size_t size, void** agent_ptr) const;

  hsa_status_t Unlock(void* host_ptr) const;

  HSAuint64 GetBaseAddress() const { return mem_props_.VirtualBaseAddress; }

  HSAuint64 GetPhysicalSize() const { return mem_props_.SizeInBytes; }

  HSAuint64 GetVirtualSize() const { return virtual_size_; }

  hsa_status_t AssignAgent(void* ptr, size_t size, const core::Agent& agent,
                           hsa_access_permission_t access) const;

  void Trim() const;

  HSAuint64 GetCacheSize() const { return fragment_allocator_.cache_size(); }

  __forceinline bool IsLocalMemory() const {
    return ((mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ||
            (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC));
  }

  __forceinline bool IsPublic() const {
    return (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC);
  }

  __forceinline bool IsSystem() const {
    return ((mem_props_.HeapType == HSA_HEAPTYPE_SYSTEM) ||
            (mem_props_.HeapType == HSA_HEAPTYPE_DEVICE_SVM));
  }

  __forceinline bool IsDeviceSVM() const {
    return (mem_props_.HeapType == HSA_HEAPTYPE_DEVICE_SVM);
  }

  __forceinline bool IsLDS() const {
    return mem_props_.HeapType == HSA_HEAPTYPE_GPU_LDS;
  }

  __forceinline bool IsGDS() const {
    return mem_props_.HeapType == HSA_HEAPTYPE_GPU_GDS;
  }

  __forceinline bool IsScratch() const {
    return mem_props_.HeapType == HSA_HEAPTYPE_GPU_SCRATCH;
  }

  __forceinline uint32_t BusWidth() const {
    return static_cast<uint32_t>(mem_props_.Width);
  }

  __forceinline uint32_t MaxMemCloc() const {
    return static_cast<uint32_t>(mem_props_.MemoryClockMax);
  }

  __forceinline static size_t GetPageSize() { return kPageSize_; }

  __forceinline const HsaMemFlags &mem_flags() const { return mem_flag_; }
  __forceinline const HsaMemMapFlags &map_flags() const { return map_flag_; }

  void *fragment_alloc(size_t size) const {
    return fragment_allocator_.alloc(size);
  }
  bool fragment_free(void *mem) const { return fragment_allocator_.free(mem); }

private:
  const HsaMemoryProperties mem_props_;

  HsaMemFlags mem_flag_;

  HsaMemMapFlags map_flag_;

  size_t max_single_alloc_size_;

  // Used to collect total system memory
  static size_t max_sysmem_alloc_size_;

  HSAuint64 virtual_size_;

  // Protects against concurrent allow_access calls to fragments of the same block by virtue of all
  // fragments of the block routing to the same MemoryRegion.
  mutable KernelMutex access_lock_;

  static const size_t kPageSize_;

  // Determine access type allowed to requesting device
  hsa_amd_memory_pool_access_t GetAccessInfo(const core::Agent& agent,
                                             const core::Runtime::LinkInfo& link_info) const;

  // Operational body for Allocate.  Recursive.
  hsa_status_t AllocateImpl(size_t& size, AllocateFlags alloc_flags, void** address, int agent_node_id) const;

  // Operational body for Free.  Recursive.
  hsa_status_t FreeImpl(void* address, size_t size) const;

  class BlockAllocator {
   private:
    MemoryRegion& region_;
    static const size_t block_size_ = 2 * 1024 * 1024;  // 2MB blocks.
   public:
    explicit BlockAllocator(MemoryRegion& region) : region_(region) {}
    void* alloc(size_t request_size, size_t& allocated_size) const;
    void free(void* ptr, size_t length) const { region_.FreeImpl(ptr, length); }
    size_t block_size() const { return block_size_; }
  };

  mutable SimpleHeap<BlockAllocator> fragment_allocator_;
};

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_topology.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_
#define HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_

namespace rocr {
namespace AMD {
/// @brief Initializes the runtime.
/// Should not be called directly, must be called only from Runtime::Acquire()
/// @return Boolean status; presumably true on success - confirm against the
/// definition.
bool Load();

/// @brief Shutdown/cleanup of runtime.
/// Should not be called directly, must be called only from Runtime::Release()
/// @return Boolean status; presumably true on success - confirm against the
/// definition.
bool Unload();
}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/amd_trap_handler_v1.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef OPENSRC_HSA_RUNTIME_CORE_INC_AMD_TRAP_HANDLER_V1_H_ 
#define OPENSRC_HSA_RUNTIME_CORE_INC_AMD_TRAP_HANDLER_V1_H_ 

namespace rocr {
namespace AMD {

// Table of 35 32-bit words. NOTE(review): going by the name and the header
// file name, this is presumably the pre-assembled trap handler binary for
// GFX8 devices - confirm against the code that consumes it. The words must be
// kept exactly as they are; there is no assembly listing for them here.
static const unsigned int kCodeTrapHandler8[] = {
    0xC0061C80, 0x000000C0, 0xBF8C007F, 0xBEFE0181, 0x80728872, 0x82738073,
    0x7E000272, 0x7E020273, 0x7E0402FF, 0x80000000, 0x7E060280, 0xDD800000,
    0x00000200, 0xBF8C0F70, 0x7DD40500, 0xBF870011, 0xC0061D39, 0x00000008,
    0xBF8C007F, 0x86F47474, 0xBF84000C, 0x80729072, 0x82738073, 0xC0021CB9,
    0x00000000, 0xBF8C007F, 0x7E000274, 0x7E020275, 0x7E040272, 0xDC700000,
    0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
};

// Table of 32-bit words for the trap handler variant labelled "9".
//
// The block comment inside the initializer carries an assembly listing that is
// conditional on .amdgcn.gfx_generation_number (9 or 10). The word 0x001f8000
// near the end of the table equals the listing's gfx9 value of
// SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK.
// NOTE(review): presumably the words below are that listing assembled for a
// gfx9 target — confirm before relying on it, and regenerate the words from
// the listing rather than editing them by hand.
static const unsigned int kCodeTrapHandler9[] = {
/*
  .set SQ_WAVE_PC_HI_ADDRESS_MASK              , 0xFFFF
  .set SQ_WAVE_PC_HI_TRAP_ID_SHIFT             , 16
  .set SQ_WAVE_PC_HI_TRAP_ID_SIZE              , 8
  .set SQ_WAVE_PC_HI_TRAP_ID_BFE               , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
  .set SQ_WAVE_PC_HI_HT_MASK                   , 0x1000000
  .set SQ_WAVE_STATUS_HALT_BIT                 , 13
  .set SQ_WAVE_STATUS_HALT_BFE                 , (SQ_WAVE_STATUS_HALT_BIT | (1 << 16))
  .set SQ_WAVE_TRAPSTS_ADDRESS_WATCH_MASK      , 0x7080
  .set SQ_WAVE_TRAPSTS_MEM_VIOL_MASK           , 0x100
  .set SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK       , 0x800
  .set SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK        , 0x10000000
  .set SQ_WAVE_MODE_DEBUG_EN_SHIFT             , 11
  .set SIGNAL_CODE_MEM_VIOL                    , (1 << 29)
  .set SIGNAL_CODE_ILLEGAL_INST                , (1 << 30)
  .set SIGNAL_CODE_LLVM_TRAP                   , (1 << 31)
  .set MAX_NUM_DOORBELLS_MASK                  , ((1 << 10) - 1)
  .set SENDMSG_M0_DOORBELL_ID_BITS             , 12
  .set SENDMSG_M0_DOORBELL_ID_MASK             , ((1 << SENDMSG_M0_DOORBELL_ID_BITS) - 1)

  .set TTMP7_DISPATCH_ID_CONVERTED_BIT         , 31
  .set TTMP7_WAVE_STOPPED_BIT                  , 30
  .set TTMP7_SAVED_STATUS_HALT_BIT             , 29
  .set TTMP7_SAVED_TRAP_ID_SHIFT               , 25
  .set TTMP7_SAVED_TRAP_ID_BITS                , 4
  .set TTMP7_SAVED_TRAP_ID_MASK                , ((1 << TTMP7_SAVED_TRAP_ID_BITS) - 1)
  .set TTMP7_PACKET_INDEX_BITS                 , 25
  .set TTMP7_PACKET_INDEX_MASK                 , ((1 << TTMP7_PACKET_INDEX_BITS) - 1)
  .set TTMP11_PC_HI_SHIFT                      , 7

  .if .amdgcn.gfx_generation_number == 9
    .set DEBUG_INTERRUPT_CONTEXT_ID_BIT        , 23
    .set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT   , 26
    .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     , 15
    .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
  .elseif .amdgcn.gfx_generation_number == 10
    .set DEBUG_INTERRUPT_CONTEXT_ID_BIT        , 22
    .set TTMP11_SAVE_REPLAY_W64H_SHIFT         , 31
    .set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT   , 24
    .set SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT      , 25
    .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     , 15
    .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x3F8000
    .set SQ_WAVE_IB_STS_REPLAY_W64H_MASK       , 0x2000000
  .else
    .error "unsupported target"
  .endif

  // ABI between first and second level trap handler:
  //   ttmp0 = PC[31:0]
  //   ttmp1 = 0[2:0], PCRewind[3:0], HostTrap[0], TrapId[7:0], PC[47:32]
  //   ttmp12 = SQ_WAVE_STATUS
  //   ttmp14 = TMA[31:0]
  //   ttmp15 = TMA[63:32]
  // gfx9:
  //   ttmp11 = SQ_WAVE_IB_STS[20:15], 0[18:0], NoScratch[0], WaveIdInWG[5:0]
  // gfx10:
  //   ttmp11 = SQ_WAVE_IB_STS[25], SQ_WAVE_IB_STS[21:15], 0[16:0], NoScratch[0], WaveIdInWG[5:0]
  // gfx1030/gfx1100:
  //   ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]

  .macro mGetDoorbellId
    s_mov_b32            exec_lo, 0x80000000
    s_sendmsg            sendmsg(MSG_GET_DOORBELL)
  .wait_sendmsg_\@:
    s_nop                7
    s_bitcmp0_b32        exec_lo, 0x1F
    s_cbranch_scc0       .wait_sendmsg_\@
  .endm

  .macro mExitTrap
    // Restore SQ_WAVE_IB_STS.
  .if .amdgcn.gfx_generation_number == 9
    s_lshr_b32           ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
    s_and_b32            ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
    s_setreg_b32         hwreg(HW_REG_IB_STS), ttmp2
  .endif
  .if .amdgcn.gfx_generation_number == 10
    s_lshr_b32           ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
    s_and_b32            ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
    s_lshr_b32           ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
    s_and_b32            ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
    s_or_b32             ttmp2, ttmp2, ttmp3
    s_setreg_b32         hwreg(HW_REG_IB_STS), ttmp2
  .endif

    // Restore SQ_WAVE_STATUS.
    s_and_b64            exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
    s_and_b64            vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
    s_setreg_b32         hwreg(HW_REG_STATUS), ttmp12

    // Return to shader at unmodified PC.
    s_rfe_b64            [ttmp0, ttmp1]
  .endm

  trap_entry:
    s_andn2_b32          ttmp7, ttmp7, (TTMP7_SAVED_TRAP_ID_MASK << TTMP7_SAVED_TRAP_ID_SHIFT) | (1 << TTMP7_SAVED_STATUS_HALT_BIT)

    // Save the entry status.halt in ttmp7.saved_status_halt
    s_bfe_u32            ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
    s_lshl_b32           ttmp2, ttmp2, TTMP7_SAVED_STATUS_HALT_BIT
    s_or_b32             ttmp7, ttmp7, ttmp2

    // If trap raised (non-zero trap id) then branch.
    s_bfe_u32            ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
    s_cbranch_scc1       .trap_raised

    // If non-masked exception raised then branch.
    s_getreg_b32         ttmp2, hwreg(HW_REG_TRAPSTS)
    s_and_b32            ttmp3, ttmp2, (SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK)
    s_cbranch_scc1       .excp_raised

  .signal_debugger:
    // Fetch doorbell index for our queue.
    s_mov_b32            ttmp2, exec_lo
    s_mov_b32            ttmp3, exec_hi
    mGetDoorbellId
    s_mov_b32            exec_hi, ttmp3

    // Restore exec_lo, move the doorbell_id into ttmp3
    s_and_b32            ttmp3, exec_lo, SENDMSG_M0_DOORBELL_ID_MASK
    s_mov_b32            exec_lo, ttmp2

    // Set the debug interrupt context id.
    // FIXME: Make conditional when exceptions are handled.
    s_bitset1_b32        ttmp3, DEBUG_INTERRUPT_CONTEXT_ID_BIT

    // Send an interrupt to trigger event notification.
    s_mov_b32            ttmp2, m0
    s_mov_b32            m0, ttmp3
    s_nop                0x0 // Manually inserted wait states
    s_sendmsg            sendmsg(MSG_INTERRUPT)

    // Restore m0
    s_mov_b32            m0, ttmp2

    // Parking the wave requires saving the original pc in the preserved ttmps.
    // Since all ttmps are used, we must first free ttmp6 by compressing the
    // 40bit dispatch ptr in ttmp6:7 into a 25bit queue packet id.
    //
    // Register layout before parking the wave:
    //
    // ttmp6: dispatch_ptr[31:6] 0[5:0]
    // ttmp7: 0[0] wave_stopped[0] status_halt[0] trap_id[3:0] 0[16:0] dispatch_ptr[39:32]
    // ttmp11: 1st_level_ttmp11[31:23] 0[15:0] 1st_level_ttmp11[6:0]
    //
    // After parking the wave:
    //
    // ttmp6:  pc_lo[31:0]
    // ttmp7:  1[0] wave_stopped[0] status_halt[0] trap_id[3:0] packet_id[24:0]
    // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
    //
    // The conversion from dispatch ptr to queue packet index only needs to be
    // done once, the first time the wave executes the trap handler.

  .if ((.amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor >= 3) || .amdgcn.gfx_generation_number > 10)
    s_branch             .halt_wave
  .else
    s_bitcmp1_b32        ttmp7, TTMP7_DISPATCH_ID_CONVERTED_BIT
    s_cbranch_scc1       .ttmp7_has_dispatch_index

    s_and_b32            ttmp3, ttmp3, MAX_NUM_DOORBELLS_MASK
    s_lshl_b32           ttmp3, ttmp3, 0x3

    // Map doorbell index to amd_queue_t* through TMA (doorbell_queue_map).
    s_load_dwordx2       [ttmp2, ttmp3], [ttmp14, ttmp15], ttmp3 glc
    s_waitcnt            lgkmcnt(0)

    // Retrieve queue base_address from hsa_queue_t*.
    s_load_dword         ttmp2, [ttmp2, ttmp3], 0x8 glc
    s_waitcnt            lgkmcnt(0)

    // The dispatch index is (dispatch_ptr.lo - base_address.lo) >> 6
    s_sub_u32            ttmp2, ttmp6, ttmp2
    s_lshr_b32           ttmp2, ttmp2, 0x6
    s_andn2_b32          ttmp7, ttmp7, TTMP7_PACKET_INDEX_MASK
    s_or_b32             ttmp7, ttmp7, ttmp2
    s_bitset1_b32        ttmp7, TTMP7_DISPATCH_ID_CONVERTED_BIT

  .ttmp7_has_dispatch_index:
    // Save the PC
    s_mov_b32            ttmp6, ttmp0
    s_and_b32            ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
    s_lshl_b32           ttmp1, ttmp1, TTMP11_PC_HI_SHIFT
    s_andn2_b32          ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP11_PC_HI_SHIFT)
    s_or_b32             ttmp11, ttmp11, ttmp1

    // Park the wave
    s_getpc_b64          [ttmp0, ttmp1]
    s_add_u32            ttmp0, ttmp0, .parked - .
    s_addc_u32           ttmp1, ttmp1, 0x0
    s_branch             .halt_wave

  .parked:
    s_trap               0x2
    s_branch             .parked
  .endif

  .excp_raised:
    // If memory violation without XNACK error then signal queue error.
    // XNACK error will be handled by VM interrupt, since it has more information.
    s_and_b32            ttmp3, ttmp2, (SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK)
    s_cmp_eq_u32         ttmp3, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
    s_mov_b32            ttmp3, SIGNAL_CODE_MEM_VIOL
    s_cbranch_scc1       .signal_error

    // If illegal instruction then signal queue error.
    s_and_b32            ttmp3, ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
    s_mov_b32            ttmp3, SIGNAL_CODE_ILLEGAL_INST
    s_cbranch_scc1       .signal_error

    // Otherwise (memory violation with XNACK error) return to shader. Do not
    // send a signal as that will cause an interrupt storm. Instead let the
    // interrupt generated by the TLB miss cause the kernel to notify ROCr and
    // put the queue into an error state. This also ensures the TLB interrupt
    // is received which provides information about the page causing the fault.
    s_branch             .halt_wave

  .trap_raised:
    // Save the entry trap id in ttmp7.saved_trap_id
    s_min_u32            ttmp3, ttmp2, 0xF
    s_lshl_b32           ttmp3, ttmp3, TTMP7_SAVED_TRAP_ID_SHIFT
    s_or_b32             ttmp7, ttmp7, ttmp3

    // If debugger trap (s_trap >= 3) then signal debugger.
    s_cmp_ge_u32         ttmp2, 0x3;
    s_cbranch_scc1       .signal_debugger

    // If llvm.trap (s_trap 2) then signal queue error.
    s_cmp_eq_u32         ttmp2, 0x2
    s_mov_b32            ttmp3, SIGNAL_CODE_LLVM_TRAP
    s_cbranch_scc1       .signal_error

    // For other traps advance PC and return to shader.
    s_add_u32            ttmp0, ttmp0, 0x4
    s_addc_u32           ttmp1, ttmp1, 0x0
    s_branch             .exit_trap

  .signal_error:
  .if (.amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor >= 3)
    // This needs to be rewritten for gfx10.3 as scalar stores are not available.
  .else
    // FIXME: don't trash ttmp4/ttmp5 when exception handling is unified.
    s_mov_b32            ttmp4, ttmp3

    // Fetch doorbell index for our queue.
    s_mov_b32            ttmp2, exec_lo
    s_mov_b32            ttmp3, exec_hi
    mGetDoorbellId
    s_mov_b32            exec_hi, ttmp3

    // Restore exec_lo, move the doorbell index into ttmp3
    s_and_b32            exec_lo, exec_lo, MAX_NUM_DOORBELLS_MASK
    s_lshl_b32           ttmp3, exec_lo, 0x3
    s_mov_b32            exec_lo, ttmp2

    // Map doorbell index to amd_queue_t* through TMA (doorbell_queue_map).
    s_load_dwordx2       [ttmp2, ttmp3], [ttmp14, ttmp15], ttmp3 glc
    s_waitcnt            lgkmcnt(0)

    // Retrieve queue_inactive_signal from amd_queue_t*.
    s_load_dwordx2       [ttmp2, ttmp3], [ttmp2, ttmp3], 0xC0 glc
    s_waitcnt            lgkmcnt(0)

    // Set queue signal value to error code.
    s_mov_b32            ttmp5, 0x0
    s_atomic_swap_x2     [ttmp4, ttmp5], [ttmp2, ttmp3], 0x8 glc
    s_waitcnt            lgkmcnt(0)

    // Skip event trigger if the signal value was already non-zero.
    s_or_b32             ttmp4, ttmp4, ttmp5
    s_cbranch_scc1       .skip_event_trigger

    // Check for a non-NULL signal event mailbox.
    s_load_dwordx2       [ttmp4, ttmp5], [ttmp2, ttmp3], 0x10 glc
    s_waitcnt            lgkmcnt(0)
    s_and_b64            [ttmp4, ttmp5], [ttmp4, ttmp5], [ttmp4, ttmp5]
    s_cbranch_scc0       .skip_event_trigger

    // Load the signal event value.
    s_load_dword         ttmp2, [ttmp2, ttmp3], 0x18 glc
    s_waitcnt            lgkmcnt(0)

    // Write the signal event value to the mailbox.
    s_store_dword        ttmp2, [ttmp4, ttmp5], 0x0 glc
    s_waitcnt            lgkmcnt(0)

    // Send an interrupt to trigger event notification.
    s_mov_b32            m0, 0x0
    s_nop                0
    s_sendmsg            sendmsg(MSG_INTERRUPT)
  .endif

  .skip_event_trigger:
    // Since we trashed ttmp4/ttmp5, reset the wave_id to 0
    s_mov_b32            ttmp4, 0x0
    s_mov_b32            ttmp5, 0x0

  .halt_wave:
    s_bitset1_b32        ttmp7, TTMP7_WAVE_STOPPED_BIT

    // Halt the wavefront.
    s_bitset1_b32        ttmp12, SQ_WAVE_STATUS_HALT_BIT

  .exit_trap:
    mExitTrap
*/
    // Encoded words (end of the assembly listing; table contents start here).
    0x8973ff73, 0x3e000000, 0x92eeff78, 0x0001000d, 0x8e6e9d6e, 0x87736e73,
    0x92eeff6d, 0x00080010, 0xbf850041, 0xb8eef803, 0x866fff6e, 0x00000900,
    0xbf850031, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a,
    0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x866fff7e, 0x00000fff,
    0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, 0xbf800000, 0xbf900001,
    0xbefc006e, 0xbf0d9f73, 0xbf85000f, 0x866fff6f, 0x000003ff, 0x8e6f836f,
    0xc0051bbd, 0x0000006f, 0xbf8cc07f, 0xc0031bb7, 0x00000008, 0xbf8cc07f,
    0x80ee6e72, 0x8f6e866e, 0x8973ff73, 0x01ffffff, 0x87736e73, 0xbef31a9f,
    0xbef2006c, 0x866dff6d, 0x0000ffff, 0x8e6d876d, 0x8977ff77, 0x007fff80,
    0x87776d77, 0xbeec1c00, 0x806cff6c, 0x00000010, 0x826d806d, 0xbf820044,
    0xbf920002, 0xbf82fffe, 0x866fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
    0xbeef00ff, 0x20000000, 0xbf850011, 0x866fff6e, 0x00000800, 0xbeef00f4,
    0xbf85000d, 0xbf820036, 0x83ef8f6e, 0x8e6f996f, 0x87736f73, 0xbf09836e,
    0xbf85ffbe, 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c,
    0x826d806d, 0xbf82002c, 0xbef0006f, 0xbeee007e, 0xbeef007f, 0xbefe00ff,
    0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f,
    0x867eff7e, 0x000003ff, 0x8e6f837e, 0xbefe006e, 0xc0051bbd, 0x0000006f,
    0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37,
    0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010,
    0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f,
    0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001,
    0xbef00080, 0xbef10080, 0xbef31a9e, 0xbef81a8d, 0x8f6e8b77, 0x866eff6e,
    0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};

// Table of 32-bit words for the trap handler variant labelled "90a".
// NOTE(review): on inspection these words appear to be identical, word for
// word, to kCodeTrapHandler9 in this revision — confirm with a diff. If that
// holds, the two tables must be regenerated together whenever the shared
// assembly listing changes.
// The words must be kept exactly as they are; do not edit them by hand.
static const unsigned int kCodeTrapHandler90a[] = {
    0x8973ff73, 0x3e000000, 0x92eeff78, 0x0001000d, 0x8e6e9d6e, 0x87736e73,
    0x92eeff6d, 0x00080010, 0xbf850041, 0xb8eef803, 0x866fff6e, 0x00000900,
    0xbf850031, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a,
    0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x866fff7e, 0x00000fff,
    0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, 0xbf800000, 0xbf900001,
    0xbefc006e, 0xbf0d9f73, 0xbf85000f, 0x866fff6f, 0x000003ff, 0x8e6f836f,
    0xc0051bbd, 0x0000006f, 0xbf8cc07f, 0xc0031bb7, 0x00000008, 0xbf8cc07f,
    0x80ee6e72, 0x8f6e866e, 0x8973ff73, 0x01ffffff, 0x87736e73, 0xbef31a9f,
    0xbef2006c, 0x866dff6d, 0x0000ffff, 0x8e6d876d, 0x8977ff77, 0x007fff80,
    0x87776d77, 0xbeec1c00, 0x806cff6c, 0x00000010, 0x826d806d, 0xbf820044,
    0xbf920002, 0xbf82fffe, 0x866fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
    0xbeef00ff, 0x20000000, 0xbf850011, 0x866fff6e, 0x00000800, 0xbeef00f4,
    0xbf85000d, 0xbf820036, 0x83ef8f6e, 0x8e6f996f, 0x87736f73, 0xbf09836e,
    0xbf85ffbe, 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c,
    0x826d806d, 0xbf82002c, 0xbef0006f, 0xbeee007e, 0xbeef007f, 0xbefe00ff,
    0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f,
    0x867eff7e, 0x000003ff, 0x8e6f837e, 0xbefe006e, 0xc0051bbd, 0x0000006f,
    0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37,
    0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010,
    0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f,
    0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001,
    0xbef00080, 0xbef10080, 0xbef31a9e, 0xbef81a8d, 0x8f6e8b77, 0x866eff6e,
    0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};


// Table of 32-bit words for the trap handler variant labelled "1010".
// The words 0x003f8000 and 0x02000000 near the end of the table equal the
// gfx10 values of SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK and
// SQ_WAVE_IB_STS_REPLAY_W64H_MASK given in the assembly listing embedded in
// kCodeTrapHandler9.
// NOTE(review): presumably that listing assembled for a gfx10.1 target —
// confirm against the code that selects a table per ISA.
// The words must be kept exactly as they are; do not edit them by hand.
static const unsigned int kCodeTrapHandler1010[] = {
    0x8a73ff73, 0x3e000000, 0x93eeff78, 0x0001000d, 0x8f6e9d6e, 0x88736e73,
    0x93eeff6d, 0x00080010, 0xbf850041, 0xb96ef803, 0x876fff6e, 0x00000900,
    0xbf850031, 0xbeee037e, 0xbeef037f, 0xbefe03ff, 0x80000000, 0xbf90000a,
    0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, 0x876fff7e, 0x00000fff,
    0xbefe036e, 0xbeef1d96, 0xbeee037c, 0xbefc036f, 0xbf800000, 0xbf900001,
    0xbefc036e, 0xbf0d9f73, 0xbf85000f, 0x876fff6f, 0x000003ff, 0x8f6f836f,
    0xf4051bbd, 0xde000000, 0xbf8cc07f, 0xf4011bb7, 0xfa000008, 0xbf8cc07f,
    0x80ee6e72, 0x906e866e, 0x8a73ff73, 0x01ffffff, 0x88736e73, 0xbef31d9f,
    0xbef2036c, 0x876dff6d, 0x0000ffff, 0x8f6d876d, 0x8a77ff77, 0x007fff80,
    0x88776d77, 0xbeec1f00, 0x806cff6c, 0x00000010, 0x826d806d, 0xbf820044,
    0xbf920002, 0xbf82fffe, 0x876fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
    0xbeef03ff, 0x20000000, 0xbf850011, 0x876fff6e, 0x00000800, 0xbeef03f4,
    0xbf85000d, 0xbf820036, 0x83ef8f6e, 0x8f6f996f, 0x88736f73, 0xbf09836e,
    0xbf85ffbe, 0xbf06826e, 0xbeef03ff, 0x80000000, 0xbf850003, 0x806c846c,
    0x826d806d, 0xbf82002c, 0xbef0036f, 0xbeee037e, 0xbeef037f, 0xbefe03ff,
    0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f,
    0x877eff7e, 0x000003ff, 0x8f6f837e, 0xbefe036e, 0xf4051bbd, 0xde000000,
    0xbf8cc07f, 0xf4051bb7, 0xfa0000c0, 0xbf8cc07f, 0xbef10380, 0xf6811c37,
    0xfa000008, 0xbf8cc07f, 0x88707170, 0xbf85000e, 0xf4051c37, 0xfa000010,
    0xbf8cc07f, 0x87f07070, 0xbf840009, 0xf4011bb7, 0xfa000018, 0xbf8cc07f,
    0xf4411bb8, 0xfa000000, 0xbf8cc07f, 0xbefc0380, 0xbf800000, 0xbf900001,
    0xbef00380, 0xbef10380, 0xbef31d9e, 0xbef81d8d, 0x906e8977, 0x876fff6e,
    0x003f8000, 0x906e8677, 0x876eff6e, 0x02000000, 0x886e6f6e, 0xb9eef807,
    0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c,
};

// Table of 32-bit words for the trap handler variant labelled "10".
// This table is shorter than kCodeTrapHandler1010, but it ends with the same
// twelve words as that table (0x906e8977 through 0xbe80226c).
// NOTE(review): presumably this is the assembly listing embedded in
// kCodeTrapHandler9 assembled with its gfx10.3+ conditionals taken (the
// branch that goes straight to .halt_wave and leaves .signal_error empty) —
// confirm against the code that selects a table per ISA.
// The words must be kept exactly as they are; do not edit them by hand.
static const unsigned int kCodeTrapHandler10[] = {
    0x8a73ff73, 0x3e000000, 0x93eeff78, 0x0001000d, 0x8f6e9d6e, 0x88736e73,
    0x93eeff6d, 0x00080010, 0xbf850023, 0xb96ef803, 0x876fff6e, 0x00000900,
    0xbf850013, 0xbeee037e, 0xbeef037f, 0xbefe03ff, 0x80000000, 0xbf90000a,
    0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, 0x876fff7e, 0x00000fff,
    0xbefe036e, 0xbeef1d96, 0xbeee037c, 0xbefc036f, 0xbf800000, 0xbf900001,
    0xbefc036e, 0xbf82001a, 0x876fff6e, 0x10000100, 0xbf06ff6f, 0x00000100,
    0xbeef03ff, 0x20000000, 0xbf850011, 0x876fff6e, 0x00000800, 0xbeef03f4,
    0xbf85000d, 0xbf82000e, 0x83ef8f6e, 0x8f6f996f, 0x88736f73, 0xbf09836e,
    0xbf85ffdc, 0xbf06826e, 0xbeef03ff, 0x80000000, 0xbf850003, 0x806c846c,
    0x826d806d, 0xbf820004, 0xbef00380, 0xbef10380, 0xbef31d9e, 0xbef81d8d,
    0x906e8977, 0x876fff6e, 0x003f8000, 0x906e8677, 0x876eff6e, 0x02000000,
    0x886e6f6e, 0xb9eef807, 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c,
};

}  // namespace AMD
}  // namespace rocr

#endif  //OPENSRC_HSA_RUNTIME_CORE_INC_AMD_TRAP_HANDLER_V1_H_ 


================================================
FILE: runtime/hsa-runtime/core/inc/amd_virtio_driver.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_
#define HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_

#include <memory>
#include <string>

#include "hsakmt/hsakmt.h"

#include "core/inc/driver.h"
#include "core/inc/memory_region.h"

namespace rocr {
namespace AMD {

/// @brief core::Driver implementation declared for the "KFD virtio" backend.
///
/// This header only declares the interface; every member function is defined
/// elsewhere. Most members are marked `override`, i.e. they implement virtual
/// functions of core::Driver.
///
/// NOTE(review): GetDeviceHandle, GetClockCounters, SetTrapHandler,
/// AllocateScratchMemory, AvailableMemory, GetTileConfig and
/// GetWallclockFrequency are declared without `override`. Whether core::Driver
/// declares matching virtuals is not visible from this header — confirm, and
/// add `override` if it does.
/// NOTE(review): std::vector is used in several signatures but <vector> is not
/// included directly by this header; presumably it arrives through
/// core/inc/driver.h — confirm.
class KfdVirtioDriver final : public core::Driver {
 public:
  /// @param devnode_name Device node name for this driver instance
  ///        (how it is used is not visible in this header).
  /// NOTE(review): single-argument constructor is not `explicit`, so
  /// std::string converts implicitly to KfdVirtioDriver — confirm intended.
  KfdVirtioDriver(std::string devnode_name);

  /// @brief Static factory-style entry point.
  /// @param[out] driver Receives the driver object (presumably only on
  ///             success — confirm in the definition).
  static hsa_status_t DiscoverDriver(std::unique_ptr<core::Driver>& driver);

  // --- Driver lifecycle -----------------------------------------------------
  hsa_status_t Init() override;
  hsa_status_t ShutDown() override;
  hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;
  hsa_status_t Open() override;
  hsa_status_t Close() override;

  // --- System / node property queries (results returned via out-params) ------
  hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override;
  hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override;
  hsa_status_t GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                 uint32_t node_id) const override;
  hsa_status_t GetMemoryProperties(uint32_t node_id,
                                   std::vector<HsaMemoryProperties>& mem_props) const override;
  hsa_status_t GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                  std::vector<HsaCacheProperties>& cache_props) const override;
  hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const;
  hsa_status_t GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const;
  hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                              const void* buffer_base, uint64_t buffer_base_size) const;

  // --- Memory management ------------------------------------------------------
  hsa_status_t AllocateMemory(const core::MemoryRegion& mem_region,
                              core::MemoryRegion::AllocateFlags alloc_flags, void** mem,
                              size_t size, uint32_t agent_node_id) override;
  hsa_status_t FreeMemory(void* mem, size_t size) override;
  hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const;
  hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
  hsa_status_t DeregisterMemory(void* ptr) const override;
  hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const;
  hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                  const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                  const uint32_t* nodes) const override;
  hsa_status_t MakeMemoryUnresident(const void* mem) const override;

  // --- Queue management -------------------------------------------------------
  hsa_status_t CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                           HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id, void* queue_addr,
                           uint64_t queue_size_bytes, HsaEvent* event,
                           HsaQueueResource& queue_resource) const override;
  hsa_status_t DestroyQueue(HSA_QUEUEID queue_id) const override;
  hsa_status_t UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_percentage,
                           HSA_QUEUE_PRIORITY priority, void* queue_mem, uint64_t queue_size,
                           HsaEvent* event) const override;
  hsa_status_t SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t num_cu_mask,
                              uint32_t* cu_mask) const override;
  hsa_status_t AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_GWS, uint32_t* GWS) const override;

  // --- DMA-buf export/import and shareable-handle mapping ---------------------
  hsa_status_t ExportDMABuf(void* mem, size_t size, int* dmabuf_fd, size_t* offset) override;
  hsa_status_t ImportDMABuf(int dmabuf_fd, core::Agent& agent,
                            core::ShareableHandle& handle) override;
  hsa_status_t Map(core::ShareableHandle handle, void* mem, size_t offset, size_t size,
                   hsa_access_permission_t perms) override;
  hsa_status_t Unmap(core::ShareableHandle handle, void* mem, size_t offset, size_t size) override;
  hsa_status_t ReleaseShareableHandle(core::ShareableHandle& handle) override;

  // --- Miscellaneous per-node queries and SPM/SMI entry points ----------------
  hsa_status_t GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const;
  hsa_status_t SPMAcquire(uint32_t node_id) const override;
  hsa_status_t SPMRelease(uint32_t node_id) const override;
  hsa_status_t SPMSetDestBuffer(uint32_t node_id, uint32_t size, uint32_t* timeout,
                                uint32_t* size_copied, void* dest,
                                bool* is_data_loss) const override;
  hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
  hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const;
  hsa_status_t IsModelEnabled(bool* enable) const override;
};

}  // namespace AMD
}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_AMD_VIRTIO_DRIVER_H_


================================================
FILE: runtime/hsa-runtime/core/inc/amd_xdna_driver.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_
#define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_

#include <array>
#include <climits>
#include <map>
#include <memory>
#include <unordered_map>

#include "core/driver/xdna/uapi/amdxdna_accel.h"
#include "core/inc/amd_aie_agent.h"
#include "core/inc/driver.h"
#include "core/inc/memory_region.h"

/// @brief struct amdxdna_cmd_chain - Interpretation of the data payload for
/// ERT_CMD_CHAIN.
///
/// Fixed-size header followed by a flexible array of command addresses; the
/// array length is given by @ref command_count (see the __counted_by
/// annotation). Because of the trailing flexible array, the struct must be
/// placed over storage large enough for the header plus the array.
struct amdxdna_cmd_chain {
  /// Number of commands in chain
  __u32 command_count;
  /// Index of last successfully submitted command in chain
  __u32 submit_index;
  /// Index of failing command if cmd status is not completed
  __u32 error_index;
  /// Reserved words; meaning not defined in this header.
  __u32 reserved[3];
  /// Address of each command in chain
  __u64 data[] __counted_by(command_count);
};

/// @brief struct amdxdna_cmd - Exec buffer command header format
///
/// A single 32-bit header word, accessible either as the raw @ref header value
/// or through the bit-fields below (4 + 6 + 2 + 11 + 5 + 4 = 32 bits),
/// followed by a flexible payload array of 32-bit words.
struct amdxdna_cmd {
  union {
    struct {
      /// Current state of a command
      __u32 state : 4;
      /// Unused bits
      __u32 unused : 6;
      /// Extra CU masks in addition to mandatory mask
      __u32 extra_cu_masks : 2;
      /// Number of words in payload (data)
      __u32 count : 11;
      /// Opcode identifying specific command
      __u32 opcode : 5;
      /// Reserved bits
      __u32 reserved : 4;
    };
    /// The whole header viewed as one 32-bit word
    __u32 header;
  };
  /// Packet payload; its length in 32-bit words is given by @ref count
  __u32 data[] __counted_by(count);
};

namespace rocr {
namespace core {
class Queue;
}

namespace AMD {

/// @brief: Used to transform an address into a device address
/// NOTE(review): DEV_ADDR_OFFSET_MASK (0x02FFFFFF) is not a contiguous run of
/// low bits — bit 24 is clear while bit 25 is set. Confirm this value is
/// intended (as opposed to, e.g., 0x03FFFFFF) at the site where it is applied.
constexpr uint32_t DEV_ADDR_BASE = 0x04000000;
constexpr uint32_t DEV_ADDR_OFFSET_MASK = 0x02FFFFFF;

/// @brief: The driver places a structure before each command in a command chain.
/// Need to increase the size of the command by the size of this structure.
/// In the following xdna driver source can see where this is implemented:
/// Commit hash: eddd92c0f61592c576a500f16efa24eb23667c23
/// https://github.com/amd/xdna-driver/blob/main/src/driver/amdxdna/aie2_msg_priv.h#L387-L391
/// https://github.com/amd/xdna-driver/blob/main/src/driver/amdxdna/aie2_message.c#L637
/// NOTE(review): the unit of this increase is not stated here; presumably
/// 32-bit words, matching amdxdna_cmd::count — confirm at the use site.
constexpr uint32_t CMD_COUNT_SIZE_INCREASE = 3;

/// @brief: The size of an instruction in bytes
constexpr uint32_t INSTR_SIZE_BYTES = 4;

/// @brief: Indices into the command payload where the instruction sequence
/// address and the instruction sequence size are located
constexpr uint32_t CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX = 2;
constexpr uint32_t CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_SIZE_IDX = 4;

/// @brief Default job submission timeout value.
/// NOTE(review): this is a compile-time default, not an environment variable
/// as the earlier comment said; presumably an environment variable can
/// override it. The time unit is not visible here — confirm at the use site.
constexpr uint32_t DEFAULT_TIMEOUT_VAL = 50;

class XdnaDriver final : public core::Driver {
  /// @brief BO handle information.
  struct BOHandle {
    /// Mapped address.
    void* vaddr = nullptr;
    /// Handle returned by xdna.
    uint32_t handle = AMDXDNA_INVALID_BO_HANDLE;
    /// Size in bytes.
    size_t size = 0;

    constexpr BOHandle() = default;
    constexpr BOHandle(void* vaddr, uint32_t handle, size_t size)
        : vaddr{vaddr}, handle{handle}, size{size} {}
    constexpr bool IsValid() const { return handle != AMDXDNA_INVALID_BO_HANDLE; }
  };

  /// @brief CU mask size.
  static constexpr size_t cu_mask_size = sizeof(uint32_t) * CHAR_BIT;

  /// @brief Per hardware context PDI cache.
  class PDICache {
    std::array<BOHandle, cu_mask_size> entries = {};
    size_t entry_count = 0;

   public:
    /// @brief Sentinel value for entries not found.
    constexpr static size_t NotFound = cu_mask_size;

    /// @brief Returns the size of the cache.
    constexpr size_t size() const { return entry_count; }

    /// @brief Returns the index of the BO handle if it is the cache, otherwise @ref NotFound.
    ///
    /// This function does a linear search because the mask is small (32 elements).
    size_t GetIndex(uint32_t pdi_handle) const {
      for (size_t i = 0; i < entry_count; ++i) {
        if (entries[i].handle == pdi_handle) {
          return i;
        }
      }
      return NotFound;
    }

    /// @brief Sets the next cache entry.
    hsa_status_t SetNext(const BOHandle& pdi_bo_handle, size_t& index) {
      if (entry_count == entries.size()) {
        // cache is full
        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      }

      index = entry_count++;
      entries[index] = pdi_bo_handle;
      return HSA_STATUS_SUCCESS;
    }

    constexpr const BOHandle& operator[](size_t index) const { return entries[index]; }
  };

public:
  XdnaDriver(std::string devnode_name);

  static hsa_status_t DiscoverDriver(std::unique_ptr<core::Driver>& driver);

  /// @brief Returns the size of the system memory heap in bytes.
  static uint64_t GetSystemMemoryByteSize();

  /// @brief Returns the size of the dev heap in bytes.
  static uint64_t GetDevHeapByteSize();

  hsa_status_t Init() override;
  hsa_status_t ShutDown() override;
  hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;

  hsa_status_t Open() override;
  hsa_status_t Close() override;
  hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override;
  hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override;
  hsa_status_t GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                 uint32_t node_id) const override;
  hsa_status_t GetMemoryProperties(uint32_t node_id,
                                   std::vector<HsaMemoryProperties>& mem_props) const override;
  hsa_status_t GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                  std::vector<HsaCacheProperties>& cache_props) const override;
  hsa_status_t AllocateMemory(const core::MemoryRegion &mem_region,
                              core::MemoryRegion::AllocateFlags alloc_flags,
                              void **mem, size_t size,
                              uint32_t node_id) override;
  hsa_status_t FreeMemory(void *mem, size_t size) override;
  hsa_status_t CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                           HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id, void* queue_addr,
                           uint64_t queue_size_bytes, HsaEvent* event,
                           HsaQueueResource& queue_resource) const override;
  hsa_status_t UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_pct, HSA_QUEUE_PRIORITY priority,
                           void* queue_addr, uint64_t queue_size, HsaEvent* event) const override;
  hsa_status_t DestroyQueue(HSA_QUEUEID queue_id) const override;
  hsa_status_t SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t cu_mask_count,
                              uint32_t* queue_cu_mask) const override;
  hsa_status_t AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
                             uint32_t* first_gws) const override;
  hsa_status_t ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
                            size_t *offset) override;
  hsa_status_t ImportDMABuf(int dmabuf_fd, core::Agent &agent,
                            core::ShareableHandle &handle) override;
  hsa_status_t Map(core::ShareableHandle handle, void *mem, size_t offset,
                   size_t size, hsa_access_permission_t perms) override;
  hsa_status_t Unmap(core::ShareableHandle handle, void *mem, size_t offset,
                     size_t size) override;
  hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override;

  /// @brief Submits @p num_pkts packets in a command chain.
  hsa_status_t SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uint32_t num_pkts,
                              HSA_QUEUEID& queue_id, uint32_t num_core_tiles);

  hsa_status_t SPMAcquire(uint32_t preferred_node_id) const override;
  hsa_status_t SPMRelease(uint32_t preferred_node_id) const override;
  hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, uint32_t* timeout,
                                uint32_t* size_copied, void* dest_mem_addr,
                                bool* is_spm_data_loss) const override;
  hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                              const void* buffer_base, uint64_t buffer_base_size) const override;
  hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
  hsa_status_t GetClockCounters(uint32_t node_id, HsaClockCounters* clock_counter) const override;
  hsa_status_t GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const override;
  hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override;
  hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override;
  hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
  hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
  hsa_status_t DeregisterMemory(void* ptr) const override;
  hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                  const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                  const uint32_t* nodes) const override;
  hsa_status_t MakeMemoryUnresident(const void* mem) const override;

  hsa_status_t IsModelEnabled(bool* enable) const override;

 private:
  /// @brief Destroys @p bo_handle.
  ///
  /// This function will unmap the virtual address and close the BO, but will not return any status.
  void DestroyBOHandle(BOHandle& bo_handle);

  /// @brief Finds the BO associated with the address.
  BOHandle FindBOHandle(void* mem) const;

  /// @brief Creates a new hardware context with the given PDI BO handles.
  hsa_status_t ConfigHwCtx(const PDICache& pdi_bo_handles, HSA_QUEUEID& queue_id,
                           uint32_t num_core_tiles);

  hsa_status_t QueryDriverVersion();

  /// @brief Allocate device accesible heap space.
  ///
  /// Allocate and map a buffer object (BO) that the AIE device can access.
  hsa_status_t InitDeviceHeap();
  hsa_status_t FreeDeviceHeap();

  /// @brief Creates a command BO and returns it to @p bo_info.
  ///
  /// @param size size of memory to allocate
  /// @param bo_info allocated BO
  hsa_status_t CreateCmdBO(uint32_t size, BOHandle& bo_info);

  /// @brief Gets all BOs from a command packet payload, flushes the caches associated with them and
  /// replaces the instruction virtual address with the device address.
  ///
  /// @param count Number of entries in the command
  /// @param cmd_pkt_payload A pointer to the payload of the command
  /// @param bo_handles vector that contains all BO handles
  hsa_status_t PrepareBOs(uint32_t count, hsa_amd_aie_ert_start_kernel_data_t* cmd_pkt_payload,
                          std::vector<uint32_t>& bo_handles);

  /// @brief Executes a command and waits for its completion
  ///
  /// @param cmd_chain_bo_handle command to execute
  /// @param bo_handles handles associated with the command
  /// @param aie_queue queue to submit to
  hsa_status_t ExecCmdAndWait(const BOHandle& cmd_chain_bo_handle,
                              const std::vector<uint32_t>& bo_handles, HSA_QUEUEID queue_id);

  /// TODO: Remove this in the future and rely on the core Runtime
  /// object to track handle allocations. Using the VMEM API for mapping XDNA
  /// driver handles requires a bit more refactoring. So rely on the XDNA driver
  /// to manage some of this for now.
  std::unordered_map<uint32_t, void *> vmem_handle_mappings;
  std::map<void*, BOHandle> vmem_addr_mappings;

  /// @brief Hardware context to PDI cache mapping.
  std::unordered_map<uint32_t, PDICache> hw_ctx_pdi_cache_map;

  /// @brief Virtual address range allocated for the device heap.
  ///
  /// Allocate a large enough space so we can carve out the device heap in
  /// this range and ensure it is aligned to 64MB. Currently, npu1 supports
  /// 64MB device heap and it must be aligned to 64MB.
  BOHandle dev_heap_handle;

  /// @brief The aligned device heap.
  void *dev_heap_aligned = nullptr;

  static constexpr size_t dev_heap_size = 64 * 1024 * 1024;
  static constexpr size_t dev_heap_align = 64 * 1024 * 1024;
};

} // namespace AMD
} // namespace rocr

#endif // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/blit.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_BLIT_H_
#define HSA_RUNTIME_CORE_INC_BLIT_H_

#include <stdint.h>

#include "core/inc/agent.h"

namespace rocr {
namespace core {
/// @brief Abstract interface for blit (copy / fill) engines.
class Blit {
 public:
  explicit Blit() = default;
  virtual ~Blit() = default;

  /// @brief Invalidates the blit object and detaches it from the control
  /// block of the underlying compute device. Using the object after it has
  /// been released is illegal; the resulting behavior is indeterminate.
  ///
  /// @note: The call blocks until all commands have executed.
  ///
  /// @param agent Agent passed to Initialize.
  ///
  /// @return hsa_status_t
  virtual hsa_status_t Destroy(const core::Agent& agent) = 0;

  /// @brief Submits a linear copy command to the control block of the
  /// underlying compute device and blocks until the command has finished
  /// executing.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, size_t size) = 0;

  /// @brief Submits a linear copy command to the control block of the
  /// underlying compute device without blocking. The transfer starts once
  /// all dependent signals are satisfied; when it completes, the out signal
  /// is decremented.
  ///
  /// @param dst Memory address of the copy destination.
  /// @param src Memory address of the copy source.
  /// @param size Size of the data to be copied.
  /// @param dep_signals Array of dependent signals.
  /// @param out_signal Output signal.
  /// @param gang_signals Array of gang signals.
  virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, size_t size,
                                               std::vector<core::Signal*>& dep_signals,
                                               core::Signal& out_signal,
                                               std::vector<core::Signal*>& gang_signals) = 0;

  /// @brief Submits a linear fill command to the control block of the
  /// underlying compute device and blocks until the command has finished
  /// executing.
  ///
  /// @param ptr Memory address of the fill destination.
  /// @param value Value to be set.
  /// @param num Number of uint32_t elements to be set to the value.
  virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, size_t num) = 0;

  /// @brief Turns profiling of the asynchronous copy command on or off. The
  /// timestamp of each copy request is stored in the completion signal
  /// structure.
  ///
  /// @param enable True to enable profiling. False to disable profiling.
  ///
  /// @return HSA_STATUS_SUCCESS if the request to enable/disable profiling is
  /// successful.
  virtual hsa_status_t EnableProfiling(bool enable) = 0;

  /// @brief Reports whether blit operations use SDMA. This default
  /// implementation answers false.
  virtual bool isSDMA() const { return false; }

  /// @brief Reports the approximate number of remaining bytes to copy or
  /// fill. Any return of zero must be exact.
  virtual uint64_t PendingBytes() = 0;

  /// @brief Sets the gang leader flag of this blit object.
  virtual void GangLeader(bool gang_leader) = 0;

  /// @brief Reads the gang leader flag. This default implementation answers
  /// false.
  virtual bool GangLeader() const { return false; }
};
}  // namespace core
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/cache.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_CACHE_H
#define HSA_RUNTIME_CORE_INC_CACHE_H

#include "core/inc/hsa_internal.h"
#include "core/inc/checked.h"
#include "core/util/utils.h"
#include <utility>
#include <string>

namespace rocr {
namespace core {

/// @brief Runtime object behind an hsa_cache_t handle; stores a name, a
/// level and a size.
class Cache : public Checked<0x39A6C7AD3F135B06> {
 public:
  /// @brief Wraps the address of @p cache into an hsa_cache_t handle.
  static __forceinline hsa_cache_t Convert(const Cache* cache) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(cache);
    const hsa_cache_t wrapped = {static_cast<uint64_t>(address)};
    return wrapped;
  }

  /// @brief Recovers the Cache pointer stored in the handle @p cache.
  static __forceinline Cache* Convert(const hsa_cache_t cache) {
    const uintptr_t address = static_cast<uintptr_t>(cache.handle);
    return reinterpret_cast<Cache*>(address);
  }

  /// @brief Builds a cache description, copying @p name.
  Cache(const std::string& name, uint8_t level, uint32_t size)
      : name_(name),
        level_(level),
        size_(size) {}

  /// @brief Builds a cache description, taking over the storage of @p name.
  Cache(std::string&& name, uint8_t level, uint32_t size)
      : name_(std::move(name)),
        level_(level),
        size_(size) {}

  /// @brief Queries the attribute @p attribute, writing the result through
  /// @p value.
  hsa_status_t GetInfo(hsa_cache_info_t attribute, void* value);

 private:
  std::string name_;
  uint32_t level_;
  uint32_t size_;

  // Cache objects can be neither copied nor moved.
  DISALLOW_COPY_AND_ASSIGN(Cache);
};

}   // namespace core
}   // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_CACHE_H


================================================
FILE: runtime/hsa-runtime/core/inc/checked.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_CHECKED_H_
#define HSA_RUNTME_CORE_INC_CHECKED_H_

#include <stdint.h>
#include <stdlib.h>

namespace rocr {
namespace core {

/// @brief Validity stamp used for cast validation.
///
/// The primary template stores the object's own address XOR-ed with the type
/// code. IsValid() recomputes that value, so a stamp is only accepted at the
/// address it was constructed at. Copy and move construction stamp the new
/// object with its own address, assignment leaves the stamp untouched and the
/// destructor clears it.
template <uint64_t code, bool multiProcess = false> class Check final {
 public:
  using CheckType = Check<code>;

  Check() : object_(Stamp()) {}
  Check(const Check&) : object_(Stamp()) {}
  Check(Check&&) : object_(Stamp()) {}

  ~Check() { object_ = 0; }

  // Assignment deliberately keeps the existing stamp.
  const Check& operator=(Check&&) { return *this; }
  const Check& operator=(const Check&) { return *this; }

  /// @brief True while the stored stamp matches this object's address and code.
  bool IsValid() const { return Stamp() == object_; }

  /// @brief The type code this stamp was instantiated with.
  uint64_t check_code() const { return code; }

 private:
  /// @brief Stamp value expected for an object living at this address.
  uintptr_t Stamp() const { return uintptr_t(this) ^ uintptr_t(code); }

  uintptr_t object_;
};

/// @brief Address-independent variant: the stamp is the type code alone, so
/// it does not depend on where the object lives (going by the parameter name,
/// this is meant for objects visible to several processes). There is no
/// destructor that clears the stamp.
///
/// NOTE(review): CheckType names Check<code> (i.e. the multiProcess = false
/// instantiation), exactly as in the primary template.
template <uint64_t code> class Check<code, true> final {
 public:
  using CheckType = Check<code>;

  Check() : object_(uintptr_t(code)) {}
  Check(const Check&) : object_(uintptr_t(code)) {}
  Check(Check&&) : object_(uintptr_t(code)) {}

  // Assignment deliberately keeps the existing stamp.
  const Check& operator=(Check&&) { return *this; }
  const Check& operator=(const Check&) { return *this; }

  /// @brief True while the stored stamp equals the type code.
  bool IsValid() const { return uintptr_t(code) == object_; }

  /// @brief The type code this stamp was instantiated with.
  uint64_t check_code() const { return code; }

 private:
  uintptr_t object_;
};

/// @brief Polymorphic base that embeds a Check stamp so that objects derived
/// from it can be validated.
template <uint64_t code> class Checked {
 public:
  using CheckedType = Checked<code>;

  virtual ~Checked() = default;

  /// @brief Forwards to the embedded stamp's validity test.
  bool IsValid() const { return id.IsValid(); }

 private:
  // Address-bound stamp (the multiProcess = false flavour of Check).
  Check<code, false> id;
};

}  // namespace core
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/default_signal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_
#define HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_

#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/utils.h"

namespace rocr {
namespace core {

/// @brief Operations for a simple, purely memory based signal.
/// @brief See base class Signal.
class BusyWaitSignal : public Signal {
 public:
  /// @brief Tells whether a Signal* may be safely converted to
  /// BusyWaitSignal* via static_cast.
  static __forceinline bool IsType(Signal* ptr) { return ptr->IsType(&rtti_id()); }

  /// @brief See base class Signal.
  explicit BusyWaitSignal(SharedSignal* abi_block, bool enableIPC);

  // The methods below correspond to the signal APIs: they load/store the
  // signal value or modify the existing value atomically, each with the
  // memory ordering semantics named in its suffix.

  // Load / store.
  hsa_signal_value_t LoadRelaxed();
  hsa_signal_value_t LoadAcquire();
  void StoreRelaxed(hsa_signal_value_t value);
  void StoreRelease(hsa_signal_value_t value);

  // Wait.
  hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint);
  hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint);

  // Bitwise AND.
  void AndRelaxed(hsa_signal_value_t value);
  void AndAcquire(hsa_signal_value_t value);
  void AndRelease(hsa_signal_value_t value);
  void AndAcqRel(hsa_signal_value_t value);

  // Bitwise OR.
  void OrRelaxed(hsa_signal_value_t value);
  void OrAcquire(hsa_signal_value_t value);
  void OrRelease(hsa_signal_value_t value);
  void OrAcqRel(hsa_signal_value_t value);

  // Bitwise XOR.
  void XorRelaxed(hsa_signal_value_t value);
  void XorAcquire(hsa_signal_value_t value);
  void XorRelease(hsa_signal_value_t value);
  void XorAcqRel(hsa_signal_value_t value);

  // Addition.
  void AddRelaxed(hsa_signal_value_t value);
  void AddAcquire(hsa_signal_value_t value);
  void AddRelease(hsa_signal_value_t value);
  void AddAcqRel(hsa_signal_value_t value);

  // Subtraction.
  void SubRelaxed(hsa_signal_value_t value);
  void SubAcquire(hsa_signal_value_t value);
  void SubRelease(hsa_signal_value_t value);
  void SubAcqRel(hsa_signal_value_t value);

  // Exchange.
  hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value);
  hsa_signal_value_t ExchAcquire(hsa_signal_value_t value);
  hsa_signal_value_t ExchRelease(hsa_signal_value_t value);
  hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value);

  // Compare-and-swap.
  hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, hsa_signal_value_t value);
  hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, hsa_signal_value_t value);
  hsa_signal_value_t CasRelease(hsa_signal_value_t expected, hsa_signal_value_t value);
  hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, hsa_signal_value_t value);

  /// @brief See the base class Signal. Yields the address of the signal's
  /// value field.
  __forceinline hsa_signal_value_t* ValueLocation() const {
    return (hsa_signal_value_t*)&signal_.value;
  }

  /// @brief See the base class Signal. This signal type has no event, so a
  /// null pointer is returned.
  __forceinline HsaEvent* EopEvent() { return nullptr; }

 protected:
  /// @brief True when @p id is this class's type tag.
  bool _IsA(rtti_t id) const { return &rtti_id() == id; }

 private:
  /// @brief Address of the function-local static serves as the unique type
  /// tag of this class.
  static __forceinline int& rtti_id() {
    static int type_tag = 0;
    return type_tag;
  }

  DISALLOW_COPY_AND_ASSIGN(BusyWaitSignal);
};

/// @brief Simple memory only signal using a new ABI block.
///
/// LocalSignal is listed (and therefore constructed) first; the ABI block it
/// exposes through signal() is then handed to the BusyWaitSignal base.
class DefaultSignal : private LocalSignal, public BusyWaitSignal {
 public:
  /// @brief Tells whether a Signal* may be safely converted to
  /// DefaultSignal* via static_cast.
  static __forceinline bool IsType(Signal* ptr) {
    return ptr->IsType(&rtti_id());
  }

  /// @brief See base class Signal.
  explicit DefaultSignal(hsa_signal_value_t initial_value, bool enableIPC = false)
      : LocalSignal(initial_value, enableIPC),
        BusyWaitSignal(signal(), enableIPC) {}

 protected:
  /// @brief True when @p id is this class's type tag or is accepted by the
  /// BusyWaitSignal base.
  bool _IsA(rtti_t id) const { return id == &rtti_id() || BusyWaitSignal::_IsA(id); }

 private:
  /// @brief Address of the function-local static serves as the unique type
  /// tag of this class.
  static __forceinline int& rtti_id() {
    static int type_tag = 0;
    return type_tag;
  }

  DISALLOW_COPY_AND_ASSIGN(DefaultSignal);
};

}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/driver.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_DRIVER_H_
#define HSA_RUNTME_CORE_INC_DRIVER_H_

#include <cstdint>
#include <limits>
#include <string>

#include "core/inc/memory_region.h"
#include "hsakmt/hsakmttypes.h"
#include "inc/hsa.h"

namespace rocr {
namespace core {

class Queue;

/// @brief Queries that can be issued to a kernel-mode driver.
enum class DriverQuery {
  GET_DRIVER_VERSION
};

/// @brief Kernel driver kinds known to the runtime. NUM_DRIVER_TYPES is the
/// trailing enumerator and therefore equals the number of kinds; KFD_VIRTIO
/// only exists when HSAKMT_VIRTIO_ENABLED is defined.
enum class DriverType {
  XDNA = 0,
  KFD = 1,
#ifdef HSAKMT_VIRTIO_ENABLED
  KFD_VIRTIO,
#endif
  NUM_DRIVER_TYPES
};

/// @brief Handle for exported / imported memory. A zero handle denotes
/// "no handle".
struct ShareableHandle {
  uint64_t handle = 0;

  /// @brief True when the handle is non-zero.
  bool IsValid() const { return !(handle == 0); }
};

/// @brief Kernel driver interface.
///
/// @details A class used to provide an interface between the core runtime
/// and agent kernel drivers. It also maintains state associated with active
/// kernel drivers.
///
/// Most operations are pure virtual and must be provided by a concrete
/// driver. A number of optional operations (SMI, memory sharing, ASAN header
/// page handling, PC sampling) have a default implementation in this class
/// that simply returns HSA_STATUS_ERROR_INVALID_AGENT.
class Driver {
public:
  /// @brief Construct the driver base.
  /// @param[in] kernel_driver_type Type of the kernel-mode driver.
  /// @param[in] devnode_name Name of the driver's device node.
  Driver(DriverType kernel_driver_type, std::string devnode_name);
  virtual ~Driver() = default;

  /// @brief Initialize the driver's state after opening.
  virtual hsa_status_t Init() = 0;

  /// @brief Release the driver's resources and close the kernel-mode
  /// driver.
  virtual hsa_status_t ShutDown() = 0;

  /// @brief Get driver version information.
  /// @retval HsaVersionInfo containing the driver's version information.
  const HsaVersionInfo& Version() const { return version_; }

  /// @brief Query the kernel-mode driver.
  /// @param[in] query Kind of query to perform.
  /// @retval HSA_STATUS_SUCCESS if the kernel-mode driver query was
  /// successful.
  virtual hsa_status_t QueryKernelModeDriver(DriverQuery query) = 0;

  /// @brief Open a connection to the driver using name_.
  /// @retval HSA_STATUS_SUCCESS if the driver was opened successfully.
  virtual hsa_status_t Open() = 0;

  /// @brief Close a connection to the open driver using fd_.
  /// @retval HSA_STATUS_SUCCESS if the driver was closed successfully.
  virtual hsa_status_t Close() = 0;

  /// @brief Get the system properties for nodes managed by this driver.
  /// @param[out] sys_props System properties filled in by the driver.
  virtual hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const = 0;

  /// @brief Get the properties for a specific node managed by this driver.
  /// @param[out] node_props Properties of the node specified by @p node_id.
  /// @param[in] node_id ID of the node being queried.
  virtual hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const = 0;

  /// @brief Get the edge (IO link) properties of a specific node (that is
  /// managed by this driver) in the topology graph.
  /// @param[out] io_link_props IO link properties of the node specified by @p node_id.
  /// @param[in] node_id ID of the node whose link properties are being queried.
  virtual hsa_status_t GetEdgeProperties(std::vector<HsaIoLinkProperties>& io_link_props,
                                         uint32_t node_id) const = 0;

  /// @brief Get the memory properties of a specific node.
  /// @param[in] node_id Node ID of the agent.
  /// @param[out] mem_props Memory properties of the node specified by @p node_id.
  /// @retval HSA_STATUS_SUCCESS if the driver successfully returns the node's
  /// memory properties.
  virtual hsa_status_t GetMemoryProperties(uint32_t node_id,
                                           std::vector<HsaMemoryProperties>& mem_props) const = 0;

  /// @brief Get the cache properties of a specific node.
  /// @param[in] node_id Node ID of the agent.
  /// @param[in] processor_id Processor identifier. NOTE(review): its exact
  /// meaning is not visible in this header - confirm against the implementations.
  /// @param[out] cache_props Cache properties of the node specified by @p node_id.
  /// @retval HSA_STATUS_SUCCESS if the driver successfully returns the node's cache properties.
  virtual hsa_status_t GetCacheProperties(uint32_t node_id, uint32_t processor_id,
                                          std::vector<HsaCacheProperties>& cache_props) const = 0;

  /// @brief Allocate agent-accessible memory (system or agent-local memory).
  /// @param[in] mem_region Memory region associated with the allocation.
  /// @param[in] alloc_flags Allocation flags.
  /// @param[out] mem pointer to newly allocated memory.
  /// @param[in] size Requested size (presumably in bytes - confirm).
  /// @param[in] node_id Node ID of the agent.
  /// @retval HSA_STATUS_SUCCESS if memory was successfully allocated or
  /// hsa_status_t error code if the memory allocation failed.
  virtual hsa_status_t AllocateMemory(const MemoryRegion &mem_region,
                                      MemoryRegion::AllocateFlags alloc_flags,
                                      void **mem, size_t size,
                                      uint32_t node_id) = 0;

  /// @brief Free memory.
  /// NOTE(review): presumably the counterpart of AllocateMemory - confirm.
  /// @param[in] mem Address of the memory to free.
  /// @param[in] size Size of the memory to free.
  virtual hsa_status_t FreeMemory(void *mem, size_t size) = 0;

  /// @brief Create an agent dispatch queue with user-mode access rights.
  /// @param[in] node_id Node ID of the agent on which the queue is being created.
  /// @param[in] type Queue's type.
  /// @param[in] queue_pct Maximum percentage of a queue's occupancy allowed.
  /// @param[in] priority Queue's priority for scheduling.
  /// @param[in] sdma_engine_id ID of the SDMA engine on which the queue is being created. Only used
  /// if @p type is one of the SDMA queue types.
  /// @param[in] queue_addr Address of the queue's ring buffer.
  /// @param[in] queue_size_bytes Size of the queue's ring buffer in bytes.
  /// @param[in] event HsaEvent for event-driven callbacks.
  /// @param[out] queue_resource Queue resource information populated by the driver.
  virtual hsa_status_t CreateQueue(uint32_t node_id, HSA_QUEUE_TYPE type, uint32_t queue_pct,
                                   HSA_QUEUE_PRIORITY priority, uint32_t sdma_engine_id,
                                   void* queue_addr, uint64_t queue_size_bytes, HsaEvent* event,
                                   HsaQueueResource& queue_resource) const = 0;

  /// @brief Destroy a queue.
  /// @param[in] queue_id Kernel-mode driver's assigned queue ID.
  virtual hsa_status_t DestroyQueue(HSA_QUEUEID queue_id) const = 0;

  /// @brief Update a queue's properties.
  /// @param[in] queue_id Kernel-mode driver's assigned queue ID.
  /// @param[in] queue_pct Maximum percentage of a queue's occupancy allowed.
  /// @param[in] priority Queue's priority for scheduling.
  /// @param[in] queue_addr Queue's ring buffer base address.
  /// @param[in] queue_size_bytes Size of the queue's ring buffer in bytes.
  /// @param[in] event HsaEvent for event-driven callbacks.
  virtual hsa_status_t UpdateQueue(HSA_QUEUEID queue_id, uint32_t queue_pct,
                                   HSA_QUEUE_PRIORITY priority, void* queue_addr,
                                   uint64_t queue_size_bytes, HsaEvent* event) const = 0;

  /// @brief Set the CU mask for a queue.
  /// @details This sets the CU bitmask for a queue. The CU mask determines which CUs
  /// a queue's dispatches can target. Currently this is only supported for GPU devices.
  /// @param[in] queue_id Kernel-mode driver's assigned queue ID.
  /// @param[in] cu_mask_count Number of CU bits in the mask.
  /// @param[in] queue_cu_mask New CU mask for the queue.
  virtual hsa_status_t SetQueueCUMask(HSA_QUEUEID queue_id, uint32_t cu_mask_count,
                                      uint32_t* queue_cu_mask) const = 0;

  /// @brief Allocate global wave sync (GWS) resource for a queue. This is only supported for GPUs.
  /// GWS can be used to synchronize wavefronts across the entire GPU device.
  /// @param[in] queue_id Kernel-mode driver's assigned queue ID.
  /// @param[in] num_gws Number of GWS slots.
  /// @param[in] first_gws First GWS slot.
  virtual hsa_status_t AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_gws,
                                     uint32_t* first_gws) const = 0;

  /// @brief Exports memory using dma-buf.
  ///
  /// @param[in] mem virtual address
  /// @param[in] size memory size in bytes
  /// @param[out] dmabuf_fd dma-buf file descriptor
  /// @param[out] offset memory offset in bytes
  virtual hsa_status_t ExportDMABuf(void *mem, size_t size, int *dmabuf_fd,
                                    size_t *offset) = 0;

  /// @brief Imports a memory chunk via dma-buf.
  ///
  /// @param[in] dmabuf_fd dma-buf file descriptor
  /// @param[in] agent agent to import the memory for
  /// @param[out] handle handle to the imported memory
  virtual hsa_status_t ImportDMABuf(int dmabuf_fd, core::Agent &agent,
                                    core::ShareableHandle &handle) = 0;

  /// @brief Maps the memory associated with the handle.
  ///
  /// @param[in] handle handle to the memory object
  /// @param[in] mem virtual address associated with the handle
  /// @param[in] offset memory offset in bytes
  /// @param[in] size memory size in bytes
  /// @param[in] perms new permissions
  virtual hsa_status_t Map(core::ShareableHandle handle, void *mem,
                           size_t offset, size_t size,
                           hsa_access_permission_t perms) = 0;

  /// @brief Unmaps the memory associated with the handle.
  ///
  /// @param[in] handle handle to the memory object
  /// @param[in] mem virtual address associated with the handle
  /// @param[in] offset memory offset in bytes
  /// @param[in] size memory size in bytes
  virtual hsa_status_t Unmap(core::ShareableHandle handle, void *mem,
                             size_t offset, size_t size) = 0;

  /// @brief Releases the object associated with the handle.
  ///
  /// @param[in] handle handle of the object to release
  virtual hsa_status_t
  ReleaseShareableHandle(core::ShareableHandle &handle) = 0;

  /// @brief Acquire a streaming performance monitor on an agent.
  /// @param[in] preferred_node_id Node ID of the preferred agent.
  virtual hsa_status_t SPMAcquire(uint32_t preferred_node_id) const = 0;
  /// @brief Release a streaming performance monitor on an agent.
  /// @param[in] preferred_node_id Node ID of the preferred agent.
  virtual hsa_status_t SPMRelease(uint32_t preferred_node_id) const = 0;
  /// @brief Setup the destination user-mode buffer for streaming performance monitor data.
  /// @param[in] preferred_node_id Node ID of the preferred agent.
  /// @param[in] size_bytes Size of the destination buffer in bytes.
  /// @param[in, out] timeout Timeout in milliseconds.
  /// @param[out] size_copied Size of data copied in bytes.
  /// @param[in] dest_mem_addr Destination address for streaming performance data. Set to NULL to
  /// stop copy on previous buffer.
  /// @param[out] is_spm_data_loss Data was lost if true.
  virtual hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes,
                                        uint32_t* timeout, uint32_t* size_copied,
                                        void* dest_mem_addr, bool* is_spm_data_loss) const = 0;

  /// @brief Open anonymous file descriptor to enable events and read SMI events.
  /// @param[in] node_id Node ID to receive the SMI event from.
  /// @param[out] fd Anonymous file descriptor.
  /// @retval HSA_STATUS_ERROR_INVALID_AGENT if the agent's driver doesn't support
  /// SMI events. This is what the default implementation returns.
  virtual hsa_status_t OpenSMI(uint32_t node_id, int* fd) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Sets trap handler and trap buffer to be used for all queues associated
  /// with the specified NodeId within this process context
  /// @param[in] node_id Node ID of the agent
  /// @param[in] base Trap handler base address
  /// @param[in] base_size Trap handler base size
  /// @param[in] buffer_base Trap buffer base address
  /// @param[in] buffer_base_size Trap buffer size
  /// @return HSA_STATUS_SUCCESS if the driver successfully sets the trap handler.
  virtual hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                                      const void* buffer_base, uint64_t buffer_base_size) const = 0;

  /// @brief Gets the device handle for a specific node.
  /// @param[in] node_id Node ID of the agent
  /// @param[out] device_handle Device handle
  /// @return HSA_STATUS_SUCCESS if the driver successfully returns the device handle.
  virtual hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const = 0;


  /// @brief Gets clock counters for a particular node.
  /// @param[in] node_id Node ID of the agent
  /// @param[out] clock_counter Clock counter
  /// @return HSA_STATUS_SUCCESS if the driver successfully returns the clock counters.
  virtual hsa_status_t GetClockCounters(uint32_t node_id,
                                        HsaClockCounters* clock_counter) const = 0;

  /// @brief Get the tile configuration for a specific node.
  ///
  /// @param[in] node_id Node ID of the agent
  /// @param[out] config Pointer to tile configuration
  /// @return HSA_STATUS_SUCCESS if the driver successfully returns the tile configuration.
  virtual hsa_status_t GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const = 0;

  /// @brief Check if the HSA KMT Model is enabled
  /// @param[out] enable True if the model is enabled, false otherwise
  virtual hsa_status_t IsModelEnabled(bool* enable) const = 0;

  /// @brief Gets the wallclock frequency for a specific node.
  /// @param[in] node_id Node ID of the agent
  /// @param[out] frequency Pointer to the wallclock frequency
  /// @return HSA_STATUS_SUCCESS if the wallclock frequency was successfully retrieved, or an error
  /// code.
  virtual hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const = 0;

  /// @brief Allocates scratch memory for the agent.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] size Size of the scratch memory
  /// @param[out] mem Pointer to the scratch memory
  /// @return HSA_STATUS_SUCCESS if scratch memory allocated successfully.
  virtual hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const = 0;

  /// @brief Inquires memory available for allocation as a memory buffer
  /// @param[in] node_id Node ID of the agent
  /// @param[out] available_size Available memory size in bytes
  /// @return HSA_STATUS_SUCCESS if the driver successfully returns the available memory size.
  virtual hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const = 0;

  /// @brief Register memory to GPU
  /// @param[in] ptr Address of memory to be registered
  /// @param[in] size Size of memory
  /// @param[in] mem_flags Flags of memory registering
  /// @return HSA_STATUS_SUCCESS if memory registered successfully.
  virtual hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const = 0;

  /// @brief Deregister memory
  /// @param[in] ptr Pointer to the memory to be deregistered
  /// @return HSA_STATUS_SUCCESS if the memory was deregistered successfully.
  virtual hsa_status_t DeregisterMemory(void* ptr) const = 0;

  /// @brief Make the memory resident so that it can be accessed by the GPU
  /// @param[in] mem address of memory to be made resident
  /// @param[in] size size of memory
  /// @param[out] alternate_va alternate virtual address
  /// @param[in] mem_flags memory flags can be null
  /// @param[in] num_nodes number of nodes to be used can be 0 if not used
  /// @param[in] nodes nodes to be used can be null
  /// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory resident.
  virtual hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                          const HsaMemMapFlags* mem_flags = nullptr,
                                          uint32_t num_nodes = 0,
                                          const uint32_t* nodes = nullptr) const = 0;

  /// @brief Releases the residency of the memory
  /// @param[in] mem address of memory to be made unresident
  /// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory unresident.
  virtual hsa_status_t MakeMemoryUnresident(const void* mem) const = 0;

  /// @brief Shares memory with another process.
  /// @param[in] mem Pointer to the memory to be shared.
  /// @param[in] size Size of the memory to be shared.
  /// @param[out] share_mem Pointer to the shared memory handle.
  /// @return HSA_STATUS_SUCCESS if the memory was successfully shared, or an error code.
  /// The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t ShareMemory(void* mem, size_t size, HsaSharedMemoryHandle* share_mem) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Registers a shared memory handle.
  /// @param[in] share_mem Pointer to the shared memory handle.
  /// @param[out] mem Pointer to the memory.
  /// @param[out] size Size of the memory.
  /// @return HSA_STATUS_SUCCESS if the memory was successfully registered, or an error code.
  /// The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t RegisterSharedHandle(const HsaSharedMemoryHandle* share_mem, void** mem,
                                            uint64_t* size) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Replaces the ASAN header page with a valid one.
  /// @param[in] mem Pointer to the memory to be replaced.
  /// @return HSA_STATUS_SUCCESS if the ASAN header page was successfully replaced, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t ReplaceAsanHeaderPage(void* mem) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Returns the ASAN header page to its original state.
  /// @param[in] mem Pointer to the memory to be returned.
  /// @return HSA_STATUS_SUCCESS if the ASAN header page was successfully returned, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t ReturnAsanHeaderPage(void* mem) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Queries the PC sampling capabilities.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] sample_info Pointer to the sample information
  /// @param[in] sample_info_sz Size of the sample information
  /// @param[out] sz_needed Size of the sample information needed
  /// @return HSA_STATUS_SUCCESS if the PC sampling capabilities were successfully queried, or an
  /// error code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
                                                   uint32_t sample_info_sz,
                                                   uint32_t* sz_needed) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Creates a PC sampling session.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] sample_info Pointer to the sample information
  /// @param[out] trace_id Pointer to the trace ID
  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully created, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
                                        uint32_t* trace_id) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Destroys a PC sampling session.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] trace_id Trace ID of the PC sampling session
  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully destroyed, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Starts a PC sampling session.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] trace_id Trace ID of the PC sampling session
  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully started, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// @brief Stops a PC sampling session.
  /// @param[in] node_id Node ID of the agent
  /// @param[in] trace_id Trace ID of the PC sampling session
  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully stopped, or an error
  /// code. The default implementation returns HSA_STATUS_ERROR_INVALID_AGENT.
  virtual hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  /// Unique identifier for supported kernel-mode drivers.
  const DriverType kernel_driver_type_;

protected:
 /// Version information returned by Version(). Both initializer values are the
 /// uint32_t maximum until a concrete driver overwrites them.
 HsaVersionInfo version_{std::numeric_limits<uint32_t>::max(),
                         std::numeric_limits<uint32_t>::max()};

 /// Name of the driver's device node.
 const std::string devnode_name_;
 /// Descriptor of the open driver connection; starts at -1.
 /// NOTE(review): presumably -1 means "not open" - confirm against Open()/Close().
 int fd_ = -1;
};

} // namespace core
} // namespace rocr

#endif // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/exceptions.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_EXCEPTIONS_H
#define HSA_RUNTIME_CORE_INC_EXCEPTIONS_H

#include <exception>
#include <string>

#include "core/inc/hsa_internal.h"

namespace rocr {
namespace AMD {

/// @brief Exception type which carries an error code to return to the user.
class hsa_exception : public std::exception {
 public:
  hsa_exception(hsa_status_t error, const char* description) : err_(error), desc_(description) {}
  hsa_status_t error_code() const noexcept { return err_; }
  const char* what() const noexcept override { return desc_.c_str(); }

 private:
  hsa_status_t err_;
  std::string desc_;
};

/// @brief Holds and invokes callbacks, capturing any exceptions and forwarding those to the user
/// after unwinding the runtime stack.
template <class F> class callback_t;

/// @brief Specialization for plain function pointers.
///
/// The wrapper stores a single function pointer. Invoking it calls the stored
/// function; anything the function throws is caught and replaced by a thrown
/// std::nested_exception, which captures the in-flight exception.
///
/// None of the observers modify the wrapper, so they are const-qualified and
/// usable on const callback_t objects.
template <class R, class... Args> class callback_t<R (*)(Args...)> {
 public:
  using func_t = R (*)(Args...);

  /// Creates an empty wrapper holding a null function pointer.
  callback_t() : function(nullptr) {}

  // Should not be marked explicit.
  callback_t(func_t function_ptr) : function(function_ptr) {}
  callback_t& operator=(func_t function_ptr) {
    function = function_ptr;
    return *this;
  }

  bool operator==(func_t function_ptr) const { return function == function_ptr; }
  bool operator!=(func_t function_ptr) const { return function != function_ptr; }

  // Allows common function pointer idioms, such as if( func != nullptr )...
  // without allowing silent reversion to the original function pointer type.
  operator void*() const { return reinterpret_cast<void*>(function); }

  /// @brief Invoke the stored function with @p args.
  /// @throws std::nested_exception wrapping whatever the callback threw.
  R operator()(Args... args) const {
    try {
      return function(args...);
    } catch (...) {
      // The default-constructed nested_exception captures the current exception.
      throw std::nested_exception();
    }
  }

 private:
  func_t function;
};

}  // namespace amd
}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_EXCEPTIONS_H


================================================
FILE: runtime/hsa-runtime/core/inc/host_queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_
#define HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_

#include "core/inc/memory_region.h"
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"

namespace rocr {
namespace core {
/// @brief Queue implementation whose read and write indices live in
/// amd_queue_ and are accessed through the atomic helper functions.
///
/// Priority changes and CU masking are answered with
/// HSA_STATUS_ERROR_INVALID_QUEUE; ExecutePM4 and GetInfo are unimplemented
/// and assert when called.
class HostQueue : public Queue {
 public:
  /// @brief Tell whether @p queue identifies itself with this class's rtti id.
  static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }

  HostQueue(core::SharedQueue* shared_queue, hsa_region_t region, uint32_t ring_size,
            hsa_queue_type32_t type, uint32_t features, hsa_signal_t doorbell_signal);

  ~HostQueue();

  /// Inactivation always reports success.
  hsa_status_t Inactivate() override { return HSA_STATUS_SUCCESS; }

  /// Priority changes are rejected.
  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  // --- Read index loads ------------------------------------------------------

  uint64_t LoadReadIndexAcquire() override {
    return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
  }

  uint64_t LoadReadIndexRelaxed() override {
    return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed);
  }

  // --- Write index loads -----------------------------------------------------

  uint64_t LoadWriteIndexAcquire() override {
    return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire);
  }

  uint64_t LoadWriteIndexRelaxed() override {
    return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed);
  }

  // --- Read index stores -----------------------------------------------------

  void StoreReadIndexRelaxed(uint64_t value) override {
    atomic::Store(&amd_queue_.read_dispatch_id, value, std::memory_order_relaxed);
  }

  void StoreReadIndexRelease(uint64_t value) override {
    atomic::Store(&amd_queue_.read_dispatch_id, value, std::memory_order_release);
  }

  // --- Write index stores ----------------------------------------------------

  void StoreWriteIndexRelaxed(uint64_t value) override {
    atomic::Store(&amd_queue_.write_dispatch_id, value, std::memory_order_relaxed);
  }

  void StoreWriteIndexRelease(uint64_t value) override {
    atomic::Store(&amd_queue_.write_dispatch_id, value, std::memory_order_release);
  }

  // --- Write index compare-and-swap -------------------------------------------
  // Note the helper's argument order: the new value comes before the expected one.

  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_acq_rel);
  }

  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_acquire);
  }

  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_relaxed);
  }

  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_release);
  }

  // --- Write index add -------------------------------------------------------

  uint64_t AddWriteIndexAcqRel(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_acq_rel);
  }

  uint64_t AddWriteIndexAcquire(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_acquire);
  }

  uint64_t AddWriteIndexRelaxed(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_relaxed);
  }

  uint64_t AddWriteIndexRelease(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_release);
  }

  /// CU masking is rejected.
  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) override {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  /// CU masking is rejected.
  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) override {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  /// Unimplemented; asserts when called.
  void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                  hsa_signal_t* signal = nullptr) override {
    assert(false && "HostQueue::ExecutePM4 is unimplemented");
  }

  /// Unimplemented; asserts when called and otherwise reports an invalid queue.
  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) override {
    assert(false && "HostQueue::GetInfo is unimplemented");
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  /// Allocates storage aligned to HSA_QUEUE_ALIGN_BYTES, with the size rounded
  /// up to a multiple of that alignment.
  void* operator new(size_t size) {
    const auto padded_size = AlignUp(size, HSA_QUEUE_ALIGN_BYTES);
    return _aligned_malloc(padded_size, HSA_QUEUE_ALIGN_BYTES);
  }

  /// Placement form: the supplied storage is used as is.
  void* operator new(size_t size, void* ptr) { return ptr; }

  void operator delete(void* ptr) { _aligned_free(ptr); }

  /// Matching placement delete; nothing to release.
  void operator delete(void*, void*) {}

 protected:
  bool _IsA(Queue::rtti_t id) const override { return &rtti_id() == id; }

 private:
  /// Address of this function-local static serves as the class's rtti id.
  static __forceinline int& rtti_id() {
    static int id = 0;
    return id;
  }
  static const size_t kRingAlignment = 256;
  const uint32_t size_;
  void* ring_;

  // Host queue id counter, starting from 0x80000000 to avoid overlaping
  // with aql queue id.
  static __forceinline std::atomic<uint32_t>& queue_count() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::atomic<uint32_t>* const counter = new std::atomic<uint32_t>();
    return *counter;
  }

  DISALLOW_COPY_AND_ASSIGN(HostQueue);
};
}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/hsa_amd_tool_int.hpp
================================================
#ifndef HSA_RUNTIME_INC_HSA_TOOL_HOOK_IMPL_H
#define HSA_RUNTIME_INC_HSA_TOOL_HOOK_IMPL_H

#include "inc/hsa_amd_tool.h"
#include "runtime.h"

// namespace rocr::AMD::tool {  // C++17
namespace rocr { namespace AMD { namespace tool {

/// Short alias for the scratch allocation flag type used by the helpers below.
using scratch_alloc_flag = hsa_amd_event_scratch_alloc_flag_t;

// Forward declarations of the scratch-event notification helpers. Parameter
// names match the definitions in the Impl section (`flags`, not `flag`).

/// @brief Notify the tool layer that a scratch allocation is starting.
__forceinline void notify_event_scratch_alloc_start(const hsa_queue_t* queue,
                                                    scratch_alloc_flag flags,
                                                    uint64_t dispatch_id);

/// @brief Notify the tool layer that a scratch allocation has finished.
__forceinline void notify_event_scratch_alloc_end(const hsa_queue_t* queue,
                                                  scratch_alloc_flag flags, uint64_t dispatch_id,
                                                  size_t size, size_t num_slots);

/// @brief Notify the tool layer that a scratch free is starting.
__forceinline void notify_event_scratch_free_start(const hsa_queue_t* queue,
                                                   scratch_alloc_flag flags);

/// @brief Notify the tool layer that a scratch free has finished.
__forceinline void notify_event_scratch_free_end(const hsa_queue_t* queue,
                                                 scratch_alloc_flag flags);

/// @brief Notify the tool layer that an asynchronous scratch reclaim is starting.
__forceinline void notify_event_scratch_async_reclaim_start(const hsa_queue_t* queue,
                                                            scratch_alloc_flag flags);

/// @brief Notify the tool layer that an asynchronous scratch reclaim has finished.
__forceinline void notify_event_scratch_async_reclaim_end(const hsa_queue_t* queue,
                                                          scratch_alloc_flag flags);


// Impl

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START event to the callback
// stored in the tools API table. Returns without doing anything when that
// callback is null.
__forceinline void notify_event_scratch_alloc_start(const hsa_queue_t* queue,
                                                    scratch_alloc_flag flags,
                                                    uint64_t dispatch_id) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_alloc_start_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_alloc_start_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
      .queue = queue,
      .flags = flags,
      .dispatch_id = dispatch_id,
  };

  callback(hsa_amd_tool_event_t{.scratch_alloc_start = &record});
}

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END event (including `size` and
// `num_slots`) to the callback stored in the tools API table. Returns without
// doing anything when that callback is null.
__forceinline void notify_event_scratch_alloc_end(const hsa_queue_t* queue,
                                                  scratch_alloc_flag flags, uint64_t dispatch_id,
                                                  size_t size, size_t num_slots) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_alloc_end_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_alloc_end_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
      .queue = queue,
      .flags = flags,
      .dispatch_id = dispatch_id,
      .size = size,
      .num_slots = num_slots,
  };

  callback(hsa_amd_tool_event_t{.scratch_alloc_end = &record});
}

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START event to the callback
// stored in the tools API table. Returns without doing anything when that
// callback is null.
__forceinline void notify_event_scratch_free_start(const hsa_queue_t* queue,
                                                   scratch_alloc_flag flags) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_free_start_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_free_start_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
      .queue = queue,
      .flags = flags,
  };

  callback(hsa_amd_tool_event_t{.scratch_free_start = &record});
}

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END event to the callback stored
// in the tools API table. Returns without doing anything when that callback
// is null.
__forceinline void notify_event_scratch_free_end(const hsa_queue_t* queue,
                                                 scratch_alloc_flag flags) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_free_end_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_free_end_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
      .queue = queue,
      .flags = flags,
  };

  callback(hsa_amd_tool_event_t{.scratch_free_end = &record});
}

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START event to the
// callback stored in the tools API table. Returns without doing anything when
// that callback is null.
__forceinline void notify_event_scratch_async_reclaim_start(const hsa_queue_t* queue,
                                                            scratch_alloc_flag flags) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_async_reclaim_start_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_async_reclaim_start_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
      .queue = queue,
      .flags = flags,
  };

  callback(hsa_amd_tool_event_t{.scratch_async_reclaim_start = &record});
}

// Delivers a HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END event to the
// callback stored in the tools API table. Returns without doing anything when
// that callback is null.
__forceinline void notify_event_scratch_async_reclaim_end(const hsa_queue_t* queue,
                                                          scratch_alloc_flag flags) {
  const auto& tools = core::hsa_api_table().tools_api;
  const auto callback = tools.hsa_amd_tool_scratch_event_async_reclaim_end_fn;
  if (!callback) return;

  // The record lives on the stack; the callback only receives a pointer to it.
  hsa_amd_event_scratch_async_reclaim_end_t record{
      .kind = HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,
      .queue = queue,
      .flags = flags,
  };

  callback(hsa_amd_tool_event_t{.scratch_async_reclaim_end = &record});
}

// }  // namespace rocr::AMD::tool
}  // namespace tool
}  // namespace AMD
}  // namespace rocr

#endif

================================================
FILE: runtime/hsa-runtime/core/inc/hsa_api_trace_int.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H
#define HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H

#include "inc/hsa_api_trace.h"
#include "core/inc/hsa_internal.h"

namespace rocr {
namespace core {
  // Groups the runtime's API dispatch tables (declared at global scope, see
  // inc/hsa_api_trace.h) together with the member functions that operate on
  // them.
  struct HsaApiTable {

    // Identifiers for the individual extension tables.
    // NOTE(review): presumably the values passed as `table_id` to
    // CloneExts()/LinkExts() — confirm against the implementation.
    static const uint32_t HSA_EXT_FINALIZER_API_TABLE_ID = 0;
    static const uint32_t HSA_EXT_IMAGE_API_TABLE_ID = 1;
    static const uint32_t HSA_EXT_AQLPROFILE_API_TABLE_ID = 2;
    static const uint32_t HSA_EXT_PC_SAMPLING_API_TABLE_ID = 3;

    // Top-level table followed by one table per API family.
    // NOTE(review): hsa_api presumably holds pointers to the sibling tables
    // below — confirm in the implementation file.
    ::HsaApiTable hsa_api;
    ::CoreApiTable core_api;
    ::AmdExtTable amd_ext_api;
    ::FinalizerExtTable finalizer_api;
    ::ImageExtTable image_api;
    ::ToolsApiTable tools_api;
    ::PcSamplingExtTable pcs_api;

    // Construction and (re)population of the tables. The bodies live in the
    // implementation file; their exact effects are not visible from this
    // header.
    HsaApiTable();
    void Init();
    void UpdateCore();
    void UpdateAmdExts();
    void UpdateTools();
    // `ptr` is an untyped pointer to an extension table; `table_id` selects
    // which table it refers to (see the *_API_TABLE_ID constants above —
    // TODO confirm).
    void CloneExts(void* ptr, uint32_t table_id);
    void LinkExts(void* ptr, uint32_t table_id);
    void Reset();
  };

  // Accessors returning references to HsaApiTable instances; defined elsewhere.
  // NOTE(review): presumably hsa_api_table() is the table that tools may
  // intercept and hsa_internal_api_table() the runtime's own unmodified
  // copy — confirm against the definitions.
  extern HsaApiTable& hsa_api_table();
  extern HsaApiTable& hsa_internal_api_table();

  // Defined elsewhere. NOTE(review): the name suggests it installs the
  // initial contents of the API table — confirm.
  void LoadInitialHsaApiTable();
}   //  namespace core
}   //  namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA AMD extension.

#ifndef HSA_RUNTIME_CORE_INC_EXT_AMD_H_
#define HSA_RUNTIME_CORE_INC_EXT_AMD_H_

#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ext_amd.h"

// Wrap internal implementation inside AMD namespace
namespace rocr {
namespace AMD {

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_coherency_get_type(hsa_agent_t agent,
                                                hsa_amd_coherency_type_t* type);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_coherency_set_type(hsa_agent_t agent,
                                                hsa_amd_coherency_type_t type);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_profiling_async_copy_enable(bool enable);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent, hsa_signal_t signal,
    hsa_amd_profiling_dispatch_time_t* time);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_profiling_get_async_copy_time(
    hsa_signal_t signal, hsa_amd_profiling_async_copy_time_t* time);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent,
                                                    uint64_t agent_tick,
                                                    uint64_t* system_tick);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_signal_async_handler(hsa_signal_t signal,
                                 hsa_signal_condition_t cond,
                                 hsa_signal_value_t value,
                                 hsa_amd_signal_handler handler, void* arg);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_async_function(void (*callback)(void* arg), void* arg);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
                                           const hsa_agent_t* consumers, uint64_t attributes,
                                           hsa_signal_t* signal);

// Mirrors Amd Extension Apis
uint32_t hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* signals,
                                 hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                 uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                 hsa_signal_value_t* satisfying_values);

// Mirrors Amd Extension Apis
uint32_t
    hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
                            hsa_signal_condition_t* conds,
                            hsa_signal_value_t* values, uint64_t timeout_hint,
                            hsa_wait_state_t wait_hint,
                            hsa_signal_value_t* satisfying_value);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
                                               uint32_t num_cu_mask_count,
                                               const uint32_t* cu_mask);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_queue_cu_get_mask(const hsa_queue_t* queue, uint32_t num_cu_mask_count,
                                               uint32_t* cu_mask);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
                                 hsa_amd_memory_pool_info_t attribute,
                                 void* value);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_agent_iterate_memory_pools(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
    void* data);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
                                 uint32_t flags, void** ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_pool_free(void* ptr);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src,
                              hsa_agent_t src_agent, size_t size,
                              uint32_t num_dep_signals,
                              const hsa_signal_t* dep_signals,
                              hsa_signal_t completion_signal);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent, const void* src,
                              hsa_agent_t src_agent, size_t size,
                              uint32_t num_dep_signals,
                              const hsa_signal_t* dep_signals,
                              hsa_signal_t completion_signal,
                              hsa_amd_sdma_engine_id_t engine_id,
                              bool force_copy_on_sdma);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
                                      uint32_t *engine_ids_mask);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
                                             uint32_t* recommended_ids_mask);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_async_copy_rect(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_agent_memory_pool_get_info(
    hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool,
    hsa_amd_agent_memory_pool_info_t attribute, void* value);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
                                const uint32_t* flags, const void* ptr);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
                                    hsa_amd_memory_pool_t dst_memory_pool,
                                    bool* result);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_migrate(const void* ptr,
                                            hsa_amd_memory_pool_t memory_pool,
                                            uint32_t flags);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_lock(void* host_ptr, size_t size,
                                         hsa_agent_t* agents, int num_agent,
                                         void** agent_ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
                                                 int num_agent, hsa_amd_memory_pool_t pool,
                                                 uint32_t flags, void** agent_ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_unlock(void* host_ptr);

// Mirrors Amd Extension Apis
hsa_status_t
    hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_interop_map_buffer(uint32_t num_agents,
                                        hsa_agent_t* agents,
                                        int interop_handle,
                                        uint32_t flags,
                                        size_t* size,
                                        void** ptr,
                                        size_t* metadata_size,
                                        const void** metadata);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_pointer_info(const void* ptr, hsa_amd_pointer_info_t* info,
                                          void* (*alloc)(size_t), uint32_t* num_agents_accessible,
                                          hsa_agent_t** accessible);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_pointer_info_set_userdata(const void* ptr, void* userdata);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_ipc_memory_create(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* handle, size_t len,
                                               uint32_t num_agents,
                                               const hsa_agent_t* mapping_agents,
                                               void** mapped_ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_ipc_memory_detach(void* mapped_ptr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_signal_t* handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle,
                                               hsa_signal_t* signal);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_register_system_event_handler(hsa_amd_system_event_callback_t callback,
                                                           void* data);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_queue_set_priority(hsa_queue_t* queue,
                                                hsa_amd_queue_priority_t priority);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_register_deallocation_callback(
    void* ptr, hsa_amd_deallocation_callback_t callback, void* user_data);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_deregister_deallocation_callback(
    void* ptr, hsa_amd_deallocation_callback_t callback);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t signal,
                                          volatile hsa_signal_value_t** value_ptr);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                        uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                        hsa_signal_t completion_signal);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_spm_acquire(hsa_agent_t agent);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_spm_release(hsa_agent_t agent);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* timeout,
                                                 uint32_t* size_copied, void* dest,
                                                 bool* is_data_loss);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(const void* ptr,
                  size_t size, int* dmabuf, uint64_t* offset, uint64_t flags);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
                                                    uint64_t* offset);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_address_reserve(void** ptr, size_t size, uint64_t address,
                                          uint64_t flags);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_address_reserve_align(void** ptr, size_t size, uint64_t address,
                                          uint64_t alignment, uint64_t flags);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_address_free(void* ptr, size_t size);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_handle_create(hsa_amd_memory_pool_t pool, size_t size,
                                        hsa_amd_memory_type_t type, uint64_t flags,
                                        hsa_amd_vmem_alloc_handle_t* memory_handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_handle_release(hsa_amd_vmem_alloc_handle_t memory_handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_map(void* va, size_t size, size_t in_offset,
                              hsa_amd_vmem_alloc_handle_t memory_handle, uint64_t flags);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_unmap(void* va, size_t size);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_set_access(void* va, size_t size,
                                     const hsa_amd_memory_access_desc_t* desc,
                                     const size_t desc_cnt);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_get_access(void* va, hsa_access_permission_t* flags,
                                     const hsa_agent_t agent_handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_export_shareable_handle(int* dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t handle,
                                                  uint64_t flags);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_import_shareable_handle(int dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t* handle);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_retain_alloc_handle(hsa_amd_vmem_alloc_handle_t* allocHandle, void* addr);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_vmem_get_alloc_properties_from_handle(hsa_amd_vmem_alloc_handle_t allocHandle,
                                                           hsa_amd_memory_pool_t* pool,
                                                           hsa_amd_memory_type_t* type);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, size_t threshold);

// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute,
                                    void* value);

// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_enable_logging(uint8_t* flags, void* file);

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/hsa_ext_interface.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_
#define HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_

#include <string>
#include <vector>

#include "core/inc/hsa_api_trace_int.h"

#include "core/util/os.h"
#include "core/util/utils.h"

namespace rocr {
namespace core {
// ImageExtTable extended with one additional entry whose type is a pointer to
// a function with the signature of ::hsa_amd_image_get_info_max_dim.
struct ImageExtTableInternal : ImageExtTable {
  decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn;
};

// Internal counterpart of PcSamplingExtTable; currently adds no entries.
struct PcSamplingExtTableInternal : PcSamplingExtTable {};

// Holds the function-pointer tables for the HSA extensions (image, PC
// sampling, finalizer) and the operations that load and unload them.
class ExtensionEntryPoints {
 public:
  // Function-pointer table: HSA Image extension.
  ImageExtTableInternal image_api;

  // Function-pointer table: HSA vendor PC Sampling extension.
  PcSamplingExtTableInternal pcs_api;

  // Function-pointer table: HSA Finalizer extension.
  FinalizerExtTable finalizer_api;

  ExtensionEntryPoints();

  bool LoadFinalizer(std::string library_name);
  void Unload();

  // Points the Image API table at the implementation.
  bool LoadImage();

  // Points the API tables back at the null implementations.
  void UnloadImage();

  // Points the PC Sampling API table at the implementation.
  void LoadPcSampling();

  // Points the PC Sampling tables back at the null implementations.
  void UnloadPcSampling();

 private:
  // Function-pointer signatures used for load / unload entry points.
  using Load_t = void (*)(const ::HsaApiTable* table);
  using Unload_t = void (*)();

  // Library handles held by this object.
  std::vector<os::LibHandle> libs_;

  // Sets up the HSA Finalizer extension table.
  void InitFinalizerExtTable();

  // Sets up the HSA Image extension table.
  void InitImageExtTable();

  // Sets up the HSA PC Sampling extension table.
  void InitPcSamplingExtTable();

  // Sets up the image-related entries of the Amd Ext table.
  void InitAmdExtTable();

  // Updates the image-related entries of the Amd Ext table.
  void UpdateAmdExtTable(decltype(::hsa_amd_image_create)* func_ptr);

  DISALLOW_COPY_AND_ASSIGN(ExtensionEntryPoints);
};
}   //  namespace core
}   //  namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/inc/hsa_internal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H
#define HSA_RUNTIME_CORE_INC_HSA_INTERNAL_H

#include "inc/hsa.h"

namespace rocr {
namespace HSA {

  // Define core namespace interfaces - copy of function declarations in hsa.h
  // The declarations below live in rocr::HSA and are declared with the same names as the
  // corresponding entries of hsa.h.
  //
  //===--- Init / shutdown, system and extension queries, agents, caches --===//
  hsa_status_t hsa_init();
  hsa_status_t hsa_shut_down();
  hsa_status_t
    hsa_system_get_info(hsa_system_info_t attribute, void *value);
  hsa_status_t hsa_extension_get_name(uint16_t extension, const char** name);
  hsa_status_t hsa_system_extension_supported(uint16_t extension, uint16_t version_major,
                                                      uint16_t version_minor, bool* result);
  // "major" variants take version_minor as a pointer rather than by value.
  hsa_status_t hsa_system_major_extension_supported(uint16_t extension,
                                                            uint16_t version_major,
                                                            uint16_t* version_minor, bool* result);
  hsa_status_t
    hsa_system_get_extension_table(uint16_t extension, uint16_t version_major,
    uint16_t version_minor, void *table);
  hsa_status_t hsa_system_get_major_extension_table(uint16_t extension,
                                                            uint16_t version_major,
                                                            size_t table_length, void* table);
  // Iteration entry points take a callback plus an opaque pointer; the callback's own
  // signature also carries a void* data parameter.
  hsa_status_t
    hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data),
    void *data);
  hsa_status_t hsa_agent_get_info(hsa_agent_t agent,
    hsa_agent_info_t attribute,
    void *value);
  hsa_status_t hsa_agent_get_exception_policies(hsa_agent_t agent,
    hsa_profile_t profile,
    uint16_t *mask);
  hsa_status_t hsa_cache_get_info(hsa_cache_t cache, hsa_cache_info_t attribute,
                                          void* value);
  // NOTE(review): the trailing opaque pointer is named "value" here while the sibling
  // iterators name it "data" - naming only, the type is the same (void*).
  hsa_status_t hsa_agent_iterate_caches(
      hsa_agent_t agent, hsa_status_t (*callback)(hsa_cache_t cache, void* data), void* value);
  hsa_status_t
    hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent,
    uint16_t version_major,
    uint16_t version_minor, bool *result);
  hsa_status_t hsa_agent_major_extension_supported(uint16_t extension, hsa_agent_t agent,
                                                           uint16_t version_major,
                                                           uint16_t* version_minor, bool* result);
  //===--- Queues ---------------------------------------------------------===//
  // Queue creation/destruction plus read/write index accessors. The index accessors are
  // declared in several variants distinguished by name suffix (_relaxed, _scacquire,
  // _screlease, _scacq_screl).
  // NOTE(review): the suffixes presumably name the memory ordering of the access as defined
  // by hsa.h - confirm there; only the declarations are visible here.
  hsa_status_t
    hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t *source,
    void *data),
    void *data, uint32_t private_segment_size,
    uint32_t group_segment_size, hsa_queue_t **queue);
  hsa_status_t
    hsa_soft_queue_create(hsa_region_t region, uint32_t size,
    hsa_queue_type32_t type, uint32_t features,
    hsa_signal_t completion_signal, hsa_queue_t **queue);
  hsa_status_t hsa_queue_destroy(hsa_queue_t *queue);
  hsa_status_t hsa_queue_inactivate(hsa_queue_t *queue);
  // Index loads: take a const queue pointer and return a 64-bit index.
  uint64_t hsa_queue_load_read_index_scacquire(const hsa_queue_t* queue);
  uint64_t hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue);
  uint64_t hsa_queue_load_write_index_scacquire(const hsa_queue_t* queue);
  uint64_t hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue);
  // Write index stores.
  void hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue,
    uint64_t value);
  void hsa_queue_store_write_index_screlease(const hsa_queue_t* queue, uint64_t value);
  // Write index compare-and-swap: (queue, expected, value) -> uint64_t.
  uint64_t hsa_queue_cas_write_index_scacq_screl(const hsa_queue_t* queue,
                                                         uint64_t expected, uint64_t value);
  uint64_t hsa_queue_cas_write_index_scacquire(const hsa_queue_t* queue, uint64_t expected,
                                                       uint64_t value);
  uint64_t hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);
  uint64_t hsa_queue_cas_write_index_screlease(const hsa_queue_t* queue, uint64_t expected,
                                                       uint64_t value);
  // Write index add: (queue, value) -> uint64_t.
  uint64_t hsa_queue_add_write_index_scacq_screl(const hsa_queue_t* queue, uint64_t value);
  uint64_t hsa_queue_add_write_index_scacquire(const hsa_queue_t* queue, uint64_t value);
  uint64_t
    hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value);
  uint64_t hsa_queue_add_write_index_screlease(const hsa_queue_t* queue, uint64_t value);
  // Read index stores.
  void hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue,
    uint64_t value);
  void hsa_queue_store_read_index_screlease(const hsa_queue_t* queue, uint64_t value);
  //===--- Regions and memory ---------------------------------------------===//
  // Region enumeration/query followed by the hsa_memory_* declarations.
  hsa_status_t hsa_agent_iterate_regions(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
  hsa_status_t hsa_region_get_info(hsa_region_t region,
    hsa_region_info_t attribute,
    void *value);
  // register/deregister both take an (address, size) pair.
  hsa_status_t hsa_memory_register(void *address, size_t size);
  hsa_status_t hsa_memory_deregister(void *address, size_t size);
  // The resulting pointer is handed back through the void** out-parameter.
  hsa_status_t
    hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr);
  hsa_status_t hsa_memory_free(void *ptr);
  hsa_status_t hsa_memory_copy(void *dst, const void *src, size_t size);
  hsa_status_t hsa_memory_assign_agent(void *ptr, hsa_agent_t agent,
    hsa_access_permission_t access);
  //===--- Signals and signal groups --------------------------------------===//
  // Signal lifetime, load/store, wait, signal-group and read-modify-write declarations.
  // Most operations are declared once per name suffix (_relaxed, _scacquire, _screlease,
  // _scacq_screl).
  // NOTE(review): suffix semantics are defined by hsa.h, not by this header - confirm there.
  hsa_status_t
    hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
    const hsa_agent_t *consumers, hsa_signal_t *signal);
  hsa_status_t hsa_signal_destroy(hsa_signal_t signal);
  hsa_signal_value_t hsa_signal_load_relaxed(hsa_signal_t signal);
  hsa_signal_value_t hsa_signal_load_scacquire(hsa_signal_t signal);
  void
    hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_store_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_silent_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_silent_store_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  // Waits take a condition, a compare value, a timeout hint and a wait-state hint, and return
  // a signal value.
  hsa_signal_value_t
    hsa_signal_wait_relaxed(hsa_signal_t signal,
    hsa_signal_condition_t condition,
    hsa_signal_value_t compare_value,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_expectancy_hint);
  hsa_signal_value_t hsa_signal_wait_scacquire(hsa_signal_t signal,
                                                       hsa_signal_condition_t condition,
                                                       hsa_signal_value_t compare_value,
                                                       uint64_t timeout_hint,
                                                       hsa_wait_state_t wait_expectancy_hint);
  // Signal groups: created from arrays of signals and consumers; the wait_any variants take
  // per-signal condition/compare arrays and report through the signal/value out-parameters.
  hsa_status_t hsa_signal_group_create(uint32_t num_signals, const hsa_signal_t* signals,
                                               uint32_t num_consumers, const hsa_agent_t* consumers,
                                               hsa_signal_group_t* signal_group);
  hsa_status_t hsa_signal_group_destroy(hsa_signal_group_t signal_group);
  hsa_status_t hsa_signal_group_wait_any_scacquire(hsa_signal_group_t signal_group,
                                                           const hsa_signal_condition_t* conditions,
                                                           const hsa_signal_value_t* compare_values,
                                                           hsa_wait_state_t wait_state_hint,
                                                           hsa_signal_t* signal,
                                                           hsa_signal_value_t* value);
  hsa_status_t hsa_signal_group_wait_any_relaxed(hsa_signal_group_t signal_group,
                                                         const hsa_signal_condition_t* conditions,
                                                         const hsa_signal_value_t* compare_values,
                                                         hsa_wait_state_t wait_state_hint,
                                                         hsa_signal_t* signal,
                                                         hsa_signal_value_t* value);
  // and / or / xor / add / subtract: (signal, value), no return value.
  void
    hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_and_scacquire(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_and_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_and_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value);
  void
    hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_or_scacquire(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_or_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_or_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value);
  void
    hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_xor_scacquire(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_xor_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_xor_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value);
  void
    hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_add_scacquire(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_add_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_add_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value);
  void
    hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_subtract_scacquire(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_subtract_screlease(hsa_signal_t signal, hsa_signal_value_t value);
  void hsa_signal_subtract_scacq_screl(hsa_signal_t signal, hsa_signal_value_t value);
  // exchange / cas: return a signal value.
  hsa_signal_value_t
    hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_exchange_scacquire(hsa_signal_t signal,
                                                           hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_exchange_screlease(hsa_signal_t signal,
                                                           hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_exchange_scacq_screl(hsa_signal_t signal,
                                                             hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_cas_relaxed(hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_cas_scacquire(hsa_signal_t signal,
                                                      hsa_signal_value_t expected,
                                                      hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_cas_screlease(hsa_signal_t signal,
                                                      hsa_signal_value_t expected,
                                                      hsa_signal_value_t value);
  hsa_signal_value_t hsa_signal_cas_scacq_screl(hsa_signal_t signal,
                                                        hsa_signal_value_t expected,
                                                        hsa_signal_value_t value);

  //===--- Instruction Set Architecture -----------------------------------===//
  // ISA and wavefront declarations. Entries carrying an inline /* deprecated */ tag are
  // still declared here.
  // NOTE(review): the tag presumably mirrors the deprecation status in hsa.h - confirm there.

  hsa_status_t hsa_isa_from_name(
      const char *name,
      hsa_isa_t *isa);
  hsa_status_t hsa_agent_iterate_isas(
      hsa_agent_t agent,
      hsa_status_t (*callback)(hsa_isa_t isa,
                               void *data),
      void *data);
  // The deprecated form takes an extra uint32_t index that hsa_isa_get_info_alt does not.
  /* deprecated */ hsa_status_t hsa_isa_get_info(
      hsa_isa_t isa,
      hsa_isa_info_t attribute,
      uint32_t index,
      void *value);
  hsa_status_t hsa_isa_get_info_alt(
      hsa_isa_t isa,
      hsa_isa_info_t attribute,
      void *value);
  hsa_status_t hsa_isa_get_exception_policies(
      hsa_isa_t isa,
      hsa_profile_t profile,
      uint16_t *mask);
  hsa_status_t hsa_isa_get_round_method(
      hsa_isa_t isa,
      hsa_fp_type_t fp_type,
      hsa_flush_mode_t flush_mode,
      hsa_round_method_t *round_method);
  hsa_status_t hsa_wavefront_get_info(
      hsa_wavefront_t wavefront,
      hsa_wavefront_info_t attribute,
      void *value);
  hsa_status_t hsa_isa_iterate_wavefronts(
      hsa_isa_t isa,
      hsa_status_t (*callback)(hsa_wavefront_t wavefront,
                               void *data),
      void *data);
  /* deprecated */ hsa_status_t hsa_isa_compatible(
      hsa_isa_t code_object_isa,
      hsa_isa_t agent_isa,
      bool *result);

  //===--- Code Objects (deprecated) --------------------------------------===//
  // Every declaration in this section carries the /* deprecated */ tag.

  // Takes an allocation callback (size, callback data, out address) together with the
  // callback data to hand to it; the serialized object and its size are reported through the
  // two trailing out-parameters.
  /* deprecated */ hsa_status_t hsa_code_object_serialize(
      hsa_code_object_t code_object,
      hsa_status_t (*alloc_callback)(size_t size,
                                     hsa_callback_data_t data,
                                     void **address),
      hsa_callback_data_t callback_data,
      const char *options,
      void **serialized_code_object,
      size_t *serialized_code_object_size);
  /* deprecated */ hsa_status_t hsa_code_object_deserialize(
      void *serialized_code_object,
      size_t serialized_code_object_size,
      const char *options,
      hsa_code_object_t *code_object);
  /* deprecated */ hsa_status_t hsa_code_object_destroy(
      hsa_code_object_t code_object);
  /* deprecated */ hsa_status_t hsa_code_object_get_info(
      hsa_code_object_t code_object,
      hsa_code_object_info_t attribute,
      void *value);
  // Two symbol lookups: by symbol name only, or by module name plus symbol name.
  /* deprecated */ hsa_status_t hsa_code_object_get_symbol(
      hsa_code_object_t code_object,
      const char *symbol_name,
      hsa_code_symbol_t *symbol);
  /* deprecated */ hsa_status_t hsa_code_object_get_symbol_from_name(
      hsa_code_object_t code_object,
      const char *module_name,
      const char *symbol_name,
      hsa_code_symbol_t *symbol);
  /* deprecated */ hsa_status_t hsa_code_symbol_get_info(
      hsa_code_symbol_t code_symbol,
      hsa_code_symbol_info_t attribute,
      void *value);
  /* deprecated */ hsa_status_t hsa_code_object_iterate_symbols(
      hsa_code_object_t code_object,
      hsa_status_t (*callback)(hsa_code_object_t code_object,
                               hsa_code_symbol_t symbol,
                               void *data),
      void *data);

  //===--- Executable -----------------------------------------------------===//
  // Code object readers, executable lifetime/loading, variable definition, validation and
  // symbol lookup/iteration. Entries carrying /* deprecated */ sit next to their non-tagged
  // counterparts (e.g. hsa_executable_create / hsa_executable_create_alt).

  // A reader can be created from a file handle or from an in-memory (pointer, size) pair.
  hsa_status_t hsa_code_object_reader_create_from_file(
      hsa_file_t file,
      hsa_code_object_reader_t *code_object_reader);
  hsa_status_t hsa_code_object_reader_create_from_memory(
      const void *code_object,
      size_t size,
      hsa_code_object_reader_t *code_object_reader);
  hsa_status_t hsa_code_object_reader_destroy(
      hsa_code_object_reader_t code_object_reader);
  // The deprecated form takes an hsa_executable_state_t; the _alt form takes a default float
  // rounding mode instead.
  /* deprecated */ hsa_status_t hsa_executable_create(
      hsa_profile_t profile,
      hsa_executable_state_t executable_state,
      const char *options,
      hsa_executable_t *executable);
  hsa_status_t hsa_executable_create_alt(
      hsa_profile_t profile,
      hsa_default_float_rounding_mode_t default_float_rounding_mode,
      const char *options,
      hsa_executable_t *executable);
  hsa_status_t hsa_executable_destroy(
      hsa_executable_t executable);
  // The deprecated load takes an hsa_code_object_t; the two loads after it take a code object
  // reader and have an hsa_loaded_code_object_t out-parameter.
  /* deprecated */ hsa_status_t hsa_executable_load_code_object(
      hsa_executable_t executable,
      hsa_agent_t agent,
      hsa_code_object_t code_object,
      const char *options);
  hsa_status_t hsa_executable_load_program_code_object(
      hsa_executable_t executable,
      hsa_code_object_reader_t code_object_reader,
      const char *options,
      hsa_loaded_code_object_t *loaded_code_object);
  hsa_status_t hsa_executable_load_agent_code_object(
      hsa_executable_t executable,
      hsa_agent_t agent,
      hsa_code_object_reader_t code_object_reader,
      const char *options,
      hsa_loaded_code_object_t *loaded_code_object);
  hsa_status_t hsa_executable_freeze(
      hsa_executable_t executable,
      const char *options);
  hsa_status_t hsa_executable_get_info(
      hsa_executable_t executable,
      hsa_executable_info_t attribute,
      void *value);
  // Variable definitions: each takes a variable name and an address; the agent and readonly
  // forms additionally take an agent.
  hsa_status_t hsa_executable_global_variable_define(
      hsa_executable_t executable,
      const char *variable_name,
      void *address);
  hsa_status_t hsa_executable_agent_global_variable_define(
      hsa_executable_t executable,
      hsa_agent_t agent,
      const char *variable_name,
      void *address);
  hsa_status_t hsa_executable_readonly_variable_define(
      hsa_executable_t executable,
      hsa_agent_t agent,
      const char *variable_name,
      void *address);
  hsa_status_t hsa_executable_validate(
      hsa_executable_t executable,
      uint32_t *result);
  hsa_status_t hsa_executable_validate_alt(
      hsa_executable_t executable,
      const char *options,
      uint32_t *result);
  // The deprecated lookup takes the agent by value plus a module name and call convention;
  // the by_name form takes only a symbol name and a pointer to an agent.
  /* deprecated */ hsa_status_t hsa_executable_get_symbol(
      hsa_executable_t executable,
      const char *module_name,
      const char *symbol_name,
      hsa_agent_t agent,
      int32_t call_convention,
      hsa_executable_symbol_t *symbol);
  hsa_status_t hsa_executable_get_symbol_by_name(
      hsa_executable_t executable,
      const char *symbol_name,
      const hsa_agent_t *agent,
      hsa_executable_symbol_t *symbol);
  hsa_status_t hsa_executable_symbol_get_info(
      hsa_executable_symbol_t executable_symbol,
      hsa_executable_symbol_info_t attribute,
      void *value);
  /* deprecated */ hsa_status_t hsa_executable_iterate_symbols(
      hsa_executable_t executable,
      hsa_status_t (*callback)(hsa_executable_t executable,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data);
  // The agent-symbol callback also receives the agent; the program-symbol callback does not.
  hsa_status_t hsa_executable_iterate_agent_symbols(
      hsa_executable_t executable,
      hsa_agent_t agent,
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_agent_t agent,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data);
  hsa_status_t hsa_executable_iterate_program_symbols(
      hsa_executable_t executable,
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data);
  // NOTE(review): config is an untyped void*; the expected layout is not visible from this
  // header - confirm against the implementation.
  hsa_status_t hsa_get_tile_config(hsa_agent_t agent_handle, void* config);

  //===--- Runtime Notifications ------------------------------------------===//

  // Takes a status code and a const char** out-parameter for the matching string.
  // NOTE(review): ownership/lifetime of the returned string is not visible here - confirm
  // against hsa.h.
  hsa_status_t hsa_status_string(
      hsa_status_t status,
      const char **status_string);

}   //  namespace HSA
}   //  namespace rocr

#ifdef BUILDING_HSA_CORE_RUNTIME
// The using-directive below is deliberate (NOTE: it is currently commented out, so this block
// has no effect as written).
// We want unqualified name resolution to fail when building the runtime.  This is a guard against
// accidental use of the intercept layer in the runtime.
//using namespace rocr::HSA;
#endif

#endif


================================================
FILE: runtime/hsa-runtime/core/inc/hsa_table_interface.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef RUNTIME_HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H_
#define RUNTIME_HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H_

#include "inc/hsa_api_trace.h"

// Declared here, defined elsewhere. Takes a read-only pointer to an HsaApiTable.
// NOTE(review): presumably installs apiTable as the table the exported hsa_* entry points
// dispatch through - confirm against the implementation.
void hsa_table_interface_init(const HsaApiTable* apiTable);

// Returns a read-only pointer to an HsaApiTable.
// NOTE(review): presumably the table last passed to hsa_table_interface_init - confirm
// against the implementation.
const HsaApiTable* hsa_table_interface_get_table();

#endif // RUNTIME_HSA_RUNTIME_CORE_INC_HSA_TABLE_INTERFACE_H_

================================================
FILE: runtime/hsa-runtime/core/inc/hsa_ven_amd_loader_impl.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2020-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTME_CORE_INC_HSA_VEN_AMD_LOADER_IMPL_H_
#define HSA_RUNTME_CORE_INC_HSA_VEN_AMD_LOADER_IMPL_H_

#include "inc/hsa_ven_amd_loader.h"

// Declarations of the hsa_ven_amd_loader_* functions inside namespace rocr.
// NOTE(review): presumably these are the runtime-side implementations of the loader extension
// API declared in inc/hsa_ven_amd_loader.h - confirm against the definitions.
namespace rocr {

  // Takes a device address; the host address is reported through the out-parameter.
  hsa_status_t hsa_ven_amd_loader_query_host_address(
    const void *device_address,
    const void **host_address);

  // Takes a descriptor array and a pointer to its element count.
  // NOTE(review): whether num_segment_descriptors is in/out is not visible here - confirm.
  hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors);

  // Takes a device address; the executable is reported through the out-parameter.
  hsa_status_t hsa_ven_amd_loader_query_executable(
    const void *device_address,
    hsa_executable_t *executable);

  // The callback receives (executable, loaded_code_object, data).
  hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
    hsa_executable_t executable,
    hsa_status_t (*callback)(
    hsa_executable_t executable,
    hsa_loaded_code_object_t loaded_code_object,
    void *data),
    void *data);

  // Attribute query on a loaded code object; the result is written through the untyped value
  // pointer.
  hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
    hsa_loaded_code_object_t loaded_code_object,
    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
    void *value);

  // Creates a code object reader from a file handle plus an (offset, size) pair.
  hsa_status_t
    hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
    hsa_file_t file,
    size_t offset,
    size_t size,
    hsa_code_object_reader_t *code_object_reader);

  // The callback receives (executable, data).
  hsa_status_t
    hsa_ven_amd_loader_iterate_executables(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      void *data),
    void *data);
}  // namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/inc/intercept_queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_INTERCEPT_QUEUE_H_
#define HSA_RUNTIME_CORE_INC_INTERCEPT_QUEUE_H_

#include <vector>
#include <memory>
#include <utility>

#include "core/inc/runtime.h"
#include "core/inc/queue.h"
#include "core/inc/signal.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/exceptions.h"
#include "core/util/locks.h"

namespace rocr {
namespace core {

// @brief Generic container to forward Queue interfaces into Queue* member.
// Class only has utility as a base type customized Queue wrappers.
class QueueWrapper : public Queue {
 public:
  std::unique_ptr<Queue> wrapped;

  explicit QueueWrapper(std::unique_ptr<Queue> queue)
      : Queue(static_cast<core::SharedQueue*>(core::Runtime::runtime_singleton_->system_allocator()(
                  sizeof(core::SharedQueue), 4096, 0, 0)),
              0),
        wrapped(std::move(queue)) {
    memcpy(&amd_queue_, &wrapped->amd_queue_, sizeof(amd_queue_));
    wrapped->set_public_handle(wrapped.get(), public_handle_);
  }

  ~QueueWrapper() {
    if (shared_queue_) core::Runtime::runtime_singleton_->system_deallocator()(shared_queue_);
  }

  hsa_status_t Inactivate() override { return wrapped->Inactivate(); }
  hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override {
    return wrapped->SetPriority(priority);
  }
  uint64_t LoadReadIndexAcquire() override { return wrapped->LoadReadIndexAcquire(); }
  uint64_t LoadReadIndexRelaxed() override { return wrapped->LoadReadIndexRelaxed(); }
  uint64_t LoadWriteIndexRelaxed() override { return wrapped->LoadWriteIndexRelaxed(); }
  uint64_t LoadWriteIndexAcquire() override { return wrapped->LoadWriteIndexAcquire(); }
  void StoreReadIndexRelaxed(uint64_t value) override {
    return wrapped->StoreReadIndexRelaxed(value);
  }
  void StoreReadIndexRelease(uint64_t value) override {
    return wrapped->StoreReadIndexRelease(value);
  }
  void StoreWriteIndexRelaxed(uint64_t value) override {
    return wrapped->StoreWriteIndexRelaxed(value);
  }
  void StoreWriteIndexRelease(uint64_t value) override {
    return wrapped->StoreWriteIndexRelease(value);
  }
  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override {
    return wrapped->CasWriteIndexAcqRel(expected, value);
  }
  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override {
    return wrapped->CasWriteIndexAcquire(expected, value);
  }
  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override {
    return wrapped->CasWriteIndexRelaxed(expected, value);
  }
  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override {
    return wrapped->CasWriteIndexRelease(expected, value);
  }
  uint64_t AddWriteIndexAcqRel(uint64_t value) override {
    return wrapped->AddWriteIndexAcqRel(value);
  }
  uint64_t AddWriteIndexAcquire(uint64_t value) override {
    return wrapped->AddWriteIndexAcquire(value);
  }
  uint64_t AddWriteIndexRelaxed(uint64_t value) override {
    return wrapped->AddWriteIndexRelaxed(value);
  }
  uint64_t AddWriteIndexRelease(uint64_t value) override {
    return wrapped->AddWriteIndexRelease(value);
  }
  hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) override {
    return wrapped->SetCUMasking(num_cu_mask_count, cu_mask);
  }
  hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) override {
    return wrapped->GetCUMasking(num_cu_mask_count, cu_mask);
  }
  void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
                  hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                  hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                  hsa_signal_t* signal = NULL) override {
    wrapped->ExecutePM4(cmd_data, cmd_size_b, acquireFence, releaseFence, signal);
  }
  void SetProfiling(bool enabled) override { wrapped->SetProfiling(enabled); }

 protected:
  void do_set_public_handle(hsa_queue_t* handle) override {
    public_handle_ = handle;
    wrapped->set_public_handle(wrapped.get(), handle);
  }
};

// @brief Generic container for a proxy queue.
// Presents a proxy packet buffer and doorbell signal for an underlying Queue.  Write index
// operations act on the proxy buffer while all other operations pass through to the underlying
// queue.
class QueueProxy : public QueueWrapper {
 public:
  // Takes ownership of @p queue and hands it to the QueueWrapper base.
  explicit QueueProxy(std::unique_ptr<Queue> queue) : QueueWrapper(std::move(queue)) {}

  // Read index loads: read amd_queue_.read_dispatch_id with the memory order named by the
  // method.
  uint64_t LoadReadIndexAcquire() override {
    return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
  }
  uint64_t LoadReadIndexRelaxed() override {
    return atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed);
  }
  // Read index stores are not implemented here: both overloads only assert(false), so they trap
  // in builds with assertions enabled and do nothing when assert is compiled out (NDEBUG).
  void StoreReadIndexRelaxed(uint64_t value) override { assert(false); }
  void StoreReadIndexRelease(uint64_t value) override { assert(false); }

  // Write index loads/stores: operate on amd_queue_.write_dispatch_id with the memory order
  // named by the method.
  uint64_t LoadWriteIndexRelaxed() override {
    return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed);
  }
  uint64_t LoadWriteIndexAcquire() override {
    return atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire);
  }
  void StoreWriteIndexRelaxed(uint64_t value) override {
    atomic::Store(&amd_queue_.write_dispatch_id, value, std::memory_order_relaxed);
  }
  void StoreWriteIndexRelease(uint64_t value) override {
    atomic::Store(&amd_queue_.write_dispatch_id, value, std::memory_order_release);
  }
  // Write index compare-and-swap on amd_queue_.write_dispatch_id.
  // NOTE(review): atomic::Cas is invoked as (pointer, value, expected, order), i.e. the new
  // value precedes the comparand, which is the reverse of this method's own parameter order.
  // Presumably that matches the atomic helper's signature - confirm before reordering.
  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_acq_rel);
  }
  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_acquire);
  }
  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_relaxed);
  }
  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) override {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected, std::memory_order_release);
  }
  // Write index atomic add on amd_queue_.write_dispatch_id; the result of atomic::Add is
  // returned unchanged.
  uint64_t AddWriteIndexAcqRel(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_acq_rel);
  }
  uint64_t AddWriteIndexAcquire(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_acquire);
  }
  uint64_t AddWriteIndexRelaxed(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_relaxed);
  }
  uint64_t AddWriteIndexRelease(uint64_t value) override {
    return atomic::Add(&amd_queue_.write_dispatch_id, value, std::memory_order_release);
  }
};

// @brief Provides packet intercept and rewrite capability for a queue.
// Host-side dispatches are processed during doorbell ring.
// Device-side dispatches are processed as an asynchronous signal event.
class InterceptQueue : public QueueProxy, private LocalSignal, public DoorbellSignal {
 public:
  // Wraps @p queue; ownership of the underlying queue is transferred to this object.
  explicit InterceptQueue(std::unique_ptr<Queue> queue);
  ~InterceptQueue();

  // Registers a packet intercept callback together with its opaque user data.
  // The pair is appended to the end of the interceptors list. A null callback is a programming
  // error and is only caught by the assert (i.e. not in builds where assert is compiled out).
  void AddInterceptor(hsa_amd_queue_intercept_handler interceptor, void* data) {
    assert(interceptor != nullptr && "Packet intercept callback was nullptr.");
    interceptors.push_back(std::make_pair(interceptor, data));
  }

  // Marks this intercept queue inactive (active_ = false) before asking the wrapped queue to
  // inactivate; returns the wrapped queue's status.
  hsa_status_t Inactivate() override {
    active_ = false;
    return wrapped->Inactivate();
  }

 private:
  // Serialize packet interception processing.
  KernelMutex lock_;

  // Largest processed packet index.
  uint64_t next_packet_;

  // Post interception packet overflow buffer
  std::vector<AqlPacket> overflow_;

  // Index at which async intercept processing was scheduled.
  uint64_t retry_index_;

  // Given the current value of the wrapped queue read index, determine if
  // there is a retry barrier packet already in the wrapped queue.
  bool IsPendingRetryPoint(uint64_t wrapped_current_read_index) const;

  // Event signal to use for async packet processing and control flag.
  Signal* async_doorbell_;
  std::atomic<bool> quit_;

  // Indicates queue active/inactive state.
  std::atomic<bool> active_;

  // Proxy packet buffer
  SharedArray<AqlPacket, 4096> buffer_;

  // Packet transform callbacks
  std::vector<std::pair<AMD::callback_t<hsa_amd_queue_intercept_handler>, void*>> interceptors;

  // Doorbell constant with every bit of a 64-bit value set.
  static const hsa_signal_value_t DOORBELL_MAX = 0xFFFFFFFFFFFFFFFFull;

  // Static callbacks; both are defined outside this header.
  static bool HandleAsyncDoorbell(hsa_signal_value_t value, void* arg);
  static void PacketWriter(const void* pkts, uint64_t pkt_count);

  // Submit packets to the wrapped queue and return number of packets that were
  // submitted.
  uint64_t Submit(const AqlPacket* packets, uint64_t count);

  // Used as the final packet rewriter that submits the packets to the wrapped
  // queue.
  static void Submit(const void* pkts, uint64_t pkt_count, uint64_t user_pkt_index, void* data,
                     hsa_amd_queue_intercept_packet_writer writer);

  /*
   * Remaining Queue and Signal interface definitions.
   */
 public:
  /// @brief Update signal value using Relaxed semantics
  ///
  /// @param value Value of signal to update with
  void StoreRelaxed(hsa_signal_value_t value) override;

  /// @brief Update signal value using Release semantics
  ///
  /// Implemented as a release thread fence followed by StoreRelaxed().
  ///
  /// @param value Value of signal to update with
  void StoreRelease(hsa_signal_value_t value) override {
    std::atomic_thread_fence(std::memory_order_release);
    StoreRelaxed(value);
  }

  /// @brief Provide information about the queue
  hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) override;

  /// @brief Type checks: ask the signal/queue whether it matches this class's rtti id.
  static __forceinline bool IsType(core::Signal* signal) { return signal->IsType(&rtti_id()); }
  static __forceinline bool IsType(core::Queue* queue) { return queue->IsType(&rtti_id()); }

 protected:
  // True only when @p id is the address of this class's rtti id.
  bool _IsA(Queue::rtti_t id) const override { return id == &rtti_id(); }

 private:
  // Returns a reference to a function-local static; its address serves as the class's rtti id.
  static __forceinline int& rtti_id() {
    static int rtti_id_ = 0;
    return rtti_id_;
  }
};

}  // namespace core
}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_INTERCEPT_QUEUE_H_


================================================
FILE: runtime/hsa-runtime/core/inc/interrupt_signal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_
#define HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_

#include <memory>
#include <vector>

#include "hsakmt/hsakmt.h"

#include "core/inc/signal.h"
#include "core/util/utils.h"

namespace rocr {
namespace core {

/// @brief A Signal implementation using interrupts versus plain memory based.
/// Also see base class Signal.
///
/// Breaks common/vendor separation - signals in general need to be re-worked
/// at the foundation level to make sense in a multi-device system.
/// Supports only one waiter for now.
/// KFD changes are needed to support multiple waiters and have device
/// signaling.
class InterruptSignal : private LocalSignal, public Signal {
 public:
  /// @brief Holder for HsaEvent objects handed out through alloc() and returned
  /// through free(). Stored events are owned via unique_event_ptr, so they are
  /// destroyed with InterruptSignal::DestroyEvent when removed from the pool.
  class EventPool {
   public:
    /// @brief Deleter that releases an event through InterruptSignal::DestroyEvent.
    struct Deleter {
      void operator()(HsaEvent* evt) { InterruptSignal::DestroyEvent(evt); }
    };
    using unique_event_ptr = ::std::unique_ptr<HsaEvent, Deleter>;

    EventPool() : allEventsAllocated(false) {}

    /// @brief Obtains an event from the pool (defined out of line).
    HsaEvent* alloc();

    /// @brief Hands @p evt back to the pool (defined out of line).
    void free(HsaEvent* evt);

    /// @brief Drops every stored event and resets the allEventsAllocated flag.
    /// NOTE(review): this inline body does not acquire lock_ - confirm that
    /// callers serialize clear() against alloc()/free().
    void clear() {
      events_.clear();
      allEventsAllocated = false;
    }

   private:
    HybridMutex lock_;
    std::vector<unique_event_ptr> events_;
    bool allEventsAllocated;
  };

  /// @brief Creates an event of the given @p type (defined out of line).
  static HsaEvent* CreateEvent(HSA_EVENTTYPE type, bool manual_reset);

  /// @brief Destroys an event; also used by EventPool::Deleter.
  static void DestroyEvent(HsaEvent* evt);

  /// @brief Determines if a Signal* can be safely converted to an
  /// InterruptSignal* via static_cast.
  static __forceinline bool IsType(Signal* ptr) {
    return ptr->IsType(&rtti_id());
  }

  /// @brief Constructs the signal with @p initial_value.
  /// @param use_event Optional event to use; defaults to a null pointer.
  explicit InterruptSignal(hsa_signal_value_t initial_value,
                           HsaEvent* use_event = nullptr);

  ~InterruptSignal();

  // Below are various methods corresponding to the APIs, which load/store the
  // signal value or modify the existing signal value atomically and with
  // specified memory ordering semantics.

  hsa_signal_value_t LoadRelaxed();

  hsa_signal_value_t LoadAcquire();

  void StoreRelaxed(hsa_signal_value_t value);

  void StoreRelease(hsa_signal_value_t value);

  hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
                                 hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint);

  hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
                                 hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint);

  void AndRelaxed(hsa_signal_value_t value);

  void AndAcquire(hsa_signal_value_t value);

  void AndRelease(hsa_signal_value_t value);

  void AndAcqRel(hsa_signal_value_t value);

  void OrRelaxed(hsa_signal_value_t value);

  void OrAcquire(hsa_signal_value_t value);

  void OrRelease(hsa_signal_value_t value);

  void OrAcqRel(hsa_signal_value_t value);

  void XorRelaxed(hsa_signal_value_t value);

  void XorAcquire(hsa_signal_value_t value);

  void XorRelease(hsa_signal_value_t value);

  void XorAcqRel(hsa_signal_value_t value);

  void AddRelaxed(hsa_signal_value_t value);

  void AddAcquire(hsa_signal_value_t value);

  void AddRelease(hsa_signal_value_t value);

  void AddAcqRel(hsa_signal_value_t value);

  void SubRelaxed(hsa_signal_value_t value);

  void SubAcquire(hsa_signal_value_t value);

  void SubRelease(hsa_signal_value_t value);

  void SubAcqRel(hsa_signal_value_t value);

  hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value);

  hsa_signal_value_t ExchAcquire(hsa_signal_value_t value);

  hsa_signal_value_t ExchRelease(hsa_signal_value_t value);

  hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value);

  hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
                                hsa_signal_value_t value);

  hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
                                hsa_signal_value_t value);

  hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
                                hsa_signal_value_t value);

  hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
                               hsa_signal_value_t value);

  /// @brief See base class Signal. Returns the address of signal_.value; the
  /// C-style cast is kept as-is because it may be dropping qualifiers of the
  /// underlying field (not visible from this header).
  __forceinline hsa_signal_value_t* ValueLocation() const {
    return (hsa_signal_value_t*)&signal_.value;
  }

  /// @brief See base class Signal. Returns the event stored in event_.
  __forceinline HsaEvent* EopEvent() { return event_; }

 protected:
  /// @brief True only when @p id is the address of this class's rtti id.
  bool _IsA(rtti_t id) const { return id == &rtti_id(); }

 private:
  /// @variable KFD event on which the interrupt signal is based on.
  HsaEvent* event_;

  /// @variable Indicates whether the signal should release the event when it
  /// closes or not.
  bool free_event_;

  /// Used to obtain a globally unique value (address) for rtti.
  static __forceinline int& rtti_id() {
    static int rtti_id_ = 0;
    return rtti_id_;
  }

  void SetEvent();

  DISALLOW_COPY_AND_ASSIGN(InterruptSignal);
};

}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/ipc_signal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_IPC_SIGNAL_H_
#define HSA_RUNTME_CORE_INC_IPC_SIGNAL_H_

#include <atomic>
#include <utility>

#include "core/inc/signal.h"
#include "core/inc/default_signal.h"
#include "core/util/locks.h"

namespace rocr {
namespace core {

/// @brief Container for ipc shared memory.
class SharedMemory {
 public:
  /// @brief Constructs the container from an IPC memory @p handle and a length
  /// @p len (defined out of line).
  SharedMemory(const hsa_amd_ipc_memory_t* handle, size_t len);
  ~SharedMemory();
  /// @brief Move constructor (defined out of line). Because a move constructor
  /// is user-declared and no copy operations are declared, the class is
  /// move-only.
  SharedMemory(SharedMemory&&);

  /// @returns The stored pointer.
  void* ptr() const { return ptr_; }

 private:
  /// Pointer returned by ptr().
  void* ptr_;
};

/// @brief Container for ipc signal abi block.
class SharedMemorySignal {
 public:
  /// @brief Attaches to a 4096 byte shared memory block identified by @p handle
  /// and verifies that it holds a valid signal.
  /// @throws AMD::hsa_exception (HSA_STATUS_ERROR_INVALID_ARGUMENT) when the
  /// block fails the SharedSignal validity check.
  explicit SharedMemorySignal(const hsa_amd_ipc_memory_t* handle) : signal_(handle, 4096) {
    const bool usable = signal()->IsValid();
    if (usable) return;
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "IPC Signal handle is invalid.");
  }

  /// @returns The shared memory block viewed as a SharedSignal.
  SharedSignal* signal() const {
    void* const block = signal_.ptr();
    return static_cast<SharedSignal*>(block);
  }

 private:
  /// Shared memory backing the signal ABI block.
  SharedMemory signal_;
};

/// @brief Memory only signal using a shared memory ABI block.
class IPCSignal : private SharedMemorySignal, public BusyWaitSignal {
 public:
  /// @brief Produces a sharable IPC handle for @p signal and writes it to
  /// @p ipc_handle.
  static void CreateHandle(Signal* signal, hsa_amd_ipc_signal_t* ipc_handle);

  /// @brief Opens the IPC signal described by @p ipc_handle.
  static Signal* Attach(const hsa_amd_ipc_signal_t* ipc_handle);

  /// @brief Reports whether @p ptr identifies itself as an IPCSignal, i.e.
  /// whether a static_cast to IPCSignal* is safe.
  static __forceinline bool IsType(Signal* ptr) { return ptr->IsType(&rtti_id()); }

 protected:
  /// @brief Matches this class's rtti id first and otherwise defers to
  /// BusyWaitSignal.
  bool _IsA(rtti_t id) const {
    return id == &rtti_id() || BusyWaitSignal::_IsA(id);
  }

 private:
  /// The address of the function-local static is the class's rtti id.
  static __forceinline int& rtti_id() {
    static int rtti_id_ = 0;
    return rtti_id_;
  }

  static KernelMutex lock_;

  /// @brief Takes over @p abi_block and builds the BusyWaitSignal part on top
  /// of the signal stored in it.
  explicit IPCSignal(SharedMemorySignal&& abi_block)
      : SharedMemorySignal(std::move(abi_block)), BusyWaitSignal(signal(), true) {}

  DISALLOW_COPY_AND_ASSIGN(IPCSignal);
};

}  // namespace core
}  // namespace rocr

#endif  // HSA_RUNTME_CORE_INC_IPC_SIGNAL_H_


================================================
FILE: runtime/hsa-runtime/core/inc/isa.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_ISA_H_
#define HSA_RUNTIME_CORE_ISA_H_

#include <cassert>
#include <cstdint>
#include <string>
#include <tuple>
#include <unordered_map>
#include "core/inc/amd_hsa_code.hpp"

namespace rocr {
namespace core {

/// @class Wavefront.
/// @brief Wavefront.
class Wavefront final: public amd::hsa::common::Signed<0xA02483F1AD7F101C> {
public:
  /// @brief Destructor; nothing to release.
  ~Wavefront() {}

  /// @returns Handle whose value is the address of @p object.
  static hsa_wavefront_t Handle(const Wavefront *object) {
    return hsa_wavefront_t{ reinterpret_cast<uint64_t>(object) };
  }

  /// @returns Wavefront object referred to by @p handle, as resolved by
  /// amd::hsa::common::ObjectAt.
  static Wavefront *Object(const hsa_wavefront_t &handle) {
    return amd::hsa::common::ObjectAt<Wavefront>(handle.handle);
  }

  /// @brief Looks up @p attribute and stores the answer in @p value.
  bool GetInfo(const hsa_wavefront_info_t &attribute, void *value) const;

private:
  /// @brief Thread count recorded at construction.
  uint32_t num_threads_;

  /// @brief Creates a wavefront with a thread count of zero.
  Wavefront() : Wavefront(0) {}

  /// @brief Creates a wavefront with @p num_threads threads.
  Wavefront(uint32_t num_threads) : num_threads_(num_threads) {}

  /// @brief Classes allowed to construct wavefronts.
  friend class Isa;
  friend class IsaRegistry;
};

/// @brief State of an ISA feature; used in this header for the SRAMECC and
/// XNACK settings.
enum class IsaFeature : uint8_t {
  /// Feature not available; the *Supported() queries on Isa compare against
  /// this value and a default-constructed Isa starts out with it.
  Unsupported,
  /// Default for the feature arguments of IsaRegistry::GetIsa(version, ...).
  Any,
  Disabled,
  Enabled,
};

/// @class Isa.
/// @brief Instruction Set Architecture.
class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> {
 public:
  /// @brief Version triple: element 0 is the major version, element 1 the
  /// minor version and element 2 the stepping.
  using Version = std::tuple<int32_t, int32_t, int32_t>;

  /// @brief Compiler-generated destructor.
  ~Isa() = default;

  /// @returns Handle whose value is the address of @p isa_object.
  static hsa_isa_t Handle(const Isa *isa_object) {
    return hsa_isa_t{ reinterpret_cast<uint64_t>(isa_object) };
  }

  /// @returns Isa object referred to by @p isa_handle, as resolved by
  /// amd::hsa::common::ObjectAt.
  static Isa *Object(const hsa_isa_t &isa_handle) {
    return amd::hsa::common::ObjectAt<Isa>(isa_handle.handle);
  }

  /// @returns True when @p code_object_isa can be used with @p agent_isa,
  /// false otherwise.
  static bool IsCompatible(const Isa &code_object_isa, const Isa &agent_isa,
                           unsigned int codeGenericVersion);

  /// @returns The version triple of this Isa.
  const Version &GetVersion() const { return version_; }

  /// @returns The generic target name of this Isa.
  const std::string &GetIsaGeneric() const { return generic_; }

  /// @returns State of the SRAM ECC feature.
  IsaFeature GetSramecc() const { return sramecc_; }

  /// @returns State of the XNACK feature.
  IsaFeature GetXnack() const { return xnack_; }

  /// @returns The wavefront supported by this Isa.
  const Wavefront &GetWavefront() const { return wavefront_; }

  /// @returns False only when the SRAMECC state is IsaFeature::Unsupported.
  bool IsSrameccSupported() const { return IsaFeature::Unsupported != sramecc_; }

  /// @returns False only when the XNACK state is IsaFeature::Unsupported.
  bool IsXnackSupported() const { return IsaFeature::Unsupported != xnack_; }

  /// @returns Major version (element 0 of the version triple).
  int32_t GetMajorVersion() const { return std::get<0>(version_); }

  /// @returns Minor version (element 1 of the version triple).
  int32_t GetMinorVersion() const { return std::get<1>(version_); }

  /// @returns Stepping (element 2 of the version triple).
  int32_t GetStepping() const { return std::get<2>(version_); }

  /// @brief An Isa never reports itself as invalid.
  bool IsValid() const { return true; }

  /// @returns Processor name of this Isa.
  std::string GetProcessorName() const;

  /// @returns Name of this Isa, made up of the target triple and target ID.
  std::string GetIsaName() const;

  /// @brief Looks up @p attribute and stores the answer in @p value.
  bool GetInfo(const hsa_isa_info_t &attribute, void *value) const;

  /// @returns Rounding method (single or double) used for the floating-point
  /// multiply add instruction (mad) given @p fp_type and @p flush_mode.
  hsa_round_method_t GetRoundMethod(hsa_fp_type_t fp_type,
                                    hsa_flush_mode_t flush_mode) const;

  /// @brief Creates an Isa with version (-1, -1, -1) and both features marked
  /// unsupported.
  Isa()
      : version_(-1, -1, -1),
        sramecc_(IsaFeature::Unsupported),
        xnack_(IsaFeature::Unsupported) {}

 private:
  /// @brief Target ID name.
  std::string targetid_;

  /// @brief Generic version name; empty when there is none.
  std::string generic_;

  /// @brief Version triple.
  Version version_;

  /// @brief SRAMECC feature state.
  IsaFeature sramecc_;

  /// @brief XNACK feature state.
  IsaFeature xnack_;

  /// @brief Supported wavefront.
  Wavefront wavefront_;

  /// @brief The registry populates the private members.
  friend class IsaRegistry;
}; // class Isa

/// @class IsaRegistry.
/// @brief Instruction Set Architecture Registry.
class IsaRegistry final {
 public:
  /// @returns Isa for requested @p full_name, null pointer if not supported.
  static const Isa *GetIsa(const std::string &full_name);

  /// @returns Isa for requested @p version, null pointer if not supported.
  static const Isa *GetIsa(const Isa::Version &version,
                           IsaFeature sramecc = IsaFeature::Any,
                           IsaFeature xnack = IsaFeature::Any);
  static const std::unordered_map<std::string, unsigned int> &
                                                GetSupportedGenericVersions();
 private:
  /// @brief IsaRegistry's map type.
  typedef std::unordered_map<std::string, Isa> IsaMap;

  /// @brief  Default constructor
  IsaRegistry() = delete;

  /// @brief Default destructor
  ~IsaRegistry() = default;

  /// @returns Supported instruction set architectures.
  static const IsaMap& GetSupportedIsas();
}; // class IsaRegistry

} // namespace core
} // namespace rocr

#endif // HSA_RUNTIME_CORE_ISA_H_


================================================
FILE: runtime/hsa-runtime/core/inc/memory_region.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_MEMORY_REGION_H_
#define HSA_RUNTME_CORE_INC_MEMORY_REGION_H_

#include <vector>

#include "core/inc/hsa_internal.h"
#include "core/inc/checked.h"
#include "core/util/utils.h"

namespace rocr {
namespace core {
class Agent;

/// @brief Abstract description of a memory region owned by an agent.
/// Stores a fixed set of boolean properties plus the owning agent, and declares
/// the allocation interface that concrete regions implement.
class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
 public:
  /// @brief Records the region properties and its owner.
  /// @param owner Owning agent; must not be null (checked by assert only).
  MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
               bool user_visible, core::Agent* owner)
      : fine_grain_(fine_grain),
        kernarg_(kernarg),
        full_profile_(full_profile),
        extended_scope_fine_grain_(extended_scope_fine_grain),
        user_visible_(user_visible),
        owner_(owner) {
    assert(owner_ != nullptr);
  }

  virtual ~MemoryRegion() {}

  // Convert this object into hsa_region_t.
  // The handle value is the address of the region object.
  static __forceinline hsa_region_t Convert(MemoryRegion* region) {
    const hsa_region_t region_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
    return region_handle;
  }

  // Same conversion for a pointer to a const region.
  static __forceinline const hsa_region_t Convert(const MemoryRegion* region) {
    const hsa_region_t region_handle = {
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(region))};
    return region_handle;
  }

  // Convert hsa_region_t into MemoryRegion *.
  // The handle value is reinterpreted as a pointer; no validation happens here.
  static __forceinline MemoryRegion* Convert(hsa_region_t region) {
    return reinterpret_cast<MemoryRegion*>(region.handle);
  }

  // Bit flags that can be combined into an AllocateFlags value.
  enum AllocateEnum {
    AllocateNoFlags = 0,
    AllocateRestrict = (1 << 0),    // Don't map system memory to GPU agents
    AllocateExecutable = (1 << 1),  // Set executable permission
    AllocateDoubleMap = (1 << 2),   // Deprecated:Map twice VA allocation to backing store
    AllocateDirect = (1 << 3),      // Bypass fragment cache.
    AllocateIPC = (1 << 4),         // System memory that can be IPC-shared
    AllocateNonPaged = (1 << 4),    // Non-paged system memory (AllocateIPC alias)
    AllocatePCIeRW = (1 << 5),      // Enforce pseudo fine grain/RW memory
    AllocateAsan = (1 << 6),        // ASAN - First page of allocation remapped to system memory
    AllocatePinned = (1 << 7),      // Currently treating Pinned memory as NoSubstitute
    AllocateMemoryOnly = (1 << 8),  // Memory only handle from thunk, no virtual address
    // Flag to allocate system memory with GTT Access
    // Note: The node_id needs to be the node_id of the device even though this is allocating
    // system memory
    AllocateGTTAccess = (1 << 9),
    AllocateContiguous = (1 << 10), // Physically contiguous memory
    AllocateUncached = (1 << 11),   // Uncached memory
    // this flag is ignored by Thunk and only used for emulator/dxg to track code-object
    // allocations in AQL to PM4 conversion.
    AllocateExecutableBlitKernelObject = (1 << 12),
  };

  // Integer type carrying a combination of AllocateEnum bits.
  typedef uint32_t AllocateFlags;

  // Allocates memory from this region. @p size is taken by non-const reference, so an
  // implementation may modify it; the resulting address is returned through @p address.
  virtual hsa_status_t Allocate(size_t& size, AllocateFlags alloc_flags, void** address, int agent_node_id) const = 0;

  // Releases an allocation of @p size bytes at @p address.
  virtual hsa_status_t Free(void* address, size_t size) const = 0;

  // Prepares suballocated memory for IPC export.
  virtual hsa_status_t IPCFragmentExport(void* address) const = 0;

  // Translate memory properties into HSA region attribute.
  virtual hsa_status_t GetInfo(hsa_region_info_t attribute,
                               void* value) const = 0;

  // Assigns @p agent the given @p access permission for the range [ptr, ptr + size).
  virtual hsa_status_t AssignAgent(void* ptr, size_t size, const Agent& agent,
                                   hsa_access_permission_t access) const = 0;

  // Releases any cached memory that may be held within the allocator.
  // The default implementation does nothing.
  virtual void Trim() const {}

  // Accessors for the properties fixed at construction.
  __forceinline bool fine_grain() const { return fine_grain_; }

  __forceinline bool extended_scope_fine_grain() const { return extended_scope_fine_grain_; }

  __forceinline bool kernarg() const { return kernarg_; }

  __forceinline bool full_profile() const { return full_profile_; }

  __forceinline bool user_visible() const { return user_visible_; }

  __forceinline core::Agent* owner() const { return owner_; }

 private:
  const bool fine_grain_;
  const bool kernarg_;
  const bool full_profile_;
  const bool extended_scope_fine_grain_;
  const bool user_visible_;

  // Owning agent; not owned by the region.
  core::Agent* owner_;
};
}  // namespace core
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_
#define HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_

#include <sstream>

#include "core/common/shared.h"
#include "core/inc/checked.h"
#include "core/inc/memory_region.h"
#include "core/util/utils.h"
#include "inc/amd_hsa_queue.h"
#include "inc/hsa_ext_amd.h"
#include "hsakmt/hsakmt.h"

namespace rocr {
namespace core {
/// @brief Overlay of the AQL packet layouts handled by the runtime.
///
/// All views share the same storage and begin with the 16-bit packet header.
/// The two raw views declared here are 64 bytes each (2-byte header plus
/// 62 bytes of payload).
struct AqlPacket {
  union {
    // Generic view: header followed by opaque payload bytes.
    struct {
      uint16_t header;
      struct {
        uint8_t user_data[62];
      } body;
    } packet;
    // Vendor-specific view: header, a one-byte format field, then the rest.
    struct {
      uint16_t header;
      uint8_t format;
      uint8_t rest[61];
    } amd_vendor;
    hsa_kernel_dispatch_packet_t dispatch;
    hsa_barrier_and_packet_t barrier_and;
    hsa_barrier_or_packet_t barrier_or;
    hsa_agent_dispatch_packet_t agent;
  };

  /// @brief Extracts the packet type field from a packet header.
  ///
  /// The caller is responsible for loading the header using an atomic or
  /// ordinary load as appropriate.
  ///
  /// @param header Raw 16-bit packet header.
  ///
  /// @return uint8_t Value of the type field.
  static uint8_t type(uint16_t header) {
    return ((header >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
  }

  /// @brief Determines if a packet header describes a valid packet.
  ///
  /// The caller is responsible for loading the header using an atomic or
  /// ordinary load as appropriate.
  ///
  /// @param header Raw 16-bit packet header.
  ///
  /// @return bool true if the type is not HSA_PACKET_TYPE_INVALID and does
  /// not exceed HSA_PACKET_TYPE_BARRIER_OR.
  static bool IsValid(uint16_t header) {
    const uint8_t packet_type = type(header);
    return (packet_type <= HSA_PACKET_TYPE_BARRIER_OR) &&
           (packet_type != HSA_PACKET_TYPE_INVALID);
  }

  /// @brief Reports whether this packet is a kernel dispatch that requests a
  /// non-zero private segment size.
  ///
  /// Asserts (debug builds) that the packet is valid and, for such
  /// dispatches, that no workgroup dimension is zero.
  bool __forceinline IsDispatchAndNeedsScratch() const {
    assert(IsValid(packet.header) && "Invalid packet in dynamic scratch handler.");

    if (type(packet.header) != HSA_PACKET_TYPE_KERNEL_DISPATCH ||
        dispatch.private_segment_size == 0)
      return false;

    assert((dispatch.workgroup_size_x != 0) && (dispatch.workgroup_size_y != 0) &&
           (dispatch.workgroup_size_z != 0) && "Invalid dispatch dimension.");

    return true;
  }

  /// @brief Formats the packet as multi-line human-readable text.
  ///
  /// Emits the type name and the barrier/acquire/release header fields,
  /// followed by type-specific fields for kernel dispatch and barrier
  /// packets. A type outside the name table yields "type: UNKNOWN#<n>".
  ///
  /// @return std::string Textual description of the packet.
  std::string string() const {
    std::stringstream out;
    const uint16_t header = packet.header;
    const uint8_t t = type(header);

    // Indexed by packet type value.
    static const char* type_names[] = {
        "HSA_PACKET_TYPE_VENDOR_SPECIFIC", "HSA_PACKET_TYPE_INVALID",
        "HSA_PACKET_TYPE_KERNEL_DISPATCH", "HSA_PACKET_TYPE_BARRIER_AND",
        "HSA_PACKET_TYPE_AGENT_DISPATCH",  "HSA_PACKET_TYPE_BARRIER_OR"};

    if (t >= sizeof(type_names) / sizeof(const char*)) {
      // uint8_t is a character type for iostreams; widen it so the numeric
      // packet type is printed rather than a raw (possibly unprintable) byte.
      out << "type: UNKNOWN#" << static_cast<uint32_t>(t);
      return out.str();
    }

    out << "type: " << type_names[t]
        << "\nbarrier: " << ((header >> HSA_PACKET_HEADER_BARRIER) &
                             ((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1))
        << "\nacquire: " << ((header >> HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) &
                             ((1 << HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE) - 1))
        << "\nrelease: " << ((header >> HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE) &
                             ((1 << HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE) - 1));

    if (t == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
      out << "\nDim: " << dispatch.setup
          << "\nworkgroup_size: " << dispatch.workgroup_size_x << ", "
          << dispatch.workgroup_size_y << ", " << dispatch.workgroup_size_z
          << "\ngrid_size: " << dispatch.grid_size_x << ", "
          << dispatch.grid_size_y << ", " << dispatch.grid_size_z
          << "\nprivate_size: " << dispatch.private_segment_size
          << "\ngroup_size: " << dispatch.group_segment_size
          << "\nkernel_object: " << dispatch.kernel_object
          << "\nkern_arg: " << dispatch.kernarg_address
          << "\nsignal: " << dispatch.completion_signal.handle;
    }

    if ((t == HSA_PACKET_TYPE_BARRIER_AND) ||
        (t == HSA_PACKET_TYPE_BARRIER_OR)) {
      // Both barrier kinds are printed through the barrier_and view.
      for (int i = 0; i < 5; i++)
        out << "\ndep[" << i << "]: " << barrier_and.dep_signal[i].handle;
      out << "\nsignal: " << barrier_and.completion_signal.handle;
    }

    return out.str();
  }
};

class Queue;

/// @brief Helper structure to simplify conversion between amd_queue_v2_t and
/// the core::Queue object.
struct SharedQueue {
  // AMD queue descriptor. Queue::Convert(const hsa_queue_t*) subtracts the
  // offset of amd_queue.hsa_queue within this struct to get from a public
  // handle back to the enclosing SharedQueue.
  amd_queue_v2_t amd_queue;
  // Back-pointer to the core::Queue; assigned by the Queue constructor.
  Queue* core_queue;
};

/// @brief Class Queue which encapsulates user mode queues and
/// provides an API to access its Read and Write indices using Acquire,
/// Release and Relaxed semantics.
/*
Queue is intended to be a pure interface class and may be wrapped or replaced
by tools.
All functions other than Convert and public_handle must be virtual.
*/
class Queue : public Checked<0xFA3906A679F9DB49> {
 public:
  /// @brief Constructs a queue with PCIe write ordering disabled; delegates
  /// to the three-argument constructor.
  Queue(SharedQueue* shared_queue, uint64_t queue_flags)
      : Queue(shared_queue, queue_flags, false) {}

  /// @brief Binds this object to @p shared_queue.
  ///
  /// amd_queue_ refers to shared_queue->amd_queue, the public handle is
  /// initialized to the embedded hsa_queue_t, and the shared structure's
  /// core_queue back-pointer is set to this object.
  ///
  /// @param shared_queue Shared queue structure; dereferenced, so it must not
  /// be null.
  ///
  /// @param queue_flags Creation flags, later tested by
  /// IsDeviceMemRingBuf() and IsDeviceMemQueueDescriptor().
  ///
  /// @param pcie_write_ordering Value reported by needsPcieOrdering().
  Queue(SharedQueue* shared_queue, uint64_t queue_flags, bool pcie_write_ordering)
      : amd_queue_(shared_queue->amd_queue),
        shared_queue_(shared_queue),
        flags_(queue_flags),
        pcie_write_ordering_(pcie_write_ordering) {
    public_handle_ = Convert(this);
    shared_queue->core_queue = this;
  }

  virtual ~Queue() {}

  /// @brief Destroys the queue. The default implementation deletes this
  /// object.
  virtual void Destroy() { delete this; }

  /// @brief Returns the handle of Queue's public data type
  ///
  /// @param queue Pointer to an instance of Queue implementation object
  ///
  /// @return hsa_queue_t * Pointer to the public data type of a queue, or
  /// nullptr if @p queue is nullptr
  static __forceinline hsa_queue_t* Convert(Queue* queue) {
    return (queue != nullptr) ? &queue->amd_queue_.hsa_queue : nullptr;
  }

  /// @brief Transform the public data type of a Queue into an
  /// instance of its Queue class object
  ///
  /// The handle is assumed to be the hsa_queue member embedded in a
  /// SharedQueue; the enclosing SharedQueue is recovered via offsetof and its
  /// core_queue pointer is returned.
  ///
  /// @param queue Handle of public data type of a queue
  ///
  /// @return Queue * Pointer to the Queue's implementation object, or nullptr
  /// if @p queue is nullptr
  static __forceinline Queue* Convert(const hsa_queue_t* queue) {
    return (queue != nullptr)
        ? reinterpret_cast<SharedQueue*>(reinterpret_cast<uintptr_t>(queue) -
                                         offsetof(SharedQueue, amd_queue.hsa_queue))->core_queue
        : nullptr;
  }

  /// @brief Inactivate the queue object. Once inactivated, a
  /// queue cannot be used anymore and must be destroyed
  ///
  /// @return hsa_status_t Status of request
  virtual hsa_status_t Inactivate() = 0;

  /// @brief Change the scheduling priority of the queue
  virtual hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) = 0;

  /// @brief Reads the Read Index of Queue using Acquire semantics
  ///
  /// @return uint64_t Value of Read index
  virtual uint64_t LoadReadIndexAcquire() = 0;

  /// @brief Reads the Read Index of Queue using Relaxed semantics
  ///
  /// @return uint64_t Value of Read index
  virtual uint64_t LoadReadIndexRelaxed() = 0;

  /// @brief Reads the Write Index of Queue using Acquire semantics
  ///
  /// @return uint64_t Value of Write index
  virtual uint64_t LoadWriteIndexAcquire() = 0;

  /// @brief Reads the Write Index of Queue using Relaxed semantics
  ///
  /// @return uint64_t Value of Write index
  virtual uint64_t LoadWriteIndexRelaxed() = 0;

  /// @brief Updates the Read Index of Queue using Relaxed semantics
  ///
  /// @param value New value of Read index to update
  virtual void StoreReadIndexRelaxed(uint64_t value) = 0;

  /// @brief Updates the Read Index of Queue using Release semantics
  ///
  /// @param value New value of Read index to update
  virtual void StoreReadIndexRelease(uint64_t value) = 0;

  /// @brief Updates the Write Index of Queue using Relaxed semantics
  ///
  /// @param value New value of Write index to update
  virtual void StoreWriteIndexRelaxed(uint64_t value) = 0;

  /// @brief Updates the Write Index of Queue using Release semantics
  ///
  /// @param value New value of Write index to update
  virtual void StoreWriteIndexRelease(uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Acquire and Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Acquire semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Relaxed semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) = 0;

  /// @brief Adds to the Write index using Acquire and Release semantics
  ///
  /// @param value Amount to add to the write index (inferred from the Add*
  /// name - confirm against the implementations)
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexAcqRel(uint64_t value) = 0;

  /// @brief Adds to the Write index using Acquire semantics
  ///
  /// @param value Amount to add to the write index (inferred from the Add*
  /// name - confirm against the implementations)
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexAcquire(uint64_t value) = 0;

  /// @brief Adds to the Write index using Relaxed semantics
  ///
  /// @param value Amount to add to the write index (inferred from the Add*
  /// name - confirm against the implementations)
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexRelaxed(uint64_t value) = 0;

  /// @brief Adds to the Write index using Release semantics
  ///
  /// @param value Amount to add to the write index (inferred from the Add*
  /// name - confirm against the implementations)
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexRelease(uint64_t value) = 0;

  /// @brief Set CU Masking
  ///
  /// @param num_cu_mask_count size of mask bit array
  ///
  /// @param cu_mask pointer to cu mask
  ///
  /// @return hsa_status_t
  virtual hsa_status_t SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) = 0;

  /// @brief Get CU Masking
  ///
  /// @param num_cu_mask_count size of mask bit array
  ///
  /// @param cu_mask pointer to cu mask
  ///
  /// @return hsa_status_t
  virtual hsa_status_t GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) = 0;

  /// @brief Submits a block of PM4.
  ///
  /// @param cmd_data pointer to command buffer
  ///
  /// @param cmd_size_b command buffer size in bytes
  ///
  /// @param acquireFence acquire-fence type
  ///
  /// @param releaseFence release-fence type
  ///
  /// @param signal optional wait signal
  ///
  /// if @p signal is provided, function will return without waiting for commands to be executed
  /// if @p signal is NULL, waits until commands have been executed.
  virtual void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b,
                          hsa_fence_scope_t acquireFence = HSA_FENCE_SCOPE_NONE,
                          hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
                          hsa_signal_t* signal = NULL) = 0;

  /// @brief Sets or clears the AMD_QUEUE_PROPERTIES_ENABLE_PROFILING bits in
  /// the AMD queue's queue_properties field.
  ///
  /// @param enabled true to set the profiling property, false to clear it
  virtual void SetProfiling(bool enabled) {
    AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING,
                     (enabled != 0));
  }

  /// @brief Answers an information query about the queue
  ///
  /// @param attribute Attribute being queried
  ///
  /// @param value Pointer through which the result is reported
  ///
  /// @return hsa_status_t Status of request
  virtual hsa_status_t GetInfo(hsa_queue_info_attribute_t attribute, void* value) = 0;

  /// @brief Reports async queue errors to stderr if no other error handler was registered.
  static void DefaultErrorHandler(hsa_status_t status, hsa_queue_t* source, void* data);

  // Handle of AMD Queue struct. Reference to shared_queue->amd_queue, bound
  // in the constructor.
  amd_queue_v2_t& amd_queue_;

  /// @brief Returns the stored public handle. It is initialized to
  /// Convert(this) and can be replaced through do_set_public_handle().
  hsa_queue_t* public_handle() const { return public_handle_; }

  // Opaque identifier used by IsType()/_IsA() for type checks.
  typedef void* rtti_t;

  /// @brief Forwards @p id to the implementation-provided _IsA().
  bool IsType(rtti_t id) { return _IsA(id); }

  /// @brief Used to determine if the queue's packet buffer was allocated
  /// in the agent's local device memory.
  bool IsDeviceMemRingBuf() const {
    return (flags_ & HSA_AMD_QUEUE_CREATE_DEVICE_MEM_RING_BUF) != 0;
  }
  /// @brief Used to determine if the queue descriptor was allocated in
  /// the agent's local device memory.
  bool IsDeviceMemQueueDescriptor() const {
    return (flags_ & HSA_AMD_QUEUE_CREATE_DEVICE_MEM_QUEUE_DESCRIPTOR) != 0;
  }

  /// @brief Returns the pcie_write_ordering value given at construction.
  bool needsPcieOrdering() const { return pcie_write_ordering_; }

 protected:
  /// @brief Replaces the public handle of @p ptr by dispatching to its
  /// virtual do_set_public_handle().
  static void set_public_handle(Queue* ptr, hsa_queue_t* handle) {
    ptr->do_set_public_handle(handle);
  }
  /// @brief Stores @p handle as the public handle; overridable.
  virtual void do_set_public_handle(hsa_queue_t* handle) {
    public_handle_ = handle;
  }

  /// @brief Type check hook that each implementation must provide; called by
  /// IsType().
  virtual bool _IsA(rtti_t id) const = 0;

  // Shared structure this queue was constructed with.
  SharedQueue* shared_queue_;

  // Handle returned by public_handle().
  hsa_queue_t* public_handle_;

  /// Next available queue id. Returns the counter value before the
  /// post-increment.
  uint64_t GetQueueId() { return hsa_queue_counter_++; }

 private:

  // HSA Queue ID - used to bind a unique ID. Static, so shared by all Queue
  // objects.
  static std::atomic<uint64_t> hsa_queue_counter_;

  // Creation flags passed to the constructor.
  const uint64_t flags_;
  // Value reported by needsPcieOrdering().
  bool pcie_write_ordering_ = false;

  DISALLOW_COPY_AND_ASSIGN(Queue);
};
}   //  namespace core
}   //  namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/registers.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// This file is used only for open source cmake builds. If the register values
// were hardcoded in amd_aql_queue.cpp, this file would not be required. For
// now, the register details are spelled out in the enums and structs/unions
// below.
#ifndef HSA_RUNTME_CORE_INC_REGISTERS_H_
#define HSA_RUNTME_CORE_INC_REGISTERS_H_

// Encodings 0-3 of SQ_RSRC_BUF_TYPE; only encoding 0 has a non-reserved name.
typedef enum SQ_RSRC_BUF_TYPE {
  SQ_RSRC_BUF        = 0x0,
  SQ_RSRC_BUF_RSVD_1 = 0x1,
  SQ_RSRC_BUF_RSVD_2 = 0x2,
  SQ_RSRC_BUF_RSVD_3 = 0x3,
} SQ_RSRC_BUF_TYPE;

// Encodings 0x0-0xf of BUF_DATA_FORMAT. The name suffix lists the component
// widths of each format.
typedef enum BUF_DATA_FORMAT {
  BUF_DATA_FORMAT_INVALID     = 0x0,
  BUF_DATA_FORMAT_8           = 0x1,
  BUF_DATA_FORMAT_16          = 0x2,
  BUF_DATA_FORMAT_8_8         = 0x3,
  BUF_DATA_FORMAT_32          = 0x4,
  BUF_DATA_FORMAT_16_16       = 0x5,
  BUF_DATA_FORMAT_10_11_11    = 0x6,
  BUF_DATA_FORMAT_11_11_10    = 0x7,
  BUF_DATA_FORMAT_10_10_10_2  = 0x8,
  BUF_DATA_FORMAT_2_10_10_10  = 0x9,
  BUF_DATA_FORMAT_8_8_8_8     = 0xa,
  BUF_DATA_FORMAT_32_32       = 0xb,
  BUF_DATA_FORMAT_16_16_16_16 = 0xc,
  BUF_DATA_FORMAT_32_32_32    = 0xd,
  BUF_DATA_FORMAT_32_32_32_32 = 0xe,
  BUF_DATA_FORMAT_RESERVED_15 = 0xf,
} BUF_DATA_FORMAT;

// Encodings 0-7 of BUF_NUM_FORMAT. Encoding 6 deliberately carries two names:
// one suffixed __SI__CI and one suffixed __VI.
typedef enum BUF_NUM_FORMAT {
  BUF_NUM_FORMAT_UNORM             = 0x0,
  BUF_NUM_FORMAT_SNORM             = 0x1,
  BUF_NUM_FORMAT_USCALED           = 0x2,
  BUF_NUM_FORMAT_SSCALED           = 0x3,
  BUF_NUM_FORMAT_UINT              = 0x4,
  BUF_NUM_FORMAT_SINT              = 0x5,
  BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x6,
  BUF_NUM_FORMAT_RESERVED_6__VI    = 0x6,
  BUF_NUM_FORMAT_FLOAT             = 0x7,
} BUF_NUM_FORMAT;

// Single named BUF_FORMAT encoding (0x14, decimal 20).
typedef enum BUF_FORMAT {
  BUF_FORMAT_32_UINT = 0x14,
} BUF_FORMAT;

// Encodings 0-7 of SQ_SEL_XYZW01: constants 0 and 1, two reserved values,
// then the X/Y/Z/W selectors.
typedef enum SQ_SEL_XYZW01 {
  SQ_SEL_0          = 0x0,
  SQ_SEL_1          = 0x1,
  SQ_SEL_RESERVED_0 = 0x2,
  SQ_SEL_RESERVED_1 = 0x3,
  SQ_SEL_X          = 0x4,
  SQ_SEL_Y          = 0x5,
  SQ_SEL_Z          = 0x6,
  SQ_SEL_W          = 0x7,
} SQ_SEL_XYZW01;

	// 32-bit COMPUTE_TMPRING_SIZE value with bit-field and whole-word views.
	// Fields from the least significant bit (little-endian branch):
	// WAVES (12 bits), WAVESIZE (13 bits), 7 unnamed bits. The big-endian
	// branch declares the same fields in reverse order. If neither
	// LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined, the bit-field struct has
	// no members. u32All/i32All/f32All alias the same storage.
	union COMPUTE_TMPRING_SIZE {
	struct {
#if		defined(LITTLEENDIAN_CPU)
		unsigned int                           WAVES : 12;
		unsigned int                        WAVESIZE : 13;
		unsigned int                                 : 7;
#elif		defined(BIGENDIAN_CPU)
		unsigned int                                 : 7;
		unsigned int                        WAVESIZE : 13;
		unsigned int                           WAVES : 12;
#endif
	} bitfields, bits;
	unsigned int	u32All;
	signed int	i32All;
	float	f32All;
	};

        // GFX11 variant of COMPUTE_TMPRING_SIZE: WAVES (12 bits), WAVESIZE
        // (15 bits), 5 unnamed bits. WAVESIZE is two bits wider than in the
        // base union. The big-endian branch declares the fields in reverse
        // order; u32All/i32All/f32All alias the same storage.
        union COMPUTE_TMPRING_SIZE_GFX11 {
          struct {
#if defined(LITTLEENDIAN_CPU)
            unsigned int WAVES : 12;
            unsigned int WAVESIZE : 15;
            unsigned int : 5;
#elif defined(BIGENDIAN_CPU)
            unsigned int : 5;
            unsigned int WAVESIZE : 15;
            unsigned int WAVES : 12;
#endif
          } bitfields, bits;
          unsigned int u32All;
          signed int i32All;
          float f32All;
        };

        // GFX12 variant of COMPUTE_TMPRING_SIZE: WAVES (12 bits), WAVESIZE
        // (18 bits), 2 unnamed bits. The big-endian branch declares the
        // fields in reverse order; u32All/i32All/f32All alias the same
        // storage.
        union COMPUTE_TMPRING_SIZE_GFX12 {
          struct {
#if defined(LITTLEENDIAN_CPU)
            unsigned int WAVES : 12;
            unsigned int WAVESIZE : 18;
            unsigned int : 2;
#elif defined(BIGENDIAN_CPU)
            unsigned int : 2;
            unsigned int WAVESIZE : 18;
            unsigned int WAVES : 12;
#endif
          } bitfields, bits;
          unsigned int u32All;
          signed int i32All;
          float f32All;
        };


        // SQ_BUF_RSRC_WORD0: a single 32-bit BASE_ADDRESS field. Both
        // endianness branches declare the same field; u32All/i32All/f32All
        // alias the same storage.
        // NOTE(review): presumably dword 0 of the buffer resource descriptor
        // - confirm against the hardware documentation.
        union SQ_BUF_RSRC_WORD0 {
	struct {
#if		defined(LITTLEENDIAN_CPU)
		unsigned int                    BASE_ADDRESS : 32;
#elif		defined(BIGENDIAN_CPU)
		unsigned int                    BASE_ADDRESS : 32;
#endif
	} bitfields, bits;
	unsigned int	u32All;
	signed int	i32All;
	float	f32All;
	};


	// SQ_BUF_RSRC_WORD1. Fields from the least significant bit
	// (little-endian branch): BASE_ADDRESS_HI (16 bits), STRIDE (14 bits),
	// CACHE_SWIZZLE (1 bit), SWIZZLE_ENABLE (1 bit). The big-endian branch
	// declares them in reverse order; u32All/i32All/f32All alias the same
	// storage.
	union SQ_BUF_RSRC_WORD1 {
	struct {
#if		defined(LITTLEENDIAN_CPU)
		unsigned int                 BASE_ADDRESS_HI : 16;
		unsigned int                          STRIDE : 14;
		unsigned int                   CACHE_SWIZZLE : 1;
		unsigned int                  SWIZZLE_ENABLE : 1;
#elif		defined(BIGENDIAN_CPU)
		unsigned int                  SWIZZLE_ENABLE : 1;
		unsigned int                   CACHE_SWIZZLE : 1;
		unsigned int                          STRIDE : 14;
		unsigned int                 BASE_ADDRESS_HI : 16;
#endif
	} bitfields, bits;
	unsigned int	u32All;
	signed int	i32All;
	float	f32All;
	};

        // GFX11 variant of SQ_BUF_RSRC_WORD1: BASE_ADDRESS_HI (16 bits),
        // STRIDE (14 bits), SWIZZLE_ENABLE (2 bits). Unlike the base union
        // there is no CACHE_SWIZZLE bit and SWIZZLE_ENABLE is two bits wide.
        // The big-endian branch declares the fields in reverse order.
        union SQ_BUF_RSRC_WORD1_GFX11 {
          struct {
#if defined(LITTLEENDIAN_CPU)
            unsigned int BASE_ADDRESS_HI : 16;
            unsigned int STRIDE : 14;
            unsigned int SWIZZLE_ENABLE : 2;
#elif defined(BIGENDIAN_CPU)
            unsigned int SWIZZLE_ENABLE : 2;
            unsigned int STRIDE : 14;
            unsigned int BASE_ADDRESS_HI : 16;
#endif
          } bitfields, bits;
          unsigned int u32All;
          signed int i32All;
          float f32All;
        };


        // SQ_BUF_RSRC_WORD2: a single 32-bit NUM_RECORDS field. Both
        // endianness branches declare the same field; u32All/i32All/f32All
        // alias the same storage.
        union SQ_BUF_RSRC_WORD2 {
	struct {
#if		defined(LITTLEENDIAN_CPU)
		unsigned int                     NUM_RECORDS : 32;
#elif		defined(BIGENDIAN_CPU)
		unsigned int                     NUM_RECORDS : 32;
#endif
	} bitfields, bits;
	unsigned int	u32All;
	signed int	i32All;
	float	f32All;
	};


	// SQ_BUF_RSRC_WORD3. Fields from the least significant bit
	// (little-endian branch): DST_SEL_X/Y/Z/W (3 bits each), NUM_FORMAT (3),
	// DATA_FORMAT (4), ELEMENT_SIZE (2), INDEX_STRIDE (2), ADD_TID_ENABLE (1),
	// ATC__CI__VI (1), HASH_ENABLE (1), HEAP (1), MTYPE__CI__VI (3), TYPE (2),
	// totalling 32 bits. The big-endian branch declares them in reverse
	// order; u32All/i32All/f32All alias the same storage.
	union SQ_BUF_RSRC_WORD3 {
	struct {
#if		defined(LITTLEENDIAN_CPU)
                unsigned int                       DST_SEL_X : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                      NUM_FORMAT : 3;
                unsigned int                     DATA_FORMAT : 4;
                unsigned int                    ELEMENT_SIZE : 2;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                     ATC__CI__VI : 1;
                unsigned int                     HASH_ENABLE : 1;
                unsigned int                            HEAP : 1;
                unsigned int                   MTYPE__CI__VI : 3;
                unsigned int                            TYPE : 2;
#elif		defined(BIGENDIAN_CPU)
                unsigned int                            TYPE : 2;
                unsigned int                   MTYPE__CI__VI : 3;
                unsigned int                            HEAP : 1;
                unsigned int                     HASH_ENABLE : 1;
                unsigned int                     ATC__CI__VI : 1;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                    ELEMENT_SIZE : 2;
                unsigned int                     DATA_FORMAT : 4;
                unsigned int                      NUM_FORMAT : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_X : 3;
#endif
	} bitfields, bits;
	unsigned int	u32All;
	signed int	i32All;
	float	f32All;
	};

	// GFX10 variant of SQ_BUF_RSRC_WORD3. Fields from the least significant
	// bit (little-endian branch): DST_SEL_X/Y/Z/W (3 bits each), FORMAT (7),
	// RESERVED1 (2), INDEX_STRIDE (2), ADD_TID_ENABLE (1), RESOURCE_LEVEL (1),
	// RESERVED2 (3), OOB_SELECT (2), TYPE (2), totalling 32 bits. A single
	// FORMAT field replaces the NUM_FORMAT/DATA_FORMAT pair of the base
	// union. The big-endian branch declares the fields in reverse order.
	union SQ_BUF_RSRC_WORD3_GFX10 {
	struct {
#if		defined(LITTLEENDIAN_CPU)
                unsigned int                       DST_SEL_X : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                          FORMAT : 7;
                unsigned int                       RESERVED1 : 2;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                  RESOURCE_LEVEL : 1;
                unsigned int                       RESERVED2 : 3;
                unsigned int                      OOB_SELECT : 2;
                unsigned int                            TYPE : 2;
#elif		defined(BIGENDIAN_CPU)
                unsigned int                            TYPE : 2;
                unsigned int                      OOB_SELECT : 2;
                unsigned int                       RESERVED2 : 3;
                unsigned int                  RESOURCE_LEVEL : 1;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                       RESERVED1 : 2;
                unsigned int                          FORMAT : 7;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_X : 3;
#endif
        } bitfields, bits;
        unsigned int u32All;
        signed int i32All;
        float f32All;
        };

        // From V# Table
        // GFX11 variant of SQ_BUF_RSRC_WORD3. Fields from the least
        // significant bit (little-endian branch): DST_SEL_X/Y/Z/W (3 bits
        // each), FORMAT (6), RESERVED1 (3), INDEX_STRIDE (2),
        // ADD_TID_ENABLE (1), RESERVED2 (4), OOB_SELECT (2), TYPE (2),
        // totalling 32 bits. The big-endian branch declares the fields in
        // reverse order; u32All/i32All/f32All alias the same storage.
        union SQ_BUF_RSRC_WORD3_GFX11 {
          struct {
#if defined(LITTLEENDIAN_CPU)
            unsigned int DST_SEL_X : 3;
            unsigned int DST_SEL_Y : 3;
            unsigned int DST_SEL_Z : 3;
            unsigned int DST_SEL_W : 3;
            unsigned int FORMAT : 6;
            unsigned int RESERVED1 : 3;
            unsigned int INDEX_STRIDE : 2;
            unsigned int ADD_TID_ENABLE : 1;
            unsigned int RESERVED2 : 4;
            unsigned int OOB_SELECT : 2;
            unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
            unsigned int TYPE : 2;
            unsigned int OOB_SELECT : 2;
            unsigned int RESERVED2 : 4;
            unsigned int ADD_TID_ENABLE : 1;
            unsigned int INDEX_STRIDE : 2;
            unsigned int RESERVED1 : 3;
            unsigned int FORMAT : 6;
            unsigned int DST_SEL_W : 3;
            unsigned int DST_SEL_Z : 3;
            unsigned int DST_SEL_Y : 3;
            unsigned int DST_SEL_X : 3;
#endif
          } bitfields, bits;
        unsigned int	u32All;
	signed int	i32All;
	float	f32All;
        };

        // From V# Table
        // GFX12 variant of SQ_BUF_RSRC_WORD3. Fields from the least
        // significant bit (little-endian branch): DST_SEL_X/Y/Z/W (3 bits
        // each), FORMAT (6), RESERVED1 (3), INDEX_STRIDE (2),
        // ADD_TID_ENABLE (1), WRITE_COMPRESS_ENABLE (1), COMPRESSION_EN (1),
        // COMPRESSION_ACCESS_MODE (2), OOB_SELECT (2), TYPE (2), totalling
        // 32 bits. The three compression fields occupy the four bits that the
        // GFX11 union declares as RESERVED2. The big-endian branch declares
        // the fields in reverse order.
        union SQ_BUF_RSRC_WORD3_GFX12 {
          struct {
#if defined(LITTLEENDIAN_CPU)
            unsigned int DST_SEL_X : 3;
            unsigned int DST_SEL_Y : 3;
            unsigned int DST_SEL_Z : 3;
            unsigned int DST_SEL_W : 3;
            unsigned int FORMAT : 6;
            unsigned int RESERVED1 : 3;
            unsigned int INDEX_STRIDE : 2;
            unsigned int ADD_TID_ENABLE : 1;
            unsigned int WRITE_COMPRESS_ENABLE : 1;
            unsigned int COMPRESSION_EN : 1;
            unsigned int COMPRESSION_ACCESS_MODE : 2;
            unsigned int OOB_SELECT : 2;
            unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
            unsigned int TYPE : 2;
            unsigned int OOB_SELECT : 2;
            unsigned int COMPRESSION_ACCESS_MODE : 2;
            unsigned int COMPRESSION_EN : 1;
            unsigned int WRITE_COMPRESS_ENABLE : 1;
            unsigned int ADD_TID_ENABLE : 1;
            unsigned int INDEX_STRIDE : 2;
            unsigned int RESERVED1 : 3;
            unsigned int FORMAT : 6;
            unsigned int DST_SEL_W : 3;
            unsigned int DST_SEL_Z : 3;
            unsigned int DST_SEL_Y : 3;
            unsigned int DST_SEL_X : 3;
#endif
          } bitfields, bits;
        unsigned int	u32All;
	signed int	i32All;
	float	f32All;
        };
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/runtime.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_RUNTIME_H_
#define HSA_RUNTME_CORE_INC_RUNTIME_H_

#include <vector>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <thread>
#include <sys/un.h>

#if defined(__linux__)
#include <xf86drm.h>
#include <amdgpu.h>
#endif

#include "core/inc/hsa_ext_interface.h"
#include "core/inc/hsa_internal.h"
#include "core/inc/hsa_ext_amd_impl.h"

#include "core/inc/agent.h"
#include "core/inc/driver.h"
#include "core/inc/exceptions.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/memory_region.h"
#include "core/inc/signal.h"
#include "core/inc/svm_profiler.h"
#include "core/inc/thunk_loader.h"
#include "core/util/flag.h"
#include "core/util/locks.h"
#include "core/util/os.h"
#include "core/util/utils.h"

#include "core/inc/amd_loader_context.hpp"
#include "core/inc/amd_hsa_code.hpp"

#if defined(__clang__)
#if __has_feature(address_sanitizer)
#define SANITIZER_AMDGPU 1
#endif
#endif

//---------------------------------------------------------------------------//
//    Constants                                                              //
//---------------------------------------------------------------------------//

#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
#define HSA_MAX_DEP_SIGNALS 5

//Avoids include
namespace rocr {
namespace AMD {
  // Forward declaration only; core::Runtime names this class as a friend.
  class MemoryRegion;
} // namespace AMD

namespace core {
extern bool g_use_interrupt_wait;
extern bool g_use_mwaitx;

/// @brief  Runtime class provides the following functions:
/// - open and close connection to kernel driver.
/// - load supported extension library (image and finalizer).
/// - load tools library.
/// - expose supported agents.
/// - allocate and free memory.
/// - memory copy and fill.
/// - grant access to memory (dgpu memory pool extension).
/// - maintain loader state.
/// - monitor asynchronous event from agent.
class Runtime {
 friend class AMD::MemoryRegion;
 public:
  /// @brief Structure to describe connectivity between agents.
  ///
  /// One record is stored per (source node, destination node) pair through
  /// RegisterLinkInfo() and read back through GetLinkInfo().
  struct LinkInfo {
    /// Starts with zeroed counters and a zero-initialized link description.
    LinkInfo()
        : num_hop(0),
          rec_sdma_eng_id_mask(0),
          info{0} {}

    /// Hop count supplied to RegisterLinkInfo().
    uint32_t num_hop;

    /// SDMA engine id mask supplied to RegisterLinkInfo().
    uint32_t rec_sdma_eng_id_mask;

    /// Memory pool link information supplied to RegisterLinkInfo().
    hsa_amd_memory_pool_link_info_t info;
  };

  /// @brief Kernel driver interface version together with the optional
  /// capabilities recorded through the KfdVersion() setters.
  ///
  /// Every member has a default initializer so that a capability reads as
  /// "not supported" until a setter explicitly records otherwise, instead of
  /// holding an indeterminate value.
  struct KfdVersion_t {
    /// Version information as reported by the kernel driver.
    HsaVersionInfo version = {};
    /// Set by KfdVersion(bool, bool).
    bool supports_exception_debugging = false;
    /// Raised by KfdVersion(const HsaVersionInfo&) for interface 1.14+.
    bool supports_event_age = false;
    /// Set by KfdVersion(bool, bool).
    bool supports_core_dump = false;
  };

  /// @brief Open connection to kernel driver and increment reference count.
  static hsa_status_t Acquire();

  /// @brief Decrement reference count and close connection to kernel driver.
  static hsa_status_t Release();

  /// @brief Checks if connection to kernel driver is opened.
  /// @retval True if the connection to kernel driver is opened.
  static bool IsOpen();

  // @brief Callback handler for HW Exceptions.
  static bool HwExceptionHandler(hsa_signal_value_t val, void* arg);

  // @brief Callback handler for VM fault access.
  static bool VMFaultHandler(hsa_signal_value_t val, void* arg);

  // @brief Print known allocations near ptr.
  static void PrintMemoryMapNear(void* ptr);

  /// @brief Singleton object of the runtime.
  static Runtime* runtime_singleton_;

  /// @brief Insert agent into agent list ::agents_.
  /// @param [in] agent Pointer to the agent object.
  void RegisterAgent(Agent* agent, bool Enabled);

  /// @brief Insert agent into the driver list.
  /// @param [in] driver Unique pointer to the driver object.
  void RegisterDriver(std::unique_ptr<Driver> driver);

  /// @brief Delete all agent objects from ::agents_.
  void DestroyAgents();

  /// @brief Close and delete all agent driver objects from ::agent_drivers_.
  void DestroyDrivers();

  /// @brief Set the number of links connecting the agents in the platform.
  void SetLinkCount(size_t num_link);

  /// @brief Register link information connecting @p node_id_from and @p
  /// node_id_to.
  /// @param [in] node_id_from Node id of the source node.
  /// @param [in] node_id_to Node id of the destination node.
  /// @param [in] link_info The link information between source and destination
  /// nodes.
  void RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
                        uint32_t num_hop, uint32_t rec_sdma_eng_id_mask,
                        hsa_amd_memory_pool_link_info_t& link_info);

  /// @brief Query link information between two nodes.
  /// @param [in] node_id_from Node id of the source node.
  /// @param [in] node_id_to Node id of the destination node.
  /// @retval The link information between source and destination nodes.
  const LinkInfo GetLinkInfo(uint32_t node_id_from, uint32_t node_id_to);

  /// @brief Invoke the user provided call back for each agent in the agent
  /// list.
  ///
  /// @param [in] callback User provided callback function.
  /// @param [in] data User provided pointer as input for @p callback.
  ///
  /// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
  /// agent returns ::HSA_STATUS_SUCCESS.
  hsa_status_t IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
                                                     void* data),
                            void* data);

  /// @brief Allocate memory on a particular region.
  ///
  /// @param [in] region Pointer to region object.
  /// @param [in] size Allocation size in bytes.
  /// @param [in] alloc_flags Modifiers to pass to MemoryRegion allocator.
  /// @param [out] address Pointer to store the allocation result.
  ///
  /// @retval ::HSA_STATUS_SUCCESS If allocation is successful.
  hsa_status_t AllocateMemory(const MemoryRegion* region, size_t size,
                              MemoryRegion::AllocateFlags alloc_flags,
                              void** address, int agent_node_id = 0);

  /// @brief Free memory previously allocated with AllocateMemory.
  ///
  /// @param [in] ptr Address of the memory to be freed.
  ///
  /// @retval ::HSA_STATUS_ERROR If @p ptr is not the address of previous
  /// allocation via ::core::Runtime::AllocateMemory
  /// @retval ::HSA_STATUS_SUCCESS if @p ptr is successfully released.
  hsa_status_t FreeMemory(void* ptr);

  hsa_status_t RegisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback,
                                       void* user_data);

  hsa_status_t DeregisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback);

  /// @brief Blocking memory copy from src to dst.
  ///
  /// @param [in] dst Memory address of the destination.
  /// @param [in] src Memory address of the source.
  /// @param [in] size Copy size in bytes.
  ///
  /// @retval ::HSA_STATUS_SUCCESS if memory copy is successful and completed.
  hsa_status_t CopyMemory(void* dst, const void* src, size_t size);

  /// @brief Non-blocking memory copy from src to dst.
  ///
  /// @details The memory copy will be performed after all signals in
  /// @p dep_signals have value of 0. On completion @p completion_signal
  /// will be decremented.
  ///
  /// @param [in] dst Memory address of the destination.
  /// @param [in] dst_agent Agent object associated with the destination. This
  /// agent should be able to access the destination and source.
  /// @param [in] src Memory address of the source.
  /// @param [in] src_agent Agent object associated with the source. This
  /// agent should be able to access the destination and source.
  /// @param [in] size Copy size in bytes.
  /// @param [in] dep_signals Array of signal dependency.
  /// @param [in] completion_signal Completion signal object.
  ///
  /// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
  /// successfully to the agent DMA queue.
  hsa_status_t CopyMemory(void* dst, core::Agent* dst_agent, const void* src,
                          core::Agent* src_agent, size_t size,
                          std::vector<core::Signal*>& dep_signals, core::Signal& completion_signal);

  /// @brief Non-blocking memory copy from src to dst on engine_id.
  ///
  /// @details All semantics and params are identical to CopyMemory
  ///  with the exception of engine_id.
  ///
  /// @param [in] engine_id Target engine to copy on.
  ///
  /// @param [in] force_copy_on_sdma By default, a blit kernel copy is used
  /// when dst_agent == src_agent.  Setting this to true will force the copy
  /// over SDMA1.
  ///
  /// @retval ::HSA_STATUS_SUCCESS if copy command has been submitted
  /// successfully to the agent DMA queue.
  hsa_status_t CopyMemoryOnEngine(void* dst, core::Agent* dst_agent, const void* src,
                          core::Agent* src_agent, size_t size,
                          std::vector<core::Signal*>& dep_signals, core::Signal& completion_signal,
                          hsa_amd_sdma_engine_id_t  engine_id, bool force_copy_on_sdma);

  /// @brief Return SDMA availability status for copy direction
  ///
  /// @param [in] dst_agent Destination agent.
  /// @param [in] src_agent Source agent.
  /// @param [out] engine_ids_mask Mask of engine_ids.
  ///
  /// @retval HSA_STATUS_SUCCESS DMA engines are available
  /// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES DMA engines are not available
  hsa_status_t CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent,
                                uint32_t *engine_ids_mask);

  /// @brief Get preferred SDMA engine for the copy direction
  ///
  /// @param [in] dst_agent Destination agent.
  /// @param [in] src_agent Source agent.
  /// @param [out] recommended_ids_mask Mask of recommended_ids.
  ///
  /// @retval HSA_STATUS_SUCCESS For mask returned
  hsa_status_t GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
                                  uint32_t* recommended_ids_mask);

  /// @brief Fill the first @p count of uint32_t in ptr with value.
  ///
  /// @param [in] ptr Memory address to be filled.
  /// @param [in] value The value/pattern that will be used to set @p ptr.
  /// @param [in] count Number of uint32_t element to be set.
  ///
  /// @retval ::HSA_STATUS_SUCCESS if memory fill is successful and completed.
  hsa_status_t FillMemory(void* ptr, uint32_t value, size_t count);

  /// @brief Set agents as the whitelist to access ptr.
  ///
  /// @param [in] num_agents The number of agent handles in @p agents array.
  /// @param [in] agents Agent handle array.
  /// @param [in] ptr Pointer of memory previously allocated via
  /// core::Runtime::AllocateMemory.
  ///
  /// @retval ::HSA_STATUS_SUCCESS The whitelist has been configured
  /// successfully and all agents in the @p agents could start accessing @p ptr.
  hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents,
                           const void* ptr);

  /// @brief Query system information.
  ///
  /// @param [in] attribute System info attribute to query.
  /// @param [out] value Pointer to store the attribute value.
  ///
  /// @retval HSA_STATUS_SUCCESS The attribute is valid and the @p value is
  /// set.
  hsa_status_t GetSystemInfo(hsa_system_info_t attribute, void* value);

  /// @brief Register a callback function @p handler that is associated with
  /// @p signal to asynchronous event monitor thread.
  ///
  /// @param [in] signal Signal handle associated with @p handler.
  /// @param [in] cond The condition to execute the @p handler.
  /// @param [in] value The value to compare with @p signal value. If the
  /// comparison satisfy @p cond, the @p handler will be called.
  /// @param [in] arg Pointer to the argument that will be provided to @p
  /// handler.
  ///
  /// @retval ::HSA_STATUS_SUCCESS Registration is successful.
  hsa_status_t SetAsyncSignalHandler(hsa_signal_t signal,
                                     hsa_signal_condition_t cond,
                                     hsa_signal_value_t value,
                                     hsa_amd_signal_handler handler, void* arg);

  hsa_status_t InteropMap(uint32_t num_agents, Agent** agents,
                          int interop_handle, uint32_t flags, size_t* size,
                          void** ptr, size_t* metadata_size,
                          const void** metadata);

  hsa_status_t InteropUnmap(void* ptr);

  // Optional extra output of PtrInfo() (its block_info parameter).
  // NOTE(review): the field meanings below are inferred from their names;
  // confirm against the PtrInfo() implementation.
  struct PtrInfoBlockData {
    void* base;               // presumably the base address of the enclosing block
    size_t length;            // presumably the length of that block in bytes
    core::Agent* agentOwner;  // presumably the agent that owns the block
  };

  hsa_status_t PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
                       uint32_t* num_agents_accessible, hsa_agent_t** accessible,
                       PtrInfoBlockData* block_info = nullptr);

  hsa_status_t SetPtrInfoData(const void* ptr, void* userptr);

  hsa_status_t IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle);

  hsa_status_t IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, uint32_t num_agents,
                         Agent** mapping_agents, void** mapped_ptr);

  hsa_status_t IPCDetach(void* ptr);

  hsa_status_t SetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
                            size_t attribute_count);

  hsa_status_t GetSvmAttrib(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
                            size_t attribute_count);

  hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
                           const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);

  hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf,
                                            uint64_t* offset, uint64_t flags);

  hsa_status_t DmaBufClose(int dmabuf);

  hsa_status_t VMemoryAddressReserve(void** ptr, size_t size, uint64_t address, uint64_t alignment, uint64_t flags);

  hsa_status_t VMemoryAddressFree(void* ptr, size_t size);

  hsa_status_t VMemoryHandleCreate(const MemoryRegion* region, size_t size,
                                   MemoryRegion::AllocateFlags alloc_flags,
                                   uint64_t flags, hsa_amd_vmem_alloc_handle_t* memoryHandle);

  hsa_status_t VMemoryHandleRelease(hsa_amd_vmem_alloc_handle_t memoryHandle);

  hsa_status_t VMemoryHandleMap(void* va, size_t size, size_t in_offset,
                                hsa_amd_vmem_alloc_handle_t memoryHandle, uint64_t flags);

  hsa_status_t VMemoryHandleUnmap(void* va, size_t size);

  hsa_status_t VMemorySetAccess(void* va, size_t size, const hsa_amd_memory_access_desc_t* desc,
                                size_t desc_cnt);

  hsa_status_t VMemoryGetAccess(const void* va, hsa_access_permission_t* perms,
                                hsa_agent_t agent_handle);

  hsa_status_t VMemoryExportShareableHandle(int* dmabuf_fd,
                                            const hsa_amd_vmem_alloc_handle_t handle,
                                            const uint64_t flags);

  hsa_status_t VMemoryImportShareableHandle(const int dmabuf_fd,
                                            hsa_amd_vmem_alloc_handle_t* handle);

  hsa_status_t VMemoryRetainAllocHandle(hsa_amd_vmem_alloc_handle_t* memoryHandle, void* addr);

  hsa_status_t VMemoryGetAllocPropertiesFromHandle(const hsa_amd_vmem_alloc_handle_t memoryHandle,
                                                   const core::MemoryRegion** mem_region,
                                                   hsa_amd_memory_type_t* type);

  hsa_status_t EnableLogging(uint8_t* flags, void* file);

  /// @brief Read-only view of the CPU agent list (::cpu_agents_).
  const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }

  /// @brief Read-only view of the GPU agent list (::gpu_agents_).
  const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }

  /// @brief Read-only view of the AIE agent list (::aie_agents_).
  const std::vector<Agent *> &aie_agents() { return aie_agents_; }

  /// @brief Read-only view of the disabled GPU agent list
  /// (::disabled_gpu_agents_).
  const std::vector<Agent*>& disabled_gpu_agents() { return disabled_gpu_agents_; }

  /// @brief Read-only view of the GPU id list (::gpu_ids_).
  const std::vector<uint32_t>& gpu_ids() { return gpu_ids_; }

  /// @brief Look up the agent registered under a KFD gpuid.
  ///
  /// Uses find() rather than operator[] so that querying an unknown gpuid
  /// does not insert a null entry into ::agents_by_gpuid_ as a side effect.
  ///
  /// @param [in] gpuid KFD gpuid to look up.
  ///
  /// @retval Pointer stored for @p gpuid, or nullptr if there is no entry.
  Agent* agent_by_gpuid(uint32_t gpuid) {
    const auto it = agents_by_gpuid_.find(gpuid);
    return (it != agents_by_gpuid_.end()) ? it->second : nullptr;
  }

  /// @brief Returns ::region_gpu_.
  Agent* region_gpu() { return region_gpu_; }

  /// @brief Read-only view of the fine grain system memory regions.
  const std::vector<const MemoryRegion*>& system_regions_fine() const {
    return system_regions_fine_;
  }

  /// @brief Read-only view of the coarse grain system memory regions.
  const std::vector<const MemoryRegion*>& system_regions_coarse() const {
    return system_regions_coarse_;
  }

  /// @brief Returns the loader instance pointer (::loader_).
  amd::hsa::loader::Loader* loader() { return loader_; }

  /// @brief Returns the address of the runtime-owned loader context.
  amd::LoaderContext* loader_context() { return &loader_context_; }

  /// @brief Returns the address of the runtime-owned code object manager.
  amd::hsa::code::AmdHsaCodeManager* code_manager() { return &code_manager_; }

  /// @brief Returns a mutable reference to the system allocator callable
  /// (::system_allocator_), so callers may both invoke and replace it.
  std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags, int agent_node_id)>&
  system_allocator() {
    return system_allocator_;
  }

  /// @brief Returns a mutable reference to the system deallocator callable
  /// (::system_deallocator_).
  std::function<void(void*)>& system_deallocator() {
    return system_deallocator_;
  }

  /// @brief Read-only access to the environment variable flags (::flag_).
  const Flag& flag() const { return flag_; }

  /// @brief Returns ::thunkLoader_.
  const ThunkLoader* thunkLoader() const { return thunkLoader_; }

  ExtensionEntryPoints extensions_;

  hsa_status_t SetCustomSystemEventHandler(hsa_amd_system_event_callback_t callback,
                                           void* data);

  hsa_status_t SetInternalQueueCreateNotifier(hsa_amd_runtime_queue_notifier callback,
                                              void* user_data);

  void InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t agent);

  /// @brief Returns the address of the runtime-owned SharedSignal pool.
  SharedSignalPool_t* GetSharedSignalPool() { return &SharedSignalPool; }

  /// @brief Returns the address of the runtime-owned KFD event pool used by
  /// InterruptSignal.
  InterruptSignal::EventPool* GetEventPool() { return &EventPool; }

  /// @brief Returns the system clock frequency (::sys_clock_freq_).
  uint64_t sys_clock_freq() const { return sys_clock_freq_; }

  /// @brief Record the kernel driver interface version.
  ///
  /// Additionally marks event-age support as available when the interface
  /// version is 1.x with x >= 14. The flag is only ever raised here; it is
  /// never cleared by this setter.
  ///
  /// @param [in] version Version information to store.
  void KfdVersion(const HsaVersionInfo& version) {
    kfd_version.version = version;

    const bool is_major_one = (version.KernelInterfaceMajorVersion == 1);
    const bool minor_has_event_age = (version.KernelInterfaceMinorVersion >= 14);
    if (is_major_one && minor_has_event_age) {
      kfd_version.supports_event_age = true;
    }
  }

  /// @brief Record whether the kernel driver supports exception debugging and
  /// core dumps. Both flags are overwritten unconditionally.
  void KfdVersion(bool exception_debugging, bool core_dump) {
    kfd_version.supports_exception_debugging = exception_debugging;
    kfd_version.supports_core_dump = core_dump;
  }

  /// @brief Returns a copy of the recorded version and capability flags.
  KfdVersion_t KfdVersion() const { return kfd_version; }

  /// @brief Returns ::virtual_mem_api_supported_.
  bool VirtualMemApiSupported() const { return virtual_mem_api_supported_; }
  /// @brief Returns the recorded XNACK state (::xnack_enabled_).
  bool XnackEnabled() const { return xnack_enabled_; }
  /// @brief Records the XNACK state.
  void XnackEnabled(bool enable) { xnack_enabled_ = enable; }

  /// @brief Find the registered driver whose kernel driver type is
  /// @p drv_type.
  ///
  /// @param [in] drv_type Driver type to search for in ::agent_drivers_.
  ///
  /// @retval Reference to the first driver in ::agent_drivers_ with a
  /// matching type.
  ///
  /// Throws AMD::hsa_exception carrying HSA_STATUS_ERROR_INVALID_ARGUMENT
  /// when no registered driver has the requested type.
  Driver &AgentDriver(DriverType drv_type) {
    for (const std::unique_ptr<Driver> &candidate : agent_drivers_) {
      if (candidate->kernel_driver_type_ == drv_type) {
        return *candidate;
      }
    }

    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Invalid agent device type, no driver found.");
  }

  /// @brief Check if the drivers of the agents are different.
  /// @param [in] agents Array of agents to check.
  /// @param [in] num_agents Number of agents in the array.
  /// @return True if any agent's kernel driver type differs from that of the
  /// first agent, and also when @p agents is null or @p num_agents is zero.
  /// False otherwise.
  // NOTE(review): @p agents is indexed as a contiguous array of Agent
  // objects rather than as an array of Agent pointers -- confirm that callers
  // really pass such an array.
  static bool IsDifferentDriver(Agent* agents, uint32_t num_agents) {
    if (agents == nullptr || num_agents == 0) {
      return true;
    }

    const auto reference_type = agents[0].driver().kernel_driver_type_;
    uint32_t idx = 1;
    while (idx < num_agents) {
      if (agents[idx].driver().kernel_driver_type_ != reference_type) {
        return true;
      }
      ++idx;
    }
    return false;
  }

  /// @brief Returns a mutable reference to the registered driver list
  /// (::agent_drivers_).
  std::vector<std::unique_ptr<Driver>>& AgentDrivers() { return agent_drivers_; }

  /// @brief Tell whether @p driver_type is a GPU driver type.
  ///
  /// KFD always counts as a GPU driver; KFD_VIRTIO counts as well when the
  /// build defines HSAKMT_VIRTIO_ENABLED.
  static bool IsGPUDriver(DriverType driver_type) {
#ifdef HSAKMT_VIRTIO_ENABLED
    if (driver_type == core::DriverType::KFD_VIRTIO) {
      return true;
    }
#endif
    return driver_type == core::DriverType::KFD;
  }

 protected:
  static void AsyncEventsLoop(void*);
  static void AsyncIPCSockServerConnLoop(void*);

  struct AllocationRegion {
    AllocationRegion()
        : region(NULL),
          size(0),
          size_requested(0),
          alloc_flags(core::MemoryRegion::AllocateNoFlags),
          user_ptr(nullptr),
          ldrm_bo(NULL) {}
    AllocationRegion(const MemoryRegion* region_arg, size_t size_arg, size_t size_requested,
                     MemoryRegion::AllocateFlags alloc_flags)
        : region(region_arg),
          size(size_arg),
          size_requested(size_requested),
          alloc_flags(alloc_flags),
          user_ptr(nullptr),
          ldrm_bo(NULL) {}

    struct notifier_t {
      void* ptr;
      AMD::callback_t<hsa_amd_deallocation_callback_t> callback;
      void* user_data;
    };

    const MemoryRegion* region;
    size_t size;           /* actual size = align_up(size_requested, granularity) */
    size_t size_requested; /* size requested by user */
    MemoryRegion::AllocateFlags alloc_flags;
    void* user_ptr;
    std::unique_ptr<std::vector<notifier_t>> notifiers;
    amdgpu_bo_handle ldrm_bo;
  };

  struct AsyncEventsControl {
    AsyncEventsControl() : async_events_thread_(NULL) {}
    void Shutdown();

    hsa_signal_t wake;
    os::Thread async_events_thread_;
    HybridMutex lock;
    bool exit;
  };

  /// @brief Container of registered asynchronous signal handlers.
  ///
  /// NOTE(review): the member vectors look like parallel arrays addressed by
  /// a common index (see CopyIndex / PopBack) -- confirm in the
  /// implementation.
  struct AsyncEvents {
    /// Appends one handler registration (signal, condition, compare value,
    /// handler and its argument).
    void PushBack(hsa_signal_t signal, hsa_signal_condition_t cond,
                  hsa_signal_value_t value, hsa_amd_signal_handler handler,
                  void* arg);

    /// Copies the entry at index @p src over the entry at index @p dst.
    void CopyIndex(size_t dst, size_t src);

    /// Number of entries.
    size_t Size();

    /// Removes the last entry.
    void PopBack();

    /// Removes all entries.
    void Clear();

    std::vector<hsa_signal_t> signal_;
    std::vector<hsa_signal_condition_t> cond_;
    std::vector<hsa_signal_value_t> value_;
    std::vector<hsa_amd_signal_handler> handler_;
    std::vector<HsaEvent*> hsa_events_; //!< A list of HSA events for KFD wait
    std::vector<uint64_t> age_;         //!< The age list for KFD wait
    std::vector<void*> arg_;
  };

  struct PrefetchRange;
  typedef std::map<uintptr_t, PrefetchRange> prefetch_map_t;

  /// @brief State of one pending SVM prefetch operation.
  /// NOTE(review): field meanings are inferred from names and from the
  /// SvmPrefetch() signature -- confirm against the implementation.
  struct PrefetchOp {
    void* base;                // presumably start address of the range
    size_t size;               // presumably size of the range in bytes
    uint32_t node_id;          // presumably node id of the prefetch target
    int remaining_deps;        // presumably count of unsatisfied dependencies
    hsa_signal_t completion;   // completion signal handle
    std::vector<hsa_signal_t> dep_signals;  // dependency signal handles
    prefetch_map_t::iterator prefetch_map_entry;  // entry in a prefetch_map_t
  };

  /// @brief Value type of prefetch_map_t: a byte count, the owning prefetch
  /// operation, and iterators to neighbouring map entries.
  struct PrefetchRange {
    /// Default state is an empty range with no owning operation. The default
    /// constructor is what std::map::operator[] uses to create missing
    /// entries, so these members must not be left indeterminate.
    PrefetchRange() : bytes(0), op(nullptr) {}

    /// Range of @p Bytes bytes belonging to operation @p Op.
    PrefetchRange(size_t Bytes, PrefetchOp* Op) : bytes(Bytes), op(Op) {}

    size_t bytes;
    PrefetchOp* op;
    /// Not set by either constructor; must be assigned before use.
    prefetch_map_t::iterator prev;
    /// Not set by either constructor; must be assigned before use.
    prefetch_map_t::iterator next;
  };

  // Accessor for the bootstrap mutex. The mutex will be created before any
  // user could call hsa_init, but a statically destroyed object could also be
  // torn down before incorrectly written programs call hsa_shutdown.
  static __forceinline KernelMutex& bootstrap_lock() {
    // Allocated on first use and intentionally never freed: the object is
    // meant to last until the last thread has exited.
    static KernelMutex& instance = *new KernelMutex;
    return instance;
  }
  Runtime();

  Runtime(const Runtime&);

  Runtime& operator=(const Runtime&);

  ~Runtime() {}

  /// @brief Open connection to kernel driver.
  hsa_status_t Load();

  /// @brief Close connection to kernel driver and cleanup resources.
  void Unload();

  /// @brief Dynamically load extension libraries (images, finalizer) and
  /// call OnLoad method on each loaded library.
  void LoadExtensions();

  /// @brief Call OnUnload method on each extension library then close it.
  void UnloadExtensions();

  /// @brief Dynamically load tool libraries and call OnLoad method on each
  /// loaded library.
  void LoadTools();

  /// @brief Call OnUnload method of each tool library.
  void UnloadTools();

  /// @brief Close tool libraries.
  void CloseTools();

  // @brief Binds Error handlers to this node.
  void BindErrorHandlers();

  // @brief Acquire snapshot of system event handlers.
  // Returns a copy to avoid holding a lock during callbacks.
  std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
  GetSystemEventHandlers();

  /// @brief Get the index of ::link_matrix_.
  /// @param [in] node_id_from Node id of the source node.
  /// @param [in] node_id_to Node id of the destination node.
  /// @retval Index in ::link_matrix_.
  uint32_t GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to);

  /// @brief Get most recently issued SVM prefetch agent for the range in question.
  Agent* GetSVMPrefetchAgent(void* ptr, size_t size);

  /// @brief Get the highest used node id.
  ///
  /// @retval Largest key of ::agents_by_node_, or 0 when that map is empty.
  /// The empty case is checked explicitly because dereferencing rbegin() of
  /// an empty map is undefined behaviour.
  uint32_t max_node_id() const {
    return agents_by_node_.empty() ? 0 : agents_by_node_.rbegin()->first;
  }

  // Mutex object to protect multithreaded access to ::allocation_map_.
  // Also ensures atomicity of pointer info queries by interlocking
  // KFD map/unmap, register/unregister, and access to hsaKmtQueryPointerInfo
  // registered & mapped arrays.
  KernelSharedMutex memory_lock_;

  // Array containing driver interfaces for compatible agent kernel-mode
  // drivers. Currently supports AIE agents.
  std::vector<std::unique_ptr<Driver>> agent_drivers_;

  // Array containing tools library handles.
  std::vector<os::LibHandle> tool_libs_;

  // Agent list containing all CPU agents in the platform.
  std::vector<Agent*> cpu_agents_;

  // Agent list containing all compatible GPU agents in the platform.
  std::vector<Agent*> gpu_agents_;

  // Agent list containing all compatible AIE agents in the platform.
  std::vector<Agent *> aie_agents_;

  // Agent list containing incompletely initialized GPU agents not to be used by the process.
  std::vector<Agent*> disabled_gpu_agents_;

  // Agent map containing all agents indexed by their KFD node IDs.
  std::map<uint32_t, std::vector<Agent*> > agents_by_node_;

  // Agent map containing all agents indexed by their KFD gpuid.
  std::map<uint32_t, Agent*> agents_by_gpuid_;

  // Agent list containing all compatible gpu agent ids in the platform.
  std::vector<uint32_t> gpu_ids_;

  // List of all fine grain system memory region in the platform.
  std::vector<const MemoryRegion*> system_regions_fine_;

  // List of all coarse grain system memory region in the platform.
  std::vector<const MemoryRegion*> system_regions_coarse_;

  // Matrix of IO link.
  std::vector<LinkInfo> link_matrix_;

  // Loader instance.
  amd::hsa::loader::Loader* loader_;

  // Loader context.
  amd::LoaderContext loader_context_;

  // Code object manager.
  amd::hsa::code::AmdHsaCodeManager code_manager_;

  // Contains the region, address, and size of previously allocated memory.
  std::map<const void*, AllocationRegion> allocation_map_;

  // Pending prefetch containers.
  KernelMutex prefetch_lock_;
  prefetch_map_t prefetch_map_;

  // Allocator using ::system_region_
  std::function<void*(size_t size, size_t align, MemoryRegion::AllocateFlags flags, int agent_node_id)> system_allocator_;

  // Deallocator using ::system_region_
  std::function<void(void*)> system_deallocator_;

  // Deprecated HSA Region API GPU (for legacy APU support only)
  Agent* region_gpu_;

  // Bundles the thread control block with the event lists for one
  // asynchronous events loop. Two instances exist: ::asyncSignals_ and
  // ::asyncExceptions_.
  struct AsyncEventsInfo {
    AsyncEventsControl control;
    AsyncEvents events;
    // NOTE(review): presumably registrations not yet merged into `events` --
    // confirm in AsyncEventsLoop().
    AsyncEvents new_events;
    // Not initialized here; must be set by the owner before use.
    bool monitor_exceptions;
  };

  struct AsyncEventsInfo asyncSignals_;
  struct AsyncEventsInfo asyncExceptions_;

  // System clock frequency.
  uint64_t sys_clock_freq_;

  // Number of Numa Nodes
  size_t num_nodes_;

  // @brief AMD HSA event to monitor for virtual memory access fault.
  HsaEvent* vm_fault_event_;

  // @brief HSA signal to contain the VM fault event.
  Signal* vm_fault_signal_;

  // @brief AMD HSA event to monitor for HW exceptions.
  HsaEvent* hw_exception_event_;

  // @brief HSA signal to contain the HW exception event.
  Signal* hw_exception_signal_;

  // Custom system event handlers.
  std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
      system_event_handlers_;

  // System event handler lock
  KernelMutex system_event_lock_;

  // Internal queue creation notifier
  AMD::callback_t<hsa_amd_runtime_queue_notifier> internal_queue_create_notifier_;

  void* internal_queue_create_notifier_user_data_;

  // Holds reference count to runtime object.
  std::atomic<uint32_t> ref_count_;

  // Track environment variables.
  Flag flag_;

  ThunkLoader* thunkLoader_;

  // Pools memory for SharedSignal (Signal ABI blocks)
  SharedSignalPool_t SharedSignalPool;

  // Pools KFD Events for InterruptSignal
  InterruptSignal::EventPool EventPool;

  // Kfd version
  KfdVersion_t kfd_version;

  std::unique_ptr<AMD::SvmProfileControl> svm_profile_;

  // IPC DMA buf unix domain socket server dmabuf FD passing
  int ipc_sock_server_fd_;
  std::map<uint64_t, int> ipc_sock_server_conns_;
  KernelMutex ipc_sock_server_lock_;

 private:
  void CheckVirtualMemApiSupport();
  int GetAmdgpuDeviceArgs(Agent *agent, ShareableHandle handle, int *drm_fd,
                          uint64_t *cpu_addr);

  bool virtual_mem_api_supported_;
  bool xnack_enabled_;

  typedef void* ThunkHandle;

  /// @brief Record for one reserved virtual address range, stored in
  /// ::reserved_address_map_.
  struct AddressHandle {
    /// Empty record: null address, zero size, unregistered.
    AddressHandle() : AddressHandle(nullptr, 0, false) {}

    /// Record for the OS address @p addr; the use count starts at zero.
    AddressHandle(void* addr, size_t size_arg, bool registered_arg)
        : os_addr(addr),
          size(size_arg),
          use_count(0),
          registered(registered_arg) {}

    // Address returned by OS. May be different from user address when adjusted for alignment
    void *os_addr;
    size_t size;
    int use_count;
    bool registered;
  };
  std::map<const void*, AddressHandle> reserved_address_map_;  // Indexed by VA

  struct MemoryHandle {
    MemoryHandle(const MemoryRegion* region, size_t size, uint64_t flags_unused,
                 ThunkHandle thunk_handle, MemoryRegion::AllocateFlags alloc_flag)
        : region(region),
          size(size),
          ref_count(1),
          use_count(0),
          thunk_handle(thunk_handle),
          alloc_flag(alloc_flag) {}

    static __forceinline hsa_amd_vmem_alloc_handle_t Convert(ThunkHandle handle) {
      hsa_amd_vmem_alloc_handle_t ret_handle = {
          static_cast<uint64_t>(reinterpret_cast<uintptr_t>(handle))};
      return ret_handle;
    }

    static __forceinline ThunkHandle Convert(hsa_amd_vmem_alloc_handle_t handle) {
      return reinterpret_cast<void*>(handle.handle);
    }

    __forceinline core::Agent* agentOwner() const { return region->owner(); }

    const MemoryRegion* region;
    size_t size;
    int ref_count;
    int use_count;
    ThunkHandle thunk_handle;  // handle returned by Driver::Allocate(NoAddress = 1)
    MemoryRegion::AllocateFlags alloc_flag;
  };
  std::map<ThunkHandle, MemoryHandle> memory_handle_map_;

  struct MappedHandle;
  /// @brief Per-agent access record for a mapped handle; instances are held
  /// in MappedHandle::allowed_agents keyed by agent.
  /// Constructor, destructor and both access methods are defined out of line.
  struct MappedHandleAllowedAgent {
    MappedHandleAllowedAgent(MappedHandle* _mappedHandle, Agent* targetAgent, void* va, size_t size,
                             hsa_access_permission_t perms);
    ~MappedHandleAllowedAgent();

    /// NOTE(review): presumably revokes targetAgent's access to the mapping
    /// -- confirm in the implementation.
    hsa_status_t RemoveAccess();
    /// NOTE(review): presumably grants targetAgent access with @p perms --
    /// confirm in the implementation.
    hsa_status_t EnableAccess(hsa_access_permission_t perms);

    void* va;
    size_t size;
    Agent* targetAgent;
    hsa_access_permission_t permissions;
    MappedHandle* mappedHandle;
    ShareableHandle shareable_handle;
  };

  /// @brief Record for one mapping of a memory handle into a reserved
  /// address range, stored in ::mapped_handle_map_.
  struct MappedHandle {
    /// Stores every argument except @p perm, which is accepted but not kept.
    /// The allowed-agent map starts out empty.
    MappedHandle(MemoryHandle *mem_handle_arg, AddressHandle *address_handle_arg,
                 uint64_t offset_arg, size_t size_arg, int drm_fd_arg,
                 void *drm_cpu_addr_arg, hsa_access_permission_t perm,
                 ShareableHandle shareable_handle_arg)
        : mem_handle(mem_handle_arg),
          address_handle(address_handle_arg),
          offset(offset_arg),
          size(size_arg),
          drm_fd(drm_fd_arg),
          drm_cpu_addr(drm_cpu_addr_arg),
          shareable_handle(shareable_handle_arg) {}

    /// Owner of the region behind the underlying memory handle.
    __forceinline core::Agent* agentOwner() const {
      return mem_handle->region->owner();
    }

    MemoryHandle* mem_handle;
    AddressHandle* address_handle;
    uint64_t offset;
    size_t size;
    int drm_fd;
    void* drm_cpu_addr;  // CPU Buffer address
    ShareableHandle shareable_handle;
    std::map<Agent*, MappedHandleAllowedAgent> allowed_agents;
  };
  std::map<const void*, MappedHandle> mapped_handle_map_;  // Indexed by VA

  hsa_status_t VMemoryMapAllowAccess(const void *va,
                                     hsa_access_permission_t perm,
                                     const hsa_agent_t *agents,
                                     size_t num_agents);
  hsa_status_t
  VMemorySetAccessPerHandle(void *va, MappedHandle &MappedHandle,
                            const hsa_amd_memory_access_desc_t *desc,
                            const size_t desc_cnt);

  void InitIPCDmaBufSupport();
  bool ipc_dmabuf_supported_;
  int  IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
                       amdgpu_bo_import_result *res,
                       unsigned int numNodes, HSAuint32 *nodes,
                       void **importAddress, HSAuint64 *importSize);
};

}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/scratch_cache.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_SCRATCH_CACHE_H_
#define HSA_RUNTIME_CORE_INC_SCRATCH_CACHE_H_

#include "core/util/locks.h"
#include "core/util/utils.h"

#include <map>
#include <functional>

namespace rocr {
namespace AMD {

/// @brief Cache of scratch memory allocations keyed by size, plus one optional "reserved"
/// allocation that is tracked outside of the map.
///
/// Cached allocations live in a size-ordered multimap. free_bytes() reports the bytes held by
/// nodes that are currently free, including the reserved node while it is free. Memory is
/// handed back through the deallocator supplied at construction.
///
/// No locking is performed inside this class; callers are presumably expected to serialize
/// access themselves.
class ScratchCache {
 public:
  /// @brief Book-keeping record for a single cached scratch allocation.
  struct node {
    // State flags. TRIM is OR-ed onto ALLOC to request release once the node is freed.
    enum STATE { FREE = 0, ALLOC = 1, TRIM = 2, STEAL = 4 };
    void* base;      // Base address of the allocation.
    bool large;      // Whether the allocation was made for a "large" request.
    uint32_t state;  // Combination of STATE values.

    node() : base(nullptr), large(false), state(FREE) {}

    /// @return true when the node is not handed out.
    bool isFree() const { return state == FREE; }
    /// @return true when the node is handed out and has been marked for trimming.
    bool trimPending() const { return state == (ALLOC | TRIM); }

    /// @brief Marks an in-use node so that it is released instead of cached when freed.
    void trim() {
      assert(!isFree() && "Trim of free scratch node.");
      state |= TRIM;
    }
    /// @brief Returns an in-use node to the free state (clears every flag).
    void free() {
      assert(!isFree() && "Free of free scratch node.");
      state = FREE;
    }
    /// @brief Hands out a free node.
    void alloc() {
      assert(isFree() && "Alloc of non-free scratch node.");
      state = ALLOC;
    }
  };

  typedef ::std::multimap<size_t, node> map_t;
  typedef map_t::iterator ref_t;
  // Called as deallocator(base, size, large) to release an allocation.
  typedef ::std::function<void(void*, size_t, bool)> deallocator_t;

  // @brief Contains scratch memory information.
  struct ScratchInfo {
    // Size to satisfy the present dispatch without throttling.
    size_t dispatch_size;
    uint64_t dispatch_slots;

    bool large;
    size_t use_once_limit;
    size_t use_alt_limit;
    bool async_reclaim;  // This version of CP FW supports async_reclaim
    bool retry;
    uint32_t mem_alignment_size;  // Populated into SRD
    bool cooperative;
    hsa_signal_t queue_retry;

    // Size to fill the main_scratch with size_per_thread
    size_t main_size;
    size_t main_size_per_thread;    // Populated into SRD
    uint32_t main_lanes_per_wave;   // Populated into SRD
    uint32_t main_waves_per_group;  // Used during waves reduction
    void* main_queue_base;
    ptrdiff_t main_queue_process_offset;
    ScratchCache::ref_t main_scratch_node;

    size_t alt_size;
    size_t alt_size_per_thread;    // Populated into SRD
    uint32_t alt_lanes_per_wave;   // Populated into SRD
    uint32_t alt_waves_per_group;  // Used during waves reduction

    uint64_t alt_dispatch_limit_x;
    uint64_t alt_dispatch_limit_y;
    uint64_t alt_dispatch_limit_z;
    void* alt_queue_base;
    ptrdiff_t alt_queue_process_offset;
    ScratchCache::ref_t alt_scratch_node;
  };

  ScratchCache(const ScratchCache& rhs) = delete;
  ScratchCache(ScratchCache&& rhs) = delete;
  ScratchCache& operator=(const ScratchCache& rhs) = delete;
  ScratchCache& operator=(ScratchCache&& rhs) = delete;

  /// @param deallocator Callback used to release memory that leaves the cache.
  ScratchCache(deallocator_t deallocator) : dealloc(std::move(deallocator)), available_bytes_(0) {}

  ~ScratchCache() { assert(map.empty() && "ScratchCache not empty at shutdown."); }

  /// @brief Tries to satisfy a main scratch request from the cache.
  ///
  /// Small requests (info.large == false) need a free, non-large node of exactly
  /// info.main_size bytes. Large requests take the first free node whose size is at least
  /// info.main_size.
  ///
  /// @param info Request; on success main_queue_base and main_scratch_node are filled in.
  /// @return true when a cached node was handed out.
  bool allocMain(ScratchInfo& info) {
    // First node whose size is >= info.main_size.
    ref_t it = map.upper_bound(info.main_size - 1);
    if (it == map.end()) return false;

    // Small requests must have an exact size match and be small.
    if (!info.large) {
      while ((it != map.end()) && (it->first == info.main_size)) {
        if (it->second.isFree() && (!it->second.large)) {
          it->second.alloc();
          info.main_queue_base = it->second.base;
          info.main_scratch_node = it;
          available_bytes_ -= it->first;
          return true;
        }
        ++it;
      }
      return false;
    }

    // Large requests may use a small allocation and do not require an exact size match.
    while (it != map.end()) {
      if (it->second.isFree()) {
        it->second.alloc();
        info.main_queue_base = it->second.base;
        info.main_scratch_node = it;
        available_bytes_ -= it->first;
        return true;
      }
      ++it;
    }
    return false;
  }

  /// @brief Returns the main scratch node referenced by @p info to the cache.
  ///
  /// A node with a pending trim is released through the deallocator and removed from the map
  /// instead of being cached. main_scratch_node == map.end() denotes the reserved node.
  void freeMain(ScratchInfo& info) {
    if (info.main_scratch_node == map.end()) {
      // This is reserved scratch memory. Do not de-allocate, just mark it as free.
      assert(!reserved_.second.isFree() && "free called when reserved node already free.");
      reserved_.second.free();
      available_bytes_ += reserved_.first;
      return;
    }

    assert(!info.main_scratch_node->second.isFree() && "free called on free scratch node.");
    auto it = info.main_scratch_node;
    if (it->second.trimPending()) {
      // In-use nodes are not counted in available_bytes_, so no adjustment is needed here.
      dealloc(it->second.base, it->first, it->second.large);
      map.erase(it);
      return;
    }
    it->second.free();
    available_bytes_ += it->first;
  }

  /// @brief Registers a freshly made main allocation (info.main_queue_base, info.main_size)
  /// as an in-use node and stores its map position in info.main_scratch_node.
  void insertMain(ScratchInfo& info) {
    node n;
    n.base = info.main_queue_base;
    n.large = info.large;
    n.alloc();

    auto it = map.insert(std::make_pair(info.main_size, n));
    info.main_scratch_node = it;
  }

  /// @brief Releases every free node through the deallocator.
  ///
  /// @param trim_nodes_in_use When true, in-use nodes are flagged so that they are released
  ///        when they are freed later.
  /// @return true if the map held any node when the call started.
  bool trim(bool trim_nodes_in_use) {
    bool ret = !map.empty();
    auto it = map.begin();
    while (it != map.end()) {
      if (it->second.isFree()) {
        available_bytes_ -= it->first;
        dealloc(it->second.base, it->first, it->second.large);
        it = map.erase(it);
      } else {
        if (trim_nodes_in_use) it->second.trim();
        ++it;
      }
    }
    return ret;
  }

  /// @brief Tries to satisfy an alternate scratch request from the cache.
  ///
  /// Only a free, non-large node of exactly info.alt_size bytes qualifies.
  ///
  /// @param info Request; on success alt_queue_base and alt_scratch_node are filled in.
  /// @return true when a cached node was handed out.
  bool allocAlt(ScratchInfo& info) {
    // First node whose size is >= info.alt_size.
    ref_t it = map.upper_bound(info.alt_size - 1);
    if (it == map.end()) return false;

    // Alt requests should have exact size
    while ((it != map.end()) && (it->first == info.alt_size)) {
      if (it->second.isFree() && (!it->second.large)) {
        it->second.alloc();
        info.alt_queue_base = it->second.base;
        info.alt_scratch_node = it;
        available_bytes_ -= it->first;
        return true;
      }
      ++it;
    }
    return false;
  }

  /// @brief Returns the alternate scratch node referenced by @p info to the cache, or releases
  /// it through the deallocator when a trim is pending.
  void freeAlt(ScratchInfo& info) {
    assert(!info.alt_scratch_node->second.isFree() && "free called on free scratch node.");
    auto it = info.alt_scratch_node;
    if (it->second.trimPending()) {
      dealloc(it->second.base, it->first, it->second.large);
      map.erase(it);
      return;
    }
    it->second.free();
    available_bytes_ += it->first;
  }

  /// @brief Registers a freshly made alternate allocation (info.alt_queue_base, info.alt_size)
  /// as an in-use, non-large node and stores its map position in info.alt_scratch_node.
  void insertAlt(ScratchInfo& info) {
    node n;
    n.base = info.alt_queue_base;
    n.large = false;
    n.alloc();

    auto it = map.insert(std::make_pair(info.alt_size, n));
    info.alt_scratch_node = it;
  }

  /// @return Bytes held by nodes that are currently free (reserved node included while free).
  size_t free_bytes() const { return available_bytes_; }
  /// @return Size of the reserved allocation, or 0 when nothing is reserved.
  size_t reserved_bytes() const { return reserved_.first; }

  /// @brief Records @p base / @p bytes as the reserved allocation; it starts out free.
  void reserve(size_t bytes, void* base) {
    assert(!reserved_.first && "Already reserved memory.");

    node n;
    n.base = base;
    n.large = false;

    available_bytes_ += bytes;

    reserved_ = std::make_pair(bytes, n);
  }

  /// @brief Tries to satisfy a main scratch request from the reserved allocation.
  ///
  /// @param info Request; on success main_queue_base is set and main_scratch_node is set to
  ///        map.end(), which freeMain() treats as "reserved node".
  /// @return false when the reserved node is in use or smaller than info.main_size.
  bool use_reserved(ScratchInfo& info) {
    if (!reserved_.second.isFree() || info.main_size > reserved_.first) {
      debug_print("reserved node is already in use or too small (requested:%zu reserved:%zu)\n",
                  info.main_size, reserved_.first);
      return false;
    }
    reserved_.second.large = info.large;
    reserved_.second.alloc();
    info.main_queue_base = reserved_.second.base;
    // Special case to indicate that this node is reserved memory
    info.main_scratch_node = map.end();
    available_bytes_ -= reserved_.first;
    return true;
  }

  /// @brief Releases the reserved allocation (if any) through the deallocator and clears the
  /// reservation record.
  void free_reserve() {
    // Only bytes that are currently counted as available may be removed from the counter:
    // use_reserved() already subtracted them while the reserved node is handed out, and
    // subtracting a second time would wrap the unsigned counter.
    if (reserved_.second.isFree()) available_bytes_ -= reserved_.first;
    if (reserved_.first) dealloc(reserved_.second.base, reserved_.first, reserved_.second.large);

    reserved_.first = 0;
    reserved_.second.base = nullptr;
    reserved_.second.large = false;
  }

 private:
  map_t map;                // Cached allocations, ordered by size.
  deallocator_t dealloc;    // Releases memory leaving the cache.
  size_t available_bytes_;  // Bytes held by free nodes.

  // Size and node of the reserved allocation; size 0 means nothing is reserved.
  std::pair<size_t, node> reserved_;
};

}  // namespace AMD
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/sdma_registers.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_
#define HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_

#include <stddef.h>
#include <stdint.h>

namespace rocr {
namespace AMD {

// SDMA packet for VI device.
// Reference: http://people.freedesktop.org/~agd5f/dma_packets.txt

// Packet opcodes (SDMA_OP_*); presumably the values written to a packet header's `op` field.
constexpr unsigned int SDMA_OP_COPY = 1;
constexpr unsigned int SDMA_OP_FENCE = 5;
constexpr unsigned int SDMA_OP_TRAP = 6;
constexpr unsigned int SDMA_OP_POLL_REGMEM = 8;
constexpr unsigned int SDMA_OP_ATOMIC = 10;
constexpr unsigned int SDMA_OP_CONST_FILL = 11;
constexpr unsigned int SDMA_OP_TIMESTAMP = 13;
constexpr unsigned int SDMA_OP_GCR = 17;

// Sub-opcodes (SDMA_SUBOP_*); presumably the values written to a packet header's `sub_op` field.
constexpr unsigned int SDMA_SUBOP_COPY_LINEAR = 0;
constexpr unsigned int SDMA_SUBOP_USER_GCR = 1;
constexpr unsigned int SDMA_SUBOP_TIMESTAMP_GET_GLOBAL = 2;
constexpr unsigned int SDMA_SUBOP_COPY_LINEAR_RECT = 4;

// Atomic operation selector for a 64-bit add.
constexpr unsigned int SDMA_ATOMIC_ADD64 = 47;

// Linear copy packet: seven 32-bit words (DW 0 - DW 6). Each word is a union of a bit-field
// view and a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_COPY_LINEAR_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), extra_info (16 bits).
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int extra_info : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: copy count. Two alternative views of the same word: a 22-bit count and an
  // extended 30-bit count.
  // NOTE(review): which view applies presumably depends on the SDMA engine version - confirm.
  union {
    struct {
      unsigned int count : 22;
      unsigned int reserved_0 : 10;
    } count;
    struct {
      unsigned int count : 30;
      unsigned int reserved_0 : 2;
    } count_ext;
    unsigned int DW_1_DATA;
  } COUNT_UNION;

  // DW 2: parameters - 2-bit dst_swap and src_swap fields separated by reserved bits.
  union {
    struct {
      unsigned int reserved_0 : 16;
      unsigned int dst_swap : 2;
      unsigned int reserved_1 : 6;
      unsigned int src_swap : 2;
      unsigned int reserved_2 : 6;
    };
    unsigned int DW_2_DATA;
  } PARAMETER_UNION;

  // DW 3: source address, bits 31:0.
  union {
    struct {
      unsigned int src_addr_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } SRC_ADDR_LO_UNION;

  // DW 4: source address, bits 63:32.
  union {
    struct {
      unsigned int src_addr_63_32 : 32;
    };
    unsigned int DW_4_DATA;
  } SRC_ADDR_HI_UNION;

  // DW 5: destination address, bits 31:0.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_5_DATA;
  } DST_ADDR_LO_UNION;

  // DW 6: destination address, bits 63:32.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_6_DATA;
  } DST_ADDR_HI_UNION;

  // NOTE(review): presumably the largest size issued in a single packet; the value fits in
  // the 22-bit count field - confirm against the code that builds these packets.
  static const size_t kMaxSize_ = 0x3fffe0;
} SDMA_PKT_COPY_LINEAR;

// linear sub-window (pre-GFX12)
// Thirteen 32-bit words (DW 0 - DW 12). Each word is a union of a bit-field view and a raw
// DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_COPY_LINEAR_RECT_TAG {
  // Bit widths of the pitch, slice pitch and rectangle extent fields declared below.
  static const unsigned int pitch_bits = 19;
  static const unsigned int slice_bits = 28;
  static const unsigned int rect_xy_bits = 14;
  static const unsigned int rect_z_bits = 11;

  // DW 0: header - op (8 bits), sub_op (8 bits), 13 reserved bits, element (3 bits).
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved : 13;
      unsigned int element : 3;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: source address, bits 31:0.
  union {
    struct {
      unsigned int src_addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } SRC_ADDR_LO_UNION;

  // DW 2: source address, bits 63:32.
  union {
    struct {
      unsigned int src_addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } SRC_ADDR_HI_UNION;

  // DW 3: source x and y offsets, 14 bits each, each followed by 2 reserved bits.
  union {
    struct {
      unsigned int src_offset_x : 14;
      unsigned int reserved_1 : 2;
      unsigned int src_offset_y : 14;
      unsigned int reserved_2 : 2;
    };
    unsigned int DW_3_DATA;
  } SRC_PARAMETER_1_UNION;

  // DW 4: source z offset (11 bits), 2 reserved bits, source pitch (pitch_bits wide).
  union {
    struct {
      unsigned int src_offset_z : 11;
      unsigned int reserved_1 : 2;
      unsigned int src_pitch : pitch_bits;
    };
    unsigned int DW_4_DATA;
  } SRC_PARAMETER_2_UNION;

  // DW 5: source slice pitch (slice_bits wide), 4 reserved bits.
  union {
    struct {
      unsigned int src_slice_pitch : slice_bits;
      unsigned int reserved_1 : 4;
    };
    unsigned int DW_5_DATA;
  } SRC_PARAMETER_3_UNION;

  // DW 6: destination address, bits 31:0.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_6_DATA;
  } DST_ADDR_LO_UNION;

  // DW 7: destination address, bits 63:32.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_7_DATA;
  } DST_ADDR_HI_UNION;

  // DW 8: destination x and y offsets, 14 bits each, each followed by 2 reserved bits.
  union {
    struct {
      unsigned int dst_offset_x : 14;
      unsigned int reserved_1 : 2;
      unsigned int dst_offset_y : 14;
      unsigned int reserved_2 : 2;
    };
    unsigned int DW_8_DATA;
  } DST_PARAMETER_1_UNION;

  // DW 9: destination z offset (11 bits), 2 reserved bits, destination pitch (pitch_bits wide).
  union {
    struct {
      unsigned int dst_offset_z : 11;
      unsigned int reserved_1 : 2;
      unsigned int dst_pitch : pitch_bits;
    };
    unsigned int DW_9_DATA;
  } DST_PARAMETER_2_UNION;

  // DW 10: destination slice pitch (slice_bits wide), 4 reserved bits.
  union {
    struct {
      unsigned int dst_slice_pitch : slice_bits;
      unsigned int reserved_1 : 4;
    };
    unsigned int DW_10_DATA;
  } DST_PARAMETER_3_UNION;

  // DW 11: rectangle x and y extents (rect_xy_bits wide each), each followed by 2 reserved bits.
  union {
    struct {
      unsigned int rect_x : rect_xy_bits;
      unsigned int reserved_1 : 2;
      unsigned int rect_y : rect_xy_bits;
      unsigned int reserved_2 : 2;
    };
    unsigned int DW_11_DATA;
  } RECT_PARAMETER_1_UNION;

  // DW 12: rectangle z extent (rect_z_bits wide) plus 2-bit dst_swap and src_swap fields,
  // separated by reserved bits.
  union {
    struct {
      unsigned int rect_z : rect_z_bits;
      unsigned int reserved_1 : 5;
      unsigned int dst_swap : 2;
      unsigned int reserved_2 : 6;
      unsigned int src_swap : 2;
      unsigned int reserved_3 : 6;
    };
    unsigned int DW_12_DATA;
  } RECT_PARAMETER_2_UNION;

} SDMA_PKT_COPY_LINEAR_RECT;

// linear sub-window (GFX12)
// Same thirteen-word shape as SDMA_PKT_COPY_LINEAR_RECT but with different field widths, and
// with cache policy fields in DW 12 where the pre-GFX12 packet has swap fields.
typedef struct SDMA_PKT_COPY_LINEAR_RECT_TAG_GFX12 {
  // Bit widths of the pitch, slice pitch and rectangle extent fields declared below.
  static const unsigned int pitch_bits   = 16;
  static const unsigned int slice_bits   = 32;
  static const unsigned int rect_xy_bits = 16;
  static const unsigned int rect_z_bits  = 14;

  // DW 0: header - op (8 bits), sub_op (8 bits), 13 reserved bits, element (3 bits).
  union {
    struct {
      unsigned int op       :  8;
      unsigned int sub_op   :  8;
      unsigned int reserved : 13;
      unsigned int element  :  3;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: source address, bits 31:0.
  union {
    struct {
      unsigned int src_addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } SRC_ADDR_LO_UNION;

  // DW 2: source address, bits 63:32.
  union {
    struct {
      unsigned int src_addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } SRC_ADDR_HI_UNION;

  // DW 3: source x and y offsets, 16 bits each (no reserved bits).
  union {
    struct {
      unsigned int src_offset_x : 16;
      unsigned int src_offset_y : 16;
    };
    unsigned int DW_3_DATA;
  } SRC_PARAMETER_1_UNION;

  // DW 4: source z offset (14 bits), 2 reserved bits, source pitch (pitch_bits wide).
  union {
    struct {
      unsigned int src_offset_z : 14;
      unsigned int reserved_1   : 2;
      unsigned int src_pitch    : pitch_bits;
    };
    unsigned int DW_4_DATA;
  } SRC_PARAMETER_2_UNION;

  // DW 5: source slice pitch, occupying the whole word.
  union {
    struct {
      unsigned int src_slice_pitch : slice_bits;
    };
    unsigned int DW_5_DATA;
  } SRC_PARAMETER_3_UNION;

  // DW 6: destination address, bits 31:0.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_6_DATA;
  } DST_ADDR_LO_UNION;

  // DW 7: destination address, bits 63:32.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_7_DATA;
  } DST_ADDR_HI_UNION;

  // DW 8: destination x and y offsets, 16 bits each (no reserved bits).
  union {
    struct {
      unsigned int dst_offset_x : 16;
      unsigned int dst_offset_y : 16;
    };
    unsigned int DW_8_DATA;
  } DST_PARAMETER_1_UNION;

  // DW 9: destination z offset (14 bits), 2 reserved bits, destination pitch (pitch_bits wide).
  union {
    struct {
      unsigned int dst_offset_z : 14;
      unsigned int reserved_1   : 2;
      unsigned int dst_pitch    : pitch_bits;
    };
    unsigned int DW_9_DATA;
  } DST_PARAMETER_2_UNION;

  // DW 10: destination slice pitch, occupying the whole word.
  union {
    struct {
      unsigned int dst_slice_pitch : slice_bits;
    };
    unsigned int DW_10_DATA;
  } DST_PARAMETER_3_UNION;

  // DW 11: rectangle x and y extents, rect_xy_bits wide each.
  union {
    struct {
      unsigned int rect_x : rect_xy_bits;
      unsigned int rect_y : rect_xy_bits;
      };
    unsigned int DW_11_DATA;
  } RECT_PARAMETER_1_UNION;

  // DW 12: rectangle z extent (rect_z_bits wide) plus 3-bit destination and source cache
  // policy fields, separated by reserved bits.
  union {
    struct {
      unsigned int rect_z           : rect_z_bits;
      unsigned int reserved_1       : 6;
      unsigned int dst_cache_policy : 3;
      unsigned int reserved_2       : 5;
      unsigned int src_cache_policy : 3;
      unsigned int reserved_3       : 1;
    };
    unsigned int DW_12_DATA;
  } RECT_PARAMETER_2_UNION;

} SDMA_PKT_COPY_LINEAR_RECT_GFX12;

// Constant fill packet: five 32-bit words (DW 0 - DW 4). Each word is a union of a bit-field
// view and a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_CONSTANT_FILL_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), sw (2 bits), 12 reserved bits,
  // fillsize (2 bits).
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int sw : 2;
      unsigned int reserved_0 : 12;
      unsigned int fillsize : 2;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: destination address, bits 31:0.
  union {
    struct {
      unsigned int dst_addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } DST_ADDR_LO_UNION;

  // DW 2: destination address, bits 63:32.
  union {
    struct {
      unsigned int dst_addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } DST_ADDR_HI_UNION;

  // DW 3: 32-bit source data (the fill value).
  union {
    struct {
      unsigned int src_data_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } DATA_UNION;

  // DW 4: count (22 bits), 10 reserved bits.
  union {
    struct {
      unsigned int count : 22;
      unsigned int reserved_0 : 10;
    };
    unsigned int DW_4_DATA;
  } COUNT_UNION;

  // NOTE(review): presumably the largest size issued in a single packet; the value fits in
  // the 22-bit count field - confirm against the code that builds these packets.
  static const size_t kMaxSize_ = 0x3fffe0;
} SDMA_PKT_CONSTANT_FILL;

// Fence packet: four 32-bit words (DW 0 - DW 3) carrying a 64-bit address and a 32-bit data
// value. Each word is a union of a bit-field view and a raw DW_n_DATA view.
typedef struct SDMA_PKT_FENCE_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), then mtype (3), gcc (1), sys (1), pad1 (1),
  // snp (1), gpa (1), l2_policy (2) and 6 reserved bits.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int mtype : 3;
      unsigned int gcc : 1;
      unsigned int sys : 1;
      unsigned int pad1 : 1;
      unsigned int snp : 1;
      unsigned int gpa : 1;
      unsigned int l2_policy : 2;
      unsigned int reserved_0 : 6;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: address, bits 31:0.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // DW 2: address, bits 63:32.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // DW 3: 32-bit data value.
  union {
    struct {
      unsigned int data : 32;
    };
    unsigned int DW_3_DATA;
  } DATA_UNION;
} SDMA_PKT_FENCE;

// Poll register/memory packet: six 32-bit words (DW 0 - DW 5). Each word is a union of a
// bit-field view and a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_POLL_REGMEM_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), 10 reserved bits, hdp_flush (1),
  // 1 reserved bit, func (3), mem_poll (1).
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 10;
      unsigned int hdp_flush : 1;
      unsigned int reserved_1 : 1;
      unsigned int func : 3;
      unsigned int mem_poll : 1;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: address, bits 31:0.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // DW 2: address, bits 63:32.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // DW 3: 32-bit value.
  // NOTE(review): presumably the reference value the polled data is compared with - confirm.
  union {
    struct {
      unsigned int value : 32;
    };
    unsigned int DW_3_DATA;
  } VALUE_UNION;

  // DW 4: 32-bit mask.
  union {
    struct {
      unsigned int mask : 32;
    };
    unsigned int DW_4_DATA;
  } MASK_UNION;

  // DW 5: interval (16 bits), retry_count (12 bits), 4 reserved bits.
  union {
    struct {
      unsigned int interval : 16;
      unsigned int retry_count : 12;
      unsigned int reserved_0 : 4;
    };
    unsigned int DW_5_DATA;
  } DW5_UNION;
} SDMA_PKT_POLL_REGMEM;

// Atomic packet: eight 32-bit words (DW 0 - DW 7) carrying a 64-bit address, 64-bit source
// data and 64-bit compare data. Each word is a union of a bit-field view and a raw
// DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_ATOMIC_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), l (1 bit), 8 reserved bits,
  // operation (7 bits).
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int l : 1;
      unsigned int reserved_0 : 8;
      unsigned int operation : 7;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: address, bits 31:0.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // DW 2: address, bits 63:32.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // DW 3: source data, bits 31:0.
  union {
    struct {
      unsigned int src_data_31_0 : 32;
    };
    unsigned int DW_3_DATA;
  } SRC_DATA_LO_UNION;

  // DW 4: source data, bits 63:32.
  union {
    struct {
      unsigned int src_data_63_32 : 32;
    };
    unsigned int DW_4_DATA;
  } SRC_DATA_HI_UNION;

  // DW 5: compare data, bits 31:0.
  union {
    struct {
      unsigned int cmp_data_31_0 : 32;
    };
    unsigned int DW_5_DATA;
  } CMP_DATA_LO_UNION;

  // DW 6: compare data, bits 63:32.
  union {
    struct {
      unsigned int cmp_data_63_32 : 32;
    };
    unsigned int DW_6_DATA;
  } CMP_DATA_HI_UNION;

  // DW 7: loop_interval (13 bits), 19 reserved bits.
  union {
    struct {
      unsigned int loop_interval : 13;
      unsigned int reserved_0 : 19;
    };
    unsigned int DW_7_DATA;
  } LOOP_UNION;
} SDMA_PKT_ATOMIC;

// Timestamp packet: three 32-bit words (DW 0 - DW 2) carrying a header and a 64-bit address.
// Each word is a union of a bit-field view and a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_TIMESTAMP_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), 16 reserved bits.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: address, bits 31:0.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // DW 2: address, bits 63:32.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

} SDMA_PKT_TIMESTAMP;

// Trap packet: two 32-bit words (DW 0 - DW 1). Each word is a union of a bit-field view and
// a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_TRAP_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), 16 reserved bits.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: int_ctx (28 bits), 4 reserved bits.
  union {
    struct {
      unsigned int int_ctx : 28;
      unsigned int reserved_1 : 4;
    };
    unsigned int DW_1_DATA;
  } INT_CONTEXT_UNION;
} SDMA_PKT_TRAP;

// HDP flush packet, no parameters.
// Six raw 32-bit words with no bit-field view; the fixed contents are supplied by
// hdp_flush_cmd below.
typedef struct SDMA_PKT_HDP_FLUSH_TAG {
  unsigned int DW_0_DATA;
  unsigned int DW_1_DATA;
  unsigned int DW_2_DATA;
  unsigned int DW_3_DATA;
  unsigned int DW_4_DATA;
  unsigned int DW_5_DATA;

  // Version of gfx9 sDMA microcode introducing SDMA_PKT_HDP_FLUSH
  static const uint16_t kMinVersion_ = 0x1A5;
} SDMA_PKT_HDP_FLUSH;
// Prebuilt HDP flush packet contents (DW 0 .. DW 5).
// NOTE(review): DW 0 (0x8) equals SDMA_OP_POLL_REGMEM and the packet has the same six-word
// length as SDMA_PKT_POLL_REGMEM, so this looks like a pre-encoded poll packet - confirm.
static const SDMA_PKT_HDP_FLUSH hdp_flush_cmd = {0x8, 0x0, 0x80000000, 0x0, 0x0, 0x0};

// GCR packet: five 32-bit words (DW 0 - DW 4) carrying a base VA, a limit VA, a VMID and a
// set of GCR_CONTROL_* fields. Unnamed bit-fields are padding. Each word is a union of a
// bit-field view and a raw DW_n_DATA view of the same 32 bits.
typedef struct SDMA_PKT_GCR_TAG {
  // DW 0: header - op (8 bits), sub_op (8 bits), 16 padding bits.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // DW 1: 7 padding bits, then the low part of the base VA (25 bits).
  union {
    struct {
      unsigned int : 7;
      unsigned int BaseVA_LO : 25;
    };
    unsigned int DW_1_DATA;
  } WORD1_UNION;

  // DW 2: high part of the base VA (16 bits) followed by 16 bits of GCR_CONTROL_* fields.
  union {
    struct {
      unsigned int BaseVA_HI : 16;
      unsigned int GCR_CONTROL_GLI_INV : 2;
      unsigned int GCR_CONTROL_GL1_RANGE : 2;
      unsigned int GCR_CONTROL_GLM_WB : 1;
      unsigned int GCR_CONTROL_GLM_INV : 1;
      unsigned int GCR_CONTROL_GLK_WB : 1;
      unsigned int GCR_CONTROL_GLK_INV : 1;
      unsigned int GCR_CONTROL_GLV_INV : 1;
      unsigned int GCR_CONTROL_GL1_INV : 1;
      unsigned int GCR_CONTROL_GL2_US : 1;
      unsigned int GCR_CONTROL_GL2_RANGE : 2;
      unsigned int GCR_CONTROL_GL2_DISCARD : 1;
      unsigned int GCR_CONTROL_GL2_INV : 1;
      unsigned int GCR_CONTROL_GL2_WB : 1;
    };
    unsigned int DW_2_DATA;
  } WORD2_UNION;

  // DW 3: GCR_CONTROL_RANGE_IS_PA (1 bit), GCR_CONTROL_SEQ (2 bits), 4 padding bits, then
  // the low part of the limit VA (25 bits).
  union {
    struct {
      unsigned int GCR_CONTROL_RANGE_IS_PA : 1;
      unsigned int GCR_CONTROL_SEQ : 2;
      unsigned int : 4;
      unsigned int LimitVA_LO : 25;
    };
    unsigned int DW_3_DATA;
  } WORD3_UNION;

  // DW 4: high part of the limit VA (16 bits), 8 padding bits, VMID (4 bits), 4 padding bits.
  union {
    struct {
      unsigned int LimitVA_HI : 16;
      unsigned int : 8;
      unsigned int VMID : 4;
      unsigned int : 4;
    };
    unsigned int DW_4_DATA;
  } WORD4_UNION;
} SDMA_PKT_GCR;

}  // namespace AMD
}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_SDMA_REGISTERS_H_


================================================
FILE: runtime/hsa-runtime/core/inc/signal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTME_CORE_INC_SIGNAL_H_
#define HSA_RUNTME_CORE_INC_SIGNAL_H_

#include <map>
#include <functional>
#include <memory>
#include <vector>
#include <utility>

#include "hsakmt/hsakmt.h"

#include "core/common/shared.h"

#include "core/inc/checked.h"
#include "core/inc/exceptions.h"

#include "core/util/utils.h"
#include "core/util/locks.h"
#include "core/util/timer.h"

#include "inc/amd_hsa_signal.h"

#if defined(__i386__) || defined(__x86_64__)
#include <mwaitxintrin.h>
#ifndef MWAITX_ECX_TIMER_ENABLE
#define MWAITX_ECX_TIMER_ENABLE 0x2  // BIT(1)
#endif
#endif

// Lets hsa_signal_t serve as the key of ordered STL containers by ordering signals on
// their raw handle value.
namespace std {
template <> struct less<hsa_signal_t> {
  using first_argument_type = hsa_signal_t;
  using second_argument_type = hsa_signal_t;
  using result_type = bool;

  /// @return true when @p lhs has the numerically smaller handle.
  __forceinline bool operator()(const hsa_signal_t& lhs, const hsa_signal_t& rhs) const {
    return lhs.handle < rhs.handle;
  }
};
}  // namespace std

namespace rocr {
namespace timer {
/// @brief Converts a timeout expressed in HSA timestamp ticks into a fast_clock duration.
///
/// The tick frequency is queried through hsa_system_get_info
/// (HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY) on every call; the status of that query is not
/// checked.
///
/// @param timeout Timeout in HSA timestamp ticks.
/// @return timeout / frequency seconds, expressed as timer::fast_clock::duration.
inline timer::fast_clock::duration GetFastTimeout(uint64_t timeout) {
  uint64_t ticks_per_second = 0;
  HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &ticks_per_second);

  const double frequency = double(ticks_per_second);
  return timer::duration_from_seconds<timer::fast_clock::duration>(double(timeout) / frequency);
}

/// @brief Throws once a wait has been running for longer than the abort timeout.
///
/// @param start_time Time point at which the wait began.
/// @param signal_abort_timeout Abort limit in seconds; 0 disables the check.
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_FATAL when the elapsed time exceeds
///         the limit.
inline void CheckAbortTimeout(const timer::fast_clock::time_point& start_time,
                              uint32_t signal_abort_timeout) {
  // Nothing to enforce when no abort timeout is configured.
  if (signal_abort_timeout == 0) return;

  const timer::fast_clock::duration limit = std::chrono::seconds(signal_abort_timeout);
  const auto elapsed = timer::fast_clock::now() - start_time;
  if (elapsed > limit) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL, "Signal wait abort timeout.\n");
  }
}

/// @brief Issues a MONITORX on @p addr followed by an MWAITX; compiles to a no-op on
/// non-x86 targets.
///
/// @param addr Address handed to _mm_monitorx.
/// @param timeout Value passed as the second argument of _mm_mwaitx.
/// @param timer_enable When true, MWAITX_ECX_TIMER_ENABLE is passed as the third argument of
///        _mm_mwaitx; otherwise 0 is passed.
inline void DoMwaitx(int64_t* addr, uint32_t timeout, bool timer_enable = false) {
#if defined(__i386__) || defined(__x86_64__)
  const uint32_t timer_flag = timer_enable ? MWAITX_ECX_TIMER_ENABLE : 0;
  _mm_monitorx(addr, 0, 0);
  _mm_mwaitx(0, timeout, timer_flag);
#endif
}
} // namespace timer

/// @brief Evaluates a signal wait condition.
///
/// @param value Observed signal value.
/// @param condition Comparison to apply (EQ, NE, GTE or LT).
/// @param compare_value Value that @p value is compared against.
/// @return Result of the comparison; false for any condition other than the four above.
inline bool CheckSignalCondition(int64_t value, hsa_signal_condition_t condition,
                                 hsa_signal_value_t compare_value) {
  if (condition == HSA_SIGNAL_CONDITION_EQ) return value == compare_value;
  if (condition == HSA_SIGNAL_CONDITION_NE) return value != compare_value;
  if (condition == HSA_SIGNAL_CONDITION_GTE) return value >= compare_value;
  if (condition == HSA_SIGNAL_CONDITION_LT) return value < compare_value;
  // Unknown condition.
  return false;
}

namespace core {
class Agent;
class Signal;

/// @brief ABI and object conversion struct for signals.  May be shared between processes.
struct SharedSignal {
  amd_signal_t amd_signal;
  uint64_t sdma_start_ts;
  Signal* core_signal;
  Check<0x71FCCA6A3D5D5276, true> id;
  uint8_t reserved[8];
  uint64_t sdma_end_ts;
  uint8_t reserved2[24];

  SharedSignal() :
    sdma_start_ts(0),
    reserved{},
    sdma_end_ts(0),
    reserved2{} {
    memset(&amd_signal, 0, sizeof(amd_signal));
    amd_signal.kind = AMD_SIGNAL_KIND_INVALID;
    core_signal = nullptr;
  }

  bool IsValid() const { return (Convert(this).handle != 0) && id.IsValid(); }

  bool IsIPC() const { return core_signal == nullptr; }

  void GetSdmaTsAddresses(uint64_t*& start, uint64_t*& end) {
    /*
    SDMA timestamps on gfx7xx/8xxx require 32 byte alignment (gfx9xx relaxes
    alignment to 8 bytes).  This conflicts with the frozen format for amd_signal_t
    so we place the time stamps in sdma_start/end_ts instead (amd_signal.start_ts
    is also properly aligned).  Reading of the timestamps occurs in GetRawTs().
    */
    start = &sdma_start_ts;
    end = &sdma_end_ts;
  }

  void CopyPrep() {
    // Clear sdma_end_ts before a copy so we can detect if the copy was done via
    // SDMA or blit kernel.
    sdma_start_ts = 0;
    sdma_end_ts = 0;
  }

  void GetRawTs(bool FetchCopyTs, uint64_t& start, uint64_t& end) {
    /*
    If the read is for a copy we need to check if it was done by blit kernel or SDMA.
    Since we clear sdma_start/end_ts during CopyPrep we know it was a SDMA copy if one
    of those is non-zero.  Otherwise return compute kernel stamps from amd_signal.
    */
    if (FetchCopyTs && sdma_end_ts != 0) {
      start = sdma_start_ts;
      end = sdma_end_ts;
      return;
    }
    start = amd_signal.start_ts;
    end = amd_signal.end_ts;
  }

  static __forceinline SharedSignal* Convert(hsa_signal_t signal) {
    SharedSignal* ret = reinterpret_cast<SharedSignal*>(static_cast<uintptr_t>(signal.handle) -
                                                        offsetof(SharedSignal, amd_signal));
    return ret;
  }

  static __forceinline hsa_signal_t Convert(const SharedSignal* signal) {
    assert(signal != nullptr && "Conversion on null Signal object.");
    const uint64_t handle = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&signal->amd_signal));
    const hsa_signal_t signal_handle = {handle};
    return signal_handle;
  }
};
// Compile-time checks pinning the SharedSignal layout: standard layout, trivially
// destructible, both SDMA time stamp members at offsets that are multiples of 32 bytes, and
// a total size of 128 bytes.
static_assert(std::is_standard_layout<SharedSignal>::value,
              "SharedSignal must remain standard layout for IPC use.");
static_assert(std::is_trivially_destructible<SharedSignal>::value,
              "SharedSignal must not be modified on delete for IPC use.");
static_assert((offsetof(SharedSignal, sdma_start_ts) % 32) == 0,
              "Bad SDMA time stamp alignment.");
static_assert((offsetof(SharedSignal, sdma_end_ts) % 32) == 0,
              "Bad SDMA time stamp alignment.");
static_assert(sizeof(SharedSignal) == 128,
              "Bad SharedSignal size.");


// Multiplier applied to minblock_ to size the pool's initial block_size_.
// With sizeof(SharedSignal) == 128 (asserted above) minblock_ is 4096 / 128 = 32,
// so 512 * 32 = 16384 (16K) signals.
#define SIGNAL_PREALLOC_BLOCKS 512 //16K Signals

/// @brief Pool class for SharedSignal suitable for use with Shared.
/// Privately derives from BaseShared.  alloc(), free() and clear() are defined
/// out of line; their exact behavior is not visible from this header.
class SharedSignalPool_t : private BaseShared {
 public:
  /// @brief Starts the pool with a block size of SIGNAL_PREALLOC_BLOCKS * minblock_.
  SharedSignalPool_t() : block_size_(SIGNAL_PREALLOC_BLOCKS * minblock_) {}
  /// @brief Calls clear() on destruction.
  ~SharedSignalPool_t() { clear(); }

  /// @brief Hands out a SharedSignal (presumably from free_list_ — confirm in the definition).
  SharedSignal* alloc();
  /// @brief Gives a SharedSignal back to the pool.
  void free(SharedSignal* ptr);
  /// @brief Invoked by the destructor; presumably releases the pool's blocks — TODO confirm.
  void clear();

 private:
  /// Number of SharedSignal objects that fit in 4096 bytes.
  static const size_t minblock_ = 4096 / sizeof(SharedSignal);
  /// Presumably guards the lists below — verify against alloc()/free().
  HybridMutex lock_;
  /// SharedSignal pointers; by name, the entries currently available for alloc().
  std::vector<SharedSignal*> free_list_;
  /// (pointer, size) pairs; by name, one entry per backing block.
  std::vector<std::pair<void*, size_t>> block_list_;
  /// Initialised to SIGNAL_PREALLOC_BLOCKS * minblock_ by the constructor.
  size_t block_size_;
};

/// @brief Owns a SharedSignal obtained through Shared<> backed by
/// SharedSignalPool_t and exposes it via signal().
class LocalSignal {
 public:
  /// @brief Creates the signal block and stores the initial value in it.
  /// Temporary, for legacy tools lib support.
  explicit LocalSignal(hsa_signal_value_t initial_value) {
    SharedSignal* const block = local_signal_.shared_object();
    block->amd_signal.value = initial_value;
  }

  /// @brief Out-of-line constructor taking an additional exportable flag.
  LocalSignal(hsa_signal_value_t initial_value, bool exportable);

  /// @brief Returns the SharedSignal held by this object.
  SharedSignal* signal() const {
    return local_signal_.shared_object();
  }

 private:
  Shared<SharedSignal, SharedSignalPool_t> local_signal_;
};

/// @brief An abstract base class which helps implement the public hsa_signal_t
/// type (an opaque handle) and its associated APIs. At its core, signal uses
/// a 32 or 64 bit value. This value can be waited on or signaled atomically
/// using specified memory ordering semantics.
///
/// Two counters track lifetime: refcount_ counts outstanding handles and
/// retained_ counts handles plus Retain() calls.  Both start at 1.
class Signal {
 public:
  /// @brief Constructor. Links and publishes the signal interface object.
  /// @param abi_block Shared ABI block whose amd_signal this object wraps.
  /// @param enableIPC When true the block's core_signal back pointer is left
  /// null and the signal is registered through registerIpc() instead.
  explicit Signal(SharedSignal* abi_block, bool enableIPC = false)
      : signal_(abi_block->amd_signal), async_copy_agent_(NULL), refcount_(1) {
    // NOTE(review): abi_block has already been dereferenced by the initializer
    // list above, so this assert runs after the access it is meant to guard.
    assert(abi_block != nullptr && "Signal abi_block must not be NULL");

    waiting_ = 0;
    retained_ = 1;

    if (enableIPC) {
      // IPC signals do not publish a back pointer; Convert() resolves them
      // through lookupIpc().
      abi_block->core_signal = nullptr;
      registerIpc();
    } else {
      abi_block->core_signal = this;
    }
  }

  /// @brief Interface to discard a signal handle (hsa_signal_t)
  /// Decrements signal ref count and invokes doDestroySignal() when
  /// Signal is no longer in use.
  void DestroySignal() {
    // If handle is now invalid wake any retained sleepers.
    // CasRelaxed(0, 0) leaves the value unchanged whether or not it matches.
    if (--refcount_ == 0) CasRelaxed(0, 0);
    // Release signal, last release will destroy the object.
    Release();
  }

  /// @brief Converts from this interface class to the public
  /// hsa_signal_t type - an opaque handle.
  /// The handle value is the address of the wrapped amd_signal_t.
  static __forceinline hsa_signal_t Convert(Signal* signal) {
    assert(signal != nullptr && "Conversion on null Signal object.");
    const uint64_t handle = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&signal->signal_));
    const hsa_signal_t signal_handle = {handle};
    return signal_handle;
  }

  /// @brief Converts from this interface class to the public
  /// hsa_signal_t type - an opaque handle.
  /// Overload for const Signal pointers; produces the same handle value.
  static __forceinline const hsa_signal_t Convert(const Signal* signal) {
    assert(signal != nullptr && "Conversion on null Signal object.");
    const uint64_t handle = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&signal->signal_));
    const hsa_signal_t signal_handle = {handle};
    return signal_handle;
  }

  /// @brief Converts from public hsa_signal_t type (an opaque handle) to
  /// this interface class object.
  /// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT when the
  /// handle is zero, or HSA_STATUS_ERROR_INVALID_SIGNAL when the shared block
  /// is not valid or an IPC lookup fails.
  static __forceinline Signal* Convert(hsa_signal_t signal) {
    if (signal.handle == 0) throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "");
    SharedSignal* shared = SharedSignal::Convert(signal);
    if (!shared->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_SIGNAL, "Signal handle is invalid.");
    if (shared->IsIPC()) {
      // IPC signals carry no back pointer; resolve them through the IPC lookup.
      Signal* ret = lookupIpc(signal);
      if (ret == nullptr)
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_SIGNAL, "Signal handle is invalid.");
      return ret;
    } else {
      return shared->core_signal;
    }
  }

  /// @brief Takes an additional handle reference on the signal behind \p signal.
  /// Unlike Convert() this reports failure by returning nullptr instead of
  /// throwing.  For non-IPC signals both refcount_ and the retain count are
  /// incremented; IPC signals are delegated to duplicateIpc().
  /// @return The signal object, or nullptr for a zero or invalid handle.
  static Signal* DuplicateHandle(hsa_signal_t signal) {
    if (signal.handle == 0) return nullptr;
    SharedSignal* shared = SharedSignal::Convert(signal);

    if (!shared->IsIPC()) {
      if (!shared->IsValid()) return nullptr;
      shared->core_signal->refcount_++;
      shared->core_signal->Retain();
      return shared->core_signal;
    }

    // IPC signals may only be duplicated while holding the ipcMap lock.
    return duplicateIpc(signal);
  }

  /// @brief True while at least one handle reference remains.
  bool IsValid() const { return refcount_ != 0; }

  /// @brief Reports whether the underlying SharedSignal is flagged as IPC.
  bool __forceinline isIPC() const { return SharedSignal::Convert(Convert(this))->IsIPC(); }

  // Below are various methods corresponding to the APIs, which load/store the
  // signal value or modify the existing signal value atomically and with
  // specified memory ordering semantics.
  virtual hsa_signal_value_t LoadRelaxed() = 0;
  virtual hsa_signal_value_t LoadAcquire() = 0;

  virtual void StoreRelaxed(hsa_signal_value_t value) = 0;
  virtual void StoreRelease(hsa_signal_value_t value) = 0;

  virtual hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition,
                                         hsa_signal_value_t compare_value,
                                         uint64_t timeout,
                                         hsa_wait_state_t wait_hint) = 0;
  virtual hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition,
                                         hsa_signal_value_t compare_value,
                                         uint64_t timeout,
                                         hsa_wait_state_t wait_hint) = 0;

  virtual void AndRelaxed(hsa_signal_value_t value) = 0;
  virtual void AndAcquire(hsa_signal_value_t value) = 0;
  virtual void AndRelease(hsa_signal_value_t value) = 0;
  virtual void AndAcqRel(hsa_signal_value_t value) = 0;

  virtual void OrRelaxed(hsa_signal_value_t value) = 0;
  virtual void OrAcquire(hsa_signal_value_t value) = 0;
  virtual void OrRelease(hsa_signal_value_t value) = 0;
  virtual void OrAcqRel(hsa_signal_value_t value) = 0;

  virtual void XorRelaxed(hsa_signal_value_t value) = 0;
  virtual void XorAcquire(hsa_signal_value_t value) = 0;
  virtual void XorRelease(hsa_signal_value_t value) = 0;
  virtual void XorAcqRel(hsa_signal_value_t value) = 0;

  virtual void AddRelaxed(hsa_signal_value_t value) = 0;
  virtual void AddAcquire(hsa_signal_value_t value) = 0;
  virtual void AddRelease(hsa_signal_value_t value) = 0;
  virtual void AddAcqRel(hsa_signal_value_t value) = 0;

  virtual void SubRelaxed(hsa_signal_value_t value) = 0;
  virtual void SubAcquire(hsa_signal_value_t value) = 0;
  virtual void SubRelease(hsa_signal_value_t value) = 0;
  virtual void SubAcqRel(hsa_signal_value_t value) = 0;

  virtual hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t ExchRelease(hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) = 0;

  virtual hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
                                        hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
                                        hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
                                        hsa_signal_value_t value) = 0;
  virtual hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
                                       hsa_signal_value_t value) = 0;

  //-------------------------
  // implementation specific
  //-------------------------
  /// Opaque identifier type used by the IsType()/_IsA() type check.
  typedef void* rtti_t;

  /// @brief Returns the address of the value.
  virtual hsa_signal_value_t* ValueLocation() const = 0;

  /// @brief Returns the HsaEvent associated with the signal.
  /// Per the original note this applies only to interrupt-event based signal
  /// types; it returns NULL for the default event type.
  virtual HsaEvent* EopEvent() = 0;

  /// @brief Waits until multiple signals in the list satisfy their conditions
  /// or a timeout is reached.
  /// @param signal_count Number of hsa_signals in the list.
  /// @param hsa_signals Pointer to array of HSA signals.
  /// @param conds Pointer to array of signal conditions.
  /// @param values Pointer to array of signal values.
  /// @param timeout Timeout hint value.
  /// @param wait_hint Hint about wait state.
  /// @param satisfying_values Vector of satisfying values. If \p wait_on_all
  /// is false (then we are waiting on any signal in the list) this will contain
  /// only the first satisfying value.
  /// @param wait_on_all Wait on all signals in the list to satisfy their
  /// conditions if true, else wait on any signal in the list to satisfy its
  /// condition.
  /// @return Return the index of the first signal in the list that satisfies
  /// its condition or -1 on a timeout. Note that if \p wait_on_all is true,
  /// then all signals in the list satisfy their conditions, thus the index will
  /// always be 0.
  /// NOTE(review): the return type is uint32_t, so the documented -1 would be
  /// observed as UINT32_MAX — confirm against the implementation.
  static uint32_t WaitMultiple(uint32_t signal_count, const hsa_signal_t* hsa_signals,
                               const hsa_signal_condition_t* conds,
                               const hsa_signal_value_t* values, uint64_t timeout,
                               hsa_wait_state_t wait_hint,
                               std::vector<hsa_signal_value_t>& satisfying_values,
                               bool wait_on_all);

  /// @brief Dedicated function to wait on signals that are not of type HSA_EVENTTYPE_SIGNAL.
  /// These events can only be received by calling the underlying driver (i.e. via the
  /// hsaKmtWaitOnMultipleEvents_Ext function call). We still need to have 1 signal of type
  /// HSA_EVENTTYPE_SIGNAL attached to the list of signals to be able to force
  /// hsaKmtWaitOnMultipleEvents_Ext to return.
  /// @param signal_count Number of hsa_signals
  /// @param hsa_signals Pointer to array of signals. All signals should have a valid EopEvent()
  /// @param conds list of conditions
  /// @param values list of values
  /// @param satisfying_value value to be satisfied
  /// @return index of signal that satisfies condition
  static uint32_t WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
                         const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
                         hsa_signal_value_t* satisfying_value);

  /// @brief Public entry point of the type check; forwards to _IsA().
  __forceinline bool IsType(rtti_t id) { return _IsA(id); }

  /// @brief Prevents the signal from being destroyed until the matching Release().
  void Retain() { retained_++; }
  /// @brief Counterpart of Retain(); defined out of line.
  void Release();

  /// @brief Checks if signal is currently in use by a wait API.
  bool InWaiting() const { return waiting_ != 0; }

  /// @brief Increments the waiting indicator.
  void WaitingInc() { waiting_++; }

  /// @brief Decrements the waiting indicator.
  void WaitingDec() { waiting_--; }

  // Prep for copy profiling.  Store copy agent and ready API block.
  // CopyPrep() on the shared block clears the SDMA time stamp slots.
  __forceinline void async_copy_agent(core::Agent* agent) {
    async_copy_agent_ = agent;
    core::SharedSignal::Convert(Convert(this))->CopyPrep();
  }

  /// @brief Returns the agent stored by the setter above (NULL until one is set).
  __forceinline core::Agent* async_copy_agent() { return async_copy_agent_; }

  /// @brief Forwards to SharedSignal::GetSdmaTsAddresses() for this signal's block.
  void GetSdmaTsAddresses(uint64_t*& start, uint64_t*& end) {
    core::SharedSignal::Convert(Convert(this))->GetSdmaTsAddresses(start, end);
  }

  // Set FetchCopyTs = true when reading time stamps from a copy operation.
  void GetRawTs(bool FetchCopyTs, uint64_t& start, uint64_t& end) {
    core::SharedSignal::Convert(Convert(this))->GetRawTs(FetchCopyTs, start, end);
  }

  /// @brief Structure which defines key signal elements like type and value.
  /// Address of this struct is used as a value for the opaque handle of type
  /// hsa_signal_t provided to the public API.
  amd_signal_t& signal_;

 protected:
  /// Protected so that destruction goes through DestroySignal()/Release().
  virtual ~Signal();

  /// @brief Overrideable deletion function
  virtual void doDestroySignal() { delete this; }

  /// @brief Simple RTTI type checking helper
  /// Returns true if the object can be converted to the query type via
  /// static_cast.
  /// Do not use directly.  Use IsType in the desired derived type instead.
  virtual bool _IsA(rtti_t id) const = 0;

  /// @variable Indicates number of runtime threads waiting on this signal.
  /// Value of zero means no waits.
  std::atomic<uint32_t> waiting_;

  /// @variable Pointer to agent used to perform an async copy.
  core::Agent* async_copy_agent_;

 private:
  /// Lock and handle-to-object map used by the IPC helpers below.
  static KernelMutex ipcLock_;
  static std::map<decltype(hsa_signal_t::handle), Signal*> ipcMap_;

  /// IPC counterparts of Convert() and DuplicateHandle(); defined out of line.
  static Signal* lookupIpc(hsa_signal_t signal);
  static Signal* duplicateIpc(hsa_signal_t signal);

  /// @variable Ref count of this signal's handle (see IPC APIs)
  std::atomic<uint32_t> refcount_;

  /// @variable Count of handle references and Retain() calls for this handle (see IPC APIs)
  std::atomic<uint32_t> retained_;

  /// registerIpc() is called by the constructor for IPC-enabled signals.
  void registerIpc();
  bool deregisterIpc();

  DISALLOW_COPY_AND_ASSIGN(Signal);
};

/// @brief Handle signal operations which are not for use on doorbells.
class DoorbellSignal : public Signal {
 public:
  using Signal::Signal;

  /// @brief This operation is illegal
  hsa_signal_value_t LoadRelaxed() final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t LoadAcquire() final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
                                 uint64_t timeout, hsa_wait_state_t wait_hint) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  void AndRelaxed(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AndAcquire(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AndRelease(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AndAcqRel(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void OrRelaxed(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void OrAcquire(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void OrRelease(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void OrAcqRel(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void XorRelaxed(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void XorAcquire(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void XorRelease(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void XorAcqRel(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AddRelaxed(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AddAcquire(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AddRelease(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void AddAcqRel(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void SubRelaxed(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void SubAcquire(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void SubRelease(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  void SubAcqRel(hsa_signal_value_t value) final override { assert(false); }

  /// @brief This operation is illegal
  hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t ExchAcquire(hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t ExchRelease(hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected,
                                hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t CasAcquire(hsa_signal_value_t expected,
                                hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t CasRelease(hsa_signal_value_t expected,
                                hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected,
                               hsa_signal_value_t value) final override {
    assert(false);
    return 0;
  }

  /// @brief This operation is illegal
  hsa_signal_value_t* ValueLocation() const final override {
    assert(false);
    return NULL;
  }

  /// @brief This operation is illegal
  HsaEvent* EopEvent() final override {
    assert(false);
    return NULL;
  }

 protected:
  /// @brief Disallow destroying doorbell apart from its queue.
  void doDestroySignal() final override { assert(false); }
};

/// @brief Thin wrapper around hsa_signal_t that adds pointer-like access to
/// the core::Signal behind the handle.
///
/// The wrapper adds no data members, so it has the same size as hsa_signal_t,
/// both singly and in arrays (checked by the static_asserts below).
struct hsa_signal_handle {
  hsa_signal_t signal;

  /// Leaves `signal` uninitialized.
  hsa_signal_handle() {}

  /// Implicit conversion from the public handle type.
  hsa_signal_handle(hsa_signal_t Signal) : signal(Signal) {}

  /// Implicit conversion back to the public handle type.
  operator hsa_signal_t() {
    return signal;
  }

  /// Resolves the handle through core::Signal::Convert(), which throws for a
  /// zero or invalid handle.
  Signal* operator->() {
    return core::Signal::Convert(signal);
  }
};
static_assert(sizeof(hsa_signal_handle) == sizeof(hsa_signal_t),
              "hsa_signal_handle and hsa_signal_t must have identical binary layout.");
static_assert(sizeof(hsa_signal_handle[2]) == sizeof(hsa_signal_t[2]),
              "hsa_signal_handle and hsa_signal_t must have identical binary layout.");

/// @brief Holds the list of signals behind an hsa_signal_group_t handle.
///
/// The handle value is the address of the SignalGroup object itself.  The
/// signal array is released with delete[] on destruction.
class SignalGroup : public Checked<0xBD35DDDD578F091> {
 public:
  /// @brief Wraps the object's address in a public group handle.
  static __forceinline hsa_signal_group_t Convert(SignalGroup* group) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(group);
    const hsa_signal_group_t handle = {static_cast<uint64_t>(address)};
    return handle;
  }

  /// @brief Reinterprets a public group handle as the object it refers to.
  /// No validation is done here; use IsValid() on the result.
  static __forceinline SignalGroup* Convert(hsa_signal_group_t group) {
    const uintptr_t address = static_cast<uintptr_t>(group.handle);
    return reinterpret_cast<SignalGroup*>(address);
  }

  SignalGroup(uint32_t num_signals, const hsa_signal_t* signals);

  ~SignalGroup() {
    delete[] signals;
  }

  /// @brief True when the Checked<> base validates and a signal array is present.
  bool IsValid() const {
    return CheckedType::IsValid() && signals != nullptr;
  }

  /// @brief Read-only access to the stored signal array.
  const hsa_signal_t* List() const {
    return signals;
  }

  /// @brief Value of the stored signal count.
  uint32_t Count() const {
    return count;
  }

 private:
  hsa_signal_t* signals;
  const uint32_t count;
  DISALLOW_COPY_AND_ASSIGN(SignalGroup);
};

/// @brief Deleter functor that disposes of a Signal through DestroySignal()
/// rather than operator delete (Signal's destructor is protected).
class SignalDeleter {
 public:
  void operator()(Signal* ptr) {
    ptr->DestroySignal();
  }
};

/// Owning smart pointer for core::Signal that uses SignalDeleter.
using unique_signal_ptr = ::std::unique_ptr<core::Signal, SignalDeleter>;

}  // namespace core
}  // namespace rocr
#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/svm_profiler.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_INC_SVM_PROFILER_H_
#define HSA_RUNTME_CORE_INC_SVM_PROFILER_H_

#include <vector>
#include <string>
#include <thread>
#include "core/util/os.h"

namespace rocr {
namespace AMD {

    /// @brief Controller object for SVM profiling.
    /// Only the declaration is visible here; judging by the member names it
    /// owns a polling thread (poll_smi_thread_) that runs PollSmiRun()/PollSmi()
    /// — TODO confirm against the implementation file.
    class SvmProfileControl {
    public:
      SvmProfileControl();
      ~SvmProfileControl();

    private:
      /// printf-style helper returning a std::string; presumably formats into
      /// format_buffer — verify in the definition.
      template <typename... Args> std::string format(const char* format, Args... arg);
      /// Polling routine; presumably the body of the polling thread.
      void PollSmi();
      /// Static thread entry point; takes the owning object as an opaque pointer.
      static void PollSmiRun(void* profileControl);
      /// NOTE(review): looks like an event handle/descriptor — confirm its exact meaning.
      int event;
      /// Presumably the flag asking the polling loop to stop — confirm.
      bool exit;
      /// Handle of the polling thread.
      os::Thread poll_smi_thread_;
      /// Scratch character buffer (see format()).
      std::vector<char> format_buffer;
    };

} // namespace AMD
} // namespace rocr
#endif // header guard


================================================
FILE: runtime/hsa-runtime/core/inc/thunk_loader.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_THUNK_LOADER_H
#define HSA_RUNTIME_CORE_INC_THUNK_LOADER_H

#include <amdgpu.h>
#include "hsakmt/hsakmttypes.h"

// Forward declaration; only pointers to DtifPlatform are used in this header.
class DtifPlatform;
// Function type (not a pointer type): takes a C string, returns a DtifPlatform*.
typedef DtifPlatform* (DtifCreateFunc)(const char*);
// Function type (not a pointer type): no arguments, no result.
typedef void (DtifDestroyFunc)();

namespace rocr {
namespace core {

// Naming helpers for the thunk (hsaKmt*) entry points.
// HSAKMT_DEF(f)  -> PFNf   : name of the function typedef for entry point f.
// HSAKMT_PFN(f)  -> pfn_f  : name of the corresponding member.
// HSAKMT_CALL(f) -> accesses pfn_f through the runtime singleton's thunkLoader().
#define HSAKMT_DEF(function_name)   PFN##function_name
#define HSAKMT_PFN(function_name)   pfn_##function_name
#define HSAKMT_CALL(function_name)   core::Runtime::runtime_singleton_->thunkLoader()->pfn_##function_name

// Same three helpers for the DRM entry points; the expansions are identical
// to their HSAKMT_* counterparts.
#define DRM_DEF(function_name)   PFN##function_name
#define DRM_PFN(function_name)   pfn_##function_name
#define DRM_CALL(function_name)   core::Runtime::runtime_singleton_->thunkLoader()->pfn_##function_name

// Holds one function type (PFN<name>) and one public function pointer member
// (pfn_<name>) for every hsaKmt* thunk entry point and for a small set of
// drm API functions declared below.
// NOTE(review): the pointers are presumably filled in by LoadThunkApiTable();
// the implementation is not visible in this header - confirm there.
class ThunkLoader {
  public:
    // ---- Function types for the hsaKmt* (thunk) API ----
    // Each typedef names a function type (not a pointer); the matching
    // pointer members are declared further down as `PFN<name>* pfn_<name>`.
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtOpenKFD))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtCloseKFD))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetVersion))(HsaVersionInfo* VersionInfo);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAcquireSystemProperties))(HsaSystemProperties* SystemProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtReleaseSystemProperties))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetNodeProperties))(HSAuint32 NodeId, \
                                      HsaNodeProperties* NodeProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetNodeMemoryProperties))(HSAuint32 NodeId, \
                                      HSAuint32 NumBanks, \
                                      HsaMemoryProperties* MemoryProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetNodeCacheProperties))(HSAuint32 NodeId, \
                                      HSAuint32 ProcessorId, \
                                      HSAuint32 NumCaches, \
                                      HsaCacheProperties* CacheProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetNodeIoLinkProperties))(HSAuint32 NodeId, \
                                      HSAuint32 NumIoLinks, \
                                      HsaIoLinkProperties* IoLinkProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtCreateEvent))(HsaEventDescriptor* EventDesc, \
                                      bool ManualReset, \
                                      bool IsSignaled, \
                                      HsaEvent** Event);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDestroyEvent))(HsaEvent* Event);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetEvent))(HsaEvent* Event);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtResetEvent))(HsaEvent* Event);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtQueryEventState))(HsaEvent* Event, \
                                      HSAuint32 Milliseconds);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtWaitOnEvent))(HsaEvent* Event, \
                                      HSAuint32 Milliseconds);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents))(HsaEvent* Events[], \
                                      HSAuint32 NumEvents, \
                                      bool WaitOnAll, \
                                      HSAuint32 Milliseconds);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtCreateQueue))(HSAuint32 NodeId, \
                                      HSA_QUEUE_TYPE Type, \
                                      HSAuint32 QueuePercentage, \
                                      HSA_QUEUE_PRIORITY Priority, \
                                      void* QueueAddress, \
                                      HSAuint64 QueueSizeInBytes, \
                                      HsaEvent* Event, \
                                      HsaQueueResource* QueueResource);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtCreateQueueExt))(HSAuint32 NodeId, \
                                      HSA_QUEUE_TYPE Type, \
                                      HSAuint32 QueuePercentage, \
                                      HSA_QUEUE_PRIORITY Priority, \
                                      HSAuint32 SdmaEngineId, \
                                      void* QueueAddress, \
                                      HSAuint64 QueueSizeInBytes, \
                                      HsaEvent* Event, \
                                      HsaQueueResource* QueueResource);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtUpdateQueue))( HSA_QUEUEID QueueId, \
                                      HSAuint32 QueuePercentage, \
                                      HSA_QUEUE_PRIORITY Priority, \
                                      void* QueueAddress, \
                                      HSAuint64 QueueSize, \
                                      HsaEvent* Event);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDestroyQueue))(HSA_QUEUEID QueueId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetQueueCUMask))(HSA_QUEUEID QueueId, \
                                      HSAuint32 CUMaskCount, \
                                      HSAuint32* QueueCUMask);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetMemoryPolicy))(HSAuint32 Node, \
                                      HSAuint32 DefaultPolicy, \
                                      HSAuint32 AlternatePolicy, \
                                      void* MemoryAddressAlternate, \
                                      HSAuint64 MemorySizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAllocMemory))(HSAuint32 PreferredNode, \
                                      HSAuint64 SizeInBytes, \
                                      HsaMemFlags MemFlags, \
                                      void** MemoryAddress);
    // NOTE(review): the parameter name `emFlags` below looks like a typo for
    // `MemFlags` (compare hsaKmtAllocMemory above); it has no effect on the
    // function type.
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAllocMemoryAlign))(HSAuint32 PreferredNode, \
                                      HSAuint64 SizeInBytes, \
                                      HSAuint64 Alignment, \
                                      HsaMemFlags emFlags, \
                                      void** MemoryAddress);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtFreeMemory))(void* MemoryAddress, \
                                      HSAuint64 SizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAvailableMemory))(HSAuint32 Node, \
                                      HSAuint64 *AvailableBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterMemory))(void* MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterMemoryToNodes))(void *MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes, \
                                      HSAuint64 NumberOfNodes, \
                                      HSAuint32* NodeArray);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterMemoryWithFlags))(void *MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes, \
                                      HsaMemFlags MemFlags);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodes))(HSAuint64 GraphicsResourceHandle, \
                                      HsaGraphicsResourceInfo *GraphicsResourceInfo, \
                                      HSAuint64 NumberOfNodes, \
                                      HSAuint32* NodeArray);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodesExt))(HSAuint64 GraphicsResourceHandle, \
                                      HsaGraphicsResourceInfo *GraphicsResourceInfo, \
                                      HSAuint64 NumberOfNodes, \
                                      HSAuint32* NodeArray, \
                                      HSA_REGISTER_MEM_FLAGS RegisterFlags);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtShareMemory))(void *MemoryAddress, \
                                      HSAuint64 SizeInBytes, \
                                      HsaSharedMemoryHandle *SharedMemoryHandle);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterSharedHandle))(const HsaSharedMemoryHandle *SharedMemoryHandle, \
                                      void **MemoryAddress, \
                                      HSAuint64 *SizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRegisterSharedHandleToNodes))(const HsaSharedMemoryHandle *SharedMemoryHandle, \
                                      void **MemoryAddress, \
                                      HSAuint64 *SizeInBytes, \
                                      HSAuint64 NumberOfNodes, \
                                      HSAuint32* NodeArray);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtProcessVMRead))(HSAuint32 Pid, \
                                      HsaMemoryRange *LocalMemoryArray, \
                                      HSAuint64 LocalMemoryArrayCount, \
                                      HsaMemoryRange *RemoteMemoryArray, \
                                      HSAuint64 RemoteMemoryArrayCount, \
                                      HSAuint64 *SizeCopied);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtProcessVMWrite))(HSAuint32 Pid, \
                                      HsaMemoryRange *LocalMemoryArray, \
                                      HSAuint64 LocalMemoryArrayCount, \
                                      HsaMemoryRange *RemoteMemoryArray, \
                                      HSAuint64 RemoteMemoryArrayCount, \
                                      HSAuint64 *SizeCopied);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDeregisterMemory))(void* MemoryAddress);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMapMemoryToGPU))(void*  MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes, \
                                      HSAuint64* AlternateVAGPU);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMapMemoryToGPUNodes))(void* MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes, \
                                      HSAuint64* AlternateVAGPU, \
                                      HsaMemMapFlags MemMapFlags, \
                                      HSAuint64 NumberOfNodes, \
                                      HSAuint32* NodeArray);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtUnmapMemoryToGPU))(void* MemoryAddress);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgRegister))(HSAuint32 NodeId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgUnregister))(HSAuint32 NodeId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgWavefrontControl))(HSAuint32 NodeId, \
                                      HSA_DBG_WAVEOP Operand, \
                                      HSA_DBG_WAVEMODE Mode, \
                                      HSAuint32 TrapId, \
                                      HsaDbgWaveMessage* DbgWaveMsgRing);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgAddressWatch))(HSAuint32 NodeId, \
                                      HSAuint32 NumWatchPoints, \
                                      HSA_DBG_WATCH_MODE WatchMode[], \
                                      void* WatchAddress[], \
                                      HSAuint64 WatchMask[], \
                                      HsaEvent* WatchEvent[]);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgEnable))(void **runtime_info, \
                                      HSAuint32 *data_size);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgDisable))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgGetDeviceData))(void **data, \
                                      HSAuint32 *n_entries, \
                                      HSAuint32 *entry_size);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDbgGetQueueData))(void **data, \
                                      HSAuint32 *n_entries, \
                                      HSAuint32 *entry_size, \
                                      bool suspend_queues);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetClockCounters))(HSAuint32 NodeId, \
                                      HsaClockCounters* Counters);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcGetCounterProperties))(HSAuint32 NodeId, \
                                      HsaCounterProperties** CounterProperties);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcRegisterTrace))(HSAuint32 NodeId, \
                                      HSAuint32 NumberOfCounters, \
                                      HsaCounter* Counters, \
                                      HsaPmcTraceRoot* TraceRoot);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcUnregisterTrace))(HSAuint32 NodeId, \
                                      HSATraceId TraceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcAcquireTraceAccess))(HSAuint32 NodeId, \
                                      HSATraceId TraceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcReleaseTraceAccess))(HSAuint32 NodeId, \
                                      HSATraceId TraceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcStartTrace))(HSATraceId TraceId, \
                                      void* TraceBuffer, \
                                      HSAuint64 TraceBufferSizeBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcQueryTrace))(HSATraceId TraceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPmcStopTrace))(HSATraceId TraceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMapGraphicHandle))(HSAuint32 NodeId, \
                                      HSAuint64 GraphicDeviceHandle, \
                                      HSAuint64 GraphicResourceHandle, \
                                      HSAuint64 GraphicResourceOffset, \
                                      HSAuint64 GraphicResourceSize, \
                                      HSAuint64* FlatMemoryAddress);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtUnmapGraphicHandle))(HSAuint32 NodeId, \
                                      HSAuint64 FlatMemoryAddress, \
                                      HSAuint64 SizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetTrapHandler))(HSAuint32 NodeId, \
                                      void* TrapHandlerBaseAddress, \
                                      HSAuint64 TrapHandlerSizeInBytes, \
                                      void* TrapBufferBaseAddress, \
                                      HSAuint64 TrapBufferSizeInBytes);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetTileConfig))(HSAuint32 NodeId, \
                                      HsaGpuTileConfig* config);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtQueryPointerInfo))(const void* Pointer, \
                                      HsaPointerInfo* PointerInfo);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetMemoryUserData))(const void* Pointer,  \
                                      void* UserData);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetQueueInfo))(HSA_QUEUEID QueueId, \
                                      HsaQueueInfo *QueueInfo);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAllocQueueGWS))(HSA_QUEUEID QueueId, \
                                      HSAuint32 nGWS, \
                                      HSAuint32 *firstGWS);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRuntimeEnable))(void* rDebug, \
                                      bool setupTtmp);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtRuntimeDisable))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtCheckRuntimeDebugSupport))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetRuntimeCapabilities))(HSAuint32 *caps_mask);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtDebugTrapIoctl))(struct kfd_ioctl_dbg_trap_args *arg, \
                                      HSA_QUEUEID *Queues, \
                                      HSAuint64 *DebugReturn);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSPMAcquire))(HSAuint32 PreferredNode);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSPMRelease))(HSAuint32 PreferredNode);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSPMSetDestBuffer))(HSAuint32 PreferredNode, \
                                      HSAuint32 SizeInBytes, \
                                      HSAuint32* timeout, \
                                      HSAuint32* SizeCopied, \
                                      void *DestMemoryAddress, \
                                      bool *isSPMDataLoss);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSVMSetAttr))(void *start_addr, \
                                      HSAuint64 size, \
                                      unsigned int nattr, \
                                      HSA_SVM_ATTRIBUTE *attrs);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSVMGetAttr))(void *start_addr, \
                                      HSAuint64 size, \
                                      unsigned int nattr, \
                                      HSA_SVM_ATTRIBUTE *attrs);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtSetXNACKMode))(HSAint32 enable);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetXNACKMode))(HSAint32 * enable);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtOpenSMI))(HSAuint32 NodeId, \
                                      int *fd);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtExportDMABufHandle))(void *MemoryAddress, \
                                      HSAuint64 MemorySizeInBytes, \
                                      int *DMABufFd, \
                                      HSAuint64 *Offset);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtWaitOnEvent_Ext))(HsaEvent* Event, \
                                      HSAuint32 Milliseconds, \
                                      uint64_t *event_age);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents_Ext))(HsaEvent* Events[], \
                                      HSAuint32 NumEvents, \
                                      bool WaitOnAll, \
                                      HSAuint32 Milliseconds, \
                                      uint64_t *event_age);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtReplaceAsanHeaderPage))(void *addr);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtReturnAsanHeaderPage))(void *addr);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtGetAMDGPUDeviceHandle))(HSAuint32 NodeId, \
                                      HsaAMDGPUDeviceHandle *DeviceHandle);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingQueryCapabilities))(HSAuint32 NodeId, \
                                      void *sample_info, \
                                      HSAuint32 sample_info_sz, \
                                      HSAuint32 *sz_needed);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingCreate))(HSAuint32 node_id, \
                                      HsaPcSamplingInfo *sample_info, \
                                      HsaPcSamplingTraceId *traceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingDestroy))(HSAuint32 NodeId, \
                                      HsaPcSamplingTraceId traceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingStart))(HSAuint32 NodeId, \
                                      HsaPcSamplingTraceId traceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingStop))(HSAuint32 NodeId, \
                                      HsaPcSamplingTraceId traceId);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingSupport))(void);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtModelEnabled))(bool* enable);
    typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtQueueRingDoorbell))(HSA_QUEUEID QueueId);

    /* drm API */
    // Function types for the amdgpu_* / drm* functions; all return int.
    typedef int (DRM_DEF(amdgpu_device_initialize))(int fd, \
                                      uint32_t *major_version, \
                                      uint32_t *minor_version, \
                                      amdgpu_device_handle *device_handle);

    typedef int (DRM_DEF(amdgpu_device_deinitialize))(amdgpu_device_handle device_handle);

    typedef int (DRM_DEF(amdgpu_query_gpu_info))(amdgpu_device_handle dev, \
                                      struct amdgpu_gpu_info *info);

    typedef int (DRM_DEF(amdgpu_bo_cpu_map))(amdgpu_bo_handle bo, \
                                      void **cpu);

    typedef int (DRM_DEF(amdgpu_bo_free))(amdgpu_bo_handle buf_handle);

    typedef int (DRM_DEF(amdgpu_bo_export))(amdgpu_bo_handle bo, \
                                      enum amdgpu_bo_handle_type type, \
                                      uint32_t *shared_handle);

    typedef int (DRM_DEF(amdgpu_bo_import))(amdgpu_device_handle dev, \
                                      enum amdgpu_bo_handle_type type, \
                                      uint32_t shared_handle, \
                                      struct amdgpu_bo_import_result *output);

    typedef int (DRM_DEF(amdgpu_bo_va_op))(amdgpu_bo_handle bo, \
                                      uint64_t offset, \
                                      uint64_t size, \
                                      uint64_t addr, \
                                      uint64_t flags, \
                                      uint32_t op);

    typedef int (DRM_DEF(drmCommandWriteRead))(int fd, \
                                      unsigned long drmCommandIndex, \
                                      void *data, \
                                      unsigned long size);

    ThunkLoader();
    ~ThunkLoader();

    // NOTE(review): judging by the names, LoadThunkApiTable fills the pfn_*
    // members below and Create/DestroyThunkInstance manage the loaded
    // backend; the definitions live outside this header - confirm there.
    void LoadThunkApiTable();
    bool CreateThunkInstance();
    bool DestroyThunkInstance();

    // ---- Function pointers, one per hsaKmt* type declared above ----
    // These are the members the HSAKMT_CALL(name) macro dereferences.
    HSAKMT_DEF(hsaKmtOpenKFD)* HSAKMT_PFN(hsaKmtOpenKFD);
    HSAKMT_DEF(hsaKmtCloseKFD)* HSAKMT_PFN(hsaKmtCloseKFD);
    HSAKMT_DEF(hsaKmtGetVersion)* HSAKMT_PFN(hsaKmtGetVersion);
    HSAKMT_DEF(hsaKmtAcquireSystemProperties)* HSAKMT_PFN(hsaKmtAcquireSystemProperties);
    HSAKMT_DEF(hsaKmtReleaseSystemProperties)* HSAKMT_PFN(hsaKmtReleaseSystemProperties);
    HSAKMT_DEF(hsaKmtGetNodeProperties)* HSAKMT_PFN(hsaKmtGetNodeProperties);
    HSAKMT_DEF(hsaKmtGetNodeMemoryProperties)* HSAKMT_PFN(hsaKmtGetNodeMemoryProperties);
    HSAKMT_DEF(hsaKmtGetNodeCacheProperties)* HSAKMT_PFN(hsaKmtGetNodeCacheProperties);
    HSAKMT_DEF(hsaKmtGetNodeIoLinkProperties)* HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties);
    HSAKMT_DEF(hsaKmtCreateEvent)* HSAKMT_PFN(hsaKmtCreateEvent);
    HSAKMT_DEF(hsaKmtDestroyEvent)* HSAKMT_PFN(hsaKmtDestroyEvent);
    HSAKMT_DEF(hsaKmtSetEvent)* HSAKMT_PFN(hsaKmtSetEvent);
    HSAKMT_DEF(hsaKmtResetEvent)* HSAKMT_PFN(hsaKmtResetEvent);
    HSAKMT_DEF(hsaKmtQueryEventState)* HSAKMT_PFN(hsaKmtQueryEventState);
    HSAKMT_DEF(hsaKmtWaitOnEvent)* HSAKMT_PFN(hsaKmtWaitOnEvent);
    HSAKMT_DEF(hsaKmtWaitOnMultipleEvents)* HSAKMT_PFN(hsaKmtWaitOnMultipleEvents);
    HSAKMT_DEF(hsaKmtCreateQueue)* HSAKMT_PFN(hsaKmtCreateQueue);
    HSAKMT_DEF(hsaKmtCreateQueueExt)* HSAKMT_PFN(hsaKmtCreateQueueExt);
    HSAKMT_DEF(hsaKmtUpdateQueue)* HSAKMT_PFN(hsaKmtUpdateQueue);
    HSAKMT_DEF(hsaKmtDestroyQueue)* HSAKMT_PFN(hsaKmtDestroyQueue);
    HSAKMT_DEF(hsaKmtSetQueueCUMask)* HSAKMT_PFN(hsaKmtSetQueueCUMask);
    HSAKMT_DEF(hsaKmtSetMemoryPolicy)* HSAKMT_PFN(hsaKmtSetMemoryPolicy);
    HSAKMT_DEF(hsaKmtAllocMemory)* HSAKMT_PFN(hsaKmtAllocMemory);
    HSAKMT_DEF(hsaKmtAllocMemoryAlign)* HSAKMT_PFN(hsaKmtAllocMemoryAlign);
    HSAKMT_DEF(hsaKmtFreeMemory)* HSAKMT_PFN(hsaKmtFreeMemory);
    HSAKMT_DEF(hsaKmtAvailableMemory)* HSAKMT_PFN(hsaKmtAvailableMemory);
    HSAKMT_DEF(hsaKmtRegisterMemory)* HSAKMT_PFN(hsaKmtRegisterMemory);
    HSAKMT_DEF(hsaKmtRegisterMemoryToNodes)* HSAKMT_PFN(hsaKmtRegisterMemoryToNodes);
    HSAKMT_DEF(hsaKmtRegisterMemoryWithFlags)* HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags);
    HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodes)* HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes);
    HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodesExt)* HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt);
    HSAKMT_DEF(hsaKmtShareMemory)* HSAKMT_PFN(hsaKmtShareMemory);
    HSAKMT_DEF(hsaKmtRegisterSharedHandle)* HSAKMT_PFN(hsaKmtRegisterSharedHandle);
    HSAKMT_DEF(hsaKmtRegisterSharedHandleToNodes)* HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes);
    HSAKMT_DEF(hsaKmtProcessVMRead)* HSAKMT_PFN(hsaKmtProcessVMRead);
    HSAKMT_DEF(hsaKmtProcessVMWrite)* HSAKMT_PFN(hsaKmtProcessVMWrite);
    HSAKMT_DEF(hsaKmtDeregisterMemory)* HSAKMT_PFN(hsaKmtDeregisterMemory);
    HSAKMT_DEF(hsaKmtMapMemoryToGPU)* HSAKMT_PFN(hsaKmtMapMemoryToGPU);
    HSAKMT_DEF(hsaKmtMapMemoryToGPUNodes)* HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes);
    HSAKMT_DEF(hsaKmtUnmapMemoryToGPU)* HSAKMT_PFN(hsaKmtUnmapMemoryToGPU);
    HSAKMT_DEF(hsaKmtDbgRegister)* HSAKMT_PFN(hsaKmtDbgRegister);
    HSAKMT_DEF(hsaKmtDbgUnregister)* HSAKMT_PFN(hsaKmtDbgUnregister);
    HSAKMT_DEF(hsaKmtDbgWavefrontControl)* HSAKMT_PFN(hsaKmtDbgWavefrontControl);
    HSAKMT_DEF(hsaKmtDbgAddressWatch)* HSAKMT_PFN(hsaKmtDbgAddressWatch);
    HSAKMT_DEF(hsaKmtDbgEnable)* HSAKMT_PFN(hsaKmtDbgEnable);
    HSAKMT_DEF(hsaKmtDbgDisable)* HSAKMT_PFN(hsaKmtDbgDisable);
    HSAKMT_DEF(hsaKmtDbgGetDeviceData)* HSAKMT_PFN(hsaKmtDbgGetDeviceData);
    HSAKMT_DEF(hsaKmtDbgGetQueueData)* HSAKMT_PFN(hsaKmtDbgGetQueueData);
    HSAKMT_DEF(hsaKmtGetClockCounters)* HSAKMT_PFN(hsaKmtGetClockCounters);
    HSAKMT_DEF(hsaKmtPmcGetCounterProperties)* HSAKMT_PFN(hsaKmtPmcGetCounterProperties);
    HSAKMT_DEF(hsaKmtPmcRegisterTrace)* HSAKMT_PFN(hsaKmtPmcRegisterTrace);
    HSAKMT_DEF(hsaKmtPmcUnregisterTrace)* HSAKMT_PFN(hsaKmtPmcUnregisterTrace);
    HSAKMT_DEF(hsaKmtPmcAcquireTraceAccess)* HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess);
    HSAKMT_DEF(hsaKmtPmcReleaseTraceAccess)* HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess);
    HSAKMT_DEF(hsaKmtPmcStartTrace)* HSAKMT_PFN(hsaKmtPmcStartTrace);
    HSAKMT_DEF(hsaKmtPmcQueryTrace)* HSAKMT_PFN(hsaKmtPmcQueryTrace);
    HSAKMT_DEF(hsaKmtPmcStopTrace)* HSAKMT_PFN(hsaKmtPmcStopTrace);
    HSAKMT_DEF(hsaKmtMapGraphicHandle)* HSAKMT_PFN(hsaKmtMapGraphicHandle);
    HSAKMT_DEF(hsaKmtUnmapGraphicHandle)* HSAKMT_PFN(hsaKmtUnmapGraphicHandle);
    HSAKMT_DEF(hsaKmtSetTrapHandler)* HSAKMT_PFN(hsaKmtSetTrapHandler);
    HSAKMT_DEF(hsaKmtGetTileConfig)* HSAKMT_PFN(hsaKmtGetTileConfig);
    HSAKMT_DEF(hsaKmtQueryPointerInfo)* HSAKMT_PFN(hsaKmtQueryPointerInfo);
    HSAKMT_DEF(hsaKmtSetMemoryUserData)* HSAKMT_PFN(hsaKmtSetMemoryUserData);
    HSAKMT_DEF(hsaKmtGetQueueInfo)* HSAKMT_PFN(hsaKmtGetQueueInfo);
    HSAKMT_DEF(hsaKmtAllocQueueGWS)* HSAKMT_PFN(hsaKmtAllocQueueGWS);
    HSAKMT_DEF(hsaKmtRuntimeEnable)* HSAKMT_PFN(hsaKmtRuntimeEnable);
    HSAKMT_DEF(hsaKmtRuntimeDisable)* HSAKMT_PFN(hsaKmtRuntimeDisable);
    HSAKMT_DEF(hsaKmtCheckRuntimeDebugSupport)* HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport);
    HSAKMT_DEF(hsaKmtGetRuntimeCapabilities)* HSAKMT_PFN(hsaKmtGetRuntimeCapabilities);
    HSAKMT_DEF(hsaKmtDebugTrapIoctl)* HSAKMT_PFN(hsaKmtDebugTrapIoctl);
    HSAKMT_DEF(hsaKmtSPMAcquire)* HSAKMT_PFN(hsaKmtSPMAcquire);
    HSAKMT_DEF(hsaKmtSPMRelease)* HSAKMT_PFN(hsaKmtSPMRelease);
    HSAKMT_DEF(hsaKmtSPMSetDestBuffer)* HSAKMT_PFN(hsaKmtSPMSetDestBuffer);
    HSAKMT_DEF(hsaKmtSVMSetAttr)* HSAKMT_PFN(hsaKmtSVMSetAttr);
    HSAKMT_DEF(hsaKmtSVMGetAttr)* HSAKMT_PFN(hsaKmtSVMGetAttr);
    HSAKMT_DEF(hsaKmtSetXNACKMode)* HSAKMT_PFN(hsaKmtSetXNACKMode);
    HSAKMT_DEF(hsaKmtGetXNACKMode)* HSAKMT_PFN(hsaKmtGetXNACKMode);
    HSAKMT_DEF(hsaKmtOpenSMI)* HSAKMT_PFN(hsaKmtOpenSMI);
    HSAKMT_DEF(hsaKmtExportDMABufHandle)* HSAKMT_PFN(hsaKmtExportDMABufHandle);
    HSAKMT_DEF(hsaKmtWaitOnEvent_Ext)* HSAKMT_PFN(hsaKmtWaitOnEvent_Ext);
    HSAKMT_DEF(hsaKmtWaitOnMultipleEvents_Ext)* HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext);
    HSAKMT_DEF(hsaKmtReplaceAsanHeaderPage)* HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage);
    HSAKMT_DEF(hsaKmtReturnAsanHeaderPage)* HSAKMT_PFN(hsaKmtReturnAsanHeaderPage);
    HSAKMT_DEF(hsaKmtGetAMDGPUDeviceHandle)* HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle);
    HSAKMT_DEF(hsaKmtPcSamplingQueryCapabilities)* HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities);
    HSAKMT_DEF(hsaKmtPcSamplingCreate)* HSAKMT_PFN(hsaKmtPcSamplingCreate);
    HSAKMT_DEF(hsaKmtPcSamplingDestroy)* HSAKMT_PFN(hsaKmtPcSamplingDestroy);
    HSAKMT_DEF(hsaKmtPcSamplingStart)* HSAKMT_PFN(hsaKmtPcSamplingStart);
    HSAKMT_DEF(hsaKmtPcSamplingStop)* HSAKMT_PFN(hsaKmtPcSamplingStop);
    HSAKMT_DEF(hsaKmtPcSamplingSupport)* HSAKMT_PFN(hsaKmtPcSamplingSupport);
    HSAKMT_DEF(hsaKmtModelEnabled)* HSAKMT_PFN(hsaKmtModelEnabled);
    HSAKMT_DEF(hsaKmtQueueRingDoorbell)* HSAKMT_PFN(hsaKmtQueueRingDoorbell);

    // ---- Function pointers for the drm API types declared above ----
    // These are the members the DRM_CALL(name) macro dereferences.
    DRM_DEF(amdgpu_device_initialize)* DRM_PFN(amdgpu_device_initialize);
    DRM_DEF(amdgpu_device_deinitialize)* DRM_PFN(amdgpu_device_deinitialize);
    DRM_DEF(amdgpu_query_gpu_info)* DRM_PFN(amdgpu_query_gpu_info);
    DRM_DEF(amdgpu_bo_cpu_map)* DRM_PFN(amdgpu_bo_cpu_map);
    DRM_DEF(amdgpu_bo_free)* DRM_PFN(amdgpu_bo_free);
    DRM_DEF(amdgpu_bo_export)* DRM_PFN(amdgpu_bo_export);
    DRM_DEF(amdgpu_bo_import)* DRM_PFN(amdgpu_bo_import);
    DRM_DEF(amdgpu_bo_va_op)* DRM_PFN(amdgpu_bo_va_op);
    DRM_DEF(drmCommandWriteRead)* DRM_PFN(drmCommandWriteRead);

  private:
    // NOTE(review): presumably the handle of the dynamically loaded DTIF
    // library (see DtifCreateFunc / DtifDestroyFunc) - confirm in the
    // implementation file.
    void *dtif_handle;
};

}   //  namespace core
}   //  namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_aie_agent.h"

#include <cstring>
#include <functional>
#include <string>

#include "core/inc/amd_aie_aql_queue.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/amd_xdna_driver.h"
#include "core/inc/driver.h"
#include "core/inc/runtime.h"

namespace rocr {
namespace AMD {

/// Builds an AIE agent for topology node @p node.
///
/// The base agent is bound to the XDNA driver obtained from the runtime
/// singleton and is tagged as an AIE device. A copy of @p node_props is kept.
/// The memory regions are created first because the allocator setup scans
/// them.
AieAgent::AieAgent(uint32_t node, const HsaNodeProperties& node_props)
    : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::XDNA), node,
                  core::Agent::DeviceType::kAmdAieDevice),
      node_props_(node_props) {
  // Order matters: InitAllocators() walks the regions created here.
  InitRegionList();

  InitAllocators();
}

/// Releases every memory region object owned by this agent and empties the
/// region list.
AieAgent::~AieAgent() {
  DeleteObject destroy;
  for (auto it = regions_.begin(); it != regions_.end(); ++it) {
    destroy(*it);
  }

  regions_.clear();
}

/// Invokes @p callback once for each memory region of this agent.
///
/// @param include_peer Not consulted by this implementation.
/// @param callback     Function called with each region handle and @p data.
/// @param data         Opaque pointer forwarded to @p callback.
/// @return The first non-success status returned by @p callback (iteration
///         stops there), otherwise HSA_STATUS_SUCCESS.
hsa_status_t AieAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void *data),
                                   void *data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);

  for (auto it = regions_.begin(); it != regions_.end(); ++it) {
    hsa_region_t handle(core::MemoryRegion::Convert(*it));
    const hsa_status_t status = invoke(handle, data);
    if (status != HSA_STATUS_SUCCESS) return status;
  }

  return HSA_STATUS_SUCCESS;
}

/// Iterates over this agent's own regions by delegating to VisitRegion()
/// with peer regions excluded.
///
/// @param callback Function called with each region handle and @p data.
/// @param data     Opaque pointer forwarded to @p callback.
/// @return Whatever VisitRegion() returns.
hsa_status_t AieAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void *data),
    void *data) const {
  const bool include_peer = false;
  return VisitRegion(include_peer, callback, data);
}

/// Cache iteration is not available for AIE agents.
///
/// Neither @p callback nor @p data is used.
///
/// @return Always HSA_STATUS_ERROR_INVALID_CACHE.
hsa_status_t AieAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
                                                             void *data),
                                    void *data) const {
  // There are no caches to report for an AIE device.
  const hsa_status_t no_caches = HSA_STATUS_ERROR_INVALID_CACHE;
  return no_caches;
}

/// Invokes @p callback once for each ISA returned by supported_isas().
///
/// @param callback Function called with each ISA handle and @p data.
/// @param data     Opaque pointer forwarded to @p callback.
/// @return The first non-success status returned by @p callback (iteration
///         stops there), otherwise HSA_STATUS_SUCCESS.
hsa_status_t AieAgent::IterateSupportedIsas(
                    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                                          void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);

  for (const auto& supported : supported_isas()) {
    const hsa_status_t status = invoke(core::Isa::Handle(supported), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  return HSA_STATUS_SUCCESS;
}

/// Answers an agent attribute query for an AIE agent.
///
/// Most GPU-centric attributes (wavefront size, compute unit count, image
/// limits, ...) are reported as zero. The number of bytes written for each
/// attribute must match the type the HSA specification assigns to it, because
/// callers size their buffers from the specification.
///
/// @param attribute Attribute to query; core, AMD-extension and image-extension
///                  attribute values are all accepted.
/// @param value     Caller-provided buffer large enough for the attribute's
///                  type. Not checked for null here.
/// @return HSA_STATUS_SUCCESS for a handled attribute. For any other attribute
///         a zero uint32_t is stored and HSA_STATUS_ERROR_INVALID_ARGUMENT is
///         returned.
hsa_status_t AieAgent::GetInfo(hsa_agent_info_t attribute, void *value) const {
  // Widen so that extension attribute values can share the switch.
  const size_t attribute_ = static_cast<size_t>(attribute);

  switch (attribute_) {
  case HSA_AGENT_INFO_NAME: {
    const std::string name_info_("aie2");
    assert(name_info_.size() < HSA_PUBLIC_NAME_SIZE);
    // Zero the whole field first so the result is always NUL-terminated.
    std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
    std::strncat(reinterpret_cast<char *>(value), name_info_.c_str(),
                 name_info_.size());
    break;
  }
  case HSA_AGENT_INFO_VENDOR_NAME: {
    const std::string vendor_name_info_("AMD");
    assert(vendor_name_info_.size() < HSA_PUBLIC_NAME_SIZE);
    std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
    std::strncat(reinterpret_cast<char *>(value), vendor_name_info_.c_str(),
                 vendor_name_info_.size());
    break;
  }
  case HSA_AGENT_INFO_FEATURE:
    *reinterpret_cast<hsa_agent_feature_t *>(value) =
        HSA_AGENT_FEATURE_AGENT_DISPATCH;
    break;
  case HSA_AGENT_INFO_MACHINE_MODEL:
    *reinterpret_cast<hsa_machine_model_t *>(value) = HSA_MACHINE_MODEL_LARGE;
    break;
  case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES:
  case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
    // TODO: validate if this is true.
    *reinterpret_cast<hsa_default_float_rounding_mode_t *>(value) =
        HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR;
    break;
  case HSA_AGENT_INFO_PROFILE:
    *reinterpret_cast<hsa_profile_t *>(value) = profile_;
    break;
  case HSA_AGENT_INFO_WAVEFRONT_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_WORKGROUP_MAX_DIM:
    // The attribute type is uint16_t[3].
    std::memset(value, 0, sizeof(uint16_t) * 3);
    break;
  case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_GRID_MAX_DIM:
    // The attribute type is hsa_dim3_t (three 32-bit fields), not
    // uint16_t[3]; clear the whole structure.
    std::memset(value, 0, sizeof(hsa_dim3_t));
    break;
  case HSA_AGENT_INFO_GRID_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AGENT_INFO_QUEUES_MAX:
    *reinterpret_cast<uint32_t *>(value) = max_queues_;
    break;
  case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
    *reinterpret_cast<uint32_t *>(value) = min_aql_size_;
    break;
  case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
    *reinterpret_cast<uint32_t *>(value) = max_aql_size_;
    break;
  case HSA_AGENT_INFO_QUEUE_TYPE:
    *reinterpret_cast<hsa_queue_type32_t *>(value) = HSA_QUEUE_TYPE_SINGLE;
    break;
  case HSA_AGENT_INFO_NODE:
    *reinterpret_cast<uint32_t *>(value) = node_id();
    break;
  case HSA_AGENT_INFO_DEVICE:
    *reinterpret_cast<hsa_device_type_t *>(value) = HSA_DEVICE_TYPE_AIE;
    break;
  case HSA_AGENT_INFO_CACHE_SIZE:
    // The attribute type is uint32_t[4] (one entry per cache level); clear
    // every entry rather than only the first.
    std::memset(value, 0, sizeof(uint32_t) * 4);
    break;
  case HSA_AGENT_INFO_VERSION_MAJOR:
    // The attribute type is uint16_t; a 32-bit store would overrun a
    // correctly sized caller buffer.
    *reinterpret_cast<uint16_t *>(value) = 1;
    break;
  case HSA_AGENT_INFO_VERSION_MINOR:
    // The attribute type is uint16_t (see VERSION_MAJOR).
    *reinterpret_cast<uint16_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_CHIP_ID:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_CACHELINE_SIZE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID:
    *reinterpret_cast<uint32_t *>(value) = node_id();
    break;
  case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_BDFID:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
  case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
    // NOTE(review): the image extension documents these as size_t based;
    // only the first 32 bits are cleared here — confirm against callers.
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_PRODUCT_NAME: {
    const std::string product_name_info_("AIE-ML");
    assert(product_name_info_.size() < HSA_PUBLIC_NAME_SIZE);
    std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
    std::strncat(reinterpret_cast<char *>(value), product_name_info_.c_str(),
                 product_name_info_.size());
    break;
  }
  case HSA_AMD_AGENT_INFO_UUID: {
    // At this point AIE devices do not support UUID's.
    char uuid_tmp[] = "AIE-XX";
    snprintf(reinterpret_cast<char *>(value), sizeof(uuid_tmp), "%s", uuid_tmp);
    break;
  }
  case HSA_AMD_AGENT_INFO_ASIC_REVISION:
    *reinterpret_cast<uint32_t *>(value) = 0;
    break;
  case HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS:
    assert(regions_.size() != 0 && "No device local memory found!");
    *reinterpret_cast<bool *>(value) = true;
    break;
  case HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES:
    // The attribute type is uint8_t[8].
    std::memset(value, 0, sizeof(uint8_t) * 8);
    break;
  case HSA_AMD_AGENT_INFO_CLOCK_COUNTERS:
    std::memset(value, 0, sizeof(hsa_amd_clock_counters_t));
    break;
  default:
    *reinterpret_cast<uint32_t *>(value) = 0;
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return HSA_STATUS_SUCCESS;
}

/// Creates an AQL queue on this AIE agent.
///
/// @param size                 Queue capacity in packets; must be a power of
///                             two within [min_aql_size_, max_aql_size_].
/// @param queue_type           Not consulted by this implementation.
/// @param flags                Queue creation flags. Requests to place the
///                             ring buffer or queue descriptor in device
///                             memory are rejected.
/// @param event_callback       Not consulted by this implementation.
/// @param data                 Not consulted by this implementation.
/// @param private_segment_size Not consulted by this implementation.
/// @param group_segment_size   Not consulted by this implementation.
/// @param queue                Receives the new queue on success.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for unsupported flags or an
///         invalid size, HSA_STATUS_ERROR_OUT_OF_RESOURCES when the shared
///         queue descriptor cannot be allocated.
/// @throws Whatever the AieAqlQueue constructor or operator new throws; the
///         shared queue descriptor is released before the exception leaves
///         this function.
hsa_status_t AieAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                                   core::HsaEventCallback event_callback, void* data,
                                   uint32_t private_segment_size, uint32_t group_segment_size,
                                   core::Queue** queue) {
  if ((flags & HSA_AMD_QUEUE_CREATE_DEVICE_MEM_RING_BUF) != 0 ||
      (flags & HSA_AMD_QUEUE_CREATE_DEVICE_MEM_QUEUE_DESCRIPTOR) != 0) {
    // AIE agents do not currently support queue creation in device memory.
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (!IsPowerOfTwo(size)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (size < min_aql_size_ || size > max_aql_size_) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  core::SharedQueue* shared_queue =
      static_cast<core::SharedQueue*>(core::Runtime::runtime_singleton_->system_allocator()(
          sizeof(core::SharedQueue), MemoryRegion::GetPageSize(), 0, node_id()));

  if (!shared_queue) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // A failed construction is reported by exception (operator new never returns
  // nullptr here), and the queue destructor - the only other place that frees
  // the descriptor - does not run in that case. Release it before propagating.
  AieAqlQueue* aql_queue = nullptr;
  try {
    aql_queue = new AieAqlQueue(shared_queue, this, size, node_id(), flags);
  } catch (...) {
    core::Runtime::runtime_singleton_->system_deallocator()(shared_queue);
    throw;
  }

  *queue = aql_queue;

  return HSA_STATUS_SUCCESS;
}

void AieAgent::InitRegionList() {
  /// TODO: Find a way to set the other memory properties in a reasonable way.
  ///       This should be easier once the ROCt source is incorporated into the
  ///       ROCr source. Since the AIE itself currently has no memory regions of
  ///       its own all memory is just the system DRAM.
  const uint64_t total_system_memory = XdnaDriver::GetSystemMemoryByteSize();

  /// For allocating kernel arguments or other objects that only need
  /// system memory.
  HsaMemoryProperties sys_mem_props = {};
  sys_mem_props.HeapType = HSA_HEAPTYPE_SYSTEM;
  sys_mem_props.SizeInBytes = total_system_memory;

  /// For any other allocation, e.g., buffers.
  HsaMemoryProperties other_mem_props = {};
  other_mem_props.HeapType = HSA_HEAPTYPE_SYSTEM;
  other_mem_props.SizeInBytes = total_system_memory;

  /// For allocating memory for programmable device image (PDI) files. These
  /// need to be mapped to the device so the hardware can access the PDIs.
  HsaMemoryProperties dev_mem_props = {};
  dev_mem_props.HeapType = HSA_HEAPTYPE_DEVICE_SVM;
  dev_mem_props.SizeInBytes = XdnaDriver::GetDevHeapByteSize();

  /// As of now the AIE devices support coarse-grain memory regions that require
  /// explicit sync operations.
  regions_.reserve(3);
  regions_.push_back(
      new MemoryRegion(false, true, false, false, true, this, sys_mem_props));
  regions_.push_back(
      new MemoryRegion(false, false, false, false, true, this, dev_mem_props));
  regions_.push_back(new MemoryRegion(false, false, false, false, true, this,
                                      other_mem_props));
}

void AieAgent::InitAllocators() {
  for (const auto *region : regions()) {
    const MemoryRegion *amd_mem_region(
        static_cast<const MemoryRegion *>(region));
    if (amd_mem_region->kernarg()) {
      system_allocator_ =
          [region](size_t size, size_t align,
                   core::MemoryRegion::AllocateFlags alloc_flags) -> void * {
        void *mem(nullptr);
        return (core::Runtime::runtime_singleton_->AllocateMemory(
                    region, size, alloc_flags, &mem) == HSA_STATUS_SUCCESS)
                   ? mem
                   : nullptr;
      };

      system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
      break;
    }
  }
}

} // namespace AMD
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_aie_aql_queue.h"
#include "core/inc/amd_xdna_driver.h"

#ifdef __linux__
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#endif

#ifdef _WIN32
#include <Windows.h>
#endif

#include <atomic>
#include <cstring>

#include "core/inc/amd_xdna_driver.h"
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/utils.h"

namespace rocr {
namespace AMD {

/// Constructs an AQL queue for an AIE agent.
///
/// Allocates the packet ring buffer through the agent's system allocator,
/// fills in the queue and doorbell-signal descriptors, and asks the agent's
/// driver to create the backing hardware queue.
///
/// @param shared_queue  Shared queue descriptor handed to the base Queue.
/// @param agent         Owning agent; must be an AIE device.
/// @param req_size_pkts Queue capacity in AQL packets.
/// @param node_id       Driver node on which the queue is created.
/// @param flags         Queue creation flags, forwarded to the base Queue.
/// @throws AMD::hsa_exception if @p agent is not an AIE device, if the ring
///         buffer cannot be allocated, or if the driver fails to create the
///         queue. The ring buffer is released before the latter is thrown.
AieAqlQueue::AieAqlQueue(core::SharedQueue* shared_queue, AieAgent* agent, size_t req_size_pkts,
                         uint32_t node_id, uint64_t flags)
    : Queue(shared_queue, flags),
      LocalSignal(0, false),
      DoorbellSignal(signal()),
      agent_(*agent),
      active_(false) {
  if (agent_.device_type() != core::Agent::DeviceType::kAmdAieDevice) {
    throw AMD::hsa_exception(
        HSA_STATUS_ERROR_INVALID_AGENT,
        "Attempting to create an AIE queue on a non-AIE agent.");
  }
  queue_size_bytes_ = req_size_pkts * sizeof(core::AqlPacket);
  ring_buf_ = agent_.system_allocator()(queue_size_bytes_, 4096,
                                        core::MemoryRegion::AllocateNoFlags);

  if (!ring_buf_) {
    throw AMD::hsa_exception(
        HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
        "Could not allocate a ring buffer for an AIE queue.");
  }

  // Populate hsa_queue_t fields.
  amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_SINGLE;
  amd_queue_.hsa_queue.id = INVALID_QUEUEID;
  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);
  amd_queue_.hsa_queue.size = req_size_pkts;
  amd_queue_.hsa_queue.base_address = ring_buf_;
  // Populate AMD queue fields.
  amd_queue_.write_dispatch_id = 0;
  amd_queue_.read_dispatch_id = 0;

  signal_.hardware_doorbell_ptr = nullptr;
  signal_.kind = AMD_SIGNAL_KIND_DOORBELL;
  signal_.queue_ptr = &amd_queue_;

  HsaQueueResource queue_resource = {};
  hsa_status_t status =
      agent_.driver().CreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 0, HSA_QUEUE_PRIORITY_NORMAL, 0,
                                  nullptr, queue_size_bytes_, nullptr, queue_resource);
  if (status != HSA_STATUS_SUCCESS) {
    // The destructor does not run when a constructor throws, so the ring
    // buffer has to be returned here or it is leaked.
    agent_.system_deallocator()(ring_buf_);
    ring_buf_ = nullptr;
    throw AMD::hsa_exception(status, "Failed to create a hardware context for an AIE queue.");
  }

  queue_id_ = queue_resource.QueueId;
  amd_queue_.hsa_queue.id = GetQueueId();

  // Only mark the queue active once the driver-side queue actually exists.
  active_ = true;
}

/// Inactivates the queue, then returns the ring buffer to the agent's system
/// deallocator and the shared queue descriptor to the runtime's system
/// deallocator.
AieAqlQueue::~AieAqlQueue() {
  // Qualified call: always run this class's Inactivate().
  AieAqlQueue::Inactivate();

  if (ring_buf_) agent_.system_deallocator()(ring_buf_);

  if (shared_queue_) core::Runtime::runtime_singleton_->system_deallocator()(shared_queue_);
}

/// Marks the queue inactive and destroys the driver-side queue.
///
/// The active flag is cleared with an atomic exchange, so only the call that
/// observes the queue as active asks the driver to destroy it; later calls do
/// nothing.
///
/// @return The driver's DestroyQueue status for the call that performs the
///         teardown, HSA_STATUS_SUCCESS when the queue was already inactive.
hsa_status_t AieAqlQueue::Inactivate() {
  const bool was_active = active_.exchange(false, std::memory_order_relaxed);
  if (!was_active) {
    return HSA_STATUS_SUCCESS;
  }

  // Report a failed teardown to the caller instead of dropping the status.
  return agent_.driver().DestroyQueue(queue_id_);
}

/// Accepts a priority request without acting on it.
///
/// @param priority Not consulted by this implementation.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t AieAqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority) {
  const hsa_status_t accepted = HSA_STATUS_SUCCESS;
  return accepted;
}

/// Destroys this queue object; the object must not be used afterwards.
void AieAqlQueue::Destroy() {
  delete this;
}

// Atomic Reads/Writes
/// Loads the queue's read dispatch index with relaxed memory ordering.
uint64_t AieAqlQueue::LoadReadIndexRelaxed() {
  const uint64_t read_index =
      atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_relaxed);
  return read_index;
}

/// Loads the queue's read dispatch index with acquire memory ordering.
uint64_t AieAqlQueue::LoadReadIndexAcquire() {
  const uint64_t read_index =
      atomic::Load(&amd_queue_.read_dispatch_id, std::memory_order_acquire);
  return read_index;
}

/// Loads the queue's write dispatch index with relaxed memory ordering.
uint64_t AieAqlQueue::LoadWriteIndexRelaxed() {
  const uint64_t write_index =
      atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_relaxed);
  return write_index;
}

/// Loads the queue's write dispatch index with acquire memory ordering.
uint64_t AieAqlQueue::LoadWriteIndexAcquire() {
  const uint64_t write_index =
      atomic::Load(&amd_queue_.write_dispatch_id, std::memory_order_acquire);
  return write_index;
}

/// Stores @p value into the queue's write dispatch index with relaxed memory
/// ordering.
void AieAqlQueue::StoreWriteIndexRelaxed(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_relaxed);
}

/// Stores @p value into the queue's write dispatch index with release memory
/// ordering.
void AieAqlQueue::StoreWriteIndexRelease(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_release);
}

/// Compare-and-swap on the write dispatch index with relaxed memory ordering:
/// the index is set to @p value if it currently equals @p expected.
///
/// @return The result of atomic::Cas (presumably the value observed in the
///         index before the operation - confirm against atomic::Cas).
uint64_t AieAqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_relaxed);
}

/// Compare-and-swap on the write dispatch index with acquire memory ordering:
/// the index is set to @p value if it currently equals @p expected.
///
/// @return The result of atomic::Cas (presumably the value observed in the
///         index before the operation - confirm against atomic::Cas).
uint64_t AieAqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_acquire);
}

/// Compare-and-swap on the write dispatch index with release memory ordering:
/// the index is set to @p value if it currently equals @p expected.
///
/// @return The result of atomic::Cas (presumably the value observed in the
///         index before the operation - confirm against atomic::Cas).
uint64_t AieAqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_release);
}

/// Compare-and-swap on the write dispatch index with acquire-release memory
/// ordering: the index is set to @p value if it currently equals @p expected.
///
/// @return The result of atomic::Cas (presumably the value observed in the
///         index before the operation - confirm against atomic::Cas).
uint64_t AieAqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_acq_rel);
}

/// Atomically adds @p value to the write dispatch index with relaxed memory
/// ordering.
///
/// @return The result of atomic::Add (presumably the index value before the
///         addition - confirm against atomic::Add).
uint64_t AieAqlQueue::AddWriteIndexRelaxed(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_relaxed);
}

/// Atomically adds @p value to the write dispatch index with acquire memory
/// ordering.
///
/// @return The result of atomic::Add (presumably the index value before the
///         addition - confirm against atomic::Add).
uint64_t AieAqlQueue::AddWriteIndexAcquire(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_acquire);
}

/// Atomically adds @p value to the write dispatch index with release memory
/// ordering.
///
/// @return The result of atomic::Add (presumably the index value before the
///         addition - confirm against atomic::Add).
uint64_t AieAqlQueue::AddWriteIndexRelease(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_release);
}

/// Atomically adds @p value to the write dispatch index with acquire-release
/// memory ordering.
///
/// @return The result of atomic::Add (presumably the index value before the
///         addition - confirm against atomic::Add).
uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value) {
  auto* const write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_acq_rel);
}

/// Doorbell write: the written @p value is not used; any write triggers
/// submission of the packets currently pending in the queue.
void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {
  SubmitPackets();
}

/// Submits every packet between the read and write dispatch indices to the
/// XDNA driver and then publishes the new read index.
///
/// Consecutive HSA_AMD_AIE_ERT_START_CU packets are handed to the driver as a
/// single command chain. Packets with any other opcode are skipped. Nothing is
/// done when the queue is inactive.
///
/// Dispatch ids grow monotonically, so a packet's slot in the ring buffer is
/// its id modulo the queue size. A chain is never extended across the end of
/// the ring buffer, because the driver receives it as one contiguous array.
void AieAqlQueue::SubmitPackets() {
  if (!active_.load(std::memory_order_relaxed)) {
    return;
  }

  auto& driver = static_cast<XdnaDriver&>(agent_.driver());
  auto* const ring =
      static_cast<hsa_amd_aie_ert_packet_t*>(amd_queue_.hsa_queue.base_address);
  // The queue size is a power of two (validated when the queue is created),
  // so masking is equivalent to taking the id modulo the size.
  const uint64_t slot_mask = static_cast<uint64_t>(amd_queue_.hsa_queue.size) - 1;

  uint64_t cur_id = LoadReadIndexRelaxed();
  const uint64_t end = LoadWriteIndexAcquire();
  while (cur_id < end) {
    auto* pkt = ring + (cur_id & slot_mask);

    // Get the packet header information
    if (pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC ||
        pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT) {
      assert(false && "Invalid packet header");
    }

    // Get the payload information
    switch (pkt->opcode) {
      case HSA_AMD_AIE_ERT_START_CU: {
        // Look ahead to count how many contiguous HSA_AMD_AIE_ERT_START_CU
        // packets follow. All of them can be combined into a single chain.
        uint64_t num_cont_start_cu_pkts = 1;
        for (uint64_t peek_pkt_id = cur_id + 1; peek_pkt_id < end; peek_pkt_id++) {
          const uint64_t peek_slot = peek_pkt_id & slot_mask;
          if (peek_slot == 0) {
            // Wrapped to the start of the ring: the next packet is not
            // contiguous in memory with the current chain.
            break;
          }
          if (ring[peek_slot].opcode != HSA_AMD_AIE_ERT_START_CU) {
            break;
          }
          num_cont_start_cu_pkts++;
        }

        // Call into the driver to submit the chain starting at cur_id.
        // Submitting the command chain might create a new hardware context.
        hsa_status_t status = driver.SubmitCmdChain(pkt, num_cont_start_cu_pkts, queue_id_,
                                                    agent_.properties().NumNeuralCores);
        if (status != HSA_STATUS_SUCCESS) {
          assert(false && "Could not submit packets");
        }

        cur_id += num_cont_start_cu_pkts;
        break;
      }
      default:
        // Unsupported opcode: step over the packet. Without advancing, the
        // loop would never terminate.
        cur_id++;
        break;
    }
  }

  atomic::Store(&amd_queue_.read_dispatch_id, cur_id, std::memory_order_release);
}

/// Doorbell write with release semantics: issues a release fence and then
/// performs the same work as StoreRelaxed().
void AieAqlQueue::StoreRelease(hsa_signal_value_t value) {
  // Order earlier writes before the doorbell handling below.
  std::atomic_thread_fence(std::memory_order_release);

  StoreRelaxed(value);
}

/// Answers a queue attribute query.
///
/// @param attribute HSA_AMD_QUEUE_INFO_AGENT (writes an hsa_agent_t) or
///                  HSA_AMD_QUEUE_INFO_DOORBELL_ID (writes a uint64_t holding
///                  the signal's hardware doorbell pointer value).
/// @param value     Caller-provided buffer for the result.
/// @return HSA_STATUS_SUCCESS for the attributes above, otherwise
///         HSA_STATUS_ERROR_INVALID_ARGUMENT with @p value untouched.
hsa_status_t AieAqlQueue::GetInfo(hsa_queue_info_attribute_t attribute,
                                  void *value) {
  switch (attribute) {
    case HSA_AMD_QUEUE_INFO_AGENT: {
      auto* agent_out = static_cast<hsa_agent_t*>(value);
      *agent_out = agent_.public_handle();
      return HSA_STATUS_SUCCESS;
    }
    case HSA_AMD_QUEUE_INFO_DOORBELL_ID: {
      // Hardware doorbell supports AQL semantics.
      auto* doorbell_out = static_cast<uint64_t*>(value);
      *doorbell_out = reinterpret_cast<uint64_t>(signal_.hardware_doorbell_ptr);
      return HSA_STATUS_SUCCESS;
    }
    default:
      break;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// CU masking is not supported on AIE queues.
///
/// Asserts in builds with assertions enabled; neither argument is used.
///
/// @return Always HSA_STATUS_ERROR.
hsa_status_t AieAqlQueue::GetCUMasking(uint32_t num_cu_mask_count,
                                       uint32_t *cu_mask) {
  assert(false && "AIE AQL queue does not support CU masking.");

  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}

/// CU masking is not supported on AIE queues.
///
/// Asserts in builds with assertions enabled; neither argument is used.
///
/// @return Always HSA_STATUS_ERROR.
hsa_status_t AieAqlQueue::SetCUMasking(uint32_t num_cu_mask_count,
                                       const uint32_t *cu_mask) {
  assert(false && "AIE AQL queue does not support CU masking.");

  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}

/// PM4 command execution is not supported on AIE queues.
///
/// Asserts in builds with assertions enabled and otherwise does nothing; none
/// of the arguments is used.
void AieAqlQueue::ExecutePM4(uint32_t *cmd_data, size_t cmd_size_b,
                             hsa_fence_scope_t acquireFence,
                             hsa_fence_scope_t releaseFence,
                             hsa_signal_t *signal) {
  // Reaching this point indicates a caller error.
  assert(false && "AIE AQL queue does not support PM4 packets.");
}

} // namespace AMD
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_aql_queue.h"

#ifdef __linux__
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif

#ifdef _WIN32
#include <Windows.h>
#endif

#include <stdio.h>
#include <string.h>

#include "core/inc/runtime.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/signal.h"
#include "core/inc/queue.h"
#include "core/util/utils.h"
#include "core/inc/registers.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/default_signal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/amd_gpu_pm4.h"
#include "core/inc/hsa_amd_tool_int.hpp"
#include "core/inc/amd_core_dump.hpp"

namespace rocr {
namespace AMD {

#define SCRATCH_ALT_RATIO 4

AqlQueue::AqlQueue(core::SharedQueue* shared_queue, GpuAgent* agent, size_t req_size_pkts,
                   HSAuint32 node_id, ScratchInfo& scratch, core::HsaEventCallback callback,
                   void* err_data, uint64_t flags)
    : Queue(shared_queue, flags, !agent->is_xgmi_cpu_gpu()),
      LocalSignal(0, false),
      DoorbellSignal(signal()),
      ring_buf_(nullptr),
      ring_buf_alloc_bytes_(0),
      queue_id_(HSA_QUEUEID(-1)),
      active_(false),
      agent_(agent),
      queue_scratch_(scratch),
      errors_callback_(callback),
      errors_data_(err_data),
      pm4_ib_buf_(nullptr),
      pm4_ib_size_b_(0x1000),
      dynamicScratchState(0),
      exceptionState(0),
      suspended_(false),
      priority_(HSA_QUEUE_PRIORITY_NORMAL),
      exception_signal_(nullptr) {

  // Queue size is a function of several restrictions.
  const uint32_t min_pkts = ComputeRingBufferMinPkts();
  const uint32_t max_pkts = ComputeRingBufferMaxPkts();

  // Apply sizing constraints to the ring buffer.
  uint32_t queue_size_pkts = uint32_t(req_size_pkts);
  queue_size_pkts = Min(queue_size_pkts, max_pkts);
  queue_size_pkts = Max(queue_size_pkts, min_pkts);

  uint32_t queue_size_bytes = queue_size_pkts * sizeof(core::AqlPacket);
  if ((queue_size_bytes & (queue_size_bytes - 1)) != 0)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
                             "Requested queue with non-power of two packet capacity.\n");

  // Allocate the AQL packet ring buffer.
  AllocRegisteredRingBuffer(queue_size_pkts);
  if (ring_buf_ == nullptr) throw std::bad_alloc();
  MAKE_NAMED_SCOPE_GUARD(RingGuard, [&]() { FreeQueueMemory(); });

  // Fill the ring buffer with invalid packet headers.
  // Leave packet content uninitialized to help track errors.
  for (uint32_t pkt_id = 0; pkt_id < queue_size_pkts; ++pkt_id) {
    (((core::AqlPacket*)ring_buf_)[pkt_id]).dispatch.header = HSA_PACKET_TYPE_INVALID;
  }

  // Zero the amd_queue_ structure to clear RPTR/WPTR before queue attach.
  memset(&amd_queue_, 0, sizeof(amd_queue_));

  // Initialize and map a HW AQL queue.
  HsaQueueResource queue_rsrc = {0};
  queue_rsrc.Queue_read_ptr_aql = (uint64_t*)&amd_queue_.read_dispatch_id;

  // Hardware write pointer supports AQL semantics.
  queue_rsrc.Queue_write_ptr_aql = (uint64_t*)&amd_queue_.write_dispatch_id;

  // Populate amd_queue_ structure.
  amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_MULTI;
  amd_queue_.hsa_queue.features = HSA_QUEUE_FEATURE_KERNEL_DISPATCH;
  amd_queue_.hsa_queue.base_address = ring_buf_;
  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);
  amd_queue_.hsa_queue.size = queue_size_pkts;
  amd_queue_.hsa_queue.id = INVALID_QUEUEID;
  amd_queue_.read_dispatch_id_field_base_byte_offset = uint32_t(
      uintptr_t(&amd_queue_.read_dispatch_id) - uintptr_t(&amd_queue_));
  // Initialize the doorbell signal structure.
  memset(&signal_, 0, sizeof(signal_));
  signal_.kind = AMD_SIGNAL_KIND_DOORBELL;
  signal_.hardware_doorbell_ptr = nullptr;
  signal_.queue_ptr = &amd_queue_;

  const auto& props = agent->properties();
  amd_queue_.max_cu_id = (props.NumFComputeCores / props.NumSIMDPerCU) - 1;
  amd_queue_.max_wave_id = (props.MaxWavesPerSIMD * props.NumSIMDPerCU) - 1;

#ifdef HSA_LARGE_MODEL
  AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64,
                   1);
#else
  AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64,
                   0);
#endif

  // Set group and private memory apertures in amd_queue_.
  auto& regions = agent->regions();

  for (auto region : regions) {
    const MemoryRegion* amdregion = static_cast<const AMD::MemoryRegion*>(region);
    uint64_t base = amdregion->GetBaseAddress();

    if (amdregion->IsLDS()) {
#ifdef HSA_LARGE_MODEL
      amd_queue_.group_segment_aperture_base_hi =
          uint32_t(uintptr_t(base) >> 32);
#else
      amd_queue_.group_segment_aperture_base_hi = uint32_t(base);
#endif
    }

    if (amdregion->IsScratch()) {
#ifdef HSA_LARGE_MODEL
      amd_queue_.private_segment_aperture_base_hi =
          uint32_t(uintptr_t(base) >> 32);
#else
      amd_queue_.private_segment_aperture_base_hi = uint32_t(base);
#endif
    }
  }

  assert(amd_queue_.group_segment_aperture_base_hi != 0 && "No group region found.");

  if (core::Runtime::runtime_singleton_->flag().check_flat_scratch()) {
    assert(amd_queue_.private_segment_aperture_base_hi != 0 && "No private region found.");
  }

  if (agent_->supported_isas()[0]->GetMajorVersion() >= 11)
    queue_scratch_.mem_alignment_size = 256;
  else
    queue_scratch_.mem_alignment_size = 1024;

  queue_scratch_.use_once_limit = core::Runtime::runtime_singleton_->flag().scratch_single_limit();
  if (queue_scratch_.use_once_limit > agent_->MaxScratchDevice()) {
    fprintf(stdout, "User specified scratch limit exceeds device limits (requested:%lu max:%lu)!\n",
                    queue_scratch_.use_once_limit, agent_->MaxScratchDevice());
    queue_scratch_.use_once_limit = agent_->MaxScratchDevice();
  }

  queue_scratch_.use_alt_limit = 0;

  queue_scratch_.async_reclaim = agent_->AsyncScratchReclaimEnabled();
  if (queue_scratch_.async_reclaim) {
    queue_scratch_.use_once_limit = agent_->ScratchSingleLimitAsyncThreshold();
    queue_scratch_.use_alt_limit = core::Runtime::runtime_singleton_->flag().enable_scratch_alt()
        ? (queue_scratch_.use_once_limit / SCRATCH_ALT_RATIO)
        : 0;
  }

  MAKE_NAMED_SCOPE_GUARD(EventGuard, [&]() {
    ScopedAcquire<KernelMutex> _lock(&queue_lock());
    queue_count()--;
    if (queue_count() == 0) {
      core::InterruptSignal::DestroyEvent(queue_event());
      queue_event() = nullptr;
    }
  });

  MAKE_NAMED_SCOPE_GUARD(SignalGuard, [&]() {
    if (amd_queue_.queue_inactive_signal.handle != 0)
      HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal);
    if (exception_signal_ != nullptr) exception_signal_->DestroySignal();
  });

  if (core::g_use_interrupt_wait) {
    ScopedAcquire<KernelMutex> _lock(&queue_lock());
    queue_count()++;
    if (queue_event() == nullptr) {
      assert(queue_count() == 1 && "Inconsistency in queue event reference counting found.\n");

      queue_event() = core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_SIGNAL, false);
      if (queue_event() == nullptr)
        throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                                 "Queue event creation failed.\n");
    }
    auto Signal = new core::InterruptSignal(0, queue_event());
    assert(Signal != nullptr && "Should have thrown!\n");
    amd_queue_.queue_inactive_signal = core::InterruptSignal::Convert(Signal);
    exception_signal_ = new core::InterruptSignal(0, queue_event());
    assert(exception_signal_ != nullptr && "Should have thrown!\n");
  } else {
    EventGuard.Dismiss();
    auto Signal = new core::DefaultSignal(0);
    assert(Signal != nullptr && "Should have thrown!\n");
    amd_queue_.queue_inactive_signal = core::DefaultSignal::Convert(Signal);
    exception_signal_ = new core::DefaultSignal(0);
    assert(exception_signal_ != nullptr && "Should have thrown!\n");
  }

  // Make sure the queue signal always has a waiting_ > 0 so that
  // we call hsaKmtSetEvent to force hsaKmtWaitOnEvent to return.
  exception_signal_->WaitingInc();

  // Ensure the amd_queue_ is fully initialized before creating the KFD queue.
  // This ensures that the debugger can access the fields once it detects there
  // is a KFD queue. The debugger may access the aperture addresses, queue
  // scratch base, and queue type.

  hsa_status_t status;
  if (core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging) {
    queue_rsrc.ErrorReason = &exception_signal_->signal_.value;
    status =
        agent->driver().CreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0, ring_buf_,
                                    ring_buf_alloc_bytes_, queue_event(), queue_rsrc);
  } else {
    status = agent->driver().CreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0,
                                         ring_buf_, ring_buf_alloc_bytes_, NULL, queue_rsrc);
  }
  if (status != HSA_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                             "Queue create failed\n");
  // Complete populating the doorbell signal structure.
  signal_.hardware_doorbell_ptr = queue_rsrc.Queue_DoorBell_aql;

  // Bind Id of Queue such that is unique i.e. it is not re-used by another
  // queue (AQL, HOST) in the same process during its lifetime.
  amd_queue_.hsa_queue.id = this->GetQueueId();

  queue_id_ = queue_rsrc.QueueId;
  MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { agent_->driver().DestroyQueue(queue_id_); });

  amd_queue_.scratch_max_use_index = UINT64_MAX;
  amd_queue_.alt_scratch_max_use_index = UINT64_MAX;

  // Set flag to notify CP FW that SW supports the new amd_queue_v2
  if (agent_->AsyncScratchReclaimEnabled())
    amd_queue_.caps |= AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM;

  // On the first queue creation, reserve some scratch memory on this agent.
  agent_->ReserveScratch();

  // Initialize scratch memory related entities
  queue_scratch_.queue_retry = amd_queue_.queue_inactive_signal;
  InitScratchSRD();

  if (core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging) {
    if (AMD::hsa_amd_signal_async_handler(amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE,
                                          0, DynamicQueueEventsHandler<false>,
                                          this) != HSA_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Queue event handler failed registration.\n");
    if (AMD::hsa_amd_signal_async_handler(core::Signal::Convert(exception_signal_),
                                          HSA_SIGNAL_CONDITION_NE, 0, ExceptionHandler,
                                          this) != HSA_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Queue event handler failed registration.\n");
  } else {
    if (AMD::hsa_amd_signal_async_handler(amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE,
                                          0, DynamicQueueEventsHandler<true>,
                                          this) != HSA_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Queue event handler failed registration.\n");
    exceptionState = ERROR_HANDLER_DONE;
  }

  // Allocate IB for icache flushes.
  pm4_ib_buf_ =
      agent_->system_allocator()(pm4_ib_size_b_, 0x1000, core::MemoryRegion::AllocateExecutable);
  if (pm4_ib_buf_ == nullptr)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "PM4 IB allocation failed.\n");

  MAKE_NAMED_SCOPE_GUARD(PM4IBGuard, [&]() { agent_->system_deallocator()(pm4_ib_buf_); });

  // Set initial CU mask
  if (!core::Runtime::runtime_singleton_->flag().cu_mask_skip_init()) SetCUMasking(0, nullptr);

  active_ = true;

  PM4IBGuard.Dismiss();
  RingGuard.Dismiss();
  QueueGuard.Dismiss();
  EventGuard.Dismiss();
  SignalGuard.Dismiss();
}

// Tears the queue down. In order: stops the dynamic-scratch/error handler,
// stops the KFD exception handler (when exception debugging is supported),
// destroys the driver queue via Inactivate(), releases any main/alt scratch,
// destroys the queue signals, frees the ring buffer / queue descriptor, drops
// this queue's reference on the shared queue event and finally frees the PM4
// indirect buffer.
AqlQueue::~AqlQueue() {
  // Remove error handler synchronously.
  // Sequences error handler callbacks with queue destroy.
  dynamicScratchState |= ERROR_HANDLER_TERMINATE;
  // Keep poking the inactive signal with the sentinel value and block until
  // its value changes; repeat until the DONE bit is observed in
  // dynamicScratchState. NOTE(review): presumably the async handler registered
  // in the constructor is what resets the signal and sets DONE - confirm.
  while ((dynamicScratchState & ERROR_HANDLER_DONE) != ERROR_HANDLER_DONE) {
    HSA::hsa_signal_store_screlease(amd_queue_.queue_inactive_signal, 0x8000000000000000ull);
    HSA::hsa_signal_wait_relaxed(amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE,
                                 0x8000000000000000ull, -1ull, HSA_WAIT_STATE_BLOCKED);
  }

  // Remove kfd exception handler
  if (core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging) {
    exceptionState |= ERROR_HANDLER_TERMINATE;
    // Same handshake as above, but on the exception signal and with a bounded
    // wait so the store is re-issued if the wait returns without DONE being set.
    while ((exceptionState & ERROR_HANDLER_DONE) != ERROR_HANDLER_DONE) {
      const uint64_t timeout_ms = 5000;

      exception_signal_->StoreRelease(-1ull);
      exception_signal_->WaitRelaxed(HSA_SIGNAL_CONDITION_NE, -1ull, timeout_ms,
                                     HSA_WAIT_STATE_BLOCKED);
    }
  }

  // Destroys the driver queue if it is still active (no-op otherwise).
  Inactivate();

  // Release primary scratch, bracketing the release with tool notifications.
  if (queue_scratch_.main_queue_base) {
    tool::notify_event_scratch_free_start(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);
    agent_->ReleaseQueueMainScratch(queue_scratch_);
    tool::notify_event_scratch_free_end(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);
  }
  // Release alternate scratch, bracketing the release with tool notifications.
  if (queue_scratch_.alt_queue_base) {
    tool::notify_event_scratch_free_start(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
    agent_->ReleaseQueueAltScratch(queue_scratch_);
    tool::notify_event_scratch_free_end(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
  }

  // Balances the WaitingInc() performed on exception_signal_ in the constructor.
  exception_signal_->WaitingDec();
  exception_signal_->DestroySignal();
  HSA::hsa_signal_destroy(amd_queue_.queue_inactive_signal);
  FreeQueueMemory();

  // The queue event is shared and reference counted through queue_count();
  // the last queue to go away destroys it. Only taken when interrupt waits
  // are in use, mirroring the increment in the constructor.
  if (core::g_use_interrupt_wait) {
    ScopedAcquire<KernelMutex> lock(&queue_lock());
    queue_count()--;
    if (queue_count() == 0) {
      core::InterruptSignal::DestroyEvent(queue_event());
      queue_event() = nullptr;
    }
  }
  // Free the PM4 indirect buffer allocated in the constructor.
  agent_->system_deallocator()(pm4_ib_buf_);
}

// Destroys the queue object. A queue whose type is
// HSA_QUEUE_TYPE_COOPERATIVE is not deleted here; only its GWS is released
// through the agent.
void AqlQueue::Destroy() {
  const bool is_cooperative = (amd_queue_.hsa_queue.type == HSA_QUEUE_TYPE_COOPERATIVE);
  if (!is_cooperative) {
    delete this;
    return;
  }
  agent_->GWSRelease();
}

// Loads the queue read index (amd_queue_.read_dispatch_id) with acquire ordering.
uint64_t AqlQueue::LoadReadIndexAcquire() {
  auto* read_index = &amd_queue_.read_dispatch_id;
  return atomic::Load(read_index, std::memory_order_acquire);
}

// Loads the queue read index (amd_queue_.read_dispatch_id) with relaxed ordering.
uint64_t AqlQueue::LoadReadIndexRelaxed() {
  auto* read_index = &amd_queue_.read_dispatch_id;
  return atomic::Load(read_index, std::memory_order_relaxed);
}

// Loads the queue write index (amd_queue_.write_dispatch_id) with acquire ordering.
uint64_t AqlQueue::LoadWriteIndexAcquire() {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Load(write_index, std::memory_order_acquire);
}

// Loads the queue write index (amd_queue_.write_dispatch_id) with relaxed ordering.
uint64_t AqlQueue::LoadWriteIndexRelaxed() {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Load(write_index, std::memory_order_relaxed);
}

// Stores `value` into the queue write index (amd_queue_.write_dispatch_id)
// with relaxed ordering.
void AqlQueue::StoreWriteIndexRelaxed(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_relaxed);
}

// Stores `value` into the queue write index (amd_queue_.write_dispatch_id)
// with release ordering.
void AqlQueue::StoreWriteIndexRelease(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  atomic::Store(write_index, value, std::memory_order_release);
}

// Compare-and-swap on the queue write index with acq_rel ordering: `value` is
// the replacement and `expected` the comparand handed to atomic::Cas.
// Returns the result of atomic::Cas (presumably the value observed before the
// operation - TODO confirm against atomic::Cas).
uint64_t AqlQueue::CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_acq_rel);
}
// Compare-and-swap on the queue write index with acquire ordering: `value` is
// the replacement and `expected` the comparand handed to atomic::Cas.
// Returns the result of atomic::Cas (presumably the value observed before the
// operation - TODO confirm against atomic::Cas).
uint64_t AqlQueue::CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_acquire);
}
// Compare-and-swap on the queue write index with relaxed ordering: `value` is
// the replacement and `expected` the comparand handed to atomic::Cas.
// Returns the result of atomic::Cas (presumably the value observed before the
// operation - TODO confirm against atomic::Cas).
uint64_t AqlQueue::CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_relaxed);
}
// Compare-and-swap on the queue write index with release ordering: `value` is
// the replacement and `expected` the comparand handed to atomic::Cas.
// Returns the result of atomic::Cas (presumably the value observed before the
// operation - TODO confirm against atomic::Cas).
uint64_t AqlQueue::CasWriteIndexRelease(uint64_t expected, uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Cas(write_index, value, expected, std::memory_order_release);
}

// Atomically adds `value` to the queue write index with acq_rel ordering and
// returns the result of atomic::Add (presumably the pre-add index - TODO
// confirm against atomic::Add).
uint64_t AqlQueue::AddWriteIndexAcqRel(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_acq_rel);
}

// Atomically adds `value` to the queue write index with acquire ordering and
// returns the result of atomic::Add (presumably the pre-add index - TODO
// confirm against atomic::Add).
uint64_t AqlQueue::AddWriteIndexAcquire(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_acquire);
}

// Atomically adds `value` to the queue write index with relaxed ordering and
// returns the result of atomic::Add (presumably the pre-add index - TODO
// confirm against atomic::Add).
uint64_t AqlQueue::AddWriteIndexRelaxed(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_relaxed);
}

// Atomically adds `value` to the queue write index with release ordering and
// returns the result of atomic::Add (presumably the pre-add index - TODO
// confirm against atomic::Add).
uint64_t AqlQueue::AddWriteIndexRelease(uint64_t value) {
  auto* write_index = &amd_queue_.write_dispatch_id;
  return atomic::Add(write_index, value, std::memory_order_release);
}

// Rings the queue doorbell with `value`.
// When the enable_dtif flag is set the doorbell is rung through the driver
// (hsaKmtQueueRingDoorbell); otherwise `value` is written straight to the
// hardware doorbell pointer after a store fence.
void AqlQueue::StoreRelaxed(hsa_signal_value_t value) {
  const bool ring_through_driver = core::Runtime::runtime_singleton_->flag().enable_dtif();
  if (ring_through_driver) {
    HSAKMT_CALL(hsaKmtQueueRingDoorbell(queue_id_));
    return;
  }

  // Hardware doorbell supports AQL semantics.
  _mm_sfence();
  *(signal_.hardware_doorbell_ptr) = uint64_t(value);
  /* signal_ is allocated as uncached so we do not need read-back to flush WC */
}

// Doorbell store with release semantics: issues a release fence and then
// performs the same doorbell write as StoreRelaxed().
void AqlQueue::StoreRelease(hsa_signal_value_t value) {
  // Order all prior writes before the doorbell write.
  std::atomic_thread_fence(std::memory_order_release);
  this->StoreRelaxed(value);
}

// Queries a queue attribute and writes the answer through `value`.
//   HSA_AMD_QUEUE_INFO_AGENT       -> hsa_agent_t handle of the owning agent.
//   HSA_AMD_QUEUE_INFO_DOORBELL_ID -> uint64_t holding the hardware doorbell
//                                     pointer value.
// Any other attribute yields HSA_STATUS_ERROR_INVALID_ARGUMENT and leaves
// `value` untouched. `value` is dereferenced without a null check.
hsa_status_t AqlQueue::GetInfo(hsa_queue_info_attribute_t attribute, void* value) {
  if (attribute == HSA_AMD_QUEUE_INFO_AGENT) {
    auto* agent_out = reinterpret_cast<hsa_agent_t*>(value);
    *agent_out = agent_->public_handle();
    return HSA_STATUS_SUCCESS;
  }

  if (attribute == HSA_AMD_QUEUE_INFO_DOORBELL_ID) {
    auto* doorbell_out = reinterpret_cast<uint64_t*>(value);
    *doorbell_out = reinterpret_cast<uint64_t>(signal_.hardware_doorbell_ptr);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

// Smallest ring buffer size, expressed in AQL packets.
uint32_t AqlQueue::ComputeRingBufferMinPkts() {
  // From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification:
  //   Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs.
  //   Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW)
  // 256 DWs * 4 bytes per DW = 0x400 bytes.
  const uint32_t min_ring_bytes = 0x400;
  const auto packet_bytes = sizeof(core::AqlPacket);

  return uint32_t(min_ring_bytes / packet_bytes);
}

// Largest ring buffer size, expressed in AQL packets.
uint32_t AqlQueue::ComputeRingBufferMaxPkts() {
  // From CP_HQD_PQ_CONTROL.QUEUE_SIZE specification:
  //   Size of the primary queue (PQ) will be: 2^(HQD_QUEUE_SIZE+1) DWs.
  //   Min Size is 7 (2^8 = 256 DWs) and max size is 29 (2^30 = 1 G-DW)
  // 2^30 DWs * 4 bytes per DW = 0x100000000 bytes; needs a 64-bit constant.
  const uint64_t max_ring_bytes = 0x100000000;
  const auto packet_bytes = sizeof(core::AqlPacket);

  return uint32_t(max_ring_bytes / packet_bytes);
}

// Allocates the AQL ring buffer for `queue_size_pkts` packets and records its
// size in ring_buf_alloc_bytes_.
// The buffer comes from the agent's coarse-grain allocator when
// IsDeviceMemRingBuf() is true (uncached + executable), otherwise from the
// agent's system allocator (4 KiB aligned, executable).
// Throws AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION) when a
// device-memory ring buffer is requested but large BAR is not enabled.
// Allocation failure is only checked by assert; ring_buf_ is left null.
void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
  ring_buf_alloc_bytes_ = queue_size_pkts * sizeof(core::AqlPacket);
  assert(IsMultipleOf(ring_buf_alloc_bytes_, 4096) && "Ring buffer sizes must be 4KiB aligned.");

  const bool in_device_memory = IsDeviceMemRingBuf();

  // Device-memory ring buffers are only usable with large BAR.
  if (in_device_memory && !agent_->LargeBarEnabled()) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_QUEUE_CREATION,
                              "Trying to allocate an AQL ring buffer in device memory without "
                              "large BAR PCIe enabled.");
  }

  if (in_device_memory) {
    ring_buf_ = agent_->coarsegrain_allocator()(
        ring_buf_alloc_bytes_,
        core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateUncached);
  } else {
    ring_buf_ = agent_->system_allocator()(ring_buf_alloc_bytes_, 0x1000,
                                           core::MemoryRegion::AllocateExecutable);
  }

  assert(ring_buf_ != NULL && "AQL queue memory allocation failure");
}

// Releases the shared queue descriptor and the ring buffer, each through the
// deallocator matching the memory it was placed in, and resets the
// corresponding members. Safe to call when either is already null.
void AqlQueue::FreeQueueMemory() {
  // Queue descriptor: device (coarse-grain) memory or runtime system memory.
  if (shared_queue_) {
    if (!IsDeviceMemQueueDescriptor()) {
      core::Runtime::runtime_singleton_->system_deallocator()(shared_queue_);
    } else {
      agent_->coarsegrain_deallocator()(shared_queue_);
    }
    shared_queue_ = nullptr;
  }

  // Ring buffer: device (coarse-grain) memory or agent system memory.
  if (ring_buf_) {
    if (!IsDeviceMemRingBuf()) {
      agent_->system_deallocator()(ring_buf_);
    } else {
      agent_->coarsegrain_deallocator()(ring_buf_);
    }
  }

  ring_buf_alloc_bytes_ = 0;
  ring_buf_ = NULL;
}

// Closes a ring buffer file descriptor obtained from CreateRingBufferFD().
// When memfd_create is not available the backing shared-memory object named
// `ring_buf_shm_path` is unlinked first. Asserts on non-Linux builds.
void AqlQueue::CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const {
#ifndef __linux__
  assert(false && "Function only needed on Linux.");
#else
#ifndef HAVE_MEMFD_CREATE
  shm_unlink(ring_buf_shm_path);
#endif
  close(fd);
#endif
}

// Creates a file descriptor backing `ring_buf_phys_size_bytes` bytes for the
// ring buffer. Uses memfd_create when HAVE_MEMFD_CREATE is defined, otherwise
// an exclusive POSIX shared-memory object named `ring_buf_shm_path`.
// Returns the descriptor, or -1 if creation or sizing fails (the descriptor
// is closed again in the latter case). Asserts and returns -1 on non-Linux
// builds.
int AqlQueue::CreateRingBufferFD(const char* ring_buf_shm_path,
                                 uint32_t ring_buf_phys_size_bytes) const {
#ifndef __linux__
  assert(false && "Function only needed on Linux.");
  return -1;
#else
#ifdef HAVE_MEMFD_CREATE
  const int ring_fd = syscall(__NR_memfd_create, ring_buf_shm_path, 0);
  if (ring_fd == -1) return -1;

  const bool sized = (ftruncate(ring_fd, ring_buf_phys_size_bytes) != -1);
#else
  const int ring_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR);
  if (ring_fd == -1) return -1;

  const bool sized = (posix_fallocate(ring_fd, 0, ring_buf_phys_size_bytes) == 0);
#endif

  // Could not size the backing object: give the descriptor back.
  if (!sized) {
    CloseRingBufferFD(ring_buf_shm_path, ring_fd);
    return -1;
  }
  return ring_fd;
#endif
}

// Marks the queue suspended and asks the driver to update it with a queue
// percentage of 0. Failure of the driver call is only caught by assert.
void AqlQueue::Suspend() {
  suspended_ = true;

  auto& driver = agent_->driver();
  auto err = driver.UpdateQueue(queue_id_, 0, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL);
  assert(err == HSA_STATUS_SUCCESS && "Update queue failed.");
}

// Undoes Suspend(): clears the suspended flag and asks the driver to update
// the queue with a queue percentage of 100. Does nothing when the queue is
// not suspended. Failure of the driver call is only caught by assert.
void AqlQueue::Resume() {
  if (!suspended_) return;

  suspended_ = false;

  auto& driver = agent_->driver();
  auto err = driver.UpdateQueue(queue_id_, 100, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL);
  assert(err == HSA_STATUS_SUCCESS && "Update queue failed.");
}

// Destroys the driver queue exactly once: the active_ flag is atomically
// swapped to false and only the caller that observed `true` performs the
// destroy. Always returns HSA_STATUS_SUCCESS; a driver failure is only caught
// by assert.
hsa_status_t AqlQueue::Inactivate() {
  const bool was_active = active_.exchange(false, std::memory_order_relaxed);
  if (!was_active) return HSA_STATUS_SUCCESS;

  auto err = agent_->driver().DestroyQueue(queue_id_);
  assert(err == HSA_STATUS_SUCCESS && "Destroy queue failed.");
  atomic::Fence(std::memory_order_acquire);

  return HSA_STATUS_SUCCESS;
}

// Changes the scheduling priority of the driver queue.
//
// @param priority  New priority to request from the driver.
// @return HSA_STATUS_ERROR_INVALID_QUEUE if the queue is currently suspended,
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the driver rejects the update,
//         HSA_STATUS_SUCCESS otherwise.
//
// The cached priority_ is only updated once the driver has accepted the new
// value. Suspend()/Resume() re-send priority_ to the driver, so caching a
// rejected value would make a later resume silently re-apply a priority the
// caller was told had failed.
hsa_status_t AqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority) {
  if (suspended_) {
    return HSA_STATUS_ERROR_INVALID_QUEUE;
  }

  auto err = agent_->driver().UpdateQueue(queue_id_, 100, priority, ring_buf_,
                                          ring_buf_alloc_bytes_, NULL);
  if (err != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Driver accepted the change: keep the cached value in sync.
  priority_ = priority;
  return HSA_STATUS_SUCCESS;
}

// Refreshes the scratch limits from the agent / runtime flags and reclaims
// any scratch that now exceeds them. Only acts when asynchronous scratch
// reclaim is enabled for this queue.
void AqlQueue::CheckScratchLimits() {
  if (!queue_scratch_.async_reclaim) return;

  queue_scratch_.use_once_limit = agent_->ScratchSingleLimitAsyncThreshold();

  // Alt scratch limit is a fraction of the main limit, or 0 when alt scratch
  // is disabled by flag.
  if (core::Runtime::runtime_singleton_->flag().enable_scratch_alt()) {
    queue_scratch_.use_alt_limit = queue_scratch_.use_once_limit / SCRATCH_ALT_RATIO;
  } else {
    queue_scratch_.use_alt_limit = 0;
  }

  const bool main_over_limit = queue_scratch_.main_size > queue_scratch_.use_once_limit;
  if (main_over_limit) AsyncReclaimMainScratch();

  // Evaluated after the main reclaim, as in the original ordering.
  const bool alt_over_limit = queue_scratch_.alt_size > queue_scratch_.use_alt_limit;
  if (alt_over_limit) AsyncReclaimAltScratch();
}

// Returns the queue's primary scratch to the agent (if any is held), zeroes
// the main-scratch bookkeeping and re-initializes the scratch SRD.
void AqlQueue::FreeMainScratchSpace() {
  if (queue_scratch_.main_queue_base) {
    const auto main_flag = HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE;
    // Tool notifications bracket the actual release.
    tool::notify_event_scratch_free_start(public_handle(), main_flag);
    agent_->ReleaseQueueMainScratch(queue_scratch_);
    tool::notify_event_scratch_free_end(public_handle(), main_flag);
  }

  queue_scratch_.main_size = 0;
  queue_scratch_.main_size_per_thread = 0;
  queue_scratch_.main_queue_process_offset = 0;

  InitScratchSRD();
}

// Reclaims the queue's primary (main) scratch memory while the queue stays
// usable, using the unmap/re-map handshake with CP FW described below.
// Does nothing when async reclaim is disabled for this queue or when
// main_size is zero. Holds scratch_lock_ for the duration of the handshake.
void AqlQueue::AsyncReclaimMainScratch() {
  /*
   * Pseudocode for scratch memory management when asynchronous scratch is
   * supported
   *
   * Notes:
   * - CP FW only updates its copy of amd_queue_ (scratch_copy) on queue_connect
   * so changes to amd_queue_ by ROCr are only visible to CP FW after a queue
   * re-map.
   *
   * - CP sets AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM bit to indicate that this version
   * of CP FW supports asynchronous scratch reclaim. But CP will only update
   * amd_queue_.caps on queue-connect so ROCr assumes that async scratch reclaim
   * is supported based on the CP FW version.
   *
   * - ROCr sets AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM bit to indicate to CP that this
   * version of ROCr (SW) supports asynchronous scratch and therefore CP is
   * allowed to access the extra fields that exist in amd_queue_v2.
   *
   * CP FW Pseudocode:
   * On doorbell-ring:
   * <start>
   *    Start processing AQL dispatch packet at read_index
   *    if (packet->private_segment_size > 0) {
   *      // This dispatch needs scratch
   *      if (packet->private_segment_size <= scratch_copy.scratch_wave64_lane_byte_size) {
   *         if (read_index <= scratch_max_use_index) {
   *           scratch_copy->scratch_last_used_index = current_index
   *           dispatch-uses-primary-scratch
   *           goto proceed-with-dispatch
   *         }
   *      } else if (packet->private_segment_size <= scratch_copy.alt_scratch_wave64_lane_byte_size
   *              && packet->grid_size_x <= scratch_copy.alt_scratch_dispatch_limit_x
   *              && packet->grid_size_y <= scratch_copy.alt_scratch_dispatch_limit_y
   *              && packet->grid_size_z <= scratch_copy.alt_scratch_dispatch_limit_z) {
   *         if (read_index <= alt_scratch_max_use_index) {
   *           scratch_copy->alt_scratch_last_used_index = current_index
   *           dispatch-uses-alternate-scratch
   *           goto proceed-with-dispatch
   *         }
   *      }
   *      request-more-scratch
   *    }
   *    goto proceed-with-dispatch
   * <end>
   *
   * On queue-connect:
   * <start>
   *    set AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM to indicate that this version of CP
   *    FW supports asynchronous scratch reclaim
   * <end>
   *
   * On queue-disconnect:
   * <start>
   *     // This guarantees that ROCr sees updated values of scratch_last_used_index
   *     // and alt_scratch_last_used_index after queue is unmapped.
   *     queue->scratch_last_used_index = scratch_copy->scratch_last_used_index
   *     queue->alt_scratch_last_used_index = scratch_copy->alt_scratch_last_used_index
   * <end>
   *
   * ROCr Pseudocode:
   * On init:
   *     queue->scratch_max_use_index = UINT64_MAX
   *     queue->alt_scratch_max_use_index = UINT64_MAX
   *
   * To reclaim scratch:
   * <start>
   *     // mutex blocks async-thread in case CP raises signal to request more scratch
   *     acquire(scratch-mutex)
   *     queue-unmap
   *     // Tell CP that it cannot use scratch after current packet
   *     queue->scratch_max_use_index = max(amd_queue_.scratch_last_used_index[*].main)
   *
   *     queue-map
   *     // wait for CP to finish current packet
   *     while (queue->scratch_max_use_index >= queue->read_dispatch_id)
   *         sched_yield();
   *
   *     free-scratch
   *     release(scratch-mutex)
   * <end>
   */
  // Largest main-scratch "last used" index reported across all XCCs of the
  // agent (0 when none is larger).
  auto getMaxMainScratchUseIndex = [&]() {
    uint64_t max = 0;
    for (int i = 0; i < agent_->properties().NumXcc; i++) {
      if (amd_queue_.scratch_last_used_index[i].main > max)
        max = amd_queue_.scratch_last_used_index[i].main;
    }
    return max;
  };

  auto& scratch = queue_scratch_;
  // Nothing to reclaim: async reclaim is off, or main_size is zero.
  if (!scratch.async_reclaim || !scratch.main_size) {
    return;
  }

  assert((amd_queue_.caps & AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM) &&
          "This version of CP FW should support async scratch, but flag is not set");

  tool::notify_event_scratch_async_reclaim_start(public_handle(),
                                                 HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);

  // Serializes against other users of scratch_lock_ (see "scratch-mutex" in
  // the pseudocode above) until this function returns.
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);

  // Unmap the queue. CP will check amd_queue_ fields on re-map
  Suspend();

  /*
   * amd_queue_.scratch_last_used_index[*].main is updated by CP FW every time a
   * dispatch packet is launched and it needs scratch memory.
   * If amd_queue_.scratch_last_used_index[*].main >= amd_queue_.read_dispatch_id
   * then this XCC is currently running a dispatch that uses scratch.
   * Setting scratch_max_use_index to max(amd_queue_.scratch_last_used_index[*].main)
   * prevents CP from trying to use main-scratch after
   * amd_queue_.scratch_max_use_index. If CP sees a dispatch that needs scratch,
   * it will raise a new signal. CP may use alt-scratch in the meantime.
   */
  amd_queue_.scratch_max_use_index = getMaxMainScratchUseIndex();

  // Re-map the queue so CP picks up the new scratch_max_use_index.
  Resume();

  // If current dispatch is using scratch, wait for it to finish
  while (amd_queue_.scratch_max_use_index >= LoadReadIndexRelaxed()) {
    //TODO: if mwaitx supported, //mwaitx(amd_queue_.read_dispatch_id);
    os::YieldThread();
  }

  // The read index has moved past the last dispatch allowed to use main
  // scratch; release it and reset the main-scratch bookkeeping.
  FreeMainScratchSpace();
  tool::notify_event_scratch_async_reclaim_end(public_handle(),
                                                HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);

  return;
}

// Returns the queue's alternate scratch to the agent (if any is held), zeroes
// the alt-scratch bookkeeping and re-initializes the scratch SRD.
void AqlQueue::FreeAltScratchSpace() {
  if (queue_scratch_.alt_queue_base) {
    const auto alt_flag = HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT;
    // Tool notifications bracket the actual release.
    tool::notify_event_scratch_free_start(public_handle(), alt_flag);
    agent_->ReleaseQueueAltScratch(queue_scratch_);
    tool::notify_event_scratch_free_end(public_handle(), alt_flag);
  }

  queue_scratch_.alt_size = 0;
  queue_scratch_.alt_size_per_thread = 0;
  queue_scratch_.alt_queue_process_offset = 0;

  InitScratchSRD();
}

// Reclaims the queue's alternate scratch memory using the same
// unmap / set-max-use-index / re-map / wait / free sequence as
// AsyncReclaimMainScratch(), but on the alt_* fields.
// Does nothing when async reclaim is disabled for this queue or when
// alt_size is zero. Holds scratch_lock_ for the duration of the handshake.
void AqlQueue::AsyncReclaimAltScratch() {
  /*
   * See AsyncReclaimMainScratch() for scratch reclaim handshake protocol with
   * CP FW.
   */
  // Largest alt-scratch "last used" index reported across all XCCs of the
  // agent (0 when none is larger).
  auto getMaxAltScratchUseIndex = [&]() {
    uint64_t max = 0;
    for (int i = 0; i < agent_->properties().NumXcc; i++) {
      if (amd_queue_.scratch_last_used_index[i].alt > max)
        max = amd_queue_.scratch_last_used_index[i].alt;
    }
    return max;
  };

  auto& scratch = queue_scratch_;
  // Nothing to reclaim: async reclaim is off, or alt_size is zero.
  if (!scratch.async_reclaim || !scratch.alt_size) {
    return;
  }

  assert((amd_queue_.caps & AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM) &&
          "This version of CP FW should support async scratch, but flag is not set");

  tool::notify_event_scratch_async_reclaim_start(public_handle(),
                                                 HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);

  // Serializes against other users of scratch_lock_ until this function returns.
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);

  // Unmap the queue. CP will check amd_queue_ fields on re-map
  Suspend();

  // Cap alt-scratch use at the newest index any XCC has reported using it for.
  amd_queue_.alt_scratch_max_use_index = getMaxAltScratchUseIndex();

  // Re-map the queue so CP picks up the new alt_scratch_max_use_index.
  Resume();

  // If current dispatch is using alt scratch, wait for it to finish
  while (amd_queue_.alt_scratch_max_use_index >= LoadReadIndexRelaxed()) {
    //TODO: if mwaitx supported, //mwaitx(amd_queue_.read_dispatch_id);
    os::YieldThread();
  }

  // The read index has moved past the last dispatch allowed to use alt
  // scratch; release it and reset the alt-scratch bookkeeping.
  FreeAltScratchSpace();
  tool::notify_event_scratch_async_reclaim_end(public_handle(),
                                                HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
  return;
}

void AqlQueue::HandleInsufficientScratch(hsa_signal_value_t& error_code,
                                         hsa_signal_value_t& waitVal, bool& changeWait) {
  // Insufficient scratch - recoverable, don't process dynamic scratch if errors are present.
  auto& scratch = queue_scratch_;

  /*******************************************************************************************
   * uint32_t max_scratch_slots;   // Maximum number of slots for this device based on num CUs
   * uint64_t dispatch_slots;      // Number of slots wanted for this dispatch
   *
   * uint64_t all_slots_size;      // Size needed to fill all slots on this device
   * uint64_t dispatch_size;       // Size needed to fill wanted slots for this dispatch
   *
   * //Default values:
   * size_t use_once_limit = 128 MB      // When async reclaim not supported
   *                                     // DEFAULT_SCRATCH_SINGLE_LIMIT
   *                       = 3GB per-XCC // When async reclaim is supported
   *                                     // DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC
   *
   * size_t use_alt_limit  = 768 MB per-XCC // use_once_limit/SCRATCH_ALT_RATIO
   *
   * if (async-scratch-reclaim-supported
   *     && dispatch_slots < max_scratch_slots
   *     && dispatch_size < use_alt_limit) {
   *   // This dispatch wants less waves than number of slots, use alternate scratch
   *   // alt_tmpring_size will have limited waves
   *  use_alt()
   * } else if (all_slots_size <= use_once_limit) {
   *  use_main()
   *
   *  //If we failed to allocate memory to fill all slots, scratch.use_once will be set
   *  if (scratch.use_once) {
   *    use_once
   *  } else if (all_slots_size > scratch.alt_size) {
   *    //Primary scratch is large enough to handle needs of alt-scratch
   *    free_alt()
   *  }
   * }
   *
   *******************************************************************************************/

  core::AqlPacket *pkt = NULL;
  uint64_t dispatch_id = UINT64_MAX;

  auto get_dispatch_pkt = [&]() {
    dispatch_id = amd_queue_.read_dispatch_id;
    do {
      // On GPUs where EOP is handled in asic, the read_dispatch_id is not
      // updated after each packet so look for the first dispatch that needs
      // scratch
      const uint64_t pkt_slot_idx =
          dispatch_id & (amd_queue_.hsa_queue.size - 1);

      core::AqlPacket *dispatch_pkt =
          &((core::AqlPacket *)amd_queue_.hsa_queue.base_address)[pkt_slot_idx];
      if (dispatch_pkt->IsDispatchAndNeedsScratch()) return dispatch_pkt;

      dispatch_id++;
    } while (dispatch_id <= LoadWriteIndexRelaxed());

    return (core::AqlPacket *)NULL;
  };

  auto calc_dispatch_waves_per_group = [&](core::AqlPacket& pkt) {
    const uint64_t lanes_per_group =
        (uint64_t(pkt.dispatch.workgroup_size_x) * pkt.dispatch.workgroup_size_y) *
        pkt.dispatch.workgroup_size_z;

    const uint32_t lanes_per_wave = (error_code & 0x400) ? 32 : 64;
    return (lanes_per_group + lanes_per_wave - 1) / lanes_per_wave;
  };

  auto calc_dispatch_groups = [&](core::AqlPacket& pkt) {
    const uint64_t lanes_per_group =
        (uint64_t(pkt.dispatch.workgroup_size_x) * pkt.dispatch.workgroup_size_y) *
        pkt.dispatch.workgroup_size_z;

    uint64_t groups = ((uint64_t(pkt.dispatch.grid_size_x) + pkt.dispatch.workgroup_size_x - 1) /
                       pkt.dispatch.workgroup_size_x) *
                      ((uint64_t(pkt.dispatch.grid_size_y) + pkt.dispatch.workgroup_size_y - 1) /
                       pkt.dispatch.workgroup_size_y) *
                      ((uint64_t(pkt.dispatch.grid_size_z) + pkt.dispatch.workgroup_size_z - 1) /
                       pkt.dispatch.workgroup_size_z);
    const uint32_t cu_count = amd_queue_.max_cu_id + 1;

    const uint32_t engines = agent_->properties().NumShaderBanks;

    const uint32_t symmetric_cus = AlignDown(cu_count, engines);
    const uint32_t asymmetryPerRound = cu_count - symmetric_cus;
    const uint64_t rounds = groups / cu_count;
    const uint64_t asymmetricGroups = rounds * asymmetryPerRound;
    const uint64_t symmetricGroups = groups - asymmetricGroups;
    uint64_t maxGroupsPerEngine =
        ((symmetricGroups + engines - 1) / engines) + (asymmetryPerRound ? rounds : 0);

    // For gfx10+ devices we must attempt to assign the smaller of 256 lanes or 16 groups to each
    // engine.
    if (agent_->supported_isas()[0]->GetMajorVersion() >= 10 &&
        maxGroupsPerEngine < 16 &&
                              lanes_per_group * maxGroupsPerEngine < 256) {
      uint64_t groups_per_interleave = (256 + lanes_per_group - 1) / lanes_per_group;
      maxGroupsPerEngine = Min(groups_per_interleave, 16ul);
    }

    // Populate all engines at max group occupancy, then clip down to device limits.
    return maxGroupsPerEngine * engines;
  };

  // TODO: Move this to queue constructor since it does not depend on pkt, must be re-computed if
  // CU Masking is enabled
  auto calc_device_slots = [&]() {
    // Get the hw maximum scratch slot count taking into consideration asymmetric harvest.
    const uint32_t engines = agent_->properties().NumShaderBanks;
    const uint32_t cu_count = amd_queue_.max_cu_id + 1;
    return AlignUp(cu_count, engines) * agent_->properties().MaxSlotsScratchCU;
  };

  assert(core::Runtime::runtime_singleton_->flag().enable_scratch_async_reclaim() &&
         (!scratch.async_reclaim || (amd_queue_.caps & AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM)) &&
          "Asynchronous scratch reclaim capability not set, but this FW version should support it");

  scratch.cooperative = (amd_queue_.hsa_queue.type == HSA_QUEUE_TYPE_COOPERATIVE);

  pkt = get_dispatch_pkt(); // Sets dispatch_id
  assert((pkt && dispatch_id != UINT64_MAX) &&
         "Could not find dispatch packet with private_segment_size > 0");

  tool::notify_event_scratch_alloc_start(
      public_handle(), HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE, dispatch_id);

  uint32_t device_slots = calc_device_slots();
  uint32_t groups = calc_dispatch_groups(*pkt);
  uint32_t waves_per_group = calc_dispatch_waves_per_group(*pkt);

  uint32_t dispatch_slots = groups * waves_per_group;
  dispatch_slots = std::min(dispatch_slots, device_slots);

  const uint64_t lanes_per_wave = (error_code & 0x400) ? 32 : 64;

  const uint64_t size_per_thread =
      AlignUp(pkt->dispatch.private_segment_size,
              scratch.mem_alignment_size / lanes_per_wave);
  const uint64_t device_size = size_per_thread * lanes_per_wave * device_slots;
  const uint64_t dispatch_size = size_per_thread * lanes_per_wave * dispatch_slots;

  ScopedAcquire<KernelMutex> lock(&scratch_lock_);

  // scratch.use_alt_limit will be 0 if alt scratch is not supported or disabled
  if (dispatch_size < scratch.use_alt_limit && dispatch_slots < device_slots) {
    // Try to use ALT scratch
    if (scratch.alt_queue_base) {
      tool::notify_event_scratch_free_start(public_handle(),
                                HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
      agent_->ReleaseQueueAltScratch(scratch);
      tool::notify_event_scratch_free_end(public_handle(),
                                HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
    }

    scratch.alt_size = dispatch_size;
    scratch.alt_size_per_thread = size_per_thread;
    scratch.alt_lanes_per_wave = lanes_per_wave;
    scratch.alt_waves_per_group = waves_per_group;

    agent_->AcquireQueueAltScratch(scratch);
    if (scratch.alt_queue_base) {
      scratch.alt_dispatch_limit_x = pkt->dispatch.grid_size_x;
      scratch.alt_dispatch_limit_y = pkt->dispatch.grid_size_y;
      scratch.alt_dispatch_limit_z = pkt->dispatch.grid_size_z;

      InitScratchSRD();
      /*
       * Indicate to CP FW that any dispatch may use alt scratch memory.
       * If ROCr wants to reclain scratch memory, it will set
       * amd_queue_.alt_scratch_max_use_index to a lower value
       */
      amd_queue_.alt_scratch_max_use_index = UINT64_MAX;
      // Restart the queue.
      HSA::hsa_signal_store_screlease(amd_queue_.queue_inactive_signal, 0);
      tool::notify_event_scratch_alloc_end(public_handle(), HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT,
                                           dispatch_id, scratch.alt_size, dispatch_slots);
      return;
    }
    // Could not allocate enough memory for alternate scratch fallback to primary scratch
    scratch.alt_size = 0;
    scratch.alt_size_per_thread = 0;
  }

  // Use PRIMARY scratch
  if (scratch.main_queue_base) {
    tool::notify_event_scratch_free_start(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);
    agent_->ReleaseQueueMainScratch(scratch);
    tool::notify_event_scratch_free_end(public_handle(),
                              HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE);
  }

  scratch.main_size = device_size;
  scratch.main_size_per_thread = size_per_thread;
  scratch.main_lanes_per_wave = lanes_per_wave;
  scratch.main_waves_per_group = waves_per_group;

  scratch.dispatch_size = dispatch_size;
  scratch.dispatch_slots = dispatch_slots;

  agent_->AcquireQueueMainScratch(scratch);

  if (scratch.retry) {
    dynamicScratchState |= ERROR_HANDLER_SCRATCH_RETRY;
    changeWait = true;
    waitVal = error_code;
  } else if (scratch.main_queue_base == nullptr) {
    // We could not allocate memory to fit even 1 wave
    tool::notify_event_scratch_alloc_end(public_handle(), HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE,
                                         dispatch_id, scratch.main_size, dispatch_slots);
    return;
  }

  // If we had to reduce number of waves
  if (scratch.large) {
    amd_queue_.queue_properties |= AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE;
    // Set system release fence to flush scratch stores with older firmware versions.
    if ((agent_->supported_isas()[0]->GetMajorVersion() == 8) && (agent_->GetMicrocodeVersion() < 729)) {
      pkt->dispatch.header &=
          ~(((1 << HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE) - 1)
            << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
      pkt->dispatch.header |=
          (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
    }
  } else if (scratch.alt_size && scratch.main_size > scratch.alt_size) {
    // Not using use-scratch-once, and dispatches that would fit in alt-scratch would also fit in
    // main scratch. No need for alt-scratch.
    tool::notify_event_scratch_async_reclaim_start(public_handle(),
                                                 HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
    FreeAltScratchSpace();
    tool::notify_event_scratch_async_reclaim_end(public_handle(),
                                                 HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT);
  }

  // Reset scratch memory related entities for the queue
  InitScratchSRD();
  /*
   * Indicate to CP FW that any dispatch may use alt scratch memory.
   * If ROCr wants to reclain scratch memory, it will set
   * amd_queue_.alt_scratch_max_use_index to a lower value
   */
  amd_queue_.scratch_max_use_index = UINT64_MAX;

  // Restart the queue.
  HSA::hsa_signal_store_screlease(amd_queue_.queue_inactive_signal, 0);

  auto alloc_flag = (scratch.large) ? HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE
                                    : HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE;

  tool::notify_event_scratch_alloc_end(public_handle(), alloc_flag, dispatch_id, scratch.main_size,
                                       dispatch_slots);

  return;
}

// Handler invoked with the value of the queue's queue_inactive_signal; it re-registers itself on
// that same signal further down when the wait condition has to change.
//
// Template parameter:
//   HandleExceptions - when true, non-scratch error bits in error_code are translated to an
//                      hsa_status_t and reported through errors_callback_. When false, those bits
//                      are not interpreted here; the signal is cleared instead (see the final
//                      else-branch of the error decoding).
// Parameters:
//   error_code - signal value that triggered the handler, treated as a bit mask of queue errors.
//   arg        - the AqlQueue* this handler was registered for.
// Returns:
//   true after an event was handled without error and the wait condition is unchanged; false
//   when the handler re-registered itself with a new wait value, when an error was reported, or
//   when the queue is terminating.
//   NOTE(review): presumably `true` asks the async-signal machinery to keep this registration
//   alive and `false` drops it - confirm against SetAsyncSignalHandler's contract.
template <bool HandleExceptions>
bool AqlQueue::DynamicQueueEventsHandler(hsa_signal_value_t error_code, void* arg) {
  AqlQueue* queue = (AqlQueue*)arg;
  hsa_status_t errorCode = HSA_STATUS_SUCCESS;
  bool fatal = false;
  bool changeWait = false;
  // Only meaningful when changeWait is true; every path that sets changeWait also sets waitVal
  // (either right below or inside HandleInsufficientScratch, which receives both by argument).
  hsa_signal_value_t waitVal;

  // A previous scratch allocation asked for a retry: clear the retry state, strip the top bit
  // (bit 63) from both the signal and the local error code, and go back to waiting for a
  // non-zero signal value.
  if ((queue->dynamicScratchState & ERROR_HANDLER_SCRATCH_RETRY) == ERROR_HANDLER_SCRATCH_RETRY) {
    queue->dynamicScratchState &= ~ERROR_HANDLER_SCRATCH_RETRY;
    changeWait = true;
    waitVal = 0;
    HSA::hsa_signal_and_relaxed(queue->amd_queue_.queue_inactive_signal, ~0x8000000000000000ull);
    error_code &= ~0x8000000000000000ull;
  }

  // Process errors only if queue is not terminating.
  if ((queue->dynamicScratchState & ERROR_HANDLER_TERMINATE) != ERROR_HANDLER_TERMINATE) {
    if (error_code == 512) {  // Large scratch reclaim
      tool::notify_event_scratch_free_start(queue->public_handle(),
                                            HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE);

      // Give the main scratch back to the agent and zero the bookkeeping so the SRD rebuilt by
      // InitScratchSRD() describes "no main scratch".
      auto& scratch = queue->queue_scratch_;
      queue->agent_->ReleaseQueueMainScratch(scratch);
      scratch.main_queue_base = nullptr;
      scratch.main_size = 0;
      scratch.main_size_per_thread = 0;
      scratch.main_queue_process_offset = 0;
      queue->InitScratchSRD();

      HSA::hsa_signal_store_relaxed(queue->amd_queue_.queue_inactive_signal, 0);
      // Resumes queue processing.
      atomic::Store(&queue->amd_queue_.queue_properties,
                    queue->amd_queue_.queue_properties & (~AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE),
                    std::memory_order_release);
      atomic::Fence(std::memory_order_release);
      tool::notify_event_scratch_free_end(queue->public_handle(),
                                          HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE);
      return true;
    }

    // Process only one queue error.
    if (error_code & 0x401) {  // insufficient scratch, wave64 or wave32
      // May set changeWait/waitVal (both are passed in) when the allocation has to be retried.
      queue->HandleInsufficientScratch(error_code, waitVal, changeWait);

      // Out of scratch - promote error
      if (queue->queue_scratch_.main_queue_base == nullptr &&
          queue->queue_scratch_.alt_queue_base == nullptr)
        errorCode = HSA_STATUS_ERROR_OUT_OF_RESOURCES;


    } else if (HandleExceptions) {
      // The first matching bit wins; lower branches are not evaluated once one matches.
      if ((error_code & 2) == 2) {  // Invalid dim
        errorCode = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;

      } else if ((error_code & 4) == 4) {  // Invalid group memory
        errorCode = HSA_STATUS_ERROR_INVALID_ALLOCATION;

      } else if ((error_code & 8) == 8) {  // Invalid (or NULL) code
        errorCode = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;

      } else if (((error_code & 32) == 32) ||    // Invalid format: 32 is generic,
                 ((error_code & 256) == 256)) {  // 256 is vendor specific packets
        errorCode = HSA_STATUS_ERROR_INVALID_PACKET_FORMAT;

      } else if ((error_code & 64) == 64) {  // Group is too large
        errorCode = HSA_STATUS_ERROR_INVALID_ARGUMENT;

      } else if ((error_code & 128) == 128) {  // Out of VGPRs
        errorCode = hsa_status_t(HSA_STATUS_ERROR_OUT_OF_REGISTERS);

      } else if ((error_code & 0x20000000) == 0x20000000) {  // Memory violation (>48-bit)
        errorCode = hsa_status_t(HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION);

      } else if ((error_code & 0x40000000) == 0x40000000) {  // Illegal instruction
        errorCode = hsa_status_t(HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION);

      } else if ((error_code & 0x80000000) == 0x80000000) {  // Debug trap
        errorCode = HSA_STATUS_ERROR_EXCEPTION;
        fatal = true;

      } else {  // Undefined code
        assert(false && "Undefined queue error code");
        errorCode = HSA_STATUS_ERROR;
        fatal = true;
      }
    } else {
      // Not handling exceptions, clear so that ExceptionHandler can run.
      HSA::hsa_signal_store_relaxed(queue->amd_queue_.queue_inactive_signal, 0);
    }

    if (errorCode == HSA_STATUS_SUCCESS) {
      if (changeWait) {
        // Register this same handler again, now waiting for the signal to differ from waitVal,
        // and return false for the current invocation.
        core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
            queue->amd_queue_.queue_inactive_signal, HSA_SIGNAL_CONDITION_NE, waitVal,
            DynamicQueueEventsHandler<HandleExceptions>, queue);
        return false;
      }
      return true;
    }

    // An error was decoded: stop the queue and hand the status to the user callback, if any.
    queue->Suspend();
    if (queue->errors_callback_ != nullptr) {
      queue->errors_callback_(errorCode, queue->public_handle(), queue->errors_data_);
    }
    if (fatal) {
      // Temporarilly removed until there is clarity on exactly what debugtrap's semantics are.
      // assert(false && "Fatal queue error");
      // std::abort();
    }
  }
  // Reached when the queue is terminating or after an error was reported: mark the handler as
  // done and store -1 to the signal.
  // Copy here is to protect against queue being released between setting the scratch state and
  // updating the signal value.  The signal itself is safe to use because it is ref counted rather
  // than being released with the queue.
  hsa_signal_t signal = queue->amd_queue_.queue_inactive_signal;
  queue->dynamicScratchState = ERROR_HANDLER_DONE;
  HSA::hsa_signal_store_screlease(signal, -1ull);
  return false;
}

// Handler for the queue's exception signal.
//
// error_code is treated as a bit mask in which exception code N (see QueueErrors below) is
// reported in bit N-1. The first table entry whose bit is set selects the hsa_status_t that is
// passed to errors_callback_; if no entry matches, HSA_STATUS_ERROR is reported.
//
// Parameters:
//   error_code - value of the exception signal that triggered the handler.
//   arg        - the AqlQueue* this handler was registered for.
// Returns:
//   Always false. Every exit path goes through exceptionHandlerDone(), which marks the handler
//   as done and stores 0 to exception_signal_.
bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
  struct queue_error_t {
    uint32_t code;
    hsa_status_t status;
  };
  static const queue_error_t QueueErrors[] = {
      // EC_QUEUE_WAVE_ABORT
      { 1, HSA_STATUS_ERROR_EXCEPTION },
      // EC_QUEUE_WAVE_TRAP
      { 2, HSA_STATUS_ERROR_EXCEPTION },
      // EC_QUEUE_WAVE_MATH_ERROR
      { 3, HSA_STATUS_ERROR_EXCEPTION },
      // EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION
      { 4, (hsa_status_t)HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION },
      // EC_QUEUE_WAVE_MEMORY_VIOLATION
      { 5, (hsa_status_t)HSA_STATUS_ERROR_MEMORY_FAULT },
      // EC_QUEUE_WAVE_APERTURE_VIOLATION
      { 6, (hsa_status_t)HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION },
      // EC_QUEUE_PACKET_DISPATCH_DIM_INVALID
      { 16, HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS },
      // EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID
      { 17, HSA_STATUS_ERROR_INVALID_ALLOCATION },
      // EC_QUEUE_PACKET_DISPATCH_CODE_INVALID
      { 18, HSA_STATUS_ERROR_INVALID_CODE_OBJECT },
      // EC_QUEUE_PACKET_UNSUPPORTED
      { 20, HSA_STATUS_ERROR_INVALID_PACKET_FORMAT },
      // EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID
      { 21, HSA_STATUS_ERROR_INVALID_ARGUMENT },
      // EC_QUEUE_PACKET_DISPATCH_REGISTER_SIZE_INVALID
      { 22, HSA_STATUS_ERROR_INVALID_ISA },
      // EC_QUEUE_PACKET_VENDOR_UNSUPPORTED
      { 23, HSA_STATUS_ERROR_INVALID_PACKET_FORMAT },
      // EC_QUEUE_PREEMPTION_ERROR
      { 31, HSA_STATUS_ERROR },
      // EC_DEVICE_MEMORY_VIOLATION
      { 33, (hsa_status_t)HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION },
      // EC_DEVICE_RAS_ERROR
      { 34, HSA_STATUS_ERROR },
      // EC_DEVICE_FATAL_HALT
      { 35, HSA_STATUS_ERROR },
      // EC_DEVICE_NEW
      { 36, HSA_STATUS_ERROR },
      // EC_PROCESS_DEVICE_REMOVE
      { 50, HSA_STATUS_ERROR }};

  AqlQueue* queue = (AqlQueue*)arg;
  hsa_status_t errorCode = HSA_STATUS_ERROR;

  // Common exit: flag the handler as finished, then release-store 0 to the exception signal.
  // The signal pointer is read before the state is changed.
  auto exceptionHandlerDone = [&]() {
    Signal* signal = queue->exception_signal_;
    queue->exceptionState = ERROR_HANDLER_DONE;
    signal->StoreRelease(0);
    return false;
  };

  // Queue is being torn down: do not report anything.
  if (queue->exceptionState == ERROR_HANDLER_TERMINATE) {
    return exceptionHandlerDone();
  }

  // Codes go up to 50, so the bit test needs a 64-bit shift operand: `unsigned long` is only
  // 32 bits wide on LLP64 targets, where shifting it by 32 or more is undefined behaviour.
  for (const auto& error : QueueErrors) {
    if (error_code & (1ull << (error.code - 1))) {
      errorCode = error.status;
      break;
    }
  }

  // Undefined or unexpected code
  // Note: table entries that map to HSA_STATUS_ERROR also trip this assert in debug builds.
  assert((errorCode != HSA_STATUS_ERROR) && "Undefined or unexpected queue error code");

  // Suppress VM fault reporting.  This is more useful when reported through the system error
  // handler.
  if (errorCode == static_cast<hsa_status_t>(HSA_STATUS_ERROR_MEMORY_FAULT)) {
    debug_print("Queue error - HSA_STATUS_ERROR_MEMORY_FAULT\n");
    return exceptionHandlerDone();
  }

  // Fallback if KFD does not support GPU core dump. In this case, the core dump is
  // generated by hsa-runtime (skipped for ISA major version 11).
  if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump &&
                queue->agent_->supported_isas()[0]->GetMajorVersion() != 11) {

    if (pcs::PcsRuntime::instance()->SessionsActive())
      fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");
    else if (amd::coredump::dump_gpu_core())  // non-zero return is treated as failure
      fprintf(stderr, "GPU core dump failed\n");
    // supports_core_dump flag is overwritten to avoid generate core dump file again
    // caught by a different exception handler. Such as VMFaultHandler.
    core::Runtime::runtime_singleton_->KfdVersion(
      core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging, true);
  }

  // Stop the queue and hand the decoded status to the user callback, if one is installed.
  queue->Suspend();
  if (queue->errors_callback_ != nullptr) {
    queue->errors_callback_(errorCode, queue->public_handle(), queue->errors_data_);
  }
  return exceptionHandlerDone();
}

// Applies a compute-unit (CU) enable mask to this queue and records it for GetCUMasking().
//
// Parameters:
//   num_cu_mask_count - number of mask bits supplied in cu_mask; only whole 32-bit words
//                       (num_cu_mask_count / 32) are consumed. 0 resets the mask to "all CUs".
//   cu_mask           - array of at least num_cu_mask_count / 32 dwords; not read when
//                       num_cu_mask_count is 0.
// Returns:
//   HSA_STATUS_SUCCESS               - mask recorded (and applied to the driver if required).
//   HSA_STATUS_CU_MASK_REDUCED       - as above, but requested bits were removed by the global
//                                      mask.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT- on ISA major version >= 10, a CU pair is half-enabled.
//   any failure status returned by the driver's SetQueueCUMask; the tracked mask is left
//   untouched in that case.
hsa_status_t AqlQueue::SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* cu_mask) {
  uint32_t cu_count;
  agent_->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
  size_t mask_dwords = (cu_count + 31) / 32;
  // Mask to trim the last uint32_t in cu_mask to the physical CU count.
  // Unsigned literal on purpose: with 31 valid bits, a signed `(1 << 31) - 1` overflows int.
  // The result is 0 when cu_count is a multiple of 32, meaning "nothing to trim".
  const uint32_t tail_mask = (1u << (cu_count % 32)) - 1;

  auto global_mask = core::Runtime::runtime_singleton_->flag().cu_mask(agent_->enumeration_index());
  std::vector<uint32_t> mask;

  bool clipped = false;

  // num_cu_mask_count = 0 resets the CU mask.
  if (num_cu_mask_count == 0) {
    mask.assign(mask_dwords, uint32_t(-1));
  } else {
    mask.assign(cu_mask, cu_mask + num_cu_mask_count / 32);
  }

  // Apply global mask to user mask
  if (!global_mask.empty()) {
    // Limit mask processing to smallest needed dword range
    size_t limit = Min(global_mask.size(), mask.size(), mask_dwords);

    // Check for disabling requested cus: any bit set beyond `limit` is about to be dropped.
    for (size_t i = limit; i < mask.size(); i++) {
      if (mask[i] != 0) {
        clipped = true;
        break;
      }
    }

    mask.resize(limit, 0);
    for (size_t i = 0; i < limit; i++) {
      clipped |= ((mask[i] & (~global_mask[i])) != 0);
      mask[i] &= global_mask[i];
    }
  } else {
    // Limit to physical CU range only
    size_t limit = Min(mask.size(), mask_dwords);
    mask.resize(limit, 0);
  }

  // Clip last dword to physical CU limit if necessary
  if ((mask.size() == mask_dwords) && (tail_mask != 0)) mask[mask_dwords - 1] &= tail_mask;

  // Apply mask if non-default or not queue initialization.
  ScopedAcquire<KernelMutex> lock(&mask_lock_);
  if ((!cu_mask_.empty()) || (num_cu_mask_count != 0) || (!global_mask.empty())) {

    // Devices with WGPs must conform to even-indexed contiguous pairwise CU enablement.
    if (agent_->supported_isas()[0]->GetMajorVersion() >= 10) {
      for (size_t i = 0; i < mask.size() * 32; i += 2) {
        uint32_t cu_pair = (mask[i / 32] >> (i % 32)) & 0x3;
        if (cu_pair && cu_pair != 0x3) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    }

    // Only bail out on failure. Returning the driver status unconditionally would skip the
    // bookkeeping below, leaving cu_mask_ stale for GetCUMasking() and never reporting
    // HSA_STATUS_CU_MASK_REDUCED.
    const hsa_status_t status = agent_->driver().SetQueueCUMask(
        queue_id_, mask.size() * 32, reinterpret_cast<HSAuint32*>(mask.data()));
    if (status != HSA_STATUS_SUCCESS) return status;
  }

  // update current cu masking tracking.
  cu_mask_ = std::move(mask);
  return clipped ? (hsa_status_t)HSA_STATUS_CU_MASK_REDUCED : HSA_STATUS_SUCCESS;
}

// Copies the currently tracked CU mask into the caller's buffer.
//
// num_cu_mask_count is the capacity of cu_mask in bits; only whole dwords
// (num_cu_mask_count / 32) are written. When the caller asks for more dwords than are tracked,
// the excess dwords are zero-filled. Always returns HSA_STATUS_SUCCESS.
hsa_status_t AqlQueue::GetCUMasking(uint32_t num_cu_mask_count, uint32_t* cu_mask) {
  ScopedAcquire<KernelMutex> lock(&mask_lock_);
  assert(!cu_mask_.empty() && "No current cu_mask!");

  const uint32_t requested_dwords = num_cu_mask_count / 32;
  uint32_t copied_dwords = requested_dwords;

  if (requested_dwords > cu_mask_.size()) {
    // Caller's buffer is longer than the tracked mask: clear the tail first.
    const size_t tracked_dwords = cu_mask_.size();
    memset(&cu_mask[tracked_dwords], 0, sizeof(uint32_t) * (requested_dwords - tracked_dwords));
    copied_dwords = tracked_dwords;
  }

  memcpy(cu_mask, &cu_mask_[0], sizeof(uint32_t) * copied_dwords);
  return HSA_STATUS_SUCCESS;
}

// Forwards the profiling switch to the base Queue and, when profiling is being turned on,
// additionally asks the agent to check its clock ticks.
void AqlQueue::SetProfiling(bool enabled) {
  Queue::SetProfiling(enabled);

  if (!enabled) return;
  agent_->CheckClockTicks();
}

// Submits a client-supplied PM4 command stream through this queue by copying it into the
// shared indirect buffer (pm4_ib_buf_) and enqueuing one 64-byte packet that jumps to it.
//
// Parameters:
//   cmd_data     - PM4 dwords to execute; copied into pm4_ib_buf_.
//   cmd_size_b   - size of cmd_data in bytes; asserted to be strictly less than pm4_ib_size_b_.
//   acquireFence - acquire fence scope placed in the packet header (ISA major version >= 9 only).
//   releaseFence - release fence scope placed in the packet header (ISA major version >= 9 only).
//   in_signal    - optional completion signal, see below.
//
// If in_signal is NULL then this ExecutePM4 will block and wait for PM4 commands to complete
// If in_signal is provided, then ExecutePM4 will return and caller may wait for in_signal
// Note: On gfx8, there is no completion signal support, so ExecutePM4 will block even if
// in_signal is provided, and it is still valid to check in_signal after ExecutePM4 returns.
void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope_t acquireFence,
                          hsa_fence_scope_t releaseFence, hsa_signal_t* in_signal) {
  // pm4_ib_buf_ is a shared resource, so mutually exclude here.
  ScopedAcquire<KernelMutex> lock(&pm4_ib_mutex_);

  // Obtain reference to any container queue.
  core::Queue* queue = core::Queue::Convert(public_handle());

  // Obtain a queue slot for a single AQL packet.
  uint64_t write_idx = queue->AddWriteIndexAcqRel(1);

  // Spin (yielding) until the reserved slot is within one ring size of the read index.
  while ((write_idx - queue->LoadReadIndexRelaxed()) >= queue->amd_queue_.hsa_queue.size) {
    os::YieldThread();
  }

  uint32_t slot_idx = uint32_t(write_idx % queue->amd_queue_.hsa_queue.size);
  constexpr uint32_t slot_size_b = 0x40;  // 64 bytes per queue slot
  uint32_t* queue_slot =
      (uint32_t*)(uintptr_t(queue->amd_queue_.hsa_queue.base_address) + (slot_idx * slot_size_b));

  // Copy client PM4 command into IB.
  // Note the strict '<': a command of exactly pm4_ib_size_b_ bytes trips the assert.
  assert(cmd_size_b < pm4_ib_size_b_ && "PM4 exceeds IB size");
  memcpy(pm4_ib_buf_, cmd_data, cmd_size_b);

  // Construct a PM4 command to execute the IB.
  constexpr uint32_t ib_jump_size_dw = 4;

  // Header, IB base address (low part passed as address >> 2, high part as address >> 32) and
  // IB size in dwords with the valid bit set.
  uint32_t ib_jump_cmd[ib_jump_size_dw] = {
      PM4_HDR(PM4_HDR_IT_OPCODE_INDIRECT_BUFFER, ib_jump_size_dw,
                              agent_->supported_isas()[0]->GetMajorVersion()),
      PM4_INDIRECT_BUFFER_DW1_IB_BASE_LO(uint32_t(uintptr_t(pm4_ib_buf_) >> 2)),
      PM4_INDIRECT_BUFFER_DW2_IB_BASE_HI(uint32_t(uintptr_t(pm4_ib_buf_) >> 32)),
      (PM4_INDIRECT_BUFFER_DW3_IB_SIZE(uint32_t(cmd_size_b / sizeof(uint32_t))) |
       PM4_INDIRECT_BUFFER_DW3_IB_VALID(1))};

  // To respect multi-producer semantics, first buffer commands for the queue slot.
  constexpr uint32_t slot_size_dw = uint32_t(slot_size_b / sizeof(uint32_t));  // 16 dwords
  uint32_t slot_data[slot_size_dw];
  hsa_signal_t local_signal = {0};  // only created/used when in_signal is null on gfx9+
  hsa_status_t err;

  if (agent_->supported_isas()[0]->GetMajorVersion() <= 8) {
    // Construct a set of PM4 to fit inside the AQL packet slot.
    // Slot layout: NOP padding (5 dw) | INDIRECT_BUFFER (4 dw) | RELEASE_MEM (7 dw) = 16 dw.
    uint32_t slot_dw_idx = 0;

    // Construct a no-op command to pad the queue slot.
    constexpr uint32_t rel_mem_size_dw = 7;
    constexpr uint32_t nop_pad_size_dw = slot_size_dw - (ib_jump_size_dw + rel_mem_size_dw);

    uint32_t* nop_pad = &slot_data[slot_dw_idx];
    slot_dw_idx += nop_pad_size_dw;

    nop_pad[0] = PM4_HDR(PM4_HDR_IT_OPCODE_NOP, nop_pad_size_dw,
                              agent_->supported_isas()[0]->GetMajorVersion());

    for (uint32_t i = 1; i < nop_pad_size_dw; ++i) {
      nop_pad[i] = 0;
    }

    // Copy in command to execute the IB.
    assert(slot_dw_idx + ib_jump_size_dw <= slot_size_dw && "PM4 exceeded queue slot size");
    uint32_t* ib_jump = &slot_data[slot_dw_idx];
    slot_dw_idx += ib_jump_size_dw;

    memcpy(ib_jump, ib_jump_cmd, sizeof(ib_jump_cmd));

    // Construct a command to advance the read index and invalidate the packet
    // header. This must be the last command since this releases the queue slot
    // for writing.
    assert(slot_dw_idx + rel_mem_size_dw <= slot_size_dw && "PM4 exceeded queue slot size");
    uint32_t* rel_mem = &slot_data[slot_dw_idx];

    rel_mem[0] = PM4_HDR(PM4_HDR_IT_OPCODE_RELEASE_MEM, rel_mem_size_dw,
                              agent_->supported_isas()[0]->GetMajorVersion());
    rel_mem[1] = PM4_RELEASE_MEM_DW1_EVENT_INDEX(PM4_RELEASE_MEM_EVENT_INDEX_AQL);
    rel_mem[2] = 0;
    rel_mem[3] = 0;
    rel_mem[4] = 0;
    rel_mem[5] = 0;
    rel_mem[6] = 0;
  } else if (agent_->supported_isas()[0]->GetMajorVersion() >= 9) {
    // Construct an AQL packet to jump to the PM4 IB.
    // NOTE(review): this struct is copied over the 64-byte slot buffer with sizeof(); it is
    // presumably exactly 64 bytes (assuming hsa_signal_t is 8 bytes) - confirm.
    struct amd_aql_pm4_ib {
      uint16_t header;
      uint16_t ven_hdr;
      uint32_t ib_jump_cmd[4];
      uint32_t dw_cnt_remain;
      uint32_t reserved[8];
      hsa_signal_t completion_signal;
    };

    // No caller signal: create a private one (initial value 1) to block on after submission.
    // The status is only checked by assert, i.e. not at all when asserts are compiled out.
    if (!in_signal) {
      err = hsa_signal_create(1, 0, NULL, &local_signal);
      assert(err == HSA_STATUS_SUCCESS);
    }

    constexpr uint32_t AMD_AQL_FORMAT_PM4_IB = 0x1;

    amd_aql_pm4_ib aql_pm4_ib{};  // value-initialised, so reserved[] is zero
    aql_pm4_ib.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE |
                        (acquireFence << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
                        (releaseFence << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);

    aql_pm4_ib.ven_hdr = AMD_AQL_FORMAT_PM4_IB;
    aql_pm4_ib.ib_jump_cmd[0] = ib_jump_cmd[0];
    aql_pm4_ib.ib_jump_cmd[1] = ib_jump_cmd[1];
    aql_pm4_ib.ib_jump_cmd[2] = ib_jump_cmd[2];
    aql_pm4_ib.ib_jump_cmd[3] = ib_jump_cmd[3];
    aql_pm4_ib.dw_cnt_remain = 0xA;
    aql_pm4_ib.completion_signal = in_signal ? *in_signal : local_signal;

    memcpy(slot_data, &aql_pm4_ib, sizeof(aql_pm4_ib));
  } else {
    // Not reachable for integer major versions: the two conditions above cover <= 8 and >= 9.
    assert(false && "AqlQueue::ExecutePM4 not implemented");
  }

  // Copy buffered commands into the queue slot.
  // Overwrite the AQL invalid header (first dword) last.
  // This prevents the slot from being read until it's fully written.
  memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
  if (IsDeviceMemRingBuf() && needsPcieOrdering()) {
    // Ensure the packet body is written as header may get reordered when writing over PCIE
    _mm_sfence();
  }
  atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release);

  // Submit the packet slot.
  core::Signal* doorbell = core::Signal::Convert(queue->amd_queue_.hsa_queue.doorbell_signal);
  doorbell->StoreRelease(write_idx);

  // Wait for the packet to be consumed.
  if (agent_->supported_isas()[0]->GetMajorVersion() <= 8) {
    // No completion signal in this path: poll until the read index has moved past our slot.
    while (queue->LoadReadIndexRelaxed() <= write_idx)
      os::YieldThread();

    // Complete the caller's signal on their behalf so they can still wait on it.
    if (in_signal) hsa_signal_store_screlease(*in_signal, 0);
  } else if (!in_signal) {
    // On gfx9 and newer, if in_signal is not provided, we block and wait for own signal
    hsa_signal_value_t ret;
    ret = hsa_signal_wait_scacquire(local_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
                                    HSA_WAIT_STATE_ACTIVE);
    err = hsa_signal_destroy(local_signal);
    assert(ret == 0 && err == HSA_STATUS_SUCCESS);
  }
}

void AqlQueue::FillBufRsrcWord0() {
  SQ_BUF_RSRC_WORD0 srd0;
  uintptr_t scratch_base = uintptr_t(queue_scratch_.main_queue_base);

  srd0.bits.BASE_ADDRESS = uint32_t(scratch_base);
  amd_queue_.scratch_resource_descriptor[0] = srd0.u32All;
}

void AqlQueue::FillBufRsrcWord1() {
  SQ_BUF_RSRC_WORD1 srd1;
  uint32_t scratch_base_hi = 0;

#ifdef HSA_LARGE_MODEL
  uintptr_t scratch_base = uintptr_t(queue_scratch_.main_queue_base);
  scratch_base_hi = uint32_t(scratch_base >> 32);
  #endif

  srd1.bits.BASE_ADDRESS_HI = scratch_base_hi;
  srd1.bits.STRIDE = 0;
  srd1.bits.CACHE_SWIZZLE = 0;
  srd1.bits.SWIZZLE_ENABLE = 1;

  amd_queue_.scratch_resource_descriptor[1] = srd1.u32All;
}

void AqlQueue::FillBufRsrcWord1_Gfx11() {
  SQ_BUF_RSRC_WORD1_GFX11 srd1;
  uint32_t scratch_base_hi = 0;

#ifdef HSA_LARGE_MODEL
  uintptr_t scratch_base = uintptr_t(queue_scratch_.main_queue_base);
  scratch_base_hi = uint32_t(scratch_base >> 32);
#endif

  srd1.bits.BASE_ADDRESS_HI = scratch_base_hi;
  srd1.bits.STRIDE = 0;
  srd1.bits.SWIZZLE_ENABLE = 1;

  amd_queue_.scratch_resource_descriptor[1] = srd1.u32All;
}

void AqlQueue::FillBufRsrcWord2() {
  SQ_BUF_RSRC_WORD2 srd2;
  const auto& agent_props = agent_->properties();
  const uint32_t num_xcc = agent_props.NumXcc;

   // report size per XCC
  srd2.bits.NUM_RECORDS = uint32_t(queue_scratch_.main_size / num_xcc);

  amd_queue_.scratch_resource_descriptor[2] = srd2.u32All;
}

void AqlQueue::FillBufRsrcWord3() {
  SQ_BUF_RSRC_WORD3 srd3;

  srd3.bits.DST_SEL_X = SQ_SEL_X;
  srd3.bits.DST_SEL_Y = SQ_SEL_Y;
  srd3.bits.DST_SEL_Z = SQ_SEL_Z;
  srd3.bits.DST_SEL_W = SQ_SEL_W;
  srd3.bits.NUM_FORMAT = BUF_NUM_FORMAT_UINT;
  srd3.bits.DATA_FORMAT = BUF_DATA_FORMAT_32;
  srd3.bits.ELEMENT_SIZE = 1;  // 4
  srd3.bits.INDEX_STRIDE = 3;  // 64
  srd3.bits.ADD_TID_ENABLE = 1;
  srd3.bits.ATC__CI__VI = (agent_->profile() == HSA_PROFILE_FULL);
  srd3.bits.HASH_ENABLE = 0;
  srd3.bits.HEAP = 0;
  srd3.bits.MTYPE__CI__VI = 0;
  srd3.bits.TYPE = SQ_RSRC_BUF;

  amd_queue_.scratch_resource_descriptor[3] = srd3.u32All;
}

void AqlQueue::FillBufRsrcWord3_Gfx10() {
  SQ_BUF_RSRC_WORD3_GFX10 srd3;

  srd3.bits.DST_SEL_X = SQ_SEL_X;
  srd3.bits.DST_SEL_Y = SQ_SEL_Y;
  srd3.bits.DST_SEL_Z = SQ_SEL_Z;
  srd3.bits.DST_SEL_W = SQ_SEL_W;
  srd3.bits.FORMAT = BUF_FORMAT_32_UINT;
  srd3.bits.RESERVED1 = 0;
  srd3.bits.INDEX_STRIDE = 0;  // filled in by CP
  srd3.bits.ADD_TID_ENABLE = 1;
  srd3.bits.RESOURCE_LEVEL = 1;
  srd3.bits.RESERVED2 = 0;
  srd3.bits.OOB_SELECT = 2;  // no bounds check in swizzle mode
  srd3.bits.TYPE = SQ_RSRC_BUF;

  amd_queue_.scratch_resource_descriptor[3] = srd3.u32All;
}

void AqlQueue::FillBufRsrcWord3_Gfx11() {
  SQ_BUF_RSRC_WORD3_GFX11 srd3;

  srd3.bits.DST_SEL_X = SQ_SEL_X;
  srd3.bits.DST_SEL_Y = SQ_SEL_Y;
  srd3.bits.DST_SEL_Z = SQ_SEL_Z;
  srd3.bits.DST_SEL_W = SQ_SEL_W;
  srd3.bits.FORMAT = BUF_FORMAT_32_UINT;
  srd3.bits.RESERVED1 = 0;
  srd3.bits.INDEX_STRIDE = 0;  // filled in by CP
  srd3.bits.ADD_TID_ENABLE = 1;
  srd3.bits.RESERVED2 = 0;
  srd3.bits.OOB_SELECT = 2;  // no bounds check in swizzle mode
  srd3.bits.TYPE = SQ_RSRC_BUF;

  amd_queue_.scratch_resource_descriptor[3] = srd3.u32All;
}

void AqlQueue::FillBufRsrcWord3_Gfx12() {
  SQ_BUF_RSRC_WORD3_GFX12 srd3;

  srd3.bits.DST_SEL_X = SQ_SEL_X;
  srd3.bits.DST_SEL_Y = SQ_SEL_Y;
  srd3.bits.DST_SEL_Z = SQ_SEL_Z;
  srd3.bits.DST_SEL_W = SQ_SEL_W;
  srd3.bits.FORMAT = BUF_FORMAT_32_UINT;
  srd3.bits.RESERVED1 = 0;
  srd3.bits.INDEX_STRIDE = 0;  // filled in by CP
  srd3.bits.ADD_TID_ENABLE = 1;
  srd3.bits.WRITE_COMPRESS_ENABLE = 0;
  srd3.bits.COMPRESSION_EN = 0;
  srd3.bits.COMPRESSION_ACCESS_MODE = 0;
  srd3.bits.OOB_SELECT = 2;  // no bounds check in swizzle mode
  srd3.bits.TYPE = SQ_RSRC_BUF;

  amd_queue_.scratch_resource_descriptor[3] = srd3.u32All;
}

// Programs amd_queue_.compute_tmpring_size for the main scratch allocation.
// With no main scratch (main_size == 0) the value is all zeroes, so wavefront limits are only
// set while scratch is in use. Otherwise WAVESIZE is the per-wave scratch footprint in units of
// mem_alignment_size (rounded up) and WAVES is the number of such waves that fit in one XCC's
// share of main_size, capped by the CU-derived limit below.
void AqlQueue::FillComputeTmpRingSize() {
  COMPUTE_TMPRING_SIZE tmpring_size = {};
  if (queue_scratch_.main_size == 0) {
    amd_queue_.compute_tmpring_size = tmpring_size.u32All;
    return;
  }

  const auto& agent_props = agent_->properties();
  const uint32_t num_xcc = agent_props.NumXcc;

  // Cap on scratch waves: CU count times scratch slots per CU.
  // NOTE(review): the GFX11/GFX12 variants of this function divide by
  // (NumSIMDPerCU * NumXcc) here, whereas this one does not divide by NumXcc even though the
  // wave count below is per XCC - confirm the difference is intended.
  const uint32_t cu_total = agent_props.NumFComputeCores / agent_props.NumSIMDPerCU;
  const uint32_t wave_cap = cu_total * agent_props.MaxSlotsScratchCU;

  // Per-wave scratch size, rounded up to whole mem_alignment_size units.
  uint32_t wave_scratch =
      (((queue_scratch_.main_lanes_per_wave * queue_scratch_.main_size_per_thread) +
        queue_scratch_.mem_alignment_size - 1) /
       queue_scratch_.mem_alignment_size);
  tmpring_size.bits.WAVESIZE = wave_scratch;
  // Reading the bitfield back detects truncation of wave_scratch.
  assert(wave_scratch == tmpring_size.bits.WAVESIZE && "WAVESIZE Overflow.");

  // Waves that fit into one XCC's share of the allocation.
  const uint32_t waves_that_fit = (queue_scratch_.main_size / num_xcc) /
      (tmpring_size.bits.WAVESIZE * queue_scratch_.mem_alignment_size);
  tmpring_size.bits.WAVES = std::min(waves_that_fit, wave_cap);

  amd_queue_.compute_tmpring_size = tmpring_size.u32All;
  assert((tmpring_size.bits.WAVES % (agent_props.NumShaderBanks / num_xcc) == 0) &&
         "Invalid scratch wave count.  Must be divisible by #SEs.");
}

// Programs amd_queue_.alt_compute_tmpring_size for the alternate scratch allocation.
// With no alternate scratch (alt_size == 0) the value is all zeroes, so wavefront limits are
// only set while scratch is in use. Otherwise WAVESIZE is the per-wave scratch footprint in
// units of mem_alignment_size (rounded up) and WAVES is the number of such waves that fit in
// one XCC's share of alt_size, capped by the CU-derived limit below.
void AqlQueue::FillAltComputeTmpRingSize() {
  COMPUTE_TMPRING_SIZE tmpring_size = {};
  if (queue_scratch_.alt_size == 0) {
    amd_queue_.alt_compute_tmpring_size = tmpring_size.u32All;
    return;
  }

  const auto& agent_props = agent_->properties();
  const uint32_t num_xcc = agent_props.NumXcc;

  // Cap on scratch waves: CU count times scratch slots per CU.
  // NOTE(review): the GFX11/GFX12 main-scratch variants divide by (NumSIMDPerCU * NumXcc)
  // here, whereas this one does not divide by NumXcc even though the wave count below is per
  // XCC - confirm the difference is intended.
  const uint32_t cu_total = agent_props.NumFComputeCores / agent_props.NumSIMDPerCU;
  const uint32_t wave_cap = cu_total * agent_props.MaxSlotsScratchCU;

  // Per-wave scratch size, rounded up to whole mem_alignment_size units.
  uint32_t wave_scratch =
      (((queue_scratch_.alt_lanes_per_wave * queue_scratch_.alt_size_per_thread) +
        queue_scratch_.mem_alignment_size - 1) /
       queue_scratch_.mem_alignment_size);
  tmpring_size.bits.WAVESIZE = wave_scratch;
  // Reading the bitfield back detects truncation of wave_scratch.
  assert(wave_scratch == tmpring_size.bits.WAVESIZE && "WAVESIZE Overflow.");

  // Waves that fit into one XCC's share of the allocation.
  const uint32_t waves_that_fit = (queue_scratch_.alt_size / num_xcc) /
      (tmpring_size.bits.WAVESIZE * queue_scratch_.mem_alignment_size);
  tmpring_size.bits.WAVES = std::min(waves_that_fit, wave_cap);

  amd_queue_.alt_compute_tmpring_size = tmpring_size.u32All;
  assert((tmpring_size.bits.WAVES % (agent_props.NumShaderBanks / num_xcc) == 0) &&
         "Invalid scratch wave count.  Must be divisible by #SEs.");
}

// GFX11 variant: programs amd_queue_.compute_tmpring_size for the main scratch allocation.
// With no main scratch the value stays all zeroes. Otherwise WAVESIZE is the per-wave scratch
// footprint in units of mem_alignment_size (rounded up) and WAVES is a per-shader-engine wave
// count, capped by the per-XCC CU-derived limit.
void AqlQueue::FillComputeTmpRingSize_Gfx11() {
  COMPUTE_TMPRING_SIZE_GFX11 tmpring_size = {};

  if (queue_scratch_.main_size != 0) {
    const auto& agent_props = agent_->properties();
    const uint32_t num_xcc = agent_props.NumXcc;

    // Cap on scratch waves: CUs per XCC times scratch slots per CU.
    const uint32_t cus_per_xcc =
        agent_props.NumFComputeCores / (agent_props.NumSIMDPerCU * num_xcc);
    const uint32_t wave_cap = cus_per_xcc * agent_props.MaxSlotsScratchCU;

    // Per-wave scratch size, rounded up to whole mem_alignment_size units.
    uint32_t wave_scratch =
        (((queue_scratch_.main_lanes_per_wave * queue_scratch_.main_size_per_thread) +
          queue_scratch_.mem_alignment_size - 1) /
         queue_scratch_.mem_alignment_size);

    tmpring_size.bits.WAVESIZE = wave_scratch;
    // Reading the bitfield back detects truncation of wave_scratch.
    assert(wave_scratch == tmpring_size.bits.WAVESIZE && "WAVESIZE Overflow.");

    // Waves that fit into the whole allocation ...
    uint32_t waves_that_fit =
        queue_scratch_.main_size / (tmpring_size.bits.WAVESIZE * queue_scratch_.mem_alignment_size);

    // ... then converted to a per-engine figure: GFX11 takes waves per engine, not the total.
    waves_that_fit /= agent_props.NumShaderBanks;
    tmpring_size.bits.WAVES = std::min(waves_that_fit, wave_cap);
  }

  amd_queue_.compute_tmpring_size = tmpring_size.u32All;
}

// GFX12 variant: programs amd_queue_.compute_tmpring_size for the main scratch allocation.
// Same computation as the GFX11 variant but through COMPUTE_TMPRING_SIZE_GFX12, whose field
// sizes differ. With no main scratch the value stays all zeroes.
// Consider refactoring code for GFX11/GFX12 if no other changes.
void AqlQueue::FillComputeTmpRingSize_Gfx12() {
  COMPUTE_TMPRING_SIZE_GFX12 tmpring_size = {};

  if (queue_scratch_.main_size != 0) {
    const auto& agent_props = agent_->properties();
    const uint32_t num_xcc = agent_props.NumXcc;

    // Cap on scratch waves: CUs per XCC times scratch slots per CU.
    const uint32_t cus_per_xcc =
        agent_props.NumFComputeCores / (agent_props.NumSIMDPerCU * num_xcc);
    const uint32_t wave_cap = cus_per_xcc * agent_props.MaxSlotsScratchCU;

    // Per-wave scratch size, rounded up to whole mem_alignment_size units.
    uint32_t wave_scratch =
        (((queue_scratch_.main_lanes_per_wave * queue_scratch_.main_size_per_thread) +
          queue_scratch_.mem_alignment_size - 1) /
         queue_scratch_.mem_alignment_size);

    tmpring_size.bits.WAVESIZE = wave_scratch;
    // Reading the bitfield back detects truncation of wave_scratch.
    assert(wave_scratch == tmpring_size.bits.WAVESIZE && "WAVESIZE Overflow.");

    // Waves that fit into the whole allocation ...
    uint32_t waves_that_fit =
        queue_scratch_.main_size / (tmpring_size.bits.WAVESIZE * queue_scratch_.mem_alignment_size);

    // ... then converted to a per-engine figure: as on GFX11, the register takes waves per
    // engine instead of the total.
    waves_that_fit /= agent_props.NumShaderBanks;
    tmpring_size.bits.WAVES = std::min(waves_that_fit, wave_cap);
  }

  amd_queue_.compute_tmpring_size = tmpring_size.u32All;
}

// @brief Define the Scratch Buffer Descriptor and related parameters
// that enable kernel access to scratch memory.
//
// The set of Fill* helpers is chosen by the major version of the agent's
// first supported ISA; afterwards the flat scratch fields of amd_queue_ are
// copied from queue_scratch_.
void AqlQueue::InitScratchSRD() {
  const auto isa_major = agent_->supported_isas()[0]->GetMajorVersion();

  if (isa_major == 12) {
    FillBufRsrcWord0();
    FillBufRsrcWord1_Gfx11();
    FillBufRsrcWord2();
    FillBufRsrcWord3_Gfx12();
    FillComputeTmpRingSize_Gfx12();
  } else if (isa_major == 11) {
    FillBufRsrcWord0();
    FillBufRsrcWord1_Gfx11();
    FillBufRsrcWord2();
    FillBufRsrcWord3_Gfx11();
    FillComputeTmpRingSize_Gfx11();
  } else if (isa_major == 10) {
    FillBufRsrcWord0();
    FillBufRsrcWord1();
    FillBufRsrcWord2();
    FillBufRsrcWord3_Gfx10();
    FillComputeTmpRingSize();
  } else {
    // Every other major version also programs the alternate tmpring size.
    FillBufRsrcWord0();
    FillBufRsrcWord1();
    FillBufRsrcWord2();
    FillBufRsrcWord3();
    FillComputeTmpRingSize();
    FillAltComputeTmpRingSize();
  }

  // Flat scratch parameters for the main scratch allocation.
  amd_queue_.scratch_backing_memory_location = queue_scratch_.main_queue_process_offset;

  // For backwards compatibility the *_wave64_lane_byte_size fields record the
  // per-lane scratch of a 64 lane wavefront. If scratch was allocated for
  // 32 lane waves the effective size for a 64 lane wave is halved.
  amd_queue_.scratch_wave64_lane_byte_size = static_cast<uint32_t>(
      (queue_scratch_.main_size_per_thread * queue_scratch_.main_lanes_per_wave) / 64);

  // The same parameters for the alternate scratch allocation.
  amd_queue_.alt_scratch_backing_memory_location = queue_scratch_.alt_queue_process_offset;
  amd_queue_.alt_scratch_wave64_lane_byte_size = static_cast<uint32_t>(
      (queue_scratch_.alt_size_per_thread * queue_scratch_.alt_lanes_per_wave) / 64);

  amd_queue_.alt_scratch_dispatch_limit_x = queue_scratch_.alt_dispatch_limit_x;
  amd_queue_.alt_scratch_dispatch_limit_y = queue_scratch_.alt_dispatch_limit_y;
  amd_queue_.alt_scratch_dispatch_limit_z = queue_scratch_.alt_dispatch_limit_z;
}

// Asks the agent's driver to allocate `gws_slot_count` GWS slots for this queue
// and, on success, marks the queue as HSA_QUEUE_TYPE_COOPERATIVE.
// Any driver failure is reported as HSA_STATUS_ERROR_OUT_OF_RESOURCES.
hsa_status_t AqlQueue::EnableGWS(int gws_slot_count) {
  // Out-parameter required by AllocQueueGWS; its value is not used here.
  uint32_t ignored_out;
  const auto alloc_status =
      agent_->driver().AllocQueueGWS(queue_id_, gws_slot_count, &ignored_out);

  if (alloc_status == HSA_STATUS_SUCCESS) {
    amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_COOPERATIVE;
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_blit_kernel.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_blit_kernel.h"

#include <algorithm>
#include <sstream>
#include <string>

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/hsa_internal.h"
#include "core/util/utils.h"

namespace rocr {
namespace AMD {

// Returns a reference to a function-local static string holding the source
// text of the three blit shaders defined below: CopyAligned, CopyMisaligned
// and Fill.
//
// The `var <name> = <value>` lines inside the text are looked up at runtime by
// GetKernelSourceParam(), so their exact spelling ("var ", the name, " = ",
// the value, then a newline) must be preserved when editing the shader text.
// Everything between R"( and )" is string data, including the // comments.
static std::string& kBlitKernelSource() {
  static std::string kBlitKernelSource_(R"(
  // Compatibility function for GFXIP 7.

  function s_load_dword_offset(byte_offset)
    if kGFXIPVersion == 7
      return byte_offset / 4
    else
      return byte_offset
    end
  end

  // Memory copy for all cases except:
  //  (src_addr & 0x3) != (dst_addr & 0x3)
  //
  // Kernel argument buffer:
  //   [DW  0, 1]  Phase 1 src start address
  //   [DW  2, 3]  Phase 1 dst start address
  //   [DW  4, 5]  Phase 2 src start address
  //   [DW  6, 7]  Phase 2 dst start address
  //   [DW  8, 9]  Phase 3 src start address
  //   [DW 10,11]  Phase 3 dst start address
  //   [DW 12,13]  Phase 4 src start address
  //   [DW 14,15]  Phase 4 dst start address
  //   [DW 16,17]  Phase 4 src end address
  //   [DW 18,19]  Phase 4 dst end address
  //   [DW 20   ]  Total number of workitems

  var kCopyAlignedVecWidth = 4
  var kCopyAlignedUnroll = 1

  shader CopyAligned
    type(CS)
    user_sgpr_count(2)
    sgpr_count(32)
    vgpr_count(8 + (kCopyAlignedUnroll * kCopyAlignedVecWidth))

    // Retrieve kernel arguments.
    s_load_dwordx4          s[4:7], s[0:1], s_load_dword_offset(0x0)
    s_load_dwordx4          s[8:11], s[0:1], s_load_dword_offset(0x10)
    s_load_dwordx4          s[12:15], s[0:1], s_load_dword_offset(0x20)
    s_load_dwordx4          s[16:19], s[0:1], s_load_dword_offset(0x30)
    s_load_dwordx4          s[20:23], s[0:1], s_load_dword_offset(0x40)
    s_load_dword            s24, s[0:1], s_load_dword_offset(0x50)
    s_waitcnt               lgkmcnt(0)

    // Compute workitem id.
    s_lshl_b32              s2, s2, 0x6
    v_add_u32               v0, vcc, s2, v0

    // =====================================================
    // Phase 1: Byte copy up to 0x100 destination alignment.
    // =====================================================

    // Compute phase source address.
    v_mov_b32               v3, s5
    v_add_u32               v2, vcc, v0, s4
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s7
    v_add_u32               v4, vcc, v0, s6
    v_addc_u32              v5, vcc, v5, 0x0, vcc

  L_COPY_ALIGNED_PHASE_1_LOOP:
    // Mask off lanes (or branch out) after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[8:9]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_1_DONE
    s_and_b64               exec, exec, vcc

    // Load from/advance the source address.
    flat_load_ubyte         v1, v[2:3]
    s_waitcnt               vmcnt(0)
    v_add_u32               v2, vcc, v2, s24
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Write to/advance the destination address.
    flat_store_byte         v[4:5], v1
    v_add_u32               v4, vcc, v4, s24
    v_addc_u32              v5, vcc, v5, 0x0, vcc

    // Repeat until branched out.
    s_branch                L_COPY_ALIGNED_PHASE_1_LOOP

  L_COPY_ALIGNED_PHASE_1_DONE:
    // Restore EXEC mask for all lanes.
    s_mov_b64               exec, 0xFFFFFFFFFFFFFFFF

    // ========================================================
    // Phase 2: Unrolled dword[x4] copy up to last whole block.
    // ========================================================

    // Compute unrolled dword[x4] stride across all threads.
    if kCopyAlignedVecWidth == 4
      s_lshl_b32            s25, s24, 0x4
    else
      s_lshl_b32            s25, s24, 0x2
    end

    // Compute phase source address.
    if kCopyAlignedVecWidth == 4
      v_lshlrev_b32         v1, 0x4, v0
    else
      v_lshlrev_b32         v1, 0x2, v0
    end

    v_mov_b32               v3, s9
    v_add_u32               v2, vcc, v1, s8
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s11
    v_add_u32               v4, vcc, v1, s10
    v_addc_u32              v5, vcc, v5, 0x0, vcc

  L_COPY_ALIGNED_PHASE_2_LOOP:
    // Branch out after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[12:13]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_2_DONE

    // Load from/advance the source address.
    for var i = 0; i < kCopyAlignedUnroll; i ++
      if kCopyAlignedVecWidth == 4
        flat_load_dwordx4   v[8 + (i * 4)], v[2:3]
      else
        flat_load_dword     v[8 + i], v[2:3]
      end

      v_add_u32             v2, vcc, v2, s25
      v_addc_u32            v3, vcc, v3, 0x0, vcc
    end

    // Write to/advance the destination address.
    s_waitcnt               vmcnt(0)

    for var i = 0; i < kCopyAlignedUnroll; i ++
      if kCopyAlignedVecWidth == 4
        flat_store_dwordx4  v[4:5], v[8 + (i * 4)]
      else
        flat_store_dword    v[4:5], v[8 + i]
      end

      v_add_u32             v4, vcc, v4, s25
      v_addc_u32            v5, vcc, v5, 0x0, vcc
    end

    // Repeat until branched out.
    s_branch                L_COPY_ALIGNED_PHASE_2_LOOP

  L_COPY_ALIGNED_PHASE_2_DONE:

    // ===========================================
    // Phase 3: Dword copy up to last whole dword.
    // ===========================================

    // Compute dword stride across all threads.
    s_lshl_b32              s25, s24, 0x2

    // Compute phase source address.
    v_lshlrev_b32           v1, 0x2, v0
    v_mov_b32               v3, s13
    v_add_u32               v2, vcc, v1, s12
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s15
    v_add_u32               v4, vcc, v1, s14
    v_addc_u32              v5, vcc, v5, 0x0, vcc

  L_COPY_ALIGNED_PHASE_3_LOOP:
    // Mask off lanes (or branch out) after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[16:17]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_3_DONE
    s_and_b64               exec, exec, vcc

    // Load from/advance the source address.
    flat_load_dword         v1, v[2:3]
    v_add_u32               v2, vcc, v2, s25
    v_addc_u32              v3, vcc, v3, 0x0, vcc
    s_waitcnt               vmcnt(0)

    // Write to/advance the destination address.
    flat_store_dword        v[4:5], v1
    v_add_u32               v4, vcc, v4, s25
    v_addc_u32              v5, vcc, v5, 0x0, vcc

    // Repeat until branched out.
    s_branch                L_COPY_ALIGNED_PHASE_3_LOOP

  L_COPY_ALIGNED_PHASE_3_DONE:
    // Restore EXEC mask for all lanes.
    s_mov_b64               exec, 0xFFFFFFFFFFFFFFFF

    // =============================
    // Phase 4: Byte copy up to end.
    // =============================

    // Compute phase source address.
    v_mov_b32               v3, s17
    v_add_u32               v2, vcc, v0, s16
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s19
    v_add_u32               v4, vcc, v0, s18
    v_addc_u32              v5, vcc, v5, 0x0, vcc

    // Mask off lanes (or branch out) after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[20:21]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_4_DONE
    s_and_b64               exec, exec, vcc

    // Load from the source address.
    flat_load_ubyte         v1, v[2:3]
    s_waitcnt               vmcnt(0)

    // Write to the destination address.
    flat_store_byte         v[4:5], v1

  L_COPY_ALIGNED_PHASE_4_DONE:
    s_endpgm
  end

  // Memory copy for this case:
  //  (src_addr & 0x3) != (dst_addr & 0x3)
  //
  // Kernel argument buffer:
  //   [DW  0, 1]  Phase 1 src start address
  //   [DW  2, 3]  Phase 1 dst start address
  //   [DW  4, 5]  Phase 2 src start address
  //   [DW  6, 7]  Phase 2 dst start address
  //   [DW  8, 9]  Phase 2 src end address
  //   [DW 10,11]  Phase 2 dst end address
  //   [DW 12   ]  Total number of workitems

  var kCopyMisalignedUnroll = 4

  shader CopyMisaligned
    type(CS)
    user_sgpr_count(2)
    sgpr_count(23)
    vgpr_count(6 + kCopyMisalignedUnroll)

    // Retrieve kernel arguments.
    s_load_dwordx4          s[4:7], s[0:1], s_load_dword_offset(0x0)
    s_load_dwordx4          s[8:11], s[0:1], s_load_dword_offset(0x10)
    s_load_dwordx4          s[12:15], s[0:1], s_load_dword_offset(0x20)
    s_load_dword            s16, s[0:1], s_load_dword_offset(0x30)
    s_waitcnt               lgkmcnt(0)

    // Compute workitem id.
    s_lshl_b32              s2, s2, 0x6
    v_add_u32               v0, vcc, s2, v0

    // ===================================================
    // Phase 1: Unrolled byte copy up to last whole block.
    // ===================================================

    // Compute phase source address.
    v_mov_b32               v3, s5
    v_add_u32               v2, vcc, v0, s4
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s7
    v_add_u32               v4, vcc, v0, s6
    v_addc_u32              v5, vcc, v5, 0x0, vcc

  L_COPY_MISALIGNED_PHASE_1_LOOP:
    // Branch out after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[8:9]
    s_cbranch_vccz          L_COPY_MISALIGNED_PHASE_1_DONE

    // Load from/advance the source address.
    for var i = 0; i < kCopyMisalignedUnroll; i ++
      flat_load_ubyte       v[6 + i], v[2:3]
      v_add_u32             v2, vcc, v2, s16
      v_addc_u32            v3, vcc, v3, 0x0, vcc
    end

    // Write to/advance the destination address.
    s_waitcnt               vmcnt(0)

    for var i = 0; i < kCopyMisalignedUnroll; i ++
      flat_store_byte       v[4:5], v[6 + i]
      v_add_u32             v4, vcc, v4, s16
      v_addc_u32            v5, vcc, v5, 0x0, vcc
    end

    // Repeat until branched out.
    s_branch                L_COPY_MISALIGNED_PHASE_1_LOOP

  L_COPY_MISALIGNED_PHASE_1_DONE:

    // =============================
    // Phase 2: Byte copy up to end.
    // =============================

    // Compute phase source address.
    v_mov_b32               v3, s9
    v_add_u32               v2, vcc, v0, s8
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Compute phase destination address.
    v_mov_b32               v5, s11
    v_add_u32               v4, vcc, v0, s10
    v_addc_u32              v5, vcc, v5, 0x0, vcc

  L_COPY_MISALIGNED_PHASE_2_LOOP:
    // Mask off lanes (or branch out) after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[12:13]
    s_cbranch_vccz          L_COPY_MISALIGNED_PHASE_2_DONE
    s_and_b64               exec, exec, vcc

    // Load from/advance the source address.
    flat_load_ubyte         v1, v[2:3]
    v_add_u32               v2, vcc, v2, s16
    v_addc_u32              v3, vcc, v3, 0x0, vcc
    s_waitcnt               vmcnt(0)

    // Write to/advance the destination address.
    flat_store_byte         v[4:5], v1
    v_add_u32               v4, vcc, v4, s16
    v_addc_u32              v5, vcc, v5, 0x0, vcc

    // Repeat until branched out.
    s_branch                L_COPY_MISALIGNED_PHASE_2_LOOP

  L_COPY_MISALIGNED_PHASE_2_DONE:
    s_endpgm
  end

  // Memory fill for dword-aligned region.
  //
  // Kernel argument buffer:
  //   [DW  0, 1]  Phase 1 dst start address
  //   [DW  2, 3]  Phase 2 dst start address
  //   [DW  4, 5]  Phase 2 dst end address
  //   [DW  6   ]  Value to fill memory with
  //   [DW  7   ]  Total number of workitems

  var kFillVecWidth = 4
  var kFillUnroll = 1

  shader Fill
    type(CS)
    user_sgpr_count(2)
    sgpr_count(19)
    vgpr_count(8)

    // Retrieve kernel arguments.
    s_load_dwordx4          s[4:7], s[0:1], s_load_dword_offset(0x0)
    s_load_dwordx4          s[8:11], s[0:1], s_load_dword_offset(0x10)
    s_waitcnt               lgkmcnt(0)

    // Compute workitem id.
    s_lshl_b32              s2, s2, 0x6
    v_add_u32               v0, vcc, s2, v0

    // Copy fill pattern into VGPRs.
    for var i = 0; i < kFillVecWidth; i ++
      v_mov_b32           v[4 + i], s10
    end

    // ========================================================
    // Phase 1: Unrolled dword[x4] fill up to last whole block.
    // ========================================================

    // Compute unrolled dword[x4] stride across all threads.
    if kFillVecWidth == 4
      s_lshl_b32            s12, s11, 0x4
    else
      s_lshl_b32            s12, s11, 0x2
    end

    // Compute phase destination address.
    if kFillVecWidth == 4
      v_lshlrev_b32         v1, 0x4, v0
    else
      v_lshlrev_b32         v1, 0x2, v0
    end

    v_mov_b32               v3, s5
    v_add_u32               v2, vcc, v1, s4
    v_addc_u32              v3, vcc, v3, 0x0, vcc

  L_FILL_PHASE_1_LOOP:
    // Branch out after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[6:7]
    s_cbranch_vccz          L_FILL_PHASE_1_DONE

    // Write to/advance the destination address.
    for var i = 0; i < kFillUnroll; i ++
      if kFillVecWidth == 4
        flat_store_dwordx4  v[2:3], v[4:7]
      else
        flat_store_dword    v[2:3], v4
      end

      v_add_u32             v2, vcc, v2, s12
      v_addc_u32            v3, vcc, v3, 0x0, vcc
    end

    // Repeat until branched out.
    s_branch                L_FILL_PHASE_1_LOOP

  L_FILL_PHASE_1_DONE:

    // ==============================
    // Phase 2: Dword fill up to end.
    // ==============================

    // Compute dword stride across all threads.
    s_lshl_b32              s12, s11, 0x2

    // Compute phase destination address.
    v_lshlrev_b32           v1, 0x2, v0
    v_mov_b32               v3, s7
    v_add_u32               v2, vcc, v1, s6
    v_addc_u32              v3, vcc, v3, 0x0, vcc

  L_FILL_PHASE_2_LOOP:
    // Mask off lanes (or branch out) after phase end.
    v_cmp_lt_u64            vcc, v[2:3], s[8:9]
    s_cbranch_vccz          L_FILL_PHASE_2_DONE
    s_and_b64               exec, exec, vcc

    // Write to/advance the destination address.
    flat_store_dword        v[2:3], v4
    v_add_u32               v2, vcc, v2, s12
    v_addc_u32              v3, vcc, v3, 0x0, vcc

    // Repeat until branched out.
    s_branch                L_FILL_PHASE_2_LOOP

  L_FILL_PHASE_2_DONE:
    s_endpgm
  end
)");
  // Hand out the single shared instance; callers receive a mutable reference.
  return kBlitKernelSource_;
}

// Search kernel source for variable definition and return value.
//
// Looks for the text "var <paramName> = " in kBlitKernelSource() and parses
// the remainder of that line as an integer.
//
// @param paramName  Name of the shader `var` to look up.
// @return The integer value that follows the definition.
//
// Both the definition and the newline that ends it are expected to exist;
// each is checked with an assert. std::stoi throws if the text after the
// definition is not a number.
int GetKernelSourceParam(const char* paramName) {
  const std::string paramDef = std::string("var ") + paramName + " = ";
  const std::string& source = kBlitKernelSource();

  const std::string::size_type paramDefLoc = source.find(paramDef);
  assert(paramDefLoc != std::string::npos);

  // The value starts right after the definition and runs to the end of line.
  const std::string::size_type paramValLoc = paramDefLoc + paramDef.size();
  const std::string::size_type paramEndLoc = source.find('\n', paramValLoc);
  assert(paramEndLoc != std::string::npos);

  return std::stoi(source.substr(paramValLoc, paramEndLoc - paramValLoc));
}


// Defines `static int& name()`, an accessor for the shader parameter `name`.
// The value is read from the kernel source via GetKernelSourceParam(#name) the
// first time the accessor is called and cached in a function-local static.
// Initialization of a function-local static is already performed exactly once
// and is thread-safe (C++11), so no additional once-flag is required.
#define DEFINE_KERNEL_PARAM_FUNC(name) \
static int& name() { \
    static int val = GetKernelSourceParam(#name); \
    return val; \
}

// Accessors for every `var` the host code reads from the shader source.
DEFINE_KERNEL_PARAM_FUNC(kCopyAlignedVecWidth)
DEFINE_KERNEL_PARAM_FUNC(kCopyAlignedUnroll)
DEFINE_KERNEL_PARAM_FUNC(kCopyMisalignedUnroll)
DEFINE_KERNEL_PARAM_FUNC(kFillVecWidth)
DEFINE_KERNEL_PARAM_FUNC(kFillUnroll)

// Returns the `width`-bit wide field of `v` whose lowest bit is bit `pos`
// (bit 0 being the least significant bit).
//
// Shifts are done on unsigned operands and out-of-range shift counts are
// handled explicitly: a `pos` beyond the word yields 0, and a `width` that
// covers the whole word returns all remaining bits.
static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) {
  const unsigned kWordBits = static_cast<unsigned>(sizeof(unsigned) * 8);

  if (pos >= kWordBits) return 0;

  const unsigned shifted = v >> pos;
  if (width >= kWordBits) return shifted;

  return shifted & ((1u << width) - 1u);
}

// Binds the blit object to `queue` and puts every other member into its
// "not yet initialized" state. Resources are acquired later, in Initialize().
BlitKernel::BlitKernel(core::Queue* queue)
    : core::Blit(),
      queue_(queue),
      kernarg_async_(nullptr),  // allocated in Initialize()
      kernarg_async_mask_(0),
      kernarg_async_counter_(0),
      bytes_queued_(0),
      last_queued_(0),
      pending_search_index_(0),
      num_cus_(0) {
  // A zero handle marks "no completion signal created"; Destroy() checks it.
  completion_signal_.handle = 0;
}

// Nothing to release here; resources are released explicitly through Destroy().
BlitKernel::~BlitKernel() = default;

// Prepares the blit object for use on `agent`.
//
// Sizes the per-slot bookkeeping to the queue, creates the completion signal,
// allocates one 16-byte aligned KernelArgs slot per queue entry and assembles
// the three blit shaders.
//
// @param agent  Agent owning the queue; it is downcast to AMD::GpuAgent.
// @return The hsa_signal_create status if signal creation fails,
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the kernarg buffer cannot be
//         allocated, the result of EnableProfiling(true) when the agent has
//         profiling enabled, otherwise HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
  const auto queue_size = queue_->public_handle()->size;

  // The queue index is wrapped with this mask when addressing the ring.
  queue_bitmask_ = queue_size - 1;

  // Every byte of the per-slot history is set to 0xFF.
  bytes_written_.resize(queue_size);
  memset(&bytes_written_[0], -1, bytes_written_.size() * sizeof(BytesWritten));

  hsa_status_t status = HSA::hsa_signal_create(1, 0, NULL, &completion_signal_);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  const AMD::GpuAgent& gpuAgent = static_cast<const AMD::GpuAgent&>(agent);
  kernarg_async_ = reinterpret_cast<KernelArgs*>(
      gpuAgent.system_allocator()(queue_size * AlignUp(sizeof(KernelArgs), 16), 16,
                                  core::MemoryRegion::AllocateNoFlags));
  if (kernarg_async_ == nullptr) {
    // Without kernarg storage no command can be built. Undo the signal
    // creation so the object is left exactly as it was before this call.
    HSA::hsa_signal_destroy(completion_signal_);
    completion_signal_.handle = 0;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  kernarg_async_mask_ = queue_size - 1;

  // Obtain the number of compute units in the underlying agent.
  num_cus_ = gpuAgent.properties().NumFComputeCores / 4;

  // Assemble shaders to AQL code objects.
  std::map<KernelType, const char*> kernel_names = {
      {KernelType::CopyAligned, "CopyAligned"},
      {KernelType::CopyMisaligned, "CopyMisaligned"},
      {KernelType::Fill, "Fill"}};

  for (const auto& kernel_name : kernel_names) {
    KernelCode& kernel = kernels_[kernel_name.first];
    gpuAgent.AssembleShader(kernel_name.second, AMD::GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
                            kernel.code_buf_size_);
  }

  if (agent.profiling_enabled()) {
    return EnableProfiling(true);
  }

  return HSA_STATUS_SUCCESS;
}

// Releases everything Initialize() acquired: the assembled shaders, the
// kernarg buffer (if one was allocated) and the completion signal (if one was
// created). Runs under lock_ and always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::Destroy(const core::Agent& agent) {
  std::lock_guard<std::mutex> guard(lock_);

  const auto& gpu_agent = static_cast<const AMD::GpuAgent&>(agent);

  // Hand every assembled shader back to the agent.
  for (const auto& entry : kernels_) {
    const KernelCode& code = entry.second;
    gpu_agent.ReleaseShader(code.code_buf_, code.code_buf_size_);
  }

  if (kernarg_async_ != nullptr) {
    gpu_agent.system_deallocator()(kernarg_async_);
  }

  // A zero handle means no signal was ever created.
  if (completion_signal_.handle != 0) {
    HSA::hsa_signal_destroy(completion_signal_);
  }

  return HSA_STATUS_SUCCESS;
}

// Blocking copy of `size` bytes from `src` to `dst`.
//
// Submits the copy through the asynchronous overload with no dependencies,
// using the internal completion signal, then waits for that signal to drop
// below 1. Returns the submission status if submission fails,
// HSA_STATUS_ERROR if the wait yields a non-zero value, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t BlitKernel::SubmitLinearCopyCommand(void* dst, const void* src,
                                                 size_t size) {
  // lock_ serializes use of completion_signal_.
  std::lock_guard<std::mutex> guard(lock_);

  // Arm the signal; it is expected to be 0 once the copy has completed.
  HSA::hsa_signal_store_relaxed(completion_signal_, 1);

  std::vector<core::Signal*> no_dependencies;
  std::vector<core::Signal*> no_gang;

  const hsa_status_t submit_status =
      SubmitLinearCopyCommand(dst, src, size, no_dependencies,
                              *core::Signal::Convert(completion_signal_), no_gang);
  if (submit_status != HSA_STATUS_SUCCESS) {
    return submit_status;
  }

  // Wait for the packet to finish.
  const auto signal_value = HSA::hsa_signal_wait_scacquire(
      completion_signal_, HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1), HSA_WAIT_STATE_ACTIVE);

  // Anything other than 0 is an unexpected signal value.
  return (signal_value == 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Asynchronous copy of `size` bytes from `src` to `dst`.
//
// Writes one BARRIER_AND packet per group of up to five entries of
// `dep_signals`, followed by one kernel dispatch packet that performs the
// copy and uses `out_signal` as its completion signal, then rings the
// doorbell for the whole batch.
//
// The CopyAligned shader is used when src and dst share the same offset
// within a dword; otherwise the byte-wise CopyMisaligned shader is used.
//
// @param dst           Destination address.
// @param src           Source address.
// @param size          Number of bytes to copy.
// @param dep_signals   Signals the copy has to wait on.
// @param out_signal    Completion signal of the dispatch packet.
// @param gang_signals  Not used by this implementation.
// @return Always HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::SubmitLinearCopyCommand(
    void* dst, const void* src, size_t size,
    std::vector<core::Signal*>& dep_signals, core::Signal& out_signal,
    std::vector<core::Signal*>& gang_signals) {
  // Reserve write index for barrier(s) + dispatch packet.
  // Each barrier packet carries up to five dependent signals.
  const uint32_t num_barrier_packet = uint32_t((dep_signals.size() + 4) / 5);
  const uint32_t total_num_packet = num_barrier_packet + 1;

  uint64_t write_index;
  {
    std::lock_guard<std::mutex> lock(reservation_lock_);
    write_index = AcquireWriteIndex(total_num_packet);
    RecordBlitHistory(size, write_index + total_num_packet - 1);
  }

  // Remember the first reserved slot; write_index advances as packets are written.
  uint64_t write_index_temp = write_index;

  // Insert barrier packets to handle dependent signals.
  // Barrier bit keeps signal checking traffic from competing with a copy.
  const uint16_t kBarrierPacketHeader = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
      (1 << HSA_PACKET_HEADER_BARRIER) |
      (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
      (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);

  hsa_barrier_and_packet_t barrier_packet = {0};
  barrier_packet.header = HSA_PACKET_TYPE_INVALID;

  hsa_barrier_and_packet_t* queue_buffer =
      reinterpret_cast<hsa_barrier_and_packet_t*>(
          queue_->public_handle()->base_address);

  const size_t dep_signal_count = dep_signals.size();
  for (size_t i = 0; i < dep_signal_count; ++i) {
    const size_t idx = i % 5;
    barrier_packet.dep_signal[idx] = core::Signal::Convert(dep_signals[i]);

    // Flush the packet once it is full or the last dependency has been added.
    if (i == (dep_signal_count - 1) || idx == 4) {
      // The body is stored with an INVALID header first; the real header is
      // written last, after the release fence.
      std::atomic_thread_fence(std::memory_order_acquire);
      queue_buffer[(write_index)&queue_bitmask_] = barrier_packet;
      std::atomic_thread_fence(std::memory_order_release);
      queue_buffer[(write_index)&queue_bitmask_].header = kBarrierPacketHeader;

      LogPrint(HSA_AMD_LOG_FLAG_BLIT_KERNEL_PKTS,
      "HWq=%p, id=%lu, Barrier Header = "
      "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), "
      "dep_signal=[0x%zx 0x%zx 0x%zx 0x%zx 0x%zx], completion_signal=0x%zx "
      "rptr=%lu, wptr=%lu",
      queue_->public_handle()->base_address, queue_->public_handle()->id,
      kBarrierPacketHeader,
      extractAqlBits(kBarrierPacketHeader,
                    HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE),
      extractAqlBits(kBarrierPacketHeader,
                    HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER),
      extractAqlBits(kBarrierPacketHeader, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,
                    HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE),
      extractAqlBits(kBarrierPacketHeader, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
                    HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE),
      barrier_packet.dep_signal[0].handle,
      barrier_packet.dep_signal[1].handle,
      barrier_packet.dep_signal[2].handle,
      barrier_packet.dep_signal[3].handle,
      barrier_packet.dep_signal[4].handle,
      barrier_packet.completion_signal.handle,
      queue_->LoadReadIndexRelaxed(), write_index);

      ++write_index;

      // Start the next barrier packet from a clean slate.
      memset(&barrier_packet, 0, sizeof(hsa_barrier_and_packet_t));
      barrier_packet.header = HSA_PACKET_TYPE_INVALID;
    }
  }

  // Insert dispatch packet for copy kernel.
  KernelArgs* args = ObtainAsyncKernelCopyArg();
  KernelCode* kernel_code = nullptr;
  int num_workitems = 0;

  // "Aligned" means src and dst have the same offset within a dword, so dword
  // accesses can be used once the leading bytes have been copied.
  bool aligned = ((uintptr_t(src) & 0x3) == (uintptr_t(dst) & 0x3));

  if (aligned) {
    // Use dword-based aligned kernel.
    kernel_code = &kernels_[KernelType::CopyAligned];

    // Compute the size of each copy phase.
    num_workitems = 64 * 4 * num_cus_;

    // Phase 1 (byte copy) ends when destination is 0x100-aligned.
    uintptr_t src_start = uintptr_t(src);
    uintptr_t dst_start = uintptr_t(dst);
    uint64_t phase1_size =
        std::min(size, uint64_t(0x100 - (dst_start & 0xFF)) & 0xFF);

    // Phase 2 (unrolled dwordx4 copy) ends when last whole block fits.
    uint64_t phase2_block = num_workitems * sizeof(uint32_t) *
                            kCopyAlignedUnroll() * kCopyAlignedVecWidth();
    uint64_t phase2_size = ((size - phase1_size) / phase2_block) * phase2_block;

    // Phase 3 (dword copy) ends when last whole dword fits.
    uint64_t phase3_size =
        ((size - phase1_size - phase2_size) / sizeof(uint32_t)) *
        sizeof(uint32_t);

    // Phase 4 (byte copy) covers whatever remains up to src/dst + size.
    args->copy_aligned.phase1_src_start = src_start;
    args->copy_aligned.phase1_dst_start = dst_start;
    args->copy_aligned.phase2_src_start = src_start + phase1_size;
    args->copy_aligned.phase2_dst_start = dst_start + phase1_size;
    args->copy_aligned.phase3_src_start = src_start + phase1_size + phase2_size;
    args->copy_aligned.phase3_dst_start = dst_start + phase1_size + phase2_size;
    args->copy_aligned.phase4_src_start =
        src_start + phase1_size + phase2_size + phase3_size;
    args->copy_aligned.phase4_dst_start =
        dst_start + phase1_size + phase2_size + phase3_size;
    args->copy_aligned.phase4_src_end = src_start + size;
    args->copy_aligned.phase4_dst_end = dst_start + size;
    args->copy_aligned.num_workitems = num_workitems;
  } else {
    // Use byte-based misaligned kernel.
    kernel_code = &kernels_[KernelType::CopyMisaligned];

    // Compute the size of each copy phase.
    num_workitems = 64 * 4 * num_cus_;

    // Phase 1 (unrolled byte copy) ends when last whole block fits.
    uintptr_t src_start = uintptr_t(src);
    uintptr_t dst_start = uintptr_t(dst);
    uint64_t phase1_block =
        num_workitems * sizeof(uint8_t) * kCopyMisalignedUnroll();
    uint64_t phase1_size = (size / phase1_block) * phase1_block;

    // Phase 2 (byte copy) covers whatever remains up to src/dst + size.
    args->copy_misaligned.phase1_src_start = src_start;
    args->copy_misaligned.phase1_dst_start = dst_start;
    args->copy_misaligned.phase2_src_start = src_start + phase1_size;
    args->copy_misaligned.phase2_dst_start = dst_start + phase1_size;
    args->copy_misaligned.phase2_src_end = src_start + size;
    args->copy_misaligned.phase2_dst_end = dst_start + size;
    args->copy_misaligned.num_workitems = num_workitems;
  }

  // write_index now points at the slot right after the last barrier packet.
  hsa_signal_t signal = {(core::Signal::Convert(&out_signal)).handle};
  PopulateQueue(write_index, uintptr_t(kernel_code->code_buf_), args,
                num_workitems, signal);

  // Submit barrier(s) and dispatch packets.
  ReleaseWriteIndex(write_index_temp, total_num_packet);

  return HSA_STATUS_SUCCESS;
}

// Blocking fill of `count` dwords starting at `ptr` with `value`.
//
// Returns HSA_STATUS_ERROR if `ptr` is not dword aligned or if the wait on
// the completion signal yields a non-zero value; HSA_STATUS_SUCCESS otherwise.
hsa_status_t BlitKernel::SubmitLinearFillCommand(void* ptr, uint32_t value,
                                                 size_t count) {
  std::lock_guard<std::mutex> guard(lock_);

  // Only dword-aligned destinations are supported.
  const uintptr_t fill_begin = uintptr_t(ptr);
  if ((fill_begin & 0x3) != 0) {
    return HSA_STATUS_ERROR;
  }

  // One wavefront (64 workitems) per compute unit.
  int num_workitems = 64 * num_cus_;

  const uint64_t total_bytes = count * sizeof(uint32_t);

  // Phase 1 (unrolled dwordx4 fill) covers as many whole blocks as fit;
  // phase 2 (dword fill) handles the rest up to the end of the region.
  const uint64_t block_bytes =
      num_workitems * sizeof(uint32_t) * kFillUnroll() * kFillVecWidth();
  const uint64_t bulk_bytes = (total_bytes / block_bytes) * block_bytes;

  KernelArgs* args = ObtainAsyncKernelCopyArg();
  args->fill.phase1_dst_start = fill_begin;
  args->fill.phase2_dst_start = fill_begin + bulk_bytes;
  args->fill.phase2_dst_end = fill_begin + total_bytes;
  args->fill.fill_value = value;
  args->fill.num_workitems = num_workitems;

  // Arm the completion signal before the packet is made visible.
  HSA::hsa_signal_store_relaxed(completion_signal_, 1);

  // Reserve a single slot and record the fill in the blit history.
  uint64_t slot;
  {
    std::lock_guard<std::mutex> lock(reservation_lock_);
    slot = AcquireWriteIndex(1);
    RecordBlitHistory(total_bytes, slot);
  }

  PopulateQueue(slot, uintptr_t(kernels_[KernelType::Fill].code_buf_), args,
                num_workitems, completion_signal_);

  ReleaseWriteIndex(slot, 1);

  // Wait for the packet to finish.
  const auto signal_value = HSA::hsa_signal_wait_scacquire(
      completion_signal_, HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1), HSA_WAIT_STATE_ACTIVE);

  // Anything other than 0 is an unexpected signal value.
  return (signal_value == 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}

// Forwards the profiling switch to the underlying queue.
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::EnableProfiling(bool enable) {
  queue_->SetProfiling(enable);

  return HSA_STATUS_SUCCESS;
}

// Reserves `num_packet` consecutive queue slots and returns the index of the
// first one. Yields the thread until the read index has advanced far enough
// for the whole reservation to fit within the queue size.
uint64_t BlitKernel::AcquireWriteIndex(uint32_t num_packet) {
  assert(queue_->public_handle()->size >= num_packet);

  const uint64_t first_slot = queue_->AddWriteIndexAcqRel(num_packet);
  const uint64_t end_slot = first_slot + num_packet;

  for (;;) {
    // Distance from the current read index to the end of the reservation.
    const uint64_t span = end_slot - queue_->LoadReadIndexRelaxed();
    if (span <= queue_->public_handle()->size) {
      break;
    }
    os::YieldThread();
  }

  return first_slot;
}

// Publishes a previously reserved packet range by storing the id of its last
// packet to the queue's doorbell signal with release ordering.
void BlitKernel::ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet) {
  // The doorbell receives the id of the last packet in the range.
  const uint64_t last_packet_id = write_index + num_packet - 1;
  core::Signal::Convert(queue_->public_handle()->doorbell_signal)->StoreRelease(last_packet_id);
}

// Builds a one-dimensional kernel dispatch AQL packet and writes it into the
// ring slot selected by `index`.
//
//   index             - packet id; masked with queue_bitmask_ to pick the slot.
//   code_handle       - value stored in the packet's kernel_object field.
//   args              - kernarg buffer address; must be 16-byte aligned.
//   grid_size_x       - requested number of work-items in X (passed through
//                       AlignUp(..., 64) before being stored).
//   completion_signal - signal stored in the packet's completion_signal field.
//
// The packet body is copied into the ring with an invalid header first; the
// real header is written last with a release store so the body is in place
// before the packet becomes valid.
void BlitKernel::PopulateQueue(uint64_t index, uint64_t code_handle, void* args,
                               uint32_t grid_size_x,
                               hsa_signal_t completion_signal) {
  assert(IsMultipleOf(args, 16));

  hsa_kernel_dispatch_packet_t packet = { };

  // Final header: kernel dispatch packet with system-scope acquire and
  // release fences.
  static const uint16_t kDispatchPacketHeader =
      (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
      (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
      (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);

  // The local copy carries the invalid header; the real one is stored into
  // the ring separately at the end.
  packet.header = kInvalidPacketHeader;
  packet.kernel_object = code_handle;
  packet.kernarg_address = args;

  // Setup working size.
  const int kNumDimension = 1;
  packet.setup = kNumDimension << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
  packet.grid_size_x = AlignUp(static_cast<uint32_t>(grid_size_x), 64);
  packet.grid_size_y = packet.grid_size_z = 1;
  packet.workgroup_size_x = 64;
  packet.workgroup_size_y = packet.workgroup_size_z = 1;

  packet.completion_signal = completion_signal;

  // Populate queue buffer with AQL packet.
  hsa_kernel_dispatch_packet_t* queue_buffer =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
          queue_->public_handle()->base_address);
  std::atomic_thread_fence(std::memory_order_acquire);
  queue_buffer[index & queue_bitmask_] = packet;
  std::atomic_thread_fence(std::memory_order_release);
  if (queue_->IsDeviceMemRingBuf() && queue_->needsPcieOrdering()) {
    // Ensure the packet body is written as header may get reordered when writing over PCIE
    _mm_sfence();
  }
  // Publish the packet: header in the low 16 bits, setup in the high 16 bits
  // of full_header, written with release semantics.
  __atomic_store_n(&(queue_buffer[index & queue_bitmask_].full_header),
                    kDispatchPacketHeader | packet.setup << 16, __ATOMIC_RELEASE);

  // Trace the packet that was just written (header fields decoded for
  // readability) together with the current read index and this write index.
  LogPrint(HSA_AMD_LOG_FLAG_BLIT_KERNEL_PKTS,
    "HWq=%p, id=%lu, Dispatch Header = "
    "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), "
    "setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, "
    "group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx "
    "rptr=%lu, wptr=%lu",
    queue_->public_handle()->base_address, queue_->public_handle()->id,
    kDispatchPacketHeader,
    extractAqlBits(kDispatchPacketHeader,
                   HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE),
    extractAqlBits(kDispatchPacketHeader,
                   HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER),
    extractAqlBits(kDispatchPacketHeader, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,
                   HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE),
    extractAqlBits(kDispatchPacketHeader, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
                   HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE),
    packet.setup, static_cast<size_t>(packet.grid_size_x), static_cast<size_t>(packet.grid_size_y), static_cast<size_t>(packet.grid_size_z),
    static_cast<size_t>(packet.workgroup_size_x), static_cast<size_t>(packet.workgroup_size_y), static_cast<size_t>(packet.workgroup_size_z),
    static_cast<size_t>(packet.private_segment_size), static_cast<size_t>(packet.group_segment_size),
    packet.kernel_object,reinterpret_cast<uintptr_t>(packet.kernarg_address),
    completion_signal.handle, queue_->LoadReadIndexRelaxed(), index);
}

// Hands out the next kernel-argument slot from kernarg_async_. The slot is
// chosen by bumping kernarg_async_counter_ and masking the result with
// kernarg_async_mask_, so slots are reused cyclically.
BlitKernel::KernelArgs* BlitKernel::ObtainAsyncKernelCopyArg() {
  const auto ticket = atomic::Add(&kernarg_async_counter_, 1U, std::memory_order_acquire);
  const uint32_t slot_index = ticket & kernarg_async_mask_;

  KernelArgs* slot = &kernarg_async_[slot_index];
  // Dispatch packets expect a 16-byte aligned kernarg address.
  assert(IsMultipleOf(slot, 16));
  return slot;
}

// Accounts `size` bytes to the running total and remembers, in the history
// slot for packet `index`, how many bytes had been queued before this blit.
// Also marks `index` as the most recently queued packet.
void BlitKernel::RecordBlitHistory(uint64_t size, uint64_t index) {
  // Snapshot the total before this blit is added to it.
  const uint64_t bytes_before = bytes_queued_;
  bytes_queued_ += size;

  auto& record = bytes_written_[index & queue_bitmask_];
  record.bytes = bytes_before;
  record.index = index;

  last_queued_ = index;
}

// Estimates the number of bytes queued but not yet processed.
//
// Starting from the later of the cached search position and the queue read
// index, looks for the first history record whose stored index still matches
// its packet id and returns the bytes queued since that record was written.
// Returns 0 when the read index has already passed the last queued packet,
// or when no matching record is found.
uint64_t BlitKernel::PendingBytes() {
  const uint64_t read_index = queue_->LoadReadIndexRelaxed();
  const uint64_t search_start = pending_search_index_.load();
  const uint64_t newest = last_queued_;

  // If the last blit command has been run then the blit is empty.
  if (read_index > newest) return 0;

  for (uint64_t cursor = Max(search_start, read_index); cursor <= newest; ++cursor) {
    const auto& record = bytes_written_[cursor & queue_bitmask_];

    // Skip slots that have been overwritten by a different packet id.
    if (record.index != cursor) continue;

    const uint64_t pending = bytes_queued_ - record.bytes;

    // Advance the cached search position to `cursor`, never moving it back.
    uint64_t observed = pending_search_index_.load();
    while (observed < cursor &&
           !pending_search_index_.compare_exchange_strong(observed, cursor)) {
      // A failed exchange refreshed `observed`; re-check and retry.
    }

    return pending;
  }

  debug_warning(false && "Race between PendingBytes and blit submission detected.");
  // Zero is a valid return in this case since the command which was last when the search started is
  // now complete.
  return 0;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_blit_sdma.h"

#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstring>
#include <limits>

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"
#include "core/inc/sdma_registers.h"
#include "core/inc/signal.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/default_signal.h"

namespace rocr {
namespace AMD {

// Returns the least-significant 32 bits of the address held in `p`.
inline uint32_t ptrlow32(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address & 0xFFFFFFFFu);
}

// Returns bits 32..63 of the address held in `p` when HSA_LARGE_MODEL is
// defined; otherwise always returns 0.
inline uint32_t ptrhigh32(const void* p) {
#if !defined(HSA_LARGE_MODEL)
  return 0;
#else
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 32);
#endif
}

// Size in bytes of the SDMA ring buffer (1 MiB).
const size_t BlitSdmaBase::kQueueSize = 1024 * 1024;
// Size in bytes of one linear copy packet.
const size_t BlitSdmaBase::kCopyPacketSize = sizeof(SDMA_PKT_COPY_LINEAR);
// Packet-defined upper bound on the size of a single linear copy; larger
// copies are split into several packets.
const size_t BlitSdmaBase::kMaxSingleCopySize = SDMA_PKT_COPY_LINEAR::kMaxSize_;
// Packet-defined upper bound on the size of a single constant fill; larger
// fills are split into several packets.
const size_t BlitSdmaBase::kMaxSingleFillSize = SDMA_PKT_CONSTANT_FILL::kMaxSize_;

// Sizes, in bytes, of the SDMA packets emitted by this module. Each constant
// is the sizeof() of the packet structure that the matching Build*Command
// helper writes into the ring.
template <bool useGCR>
const uint32_t BlitSdma<useGCR>::linear_copy_command_size_ = sizeof(SDMA_PKT_COPY_LINEAR);

template <bool useGCR>
const uint32_t BlitSdma<useGCR>::fill_command_size_ = sizeof(SDMA_PKT_CONSTANT_FILL);

template <bool useGCR>
const uint32_t BlitSdma<useGCR>::fence_command_size_ = sizeof(SDMA_PKT_FENCE);

template <bool useGCR>
const uint32_t BlitSdma<useGCR>::poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);

// The flush command has the same size as a POLL_REGMEM packet.
template <bool useGCR>
const uint32_t BlitSdma<useGCR>::flush_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);

template <bool useGCR>
const uint32_t BlitSdma<useGCR>::atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);

template <bool useGCR>
const uint32_t BlitSdma<useGCR>::timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP);

template <bool useGCR> const uint32_t BlitSdma<useGCR>::trap_command_size_ = sizeof(SDMA_PKT_TRAP);

template <bool useGCR> const uint32_t BlitSdma<useGCR>::gcr_command_size_ = sizeof(SDMA_PKT_GCR);

// Puts the object into its "not initialized" state: no agent, no ring
// buffer, zeroed counters and a zeroed queue resource descriptor. The real
// setup happens in Initialize().
template <bool useGCR>
BlitSdma<useGCR>::BlitSdma()
    : agent_(nullptr),
      queue_start_addr_(nullptr),
      bytes_queued_(0),
      parity_(false),
      cached_reserve_index_(0),
      cached_commit_index_(0),
      platform_atomic_support_(true),
      hdp_flush_support_(false),
      gang_leader_(false),
      is_ganged_(false),
      min_submission_size_(0) {
  // QueueId == 0 in the zeroed descriptor is what Destroy() treats as
  // "no kernel queue to release".
  std::memset(&queue_resource_, 0, sizeof(queue_resource_));
}

// Intentionally empty: the destructor body performs no explicit cleanup.
// Destroy() is the routine that releases the kernel queue and ring buffer.
template <bool useGCR> BlitSdma<useGCR>::~BlitSdma() {}

// Binds this object to `agent` and creates the SDMA queue it submits to.
//
//   agent                      - must be an AMD GPU agent that does not use
//                                the full profile.
//   use_xgmi                   - selects the xGMI queue type when no engine
//                                id is given.
//   linear_copy_size_override  - stored as the per-packet linear copy limit
//                                (0 leaves the packet default in effect).
//   rec_eng                    - when >= 0, the queue is created by engine id.
//
// Returns HSA_STATUS_SUCCESS (also when already initialized),
// HSA_STATUS_ERROR for an unsupported agent, or
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the ring buffer or the queue cannot
// be created. On failure after the ring buffer was allocated, Destroy() is
// run to undo the partial setup.
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::Initialize(const core::Agent& agent, bool use_xgmi,
                                          size_t linear_copy_size_override, int rec_eng) {
  // A ring buffer that already exists means initialization has been done.
  if (queue_start_addr_ != nullptr) return HSA_STATUS_SUCCESS;

  if (agent.device_type() != core::Agent::kAmdGpuDevice) return HSA_STATUS_ERROR;

  agent_ = reinterpret_cast<AMD::GpuAgent*>(&const_cast<core::Agent&>(agent));

  if (HSA_PROFILE_FULL == agent_->profile()) {
    assert(false && "Only support SDMA for dgpu currently");
    return HSA_STATUS_ERROR;
  }

  // Every ISA-dependent decision below looks at the agent's first ISA.
  const auto isa = agent_->supported_isas()[0];
  const auto& isa_version = isa->GetVersion();

  // Some GFX9 devices require a minimum of 64 DWORDS per ring buffer submission.
  const bool at_least_9_0_0 = isa_version >= core::Isa::Version(9, 0, 0);
  const bool at_most_9_0_4 = isa_version <= core::Isa::Version(9, 0, 4);
  const bool is_9_0_12 = isa_version == core::Isa::Version(9, 0, 12);
  if (at_least_9_0_0 && (at_most_9_0_4 || is_9_0_12)) {
    min_submission_size_ = 256;
  }

  // Link between this GPU and the first CPU agent.
  auto* const runtime = core::Runtime::runtime_singleton_;
  const core::Runtime::LinkInfo& link =
      runtime->GetLinkInfo(agent_->node_id(), runtime->cpu_agents()[0]->node_id());

  // ISA 7.0.1 never uses platform atomics; everything else follows the link.
  const bool is_7_0_1 = isa_version == core::Isa::Version(7, 0, 1);
  platform_atomic_support_ = is_7_0_1 ? false : link.info.atomic_support_64bit;

  // HDP flush supported on gfx900 and forward.
  // gfx90a can support xGMI host to device connections so bypass HDP flush
  // in this case.
  // gfx101x seems to have issues with HDP flushes
  const auto major_version = isa->GetMajorVersion();
  const bool is_gfx101x = major_version == 10 && isa->GetMinorVersion() == 1;
  if (major_version >= 9 && !is_gfx101x) {
    hdp_flush_support_ = link.info.link_type != HSA_AMD_LINK_INFO_TYPE_XGMI;
  }

  // Allocate queue buffer.
  queue_start_addr_ = static_cast<char*>(
      agent_->system_allocator()(kQueueSize, 0x1000, core::MemoryRegion::AllocateExecutable));
  if (queue_start_addr_ == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // From here on any early exit must release what has been set up so far.
  MAKE_NAMED_SCOPE_GUARD(cleanupOnException, [&]() { Destroy(agent); };);
  std::memset(queue_start_addr_, 0, kQueueSize);

  bytes_written_.resize(kQueueSize);

  // Access kernel driver to initialize the queue control block.
  // This call binds the user mode queue object to the underlying compute
  // device. ROCr creates queues that are of two kinds: PCIe optimized
  // and xGMI optimized; an explicit engine id takes precedence over both.
  HSA_QUEUE_TYPE queue_type = HSA_QUEUE_SDMA;
  if (rec_eng >= 0) {
    queue_type = HSA_QUEUE_SDMA_BY_ENG_ID;
  } else if (use_xgmi) {
    queue_type = HSA_QUEUE_SDMA_XGMI;
  }

  const auto create_status =
      agent_->driver().CreateQueue(agent_->node_id(), queue_type, 100, HSA_QUEUE_PRIORITY_MAXIMUM,
                                   rec_eng, queue_start_addr_, kQueueSize, nullptr,
                                   queue_resource_);
  if (create_status != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Start both cached indices at the queue's current write pointer.
  cached_reserve_index_ = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_write_ptr);
  cached_commit_index_ = cached_reserve_index_;

  // Two completion signals, used alternately by blocking submissions.
  if (core::g_use_interrupt_wait) {
    signals_[0].reset(new core::InterruptSignal(0));
    signals_[1].reset(new core::InterruptSignal(0));
  } else {
    signals_[0].reset(new core::DefaultSignal(0));
    signals_[1].reset(new core::DefaultSignal(0));
  }

  max_single_linear_copy_size_ = linear_copy_size_override;

  cleanupOnException.Dismiss();
  return HSA_STATUS_SUCCESS;
}

// Releases everything Initialize() acquired and returns the object to its
// "not initialized" state. Safe to call on a partially initialized object:
// each resource is released only if it is present. Always returns
// HSA_STATUS_SUCCESS.
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::Destroy(const core::Agent& agent) {
  // Kernel queue first, while its ring buffer is still allocated.
  if (queue_resource_.QueueId != 0) {
    const auto status = agent_->driver().DestroyQueue(queue_resource_.QueueId);
    assert(status == HSA_STATUS_SUCCESS);
    std::memset(&queue_resource_, 0, sizeof(queue_resource_));
  }

  // Then the ring buffer itself.
  if (queue_start_addr_ != nullptr) {
    agent_->system_deallocator()(queue_start_addr_);
    queue_start_addr_ = nullptr;
  }

  cached_reserve_index_ = 0;
  cached_commit_index_ = 0;

  signals_[0].reset();
  signals_[1].reset();

  return HSA_STATUS_SUCCESS;
}

// Submits `cmd` (cmd_size bytes of pre-built SDMA packets) and blocks until
// the engine has signalled its completion.
//
//   cmd      - packet bytes to copy into the ring.
//   cmd_size - size of `cmd` in bytes.
//   size     - number of payload bytes this command moves; used only for
//              pending-bytes accounting.
//
// Returns the status of SubmitCommand(). When submission fails nothing was
// queued, so the function returns immediately instead of waiting on a signal
// that no packet will ever decrement.
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitBlockingCommand(const void* cmd, size_t cmd_size,
                                                     uint64_t size) {
  ScopedAcquire<KernelMutex> lock(&lock_);

  // Alternate between completion signals
  // Using two allows overlapping command writing and copies
  core::Signal* completionSignal;
  if (parity_)
    completionSignal = signals_[0].get();
  else
    completionSignal = signals_[1].get();
  parity_ ^= true;

  // Wait for prior operation with this signal to complete
  completionSignal->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 0, -1, HSA_WAIT_STATE_BLOCKED);

  // Mark signal as in use, guard against exception leaving the signal in an unusable state.
  // The guard also restores the signal to 0 on every return path below.
  completionSignal->StoreRelaxed(2);
  MAKE_SCOPE_GUARD([&]() { completionSignal->StoreRelaxed(0); });
  lock.Release();

  std::vector<core::Signal*> gang_signals(0);

  // Submit command and wait for completion
  hsa_status_t ret =
      SubmitCommand(cmd, cmd_size, size, std::vector<core::Signal*>(), *completionSignal,
                    gang_signals);
  if (ret != HSA_STATUS_SUCCESS) {
    // No packets were written, so the signal would stay at 2 forever;
    // waiting for it to reach 1 would deadlock the caller.
    return ret;
  }

  // The submitted command decrements the signal from 2 to 1 when it is done.
  completionSignal->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 1, -1, HSA_WAIT_STATE_BLOCKED);
  return ret;
}

// Writes one complete SDMA submission into the ring and rings the doorbell.
//
// The submission is laid out as:
//   1. poll packets for every dependency signal that is currently non-zero,
//   2. optional start timestamp, HDP flush and (useGCR) cache invalidate,
//   3. the caller's packets (`cmd`),
//   4. optional (useGCR) cache writeback and end timestamp,
//   5. for a gang leader, a poll + decrement per gang signal,
//   6. the decrement (or fence write) of `out_signal`,
//   7. optional mailbox write + trap when `out_signal` has an event mailbox,
//   8. NOP padding up to min_submission_size_.
//
//   cmd, cmd_size - pre-built packets and their size in bytes.
//   size          - payload bytes moved by `cmd`; used for byte accounting.
//   dep_signals   - signals that must reach 0 before `cmd` runs; at most
//                   HSA_MAX_DEP_SIGNALS entries.
//   out_signal    - signal decremented once the command has completed.
//   gang_signals  - signals of the other gang members (used by the leader).
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when too many dependency signals
// are supplied, HSA_STATUS_ERROR_OUT_OF_RESOURCES when the submission cannot
// fit in the ring, and HSA_STATUS_SUCCESS otherwise.
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitCommand(const void* cmd, size_t cmd_size, uint64_t size,
                                             const std::vector<core::Signal*>& dep_signals,
                                             core::Signal& out_signal,
                                             std::vector<core::Signal*>& gang_signals) {
  // The cached signal values live in a fixed-size stack array; refuse lists
  // that would overrun it.
  if (dep_signals.size() > static_cast<size_t>(HSA_MAX_DEP_SIGNALS)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  uint32_t num_poll_command = 0;

  // Cached copy of dep_signals[i]->LoadRelaxed
  uint64_t dep_signals_value[HSA_MAX_DEP_SIGNALS];

  for (size_t i = 0; i < dep_signals.size(); ++i) {
    // The signal is 64 bit value, and poll checks for 32 bit value.
    // If the signal is already 0, then we do not need to poll.
    // If the upper 32-bits of the signal is 0, then we only need to poll the
    // lower 32-bits
    dep_signals_value[i] = dep_signals[i]->LoadRelaxed();
    if (dep_signals_value[i]) {
      num_poll_command++;
      if (dep_signals_value[i] >> 32)
        num_poll_command++;
    }
  }

  // Workaround for rare-issue on gfx908 where SDMA_OP_POLL_REGMEM returns before
  // polled memory is cleared.
  // Evaluated per call from this object's agent: a function-local static
  // would be shared by every BlitSdma of the same instantiation and would
  // keep the answer of whichever agent happened to submit first.
  const bool doublePoll = agent_->supported_isas()[0]->GetMajorVersion() == 9 &&
                          agent_->supported_isas()[0]->GetMinorVersion() == 0 &&
                          agent_->supported_isas()[0]->GetStepping() != 10;
  if (doublePoll)
    num_poll_command *= 2;

  const uint32_t total_poll_command_size =
      (num_poll_command * poll_command_size_);

  // Load the profiling state early in case the user disable or enable the
  // profiling in the middle of the call.
  const bool profiling_enabled = agent_->profiling_enabled();

  uint64_t* start_ts_addr = nullptr;
  uint64_t* end_ts_addr = nullptr;
  uint32_t total_timestamp_command_size = 0;

  // Gang leader polls gang item completions and does final decrement or
  // completion of gang signal to prevent race between poll and signal
  // destruction.
  uint32_t total_gang_complete_command_size = poll_command_size_ +
         (platform_atomic_support_ ? atomic_command_size_ : fence_command_size_);
  uint32_t total_gang_command_size = gang_leader_ ?
          static_cast<uint32_t>(gang_signals.size()) * total_gang_complete_command_size : 0;

  if (profiling_enabled && (gang_leader_ || gang_signals.empty())) {
    out_signal.GetSdmaTsAddresses(start_ts_addr, end_ts_addr);
    total_timestamp_command_size = 2 * timestamp_command_size_;
  }

  // On agent that does not support platform atomic, we replace it with
  // one or two fence packet(s) to update the signal value. The reason fence
  // is used and not write packet is because the SDMA engine may overlap a
  // serial copy/write packets.
  const uint64_t completion_signal_value =
      static_cast<uint64_t>(out_signal.LoadRelaxed() - 1);
  const size_t sync_command_size = (platform_atomic_support_)
                                       ? atomic_command_size_
                                       : (completion_signal_value > UINT32_MAX)
                                             ? 2 * fence_command_size_
                                             : fence_command_size_;

  // If the signal is an interrupt signal, we also need to make SDMA engine to
  // send interrupt packet to IH.
  const size_t interrupt_command_size =
      (out_signal.signal_.event_mailbox_ptr != 0)
          ? (fence_command_size_ + trap_command_size_)
          : 0;

  // Add space for acquire or release Hdp flush command
  uint32_t flush_cmd_size = 0;
  if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
    if (hdp_flush_support_) {
      flush_cmd_size = flush_command_size_;
    }
  }

  // Add space for cache flush.
  if (useGCR) flush_cmd_size += gcr_command_size_ * 2;

  const uint32_t total_command_size = total_poll_command_size + cmd_size + sync_command_size +
      total_timestamp_command_size + interrupt_command_size + flush_cmd_size + total_gang_command_size;
  const uint32_t pad_size = total_command_size < min_submission_size_ ?
                            min_submission_size_ - total_command_size : 0;

  // Reserve ring space and update the byte accounting atomically with
  // respect to other submitters.
  uint64_t curr_index;
  char* command_addr;
  uint64_t prior_bytes, post_bytes;
  {
    std::lock_guard<std::mutex> lock(reservation_lock_);
    command_addr = AcquireWriteAddress(total_command_size + pad_size, curr_index);
    if (command_addr == nullptr) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
    prior_bytes = bytes_queued_;
    bytes_queued_ += size;
    post_bytes = bytes_queued_;
  }
  uint32_t wrapped_index = WrapIntoRing(curr_index);

  // Packets placed before the payload are tagged with prior_bytes, packets
  // after it with post_bytes.
  for (size_t i = 0; i < dep_signals.size(); ++i) {
    if (dep_signals_value[i]) {
      uint32_t* signal_addr =
          reinterpret_cast<uint32_t*>(dep_signals[i]->ValueLocation());

      if (dep_signals_value[i] >> 32) {
        // Wait for the higher 32 bits to 0.
        BuildPollCommand(command_addr, &signal_addr[1], 0);
        command_addr += poll_command_size_;
        bytes_written_[wrapped_index] = prior_bytes;
        wrapped_index += poll_command_size_;

        if (doublePoll) {
          BuildPollCommand(command_addr, &signal_addr[1], 0);
          command_addr += poll_command_size_;
          bytes_written_[wrapped_index] = prior_bytes;
          wrapped_index += poll_command_size_;
        }
      }
      // Then wait for the lower 32 bits to 0.
      BuildPollCommand(command_addr, &signal_addr[0], 0);
      command_addr += poll_command_size_;
      bytes_written_[wrapped_index] = prior_bytes;
      wrapped_index += poll_command_size_;

      if (doublePoll) {
        BuildPollCommand(command_addr, &signal_addr[0], 0);
        command_addr += poll_command_size_;
        bytes_written_[wrapped_index] = prior_bytes;
        wrapped_index += poll_command_size_;
      }
    }
  }

  if (profiling_enabled && (gang_leader_ || gang_signals.empty())) {
    BuildGetGlobalTimestampCommand(command_addr, reinterpret_cast<void*>(start_ts_addr));
    command_addr += timestamp_command_size_;
    bytes_written_[wrapped_index] = prior_bytes;
    wrapped_index += timestamp_command_size_;
  }

  // Issue a Hdp flush cmd
  if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
    if (hdp_flush_support_) {
      BuildHdpFlushCommand(command_addr);
      command_addr += flush_command_size_;
      bytes_written_[wrapped_index] = prior_bytes;
      wrapped_index += flush_command_size_;
    }
  }

  // Issue cache invalidate
  if (useGCR) {
    BuildGCRCommand(command_addr, true);
    command_addr += gcr_command_size_;
    bytes_written_[wrapped_index] = prior_bytes;
    wrapped_index += gcr_command_size_;
  }

  // Do the command after all polls are satisfied.
  memcpy(command_addr, cmd, cmd_size);
  command_addr += cmd_size;
  bytes_written_.fill(wrapped_index, wrapped_index + cmd_size, prior_bytes);
  wrapped_index += cmd_size;

  // Issue cache writeback
  if (useGCR) {
    BuildGCRCommand(command_addr, false);
    command_addr += gcr_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += gcr_command_size_;
  }

  if (profiling_enabled && (gang_leader_ || gang_signals.empty())) {
    assert(IsMultipleOf(end_ts_addr, 32));
    BuildGetGlobalTimestampCommand(command_addr,
                                   reinterpret_cast<void*>(end_ts_addr));
    command_addr += timestamp_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += timestamp_command_size_;
  }

  // Wait for non-leaders gang items to complete
  if (gang_leader_) {
    for (size_t i = 0; i < gang_signals.size(); i++) {
      uint32_t* gang_signal_addr =
          reinterpret_cast<uint32_t*>(gang_signals[i]->ValueLocation());
      BuildPollCommand(command_addr, gang_signal_addr, 1);
      command_addr += poll_command_size_;
      bytes_written_[wrapped_index] = prior_bytes;
      wrapped_index += poll_command_size_;

      // After non-leader gang-items have completed, decrement the gang signal value.
      if (platform_atomic_support_) {
        BuildAtomicDecrementCommand(command_addr, gang_signal_addr);
        command_addr += atomic_command_size_;
        bytes_written_[wrapped_index] = post_bytes;
        wrapped_index += atomic_command_size_;
      } else {
        BuildFenceCommand(command_addr, gang_signal_addr, 0);
        command_addr += fence_command_size_;
        bytes_written_[wrapped_index] = post_bytes;
        wrapped_index += fence_command_size_;
      }
    }
  }

  // After transfer is completed, decrement the signal value.
  if (platform_atomic_support_) {
    BuildAtomicDecrementCommand(command_addr, out_signal.ValueLocation());
    command_addr += atomic_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += atomic_command_size_;
  } else {
    // No platform atomics: write the pre-computed "value - 1" with fence
    // packets, upper half first when it is needed.
    uint32_t* signal_value_location = reinterpret_cast<uint32_t*>(out_signal.ValueLocation());
    if (completion_signal_value > UINT32_MAX) {
      BuildFenceCommand(command_addr, signal_value_location + 1,
                        static_cast<uint32_t>(completion_signal_value >> 32));
      command_addr += fence_command_size_;
      bytes_written_[wrapped_index] = post_bytes;
      wrapped_index += fence_command_size_;
    }

    BuildFenceCommand(command_addr, signal_value_location,
                      static_cast<uint32_t>(completion_signal_value));
    command_addr += fence_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += fence_command_size_;
  }

  // Update mailbox event and send interrupt to IH.
  if (out_signal.signal_.event_mailbox_ptr != 0) {
    BuildFenceCommand(command_addr,
                      reinterpret_cast<uint32_t*>(out_signal.signal_.event_mailbox_ptr),
                      static_cast<uint32_t>(out_signal.signal_.event_id));
    command_addr += fence_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += fence_command_size_;

    BuildTrapCommand(command_addr, out_signal.signal_.event_id);
    command_addr += trap_command_size_;
    bytes_written_[wrapped_index] = post_bytes;
    wrapped_index += trap_command_size_;
  }

  // Pad size is DWORD aligned since all commands are dword aligned.
  // Insert NOP header DWORD with value of the number of null DWORDs shifted
  // by 16 bits to pad total submission.
  if (pad_size) {
    memset(command_addr, 0, pad_size);
    uint32_t *dword_command_addr = reinterpret_cast<uint32_t*>(command_addr);
    dword_command_addr[0] = (pad_size/4 - 1) << 16;
  }

  ReleaseWriteAddress(curr_index, total_command_size + pad_size);

  return HSA_STATUS_SUCCESS;
}

// Blocking linear copy of `size` bytes from `src` to `dst`.
//
// The copy is split into as many linear copy packets as the per-packet limit
// requires (the Initialize() override when non-zero, otherwise
// kMaxSingleCopySize). Returns the status of SubmitBlockingCommand().
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size) {
  // Break the copy into multiple copy operation incase the copy size exceeds
  // the SDMA linear copy limit.
  const size_t max_copy_size = max_single_linear_copy_size_ ? max_single_linear_copy_size_ :
                               kMaxSingleCopySize;
  const uint32_t num_copy_command = (size + max_copy_size - 1) / max_copy_size;

  // data() rather than &buff[0]: a zero-byte copy produces an empty vector,
  // and indexing element 0 of an empty vector is undefined behaviour.
  std::vector<SDMA_PKT_COPY_LINEAR> buff(num_copy_command);
  BuildCopyCommand(reinterpret_cast<char*>(buff.data()), num_copy_command, dst, src, size);

  return SubmitBlockingCommand(buff.data(), buff.size() * sizeof(SDMA_PKT_COPY_LINEAR), size);
}

// Asynchronous linear copy of `size` bytes from `src` to `dst`.
//
//   dep_signals  - signals that must reach 0 before the copy starts.
//   out_signal   - signal decremented when the copy has completed.
//   gang_signals - signals of the other members of a ganged copy.
//
// The copy is split into as many linear copy packets as the per-packet limit
// requires. Returns the status of SubmitCommand(); does not wait.
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size,
                                                       std::vector<core::Signal*>& dep_signals,
                                                       core::Signal& out_signal,
                                                       std::vector<core::Signal*>& gang_signals) {
  // Break the copy into multiple copy operations when the copy size exceeds
  // the SDMA linear copy limit.
  const size_t max_copy_size = max_single_linear_copy_size_ ? max_single_linear_copy_size_ :
                               kMaxSingleCopySize;
  const uint32_t num_copy_command = (size + max_copy_size - 1) / max_copy_size;

  // Assemble copy packets.
  // data() rather than &buff[0]: a zero-byte copy produces an empty vector,
  // and indexing element 0 of an empty vector is undefined behaviour.
  std::vector<SDMA_PKT_COPY_LINEAR> buff(num_copy_command);
  BuildCopyCommand(reinterpret_cast<char*>(buff.data()), num_copy_command, dst, src, size);

  return SubmitCommand(buff.data(), buff.size() * sizeof(SDMA_PKT_COPY_LINEAR), size, dep_signals,
                       out_signal, gang_signals);
}

// Asynchronous copy of a 3D sub-rectangle between two pitched allocations.
//
//   dst, src               - pitched pointers (base, pitch, slice).
//   dst_offset, src_offset - origin of the rectangle inside each allocation.
//   range                  - extent of the rectangle (x, y, z).
//   dep_signals            - signals that must reach 0 before the copy starts.
//   out_signal             - signal decremented when the copy has completed.
//
// Throws AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT) when the
// arguments are not DWORD aligned or the rectangle does not fit the described
// geometry. Otherwise returns the status of SubmitCommand().
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitCopyRectCommand(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, std::vector<core::Signal*>& dep_signals,
    core::Signal& out_signal) {
  // Hardware requires DWORD alignment for base address, pitches
  // Also confirm that we have a geometric rect (copied block does not wrap an edge).
  if (((uintptr_t)dst->base) % 4 != 0 || ((uintptr_t)src->base) % 4 != 0)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "Copy rect base address not aligned.");
  if (((uintptr_t)dst->pitch) % 4 != 0 || ((uintptr_t)src->pitch) % 4 != 0)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect pitch not aligned.");
  if (((uintptr_t)dst->slice) % 4 != 0 || ((uintptr_t)src->slice) % 4 != 0)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect slice not aligned.");
  if (uint64_t(src_offset->x) + range->x > src->pitch ||
      uint64_t(dst_offset->x) + range->x > dst->pitch)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect width out of range.");
  if ((src->slice != 0) && (uint64_t(src_offset->y) + range->y) > src->slice / src->pitch)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect height out of range.");
  if ((dst->slice != 0) && (uint64_t(dst_offset->y) + range->y) > dst->slice / dst->pitch)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect height out of range.");
  if (range->z > 1 && (src->slice == 0 || dst->slice == 0))
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect slice needed.");

  // GFX12 or later use a different packet format that is incompatible (fields changed in size and location).
  const bool isGFX12Plus =
                        (agent_->supported_isas()[0]->GetMajorVersion() >= 12);

  // Common and GFX12 packet must match in size to use same code for vector/append.
  static_assert(sizeof(SDMA_PKT_COPY_LINEAR_RECT) == sizeof(SDMA_PKT_COPY_LINEAR_RECT_GFX12), "");

  // Largest pitch the packet's pitch field can encode for this generation.
  const uint32_t max_pitch = 1u << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::pitch_bits : SDMA_PKT_COPY_LINEAR_RECT::pitch_bits);

  // Packet storage; BuildCopyRectCommand requests one packet at a time
  // through `append`.
  std::vector<SDMA_PKT_COPY_LINEAR_RECT> pkts;
  auto append = [&](size_t size) {
    assert(size == sizeof(SDMA_PKT_COPY_LINEAR_RECT) && "SDMA packet size missmatch");
    pkts.emplace_back(SDMA_PKT_COPY_LINEAR_RECT());
    return &pkts.back();
  };

  // Do wide pitch 2D copies along X-Z
  if (range->z == 1 && (src->pitch > max_pitch || dst->pitch > max_pitch)) {
    hsa_pitched_ptr_t Src = *src;
    hsa_pitched_ptr_t Dst = *dst;
    hsa_dim3_t Soff = *src_offset;
    hsa_dim3_t Doff = *dst_offset;
    hsa_dim3_t Range = *range;

    // Fold the Y/Z offsets into the base addresses.
    Src.base = static_cast<char*>(Src.base) + Soff.z * Src.slice + Soff.y * Src.pitch;
    Dst.base = static_cast<char*>(Dst.base) + Doff.z * Dst.slice + Doff.y * Dst.pitch;
    Soff.y = Soff.z = 0;
    Doff.y = Doff.z = 0;

    // Reinterpret rows as slices so the wide pitch goes in the slice field.
    Src.slice = Src.pitch;
    Src.pitch = 0;
    Dst.slice = Dst.pitch;
    Dst.pitch = 0;

    Range.z = Range.y;
    Range.y = 1;

    BuildCopyRectCommand(append, &Dst, &Doff, &Src, &Soff, &Range);
  } else {
    BuildCopyRectCommand(append, dst, dst_offset, src, src_offset, range);
  }

  uint64_t size = static_cast<uint64_t>(range->x) * static_cast<uint64_t>(range->y) * range->z;

  std::vector<core::Signal*> gang_signals(0);

  // data() rather than &pkts[0]: stays well-defined even if no packet was
  // appended.
  return SubmitCommand(pkts.data(), pkts.size() * sizeof(SDMA_PKT_COPY_LINEAR_RECT), size,
                       dep_signals, out_signal, gang_signals);
}

// Blocking fill of `count` 32-bit words starting at `ptr` with `value`.
//
// The fill is split into as many constant-fill packets as kMaxSingleFillSize
// requires. Returns the status of SubmitBlockingCommand().
template <bool useGCR>
hsa_status_t BlitSdma<useGCR>::SubmitLinearFillCommand(void* ptr, uint32_t value, size_t count) {
  const size_t size = count * sizeof(uint32_t);

  const uint32_t num_fill_command = (size + kMaxSingleFillSize - 1) / kMaxSingleFillSize;

  // data() rather than &buff[0]: a zero-count fill produces an empty vector,
  // and indexing element 0 of an empty vector is undefined behaviour.
  std::vector<SDMA_PKT_CONSTANT_FILL> buff(num_fill_command);
  BuildFillCommand(reinterpret_cast<char*>(buff.data()), num_fill_command, ptr, value, count);

  return SubmitBlockingCommand(buff.data(), buff.size() * sizeof(SDMA_PKT_CONSTANT_FILL), size);
}

// No-op for this blit type: `enable` is ignored and HSA_STATUS_SUCCESS is
// always returned. SubmitCommand() reads the profiling state from the agent
// on every submission instead.
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::EnableProfiling(bool enable) {
  return HSA_STATUS_SUCCESS;
}

// Reserves |cmd_size| contiguous bytes in the ring buffer.
//
// cmd_size    Number of bytes to reserve.
// curr_index  Out: monotonic index at which the reserved region starts.
//
// Returns the address inside the ring where the caller may write, or nullptr
// when the request is at least as large as the ring itself.  Otherwise the
// call loops (yielding the thread) until a reservation succeeds.
template <bool useGCR>
char* BlitSdma<useGCR>::AcquireWriteAddress(uint32_t cmd_size, uint64_t& curr_index) {
  // Ring is full when all but one byte is written, so a request this large
  // can never be satisfied.
  if (cmd_size >= kQueueSize) return nullptr;

  for (;;) {
    curr_index = atomic::Load(&cached_reserve_index_, std::memory_order_acquire);
    const uint64_t reserve_end = curr_index + cmd_size;

    // The wrapped end offset tells whether the region is linear:
    //   == cmd_size : region starts exactly at the beginning of the ring.
    //   <  cmd_size : region would straddle the end of the ring, so fill the
    //                 remainder with no-ops and start over.
    if (WrapIntoRing(reserve_end) < cmd_size) {
      PadRingToEnd(curr_index);
      continue;
    }

    // The engine must be done with this part of the ring before we reuse it.
    if (!CanWriteUpto(reserve_end)) {
      // Give the read index a chance to advance, then retry.
      os::YieldThread();
      continue;
    }

    // Publish the reservation; this only succeeds if nobody else moved the
    // reserve index since we sampled it.
    const auto observed = atomic::Cas(&cached_reserve_index_, reserve_end, curr_index,
                                      std::memory_order_release);
    if (observed == curr_index) {
      return queue_start_addr_ + WrapIntoRing(curr_index);
    }

    // Lost the race against another thread for curr_index; retry.
    os::YieldThread();
  }

  return nullptr;
}

// Commits the ring region [curr_index, new_index) to the engine: updates the
// queue write pointer, rings the doorbell and advances the cached commit index.
//
// Commits happen strictly in reservation order: the call spins (yielding the
// thread) until the cached commit index equals |curr_index|.
template <bool useGCR>
void BlitSdma<useGCR>::UpdateWriteAndDoorbellRegister(uint64_t curr_index, uint64_t new_index) {
  // Everything located before ::curr_index has to be released first, otherwise
  // the CP may read invalid packets.  Wait for the threads that own the
  // preceding commands to submit them.
  while (atomic::Load(&cached_commit_index_, std::memory_order_acquire) != curr_index) {
    os::YieldThread();
  }

  if (core::Runtime::runtime_singleton_->flag().sdma_wait_idle()) {
    // TODO: remove when sdma wpointer issue is resolved.
    // Hold off on the wptr/doorbell update until the SDMA engine has
    // processed every packet submitted so far.
    const uint32_t idle_offset = WrapIntoRing(curr_index);
    while (WrapIntoRing(*reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr)) !=
           idle_offset) {
      os::YieldThread();
    }
  }

  // Write pointer first ...
  *reinterpret_cast<uint64_t*>(queue_resource_.Queue_write_ptr) = new_index;

  // ... which must be visible to the GPU before the doorbell is rung.
  std::atomic_thread_fence(std::memory_order_release);

  *reinterpret_cast<uint64_t*>(queue_resource_.Queue_DoorBell) = new_index;

  // Let the owner of the next region proceed.
  atomic::Store(&cached_commit_index_, new_index, std::memory_order_release);
}

// Submits a region previously obtained from AcquireWriteAddress().
//
// curr_index  Monotonic index where the region starts.
// cmd_size    Size of the region in bytes; sizes larger than the ring are
//             rejected (assert in debug builds, silently ignored otherwise).
template <bool useGCR>
void BlitSdma<useGCR>::ReleaseWriteAddress(uint64_t curr_index, uint32_t cmd_size) {
  if (cmd_size <= kQueueSize) {
    UpdateWriteAndDoorbellRegister(curr_index, curr_index + cmd_size);
    return;
  }

  assert(false && "cmd_addr is outside the queue buffer range");
}

// Tries to fill the ring from |curr_index| up to the end of the buffer with
// zeroed (NOP) bytes and submit them, so that the next reservation starts at
// the beginning of the ring.  Returns without doing anything if the engine
// still owns that space or another thread moved the reserve index; the caller
// is expected to retry.
template <bool useGCR> void BlitSdma<useGCR>::PadRingToEnd(uint64_t curr_index) {
  // Index corresponding to the end of the ring buffer.
  const uint64_t ring_end_index = curr_index + (kQueueSize - WrapIntoRing(curr_index));

  // The engine has not finished with this region yet: let the caller wait for
  // the read index to move and try again.
  if (!CanWriteUpto(ring_end_index)) return;

  // Reserve the tail of the ring; bail out if somebody else got there first.
  const auto previous = atomic::Cas(&cached_reserve_index_, ring_end_index, curr_index,
                                    std::memory_order_release);
  if (previous != curr_index) return;

  // Write NOP commands into the reserved region.
  char* const pad_begin = queue_start_addr_ + WrapIntoRing(curr_index);
  memset(pad_begin, 0, ring_end_index - curr_index);

  // Keep the pending-bytes tracking consistent across the padded span.
  bytes_written_.fill(WrapIntoRing(curr_index), WrapIntoRing(ring_end_index), bytes_queued_);

  // Submit the padding.
  UpdateWriteAndDoorbellRegister(curr_index, ring_end_index);
}

// Maps a monotonic index to a byte offset inside the ring by masking it with
// (kQueueSize - 1).
template <bool useGCR> uint32_t BlitSdma<useGCR>::WrapIntoRing(uint64_t index) {
  const auto ring_mask = kQueueSize - 1;
  return static_cast<uint32_t>(index & ring_mask);
}

// Tells whether the ring can be written up to (monotonic) |upto_index| without
// overtaking the engine's read pointer.
template <bool useGCR> bool BlitSdma<useGCR>::CanWriteUpto(uint64_t upto_index) {
  // Monotonic read index as currently published in the queue resource.
  const uint64_t hw_read_index = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr);

  // Distance between the requested index and the read index.  At most
  // (kQueueSize - 1) bytes can be outstanding at a time.
  const uint64_t outstanding = upto_index - hw_read_index;
  return outstanding < kQueueSize;
}

// Writes an SDMA fence packet at |fence_command_addr|.
//
// fence_command_addr  Storage for one SDMA_PKT_FENCE; must not be null.
// fence               Address stored in the packet's address fields.
// fence_value         Value stored in the packet's data field.
template <bool useGCR>
void BlitSdma<useGCR>::BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
                                         uint32_t fence_value) {
  assert(fence_command_addr != NULL);

  SDMA_PKT_FENCE& pkt = *reinterpret_cast<SDMA_PKT_FENCE*>(fence_command_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_FENCE));

  // Header.
  pkt.HEADER_UNION.op = SDMA_OP_FENCE;
  const bool is_gfx10_plus = agent_->supported_isas()[0]->GetMajorVersion() >= 10;
  if (is_gfx10_plus) {
    pkt.HEADER_UNION.mtype = 3;
  }

  // Target address, split into its 32-bit halves.
  pkt.ADDR_LO_UNION.addr_31_0 = ptrlow32(fence);
  pkt.ADDR_HI_UNION.addr_63_32 = ptrhigh32(fence);

  // Payload.
  pkt.DATA_UNION.data = fence_value;
}

// Writes |num_copy_command| linear copy packets starting at |cmd_addr| that
// together copy |size| bytes from |src| to |dst|.
//
// Each packet copies at most max_single_linear_copy_size_ bytes when that
// member is non-zero, and kMaxSingleCopySize bytes otherwise.  Consecutive
// packets are placed linear_copy_command_size_ bytes apart.
template <bool useGCR>
void BlitSdma<useGCR>::BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, void* dst,
                                        const void* src, size_t size) {
  const size_t max_copy_size = max_single_linear_copy_size_ ? max_single_linear_copy_size_ :
                                                              kMaxSingleCopySize;
  // A limit of 2^30 - 1 selects the extended count field of the packet.
  const bool use_extended_count = (max_copy_size == (1 << 30) -1);

  size_t bytes_done = 0;
  for (uint32_t cmds_left = num_copy_command; cmds_left != 0; --cmds_left) {
    const size_t bytes_left = size - bytes_done;
    const uint32_t copy_size = static_cast<uint32_t>(std::min(bytes_left, max_copy_size));

    const void* cur_src = static_cast<const char*>(src) + bytes_done;
    void* cur_dst = static_cast<char*>(dst) + bytes_done;

    SDMA_PKT_COPY_LINEAR& pkt = *reinterpret_cast<SDMA_PKT_COPY_LINEAR*>(cmd_addr);
    memset(&pkt, 0, sizeof(SDMA_PKT_COPY_LINEAR));

    pkt.HEADER_UNION.op = SDMA_OP_COPY;
    pkt.HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR;

    /* count is 1-based */
    if (use_extended_count) {
      pkt.COUNT_UNION.count_ext.count = copy_size - 1;
    } else {
      pkt.COUNT_UNION.count.count = copy_size - 1;
    }

    pkt.SRC_ADDR_LO_UNION.src_addr_31_0 = ptrlow32(cur_src);
    pkt.SRC_ADDR_HI_UNION.src_addr_63_32 = ptrhigh32(cur_src);

    pkt.DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(cur_dst);
    pkt.DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(cur_dst);

    // Step to the next packet slot and the next chunk of data.
    cmd_addr += linear_copy_command_size_;
    bytes_done += copy_size;
  }

  // The packets must cover the request exactly.
  assert(bytes_done == size);
}

/*
Builds the SDMA_SUBOP_COPY_LINEAR_RECT packets for a (up to 3D) rectangular copy.

Copies are done in terms of elements (1, 2, 4, 8, or 16 bytes) and have alignment restrictions.
Elements are coded by the log2 of the element size in bytes (i.e. element 0=1 byte, 4=16 byte).
This routine breaks a large rect into tiles that can be handled by hardware.  Pitches and offsets
must be representable in terms of elements in all tiles of the copy.

Parameters:
  append      Called once per generated packet with sizeof(SDMA_PKT_COPY_LINEAR_RECT); the
              returned storage is reset and then filled in as one packet.
  dst, src    Pitched destination / source descriptors (base, pitch, slice).
  dst_offset, src_offset
              Start of the copied region.  x is added to the base address as-is, y is scaled by
              the pitch and z by the slice.
  range       Extent of the copy.  x is split into tiles along its width; y and z are clipped per
              tile to the packet's rect_y / rect_z limits.

Throws AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT) when a pitch (or, if range->z != 1, a
slice) shifted down by the smallest required element exceeds the packet field limit.
*/
template <bool useGCR>
void BlitSdma<useGCR>::BuildCopyRectCommand(const std::function<void*(size_t)>& append,
                                            const hsa_pitched_ptr_t* dst,
                                            const hsa_dim3_t* dst_offset,
                                            const hsa_pitched_ptr_t* src,
                                            const hsa_dim3_t* src_offset, const hsa_dim3_t* range) {
  // Returns the index of the first set bit (ie log2 of the largest power of 2 that evenly divides
  // width), the largest element that perfectly covers width.
  // width | 16 ensures that we don't return a higher element than is supported and avoids
  // issues with 0.
  auto maxAlignedElement = [](size_t width) {
    return __builtin_ctz(width | 16);
  };

  // GFX12 or later use a different packet format that is incompatible (fields changed in size and location).
  const bool isGFX12Plus =
                      (agent_->supported_isas()[0]->GetMajorVersion() >= 12);

  // Limits in terms of element count
  const uint32_t max_pitch = 1    << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::pitch_bits   : SDMA_PKT_COPY_LINEAR_RECT::pitch_bits);
  const uint64_t max_slice = 1ULL << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::slice_bits   : SDMA_PKT_COPY_LINEAR_RECT::slice_bits);
  const uint32_t max_x     = 1    << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::rect_xy_bits : SDMA_PKT_COPY_LINEAR_RECT::rect_xy_bits);
  const uint32_t max_y     = 1    << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::rect_xy_bits : SDMA_PKT_COPY_LINEAR_RECT::rect_xy_bits);
  const uint32_t max_z     = 1    << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::rect_z_bits  : SDMA_PKT_COPY_LINEAR_RECT::rect_z_bits);

  // Find maximum element that describes the pitch and slice.
  // Pitch and slice must both be represented in units of elements.  No element larger than this
  // may be used in any tile as the pitches would not be exactly represented.
  int max_ele = Min(maxAlignedElement(src->pitch), maxAlignedElement(dst->pitch));
  if (range->z != 1)  // Only need to consider slice if HW will copy along Z.
    max_ele = Min(max_ele, maxAlignedElement(src->slice), maxAlignedElement(dst->slice));

  /*
  Find the minimum element size that will be needed for any tile.

  No subdivision of a range admits a larger element size for the smallest element in any subdivision
  than the element size that covers the whole range, though some can be worse (this is easily model
  checked).  Subdividing with any element larger than the covering element won't change the covering
  element of the remainder
  ( Range%Element = (Range-N*LargerElement)%Element since LargerElement%Element=0 ).
    Ex. range->x=71, assume max range is 16 elements:  We can break at 64 giving tiles:
    [0,63], [64-70] (width 64 & 7).  64 is covered by element 4 (16B) and 7 is covered by element 0
    (1B).  Exactly covering 71 requires using element 0.

  Base addresses in each tile must be DWORD aligned, if not then the offset from an aligned address
  must be represented in elements.  This may reduce the size of the element, but since elements are
  integer multiples of each other this is harmless.

  src and dst base has already been checked for DWORD alignment so we only need to consider the
  offset here.
  */
  int min_ele = Min(max_ele, maxAlignedElement(range->x), maxAlignedElement(src_offset->x % 4),
                    maxAlignedElement(dst_offset->x % 4));

  // Check that pitch and slice can be represented in the tile with the smallest element
  if ((src->pitch >> min_ele) > max_pitch || (dst->pitch >> min_ele) > max_pitch)
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect pitch out of limits.\n");
  if (range->z != 1) {  // Only need to consider slice if HW will copy along Z.
    if ((src->slice >> min_ele) > max_slice || (dst->slice >> min_ele) > max_slice)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                               "Copy rect slice out of limits.\n");
  }

  // Break copy into tiles
  // z and y advance in fixed steps of max_z / max_y; x advances by the width actually covered by
  // each tile, which depends on the element chosen for that tile.
  for (uint32_t z = 0; z < range->z; z += max_z) {
    for (uint32_t y = 0; y < range->y; y += max_y) {
      uint32_t x = 0;
      while (x < range->x) {
        // Bytes still to be covered on this row.
        uint32_t width = range->x - x;

        // Get largest element which describes the start of this tile after its base address has
        // been aligned.  Base addresses must be DWORD (4 byte) aligned.
        int aligned_ele = Min(maxAlignedElement((src_offset->x + x) % 4),
                              maxAlignedElement((dst_offset->x + x) % 4), max_ele);

        // Get largest permissible element which exactly covers width
        int element = Min(maxAlignedElement(width), aligned_ele);
        int xcount = width >> element;

        // If width is too large then width is at least max_x bytes (bigger than any element) so
        // drop the width restriction and clip element count to max_x.
        if (xcount > max_x) {
          element = aligned_ele;
          xcount = Min(width >> element, max_x);
        }

        // Get base addresses and offsets for this tile.
        uintptr_t sbase = (uintptr_t)src->base + src_offset->x + x +
            (src_offset->y + y) * src->pitch + (src_offset->z + z) * src->slice;
        uintptr_t dbase = (uintptr_t)dst->base + dst_offset->x + x +
            (dst_offset->y + y) * dst->pitch + (dst_offset->z + z) * dst->slice;
        // The sub-DWORD part of each address becomes the packet's x offset (in elements); the
        // base itself is rounded down to a DWORD boundary.
        uint soff = (sbase % 4) >> element;
        uint doff = (dbase % 4) >> element;
        sbase &= ~3ull;
        dbase &= ~3ull;

        // Advance by the number of bytes this tile covers along x.
        x += xcount << element;

        // Pitch, slice and rect extents below are all programmed as (value - 1).
        // GFX12 has a different packet format that is incompatible with pre-GFX12.
        if (isGFX12Plus) {
          SDMA_PKT_COPY_LINEAR_RECT_GFX12* pkt =
            (SDMA_PKT_COPY_LINEAR_RECT_GFX12*)append(sizeof(SDMA_PKT_COPY_LINEAR_RECT));
          *pkt = {};
          pkt->HEADER_UNION.op = SDMA_OP_COPY;
          pkt->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR_RECT;
          pkt->HEADER_UNION.element = element;
          pkt->SRC_ADDR_LO_UNION.src_addr_31_0 = sbase;
          pkt->SRC_ADDR_HI_UNION.src_addr_63_32 = sbase >> 32;
          pkt->SRC_PARAMETER_1_UNION.src_offset_x = soff;
          pkt->SRC_PARAMETER_2_UNION.src_pitch = (src->pitch >> element) - 1;
          pkt->SRC_PARAMETER_3_UNION.src_slice_pitch =
            (range->z == 1) ? 0 : (src->slice >> element) - 1;
          pkt->DST_ADDR_LO_UNION.dst_addr_31_0 = dbase;
          pkt->DST_ADDR_HI_UNION.dst_addr_63_32 = dbase >> 32;
          pkt->DST_PARAMETER_1_UNION.dst_offset_x = doff;
          pkt->DST_PARAMETER_2_UNION.dst_pitch = (dst->pitch >> element) - 1;
          pkt->DST_PARAMETER_3_UNION.dst_slice_pitch =
            (range->z == 1) ? 0 : (dst->slice >> element) - 1;
          pkt->RECT_PARAMETER_1_UNION.rect_x = xcount - 1;
          pkt->RECT_PARAMETER_1_UNION.rect_y = Min(range->y - y, max_y) - 1;
          pkt->RECT_PARAMETER_2_UNION.rect_z = Min(range->z - z, max_z) - 1;
        } else {  // Pre-GFX12, common packet used
          SDMA_PKT_COPY_LINEAR_RECT* pkt =
            (SDMA_PKT_COPY_LINEAR_RECT*)append(sizeof(SDMA_PKT_COPY_LINEAR_RECT));
          *pkt = {};
          pkt->HEADER_UNION.op = SDMA_OP_COPY;
          pkt->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR_RECT;
          pkt->HEADER_UNION.element = element;
          pkt->SRC_ADDR_LO_UNION.src_addr_31_0 = sbase;
          pkt->SRC_ADDR_HI_UNION.src_addr_63_32 = sbase >> 32;
          pkt->SRC_PARAMETER_1_UNION.src_offset_x = soff;
          pkt->SRC_PARAMETER_2_UNION.src_pitch = (src->pitch >> element) - 1;
          pkt->SRC_PARAMETER_3_UNION.src_slice_pitch =
            (range->z == 1) ? 0 : (src->slice >> element) - 1;
          pkt->DST_ADDR_LO_UNION.dst_addr_31_0 = dbase;
          pkt->DST_ADDR_HI_UNION.dst_addr_63_32 = dbase >> 32;
          pkt->DST_PARAMETER_1_UNION.dst_offset_x = doff;
          pkt->DST_PARAMETER_2_UNION.dst_pitch = (dst->pitch >> element) - 1;
          pkt->DST_PARAMETER_3_UNION.dst_slice_pitch =
            (range->z == 1) ? 0 : (dst->slice >> element) - 1;
          pkt->RECT_PARAMETER_1_UNION.rect_x = xcount - 1;
          pkt->RECT_PARAMETER_1_UNION.rect_y = Min(range->y - y, max_y) - 1;
          pkt->RECT_PARAMETER_2_UNION.rect_z = Min(range->z - z, max_z) - 1;
	}
      }
    }
  }
}

// Writes |num_fill_command| back-to-back constant-fill packets at |cmd_addr|
// which together fill |count| 32-bit words starting at |ptr| with |value|.
//
// Every packet fills at most kMaxSingleFillSize bytes.  |num_fill_command|
// must match the number of packets needed for |count| (checked by asserts).
template <bool useGCR>
void BlitSdma<useGCR>::BuildFillCommand(char* cmd_addr, uint32_t num_fill_command, void* ptr,
                                        uint32_t value, size_t count) {
  const uint32_t maxDwordCount = kMaxSingleFillSize / sizeof(uint32_t);
  SDMA_PKT_CONSTANT_FILL* const packets = reinterpret_cast<SDMA_PKT_CONSTANT_FILL*>(cmd_addr);
  char* fill_dst = reinterpret_cast<char*>(ptr);

  for (uint32_t idx = 0; idx < num_fill_command; ++idx) {
    assert(count != 0 && "SDMA fill command count error.");

    // Number of dwords handled by this packet.
    const uint32_t fill_count = Min(count, size_t(maxDwordCount));

    SDMA_PKT_CONSTANT_FILL& pkt = packets[idx];
    memset(&pkt, 0, sizeof(SDMA_PKT_CONSTANT_FILL));

    pkt.HEADER_UNION.op = SDMA_OP_CONST_FILL;
    pkt.HEADER_UNION.fillsize = 2;  // DW fill

    pkt.DST_ADDR_LO_UNION.dst_addr_31_0 = ptrlow32(fill_dst);
    pkt.DST_ADDR_HI_UNION.dst_addr_63_32 = ptrhigh32(fill_dst);

    pkt.DATA_UNION.src_data_31_0 = value;

    /* count is 1-based */
    pkt.COUNT_UNION.count = (fill_count - 1) * sizeof(uint32_t);

    // Move on to the memory following what this packet fills.
    fill_dst += fill_count * sizeof(uint32_t);
    count -= fill_count;
  }

  assert(count == 0 && "SDMA fill command count error.");
}

// Writes a memory-poll packet at |cmd_addr| that compares the 32-bit content
// at |addr| for equality with |reference|.
template <bool useGCR>
void BlitSdma<useGCR>::BuildPollCommand(char* cmd_addr, void* addr, uint32_t reference) {
  SDMA_PKT_POLL_REGMEM& pkt = *reinterpret_cast<SDMA_PKT_POLL_REGMEM*>(cmd_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_POLL_REGMEM));

  // Header: poll memory with the "equal" compare function.
  pkt.HEADER_UNION.op = SDMA_OP_POLL_REGMEM;
  pkt.HEADER_UNION.mem_poll = 1;
  pkt.HEADER_UNION.func = 0x3;  // IsEqual.

  // Location being polled.
  pkt.ADDR_LO_UNION.addr_31_0 = ptrlow32(addr);
  pkt.ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr);

  // Expected value; all 32 bits take part in the comparison.
  pkt.VALUE_UNION.value = reference;
  pkt.MASK_UNION.mask = 0xffffffff;  // Compare the whole content.

  // Polling cadence.
  pkt.DW5_UNION.interval = 0x04;
  pkt.DW5_UNION.retry_count = 0xfff;  // Retry forever.
}

// Writes an atomic packet at |cmd_addr| that performs a 64-bit add of
// 0xffffffffffffffff (i.e. -1) on the value at |addr|.
template <bool useGCR>
void BlitSdma<useGCR>::BuildAtomicDecrementCommand(char* cmd_addr, void* addr) {
  SDMA_PKT_ATOMIC& pkt = *reinterpret_cast<SDMA_PKT_ATOMIC*>(cmd_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_ATOMIC));

  pkt.HEADER_UNION.op = SDMA_OP_ATOMIC;
  pkt.HEADER_UNION.operation = SDMA_ATOMIC_ADD64;

  // Operand location.
  pkt.ADDR_LO_UNION.addr_31_0 = ptrlow32(addr);
  pkt.ADDR_HI_UNION.addr_63_32 = ptrhigh32(addr);

  // Addend: all ones in both halves.
  pkt.SRC_DATA_LO_UNION.src_data_31_0 = 0xffffffff;
  pkt.SRC_DATA_HI_UNION.src_data_63_32 = 0xffffffff;
}

// Writes a "get global timestamp" packet at |cmd_addr| whose address fields
// point at |write_address|.
template <bool useGCR>
void BlitSdma<useGCR>::BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address) {
  SDMA_PKT_TIMESTAMP& pkt = *reinterpret_cast<SDMA_PKT_TIMESTAMP*>(cmd_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_TIMESTAMP));

  pkt.HEADER_UNION.op = SDMA_OP_TIMESTAMP;
  pkt.HEADER_UNION.sub_op = SDMA_SUBOP_TIMESTAMP_GET_GLOBAL;

  pkt.ADDR_LO_UNION.addr_31_0 = ptrlow32(write_address);
  pkt.ADDR_HI_UNION.addr_63_32 = ptrhigh32(write_address);
}

// Writes a trap packet at |cmd_addr| carrying |event_id| as its interrupt
// context.
template <bool useGCR> void BlitSdma<useGCR>::BuildTrapCommand(char* cmd_addr, uint32_t event_id) {
  SDMA_PKT_TRAP& pkt = *reinterpret_cast<SDMA_PKT_TRAP*>(cmd_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_TRAP));

  pkt.HEADER_UNION.op = SDMA_OP_TRAP;
  pkt.INT_CONTEXT_UNION.int_ctx = event_id;
}

// Copies the pre-built HDP flush packet (hdp_flush_cmd, flush_command_size_
// bytes) to |cmd_addr|, which must not be null.
template <bool useGCR> void BlitSdma<useGCR>::BuildHdpFlushCommand(char* cmd_addr) {
  assert(cmd_addr != NULL);
  memcpy(cmd_addr, &hdp_flush_cmd, flush_command_size_);
}

// Writes a user GCR packet at |cmd_addr|.  The GL2 and GLK write-back bits are
// always set; when |invalidate| is true the GL2, GL1, GLV and GLK invalidate
// bits are set as well.  Only valid on the useGCR instantiation.
template <bool useGCR> void BlitSdma<useGCR>::BuildGCRCommand(char* cmd_addr, bool invalidate) {
  assert(cmd_addr != NULL);
  assert(useGCR && "Unsupported SDMA command - GCR.");

  SDMA_PKT_GCR& pkt = *reinterpret_cast<SDMA_PKT_GCR*>(cmd_addr);
  memset(&pkt, 0, sizeof(SDMA_PKT_GCR));

  pkt.HEADER_UNION.op = SDMA_OP_GCR;
  pkt.HEADER_UNION.sub_op = SDMA_SUBOP_USER_GCR;

  // Write-back controls.
  pkt.WORD2_UNION.GCR_CONTROL_GL2_WB = 1;
  pkt.WORD2_UNION.GCR_CONTROL_GLK_WB = 1;

  // Optional invalidate controls.
  if (invalidate) {
    pkt.WORD2_UNION.GCR_CONTROL_GL2_INV = 1;
    pkt.WORD2_UNION.GCR_CONTROL_GL1_INV = 1;
    pkt.WORD2_UNION.GCR_CONTROL_GLV_INV = 1;
    pkt.WORD2_UNION.GCR_CONTROL_GLK_INV = 1;
  }

  // Discarding all lines for now.
  pkt.WORD2_UNION.GCR_CONTROL_GL2_RANGE = 0;
}

// Returns 0 when the read pointer has caught up with the cached commit index;
// otherwise returns bytes_queued_ minus the bytes_written_ entry recorded at
// the ring offset the read pointer currently points to.
template <bool useGCR> uint64_t BlitSdma<useGCR>::PendingBytes() {
  const uint64_t committed = atomic::Load(&cached_commit_index_, std::memory_order_acquire);
  const uint64_t consumed = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr);

  if (committed != consumed) {
    return bytes_queued_ - bytes_written_[WrapIntoRing(consumed)];
  }
  return 0;
}

template class BlitSdma<false>;
template class BlitSdma<true>;

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_cpu_agent.h"

#include <algorithm>
#include <cstring>
#include <thread>

#include "core/inc/amd_memory_region.h"
#include "core/inc/driver.h"
#include "core/inc/host_queue.h"

#include "inc/hsa_ext_image.h"

namespace rocr {
namespace AMD {
// Constructs the agent for CPU node |node|.
//
// node         Node id forwarded to the core::Agent base.
// node_props   Node properties; a copy is kept in properties_.
// driver_type  Selects the driver (via the runtime singleton) used by the base.
//
// The memory region list is built first, then the cache list.
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props,
                   core::DriverType driver_type)
    : core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node,
                  kAmdCpuDevice),
      properties_(node_props) {
  InitRegionList();
  InitCacheList();
}

// Destroys every region created by InitRegionList() and empties the list.
CpuAgent::~CpuAgent() {
  DeleteObject destroy;
  for (auto& region : regions_) {
    destroy(region);
  }
  regions_.clear();
}

void CpuAgent::InitRegionList() {
  const bool is_apu_node = (properties_.NumFComputeCores > 0);

  std::vector<HsaMemoryProperties> mem_props(properties_.NumMemoryBanks);
  if (HSA_STATUS_SUCCESS == driver().GetMemoryProperties(node_id(), mem_props)) {
    std::vector<HsaMemoryProperties>::iterator system_prop =
        std::find_if(mem_props.begin(), mem_props.end(), [](HsaMemoryProperties prop) -> bool {
          return (prop.SizeInBytes > 0 && prop.HeapType == HSA_HEAPTYPE_SYSTEM);
        });

    HsaMemoryProperties system_props;
    std::memset(&system_props, 0, sizeof(HsaMemoryProperties));
    system_props.HeapType = HSA_HEAPTYPE_SYSTEM;
    system_props.SizeInBytes = 0;
    system_props.VirtualBaseAddress = 0;

    if (system_prop != mem_props.end()) system_props = *system_prop;

    // Fine-Grain Memory
    regions_.push_back(new MemoryRegion(true, false, is_apu_node, false, true, this, system_props));

    // Ext-Fine-Grain Memory
    regions_.push_back(new MemoryRegion(false, false, is_apu_node, true, true, this, system_props));

    // Kernargs
    regions_.push_back(new MemoryRegion(true, true, is_apu_node, false, true, this, system_props));

    if (!is_apu_node) {
      // Coarse Grain
      regions_.push_back(new MemoryRegion(false, false, is_apu_node, false, true, this, system_props));
    }
  }
}

void CpuAgent::InitCacheList() {
  // Get CPU cache information.
  cache_props_.resize(properties_.NumCaches);
  if (HSA_STATUS_SUCCESS !=
      driver().GetCacheProperties(node_id(), properties_.CComputeIdLo, cache_props_)) {
    cache_props_.clear();
  } else {
    // Only store CPU D-cache.
    for (size_t cache_id = 0; cache_id < cache_props_.size(); ++cache_id) {
      const HsaCacheType type = cache_props_[cache_id].CacheType;
      if (type.ui32.CPU != 1 || type.ui32.Instruction == 1) {
        cache_props_.erase(cache_props_.begin() + cache_id);
        --cache_id;
      }
    }
  }

  // Update cache objects
  caches_.clear();
  caches_.resize(cache_props_.size());
  char name[64];
  GetInfo(HSA_AGENT_INFO_NAME, name);
  std::string deviceName = name;
  for (size_t i = 0; i < caches_.size(); i++)
    caches_[i].reset(new core::Cache(deviceName + " L" + std::to_string(cache_props_[i].CacheLevel),
                                     cache_props_[i].CacheLevel, cache_props_[i].CacheSize));
}

// Invokes |callback| on memory regions associated with this agent.
//
// include_peer  false: visit only this agent's own regions (regions_).
//               true:  visit the runtime's system fine-grain regions followed
//                      by its system coarse-grain regions.
// callback      Called per region; a non-success return stops the traversal.
// data          Opaque pointer forwarded to |callback|.
//
// Returns the first non-success status produced by |callback|, otherwise the
// status of the last traversal.
hsa_status_t CpuAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void* data),
                                   void* data) const {
  if (include_peer) {
    // Expose all system regions in the system: fine-grain ones first ...
    const hsa_status_t fine_status = VisitRegion(
        core::Runtime::runtime_singleton_->system_regions_fine(), callback, data);
    if (fine_status != HSA_STATUS_SUCCESS) {
      return fine_status;
    }

    // ... then the coarse-grain ones.
    return VisitRegion(core::Runtime::runtime_singleton_->system_regions_coarse(),
                       callback, data);
  }

  return VisitRegion(regions_, callback, data);
}

// Invokes |callback| on every user-visible region in |regions|, in order.
// Regions that are not user visible are skipped.  Stops at, and returns, the
// first non-success status; returns HSA_STATUS_SUCCESS otherwise.
hsa_status_t CpuAgent::VisitRegion(
    const std::vector<const core::MemoryRegion*>& regions,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  hsa_status_t result = HSA_STATUS_SUCCESS;

  for (const core::MemoryRegion* candidate : regions) {
    if (candidate->user_visible()) {
      result = callback(core::MemoryRegion::Convert(candidate), data);
      if (result != HSA_STATUS_SUCCESS) {
        break;
      }
    }
  }

  return result;
}

// Region iteration entry point: forwards to VisitRegion() with peer regions
// included.
hsa_status_t CpuAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  const bool include_peer = true;
  return VisitRegion(include_peer, callback, data);
}

// Invokes |callback| on the handle of every cache in caches_, in order.
// Stops at, and returns, the first non-success status; returns
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t CpuAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                                    void* data) const {
  hsa_status_t result = HSA_STATUS_SUCCESS;

  for (const auto& cache : caches_) {
    result = callback(core::Cache::Convert(cache.get()), data);
    if (result != HSA_STATUS_SUCCESS) break;
  }

  return result;
}

// Invokes |callback| (through an AMD::callback_t wrapper) on the handle of
// every ISA returned by supported_isas(), in order.  Stops at, and returns,
// the first non-success status; returns HSA_STATUS_SUCCESS otherwise.
hsa_status_t CpuAgent::IterateSupportedIsas(
                  hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                                          void* data) const {
  AMD::callback_t<decltype(callback)> call(callback);

  hsa_status_t result = HSA_STATUS_SUCCESS;
  for (const auto& isa : supported_isas()) {
    result = call(core::Isa::Handle(isa), data);
    if (result != HSA_STATUS_SUCCESS) break;
  }

  return result;
}

// Answers an agent-info query for a CPU agent.
//
// @param attribute  Attribute to query; core, image-extension and AMD vendor
//                   attributes are all handled by the same switch.
// @param value      Caller-provided buffer that receives the result. Its
//                   required size depends on the attribute and is NOT checked
//                   here; the caller must supply a large enough buffer.
// @return HSA_STATUS_SUCCESS when the attribute is recognised,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown attribute, or the
//         status of Runtime::GetSystemInfo for
//         HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY.
//
// Most GPU-centric attributes (wavefront size, grid limits, queue limits,
// image limits, SDMA engines, ...) are reported as zero for a CPU agent.
hsa_status_t CpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {

  // The switch also covers extension/vendor enumerators that are not members
  // of hsa_agent_info_t, so switch on the underlying integer value.
  const size_t attribute_u = static_cast<size_t>(attribute);

  switch (attribute_u) {

    // The code copies HsaNodeProperties.MarketingName a Unicode string
    // which is encoded in UTF-16 as a 7-bit ASCII string. The value of
    // HsaNodeProperties.MarketingName is obtained from the "model name"
    // property of /proc/cpuinfo file
    case HSA_AGENT_INFO_NAME:
    case HSA_AMD_AGENT_INFO_PRODUCT_NAME: {
      std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
      char* temp = reinterpret_cast<char*>(value);
      // Test the bound before reading the source array, and always leave the
      // final byte as the NUL terminator written by the memset above.
      for (uint32_t idx = 0;
           idx < HSA_PUBLIC_NAME_SIZE - 1 && properties_.MarketingName[idx] != 0; idx++) {
        temp[idx] = (uint8_t)properties_.MarketingName[idx];
      }
      break;
    }
    case HSA_AGENT_INFO_VENDOR_NAME:
      // TODO: hardcode for now, wait until SWDEV-88894 implemented
      std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
      std::memcpy(value, "CPU", sizeof("CPU"));
      break;
    case HSA_AGENT_INFO_FEATURE:
      *((hsa_agent_feature_t*)value) = static_cast<hsa_agent_feature_t>(0);
      break;
    case HSA_AGENT_INFO_MACHINE_MODEL:
#if defined(HSA_LARGE_MODEL)
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
#else
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
#endif
      break;
    case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES:
    case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      // TODO: validate if this is true.
      *((hsa_default_float_rounding_mode_t*)value) =
          HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR;
      break;
    case HSA_AGENT_INFO_FAST_F16_OPERATION:
      // TODO: validate if this is true.
      *((bool*)value) = false;
      break;
    case HSA_AGENT_INFO_PROFILE:
      *((hsa_profile_t*)value) = HSA_PROFILE_FULL;
      break;
    case HSA_AGENT_INFO_WAVEFRONT_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_DIM:
      std::memset(value, 0, sizeof(uint16_t) * 3);
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_GRID_MAX_DIM:
      std::memset(value, 0, sizeof(hsa_dim3_t));
      break;
    case HSA_AGENT_INFO_GRID_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
      // TODO: ?
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUES_MAX:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AGENT_INFO_QUEUE_TYPE:
      *((hsa_queue_type32_t*)value) = HSA_QUEUE_TYPE_MULTI;
      break;
    case HSA_AGENT_INFO_NODE:
      // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode).
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AGENT_INFO_DEVICE:
      *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_CPU;
      break;
    case HSA_AGENT_INFO_CACHE_SIZE: {
      // The result is an array of four uint32_t, one slot per cache level
      // (level N is stored at index N - 1).
      constexpr uint32_t kMaxCacheLevels = 4;
      std::memset(value, 0, sizeof(uint32_t) * kMaxCacheLevels);
      const size_t num_cache = cache_props_.size();
      for (size_t i = 0; i < num_cache; ++i) {
        const uint32_t line_level = cache_props_[i].CacheLevel;
        // Ignore levels that do not map onto the four output slots; writing
        // them would land outside the caller's buffer.
        if (line_level < 1 || line_level > kMaxCacheLevels) continue;
        ((uint32_t*)value)[line_level - 1] = cache_props_[i].CacheSize * 1024;
      }
    } break;
    case HSA_AGENT_INFO_ISA:
      ((hsa_isa_t*)value)->handle = 0;
      break;
    case HSA_AGENT_INFO_EXTENSIONS:
      memset(value, 0, sizeof(uint8_t) * 128);
      break;
    case HSA_AGENT_INFO_VERSION_MAJOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_AGENT_INFO_VERSION_MINOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
      memset(value, 0, sizeof(uint32_t) * 2);
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
      memset(value, 0, sizeof(uint32_t) * 3);
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES:
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES:
    case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_CHIP_ID:
      *((uint32_t*)value) = properties_.DeviceId;
      break;
    case HSA_AMD_AGENT_INFO_CACHELINE_SIZE:
      // TODO: hardcode for now.
      *((uint32_t*)value) = 64;
      break;
    case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT:
      *((uint32_t*)value) = properties_.NumCPUCores;
      break;
    case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY:
      *((uint32_t*)value) = properties_.MaxEngineClockMhzCCompute;
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID:
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS:
      *((uint32_t*)value) = static_cast<uint32_t>(
          1 << properties_.Capability.ui32.WatchPointsTotalBits);
      break;
    case HSA_AMD_AGENT_INFO_BDFID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.LocationId);
      break;
    case HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU:
      *((uint32_t*)value) = static_cast<uint32_t>(
          properties_.NumSIMDPerCU * properties_.MaxWavesPerSIMD);
      break;
    case HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU:
      *((uint32_t*)value) = properties_.NumSIMDPerCU;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES:
      *((uint32_t*)value) = properties_.NumShaderBanks;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE:
      *((uint32_t*)value) = properties_.NumArrays;
      break;
    case HSA_AMD_AGENT_INFO_HDP_FLUSH:
      *((hsa_amd_hdp_flush_t*)value) = {nullptr, nullptr};
      break;
    case HSA_AMD_AGENT_INFO_DOMAIN:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.Domain);
      break;
    case HSA_AMD_AGENT_INFO_UUID: {
      // At this point CPU devices do not support UUID's.
      char uuid_tmp[] = "CPU-XX";
      snprintf((char*)value, sizeof(uuid_tmp), "%s", uuid_tmp);
      break;
    }
    case HSA_AMD_AGENT_INFO_ASIC_REVISION:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.Capability.ui32.ASICRevision);
      break;
    case HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS:
      assert(regions_.size() != 0 && "No device local memory found!");
      *((bool*)value) = true;
      break;
    case HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY:
      // Delegated to the runtime-wide system timestamp frequency.
      return core::Runtime::runtime_singleton_->GetSystemInfo(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY,
                                                              value);
      break;
    case HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.FamilyID);
      break;
    case HSA_AMD_AGENT_INFO_UCODE_VERSION:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SDMA_ENG:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SDMA_XGMI_ENG:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_IOMMU_SUPPORT:
      *((hsa_amd_iommu_version_t*)value) = HSA_IOMMU_SUPPORT_NONE;
      break;
    case HSA_AMD_AGENT_INFO_NUM_XCC:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_UID:
      *((uint32_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_NEAREST_CPU:
      ((hsa_agent_t*)value)->handle = 0;
      break;
    case HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES:
      memset(value, 0, sizeof(uint8_t) * 8);
      break;
    case HSA_AMD_AGENT_INFO_AQL_EXTENSIONS:
      memset(value, 0, sizeof(uint8_t) * 8);
      break;
    case HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_MAX:
    case HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_CURRENT:
      *((uint64_t*)value) = 0;
      break;
    case HSA_AMD_AGENT_INFO_CLOCK_COUNTERS:
      memset(value, 0, sizeof(hsa_amd_clock_counters_t));
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      break;
  }
  return HSA_STATUS_SUCCESS;
}

// Queue creation is not available on a CPU agent because there is no hardware
// AQL packet processor behind it. Every argument is ignored and `*queue` is
// left untouched; the call always fails with HSA_STATUS_ERROR.
hsa_status_t CpuAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                                   core::HsaEventCallback event_callback, void* data,
                                   uint32_t private_segment_size, uint32_t group_segment_size,
                                   core::Queue** queue) {
  const hsa_status_t unsupported = HSA_STATUS_ERROR;
  return unsupported;
}

// Performs a CPU-to-CPU copy on a detached worker thread and returns
// immediately with HSA_STATUS_SUCCESS.
//
// The worker waits for every signal in `dep_signals` to reach zero, runs
// memcpy(dst, src, size), and finally decrements `out_signal` by one. When
// either agent has profiling enabled, `out_signal` is tagged with this agent
// and the worker records start/end timestamps around the memcpy.
//
// The dependency list is copied for the worker; `out_signal` is passed by
// address, so it must remain valid until the worker has signalled it.
hsa_status_t CpuAgent::DmaCopy(void* dst, core::Agent& dst_agent, const void* src,
                               core::Agent& src_agent, size_t size,
                               std::vector<core::Signal*>& dep_signals, core::Signal& out_signal) {
  const bool profiling_enabled = (dst_agent.profiling_enabled() || src_agent.profiling_enabled());
  if (profiling_enabled) {
    out_signal.async_copy_agent(this);
  }

  // Body of the fire-and-forget copy thread.
  auto copy_task = [](void* dest, const void* source, size_t bytes,
                      std::vector<core::Signal*> waits, core::Signal* done, bool timed) {
    for (size_t i = 0; i < waits.size(); ++i) {
      waits[i]->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    }

    if (timed) {
      core::Runtime::runtime_singleton_->GetSystemInfo(HSA_SYSTEM_INFO_TIMESTAMP,
                                                       &done->signal_.start_ts);
    }

    memcpy(dest, source, bytes);

    if (timed) {
      core::Runtime::runtime_singleton_->GetSystemInfo(HSA_SYSTEM_INFO_TIMESTAMP,
                                                       &done->signal_.end_ts);
    }

    done->SubRelease(1);
  };

  std::thread worker(copy_task, dst, src, size, dep_signals, &out_signal, profiling_enabled);
  worker.detach();
  return HSA_STATUS_SUCCESS;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_filter_device.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_filter_device.h"

#include <algorithm>
#include <cstring>
#include <vector>
#include <map>
#include <string>
#include <sstream>
#include <iomanip>
#include <iostream>
#include <climits>

#include "core/util/utils.h"
#include "core/inc/runtime.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"

namespace rocr {
namespace AMD {

// Reports the runtime flag that says whether GPU visibility filtering is
// requested (Flag::filter_visible_gpus()).
bool RvdFilter::FilterDevices() {
  const auto& runtime_flags = core::Runtime::runtime_singleton_->flag();
  return runtime_flags.filter_visible_gpus();
}

// Returns true when the visible-GPUs setting held by the runtime flags is an
// empty string.
bool RvdFilter::SelectZeroDevices() {
  return core::Runtime::runtime_singleton_->flag().visible_gpus().empty();
}

// Splits the visible-GPUs setting into comma separated tokens and appends
// them, upper-cased and trimmed, to rvdTokenList_ in the order they appear.
// An empty setting leaves rvdTokenList_ untouched. Tokens that are empty
// after trimming are still appended.
void RvdFilter::BuildRvdTokenList() {
  // Determine if user has chosen ZERO devices to be surfaced
  const std::string& envVal = core::Runtime::runtime_singleton_->flag().visible_gpus();
  if (envVal.empty()) {
    return;
  }

  // Parse env value into tokens separated by comma (',') delimiter
  std::string token;
  const char separator = ',';
  std::stringstream stream(envVal);
  while (getline(stream, token, separator)) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); route each character through unsigned char so bytes >= 0x80
    // in the user's setting cannot trigger undefined behaviour.
    std::transform(token.begin(), token.end(), token.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });
    token = trim(token);
    rvdTokenList_.push_back(token);
  }
}

void RvdFilter::BuildDeviceUuidList(const std::vector<HsaNodeProperties>& node_props) {
  for (const auto& props : node_props) {
    if (props.NumFComputeCores == 0) {
      continue;
    }

    // For devices whose UUID is zero build a string that
    // will not match user provided value
    if (props.UniqueID == 0) {
      devUuidList_.push_back("Invalid-UUID");
      continue;
    }

    // For devices that support valid UUID values capture UUID
    // value into a upper case hex string of length 16 including
    // leading zeros if necessary
    std::stringstream stream;
    stream << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
           << props.UniqueID;
    std::string uuidVal(stream.str());
    std::transform(uuidVal.begin(), uuidVal.end(), uuidVal.begin(), ::toupper);
    devUuidList_.push_back(std::move(uuidVal));
  }
}

// Resolves a user supplied UUID token to an index into devUuidList_.
//
// The token must be 5 to 20 characters long and is matched as a prefix of
// each stored UUID string. Returns the index of the single matching entry,
// or -1 when the length is out of range, when nothing matches, or when more
// than one entry matches (ambiguous prefix).
int32_t RvdFilter::ProcessUuidToken(const std::string& token) {
  const uint32_t tokenSize = token.length();
  const bool lengthOk = (tokenSize >= 5) && (tokenSize <= 20);
  if (!lengthOk) {
    return -1;
  }

  int32_t matchIdx = -1;
  const uint32_t uuidCount = devUuidList_.size();
  for (uint32_t pos = 0; pos < uuidCount; ++pos) {
    const std::string& candidate = devUuidList_[pos];
    const uint32_t candidateSize = candidate.length();

    // A token longer than this UUID cannot be a prefix of it.
    if (tokenSize > candidateSize) {
      continue;
    }

    // Compare the token against the leading characters of the UUID.
    if (token.compare(0, tokenSize, candidate, 0, tokenSize) != 0) {
      continue;
    }

    // A second hit means the token does not identify a unique device.
    if (matchIdx != -1) {
      return -1;
    }
    matchIdx = pos;
  }

  // Either the unique match or -1 when no device matched.
  return matchIdx;
}

// Translates the user tokens in rvdTokenList_ into usrDeviceList_, a map from
// device index (position in devUuidList_) to the rank at which that device is
// surfaced to the user.
//
// Tokens are evaluated in order; at most one token per device is considered.
// A token starting with 'G' is resolved as a UUID via ProcessUuidToken(),
// anything else is parsed as an integer device index (strtol, base 0).
// Evaluation stops at the first token that is empty, malformed, out of range,
// or refers to a device that was already selected; tokens before it remain
// selected.
//
// @return Number of devices selected so far (usrDeviceList_.size()).
uint32_t RvdFilter::BuildUsrDeviceList() {
  // Get number of Gpu devices and user specified tokens
  const uint32_t numGpus = devUuidList_.size();
  const uint32_t loopCnt = std::min(numGpus, uint32_t(rvdTokenList_.size()));

  // Evaluate tokens into device index or UUID values
  int32_t usrIdx = 0;
  for (uint32_t idx = 0; idx < loopCnt; idx++) {
    // User token to be evaluated as UUID or device index
    const std::string& token = rvdTokenList_[idx];

    // An empty token (e.g. from "0,,1" or a trailing blank entry) is neither
    // a UUID nor an index. Treat it as a terminator instead of indexing into
    // it, which would throw, or parsing it, which would yield index 0.
    if (token.empty()) {
      return usrDeviceList_.size();
    }

    // Keep the parsed value in a long until it has been range checked so an
    // oversized number cannot wrap around into a valid device index.
    long devIdx = -1;

    // Token encodes a UUID value
    if (token.front() == 'G') {
      devIdx = ProcessUuidToken(token);

      // Token encodes device index
    } else {
      char* end = nullptr;
      devIdx = std::strtol(token.c_str(), &end, 0);
      if (*end != '\0') {
        return usrDeviceList_.size();
      }
    }

    // Rvd Token evaluates to wrong device index (this also covers a UUID
    // token that matched no device or matched more than one).
    if ((devIdx < 0) || (devIdx >= static_cast<long>(numGpus))) {
      return usrDeviceList_.size();
    }
    const int32_t gpuIdx = static_cast<int32_t>(devIdx);

    // Determine if device index is previously seen
    // Such indices are interpreted as terminators
    if (usrDeviceList_.find(gpuIdx) != usrDeviceList_.end()) {
      return usrDeviceList_.size();
    }

    // Add index to the list of devices that will be
    // surfaced upon device enumeration
    usrDeviceList_[gpuIdx] = usrIdx++;
  }

  return usrDeviceList_.size();
}

// Number of entries currently held in usrDeviceList_.
uint32_t RvdFilter::GetUsrDeviceListSize() {
  const uint32_t selected = usrDeviceList_.size();
  return selected;
}

// Looks up `roctIdx` in usrDeviceList_ and returns the value stored for it
// (the rank assigned by BuildUsrDeviceList), or -1 when the index is absent.
int32_t RvdFilter::GetUsrDeviceRank(uint32_t roctIdx) {
  const auto entry = usrDeviceList_.find(roctIdx);
  if (entry == usrDeviceList_.end()) {
    return -1;
  }
  return entry->second;
}

#ifndef NDEBUG
// Debug-only helper: overwrites the leading entries of devUuidList_ with
// fixed, recognisable UUID strings. At most as many entries are replaced as
// there are values in the table below (eight), and never more than the list
// already holds. A zero table value leaves the existing entry unchanged.
void RvdFilter::SetDeviceUuidList() {
  const uint64_t dbgUuid[] = {0xBABABABABABABABA, 0xBABABABABABAABBA, 0xBABABABAABBAABBA,
                              0xBABAABBAABBAABBA, 0xABBAABBAABBAABBA, 0xABBAABBAABBABABA,
                              0xABBAABBABABABABA, 0xABBABABABABABABA};

  const uint32_t tableSize = (sizeof(dbgUuid) / sizeof(dbgUuid[0]));
  const uint32_t listSize = devUuidList_.size();
  const uint32_t limit = (listSize < tableSize) ? listSize : tableSize;

  for (uint32_t slot = 0; slot < limit; ++slot) {
    // Nothing is stored for a zero UUID.
    if (dbgUuid[slot] == 0) {
      continue;
    }

    // "GPU-" followed by 16 upper-case, zero padded hex digits.
    std::stringstream text;
    text << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
         << dbgUuid[slot];
    std::string formatted(text.str());
    std::transform(formatted.begin(), formatted.end(), formatted.begin(), ::toupper);
    devUuidList_[slot] = std::move(formatted);
  }
}

// Debug-only helper: writes one "Dev[<index>]: <uuid>" line to std::cout for
// every entry of devUuidList_, flushing after each line.
void RvdFilter::PrintDeviceUuidList() {
  uint32_t position = 0;
  for (const auto& uuid : devUuidList_) {
    std::cout << "Dev[" << position << "]: " << uuid;
    std::cout << std::endl << std::flush;
    ++position;
  }
}

// Debug-only helper: writes one "UsrDev[<rank>]: <device index>" line to
// std::cout per entry of usrDeviceList_. The mapped value (rank) is printed
// inside the brackets and the key (device index) after the colon.
void RvdFilter::PrintUsrDeviceList() {
  for (auto entry = usrDeviceList_.begin(); entry != usrDeviceList_.end(); ++entry) {
    std::cout << "UsrDev[" << entry->second << "]: " << entry->first;
    std::cout << std::endl << std::flush;
  }
}

// Debug-only helper: writes one "Token[<index>]: <token>" line to std::cout
// for every entry of rvdTokenList_, flushing after each line.
void RvdFilter::PrintRvdTokenList() {
  uint32_t position = 0;
  for (const auto& token : rvdTokenList_) {
    std::cout << "Token[" << position << "]: " << token;
    std::cout << std::endl << std::flush;
    ++position;
  }
}
#endif

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_gpu_agent.h"

#include <algorithm>
#include <atomic>
#include <cstring>
#include <climits>
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include <iomanip>
#include <cmath>

#include "core/inc/amd_aql_queue.h"
#include "core/inc/amd_blit_kernel.h"
#include "core/inc/amd_blit_sdma.h"
#include "core/inc/amd_gpu_pm4.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/default_signal.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/isa.h"
#include "core/inc/runtime.h"
#include "core/util/os.h"
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ven_amd_aqlprofile.h"
#include "inc/hsa_ven_amd_pc_sampling.h"

#include "core/inc/amd_trap_handler_v1.h"
#include "core/inc/amd_blit_shaders.h"
#include "core/inc/hsa_api_trace_int.h"
// Generated header
#include "amd_trap_handler_v2.h"
#include "amd_blit_shaders_v2.h"

#if defined(__linux__)
// libdrm headers
#include <xf86drm.h>
#include <amdgpu.h>
#endif


// Size of scratch (private) segment pre-allocated per thread, in bytes.
#define DEFAULT_SCRATCH_BYTES_PER_THREAD 2048
// NOTE(review): presumably the largest per-wave scratch size, in bytes, that
// the register referenced below can encode - confirm against the register spec.
#define MAX_WAVE_SCRATCH 8387584  // See COMPUTE_TMPRING_SIZE.WAVESIZE
// 0x400 = 1024. NOTE(review): presumably the number of doorbell slots tracked
// per agent - confirm against the users of this macro.
#define MAX_NUM_DOORBELLS 0x400

namespace rocr {

namespace AMD {
// 1 << 26 = 67,108,864. NOTE(review): presumably the largest byte count a
// single CP DMA_DATA transfer may carry - confirm against the PM4 packet spec.
const uint64_t CP_DMA_DATA_TRANSFER_CNT_MAX = (1 << 26);

// Constructs the agent object for one GPU node.
//
// @param node         Driver node id, forwarded to GpuAgentInt.
// @param node_props   Node properties; copied into properties_.
// @param xnack_mode   Selects the XNACK ISA feature (enabled/disabled) when the
//                     base ISA supports XNACK.
// @param index        Stored as enum_index_.
// @param driver_type  Forwarded to GpuAgentInt.
//
// Throws AMD::hsa_exception when the node reports a doorbell type other than
// 2, when no ISA is registered for the node's engine id, or (Linux, model not
// enabled) when the wallclock frequency cannot be queried.
//
// The steps below run in a fixed order: profile selection, clock counter
// baseline, ISA resolution, coherency type, queue limit, libdrm init,
// wallclock frequency, CPU<->GPU link inspection, and finally region list,
// cache list and async-scratch threshold initialisation.
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode,
                   uint32_t index, core::DriverType driver_type)
    : GpuAgentInt(node, driver_type),
      properties_(node_props),
      current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
      scratch_used_large_(0),
      queues_(),
      trap_code_buf_(NULL),
      trap_code_buf_size_(0),
      doorbell_queue_map_(NULL),
      memory_bus_width_(0),
      memory_max_frequency_(0),
      enum_index_(index),
      ape1_base_(0),
      pending_copy_req_ref_(0),
      pending_copy_stat_check_ref_(0),
      sdma_blit_used_mask_(0),
      scratch_limit_async_threshold_(0),
      scratch_cache_(
          [this](void* base, size_t size, bool large) { ReleaseScratch(base, size, large); }),
      trap_handler_tma_region_(NULL),
      rec_sdma_eng_override_(false),
      pcs_hosttrap_data_(),
      pcs_stochastic_data_(),
      xgmi_cpu_gpu_(false),
      large_bar_enabled_(false){
  // A node that also reports CPU cores is treated as an APU and gets the
  // full profile; every other node gets the base profile.
  const bool is_apu_node = (properties_.NumCPUCores > 0);
  profile_ = (is_apu_node) ? HSA_PROFILE_FULL : HSA_PROFILE_BASE;

  // Only doorbell type 2 is accepted.
  if (node_props.Capability.ui32.DoorbellType != 2)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "Agent creation failed.\nThe GPU node uses a deprecated doorbell type\n");

  // Capture the initial clock counters; t1_ starts out equal to t0_.
  // NOTE(review): the result is only checked by the assert below, so a
  // failure goes unnoticed in builds where asserts are compiled out.
  hsa_status_t err = driver().GetClockCounters(node_id(), &t0_);
  t1_ = t0_;
  historical_clock_ratio_ = 0.0;
  assert(err == HSA_STATUS_SUCCESS && "hsaGetClockCounters error");

  const core::Isa *isa_base;

  // Look up the ISA by version only. A non-zero OverrideEngineId takes
  // precedence over EngineId.
  if (node_props.OverrideEngineId.Value != 0) {
     isa_base = core::IsaRegistry::GetIsa(
         core::Isa::Version(node_props.OverrideEngineId.ui32.Major,
                            node_props.OverrideEngineId.ui32.Minor,
                            node_props.OverrideEngineId.ui32.Stepping));
  } else {
     isa_base = core::IsaRegistry::GetIsa(
         core::Isa::Version(node_props.EngineId.ui32.Major,
                            node_props.EngineId.ui32.Minor,
                            node_props.EngineId.ui32.Stepping));
  }

  if (!isa_base) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ISA, "Agent creation failed.\nThe GPU node has an unrecognized id.\n");
  }

  // SRAM ECC feature: stays Unsupported unless the base ISA supports it; then
  // the runtime flag decides, falling back to the node capability bit for the
  // default setting.
  rocr::core::IsaFeature sramecc = rocr::core::IsaFeature::Unsupported;
  if (isa_base->IsSrameccSupported()) {
    switch (core::Runtime::runtime_singleton_->flag().sramecc_enable()) {
      case Flag::SRAMECC_DISABLED:
        sramecc = core::IsaFeature::Disabled;
        break;
      case Flag::SRAMECC_ENABLED:
        sramecc = core::IsaFeature::Enabled;
        break;
      case Flag::SRAMECC_DEFAULT:
        sramecc = node_props.Capability.ui32.SRAM_EDCSupport == 1 ? core::IsaFeature::Enabled
                                                                  : core::IsaFeature::Disabled;
        break;
    }
  }

  // XNACK feature: stays Unsupported unless the base ISA supports it; then it
  // follows the xnack_mode constructor argument.
  rocr::core::IsaFeature xnack = rocr::core::IsaFeature::Unsupported;
  if (isa_base->IsXnackSupported()) {
    // TODO: This needs to be obtained form KFD once HMM implemented.
    xnack = xnack_mode ? core::IsaFeature::Enabled
                      : core::IsaFeature::Disabled;
  }

  // Second lookup, now with the resolved feature settings, using the same
  // override-first rule as above.
  if (node_props.OverrideEngineId.Value != 0) {
    isa_ = (core::Isa*)core::IsaRegistry::GetIsa(
          core::Isa::Version(node_props.OverrideEngineId.ui32.Major, node_props.OverrideEngineId.ui32.Minor,
                             node_props.OverrideEngineId.ui32.Stepping), sramecc, xnack);
  } else {
  // Set instruction set architecture via node property, only on GPU device.
    isa_ = (core::Isa*)core::IsaRegistry::GetIsa(
          core::Isa::Version(node_props.EngineId.ui32.Major, node_props.EngineId.ui32.Minor,
                             node_props.EngineId.ui32.Stepping), sramecc, xnack);
  }

  assert(isa_ != nullptr && "ISA registry inconsistency.");

  // The concrete ISA is always listed first; when it names a generic ISA,
  // that one is listed as well.
  supported_isas_.push_back(isa_);
  if (!isa_->GetIsaGeneric().empty()) {
    supported_isas_.push_back(core::IsaRegistry::GetIsa(isa_->GetIsaGeneric()));
  }

  // Full profile starts coherent, base profile starts non-coherent.
  current_coherency_type((profile_ == HSA_PROFILE_FULL)
                             ? HSA_AMD_COHERENCY_TYPE_COHERENT
                             : HSA_AMD_COHERENCY_TYPE_NONCOHERENT);

  // Queue limit: taken from the runtime flag, where 0 selects the built-in
  // default, and never allowed above that default (10 for small-model or
  // non-Linux builds, 128 otherwise).
  max_queues_ = core::Runtime::runtime_singleton_->flag().max_queues();
#if !defined(HSA_LARGE_MODEL) || !defined(__linux__)
  if (max_queues_ == 0) {
    max_queues_ = 10;
  }
  max_queues_ = std::min(10U, max_queues_);
#else
  if (max_queues_ == 0) {
    max_queues_ = 128;
  }
  max_queues_ = std::min(128U, max_queues_);
#endif

  // Initialize libdrm device handle
  InitLibDrm();

#if !defined(__linux__)
  wallclock_frequency_ = 0;
#else
  // NOTE(review): `status` is only consumed by the assert; when asserts are
  // compiled out and IsModelEnabled fails, model_enabled may be read without
  // having been set - confirm IsModelEnabled always writes it.
  bool model_enabled;
  hsa_status_t status = driver().IsModelEnabled(&model_enabled);
  assert(status == HSA_STATUS_SUCCESS && "IsModelEnabled failed");
  if (model_enabled) {
    wallclock_frequency_ = 0;
  } else {
    // Get wallclock freq
    err = driver().GetWallclockFrequency(node_id(), &wallclock_frequency_);
    if (err != HSA_STATUS_SUCCESS) {
      throw AMD::hsa_exception(err, "Agent creation failed.\nGetWallclockFrequency error.\n");
    }
  }
#endif

  // Inspect the link between the first CPU agent and this GPU.
  // NOTE(review): indexes cpu_agents()[0] unconditionally, so at least one CPU
  // agent must already be registered when a GPU agent is constructed.
  auto& first_cpu = core::Runtime::runtime_singleton_->cpu_agents()[0];
  auto link_info = core::Runtime::runtime_singleton_->GetLinkInfo(first_cpu->node_id(), node_id());
  xgmi_cpu_gpu_ = (link_info.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI);

  // Any reported hop count of one or more marks large BAR as enabled.
  if (link_info.num_hop >= 1) {
    large_bar_enabled_ = true;
  }

  // Populate region list.
  InitRegionList();

  // Populate cache list.
  InitCacheList();

  // Initialize thresholds for async-scratch handling
  InitAsyncScratchThresholds();
}

// Applies DeleteObject to every entry of regions_ and then empties the list.
GpuAgent::~GpuAgent() {
  DeleteObject destroy;
  for (auto it = regions_.begin(); it != regions_.end(); ++it) {
    destroy(*it);
  }
  regions_.clear();
}

// Places a precompiled shader for this agent's ISA into a freshly allocated,
// executable buffer.
//
// @param func_name        Key into the shader table below ("TrapHandler",
//                         "TrapHandlerKfdExceptions", "CopyAligned",
//                         "CopyMisaligned" or "Fill").
// @param assemble_target  AssembleTarget::AQL prepends an amd_kernel_code_t
//                         header to the code; any other target stores the raw
//                         code at the start of the buffer.
// @param code_buf         [out] Receives the allocated buffer (4 KiB aligned,
//                         zero filled before the header/code are written).
// @param code_buf_size    [out] Receives the buffer size: header plus code,
//                         rounded up to a multiple of 0x1000.
//
// NOTE(review): the unknown-name, unknown-ISA and allocation-failure cases are
// guarded by asserts only. In builds where asserts are compiled out the code
// below would dereference an end iterator / null pointer - confirm that callers
// can never reach those cases.
void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_target,
                              void*& code_buf, size_t& code_buf_size) const {
  // Select precompiled shader implementation from name/target.
  // One precompiled binary plus the register counts used to fill the header.
  struct ASICShader {
    const void* code;
    size_t size;
    int num_sgprs;
    int num_vgprs;
  };

  // One ASICShader per supported ISA family. The member order must match the
  // positional initialisers in the table below.
  struct CompiledShader {
    ASICShader compute_7;
    ASICShader compute_8;
    ASICShader compute_9;
    ASICShader compute_90a;
    ASICShader compute_942;
    ASICShader compute_1010;
    ASICShader compute_10;
    ASICShader compute_11;
    ASICShader compute_12;
  };

  // Shader table keyed by function name. It is a local variable, so it is
  // rebuilt on every call.
  std::map<std::string, CompiledShader> compiled_shaders = {
      {"TrapHandler",
       {
           {NULL, 0, 0, 0},                                                 // gfx7
           {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4},            // gfx8
           {kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4},            // gfx9
           {kCodeTrapHandler90a, sizeof(kCodeTrapHandler90a), 2, 4},        // gfx90a
           {NULL, 0, 0, 0},                                                 // gfx942
           {kCodeTrapHandler1010, sizeof(kCodeTrapHandler1010), 2, 4},      // gfx1010
           {kCodeTrapHandler10, sizeof(kCodeTrapHandler10), 2, 4},          // gfx10
           {NULL, 0, 0, 0},                                                 // gfx11
           // GFX12_TODO: Using one for GFX10 for now.
           //             If NULL is used (like GFX11), get an assert.
           {kCodeTrapHandler10, sizeof(kCodeTrapHandler10), 2, 4},          // gfx12
       }},
      {"TrapHandlerKfdExceptions",
       {
           {NULL, 0, 0, 0},                                                 // gfx7
           {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4},            // gfx8
           {kCodeTrapHandlerV2_9, sizeof(kCodeTrapHandlerV2_9), 2, 4},      // gfx9
           {kCodeTrapHandlerV2_9, sizeof(kCodeTrapHandlerV2_9), 2, 4},      // gfx90a
           {kCodeTrapHandlerV2_942, sizeof(kCodeTrapHandlerV2_942), 2, 4},  // gfx942
           {kCodeTrapHandlerV2_1010, sizeof(kCodeTrapHandlerV2_1010), 2, 4},// gfx1010
           {kCodeTrapHandlerV2_10, sizeof(kCodeTrapHandlerV2_10), 2, 4},    // gfx10
           {kCodeTrapHandlerV2_11, sizeof(kCodeTrapHandlerV2_11), 2, 4},    // gfx11
           {kCodeTrapHandlerV2_12, sizeof(kCodeTrapHandlerV2_12), 2, 4},    // gfx12
       }},
      {"CopyAligned",
       {
           {kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},          // gfx7
           {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},          // gfx8
           {kCodeCopyAligned9, sizeof(kCodeCopyAligned9), 32, 12},          // gfx9
           {kCodeCopyAligned9, sizeof(kCodeCopyAligned9), 32, 12},          // gfx90a
           {kCodeCopyAligned9, sizeof(kCodeCopyAligned9), 32, 12},          // gfx942
           {kCodeCopyAligned10, sizeof(kCodeCopyAligned10), 32, 12},        // gfx1010
           {kCodeCopyAligned10, sizeof(kCodeCopyAligned10), 32, 12},        // gfx10
           {kCodeCopyAligned11, sizeof(kCodeCopyAligned11), 32, 12},        // gfx11
           {kCodeCopyAligned12, sizeof(kCodeCopyAligned12), 32, 12},        // gfx12
       }},
      {"CopyMisaligned",
       {
           {kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},    // gfx7
           {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},    // gfx8
           {kCodeCopyMisaligned9, sizeof(kCodeCopyMisaligned9), 23, 10},    // gfx9
           {kCodeCopyMisaligned9, sizeof(kCodeCopyMisaligned9), 23, 10},    // gfx90a
           {kCodeCopyMisaligned9, sizeof(kCodeCopyMisaligned9), 23, 10},    // gfx942
           {kCodeCopyMisaligned10, sizeof(kCodeCopyMisaligned10), 23, 10},  // gfx1010
           {kCodeCopyMisaligned10, sizeof(kCodeCopyMisaligned10), 23, 10},  // gfx10
           {kCodeCopyMisaligned11, sizeof(kCodeCopyMisaligned11), 23, 10},  // gfx11
           {kCodeCopyMisaligned12, sizeof(kCodeCopyMisaligned12), 23, 10},  // gfx12
       }},
      {"Fill",
       {
           {kCodeFill7, sizeof(kCodeFill7), 19, 8},                         // gfx7
           {kCodeFill8, sizeof(kCodeFill8), 19, 8},                         // gfx8
           {kCodeFill9, sizeof(kCodeFill9), 19, 8},                         // gfx9
           {kCodeFill9, sizeof(kCodeFill9), 19, 8},                         // gfx90a
           {kCodeFill9, sizeof(kCodeFill9), 19, 8},                         // gfx942
           {kCodeFill10, sizeof(kCodeFill10), 19, 8},                       // gfx1010
           {kCodeFill10, sizeof(kCodeFill10), 19, 8},                       // gfx10
           {kCodeFill11, sizeof(kCodeFill11), 19, 8},                       // gfx11
           {kCodeFill12, sizeof(kCodeFill12), 19, 8},                       // gfx12
       }}};

  auto compiled_shader_it = compiled_shaders.find(func_name);
  assert(compiled_shader_it != compiled_shaders.end() &&
         "Precompiled shader unavailable");

  ASICShader* asic_shader = NULL;

  // Pick the table column from the agent's ISA version. Within major version 9
  // and 10 the minor version / stepping selects a more specific column.
  switch (isa_->GetMajorVersion()) {
    case 7:
      asic_shader = &compiled_shader_it->second.compute_7;
      break;
    case 8:
      asic_shader = &compiled_shader_it->second.compute_8;
      break;
    case 9:
      // 9.0 stepping 10 -> gfx90a column; minor 4 or 5 -> gfx942 column.
      if((isa_->GetMinorVersion() == 0) && (isa_->GetStepping() == 10)) {
        asic_shader = &compiled_shader_it->second.compute_90a;
      } else if(isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5) {
        asic_shader = &compiled_shader_it->second.compute_942;
      } else {
        asic_shader = &compiled_shader_it->second.compute_9;
      }
      break;
    case 10:
      if(isa_->GetMinorVersion() == 1)
        asic_shader = &compiled_shader_it->second.compute_1010;
      else
        asic_shader = &compiled_shader_it->second.compute_10;
      break;
    case 11:
        asic_shader = &compiled_shader_it->second.compute_11;
      break;
    case 12:
        asic_shader = &compiled_shader_it->second.compute_12;
      break;
    default:
      assert(false && "Precompiled shader unavailable for target");
  }

  // Allocate a GPU-visible buffer for the shader.
  // The header is only present for AQL targets; the total is rounded up to a
  // 4 KiB multiple.
  size_t header_size =
      (assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0);
  code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000);

  code_buf = system_allocator()(code_buf_size, 0x1000,
    core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateExecutableBlitKernelObject);
  assert(code_buf != NULL && "Code buffer allocation failed");

  memset(code_buf, 0, code_buf_size);

  // Populate optional code object header.
  if (assemble_target == AssembleTarget::AQL) {
    amd_kernel_code_t* header = reinterpret_cast<amd_kernel_code_t*>(code_buf);

    // Register counts are stored in granules: (count - 1) / 8 for SGPRs and
    // (count - 1) / 4 for VGPRs, clamped at zero.
    int gran_sgprs = std::max(0, (int(asic_shader->num_sgprs) - 1) / 8);
    int gran_vgprs = std::max(0, (int(asic_shader->num_vgprs) - 1) / 4);

    // The code starts immediately after the header.
    header->kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
    AMD_HSA_BITS_SET(header->kernel_code_properties,
                     AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                     1);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc1,
                     AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT,
                     gran_sgprs);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc1,
                     AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT,
                     gran_vgprs);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc1,
                     AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 3);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc1,
                     AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 1);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc2,
                     AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 2);
    AMD_HSA_BITS_SET(header->compute_pgm_rsrc2,
                     AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 1);

    // gfx90a, gfx942, gfx950
    if ((isa_->GetMajorVersion() == 9) &&
        (((isa_->GetMinorVersion() == 0) && (isa_->GetStepping() == 10)) ||
        (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5))) {
      // Program COMPUTE_PGM_RSRC3.ACCUM_OFFSET for 0 ACC VGPRs on gfx90a.
      // FIXME: Assemble code objects from source at build time
      // The value is written into the upper 32 bits of
      // max_scratch_backing_memory_byte_size.
      int gran_accvgprs = ((gran_vgprs + 1) * 8) / 4 - 1;
      header->max_scratch_backing_memory_byte_size = uint64_t(gran_accvgprs) << 32;
    }
  }

  // Copy shader code into the GPU-visible buffer.
  // NOTE(review): table entries with a NULL code pointer have size 0, so this
  // becomes memcpy(dst, NULL, 0) for them - confirm that is acceptable.
  memcpy((void*)(uintptr_t(code_buf) + header_size), asic_shader->code,
         asic_shader->size);
}

// Releases a shader code buffer by handing code_buf to the system
// deallocator. code_buf_size is not used.
void GpuAgent::ReleaseShader(void* code_buf, size_t code_buf_size) const {
  const auto& release = system_deallocator();
  release(code_buf);
}

void GpuAgent::InitRegionList() {
  const bool is_apu_node = (properties_.NumCPUCores > 0);

  std::vector<HsaMemoryProperties> mem_props(properties_.NumMemoryBanks);
  if (HSA_STATUS_SUCCESS == driver().GetMemoryProperties(node_id(), mem_props)) {
    for (uint32_t mem_idx = 0; mem_idx < properties_.NumMemoryBanks;
         ++mem_idx) {
      // Ignore the one(s) with unknown size.
      if (mem_props[mem_idx].SizeInBytes == 0) {
        continue;
      }

      switch (mem_props[mem_idx].HeapType) {
        case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
        case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
          if (!is_apu_node) {
            mem_props[mem_idx].VirtualBaseAddress = 0;
          }

          memory_bus_width_ = mem_props[mem_idx].Width;
          memory_max_frequency_ = mem_props[mem_idx].MemoryClockMax;
        case HSA_HEAPTYPE_GPU_LDS:
        case HSA_HEAPTYPE_GPU_SCRATCH: {
          MemoryRegion* region =
              new MemoryRegion(false, false, false, false, true, this, mem_props[mem_idx]);

          regions_.push_back(region);

          if (region->IsLocalMemory()) {
            // Extended Fine-Grain memory
            if (!(isa_->GetMajorVersion() == 12 && isa_->GetMinorVersion() == 0))
              regions_.push_back(
                  new MemoryRegion(false, false, false, true, true, this, mem_props[mem_idx]));

            // Expose VRAM as uncached/fine grain over PCIe (if enabled) or XGMI.
            bool user_visible = (properties_.HiveID != 0) ||
                core::Runtime::runtime_singleton_->flag().fine_grain_pcie();

            regions_.push_back(new MemoryRegion(true, false, false, false, user_visible, this,
                                                mem_props[mem_idx]));
          }
          break;
        }
        case HSA_HEAPTYPE_SYSTEM:
          if (is_apu_node) {
            memory_bus_width_ = mem_props[mem_idx].Width;
            memory_max_frequency_ = mem_props[mem_idx].MemoryClockMax;
          }
          break;
        case HSA_HEAPTYPE_MMIO_REMAP:
          // Remap offsets defined in kfd_ioctl.h
          HDP_flush_.HDP_MEM_FLUSH_CNTL = (uint32_t*)mem_props[mem_idx].VirtualBaseAddress;
          HDP_flush_.HDP_REG_FLUSH_CNTL = HDP_flush_.HDP_MEM_FLUSH_CNTL + 1;
          break;
        default:
          continue;
      }
    }
  }
}

// Sizes and allocates the device scratch pool, then rebuilds scratch_pool_
// in place over the new allocation (or as an empty heap when the allocation
// failed).
void GpuAgent::InitScratchPool() {
  // Per-thread scratch comes from the runtime flag, with a built-in default
  // when the flag is zero.
  scratch_per_thread_ =
      core::Runtime::runtime_singleton_->flag().scratch_mem_size();
  if (scratch_per_thread_ == 0) {
    scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD;
  }

  // Scratch length is: waves/CU * threads/wave * queues * #CUs *
  // scratch/thread
  const uint32_t num_cu =
      properties_.NumFComputeCores / properties_.NumSIMDPerCU;
  queue_scratch_len_ = AlignUp(32 * 64 * num_cu * scratch_per_thread_, 65536);
  size_t max_scratch_len = queue_scratch_len_ * max_queues_;

#if defined(HSA_LARGE_MODEL) && defined(__linux__)
  // For 64-bit linux use max queues unless otherwise specified
  const bool out_of_range =
      (max_scratch_len == 0) || (max_scratch_len > MaxScratchDevice());
  if (out_of_range) {
    max_scratch_len = MaxScratchDevice();  // 4GB per XCC aperture max
  }
#endif

  void* scratch_base = nullptr;
  hsa_status_t err = driver().AllocateScratchMemory(node_id(), max_scratch_len, &scratch_base);
  assert(err == HSA_STATUS_SUCCESS && "AllocateScratchMemory failed");
  assert(IsMultipleOf(scratch_base, 0x1000) &&
         "Scratch base is not page aligned!");

  // Destroy the current heap object and construct the replacement in place.
  scratch_pool_. ~SmallHeap();
  if (err != HSA_STATUS_SUCCESS) {
    new (&scratch_pool_) SmallHeap();
  } else {
    new (&scratch_pool_) SmallHeap(scratch_base, max_scratch_len);
  }
}

// Sets scratch_limit_async_threshold_ when async scratch reclaim is enabled.
// The user-provided flag value wins; when it is zero the per-XCC default is
// scaled by the number of XCCs.
void GpuAgent::InitAsyncScratchThresholds() {
  if (!AsyncScratchReclaimEnabled()) {
    return;
  }

  const auto& flags = core::Runtime::runtime_singleton_->flag();
  scratch_limit_async_threshold_ = flags.scratch_single_limit_async();
  if (scratch_limit_async_threshold_) {
    return;
  }

  // User did not set env var HSA_SCRATCH_SINGLE_LIMIT_ASYNC
  scratch_limit_async_threshold_ =
      flags.DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC * (uint64_t)(properties().NumXcc);
}

// Reserves a block of scratch memory of the size requested by the
// scratch_single_limit flag (clamped to MaxScratchDevice()).
//
// The reservation is only attempted when nothing is reserved yet, the
// requested size is non-zero, and the driver reports more than eight times
// the requested size as available. Throws AMD::hsa_exception with
// HSA_STATUS_ERROR_OUT_OF_RESOURCES if the reserved range cannot be made
// resident.
void GpuAgent::ReserveScratch()
{
  size_t reserved_sz = core::Runtime::runtime_singleton_->flag().scratch_single_limit();
  if (reserved_sz > MaxScratchDevice()) {
    fprintf(stdout, "User specified scratch limit exceeds device limits (requested:%lu max:%lu)!\n",
                    reserved_sz, MaxScratchDevice());
    reserved_sz = MaxScratchDevice();
  }

  // Start from zero: the assert below is compiled out in release builds, so
  // a failed query must not leave an indeterminate value feeding the
  // threshold check. With zero available bytes the reservation is skipped.
  size_t available = 0;
  hsa_status_t err = driver().AvailableMemory(node_id(), &available);
  assert(err == HSA_STATUS_SUCCESS && "AvailableMemory failed");
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);
  if (!scratch_cache_.reserved_bytes() && reserved_sz && available > 8 * reserved_sz) {
    HSAuint64 alt_va;
    void* reserved_base = scratch_pool_.alloc(reserved_sz);
    assert(reserved_base && "Could not allocate reserved memory");

    if (driver().MakeMemoryResident(reserved_base, reserved_sz, &alt_va) == HSA_STATUS_SUCCESS)
      scratch_cache_.reserve(reserved_sz, reserved_base);
    else
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Reserve scratch memory failed.");
  }
}

// Fetches the GPU cache properties from the driver, keeps only the compute
// unit data caches, and rebuilds caches_ with one core::Cache per remaining
// entry, named "<agent name> L<level>".
void GpuAgent::InitCacheList() {
  // Get GPU cache information.
  // Similar to getting CPU cache but here we use FComputeIdLo.
  cache_props_.resize(properties_.NumCaches);
  if (HSA_STATUS_SUCCESS !=
      driver().GetCacheProperties(node_id(), properties_.FComputeIdLo, cache_props_)) {
    cache_props_.clear();
  } else {
    // Only store GPU D-cache: drop entries that are not CU caches or that
    // are instruction caches.
    for (auto it = cache_props_.begin(); it != cache_props_.end();) {
      const HsaCacheType type = it->CacheType;
      const bool is_cu_dcache = (type.ui32.HSACU == 1) && (type.ui32.Instruction != 1);
      if (is_cu_dcache) {
        ++it;
      } else {
        it = cache_props_.erase(it);
      }
    }
  }

  // Update cache objects
  caches_.clear();
  caches_.resize(cache_props_.size());

  char agent_name[64];
  GetInfo(HSA_AGENT_INFO_NAME, agent_name);
  const std::string name_prefix = std::string(agent_name) + " L";

  for (size_t idx = 0; idx < caches_.size(); ++idx) {
    const auto& prop = cache_props_[idx];
    caches_[idx].reset(new core::Cache(name_prefix + std::to_string(prop.CacheLevel),
                                       prop.CacheLevel, prop.CacheSize));
  }
}

void GpuAgent::InitLibDrm() {
  hsa_status_t status;

  HsaAMDGPUDeviceHandle device_handle;
  status = driver().GetDeviceHandle(node_id(), &device_handle);
  if (status != HSA_STATUS_SUCCESS)
    throw AMD::hsa_exception(status,
                             "Agent creation failed.\nlibdrm get device handle failed.\n");

  ldrm_dev_ = (amdgpu_device_handle)device_handle;
}

// Invokes callback for the regions visible from this agent, including the
// system regions (delegates to VisitRegion with include_peer set).
hsa_status_t GpuAgent::IterateRegion(
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  const bool include_peer = true;
  return VisitRegion(include_peer, callback, data);
}

// Calls callback once per cache owned by this agent. Iteration stops at, and
// returns, the first status that is not HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                                    void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);
  for (const auto& cache : caches_) {
    const hsa_status_t status = invoke(core::Cache::Convert(cache.get()), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Calls callback once per ISA returned by supported_isas(). Iteration stops
// at, and returns, the first status that is not HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::IterateSupportedIsas(
                    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
                                                          void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);
  for (const auto& supported : supported_isas()) {
    const hsa_status_t status = invoke(core::Isa::Handle(supported), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Visits the memory regions associated with this agent.
//
// With include_peer false only this agent's own regions are visited. With
// include_peer true the agent's regions are visited for each runtime GPU id
// equal to this node's id, followed by the fine-grained and then the
// coarse-grained system regions. The first non-success callback status
// aborts the walk and is returned.
hsa_status_t GpuAgent::VisitRegion(bool include_peer,
                                   hsa_status_t (*callback)(hsa_region_t region,
                                                            void* data),
                                   void* data) const {
  if (!include_peer) {
    // Only expose system, local, and LDS memory of this agent.
    return VisitRegion(regions_, callback, data);
  }

  core::Runtime* runtime = core::Runtime::runtime_singleton_;

  // Only expose system, local, and LDS memory of the blit agent.
  const auto& gpu_ids = runtime->gpu_ids();
  for (auto& gpu_id : gpu_ids) {
    if (this->node_id() != gpu_id) {
      continue;
    }
    const hsa_status_t own_status = VisitRegion(regions_, callback, data);
    if (own_status != HSA_STATUS_SUCCESS) {
      return own_status;
    }
  }

  // Also expose system regions accessible by this agent.
  const hsa_status_t fine_status =
      VisitRegion(runtime->system_regions_fine(), callback, data);
  if (fine_status != HSA_STATUS_SUCCESS) {
    return fine_status;
  }

  return VisitRegion(runtime->system_regions_coarse(), callback, data);
}

// Calls callback for every user-visible region in `regions` that is system,
// local, or LDS memory. The first non-success callback status aborts the
// walk and is returned.
hsa_status_t GpuAgent::VisitRegion(
    const std::vector<const core::MemoryRegion*>& regions,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);

  for (const core::MemoryRegion* candidate : regions) {
    if (!candidate->user_visible()) {
      continue;
    }

    const AMD::MemoryRegion* amd_candidate =
        reinterpret_cast<const AMD::MemoryRegion*>(candidate);

    // Only expose system, local, and LDS memory.
    const bool exposed = amd_candidate->IsSystem() || amd_candidate->IsLocalMemory() ||
                         amd_candidate->IsLDS();
    if (!exposed) {
      continue;
    }

    const hsa_status_t status = invoke(core::MemoryRegion::Convert(candidate), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Creates an internal multi-producer queue in system memory. The requested
// size is raised to minAqlSize_ and then capped at maxAqlSize_. When the
// queue is created the runtime is notified of the new internal queue.
// Returns nullptr if queue creation left the queue pointer unset.
core::Queue* GpuAgent::CreateInterceptibleQueue(void (*callback)(hsa_status_t status,
                                                                 hsa_queue_t* source, void* data),
                                                void* data, const uint32_t in_size) {
  // Disabled intercept of internal queues pending tools updates.
  uint32_t aql_size = std::min(std::max(in_size, minAqlSize_), maxAqlSize_);

  core::Queue* created = nullptr;
  QueueCreate(aql_size, HSA_QUEUE_TYPE_MULTI, HSA_AMD_QUEUE_CREATE_SYSTEM_MEM, callback, data, 0, 0,
              &created);
  if (created == nullptr) {
    return created;
  }

  core::Runtime::runtime_singleton_->InternalQueueCreateNotify(core::Queue::Convert(created),
                                                               this->public_handle());
  return created;
}

// Creates and initializes an SDMA blit object matching the ISA major version.
//
// use_xgmi and rec_eng are forwarded to the blit's Initialize(); rec_eng is
// replaced by -1 for xGMI blits unless recommended engine masks are in use.
// Returns nullptr when the major version is unsupported or initialization
// fails.
core::Blit* GpuAgent::CreateBlitSdma(bool use_xgmi, int rec_eng) {
  // Two candidate copy size overrides; which applies depends on the ISA.
  const size_t small_override = 0x3fffff;
  const size_t large_override = 0x3fffffff;

  AMD::BlitSdmaBase* engine;
  size_t copy_size_override = 0;

  switch (isa_->GetMajorVersion()) {
    case 9: {
      engine = new BlitSdmaV4();
      const bool is_gfx90a = (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 10);
      copy_size_override = is_gfx90a ? large_override : small_override;
      break;
    }
    case 10:
      engine = new BlitSdmaV5();
      if (isa_->GetMinorVersion() < 3) {
        copy_size_override = small_override;
      } else {
        copy_size_override = large_override;
      }
      break;
    case 11:
    case 12:
      engine = new BlitSdmaV5();
      copy_size_override = large_override;
      break;
    default:
      assert(false && "Unexpected device major version.");
      return nullptr;
  }

  // The override can be switched off through the runtime flag.
  const Flag::SDMA_OVERRIDE override_setting =
      core::Runtime::runtime_singleton_->flag().enable_sdma_copy_size_override();
  if (override_setting == Flag::SDMA_DISABLE) {
    copy_size_override = 0;
  }

  // xGMI blits keep the requested engine only with recommended engine masks.
  if (!uses_rec_sdma_eng_id_mask_ && use_xgmi) {
    rec_eng = -1;
  }

  if (engine->Initialize(*this, use_xgmi, copy_size_override, rec_eng) != HSA_STATUS_SUCCESS) {
    engine->Destroy(*this);
    delete engine;
    engine = nullptr;
  }

  return engine;
}

// Creates a kernel-based blit bound to `queue` and initializes it against
// this agent. Returns nullptr if initialization fails.
core::Blit* GpuAgent::CreateBlitKernel(core::Queue* queue) {
  AMD::BlitKernel* blit = new AMD::BlitKernel(queue);

  if (blit->Initialize(*this) == HSA_STATUS_SUCCESS) {
    return blit;
  }

  blit->Destroy(*this);
  delete blit;
  return nullptr;
}

// Installs the lazy initializers for the agent's internal queues and blit
// objects, then sets up the GWS queue initializer.
//
// Nothing is created here directly: each queues_/blits_ entry is reset with a
// callable, which is presumably run on first use of that entry (lazy_ptr
// semantics are defined outside this block — confirm).
void GpuAgent::InitDma() {
  // Setup lazy init pointers on queues and blits.
  // Creates an internal queue, optionally with a non-default priority.
  // Throws AMD::hsa_exception if creation or the priority change fails.
  auto queue_lambda = [this](HSA_QUEUE_PRIORITY priority = HSA_QUEUE_PRIORITY_NORMAL) {
    auto queue = CreateInterceptibleQueue();
    if (queue == nullptr)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Internal queue creation failed.");

    if (priority != HSA_QUEUE_PRIORITY_NORMAL)
      if (queue->SetPriority(priority) != HSA_STATUS_SUCCESS)
        throw AMD::hsa_exception(HSA_STATUS_ERROR,
                                "Failed to increase queue priority for PC Sampling");
    return queue;
  };

  // Dedicated compute queue for host-to-device blits.
  queues_[QueueBlitOnly].reset(queue_lambda);
  // Share utility queue with device-to-host blits.
  queues_[QueueUtility].reset(queue_lambda);

  // Dedicated compute queue for PC Sampling CP-DMA commands. We need a dedicated queue that runs at
  // highest priority because we do not want the CP-DMA commands to be delayed/blocked due to
  // other dispatches/barriers that could be in the other AQL queues.
  queues_[QueuePCSampling].reset([queue_lambda]() { return queue_lambda(HSA_QUEUE_PRIORITY_MAXIMUM); });

  // Decide which engine to use for blits.
  // Returns an SDMA blit when SDMA is usable and can be created, otherwise a
  // kernel blit on `queue`. Throws AMD::hsa_exception if no blit is created.
  // NOTE(review): rec_eng is uint32_t here, so the "-1" assignments below wrap
  // and are converted back to int when passed to CreateBlitSdma(bool, int).
  auto blit_lambda = [this](bool use_xgmi, lazy_ptr<core::Queue>& queue, bool isHostToDev, uint32_t rec_eng) {
    Flag::SDMA_OVERRIDE sdma_override = core::Runtime::runtime_singleton_->flag().enable_sdma();

    // User SDMA queues are unstable on gfx8 and unsupported on gfx1013.
    bool use_sdma =
        ((isa_->GetMajorVersion() != 8) && (isa_->GetVersion() != std::make_tuple(10, 1, 3)));
    // An explicit user override replaces the per-ISA default.
    if (sdma_override != Flag::SDMA_DEFAULT) use_sdma = (sdma_override == Flag::SDMA_ENABLE);

    if (use_sdma && (HSA_PROFILE_BASE == profile_)) {
      // On gfx90a ensure that HostToDevice queue is created first and so is placed on SDMA0.
      if ((!use_xgmi) && (!isHostToDev) && (isa_->GetMajorVersion() == 9) &&
          (isa_->GetMinorVersion() == 0) && (isa_->GetStepping() == 10)) {
        GetBlitObject(BlitHostToDev);
        // Dereference for its side effect only; presumably this forces the
        // lazy host-to-device blit to be instantiated — confirm.
        *blits_[BlitHostToDev];
      }

      // gfx94x is more efficient with reverse order of SDMA0/1 for host<->device copies
      if (!use_xgmi && isa_->GetMajorVersion() == 9 && isa_->GetMinorVersion() >= 4)
        rec_eng = (rec_eng + 1) % properties_.NumSdmaEngines;

      // Check support for targeted SDMA engines
      // (requires kernel interface version 1.17 or newer).
      auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;
      if (!(kfd_version.KernelInterfaceMajorVersion > 1 ||
            (kfd_version.KernelInterfaceMajorVersion == 1 &&
             kfd_version.KernelInterfaceMinorVersion >= 17)))
        rec_eng = -1;

      // Observing strange behavior when fixing host<->device engines
      // on GFX9 devices older than GFX90a, so bypass engine fix.
      if (!use_xgmi && isa_->GetMajorVersion() == 9 && isa_->GetMinorVersion() == 0
          && isa_->GetStepping() < 10)
        rec_eng = -1;

      // devices without dedicated xGMI SDMA engines should not target specific
      // SDMA engines for queue creation as resources are limited
      if (!properties_.NumSdmaXgmiEngines)
        rec_eng = -1;

      // A null result falls through to the kernel blit path below.
      auto ret = CreateBlitSdma(use_xgmi, rec_eng);
      if (ret != nullptr) return ret;
    }

    // pending_copy_stat_check_ref_ will prevent unnecessary compute queue creation
    // since there is no graceful way to handle lazy loading when the caller needs to know
    // the status of available SDMA HW resources without a fallback.
    // Call to isSDMA should be used as a proxy error check if !blit_copy_fallback.
    auto ret = pending_copy_stat_check_ref_ ? new AMD::BlitKernel(NULL) :
                                              CreateBlitKernel((*queue).get());
    if (ret == nullptr)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Blit creation failed.");
    return ret;
  };

  // Determine and instantiate the number of blit objects to
  // engage. The total number is sum of three plus number of
  // sdma-xgmi engines
  uint32_t blit_cnt_ = DefaultBlitCount + properties_.NumSdmaXgmiEngines;
  blits_.resize(blit_cnt_);

  // Initialize blit objects used for D2D, H2D, D2H, and
  // P2P copy operations.
  // -- Blit at index BlitDevToDev(0) deals with copies within
  //    local framebuffer and always engages a Blit Kernel
  // -- Blit at index BlitHostToDev(1) deals with copies from
  //    Host to Device (H2D) and could engage either a Blit
  //    Kernel or sDMA
  // -- Blit at index BlitDevToHost(2) deals with copies from
  //    Device to Host (D2H) and Peer to Peer (P2P) over PCIe.
  //    It could engage either a Blit Kernel or sDMA
  // -- Blit at index DefaultBlitCount(3) and beyond deal
  //    exclusively P2P over xGMI links
  blits_[BlitDevToDev].reset([this]() {
    auto ret = CreateBlitKernel((*queues_[QueueUtility]).get());
    if (ret == nullptr)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Blit creation failed.");
    return ret;
  });
  // Host-to-device requests engine 0, device-to-host requests engine 1.
  blits_[BlitHostToDev].reset(
      [blit_lambda, this]() { return blit_lambda(false, queues_[QueueBlitOnly], true, 0); });
  blits_[BlitDevToHost].reset(
      [blit_lambda, this]() { return blit_lambda(false, queues_[QueueUtility], false, 1); });

  // XGMI engines.
  // The requested engine for blit index idx is idx - 1.
  for (uint32_t idx = DefaultBlitCount; idx < blit_cnt_; idx++) {
    const int eng = idx - 1;
    blits_[idx].reset(
        [blit_lambda, this, eng]() { return blit_lambda(true, queues_[QueueUtility], false, eng); });
  }

  // GWS queues.
  InitGWS();
}

void GpuAgent::InitGWS() {
  gws_queue_.queue_.reset([this]() {
    if (properties_.NumGws == 0) return (core::Queue*)nullptr;
    const uint32_t defaultGWSQueueSize = 0x4000; // 16KB
    std::unique_ptr<core::Queue> queue(CreateInterceptibleQueue(defaultGWSQueueSize));
    if (queue == nullptr)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                               "Internal queue creation failed.");

    auto err = static_cast<AqlQueue*>(queue.get())->EnableGWS(1);
    if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "GWS allocation failed.");

    gws_queue_.ref_ct_ = 0;
    return queue.release();
  });
}

void GpuAgent::GWSRelease() {
  ScopedAcquire<KernelMutex> lock(&gws_queue_.lock_);
  gws_queue_.ref_ct_--;
  if (gws_queue_.ref_ct_ != 0) return;
  InitGWS();
}

void GpuAgent::PreloadBlits() {
  for (auto& blit : blits_) {
    blit.touch();
  }
}

// Disables the agent and frees the resources it holds: instantiated blits,
// the APE1 allocation, cached and reserved scratch, the scratch pool, the
// internal queues, the doorbell queue map and the trap handler code buffer.
// Does nothing when the agent is not enabled.
void GpuAgent::ReleaseResources() {
  if (!this->Enabled()) {
    return;
  }
  this->Disable();

  // Only blits that have actually been instantiated are destroyed.
  for (auto& blit : blits_) {
    if (blit.empty()) {
      continue;
    }
    hsa_status_t status = blit->Destroy(*this);
    assert(status == HSA_STATUS_SUCCESS);
  }

  if (ape1_base_ != 0) {
    _aligned_free(reinterpret_cast<void*>(ape1_base_));
  }

  scratch_cache_.trim(true);
  scratch_cache_.free_reserve();

  if (scratch_pool_.base() != nullptr) {
    driver().FreeMemory(scratch_pool_.base(), scratch_pool_.size());
  }

  for (int queue_idx = 0; queue_idx < QueueCount; ++queue_idx) {
    queues_[queue_idx].reset();
  }

  system_deallocator()(doorbell_queue_map_);

  if (trap_code_buf_ != nullptr) {
    system_deallocator()(trap_code_buf_);
  }
}

// Second-stage agent initialization: sets up the allocators, the scratch
// pool, the trap handler and the DMA queues/blits, in that order.
// Always returns HSA_STATUS_SUCCESS; failures in the individual steps are
// not reported through the return value.
hsa_status_t GpuAgent::PostToolsInit() {
  // Defer memory allocation until agents have been discovered.
  InitAllocators();
  InitScratchPool();
  BindTrapHandler();
  InitDma();

  return HSA_STATUS_SUCCESS;
}

// Submits a linear copy of `size` bytes from src to dst on the
// device-to-device blit and returns the submission status.
hsa_status_t GpuAgent::DmaCopy(void* dst, const void* src, size_t size) {
  lazy_ptr<core::Blit>& d2d_blit = blits_[BlitDevToDev];
  return d2d_blit->SubmitLinearCopyCommand(dst, src, size);
}

// Adjusts the count of in-flight copy requests under blit_lock_.
//
// set == true takes a reference, set == false drops one. Before changing the
// count the call yields (releasing blit_lock_ while it does) until no copy
// status check is pending.
//
// A release with the count already at zero is a no-op. Previously that case
// fell into the increment branch, which would leave a phantom reference.
void GpuAgent::SetCopyRequestRefCount(bool set) {
  ScopedAcquire<KernelMutex> lock(&blit_lock_);
  while (pending_copy_stat_check_ref_) {
    blit_lock_.Release();
    os::YieldThread();
    blit_lock_.Acquire();
  }
  if (set) {
    pending_copy_req_ref_++;
  } else if (pending_copy_req_ref_) {
    pending_copy_req_ref_--;
  }
}

// Adjusts the count of in-flight copy status checks under blit_lock_.
//
// set == true takes a reference, set == false drops one. Before changing the
// count the call yields (releasing blit_lock_ while it does) until no copy
// request is pending.
//
// A release with the count already at zero is a no-op. Previously that case
// fell into the increment branch, which would leave a phantom reference.
void GpuAgent::SetCopyStatusCheckRefCount(bool set) {
  ScopedAcquire<KernelMutex> lock(&blit_lock_);
  while (pending_copy_req_ref_) {
    blit_lock_.Release();
    os::YieldThread();
    blit_lock_.Acquire();
  }
  if (set) {
    pending_copy_stat_check_ref_++;
  } else if (pending_copy_stat_check_ref_) {
    pending_copy_stat_check_ref_--;
  }
}

// Records the gang (max bandwidth) factor to use for copies to `peer`,
// keyed by the peer's public agent handle.
void GpuAgent::RegisterGangPeer(core::Agent& peer, unsigned int max_bandwidth_factor) {
  const auto peer_handle = peer.public_handle().handle;
  gang_peers_info_[peer_handle] = max_bandwidth_factor;
}

// Records the recommended SDMA engine mask for copies to `peer`, keyed by the
// peer's public agent handle. The mask is stored only when recommended
// engines are usable (see below); otherwise 0 is stored. Also updates
// uses_rec_sdma_eng_id_mask_ for the whole agent.
void GpuAgent::RegisterRecSdmaEngIdMaskPeer(core::Agent& peer, uint32_t rec_sdma_eng_id_mask) {
  auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;
  const bool rec_eng_enabled =
      core::Runtime::runtime_singleton_->flag().enable_sdma_recommended_eng() !=
      Flag::SDMA_DISABLE;

  // Kernel interface 1.17 or newer is required.
  const bool kfd_supported = kfd_version.KernelInterfaceMajorVersion > 1 ||
                             (kfd_version.KernelInterfaceMajorVersion == 1 &&
                              kfd_version.KernelInterfaceMinorVersion >= 17);

  // Assume all recommended masks with single recommended engine (IsPowerOfTwo)
  // will only support targeting that engine and will not gang.
  // Also assume support is uniform for every device in the system.
  uses_rec_sdma_eng_id_mask_ = kfd_supported &&
                               isa_->GetMajorVersion() == 9 && isa_->GetMinorVersion() >= 4 &&
                               IsPowerOfTwo(rec_sdma_eng_id_mask) && rec_eng_enabled;

  const auto peer_handle = peer.public_handle().handle;
  if (uses_rec_sdma_eng_id_mask_) {
    rec_sdma_eng_id_peers_info_[peer_handle] = rec_sdma_eng_id_mask;
  } else {
    rec_sdma_eng_id_peers_info_[peer_handle] = 0;
  }
}

// Async signal handler for an internal gang signal passed through `arg`.
// A still-valid signal is destroyed. Returns true if the signal was already
// invalid on entry or is still valid after DestroySignal(), false otherwise.
// NOTE(review): presumably the return value tells the runtime whether to keep
// the handler registered — confirm against the async handler contract.
static bool GangCopyCompleteHandler(hsa_signal_value_t, void *arg ) {
  core::Signal* signal = reinterpret_cast<core::Signal*>(arg);
  if (!signal->IsValid()) {
    return true;
  }

  signal->DestroySignal();
  return signal->IsValid();
}

// Asynchronously copies `size` bytes from src (owned by src_agent) to dst
// (owned by dst_agent).
//
// dep_signals are forwarded to the blit as dependencies and out_signal is
// the completion signal handed to the leading (or only) copy command.
//
// If a recommended SDMA engine is registered for dst_agent the copy is
// forwarded to DmaCopyOnEngine(). Otherwise the copy may be split ("ganged")
// across several SDMA blits, each non-leader item completing on an internal
// gang signal. Returns the first non-success submission status, or
// HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
                               const void* src, core::Agent& src_agent,
                               size_t size,
                               std::vector<core::Signal*>& dep_signals,
                               core::Signal& out_signal) {
  // Recommended SDMA engine copies only have gang factor 1
  uint32_t rec_sdma_eng = ffs(rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle]);

  if (rec_sdma_eng)
    return DmaCopyOnEngine(dst, dst_agent, src, src_agent, size,
                           dep_signals, out_signal, rec_sdma_eng, false);

  if (profiling_enabled()) {
    // Track the agent so we could translate the resulting timestamp to system
    // domain correctly.
    out_signal.async_copy_agent(core::Agent::Convert(this->public_handle()));
  }

  // Calculate the number of gang items
  unsigned int gang_factor = 1;
  if (core::Runtime::runtime_singleton_->flag().enable_sdma_gang() != Flag::SDMA_DISABLE &&
      size >= 4096 && dst_agent.device_type() == core::Agent::kAmdGpuDevice)
    gang_factor = gang_peers_info_[dst_agent.public_handle().handle];
  // Use non-D2D (auxiliary) SDMA engines in the event of xGMI D2D support
  // when xGMI SDMA context is not available.
  bool has_aux_gang = gang_factor > 1 &&
                      gang_factor >= properties_.NumSdmaEngines &&
                      !properties_.NumSdmaXgmiEngines;
  if (gang_factor > 1) {
    gang_factor = has_aux_gang ?
                      std::min(gang_factor, properties_.NumSdmaEngines) :
                      std::min(gang_factor, properties_.NumSdmaXgmiEngines);
  }

  ScopedAcquire<KernelMutex> lock(&sdma_gang_lock_);
  // Manage internal gang signals
  std::vector<core::Signal*> gang_signals;
  if (gang_factor > 1) {
    for (unsigned int i = 0; i < gang_factor - 1; i++) {
      core::Signal *gang_signal;

      // Initial value is 2 where 1 is for gang-leader to ack and
      // 1 for non-leader gang item to decrement
      gang_signal = new core::DefaultSignal(2);

      // Fall back to non-gang copy
      if (!gang_signal->IsValid()) {
        for (core::Signal* created : gang_signals) created->DestroySignal();
        // Forget the destroyed signals. Otherwise the single copy below
        // would see a non-empty gang list and complete on a destroyed gang
        // signal instead of out_signal.
        gang_signals.clear();
        gang_factor = 1;
        break;
      }

      core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
                                         core::Signal::Convert(gang_signal),
                                         HSA_SIGNAL_CONDITION_EQ, 0, GangCopyCompleteHandler,
                                         reinterpret_cast<void*>(gang_signal));
      gang_signals.push_back(gang_signal);
    }
  }

  // Bind the Blit object that will drive this copy operation
  size_t offset = 0, remainder_size = size;
  int gang_sig_count = 0;
  for (unsigned int i = 0; i < gang_factor; i++) {
    // Set leader and gang status to blit
    SetCopyRequestRefCount(true);
    MAKE_SCOPE_GUARD([&]() { SetCopyRequestRefCount(false); });
    lazy_ptr<core::Blit>& blit = gang_factor > 1 ?
                                 (has_aux_gang ? blits_[i + 1] : blits_[i + DefaultBlitCount]) :
                                 GetBlitObject(dst_agent, src_agent, size);
    blit->GangLeader(gang_factor > 1 && !i);

    hsa_status_t stat;
    // Each gang item copies an equal share (rounded up); the last item takes
    // whatever is left.
    size_t chunk = std::min(remainder_size, (size + gang_factor - 1)/gang_factor);
    if (!blit->GangLeader() && !gang_signals.empty()) {
      // Non-leader gang item: completes on its own internal gang signal.
      stat = blit->SubmitLinearCopyCommand(reinterpret_cast<uint8_t*>(dst) + offset,
                                           reinterpret_cast<const uint8_t*>(src) + offset,
                                           chunk, dep_signals,
                                           *gang_signals[gang_sig_count], gang_signals);
      gang_sig_count++;
    } else {
      // Gang leader or plain (non-gang) copy: completes on out_signal.
      stat = blit->SubmitLinearCopyCommand(reinterpret_cast<uint8_t*>(dst) + offset,
                                           reinterpret_cast<const uint8_t*>(src) + offset,
                                           chunk, dep_signals,
                                           out_signal, gang_signals);
    }

    if (stat)
      return stat;

    offset += chunk;
    remainder_size -= chunk;
  }

  return HSA_STATUS_SUCCESS;
}

// Asynchronously copies `size` bytes from src to dst on the blit selected by
// engine_offset.
//
// engine_offset is used as a blit index (see GetBlitObject below). It is
// rejected with HSA_STATUS_ERROR_INVALID_ARGUMENT when it is out of range or
// not allowed for the kind of transfer. Copies within this GPU use the
// device-to-device blit unless force_copy_on_sdma is set. When SDMA (or peer
// SDMA, for P2P copies) is disabled by flag, the device-to-device blit is
// used regardless of engine_offset.
// Returns the status of the blit submission.
hsa_status_t GpuAgent::DmaCopyOnEngine(void* dst, core::Agent& dst_agent,
                               const void* src, core::Agent& src_agent,
                               size_t size,
                               std::vector<core::Signal*>& dep_signals,
                               core::Signal& out_signal,
                               int engine_offset,
                               bool force_copy_on_sdma) {
  // At this point it is guaranteed that one of
  // the two devices is a GPU, potentially both
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // Reject offsets beyond the total number of SDMA engines.
  if (engine_offset > properties_.NumSdmaEngines + properties_.NumSdmaXgmiEngines) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // check if dst and src are the same gpu or over xGMI.
  bool is_same_gpu = (src_agent.public_handle().handle == dst_agent.public_handle().handle) &&
                     (dst_agent.public_handle().handle == public_handle_.handle);

  bool is_p2p = !is_same_gpu && src_agent.device_type() == core::Agent::kAmdGpuDevice &&
                                dst_agent.device_type() == core::Agent::kAmdGpuDevice;

  if ((is_p2p &&
      core::Runtime::runtime_singleton_->flag().enable_peer_sdma() == Flag::SDMA_DISABLE) ||
      core::Runtime::runtime_singleton_->flag().enable_sdma() == Flag::SDMA_DISABLE) {
    // Note  that VDI/HIP will call DmaCopy instead of DmaCopyOnEngine for P2P copies, but
    // we still want to handle force Blit Kernels in this function in case other libraries
    // decide to use DmaCopyOnEngine for P2P copies

    engine_offset = BlitDevToDev;
  } else {
    // xGMI transfer: P2P between agents of the same non-zero hive, on a
    // device that has xGMI SDMA engines.
    bool is_xgmi = is_p2p && dst_agent.HiveId() && src_agent.HiveId() == dst_agent.HiveId() &&
                         properties_.NumSdmaXgmiEngines;

    // Due to a RAS issue, GFX90a can only support H2D copies on SDMA0
    bool is_h2d_blit = (src_agent.device_type() == core::Agent::kAmdCpuDevice &&
      dst_agent.device_type() == core::Agent::kAmdGpuDevice);
    bool limit_h2d_blit = isa_->GetVersion() == core::Isa::Version(9, 0, 10);

    // Ensure engine selection is within proper range based on transfer type
    // Rejected: an xGMI copy on a non-xGMI engine (unless the recommended
    // engine override is set), a non-xGMI copy past the last engine, or a
    // non-H2D copy between different agents on the H2D blit on GFX90a.
    if ((is_xgmi && !rec_sdma_eng_override_ && engine_offset <= properties_.NumSdmaEngines) ||
        (!is_xgmi && engine_offset > (properties_.NumSdmaEngines +
                                      properties_.NumSdmaXgmiEngines)) ||
          (!is_h2d_blit && !is_same_gpu && limit_h2d_blit &&
            engine_offset == BlitHostToDev)) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    // Same-GPU copies go to the device-to-device blit unless SDMA is forced.
    engine_offset = is_same_gpu && !force_copy_on_sdma ? BlitDevToDev : engine_offset;
  }

  // Hold a copy-request reference for the rest of this call; the scope guard
  // drops it on every exit path.
  SetCopyRequestRefCount(true);
  MAKE_SCOPE_GUARD([&]() { SetCopyRequestRefCount(false); });
  lazy_ptr<core::Blit>& blit = GetBlitObject(engine_offset);

  if (profiling_enabled()) {
    // Track the agent so we could translate the resulting timestamp to system
    // domain correctly.
    out_signal.async_copy_agent(core::Agent::Convert(this->public_handle()));
  }

  // Engine-targeted copies are never ganged, so the gang signal list is empty.
  std::vector<core::Signal*> gang_signals(0);

  hsa_status_t stat = blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal,
                                                    gang_signals);

  return stat;
}

// Reports whether the blit engine stored at blits_[engine_offset] is free.
//
// The copy-status-check reference count is raised for the duration of the
// query and dropped again by the scope guard on every exit path.
bool GpuAgent::DmaEngineIsFree(uint32_t engine_offset) {
  SetCopyStatusCheckRefCount(true);
  MAKE_SCOPE_GUARD([&]() { SetCopyStatusCheckRefCount(false); });

  // An engine whose bit is clear in sdma_blit_used_mask_ is reported as free
  // without touching the blit object at all.
  const bool marked_used = (sdma_blit_used_mask_ & (1 << engine_offset)) != 0;
  if (!marked_used) return true;

  // A marked engine only counts as free when it is SDMA backed and reports no
  // pending bytes.
  if (!blits_[engine_offset]->isSDMA()) return false;
  return !blits_[engine_offset]->PendingBytes();
}

// Builds a bitmask of the SDMA engines that are currently free for a copy
// from src_agent to dst_agent.
//
// engine_ids_mask is always overwritten. Returns HSA_STATUS_SUCCESS when at
// least one engine bit was set, HSA_STATUS_ERROR_OUT_OF_RESOURCES otherwise.
hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent,
                                     uint32_t *engine_ids_mask) {
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  *engine_ids_mask = 0;

  // Flags every free xGMI SDMA engine; engine bits start at HSA_AMD_SDMA_ENGINE_2.
  const auto collect_free_xgmi_engines = [&]() {
    for (int idx = 0; idx < properties_.NumSdmaXgmiEngines; idx++) {
      if (DmaEngineIsFree(DefaultBlitCount + idx)) {
        *engine_ids_mask |= (HSA_AMD_SDMA_ENGINE_2 << idx);
      }
    }
  };

  // GPU to GPU inside the same (non-zero) hive, on a device that has xGMI
  // SDMA engines.
  const bool gpu_to_gpu = src_agent.device_type() == core::Agent::kAmdGpuDevice &&
                          dst_agent.device_type() == core::Agent::kAmdGpuDevice;
  const bool xgmi_copy = gpu_to_gpu && dst_agent.HiveId() &&
                         src_agent.HiveId() == dst_agent.HiveId() &&
                         properties_.NumSdmaXgmiEngines;

  if (xgmi_copy) {
    if (rec_sdma_eng_override_) {
      // With the override set, every SDMA engine (regular and xGMI) is a candidate.
      const auto engine_total = properties_.NumSdmaEngines + properties_.NumSdmaXgmiEngines;
      for (int idx = 0; idx < engine_total; idx++) {
        if (DmaEngineIsFree(BlitHostToDev + idx)) {
          *engine_ids_mask |= (HSA_AMD_SDMA_ENGINE_0 << idx);
        }
      }
    } else {
      // Find a free xGMI SDMA engine.
      collect_free_xgmi_engines();
    }
  } else {
    const bool host_to_device = src_agent.device_type() == core::Agent::kAmdCpuDevice &&
                                dst_agent.device_type() == core::Agent::kAmdGpuDevice;
    // Due to a RAS issue, GFX90a can only support H2D copies on SDMA0.
    const bool h2d_only_on_sdma0 = isa_->GetVersion() == core::Isa::Version(9, 0, 10);

    // Check if the H2D engine is free. The engine query is made first so it
    // always runs, regardless of the GFX90a restriction.
    if (DmaEngineIsFree(BlitHostToDev) && (host_to_device || !h2d_only_on_sdma0)) {
      *engine_ids_mask |= HSA_AMD_SDMA_ENGINE_0;
    }

    // Check if the D2H engine is free. With a single SDMA engine it is
    // reported as engine 0.
    if (DmaEngineIsFree(BlitDevToHost)) {
      if (properties_.NumSdmaEngines > 1) {
        *engine_ids_mask |= HSA_AMD_SDMA_ENGINE_1;
      } else {
        *engine_ids_mask |= HSA_AMD_SDMA_ENGINE_0;
      }
    }

    // Find a free xGMI SDMA engine for H2D/D2H though it may be lower bandwidth.
    collect_free_xgmi_engines();
  }

  if (*engine_ids_mask == 0) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  return HSA_STATUS_SUCCESS;
}

// Returns the recommended SDMA engine mask recorded for dst_agent.
//
// The mask is read from rec_sdma_eng_id_peers_info_, keyed by the destination
// agent's public handle value. Always returns HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t *recommended_ids_mask) {
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  const auto peer_key = dst_agent.public_handle().handle;
  *recommended_ids_mask = rec_sdma_eng_id_peers_info_[peer_key];

  return HSA_STATUS_SUCCESS;
}

// Submits a rectangular (pitched) copy on an SDMA engine.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the ISA major version is below 9
// and HSA_STATUS_ERROR_OUT_OF_RESOURCES when the selected blit object is not
// SDMA backed. Otherwise the status of SubmitCopyRectCommand is returned.
hsa_status_t GpuAgent::DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
                                   const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
                                   const hsa_dim3_t* range, hsa_amd_copy_direction_t dir,
                                   std::vector<core::Signal*>& dep_signals,
                                   core::Signal& out_signal) {
  if (isa_->GetMajorVersion() < 9) return HSA_STATUS_ERROR_INVALID_AGENT;

  // Keep the copy-request reference count raised until this call returns.
  SetCopyRequestRefCount(true);
  MAKE_SCOPE_GUARD([&]() { SetCopyRequestRefCount(false); });

  // Host-to-device copies use the H2D engine; every other direction uses D2H.
  const auto engine_slot = (dir == hsaHostToDevice) ? BlitHostToDev : BlitDevToHost;
  lazy_ptr<core::Blit>& blit = GetBlitObject(engine_slot);

  if (blit->isSDMA()) {
    if (profiling_enabled()) {
      // Track the agent so we could translate the resulting timestamp to system
      // domain correctly.
      out_signal.async_copy_agent(core::Agent::Convert(this->public_handle()));
    }

    BlitSdmaBase* sdma_engine = static_cast<BlitSdmaBase*>((*blit).get());
    return sdma_engine->SubmitCopyRectCommand(dst, dst_offset, src, src_offset, range,
                                              dep_signals, out_signal);
  }

  return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}

// Fills memory at ptr by submitting a linear fill command to the
// device-to-device blit object and returns that command's status.
hsa_status_t GpuAgent::DmaFill(void* ptr, uint32_t value, size_t count) {
  auto& fill_engine = blits_[BlitDevToDev];
  return fill_engine->SubmitLinearFillCommand(ptr, value, count);
}

// Switches profiling on or off for every non-empty blit object.
//
// Stops at, and returns, the first failing status. When enabling succeeds for
// all engines, CheckClockTicks() is called before returning success.
hsa_status_t GpuAgent::EnableDmaProfiling(bool enable) {
  for (auto& engine : blits_) {
    // Skip slots that report empty().
    if (engine.empty()) continue;

    const hsa_status_t status = engine->EnableProfiling(enable);
    if (status != HSA_STATUS_SUCCESS) return status;
  }

  if (enable) {
    CheckClockTicks();
  }

  return HSA_STATUS_SUCCESS;
}

// Sets the memory property flag bits for this agent in the 8-byte bitmask
// `value`. Bits are only ever OR-ed in; the caller is expected to have
// cleared the buffer beforehand.
void GpuAgent::GetInfoMemoryProperties(uint8_t value[8]) const {
  // Sets bit number `bit` of the 64-bit mask, addressed byte by byte.
  const auto mark_property = [value](uint32_t bit) {
    assert(bit < 8 * 8 && "Flag value exceeds input parameter size");
    value[bit / 8] |= 1 << (bit % 8);
  };

  // Fill the HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU flag
  if (properties_.Integrated) {
    mark_property(HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU);
  }
}

// Writes the value of `attribute` for this GPU agent into the caller-provided
// buffer `value`.
//
// The size and type of the data written depend on the attribute; the buffer
// size is not checked here, so the caller must supply storage large enough for
// the requested attribute.
//
// Returns HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ARGUMENT for
// an unknown attribute (or when the available-memory query fails),
// HSA_STATUS_ERROR when the clock counter query fails, or the status forwarded
// from the image extension for the image dimension queries.
hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
  // The attribute is switched on as an integer so that extension and vendor
  // attribute values can share the switch with the core enumeration.
  const size_t attribute_u = static_cast<size_t>(attribute);
  // agent, and vendor name length limit excluding terminating nul character.
  constexpr size_t hsa_name_size = 63;

  // Image support is reported as absent for ISA versions 9.4.x and 9.5.x.
  const bool isa_has_image_support =
      (isa_->GetMajorVersion() == 9 &&
      (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5)) ? false : true;

  switch (attribute_u) {
    case HSA_AGENT_INFO_NAME: {
      std::string name = isa_->GetProcessorName();
      assert(name.size() <= hsa_name_size);
      std::memset(value, 0, hsa_name_size);
      // Bounded copy: the assert above vanishes in release builds, so never
      // write more than hsa_name_size characters plus the terminating nul.
      const size_t copy_len = (name.size() < hsa_name_size) ? name.size() : hsa_name_size;
      char* temp = reinterpret_cast<char*>(value);
      std::memcpy(temp, name.data(), copy_len);
      temp[copy_len] = '\0';
      break;
    }
    case HSA_AGENT_INFO_VENDOR_NAME:
      std::memset(value, 0, hsa_name_size);
      std::memcpy(value, "AMD", sizeof("AMD"));
      break;
    case HSA_AGENT_INFO_FEATURE:
      *((hsa_agent_feature_t*)value) = HSA_AGENT_FEATURE_KERNEL_DISPATCH;
      break;
    case HSA_AGENT_INFO_MACHINE_MODEL:
#if defined(HSA_LARGE_MODEL)
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
#else
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
#endif
      break;
    case HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES:
    case HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      *((hsa_default_float_rounding_mode_t*)value) =
          HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR;
      break;
    case HSA_AGENT_INFO_FAST_F16_OPERATION:
      if (isa_->GetMajorVersion() >= 8) {
        *((bool*)value) = true;
      } else {
        *((bool*)value) = false;
      }
      break;
    case HSA_AGENT_INFO_PROFILE:
      *((hsa_profile_t*)value) = profile_;
      break;
    case HSA_AGENT_INFO_WAVEFRONT_SIZE:
      *((uint32_t*)value) = properties_.WaveFrontSize;
      break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_DIM: {
      // TODO: must be per-device
      const uint16_t group_size[3] = {1024, 1024, 1024};
      std::memcpy(value, group_size, sizeof(group_size));
    } break;
    case HSA_AGENT_INFO_WORKGROUP_MAX_SIZE:
      // TODO: must be per-device
      *((uint32_t*)value) = 1024;
      break;
    case HSA_AGENT_INFO_GRID_MAX_DIM: {
      const hsa_dim3_t grid_size = {INT32_MAX, UINT16_MAX, UINT16_MAX};
      std::memcpy(value, &grid_size, sizeof(hsa_dim3_t));
    } break;
    case HSA_AGENT_INFO_GRID_MAX_SIZE:
      *((uint32_t*)value) = UINT32_MAX;
      break;
    case HSA_AGENT_INFO_FBARRIER_MAX_SIZE:
      // TODO: to confirm
      *((uint32_t*)value) = 32;
      break;
    case HSA_AGENT_INFO_QUEUES_MAX:
      *((uint32_t*)value) = max_queues_;
      break;
    case HSA_AGENT_INFO_QUEUE_MIN_SIZE:
      *((uint32_t*)value) = minAqlSize_;
      break;
    case HSA_AGENT_INFO_QUEUE_MAX_SIZE:
      *((uint32_t*)value) = maxAqlSize_;
      break;
    case HSA_AGENT_INFO_QUEUE_TYPE:
      *((hsa_queue_type32_t*)value) = HSA_QUEUE_TYPE_MULTI;
      break;
    case HSA_AGENT_INFO_NODE:
      // TODO: associate with OS NUMA support (numactl / GetNumaProcessorNode).
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AGENT_INFO_DEVICE:
      *((hsa_device_type_t*)value) = HSA_DEVICE_TYPE_GPU;
      break;
    case HSA_AGENT_INFO_CACHE_SIZE: {
      // The output is an array with one uint32_t entry per cache level.
      constexpr uint32_t kMaxCacheLevels = 4;
      std::memset(value, 0, sizeof(uint32_t) * kMaxCacheLevels);
      assert(cache_props_.size() > 0 && "GPU cache info missing.");
      uint32_t* cache_sizes = reinterpret_cast<uint32_t*>(value);
      const size_t num_cache = cache_props_.size();
      for (size_t i = 0; i < num_cache; ++i) {
        const uint32_t line_level = cache_props_[i].CacheLevel;
        // Skip entries whose level does not map to an output slot; indexing
        // with them would write outside the caller's 4-entry array.
        if (line_level == 0 || line_level > kMaxCacheLevels) continue;
          /*
           * L1 Cache is per CU.
           * For L2 Cache and above, we report total for the partition so we sum
           * all the node entries.
           */
        if (line_level >= 2)
          cache_sizes[line_level - 1] += cache_props_[i].CacheSize * 1024;
        else if (cache_sizes[line_level - 1] == 0)
          cache_sizes[line_level - 1] = cache_props_[i].CacheSize * 1024;
      }
    } break;
    case HSA_AGENT_INFO_ISA:
      *((hsa_isa_t*)value) = core::Isa::Handle(isa_);
      break;
    case HSA_AGENT_INFO_EXTENSIONS: {
      memset(value, 0, sizeof(uint8_t) * 128);

      // Sets bit number `bit` in the 128-byte extension bitmask.
      auto setFlag = [&](uint32_t bit) {
        assert(bit < 128 * 8 && "Extension value exceeds extension bitmask");
        uint index = bit / 8;
        uint subBit = bit % 8;
        ((uint8_t*)value)[index] |= 1 << subBit;
      };

      if (core::hsa_internal_api_table().finalizer_api.hsa_ext_program_finalize_fn != NULL) {
        setFlag(HSA_EXTENSION_FINALIZER);
      }

      if (core::hsa_internal_api_table().image_api.hsa_ext_image_create_fn != NULL) {
        setFlag(HSA_EXTENSION_IMAGES);
      }

      if (core::hsa_internal_api_table().pcs_api.hsa_ven_amd_pcs_iterate_configuration_fn != NULL) {
        setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
      }

      // The AQL profile extension is reported when its library can be loaded;
      // the handle is closed again immediately.
      if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
        os::CloseLib(lib);
        setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
      }

      setFlag(HSA_EXTENSION_AMD_PROFILER);

      break;
    }
    case HSA_AGENT_INFO_VERSION_MAJOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_AGENT_INFO_VERSION_MINOR:
      *((uint16_t*)value) = 1;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      if (!isa_has_image_support)
        *((uint32_t*)value) = 0;
      else
        return hsa_amd_image_get_info_max_dim(public_handle(), attribute, value);
      break;
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES:
      // TODO: hardcode based on OCL constants.
      *((uint32_t*)value) = isa_has_image_support ? 128 : 0;
      break;
    case HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES:
      *((uint32_t*)value) = isa_has_image_support ? 64 : 0;
      break;
    case HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS:
      *((uint32_t*)value) = isa_has_image_support ? 16 : 0;
      break;
    case HSA_AMD_AGENT_INFO_CHIP_ID:
      *((uint32_t*)value) = properties_.DeviceId;
      break;
    case HSA_AMD_AGENT_INFO_CACHELINE_SIZE:
      // Report the line size of the first L2 entry that has a non-zero value.
      for (auto& cache : cache_props_) {
        if ((cache.CacheLevel == 2) && (cache.CacheLineSize != 0)) {
          *((uint32_t*)value) = cache.CacheLineSize;
          return HSA_STATUS_SUCCESS;
        }
      }
      // Fallback for when KFD is returning zero.
      *((uint32_t*)value) = 64;
      break;
    case HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT:
      *((uint32_t*)value) =
          (properties_.NumFComputeCores / properties_.NumSIMDPerCU);
      break;
    case HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY:
      *((uint32_t*)value) = properties_.MaxEngineClockMhzFCompute;
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_NODE_ID:
      *((uint32_t*)value) = node_id();
      break;
    case HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS:
      *((uint32_t*)value) = static_cast<uint32_t>(
          1 << properties_.Capability.ui32.WatchPointsTotalBits);
      break;
    case HSA_AMD_AGENT_INFO_BDFID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.LocationId);
      break;
    case HSA_AMD_AGENT_INFO_MEMORY_WIDTH:
      *((uint32_t*)value) = memory_bus_width_;
      break;
    case HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY:
      *((uint32_t*)value) = memory_max_frequency_;
      break;

    // The code copies HsaNodeProperties.MarketingName a Unicode string
    // which is encoded in UTF-16 as a 7-bit ASCII string
    case HSA_AMD_AGENT_INFO_PRODUCT_NAME: {
      std::memset(value, 0, HSA_PUBLIC_NAME_SIZE);
      char* temp = reinterpret_cast<char*>(value);
      for (uint32_t idx = 0;
           properties_.MarketingName[idx] != 0 && idx < HSA_PUBLIC_NAME_SIZE - 1; idx++) {
        temp[idx] = (uint8_t)properties_.MarketingName[idx];
      }
      break;
    }
    case HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU:
      *((uint32_t*)value) = static_cast<uint32_t>(
          properties_.NumSIMDPerCU * properties_.MaxWavesPerSIMD);
      break;
    case HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU:
      *((uint32_t*)value) = properties_.NumSIMDPerCU;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES:
      *((uint32_t*)value) = properties_.NumShaderBanks;
      break;
    case HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE:
      *((uint32_t*)value) = properties_.NumArrays;
      break;
    case HSA_AMD_AGENT_INFO_HDP_FLUSH:
      *((hsa_amd_hdp_flush_t*)value) = HDP_flush_;
      break;
    case HSA_AMD_AGENT_INFO_DOMAIN:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.Domain);
      break;
    case HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES:
      *((bool*)value) = properties_.NumGws != 0;
      break;
    case HSA_AMD_AGENT_INFO_UUID: {
      uint64_t uuid_value = static_cast<uint64_t>(properties_.UniqueID);

      // Either device does not support UUID e.g. a Gfx8 device,
      // or runtime is using an older thunk library that does not
      // support UUID's
      if (uuid_value == 0) {
        char uuid_tmp[] = "GPU-XX";
        snprintf((char*)value, sizeof(uuid_tmp), "%s", uuid_tmp);
        break;
      }

      // Device supports UUID, build UUID string to return
      std::stringstream ss;
      ss << "GPU-" << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
         << uuid_value;
      snprintf((char*)value, (ss.str().length() + 1), "%s", (char*)ss.str().c_str());
      break;
    }
    case HSA_AMD_AGENT_INFO_ASIC_REVISION:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.Capability.ui32.ASICRevision);
      break;
    case HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS:
      assert(regions_.size() != 0 && "No device local memory found!");
      *((bool*)value) = properties_.Capability.ui32.CoherentHostAccess == 1;
      break;
    case HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT:
      if (core::Runtime::runtime_singleton_->flag().coop_cu_count() &&
          !(core::Runtime::runtime_singleton_->flag().cu_mask(enum_index_).empty())) {
        debug_warning("Cooperative launch and CU masking are currently incompatible!");
        *((uint32_t*)value) = 0;
        break;
      }

      // ISA 9.0.10 with the coop_cu_count flag set reports a reduced count.
      if (core::Runtime::runtime_singleton_->flag().coop_cu_count() &&
          (isa_->GetMajorVersion() == 9) && (isa_->GetMinorVersion() == 0) &&
          (isa_->GetStepping() == 10)) {
        uint32_t count = 0;
        hsa_status_t err = GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &count);
        assert(err == HSA_STATUS_SUCCESS && "CU count query failed.");
        *((uint32_t*)value) = (count & 0xFFFFFFF8) - 8;  // value = floor(count/8)*8-8
        break;
      }
      return GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, value);
    case HSA_AMD_AGENT_INFO_MEMORY_AVAIL: {
      HSAuint64 availableBytes;
      hsa_status_t status;

      status = driver().AvailableMemory(node_id(), &availableBytes);

      if (status != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

      // Memory held in the per-region caches and in the scratch cache (minus
      // its reserved part) is added to what the driver reports.
      for (auto r : regions()) availableBytes += ((AMD::MemoryRegion*)r)->GetCacheSize();

      availableBytes += scratch_cache_.free_bytes() - scratch_cache_.reserved_bytes();

      *((uint64_t*)value) = availableBytes;
      break;
    }
    case HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY:
      *((uint64_t*)value) = wallclock_frequency_;
      break;
    case HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.FamilyID);
      break;
    case HSA_AMD_AGENT_INFO_UCODE_VERSION:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.EngineId.ui32.uCode);
      break;
    case HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.uCodeEngineVersions.uCodeSDMA);
      break;
    case HSA_AMD_AGENT_INFO_NUM_SDMA_ENG:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.NumSdmaEngines);
      break;
    case HSA_AMD_AGENT_INFO_NUM_SDMA_XGMI_ENG:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.NumSdmaXgmiEngines);
      break;
    case HSA_AMD_AGENT_INFO_IOMMU_SUPPORT:
      if (properties_.Capability.ui32.HSAMMUPresent)
        *((hsa_amd_iommu_version_t*)value) = HSA_IOMMU_SUPPORT_V2;
      else
        *((hsa_amd_iommu_version_t*)value) = HSA_IOMMU_SUPPORT_NONE;
      break;
    case HSA_AMD_AGENT_INFO_NUM_XCC:
      *((uint32_t*)value) = static_cast<uint32_t>(properties_.NumXcc);
      break;
    case HSA_AMD_AGENT_INFO_DRIVER_UID:
      *((uint32_t*)value) = KfdGpuID();
      break;
    case HSA_AMD_AGENT_INFO_NEAREST_CPU:
      *((hsa_agent_t*)value) = GetNearestCpuAgent()->public_handle();
      break;
    case HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES:
      memset(value, 0, sizeof(uint8_t) * 8);
      GetInfoMemoryProperties((uint8_t*)value);
      break;
    case HSA_AMD_AGENT_INFO_AQL_EXTENSIONS:
      memset(value, 0, sizeof(uint8_t) * 8);
      /* Not yet implemented */
      break;
    case HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_MAX:
      *((uint64_t*)value) = MaxScratchDevice();
      break;
    case HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_CURRENT:
      *((uint64_t*)value) = scratch_limit_async_threshold_;
      break;
    case HSA_AMD_AGENT_INFO_CLOCK_COUNTERS: {
      HsaClockCounters hsakmt_counters = {};
      hsa_amd_clock_counters_t* counters = static_cast<hsa_amd_clock_counters_t*>(value);

      hsa_status_t err = driver().GetClockCounters(node_id(), &hsakmt_counters);
      if (err == HSA_STATUS_SUCCESS) {
        counters->cpu_clock_counter = hsakmt_counters.CPUClockCounter;
        counters->gpu_clock_counter = hsakmt_counters.GPUClockCounter;
        counters->system_clock_counter = hsakmt_counters.SystemClockCounter;
        counters->system_clock_frequency = hsakmt_counters.SystemClockFrequencyHz;
        break;
      }
      return HSA_STATUS_ERROR;
    }
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      break;
  }
  return HSA_STATUS_SUCCESS;
}

// Creates a queue on this agent and returns it through `queue`.
//
// For HSA_QUEUE_TYPE_COOPERATIVE the existing GWS queue is handed out (its
// reference count is incremented); no new queue is created. For every other
// type a new AqlQueue of `size` packets is created, with main scratch sized
// from `private_segment_size` (UINT_MAX selects the agent default).
//
// Returns:
//   HSA_STATUS_SUCCESS                       on success.
//   HSA_STATUS_ERROR_INVALID_QUEUE_CREATION  cooperative queue requested but
//                                            no GWS queue object is available.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT        size is not a power of two or is
//                                            below minAqlSize_.
//   HSA_STATUS_ERROR_OUT_OF_RESOURCES        size exceeds maxAqlSize_, the
//                                            scratch request is too large or
//                                            cannot be satisfied, or the
//                                            shared queue allocation fails.
hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type, uint64_t flags,
                                   core::HsaEventCallback event_callback, void* data,
                                   uint32_t private_segment_size, uint32_t group_segment_size,
                                   core::Queue** queue) {
  // Handle GWS queues.
  if (queue_type == HSA_QUEUE_TYPE_COOPERATIVE) {
    ScopedAcquire<KernelMutex> lock(&gws_queue_.lock_);
    auto ret = (*gws_queue_.queue_).get();
    if (ret != nullptr) {
      gws_queue_.ref_ct_++;
      *queue = ret;
      return HSA_STATUS_SUCCESS;
    }
    return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
  }

  // AQL queues must be a power of two in length.
  if (!IsPowerOfTwo(size)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Enforce max size
  if (size > maxAqlSize_) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Enforce min size
  if (size < minAqlSize_) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Allocate scratch memory
  ScratchInfo scratch = {0};
  if (private_segment_size == UINT_MAX) {
    // Caller asked for the default: none for the base profile, otherwise the
    // agent's per-thread scratch size.
    private_segment_size = (profile_ == HSA_PROFILE_BASE) ? 0 : scratch_per_thread_;
  }

  if (private_segment_size > 262128) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Asynchronous reclaim flag bit is set by CP FW on queue-connect, we will update this when
  // we get the first scratch request.
  scratch.async_reclaim = false;

  // The aligned per-thread size is only used for the limit check; the
  // unaligned request is what gets stored afterwards.
  scratch.main_lanes_per_wave = 64;
  scratch.main_size_per_thread = AlignUp(private_segment_size, 1024 / scratch.main_lanes_per_wave);
  if (scratch.main_size_per_thread > 262128) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  scratch.main_size_per_thread = private_segment_size;

  const uint32_t num_cu = properties_.NumFComputeCores / properties_.NumSIMDPerCU;
  scratch.main_size = scratch.main_size_per_thread * properties_.MaxSlotsScratchCU *
      scratch.main_lanes_per_wave * num_cu;
  scratch.main_queue_base = nullptr;
  scratch.main_queue_process_offset = 0;

  // Release the scratch again on any failure below. The guard must tolerate
  // having nothing to release: it also fires when no scratch was requested or
  // when the acquisition itself failed, and ReleaseQueueMainScratch requires a
  // non-null base.
  // NOTE(review): ReleaseQueueMainScratch is documented as needing
  // scratch_lock_, which is not taken on this path - confirm.
  MAKE_NAMED_SCOPE_GUARD(scratchGuard, [&]() {
    if (scratch.main_queue_base != nullptr) ReleaseQueueMainScratch(scratch);
  });

  if (scratch.main_size != 0) {
    AcquireQueueMainScratch(scratch);
    if (scratch.main_queue_base == nullptr) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  // Ensure utility queue has been created.
  // Deferring longer risks exhausting queue count before ISA upload and invalidation capability is
  // ensured.
  queues_[QueueUtility].touch();

  bool dev_mem_queue_descriptor = (flags & HSA_AMD_QUEUE_CREATE_DEVICE_MEM_QUEUE_DESCRIPTOR) != 0;

  // Create an HW AQL queue
  core::SharedQueue* shared_queue = nullptr;

  if (dev_mem_queue_descriptor) {
    // Queue descriptor requested in device memory: use the fine-grain allocator.
    shared_queue = static_cast<core::SharedQueue*>(
        finegrain_allocator()(sizeof(core::SharedQueue), core::MemoryRegion::AllocateUncached));
  } else {
    shared_queue =
        static_cast<core::SharedQueue*>(core::Runtime::runtime_singleton_->system_allocator()(
            sizeof(core::SharedQueue), MemoryRegion::GetPageSize(),
            isMES() ? (MemoryRegion::AllocateGTTAccess | MemoryRegion::AllocateNonPaged) : 0,
            node_id()));
  }

  if (!shared_queue) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  auto aql_queue = new AqlQueue(shared_queue, this, size, node_id(), scratch, event_callback, data,
                                flags);
  *queue = aql_queue;
  aql_queues_.push_back(aql_queue);

  if (doorbell_queue_map_) {
    // Calculate index of the queue doorbell within the doorbell aperture.
    auto doorbell_addr = uintptr_t(aql_queue->signal_.hardware_doorbell_ptr);
    auto doorbell_idx = (doorbell_addr >> 3) & (MAX_NUM_DOORBELLS - 1);
    doorbell_queue_map_[doorbell_idx] = &aql_queue->amd_queue_;
  }

  // The queue was created with the scratch, so the guard must not release it.
  scratchGuard.Dismiss();
  return HSA_STATUS_SUCCESS;
}

// Acquires main scratch memory for a queue and records the result in `scratch`.
//
// Reads from `scratch`: main_size, main_size_per_thread, use_once_limit,
// dispatch_size, dispatch_slots, main_waves_per_group, cooperative and
// queue_retry.
// Writes to `scratch`: main_queue_base, main_size, main_size_per_thread (only
// when main_size was 0 on entry), large, retry and main_queue_process_offset.
//
// There is no return value. QueueCreate in this file treats a null
// scratch.main_queue_base after the call as failure. scratch.retry is set when
// a scratch notifier was registered because large scratch is still in use.
void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) {
  assert(scratch.main_queue_base == nullptr &&
         "AcquireQueueMainScratch called while holding scratch.");
  // Selects how main_queue_process_offset is expressed at the end of this
  // function: absolute address for ISA major > 8, pool-relative otherwise.
  bool need_queue_scratch_base = (isa_->GetMajorVersion() > 8);

  // A zero size request falls back to the agent's default queue scratch size.
  if (scratch.main_size == 0) {
    scratch.main_size = queue_scratch_len_;
    scratch.main_size_per_thread = scratch_per_thread_;
  }
  scratch.retry = false;

  // Fail scratch allocation if per wave limits are exceeded.
  uint64_t size_per_wave = AlignUp(scratch.main_size_per_thread * properties_.WaveFrontSize, 1024);
  if (size_per_wave > MAX_WAVE_SCRATCH) return;

  /*
  Determine size class needed.

  Scratch allocations come in two flavors based on how it is retired.  Small allocations may be
  kept bound to a queue and reused by firmware.  This memory can not be reclaimed by the runtime
  on demand so must be kept small to avoid egregious OOM conditions.  Other allocations, aka large,
  may be used by firmware only for one dispatch and are then surrendered to the runtime.  This has
  significant latency so we don't want to make all scratch allocations large (ie single use).

  Note that the designation "large" is for contrast with "small", which must really be small
  amounts of memory, and does not always imply a large quantity of memory is needed.  Other
  properties of the allocation may require single use and so qualify the allocation or use as
  "large".

  Here we decide on the boundaries for small scratch allocations.  Both the largest small single
  allocation and the maximum amount of memory bound by small allocations are limited.  Additionally
  some legacy devices do not support large scratch.

  For small scratch we must allocate enough memory for every physical scratch slot.
  For large scratch compute the minimum memory needed to run the dispatch without limiting
  occupancy.
  Limit total bound small scratch allocations to 1/8th of scratch pool and 1/4 of that for a single
  allocation.
  */
  bool large;

  // scratch_lock_ is held (via ScopedAcquire) from here to the end of the function.
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);
  const size_t small_limit = scratch_pool_.size() >> 3;
  bool use_reclaim = true;

  // Large when the request exceeds the use-once limit, or - only when async
  // scratch reclaim is not enabled - when the pool bytes already allocated
  // (excluding free cached bytes) plus this request would exceed small_limit.
  large = (scratch.main_size > scratch.use_once_limit) ||
          (!AsyncScratchReclaimEnabled() &&
            ((scratch_pool_.size() - scratch_pool_.remaining() - scratch_cache_.free_bytes() +
             scratch.main_size) > small_limit));

  // ISA major < 8 or the no_scratch_reclaim flag force small, non-reclaimed scratch.
  if ((isa_->GetMajorVersion() < 8) ||
      core::Runtime::runtime_singleton_->flag().no_scratch_reclaim()) {
    large = false;
    use_reclaim = false;
  }

  // If large is selected then the scratch will not be retained.
  // In that case allocate the minimum necessary for the dispatch since we don't need all slots.
  if (large) scratch.main_size = scratch.dispatch_size;

  // Ensure mapping will be in whole pages.
  scratch.main_size = AlignUp(scratch.main_size, 4096);

  /*
  Sequence of attempts is:
    check cache
    attempt a new allocation
    trim unused blocks from cache
    attempt a new allocation
    check cache for sufficient used block, steal and wait (not implemented)
    trim used blocks from cache, evaluate retry
    reduce occupancy
  */

  // Lambda called in place.
  // Used to allow exit from nested loops.
  [&]() {
    // Check scratch cache
    scratch.large = large;
    if (scratch_cache_.allocMain(scratch)) return;

    // Attempt new allocation.
    for (int i = 0; i < 3; i++) {
      if (large)
        scratch.main_queue_base = scratch_pool_.alloc_high(scratch.main_size);
      else
        scratch.main_queue_base = scratch_pool_.alloc(scratch.main_size);

      // A small request whose block lies above the pool's high split is also
      // marked large.
      scratch.large = large | (scratch.main_queue_base > scratch_pool_.high_split());
      assert(((!scratch.large) | use_reclaim) && "Large scratch used with reclaim disabled.");

      if (scratch.main_queue_base != nullptr) {
        HSAuint64 alternate_va;
        // MakeMemoryResident is only called when the profile is not
        // HSA_PROFILE_FULL; for the full profile the block is used as is.
        if ((profile_ == HSA_PROFILE_FULL) ||
            (driver().MakeMemoryResident(scratch.main_queue_base, scratch.main_size,
                                         &alternate_va) == HSA_STATUS_SUCCESS)) {
          if (scratch.large) scratch_used_large_ += scratch.main_size;
          scratch_cache_.insertMain(scratch);
          return;
        }
      }

      // Scratch request failed allocation or mapping.
      scratch_pool_.free(scratch.main_queue_base);
      scratch.main_queue_base = nullptr;

      // Release cached scratch and retry.
      // First iteration trims unused blocks, second trims all. 3rd uses reserved memory
      switch (i) {
        case 0:
          scratch_cache_.trim(false);
          break;
        case 1:
          scratch_cache_.trim(true);
          break;
        case 2:
          if (scratch_cache_.use_reserved(scratch)) return;
      }
    }

    // Retry if large may yield needed space.
    // scratch.retry is only set when the notifier could be registered.
    if (scratch_used_large_ != 0) {
      if (AddScratchNotifier(scratch.queue_retry, 0x8000000000000000ull)) scratch.retry = true;
      return;
    }

    // Fail scratch allocation if reducing occupancy is disabled.
    if (scratch.cooperative || (!use_reclaim) ||
        core::Runtime::runtime_singleton_->flag().no_scratch_thread_limiter())
      return;

    // Attempt to trim the maximum number of concurrent waves to allow scratch to fit.
    if (core::Runtime::runtime_singleton_->flag().enable_queue_fault_message())
      debug_print("Failed to map requested scratch (%ld) - reducing queue occupancy.\n",
                  scratch.main_size);
    const uint64_t num_cus = properties_.NumFComputeCores / properties_.NumSIMDPerCU;
    const uint64_t se_per_xcc = properties_.NumShaderBanks / properties_.NumXcc;

    // Starting point: the per-CU wave count implied by the requested size,
    // rounded up to a whole number of wave groups.
    const uint64_t total_waves = scratch.main_size / size_per_wave;
    uint64_t waves_per_cu = AlignUp(total_waves / num_cus, scratch.main_waves_per_group);

    // Shrink the allocation step by step until one fits or nothing is left.
    while (waves_per_cu != 0) {
      size_t size = waves_per_cu * num_cus * size_per_wave;
      void* base = scratch_pool_.alloc_high(size);
      HSAuint64 alternate_va;
      if ((base != nullptr) &&
          ((profile_ == HSA_PROFILE_FULL) ||
           (driver().MakeMemoryResident(base, size, &alternate_va) == HSA_STATUS_SUCCESS))) {
        // Scratch allocated and either full profile or map succeeded.
        scratch.main_queue_base = base;
        scratch.main_size = size;
        scratch.large = true;
        scratch_used_large_ += scratch.main_size;
        scratch_cache_.insertMain(scratch);
        if (core::Runtime::runtime_singleton_->flag().enable_queue_fault_message())
          debug_print("  %ld scratch mapped, %.2f%% occupancy.\n", scratch.main_size,
                      float(waves_per_cu * num_cus) / scratch.dispatch_slots * 100.0f);
        return;
      }
      scratch_pool_.free(base);

      // Wave count must be divisible by #SEs in an XCC. If occupancy must be reduced
      // such that waves_per_cu < waves_per_group, continue reducing by #SEs per XCC
      // (only allowed if waves_per_group is a multiple #SEs per XCC).
      waves_per_cu -= (waves_per_cu <= scratch.main_waves_per_group &&
                       se_per_xcc < scratch.main_waves_per_group &&
                       scratch.main_waves_per_group % se_per_xcc == 0)
                       ? se_per_xcc
                       : scratch.main_waves_per_group;
    }

    // Failed to allocate minimal scratch
    assert(scratch.main_queue_base == nullptr && "bad scratch data");
    if (core::Runtime::runtime_singleton_->flag().enable_queue_fault_message())
      debug_print("  Could not allocate scratch for one wave per CU.\n");
    return;
  }();

  // Computed on every path through the lambda, including when main_queue_base
  // is still nullptr.
  scratch.main_queue_process_offset = need_queue_scratch_base
      ? uintptr_t(scratch.main_queue_base)
      : uintptr_t(scratch.main_queue_base) - uintptr_t(scratch_pool_.base());
}

/*
 * Hands a queue's main scratch back to the scratch cache and clears the
 * queue's base pointer.
 *
 * Should be called with scratch_lock_. The scratch must currently be held,
 * i.e. scratch.main_queue_base must not be null.
 */
void GpuAgent::ReleaseQueueMainScratch(ScratchInfo& scratch) {
  assert(scratch.main_queue_base != nullptr);

  scratch_cache_.freeMain(scratch);

  // Mark the scratch as no longer held.
  scratch.main_queue_base = nullptr;
}

// Tries to obtain alternate scratch memory for a queue.
//
// On return scratch.alt_queue_base is non-null if a block was obtained and
// nullptr otherwise. Requests that exceed MAX_WAVE_SCRATCH per wave are
// rejected up front without taking scratch_lock_ and without modifying scratch.
void GpuAgent::AcquireQueueAltScratch(ScratchInfo& scratch) {
  assert(scratch.async_reclaim && "Acquire Alt Scratch when FW does not support it");
  assert(scratch.alt_queue_base == nullptr &&
         "AcquireQueueAltScratch called while holding alt scratch.");

  // Per wave limit check.
  const uint64_t wave_bytes =
      AlignUp(scratch.alt_size_per_thread * properties_.WaveFrontSize, 1024);
  if (wave_bytes > MAX_WAVE_SCRATCH) return;

  ScopedAcquire<KernelMutex> lock(&scratch_lock_);

  // Mappings are made in whole pages.
  scratch.alt_size = AlignUp(scratch.alt_size, 4096);

  // Order of attempts:
  //   1. scratch cache
  //   2. fresh allocation
  //   3. trim unused cache blocks, then another fresh allocation
  //   4. trim all cache blocks (no further allocation attempt follows)
  // The lambda is invoked in place so that a successful step can leave early.
  const bool acquired = [&]() -> bool {
    if (scratch_cache_.allocAlt(scratch)) return true;

    for (int attempt = 0; attempt < 2; ++attempt) {
      scratch.alt_queue_base = scratch_pool_.alloc(scratch.alt_size);
      if (scratch.alt_queue_base != nullptr) {
        HSAuint64 alternate_va;
        // Full profile needs no explicit mapping; otherwise the block must be made resident.
        const bool usable = (profile_ == HSA_PROFILE_FULL) ||
            (driver().MakeMemoryResident(scratch.alt_queue_base, scratch.alt_size,
                                         &alternate_va) == HSA_STATUS_SUCCESS);
        if (usable) {
          scratch_cache_.insertAlt(scratch);
          return true;
        }
      }

      // Allocation or mapping failed: give the block back and forget it.
      scratch_pool_.free(scratch.alt_queue_base);
      scratch.alt_queue_base = nullptr;

      // First pass trims only unused cache blocks, second pass trims everything.
      scratch_cache_.trim(attempt != 0);
    }
    return false;
  }();

  if (!acquired && core::Runtime::runtime_singleton_->flag().enable_queue_fault_message())
    debug_print("  Could not allocate alt scratch.\n");

  scratch.alt_queue_process_offset = uintptr_t(scratch.alt_queue_base);
}

// Hands the queue's alternate scratch block to scratch_cache_.freeAlt() and
// clears the queue's reference to it.
// Should be called with scratch_lock_ held.
void GpuAgent::ReleaseQueueAltScratch(ScratchInfo& scratch) {
  assert(scratch.alt_queue_base != nullptr);
  scratch_cache_.freeAlt(scratch);
  scratch.alt_queue_base = nullptr;
}

// Returns a scratch block to the scratch pool.
//   base  - start of the block being released.
//   size  - size of the block; only used for large-scratch accounting.
//   large - true if the block was counted in scratch_used_large_.
// All registered scratch notifiers are signalled and then cleared.
void GpuAgent::ReleaseScratch(void* base, size_t size, bool large) {
  // Base profile scratch was explicitly made resident, so undo that first.
  if (profile_ == HSA_PROFILE_BASE &&
      driver().MakeMemoryUnresident(base) != HSA_STATUS_SUCCESS) {
    assert(false && "Unmap scratch subrange failed!");
  }

  scratch_pool_.free(base);

  if (large) {
    scratch_used_large_ -= size;
  }

  // Wake anyone waiting for scratch: more may be available now.
  for (const auto& waiter : scratch_notifiers_) {
    HSA::hsa_signal_or_relaxed(waiter.first, waiter.second);
  }
  ClearScratchNotifiers();
}

// Go through all the AQL queues and try to release scratch memory
void GpuAgent::AsyncReclaimScratchQueues() {
  for (auto iter : aql_queues_) {
    auto aqlQueue = static_cast<AqlQueue*>(iter);
    aqlQueue->AsyncReclaimMainScratch();
    aqlQueue->AsyncReclaimAltScratch();
  }
}

// Updates the async scratch threshold and lets every AQL queue re-check its
// scratch limits against the new value.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT if use_once_limit exceeds
// MaxScratchDevice(), HSA_STATUS_SUCCESS otherwise.
hsa_status_t GpuAgent::SetAsyncScratchThresholds(size_t use_once_limit) {
  const bool within_device_limit = use_once_limit <= MaxScratchDevice();
  if (!within_device_limit) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  scratch_limit_async_threshold_ = use_once_limit;

  for (auto queue : aql_queues_) {
    static_cast<AqlQueue*>(queue)->CheckScratchLimits();
  }

  return HSA_STATUS_SUCCESS;
}

// Fills time with the signal's dispatch time stamps translated by
// TranslateTime(uint64_t). Emits a debug message if the raw stamps look invalid
// (zero, or earlier than t0_).
void GpuAgent::TranslateTime(core::Signal* signal, hsa_amd_profiling_dispatch_time_t& time) {
  uint64_t raw_start;
  uint64_t raw_end;
  signal->GetRawTs(false, raw_start, raw_end);

  // Translate the end stamp first so that clock measurement latency jitter does
  // not affect the reported packet duration.
  time.end = TranslateTime(raw_end);
  time.start = TranslateTime(raw_start);

  const bool has_zero_stamp = (raw_start == 0) || (raw_end == 0);
  if (has_zero_stamp || (raw_start < t0_.GPUClockCounter) || (raw_end < t0_.GPUClockCounter))
    debug_print("Signal %p time stamps may be invalid.\n", &signal->signal_);
}

// Fills time with the signal's async copy time stamps translated by
// TranslateTime(uint64_t). Emits a debug message if the raw stamps look invalid
// (zero, or earlier than t0_).
void GpuAgent::TranslateTime(core::Signal* signal, hsa_amd_profiling_async_copy_time_t& time) {
  uint64_t raw_start;
  uint64_t raw_end;
  signal->GetRawTs(true, raw_start, raw_end);

  // Translate the end stamp first so that clock measurement latency jitter does
  // not affect the reported packet duration.
  time.end = TranslateTime(raw_end);
  time.start = TranslateTime(raw_start);

  const bool has_zero_stamp = (raw_start == 0) || (raw_end == 0);
  if (has_zero_stamp || (raw_start < t0_.GPUClockCounter) || (raw_end < t0_.GPUClockCounter))
    debug_print("Signal %p time stamps may be invalid.\n", &signal->signal_);
}

/*
Translates a raw GPU clock tick into the system clock domain.

Times during program execution are interpolated to adjust for relative clock drift.
Interval timing may appear as ticks well before process start, leading to large errors due to
frequency adjustment (ie the profiling with NTP problem).  This is fixed by using a fixed frequency
for early times.
Intervals larger than t0_ will be frequency adjusted.  This admits a numerical error of not more
than twice the frequency stability (~10^-5).

The conversion uses the two correlated clock samples t0_ and t1_. t1_ is refreshed through
SyncClocks() while t1_lock_ is held whenever tick lies too far beyond it.
*/
uint64_t GpuAgent::TranslateTime(uint64_t tick) {
  // Only allow short (error bounded) extrapolation for times during program execution.
  // Limit errors due to relative frequency drift to ~0.5us.  Sync clocks at 16Hz.
  // The limit is 1/16 of the system clock frequency, expressed in system clock ticks.
  const int64_t max_extrapolation = core::Runtime::runtime_singleton_->sys_clock_freq() >> 4;

  // Held until return: guards t1_ against concurrent SyncClocks() updates from this function.
  ScopedAcquire<KernelMutex> lock(&t1_lock_);
  // Limit errors due to correlated pair certainty to ~0.5us.
  // extrapolated time < (0.5us / half clock read certainty) * delay between clock measures
  // clock read certainty is <4us.
  // i.e. resync when tick is more than a quarter of the t0_..t1_ GPU interval past t1_.
  if (((t1_.GPUClockCounter - t0_.GPUClockCounter) >> 2) + t1_.GPUClockCounter < tick) SyncClocks();

  // Alternative exact integer formulation, kept for reference (not compiled).
  // Good for ~300 yrs
  // uint64_t sysdelta = t1_.SystemClockCounter - t0_.SystemClockCounter;
  // uint64_t gpudelta = t1_.GPUClockCounter - t0_.GPUClockCounter;
  // int64_t offtick = int64_t(tick - t1_.GPUClockCounter);
  //__int128 num = __int128(sysdelta)*__int128(offtick) +
  //__int128(gpudelta)*__int128(t1_.SystemClockCounter);
  //__int128 sysLarge = num / __int128(gpudelta);
  // return sysLarge;

  // Good for ~3.5 months.
  uint64_t system_tick = 0;
  int64_t elapsed = 0;
  double ratio;

  // Valid ticks only need at most one SyncClocks.
  // NOTE(review): if the limit is still exceeded on the second pass, SyncClocks() refreshes t1_
  // once more but elapsed is not recomputed, so the sum below combines the older offset with the
  // newer t1_ - confirm this is acceptable for out-of-range ticks.
  for (int i = 0; i < 2; i++) {
    // System ticks per GPU tick over the t0_..t1_ interval.
    // NOTE(review): assumes t1_.GPUClockCounter != t0_.GPUClockCounter, otherwise ratio is not
    // finite - confirm against where t0_/t1_ are initialized.
    ratio = double(t1_.SystemClockCounter - t0_.SystemClockCounter) /
        double(t1_.GPUClockCounter - t0_.GPUClockCounter);
    // Signed offset of tick from t1_, converted to system ticks (negative for ticks before t1_).
    elapsed = int64_t(ratio * double(int64_t(tick - t1_.GPUClockCounter)));

    // Skip clock sync if under the extrapolation limit.
    if (elapsed < max_extrapolation) break;
    SyncClocks();
  }

  system_tick = uint64_t(elapsed) + t1_.SystemClockCounter;

  // tick predates HSA startup - extrapolate with fixed clock ratio
  if (tick < t0_.GPUClockCounter) {
    // The ratio is latched the first time an early tick is seen and reused afterwards.
    if (historical_clock_ratio_ == 0.0) historical_clock_ratio_ = ratio;
    system_tick = uint64_t(historical_clock_ratio_ * double(int64_t(tick - t0_.GPUClockCounter))) +
        t0_.SystemClockCounter;
  }

  return system_tick;
}

/* This function is deprecated */
// Records the requested coherency type in current_coherency_type_ and always
// reports success; nothing else is done with the value here.
bool GpuAgent::current_coherency_type(hsa_amd_coherency_type_t type) {
  current_coherency_type_ = type;
  return true;
}

// Returns the uCode field of this agent's EngineId node property.
uint16_t GpuAgent::GetMicrocodeVersion() const {
  return properties_.EngineId.ui32.uCode;
}

// Returns the uCodeSDMA field of this agent's uCodeEngineVersions node property.
uint16_t GpuAgent::GetSdmaMicrocodeVersion() const {
  return properties_.uCodeEngineVersions.uCodeSDMA;
}

void GpuAgent::SyncClocks() {
  hsa_status_t err = driver().GetClockCounters(node_id(), &t1_);
  assert(err == HSA_STATUS_SUCCESS && "hsaGetClockCounters error");
}

// Re-assembles the "TrapHandlerKfdExceptions" shader and binds it to this node,
// optionally publishing the PC sampling buffers through the trap handler's TMA.
//
//   pcs_hosttrap_buffers   - host-trap sampling data, or nullptr.
//   pcs_stochastic_buffers - stochastic sampling data, or nullptr.
//
// When at least one buffer is given, a two-entry TMA region holding both
// pointers is (lazily) allocated and filled via DmaCopy. When both are nullptr,
// any existing TMA region is released and the handler is bound without a TMA.
//
// Returns the status of driver().SetTrapHandler(), or an error status if the
// TMA region could not be allocated, shared or written.
hsa_status_t GpuAgent::UpdateTrapHandlerWithPCS(pcs_sampling_data_t* pcs_hosttrap_buffers,
                                                pcs_sampling_data_t* pcs_stochastic_buffers) {
  void* tma_addr = nullptr;
  uint64_t tma_size = 0;

  assert(core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging);

  // Assemble the trap handler source code.
  AssembleShader("TrapHandlerKfdExceptions", AssembleTarget::ISA, trap_code_buf_,
                 trap_code_buf_size_);

  /* pcs_hosttrap_buffers and pcs_stochastic_buffers are NULL until PC sampling is enabled */
  if (pcs_hosttrap_buffers || pcs_stochastic_buffers) {
    const size_t tma_bytes = 2 * sizeof(uint64_t);

    // On non-large BAR systems, we cannot access device memory so we create a host copy
    // and then do a DmaCopy to device memory
    uint64_t* tma_region_host = (uint64_t*)system_allocator()(tma_bytes, 0x1000, 0);
    if (tma_region_host == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

    MAKE_SCOPE_GUARD([&]() { system_deallocator()(tma_region_host); });

    tma_region_host[0] = (uint64_t)pcs_hosttrap_buffers;
    tma_region_host[1] = (uint64_t)pcs_stochastic_buffers;

    if (!trap_handler_tma_region_) {
      trap_handler_tma_region_ = (uint64_t*)finegrain_allocator()(tma_bytes, 0);
      if (trap_handler_tma_region_ == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

      // NearestCpuAgent owns pool returned system_allocator()
      auto cpuAgent = GetNearestCpuAgent()->public_handle();

      hsa_status_t ret =
          AMD::hsa_amd_agents_allow_access(1, &cpuAgent, NULL, trap_handler_tma_region_);
      assert(ret == HSA_STATUS_SUCCESS);
      if (ret != HSA_STATUS_SUCCESS) {
        // Do not keep a region that was never granted the requested access: release it so a
        // later call starts over instead of silently reusing it.
        finegrain_deallocator()(trap_handler_tma_region_);
        trap_handler_tma_region_ = NULL;
        return HSA_STATUS_ERROR;
      }
    }

    /* On non-large BAR systems, we may not be able to access device memory, so do a DmaCopy */
    if (DmaCopy(trap_handler_tma_region_, tma_region_host, tma_bytes) != HSA_STATUS_SUCCESS)
      return HSA_STATUS_ERROR;

    tma_size = tma_bytes;
    tma_addr = trap_handler_tma_region_;
  } else if (trap_handler_tma_region_) {
    // PC sampling is off: the TMA region is no longer needed.
    finegrain_deallocator()(trap_handler_tma_region_);
    trap_handler_tma_region_ = NULL;
  }

  // Bind the trap handler to this node.
  return driver().SetTrapHandler(node_id(), trap_code_buf_, trap_code_buf_size_, tma_addr,
                                 tma_size);
}

// Assembles the appropriate trap handler shader and binds it to this node.
//
// With KFD exception debugging support the "TrapHandlerKfdExceptions" shader
// is bound without a TMA. Otherwise the "TrapHandler" shader is bound together
// with a zeroed doorbell-index-to-queue map as its TMA. Returns without binding
// anything on Gfx7, and on Gfx11+/Gfx9.4/Gfx9.5 when exception debugging is
// unavailable.
void GpuAgent::BindTrapHandler() {
  const auto isa_major = isa_->GetMajorVersion();

  // No trap handler support on Gfx7, soft error.
  if (isa_major == 7) return;

  void* tma_addr = nullptr;
  uint64_t tma_size = 0;

  const bool exception_debugging =
      core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging;

  if (!exception_debugging) {
    const bool requires_exception_handling = isa_major >= 11 ||
        (isa_major == 9 && (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5));

    // No trap handler support without exception handling, soft error.
    if (requires_exception_handling) return;

    // Assemble the trap handler source code.
    AssembleShader("TrapHandler", AssembleTarget::ISA, trap_code_buf_, trap_code_buf_size_);

    // Build an empty doorbell index -> queue map.
    // The trap handler uses this to retrieve a wave's amd_queue_v2_t*.
    auto map_bytes = MAX_NUM_DOORBELLS * sizeof(amd_queue_v2_t*);

    doorbell_queue_map_ = (amd_queue_v2_t**)system_allocator()(map_bytes, 0x1000, 0);
    assert(doorbell_queue_map_ != NULL && "Doorbell queue map allocation failed");

    memset(doorbell_queue_map_, 0, map_bytes);

    tma_addr = doorbell_queue_map_;
    tma_size = map_bytes;
  } else {
    // Assemble the trap handler source code.
    AssembleShader("TrapHandlerKfdExceptions", AssembleTarget::ISA, trap_code_buf_,
                   trap_code_buf_size_);
  }

  // Bind the trap handler to this node.
  hsa_status_t err =
      driver().SetTrapHandler(node_id(), trap_code_buf_, trap_code_buf_size_, tma_addr, tma_size);
  assert(err == HSA_STATUS_SUCCESS && "SetTrapHandler() failed");
}

// Builds a PM4 ACQUIRE_MEM packet that invalidates the caches which may hold
// code object lines and executes it on the utility queue.
//
//   ptr  - start of the range to invalidate; when null the packet is built
//          with the maximum coherency size and a zero base instead.
//   size - size of the range in bytes; only used when ptr is non-null.
//
// Nothing is submitted on Gfx7 with microcode < 420 or Gfx8.0 with
// microcode < 685.
void GpuAgent::InvalidateCodeCaches(void *ptr, size_t size) {
  // Check for microcode cache invalidation support.
  // This is deprecated in later microcode builds.
  if (isa_->GetMajorVersion() == 7) {
    if (properties_.EngineId.ui32.uCode < 420) {
      // Microcode is handling code cache invalidation.
      return;
    }
  } else if (isa_->GetMajorVersion() == 8 && isa_->GetMinorVersion() == 0) {
    if (properties_.EngineId.ui32.uCode < 685) {
      // Microcode is handling code cache invalidation.
      return;
    }
  } else if (isa_->GetMajorVersion() > 12) {
    // Only caught in builds with asserts enabled; otherwise execution continues below and the
    // major >= 10 packet layout is used.
    assert(false && "Code cache invalidation not implemented for this agent");
  }

  // Invalidate caches which may hold lines of code object allocation.
  // The packet is at most 8 dwords; unused dwords stay zero.
  uint32_t cache_inv[8] = {0};
  uint32_t cache_inv_size_dw;

  if (isa_->GetMajorVersion() < 10) {
      // Pre-Gfx10 layout: cache actions are selected through COHER_CNTL in dword 1, 7 dwords.
      cache_inv[1] = PM4_ACQUIRE_MEM_DW1_COHER_CNTL(
          PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA |
          PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA |
          PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA |
          PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA);

      cache_inv_size_dw = 7;
  } else {
      // Gfx10+ layout: cache actions are selected through GCR_CNTL in dword 7, 8 dwords.
      cache_inv[7] = PM4_ACQUIRE_MEM_DW7_GCR_CNTL(
          PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) |
          PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV |
          PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV |
          PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV |
          PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV);

      cache_inv_size_dw = 8;
  }

  cache_inv[0] = PM4_HDR(PM4_HDR_IT_OPCODE_ACQUIRE_MEM, cache_inv_size_dw,
             isa_->GetMajorVersion());

  if (ptr) {
    // Size is passed to the packet in 256-byte units, rounded up.
    size_t size_granule = (size + 0xFF) >> 8;
    cache_inv[2] = PM4_ACQUIRE_MEM_DW2_COHER_SIZE(size_granule);
    cache_inv[3] = PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(size_granule >> 32);
    cache_inv[4] = PM4_ACQUIRE_MEM_DW4_COHER_BASE((uint64_t)ptr);
    // NOTE(review): the macro is named DW4 but fills dword 5 - presumably it extracts the high
    // part of the base address; confirm against the macro definition.
    cache_inv[5] = PM4_ACQUIRE_MEM_DW4_COHER_BASE_HI((uint64_t)ptr);
  } else {
    // No range given: use the maximum size fields and leave the base dwords zero.
    cache_inv[2] = PM4_ACQUIRE_MEM_DW2_COHER_SIZE(0xFFFFFFFF);
    cache_inv[3] = PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(0xFF);
  }

  // Submit the command to the utility queue and wait for it to complete.
  queues_[QueueUtility]->ExecutePM4(cache_inv, cache_inv_size_dw * sizeof(uint32_t));
}

// Returns the blit object stored at engine_offset and records the slot as used
// in sdma_blit_used_mask_.
lazy_ptr<core::Blit>& GpuAgent::GetBlitObject(uint32_t engine_offset) {
  sdma_blit_used_mask_ |= 1 << engine_offset;

  auto& blit = blits_[engine_offset];
  return blit;
}

// Returns the xGMI blit object used for copies towards dst_agent.
//
// Peers are remembered in xgmi_peer_list_ in first-seen order; a peer's
// position in that list, modulo the number of xGMI SDMA engines, selects the
// blit slot after the first DefaultBlitCount entries. Unknown peers are
// appended to the list. Holds xgmi_peer_list_lock_ for the lookup and update.
lazy_ptr<core::Blit>& GpuAgent::GetXgmiBlit(const core::Agent& dst_agent) {
  const uint32_t xgmi_engines = properties_.NumSdmaXgmiEngines;
  assert((xgmi_engines > 0) && ("Illegal condition, should not happen"));

  ScopedAcquire<KernelMutex> lock(&xgmi_peer_list_lock_);

  // Is the destination already a known xGMI peer?
  const uint64_t wanted_handle = dst_agent.public_handle().handle;
  uint32_t position = 0;
  for (const auto* peer : xgmi_peer_list_) {
    if (peer->public_handle().handle == wanted_handle) {
      return blits_[(position % xgmi_engines) + DefaultBlitCount];
    }
    ++position;
  }

  // First time this peer is seen: append it and use the slot derived from its new position.
  xgmi_peer_list_.push_back(&dst_agent);
  const auto new_position = xgmi_peer_list_.size() - 1;
  return GetBlitObject((new_position % xgmi_engines) + DefaultBlitCount);
}

// Returns the host-facing blit object for a copy from src_agent to dst_agent:
// BlitHostToDev for CPU -> GPU copies, BlitDevToHost for everything else.
lazy_ptr<core::Blit>& GpuAgent::GetPcieBlit(const core::Agent& dst_agent,
                                            const core::Agent& src_agent) {
  const bool src_is_cpu = src_agent.device_type() == core::Agent::kAmdCpuDevice;
  const bool host_to_device =
      src_is_cpu && dst_agent.device_type() == core::Agent::kAmdGpuDevice;

  return GetBlitObject(host_to_device ? BlitHostToDev : BlitDevToHost);
}

// Selects the blit object to use for a copy of size bytes from src_agent to
// dst_agent. At least one of the two agents is expected to be a GPU.
//
// Selection order:
//   1. Copy within this very device: BlitDevToHost slot for sizes below
//      force_sdma_size(), otherwise the BlitDevToDev blit.
//   2. GPU <-> GPU with peer SDMA disabled: the BlitDevToDev blit.
//   3. Agents not in one common (non-zero) hive, or no xGMI SDMA engines:
//      the PCIe facing blit.
//   4. Otherwise: the xGMI blit for dst_agent.
lazy_ptr<core::Blit>& GpuAgent::GetBlitObject(const core::Agent& dst_agent,
                                              const core::Agent& src_agent,
                                              const size_t size) {
  const bool src_is_gpu = src_agent.device_type() == core::Agent::kAmdGpuDevice;
  const bool dst_is_gpu = dst_agent.device_type() == core::Agent::kAmdGpuDevice;

  // At this point it is guaranteed that one of
  // the two devices is a GPU, potentially both
  assert((src_is_gpu || dst_is_gpu) && ("Both devices are CPU agents which is not expected"));

  // Source and destination are the same device and it is the copying device.
  // Such a copy stays in device local memory, which can only be saturated by a blit kernel.
  const uint64_t src_handle = src_agent.public_handle().handle;
  const uint64_t dst_handle = dst_agent.public_handle().handle;
  if (src_handle == dst_handle && dst_handle == public_handle_.handle) {
    // For very small copies cache flush overheads can dominate, so pick a
    // (potentially) SDMA enabled engine to avoid cache flushing.
    const bool small_copy = size < core::Runtime::runtime_singleton_->flag().force_sdma_size();
    if (small_copy) {
      return GetBlitObject(BlitDevToHost);
    }
    return blits_[BlitDevToDev];
  }

  // Peer to peer copies with peer SDMA disabled.
  if (src_is_gpu && dst_is_gpu &&
      core::Runtime::runtime_singleton_->flag().enable_peer_sdma() == Flag::SDMA_DISABLE) {
    return blits_[BlitDevToDev];
  }

  // Hive ids are ignored for CPU devices: CPU-GPU connections should always use the host
  // (aka pcie) facing SDMA engines, even if the connection is XGMI.
  const uint64_t src_hive_id = src_is_gpu ? src_agent.HiveId() : 0;
  const uint64_t dst_hive_id = dst_is_gpu ? dst_agent.HiveId() : 0;

  // The xGMI path is only taken when both devices claim membership in the same hive.
  // Everything else binds to a PCIe facing blit object:
  //   neither device in a hive          srcId = 0      <-> dstId = 0
  //   only the source in a hive         srcId = 0x1926 <-> dstId = 0
  //   only the destination in a hive    srcId = 0      <-> dstId = 0x1123
  //   both in hives, but different ones srcId = 0x1926 <-> dstId = 0x1123
  const bool same_hive = (dst_hive_id != 0) && (dst_hive_id == src_hive_id);

  // Also covers platforms where devices have xGMI links
  // but no sdmaXgmiEngines, e.g. Vega 20.
  if (!same_hive || properties_.NumSdmaXgmiEngines == 0) {
    return GetPcieBlit(dst_agent, src_agent);
  }

  return GetXgmiBlit(dst_agent);
}

// Releases cached resources held by this agent: runs the base class trim, asks
// the AQL queues to reclaim their scratch, then trims the scratch cache.
void GpuAgent::Trim() {
  Agent::Trim();
  // Issued before scratch_lock_ is taken below.
  AsyncReclaimScratchQueues();
  ScopedAcquire<KernelMutex> lock(&scratch_lock_);
  // false: same argument AcquireQueueAltScratch uses for its "trim unused blocks" step.
  scratch_cache_.trim(false);
}

void GpuAgent::InitAllocators() {
  for (auto pool : GetNearestCpuAgent()->regions()) {
    if (pool->kernarg()) {
      system_allocator_ = [pool](size_t size, size_t alignment,
                                 MemoryRegion::AllocateFlags alloc_flags) -> void* {
        assert(alignment <= 4096);
        void* ptr = nullptr;
        return (HSA_STATUS_SUCCESS ==
                core::Runtime::runtime_singleton_->AllocateMemory(pool, size, alloc_flags, &ptr))
            ? ptr
            : nullptr;
      };

      system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
    }
  }
  assert(system_allocator_ && "Nearest NUMA node did not have a kernarg pool.");

  // Setup this GPU's fine-grain and coarse-grain allocators.
  for (auto region : regions()) {
    const AMD::MemoryRegion* amd_region = static_cast<const AMD::MemoryRegion*>(region);

    auto region_allocator = [region](size_t size,
                                     MemoryRegion::AllocateFlags alloc_flags) -> void* {
      void* ptr = nullptr;
       return (HSA_STATUS_SUCCESS ==
               core::Runtime::runtime_singleton_->AllocateMemory(region, size, alloc_flags, &ptr))
           ? ptr
           : nullptr;
    };

    auto region_deallocator = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };

    if (amd_region->IsLocalMemory() && amd_region->fine_grain()) {
      finegrain_allocator_ = region_allocator;
      finegrain_deallocator_ = region_deallocator;
    } else if (amd_region->IsLocalMemory() &&
               !(amd_region->fine_grain() || amd_region->extended_scope_fine_grain())) {
      coarsegrain_allocator_ = region_allocator;
      coarsegrain_deallocator_ = region_deallocator;
    }
  }
  assert(finegrain_allocator_ && "GPU agent does not have a fine-grain allocator");
  assert(coarsegrain_allocator_ && "GPU agent does not have a coarse-grain allocator");
}

// Returns the CPU agent with the smallest NUMA distance to this GPU according
// to the runtime's link info. The first agent wins on ties. Returns nullptr if
// no CPU agent reports a distance below the uint32_t maximum.
core::Agent* GpuAgent::GetNearestCpuAgent() const {
  core::Agent* nearest = nullptr;
  uint32_t shortest = static_cast<uint32_t>(-1);

  for (auto candidate : core::Runtime::runtime_singleton_->cpu_agents()) {
    const core::Runtime::LinkInfo link =
        core::Runtime::runtime_singleton_->GetLinkInfo(node_id(), candidate->node_id());

    if (!(link.info.numa_distance < shortest)) continue;

    shortest = link.info.numa_distance;
    nearest = candidate;
  }

  return nearest;
}

// Converts a thunk PC sampling description into its HSA counterpart.
//
// Returns HSA_STATUS_ERROR if the sampling method or the interval unit has no
// HSA equivalent, so that the configuration is not reported to the user. In
// that case hsaPcSampling may have been partially written. Returns
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t ConvertHsaKmtPcSamplingInfoToHsa(HsaPcSamplingInfo* hsaKmtPcSampling,
                                              hsa_ven_amd_pcs_configuration_t* hsaPcSampling) {
  assert(hsaKmtPcSampling && "Invalid hsaKmtPcSampling");
  assert(hsaPcSampling && "Invalid hsaPcSampling");

  // Sampling method.
  const auto kmt_method = hsaKmtPcSampling->method;
  if (kmt_method == HSA_PC_SAMPLING_METHOD_KIND_HOSTTRAP_V1) {
    hsaPcSampling->method = HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1;
  } else if (kmt_method == HSA_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1) {
    hsaPcSampling->method = HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1;
  } else {
    // Sampling method not supported do not return this method to the user
    return HSA_STATUS_ERROR;
  }

  // Interval unit.
  const auto kmt_units = hsaKmtPcSampling->units;
  if (kmt_units == HSA_PC_SAMPLING_UNIT_INTERVAL_MICROSECONDS) {
    hsaPcSampling->units = HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS;
  } else if (kmt_units == HSA_PC_SAMPLING_UNIT_INTERVAL_CYCLES) {
    hsaPcSampling->units = HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES;
  } else if (kmt_units == HSA_PC_SAMPLING_UNIT_INTERVAL_INSTRUCTIONS) {
    hsaPcSampling->units = HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS;
  } else {
    // Sampling unit not supported do not return this method to the user
    return HSA_STATUS_ERROR;
  }

  // Remaining fields are copied as they are.
  hsaPcSampling->flags = hsaKmtPcSampling->flags;
  hsaPcSampling->min_interval = hsaKmtPcSampling->value_min;
  hsaPcSampling->max_interval = hsaKmtPcSampling->value_max;
  return HSA_STATUS_SUCCESS;
}

// Invokes cb once for every PC sampling configuration of this node that can be
// expressed through the HSA API.
//
//   cb      - callback receiving each converted configuration; returning
//             HSA_STATUS_INFO_BREAK stops the iteration. Other return values
//             do not affect the iteration.
//   cb_data - opaque pointer forwarded to cb.
//
// Returns HSA_STATUS_ERROR if KFD exception debugging is unsupported, the
// driver's status if a capability query fails or reports no entries, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t GpuAgent::PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configuration_callback_t cb,
                                               void* cb_data) {
  uint32_t size = 0;

  if (!core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging)
    return HSA_STATUS_ERROR;

  // First query to get size of list needed
  hsa_status_t ret = driver().PcSamplingQueryCapabilities(node_id(), NULL, 0, &size);
  if (ret != HSA_STATUS_SUCCESS || size == 0) return ret;

  std::vector<HsaPcSamplingInfo> sampleInfoList(size);
  ret = driver().PcSamplingQueryCapabilities(node_id(), sampleInfoList.data(),
                                             sampleInfoList.size(), &size);

  if (ret != HSA_STATUS_SUCCESS) return ret;

  // The second query may rewrite size. Never walk past the entries that were actually
  // allocated, even if the driver now reports more than the first query did.
  const size_t count = (size < sampleInfoList.size()) ? size : sampleInfoList.size();

  for (size_t i = 0; i < count; i++) {
    hsa_ven_amd_pcs_configuration_t hsaPcSampling;
    // Entries without an HSA equivalent are skipped silently.
    if (ConvertHsaKmtPcSamplingInfoToHsa(&sampleInfoList[i], &hsaPcSampling) == HSA_STATUS_SUCCESS
        && cb(&hsaPcSampling, cb_data) == HSA_STATUS_INFO_BREAK)
      return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_SUCCESS;
}

// Creates a PC sampling session: first the runtime side state through
// PcSamplingCreateFromId(), then the kernel driver session, whose trace id is
// stored in the session.
// Returns the first failing status, or HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session) {
  // IOCTL id does not exist at the moment, so passing 0 is OK,
  // since it will be overridden later in this function.
  hsa_status_t status = PcSamplingCreateFromId(0, session);
  if (status != HSA_STATUS_SUCCESS) return status;

  // Obtain the sampling information from the session.
  HsaPcSamplingInfo kmt_info = {};
  session.GetHsaKmtSamplingInfo(&kmt_info);

  // Pass the sampling information to the kernel driver to create PC
  // sampling session.
  // NOTE(review): on failure the state set up by PcSamplingCreateFromId() above is not torn
  // down here - confirm whether callers are expected to clean up.
  HsaPcSamplingTraceId trace_id;
  status = driver().PcSamplingCreate(node_id(), &kmt_info, &trace_id);
  if (status != HSA_STATUS_SUCCESS) return status;

  debug_print("Created PC sampling session with thunkId:%d\n", trace_id);
  session.SetThunkId(trace_id);

  return status;
}

// Sets up the runtime side of a PC sampling session for this agent: command
// buffer, signals, host staging buffer and device side trap handler data, and
// re-binds the trap handler so that it sees the new buffers.
//
//   ioctlId - driver trace id to store in the session.
//   session - session being created; its method selects the host-trap or
//             stochastic bookkeeping.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an unsupported sampling method,
// HSA_STATUS_ERROR_OUT_OF_RESOURCES if a session of that method already exists
// or an allocation fails, HSA_STATUS_ERROR for other failures and
// HSA_STATUS_SUCCESS otherwise. On any failure everything acquired by this call
// is released again and no session is left registered.
hsa_status_t GpuAgent::PcSamplingCreateFromId(HsaPcSamplingTraceId ioctlId,
                                              pcs::PcsRuntime::PcSamplingSession& session) {
  // Determine the sampling method from the session
  hsa_ven_amd_pcs_method_kind_t sampling_method = session.method();

  pcs_data_t* pcs_data = nullptr;

  if (sampling_method == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1) {
    pcs_data = &pcs_hosttrap_data_;
  } else if (sampling_method == HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    pcs_data = &pcs_stochastic_data_;
  } else {
    // Unsupported sampling method
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Ensure only one session is active at a time for the given method
  if (pcs_data->session)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;  // TODO: For now, we can only have
                                               // 1 pc sampling session at a
                                               // time. As a final solution, we
                                               // want to be able to support
                                               // multiple sessions at a time.
                                               // But this makes the
                                               // session.HandleSampleData more
                                               // complicated if multiple
                                               // sessions have different buffer
                                               // sizes.

  // No session is registered, so nothing in pcs_data is live. Reset the members this function
  // owns so that the failure path below only ever releases what this call acquired and never
  // touches stale values left behind by an earlier session.
  pcs_data->cmd_data = nullptr;
  pcs_data->old_val = nullptr;
  pcs_data->host_buffer = nullptr;
  pcs_data->device_data = nullptr;
  pcs_data->exec_pm4_signal.handle = 0;

  // The completion signals are tracked in host locals: the device copy of the sampling data
  // may not be initialized (or host readable) when a failure has to be unwound.
  hsa_signal_t done_sig0 = {0};
  hsa_signal_t done_sig1 = {0};

  // Declared before any resource is acquired so that every early return below is covered.
  MAKE_NAMED_SCOPE_GUARD(freeResources, [&]() {
    pcs_data->session = nullptr;

    if (done_sig0.handle) HSA::hsa_signal_destroy(done_sig0);
    if (done_sig1.handle) HSA::hsa_signal_destroy(done_sig1);

    if (pcs_data->device_data) finegrain_deallocator()(pcs_data->device_data);
    pcs_data->device_data = nullptr;

    if (pcs_data->host_buffer) system_deallocator()(pcs_data->host_buffer);
    pcs_data->host_buffer = nullptr;

    if (pcs_data->old_val) system_deallocator()(pcs_data->old_val);
    pcs_data->old_val = nullptr;

    if (pcs_data->exec_pm4_signal.handle) HSA::hsa_signal_destroy(pcs_data->exec_pm4_signal);
    pcs_data->exec_pm4_signal.handle = 0;

    free(pcs_data->cmd_data);
    pcs_data->cmd_data = nullptr;
  });

  // This is current amd_aql_queue->pm4_ib_size_b_
  pcs_data->cmd_data_sz = 0x1000;  // 4KB
  pcs_data->cmd_data = (uint32_t*)malloc(pcs_data->cmd_data_sz);
  if (!pcs_data->cmd_data) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  if (HSA::hsa_signal_create(1, 0, NULL, &pcs_data->exec_pm4_signal) != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  pcs_data->old_val = (uint64_t*)system_allocator()(sizeof(uint64_t), 0x1000, 0);
  if (!pcs_data->old_val) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  if (AMD::hsa_amd_agents_allow_access(1, &public_handle_, NULL, pcs_data->old_val) !=
      HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  // Local copy of pc sampling data - we cannot access device memory directly on non-large BAR
  // systems
  pcs_sampling_data_t* device_datahost =
      (pcs_sampling_data_t*)system_allocator()(sizeof(pcs_sampling_data_t), 0x1000, 0);
  if (!device_datahost) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // The host copy is only needed while this function runs.
  MAKE_SCOPE_GUARD([&]() { system_deallocator()(device_datahost); });

  memset(device_datahost, 0, sizeof(*device_datahost));

  if (AMD::hsa_amd_agents_allow_access(1, &public_handle_, NULL, device_datahost) !=
      HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  // Force creating of PC Sampling queue to trigger exception early in case we exceed max
  // available CP queues on this agent
  queues_[QueuePCSampling].touch();

  /*
   * When calling queue->ExecutePM4() Indirect Buffer size which is 0x1000 bytes (1024 DW).
   * The maximum indirect buffer size we need occurs when we enqueue the
   * WAIT_REG_MEM, DMA_COPY(s), WRITE_DATA ops:
   * For WAIT_REG_MEM = 7 DW
   * For each DMA_COPY = 7 DW
   * For WRITE_DATA_CMD = 6 DW
   *
   * So maximum number of DMA_COPY ops is:
   * (MAX_IB_SIZE - sizeof(WAIT_REG_MEM) - sizeof(WRITE_DATA_CMD)) / sizeof(DMA_COPY)
   * (1024 - 7 - 6) / 7 = 144
   *
   * Each DMA_COPY op can transfer (1 << 26) bytes, so 144 ops cover 9 GB. trap_buffer_size is a
   * 32-bit number, so the buffer must be < 4 GB. So we are not limited by Indirect Buffer size.
   * Set current limit to 256 MB to limit device VRAM usage
   */
  const size_t max_trap_buffer_size =
      core::Runtime::runtime_singleton_->flag().pc_sampling_max_device_buffer_size();

  /*
   * We use a double-buffer mechanism where there are 2 trap-buffers and 1 host-buffer
   * Warning: This currently assumes that client latency is smaller than time to fill 1
   * trap-buffer. If latency is bigger, we have to increase host-buffer
   *
   * host-buffer must be >= client-buffer so that we can copy full size of client-buffer each
   * time. To avoid having to deal with wrap-arounds, host-buffer must be a multiple of
   * trap-buffers
   *
   * if client-buffer size is greater than 2x max_trap_buffer_size:
   *    We are limited by max_trap_buffer_size.
   *    trap-buffer = max-trap-buffer-size
   *    host-buffer = 2*smallest size greater than client-buffer but multiple of 1 trap-buffer
   * else:
   *    We reduce the trap-buffers so that:
   *    trap-buffer = half of user-buffer
   *    host-buffer = 2*user-buffer
   *
   * TODO: We are currently using a temporary host-buffer so that we can increase host-buffer to
   * factor in client latency. Using a direct-copy to the client buffer would be more efficient.
   * Revisit this once we have empirical data of latency vs how long it takes to fill 1
   * trap-buffer.
   */

  size_t trap_buffer_size = 0;
  if (session.buffer_size() > 2 * max_trap_buffer_size) {
    trap_buffer_size = max_trap_buffer_size;
    pcs_data->host_buffer_size = 2 * AlignUp(session.buffer_size(), trap_buffer_size);
  } else {
    trap_buffer_size = session.buffer_size() / 2;
    pcs_data->host_buffer_size = 2 * session.buffer_size();
  }

  pcs_data->host_buffer = (uint8_t*)system_allocator()(pcs_data->host_buffer_size, 0x1000, 0);
  if (!pcs_data->host_buffer) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  if (AMD::hsa_amd_agents_allow_access(1, &public_handle_, NULL, pcs_data->host_buffer) !=
      HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  // Capacity of one trap buffer, in samples.
  device_datahost->buf_size = trap_buffer_size / session.sample_size();

  if (HSA::hsa_signal_create(1, 0, NULL, &done_sig0) != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  device_datahost->done_sig0 = done_sig0;

  if (HSA::hsa_signal_create(1, 0, NULL, &done_sig1) != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  device_datahost->done_sig1 = done_sig1;

  // TODO: Once we have things working and can measure
  // latency after 2nd level trap handler decrements signals and set watermark accordingly
  device_datahost->buf_watermark0 = 0.8 * device_datahost->buf_size;
  device_datahost->buf_watermark1 = 0.8 * device_datahost->buf_size;

  // Allocate device memory for 2nd level trap handler TMA:
  // the sampling data header followed by the two trap buffers.
  size_t deviceAllocSize = sizeof(pcs_sampling_data_t) + (2 * trap_buffer_size);
  pcs_data->device_data = (pcs_sampling_data_t*)finegrain_allocator()(deviceAllocSize, 0);
  if (pcs_data->device_data == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // This cpuAgent is the owner of the system_allocator() pool
  auto cpuAgent = GetNearestCpuAgent()->public_handle();
  if (AMD::hsa_amd_agents_allow_access(1, &cpuAgent, NULL, pcs_data->device_data) !=
      HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  if (DmaCopy(pcs_data->device_data, device_datahost, sizeof(*device_datahost)) !=
      HSA_STATUS_SUCCESS) {
    debug_print("Failed to dmaCopy!\n");
    return HSA_STATUS_ERROR;
  }

  // Zero the two trap buffers that follow the header.
  uint8_t* device_buf_ptr =
      reinterpret_cast<uint8_t*>(pcs_data->device_data) + sizeof(pcs_sampling_data_t);
  size_t count_in_bytes = deviceAllocSize - sizeof(pcs_sampling_data_t);
  size_t count_in_dwords = count_in_bytes / sizeof(uint32_t);

  if (DmaFill(device_buf_ptr, 0, count_in_dwords) != HSA_STATUS_SUCCESS) {
    debug_print("Failed to dmaFill!\n");
    return HSA_STATUS_ERROR;
  }

  pcs_data->lost_sample_count = 0;
  pcs_data->host_buffer_wrap_pos = 0;
  pcs_data->host_write_ptr = pcs_data->host_buffer;
  pcs_data->host_read_ptr = pcs_data->host_write_ptr;

  // Registered before the trap handler update; the guard unregisters it again on failure.
  pcs_data->session = &session;

  if (UpdateTrapHandlerWithPCS(
          sampling_method == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1 ? pcs_data->device_data : nullptr,
          sampling_method == HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
              ? pcs_data->device_data
              : nullptr) != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  session.SetThunkId(ioctlId);

  // Success: the session now owns everything acquired above.
  freeResources.Dismiss();

  return HSA_STATUS_SUCCESS;
}

// Tears down a PC sampling session.
//
// The session is stopped first, then destroyed in the driver, and finally every
// runtime-side resource held in the per-method pcs_data_t is released and the
// trap handler is updated with no PC sampling buffers.
//
// Returns HSA_STATUS_ERROR if stopping fails, HSA_STATUS_ERROR_INVALID_ARGUMENT
// for an unknown sampling method, otherwise the status reported by the driver's
// PcSamplingDestroy call.
hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) {
  // Nothing may be released while sampling is still running.
  const hsa_status_t stop_status = PcSamplingStop(session);
  if (stop_status != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;

  // The driver result is kept and handed back once local cleanup is done.
  const hsa_status_t driver_status = driver().PcSamplingDestroy(node_id(), session.ThunkId());

  const hsa_ven_amd_pcs_method_kind_t kind = session.method();
  const bool is_hosttrap = (kind == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1);
  if (!is_hosttrap && kind != HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    // Unsupported sampling method
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  pcs_data_t& state = is_hosttrap ? pcs_hosttrap_data_ : pcs_stochastic_data_;

  // Detach the session so this method slot reads as unused.
  state.session = nullptr;

  // PM4 scratch storage and the host-visible slot written by COPY_DATA.
  free(state.cmd_data);
  system_deallocator()(state.old_val);

  // The done signal handles are stored inside device_data, so they have to be
  // destroyed before that allocation is handed back.
  HSA::hsa_signal_destroy(state.exec_pm4_signal);
  HSA::hsa_signal_destroy(state.device_data->done_sig0);
  HSA::hsa_signal_destroy(state.device_data->done_sig1);
  finegrain_deallocator()(state.device_data);
  system_deallocator()(state.host_buffer);

  state.device_data = nullptr;
  state.host_buffer = nullptr;

  // Update the trap handler so it no longer references any PC sampling data.
  UpdateTrapHandlerWithPCS(nullptr, nullptr);

  return driver_status;
}

// Starts a PC sampling session on this agent.
//
// Marks the session active, spawns the worker thread that drains sample
// buffers (PcSamplingThread) and then asks the kernel driver to start sampling.
//
// Returns:
//   HSA_STATUS_SUCCESS                 - session already active, or started.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT  - unknown sampling method.
//   HSA_STATUS_ERROR_RESOURCE_BUSY     - another session of this method is active.
//   HSA_STATUS_ERROR                   - the driver refused to start sampling.
// Throws AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES) when the worker
// thread cannot be created.
//
// On every failure path the session is left inactive and detached from the
// per-method state, so a later call can retry.
hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) {
  if (session.isActive()) return HSA_STATUS_SUCCESS;

  auto method = session.method();

  pcs_data_t* pcs_data = nullptr;
  const char* thread_name = nullptr;
  if (method == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1) {
    pcs_data = &pcs_hosttrap_data_;
    thread_name = "PcSamplingHostTrapThread";
  } else if (method == HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    pcs_data = &pcs_stochastic_data_;
    thread_name = "PcSamplingStochasticThread";
  } else {
    // Unsupported sampling method
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Check if a session is already active
  if (pcs_data->session && pcs_data->session->isActive()) {
    debug_warning("Already have a PC sampling session in progress!");
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
  }

  // Assign the new session and mark it as active. This must happen before the
  // worker is created because the worker loops on session->isActive().
  pcs_data->session = &session;
  pcs_data->session->start();

  // Arguments handed to the worker thread. Ownership passes to the thread,
  // which deletes the object when it finishes.
  struct ThreadData {
    GpuAgent* agent;
    pcs_data_t* pcs_data;
    const char* thread_name;
  };

  auto* thread_data = new ThreadData{this, pcs_data, thread_name};

  // This thread will handle all PC Sampling sessions on this agent
  pcs_data->thread = os::CreateThread(
      [](void* arg) -> void {
        auto* thread_data = static_cast<ThreadData*>(arg);
        try {
          GpuAgent* agent = thread_data->agent;
          pcs_data_t* pcs_data = thread_data->pcs_data;
          const char* thread_name = thread_data->thread_name;

          agent->PcSamplingThread(*pcs_data, thread_name);
        } catch (...) {
          // Exceptions must not escape a thread entry point.
          fprintf(stdout, "Exception caught in PcSamplingThread. Exiting the thread!");
        }

        delete thread_data;
      },
      thread_data);

  if (!pcs_data->thread) {
    // The thread never ran, so the arguments are still owned here.
    delete thread_data;
    // Roll back the state set above. Otherwise the session would stay marked
    // active with no worker behind it and a later start would be a silent no-op.
    pcs_data->session->stop();
    pcs_data->session = nullptr;
    throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
                             "Failed to start PC Sampling thread.");
  }

  // Start the sampling session in the kernel driver
  if (driver().PcSamplingStart(node_id(), session.ThunkId()) == HSA_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }

  debug_print("Failed to start PC sampling session with thunkId:%d\n", session.ThunkId());

  // Clean up if starting the session failed.
  pcs_data->session->stop();

  // The worker may already be blocked (without timeout) on one of the done
  // signals. Wake it the same way PcSamplingStop does, otherwise the join below
  // can never complete.
  HSA::hsa_signal_store_screlease(pcs_data->device_data->done_sig0, -1);
  HSA::hsa_signal_store_screlease(pcs_data->device_data->done_sig1, -1);

  os::WaitForThread(pcs_data->thread);
  os::CloseThread(pcs_data->thread);

  // Sampling never started, so put the signals back to 1, the value the worker
  // re-arms them with. A retried start must not see a stale stop request.
  HSA::hsa_signal_store_screlease(pcs_data->device_data->done_sig0, 1);
  HSA::hsa_signal_store_screlease(pcs_data->device_data->done_sig1, 1);

  pcs_data->thread = nullptr;
  pcs_data->session = nullptr;

  return HSA_STATUS_ERROR;
}

// Stops a running PC sampling session and joins its worker thread.
//
// A session that is not active is left untouched and reported as success.
// Throws AMD::hsa_exception(HSA_STATUS_ERROR) when the driver fails to stop
// sampling; returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown method.
hsa_status_t GpuAgent::PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) {
  if (!session.isActive()) return HSA_STATUS_SUCCESS;

  // Flag the session inactive first so the worker loop ends once it wakes up.
  session.stop();

  // Stop PC sampling in the kernel driver
  if (driver().PcSamplingStop(node_id(), session.ThunkId()) != HSA_STATUS_SUCCESS) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to stop PC Sampling session.");
  }

  // Pick the per-method state that belongs to this session.
  const auto kind = session.method();
  const bool is_hosttrap = (kind == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1);
  if (!is_hosttrap && kind != HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    // Unsupported sampling method
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  pcs_data_t& state = is_hosttrap ? pcs_hosttrap_data_ : pcs_stochastic_data_;

  // The worker may be blocked on either done signal; -1 is the value it
  // treats as "exit".
  const hsa_signal_t wake_signals[] = {state.device_data->done_sig0,
                                       state.device_data->done_sig1};
  for (const hsa_signal_t& wake : wake_signals) {
    HSA::hsa_signal_store_screlease(wake, -1);
  }

  // Join the worker, then clear the bookkeeping for this method.
  os::WaitForThread(state.thread);
  os::CloseThread(state.thread);
  state.thread = nullptr;
  state.session = nullptr;

  return HSA_STATUS_SUCCESS;
}

// Moves the samples collected in the currently active device buffer into the
// host ring buffer of the session's sampling method.
//
// Two PM4 command streams are built in pcs_data->cmd_data and submitted on
// queues_[QueuePCSampling]; after each submission the function blocks on
// exec_pm4_signal:
//   1. ATOMIC_MEM (swap of buf_write_val) + COPY_DATA of the returned value
//      into *old_val. This switches the device to the other buffer and tells us
//      how many entries were claimed in the buffer being drained.
//   2. WAIT_REG_MEM on buf_written_val, an optional ACQUIRE_MEM, one or more
//      DMA_DATA packets into the host buffer, and a WRITE_DATA that resets
//      buf_written_val.
// On completion pcs_data->which_buffer is advanced to the other buffer and
// pcs_data->host_write_ptr / host_buffer_wrap_pos / lost_sample_count reflect
// the copied data.
//
// Both callers visible in this file (PcSamplingThread and PcSamplingFlush) hold
// pcs_data->host_buffer_mutex while calling this function.
//
// Every return statement in this function returns HSA_STATUS_SUCCESS, including
// the early exits for an unknown method and for an exec_pm4_signal value of -1.
hsa_status_t GpuAgent::PcSamplingFlushDeviceBuffers(
    pcs::PcsRuntime::PcSamplingSession& session) {
  pcs_data_t* pcs_data = nullptr;

  if (session.method() == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1) {
    pcs_data = &pcs_hosttrap_data_;
  } else if (session.method() == HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    pcs_data = &pcs_stochastic_data_;
  } else {
    // Neither supported sampling method: there is nothing to flush
    return HSA_STATUS_SUCCESS;
  }

  /*
   * Device-buffer to Host-buffer to User-Buffer copy logic
   *
   * Device-buffer = buffer written by 2nd level trap handler
   * Host-buffer = buffer inside ROCr
   * User-buffer = Session buffer size specified in PCSamplingSessionCreate
   *
   * Conditions for the buffer sizes:
   * Host buffer is at least 2 times bigger than device buffer and Host buffer
   * is also at least 2 times bigger than User-Buffer.
   *
   * Key:
   * Device-Buffer[==--][----] : Device-Buffer#1 has size 4*N, and is half-full
   *                             Device-Buffer#2 has size 4*N and is empty
   *
   * Host-Buffer[=---------] : Host Buffer has size 10*N and is filled with N.
   *
   * N will vary based on the User-buffer size, this example is to show the
   * relative sizes between each copy.
   *
   * The two device buffers are always drained alternately (#1, #2, #1, ...).
   *
   * 1. Initial state
   *    - User has created a new session with buffer size = 7*N
   *
   *    Device-Buffer[---][---]
   *    Host-Buffer[--------------] wptr=0 rptr=0 wrap_pos=0
   *    User-Buffer[-------]
   *
   *    -- Device Buffer has size 3*N
   *    -- Host-Buffer has size 14*N (2x User-Buffer)
   *    -- User-Buffer has size 7*N
   *
   * 2. Device Buffer#1 hits watermark
   *    State at beginning:
   *    Device-Buffer[===][---]
   *    Host-Buffer[--------------]
   *    User-Buffer[-------]
   *
   *    -- Copy 3*N from Device-Buffer#1 to Host-Buffer
   *    -- In the meantime, 2nd level trap handler is writing to Device-Buffer#2
   *    -- We do not have enough data to fill User-Buffer
   *
   *    State at end:
   *    Device-Buffer[---][=--]
   *    Host-Buffer[===-----------] wptr=3 rptr=0, wrap_pos=0
   *    User-Buffer[-------]
   *
   * 3. Device Buffer#2 hits watermark
   *    State at beginning:
   *    Device-Buffer[---][===]
   *    Host-Buffer[===-----------]
   *    User-Buffer[-------]
   *
   *    -- Copy 3*N from Device-Buffer#2 to Host-Buffer
   *    -- In the meantime, 2nd level trap handler is writing to Device-Buffer#1
   *    -- We do not have enough data to fill User-Buffer
   *
   *    State at end:
   *    Device-Buffer[=--][---]
   *    Host-Buffer[======--------] wptr=6 rptr=0 wrap_pos=0
   *    User-Buffer[-------]
   *
   * 4. Device Buffer#1 hits watermark
   *    State at beginning:
   *    Device-Buffer[===][---]
   *    Host-Buffer[======--------]
   *    User-Buffer[-------]
   *
   *    -- Copy 3*N from Device-Buffer#1 to Host-Buffer
   *    -- In the meantime, 2nd level trap handler is writing to Device-Buffer#2
   *
   *    Device-Buffer[---][=--]
   *    Host-Buffer[=========-----]
   *    User-Buffer[-------]
   *
   *    -- We have enough data to fill User-Buffer. Callback user data-ready to
   *    -- copy 7*N to user.
   *
   *    Device-Buffer[---][=--]
   *    Host-Buffer[-------==-----]
   *    User-Buffer[=======]
   *
   *    -- User processes User-Buffer
   *
   *    Device-Buffer[---][=--]
   *    Host-Buffer[-------==-----] wptr=9 rptr=7 wrap_pos=0
   *    User-Buffer[-------]
   *
   * 5. Device Buffer#2 hits watermark
   *    State at end:
   *    Device-Buffer[=--][---]
   *    Host-Buffer[-------=====--] wptr=12 rptr=7 wrap_pos=0
   *    User-Buffer[-------]
   *
   * 6. Device Buffer#1 hits watermark
   *    State at beginning:
   *    Device-Buffer[===][---]
   *    Host-Buffer[-------=====--] wptr=12 rptr=7 wrap_pos=0
   *    User-Buffer[-------]
   *
   *    -- We do not have enough space after wptr. The CP-DMA copy
   *    -- can only copy a contiguous range, so copy to the
   *    -- beginning of Host-Buffer and set wrap_pos
   *
   *    Device-Buffer[---][=--]
   *    Host-Buffer[===----=====--] wptr=3 rptr=7 wrap_pos=12
   *    User-Buffer[-------]
   *
   *    -- We have enough data to fill User-Buffer. Callback user data-ready to
   *    -- copy 7*N to user. We copy the tail end (index 7-12) of Host-Buffer
   *    -- before copying the beginning of Host-Buffer (index 0-2).
   *
   *    Device-Buffer[---][=--]
   *    Host-Buffer[--=-----------] wptr=3 rptr=2 wrap_pos=0
   *    User-Buffer[=======]
   *
   *     -- User processes User-Buffer
   *
   * 7. Device Buffer#2 hits watermark
   *    State at end:
   *    Device-Buffer[=--][---]
   *    Host-Buffer[--====--------] wptr=6 rptr=2 wrap_pos=0
   *    User-Buffer[-------]
   */

  uint32_t next_buffer;

  uint64_t reset_write_val;
  uint32_t to_copy = 0, copy_bytes;

  // Sizes, in dwords, of the PM4 packets assembled below.
  const uint32_t atomic_ex_cmd_sz = 9;
  const uint32_t wait_reg_mem_cmd_sz = 7;
  const uint32_t acquire_mem_cmd_sz = 8;
  const uint32_t dma_data_cmd_sz = 7;
  const uint32_t copy_data_cmd_sz = 6;
  const uint32_t write_data_cmd_sz = 5;
  const uint32_t pred_exec_cmd_sz = 2;

  uint64_t buf_write_val;
  uint64_t buf_written_val[2];
  size_t buf_offset;
  uint8_t* buffer[2];
  size_t buf_size;

  // which_buffer and host_write_ptr are references: updates made here are
  // visible to the callers through pcs_data.
  uint32_t& which_buffer = pcs_data->which_buffer;
  uint32_t* cmd_data = pcs_data->cmd_data;
  size_t cmd_data_sz = pcs_data->cmd_data_sz;
  uint64_t* old_val = pcs_data->old_val;
  hsa_signal_t& exec_pm4_signal = pcs_data->exec_pm4_signal;

  uint8_t* host_buffer_begin = pcs_data->host_buffer;
  size_t& host_buffer_size = pcs_data->host_buffer_size;
  uint8_t*& host_write_ptr = pcs_data->host_write_ptr;
  uint8_t* host_buffer_end = host_buffer_begin + host_buffer_size;

  // Addresses of the bookkeeping words inside device_data, as 64-bit integers
  // so they can be split into the LO/HI dwords of the PM4 packets.
  buf_write_val = reinterpret_cast<uint64_t>(&pcs_data->device_data->buf_write_val);
  buf_written_val[0] = reinterpret_cast<uint64_t>(&pcs_data->device_data->buf_written_val0);
  buf_written_val[1] = reinterpret_cast<uint64_t>(&pcs_data->device_data->buf_written_val1);
  buf_size = pcs_data->device_data->buf_size;

  // The sample storage starts right after the reserved1 member of
  // pcs_sampling_data_t.
  buf_offset =
      offsetof(pcs_sampling_data_t, reserved1) + sizeof(((pcs_sampling_data_t*)0)->reserved1);

  // buf_size is a number of samples; the second buffer follows the first one.
  buffer[0] = reinterpret_cast<uint8_t*>(pcs_data->device_data) + buf_offset;
  buffer[1] = buffer[0] + buf_size * session.sample_size();

  // Value swapped into buf_write_val: bit 63 selects the next buffer and the
  // lower 63 bits (the entry count) start again at zero.
  next_buffer = (which_buffer + 1) % 2;
  reset_write_val = (uint64_t)next_buffer << 63;

  // With more than one XCC, the first pred_exec_cmd_sz dwords are reserved for
  // a PRED_EXEC packet that is filled in once the stream length is known.
  unsigned int i = 0;
  if (properties_.NumXcc > 1) i+= pred_exec_cmd_sz;
  memset(cmd_data, 0, cmd_data_sz);

  /*
   * ATOMIC_MEM, perform atomic_exchange
   * We use a double-buffer mechanism so that trap handler calls are writing to one buffer while
   * hsa-runtime is copying data from the other buffer.
   *
   * 1. Atomically swap buffers on the device. Future trap handler calls will put their data into
   *    next_buffer.
   * 2. Return a 64-bit packed value to ROCr; the upper bit is the old buffer and can be ignored.
   *    The lower 63 bits are how many trap handler entrances happened before the atomic swap
   *    i.e., what value to wait for in buf_written_val to know all previous trap entries were
   *    done.
   */

  cmd_data[i++] = PM4_HDR(PM4_HDR_IT_OPCODE_ATOMIC_MEM, atomic_ex_cmd_sz, isa_->GetMajorVersion());
  cmd_data[i++] = PM4_ATOMIC_MEM_DW1_ATOMIC(PM4_ATOMIC_MEM_GL2_OP_ATOMIC_SWAP_RTN_64);
  cmd_data[i++] = PM4_ATOMIC_MEM_DW2_ADDR_LO(buf_write_val);
  cmd_data[i++] = PM4_ATOMIC_MEM_DW3_ADDR_HI((buf_write_val) >> 32);
  cmd_data[i++] = PM4_ATOMIC_MEM_DW4_SRC_DATA_LO((uint64_t)reset_write_val);
  cmd_data[i++] = PM4_ATOMIC_MEM_DW5_SRC_DATA_HI(((uint64_t)reset_write_val) >> 32);
  // The remaining 3 dwords of the 9-dword packet stay zero (from the memset).
  i += 3;
  /* COPY_DATA: store the value returned by the atomic swap into *old_val */
  cmd_data[i++] = PM4_HDR(PM4_HDR_IT_OPCODE_COPY_DATA, copy_data_cmd_sz, isa_->GetMajorVersion());
  cmd_data[i++] =
      PM4_COPY_DATA_DW1(PM4_COPY_DATA_SRC_SEL_ATOMIC_RETURN_DATA | PM4_COPY_DATA_DST_SEL_TC_12 |
                        PM4_COPY_DATA_COUNT_SEL | PM4_COPY_DATA_WR_CONFIRM);
  // Dwords 2 and 3 of the packet stay zero (from the memset).
  i += 2;
  cmd_data[i++] = PM4_COPY_DATA_DW4_DST_ADDR_LO((uint64_t)old_val);
  cmd_data[i++] = PM4_COPY_DATA_DW5_DST_ADDR_HI(((uint64_t)old_val) >> 32);

  if (properties_.NumXcc > 1) {
    // Fill in the reserved PRED_EXEC packet; its exec count covers every dword
    // that follows it in this stream.
    cmd_data[0] =
      PM4_HDR(PM4_HDR_IT_OPCODE_PRED_EXEC, pred_exec_cmd_sz, isa_->GetMajorVersion());
    cmd_data[1] =
      PM4_PRED_EXEC_DW2_EXEC_COUNT(i - pred_exec_cmd_sz) | PM4_PRED_EXEC_DW2_VIRTUALXCCID_SELECT(0x1);
  }

  // Arm the completion signal with 1, then wait until it drops below 1.
  HSA::hsa_signal_store_screlease(exec_pm4_signal, 1);

  queues_[QueuePCSampling]->ExecutePM4(
      cmd_data, i * sizeof(uint32_t), HSA_FENCE_SCOPE_NONE, HSA_FENCE_SCOPE_SYSTEM, &exec_pm4_signal);
  do {
    hsa_signal_value_t val = HSA::hsa_signal_wait_scacquire(
        exec_pm4_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    // -1 abandons the flush without touching the host buffer state.
    // NOTE(review): whoever stores -1 into exec_pm4_signal is not visible in
    // this function - presumably a shutdown path; confirm.
    if (val == -1) return HSA_STATUS_SUCCESS;
    // 0 means the submitted commands have completed.
    if (val == 0) break;
  } while (true);

  // Drop bit 63 (the buffer selector); what remains is the entry count.
  *old_val &= (ULLONG_MAX >> 1);
  /* If the number of entries in old_val is larger than buf_size, then there was a buffer overflow
   * and the 2nd level trap handler code will skip recording samples, causing lost samples
   */
  if (*old_val > buf_size) {
    // NOTE(review): this assigns rather than accumulates, so a lost count that
    // has not been delivered yet is overwritten - confirm this is intended.
    pcs_data->lost_sample_count = *old_val - buf_size;
    *old_val = buf_size;
  }

  // Number of bytes to move out of the device buffer.
  to_copy = *old_val * session.sample_size();

  /* Make sure there is enough space after host_write_ptr */
  // NOTE(review): only the end of the host buffer is checked here; the position
  // of host_read_ptr is not taken into account - verify that unread data cannot
  // be overwritten.
  if (host_write_ptr + to_copy >= host_buffer_end) {
    // Need to wrap around: remember where valid data ends and restart at the
    // beginning of the host buffer.
    pcs_data->host_buffer_wrap_pos = host_write_ptr;
    host_write_ptr = host_buffer_begin;
  }

  // Start the second command stream, again leaving room for PRED_EXEC.
  i = 0;
  if (properties_.NumXcc > 1) i+= pred_exec_cmd_sz;
  memset(cmd_data, 0, cmd_data_sz);

  /*
   * Do the WAIT_REG_MEM, DMA_DATA(s) and WRITE_DATA
   *
   * 1. Wait for all trap handlers to have finished writing values to this buffer by waiting for
   *    buf_written_val to equal old_val.
   * 2. Copy the values out of buffer to the host buffers.
   * 3. Reset buf_written_val so that we start writing to beginning of this buffer on the next
   *    buffer swap.
   */

  /* WAIT_REG_MEM, wait on buf_written_val */
  cmd_data[i++] =
      PM4_HDR(PM4_HDR_IT_OPCODE_WAIT_REG_MEM, wait_reg_mem_cmd_sz, isa_->GetMajorVersion());
  cmd_data[i++] = PM4_WAIT_REG_MEM_DW1(PM4_WAIT_REG_MEM_FUNCTION_EQUAL_TO_REFERENCE |
                                       PM4_WAIT_REG_MEM_MEM_SPACE_MEMORY_SPACE |
                                       PM4_WAIT_REG_MEM_OPERATION_WAIT_REG_MEM);
  cmd_data[i++] = PM4_WAIT_REG_MEM_DW2_MEM_POLL_ADDR_LO(buf_written_val[which_buffer]);
  cmd_data[i++] = PM4_WAIT_REG_MEM_DW3_MEM_POLL_ADDR_HI((buf_written_val[which_buffer]) >> 32);
  cmd_data[i++] = PM4_WAIT_REG_MEM_DW4_REFERENCE(*old_val);
  // All-ones dword following the reference value.
  // NOTE(review): presumably the compare mask - confirm against the PM4 packet definition.
  cmd_data[i++] = 0xFFFFFFFF;
  cmd_data[i++] = PM4_WAIT_REG_MEM_DW6(PM4_WAIT_REG_MEM_POLL_INTERVAL(4) |
                                       PM4_WAIT_REG_MEM_OPTIMIZE_ACE_OFFLOAD_MODE);

  // For GFX1200 and GFX1201 only - add an ACQUIRE_MEM packet to flush L2 cache before DMA.
  // This ensures that any data written by the trap handler is visible to the DMA engine.
  if ((isa_->GetMajorVersion() == 12) && (isa_->GetMinorVersion() == 0)) {
    cmd_data[i++] =
        PM4_HDR(PM4_HDR_IT_OPCODE_ACQUIRE_MEM, acquire_mem_cmd_sz, isa_->GetMajorVersion());
    cmd_data[i++] = 0;                                // DW1: COHER_CNTL
    cmd_data[i++] = 0;                                // DW2: COHER_SIZE
    cmd_data[i++] = 0;                                // DW3: COHER_SIZE_HI
    cmd_data[i++] = 0;                                // DW4: COHER_BASE_LO
    cmd_data[i++] = 0;                                // DW5: COHER_BASE_HI
    cmd_data[i++] = 4;                                // DW6: POLL_INTERVAL
    cmd_data[i++] = PM4_ACQUIRE_MEM_GCR_CNTL_GL2_WB;  // DW7: GCR_CNTL (GL2_WB=1, RANGE=ALL)
  }

  // Source cursor inside the device buffer that is being drained.
  uint8_t* buffer_temp = buffer[which_buffer];

  // One DMA_DATA packet moves at most CP_DMA_DATA_TRANSFER_CNT_MAX bytes, so
  // the copy is split into as many packets as needed. No packet is emitted
  // when to_copy is 0.
  for (copy_bytes = std::min(to_copy, (uint32_t)CP_DMA_DATA_TRANSFER_CNT_MAX); 0 < to_copy;
       to_copy -= copy_bytes) {

    /* DMA_DATA PACKETS, copy buffer using CPDMA */
    cmd_data[i++] = PM4_HDR(PM4_HDR_IT_OPCODE_DMA_DATA, dma_data_cmd_sz, isa_->GetMajorVersion());
    cmd_data[i++] = PM4_DMA_DATA_DW1(PM4_DMA_DATA_DST_SEL_DST_ADDR_USING_L2 |
                                     PM4_DMA_DATA_SRC_SEL_SRC_ADDR_USING_L2);
    cmd_data[i++] = PM4_DMA_DATA_DW2_SRC_ADDR_LO((uint64_t)buffer_temp);
    cmd_data[i++] = PM4_DMA_DATA_DW3_SRC_ADDR_HI(((uint64_t)buffer_temp) >> 32);
    cmd_data[i++] = PM4_DMA_DATA_DW4_DST_ADDR_LO((uint64_t)host_write_ptr);
    cmd_data[i++] = PM4_DMA_DATA_DW5_DST_ADDR_HI(((uint64_t)host_write_ptr) >> 32);
    if (copy_bytes >= to_copy) {
      // Final packet: shrink to the remaining byte count and use the
      // DIS_WC_LAST variant of the flag.
      copy_bytes = to_copy;
      cmd_data[i++] =
          PM4_DMA_DATA_DW6(PM4_DMA_DATA_BYTE_COUNT(copy_bytes) | PM4_DMA_DATA_DIS_WC_LAST);
    } else {
      cmd_data[i++] = PM4_DMA_DATA_DW6(PM4_DMA_DATA_BYTE_COUNT(copy_bytes) | PM4_DMA_DATA_DIS_WC);
    }
    // Advance both cursors; host_write_ptr is a reference into pcs_data, so the
    // host write position moves along with the queued copies.
    buffer_temp += copy_bytes;
    host_write_ptr += copy_bytes;
  }

  /* WRITE_DATA, Reset buf_written_val */
  cmd_data[i++] = PM4_HDR(PM4_HDR_IT_OPCODE_WRITE_DATA, write_data_cmd_sz, isa_->GetMajorVersion());
  cmd_data[i++] = PM4_WRITE_DATA_DW1(PM4_WRITE_DATA_DST_SEL_TC_L2 |
                                     PM4_WRITE_DATA_WR_CONFIRM_WAIT_CONFIRMATION);
  cmd_data[i++] = PM4_WRITE_DATA_DW2_DST_MEM_ADDR_LO(buf_written_val[which_buffer]);
  cmd_data[i++] = PM4_WRITE_DATA_DW3_DST_MEM_ADDR_HI((buf_written_val[which_buffer]) >> 32);
  cmd_data[i++] = PM4_WRITE_DATA_DW4_DATA(0);

  if (properties_.NumXcc > 1) {
    // Same PRED_EXEC prefix as for the first stream.
    cmd_data[0] =
      PM4_HDR(PM4_HDR_IT_OPCODE_PRED_EXEC, pred_exec_cmd_sz, isa_->GetMajorVersion());
    cmd_data[1] =
      PM4_PRED_EXEC_DW2_EXEC_COUNT(i - pred_exec_cmd_sz) | PM4_PRED_EXEC_DW2_VIRTUALXCCID_SELECT(0x1);
  }

  HSA::hsa_signal_store_screlease(exec_pm4_signal, 1);
  queues_[QueuePCSampling]->ExecutePM4(cmd_data, i * sizeof(uint32_t), HSA_FENCE_SCOPE_NONE,
                                       HSA_FENCE_SCOPE_SYSTEM, &exec_pm4_signal);
  do {
    hsa_signal_value_t val = HSA::hsa_signal_wait_scacquire(
        exec_pm4_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
    // Early exit on -1: which_buffer is left unchanged, while host_write_ptr
    // has already been advanced by the loop above.
    if (val == -1) return HSA_STATUS_SUCCESS;
    if (val == 0) break;
  } while (true);

  // save the position of next buffer
  which_buffer = next_buffer;

  return HSA_STATUS_SUCCESS;
}

// Worker loop for one PC sampling method on this agent.
//
// While the session attached to pcs_data is active, each iteration
//   1. blocks on the done signal of the device buffer selected by
//      pcs_data.which_buffer (a value of -1 asks the thread to exit),
//   2. re-arms that signal with 1,
//   3. takes host_buffer_mutex and drains the device buffer into the host ring
//      buffer through PcSamplingFlushDeviceBuffers(),
//   4. hands every complete chunk of session.buffer_size() bytes to
//      session.HandleSampleData(), together with the pending lost-sample count.
//
// pcs_data     per-method sampling state; pcs_data.session must be non-null.
// thread_name  only used in debug output.
//
// No exception leaves this function: everything is caught and logged.
void GpuAgent::PcSamplingThread(pcs_data_t& pcs_data, const char* thread_name) {
  // TODO: Implement lost sample count
  // TODO: Implement latency

  try {
    pcs::PcsRuntime::PcSamplingSession& session = *pcs_data.session;
    uint32_t& which_buffer = pcs_data.which_buffer;

    uint8_t* host_buffer_begin = pcs_data.host_buffer;
    uint8_t* host_buffer_end = pcs_data.host_buffer + pcs_data.host_buffer_size;

    hsa_signal_t done_sig[] = {pcs_data.device_data->done_sig0, pcs_data.device_data->done_sig1};

    while (pcs_data.session->isActive()) {
      // Wait until the active device buffer is ready (0) or an exit has been
      // requested (-1). Any other value below 1 just waits again.
      hsa_signal_value_t val;
      do {
        val = HSA::hsa_signal_wait_scacquire(done_sig[which_buffer], HSA_SIGNAL_CONDITION_LT, 1,
                                             UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
      } while (val != 0 && val != -1);
      if (val == -1) break;

      // Re-arm the signal for the next time this buffer fills up.
      HSA::hsa_signal_store_screlease(done_sig[which_buffer], 1);

      // Lock buffer to ensure thread-safe access
      std::lock_guard<std::mutex> lock(pcs_data.host_buffer_mutex);
      // Flush device buffers
      if (PcSamplingFlushDeviceBuffers(session) != HSA_STATUS_SUCCESS) break;

      size_t bytes_before_wrap;
      size_t bytes_after_wrap;

      assert(pcs_data.host_read_ptr >= host_buffer_begin && pcs_data.host_read_ptr < host_buffer_end);
      assert(pcs_data.host_write_ptr >= host_buffer_begin && pcs_data.host_write_ptr < host_buffer_end);
      assert(pcs_data.host_buffer_wrap_pos ? (pcs_data.host_read_ptr > pcs_data.host_write_ptr)
                                           : (pcs_data.host_read_ptr <= pcs_data.host_write_ptr));

      if (pcs_data.host_buffer_wrap_pos) {
        assert(pcs_data.host_buffer_wrap_pos <= host_buffer_end &&
               pcs_data.host_buffer_wrap_pos > host_buffer_begin);
        assert(pcs_data.host_read_ptr <= pcs_data.host_buffer_wrap_pos);

        // Wrapped around: valid data is [read_ptr, wrap_pos) followed by
        // [begin, write_ptr).
        bytes_before_wrap = pcs_data.host_buffer_wrap_pos - pcs_data.host_read_ptr;
        bytes_after_wrap = pcs_data.host_write_ptr - host_buffer_begin;

        // Full chunks that lie entirely before the wrap position.
        while (bytes_before_wrap >= session.buffer_size()) {
          session.HandleSampleData(pcs_data.host_read_ptr, session.buffer_size(), nullptr, 0,
                                   pcs_data.lost_sample_count);
          pcs_data.host_read_ptr += session.buffer_size();
          bytes_before_wrap = pcs_data.host_buffer_wrap_pos - pcs_data.host_read_ptr;
          pcs_data.lost_sample_count = 0;
        }

        // One chunk that straddles the wrap position: the remaining tail plus
        // the start of the buffer. The pending lost-sample count is reported
        // here as well, since it is cleared right after this call.
        if (bytes_before_wrap + bytes_after_wrap >= session.buffer_size()) {
          session.HandleSampleData(pcs_data.host_read_ptr, bytes_before_wrap, host_buffer_begin,
                                   (session.buffer_size() - bytes_before_wrap),
                                   pcs_data.lost_sample_count);
          pcs_data.host_read_ptr = host_buffer_begin + (session.buffer_size() - bytes_before_wrap);
          bytes_before_wrap = 0;
          pcs_data.host_buffer_wrap_pos = 0;
          bytes_after_wrap = pcs_data.host_write_ptr - pcs_data.host_read_ptr;
          pcs_data.lost_sample_count = 0;
        }

        // Full chunks in the part after the wrap.
        while (bytes_after_wrap >= session.buffer_size()) {
          session.HandleSampleData(pcs_data.host_read_ptr, session.buffer_size(), nullptr, 0,
                                   pcs_data.lost_sample_count);
          pcs_data.host_read_ptr += session.buffer_size();
          bytes_before_wrap = 0;
          bytes_after_wrap = pcs_data.host_write_ptr - pcs_data.host_read_ptr;
          pcs_data.lost_sample_count = 0;
        }
      } else {
        // Handle non-wrapped buffer: valid data is [read_ptr, write_ptr).
        bytes_before_wrap = pcs_data.host_write_ptr - pcs_data.host_read_ptr;

        while (bytes_before_wrap >= session.buffer_size()) {
          assert(pcs_data.host_read_ptr >= host_buffer_begin &&
                 pcs_data.host_read_ptr + session.buffer_size() <= host_buffer_end);
          session.HandleSampleData(pcs_data.host_read_ptr, session.buffer_size(), nullptr, 0,
                                   pcs_data.lost_sample_count);
          pcs_data.host_read_ptr += session.buffer_size();
          bytes_before_wrap = pcs_data.host_write_ptr - pcs_data.host_read_ptr;
          pcs_data.lost_sample_count = 0;
        }
      }
    }

    debug_print("%s::Exiting\n", thread_name);
  } catch (const std::exception& e) {
    debug_print("Exception in %s: %s\n", thread_name, e.what());
  } catch (...) {
    debug_print("Unknown exception in %s\n", thread_name);
  }
}

// Delivers everything that is currently buffered for the session to the user.
//
// The device buffer is drained into the host ring buffer first; after that the
// whole readable region of the ring buffer is passed to
// session.HandleSampleData() in pieces of at most session.buffer_size() bytes,
// including a final short piece.
//
// Returns HSA_STATUS_SUCCESS (also for an unknown sampling method) or
// HSA_STATUS_ERROR when draining the device buffer fails.
hsa_status_t GpuAgent::PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) {
  const auto kind = session.method();
  const bool is_hosttrap = (kind == HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1);
  if (!is_hosttrap && kind != HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1) {
    return HSA_STATUS_SUCCESS;  // Unsupported sampling method
  }
  pcs_data_t& state = is_hosttrap ? pcs_hosttrap_data_ : pcs_stochastic_data_;

  uint8_t* const ring_begin = state.host_buffer;
  uint8_t* const ring_end = state.host_buffer + state.host_buffer_size;

  // The host buffer is shared with the sampling worker thread.
  std::lock_guard<std::mutex> guard(state.host_buffer_mutex);

  // Pull in whatever is still sitting in the device buffer.
  if (PcSamplingFlushDeviceBuffers(session) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;

  assert(state.host_read_ptr >= ring_begin && state.host_read_ptr < ring_end);
  assert(state.host_write_ptr >= ring_begin &&
         state.host_write_ptr < ring_end);
  assert(state.host_buffer_wrap_pos ? (state.host_read_ptr > state.host_write_ptr)
                                    : (state.host_read_ptr <= state.host_write_ptr));

  if (!state.host_buffer_wrap_pos) {
    // Linear case: the readable region is [read_ptr, write_ptr).
    for (size_t pending = state.host_write_ptr - state.host_read_ptr; pending > 0;
         pending = state.host_write_ptr - state.host_read_ptr) {
      size_t chunk = std::min(pending, session.buffer_size());
      assert(state.host_read_ptr >= ring_begin &&
             state.host_read_ptr + chunk <= ring_end);

      session.HandleSampleData(state.host_read_ptr, chunk, nullptr, 0,
                               state.lost_sample_count);
      state.host_read_ptr += chunk;
      state.lost_sample_count = 0;
    }
    return HSA_STATUS_SUCCESS;
  }

  // Wrapped case: [read_ptr, wrap_pos) is delivered first, then [begin, write_ptr).
  assert(state.host_buffer_wrap_pos <= ring_end &&
         state.host_buffer_wrap_pos > ring_begin);
  assert(state.host_read_ptr <= state.host_buffer_wrap_pos);

  // Size of the region at the start of the buffer, taken before the tail is drained.
  size_t head_bytes = state.host_write_ptr - ring_begin;

  for (size_t tail_bytes = state.host_buffer_wrap_pos - state.host_read_ptr; tail_bytes > 0;
       tail_bytes = state.host_buffer_wrap_pos - state.host_read_ptr) {
    size_t chunk = std::min(tail_bytes, session.buffer_size());

    session.HandleSampleData(state.host_read_ptr, chunk, nullptr, 0,
                             state.lost_sample_count);
    state.host_read_ptr += chunk;
    state.lost_sample_count = 0;
  }

  // The tail is empty now: drop the wrap marker and continue from the start.
  assert(state.host_read_ptr == state.host_buffer_wrap_pos);
  state.host_buffer_wrap_pos = nullptr;
  state.host_read_ptr = ring_begin;

  while (head_bytes > 0) {
    size_t chunk = std::min(head_bytes, session.buffer_size());

    session.HandleSampleData(state.host_read_ptr, chunk, nullptr, 0,
                             state.lost_sample_count);
    state.host_read_ptr += chunk;
    head_bytes = state.host_write_ptr - state.host_read_ptr;
    state.lost_sample_count = 0;
  }

  return HSA_STATUS_SUCCESS;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_hsa_loader.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_hsa_loader.hpp"
#include "core/inc/runtime.h"

#include <assert.h>
#include <link.h>
#include <linux/limits.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>

#include <cstring>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>

namespace {

#if !defined(_WIN32) && !defined(_WIN64)
// Bitwise complement of (page size - 1), computed once during static
// initialization. The page size is queried with sysconf(); if that call
// reports -1 a page size of 4 KiB is used instead.
uintptr_t PAGE_SIZE_MASK = []() -> uintptr_t {
  const long reported = sysconf(_SC_PAGE_SIZE);
  const uintptr_t page_size =
      (reported == -1) ? (static_cast<uintptr_t>(1) << 12)  // Default page size to 4KiB.
                       : static_cast<uintptr_t>(reported);
  return ~(page_size - 1);
}();
#endif

// Builds a "file://" URI from a file system path.
//
// ASCII letters, ASCII digits and the characters '/', '-', '_', '.', '~' are
// copied unchanged. Every other byte is written as '%' followed by its
// two-digit uppercase hexadecimal value.
//
// The classification is done on explicit ASCII ranges rather than isalnum(),
// so the result does not depend on the current C locale.
//
// file_path  NUL-terminated path; must not be null.
// Returns the encoded URI.
std::string EncodePathname(const char *file_path) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  std::string uri{"file://"};

  for (const char *p = file_path; *p != '\0'; ++p) {
    const unsigned char c = static_cast<unsigned char>(*p);

    const bool is_ascii_alnum = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                                (c >= '0' && c <= '9');
    const bool keep_verbatim = is_ascii_alnum || c == '/' || c == '-' ||
                               c == '_' || c == '.' || c == '~';

    if (keep_verbatim) {
      uri.push_back(static_cast<char>(c));
    } else {
      // Percent-encode: high nibble first, then low nibble.
      uri.push_back('%');
      uri.push_back(kHexDigits[c >> 4]);
      uri.push_back(kHexDigits[c & 0x0F]);
    }
  }

  return uri;
}

// Formats a URI of the form
//   memory://<pid>#offset=0x<address in hex>&size=<size in decimal>
// where <pid> is the id of the calling process and <address> is the numeric
// value of the memory pointer.
std::string GetUriFromMemoryAddress(const void *memory, size_t size) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(memory);

  std::ostringstream uri;
  uri << "memory://" << getpid();
  uri << "#offset=0x" << std::hex << address;
  // Switch back to decimal before the size is written.
  uri << std::dec << "&size=" << size;
  return uri.str();
}

/// @brief Builds a URI for a code object embedded in a loaded ELF image.
///
/// Walks the program headers of every loaded object (dl_iterate_phdr) looking
/// for a PT_LOAD segment whose address range contains @p memory. When one is
/// found the result is "file://<encoded path>#offset=<file offset>&size=<size>",
/// with the file offset derived from the segment's p_vaddr and p_offset.
/// If no segment matches, no usable path is available, or the build targets
/// Windows, the result is the "memory://" URI of GetUriFromMemoryAddress().
///
/// @param memory Address of the first byte of the code object.
/// @param size   Size of the code object in bytes.
/// @return URI describing where the code object comes from.
std::string GetUriFromMemoryInExecutableFile(const void *memory, size_t size) {
#if !defined(_WIN32) && !defined(_WIN64)
  uintptr_t address = reinterpret_cast<uintptr_t>(memory);
  struct callback_data_s {
    ElfW(Addr) address;
    size_t callback_num;
    const char *file_path;
    size_t file_offset;
  } callback_data{address, 0, nullptr, 0};

  // Iterate the loaded shared objects program headers to see if the ELF binary
  // is allocated in a mapped file.
  if (dl_iterate_phdr([](struct dl_phdr_info *info, size_t /*size*/, void *ptr) -> int {
    struct callback_data_s *callback_data = static_cast<struct callback_data_s *>(ptr);

    // The vDSO is a special shared object that is built into the Linux
    // kernel. It is not a regular shared library and is not guaranteed to
    // have a specific segment or section layout, hence it is skipped. The
    // name does not change per program header, so test it once up front.
    if (std::strstr(info->dlpi_name, "vdso.so") != nullptr) {
      ++callback_data->callback_num;
      return 0;
    }

    const ElfW(Addr) elf_address = callback_data->address - info->dlpi_addr;

    for (int n = info->dlpi_phnum - 1; n >= 0; --n) {
      const ElfW(Phdr) &phdr = info->dlpi_phdr[n];
      if (phdr.p_type != PT_LOAD) {
        continue;
      }

      // Unsigned arithmetic: an address below p_vaddr wraps around to a huge
      // value and is rejected by the same upper-bound test.
      const ElfW(Addr) segment_delta = elf_address - phdr.p_vaddr;
      if (segment_delta >= phdr.p_memsz) {
        continue;
      }

      // The first callback is always the program executable.
      if (!info->dlpi_name[0] && callback_data->callback_num == 0) {
        static char argv0[PATH_MAX] = {0};
        // readlink() does not append a terminating NUL; keep the last byte of
        // the zero-initialized buffer untouched so the path stays terminated.
        if (!argv0[0] && readlink("/proc/self/exe", argv0, sizeof(argv0) - 1) == -1)
          return 0;
        callback_data->file_path = argv0;
      } else {
        callback_data->file_path = info->dlpi_name;
      }

      callback_data->file_offset = segment_delta + phdr.p_offset;
      return 1;
    }

    ++callback_data->callback_num;
    return 0;
  }, &callback_data)) {
    if (!callback_data.file_path || callback_data.file_path[0] == '\0') {
      return GetUriFromMemoryAddress(memory, size);
    }

    std::ostringstream uri_stream;
    uri_stream << EncodePathname(callback_data.file_path);
    uri_stream << "#offset=" << callback_data.file_offset;
    uri_stream << "&size=" << size;
    return uri_stream.str();
  }
#endif  // !defined(_WIN32) && !defined(_WIN64)
  return GetUriFromMemoryAddress(memory, size);
}

/// @brief Builds a URI for a code object that lives inside a file mapping.
///
/// Scans /proc/self/maps for the mapping that fully contains
/// [@p memory, @p memory + @p size). If that mapping is backed by a file
/// (non-zero inode and a path), the result is a "file://" URI for that path;
/// "#offset=...&size=..." is appended unless the code object starts at file
/// offset 0 and its size equals the size of the file. In every other case
/// (maps unreadable, no containing mapping, anonymous mapping, Windows build)
/// the result is the "memory://" URI of GetUriFromMemoryAddress().
///
/// @param memory Address of the first byte of the code object.
/// @param size   Size of the code object in bytes.
/// @return URI describing where the code object comes from.
std::string GetUriFromMemoryInMmapedFile(const void *memory, size_t size) {
#if !defined(_WIN32) && !defined(_WIN64)
  std::ifstream proc_maps;
  proc_maps.open("/proc/self/maps", std::ifstream::in);
  if (!proc_maps.is_open() || !proc_maps.good()) {
    return GetUriFromMemoryAddress(memory, size);
  }

  const uintptr_t address = reinterpret_cast<uintptr_t>(memory);

  std::string line;
  while (std::getline(proc_maps, line)) {
    std::stringstream tokens(line);

    // Every field is initialized so that a line that fails to parse can never
    // cause an indeterminate value to be read.
    uintptr_t low_address = 0, high_address = 0;
    char dash = '\0';
    tokens >> std::hex >> low_address >> std::dec
           >> dash
           >> std::hex >> high_address >> std::dec;
    if (!tokens || dash != '-') {
      continue;
    }

    if (!(address >= low_address && (address + size) <= high_address)) {
      continue;
    }

    std::string permissions, device, uri_file_path;
    size_t offset = 0;
    uint64_t inode = 0;
    // The path column is absent for anonymous mappings, so the stream state
    // is deliberately not checked here; inode/path are inspected instead.
    tokens >> permissions
           >> std::hex >> offset >> std::dec
           >> device
           >> inode
           >> uri_file_path;

    if (inode == 0 || uri_file_path.empty()) {
      return GetUriFromMemoryAddress(memory, size);
    }

    // Translate the address inside the mapping into an offset inside the file.
    size_t uri_offset = offset + address - low_address;

    // The offset/size suffix is omitted when the code object is the whole file.
    bool is_complete_file = false;
    if (uri_offset == 0) {
      std::ifstream uri_file(uri_file_path, std::ios::binary);
      if (uri_file) {
        uri_file.seekg(0, std::ios::end);
        is_complete_file = uri_file.tellg() == size;
      }
    }

    std::ostringstream uri_stream;
    uri_stream << EncodePathname(uri_file_path.c_str());
    if (!is_complete_file) {
      uri_stream << "#offset=" << uri_offset;
      uri_stream << "&size=" << size;
    }
    return uri_stream.str();
  }
#endif  // !defined(_WIN32) && !defined(_WIN64)
  return GetUriFromMemoryAddress(memory, size);
}

/// @brief Builds a URI for a code object that was loaded from an open file.
///
/// Resolves the path behind @p file_descriptor through /proc/self/fd and
/// returns a "file://" URI for it, followed by "#offset=<offset>&size=<size>"
/// unless @p is_complete_file is set. If the path cannot be resolved (or on
/// Windows builds) the result is the "memory://" URI of
/// GetUriFromMemoryAddress() for @p memory.
///
/// @param file_descriptor  Descriptor the code object was read from.
/// @param offset           Offset of the code object inside the file.
/// @param size             Size of the code object in bytes.
/// @param is_complete_file True when the code object spans the whole file.
/// @param memory           In-memory address of the code object (fallback).
/// @return URI describing where the code object comes from.
std::string GetUriFromFile(int file_descriptor, size_t offset, size_t size,
    bool is_complete_file, const void *memory) {
#if !defined(_WIN32) && !defined(_WIN64)
  std::ostringstream proc_fd_path;
  proc_fd_path << "/proc/self/fd/" << file_descriptor;

  // readlink() does not append a terminating NUL. The buffer is zero-filled
  // and one byte is held back so the result is always a valid C string.
  char uri_file_path[PATH_MAX] = {0};
  const ssize_t link_length =
      readlink(proc_fd_path.str().c_str(), uri_file_path, sizeof(uri_file_path) - 1);
  if (link_length == -1 || uri_file_path[0] == '\0') {
    return GetUriFromMemoryAddress(memory, size);
  }

  std::ostringstream uri_stream;
  uri_stream << EncodePathname(uri_file_path);
  if (!is_complete_file) {
    uri_stream << "#offset=" << offset;
    uri_stream << "&size=" << size;
  }
  return uri_stream.str();
#else
  return GetUriFromMemoryAddress(memory, size);
#endif  // !defined(_WIN32) && !defined(_WIN64)
}

}  // namespace

namespace rocr {
namespace amd {
namespace hsa {
namespace loader {

/// @brief Destructor. Releases the storage backing the code object when the
/// reader owns it (is_mmap set); otherwise does nothing.
CodeObjectReaderImpl::~CodeObjectReaderImpl() {
  if (!is_mmap) {
    return;
  }
#if !defined(_WIN32) && !defined(_WIN64)
  // Round the code object address down to its page boundary and widen the
  // length by the same amount before unmapping.
  const uintptr_t object_begin = reinterpret_cast<uintptr_t>(code_object_memory);
  const uintptr_t mapping_begin = object_begin & PAGE_SIZE_MASK;
  const size_t mapping_length = code_object_size + (object_begin - mapping_begin);
  munmap(reinterpret_cast<void *>(mapping_begin), mapping_length);
#else
  delete [] code_object_memory;
#endif  // !defined(_WIN32) && !defined(_WIN64)
}

/// @brief Binds this reader to a code object stored in an open file.
///
/// On POSIX builds the requested byte range is mapped read-only and private;
/// on Windows builds it is read into a heap buffer. In both cases the reader's
/// URI is then computed with GetUriFromFile().
///
/// @param _code_object_file_descriptor Open descriptor of the file; -1 is
///        rejected.
/// @param _code_object_offset Offset of the code object inside the file.
/// @param _code_object_size Size of the code object in bytes; 0 means
///        "everything from the offset to the end of the file".
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for a descriptor of -1,
///         HSA_STATUS_ERROR_INVALID_FILE when seeking, mapping or reading fails,
///         HSA_STATUS_ERROR_INVALID_CODE_OBJECT when the offset is at or past
///         the end of the file.
hsa_status_t CodeObjectReaderImpl::SetFile(
    hsa_file_t _code_object_file_descriptor,
    size_t _code_object_offset,
    size_t _code_object_size) {
  assert(!code_object_memory && "Code object reader wrapper is already set");

  if (_code_object_file_descriptor == -1) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Seeking to the end yields the file size.
  off_t file_size = __lseek__(_code_object_file_descriptor, 0, SEEK_END);
  if (file_size == (off_t)-1) {
    return HSA_STATUS_ERROR_INVALID_FILE;
  }
  if (file_size <= _code_object_offset) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  if (_code_object_size == 0) {
    _code_object_size = file_size - _code_object_offset;
  }
  // NOTE(review): a non-zero size is not checked against the file size, so
  // offset + size may extend past the end of the file -- confirm callers
  // guarantee the range is valid.
  bool is_complete_file = _code_object_offset == 0 && _code_object_size == file_size;

#if !defined(_WIN32) && !defined(_WIN64)
  // The mapping offset is rounded down to a page boundary and the length is
  // grown by the in-page remainder so the whole requested range is covered.
  off_t adjusted_offset = _code_object_offset & PAGE_SIZE_MASK;
  size_t adjusted_size = _code_object_size + (_code_object_offset - adjusted_offset);
  void *memory = mmap(nullptr, adjusted_size, PROT_READ, MAP_PRIVATE,
                      _code_object_file_descriptor, adjusted_offset);
  // (void *) -1 is the failure value returned by mmap (MAP_FAILED).
  if (memory == (void *) -1) {
    return HSA_STATUS_ERROR_INVALID_FILE;
  }
  // Skip the in-page remainder so the pointer addresses the code object itself.
  code_object_memory = reinterpret_cast<unsigned char*>(memory) +
                        (_code_object_offset & ~PAGE_SIZE_MASK);
  code_object_size = _code_object_size;
  // Tells the destructor that the storage has to be released.
  is_mmap = true;
#else
  // NOTE(review): this branch seeks to offset 0 rather than to
  // _code_object_offset, reads into mmap_memory before it is assigned, and
  // assigns the already released unique_ptr to code_object_memory. It looks
  // unmaintained -- confirm whether it is ever compiled before relying on it.
  if (__lseek__(_code_object_file_descriptor, 0, SEEK_SET) == (off_t)-1) {
    return HSA_STATUS_ERROR_INVALID_FILE;
  }

  std::unique_ptr<unsigned char> memory(new unsigned char[_code_object_size]);
  if (!memory) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  if (__read__(_code_object_file_descriptor, mmap_memory,
                _code_object_size) != _code_object_size) {
    return HSA_STATUS_ERROR_INVALID_FILE;
  }
  mmap_memory = memory.release();
  mmap_size = _code_object_size;
  code_object_memory = memory;
  code_object_size = _code_object_size;
#endif  // !defined(_WIN32) && !defined(_WIN64)

  uri = GetUriFromFile(_code_object_file_descriptor, _code_object_offset,
                        _code_object_size, is_complete_file, code_object_memory);

  return HSA_STATUS_SUCCESS;
}

/// @brief Binds this reader to a code object that is already in memory.
///
/// The memory is referenced, not copied. The reader's URI is derived either
/// from /proc/self/maps (when the runtime flag loader_enable_mmap_uri is set)
/// or from the program headers of the loaded ELF images.
///
/// @param _code_object_memory Address of the code object; must not be null.
/// @param _code_object_size   Size of the code object in bytes; must not be 0.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when the
///         address is null or the size is 0.
hsa_status_t CodeObjectReaderImpl::SetMemory(
    const void *_code_object_memory,
    size_t _code_object_size) {
  assert(!code_object_memory && "Code object reader wrapper is already set");

  const bool has_payload = _code_object_memory != nullptr && _code_object_size != 0;
  if (!has_payload) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  code_object_memory = _code_object_memory;
  code_object_size = _code_object_size;

  const bool use_mmap_uri =
      core::Runtime::runtime_singleton_->flag().loader_enable_mmap_uri();
  uri = use_mmap_uri
            ? GetUriFromMemoryInMmapedFile(_code_object_memory, _code_object_size)
            : GetUriFromMemoryInExecutableFile(_code_object_memory, _code_object_size);

  return HSA_STATUS_SUCCESS;
}

}  // namespace loader
}  // namespace hsa
}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_loader_context.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_loader_context.hpp"

#include <algorithm>
#include <cassert>
#include <cstring>

#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/util/os.h"

#include <cstdlib>
#include <utility>
#include "core/inc/hsa_internal.h"
#include "core/util/utils.h"
#include "inc/hsa_ext_amd.h"

#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#else
#include <sys/mman.h>
#endif

namespace rocr {
namespace {

// Abstract interface for the storage that backs one loaded segment.
// Implementations provide the address the segment is visible at, a host
// address to stage its contents through, and the allocate / copy / freeze /
// free life cycle. Instances are not copyable.
class SegmentMemory {
public:
  SegmentMemory(const SegmentMemory&) = delete;
  SegmentMemory& operator=(const SegmentMemory&) = delete;
  virtual ~SegmentMemory() = default;

  // Address of the segment plus `offset`.
  virtual void* Address(size_t offset = 0) const = 0;
  // Host-side address of the segment plus `offset`.
  virtual void* HostAddress(size_t offset = 0) const = 0;
  // True while storage is held.
  virtual bool Allocated() const = 0;
  // Acquires `size` bytes with alignment `align`, optionally zero-filled.
  virtual bool Allocate(size_t size, size_t align, bool zero) = 0;
  // Copies `size` bytes from `src` to `offset` bytes into the segment.
  virtual bool Copy(size_t offset, const void *src, size_t size) = 0;
  // Releases the storage.
  virtual void Free() = 0;
  // Finalizes the segment contents.
  virtual bool Freeze() = 0;

protected:
  SegmentMemory() = default;
};

class MallocedMemory final: public SegmentMemory {
public:
  MallocedMemory(): SegmentMemory(), ptr_(nullptr), size_(0) {}
  ~MallocedMemory() {}

  void* Address(size_t offset = 0) const override
    { assert(this->Allocated()); return (char*)ptr_ + offset; }
  void* HostAddress(size_t offset = 0) const override
    { return this->Address(offset); }
  bool Allocated() const override
    { return nullptr != ptr_; }

  bool Allocate(size_t size, size_t align, bool zero) override;
  bool Copy(size_t offset, const void *src, size_t size) override;
  void Free() override;
  bool Freeze() override;

private:
  MallocedMemory(const MallocedMemory&);
  MallocedMemory& operator=(const MallocedMemory&);

  void *ptr_;
  size_t size_;
};

bool MallocedMemory::Allocate(size_t size, size_t align, bool zero)
{
  assert(!this->Allocated());
  assert(0 < size);
  assert(0 < align && 0 == (align & (align - 1)));
  ptr_ = _aligned_malloc(size, align);
  if (nullptr == ptr_) {
    return false;
  }
  if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) {
    _aligned_free(ptr_);
    ptr_ = nullptr;
    return false;
  }
  if (zero) {
    memset(ptr_, 0x0, size);
  }
  size_ = size;
  return true;
}

// Copies `size` bytes from `src` to `offset` bytes into the allocation.
// Always returns true.
bool MallocedMemory::Copy(size_t offset, const void *src, size_t size)
{
  assert(this->Allocated());
  assert(nullptr != src);
  assert(0 < size);

  void *destination = this->Address(offset);
  memcpy(destination, src, size);
  return true;
}

// Deregisters the range from the HSA runtime, frees the allocation and
// returns the object to the unallocated state.
void MallocedMemory::Free()
{
  assert(this->Allocated());

  HSA::hsa_memory_deregister(ptr_, size_);
  _aligned_free(ptr_);

  size_ = 0;
  ptr_ = nullptr;
}

bool MallocedMemory::Freeze()
{
  assert(this->Allocated());
  return true;
}

class MappedMemory final: public SegmentMemory {
public:
  MappedMemory(): SegmentMemory(), ptr_(nullptr), size_(0) {}
  ~MappedMemory() {}

  void* Address(size_t offset = 0) const override
    { assert(this->Allocated()); return (char*)ptr_ + offset; }
  void* HostAddress(size_t offset = 0) const override
    { return this->Address(offset); }
  bool Allocated() const override
    { return nullptr != ptr_; }

  bool Allocate(size_t size, size_t align, bool zero) override;
  bool Copy(size_t offset, const void *src, size_t size) override;
  void Free() override;
  bool Freeze() override;

private:
  MappedMemory(const MappedMemory&);
  MappedMemory& operator=(const MappedMemory&);

  void *ptr_;
  size_t size_;
};

// Creates an anonymous, private, read/write/execute mapping of `size` bytes
// and registers it with the HSA runtime.
//
// size  - number of bytes to map; must be greater than 0.
// align - required alignment, a power of two; only asserted, the mapping is
//         not explicitly aligned.
// zero  - when true the mapping is cleared with memset.
//
// Returns true on success. Returns false, leaving the object unallocated,
// when the mapping cannot be created or cannot be registered.
bool MappedMemory::Allocate(size_t size, size_t align, bool zero)
{
  assert(!this->Allocated());
  assert(0 < size);
  assert(0 < align && 0 == (align & (align - 1)));
#if defined(_WIN32) || defined(_WIN64)
  ptr_ = (void*)VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
#else
  ptr_ =
    mmap(nullptr, size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
  // mmap reports failure with MAP_FAILED, not with a null pointer. Normalize
  // it so that Allocated() and the check below see the failure.
  if (MAP_FAILED == ptr_) {
    ptr_ = nullptr;
  }
#endif // _WIN32 || _WIN64
  if (nullptr == ptr_) {
    return false;
  }
  assert(0 == ((uintptr_t)ptr_) % align);
  if (HSA_STATUS_SUCCESS != HSA::hsa_memory_register(ptr_, size)) {
    // Registration failed: release the mapping again.
#if defined(_WIN32) || defined(_WIN64)
    VirtualFree(ptr_, size, MEM_DECOMMIT);
    VirtualFree(ptr_, 0, MEM_RELEASE);
#else
    munmap(ptr_, size);
#endif // _WIN32 || _WIN64
    ptr_ = nullptr;
    return false;
  }
  if (zero) {
    memset(ptr_, 0x0, size);
  }
  size_ = size;
  return true;
}

// Copies `size` bytes from `src` to `offset` bytes into the mapping.
// Always returns true.
bool MappedMemory::Copy(size_t offset, const void *src, size_t size)
{
  assert(this->Allocated());
  assert(nullptr != src);
  assert(0 < size);

  void *destination = this->Address(offset);
  memcpy(destination, src, size);
  return true;
}

// Deregisters the range from the HSA runtime, releases the mapping and
// returns the object to the unallocated state.
void MappedMemory::Free()
{
  assert(this->Allocated());

  HSA::hsa_memory_deregister(ptr_, size_);

#if defined(_WIN32) || defined(_WIN64)
  // Decommit the pages first, then release the reservation.
  VirtualFree(ptr_, size_, MEM_DECOMMIT);
  VirtualFree(ptr_, 0, MEM_RELEASE);
#else
  munmap(ptr_, size_);
#endif // _WIN32 || _WIN64

  size_ = 0;
  ptr_ = nullptr;
}

bool MappedMemory::Freeze()
{
  assert(this->Allocated());
  return true;
}

class RegionMemory final: public SegmentMemory {
public:
 static const core::MemoryRegion* AgentLocal(hsa_agent_t agent, bool is_code);
 static const core::MemoryRegion* System(bool is_code);

 RegionMemory(const core::MemoryRegion* region, bool is_code)
     : SegmentMemory(),
       region_(region),
       ptr_(nullptr),
       host_ptr_(nullptr),
       size_(0),
       is_code_(is_code) {}
 ~RegionMemory() {}

 void* Address(size_t offset = 0) const override {
   assert(this->Allocated());
   return (char*)ptr_ + offset; }
  void* HostAddress(size_t offset = 0) const override
    { assert(this->Allocated()); return (char*)host_ptr_ + offset; }
  bool Allocated() const override
    { return nullptr != ptr_; }

  bool Allocate(size_t size, size_t align, bool zero) override;
  bool Copy(size_t offset, const void *src, size_t size) override;
  void Free() override;
  bool Freeze() override;

private:
  RegionMemory(const RegionMemory&);
  RegionMemory& operator=(const RegionMemory&);

  const core::MemoryRegion* region_;
  void *ptr_;
  void *host_ptr_;
  size_t size_;
  bool is_code_;
};

// Returns the first region of `agent` that is local memory and not
// fine-grained, or nullptr when the agent has no such region.
// `is_code` is not consulted.
const core::MemoryRegion* RegionMemory::AgentLocal(hsa_agent_t agent, bool is_code) {
  AMD::GpuAgent *amd_agent = (AMD::GpuAgent*)core::Agent::Convert(agent);
  assert(amd_agent->device_type() == core::Agent::kAmdGpuDevice && "Invalid agent type.");

  for (const core::MemoryRegion* candidate : amd_agent->regions()) {
    const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)candidate;
    if (amd_region->IsLocalMemory() && !amd_region->fine_grain()) {
      return candidate;
    }
  }
  return nullptr;
}

// Returns the first system memory region: fine-grained for data, coarse-grained
// when `is_code` is set.
const core::MemoryRegion* RegionMemory::System(bool is_code) {
  if (!is_code) {
    return core::Runtime::runtime_singleton_->system_regions_fine()[0];
  }
  return core::Runtime::runtime_singleton_->system_regions_coarse()[0];
}

bool RegionMemory::Allocate(size_t size, size_t align, bool zero) {
  assert(!this->Allocated());
  assert(0 < size);
  assert(0 < align && 0 == (align & (align - 1)));
  core::MemoryRegion::AllocateFlags flags = core::MemoryRegion::AllocateNoFlags;
  if (is_code_) flags = core::MemoryRegion::AllocateExecutable;
  if (HSA_STATUS_SUCCESS !=
      core::Runtime::runtime_singleton_->AllocateMemory(region_, size, flags, &ptr_)) {
    ptr_ = nullptr;
    return false;
  }
  assert(0 == ((uintptr_t)ptr_) % align);
  if (HSA_STATUS_SUCCESS !=
      core::Runtime::runtime_singleton_->AllocateMemory(
          RegionMemory::System(false), size, core::MemoryRegion::AllocateNoFlags, &host_ptr_)) {
    HSA::hsa_memory_free(ptr_);
    ptr_ = nullptr;
    host_ptr_ = nullptr;
    return false;
  }
  if (zero) {
    memset(host_ptr_, 0x0, size);
  }
  size_ = size;
  return true;
}

// Copies `size` bytes from `src` to `offset` bytes into the host staging
// buffer; the segment itself is only updated by Freeze(). Always returns true.
bool RegionMemory::Copy(size_t offset, const void* src, size_t size) {
  assert(this->Allocated() && nullptr != host_ptr_);
  assert(nullptr != src);
  assert(0 < size);

  char* staging = (char*)host_ptr_;
  memcpy(staging + offset, src, size);
  return true;
}

// Frees the segment and, when present, the host staging buffer, then returns
// the object to the unallocated state.
void RegionMemory::Free()
{
  assert(this->Allocated());

  HSA::hsa_memory_free(ptr_);
  if (host_ptr_ != nullptr) {
    HSA::hsa_memory_free(host_ptr_);
  }

  size_ = 0;
  host_ptr_ = nullptr;
  ptr_ = nullptr;
}

bool RegionMemory::Freeze() {
  assert(this->Allocated() && nullptr != host_ptr_);

  core::Agent* agent = region_->owner();

  const size_t& code_object_dmacopy_size =
    core::Runtime::runtime_singleton_->flag().co_dmacopy_size();

  const bool isGpuDevice = (agent->device_type() == core::Agent::kAmdGpuDevice);
  const bool isLargeBarDisabled = isGpuDevice && !reinterpret_cast<AMD::GpuAgent*>(agent)->LargeBarEnabled();
  const bool shouldDmaCopy = isGpuDevice && (isLargeBarDisabled || size_ > code_object_dmacopy_size);

  if (shouldDmaCopy) {
      if (HSA_STATUS_SUCCESS != agent->DmaCopy(ptr_, host_ptr_, size_)) return false;
  } else {
      memcpy(ptr_, host_ptr_, size_);
      if (is_code_ && isGpuDevice)
        reinterpret_cast<AMD::GpuAgent*>(agent)->PcieWcFlush(ptr_, size_);
  }

  // Invalidate agent caches if needed
  if (is_code_ && isGpuDevice)
      reinterpret_cast<AMD::GpuAgent*>(agent)->InvalidateCodeCaches(ptr_, size_);

  return true;
}

}  // namespace anonymous
namespace amd {

// Looks up the ISA handle registered under `name`.
// Returns a handle whose value is 0 when the lookup fails.
hsa_isa_t LoaderContext::IsaFromName(const char *name) {
  assert(name);

  hsa_isa_t isa_handle;
  isa_handle.handle = 0;

  const hsa_status_t lookup_status = HSA::hsa_isa_from_name(name, &isa_handle);
  if (lookup_status != HSA_STATUS_SUCCESS) {
    // Discard whatever the failed lookup may have written.
    isa_handle.handle = 0;
  }

  return isa_handle;
}

// Returns true when at least one ISA reported by `agent` is compatible with
// `code_object_isa` for the given generic code object version, as decided by
// core::Isa::IsCompatible. Returns false when the agent's ISAs cannot be
// iterated.
bool LoaderContext::IsaSupportedByAgent(hsa_agent_t agent,
                                        hsa_isa_t code_object_isa,
                                        unsigned codeGenericVersion) {
  // State shared with the per-ISA callback.
  struct MatchState {
    hsa_isa_t wanted_isa;
    unsigned int generic_version;
    bool compatible;
  } state = {code_object_isa, codeGenericVersion, false};

  auto check_agent_isa = [](hsa_isa_t agent_isa_h, void *data) -> hsa_status_t {
    assert(data);
    MatchState *match = static_cast<MatchState*>(data);
    // Iteration stops at the first match, so no match can be recorded yet.
    assert(!match->compatible);

    const core::Isa *agent_isa = core::Isa::Object(agent_isa_h);
    assert(agent_isa);
    const core::Isa *code_object_isa = core::Isa::Object(match->wanted_isa);
    assert(code_object_isa);

    match->compatible =
        core::Isa::IsCompatible(*code_object_isa, *agent_isa, match->generic_version);
    if (match->compatible) {
      return HSA_STATUS_INFO_BREAK;
    }
    return HSA_STATUS_SUCCESS;
  };

  const hsa_status_t status = HSA::hsa_agent_iterate_isas(agent, check_agent_isa, &state);
  const bool iteration_ok =
      (status == HSA_STATUS_SUCCESS) || (status == HSA_STATUS_INFO_BREAK);
  if (!iteration_ok) {
    return false;
  }
  return state.compatible;
}

// Allocates backing storage for one segment of a code object.
//
// The storage kind depends on the segment and on the agent's profile:
//   GLOBAL_AGENT / READONLY_AGENT : agent-local region (BASE profile) or
//                                   fine-grained system region (FULL profile)
//   GLOBAL_PROGRAM                : fine-grained system region
//   CODE_AGENT                    : agent-local region (BASE profile) or an
//                                   executable host mapping (FULL profile)
//
// Returns an opaque SegmentMemory handle to pass to the other Segment*
// functions, or nullptr when the agent profile cannot be queried, the
// segment/profile combination is not handled, or the allocation fails.
void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
                                  hsa_agent_t agent,
                                  size_t size,
                                  size_t align,
                                  bool zero)
{
  assert(0 < size);
  assert(0 < align && 0 == (align & (align - 1)));

  hsa_profile_t agent_profile;
  const hsa_status_t profile_status =
      HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (profile_status != HSA_STATUS_SUCCESS) {
    return nullptr;
  }

  SegmentMemory *backing = nullptr;
  if (segment == AMDGPU_HSA_SEGMENT_GLOBAL_AGENT ||
      segment == AMDGPU_HSA_SEGMENT_READONLY_AGENT) {
    if (agent_profile == HSA_PROFILE_BASE) {
      backing = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, false), false);
    } else if (agent_profile == HSA_PROFILE_FULL) {
      backing = new (std::nothrow) RegionMemory(RegionMemory::System(false), false);
    } else {
      assert(false);
    }
  } else if (segment == AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM) {
    backing = new (std::nothrow) RegionMemory(RegionMemory::System(false), false);
  } else if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
    if (agent_profile == HSA_PROFILE_BASE) {
      backing = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, true), true);
    } else if (agent_profile == HSA_PROFILE_FULL) {
      backing = new (std::nothrow) MappedMemory();
    } else {
      assert(false);
    }
  } else {
    assert(false);
  }

  if (backing == nullptr) {
    return nullptr;
  }

  if (!backing->Allocate(size, align, zero)) {
    delete backing;
    return nullptr;
  }

  return backing;
}

// Copies `size` bytes from `src` to `offset` bytes into the segment behind the
// handle `dst` (as returned by SegmentAlloc). Returns the result of the
// segment's Copy().
bool LoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment, // not used.
                                hsa_agent_t agent,                // not used.
                                void* dst,
                                size_t offset,
                                const void* src,
                                size_t size)
{
  assert(nullptr != dst);
  SegmentMemory *backing = static_cast<SegmentMemory*>(dst);
  return backing->Copy(offset, src, size);
}

// Releases the storage behind the handle `seg` (as returned by SegmentAlloc)
// and destroys the handle itself; `seg` must not be used afterwards.
void LoaderContext::SegmentFree(amdgpu_hsa_elf_segment_t segment, // not used.
                                hsa_agent_t agent,                // not used.
                                void* seg,
                                size_t size)                      // not used.
{
  assert(nullptr != seg);
  SegmentMemory *backing = static_cast<SegmentMemory*>(seg);
  backing->Free();
  delete backing;
}

// Returns the address `offset` bytes into the segment behind the handle `seg`.
void* LoaderContext::SegmentAddress(amdgpu_hsa_elf_segment_t segment, // not used.
                                    hsa_agent_t agent,                // not used.
                                    void* seg,
                                    size_t offset)
{
  assert(nullptr != seg);
  const SegmentMemory *backing = static_cast<SegmentMemory*>(seg);
  return backing->Address(offset);
}

// Returns the host address `offset` bytes into the segment behind the handle
// `seg`.
void* LoaderContext::SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, // not used.
                                        hsa_agent_t agent,                // not used.
                                        void* seg,
                                        size_t offset)
{
  assert(nullptr != seg);
  const SegmentMemory *backing = static_cast<SegmentMemory*>(seg);
  return backing->HostAddress(offset);
}

// Finalizes the contents of the segment behind the handle `seg`.
// Returns the result of the segment's Freeze().
bool LoaderContext::SegmentFreeze(amdgpu_hsa_elf_segment_t segment, // not used.
                                  hsa_agent_t agent,                // not used.
                                  void* seg,
                                  size_t size)                      // not used.
{
  assert(nullptr != seg);
  SegmentMemory *backing = static_cast<SegmentMemory*>(seg);
  return backing->Freeze();
}

bool LoaderContext::ImageExtensionSupported() {
  hsa_status_t hsa_status = HSA_STATUS_SUCCESS;
  bool result = false;

  hsa_status =
      HSA::hsa_system_extension_supported(HSA_EXTENSION_IMAGES, 1, 0, &result);
  if (HSA_STATUS_SUCCESS != hsa_status) {
    return false;
  }

  return result;
}

// Creates an image on `agent` by forwarding to hsa_ext_image_create and
// returns that call's status. All pointer arguments must be non-null and the
// images extension must be supported (asserted).
hsa_status_t LoaderContext::ImageCreate(
    hsa_agent_t agent, hsa_access_permission_t image_permission,
    const hsa_ext_image_descriptor_t *image_descriptor, const void *image_data,
    hsa_ext_image_t *image_handle) {
  assert(agent.handle);
  assert(image_descriptor);
  assert(image_data);
  assert(image_handle);

  assert(ImageExtensionSupported());

  const hsa_status_t create_status = hsa_ext_image_create(
      agent, image_descriptor, image_data, image_permission, image_handle);
  return create_status;
}

// Destroys an image on `agent` by forwarding to hsa_ext_image_destroy and
// returns that call's status. Both handles must be non-zero and the images
// extension must be supported (asserted).
hsa_status_t LoaderContext::ImageDestroy(hsa_agent_t agent,
                                         hsa_ext_image_t image_handle) {
  assert(agent.handle);
  assert(image_handle.handle);

  assert(ImageExtensionSupported());

  const hsa_status_t destroy_status = hsa_ext_image_destroy(agent, image_handle);
  return destroy_status;
}

// Creates a sampler on `agent` by forwarding to hsa_ext_sampler_create and
// returns that call's status. All pointer arguments must be non-null and the
// images extension must be supported (asserted).
hsa_status_t LoaderContext::SamplerCreate(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler_handle) {
  assert(agent.handle);
  assert(sampler_descriptor);
  assert(sampler_handle);

  assert(ImageExtensionSupported());

  const hsa_status_t create_status =
      hsa_ext_sampler_create(agent, sampler_descriptor, sampler_handle);
  return create_status;
}

// Destroys a sampler on `agent` by forwarding to hsa_ext_sampler_destroy and
// returns that call's status. Both handles must be non-zero and the images
// extension must be supported (asserted).
hsa_status_t LoaderContext::SamplerDestroy(hsa_agent_t agent,
                                           hsa_ext_sampler_t sampler_handle) {
  assert(agent.handle);
  assert(sampler_handle.handle);

  assert(ImageExtensionSupported());

  const hsa_status_t destroy_status = hsa_ext_sampler_destroy(agent, sampler_handle);
  return destroy_status;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_memory_region.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_memory_region.h"

#include <algorithm>

#include "core/inc/runtime.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/util/utils.h"
#include "core/inc/exceptions.h"
#include <unistd.h>

namespace rocr {
namespace AMD {

// Tracks aggregate size of system memory available on platform
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
// System page size in bytes, queried once during static initialization.
// NOTE(review): sysconf() reports failure with -1, which would be stored here
// as a very large size_t; no check is visible at this point -- confirm that
// this cannot happen on supported platforms.
const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE);

// Constructs a region from the thunk memory properties `mem_props`.
//
// The constructor fills in the allocation flags (mem_flag_), clears the map
// flags (map_flag_), picks the virtual size of the region and computes the
// page-aligned maximum size of a single allocation.  Fine-grained system
// regions additionally contribute to max_sysmem_alloc_size_.
//
// `fine_grain` and `extended_scope_fine_grain` must not both be set.
MemoryRegion::MemoryRegion(bool fine_grain, bool kernarg, bool full_profile,
                           bool extended_scope_fine_grain, bool user_visible, core::Agent* owner,
                           const HsaMemoryProperties& mem_props)
    : core::MemoryRegion(fine_grain, kernarg, full_profile, extended_scope_fine_grain, user_visible,
                         owner),
      mem_props_(mem_props),
      max_single_alloc_size_(0),
      virtual_size_(0),
      fragment_allocator_(BlockAllocator(*this)) {
  // Virtual size used for local memory and for non-full-profile system memory.
  static constexpr HSAuint64 kGpuVmBytes = 1ULL << 40;

  // Start from the physical size; the heap specific branches below may override it.
  virtual_size_ = GetPhysicalSize();

  // extended_scope_fine_grain and fine_grain memory regions are mutually exclusive
  assert(!(fine_grain && extended_scope_fine_grain));

  mem_flag_.Value = 0;
  map_flag_.Value = 0;

  // A region is coarse grained unless one of the two fine-grained modes was requested.
  const bool any_fine_grain = fine_grain || extended_scope_fine_grain;
  mem_flag_.ui32.CoarseGrain = any_fine_grain ? 0 : 1;

  // Extended scope fine-grained memory: device scope atomics are promoted to
  // system scope atomics.  Non-compliant systems may require the application
  // to perform device-specific actions, like HDP flushes, to achieve
  // system-scope coherence.
  mem_flag_.ui32.ExtendedCoherent = extended_scope_fine_grain ? 1 : 0;

  if (IsLocalMemory()) {
    // Only the private frame buffer heap is marked as not host accessible.
    const bool host_visible = mem_props_.HeapType != HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;

    mem_flag_.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    mem_flag_.ui32.NoSubstitute = 1;
    mem_flag_.ui32.HostAccess = host_visible ? 1 : 0;
    mem_flag_.ui32.NonPaged = 1;

    virtual_size_ = kGpuVmBytes;
  } else if (IsSystem()) {
    mem_flag_.ui32.PageSize = GetPageSize();
    mem_flag_.ui32.NoSubstitute = 0;
    mem_flag_.ui32.HostAccess = 1;
    mem_flag_.ui32.CachePolicy = HSA_CACHING_CACHED;

    // Kernarg regions are additionally flagged as uncached.
    if (kernarg) {
      mem_flag_.ui32.Uncached = 1;
    }

    virtual_size_ = full_profile ? os::GetUserModeVirtualMemorySize() : kGpuVmBytes;
  }

  // The largest single allocation is the physical size rounded down to a page boundary.
  max_single_alloc_size_ = AlignDown(static_cast<size_t>(GetPhysicalSize()), GetPageSize());

  // Keep track of total system memory available.
  // @note: System memory is surfaced as both coarse and fine grain memory
  // regions.  Only the fine grain one is counted to avoid double counting.
  if (fine_grain && IsSystem()) {
    max_sysmem_alloc_size_ += max_single_alloc_size_;
  }

  assert(GetVirtualSize() != 0);
  assert(IsMultipleOf(max_single_alloc_size_, GetPageSize()));
}

// Nothing to release explicitly; members clean up through their own destructors.
MemoryRegion::~MemoryRegion() = default;

// Locked front end of AllocateImpl(): holds the owning agent's
// agent_memory_lock_ for the duration of the allocation.
//
// `size` is an in/out argument; see AllocateImpl() for how it is adjusted.
hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, void** address,
                                    int agent_node_id) const {
  ScopedAcquire<KernelMutex> guard(&owner()->agent_memory_lock_);
  const hsa_status_t status = AllocateImpl(size, alloc_flags, address, agent_node_id);
  return status;
}

// Allocates `size` bytes from this region through the owning agent's driver.
//
// Does not take any lock itself (Allocate() is the locked wrapper).
//
// @param size          Requested size; on the driver path it is rounded up to
//                      a multiple of the page size before the driver is called.
// @param alloc_flags   Forwarded to the driver unchanged.
// @param address       Receives the allocation; must not be null.
// @param agent_node_id Forwarded to the driver unchanged.
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if `address` is null,
//         HSA_STATUS_ERROR_INVALID_ALLOCATION if the region is neither system
//         nor local memory or the request exceeds the region limit, otherwise
//         the driver's status.
hsa_status_t MemoryRegion::AllocateImpl(size_t& size, AllocateFlags alloc_flags,
                                        void** address, int agent_node_id) const {
  if (address == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  const bool system_region = IsSystem();
  if (!system_region && !IsLocalMemory()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Allocation requests for system memory consider the aggregate memory
  // available on all CPU devices; other regions are bounded by their own size.
  const auto limit = system_region ? max_sysmem_alloc_size_ : max_single_alloc_size_;
  if (size > limit) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  size = AlignUp(size, GetPageSize());

  return owner()->driver().AllocateMemory(*this, alloc_flags, address, size, agent_node_id);
}

// Locked front end of FreeImpl(): holds the owning agent's agent_memory_lock_
// while the memory is released.
hsa_status_t MemoryRegion::Free(void* address, size_t size) const {
  ScopedAcquire<KernelMutex> guard(&owner()->agent_memory_lock_);
  const hsa_status_t status = FreeImpl(address, size);
  return status;
}

// Releases `address` without taking any lock.  The fragment allocator is
// offered the pointer first; only when it declines is the driver asked to
// free the memory.
hsa_status_t MemoryRegion::FreeImpl(void* address, size_t size) const {
  if (!fragment_allocator_.free(address)) {
    return owner()->driver().FreeMemory(address, size);
  }

  return HSA_STATUS_SUCCESS;
}

// TODO:  Look into a better name and/or making this process transparent to exporting.
//
// Asks the fragment allocator to discard the block containing `address`
// while holding the owning agent's agent_memory_lock_.
// Returns HSA_STATUS_ERROR_INVALID_ALLOCATION when the allocator reports failure.
hsa_status_t MemoryRegion::IPCFragmentExport(void* address) const {
  ScopedAcquire<KernelMutex> guard(&owner()->agent_memory_lock_);
  return fragment_allocator_.discardBlock(address) ? HSA_STATUS_SUCCESS
                                                   : HSA_STATUS_ERROR_INVALID_ALLOCATION;
}

// Answers an hsa_region_info_t (or hsa_amd_region_info_t) query about this region.
//
// @param attribute Attribute to query.  Values outside hsa_region_info_t are
//                  reinterpreted as hsa_amd_region_info_t.
// @param value     Destination buffer; written with the type that matches the
//                  attribute (hsa_region_segment_t, uint32_t, size_t, bool or void*).
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when the
//         attribute is not recognised.
hsa_status_t MemoryRegion::GetInfo(hsa_region_info_t attribute,
                                   void* value) const {
  const auto heap_type = mem_props_.HeapType;

  // Heap groupings shared by several attributes below.
  const bool is_host_heap =
      (heap_type == HSA_HEAPTYPE_SYSTEM) || (heap_type == HSA_HEAPTYPE_DEVICE_SVM);
  const bool is_frame_buffer = (heap_type == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) ||
      (heap_type == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC);
  const bool is_global_heap = is_host_heap || is_frame_buffer;

  switch (attribute) {
    case HSA_REGION_INFO_SEGMENT: {
      auto* segment = static_cast<hsa_region_segment_t*>(value);
      if (is_global_heap) {
        *segment = HSA_REGION_SEGMENT_GLOBAL;
      } else if (heap_type == HSA_HEAPTYPE_GPU_LDS) {
        *segment = HSA_REGION_SEGMENT_GROUP;
      } else {
        // Any other heap type is unexpected; `value` is left untouched.
        assert(false && "Memory region should only be global, group");
      }
      break;
    }
    case HSA_REGION_INFO_GLOBAL_FLAGS: {
      uint32_t flags = 0;
      if (is_global_heap) {
        // Exactly one of the three grain flags is reported.
        if (fine_grain()) {
          flags = HSA_REGION_GLOBAL_FLAG_FINE_GRAINED;
        } else if (extended_scope_fine_grain()) {
          flags = HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED;
        } else {
          flags = HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED;
        }

        if (kernarg()) flags |= HSA_REGION_GLOBAL_FLAG_KERNARG;
      }
      *static_cast<uint32_t*>(value) = flags;
      break;
    }
    case HSA_REGION_INFO_SIZE:
      *static_cast<size_t*>(value) = static_cast<size_t>(GetPhysicalSize());
      break;
    case HSA_REGION_INFO_ALLOC_MAX_SIZE: {
      auto* max_size = static_cast<size_t*>(value);
      if (is_host_heap) {
        // Host heaps report the aggregate system memory limit.
        *max_size = max_sysmem_alloc_size_;
      } else if (is_frame_buffer || heap_type == HSA_HEAPTYPE_GPU_SCRATCH) {
        *max_size = max_single_alloc_size_;
      } else {
        *max_size = 0;
      }
      break;
    }
    case HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED:
      *static_cast<bool*>(value) = is_global_heap;
      break;
    case HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE:
    case HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT: {
      // Granule and alignment are both the page size for allocatable heaps.
      auto* bytes = static_cast<size_t*>(value);
      if (is_global_heap) {
        *bytes = GetPageSize();
      } else {
        *bytes = 0;
      }
      break;
    }
    default:
      // Not a core attribute: try the AMD extension attributes.
      switch (static_cast<hsa_amd_region_info_t>(attribute)) {
        case HSA_AMD_REGION_INFO_HOST_ACCESSIBLE:
          *static_cast<bool*>(value) = (heap_type == HSA_HEAPTYPE_SYSTEM);
          break;
        case HSA_AMD_REGION_INFO_BASE:
          *static_cast<void**>(value) = reinterpret_cast<void*>(GetBaseAddress());
          break;
        case HSA_AMD_REGION_INFO_BUS_WIDTH:
          *static_cast<uint32_t*>(value) = BusWidth();
          break;
        case HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY:
          *static_cast<uint32_t*>(value) = MaxMemCloc();
          break;
        default:
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
      break;
  }

  return HSA_STATUS_SUCCESS;
}

// Answers an hsa_amd_memory_pool_info_t query about this region.
//
// Attributes that have a region equivalent are forwarded to GetInfo(); the
// pool-only attributes are answered here.
//
// @param attribute Pool attribute to query.
// @param value     Destination buffer, written with the attribute's type.
// @return HSA_STATUS_SUCCESS, the status of GetInfo() for forwarded
//         attributes, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown
//         attribute or a location query on a pool that is neither local nor system.
hsa_status_t MemoryRegion::GetPoolInfo(hsa_amd_memory_pool_info_t attribute,
                                       void* value) const {
  switch (attribute) {
    // These attributes are forwarded with their numeric value unchanged.
    case HSA_AMD_MEMORY_POOL_INFO_SEGMENT:
    case HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS:
    case HSA_AMD_MEMORY_POOL_INFO_SIZE:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE:
    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT:
      return GetInfo(static_cast<hsa_region_info_t>(attribute), value);

    case HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE:
      return GetInfo(HSA_REGION_INFO_ALLOC_MAX_SIZE, value);

    case HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL:
      // Only system pools are reported as accessible by all.
      *static_cast<bool*>(value) = IsSystem();
      return HSA_STATUS_SUCCESS;

    case HSA_AMD_MEMORY_POOL_INFO_LOCATION: {
      auto* location = static_cast<hsa_amd_memory_pool_location_t*>(value);
      if (IsLocalMemory()) {
        *location = HSA_AMD_MEMORY_POOL_LOCATION_GPU;
        return HSA_STATUS_SUCCESS;
      }
      if (IsSystem()) {
        *location = HSA_AMD_MEMORY_POOL_LOCATION_CPU;
        return HSA_STATUS_SUCCESS;
      }
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    case HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE: {
      auto* granule = static_cast<size_t*>(value);
      switch (mem_props_.HeapType) {
        case HSA_HEAPTYPE_SYSTEM:
          *granule = GetPageSize();
          break;
        case HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE:
        case HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC:
          // With fragment allocation disabled the page size is recommended,
          // otherwise the fragment allocator's default block size.
          *granule = core::Runtime::runtime_singleton_->flag().disable_fragment_alloc()
              ? GetPageSize()
              : fragment_allocator_.default_block_size();
          break;
        default:
          *granule = 0;
          break;
      }
      return HSA_STATUS_SUCCESS;
    }

    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
}

// Classifies how `agent` may access this pool.
//
// @param agent     Agent requesting access.
// @param link_info Link description between the requesting agent and the
//                  pool owner; only num_hop is consulted here.
// @return The default access classification for the (agent, pool) pair.
hsa_amd_memory_pool_access_t MemoryRegion::GetAccessInfo(
    const core::Agent& agent, const core::Runtime::LinkInfo& link_info) const {
  // The owner of the pool is always allowed by default.
  const bool requester_is_owner =
      agent.public_handle().handle == owner()->public_handle().handle;
  if (requester_is_owner) {
    return HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT;
  }

  // No link between the requesting device and the owner: never accessible.
  if (link_info.num_hop < 1) {
    return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  }

  // Fine and coarse grained system memory: CPUs are allowed by default,
  // every other device type is disallowed by default.
  if (IsSystem()) {
    if (agent.device_type() == core::Agent::kAmdCpuDevice) {
      return HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT;
    }
    return HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT;
  }

  // The pool references neither system nor device local memory.
  if (!IsLocalMemory()) {
    return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
  }

  // Device local memory from here on.
  //  - Not fine grained (coarse grained or extended scope fine grained):
  //    disallowed by default regardless of link type.
  //  - Fine grained and the requester reports the same hive id as the owner
  //    (the original comment describes this as an xGMI connection):
  //    disallowed by default.
  if (!fine_grain() || agent.HiveId() == owner()->HiveId()) {
    return HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT;
  }

  // Fine grained memory reached over a non-xGMI link (i.e. PCIe).
  return HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
}

// Answers an hsa_amd_agent_memory_pool_info_t query for the pair (agent, this pool).
//
// The link information between the agent's node and the owner's node is
// looked up in the runtime and combined with GetAccessInfo().
//
// @param agent     Agent the query is made for.
// @param attribute Attribute to query.
// @param value     Destination buffer, written with the attribute's type.
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an
//         unknown attribute.
hsa_status_t MemoryRegion::GetAgentPoolInfo(
    const core::Agent& agent, hsa_amd_agent_memory_pool_info_t attribute,
    void* value) const {
  const uint32_t src_node = agent.node_id();
  const uint32_t dst_node = owner()->node_id();

  const core::Runtime::LinkInfo link_info =
      core::Runtime::runtime_singleton_->GetLinkInfo(src_node, dst_node);

  const hsa_amd_memory_pool_access_t access_type = GetAccessInfo(agent, link_info);
  const bool reachable = (access_type != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED);

  switch (attribute) {
    case HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS:
      *static_cast<hsa_amd_memory_pool_access_t*>(value) = access_type;
      return HSA_STATUS_SUCCESS;

    case HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS:
      // Hops are only reported when the pool can be accessed at all.
      *static_cast<uint32_t*>(value) = reachable ? link_info.num_hop : 0;
      return HSA_STATUS_SUCCESS;

    case HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO:
      // The output is zeroed first and only overwritten for a reachable pool
      // with at least one hop.
      memset(value, 0, sizeof(hsa_amd_memory_pool_link_info_t));
      if (reachable && (link_info.num_hop > 0)) {
        memcpy(value, &link_info.info, sizeof(hsa_amd_memory_pool_link_info_t));
      }
      return HSA_STATUS_SUCCESS;

    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
}

// Makes the allocation at `ptr` accessible to the listed agents.
//
// @param num_agents Number of entries in `agents`; must be non-zero.
// @param agents     Agents that should get access; must not be null.
// @param ptr        Pointer into an allocation of this region; must not be null.
// @param size       Size of the allocation; must be non-zero.
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for invalid arguments,
//         HSA_STATUS_ERROR if the region is neither system nor local memory,
//         HSA_STATUS_ERROR_INVALID_AGENT if an agent is invalid or is neither
//         a GPU nor a CPU, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the driver
//         fails to map the memory, HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
                                       const hsa_agent_t* agents,
                                       const void* ptr, size_t size) const {
  if (num_agents == 0 || agents == NULL || ptr == NULL || size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (!IsSystem() && !IsLocalMemory()) {
    return HSA_STATUS_ERROR;
  }

  // Adjust for fragments.  Make accessibility sticky for fragments since this will satisfy the
  // union of accessible agents between the fragments in the block.
  hsa_amd_pointer_info_t info = {};
  uint32_t agent_count = 0;
  hsa_agent_t* accessible = nullptr;
  // `accessible` is allocated with malloc by PtrInfo below; release it on every exit path.
  MAKE_SCOPE_GUARD([&]() { free(accessible); });
  core::Runtime::PtrInfoBlockData blockInfo = {};
  // Backing storage for the merged agent list; must outlive every use of `agents` below.
  std::vector<uint64_t> union_agents;
  info.size = sizeof(info);

  ScopedAcquire<KernelMutex> lock(&access_lock_);

  if (core::Runtime::runtime_singleton_->PtrInfo(const_cast<void*>(ptr), &info, malloc,
                                                 &agent_count, &accessible,
                                                 &blockInfo) == HSA_STATUS_SUCCESS) {
    /*  Thunk may return type = HSA_EXT_POINTER_TYPE_UNKNOWN for userptrs */
    if (info.type != HSA_EXT_POINTER_TYPE_UNKNOWN &&
        (blockInfo.length != size || info.sizeInBytes != size)) {
      // `ptr` is a fragment of a larger block: operate on the whole block and
      // grant access to the union of the requested and the already accessible agents.
      union_agents.reserve(static_cast<size_t>(num_agents) + agent_count);
      for (uint32_t i = 0; i < num_agents; ++i) union_agents.push_back(agents[i].handle);
      for (uint32_t i = 0; i < agent_count; ++i) union_agents.push_back(accessible[i].handle);

      // Sort + unique removes agents present in both lists.
      std::sort(union_agents.begin(), union_agents.end());
      const auto last = std::unique(union_agents.begin(), union_agents.end());
      union_agents.erase(last, union_agents.end());

      agents = reinterpret_cast<hsa_agent_t*>(union_agents.data());
      num_agents = static_cast<uint32_t>(union_agents.size());
      size = blockInfo.length;
      ptr = blockInfo.base;
    }
  }

  bool cpu_in_list = false;

  // Collect the node ids of the GPU agents and note whether a CPU was requested.
  std::vector<uint32_t> whitelist_nodes;
  for (uint32_t i = 0; i < num_agents; ++i) {
    core::Agent* agent = core::Agent::Convert(agents[i]);
    if (agent == NULL || !agent->IsValid()) {
      return HSA_STATUS_ERROR_INVALID_AGENT;
    }

    switch (agent->device_type()) {
    case core::Agent::kAmdGpuDevice:
      whitelist_nodes.push_back(agent->node_id());
      break;
    case core::Agent::kAmdCpuDevice:
      cpu_in_list = true;
      break;
    case core::Agent::kAmdAieDevice:
    default:
      return HSA_STATUS_ERROR_INVALID_AGENT;
    }
  }

  if (whitelist_nodes.size() == 0 && IsSystem()) {
    assert(cpu_in_list);
    // This is a system region and only CPU agents in the whitelist.
    // Remove old mappings.
    owner()->driver().MakeMemoryUnresident(ptr);
    return HSA_STATUS_SUCCESS;
  }

  // If this is a local memory region, the owning gpu always needs to be in
  // the whitelist.
  if (IsLocalMemory() &&
      std::find(whitelist_nodes.begin(), whitelist_nodes.end(), owner()->node_id()) ==
          whitelist_nodes.end()) {
    whitelist_nodes.push_back(owner()->node_id());
  }

  // Work on a copy of the region's map flags so the request for host access
  // does not modify the member.
  HsaMemMapFlags map_flag = map_flag_;
  map_flag.ui32.HostAccess |= (cpu_in_list) ? 1 : 0;

  {  // Sequence with pointer info since queries to other fragments of the block may be adjusted by
     // this call.
    ScopedAcquire<KernelSharedMutex::Shared> lock(
        core::Runtime::runtime_singleton_->memory_lock_.shared());
    uint64_t alternate_va = 0;
    if (owner()->driver().MakeMemoryResident(ptr, size, &alternate_va, &map_flag,
                                             whitelist_nodes.size(),
                                             whitelist_nodes.data()) != HSA_STATUS_SUCCESS) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Migration query stub.
// TODO: not implemented yet.  Always reports `result` as false and returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES; `dst` is ignored.
hsa_status_t MemoryRegion::CanMigrate(const MemoryRegion& dst, bool& result) const {
  result = false;

  const hsa_status_t not_implemented = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  return not_implemented;
}

// Migration stub.
// TODO: not implemented yet.  Both arguments are ignored and
// HSA_STATUS_ERROR_OUT_OF_RESOURCES is always returned.
hsa_status_t MemoryRegion::Migrate(uint32_t flag, const void* ptr) const {
  const hsa_status_t not_implemented = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  return not_implemented;
}

// Registers and maps a host pointer so the selected GPU agents can access it.
//
// @param num_agents Number of entries in `agents`; 0 selects all GPU agents.
// @param agents     Agents to map to; null selects all GPU agents.
// @param host_ptr   Host memory to lock.
// @param size       Size of the host memory in bytes.
// @param agent_ptr  Receives the pointer the agents should use.
// @return HSA_STATUS_ERROR if this is not a system region or registration
//         fails, HSA_STATUS_ERROR_INVALID_AGENT for an invalid or unsupported
//         agent, HSA_STATUS_ERROR_OUT_OF_RESOURCES if mapping fails,
//         HSA_STATUS_SUCCESS otherwise.
hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
                                void* host_ptr, size_t size,
                                void** agent_ptr) const {
  if (!IsSystem()) return HSA_STATUS_ERROR;

  // For APU, any host pointer is always accessible by the gpu.
  if (full_profile()) {
    *agent_ptr = host_ptr;
    return HSA_STATUS_SUCCESS;
  }

  std::vector<HSAuint32> gpu_nodes;
  const bool map_to_all_gpus = (num_agents == 0) || (agents == NULL);
  if (map_to_all_gpus) {
    gpu_nodes = core::Runtime::runtime_singleton_->gpu_ids();
  } else {
    for (uint32_t idx = 0; idx < num_agents; ++idx) {
      core::Agent* const target = core::Agent::Convert(agents[idx]);
      if (target == NULL || !target->IsValid()) {
        return HSA_STATUS_ERROR_INVALID_AGENT;
      }

      // GPUs are collected, CPUs are silently accepted, anything else
      // (including AIE devices) is rejected.
      const auto kind = target->device_type();
      if (kind == core::Agent::kAmdGpuDevice) {
        gpu_nodes.push_back(target->node_id());
      } else if (kind != core::Agent::kAmdCpuDevice) {
        return HSA_STATUS_ERROR_INVALID_AGENT;
      }
    }
  }

  // No GPU agents in the whitelist. So no need to register and map since the
  // platform only has CPUs.
  if (gpu_nodes.empty()) {
    *agent_ptr = host_ptr;
    return HSA_STATUS_SUCCESS;
  }

  auto&& kernel_driver = owner()->driver();

  // Call kernel driver to register and pin the memory.
  if (kernel_driver.RegisterMemory(host_ptr, size, const_cast<HsaMemFlags&>(mem_flag_)) !=
      HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  uint64_t alternate_va = 0;
  if (kernel_driver.MakeMemoryResident(host_ptr, size, &alternate_va, &map_flag_,
                                       gpu_nodes.size(),
                                       gpu_nodes.data()) != HSA_STATUS_SUCCESS) {
    // Undo the registration so the failure leaves nothing behind.
    kernel_driver.DeregisterMemory(host_ptr);
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Prefer the alternate address when the driver reported one.
  *agent_ptr = (alternate_va != 0) ? reinterpret_cast<void*>(alternate_va) : host_ptr;
  return HSA_STATUS_SUCCESS;
}

// Reverses Lock(): unmaps and deregisters `host_ptr`.
//
// Returns HSA_STATUS_ERROR for non-system regions.  For full profile regions
// nothing needs to be undone.  Driver failures are only reported through
// assertions; the function still returns HSA_STATUS_SUCCESS.
hsa_status_t MemoryRegion::Unlock(void* host_ptr) const {
  if (!IsSystem()) return HSA_STATUS_ERROR;
  if (full_profile()) return HSA_STATUS_SUCCESS;

  auto&& kernel_driver = owner()->driver();

  const bool unmapped = kernel_driver.MakeMemoryUnresident(host_ptr) == HSA_STATUS_SUCCESS;
  if (!unmapped) {
    assert(false && "Failed to unmap host pointer");
  }

  const bool deregistered = kernel_driver.DeregisterMemory(host_ptr) == HSA_STATUS_SUCCESS;
  if (!deregistered) {
    assert(false && "Failed to deregister host pointer");
  }

  return HSA_STATUS_SUCCESS;
}

// No-op for this region type: all arguments are ignored and
// HSA_STATUS_SUCCESS is returned unconditionally.
hsa_status_t MemoryRegion::AssignAgent(void* ptr, size_t size, const core::Agent& agent,
                                       hsa_access_permission_t access) const {
  const hsa_status_t status = HSA_STATUS_SUCCESS;
  return status;
}

// Forwards to the fragment allocator's trim().
void MemoryRegion::Trim() const {
  fragment_allocator_.trim();
}

void* MemoryRegion::BlockAllocator::alloc(size_t request_size, size_t& allocated_size) const {
  void* ret;
  size_t bsize = AlignUp(request_size, block_size());

  hsa_status_t err = region_.AllocateImpl(
      bsize, core::MemoryRegion::AllocateRestrict | core::MemoryRegion::AllocateDirect, &ret, 0);
  if (err != HSA_STATUS_SUCCESS)
    throw AMD::hsa_exception(err, "MemoryRegion::BlockAllocator::alloc failed.");
  assert(ret != nullptr && "Region returned nullptr on success.");

  allocated_size = bsize;
  return ret;
}

}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/amd_topology.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_topology.h"

#include <algorithm>
#include <cstring>
#include <functional>

#ifndef NDEBUG
#include <iostream>
#endif

#include <array>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

#include <link.h>

#include "core/inc/amd_aie_agent.h"
#include "core/inc/amd_available_drivers.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_filter_device.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/runtime.h"
#include "core/util/utils.h"
#ifdef HSAKMT_VIRTIO_ENABLED
#include "core/inc/amd_virtio_driver.h"
#endif

extern r_debug _amdgpu_r_debug;

namespace rocr {
namespace AMD {
// Anonymous namespace.
namespace {

// Table of driver discovery callbacks iterated by DiscoverDrivers().
// Each callback receives a std::unique_ptr<core::Driver> by reference and
// returns an hsa_status_t.
//
// On Windows the table is empty.  On Linux it has NUM_DRIVER_TYPES slots and
// is initialized with the KFD and XDNA discovery functions, plus the KFD
// virtio one when HSAKMT_VIRTIO_ENABLED is defined.
//
// NOTE(review): if NUM_DRIVER_TYPES is larger than the number of initializers
// selected by the preprocessor, the remaining slots are value-initialized,
// i.e. empty std::function objects - confirm the enum and this list stay in sync.
const std::array<std::function<hsa_status_t(std::unique_ptr<core::Driver>&)>,
#if _WIN32
                 0
#elif __linux__
                 static_cast<size_t>(core::DriverType::NUM_DRIVER_TYPES)
#endif
                 >
    discover_driver_funcs = {
#ifdef __linux__
        KfdDriver::DiscoverDriver,
        XdnaDriver::DiscoverDriver,
#ifdef HSAKMT_VIRTIO_ENABLED
        KfdVirtioDriver::DiscoverDriver,
#endif
#endif
};

/// @brief Runs every driver discovery callback and registers the drivers
/// that were discovered successfully with the runtime.
///
/// @details Callbacks that do not return HSA_STATUS_SUCCESS are skipped.
/// Empty table slots are skipped as well: the table is sized by
/// NUM_DRIVER_TYPES while its initializer list depends on the build
/// configuration, and invoking an empty std::function would throw
/// std::bad_function_call.
void DiscoverDrivers() {
  for (const auto& discover_driver_fn : discover_driver_funcs) {
    // Slot without a discovery function for this build configuration.
    if (!discover_driver_fn) continue;

    std::unique_ptr<core::Driver> driver;
    if (discover_driver_fn(driver) != HSA_STATUS_SUCCESS) continue;

    // Ownership of the discovered driver moves to the runtime.
    core::Runtime::runtime_singleton_->RegisterDriver(std::move(driver));
  }
}

/// @brief Initializes @p driver and closes it again if initialization fails.
///
/// @details A scope guard calls driver->Close() on every exit path unless it
/// is dismissed, which only happens after Init() returned HSA_STATUS_SUCCESS.
///
/// @return true when Init() succeeded, false otherwise.
bool InitializeDriver(std::unique_ptr<core::Driver>& driver) {
  MAKE_NAMED_SCOPE_GUARD(close_on_failure, [&]() { driver->Close(); });

  const bool initialized = (driver->Init() == HSA_STATUS_SUCCESS);
  if (initialized) {
    close_on_failure.Dismiss();
  }

  return initialized;
}

/// @brief Creates a CPU agent for the given node, enables it and registers it
/// with the runtime as an enabled agent.
void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop, core::DriverType driver_type) {
  auto* const cpu_agent = new CpuAgent(node_id, node_prop, driver_type);
  cpu_agent->Enable();

  constexpr bool kEnabled = true;
  core::Runtime::runtime_singleton_->RegisterAgent(cpu_agent, kEnabled);
}

/// @brief Creates a GPU agent for the given node and registers it with the runtime.
///
/// @param node_id     Topology node id of the device.
/// @param node_prop   Node properties; SRAM_EDCSupport may be overwritten for
///                    gfx908 on kernels older than interface version 1.4.
/// @param xnack_mode  Forwarded to the GpuAgent constructor.
/// @param enabled     Whether the agent is enabled and registered as enabled.
/// @param driver_type Driver the agent belongs to.
/// @return The registered agent, or nullptr when the node has no compute
///         cores, the device was suppressed, or its ISA is not recognized.
/// @throws Rethrows any hsa_exception other than HSA_STATUS_ERROR_INVALID_ISA.
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode,
                      bool enabled, core::DriverType driver_type) {
  GpuAgent* gpu = nullptr;
  if (node_prop.NumFComputeCores == 0) {
      // Ignore non GPUs.
      return nullptr;
  }
  try {
    gpu = new GpuAgent(node_id, node_prop, xnack_mode,
                       core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type);

    const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;

    // True for every kernel interface version strictly below 1.4.  Major and
    // minor are compared lexicographically so that 0.x is always "older".
    const bool kfd_before_1_4 = (kfd_version.KernelInterfaceMajorVersion < 1) ||
        (kfd_version.KernelInterfaceMajorVersion == 1 &&
         kfd_version.KernelInterfaceMinorVersion < 4);

    // Check for sramecc incompatibility due to sramecc not being reported correctly in kfd before
    // 1.4.
    if (gpu->supported_isas()[0]->IsSrameccSupported() && kfd_before_1_4) {
      // gfx906 has both sramecc modes in use.  Suppress the device.
      if ((gpu->supported_isas()[0]->GetProcessorName() == "gfx906") &&
          core::Runtime::runtime_singleton_->flag().check_sramecc_validity()) {
        // Zero-initialized so the message stays well formed even if the query
        // below does not fill the buffer.
        char name[64] = {};
        gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, name);
        name[63] = '\0';
        fprintf(stderr,
                "HSA Error:  Incompatible kernel and userspace, %s disabled. Upgrade amdgpu.\n",
                name);
        delete gpu;
        return nullptr;
      }

      // gfx908 always has sramecc set to on in vbios.  Set mode bit to on and recreate the device.
      if (gpu->supported_isas()[0]->GetProcessorName() == "gfx908") {
        node_prop.Capability.ui32.SRAM_EDCSupport = 1;
        delete gpu;
        gpu = new GpuAgent(node_id, node_prop, xnack_mode,
                           core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type);
      }
    }
  } catch (const hsa_exception& e) {
    if(e.error_code() == HSA_STATUS_ERROR_INVALID_ISA) {
      ifdebug {
        if (!strIsEmpty(e.what())) debug_print("Warning: %s\n", e.what());
      }
      // Ignore unrecognized GPUs.
      return nullptr;
    } else {
      // Rethrow remaining exceptions.
      throw;
    }
  }
  if (enabled) gpu->Enable();
  core::Runtime::runtime_singleton_->RegisterAgent(gpu, enabled);
  return gpu;
}

/// @brief Creates an AIE agent for the given node and registers it with the
/// runtime as an enabled agent.
void DiscoverAie(uint32_t node_id, HsaNodeProperties& node_prop) {
  auto* const aie_agent = new AieAgent(node_id, node_prop);

  constexpr bool kEnabled = true;
  core::Runtime::runtime_singleton_->RegisterAgent(aie_agent, kEnabled);
}

/// @brief Registers the IO links of a node with the runtime.
///
/// @param driver   Driver used to query the node's edge properties.
/// @param node_id  Node whose links are queried.
/// @param num_link Number of links to query; nothing is done when it is 0.
///
/// @details Each link reported by the driver is translated into an
/// hsa_amd_memory_pool_link_info_t.  Links whose override flags forbid peer
/// to peer DMA are skipped.  If the driver query fails no link is registered.
void RegisterLinkInfo(const std::unique_ptr<core::Driver>& driver, uint32_t node_id,
                      uint32_t num_link) {
  // Register connectivity links for this agent to the runtime.
  if (num_link == 0) {
    return;
  }

  std::vector<HsaIoLinkProperties> links(num_link);
  if (HSA_STATUS_SUCCESS != driver->GetEdgeProperties(links, node_id)) {
    return;
  }

  // The links are only read, so iterate by const reference instead of copying each entry.
  for (const HsaIoLinkProperties& io_link : links) {
    // Populate link info with thunk property.
    hsa_amd_memory_pool_link_info_t link_info = {0};

    // Per link type defaults for atomics and coherency.
    switch (io_link.IoLinkType) {
      case HSA_IOLINKTYPE_HYPERTRANSPORT:
        link_info.link_type = HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT;
        link_info.atomic_support_32bit = true;
        link_info.atomic_support_64bit = true;
        link_info.coherent_support = true;
        break;
      case HSA_IOLINKTYPE_PCIEXPRESS:
        link_info.link_type = HSA_AMD_LINK_INFO_TYPE_PCIE;
        link_info.atomic_support_32bit = true;
        link_info.atomic_support_64bit = true;
        link_info.coherent_support = true;
        break;
      case HSA_IOLINK_TYPE_QPI_1_1:
        link_info.link_type = HSA_AMD_LINK_INFO_TYPE_QPI;
        link_info.atomic_support_32bit = true;
        link_info.atomic_support_64bit = true;
        link_info.coherent_support = true;
        break;
      case HSA_IOLINK_TYPE_INFINIBAND:
        link_info.link_type = HSA_AMD_LINK_INFO_TYPE_INFINBAND;
        debug_print("IOLINK is missing atomic and coherency defaults.\n");
        break;
      case HSA_IOLINK_TYPE_XGMI:
        link_info.link_type = HSA_AMD_LINK_INFO_TYPE_XGMI;
        link_info.atomic_support_32bit = true;
        link_info.atomic_support_64bit = true;
        link_info.coherent_support = true;
        break;
      default:
        debug_print("Unrecognized IOLINK type.\n");
        break;
    }

    // KFD is reporting wrong override status for XGMI.  Disallow override for bringup.
    // When the override flag is set, the per link flags replace the defaults chosen above.
    if (io_link.Flags.ui32.Override == 1) {
      if (io_link.Flags.ui32.NoPeerToPeerDMA == 1) {
        // Ignore this link since peer to peer is not allowed.
        continue;
      }
      link_info.atomic_support_32bit = (io_link.Flags.ui32.NoAtomics32bit == 0);
      link_info.atomic_support_64bit = (io_link.Flags.ui32.NoAtomics64bit == 0);
      link_info.coherent_support = (io_link.Flags.ui32.NonCoherent == 0);
    }

    link_info.max_bandwidth = io_link.MaximumBandwidth;
    link_info.max_latency = io_link.MaximumLatency;
    link_info.min_bandwidth = io_link.MinimumBandwidth;
    link_info.min_latency = io_link.MinimumLatency;
    // The link weight doubles as the reported NUMA distance.
    link_info.numa_distance = io_link.Weight;

    core::Runtime::runtime_singleton_->RegisterLinkInfo(
        io_link.NodeFrom, io_link.NodeTo, io_link.Weight, io_link.RecSdmaEngIdMask, link_info);
  }
}

/**
 * Process the list of Gpus that are surfaced to user
 */
void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode, bool enabled) {
  // Process user visible Gpu devices
  const int32_t invalidIdx = -1;
  int32_t list_sz = gpu_list.size();
  HsaNodeProperties node_prop = {0};
  for (const auto& gpu_driver : core::Runtime::runtime_singleton_->AgentDrivers()) {
    if (!core::Runtime::IsGPUDriver(gpu_driver->kernel_driver_type_)) {
      continue;
    }

    for (int32_t idx = 0; idx < list_sz; idx++) {
      if (gpu_list[idx] == invalidIdx) {
        break;
      }

      // Obtain properties of the node
      hsa_status_t ret = gpu_driver->GetNodeProperties(node_prop, gpu_list[idx]);
      assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties");

      // disable interrupt signal for DTIF platform
      if (core::Runtime::runtime_singleton_->flag().enable_dtif())
        core::g_use_interrupt_wait = false;

      // Instantiate a Gpu device. The IO links
      // of this node have already been registered
      assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery.");
      DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled, gpu_driver->kernel_driver_type_);
    }
  }
}

/// @brief Calls into the user-mode driver for each node to build the topology
/// of the system.
///
/// @details Topology information includes information about each node in the
/// topology graph, which includes agents, IO links, memory, and caches.
bool BuildTopology() {
  auto rt = core::Runtime::runtime_singleton_;
  std::unordered_map<core::DriverType, HsaSystemProperties> driver_sys_props;
  std::unordered_map<core::DriverType, std::vector<HsaNodeProperties>> driver_node_props;
  size_t link_count = 0;
  /// @todo Currently we can filter out GPU devices using the
  /// ROCR_VISIBLE_DEVICES environment variable. Eventually this
  /// should be updated to allow for filtering other agents like
  /// AIEs.
  RvdFilter rvdFilter;
  int32_t invalidIdx = -1;
  uint32_t visibleCnt = 0;
  std::vector<int32_t> gpu_usr_list;
  std::vector<int32_t> gpu_disabled;
  bool filter = RvdFilter::FilterDevices();

  // Get the system properties from each driver, populate the node properties list
  // for each driver, then update the runtime's link count before traversing each
  // driver's individual nodes.
  for (const auto& driver : rt->AgentDrivers()) {
    auto &sys_props = driver_sys_props[driver->kernel_driver_type_];
    auto &node_props_vec = driver_node_props[driver->kernel_driver_type_];
    if (driver->GetSystemProperties(sys_props) != HSA_STATUS_SUCCESS)
      return false;

    const auto num_nodes = sys_props.NumNodes;

    if (!num_nodes) {
      continue;
    }

    link_count += num_nodes;
    node_props_vec.resize(num_nodes);
    uint32_t node_id = 0;

    for (auto& node_props : node_props_vec) {
      if (driver->GetNodeProperties(node_props, node_id) != HSA_STATUS_SUCCESS) {
        return false;
      }
      ++node_id;
    }
  }

  rt->SetLinkCount(link_count);

  // Traverse each driver's nodes and discover their agents.
  for (const auto& driver : rt->AgentDrivers()) {
    auto& node_props_vec = driver_node_props[driver->kernel_driver_type_];

    /// @todo: Add support for AIEs.
    // Query if env ROCR_VISIBLE_DEVICES is defined. If defined
    // determine number and order of GPU devices to be surfaced.
    if (filter && (core::Runtime::IsGPUDriver(driver->kernel_driver_type_))) {
      rvdFilter.BuildRvdTokenList();
      rvdFilter.BuildDeviceUuidList(node_props_vec);
      visibleCnt = rvdFilter.BuildUsrDeviceList();
      for (int32_t idx = 0; idx < visibleCnt; idx++) {
        gpu_usr_list.push_back(invalidIdx);
      }
    }

    // Discover agents on every node in the platform.
    int32_t kfdIdx = 0;
    uint32_t node_id = 0;
    for (auto& node_props : node_props_vec) {
      if (node_props.NumCPUCores) {
        // Node has CPU cores so instantiate a CPU agent.
        DiscoverCpu(node_id, node_props, driver->kernel_driver_type_);
      }

      if (node_props.NumNeuralCores) {
        // Node has AIE cores so instantiate an AIE agent.
        DiscoverAie(node_id, node_props);
      }

      // Current node is either a dGpu or Apu and might belong
      // to user visible list. Process node if present in usr
      // visible list, continue if not found
      if (node_props.NumFComputeCores != 0) {
        if (filter) {
          int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx);
          if (devRank != (-1)) {
            gpu_usr_list[devRank] = node_id;
          } else {
            gpu_disabled.push_back(node_id);
          }
        } else {
          gpu_usr_list.push_back(node_id);
        }
        kfdIdx++;
      }

      // Register IO links of node without regard to
      // it being visible to user or not. It is not
      // possible to access links of nodes that are
      // not visible
      RegisterLinkInfo(driver, node_id, node_props.NumIOLinks);
      ++node_id;
    }
  }

  // Instantiate ROCr objects to encapsulate Gpu devices
  SurfaceGpuList(gpu_usr_list, rt->XnackEnabled(), true);
  SurfaceGpuList(gpu_disabled, rt->XnackEnabled(), false);

  // Parse HSA_CU_MASK with GPU and CU count limits.
  uint32_t maxGpu = rt->gpu_agents().size();
  uint32_t maxCu = 0;
  uint32_t cus;
  for (auto& gpu : rt->gpu_agents()) {
    gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cus);
    maxCu = Max(maxCu, cus);
  }
  const_cast<Flag&>(rt->flag()).parse_masks(maxGpu, maxCu);

  // Front load the rec_sdma_eng_id_mask to check whether needs to override old mask
  bool rec_sdma_engine_override = false;
  for (auto& src_gpu : rt->gpu_agents()) {
    uint32_t src_id = src_gpu->node_id();

    // Set RecSdmaEngOverride to true for all gpus
    if (rec_sdma_engine_override) {
      ((AMD::GpuAgent*)src_gpu)->SetRecSdmaEngOverride(rec_sdma_engine_override);
      continue;
    }

    // skip the pre-loop if NumSdmaXgmiEngines != 6
    if (((AMD::GpuAgent*)src_gpu)->properties().NumSdmaXgmiEngines != 6)
      break;

    for (auto& dst_gpu : rt->gpu_agents()) {
      uint32_t dst_id = dst_gpu->node_id();
      if (src_id != dst_id) {
        auto linfo = rt->GetLinkInfo(src_id, dst_id);
        if (IsPowerOfTwo(linfo.rec_sdma_eng_id_mask)) {
          rec_sdma_engine_override = true;
          ((AMD::GpuAgent*)src_gpu)->SetRecSdmaEngOverride(rec_sdma_engine_override);
          break;
        }
      }
    }
  }

  // Register destination agents that can SDMA gang copy for source agents
  for (auto& src_gpu : rt->gpu_agents()) {
    uint32_t src_id = src_gpu->node_id();
    for (auto& dst_gpu : rt->gpu_agents()) {
      uint32_t dst_id = dst_gpu->node_id();
      uint32_t gang_factor = 1, rec_sdma_eng_id_mask = 0;

      if (src_id != dst_id) {
        auto linfo = rt->GetLinkInfo(src_id, dst_id);
        // Ganging can only be done over xGMI and is either fixed or variable
        // based on topology information:
        // Weight of 13 - Intra-socket GPU link in multi-partition mode
        // Weigth of 15 - Direct GPU link in single partition mode
        // Weight of 41 - Inter-socket GPU link in multi-partition mode
        if (linfo.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
          // Temporary work-around, disable SDMA ganging on non-APUs in non-SPX modes
          // Check xGMI APU status
          const bool isXgmiApu = static_cast<AMD::GpuAgent*>(src_gpu)->is_xgmi_cpu_gpu();
          if (linfo.info.numa_distance == 13 || linfo.info.numa_distance == 41)
            gang_factor = isXgmiApu ? 2 : 1;
          else if (linfo.info.numa_distance == 15 && linfo.info.min_bandwidth)
            gang_factor = linfo.info.max_bandwidth/linfo.info.min_bandwidth;
          else gang_factor = 1;

          rec_sdma_eng_id_mask = linfo.rec_sdma_eng_id_mask;

          // Override the old mask if rec sdma eng verride is true
          // Using one pcie sdma for device to device copy with limited XGMI SDMA engine.
          // This will help improve all to all copy with limited XGMI SDMA engine.
          if (rec_sdma_engine_override) {
            uint32_t sdma_engine_mask = (1 << (((AMD::GpuAgent*)src_gpu)->properties().NumSdmaEngines - 1));
            rec_sdma_eng_id_mask = !IsPowerOfTwo(rec_sdma_eng_id_mask) ?
              sdma_engine_mask : rec_sdma_eng_id_mask;
          }
        }
      }

      // Register all GPUs regardless of connection type to take advantage of easy
      // key-value lookup later on.
      ((AMD::GpuAgent*)src_gpu)->RegisterGangPeer(*dst_gpu, gang_factor);
      ((AMD::GpuAgent*)src_gpu)->RegisterRecSdmaEngIdMaskPeer(*dst_gpu, rec_sdma_eng_id_mask);
    }
  }
  return true;
}
}  // Anonymous namespace

/// @brief Discovers the available drivers, initializes them and builds the
/// system topology.
///
/// @return false if no driver was discovered or a driver failed to
/// initialize; otherwise the result of BuildTopology().
bool Load() {
  DiscoverDrivers();

  if (core::Runtime::runtime_singleton_->AgentDrivers().empty()) {
    return false;
  }

  for (auto& driver : core::Runtime::runtime_singleton_->AgentDrivers()) {
    bool model_enabled = false;
    driver->IsModelEnabled(&model_enabled);

    // Drivers that report the model as enabled are not initialized here.
    if (!model_enabled && !InitializeDriver(driver)) {
      return false;
    }
  }

  return BuildTopology();
}

/// @brief Shuts down every registered driver.
///
/// @return true if all drivers shut down successfully; false as soon as one
/// of them reports a failure (later drivers are then left untouched).
bool Unload() {
  for (auto& drv : core::Runtime::runtime_singleton_->AgentDrivers()) {
    if (drv->ShutDown() != HSA_STATUS_SUCCESS) {
      return false;
    }
  }

  return true;
}
}  // namespace amd
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2023, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
##
################################################################################

# Minimum required version of CMake
cmake_minimum_required ( VERSION 3.7 )

# Find Clang package and LLVM package. The `clang` and `llvm-objcopy`
# targets used by the custom commands below are expected to come from them.
find_package(Clang REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
find_package(LLVM REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )

# Set the target devices
set (TARGET_DEVS "gfx900;gfx1010;gfx1030;gfx1100;gfx1200")

# Set the postfix for each target device. This list is index-aligned with
# TARGET_DEVS: entry N is appended to the shader name built for device N.
set (POSTFIX "9;1010;10;11;12")

# If verbose output is enabled, print paths and target devices.
# The variable is tested by name so an unset or empty value is simply false.
if(CMAKE_VERBOSE_MAKEFILE)
	get_property(clang_path TARGET clang PROPERTY LOCATION)
	get_property(objcopy_path TARGET llvm-objcopy PROPERTY LOCATION)
	message("Using clang from: ${clang_path}")
	message("Using llvm-objcopy from: ${objcopy_path}")
	message("Blit Shaders assembled for: ${TARGET_DEVS}")
endif()

# Function to assemble one blit shader and extract its machine code.
#   TARGET_ID   - value passed to clang as -mcpu (e.g. gfx900)
#   INPUT_FILE  - assembly source, relative to the current source directory
#   OUTPUT_FILE - name of the raw .text dump; the intermediate code object
#                 is written next to it as "<OUTPUT_FILE>.hsaco"
# Despite the name, the intermediate file is an assembled code object
# (clang is invoked with "-x assembler"), not LLVM bitcode.
function(gen_kernel_bc TARGET_ID INPUT_FILE OUTPUT_FILE)
	set(CODE_OBJECT "${OUTPUT_FILE}.hsaco")

	# Separate clang arguments
	separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-x assembler -target amdgcn-amd-amdhsa -mcode-object-version=5 -fPIC -mcpu=${TARGET_ID} -o ${CODE_OBJECT} ${CMAKE_CURRENT_SOURCE_DIR}/${INPUT_FILE}")

	# Add custom command to assemble the shader into a code object
	add_custom_command(OUTPUT ${CODE_OBJECT} COMMAND clang ${CLANG_ARG_LIST}
	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${INPUT_FILE} clang
	COMMENT "BUILDING bitcode for ${OUTPUT_FILE}..."
	VERBATIM)

	separate_arguments(OBJCOPY_ARG_LIST UNIX_COMMAND "--dump-section=.text=${OUTPUT_FILE} ${CODE_OBJECT}")

	# Add custom command to dump the .text section of the code object
	add_custom_command(OUTPUT ${OUTPUT_FILE}
	COMMAND llvm-objcopy ${OBJCOPY_ARG_LIST}
	DEPENDS ${CODE_OBJECT} llvm-objcopy
	COMMENT "Extracting binary for ${OUTPUT_FILE}..."
	VERBATIM)

	# Test the variable by name so an unset or empty value is simply false.
	if(CMAKE_VERBOSE_MAKEFILE)
		message("     Blit Shader Source: " ${CMAKE_CURRENT_SOURCE_DIR}/${INPUT_FILE})
		message("     Blit Shader Binary: " ${OUTPUT_FILE})
	endif()

endfunction(gen_kernel_bc)

# Function to build one shader for one target device.
# The output is named "<BLIT_SHADER_NAME><POSTFIX>" and is appended to
# HSACO_TARG_LIST in the caller's scope.
function(build_kernel BLIT_SHADER_NAME BLIT_FILE TARGET_ID POSTFIX)
	set(shader_binary "${BLIT_SHADER_NAME}${POSTFIX}")
	gen_kernel_bc(${TARGET_ID} ${BLIT_FILE} ${shader_binary})

	# Publish the extended list directly to the caller.
	set(HSACO_TARG_LIST ${HSACO_TARG_LIST} "${shader_binary}" PARENT_SCOPE)

endfunction(build_kernel)

# Function to build kernels for all devices and shaders.
#   SHADER_NAMES - list of shader names (used as output file name prefix)
#   SHADER_FILES - list of assembly sources, index-aligned with SHADER_NAMES
# Every shader is assembled for every entry of TARGET_DEVS; the matching
# entry of POSTFIX is appended to the shader name to form the output name.
# The resulting file names are returned in HSACO_TARG_LIST (parent scope).
function(build_kernels_for_devices SHADER_NAMES SHADER_FILES)
	set(HSACO_TARG_LIST "")

	list(LENGTH TARGET_DEVS num_target_devices)
	list(LENGTH POSTFIX num_postfixes)
	list(LENGTH SHADER_NAMES num_shader_names)
	list(LENGTH SHADER_FILES num_shader_files)

	# The lists are consumed pairwise by index, so they must line up.
	if(NOT num_target_devices EQUAL num_postfixes)
		message(FATAL_ERROR "TARGET_DEVS has ${num_target_devices} entries but POSTFIX has ${num_postfixes}")
	endif()
	if(NOT num_shader_names EQUAL num_shader_files)
		message(FATAL_ERROR "SHADER_NAMES has ${num_shader_names} entries but SHADER_FILES has ${num_shader_files}")
	endif()

	# Nothing to build; also avoids a negative upper bound for foreach(RANGE).
	if(num_shader_names EQUAL 0 OR num_target_devices EQUAL 0)
		set(HSACO_TARG_LIST "" PARENT_SCOPE)
		return()
	endif()

	# foreach(RANGE n) is inclusive, so iterate up to the last valid index.
	math(EXPR last_device_index "${num_target_devices} - 1")
	math(EXPR last_shader_index "${num_shader_names} - 1")

	foreach(shader_index RANGE ${last_shader_index})
		list(GET SHADER_NAMES ${shader_index} shader_name)
		list(GET SHADER_FILES ${shader_index} shader_file)
		foreach(device_index RANGE ${last_device_index})
			# Get device from list of target devices
			list(GET TARGET_DEVS ${device_index} target_device)
			# Get postfix from list of postfixes
			list(GET POSTFIX ${device_index} postfix)
			if(CMAKE_VERBOSE_MAKEFILE)
				message("\n  Generating: ${target_device} for ${shader_name} ...")
			endif()

			# Define the name of the code object file
			set(CODE_OBJECT_FILE "${shader_name}${postfix}")

			# Assemble the current shader for the current device
			gen_kernel_bc(${target_device} ${shader_file} ${CODE_OBJECT_FILE})
			# Append the code object file to the list
			list(APPEND HSACO_TARG_LIST "${CODE_OBJECT_FILE}")
		endforeach(device_index)
	endforeach(shader_index)

	# Make the list of code object files available in the parent scope
	set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)

endfunction(build_kernels_for_devices)


# Function to collate the shader binaries listed in HSACO_TARG_LIST into a
# single generated header, "<HeaderFILE>.h", in the current build directory.
# A custom target named <HeaderFILE> is created that depends on that header.
function(generate_bytecodeStrm HeaderFILE)
	set(generated_header "${CMAKE_CURRENT_BINARY_DIR}/${HeaderFILE}.h")
	set(collate_script "${CMAKE_CURRENT_BINARY_DIR}/create_blit_shader_header.sh")

	# Stage the shell script in the build directory
	configure_file(${CMAKE_CURRENT_SOURCE_DIR}/create_blit_shader_header.sh
		${collate_script}
		COPYONLY)

	# Run the script with the header path followed by every shader binary
	add_custom_command(OUTPUT ${HeaderFILE}.h
		COMMAND ${collate_script} ${generated_header} ${HSACO_TARG_LIST}
		DEPENDS ${HSACO_TARG_LIST} ${collate_script}
		COMMENT "Collating blit shaders...")

	# Expose the generated header as a buildable target
	add_custom_target(${HeaderFILE} DEPENDS ${generated_header})

endfunction(generate_bytecodeStrm)


# Build the aligned-copy, misaligned-copy and fill kernels for every target
# device. The two lists are index-aligned: shader name N is assembled from
# source file N.
build_kernels_for_devices("kCodeCopyAligned;kCodeCopyMisaligned;kCodeFill" "blit_copyAligned.s;blit_copyMisaligned.s;blit_fill.s")

# Collate the extracted kernel binaries into the generated header
# amd_blit_shaders_v2.h and create the amd_blit_shaders_v2 target for it.
generate_bytecodeStrm("amd_blit_shaders_v2")


================================================
FILE: runtime/hsa-runtime/core/runtime/blit_shaders/blit_copyAligned.s
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
///////////////////////////////////////////////////////////////////////////////////////

.text

// 32-bit vector add with carry-out, spelled per GFX generation:
//   before gfx9 : v_add_u32    with vcc
//   gfx9        : v_add_co_u32 with vcc
//   gfx10+      : v_add_co_u32 with vcc_lo
.macro V_ADD_CO_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_add_u32           \vdst, vcc, \src0, \vsrc1
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_add_co_u32        \vdst, vcc, \src0, \vsrc1
	.else
		v_add_co_u32        \vdst, vcc_lo, \src0, \vsrc1
	.endif
.endm


// 32-bit vector add with carry-in and carry-out, spelled per GFX generation:
//   before gfx9 : v_addc_u32      with vcc
//   gfx9        : v_addc_co_u32   with vcc
//   gfx10+      : v_add_co_ci_u32 with vcc_lo
.macro V_ADD_CO_CI_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_addc_u32          \vdst, vcc, \src0, \vsrc1, vcc
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_addc_co_u32       \vdst, vcc, \src0, \vsrc1, vcc
	.else
		v_add_co_ci_u32     \vdst, vcc_lo, \src0, \vsrc1, vcc_lo
	.endif
.endm

// Unsigned 64-bit "less than" compare; the result goes to vcc before gfx10
// and to vcc_lo from gfx10 onwards.
.macro V_CMP_LT_U64 src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 10)
		v_cmp_lt_u64        vcc, \src0, \vsrc1
	.else
		v_cmp_lt_u64        vcc_lo, \src0, \vsrc1
	.endif
.endm

// Align the kernel entry to 2^8 = 256 bytes.
.p2align 8

// CopyAligned: copies memory in four consecutive phases. Each phase has its
// own source/destination start taken from the kernel arguments and runs
// until the source cursor reaches the start of the next phase:
//   Phase 1: bytes            s[4:5]   -> s[6:7],   until source >= s[8:9]
//   Phase 2: dwordx4 (16 B)   s[8:9]   -> s[10:11], until source >= s[12:13]
//   Phase 3: dwords  (4 B)    s[12:13] -> s[14:15], until source >= s[16:17]
//   Phase 4: one byte / lane  s[16:17] -> s[18:19], if source <  s[20:21]
// s24 is the per-iteration element stride (presumably the total number of
// work-items in the dispatch - TODO confirm against the host-side caller).
CopyAligned:
// Phase 2 moves kCopyAlignedVecWidth dwords per load/store.
.set kCopyAlignedVecWidth, 4
compute_pgm_rsrc2_user_sgpr = 2
compute_pgm_rsrc2_tgid_x_en = 1
enable_sgpr_kernarg_segment_ptr = 1

// Number of load/store pairs emitted per phase-2 loop iteration.
.set kCopyAlignedUnroll, 1
.set kCopyAlignedNumSGPRs, 32
// v0..v7 are scratch/address registers, v8 and up hold the phase-2 payload.
.set kCopyAlignedNumVGPRs, (8 + (kCopyAlignedUnroll * kCopyAlignedVecWidth))
// Register counts converted to granules of 8 SGPRs / 4 VGPRs, minus one.
.set CopyAlignedRsrc1SGPRs, (kCopyAlignedNumSGPRs - 1)/8
.set CopyAlignedRsrc1VGPRs, (kCopyAlignedNumVGPRs - 1)/4

compute_pgm_rsrc1_sgprs = CopyAlignedRsrc1SGPRs
compute_pgm_rsrc1_vgprs = CopyAlignedRsrc1VGPRs


  // Load the kernel arguments (0x54 bytes starting at the address in s[0:1])
  // into s4..s24, then wait for the loads to complete.
  s_load_dwordx4  s[4:7], s[0:1], 0x0
  s_load_dwordx4  s[8:11], s[0:1], 0x10
  s_load_dwordx4  s[12:15], s[0:1], 0x20
  s_load_dwordx4  s[16:19], s[0:1], 0x30
  s_load_dwordx4  s[20:23], s[0:1], 0x40
  s_load_dword    s24, s[0:1], 0x50
  s_waitcnt                lgkmcnt(0)

  // s2 = group index * 64. On gfx12 the index is read from ttmp9 instead of
  // s2 (presumably the workgroup id in both cases - TODO confirm).
  .if (.amdgcn.gfx_generation_number == 12)
    s_lshl_b32              s2, ttmp9, 0x6
  .else
    s_lshl_b32              s2, s2, 0x6
  .endif

    // v0 = s2 + v0: the flat index used to offset every phase's pointers.
    V_ADD_CO_U32            v0, s2, v0

    // Phase 1 source cursor: v[2:3] = s[4:5] + v0 (64-bit add via carry).
    v_mov_b32               v3, s5
    V_ADD_CO_U32            v2, v0, s4
    V_ADD_CO_CI_U32         v3, v3, 0x0


    // Phase 1 destination cursor: v[4:5] = s[6:7] + v0.
    v_mov_b32               v5, s7
    V_ADD_CO_U32            v4, v0, s6
    V_ADD_CO_CI_U32         v5, v5, 0x0

  L_COPY_ALIGNED_PHASE_1_LOOP:

    // Leave the loop once no lane has source < s[8:9]; otherwise restrict
    // exec to the lanes that still have work.
    // NOTE(review): on gfx10+ the compare macro writes vcc_lo while exec is
    // masked with the 64-bit vcc - confirm the intended wave size.
    V_CMP_LT_U64            v[2:3], s[8:9]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_1_DONE
    s_and_b64               exec, exec, vcc


    // Copy one byte and advance both cursors by s24 bytes.
    FLAT_LOAD_UBYTE         v1, v[2:3]
    s_waitcnt               vmcnt(0)
    V_ADD_CO_U32            v2, v2, s24
    V_ADD_CO_CI_U32         v3, v3, 0x0


    FLAT_STORE_BYTE         v[4:5], v1
    V_ADD_CO_U32            v4, v4, s24
    V_ADD_CO_CI_U32         v5, v5, 0x0

    s_branch                L_COPY_ALIGNED_PHASE_1_LOOP

  L_COPY_ALIGNED_PHASE_1_DONE:

    // Re-enable all lanes for the next phase.
    s_mov_b64               exec, 0xFFFFFFFFFFFFFFFF

// s25 = phase-2 stride in bytes: s24 * 16 for dwordx4, s24 * 4 for dword.
.if kCopyAlignedVecWidth == 4
      s_lshl_b32            s25, s24, 0x4
  .else
      s_lshl_b32            s25, s24, 0x2
  .endif

  // v1 = byte offset of this lane's first phase-2 element.
  .if kCopyAlignedVecWidth == 4
    v_lshlrev_b32          v1, 0x4, v0
  .else
    v_lshlrev_b32          v1, 0x2, v0
  .endif


    // Phase 2 source cursor: v[2:3] = s[8:9] + v1.
    v_mov_b32               v3, s9
    V_ADD_CO_U32            v2, v1, s8
    V_ADD_CO_CI_U32         v3, v3, 0x0

    // Phase 2 destination cursor: v[4:5] = s[10:11] + v1.
    v_mov_b32               v5, s11
    V_ADD_CO_U32            v4, v1, s10
    V_ADD_CO_CI_U32         v5, v5, 0x0

  L_COPY_ALIGNED_PHASE_2_LOOP:

    // Leave the loop once no lane has source < s[12:13].
    // NOTE(review): unlike phases 1 and 3, exec is not narrowed here;
    // presumably the host sizes this range so all lanes finish together -
    // confirm.
    V_CMP_LT_U64            v[2:3], s[12:13]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_2_DONE

// Recursive macro: emits one load into the payload registers for iteration
// \iter, advances the source cursor by s25, and recurses until \iter_end.
.macro mCopyAlignedPhase2Load iter iter_end
    .if kCopyAlignedVecWidth == 4
      flat_load_dwordx4    v[8 + (\iter * 4):8 + (\iter * 4) + 3], v[2:3]
    .else
      flat_load_dword      v[8 + \iter], v[2:3]
    .endif

    V_ADD_CO_U32           v2, v2, s25
    V_ADD_CO_CI_U32        v3, v3, 0x0

    .if (\iter_end - \iter)
      mCopyAlignedPhase2Load (\iter + 1), \iter_end
    .endif
.endm

mCopyAlignedPhase2Load 0, (kCopyAlignedUnroll - 1)

  // All loads must have landed before the payload registers are stored.
  s_waitcnt                vmcnt(0)

// Recursive macro: emits one store of the payload registers for iteration
// \iter, advances the destination cursor by s25, and recurses until
// \iter_end.
.macro mCopyAlignedPhase2Store iter iter_end
    .if kCopyAlignedVecWidth == 4
      flat_store_dwordX4   v[4:5], v[8 + (\iter * 4):8 + (\iter * 4) + 3]
    .else
      flat_store_dword     v[4:5], v[8 + \iter]
    .endif

	V_ADD_CO_U32         v4, v4, s25
	V_ADD_CO_CI_U32      v5, v5, 0x0


    .if (\iter_end - \iter)
      mCopyAlignedPhase2Store (\iter + 1), \iter_end
    .endif
.endm

mCopyAlignedPhase2Store 0, (kCopyAlignedUnroll - 1)

  s_branch                L_COPY_ALIGNED_PHASE_2_LOOP

  L_COPY_ALIGNED_PHASE_2_DONE:

    // Phase 3 works on single dwords: stride s25 = s24 * 4 bytes.
    s_lshl_b32              s25, s24, 0x2

    // v1 = v0 * 4; source cursor v[2:3] = s[12:13] + v1.
    v_lshlrev_b32           v1, 0x2, v0
    v_mov_b32               v3, s13
    V_ADD_CO_U32            v2, v1, s12
    V_ADD_CO_CI_U32         v3, v3, 0x0

    // Destination cursor v[4:5] = s[14:15] + v1.
    v_mov_b32               v5, s15
    V_ADD_CO_U32            v4, v1, s14
    V_ADD_CO_CI_U32         v5, v5, 0x0

  L_COPY_ALIGNED_PHASE_3_LOOP:

    // Leave the loop once no lane has source < s[16:17]; otherwise restrict
    // exec to the lanes that still have work.
    V_CMP_LT_U64            v[2:3], s[16:17]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_3_DONE
    s_and_b64               exec, exec, vcc


    // Copy one dword; the source cursor is advanced while the load is in
    // flight, and the wait is placed just before the store.
    FLAT_LOAD_DWORD         v1, v[2:3]
    V_ADD_CO_U32            v2, v2, s25
    V_ADD_CO_CI_U32         v3, v3, 0x0
    s_waitcnt               vmcnt(0)


    flat_store_dword        v[4:5], v1
    V_ADD_CO_U32            v4, v4, s25
    V_ADD_CO_CI_U32         v5, v5, 0x0

    s_branch                L_COPY_ALIGNED_PHASE_3_LOOP

  L_COPY_ALIGNED_PHASE_3_DONE:

    // Re-enable all lanes for the final phase.
    s_mov_b64               exec, 0xFFFFFFFFFFFFFFFF

    // Phase 4 source cursor: v[2:3] = s[16:17] + v0.
    v_mov_b32               v3, s17
    V_ADD_CO_U32            v2, v0, s16
    V_ADD_CO_CI_U32         v3, v3, 0x0

    // Phase 4 destination cursor: v[4:5] = s[18:19] + v0.
    v_mov_b32               v5, s19
    V_ADD_CO_U32            v4, v0, s18
    V_ADD_CO_CI_U32         v5, v5, 0x0

    // No loop here: each lane with source < s[20:21] copies exactly one byte.
    V_CMP_LT_U64            v[2:3], s[20:21]
    s_cbranch_vccz          L_COPY_ALIGNED_PHASE_4_DONE
    s_and_b64               exec, exec, vcc

    FLAT_LOAD_UBYTE         v1, v[2:3]
    s_waitcnt               vmcnt(0)

    FLAT_STORE_BYTE         v[4:5], v1

  L_COPY_ALIGNED_PHASE_4_DONE:
    s_endpgm


================================================
FILE: runtime/hsa-runtime/core/runtime/blit_shaders/blit_copyMisaligned.s
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//   	AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//     www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////////

.text

// 32-bit vector add with carry-out, spelled per GFX generation:
//   before gfx9 : v_add_u32    with vcc
//   gfx9        : v_add_co_u32 with vcc
//   gfx10+      : v_add_co_u32 with vcc_lo
.macro V_ADD_CO_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_add_u32           \vdst, vcc, \src0, \vsrc1
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_add_co_u32        \vdst, vcc, \src0, \vsrc1
	.else
		v_add_co_u32        \vdst, vcc_lo, \src0, \vsrc1
	.endif
.endm


// 32-bit vector add with carry-in and carry-out, spelled per GFX generation:
//   before gfx9 : v_addc_u32      with vcc
//   gfx9        : v_addc_co_u32   with vcc
//   gfx10+      : v_add_co_ci_u32 with vcc_lo
.macro V_ADD_CO_CI_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_addc_u32          \vdst, vcc, \src0, \vsrc1, vcc
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_addc_co_u32       \vdst, vcc, \src0, \vsrc1, vcc
	.else
		v_add_co_ci_u32     \vdst, vcc_lo, \src0, \vsrc1, vcc_lo
	.endif
.endm

// Unsigned 64-bit "less than" compare; the result goes to vcc before gfx10
// and to vcc_lo from gfx10 onwards.
.macro V_CMP_LT_U64 src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 10)
		v_cmp_lt_u64        vcc, \src0, \vsrc1
	.else
		v_cmp_lt_u64        vcc_lo, \src0, \vsrc1
	.endif
.endm

// Assembly-time constants for the CopyMisaligned kernel.
// kCopyMisalignedUnroll: number of byte load/store pairs emitted per
// iteration of the kernel's first loop.
.set kCopyMisalignedUnroll, 4
.set kCopyMisalignedNumSGPRs, 17
// v0..v5 are index/address registers; v6 and up hold the unrolled payload.
.set kCopyMisalignedNumVGPRs, 6 + kCopyMisalignedUnroll
// SGPR count expressed in granules of 8, minus one.
.set CopyMisalignedRsrc1SGPRs , (kCopyMisalignedNumSGPRs - 1) / 8

// Clamp to zero in case the register count above was configured as 0.
.if CopyMisalignedRsrc1SGPRs  < 0
    .set CopyMisalignedRsrc1SGPRs , 0
.endif

// VGPR count expressed in granules of 4, minus one, with the same clamp.
.set CopyMisalignedRsrc1VGPRs , (kCopyMisalignedNumVGPRs - 1) / 4
.if CopyMisalignedRsrc1VGPRs  < 0
    .set CopyMisalignedRsrc1VGPRs , 0
.endif

// Align the kernel entry to 2^8 = 256 bytes.
.p2align 8

// CopyMisaligned: byte-granular copy in two consecutive phases:
//   Phase 1: unrolled byte copy s[4:5] -> s[6:7],  until source >= s[8:9]
//   Phase 2: byte copy          s[8:9] -> s[10:11], until source >= s[12:13]
// s16 is the byte stride applied after every load/store (presumably the
// total number of work-items in the dispatch - TODO confirm against the
// host-side caller).
CopyMisaligned:
  compute_pgm_rsrc1_sgprs = CopyMisalignedRsrc1SGPRs
  compute_pgm_rsrc1_vgprs = CopyMisalignedRsrc1VGPRs
  compute_pgm_rsrc2_user_sgpr = 2
  compute_pgm_rsrc2_tgid_x_en = 1
  enable_sgpr_kernarg_segment_ptr = 1

  // Load the kernel arguments (0x34 bytes starting at the address in s[0:1])
  // into s4..s16, then wait for the loads to complete.
  s_load_dwordx4  s[4:7], s[0:1], 0x0
  s_load_dwordx4  s[8:11], s[0:1], 0x10
  s_load_dwordx4  s[12:15], s[0:1], 0x20
  s_load_dword    s16, s[0:1], 0x30
  s_waitcnt             lgkmcnt(0)

  // s2 = group index * 64. On gfx12 the index is read from ttmp9 instead of
  // s2 (presumably the workgroup id in both cases - TODO confirm).
  .if (.amdgcn.gfx_generation_number == 12)
    s_lshl_b32          s2, ttmp9, 0x6
  .else
    s_lshl_b32          s2, s2, 0x6
  .endif

  // v0 = s2 + v0: the flat index used to offset both phases' pointers.
  V_ADD_CO_U32          v0, s2, v0

  // Phase 1 source cursor: v[2:3] = s[4:5] + v0 (64-bit add via carry).
  v_mov_b32             v3, s5
  V_ADD_CO_U32          v2, v0, s4
  V_ADD_CO_CI_U32       v3, v3, 0x0

  // Phase 1 destination cursor: v[4:5] = s[6:7] + v0.
  v_mov_b32              v5, s7
  V_ADD_CO_U32           v4, v0, s6
  V_ADD_CO_CI_U32        v5, v5, 0x0

  L_COPY_MISALIGNED_PHASE_1_LOOP:

  // Leave the loop once no lane has source < s[8:9].
  // NOTE(review): exec is not narrowed in this loop, and the bound is only
  // checked once per kCopyMisalignedUnroll bytes; presumably the host sizes
  // this range accordingly - confirm.
  V_CMP_LT_U64          v[2:3], s[8:9]
  s_cbranch_vccz        L_COPY_MISALIGNED_PHASE_1_DONE


  // Recursive macro: loads one byte into v[6 + \iter], advances the source
  // cursor by s16, and recurses until \iter_end.
  .macro mCopyMisalignedPhase1Load iter iter_end
    flat_load_ubyte     v[6 + \iter], v[2:3]
    V_ADD_CO_U32        v2, v2, s16
    V_ADD_CO_CI_U32     v3, v3, 0x0

    .if (\iter_end - \iter)
      mCopyMisalignedPhase1Load (\iter + 1), \iter_end
    .endif
  .endm

  mCopyMisalignedPhase1Load 0, (kCopyMisalignedUnroll - 1)

  // All loads must have landed before the payload registers are stored.
  s_waitcnt                vmcnt(0)

  // Recursive macro: stores the byte held in v[6 + \iter], advances the
  // destination cursor by s16, and recurses until \iter_end.
  .macro mCopyMisalignedPhase1Store iter iter_end
    flat_store_byte        v[4:5], v[6 + \iter]
    V_ADD_CO_U32           v4, v4, s16
    V_ADD_CO_CI_U32        v5, v5, 0x0

    .if (\iter_end - \iter)
      mCopyMisalignedPhase1Store (\iter + 1), \iter_end
    .endif
  .endm

    mCopyMisalignedPhase1Store 0, (kCopyMisalignedUnroll - 1)

    s_branch                L_COPY_MISALIGNED_PHASE_1_LOOP

  L_COPY_MISALIGNED_PHASE_1_DONE:

    // Phase 2 source cursor: v[2:3] = s[8:9] + v0.
    v_mov_b32               v3, s9
    V_ADD_CO_U32            v2, v0, s8
    V_ADD_CO_CI_U32         v3, v3, 0x0

    // Phase 2 destination cursor: v[4:5] = s[10:11] + v0.
    v_mov_b32               v5, s11
    V_ADD_CO_U32            v4, v0, s10
    V_ADD_CO_CI_U32         v5, v5, 0x0

  L_COPY_MISALIGNED_PHASE_2_LOOP:

    // Leave the loop once no lane has source < s[12:13]; otherwise restrict
    // exec to the lanes that still have work.
    // NOTE(review): on gfx10+ the compare macro writes vcc_lo while exec is
    // masked with the 64-bit vcc - confirm the intended wave size.
    V_CMP_LT_U64            v[2:3], s[12:13]
    s_cbranch_vccz          L_COPY_MISALIGNED_PHASE_2_DONE
    s_and_b64               exec, exec, vcc


    // Copy one byte; the source cursor is advanced while the load is in
    // flight, and the wait is placed just before the store.
    flat_load_ubyte         v1, v[2:3]
    V_ADD_CO_U32            v2, v2, s16
    V_ADD_CO_CI_U32         v3, v3, 0x0
    s_waitcnt               vmcnt(0)

    flat_store_byte         v[4:5], v1
    V_ADD_CO_U32            v4, v4, s16
    V_ADD_CO_CI_U32         v5, v5, 0x0

    s_branch                L_COPY_MISALIGNED_PHASE_2_LOOP

  L_COPY_MISALIGNED_PHASE_2_DONE:
    s_endpgm


================================================
FILE: runtime/hsa-runtime/core/runtime/blit_shaders/blit_fill.s
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////

.text

// 32-bit vector add with carry-out, spelled per target generation:
//   generation  < 9 : v_add_u32    with the carry written to vcc
//   generation == 9 : v_add_co_u32 with the carry written to vcc
//   generation >= 10: v_add_co_u32 with the carry written to vcc_lo
.macro V_ADD_CO_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_add_u32           \vdst, vcc, \src0, \vsrc1
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_add_co_u32        \vdst, vcc, \src0, \vsrc1
	.else
		v_add_co_u32        \vdst, vcc_lo, \src0, \vsrc1
	.endif
.endm


// 32-bit vector add with carry-in and carry-out, spelled per target generation:
//   generation  < 9 : v_addc_u32      using vcc for both carries
//   generation == 9 : v_addc_co_u32   using vcc for both carries
//   generation >= 10: v_add_co_ci_u32 using vcc_lo for both carries
.macro V_ADD_CO_CI_U32 vdst, src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 9)
		v_addc_u32          \vdst, vcc, \src0, \vsrc1, vcc
	.elseif (.amdgcn.gfx_generation_number < 10)
		v_addc_co_u32       \vdst, vcc, \src0, \vsrc1, vcc
	.else
		v_add_co_ci_u32     \vdst, vcc_lo, \src0, \vsrc1, vcc_lo
	.endif
.endm

// Unsigned 64-bit "less than" compare. The result mask goes to vcc on
// generations below 10 and to vcc_lo on generation 10 and newer.
.macro V_CMP_LT_U64 src0, vsrc1
	.if (.amdgcn.gfx_generation_number < 10)
		v_cmp_lt_u64        vcc, \src0, \vsrc1
	.else
		v_cmp_lt_u64        vcc_lo, \src0, \vsrc1
	.endif
.endm

// Build-time knobs for the Fill kernel.
// kFillVecWidth == 4 selects the dwordx4 store path in phase 1; any other
// value selects the single-dword path.
.set kFillVecWidth, 4
// Number of store/advance steps emitted per trip of the phase-1 loop.
.set kFillUnroll, 1

// Register budgets. The kernel body references s0..s12, matching 13 SGPRs.
.set kFillNumSGPRs, 13
// NOTE(review): with kFillVecWidth == 4 the kernel touches v0..v7, while this
// evaluates to 5; the rounded value below still covers 8 only if the VGPR
// granule is 4 -- confirm.
.set kFillNumVGPRs, 4 + kFillUnroll

// (count - 1) / 8, clamped at zero. Presumably the granulated SGPR count
// encoding used by COMPUTE_PGM_RSRC1 -- confirm against the consumer.
.set FillRsrc1SGPRs , (kFillNumSGPRs - 1) / 8
  .if FillRsrc1SGPRs  < 0
    .set FillRsrc1SGPRs , 0
  .endif

// (count - 1) / 4, clamped at zero. Presumably the granulated VGPR count
// encoding used by COMPUTE_PGM_RSRC1 -- confirm against the consumer.
.set FillRsrc1VGPRs , (kFillNumVGPRs - 1) / 4
  .if FillRsrc1VGPRs  < 0
    .set FillRsrc1VGPRs , 0
  .endif

// Fill kernel: writes the 32-bit pattern held in s10 over two address ranges.
// Phase 1 covers [s[4:5], s[6:7]) using wide stores when kFillVecWidth == 4;
// phase 2 covers [s[6:7], s[8:9]) one dword at a time.
// Entry point is aligned to 2^8 bytes.
.p2align 8

Fill:

    // Assembler symbols describing the register budget and enabled inputs.
    // Presumably consumed by whoever builds the kernel descriptor -- confirm.
    compute_pgm_rsrc1_sgprs = FillRsrc1SGPRs
    compute_pgm_rsrc1_vgprs = FillRsrc1VGPRs
    compute_pgm_rsrc2_user_sgpr = 2
    compute_pgm_rsrc2_tgid_x_en = 1
    enable_sgpr_kernarg_segment_ptr = 1

    // Fetch eight argument dwords through the pointer in s[0:1] (presumably
    // the kernarg segment pointer) into s[4:11], then wait for them to land.
    // As used below: s[4:5] phase-1 start, s[6:7] phase-1 end / phase-2 start,
    // s[8:9] phase-2 end, s10 fill pattern, s11 per-iteration element stride.
    s_load_dwordx4  s[4:7], s[0:1], 0x0
    s_load_dwordx4  s[8:11], s[0:1], 0x10
    s_waitcnt       lgkmcnt(0)

   // Multiply the workgroup id by 64 (presumably the workgroup size -- confirm).
   // Generation 12 takes the id from ttmp9 instead of s2.
   .if (.amdgcn.gfx_generation_number == 12)
     s_lshl_b32      s2, ttmp9, 0x6
   .else
     s_lshl_b32      s2, s2, 0x6
   .endif

    // v0 = scaled workgroup id + incoming v0 (presumably the local work-item id).
    V_ADD_CO_U32     v0, s2, v0

// Recursively emits one v_mov per vector lane, copying the pattern in s10
// into v4, v5, ... up to v(4 + iter_end).
.macro mFillPattern iter iter_end
    v_mov_b32              v[4 + \iter], s10

    .if (\iter_end - \iter)
      mFillPattern (\iter + 1), \iter_end
    .endif
  .endm

  mFillPattern 0, (kFillVecWidth - 1)

  // s12 = phase-1 stride in bytes: s11 scaled by 16 (dwordx4) or by 4 (dword).
  .if kFillVecWidth == 4
      s_lshl_b32            s12, s11, 0x4
  .else
      s_lshl_b32            s12, s11, 0x2
  .endif


  // v1 = this lane's starting byte offset, using the same scale as the stride.
  .if kFillVecWidth == 4
    v_lshlrev_b32          v1, 0x4, v0
  .else
    v_lshlrev_b32          v1, 0x2, v0
  .endif

   // v[2:3] = s[4:5] + v1, as a 64-bit add with carry into the high dword.
   v_mov_b32               v3, s5
   V_ADD_CO_U32            v2, v1, s4
   V_ADD_CO_CI_U32         v3, v3, 0x0

  L_FILL_PHASE_1_LOOP:

    // Leave phase 1 once the compare mask is zero, i.e. no lane has an
    // address below s[6:7]. Lanes are not masked individually in this phase.
    V_CMP_LT_U64            v[2:3], s[6:7]
    s_cbranch_vccz          L_FILL_PHASE_1_DONE

// Emits one store at v[2:3] followed by a 64-bit advance of the address by
// s12, repeated for iter = 0 .. iter_end.
.macro mFillPhase1 iter iter_end
    .if kFillVecWidth == 4
      flat_store_dwordx4   v[2:3], v[4:7]
    .else
      flat_store_dword     v[2:3], v4
    .endif

     V_ADD_CO_U32          v2, v2, s12
     V_ADD_CO_CI_U32       v3, v3, 0x0

    .if \iter < \iter_end
      mFillPhase1 (\iter + 1), \iter_end
    .endif
.endm

mFillPhase1 0, kFillUnroll - 1

  s_branch                L_FILL_PHASE_1_LOOP

  L_FILL_PHASE_1_DONE:

    // Phase 2 always works in single dwords: stride and lane offset are
    // rescaled by 4 bytes and the address restarts from s[6:7].
    s_lshl_b32              s12, s11, 0x2

    v_lshlrev_b32           v1, 0x2, v0
    v_mov_b32               v3, s7
    V_ADD_CO_U32            v2, v1, s6
    V_ADD_CO_CI_U32         v3, v3, 0x0

  L_FILL_PHASE_2_LOOP:

    // Exit when no lane is below s[8:9]; otherwise disable the lanes that
    // have reached the end so they stop storing.
    // NOTE(review): on generation 10+ the compare macro writes vcc_lo while
    // this masks exec with the 64-bit vcc pair -- confirm this matches the
    // wave size the kernel is launched with.
    V_CMP_LT_U64            v[2:3], s[8:9]
    s_cbranch_vccz          L_FILL_PHASE_2_DONE
    s_and_b64               exec, exec, vcc


    // Store one dword of the pattern and advance the address by s12.
    flat_store_dword        v[2:3], v4
    V_ADD_CO_U32            v2, v2, s12
    V_ADD_CO_CI_U32         v3, v3, 0x0

    s_branch                L_FILL_PHASE_2_LOOP

  L_FILL_PHASE_2_DONE:
    s_endpgm


================================================
FILE: runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh
================================================
#!/bin/bash -e
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Usage: create_blit_shader_header.sh <output-header> [shader-binary...]
#
# Emits a C++ header that wraps the `xxd -i` dump of every listed shader binary
# inside namespace rocr::AMD.

# Abort on the first failing command even when the script is started through
# an explicit `bash script.sh`, which bypasses the shebang options.
set -e

amd_gpu_shaders="$1"

if [ -z "$amd_gpu_shaders" ]
then
    echo "usage: $0 <output-header> [shader-binary...]" >&2
    exit 1
fi

if ! command -v xxd >/dev/null
then
    echo "xxd not found!"
    exit 1
fi

# Create the file in a temporary location and then move it in atomically.
# The temporary lives next to the destination so the final rename stays on one
# filesystem; it is removed on any exit path that did not reach the rename.
tmp_header="${amd_gpu_shaders}.tmp.$$"
trap 'rm -f "$tmp_header"' EXIT

{
cat <<EOF
//==============================================================================
//  This file is automatically generated during build process, don't modify it
//==============================================================================

namespace rocr {
namespace AMD {

EOF

shift
for file in "$@"
do
    # Quoted so that paths containing spaces or glob characters stay intact.
    xxd -i "$file"
    echo -e '\n'
done

cat <<EOF
} // namespace AMD
} // namespace rocr

EOF

} > "$tmp_header"

mv -f "$tmp_header" "$amd_gpu_shaders"


================================================
FILE: runtime/hsa-runtime/core/runtime/cache.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/cache.h"
#include "assert.h"

namespace rocr {
namespace core {

// Copies one cache attribute into caller-supplied storage.
//
// attribute: which hsa_cache_info_t property to report.
// value:     destination; written as uint32_t (name length, size),
//            const char* (name) or uint8_t (level). Not checked for null here.
// Returns HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT (leaving
// `value` untouched) when the attribute is not one of the four handled here.
hsa_status_t Cache::GetInfo(hsa_cache_info_t attribute, void* value) {
  switch (attribute) {
    case HSA_CACHE_INFO_NAME_LENGTH: {
      uint32_t* const out_length = static_cast<uint32_t*>(value);
      *out_length = name_.size();
      return HSA_STATUS_SUCCESS;
    }
    case HSA_CACHE_INFO_NAME: {
      // Hands out a pointer into name_; no characters are copied.
      const char** const out_name = static_cast<const char**>(value);
      *out_name = name_.c_str();
      return HSA_STATUS_SUCCESS;
    }
    case HSA_CACHE_INFO_LEVEL: {
      uint8_t* const out_level = static_cast<uint8_t*>(value);
      *out_level = level_;
      return HSA_STATUS_SUCCESS;
    }
    case HSA_CACHE_INFO_SIZE: {
      uint32_t* const out_size = static_cast<uint32_t*>(value);
      *out_size = size_;
      return HSA_STATUS_SUCCESS;
    }
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
}
}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/default_signal.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/default_signal.h"

#if defined(__i386__) || defined(__x86_64__)
#include <mwaitxintrin.h>
#define MWAITX_ECX_TIMER_ENABLE 0x2  // BIT(1)
#endif

namespace rocr {
namespace core {

// Builds a signal on top of the given ABI block, forwarding both arguments to
// the Signal base. The signal is tagged as a user signal and its event
// mailbox pointer is cleared to zero.
BusyWaitSignal::BusyWaitSignal(SharedSignal* abi_block, bool enableIPC)
    : Signal(abi_block, enableIPC) {
  signal_.kind = AMD_SIGNAL_KIND_USER;
  signal_.event_mailbox_ptr = static_cast<uint64_t>(0);
}

// Reads signal_.value through atomic::Load with relaxed ordering.
hsa_signal_value_t BusyWaitSignal::LoadRelaxed() {
  const auto current = atomic::Load(&signal_.value, std::memory_order_relaxed);
  return hsa_signal_value_t(current);
}

// Reads signal_.value through atomic::Load with acquire ordering.
hsa_signal_value_t BusyWaitSignal::LoadAcquire() {
  const auto current = atomic::Load(&signal_.value, std::memory_order_acquire);
  return hsa_signal_value_t(current);
}

// Writes `value` to signal_.value through atomic::Store with relaxed ordering.
void BusyWaitSignal::StoreRelaxed(hsa_signal_value_t value) {
  const int64_t payload = int64_t(value);
  atomic::Store(&signal_.value, payload, std::memory_order_relaxed);
}

// Writes `value` to signal_.value through atomic::Store with release ordering.
void BusyWaitSignal::StoreRelease(hsa_signal_value_t value) {
  const int64_t payload = int64_t(value);
  atomic::Store(&signal_.value, payload, std::memory_order_release);
}

// Polls the signal until `condition` holds against `compare_value` or the
// timeout elapses.
//
// condition / compare_value: predicate forwarded to CheckSignalCondition.
// timeout:   converted to a fast_clock duration via timer::GetFastTimeout.
// wait_hint: accepted for interface compatibility; not consulted here.
//
// Returns the last value read from the signal, whether or not it satisfied
// the condition, or 0 if the signal is found to be invalid while polling.
hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
                                               hsa_signal_value_t compare_value, uint64_t timeout,
                                               hsa_wait_state_t wait_hint) {
  // Hold a reference for the duration of the wait; dropped on every exit path.
  Retain();
  MAKE_SCOPE_GUARD([&]() { Release(); });

  // Advertise that a waiter is present; undone on every exit path.
  waiting_++;
  MAKE_SCOPE_GUARD([&]() { waiting_--; });

  const uint32_t& abort_timeout =
      core::Runtime::runtime_singleton_->flag().signal_abort_timeout();

  const timer::fast_clock::time_point wait_begin = timer::fast_clock::now();
  const timer::fast_clock::duration wait_budget = timer::GetFastTimeout(timeout);

  for (;;) {
    if (!IsValid()) return 0;

    const int64_t observed = atomic::Load(&signal_.value, std::memory_order_relaxed);

    // The clock is only sampled when the condition has not been met.
    if (CheckSignalCondition(observed, condition, compare_value) ||
        timer::fast_clock::now() - wait_begin > wait_budget) {
      return observed;
    }

    timer::CheckAbortTimeout(wait_begin, abort_timeout);

    if (!g_use_mwaitx) continue;

    // Use timer-enabled mwaitx for busy waiting
    timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
  }
}

// Same wait as WaitRelaxed, followed by an acquire fence before the observed
// value is handed back to the caller.
hsa_signal_value_t BusyWaitSignal::WaitAcquire(hsa_signal_condition_t condition,
                                               hsa_signal_value_t compare_value, uint64_t timeout,
                                               hsa_wait_state_t wait_hint) {
  const hsa_signal_value_t observed = WaitRelaxed(condition, compare_value, timeout, wait_hint);
  std::atomic_thread_fence(std::memory_order_acquire);
  return observed;
}

// Applies atomic::And to signal_.value with `value`, relaxed ordering.
void BusyWaitSignal::AndRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_relaxed);
}

// Applies atomic::And to signal_.value with `value`, acquire ordering.
void BusyWaitSignal::AndAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_acquire);
}

// Applies atomic::And to signal_.value with `value`, release ordering.
void BusyWaitSignal::AndRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_release);
}

// Applies atomic::And to signal_.value with `value`, acquire-release ordering.
void BusyWaitSignal::AndAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::And(&signal_.value, operand, std::memory_order_acq_rel);
}

// Applies atomic::Or to signal_.value with `value`, relaxed ordering.
void BusyWaitSignal::OrRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_relaxed);
}

// Applies atomic::Or to signal_.value with `value`, acquire ordering.
void BusyWaitSignal::OrAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_acquire);
}

// Applies atomic::Or to signal_.value with `value`, release ordering.
void BusyWaitSignal::OrRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_release);
}

// Applies atomic::Or to signal_.value with `value`, acquire-release ordering.
void BusyWaitSignal::OrAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Or(&signal_.value, operand, std::memory_order_acq_rel);
}

// Applies atomic::Xor to signal_.value with `value`, relaxed ordering.
void BusyWaitSignal::XorRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_relaxed);
}

// Applies atomic::Xor to signal_.value with `value`, acquire ordering.
void BusyWaitSignal::XorAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_acquire);
}

// Applies atomic::Xor to signal_.value with `value`, release ordering.
void BusyWaitSignal::XorRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_release);
}

// Applies atomic::Xor to signal_.value with `value`, acquire-release ordering.
void BusyWaitSignal::XorAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Xor(&signal_.value, operand, std::memory_order_acq_rel);
}

// Applies atomic::Add to signal_.value with `value`, relaxed ordering.
void BusyWaitSignal::AddRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Add(&signal_.value, operand, std::memory_order_relaxed);
}

// Applies atomic::Add to signal_.value with `value`, acquire ordering.
void BusyWaitSignal::AddAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Add(&signal_.value, operand, std::memory_order_acquire);
}

// Applies atomic::Add to signal_.value with `value`, release ordering.
void BusyWaitSignal::AddRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Add(&signal_.value, operand, std::memory_order_release);
}

// Applies atomic::Add to signal_.value with `value`, acquire-release ordering.
void BusyWaitSignal::AddAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Add(&signal_.value, operand, std::memory_order_acq_rel);
}

// Applies atomic::Sub to signal_.value with `value`, relaxed ordering.
void BusyWaitSignal::SubRelaxed(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Sub(&signal_.value, operand, std::memory_order_relaxed);
}

// Applies atomic::Sub to signal_.value with `value`, acquire ordering.
void BusyWaitSignal::SubAcquire(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Sub(&signal_.value, operand, std::memory_order_acquire);
}

// Applies atomic::Sub to signal_.value with `value`, release ordering.
void BusyWaitSignal::SubRelease(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Sub(&signal_.value, operand, std::memory_order_release);
}

// Applies atomic::Sub to signal_.value with `value`, acquire-release ordering.
void BusyWaitSignal::SubAcqRel(hsa_signal_value_t value) {
  const int64_t operand = int64_t(value);
  atomic::Sub(&signal_.value, operand, std::memory_order_acq_rel);
}

// Swaps `value` into signal_.value via atomic::Exchange (relaxed ordering)
// and returns what atomic::Exchange reports.
hsa_signal_value_t BusyWaitSignal::ExchRelaxed(hsa_signal_value_t value) {
  const int64_t replacement = int64_t(value);
  const auto reported = atomic::Exchange(&signal_.value, replacement, std::memory_order_relaxed);
  return hsa_signal_value_t(reported);
}

// Swaps `value` into signal_.value via atomic::Exchange (acquire ordering)
// and returns what atomic::Exchange reports.
hsa_signal_value_t BusyWaitSignal::ExchAcquire(hsa_signal_value_t value) {
  const int64_t replacement = int64_t(value);
  const auto reported = atomic::Exchange(&signal_.value, replacement, std::memory_order_acquire);
  return hsa_signal_value_t(reported);
}

// Swaps `value` into signal_.value via atomic::Exchange (release ordering)
// and returns what atomic::Exchange reports.
hsa_signal_value_t BusyWaitSignal::ExchRelease(hsa_signal_value_t value) {
  const int64_t replacement = int64_t(value);
  const auto reported = atomic::Exchange(&signal_.value, replacement, std::memory_order_release);
  return hsa_signal_value_t(reported);
}

// Swaps `value` into signal_.value via atomic::Exchange (acquire-release
// ordering) and returns what atomic::Exchange reports.
hsa_signal_value_t BusyWaitSignal::ExchAcqRel(hsa_signal_value_t value) {
  const int64_t replacement = int64_t(value);
  const auto reported = atomic::Exchange(&signal_.value, replacement, std::memory_order_acq_rel);
  return hsa_signal_value_t(reported);
}

// Compare-and-swap on signal_.value with relaxed ordering. atomic::Cas is
// handed the new value first and the expected value second; its result is
// returned to the caller.
hsa_signal_value_t BusyWaitSignal::CasRelaxed(hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  const int64_t desired = int64_t(value);
  const int64_t comparand = int64_t(expected);
  const auto reported = atomic::Cas(&signal_.value, desired, comparand, std::memory_order_relaxed);
  return hsa_signal_value_t(reported);
}

// Compare-and-swap on signal_.value with acquire ordering. atomic::Cas is
// handed the new value first and the expected value second; its result is
// returned to the caller.
hsa_signal_value_t BusyWaitSignal::CasAcquire(hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  const int64_t desired = int64_t(value);
  const int64_t comparand = int64_t(expected);
  const auto reported = atomic::Cas(&signal_.value, desired, comparand, std::memory_order_acquire);
  return hsa_signal_value_t(reported);
}

// Compare-and-swap on signal_.value with release ordering. atomic::Cas is
// handed the new value first and the expected value second; its result is
// returned to the caller.
hsa_signal_value_t BusyWaitSignal::CasRelease(hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  const int64_t desired = int64_t(value);
  const int64_t comparand = int64_t(expected);
  const auto reported = atomic::Cas(&signal_.value, desired, comparand, std::memory_order_release);
  return hsa_signal_value_t(reported);
}

// Compare-and-swap on signal_.value with acquire-release ordering. atomic::Cas
// is handed the new value first and the expected value second; its result is
// returned to the caller.
hsa_signal_value_t BusyWaitSignal::CasAcqRel(hsa_signal_value_t expected,
                                             hsa_signal_value_t value) {
  const int64_t desired = int64_t(value);
  const int64_t comparand = int64_t(expected);
  const auto reported = atomic::Cas(&signal_.value, desired, comparand, std::memory_order_acq_rel);
  return hsa_signal_value_t(reported);
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/host_queue.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/host_queue.h"

#include "core/inc/runtime.h"
#include "core/util/utils.h"

namespace rocr {
namespace core {

// Builds a host-side (soft) queue.
//
// shared_queue:    forwarded to the Queue base together with a literal 0.
// region:          memory region the packet ring is allocated from.
// ring_size:       number of AQL packet slots in the ring.
// type / features: copied verbatim into the public hsa_queue_t fields.
// doorbell_signal: signal stored as the queue's doorbell.
//
// Throws AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES) when the ring
// cannot be allocated. Both the registration of `this` and the ring allocation
// are protected by scope guards that roll them back unless construction runs
// to completion.
HostQueue::HostQueue(core::SharedQueue* shared_queue, hsa_region_t region, uint32_t ring_size,
                     hsa_queue_type32_t type, uint32_t features, hsa_signal_t doorbell_signal)
    : Queue(shared_queue, 0), size_(ring_size) {
  HSA::hsa_memory_register(this, sizeof(HostQueue));
  MAKE_NAMED_SCOPE_GUARD(registerGuard,
                         [&]() { HSA::hsa_memory_deregister(this, sizeof(HostQueue)); });

  const size_t queue_buffer_size = size_ * sizeof(AqlPacket);
  if (HSA_STATUS_SUCCESS !=
      HSA::hsa_memory_allocate(region, queue_buffer_size, &ring_)) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Host queue buffer alloc failed\n");
  }
  // Roll back with the buffer pointer itself (not the address of the member
  // that stores it), matching what the destructor releases.
  MAKE_NAMED_SCOPE_GUARD(bufferGuard, [&]() { HSA::hsa_memory_free(ring_); });

  assert(IsMultipleOf(ring_, kRingAlignment));
  assert(ring_ != NULL);

  // Fill the ring buffer with invalid packet headers.
  // Leave packet content uninitialized to help track errors.
  for (uint32_t pkt_id = 0; pkt_id < size_; pkt_id++) {
    (((AqlPacket*)ring_)[pkt_id]).dispatch.header = HSA_PACKET_TYPE_INVALID;
  }

  // Publish the ring through the public queue descriptor.
  amd_queue_.hsa_queue.base_address = ring_;
  amd_queue_.hsa_queue.size = size_;
  amd_queue_.hsa_queue.doorbell_signal = doorbell_signal;
  amd_queue_.hsa_queue.id = this->GetQueueId();
  amd_queue_.hsa_queue.type = type;
  amd_queue_.hsa_queue.features = features;
#ifdef HSA_LARGE_MODEL
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 1);
#else
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 0);
#endif
  // Start with empty read/write indices and profiling switched off.
  amd_queue_.write_dispatch_id = amd_queue_.read_dispatch_id = 0;
  AMD_HSA_BITS_SET(
      amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 0);

  // Construction succeeded: keep the ring and the registration.
  bufferGuard.Dismiss();
  registerGuard.Dismiss();
}

// Releases what the queue holds: the shared queue block, the packet ring, and
// finally the registration of this object made in the constructor.
// NOTE(review): shared_queue_ is passed in by the creator of the queue and is
// freed here with hsa_memory_free -- confirm that this class owns it and that
// it was obtained from a matching allocator.
HostQueue::~HostQueue() {
  HSA::hsa_memory_free(shared_queue_);
  HSA::hsa_memory_free(ring_);
  HSA::hsa_memory_deregister(this, sizeof(HostQueue));
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/hsa.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA C to C++ interface implementation.
// This file does argument checking and conversion to C++.
#include <cstdio>
#include <cstring>
#include <string>
#include <sys/types.h>

#include "core/inc/runtime.h"
#include "core/inc/agent.h"
#include "core/inc/host_queue.h"
#include "core/inc/isa.h"
#include "core/inc/memory_region.h"
#include "core/inc/queue.h"
#include "core/inc/signal.h"
#include "core/inc/cache.h"
#include "core/inc/amd_elf_image.hpp"
#include "core/inc/amd_hsa_loader.hpp"
#include "core/inc/amd_loader_context.hpp"
#include "core/inc/hsa_ven_amd_loader_impl.h"
#include "inc/hsa_ven_amd_aqlprofile.h"
#include "core/inc/hsa_ext_amd_impl.h"

namespace rocr {

using namespace amd::hsa;

// Compile-time map from a runtime object pointer type to the HSA status code
// that IS_VALID reports when such a pointer is null or fails IsValid().
// Only the specializations below exist; any other type is a compile error.
template <class T>
struct ValidityError;

template <>
struct ValidityError<core::Signal*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_SIGNAL };
};

template <>
struct ValidityError<core::SignalGroup*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP };
};

template <>
struct ValidityError<core::Agent*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_AGENT };
};

template <>
struct ValidityError<core::MemoryRegion*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_REGION };
};

template <>
struct ValidityError<core::Queue*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_QUEUE };
};

template <>
struct ValidityError<core::Cache*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_CACHE };
};

template <>
struct ValidityError<core::Isa*> {
  enum { kValue = HSA_STATUS_ERROR_INVALID_ISA };
};

// Pointers to const objects report the same code as their non-const form.
template <class T>
struct ValidityError<const T*> {
  enum { kValue = ValidityError<T*>::kValue };
};

// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT when
// `ptr` compares equal to nullptr.
#define IS_BAD_PTR(ptr)                                                        \
  do {                                                                         \
    if (nullptr == (ptr)) {                                                    \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT unless
// `profile` is HSA_PROFILE_BASE or HSA_PROFILE_FULL.
// The argument is parenthesized so that expression arguments (for example a
// conditional expression) are compared as a whole.
#define IS_BAD_PROFILE(profile)                                                \
  do {                                                                         \
    if ((profile) != HSA_PROFILE_BASE &&                                       \
        (profile) != HSA_PROFILE_FULL) {                                       \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT unless
// `executable_state` is HSA_EXECUTABLE_STATE_FROZEN or
// HSA_EXECUTABLE_STATE_UNFROZEN.
// The argument is parenthesized so that expression arguments are compared as
// a whole.
#define IS_BAD_EXECUTABLE_STATE(executable_state)                              \
  do {                                                                         \
    if ((executable_state) != HSA_EXECUTABLE_STATE_FROZEN &&                   \
        (executable_state) != HSA_EXECUTABLE_STATE_UNFROZEN) {                 \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT unless
// `rounding_mode` is one of the DEFAULT, ZERO or NEAR values of
// HSA_DEFAULT_FLOAT_ROUNDING_MODE_*.
// The argument is parenthesized so that expression arguments are compared as
// a whole.
#define IS_BAD_ROUNDING_MODE(rounding_mode)                                    \
  do {                                                                         \
    if ((rounding_mode) != HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT &&          \
        (rounding_mode) != HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO &&             \
        (rounding_mode) != HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR) {             \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT unless
// `fp_type` is HSA_FP_TYPE_16, HSA_FP_TYPE_32 or HSA_FP_TYPE_64.
// The argument is parenthesized so that expression arguments are compared as
// a whole.
#define IS_BAD_FP_TYPE(fp_type)                                                \
  do {                                                                         \
    if ((fp_type) != HSA_FP_TYPE_16 &&                                         \
        (fp_type) != HSA_FP_TYPE_32 &&                                         \
        (fp_type) != HSA_FP_TYPE_64) {                                         \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT unless
// `flush_mode` is HSA_FLUSH_MODE_FTZ or HSA_FLUSH_MODE_NON_FTZ.
// The argument is parenthesized so that expression arguments are compared as
// a whole.
#define IS_BAD_FLUSH_MODE(flush_mode)                                          \
  do {                                                                         \
    if ((flush_mode) != HSA_FLUSH_MODE_FTZ &&                                  \
        (flush_mode) != HSA_FLUSH_MODE_NON_FTZ) {                              \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return the ValidityError code registered for
// the static type of `ptr` when the pointer is null or its IsValid() check
// fails.
#define IS_VALID(ptr)                                                          \
  do {                                                                         \
    if (((ptr) == nullptr) || !((ptr)->IsValid())) {                           \
      return hsa_status_t(ValidityError<decltype(ptr)>::kValue);               \
    }                                                                          \
  } while (false)
// Makes the enclosing function return `status` when it differs from
// HSA_STATUS_SUCCESS.
// The argument is evaluated exactly once and cached, so passing a call
// expression does not run the call a second time on the failure path.
#define CHECK_STATUS(status)                                                   \
  do {                                                                         \
    const auto check_status_result_ = (status);                                \
    if (check_status_result_ != HSA_STATUS_SUCCESS)                            \
      return check_status_result_;                                             \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_OUT_OF_RESOURCES when
// `ptr` compares equal to nullptr.
#define CHECK_ALLOC(ptr)                                                       \
  do {                                                                         \
    if (nullptr == (ptr)) {                                                    \
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;                                \
    }                                                                          \
  } while (false)
// Makes the enclosing function return HSA_STATUS_ERROR_NOT_INITIALIZED when
// the runtime singleton reports that it is not open.
#define IS_OPEN()                                                              \
  do {                                                                         \
    if (!core::Runtime::runtime_singleton_->IsOpen()) {                        \
      return HSA_STATUS_ERROR_NOT_INITIALIZED;                                 \
    }                                                                          \
  } while (false)

// Null-tolerant validity check: false for a null pointer, otherwise whatever
// ptr->IsValid() reports.
// Written as a boolean expression rather than selecting NULL for the null
// case, which relied on NULL converting to bool and does not compile where
// NULL is defined as nullptr.
template <class T>
static __forceinline bool IsValid(T* ptr) {
  return (ptr != nullptr) && ptr->IsValid();
}

namespace AMD {
// Declared here and defined elsewhere. The CATCH macro returns its result
// from inside a catch-all handler.
hsa_status_t handleException();

// Counterpart used by CATCHRET for entry points whose return type is not
// hsa_status_t: runs handleException() and then aborts the process. The
// trailing return is never reached; it only gives the function a value of
// type T to return.
template <class T>
static __forceinline T handleExceptionT() {
  handleException();
  abort();
  return T();
}
}  // namespace AMD

// Exception firewall for the C entry points. TRY opens a try block and must
// be paired with CATCH or CATCHRET in the same function, which close it.
// CATCH returns the status produced by AMD::handleException(); CATCHRET is for
// functions returning RETURN_TYPE and routes through AMD::handleExceptionT.
#define TRY try {
#define CATCH } catch(...) { return AMD::handleException(); }
#define CATCHRET(RETURN_TYPE) } catch(...) { return AMD::handleExceptionT<RETURN_TYPE>(); }

//-----------------------------------------------------------------------------
// Basic Checks
//-----------------------------------------------------------------------------
// Every AQL packet flavour must have the same size as the barrier-AND packet,
// and that size must be 64 bytes.
static_assert(sizeof(hsa_barrier_and_packet_t) == sizeof(hsa_kernel_dispatch_packet_t),
              "AQL packet definitions have wrong sizes!");
static_assert(sizeof(hsa_barrier_and_packet_t) == sizeof(hsa_agent_dispatch_packet_t),
              "AQL packet definitions have wrong sizes!");
static_assert(sizeof(hsa_barrier_and_packet_t) == 64,
              "AQL packet definitions have wrong sizes!");
static_assert(sizeof(hsa_barrier_and_packet_t) == sizeof(hsa_barrier_or_packet_t),
              "AQL packet definitions have wrong sizes!");

// HSA_LARGE_MODEL has to agree with the pointer width of the build.
#if defined(HSA_LARGE_MODEL)
static_assert(sizeof(void*) == 8, "HSA_LARGE_MODEL is set incorrectly!");
#else
static_assert(sizeof(void*) == 4, "HSA_LARGE_MODEL is set incorrectly!");
#endif

// Placeholder for a platform restriction; the assertion itself is disabled.
#if !(defined(HSA_LARGE_MODEL) && defined(__linux__))
// static_assert(false, "Only HSA_LARGE_MODEL (64bit mode) and Linux supported.");
#endif

namespace HSA {

//---------------------------------------------------------------------------//
//  Init/Shutdown routines
//---------------------------------------------------------------------------//
/// @brief Initializes the runtime by forwarding to Runtime::Acquire().
///
/// @return Status reported by Acquire(), or the status produced by the
/// exception handler if anything throws.
hsa_status_t hsa_init() {
  TRY;
  const hsa_status_t acquire_status = core::Runtime::runtime_singleton_->Acquire();
  return acquire_status;
  CATCH;
}

/// @brief Shuts the runtime down by forwarding to Runtime::Release().
///
/// @return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open,
/// otherwise the status reported by Release().
hsa_status_t hsa_shut_down() {
  TRY;
  IS_OPEN();
  const hsa_status_t release_status = core::Runtime::runtime_singleton_->Release();
  return release_status;
  CATCH;
}

//---------------------------------------------------------------------------//
//  System
//---------------------------------------------------------------------------//
/// @brief Queries a system attribute from the runtime.
///
/// @param attribute Attribute to query
///
/// @param value Output buffer that receives the attribute value
///
/// @return Status reported by Runtime::GetSystemInfo(), or an error from the
/// IS_OPEN / IS_BAD_PTR checks.
hsa_status_t hsa_system_get_info(hsa_system_info_t attribute, void* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);
  const hsa_status_t query_status =
      core::Runtime::runtime_singleton_->GetSystemInfo(attribute, value);
  return query_status;
  CATCH;
}

/// @brief Returns the symbolic name of an extension identifier.
///
/// @param extension Extension identifier to translate
///
/// @param name Output parameter that receives a pointer to a string literal
/// naming the extension. For an unrecognized identifier it is set to
/// "HSA_EXTENSION_INVALID".
///
/// @return HSA_STATUS_SUCCESS for a recognized extension,
/// HSA_STATUS_ERROR_INVALID_ARGUMENT for an unrecognized one, or an error from
/// the IS_OPEN / IS_BAD_PTR checks.
hsa_status_t hsa_extension_get_name(uint16_t extension, const char** name) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(name);
  switch (extension) {
    case HSA_EXTENSION_FINALIZER:
      *name = "HSA_EXTENSION_FINALIZER";
      break;
    case HSA_EXTENSION_IMAGES:
      *name = "HSA_EXTENSION_IMAGES";
      break;
    case HSA_EXTENSION_PERFORMANCE_COUNTERS:
      *name = "HSA_EXTENSION_PERFORMANCE_COUNTERS";
      break;
    case HSA_EXTENSION_PROFILING_EVENTS:
      *name = "HSA_EXTENSION_PROFILING_EVENTS";
      break;
    case HSA_EXTENSION_AMD_PROFILER:
      *name = "HSA_EXTENSION_AMD_PROFILER";
      break;
    case HSA_EXTENSION_AMD_LOADER:
      *name = "HSA_EXTENSION_AMD_LOADER";
      break;
    case HSA_EXTENSION_AMD_AQLPROFILE:
      *name = "HSA_EXTENSION_AMD_AQLPROFILE";
      break;
    // The PC sampling extension has a function table served by
    // hsa_system_get_major_extension_table, so it must be nameable as well.
    case HSA_EXTENSION_AMD_PC_SAMPLING:
      *name = "HSA_EXTENSION_AMD_PC_SAMPLING";
      break;
    default:
      *name = "HSA_EXTENSION_INVALID";
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Reports whether the system supports an extension at a given version.
///
/// @param extension Extension identifier; must lie in the standard range or in
/// the AMD vendor range
///
/// @param version_major Requested major version
///
/// @param version_minor Requested minor version
///
/// @param result Output parameter set to true when the requested version is
/// supported, false otherwise
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an out-of-range extension or a
/// null @p result, otherwise HSA_STATUS_SUCCESS.
hsa_status_t hsa_system_extension_supported(uint16_t extension, uint16_t version_major,
                                            uint16_t version_minor, bool* result) {
  TRY;
  IS_OPEN();

  const bool in_std_range = extension <= HSA_EXTENSION_STD_LAST;
  const bool in_amd_range =
      extension >= HSA_AMD_FIRST_EXTENSION && extension <= HSA_AMD_LAST_EXTENSION;
  if (!(in_std_range || in_amd_range) || result == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *result = false;

  // These two extensions are always reported as unsupported.
  switch (extension) {
    case HSA_EXTENSION_PERFORMANCE_COUNTERS:
    case HSA_EXTENSION_PROFILING_EVENTS:
      return HSA_STATUS_SUCCESS;
    default:
      break;
  }

  uint16_t runtime_major = 0;
  hsa_status_t status = core::Runtime::runtime_singleton_->GetSystemInfo(
      HSA_SYSTEM_INFO_VERSION_MAJOR, &runtime_major);
  assert(status == HSA_STATUS_SUCCESS);

  // The minor version compared against is fixed at zero.
  const uint16_t runtime_minor = 0;
  if (version_major <= runtime_major && version_minor <= runtime_minor) {
    *result = true;
  }

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Reports whether the system supports a major version of an extension
/// and, if so, the highest supported minor version.
///
/// @param extension Extension identifier
///
/// @param version_major Requested major version
///
/// @param version_minor Output parameter set to 0 when the extension is
/// supported; left untouched otherwise
///
/// @param result Output parameter set to true when supported, false otherwise
///
/// @return HSA_STATUS_SUCCESS, or an error from the IS_OPEN / IS_BAD_PTR checks.
hsa_status_t hsa_system_major_extension_supported(uint16_t extension, uint16_t version_major,
                                                  uint16_t* version_minor, bool* result) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(version_minor);
  IS_BAD_PTR(result);

  // Every extension recognized here is only offered as major version 1.
  bool supported = false;
  if (version_major == 1) {
    switch (extension) {
      case HSA_EXTENSION_IMAGES:
      case HSA_EXTENSION_FINALIZER:
      case HSA_EXTENSION_AMD_LOADER:
      case HSA_EXTENSION_AMD_AQLPROFILE:
        supported = true;
        break;
      default:
        break;
    }
  }

  if (supported) {
    *version_minor = 0;
  }
  *result = supported;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Maps an extension and its major/minor version to the byte size of the
/// matching versioned function table.
///
/// The table type name is rebuilt as "<prefix><major>_<minor, 2 digits>_pfn_t"
/// and looked up in a fixed list of known table types.
///
/// @param extension Extension identifier
///
/// @param major Major version of the requested table
///
/// @param minor Minor version of the requested table; values above 99 are
/// rejected
///
/// @return Size in bytes of the matching table, or 0 when the extension, the
/// version, or the combination is unknown.
static size_t get_extension_table_length(uint16_t extension, uint16_t major, uint16_t minor) {
  // Table to convert from major/minor to major/length
  struct sizes_t {
    std::string name;
    size_t size;
  };
  // Lookup data only: never modified after initialization.
  static const sizes_t sizes[] = {
      {"hsa_ext_images_1_00_pfn_t", sizeof(hsa_ext_images_1_00_pfn_t)},
      {"hsa_ext_finalizer_1_00_pfn_t", sizeof(hsa_ext_finalizer_1_00_pfn_t)},
      {"hsa_ven_amd_loader_1_00_pfn_t", sizeof(hsa_ven_amd_loader_1_00_pfn_t)},
      {"hsa_ven_amd_loader_1_01_pfn_t", sizeof(hsa_ven_amd_loader_1_01_pfn_t)},
      {"hsa_ven_amd_loader_1_02_pfn_t", sizeof(hsa_ven_amd_loader_1_02_pfn_t)},
      {"hsa_ven_amd_loader_1_03_pfn_t", sizeof(hsa_ven_amd_loader_1_03_pfn_t)},
      {"hsa_ven_amd_aqlprofile_1_00_pfn_t", sizeof(hsa_ven_amd_aqlprofile_1_00_pfn_t)},
      {"hsa_ven_amd_pc_sampling_1_00_pfn_t", sizeof(hsa_ven_amd_pc_sampling_1_00_pfn_t)}};

  // The type names only carry a two digit minor version.
  if (minor > 99) return 0;

  std::string name;

  switch (extension) {
    case HSA_EXTENSION_FINALIZER:
      name = "hsa_ext_finalizer_";
      break;
    case HSA_EXTENSION_IMAGES:
      name = "hsa_ext_images_";
      break;
    // case HSA_EXTENSION_PERFORMANCE_COUNTERS:
    //  name = "hsa_ext_perf_counter_";
    //  break;
    // case HSA_EXTENSION_PROFILING_EVENTS:
    //  name = "hsa_ext_profiling_event_";
    //  break;
    // case HSA_EXTENSION_AMD_PROFILER:
    //  name = "hsa_ven_amd_profiler_";
    //  break;
    case HSA_EXTENSION_AMD_LOADER:
      name = "hsa_ven_amd_loader_";
      break;
    case HSA_EXTENSION_AMD_AQLPROFILE:
      name = "hsa_ven_amd_aqlprofile_";
      break;
    case HSA_EXTENSION_AMD_PC_SAMPLING:
      name = "hsa_ven_amd_pc_sampling_";
      break;
    default:
      return 0;
  }

  // Bounded formatting; the cast makes the argument match the %u conversion.
  char buff[6];
  snprintf(buff, sizeof(buff), "%02u", static_cast<unsigned int>(minor));
  name += std::to_string(major) + "_" + buff + "_pfn_t";

  for (const sizes_t& entry : sizes) {
    if (entry.name == name) return entry.size;
  }
  return 0;
}

/// @brief Retrieves an extension function table selected by major and minor
/// version.
///
/// The major/minor pair is converted to a table length and the request is
/// forwarded to hsa_system_get_major_extension_table. An unknown combination
/// yields a length of zero, which that function rejects.
///
/// @param extension Extension identifier
///
/// @param version_major Requested major version
///
/// @param version_minor Requested minor version
///
/// @param table Output buffer that receives the function pointers
///
/// @return Status of hsa_system_get_major_extension_table.
hsa_status_t hsa_system_get_extension_table(uint16_t extension, uint16_t version_major,
                                            uint16_t version_minor, void* table) {
  TRY;
  const size_t table_length =
      get_extension_table_length(extension, version_major, version_minor);
  return HSA::hsa_system_get_major_extension_table(extension, version_major, table_length,
                                                   table);
  CATCH;
}

/// @brief Fills @p table with the function pointers of an extension for a
/// given major version.
///
/// A local table is populated per extension and at most @p table_length bytes
/// of it are copied into @p table, so callers built against an older, shorter
/// table layout receive only the prefix they know about.
///
/// @param extension Extension identifier
///
/// @param version_major Requested major version
///
/// @param table_length Size in bytes of the buffer behind @p table; must be
/// non-zero
///
/// @param table Output buffer that receives the function pointers
///
/// @return HSA_STATUS_SUCCESS when the table was populated;
/// HSA_STATUS_ERROR_INVALID_ARGUMENT when @p table_length is zero;
/// HSA_STATUS_ERROR for an unhandled extension, a major version mismatch, or a
/// failure to load a compatible aqlprofile library; or an error from the
/// IS_OPEN / IS_BAD_PTR checks.
hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t version_major,
                                                  size_t table_length, void* table) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(table);

  if (table_length == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  if (extension == HSA_EXTENSION_IMAGES) {
    if (version_major !=
        core::Runtime::runtime_singleton_->extensions_.image_api.version.major_id) {
      return HSA_STATUS_ERROR;
    }

    hsa_ext_images_1_pfn_t ext_table;
    ext_table.hsa_ext_image_clear = hsa_ext_image_clear;
    ext_table.hsa_ext_image_copy = hsa_ext_image_copy;
    ext_table.hsa_ext_image_create = hsa_ext_image_create;
    ext_table.hsa_ext_image_data_get_info = hsa_ext_image_data_get_info;
    ext_table.hsa_ext_image_destroy = hsa_ext_image_destroy;
    ext_table.hsa_ext_image_export = hsa_ext_image_export;
    ext_table.hsa_ext_image_get_capability = hsa_ext_image_get_capability;
    ext_table.hsa_ext_image_import = hsa_ext_image_import;
    ext_table.hsa_ext_sampler_create = hsa_ext_sampler_create;
    ext_table.hsa_ext_sampler_destroy = hsa_ext_sampler_destroy;
    ext_table.hsa_ext_image_get_capability_with_layout = hsa_ext_image_get_capability_with_layout;
    ext_table.hsa_ext_image_data_get_info_with_layout = hsa_ext_image_data_get_info_with_layout;
    ext_table.hsa_ext_image_create_with_layout = hsa_ext_image_create_with_layout;
    ext_table.hsa_ext_sampler_create_v2 = hsa_ext_sampler_create_v2;

    memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));

    return HSA_STATUS_SUCCESS;
  }

  if (extension == HSA_EXTENSION_AMD_PC_SAMPLING) {
    if (version_major != core::Runtime::runtime_singleton_->extensions_.pcs_api.version.major_id) {
      return HSA_STATUS_ERROR;
    }
    hsa_ven_amd_pc_sampling_1_00_pfn_t ext_table;
    ext_table.hsa_ven_amd_pcs_create = hsa_ven_amd_pcs_create;
    ext_table.hsa_ven_amd_pcs_create_from_id = hsa_ven_amd_pcs_create_from_id;
    ext_table.hsa_ven_amd_pcs_destroy = hsa_ven_amd_pcs_destroy;
    ext_table.hsa_ven_amd_pcs_start = hsa_ven_amd_pcs_start;
    ext_table.hsa_ven_amd_pcs_stop = hsa_ven_amd_pcs_stop;
    ext_table.hsa_ven_amd_pcs_flush = hsa_ven_amd_pcs_flush;

    memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));

    // Report success like every other branch; without this return the call
    // fell through to the generic HSA_STATUS_ERROR at the end of the function
    // even though the table had been filled in.
    return HSA_STATUS_SUCCESS;
  }

  if (extension == HSA_EXTENSION_FINALIZER) {
    if (version_major !=
        core::Runtime::runtime_singleton_->extensions_.finalizer_api.version.major_id) {
      return HSA_STATUS_ERROR;
    }

    hsa_ext_finalizer_1_00_pfn_t ext_table;
    ext_table.hsa_ext_program_add_module = hsa_ext_program_add_module;
    ext_table.hsa_ext_program_create = hsa_ext_program_create;
    ext_table.hsa_ext_program_destroy = hsa_ext_program_destroy;
    ext_table.hsa_ext_program_finalize = hsa_ext_program_finalize;
    ext_table.hsa_ext_program_get_info = hsa_ext_program_get_info;
    ext_table.hsa_ext_program_iterate_modules = hsa_ext_program_iterate_modules;

    memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));

    return HSA_STATUS_SUCCESS;
  }

  if (extension == HSA_EXTENSION_AMD_LOADER) {
    if (version_major != 1) return HSA_STATUS_ERROR;
    hsa_ven_amd_loader_1_03_pfn_t ext_table;
    ext_table.hsa_ven_amd_loader_query_host_address =
        hsa_ven_amd_loader_query_host_address;
    ext_table.hsa_ven_amd_loader_query_segment_descriptors =
        hsa_ven_amd_loader_query_segment_descriptors;
    ext_table.hsa_ven_amd_loader_query_executable =
        hsa_ven_amd_loader_query_executable;
    ext_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects =
        hsa_ven_amd_loader_executable_iterate_loaded_code_objects;
    ext_table.hsa_ven_amd_loader_loaded_code_object_get_info =
        hsa_ven_amd_loader_loaded_code_object_get_info;
    ext_table.hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size =
        hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size;
    ext_table.hsa_ven_amd_loader_iterate_executables =
        hsa_ven_amd_loader_iterate_executables;

    memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));

    return HSA_STATUS_SUCCESS;
  }

  if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
    if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
      debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
        version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);
      return HSA_STATUS_ERROR;
    }

    // The aqlprofile entry points live in a separate library that is loaded
    // on demand; each table slot is resolved by exported symbol name.
    os::LibHandle lib = os::LoadLib(kAqlProfileLib);
    if (lib == NULL) {
      debug_print("Loading '%s' failed\n", kAqlProfileLib);
      return HSA_STATUS_ERROR;
    }

    hsa_ven_amd_aqlprofile_pfn_t ext_table;
    ext_table.hsa_ven_amd_aqlprofile_version_major =
      (decltype(::hsa_ven_amd_aqlprofile_version_major)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_version_major");
    ext_table.hsa_ven_amd_aqlprofile_version_minor =
      (decltype(::hsa_ven_amd_aqlprofile_version_minor)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_version_minor");
    ext_table.hsa_ven_amd_aqlprofile_error_string =
      (decltype(::hsa_ven_amd_aqlprofile_error_string)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_error_string");
    ext_table.hsa_ven_amd_aqlprofile_validate_event =
      (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_validate_event");
    ext_table.hsa_ven_amd_aqlprofile_start =
      (decltype(::hsa_ven_amd_aqlprofile_start)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_start");
    ext_table.hsa_ven_amd_aqlprofile_stop =
      (decltype(::hsa_ven_amd_aqlprofile_stop)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_stop");
    ext_table.hsa_ven_amd_aqlprofile_read =
      (decltype(::hsa_ven_amd_aqlprofile_read)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_read");
    ext_table.hsa_ven_amd_aqlprofile_legacy_get_pm4 =
      (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
    ext_table.hsa_ven_amd_aqlprofile_get_info =
      (decltype(::hsa_ven_amd_aqlprofile_get_info)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_get_info");
    ext_table.hsa_ven_amd_aqlprofile_iterate_data =
      (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_iterate_data");
    ext_table.hsa_ven_amd_aqlprofile_iterate_event_ids =
      (decltype(::hsa_ven_amd_aqlprofile_iterate_event_ids)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_iterate_event_ids");
    ext_table.hsa_ven_amd_aqlprofile_iterate_event_coord =
      (decltype(::hsa_ven_amd_aqlprofile_iterate_event_coord)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_iterate_event_coord");
    ext_table.hsa_ven_amd_aqlprofile_att_marker =
      (decltype(::hsa_ven_amd_aqlprofile_att_marker)*)
        os::GetExportAddress(lib, "hsa_ven_amd_aqlprofile_att_marker");

    // The loaded library must report the major version this runtime was built
    // against; a missing version export is treated as incompatible.
    bool version_incompatible = true;
    uint32_t version_curr = 0;
    version_major = HSA_AQLPROFILE_VERSION_MAJOR;
    if (ext_table.hsa_ven_amd_aqlprofile_version_major != NULL) {
      version_curr = ext_table.hsa_ven_amd_aqlprofile_version_major();
      version_incompatible = (version_major != version_curr);
    }
    if (version_incompatible == true) {
      debug_print("Loading '%s' failed, incompatible ver %d, current ver %d\n",
        kAqlProfileLib, version_major, version_curr);
      return HSA_STATUS_ERROR;
    }

    memcpy(table, &ext_table, Min(sizeof(ext_table), table_length));

    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR;
  CATCH;
}

//---------------------------------------------------------------------------//
//  Agent
//---------------------------------------------------------------------------//
/// @brief Invokes @p callback for agents known to the runtime by forwarding to
/// Runtime::IterateAgent().
///
/// @param callback Function called per agent; must not be null
///
/// @param data Opaque pointer passed through to @p callback
///
/// @return Status reported by IterateAgent(), or an error from the
/// IS_OPEN / IS_BAD_PTR checks.
hsa_status_t hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void* data),
                                void* data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);
  const hsa_status_t iterate_status =
      core::Runtime::runtime_singleton_->IterateAgent(callback, data);
  return iterate_status;
  CATCH;
}

/// @brief Queries an attribute of an agent.
///
/// @param agent_handle Agent to query
///
/// @param attribute Attribute to query
///
/// @param value Output buffer that receives the attribute value
///
/// @return Status reported by Agent::GetInfo(), or an error from the
/// IS_OPEN / IS_BAD_PTR / IS_VALID checks.
hsa_status_t hsa_agent_get_info(hsa_agent_t agent_handle, hsa_agent_info_t attribute,
                                void* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);
  const core::Agent* queried_agent = core::Agent::Convert(agent_handle);
  IS_VALID(queried_agent);
  const hsa_status_t query_status = queried_agent->GetInfo(attribute, value);
  return query_status;
  CATCH;
}

/// @brief Reports the exception policies of an agent for a profile.
///
/// The reported mask is always zero.
///
/// @param agent_handle Agent to query
///
/// @param profile Profile to query; validated by IS_BAD_PROFILE
///
/// @param mask Output parameter that receives the policy mask
///
/// @return HSA_STATUS_SUCCESS, or an error from the argument checks.
hsa_status_t hsa_agent_get_exception_policies(hsa_agent_t agent_handle, hsa_profile_t profile,
                                              uint16_t* mask) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(mask);
  IS_BAD_PROFILE(profile);
  const core::Agent* queried_agent = core::Agent::Convert(agent_handle);
  IS_VALID(queried_agent);

  // No exception policy bits are reported for any agent.
  *mask = 0;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Queries an attribute of a cache.
///
/// @param cache Cache handle to query
///
/// @param attribute Attribute to query
///
/// @param value Output buffer that receives the attribute value
///
/// @return Status reported by Cache::GetInfo(), or an error from the
/// IS_OPEN / IS_VALID / IS_BAD_PTR checks (evaluated in that order).
hsa_status_t hsa_cache_get_info(hsa_cache_t cache, hsa_cache_info_t attribute, void* value) {
  TRY;
  IS_OPEN();
  core::Cache* queried_cache = core::Cache::Convert(cache);
  IS_VALID(queried_cache);
  IS_BAD_PTR(value);
  const hsa_status_t query_status = queried_cache->GetInfo(attribute, value);
  return query_status;
  CATCH;
}

/// @brief Invokes @p callback for caches of an agent by forwarding to
/// Agent::IterateCache().
///
/// @param agent_handle Agent whose caches are visited
///
/// @param callback Function called per cache; must not be null
///
/// @param data Opaque pointer passed through to @p callback
///
/// @return Status reported by IterateCache(), or an error from the
/// IS_OPEN / IS_VALID / IS_BAD_PTR checks (evaluated in that order).
hsa_status_t hsa_agent_iterate_caches(hsa_agent_t agent_handle,
                                      hsa_status_t (*callback)(hsa_cache_t cache, void* data),
                                      void* data) {
  TRY;
  IS_OPEN();
  const core::Agent* owning_agent = core::Agent::Convert(agent_handle);
  IS_VALID(owning_agent);
  IS_BAD_PTR(callback);
  const hsa_status_t iterate_status = owning_agent->IterateCache(callback, data);
  return iterate_status;
  CATCH;
}

/// @brief Reports whether an agent supports an extension at a given version.
///
/// Only agents whose device type is kAmdGpuDevice can report support.
///
/// @param extension Extension identifier; must lie in the standard range or in
/// the AMD vendor range
///
/// @param agent_handle Agent to query
///
/// @param version_major Requested major version
///
/// @param version_minor Requested minor version
///
/// @param result Output parameter set to true when supported, false otherwise
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an out-of-range extension or a
/// null @p result, an error from IS_OPEN / IS_VALID, otherwise
/// HSA_STATUS_SUCCESS.
hsa_status_t hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent_handle,
                                           uint16_t version_major, uint16_t version_minor,
                                           bool* result) {
  TRY;
  IS_OPEN();

  const bool in_std_range = extension <= HSA_EXTENSION_STD_LAST;
  const bool in_amd_range =
      extension >= HSA_AMD_FIRST_EXTENSION && extension <= HSA_AMD_LAST_EXTENSION;
  if (!(in_std_range || in_amd_range) || result == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *result = false;

  const core::Agent* queried_agent = core::Agent::Convert(agent_handle);
  IS_VALID(queried_agent);

  // Non-GPU agents never report extension support.
  if (queried_agent->device_type() != core::Agent::kAmdGpuDevice) {
    return HSA_STATUS_SUCCESS;
  }

  uint16_t supported_major = 0;
  hsa_status_t status = queried_agent->GetInfo(HSA_AGENT_INFO_VERSION_MAJOR, &supported_major);
  assert(status == HSA_STATUS_SUCCESS);

  // The minor version compared against is fixed at zero.
  const uint16_t supported_minor = 0;
  if (version_major <= supported_major && version_minor <= supported_minor) {
    *result = true;
  }

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Reports whether an agent supports a major version of an extension
/// and, if so, the highest supported minor version.
///
/// Only agents whose device type is kAmdGpuDevice can report support.
///
/// @param extension Extension identifier; must lie in the standard range or in
/// the AMD vendor range
///
/// @param agent_handle Agent to query
///
/// @param version_major Requested major version
///
/// @param version_minor Output parameter set to 0 when the extension is
/// supported; left untouched otherwise. Must not be null.
///
/// @param result Output parameter set to true when supported, false otherwise.
/// Must not be null.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an out-of-range extension or a
/// null output pointer, an error from IS_OPEN / IS_VALID, otherwise
/// HSA_STATUS_SUCCESS.
hsa_status_t hsa_agent_major_extension_supported(uint16_t extension, hsa_agent_t agent_handle,
                                                 uint16_t version_major, uint16_t* version_minor,
                                                 bool* result) {
  TRY;
  IS_OPEN();

  // version_minor is written below, so it is validated together with result;
  // this matches hsa_system_major_extension_supported, which rejects a null
  // version_minor as well.
  if ((extension > HSA_EXTENSION_STD_LAST &&
       (extension < HSA_AMD_FIRST_EXTENSION || extension > HSA_AMD_LAST_EXTENSION)) ||
      version_minor == NULL || result == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *result = false;

  const core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  if (agent->device_type() == core::Agent::kAmdGpuDevice) {
    uint16_t agent_version_major = 0;
    hsa_status_t status = agent->GetInfo(HSA_AGENT_INFO_VERSION_MAJOR, &agent_version_major);
    assert(status == HSA_STATUS_SUCCESS);

    if (version_major <= agent_version_major) {
      *version_minor = 0;
      *result = true;
    }
  }

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Api to create a user mode queue.
///
/// @param agent_handle Hsa Agent which will execute Aql commands
///
/// @param size Size of Queue in terms of Aql packets; must be a non-zero
/// power of two
///
/// @param type Queue type; values above HSA_QUEUE_TYPE_COOPERATIVE are rejected
///
/// @param callback Callback function to register in case the Queue encounters
/// an error. When null, core::Queue::DefaultErrorHandler is used.
///
/// @param data Opaque pointer forwarded to Agent::QueueCreate() together with
/// @p callback
///
/// @param private_segment_size Forwarded unchanged to Agent::QueueCreate()
///
/// @param group_segment_size Forwarded unchanged to Agent::QueueCreate()
///
/// @param queue Output parameter updated with a pointer to the queue being
/// created
///
/// @return hsa_status
hsa_status_t hsa_queue_create(
    hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
    void* data, uint32_t private_segment_size, uint32_t group_segment_size,
    hsa_queue_t** queue) {
  TRY;
  IS_OPEN();

  // Argument validation, one guard per rule.
  if (queue == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (size == 0 || !IsPowerOfTwo(size)) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (type > HSA_QUEUE_TYPE_COOPERATIVE) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  core::Agent* target_agent = core::Agent::Convert(agent_handle);
  IS_VALID(target_agent);

  hsa_queue_type32_t supported_type = HSA_QUEUE_TYPE_MULTI;
  hsa_status_t status = target_agent->GetInfo(HSA_AGENT_INFO_QUEUE_TYPE, &supported_type);
  assert(HSA_STATUS_SUCCESS == status);

  // An agent limited to single-producer queues cannot host any other type.
  const bool agent_single_only = (supported_type == HSA_QUEUE_TYPE_SINGLE);
  if (agent_single_only && type != HSA_QUEUE_TYPE_SINGLE) {
    return HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
  }

  if (callback == nullptr) {
    callback = core::Queue::DefaultErrorHandler;
  }

  // Request a device-memory ring buffer when the runtime flag asks for it.
  uint64_t create_flags = 0;
  if (core::Runtime::runtime_singleton_->flag().dev_mem_queue_buf()) {
    create_flags = HSA_AMD_QUEUE_CREATE_DEVICE_MEM_RING_BUF;
  }

  core::Queue* created_queue = nullptr;
  status = target_agent->QueueCreate(size, type, create_flags, callback, data,
                                     private_segment_size, group_segment_size, &created_queue);
  if (status == HSA_STATUS_SUCCESS) {
    assert(created_queue != nullptr && "Queue not returned but status was success.\n");
    *queue = core::Queue::Convert(created_queue);
  }
  return status;

  CATCH;
}

/// @brief Api to create a host-managed (soft) queue.
///
/// The shared queue structure is allocated from @p region and wrapped in a
/// core::HostQueue.
///
/// @param region Memory region used for the allocation; handle must be non-zero
///
/// @param size Size of Queue in terms of Aql packets; must be a non-zero
/// power of two
///
/// @param type Queue type; must lie between HSA_QUEUE_TYPE_MULTI and
/// HSA_QUEUE_TYPE_SINGLE inclusive
///
/// @param features Queue feature mask; must be non-zero
///
/// @param doorbell_signal Doorbell signal of the queue; handle must be non-zero
///
/// @param queue Output parameter updated with a pointer to the queue being
/// created
///
/// @return hsa_status
hsa_status_t hsa_soft_queue_create(hsa_region_t region, uint32_t size,
                                   hsa_queue_type32_t type, uint32_t features,
                                   hsa_signal_t doorbell_signal,
                                   hsa_queue_t** queue) {
  TRY;
  IS_OPEN();

  // Argument validation, one guard per group of related rules.
  if (queue == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (region.handle == 0 || doorbell_signal.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  if (size == 0 || !IsPowerOfTwo(size)) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (type < HSA_QUEUE_TYPE_MULTI || type > HSA_QUEUE_TYPE_SINGLE) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  if (features == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  const core::MemoryRegion* backing_region = core::MemoryRegion::Convert(region);
  IS_VALID(backing_region);

  const core::Signal* doorbell = core::Signal::Convert(doorbell_signal);
  IS_VALID(doorbell);

  void* queue_storage = nullptr;
  const hsa_status_t alloc_status =
      HSA::hsa_memory_allocate(region, sizeof(core::SharedQueue), &queue_storage);
  if (alloc_status != HSA_STATUS_SUCCESS) return alloc_status;
  assert(queue_storage && "Queue struct is NULL when creating host queue.");

  core::SharedQueue* shared = static_cast<core::SharedQueue*>(queue_storage);
  core::HostQueue* soft_queue =
      new core::HostQueue(shared, region, size, type, features, doorbell_signal);

  *queue = core::Queue::Convert(soft_queue);

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Api to destroy a user mode queue
///
/// @param queue Pointer to the queue being destroyed; must not be null
///
/// @return HSA_STATUS_SUCCESS after Queue::Destroy() was called, or an error
/// from the IS_OPEN / IS_BAD_PTR / IS_VALID checks.
hsa_status_t hsa_queue_destroy(hsa_queue_t* queue) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(queue);
  core::Queue* doomed_queue = core::Queue::Convert(queue);
  IS_VALID(doomed_queue);
  doomed_queue->Destroy();
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Api to inactivate a user mode queue
///
/// @param queue Pointer to the queue being inactivated; must not be null
///
/// @return HSA_STATUS_SUCCESS after Queue::Inactivate() was called, or an
/// error from the IS_OPEN / IS_BAD_PTR / IS_VALID checks.
hsa_status_t hsa_queue_inactivate(hsa_queue_t* queue) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(queue);
  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);
  target_queue->Inactivate();
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Api to read the Read Index of Queue using Acquire semantics
///
/// @param queue Pointer to the queue whose read index is being read
///
/// @return uint64_t Value of Read index
uint64_t hsa_queue_load_read_index_scacquire(const hsa_queue_t* queue) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->LoadReadIndexAcquire();
  CATCHRET(uint64_t);
}

/// @brief Api to read the Read Index of Queue using Relaxed semantics
///
/// @param queue Pointer to the queue whose read index is being read
///
/// @return uint64_t Value of Read index
uint64_t hsa_queue_load_read_index_relaxed(const hsa_queue_t* queue) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->LoadReadIndexRelaxed();
  CATCHRET(uint64_t);
}

/// @brief Api to read the Write Index of Queue using Acquire semantics
///
/// @param queue Pointer to the queue whose write index is being read
///
/// @return uint64_t Value of Write index
uint64_t hsa_queue_load_write_index_scacquire(const hsa_queue_t* queue) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->LoadWriteIndexAcquire();
  CATCHRET(uint64_t);
}

/// @brief Api to read the Write Index of Queue using Relaxed semantics
///
/// @param queue Pointer to the queue whose write index is being read
///
/// @return uint64_t Value of Write index
uint64_t hsa_queue_load_write_index_relaxed(const hsa_queue_t* queue) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->LoadWriteIndexRelaxed();
  CATCHRET(uint64_t);
}

/// @brief Api to store the Read Index of Queue using Relaxed semantics
///
/// @param queue Pointer to the queue whose read index is being updated
///
/// @param value Value of new read index
void hsa_queue_store_read_index_relaxed(const hsa_queue_t* queue,
                                                uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  cmd_queue->StoreReadIndexRelaxed(value);
  CATCHRET(void);
}

/// @brief Api to store the Read Index of Queue using Release semantics
///
/// @param queue Pointer to the queue whose read index is being updated
///
/// @param value Value of new read index
void hsa_queue_store_read_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  cmd_queue->StoreReadIndexRelease(value);
  CATCHRET(void);
}

/// @brief Api to store the Write Index of Queue using Relaxed semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value of new write index
void hsa_queue_store_write_index_relaxed(const hsa_queue_t* queue,
                                                 uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  cmd_queue->StoreWriteIndexRelaxed(value);
  CATCHRET(void);
}

/// @brief Api to store the Write Index of Queue using Release semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value of new write index
void hsa_queue_store_write_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  cmd_queue->StoreWriteIndexRelease(value);
  CATCHRET(void);
}

/// @brief Api to compare and swap the Write Index of Queue using Acquire and
/// Release semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param expected Current value of write index
///
/// @param value Value of new write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_cas_write_index_scacq_screl(const hsa_queue_t* queue, uint64_t expected,
                                               uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->CasWriteIndexAcqRel(expected, value);
  CATCHRET(uint64_t);
}

/// @brief Api to compare and swap the Write Index of Queue using Acquire
/// Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param expected Current value of write index
///
/// @param value Value of new write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_cas_write_index_scacquire(const hsa_queue_t* queue, uint64_t expected,
                                             uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->CasWriteIndexAcquire(expected, value);
  CATCHRET(uint64_t);
}

/// @brief Api to compare and swap the Write Index of Queue using Relaxed
/// Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param expected Current value of write index
///
/// @param value Value of new write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_cas_write_index_relaxed(const hsa_queue_t* queue,
                                                   uint64_t expected,
                                                   uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->CasWriteIndexRelaxed(expected, value);
  CATCHRET(uint64_t);
}

/// @brief Api to compare and swap the Write Index of Queue using Release
/// Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param expected Current value of write index
///
/// @param value Value of new write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_cas_write_index_screlease(const hsa_queue_t* queue, uint64_t expected,
                                             uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->CasWriteIndexRelease(expected, value);
  CATCHRET(uint64_t);
}

/// @brief Api to Add to the Write Index of Queue using Acquire and Release
/// Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value to add to write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_add_write_index_scacq_screl(const hsa_queue_t* queue, uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->AddWriteIndexAcqRel(value);
  CATCHRET(uint64_t);
}

/// @brief Api to Add to the Write Index of Queue using Acquire Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value to add to write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_add_write_index_scacquire(const hsa_queue_t* queue, uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->AddWriteIndexAcquire(value);
  CATCHRET(uint64_t);
}

/// @brief Api to Add to the Write Index of Queue using Relaxed Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value to add to write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t* queue,
                                                   uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->AddWriteIndexRelaxed(value);
  CATCHRET(uint64_t);
}

/// @brief Api to Add to the Write Index of Queue using Release Semantics
///
/// @param queue Pointer to the queue whose write index is being updated
///
/// @param value Value to add to write index
///
/// @return uint64_t Value of write index before the update
uint64_t hsa_queue_add_write_index_screlease(const hsa_queue_t* queue, uint64_t value) {
  TRY;
  core::Queue* cmd_queue = core::Queue::Convert(queue);
  assert(IsValid(cmd_queue));
  return cmd_queue->AddWriteIndexRelease(value);
  CATCHRET(uint64_t);
}

//-----------------------------------------------------------------------------
// Memory
//-----------------------------------------------------------------------------
/// @brief Invokes @p callback for memory regions of an agent by forwarding to
/// Agent::IterateRegion().
///
/// @param agent_handle Agent whose regions are visited
///
/// @param callback Function called per region; must not be null
///
/// @param data Opaque pointer passed through to @p callback
///
/// @return Status reported by IterateRegion(), or an error from the
/// IS_OPEN / IS_BAD_PTR / IS_VALID checks.
hsa_status_t hsa_agent_iterate_regions(
    hsa_agent_t agent_handle,
    hsa_status_t (*callback)(hsa_region_t region, void* data), void* data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);
  const core::Agent* owning_agent = core::Agent::Convert(agent_handle);
  IS_VALID(owning_agent);
  const hsa_status_t iterate_status = owning_agent->IterateRegion(callback, data);
  return iterate_status;
  CATCH;
}

/// @brief Queries an attribute of a memory region.
///
/// @param region Region handle to query
///
/// @param attribute Attribute to query
///
/// @param value Output buffer that receives the attribute value
///
/// @return Status reported by MemoryRegion::GetInfo(), or an error from the
/// IS_OPEN / IS_BAD_PTR / IS_VALID checks.
hsa_status_t hsa_region_get_info(hsa_region_t region, hsa_region_info_t attribute,
                                 void* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  const core::MemoryRegion* queried_region = core::MemoryRegion::Convert(region);
  IS_VALID(queried_region);

  const hsa_status_t query_status = queried_region->GetInfo(attribute, value);
  return query_status;
  CATCH;
}

/// @brief Registers a host memory range. Only the arguments are validated; no
/// other work is performed here.
///
/// @param address Start of the range
///
/// @param size Size of the range in bytes
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when @p address is non-null but
/// @p size is zero, HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not
/// open, otherwise HSA_STATUS_SUCCESS.
hsa_status_t hsa_memory_register(void* address, size_t size) {
  TRY;
  IS_OPEN();

  // A non-null address must come with a non-empty range.
  const bool empty_range_at_address = (address != NULL) && (size == 0);
  return empty_range_at_address ? HSA_STATUS_ERROR_INVALID_ARGUMENT : HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Deregisters a host memory range. Neither argument is inspected; the
/// call only checks that the runtime is open.
///
/// @param address Start of the range (unused)
///
/// @param size Size of the range in bytes (unused)
///
/// @return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open,
/// otherwise HSA_STATUS_SUCCESS.
hsa_status_t hsa_memory_deregister(void* address, size_t size) {
  TRY;
  IS_OPEN();
  const hsa_status_t always_ok = HSA_STATUS_SUCCESS;
  return always_ok;
  CATCH;
}

/// @brief Allocates memory from a region with no allocation flags.
///
/// @param region Region to allocate from
///
/// @param size Number of bytes to allocate; must be non-zero
///
/// @param ptr Output parameter that receives the allocation; must not be null
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for a zero @p size or null @p ptr,
/// an error from IS_OPEN / IS_VALID, otherwise the status reported by
/// Runtime::AllocateMemory().
hsa_status_t hsa_memory_allocate(hsa_region_t region, size_t size, void** ptr) {
  TRY;
  IS_OPEN();

  if (ptr == NULL || size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const core::MemoryRegion* source_region = core::MemoryRegion::Convert(region);
  IS_VALID(source_region);

  core::MemoryRegion::AllocateFlags no_flags = core::MemoryRegion::AllocateNoFlags;
  const hsa_status_t alloc_status =
      core::Runtime::runtime_singleton_->AllocateMemory(source_region, size, no_flags, ptr);
  return alloc_status;
  CATCH;
}

/// @brief Releases memory through Runtime::FreeMemory().
///
/// @param ptr Allocation to release; a null pointer is accepted and is a no-op
///
/// @return HSA_STATUS_SUCCESS for a null @p ptr,
/// HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open, otherwise
/// the status reported by FreeMemory().
hsa_status_t hsa_memory_free(void* ptr) {
  TRY;
  IS_OPEN();

  // Freeing null succeeds without touching the runtime.
  if (ptr != NULL) {
    return core::Runtime::runtime_singleton_->FreeMemory(ptr);
  }
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Assigns ownership of a buffer to an agent. Only the arguments are
/// validated; no other work is performed here.
///
/// @param ptr Buffer being assigned; must not be null
///
/// @param agent_handle Agent receiving the buffer
///
/// @param access Requested access permission; must lie between
/// HSA_ACCESS_PERMISSION_RO and HSA_ACCESS_PERMISSION_RW inclusive
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for a null @p ptr or out-of-range
/// @p access, an error from IS_OPEN / IS_VALID, otherwise HSA_STATUS_SUCCESS.
hsa_status_t hsa_memory_assign_agent(void* ptr, hsa_agent_t agent_handle,
                                     hsa_access_permission_t access) {
  TRY;
  IS_OPEN();

  if (ptr == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (access < HSA_ACCESS_PERMISSION_RO || access > HSA_ACCESS_PERMISSION_RW) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const core::Agent* new_owner = core::Agent::Convert(agent_handle);
  IS_VALID(new_owner);

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/// @brief Copies a block of memory through Runtime::CopyMemory().
///
/// @param dst Destination buffer; must not be null
///
/// @param src Source buffer; must not be null
///
/// @param size Number of bytes to copy; zero succeeds without copying
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer,
/// HSA_STATUS_SUCCESS for a zero @p size, HSA_STATUS_ERROR_NOT_INITIALIZED
/// when the runtime is not open, otherwise the status reported by
/// CopyMemory().
hsa_status_t hsa_memory_copy(void* dst, const void* src, size_t size) {
  TRY;
  IS_OPEN();

  const bool has_null_operand = (dst == NULL) || (src == NULL);
  if (has_null_operand) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Pointers are validated even for an empty copy; only then is it a no-op.
  if (size != 0) {
    return core::Runtime::runtime_singleton_->CopyMemory(dst, src, size);
  }
  return HSA_STATUS_SUCCESS;
  CATCH;
}

//-----------------------------------------------------------------------------
// Signals
//-----------------------------------------------------------------------------

// Creates a signal by delegating to AMD::hsa_amd_signal_create, passing 0 for
// its extra (fourth) argument. All validation and error translation is left to
// the callee.
hsa_status_t
    hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
                      const hsa_agent_t* consumers, hsa_signal_t* hsa_signal) {
  const hsa_status_t created =
      AMD::hsa_amd_signal_create(initial_value, num_consumers, consumers, 0, hsa_signal);
  return created;
}

// Destroys the signal named by `hsa_signal`.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle is 0; an object
// that fails IS_VALID is rejected with that macro's error status. Otherwise
// the signal's DestroySignal() is invoked and HSA_STATUS_SUCCESS is returned.
hsa_status_t hsa_signal_destroy(hsa_signal_t hsa_signal) {
  TRY;
  IS_OPEN();

  // A zero handle can never name a live signal; reject it before converting.
  if (hsa_signal.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  core::Signal* signal = core::Signal::Convert(hsa_signal);
  // Validate before use so a stale/garbage handle is reported as an error
  // instead of being dereferenced.
  IS_VALID(signal);
  signal->DestroySignal();
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Returns the result of core::Signal::LoadRelaxed for the signal behind the
// handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_load_relaxed(hsa_signal_t hsa_signal) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t current = sig->LoadRelaxed();
  return current;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::LoadAcquire for the signal behind the
// handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_load_scacquire(hsa_signal_t hsa_signal) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t current = sig->LoadAcquire();
  return current;
  CATCHRET(hsa_signal_value_t);
}

// Forwards `value` to core::Signal::StoreRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_store_relaxed(hsa_signal_t hsa_signal,
                                      hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->StoreRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::StoreRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_store_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->StoreRelease(value);
  CATCHRET(void);
}

// Waits on the signal by forwarding every argument to
// core::Signal::WaitRelaxed and returns whatever value that call reports.
// Handle validity is only asserted (debug builds).
hsa_signal_value_t
    hsa_signal_wait_relaxed(hsa_signal_t hsa_signal,
                            hsa_signal_condition_t condition,
                            hsa_signal_value_t compare_value,
                            uint64_t timeout_hint,
                            hsa_wait_state_t wait_state_hint) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t observed =
      sig->WaitRelaxed(condition, compare_value, timeout_hint, wait_state_hint);
  return observed;
  CATCHRET(hsa_signal_value_t);
}

// Waits on the signal by forwarding every argument to
// core::Signal::WaitAcquire and returns whatever value that call reports.
// Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_wait_scacquire(hsa_signal_t hsa_signal,
                                             hsa_signal_condition_t condition,
                                             hsa_signal_value_t compare_value,
                                             uint64_t timeout_hint,
                                             hsa_wait_state_t wait_state_hint) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t observed =
      sig->WaitAcquire(condition, compare_value, timeout_hint, wait_state_hint);
  return observed;
  CATCHRET(hsa_signal_value_t);
}

// Builds a signal group from `num_signals` signal handles.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when num_signals is 0, when
// `signals` or `signal_group` is null, or when consumers are announced
// (num_consumers != 0) but `consumers` is null. Every signal and consumer
// handle is checked with IS_VALID. A group object that reports !IsValid()
// after construction is deleted and HSA_STATUS_ERROR_OUT_OF_RESOURCES is
// returned. On success the new group's handle is written to *signal_group.
hsa_status_t hsa_signal_group_create(uint32_t num_signals, const hsa_signal_t* signals,
                                     uint32_t num_consumers, const hsa_agent_t* consumers,
                                     hsa_signal_group_t* signal_group) {
  TRY;
  IS_OPEN();
  if (num_signals == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // These pointers are dereferenced below; reject nulls instead of crashing.
  IS_BAD_PTR(signals);
  IS_BAD_PTR(signal_group);
  if (num_consumers != 0 && consumers == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  for (uint32_t i = 0; i < num_signals; i++) IS_VALID(core::Signal::Convert(signals[i]));
  for (uint32_t i = 0; i < num_consumers; i++) IS_VALID(core::Agent::Convert(consumers[i]));

  core::SignalGroup* group = new core::SignalGroup(num_signals, signals);
  CHECK_ALLOC(group);
  if (!group->IsValid()) {
    delete group;
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  *signal_group = core::SignalGroup::Convert(group);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Deletes the SignalGroup object behind `signal_group` once the handle has
// passed IS_VALID.
hsa_status_t hsa_signal_group_destroy(hsa_signal_group_t signal_group) {
  TRY;
  IS_OPEN();
  core::SignalGroup* doomed = core::SignalGroup::Convert(signal_group);
  IS_VALID(doomed);
  delete doomed;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Waits until any signal of the group satisfies its condition.
//
// The wait is delegated to AMD::hsa_amd_signal_wait_any over the group's
// signal list with a timeout of uint64_t(-1). The index it returns selects
// the signal written to *signal; an index outside the group is reported as
// HSA_STATUS_ERROR_INVALID_ARGUMENT. `value` is passed straight through to
// the delegate.
//
// `conditions`, `compare_values` and `signal` are rejected up front when
// null (IS_BAD_PTR) because they are dereferenced here or by the delegate.
hsa_status_t hsa_signal_group_wait_any_relaxed(hsa_signal_group_t signal_group,
                                               const hsa_signal_condition_t* conditions,
                                               const hsa_signal_value_t* compare_values,
                                               hsa_wait_state_t wait_state_hint,
                                               hsa_signal_t* signal, hsa_signal_value_t* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(conditions);
  IS_BAD_PTR(compare_values);
  IS_BAD_PTR(signal);

  const core::SignalGroup* group = core::SignalGroup::Convert(signal_group);
  IS_VALID(group);

  // The AMD entry point takes non-const pointers, hence the const_casts.
  const uint32_t index = AMD::hsa_amd_signal_wait_any(
      group->Count(), const_cast<hsa_signal_t*>(group->List()),
      const_cast<hsa_signal_condition_t*>(conditions),
      const_cast<hsa_signal_value_t*>(compare_values), uint64_t(-1), wait_state_hint, value);
  if (index >= group->Count()) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  *signal = group->List()[index];
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Acquire flavour of the group wait: performs the relaxed wait and then
// issues an acquire fence before handing back the relaxed call's status.
hsa_status_t hsa_signal_group_wait_any_scacquire(hsa_signal_group_t signal_group,
                                                 const hsa_signal_condition_t* conditions,
                                                 const hsa_signal_value_t* compare_values,
                                                 hsa_wait_state_t wait_state_hint,
                                                 hsa_signal_t* signal, hsa_signal_value_t* value) {
  TRY;
  const hsa_status_t wait_status = HSA::hsa_signal_group_wait_any_relaxed(
      signal_group, conditions, compare_values, wait_state_hint, signal, value);
  // The fence is issued regardless of whether the wait succeeded.
  std::atomic_thread_fence(std::memory_order_acquire);
  return wait_status;
  CATCH;
}

// Forwards `value` to core::Signal::AndRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_and_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AndRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AndAcquire on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_and_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AndAcquire(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AndRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_and_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AndRelease(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AndAcqRel on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_and_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AndAcqRel(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::OrRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_or_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->OrRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::OrAcquire on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_or_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->OrAcquire(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::OrRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_or_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->OrRelease(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::OrAcqRel on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_or_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->OrAcqRel(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::XorRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_xor_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->XorRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::XorAcquire on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_xor_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->XorAcquire(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::XorRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_xor_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->XorRelease(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::XorAcqRel on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_xor_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->XorAcqRel(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AddRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_add_relaxed(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AddRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AddAcquire on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_add_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AddAcquire(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AddRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_add_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AddRelease(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::AddAcqRel on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_add_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->AddAcqRel(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::SubRelaxed on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_subtract_relaxed(hsa_signal_t hsa_signal,
                                         hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->SubRelaxed(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::SubAcquire on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_subtract_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->SubAcquire(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::SubRelease on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_subtract_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->SubRelease(value);
  CATCHRET(void);
}

// Forwards `value` to core::Signal::SubAcqRel on the signal behind the
// handle. Handle validity is only asserted (debug builds).
void hsa_signal_subtract_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  sig->SubAcqRel(value);
  CATCHRET(void);
}

// Returns the result of core::Signal::ExchRelaxed(value) for the signal
// behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t
    hsa_signal_exchange_relaxed(hsa_signal_t hsa_signal,
                                hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->ExchRelaxed(value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::ExchAcquire(value) for the signal
// behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_exchange_scacquire(hsa_signal_t hsa_signal,
                                                 hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->ExchAcquire(value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::ExchRelease(value) for the signal
// behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_exchange_screlease(hsa_signal_t hsa_signal,
                                                 hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->ExchRelease(value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::ExchAcqRel(value) for the signal
// behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_exchange_scacq_screl(hsa_signal_t hsa_signal,
                                                   hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->ExchAcqRel(value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::CasRelaxed(expected, value) for the
// signal behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_cas_relaxed(hsa_signal_t hsa_signal,
                                                  hsa_signal_value_t expected,
                                                  hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->CasRelaxed(expected, value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::CasAcquire(expected, value) for the
// signal behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_cas_scacquire(hsa_signal_t hsa_signal, hsa_signal_value_t expected,
                                            hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->CasAcquire(expected, value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::CasRelease(expected, value) for the
// signal behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_cas_screlease(hsa_signal_t hsa_signal, hsa_signal_value_t expected,
                                            hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->CasRelease(expected, value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

// Returns the result of core::Signal::CasAcqRel(expected, value) for the
// signal behind the handle. Handle validity is only asserted (debug builds).
hsa_signal_value_t hsa_signal_cas_scacq_screl(hsa_signal_t hsa_signal, hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  TRY;
  core::Signal* sig = core::Signal::Convert(hsa_signal);
  assert(IsValid(sig));
  const hsa_signal_value_t result = sig->CasAcqRel(expected, value);
  return result;
  CATCHRET(hsa_signal_value_t);
}

//===--- Instruction Set Architecture -------------------------------------===//

using core::Isa;
using core::IsaRegistry;
using core::Wavefront;

// Looks `name` up in the ISA registry and writes the matching handle to *isa.
// An unknown name yields HSA_STATUS_ERROR_INVALID_ISA_NAME; both pointer
// arguments are screened with IS_BAD_PTR.
hsa_status_t hsa_isa_from_name(
    const char *name,
    hsa_isa_t *isa) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(name);
  IS_BAD_PTR(isa);

  const Isa *registered = IsaRegistry::GetIsa(name);
  if (registered) {
    *isa = Isa::Handle(registered);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ISA_NAME;
  CATCH;
}

// Hands `callback`/`data` to the agent's IterateSupportedIsas and returns its
// status. The callback pointer is screened with IS_BAD_PTR and the agent
// handle with IS_VALID.
hsa_status_t hsa_agent_iterate_isas(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_isa_t isa,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  const core::Agent *owner = core::Agent::Convert(agent);
  IS_VALID(owner);

  const hsa_status_t iteration_status = owner->IterateSupportedIsas(callback, data);
  return iteration_status;
  CATCH;
}

/* deprecated */
// Indexed ISA attribute query. Only index 0 is accepted (anything else is
// HSA_STATUS_ERROR_INVALID_INDEX); a failed Isa::GetInfo is reported as
// HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t hsa_isa_get_info(
    hsa_isa_t isa,
    hsa_isa_info_t attribute,
    uint32_t index,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  if (index != 0) {
    return HSA_STATUS_ERROR_INVALID_INDEX;
  }

  const Isa *queried = Isa::Object(isa);
  IS_VALID(queried);

  if (queried->GetInfo(attribute, value)) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  CATCH;
}

// ISA attribute query without an index argument. A failed Isa::GetInfo is
// reported as HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t hsa_isa_get_info_alt(
    hsa_isa_t isa,
    hsa_isa_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  const Isa *queried = Isa::Object(isa);
  IS_VALID(queried);

  if (queried->GetInfo(attribute, value)) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  CATCH;
}

// Reports the exception-policy mask for an ISA/profile pair. After argument
// validation the mask is always written as 0 (see FIXME below).
hsa_status_t hsa_isa_get_exception_policies(
    hsa_isa_t isa,
    hsa_profile_t profile,
    uint16_t *mask) {
  TRY;
  IS_OPEN();
  IS_BAD_PROFILE(profile);
  IS_BAD_PTR(mask);

  const Isa *queried = Isa::Object(isa);
  IS_VALID(queried);

  // FIXME: update when exception policies are supported.
  const uint16_t no_policies = 0;
  *mask = no_policies;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Writes the ISA's rounding method for the given floating-point type and
// flush mode to *round_method, as reported by Isa::GetRoundMethod.
hsa_status_t hsa_isa_get_round_method(
    hsa_isa_t isa,
    hsa_fp_type_t fp_type,
    hsa_flush_mode_t flush_mode,
    hsa_round_method_t *round_method) {
  TRY;
  IS_OPEN();
  IS_BAD_FP_TYPE(fp_type);
  IS_BAD_FLUSH_MODE(flush_mode);
  IS_BAD_PTR(round_method);

  const Isa *queried = Isa::Object(isa);
  IS_VALID(queried);

  *round_method = queried->GetRoundMethod(fp_type, flush_mode);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Wavefront attribute query. A handle that does not resolve to an object is
// HSA_STATUS_ERROR_INVALID_WAVEFRONT; a failed Wavefront::GetInfo is
// HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t hsa_wavefront_get_info(
    hsa_wavefront_t wavefront,
    hsa_wavefront_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  const Wavefront *queried = Wavefront::Object(wavefront);
  if (queried == nullptr) {
    return HSA_STATUS_ERROR_INVALID_WAVEFRONT;
  }

  if (queried->GetInfo(attribute, value)) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  CATCH;
}

// Invokes `callback` exactly once, with the handle of the wavefront returned
// by Isa::GetWavefront, and returns the callback's status unchanged.
hsa_status_t hsa_isa_iterate_wavefronts(
    hsa_isa_t isa,
    hsa_status_t (*callback)(hsa_wavefront_t wavefront,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  const Isa *queried = Isa::Object(isa);
  IS_VALID(queried);

  const Wavefront &only_wavefront = queried->GetWavefront();
  const hsa_status_t callback_status =
      callback(Wavefront::Handle(&only_wavefront), data);
  return callback_status;
  CATCH;
}

/* deprecated */
// Writes to *result whether the code object ISA is compatible with the agent
// ISA according to Isa::IsCompatible (third argument 0).
hsa_status_t hsa_isa_compatible(
    hsa_isa_t code_object_isa,
    hsa_isa_t agent_isa,
    bool *result) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(result);

  const Isa *code_isa = Isa::Object(code_object_isa);
  IS_VALID(code_isa);

  const Isa *device_isa = Isa::Object(agent_isa);
  IS_VALID(device_isa);

  *result = Isa::IsCompatible(*code_isa, *device_isa, 0);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

//===--- Code Objects (deprecated) ----------------------------------------===//

namespace {

// Region-iteration callback. `data` points at an hsa_region_t whose handle is
// still 0; the first region reporting HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is
// stored there and the iteration is stopped with HSA_STATUS_INFO_BREAK.
hsa_status_t IsCodeObjectAllocRegion(
    hsa_region_t region,
    void *data) {
  assert(data);
  hsa_region_t *found = static_cast<hsa_region_t*>(data);
  assert(found->handle == 0);

  bool alloc_allowed = false;
  const hsa_status_t query = HSA::hsa_region_get_info(
      region, HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  if (query != HSA_STATUS_SUCCESS) {
    return query;
  }

  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  found->handle = region.handle;
  return HSA_STATUS_INFO_BREAK;
}

// Agent-iteration callback. Only CPU agents are searched: their regions are
// walked with IsCodeObjectAllocRegion; every other device type is skipped.
hsa_status_t FindCodeObjectAllocRegionForAgent(
    hsa_agent_t agent,
    void *data) {
  assert(data);
  assert(static_cast<hsa_region_t*>(data)->handle == 0);

  hsa_device_type_t device_type = HSA_DEVICE_TYPE_CPU;
  const hsa_status_t query = HSA::hsa_agent_get_info(
      agent, HSA_AGENT_INFO_DEVICE, &device_type);
  if (query != HSA_STATUS_SUCCESS) {
    return query;
  }

  if (device_type != HSA_DEVICE_TYPE_CPU) {
    return HSA_STATUS_SUCCESS;
  }

  return HSA::hsa_agent_iterate_regions(agent, IsCodeObjectAllocRegion, data);
}

// Walks all agents looking for a region usable for code object storage; the
// result is left in the hsa_region_t that `data` points to.
hsa_status_t FindCodeObjectAllocRegion(
    void *data) {
  assert(data);
  assert(static_cast<hsa_region_t*>(data)->handle == 0);

  const hsa_status_t search_status =
      HSA::hsa_iterate_agents(FindCodeObjectAllocRegionForAgent, data);
  return search_status;
}

// Shorthand for the runtime singleton's code manager.
amd::hsa::code::AmdHsaCodeManager *GetCodeManager() {
  amd::hsa::code::AmdHsaCodeManager *manager =
      core::Runtime::runtime_singleton_->code_manager();
  return manager;
}

} // namespace anonymous

/* deprecated */
// Copies the ELF image of `code_object` into a buffer obtained from
// `alloc_callback` and reports the buffer and its size through the two output
// pointers. A failing callback status is returned unchanged. `options` is not
// consulted.
hsa_status_t hsa_code_object_serialize(
    hsa_code_object_t code_object,
    hsa_status_t (*alloc_callback)(size_t size,
                                   hsa_callback_data_t data,
                                   void **address),
    hsa_callback_data_t callback_data,
    const char *options,
    void **serialized_code_object,
    size_t *serialized_code_object_size) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(alloc_callback);
  IS_BAD_PTR(serialized_code_object);
  IS_BAD_PTR(serialized_code_object_size);

  amd::hsa::code::AmdHsaCode *elf_code = GetCodeManager()->FromHandle(code_object);
  if (elf_code == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  // Let the caller provide storage large enough for the whole ELF image.
  const hsa_status_t alloc_status = alloc_callback(
      elf_code->ElfSize(), callback_data, serialized_code_object);
  if (alloc_status != HSA_STATUS_SUCCESS) {
    return alloc_status;
  }
  assert(*serialized_code_object);

  memcpy(*serialized_code_object, elf_code->ElfData(), elf_code->ElfSize());
  *serialized_code_object_size = elf_code->ElfSize();

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/* deprecated */
// Creates a code object handle from a serialized image: a runtime-allocatable
// region is located, a buffer of the serialized size is allocated from it, the
// bytes are copied over and the buffer address becomes the handle value.
// `options` is not consulted.
hsa_status_t hsa_code_object_deserialize(
    void *serialized_code_object,
    size_t serialized_code_object_size,
    const char *options,
    hsa_code_object_t *code_object) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(serialized_code_object);
  IS_BAD_PTR(code_object);

  if (serialized_code_object_size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_t storage_region = {0};
  hsa_status_t status = FindCodeObjectAllocRegion(&storage_region);
  // HSA_STATUS_INFO_BREAK just means the search stopped early on a match.
  const bool search_ok =
      (status == HSA_STATUS_SUCCESS) || (status == HSA_STATUS_INFO_BREAK);
  if (!search_ok) {
    return status;
  }
  assert(storage_region.handle != 0);

  void *storage = nullptr;
  status = HSA::hsa_memory_allocate(
      storage_region, serialized_code_object_size, &storage);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  assert(storage);

  memcpy(storage, serialized_code_object, serialized_code_object_size);
  code_object->handle = reinterpret_cast<uint64_t>(storage);

  return HSA_STATUS_SUCCESS;
  CATCH;
}

/* deprecated */
// Destroys a code object: the code manager drops its entry first, then the
// backing memory (the handle value interpreted as a pointer) is freed. A zero
// handle or a handle the code manager refuses to destroy yields
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT. The status of the free is not checked.
hsa_status_t hsa_code_object_destroy(
    hsa_code_object_t code_object) {
  TRY;
  IS_OPEN();

  void *backing_memory = reinterpret_cast<void*>(code_object.handle);
  const bool destroyed =
      (backing_memory != nullptr) && GetCodeManager()->Destroy(code_object);
  if (!destroyed) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  HSA::hsa_memory_free(backing_memory);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Maps a legacy "AMD:AMDGPU:<major>:<minor>:<stepping>" target name to the
// "amdgcn-amd-amdhsa--gfxNNN[:xnack+|:xnack-]" form. Returns an empty string
// for any name that is not in the table.
//
// The xnack suffix is chosen as follows:
//  - finalizer-produced code: ":xnack+" when EFlags carries
//    ELF::EF_AMDGPU_FEATURE_XNACK_V2, otherwise ":xnack-" for targets marked
//    xnack-capable, otherwise no suffix;
//  - other code: ":xnack+" when the legacy name itself implies it, otherwise
//    ":xnack-" for xnack-capable targets, otherwise no suffix.
static std::string ConvertOldTargetNameToNew(
    const std::string &OldName, bool IsFinalizer, uint32_t EFlags) {
  struct LegacyTarget {
    const char *old_name;
    const char *new_name;
    bool xnack_capable;  // target may carry an xnack setting at all
    bool xnack_implied;  // legacy name implies xnack+ for non-finalizer code
  };

  // FIXME #1: Should 9:0:3 be completely (loader, sc, etc.) removed?
  // FIXME #2: What does PAL do with respect to boltzmann/usual fiji/tonga?
  //
  // Code object v2 only supports asics up to gfx906 plus gfx90c. Do NOT add
  // handling of new asics into this table.
  static const LegacyTarget kLegacyTargets[] = {
    {"AMD:AMDGPU:6:0:0",  "amdgcn-amd-amdhsa--gfx600", false, false},
    {"AMD:AMDGPU:6:0:1",  "amdgcn-amd-amdhsa--gfx601", false, false},
    {"AMD:AMDGPU:6:0:2",  "amdgcn-amd-amdhsa--gfx602", false, false},
    {"AMD:AMDGPU:7:0:0",  "amdgcn-amd-amdhsa--gfx700", false, false},
    {"AMD:AMDGPU:7:0:1",  "amdgcn-amd-amdhsa--gfx701", false, false},
    {"AMD:AMDGPU:7:0:2",  "amdgcn-amd-amdhsa--gfx702", false, false},
    {"AMD:AMDGPU:7:0:3",  "amdgcn-amd-amdhsa--gfx703", false, false},
    {"AMD:AMDGPU:7:0:4",  "amdgcn-amd-amdhsa--gfx704", false, false},
    {"AMD:AMDGPU:7:0:5",  "amdgcn-amd-amdhsa--gfx705", false, false},
    {"AMD:AMDGPU:8:0:1",  "amdgcn-amd-amdhsa--gfx801", true,  true},
    {"AMD:AMDGPU:8:0:0",  "amdgcn-amd-amdhsa--gfx802", false, false},
    {"AMD:AMDGPU:8:0:2",  "amdgcn-amd-amdhsa--gfx802", false, false},
    {"AMD:AMDGPU:8:0:3",  "amdgcn-amd-amdhsa--gfx803", false, false},
    {"AMD:AMDGPU:8:0:4",  "amdgcn-amd-amdhsa--gfx803", false, false},
    {"AMD:AMDGPU:8:0:5",  "amdgcn-amd-amdhsa--gfx805", false, false},
    {"AMD:AMDGPU:8:1:0",  "amdgcn-amd-amdhsa--gfx810", true,  true},
    {"AMD:AMDGPU:9:0:0",  "amdgcn-amd-amdhsa--gfx900", true,  false},
    {"AMD:AMDGPU:9:0:1",  "amdgcn-amd-amdhsa--gfx900", true,  true},
    {"AMD:AMDGPU:9:0:2",  "amdgcn-amd-amdhsa--gfx902", true,  false},
    {"AMD:AMDGPU:9:0:3",  "amdgcn-amd-amdhsa--gfx902", true,  true},
    {"AMD:AMDGPU:9:0:4",  "amdgcn-amd-amdhsa--gfx904", true,  false},
    {"AMD:AMDGPU:9:0:5",  "amdgcn-amd-amdhsa--gfx904", true,  true},
    {"AMD:AMDGPU:9:0:6",  "amdgcn-amd-amdhsa--gfx906", true,  false},
    {"AMD:AMDGPU:9:0:7",  "amdgcn-amd-amdhsa--gfx906", true,  true},
    {"AMD:AMDGPU:9:0:12", "amdgcn-amd-amdhsa--gfx90c", true,  false},
  };

  const LegacyTarget *match = nullptr;
  for (const LegacyTarget &candidate : kLegacyTargets) {
    if (OldName == candidate.old_name) {
      match = &candidate;
      break;
    }
  }
  if (match == nullptr) {
    return "";
  }

  std::string NewName = match->new_name;

  // Finalizer output states xnack through the ELF flags; everything else
  // encodes it in the legacy stepping number.
  const bool xnack_on = IsFinalizer
      ? ((EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V2) != 0)
      : match->xnack_implied;

  if (xnack_on) {
    NewName += ":xnack+";
  } else if (match->xnack_capable) {
    NewName += ":xnack-";
  }

  return NewName;
}

/* deprecated */
// Code object attribute query. Every attribute except
// HSA_CODE_OBJECT_INFO_ISA is answered directly by AmdHsaCode::GetInfo. For
// the ISA attribute the legacy ISA name stored in the code object is
// translated with ConvertOldTargetNameToNew and resolved to an hsa_isa_t via
// hsa_isa_from_name.
hsa_status_t hsa_code_object_get_info(
    hsa_code_object_t code_object,
    hsa_code_object_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  amd::hsa::code::AmdHsaCode *elf_code = GetCodeManager()->FromHandle(code_object);
  if (elf_code == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  if (attribute != HSA_CODE_OBJECT_INFO_ISA) {
    return elf_code->GetInfo(attribute, value);
  }

  char legacy_name[64];
  hsa_status_t status = elf_code->GetInfo(attribute, &legacy_name);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  const std::string legacy_name_str(legacy_name);

  // Only finalizer generated the "HSAIL" note, so its presence tells the two
  // producers apart. The note's contents themselves are not used here.
  uint32_t hsail_major;
  uint32_t hsail_minor;
  hsa_profile_t note_profile;
  hsa_machine_model_t note_machine_model;
  hsa_default_float_rounding_mode_t note_rounding_mode;
  const bool from_finalizer = elf_code->GetNoteHsail(
      &hsail_major, &hsail_minor, &note_profile, &note_machine_model,
      &note_rounding_mode);

  const std::string target_name = ConvertOldTargetNameToNew(
      legacy_name_str, from_finalizer, elf_code->EFlags());

  hsa_isa_t resolved_isa = {0};
  status = HSA::hsa_isa_from_name(target_name.c_str(), &resolved_isa);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  *static_cast<hsa_isa_t*>(value) = resolved_isa;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/* deprecated */
// Looks up `symbol_name` in the code object, passing nullptr as the module
// name to AmdHsaCode::GetSymbol.
hsa_status_t hsa_code_object_get_symbol(
    hsa_code_object_t code_object,
    const char *symbol_name,
    hsa_code_symbol_t *symbol) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(symbol_name);
  IS_BAD_PTR(symbol);

  amd::hsa::code::AmdHsaCode *elf_code = GetCodeManager()->FromHandle(code_object);
  if (elf_code == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  const char *no_module = nullptr;
  return elf_code->GetSymbol(no_module, symbol_name, symbol);
  CATCH;
}

/* deprecated */
// Looks up a symbol by module name and symbol name via AmdHsaCode::GetSymbol.
// `module_name` is forwarded unchecked (it is not screened with IS_BAD_PTR).
hsa_status_t hsa_code_object_get_symbol_from_name(
    hsa_code_object_t code_object,
    const char *module_name,
    const char *symbol_name,
    hsa_code_symbol_t *symbol) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(symbol_name);
  IS_BAD_PTR(symbol);

  amd::hsa::code::AmdHsaCode *elf_code = GetCodeManager()->FromHandle(code_object);
  if (elf_code == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  const hsa_status_t lookup_status =
      elf_code->GetSymbol(module_name, symbol_name, symbol);
  return lookup_status;
  CATCH;
}

/* deprecated */
// Code symbol attribute query; a handle that does not resolve to a symbol
// object yields HSA_STATUS_ERROR_INVALID_CODE_SYMBOL.
hsa_status_t hsa_code_symbol_get_info(
    hsa_code_symbol_t code_symbol,
    hsa_code_symbol_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  code::Symbol *resolved = code::Symbol::FromHandle(code_symbol);
  if (resolved == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_SYMBOL;
  }

  const hsa_status_t query_status = resolved->GetInfo(attribute, value);
  return query_status;
  CATCH;
}

/* deprecated */
// Hands `callback`/`data` to AmdHsaCode::IterateSymbols for the given code
// object and returns its status.
hsa_status_t hsa_code_object_iterate_symbols(
    hsa_code_object_t code_object,
    hsa_status_t (*callback)(hsa_code_object_t code_object,
                             hsa_code_symbol_t symbol,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  amd::hsa::code::AmdHsaCode *elf_code = GetCodeManager()->FromHandle(code_object);
  if (elf_code == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  const hsa_status_t iteration_status =
      elf_code->IterateSymbols(code_object, callback, data);
  return iteration_status;
  CATCH;
}

//===--- Executable -------------------------------------------------------===//

using amd::hsa::common::Signed;
using amd::hsa::loader::Loader;
using amd::hsa::loader::Executable;
using amd::hsa::loader::CodeObjectReaderImpl;

namespace {

// Shorthand for the runtime singleton's loader.
Loader *GetLoader() {
  Loader *runtime_loader = core::Runtime::runtime_singleton_->loader();
  return runtime_loader;
}

} // namespace anonymous

// Creates a code object reader bound to `file`. The reader is held in a
// unique_ptr until SetFile has succeeded, so early returns do not leak it;
// ownership then moves into the returned handle.
hsa_status_t hsa_code_object_reader_create_from_file(
    hsa_file_t file,
    hsa_code_object_reader_t *code_object_reader) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(code_object_reader);

  std::unique_ptr<CodeObjectReaderImpl> pending(
    new (std::nothrow) CodeObjectReaderImpl());
  CHECK_ALLOC(pending);

  hsa_status_t bind_status = pending->SetFile(file);
  CHECK_STATUS(bind_status);

  *code_object_reader = CodeObjectReaderImpl::Handle(pending.release());
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Creates a code object reader over `size` bytes at `code_object`. A zero
// size is HSA_STATUS_ERROR_INVALID_ARGUMENT. The reader is held in a
// unique_ptr until SetMemory has succeeded; ownership then moves into the
// returned handle.
hsa_status_t hsa_code_object_reader_create_from_memory(
    const void *code_object,
    size_t size,
    hsa_code_object_reader_t *code_object_reader) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(code_object);
  IS_BAD_PTR(code_object_reader);

  if (size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  std::unique_ptr<CodeObjectReaderImpl> pending(
    new (std::nothrow) CodeObjectReaderImpl());
  CHECK_ALLOC(pending);

  hsa_status_t bind_status = pending->SetMemory(code_object, size);
  CHECK_STATUS(bind_status);

  *code_object_reader = CodeObjectReaderImpl::Handle(pending.release());
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Deletes the reader object behind the handle; a handle that does not resolve
// to an object yields HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER.
hsa_status_t hsa_code_object_reader_destroy(
    hsa_code_object_reader_t code_object_reader) {
  TRY;
  IS_OPEN();

  CodeObjectReaderImpl *doomed =
      CodeObjectReaderImpl::Object(code_object_reader);
  if (doomed == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER;
  }

  delete doomed;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/* deprecated */
// Creates an executable through hsa_executable_create_alt (with
// HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) and, when the requested state is
// HSA_EXECUTABLE_STATE_FROZEN, freezes it straight away. The result of that
// Freeze call is not inspected.
hsa_status_t hsa_executable_create(
    hsa_profile_t profile,
    hsa_executable_state_t executable_state,
    const char *options,
    hsa_executable_t *executable) {
  TRY;
  IS_OPEN();
  IS_BAD_PROFILE(profile);
  IS_BAD_EXECUTABLE_STATE(executable_state);
  IS_BAD_PTR(executable);

  // Invoke non-deprecated API.
  const hsa_status_t create_status = HSA::hsa_executable_create_alt(
      profile, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, options, executable);
  if (create_status != HSA_STATUS_SUCCESS) {
    return create_status;
  }

  Executable *created = Executable::Object(*executable);
  if (created == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  const bool freeze_now = (executable_state == HSA_EXECUTABLE_STATE_FROZEN);
  if (freeze_now) {
    created->Freeze(nullptr);
  }

  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Creates an executable via the loader, giving it a fresh amd::LoaderContext,
// and writes the resulting handle to *executable.
hsa_status_t hsa_executable_create_alt(
    hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char *options,
    hsa_executable_t *executable) {
  TRY;
  IS_OPEN();
  IS_BAD_PROFILE(profile);
  // NOTES: should we check if default float rounding mode is valid?
  // spec does not say so.
  IS_BAD_ROUNDING_MODE(default_float_rounding_mode);
  IS_BAD_PTR(executable);

  Loader *runtime_loader = GetLoader();
  Executable *created = runtime_loader->CreateExecutable(
      std::unique_ptr<amd::LoaderContext>(new amd::LoaderContext()),
      profile, options, default_float_rounding_mode);
  CHECK_ALLOC(created);

  *executable = Executable::Handle(created);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Destroys the executable through the loader; a handle that does not resolve
// to an object yields HSA_STATUS_ERROR_INVALID_EXECUTABLE.
hsa_status_t hsa_executable_destroy(
    hsa_executable_t executable) {
  TRY;
  IS_OPEN();

  Executable *doomed = Executable::Object(executable);
  if (doomed == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  GetLoader()->DestroyExecutable(doomed);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

/* deprecated */
// Loads a code object (identified by a handle whose value is the address of
// its ELF image) into the executable for `agent`. A temporary reader is set
// up over the image so that a URI can be passed to the loader; the status of
// that SetMemory call is not inspected.
hsa_status_t hsa_executable_load_code_object(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options) {
  TRY;
  IS_OPEN();

  Executable *target = Executable::Object(executable);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  void *elf_image = reinterpret_cast<void*>(code_object.handle);
  if (elf_image == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  CodeObjectReaderImpl uri_source;
  uri_source.SetMemory(elf_image, amd::elf::ElfSize(elf_image));

  return target->LoadCodeObject(agent, code_object, options, uri_source.GetUri());
  CATCH;
}

// Loads the code object exposed by `code_object_reader` into the executable
// without naming an agent (a zero agent handle is passed to LoadCodeObject).
hsa_status_t hsa_executable_load_program_code_object(
    hsa_executable_t executable,
    hsa_code_object_reader_t code_object_reader,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object) {
  TRY;
  IS_OPEN();

  Executable *target = Executable::Object(executable);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  CodeObjectReaderImpl *source = CodeObjectReaderImpl::Object(
      code_object_reader);
  if (source == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER;
  }

  // The code object handle is simply the address of the reader's memory.
  hsa_code_object_t image_handle =
      {reinterpret_cast<uint64_t>(source->GetCodeObjectMemory())};
  return target->LoadCodeObject(
      {0}, image_handle, options, source->GetUri(), loaded_code_object);
  CATCH;
}

// Loads the code object exposed by a code object reader into an executable
// for the given agent.
//
// Returns HSA_STATUS_ERROR_INVALID_EXECUTABLE or
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER when the respective handle does
// not resolve to an object; otherwise returns the result of
// Executable::LoadCodeObject, which also receives loaded_code_object.
hsa_status_t hsa_executable_load_agent_code_object(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_code_object_reader_t code_object_reader,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object) {
  TRY;
  IS_OPEN();

  Executable *target = Executable::Object(executable);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  CodeObjectReaderImpl *source = CodeObjectReaderImpl::Object(code_object_reader);
  if (source == nullptr) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER;
  }

  // Wrap the reader's memory address in a code object handle.
  hsa_code_object_t code_object;
  code_object.handle = reinterpret_cast<uint64_t>(source->GetCodeObjectMemory());

  return target->LoadCodeObject(agent, code_object, options,
                                source->GetUri(), loaded_code_object);
  CATCH;
}

// Freezes an executable by delegating to the loader.
//
// Returns HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve
// to an executable object; otherwise returns the loader's FreezeExecutable
// result. The options string is forwarded unchanged.
hsa_status_t hsa_executable_freeze(
    hsa_executable_t executable,
    const char *options) {
  TRY;
  IS_OPEN();

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return GetLoader()->FreezeExecutable(target, options);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Queries an attribute of an executable.
//
// The output pointer is validated first (IS_BAD_PTR), then the handle.
// Returns HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve
// to an executable object; otherwise returns Executable::GetInfo's result.
hsa_status_t hsa_executable_get_info(
    hsa_executable_t executable,
    hsa_executable_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->GetInfo(attribute, value);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Defines a program-scope external variable on an executable.
//
// variable_name must be non-null (IS_BAD_PTR); address is forwarded without
// any check here. Returns HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle
// does not resolve to an executable object; otherwise returns the result of
// Executable::DefineProgramExternalVariable.
hsa_status_t hsa_executable_global_variable_define(
    hsa_executable_t executable,
    const char *variable_name,
    void *address) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(variable_name);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->DefineProgramExternalVariable(variable_name, address);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Defines an agent-scope external variable in the global segment
// (HSA_VARIABLE_SEGMENT_GLOBAL) on an executable.
//
// variable_name must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object; otherwise returns the result of
// Executable::DefineAgentExternalVariable.
hsa_status_t hsa_executable_agent_global_variable_define(
    hsa_executable_t executable,
    hsa_agent_t agent,
    const char *variable_name,
    void *address) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(variable_name);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->DefineAgentExternalVariable(
        variable_name, agent, HSA_VARIABLE_SEGMENT_GLOBAL, address);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Defines an agent-scope external variable in the readonly segment
// (HSA_VARIABLE_SEGMENT_READONLY) on an executable.
//
// variable_name must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object; otherwise returns the result of
// Executable::DefineAgentExternalVariable.
hsa_status_t hsa_executable_readonly_variable_define(
    hsa_executable_t executable,
    hsa_agent_t agent,
    const char *variable_name,
    void *address) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(variable_name);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->DefineAgentExternalVariable(
        variable_name, agent, HSA_VARIABLE_SEGMENT_READONLY, address);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Validates an executable and writes the outcome through result.
//
// result must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object; otherwise returns Executable::Validate's result.
hsa_status_t hsa_executable_validate(
    hsa_executable_t executable,
    uint32_t *result) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(result);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->Validate(result);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Variant of hsa_executable_validate that accepts an options string.
//
// The options argument is not used by this implementation; after the usual
// entry checks the call is forwarded to HSA::hsa_executable_validate.
hsa_status_t hsa_executable_validate_alt(
    hsa_executable_t executable,
    const char *options,
    uint32_t *result) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(result);

  const hsa_status_t status = HSA::hsa_executable_validate(executable, result);
  return status;
  CATCH;
}

/* deprecated */
// Looks up a symbol by module name and symbol name.
//
// The lookup name is "<module_name>::<symbol_name>" when module_name is a
// non-empty string and plain symbol_name otherwise. An empty symbol_name
// yields HSA_STATUS_ERROR_INVALID_SYMBOL_NAME, and an executable handle that
// does not resolve to an object yields HSA_STATUS_ERROR_INVALID_EXECUTABLE.
// The actual lookup is delegated to hsa_executable_get_symbol_by_name; the
// call_convention argument is not used here.
hsa_status_t hsa_executable_get_symbol(
    hsa_executable_t executable,
    const char *module_name,
    const char *symbol_name,
    hsa_agent_t agent,
    int32_t call_convention,
    hsa_executable_symbol_t *symbol) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(symbol_name);
  IS_BAD_PTR(symbol);

  std::string qualified_name(symbol_name);
  if (qualified_name.empty()) {
    return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME;
  }

  // Prefix the module scope only when a non-empty module name was supplied.
  const bool has_module = (module_name != nullptr) && (module_name[0] != '\0');
  if (has_module) {
    qualified_name = std::string(module_name) + "::" + qualified_name;
  }

  Executable *target = Executable::Object(executable);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  // Invoke non-deprecated API. Program symbols are looked up with a null
  // agent pointer, all other symbols with the caller's agent.
  const char *lookup_name = qualified_name.c_str();
  const hsa_agent_t *agent_filter =
      target->IsProgramSymbol(lookup_name) ? nullptr : &agent;
  return HSA::hsa_executable_get_symbol_by_name(
      executable, lookup_name, agent_filter, symbol);
  CATCH;
}

// Looks up a symbol in an executable by its (already qualified) name and
// returns its handle through symbol.
//
// symbol_name and symbol must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object and HSA_STATUS_ERROR_INVALID_SYMBOL_NAME when
// Executable::GetSymbol finds nothing.
hsa_status_t hsa_executable_get_symbol_by_name(
    hsa_executable_t executable,
    const char *symbol_name,
    const hsa_agent_t *agent, // NOTES: taking a pointer is not consistent with
                              // the rest of the specification, but seems like a
                              // better approach to distinguish program/agent
                              // symbols.
    hsa_executable_symbol_t *symbol) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(symbol_name);
  IS_BAD_PTR(symbol);

  Executable *target = Executable::Object(executable);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  }

  loader::Symbol *found = target->GetSymbol(symbol_name, agent);
  if (found == nullptr) {
    return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME;
  }

  // Hand the symbol back to the caller as an opaque handle.
  *symbol = loader::Symbol::Handle(found);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Queries an attribute of an executable symbol.
//
// value must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL when the handle does not resolve
// to a symbol object. Symbol::GetInfo's boolean result is mapped to
// HSA_STATUS_SUCCESS (true) or HSA_STATUS_ERROR_INVALID_ARGUMENT (false).
hsa_status_t hsa_executable_symbol_get_info(
    hsa_executable_symbol_t executable_symbol,
    hsa_executable_symbol_info_t attribute,
    void *value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  loader::Symbol *target = loader::Symbol::Object(executable_symbol);
  if (target == nullptr) {
    return HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL;
  }

  if (target->GetInfo(attribute, value)) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  CATCH;
}

/* deprecated */
// Invokes callback for the symbols of an executable via
// Executable::IterateSymbols, passing data through to each invocation.
//
// callback must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object; otherwise returns the iteration's result.
hsa_status_t hsa_executable_iterate_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t executable,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->IterateSymbols(callback, data);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Invokes callback for the symbols of an executable that belong to the given
// agent, via Executable::IterateAgentSymbols.
//
// callback must be non-null (IS_BAD_PTR) and the agent handle must pass
// IS_VALID. Returns HSA_STATUS_ERROR_INVALID_EXECUTABLE when the executable
// handle does not resolve to an object; otherwise returns the iteration's
// result.
hsa_status_t hsa_executable_iterate_agent_symbols(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_agent_t agent,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  // NOTES: the specification does not say whether the agent should be
  // validated; this implementation does validate it.
  const core::Agent *resolved_agent = core::Agent::Convert(agent);
  IS_VALID(resolved_agent);

  Executable *target = Executable::Object(executable);
  if (target != nullptr) {
    return target->IterateAgentSymbols(agent, callback, data);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Invokes callback for the program symbols of an executable via
// Executable::IterateProgramSymbols, passing data through to each invocation.
//
// callback must be non-null (IS_BAD_PTR). Returns
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// executable object; otherwise returns the iteration's result.
hsa_status_t hsa_executable_iterate_program_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  using LoaderExecutable = amd::hsa::loader::Executable;
  LoaderExecutable *target = LoaderExecutable::Object(executable);
  if (target != nullptr) {
    return target->IterateProgramSymbols(callback, data);
  }

  return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
  CATCH;
}

// Retrieves the tile configuration of an agent through the agent's driver.
//
// config must be non-null (IS_BAD_PTR) and is treated as a pointer to an
// HsaGpuTileConfig. The agent handle must pass IS_VALID. The driver's
// GetTileConfig result for the agent's node id is returned.
hsa_status_t hsa_get_tile_config(hsa_agent_t agent_handle, void* config) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(config);

  const core::Agent* resolved_agent = core::Agent::Convert(agent_handle);
  IS_VALID(resolved_agent);

  HsaGpuTileConfig* tile_config = static_cast<HsaGpuTileConfig*>(config);
  return resolved_agent->driver().GetTileConfig(resolved_agent->node_id(), tile_config);
  CATCH;
}

//===--- Runtime Notifications --------------------------------------------===//

// Maps a status code to a static, human-readable description.
//
// On a recognized code, *status_string is set to a string literal of the form
// "<ENUM_NAME>: <description>" and HSA_STATUS_SUCCESS is returned. For an
// unrecognized code HSA_STATUS_ERROR_INVALID_ARGUMENT is returned and
// *status_string is left untouched. status_string must be non-null
// (IS_BAD_PTR).
hsa_status_t hsa_status_string(
    hsa_status_t status,
    const char **status_string) {
  IS_BAD_PTR(status_string);

  // The switch runs on an integer so that enumerators from the core and the
  // extension status enums can share one set of case labels.
  const size_t code = static_cast<size_t>(status);
  const char *text = nullptr;

  switch (code) {
    case HSA_STATUS_SUCCESS:
      text = "HSA_STATUS_SUCCESS: The function has been executed successfully.";
      break;
    case HSA_STATUS_INFO_BREAK:
      text =
          "HSA_STATUS_INFO_BREAK: A traversal over a list of elements has been interrupted by the "
          "application before completing.";
      break;
    case HSA_STATUS_ERROR:
      text = "HSA_STATUS_ERROR: A generic error has occurred.";
      break;
    case HSA_STATUS_ERROR_INVALID_ARGUMENT:
      text =
          "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual arguments does not meet a "
          "precondition stated in the documentation of the corresponding formal argument.";
      break;
    case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION:
      text =
          "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested queue creation is not valid.";
      break;
    case HSA_STATUS_ERROR_INVALID_ALLOCATION:
      text =
          "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested allocation is not valid.";
      break;
    case HSA_STATUS_ERROR_INVALID_AGENT:
      text = "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_REGION:
      text = "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_SIGNAL:
      text = "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_QUEUE:
      text = "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid.";
      break;
    case HSA_STATUS_ERROR_OUT_OF_RESOURCES:
      text =
          "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to allocate the necessary "
          "resources. This error may also occur when the core runtime library needs to spawn "
          "threads or create internal OS-specific events.";
      break;
    case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT:
      text = "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet is malformed.";
      break;
    case HSA_STATUS_ERROR_RESOURCE_FREE:
      text =
          "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been detected while releasing a resource.";
      break;
    case HSA_STATUS_ERROR_NOT_INITIALIZED:
      text =
          "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than hsa_init has been invoked while the "
          "reference count of the HSA runtime is zero.";
      break;
    case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW:
      text =
          "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum reference count for the object has been "
          "reached.";
      break;
    case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS:
      text =
          "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to a functions are not "
          "compatible.";
      break;
    case HSA_STATUS_ERROR_INVALID_INDEX:
      text = "HSA_STATUS_ERROR_INVALID_INDEX: The index is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_ISA:
      text = "HSA_STATUS_ERROR_INVALID_ISA: The instruction set architecture is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_ISA_NAME:
      text = "HSA_STATUS_ERROR_INVALID_ISA_NAME: The instruction set architecture name is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
      text = "HSA_STATUS_ERROR_INVALID_CODE_OBJECT: The code object is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_EXECUTABLE:
      text = "HSA_STATUS_ERROR_INVALID_EXECUTABLE: The executable is invalid.";
      break;
    case HSA_STATUS_ERROR_FROZEN_EXECUTABLE:
      text = "HSA_STATUS_ERROR_FROZEN_EXECUTABLE: The executable is frozen.";
      break;
    case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
      text =
          "HSA_STATUS_ERROR_INVALID_SYMBOL_NAME: There is no symbol with the given name.";
      break;
    case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED:
      text =
          "HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED: The variable is already defined.";
      break;
    case HSA_STATUS_ERROR_VARIABLE_UNDEFINED:
      text = "HSA_STATUS_ERROR_VARIABLE_UNDEFINED: The variable is undefined.";
      break;
    case HSA_STATUS_ERROR_EXCEPTION:
      text =
          "HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception.";
      break;
    case HSA_STATUS_ERROR_INVALID_CODE_SYMBOL:
      text = "HSA_STATUS_ERROR_INVALID_CODE_SYMBOL: The code object symbol is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL:
      text =
          "HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL: The executable symbol is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_FILE:
      text = "HSA_STATUS_ERROR_INVALID_FILE: The file descriptor is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER:
      text =
          "HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER: The code object reader is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_CACHE:
      text = "HSA_STATUS_ERROR_INVALID_CACHE: The cache is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_WAVEFRONT:
      text = "HSA_STATUS_ERROR_INVALID_WAVEFRONT: The wavefront is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP:
      text = "HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP: The signal group is invalid.";
      break;
    case HSA_STATUS_ERROR_INVALID_RUNTIME_STATE:
      text =
          "HSA_STATUS_ERROR_INVALID_RUNTIME_STATE: The HSA runtime is not in the configuration "
          "state.";
      break;
    case HSA_STATUS_ERROR_FATAL:
      text =
          "HSA_STATUS_ERROR_FATAL: The queue received an error that may require process "
          "termination.";
      break;
    case HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION:
      text =
          "HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond "
          "the largest legal address.";
      break;
    case HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION:
      text =
          "HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION: The agent attempted to execute an illegal shader "
          "instruction.";
      break;
    case HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED:
      text =
          "HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED: Image format is not supported.";
      break;
    case HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED:
      text = "HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED: Image size is not supported.";
      break;
    case HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED:
      text =
          "HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED: Image pitch is not supported or invalid.";
      break;
    case HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED:
      text =
          "HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED: Sampler descriptor is not "
          "supported or invalid.";
      break;
    case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM:
      text = "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program";
      break;
    case HSA_EXT_STATUS_ERROR_INVALID_MODULE:
      text = "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module";
      break;
    case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE:
      text = "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module";
      break;
    case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED:
      text = "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already included";
      break;
    case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH:
      text = "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch";
      break;
    case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED:
      text = "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed";
      break;
    case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH:
      text = "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch";
      break;
    case HSA_STATUS_ERROR_MEMORY_FAULT:
      text =
          "HSA_STATUS_ERROR_MEMORY_FAULT: Agent attempted to access an inaccessible address.";
      break;
    case HSA_STATUS_ERROR_INVALID_MEMORY_POOL:
      text = "HSA_STATUS_ERROR_INVALID_MEMORY_POOL: The memory pool is invalid.";
      break;
    case HSA_STATUS_CU_MASK_REDUCED:
      text =
          "HSA_STATUS_CU_MASK_REDUCED: The CU mask was successfully set but the mask attempted to "
          "enable a CU which was disabled for the process.  CUs disabled for the process remain "
          "disabled.";
      break;
    case HSA_STATUS_ERROR_OUT_OF_REGISTERS:
      text =
          "HSA_STATUS_ERROR_OUT_OF_REGISTERS: Kernel has requested more VGPRs than are available "
          "on this agent";
      break;
    default:
      // Unknown code: report the error without touching the output pointer.
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *status_string = text;
  return HSA_STATUS_SUCCESS;
}

}  // namespace HSA
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/hsa_api_trace_int.h"
#include "core/inc/runtime.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/hsa_table_interface.h"

#include <iostream>

// Tools only APIs.
namespace rocr {
namespace AMD {
// Forward declarations only; no definitions for these functions appear in
// this part of the file.

// Registers an intercept handler (callback, with user_data) on a queue.
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
                                              hsa_amd_queue_intercept_handler callback,
                                              void* user_data);
// Creates a queue that supports interception; the parameter list mirrors a
// regular queue creation call (agent, size, type, error callback and data,
// segment sizes, output queue).
hsa_status_t hsa_amd_queue_intercept_create(
    hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
    uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);

// Registers a notifier (callback, with user_data) for runtime queue creation.
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
                                                   void* user_data);
}   //  namespace AMD

namespace core {

// Returns a reference to a single HsaApiTable instance that is constructed on
// first use (function-local static) and shared by every later call.
HsaApiTable& hsa_api_table() {
  static HsaApiTable instance;
  return instance;
}

// Returns a reference to a second HsaApiTable instance, distinct from the one
// returned by hsa_api_table(); it is likewise constructed on first use
// (function-local static) and shared by every later call.
HsaApiTable& hsa_internal_api_table() {
  static HsaApiTable internal_instance;
  return internal_instance;
}

// Constructs the table and populates it immediately through Init().
HsaApiTable::HsaApiTable() { Init(); }

// Initialize member fields for Hsa Core, Amd Extension and Tools Api's.
// Member fields for the Finalizer, Image and PC Sampling extensions are set to
// null here and will be updated as part of Hsa Runtime initialization.
void HsaApiTable::Init() {
  // Compile time checks to make sure we increment STEPPING when new APIs are added.
  // Profiler team needs STEPPING to change every time we add functions to the tables so that
  // they can add preprocessor macros on the new functions

  constexpr size_t expected_core_api_table_size = 1016;
  constexpr size_t expected_amd_ext_table_size = 608;
  constexpr size_t expected_image_ext_table_size = 128;
  constexpr size_t expected_finalizer_ext_table_size = 64;
  constexpr size_t expected_tools_table_size = 64;
  constexpr size_t expected_pc_sampling_ext_table_size = 72;

  static_assert(sizeof(CoreApiTable) == expected_core_api_table_size,
                "HSA core API table size changed, bump HSA_CORE_API_TABLE_STEP_VERSION and set "
                "expected_core_api_table_size to the new size of the struct");
  static_assert(sizeof(AmdExtTable) == expected_amd_ext_table_size,
                "HSA AMD ext table size changed, bump HSA_AMD_EXT_API_TABLE_STEP_VERSION, "
                "HSA_AMD_INTERFACE_VERSION_MINOR and set expected_amd_ext_table_size to the new "
                "size of the struct");
  static_assert(sizeof(ImageExtTable) == expected_image_ext_table_size,
                "HSA image ext table size changed, bump HSA_IMAGE_API_TABLE_STEP_VERSION and set "
                "expected_image_ext_table_size to the new size of the struct");
  static_assert(sizeof(FinalizerExtTable) == expected_finalizer_ext_table_size,
                "HSA finalizer ext table size changed, bump HSA_FINALIZER_API_TABLE_STEP_VERSION "
                "and set expected_finalizer_ext_table_size to the new size of the struct");
  static_assert(sizeof(ToolsApiTable) == expected_tools_table_size,
                "HSA tools table size changed, bump HSA_TOOLS_API_TABLE_STEP_VERSION "
                "and set expected_tools_table_size to the new size of the struct");
  // The diagnostic names the PC sampling table so a failing build points at
  // the right struct.
  static_assert(sizeof(PcSamplingExtTable) == expected_pc_sampling_ext_table_size,
                "HSA PC sampling ext table size changed, bump "
                "HSA_PC_SAMPLING_API_TABLE_STEP_VERSION "
                "and set expected_pc_sampling_ext_table_size to the new size of the struct");

  // Initialize Version of Api Table
  hsa_api.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
  hsa_api.version.minor_id = sizeof(::HsaApiTable);
  hsa_api.version.step_id = HSA_API_TABLE_STEP_VERSION;

  // Update Api table for Core and its major id
  UpdateCore();
  hsa_api.core_ = &core_api;

  // Update Api table for Amd Extensions and its major id
  UpdateAmdExts();
  hsa_api.amd_ext_ = &amd_ext_api;

  // Initialize Api tables for Finalizer, Image and PC Sampling to null.
  // The tables are initialized as part
  // of Hsa Runtime initialization, including their major ids
  hsa_api.finalizer_ext_ = nullptr;
  hsa_api.image_ext_ = nullptr;
  hsa_api.pc_sampling_ext_ = nullptr;

  // Update Api table for Tools
  UpdateTools();
  hsa_api.tools_ = &tools_api;
}

// Restores the table to its freshly initialized state by running Init() again.
void HsaApiTable::Reset() { Init(); }

// Copies an extension table into this object's own storage and points the
// corresponding hsa_api entry at that internal copy.
//
// ext_table must be non-null (asserted) and is interpreted according to
// table_id: finalizer, image or PC sampling. Any other table_id is ignored.
void HsaApiTable::CloneExts(void* ext_table, uint32_t table_id) {
  assert(ext_table != NULL && "Invalid extension table linked.");

  if (table_id == HSA_EXT_FINALIZER_API_TABLE_ID) {
    // HSA Extension Finalizer Api table
    finalizer_api = *static_cast<FinalizerExtTable*>(ext_table);
    hsa_api.finalizer_ext_ = &finalizer_api;
  } else if (table_id == HSA_EXT_IMAGE_API_TABLE_ID) {
    // HSA Extension Image Api table
    image_api = *static_cast<ImageExtTable*>(ext_table);
    hsa_api.image_ext_ = &image_api;
  } else if (table_id == HSA_EXT_PC_SAMPLING_API_TABLE_ID) {
    // HSA Extension PC Sampling Api table
    pcs_api = *static_cast<PcSamplingExtTable*>(ext_table);
    hsa_api.pc_sampling_ext_ = &pcs_api;
  }
}

void HsaApiTable::LinkExts(void* ext_table, uint32_t table_id) {

  assert(ext_table != NULL && "Invalid extension table linked.");

  // Update HSA Extension Finalizer Api table
  if (table_id == HSA_EXT_FINALIZER_API_TABLE_ID) {
    finalizer_api = *reinterpret_cast<FinalizerExtTable*>(ext_table);
    hsa_api.finalizer_ext_ = reinterpret_cast<FinalizerExtTable*>(ext_table);
    return;
  }

  // Update HSA Extension Image Api table
  if (table_id == HSA_EXT_IMAGE_API_TABLE_ID) {
    image_api = *reinterpret_cast<ImageExtTable*>(ext_table);
    hsa_api.image_ext_ = reinterpret_cast<ImageExtTable*>(ext_table);
    return;
  }

  // Update HSA Extension PC Sampling Api table
  if (table_id == HSA_EXT_PC_SAMPLING_API_TABLE_ID) {
    pcs_api = *reinterpret_cast<PcSamplingExtTable*>(ext_table);
    hsa_api.pc_sampling_ext_ = &pcs_api;
    return;
  }
}

// Update Api table for Hsa Core Runtime
void HsaApiTable::UpdateCore() {

  // Initialize Version of Api Table
  core_api.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
  core_api.version.minor_id = sizeof(::CoreApiTable);
  core_api.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;

  // Initialize function pointers for Hsa Core Runtime Api's
  core_api.hsa_init_fn = HSA::hsa_init;
  core_api.hsa_shut_down_fn = HSA::hsa_shut_down;
  core_api.hsa_system_get_info_fn = HSA::hsa_system_get_info;
  core_api.hsa_system_extension_supported_fn = HSA::hsa_system_extension_supported;
  core_api.hsa_system_get_extension_table_fn = HSA::hsa_system_get_extension_table;
  core_api.hsa_iterate_agents_fn = HSA::hsa_iterate_agents;
  core_api.hsa_agent_get_info_fn = HSA::hsa_agent_get_info;
  core_api.hsa_agent_get_exception_policies_fn =
      HSA::hsa_agent_get_exception_policies;
  core_api.hsa_agent_extension_supported_fn = HSA::hsa_agent_extension_supported;
  core_api.hsa_queue_create_fn = HSA::hsa_queue_create;
  core_api.hsa_soft_queue_create_fn = HSA::hsa_soft_queue_create;
  core_api.hsa_queue_destroy_fn = HSA::hsa_queue_destroy;
  core_api.hsa_queue_inactivate_fn = HSA::hsa_queue_inactivate;
  core_api.hsa_queue_load_read_index_scacquire_fn = HSA::hsa_queue_load_read_index_scacquire;
  core_api.hsa_queue_load_read_index_relaxed_fn =
      HSA::hsa_queue_load_read_index_relaxed;
  core_api.hsa_queue_load_write_index_scacquire_fn = HSA::hsa_queue_load_write_index_scacquire;
  core_api.hsa_queue_load_write_index_relaxed_fn =
      HSA::hsa_queue_load_write_index_relaxed;
  core_api.hsa_queue_store_write_index_relaxed_fn =
      HSA::hsa_queue_store_write_index_relaxed;
  core_api.hsa_queue_store_write_index_screlease_fn = HSA::hsa_queue_store_write_index_screlease;
  core_api.hsa_queue_cas_write_index_scacq_screl_fn = HSA::hsa_queue_cas_write_index_scacq_screl;
  core_api.hsa_queue_cas_write_index_scacquire_fn = HSA::hsa_queue_cas_write_index_scacquire;
  core_api.hsa_queue_cas_write_index_relaxed_fn =
      HSA::hsa_queue_cas_write_index_relaxed;
  core_api.hsa_queue_cas_write_index_screlease_fn = HSA::hsa_queue_cas_write_index_screlease;
  core_api.hsa_queue_add_write_index_scacq_screl_fn = HSA::hsa_queue_add_write_index_scacq_screl;
  core_api.hsa_queue_add_write_index_scacquire_fn = HSA::hsa_queue_add_write_index_scacquire;
  core_api.hsa_queue_add_write_index_relaxed_fn =
      HSA::hsa_queue_add_write_index_relaxed;
  core_api.hsa_queue_add_write_index_screlease_fn = HSA::hsa_queue_add_write_index_screlease;
  core_api.hsa_queue_store_read_index_relaxed_fn =
      HSA::hsa_queue_store_read_index_relaxed;
  core_api.hsa_queue_store_read_index_screlease_fn = HSA::hsa_queue_store_read_index_screlease;
  core_api.hsa_agent_iterate_regions_fn = HSA::hsa_agent_iterate_regions;
  core_api.hsa_region_get_info_fn = HSA::hsa_region_get_info;
  core_api.hsa_memory_register_fn = HSA::hsa_memory_register;
  core_api.hsa_memory_deregister_fn = HSA::hsa_memory_deregister;
  core_api.hsa_memory_allocate_fn = HSA::hsa_memory_allocate;
  core_api.hsa_memory_free_fn = HSA::hsa_memory_free;
  core_api.hsa_memory_copy_fn = HSA::hsa_memory_copy;
  core_api.hsa_memory_assign_agent_fn = HSA::hsa_memory_assign_agent;
  core_api.hsa_signal_create_fn = HSA::hsa_signal_create;
  core_api.hsa_signal_destroy_fn = HSA::hsa_signal_destroy;
  core_api.hsa_signal_load_relaxed_fn = HSA::hsa_signal_load_relaxed;
  core_api.hsa_signal_load_scacquire_fn = HSA::hsa_signal_load_scacquire;
  core_api.hsa_signal_store_relaxed_fn = HSA::hsa_signal_store_relaxed;
  core_api.hsa_signal_store_screlease_fn = HSA::hsa_signal_store_screlease;
  core_api.hsa_signal_wait_relaxed_fn = HSA::hsa_signal_wait_relaxed;
  core_api.hsa_signal_wait_scacquire_fn = HSA::hsa_signal_wait_scacquire;
  core_api.hsa_signal_and_relaxed_fn = HSA::hsa_signal_and_relaxed;
  core_api.hsa_signal_and_scacquire_fn = HSA::hsa_signal_and_scacquire;
  core_api.hsa_signal_and_screlease_fn = HSA::hsa_signal_and_screlease;
  core_api.hsa_signal_and_scacq_screl_fn = HSA::hsa_signal_and_scacq_screl;
  core_api.hsa_signal_or_relaxed_fn = HSA::hsa_signal_or_relaxed;
  core_api.hsa_signal_or_scacquire_fn = HSA::hsa_signal_or_scacquire;
  core_api.hsa_signal_or_screlease_fn = HSA::hsa_signal_or_screlease;
  core_api.hsa_signal_or_scacq_screl_fn = HSA::hsa_signal_or_scacq_screl;
  core_api.hsa_signal_xor_relaxed_fn = HSA::hsa_signal_xor_relaxed;
  core_api.hsa_signal_xor_scacquire_fn = HSA::hsa_signal_xor_scacquire;
  core_api.hsa_signal_xor_screlease_fn = HSA::hsa_signal_xor_screlease;
  core_api.hsa_signal_xor_scacq_screl_fn = HSA::hsa_signal_xor_scacq_screl;
  core_api.hsa_signal_exchange_relaxed_fn = HSA::hsa_signal_exchange_relaxed;
  core_api.hsa_signal_exchange_scacquire_fn = HSA::hsa_signal_exchange_scacquire;
  core_api.hsa_signal_exchange_screlease_fn = HSA::hsa_signal_exchange_screlease;
  core_api.hsa_signal_exchange_scacq_screl_fn = HSA::hsa_signal_exchange_scacq_screl;
  core_api.hsa_signal_add_relaxed_fn = HSA::hsa_signal_add_relaxed;
  core_api.hsa_signal_add_scacquire_fn = HSA::hsa_signal_add_scacquire;
  core_api.hsa_signal_add_screlease_fn = HSA::hsa_signal_add_screlease;
  core_api.hsa_signal_add_scacq_screl_fn = HSA::hsa_signal_add_scacq_screl;
  core_api.hsa_signal_subtract_relaxed_fn = HSA::hsa_signal_subtract_relaxed;
  core_api.hsa_signal_subtract_scacquire_fn = HSA::hsa_signal_subtract_scacquire;
  core_api.hsa_signal_subtract_screlease_fn = HSA::hsa_signal_subtract_screlease;
  core_api.hsa_signal_subtract_scacq_screl_fn = HSA::hsa_signal_subtract_scacq_screl;
  core_api.hsa_signal_cas_relaxed_fn = HSA::hsa_signal_cas_relaxed;
  core_api.hsa_signal_cas_scacquire_fn = HSA::hsa_signal_cas_scacquire;
  core_api.hsa_signal_cas_screlease_fn = HSA::hsa_signal_cas_screlease;
  core_api.hsa_signal_cas_scacq_screl_fn = HSA::hsa_signal_cas_scacq_screl;

  //===--- Instruction Set Architecture -----------------------------------===//

  core_api.hsa_isa_from_name_fn = HSA::hsa_isa_from_name;
  // Deprecated since v1.1.
  core_api.hsa_isa_get_info_fn = HSA::hsa_isa_get_info;
  // Deprecated since v1.1.
  core_api.hsa_isa_compatible_fn = HSA::hsa_isa_compatible;

  //===--- Code Objects (deprecated) --------------------------------------===//

  // Deprecated since v1.1.
  core_api.hsa_code_object_serialize_fn = HSA::hsa_code_object_serialize;
  // Deprecated since v1.1.
  core_api.hsa_code_object_deserialize_fn = HSA::hsa_code_object_deserialize;
  // Deprecated since v1.1.
  core_api.hsa_code_object_destroy_fn = HSA::hsa_code_object_destroy;
  // Deprecated since v1.1.
  core_api.hsa_code_object_get_info_fn = HSA::hsa_code_object_get_info;
  // Deprecated since v1.1.
  core_api.hsa_code_object_get_symbol_fn = HSA::hsa_code_object_get_symbol;
  // Deprecated since v1.1.
  core_api.hsa_code_symbol_get_info_fn = HSA::hsa_code_symbol_get_info;
  // Deprecated since v1.1.
  core_api.hsa_code_object_iterate_symbols_fn =
      HSA::hsa_code_object_iterate_symbols;

  //===--- Executable -----------------------------------------------------===//

  // Deprecated since v1.1.
  core_api.hsa_executable_create_fn = HSA::hsa_executable_create;
  core_api.hsa_executable_destroy_fn = HSA::hsa_executable_destroy;
  // Deprecated since v1.1.
  core_api.hsa_executable_load_code_object_fn =
      HSA::hsa_executable_load_code_object;
  core_api.hsa_executable_freeze_fn = HSA::hsa_executable_freeze;
  core_api.hsa_executable_get_info_fn = HSA::hsa_executable_get_info;
  core_api.hsa_executable_global_variable_define_fn =
      HSA::hsa_executable_global_variable_define;
  core_api.hsa_executable_agent_global_variable_define_fn =
      HSA::hsa_executable_agent_global_variable_define;
  core_api.hsa_executable_readonly_variable_define_fn =
      HSA::hsa_executable_readonly_variable_define;
  core_api.hsa_executable_validate_fn = HSA::hsa_executable_validate;
  // Deprecated since v1.1.
  core_api.hsa_executable_get_symbol_fn = HSA::hsa_executable_get_symbol;
  core_api.hsa_executable_symbol_get_info_fn =
      HSA::hsa_executable_symbol_get_info;
  // Deprecated since v1.1.
  core_api.hsa_executable_iterate_symbols_fn =
      HSA::hsa_executable_iterate_symbols;

  //===--- Runtime Notifications ------------------------------------------===//

  core_api.hsa_status_string_fn = HSA::hsa_status_string;

  // Start HSA v1.1 additions
  core_api.hsa_extension_get_name_fn = HSA::hsa_extension_get_name;
  core_api.hsa_system_major_extension_supported_fn = HSA::hsa_system_major_extension_supported;
  core_api.hsa_system_get_major_extension_table_fn = HSA::hsa_system_get_major_extension_table;
  core_api.hsa_agent_major_extension_supported_fn = HSA::hsa_agent_major_extension_supported;
  core_api.hsa_cache_get_info_fn = HSA::hsa_cache_get_info;
  core_api.hsa_agent_iterate_caches_fn = HSA::hsa_agent_iterate_caches;
  // Silent store optimization is present in all signal ops when no agents are sleeping.
  core_api.hsa_signal_silent_store_relaxed_fn = HSA::hsa_signal_store_relaxed;
  core_api.hsa_signal_silent_store_screlease_fn = HSA::hsa_signal_store_screlease;
  core_api.hsa_signal_group_create_fn = HSA::hsa_signal_group_create;
  core_api.hsa_signal_group_destroy_fn = HSA::hsa_signal_group_destroy;
  core_api.hsa_signal_group_wait_any_scacquire_fn = HSA::hsa_signal_group_wait_any_scacquire;
  core_api.hsa_signal_group_wait_any_relaxed_fn = HSA::hsa_signal_group_wait_any_relaxed;

  //===--- Instruction Set Architecture - HSA v1.1 additions --------------===//

  core_api.hsa_agent_iterate_isas_fn = HSA::hsa_agent_iterate_isas;
  core_api.hsa_isa_get_info_alt_fn = HSA::hsa_isa_get_info_alt;
  core_api.hsa_isa_get_exception_policies_fn =
      HSA::hsa_isa_get_exception_policies;
  core_api.hsa_isa_get_round_method_fn = HSA::hsa_isa_get_round_method;
  core_api.hsa_wavefront_get_info_fn = HSA::hsa_wavefront_get_info;
  core_api.hsa_isa_iterate_wavefronts_fn = HSA::hsa_isa_iterate_wavefronts;

  //===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//

  // Deprecated since v1.1.
  core_api.hsa_code_object_get_symbol_from_name_fn =
      HSA::hsa_code_object_get_symbol_from_name;

  //===--- Executable - HSA v1.1 additions --------------------------------===//

  core_api.hsa_code_object_reader_create_from_file_fn =
      HSA::hsa_code_object_reader_create_from_file;
  core_api.hsa_code_object_reader_create_from_memory_fn =
      HSA::hsa_code_object_reader_create_from_memory;
  core_api.hsa_code_object_reader_destroy_fn =
      HSA::hsa_code_object_reader_destroy;
  core_api.hsa_executable_create_alt_fn = HSA::hsa_executable_create_alt;
  core_api.hsa_executable_load_program_code_object_fn =
      HSA::hsa_executable_load_program_code_object;
  core_api.hsa_executable_load_agent_code_object_fn =
      HSA::hsa_executable_load_agent_code_object;
  core_api.hsa_executable_validate_alt_fn = HSA::hsa_executable_validate_alt;
  core_api.hsa_executable_get_symbol_by_name_fn =
      HSA::hsa_executable_get_symbol_by_name;
  core_api.hsa_executable_iterate_agent_symbols_fn =
      HSA::hsa_executable_iterate_agent_symbols;
  core_api.hsa_executable_iterate_program_symbols_fn =
      HSA::hsa_executable_iterate_program_symbols;
}

// Populates the AMD extension API table (amd_ext_api): stamps the table
// version and points each entry listed below at its implementation in the
// AMD namespace.
// @note: hsa_amd_image_create_fn is not assigned in this function; per the
// original note it is initialized while loading the Image extension library.
void HsaApiTable::UpdateAmdExts() {

  // Initialize Version of Api Table.
  // The minor id carries the size of the table structure rather than a
  // hand-maintained counter.
  amd_ext_api.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
  amd_ext_api.version.minor_id = sizeof(::AmdExtTable);
  amd_ext_api.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;

  // Initialize function pointers for Amd Extension Api's.
  // Each table slot "<name>_fn" is bound to the AMD::<name> implementation.
  amd_ext_api.hsa_amd_coherency_get_type_fn = AMD::hsa_amd_coherency_get_type;
  amd_ext_api.hsa_amd_coherency_set_type_fn = AMD::hsa_amd_coherency_set_type;
  amd_ext_api.hsa_amd_profiling_set_profiler_enabled_fn = AMD::hsa_amd_profiling_set_profiler_enabled;
  amd_ext_api.hsa_amd_profiling_async_copy_enable_fn = AMD::hsa_amd_profiling_async_copy_enable;
  amd_ext_api.hsa_amd_profiling_get_dispatch_time_fn = AMD::hsa_amd_profiling_get_dispatch_time;
  amd_ext_api.hsa_amd_profiling_get_async_copy_time_fn = AMD::hsa_amd_profiling_get_async_copy_time;
  amd_ext_api.hsa_amd_profiling_convert_tick_to_system_domain_fn = AMD::hsa_amd_profiling_convert_tick_to_system_domain;
  amd_ext_api.hsa_amd_signal_async_handler_fn = AMD::hsa_amd_signal_async_handler;
  amd_ext_api.hsa_amd_async_function_fn = AMD::hsa_amd_async_function;
  amd_ext_api.hsa_amd_signal_wait_any_fn = AMD::hsa_amd_signal_wait_any;
  amd_ext_api.hsa_amd_queue_cu_set_mask_fn = AMD::hsa_amd_queue_cu_set_mask;
  amd_ext_api.hsa_amd_queue_cu_get_mask_fn = AMD::hsa_amd_queue_cu_get_mask;
  amd_ext_api.hsa_amd_memory_pool_get_info_fn = AMD::hsa_amd_memory_pool_get_info;
  amd_ext_api.hsa_amd_agent_iterate_memory_pools_fn = AMD::hsa_amd_agent_iterate_memory_pools;
  amd_ext_api.hsa_amd_memory_pool_allocate_fn = AMD::hsa_amd_memory_pool_allocate;
  amd_ext_api.hsa_amd_memory_pool_free_fn = AMD::hsa_amd_memory_pool_free;
  amd_ext_api.hsa_amd_memory_async_copy_fn = AMD::hsa_amd_memory_async_copy;
  amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn = AMD::hsa_amd_memory_async_copy_on_engine;
  amd_ext_api.hsa_amd_memory_copy_engine_status_fn = AMD::hsa_amd_memory_copy_engine_status;
  amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn = AMD::hsa_amd_agent_memory_pool_get_info;
  amd_ext_api.hsa_amd_agents_allow_access_fn = AMD::hsa_amd_agents_allow_access;
  amd_ext_api.hsa_amd_memory_pool_can_migrate_fn = AMD::hsa_amd_memory_pool_can_migrate;
  amd_ext_api.hsa_amd_memory_migrate_fn = AMD::hsa_amd_memory_migrate;
  amd_ext_api.hsa_amd_memory_lock_fn = AMD::hsa_amd_memory_lock;
  amd_ext_api.hsa_amd_memory_unlock_fn = AMD::hsa_amd_memory_unlock;
  amd_ext_api.hsa_amd_memory_fill_fn = AMD::hsa_amd_memory_fill;
  amd_ext_api.hsa_amd_interop_map_buffer_fn = AMD::hsa_amd_interop_map_buffer;
  amd_ext_api.hsa_amd_interop_unmap_buffer_fn = AMD::hsa_amd_interop_unmap_buffer;
  amd_ext_api.hsa_amd_pointer_info_fn = AMD::hsa_amd_pointer_info;
  amd_ext_api.hsa_amd_pointer_info_set_userdata_fn = AMD::hsa_amd_pointer_info_set_userdata;
  amd_ext_api.hsa_amd_ipc_memory_create_fn = AMD::hsa_amd_ipc_memory_create;
  amd_ext_api.hsa_amd_ipc_memory_attach_fn = AMD::hsa_amd_ipc_memory_attach;
  amd_ext_api.hsa_amd_ipc_memory_detach_fn = AMD::hsa_amd_ipc_memory_detach;
  amd_ext_api.hsa_amd_signal_create_fn = AMD::hsa_amd_signal_create;
  amd_ext_api.hsa_amd_ipc_signal_create_fn = AMD::hsa_amd_ipc_signal_create;
  amd_ext_api.hsa_amd_ipc_signal_attach_fn = AMD::hsa_amd_ipc_signal_attach;
  amd_ext_api.hsa_amd_register_system_event_handler_fn = AMD::hsa_amd_register_system_event_handler;
  amd_ext_api.hsa_amd_queue_intercept_create_fn = AMD::hsa_amd_queue_intercept_create;
  amd_ext_api.hsa_amd_queue_intercept_register_fn = AMD::hsa_amd_queue_intercept_register;
  amd_ext_api.hsa_amd_queue_set_priority_fn = AMD::hsa_amd_queue_set_priority;
  amd_ext_api.hsa_amd_memory_async_copy_rect_fn = AMD::hsa_amd_memory_async_copy_rect;
  amd_ext_api.hsa_amd_runtime_queue_create_register_fn = AMD::hsa_amd_runtime_queue_create_register;
  amd_ext_api.hsa_amd_memory_lock_to_pool_fn = AMD::hsa_amd_memory_lock_to_pool;
  amd_ext_api.hsa_amd_register_deallocation_callback_fn = AMD::hsa_amd_register_deallocation_callback;
  amd_ext_api.hsa_amd_deregister_deallocation_callback_fn = AMD::hsa_amd_deregister_deallocation_callback;
  amd_ext_api.hsa_amd_signal_value_pointer_fn = AMD::hsa_amd_signal_value_pointer;
  amd_ext_api.hsa_amd_svm_attributes_set_fn = AMD::hsa_amd_svm_attributes_set;
  amd_ext_api.hsa_amd_svm_attributes_get_fn = AMD::hsa_amd_svm_attributes_get;
  amd_ext_api.hsa_amd_svm_prefetch_async_fn = AMD::hsa_amd_svm_prefetch_async;
  amd_ext_api.hsa_amd_spm_acquire_fn = AMD::hsa_amd_spm_acquire;
  amd_ext_api.hsa_amd_spm_release_fn = AMD::hsa_amd_spm_release;
  amd_ext_api.hsa_amd_spm_set_dest_buffer_fn = AMD::hsa_amd_spm_set_dest_buffer;
  amd_ext_api.hsa_amd_portable_export_dmabuf_fn = AMD::hsa_amd_portable_export_dmabuf;
  amd_ext_api.hsa_amd_portable_close_dmabuf_fn = AMD::hsa_amd_portable_close_dmabuf;
  amd_ext_api.hsa_amd_vmem_address_reserve_fn = AMD::hsa_amd_vmem_address_reserve;
  amd_ext_api.hsa_amd_vmem_address_reserve_align_fn = AMD::hsa_amd_vmem_address_reserve_align;
  amd_ext_api.hsa_amd_vmem_address_free_fn = AMD::hsa_amd_vmem_address_free;
  amd_ext_api.hsa_amd_vmem_handle_create_fn = AMD::hsa_amd_vmem_handle_create;
  amd_ext_api.hsa_amd_vmem_handle_release_fn = AMD::hsa_amd_vmem_handle_release;
  amd_ext_api.hsa_amd_vmem_map_fn = AMD::hsa_amd_vmem_map;
  amd_ext_api.hsa_amd_vmem_unmap_fn = AMD::hsa_amd_vmem_unmap;
  amd_ext_api.hsa_amd_vmem_set_access_fn = AMD::hsa_amd_vmem_set_access;
  amd_ext_api.hsa_amd_vmem_get_access_fn = AMD::hsa_amd_vmem_get_access;
  amd_ext_api.hsa_amd_vmem_export_shareable_handle_fn = AMD::hsa_amd_vmem_export_shareable_handle;
  amd_ext_api.hsa_amd_vmem_import_shareable_handle_fn = AMD::hsa_amd_vmem_import_shareable_handle;
  amd_ext_api.hsa_amd_vmem_retain_alloc_handle_fn = AMD::hsa_amd_vmem_retain_alloc_handle;
  amd_ext_api.hsa_amd_vmem_get_alloc_properties_from_handle_fn =
      AMD::hsa_amd_vmem_get_alloc_properties_from_handle;
  amd_ext_api.hsa_amd_agent_set_async_scratch_limit_fn = AMD::hsa_amd_agent_set_async_scratch_limit;
  amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info;
  amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
  amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
  amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine;
  amd_ext_api.hsa_amd_portable_export_dmabuf_v2_fn = AMD::hsa_amd_portable_export_dmabuf_v2;
}

// Populates the tools API table (tools_api): stamps its version and clears
// every scratch-event hook to nullptr.
void HsaApiTable::UpdateTools() {
  // Version stamp; the minor id is the size of the table structure.
  auto& table_version = tools_api.version;
  table_version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
  table_version.minor_id = sizeof(::ToolsApiTable);
  table_version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;

  // No scratch-event hooks are installed by this function.
  auto& hooks = tools_api;
  hooks.hsa_amd_tool_scratch_event_alloc_start_fn = nullptr;
  hooks.hsa_amd_tool_scratch_event_alloc_end_fn = nullptr;
  hooks.hsa_amd_tool_scratch_event_free_start_fn = nullptr;
  hooks.hsa_amd_tool_scratch_event_free_end_fn = nullptr;
  hooks.hsa_amd_tool_scratch_event_async_reclaim_start_fn = nullptr;
  hooks.hsa_amd_tool_scratch_event_async_reclaim_end_fn = nullptr;
}

// Hands the hsa_api member of the process-wide API table object to
// hsa_table_interface_init().
void LoadInitialHsaApiTable() {
  auto& table = hsa_api_table();
  hsa_table_interface_init(&table.hsa_api);
}

}   //  namespace core
}   //  namespace rocr

// Helper whose constructor loads the initial HSA API table. A single static
// instance is defined below so the constructor runs as part of this
// translation unit's static initialization.
struct Init {
  Init() {
    rocr::core::LoadInitialHsaApiTable();
  }
};
static Init LinkAtLoadOrFirstTranslationUnitAccess;


================================================
FILE: runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <algorithm>
#include <exception>
#include <map>
#include <memory>
#include <new>
#include <set>
#include <typeinfo>
#include <utility>
#include <vector>

#include "core/inc/agent.h"
#include "core/inc/amd_aie_agent.h"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/default_signal.h"
#include "core/inc/exceptions.h"
#include "core/inc/intercept_queue.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/ipc_signal.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"

namespace rocr {

// Compile-time map from a runtime object pointer type to the HSA status code
// reported when a pointer of that type fails validation. It is consumed by the
// IS_VALID / IS_NULL_OR_VALID macros in this file through
// ValidityError<decltype(ptr)>::value.
//
// The primary template is declared but never defined, so using an unmapped
// pointer type is a compile error.
template <class T>
struct ValidityError;
// Signals report HSA_STATUS_ERROR_INVALID_SIGNAL.
template <>
struct ValidityError<core::Signal*> {
  enum { value = HSA_STATUS_ERROR_INVALID_SIGNAL };
};

// Agents report HSA_STATUS_ERROR_INVALID_AGENT.
template <>
struct ValidityError<core::Agent*> {
  enum { value = HSA_STATUS_ERROR_INVALID_AGENT };
};

// Generic memory regions report HSA_STATUS_ERROR_INVALID_REGION.
template <>
struct ValidityError<core::MemoryRegion*> {
  enum { value = HSA_STATUS_ERROR_INVALID_REGION };
};

// AMD memory regions report the same code as generic regions.
template <>
struct ValidityError<AMD::MemoryRegion*> {
  enum { value = HSA_STATUS_ERROR_INVALID_REGION };
};

// Queues report HSA_STATUS_ERROR_INVALID_QUEUE.
template <>
struct ValidityError<core::Queue*> {
  enum { value = HSA_STATUS_ERROR_INVALID_QUEUE };
};

// Pointers to const objects reuse the mapping of the non-const pointer type.
template <class T>
struct ValidityError<const T*> {
  enum { value = ValidityError<T*>::value };
};

// Argument-validation helpers. Each macro expands to a do/while(false)
// statement that returns from the *enclosing function* when its check fails,
// so they are only usable inside functions returning hsa_status_t.

// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT unless var compares equal to true.
#define IS_TRUE(var)                                                                               \
  do {                                                                                             \
    if ((var) != true) return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                   \
  } while (false)

// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when ptr is NULL.
#define IS_BAD_PTR(ptr)                                          \
  do {                                                           \
    if ((ptr) == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \
  } while (false)

// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when arg is zero.
#define IS_ZERO(arg)                                                                               \
  do {                                                                                             \
    if ((arg) == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                      \
  } while (false)

// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when fd is negative.
#define IS_VALID_FD(fd)                                                                            \
  do {                                                                                             \
    if ((fd) < 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;                                        \
  } while (false)

// Returns the ValidityError code mapped to ptr's declared type when ptr is
// NULL or ptr->IsValid() is false. The code is selected with decltype(ptr),
// so the declared type of the argument must have a ValidityError mapping.
// Note that ptr is evaluated more than once.
#define IS_VALID(ptr)                                           \
  do {                                                          \
    if ((ptr) == NULL || !(ptr)->IsValid())                     \
      return hsa_status_t(ValidityError<decltype(ptr)>::value); \
  } while (false)

// Same as IS_VALID, except that a NULL ptr is accepted.
#define IS_NULL_OR_VALID(ptr)                                                                      \
  do {                                                                                             \
    if ((ptr) != NULL && !(ptr)->IsValid())                                                        \
      return hsa_status_t(ValidityError<decltype(ptr)>::value);                                    \
  } while (false)

// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when ptr is NULL.
#define CHECK_ALLOC(ptr)                                         \
  do {                                                           \
    if ((ptr) == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; \
  } while (false)

// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime singleton reports
// that it is not open.
#define IS_OPEN()                                     \
  do {                                                \
    if (!core::Runtime::runtime_singleton_->IsOpen()) \
      return HSA_STATUS_ERROR_NOT_INITIALIZED;        \
  } while (false)

// Null-safe validity check.
// @param ptr Object to test; may be null.
// @return false when ptr is null, otherwise the result of ptr->IsValid().
template <class T>
static __forceinline bool IsValid(T* ptr) {
  // Express the null case as a boolean. The previous form used NULL as the
  // "false" arm of a conditional expression, which relies on NULL being an
  // integer constant and does not compile where NULL is defined as nullptr.
  return (ptr != nullptr) && ptr->IsValid();
}

// Exception firewall used by the API entry points in this file.
// TRY opens a try block; CATCH / CATCHRET close it and add a catch-all handler.
// TRY must be paired with exactly one of CATCH or CATCHRET in the same function.
#define TRY try {
// For functions returning hsa_status_t: returns AMD::handleException().
#define CATCH } catch(...) { return AMD::handleException(); }
// For functions returning RETURN_TYPE: returns AMD::handleExceptionT<RETURN_TYPE>().
#define CATCHRET(RETURN_TYPE) } catch(...) { return AMD::handleExceptionT<RETURN_TYPE>(); }

namespace AMD {

// Translates the exception currently being handled into an HSA status code.
//
// Must be called from inside a catch handler: it re-raises the active
// exception with a bare `throw;` and classifies it with the handlers below.
// The handlers are tried in order, so their sequence matters.
//
// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES for std::bad_alloc, the carried
//         error code for hsa_exception, HSA_STATUS_ERROR for any other
//         std::exception (after asserting).
// A std::nested_exception is not converted; its nested exception is rethrown
// to the caller. Any other exception type asserts and aborts the process.
hsa_status_t handleException() {
  try {
    throw;
  } catch (const std::bad_alloc& e) {
    // Allocation failure maps onto the HSA out-of-resources status.
    debug_print("HSA exception: BadAlloc\n");
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  } catch (const hsa_exception& e) {
    // Runtime exceptions already carry the status code to report.
    ifdebug {
      if (!strIsEmpty(e.what())) debug_print("HSA exception: %s\n", e.what());
    }
    return e.error_code();
  } catch (const std::exception& e) {
    // Any other standard exception is unexpected: assert, then report a
    // generic error.
    debug_print("Unhandled exception: %s\n", e.what());
    assert(false && "Unhandled exception.");
    return HSA_STATUS_ERROR;
  } catch (const std::nested_exception& e) {
    // Forward the nested exception to the caller instead of converting it.
    // rethrow_nested() does not return, so the return below is never reached.
    debug_print("Callback threw, forwarding.\n");
    e.rethrow_nested();
    return HSA_STATUS_ERROR;
  } catch (...) {
    // Unknown exception type: assert, then terminate the process.
    assert(false && "Unhandled exception.");
    abort();
    return HSA_STATUS_ERROR;
  }
}

// Exception handler for entry points that do not return hsa_status_t.
// Runs handleException() for its classification and debug output, discards the
// resulting status, and then aborts the process. The trailing return is never
// reached after abort(); it only gives the function a value of type T to return.
template <class T> static __forceinline T handleExceptionT() {
  handleException();
  abort();
  return T();
}

// Reads the current coherency type of a GPU agent.
// @param agent_handle Agent to query; must be a valid AMD GPU agent.
// @param type         Receives the agent's current coherency type; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_AGENT for an invalid or non-GPU agent,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT if type is null.
hsa_status_t hsa_amd_coherency_get_type(hsa_agent_t agent_handle, hsa_amd_coherency_type_t* type) {
  TRY;
  IS_OPEN();

  const core::Agent* target = core::Agent::Convert(agent_handle);
  IS_VALID(target);
  IS_BAD_PTR(type);

  // Only GPU agents expose a coherency type.
  const bool is_gpu = (target->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) return HSA_STATUS_ERROR_INVALID_AGENT;

  *type = static_cast<const AMD::GpuAgentInt*>(target)->current_coherency_type();
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Changes the coherency type of a GPU agent.
// @param agent_handle Agent to update; must be a valid AMD GPU agent.
// @param type         Requested coherency type; must lie between
//                     HSA_AMD_COHERENCY_TYPE_COHERENT and
//                     HSA_AMD_COHERENCY_TYPE_NONCOHERENT inclusive.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_AGENT for an invalid or non-GPU agent,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for an out-of-range type,
//         HSA_STATUS_ERROR if the agent rejects the change.
hsa_status_t hsa_amd_coherency_set_type(hsa_agent_t agent_handle,
                                        hsa_amd_coherency_type_t type) {
  TRY;
  IS_OPEN();

  core::Agent* target = core::Agent::Convert(agent_handle);
  IS_VALID(target);

  const bool type_in_range = (type >= HSA_AMD_COHERENCY_TYPE_COHERENT) &&
                             (type <= HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
  if (!type_in_range) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  if (target->device_type() != core::Agent::kAmdGpuDevice) return HSA_STATUS_ERROR_INVALID_AGENT;

  AMD::GpuAgent* gpu = static_cast<AMD::GpuAgent*>(target);
  return gpu->current_coherency_type(type) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
  CATCH;
}

// Fills memory with a 32-bit value.
// @param ptr   Destination; must be non-null and 4-byte aligned.
// @param value Value forwarded to the runtime's FillMemory.
// @param count Count forwarded to the runtime's FillMemory; a count of zero
//              succeeds without calling it.
// @return HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for a null or misaligned ptr,
//         otherwise HSA_STATUS_SUCCESS or the status from FillMemory.
hsa_status_t hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count) {
  TRY;
  IS_OPEN();

  const bool is_dword_aligned = (uintptr_t(ptr) % 4 == 0);
  if (ptr == nullptr || !is_dword_aligned) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Nothing to write.
  if (count == 0) return HSA_STATUS_SUCCESS;

  return core::Runtime::runtime_singleton_->FillMemory(ptr, value, count);
  CATCH;
}

// Validates the arguments of an asynchronous copy and forwards it to the
// runtime's CopyMemory.
// @param dst, src          Destination / source addresses; must not be null.
// @param dst_agent_handle  Agent associated with dst; must be valid.
// @param src_agent_handle  Agent associated with src; must be valid.
// @param size              Value forwarded as the copy size; zero succeeds
//                          without issuing a copy.
// @param num_dep_signals   Number of entries in dep_signals.
// @param dep_signals       Signals the copy depends on; must be null exactly
//                          when num_dep_signals is zero, and each must be valid.
// @param completion_signal Signal handed to the copy; must be valid.
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT, HSA_STATUS_ERROR_INVALID_AGENT or
//         HSA_STATUS_ERROR_INVALID_SIGNAL on validation failure, otherwise
//         HSA_STATUS_SUCCESS or the status from CopyMemory.
hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle, const void* src,
                                       hsa_agent_t src_agent_handle, size_t size,
                                       uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                       hsa_signal_t completion_signal) {
  TRY;
  IS_BAD_PTR(dst);
  IS_BAD_PTR(src);

  // The dependency count and the dependency array must agree with each other.
  const bool has_dep_count = (num_dep_signals != 0);
  const bool has_dep_array = (dep_signals != nullptr);
  if (has_dep_count != has_dep_array) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  // Convert and validate every dependency before anything is submitted.
  std::vector<core::Signal*> wait_list;
  wait_list.reserve(num_dep_signals);
  for (uint32_t idx = 0; idx < num_dep_signals; ++idx) {
    core::Signal* dependency = core::Signal::Convert(dep_signals[idx]);
    IS_VALID(dependency);
    wait_list.push_back(dependency);
  }

  core::Signal* completion = core::Signal::Convert(completion_signal);
  IS_VALID(completion);

  const bool swap_agents = core::Runtime::runtime_singleton_->flag().rev_copy_dir();
  if (size == 0) return HSA_STATUS_SUCCESS;

  // When the rev_copy_dir flag is set the two agents trade places.
  core::Agent* first_agent = swap_agents ? src_agent : dst_agent;
  core::Agent* second_agent = swap_agents ? dst_agent : src_agent;
  return core::Runtime::runtime_singleton_->CopyMemory(dst, first_agent, src, second_agent, size,
                                                       wait_list, *completion);
  CATCH;
}

// Validates the arguments of an asynchronous copy bound to a specific engine
// and forwards it to the runtime's CopyMemoryOnEngine.
// @param dst, src           Destination / source addresses; must not be null.
// @param dst_agent_handle   Agent associated with dst; must be valid.
// @param src_agent_handle   Agent associated with src; must be valid.
// @param size               Value forwarded as the copy size; zero succeeds
//                           without issuing a copy.
// @param num_dep_signals    Number of entries in dep_signals.
// @param dep_signals        Signals the copy depends on; must be null exactly
//                           when num_dep_signals is zero, and each must be valid.
// @param completion_signal  Signal handed to the copy; must be valid.
// @param engine_id          Forwarded unchanged to CopyMemoryOnEngine.
// @param force_copy_on_sdma Forwarded unchanged to CopyMemoryOnEngine.
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT, HSA_STATUS_ERROR_INVALID_AGENT or
//         HSA_STATUS_ERROR_INVALID_SIGNAL on validation failure, otherwise
//         HSA_STATUS_SUCCESS or the status from CopyMemoryOnEngine.
hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent_handle,
                                       const void* src, hsa_agent_t src_agent_handle, size_t size,
                                       uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                       hsa_signal_t completion_signal,
                                       hsa_amd_sdma_engine_id_t engine_id,
                                       bool force_copy_on_sdma) {
  TRY;
  IS_BAD_PTR(dst);
  IS_BAD_PTR(src);

  // The dependency count and the dependency array must agree with each other.
  const bool has_dep_count = (num_dep_signals != 0);
  const bool has_dep_array = (dep_signals != nullptr);
  if (has_dep_count != has_dep_array) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  // Convert and validate every dependency before anything is submitted.
  std::vector<core::Signal*> wait_list;
  wait_list.reserve(num_dep_signals);
  for (uint32_t idx = 0; idx < num_dep_signals; ++idx) {
    core::Signal* dependency = core::Signal::Convert(dep_signals[idx]);
    IS_VALID(dependency);
    wait_list.push_back(dependency);
  }

  core::Signal* completion = core::Signal::Convert(completion_signal);
  IS_VALID(completion);

  const bool swap_agents = core::Runtime::runtime_singleton_->flag().rev_copy_dir();
  if (size == 0) return HSA_STATUS_SUCCESS;

  // When the rev_copy_dir flag is set the two agents trade places.
  core::Agent* first_agent = swap_agents ? src_agent : dst_agent;
  core::Agent* second_agent = swap_agents ? dst_agent : src_agent;
  return core::Runtime::runtime_singleton_->CopyMemoryOnEngine(
      dst, first_agent, src, second_agent, size, wait_list, *completion, engine_id,
      force_copy_on_sdma);
  CATCH;
}

// Queries the runtime's CopyMemoryStatus for a destination/source agent pair.
// @param dst_agent_handle Destination agent; must be valid.
// @param src_agent_handle Source agent; must be valid.
// @param engine_ids_mask  Forwarded unchanged to CopyMemoryStatus.
// @return HSA_STATUS_ERROR_INVALID_AGENT if either agent is invalid, otherwise
//         the status returned by CopyMemoryStatus. Exceptions raised by the
//         call are converted by handleException().
hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle,
                                               hsa_agent_t src_agent_handle,
                                               uint32_t *engine_ids_mask) {
  // Guard the body like the other entry points in this file so an exception
  // cannot propagate out of the API call.
  TRY;
  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent,
                                                             engine_ids_mask);
  CATCH;
}

// Queries the runtime's GetPreferredEngine for a destination/source agent pair.
// @param dst_agent_handle     Destination agent; must be valid.
// @param src_agent_handle     Source agent; must be valid.
// @param recommended_ids_mask Forwarded unchanged to GetPreferredEngine.
// @return HSA_STATUS_ERROR_INVALID_AGENT if either agent is invalid, otherwise
//         the status returned by GetPreferredEngine. Exceptions raised by the
//         call are converted by handleException().
hsa_status_t hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent_handle,
                                                      hsa_agent_t src_agent_handle,
                                                      uint32_t* recommended_ids_mask) {
  // Guard the body like the other entry points in this file so an exception
  // cannot propagate out of the API call.
  TRY;
  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  return core::Runtime::runtime_singleton_->GetPreferredEngine(dst_agent, src_agent,
                                                               recommended_ids_mask);
  CATCH;
}

// Validates the arguments of an asynchronous rectangular copy and forwards it
// to the GPU agent's DmaCopyRect.
// @param dst, dst_offset, src, src_offset, range  Copy description; none may be null.
// @param copy_agent        Agent performing the copy; must be a valid AMD GPU agent.
// @param dir               Copy direction; hsaHostToHost is rejected.
// @param num_dep_signals   Number of entries in dep_signals.
// @param dep_signals       Signals the copy depends on; must be null exactly
//                          when num_dep_signals is zero, and each must be valid.
// @param completion_signal Signal handed to the copy; must be valid.
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT, HSA_STATUS_ERROR_INVALID_AGENT or
//         HSA_STATUS_ERROR_INVALID_SIGNAL on validation failure;
//         HSA_STATUS_SUCCESS without copying when any range extent is zero;
//         otherwise the status from DmaCopyRect.
hsa_status_t hsa_amd_memory_async_copy_rect(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal) {
  TRY;
  IS_BAD_PTR(dst);
  IS_BAD_PTR(src);
  IS_BAD_PTR(dst_offset);
  IS_BAD_PTR(src_offset);
  IS_BAD_PTR(range);

  // The dependency count and the dependency array must agree with each other.
  const bool has_dep_count = (num_dep_signals != 0);
  const bool has_dep_array = (dep_signals != NULL);
  if (has_dep_count != has_dep_array) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  if (dir == hsaHostToHost) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  core::Agent* requested_agent = core::Agent::Convert(copy_agent);
  IS_VALID(requested_agent);
  if (requested_agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }
  AMD::GpuAgent* gpu = static_cast<AMD::GpuAgent*>(requested_agent);

  // Convert and validate every dependency before anything is submitted.
  std::vector<core::Signal*> wait_list;
  wait_list.reserve(num_dep_signals);
  for (uint32_t idx = 0; idx < num_dep_signals; ++idx) {
    core::Signal* dependency = core::Signal::Convert(dep_signals[idx]);
    IS_VALID(dependency);
    wait_list.push_back(dependency);
  }

  core::Signal* completion = core::Signal::Convert(completion_signal);
  IS_VALID(completion);

  // A range with any zero extent describes no data to copy.
  if (range->x == 0 || range->y == 0 || range->z == 0) return HSA_STATUS_SUCCESS;

  return gpu->DmaCopyRect(dst, dst_offset, src, src_offset, range, dir, wait_list, *completion);
  CATCH;
}


// Passes the profiling setting to a queue.
// @param queue  Queue to configure; must convert to a valid queue object.
// @param enable Value passed to the queue's SetProfiling.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_QUEUE if the queue is invalid.
hsa_status_t hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable) {
  TRY;
  IS_OPEN();

  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);

  target_queue->SetProfiling(enable);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Applies the profiling setting to every GPU agent and then every CPU agent
// known to the runtime.
// @param enable Value passed to each agent's profiling_enabled().
// @return HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open;
//         otherwise HSA_STATUS_SUCCESS when every agent succeeds, or the
//         status of the last agent that reported a failure. All agents are
//         visited even after a failure.
hsa_status_t hsa_amd_profiling_async_copy_enable(bool enable) {
  TRY;
  IS_OPEN();

  hsa_status_t result = HSA_STATUS_SUCCESS;
  // Remember a failure but keep going so the remaining agents are still updated.
  const auto apply = [&result, enable](core::Agent* agent) {
    const hsa_status_t status = agent->profiling_enabled(enable);
    if (status != HSA_STATUS_SUCCESS) result = status;
  };

  for (core::Agent* gpu : core::Runtime::runtime_singleton_->gpu_agents()) apply(gpu);
  for (core::Agent* cpu : core::Runtime::runtime_singleton_->cpu_agents()) apply(cpu);

  return result;
  CATCH;
}

// Retrieves the dispatch timestamps recorded on a signal, translated by the
// GPU agent into the system domain.
// @param agent_handle Agent that owns the timestamps; must be a valid AMD GPU agent.
// @param hsa_signal   Signal carrying the timestamps; must be valid.
// @param time         Receives the translated times; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT if time is null,
//         HSA_STATUS_ERROR_INVALID_AGENT for an invalid or non-GPU agent,
//         HSA_STATUS_ERROR_INVALID_SIGNAL for an invalid signal.
hsa_status_t hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent_handle, hsa_signal_t hsa_signal,
    hsa_amd_profiling_dispatch_time_t* time) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(time);

  core::Agent* owner = core::Agent::Convert(agent_handle);
  IS_VALID(owner);

  core::Signal* timed_signal = core::Signal::Convert(hsa_signal);
  IS_VALID(timed_signal);

  const bool is_gpu = (owner->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) return HSA_STATUS_ERROR_INVALID_AGENT;

  // Translate timestamp from GPU to system domain.
  static_cast<AMD::GpuAgentInt*>(owner)->TranslateTime(timed_signal, *time);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Retrieves the start/end timestamps of an asynchronous copy from its signal.
// @param hsa_signal Signal associated with the copy; must be valid.
// @param time       Receives the timestamps; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT if time is null,
//         HSA_STATUS_ERROR_INVALID_SIGNAL for an invalid signal,
//         HSA_STATUS_ERROR if the signal has no async copy agent.
hsa_status_t hsa_amd_profiling_get_async_copy_time(
    hsa_signal_t hsa_signal, hsa_amd_profiling_async_copy_time_t* time) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(time);

  core::Signal* copy_signal = core::Signal::Convert(hsa_signal);
  IS_VALID(copy_signal);

  core::Agent* copy_agent = copy_signal->async_copy_agent();
  if (copy_agent == nullptr) return HSA_STATUS_ERROR;

  if (copy_agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice) {
    // The timestamp is already in system domain.
    time->start = copy_signal->signal_.start_ts;
    time->end = copy_signal->signal_.end_ts;
  } else {
    // Translate timestamp from GPU to system domain.
    static_cast<AMD::GpuAgentInt*>(copy_agent)->TranslateTime(copy_signal, *time);
  }
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Converts a GPU agent tick value into the system domain via the agent's
// TranslateTime.
// @param agent_handle Agent the tick belongs to; must be a valid AMD GPU agent.
// @param agent_tick   Tick value to convert.
// @param system_tick  Receives the converted value; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT if system_tick is null,
//         HSA_STATUS_ERROR_INVALID_AGENT for an invalid or non-GPU agent.
hsa_status_t hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent_handle,
                                                             uint64_t agent_tick,
                                                             uint64_t* system_tick) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(system_tick);

  core::Agent* owner = core::Agent::Convert(agent_handle);
  IS_VALID(owner);

  const bool is_gpu = (owner->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_gpu) return HSA_STATUS_ERROR_INVALID_AGENT;

  *system_tick = static_cast<AMD::GpuAgentInt*>(owner)->TranslateTime(agent_tick);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Creates a signal, choosing between a DefaultSignal and an InterruptSignal.
//
// A DefaultSignal is used when HSA_AMD_SIGNAL_IPC or
// HSA_AMD_SIGNAL_AMD_GPU_ONLY is set in attributes, when
// core::g_use_interrupt_wait is false, or when a non-empty consumer list
// contains none of the runtime's CPU agents. Otherwise an InterruptSignal is
// created.
//
// @param initial_value Initial value of the signal.
// @param num_consumers Number of entries in consumers.
// @param consumers     Consumer agents; must not be null when it is inspected,
//                      and must not contain duplicate handles.
// @param attributes    Bit mask of signal attributes.
// @param hsa_signal    Receives the new signal handle; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for a null output pointer, a null
//         consumer list that needs to be inspected, or duplicate consumers.
hsa_status_t hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
                                   const hsa_agent_t* consumers, uint64_t attributes,
                                   hsa_signal_t* hsa_signal) {
  // Orders agent handles so they can be stored in a std::set.
  struct HandleLess {
    bool operator()(const hsa_agent_t& a, const hsa_agent_t& b) const {
      return a.handle < b.handle;
    }
  };

  TRY;
  IS_OPEN();
  IS_BAD_PTR(hsa_signal);

  const bool enable_ipc = (attributes & HSA_AMD_SIGNAL_IPC) != 0;
  bool use_default = enable_ipc || ((attributes & HSA_AMD_SIGNAL_AMD_GPU_ONLY) != 0) ||
      !core::g_use_interrupt_wait;

  if (!use_default && num_consumers != 0) {
    IS_BAD_PTR(consumers);

    // Check for duplicates in consumers.
    const std::set<hsa_agent_t, HandleLess> unique_consumers(consumers,
                                                             consumers + num_consumers);
    if (unique_consumers.size() != num_consumers) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    // A default signal is enough when no CPU agent is among the consumers.
    const auto& cpus = core::Runtime::runtime_singleton_->cpu_agents();
    use_default = std::none_of(cpus.begin(), cpus.end(), [&](const core::Agent* cpu) {
      return unique_consumers.count(cpu->public_handle()) != 0;
    });
  }

  core::Signal* created = nullptr;
  if (!use_default) {
    created = new core::InterruptSignal(initial_value);
  } else {
    created = new core::DefaultSignal(initial_value, enable_ipc);
  }

  *hsa_signal = core::Signal::Convert(created);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Exposes the address of a signal's value field.
// @param hsa_signal Signal to inspect; must be valid and accepted by
//                   core::BusyWaitSignal::IsType.
// @param value_ptr  Receives the address of the signal's value; must not be null.
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the runtime is not open,
//         HSA_STATUS_ERROR_INVALID_SIGNAL for an invalid signal,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for a null value_ptr or a signal
//         rejected by core::BusyWaitSignal::IsType.
hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t hsa_signal,
                                          volatile hsa_signal_value_t** value_ptr) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value_ptr);

  core::Signal* target = core::Signal::Convert(hsa_signal);
  IS_VALID(target);

  const bool is_busy_wait = core::BusyWaitSignal::IsType(target);
  if (!is_busy_wait) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *value_ptr = (volatile hsa_signal_value_t*)&target->signal_.value;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

uint32_t hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* hsa_signals,
                                 hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                 uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                 hsa_signal_value_t* satisfying_values) {
  TRY;
  if (!core::Runtime::runtime_singleton_->IsOpen()) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_NOT_INITIALIZED, "hsa_amd_signal_wait_all called while not initialized");
  }

  // Treat NULL and invalid signals as already satisfied their condition and skip them
  std::vector<hsa_signal_t> valid_signals;
  std::vector<uint32_t> valid_signal_ids;
  for (uint32_t i = 0; i < signal_count; i++){
    if (hsa_signals[i].handle != 0 && core::SharedSignal::Convert(hsa_signals[i])->IsValid()){
      valid_signals.emplace_back(hsa_signals[i]);
      valid_signal_ids.emplace_back(i);
    }
  }

  // Return if there's no valid signal to wait on
  if (valid_signals.empty()){
    if (satisfying_values) {
      // Set 0 as satisfying value for NULL and invalid signals
      std::fill(satisfying_values, satisfying_values + signal_count, 0);
    }
    return uint32_t(0);
  }

  uint32_t valid_signal_count = valid_signals.size();

  std::vector<hsa_signal_value_t> satisfying_values_vec(valid_signal_count);
  uint32_t first_satysifying_signal_idx =
      core::Signal::WaitMultiple(valid_signal_count, valid_signals.data(), conds, values, timeout_hint, wait_hint,
                                 satisfying_values_vec, true);

  if (satisfying_values) {
    // Set 0 as satisfying value for NULL and invalid signals
    std::vector<hsa_signal_value_t> satisfying_values_vec_result(signal_count, 0);
    for (uint32_t i = 0; i < valid_signal_count; i++){
      satisfying_values_vec_result[valid_signal_ids[i]] = satisfying_values_vec[i];
    }
    std::copy(satisfying_values_vec_result.begin(), satisfying_values_vec_result.end(), satisfying_values);
  }

  return first_satysifying_signal_idx;
  CATCHRET(uint32_t);
}

uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signals,
                                 hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                 uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                 hsa_signal_value_t* satisfying_value) {
  TRY;
  if (!core::Runtime::runtime_singleton_->IsOpen()) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_NOT_INITIALIZED, "hsa_amd_signal_wait_any called while not initialized");
  }

  // Ignore NULL and invalid signals
  std::vector<hsa_signal_t> valid_signals;
  std::vector<uint32_t> valid_signal_ids;
  for (uint32_t i = 0; i < signal_count; i++){
    if (hsa_signals[i].handle != 0 && core::SharedSignal::Convert(hsa_signals[i])->IsValid()){
      valid_signals.emplace_back(hsa_signals[i]);
      valid_signal_ids.emplace_back(i);
    }
  }

  // Return if there's no valid signal to wait on
  // satisfying_value is ignored
  if (valid_signals.empty()){
    return std::numeric_limits<uint32_t>::max();
  }

  std::vector<hsa_signal_value_t> satisfying_value_vec(1);
  uint32_t satisfying_signal_idx =
      core::Signal::WaitMultiple(valid_signals.size(), valid_signals.data(), conds, values, timeout_hint, wait_hint,
                                 satisfying_value_vec, false);

  //  Map back the index
  satisfying_signal_idx = valid_signal_ids[satisfying_signal_idx];

  if (satisfying_value) *satisfying_value = satisfying_value_vec.at(0);

  return satisfying_signal_idx;
  CATCHRET(uint32_t);
}

// Registers `handler` to be invoked for `hsa_signal` under condition `cond`/`value`.
//
// When core::g_use_interrupt_wait is set, only signals recognised by
// core::InterruptSignal::IsType() or core::IPCSignal::IsType() are accepted; other
// signals yield HSA_STATUS_ERROR_INVALID_SIGNAL. Registration itself is delegated to
// Runtime::SetAsyncSignalHandler, whose status is returned.
hsa_status_t hsa_amd_signal_async_handler(hsa_signal_t hsa_signal, hsa_signal_condition_t cond,
                                          hsa_signal_value_t value, hsa_amd_signal_handler handler,
                                          void* arg) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(handler);

  core::Signal* sig = core::Signal::Convert(hsa_signal);
  IS_VALID(sig);

  if (core::g_use_interrupt_wait) {
    const bool is_interrupt = core::InterruptSignal::IsType(sig);
    if (!is_interrupt && !core::IPCSignal::IsType(sig)) {
      return HSA_STATUS_ERROR_INVALID_SIGNAL;
    }
  }

  const hsa_status_t status = core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
      hsa_signal, cond, value, handler, arg);
  return status;
  CATCH;
}

// Queues `callback(arg)` through the runtime's async signal handler machinery.
//
// The callback is registered against a null signal (handle 0) with condition
// HSA_SIGNAL_CONDITION_EQ and value 0; the status of Runtime::SetAsyncSignalHandler is
// returned.
hsa_status_t hsa_amd_async_function(void (*callback)(void* arg), void* arg) {
  TRY;
  IS_OPEN();

  IS_BAD_PTR(callback);

  const hsa_signal_t no_signal = {0};
  hsa_amd_signal_handler as_handler = (hsa_amd_signal_handler)callback;
  const hsa_status_t status = core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
      no_signal, HSA_SIGNAL_CONDITION_EQ, 0, as_handler, arg);
  return status;
  CATCH;
}

// Applies a compute-unit mask to a queue.
//
// num_cu_mask_count - number of mask bits; must be a multiple of 32 (0 is allowed).
// cu_mask           - mask words; only required to be non-null when the count is non-zero.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a count that is not a multiple of 32,
// otherwise the status of Queue::SetCUMasking.
hsa_status_t hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue, uint32_t num_cu_mask_count,
                                       const uint32_t* cu_mask) {
  TRY;
  IS_OPEN();

  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);

  if (num_cu_mask_count != 0) {
    IS_BAD_PTR(cu_mask);
  }

  const bool whole_words = (num_cu_mask_count % 32) == 0;
  if (!whole_words) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return target_queue->SetCUMasking(num_cu_mask_count, cu_mask);
  CATCH;
}

// Reads back the compute-unit mask of a queue.
//
// num_cu_mask_count - number of mask bits requested; must be non-zero and a multiple of 32.
// cu_mask           - out buffer for the mask words; must not be null.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an unacceptable count, otherwise the
// status of Queue::GetCUMasking.
hsa_status_t hsa_amd_queue_cu_get_mask(const hsa_queue_t* queue, uint32_t num_cu_mask_count,
                                       uint32_t* cu_mask) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(cu_mask);

  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);

  const bool count_ok = (num_cu_mask_count != 0) && (num_cu_mask_count % 32 == 0);
  if (!count_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return target_queue->GetCUMasking(num_cu_mask_count, cu_mask);
  CATCH;
}

// Locks (pins) a host allocation through the first coarse-grained system region.
//
// host_ptr  - start of the host range; must not be null.
// size      - size of the range in bytes; must be non-zero.
// agents    - agents that need access, or null together with num_agent == 0.
// num_agent - number of entries in `agents`; must not be negative.
// agent_ptr - out: receives the agent-visible pointer. It is reset to null as soon as the
//             basic argument checks pass, so it is null on any later failure.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for bad arguments (including a negative
// num_agent, which would otherwise be forwarded to Lock as a count), HSA_STATUS_SUCCESS
// with *agent_ptr == host_ptr when the runtime has no coarse-grained system region, and
// otherwise the status of MemoryRegion::Lock.
hsa_status_t hsa_amd_memory_lock(void* host_ptr, size_t size,
                                 hsa_agent_t* agents, int num_agent,
                                 void** agent_ptr) {
  TRY;
  IS_OPEN();

  if (size == 0 || host_ptr == nullptr || agent_ptr == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *agent_ptr = nullptr;

  // The agent list and its count must agree, and the count can never be negative.
  if (num_agent < 0 || (agents != nullptr && num_agent == 0) ||
      (agents == nullptr && num_agent != 0)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Check for APU
  if (core::Runtime::runtime_singleton_->system_regions_coarse().size() == 0) {
    assert(core::Runtime::runtime_singleton_->system_regions_fine()[0]->full_profile() &&
           "Missing coarse grain host memory on dGPU system.");
    *agent_ptr = host_ptr;
    return HSA_STATUS_SUCCESS;
  }

  const AMD::MemoryRegion* system_region = static_cast<const AMD::MemoryRegion*>(
      core::Runtime::runtime_singleton_->system_regions_coarse()[0]);

  return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
  CATCH;
}

// Locks (pins) a host allocation against a caller-selected CPU memory pool.
//
// host_ptr  - start of the host range; must not be null.
// size      - size of the range in bytes; must be non-zero.
// agents    - agents that need access, or null together with num_agent == 0.
// num_agent - number of entries in `agents`; must not be negative.
// pool      - pool to lock against; its owner must be a CPU agent.
// flags     - must be 0.
// agent_ptr - out: receives the agent-visible pointer. It is reset to null as soon as the
//             basic argument checks pass, so it is null on any later failure.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for bad arguments (including a negative
// num_agent), HSA_STATUS_ERROR_INVALID_MEMORY_POOL when the pool handle is null, fails
// IsValid() (the same check the other pool entry points apply) or is not owned by a CPU
// agent, and otherwise the status of MemoryRegion::Lock.
hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
                                         int num_agent, hsa_amd_memory_pool_t pool, uint32_t flags,
                                         void** agent_ptr) {
  TRY;
  IS_OPEN();

  if (size == 0 || host_ptr == nullptr || agent_ptr == nullptr || flags != 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  *agent_ptr = nullptr;

  // The agent list and its count must agree, and the count can never be negative.
  if (num_agent < 0 || (agents != nullptr && num_agent == 0) ||
      (agents == nullptr && num_agent != 0)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_t region = {pool.handle};
  const AMD::MemoryRegion* mem_region = AMD::MemoryRegion::Convert(region);
  if (mem_region == nullptr || !mem_region->IsValid()) {
    return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;
  }
  if (mem_region->owner()->device_type() != core::Agent::kAmdCpuDevice)
    return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;

  return mem_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
  CATCH;
}

// Releases a lock previously taken on `host_ptr`.
//
// The request is forwarded to MemoryRegion::Unlock on the first fine-grained system
// region, and that call's status is returned.
hsa_status_t hsa_amd_memory_unlock(void* host_ptr) {
  TRY;
  IS_OPEN();

  const auto* first_fine_region = core::Runtime::runtime_singleton_->system_regions_fine()[0];
  const AMD::MemoryRegion* unlock_region =
      reinterpret_cast<const AMD::MemoryRegion*>(first_fine_region);

  const hsa_status_t status = unlock_region->Unlock(host_ptr);
  return status;
  CATCH;
}

// Queries an attribute of a memory pool.
//
// memory_pool - pool handle to query.
// attribute   - attribute selector, interpreted by MemoryRegion::GetPoolInfo.
// value       - out buffer for the attribute; must not be null.
//
// Returns HSA_STATUS_ERROR_INVALID_MEMORY_POOL when the handle is null or fails
// IsValid() (the same validation the other pool entry points perform), otherwise the
// status of MemoryRegion::GetPoolInfo.
hsa_status_t hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
                                          hsa_amd_memory_pool_info_t attribute, void* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  hsa_region_t region = {memory_pool.handle};
  const AMD::MemoryRegion* mem_region = AMD::MemoryRegion::Convert(region);
  if (mem_region == NULL || !mem_region->IsValid()) {
    return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;
  }

  return mem_region->GetPoolInfo(attribute, value);
  CATCH;
}

// Invokes `callback` for the memory pools of an agent.
//
// The pool callback is reinterpreted as a region callback and handed to the
// VisitRegion(false, ...) method of the concrete agent class selected by
// agent->device_type() (CPU, AIE or GPU). Any other device type yields
// HSA_STATUS_ERROR_INVALID_AGENT.
hsa_status_t hsa_amd_agent_iterate_memory_pools(
    hsa_agent_t agent_handle,
    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
    void* data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);
  const core::Agent* agent = core::Agent::Convert(agent_handle);
  IS_VALID(agent);

  // All three agent kinds take the callback in its region-typed form.
  using RegionVisitor = hsa_status_t (*)(hsa_region_t memory_pool, void* data);
  RegionVisitor visitor = reinterpret_cast<RegionVisitor>(callback);

  const auto kind = agent->device_type();
  if (kind == core::Agent::kAmdCpuDevice) {
    return reinterpret_cast<const AMD::CpuAgent*>(agent)->VisitRegion(false, visitor, data);
  }
  if (kind == core::Agent::kAmdAieDevice) {
    return reinterpret_cast<const AMD::AieAgent*>(agent)->VisitRegion(false, visitor, data);
  }
  if (kind == core::Agent::kAmdGpuDevice) {
    return reinterpret_cast<const AMD::GpuAgentInt*>(agent)->VisitRegion(false, visitor, data);
  }
  return HSA_STATUS_ERROR_INVALID_AGENT;

  CATCH;
}

// Allocates `size` bytes from a memory pool.
//
// memory_pool - pool to allocate from; must convert to a valid region.
// size        - number of bytes; must be non-zero.
// flags       - HSA_AMD_MEMORY_POOL_* bits; the PCIE, CONTIGUOUS and EXECUTABLE bits are
//               translated to the matching core::MemoryRegion allocate flags.
// ptr         - out: receives the allocation; must not be null.
//
// The allocation always carries AllocateRestrict. Under SANITIZER_AMDGPU, pools owned by
// a GPU agent additionally get AllocateAsan. Returns the status of Runtime::AllocateMemory.
hsa_status_t hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
                                          uint32_t flags, void** ptr) {
  TRY;
  IS_OPEN();

  if (size == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  if (ptr == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_t pool_as_region = {memory_pool.handle};
  const core::MemoryRegion* source_region = core::MemoryRegion::Convert(pool_as_region);

  const bool region_ok = (source_region != NULL) && source_region->IsValid();
  if (!region_ok) {
    return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;
  }

  MemoryRegion::AllocateFlags request_flags = core::MemoryRegion::AllocateRestrict;

  if (flags & HSA_AMD_MEMORY_POOL_PCIE_FLAG) {
    request_flags |= core::MemoryRegion::AllocatePCIeRW;
  }

  if (flags & HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG) {
    request_flags |= core::MemoryRegion::AllocateContiguous;
  }

  if (flags & HSA_AMD_MEMORY_POOL_EXECUTABLE_FLAG) {
    request_flags |= core::MemoryRegion::AllocateExecutable;
  }

#ifdef SANITIZER_AMDGPU
  if (source_region->owner()->device_type() == core::Agent::kAmdGpuDevice) {
    request_flags |= core::MemoryRegion::AllocateAsan;
  }
#endif

  return core::Runtime::runtime_singleton_->AllocateMemory(source_region, size, request_flags,
                                                           ptr);
  CATCH;
}

// Frees a pool allocation by delegating to HSA::hsa_memory_free and returning its status.
hsa_status_t hsa_amd_memory_pool_free(void* ptr) {
  const hsa_status_t free_status = HSA::hsa_memory_free(ptr);
  return free_status;
}

// Grants the listed agents access to the allocation containing `ptr`.
//
// num_agents - number of entries in `agents`; must be non-zero.
// agents     - agents to grant access to; must not be null.
// flags      - must be NULL; a non-null value is rejected.
// ptr        - address inside the allocation; must not be null.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when any of the above is violated, otherwise
// the status of Runtime::AllowAccess.
hsa_status_t hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
                                         const uint32_t* flags, const void* ptr) {
  TRY;
  IS_OPEN();

  const bool has_agents = (num_agents != 0) && (agents != NULL);
  const bool args_ok = has_agents && (flags == NULL) && (ptr != NULL);
  if (!args_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->AllowAccess(num_agents, agents, ptr);
  return status;
  CATCH;
}

// Reports whether memory can be migrated from one pool to another.
//
// src_memory_pool / dst_memory_pool - source and destination pools; each must convert to
//                                     a valid region. The source is validated first.
// result                            - out flag filled in by MemoryRegion::CanMigrate;
//                                     must not be null.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `result`,
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL for a bad pool, otherwise the status of
// MemoryRegion::CanMigrate.
hsa_status_t hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
                                             hsa_amd_memory_pool_t dst_memory_pool, bool* result) {
  TRY;
  IS_OPEN();

  if (result == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_status_t bad_pool = static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);

  hsa_region_t from_handle = {src_memory_pool.handle};
  const AMD::MemoryRegion* from_region = AMD::MemoryRegion::Convert(from_handle);
  const bool from_ok = (from_region != NULL) && from_region->IsValid();
  if (!from_ok) {
    return bad_pool;
  }

  hsa_region_t to_handle = {dst_memory_pool.handle};
  const AMD::MemoryRegion* to_region = AMD::MemoryRegion::Convert(to_handle);
  const bool to_ok = (to_region != NULL) && to_region->IsValid();
  if (!to_ok) {
    return bad_pool;
  }

  return from_region->CanMigrate(*to_region, *result);
  CATCH;
}

// Migrates the allocation at `ptr` into `memory_pool`.
//
// ptr         - address of the allocation; must not be null.
// memory_pool - destination pool; must convert to a valid region.
// flags       - must be 0.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer or non-zero flags,
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL for a bad pool, otherwise the status of
// MemoryRegion::Migrate on the destination region.
hsa_status_t hsa_amd_memory_migrate(const void* ptr,
                                    hsa_amd_memory_pool_t memory_pool,
                                    uint32_t flags) {
  TRY;
  IS_OPEN();

  const bool args_ok = (ptr != NULL) && (flags == 0);
  if (!args_ok) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_t target_handle = {memory_pool.handle};
  const AMD::MemoryRegion* target_region = AMD::MemoryRegion::Convert(target_handle);

  const bool target_ok = (target_region != NULL) && target_region->IsValid();
  if (!target_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return target_region->Migrate(flags, ptr);
  CATCH;
}

// Queries an attribute describing how `agent_handle` relates to `memory_pool`.
//
// Validation order: `value` (must be non-null), then the agent, then the pool.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `value`,
// HSA_STATUS_ERROR_INVALID_MEMORY_POOL for a bad pool, otherwise the status of
// MemoryRegion::GetAgentPoolInfo.
hsa_status_t hsa_amd_agent_memory_pool_get_info(
    hsa_agent_t agent_handle, hsa_amd_memory_pool_t memory_pool,
    hsa_amd_agent_memory_pool_info_t attribute, void* value) {
  TRY;
  IS_OPEN();

  if (value == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const core::Agent* querying_agent = core::Agent::Convert(agent_handle);
  IS_VALID(querying_agent);

  hsa_region_t pool_as_region = {memory_pool.handle};
  const AMD::MemoryRegion* pool_region = AMD::MemoryRegion::Convert(pool_as_region);

  const bool pool_ok = (pool_region != NULL) && pool_region->IsValid();
  if (!pool_ok) {
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
  }

  return pool_region->GetAgentPoolInfo(*querying_agent, attribute, value);
  CATCH;
}

// Maps an interop buffer (identified by `interop_handle`) for use by the given agents.
//
// num_agents    - number of entries in `agents`; must be non-zero.
// agents        - agents that will access the buffer; each must convert to a valid agent.
// flags         - must be 0.
// size, ptr     - out parameters forwarded to Runtime::InteropMap; must not be null.
// metadata_size,
// metadata      - forwarded to Runtime::InteropMap unchanged.
//
// The converted agent pointers live in a small on-stack array when there are at most
// tinyArraySize of them and in a std::vector otherwise, so the storage is released
// automatically on every exit path. An allocation failure surfaces as an exception and
// is handled by CATCH (the array form of `new` used previously also threw rather than
// returning null, which made its null check unreachable).
hsa_status_t hsa_amd_interop_map_buffer(uint32_t num_agents,
                                        hsa_agent_t* agents, int interop_handle,
                                        uint32_t flags, size_t* size,
                                        void** ptr, size_t* metadata_size,
                                        const void** metadata) {
  static const int tinyArraySize=8;
  TRY;
  IS_OPEN();
  IS_BAD_PTR(agents);
  IS_BAD_PTR(size);
  IS_BAD_PTR(ptr);
  if (flags != 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (num_agents == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  core::Agent* short_agents[tinyArraySize];
  std::vector<core::Agent*> large_agents;
  core::Agent** core_agents = short_agents;
  if (num_agents > static_cast<uint32_t>(tinyArraySize)) {
    large_agents.resize(num_agents);
    core_agents = large_agents.data();
  }

  for (uint32_t i = 0; i < num_agents; i++) {
    core::Agent* device = core::Agent::Convert(agents[i]);
    IS_VALID(device);
    core_agents[i] = device;
  }

  return core::Runtime::runtime_singleton_->InteropMap(
      num_agents, core_agents, interop_handle, flags, size, ptr, metadata_size,
      metadata);
  CATCH;
}

// Unmaps an interop buffer.
//
// A null `ptr` is a no-op. The result of Runtime::InteropUnmap is not inspected; the
// function reports HSA_STATUS_SUCCESS in both cases.
hsa_status_t hsa_amd_interop_unmap_buffer(void* ptr) {
  TRY;
  IS_OPEN();
  if (ptr == NULL) {
    return HSA_STATUS_SUCCESS;
  }
  core::Runtime::runtime_singleton_->InteropUnmap(ptr);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Looks up allocation information for `ptr`.
//
// `ptr` and `info` must be non-null; `alloc`, `num_accessible` and `accessible` are
// forwarded to Runtime::PtrInfo without checks here. Returns that call's status.
hsa_status_t hsa_amd_pointer_info(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
                                  uint32_t* num_accessible, hsa_agent_t** accessible) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(info);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->PtrInfo(ptr, info, alloc, num_accessible, accessible);
  return status;
  CATCH;
}

// Associates `userdata` with the allocation containing `ptr` (which must be non-null) by
// delegating to Runtime::SetPtrInfoData; returns that call's status.
hsa_status_t hsa_amd_pointer_info_set_userdata(const void* ptr, void* userdata) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);

  const hsa_status_t status = core::Runtime::runtime_singleton_->SetPtrInfoData(ptr, userdata);
  return status;
  CATCH;
}

// Creates an IPC handle for the memory range [ptr, ptr + len).
//
// `ptr` and `handle` must be non-null; `len` is passed to Runtime::IPCCreate unchecked.
// Returns that call's status.
hsa_status_t hsa_amd_ipc_memory_create(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(handle);

  const hsa_status_t status = core::Runtime::runtime_singleton_->IPCCreate(ptr, len, handle);
  return status;
  CATCH;
}

// Attaches to memory shared through an IPC handle and maps it for the given agents.
//
// ipc            - IPC memory handle to import; must not be null.
// len            - length of the shared range, forwarded to Runtime::IPCAttach.
// num_agents     - number of entries in `mapping_agents` (may be 0).
// mapping_agents - agents to map for; required only when num_agents is non-zero. Each
//                  entry must convert to a valid agent.
// mapped_ptr     - out: receives the mapped address; must not be null.
//
// The converted agent pointers live in a small on-stack array when there are at most
// tinyArraySize of them and in a std::vector otherwise, so the storage is released
// automatically on every exit path. This mirrors hsa_amd_interop_map_buffer and avoids
// alloca(); allocation failure surfaces as an exception and is handled by CATCH.
// Returns the status of Runtime::IPCAttach.
hsa_status_t hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* ipc, size_t len,
                                       uint32_t num_agents, const hsa_agent_t* mapping_agents,
                                       void** mapped_ptr) {
  static const int tinyArraySize = 8;
  TRY;
  IS_OPEN();
  IS_BAD_PTR(mapped_ptr);
  // The handle is forwarded to IPCAttach; reject null here instead of passing it on.
  IS_BAD_PTR(ipc);
  if (num_agents != 0) IS_BAD_PTR(mapping_agents);

  core::Agent* short_agents[tinyArraySize];
  std::vector<core::Agent*> large_agents;
  core::Agent** core_agents = short_agents;
  if (num_agents > static_cast<uint32_t>(tinyArraySize)) {
    large_agents.resize(num_agents);
    core_agents = large_agents.data();
  }

  for (uint32_t i = 0; i < num_agents; i++) {
    core::Agent* device = core::Agent::Convert(mapping_agents[i]);
    IS_VALID(device);
    core_agents[i] = device;
  }

  return core::Runtime::runtime_singleton_->IPCAttach(ipc, len, num_agents, core_agents,
                                                      mapped_ptr);
  CATCH;
}

// Detaches a mapping obtained from hsa_amd_ipc_memory_attach.
// `mapped_ptr` must be non-null; returns the status of Runtime::IPCDetach.
hsa_status_t hsa_amd_ipc_memory_detach(void* mapped_ptr) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(mapped_ptr);

  const hsa_status_t status = core::Runtime::runtime_singleton_->IPCDetach(mapped_ptr);
  return status;
  CATCH;
}

// Produces an IPC handle for an existing signal.
//
// `handle` must be non-null and `hsa_signal` must convert to a valid signal. The handle
// is filled in by core::IPCSignal::CreateHandle; HSA_STATUS_SUCCESS is returned once that
// call completes without throwing.
hsa_status_t hsa_amd_ipc_signal_create(hsa_signal_t hsa_signal, hsa_amd_ipc_signal_t* handle) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(handle);

  core::Signal* source_signal = core::Signal::Convert(hsa_signal);
  IS_VALID(source_signal);

  core::IPCSignal::CreateHandle(source_signal, handle);
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// Imports a signal from an IPC handle.
//
// Both pointers must be non-null. The signal returned by core::IPCSignal::Attach is
// converted to its public handle and stored in *hsa_signal.
hsa_status_t hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle,
                                       hsa_signal_t* hsa_signal) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(handle);
  IS_BAD_PTR(hsa_signal);

  core::Signal* attached = core::IPCSignal::Attach(handle);
  const hsa_signal_t public_handle = core::Signal::Convert(attached);
  *hsa_signal = public_handle;
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// For use by tools only - not in library export table.
//
// Creates a regular queue via HSA::hsa_queue_create and wraps it in a
// core::InterceptQueue, whose public handle is stored in *queue.
//
// `queue` must be non-null and `size` must be at least 3 (see below). A failure of
// HSA::hsa_queue_create is returned unchanged.
hsa_status_t hsa_amd_queue_intercept_create(
    hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
    uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(queue);

  // The queue being wrapped needs room for three packets at once: the packet itself, a
  // newly inserted retry barrier packet, and an older retry packet still being processed.
  const uint32_t kMinWrappedQueueSize = 3;
  if (size < kMinWrappedQueueSize) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_queue_t* wrapped_handle;
  const hsa_status_t create_status =
      HSA::hsa_queue_create(agent_handle, size, type, callback, data, private_segment_size,
                            group_segment_size, &wrapped_handle);
  if (create_status != HSA_STATUS_SUCCESS) {
    return create_status;
  }

  // Ownership of the wrapped queue moves into the intercept queue.
  std::unique_ptr<core::Queue> wrapped(core::Queue::Convert(wrapped_handle));
  std::unique_ptr<core::InterceptQueue> interceptor(
      new core::InterceptQueue(std::move(wrapped)));

  *queue = core::Queue::Convert(interceptor.release());
  return HSA_STATUS_SUCCESS;
  CATCH;
}

// For use by tools only - not in library export table.
//
// Adds an interceptor callback to a queue created by hsa_amd_queue_intercept_create.
// `callback` must be non-null and `queue` must be a valid queue for which
// core::InterceptQueue::IsType() holds; otherwise HSA_STATUS_ERROR_INVALID_QUEUE is
// returned for the latter case.
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
                                              hsa_amd_queue_intercept_handler callback,
                                              void* user_data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(callback);

  core::Queue* candidate = core::Queue::Convert(queue);
  IS_VALID(candidate);

  if (core::InterceptQueue::IsType(candidate)) {
    static_cast<core::InterceptQueue*>(candidate)->AddInterceptor(callback, user_data);
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_QUEUE;
  CATCH;
}

// Installs a system event callback by delegating to
// Runtime::SetCustomSystemEventHandler; `callback` and `data` are forwarded unchecked and
// that call's status is returned.
hsa_status_t hsa_amd_register_system_event_handler(hsa_amd_system_event_callback_t callback,
                                                   void* data) {
  TRY;
  IS_OPEN();

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->SetCustomSystemEventHandler(callback, data);
  return status;
  CATCH;
}

// Changes the scheduling priority of a queue.
//
// The public priority is translated to the HSA_QUEUE_PRIORITY value handed to
// Queue::SetPriority:
//   HSA_AMD_QUEUE_PRIORITY_LOW    -> HSA_QUEUE_PRIORITY_MINIMUM
//   HSA_AMD_QUEUE_PRIORITY_NORMAL -> HSA_QUEUE_PRIORITY_NORMAL
//   HSA_AMD_QUEUE_PRIORITY_HIGH   -> HSA_QUEUE_PRIORITY_HIGH
// Any other value yields HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t hsa_amd_queue_set_priority(hsa_queue_t* queue,
                                                hsa_amd_queue_priority_t priority) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(queue);
  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);

  // Highest queue priority allowed for HSA user is HSA_QUEUE_PRIORITY_HIGH
  // HSA_QUEUE_PRIORITY_MAXIMUM is reserved for PC Sampling and can only be allocated internally
  // in ROCR
  HSA_QUEUE_PRIORITY kmt_priority;
  switch (priority) {
    case HSA_AMD_QUEUE_PRIORITY_LOW:
      kmt_priority = HSA_QUEUE_PRIORITY_MINIMUM;
      break;
    case HSA_AMD_QUEUE_PRIORITY_NORMAL:
      kmt_priority = HSA_QUEUE_PRIORITY_NORMAL;
      break;
    case HSA_AMD_QUEUE_PRIORITY_HIGH:
      kmt_priority = HSA_QUEUE_PRIORITY_HIGH;
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return target_queue->SetPriority(kmt_priority);
  CATCH;
}

// Registers `callback` (with `user_data`) for the allocation at `ptr` by delegating to
// Runtime::RegisterReleaseNotifier. Both `ptr` and `callback` must be non-null; the
// runtime call's status is returned.
hsa_status_t hsa_amd_register_deallocation_callback(void* ptr,
                                                    hsa_amd_deallocation_callback_t callback,
                                                    void* user_data) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(callback);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->RegisterReleaseNotifier(ptr, callback, user_data);
  return status;

  CATCH;
}

// Removes a deallocation callback for the allocation at `ptr` by delegating to
// Runtime::DeregisterReleaseNotifier. Both `ptr` and `callback` must be non-null; the
// runtime call's status is returned.
hsa_status_t hsa_amd_deregister_deallocation_callback(void* ptr,
                                                      hsa_amd_deallocation_callback_t callback) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(callback);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->DeregisterReleaseNotifier(ptr, callback);
  return status;

  CATCH;
}

// For use by tools only - not in library export table.
//
// Installs a notifier for internally created queues by delegating to
// Runtime::SetInternalQueueCreateNotifier; arguments are forwarded unchecked and that
// call's status is returned.
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
                                                   void* user_data) {
  TRY;
  IS_OPEN();

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->SetInternalQueueCreateNotifier(callback, user_data);
  return status;
  CATCH;
}

// Sets SVM attributes on the range [ptr, ptr + size).
// No argument validation happens here; everything is forwarded to Runtime::SetSvmAttrib
// and that call's status is returned.
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count) {
  TRY;
  IS_OPEN();

  const hsa_status_t status = core::Runtime::runtime_singleton_->SetSvmAttrib(
      ptr, size, attribute_list, attribute_count);
  return status;
  CATCH;
}

// Reads SVM attributes of the range [ptr, ptr + size).
// No argument validation happens here; everything is forwarded to Runtime::GetSvmAttrib
// and that call's status is returned.
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count) {
  TRY;
  IS_OPEN();

  const hsa_status_t status = core::Runtime::runtime_singleton_->GetSvmAttrib(
      ptr, size, attribute_list, attribute_count);
  return status;
  CATCH;
}

// Requests an asynchronous SVM prefetch of [ptr, ptr + size) towards `agent`.
// All arguments are forwarded to Runtime::SvmPrefetch, whose status is returned.
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                        uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                        hsa_signal_t completion_signal) {
  TRY;
  IS_OPEN();
  // Validate inputs.
  // NOTE(review): no validation is actually performed here; the check below is disabled.
  // if (core::g_use_interrupt_wait && (!core::InterruptSignal::IsType(signal)))
  const hsa_status_t status = core::Runtime::runtime_singleton_->SvmPrefetch(
      ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
  return status;
  CATCH;
}

// Acquires SPM on the given agent by calling SPMAcquire on the agent's driver with the
// agent's node id. Non-GPU, null or invalid agents yield HSA_STATUS_ERROR_INVALID_AGENT.
hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent) {
  TRY;
  IS_OPEN();
  const core::Agent* target = core::Agent::Convert(preferred_agent);

  // Currently, the SPM API is only supported for GPU agents.
  const bool is_usable_gpu = (target != NULL) && target->IsValid() &&
      (target->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_usable_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  return target->driver().SPMAcquire(target->node_id());

  CATCH;
}

// Releases SPM on the given agent by calling SPMRelease on the agent's driver with the
// agent's node id. Non-GPU, null or invalid agents yield HSA_STATUS_ERROR_INVALID_AGENT.
hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent) {
  TRY;
  IS_OPEN();

  const core::Agent* target = core::Agent::Convert(preferred_agent);

  // Currently, the SPM API is only supported for GPU agents.
  const bool is_usable_gpu = (target != NULL) && target->IsValid() &&
      (target->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_usable_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  return target->driver().SPMRelease(target->node_id());

  CATCH;
}

// Sets the SPM destination buffer for the given agent.
//
// After the agent is checked (must be a valid GPU agent, else
// HSA_STATUS_ERROR_INVALID_AGENT), all remaining arguments are forwarded unchecked to the
// driver's SPMSetDestBuffer together with the agent's node id.
hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes,
                                         uint32_t* timeout, uint32_t* size_copied, void* dest,
                                         bool* is_data_loss) {
  TRY;
  IS_OPEN();

  const core::Agent* target = core::Agent::Convert(preferred_agent);

  // Currently, the SPM API is only supported for GPU agents.
  const bool is_usable_gpu = (target != NULL) && target->IsValid() &&
      (target->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_usable_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  return target->driver().SPMSetDestBuffer(target->node_id(), size_in_bytes, timeout,
                                           size_copied, dest, is_data_loss);
  CATCH;
}

// Exports the range [ptr, ptr + size) as a dmabuf.
//
// `ptr`, `dmabuf` and `offset` must be non-null and `size` non-zero. Delegates to
// Runtime::DmaBufExport with HSA_AMD_DMABUF_MAPPING_TYPE_NONE as the flags argument and
// returns its status.
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
  uint64_t* offset) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(dmabuf);
  IS_BAD_PTR(offset);
  IS_ZERO(size);

  const hsa_status_t status = core::Runtime::runtime_singleton_->DmaBufExport(
      ptr, size, dmabuf, offset, HSA_AMD_DMABUF_MAPPING_TYPE_NONE);
  return status;
  CATCH;
}

// Exports the range [ptr, ptr + size) as a dmabuf, with caller-supplied flags.
//
// `ptr`, `dmabuf` and `offset` must be non-null and `size` non-zero. `flags` is passed
// through to Runtime::DmaBufExport, whose status is returned.
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
                              int* dmabuf, uint64_t* offset, uint64_t flags) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(ptr);
  IS_BAD_PTR(dmabuf);
  IS_BAD_PTR(offset);
  IS_ZERO(size);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf, offset, flags);
  return status;
  CATCH;
}

// Closes a dmabuf file descriptor by delegating to Runtime::DmaBufClose and returning
// its status.
// NOTE(review): unlike the export entry points there is no IS_OPEN() check here -
// confirm whether closing after shutdown is intentionally allowed.
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf) {
  TRY;
  const hsa_status_t status = core::Runtime::runtime_singleton_->DmaBufClose(dmabuf);
  return status;
  CATCH;
}

// Reserves a virtual address range of `size` bytes.
//
// va      - out: receives the reserved address; must not be null.
// size    - size of the reservation; must be non-zero.
// address - requested address, forwarded to Runtime::VMemoryAddressReserve.
// flags   - reservation flags. Unless HSA_AMD_VMEM_ADDRESS_NO_REGISTER is set, the call
//           additionally requires Runtime::VirtualMemApiSupported().
//
// Delegates to Runtime::VMemoryAddressReserve with an alignment argument of 0 and
// returns its status.
hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t address,
                                          uint64_t flags) {
  TRY;
  IS_OPEN();
  // `va` is the output location for the reservation; reject null instead of forwarding it.
  IS_BAD_PTR(va);
  IS_ZERO(size);

  if (!(flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER))
    IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());

  return core::Runtime::runtime_singleton_->VMemoryAddressReserve(va, size, address, 0, flags);
  CATCH;
}

// Reserves a virtual address range of `size` bytes with a caller-chosen alignment.
//
// va        - out: receives the reserved address; must not be null.
// size      - size of the reservation; must be non-zero.
// address   - requested address, forwarded to Runtime::VMemoryAddressReserve.
// alignment - alignment argument, forwarded unchanged.
// flags     - reservation flags, forwarded unchanged.
//
// Requires Runtime::VirtualMemApiSupported(); returns the status of
// Runtime::VMemoryAddressReserve.
// NOTE(review): hsa_amd_vmem_address_reserve skips the VirtualMemApiSupported() check
// when HSA_AMD_VMEM_ADDRESS_NO_REGISTER is set, whereas this variant always enforces it -
// confirm the difference is intended.
hsa_status_t hsa_amd_vmem_address_reserve_align(void** va, size_t size, uint64_t address,
                                          uint64_t alignment, uint64_t flags) {
  TRY;
  IS_OPEN();
  // `va` is the output location for the reservation; reject null instead of forwarding it.
  IS_BAD_PTR(va);
  IS_ZERO(size);
  IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());
  return core::Runtime::runtime_singleton_->VMemoryAddressReserve(va, size, address, alignment, flags);
  CATCH;
}


// Frees a previously reserved virtual address range.
// `va` must be non-null and `size` non-zero; returns the status of
// Runtime::VMemoryAddressFree.
hsa_status_t hsa_amd_vmem_address_free(void* va, size_t size) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(va);
  IS_ZERO(size);

  const hsa_status_t status = core::Runtime::runtime_singleton_->VMemoryAddressFree(va, size);
  return status;
  CATCH;
}

// Creates a virtual-memory allocation handle backed by `memory_pool`.
//
// memory_pool   - pool to allocate from; must convert to a valid region.
// size          - allocation size; must be non-zero.
// type          - MEMORY_TYPE_NONE or MEMORY_TYPE_PINNED; the latter adds AllocatePinned
//                 to the AllocateMemoryOnly allocation flag.
// flags         - forwarded to Runtime::VMemoryHandleCreate.
// memory_handle - out: receives the new handle; must not be null.
//
// Requires Runtime::VirtualMemApiSupported(). An unsupported type or a bad pool yields
// HSA_STATUS_ERROR_INVALID_ARGUMENT; otherwise the status of
// Runtime::VMemoryHandleCreate is returned.
hsa_status_t hsa_amd_vmem_handle_create(hsa_amd_memory_pool_t memory_pool, size_t size,
                                        hsa_amd_memory_type_t type, uint64_t flags,
                                        hsa_amd_vmem_alloc_handle_t* memory_handle) {
  TRY;
  IS_OPEN();
  IS_ZERO(size);
  // `memory_handle` is the output location; reject null instead of forwarding it.
  IS_BAD_PTR(memory_handle);
  IS_TRUE(core::Runtime::runtime_singleton_->VirtualMemApiSupported());

  if (type != MEMORY_TYPE_NONE && type != MEMORY_TYPE_PINNED)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  hsa_region_t region = {memory_pool.handle};
  const core::MemoryRegion* mem_region = core::MemoryRegion::Convert(region);

  if (mem_region == NULL || !mem_region->IsValid()) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  MemoryRegion::AllocateFlags alloc_flag = core::MemoryRegion::AllocateMemoryOnly;
  if (type == MEMORY_TYPE_PINNED) alloc_flag |= core::MemoryRegion::AllocatePinned;

  return core::Runtime::runtime_singleton_->VMemoryHandleCreate(mem_region, size, alloc_flag, flags,
                                                                memory_handle);
  CATCH;
}

// Releases a virtual-memory allocation handle by delegating to
// Runtime::VMemoryHandleRelease and returning its status.
hsa_status_t hsa_amd_vmem_handle_release(hsa_amd_vmem_alloc_handle_t memory_handle) {
  TRY;
  IS_OPEN();

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->VMemoryHandleRelease(memory_handle);
  return status;
  CATCH;
}

// Maps `memory_handle` (starting at `in_offset`) into the reserved range [va, va + size).
// `va` must be non-null and `size` non-zero; the remaining arguments are forwarded to
// Runtime::VMemoryHandleMap, whose status is returned.
hsa_status_t hsa_amd_vmem_map(void* va, size_t size, size_t in_offset,
                              hsa_amd_vmem_alloc_handle_t memory_handle, uint64_t flags) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(va);
  IS_ZERO(size);

  const hsa_status_t status = core::Runtime::runtime_singleton_->VMemoryHandleMap(
      va, size, in_offset, memory_handle, flags);
  return status;
  CATCH;
}

// Unmaps the range [va, va + size).
// `va` must be non-null and `size` non-zero; returns the status of
// Runtime::VMemoryHandleUnmap.
hsa_status_t hsa_amd_vmem_unmap(void* va, size_t size) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(va);
  IS_ZERO(size);

  const hsa_status_t status = core::Runtime::runtime_singleton_->VMemoryHandleUnmap(va, size);
  return status;
  CATCH;
}

// Applies access descriptors to the mapped range [va, va + size).
// `va` and `desc` must be non-null, `size` and `desc_cnt` non-zero; returns the status of
// Runtime::VMemorySetAccess.
hsa_status_t hsa_amd_vmem_set_access(void* va, size_t size,
                                     const hsa_amd_memory_access_desc_t* desc,
                                     size_t desc_cnt) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(va);
  IS_ZERO(size);
  IS_BAD_PTR(desc);
  IS_ZERO(desc_cnt);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->VMemorySetAccess(va, size, desc, desc_cnt);
  return status;
  CATCH;
}

// Queries the access permissions `agent_handle` has on the mapping at `va`.
// `va` and `perms` must be non-null; the agent handle is forwarded unchecked to
// Runtime::VMemoryGetAccess, whose status is returned.
hsa_status_t hsa_amd_vmem_get_access(void* va, hsa_access_permission_t* perms,
                                     hsa_agent_t agent_handle) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(va);
  IS_BAD_PTR(perms);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->VMemoryGetAccess(va, perms, agent_handle);
  return status;
  CATCH;
}

// Exports a virtual-memory allocation handle as a dmabuf file descriptor.
// `dmabuf_fd` must be non-null; `handle` and `flags` are forwarded to
// Runtime::VMemoryExportShareableHandle, whose status is returned.
hsa_status_t hsa_amd_vmem_export_shareable_handle(int* dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t handle,
                                                  uint64_t flags) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(dmabuf_fd);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->VMemoryExportShareableHandle(dmabuf_fd, handle, flags);
  return status;
  CATCH;
}

// Imports a dmabuf file descriptor as a virtual-memory allocation handle.
//
// dmabuf_fd - file descriptor to import; validated with IS_VALID_FD.
// handle    - out: receives the imported handle; must not be null.
//
// Like every other vmem entry point, the runtime must be open (IS_OPEN) before the
// runtime singleton is used. Returns the status of
// Runtime::VMemoryImportShareableHandle.
hsa_status_t hsa_amd_vmem_import_shareable_handle(int dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t* handle) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(handle);
  IS_VALID_FD(dmabuf_fd);

  return core::Runtime::runtime_singleton_->VMemoryImportShareableHandle(dmabuf_fd, handle);
  CATCH;
}

// Retrieves the allocation handle that backs the mapping at `addr`.
//
// allocHandle - out: receives the handle; must not be null.
// addr        - mapped address to look up; must not be null.
//
// Returns the status of Runtime::VMemoryRetainAllocHandle.
hsa_status_t hsa_amd_vmem_retain_alloc_handle(hsa_amd_vmem_alloc_handle_t* allocHandle,
                                              void* addr) {
  TRY;
  IS_OPEN();
  // `allocHandle` is the output location; reject null instead of forwarding it.
  IS_BAD_PTR(allocHandle);
  IS_BAD_PTR(addr);

  return core::Runtime::runtime_singleton_->VMemoryRetainAllocHandle(allocHandle, addr);
  CATCH;
}

// Reports the memory pool and memory type behind a virtual-memory allocation handle.
//
// `pool` and `type` must be non-null. The region reported by
// Runtime::VMemoryGetAllocPropertiesFromHandle is converted to a pool handle and written
// to *pool only when that call succeeds; its status is returned either way.
hsa_status_t hsa_amd_vmem_get_alloc_properties_from_handle(hsa_amd_vmem_alloc_handle_t allocHandle,
                                                           hsa_amd_memory_pool_t* pool,
                                                           hsa_amd_memory_type_t* type) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(pool);
  IS_BAD_PTR(type);

  const core::MemoryRegion* backing_region = NULL;
  const hsa_status_t status =
      core::Runtime::runtime_singleton_->VMemoryGetAllocPropertiesFromHandle(
          allocHandle, &backing_region, type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  hsa_region_t as_region = core::MemoryRegion::Convert(backing_region);
  pool->handle = as_region.handle;
  return status;
  CATCH;
}

// Sets the async scratch threshold of a GPU agent.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT unless `_agent` is a valid GPU agent, and
// HSA_STATUS_ERROR_INVALID_ARGUMENT unless both the runtime flag
// enable_scratch_async_reclaim() and the agent's AsyncScratchReclaimEnabled() are true.
// Otherwise returns the status of GpuAgentInt::SetAsyncScratchThresholds.
hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t _agent, size_t threshold) {
  TRY;
  IS_OPEN();

  core::Agent* candidate = core::Agent::Convert(_agent);
  const bool is_usable_gpu = (candidate != NULL) && candidate->IsValid() &&
      (candidate->device_type() == core::Agent::kAmdGpuDevice);
  if (!is_usable_gpu) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  AMD::GpuAgentInt* gpu = static_cast<AMD::GpuAgentInt*>(candidate);

  const bool reclaim_enabled =
      core::Runtime::runtime_singleton_->flag().enable_scratch_async_reclaim() &&
      gpu->AsyncScratchReclaimEnabled();
  if (!reclaim_enabled) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return gpu->SetAsyncScratchThresholds(threshold);
  CATCH;
}

// Queries an attribute of a queue.
//
// _queue    - queue to inspect; must convert to a valid queue.
// attribute - attribute selector, interpreted by Queue::GetInfo.
// value     - out buffer for the attribute; must not be null (checked here, as in the
//             other *_get_info entry points).
//
// Returns the status of Queue::GetInfo.
hsa_status_t HSA_API hsa_amd_queue_get_info(hsa_queue_t* _queue,
                                            hsa_queue_info_attribute_t attribute, void* value) {
  TRY;
  IS_OPEN();
  IS_BAD_PTR(value);

  core::Queue* queue = core::Queue::Convert(_queue);
  IS_VALID(queue);

  return queue->GetInfo(attribute, value);
  CATCH;
}

// Forwards `flags` and `file` to Runtime::EnableLogging and returns its status.
// NOTE(review): there is no IS_OPEN() check and no argument validation here - confirm
// whether this is meant to be callable before the runtime is initialized.
hsa_status_t hsa_amd_enable_logging(uint8_t* flags, void *file) {
  TRY;
  const hsa_status_t status = core::Runtime::runtime_singleton_->EnableLogging(flags, file);
  return status;
  CATCH;
}

}   //  namespace amd
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "image/inc/hsa_ext_image_impl.h"
#include "pcs/inc/hsa_ven_amd_pc_sampling_impl.h"
#include "core/inc/hsa_ext_interface.h"
#include "core/inc/runtime.h"

#include <string>

namespace rocr {
// Generic stand-in for any extension entry point that has no implementation
// bound. It accepts (and ignores) whatever arguments the real entry point
// would take and reports HSA_STATUS_ERROR_NOT_INITIALIZED.
template <typename Ret, typename... Params>
static Ret hsa_ext_null(Params...) {
  return HSA_STATUS_ERROR_NOT_INITIALIZED;
}

namespace core {
// Puts every extension table into its "nothing loaded" state.
ExtensionEntryPoints::ExtensionEntryPoints() {
  InitFinalizerExtTable();   // finalizer_api
  InitImageExtTable();       // image_api
  InitPcSamplingExtTable();  // pcs_api
  InitAmdExtTable();         // hsa_amd_image_create slots of the Amd Ext tables
}

// Reset the Finalizer function table: every entry point is routed to the
// hsa_ext_null stub and the table version is zeroed.
void ExtensionEntryPoints::InitFinalizerExtTable() {
  auto& table = finalizer_api;

  // Route all finalizer entry points to the stub
  table.hsa_ext_program_create_fn = hsa_ext_null;
  table.hsa_ext_program_destroy_fn = hsa_ext_null;
  table.hsa_ext_program_add_module_fn = hsa_ext_null;
  table.hsa_ext_program_iterate_modules_fn = hsa_ext_null;
  table.hsa_ext_program_get_info_fn = hsa_ext_null;
  table.hsa_ext_program_finalize_fn = hsa_ext_null;

  // A zeroed version marks the table as not populated by a real implementation
  table.version.major_id = 0x00;
  table.version.minor_id = 0x00;
  table.version.step_id = 0x00;
}

// Reset the Image function table: the table version is zeroed and every entry
// point is routed to the hsa_ext_null stub.
void ExtensionEntryPoints::InitImageExtTable() {

  // Initialize Version of Api Table
  image_api.version.major_id = 0x00;
  image_api.version.minor_id = 0x00;
  image_api.version.step_id = 0x00;

  image_api.hsa_ext_image_get_capability_fn = hsa_ext_null;
  image_api.hsa_ext_image_data_get_info_fn = hsa_ext_null;
  image_api.hsa_ext_image_create_fn = hsa_ext_null;
  image_api.hsa_ext_image_import_fn = hsa_ext_null;
  image_api.hsa_ext_image_export_fn = hsa_ext_null;
  image_api.hsa_ext_image_copy_fn = hsa_ext_null;
  image_api.hsa_ext_image_clear_fn = hsa_ext_null;
  image_api.hsa_ext_image_destroy_fn = hsa_ext_null;
  image_api.hsa_ext_sampler_create_fn = hsa_ext_null;
  // hsa_ext_sampler_create_v2() dispatches through this slot, so it must be
  // reset like the others; otherwise it is left unset (or stale after an
  // unload) whenever the image implementation is not bound.
  image_api.hsa_ext_sampler_create_v2_fn = hsa_ext_null;
  image_api.hsa_ext_sampler_destroy_fn = hsa_ext_null;
  image_api.hsa_amd_image_get_info_max_dim_fn = hsa_ext_null;
  image_api.hsa_ext_image_get_capability_with_layout_fn = hsa_ext_null;
  image_api.hsa_ext_image_data_get_info_with_layout_fn = hsa_ext_null;
  image_api.hsa_ext_image_create_with_layout_fn = hsa_ext_null;
}

// Reset the PC Sampling function table: every entry point is routed to the
// hsa_ext_null stub and the table version is zeroed.
void ExtensionEntryPoints::InitPcSamplingExtTable() {
  auto& table = pcs_api;

  // Route all PC sampling entry points to the stub
  table.hsa_ven_amd_pcs_iterate_configuration_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_create_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_create_from_id_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_destroy_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_start_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_stop_fn = hsa_ext_null;
  table.hsa_ven_amd_pcs_flush_fn = hsa_ext_null;

  // Zero the version of the Api table
  table.version.major_id = 0x00;
  table.version.minor_id = 0x00;
  table.version.step_id = 0x00;
}

// Point the image-related entry of the Amd Ext table (hsa_amd_image_create)
// at the hsa_ext_null stub, in both the public and the internal api table.
void ExtensionEntryPoints::InitAmdExtTable() {
  auto& public_slot = hsa_api_table().amd_ext_api.hsa_amd_image_create_fn;
  auto& internal_slot = hsa_internal_api_table().amd_ext_api.hsa_amd_image_create_fn;

  public_slot = hsa_ext_null;
  internal_slot = hsa_ext_null;
}

// Bind the image-related entry of the Amd Ext table (hsa_amd_image_create) to
// `func_ptr` in both the public and the internal api table. In debug builds
// each slot is asserted to still hold the hsa_ext_null stub beforehand.
// @note: Interface should be updated when Amd Ext table
// begins hosting Api's from other extension libraries
void ExtensionEntryPoints::UpdateAmdExtTable(decltype(::hsa_amd_image_create)* func_ptr) {
  auto& public_slot = hsa_api_table().amd_ext_api.hsa_amd_image_create_fn;
  auto& internal_slot = hsa_internal_api_table().amd_ext_api.hsa_amd_image_create_fn;

  assert(public_slot == (decltype(hsa_amd_image_create)*)hsa_ext_null &&
         "Duplicate load of extension import.");
  assert(internal_slot == (decltype(hsa_amd_image_create)*)hsa_ext_null &&
         "Duplicate load of extension import.");

  public_slot = func_ptr;
  internal_slot = func_ptr;
}

void ExtensionEntryPoints::UnloadImage() {
  InitAmdExtTable();
  InitImageExtTable();
  core::hsa_internal_api_table().Reset();
#ifdef HSA_IMAGE_SUPPORT
  rocr::image::ReleaseImageRsrcs();
#endif
}

void ExtensionEntryPoints::Unload() {
  // Reset Image apis to hsa_ext_null function
  UnloadImage();
#ifdef HSA_PC_SAMPLING_SUPPORT
  rocr::pcs::ReleasePcSamplingRsrcs();
#endif

  for (auto lib : libs_) {
    void* ptr = os::GetExportAddress(lib, "Unload");
    if (ptr) {
      ((Unload_t)ptr)();
    }
  }
  // Due to valgrind bug, runtime cannot dlclose extensions see:
  // http://valgrind.org/docs/manual/faq.html#faq.unhelpful
  if (!core::Runtime::runtime_singleton_->flag().running_valgrind()) {
    for (auto lib : libs_) {
      os::CloseLib(lib);
    }
  }
  libs_.clear();

  InitFinalizerExtTable();
  InitPcSamplingExtTable();
  InitImageExtTable();
  InitAmdExtTable();
  core::hsa_internal_api_table().Reset();
}

// Bind the built-in image implementation (only when compiled with
// HSA_IMAGE_SUPPORT and not disabled by the user flag). Always returns true.
bool ExtensionEntryPoints::LoadImage() {
#ifdef HSA_IMAGE_SUPPORT
  // The user may opt out of linking to the Image implementation
  if (core::Runtime::runtime_singleton_->flag().disable_image()) {
    return true;
  }

  // Let the image implementation fill in the table and hand back its
  // hsa_amd_image_create entry point
  decltype(::hsa_amd_image_create)* image_create_impl;
  rocr::image::LoadImage(&image_api, &image_create_impl);

  // Stamp the table version; the minor id carries the table size
  image_api.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
  image_api.version.minor_id = sizeof(ImageExtTable);
  image_api.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;

  // Mirror the populated table into the private copy of the Api table
  hsa_internal_api_table().CloneExts(&image_api,
                                     core::HsaApiTable::HSA_EXT_IMAGE_API_TABLE_ID);

  // Publish hsa_amd_image_create through the Amd Ext Api table
  UpdateAmdExtTable(image_create_impl);
#endif
  return true;
}

// Bind the built-in PC sampling implementation (only when compiled with
// HSA_PC_SAMPLING_SUPPORT and not disabled by the user flag).
void ExtensionEntryPoints::LoadPcSampling() {
#ifdef HSA_PC_SAMPLING_SUPPORT
  const bool disabled = core::Runtime::runtime_singleton_->flag().disable_pc_sampling();
  if (disabled) return;

  // Let the PC sampling implementation fill in the table
  rocr::pcs::LoadPcSampling(&pcs_api);

  // Stamp the table version; the minor id carries the table size
  pcs_api.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
  pcs_api.version.minor_id = sizeof(PcSamplingExtTable);
  pcs_api.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;

  // Mirror the populated PC sampling table into the private copy of the Api table
  hsa_internal_api_table().CloneExts(&pcs_api,
                                     core::HsaApiTable::HSA_EXT_PC_SAMPLING_API_TABLE_ID);
#endif
}

bool ExtensionEntryPoints::LoadFinalizer(std::string library_name) {
  os::LibHandle lib = os::LoadLib(library_name);
  if (lib == NULL) {
    return false;
  }
  libs_.push_back(lib);
  
  void* ptr;

  ptr = os::GetExportAddress(lib, "hsa_ext_program_create_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_create_fn ==
               (decltype(::hsa_ext_program_create)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_create_fn = (decltype(::hsa_ext_program_create)*)ptr;
  }

  ptr = os::GetExportAddress(lib, "hsa_ext_program_destroy_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_destroy_fn ==
               (decltype(::hsa_ext_program_destroy)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_destroy_fn =
        (decltype(::hsa_ext_program_destroy)*)ptr;
  }

  ptr = os::GetExportAddress(lib, "hsa_ext_program_add_module_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_add_module_fn ==
               (decltype(::hsa_ext_program_add_module)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_add_module_fn =
        (decltype(::hsa_ext_program_add_module)*)ptr;
  }

  ptr = os::GetExportAddress(lib, "hsa_ext_program_iterate_modules_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_iterate_modules_fn ==
               (decltype(::hsa_ext_program_iterate_modules)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_iterate_modules_fn =
        (decltype(::hsa_ext_program_iterate_modules)*)ptr;
  }

  ptr = os::GetExportAddress(lib, "hsa_ext_program_get_info_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_get_info_fn ==
               (decltype(::hsa_ext_program_get_info)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_get_info_fn =
        (decltype(::hsa_ext_program_get_info)*)ptr;
  }

  ptr = os::GetExportAddress(lib, "hsa_ext_program_finalize_impl");
  if (ptr != NULL) {
    assert(finalizer_api.hsa_ext_program_finalize_fn ==
               (decltype(::hsa_ext_program_finalize)*)hsa_ext_null &&
           "Duplicate load of extension import.");
    finalizer_api.hsa_ext_program_finalize_fn =
        (decltype(::hsa_ext_program_finalize)*)ptr;
  }
  
  // Initialize Version of Api Table
  finalizer_api.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
  finalizer_api.version.minor_id = sizeof(::FinalizerExtTable);
  finalizer_api.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
 
  // Update handle of table of HSA extensions
  hsa_internal_api_table().CloneExts(&finalizer_api,
                                    core::HsaApiTable::HSA_EXT_FINALIZER_API_TABLE_ID);

  ptr = os::GetExportAddress(lib, "Load");
  if (ptr != NULL) {
    ((Load_t)ptr)(&core::hsa_internal_api_table().hsa_api);
  }

  return true;
}

}  // namespace core
}  // namespace rocr

//---------------------------------------------------------------------------//
//   Exported extension stub functions
//---------------------------------------------------------------------------//

// Exported stub: forwards to the hsa_ext_program_create entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_create(
    hsa_machine_model_t machine_model, hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char* options, hsa_ext_program_t* program) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_create_fn(machine_model, profile, default_float_rounding_mode,
                                             options, program);
}

// Exported stub: forwards to the hsa_ext_program_destroy entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_destroy(hsa_ext_program_t program) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_destroy_fn(program);
}

// Exported stub: forwards to the hsa_ext_program_add_module entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_add_module(hsa_ext_program_t program,
                                        hsa_ext_module_t module) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_add_module_fn(program, module);
}

// Exported stub: forwards to the hsa_ext_program_iterate_modules entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_iterate_modules(
    hsa_ext_program_t program,
    hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
                             void* data),
    void* data) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_iterate_modules_fn(program, callback, data);
}

// Exported stub: forwards to the hsa_ext_program_get_info entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_get_info(hsa_ext_program_t program,
                                      hsa_ext_program_info_t attribute,
                                      void* value) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_get_info_fn(program, attribute, value);
}

// Exported stub: forwards to the hsa_ext_program_finalize entry of the
// runtime's finalizer extension table.
hsa_status_t hsa_ext_program_finalize(
    hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
    hsa_ext_control_directives_t control_directives, const char* options,
    hsa_code_object_type_t code_object_type, hsa_code_object_t* code_object) {
  const auto& finalizer = rocr::core::Runtime::runtime_singleton_->extensions_.finalizer_api;
  return finalizer.hsa_ext_program_finalize_fn(program, isa, call_convention, control_directives,
                                               options, code_object_type, code_object);
}

// Exported stub: forwards to the hsa_ext_image_get_capability entry of the
// runtime's image extension table.
hsa_status_t hsa_ext_image_get_capability(
    hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t* image_format, uint32_t* capability_mask) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_get_capability_fn(agent, geometry, image_format, capability_mask);
}

// Exported stub: forwards to the hsa_ext_image_data_get_info entry of the
// runtime's image extension table.
hsa_status_t hsa_ext_image_data_get_info(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t* image_data_info) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_data_get_info_fn(agent, image_descriptor, access_permission,
                                               image_data_info);
}

// Exported stub: forwards to the hsa_ext_image_create entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_create(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    const void* image_data, hsa_access_permission_t access_permission,
    hsa_ext_image_t* image) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_create_fn(agent, image_descriptor, image_data, access_permission,
                                        image);
}

// Exported stub: forwards to the hsa_ext_image_import entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_import(hsa_agent_t agent, const void* src_memory,
                                  size_t src_row_pitch, size_t src_slice_pitch,
                                  hsa_ext_image_t dst_image,
                                  const hsa_ext_image_region_t* image_region) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_import_fn(agent, src_memory, src_row_pitch, src_slice_pitch,
                                        dst_image, image_region);
}

// Exported stub: forwards to the hsa_ext_image_export entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image,
                                  void* dst_memory, size_t dst_row_pitch,
                                  size_t dst_slice_pitch,
                                  const hsa_ext_image_region_t* image_region) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_export_fn(agent, src_image, dst_memory, dst_row_pitch,
                                        dst_slice_pitch, image_region);
}

// Exported stub: forwards to the hsa_ext_image_copy entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image,
                                const hsa_dim3_t* src_offset,
                                hsa_ext_image_t dst_image,
                                const hsa_dim3_t* dst_offset,
                                const hsa_dim3_t* range) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_copy_fn(agent, src_image, src_offset, dst_image, dst_offset,
                                      range);
}

// Exported stub: forwards to the hsa_ext_image_clear entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image,
                                 const void* data,
                                 const hsa_ext_image_region_t* image_region) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_clear_fn(agent, image, data, image_region);
}

// Exported stub: forwards to the hsa_ext_image_destroy entry of the runtime's
// image extension table.
hsa_status_t hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_destroy_fn(agent, image);
}

// Exported stub: forwards to the hsa_ext_sampler_create entry of the
// runtime's image extension table.
hsa_status_t hsa_ext_sampler_create(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_t* sampler_descriptor,
    hsa_ext_sampler_t* sampler) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_sampler_create_fn(agent, sampler_descriptor, sampler);
}

// Exported stub: forwards to the hsa_ext_sampler_create_v2 entry of the
// runtime's image extension table.
hsa_status_t hsa_ext_sampler_create_v2(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_v2_t* sampler_descriptor,
    hsa_ext_sampler_t* sampler) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_sampler_create_v2_fn(agent, sampler_descriptor, sampler);
}

// Exported stub: forwards to the hsa_ext_sampler_destroy entry of the
// runtime's image extension table.
hsa_status_t hsa_ext_sampler_destroy(hsa_agent_t agent,
                                     hsa_ext_sampler_t sampler) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_sampler_destroy_fn(agent, sampler);
}

// Exported stub: forwards to the hsa_ext_image_get_capability_with_layout
// entry of the runtime's image extension table.
hsa_status_t hsa_ext_image_get_capability_with_layout(
    hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t* image_format,
    hsa_ext_image_data_layout_t image_data_layout,
    uint32_t* capability_mask) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_get_capability_with_layout_fn(agent, geometry, image_format,
                                                            image_data_layout, capability_mask);
}

// Exported stub: forwards to the hsa_ext_image_data_get_info_with_layout
// entry of the runtime's image extension table.
hsa_status_t hsa_ext_image_data_get_info_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_data_get_info_with_layout_fn(
      agent, image_descriptor, access_permission, image_data_layout, image_data_row_pitch,
      image_data_slice_pitch, image_data_info);
}

// Exported stub: forwards to the hsa_ext_image_create_with_layout entry of
// the runtime's image extension table.
hsa_status_t hsa_ext_image_create_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    const void* image_data, hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_t* image) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_ext_image_create_with_layout_fn(
      agent, image_descriptor, image_data, access_permission, image_data_layout,
      image_data_row_pitch, image_data_slice_pitch, image);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_iterate_configuration entry
// of the runtime's PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_iterate_configuration(
    hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
    void* callback_data) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_iterate_configuration_fn(agent, configuration_callback,
                                                      callback_data);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_create entry of the
// runtime's PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_create(
    hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method, hsa_ven_amd_pcs_units_t units,
    size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
    hsa_ven_amd_pcs_t* pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_create_fn(agent, method, units, interval, latency, buffer_size,
                                       data_ready_callback, client_callback_data, pc_sampling);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_create_from_id entry of the
// runtime's PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_create_from_id(
    uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
    hsa_ven_amd_pcs_t* pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_create_from_id_fn(pcs_id, agent, method, units, interval, latency,
                                               buffer_size, data_ready_callback,
                                               client_callback_data, pc_sampling);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_destroy entry of the
// runtime's PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_destroy_fn(pc_sampling);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_start entry of the runtime's
// PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_start_fn(pc_sampling);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_stop entry of the runtime's
// PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_stop_fn(pc_sampling);
}

// Exported stub: forwards to the hsa_ven_amd_pcs_flush entry of the runtime's
// PC sampling extension table.
hsa_status_t HSA_API hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling) {
  const auto& pcs = rocr::core::Runtime::runtime_singleton_->extensions_.pcs_api;
  return pcs.hsa_ven_amd_pcs_flush_fn(pc_sampling);
}

//---------------------------------------------------------------------------//
//  Stubs for internal extension functions
//---------------------------------------------------------------------------//

// Internal stub: dispatches through the hsa_amd_image_get_info_max_dim entry
// held in the runtime's own Image extension table instance.
hsa_status_t hsa_amd_image_get_info_max_dim(hsa_agent_t component,
                                            hsa_agent_info_t attribute,
                                            void* value) {
  const auto& images = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return images.hsa_amd_image_get_info_max_dim_fn(component, attribute, value);
}


================================================
FILE: runtime/hsa-runtime/core/runtime/hsa_ven_amd_loader.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/hsa_ven_amd_loader_impl.h"

#include "core/inc/amd_hsa_loader.hpp"
#include "core/inc/runtime.h"

namespace rocr {

using namespace amd::hsa;
using namespace core;

using loader::CodeObjectReaderImpl;
using loader::Executable;
using loader::LoadedCodeObject;
using loader::Loader;

namespace AMD {

// Only declared here (no definition in this file). The catch(...) handlers of
// the loader entry points below call it and return its hsa_status_t result.
hsa_status_t handleException();

}   // namespace AMD

// Asks the loader for the host address that corresponds to `device_address`.
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when either pointer is null or the loader
// reports no host address (0). On success the result is stored in
// `*host_address`.
hsa_status_t hsa_ven_amd_loader_query_host_address(
  const void *device_address,
  const void **host_address) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (device_address == nullptr || host_address == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    const uintptr_t device_value = reinterpret_cast<uintptr_t>(device_address);
    const uintptr_t host_value =
        Runtime::runtime_singleton_->loader()->FindHostAddress(device_value);
    if (host_value == 0) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    *host_address = reinterpret_cast<void*>(host_value);
    return HSA_STATUS_SUCCESS;
  } catch(...) { return AMD::handleException(); }
}

// Delegates to Loader::QuerySegmentDescriptors once the runtime is known to
// be open. The two arguments are passed through unchecked because the loader
// validates them.
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
  hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
  size_t *num_segment_descriptors) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }

    // Arguments are checked by the loader.
    auto *active_loader = Runtime::runtime_singleton_->loader();
    return active_loader->QuerySegmentDescriptors(segment_descriptors,
                                                  num_segment_descriptors);
  } catch(...) { return AMD::handleException(); }
}

// Asks the loader which executable `device_address` belongs to.
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when either pointer is null or the loader
// returns an executable with a zero handle. On success the executable is
// stored in `*executable`.
hsa_status_t hsa_ven_amd_loader_query_executable(
  const void *device_address,
  hsa_executable_t *executable) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (device_address == nullptr || executable == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    const uintptr_t device_value = reinterpret_cast<uintptr_t>(device_address);
    const hsa_executable_t found =
        Runtime::runtime_singleton_->loader()->FindExecutable(device_value);
    if (found.handle == 0) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    *executable = found;
    return HSA_STATUS_SUCCESS;
  } catch(...) { return AMD::handleException(); }
}

// Runs `callback` over the loaded code objects of `executable` by delegating
// to Executable::IterateLoadedCodeObjects.
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null callback, and
// HSA_STATUS_ERROR_INVALID_EXECUTABLE when the handle does not resolve to an
// Executable object.
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
  hsa_executable_t executable,
  hsa_status_t (*callback)(
    hsa_executable_t executable,
    hsa_loaded_code_object_t loaded_code_object,
    void *data),
  void *data) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (callback == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    Executable *resolved = Executable::Object(executable);
    if (resolved == nullptr) {
      return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
    }

    return resolved->IterateLoadedCodeObjects(callback, data);
  } catch(...) { return AMD::handleException(); }
}

// Writes the requested `attribute` of a loaded code object into `value`.
// The caller supplies storage of the type matching the attribute; this
// function stores through `value` cast to that type.
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open,
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT when the handle does not resolve, and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `value`, an unknown attribute,
// the AGENT attribute on an object with a zero agent handle, or the
// STORAGE_FILE attribute (not tracked yet).
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
  hsa_loaded_code_object_t loaded_code_object,
  hsa_ven_amd_loader_loaded_code_object_info_t attribute,
  void *value) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (value == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    const LoadedCodeObject *code_obj = LoadedCodeObject::Object(loaded_code_object);
    if (code_obj == nullptr) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }

    switch (attribute) {
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE: {
        *static_cast<hsa_executable_t*>(value) = code_obj->getExecutable();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND: {
        // A zero agent handle is reported as a program-level code object.
        uint32_t *kind_out = static_cast<uint32_t*>(value);
        if (code_obj->getAgent().handle == 0) {
          *kind_out = HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM;
        } else {
          *kind_out = HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT;
        }
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT: {
        const hsa_agent_t owner = code_obj->getAgent();
        if (owner.handle == 0) {
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
        }
        *static_cast<hsa_agent_t*>(value) = owner;
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE: {
        // TODO Update loader so it keeps track if code object was loaded from a
        // file or memory. Until then memory storage is always reported.
        *static_cast<uint32_t*>(value) = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY;
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE: {
        *static_cast<uint64_t*>(value) = code_obj->getElfData();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE: {
        *static_cast<uint64_t*>(value) = code_obj->getElfSize();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE: {
        // TODO Update loader so it keeps track if code object was loaded from a
        // file or memory.
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA: {
        // TODO Check if executable is frozen.
        // This suggests this code should be moved into LoadedCodeObjectImpl::getinfo
        // as is done for other *_get_info methods. Currently LoadedCodeObject has a
        // GetInfo method which is likely not used.
        // Also should this have a *NOT_FROZEN status code added?
        // if (state_ != HSA_EXECUTABLE_STATE_FROZEN) {
        //   return HSA_STATUS_ERROR_INVALID_ARGUMENT;
        // }
        *static_cast<int64_t*>(value) = code_obj->getDelta();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE: {
        // TODO Check if executable is frozen.
        *static_cast<uint64_t*>(value) = code_obj->getLoadBase();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE: {
        // TODO Check if executable is frozen.
        *static_cast<uint64_t*>(value) = code_obj->getLoadSize();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH: {
        *static_cast<uint32_t*>(value) = code_obj->getUri().size();
        break;
      }
      case HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI: {
        // Copies exactly size() bytes: no terminating NUL is written.
        const auto &uri = code_obj->getUri();
        memcpy(value, uri.c_str(), uri.size());
        break;
      }
      default: {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    }

    return HSA_STATUS_SUCCESS;
  } catch(...) { return AMD::handleException(); }
}

// Creates a code object reader over the byte range [offset, offset + size) of
// `file` and returns its handle through `code_object_reader`.
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open,
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null output pointer,
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT for a zero size,
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the reader cannot be allocated, or
// whatever status CodeObjectReaderImpl::SetFile reports on failure.
hsa_status_t
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
    hsa_file_t file,
    size_t offset,
    size_t size,
    hsa_code_object_reader_t *code_object_reader) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (code_object_reader == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
    if (size == 0) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }

    // Non-throwing allocation; the unique_ptr frees the reader on any early
    // return below.
    std::unique_ptr<CodeObjectReaderImpl> owned_reader(
        new (std::nothrow) CodeObjectReaderImpl());
    if (owned_reader == nullptr) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    const hsa_status_t set_status = owned_reader->SetFile(file, offset, size);
    if (set_status != HSA_STATUS_SUCCESS) {
      return set_status;
    }

    // Ownership leaves the unique_ptr and travels with the returned handle.
    *code_object_reader = CodeObjectReaderImpl::Handle(owned_reader.release());
    return HSA_STATUS_SUCCESS;
  } catch(...) { return AMD::handleException(); }
}

namespace {

// Convenience accessor for the loader held by the runtime singleton.
Loader *GetLoader() {
  Loader *const loader = Runtime::runtime_singleton_->loader();
  return loader;
}

} // namespace anonymous

// Forwards callback/data to Loader::IterateExecutables on the runtime's
// loader.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when callback is null; otherwise returns
// the loader's result. Any exception is translated by AMD::handleException().
hsa_status_t
hsa_ven_amd_loader_iterate_executables(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      void *data),
    void *data) {
  try {
    if (!Runtime::runtime_singleton_->IsOpen()) {
      return HSA_STATUS_ERROR_NOT_INITIALIZED;
    }
    if (callback == nullptr) {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    Loader *const loader = GetLoader();
    return loader->IterateExecutables(callback, data);
  } catch(...) { return AMD::handleException(); }
}

} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/intercept_queue.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/intercept_queue.h"
#include "core/inc/amd_aql_queue.h"
#include "core/inc/default_signal.h"
#include "core/util/utils.h"
#include "inc/hsa_api_trace.h"

namespace rocr {
namespace core {

namespace {

// Returns true when the packet is an AMD_AQL_FORMAT_INTERCEPT_MARKER packet,
// i.e. a vendor-specific packet whose AMD format field names the intercept
// marker.
//
// The packet header is loaded non-atomically. That is permissible if the
// calling thread has previously loaded the header atomically and determined
// that the packet is not INVALID: once a packet is no longer INVALID its
// ownership belongs to the packet processor.
bool inline IsInterceptMarkerPacket(const AqlPacket* packet) {
  if (AqlPacket::type(packet->packet.header) != HSA_PACKET_TYPE_VENDOR_SPECIFIC) {
    return false;
  }
  return packet->amd_vendor.format == AMD_AQL_FORMAT_INTERCEPT_MARKER;
}

}  // namespace

// Per-thread bookkeeping for the interceptor chain that is currently running.
// Kept as a plain aggregate: it is initialized positionally below.
struct InterceptFrame {
  // Queue whose StoreRelaxed is processing packets on this thread; nullptr
  // when no packet processing is in progress. Used to detect recursive
  // doorbell rings issued from inside an interceptor.
  InterceptQueue* queue;
  // Intercept-queue index of the packet being processed; passed to every
  // handler in the chain.
  uint64_t pkt_index;
  // Index into queue->interceptors of the handler that is currently running.
  // PacketWriter steps it down by one to reach the next handler.
  size_t interceptor_index;
};

// Thread-local cursor describing the packet/interceptor being processed by
// InterceptQueue::StoreRelaxed and InterceptQueue::PacketWriter on this thread.
static thread_local InterceptFrame Cursor = {nullptr, 0, 0};

// Header written back into an intercept-queue slot once its packet has been
// consumed: packet type INVALID, barrier bit set, no acquire/release fences.
static const uint16_t kInvalidHeader = (HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE) |
    (1 << HSA_PACKET_HEADER_BARRIER) |
    (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
    (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);

// Header of the retry barrier packet placed on the wrapped queue when not all
// packets could be submitted: packet type BARRIER_AND, barrier bit set, no
// acquire/release fences.
static const uint16_t kBarrierHeader = (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
    (1 << HSA_PACKET_HEADER_BARRIER) |
    (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
    (HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);

bool InterceptQueue::IsPendingRetryPoint(uint64_t wrapped_current_read_index) const {
  // Answers: "has the most recent retry barrier packet definitely NOT been
  // processed yet?" The answer is used to avoid placing more than one retry
  // packet on the wrapped queue.
  //
  // The AQL protocol lets the packet processor advance the read index at any
  // time after the producer advances the write index, and does not specify
  // the latest point at which the read index must be advanced. The read index
  // alone therefore cannot prove that a packet has not been processed.
  //
  // This code assumes that the read index is advanced no later than the start
  // of processing the next packet. So, at worst, when the read index equals
  // the retry index the packet may already have been processed and its
  // completion signal updated (possibly that is what caused the call to
  // InterceptQueue::StoreRelaxed that is now invoking this function). But when
  // the read index is less than the retry index the packet has not yet been
  // processed. This implies that the minimum queue size is 3 (enforced in
  // hsa_amd_queue_intercept_create): a non-retry packet, a retry packet that
  // is being processed, and space for a new retry packet.
  //
  // FIXME: The above assumption can be removed by using a distinct interrupt
  // signal for the retry packet completion signal, and tracking when that
  // signal is updated and invokes its async handler. Currently the wrapped
  // queue doorbell signal is also being used as the retry completion signal.
  // If that is done then the minimum queue size needs to be changed from 3 to
  // 2 (enforced in hsa_amd_queue_intercept_create).
  return wrapped_current_read_index < retry_index_;
}

// Wraps `queue` with an intercept layer: allocates a shadow ring buffer of the
// same size as the wrapped queue, creates the async doorbell signal, registers
// its async handler and installs Submit as the final interceptor.
// Throws AMD::hsa_exception if the doorbell handler cannot be registered; in
// that case the async doorbell signal is destroyed by the scope guard.
InterceptQueue::InterceptQueue(std::unique_ptr<Queue> queue)
    : QueueProxy(std::move(queue)),
      LocalSignal(0, false),
      DoorbellSignal(signal()),
      next_packet_(0),
      retry_index_(0),
      quit_(false),
      active_(true) {
  // The initial retry_index_ must make InterceptQueue::IsPendingRetryPoint
  // report false until the first retry barrier packet has been inserted.
  assert(!IsPendingRetryPoint(next_packet_) &&
         "Packet intercept error: initial retry index is incompatible with IsPendingRetryPoint.\n");

  const uint32_t slot_count = wrapped->amd_queue_.hsa_queue.size;
  buffer_ = SharedArray<AqlPacket, 4096>(slot_count);
  amd_queue_.hsa_queue.base_address = reinterpret_cast<void*>(&buffer_[0]);

  // Mark every slot of the ring buffer as INVALID. Only the header is written;
  // the packet content is left uninitialized to help trigger application
  // errors.
  for (uint32_t slot = 0; slot != slot_count; ++slot) {
    buffer_[slot].packet.header = HSA_PACKET_TYPE_INVALID;
  }

  // Match the queue's signal ABI block to async_doorbell_'s.
  // This allows the queue's signal ABI block to be used from devices to
  // trigger async_doorbell_, while host side use jumps directly to the queue's
  // signal implementation.
  if (core::g_use_interrupt_wait) {
    async_doorbell_ = new InterruptSignal(DOORBELL_MAX);
  } else {
    async_doorbell_ = new DefaultSignal(DOORBELL_MAX);
  }
  MAKE_NAMED_SCOPE_GUARD(doorbellGuard, [&]() { async_doorbell_->DestroySignal(); });
  this->signal_ = async_doorbell_->signal_;
  amd_queue_.hsa_queue.doorbell_signal = Signal::Convert(this);

  // Register the async handler that services device side dispatches. It fires
  // when the doorbell value differs from its current value.
  const auto status = Runtime::runtime_singleton_->SetAsyncSignalHandler(
      core::Signal::Convert(async_doorbell_), HSA_SIGNAL_CONDITION_NE,
      async_doorbell_->LoadRelaxed(), HandleAsyncDoorbell, this);
  if (status != HSA_STATUS_SUCCESS) {
    throw AMD::hsa_exception(status, "Doorbell handler registration failed.\n");
  }

  // Install the copy submission interceptor (the end of the chain).
  AddInterceptor(Submit, this);

  // Construction succeeded: keep the doorbell signal alive.
  doorbellGuard.Dismiss();
}

// Deactivates the queue and shuts down the async doorbell handler before
// destroying the async doorbell signal.
InterceptQueue::~InterceptQueue() {
  active_ = false;

  // Kill the async doorbell handler.
  // The doorbell may not be used during or after queue destroy, however an
  // interrupt may be in flight. Make sure the doorbell value is not 0, flag
  // the handler to exit, wake it, and wait for the termination value (0) that
  // the handler stores on exit.
  async_doorbell_->StoreRelaxed(DOORBELL_MAX);
  quit_ = true;
  const bool must_wait = (async_doorbell_->ExchRelaxed(1) != 0);
  if (must_wait) {
    async_doorbell_->WaitRelaxed(HSA_SIGNAL_CONDITION_EQ, 0, -1, HSA_WAIT_STATE_BLOCKED);
  }
  async_doorbell_->DestroySignal();
}

// Async signal handler for the async doorbell; `arg` is the InterceptQueue
// that registered it.
//
// While the queue is live: re-arms the doorbell with DOORBELL_MAX, forwards
// `value` to the queue's StoreRelease and returns true.
// Once quit_ is set: stores 0 into the doorbell (the termination value the
// destructor waits for) and returns false.
bool InterceptQueue::HandleAsyncDoorbell(hsa_signal_value_t value, void* arg) {
  InterceptQueue* const self = static_cast<InterceptQueue*>(arg);

  if (!self->quit_) {
    self->async_doorbell_->StoreRelaxed(DOORBELL_MAX);
    self->StoreRelease(value);
    return true;
  }

  self->async_doorbell_->StoreRelaxed(0);
  return false;
}

// Packet writer handed to interceptors: passes `pkts` to the next handler in
// the chain of the queue recorded in the thread-local Cursor, i.e. the handler
// whose index is one lower than that of the handler currently running.
void InterceptQueue::PacketWriter(const void* pkts, uint64_t pkt_count) {
  assert(Cursor.interceptor_index > 0 &&
         "Packet intercept error: final submit handler must not call PacketWritter.\n");

  // Step down to the next handler for the duration of its invocation.
  const size_t next_index = --Cursor.interceptor_index;
  auto& next_handler = Cursor.queue->interceptors[next_index];
  next_handler.first(pkts, pkt_count, Cursor.pkt_index, next_handler.second, PacketWriter);

  // Step back up: the same rewrite handler may call the PacketWriter more than
  // once.
  ++Cursor.interceptor_index;
}

void InterceptQueue::Submit(const void* pkts, uint64_t pkt_count, uint64_t user_pkt_index,
                            void* data, hsa_amd_queue_intercept_packet_writer writer) {
  InterceptQueue* queue = reinterpret_cast<InterceptQueue*>(data);
  const AqlPacket* packets = (const AqlPacket*)pkts;

  // Submit final packet transform to hardware.
  uint64_t submitted_count = queue->Submit(packets, pkt_count);
  if (submitted_count == pkt_count) return;

  // Could not submit all the final packets, stash unsubmitted ones for later.
  assert(queue->overflow_.empty() && "Packet intercept error: overflow buffer not empty.\n");
  for (uint64_t i = submitted_count; i < pkt_count; i++)
    queue->overflow_.push_back(packets[i]);
}

// Attempts to place `count` packets from `packets` onto the wrapped queue.
//
// Intercept marker packets are never written to the wrapped queue; instead
// their callback is invoked in order, before any following packet is made
// visible to the packet processor.
//
// If not all non-marker packets can be submitted and no retry barrier packet
// is pending, a retry barrier packet is placed on the wrapped queue whose
// completion signal is the async doorbell, so that processing resumes later.
//
// Returns the number of entries of `packets` that were consumed (submitted
// packets plus marker packets whose callbacks were invoked). A return value
// smaller than `count` means the remaining entries must be resubmitted.
//
// NOTE(review): the asserts below presume no other thread updates the wrapped
// queue's write index concurrently - confirm callers serialize access.
uint64_t InterceptQueue::Submit(const AqlPacket* packets, uint64_t count) {
  if (count == 0) return 0;

  // Marker packets do not occupy slots on the wrapped queue, so count them up
  // front to know how many slots are really needed.
  uint64_t marker_count = 0;
  for (uint64_t i = 0; i < count; i++) {
    if (IsInterceptMarkerPacket(&packets[i])) ++marker_count;
  }

  AqlPacket* ring = reinterpret_cast<AqlPacket*>(wrapped->amd_queue_.hsa_queue.base_address);
  uint64_t mask = wrapped->amd_queue_.hsa_queue.size - 1;

  // Retry until the write index reservation (CAS) below succeeds.
  while (true) {
    uint64_t write = wrapped->LoadWriteIndexRelaxed();
    uint64_t read = wrapped->LoadReadIndexRelaxed();
    uint64_t free_slots = wrapped->amd_queue_.hsa_queue.size - (write - read);
    bool pending_retry_point = IsPendingRetryPoint(read);

    // Number of non-marker packets that will be written in this attempt.
    uint64_t submitted_count = count - marker_count;

    // If the number of packets is greater than the wrapped queue size, then we
    // can never submit them all at once. So submit what will fit, leaving one
    // slot free for the retry barrier packet if it is not already on the
    // queue.
    if (submitted_count >= wrapped->amd_queue_.hsa_queue.size) {
      submitted_count = free_slots - (pending_retry_point ? 0 : 1);
    }

    // Prefer to either submit all the packets, or none of the packets. This
    // ensures that all the packets of a rewrite will be on the queue at the
    // same time. This may be desirable for some rewrites. So if out of space
    // defer packet insertion. Always make sure there is a free slot available
    // for the retry barrier packet if there is not already one present.
    else if (free_slots < submitted_count + (pending_retry_point ? 0 : 1)) {
      submitted_count = 0;
    }

    // If we are not submitting all the packets, we need to ensure there is a
    // retry packet to cause the remaining packets to be submitted. If there is
    // not already a pending retry point add one.
    if (submitted_count < (count - marker_count) && !pending_retry_point) {
      // Reserve one slot for the barrier packet. There will always be at least
      // one free slot.
      assert(free_slots >= 1 &&
             "Packet intercept error: there is no free slot for a retry barrier packet.\n");
      // Reserve a slot for the barrier packet.
      uint64_t barrier = wrapped->AddWriteIndexRelaxed(1);
      assert(barrier == write &&
             "Packet intercept error: wrapped queue has been updated by another thread.\n");
      ++write;

      // Submit barrier which will wake async queue processing.
      ring[barrier & mask].packet.body = {};
      ring[barrier & mask].barrier_and.completion_signal = Signal::Convert(async_doorbell_);
      if (wrapped->IsDeviceMemRingBuf() && needsPcieOrdering()) {
        // Ensure the packet body is written as header may get reordered when writing over PCIE
        _mm_sfence();
      }
      // Publish the barrier packet by writing its header last, with release
      // ordering.
      atomic::Store(&ring[barrier & mask].barrier_and.header, kBarrierHeader,
                    std::memory_order_release);
      // Update the wrapped queue's doorbell so it knows there is a new packet in the queue.
      HSA::hsa_signal_store_screlease(wrapped->amd_queue_.hsa_queue.doorbell_signal, barrier);

      // Record the retry point
      retry_index_ = barrier;
    }

    // Attempt to reserve useable queue space if some packets need to be
    // submitted.
    uint64_t new_write = submitted_count == 0
        ? write
        : wrapped->CasWriteIndexRelaxed(write, write + submitted_count);
    if (new_write == write) {
      // packets_index walks the input array; write_index counts the slots
      // actually written on the wrapped queue (markers are not written).
      uint64_t packets_index = 0;
      uint64_t write_index = 0;
      // Only assigned (and later read) when at least one packet is written.
      uint64_t first_written_packet_index;
      // Keep going while packets remain to be written, and also consume any
      // marker packets that immediately follow the last written packet.
      while (submitted_count > 0 || (packets_index < count && IsInterceptMarkerPacket(&packets[packets_index]))) {
        // Ensure the marker packet callback is invoked before following
        // packets are made available for the packet processor.
        if (IsInterceptMarkerPacket(&packets[packets_index])) {
          const amd_aql_intercept_marker_t* marker_packet =
              reinterpret_cast<const amd_aql_intercept_marker_t*>(&packets[packets_index]);
          marker_packet->callback(marker_packet, &wrapped->amd_queue_.hsa_queue,
                                  write + write_index);
        } else {
          if (write_index == 0) {
            // Leave the header of the first packet as INVALID so packet
            // processor will not start processing any packets until all have
            // been written and the first packet header atomically store
            // released.
            ring[(write + write_index) & mask].packet.body = packets[packets_index].packet.body;
            first_written_packet_index = packets_index;
          } else {
            ring[(write + write_index) & mask] = packets[packets_index];
          }
          ++write_index;
          --submitted_count;
        }
        ++packets_index;
      }
      if (write_index != 0) {
        if (wrapped->IsDeviceMemRingBuf() && needsPcieOrdering()) {
          // Ensure the packet body is written as header may get reordered when writing over PCIE
          _mm_sfence();
        }
        // Publish the whole batch by releasing the first packet's header, then
        // ring the wrapped doorbell with the index of the last written packet.
        atomic::Store(&ring[write & mask].packet.header, packets[first_written_packet_index].packet.header,
                      std::memory_order_release);
        HSA::hsa_signal_store_screlease(wrapped->amd_queue_.hsa_queue.doorbell_signal,
                                        write + write_index - 1);
      }
      return packets_index;
    }
  }
}

// Doorbell store for the intercept queue: drains any overflow packets, then
// runs the interceptor chain on every valid packet in the intercept ring,
// invalidates each consumed slot and publishes the new read index.
//
// Does nothing once the queue is inactive. When invoked recursively on a
// thread that is already processing packets (Cursor.queue is set), `value` is
// forwarded to the async doorbell instead of being processed inline.
void InterceptQueue::StoreRelaxed(hsa_signal_value_t value) {
  if (!active_) return;

  // If called recursively defer to async doorbell thread.
  if (Cursor.queue != nullptr) {
    debug_print("Likely incorrect queue use observed in an interceptor.\n");
    async_doorbell_->StoreRelaxed(value);
    return;
  }

  ScopedAcquire<KernelMutex> lock(&lock_);

  // Overflow packets from an earlier rewrite go first.
  if (!overflow_.empty()) {
    const uint64_t drained = Submit(&overflow_[0], overflow_.size());

    if (drained < overflow_.size()) {
      // There was no space for all the overflow packets, so there is no space
      // for other packets either. Keep what is left and stop here.
      overflow_.erase(overflow_.begin(), overflow_.begin() + drained);
      return;
    }

    // All overflow packets have been submitted.
    overflow_.clear();
  }

  Cursor.queue = this;

  AqlPacket* const slots = reinterpret_cast<AqlPacket*>(amd_queue_.hsa_queue.base_address);
  const uint64_t index_mask = wrapped->amd_queue_.hsa_queue.size - 1;

  // Upper bound of the packets to examine.
  uint64_t limit = LoadWriteIndexAcquire();

  // Only packets that occupy slots in the queue buffer can be processed. No
  // barrier packet is needed to get the extra packets processed, because the
  // producer must ring the doorbell once the extra packets are made valid.
  const uint64_t window_end = next_packet_ + amd_queue_.hsa_queue.size;
  if (limit > window_end) limit = window_end;

  uint64_t index = next_packet_;
  while (index < limit) {
    AqlPacket& slot = slots[index & index_mask];

    // The header may have been written by another thread as atomic release,
    // so load it as atomic acquire to make the rest of the packet fields
    // visible. Once it is proven not to be INVALID, further loads by this
    // thread can be non-atomic.
    const uint16_t header = atomic::Load(&slot.packet.header, std::memory_order_acquire);
    if (!AqlPacket::IsValid(header)) break;

    // Run the interceptor chain, starting from the last registered handler.
    Cursor.interceptor_index = interceptors.size() - 1;
    Cursor.pkt_index = index;
    auto& entry = interceptors[Cursor.interceptor_index];
    entry.first(&slot, 1, index, entry.second, PacketWriter);
    if (IsDeviceMemRingBuf() && needsPcieOrdering()) {
      // Ensure the packet body is written as header may get reordered when writing over PCIE
      _mm_sfence();
    }
    // Invalidate consumed packet.
    atomic::Store(&slot.packet.header, kInvalidHeader, std::memory_order_release);

    // Packet has now been processed so advance the read index.
    ++index;

    // Only the rewrite of one packet may sit on the overflow queue. When
    // packets are put on the overflow queue a barrier packet is also added;
    // its async handler rings the doorbell, which re-invokes this function to
    // put the overflow packets on the hardware queue and to continue
    // rewriting packets on the intercept queue.
    if (!overflow_.empty()) break;
  }

  next_packet_ = index;
  Cursor.queue = nullptr;
  atomic::Store(&amd_queue_.read_dispatch_id, next_packet_, std::memory_order_release);
}

// Answers queue info queries by delegating to the wrapped queue.
//
// HSA_AMD_QUEUE_INFO_AGENT and HSA_AMD_QUEUE_INFO_DOORBELL_ID are forwarded to
// the wrapped AMD::AqlQueue; if the wrapped queue is not an AqlQueue the
// result is HSA_STATUS_ERROR_INVALID_QUEUE. Every other attribute yields
// HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t InterceptQueue::GetInfo(hsa_queue_info_attribute_t attribute, void* value) {
  switch (attribute) {
    case HSA_AMD_QUEUE_INFO_AGENT:
    case HSA_AMD_QUEUE_INFO_DOORBELL_ID: {
      Queue* const inner = wrapped.get();
      if (AMD::AqlQueue::IsType(inner)) {
        return static_cast<AMD::AqlQueue*>(inner)->GetInfo(attribute, value);
      }
      return HSA_STATUS_ERROR_INVALID_QUEUE;
    }
    default:
      break;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/interrupt_signal.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/interrupt_signal.h"
#include "core/inc/runtime.h"
#include "core/util/locks.h"

namespace rocr {
namespace core {

// Hands out an event: a pooled one if available, otherwise a newly created
// HSA_EVENTTYPE_SIGNAL event. Returns nullptr when the pool is empty and
// event creation has failed (now or on an earlier call).
HsaEvent* InterruptSignal::EventPool::alloc() {
  ScopedAcquire<HybridMutex> lock(&lock_);

  // Reuse a pooled event when there is one.
  if (!events_.empty()) {
    HsaEvent* pooled = events_.back().release();
    events_.pop_back();
    return pooled;
  }

  // A previous creation attempt failed; do not try again.
  if (allEventsAllocated) return nullptr;

  HsaEvent* created = InterruptSignal::CreateEvent(HSA_EVENTTYPE_SIGNAL, false);
  if (created == nullptr) allEventsAllocated = true;
  return created;
}

// Returns an event to the pool so a later alloc() can reuse it. A null event
// is ignored.
void InterruptSignal::EventPool::free(HsaEvent* evt) {
  if (evt != nullptr) {
    ScopedAcquire<HybridMutex> lock(&lock_);
    events_.push_back(unique_event_ptr(evt));
  }
}

// Creates a driver event of the given type through hsaKmtCreateEvent.
//
// For HSA_EVENTTYPE_MEMORY and HSA_EVENTTYPE_HW_EXCEPTION events the
// corresponding event data payload is zeroed after creation.
// Returns the pointer produced by hsaKmtCreateEvent (initialized to null
// before the call).
HsaEvent* InterruptSignal::CreateEvent(HSA_EVENTTYPE type, bool manual_reset) {
  HsaEventDescriptor descriptor;
  descriptor.EventType = type;
  descriptor.SyncVar.SyncVar.UserData = nullptr;
  descriptor.SyncVar.SyncVarSize = sizeof(hsa_signal_value_t);
  descriptor.NodeId = 0;

  HsaEvent* event = nullptr;
  if (HSAKMT_STATUS_SUCCESS !=
      HSAKMT_CALL(hsaKmtCreateEvent(&descriptor, manual_reset, false, &event))) {
    return event;
  }

  switch (type) {
    case HSA_EVENTTYPE_MEMORY:
      memset(&event->EventData.EventData.MemoryAccessFault.Failure, 0,
             sizeof(HsaAccessAttributeFailure));
      break;
    case HSA_EVENTTYPE_HW_EXCEPTION:
      memset(&event->EventData.EventData.HwException, 0, sizeof(HsaHwException));
      break;
    default:
      break;
  }

  return event;
}

// Destroys a driver event through hsaKmtDestroyEvent; the result of the call
// is not inspected.
void InterruptSignal::DestroyEvent(HsaEvent* evt) {
  HSAKMT_CALL(hsaKmtDestroyEvent(evt));
}

// Builds an interrupt signal with the given initial value.
//
// When `use_event` is provided it is used as-is and is not returned to the
// pool on destruction; otherwise an event is taken from the runtime's event
// pool and returned to it by the destructor. If no event could be obtained the
// event id and mailbox pointer of the signal are set to 0.
InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value, HsaEvent* use_event)
    : LocalSignal(initial_value, false), Signal(signal()) {
  if (use_event == nullptr) {
    event_ = Runtime::runtime_singleton_->GetEventPool()->alloc();
    free_event_ = true;
  } else {
    event_ = use_event;
    free_event_ = false;
  }

  if (event_ == nullptr) {
    signal_.event_id = 0;
    signal_.event_mailbox_ptr = 0;
  } else {
    signal_.event_id = event_->EventId;
    signal_.event_mailbox_ptr = event_->EventData.HWData2;
  }
  signal_.kind = AMD_SIGNAL_KIND_USER;
}

// Returns the event to the runtime's event pool, but only if this signal
// obtained it from the pool in the first place.
InterruptSignal::~InterruptSignal() {
  if (!free_event_) return;
  Runtime::runtime_singleton_->GetEventPool()->free(event_);
}

// Reads the signal value with relaxed memory ordering.
hsa_signal_value_t InterruptSignal::LoadRelaxed() {
  const auto current = atomic::Load(&signal_.value, std::memory_order_relaxed);
  return static_cast<hsa_signal_value_t>(current);
}

// Reads the signal value with acquire memory ordering.
hsa_signal_value_t InterruptSignal::LoadAcquire() {
  const auto current = atomic::Load(&signal_.value, std::memory_order_acquire);
  return static_cast<hsa_signal_value_t>(current);
}

// Writes the signal value with relaxed memory ordering, then notifies the
// driver through SetEvent().
void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) {
  atomic::Store(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

// Writes the signal value with release memory ordering, then notifies the
// driver through SetEvent().
void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
  atomic::Store(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

// Waits until the signal value satisfies `condition` against `compare_value`
// or until `timeout` elapses.
//
// Parameters:
//   condition     - comparison applied by CheckSignalCondition.
//   compare_value - value the signal is compared with.
//   timeout       - maximum wait, converted with timer::GetFastTimeout.
//   wait_hint     - HSA_WAIT_STATE_ACTIVE spins; any other hint spins only for
//                   the first 200 microseconds and then blocks on the driver
//                   event.
//
// Returns the last observed signal value, both when the condition is met and
// when the timeout expires. Returns 0 if the signal is no longer valid.
//
// When the KFD does not support event age and another waiter is already
// registered on this signal, the wait is forced to be active.
hsa_signal_value_t InterruptSignal::WaitRelaxed(hsa_signal_condition_t condition,
                                               hsa_signal_value_t compare_value,
                                               uint64_t timeout,
                                               hsa_wait_state_t wait_hint) {
  // Retain()/Release() bracket the wait; the scope guard undoes the Retain()
  // on every exit path.
  Retain();
  MAKE_SCOPE_GUARD([&]() { Release(); });

  // Register as a waiter so that SetEvent() notifies the driver; `prior` is
  // the number of waiters that were registered before this one.
  uint32_t prior = waiting_++;
  MAKE_SCOPE_GUARD([&]() { waiting_--; });

  uint64_t event_age = core::Runtime::runtime_singleton_->KfdVersion().supports_event_age ? 1 : 0;
  if (!event_age && prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;

  const timer::fast_clock::time_point start_time = timer::fast_clock::now();
  const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout);
  const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
  const uint32_t &signal_abort_timeout =
    core::Runtime::runtime_singleton_->flag().signal_abort_timeout();

  while (true) {
    if (!IsValid()) return 0;

    int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed);

    if (CheckSignalCondition(value, condition, compare_value)) {
      return value;
    }

    auto now = timer::fast_clock::now();
    if (now - start_time > fast_timeout) {
      return value;
    }

    timer::CheckAbortTimeout(start_time, signal_abort_timeout);

    if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
      if (g_use_mwaitx) {
        // Short timeout for active waiting
        timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 1000);
      }
      continue;
    }

    // Passive waits still spin for a short while before blocking in the
    // driver.
    if (now - start_time < kMaxElapsed) {
      if (g_use_mwaitx) {
        // Longer timeout with timer for passive waiting
        timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
      }
      continue;
    }

    auto remaining_ms = timer::duration_cast<std::chrono::milliseconds>(
      fast_timeout - (now - start_time)).count();

    // Upper bound imposed by the abort timeout, in milliseconds. The abort
    // timeout is scaled by 1000 (presumably seconds -> ms; confirm against the
    // flag definition). Widen to 64 bits BEFORE scaling: a 32-bit multiply
    // wraps for large timeouts and could shrink the wait to a tiny or zero
    // value, turning the blocking wait into a busy loop.
    const uint64_t abort_ms = signal_abort_timeout
        ? static_cast<uint64_t>(signal_abort_timeout) * 1000
        : 0xFFFFFFFFULL;

    // Block for the remaining time, capped at 0xFFFFFFFE ms and at the abort
    // timeout (itself capped at 0xFFFFFFFF ms).
    const uint32_t wait_ms = static_cast<uint32_t>(std::min<uint64_t>(
      std::min<uint64_t>(remaining_ms, 0xFFFFFFFEUL),
      std::min<uint64_t>(abort_ms, 0xFFFFFFFFULL)));

    HSAKMT_CALL(hsaKmtWaitOnEvent_Ext(event_, wait_ms, &event_age));
  }
}

// Same as WaitRelaxed, followed by an acquire fence so that memory accesses
// after the wait are ordered behind it. Returns the value WaitRelaxed returned.
hsa_signal_value_t InterruptSignal::WaitAcquire(
    hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
    uint64_t timeout, hsa_wait_state_t wait_hint) {
  const hsa_signal_value_t observed =
      WaitRelaxed(condition, compare_value, timeout, wait_hint);
  std::atomic_thread_fence(std::memory_order_acquire);
  return observed;
}

// Atomic bitwise AND of the signal value with `value`. The four variants
// differ only in the memory ordering of the atomic operation; each one then
// notifies the driver through SetEvent().

void InterruptSignal::AndRelaxed(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

void InterruptSignal::AndAcquire(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
}

void InterruptSignal::AndRelease(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

void InterruptSignal::AndAcqRel(hsa_signal_value_t value) {
  atomic::And(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
}

// Atomic bitwise OR of the signal value with `value`. The four variants
// differ only in the memory ordering of the atomic operation; each one then
// notifies the driver through SetEvent().

void InterruptSignal::OrRelaxed(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

void InterruptSignal::OrAcquire(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
}

void InterruptSignal::OrRelease(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

void InterruptSignal::OrAcqRel(hsa_signal_value_t value) {
  atomic::Or(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
}

// Atomic bitwise XOR of the signal value with `value`. The four variants
// differ only in the memory ordering of the atomic operation; each one then
// notifies the driver through SetEvent().

void InterruptSignal::XorRelaxed(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

void InterruptSignal::XorAcquire(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
}

void InterruptSignal::XorRelease(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

void InterruptSignal::XorAcqRel(hsa_signal_value_t value) {
  atomic::Xor(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
}

// Atomically adds `value` to the signal value. The four variants differ only
// in the memory ordering of the atomic operation; each one then notifies the
// driver through SetEvent().

void InterruptSignal::AddRelaxed(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

void InterruptSignal::AddAcquire(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
}

void InterruptSignal::AddRelease(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

void InterruptSignal::AddAcqRel(hsa_signal_value_t value) {
  atomic::Add(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
}

// Atomically subtracts `value` from the signal value. The four variants
// differ only in the memory ordering of the atomic operation; each one then
// notifies the driver through SetEvent().

void InterruptSignal::SubRelaxed(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
}

void InterruptSignal::SubAcquire(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
}

void InterruptSignal::SubRelease(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
}

void InterruptSignal::SubAcqRel(hsa_signal_value_t value) {
  atomic::Sub(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
}

// Atomically replaces the signal value with `value` and returns the value it
// held before. The four variants differ only in the memory ordering of the
// atomic operation; each one notifies the driver through SetEvent() before
// returning.

hsa_signal_value_t InterruptSignal::ExchRelaxed(hsa_signal_value_t value) {
  const auto previous =
      atomic::Exchange(&signal_.value, static_cast<int64_t>(value), std::memory_order_relaxed);
  SetEvent();
  return static_cast<hsa_signal_value_t>(previous);
}

hsa_signal_value_t InterruptSignal::ExchAcquire(hsa_signal_value_t value) {
  const auto previous =
      atomic::Exchange(&signal_.value, static_cast<int64_t>(value), std::memory_order_acquire);
  SetEvent();
  return static_cast<hsa_signal_value_t>(previous);
}

hsa_signal_value_t InterruptSignal::ExchRelease(hsa_signal_value_t value) {
  const auto previous =
      atomic::Exchange(&signal_.value, static_cast<int64_t>(value), std::memory_order_release);
  SetEvent();
  return static_cast<hsa_signal_value_t>(previous);
}

hsa_signal_value_t InterruptSignal::ExchAcqRel(hsa_signal_value_t value) {
  const auto previous =
      atomic::Exchange(&signal_.value, static_cast<int64_t>(value), std::memory_order_acq_rel);
  SetEvent();
  return static_cast<hsa_signal_value_t>(previous);
}

// Atomic compare-and-swap on the signal value: passes the new `value` and the
// `expected` value to atomic::Cas and returns what atomic::Cas returns. The
// four variants differ only in the memory ordering of the atomic operation;
// each one calls SetEvent() before returning, regardless of the outcome.

hsa_signal_value_t InterruptSignal::CasRelaxed(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const auto observed = atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                                    static_cast<int64_t>(expected), std::memory_order_relaxed);
  SetEvent();
  return static_cast<hsa_signal_value_t>(observed);
}

hsa_signal_value_t InterruptSignal::CasAcquire(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const auto observed = atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                                    static_cast<int64_t>(expected), std::memory_order_acquire);
  SetEvent();
  return static_cast<hsa_signal_value_t>(observed);
}

hsa_signal_value_t InterruptSignal::CasRelease(hsa_signal_value_t expected,
                                               hsa_signal_value_t value) {
  const auto observed = atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                                    static_cast<int64_t>(expected), std::memory_order_release);
  SetEvent();
  return static_cast<hsa_signal_value_t>(observed);
}

hsa_signal_value_t InterruptSignal::CasAcqRel(hsa_signal_value_t expected,
                                              hsa_signal_value_t value) {
  const auto observed = atomic::Cas(&signal_.value, static_cast<int64_t>(value),
                                    static_cast<int64_t>(expected), std::memory_order_acq_rel);
  SetEvent();
  return static_cast<hsa_signal_value_t>(observed);
}
  /// @brief Notify driver of signal value change if necessary.
  ///
  /// The driver event is only set while InWaiting() reports true; otherwise
  /// this is just a compiler-level (signal) fence.
  void InterruptSignal::SetEvent() {
    std::atomic_signal_fence(std::memory_order_seq_cst);
    if (!InWaiting()) return;
    HSAKMT_CALL(hsaKmtSetEvent(event_));
  }

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/ipc_signal.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/ipc_signal.h"

#include <utility>

#include "core/inc/runtime.h"
#include "core/inc/exceptions.h"

namespace rocr {
namespace core {

// Definition of the class-wide mutex; IPCSignal::Attach holds it while looking
// up or creating the local signal object for an imported handle.
KernelMutex IPCSignal::lock_;

/// @brief Attaches to IPC memory described by @p handle and stores the mapped
/// address in ptr_.
/// @param handle IPC memory handle to attach to.
/// @param len Length passed through to Runtime::IPCAttach.
/// @throws AMD::hsa_exception carrying the runtime status when the attach
/// does not succeed.
SharedMemory::SharedMemory(const hsa_amd_ipc_memory_t* handle, size_t len) {
  const hsa_status_t status =
      Runtime::runtime_singleton_->IPCAttach(handle, len, 0, nullptr, &ptr_);
  if (status != HSA_STATUS_SUCCESS) {
    throw AMD::hsa_exception(status, "IPC memory attach failed.");
  }
}

/// @brief Move constructor: takes over the mapping held by @p rhs.
///
/// The source is left with a null pointer so that its destructor skips the
/// detach.
SharedMemory::SharedMemory(SharedMemory&& rhs) : ptr_(rhs.ptr_) {
  rhs.ptr_ = nullptr;
}

/// @brief Detaches the IPC mapping, if this object still owns one.
///
/// A null ptr_ (for example after being moved from) means there is nothing to
/// release. A failed detach is only reported through assert.
SharedMemory::~SharedMemory() {
  if (ptr_ != nullptr) {
    auto err = Runtime::runtime_singleton_->IPCDetach(ptr_);
    assert(err == HSA_STATUS_SUCCESS && "IPC detach failed.");
  }
}

/// @brief Produces an IPC handle for an IPC-enabled signal.
/// @param signal Signal to export; must report isIPC().
/// @param ipc_handle Output handle filled in by Runtime::IPCCreate.
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT when the
/// signal is not IPC enabled, or with the runtime status when handle creation
/// fails.
void IPCSignal::CreateHandle(Signal* signal, hsa_amd_ipc_signal_t* ipc_handle) {
  if (!signal->isIPC()) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Signal must be IPC enabled.");
  }

  // Export the shared signal block backing this signal, using a fixed length
  // of 4096 bytes.
  SharedSignal* const shared_block = SharedSignal::Convert(Convert(signal));
  const hsa_status_t status =
      Runtime::runtime_singleton_->IPCCreate(shared_block, 4096, ipc_handle);
  if (status != HSA_STATUS_SUCCESS) {
    throw AMD::hsa_exception(status, "IPC memory create failed.");
  }
}

/// @brief Imports a signal from an IPC handle.
/// @param ipc_signal_handle Handle previously produced for an IPC signal.
/// @return The signal returned by Signal::DuplicateHandle when that is
/// non-null, otherwise a newly allocated IPCSignal that takes over the mapping.
/// @throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_ARGUMENT when the
/// mapped memory does not hold an IPC signal block.
Signal* IPCSignal::Attach(const hsa_amd_ipc_signal_t* ipc_signal_handle) {
  SharedMemorySignal mapping(ipc_signal_handle);

  const bool holds_ipc_signal = mapping.signal()->IsIPC();
  if (!holds_ipc_signal) {
    throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                             "IPC memory does not contain an IPC signal abi block.");
  }

  hsa_signal_t shared_handle = SharedSignal::Convert(mapping.signal());

  // Lookup and creation happen under the class-wide lock.
  ScopedAcquire<KernelMutex> guard(&lock_);
  if (Signal* existing = core::Signal::DuplicateHandle(shared_handle)) {
    // The local `mapping` is released when it goes out of scope.
    return existing;
  }
  return new IPCSignal(std::move(mapping));
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/isa.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/isa.h"
#include "core/util/utils.h"

#include <algorithm>
#include <cstring>
#include <iostream>
#include <sstream>
#include <utility>

namespace rocr {
namespace core {

/// @brief Queries a wavefront attribute.
/// @param attribute Attribute to query; only HSA_WAVEFRONT_INFO_SIZE is
/// handled.
/// @param value Destination buffer; written as a uint32_t for the size query.
/// @return true when the attribute was written, false for a null @p value or
/// an unhandled attribute.
bool Wavefront::GetInfo(
    const hsa_wavefront_info_t &attribute,
    void *value) const {
  if (value == nullptr) {
    return false;
  }
  if (attribute != HSA_WAVEFRONT_INFO_SIZE) {
    return false;
  }

  *static_cast<uint32_t*>(value) = num_threads_;
  return true;
}

/// @brief Returns the part of @p isa_name that precedes the first ':'.
///
/// When no ':' is present the whole name is returned.
static __forceinline std::string strip_features(const std::string &isa_name) {
  const std::string::size_type feature_start = isa_name.find(':');
  return isa_name.substr(0, feature_start);
}

/* static */
/// @brief Decides whether a code object built for @p code_object_isa may run
/// on an agent exposing @p agent_isa.
/// @param code_object_isa ISA the code object targets.
/// @param agent_isa ISA of the agent.
/// @param codeGenericVersion Generic version of the code object; only
/// consulted when the code object targets a generic ISA.
/// @return true when the SRAMECC and XNACK settings do not conflict and the
/// version (or generic target and generic version) matches.
bool Isa::IsCompatible(const Isa &code_object_isa,
                       const Isa &agent_isa, unsigned int codeGenericVersion) {

  // The code object is generic when its full ISA name is listed in the table
  // of supported generic targets.
  const auto &generic_versions = IsaRegistry::GetSupportedGenericVersions();
  const auto generic_entry = generic_versions.find(code_object_isa.GetIsaName());
  const bool is_generic_code_object = (generic_entry != generic_versions.end());

  // An explicit SRAMECC setting on the code object must equal the agent's.
  assert(code_object_isa.IsSrameccSupported() == agent_isa.IsSrameccSupported()
                                 && agent_isa.GetSramecc() != IsaFeature::Any);
  const auto code_sramecc = code_object_isa.GetSramecc();
  const bool sramecc_is_explicit =
      code_sramecc == IsaFeature::Enabled || code_sramecc == IsaFeature::Disabled;
  if (sramecc_is_explicit && code_sramecc != agent_isa.GetSramecc()) {
    return false;
  }

  // Same rule for XNACK.
  assert(code_object_isa.IsXnackSupported() == agent_isa.IsXnackSupported() && agent_isa.GetXnack() != IsaFeature::Any);
  const auto code_xnack = code_object_isa.GetXnack();
  const bool xnack_is_explicit =
      code_xnack == IsaFeature::Enabled || code_xnack == IsaFeature::Disabled;
  if (xnack_is_explicit && code_xnack != agent_isa.GetXnack()) {
    return false;
  }

  if (!is_generic_code_object) {
    // Non-generic code objects require an exact version match.
    return !(code_object_isa.GetVersion() != agent_isa.GetVersion());
  }

  // Verify the generic code object corresponds to the generic for
  // this isa agent (features after ':' are ignored in the comparison).
  if (strip_features(agent_isa.GetIsaGeneric()) !=
      strip_features(code_object_isa.GetIsaName())) {
    return false;
  }

  // Verify the generic code object version is greater than or equal to
  // the minimum generic version registered for this generic target.
  if (codeGenericVersion < generic_entry->second) {
    return false;
  }

  return true;
}

/// @brief Returns the target id with any feature suffix removed, i.e. the
/// text before the first ':' (the whole target id when there is none).
std::string Isa::GetProcessorName() const {
  return targetid_.substr(0, targetid_.find(':'));
}

/// @brief Builds a full ISA name by putting the "amdgcn-amd-amdhsa--" prefix
/// in front of @p isa_name.
static __forceinline std::string prepend_isa_prefix(const std::string &isa_name) {
  std::string full_name("amdgcn-amd-amdhsa--");
  full_name += isa_name;
  return full_name;
}

/// @brief Returns the full ISA name: the target id with the ISA name prefix
/// prepended.
std::string Isa::GetIsaName() const {
  std::string full_name = prepend_isa_prefix(targetid_);
  return full_name;
}

bool Isa::GetInfo(const hsa_isa_info_t &attribute, void *value) const {
  if (!value) {
    return false;
  }

  switch (attribute) {
    case HSA_ISA_INFO_NAME_LENGTH: {
      std::string isa_name = GetIsaName();
      *((uint32_t*)value) = static_cast<uint32_t>(isa_name.size() + 1);
      return true;
    }
    case HSA_ISA_INFO_NAME: {
      std::string isa_name = GetIsaName();
      memset(value, 0x0, isa_name.size() + 1);
      memcpy(value, isa_name.c_str(), isa_name.size());
      return true;
    }
    // deprecated.
    case HSA_ISA_INFO_CALL_CONVENTION_COUNT: {
      *((uint32_t*)value) = 1;
      return true;
    }
    // deprecated.
    case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE: {
      *((uint32_t*)value) = 64;
      return true;
    }
    // deprecated.
    case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT: {
      *((uint32_t*)value) = 40;
      return true;
    }
    case HSA_ISA_INFO_MACHINE_MODELS: {
      const bool machine_models[2] = {false, true};
      memcpy(value, machine_models, sizeof(machine_models));
      return true;
    }
    case HSA_ISA_INFO_PROFILES: {
      bool profiles[2] = {true, false};
      if (this->GetVersion() == Version(7, 0, 0) ||
          this->GetVersion() == Version(8, 0, 1)) {
        profiles[1] = true;
      }
      memcpy(value, profiles, sizeof(profiles));
      return true;
    }
    case HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES: {
      const bool rounding_modes[3] = {false, false, true};
      memcpy(value, rounding_modes, sizeof(rounding_modes));
      return true;
    }
    case HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES: {
      const bool rounding_modes[3] = {false, false, true};
      memcpy(value, rounding_modes, sizeof(rounding_modes));
      return true;
    }
    case HSA_ISA_INFO_FAST_F16_OPERATION: {
      if (this->GetMajorVersion() >= 8) {
        *((bool*)value) = true;
      } else {
        *((bool*)value) = false;
      }
      return true;
    }
    case HSA_ISA_INFO_WORKGROUP_MAX_DIM: {
      const uint16_t workgroup_max_dim[3] = {1024, 1024, 1024};
      memcpy(value, workgroup_max_dim, sizeof(workgroup_max_dim));
      return true;
    }
    case HSA_ISA_INFO_WORKGROUP_MAX_SIZE: {
      *((uint32_t*)value) = 1024;
      return true;
    }
    case HSA_ISA_INFO_GRID_MAX_DIM: {
      const hsa_dim3_t grid_max_dim = {INT32_MAX, UINT16_MAX, UINT16_MAX};
      memcpy(value, &grid_max_dim, sizeof(grid_max_dim));
      return true;
    }
    case HSA_ISA_INFO_GRID_MAX_SIZE: {
      *((uint64_t*)value) = UINT64_MAX;
      return true;
    }
    case HSA_ISA_INFO_FBARRIER_MAX_SIZE: {
      *((uint32_t*)value) = 32;
      return true;
    }
    default: {
      return false;
    }
  }
}

/// @brief Reports the rounding method for a floating-point type and flush
/// mode.
/// @param fp_type Not consulted by this implementation.
/// @param flush_mode Not consulted by this implementation.
/// @return Always HSA_ROUND_METHOD_SINGLE.
hsa_round_method_t Isa::GetRoundMethod(hsa_fp_type_t fp_type,
                                       hsa_flush_mode_t flush_mode) const {
  const hsa_round_method_t method = HSA_ROUND_METHOD_SINGLE;
  return method;
}

/// @brief Looks up a supported ISA by its full (prefixed) name.
/// @param full_name Key into the supported-ISA map.
/// @return Pointer to the registered Isa, or nullptr when the name is unknown.
const Isa *IsaRegistry::GetIsa(const std::string &full_name) {
  const IsaMap &registry = GetSupportedIsas();
  const auto match = registry.find(full_name);
  if (match == registry.end()) {
    return nullptr;
  }
  return &match->second;
}

/// @brief Finds the first registered ISA matching a version and feature
/// settings.
///
/// An entry matches when its version equals @p version and, for each of
/// SRAMECC and XNACK, the entry either marks the feature as Unsupported or
/// holds exactly the requested setting.
/// @return Pointer to the first matching entry in map iteration order, or
/// nullptr when none matches.
const Isa *IsaRegistry::GetIsa(const Isa::Version &version, IsaFeature sramecc, IsaFeature xnack) {
  for (const IsaMap::value_type &entry : GetSupportedIsas()) {
    const Isa &candidate = entry.second;

    const bool version_ok = candidate.GetVersion() == version;
    const bool sramecc_ok = candidate.GetSramecc() == IsaFeature::Unsupported ||
                            candidate.GetSramecc() == sramecc;
    const bool xnack_ok = candidate.GetXnack() == IsaFeature::Unsupported ||
                          candidate.GetXnack() == xnack;

    if (version_ok && sramecc_ok && xnack_ok) {
      return &candidate;
    }
  }
  return nullptr;
}


// TODO: c++20 use constexpr or consteval
/// @brief Returns the table mapping each supported generic ISA name (with the
/// ISA prefix) to the generic version number registered for it.
///
/// The table is allocated on first use and is not freed by this function.
const std::unordered_map<std::string, unsigned int> &
IsaRegistry::GetSupportedGenericVersions() {
  using GenericVersionMap = std::unordered_map<std::string, unsigned int>;

  static const GenericVersionMap *const generic_versions = new GenericVersionMap{
      {prepend_isa_prefix("gfx9-generic"), 1},
      {prepend_isa_prefix("gfx9-generic:xnack-"), 1},
      {prepend_isa_prefix("gfx9-generic:xnack+"), 1},
      {prepend_isa_prefix("gfx9-4-generic"), 1},
      {prepend_isa_prefix("gfx9-4-generic:xnack-"), 1},
      {prepend_isa_prefix("gfx9-4-generic:xnack+"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc-"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc+"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc-:xnack-"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc-:xnack+"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc+:xnack-"), 1},
      {prepend_isa_prefix("gfx9-4-generic:sramecc+:xnack+"), 1},
      {prepend_isa_prefix("gfx10-1-generic"), 1},
      {prepend_isa_prefix("gfx10-1-generic:xnack-"), 1},
      {prepend_isa_prefix("gfx10-1-generic:xnack+"), 1},
      {prepend_isa_prefix("gfx10-3-generic"), 1},
      {prepend_isa_prefix("gfx11-generic"), 1},
      {prepend_isa_prefix("gfx12-generic"), 1}};

  return *generic_versions;
}

// Returns the map of all supported ISAs, keyed by full (prefixed) ISA name.
// The map is built on the first call and returned as-is on later calls.
// NOTE(review): the population below is not guarded by a lock inside this
// function; presumably the first call happens before other threads can reach
// it -- confirm against callers.
const IsaRegistry::IsaMap& IsaRegistry::GetSupportedIsas() {
  // agent, and vendor name length limit excluding terminating nul character.
  constexpr size_t hsa_name_size = 63;
  // This allocation is meant to last until the last thread has exited.
  // It is intentionally not freed.
  static IsaMap* supported_isas = new IsaMap();

  // A non-empty map means the table has already been populated.
  if (supported_isas->size() > 0) {
    return *supported_isas;
  }
  
  // Derives the minor version stored for a generic entry from its name: the
  // number following the first '.', cut at the next '-'. Names without a '.'
  // yield 0xFF.
  auto parse_out_minor_ver = [&](const std::string& generic_name) -> int32_t {
      size_t dot_pos = generic_name.find('.');
      int32_t min;
      if (dot_pos != std::string::npos) {
          std::string minor_version_str = generic_name.substr(dot_pos + 1);
          size_t dash_pos = minor_version_str.find('-');
          if (dash_pos != std::string::npos) {
              minor_version_str = minor_version_str.substr(0, dash_pos);
          }
          min = std::stoi(minor_version_str);
      } else {
          min = 0xFF;
      }
      return min;
  };

// Registers one ISA: fills in target id, version, SRAMECC/XNACK settings and
// wavefront size under the prefixed name. When gen_name is non-empty, the
// prefixed generic name is recorded on the entry and, if that generic is not
// yet in the map, an entry for it is created with version
// (maj, parse_out_minor_ver(gen_name), 0xFF) and the feature settings and
// wavefront size of this (first) referencing entry.
// FIXME: Use static_assert when C++17 used.
#define ISAREG_ENTRY_GEN(name, maj, min, stp, sramecc, xnack, wavefrontsize, gen_name) \
 {                                                                                     \
  assert(std::char_traits<char>::length(name) <= hsa_name_size);                       \
  std::string isa_name = prepend_isa_prefix(name);                                     \
  (*supported_isas)[isa_name].targetid_ = name;                                           \
  (*supported_isas)[isa_name].version_ = Isa::Version(maj, min, stp);                     \
  (*supported_isas)[isa_name].sramecc_ = sramecc;                                         \
  (*supported_isas)[isa_name].xnack_ = xnack;                                             \
  (*supported_isas)[isa_name].wavefront_.num_threads_ = wavefrontsize;                    \
  std::string genericname(gen_name);                                                   \
  if (genericname.size() != 0) {                                                       \
    std::string gen_isa_name = prepend_isa_prefix(genericname);                        \
    (*supported_isas)[isa_name].generic_ = gen_isa_name;                                  \
    if ((*supported_isas).find(gen_isa_name) == (*supported_isas).end()) {                   \
      (*supported_isas)[gen_isa_name].targetid_ = genericname;                            \
      (*supported_isas)[gen_isa_name].version_ = Isa::Version(maj, parse_out_minor_ver(genericname), 0xFF); \
      (*supported_isas)[gen_isa_name].sramecc_ = sramecc;                                \
      (*supported_isas)[gen_isa_name].xnack_ = xnack;                                    \
      (*supported_isas)[gen_isa_name].wavefront_.num_threads_ = wavefrontsize;           \
    }                                                                                \
  }                                                                                  \
 }

  // Short aliases so the table rows below stay aligned.
  const IsaFeature unsupported = IsaFeature::Unsupported;
  const IsaFeature any = IsaFeature::Any;
  const IsaFeature disabled = IsaFeature::Disabled;
  const IsaFeature enabled = IsaFeature::Enabled;

  // The two trailing columns are the wavefront size and the generic target
  // name ("" when the ISA has no generic target).
  //               Target ID                 Version   SRAMECC      XNACK
  ISAREG_ENTRY_GEN("gfx700",                 7, 0, 0,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx701",                 7, 0, 1,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx702",                 7, 0, 2,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx801",                 8, 0, 1,  unsupported, any,         64, "")
  ISAREG_ENTRY_GEN("gfx801:xnack-",          8, 0, 1,  unsupported, disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx801:xnack+",          8, 0, 1,  unsupported, enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx802",                 8, 0, 2,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx803",                 8, 0, 3,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx805",                 8, 0, 5,  unsupported, unsupported, 64, "")
  ISAREG_ENTRY_GEN("gfx810",                 8, 1, 0,  unsupported, any,         64, "")
  ISAREG_ENTRY_GEN("gfx810:xnack-",          8, 1, 0,  unsupported, disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx810:xnack+",          8, 1, 0,  unsupported, enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx900",                 9, 0, 0,  unsupported, any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx900:xnack-",          9, 0, 0,  unsupported, disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx900:xnack+",          9, 0, 0,  unsupported, enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx902",                 9, 0, 2,  unsupported, any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx902:xnack-",          9, 0, 2,  unsupported, disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx902:xnack+",          9, 0, 2,  unsupported, enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx904",                 9, 0, 4,  unsupported, any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx904:xnack-",          9, 0, 4,  unsupported, disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx904:xnack+",          9, 0, 4,  unsupported, enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx906",                 9, 0, 6,  any,         any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx906:xnack-",          9, 0, 6,  any,         disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx906:xnack+",          9, 0, 6,  any,         enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx906:sramecc-",        9, 0, 6,  disabled,    any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx906:sramecc+",        9, 0, 6,  enabled,     any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack-", 9, 0, 6,  disabled,    disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack+", 9, 0, 6,  disabled,    enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack-", 9, 0, 6,  enabled,     disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack+", 9, 0, 6,  enabled,     enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx908",                 9, 0, 8,  any,         any,         64, "")
  ISAREG_ENTRY_GEN("gfx908:xnack-",          9, 0, 8,  any,         disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx908:xnack+",          9, 0, 8,  any,         enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc-",        9, 0, 8,  disabled,    any,         64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc+",        9, 0, 8,  enabled,     any,         64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack-", 9, 0, 8,  disabled,    disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack+", 9, 0, 8,  disabled,    enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack-", 9, 0, 8,  enabled,     disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack+", 9, 0, 8,  enabled,     enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx909",                 9, 0, 9,  unsupported, any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx909:xnack-",          9, 0, 9,  unsupported, disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx909:xnack+",          9, 0, 9,  unsupported, enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx90a",                 9, 0, 10, any,         any,         64, "")
  ISAREG_ENTRY_GEN("gfx90a:xnack-",          9, 0, 10, any,         disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx90a:xnack+",          9, 0, 10, any,         enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc-",        9, 0, 10, disabled,    any,         64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc+",        9, 0, 10, enabled,     any,         64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack-", 9, 0, 10, disabled,    disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack+", 9, 0, 10, disabled,    enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack-", 9, 0, 10, enabled,     disabled,    64, "")
  ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack+", 9, 0, 10, enabled,     enabled,     64, "")
  ISAREG_ENTRY_GEN("gfx90c",                 9, 0, 12, unsupported, any,         64, "gfx9-generic")
  ISAREG_ENTRY_GEN("gfx90c:xnack-",          9, 0, 12, unsupported, disabled,    64, "gfx9-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx90c:xnack+",          9, 0, 12, unsupported, enabled,     64, "gfx9-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx942",                 9, 4, 2,  any,         any,         64, "gfx9-4-generic")
  ISAREG_ENTRY_GEN("gfx942:xnack-",          9, 4, 2,  any,         disabled,    64, "gfx9-4-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx942:xnack+",          9, 4, 2,  any,         enabled,     64, "gfx9-4-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx942:sramecc-",        9, 4, 2,  disabled,    any,         64, "gfx9-4-generic:sramecc-")
  ISAREG_ENTRY_GEN("gfx942:sramecc+",        9, 4, 2,  enabled,     any,         64, "gfx9-4-generic:sramecc+")
  ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack-", 9, 4, 2,  disabled,    disabled,    64, "gfx9-4-generic:sramecc-:xnack-")
  ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack+", 9, 4, 2,  disabled,    enabled,     64, "gfx9-4-generic:sramecc-:xnack+")
  ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack-", 9, 4, 2,  enabled,     disabled,    64, "gfx9-4-generic:sramecc+:xnack-")
  ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack+", 9, 4, 2,  enabled,     enabled,     64, "gfx9-4-generic:sramecc+:xnack+")
  ISAREG_ENTRY_GEN("gfx950",                 9, 5, 0,  any,         any,         64, "gfx9-4-generic")
  ISAREG_ENTRY_GEN("gfx950:xnack-",          9, 5, 0,  any,         disabled,    64, "gfx9-4-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx950:xnack+",          9, 5, 0,  any,         enabled,     64, "gfx9-4-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx950:sramecc-",        9, 5, 0,  disabled,    any,         64, "gfx9-4-generic:sramecc-")
  ISAREG_ENTRY_GEN("gfx950:sramecc+",        9, 5, 0,  enabled,     any,         64, "gfx9-4-generic:sramecc+")
  ISAREG_ENTRY_GEN("gfx950:sramecc-:xnack-", 9, 5, 0,  disabled,    disabled,    64, "gfx9-4-generic:sramecc-:xnack-")
  ISAREG_ENTRY_GEN("gfx950:sramecc-:xnack+", 9, 5, 0,  disabled,    enabled,     64, "gfx9-4-generic:sramecc-:xnack+")
  ISAREG_ENTRY_GEN("gfx950:sramecc+:xnack-", 9, 5, 0,  enabled,     disabled,    64, "gfx9-4-generic:sramecc+:xnack-")
  ISAREG_ENTRY_GEN("gfx950:sramecc+:xnack+", 9, 5, 0,  enabled,     enabled,     64, "gfx9-4-generic:sramecc+:xnack+")
  ISAREG_ENTRY_GEN("gfx1010",                10, 1, 0, unsupported, any,         32, "gfx10-1-generic")
  ISAREG_ENTRY_GEN("gfx1010:xnack-",         10, 1, 0, unsupported, disabled,    32, "gfx10-1-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx1010:xnack+",         10, 1, 0, unsupported, enabled,     32, "gfx10-1-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx1011",                10, 1, 1, unsupported, any,         32, "gfx10-1-generic")
  ISAREG_ENTRY_GEN("gfx1011:xnack-",         10, 1, 1, unsupported, disabled,    32, "gfx10-1-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx1011:xnack+",         10, 1, 1, unsupported, enabled,     32, "gfx10-1-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx1012",                10, 1, 2, unsupported, any,         32, "gfx10-1-generic")
  ISAREG_ENTRY_GEN("gfx1012:xnack-",         10, 1, 2, unsupported, disabled,    32, "gfx10-1-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx1012:xnack+",         10, 1, 2, unsupported, enabled,     32, "gfx10-1-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx1013",                10, 1, 3, unsupported, any,         32, "gfx10-1-generic")
  ISAREG_ENTRY_GEN("gfx1013:xnack-",         10, 1, 3, unsupported, disabled,    32, "gfx10-1-generic:xnack-")
  ISAREG_ENTRY_GEN("gfx1013:xnack+",         10, 1, 3, unsupported, enabled,     32, "gfx10-1-generic:xnack+")
  ISAREG_ENTRY_GEN("gfx1030",                10, 3, 0, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1031",                10, 3, 1, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1032",                10, 3, 2, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1033",                10, 3, 3, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1034",                10, 3, 4, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1035",                10, 3, 5, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1036",                10, 3, 6, unsupported, unsupported, 32, "gfx10-3-generic")
  ISAREG_ENTRY_GEN("gfx1100",                11, 0, 0, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1101",                11, 0, 1, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1102",                11, 0, 2, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1103",                11, 0, 3, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1150",                11, 5, 0, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1151",                11, 5, 1, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1152",                11, 5, 2, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1153",                11, 5, 3, unsupported, unsupported, 32, "gfx11-generic")
  ISAREG_ENTRY_GEN("gfx1200",                12, 0, 0, unsupported, unsupported, 32, "gfx12-generic")
  ISAREG_ENTRY_GEN("gfx1201",                12, 0, 1, unsupported, unsupported, 32, "gfx12-generic")
#undef ISAREG_ENTRY_GEN

  return *supported_isas;
}

} // namespace core
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/queue.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/queue.h"
#include "core/inc/runtime.h"

namespace rocr {
namespace core {

// HSA Queue ID - used to bind a unique ID.
// Definition of the static atomic counter, starting at zero.
std::atomic<uint64_t> Queue::hsa_queue_counter_(0);

/// @brief Default queue error callback.
///
/// When the runtime flag enable_queue_fault_message() is set, prints the
/// status description for @p status to stderr and aborts the process. When the
/// flag is not set, the call does nothing.
/// @param status Error status reported for the queue.
/// @param source Queue the error was reported on; only its address is printed.
/// @param data Not used by this handler.
void Queue::DefaultErrorHandler(hsa_status_t status, hsa_queue_t* source, void* data) {
  const bool report_faults =
      core::Runtime::runtime_singleton_->flag().enable_queue_fault_message();
  if (!report_faults) {
    return;
  }

  // Fallback text, kept if hsa_status_string does not overwrite it.
  const char* description = "UNKNOWN ERROR";
  HSA::hsa_status_string(status, &description);
  fprintf(stderr, "Queue at %p inactivated due to async error:\n\t%s\n", source, description);
  abort();
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/runtime.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <algorithm>
#include <atomic>
#include <climits>
#include <cstring>
#include <regex>
#include <string>
#include <vector>
#include <list>
#include <link.h>
#include <dlfcn.h>
#include <amdgpu_drm.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <thread>

#include "core/inc/runtime.h"
#include "core/inc/hsa_table_interface.h"

#if defined(HSA_ROCPROFILER_REGISTER) && HSA_ROCPROFILER_REGISTER > 0
#include <rocprofiler-register/rocprofiler-register.h>
#endif

#include "core/common/shared.h"
#include "core/inc/amd_core_dump.hpp"
#include "core/inc/amd_cpu_agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/amd_memory_region.h"
#include "core/inc/amd_topology.h"
#include "core/inc/exceptions.h"
#include "core/inc/host_queue.h"
#include "core/inc/hsa_api_trace_int.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/hsa_ext_interface.h"
#include "core/inc/interrupt_signal.h"
#include "core/inc/signal.h"
#include "core/util/memory.h"
#include "core/util/os.h"
#include "inc/hsa_ven_amd_aqlprofile.h"

#ifndef HSA_VERSION_MAJOR
#define HSA_VERSION_MAJOR 1
#endif
#ifndef HSA_VERSION_MINOR
#define HSA_VERSION_MINOR 1
#endif
#ifndef HSA_VERSION_PATCH
#define HSA_VERSION_PATCH 0
#endif

#if defined(HSA_ROCPROFILER_REGISTER) && HSA_ROCPROFILER_REGISTER > 0
#define ROCP_REG_VERSION                                                                           \
  ROCPROFILER_REGISTER_COMPUTE_VERSION_3(HSA_VERSION_MAJOR, HSA_VERSION_MINOR, HSA_VERSION_PATCH)

ROCPROFILER_REGISTER_DEFINE_IMPORT(hsa, ROCP_REG_VERSION)
#endif

const char rocrbuildid[] __attribute__((used)) = "ROCR BUILD ID: " STRING(ROCR_BUILD_ID);

extern r_debug _amdgpu_r_debug;

namespace rocr {
extern void _loader_debug_state();
namespace core {
// NOTE(review): presumably selects interrupt-driven signal waits; it is not written or read in
// the visible part of this file — confirm against its users.
bool g_use_interrupt_wait;
// Reported to API callers through HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED in Runtime::GetSystemInfo.
bool g_use_mwaitx;
// The single Runtime instance.  Created in Runtime::Acquire and deleted in Runtime::Release.
Runtime* Runtime::runtime_singleton_ = NULL;

// Takes one reference on the runtime singleton, creating it on first use.
//
// Runs under the bootstrap lock.  The caller that raises the reference count to 1 also runs
// Load(); if Load() fails the reference is rolled back and HSA_STATUS_ERROR_OUT_OF_RESOURCES is
// returned.  Returns HSA_STATUS_ERROR_REFCOUNT_OVERFLOW when the count is already INT32_MAX.
hsa_status_t Runtime::Acquire() {
  ScopedAcquire<KernelMutex> bootstrap_guard(&bootstrap_lock());

  // First caller in the process: reset the log flags and build the singleton.
  if (runtime_singleton_ == NULL) {
    memset(log_flags, 0, sizeof(log_flags));
    runtime_singleton_ = new Runtime();
  }

  if (runtime_singleton_->ref_count_ == INT32_MAX) return HSA_STATUS_ERROR_REFCOUNT_OVERFLOW;

  // Take the reference now; the guard gives it back on any early return below.
  runtime_singleton_->ref_count_++;
  MAKE_NAMED_SCOPE_GUARD(undo_ref, [&]() { runtime_singleton_->ref_count_--; });

  const bool first_user = (runtime_singleton_->ref_count_ == 1);
  if (first_user && runtime_singleton_->Load() != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  undo_ref.Dismiss();
  return HSA_STATUS_SUCCESS;
}

// Drops one reference on the runtime singleton.
//
// Runs under the bootstrap lock.  The caller holding the last reference runs Unload() first; once
// the count reaches zero the singleton is deleted and the pointer reset.
//
// Returns HSA_STATUS_ERROR_NOT_INITIALIZED when there is no singleton, or when the singleton
// exists but holds no references.  The latter state is left behind by Acquire() when Load() fails
// (the reference is rolled back but the object is kept); decrementing in that state would push the
// count below zero and make IsOpen() report an open runtime.
hsa_status_t Runtime::Release() {
  ScopedAcquire<KernelMutex> boot(&bootstrap_lock());

  if (runtime_singleton_ == nullptr) return HSA_STATUS_ERROR_NOT_INITIALIZED;

  // Nothing to release: refuse rather than underflow the reference count.
  if (runtime_singleton_->ref_count_ == 0) return HSA_STATUS_ERROR_NOT_INITIALIZED;

  if (runtime_singleton_->ref_count_ == 1) {
    // Release all registered memory, then unload backends
    runtime_singleton_->Unload();
  }

  runtime_singleton_->ref_count_--;

  if (runtime_singleton_->ref_count_ == 0) {
    delete runtime_singleton_;
    runtime_singleton_ = nullptr;
  }

  return HSA_STATUS_SUCCESS;
}

// Reports whether the runtime singleton exists and currently holds at least one reference.
bool Runtime::IsOpen() {
  if (Runtime::runtime_singleton_ == NULL) return false;
  return Runtime::runtime_singleton_->ref_count_ != 0;
}

// Register agent information only.  Must not call anything that may use the registered information
// since those tables are incomplete.
// Records an agent in the runtime's lookup tables according to its device type.
//   agent   - agent to register; always added to the node-to-agent reverse lookup.
//   Enabled - only consulted for GPU agents: disabled GPUs are parked in disabled_gpu_agents_.
// CPU agents also contribute their regions to the system region lists, and the first CPU agent
// installs the default system allocator/deallocator.
void Runtime::RegisterAgent(Agent* agent, bool Enabled) {
  // Node-to-agent reverse lookup applies to every device type.
  agents_by_node_[agent->node_id()].push_back(agent);

  const auto kind = agent->device_type();

  if (kind == Agent::DeviceType::kAmdCpuDevice) {
    cpu_agents_.push_back(agent);

    // CPU agents are filed under GPU id 0.
    agents_by_gpuid_[0] = agent;

    // Sort this agent's regions into the fine/coarse grained system lists.
    for (const core::MemoryRegion* region : agent->regions()) {
      if (region->fine_grain()) {
        system_regions_fine_.push_back(region);
      } else {
        system_regions_coarse_.push_back(region);
      }
    }

    assert(system_regions_fine_.size() > 0);

    // Only the first discovered CPU agent provides the default system allocator.
    if (cpu_agents_.size() != 1) return;

    // Might need memory pooling to cover allocation that requires less than 4096 bytes.
    // The default system pool must support kernarg, so pick the first fine grained pool that does.
    for (auto pool : system_regions_fine_) {
      if (!pool->kernarg()) continue;

      system_allocator_ = [pool](size_t size, size_t alignment,
                                 MemoryRegion::AllocateFlags alloc_flags,
                                 int agent_node_id) -> void* {
        assert(alignment <= 4096);
        void* ptr = NULL;
        const hsa_status_t status = core::Runtime::runtime_singleton_->AllocateMemory(
            pool, size, alloc_flags, &ptr, agent_node_id);
        return (status == HSA_STATUS_SUCCESS) ? ptr : NULL;
      };

      system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };

      BaseShared::SetAllocateAndFree(system_allocator_, system_deallocator_);
      break;
    }
    return;
  }

  if (kind == Agent::DeviceType::kAmdGpuDevice) {
    if (!Enabled) {
      disabled_gpu_agents_.push_back(agent);
      return;
    }

    gpu_agents_.push_back(agent);
    gpu_ids_.push_back(agent->node_id());
    agents_by_gpuid_[((AMD::GpuAgent*)agent)->KfdGpuID()] = agent;

    // The first enabled GPU agent becomes the region gpu.
    if (region_gpu_ == NULL) region_gpu_ = agent;
    return;
  }

  if (kind == Agent::DeviceType::kAmdAieDevice) {
    aie_agents_.push_back(agent);
  }
}

// Register driver.
// Takes ownership of a driver by appending it to agent_drivers_.
void Runtime::RegisterDriver(std::unique_ptr<Driver> driver) {
  agent_drivers_.emplace_back(std::move(driver));
}

void Runtime::DestroyAgents() {
  agents_by_node_.clear();

  std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject());
  gpu_agents_.clear();

  std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject());
  disabled_gpu_agents_.clear();

  gpu_ids_.clear();

  std::for_each(cpu_agents_.begin(), cpu_agents_.end(), DeleteObject());
  cpu_agents_.clear();

  std::for_each(aie_agents_.begin(), aie_agents_.end(), DeleteObject());
  aie_agents_.clear();

  region_gpu_ = NULL;

  system_regions_fine_.clear();
  system_regions_coarse_.clear();
}

// Drops every driver registered through RegisterDriver.  The drivers are held by
// std::unique_ptr, so clearing the list destroys them.
void Runtime::DestroyDrivers() {
  agent_drivers_.clear();
}

// Records the node count and sizes the link matrix to hold one entry per (from, to) node pair.
void Runtime::SetLinkCount(size_t num_nodes) {
  num_nodes_ = num_nodes;

  const size_t link_slots = num_nodes * num_nodes;
  link_matrix_.resize(link_slots);
}

// Stores the link description for the directed pair (node_id_from -> node_id_to).
// The recorded hop count is capped at 1: the runtime does not have enough information to
// describe each individual hop to the user.
void Runtime::RegisterLinkInfo(uint32_t node_id_from, uint32_t node_id_to,
                               uint32_t num_hop, uint32_t rec_sdma_eng_id_mask,
                               hsa_amd_memory_pool_link_info_t& link_info) {
  auto& entry = link_matrix_[GetIndexLinkInfo(node_id_from, node_id_to)];

  entry.rec_sdma_eng_id_mask = rec_sdma_eng_id_mask;
  entry.info = link_info;
  entry.num_hop = std::min(num_hop, 1U);
}

// Returns a copy of the link entry for (node_id_from -> node_id_to).
// A node has no link to itself, so identical ids yield a default-constructed LinkInfo.
const Runtime::LinkInfo Runtime::GetLinkInfo(uint32_t node_id_from,
                                             uint32_t node_id_to) {
  if (node_id_from == node_id_to) {
    return LinkInfo();  // No link.
  }

  return link_matrix_[GetIndexLinkInfo(node_id_from, node_id_to)];
}

// Maps a (from, to) node pair to its row-major position in link_matrix_.
uint32_t Runtime::GetIndexLinkInfo(uint32_t node_id_from, uint32_t node_id_to) {
  const auto row_start = node_id_from * num_nodes_;
  return row_start + node_id_to;
}

// Invokes callback(agent, data) for each CPU agent, then each GPU agent, then each AIE agent.
// Stops at, and returns, the first status that is not HSA_STATUS_SUCCESS.
hsa_status_t Runtime::IterateAgent(hsa_status_t (*callback)(hsa_agent_t agent,
                                                            void* data),
                                   void* data) {
  AMD::callback_t<decltype(callback)> invoke(callback);

  std::vector<core::Agent*>* groups[3] = {&cpu_agents_, &gpu_agents_, &aie_agents_};

  for (std::vector<core::Agent*>* group : groups) {
    // Index-based walk: the size is re-read on every step.
    size_t position = 0;
    while (position < group->size()) {
      hsa_agent_t handle = Agent::Convert(group->at(position));
      const hsa_status_t result = invoke(handle, data);
      if (result != HSA_STATUS_SUCCESS) return result;
      ++position;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Allocates size bytes from region and, on success, records the allocation in allocation_map_
// so that FreeMemory can later find its region, size and flags.
//   address - receives the allocated pointer.
// Returns whatever status region->Allocate reports.
hsa_status_t Runtime::AllocateMemory(const MemoryRegion* region, size_t size,
                                     MemoryRegion::AllocateFlags alloc_flags,
                                     void** address, int agent_node_id) {
  // Remember the caller's size: region->Allocate(...) may align-up size to granularity.
  const size_t size_requested = size;

  const hsa_status_t status = region->Allocate(size, alloc_flags, address, agent_node_id);
  if (status != HSA_STATUS_SUCCESS) return status;

  // Track the allocation result so that it could be freed properly.
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
  allocation_map_[*address] = AllocationRegion(region, size, size_requested, alloc_flags);

  return status;
}

// Releases an allocation that was recorded in allocation_map_.
//
// Steps, in order:
//   1. Under memory_lock_, look up ptr, capture its region/size/flags/notifiers and erase the
//      map entry.
//   2. Outside the lock, run any registered release notifiers.
//   3. If the allocation was made with AllocateAsan, return its ASAN header page to the driver.
//   4. Free the memory through its region.
//
// Returns:
//   HSA_STATUS_SUCCESS                  - ptr was nullptr, or the memory was freed.
//   HSA_STATUS_ERROR_INVALID_ALLOCATION - ptr is not the start of a tracked allocation.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT   - ptr refers to an imported fragment (no region).
//   HSA_STATUS_ERROR                    - region->Free failed and a system event handler
//                                         accepted the memory error event.
// If region->Free fails and no handler accepts the event, the process is aborted.
hsa_status_t Runtime::FreeMemory(void* ptr) {
  if (ptr == nullptr) {
    return HSA_STATUS_SUCCESS;
  }

  const MemoryRegion* region = nullptr;
  size_t size = 0;
  std::unique_ptr<std::vector<AllocationRegion::notifier_t>> notifiers;
  MemoryRegion::AllocateFlags alloc_flags = core::MemoryRegion::AllocateNoFlags;

  {
    ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

    std::map<const void*, AllocationRegion>::iterator it = allocation_map_.find(ptr);

    if (it == allocation_map_.end()) {
      debug_warning(false && "Can't find address in allocation map");
      return HSA_STATUS_ERROR_INVALID_ALLOCATION;
    }
    region = it->second.region;
    size = it->second.size;
    alloc_flags = it->second.alloc_flags;

    // Imported fragments can't be released with FreeMemory.
    if (region == nullptr) {
      assert(false && "Can't release imported memory with free.");
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    notifiers = std::move(it->second.notifiers);

    allocation_map_.erase(it);
  }

  // Notifiers can't run while holding the lock or the callback won't be able to manage memory.
  // The memory triggering the notification has already been removed from the memory map so can't
  // be double released during the callback.
  if (notifiers) {
    for (auto& notifier : *notifiers) {
      notifier.callback(notifier.ptr, notifier.user_data);
    }
  }

  if (alloc_flags & core::MemoryRegion::AllocateAsan) {
    // The driver call must not live inside assert(): with NDEBUG the whole expression is removed
    // and the header page would never be returned.
    const auto asan_status = region->owner()->driver().ReturnAsanHeaderPage(ptr);
    assert(asan_status == HSA_STATUS_SUCCESS);
    (void)asan_status;  // Only inspected in builds with assertions enabled.
  }

  const hsa_status_t err = region->Free(ptr, size);
  if (err != HSA_STATUS_SUCCESS) {
    // hsaKmtFreeMemory failed to free this pointer. Throw a memory error event

    // Note: This should be treated as a fatal exception by the System Event Handler because:
    //  - This leaves allocation_map_ in an inconsistent state as this pointer entry has already
    //  been removed.
    //  - We already called back the notifier, but did not actually free.
    //  - We removed the ASAN Header but did not actually free.
    //
    // But this is a very unlikely use case and calling region->Free(..) before updating
    // allocation_map_ would require us to hold the memory_lock_ for much longer and we would not be
    // able to call hsaKmtReturnAsanHeaderPage after calling region->Free(..)

    const core::Agent* agentOwner = region->owner();
    hsa_status_t custom_handler_status = HSA_STATUS_ERROR;
    auto system_event_handlers = runtime_singleton_->GetSystemEventHandlers();

    if (!system_event_handlers.empty()) {
      hsa_amd_event_t memory_error_event;
      memory_error_event.event_type = HSA_AMD_GPU_MEMORY_ERROR_EVENT;
      hsa_amd_gpu_memory_error_info_t& error_info = memory_error_event.memory_error;

      error_info.virtual_address = reinterpret_cast<const uint64_t>(ptr);
      error_info.error_reason_mask = HSA_AMD_MEMORY_ERROR_MEMORY_IN_USE;
      error_info.agent = Agent::Convert(agentOwner);

      // Every handler is invoked; one success is enough to treat the event as handled.
      for (auto& callback : system_event_handlers) {
        const hsa_status_t handler_status = callback.first(&memory_error_event, callback.second);
        if (handler_status == HSA_STATUS_SUCCESS) custom_handler_status = HSA_STATUS_SUCCESS;
      }
    }
    // No custom VM fault handler registered or it failed.
    if (custom_handler_status != HSA_STATUS_SUCCESS) {
      fprintf(stderr,
              "Memory critical error by agent node-%u (Agent handle: %p) on address %p. Reason: "
              "Memory in use. \n",
              agentOwner->node_id(), reinterpret_cast<void*>(agentOwner->public_handle().handle),
              ptr);

      assert(false && "GPU memory error.");
      std::abort();
    }
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}

// Attaches a deallocation callback to the tracked allocation that contains ptr.
// The callback is stored together with ptr and user_data and is invoked by FreeMemory.
// Returns HSA_STATUS_ERROR_INVALID_ALLOCATION when ptr is not inside a tracked allocation or the
// allocation is an imported fragment (no region).
hsa_status_t Runtime::RegisterReleaseNotifier(void* ptr, hsa_amd_deallocation_callback_t callback,
                                              void* user_data) {
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  // The candidate is the last allocation whose base address is <= ptr.
  auto entry = allocation_map_.upper_bound(ptr);
  if (entry == allocation_map_.begin()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  --entry;

  // No support for imported fragments yet.
  if (entry->second.region == nullptr) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  const uint8_t* base = reinterpret_cast<const uint8_t*>(entry->first);
  const bool inside = (entry->first <= ptr) && (ptr < base + entry->second.size);
  if (!inside) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // The notifier list is created lazily on first registration.
  auto& notifiers = entry->second.notifiers;
  if (!notifiers) notifiers.reset(new std::vector<AllocationRegion::notifier_t>);

  AllocationRegion::notifier_t notifier = {
      ptr, AMD::callback_t<hsa_amd_deallocation_callback_t>(callback), user_data};
  notifiers->push_back(notifier);

  return HSA_STATUS_SUCCESS;
}

// Removes every notifier registered for exactly (ptr, callback) on the tracked allocation that
// contains ptr.  Returns HSA_STATUS_SUCCESS if at least one notifier was removed and
// HSA_STATUS_ERROR_INVALID_ARGUMENT otherwise.
hsa_status_t Runtime::DeregisterReleaseNotifier(void* ptr,
                                                hsa_amd_deallocation_callback_t callback) {
  hsa_status_t ret = HSA_STATUS_ERROR_INVALID_ARGUMENT;
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  // The candidate is the last allocation whose base address is <= ptr.
  auto entry = allocation_map_.upper_bound(ptr);
  if (entry == allocation_map_.begin()) return ret;
  --entry;

  const uint8_t* base = reinterpret_cast<const uint8_t*>(entry->first);
  const bool inside = (entry->first <= ptr) && (ptr < base + entry->second.size);
  if (!inside) return ret;

  auto& notifiers = entry->second.notifiers;
  if (!notifiers) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Unordered removal: a match is overwritten by the last element, and the same slot is then
  // examined again because it now holds a different notifier.
  size_t slot = 0;
  while (slot < notifiers->size()) {
    auto& candidate = (*notifiers)[slot];
    if ((candidate.ptr == ptr) && (candidate.callback == callback)) {
      candidate = std::move(notifiers->back());
      notifiers->pop_back();
      ret = HSA_STATUS_SUCCESS;
    } else {
      ++slot;
    }
  }

  return ret;
}

// Synchronous copy of size bytes from src to dst, choosing the copy path from pointer info.
//
// Paths, in the order they are tried:
//   - both ranges are system memory: plain memcpy.
//   - both ranges belong to the same node: that agent's DmaCopy.
//   - one side is system memory: the GPU side's DmaCopy, after locking the system range to that
//     GPU when the range is not already covered by an agent-visible allocation.
//   - GPU to GPU: staged through a temporary system buffer.
//
// Throws AMD::hsa_exception when PtrInfo or the lock of a system range fails.  Returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the staging buffer can't be allocated; otherwise the
// status of the copy that was performed.
hsa_status_t Runtime::CopyMemory(void* dst, const void* src, size_t size) {
  void* source = const_cast<void*>(src);

  // Choose agents from pointer info
  bool is_src_system = false;
  bool is_dst_system = false;
  core::Agent* src_agent;
  core::Agent* dst_agent;

  // Fetch ownership.  Returns true when the range should be treated as system memory; agent
  // receives the agent to use for the range and need_lock tells whether the range has to be
  // locked before a GPU can access it.
  const auto& is_system_mem = [&](void* ptr, core::Agent*& agent, bool& need_lock) {
    hsa_amd_pointer_info_t info = {};
    uint32_t count = 0;
    hsa_agent_t* accessible = nullptr;
    MAKE_SCOPE_GUARD([&]() { free(accessible); });
    info.size = sizeof(info);
    hsa_status_t err = PtrInfo(ptr, &info, malloc, &count, &accessible);
    if (err != HSA_STATUS_SUCCESS)
      throw AMD::hsa_exception(err, "PtrInfo failed in hsa_memory_copy.");
    ptrdiff_t endPtr = (ptrdiff_t)ptr + size;
    if (info.agentBaseAddress <= ptr &&
        endPtr <= (ptrdiff_t)info.agentBaseAddress + info.sizeInBytes) {
      // The whole range lies inside an agent-visible allocation.  When no owner is reported the
      // first accessible agent is used instead.
      if (info.agentOwner.handle == 0) info.agentOwner = accessible[0];
      agent = core::Agent::Convert(info.agentOwner);
      need_lock = false;
      return agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice;
    } else {
      // Not covered by an agent-visible allocation: treat as host memory of the first CPU agent.
      need_lock = true;
      agent = cpu_agents_[0];
      return true;
    }
  };

  bool src_lock, dst_lock;
  is_src_system = is_system_mem(source, src_agent, src_lock);
  is_dst_system = is_system_mem(dst, dst_agent, dst_lock);

  // CPU-CPU
  if (is_src_system && is_dst_system) {
    memcpy(dst, source, size);
    return HSA_STATUS_SUCCESS;
  }

  // Same GPU
  if (src_agent->node_id() == dst_agent->node_id()) return dst_agent->DmaCopy(dst, source, size);

  // GPU-CPU
  // Must ensure that system memory is visible to the GPU during the copy.
  const AMD::MemoryRegion* system_region =
      static_cast<const AMD::MemoryRegion*>(system_regions_fine_[0]);

  // Host pointer that was locked, if any; remembered so that the guard below can unlock it.
  void* gpuPtr = nullptr;
  const auto& locked_copy = [&](void*& ptr, core::Agent* locking_agent) {
    void* tmp;
    hsa_agent_t agent = locking_agent->public_handle();
    hsa_status_t err = system_region->Lock(1, &agent, ptr, size, &tmp);
    if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "Lock failed in hsa_memory_copy.");
    gpuPtr = ptr;
    ptr = tmp;
  };

  MAKE_SCOPE_GUARD([&]() {
    if (gpuPtr != nullptr) system_region->Unlock(gpuPtr);
  });

  // At most one side needs locking here: two unlocked system ranges took the memcpy path above.
  if (src_lock) locked_copy(source, dst_agent);
  if (dst_lock) locked_copy(dst, src_agent);
  if (is_src_system) return dst_agent->DmaCopy(dst, source, size);
  if (is_dst_system) return src_agent->DmaCopy(dst, source, size);

  /*
  GPU-GPU - functional support, not a performance path.

  This goes through system memory because we have to support copying between non-peer GPUs
  and we can't use P2P pointers even if the GPUs are peers.  Because hsa_amd_agents_allow_access
  requires the caller to specify all allowed agents we can't assume that a peer mapped pointer
  would remain mapped for the duration of the copy.
  */
  void* temp = system_allocator_(size, 0, core::MemoryRegion::AllocateNoFlags, 0);
  // The allocator reports failure with a null pointer; don't hand that to the DMA engines.
  if (temp == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  MAKE_SCOPE_GUARD([&]() { system_deallocator_(temp); });
  hsa_status_t err = src_agent->DmaCopy(temp, source, size);
  if (err == HSA_STATUS_SUCCESS) err = dst_agent->DmaCopy(dst, temp, size);
  return err;
}

// Asynchronous copy of size bytes from src (on src_agent) to dst (on dst_agent).
// The copy is issued on the source agent when it is a GPU, otherwise on the destination agent,
// and is ordered after dep_signals; completion_signal is handed to the agent's DmaCopy.
// Throws AMD::hsa_exception when a pointer lookup fails.
hsa_status_t Runtime::CopyMemory(void* dst, core::Agent* dst_agent, const void* src,
                                 core::Agent* src_agent, size_t size,
                                 std::vector<core::Signal*>& dep_signals,
                                 core::Signal& completion_signal) {
  // Resolves the agent to report for a pointer: the owner recorded in the pointer's block data
  // for IPC and graphics pointers, the caller-supplied agent for everything else.
  auto resolveOwner = [this](core::Agent* fallback, const void* address) {
    hsa_amd_pointer_info_t info = {};
    PtrInfoBlockData block = {};
    info.size = sizeof(info);

    const hsa_status_t err = PtrInfo(address, &info, nullptr, nullptr, nullptr, &block);
    if (err != HSA_STATUS_SUCCESS) {
      throw AMD::hsa_exception(err, "PtrInfo failed in hsa_memory_copy.");
    }

    // Limit to IPC and GFX types for now.  These are the only types for which the application
    // may not possess a proper agent handle.
    const bool owner_from_block =
        (info.type == HSA_EXT_POINTER_TYPE_IPC) || (info.type == HSA_EXT_POINTER_TYPE_GRAPHICS);
    return owner_from_block ? block.agentOwner : fallback;
  };

  // The issuing agent is picked from the caller-supplied agents, before any lookup below.
  core::Agent* copy_agent = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) copy_agent = src_agent;

  // Lookup owning agent if blit kernel is selected or if flag override is set.
  const bool same_agent = (dst_agent == src_agent);
  if (same_agent || flag().discover_copy_agents()) {
    dst_agent = resolveOwner(dst_agent, dst);
    src_agent = resolveOwner(src_agent, src);
  }

  return copy_agent->DmaCopy(dst, *dst_agent, src, *src_agent, size, dep_signals,
                             completion_signal);
}

// Asynchronous copy on a caller-selected SDMA engine.
// engine_id must have exactly one bit set; otherwise HSA_STATUS_ERROR_INVALID_ARGUMENT is
// returned.  The copy is issued on the source agent when it is a GPU, otherwise on the
// destination agent, which receives the 1-based position of the engine bit.
hsa_status_t Runtime::CopyMemoryOnEngine(void* dst, core::Agent* dst_agent, const void* src,
                                 core::Agent* src_agent, size_t size,
                                 std::vector<core::Signal*>& dep_signals,
                                 core::Signal& completion_signal,
                                 hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) {
  core::Agent* copy_agent = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) copy_agent = src_agent;

  // ffs() yields the 1-based index of the lowest set bit (0 when no bit is set).  Shifting the
  // mask right by that amount leaves only the bits above the lowest one, so any remainder means
  // more than one engine was requested.
  const int engine_offset = ffs(engine_id);
  const bool none_selected = !engine_id;
  if (none_selected || (engine_id >> engine_offset) != 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return copy_agent->DmaCopyOnEngine(dst, *dst_agent, src, *src_agent, size, dep_signals,
                                     completion_signal, engine_offset, force_copy_on_sdma);
}

// Queries copy-engine status for transfers between two distinct agents.
// The query is answered by the source agent when it is a GPU, otherwise by the destination
// agent.  Returns HSA_STATUS_ERROR_INVALID_AGENT when both arguments are the same agent.
hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent,
                                       uint32_t *engine_ids_mask) {
  core::Agent* query_agent = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) query_agent = src_agent;

  if (src_agent == dst_agent) return HSA_STATUS_ERROR_INVALID_AGENT;

  return query_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask);
}

// Queries the recommended copy engines for transfers between two distinct agents.
// The query is answered by the source agent when it is a GPU, otherwise by the destination
// agent.  Returns HSA_STATUS_ERROR_INVALID_AGENT when both arguments are the same agent.
hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
                                         uint32_t* recommended_ids_mask) {
  core::Agent* query_agent = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) query_agent = src_agent;

  if (src_agent == dst_agent) return HSA_STATUS_ERROR_INVALID_AGENT;

  return query_agent->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask);
}

// Fills count consecutive 32-bit words starting at ptr with value.
//
// When the whole range lies inside an agent-visible allocation, the fill is done by a GPU: the
// owning agent if it is a GPU, otherwise the first GPU in the list of agents that can access the
// pointer.  If no GPU is available and the range lies inside the host-visible allocation, the
// fill is done on the host.
//
// Returns the PtrInfo or DmaFill status, HSA_STATUS_SUCCESS after a host fill, or
// HSA_STATUS_ERROR_INVALID_ALLOCATION when the range matches neither case.
hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) {
  // Choose blit agent from pointer info
  hsa_amd_pointer_info_t info = {};
  uint32_t agent_count = 0;
  hsa_agent_t* accessible = nullptr;
  info.size = sizeof(info);
  // PtrInfo allocates the accessible-agent list with malloc; release it on every exit path.
  MAKE_SCOPE_GUARD([&]() { free(accessible); });
  hsa_status_t err = PtrInfo(ptr, &info, malloc, &agent_count, &accessible);
  if (err != HSA_STATUS_SUCCESS) return err;

  ptrdiff_t endPtr = (ptrdiff_t)ptr + count * sizeof(uint32_t);

  // Check for GPU fill
  // Selects GPU fill for SVM and Locked allocations if a GPU address is given and is mapped.
  if (info.agentBaseAddress <= ptr &&
      endPtr <= (ptrdiff_t)info.agentBaseAddress + info.sizeInBytes) {
    // A zero owner handle means no owning agent was reported; don't dereference it.
    core::Agent* blit_agent =
        (info.agentOwner.handle != 0) ? core::Agent::Convert(info.agentOwner) : nullptr;
    if (blit_agent == nullptr ||
        blit_agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice) {
      blit_agent = nullptr;
      for (uint32_t i = 0; i < agent_count; i++) {
        core::Agent* candidate = core::Agent::Convert(accessible[i]);
        if (candidate->device_type() == core::Agent::DeviceType::kAmdGpuDevice) {
          blit_agent = candidate;
          break;
        }
      }
    }
    if (blit_agent) return blit_agent->DmaFill(ptr, value, count);
  }

  // Host and unmapped SVM addresses copy via host.
  if (info.hostBaseAddress <= ptr && endPtr <= (ptrdiff_t)info.hostBaseAddress + info.sizeInBytes) {
    // Write whole 32-bit words.  memset() would replicate only the low byte of value, which is
    // wrong for any pattern whose four bytes are not identical.
    std::fill_n(static_cast<uint32_t*>(ptr), count, value);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ALLOCATION;
}

// Grants the listed agents access to the allocation that starts at ptr.
// Pointers that are not in allocation_map_ are forwarded to VMemoryMapAllowAccess with
// read/write permission.  Entries without a region succeed without further work.
hsa_status_t Runtime::AllowAccess(uint32_t num_agents,
                                  const hsa_agent_t* agents, const void* ptr) {
  const AMD::MemoryRegion* amd_region = NULL;
  size_t alloc_size = 0;

  {
    // The lock only covers the map lookup; the region call below runs without it.
    ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

    const auto entry = allocation_map_.find(ptr);
    if (entry == allocation_map_.end()) {
      /* See if this address was mapped via VMM */
      return VMemoryMapAllowAccess(ptr, HSA_ACCESS_PERMISSION_RW, agents,
                                   num_agents);
    }

    amd_region = reinterpret_cast<const AMD::MemoryRegion*>(entry->second.region);

    // Imported IPC handle entries inside allocation_map_ do not have an amd_region because they
    // were allocated in the other process. Access is already granted during IPCAttach().
    if (amd_region == NULL) return HSA_STATUS_SUCCESS;

    alloc_size = entry->second.size;
  }

  return amd_region->AllowAccess(num_agents, agents, ptr, alloc_size);
}

// Writes the value of a system attribute into the caller's buffer.
//   attribute - which attribute to report.
//   value     - output buffer; it is written as the type each case casts it to, so the caller
//               must supply storage of at least that size (128 bytes for
//               HSA_SYSTEM_INFO_EXTENSIONS).
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for attributes not handled here, otherwise
// HSA_STATUS_SUCCESS.
hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
  switch (attribute) {
    case HSA_SYSTEM_INFO_VERSION_MAJOR:
      *((uint16_t*)value) = HSA_VERSION_MAJOR;
      break;
    case HSA_SYSTEM_INFO_VERSION_MINOR:
      *((uint16_t*)value) = HSA_VERSION_MINOR;
      break;
    case HSA_SYSTEM_INFO_TIMESTAMP: {
      *((uint64_t*)value) = os::ReadSystemClock();
      break;
    }
    case HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY: {
      // sys_clock_freq_ is expected to be non-zero once initialization has completed.
      assert(sys_clock_freq_ != 0 &&
             "Use of HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY before HSA "
             "initialization completes.");
      *(uint64_t*)value = sys_clock_freq_;
      break;
    }
    case HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT:
      // Largest 64-bit value.
      *((uint64_t*)value) = 0xFFFFFFFFFFFFFFFF;
      break;
    case HSA_SYSTEM_INFO_ENDIANNESS:
      // Decided at compile time.
#if defined(HSA_LITTLE_ENDIAN)
      *((hsa_endianness_t*)value) = HSA_ENDIANNESS_LITTLE;
#else
      *((hsa_endianness_t*)value) = HSA_ENDIANNESS_BIG;
#endif
      break;
    case HSA_SYSTEM_INFO_MACHINE_MODEL:
      // Decided at compile time.
#if defined(HSA_LARGE_MODEL)
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_LARGE;
#else
      *((hsa_machine_model_t*)value) = HSA_MACHINE_MODEL_SMALL;
#endif
      break;
    case HSA_SYSTEM_INFO_EXTENSIONS: {
      // The result is a 128-byte bitmask with one bit per extension id; start from all-clear.
      memset(value, 0, sizeof(uint8_t) * 128);

      // Sets bit number `bit` in the mask (byte bit / 8, bit position bit % 8).
      auto setFlag = [&](uint32_t bit) {
        assert(bit < 128 * 8 && "Extension value exceeds extension bitmask");
        uint index = bit / 8;
        uint subBit = bit % 8;
        ((uint8_t*)value)[index] |= 1 << subBit;
      };

      // Finalizer and image extensions are reported when their API table entry is populated.
      if (hsa_internal_api_table().finalizer_api.hsa_ext_program_finalize_fn != NULL) {
        setFlag(HSA_EXTENSION_FINALIZER);
      }

      if (hsa_internal_api_table().image_api.hsa_ext_image_create_fn != NULL) {
        setFlag(HSA_EXTENSION_IMAGES);
      }

      // The AQL profile extension is reported when its library can be loaded; the library is
      // closed again right away.
      if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
        os::CloseLib(lib);
        setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
      }

      // Always reported.
      setFlag(HSA_EXTENSION_AMD_PROFILER);

      break;
    }
    case HSA_AMD_SYSTEM_INFO_BUILD_VERSION: {
      // Hands out a pointer to a string literal; the caller must not free it.
      *(const char**)value = STRING(ROCR_BUILD_ID);
      break;
    }
    case HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: {
      // True only if every registered GPU agent reports SVM API support (also true when there
      // are no GPU agents).
      bool ret = true;
      for (auto agent : gpu_agents_) {
        AMD::GpuAgent* gpu = (AMD::GpuAgent*)agent;
        ret &= (gpu->properties().Capability.ui32.SVMAPISupported == 1);
      }
      *(bool*)value = ret;
      break;
    }
    case HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: {
      // True only if the first supported ISA of every registered GPU agent has XNACK enabled
      // (also true when there are no GPU agents).
      bool ret = true;
      for(auto agent : gpu_agents_)
        ret &= (agent->supported_isas()[0]->GetXnack() == IsaFeature::Enabled);
      *(bool*)value = ret;
      break;
    }
    case HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED: {
      *((bool*)value) = g_use_mwaitx;
      break;
    }
    case HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED: {
      auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;

      // Implemented in KFD in 1.12
      if (kfd_version.KernelInterfaceMajorVersion > 1 ||
          (kfd_version.KernelInterfaceMajorVersion == 1 &&
              kfd_version.KernelInterfaceMinorVersion >= 12))
        *(reinterpret_cast<bool*>(value)) = true;
      else
        *(reinterpret_cast<bool*>(value)) = false;
      break;
    }
    case HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED: {
      *((bool*)value) = core::Runtime::runtime_singleton_->VirtualMemApiSupported();
      break;
    }
    case HSA_AMD_SYSTEM_INFO_XNACK_ENABLED: {
      *((bool*)value) = core::Runtime::runtime_singleton_->XnackEnabled();
      break;
    }
    case HSA_AMD_SYSTEM_INFO_EXT_VERSION_MAJOR: {
      *((uint16_t*)value) = HSA_AMD_INTERFACE_VERSION_MAJOR;
      break;
    }
    case HSA_AMD_SYSTEM_INFO_EXT_VERSION_MINOR: {
      *((uint16_t*)value) = HSA_AMD_INTERFACE_VERSION_MINOR;
      break;
    }
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  return HSA_STATUS_SUCCESS;
}

// Registers handler(arg) to be considered when signal satisfies (cond, value).
//
// The request is queued on one of two event sets, each with its own control block:
// asyncSignals_ by default, or asyncExceptions_ when the signal has an EOP event whose type is
// not HSA_EVENTTYPE_SIGNAL.  The first request on a set creates that set's wake signal and
// starts its AsyncEventsLoop thread.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES if the wake signal or the thread can't be created,
// otherwise HSA_STATUS_SUCCESS.
//
// NOTE(review): the Retain() taken on the signal below is not balanced in this function on the
// two error returns — confirm whether the caller or the event loop is expected to release it.
hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal,
                                            hsa_signal_condition_t cond,
                                            hsa_signal_value_t value,
                                            hsa_amd_signal_handler handler,
                                            void* arg) {

  // Defaults: the regular signal set, with the thread priority taken from the runtime flags.
  struct AsyncEventsInfo* asyncInfo = &asyncSignals_;
  int priority = runtime_singleton_->flag().async_events_thread_priority();

  if (signal.handle != 0) {
    // Indicate that this signal is in use.
    hsa_signal_handle(signal)->Retain();

    // Signals backed by a non-signal event type go to the exception set, whose thread is
    // created with the default OS priority.
    core::Signal* coreSignal = core::Signal::Convert(signal);
    if (coreSignal->EopEvent() && coreSignal->EopEvent()->EventData.EventType != HSA_EVENTTYPE_SIGNAL) {
      priority = os::OS_THREAD_PRIORITY_DEFAULT;
      asyncInfo = &asyncExceptions_;
    }
  }

  ScopedAcquire<HybridMutex> scope_lock(&asyncInfo->control.lock);

  // Lazy initializer
  if (asyncInfo->control.async_events_thread_ == NULL) {
    // Create monitoring thread control signal
    auto err = HSA::hsa_signal_create(0, 0, NULL, &asyncInfo->control.wake);
    if (err != HSA_STATUS_SUCCESS) {
      assert(false && "Asyncronous events control signal creation error.");
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
    // The wake signal is the first monitored event: it fires when its value becomes non-zero.
    asyncInfo->events.PushBack(asyncInfo->control.wake, HSA_SIGNAL_CONDITION_NE,
                          0, NULL, NULL);

    // Start event monitoring thread
    asyncInfo->control.exit = false;
    asyncInfo->control.async_events_thread_ =
        os::CreateThread(AsyncEventsLoop, asyncInfo, 0, priority);
    if (asyncInfo->control.async_events_thread_ == NULL) {
      assert(false && "Asyncronous events thread creation error.");
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  // Queue the request on the pending list, then set the wake signal to 1 to notify the thread.
  asyncInfo->new_events.PushBack(signal, cond, value, handler, arg);

  hsa_signal_handle(asyncInfo->control.wake)->StoreRelease(1);

  return HSA_STATUS_SUCCESS;
}

// Registers a graphics interop handle with the given agents and maps it for GPU access.
//   num_agents, agents - agents that need access; their driver node ids are passed to the thunk.
//   interop_handle     - handle passed to hsaKmtRegisterGraphicsHandleToNodes.
//   flags              - not used in this function.
//   size, ptr          - receive the size and address of the registered resource.
//   metadata_size,
//   metadata           - optional; receive the resource metadata size/pointer when non-NULL.
// Returns HSA_STATUS_ERROR if registration fails, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the
// resource can't be mapped, otherwise HSA_STATUS_SUCCESS.
hsa_status_t Runtime::InteropMap(uint32_t num_agents, Agent** agents,
                                 int interop_handle, uint32_t flags,
                                 size_t* size, void** ptr,
                                 size_t* metadata_size, const void** metadata) {
  static const int tinyArraySize=8;
  HsaGraphicsResourceInfo info;

  // Node ids live in a small stack array; a heap array is used only for more than
  // tinyArraySize agents and is released by the scope guard below.
  HSAuint32 short_nodes[tinyArraySize];
  HSAuint32* nodes = short_nodes;
  if (num_agents > tinyArraySize) {
    nodes = new HSAuint32[num_agents];
    // NOTE(review): plain operator new[] reports failure by throwing, so this check looks
    // unreachable — confirm.
    if (nodes == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  MAKE_SCOPE_GUARD([&]() {
    if (num_agents > tinyArraySize) delete[] nodes;
  });

  // Translate each agent into its driver node id.
  for (uint32_t i = 0; i < num_agents; i++)
    agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID,
                       &nodes[i]);

  if (HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodes(interop_handle, &info, num_agents,
                                          nodes)) != HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  // Try mapping with 64KB pages first and fall back to 4KB pages.  If both attempts fail the
  // registration made above is undone.
  HSAuint64 altAddress;
  HsaMemMapFlags map_flags;
  map_flags.Value = 0;
  map_flags.ui32.PageSize = HSA_PAGE_SIZE_64KB;
  if (HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes,
                                &altAddress, map_flags, num_agents,
                                nodes)) != HSAKMT_STATUS_SUCCESS) {
    map_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    if (HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(info.MemoryAddress, info.SizeInBytes, &altAddress, map_flags,
                                  num_agents, nodes)) != HSAKMT_STATUS_SUCCESS) {
      HSAKMT_CALL(hsaKmtDeregisterMemory(info.MemoryAddress));
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
  }

  if (metadata_size != NULL) *metadata_size = info.MetadataSizeInBytes;
  if (metadata != NULL) *metadata = info.Metadata;

  *size = info.SizeInBytes;
  *ptr = info.MemoryAddress;

  // Track the mapping with a null region so that FreeMemory refuses to release it.
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
  allocation_map_[info.MemoryAddress] = AllocationRegion(
      nullptr, info.SizeInBytes, info.SizeInBytes, core::MemoryRegion::AllocateNoFlags);

  return HSA_STATUS_SUCCESS;
}

// Unmaps an interop pointer from the GPU and then deregisters it with the
// driver. Either step failing yields HSA_STATUS_ERROR_INVALID_ARGUMENT; the
// deregistration is not attempted when the unmap fails.
hsa_status_t Runtime::InteropUnmap(void* ptr) {
  const bool unmapped = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(ptr)) == HSAKMT_STATUS_SUCCESS;
  if (!unmapped) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  const bool deregistered = HSAKMT_CALL(hsaKmtDeregisterMemory(ptr)) == HSAKMT_STATUS_SUCCESS;
  return deregistered ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

// Queries the driver and the runtime allocation map for information about ptr.
//
// ptr        - address to look up.
// info       - output; info->size must be non-zero on entry and bounds how
//              many bytes of the result are copied back.
// alloc, num_agents_accessible, accessible
//            - optional; when all three are non-null the list of agents on the
//              nodes the memory is mapped to is returned in memory obtained
//              from alloc.
// block_info - optional; receives the underlying driver allocation (base,
//              length, owning agent) rather than the user visible fragment.
//              agentOwner is left untouched if the owning node has no agent
//              list.
//
// Returns HSA_STATUS_SUCCESS (with type HSA_EXT_POINTER_TYPE_UNKNOWN for
// pointers the driver does not know), HSA_STATUS_ERROR_INVALID_ARGUMENT when
// info->size is zero, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when alloc fails.
hsa_status_t Runtime::PtrInfo(const void* ptr, hsa_amd_pointer_info_t* info, void* (*alloc)(size_t),
                              uint32_t* num_agents_accessible, hsa_agent_t** accessible,
                              PtrInfoBlockData* block_info) {
  static_assert(static_cast<int>(HSA_POINTER_UNKNOWN) == static_cast<int>(HSA_EXT_POINTER_TYPE_UNKNOWN),
                "Thunk pointer info mismatch");
  static_assert(static_cast<int>(HSA_POINTER_ALLOCATED) == static_cast<int>(HSA_EXT_POINTER_TYPE_HSA),
                "Thunk pointer info mismatch");
  static_assert(static_cast<int>(HSA_POINTER_REGISTERED_USER) == static_cast<int>(HSA_EXT_POINTER_TYPE_LOCKED),
                "Thunk pointer info mismatch");
  static_assert(static_cast<int>(HSA_POINTER_REGISTERED_GRAPHICS) == static_cast<int>(HSA_EXT_POINTER_TYPE_GRAPHICS),
                "Thunk pointer info mismatch");

  HsaPointerInfo thunkInfo;
  uint32_t* mappedNodes = nullptr;

  hsa_amd_pointer_info_t retInfo = {0};

  // check output struct has an initialized size.
  if (info->size == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Never copy back more than either side's view of the struct.
  retInfo.size = Min(size_t(info->size), sizeof(hsa_amd_pointer_info_t));

  bool returnListData =
      ((alloc != nullptr) && (num_agents_accessible != nullptr) && (accessible != nullptr));

  bool allocation_map_entry_found = false;

  {  // memory_lock protects access to the NMappedNodes array and fragment user data since these may
     // change with calls to memory APIs.
    ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

    // We don't care if this returns an error code.
    // The type will be HSA_EXT_POINTER_TYPE_UNKNOWN if so.
    auto err = HSAKMT_CALL(hsaKmtQueryPointerInfo(ptr, &thunkInfo));
    if (err != HSAKMT_STATUS_SUCCESS || thunkInfo.Type == HSA_POINTER_UNKNOWN) {
      retInfo.type = HSA_EXT_POINTER_TYPE_UNKNOWN;
      memcpy(info, &retInfo, retInfo.size);
      return HSA_STATUS_SUCCESS;
    }

    if (returnListData) {
      assert(thunkInfo.NMappedNodes <= agents_by_node_.size() &&
             "PointerInfo: Thunk returned more than all agents in NMappedNodes.");
      // Snapshot the mapped node list while the lock is held.
      mappedNodes = (uint32_t*)alloca(thunkInfo.NMappedNodes * sizeof(uint32_t));
      memcpy(mappedNodes, thunkInfo.MappedNodes, thunkInfo.NMappedNodes * sizeof(uint32_t));
    }
    retInfo.type = (hsa_amd_pointer_type_t)thunkInfo.Type;
    retInfo.agentBaseAddress = reinterpret_cast<void*>(thunkInfo.GPUAddress);
    retInfo.hostBaseAddress = thunkInfo.CPUAddress;
    retInfo.sizeInBytes = thunkInfo.SizeInBytes;
    retInfo.userData = thunkInfo.UserData;
    retInfo.global_flags = thunkInfo.MemFlags.ui32.CoarseGrain
        ? HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED
        : HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED;
    retInfo.global_flags |=
        thunkInfo.MemFlags.ui32.Uncached ? HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT : 0;
    if (block_info != nullptr) {
      // Block_info reports the thunk allocation from which we may have suballocated.
      // For locked memory we want to return the host address since hostBaseAddress is used to
      // manipulate locked memory and it is possible that hostBaseAddress is different from
      // agentBaseAddress.
      // For device memory, hostBaseAddress is either equal to agentBaseAddress or is NULL when the
      // CPU does not have access.
      assert((retInfo.hostBaseAddress || retInfo.agentBaseAddress) && "Thunk pointer info returned no base address.");
      block_info->base = (retInfo.hostBaseAddress ? retInfo.hostBaseAddress : retInfo.agentBaseAddress);
      block_info->length = retInfo.sizeInBytes;

      // Report the owning agent, even if such an agent is not usable in the process.
      auto nodeAgents = agents_by_node_.find(thunkInfo.Node);
      assert(nodeAgents != agents_by_node_.end() && "Node id not found!");
      // The assert vanishes in release builds, so never dereference a missing or empty entry.
      if (nodeAgents != agents_by_node_.end() && !nodeAgents->second.empty()) {
        block_info->agentOwner = nodeAgents->second[0];
      }
    }
    // Locate the allocation map entry (if any) whose requested range contains ptr.
    auto fragment = allocation_map_.upper_bound(ptr);
    if (fragment != allocation_map_.begin()) {
      fragment--;
      if ((fragment->first <= ptr) &&
          (ptr < reinterpret_cast<const uint8_t*>(fragment->first) + fragment->second.size_requested)) {
        // agent and host address must match here. Only lock memory is allowed to have differing
        // addresses but lock memory has type HSA_EXT_POINTER_TYPE_LOCKED and cannot be
        // suballocated.
        retInfo.agentBaseAddress = const_cast<void*>(fragment->first);
        retInfo.hostBaseAddress = retInfo.agentBaseAddress;
        retInfo.sizeInBytes = fragment->second.size_requested;
        retInfo.userData = fragment->second.user_ptr;
        allocation_map_entry_found = true;
      }
    }
  }  // end lock scope

  // Return type UNKNOWN for released fragments.  Do not report the underlying block info to users!
  if ((!allocation_map_entry_found) &&
      ((retInfo.type == HSA_EXT_POINTER_TYPE_HSA) || (retInfo.type == HSA_EXT_POINTER_TYPE_IPC))) {
    retInfo.type = HSA_EXT_POINTER_TYPE_UNKNOWN;
  }

  // IPC and Graphics memory may come from a node that does not have an agent in this process.
  // Ex. ROCR_VISIBLE_DEVICES or peer GPU is not supported by ROCm.
  retInfo.agentOwner.handle = 0;
  auto nodeAgents = agents_by_node_.find(thunkInfo.Node);
  assert(nodeAgents != agents_by_node_.end() && "Node id not found!");
  const bool nodeHasAgents =
      (nodeAgents != agents_by_node_.end()) && !nodeAgents->second.empty();
  if (nodeHasAgents) {
    // Report the first enabled agent of the owning node; handle stays 0 if none is enabled.
    for (auto agent : nodeAgents->second) {
      if (agent->Enabled()) {
        retInfo.agentOwner = agent->public_handle();
        break;
      }
    }
  }

  // Correct agentOwner for locked memory.  Thunk reports the GPU that owns the
  // alias but users are expecting to see a CPU when the memory is system.
  if (retInfo.type == HSA_EXT_POINTER_TYPE_LOCKED) {
    if (!nodeHasAgents ||
        (nodeAgents->second[0]->device_type() != core::Agent::kAmdCpuDevice)) {
      retInfo.agentOwner = cpu_agents_[0]->public_handle();
    }
  }

  memcpy(info, &retInfo, retInfo.size);

  if (returnListData) {
    // First pass: count the agents on every mapped node to size the output array.
    uint32_t count = 0;
    for (HSAuint32 i = 0; i < thunkInfo.NMappedNodes; i++) {
      assert(mappedNodes[i] <= max_node_id() &&
             "PointerInfo: Invalid node ID returned from thunk.");
      count += agents_by_node_[mappedNodes[i]].size();
    }

    AMD::callback_t<decltype(alloc)> Alloc(alloc);
    *accessible = (hsa_agent_t*)Alloc(sizeof(hsa_agent_t) * count);
    if ((*accessible) == nullptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    *num_agents_accessible = count;

    // Second pass: emit the public handles in node order.
    uint32_t index = 0;
    for (HSAuint32 i = 0; i < thunkInfo.NMappedNodes; i++) {
      auto& list = agents_by_node_[mappedNodes[i]];
      for (auto agent : list) {
        (*accessible)[index] = agent->public_handle();
        index++;
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Attaches user data to the allocation starting at ptr.
// Entries tracked in the allocation map are updated directly; anything else
// (graphics, lock,...) is forwarded to the driver. Returns
// HSA_STATUS_ERROR_INVALID_ARGUMENT when the driver rejects the pointer.
hsa_status_t Runtime::SetPtrInfoData(const void* ptr, void* userptr) {
  {
    // Prefer the allocation map so fragments are handled.
    ScopedAcquire<KernelSharedMutex> guard(&memory_lock_);
    auto entry = allocation_map_.find(ptr);
    if (entry != allocation_map_.end()) {
      entry->second.user_ptr = userptr;
      return HSA_STATUS_SUCCESS;
    }
  }

  // Not a runtime tracked allocation: let the driver store the user data.
  const auto thunkStatus = HSAKMT_CALL(hsaKmtSetMemoryUserData(ptr, userptr));
  return (thunkStatus == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS
                                                : HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

// Sends dmabuf_fd to the peer process over a Unix socket.
// The descriptor is attached as SCM_RIGHTS ancillary data to a one byte
// payload ('y'). Returns 0 if sendmsg succeeds and -1 if it fails.
static int SendDmaBufFd(int socket, int dmabuf_fd) {
  // sendmsg needs at least one byte of regular data to carry the control message.
  char payload[1];
  memset(payload, 0, sizeof(payload));
  payload[0] = 'y';

  struct iovec payloadVec;
  payloadVec.iov_base = payload;
  payloadVec.iov_len = 1;

  char control[CMSG_SPACE(sizeof(dmabuf_fd))];
  memset(control, 0, sizeof(control));

  struct msghdr message = {0};
  message.msg_iov = &payloadVec;
  message.msg_iovlen = 1;
  message.msg_control = control;
  message.msg_controllen = sizeof(control);

  // Single control header carrying the file descriptor.
  struct cmsghdr* header = CMSG_FIRSTHDR(&message);
  header->cmsg_level = SOL_SOCKET;
  header->cmsg_type = SCM_RIGHTS;
  header->cmsg_len = CMSG_LEN(sizeof(dmabuf_fd));
  memcpy(CMSG_DATA(header), &dmabuf_fd, sizeof(dmabuf_fd));

  message.msg_controllen = CMSG_SPACE(sizeof(dmabuf_fd));

  if (sendmsg(socket, &message, 0) < 0) return -1;
  return 0;
}

// Receives a dmabuf file descriptor from the peer process over a Unix socket.
// The descriptor is expected as SCM_RIGHTS ancillary data attached to a one
// byte payload.
//
// Returns the received descriptor, or -1 when:
//  - recvmsg fails (including a receive timeout set on the socket),
//  - the peer closed the connection without sending anything, or
//  - the message carries no valid SCM_RIGHTS control data.
static int ReceiveDmaBufFd(int socket) {
  // The struct iovec is needed, even if it points to minimal data
  char m_buffer[1];
  struct iovec io;
  io.iov_base = m_buffer;
  io.iov_len = sizeof(m_buffer);

  char c_buffer[256];
  memset(c_buffer, 0, sizeof(c_buffer));

  struct msghdr msg = {0};
  msg.msg_iov = &io;
  msg.msg_iovlen = 1;
  msg.msg_control = c_buffer;
  msg.msg_controllen = sizeof(c_buffer);

  // A zero return means the peer shut the stream down; every further recvmsg
  // would return zero again, so retrying would spin forever. Treat it as failure.
  ssize_t rcv = recvmsg(socket, &msg, MSG_WAITALL);
  if (rcv <= 0) return -1;

  // Look for the control message that carries the descriptor. CMSG_FIRSTHDR
  // yields NULL when no ancillary data arrived.
  for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
       cmsg = CMSG_NXTHDR(&msg, cmsg)) {
    if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) continue;
    if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) return -1;

    int fd = -1;
    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
  }

  return -1;
}

// Size in bytes of the text buffer used to exchange a dmabuf FD handle ID
// between the IPC socket client and server.
#define IPC_SOCK_SERVER_DMABUF_FD_HANDLE_LENGTH 64
// Size in bytes of the buffer that holds the IPC socket server name.
#define IPC_SOCK_SERVER_NAME_LENGTH 32
// Reserved handle ID: a request carrying this value makes the IPC socket
// server loop exit.
#define IPC_SOCK_SERVER_CONN_CLOSE_HANDLE UINT64_MAX
// Thread body of the IPC socket server.
//
// Accepts one client connection at a time. Each client sends the decimal text
// of a handle ID; if that ID is a registered export, the matching memory is
// exported as a dmabuf FD, sent to the client, and the server then waits for
// the client's confirmation message. The loop ends when a client sends
// IPC_SOCK_SERVER_CONN_CLOSE_HANDLE or when reading the confirmation fails.
// On exit the export table is cleared and the listening socket is closed.
void Runtime::AsyncIPCSockServerConnLoop(void*) {
  auto& ipc_sock_server_fd_ = runtime_singleton_->ipc_sock_server_fd_;
  auto& ipc_sock_server_conns_ = runtime_singleton_->ipc_sock_server_conns_;
  auto& ipc_sock_server_lock_ = runtime_singleton_->ipc_sock_server_lock_;

  // One spare byte so the received text can always be NUL terminated before it
  // is parsed; any process that can reach the socket controls these bytes.
  char buf[IPC_SOCK_SERVER_DMABUF_FD_HANDLE_LENGTH + 1];

  // Wait until the client has connected
  while (1) {
    int connection_fd = accept(ipc_sock_server_fd_, NULL, NULL);
    if (connection_fd == -1) continue;
    MAKE_SCOPE_GUARD([&]() { close(connection_fd); });

    ssize_t received = read(connection_fd, buf, IPC_SOCK_SERVER_DMABUF_FD_HANDLE_LENGTH);
    if (received < 0) continue;
    // Terminate exactly after the bytes that arrived (possibly none).
    buf[received] = '\0';

    // Request to kill the server.
    uint64_t conn_handle = strtoull(buf, NULL, 10);
    if (conn_handle == IPC_SOCK_SERVER_CONN_CLOSE_HANDLE)
      break;

    int dmabuf_fd = -1;
    uint64_t fragOffset;
    void *ptr = NULL;
    size_t len = 0;

    // Search for registered export pointer
    ScopedAcquire<KernelMutex> lock(&ipc_sock_server_lock_);
    auto conn = ipc_sock_server_conns_.find(conn_handle);
    if (conn != ipc_sock_server_conns_.end()) {
      ptr = reinterpret_cast<void *>(conn_handle);
      len = conn->second;
    }

    // Unknown handle: drop the connection (closed by the scope guard).
    if (!ptr) continue;

    // Export DMA Buf FD and wait for client import
    int err = HSAKMT_CALL(hsaKmtExportDMABufHandle(ptr, len, &dmabuf_fd, &fragOffset));
    if (err != HSAKMT_STATUS_SUCCESS) continue;
    SendDmaBufFd(connection_fd, dmabuf_fd);
    err = read(connection_fd, buf, IPC_SOCK_SERVER_DMABUF_FD_HANDLE_LENGTH);
    close(dmabuf_fd);
    if (err == -1) break; // Client failed to confirm import so end server
  }

  // Clean up
  ipc_sock_server_conns_.clear();
  close(ipc_sock_server_fd_);
}

// Creates an IPC handle for the allocation [ptr, ptr + len).
//
// ptr/len must describe exactly one allocation or fragment as reported by
// PtrInfo. Two mechanisms exist:
//  - Without dmabuf IPC support the driver produces the shared handle and
//    fragment information is folded into handle->handle[6].
//  - With dmabuf IPC support the handle encodes the pointer (handle[0..1]),
//    this process id (handle[2]), a system memory flag (handle[3]) and the
//    owner node id (handle[4]); a per-process socket server is started on the
//    first export to hand out dmabuf FDs on request.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for unsupported or inconsistent
// requests, the driver status when the dmabuf export fails, and
// HSA_STATUS_ERROR when the socket server cannot be set up.
hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* handle) {
  static_assert(sizeof(hsa_amd_ipc_memory_t) == sizeof(HsaSharedMemoryHandle),
                "Thunk IPC mismatch.");

  static const size_t pageSize = 4096;

  // Reject sharing allocations larger than ~8TB due to thunk limitations.
  if (len > 0x7FFFFFFF000ull) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  memset(handle->handle, 0, sizeof(handle->handle));

  // Check for fragment sharing.
  PtrInfoBlockData block = {};
  hsa_amd_pointer_info_t info = {};
  info.size = sizeof(info);
  if (PtrInfo(ptr, &info, nullptr, nullptr, nullptr, &block) != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Only whole allocations/fragments may be shared.
  if (info.agentBaseAddress != ptr || info.sizeInBytes != len)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  Agent* agent = Agent::Convert(info.agentOwner);
  bool useFrag = (block.base != ptr || block.length != len);
  // Assume all pointers and blocks are 4Kb aligned.
  uint32_t fragOffset = (reinterpret_cast<uint8_t*>(ptr) -
                         reinterpret_cast<uint8_t*>(block.base))/pageSize;
  if (useFrag) {
    if (!IsMultipleOf(block.base, 2 * 1024 * 1024)) {
      assert(false && "Fragment's block not aligned to 2MB!");
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  }

  if (!ipc_dmabuf_supported_) {
    HsaSharedMemoryHandle *sHandle = reinterpret_cast<HsaSharedMemoryHandle*>(handle);
    if (agent->driver().ShareMemory(block.base, block.length, sHandle) != HSA_STATUS_SUCCESS)
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    hsa_status_t err = HSA_STATUS_SUCCESS;
    if (useFrag) {
      // Bit 31 marks a fragment; the low bits carry its page offset in the block.
      handle->handle[6] |= 0x80000000 | fragOffset;
      // Prevent reallocation of fragment for better performance.
      ScopedAcquire<KernelSharedMutex::Shared> lock(memory_lock_.shared());
      err = allocation_map_[ptr].region->IPCFragmentExport(ptr);
      assert(err == HSA_STATUS_SUCCESS && "Region inconsistent with address map.");
    }
    return err;
  }

  // User ptr as dmabuf FD handle ID for client to request the actual dmabuf FD.
  uint32_t dmaBufFdHandleLo = (reinterpret_cast<uint64_t>(ptr) & 0xffffffff);
  uint32_t dmaBufFdHandleHi = (reinterpret_cast<uint64_t>(ptr) >> 32);
  handle->handle[0] = dmaBufFdHandleLo;
  handle->handle[1] = dmaBufFdHandleHi;
  handle->handle[2] = getpid(); // socket server name handle

  handle->handle[3] = agent->device_type() == Agent::kAmdCpuDevice;
  // System sub allocations are not supported for now.
  if (handle->handle[3] && useFrag) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  handle->handle[4] = agent->node_id();
  if (useFrag) handle->handle[6] |= 0x80000000 | fragOffset;

  // Work around to defer export on import call to minimize FD creation.
  // Without this, a deferred export may fail due to the kernel mode driver not
  // holding the GEM object reference.
  // Export the dmabuf then close the file to get the reference to ensure the
  // deferred export will not run into this problem.
  int dmabuf_fd = -1;
  uint64_t dmabufOffset = 0;
  hsa_status_t err = agent->driver().ExportDMABuf(ptr, len, &dmabuf_fd, &dmabufOffset);
  // Check the status first: the offset is only meaningful after a successful export.
  if (err != HSA_STATUS_SUCCESS) return err;
  assert(dmabufOffset/pageSize == fragOffset && "DMA Buf inconsistent with pointer offset.");
  close(dmabuf_fd);

  ScopedAcquire<KernelMutex> lock(&ipc_sock_server_lock_);
  if (!ipc_sock_server_conns_.size()) { // create new runtime socket server
    struct sockaddr_un address;
    ipc_sock_server_fd_ = socket(AF_UNIX, SOCK_STREAM, 0);
    assert(ipc_sock_server_fd_ > -1 && "DMA buffer could not be exported for IPC!");
    if (ipc_sock_server_fd_ == -1) return HSA_STATUS_ERROR;

    // Use the PID as unique socket server name.
    char socketName[IPC_SOCK_SERVER_NAME_LENGTH];
    snprintf(socketName, IPC_SOCK_SERVER_NAME_LENGTH, "xhsa%i", handle->handle[2]);

    // Initialize os socket server with client acceptance limit.
    // Socket servers will serialize connections and drop connections over the listen limit.
    // The client can try and reconnect.
    memset(&address, 0, sizeof(struct sockaddr_un));
    address.sun_family = AF_UNIX;
    strncpy(address.sun_path, socketName, IPC_SOCK_SERVER_NAME_LENGTH);
    address.sun_path[0] = 0; // first NULL char creates unlisted abstract socket
    int sockErr =
        bind(ipc_sock_server_fd_, (struct sockaddr *)&address, sizeof(struct sockaddr_un));
    assert(!sockErr && "Connection to export DMA buffer not made!");
    if (!sockErr) {
      sockErr = listen(ipc_sock_server_fd_, 1);
      assert(!sockErr && "Connection to export DMA buffer not made!");
    }
    if (sockErr) {
      // Do not leak the half configured socket; a later export can retry.
      close(ipc_sock_server_fd_);
      ipc_sock_server_fd_ = -1;
      return HSA_STATUS_ERROR;
    }

    // Spin server client acceptance into a socket server thread.
    // Socket server needs to last for the lifetime of the runtime instance
    // as the attach life cycle is unknown.
    os::CreateThread(AsyncIPCSockServerConnLoop, NULL);
  }

  // Register the export so the socket server can serve it on request.
  ipc_sock_server_conns_[reinterpret_cast<uint64_t>(ptr)] = len;

  // TODO: fragment block discard for better memory performance causes memory violations
  // with DMABuf export even when synchronously called. Bypass for now.

  return HSA_STATUS_SUCCESS;
}

// Client side of the dmabuf IPC exchange.
//
// Connects to the abstract Unix socket of the exporting process (named from
// conn_handle), sends dmabuf_fd_handle as decimal text, receives the dmabuf FD
// and registers it with the driver.
//
// conn_handle      - identifies the exporter's socket server.
// dmabuf_fd_handle - handle ID to request; IPC_SOCK_SERVER_CONN_CLOSE_HANDLE
//                    asks the server to shut down and returns 0 right after
//                    the request is written.
// res              - when non-null the registration is dropped again and the
//                    buffer is imported through libDRM into *res instead.
// numNodes/nodes   - nodes passed to the registration call.
// importAddress/importSize - receive the registered address and size.
//
// Returns 0 (HSAKMT_STATUS_SUCCESS) on success, -1 on socket failures, or the
// status of the failing registration/import call.
int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
                             amdgpu_bo_import_result *res,
                             unsigned int numNodes, HSAuint32 *nodes,
                             void **importAddress, HSAuint64 *importSize) {
    struct sockaddr_un address;
    int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!");
    if (socket_fd == -1) return -1;

    // Armed right away so every exit path below closes the socket.
    MAKE_SCOPE_GUARD([&]() { close(socket_fd); });

    // Set 10 second timeout for ReceiveDmaBufFd
    struct timeval tv;
    tv.tv_sec = 10;
    tv.tv_usec = 0;
    int status = setsockopt(socket_fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv));
    assert(status == 0 && "DMA buffer FD could not be received for IPC!");
    if (status) return -1;

    char buf[IPC_SOCK_SERVER_DMABUF_FD_HANDLE_LENGTH];
    memset(&address, 0, sizeof(struct sockaddr_un));
    memset(buf, 0, sizeof(buf));
    address.sun_family = AF_UNIX;
    snprintf(address.sun_path, IPC_SOCK_SERVER_NAME_LENGTH, "xhsa%i", conn_handle);
    address.sun_path[0] = 0; // first NULL char creates unlisted abstract socket

    // Retry the connection every millisecond for up to 10 seconds.
    int timeoutLimitMs = 10000, timeoutMs = 0, timeoutIntervalMs = 1;
    while (timeoutMs < timeoutLimitMs) {
      if (connect(socket_fd, (struct sockaddr *) &address, sizeof(struct sockaddr_un))) {
        timeoutMs  += timeoutIntervalMs;
        std::this_thread::sleep_for(std::chrono::milliseconds(timeoutIntervalMs));
      } else {
        break;
      }
    }

    if (timeoutMs >= timeoutLimitMs) return -1;

    // Ping server to export and send DMABUF FD on handle
    // The handle is unsigned, so print it as such; the server parses it with strtoull.
    snprintf(buf, sizeof(buf), "%llu", static_cast<unsigned long long>(dmabuf_fd_handle));
    if (write(socket_fd, buf, sizeof(buf)) == -1) return -1;

    if (dmabuf_fd_handle == IPC_SOCK_SERVER_CONN_CLOSE_HANDLE) return 0;

    dmabuf_fd = ReceiveDmaBufFd(socket_fd);
    if (dmabuf_fd == -1) return -1;

    HsaGraphicsResourceInfo info;
    // Zero every flag bit before setting the one that is needed.
    HSA_REGISTER_MEM_FLAGS regFlags = {};
    regFlags.ui32.requiresVAddr = !!res ? 0 : 1;
    int err = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodesExt(dmabuf_fd, &info, numNodes, nodes, regFlags));
    if (err == HSAKMT_STATUS_SUCCESS) {
      *importAddress = info.MemoryAddress;
      *importSize = info.SizeInBytes;
      if (res) {
        HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress));

        // Manually libDRM import and GPU map system memory
        AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
        err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
                               dmabuf_fd, res));
      }
    }
    // The received descriptor is no longer needed whether or not registration worked.
    close(dmabuf_fd);

    // Ping socket server to close exporter
    if (write(socket_fd, buf, sizeof(buf)) == -1) return -1;
    return err;
}

// Attaches to memory shared by another process through an IPC handle.
//
// handle     - IPC handle produced by the exporting process.
// len        - requested length; for fragments it is clamped to what remains
//              of the imported block after the fragment offset.
// num_agents - number of entries in agents; 0 selects the default mapping.
// agents     - agents that need access to the memory.
// mapped_ptr - receives the address of the attached memory.
//
// The import goes through IPCClientImport when dmabuf IPC is supported and
// through the driver's RegisterSharedHandle otherwise. Successful imports are
// recorded in allocation_map_ with no owning region.
hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, uint32_t num_agents,
                                Agent** agents, void** mapped_ptr) {
  static const int tinyArraySize = 8;
  void* importAddress;
  HSAuint64 importSize;
  uint64_t dmaBufFDHandle = 0;
  // Work on a copy so the fragment bits can be stripped from the handle.
  hsa_amd_ipc_memory_t importHandle = *handle;

  // Extract fragment info
  bool isFragment = false;
  uint32_t fragOffset = 0;

  // NOTE(review): agents[0] is dereferenced here before the num_agents == 0 path below;
  // presumably callers always pass a usable agents array - confirm.
  if (Runtime::IsDifferentDriver(*agents, num_agents)) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  core::Driver* driver = &agents[0]->driver();

  // Applies the fragment offset to the imported range and records the result in
  // the allocation map, together with the libDRM buffer handle (may be NULL).
  auto fixFragment = [&](amdgpu_bo_handle ldrm_bo) {
    if (isFragment) {
      importAddress = reinterpret_cast<uint8_t*>(importAddress) + fragOffset;
      len = Min(len, importSize - fragOffset);
    }
    ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
    allocation_map_[importAddress] =
        AllocationRegion(nullptr, len, len, core::MemoryRegion::AllocateNoFlags);
    allocation_map_[importAddress].ldrm_bo = ldrm_bo;
  };

  // Imports the shared memory, filling importAddress and importSize.
  auto importMemory = [&](unsigned int numNodes, HSAuint32* nodes, amdgpu_bo_import_result* res) {
    if (ipc_dmabuf_supported_) {
      int ret = IPCClientImport(importHandle.handle[2], dmaBufFDHandle, res, numNodes, nodes,
                                &importAddress, &importSize);
      if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    } else {
      hsa_status_t ret = driver->RegisterSharedHandle(
          reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle), &importAddress,
          &importSize);
      if (ret != HSA_STATUS_SUCCESS) return ret;
    }

    return HSA_STATUS_SUCCESS;
  };

  // Maps the imported memory to the GPU (to specific nodes when numNodes is non-zero),
  // deregistering it again if the mapping fails, then publishes the result.
  auto mapMemoryToNodes = [&](unsigned int numNodes, HSAuint32 *nodes) {
    HSAuint64 altAddress;
    if (!numNodes) {
      if (HSAKMT_CALL(hsaKmtMapMemoryToGPU(importAddress, importSize, &altAddress)) != HSAKMT_STATUS_SUCCESS) {
        HSAKMT_CALL(hsaKmtDeregisterMemory(importAddress));
        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      }
    } else {
      // Try 64KB pages first, then fall back to 4KB pages.
      HsaMemMapFlags map_flags;
      map_flags.Value = 0;
      map_flags.ui32.PageSize = HSA_PAGE_SIZE_64KB;
      if (HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(importAddress, importSize, &altAddress, map_flags, numNodes,
                                    nodes)) != HSAKMT_STATUS_SUCCESS) {
        map_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
        if (HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(importAddress, importSize, &altAddress, map_flags, numNodes,
                                      nodes)) != HSAKMT_STATUS_SUCCESS) {
          HSAKMT_CALL(hsaKmtDeregisterMemory(importAddress));
          return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
        }
      }
    }
    fixFragment(NULL);
    *mapped_ptr = importAddress;
    return HSA_STATUS_SUCCESS;
  };

  // Bit 31 of handle[6] marks a fragment; the low 9 bits hold its offset in 4096 byte pages.
  if ((importHandle.handle[6] & 0x80000000) != 0) {
    isFragment = true;
    fragOffset = (importHandle.handle[6] & 0x1FF) * 4096;
    importHandle.handle[6] &= ~(0x80000000 | 0x1FF);
  }

  // With dmabuf IPC, handle[0] and handle[1] carry the low and high halves of the FD handle ID.
  if (ipc_dmabuf_supported_) {
    uint64_t dmaBufFDHandleLo = importHandle.handle[0];
    uint64_t dmaBufFDHandleHi = importHandle.handle[1];
    dmaBufFDHandle = (dmaBufFDHandleHi << 32) | dmaBufFDHandleLo;
  }

  if (num_agents == 0) {
    amdgpu_bo_import_result res;
    // A non-zero handle[3] selects the system memory dmabuf import path below.
    bool isDmabufSysMem = ipc_dmabuf_supported_ && importHandle.handle[3];

    hsa_status_t err = importMemory(0, NULL, isDmabufSysMem ? &res : NULL);
    if (err != HSA_STATUS_SUCCESS) return err;
    if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL);

    // System memory DMA Buf import
    auto errCleanup = [&](amdgpu_bo_handle bo)
    {
      DRM_CALL(amdgpu_bo_free(bo)); // auto frees cpu map
      return HSA_STATUS_ERROR;
    };

    // Create a shared cpu access pointer for user
    void *cpuPtr;
    amdgpu_bo_handle bo = res.buf_handle;
    int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr));
    if (ret) return errCleanup(bo);

    // Note VA ops will always override flags to allow read/write/exec permissions.
    // The GPU virtual address requested is the CPU mapping address.
    ret = DRM_CALL(amdgpu_bo_va_op(bo, 0, importSize,
                          reinterpret_cast<uint64_t>(cpuPtr), 0, AMDGPU_VA_OP_MAP));
    if (ret) return errCleanup(bo);
    importAddress = cpuPtr;
    fixFragment(bo);
    *mapped_ptr = importAddress;
    return HSA_STATUS_SUCCESS;
  }

  // Node id storage: heap for long agent lists, stack otherwise.
  HSAuint32* nodes = nullptr;
  if (num_agents > tinyArraySize)
    nodes = new HSAuint32[num_agents];
  else
    nodes = (HSAuint32*)alloca(sizeof(HSAuint32) * num_agents);
  if (nodes == NULL) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  MAKE_SCOPE_GUARD([&]() {
    if (num_agents > tinyArraySize) delete[] nodes;
  });

  // Translate each agent into its driver node id.
  for (uint32_t i = 0; i < num_agents; i++)
    agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]);

  hsa_status_t err = importMemory(num_agents, nodes, NULL);
  if (err != HSA_STATUS_SUCCESS) return err;
  return mapMemoryToNodes(num_agents, nodes);
}

// Detaches memory previously attached with IPCAttach.
//
// Imports tracked in the allocation map are removed from it; libDRM imports
// are unmapped and freed through libDRM, everything else is unmapped and
// deregistered through the driver using the base of the underlying block.
// Any failure is reported as HSA_STATUS_ERROR_INVALID_ARGUMENT.
hsa_status_t Runtime::IPCDetach(void* ptr) {
  // True once the libDRM path has already released the buffer.
  bool releasedViaLibDrm = false;

  {  // Handle imported fragments.
    ScopedAcquire<KernelSharedMutex> guard(&memory_lock_);
    const auto entry = allocation_map_.find(ptr);
    if (entry != allocation_map_.end()) {
      // Entries with an owning region are not imports.
      if (entry->second.region != nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

      if (entry->second.ldrm_bo) {
        const int unmapStatus =
            DRM_CALL(amdgpu_bo_va_op(entry->second.ldrm_bo, 0, entry->second.size,
                                     reinterpret_cast<uint64_t>(ptr), 0, AMDGPU_VA_OP_UNMAP));
        if (unmapStatus) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

        // Freeing the buffer object also removes the cpu mapping.
        const int freeStatus = DRM_CALL(amdgpu_bo_free(entry->second.ldrm_bo));
        if (freeStatus) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

        releasedViaLibDrm = true;
      }

      allocation_map_.erase(entry);
      guard.Release();  // Pointer info takes the memory lock itself.

      // Switch to the base address of the underlying driver allocation.
      PtrInfoBlockData blockData = {};
      hsa_amd_pointer_info_t ptrInfo = {};
      ptrInfo.size = sizeof(ptrInfo);
      const hsa_status_t queryStatus =
          PtrInfo(ptr, &ptrInfo, nullptr, nullptr, nullptr, &blockData);
      if (queryStatus != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      ptr = blockData.base;
    }
  }

  if (releasedViaLibDrm) return HSA_STATUS_SUCCESS;

  if (HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(ptr)) != HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (HSAKMT_CALL(hsaKmtDeregisterMemory(ptr)) != HSAKMT_STATUS_SUCCESS)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return HSA_STATUS_SUCCESS;
}

// Body of the asynchronous signal handling loop.
//
// _eventsInfo is an AsyncEventsInfo*. Until its control.exit flag is set the
// loop waits for registered signals, runs the handler of every signal whose
// condition is met (dropping handlers that return false), then merges newly
// registered events and runs queued plain functions. Entry 0 of the event
// list is treated as the wake signal and is reset instead of being handled.
// On exit the remaining signals are released.
void Runtime::AsyncEventsLoop(void* _eventsInfo) {
  AsyncEventsInfo* eventsInfo = reinterpret_cast<AsyncEventsInfo*>(_eventsInfo);

  auto& async_events_control_ = eventsInfo->control;
  auto& async_events_ = eventsInfo->events;
  auto& new_async_events_ = eventsInfo->new_events;
  auto& hsa_events = eventsInfo->events.hsa_events_;
  auto& event_age = eventsInfo->events.age_;
  // Number of entries of hsa_events prepared for the next interrupt wait.
  uint32_t unique_evts = 0;
  auto hsa_signals = reinterpret_cast<hsa_signal_handle*>(&async_events_.signal_[0]);

  // Runs the handler registered at index. When the handler returns false the
  // entry is removed by overwriting it with the last entry and shrinking the
  // list. Returns the handler's result.
  auto processEvent = [&](size_t index, hsa_signal_value_t value, bool wait_any) {
    // No error or timeout occurred, process the handlers
    // Call handler for the known satisfied signal.
    assert(async_events_.handler_[index] != nullptr);
    bool keep = async_events_.handler_[index](value, async_events_.arg_[index]);
    if (!keep) {
      if (!wait_any) {
        hsa_signals[index]->WaitingDec();
      }
      hsa_signal_handle(async_events_.signal_[index])->Release();
      async_events_.CopyIndex(index, async_events_.Size() - 1);
      async_events_.PopBack();
    }
    return keep;
  };

  // Prepares a list of events for a wait inside KFD
  // Returns false (and discards the prepared list) when the signal at idx has no event.
  auto PrepareInterrupt = [&](size_t idx, bool init_age) {
    HsaEvent* hsa_event = hsa_signals[idx]->EopEvent();
    // If any signal doesn't have an interrupt, then switch to polling
    if (hsa_event == nullptr) {
      unique_evts = 0;
      return false;
    } else {
      // Grow the event and age arrays in steps of 10 entries.
      if (hsa_events.size() <= unique_evts) {
          hsa_events.resize(unique_evts + 10);
          event_age.resize(unique_evts + 10);
      }
      // (Re)initialize the age when requested or when this slot now holds a different event.
      if (init_age || hsa_events[unique_evts] != hsa_event ) {
        event_age[unique_evts] = runtime_singleton_->KfdVersion().supports_event_age ? 1 : 0;
      }
      hsa_events[unique_evts] = hsa_event;
      unique_evts++;
      return true;
    }
  };

  // KFD will move this thread into sleep, until any event from the list is complete or
  // if ROCR can wake it up with hsaKmtSetEvent()
  auto WaitForInterrupt = [&]() {
    constexpr uint32_t wait_ms = 0xFFFFFFFEu;
    // Collapse adjacent duplicate events before handing the list to the driver.
    HsaEvent** end = std::unique(&hsa_events[0], &hsa_events[0] + unique_evts);
    unique_evts = uint32_t(end - &hsa_events[0]);
    HSAKMT_CALL(hsaKmtWaitOnMultipleEvents_Ext(&hsa_events[0], unique_evts, false, wait_ms, &event_age[0]));
  };

  while (!async_events_control_.exit) {
    // Wait for a signal
    std::vector<hsa_signal_value_t> value(1);
    value[0] = 0;
    uint32_t index = 0;
    uint32_t wait_any = true;
    if (eventsInfo->monitor_exceptions) {
      index =
          Signal::WaitAnyExceptions(uint32_t(async_events_.Size()), &async_events_.signal_[0],
                                    &async_events_.cond_[0], &async_events_.value_[0], &value[0]);
    } else {
     if (core::Runtime::runtime_singleton_->flag().wait_any()) {
       index = Signal::WaitMultiple(uint32_t(async_events_.Size()), &async_events_.signal_[0],
                                    &async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1),
                                    HSA_WAIT_STATE_BLOCKED, value, false);
     } else {
      // Skip wake-up signal logic
      index = 1;
      wait_any = false;
      // The new events can reallocate the signals, hence update the pointer
      hsa_signals = reinterpret_cast<hsa_signal_handle*>(&async_events_.signal_[0]);
     }
    }

    // Reset the control signal
    // NOTE(review): index is unsigned, so the -1 below compares against 0xFFFFFFFF;
    // presumably that is what the wait calls return when no signal was satisfied - confirm.
    if (index == 0) {
      hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
    } else if (index != -1) {
      if (wait_any) {
        processEvent(index, value[0], wait_any);
      } else {
        // Without wait-any every signal is checked, starting with the wake signal.
        index = 0;
      }
      // Process all signals on the CPU first
      bool finish = false;
      bool polling = false;
      bool init_age = true;

      // Mark all signals with a waiting tag
      // @note: Waiting tag must be marked before the signal state check on CPU to
      // avoid a possible race condition between KFD sleep and rocr's awake call
      if (!wait_any) {
        for (size_t e = 0; e < async_events_.Size(); e++) {
          hsa_signals[e]->WaitingInc();
        }
      }
      while (!finish) {
        // If exception or WaitAny(), then finish with just one iteration
        if (wait_any) {
          finish = true;
        }
        bool interrupt_wait = false;
        unique_evts = 0;

        // Check remaining signals before sleeping.
        for (size_t i = index; i < async_events_.Size(); i++) {
          hsa_signal_handle sig(async_events_.signal_[i]);
          value[0] = atomic::Load(&sig->signal_.value, std::memory_order_relaxed);
          if (CheckSignalCondition(value[0], async_events_.cond_[i], async_events_.value_[i])) {
            if (i == 0) {
              hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
            } else {
              // A removed entry was replaced by the last one, so visit this slot again.
              if (!processEvent(i, value[0], wait_any)) {
                i--;
              }
            }
            if (!wait_any) {
              finish = true;
              init_age = true;
            }
          }

          // If the current signal isn't complete and polling is disabled, then prepare KFD wait for an interrupt
          if (!finish && !polling) {
            interrupt_wait = PrepareInterrupt(i, init_age);
            // If the interrupt was disabled, then force polling
            if (!interrupt_wait) {
              polling = true;
              finish = false;
            }
          } else if (unique_evts > 0) {
            // Something completed (or polling is active): drop the prepared interrupt list.
            unique_evts = 0;
            interrupt_wait = false;
          }
        }
        // If nothing was complete and an interrupt wait was requested, then call KFD
        if (interrupt_wait) {
          WaitForInterrupt();
          init_age = false;
        }
      }
    }

    if (!wait_any) {
      // Remove the waiting tags from events
      for (size_t e = 0; e < async_events_.Size(); e++) {
        hsa_signals[e]->WaitingDec();
      }
    }

    // Insert new signals and find plain functions
    typedef std::pair<void (*)(void*), void*> func_arg_t;
    std::vector<func_arg_t> functions;
    {
      ScopedAcquire<HybridMutex> scope_lock(&async_events_control_.lock);
      for (size_t i = 0; i < new_async_events_.Size(); i++) {
        // A zero signal handle marks a plain function to call rather than a signal to watch.
        if (new_async_events_.signal_[i].handle == 0) {
          functions.push_back(
              func_arg_t((void (*)(void*))new_async_events_.handler_[i],
                         new_async_events_.arg_[i]));
          continue;
        }
        async_events_.PushBack(
            new_async_events_.signal_[i], new_async_events_.cond_[i],
            new_async_events_.value_[i], new_async_events_.handler_[i],
            new_async_events_.arg_[i]);
      }
      new_async_events_.Clear();
    }

    // Call plain functions
    // They run after the lock above has been released.
    for (size_t i = 0; i < functions.size(); i++)
      functions[i].first(functions[i].second);
    functions.clear();
  }

  // Release wait count of all pending signals
  // Entry 0 (the wake signal) is skipped.
  for (size_t i = 1; i < async_events_.Size(); i++)
    hsa_signal_handle(async_events_.signal_[i])->Release();
  async_events_.Clear();

  // Events that were queued but never merged are released as well.
  for (size_t i = 0; i < new_async_events_.Size(); i++)
    hsa_signal_handle(new_async_events_.signal_[i])->Release();
  new_async_events_.Clear();
}

// Creates the VM-fault and HW-exception events, wraps each one in an interrupt
// signal and registers VMFaultHandler / HwExceptionHandler on them through
// SetAsyncSignalHandler. Does nothing when interrupt waits are disabled or when
// there are no GPU agents. If a signal cannot be created the function asserts
// (debug builds) and returns without registering the remaining handlers.
void Runtime::BindErrorHandlers() {
  const bool can_bind = core::g_use_interrupt_wait && !gpu_agents_.empty();
  if (!can_bind) return;

  // ---- VM (memory access) fault ----
  // The memory event is created with manual reset to avoid a race with the
  // driver when multiple VM faults are raised concurrently.
  vm_fault_event_ = core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_MEMORY, true);

  // The interrupt signal that carries the memory event; it is the object that
  // gets registered with the global async handler thread.
  vm_fault_signal_ = new core::InterruptSignal(0, vm_fault_event_);

  const bool vm_fault_ready = vm_fault_signal_->IsValid() && vm_fault_signal_->EopEvent() != NULL;
  if (!vm_fault_ready) {
    assert(false && "Failed on creating VM fault signal");
    return;
  }

  SetAsyncSignalHandler(core::Signal::Convert(vm_fault_signal_), HSA_SIGNAL_CONDITION_NE, 0,
                        VMFaultHandler, reinterpret_cast<void*>(vm_fault_signal_));

  // ---- HW exception (non-RAS events) ----
  hw_exception_event_ = core::InterruptSignal::CreateEvent(HSA_EVENTTYPE_HW_EXCEPTION, true);
  hw_exception_signal_ = new core::InterruptSignal(0, hw_exception_event_);

  const bool hw_exception_ready =
      hw_exception_signal_->IsValid() && hw_exception_signal_->EopEvent() != NULL;
  if (!hw_exception_ready) {
    assert(false && "Failed on creating HW Exception signal");
    return;
  }

  SetAsyncSignalHandler(core::Signal::Convert(hw_exception_signal_), HSA_SIGNAL_CONDITION_NE, 0,
                        HwExceptionHandler, reinterpret_cast<void*>(hw_exception_signal_));
}

// Async signal handler for the HW exception event registered in BindErrorHandlers.
//
// @param val  Signal value that triggered the handler (unused).
// @param arg  The core::InterruptSignal* that owns the HW exception event.
// @return Always false, i.e. the handler is not kept registered.
//
// If custom system event handlers are registered, the exception is packed into
// an hsa_amd_event_t and passed to each of them; the event counts as handled if
// at least one returns HSA_STATUS_SUCCESS. Otherwise a diagnostic is printed
// and the process is aborted.
bool Runtime::HwExceptionHandler(hsa_signal_value_t val, void* arg) {
  core::InterruptSignal* hw_exception_signal = reinterpret_cast<core::InterruptSignal*>(arg);

  assert(hw_exception_signal != NULL);

  if (hw_exception_signal == NULL) return false;

  HsaEvent* exception_event = hw_exception_signal->EopEvent();

  HsaHwException& exception = exception_event->EventData.EventData.HwException;

  // Resolve the faulting agent once with a checked lookup. operator[] on the map
  // would insert an empty entry for an unknown node and indexing it is undefined
  // behaviour, so an unknown node is represented by a null agent instead.
  Agent* faulty_agent = nullptr;
  auto it = runtime_singleton_->agents_by_node_.find(exception.NodeId);
  if (it != runtime_singleton_->agents_by_node_.end() && !it->second.empty()) {
    faulty_agent = it->second.front();
  }

  hsa_status_t custom_handler_status = HSA_STATUS_ERROR;
  auto system_event_handlers = runtime_singleton_->GetSystemEventHandlers();

  // If custom handler is registered, pack the fault info and call the handler
  if (!system_event_handlers.empty()) {
    hsa_amd_event_t hw_exception_event;
    hw_exception_event.event_type = HSA_AMD_GPU_HW_EXCEPTION_EVENT;
    hsa_amd_gpu_hw_exception_info_t& exception_info = hw_exception_event.hw_exception;

    assert(faulty_agent != nullptr && "Can't find faulty agent.");
    if (faulty_agent != nullptr) {
      exception_info.agent = Agent::Convert(faulty_agent);
    } else {
      // Unknown node: report a null agent handle rather than dereferencing end().
      exception_info.agent.handle = 0;
    }

    // This field is not set by KFD at the moment
    exception_info.reset_type = HSA_AMD_HW_EXCEPTION_RESET_TYPE_OTHER;

    exception_info.reset_cause = (exception.ResetCause == HSA_EVENTID_HW_EXCEPTION_ECC)
        ? HSA_AMD_HW_EXCEPTION_CAUSE_ECC
        : HSA_AMD_HW_EXCEPTION_CAUSE_GPU_HANG;

    for (auto& callback : system_event_handlers) {
      hsa_status_t err = callback.first(&hw_exception_event, callback.second);
      if (err == HSA_STATUS_SUCCESS) custom_handler_status = HSA_STATUS_SUCCESS;
    }
  }

  // No custom handler registered, or none of them reported success.
  if (custom_handler_status != HSA_STATUS_SUCCESS) {
    void* agent_handle = (faulty_agent != nullptr)
        ? reinterpret_cast<void*>(faulty_agent->public_handle().handle)
        : nullptr;
    fprintf(stderr, "HW Exception by GPU node-%u (Agent handle: %p) reason :%s\n", exception.NodeId,
            agent_handle,
            (exception.ResetCause == HSA_EVENTID_HW_EXCEPTION_ECC) ? "ECC" : "GPU Hang");

    assert(false && "GPU HW Exception");
    std::abort();
  }
  // No need to keep the signal because we are done.
  return false;
}

// Async signal handler for the VM fault (memory access fault) event registered
// in BindErrorHandlers.
//
// @param val  Signal value that triggered the handler (unused).
// @param arg  The core::InterruptSignal* that owns the memory event.
// @return Always false, i.e. the handler is not kept registered.
//
// If custom system event handlers are registered, the fault is packed into an
// hsa_amd_event_t and passed to each of them; the fault counts as handled if at
// least one returns HSA_STATUS_SUCCESS. Otherwise an optional diagnostic is
// printed, an optional runtime-generated GPU core dump is attempted and the
// process is aborted.
bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
  core::InterruptSignal* vm_fault_signal =
      reinterpret_cast<core::InterruptSignal*>(arg);

  assert(vm_fault_signal != NULL);

  if (vm_fault_signal == NULL) {
    return false;
  }

  HsaEvent* vm_fault_event = vm_fault_signal->EopEvent();

  HsaMemoryAccessFault& fault =
      vm_fault_event->EventData.EventData.MemoryAccessFault;

  hsa_status_t custom_handler_status = HSA_STATUS_ERROR;
  auto system_event_handlers = runtime_singleton_->GetSystemEventHandlers();
  Agent* faulty_agent = nullptr;
  // If custom handler is registered, pack the fault info and call the handler
  if (!system_event_handlers.empty()) {
    hsa_amd_event_t memory_fault_event;
    memory_fault_event.event_type = HSA_AMD_GPU_MEMORY_FAULT_EVENT;
    hsa_amd_gpu_memory_fault_info_t& fault_info = memory_fault_event.memory_fault;

    // Find the faulty agent. The lookup is checked at run time as well as by the
    // assert so that an unknown node never dereferences end() in release builds.
    auto it = runtime_singleton_->agents_by_node_.find(fault.NodeId);
    const bool agent_found =
        (it != runtime_singleton_->agents_by_node_.end()) && !it->second.empty();
    assert(agent_found && "Can't find faulty agent.");
    if (agent_found) {
      faulty_agent = it->second.front();
      fault_info.agent = Agent::Convert(faulty_agent);
    } else {
      fault_info.agent.handle = 0;
    }

    fault_info.virtual_address = fault.VirtualAddress;
    fault_info.fault_reason_mask = 0;
    if (fault.Failure.NotPresent == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_PAGE_NOT_PRESENT;
    }
    if (fault.Failure.ReadOnly == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_READ_ONLY;
    }
    if (fault.Failure.NoExecute == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_NX;
    }
    if (fault.Failure.GpuAccess == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_HOST_ONLY;
    }
    if (fault.Failure.Imprecise == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_IMPRECISE;
    }
    if (fault.Failure.ECC == 1 && fault.Failure.ErrorType == 0) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAMECC;
    }
    if (fault.Failure.ErrorType == 1) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_SRAMECC;
    }
    if (fault.Failure.ErrorType == 2) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_DRAMECC;
    }
    if (fault.Failure.ErrorType == 3) {
      fault_info.fault_reason_mask |= HSA_AMD_MEMORY_FAULT_HANG;
    }

    for (auto& callback : system_event_handlers) {
      hsa_status_t err = callback.first(&memory_fault_event, callback.second);
      if (err == HSA_STATUS_SUCCESS) custom_handler_status = HSA_STATUS_SUCCESS;
    }
  }

  // No custom VM fault handler registered or it failed.
  if (custom_handler_status != HSA_STATUS_SUCCESS) {
    if (runtime_singleton_->flag().enable_vm_fault_message()) {
      // Only the first matching reason is reported in the text message.
      std::string reason = "";
      if (fault.Failure.NotPresent == 1) {
        reason += "Page not present or supervisor privilege";
      } else if (fault.Failure.ReadOnly == 1) {
        reason += "Write access to a read-only page";
      } else if (fault.Failure.NoExecute == 1) {
        reason += "Execute access to a page marked NX";
      } else if (fault.Failure.GpuAccess == 1) {
        reason += "Host access only";
      } else if ((fault.Failure.ECC == 1 && fault.Failure.ErrorType == 0) ||
                 fault.Failure.ErrorType == 2) {
        reason += "DRAM ECC failure";
      } else if (fault.Failure.ErrorType == 1) {
        reason += "SRAM ECC failure";
      } else if (fault.Failure.ErrorType == 3) {
        reason += "Generic hang recovery";
      } else {
        reason += "Unknown";
      }

      // Checked lookup: operator[] would insert an empty entry for an unknown
      // node and indexing element 0 of it is undefined behaviour.
      auto agent_it = runtime_singleton_->agents_by_node_.find(fault.NodeId);
      const bool have_agent =
          (agent_it != runtime_singleton_->agents_by_node_.end()) && !agent_it->second.empty();
      faulty_agent = have_agent ? agent_it->second.front() : nullptr;

      void* agent_handle = (faulty_agent != nullptr)
          ? reinterpret_cast<void*>(faulty_agent->public_handle().handle)
          : nullptr;

      fprintf(
          stderr,
          "Memory access fault by GPU node-%u (Agent handle: %p) on address %p%s. Reason: %s.\n",
          fault.NodeId, agent_handle,
          reinterpret_cast<const void*>(fault.VirtualAddress),
          (fault.Failure.Imprecise == 1) ? "(may not be exact address)" : "", reason.c_str());

#ifndef NDEBUG
      PrintMemoryMapNear(reinterpret_cast<void*>(fault.VirtualAddress));
#endif
    }
    // Fallback if KFD does not support GPU core dump. In this case, the core dump is
    // generated by hsa-runtime.
    // NOTE(review): faulty_agent is only resolved when a custom handler is registered
    // or the VM fault message is enabled, so this fallback is skipped otherwise —
    // confirm that this coupling is intended.
    if (faulty_agent &&
        faulty_agent->supported_isas()[0]->GetMajorVersion() != 11 &&
                      !runtime_singleton_->KfdVersion().supports_core_dump) {

      if (pcs::PcsRuntime::instance()->SessionsActive())
        fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");
      else if (amd::coredump::dump_gpu_core())
        fprintf(stderr, "GPU core dump failed\n");
    }
    assert(false && "GPU memory access fault.");
    std::abort();
  }
  // No need to keep the signal because we are done.
  return false;
}

// Prints, to stderr, up to three allocation_map_ entries around `ptr` (starting
// up to two entries before the first entry whose key is greater than `ptr`),
// followed by the PtrInfo details of each of those entries.
//
// The entries are read while memory_lock_ is held. Their addresses are copied
// into a local list so that the PtrInfo pass, which runs after the lock has
// been released, never touches allocation_map_ iterators without the lock.
void Runtime::PrintMemoryMapNear(void* ptr) {
  // Reserve before taking the lock so the push_back calls below cannot allocate.
  std::vector<const void*> nearby;
  nearby.reserve(3);

  runtime_singleton_->memory_lock_.Acquire();
  auto it = runtime_singleton_->allocation_map_.upper_bound(ptr);
  for (int i = 0; i < 2; i++) {
    if (it != runtime_singleton_->allocation_map_.begin()) it--;
  }
  fprintf(stderr, "Nearby memory map:\n");
  for (int i = 0; i < 3; i++) {
    if (it == runtime_singleton_->allocation_map_.end()) break;
    const char* kind = "Non-HSA";
    if (it->second.region != nullptr) {
      const AMD::MemoryRegion* region = static_cast<const AMD::MemoryRegion*>(it->second.region);
      if (region->IsSystem())
        kind = "System";
      else if (region->IsLocalMemory())
        kind = "VRAM";
      else if (region->IsScratch())
        kind = "Scratch";
      else if (region->IsLDS())
        kind = "LDS";
    }
    fprintf(stderr, "%p, 0x%lx, %s\n", it->first, it->second.size, kind);
    nearby.push_back(it->first);
    it++;
  }
  fprintf(stderr, "\n");
  runtime_singleton_->memory_lock_.Release();

  // Query pointer info outside the lock, using the snapshot taken above.
  for (const void* address : nearby) {
    hsa_amd_pointer_info_t info = {};
    PtrInfoBlockData block = {};
    uint32_t count = 0;
    hsa_agent_t* canAccess = nullptr;
    info.size = sizeof(info);
    hsa_status_t err = runtime_singleton_->PtrInfo(const_cast<void*>(address), &info, malloc,
                                                   &count, &canAccess, &block);
    if (err == HSA_STATUS_SUCCESS) {
      fprintf(stderr, "PtrInfo:\n\tAddress: %p-%p/%p-%p\n\tSize: 0x%lx\n\tType: %u\n\tOwner: %p\n",
              info.agentBaseAddress, (char*)info.agentBaseAddress + info.sizeInBytes,
              info.hostBaseAddress, (char*)info.hostBaseAddress + info.sizeInBytes, info.sizeInBytes,
              info.type, reinterpret_cast<void*>(info.agentOwner.handle));
      fprintf(stderr, "\tCanAccess: %u\n", count);
      for (uint32_t t = 0; t < count; t++)
        fprintf(stderr, "\t\t%p\n", reinterpret_cast<void*>(canAccess[t].handle));
      fprintf(stderr, "\tIn block: %p, 0x%lx\n", block.base, block.length);
      // The accessible-agent list was allocated through the malloc callback above.
      free(canAccess);
    }
  }
}

// Default constructor: puts every pointer/counter member into a null/zero state
// and sets the feature flags and global wait-mode switches to their defaults.
// No events or signals are created here; the fault event/signal members start
// out null.
Runtime::Runtime()
    : loader_(nullptr),
      region_gpu_(nullptr),
      sys_clock_freq_(0),
      num_nodes_(0),
      vm_fault_event_(nullptr),
      vm_fault_signal_(nullptr),
      hw_exception_event_(nullptr),
      hw_exception_signal_(nullptr),
      internal_queue_create_notifier_user_data_(nullptr),
      ref_count_(0),
      kfd_version{},
      ipc_sock_server_fd_(0) {

  // Optional features start disabled.
  virtual_mem_api_supported_ = false;
  ipc_dmabuf_supported_ = false;
  xnack_enabled_ = false;
  // Two async event sets: only asyncExceptions_ is flagged as monitoring exceptions.
  asyncSignals_.monitor_exceptions = false;
  asyncExceptions_.monitor_exceptions = true;
  // Defaults for the global wait-mode switches; Load() overwrites both from flag_.
  g_use_interrupt_wait = true;
  g_use_mwaitx = true;
  // Initialise the global _amdgpu_r_debug record; its third field is the address
  // of _loader_debug_state and its state field is RT_CONSISTENT.
  // NOTE(review): the leading 11 is presumably the r_debug version — confirm
  // against the r_debug definition.
  ::_amdgpu_r_debug = {11,
                     nullptr,
                     reinterpret_cast<uintptr_t>(
                                &_loader_debug_state),
                     r_debug::RT_CONSISTENT,
                     0};

  // Log output goes to stderr by default.
  log_file = stderr;
}

// Brings the runtime up: refreshes flags, creates the thunk instance, loads the
// AMD layer, binds the error handlers, creates the loader, loads extensions and
// tools, and probes optional features.
//
// @return HSA_STATUS_SUCCESS on success;
//         HSA_STATUS_ERROR_NOT_INITIALIZED if the thunk instance cannot be created;
//         HSA_STATUS_ERROR_OUT_OF_RESOURCES if AMD::Load() fails;
//         otherwise the first non-success status returned by a GPU agent's
//         PostToolsInit().
// The steps below run in a fixed order; the early returns do not undo the steps
// that already ran.
hsa_status_t Runtime::Load() {
  os::cpuid_t cpuinfo;

  // Assume features are not supported if parse CPUID fails
  if (!os::ParseCpuID(&cpuinfo)) {
    /*
     * This is not a failure, in some environments such as SRIOV, not all CPUID info is
     * exposed inside the guest
     */
    debug_warning("Parsing CPUID failed.");
  }

  flag_.Refresh();

  // Create the thunk loader and its API table before anything talks to the driver.
  thunkLoader_ = new ThunkLoader();
  thunkLoader_->LoadThunkApiTable();

  if (!thunkLoader_->CreateThunkInstance()) {
    return HSA_STATUS_ERROR_NOT_INITIALIZED;
  }

  // Replace the constructor defaults with the values selected by the flags.
  g_use_interrupt_wait = flag_.enable_interrupt();
  g_use_mwaitx = flag_.check_mwaitx(cpuinfo.mwaitx);

  if (!AMD::Load()) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Setup system clock frequency for the first time.
  if (sys_clock_freq_ == 0) {
    sys_clock_freq_ = os::SystemClockFrequency();
    if (sys_clock_freq_ < 100000) debug_warning("System clock resolution is low.");
  }

  // Register the VM fault / HW exception handlers (a no-op without interrupt
  // waits or GPU agents).
  BindErrorHandlers();

  loader_ = amd::hsa::loader::Loader::Create(&loader_context_);

  // Load extensions
  LoadExtensions();

  // Initialize per GPU scratch, blits, and trap handler
  for (core::Agent* agent : gpu_agents_) {
    hsa_status_t status =
        reinterpret_cast<AMD::GpuAgentInt*>(agent)->PostToolsInit();

    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }

  // Load tools libraries
  LoadTools();

  // Initialize libdrm helper function
  CheckVirtualMemApiSupport();

  // Initialize IPC support mode
  InitIPCDmaBufSupport();

  // Load svm profiler
  svm_profile_.reset(new AMD::SvmProfileControl);

  return HSA_STATUS_SUCCESS;
}

// Tears the runtime down: closes the IPC socket server, unloads tools and
// extensions, destroys the loader, releases agent resources, stops the async
// event threads, destroys the fault signals/events, clears pools and handle
// maps, destroys agents and drivers, and finally destroys the thunk instance.
void Runtime::Unload() {
  // Close IPC socket server
  if (ipc_sock_server_conns_.size())
    IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE,
                    NULL, 0, NULL, NULL, NULL);

  svm_profile_.reset(nullptr);

  UnloadTools();
  UnloadExtensions();

  amd::hsa::loader::Loader::Destroy(loader_);
  loader_ = nullptr;

  // Iterate by reference: copying each map entry would duplicate its agent vector.
  for (const auto& nodeAgent : agents_by_node_) {
    for (auto agent : nodeAgent.second)
      agent->ReleaseResources();
  }

  // Stop the async event threads before the signals they may reference are destroyed.
  asyncSignals_.control.Shutdown();
  asyncExceptions_.control.Shutdown();

  if (vm_fault_signal_ != nullptr) {
    vm_fault_signal_->DestroySignal();
    vm_fault_signal_ = nullptr;
  }
  core::InterruptSignal::DestroyEvent(vm_fault_event_);
  vm_fault_event_ = nullptr;

  if (hw_exception_signal_ != nullptr) {
    hw_exception_signal_->DestroySignal();
    hw_exception_signal_ = nullptr;
  }
  core::InterruptSignal::DestroyEvent(hw_exception_event_);
  hw_exception_event_ = nullptr;

  SharedSignalPool.clear();

  EventPool.clear();

  mapped_handle_map_.clear();
  memory_handle_map_.clear();

  DestroyAgents();

  CloseTools();

  AMD::Unload();

  DestroyDrivers();

  // Destroy the thunk loader and clear the pointer so that it does not dangle
  // after the delete.
  if (thunkLoader_ != nullptr) {
    thunkLoader_->DestroyThunkInstance();

    delete thunkLoader_;
    thunkLoader_ = nullptr;
  }
}

// Links the extension API tables (finalizer, image, PC sampling) into the HSA
// API table, loading the image and PC sampling extensions first.
void Runtime::LoadExtensions() {
  // Update Hsa Api Table with handle of Finalizer extension Apis
  // The finalizer library is no longer distributed, so nothing is loaded for it;
  // LinkExts will expose the finalizer-not-present implementation.
  hsa_api_table().LinkExts(&extensions_.finalizer_api,
                          core::HsaApiTable::HSA_EXT_FINALIZER_API_TABLE_ID);

  // Update Hsa Api Table with handle of Image extension Apis
  extensions_.LoadImage();
  hsa_api_table().LinkExts(&extensions_.image_api,
                          core::HsaApiTable::HSA_EXT_IMAGE_API_TABLE_ID);

  // Update Hsa Api Table with handle of PCS extension Apis
  extensions_.LoadPcSampling();
  hsa_api_table().LinkExts(&extensions_.pcs_api,
                          core::HsaApiTable::HSA_EXT_PC_SAMPLING_API_TABLE_ID);
}

// Unloads the extensions by delegating to extensions_.Unload().
void Runtime::UnloadExtensions() {
  extensions_.Unload();
}

// Splits a tool library list into individual names.
//
// Rules:
//  - An unquoted space ends the current name.
//  - A double quote toggles quoted mode; spaces inside quotes are kept, and the
//    closing quote ends the current name.
//  - A backslash copies the following character literally (for example an
//    escaped space or quote).
//  - A backslash that is the very last character has nothing to escape; it is
//    dropped and parsing ends. (Previously this case never consumed any input
//    and the loop did not terminate.)
//
// @param tool_names  Raw list; taken by value because it is consumed while parsing.
// @return The parsed names in input order. A non-empty trailing name is
//         included even if it was not terminated by a space or quote.
static std::vector<std::string> parse_tool_names(std::string tool_names) {
  std::vector<std::string> names;
  std::string name;
  bool quoted = false;
  while (!tool_names.empty()) {
    const auto index = tool_names.find_first_of(" \"\\");
    if (index == std::string::npos) {
      // No more special characters: the rest belongs to the current name.
      name += tool_names;
      break;
    }
    switch (tool_names[index]) {
      case ' ': {
        if (!quoted) {
          // Separator: finish the current name.
          name += tool_names.substr(0, index);
          tool_names.erase(0, index + 1);
          names.push_back(name);
          name.clear();
        } else {
          // Inside quotes the space is part of the name.
          name += tool_names.substr(0, index + 1);
          tool_names.erase(0, index + 1);
        }
        break;
      }
      case '\"': {
        if (quoted) {
          // Closing quote: finish the current name.
          quoted = false;
          name += tool_names.substr(0, index);
          tool_names.erase(0, index + 1);
          names.push_back(name);
          name.clear();
        } else {
          // Opening quote: text before it is discarded, as before.
          quoted = true;
          tool_names.erase(0, index + 1);
        }
        break;
      }
      case '\\': {
        name += tool_names.substr(0, index);
        if (index + 1 < tool_names.size()) {
          // Copy the escaped character verbatim and skip past it.
          name += tool_names[index + 1];
          tool_names.erase(0, index + 2);
        } else {
          // Dangling backslash at end of input: drop it so the loop terminates.
          tool_names.clear();
        }
        break;
      }
    }  // end switch
  }    // end while

  if (!name.empty()) names.push_back(name);
  return names;
}


// Pointer to libdrm's amdgpu_device_get_fd. It starts out NULL and is assigned
// by Runtime::CheckVirtualMemApiSupport / Runtime::InitIPCDmaBufSupport, either
// to the symbol found through dlsym or to fn_amdgpu_device_get_fd_nosupport.
static int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle) = NULL;

// Stand-in used when libdrm does not export amdgpu_device_get_fd: prints a
// diagnostic to stderr and returns -1. The device handle is ignored.
int fn_amdgpu_device_get_fd_nosupport(HsaAMDGPUDeviceHandle device_handle) {
  // Terminate the message with a newline so later stderr output starts on its own line.
  fprintf(stderr, "amdgpu_device_get_fd not available. Please update version of libdrm\n");
  return -1;
}

// Looks up the DRM render fd of `agent` and the address reported by the
// DRM_AMDGPU_GEM_MMAP query for the buffer object behind `handle`.
//
// @param agent     GPU agent that owns the buffer (cast to AMD::GpuAgent).
// @param handle    Shareable handle whose `handle` field is an amdgpu_bo_handle.
// @param drm_fd    [out] Render node file descriptor of the agent's device.
// @param cpu_addr  [out] args.out.addr_ptr as returned by the GEM mmap query.
// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR on any failure. The
//         outputs are written only on success.
int Runtime::GetAmdgpuDeviceArgs(Agent *agent, ShareableHandle handle,
                                 int *drm_fd, uint64_t *cpu_addr) {
  // The function pointer is NULL until CheckVirtualMemApiSupport or
  // InitIPCDmaBufSupport has resolved it; fail instead of calling through NULL.
  if (fn_amdgpu_device_get_fd == NULL) return HSA_STATUS_ERROR;

  int renderFd = fn_amdgpu_device_get_fd(static_cast<AMD::GpuAgent*>(agent)->libDrmDev());
  if (renderFd < 0) return HSA_STATUS_ERROR;

  uint32_t gem_handle = 0;
  if (DRM_CALL(amdgpu_bo_export(reinterpret_cast<amdgpu_bo_handle>(handle.handle),
                       amdgpu_bo_handle_type_kms, &gem_handle)))
    return HSA_STATUS_ERROR;

  union drm_amdgpu_gem_mmap args;
  memset(&args, 0, sizeof(args));
  /* Query the buffer address (args.addr_ptr).
   * The kernel driver ignores the offset and size parameters. */
  args.in.handle = gem_handle;
  if (DRM_CALL(drmCommandWriteRead(renderFd, DRM_AMDGPU_GEM_MMAP, &args, sizeof(args))))
    return HSA_STATUS_ERROR;

  *drm_fd = renderFd;
  *cpu_addr = args.out.addr_ptr;
  return HSA_STATUS_SUCCESS;
}

void Runtime::CheckVirtualMemApiSupport() {

  auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;

  if (kfd_version.KernelInterfaceMajorVersion > 1 ||
      (kfd_version.KernelInterfaceMajorVersion == 1 &&
          kfd_version.KernelInterfaceMinorVersion >= 15)) {
    char* error;

    fn_amdgpu_device_get_fd =
        (int (*)(HsaAMDGPUDeviceHandle device_handle))dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
    if ((error = dlerror()) != NULL) {
      debug_warning("amdgpu_device_get_fd not available. Please update version of libdrm");
      fn_amdgpu_device_get_fd = &fn_amdgpu_device_get_fd_nosupport;
    } else {
      virtual_mem_api_supported_ = true;
    }
  }
}

void Runtime::InitIPCDmaBufSupport() {
  bool dmabuf_supported = false;

  // Early exit so we don't double load lib DRM
  if (virtual_mem_api_supported_) {
    ipc_dmabuf_supported_ = !flag().enable_ipc_mode_legacy();
    return;
  }

  GetSystemInfo(HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED, &dmabuf_supported);
  if (!dmabuf_supported) return;

  char* error;
  fn_amdgpu_device_get_fd =
      (int (*)(HsaAMDGPUDeviceHandle device_handle))dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
  if ((error = dlerror()) != NULL) {
    debug_warning("amdgpu_device_get_fd not available. Please update version of libdrm");
    fn_amdgpu_device_get_fd = &fn_amdgpu_device_get_fd_nosupport;
  } else {
    ipc_dmabuf_supported_ = !flag().enable_ipc_mode_legacy();
  }
}

void Runtime::LoadTools() {
  typedef bool (*tool_init_t)(::HsaApiTable*, uint64_t, uint64_t,
                              const char* const*);
  typedef Agent* (*tool_wrap_t)(Agent*);
  typedef void (*tool_add_t)(Runtime*);

#if defined(HSA_ROCPROFILER_REGISTER) && HSA_ROCPROFILER_REGISTER > 0
  if (!flag().disable_tool_register()) {
    auto* profiler_api_table_ = static_cast<void*>(&hsa_api_table());
    auto lib_id = rocprofiler_register_library_indentifier_t{};
    auto rocp_reg_status =
        rocprofiler_register_library_api_table("hsa", &ROCPROFILER_REGISTER_IMPORT_FUNC(hsa),
                                               ROCP_REG_VERSION, &profiler_api_table_, 1, &lib_id);

    if (rocp_reg_status != ROCP_REG_SUCCESS && flag().report_tool_register_failures()) {
      fprintf(stderr, "[hsa-runtime][%i] rocprofiler-register returned status code %i: %s\n",
              getpid(), rocp_reg_status, rocprofiler_register_error_string(rocp_reg_status));
    }

    bool allow_v1_registration = false;
    if (os::IsEnvVarSet("HSA_TOOLS_ROCPROFILER_V1_TOOLS")) {
      // assume true if env variable is set
      allow_v1_registration = true;
      auto allow_v1_value = os::GetEnvVar("HSA_TOOLS_ROCPROFILER_V1_TOOLS");
      // support using numbers, off, false, no, n, or f
      if (!allow_v1_value.empty()) {
        if (allow_v1_value.find_first_not_of("0123456789") == std::string::npos) {
          allow_v1_registration = (std::stoi(allow_v1_value) != 0);
        } else if (std::regex_match(
                       allow_v1_value,
                       std::regex{"^(off|false|no|n|f)$", std::regex_constants::icase})) {
          allow_v1_registration = false;
        }
      }
    }

    // if rocprofiler library supports registration and v1 support not explicitly requested,
    // do not use old method
    if (rocp_reg_status == ROCP_REG_SUCCESS && !allow_v1_registration) return;
  }
#endif

  std::vector<const char*> failed;

  //Get loaded libs and filter to tool libraries.
  struct lib_t {
    lib_t(os::LibHandle lib, uint32_t order, std::string name) : lib_(lib), order_(order), name_(name) {}
    os::LibHandle lib_;
    uint32_t order_;
    std::string name_;
  };

  std::list<lib_t> sorted;
  uint32_t env_count=0;

  // Load env var tool lib names and determine ordering offset.
  std::string tool_names = flag_.tools_lib_names();
  std::vector<std::string> names;
  if (tool_names != "") {
    names = parse_tool_names(std::move(tool_names));
    env_count = names.size();
  }

  // Discover loaded tools.
  std::vector<os::LibHandle> loaded_hds = os::GetLoadedToolsLib();
  for(auto& handle : loaded_hds) {
    const uint32_t* order = (const uint32_t*)os::GetExportAddress(handle, "HSA_AMD_TOOL_PRIORITY");
    if(order) {
      sorted.push_back(lib_t(handle, *order+env_count, os::GetLibraryName(handle)));
    } else {
      os::CloseLib(handle);
    }
  }

  // Load env var tools.
  env_count=0;
  for (auto& name : names) {
    os::LibHandle tool = os::LoadLib(name);

    if (tool != nullptr) {
      sorted.push_back(lib_t(tool, env_count, name));
      env_count++;
    } else {
      failed.push_back(name.c_str());
      if (flag().report_tool_load_failures())
        fprintf(stderr, "Tool lib \"%s\" failed to load.\n", name.c_str());
    }
  }

  if(!sorted.empty()) {
    // Close duplicate handles
    sorted.sort([](const lib_t& lhs, const lib_t& rhs) {
      if(lhs.lib_ == rhs.lib_)
        return lhs.order_ < rhs.order_;
      return lhs.lib_ < rhs.lib_;
    });

    os::LibHandle current = sorted.front().lib_;
    auto it = sorted.begin();
    it++;
    while(it != sorted.end()) {
      if(it->lib_==current) {
        os::CloseLib(current);
        auto rem = it;
        it = sorted.erase(rem);
      } else {
        current = it->lib_;
        it++;
      }
    }

    // Sort to load order
    sorted.sort([](const lib_t& lhs, const lib_t& rhs) {
      return lhs.order_ < rhs.order_;
    });

    for(auto& lib : sorted) {
      auto& tool = lib.lib_;

      rocr::AMD::callback_t<tool_init_t> ld = (tool_init_t)os::GetExportAddress(tool, "OnLoad");
      if (!ld) {
        failed.push_back(lib.name_.c_str());
        os::CloseLib(tool);
        continue;
      }
      if (!ld(&hsa_api_table().hsa_api,
        hsa_api_table().hsa_api.version.major_id,
        failed.size(), failed.data())) {
          failed.push_back(lib.name_.c_str());
          os::CloseLib(tool);
          continue;
      }
      tool_libs_.push_back(tool);

      rocr::AMD::callback_t<tool_wrap_t> wrap =
        (tool_wrap_t)os::GetExportAddress(tool, "WrapAgent");
      if (wrap) {
        std::vector<core::Agent*>* agent_lists[2] = {&cpu_agents_,
          &gpu_agents_};
        for (std::vector<core::Agent*>* agent_list : agent_lists) {
          for (size_t agent_idx = 0; agent_idx < agent_list->size();
            ++agent_idx) {
              Agent* agent = wrap(agent_list->at(agent_idx));
              if (agent != NULL) {
                assert(agent->IsValid() &&
                  "Agent returned from WrapAgent is not valid");
                agent_list->at(agent_idx) = agent;
              }
          }
        }
      }

      rocr::AMD::callback_t<tool_add_t> add = (tool_add_t)os::GetExportAddress(tool, "AddAgent");
      if (add) add(this);
    }
  }
}

// Calls the optional OnUnload export of every loaded tool, last-loaded first,
// then resets the HSA API table.
void Runtime::UnloadTools() {
  typedef void (*tool_unload_t)();

  // Count down from the back of tool_libs_ so tools unload in reverse load order.
  size_t remaining = tool_libs_.size();
  while (remaining > 0) {
    --remaining;
    tool_unload_t on_unload =
        (tool_unload_t)os::GetExportAddress(tool_libs_[remaining], "OnUnload");
    if (on_unload) on_unload();
  }

  // Reset API table in case some tool doesn't cleanup properly
  hsa_api_table().Reset();
}

// Closes every loaded tool library (unless running under valgrind) and empties
// the list of tool handles.
void Runtime::CloseTools() {
  // Due to valgrind bug, runtime cannot dlclose extensions see:
  // http://valgrind.org/docs/manual/faq.html#faq.unhelpful
  const bool may_close = !flag_.running_valgrind();
  if (may_close) {
    for (auto handle_it = tool_libs_.begin(); handle_it != tool_libs_.end(); ++handle_it) {
      os::CloseLib(*handle_it);
    }
  }
  tool_libs_.clear();
}

// Stops the async events thread, if there is one: sets the exit flag, stores 1
// to the wake signal, waits for the thread to finish, closes it and destroys
// the wake signal. Does nothing when no thread exists.
void Runtime::AsyncEventsControl::Shutdown() {
  if (async_events_thread_ == NULL) return;

  // Set the exit flag before storing to the wake signal.
  exit = true;
  hsa_signal_handle(wake)->StoreRelaxed(1);

  os::WaitForThread(async_events_thread_);
  os::CloseThread(async_events_thread_);
  async_events_thread_ = NULL;

  HSA::hsa_signal_destroy(wake);
}

// Appends one entry; its fields are stored at the same index of the five
// parallel arrays (signal, condition, compare value, handler, handler argument).
void Runtime::AsyncEvents::PushBack(hsa_signal_t signal,
                                    hsa_signal_condition_t cond,
                                    hsa_signal_value_t value,
                                    hsa_amd_signal_handler handler, void* arg) {
  // What to watch.
  signal_.push_back(signal);
  cond_.push_back(cond);
  value_.push_back(value);

  // What to call.
  handler_.push_back(handler);
  arg_.push_back(arg);
}

// Overwrites the entry at index `dst` with the entry at index `src` in all five
// parallel arrays. Indices are not range-checked.
void Runtime::AsyncEvents::CopyIndex(size_t dst, size_t src) {
  // Callback part of the entry.
  handler_[dst] = handler_[src];
  arg_[dst] = arg_[src];

  // Wait condition part of the entry.
  signal_[dst] = signal_[src];
  cond_[dst] = cond_[src];
  value_[dst] = value_[src];
}

// Number of stored entries, taken from the signal array.
size_t Runtime::AsyncEvents::Size() {
  return signal_.size();
}

// Removes the last entry from all five parallel arrays.
void Runtime::AsyncEvents::PopBack() {
  // Callback part of the entry.
  arg_.pop_back();
  handler_.pop_back();

  // Wait condition part of the entry.
  value_.pop_back();
  cond_.pop_back();
  signal_.pop_back();
}

// Removes every entry from all five parallel arrays.
void Runtime::AsyncEvents::Clear() {
  // Callback part of the entries.
  arg_.clear();
  handler_.clear();

  // Wait condition part of the entries.
  value_.clear();
  cond_.clear();
  signal_.clear();
}

// Registers an additional system event callback together with its user data.
// The handler list is modified under system_event_lock_.
//
// @param callback  Callback to invoke for system events.
// @param data      Opaque pointer passed back to the callback.
// @return Always HSA_STATUS_SUCCESS.
hsa_status_t Runtime::SetCustomSystemEventHandler(hsa_amd_system_event_callback_t callback,
                                                  void* data) {
  ScopedAcquire<KernelMutex> lock(&system_event_lock_);

  AMD::callback_t<hsa_amd_system_event_callback_t> wrapped_callback(callback);
  system_event_handlers_.emplace_back(wrapped_callback, data);

  return HSA_STATUS_SUCCESS;
}

// Returns a copy of the registered system event handlers. The copy is made
// while system_event_lock_ is held, so callers can iterate it without the lock.
std::vector<std::pair<AMD::callback_t<hsa_amd_system_event_callback_t>, void*>>
Runtime::GetSystemEventHandlers() {
  ScopedAcquire<KernelMutex> lock(&system_event_lock_);
  auto handlers_snapshot = system_event_handlers_;
  return handlers_snapshot;
}

// Installs the internal-queue-creation notifier and its user data.
//
// @param callback   Notifier to store.
// @param user_data  Opaque pointer stored alongside the notifier.
// @return HSA_STATUS_ERROR if a notifier is already set (nothing is changed),
//         HSA_STATUS_SUCCESS otherwise.
hsa_status_t Runtime::SetInternalQueueCreateNotifier(hsa_amd_runtime_queue_notifier callback,
                                                     void* user_data) {
  // An existing notifier is never replaced.
  if (internal_queue_create_notifier_) return HSA_STATUS_ERROR;

  internal_queue_create_notifier_ = callback;
  internal_queue_create_notifier_user_data_ = user_data;
  return HSA_STATUS_SUCCESS;
}

// Forwards a queue-creation event to the installed notifier, if any, passing
// along the user data that was registered with it.
void Runtime::InternalQueueCreateNotify(const hsa_queue_t* queue, hsa_agent_t agent) {
  // Nothing to do when no notifier has been installed.
  if (!internal_queue_create_notifier_) return;

  internal_queue_create_notifier_(queue, agent, internal_queue_create_notifier_user_data_);
}

// Translates a list of HSA SVM attributes into thunk (HSA_SVM_ATTRIBUTE)
// attributes and applies them to the 4 KiB-aligned range that covers
// [ptr, ptr + size).
//
// Parameters:
//   ptr, size       - range to modify; expanded outward to 4096-byte boundaries.
//   attribute_list  - attribute/value pairs to apply.
//   attribute_count - number of entries in attribute_list.
//
// Returns HSA_STATUS_SUCCESS on success.
//
// Throws AMD::hsa_exception with:
//   HSA_STATUS_ERROR_INVALID_AGENT          - a value is not a valid agent handle.
//   HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS - an attribute is repeated, or several
//                                             access attributes name the same node.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT       - unknown attribute or bad flag value.
//   HSA_STATUS_ERROR                        - hsaKmtSVMSetAttr failed.
hsa_status_t Runtime::SetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  // Bitmask of the (small-valued) attributes already seen, used by Check().
  uint32_t set_attribs = 0;
  // One slot per node id, used by ConfirmNew() to reject repeated agents.
  std::vector<bool> agent_seen(max_node_id() + 1, false);

  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  attribs.reserve(attribute_count);
  // Flag bits accumulate here and are emitted as two attributes at the end.
  uint32_t set_flags = 0;
  uint32_t clear_flags = 0;

  // Converts an attribute value to an agent; the agent must exist and be valid.
  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Same as Convert, but a null handle is accepted and returned as nullptr.
  auto ConvertAllowNull = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent != nullptr) && (!agent->IsValid()))
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::SetSvmAttrib.");
    return agent;
  };

  // Rejects a second access attribute for an agent on an already-seen node.
  auto ConfirmNew = [&](Agent* agent) {
    if (agent_seen[agent->node_id()])
      throw AMD::hsa_exception(
          HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
          "Multiple attributes given for the same agent in Runtime::SetSvmAttrib.");
    agent_seen[agent->node_id()] = true;
  };

  // Rejects an attribute that appears more than once in the list. Only called
  // for attributes whose enum value is small enough to index a 32-bit mask.
  auto Check = [&](uint64_t attrib) {
    const uint32_t bit = uint32_t(1) << attrib;
    if (set_attribs & bit)
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS,
                               "Attribute given multiple times in Runtime::SetSvmAttrib.");
    set_attribs |= bit;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  for (uint32_t i = 0; i < attribute_count; i++) {
    auto attrib = attribute_list[i].attribute;
    auto value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        Check(attrib);
        switch (value) {
          case HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED:
            set_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          case HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED:
            clear_flags |= HSA_SVM_FLAG_COHERENT;
            break;
          default:
            throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                     "Invalid HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG value.");
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_GPU_RO;
        else
          clear_flags |= HSA_SVM_FLAG_GPU_RO;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        else
          clear_flags |= HSA_SVM_FLAG_HIVE_LOCAL;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        Check(attrib);
        // Max migration size is 1GB.
        if (value > 18) value = 18;
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, value));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        Check(attrib);
        // A null agent clears the preferred location.
        Agent* agent = ConvertAllowNull(value);
        if (agent == nullptr)
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, INVALID_NODEID));
        else
          attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, agent->node_id()));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_MOSTLY: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_GPU_READ_MOSTLY;
        else
          clear_flags |= HSA_SVM_FLAG_GPU_READ_MOSTLY;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_GPU_EXEC: {
        Check(attrib);
        if (value)
          set_flags |= HSA_SVM_FLAG_GPU_EXEC;
        else
          clear_flags |= HSA_SVM_FLAG_GPU_EXEC;
        break;
      }
      // Access attributes: CPU agents are expressed through the HOST_ACCESS
      // flag, every other agent through a per-node attribute.
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          set_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS_IN_PLACE, agent->node_id()));
        }
        break;
      }
      case HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS: {
        Agent* agent = Convert(value);
        ConfirmNew(agent);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          clear_flags |= HSA_SVM_FLAG_HOST_ACCESS;
        } else {
          attribs.push_back(kmtPair(HSA_SVM_ATTR_NO_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::SetSvmAttrib");
    }
  }

  // Merge CPU access properties - grant access if any CPU needs access.
  // Probably wrong.
  if (set_flags & HSA_SVM_FLAG_HOST_ACCESS) clear_flags &= ~HSA_SVM_FLAG_HOST_ACCESS;

  // Add flag updates
  if (clear_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_CLR_FLAGS, clear_flags));
  if (set_flags) attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, set_flags));

  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;
  // data() rather than &attribs[0]: the list is empty when attribute_count is
  // zero, and indexing an empty vector is undefined behaviour.
  HSAKMT_STATUS error = HSAKMT_CALL(hsaKmtSVMSetAttr(base, len, attribs.size(), attribs.data()));
  if (error != HSAKMT_STATUS_SUCCESS)
    throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMSetAttr failed.");

  return HSA_STATUS_SUCCESS;
}

// Queries SVM attributes for the 4 KiB-aligned range covering [ptr, ptr + size)
// and writes the results back into attribute_list.
//
// Parameters:
//   ptr, size       - range to query; expanded outward to 4096-byte boundaries.
//   attribute_list  - in/out. On entry each pair names the attribute to query
//                     (for HSA_AMD_SVM_ATTRIB_ACCESS_QUERY the value holds the
//                     agent handle). On return the value is filled in; for
//                     ACCESS_QUERY the attribute field itself is replaced by the
//                     agent's access attribute.
//   attribute_count - number of entries in attribute_list.
//
// Returns HSA_STATUS_SUCCESS on success.
//
// Throws AMD::hsa_exception with HSA_STATUS_ERROR_INVALID_AGENT for a bad agent
// handle, HSA_STATUS_ERROR_INVALID_ARGUMENT for an unsupported attribute and
// HSA_STATUS_ERROR when hsaKmtSVMGetAttr fails.
hsa_status_t Runtime::GetSvmAttrib(void* ptr, size_t size,
                                   hsa_amd_svm_attribute_pair_t* attribute_list,
                                   size_t attribute_count) {
  std::vector<HSA_SVM_ATTRIBUTE> attribs;
  attribs.reserve(attribute_count);

  // For each requested attribute, the index of its thunk attribute in attribs,
  // or -1 when the answer does not come from a dedicated thunk attribute.
  std::vector<int> kmtIndices(attribute_count);

  // Set when any requested attribute is derived from the SVM flag words.
  bool getFlags = false;

  auto Convert = [&](uint64_t value) -> Agent* {
    hsa_agent_t handle = {value};
    Agent* agent = Agent::Convert(handle);
    if ((agent == nullptr) || !agent->IsValid())
      throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_AGENT,
                               "Invalid agent handle in Runtime::GetSvmAttrib.");
    return agent;
  };

  auto kmtPair = [](uint32_t attrib, uint32_t value) {
    HSA_SVM_ATTRIBUTE pair = {attrib, value};
    return pair;
  };

  // Pass 1: build the thunk query.
  for (uint32_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG:
      case HSA_AMD_SVM_ATTRIB_READ_ONLY:
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL:
      case HSA_AMD_SVM_ATTRIB_READ_MOSTLY: {
        getFlags = true;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        kmtIndices[i] = attribs.size();
        attribs.push_back(kmtPair(HSA_SVM_ATTR_GRANULARITY, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        kmtIndices[i] = attribs.size();
        attribs.push_back(kmtPair(HSA_SVM_ATTR_PREFERRED_LOC, 0));
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Answered immediately from the runtime's own prefetch tracking.
        value = Agent::Convert(GetSVMPrefetchAgent(ptr, size)).handle;
        kmtIndices[i] = -1;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        Agent* agent = Convert(value);
        if (agent->device_type() == Agent::kAmdCpuDevice) {
          // CPU access is reported through the HOST_ACCESS flag.
          getFlags = true;
          kmtIndices[i] = -1;
        } else {
          kmtIndices[i] = attribs.size();
          attribs.push_back(kmtPair(HSA_SVM_ATTR_ACCESS, agent->node_id()));
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  if (getFlags) {
    // Order is important to later code: after these two pushes the last
    // element is SET_FLAGS and the one before it is CLR_FLAGS.
    attribs.push_back(kmtPair(HSA_SVM_ATTR_CLR_FLAGS, 0));
    attribs.push_back(kmtPair(HSA_SVM_ATTR_SET_FLAGS, 0));
  }

  uint8_t* base = AlignDown((uint8_t*)ptr, 4096);
  uint8_t* end = AlignUp((uint8_t*)ptr + size, 4096);
  size_t len = end - base;
  if (attribs.size() != 0) {
    HSAKMT_STATUS error = HSAKMT_CALL(hsaKmtSVMGetAttr(base, len, attribs.size(), &attribs[0]));
    if (error != HSAKMT_STATUS_SUCCESS)
      throw AMD::hsa_exception(HSA_STATUS_ERROR, "hsaKmtSVMGetAttr failed.");
  }

  // Pass 2: translate the thunk results back into HSA attributes.
  for (uint32_t i = 0; i < attribute_count; i++) {
    auto& attrib = attribute_list[i].attribute;
    auto& value = attribute_list[i].value;

    switch (attrib) {
      case HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG: {
        // COHERENT in SET_FLAGS -> fine grained; in CLR_FLAGS -> coarse
        // grained; in neither -> indeterminate.
        if (attribs[attribs.size() - 1].value & HSA_SVM_FLAG_COHERENT) {
          value = HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED;
          break;
        }
        if (attribs[attribs.size() - 2].value & HSA_SVM_FLAG_COHERENT)
          value = HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED;
        else
          value = HSA_AMD_SVM_GLOBAL_FLAG_INDETERMINATE;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_ONLY: {
        value = (attribs[attribs.size() - 1].value & HSA_SVM_FLAG_GPU_RO);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_HIVE_LOCAL: {
        value = (attribs[attribs.size() - 1].value & HSA_SVM_FLAG_HIVE_LOCAL);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY: {
        value = attribs[kmtIndices[i]].value;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION: {
        uint64_t node = attribs[kmtIndices[i]].value;
        Agent* agent = nullptr;
        if (node != INVALID_NODEID) agent = agents_by_node_[node][0];
        value = Agent::Convert(agent).handle;
        break;
      }
      case HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION: {
        // Already filled in during pass 1.
        break;
      }
      case HSA_AMD_SVM_ATTRIB_READ_MOSTLY: {
        value = (attribs[attribs.size() - 1].value & HSA_SVM_FLAG_GPU_READ_MOSTLY);
        break;
      }
      case HSA_AMD_SVM_ATTRIB_ACCESS_QUERY: {
        if (kmtIndices[i] == -1) {
          // CPU agent: the query attribute must always be replaced by a
          // concrete access attribute, so report no-access when the
          // HOST_ACCESS flag is not set.
          if (attribs[attribs.size() - 1].value & HSA_SVM_FLAG_HOST_ACCESS)
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
          else
            attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
        } else {
          // Non-CPU agent: the thunk rewrites the attribute type to the
          // agent's current access mode.
          switch (attribs[kmtIndices[i]].type) {
            case HSA_SVM_ATTR_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE;
              break;
            case HSA_SVM_ATTR_ACCESS_IN_PLACE:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE;
              break;
            case HSA_SVM_ATTR_NO_ACCESS:
              attrib = HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS;
              break;
            default:
              assert(false && "Bad agent accessibility from KFD.");
          }
        }
        break;
      }
      default:
        throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT,
                                 "Illegal or invalid attribute in Runtime::GetSvmAttrib");
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Schedules an asynchronous SVM prefetch of the 4 KiB-aligned range covering
// [ptr, ptr + size) to the node of `agent`.
//
// The request is recorded in prefetch_map_ (replacing or trimming any older
// ranges it overlaps) and then performed from an async callback once all
// dependency signals have reached 0. When the prefetch has been issued the
// ranges are removed from the map again and, if completion_signal is non-null,
// that signal is decremented by 1.
//
// Parameters:
//   ptr, size         - range to prefetch.
//   agent             - destination agent; a CPU agent is mapped to node id 0.
//   num_dep_signals   - number of entries in dep_signals.
//   dep_signals       - signals that must reach 0 before the prefetch is issued.
//   completion_signal - optional signal decremented once the prefetch is issued.
//
// Returns HSA_STATUS_SUCCESS once the work has been queued.
// Throws AMD::hsa_exception when the async handler could not be registered; in
// that case the ranges are removed from the map and the operation is freed.
//
// NOTE(review): `agent` is dereferenced without a null check here; presumably
// the caller validates the handle - confirm.
hsa_status_t Runtime::SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent,
                                  uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                  hsa_signal_t completion_signal) {
  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);
  size_t len = end - base;

  // The operation is heap allocated because it outlives this call; OpGuard
  // frees it if we leave before ownership passes to the async handler.
  PrefetchOp* op = new PrefetchOp();
  MAKE_NAMED_SCOPE_GUARD(OpGuard, [&]() { delete op; });

  Agent* dest = Agent::Convert(agent);
  if (dest->device_type() == Agent::kAmdCpuDevice)
    op->node_id = 0;
  else
    op->node_id = dest->node_id();

  op->base = reinterpret_cast<void*>(base);
  op->size = len;
  op->completion = completion_signal;
  // The last dependency is waited on first (see the registration at the bottom
  // of this function); all earlier ones are stored on the op and consumed from
  // the back by the signal handler, one per invocation.
  if (num_dep_signals > 1) {
    op->remaining_deps = num_dep_signals - 1;
    for (int i = 0; i < num_dep_signals - 1; i++) op->dep_signals.push_back(dep_signals[i]);
  } else {
    op->remaining_deps = 0;
  }

  {
    ScopedAcquire<KernelMutex> lock(&prefetch_lock_);
    // Remove all fully overlapped and trim partially overlapped ranges.
    // Get iteration bounds
    // `start` is stepped back by one so that a range beginning before `base`
    // but extending into it is also examined.
    auto start = prefetch_map_.upper_bound(base);
    if (start != prefetch_map_.begin()) start--;
    auto stop = prefetch_map_.lower_bound(end);

    // Ranges belonging to one op are chained through prev/next; the map's end
    // iterator marks both ends of that chain.
    auto isEndNode = [&](decltype(start) node) { return node->second.next == prefetch_map_.end(); };
    auto isFirstNode = [&](decltype(start) node) {
      return node->second.prev == prefetch_map_.end();
    };

    // Trim and remove old ranges.
    while (start != stop) {
      uintptr_t startBase = start->first;
      uintptr_t startEnd = startBase + start->second.bytes;

      // [ibase, iend) is the intersection of the old range with the new one.
      auto ibase = Max(startBase, base);
      auto iend = Min(startEnd, end);
      // Check for overlap
      if (ibase < iend) {
        // Second range check
        // The old range extends past the new one: keep its tail as a separate
        // map entry linked into the old op's chain right after `start`.
        if (iend < startEnd) {
          auto ret = prefetch_map_.insert(
              std::make_pair(iend, PrefetchRange(startEnd - iend, start->second.op)));
          assert(ret.second && "Prefetch map insert failed during range split.");

          auto it = ret.first;
          it->second.prev = start;
          it->second.next = start->second.next;
          start->second.next = it;
          if (!isEndNode(it)) it->second.next->second.prev = it;
        }

        // Is the first interval of the old range valid
        if (startBase < ibase) {
          // The old range begins before the new one: shrink it to its head.
          start->second.bytes = ibase - startBase;
        } else {
          // Nothing of the old range remains in front of the new one: unlink
          // it from its op's chain and erase it from the map.
          if (isFirstNode(start)) {
            start->second.op->prefetch_map_entry = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = prefetch_map_.end();
          } else {
            start->second.prev->second.next = start->second.next;
            if (!isEndNode(start)) start->second.next->second.prev = start->second.prev;
          }
          start = prefetch_map_.erase(start);
          continue;
        }
      }
      start++;
    }

    // Insert new range.
    auto ret = prefetch_map_.insert(std::make_pair(base, PrefetchRange(len, op)));
    assert(ret.second && "Prefetch map insert failed.");

    // A new op starts out with a single-entry chain.
    auto it = ret.first;
    op->prefetch_map_entry = it;
    it->second.next = it->second.prev = prefetch_map_.end();
  }

  // Remove the prefetch's ranges from the map.
  // Walks the op's chain from prefetch_map_entry and erases every entry while
  // holding prefetch_lock_.
  static auto removePrefetchRanges = [](PrefetchOp* op) {
    ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
    auto it = op->prefetch_map_entry;
    while (it != Runtime::runtime_singleton_->prefetch_map_.end()) {
      auto next = it->second.next;
      Runtime::runtime_singleton_->prefetch_map_.erase(it);
      it = next;
    }
  };

  // Prefetch Signal handler for synchronization.
  // Each invocation either re-arms itself on the next outstanding dependency
  // or, when none remain, issues the prefetch, drops the op's ranges, signals
  // completion and frees the op. It always returns false.
  static hsa_amd_signal_handler signal_handler = [](hsa_signal_value_t value, void* arg) {
    PrefetchOp* op = reinterpret_cast<PrefetchOp*>(arg);

    if (op->remaining_deps > 0) {
      op->remaining_deps--;
      Runtime::runtime_singleton_->SetAsyncSignalHandler(
          op->dep_signals[op->remaining_deps], HSA_SIGNAL_CONDITION_EQ, 0, signal_handler, arg);
      return false;
    }

    HSA_SVM_ATTRIBUTE attrib;
    attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
    attrib.value = op->node_id;
    HSAKMT_STATUS error = HSAKMT_CALL(hsaKmtSVMSetAttr(op->base, op->size, 1, &attrib));
    assert(error == HSAKMT_STATUS_SUCCESS && "KFD Prefetch failed.");

    removePrefetchRanges(op);

    if (op->completion.handle != 0) Signal::Convert(op->completion)->SubRelaxed(1);
    delete op;

    return false;
  };

  // Adapter used when there is nothing to wait for.
  auto no_dependencies = [](void* arg) { signal_handler(0, arg); };

  // Until the handler is successfully registered, a failure must also take the
  // freshly inserted range back out of the map.
  MAKE_NAMED_SCOPE_GUARD(RangeGuard, [&]() { removePrefetchRanges(op); });

  hsa_status_t err;
  if (num_dep_signals == 0)
    err = AMD::hsa_amd_async_function(no_dependencies, op);
  else
    err = SetAsyncSignalHandler(dep_signals[num_dep_signals - 1], HSA_SIGNAL_CONDITION_EQ, 0,
                                signal_handler, op);
  if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "Signal handler unable to be set.");

  // Ownership of op and its map entries now belongs to the async handler; op
  // must not be touched past this point.
  RangeGuard.Dismiss();
  OpGuard.Dismiss();
  return HSA_STATUS_SUCCESS;
}

// Returns the agent that the 4 KiB-aligned range covering [ptr, ptr + size) is
// being, or was last, prefetched to, or nullptr when there is no single answer.
//
// Pending prefetches recorded in prefetch_map_ take precedence. Every part of
// the query range that no pending prefetch covers (a "hole") is looked up via
// hsaKmtSVMGetAttr. nullptr is returned when
//   - the pending prefetches and/or holes do not all agree on one node,
//   - the thunk reports no/mixed destination (-1) for a hole,
//   - a thunk query fails, or
//   - the query range is empty.
Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
  // Sentinels sharing the value space of node ids.
  constexpr uint32_t kNodeMixed = static_cast<uint32_t>(-1);  // KFD: none or mixed.
  constexpr uint32_t kNodeUnset = static_cast<uint32_t>(-2);  // No candidate seen yet.

  uintptr_t base = reinterpret_cast<uintptr_t>(AlignDown(ptr, 4096));
  uintptr_t end = AlignUp(reinterpret_cast<uintptr_t>(ptr) + size, 4096);

  // (start address, length) of each sub-range not covered by a pending prefetch.
  std::vector<std::pair<uintptr_t, uintptr_t>> holes;

  ScopedAcquire<KernelMutex> lock(&Runtime::runtime_singleton_->prefetch_lock_);
  // Step back by one so a range beginning before `base` but reaching into the
  // query is examined as well. That entry may not overlap the query at all.
  auto start = prefetch_map_.upper_bound(base);
  if (start != prefetch_map_.begin()) start--;
  auto stop = prefetch_map_.lower_bound(end);

  uint32_t prefetch_node = kNodeUnset;

  while (start != stop) {
    uintptr_t startBase = start->first;
    uintptr_t startEnd = startBase + start->second.bytes;

    auto ibase = Max(base, startBase);
    auto iend = Min(end, startEnd);
    // Check for intersection with the query
    if (ibase < iend) {
      // Only ranges that really intersect the query may contribute a node;
      // seeding from a non-overlapping neighbour would poison the result.
      const uint32_t range_node = start->second.op->node_id;
      if (prefetch_node == kNodeUnset) prefetch_node = range_node;

      // If prefetch locations are different then we report null agent.
      if (prefetch_node != range_node) return nullptr;

      // Push leading gap to an array for checking KFD.
      if (base < ibase) holes.push_back(std::make_pair(base, ibase - base));

      // Trim query range.
      base = iend;
    }
    start++;
  }
  // Whatever is left after the last intersecting range is a trailing hole.
  if (base < end) holes.push_back(std::make_pair(base, end - base));

  HSA_SVM_ATTRIBUTE attrib;
  attrib.type = HSA_SVM_ATTR_PREFETCH_LOC;
  for (auto& range : holes) {
    attrib.value = kNodeMixed;
    HSAKMT_STATUS error =
        HSAKMT_CALL(hsaKmtSVMGetAttr(reinterpret_cast<void*>(range.first), range.second, 1, &attrib));
    assert(error == HSAKMT_STATUS_SUCCESS && "KFD prefetch query failed.");
    // Without assertions a failed query must not be interpreted as a node id.
    if (error != HSAKMT_STATUS_SUCCESS) return nullptr;

    if (attrib.value == kNodeMixed) return nullptr;
    if (prefetch_node == kNodeUnset) prefetch_node = attrib.value;
    if (prefetch_node != attrib.value) return nullptr;
  }

  // Nothing intersected and there were no holes (empty query range): there is
  // no agent to report, and the sentinel must not be used as a node index.
  if (prefetch_node == kNodeUnset || prefetch_node == kNodeMixed) return nullptr;

  return agents_by_node_[prefetch_node][0];
}

// Exports [ptr, ptr + size) as a dma-buf file descriptor.
//
// The range must lie entirely inside one allocation tracked in
// allocation_map_. On success *dmabuf receives the descriptor and *offset the
// offset reported by the owning agent's driver; neither is written on failure.
//
// Returns:
//   HSA_STATUS_SUCCESS                  - export succeeded.
//   HSA_STATUS_ERROR_INVALID_ALLOCATION - range is not inside a tracked allocation.
//   HSA_STATUS_ERROR_INVALID_AGENT      - allocation is owned by a CPU or unknown device.
//   HSA_STATUS_ERROR_NOT_SUPPORTED      - PCIe mapping requested for a GPU that is
//                                         neither xGMI CPU-GPU connected nor large-BAR.
//   any driver error from ExportDMABuf.
//   HSA_STATUS_ERROR_NOT_INITIALIZED    - built for a non-Linux platform.
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset,
                                   uint64_t flags) {
#ifdef __linux__
  ScopedAcquire<KernelSharedMutex::Shared> lock(memory_lock_.shared());

  // Locate the allocation with the greatest base address that is <= ptr.
  auto alloc = allocation_map_.upper_bound(ptr);
  if (alloc == allocation_map_.begin()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  alloc--;

  // ptr itself has to fall inside that allocation ...
  const bool starts_inside =
      (alloc->first <= ptr) &&
      (ptr < reinterpret_cast<const uint8_t*>(alloc->first) + alloc->second.size);
  if (!starts_inside) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // ... and so does the end of the requested range.
  const bool fits = (uintptr_t(ptr) - uintptr_t(alloc->first) + size <= alloc->second.size);
  if (!fits) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Device specific eligibility checks.
  switch (alloc->second.region->owner()->device_type()) {
    case Agent::kAmdGpuDevice: {
      auto* gpu = static_cast<AMD::GpuAgent*>(alloc->second.region->owner());

      if ((flags & HSA_AMD_DMABUF_MAPPING_TYPE_PCIE) && !gpu->is_xgmi_cpu_gpu() &&
          !gpu->LargeBarEnabled()) {
        return static_cast<hsa_status_t>(HSA_STATUS_ERROR_NOT_SUPPORTED);
      }
    } break;
    case Agent::kAmdCpuDevice:
      return HSA_STATUS_ERROR_INVALID_AGENT;
    case Agent::kAmdAieDevice:
      break;
    case Agent::kUnknownDevice:
      return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  int exported_fd;
  uint64_t exported_offset;
  const hsa_status_t status = alloc->second.region->owner()->driver().ExportDMABuf(
      const_cast<void*>(ptr), size, &exported_fd, &exported_offset);

  if (status != HSA_STATUS_SUCCESS) {
    assert((status != HSA_STATUS_ERROR_INVALID_ARGUMENT) &&
           "Thunk does not recognize an expected allocation.");
    return status;
  }

  // Publish the results only after the export has succeeded.
  *dmabuf = exported_fd;
  *offset = exported_offset;
  return HSA_STATUS_SUCCESS;
#else
  return HSA_STATUS_ERROR_NOT_INITIALIZED;
#endif
}

// Closes a dma-buf file descriptor.
//
// Returns HSA_STATUS_SUCCESS when close() succeeds and
// HSA_STATUS_ERROR_RESOURCE_FREE otherwise. On non-Linux builds the call is
// not available and HSA_STATUS_ERROR_NOT_INITIALIZED is returned.
hsa_status_t Runtime::DmaBufClose(int dmabuf) {
#ifdef __linux__
  const bool closed = (close(dmabuf) == 0);
  return closed ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_RESOURCE_FREE;
#else
  return HSA_STATUS_ERROR_NOT_INITIALIZED;
#endif
}

// Reserves a range of virtual address space of `size` bytes.
//
// Parameters:
//   va        - receives the start of the reserved range.
//   size      - number of bytes to reserve.
//   address   - requested start address (0 for "don't care"). It is a hint: if
//               it cannot be honoured a different address is returned.
//   alignment - required alignment of *va; 0 selects the system page size.
//   flags     - HSA_AMD_VMEM_ADDRESS_NO_REGISTER reserves the range with a plain
//               anonymous mmap instead of through the thunk.
//
// The reservation is recorded in reserved_address_map_ keyed by *va.
//
// Returns HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when the
// address space could not be reserved.
hsa_status_t Runtime::VMemoryAddressReserve(void** va, size_t size, uint64_t address,
                                            uint64_t alignment, uint64_t flags) {
  void* addr = (void*)address;
  HsaMemFlags memFlags = {};

  const size_t page_size = sysconf(_SC_PAGE_SIZE);
  if (!alignment) alignment = page_size;

  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  if (flags & HSA_AMD_VMEM_ADDRESS_NO_REGISTER) {
    // mmap only guarantees page alignment, so over-allocate by the worst-case
    // distance to the next `alignment` boundary. No slack is needed (and the
    // subtraction must not underflow) when alignment does not exceed a page.
    const size_t slack = (alignment > page_size) ? (alignment - page_size) : 0;
    const size_t requested = size + slack;
    void* mem = mmap(addr, requested, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
    if (mem == MAP_FAILED)
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

    uint8_t* region_begin = static_cast<uint8_t*>(mem);
    uint8_t* region_end = region_begin + requested;
    uint8_t* aligned = AlignUp(region_begin, alignment);
    uint8_t* aligned_end = AlignUp(aligned + size, page_size);

    // Give the alignment slack back to the OS right away. The reservation is
    // later released with munmap(os_addr, size), which would otherwise leak
    // the slack and, when the range had to be shifted, leave the tail of the
    // user range mapped. Trimming is best effort: a failure only means the
    // slack stays mapped.
    if (aligned > region_begin) munmap(region_begin, aligned - region_begin);
    if (region_end > aligned_end) munmap(aligned_end, region_end - aligned_end);

    // Hint to enable THP for large host allocations which can help in performance gain
    constexpr size_t kLargePageSize = 2*1024*1024;
    if (size >= kLargePageSize) {
      if (madvise(aligned, size, MADV_HUGEPAGE))
        debug_warning(false && "madvise with MADV_HUGEPAGE failed");
    }

    // After trimming, the mapping begins exactly at the aligned address.
    reserved_address_map_[aligned] = AddressHandle(aligned, size, false);
    *va = aligned;
    return HSA_STATUS_SUCCESS;
  }

  memFlags.ui32.OnlyAddress = 1;
  memFlags.ui32.FixedAddress = 1;

  /* Try to reserve the VA requested by the user */
  if (HSAKMT_CALL(hsaKmtAllocMemoryAlign(0, size, alignment, memFlags, &addr)) != HSAKMT_STATUS_SUCCESS) {
    memFlags.ui32.FixedAddress = 0;
    /* Could not reserve the requested VA, allocate an alternate VA */
    if (HSAKMT_CALL(hsaKmtAllocMemoryAlign(0, size, alignment, memFlags, &addr)) != HSAKMT_STATUS_SUCCESS)
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  reserved_address_map_[addr] = AddressHandle(addr, size, true);
  *va = addr;
  return HSA_STATUS_SUCCESS;
}

// Releases a virtual address reservation made by VMemoryAddressReserve.
//
// `va` must be the exact start of a reservation and `size` its exact size.
//
// Returns:
//   HSA_STATUS_SUCCESS                  - reservation released and forgotten.
//   HSA_STATUS_ERROR_INVALID_ALLOCATION - `va` is not the start of a reservation.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT   - `size` differs from the reserved size.
//   HSA_STATUS_ERROR_RESOURCE_FREE      - the reservation still has mappings.
//   HSA_STATUS_ERROR                    - the underlying free/munmap failed.
hsa_status_t Runtime::VMemoryAddressFree(void* va, size_t size) {
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  auto reservation = reserved_address_map_.find(va);
  if (reservation == reserved_address_map_.end()) {
    debug_warning(false && "Can't find address in reserved address");
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  auto& handle = reservation->second;
  if (handle.size != size) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (handle.use_count > 0) return HSA_STATUS_ERROR_RESOURCE_FREE;

  // Release through the same mechanism that created the reservation.
  bool released;
  if (handle.registered) {
    released = (HSAKMT_CALL(hsaKmtFreeMemory(handle.os_addr, size)) == HSAKMT_STATUS_SUCCESS);
  } else {
    released = (munmap(handle.os_addr, size) == 0);
  }
  if (!released) return HSA_STATUS_ERROR;

  reserved_address_map_.erase(reservation);
  return HSA_STATUS_SUCCESS;
}

// Allocates `size` bytes from `region` and returns an opaque handle to the
// allocation.
//
// `size` must be a multiple of the region's page size, otherwise
// HSA_STATUS_ERROR_INVALID_ARGUMENT is returned. On success the allocation is
// recorded in memory_handle_map_ and *memoryOnlyHandle is set. On allocation
// failure the region's status is returned and *memoryOnlyHandle is left
// untouched.
hsa_status_t Runtime::VMemoryHandleCreate(const MemoryRegion* region, size_t size,
                                          MemoryRegion::AllocateFlags alloc_flags,
                                          uint64_t flags_unused,
                                          hsa_amd_vmem_alloc_handle_t* memoryOnlyHandle) {
  const auto* amd_region = static_cast<const AMD::MemoryRegion*>(region);
  const bool page_multiple = IsMultipleOf(size, amd_region->GetPageSize());
  if (!page_multiple) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  ThunkHandle thunk_handle;
  const hsa_status_t status = region->Allocate(size, alloc_flags, &thunk_handle, 0);
  if (status != HSA_STATUS_SUCCESS) return status;

  // Track the allocation, keyed by the driver handle.
  memory_handle_map_.emplace(
      std::piecewise_construct, std::forward_as_tuple(thunk_handle),
      std::forward_as_tuple(region, size, flags_unused, thunk_handle, alloc_flags));

  *memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);
  return status;
}

// Drops one reference on a memory handle created by VMemoryHandleCreate.
//
// When the last reference is dropped and no mapping uses the handle, the
// memory is freed and the handle is forgotten. If mappings are still
// outstanding only the reference count is decremented; the memory is then
// freed when the last mapping goes away.
//
// Returns HSA_STATUS_ERROR_INVALID_ALLOCATION when the handle is unknown or has
// no references left, HSA_STATUS_SUCCESS otherwise.
hsa_status_t Runtime::VMemoryHandleRelease(hsa_amd_vmem_alloc_handle_t memoryOnlyHandle) {
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  auto entry = memory_handle_map_.find(MemoryHandle::Convert(memoryOnlyHandle));
  if (entry == memory_handle_map_.end()) {
    debug_warning(false && "Can't find memory handle");
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  auto& handle = entry->second;
  if (!handle.ref_count) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Other references remain: nothing more to do.
  if (--handle.ref_count != 0) return HSA_STATUS_SUCCESS;

  // Per the documentation a handle may be released while mappings still exist.
  // In that case the final free is deferred until the last mapping is removed
  // (use_count == 0 and ref_count == 0).
  if (handle.use_count > 0) return HSA_STATUS_SUCCESS;

  handle.region->Free(entry->first, handle.size);
  memory_handle_map_.erase(entry);
  return HSA_STATUS_SUCCESS;
}

// Binds the memory behind `memoryOnlyHandle` to the virtual address range
// [va, va + size).
//
// The range must lie inside one reservation from VMemoryAddressReserve and must
// not overlap an existing mapping. The memory is exported from its owning agent
// as a dma-buf and re-imported to obtain a shareable handle. The new mapping is
// recorded in mapped_handle_map_ with no access permissions, and the use counts
// of both the reservation and the memory handle are incremented.
//
// Parameters:
//   va, size         - target range.
//   in_offset        - not used by this implementation.
//   memoryOnlyHandle - handle returned by VMemoryHandleCreate.
//   flags            - not used by this implementation.
//
// Returns:
//   HSA_STATUS_SUCCESS                - mapping recorded.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT - range not reserved, already mapped, or
//                                       unknown memory handle.
//   HSA_STATUS_ERROR_INVALID_AGENT    - memory is not owned by an AMD GPU agent.
//   HSA_STATUS_ERROR                  - device arguments could not be obtained.
//   any driver error from ExportDMABuf / ImportDMABuf.
hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset,
                                       hsa_amd_vmem_alloc_handle_t memoryOnlyHandle,
                                       uint64_t flags) {
  int drm_fd = -1, dmabuf_fd = -1;
  uint64_t offset = 0, ret;
  uint64_t drm_cpu_addr = 0;
  bool reservedAddressFound = false;

  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
  // Find the reservation with the greatest base <= va and make sure the whole
  // range fits inside it.
  auto reservedAddressIt = reserved_address_map_.upper_bound(va);
  if (reservedAddressIt != reserved_address_map_.begin()) {
    reservedAddressIt--;
    if ((reservedAddressIt->first <= va) &&
        ((reinterpret_cast<uint8_t*>(va) + size) <=
         (reinterpret_cast<const uint8_t*>(reservedAddressIt->first) + reservedAddressIt->second.size))) {
      reservedAddressFound = true;
    }
  }
  if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  /* Confirm that this VA range has not been mapped yet */
  // The mapping that starts at or before va must end at or before va ...
  auto upperMappedHandleIt = mapped_handle_map_.upper_bound(va);
  if (upperMappedHandleIt != mapped_handle_map_.begin()) {
    upperMappedHandleIt--;
    if ((reinterpret_cast<const uint8_t*>(upperMappedHandleIt->first) + upperMappedHandleIt->second.size) > va)
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  // ... and the next mapping at or after va must start at or after va + size.
  auto lowerMappedHandleIt = mapped_handle_map_.lower_bound(va);
  if (lowerMappedHandleIt != mapped_handle_map_.end()) {
    if (reinterpret_cast<uint8_t*>(va) + size > lowerMappedHandleIt->first) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  auto memoryHandleIt = memory_handle_map_.find(MemoryHandle::Convert(memoryOnlyHandle));
  if (memoryHandleIt == memory_handle_map_.end()) {
    debug_warning(false && "Can't find memory handle");
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  auto *agent = memoryHandleIt->second.agentOwner();

  // For now, this is only supported for KFD due to the call to
  // GetAmdgpuDeviceArgs
  if (agent->device_type() != core::Agent::DeviceType::kAmdGpuDevice)
    return HSA_STATUS_ERROR_INVALID_AGENT;

  // Create handle by exporting and importing the memory from the owning agent
  auto &agent_driver = agent->driver();
  hsa_status_t status = agent_driver.ExportDMABuf(memoryHandleIt->first, size,
                                                  &dmabuf_fd, &offset);
  if (status != HSA_STATUS_SUCCESS)
    return status;
  assert(offset == 0);

  ShareableHandle shareable_handle;
  status = agent_driver.ImportDMABuf(dmabuf_fd, *agent, shareable_handle);

  // The dma-buf descriptor is only needed for the import. Close it before
  // looking at the result so it is not leaked when the import fails.
  close(dmabuf_fd);

  if (status != HSA_STATUS_SUCCESS)
    return status;

  // Get address that memory is mapped to
  ret = GetAmdgpuDeviceArgs(agent, shareable_handle, &drm_fd, &drm_cpu_addr);
  if (ret) {
    // The import succeeded, so the handle has to be released again or it is
    // leaked. Best effort: the original failure is what gets reported.
    (void)agent_driver.ReleaseShareableHandle(shareable_handle);
    return HSA_STATUS_ERROR;
  }

  mapped_handle_map_.emplace(
      std::piecewise_construct, std::forward_as_tuple(va),
      std::forward_as_tuple(&memoryHandleIt->second, &reservedAddressIt->second,
                            offset, size, drm_fd,
                            reinterpret_cast<void *>(drm_cpu_addr),
                            HSA_ACCESS_PERMISSION_NONE, shareable_handle));

  // Both the reservation and the memory handle are now in use by this mapping.
  reservedAddressIt->second.use_count++;
  memoryHandleIt->second.use_count++;

  return HSA_STATUS_SUCCESS;
}

// Removes every mapping in the virtual address range [va, va + size).
//
// The range must be covered exactly by a contiguous sequence of mappings
// created by VMemoryHandleMap, starting at `va`; otherwise
// HSA_STATUS_ERROR_INVALID_ALLOCATION is returned and nothing is changed.
//
// For each mapping, access is removed from all agents, the shareable handle is
// released, and the use counts of the reservation and memory handle are
// decremented. A memory handle that was already released by the user and is no
// longer used by any mapping is freed here.
//
// Returns HSA_STATUS_SUCCESS, or the first error reported while removing access
// or releasing a shareable handle. Such an error returns immediately, so
// mappings processed before it stay removed while the rest remain in place.
hsa_status_t Runtime::VMemoryHandleUnmap(void* va, size_t size) {
  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
  std::list<std::pair<void*, MappedHandle*>> mappedHandles;

  // va + size may consist of multiple MappedHandle's.
  // Build a list of MappedHandles within this VA range.

  uint8_t* va_ptr = reinterpret_cast<uint8_t*>(va);
  uint8_t* va_chunk = va_ptr;
  // Each mapping must begin exactly where the previous one ended.
  while (va_chunk < va_ptr + size) {
    auto mappedHandleIt = mapped_handle_map_.find(va_chunk);
    // Cannot find a contiguous list of MappedHandles for the full VA range
    if (mappedHandleIt == mapped_handle_map_.end()) {
      return HSA_STATUS_ERROR_INVALID_ALLOCATION;
    }

    mappedHandles.push_back(std::make_pair(va_chunk, &mappedHandleIt->second));
    va_chunk += mappedHandleIt->second.size;
  }
  // The last mapping must not extend beyond the requested range.
  if (va_chunk != va_ptr + size) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  for (auto mappedHandleIt : mappedHandles) {
    // Remove access from all agents that were allowed access
    for (auto agentPermsIt = mappedHandleIt.second->allowed_agents.begin();
              agentPermsIt != mappedHandleIt.second->allowed_agents.end();) {
      assert(mappedHandleIt.first == agentPermsIt->second.va);
      hsa_status_t status = agentPermsIt->second.RemoveAccess();
      if (status != HSA_STATUS_SUCCESS) {
        return status;
      }
      agentPermsIt = mappedHandleIt.second->allowed_agents.erase(agentPermsIt);
    }

    // Release the handle obtained from the dma-buf import at map time.
    if (mappedHandleIt.second->shareable_handle.IsValid()) {
      hsa_status_t status =
        mappedHandleIt.second->agentOwner()->driver().ReleaseShareableHandle(
                                      mappedHandleIt.second->shareable_handle);
      if (status != HSA_STATUS_SUCCESS) {
        return status;
      }
    }

    // This mapping no longer uses its reservation or its memory handle.
    assert(mappedHandleIt.second->address_handle->use_count >= 1);
    mappedHandleIt.second->address_handle->use_count--;
    assert(mappedHandleIt.second->mem_handle->use_count >= 1);
    mappedHandleIt.second->mem_handle->use_count--;

    if (!mappedHandleIt.second->mem_handle->use_count &&
        !mappedHandleIt.second->mem_handle->ref_count) {
        // User called VMemoryHandleRelease while this mapping was still
        // outstanding. We need to delete the MemoryHandle as it is the last
        // MappedHandle that was using it.
      mappedHandleIt.second->mem_handle->region->Free(mappedHandleIt.second->mem_handle->thunk_handle,
                                                      mappedHandleIt.second->mem_handle->size);
      memory_handle_map_.erase(mappedHandleIt.second->mem_handle->thunk_handle);
    }

    // Finally forget the mapping itself; the MappedHandle pointer is invalid
    // after this point.
    mapped_handle_map_.erase(mappedHandleIt.first);

  }
  return HSA_STATUS_SUCCESS;
}

// Describes one agent's access to a mapped handle.
//
// For a non-CPU target the backing memory is exported from its owning agent as
// a dma-buf and imported into the target agent's driver, which fills in
// shareable_handle. The temporary dma-buf descriptor is closed after the import
// attempt. Failures are asserted on in debug builds and otherwise leave the
// object without an imported handle. CPU targets need no import.
Runtime::MappedHandleAllowedAgent::MappedHandleAllowedAgent(
    MappedHandle *mappedHandle, Agent *targetAgent, void *va, size_t size,
    hsa_access_permission_t perms)
    : va(va), size(size), targetAgent(targetAgent), permissions(perms),
      mappedHandle(mappedHandle) {

  // CPU agents have access as the memory is already mapped to the host.
  const bool target_is_cpu =
      (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice);
  if (target_is_cpu) return;

  MemoryHandle *backing = mappedHandle->mem_handle;
  int export_fd = 0;
  uint64_t export_offset = 0;

  // Step 1: export the memory from the agent that owns it.
  hsa_status_t status = backing->agentOwner()->driver().ExportDMABuf(
      backing->thunk_handle, mappedHandle->size, &export_fd, &export_offset);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return;
  assert(export_offset == 0);

  // Step 2: import it into the target agent, then drop the descriptor.
  status = targetAgent->driver().ImportDMABuf(export_fd, *targetAgent, shareable_handle);
  assert(status == HSA_STATUS_SUCCESS);
  close(export_fd);
}

// Releases the shareable handle through the target agent's driver. CPU targets
// are skipped. A failed release is asserted on in debug builds and otherwise
// ignored.
Runtime::MappedHandleAllowedAgent::~MappedHandleAllowedAgent() {
  const bool target_is_cpu =
      (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice);
  if (target_is_cpu) return;

  hsa_status_t status = targetAgent->driver().ReleaseShareableHandle(shareable_handle);
  assert(status == HSA_STATUS_SUCCESS);
}

// Gives the target agent access to the mapped range with permissions `perms`.
//
// CPU agents get a fixed-address shared mmap of the DRM file at `va`; other
// agents are mapped through their driver using the imported shareable handle.
// On success the new permissions are remembered in `permissions`.
//
// Returns HSA_STATUS_SUCCESS, HSA_STATUS_ERROR when the CPU mmap does not land
// at `va`, or the driver's status when the device mapping fails.
hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permission_t perms) {
  const bool target_is_cpu =
      (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice);

  if (target_is_cpu) {
    void* host_ptr = mmap(va, size, PermissionsToMmapFlags(perms), MAP_SHARED | MAP_FIXED,
                          mappedHandle->drm_fd,
                          reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr));
    if (host_ptr != va) return HSA_STATUS_ERROR;
  } else {
    const hsa_status_t map_status =
        targetAgent->driver().Map(shareable_handle, va, mappedHandle->offset, size, perms);
    if (map_status != HSA_STATUS_SUCCESS) return map_status;
  }

  permissions = perms;
  return HSA_STATUS_SUCCESS;
}

// Revokes the target agent's access to the mapped range.
//
// CPU agents are unmapped with munmap (HSA_STATUS_ERROR on failure); other
// agents are unmapped through their driver, whose status is returned as is.
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
  const bool target_is_cpu =
      (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice);

  if (!target_is_cpu) {
    return targetAgent->driver().Unmap(shareable_handle, va, mappedHandle->offset,
                                       mappedHandle->size);
  }

  return (munmap(va, size) != 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
}

// Note: VMemorySetAccessPerHandle should be called with &memory_lock_ held
//
// Applies each of the desc_cnt access descriptors in `desc` to a single mapped
// handle located at `va`.
//  - An agent with no entry in mappedHandle.allowed_agents gets a new entry and
//    access is enabled with the requested permissions.
//  - An agent whose recorded permissions already match is left untouched.
//  - Otherwise the existing access is removed and re-enabled with the new
//    permissions.
// Returns HSA_STATUS_ERROR (after erasing the agent's entry) if enabling access
// fails, HSA_STATUS_SUCCESS when every descriptor was applied.
// Throws AMD::hsa_exception if an existing mapping cannot be removed.
hsa_status_t
Runtime::VMemorySetAccessPerHandle(void *va, MappedHandle &mappedHandle,
                                   const hsa_amd_memory_access_desc_t *desc,
                                   const size_t desc_cnt) {
  // Index with size_t so the comparison against desc_cnt is not mixed-sign.
  for (size_t i = 0; i < desc_cnt; i++) {
    Agent *targetAgent = Agent::Convert(desc[i].agent_handle);

    const size_t &size = mappedHandle.size;
    const hsa_access_permission_t &perm = desc[i].permissions;

    auto agentPermsIt = mappedHandle.allowed_agents.find(targetAgent);
    if (agentPermsIt == mappedHandle.allowed_agents.end()) {
      /* Agent not previously allowed, we need a new entry */
      agentPermsIt =
          mappedHandle.allowed_agents
              .emplace(std::piecewise_construct,
                       std::forward_as_tuple(targetAgent),
                       std::forward_as_tuple(&mappedHandle, targetAgent, va,
                                             size, perm))
              .first;

      if (agentPermsIt->second.EnableAccess(perm) != HSA_STATUS_SUCCESS) {
        mappedHandle.allowed_agents.erase(agentPermsIt);
        return HSA_STATUS_ERROR;
      }
    } else {
      /* Previous permissions are same as current permission */
      if (agentPermsIt->second.permissions == perm)
        continue;

      /* Permissions are different - update access */
      if (agentPermsIt->second.RemoveAccess() != HSA_STATUS_SUCCESS)
        throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to remove access for memory handle.");

      if (agentPermsIt->second.EnableAccess(perm) != HSA_STATUS_SUCCESS) {
        // The old mapping is already gone; drop the entry so stale
        // permissions are not reported for this agent.
        mappedHandle.allowed_agents.erase(agentPermsIt);
        return HSA_STATUS_ERROR;
      }
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Sets the access permissions described by desc[0..desc_cnt) on the virtual
// address range [va, va + size).
//
// Returns:
//  - HSA_STATUS_ERROR_INVALID_AGENT if any descriptor names a null or invalid
//    agent (checked before the memory lock is taken),
//  - HSA_STATUS_ERROR_INVALID_ARGUMENT if the range is not fully contained in
//    one reserved address range,
//  - HSA_STATUS_ERROR_INVALID_ALLOCATION if the range is not covered by a
//    contiguous sequence of mapped handles starting exactly at `va`,
//  - otherwise the first non-success status from VMemorySetAccessPerHandle, or
//    HSA_STATUS_SUCCESS.
// Handles processed before a failing one keep their updated permissions.
hsa_status_t Runtime::VMemorySetAccess(void* va, size_t size,
                                       const hsa_amd_memory_access_desc_t* desc,
                                       const size_t desc_cnt) {
  std::list<std::pair<void*, MappedHandle*>> mappedHandles;
  bool reservedAddressFound = false;

  // Validate all agents
  for (size_t i = 0; i < desc_cnt; i++) {
    Agent* targetAgent = Agent::Convert(desc[i].agent_handle);

    if (targetAgent == NULL || !targetAgent->IsValid()) return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  // Find the reservation with the greatest base address <= va and check that
  // the whole requested range fits inside it.
  auto reservedAddressIt = reserved_address_map_.upper_bound(va);
  if (reservedAddressIt != reserved_address_map_.begin()) {
    reservedAddressIt--;
    if ((reservedAddressIt->first <= va) &&
        ((reinterpret_cast<uint8_t*>(va) + size) <=
         (reinterpret_cast<const uint8_t*>(reservedAddressIt->first) +
          reservedAddressIt->second.size))) {
      reservedAddressFound = true;
    }
  }
  if (!reservedAddressFound) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // va + size may consist of multiple MappedHandle's. Build a list of MappedHandles within this VA
  // range
  uint8_t* va_chunk = reinterpret_cast<uint8_t*>(va);
  while (va_chunk < reinterpret_cast<uint8_t*>(va) + size) {
    auto mappedHandleIt = mapped_handle_map_.find(va_chunk);
    // Cannot find a contiguous list of MappedHandles for the full VA range
    if (mappedHandleIt == mapped_handle_map_.end()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

    mappedHandles.push_back(std::make_pair(va_chunk, &mappedHandleIt->second));
    va_chunk += mappedHandleIt->second.size;
  }

  for (const auto& mappedHandleEntry : mappedHandles) {
    const hsa_status_t status = VMemorySetAccessPerHandle(
        mappedHandleEntry.first, *mappedHandleEntry.second, desc, desc_cnt);
    if (status != HSA_STATUS_SUCCESS)
      return status;
  }
  return HSA_STATUS_SUCCESS;
}

// Note: VMemoryMapAllowAccess should be called with &memory_lock_ held
//
// Grants `perm` to each of the `num_agents` agents on the mapped handle that
// contains `va` and on every mapped handle that follows it contiguously.
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES if the descriptor array cannot be
// allocated, HSA_STATUS_ERROR_INVALID_AGENT for a null/invalid agent,
// HSA_STATUS_ERROR_INVALID_ALLOCATION when no mapped handle contains `va`, or
// the first failing status from VMemorySetAccessPerHandle.
hsa_status_t Runtime::VMemoryMapAllowAccess(const void *va,
                                            const hsa_access_permission_t perm,
                                            const hsa_agent_t *agents,
                                            size_t num_agents) {
  // Build one access descriptor per agent, all carrying the same permission.
  hsa_amd_memory_access_desc_t *desc =
      new (std::nothrow) hsa_amd_memory_access_desc_t[num_agents];
  if (desc == nullptr)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  MAKE_SCOPE_GUARD([&]() { delete[] desc; });

  for (size_t idx = 0; idx < num_agents; idx++) {
    Agent *candidate = Agent::Convert(agents[idx]);
    if (candidate == nullptr || !candidate->IsValid())
      return HSA_STATUS_ERROR_INVALID_AGENT;

    desc[idx].permissions = perm;
    desc[idx].agent_handle = agents[idx];
  }

  std::list<std::pair<void *, MappedHandle *>> targets;

  // Locate the mapped handle with the greatest base address <= va.
  auto handleIt = mapped_handle_map_.upper_bound(va);
  if (handleIt != mapped_handle_map_.begin()) {
    handleIt--;

    const uint8_t *handleEnd =
        reinterpret_cast<const uint8_t *>(handleIt->first) + handleIt->second.size;
    if (handleEnd > va) {
      // va lies inside this handle. Collect it and every following handle
      // whose base address continues exactly where the previous one ended.
      uint8_t *chunkBase = (uint8_t *)handleIt->first;
      do {
        targets.push_back(std::make_pair(chunkBase, &handleIt->second));
        chunkBase += handleIt->second.size;
        handleIt++;
      } while (handleIt != mapped_handle_map_.end() && chunkBase == handleIt->first);
    }
  }

  if (targets.empty())
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  for (auto target : targets) {
    const hsa_status_t result =
        VMemorySetAccessPerHandle(target.first, *target.second, desc, num_agents);
    if (result != HSA_STATUS_SUCCESS)
      return result;
  }
  return HSA_STATUS_SUCCESS;
}

// Reports the access permissions that `agent_handle` currently has on the
// mapped handle containing `va`.
//
// *perms is always initialised to HSA_ACCESS_PERMISSION_NONE and is only
// overwritten when the agent has an entry in the handle's allowed_agents.
// Returns:
//  - HSA_STATUS_ERROR_INVALID_ALLOCATION if `va` is not inside any mapped
//    handle (the handle covers [base, base + size); the end address itself is
//    outside the mapping),
//  - HSA_STATUS_ERROR_INVALID_AGENT if the agent is null, invalid, or not a
//    GPU device,
//  - HSA_STATUS_SUCCESS otherwise.
hsa_status_t Runtime::VMemoryGetAccess(const void* va, hsa_access_permission_t* perms,
                                       hsa_agent_t agent_handle) {
  *perms = HSA_ACCESS_PERMISSION_NONE;
  bool mappedHandleFound = false;

  ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);

  // Candidate is the mapped handle with the greatest base address <= va.
  auto mappedHandleIt = mapped_handle_map_.upper_bound(va);
  if (mappedHandleIt != mapped_handle_map_.begin()) {
    mappedHandleIt--;
    // Strict upper bound: base + size is one past the last mapped byte.
    if ((mappedHandleIt->first <= va) &&
        reinterpret_cast<const uint8_t*>(va) <
         (reinterpret_cast<const uint8_t*>(mappedHandleIt->first) + mappedHandleIt->second.size)) {
      mappedHandleFound = true;
    }
  }
  if (!mappedHandleFound) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  Agent* agent = Agent::Convert(agent_handle);
  if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice)
    return HSA_STATUS_ERROR_INVALID_AGENT;

  auto agentPermsIt = mappedHandleIt->second.allowed_agents.find(agent);
  if (agentPermsIt != mappedHandleIt->second.allowed_agents.end()) {
    *perms = agentPermsIt->second.permissions;
    return HSA_STATUS_SUCCESS;
  }

  /* Set access was not called on this memory handle */
  *perms = HSA_ACCESS_PERMISSION_NONE;
  return HSA_STATUS_SUCCESS;
}

// Exports the allocation identified by `handle` as a DMA-buf file descriptor.
// *dmabuf_fd is preset to -1 before anything else happens. Returns
// HSA_STATUS_ERROR_INVALID_ALLOCATION if the handle is not in
// memory_handle_map_, otherwise the status of the owning agent driver's
// ExportDMABuf call. `flags` is not used by this function.
hsa_status_t Runtime::VMemoryExportShareableHandle(int* dmabuf_fd,
                                                   hsa_amd_vmem_alloc_handle_t handle,
                                                   uint64_t flags) {
  *dmabuf_fd = -1;

  const auto handleIt = memory_handle_map_.find(MemoryHandle::Convert(handle));
  if (handleIt == memory_handle_map_.end()) {
    debug_warning(false && "Can't find memory handle");
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  auto& memHandle = handleIt->second;

  // The driver reports an offset as well; it is not passed on to the caller.
  uint64_t offset;
  return memHandle.region->owner()->driver().ExportDMABuf(memHandle.thunk_handle, memHandle.size,
                                                          dmabuf_fd, &offset);
}

// Imports a DMA-buf file descriptor and returns a memory-only allocation
// handle for it through *memoryOnlyHandle.
//
// The fd is registered with the thunk to obtain its address, size and node id.
// If that address is already present in memory_handle_map_, the existing
// entry's ref_count is incremented and its handle is returned. Otherwise a
// new entry is created, bound to a region of the GPU agent on the reported
// node.
//
// Returns:
//  - HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS if the thunk registration fails,
//  - HSA_STATUS_ERROR_INVALID_ALLOCATION if no suitable region is found or the
//    pointer info query fails / reports an unknown pointer type,
//  - HSA_STATUS_SUCCESS otherwise.
hsa_status_t Runtime::VMemoryImportShareableHandle(int dmabuf_fd,
                                                   hsa_amd_vmem_alloc_handle_t* memoryOnlyHandle) {
  // Finds a region on node `nodeid` that allows runtime allocation. Only the
  // first agent registered for the node is considered and it must be a valid
  // GPU agent. The loop does not stop at the first match, so when several
  // regions qualify the last one wins. *ret is set to NULL on the early-exit
  // paths and is left untouched when no region qualifies.
  auto lookupRegion = [this](int nodeid, const AMD::MemoryRegion** ret) {
    auto nodeAgent = agents_by_node_.find(nodeid);
    if (nodeAgent == agents_by_node_.end()) {
      *ret = NULL;
      return;
    }

    Agent* agent = nodeAgent->second.front();
    if (agent == nullptr || !agent->IsValid() || agent->device_type() != Agent::kAmdGpuDevice) {
      *ret = NULL;
      return;
    }

    for (const core::MemoryRegion* region : agent->regions()) {
      const AMD::MemoryRegion* amd_region = reinterpret_cast<const AMD::MemoryRegion*>(region);

      // TODO: Verify that this works on a system with FINE_GRAINED memory.
      // Systems with FINE_GRAINED will have both COARSE and FINE grain... need to get the
      // right one.

      bool alloc_allowed;
      hsa_status_t status =
          amd_region->GetInfo(HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
      if (status == HSA_STATUS_SUCCESS && alloc_allowed) *ret = amd_region;
    }
  };

  // Register the dma-buf with the thunk to learn its address, size and node.
  HsaGraphicsResourceInfo info;
  int ret = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodes(dmabuf_fd, &info, 0, NULL));
  if (ret) return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;

  ThunkHandle thunk_handle = info.MemoryAddress;
  size_t size = info.SizeInBytes;
  int gpuid = info.NodeId;


  auto memoryHandleIt = memory_handle_map_.find(thunk_handle);
  if (memoryHandleIt != memory_handle_map_.end()) {
    /* This handle was already imported, increment ref_count and return */
    memoryHandleIt->second.ref_count++;
    *memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);
    return HSA_STATUS_SUCCESS;
  }

  // `region` starts as NULL so a lookup that matches nothing is detected below.
  const AMD::MemoryRegion* region = NULL;
  lookupRegion(gpuid, &region);
  if (!region) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // NOTE(review): `ret` holds a thunk call result but is compared against
  // HSA_STATUS_SUCCESS; this relies on both success codes having the same
  // value - confirm.
  HsaPointerInfo ptrInfo;
  ret = HSAKMT_CALL(hsaKmtQueryPointerInfo(info.MemoryAddress, &ptrInfo));
  if (ret != HSA_STATUS_SUCCESS || ptrInfo.Type == HSA_POINTER_UNKNOWN)
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Carry the NoSubstitute memory flag over as a pinned allocation flag.
  MemoryRegion::AllocateFlags alloc_flag = core::MemoryRegion::AllocateNoFlags;
  if (ptrInfo.MemFlags.ui32.NoSubstitute) alloc_flag |= core::MemoryRegion::AllocatePinned;

  memory_handle_map_.emplace(std::piecewise_construct,
          std::forward_as_tuple(thunk_handle),
          std::forward_as_tuple(region, size, 0, thunk_handle, alloc_flag));
  *memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);

  return HSA_STATUS_SUCCESS;
}

// Looks up the mapping that starts exactly at `va`, takes an extra reference
// on its backing memory handle and returns that handle via *mapped_handle.
// Returns HSA_STATUS_ERROR_INVALID_ALLOCATION if no mapping is keyed by `va`.
hsa_status_t Runtime::VMemoryRetainAllocHandle(hsa_amd_vmem_alloc_handle_t* mapped_handle,
                                               void* va) {
  const auto mappingIt = mapped_handle_map_.find(va);
  if (mappingIt == mapped_handle_map_.end()) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  MemoryHandle* const backing = mappingIt->second.mem_handle;
  backing->ref_count++;
  *mapped_handle = MemoryHandle::Convert(backing->thunk_handle);
  return HSA_STATUS_SUCCESS;
}

// Reports the memory region and memory type recorded for an allocation handle.
// *type is MEMORY_TYPE_PINNED when the handle's alloc_flag has AllocatePinned
// set and MEMORY_TYPE_NONE otherwise. Returns
// HSA_STATUS_ERROR_INVALID_ALLOCATION if the handle is unknown.
hsa_status_t Runtime::VMemoryGetAllocPropertiesFromHandle(hsa_amd_vmem_alloc_handle_t allocHandle,
                                                          const core::MemoryRegion** mem_region,
                                                          hsa_amd_memory_type_t* type) {
  const auto handleIt = memory_handle_map_.find(MemoryHandle::Convert(allocHandle));
  if (handleIt == memory_handle_map_.end()) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  const auto& memHandle = handleIt->second;
  *mem_region = memHandle.region;

  if (memHandle.alloc_flag & core::MemoryRegion::AllocatePinned) {
    *type = MEMORY_TYPE_PINNED;
  } else {
    *type = MEMORY_TYPE_NONE;
  }

  return HSA_STATUS_SUCCESS;
}

// Installs a new set of logging flags and selects the log destination.
// sizeof(log_flags) bytes are copied from `flags`. `file` is treated as a
// FILE*; when it is null, logging goes to stderr. Always returns
// HSA_STATUS_SUCCESS.
hsa_status_t Runtime::EnableLogging(uint8_t* flags, void* file) {
  memcpy(log_flags, flags, sizeof(log_flags));
  log_file = (file != nullptr) ? reinterpret_cast<FILE*>(file) : stderr;
  return HSA_STATUS_SUCCESS;
}

}  // namespace core
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/signal.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTME_CORE_SIGNAL_CPP_
#define HSA_RUNTME_CORE_SIGNAL_CPP_

#include "core/inc/signal.h"

#include <algorithm>
#include <numeric>
#include <vector>

#include "core/util/timer.h"
#include "core/inc/runtime.h"

namespace rocr {
namespace core {

KernelMutex Signal::ipcLock_;
std::map<decltype(hsa_signal_t::handle), Signal*> Signal::ipcMap_;

// Frees every block owned by the pool and empties both bookkeeping lists.
// Inside the `ifdebug` section, the total capacity of all blocks is compared
// with the number of free slots and a warning is printed when they differ
// (i.e. some signals were never returned to the pool).
void SharedSignalPool_t::clear() {
  ifdebug {
    size_t capacity = 0;
    for (auto& block : block_list_) capacity += block.second;
    if (capacity != free_list_.size())
      // The difference is a size_t, so it must be printed with %zu.
      debug_print("Warning: Resource leak detected by SharedSignalPool, %zu Signals leaked.\n",
                  capacity - free_list_.size());
  }

  for (auto& block : block_list_) free_()(block.first);
  block_list_.clear();
  free_list_.clear();
}

// Hands out one default-constructed SharedSignal from the pool.
// When the free list is empty a new block of block_size_ signals is allocated;
// if that allocation fails it is retried once with minblock_ signals, and
// std::bad_alloc is thrown if the retry fails too. After a block has been
// added, block_size_ is doubled for the next growth step.
SharedSignal* SharedSignalPool_t::alloc() {
  ScopedAcquire<HybridMutex> lock(&lock_);

  if (free_list_.empty()) {
    // Allocates storage for block_size_ signals (block_size_ is read at call time).
    auto allocateBlock = [&]() {
      return reinterpret_cast<SharedSignal*>(
          allocate_()(block_size_ * sizeof(SharedSignal), __alignof(SharedSignal), 0, 0));
    };

    SharedSignal* block = allocateBlock();
    if (block == nullptr) {
      // Fall back to the minimum block size before giving up.
      block_size_ = minblock_;
      block = allocateBlock();
      if (block == nullptr) throw std::bad_alloc();
    }

    // Free the block again if recording it in block_list_ throws.
    MAKE_NAMED_SCOPE_GUARD(throwGuard, [&]() { free_()(block); });
    block_list_.push_back(std::make_pair(block, block_size_));
    throwGuard.Dismiss();

    for (int slot = 0; slot < block_size_; slot++) {
      free_list_.push_back(&block[slot]);
    }

    block_size_ *= 2;
  }

  // Construct in place first; the slot only leaves the free list afterwards.
  SharedSignal* const signal = free_list_.back();
  new (signal) SharedSignal();
  free_list_.pop_back();
  return signal;
}

// Destroys `ptr` and returns its storage to the pool's free list.
// A null pointer is ignored. Inside the `ifdebug` section the pointer is
// checked (via assert) to lie within one of the pool's blocks.
void SharedSignalPool_t::free(SharedSignal* ptr) {
  if (ptr == nullptr) return;

  // The destructor runs before the pool lock is taken.
  ptr->~SharedSignal();
  ScopedAcquire<HybridMutex> lock(&lock_);

  ifdebug {
    const bool valid =
        std::any_of(block_list_.begin(), block_list_.end(), [&](const auto& block) {
          return (block.first <= ptr) &&
              (uintptr_t(ptr) < uintptr_t(block.first) + block.second * sizeof(SharedSignal));
        });
    assert(valid && "Object does not belong to pool.");
  }

  free_list_.push_back(ptr);
}

// Creates a local signal and seeds its value with `initial_value`.
// An exportable signal constructs local_signal_ with a null pool and the
// AllocateIPC flag; a non-exportable one uses the runtime singleton's shared
// signal pool with no flags.
LocalSignal::LocalSignal(hsa_signal_value_t initial_value, bool exportable)
    : local_signal_(exportable ? nullptr
                               : core::Runtime::runtime_singleton_->GetSharedSignalPool(),
                    exportable ? core::MemoryRegion::AllocateIPC : 0) {
  local_signal_.shared_object()->amd_signal.value = initial_value;
}

void Signal::registerIpc() {
  ScopedAcquire<KernelMutex> lock(&ipcLock_);
  auto handle = Convert(this);
  assert(ipcMap_.find(handle.handle) == ipcMap_.end() &&
         "Can't register the same IPC signal twice.");
  ipcMap_[handle.handle] = this;
}

// Removes this signal from the IPC map if nothing references it any more.
// Returns false (leaving the map untouched) while refcount_ is non-zero and
// true otherwise. A missing map entry trips the assert in debug builds; in
// release builds it is tolerated instead of erasing the end iterator, which
// would be undefined behaviour.
bool Signal::deregisterIpc() {
  ScopedAcquire<KernelMutex> lock(&ipcLock_);
  if (refcount_ != 0) return false;
  auto handle = Convert(this);
  const auto it = ipcMap_.find(handle.handle);
  assert(it != ipcMap_.end() && "Deregister on non-IPC signal.");
  if (it != ipcMap_.end()) ipcMap_.erase(it);
  return true;
}

// Returns the signal registered in the IPC map for `signal`'s handle, or
// nullptr when there is none. No reference is taken on the result.
Signal* Signal::lookupIpc(hsa_signal_t signal) {
  ScopedAcquire<KernelMutex> guard(&ipcLock_);
  const auto entry = ipcMap_.find(signal.handle);
  return (entry != ipcMap_.end()) ? entry->second : nullptr;
}

// Like lookupIpc, but on a hit also increments the signal's refcount_ and
// calls Retain() on it before returning it. Returns nullptr when the handle
// is not registered.
Signal* Signal::duplicateIpc(hsa_signal_t signal) {
  ScopedAcquire<KernelMutex> guard(&ipcLock_);

  const auto entry = ipcMap_.find(signal.handle);
  if (entry == ipcMap_.end()) {
    return nullptr;
  }

  Signal* const shared = entry->second;
  shared->refcount_++;
  shared->Retain();
  return shared;
}

// Drops one retain count. When the count reaches zero the signal is
// destroyed: immediately for non-IPC signals, and for IPC signals only if
// deregisterIpc() succeeds.
void Signal::Release() {
  const bool lastRetain = (--retained_ == 0);
  if (!lastRetain) return;

  if (!isIPC() || deregisterIpc()) {
    doDestroySignal();
  }
}

// Marks the signal as invalid. An IPC signal whose refcount_ is exactly 1 has
// that count cleared and is then removed from the IPC map.
Signal::~Signal() {
  signal_.kind = AMD_SIGNAL_KIND_INVALID;

  if (refcount_ != 1 || !isIPC()) return;

  refcount_ = 0;
  deregisterIpc();
}

uint32_t Signal::WaitMultiple(uint32_t signal_count, const hsa_signal_t* hsa_signals,
                              const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
                              uint64_t timeout, hsa_wait_state_t wait_hint,
                              std::vector<hsa_signal_value_t>& satisfying_values,
                              bool wait_on_all) {
  hsa_signal_handle* signals =
      reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));

  for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();

  MAKE_SCOPE_GUARD([&]() {
    for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
  });

  uint32_t prior = 0;
  for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);

  MAKE_SCOPE_GUARD([&]() {
    for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
  });

  if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
      // Allow only the first waiter to sleep. Without event age tracking,
      // race condition can cause some threads to sleep without wakeup since missing interrupt.
      if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;

  // Ensure that all signals in the list can be slept on.
  if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
    for (uint32_t i = 0; i < signal_count; i++) {
      if (signals[i]->EopEvent() == NULL) {
        wait_hint = HSA_WAIT_STATE_ACTIVE;
        break;
      }
    }
  }

  const uint32_t small_size = 10;
  HsaEvent* short_evts[small_size];
  HsaEvent** evts = NULL;
  uint32_t unique_evts = 0;
  if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
    if (signal_count > small_size)
      evts = new HsaEvent* [signal_count];
    else
      evts = short_evts;
    for (uint32_t i = 0; i < signal_count; i++)
      evts[i] = signals[i]->EopEvent();
    std::sort(evts, evts + signal_count);
    HsaEvent** end = std::unique(evts, evts + signal_count);
    unique_evts = uint32_t(end - evts);
  }
  MAKE_SCOPE_GUARD([&]() {
    if (signal_count > small_size) delete[] evts;
  });

  uint64_t event_age[unique_evts];
  memset(event_age, 0, unique_evts * sizeof(uint64_t));
  if (core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
    for (uint32_t i = 0; i < unique_evts; i++)
      event_age[i] = 1;

  int64_t value;

  timer::fast_clock::time_point start_time = timer::fast_clock::now();

  // Set a polling timeout value
  const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);

  // Convert timeout value into the fast_clock domain
  uint64_t hsa_freq = 0;
  HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
  const timer::fast_clock::duration fast_timeout =
      timer::duration_from_seconds<timer::fast_clock::duration>(
          double(timeout) / double(hsa_freq));

  std::vector<uint32_t> unmet_condition_ids(signal_count);
  std::iota(unmet_condition_ids.begin(), unmet_condition_ids.end(), 0);

  while (true) {
    // Cannot mwaitx - polling multiple signals
    for (auto it = unmet_condition_ids.begin(); it != unmet_condition_ids.end();) {
      auto i = *it;
      bool condition_met = false;
      if (!signals[i]->IsValid())
        return uint32_t(-1);

      value =
          atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);

      switch (conds[i]) {
        case HSA_SIGNAL_CONDITION_EQ: {
          condition_met = (value == values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_NE: {
          condition_met = (value != values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_GTE: {
          condition_met = (value >= values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_LT: {
          condition_met = (value < values[i]);
          break;
        }
        default:
          return uint32_t(-1);
      }
      if (condition_met) {
        it = unmet_condition_ids.erase(it);
        satisfying_values[i] = value;
        if (!wait_on_all)
          return i;
        else if (unmet_condition_ids.empty())
          return 0;
      } else {
        ++it;
      }
    }

    timer::fast_clock::time_point time = timer::fast_clock::now();
    if (time - start_time > fast_timeout) {
      return uint32_t(-1);
    }

    if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
      continue;
    }

    if (time - start_time < kMaxElapsed) {
    //  os::uSleep(20);
      continue;
    }

    uint32_t wait_ms;
    auto time_remaining = fast_timeout - (time - start_time);
    uint64_t ct=timer::duration_cast<std::chrono::milliseconds>(
      time_remaining).count();
    wait_ms = (ct>0xFFFFFFFEu) ? 0xFFFFFFFEu : ct;
    HSAKMT_CALL(hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, wait_on_all, wait_ms, event_age));
  }
}

/*
 * Special handler to wait listen for exceptions from underlying driver.
 */
uint32_t Signal::WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
                         const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
                         hsa_signal_value_t* satisfying_value) {

  uint32_t wait_ms = uint32_t(-1);
  hsa_signal_handle* signals =
      reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));

  for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();

  MAKE_SCOPE_GUARD([&]() {
    for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
  });

  uint32_t prior = 0;
  for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);


  MAKE_SCOPE_GUARD([&]() {
    for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
  });

  if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
      // Allow only the first waiter to sleep. Without event age tracking,
      // race condition can cause some threads to sleep without wakeup since missing interrupt.
      if (prior != 0) wait_ms = 0;

  HsaEvent** evts = new HsaEvent* [signal_count];
  MAKE_SCOPE_GUARD([&]() { delete[] evts; });

  uint32_t unique_evts = 0;

  for (uint32_t i = 0; i < signal_count; i++) {
    assert(signals[i]->EopEvent() != NULL);
    evts[i] = signals[i]->EopEvent();
  }

  std::sort(evts, evts + signal_count);
  HsaEvent** end = std::unique(evts, evts + signal_count);
  unique_evts = uint32_t(end - evts);

  uint64_t event_age[unique_evts];
  memset(event_age, 0, unique_evts * sizeof(uint64_t));
  if (core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
    for (uint32_t i = 0; i < unique_evts; i++)
      event_age[i] = 1;

  int64_t value;

  bool condition_met = false;
  while (true) {
    // Cannot mwaitx - polling multiple signals

    for (uint32_t i = 0; i < signal_count; i++) {
      if (!signals[i]->IsValid())
        return uint32_t(-1);

      const HSA_EVENTTYPE event_type = signals[i]->EopEvent()->EventData.EventType;
      if (event_type == HSA_EVENTTYPE_MEMORY) {
        const HsaMemoryAccessFault& fault =
            signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
        if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) return i;
      } else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) {
        const HsaHwException& exception =
            signals[i]->EopEvent()->EventData.EventData.HwException;
        if (exception.MemoryLost) return i;
      }

      value = atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);

      switch (conds[i]) {
        case HSA_SIGNAL_CONDITION_EQ: {
          condition_met = (value == values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_NE: {
          condition_met = (value != values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_GTE: {
          condition_met = (value >= values[i]);
          break;
        }
        case HSA_SIGNAL_CONDITION_LT: {
          condition_met = (value < values[i]);
          break;
        }
        default: {
          return uint32_t(-1);
        }
      }
      if (condition_met) {
        if (satisfying_value != NULL) *satisfying_value = value;
        // Some other signal in the list satisfied condition
        return i;
      }
    }

    HSAKMT_CALL(hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, false, wait_ms, event_age));
  } //while
}

// Builds a group holding a private copy of the first num_signals entries of
// hsa_signals. With num_signals == 0 no storage is allocated and `signals`
// stays NULL.
SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals)
    : count(num_signals) {
  if (count == 0) {
    signals = NULL;
    return;
  }

  signals = new hsa_signal_t[count];
  if (signals == NULL) return;

  std::copy_n(hsa_signals, count, signals);
}

}  // namespace core
}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/runtime/svm_profiler.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/svm_profiler.h"

#include <stdint.h>
#include <algorithm>
#include <sys/eventfd.h>
#include <poll.h>

#include "core/util/utils.h"
#include "core/inc/runtime.h"
#include "core/inc/agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/util/os.h"

namespace rocr {
namespace AMD {

// Maps an SMI event index to its printable name. Any index beyond the last
// named event yields the trailing "UNKNOWN" entry.
static const char* smi_event_string(uint32_t event) {
  static const char* const kEventNames[] = {
      "NONE",
      "VMFAULT",
      "THERMAL_THROTTLE",
      "GPU_PRE_RESET",
      "GPU_POST_RESET",
      "MIGRATE_START",
      "MIGRATE_END",
      "PAGE_FAULT_START",
      "PAGE_FAULT_END",
      "QUEUE_EVICTION",
      "QUEUE_RESTORE",
      "UNMAP_FROM_GPU",
      "UNKNOWN",
  };
  // Index of the final ("UNKNOWN") slot, used as the clamp limit.
  constexpr uint32_t kUnknownIndex = sizeof(kEventNames) / sizeof(kEventNames[0]) - 1;

  return kEventNames[event < kUnknownIndex ? event : kUnknownIndex];
}

// Maps a migration trigger index to its printable name. Any index beyond the
// last named trigger yields the trailing "UNKNOWN" entry.
static const char* smi_migrate_string(uint32_t trigger) {
  static const char* const kTriggerNames[] = {
      "PREFETCH",
      "PAGEFAULT_GPU",
      "PAGEFAULT_CPU",
      "TTM_EVICTION",
      "UNKNOWN",
  };
  // Index of the final ("UNKNOWN") slot, used as the clamp limit.
  constexpr uint32_t kUnknownIndex = sizeof(kTriggerNames) / sizeof(kTriggerNames[0]) - 1;

  return kTriggerNames[trigger < kUnknownIndex ? trigger : kUnknownIndex];
}

// Maps a queue-eviction trigger index to its printable name. Any index beyond
// the last named trigger yields the trailing "UNKNOWN" entry.
static const char* smi_eviction_string(uint32_t trigger) {
  static const char* const kTriggerNames[] = {
      "SVM",
      "USERPTR",
      "TTM",
      "SUSPEND",
      "CRIU_CHECKPOINT",
      "CRIU_RESTORE",
      "UNKNOWN",
  };
  // Index of the final ("UNKNOWN") slot, used as the clamp limit.
  constexpr uint32_t kUnknownIndex = sizeof(kTriggerNames) / sizeof(kTriggerNames[0]) - 1;

  return kTriggerNames[trigger < kUnknownIndex ? trigger : kUnknownIndex];
}

// Maps an unmap-from-GPU trigger index to its printable name. Any index
// beyond the last named trigger yields the trailing "UNKNOWN" entry.
static const char* smi_unmap_string(uint32_t trigger) {
  static const char* const kTriggerNames[] = {
      "MMU_NOTIFY",
      "MMU_NOTIFY_MIGRATE",
      "UNMAP_FROM_CPU",
      "UNKNOWN",
  };
  // Index of the final ("UNKNOWN") slot, used as the clamp limit.
  constexpr uint32_t kUnknownIndex = sizeof(kTriggerNames) / sizeof(kTriggerNames[0]) - 1;

  return kTriggerNames[trigger < kUnknownIndex ? trigger : kUnknownIndex];
}

void SvmProfileControl::PollSmiRun(void* _profileControl) {
  SvmProfileControl* profileControl = (SvmProfileControl*)_profileControl;

  profileControl->PollSmi();
}

// Body of the SVM profiling thread.
//
// Does nothing unless the svm_profile flag names a log file.  Otherwise it
// opens that file for appending, opens one SMI event stream per GPU agent,
// enables the migrate / page-fault / queue-eviction / queue-restore / unmap
// events on each stream, and then polls the streams together with the
// shutdown eventfd (`event`).  Every complete text record read from a stream
// is decoded and written to the log as one "ROCr HMM event: ..." line.
//
// The loop ends when the eventfd becomes readable, when `exit` is set, or
// when poll() fails with anything other than EINTR.
//
// Failure handling (asserts still fire in debug builds):
//  - a stream that cannot be opened is logged and left out of the poll set
//    (its pollfd keeps fd == -1, which poll() ignores);
//  - a record whose header cannot be parsed is logged and skipped;
//  - a record whose event-specific part cannot be parsed is still logged,
//    with the raw remainder of the line as its detail.
void SvmProfileControl::PollSmi() {
  if (core::Runtime::runtime_singleton_->flag().svm_profile().empty()) {
    return;
  }
  FILE* logFile = fopen(core::Runtime::runtime_singleton_->flag().svm_profile().c_str(), "a");
  if (logFile == NULL) {
    return;
  }
  MAKE_NAMED_SCOPE_GUARD(logGuard, [&]() { fclose(logFile); });

  const auto& gpu_agents = core::Runtime::runtime_singleton_->gpu_agents();

  // Slot 0 is the shutdown eventfd; slot i + 1 belongs to GPU agent i.
  std::vector<pollfd> files;
  files.resize(gpu_agents.size() + 1);
  files[0].fd = event;
  files[0].events = POLLIN;
  files[0].revents = 0;
  for (size_t idx = 1; idx < files.size(); idx++) {
    // Start out invalid so that a failed open can never be mistaken for
    // descriptor 0 by the write/close calls below.
    files[idx].fd = -1;
    files[idx].events = POLLIN;
    files[idx].revents = 0;
  }

  HSAuint64 events = 0;
  events = HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_MIGRATE_START) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_MIGRATE_END) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_PAGE_FAULT_START) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_PAGE_FAULT_END) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_QUEUE_EVICTION) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_QUEUE_RESTORE) |
      HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_UNMAP_FROM_GPU);

  for (size_t i = 0; i < gpu_agents.size(); i++) {
    auto gpu_agent = gpu_agents[i];
    pollfd& smi = files[i + 1];
    auto err = gpu_agent->driver().OpenSMI(gpu_agent->node_id(), &smi.fd);
    assert(err == HSA_STATUS_SUCCESS);
    if (err != HSA_STATUS_SUCCESS) {
      smi.fd = -1;
      fprintf(logFile, "ROCr HMM event error: Failed to open SMI stream for GPU agent %zu\n", i);
      continue;
    }
    // Enable collecting masked events.
    auto wrote = write(smi.fd, &events, sizeof(events));
    assert(wrote == static_cast<ssize_t>(sizeof(events)));
    if (wrote != static_cast<ssize_t>(sizeof(events))) {
      fprintf(logFile, "ROCr HMM event error: Failed to enable SMI events for GPU agent %zu\n", i);
    }
  }
  MAKE_NAMED_SCOPE_GUARD(smiGuard, [&]() {
    for (size_t idx = 1; idx < files.size(); idx++) {
      if (files[idx].fd >= 0) close(files[idx].fd);
    }
  });

  // Per-stream accumulation of bytes that do not yet form a complete line.
  std::vector<std::string> smi_records;
  smi_records.resize(files.size());
  char buffer[HSA_SMI_EVENT_MSG_SIZE + 1];

  // Addresses and sizes in SMI records are expressed in 4 KiB pages.
  constexpr uint64_t kPageSize = 4096;

  // Renders the agent identified by a KFD gpu id as "CPU" or "GPU<n>(<handle>)".
  auto format_agent = [this](uint32_t gpuid) -> std::string {
    core::Agent* agent = core::Runtime::runtime_singleton_->agent_by_gpuid(gpuid);
    if (agent == NULL) return format("UNKNOWN(%x)", gpuid);
    if (agent->device_type() == core::Agent::kAmdCpuDevice) return std::string("CPU");
    return format("GPU%u(%p)", ((AMD::GpuAgent*)agent)->enumeration_index(),
                  agent->public_handle());
  };

  // Renders the inclusive byte range [start, start + bytes - 1].
  auto format_range = [this](uint64_t start, uint64_t bytes) -> std::string {
    return format("[%p, %p]", reinterpret_cast<void*>(static_cast<uintptr_t>(start)),
                  reinterpret_cast<void*>(static_cast<uintptr_t>(start + bytes - 1)));
  };

  while (!exit) {
    int ready = poll(&files[0], files.size(), -1);
    if (ready < 1) {
      // Being interrupted by a signal is not a failure; wait again.
      if (ready < 0 && errno == EINTR) continue;
      assert(false && "poll failed!");
      return;
    }

    for (size_t i = 1; i < files.size(); i++) {
      if (files[i].revents & POLLIN) {
        auto len = read(files[i].fd, buffer, sizeof(buffer) - 1);
        if (len > 0) {
          buffer[len] = '\0';
          smi_records[i] += buffer;

          // Consume every complete line; a trailing partial line stays
          // buffered until the rest of it arrives.
          while (true) {
            size_t pos = smi_records[i].find('\n');
            if (pos == std::string::npos) break;

            std::string line = smi_records[i].substr(0, pos);
            smi_records[i].erase(0, pos + 1);

            const char* cursor = line.c_str();

            // Event records follow the format:
            // event_id timestamp -pid event_specific_info trigger
            // timestamp, pid, and trigger are in dec.  All other are hex.
            // event_specific substring is listed for each event type.
            // See kfd_ioctl.h for more info.
            unsigned int event_id = 0;
            unsigned long long timestamp = 0;
            unsigned int pid = 0;
            int offset = 0;
            int args = sscanf(cursor, "%x %llu -%u%n", &event_id, &timestamp, &pid, &offset);
            assert(args == 3 && "Parsing error!");
            if (args != 3) {
              fprintf(logFile, "ROCr HMM event error: Unable to parse record \"%s\"\n",
                      line.c_str());
              continue;
            }
            (void)pid;  // Parsed only to step over the field.

            // Step to the event specific part without running past the
            // terminator when the record ends right after the pid.
            cursor += offset;
            while (*cursor == ' ') ++cursor;

            std::string detail;
            bool parsed = true;
            switch (event_id) {
              //@addr(size) from->to prefetch_location:preferred_location
              case HSA_SMI_EVENT_MIGRATE_START: {
                unsigned long long addr = 0;
                unsigned long long size = 0;
                unsigned int from = 0;
                unsigned int to = 0;
                unsigned int fetch = 0;
                unsigned int pref = 0;
                unsigned int trigger = 0;
                args = sscanf(cursor, "@%llx(%llx) %x->%x %x:%x %u", &addr, &size, &from, &to,
                              &fetch, &pref, &trigger);
                assert(args == 7 && "Parsing error!");
                if (args != 7) {
                  parsed = false;
                  break;
                }

                std::string from_agent = format_agent(from);
                std::string to_agent = format_agent(to);
                // 64-bit math: a range of 4 GiB or more must not wrap.
                std::string range = format_range(addr * kPageSize, size * kPageSize);
                std::string cause = smi_migrate_string(trigger);
                detail = cause + " " + from_agent + "->" + to_agent + " " + range;
                break;
              }
              //@addr(size) from->to
              case HSA_SMI_EVENT_MIGRATE_END: {
                unsigned long long addr = 0;
                unsigned long long size = 0;
                unsigned int from = 0;
                unsigned int to = 0;
                unsigned int trigger = 0;
                args = sscanf(cursor, "@%llx(%llx) %x->%x %u", &addr, &size, &from, &to, &trigger);
                assert(args == 5 && "Parsing error!");
                if (args != 5) {
                  parsed = false;
                  break;
                }

                std::string from_agent = format_agent(from);
                std::string to_agent = format_agent(to);
                std::string range = format_range(addr * kPageSize, size * kPageSize);
                std::string cause = smi_migrate_string(trigger);
                detail = cause + " " + from_agent + "->" + to_agent + " " + range;
                break;
              }
              //@addr(gpu_id) W/R
              case HSA_SMI_EVENT_PAGE_FAULT_START: {
                unsigned long long addr = 0;
                unsigned int gpuid = 0;
                char mode = '\0';
                args = sscanf(cursor, "@%llx(%x) %c", &addr, &gpuid, &mode);
                assert(args == 3 && "Parsing error!");
                if (args != 3) {
                  parsed = false;
                  break;
                }

                addr *= kPageSize;

                std::string agent = format_agent(gpuid);
                std::string range = std::to_string(addr);
                std::string cause = (mode == 'W') ? "Write" : "Read";
                detail = cause + " " + agent + " " + range;
                break;
              }
              //@addr(gpu_id) M/U  (migration / page table update)
              case HSA_SMI_EVENT_PAGE_FAULT_END: {
                unsigned long long addr = 0;
                unsigned int gpuid = 0;
                char mode = '\0';
                args = sscanf(cursor, "@%llx(%x) %c", &addr, &gpuid, &mode);
                assert(args == 3 && "Parsing error!");
                if (args != 3) {
                  parsed = false;
                  break;
                }

                addr *= kPageSize;

                std::string agent = format_agent(gpuid);
                std::string range = std::to_string(addr);
                std::string cause = (mode == 'M') ? "Migration" : "Map";
                detail = cause + " " + agent + " " + range;
                break;
              }
              // gpu_id
              case HSA_SMI_EVENT_QUEUE_EVICTION: {
                unsigned int gpuid = 0;
                unsigned int trigger = 0;
                args = sscanf(cursor, "%x %u", &gpuid, &trigger);
                assert(args == 2 && "Parsing error!");
                if (args != 2) {
                  parsed = false;
                  break;
                }

                std::string agent = format_agent(gpuid);
                std::string cause = smi_eviction_string(trigger);
                detail = cause + " " + agent;
                break;
              }
              // gpu_id
              case HSA_SMI_EVENT_QUEUE_RESTORE: {
                unsigned int gpuid = 0;
                args = sscanf(cursor, "%x", &gpuid);
                assert(args == 1 && "Parsing error!");
                if (args != 1) {
                  parsed = false;
                  break;
                }

                detail = format_agent(gpuid);
                break;
              }
              //@addr(size) gpu_id
              case HSA_SMI_EVENT_UNMAP_FROM_GPU: {
                unsigned long long addr = 0;
                unsigned long long size = 0;
                unsigned int gpuid = 0;
                unsigned int trigger = 0;
                args = sscanf(cursor, "@%llx(%llx) %x %u", &addr, &size, &gpuid, &trigger);
                assert(args == 4 && "Parsing error!");
                if (args != 4) {
                  parsed = false;
                  break;
                }

                std::string gpu = format_agent(gpuid);
                std::string range = format_range(addr * kPageSize, size * kPageSize);
                std::string cause = smi_unmap_string(trigger);
                detail = cause + " " + gpu + " " + range;
                break;
              }
              default:;
            }

            if (!parsed) {
              // Keep the event in the log even though its payload could not
              // be decoded; never format values that were not parsed.
              detail = std::string("<unparsed> ") + cursor;
            }

            std::string record = std::string("ROCr HMM event: ") + std::to_string(timestamp) +
                " " + smi_event_string(event_id) + " " + detail;
            fprintf(logFile, "%s\n", record.c_str());
          }
        } else {
          auto err = errno;
          const char* msg = strerror(err);
          fprintf(logFile, "ROCr HMM event error: Read returned %ld, %s (%d)\n", len, msg, err);
        }
        files[i].revents = 0;
      }
    }
    // Readable eventfd: shutdown was requested.
    if (files[0].revents & POLLIN) return;
  }
}

// Creates the shutdown eventfd and starts the SMI polling thread.
//
// If the eventfd cannot be created no thread is started; if thread creation
// fails the object is left with a NULL thread handle.  In both cases the
// destructor can still run safely.
SvmProfileControl::SvmProfileControl() : event(-1), exit(false) {
  // The destructor tests this handle, so give it a defined value before any
  // early return.  (The member's declaration is outside this file's view;
  // assigning here is harmless if it already has a default initializer.)
  poll_smi_thread_ = NULL;

  event = eventfd(0, EFD_CLOEXEC);
  if (event == -1) return;

  poll_smi_thread_ = os::CreateThread(PollSmiRun, static_cast<void*>(this));
  if (poll_smi_thread_ == NULL) {
    assert(false && "Poll SMI thread creation error.");
    return;
  }
}

// Stops the SMI polling thread (if one was started) and releases the
// shutdown eventfd.
SvmProfileControl::~SvmProfileControl() {
  // Raise the stop flag before waking the poller so the flag is already set
  // by the time the thread reacts to the wake-up.
  exit = true;
  if (event != -1) eventfd_write(event, 1);

  if (poll_smi_thread_ != NULL) {
    os::WaitForThread(poll_smi_thread_);
    os::CloseThread(poll_smi_thread_);
    poll_smi_thread_ = NULL;
  }

  // Only close a descriptor that was actually created.
  if (event != -1) {
    close(event);
    event = -1;
  }
}

// printf-style formatting into a std::string.
//
// Formats into the reusable member buffer `format_buffer`, growing it when
// the result does not fit, and returns a copy of the formatted text.
// Returns an empty string if snprintf reports an encoding error.
template <typename... Args>
std::string SvmProfileControl::format(const char* format, Args... args) {
  // With an empty buffer only measure the output (snprintf permits a null
  // destination when the size is 0) instead of indexing element 0.
  char* out = format_buffer.empty() ? nullptr : &format_buffer[0];
  int len = snprintf(out, format_buffer.size(), format, args...);
  if (len < 0) {
    return std::string();
  }

  const size_t needed = static_cast<size_t>(len) + 1;  // +1 for the terminator
  if (needed > format_buffer.size()) {
    format_buffer.resize(needed);
    snprintf(&format_buffer[0], format_buffer.size(), format, args...);
  }
  return std::string(&format_buffer[0]);
}

} // namespace AMD
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/thunk_loader.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/thunk_loader.h"
#include "core/inc/runtime.h"

#include <dlfcn.h>
#include <iostream>

namespace rocr {
namespace core {

  // When the DTIF flag is enabled, tries to dlopen libdtif.so and stores the
  // handle in dtif_handle.  A failed load is reported on stderr.
  ThunkLoader::ThunkLoader() {
    if (!core::Runtime::runtime_singleton_->flag().enable_dtif()) {
      return;
    }

    dlerror(); // Discard any stale error text before loading
    dtif_handle = dlopen("libdtif.so", RTLD_LAZY);
    if (dtif_handle != NULL) {
      debug_print("Load libdtif.so successully!\n");
    } else {
      fprintf(stderr, "Cannot load libdtif.so, failed:%s\n", dlerror());
    }
  }

  // Unloads libdtif.so if the DTIF flag is enabled and a handle was obtained.
  // A failed unload is reported on stderr.
  ThunkLoader::~ThunkLoader() {
    const bool have_dtif =
        core::Runtime::runtime_singleton_->flag().enable_dtif() && (dtif_handle != NULL);
    if (!have_dtif) {
      return;
    }

    if (dlclose(dtif_handle) == 0) {
      debug_print("Unload libdtif.so successully!\n");
    } else {
      fprintf(stderr, "Cannot unload libdtif.so, failed:%s\n", dlerror());
    }
  }

  void ThunkLoader::LoadThunkApiTable() {
    if (core::Runtime::runtime_singleton_->flag().enable_dtif()) {
      dlerror(); // Clear any existing error messages

      HSAKMT_PFN(hsaKmtOpenKFD) = (HSAKMT_DEF(hsaKmtOpenKFD)*)dlsym(dtif_handle, "hsaKmtOpenKFD");
      if (HSAKMT_PFN(hsaKmtOpenKFD) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtCloseKFD) = (HSAKMT_DEF(hsaKmtCloseKFD)*)dlsym(dtif_handle, "hsaKmtCloseKFD");
      if (HSAKMT_PFN(hsaKmtCloseKFD) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetVersion) = (HSAKMT_DEF(hsaKmtGetVersion)*)dlsym(dtif_handle, "hsaKmtGetVersion");
      if (HSAKMT_PFN(hsaKmtGetVersion) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtAcquireSystemProperties) = (HSAKMT_DEF(hsaKmtAcquireSystemProperties)*)dlsym(dtif_handle, "hsaKmtAcquireSystemProperties");
      if (HSAKMT_PFN(hsaKmtAcquireSystemProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtReleaseSystemProperties) = (HSAKMT_DEF(hsaKmtReleaseSystemProperties)*)dlsym(dtif_handle, "hsaKmtReleaseSystemProperties");
      if (HSAKMT_PFN(hsaKmtReleaseSystemProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetNodeProperties) = (HSAKMT_DEF(hsaKmtGetNodeProperties)*)dlsym(dtif_handle, "hsaKmtGetNodeProperties");
      if (HSAKMT_PFN(hsaKmtGetNodeProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) = (HSAKMT_DEF(hsaKmtGetNodeMemoryProperties)*)dlsym(dtif_handle, "hsaKmtGetNodeMemoryProperties");
      if (HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetNodeCacheProperties) = (HSAKMT_DEF(hsaKmtGetNodeCacheProperties)*)dlsym(dtif_handle, "hsaKmtGetNodeCacheProperties");
      if (HSAKMT_PFN(hsaKmtGetNodeCacheProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) = (HSAKMT_DEF(hsaKmtGetNodeIoLinkProperties)*)dlsym(dtif_handle, "hsaKmtGetNodeIoLinkProperties");
      if (HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtCreateEvent) = (HSAKMT_DEF(hsaKmtCreateEvent)*)dlsym(dtif_handle, "hsaKmtCreateEvent");
      if (HSAKMT_PFN(hsaKmtCreateEvent) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDestroyEvent) = (HSAKMT_DEF(hsaKmtDestroyEvent)*)dlsym(dtif_handle, "hsaKmtDestroyEvent");
      if (HSAKMT_PFN(hsaKmtDestroyEvent) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetEvent) = (HSAKMT_DEF(hsaKmtSetEvent)*)dlsym(dtif_handle, "hsaKmtSetEvent");
      if (HSAKMT_PFN(hsaKmtSetEvent) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtResetEvent) = (HSAKMT_DEF(hsaKmtResetEvent)*)dlsym(dtif_handle, "hsaKmtResetEvent");
      if (HSAKMT_PFN(hsaKmtResetEvent) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtQueryEventState) = (HSAKMT_DEF(hsaKmtQueryEventState)*)dlsym(dtif_handle, "hsaKmtQueryEventState");
      if (HSAKMT_PFN(hsaKmtQueryEventState) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtWaitOnEvent) = (HSAKMT_DEF(hsaKmtWaitOnEvent)*)dlsym(dtif_handle, "hsaKmtWaitOnEvent");
      if (HSAKMT_PFN(hsaKmtWaitOnEvent) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents)*)dlsym(dtif_handle, "hsaKmtWaitOnMultipleEvents");
      if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtCreateQueue) = (HSAKMT_DEF(hsaKmtCreateQueue)*)dlsym(dtif_handle, "hsaKmtCreateQueue");
      if (HSAKMT_PFN(hsaKmtCreateQueue) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtCreateQueueExt) = (HSAKMT_DEF(hsaKmtCreateQueueExt)*)dlsym(dtif_handle, "hsaKmtCreateQueueExt");
      if (HSAKMT_PFN(hsaKmtCreateQueueExt) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtUpdateQueue) = (HSAKMT_DEF(hsaKmtUpdateQueue)*)dlsym(dtif_handle, "hsaKmtUpdateQueue");
      if (HSAKMT_PFN(hsaKmtUpdateQueue) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDestroyQueue) = (HSAKMT_DEF(hsaKmtDestroyQueue)*)dlsym(dtif_handle, "hsaKmtDestroyQueue");
      if (HSAKMT_PFN(hsaKmtDestroyQueue) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetQueueCUMask) = (HSAKMT_DEF(hsaKmtSetQueueCUMask)*)dlsym(dtif_handle, "hsaKmtSetQueueCUMask");
      if (HSAKMT_PFN(hsaKmtSetQueueCUMask) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetMemoryPolicy) = (HSAKMT_DEF(hsaKmtSetMemoryPolicy)*)dlsym(dtif_handle, "hsaKmtSetMemoryPolicy");
      if (HSAKMT_PFN(hsaKmtSetMemoryPolicy) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtAllocMemory) = (HSAKMT_DEF(hsaKmtAllocMemory)*)dlsym(dtif_handle, "hsaKmtAllocMemory");
      if (HSAKMT_PFN(hsaKmtAllocMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtAllocMemoryAlign) = (HSAKMT_DEF(hsaKmtAllocMemoryAlign)*)dlsym(dtif_handle, "hsaKmtAllocMemoryAlign");
      if (HSAKMT_PFN(hsaKmtAllocMemoryAlign) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtFreeMemory) = (HSAKMT_DEF(hsaKmtFreeMemory)*)dlsym(dtif_handle, "hsaKmtFreeMemory");
      if (HSAKMT_PFN(hsaKmtFreeMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtAvailableMemory) = (HSAKMT_DEF(hsaKmtAvailableMemory)*)dlsym(dtif_handle, "hsaKmtAvailableMemory");
      if (HSAKMT_PFN(hsaKmtAvailableMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterMemory) = (HSAKMT_DEF(hsaKmtRegisterMemory)*)dlsym(dtif_handle, "hsaKmtRegisterMemory");
      if (HSAKMT_PFN(hsaKmtRegisterMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) = (HSAKMT_DEF(hsaKmtRegisterMemoryToNodes)*)dlsym(dtif_handle, "hsaKmtRegisterMemoryToNodes");
      if (HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) = (HSAKMT_DEF(hsaKmtRegisterMemoryWithFlags)*)dlsym(dtif_handle, "hsaKmtRegisterMemoryWithFlags");
      if (HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodes)*)dlsym(dtif_handle, "hsaKmtRegisterGraphicsHandleToNodes");
      if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) = (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodesExt)*)dlsym(dtif_handle, "hsaKmtRegisterGraphicsHandleToNodesExt");
      if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtShareMemory) = (HSAKMT_DEF(hsaKmtShareMemory)*)dlsym(dtif_handle, "hsaKmtShareMemory");
      if (HSAKMT_PFN(hsaKmtShareMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterSharedHandle) = (HSAKMT_DEF(hsaKmtRegisterSharedHandle)*)dlsym(dtif_handle, "hsaKmtRegisterSharedHandle");
      if (HSAKMT_PFN(hsaKmtRegisterSharedHandle) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterSharedHandleToNodes)*)dlsym(dtif_handle, "hsaKmtRegisterSharedHandleToNodes");
      if (HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtProcessVMRead) = (HSAKMT_DEF(hsaKmtProcessVMRead)*)dlsym(dtif_handle, "hsaKmtProcessVMRead");
      if (HSAKMT_PFN(hsaKmtProcessVMRead) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtProcessVMWrite) = (HSAKMT_DEF(hsaKmtProcessVMWrite)*)dlsym(dtif_handle, "hsaKmtProcessVMWrite");
      if (HSAKMT_PFN(hsaKmtProcessVMWrite) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDeregisterMemory) = (HSAKMT_DEF(hsaKmtDeregisterMemory)*)dlsym(dtif_handle, "hsaKmtDeregisterMemory");
      if (HSAKMT_PFN(hsaKmtDeregisterMemory) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtMapMemoryToGPU) = (HSAKMT_DEF(hsaKmtMapMemoryToGPU)*)dlsym(dtif_handle, "hsaKmtMapMemoryToGPU");
      if (HSAKMT_PFN(hsaKmtMapMemoryToGPU) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) = (HSAKMT_DEF(hsaKmtMapMemoryToGPUNodes)*)dlsym(dtif_handle, "hsaKmtMapMemoryToGPUNodes");
      if (HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) = (HSAKMT_DEF(hsaKmtUnmapMemoryToGPU)*)dlsym(dtif_handle, "hsaKmtUnmapMemoryToGPU");
      if (HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgRegister) = (HSAKMT_DEF(hsaKmtDbgRegister)*)dlsym(dtif_handle, "hsaKmtDbgRegister");
      if (HSAKMT_PFN(hsaKmtDbgRegister) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgUnregister) = (HSAKMT_DEF(hsaKmtDbgUnregister)*)dlsym(dtif_handle, "hsaKmtDbgUnregister");
      if (HSAKMT_PFN(hsaKmtDbgUnregister) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgWavefrontControl) = (HSAKMT_DEF(hsaKmtDbgWavefrontControl)*)dlsym(dtif_handle, "hsaKmtDbgWavefrontControl");
      if (HSAKMT_PFN(hsaKmtDbgWavefrontControl) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgAddressWatch) = (HSAKMT_DEF(hsaKmtDbgAddressWatch)*)dlsym(dtif_handle, "hsaKmtDbgAddressWatch");
      if (HSAKMT_PFN(hsaKmtDbgAddressWatch) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgEnable) = (HSAKMT_DEF(hsaKmtDbgEnable)*)dlsym(dtif_handle, "hsaKmtDbgEnable");
      if (HSAKMT_PFN(hsaKmtDbgEnable) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgDisable) = (HSAKMT_DEF(hsaKmtDbgDisable)*)dlsym(dtif_handle, "hsaKmtDbgDisable");
      if (HSAKMT_PFN(hsaKmtDbgDisable) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgGetDeviceData) = (HSAKMT_DEF(hsaKmtDbgGetDeviceData)*)dlsym(dtif_handle, "hsaKmtDbgGetDeviceData");
      if (HSAKMT_PFN(hsaKmtDbgGetDeviceData) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDbgGetQueueData) = (HSAKMT_DEF(hsaKmtDbgGetQueueData)*)dlsym(dtif_handle, "hsaKmtDbgGetQueueData");
      if (HSAKMT_PFN(hsaKmtDbgGetQueueData) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetClockCounters) = (HSAKMT_DEF(hsaKmtGetClockCounters)*)dlsym(dtif_handle, "hsaKmtGetClockCounters");
      if (HSAKMT_PFN(hsaKmtGetClockCounters) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcGetCounterProperties) = (HSAKMT_DEF(hsaKmtPmcGetCounterProperties)*)dlsym(dtif_handle, "hsaKmtPmcGetCounterProperties");
      if (HSAKMT_PFN(hsaKmtPmcGetCounterProperties) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcRegisterTrace) = (HSAKMT_DEF(hsaKmtPmcRegisterTrace)*)dlsym(dtif_handle, "hsaKmtPmcRegisterTrace");
      if (HSAKMT_PFN(hsaKmtPmcRegisterTrace) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcUnregisterTrace) = (HSAKMT_DEF(hsaKmtPmcUnregisterTrace)*)dlsym(dtif_handle, "hsaKmtPmcUnregisterTrace");
      if (HSAKMT_PFN(hsaKmtPmcUnregisterTrace) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) = (HSAKMT_DEF(hsaKmtPmcAcquireTraceAccess)*)dlsym(dtif_handle, "hsaKmtPmcAcquireTraceAccess");
      if (HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) = (HSAKMT_DEF(hsaKmtPmcReleaseTraceAccess)*)dlsym(dtif_handle, "hsaKmtPmcReleaseTraceAccess");
      if (HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcStartTrace) = (HSAKMT_DEF(hsaKmtPmcStartTrace)*)dlsym(dtif_handle, "hsaKmtPmcStartTrace");
      if (HSAKMT_PFN(hsaKmtPmcStartTrace) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcQueryTrace) = (HSAKMT_DEF(hsaKmtPmcQueryTrace)*)dlsym(dtif_handle, "hsaKmtPmcQueryTrace");
      if (HSAKMT_PFN(hsaKmtPmcQueryTrace) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPmcStopTrace) = (HSAKMT_DEF(hsaKmtPmcStopTrace)*)dlsym(dtif_handle, "hsaKmtPmcStopTrace");
      if (HSAKMT_PFN(hsaKmtPmcStopTrace) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtMapGraphicHandle) = (HSAKMT_DEF(hsaKmtMapGraphicHandle)*)dlsym(dtif_handle, "hsaKmtMapGraphicHandle");
      if (HSAKMT_PFN(hsaKmtMapGraphicHandle) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtUnmapGraphicHandle) = (HSAKMT_DEF(hsaKmtUnmapGraphicHandle)*)dlsym(dtif_handle, "hsaKmtUnmapGraphicHandle");
      if (HSAKMT_PFN(hsaKmtUnmapGraphicHandle) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetTrapHandler) = (HSAKMT_DEF(hsaKmtSetTrapHandler)*)dlsym(dtif_handle, "hsaKmtSetTrapHandler");
      if (HSAKMT_PFN(hsaKmtSetTrapHandler) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetTileConfig) = (HSAKMT_DEF(hsaKmtGetTileConfig)*)dlsym(dtif_handle, "hsaKmtGetTileConfig");
      if (HSAKMT_PFN(hsaKmtGetTileConfig) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtQueryPointerInfo) = (HSAKMT_DEF(hsaKmtQueryPointerInfo)*)dlsym(dtif_handle, "hsaKmtQueryPointerInfo");
      if (HSAKMT_PFN(hsaKmtQueryPointerInfo) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetMemoryUserData) = (HSAKMT_DEF(hsaKmtSetMemoryUserData)*)dlsym(dtif_handle, "hsaKmtSetMemoryUserData");
      if (HSAKMT_PFN(hsaKmtSetMemoryUserData) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetQueueInfo) = (HSAKMT_DEF(hsaKmtGetQueueInfo)*)dlsym(dtif_handle, "hsaKmtGetQueueInfo");
      if (HSAKMT_PFN(hsaKmtGetQueueInfo) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtAllocQueueGWS) = (HSAKMT_DEF(hsaKmtAllocQueueGWS)*)dlsym(dtif_handle, "hsaKmtAllocQueueGWS");
      if (HSAKMT_PFN(hsaKmtAllocQueueGWS) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRuntimeEnable) = (HSAKMT_DEF(hsaKmtRuntimeEnable)*)dlsym(dtif_handle, "hsaKmtRuntimeEnable");
      if (HSAKMT_PFN(hsaKmtRuntimeEnable) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtRuntimeDisable) = (HSAKMT_DEF(hsaKmtRuntimeDisable)*)dlsym(dtif_handle, "hsaKmtRuntimeDisable");
      if (HSAKMT_PFN(hsaKmtRuntimeDisable) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) = (HSAKMT_DEF(hsaKmtCheckRuntimeDebugSupport)*)dlsym(dtif_handle, "hsaKmtCheckRuntimeDebugSupport");
      if (HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) = (HSAKMT_DEF(hsaKmtGetRuntimeCapabilities)*)dlsym(dtif_handle, "hsaKmtGetRuntimeCapabilities");
      if (HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtDebugTrapIoctl) = (HSAKMT_DEF(hsaKmtDebugTrapIoctl)*)dlsym(dtif_handle, "hsaKmtDebugTrapIoctl");
      if (HSAKMT_PFN(hsaKmtDebugTrapIoctl) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSPMAcquire) = (HSAKMT_DEF(hsaKmtSPMAcquire)*)dlsym(dtif_handle, "hsaKmtSPMAcquire");
      if (HSAKMT_PFN(hsaKmtSPMAcquire) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSPMRelease) = (HSAKMT_DEF(hsaKmtSPMRelease)*)dlsym(dtif_handle, "hsaKmtSPMRelease");
      if (HSAKMT_PFN(hsaKmtSPMRelease) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSPMSetDestBuffer) = (HSAKMT_DEF(hsaKmtSPMSetDestBuffer)*)dlsym(dtif_handle, "hsaKmtSPMSetDestBuffer");
      if (HSAKMT_PFN(hsaKmtSPMSetDestBuffer) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSVMSetAttr) = (HSAKMT_DEF(hsaKmtSVMSetAttr)*)dlsym(dtif_handle, "hsaKmtSVMSetAttr");
      if (HSAKMT_PFN(hsaKmtSVMSetAttr) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSVMGetAttr) = (HSAKMT_DEF(hsaKmtSVMGetAttr)*)dlsym(dtif_handle, "hsaKmtSVMGetAttr");
      if (HSAKMT_PFN(hsaKmtSVMGetAttr) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtSetXNACKMode) = (HSAKMT_DEF(hsaKmtSetXNACKMode)*)dlsym(dtif_handle, "hsaKmtSetXNACKMode");
      if (HSAKMT_PFN(hsaKmtSetXNACKMode) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetXNACKMode) = (HSAKMT_DEF(hsaKmtGetXNACKMode)*)dlsym(dtif_handle, "hsaKmtGetXNACKMode");
      if (HSAKMT_PFN(hsaKmtGetXNACKMode) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtOpenSMI) = (HSAKMT_DEF(hsaKmtOpenSMI)*)dlsym(dtif_handle, "hsaKmtOpenSMI");
      if (HSAKMT_PFN(hsaKmtOpenSMI) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtExportDMABufHandle) = (HSAKMT_DEF(hsaKmtExportDMABufHandle)*)dlsym(dtif_handle, "hsaKmtExportDMABufHandle");
      if (HSAKMT_PFN(hsaKmtExportDMABufHandle) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) = (HSAKMT_DEF(hsaKmtWaitOnEvent_Ext)*)dlsym(dtif_handle, "hsaKmtWaitOnEvent_Ext");
      if (HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents_Ext)*)dlsym(dtif_handle, "hsaKmtWaitOnMultipleEvents_Ext");
      if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReplaceAsanHeaderPage)*)dlsym(dtif_handle, "hsaKmtReplaceAsanHeaderPage");
      if (HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReturnAsanHeaderPage)*)dlsym(dtif_handle, "hsaKmtReturnAsanHeaderPage");
      if (HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) = (HSAKMT_DEF(hsaKmtGetAMDGPUDeviceHandle)*)dlsym(dtif_handle, "hsaKmtGetAMDGPUDeviceHandle");
      if (HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) = (HSAKMT_DEF(hsaKmtPcSamplingQueryCapabilities)*)dlsym(dtif_handle, "hsaKmtPcSamplingQueryCapabilities");
      if (HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingCreate) = (HSAKMT_DEF(hsaKmtPcSamplingCreate)*)dlsym(dtif_handle, "hsaKmtPcSamplingCreate");
      if (HSAKMT_PFN(hsaKmtPcSamplingCreate) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingDestroy) = (HSAKMT_DEF(hsaKmtPcSamplingDestroy)*)dlsym(dtif_handle, "hsaKmtPcSamplingDestroy");
      if (HSAKMT_PFN(hsaKmtPcSamplingDestroy) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingStart) = (HSAKMT_DEF(hsaKmtPcSamplingStart)*)dlsym(dtif_handle, "hsaKmtPcSamplingStart");
      if (HSAKMT_PFN(hsaKmtPcSamplingStart) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingStop) = (HSAKMT_DEF(hsaKmtPcSamplingStop)*)dlsym(dtif_handle, "hsaKmtPcSamplingStop");
      if (HSAKMT_PFN(hsaKmtPcSamplingStop) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtPcSamplingSupport) = (HSAKMT_DEF(hsaKmtPcSamplingSupport)*)dlsym(dtif_handle, "hsaKmtPcSamplingSupport");
      if (HSAKMT_PFN(hsaKmtPcSamplingSupport) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)dlsym(dtif_handle, "hsaKmtModelEnabled");
      if (HSAKMT_PFN(hsaKmtModelEnabled) == NULL) goto ERROR;

      HSAKMT_PFN(hsaKmtQueueRingDoorbell) = (HSAKMT_DEF(hsaKmtQueueRingDoorbell)*)dlsym(dtif_handle, "hsaKmtQueueRingDoorbell");
      if (HSAKMT_PFN(hsaKmtQueueRingDoorbell) == NULL) goto ERROR;

      DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)dlsym(dtif_handle, "amdgpu_device_initialize");
      if (DRM_PFN(amdgpu_device_initialize) == NULL) goto ERROR;

      DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)dlsym(dtif_handle, "amdgpu_device_deinitialize");
      if (DRM_PFN(amdgpu_device_deinitialize) == NULL) goto ERROR;

      DRM_PFN(amdgpu_query_gpu_info) = (DRM_DEF(amdgpu_query_gpu_info)*)dlsym(dtif_handle, "amdgpu_query_gpu_info");
      if (DRM_PFN(amdgpu_query_gpu_info) == NULL) goto ERROR;

      DRM_PFN(amdgpu_bo_cpu_map) = (DRM_DEF(amdgpu_bo_cpu_map)*)dlsym(dtif_handle, "amdgpu_bo_cpu_map");
      if (DRM_PFN(amdgpu_bo_cpu_map) == NULL) goto ERROR;

      DRM_PFN(amdgpu_bo_free) = (DRM_DEF(amdgpu_bo_free)*)dlsym(dtif_handle, "amdgpu_bo_free");
      if (DRM_PFN(amdgpu_bo_free) == NULL) goto ERROR;

      DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)dlsym(dtif_handle, "amdgpu_bo_export");
      if (DRM_PFN(amdgpu_bo_export) == NULL) goto ERROR;

      DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)dlsym(dtif_handle, "amdgpu_bo_import");
      if (DRM_PFN(amdgpu_bo_import) == NULL) goto ERROR;

      DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)dlsym(dtif_handle, "amdgpu_bo_va_op");
      if (DRM_PFN(amdgpu_bo_va_op) == NULL) goto ERROR;

      DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)dlsym(dtif_handle, "drmCommandWriteRead");
      if (DRM_PFN(drmCommandWriteRead) == NULL) goto ERROR;

      debug_print("Load all DTIF APIs OK!\n");
      return;

ERROR:
      fprintf(stderr, "dlsym failed: %s\n", dlerror());
    } else {
      HSAKMT_PFN(hsaKmtOpenKFD) = (HSAKMT_DEF(hsaKmtOpenKFD)*)(&hsaKmtOpenKFD);
      HSAKMT_PFN(hsaKmtCloseKFD) = (HSAKMT_DEF(hsaKmtCloseKFD)*)(&hsaKmtCloseKFD);
      HSAKMT_PFN(hsaKmtGetVersion) = (HSAKMT_DEF(hsaKmtGetVersion)*)(&hsaKmtGetVersion);
      HSAKMT_PFN(hsaKmtAcquireSystemProperties) = (HSAKMT_DEF(hsaKmtAcquireSystemProperties)*)(&hsaKmtAcquireSystemProperties);
      HSAKMT_PFN(hsaKmtReleaseSystemProperties) = (HSAKMT_DEF(hsaKmtReleaseSystemProperties)*)(&hsaKmtReleaseSystemProperties);
      HSAKMT_PFN(hsaKmtGetNodeProperties) = (HSAKMT_DEF(hsaKmtGetNodeProperties)*)(&hsaKmtGetNodeProperties);
      HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) = (HSAKMT_DEF(hsaKmtGetNodeMemoryProperties)*)(&hsaKmtGetNodeMemoryProperties);
      HSAKMT_PFN(hsaKmtGetNodeCacheProperties) = (HSAKMT_DEF(hsaKmtGetNodeCacheProperties)*)(&hsaKmtGetNodeCacheProperties);
      HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) = (HSAKMT_DEF(hsaKmtGetNodeIoLinkProperties)*)(&hsaKmtGetNodeIoLinkProperties);
      HSAKMT_PFN(hsaKmtCreateEvent) = (HSAKMT_DEF(hsaKmtCreateEvent)*)(&hsaKmtCreateEvent);
      HSAKMT_PFN(hsaKmtDestroyEvent) = (HSAKMT_DEF(hsaKmtDestroyEvent)*)(&hsaKmtDestroyEvent);
      HSAKMT_PFN(hsaKmtSetEvent) = (HSAKMT_DEF(hsaKmtSetEvent)*)(&hsaKmtSetEvent);
      HSAKMT_PFN(hsaKmtResetEvent) = (HSAKMT_DEF(hsaKmtResetEvent)*)(&hsaKmtResetEvent);
      HSAKMT_PFN(hsaKmtQueryEventState) = (HSAKMT_DEF(hsaKmtQueryEventState)*)(&hsaKmtQueryEventState);
      HSAKMT_PFN(hsaKmtWaitOnEvent) = (HSAKMT_DEF(hsaKmtWaitOnEvent)*)(&hsaKmtWaitOnEvent);
      HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents)*)(&hsaKmtWaitOnMultipleEvents);
      HSAKMT_PFN(hsaKmtCreateQueue) = (HSAKMT_DEF(hsaKmtCreateQueue)*)(&hsaKmtCreateQueue);
      HSAKMT_PFN(hsaKmtCreateQueueExt) = (HSAKMT_DEF(hsaKmtCreateQueueExt)*)(&hsaKmtCreateQueueExt);
      HSAKMT_PFN(hsaKmtUpdateQueue) = (HSAKMT_DEF(hsaKmtUpdateQueue)*)(&hsaKmtUpdateQueue);
      HSAKMT_PFN(hsaKmtDestroyQueue) = (HSAKMT_DEF(hsaKmtDestroyQueue)*)(&hsaKmtDestroyQueue);
      HSAKMT_PFN(hsaKmtSetQueueCUMask) = (HSAKMT_DEF(hsaKmtSetQueueCUMask)*)(&hsaKmtSetQueueCUMask);
      HSAKMT_PFN(hsaKmtSetMemoryPolicy) = (HSAKMT_DEF(hsaKmtSetMemoryPolicy)*)(&hsaKmtSetMemoryPolicy);
      HSAKMT_PFN(hsaKmtAllocMemory) = (HSAKMT_DEF(hsaKmtAllocMemory)*)(&hsaKmtAllocMemory);
      HSAKMT_PFN(hsaKmtAllocMemoryAlign) = (HSAKMT_DEF(hsaKmtAllocMemoryAlign)*)(&hsaKmtAllocMemoryAlign);
      HSAKMT_PFN(hsaKmtFreeMemory) = (HSAKMT_DEF(hsaKmtFreeMemory)*)(&hsaKmtFreeMemory);
      HSAKMT_PFN(hsaKmtAvailableMemory) = (HSAKMT_DEF(hsaKmtAvailableMemory)*)(&hsaKmtAvailableMemory);
      HSAKMT_PFN(hsaKmtRegisterMemory) = (HSAKMT_DEF(hsaKmtRegisterMemory)*)(&hsaKmtRegisterMemory);
      HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) = (HSAKMT_DEF(hsaKmtRegisterMemoryToNodes)*)(&hsaKmtRegisterMemoryToNodes);
      HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) = (HSAKMT_DEF(hsaKmtRegisterMemoryWithFlags)*)(&hsaKmtRegisterMemoryWithFlags);
      HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodes)*)(&hsaKmtRegisterGraphicsHandleToNodes);
      HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) = (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodesExt)*)(&hsaKmtRegisterGraphicsHandleToNodesExt);
      HSAKMT_PFN(hsaKmtShareMemory) = (HSAKMT_DEF(hsaKmtShareMemory)*)(&hsaKmtShareMemory);
      HSAKMT_PFN(hsaKmtRegisterSharedHandle) = (HSAKMT_DEF(hsaKmtRegisterSharedHandle)*)(&hsaKmtRegisterSharedHandle);
      HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterSharedHandleToNodes)*)(&hsaKmtRegisterSharedHandleToNodes);
      HSAKMT_PFN(hsaKmtProcessVMRead) = (HSAKMT_DEF(hsaKmtProcessVMRead)*)(&hsaKmtProcessVMRead);
      HSAKMT_PFN(hsaKmtProcessVMWrite) = (HSAKMT_DEF(hsaKmtProcessVMWrite)*)(&hsaKmtProcessVMWrite);
      HSAKMT_PFN(hsaKmtDeregisterMemory) = (HSAKMT_DEF(hsaKmtDeregisterMemory)*)(&hsaKmtDeregisterMemory);
      HSAKMT_PFN(hsaKmtMapMemoryToGPU) = (HSAKMT_DEF(hsaKmtMapMemoryToGPU)*)(&hsaKmtMapMemoryToGPU);
      HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) = (HSAKMT_DEF(hsaKmtMapMemoryToGPUNodes)*)(&hsaKmtMapMemoryToGPUNodes);
      HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) = (HSAKMT_DEF(hsaKmtUnmapMemoryToGPU)*)(&hsaKmtUnmapMemoryToGPU);
      HSAKMT_PFN(hsaKmtDbgRegister) = (HSAKMT_DEF(hsaKmtDbgRegister)*)(&hsaKmtDbgRegister);
      HSAKMT_PFN(hsaKmtDbgUnregister) = (HSAKMT_DEF(hsaKmtDbgUnregister)*)(&hsaKmtDbgUnregister);
      HSAKMT_PFN(hsaKmtDbgWavefrontControl) = (HSAKMT_DEF(hsaKmtDbgWavefrontControl)*)(&hsaKmtDbgWavefrontControl);
      HSAKMT_PFN(hsaKmtDbgAddressWatch) = (HSAKMT_DEF(hsaKmtDbgAddressWatch)*)(&hsaKmtDbgAddressWatch);
      HSAKMT_PFN(hsaKmtDbgEnable) = (HSAKMT_DEF(hsaKmtDbgEnable)*)(&hsaKmtDbgEnable);
      HSAKMT_PFN(hsaKmtDbgDisable) = (HSAKMT_DEF(hsaKmtDbgDisable)*)(&hsaKmtDbgDisable);
      HSAKMT_PFN(hsaKmtDbgGetDeviceData) = (HSAKMT_DEF(hsaKmtDbgGetDeviceData)*)(&hsaKmtDbgGetDeviceData);
      HSAKMT_PFN(hsaKmtDbgGetQueueData) = (HSAKMT_DEF(hsaKmtDbgGetQueueData)*)(&hsaKmtDbgGetQueueData);
      HSAKMT_PFN(hsaKmtGetClockCounters) = (HSAKMT_DEF(hsaKmtGetClockCounters)*)(&hsaKmtGetClockCounters);
      HSAKMT_PFN(hsaKmtPmcGetCounterProperties) = (HSAKMT_DEF(hsaKmtPmcGetCounterProperties)*)(&hsaKmtPmcGetCounterProperties);
      HSAKMT_PFN(hsaKmtPmcRegisterTrace) = (HSAKMT_DEF(hsaKmtPmcRegisterTrace)*)(&hsaKmtPmcRegisterTrace);
      HSAKMT_PFN(hsaKmtPmcUnregisterTrace) = (HSAKMT_DEF(hsaKmtPmcUnregisterTrace)*)(&hsaKmtPmcUnregisterTrace);
      HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) = (HSAKMT_DEF(hsaKmtPmcAcquireTraceAccess)*)(&hsaKmtPmcAcquireTraceAccess);
      HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) = (HSAKMT_DEF(hsaKmtPmcReleaseTraceAccess)*)(&hsaKmtPmcReleaseTraceAccess);
      HSAKMT_PFN(hsaKmtPmcStartTrace) = (HSAKMT_DEF(hsaKmtPmcStartTrace)*)(&hsaKmtPmcStartTrace);
      HSAKMT_PFN(hsaKmtPmcQueryTrace) = (HSAKMT_DEF(hsaKmtPmcQueryTrace)*)(&hsaKmtPmcQueryTrace);
      HSAKMT_PFN(hsaKmtPmcStopTrace) = (HSAKMT_DEF(hsaKmtPmcStopTrace)*)(&hsaKmtPmcStopTrace);
      HSAKMT_PFN(hsaKmtMapGraphicHandle) = (HSAKMT_DEF(hsaKmtMapGraphicHandle)*)(&hsaKmtMapGraphicHandle);
      HSAKMT_PFN(hsaKmtUnmapGraphicHandle) = (HSAKMT_DEF(hsaKmtUnmapGraphicHandle)*)(&hsaKmtUnmapGraphicHandle);
      HSAKMT_PFN(hsaKmtSetTrapHandler) = (HSAKMT_DEF(hsaKmtSetTrapHandler)*)(&hsaKmtSetTrapHandler);
      HSAKMT_PFN(hsaKmtGetTileConfig) = (HSAKMT_DEF(hsaKmtGetTileConfig)*)(&hsaKmtGetTileConfig);
      HSAKMT_PFN(hsaKmtQueryPointerInfo) = (HSAKMT_DEF(hsaKmtQueryPointerInfo)*)(&hsaKmtQueryPointerInfo);
      HSAKMT_PFN(hsaKmtSetMemoryUserData) = (HSAKMT_DEF(hsaKmtSetMemoryUserData)*)(&hsaKmtSetMemoryUserData);
      HSAKMT_PFN(hsaKmtGetQueueInfo) = (HSAKMT_DEF(hsaKmtGetQueueInfo)*)(&hsaKmtGetQueueInfo);
      HSAKMT_PFN(hsaKmtAllocQueueGWS) = (HSAKMT_DEF(hsaKmtAllocQueueGWS)*)(&hsaKmtAllocQueueGWS);
      HSAKMT_PFN(hsaKmtRuntimeEnable) = (HSAKMT_DEF(hsaKmtRuntimeEnable)*)(&hsaKmtRuntimeEnable);
      HSAKMT_PFN(hsaKmtRuntimeDisable) = (HSAKMT_DEF(hsaKmtRuntimeDisable)*)(&hsaKmtRuntimeDisable);
      HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) = (HSAKMT_DEF(hsaKmtCheckRuntimeDebugSupport)*)(&hsaKmtCheckRuntimeDebugSupport);
      HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) = (HSAKMT_DEF(hsaKmtGetRuntimeCapabilities)*)(&hsaKmtGetRuntimeCapabilities);
      HSAKMT_PFN(hsaKmtDebugTrapIoctl) = (HSAKMT_DEF(hsaKmtDebugTrapIoctl)*)(&hsaKmtDebugTrapIoctl);
      HSAKMT_PFN(hsaKmtSPMAcquire) = (HSAKMT_DEF(hsaKmtSPMAcquire)*)(&hsaKmtSPMAcquire);
      HSAKMT_PFN(hsaKmtSPMRelease) = (HSAKMT_DEF(hsaKmtSPMRelease)*)(&hsaKmtSPMRelease);
      HSAKMT_PFN(hsaKmtSPMSetDestBuffer) = (HSAKMT_DEF(hsaKmtSPMSetDestBuffer)*)(&hsaKmtSPMSetDestBuffer);
      HSAKMT_PFN(hsaKmtSVMSetAttr) = (HSAKMT_DEF(hsaKmtSVMSetAttr)*)(&hsaKmtSVMSetAttr);
      HSAKMT_PFN(hsaKmtSVMGetAttr) = (HSAKMT_DEF(hsaKmtSVMGetAttr)*)(&hsaKmtSVMGetAttr);
      HSAKMT_PFN(hsaKmtSetXNACKMode) = (HSAKMT_DEF(hsaKmtSetXNACKMode)*)(&hsaKmtSetXNACKMode);
      HSAKMT_PFN(hsaKmtGetXNACKMode) = (HSAKMT_DEF(hsaKmtGetXNACKMode)*)(&hsaKmtGetXNACKMode);
      HSAKMT_PFN(hsaKmtOpenSMI) = (HSAKMT_DEF(hsaKmtOpenSMI)*)(&hsaKmtOpenSMI);
      HSAKMT_PFN(hsaKmtExportDMABufHandle) = (HSAKMT_DEF(hsaKmtExportDMABufHandle)*)(&hsaKmtExportDMABufHandle);
      HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) = (HSAKMT_DEF(hsaKmtWaitOnEvent_Ext)*)(&hsaKmtWaitOnEvent_Ext);
      HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents_Ext)*)(&hsaKmtWaitOnMultipleEvents_Ext);
      HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReplaceAsanHeaderPage)*)(&hsaKmtReplaceAsanHeaderPage);
      HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReturnAsanHeaderPage)*)(&hsaKmtReturnAsanHeaderPage);
      HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) = (HSAKMT_DEF(hsaKmtGetAMDGPUDeviceHandle)*)(&hsaKmtGetAMDGPUDeviceHandle);
      HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) = (HSAKMT_DEF(hsaKmtPcSamplingQueryCapabilities)*)(&hsaKmtPcSamplingQueryCapabilities);
      HSAKMT_PFN(hsaKmtPcSamplingCreate) = (HSAKMT_DEF(hsaKmtPcSamplingCreate)*)(&hsaKmtPcSamplingCreate);
      HSAKMT_PFN(hsaKmtPcSamplingDestroy) = (HSAKMT_DEF(hsaKmtPcSamplingDestroy)*)(&hsaKmtPcSamplingDestroy);
      HSAKMT_PFN(hsaKmtPcSamplingStart) = (HSAKMT_DEF(hsaKmtPcSamplingStart)*)(&hsaKmtPcSamplingStart);
      HSAKMT_PFN(hsaKmtPcSamplingStop) = (HSAKMT_DEF(hsaKmtPcSamplingStop)*)(&hsaKmtPcSamplingStop);
      HSAKMT_PFN(hsaKmtPcSamplingSupport) = (HSAKMT_DEF(hsaKmtPcSamplingSupport)*)(&hsaKmtPcSamplingSupport);
      HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)(&hsaKmtModelEnabled);

      DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)(&amdgpu_device_initialize);
      DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)(&amdgpu_device_deinitialize);
      DRM_PFN(amdgpu_query_gpu_info) = (DRM_DEF(amdgpu_query_gpu_info)*)(&amdgpu_query_gpu_info);
      DRM_PFN(amdgpu_bo_cpu_map) = (DRM_DEF(amdgpu_bo_cpu_map)*)(&amdgpu_bo_cpu_map);
      DRM_PFN(amdgpu_bo_free) = (DRM_DEF(amdgpu_bo_free)*)(&amdgpu_bo_free);
      DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)(&amdgpu_bo_export);
      DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)(&amdgpu_bo_import);
      DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)(&amdgpu_bo_va_op);
      DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)(&drmCommandWriteRead);
    }
  }

  bool ThunkLoader::CreateThunkInstance() {
    if (!core::Runtime::runtime_singleton_->flag().enable_dtif())
      return true;

    DtifCreateFunc* pfnDtifCreate = (DtifCreateFunc*)dlsym(dtif_handle, "DtifCreate");
    if (pfnDtifCreate != NULL) {
      if (pfnDtifCreate("HSA") != NULL) {
        debug_print("DtifCreate OK!\n");
        return true;
      } else {
        debug_print("DtifCreate failed!\n");
        return false;
      }
    }
    return false;
  }

  bool ThunkLoader::DestroyThunkInstance() {
    if (!core::Runtime::runtime_singleton_->flag().enable_dtif())
      return true;

    if (dtif_handle == NULL)
      return false;

    DtifDestroyFunc* pfnDtifDestroy = (DtifDestroyFunc*)dlsym(dtif_handle, "DtifDestroy");
    if (pfnDtifDestroy != NULL) {
      pfnDtifDestroy();
      debug_print("DtifDestroy OK!\n");
      return true;
    }
    return false;
  }
}   //  namespace core
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.7 )

# Import target 'clang' and 'llvm-objcopy'
# Both packages are REQUIRED. They are searched for under
# ${CMAKE_PREFIX_PATH}/llvm (HINTS) and /opt/rocm/llvm (PATHS).
find_package(Clang REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
find_package(LLVM REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )

# Three index-aligned lists with one entry per trap handler build. They are
# read together, by index, in build_kernel_for_devices():
#   TARGET_DEVS   - GPU name passed to clang as -mcpu
#   POSTFIX       - appended to the handler name to form the output blob name
#   SOURCE_SUFFIX - inserted into the source name trap_handler<suffix>.s;
#                   an empty entry selects trap_handler.s
# All three lists must keep the same number of entries.
set (TARGET_DEVS "gfx900;gfx942;gfx950;gfx1010;gfx1030;gfx1100;gfx1200")
set (POSTFIX "9;942;950;1010;10;11;12")
set (SOURCE_SUFFIX ";;;;;;_gfx12")

# With CMAKE_VERBOSE_MAKEFILE on, report which tools and targets are in use.
if(${CMAKE_VERBOSE_MAKEFILE})
  get_property(clang_path TARGET clang PROPERTY LOCATION)
  get_property(objcopy_path TARGET llvm-objcopy PROPERTY LOCATION)
  message("Using clang from: ${clang_path}")
  message("Using llvm-objcopy from: ${objcopy_path}")
  message("Trap handlers assembled for: ${TARGET_DEVS}")
endif()

##==========================================
##  Add custom command to generate a kernel code object file
##==========================================
function(gen_kernel_bc TARGET_ID INPUT_FILE OUTPUT_FILE)

  ## Arguments:
  ##   TARGET_ID   - GPU name handed to clang as -mcpu
  ##   INPUT_FILE  - trap handler assembly source
  ##   OUTPUT_FILE - raw .text blob to produce; the intermediate code object
  ##                 is written as <OUTPUT_FILE>.hsaco
  set(CODE_OBJECT "${OUTPUT_FILE}.hsaco")

  ## Add custom command to produce a code object file.
  ## The arguments are given as a CMake list instead of being re-parsed by
  ## separate_arguments(UNIX_COMMAND ...), which splits on whitespace and so
  ## broke the command line when the source tree path contained a space.
  add_custom_command(OUTPUT "${CODE_OBJECT}"
    COMMAND clang -x assembler -target amdgcn-amd-amdhsa "-mcpu=${TARGET_ID}"
            -o "${CODE_OBJECT}" "${INPUT_FILE}"
    DEPENDS "${INPUT_FILE}" clang
    COMMENT "BUILDING bitcode for ${OUTPUT_FILE}..."
    VERBATIM)

  ## Extract .text segment
  add_custom_command(OUTPUT "${OUTPUT_FILE}"
                     COMMAND llvm-objcopy "--dump-section=.text=${OUTPUT_FILE}" "${CODE_OBJECT}"
                     DEPENDS "${CODE_OBJECT}" llvm-objcopy
                     COMMENT "Extracting binary for ${OUTPUT_FILE}..."
                     VERBATIM)

  if(${CMAKE_VERBOSE_MAKEFILE})
    message("     Trap Handler Source: " ${INPUT_FILE})
    message("     Trap Handler Binary: " ${OUTPUT_FILE})
  endif()

endfunction(gen_kernel_bc)

##==========================================
## Find device code object name and forward to custom command
##==========================================
function(build_kernel TRAP_HANDLER_NAME TARGET_ID POSTFIX SOURCE_SUFFIX)

  ## Output blob is <name>_<postfix>; the assembly source is chosen by suffix.
  set (blob_name "${TRAP_HANDLER_NAME}_${POSTFIX}")
  set (asm_source "${CMAKE_CURRENT_SOURCE_DIR}/trap_handler${SOURCE_SUFFIX}.s")

  ## Register the assemble + extract rules for this target.
  gen_kernel_bc(${TARGET_ID} ${asm_source} ${blob_name})

  ## Append this blob to the caller's list of generated files.
  ## These will be target dependencies.
  list (APPEND HSACO_TARG_LIST "${blob_name}")
  set (HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)

endfunction()

##==========================================
## Build the kernel for a list of devices
##==========================================
function(build_kernel_for_devices TRAP_HANDLER_NAME)

  ## Start from an empty list; each build_kernel() call appends one blob name.
  set(HSACO_TARG_LIST "")

  ## Walk TARGET_DEVS, POSTFIX and SOURCE_SUFFIX in lock-step by index.
  list(LENGTH TARGET_DEVS last_index)
  math(EXPR last_index "${last_index} - 1")
  foreach(idx RANGE ${last_index})
    list(GET TARGET_DEVS   ${idx} gpu_target)
    list(GET POSTFIX       ${idx} blob_postfix)
    list(GET SOURCE_SUFFIX ${idx} asm_suffix)
    if(${CMAKE_VERBOSE_MAKEFILE})
      message("\n  Generating: ${gpu_target} ...")
    endif()
    ## The suffix stays quoted so that an empty entry is still passed through.
    build_kernel(${TRAP_HANDLER_NAME} ${gpu_target} ${blob_postfix} "${asm_suffix}")
  endforeach()

  ## Hand the accumulated list of blob names back to the caller.
  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)

endfunction()

##==========================================
## Create Trap Handler Object Code blobs file
##==========================================
function(generate_bytecodeStrm HeaderFILE)

  ## HeaderFILE is the header name without extension. The list of blobs to
  ## embed is read from HSACO_TARG_LIST in the calling scope.
  ##
  ## The output path is kept as a single argument instead of being re-parsed by
  ## separate_arguments(UNIX_COMMAND ...), which split it on whitespace when
  ## the build directory path contained a space.
  set(header_path "${CMAKE_CURRENT_BINARY_DIR}/${HeaderFILE}.h")

  ## Add a custom command that generates amd_trap_handler_v2.h
  ## This depends on all the generated binary blobs and the shell generator script.
  add_custom_command(OUTPUT "${header_path}"
                     COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/create_trap_handler_header.sh"
                             "${header_path}" ${HSACO_TARG_LIST}
                     COMMENT "Collating trap handlers..."
                     DEPENDS ${HSACO_TARG_LIST}
                             "${CMAKE_CURRENT_SOURCE_DIR}/create_trap_handler_header.sh"
                     VERBATIM)

  ## Export a target that builds (and depends on) amd_trap_handler_v2.h
  add_custom_target( ${HeaderFILE} DEPENDS "${header_path}" )

endfunction(generate_bytecodeStrm)

##==========================================
## Main function calls
##==========================================

# Register the per-target build rules; the output blobs are named
# kCodeTrapHandlerV2_<postfix> and collected in HSACO_TARG_LIST.
build_kernel_for_devices("kCodeTrapHandlerV2")
# Create the 'amd_trap_handler_v2' target that generates amd_trap_handler_v2.h
# from the blobs listed in HSACO_TARG_LIST.
generate_bytecodeStrm("amd_trap_handler_v2")


================================================
FILE: runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh
================================================
#!/bin/bash -e
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Usage: create_trap_handler_header.sh <output-header> [blob ...]
#
# Writes a C++ header that contains an `xxd -i` dump of every blob, wrapped in
# namespace rocr { namespace AMD { ... } }.
amd_gpu_shaders="$1"

if [ -z "$amd_gpu_shaders" ]
then
    echo "usage: $0 <output-header> [blob ...]" >&2
    exit 1
fi

if ! command -v xxd >/dev/null
then
    echo "xxd not found!" >&2
    exit 1
fi

# Create the file in a temporary location and then move it in atomically.
# The temporary file lives next to the destination so the final mv is a rename
# on the same filesystem; it is removed on exit if the script fails part-way,
# so a half-written header is never left behind for the build to pick up.
tmp_header="${amd_gpu_shaders}.tmp.$$"
trap 'rm -f "$tmp_header"' EXIT

{
cat <<EOF
//==============================================================================
//  This file is automatically generated during build process, don't modify it
//==============================================================================

namespace rocr {
namespace AMD {

EOF

shift
for file in "$@"
do
    # Quoted so that a blob path containing whitespace stays one argument.
    xxd -i "$file"
    echo -e '\n'
done

cat <<EOF
} // namespace AMD
} // namespace rocr

EOF

} > "$tmp_header"

mv -f "$tmp_header" "$amd_gpu_shaders"


================================================
FILE: runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

/// Trap Handler V2 source
// Naming convention for the symbols below: *_SHIFT is a bit index, *_SIZE is a
// field width in bits, *_MASK is an already-shifted mask, and *_BFE packs
// (shift | (size << 16)) into one operand, as passed to s_bfe_u32 at
// trap_entry.

// Fields of the PC_HI word held in ttmp1: PC[47:32] in the low 16 bits, the
// 8-bit trap id at bit 16 and the host-trap flag at bit 24.
.set SQ_WAVE_PC_HI_ADDRESS_MASK              , 0xFFFF
.set SQ_WAVE_PC_HI_HT_SHIFT                  , 24
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT             , 16
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE              , 8
.set SQ_WAVE_PC_HI_TRAP_ID_BFE               , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
// SQ_WAVE_STATUS bit positions; HALT_BFE selects the single HALT bit.
.set SQ_WAVE_STATUS_HALT_SHIFT               , 13
.set SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT   , 18
.set SQ_WAVE_STATUS_HALT_BFE                 , (SQ_WAVE_STATUS_HALT_SHIFT | (1 << 16))
// SQ_WAVE_TRAPSTS bit positions; MATH_EXCP is a mask covering bits 6:0.
.set SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT          , 8
.set SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT      , 11
.set SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT       , 28
.set SQ_WAVE_TRAPSTS_MATH_EXCP               , 0x7F
.set SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT     , 26
.set SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT         , 22
// SQ_WAVE_MODE EXCP_EN field: 8 bits starting at bit 12.
.set SQ_WAVE_MODE_EXCP_EN_SHIFT              , 12
.set SQ_WAVE_MODE_EXCP_EN_SIZE               , 8
// Named trap id values.
.set TRAP_ID_ABORT                           , 2
.set TRAP_ID_DEBUGTRAP                       , 3
// The doorbell id occupies the low DOORBELL_ID_SIZE bits; each EC_QUEUE_WAVE_*
// value is a single-bit flag placed directly above that field (bits 10..15).
// NOTE(review): the _M0 suffix suggests these are OR-ed into M0 - confirm
// against the code that uses them.
.set DOORBELL_ID_SIZE                        , 10
.set DOORBELL_ID_MASK                        , ((1 << DOORBELL_ID_SIZE) - 1)
.set EC_QUEUE_WAVE_ABORT_M0                  , (1 << (DOORBELL_ID_SIZE + 0))
.set EC_QUEUE_WAVE_TRAP_M0                   , (1 << (DOORBELL_ID_SIZE + 1))
.set EC_QUEUE_WAVE_MATH_ERROR_M0             , (1 << (DOORBELL_ID_SIZE + 2))
.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0    , (1 << (DOORBELL_ID_SIZE + 3))
.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0       , (1 << (DOORBELL_ID_SIZE + 4))
.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0     , (1 << (DOORBELL_ID_SIZE + 5))

// Bookkeeping bits in the upper part of ttmp6: flags at bits 31, 30 and 29,
// and a 4-bit saved trap id in bits 28:25.
.set TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT    , 31
.set TTMP6_WAVE_STOPPED_SHIFT                , 30
.set TTMP6_SAVED_STATUS_HALT_SHIFT           , 29
.set TTMP6_SAVED_STATUS_HALT_MASK            , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
.set TTMP6_SAVED_TRAP_ID_SHIFT               , 25
.set TTMP6_SAVED_TRAP_ID_SIZE                , 4
.set TTMP6_SAVED_TRAP_ID_MASK                , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
.set TTMP6_SAVED_TRAP_ID_BFE                 , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))

// Bit positions inside a ttmp register; bit 23 matches the DebugEnabled bit
// listed in the ABI comment further down.
.set TTMP_PC_HI_SHIFT                        , 7
.set TTMP_DEBUG_ENABLED_SHIFT                , 23

// SQ_WAVE_IB_STS replay fields and where they are saved in a ttmp register.
// Only gfx9 and gfx10 with minor < 3 define these symbols; for every other
// target none of them exist.
.if .amdgcn.gfx_generation_number == 9
  // gfx9: IB_STS bits 20:15 (mask 0x1F8000) are saved starting at ttmp bit 26.
  .set TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT     , 26
  .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     , 15
  .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
.elseif .amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor < 3
  // gfx10 (minor < 3): IB_STS bits 21:15 (mask 0x3F8000) are saved starting at
  // ttmp bit 24, and the REPLAY_W64H bit (IB_STS bit 25) is saved at ttmp bit 31.
  .set TTMP_SAVE_REPLAY_W64H_SHIFT           , 31
  .set TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT     , 24
  .set SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT      , 25
  .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT     , 15
  .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x3F8000
  .set SQ_WAVE_IB_STS_REPLAY_W64H_MASK       , 0x2000000
.endif

// Defining TTMP_REG1 and TTMP_REG2 for clarity in comments
// TTMP_REG1 means ttmp6 register if gfx>=942 and means ttmp13 register if gfx<942
// TTMP_REG2 means ttmp11 register if gfx>=942 and means ttmp6 register if gfx<942

.if .amdgcn.gfx_generation_number == 9
  // gfx9 only: bit assignments inside ttmp11 / ttmp13. Which register carries
  // the host-trap flag depends on the minor generation: ttmp13 when
  // minor >= 4, ttmp11 otherwise. The bit index (22) is the same in both.
  .set TTMP11_TTMPS_SETUP_SHIFT              , 31

.if (.amdgcn.gfx_generation_minor >= 4)
  // Mask covering the low six bits of ttmp11.
  .set TTMP11_WAVE_IN_WG_MASK                , 0x3F

  // Bit to indicate that this is a stochastic trap
  .set TTMP13_PCS_IS_STOCHASTIC              , 21

  // Bit to indicate that this is a host trap
  .set TTMP13_PCS_IS_HOSTTRAP                , 22

.else

  // Bit to indicate that this is a host trap
  .set TTMP11_PCS_IS_HOSTTRAP                , 22
.endif
.endif

.if (.amdgcn.gfx_generation_number == 9)

 // gfx9-only helper macros. Each one wraps a single scalar instruction and
 // selects the trap temporary register at assembly time:
 //   TTMP_REG1 -> ttmp6  when gfx_generation_minor >= 4, otherwise ttmp13
 //   TTMP_REG2 -> ttmp11 when gfx_generation_minor >= 4, otherwise ttmp6
 // NOTE(review): "PCS" in the names presumably stands for PC sampling -
 // confirm against the code that expands these macros.

 // TTMP_REG1 = dword loaded from \base at \offset.
 .macro S_LOAD_DWORD_PCS_TTMP_REG1 base, offset
  .if (.amdgcn.gfx_generation_minor >= 4)
     s_load_dword      ttmp6, \base, \offset
  .else
     s_load_dword      ttmp13,\base, \offset
  .endif
 .endm

 // Clear bit \bit_index in TTMP_REG2.
 .macro S_BITSET0_B32_PCS_TTMP_REG2 bit_index
  .if (.amdgcn.gfx_generation_minor >= 4)
     s_bitset0_b32     ttmp11, \bit_index
  .else
     s_bitset0_b32     ttmp6, \bit_index
  .endif
 .endm

 // Set bit \bit_index in TTMP_REG2.
 .macro S_BITSET1_B32_PCS_TTMP_REG2 bit_index
  .if (.amdgcn.gfx_generation_minor >= 4)
     s_bitset1_b32    ttmp11, \bit_index
  .else
     s_bitset1_b32    ttmp6, \bit_index
  .endif
 .endm

 // Unsigned compare: \src0 >= TTMP_REG1.
 .macro S_CMP_GE_U32_PCS_TTMP_REG1 src0
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_cmp_ge_u32      \src0, ttmp6
  .else
    s_cmp_ge_u32      \src0, ttmp13
  .endif
 .endm

 // TTMP_REG1 = \src0 (TTMP_REG1 is the destination).
 .macro S_MOV_B32_SRC_PCS_TTMP_REG1 src0
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_mov_b32        ttmp6, \src0
  .else
    s_mov_b32        ttmp13, \src0
  .endif
 .endm

 // \dst = TTMP_REG1 (TTMP_REG1 is the source).
 .macro S_MOV_B32_DST_PCS_TTMP_REG1 dst
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_mov_b32       \dst, ttmp6
  .else
    s_mov_b32       \dst, ttmp13
  .endif
 .endm

 // TTMP_REG1 = TTMP_REG2 >> \src1 (logical shift right).
 .macro S_LSHR_B32_PCS_TTMP_REG1_REG2 src1
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_lshr_b32       ttmp6, ttmp11, \src1
  .else
    s_lshr_b32       ttmp13, ttmp6, \src1
  .endif
 .endm

 // Store TTMP_REG1 as a dword to \base at \offset.
 .macro  S_STORE_DWORD_PCS_TTMP_REG1 base, offset
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_store_dword    ttmp6, \base, \offset
  .else
    s_store_dword    ttmp13, \base, \offset
  .endif
 .endm

 // TTMP_REG1 *= \const_val.
 .macro S_MULK_I32_PCS_TTMP_REG1 const_val
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_mulk_i32       ttmp6, \const_val
  .else
    s_mulk_i32       ttmp13, \const_val
  .endif
 .endm

 // \dst = \src0 + TTMP_REG1.
 .macro S_ADD_U32_PCS_TTMP_REG1  dst, src0
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_add_u32        \dst, \src0, ttmp6
  .else
    s_add_u32        \dst, \src0, ttmp13
  .endif
 .endm

 // Unsigned compare: \src0 != TTMP_REG1.
 .macro S_CMP_LG_U32_PCS_TTMP_REG1 src0
  .if (.amdgcn.gfx_generation_minor >= 4)
    s_cmp_lg_u32     \src0, ttmp6
  .else
    s_cmp_lg_u32     \src0, ttmp13
  .endif
 .endm

.endif

// ABI between first and second level trap handler:
//   ttmp0  = PC[31:0]
//   ttmp8  = WorkgroupIdX
//   ttmp9  = WorkgroupIdY
//   ttmp10 = WorkgroupIdZ
//   ttmp12 = SQ_WAVE_STATUS
//   ttmp14 = TMA[31:0]
//   ttmp15 = TMA[63:32]
// gfx9:
//   ttmp1 = 0[2:0], PCRewind[3:0], HostTrap[0], TrapId[7:0], PC[47:32]
// For all gfx9 (except gfx940, gfx941, gfx942):
//   ttmp6 = 0[6:0], DispatchPktIndx[24:0]
//   ttmp11 = SQ_WAVE_IB_STS[20:15], 0[1:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveInWg[5:0]
//
// For gfx940/gfx941/gfx942:
//   ttmp11 = 0[0], DispatchPktIndx[24:0], WaveIdInWg[5:0]
//   ttmp13:
//       Bits 31:26 : SQ_WAVE_IB_STS[20:15] (1TH)
//            25:24 : 0 on 2TH entry. Used by 1st level TH but also
//                    free to be used in the 2nd level TH
//            23    : Debug Enabled (1TH)
//            22:0  : values are unspecified on 2TH entry. Free.
//
// gfx10:
//   ttmp1 = 0[0], PCRewind[5:0], HostTrap[0], TrapId[7:0], PC[47:32]
//
// gfx10/gfx11:
//   ttmp6 = 0[6:0], DispatchPktIndx[24:0]
//
// gfx1010:
//   ttmp11 = SQ_WAVE_IB_STS[25], SQ_WAVE_IB_STS[21:15], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
//
// gfx1030/gfx1100:
//   ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
//
// ttmp[14:15] points to TMA2; Available: ttmp[2:3], ttmp[4:5]
//
// ttmp7 : gfx9, gfx1010, gfx1030, gfx11 - 31:0 : PC[31:0]  (2TH, DBG);
//       : gfx940 - free;
//       : gfx12 - ttmp7 - 31:16 : workgroup_z[15:0]  (SPI) and 15:0 : workgroup_y[15:0]  (SPI)

trap_entry:
  // Extract the trap_id field of ttmp1 into ttmp2. s_bfe_u32 sets SCC when the
  // extracted value is non-zero, so SCC == 0 means trap_id == 0.
  s_bfe_u32                             ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
  s_cbranch_scc0                        .not_s_trap                      // If trap_id == 0, it's not an s_trap nor host trap

  // Check if it was a host trap.
  s_bitcmp1_b32                         ttmp1, SQ_WAVE_PC_HI_HT_SHIFT
  s_cbranch_scc0                        .not_host_trap

.if (.amdgcn.gfx_generation_number == 9) // PC_SAMPLING_GFX9
  // ttmp[14:15] is TMA2; Available: ttmp[2:3], ttmp[4:5], ttmp7, TTMP_REG1
  // This is a host trap. For now, a host trap means we are PC sampling.
  //
  // TMA2 layout:
  //   [0x00] out_buf_t* host_trap_buffers;
  //   [0x08] out_buf_t* stochastic_trap_buffers;
  //
  // --- Start profile trap handlers GFX9 --- //
  // If the wave entered the trap handler:
  // If on gfx9:
  // - Check SQ_WAVE_PC_HI_HT_SHIFT bit on TTMP1 register to
  //   identify if it was a host trap.
  // If a host trap is detected:
  // - Set the host-trap flag: TTMP13_PCS_IS_HOSTTRAP in TTMP13 (gfx9.4+) or
  //   TTMP11_PCS_IS_HOSTTRAP in TTMP11 (other gfx9)
  // - Load host_trap_buffers
  // - Branch to the profile trap handler logic.
  //
  // If on gfx9.4+:
  // - Check TRAPSTS bit SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT to
  //   identify stochastic traps.
  // If a stochastic trap is detected:
  // - Set TTMP13_PCS_IS_STOCHASTIC in TTMP13 to indicate a stochastic trap.
  // - Branch to the profile trap handler logic.

  s_load_dwordx2                        ttmp[2:3], ttmp[14:15], 0 glc   // ttmp[2:3] = TMA2->host_trap_buffers (offset 0x00)
.if .amdgcn.gfx_generation_minor >= 4
  // Write 0 to the one-bit TRAPSTS field at SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT.
  s_setreg_imm32_b32                    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT, 1), 0
  s_bitset0_b32                         ttmp13, TTMP13_PCS_IS_STOCHASTIC // not a stochastic sample
  s_bitset1_b32                         ttmp13, TTMP13_PCS_IS_HOSTTRAP   // flag the sample as a host trap in TTMP13
.else
  s_bitset1_b32                         ttmp11, TTMP11_PCS_IS_HOSTTRAP    // flag the sample as a host trap in TTMP11
.endif
  s_waitcnt                             lgkmcnt(0)                      // wait for the host_trap_buffers load
  s_mov_b64                             ttmp[14:15], ttmp[2:3]          //now ttmp[14:15] = host_trap_buffers
  s_branch                              .profile_trap_handlers_gfx9     // Off to the profile handlers
.else
  // Ignore host traps.  They should be masked by the driver anyway.
  s_branch .not_s_trap
.endif

.not_host_trap:
  // It's an s_trap; advance the 64-bit PC in ttmp[0:1] by 4 bytes so the
  // wave resumes after the trapping instruction.
  s_add_u32                             ttmp0, ttmp0, 0x4
  s_addc_u32                            ttmp1, ttmp1, 0x0

  // If llvm.debugtrap and debugger is not attached.
  // ttmp2 still holds the trap_id extracted at trap_entry.
  s_cmp_eq_u32                          ttmp2, TRAP_ID_DEBUGTRAP
  s_cbranch_scc0                        .no_skip_debugtrap              // not a debugtrap: report it
  // The debug-enabled flag lives in ttmp11, except on gfx9 with minor >= 4
  // where it lives in ttmp13.  s_bitcmp0_b32 sets SCC when the bit is 0.
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor < 4) || .amdgcn.gfx_generation_number >= 10
  s_bitcmp0_b32                         ttmp11, TTMP_DEBUG_ENABLED_SHIFT
.else
  s_bitcmp0_b32                         ttmp13, TTMP_DEBUG_ENABLED_SHIFT
.endif
  s_cbranch_scc0                        .no_skip_debugtrap              // flag set (debug enabled): report it

  // Ignore llvm.debugtrap.
  s_branch                              .exit_trap

.not_s_trap:
  // Reached when trap_id == 0 (and for host traps on non-gfx9 targets).
.if .amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4
  //Check for stochastic trap on gfx9.4+
  s_getreg_b32                          ttmp7, hwreg(HW_REG_TRAPSTS)             // On gfx94x, TRAPSTS bit SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT ...
  s_bitcmp1_b32                         ttmp7, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT   // is stochastic_sample_trap
  s_cbranch_scc0                        .no_skip_debugtrap

  // Handle stochastic trap: write 0 to the TRAPSTS snapshot bit, flag the
  // sample kind in ttmp13 and switch ttmp[14:15] over to the stochastic buffers.
  s_setreg_imm32_b32                    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT, 1), 0
  s_load_dwordx2                        ttmp[2:3], ttmp[14:15], 0x8 glc // ttmp[2:3] = TMA2->stochastic_trap_buffers (offset 0x08)
  s_bitset0_b32                         ttmp13, TTMP13_PCS_IS_HOSTTRAP    // not a host-trap sample
  s_bitset1_b32                         ttmp13, TTMP13_PCS_IS_STOCHASTIC  // flag the sample as stochastic in TTMP13
  s_waitcnt                             lgkmcnt(0)                       // wait for the buffer pointer load
  s_mov_b64                             ttmp[14:15], ttmp[2:3]           // now ttmp[14:15] = stochastic_trap_buffers
  s_branch                              .profile_trap_handlers_gfx9      // Off to the profile handlers
.else
  s_branch                              .no_skip_debugtrap
.endif // gfx9 with minor >= 4 (stochastic sampling)

.if (.amdgcn.gfx_generation_number == 9) // PC_SAMPLING_GFX9
  // tma->host_trap_buffers Offsets:
  //    [0x00]  uint64_t buf_write_val;
  //    [0x08]  uint32_t buf_size;
  //    [0x0c]  uint32_t reserved0;
  //    [0x10]  uint32_t buf_written_val0;
  //    [0x14]  uint32_t buf_watermark0;
  //    [0x18]  hsa_signal_t done_sig0;
  //    [0x20]  uint32_t buf_written_val1;
  //    [0x24]  uint32_t buf_watermark1;
  //    [0x28]  hsa_signal_t done_sig1;
  //    [0x30]  uint8_t  reserved1[16];
  //    [0x40]  sample_t buffer0[buf_size];
  //    [0x40+(buf_size*sizeof(sample_t))]sample_t buffer1[buf_size];
  //
  //__global__ void profiling_trap_handler(out_buf_t* tma) {
  //  uint64_t local_entry = atomicAdd(&tma->buf_write_val, 1);
  //  int buf_to_use = local_entry >> 63;
  //  local_entry &= (ULLONG_MAX >> 1);
  //
  //  if (local_entry < tma->buf_size) {
  //    sample_t *buf_base = buf_to_use ? tma->buffer1 : tma->buffer0;
  //    fill_sample(&buf_base[local_entry]); // reads TTMP11 as well
  //
  //    uint32_t * written = buf_to_use ? &(tma->buf_written_val1) :
  //                                      &(tma->buf_written_val0);
  //
  //    uint64_t done = __atomic_fetch_add(&written, 1,
  //                memory_order_release, memory_scope_system);
  //
  //    uint32_t watermark = buf_to_use ? tma->buf_watermark1 :
  //                                      tma->buf_watermark0;
  //    if (done == watermark) {
  //       hsa_signal_t done_sig = buf_to_use ? tma->done_sig1 :
  //                                            tma->done_sig0;
  //       send_signal(done_sig);
  //    }
  //  }
  //}

  // ttmp[14:15] is tma->host_trap_buffers; Available: ttmp[2:3], ttmp[4:5], ttmp7, ttmp13
.profile_trap_handlers_gfx9:
  // Reserve a sample slot: atomically add 1 to the 64-bit buf_write_val at
  // offset 0x00.  The value returned packs buf_to_use in bit 63 and the entry
  // index in bits 62:0.
  s_mov_b64                             ttmp[2:3], 1                    // atomic increment buf_write_val
  s_atomic_add_x2                       ttmp[2:3], ttmp[14:15], glc     // ttmp[2:3] = packed local_entry
  S_LOAD_DWORD_PCS_TTMP_REG1            ttmp[14:15], 0x8                // TTMP_REG1 = tma->buf_size
  s_waitcnt                             lgkmcnt(0)
  s_lshr_b32                            ttmp7, ttmp3, 31                // ttmp7 = buf_to_use
  S_BITSET0_B32_PCS_TTMP_REG2           31                              // clear out TTMP_REG2  bit31
  s_cmp_eq_u32                          ttmp7, 0                        // store off buf_to_use ...
  s_cbranch_scc1                        .skip_ttmp_set_gfx9             // into bit31 of TTMP_REG2
  S_BITSET1_B32_PCS_TTMP_REG2           31
.skip_ttmp_set_gfx9:
  s_bfe_u64                             ttmp[2:3], ttmp[2:3], (63<<16)  // ttmp[2:3] = new local_entry (bit 63 dropped)
  s_cmp_lg_u32                          ttmp3, 0                        // if entry >= 2^32, always lost
  s_cbranch_scc1                        .pc_sampling_exit
  S_CMP_GE_U32_PCS_TTMP_REG1            ttmp2                           // if local_entry >= buf_size
  s_cbranch_scc1                        .pc_sampling_exit

  // ttmp2=local_entry, ttmp7=buf_to_use (also in bit31 of TTMP_REG2), TTMP_REG1=buf_size
  // ttmp[14:15] is tma->host_trap_buffers. Available: ttmp3, ttmp[4:5]
  //
  // Compute ttmp[4:5] = buf_to_use * buf_size * 0x40, i.e. the byte offset of
  // the selected buffer relative to buffer0 (each sample is 0x40 bytes).
  // buf_size lives in TTMP_REG1, which the *_PCS_TTMP_REG1 macros map to ttmp6
  // whenever gfx_generation_minor >= 4 and to ttmp13 otherwise.  The register
  // choice here must use the same ">= 4" test: with "== 4" a gfx9 target with
  // minor > 4 would multiply ttmp13 (which holds the trap-kind flags there)
  // instead of the buf_size that was loaded into ttmp6.
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
  s_mul_i32                             ttmp6, ttmp6, ttmp7             // ttmp6 = buf_size if buf_to_use=1, 0 otherwise
  s_mul_i32                             ttmp4, ttmp6, 0x40              // low 32 bits of the byte offset
  s_mul_hi_u32                          ttmp5, ttmp6, 0x40              // high 32 bits of the byte offset
.else
  s_mul_i32                             ttmp13, ttmp13, ttmp7           // ttmp13 = buf_size if buf_to_use=1, 0 otherwise
  s_mul_i32                             ttmp4, ttmp13, 0x40             // low 32 bits of the byte offset
  s_mul_hi_u32                          ttmp5, ttmp13, 0x40             // high 32 bits of the byte offset
.endif

  s_add_u32                             ttmp4, ttmp4, 0x40              // now ttmp[4:5]=offset from ...
  s_addc_u32                            ttmp5, ttmp5, 0                 // tma to start of target buffer;
  s_add_u32                             ttmp4, ttmp14, ttmp4            // ttmp[4:5] now points to ...
  s_addc_u32                            ttmp5, ttmp15, ttmp5            // buffer0 or buffer1
  s_mov_b32                             ttmp7, ttmp2                    // ttmp7 = local_entry for fill_sample

  // Dispatch on the sample kind recorded by the trap-entry code.
.if .amdgcn.gfx_generation_number == 9

.if .amdgcn.gfx_generation_minor >= 4
  // Check if it's a stochastic trap
  s_bitcmp1_b32                         ttmp13, TTMP13_PCS_IS_STOCHASTIC
  s_cbranch_scc1                        .fill_sample_stochastic
  // Check if it's a host trap
  s_bitcmp1_b32                         ttmp13, TTMP13_PCS_IS_HOSTTRAP
  s_cbranch_scc1                        .fill_sample_hosttrap
.else
  // Check if it's a host trap
  s_bitcmp1_b32                         ttmp11, TTMP11_PCS_IS_HOSTTRAP
  s_cbranch_scc1                        .fill_sample_hosttrap

.endif
.endif
  // If neither bit is set, this is unexpected.
  // This branch is not expected to be taken.
  s_branch                              .no_skip_debugtrap

  // ttmp7 contains local_entry, ttmp[4:5] contains "&bufferX",
  // ttmp[14:15] holds 'tma->host_trap_buffers' pointer
  // ttmp[2:3] and ttmp13 are available for gathering perf sample info
  // ttmp[14:15] is live out

  // fill_sample(...) - begin //
  // typedef struct {
  // [0x00]  uint64_t pc;
  // [0x08]  uint64_t exec_mask;
  // [0x10]  uint32_t workgroup_id_x;
  // [0x14]  uint32_t workgroup_id_y;
  // [0x18]  uint32_t workgroup_id_z;
  // [0x1c]  uint32_t wave_in_wg : 6;
  //         uint32_t chiplet    : 3;    // Currently not used
  //         uint32_t reserved   : 23;
  // [0x20]  uint32_t hw_id;
  // [0x24]  uint32_t reserved0;
  // [0x28]  uint64_t reserved1;
  // [0x30]  uint64_t timestamp;
  // [0x38]  uint64_t correlation_id;
  // } perf_sample_hosttrap_v1_t;
  //
  // __device__ void fill_sample_hosttrap_v1(perf_sample_hosttrap_v1_t* buf) {
  //    buf->pc = ((ttmp1 & 0xffff) << 32) | ttmp0;
  //    buf->exec_mask = EXEC;
  //    buf->workgroup_id_x = ttmp8;
  //    buf->workgroup_id_y = ttmp9;
  //    buf->workgroup_id_z = ttmp10;
  //    buf->chiplet_and_wave_id = ttmp11 & 0x3f;
  //    buf->hw_id = s_getreg_b32(HW_REG_HW_ID);
  //    buf->timestamp = s_memrealtime;
  //    buf->correlation_id = get_correlation_id();
  // }
.fill_sample_hosttrap:
  // Write one 0x40-byte host-trap sample.
  // In: ttmp7 = local_entry, ttmp[4:5] = &bufferX.  Out: ttmp[2:3] = &bufferX[local_entry].
  s_mul_i32                             ttmp2, ttmp7, 0x40              // offset into buffer for 64B objects
  s_mul_hi_u32                          ttmp3, ttmp7, 0x40              // ttmp[2:3] will contain byte ...
  s_add_u32                             ttmp2, ttmp2, ttmp4
  s_addc_u32                            ttmp3, ttmp3, ttmp5             // ttmp[2:3]=&bufferX[local_entry]
  s_memrealtime                         ttmp[4:5]                       // timestamp request; ttmp[4:5] valid after the waitcnt below
  s_and_b32                             ttmp1, ttmp1, 0xffff            // clear out extra data from PC_HI
  s_store_dwordx2                       ttmp[0:1], ttmp[2:3]            // store PC
  s_waitcnt                             lgkmcnt(0)                      // wait for timestamp
  // EXEC is staged through TTMP_REG1 one dword at a time.
  S_MOV_B32_SRC_PCS_TTMP_REG1           exec_lo
  S_STORE_DWORD_PCS_TTMP_REG1           ttmp[2:3], 0x8                  // store EXEC_LO
  S_MOV_B32_SRC_PCS_TTMP_REG1           exec_hi
  S_STORE_DWORD_PCS_TTMP_REG1           ttmp[2:3], 0xc                  // store EXEC_HI
  s_store_dwordx2                       ttmp[8:9], ttmp[2:3], 0x10      // store wg_id_x and wg_id_y
  s_store_dword                         ttmp10, ttmp[2:3], 0x18         // store wg_id_z
  s_store_dwordx2                       ttmp[4:5], ttmp[2:3], 0x30      // store timestamp

.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
  // gfx9.4+: dword 0x1c = (XCC_ID << 8) | wave_in_wg
  s_getreg_b32                          ttmp4, hwreg(HW_REG_XCC_ID)     // read XCC_ID
  s_lshl_b32                            ttmp4, ttmp4, 8
  s_and_b32                             ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
  s_or_b32                              ttmp4, ttmp4, ttmp5
  s_store_dword                         ttmp4, ttmp[2:3], 0x1c          // store XCC_ID and wave_in_wg
.else
  s_and_b32                             ttmp4, ttmp11, 0x3f             // wave_in_wg = ttmp11[5:0]
  s_store_dword                         ttmp4, ttmp[2:3], 0x1c          // store wave_in_wg
.endif
  s_getreg_b32                          ttmp4, hwreg(HW_REG_HW_ID)
  s_store_dword                         ttmp4, ttmp[2:3], 0x20          // store HW_ID

  s_branch                              .get_correlation_id

.if .amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4
.fill_sample_stochastic:
  // Write one stochastic (hardware snapshot) sample.
  // In: ttmp7 = local_entry, ttmp[4:5] = &buffer.  Out: ttmp[2:3] = &buffer[local_entry].
  s_mul_i32                             ttmp2, ttmp7, 0x40              // offset into buffer for 64B objects
  s_mul_hi_u32                          ttmp3, ttmp7, 0x40
  s_add_u32                             ttmp2, ttmp2, ttmp4
  s_addc_u32                            ttmp3, ttmp3, ttmp5             // ttmp[2:3]=&buffer[local_entry]
  s_memrealtime                         ttmp[4:5]
  s_waitcnt                             lgkmcnt(0)                      // Wait for timestamp
  s_store_dwordx2                       ttmp[4:5], ttmp[2:3], 0x30      // Store timestamp

  s_getreg_b32                          ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO)
  s_getreg_b32                          ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI)
  s_store_dwordx2                       ttmp[4:5], ttmp[2:3], 0x00      // store snapshot PC
  s_getreg_b32                          ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1)
  s_getreg_b32                          ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA)
  s_store_dwordx2                       ttmp[4:5], ttmp[2:3], 0x24      // store snapshot data (DATA at 0x24, DATA1 at 0x28)

  // ttmp6 is used as scratch for EXEC here.
  s_mov_b32                             ttmp6, exec_lo
  s_store_dword                         ttmp6, ttmp[2:3], 0x8           // store EXEC_LO
  s_mov_b32                             ttmp6, exec_hi
  s_store_dword                         ttmp6, ttmp[2:3], 0xc           // store EXEC_HI

  s_store_dwordx2                       ttmp[8:9], ttmp[2:3], 0x10      // store wg_id_x and wg_id_y
  s_store_dword                         ttmp10, ttmp[2:3], 0x18         // store wg_id_z
  // dword 0x1c = (XCC_ID << 8) | wave_in_wg
  s_getreg_b32                          ttmp4, hwreg(HW_REG_XCC_ID)
  s_lshl_b32                            ttmp4, ttmp4, 8
  s_and_b32                             ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
  s_or_b32                              ttmp4, ttmp4, ttmp5
  s_store_dword                         ttmp4, ttmp[2:3], 0x1c          // store chiplet_and_wave_id
  s_getreg_b32                          ttmp4, hwreg(HW_REG_HW_ID)
  s_store_dword                         ttmp4, ttmp[2:3], 0x20          // store HW_ID
  // ttmp[2:3]=&buffer[local_entry]; ttmp[4:5], ttmp[6:7] are free
  // ttmp[14:15] = pointer to the trap buffers and is live out;
  // bit 31 of TTMP_REG2 is buf_to_use, 0 or 1
  s_branch                              .get_correlation_id

.endif

.get_correlation_id:

  // get_correlation_id() -- begin //
  // Builds the 64-bit correlation ID for the sample and stores it at offset
  // 0x38 of the sample pointed to by ttmp[2:3]:
  //   Upper 32 bits: doorbell ID of the queue (message result & DOORBELL_ID_MASK)
  //   Lower 32 bits: { 7 0s}{25b dispatch_pkt}
  // __device__ uint64_t get_correlation_id() {
  //   uint64_t output;
  //   // Get the low bits of the queue's doorbell, in doorbell region.
  //   output = s_sendmsg(MSG_GET_DOORBELL);
  //   output &= DOORBELL_ID_MASK;
  //   output <<= 32;
  //   // The dispatch packet index (modulus the queue size) is read from
  //   // TTMP11[30:6] on gfx9.4+ and from TTMP6[24:0] on other gfx9.
  //   output |= dispatch_pkt_index;
  //   return output;
  // }

  // ttmp[2:3] = &buffer[local_entry]
  // ttmp[4:5] and ttmp7 are free
  // ttmp[14:15] = tma->host_trap_buffers and is live out
  // bit 31 of TTMP_REG2 is buf_to_use, 0 or 1 and is live out

  s_mov_b64                             ttmp[4:5], exec                 // back up EXEC mask
  s_mov_b32                             exec_lo, 0x80000000             // prepare EXEC for doorbell spin
  s_sendmsg                             sendmsg(MSG_GET_DOORBELL)       // message 10, puts doorbell in EXEC
.wait_for_doorbell:
  s_nop                                 0x7                             // wait a bit for message to return
  s_bitcmp0_b32                         exec_lo, 0x1f                   // SCC = 1 once bit 31 reads 0 (reply arrived)
  s_cbranch_scc0                        .wait_for_doorbell              // wait some more if no data yet
  s_mov_b32                             exec_hi, ttmp5                  // do not care about message[63:32]
  s_and_b32                             ttmp5, exec_lo, DOORBELL_ID_MASK // doorbell now in ttmp5
  s_mov_b32                             exec_lo, ttmp4                  // exec mask restored

.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
  s_bfe_u32                             ttmp4, ttmp11, (6 | 25 << 16)    // extract 25-bit dispatch ID from ttmp11[30:6]
.else
  s_and_b32                             ttmp4, ttmp6, 0x1ffffff         // extract low 25 bits from ttmp6 (DispatchPktIndx[24:0])
.endif
  s_store_dwordx2                       ttmp[4:5], ttmp[2:3], 0x38      // ttmp[4:5] is correlation ID. Store correlation_id to sample
  // get_correlation_id() -- end //

  // complete stores before returning
  s_dcache_wb
  s_waitcnt                             lgkmcnt(0)
  // fill_sample(...) - end //

  // Publish the sample: bump buf_written_valX for the buffer that was used and
  // exit unless the pre-increment count equals that buffer's watermark.
  // ttmp[2:3], ttmp[4:5] and ttmp7 are free
  // ttmp[14:15] = tma->host_trap_buffers; bit 31 of TTMP_REG2 is buf_to_use, 0 or 1
  S_LSHR_B32_PCS_TTMP_REG1_REG2         31                              // TTMP_REG1 is buf_to_use
  S_MULK_I32_PCS_TTMP_REG1              0x10                            // written_val0 to written_val_X
  S_ADD_U32_PCS_TTMP_REG1               ttmp14, ttmp14                  // now ttmp[14:15] points to ...
  s_addc_u32                            ttmp15, ttmp15, 0x0             // buf_written_valX-0x10
  s_mov_b32                             ttmp7, 1                        // atomic increment buf_written_valX
  s_atomic_add                          ttmp7, ttmp[14:15], 0x10 glc    // ttmp7 will contain 'done'
  S_LOAD_DWORD_PCS_TTMP_REG1            ttmp[14:15], 0x14               // TTMP_REG1 will hold watermark (buf_watermarkX)
  s_waitcnt                             lgkmcnt(0)
  S_CMP_LG_U32_PCS_TTMP_REG1            ttmp7                          // if 'done' not at watermark, exit
  s_cbranch_scc1                        .pc_sampling_exit

  // The watermark was reached: signal the host through done_sigX.
  // ttmp[2:3], [4:5] and ttmp7 are free
  // ttmp[14:15] = buf_written_valX-0x10

  // send_signal(...) - begin //
  //__device__ void send_signal(hsa_signal_t* signal) {
  //
  //   amd_signal_t *sig = (amd_signal_t *)signal->handle;
  //   __atomic_store(&(sig->value), 0, memory_order_relaxed, memory_scope_system);
  //   if (sig->event_mailbox_ptr != NULL && sig->event_id != 0) {
  //     uint32_t id = sig->event_id;
  //     __atomic_store(sig->event_mailbox_ptr, id,
  //            memory_order_relaxed, memory_scope_system);
  //     __builtin_amdgcn_s_sendmsg(1, id);
  //   }
  //}
  // We jump to the trap handler exit after this, so no live-out registers except
  // those that must survive the trap handler

  s_load_dwordx2                        ttmp[2:3], ttmp[14:15], 0x18    // load done_sig into ttmp[2:3]
  s_waitcnt                             lgkmcnt(0)                      // it's actually an amd_signal_t*
  s_load_dwordx2                        ttmp[4:5], ttmp[2:3], 0x10      // load event mailbox ptr into 4:5
  s_load_dword                          ttmp7, ttmp[2:3], 0x18          // load event_id into ttmp7
  s_mov_b64                             ttmp[14:15], 0                  // ttmp[14:15] is reused as the zero to store
  s_store_dwordx2                       ttmp[14:15], ttmp[2:3], 0x8 glc // zero out signal value
  s_waitcnt                             lgkmcnt(0)                      // wait for the loads and the value store to complete
  s_cmp_eq_u64                          ttmp[4:5], 0
  s_cbranch_scc1                        .pc_sampling_exit               // null mailbox means no interrupt
  s_cmp_eq_u32                          ttmp7, 0
  s_cbranch_scc1                        .pc_sampling_exit               // event_id zero means no interrupt
  s_store_dword                         ttmp7, ttmp[4:5] glc            // send event ID to the mailbox
  s_waitcnt                             lgkmcnt(0)
  S_MOV_B32_SRC_PCS_TTMP_REG1           m0                              // save off m0
  s_mov_b32                             m0, ttmp7                       // put ID into message payload
  s_nop                                 0x0                             // Manually inserted wait states
  s_sendmsg                             sendmsg(MSG_INTERRUPT)          // send interrupt message
  s_waitcnt                             lgkmcnt(0)                      // wait for message to be sent
  S_MOV_B32_DST_PCS_TTMP_REG1           m0                              // restore m0
  // send_signal(...) - end //
.pc_sampling_exit:
  // We can receive regular exceptions while doing PC-Sampling so we need to make sure we
  // handle these exceptions here
  s_getreg_b32                          ttmp2, hwreg(HW_REG_TRAPSTS)
  s_getreg_b32                          ttmp3, hwreg(HW_REG_MODE, SQ_WAVE_MODE_EXCP_EN_SHIFT, SQ_WAVE_MODE_EXCP_EN_SIZE) // ttmp3 = MODE.EXCP_EN field
  // Build the mask of exceptions to report: the maskable exceptions enabled in
  // MODE.EXCP_EN plus the bits corresponding to TRAPSTS.MEM_VIOL,
  // TRAPSTS.ILLEGAL_INST and TRAPSTS.XNACK_ERROR.
  s_or_b32                              ttmp3, ttmp3, (1 << SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT | 1 << SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT | 1 << SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT)
  s_and_b32                             ttmp2, ttmp2, ttmp3
  // SCC will be 1 if either an enabled maskable exception was raised, or one of MEM_VIOL, ILL_INST, XNACK_ERROR
  s_cbranch_scc1                        .no_skip_debugtrap              // if any of those are set, handle exceptions

  // No separate test of the maskable exceptions is needed: they are already
  // part of the mask above, and ttmp2 is zero on this path, so a second
  // "TRAPSTS & EXCP_EN" test could never branch.

  // Since we are in PC sampling, it is safe to ignore watch1/2/3 and single step
  // as those should only be enabled by the debugger.
  // We could add them for completeness, i.e. check MODE.DEBUG_EN (bit 11)
  // and "MODE.EXCP_EN.WATCH (bit 19) && (TRAPSTS.EXCP_HI.ADDR_WATCH1 (bit 12) || TRAPSTS.EXCP_HI.ADDR_WATCH2 (bit 13) || TRAPSTS.EXCP_HI.ADDR_WATCH3 (bit 14)).
  s_branch                              .exit_trap

.endif // PC_SAMPLING_GFX9
.no_skip_debugtrap:
  // Save trap id and halt status in ttmp6.
  // The trap id is re-extracted from ttmp1 and clamped to 0xF before it is
  // placed in the saved-trap-id field; the halt bit comes from the saved
  // STATUS in ttmp12.
  s_andn2_b32                           ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
  s_bfe_u32                             ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
  s_min_u32                             ttmp2, ttmp2, 0xF
  s_lshl_b32                            ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
  s_or_b32                              ttmp6, ttmp6, ttmp2
  s_bfe_u32                             ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
  s_lshl_b32                            ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
  s_or_b32                              ttmp6, ttmp6, ttmp2

  // Fetch doorbell id for our queue.  On exit ttmp3 = doorbell_id.
.if .amdgcn.gfx_generation_number < 11
  // Before gfx11 the reply to MSG_GET_DOORBELL arrives in EXEC, so EXEC is
  // saved in ttmp[2:3] and restored afterwards.
  s_mov_b32                             ttmp2, exec_lo
  s_mov_b32                             ttmp3, exec_hi
  s_mov_b32                             exec_lo, 0x80000000
  s_sendmsg                             sendmsg(MSG_GET_DOORBELL)
.wait_sendmsg:
  // Spin until bit 31 of exec_lo reads 0 (s_bitcmp0_b32 then sets SCC).
  s_nop                                 0x7
  s_bitcmp0_b32                         exec_lo, 0x1F
  s_cbranch_scc0                        .wait_sendmsg
  s_mov_b32                             exec_hi, ttmp3
  // Restore exec_lo, move the doorbell_id into ttmp3
  s_and_b32                             ttmp3, exec_lo, DOORBELL_ID_MASK
  s_mov_b32                             exec_lo, ttmp2
.else
  // gfx11+: the returning form of the message writes the result into ttmp3.
  s_sendmsg_rtn_b32                     ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
  s_waitcnt                             lgkmcnt(0)
  s_and_b32                             ttmp3, ttmp3, DOORBELL_ID_MASK
.endif

  // Map trap reason to an exception code.
  // ttmp3 enters holding the doorbell id; EC_QUEUE_WAVE_*_M0 bits are OR-ed
  // into it for every condition found in TRAPSTS (read into ttmp2).
  s_getreg_b32                          ttmp2, hwreg(HW_REG_TRAPSTS)

  s_bitcmp1_b32                         ttmp2, SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT
  s_cbranch_scc0                        .not_memory_violation
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0

  // Aperture violation requires XNACK_ERROR == 0.
  s_branch                              .not_aperture_violation

.not_memory_violation:
  // XNACK_ERROR is clear: MEM_VIOL is reported as an aperture violation.
  s_bitcmp1_b32                         ttmp2, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT
  s_cbranch_scc0                        .not_aperture_violation
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0

.not_aperture_violation:
  s_bitcmp1_b32                         ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
  s_cbranch_scc0                        .not_illegal_instruction
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0

.not_illegal_instruction:
  // A math error is reported only when a raised math exception is also
  // enabled: the raised bits are shifted up to the MODE.EXCP_EN position and
  // ANDed with MODE (read into ttmp7).
  s_and_b32                             ttmp2, ttmp2, SQ_WAVE_TRAPSTS_MATH_EXCP
  s_cbranch_scc0                        .not_math_exception
  s_getreg_b32                          ttmp7, hwreg(HW_REG_MODE)
  s_lshl_b32                            ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
  s_and_b32                             ttmp2, ttmp2, ttmp7
  s_cbranch_scc0                        .not_math_exception
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0

.not_math_exception:
  // The trap id saved in ttmp6 at .no_skip_debugtrap identifies an abort trap.
  s_bfe_u32                             ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
  s_cmp_eq_u32                          ttmp2, TRAP_ID_ABORT
  s_cbranch_scc0                        .not_abort_trap
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0

.not_abort_trap:
  // If no other exception was flagged then report a generic error.
  // (s_andn2_b32 sets SCC when any bit outside the doorbell id is set.)
  s_andn2_b32                           ttmp2, ttmp3, DOORBELL_ID_MASK
  s_cbranch_scc1                        .send_interrupt
  s_or_b32                              ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.send_interrupt:
  // m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
  // m0 is saved in ttmp2 around the message and restored afterwards.
  s_mov_b32                             ttmp2, m0
  s_mov_b32                             m0, ttmp3
  s_nop                                 0x0                             // Manually inserted wait states
  s_sendmsg                             sendmsg(MSG_INTERRUPT)
  s_waitcnt                             lgkmcnt(0)                      // Wait for the message to go out.
  s_mov_b32                             m0, ttmp2

  // Parking the wave requires saving the original pc in the preserved ttmps.
  // Register layout before parking the wave:
  //
  // ttmp7: 0[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] 0[15:0] 1st_level_ttmp11[6:0]
  //
  // After parking the wave:
  //
  // ttmp7:  pc_lo[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
  //
  // Parking is only assembled for gfx9 with minor < 4, gfx10 with minor < 3,
  // and gfx11; other targets fall straight through to .halt_wave.
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor < 4) || (.amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor < 3) || (.amdgcn.gfx_generation_number == 11)
  // Save the PC
  s_mov_b32                             ttmp7, ttmp0
  s_and_b32                             ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
  s_lshl_b32                            ttmp1, ttmp1, TTMP_PC_HI_SHIFT
  s_andn2_b32                           ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
  s_or_b32                              ttmp11, ttmp11, ttmp1

  // Park the wave: point the return PC in ttmp[0:1] at the .parked loop.
  s_getpc_b64                           [ttmp0, ttmp1]
  s_add_u32                             ttmp0, ttmp0, .parked - .
  s_addc_u32                            ttmp1, ttmp1, 0x0
.endif

.halt_wave:
  // Halt the wavefront upon restoring STATUS below.
  s_bitset1_b32                         ttmp6, TTMP6_WAVE_STOPPED_SHIFT
  s_bitset1_b32                         ttmp12, SQ_WAVE_STATUS_HALT_SHIFT
  // Set WAVE.SKIP_EXPORT as a marker so the debugger knows the trap handler was
  // entered and has decided to halt the wave.
  s_bitset1_b32                         ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT

.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
  // gfx9.4+: if the TTMPS_SETUP bit of ttmp11 is not set yet, zero ttmp4 and
  // ttmp5, clear TTMP6_SPI_TTMPS_SETUP_DISABLED in ttmp6 and set the
  // TTMPS_SETUP bit.
  s_bitcmp1_b32                         ttmp11, TTMP11_TTMPS_SETUP_SHIFT
  s_cbranch_scc1                        .ttmps_initialized
  s_mov_b32                             ttmp4, 0
  s_mov_b32                             ttmp5, 0
  s_bitset0_b32                         ttmp6, TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT
  s_bitset1_b32                         ttmp11, TTMP11_TTMPS_SETUP_SHIFT
.ttmps_initialized:
.endif

.exit_trap:
  // Restore SQ_WAVE_IB_STS.
  // The saved IB_STS fields are kept in ttmp11 (gfx9 with minor < 4, and gfx10
  // with minor < 3) or ttmp13 (gfx9 with minor >= 4); they are shifted back
  // to their IB_STS positions, masked and written to the register.  Nothing
  // is restored on other targets.
.if .amdgcn.gfx_generation_number == 9
.if .amdgcn.gfx_generation_minor < 4
  s_lshr_b32                            ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
.else
  s_lshr_b32                            ttmp2, ttmp13, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
.endif
  s_and_b32                             ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
  s_setreg_b32                          hwreg(HW_REG_IB_STS), ttmp2
.elseif .amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor < 3
  // Here REPLAY_W64H is saved separately and merged back in.
  s_lshr_b32                            ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
  s_and_b32                             ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
  s_lshr_b32                            ttmp2, ttmp11, (TTMP_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
  s_and_b32                             ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
  s_or_b32                              ttmp2, ttmp2, ttmp3
  s_setreg_b32                          hwreg(HW_REG_IB_STS), ttmp2
.endif

  // Restore SQ_WAVE_STATUS.
  s_and_b64                             exec, exec, exec               // restore STATUS.EXECZ, not writable by s_setreg_b32
  s_and_b64                             vcc, vcc, vcc                  // restore STATUS.VCCZ, not writable by s_setreg_b32
  s_setreg_b32                          hwreg(HW_REG_STATUS), ttmp12   // ttmp12 = saved SQ_WAVE_STATUS (plus any halt bits set above)

  // Return to original (possibly modified) PC.
  s_rfe_b64                             [ttmp0, ttmp1]

// Target of the return PC set up by the "Park the wave" sequence: a loop
// that raises s_trap 0x2 on every iteration.
.parked:
  s_trap                                0x2
  s_branch                              .parked

// For gfx11, add padding instructions so we can ensure instruction cache
// prefetch always has something to load.
// The repeat count is (256 - (code size so far modulo 64)) / 4, i.e. one
// s_code_end per 4 bytes of padding.
.if .amdgcn.gfx_generation_number == 11
.rept (256 - ((. - trap_entry) % 64)) / 4
  s_code_end
.endr
.endif


================================================
FILE: runtime/hsa-runtime/core/runtime/trap_handler/trap_handler_gfx12.s
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

/// Trap Handler V2 source

// Layout of the interrupt payload placed in M0 by .send_interrupt:
//   bits [DOORBELL_ID_SIZE-1:0] : doorbell ID of the queue
//   bits above DOORBELL_ID_SIZE : one bit per exception code (EC_QUEUE_WAVE_*)
// Several exception-code bits may be OR-ed together in a single payload.
.set DOORBELL_ID_SIZE                          , 10
.set DOORBELL_ID_MASK                          , ((1 << DOORBELL_ID_SIZE) - 1)
.set EC_QUEUE_WAVE_ABORT_M0                    , (1 << (DOORBELL_ID_SIZE + 0))
.set EC_QUEUE_WAVE_TRAP_M0                     , (1 << (DOORBELL_ID_SIZE + 1))
.set EC_QUEUE_WAVE_MATH_ERROR_M0               , (1 << (DOORBELL_ID_SIZE + 2))
.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0      , (1 << (DOORBELL_ID_SIZE + 3))
.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0         , (1 << (DOORBELL_ID_SIZE + 4))
.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0       , (1 << (DOORBELL_ID_SIZE + 5))

// Bit positions / masks inside the EXCP_FLAG_PRIV hardware register.
// Bits [3:0] are the address-watch flags (ADDR_WATCH_MASK); the remaining
// symbols are single-bit positions used with s_bitcmp1_b32 / s_setreg_imm32_b32.
// Note that PERF_SNAPSHOT is also a bit position even though its name lacks
// the _SHIFT suffix.
.set SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK    , (1 << 4) - 1
.set SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT      , 4
.set SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT , 6
.set SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT           , 7
.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT   , 8
.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT     , 9
.set SQ_WAVE_EXCP_FLAG_PRIV_PERF_SNAPSHOT      , 10
.set SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_SHIFT , 11
.set SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT  , 12

// Math-exception field of EXCP_FLAG_USER (offset and width for s_getreg_b32).
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT    , 0
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE     , 7

// Fields of the TRAP_CTRL hardware register. TRAP_AFTER_INST is a bit
// position (it is tested with s_bitcmp1_b32) despite lacking a _SHIFT suffix.
.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK          , ((1 << 7) - 1)
.set SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT        , 7
.set SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT          , 8
.set SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST         , 9

// PC_HI / STATE_PRIV fields. The *_BFE symbols are s_bfe_u32 operands encoded
// as (bit offset | (field width << 16)); they reference the _SHIFT/_SIZE
// symbols that are defined on the following lines.
.set SQ_WAVE_PC_HI_ADDRESS_MASK                , 0xFFFF
.set SQ_WAVE_PC_HI_TRAP_ID_BFE                 , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT               , 28
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE                , 4
.set SQ_WAVE_STATE_PRIV_HALT_BFE               , (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16))
.set SQ_WAVE_STATE_PRIV_HALT_SHIFT             , 14
.set SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT , 2

// Trap IDs recognised by the handler and the layout of the state it keeps in
// the TTMP registers:
//   ttmp6[28:25] saved trap ID, ttmp6[29] saved halt status,
//   ttmp6[30]    wave-stopped flag,
//   ttmp8[31]    "TTMPs initialised" debug flag,
//   ttmp11[23]   debugger-enabled flag,
//   ttmp11[22:7] saved PC_HI (TTMP_PC_HI_SHIFT) while the wave is parked.
.set TRAP_ID_ABORT                             , 2
.set TRAP_ID_DEBUGTRAP                         , 3
.set TTMP6_SAVED_STATUS_HALT_MASK              , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
.set TTMP6_SAVED_STATUS_HALT_SHIFT             , 29
.set TTMP6_SAVED_TRAP_ID_BFE                   , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
.set TTMP6_SAVED_TRAP_ID_MASK                  , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
.set TTMP6_SAVED_TRAP_ID_SHIFT                 , 25
.set TTMP6_SAVED_TRAP_ID_SIZE                  , 4
.set TTMP6_WAVE_STOPPED_SHIFT                  , 30
.set TTMP8_DEBUG_FLAG_SHIFT                    , 31
.set TTMP11_DEBUG_ENABLED_SHIFT                , 23
.set TTMP_PC_HI_SHIFT                          , 7

// Flag bits kept in ttmp13 while a PC-sampling trap is being serviced.
// TTMP13_BUF_FULL_BIT (31) is also used as the shift amount that extracts the
// buffer selector from the high dword of buf_write_val, and ttmp13 bit 31 is
// where that selector is cached.
.set TTMP13_HT_FLAG_BIT                        , 22           // TTMP13 bit for host-trap
.set TTMP13_STOCH_FLAG_BIT                     , 21           // TTMP13 bit for stochastic
.set TTMP13_BUF_FULL_BIT                       , 31           // TTMP13 bit - buf full mark
.set TTMP8_DISPATCH_ID_MASK                    , 0x1FFFFFF    // ttmp8[24:0] = dispatch ID

// Per-sample data layout within the device buffer. Each sample is 64 bytes.
// These are offsets from the start of a specific sample slot in the device buffer.
// SAMPLE_OFF_BYTES_PER_SAMPLE is additionally used as the byte offset from the
// TMA-provided base to the first sample buffer.
.set SAMPLE_OFF_BYTES_PER_SAMPLE               , 0x40         // bytes per sample slot

.set SAMPLE_OFF_PC_HOST                        , 0x00         // original PC (host only)
.set SAMPLE_OFF_EXEC_LOHI                      , 0x08         // saved EXEC low/high
.set SAMPLE_OFF_WGID_XY                        , 0x10         // WG id X / Y
.set SAMPLE_OFF_WGID_Z_WAVE                    , 0x18         // WG id Z / wave-in-workgroup id
.set SAMPLE_OFF_HW_ID                          , 0x20         // HW_ID (values combined from the HW_ID1 + HW_ID2)
.set SAMPLE_OFF_SNAPSHOT_DATA                  , 0x24
.set SAMPLE_OFF_TIMESTAMP                      , 0x30         // 64 bit realtime counter
.set SAMPLE_OFF_CORRELATION                    , 0x38         // doorbell + dispatch id

// Offsets that are NOT relative to a sample slot (pcs_sampling_data_t header
// and signal structure fields).
.set SAMPLE_OFF_BUF_SIZE                       , 0x8          // Offset to buf_size in pcs_sampling_data_t
.set SAMPLE_OFF_BUF_WRITTEN_VAL                , 0x10         // Offset to buf_written_val0/1 in pcs_sampling_data_t
.set SAMPLE_OFF_DONE_SIG0                      , 0x18         // Offset for done_sig0 (hsa_signal_t handle for buffer 0)
.set SAMPLE_OFF_DONE_SIG1                      , 0x28         // Offset for done_sig1 (hsa_signal_t handle for buffer 1)
.set SAMPLE_OFF_SIGNAL_VALUE                   , 0x8          // Offset within signal structure to value field
.set SAMPLE_OFF_EVENT_MAILBOX0                 , 0x10         // Offset for event mailbox pointer for buffer 0
.set SAMPLE_OFF_EVENT_MAILBOX1                 , 0x20         // Offset for event mailbox pointer for buffer 1

.set WAVE_ID_MASK                              , 0x1f         // Mask to extract Wave ID from TTMP register.
.set BUF_INDEX_MASK                            , 0x7fffffff   // strip bit31 from add_x2
.set SAMPLE_INDEX_WIDTH                        , 31           // The sample index is 63 bits; the high part is 31 bits.

// Raw s_getreg_b32 operand encodings for registers read by the PC-sampling
// code. Each value is 0xf800 | register_id: the upper bits (0xf8) select a
// 32-bit wide read at bit offset 0, the low bits are the hardware register id
// (e.g. HW_ID1 = 0x17, HW_ID2 = 0x18).
.set HW_REG_SHADER_HW_ID1                      , 0xf817
.set HW_REG_SHADER_HW_ID2                      , 0xf818
.set HW_REG_SQ_PERF_SNAPSHOT_PC_LO             , 0xf80b
.set HW_REG_SQ_PERF_SNAPSHOT_PC_HI             , 0xf80c
.set HW_REG_SQ_PERF_SNAPSHOT_DATA1             , 0xf80f
.set HW_REG_SQ_PERF_SNAPSHOT_DATA2             , 0xf810
.set HW_REG_SQ_PERF_SNAPSHOT_DATA              , 0xf81b

  // Macro to store the Correlation ID (Dispatch ID and Doorbell ID) into the current sample slot.
  // The pair is written as one 64-bit store at SAMPLE_OFF_CORRELATION:
  // low dword = dispatch ID, high dword = doorbell ID.
  //
  // Assumes the following registers are set before it is called:
  //   v[0:1]: Must contain the 64-bit base address of the target sample slot
  //   ttmp8 : Must contain the dispatch ID in bits [24:0]
  //   exec  : Must be set to 0x1 to ensure operations apply only to lane 0
  //
  // Clobbers the following registers:
  //   v[2:3]: Used for [dispatch_id, doorbell_id]
  //   ttmp6 : Used as scratch register
  //
  // The store is issued but not waited on; the caller is responsible for
  // s_wait_storecnt before relying on the data being visible.
.macro STORE_CORRELATION_ID
  s_sendmsg_rtn_b32 ttmp6, sendmsg(MSG_RTN_GET_DOORBELL)    // Gets current queue's doorbell ID into ttmp6.
  s_wait_kmcnt      0                                       // Wait for the message return value.
  s_and_b32         ttmp6, ttmp6, DOORBELL_ID_MASK          // Mask to get actual doorbell ID.
  v_writelane_b32   v3, ttmp6, 0                            // Store doorbell ID into high part of v[2:3] (via v3).
  s_and_b32         ttmp6, ttmp8, TTMP8_DISPATCH_ID_MASK    // Get dispatch ID from ttmp8 into ttmp6
  v_writelane_b32   v2, ttmp6, 0                            // Store dispatch ID into low part of v[2:3] (via v2)
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_CORRELATION, scope:SCOPE_SYS  // Store {dispatch_id, doorbell_id} into sample slot.
                                                                       // v[0:1] = sample slot base address.
                                                                       // v[2] = dispatch_id, v[3] = doorbell_id.
.endm

  // Macro to store the HW_ID registers into the current sample slot
  // (one dword at SAMPLE_OFF_HW_ID).
  //
  // Assumes the following registers are set before it is called:
  //   v[0:1]: Must contain the 64-bit base address of the target sample slot.
  //   exec  : Must be set to 0x1 to ensure operations apply only to lane 0.
  //
  // Clobbers the following registers:
  //   v[2:3]: Used to stage the data for the global store.
  //   ttmp6 : Used as scratch register.
.macro STORE_HW_ID
  // Current ROCr API determines single dword for HW_ID, while this information is scattered across two
  // dword registers HW_ID1 and HW_ID2 on GFX10+ architectures.
  // Thus, we combine values from HW_ID1 and HW_ID2 into a single dword HW_ID with the following layout:
  // WAVE_ID[4:0]
  // QUEUE_ID[8:5]
  // RESERVED [9]
  // WGP_ID[13:10]
  // SIMD_ID[15:14]
  // SA_ID[16]
  // ME_ID[17]
  // SE_ID[19:18]
  // PIPE_ID[21:20]
  // RESERVED [22]
  // WG_ID[27:23]
  // VM_ID[31:28]

  // Note: We don't show DP_RATE and STATE_ID that are useless for compute kernels
  // Also, we reduced SE_ID to 2 bits as there's only a maximum of 4 SEs on existing gfx12.0 parts
  // Finally, ME_ID is reduced to 1 bit as wavefronts are dispatched from either ME0 or ME1 in gfx12.
  // Bits 9 and 22 are reserved for a future use.
  //
  // NOTE(review): the 0x1feffcff mask below clears HW_ID1 bits 31:29, 20 and 9:8 only; HW_ID1 bits
  // 28:21, 17, 15:14 and 7:5 pass through and are then OR-ed with the relocated fields. This is only
  // correct if those HW_ID1 bits always read as zero - confirm against the ISA guide.

  s_getreg_b32      ttmp6, HW_REG_SHADER_HW_ID1             // Put HW_ID1 in ttmp6
  v_and_b32         v2, ttmp6, 0x1feffcff                   // Mask DP_RATE, SE_ID[2] and SIMD_ID
  v_and_b32         v3, ttmp6, 0x300                        // v3 = SIMD_ID in bits 9:8
  v_lshl_or_b32     v2, v3, 6, v2                           // Put SIMD_ID into v2[15:14]
  s_getreg_b32      ttmp6, HW_REG_SHADER_HW_ID2             // Put HW_ID2 in ttmp6
  v_and_b32         v3, ttmp6, 0xf000000                    // v3 = VM_ID in bits 27:24
  v_lshl_or_b32     v2, v3, 4, v2                           // Put VM_ID into v2[31:28]
  v_and_b32         v3, ttmp6, 0x1f0000                     // v3 = WG_ID in bits 20:16
  v_lshl_or_b32     v2, v3, 7, v2                           // Put WG_ID in v2[27:23]
  v_and_b32         v3, ttmp6, 0x100                        // v3 = ME_ID[0] in bit 8
  v_lshl_or_b32     v2, v3, 9, v2                           // Put ME_ID in v2[17]
  v_and_b32         v3, ttmp6, 0x30                         // v3 = PIPE_ID in bits 5:4
  v_lshl_or_b32     v2, v3, 16, v2                          // Put PIPE_ID in v2[21:20]
  v_and_b32         v3, ttmp6, 0xf                          // v3 = QUEUE_ID in bits 3:0
  v_lshl_or_b32     v2, v3, 5, v2                           // Put QUEUE_ID in v2[8:5]
  global_store_b32  v[0:1], v2, off, offset:SAMPLE_OFF_HW_ID, scope:SCOPE_SYS  // store HW_ID
.endm

// ABI (Application Binary Interface) between first and second-level trap handler:
//   ttmp0: PC_LO[31:0] (Program Counter Low)
//   ttmp1: PC_HI[15:0] (Program Counter High, bits 0-15), TrapID[3:0] (in bits 28-31 of original PC_HI)
//   ttmp11: 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], 0[5:0]
//   ttmp12: SQ_WAVE_STATE_PRIV (Private wave state register value).
//   ttmp14: TMA[31:0] - TMA_LO (Trap Memory Argument Low - base address for trap handler data, low 32 bits).
//   ttmp15: TMA[63:32] - TMA_HI (Trap Memory Argument High - base address for trap handler data, high 32 bits).
//   For PC Sampling, the TMA holds two pointers: offset 0x0 -> host-trap sampling data
//   (pcs_hosttrap_data_), offset 0x8 -> stochastic sampling data (pcs_stochastic_data_).
 trap_entry:

  // ttmp3 accumulates the EC_QUEUE_WAVE_* exception bits; start with none set.
  s_mov_b32         ttmp3, 0

// Detect a host-trap PC-sampling event (EXCP_FLAG_PRIV bit 7). If it is set,
// load the host-trap buffer pointer from TMA+0x0, tag the event in ttmp13,
// acknowledge it in hardware and continue at .profile_trap_handlers.
// Otherwise fall through to the stochastic-sampling check.
.check_hosttrap:

  // ttmp[14:15] points to TMA.
  // Available: ttmp[2:3], ttmp[4:5], ttmp6, ttmp[10:11]
  s_getreg_b32      ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)     // On gfx12, EXCP_FLAG_PRIV.b7
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT
  s_cbranch_scc0    .check_stochastic

  // It's a Host Trap event.
  s_load_b64        ttmp[14:15], ttmp[14:15], 0x0, scope:SCOPE_CU         // ttmp[14:15]=*host_trap_buffers
  // NOTE(review): only the HT bit is set here; whether the STOCH bit of ttmp13
  // is guaranteed to be clear on entry is not visible in this excerpt.
  s_bitset1_b32     ttmp13, TTMP13_HT_FLAG_BIT              // set bit 22 in TTMP13

  // Clear the Host Trap flag in the hardware register to acknowledge the event
  s_setreg_imm32_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT,1), 0
  s_wait_kmcnt      0                                       // Ensure previous load is complete.
  s_branch          .profile_trap_handlers

// Detect a stochastic PC-sampling event (EXCP_FLAG_PRIV bit 10). If it is set,
// load the stochastic buffer pointer from TMA+0x8, tag the event in ttmp13,
// acknowledge it in hardware and continue at .profile_trap_handlers.
// Otherwise continue with the regular s_trap / exception classification.
.check_stochastic:
  s_getreg_b32      ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)     // EXCP_FLAG_PRIV.b10=stochastic_sample_trap
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_PERF_SNAPSHOT // Test Performance Snapshot bit.

  // Not a sampling event: classify the trap ID first. Branching straight to
  // .check_exceptions here would leave the s_trap handling below unreachable,
  // so an s_trap would never be reported and its PC never advanced.
  s_cbranch_scc0    .check_trap_id

  s_load_b64        ttmp[14:15], ttmp[14:15], 0x8, scope:SCOPE_CU         // ttmp[14:15]=*stoch_trap_buf
  s_wait_kmcnt      0

  s_bitset1_b32     ttmp13, TTMP13_STOCH_FLAG_BIT           // set bit 21 in TTMP13

  s_setreg_imm32_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_PERF_SNAPSHOT,1), 0 // Clear the perf_snapshot flag
  s_branch          .profile_trap_handlers

.check_trap_id:
  // Check if this is a trap (s_trap instruction) or a hardware exception.
  // Extract TrapID from ttmp1 (which contains PC_HI).
  // Branch if not a trap (an exception instead).
  s_bfe_u32         ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE // ttmp2 = TrapID, SCC = (TrapID != 0)
  s_cbranch_scc0    .check_exceptions                       // If TrapID is 0, it's an exception, so branch.

  // If caused by s_trap then advance PC, then figure out the trap ID:
  // - if trapID is DEBUGTRAP and debugger is attached, report WAVE_TRAP,
  // - if trapID is ABORTTRAP, report WAVE_ABORT,
  // - report WAVE_TRAP for any other trap ID.
  s_add_u32         ttmp0, ttmp0, 0x4                       // PC_LO += 4
  s_addc_u32        ttmp1, ttmp1, 0x0                       // PC_HI += carry.

  // If llvm.debugtrap and debugger is not attached, report nothing for the trap.
  s_cmp_eq_u32      ttmp2, TRAP_ID_DEBUGTRAP
  s_cbranch_scc0    .not_debug_trap

  s_bitcmp1_b32     ttmp11, TTMP11_DEBUG_ENABLED_SHIFT
  s_cbranch_scc0    .check_exceptions
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.not_debug_trap:
  s_cmp_eq_u32      ttmp2, TRAP_ID_ABORT
  s_cbranch_scc0    .not_abort_trap
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
  s_branch          .check_exceptions

.not_abort_trap:
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

  // NOTE(review): both outcomes of this test reach .check_exceptions (the
  // branch target is the fall-through label), so it currently has no effect.
  s_bitcmp1_b32     ttmp8, TTMP8_DEBUG_FLAG_SHIFT
  s_cbranch_scc0    .check_exceptions

// Translate pending hardware exceptions into EC_QUEUE_WAVE_* bits in ttmp3.
// ttmp2 = EXCP_FLAG_PRIV snapshot, ttmp13 = TRAP_CTRL snapshot (this
// overwrites whatever ttmp13 held before).
.check_exceptions:
  s_getreg_b32      ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
  s_getreg_b32      ttmp13, hwreg(HW_REG_TRAP_CTRL)

  // XNACK_ERROR set -> memory violation.
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
  s_cbranch_scc0    .not_memory_violation
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0

  // Aperture violation requires XNACK_ERROR == 0.
  s_branch          .not_aperture_violation

.not_memory_violation:
  // MEMVIOL with XNACK_ERROR clear -> aperture violation.
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT
  s_cbranch_scc0    .not_aperture_violation
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0

.not_aperture_violation:
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
  s_cbranch_scc0    .not_illegal_instruction
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0

// Wave-start / wave-end exceptions are both reported as EC_QUEUE_WAVE_TRAP.
// Inputs: ttmp2 = EXCP_FLAG_PRIV snapshot, ttmp13 = TRAP_CTRL snapshot.
.not_illegal_instruction:
  // WAVE_START pending -> report a wave trap. When it is not pending we must
  // still evaluate WAVE_END, so the skip target is .not_wave_start (branching
  // to .not_wave_end here would silently drop wave-end exceptions).
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
  s_cbranch_scc0    .not_wave_start
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.not_wave_start:
  // WAVE_END is reported only if it is both pending in EXCP_FLAG_PRIV and
  // enabled in TRAP_CTRL.
  s_bitcmp1_b32     ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT
  s_cbranch_scc0    .not_wave_end
  s_bitcmp1_b32     ttmp13, SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT
  s_cbranch_scc0    .not_wave_end
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

// Remaining exception sources. Inputs: ttmp2 = EXCP_FLAG_PRIV snapshot,
// ttmp13 = TRAP_CTRL snapshot, ttmp3 = accumulated EC_QUEUE_WAVE_* bits.
.not_wave_end:
  // Trap-after-instruction is keyed off the TRAP_CTRL enable bit alone.
  s_bitcmp1_b32     ttmp13, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST
  s_cbranch_scc0    .not_trap_after_inst
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.not_trap_after_inst:
  // Address watch: any of EXCP_FLAG_PRIV[3:0] set (s_and_b32 sets SCC when the
  // result is non-zero) and address watch enabled in TRAP_CTRL.
  s_and_b32         ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
  s_cbranch_scc0    .not_addr_watch
  s_bitcmp1_b32     ttmp13, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT
  s_cbranch_scc0    .not_addr_watch
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.not_addr_watch:
  // Math exceptions: raised flags (EXCP_FLAG_USER[6:0]) AND-ed with the
  // enabled set (TRAP_CTRL[6:0]); a non-zero result reports MATH_ERROR.
  s_getreg_b32      ttmp2, hwreg(HW_REG_EXCP_FLAG_USER, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE)
  s_and_b32         ttmp13, ttmp13, SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK
  s_and_b32         ttmp2, ttmp2, ttmp13
  s_cbranch_scc0    .not_math_exception
  s_or_b32          ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0

.not_math_exception:
  s_cmp_eq_u32      ttmp3, 0
  // This was not a s_trap we are interested in or an exception, return to
  // the user code.
  s_cbranch_scc1    .exit_trap

// Report the accumulated exception bits (ttmp3) to the host via an interrupt,
// record the trap ID / halt status in ttmp6, stash the original PC in
// ttmp10/ttmp11 and redirect the return PC to .parked.
.send_interrupt:
  // Fetch doorbell id for our queue.
  s_sendmsg_rtn_b32 ttmp2, sendmsg(MSG_RTN_GET_DOORBELL)
  s_wait_kmcnt      0
  s_and_b32         ttmp2, ttmp2, DOORBELL_ID_MASK
  s_or_b32          ttmp3, ttmp2, ttmp3

  // Save trap id and halt status in ttmp6.
  s_andn2_b32       ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
  s_bfe_u32         ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
  s_min_u32         ttmp2, ttmp2, 0xF                       // clamp to the 4-bit saved-trap-ID field
  s_lshl_b32        ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
  s_or_b32          ttmp6, ttmp6, ttmp2
  s_bfe_u32         ttmp2, ttmp12, SQ_WAVE_STATE_PRIV_HALT_BFE
  s_lshl_b32        ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
  s_or_b32          ttmp6, ttmp6, ttmp2

  // m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
  // The user's m0 is preserved in ttmp2 around the message.
  s_mov_b32         ttmp2, m0
  s_mov_b32         m0, ttmp3
  s_sendmsg         sendmsg(MSG_INTERRUPT)
  // Wait for the message to go out.
  s_wait_kmcnt      0
  s_mov_b32         m0, ttmp2

  // Parking the wave requires saving the original pc in the preserved ttmps.
  // Register layout before parking the wave:
  //
  // ttmp10: ?[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] 0[15:0] 1st_level_ttmp11[6:0]
  //
  // After parking the wave:
  //
  // ttmp10: pc_lo[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
  //
  // Save the PC
  s_mov_b32         ttmp10, ttmp0
  s_and_b32         ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
  s_lshl_b32        ttmp1, ttmp1, TTMP_PC_HI_SHIFT
  s_andn2_b32       ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
  s_or_b32          ttmp11, ttmp11, ttmp1

  // Park the wave: make [ttmp0, ttmp1] (the return PC) point at .parked,
  // computed PC-relative from the s_getpc_b64 result.
  s_getpc_b64       [ttmp0, ttmp1]
  s_add_u32         ttmp0, ttmp0, .parked - .
  s_addc_u32        ttmp1, ttmp1, 0x0

// Mark the wave as stopped and request a halt, then leave the handler.
.halt_wave:
  // Halt the wavefront upon restoring STATUS below.
  s_bitset1_b32     ttmp6, TTMP6_WAVE_STOPPED_SHIFT
  s_bitset1_b32     ttmp12, SQ_WAVE_STATE_PRIV_HALT_SHIFT

  // Initialize TTMP registers: the first time through (ttmp8 debug flag still
  // clear) zero ttmp4/ttmp5 and set the flag so this is done only once.
  s_bitcmp1_b32     ttmp8, TTMP8_DEBUG_FLAG_SHIFT
  s_cbranch_scc1    .ttmps_initialized
  s_mov_b32         ttmp4, 0
  s_mov_b32         ttmp5, 0
  s_bitset1_b32     ttmp8, TTMP8_DEBUG_FLAG_SHIFT
.ttmps_initialized:
  s_branch          .exit_trap

// Common entry for host-trap and stochastic PC sampling. Saves EXEC and the
// lane-0 values of v0/v1, atomically reserves a sample slot by incrementing
// buf_write_val at the start of the sampling data, and records which of the
// two buffers the slot belongs to.
.profile_trap_handlers:
  // Register state at the start of profile_trap_handlers:
  //
  // ttmp0:  PC_LO[31:0] - Contains program counter low bits
  // ttmp1:  PC_HI[15:0] - Contains program counter high bits
  // ttmp2:  Contains HW_REG_EXCP_FLAG_PRIV
  // ttmp3:  Initialized to 0, available for use
  // ttmp4:  Available - Can be freely used
  // ttmp5:  Available - Can be freely used
  // ttmp6:  Initially contains flags  - trap ID and halt status - overwritten below
  // ttmp7:  Contains WGID_Y in bits 15:0 and WGID_Z in bits 31:16 (as extracted by the fill_sample code)
  // ttmp8:  Contains dispatch ID in bits [24:0] and debug flag
  // ttmp9:  Contains WGID_X
  // ttmp10: Available - Used next to save exec_lo
  // ttmp11: Contains debug flags - Used next to save exec_hi
  // ttmp12: Contains SQ_WAVE_STATE_PRIV
  // ttmp13: Contains flag bits for sampling type - HT_FLAG_BIT or STOCH_FLAG_BIT
  // ttmp[14:15]: Contains HT or ST buffer base address
  //
  // v[0:3] contain user shader data that must be preserved/restored
  // exec: Contains user's execution mask
  //
  // NOTE(review): ttmp6 and ttmp11 carry handler state (see above) and are
  // overwritten below without a visible save; confirm that the exit path
  // outside this excerpt restores or reconstructs them.
  s_mov_b64         ttmp[10:11], exec                       // save exec to ttmp[10:11]
  s_mov_b64         exec, 0x1                               // turn on lane 0 only

  v_readlane_b32    ttmp2, v0, 0
  v_readlane_b32    ttmp3, v1, 0                            // Save out lane 0's first 2 VGPRs

  // At this point, ttmp[4:5], ttmp6 and v[0:1] are free
  // Atomically get current sample slot index and select buffer
  // pcs_sampling_data_t.buf_write_val (uint64_t) stores:
  //   Bit 63: current_buffer_id (0 or 1)
  //   Bits 62-0: current_sample_index_in_buffer
  // v0 = 1 (value to add to the low part of buf_write_val)
  // v1 = 0 (value to add to the high part of buf_write_val, bit 63 is buffer selector)
  // v1 (= 0) also serves as the 32-bit address offset relative to ttmp[14:15].

  v_mov_b32         v0, 1
  v_mov_b32         v1, 0

  global_atomic_add_u64 v[0:1], v1, v[0:1], ttmp[14:15], scope:SCOPE_SYS th:TH_ATOMIC_RETURN
  s_wait_loadcnt    0                                       // Wait for atomic operation to complete and return value

  // At this point, ttmp[4:5] and ttmp6 are free
  // v[0:1] (lane 0) now holds the previous value of buf_write_val.
  // This previous value gives the slot index for the current sample.

  v_readlane_b32    ttmp6, v1, 0x0                          // previous buf_write_val[63:32]
  s_lshr_b32        ttmp6, ttmp6, TTMP13_BUF_FULL_BIT       // ttmp6 = previous_buffer_id (0 or 1, from bit 63 of original uint64_t)
                                                            // This ttmp6 is used to select which buffer's metadata (size, watermark, signal) to use.
                                                            // It's also used to calculate the base address of the sample buffer.
  s_bitset0_b32     ttmp13, TTMP13_BUF_FULL_BIT             // Clear ttmp13 bit 31 before mirroring the buffer id into it

  s_cmp_eq_u32      ttmp6, 0                                // store off buf_to_use
  s_cbranch_scc1    .skip_bufbit_set                        // into bit31 of ttmp13
  s_bitset1_b32     ttmp13, TTMP13_BUF_FULL_BIT

// Validate the reserved slot index against buf_size, compute the base address
// of the selected sample buffer into ttmp[4:5] and dispatch to the host-trap
// or stochastic fill routine. Out-of-range indices go to .lost_sample.
.skip_bufbit_set:
  // ttmp[2:3]=v[0:1]-backup, ttmp[4:5]=free, ttmp6=buf_to_use (also in ttmp13.b31)
  // ttmp[10:11]=EXEC backup. ttmp[14:15]=tma
  // v[0:1].lane0=local_entry, v[2:3]=original, EXEC=0x1

  v_bfe_u32         v1, v1, 0, SAMPLE_INDEX_WIDTH           // v[0:1] = new local_entry
                                                            // removes bit 31 from v1, returning v1 & 0x7FFFFFFF.

  v_readlane_b32    ttmp5, v1, 0                            // ttmp5 = high 31 bits of sample index (non-zero if index > 2^32-1).
  s_cmp_lg_u32      ttmp5, 0                                // Check if sample index is very large (overflowed 32 bits).

  s_cbranch_scc1    .lost_sample                            // If ttmp5 != 0, index is too large, treat as lost sample.

  s_load_b32           ttmp5, ttmp[14:15], SAMPLE_OFF_BUF_SIZE, scope:SCOPE_CU // ttmp5 = pcs_sampling_data_t.buf_size
  v_readlane_b32    ttmp4, v0, 0                            // ttmp4 = sample_index_for_current_sample (from v0)
  s_wait_kmcnt      0                                       // Wait for buf_size load.

  s_cmp_ge_u32      ttmp4, ttmp5                            // if local_entry >= buf_size
  s_cbranch_scc1    .lost_sample                            // If index >= buf_size, buffer is full, sample is lost.
                                                            // NOTE(review): the branch itself modifies no register; any
                                                            // "buffer full" bookkeeping must happen in .lost_sample.

  // Register state before calculating the sample buffer address:
  // ttmp2 = backup of original shader's v0
  // ttmp3 = backup of original shader's v1
  // ttmp4 = sample_index_for_current_sample (from v0)
  // ttmp5 = buf_size
  // ttmp6 = buffer_id (0 or 1)
  // ttmp[10:11] = original shader's [exec_lo, exec_hi]
  // ttmp[14:15] = base_address_of_pcs_sampling_data_t (TMA)
  // ttmp13.b31 = buffer_id (0 or 1, same as ttmp6)
  // v[0:1].lane0 = sample index value from atomic
  // v[2:3] = original user shader's v[2:3] values
  // After the next instruction: exec = backup of user shader's v[0:1]
  s_mov_b64         exec, ttmp[2:3]                         // stash into EXEC to free up ttmp

  // Calculate the base address of the correct sample buffer (buffer0 or buffer1).
  // The buffers are located after the pcs_sampling_data_t struct header.
  // Address = (TMA + SAMPLE_OFF_BYTES_PER_SAMPLE) + (buffer_id * buf_size * 64)
  s_mul_i32         ttmp2, ttmp5, ttmp6                     // low 32 bits
  s_mul_hi_u32      ttmp3, ttmp5, ttmp6                     // high 32 bits

  // Multiply by 64 bytes per sample slot (shift left by 6 bits)
  // This converts from units of samples to units of bytes
  s_lshl_b64        ttmp[2:3], ttmp[2:3], 6
  s_add_u32         ttmp2, ttmp2, SAMPLE_OFF_BYTES_PER_SAMPLE
  s_addc_u32        ttmp3, ttmp3, 0
  s_add_u32         ttmp4, ttmp14, ttmp2                    // ttmp4 = TMA_base_lo + total_offset_lo. This is low part of &bufferX
  s_addc_u32        ttmp5, ttmp15, ttmp3                    // ttmp5 = TMA_base_hi + total_offset_hi + carry. This is high part of &bufferX
                                                            // ttmp[4:5] now points to the base of the selected sample buffer array

  s_bitcmp1_b32     ttmp13, TTMP13_HT_FLAG_BIT              // if ttmp13.b22==1, this is hosttrap
  s_cbranch_scc1    .fill_sample_ht
  s_bitcmp1_b32     ttmp13, TTMP13_STOCH_FLAG_BIT
  s_cbranch_scc1    .fill_sample_stoch

  // Neither sampling flag is set: write no sample, just rebuild the register
  // state expected by the restore path.
  s_mov_b64         ttmp[2:3], exec                         // Restore user v[0:1] backup to ttmp[2:3]
  v_readlane_b32    ttmp4, v2, 0                            // Backup user v[2:3] to ttmp[4:5] for restore.
  v_readlane_b32    ttmp5, v3, 0
  s_branch          .restore_vector_before_exit_trap

// Fill one 64-byte host-trap sample slot: timestamp, PC, EXEC, workgroup IDs,
// wave-in-workgroup id, combined HW_ID and correlation ID. All stores are
// issued from lane 0 and drained with s_wait_storecnt before continuing at
// .ret_from_fill_sample.
.fill_sample_ht:
  // At this point, v[0:1] is local_entry (but v1 is 0)
  // v[2:3] is original user-data
  // ttmp[2:3] is free
  // ttmp[4:5] holds &buffer
  // ttmp6 holds buf_to_use
  // ttmp[10:11] holds original shader's [exec_lo,exec_hi]
  // ttmp[14:15]='tma', ttmp13.b31 = buf_to_use
  // EXEC holds backup of original shader's v[0:1]

  v_readlane_b32    ttmp6, v0, 0                              // ttmp6=local_entry
  s_mul_i32         ttmp2, ttmp6, SAMPLE_OFF_BYTES_PER_SAMPLE // into buffer for 64B objects
  s_mul_hi_u32      ttmp3, ttmp6, SAMPLE_OFF_BYTES_PER_SAMPLE // ttmp[2:3] now holds the offset
  s_add_u32         ttmp2, ttmp2, ttmp4
  s_addc_u32        ttmp3, ttmp3, ttmp5                     // ttmp[2:3]=&bufferX[local_entry]
  v_readlane_b32    ttmp4, v2, 0x0                          // ttmp[4:5] now holds backup of
  v_readlane_b32    ttmp5, v3, 0x0                          // user-data from v[2:3]
  v_writelane_b32   v0, ttmp2, 0x0
  v_writelane_b32   v1, ttmp3, 0x0                          // v[0:1]=&buffer[local_entry]

  s_sendmsg_rtn_b64 ttmp[2:3], sendmsg(MSG_RTN_GET_REALTIME)
  s_wait_kmcnt      0                                       // Wait for timestamp

  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds the thing we want to store
  // ttmp[4:5] holds backup of original shaders v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shaders [exec_lo,exec_hi]
  // ttmp[14:15]=tma, ttmp13.b31 = buf_to_use
  // EXEC holds backup of original shaders v[0:1]

  v_writelane_b32   v2, ttmp2, 0                            // bring output data to v[2:3]
  v_writelane_b32   v3, ttmp3, 0

  s_mov_b64         ttmp[2:3], exec                         // vector stores need EXEC set
  s_mov_b64         exec, 1                                 // so ttmp[2:3] holds it for now

  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_TIMESTAMP, scope:SCOPE_SYS // store out timestamp

  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader's v[0:1]
  // ttmp[4:5] holds backup of original shader's v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader's [exec_lo,exec_hi]
  // ttmp[14:15]='tma', ttmp13.b31 = buf_to_use
  // EXEC is 0x1

  s_and_b32         ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK // Clear out extra data from PC_HI
  v_writelane_b32   v2, ttmp0, 0
  v_writelane_b32   v3, ttmp1, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_PC_HOST, scope:SCOPE_SYS  // store out PC

  v_writelane_b32   v2, ttmp10, 0
  v_writelane_b32   v3, ttmp11, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_EXEC_LOHI, scope:SCOPE_SYS  // store out original EXEC

  // Store Workgroup ID X and Y at offset SAMPLE_OFF_WGID_XY (0x10).
  // ttmp9 = WGID_X (from first-level handler).
  // ttmp7[15:0] is stored as WGID_Y.
  v_writelane_b32   v2, ttmp9, 0                            // wg_id_x
  s_bfe_u32         ttmp6, ttmp7, (16<<16)                  // extract bits 15:0, wg_id_y
  v_writelane_b32   v3, ttmp6, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_WGID_XY, scope:SCOPE_SYS  // store wg_id_x and wg_id_y

  // Store Workgroup ID Z and Wave ID at offset SAMPLE_OFF_WGID_Z_WAVE (0x18).
  // ttmp7[31:16] is stored as WGID_Z.
  // The wave-in-workgroup id is taken from ttmp8 bits 29:25.
  s_bfe_u32         ttmp6, ttmp7, (16|16<<16)               // extract bits 31:16, wg_id_z
  v_writelane_b32   v2, ttmp6, 0
  v_writelane_b32   v3, ttmp8, 0x0                          // wave_in_wg is bits 29:25
  v_lshrrev_b32     v3, 25, v3                              // Shift wave_in_wg to 4:0
  v_and_b32         v3, v3, WAVE_ID_MASK                    // put (ttmp8>>25)&0x1f into v3
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_WGID_Z_WAVE, scope:SCOPE_SYS  // store wg_id_z and wave_id

  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader's v[0:1]
  // ttmp[4:5] holds backup of original shader's v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader's [exec_lo,exec_hi]
  // ttmp[14:15]='tma', ttmp13.b31 = buf_to_use
  // EXEC is 0x1
  // Get HW_ID1 & 2 with S_GETREG_B32 with size=32 (F8 in upper bits), offset=0, and:
  // HW_ID1 = 23 (0x17), HW_ID2 = 24 (0x18)

  STORE_HW_ID

  // The following is still true as we get ready to store the correlation ID
  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader's v[0:1]
  // ttmp[4:5] holds backup of original shader's v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader's [exec_lo,exec_hi]
  // ttmp[14:15]='tma', ttmp13.b31 = buf_to_use
  // EXEC is 0x1

  STORE_CORRELATION_ID
  // Ensure all stores have completed before returning and incrementing written_val
  s_wait_storecnt   0

  // Still true after the correlation ID has been stored
  // v[0:1] = &buffer[local_entry], but we no longer need it
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader's v[0:1]
  // ttmp[4:5] holds backup of original shader's v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader's [exec_lo,exec_hi]
  // ttmp[14:15]='tma', ttmp13.b31 = buf_to_use
  // EXEC is 0x1
  //
  s_branch          .ret_from_fill_sample

.fill_sample_stoch:
  // Stochastic-sampling fill path: computes &buffer[local_entry], then stores the timestamp,
  // original EXEC, workgroup/wave ids, HW ids, perf snapshot registers and correlation id
  // into that entry before falling through to .ret_from_fill_sample.
  //
  // State on entry:
  // v0 contains local_entry, v1 is free
  // v[2:3] is original user-data
  // ttmp[2:3] is free
  // ttmp[4:5] holds &buffer
  // ttmp6 holds buf_to_use
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=‘tma’, ttmp13.b31 = buf_to_use
  // EXEC holds backup of original shader’s v[0:1]

  v_readlane_b32    ttmp6, v0, 0x0                            // ttmp6=local_entry
  s_mul_i32         ttmp2, ttmp6, SAMPLE_OFF_BYTES_PER_SAMPLE // low half of offset into buffer for 64B objects
  s_mul_hi_u32      ttmp3, ttmp6, SAMPLE_OFF_BYTES_PER_SAMPLE // ttmp[2:3] now holds the offset
  s_add_u32         ttmp2, ttmp2, ttmp4
  s_addc_u32        ttmp3, ttmp3, ttmp5                       // ttmp[2:3]=&bufferX[local_entry]
  v_readlane_b32    ttmp4, v2, 0x0                            // ttmp[4:5] now holds backup of
  v_readlane_b32    ttmp5, v3, 0x0                            // user-data from v[2:3]
  v_writelane_b32   v0, ttmp2, 0x0
  v_writelane_b32   v1, ttmp3, 0x0                            // v[0:1]=&buffer[local_entry]
  s_sendmsg_rtn_b64 ttmp[2:3], sendmsg(MSG_RTN_GET_REALTIME)
  s_wait_kmcnt      0                                         // Wait for timestamp

  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds the thing we want to store
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=‘tma’, ttmp13.b31 = buf_to_use
  // EXEC holds backup of original shader’s v[0:1]

  v_writelane_b32   v2, ttmp2, 0                            // bring output data to v[2:3]
  v_writelane_b32   v3, ttmp3, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_TIMESTAMP, scope:SCOPE_SYS  // store out timestamp

  // NOTE(review): the state comment above says EXEC still holds the backup of v[0:1], while
  // the one below says ttmp[2:3] holds it and EXEC is 0x1. No instruction between the two
  // moves that backup or rewrites EXEC, and ttmp[2:3] was last written with the timestamp.
  // Confirm which comment is stale.
  //
  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader’s v[0:1]
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=‘tma’, ttmp13.b31 = buf_to_use
  // EXEC is 0x1
  v_writelane_b32   v2, ttmp10, 0
  v_writelane_b32   v3, ttmp11, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_EXEC_LOHI, scope:SCOPE_SYS  // store out original EXEC
  v_writelane_b32   v2, ttmp9, 0                            // wg_id_x
  s_bfe_u32         ttmp6, ttmp7, (0 | (16 << 16))          // extract bits 15:0, wg_id_y
  v_writelane_b32   v3, ttmp6, 0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_WGID_XY, scope:SCOPE_SYS  // store wg_id_x and wg_id_y
  s_bfe_u32         ttmp6, ttmp7, (16|16<<16)               // extract bits 31:16, wg_id_z
  v_writelane_b32   v2, ttmp6, 0                            // put wg_id_z in v2
  v_writelane_b32   v3, ttmp8, 0x0                          // wave_in_wg is bits 29:25

  v_lshrrev_b32     v3, 25, v3                              // Shift wave_in_wg to 4:0

  v_and_b32         v3, v3, WAVE_ID_MASK                    // put (ttmp8>>25)&0x1f into v3
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_WGID_Z_WAVE, scope:SCOPE_SYS  // store wg_id_z and wave_id

  // STORE_HW_ID is a macro defined outside this section.
  STORE_HW_ID

  //Read SNAPSHOT Data
  s_getreg_b32      ttmp6, HW_REG_SQ_PERF_SNAPSHOT_DATA1
  v_writelane_b32   v2, ttmp6, 0x0
  s_getreg_b32      ttmp6, HW_REG_SQ_PERF_SNAPSHOT_DATA2
  v_writelane_b32   v3, ttmp6, 0x0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_SNAPSHOT_DATA + 4, scope:SCOPE_SYS  // store snapshot DATA1 and DATA2

  // NOTE(review): this read uses ttmp2 as scratch whereas the neighbouring reads use ttmp6.
  // ttmp2 is later written back into v0 at .lost_sample_restore — confirm the clobber is intended.
  s_getreg_b32      ttmp2, HW_REG_SQ_PERF_SNAPSHOT_DATA
  v_writelane_b32   v2, ttmp2, 0
  global_store_b32  v[0:1], v2, off, offset:SAMPLE_OFF_SNAPSHOT_DATA, scope:SCOPE_SYS  // store perf snapshot DATA

  s_getreg_b32      ttmp6, HW_REG_SQ_PERF_SNAPSHOT_PC_LO
  v_writelane_b32   v2, ttmp6, 0x0
  s_getreg_b32      ttmp6, HW_REG_SQ_PERF_SNAPSHOT_PC_HI
  v_writelane_b32   v3, ttmp6, 0x0
  global_store_b64  v[0:1], v[2:3], off, offset:SAMPLE_OFF_PC_HOST, scope:SCOPE_SYS  // store PC_HI:PC_LO

  // The following is still true as we get ready to jump to correlation ID check
  // v[0:1] = &buffer[local_entry]
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader’s v[0:1]
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=‘tma’, ttmp13.b31 tells us buf_to_use
  // EXEC is 0x1

  // STORE_CORRELATION_ID is a macro defined outside this section.
  STORE_CORRELATION_ID
  // Ensure all stores have completed before returning and incrementing written_val
  s_wait_storecnt   0

.ret_from_fill_sample:
  // Common tail of both fill paths: bump the written-sample counter of the buffer in use and
  // decide whether the watermark has been reached (fall through to .send_signal) or not
  // (branch to .restore_vector_before_exit_trap).
  //
  // v[0:1] = free
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader’s v[0:1]
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=‘tma’, ttmp13.b31 tells us buf_to_use
  // EXEC is 0x1

  // Sample data has been written to the device buffer.
  // Now, atomically increment the count of written samples for the current buffer.
  // This is pcs_sampling_data_t.buf_written_val0 or buf_written_val1.
  s_lshr_b32        ttmp6, ttmp13, 31                       // ttmp6 is buf_to_use
  s_mulk_i32        ttmp6, 0x10                             // ttmp6=offset from
                                                            // written_val0 to written_val_X
  s_add_u32         ttmp14, ttmp14, ttmp6                   // now ttmp[14:15] points to base for
  s_addc_u32        ttmp15, ttmp15, 0                       // buf_written_valX atomic operation

  // Atomically increment the chosen buf_written_val.
  // v0 = 0 is the VGPR offset added to the ttmp[14:15] base; v1 = 1 is the 32-bit value to add.
  // With TH_ATOMIC_RETURN the value held before the add is returned in v0.

  v_mov_b32         v0, 0                                   // zero offset from ttmp[14:15]
  v_mov_b32         v1, 1                                   // increment buf_written_valX by one
  global_atomic_add_u32 v0, v0, v1, ttmp[14:15], offset:SAMPLE_OFF_BUF_WRITTEN_VAL, scope:SCOPE_SYS th:TH_ATOMIC_RETURN
  s_wait_loadcnt    0

  // v0 = done, v1 = free, v[2:3] = free
  // ttmp[2:3] holds backup of original shader’s v[0:1]
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=buf_written_valX-0x10, EXEC=0x1
  // Check Watermark and Signal Host

  s_mov_b64         exec, ttmp[4:5]                         // stash user’s v[2:3] in EXEC
  s_load_b32        ttmp5, ttmp[14:15], 0x14, scope:SCOPE_CU // load watermark into ttmp5
  v_readlane_b32    ttmp4, v0, 0                            // put done (count before the increment) into ttmp4
  s_wait_kmcnt      0                                       // wait for watermark to load
  s_cmp_lg_u32      ttmp4, ttmp5                            // NOTE(review): SCC from this compare is overwritten by the s_add_u32 and s_cmp_lt_u32 below before any branch reads it
  s_add_u32         ttmp4, ttmp4, 1                         // ttmp4 is now current_sample_count (count_before_inc + 1)
  s_cmp_lt_u32      ttmp4, ttmp5                            // if (current_sample_count < watermark), don't signal
  s_mov_b64         ttmp[4:5], exec                         // restore user’s v[2:3]
  s_mov_b64         exec, 1
  s_cbranch_scc1    .restore_vector_before_exit_trap        // below watermark: skip the host signal

.send_signal:
  // Reached by fallthrough when current_sample_count >= watermark.
  // v[0:3] = free, ttmp[2:5] = backups of original v[0:3], ttmp6=free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=buf_written_valX-0x10, EXEC=0x1 (set just before the branch above)
  // write done-signal and optional interrupt

  // Watermark reached or exceeded. Signal the host.
  // Load the hsa_signal_t handle for the current buffer.
  // done_sig0 is at offset 0x18. done_sig1 is at 0x28.
  // The (buffer_id * 0x10) term was already added into ttmp[14:15] before the atomic
  // increment, so only the constant SAMPLE_OFF_DONE_SIG0 offset is applied here.

  s_load_b64           ttmp[14:15], ttmp[14:15], SAMPLE_OFF_DONE_SIG0, scope:SCOPE_CU // load done_sig into ttmp[14:15]
  s_mov_b64         exec, 1
  s_wait_kmcnt      0

  v_mov_b32         v0, 0
  v_mov_b32         v1, 0                                   // value to store into v[0:1]
  v_writelane_b32   v2, ttmp14, 0
  v_writelane_b32   v3, ttmp15, 0                           // Put signal address into v[2:3]
  global_store_b64  v[2:3], v[0:1], off, offset:SAMPLE_OFF_SIGNAL_VALUE, scope:SCOPE_SYS // zero out signal value

  // Both loads below are relative to the done_sig pointer loaded above, not to the tma region.
  // NOTE(review): 0x18 here is a literal offset into the signal object, unlike the named
  // SAMPLE_OFF_* constants used elsewhere — confirm against the signal layout.
  s_load_b32           ttmp6, ttmp[14:15], 0x18, scope:SCOPE_CU           // load event_id into ttmp6
  s_load_b64           ttmp[14:15], ttmp[14:15], SAMPLE_OFF_EVENT_MAILBOX0, scope:SCOPE_CU     // load event mailbox ptr into 14:15
  s_wait_kmcnt      0

  s_cmp_eq_u64      ttmp[14:15], 0                          // null mailbox means no interrupt
  s_cbranch_scc1    .restore_vector_before_exit_trap
  s_cmp_eq_u32      ttmp6, 0                                // event_id zero means no interrupt
  s_cbranch_scc1    .restore_vector_before_exit_trap
  v_writelane_b32   v2, ttmp14, 0
  v_writelane_b32   v3, ttmp15, 0                           // Put mailbox address into v[2:3]

  s_wait_storecnt   0                                       // signal value store must complete before the mailbox write
  v_writelane_b32   v0, ttmp6, 0x0                          // put event_id into v0
  global_store_b32  v[2:3], v0, off, offset:0x0, scope:SCOPE_SYS // Send event ID to the mailbox
  s_wait_storecnt   0
  s_mov_b32         ttmp14, m0                              // save off m0
  v_readlane_b32    ttmp15, v0, 0                           // Put ID into message payload
  s_mov_b32         m0, ttmp15
  s_sendmsg         sendmsg(MSG_INTERRUPT)                  // send interrupt message
  s_wait_kmcnt      0
  s_mov_b32         m0, ttmp14                              // restore m0

  // v[0:1] = free
  // v[2:3] = free
  // ttmp[2:3] holds backup of original shader’s v[0:1]
  // ttmp[4:5] holds backup of original shader’s v[2:3]
  // ttmp6 = free
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=somewhere in tma region, EXEC is junk

.restore_vector_before_exit_trap:
  // Put the saved copy of the original shader's v[2:3] back from ttmp[4:5].
  v_writelane_b32   v2, ttmp4, 0
  v_writelane_b32   v3, ttmp5, 0

.lost_sample:
  // State as documented for a direct jump to this label (v[2:3] were never overwritten):
  // v0 contains local_entry, v1 is free
  // v[2:3] is original user-data
  // ttmp[2:3] [local_entry, buf_size]
  // ttmp[4:5] = free
  // ttmp6=buf_to_use (also in ttmp13.b31)
  // ttmp[10:11] holds original shader’s [exec_lo,exec_hi]
  // ttmp[14:15]=tma
  // EXEC=0x1
  // Restore vector registers before exiting
  //
  // NOTE(review): the list above says ttmp[2:3] is [local_entry, buf_size], yet
  // .lost_sample_restore below writes ttmp[2:3] into v[0:1] as the user's data — confirm
  // which description matches the jump sites outside this section.

  s_bitcmp1_b32     ttmp13, TTMP13_STOCH_FLAG_BIT           // Check if stochastic sampling
  s_cbranch_scc0    .lost_sample_restore                    // If not, just restore and exit
  s_getreg_b32      ttmp6, HW_REG_SQ_PERF_SNAPSHOT_PC_HI    // Read PC_HI to release lock

.lost_sample_restore:
  v_writelane_b32   v0, ttmp2, 0                            // restore v[0:1] to user data
  v_writelane_b32   v1, ttmp3, 0
  s_mov_b64         exec, ttmp[10:11]                       // restore exec mask

.exit_trap:
  // Restore SQ_WAVE_STATUS.
  s_and_b64         exec, exec, exec                        // Restore STATUS.EXECZ, not writable by s_setreg_b32
  s_and_b64         vcc, vcc, vcc                           // Restore STATUS.VCCZ, not writable by s_setreg_b32
  // STATE_PRIV is written back from ttmp12 in two pieces: the bits below BARRIER_COMPLETE,
  // then (after shifting ttmp12 right) the bits above it, so the BARRIER_COMPLETE bit itself
  // is not written. Presumably ttmp12 holds the value saved at trap entry — that code is
  // outside this section.
  s_setreg_b32      hwreg(HW_REG_STATE_PRIV, 0, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT), ttmp12
  s_lshr_b32        ttmp12, ttmp12, (SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1)
  s_setreg_b32      hwreg(HW_REG_STATE_PRIV, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1, 32 - SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT - 1), ttmp12

  // Return from the trap handler to the PC held in [ttmp0, ttmp1].
  s_rfe_b64         [ttmp0, ttmp1]

.parked:
  // Not reachable by fallthrough (s_rfe_b64 above leaves the handler); entered only by a
  // branch from outside this section. Loops forever, issuing s_trap 0x2 each iteration.
  s_trap            0x2
  s_branch          .parked

// Add s_code_end padding so instruction prefetch always has something to read.
// The repeat count is derived from the current offset from trap_entry modulo 64.
.rept (256 - ((. - trap_entry) % 64)) / 4
  s_code_end
.endr


================================================
FILE: runtime/hsa-runtime/core/util/atomic_helpers.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

/*
  Helpers to use native types with C++11 atomic operations.
  Fixes GCC builtin functionality for x86 with respect to WC and non-temporal
  stores.
*/
#ifndef HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_
#define HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_

#include <atomic>

//ALWAYS_CONSERVATIVE will very likely overfence your code.
//For use as a debugging aid only.
#define ALWAYS_CONSERVATIVE 0

#if !ALWAYS_CONSERVATIVE
#if defined(__x86_64__) || defined(_M_X64)
#define X64_ORDER_WC 1
#endif
#if X64_ORDER_WC
#include <xmmintrin.h>
#endif
#endif

namespace rocr {
namespace atomic {

/// @brief: Translate a C++11 memory order into the matching GCC __ATOMIC_* flag.
/// When ALWAYS_CONSERVATIVE or X64_ORDER_WC is enabled the result is always
/// __ATOMIC_RELAXED; the helpers in this file then bracket the builtin with
/// PreFence/PostFence instead.
/// @param: order(Input), C++11 memory order to translate.
/// @return: int, __ATOMIC_* flag; unrecognized orders map to __ATOMIC_SEQ_CST.
static constexpr int c11ToBuiltInFlags(std::memory_order order)
{
#if ALWAYS_CONSERVATIVE || X64_ORDER_WC
  return __ATOMIC_RELAXED;
#else
  // Single return expression keeps this usable as a C++11 constexpr function.
  return (order == std::memory_order_acq_rel) ? __ATOMIC_ACQ_REL
       : (order == std::memory_order_consume) ? __ATOMIC_CONSUME
       : (order == std::memory_order_release) ? __ATOMIC_RELEASE
       : (order == std::memory_order_acquire) ? __ATOMIC_ACQUIRE
       : (order == std::memory_order_relaxed) ? __ATOMIC_RELAXED
       : __ATOMIC_SEQ_CST;
#endif
}

/// @brief: Fence issued before an atomic operation.
/// Acts only for release, seq_cst and acq_rel orders: a seq_cst thread fence
/// under ALWAYS_CONSERVATIVE, an sfence under X64_ORDER_WC. Otherwise a no-op.
/// @param: order(Input), memory order requested for the atomic operation.
/// @return: void.
static __forceinline void PreFence(std::memory_order order) {
#if ALWAYS_CONSERVATIVE || X64_ORDER_WC
  const bool needs_fence = (order == std::memory_order_release) ||
                           (order == std::memory_order_seq_cst) ||
                           (order == std::memory_order_acq_rel);
  if (!needs_fence) return;
#if ALWAYS_CONSERVATIVE
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#else
  _mm_sfence();
#endif
#endif
}

/// @brief: Fence issued after an atomic operation.
/// Under ALWAYS_CONSERVATIVE a seq_cst thread fence follows seq_cst, acq_rel
/// and acquire operations. Under X64_ORDER_WC seq_cst gets an mfence while
/// acq_rel and acquire get an lfence. Otherwise a no-op.
/// @param: order(Input), memory order requested for the atomic operation.
/// @return: void.
static __forceinline void PostFence(std::memory_order order) {
#if ALWAYS_CONSERVATIVE
  if (order == std::memory_order_seq_cst || order == std::memory_order_acq_rel ||
      order == std::memory_order_acquire) {
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
  }
#elif X64_ORDER_WC
  if (order == std::memory_order_seq_cst) {
    _mm_mfence();
  } else if (order == std::memory_order_acq_rel || order == std::memory_order_acquire) {
    _mm_lfence();
  }
#endif
}

/// @brief: Stand-alone memory fence.
/// ALWAYS_CONSERVATIVE always issues a seq_cst thread fence. X64_ORDER_WC
/// maps seq_cst/acq_rel to mfence, acquire to lfence, release to sfence and
/// does nothing for other orders. Any other build defers to
/// std::atomic_thread_fence.
/// @param: order(Input), requested ordering, seq_cst by default.
/// @return: void.
static __forceinline void Fence(std::memory_order order=std::memory_order_seq_cst) {
#if ALWAYS_CONSERVATIVE
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif X64_ORDER_WC
  if (order == std::memory_order_seq_cst || order == std::memory_order_acq_rel) {
    _mm_mfence();
  } else if (order == std::memory_order_acquire) {
    _mm_lfence();
  } else if (order == std::memory_order_release) {
    _mm_sfence();
  }
#else
  std::atomic_thread_fence(order);
#endif
}

/// @brief: Compile-time check that objects of sizeof(T) are always lock-free.
/// @param: ptr(Input), unused; present only so T can be named from a pointer.
/// @return: void.
template <class T>
static __forceinline void BasicCheck(const T* ptr) {
  static_assert(__atomic_always_lock_free(sizeof(T), 0),
                "Atomic type may not be compatible with peripheral atomics.");
}

/// @brief: Overload of the lock-free size check for pointers to volatile T.
/// @param: ptr(Input), unused; present only so T can be named from a pointer.
/// @return: void.
template <class T>
static __forceinline void BasicCheck(const volatile T* ptr) {
  static_assert(__atomic_always_lock_free(sizeof(T), 0),
                "Atomic type may not be compatible with peripheral atomics.");
}

/// @brief: Atomically read the T stored at ptr using the given memory order.
/// @param: ptr(Input), address of the T to read.
/// @param: order(Input), memory order of the load, relaxed by default.
/// @return: T, the value that was read.
template <class T>
static __forceinline T
    Load(const T* ptr, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  T loaded;
  __atomic_load(ptr, &loaded, c11ToBuiltInFlags(order));
  PostFence(order);
  return loaded;
}

/// @brief: Volatile overload of Load; atomically reads the T stored at ptr.
/// @param: ptr(Input), address of the volatile T to read.
/// @param: order(Input), memory order of the load, relaxed by default.
/// @return: T, the value that was read.
template <class T>
static __forceinline T
    Load(const volatile T* ptr,
         std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  T loaded;
  __atomic_load(ptr, &loaded, c11ToBuiltInFlags(order));
  PostFence(order);
  return loaded;
}

/// @brief: Atomically write val to the T at ptr using the given memory order.
/// @param: ptr(Input), address of the T to overwrite.
/// @param: val(Input), value to write.
/// @param: order(Input), memory order of the store, relaxed by default.
/// @return: void.
template <class T>
static __forceinline void Store(
    T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);

  // The builtin sits between the fences selected for this order.
  PreFence(order);
  __atomic_store(ptr, &val, c11ToBuiltInFlags(order));
  PostFence(order);
}

/// @brief: Volatile overload of Store; atomically writes val to the T at ptr.
/// @param: ptr(Input), address of the volatile T to overwrite.
/// @param: val(Input), value to write.
/// @param: order(Input), memory order of the store, relaxed by default.
/// @return: void.
template <class T>
static __forceinline void Store(
    volatile T* ptr, T val,
    std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);

  // The builtin sits between the fences selected for this order.
  PreFence(order);
  __atomic_store(ptr, &val, c11ToBuiltInFlags(order));
  PostFence(order);
}

/// @brief: Strong compare-and-swap: writes val to *ptr only if *ptr equals expected.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), value stored when the comparison succeeds.
/// @param: expected(Input), value *ptr must hold for the swap to happen.
/// @param: order(Input), memory order on success, relaxed by default; the
/// failure order is always relaxed.
/// @return: T, the value observed in *ptr (equals expected when the swap happened).
template <class T>
static __forceinline T
    Cas(T* ptr, T val, T expected,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  // On failure the builtin overwrites `observed` with the current contents of *ptr.
  T observed = expected;
  __atomic_compare_exchange(ptr, &observed, &val, false, c11ToBuiltInFlags(order), __ATOMIC_RELAXED);
  PostFence(order);
  return observed;
}

/// @brief: Volatile overload of Cas; strong compare-and-swap on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), value stored when the comparison succeeds.
/// @param: expected(Input), value *ptr must hold for the swap to happen.
/// @param: order(Input), memory order on success, relaxed by default; the
/// failure order is always relaxed.
/// @return: T, the value observed in *ptr (equals expected when the swap happened).
template <class T>
static __forceinline T
    Cas(volatile T* ptr, T val, T expected,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  // On failure the builtin overwrites `observed` with the current contents of *ptr.
  T observed = expected;
  __atomic_compare_exchange(ptr, &observed, &val, false, c11ToBuiltInFlags(order), __ATOMIC_RELAXED);
  PostFence(order);
  return observed;
}

/// @brief: Atomically replace *ptr with val using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), value to store.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the exchange.
template <class T>
static __forceinline T
    Exchange(T* ptr, T val,
             std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  T previous;
  __atomic_exchange(ptr, &val, &previous, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Exchange; atomically replaces *ptr with val.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), value to store.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the exchange.
template <class T>
static __forceinline T
    Exchange(volatile T* ptr, T val,
             std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  T previous;
  __atomic_exchange(ptr, &val, &previous, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomic fetch-and-add on *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), amount to add.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the addition.
template <class T>
static __forceinline T
    Add(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_add(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomic fetch-and-subtract on *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), amount to subtract.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the subtraction.
template <class T>
static __forceinline T
    Sub(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_sub(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomic fetch-and-AND on *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), mask ANDed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T
    And(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_and(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomic fetch-and-OR on *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), mask ORed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T
    Or(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_or(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomic fetch-and-XOR on *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: val(Input), mask XORed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T
    Xor(T* ptr, T val, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_xor(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomically add one to *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the increment.
template <class T>
static __forceinline T
    Increment(T* ptr, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_add(ptr, 1, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Atomically subtract one from *ptr using the given memory order.
/// @param: ptr(Input), address of the variable operated on.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the decrement.
template <class T>
static __forceinline T
    Decrement(T* ptr, std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_sub(ptr, 1, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Add; atomic fetch-and-add on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), amount to add.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the addition.
template <class T>
static __forceinline T
    Add(volatile T* ptr, T val,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_add(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Sub; atomic fetch-and-subtract on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), amount to subtract.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the subtraction.
template <class T>
static __forceinline T
    Sub(volatile T* ptr, T val,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_sub(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of And; atomic fetch-and-AND on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), mask ANDed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T
    And(volatile T* ptr, T val,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_and(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Or; atomic fetch-and-OR on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), mask ORed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T Or(volatile T* ptr, T val,
                          std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_or(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Xor; atomic fetch-and-XOR on *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: val(Input), mask XORed into the variable.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the operation.
template <class T>
static __forceinline T
    Xor(volatile T* ptr, T val,
        std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_xor(ptr, val, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Increment; atomically adds one to *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the increment.
template <class T>
static __forceinline T
    Increment(volatile T* ptr,
              std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_add(ptr, 1, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}

/// @brief: Volatile overload of Decrement; atomically subtracts one from *ptr.
/// @param: ptr(Input), address of the volatile variable operated on.
/// @param: order(Input), memory order, relaxed by default.
/// @return: T, the value *ptr held before the decrement.
template <class T>
static __forceinline T
    Decrement(volatile T* ptr,
              std::memory_order order = std::memory_order_relaxed) {
  BasicCheck<T>(ptr);
  PreFence(order);
  const T previous = __atomic_fetch_sub(ptr, 1, c11ToBuiltInFlags(order));
  PostFence(order);
  return previous;
}
}   //  namespace atomic
}   //  namespace rocr

#ifdef X64_ORDER_WC
#undef X64_ORDER_WC
#endif

#ifdef ALWAYS_CONSERVATIVE
#undef ALWAYS_CONSERVATIVE
#endif

#endif  // HSA_RUNTIME_CORE_UTIL_ATOMIC_HELPERS_H_


================================================
FILE: runtime/hsa-runtime/core/util/flag.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2021-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/util/flag.h"
#include "core/util/utils.h"
#include "core/util/os.h"

#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <locale>

namespace rocr {
// Stream that log_printf writes to and flushes; starts out as stderr.
FILE* log_file = stderr;
// Eight bytes of logging flags; not read or written in this part of the file.
// NOTE(review): presumably one bit per log category — confirm against flag.h.
uint8_t log_flags[8];

// Writes one log line to log_file and flushes it.
//
// The line carries the caller's file and line, os::ReadAccurateClock()/1000 (labelled "us"
// in the output), the process id, the calling thread's id in hex, and the formatted message.
//
// @param file    Source file name reported in the prefix.
// @param line    Source line reported in the prefix.
// @param format  printf-style format for the message; the expanded text is truncated to fit
//                a 4096-byte buffer.
void log_printf(const char* file, int line, const char* format, ...) {
    // Expand the caller's variadic arguments into a bounded buffer.
    char text[4096];
    va_list args;
    va_start(args, format);
    vsnprintf(text, sizeof(text), format, args);
    va_end(args);

    // Render the calling thread's id in hexadecimal for the prefix.
    std::stringstream tid_hex;
    tid_hex << std::hex << std::this_thread::get_id();

    fprintf(log_file, ":%-25s:%-4d: %010lld us: [pid:%-5d tid:0x%s] [***rocr***] %s\n",
            file, line, os::ReadAccurateClock()/1000ULL, os::GetProcessId(),
            tid_hex.str().c_str(), text);
    fflush(log_file);
}

// Split str at every occurrence of sep.
//
// The input is consumed while it is tokenized: each returned piece and its
// separator are erased from the front of str, so on return str holds the last
// piece (or is empty when the text ended with a separator or was empty).
// Consecutive separators yield empty pieces; a single trailing separator does
// not produce a trailing empty piece.
static std::vector<std::string> split(std::string& str, char sep) {
  std::vector<std::string> pieces;
  for (;;) {
    if (str.empty()) break;

    const size_t cut = str.find(sep);
    if (cut == std::string::npos) {
      // No separator left: the remainder is the final piece.
      pieces.push_back(str);
      break;
    }

    pieces.emplace_back(str, 0, cut);
    str.erase(0, cut + 1);
  }
  return pieces;
}

// Parse id,id-id,... strings into id lists.
//
// str        - comma separated list of IDs and inclusive ID ranges, for
//              example "0,2-4,7". The string is modified while it is tokenized.
// maxElement - largest ID worth keeping. Single IDs above it are dropped and
//              ranges are clamped to it; neither is treated as an error.
//
// Returns the kept IDs sorted ascending. An empty vector means either a syntax
// error (empty or non-decimal ID, leading/trailing '-', more than one '-' in an
// entry, inverted range, repeated ID) or that nothing listed was <= maxElement.
static std::vector<uint32_t> get_elements(std::string& str, uint32_t maxElement) {
  // Converts one ID made only of base 10 digits. strtoul alone is not enough:
  // it reports success for an empty string (yielding 0) and accepts a sign.
  // Values that do not fit in 32 bits saturate instead of wrapping to a small,
  // valid looking ID.
  auto parse_id = [](const std::string& text, uint32_t& value) {
    if (text.empty()) return false;
    for (unsigned char c : text) {
      if (!isdigit(c)) return false;
    }
    const unsigned long parsed = strtoul(text.c_str(), nullptr, 10);
    value = (parsed > UINT32_MAX) ? UINT32_MAX : static_cast<uint32_t>(parsed);
    return true;
  };

  // Every failure returns a fresh empty vector explicitly. Clearing the result
  // from a scope guard only works if the compiler elides the copy of the
  // returned local, which it is allowed but not required to do.
  std::vector<uint32_t> ret;

  std::vector<std::string> ranges = split(str, ',');
  for (auto& entry : ranges) {
    // An empty entry would split into zero pieces, and split() drops a trailing
    // separator, so "5-" would otherwise be indistinguishable from "5".
    if (entry.empty() || entry.back() == '-') return {};

    auto range = split(entry, '-');
    // failure, too many -'s.
    if (range.empty() || range.size() > 2) return {};

    uint32_t index;
    // Invalid syntax - id's must be base 10 digits only.
    if (!parse_id(range[0], index)) return {};
    if (index <= maxElement) ret.push_back(index);

    if (range.size() == 2) {
      uint32_t secondindex;
      if (!parse_id(range[1], secondindex)) return {};  // bad syntax
      if (secondindex < index) return {};               // inverted range
      secondindex = Min(secondindex, maxElement);
      // Adds index+1 .. secondindex; written so that neither bound is
      // incremented past UINT32_MAX.
      for (uint32_t i = index; i < secondindex; ++i) ret.push_back(i + 1);
    }
  }

  // Confirm no duplicate ids.
  std::sort(ret.begin(), ret.end());
  if (std::adjacent_find(ret.begin(), ret.end()) != ret.end()) return {};

  // Good parse, keep result.
  return ret;
}

/*
Parse env var per the following syntax, all whitespace is ignored:

ID = [0-9][0-9]*                         ex. base 10 numbers
ID_list = (ID | ID-ID)[, (ID | ID-ID)]*  ex. 0,2-4,7
GPU_list = ID_list                       ex. 0,2-4,7
CU_list = 0x[0-F]* | ID_list             ex. 0x337F OR 0,2-4,7
CU_Set = GPU_list : CU_list              ex. 0,2-4,7:0-15,32-47 OR 0,2-4,7:0x337F
HSA_CU_MASK =  CU_Set [; CU_Set]*        ex. 0,2-4,7:0-15,32-47; 3-9:0x337F

GPU indexes are taken post ROCR_VISIBLE_DEVICES reordering.
Listed or bit set CUs will be enabled at queue creation on the associated GPU.
All other CUs on the associated GPUs will be disabled.
CU masks of unlisted GPUs are not restricted.

Repeating a GPU or CU ID is a syntax error.
Parsing stops at the first CU_Set that has a syntax error, that set and all
following sets are ignored.
Specifying a mask with no usable CUs (CU_list is 0x0) is a syntax error.
Users should use ROCR_VISIBLE_DEVICES if they want to exclude use of a
particular GPU.

Parameters:
  var    - raw HSA_CU_MASK text. It is normalized (whitespace removed, letters
           upper-cased) and consumed in place.
  maxGpu - bound passed to the GPU_list parser.
  maxCU  - bound passed to the CU_list parser; for the hex form, mask dwords
           past index maxCU / 32 are discarded.

Each accepted CU_Set stores its mask (32 CUs per dword, least significant dword
first) in cu_mask_ under every GPU index of the set. Sets accepted before a
syntax error stay in cu_mask_.
*/
void Flag::parse_masks(std::string& var, uint32_t maxGpu, uint32_t maxCU) {
  if (var.empty()) return;

  // Remove whitespace
  auto end = std::remove_if(var.begin(), var.end(),
                            [](char c) { return std::isspace<char>(c, std::locale::classic()); });
  var.erase(end, var.end());

  // Switch to uppercase. The <cctype> functions require a value representable
  // as unsigned char, so convert before calling them.
  for (auto& c : var) c = static_cast<char>(toupper(static_cast<unsigned char>(c)));

  // Iterate over cu sets
  auto sets = split(var, ';');
  for (auto& set : sets) {
    auto parts = split(set, ':');
    if (parts.size() != 2) return;

    // temp storage for cu_set parsing.
    std::vector<uint32_t> gpu_index;
    std::vector<uint32_t> mask;

    // parse cu list first, check for bitmask format.
    // The length test keeps the [1] access inside the string: input such as
    // "0::" produces an empty CU list, which then fails in the list parser.
    if (parts[1].size() >= 2 && parts[1][1] == 'X') {
      // Confirm hex format and strip prefix
      auto& cu = parts[1];
      if (cu[0] != '0') return;
      cu.erase(0, 2);

      // Ensure all valid hex characters
      for (auto& c : cu) {
        if (!isxdigit(static_cast<unsigned char>(c))) return;
      }

      // Convert to uint32_t, lsb first. Eight hex digits are taken from the
      // end of the string at a time, so every chunk fits in 32 bits.
      size_t len = cu.length();
      while (len != 0) {
        size_t trim = Min(len, size_t(8));
        len -= trim;
        auto tmp = cu.substr(len, trim);
        auto chunk = stoul(tmp, nullptr, 16);
        mask.push_back(static_cast<uint32_t>(chunk));
      }

      // Trim dwords beyond maxCUs
      uint32_t maxDwords = maxCU / 32 + 1;
      if (maxDwords < mask.size()) mask.resize(maxDwords);

      // Trim leading zeros
      while (!mask.empty() && mask.back() == 0) mask.pop_back();

      // Mask 0x0 is an error.
      if (mask.empty()) return;

    } else {
      // parse cu lists
      auto cu_indices = get_elements(parts[1], maxCU);
      if (cu_indices.empty()) return;
      // cu_indices is sorted, so the last entry decides how many dwords are needed.
      uint32_t maxdword = cu_indices.back() / 32 + 1;
      mask.resize(maxdword, 0);
      for (auto id : cu_indices) {
        uint32_t index, offset;
        index = id / 32;
        offset = id % 32;
        mask[index] |= 1ul << offset;
      }
    }

    // parse device list
    gpu_index = get_elements(parts[0], maxGpu);
    if (gpu_index.empty()) return;

    // Ensure that no GPU was repeated across cu_sets
    for (auto id : gpu_index) {
      if (cu_mask_.find(id) != cu_mask_.end()) return;
    }

    // Insert into map
    for (auto id : gpu_index) {
      cu_mask_[id] = mask;
    }
  }
}

}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/util/flag.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_FLAG_H_
#define HSA_RUNTIME_CORE_INC_FLAG_H_

#include <stdint.h>

#include <vector>
#include <map>
#include <string>

#include "core/util/os.h"
#include "core/util/utils.h"

namespace rocr {

// Cached view of the runtime's environment-variable configuration.
//
// Refresh() reads each supported variable through os::GetEnvVar /
// os::IsEnvVarSet and stores the parsed value; the accessors return the stored
// values. HSA_CU_MASK is parsed separately by parse_masks(). Most members have
// no initializer and the constructor sets nothing, so Refresh() has to run
// before the accessors return meaningful values.
class Flag {
 public:
  // Tri-state used by the SDMA related switches: "0" disables, "1" enables,
  // anything else (including unset) keeps the default.
  enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
  enum SRAMECC_ENABLE { SRAMECC_DISABLED, SRAMECC_ENABLED, SRAMECC_DEFAULT };

  // The values are meaningful and chosen to satisfy the thunk API.
  enum XNACK_REQUEST { XNACK_DISABLE = 0, XNACK_ENABLE = 1, XNACK_UNCHANGED = 2 };
  static_assert(XNACK_DISABLE == 0, "XNACK_REQUEST enum values improperly changed.");
  static_assert(XNACK_ENABLE == 1, "XNACK_REQUEST enum values improperly changed.");

  // Lift limit for 2.10 release RCCL workaround. This limit is not used when asynchronous scratch
  // reclaim is supported
  const size_t DEFAULT_SCRATCH_SINGLE_LIMIT = (140 * (1UL<<20));  // small_limit >> 2;
  const size_t DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC = (3 * (1UL<<30));  // 3 GB
  const size_t DEFAULT_PCS_MAX_DEVICE_BUFFER_SIZE = (256 * (1UL<<20)); //256 MB

  Flag() {}

  virtual ~Flag() {}

  // Re-reads every environment variable handled by this class and overwrites
  // the cached settings. Boolean switches come in two flavours: opt-in ones are
  // true only for the exact value "1", opt-out ones are false only for the
  // exact value "0".
  void Refresh() {
    std::string var = os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH");
    check_flat_scratch_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE");
    enable_vm_fault_message_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_ENABLE_QUEUE_FAULT_MESSAGE");
    enable_queue_fault_message_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
    enable_interrupt_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_ENABLE_SDMA");
    enable_sdma_ = (var == "0") ? SDMA_DISABLE : ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);

    var = os::GetEnvVar("HSA_ENABLE_PEER_SDMA");
    enable_peer_sdma_ = (var == "0") ? SDMA_DISABLE : ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);

    var = os::GetEnvVar("HSA_ENABLE_SDMA_GANG");
    enable_sdma_gang_ = (var == "0") ? SDMA_DISABLE :
                       ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
    // Ganging cannot be on while SDMA itself is disabled.
    if (enable_sdma_ == SDMA_DISABLE) enable_sdma_gang_ = SDMA_DISABLE;

    var = os::GetEnvVar("HSA_ENABLE_SDMA_COPY_SIZE_OVERRIDE");
    enable_sdma_copy_size_override_ = (var == "0") ? SDMA_DISABLE :
                                      ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);

    var = os::GetEnvVar("HSA_ENABLE_SDMA_RECOMMENDED_ENG");
    enable_sdma_recommended_eng_ = (var == "0") ? SDMA_DISABLE :
                                   ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);

    // Keep "set" separate from the value so that a variable that is set but
    // empty can be told apart from an unset one.
    visible_gpus_ = os::GetEnvVar("ROCR_VISIBLE_DEVICES");
    filter_visible_gpus_ = os::IsEnvVarSet("ROCR_VISIBLE_DEVICES");

    var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND");
    running_valgrind_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_SDMA_WAIT_IDLE");
    sdma_wait_idle_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_MAX_QUEUES");
    max_queues_ = static_cast<uint32_t>(atoi(var.c_str()));

    // Maximum amount of scratch mem that can be used per process per gpu
    var = os::GetEnvVar("HSA_SCRATCH_MEM");
    scratch_mem_size_ = atoi(var.c_str());

    // Scratch memory sizes > HSA_SCRATCH_SINGLE_LIMIT will trigger a use-once scheme
    // We also reserve HSA_SCRATCH_SINGLE_LIMIT per process per gpu to guarantee we
    // have sufficient memory for scratch in case user tried to allocate all device
    // memory
    if (os::IsEnvVarSet("HSA_SCRATCH_SINGLE_LIMIT")) {
      var = os::GetEnvVar("HSA_SCRATCH_SINGLE_LIMIT");
      char* end;
      scratch_single_limit_ = strtoul(var.c_str(), &end, 10);
    } else {
      scratch_single_limit_ = DEFAULT_SCRATCH_SINGLE_LIMIT;
    }

    // On GPUs that support asynchronous scratch reclaim
    // Scratch memory sizes > HSA_SCRATCH_SINGLE_LIMIT_ASYNC will trigger a use-once scheme
    // Note: This only sets the initial value for the threshold. If
    // hsa_amd_agent_set_async_scratch_limit is called after initialization, the threshold
    // will be updated.
    if (os::IsEnvVarSet("HSA_SCRATCH_SINGLE_LIMIT_ASYNC")) {
      var = os::GetEnvVar("HSA_SCRATCH_SINGLE_LIMIT_ASYNC");
      char* end;
      scratch_single_limit_async_ = strtoul(var.c_str(), &end, 10);
    } else {
      scratch_single_limit_async_ = 0;  // DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC;
    }

    // On GPUs that support asynchronous scratch reclaim this can be used to disable this feature.
    // Disabling asynchronous scratch reclaim also disables use of alternate scratch
    // HSA_ENABLE_SCRATCH_ALT
    var = os::GetEnvVar("HSA_ENABLE_SCRATCH_ASYNC_RECLAIM");
    enable_scratch_async_reclaim_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_ENABLE_SCRATCH_ALT");
    // Temporary: Completely disable alternate scratch because we need to update
    // the debugger so that it can tell whether a dispatch is using alternate scratch
    // instead of main scratch
    // enable_scratch_alt_ = (var == "0") || !enable_scratch_async_reclaim_ ? false : true;
    enable_scratch_alt_ = false;

    tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB");

    var = os::GetEnvVar("HSA_TOOLS_REPORT_LOAD_FAILURE");

    // The two branches differ only in the default: opt-in in one, opt-out in the other.
    ifdebug {
      report_tool_load_failures_ = (var == "1") ? true : false;
    } else {
      report_tool_load_failures_ = (var == "0") ? false : true;
    }

    var = os::GetEnvVar("HSA_TOOLS_DISABLE_REGISTER");
    disable_tool_register_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_TOOLS_REPORT_REGISTER_FAILURE");
    report_tool_register_failures_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_DISABLE_FRAGMENT_ALLOCATOR");
    disable_fragment_alloc_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_ENABLE_SDMA_HDP_FLUSH");
    enable_sdma_hdp_flush_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_REV_COPY_DIR");
    rev_copy_dir_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_FORCE_FINE_GRAIN_PCIE");
    fine_grain_pcie_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_NO_SCRATCH_RECLAIM");
    no_scratch_reclaim_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_NO_SCRATCH_THREAD_LIMITER");
    no_scratch_thread_limit_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_DISABLE_IMAGE");
    disable_image_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_DISABLE_PC_SAMPLING");
    disable_pc_sampling_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_LOADER_ENABLE_MMAP_URI");
    loader_enable_mmap_uri_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_FORCE_SDMA_SIZE");
    force_sdma_size_ = var.empty() ? 1024 * 1024 : atoi(var.c_str());

    // Note the inversion: setting the variable to "1" turns the check off.
    var = os::GetEnvVar("HSA_IGNORE_SRAMECC_MISREPORT");
    check_sramecc_validity_ = (var == "1") ? false : true;

    // Legal values are zero "0" or one "1". Any other value will
    // be interpreted as not defining the env variable.
    var = os::GetEnvVar("HSA_XNACK");
    xnack_ = (var == "0") ? XNACK_DISABLE : ((var == "1") ? XNACK_ENABLE : XNACK_UNCHANGED);

    var = os::GetEnvVar("HSA_ENABLE_DEBUG");
    debug_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_CU_MASK_SKIP_INIT");
    cu_mask_skip_init_ = (var == "1") ? true : false;

    // Temporary opt-in for corrected HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT behavior.
    // Will become opt-out and possibly removed in future releases.
    var = os::GetEnvVar("HSA_COOP_CU_COUNT");
    coop_cu_count_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_DISCOVER_COPY_AGENTS");
    discover_copy_agents_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_SVM_PROFILE");
    svm_profile_ = var;

    var = os::GetEnvVar("HSA_ENABLE_SRAMECC");
    sramecc_enable_ =
        (var == "0") ? SRAMECC_DISABLED : ((var == "1") ? SRAMECC_ENABLED : SRAMECC_DEFAULT);

    var = os::GetEnvVar("HSA_IMAGE_PRINT_SRD");
    image_print_srd_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_ENABLE_MWAITX");
    enable_mwaitx_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_ENABLE_IPC_MODE_LEGACY");
    enable_ipc_mode_legacy_ = (var == "0") ? false : true; // Legacy mode by default
    if (os::IsEnvVarSet("HSA_PCS_MAX_DEVICE_BUFFER_SIZE")) {
      var = os::GetEnvVar("HSA_PCS_MAX_DEVICE_BUFFER_SIZE");
      char* end;
      pc_sampling_max_device_buffer_size_ = strtoul(var.c_str(), &end, 10);
    } else {
      pc_sampling_max_device_buffer_size_ = DEFAULT_PCS_MAX_DEVICE_BUFFER_SIZE;
    }

    // Temporary environment variable to disable CPU affinity override
    // Will either rename to HSA_OVERRIDE_CPU_AFFINITY later or remove completely.
    var = os::GetEnvVar("HSA_OVERRIDE_CPU_AFFINITY_DEBUG");
    override_cpu_affinity_ = (var == "0") ? false : true;

    var = os::GetEnvVar("HSA_ALLOCATE_QUEUE_DEV_MEM");
    dev_mem_queue_buf_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_WAIT_ANY_DEBUG");
    wait_any_ = (var == "1") ? true : false;

    /* hsa_signal_wait_relaxed abort timeout  */
    var = os::GetEnvVar("HSA_SIGNAL_WAIT_ABORT_TIMEOUT");
    signal_abort_timeout_ = var.empty() ? 0 : atoi(var.c_str());

    /* Valid inputs are 0-99, HIGH, MAX */
    var = os::GetEnvVar("HSA_ASYNCEVENTS_THREAD_PRIORITY");
    async_events_thread_priority_ = os::OS_THREAD_PRIORITY_DEFAULT;
    if (var == "MAX") {
      async_events_thread_priority_ = os::OS_THREAD_PRIORITY_MAX;
    } else if (var == "HIGH") {
      async_events_thread_priority_ = os::OS_THREAD_PRIORITY_HIGH;
    } else if (var != "") {
      char* end;
      // Keep the full strtol result so that large values cannot wrap into the
      // valid range when narrowed.
      long input = strtol(var.c_str(), &end, 10);
      // strtol returns 0 when nothing could be converted and stops at the first
      // non-digit, so the whole string must have been consumed for the value
      // to count.
      bool parsed = (end != var.c_str()) && (*end == '\0');
      if (parsed && input >= 0 && input <= 99)
        async_events_thread_priority_ = static_cast<int>(input);
      else
        fprintf(stderr, "Failed to parse HSA_ASYNCEVENTS_THREAD_PRIORITY\n");
    }

    var = os::GetEnvVar("HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG");
    enable_3d_swizzle_ = (var == "1") ? true : false;

    // This allows convenient usage in scripting for enabling dtif.
    // IE the user should set HSA_ENABLE_DTIF = 1 to enable DTIF.
    // Any other value, or leaving it unset, keeps the DTIF backend disabled.
    var = os::GetEnvVar("HSA_ENABLE_DTIF");
    enable_dtif_ = (var == "1") ? true : false;

    var = os::GetEnvVar("HSA_CO_DMACOPY_SIZE");
    co_dmacopy_size_ = var.empty() ? 1024*1024 : atoi(var.c_str());
  }

  // Reads HSA_CU_MASK and records the per-GPU CU masks it describes.
  // maxGpu / maxCU are forwarded unchanged to the string overload.
  void parse_masks(uint32_t maxGpu, uint32_t maxCU) {
    std::string var = os::GetEnvVar("HSA_CU_MASK");
    parse_masks(var, maxGpu, maxCU);
  }

  // Accessors: each returns the value cached by the last Refresh().

  bool wait_any() const { return wait_any_; }

  bool check_flat_scratch() const { return check_flat_scratch_; }

  bool enable_vm_fault_message() const { return enable_vm_fault_message_; }

  bool enable_queue_fault_message() const { return enable_queue_fault_message_; }

  bool enable_interrupt() const { return enable_interrupt_; }

  bool enable_sdma_hdp_flush() const { return enable_sdma_hdp_flush_; }

  bool running_valgrind() const { return running_valgrind_; }

  bool sdma_wait_idle() const { return sdma_wait_idle_; }

  bool report_tool_load_failures() const { return report_tool_load_failures_; }

  bool report_tool_register_failures() const { return report_tool_register_failures_; }

  bool disable_tool_register() const { return disable_tool_register_; }

  bool disable_fragment_alloc() const { return disable_fragment_alloc_; }

  bool rev_copy_dir() const { return rev_copy_dir_; }

  bool fine_grain_pcie() const { return fine_grain_pcie_; }

  bool no_scratch_reclaim() const { return no_scratch_reclaim_; }

  bool no_scratch_thread_limiter() const { return no_scratch_thread_limit_; }

  SDMA_OVERRIDE enable_sdma() const { return enable_sdma_; }

  SDMA_OVERRIDE enable_peer_sdma() const { return enable_peer_sdma_; }

  SDMA_OVERRIDE enable_sdma_gang() const { return enable_sdma_gang_; }

  SDMA_OVERRIDE enable_sdma_copy_size_override() const { return enable_sdma_copy_size_override_; }

  SDMA_OVERRIDE enable_sdma_recommended_eng() const { return enable_sdma_recommended_eng_; }

  std::string visible_gpus() const { return visible_gpus_; }

  bool filter_visible_gpus() const { return filter_visible_gpus_; }

  uint32_t max_queues() const { return max_queues_; }

  size_t scratch_mem_size() const { return scratch_mem_size_; }

  size_t scratch_single_limit() const { return scratch_single_limit_; }

  bool enable_scratch_async_reclaim() const { return enable_scratch_async_reclaim_; }

  bool enable_scratch_alt() const { return enable_scratch_alt_; }

  size_t scratch_single_limit_async() const { return scratch_single_limit_async_; }

  std::string tools_lib_names() const { return tools_lib_names_; }

  bool disable_image() const { return disable_image_; }

  bool disable_pc_sampling() const { return disable_pc_sampling_; }

  bool loader_enable_mmap_uri() const { return loader_enable_mmap_uri_; }

  size_t force_sdma_size() const { return force_sdma_size_; }

  bool check_sramecc_validity() const { return check_sramecc_validity_; }

  bool override_cpu_affinity() const { return override_cpu_affinity_; }

  bool image_print_srd() const { return image_print_srd_; }

  // Returns whether MWAITX was requested and is usable. When the caller
  // reports that MWAITX is unsupported the cached request is cleared, so later
  // calls return false as well.
  bool check_mwaitx(bool mwaitx_supported) {
    if (enable_mwaitx_ && !mwaitx_supported) enable_mwaitx_ = false;

    return enable_mwaitx_;
  }

  XNACK_REQUEST xnack() const { return xnack_; }

  bool debug() const { return debug_; }

  // Returns the CU mask recorded for gpu_index, or a reference to a shared
  // empty vector when no mask was recorded for that GPU.
  const std::vector<uint32_t>& cu_mask(uint32_t gpu_index) const {
    static const std::vector<uint32_t> empty;
    auto it = cu_mask_.find(gpu_index);
    if (it == cu_mask_.end()) return empty;
    return it->second;
  }

  bool cu_mask_skip_init() const { return cu_mask_skip_init_; }

  bool coop_cu_count() const { return coop_cu_count_; }

  bool discover_copy_agents() const { return discover_copy_agents_; }

  const std::string& svm_profile() const { return svm_profile_; }

  SRAMECC_ENABLE sramecc_enable() const { return sramecc_enable_; }

  bool enable_ipc_mode_legacy() const { return enable_ipc_mode_legacy_; }

  size_t pc_sampling_max_device_buffer_size() const { return pc_sampling_max_device_buffer_size_; }

  size_t co_dmacopy_size() const { return co_dmacopy_size_; }

  bool dev_mem_queue_buf() const { return dev_mem_queue_buf_; }

  uint32_t signal_abort_timeout() const { return signal_abort_timeout_; }

  int async_events_thread_priority() const { return async_events_thread_priority_; }

  bool enable_3d_swizzle() const { return enable_3d_swizzle_; }

  bool enable_dtif() const { return enable_dtif_; }
 private:
  // Cached settings; each is written by Refresh() from the environment
  // variable named there.
  bool check_flat_scratch_;
  bool enable_vm_fault_message_;
  bool enable_interrupt_;
  bool enable_sdma_hdp_flush_;
  bool running_valgrind_;
  bool sdma_wait_idle_;
  bool enable_queue_fault_message_;
  bool report_tool_load_failures_;
  bool report_tool_register_failures_ = false;
  bool disable_tool_register_ = false;
  bool disable_fragment_alloc_;
  bool rev_copy_dir_;
  bool fine_grain_pcie_;
  bool no_scratch_reclaim_;
  bool no_scratch_thread_limit_;
  bool disable_image_;
  bool disable_pc_sampling_;
  bool loader_enable_mmap_uri_;
  bool check_sramecc_validity_;
  bool debug_;
  bool cu_mask_skip_init_;
  bool coop_cu_count_;
  bool discover_copy_agents_;
  bool override_cpu_affinity_;
  bool image_print_srd_;
  bool enable_mwaitx_;
  bool enable_ipc_mode_legacy_;
  bool wait_any_;
  bool dev_mem_queue_buf_;
  uint32_t signal_abort_timeout_;
  int  async_events_thread_priority_;
  bool enable_3d_swizzle_ = false;
  bool enable_dtif_;

  SDMA_OVERRIDE enable_sdma_;
  SDMA_OVERRIDE enable_peer_sdma_;
  SDMA_OVERRIDE enable_sdma_gang_;
  SDMA_OVERRIDE enable_sdma_copy_size_override_;
  SDMA_OVERRIDE enable_sdma_recommended_eng_;

  bool filter_visible_gpus_;
  std::string visible_gpus_;

  uint32_t max_queues_;

  size_t scratch_mem_size_;
  size_t scratch_single_limit_;
  size_t scratch_single_limit_async_;
  bool enable_scratch_async_reclaim_;
  bool enable_scratch_alt_;

  std::string tools_lib_names_;
  std::string svm_profile_;

  size_t force_sdma_size_;

  // Indicates user preference for Xnack state.
  XNACK_REQUEST xnack_;

  SRAMECC_ENABLE sramecc_enable_;

  size_t pc_sampling_max_device_buffer_size_;

  size_t co_dmacopy_size_;

  // Map GPU index post RVD to its default cu mask.
  std::map<uint32_t, std::vector<uint32_t>> cu_mask_;

  // Parses HSA_CU_MASK syntax from args into cu_mask_; defined out of line.
  void parse_masks(std::string& args, uint32_t maxGpu, uint32_t maxCU);

  DISALLOW_COPY_AND_ASSIGN(Flag);
};

}  // namespace rocr

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/core/util/lazy_ptr.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_UTIL_LAZY_PTR_H_
#define HSA_RUNTIME_CORE_UTIL_LAZY_PTR_H_

#include <memory>
#include <utility>
#include <functional>

#include "core/util/locks.h"
#include "core/util/utils.h"

namespace rocr {

/*
 * Wrapper for a std::unique_ptr that initializes its object at first use.
 */
// Holds a std::unique_ptr<T> together with a factory function. The factory is
// run the first time the object is accessed (or when touch() is called) and is
// then dropped, so "factory is empty" doubles as the "already constructed"
// marker. Construction is serialized by a per-instance KernelMutex.
template <typename T> class lazy_ptr {
 public:
  // No factory and no object: created() is true and empty() is true.
  lazy_ptr() {}

  // Stores Constructor; it is invoked on first use and must return a pointer
  // this object may own (or nullptr on failure).
  explicit lazy_ptr(std::function<T*()> Constructor) { reset(Constructor); }

  // Takes over rhs's object and factory. The lock is not transferred; every
  // instance keeps its own.
  lazy_ptr(lazy_ptr&& rhs) {
    obj = std::move(rhs.obj);
    func = std::move(rhs.func);
  }

  // Move assignment. Returns *this; the operator previously ran off its end
  // without a return statement, which is undefined behavior for a function
  // with a non-void return type.
  lazy_ptr& operator=(lazy_ptr&& rhs) {
    if (this != &rhs) {
      obj = std::move(rhs.obj);
      func = std::move(rhs.func);
    }
    return *this;
  }

  lazy_ptr(lazy_ptr&) = delete;
  lazy_ptr& operator=(lazy_ptr&) = delete;

  // Destroys any current object and installs a new factory (none by default).
  void reset(std::function<T*()> Constructor = nullptr) {
    obj.reset();
    func = std::move(Constructor);
  }

  // Adopts ptr directly and discards any pending factory.
  void reset(T* ptr) {
    obj.reset(ptr);
    func = nullptr;
  }

  // Pointer comparisons look at the current object only; they do not trigger
  // construction.
  bool operator==(T* rhs) const { return obj.get() == rhs; }
  bool operator!=(T* rhs) const { return obj.get() != rhs; }

  // Member access: constructs the object if needed (waiting for the lock) and
  // asserts that construction produced one.
  const std::unique_ptr<T>& operator->() const {
    make(true);
    assert(obj != nullptr && "Null dereference through lazy_ptr.");
    return obj;
  }

  // Dereference: constructs the object if needed and returns the owning
  // unique_ptr itself (not a T&). The result may hold nullptr when the factory
  // failed.
  std::unique_ptr<T>& operator*() {
    make(true);
    return obj;
  }

  const std::unique_ptr<T>& operator*() const {
    make(true);
    return obj;
  }

  /*
   * Ensures that the object is created or is being created.
   * This is useful when early construction of the object is required.
   * Does not wait: if the lock is already held the call returns immediately.
   */
  void touch() const { make(false); }

  // Tells if the lazy object has been constructed or not.
  // Construction may fail silently (return nullptr).
  bool created() const {
    std::atomic_thread_fence(std::memory_order_acquire);
    return func == nullptr;
  }

  // Tells if the lazy object exists or not.
  bool empty() const {
    std::atomic_thread_fence(std::memory_order_acquire);
    return obj == nullptr;
  }

 private:
  // All state is mutable because construction happens inside const accessors.
  mutable std::unique_ptr<T> obj;
  mutable std::function<T*(void)> func;
  mutable KernelMutex lock;

  // Separated from make to improve inlining.
  // Takes the lock (waiting when block is true, giving up when it is busy
  // otherwise), re-checks that the factory is still pending, runs it, and
  // clears the factory only after the object has been stored.
  void make_body(bool block) const {
    if (block) {
      lock.Acquire();
    } else if (!lock.Try()) {
      return;
    }
    MAKE_SCOPE_GUARD([&]() { lock.Release(); });
    if (func == nullptr) return;
    T* ptr = func();
    obj.reset(ptr);
    // Order the store of obj before the store that marks construction done.
    std::atomic_thread_fence(std::memory_order_release);
    func = nullptr;
  }

  // Cheap check outside the lock; the slow path lives in make_body().
  __forceinline void make(bool block) const {
    if (!created()) {
      make_body(block);
    }
  }

};

}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_UTIL_LAZY_PTR_H_


================================================
FILE: runtime/hsa-runtime/core/util/lnx/os_linux.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifdef __linux__
#include "core/util/os.h"
#include "core/util/utils.h"

#include <link.h>
#include <dlfcn.h>
#include <pthread.h>
#include <limits.h>
#include <sched.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <unistd.h>
#include <errno.h>
#include <cstring>
#include <atomic>
#include <memory>
#include <string>
#include <utility>
#include <semaphore.h>
#include "core/inc/runtime.h"
#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>
#endif

// ABS_ADDR(base, ptr): yields ptr unchanged when building against glibc and
// base + ptr otherwise.
// NOTE(review): presumably for address fields that glibc has already relocated
// to absolute addresses while other C libraries leave them relative to the
// load base - confirm at the use sites, which are outside this section.
#ifdef __GLIBC__
#define ABS_ADDR(base, ptr) (ptr)
#else
#define ABS_ADDR(base, ptr) ((base) + (ptr))
#endif

namespace rocr {
namespace os {

// Pair of values handed to ThreadTrampoline through the single void* argument
// that a thread start routine receives.
struct ThreadArgs {
  // Opaque argument forwarded unchanged to entry_function.
  void* entry_args;
  // Routine the new thread runs.
  ThreadEntry entry_function;
};

// Thread start routine: unpacks the ThreadArgs passed in arg, calls the stored
// entry function with its stored argument and always returns nullptr (any
// result of the entry function is discarded).
void* __stdcall ThreadTrampoline(void* arg) {
  ThreadArgs* const packed = static_cast<ThreadArgs*>(arg);
  void* const payload = packed->entry_args;
  const ThreadEntry entry = packed->entry_function;
  entry(payload);
  return nullptr;
}

// Thread container allows multiple waits and separate close (destroy).
//
// Owns a pthread, the mutex that serializes joiners and a state flag that
// records whether the thread has already been joined.  Construction problems
// are reported on stderr; use Valid() to find out whether a thread exists.
class os_thread {
 public:
  // Creates and starts a thread running function(threadArgument).
  //
  // function       - entry point executed on the new thread.
  // threadArgument - opaque pointer forwarded to function.
  // stackSize      - requested stack size in bytes; 0 keeps the pthread
  //                  default.  Non-zero values are raised to at least
  //                  PTHREAD_STACK_MIN and rounded up to a multiple of 4096.
  // priority       - OS_THREAD_PRIORITY_DEFAULT leaves scheduling untouched;
  //                  any other value moves the thread to SCHED_FIFO with a
  //                  priority derived from it (clamped to the policy maximum).
  explicit os_thread(ThreadEntry function,
                      void* threadArgument,
                      uint stackSize,
                      int priority)
      : thread(0), lock(nullptr), state(RUNNING) {
    int err;
    lock = CreateMutex();
    if (lock == nullptr) return;

    args.entry_args = threadArgument;
    args.entry_function = function;

    pthread_attr_t attrib;
    err = pthread_attr_init(&attrib);
    if (err != 0) {
      fprintf(stderr, "pthread_attr_init failed: %s\n", strerror(err));
      return;
    }

    // Release the attribute object on every exit path.  The message reports
    // the code returned by pthread_attr_destroy itself.
    MAKE_SCOPE_GUARD([&]() {
      const int destroy_err = pthread_attr_destroy(&attrib);
      if (destroy_err != 0)
        fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(destroy_err));
    });

    if (stackSize != 0) {
      stackSize = Max(uint(PTHREAD_STACK_MIN), stackSize);
      stackSize = AlignUp(stackSize, 4096);
      err = pthread_attr_setstacksize(&attrib, stackSize);
      if (err != 0) {
        fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err));
        return;
      }
    }

    int cores = 0;
    // Owning handle for the affinity mask so it is freed on every return,
    // including the pthread_create failure paths below.
    auto free_cpuset = [](cpu_set_t* set) { CPU_FREE(set); };
    std::unique_ptr<cpu_set_t, decltype(free_cpuset)> cpuset(nullptr, free_cpuset);

    if (core::Runtime::runtime_singleton_->flag().override_cpu_affinity()) {
      // Build a mask containing every configured processor.
      cores = get_nprocs_conf();
      cpuset.reset(CPU_ALLOC(cores));
      if (!cpuset) {
        fprintf(stderr, "CPU_ALLOC failed: %s\n", strerror(errno));
        return;
      }
      cpu_set_t* mask = cpuset.get();
      CPU_ZERO_S(CPU_ALLOC_SIZE(cores), mask);
      for (int i = 0; i < cores; i++) {
        CPU_SET_S(i, CPU_ALLOC_SIZE(cores), mask);
      }
#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
      err = pthread_attr_setaffinity_np(&attrib, CPU_ALLOC_SIZE(cores), mask);
      cpuset.reset();
      if (err != 0) {
        fprintf(stderr, "pthread_attr_setaffinity_np failed: %s\n", strerror(err));
        return;
      }
#endif
    }

    do {
      err = pthread_create(&thread, &attrib, ThreadTrampoline, &args);
      if (!err) break;

      // pthread functions return the error code; errno is not set by them.
      if (err != EINVAL || stackSize == 0) {
        fprintf(stderr, "pthread_create failed %d (%s)\n", err, strerror(err));
        thread = 0;
        return;
      }

      // Probably a stack size error since system limits can be different from PTHREAD_STACK_MIN
      // Attempt to grow the stack within reason.
      stackSize *= 2;
      const int resize_err = pthread_attr_setstacksize(&attrib, stackSize);
      if (resize_err != 0) {
        fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(resize_err));
        thread = 0;
        return;
      }
    } while (stackSize < 20 * 1024 * 1024);

    if (err != 0) {
      // Every retry failed and the stack size limit was reached: there is no
      // thread, so do not touch affinity or scheduling.
      fprintf(stderr, "pthread_create failed %d (%s)\n", err, strerror(err));
      thread = 0;
      return;
    }

#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
    if (cores && cpuset) {
      err = pthread_setaffinity_np(thread, CPU_ALLOC_SIZE(cores), cpuset.get());
      cpuset.reset();
      if (err != 0) {
        fprintf(stderr, "pthread_setaffinity_np failed: %s\n", strerror(err));
        // NOTE(review): the thread was already started at this point; clearing
        // the handle makes the object invalid without joining or detaching it.
        thread = 0;
        return;
      }
    }
#endif
    struct sched_param param = {};
    if (priority != OS_THREAD_PRIORITY_DEFAULT) {
      int set_priority;
      int max_priority = sched_get_priority_max(SCHED_FIFO);

      if (priority == OS_THREAD_PRIORITY_MAX)
        set_priority = max_priority;
      else if (priority == OS_THREAD_PRIORITY_HIGH)
        set_priority = max_priority - 1;
      else if (priority > max_priority)
        set_priority = max_priority;
      else
        set_priority = priority;

      param.sched_priority = set_priority;
      if (pthread_setschedparam(thread, SCHED_FIFO, &param)) {
        fprintf(stderr, "pthread_setschedparam failed\n");
        return;
      }

      // Read the parameters back to confirm the change took effect.
      int policy = 0;
      err = pthread_getschedparam(thread, &policy, &param);
      if (err != 0)
        fprintf(stderr, "pthread_getschedparam failed: %s\n", strerror(err));

      if (policy != SCHED_FIFO || param.sched_priority != set_priority)
        fprintf(stderr, "Failed to adjust thread priority (policy:%s requested:%d current:%d)\n",
                          policy == SCHED_FIFO ? "FIFO" :
                          policy == SCHED_OTHER ? "OTHER" :
                          policy == SCHED_RR ? "RR" : "Unknown",
                          set_priority, param.sched_priority);
    }
  }

  // Takes over the thread handle and mutex of rhs; rhs is left without
  // either, so its destructor neither detaches nor destroys anything.
  os_thread(os_thread&& rhs) {
    thread = rhs.thread;
    args = rhs.args;
    lock = rhs.lock;
    state = int(rhs.state);
    rhs.thread = 0;
    rhs.lock = nullptr;
  }

  os_thread(os_thread&) = delete;

  // Destroys the mutex and detaches the thread if it was never joined.
  ~os_thread() {
    if (lock != nullptr) DestroyMutex(lock);
    if ((state == RUNNING) && (thread != 0)) {
      int err = pthread_detach(thread);
      if (err != 0) fprintf(stderr, "pthread_detach failed: %s\n", strerror(err));
    }
  }

  // True when both the mutex and the thread were created.
  bool Valid() { return (lock != nullptr) && (thread != 0); }

  // Joins the thread.  May be called repeatedly and from several threads:
  // the mutex serializes callers and only the first one performs the join.
  // Returns true once the thread has been joined.
  bool Wait() {
    if (state == FINISHED) return true;
    AcquireMutex(lock);
    if (state == FINISHED) {
      ReleaseMutex(lock);
      return true;
    }
    int err = pthread_join(thread, NULL);
    bool success = (err == 0);
    if (success) state = FINISHED;
    ReleaseMutex(lock);
    return success;
  }

 private:
  pthread_t thread;
  // Storage read by ThreadTrampoline on the new thread.
  struct ThreadArgs args;
  Mutex lock;
  std::atomic<int> state;
  enum { FINISHED = 0, RUNNING = 1 };
};

// The functions in this file reinterpret the opaque OS abstraction handles as
// pointers to the native objects named below.  Verify at compile time that
// each handle type has the same size as the pointer it is converted to.
static_assert(sizeof(LibHandle) == sizeof(void*), "OS abstraction size mismatch");
static_assert(sizeof(Semaphore) == sizeof(sem_t*), "OS abstraction size mismatch");
static_assert(sizeof(Mutex) == sizeof(pthread_mutex_t*), "OS abstraction size mismatch");
static_assert(sizeof(SharedMutex) == sizeof(pthread_rwlock_t*), "OS abstraction size mismatch");
static_assert(sizeof(Thread) == sizeof(os_thread*), "OS abstraction size mismatch");

// Opens the shared library `filename` with lazy symbol binding and returns its
// handle.  On failure the dlerror() text is sent to debug_print and a null
// handle is returned.
LibHandle LoadLib(std::string filename) {
  void* handle = dlopen(filename.c_str(), RTLD_LAZY);
  if (handle == nullptr) {
    debug_print("LoadLib(%s) failed: %s\n", filename.c_str(), dlerror());
  }
  return *(LibHandle*)&handle;
}

// Resolves `export_name` in `lib` and returns its address, or null when the
// symbol is missing or is provided by a different shared object.
//
// dlsym searches the given library and all the library's load dependencies.
// To match the Windows lookup pattern the result is accepted only when the
// object that contains the address has the same file name as `lib` itself.
void* GetExportAddress(LibHandle lib, std::string export_name) {
  void* native_lib = *(void**)&lib;

  void* symbol = dlsym(native_lib, export_name.c_str());
  if (symbol == NULL) return symbol;

  // File name of the library that was asked for.
  link_map* lib_map;
  if (dlinfo(native_lib, RTLD_DI_LINKMAP, &lib_map) == -1) {
    fprintf(stderr, "dlinfo failed: %s\n", dlerror());
    return nullptr;
  }

  // File name of the object that actually holds the symbol.
  Dl_info owner;
  if (dladdr(symbol, &owner) == 0) {
    fprintf(stderr, "dladdr failed.\n");
    return nullptr;
  }

  const bool defined_in_lib = (strcmp(owner.dli_fname, lib_map->l_name) == 0);
  if (!defined_in_lib) return NULL;
  return symbol;
}

// Releases a library handle obtained from LoadLib.  The dlclose result is ignored.
void CloseLib(LibHandle lib) {
  void* native_lib = *(void**)&lib;
  dlclose(native_lib);
}

/*
 * @brief dl_iterate_phdr() callback that looks for a string called
 * "HSA_AMD_TOOL_PRIORITY" in the dynamic string table of a loaded shared
 * library and, if found, records the name of that library.
 *
 * @param[in]: info A dl_phdr_info struct pointer, which contains information
 * about library's load address, header, and name.
 *
 * @param[in]: size integer size of dl_phdr_info struct (unused)
 *
 * @param[in,out]: data copy of the data argument to the dl_iterate_phdr call;
 * must point to a std::vector<std::string> that receives matching library names
 *
 * @retval:: Always 0 so that iteration continues. If a callback returns a
 * non-zero value, dl_iterate_phdr() will stop processing, even if there are
 * unprocessed shared objects.
 */

static int callback(struct dl_phdr_info* info, size_t size, void* data) {
  std::vector<std::string>* loadedToolsLib = (std::vector<std::string>*)data;
  assert(loadedToolsLib != nullptr);

  /*
   * Skip entries without a name and the "vdso.so" entry.
   * The vDSO is a special shared object file that is built into the Linux kernel.
   * It is not a regular shared library and thus does not have all the properties
   * of regular shared libraries. The way the vDSO is loaded and organized in memory
   * is different from regular shared libraries and it's not guaranteed that it
   * will have a specific segment or section. Hence it is skipped.
   */
  if (info == nullptr || info->dlpi_name == nullptr || info->dlpi_name[0] == '\0') return 0;
  if (std::string(info->dlpi_name).find("vdso.so") != std::string::npos) return 0;

  /*
   * Iterate through the program headers of the loaded lib and check for PT_DYNAMIC program
   * header. If the PT_DYNAMIC program header is found, use dlpi_addr and dlpi_phdr members
   * of dl_phdr_info struct to get the address of the dynamic section of the loaded
   * library in memory
   */
  for (int i = 0; i < info->dlpi_phnum; i++) {
    if (info->dlpi_phdr[i].p_type != PT_DYNAMIC) continue;

    // ElfW() selects the Elf32_/Elf64_ layout matching the running process,
    // so the entries are read correctly on both 32-bit and 64-bit builds.
    const ElfW(Dyn)* dyn_entry =
        (const ElfW(Dyn)*)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);

    const char* strings = nullptr;
    size_t limit = 0;

    /*
     * The dynamic section is searched for DT_STRTAB (address of string table),
     * and DT_STRSZ (size of string table)
     * DT_NULL - Marks the end of the _DYNAMIC array
     */
    for (; dyn_entry->d_tag != DT_NULL; ++dyn_entry) {
      if (dyn_entry->d_tag == DT_STRTAB) {
        strings = (const char*)ABS_ADDR(info->dlpi_addr, dyn_entry->d_un.d_ptr);
      } else if (dyn_entry->d_tag == DT_STRSZ) {
        limit = dyn_entry->d_un.d_val;
      }
    }

    if (strings == nullptr) {
      debug_print("String table not found");
      continue;
    }

    /*
     * Hacky lookup: walk the NUL separated strings of the string table and
     * check if any of them equals "HSA_AMD_TOOL_PRIORITY".
     * If yes, then add the name of the library to the vector of lib names.
     */
    const char* end = strings + limit;
    while (strings < end) {
      if (strcmp(strings, "HSA_AMD_TOOL_PRIORITY") == 0) {
        loadedToolsLib->push_back(info->dlpi_name);
        return 0;
      }
      strings += (strlen(strings) + 1);
    }
  }
  return 0;
}

std::vector<LibHandle> GetLoadedToolsLib() {
  std::vector<LibHandle> ret;
  std::vector<std::string> names;

  /* Iterate through all of the loaded shared libraries in the process */
  dl_iterate_phdr(callback, &names);

  if (!names.empty()) {
    for (auto& name : names) ret.push_back(LoadLib(name));
  }

  return ret;
}

// Returns the l_name recorded in the link map of `lib`, or an empty string
// when dlinfo cannot provide the link map.
std::string GetLibraryName(LibHandle lib) {
  link_map* lib_map;
  const int status = dlinfo(lib, RTLD_DI_LINKMAP, &lib_map);
  if (status == 0) {
    return lib_map->l_name;
  }
  return "";
}

// Allocates an unnamed semaphore with an initial count of zero that is not
// shared between processes.  Release it with DestroySemaphore.
Semaphore CreateSemaphore() {
  sem_t* native = new sem_t;
  sem_init(native, /*pshared=*/0, /*value=*/0);
  return *(Semaphore*)&native;
}

// Blocks until the semaphore can be decremented.  Waits interrupted by a
// signal (EINTR) are restarted; any other failure returns false.
bool WaitSemaphore(Semaphore sem) {
  sem_t* native = *(sem_t**)&sem;
  for (;;) {
    if (sem_wait(native) == 0) return true;
    if (errno != EINTR) return false;
  }
}

// Posts the semaphore unless its count is already positive, so the count
// never grows beyond one through this function.
void PostSemaphore(Semaphore sem) {
  sem_t* native = *(sem_t**)&sem;

  int waitval = 1;
  if (sem_getvalue(native, &waitval) != 0)
    assert(false && "Failed to get semaphore waiters");

  /* sem_getvalue return <= 0 when there are threads blocked on sem_wait;
   * only then is a post needed. */
  if (waitval <= 0) {
    if (sem_post(native) != 0)
      assert(false && "Failed to post semaphore");
  }
}

// Destroys a semaphore created by CreateSemaphore and frees its storage.
void DestroySemaphore(Semaphore sem) {
  sem_t* native = *(sem_t**)&sem;
  sem_destroy(native);
  delete native;
}

// Allocates a pthread mutex initialized with default attributes.
// Release it with DestroyMutex.
Mutex CreateMutex() {
  pthread_mutex_t* native = new pthread_mutex_t;
  pthread_mutex_init(native, /*attr=*/NULL);
  return *(Mutex*)&native;
}

// Attempts to lock the mutex without blocking; true when the lock was taken.
bool TryAcquireMutex(Mutex lock) {
  pthread_mutex_t* native = *(pthread_mutex_t**)&lock;
  const int status = pthread_mutex_trylock(native);
  return status == 0;
}

// Locks the mutex, blocking as needed; true when the lock was taken.
bool AcquireMutex(Mutex lock) {
  pthread_mutex_t* native = *(pthread_mutex_t**)&lock;
  const int status = pthread_mutex_lock(native);
  return status == 0;
}

// Unlocks the mutex.  The pthread result is not reported.
void ReleaseMutex(Mutex lock) {
  pthread_mutex_t* native = *(pthread_mutex_t**)&lock;
  pthread_mutex_unlock(native);
}

// Destroys a mutex created by CreateMutex and frees its storage.
void DestroyMutex(Mutex lock) {
  pthread_mutex_t* native = *(pthread_mutex_t**)&lock;
  pthread_mutex_destroy(native);
  delete native;
}

// Suspends the calling thread for delay_in_millisec milliseconds, expressed
// to usleep in microseconds.
void Sleep(int delay_in_millisec) {
  const int delay_in_microsec = delay_in_millisec * 1000;
  usleep(delay_in_microsec);
}

// Suspends the calling thread for delayInUs microseconds.
void uSleep(int delayInUs) {
  usleep(delayInUs);
}

// Gives up the processor so another runnable thread can be scheduled.
void YieldThread() {
  sched_yield();
}

// Starts a thread running function(threadArgument) and returns its handle, or
// nullptr when the thread object did not become valid.  See os_thread for the
// meaning of stackSize and priority.
Thread CreateThread(ThreadEntry function, void* threadArgument, uint stackSize, int priority) {
  std::unique_ptr<os_thread> created(new os_thread(function, threadArgument, stackSize, priority));
  // The unique_ptr deletes the object when it is unusable.
  if (!created->Valid()) return nullptr;
  return reinterpret_cast<Thread>(created.release());
}

// Destroys the thread object behind a handle returned by CreateThread.
void CloseThread(Thread thread) {
  os_thread* native = reinterpret_cast<os_thread*>(thread);
  delete native;
}

// Waits for the thread behind the handle to finish; returns the result of
// os_thread::Wait().
bool WaitForThread(Thread thread) {
  os_thread* native = reinterpret_cast<os_thread*>(thread);
  return native->Wait();
}

// Waits for each of the threadCount handles in `threads`, in order.
// Individual wait results are ignored; the function always returns true.
bool WaitForAllThreads(Thread* threads, uint threadCount) {
  Thread* const last = threads + threadCount;
  for (Thread* current = threads; current != last; ++current) {
    WaitForThread(*current);
  }
  return true;
}

// True when the environment contains a variable named env_var_name,
// regardless of its value.
bool IsEnvVarSet(std::string env_var_name) {
  return getenv(env_var_name.c_str()) != nullptr;
}

// Sets the environment variable env_var_name to env_var_value, replacing any
// existing value.
void SetEnvVar(std::string env_var_name, std::string env_var_value) {
  const char* name = env_var_name.c_str();
  const char* value = env_var_value.c_str();
  setenv(name, value, /*overwrite=*/1);
}

// Returns the id of the calling process.
int GetProcessId() {
  const pid_t pid = ::getpid();
  return pid;
}

// Returns the value of the environment variable env_var_name, or an empty
// string when the variable is not set.
std::string GetEnvVar(std::string env_var_name) {
  const char* value = getenv(env_var_name.c_str());
  if (value == nullptr) return std::string();
  return std::string(value);
}

// Size in bytes of the user-mode virtual address range assumed by the runtime.
size_t GetUserModeVirtualMemorySize() {
#ifdef _LP64
  // https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt :
  // user space is 0000000000000000 - 00007fffffffffff (=47 bits)
  return static_cast<size_t>(1) << 47;
#else
  return static_cast<size_t>(0xffffffff);  // ~4GB
#endif
}

// Returns the total RAM reported by sysinfo() in bytes, capped at the size of
// the user-mode virtual address range.  Returns 0 when sysinfo() fails.
size_t GetUsablePhysicalHostMemorySize() {
  struct sysinfo info = {0};
  if (sysinfo(&info) != 0) {
    return 0;
  }

  // totalram is expressed in units of mem_unit bytes.  Multiply in 64 bits so
  // the product cannot wrap where unsigned long is only 32 bits wide.
  const uint64_t physical_size = static_cast<uint64_t>(info.totalram) * info.mem_unit;
  const uint64_t virtual_size = GetUserModeVirtualMemorySize();
  // The minimum never exceeds virtual_size, which originated from a size_t.
  return static_cast<size_t>(std::min(virtual_size, physical_size));
}

// Lowest address of the user-mode virtual address range: always zero here.
uintptr_t GetUserModeVirtualMemoryBase() {
  return static_cast<uintptr_t>(0);
}

// Os event implementation
//
// Backing storage of an event handle: a condition variable plus the mutex
// guarding the two flags.
struct EventDescriptor_ {
  pthread_cond_t event;   // Signaled by SetOsEvent, waited on by WaitForOsEvent.
  pthread_mutex_t mutex;  // Protects state.
  bool state;             // true while the event is set.
  bool auto_reset;        // When true a successful wait clears state again.
};
typedef struct EventDescriptor_ EventDescriptor;

// Creates an event object.
//
// auto_reset - when true, a successful wait clears the event again.
// init_state - initial signaled state of the event.
//
// Returns a handle for the other *OsEvent functions, or a null handle when
// the descriptor cannot be allocated or its mutex / condition variable cannot
// be initialized.
EventHandle CreateOsEvent(bool auto_reset, bool init_state) {
  EventDescriptor* eventDescrp = (EventDescriptor*)malloc(sizeof(EventDescriptor));

  if (eventDescrp != nullptr) {
    if (pthread_mutex_init(&eventDescrp->mutex, NULL) != 0) {
      free(eventDescrp);
      eventDescrp = nullptr;
    } else if (pthread_cond_init(&eventDescrp->event, NULL) != 0) {
      // Undo the mutex initialization before giving up.
      pthread_mutex_destroy(&eventDescrp->mutex);
      free(eventDescrp);
      eventDescrp = nullptr;
    } else {
      eventDescrp->auto_reset = auto_reset;
      eventDescrp->state = init_state;
    }
  }

  // A null descriptor converts to a null handle, which every other event
  // function rejects with -1.
  return reinterpret_cast<EventHandle>(eventDescrp);
}

// Destroys an event created by CreateOsEvent and frees its descriptor.
// Returns -1 for a null handle, otherwise the bitwise OR of the results of
// destroying the condition variable and the mutex (0 when both succeed).
int DestroyOsEvent(EventHandle event) {
  if (event == NULL) return -1;

  EventDescriptor* descriptor = reinterpret_cast<EventDescriptor*>(event);
  const int cond_status = pthread_cond_destroy(&descriptor->event);
  const int mutex_status = pthread_mutex_destroy(&descriptor->mutex);
  free(descriptor);
  return cond_status | mutex_status;
}

// Waits for an event to become set.
//
// event         - handle returned by CreateOsEvent.
// milli_seconds - maximum time to wait; 0 only polls the current state.
//
// Returns 0 when the event was set (and clears it for auto-reset events),
// -1 for a null handle, 1 when a poll finds the event unset or its mutex
// busy, 0x14003 when the timed wait expires, or the pthread error code of a
// failed lock / wait.
int WaitForOsEvent(EventHandle event, unsigned int milli_seconds) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);
  if (milli_seconds == 0) {
    // Polling must not block: treat a contended mutex as "not signaled".
    int tmp_ret = pthread_mutex_trylock(&eventDescrp->mutex);
    if (tmp_ret == EBUSY) {
      // Timeout
      return 1;
    }
    // Any other failure means the mutex is not held; do not continue.
    if (tmp_ret != 0) return tmp_ret;
  } else {
    int lock_ret = pthread_mutex_lock(&eventDescrp->mutex);
    if (lock_ret != 0) return lock_ret;
  }

  int ret_code = 0;

  if (!eventDescrp->state) {
    if (milli_seconds == 0) {
      ret_code = 1;
    } else {
      // Absolute deadline = now + milli_seconds.
      struct timeval tp;
      gettimeofday(&tp, NULL);

      struct timespec ts;
      ts.tv_sec = tp.tv_sec + milli_seconds / 1000;
      ts.tv_nsec = static_cast<long>(tp.tv_usec) * 1000 +
          static_cast<long>(milli_seconds % 1000) * 1000000L;

      // tv_nsec must stay below one second; carry the overflow into tv_sec.
      if (ts.tv_nsec >= 1000000000L) {
        ts.tv_sec += 1;
        ts.tv_nsec -= 1000000000L;
      }

      // Condition variables may wake up spuriously, so keep waiting until the
      // event is really set or the wait reports an error / timeout.
      do {
        ret_code = pthread_cond_timedwait(&eventDescrp->event, &eventDescrp->mutex, &ts);
      } while (ret_code == 0 && !eventDescrp->state);

      // Time out
      if (ret_code == ETIMEDOUT) {
        // NOTE(review): 0x14003 is the timeout value callers receive today;
        // presumably a runtime status code - confirm against the HSA headers.
        ret_code = 0x14003;
      }

      if (ret_code == 0 && eventDescrp->auto_reset) {
        eventDescrp->state = false;
      }
    }
  } else if (eventDescrp->auto_reset) {
    eventDescrp->state = false;
  }
  pthread_mutex_unlock(&eventDescrp->mutex);

  return ret_code;
}

// Sets the event and wakes one waiter.
// Returns -1 for a null handle, the pthread error code when the mutex cannot
// be locked (the event is left untouched in that case), otherwise the bitwise
// OR of the unlock and signal results (0 on success).
int SetOsEvent(EventHandle event) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);
  int ret_code = pthread_mutex_lock(&eventDescrp->mutex);
  // Never modify the state without holding the mutex.
  if (ret_code != 0) return ret_code;

  eventDescrp->state = true;
  ret_code = pthread_mutex_unlock(&eventDescrp->mutex);
  ret_code |= pthread_cond_signal(&eventDescrp->event);

  return ret_code;
}

// Clears the event.
// Returns -1 for a null handle, the pthread error code when the mutex cannot
// be locked (the event is left untouched in that case), otherwise the result
// of unlocking the mutex (0 on success).
int ResetOsEvent(EventHandle event) {
  if (event == NULL) {
    return -1;
  }

  EventDescriptor* eventDescrp = reinterpret_cast<EventDescriptor*>(event);
  int ret_code = pthread_mutex_lock(&eventDescrp->mutex);
  // Never modify the state without holding the mutex.
  if (ret_code != 0) return ret_code;

  eventDescrp->state = false;
  ret_code = pthread_mutex_unlock(&eventDescrp->mutex);

  return ret_code;
}

// Reciprocal of the clock resolution in nanoseconds; 0.0 until
// AccurateClockFrequency() has stored a value.
static double invPeriod = 0.0;

// Reads CLOCK_MONOTONIC_RAW and returns the time in clock ticks, i.e. the
// nanosecond count scaled by invPeriod.  Aborts the process if the clock
// cannot be read.
uint64_t ReadAccurateClock() {
  // Make sure the scale factor has been computed.
  if (invPeriod == 0.0) AccurateClockFrequency();

  timespec now;
  if (clock_gettime(CLOCK_MONOTONIC_RAW, &now) != 0) {
    perror("clock_gettime(CLOCK_MONOTONIC_RAW,...) failed");
    abort();
  }

  const uint64_t nanoseconds = uint64_t(now.tv_sec) * 1000000000ull + uint64_t(now.tv_nsec);
  return nanoseconds * invPeriod;
}

// Returns the frequency in Hz of the clock behind ReadAccurateClock(), derived
// from clock_getres(), and records the reciprocal of the resolution in
// invPeriod on first use.  Aborts if the resolution cannot be queried or is
// one second or coarser.
uint64_t AccurateClockFrequency() {
  static clockid_t clock = CLOCK_MONOTONIC;
  static std::atomic<bool> first(true);
  // Check kernel version - not a concurrency concern.
  // use non-RAW for getres due to bug in older 2.6.x kernels
  if (first.load(std::memory_order_acquire)) {
    utsname kernelInfo;
    if (uname(&kernelInfo) == 0) {
      try {
        std::string ver = kernelInfo.release;
        size_t idx;
        int major = std::stoi(ver, &idx);
        int minor = std::stoi(ver.substr(idx + 1));
        // Kernel 4.4 or newer.  Compare the version as a (major, minor) pair
        // so that releases such as 5.0 or 6.1 are recognised as newer too.
        if ((major > 4) || ((major == 4) && (minor >= 4))) {
          clock = CLOCK_MONOTONIC_RAW;
        }
      } catch (...) {
        // Kernel version string doesn't conform to the standard pattern.
        // Keep using the "safe" (non-RAW) clock.
      }
    }
    first.store(false, std::memory_order_release);
  }
  timespec time;
  int err = clock_getres(clock, &time);
  if (err != 0) {
    perror("clock_getres failed");
    abort();
  }
  if (time.tv_sec != 0 || time.tv_nsec >= 0xFFFFFFFF) {
    fprintf(stderr,
            "clock_getres(CLOCK_MONOTONIC(_RAW),...) returned very low "
            "frequency (<1Hz).\n");
    abort();
  }
  if (invPeriod == 0.0) invPeriod = 1.0 / double(time.tv_nsec);
  return 1000000000ull / uint64_t(time.tv_nsec);
}

// Allocates a read/write lock.  Where pthread_rwlockattr_setkind_np is
// available the lock is configured to prefer writers (non-recursive).
// Returns nullptr, after printing the reason to stderr, when any step fails.
SharedMutex CreateSharedMutex() {
  pthread_rwlockattr_t attrib;
  int err = pthread_rwlockattr_init(&attrib);
  if (err != 0) {
    fprintf(stderr, "rw lock attribute init failed: %s\n", strerror(err));
    return nullptr;
  }

#ifdef HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP
  err = pthread_rwlockattr_setkind_np(&attrib, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
  if (err != 0) {
    fprintf(stderr, "Set rw lock attribute failure: %s\n", strerror(err));
    pthread_rwlockattr_destroy(&attrib);
    return nullptr;
  }
#endif

  std::unique_ptr<pthread_rwlock_t> lock(new pthread_rwlock_t);
  err = pthread_rwlock_init(lock.get(), &attrib);
  // The attribute object is no longer needed whether or not the init worked.
  pthread_rwlockattr_destroy(&attrib);
  if (err != 0) {
    fprintf(stderr, "rw lock init failed: %s\n", strerror(err));
    return nullptr;
  }

  return lock.release();
}

// Attempts to take the lock exclusively (write lock) without blocking.
bool TryAcquireSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  return pthread_rwlock_trywrlock(native) == 0;
}

// Takes the lock exclusively (write lock), blocking as needed.
bool AcquireSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  return pthread_rwlock_wrlock(native) == 0;
}

// Releases a lock taken exclusively.  A failing unlock is fatal: the reason
// is printed to stderr and the process aborts.
void ReleaseSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  const int err = pthread_rwlock_unlock(native);
  if (err == 0) return;

  fprintf(stderr, "SharedMutex unlock failed: %s\n", strerror(err));
  abort();
}

// Attempts to take the lock in shared mode (read lock) without blocking.
bool TrySharedAcquireSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  return pthread_rwlock_tryrdlock(native) == 0;
}

// Takes the lock in shared mode (read lock), blocking as needed.
bool SharedAcquireSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  return pthread_rwlock_rdlock(native) == 0;
}

// Releases a lock taken in shared mode.  A failing unlock is fatal: the
// reason is printed to stderr and the process aborts.
void SharedReleaseSharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  const int err = pthread_rwlock_unlock(native);
  if (err == 0) return;

  fprintf(stderr, "SharedMutex unlock failed: %s\n", strerror(err));
  abort();
}

// Destroys a lock created by CreateSharedMutex and frees its storage.
void DestroySharedMutex(SharedMutex lock) {
  pthread_rwlock_t* native = *(pthread_rwlock_t**)&lock;
  pthread_rwlock_destroy(native);
  delete native;
}

// Resolution of CLOCK_BOOTTIME in nanoseconds as stored by
// SystemClockFrequency(); 0 until that function has been called.
static uint64_t sys_clock_period_ = 0;

// Returns the CLOCK_BOOTTIME time in units of the clock period.  The value is
// in nanoseconds when the period is one nanosecond or has not been queried
// yet (dividing by the unset period of 0 would fault).
uint64_t ReadSystemClock() {
  struct timespec ts = {};
  clock_gettime(CLOCK_BOOTTIME, &ts);
  uint64_t time = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec));
  if (sys_clock_period_ > 1) return time / sys_clock_period_;
  return time;
}

// Queries the resolution of CLOCK_BOOTTIME, stores it (in nanoseconds) in
// sys_clock_period_ and returns the matching frequency in Hz.
uint64_t SystemClockFrequency() {
  struct timespec resolution;
  clock_getres(CLOCK_BOOTTIME, &resolution);

  const uint64_t period_ns =
      uint64_t(resolution.tv_sec) * 1000000000 + uint64_t(resolution.tv_nsec);
  sys_clock_period_ = period_ns;
  return 1000000000 / period_ns;
}

// Fills *cpuinfo from the CPUID instruction: the manufacturer id string and,
// for "AuthenticAMD" parts, the mwaitx flag taken from bit 29 of ECX of leaf
// 0x80000001.  Returns false on non-x86 builds or when a CPUID query fails;
// *cpuinfo is zeroed first on x86.
bool ParseCpuID(cpuid_t* cpuinfo) {
#if defined(__i386__) || defined(__x86_64__)
  memset(cpuinfo, 0, sizeof(*cpuinfo));

  /* Make sure current CPU supports at least EAX 4 */
  if (!__get_cpuid_max(0x80000004, NULL)) return false;

  // Manufacturer ID is a twelve-character ASCII string stored in order EBX, EDX, ECX.
  uint32_t max_eax = 0;
  uint32_t* const vendor_ebx = (uint32_t*)&cpuinfo->ManufacturerID[0];
  uint32_t* const vendor_edx = (uint32_t*)&cpuinfo->ManufacturerID[4];
  uint32_t* const vendor_ecx = (uint32_t*)&cpuinfo->ManufacturerID[8];
  if (!__get_cpuid(0, &max_eax, vendor_ebx, vendor_ecx, vendor_edx)) {
    return false;
  }

  // Only AMD processors are queried for the extended feature bit.
  if (strcmp(cpuinfo->ManufacturerID, "AuthenticAMD") != 0) return true;

  uint32_t eax, ebx, ecx, edx;
  if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
    cpuinfo->mwaitx = ((ecx >> 29) & 0x1) != 0;
  }
  return true;
#else
  return false;
#endif
}

}   //  namespace os
}   //  namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/util/locks.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// Library of synchronization primitives - to be added to as needed.

#ifndef HSA_RUNTIME_CORE_UTIL_LOCKS_H_
#define HSA_RUNTIME_CORE_UTIL_LOCKS_H_

#include "utils.h"
#include "os.h"

namespace rocr {

/// @brief: a mutex that spins before it sleeps.
/// Acquire() first busy-waits with pause instructions, then yields the
/// processor, and only after both budgets are used up blocks on a semaphore
/// that Release() posts.
class HybridMutex {
 public:
  HybridMutex():lock_(0) {
    sem_ = os::CreateSemaphore();
  }

  ~HybridMutex() {
    os::DestroySemaphore(sem_);
  }

  /// @brief: Take the lock if it is free.
  /// @return: true when the lock was taken.
  bool Try() {
    int old = 0;
    return lock_.compare_exchange_strong(old, 1);
  }

  /// @brief: Take the lock, waiting as long as needed.
  /// @return: always true.
  bool Acquire() {
    const int yieldBudget = static_cast<int>(maxSpinIterYield);
    const int spinBudget = static_cast<int>(maxSpinIterPause) + yieldBudget;

    // Failed attempts still allowed before blocking on the semaphore.
    int cnt = spinBudget;

    int old = 0;
    while (!lock_.compare_exchange_strong(old, 1)) {
      cnt--;
      if (cnt >= yieldBudget) {
        // First maxSpinIterPause failures: stay on the CPU.
        _mm_pause();
      } else if (cnt >= 0) {
        // Next maxSpinIterYield failures: let other threads run.
        os::YieldThread();
      } else {
        // Budgets exhausted: sleep until a Release() posts, then start over.
        os::WaitSemaphore(sem_);
        cnt = spinBudget;
      }
      // compare_exchange_strong stored the observed value in old; reset it.
      old = 0;
    }
    return true;
  }

  /// @brief: Release the lock and wake a thread blocked in Acquire(), if any.
  void Release() {
    int old = 1;
    if (lock_.compare_exchange_strong(old, 0))
      os::PostSemaphore(sem_);
  }

 private:
  /// 0 when free, 1 when held.
  std::atomic<int> lock_;
  /// Waited on by contending threads once their spin budget is used up.
  os::Semaphore sem_;
  /// Number of failed attempts answered with a pause instruction.
  const uint32_t maxSpinIterPause = 55;
  /// Number of further failed attempts answered with a thread yield.
  const uint32_t maxSpinIterYield = 55;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(HybridMutex);
};


/// @brief: a class represents a kernel mutex.
/// Uses the kernel's scheduler to keep the waiting thread from being scheduled
/// until the lock is released (Best for long waits, though anything using
/// a kernel object is a long wait).
class KernelMutex {
 public:
  KernelMutex() { lock_ = os::CreateMutex(); }
  ~KernelMutex() { os::DestroyMutex(lock_); }

  bool Try() { return os::TryAcquireMutex(lock_); }
  bool Acquire() { return os::AcquireMutex(lock_); }
  void Release() { os::ReleaseMutex(lock_); }

 private:
  os::Mutex lock_;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(KernelMutex);
};

/// @brief: represents a spin lock.
/// For very short hold durations on the order of the thread scheduling
/// quanta or less.
class SpinMutex {
 public:
  SpinMutex() : lock_(0) {}

  /// @brief: Take the lock if it is free; true when it was taken.
  bool Try() {
    int expected = 0;
    return lock_.compare_exchange_strong(expected, 1);
  }

  /// @brief: Take the lock, yielding the processor after every failed attempt.
  /// @return: always true.
  bool Acquire() {
    for (;;) {
      int expected = 0;
      if (lock_.compare_exchange_strong(expected, 1)) return true;
      os::YieldThread();
    }
  }

  /// @brief: Release the lock.
  void Release() {
    lock_.store(0);
  }

 private:
  /// 0 when free, 1 when held.
  std::atomic<int> lock_;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(SpinMutex);
};

class KernelEvent {
 public:
  KernelEvent() { evt_ = os::CreateOsEvent(true, true); }
  ~KernelEvent() { os::DestroyOsEvent(evt_); }

  bool IsSet() { return os::WaitForOsEvent(evt_, 0)==0; }
  bool WaitForSet() { return os::WaitForOsEvent(evt_, 0xFFFFFFFF)==0; }
  void Set() { os::SetOsEvent(evt_); }
  void Reset() { os::ResetOsEvent(evt_); }

 private:
  os::EventHandle evt_;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(KernelEvent);
};

/// @brief: represents a yielding shared mutex.
/// aka read/write mutex
class KernelSharedMutex {
 public:
  /// @brief: Interfaces ScopedAcquire to shared operations.
  class Shared {
   public:
    explicit Shared(KernelSharedMutex* lock) : lock_(lock) {}
    bool Try() { return lock_->TryShared(); }
    bool Acquire() { return lock_->AcquireShared(); }
    void Release() { lock_->ReleaseShared(); }

   private:
    KernelSharedMutex* lock_;
  };

  KernelSharedMutex() { lock_ = os::CreateSharedMutex(); }
  ~KernelSharedMutex() { os::DestroySharedMutex(lock_); }

  // Exclusive mode operations
  bool Try() { return os::TryAcquireSharedMutex(lock_); }
  bool Acquire() { return os::AcquireSharedMutex(lock_); }
  void Release() { os::ReleaseSharedMutex(lock_); }

  // Shared mode operations
  bool TryShared() { return os::TrySharedAcquireSharedMutex(lock_); }
  bool AcquireShared() { return os::SharedAcquireSharedMutex(lock_); }
  void ReleaseShared() { os::SharedReleaseSharedMutex(lock_); }

  // Return shared operations interface
  Shared shared() { return Shared(this); }

 private:
  os::SharedMutex lock_;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(KernelSharedMutex);
};

/// @brief: Type trait to identify mutex types
/// The primary template reports false; each mutex class of this header has a
/// specialization reporting true.  ScopedAcquire uses the value to decide
/// whether it holds a pointer to the lock or a copy of the lock object.
template <class T> class isMutex {
 public:
  enum { value = false };
};
/// @brief: HybridMutex is a mutex type.
template <> class isMutex<HybridMutex> {
 public:
  enum { value = true };
};
/// @brief: KernelMutex is a mutex type.
template <> class isMutex<KernelMutex> {
 public:
  enum { value = true };
};
/// @brief: SpinMutex is a mutex type.
template <> class isMutex<SpinMutex> {
 public:
  enum { value = true };
};
/// @brief: KernelSharedMutex is a mutex type.
template <> class isMutex<KernelSharedMutex> {
 public:
  enum { value = true };
};

/// @brief: A class that behaves as a lock held for a scope. To enter a
/// critical section, create an object of this class. When control leaves the
/// scope, the lock is released automatically.
/// LockType is either a mutex class (see isMutex), in which case the object is
/// built from a pointer to the mutex, or another type offering Acquire() and
/// Release(), which is stored by value.
template <class LockType> class ScopedAcquire {
 public:
  /// @brief: When constructing, acquire the lock.
  /// @param: lock(Input), pointer to an existing lock.
  explicit ScopedAcquire(LockType* lock) : lock_(lock), doRelease(true) {
    static_assert(isMutex<LockType>::value, "ScopedAcquire requires a mutex type.");
    lock_.Acquire();
  }
  /// @brief: When constructing, acquire the lock.
  /// @param: lock(Input), a non-mutex lock object that is copied and stored
  /// by value.
  explicit ScopedAcquire(LockType lock) : lock_(lock), doRelease(true) {
    static_assert(!isMutex<LockType>::value, "Mutex types are not copyable.");
    lock_.Acquire();
  }

  /// @brief: when destructing, release the lock.
  /// Nothing is released if Release() was already called.
  ~ScopedAcquire() {
    if (doRelease) lock_.Release();
  }

  /// @brief: Release the lock early.  Avoid using when possible.
  void Release() {
    lock_.Release();
    doRelease = false;
  }

 private:
  /// @brief: Adapts between pointers to mutex types and mutex pointer types.
  /// Primary template: holds a pointer to the lock.
  template <class T, bool B> class container {
   public:
    container(T* lock) : lock_(lock) {}
    __forceinline bool Acquire() { return lock_->Acquire(); }
    __forceinline void Release() { return lock_->Release(); }

   private:
    T* lock_;
  };

  /// @brief: Specialization for mutex pointer types.
  /// Holds the lock object itself rather than a pointer to it.
  template <class T> class container<T, false> {
   public:
    container(T lock) : lock_(lock) {}
    __forceinline bool Acquire() { return lock_.Acquire(); }
    __forceinline void Release() { return lock_.Release(); }

   private:
    T lock_;
  };

  /// Storage selected by isMutex: pointer for mutex types, value otherwise.
  container<LockType, isMutex<LockType>::value> lock_;
  /// False once Release() has been called, so the destructor does not
  /// release a second time.
  bool doRelease;

  /// @brief: Disable copiable and assignable ability.
  DISALLOW_COPY_AND_ASSIGN(ScopedAcquire);
};

}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_UTIL_LOCKS_H_


================================================
FILE: runtime/hsa-runtime/core/util/memory.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// Memory related utility functions.

#ifndef HSA_RUNTIME_CORE_UTIL_MEMORY_H_
#define HSA_RUNTIME_CORE_UTIL_MEMORY_H_

#ifdef __linux__
#include "inc/hsa.h"
#include <sys/mman.h>
#endif

namespace rocr {

#ifdef __linux__
/// @brief Maps an @ref hsa_access_permission_t value onto the matching mmap
///        PROT_* protection bits.
/// @param perms HSA access permission to translate.
/// @return PROT_READ, PROT_WRITE or their combination for the RO, WO and RW
///         permissions; PROT_NONE for HSA_ACCESS_PERMISSION_NONE and for any
///         other value.
__forceinline int PermissionsToMmapFlags(hsa_access_permission_t perms) {
  if (perms == HSA_ACCESS_PERMISSION_RW) return PROT_READ | PROT_WRITE;
  if (perms == HSA_ACCESS_PERMISSION_RO) return PROT_READ;
  if (perms == HSA_ACCESS_PERMISSION_WO) return PROT_WRITE;
  // HSA_ACCESS_PERMISSION_NONE and unrecognised values grant no access.
  return PROT_NONE;
}
#endif

}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_UTIL_MEMORY_H_


================================================
FILE: runtime/hsa-runtime/core/util/os.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// Minimal operating system abstraction interfaces.

#ifndef HSA_RUNTIME_CORE_UTIL_OS_H_
#define HSA_RUNTIME_CORE_UTIL_OS_H_

#include <string>
#include <vector>
#include "utils.h"

namespace rocr {
namespace os {
// Opaque handles handed out by the OS abstraction layer.  Every handle is a
// plain void* whose meaning is private to the platform implementation.
using LibHandle = void*;
using Semaphore = void*;
using Mutex = void*;
using SharedMutex = void*;
using Thread = void*;
using EventHandle = void*;

/// Named thread priority values.  OS_THREAD_PRIORITY_DEFAULT is the default
/// value of the priority argument of CreateThread.
enum ThreadPriority {
  OS_THREAD_PRIORITY_DEFAULT = -1,
  OS_THREAD_PRIORITY_HIGH = 254,
  OS_THREAD_PRIORITY_MAX = 255,
};

/// Operating systems known to the runtime.  COUNT follows the last real entry
/// and therefore equals the number of operating systems listed.
enum class os_t { OS_WIN = 0, OS_LINUX, COUNT };

/// @brief: Converts an os_t enumerator to its underlying integral value.
/// @param: val(Input), enumerator to convert.
/// @return: value of val expressed in the underlying type of os_t.
static __forceinline std::underlying_type<os_t>::type os_index(os_t val) {
  using index_t = std::underlying_type<os_t>::type;
  return static_cast<index_t>(val);
}

/// Operating system this build targets, selected from the compiler's
/// predefined platform macros (_WIN32 is checked first, then __linux__).
/// Building for any other platform is rejected at compile time.
#ifdef _WIN32
static const os_t current_os = os_t::OS_WIN;
#elif __linux__
static const os_t current_os = os_t::OS_LINUX;
#else
static_assert(false, "Operating System not detected!");
#endif

/// @brief: Loads dynamic library based on file name. Return value will be NULL
/// if failed.
/// @param: filename(Input), file name of the library.
/// @return: LibHandle.
LibHandle LoadLib(std::string filename);

/// @brief: Gets the address of exported symbol. Returns NULL if failed.
/// @param: lib(Input), handle of the library which exports the symbol.
/// @param: export_name(Input), the name of the exported symbol.
/// @return: void*.
void* GetExportAddress(LibHandle lib, std::string export_name);

/// @brief: Unloads the dynamic library.
/// @param: lib(Input), library handle which will be unloaded.
void CloseLib(LibHandle lib);

/// @brief: Lists loaded tool libraries that contain
/// symbol HSA_AMD_TOOL_PRIORITY
/// @return: List of library handles
std::vector<LibHandle> GetLoadedToolsLib();

/// @brief: Returns the library's path name.
/// @param: lib(Input), library handle
/// @return: Path name of library
std::string GetLibraryName(LibHandle lib);

/// @brief: Creates a Semaphore, will return NULL if failed.
/// @param: void.
/// @return: Semaphore.
Semaphore CreateSemaphore();

/// @brief: Waits for the semaphore. This is a blocking wait.
/// If the Semaphore is signalled, this function will return.
/// @param: sem(Input), handle to the semaphore.
/// @return: bool.
bool WaitSemaphore(Semaphore sem);

/// @brief: Post/Signal/Wake-up the semaphore
/// @param: sem(Input), handle to the semaphore.
/// @return: void.
void PostSemaphore(Semaphore sem);

/// @brief: Destroys the semaphore.
/// @param: sem(Input), handle to the semaphore.
/// @return: void.
void DestroySemaphore(Semaphore sem);

/// @brief: Creates a mutex, will return NULL if failed.
/// @param: void.
/// @return: Mutex.
Mutex CreateMutex();

/// @brief: Tries to acquire the mutex once, if successful, return true.
/// @param: lock(Input), handle to the mutex.
/// @return: bool.
bool TryAcquireMutex(Mutex lock);

/// @brief: Acquires the mutex, if the mutex is locked, it will wait until it is
/// released. If the mutex is acquired successfully, it will return true.
/// @param: lock(Input), handle to the mutex.
/// @return: bool.
bool AcquireMutex(Mutex lock);

/// @brief: Releases the mutex.
/// @param: lock(Input), handle to the mutex.
/// @return: void.
void ReleaseMutex(Mutex lock);

/// @brief: Destroys the mutex.
/// @param: lock(Input), handle to the mutex.
/// @return: void.
void DestroyMutex(Mutex lock);

/// @brief: Creates a shared mutex, will return NULL if failed.
/// @param: void.
/// @return: SharedMutex.
SharedMutex CreateSharedMutex();

/// @brief: Tries to acquire the mutex in exclusive mode once, if successful, return true.
/// @param: lock(Input), handle to the shared mutex.
/// @return: bool.
bool TryAcquireSharedMutex(SharedMutex lock);

/// @brief: Acquires the mutex in exclusive mode, if the mutex is locked, it will wait until it is
/// released. If the mutex is acquired successfully, it will return true.
/// @param: lock(Input), handle to the mutex.
/// @return: bool.
bool AcquireSharedMutex(SharedMutex lock);

/// @brief: Releases the mutex from exclusive mode.
/// @param: lock(Input), handle to the mutex.
/// @return: void.
void ReleaseSharedMutex(SharedMutex lock);

/// @brief: Tries to acquire the mutex in shared mode once, if successful, return true.
/// @param: lock(Input), handle to the mutex.
/// @return: bool.
bool TrySharedAcquireSharedMutex(SharedMutex lock);

/// @brief: Acquires the mutex in shared mode, if the mutex is in exclusive mode, it will wait until it
/// is released. If the mutex is acquired successfully, it will return true.
/// @param: lock(Input), handle to the mutex.
/// @return: bool.
bool SharedAcquireSharedMutex(SharedMutex lock);

/// @brief: Releases the mutex from shared mode.
/// @param: lock(Input), handle to the mutex.
/// @return: void.
void SharedReleaseSharedMutex(SharedMutex lock);

/// @brief: Destroys the mutex.
/// @param: lock(Input), handle to the mutex.
/// @return: void.
void DestroySharedMutex(SharedMutex lock);

/// @brief: Puts current thread to sleep.
/// @param: delayInMs(Input), time in millisecond for sleeping.
/// @return: void.
void Sleep(int delayInMs);

/// @brief: Puts current thread to sleep.
/// @param: delayInUs(Input), time in microseconds for sleeping.
/// @return: void.
void uSleep(int delayInUs);

/// @brief: Yields current thread.
/// @param: void.
/// @return: void.
void YieldThread();

typedef void (*ThreadEntry)(void*);

/// @brief: Creates a thread, will return NULL if failed.
/// @param: entry_function(Input), a pointer to the function which the thread
/// starts from.
/// @param: entry_argument(Input), a pointer to the argument of the thread
/// function.
/// @param: stack_size(Input), size of the thread's stack, 0 by default.
/// @param: priority(Input), thread priority.
/// @return: Thread, a handle to thread created.
Thread CreateThread(ThreadEntry entry_function, void* entry_argument,
                    uint stack_size = 0, int priority = OS_THREAD_PRIORITY_DEFAULT);

/// @brief: Destroys the thread.
/// @param: thread(Input), handle of the thread to be destroyed.
/// @return: void.
void CloseThread(Thread thread);

/// @brief: Waits for specific thread to finish, if successful, return true.
/// @param: thread(Input), handle to waiting thread.
/// @return: bool.
bool WaitForThread(Thread thread);

/// @brief: Waits for multiple threads to finish, if successful, return true.
/// @param: threads(Input), a pointer to a list of thread handle.
/// @param: thread_count(Input), number of threads to be waited on.
/// @return: bool.
bool WaitForAllThreads(Thread* threads, uint thread_count);

/// @brief: Determines if environment key is set.
/// @param: env_var_name(Input), name of the environment value.
/// @return: bool, true for binding any value to environment key,
/// including an empty string. False otherwise
bool IsEnvVarSet(std::string env_var_name);

/// @brief: Sets the environment variable.
/// @param: env_var_name(Input), name of the environment variable.
/// @param: env_var_value(Input), value of the environment variable.
/// @return: void.
void SetEnvVar(std::string env_var_name, std::string env_var_value);

/// @brief: Gets the value of environment value.
/// @param: env_var_name(Input), name of the environment value.
/// @return: std::string, value of the environment value, returned as string.
std::string GetEnvVar(std::string env_var_name);

/// @brief: Gets the process ID.
/// @param: void
/// @return: int, process ID returned as int.
int GetProcessId();

/// @brief: Gets the max virtual memory size accessible to the application.
/// @param: void.
/// @return: size_t, size of the accessible memory to the application.
size_t GetUserModeVirtualMemorySize();

/// @brief: Gets the max physical host system memory size.
/// @param: void.
/// @return: size_t, size of the physical host system memory.
size_t GetUsablePhysicalHostMemorySize();

/// @brief: Gets the virtual memory base address. It is hardcoded to 0.
/// @param: void.
/// @return: uintptr_t, always 0.
uintptr_t GetUserModeVirtualMemoryBase();

/// @brief os event api, create an event
/// @param: auto_reset whether an event can reset the status automatically
/// @param: init_state initial state of the event
/// @return: event handle
EventHandle CreateOsEvent(bool auto_reset, bool init_state);

/// @brief os event api, destroy an event
/// @param: event handle
/// @return: whether destroy is correct
int DestroyOsEvent(EventHandle event);

/// @brief os event api, wait on event
/// @param: event Event handle
/// @param: milli_seconds wait time
/// @return: Indicate success or timeout
int WaitForOsEvent(EventHandle event, unsigned int milli_seconds);

/// @brief os event api, set event state
/// @param: event Event handle
/// @return: Whether event set is correct
int SetOsEvent(EventHandle event);

/// @brief os event api, reset event state
/// @param: event Event handle
/// @return: Whether event reset is correct
int ResetOsEvent(EventHandle event);

/// @brief reads a clock which is deemed to be accurate for elapsed time
/// measurements, though not necessarily fast to query
/// @return clock counter value
uint64_t ReadAccurateClock();

/// @brief retrieves the frequency in Hz of the unit used in ReadAccurateClock.
/// It does not necessarily reflect the resolution of the clock, but is the
/// value needed to convert a difference in the clock's counter value to elapsed
/// seconds.  This frequency does not change at runtime.
/// @return returns the frequency
uint64_t AccurateClockFrequency();

/// @brief read the system clock which serves as the HSA system clock
/// counter in KFD.
uint64_t ReadSystemClock();

/// @brief read the system clock frequency
uint64_t SystemClockFrequency();

/// Result record filled in by ParseCpuID.
struct cpuid_s {
  // Manufacturer string: 12 characters followed by the terminating NUL.
  char ManufacturerID[13];
  // mwaitx flag reported by ParseCpuID.
  bool mwaitx;
};
using cpuid_t = cpuid_s;

/// @brief parse CPUID
/// @param: cpuinfo struct to be filled
bool ParseCpuID(cpuid_t* cpuinfo);

}   //  namespace os
}   //  namespace rocr

#endif  // HSA_RUNTIME_CORE_UTIL_OS_H_


================================================
FILE: runtime/hsa-runtime/core/util/simple_heap.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// A simple best fit memory allocator with eager compaction.  Manages block sub-allocation.
// For use when memory efficiency is more important than allocation speed.
// O(log n) time.

#ifndef HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_
#define HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_

#include <map>
#include <deque>
#include <utility>

#include "core/util/utils.h"

namespace rocr {

/// @brief Best fit sub-allocator that carves fragments out of blocks obtained
/// from a block allocator.
///
/// Blocks are requested from @p Allocator, split into fragments on demand and
/// adjacent free fragments are merged eagerly when memory is returned.  A block
/// that becomes completely free is parked in a block cache, or handed straight
/// back to the block allocator when the block was marked for discard.
///
/// @tparam Allocator Block provider.  The heap calls:
///   - alloc(bytes, size): returns the block address and stores the real block
///     length in @p size (must be at least @p bytes),
///   - free(ptr, length): releases a block,
///   - block_size(): default block size.
template <typename Allocator> class SimpleHeap {
 private:
  // Book-keeping record for one fragment of a block.
  struct Fragment_T {
    typedef std::multimap<size_t, uintptr_t>::iterator ptr_t;
    // Entry of this fragment in free_list_, or free_list_.end() when it has none.
    ptr_t free_list_entry_;
    struct {
      size_t size : 62;  // Fragment length in bytes.
      bool discard : 1;  // Set once the containing block was marked for discard.
      bool free : 1;     // Set while the fragment is not allocated.
    };

    Fragment_T(ptr_t Iterator, size_t Len, bool Free)
        : free_list_entry_(Iterator), size(Len), discard(false), free(Free) {}
    Fragment_T() = default;
  };

  // Address and length of a whole block held in the block cache.
  struct Block {
    uintptr_t base_ptr_;
    size_t length_;

    Block(uintptr_t base, size_t length) : base_ptr_(base), length_(length) {}
    Block() = default;
  };

  Allocator block_allocator_;

  // Free fragments ordered by size (size -> fragment address), used for best fit lookup.
  std::multimap<size_t, uintptr_t> free_list_;
  // Block base address -> (fragment address -> fragment record) for every live block.
  std::map<uintptr_t, std::map<uintptr_t, Fragment_T>> block_list_;
  // Completely free blocks kept for reuse.
  std::deque<Block> block_cache_;

  // Size of blocks that are at least partially in use.
  size_t in_use_size_;
  // Total size of block cache
  size_t cache_size_;

  __forceinline bool isFree(const Fragment_T& node) { return node.free; }
  // Marks node as allocated; allocated fragments have no free list entry.
  __forceinline void setUsed(Fragment_T& node) {
    node.free = false;
    node.free_list_entry_ = free_list_.end();
  }
  // Marks node as free and records its free list entry (may be free_list_.end()).
  __forceinline void setFree(Fragment_T& node, typename Fragment_T::ptr_t Iterator) {
    node.free_list_entry_ = Iterator;
    node.free = true;
  }
  // Builds the record of an allocated fragment of Len bytes.
  __forceinline Fragment_T makeFragment(size_t Len) {
    return Fragment_T(free_list_.end(), Len, false);
  }
  // Builds the record of a free fragment of Len bytes published at Iterator.
  __forceinline Fragment_T makeFragment(typename Fragment_T::ptr_t Iterator, size_t Len) {
    return Fragment_T(Iterator, Len, true);
  }
  // Withdraws node from the free list if it is currently published there.
  __forceinline void removeFreeListEntry(Fragment_T& node) {
    if (node.free_list_entry_ != free_list_.end()) {
      free_list_.erase(node.free_list_entry_);
      node.free_list_entry_ = free_list_.end();
    }
  }
  // Flags node as belonging to a discarded block and hides it from the free list.
  __forceinline void discard(Fragment_T& node) {
    removeFreeListEntry(node);
    node.discard = true;
  }

 public:
  explicit SimpleHeap(const Allocator& BlockAllocator = Allocator())
      : block_allocator_(BlockAllocator), in_use_size_(0), cache_size_(0) {}
  // Releases the cached blocks.  Blocks that still contain fragments are not released here.
  ~SimpleHeap() {
    trim();
    // Leak here may be due to the user.  Check is for debugging only.
    // assert(in_use_size_ == 0 && "Leak in SimpleHeap.");
  }

  SimpleHeap(const SimpleHeap& rhs) = delete;
  SimpleHeap(SimpleHeap&& rhs) = delete;
  SimpleHeap& operator=(const SimpleHeap& rhs) = delete;
  SimpleHeap& operator=(SimpleHeap&& rhs) = delete;

  /// @brief Allocates @p bytes from the smallest free fragment that fits, from a cached block, or
  /// from a new block of the block allocator, in that order.
  /// @param bytes Requested size.  Zero sized requests are not special-cased here; callers are
  /// expected to pass a non-zero size.
  /// @return Address of the allocation.
  void* alloc(size_t bytes) {
    // Find best fit.
    auto free_fragment = free_list_.lower_bound(bytes);
    uintptr_t base;
    size_t size;

    if (free_fragment != free_list_.end()) {
      base = free_fragment->second;
      size = free_fragment->first;
      free_list_.erase(free_fragment);

      assert(size >= bytes && "SimpleHeap: map lower_bound failure.");

      // Find the containing block and fragment
      auto it = block_list_.upper_bound(base);
      it--;
      auto& frag_map = it->second;
      const auto& fragment = frag_map.find(base);

      assert(fragment != frag_map.end() && "Inconsistency in SimpleHeap.");
      assert(size == fragment->second.size && "Inconsistency in SimpleHeap.");

      // Sub-allocate from fragment.
      fragment->second.size = bytes;
      setUsed(fragment->second);
      // Record remaining free space.
      if (size > bytes) {
        free_fragment = free_list_.insert(std::make_pair(size - bytes, base + bytes));
        frag_map[base + bytes] = makeFragment(free_fragment, size - bytes);
      }
      return reinterpret_cast<void*>(base);
    }

    // No usable fragment, check block cache
    if (bytes < default_block_size() && !block_cache_.empty()) {
      // Copy the fields out before pop_back() destroys the cached entry.
      const auto& block = block_cache_.back();
      base = block.base_ptr_;
      size = block.length_;
      block_cache_.pop_back();
      cache_size_ -= size;
    } else {  // Alloc new block - new block may be larger than default.
      void* ptr = block_allocator_.alloc(bytes, size);
      base = reinterpret_cast<uintptr_t>(ptr);
      assert(ptr != nullptr && "Block allocation failed, Allocator is expected to throw.");
    }

    in_use_size_ += size;
    assert(size >= bytes && "Alloc exceeds block size.");
    // Sub alloc and insert free region.
    if (size > bytes) {
      free_fragment = free_list_.insert(std::make_pair(size - bytes, base + bytes));
      block_list_[base][base + bytes] = makeFragment(free_fragment, size - bytes);
    }
    // Track used region
    block_list_[base][base] = makeFragment(bytes);

    // Disallow multiple suballocation from large blocks.
    // Prevents a small allocation from retaining a large block.
    if (bytes > default_block_size()) {
      bool err = discardBlock(reinterpret_cast<void*>(base));
      assert(err && "Large block discard failed.");
    }

    return reinterpret_cast<void*>(base);
  }

  /// @brief Returns the fragment starting at @p ptr to the heap.
  /// @param ptr Address previously returned by alloc(); nullptr is accepted and ignored.
  /// @return true on success (including nullptr), false when @p ptr is not the start of a
  /// fragment or the fragment is already free.
  bool free(void* ptr) {
    if (ptr == nullptr) return true;

    uintptr_t base = reinterpret_cast<uintptr_t>(ptr);

    // Find fragment and validate.
    auto frag_map_it = block_list_.upper_bound(base);
    if (frag_map_it == block_list_.begin()) return false;
    frag_map_it--;
    auto& frag_map = frag_map_it->second;
    auto fragment = frag_map.find(base);
    if (fragment == frag_map.end() || isFree(fragment->second)) return false;

    bool discard = fragment->second.discard;

    // Merge lower
    if (fragment != frag_map.begin()) {
      auto lower = fragment;
      lower--;
      if (isFree(lower->second)) {
        removeFreeListEntry(lower->second);
        lower->second.size += fragment->second.size;
        frag_map.erase(fragment);
        fragment = lower;
      }
    }

    // Merge upper
    {
      auto upper = fragment;
      upper++;
      if ((upper != frag_map.end()) && isFree(upper->second)) {
        removeFreeListEntry(upper->second);
        fragment->second.size += upper->second.size;
        frag_map.erase(upper);
      }
    }

    // Release whole free blocks.
    if (frag_map.size() == 1) {
      Block block(fragment->first, fragment->second.size);
      block_list_.erase(frag_map_it);

      // Discard or add to the block cache.
      if (discard) {
        // in_use_size_ was already reduced when the block was marked for discard.
        block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
      } else {
        block_cache_.push_back(block);
        cache_size_ += block.length_;
        in_use_size_ -= block.length_;
      }

      balance();

      // Don't publish free space since block was moved to the cache.
      return true;
    }

    // Don't report free memory if discarding the fragment.
    if (discard) {
      // The fragment stays off the free list but must still be flagged free.  Otherwise the
      // neighbouring fragments can never merge with it, the block never collapses to a single
      // fragment and is never handed back to the block allocator.  The flag also lets a repeated
      // free of the same pointer be rejected.
      setFree(fragment->second, free_list_.end());
      return true;
    }

    // Report free fragment
    const auto& freeEntry =
        free_list_.insert(std::make_pair(size_t(fragment->second.size), fragment->first));
    setFree(fragment->second, freeEntry);

    return true;
  }

  /// @brief Shrinks the block cache.  The oldest cached blocks are released while more than one
  /// block is cached and the cache holds more than twice the in-use size.
  void balance() {
    // Release old blocks when over cache limit.
    while ((block_cache_.size() > 1) && (cache_size_ > in_use_size_ * 2)) {
      const auto& block = block_cache_.front();
      block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
      cache_size_ -= block.length_;
      block_cache_.pop_front();
    }
  }

  /// @brief Releases every cached block back to the block allocator.
  void trim() {
    for (const auto& block : block_cache_)
      block_allocator_.free(reinterpret_cast<void*>(block.base_ptr_), block.length_);
    block_cache_.clear();
    cache_size_ = 0;
  }

  /// @return Total size of the blocks currently held in the block cache.
  size_t cache_size() const { return cache_size_; }

  /// @return Default block size reported by the block allocator.
  size_t default_block_size() const { return block_allocator_.block_size(); }

  // Prevent reuse of the block containing ptr.  No further fragments will be allocated from the
  // block and the block will not be added to the block cache when it is free.
  // Returns false when ptr does not lie inside a tracked block, true otherwise (nullptr and
  // already discarded blocks included).
  bool discardBlock(void* ptr) {
    if (ptr == nullptr) return true;

    uintptr_t base = reinterpret_cast<uintptr_t>(ptr);

    // Find block validate.
    auto frag_map_it = block_list_.upper_bound(base);
    if (frag_map_it == block_list_.begin()) return false;
    frag_map_it--;
    auto& frag_map = frag_map_it->second;
    if ((base < frag_map.begin()->first) ||
        (frag_map.rbegin()->first + frag_map.rbegin()->second.size <= base))
      return false;

    // Is block already discarded?
    if (frag_map.begin()->second.discard) return true;

    // Mark all fragments for discard and compute block size.  Removes freelist records for all
    // fragments in the block.
    size_t size = 0;
    for (auto& frag : frag_map) {
      discard(frag.second);
      size += frag.second.size;
    }

    // Remove discarded block from in-use tracking and rebalance the block cache.
    in_use_size_ -= size;
    balance();

    return true;
  }
};

}  // namespace rocr

#endif  // HSA_RUNTME_CORE_UTIL_SIMPLE_HEAP_H_


================================================
FILE: runtime/hsa-runtime/core/util/small_heap.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "small_heap.h"

namespace rocr {

// Links node into the freelist directly behind place.
// Relies on the guard nodes: place always has a successor, so no end-of-list handling is needed.
void SmallHeap::insertafter(SmallHeap::iterator_t place, SmallHeap::iterator_t node) {
  assert(place->first < node->first && "Order violation");
  assert(isfree(place->second) && "Freelist operation error.");
  const iterator_t successor = place->second.next;
  // Point the new node at both of its neighbours...
  node->second.prior = place;
  node->second.next = successor;
  // ...then redirect the neighbours to the new node.
  successor->second.prior = node;
  place->second.next = node;
}

// Unlinks node from the freelist and marks it as used.
// Relies on the guard nodes: node always has both a predecessor and a successor.
void SmallHeap::remove(SmallHeap::iterator_t node) {
  assert(isfree(node->second) && "Freelist operation error.");
  Node& self = node->second;
  const iterator_t before = self.prior;
  const iterator_t after = self.next;
  // Bridge the neighbours over the removed node.
  before->second.next = after;
  after->second.prior = before;
  setused(self);
}

// Fuses two free nodes when low ends exactly where high begins.
// Returns the merged node (low) on success, or high unchanged when the nodes are not adjacent.
SmallHeap::memory_t::iterator SmallHeap::merge(SmallHeap::memory_t::iterator low,
                                               SmallHeap::memory_t::iterator high) {
  assert(isfree(low->second) && "Merge with allocated block");
  assert(isfree(high->second) && "Merge with allocated block");

  char* const low_end = static_cast<char*>(low->first) + low->second.len;
  const bool adjacent = (low_end == static_cast<char*>(high->first));
  if (!adjacent) return high;

  assert(!islastfree(high->second) && "Illegal merge.");

  // low absorbs high's length and takes over high's place in the freelist.
  const iterator_t successor = high->second.next;
  low->second.len += high->second.len;
  low->second.next = successor;
  successor->second.prior = low;

  memory.erase(high);
  return low;
}

// Returns the allocation starting at ptr to the heap and merges it with adjacent free nodes.
// nullptr is ignored.  An address that is not the start of a tracked node, or a node that is
// already free, is an illegal free: it asserts in debug builds and is ignored otherwise.
void SmallHeap::free(void* ptr) {
  if (ptr == nullptr) return;

  auto iterator = memory.find(ptr);

  // Check for illegal free.  A node that is already on the freelist must be rejected too:
  // re-inserting it would count its length twice in total_free and link the node to itself.
  if (iterator == memory.end() || isfree(iterator->second)) {
    assert(false && "Illegal free.");
    return;
  }

  // Return memory to total and link node into free list
  total_free += iterator->second.len;

  // Could also traverse the free list which might be faster in some cases.
  // Walk down to the nearest free node below; the walk relies on a free node existing there.
  auto before = iterator;
  before--;
  while (!isfree(before->second)) before--;
  assert(before->second.next->first > iterator->first && "Inconsistency in small heap.");
  insertafter(before, iterator);

  // Attempt compaction
  iterator = merge(before, iterator);
  merge(iterator, iterator->second.next);

  // Forget ptr as a high allocation (no effect when ptr was not recorded in the set).
  high.erase(ptr);
}

void* SmallHeap::alloc(size_t bytes) {
  // Is enough memory available?
  if ((bytes > total_free) || (bytes == 0)) return nullptr;

  iterator_t current;

  // Walk the free list and allocate at first fitting location
  current = firstfree();
  while (!islastfree(current->second)) {
    if (bytes <= current->second.len) {
      // Decrement from total
      total_free -= bytes;

      // Split node
      if (bytes != current->second.len) {
        void* remaining = (char*)current->first + bytes;
        Node& node = memory[remaining];
        node.len = current->second.len - bytes;
        current->second.len = bytes;
        insertafter(current, memory.find(remaining));
      }

      remove(current);
      return current->first;
    }
    current = current->second.next;
  }
  assert(current->second.len == 0 && "Freelist corruption.");

  // Can't service the request due to fragmentation
  return nullptr;
}

void* SmallHeap::alloc_high(size_t bytes) {
  // Is enough memory available?
  if ((bytes > total_free) || (bytes == 0)) return nullptr;

  iterator_t current;

  // Walk the free list and allocate at first fitting location
  current = lastfree();
  while (!isfirstfree(current->second)) {
    if (bytes <= current->second.len) {
      // Decrement from total
      total_free -= bytes;

      void* alloc;
      // Split node
      if (bytes != current->second.len) {
        alloc = (char*)current->first + current->second.len - bytes;
        current->second.len -= bytes;
        Node& node = memory[alloc];
        node.len = bytes;
        setused(node);
      } else {
        alloc = current->first;
        remove(current);
      }

      high.insert(alloc);
      return alloc;
    }
    current = current->second.prior;
  }
  assert(current->second.len == 0 && "Freelist corruption.");

  // Can't service the request due to fragmentation
  return nullptr;
}

}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/core/util/small_heap.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// A simple first fit memory allocator with eager compaction.  For use with few
// items (where list iteration is faster than trees).
// Not thread safe!

#ifndef HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_
#define HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_

#include <map>
#include <set>

#include "utils.h"

namespace rocr {

// Bookkeeping for a sub-allocator over one caller-supplied address range.
// Every block (free or used) is an entry in `memory`, keyed by its start
// address.  Free blocks are additionally chained through Node::next /
// Node::prior; two zero-length sentinel entries (lowest and highest key)
// terminate that chain.  Not copyable.
class SmallHeap {
 private:
  struct Node;
  using memory_t = std::map<void*, Node>;
  using iterator_t = memory_t::iterator;

  // Per-block record.  A block is tagged "used" by pointing `next` at
  // memory.begin(); otherwise next/prior are its free-list neighbours.
  struct Node {
    size_t len;
    iterator_t next;
    iterator_t prior;
  };

  SmallHeap(const SmallHeap& rhs) = delete;
  SmallHeap& operator=(const SmallHeap& rhs) = delete;

  void* const pool;     // Start of the managed range.
  const size_t length;  // Size of the managed range in bytes.

  size_t total_free;    // Starts equal to `length`.
  memory_t memory;      // All blocks plus the two sentinels.
  std::set<void*> high; // Addresses recorded by the high-allocation path.

  // Free-list state queries and markers.  memory.end() marks the ends of the
  // free chain, memory.begin() marks a block as in use.
  __forceinline bool isfree(const Node& node) const { return node.next != memory.begin(); }
  __forceinline bool islastfree(const Node& node) const { return node.next == memory.end(); }
  __forceinline bool isfirstfree(const Node& node) const { return node.prior == memory.end(); }
  __forceinline void setlastfree(Node& node) { node.next = memory.end(); }
  __forceinline void setfirstfree(Node& node) { node.prior = memory.end(); }
  __forceinline void setused(Node& node) { node.next = memory.begin(); }

  // Chain entry points, read from the low and high sentinels respectively.
  __forceinline iterator_t firstfree() { return memory.begin()->second.next; }
  __forceinline iterator_t lastfree() { return memory.rbegin()->second.prior; }
  void insertafter(iterator_t place, iterator_t node);
  void remove(iterator_t node);
  iterator_t merge(iterator_t low, iterator_t high);

 public:
  // Creates an empty heap that manages no memory; no sentinels are created.
  SmallHeap() : pool(nullptr), length(0), total_free(0) {}

  // Creates a heap managing [base, base + length) as one single free block.
  SmallHeap(void* base, size_t length)
      : pool(base), length(length), total_free(length) {
    assert(pool != nullptr && "Invalid base address.");
    assert(pool != (void*)0xFFFFFFFFFFFFFFFFull && "Invalid base address.");
    assert((char*)pool + length != (char*)0xFFFFFFFFFFFFFFFFull && "Invalid pool bounds.");

    // Key of the high sentinel; the low sentinel uses the null address.
    void* const top_key = (void*)0xFFFFFFFFFFFFFFFFull;

    // Create low sentinel, the initial free block, and high sentinel.
    Node& head = memory[nullptr];
    Node& block = memory[pool];
    Node& tail = memory[top_key];

    // Low sentinel: zero length, links forward to the free block.
    head.len = 0;
    head.next = memory.find(pool);
    setfirstfree(head);

    // The whole pool starts out as one free block between the sentinels.
    block.len = length;
    block.prior = memory.begin();
    block.next = --memory.end();

    // High sentinel: zero length, links back to the free block.
    tail.len = 0;
    tail.prior = head.next;
    setlastfree(tail);

    high.insert(top_key);
  }

  void* alloc(size_t bytes);
  void* alloc_high(size_t bytes);
  void free(void* ptr);

  // Simple accessors for the managed range and the current free byte count.
  void* base() const { return pool; }
  size_t size() const { return length; }
  size_t remaining() const { return total_free; }
  // Lowest address currently stored in `high`.
  void* high_split() const { return *high.begin(); }
};

}  // namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/core/util/timer.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/util/timer.h"

namespace rocr {
namespace timer {

// Caches the OS accurate-clock frequency and derives the tick period in
// nanoseconds that accurate_clock::now() multiplies raw readings by.
accurate_clock::init::init() {
  accurate_clock::freq = os::AccurateClockFrequency();
  const double ticks_per_second = double(accurate_clock::freq);
  accurate_clock::period_ns = 1e9 / ticks_per_second;
}

// Calibrates the fast clock using the accurate clock.
//
// Measures how many raw fast-clock ticks elapse across an interval timed with
// accurate_clock, then stores the resulting tick frequency (ticks per second)
// and tick period (picoseconds) in fast_clock::freq / fast_clock::period_ps.
fast_clock::init::init() {
  typedef accurate_clock clock;
  // Initial busy-wait length; doubled after every round of trials.
  clock::duration delay(std::chrono::milliseconds(1));

  // calibrate clock
  // `min` is the fast-clock tick delta of the best trial so far, `elapsed`
  // the matching accurate-clock interval.
  fast_clock::raw_rep min = 0;
  clock::duration elapsed;

  do {
    // Each round starts with no accepted sample.
    elapsed = clock::duration::max();

    for (int t = 0; t < 10; t++) {
      fast_clock::raw_rep r1, r2;
      clock::time_point t0, t1, t2, t3;

      // Bracket the first fast-clock read between two accurate-clock reads.
      // The signal fences keep the compiler from reordering the reads.
      t0 = clock::now();
      std::atomic_signal_fence(std::memory_order_acq_rel);
      r1 = fast_clock::raw_now();
      std::atomic_signal_fence(std::memory_order_acq_rel);
      t1 = clock::now();
      std::atomic_signal_fence(std::memory_order_acq_rel);

      // Busy-wait until at least `delay` has passed on the accurate clock.
      do {
        t2 = clock::now();
      } while (t2 - t1 < delay);

      // Bracket the second fast-clock read the same way.
      std::atomic_signal_fence(std::memory_order_acq_rel);
      r2 = fast_clock::raw_now();
      std::atomic_signal_fence(std::memory_order_acq_rel);
      t3 = clock::now();

      // If elapsed time is shorter than last recorded time and both the start
      // and end times are confirmed correlated then record the clock readings.
      // This protects against inaccuracy due to thread switching
      // "Correlated" here means each bracketing pair of accurate-clock reads
      // took less than a tenth of the measured wait.
      if ((t3 - t1 < elapsed) && ((t1 - t0) * 10 < (t2 - t1)) &&
          ((t3 - t2) * 10 < (t2 - t1))) {
        elapsed = t3 - t1;
        min = r2 - r1;
      }
    }
    // Double the wait and retry until the accepted sample spans >= 1000 ticks.
    delay += delay;
  } while (min < 1000);

  // ticks / seconds gives Hz; 1e12 / Hz gives picoseconds per tick.
  fast_clock::freq = double(min) / duration_in_seconds(elapsed);
  fast_clock::period_ps = 1e12 / fast_clock::freq;
  // printf("Timer setup took %f ms\n", duration_in_seconds(elapsed)*1000.0f);
  // printf("Fast clock frequency: %f MHz\n", double(fast_clock::freq)/1e6);
}

// Storage for the clocks' static state.  The `init` objects run the
// calibration constructors above during static initialization.
// NOTE(review): fast_clock's calibration calls accurate_clock::now(), which
// reads period_ns, so accurate_clock_init must stay defined before
// fast_clock_init in this translation unit.
double accurate_clock::period_ns;
accurate_clock::raw_frequency accurate_clock::freq;
accurate_clock::init accurate_clock::accurate_clock_init;

double fast_clock::period_ps;
fast_clock::raw_frequency fast_clock::freq;
fast_clock::init fast_clock::fast_clock_init;
}   //  namespace timer
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/core/util/timer.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_UTIL_TIMER_H_
#define HSA_RUNTIME_CORE_UTIL_TIMER_H_

#include "core/util/utils.h"
#include "core/util/os.h"
#include <chrono>
#include <time.h>
#include <type_traits>

namespace rocr {
namespace timer {

// Workaround for a mixed-arithmetic bug in MSVC's std::chrono::duration_cast
// (as of VS 2013): convert units using the widest representation of the
// source's arithmetic category first, then narrow to the target rep.

// Selects the widest arithmetic type for a representation category:
// double for floating point, otherwise intmax_t / uintmax_t by signedness.
template <bool isFloat, bool isSigned>
struct wide_type {
  using type = double;
};
template <>
struct wide_type<false, false> {
  using type = uintmax_t;
};
template <>
struct wide_type<false, true> {
  using type = intmax_t;
};

/// @brief: Converts a duration to another duration type.
/// @param: d(Input), the duration to convert.
/// @return: To, holding d expressed in To's period and cast to To's rep.
template <typename To, typename Rep, typename Period>
static __forceinline To
    duration_cast(const std::chrono::duration<Rep, Period>& d) {
  using widest_rep = typename wide_type<std::is_floating_point<Rep>::value,
                                        std::is_signed<Rep>::value>::type;
  using widened_t = std::chrono::duration<widest_rep, typename To::period>;

  // Step 1: change the period while staying in the wide representation.
  const widest_rep ticks = std::chrono::duration_cast<widened_t>(d).count();
  // Step 2: narrow the tick count to the requested representation.
  return To(static_cast<typename To::rep>(ticks));
}
// End patch

/// @brief: Expresses a duration as a floating point number of seconds.
/// @param: delta(Input), the duration to convert.
/// @return: double, delta in seconds.
template <typename Rep, typename Period>
static __forceinline double duration_in_seconds(
    std::chrono::duration<Rep, Period> delta) {
  const std::chrono::duration<double, std::ratio<1, 1>> as_seconds(delta);
  return as_seconds.count();
}

/// @brief: Builds a duration from a floating point number of seconds.
/// Despite its name, the template parameter `rep` is the target duration
/// type (it is what std::chrono::duration_cast is instantiated with).
/// @param: delta(Input), time span in seconds.
/// @return: rep, delta converted with std::chrono::duration_cast.
template <typename rep>
static __forceinline rep duration_from_seconds(double delta) {
  const std::chrono::duration<double, std::ratio<1, 1>> as_seconds(delta);
  return std::chrono::duration_cast<rep>(as_seconds);
}

// Provides a C++11 standard clock interface to the os::AccurateClock functions.
// Time points are doubles in nanoseconds, obtained by scaling the raw OS
// counter by the cached tick period.
class accurate_clock {
 public:
  using rep = double;
  using period = std::nano;
  using duration = std::chrono::duration<rep, period>;
  using time_point = std::chrono::time_point<accurate_clock>;

  static const bool is_steady = true;

  // Current time: raw counter value times nanoseconds-per-tick.
  static __forceinline time_point now() {
    const duration since_epoch(raw_now() * period_ns);
    return time_point(since_epoch);
  }

  // These two extra APIs and types let us use clocks without conversion to the
  // arbitrary period unit
  using raw_rep = uint64_t;
  using raw_frequency = uint64_t;

  // Unscaled OS counter reading.
  static __forceinline raw_rep raw_now() { return os::ReadAccurateClock(); }
  // Counter frequency as cached by the static initializer.
  static __forceinline raw_frequency raw_freq() { return freq; }

 private:
  static double period_ns;    // Nanoseconds per raw tick.
  static raw_frequency freq;  // Raw tick frequency.

  // Static-initialization hook; its constructor fills in freq and period_ns.
  class init {
   public:
    init();
  };
  static init accurate_clock_init;
};

// Provides a C++11 standard clock interface to the lowest latency approximate
// clock.  Time points are doubles in picoseconds, obtained by scaling the raw
// counter by the tick period measured during static initialization.
class fast_clock {
 public:
  typedef double rep;
  typedef std::pico period;
  typedef std::chrono::duration<rep, period> duration;
  typedef std::chrono::time_point<fast_clock> time_point;

  static const bool is_steady = true;

  // Current time: raw counter value times picoseconds-per-tick.
  static __forceinline time_point now() {
    return time_point(duration(raw_now() * period_ps));
  }

  // These two extra APIs and types let us use clocks without conversion to the
  // arbitrary period unit
  typedef uint64_t raw_rep;
  typedef double raw_frequency;

#if defined(__x86_64__) || defined(_M_X64)
  // x86-64: the raw counter is the time stamp counter; its frequency is the
  // value calibrated against accurate_clock.
  static __forceinline raw_rep raw_now() { return __rdtsc(); }
  static __forceinline raw_frequency raw_freq() { return freq; }
#else
  // Other targets: the raw counter is CLOCK_MONOTONIC_RAW in nanoseconds.
  static __forceinline raw_rep raw_now() {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return (raw_rep(ts.tv_sec) * 1000000000 + raw_rep(ts.tv_nsec));
  }
  // raw_now() counts nanoseconds, so the tick frequency is 1e9 Hz.  (This
  // previously returned 1e-9, which is the tick period in seconds.)
  static __forceinline raw_frequency raw_freq() { return 1.e9; }
#endif

 private:
  static double period_ps;    // Picoseconds per raw tick.
  static raw_frequency freq;  // Calibrated raw tick frequency in Hz.

  // Static-initialization hook; its constructor performs the calibration.
  class init {
   public:
    init();
  };
  static init fast_clock_init;
};
}   //  namespace timer
}   //  namespace rocr  

#endif


================================================
FILE: runtime/hsa-runtime/core/util/utils.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// Generally useful utility functions

#ifndef HSA_RUNTIME_CORE_UTIL_UTILS_H_
#define HSA_RUNTIME_CORE_UTIL_UTILS_H_

#include "stdint.h"
#include "stddef.h"
#include "stdlib.h"
#include "stdarg.h"
#include "unistd.h"
#include <assert.h>
#include <iostream>
#include <string>
#include <algorithm>
#include <sstream>
#include <thread>

namespace rocr {
// Logging state, defined in another translation unit.  log_flags is the flag
// array tested by the LogPrint macro below.
// NOTE(review): log_file is presumably the stream log_printf writes to —
// confirm against its definition.
extern FILE* log_file;
extern uint8_t log_flags[8];

// Short aliases for common unsigned integer types.
typedef unsigned int uint;
typedef uint64_t uint64;

// GCC-compatible compilers: map the MSVC-style keywords used throughout the
// runtime onto GNU attributes.
#if defined(__GNUC__)
#if defined(__i386__) || defined(__x86_64__)
// x86 intrinsics header, only pulled in on x86 targets.
#include <x86intrin.h>
#endif

#define __forceinline __inline__ __attribute__((always_inline))
#define __declspec(x) __attribute__((x))
// __stdcall is deliberately defined to nothing for this compiler family.
#undef __stdcall
#define __stdcall  // __attribute__((__stdcall__))
#define __ALIGNED__(x) __attribute__((aligned(x)))

// printf-style logging entry point used by the LogPrint macro; `file` and
// `line` identify the call site.  Defined elsewhere.
void log_printf(const char* file, int line, const char* format, ...);

/// @brief: Allocates memory with the requested alignment (counterpart of the
/// MSVC function of the same name).  Release with _aligned_free.
/// @param: size(Input), number of bytes to allocate.
/// @param: alignment(Input), required alignment in bytes.
/// @return: void*, the allocation, or NULL on failure.
static __forceinline void* _aligned_malloc(size_t size, size_t alignment) {
#ifndef _ISOC11_SOURCE
  // No C11 aligned_alloc available: fall back to POSIX.
  void* block = NULL;
  const int status = posix_memalign(&block, alignment, size);
  return (status == 0) ? block : NULL;
#else
  // Note the swapped argument order relative to this wrapper.
  return aligned_alloc(alignment, size);
#endif
}
/// @brief: Releases memory obtained from _aligned_malloc by passing it to free().
/// @param: ptr(Input), pointer to release.
static __forceinline void _aligned_free(void* ptr) {
  free(ptr);
}
// MSVC on x86/x64: the MSVC keywords exist natively, so only alignment and a
// few shims for older toolchains are needed.
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
#include "intrin.h"
#define __ALIGNED__(x) __declspec(align(x))
#if (_MSC_VER < 1800)  // < VS 2013
// strtoull shim for toolchains older than VS 2013; forwards to _strtoui64.
static __forceinline unsigned long long int strtoull(const char* str,
                                                     char** endptr, int base) {
  return static_cast<unsigned long long>(_strtoui64(str, endptr, base));
}
#endif
#if (_MSC_VER < 1900)  // < VS 2015
// Map the thread_local keyword onto the MSVC extension for older toolchains.
#define thread_local __declspec(thread)
#endif
#else
// Any other compiler/processor combination is rejected at compile time.
#error "Compiler and/or processor not identified."
#endif

// STRING(x) stringizes x after macro expansion; STRING2 is the one-level
// helper that makes the expansion happen first.
#define STRING2(x) #x
#define STRING(x) STRING2(x)

// PASTE(x, y) joins two tokens after macro expansion; PASTE2 is the helper.
#define PASTE2(x, y) x##y
#define PASTE(x, y) PASTE2(x, y)

// debug_warning_n(exp, limit): in debug builds, prints a warning to stderr
// (expression text, enclosing function, file and line) whenever `exp`
// evaluates false.  Each use site keeps its own counter; once `limit`
// warnings were printed it goes quiet, and a limit of 0 means "never go
// quiet".  In NDEBUG builds the macro expands to an empty statement and `exp`
// is not evaluated.
// NOTE(review): `limit` is used unparenthesized in the expansion, so pass a
// simple value.  The debug expansion relies on std::atomic and fprintf being
// declared by the including file.
#ifdef NDEBUG
#define debug_warning_n(exp, limit)                                                                \
  do {                                                                                             \
  } while (false)
#else
#define debug_warning_n(exp, limit)                                                                \
  do {                                                                                             \
    static std::atomic<int> count(0);                                                              \
    if (!(exp) && (limit == 0 || count < limit)) {                                                 \
      fprintf(stderr, "Warning: " STRING(exp) " in %s, " __FILE__ ":" STRING(__LINE__) "\n",       \
              __PRETTY_FUNCTION__);                                                                \
      count++;                                                                                     \
    }                                                                                              \
  } while (false)
#endif
// debug_warning(exp): the same check with no limit on repeated warnings.
#define debug_warning(exp) debug_warning_n((exp), 0)

// debug_print(fmt, ...): printf-style output to stderr in debug builds; an
// empty statement (arguments not evaluated) when NDEBUG is defined.
#ifdef NDEBUG
#define debug_print(fmt, ...)                                                                      \
  do {                                                                                             \
  } while (false)
#else
#define debug_print(fmt, ...)                                                                      \
  do {                                                                                             \
    fprintf(stderr, fmt, ##__VA_ARGS__);                                                           \
  } while (false)
#endif

// ifdebug: statement prefix that runs the following statement only in debug
// builds.  It expands to a plain `if`, so the guarded code is still compiled
// in NDEBUG builds and a following `else` would bind to it.
#ifdef NDEBUG
#define ifdebug if (false)
#else
#define ifdebug if (true)
#endif

// __FILENAME__: __FILE__ with everything up to the last '/' removed
// (evaluated at run time with strrchr).
#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)

// LogPrint(flag, format, ...): forwards a printf-style message plus the call
// site to rocr::log_printf when `flag` is set in log_flags.
// NOTE(review): the expansion already ends in ';', so `LogPrint(...);` yields
// an extra empty statement and the macro cannot be used as the body of an
// `if` that has an `else`.  Left unchanged because callers may omit their own
// semicolon.
#define LogPrint(flag, format, ...)                                                                \
  do {                                                                                             \
    if (hsa_flag_isset64(log_flags, flag))                                                         \
      rocr::log_printf(__FILENAME__, __LINE__, format, ##__VA_ARGS__);                             \
  } while (false);

// A macro to remove unused variable warnings
#define UNUSED(x) (void)(x)

// A macro to disallow the copy and move constructor and operator= functions.
// Use inside the class body; the expansion ends with ';' so no trailing
// semicolon is required at the use site.
#define DISALLOW_COPY_AND_ASSIGN(TypeName)                                                         \
  TypeName(const TypeName&) = delete;                                                              \
  TypeName(TypeName&&) = delete;                                                                   \
  void operator=(const TypeName&) = delete;                                                        \
  void operator=(TypeName&&) = delete;

/// @brief: Runs a callable when the guard goes out of scope unless the guard
/// was dismissed.  Copying a guard transfers responsibility: the source is
/// dismissed so the callable runs at most once.
template <typename lambda>
class ScopeGuard {
 public:
  /// @param: release(Input), callable invoked from the destructor.
  explicit __forceinline ScopeGuard(const lambda& release)
      : release_(release), dismiss_(false) {}

  // Transferring copy.  Members are copy-constructed in the initializer list
  // rather than default-constructed and assigned, because closure types have
  // neither a default constructor nor copy assignment.
  ScopeGuard(ScopeGuard& rhs) : release_(rhs.release_), dismiss_(rhs.dismiss_) {
    rhs.dismiss_ = true;
  }

  __forceinline ~ScopeGuard() {
    if (!dismiss_) release_();
  }

  // Transferring assignment.  Only usable when `lambda` is copy-assignable.
  // Note that the callable previously held by *this is dropped without being
  // run.
  __forceinline ScopeGuard& operator=(ScopeGuard& rhs) {
    dismiss_ = rhs.dismiss_;
    release_ = rhs.release_;
    rhs.dismiss_ = true;
    return *this;
  }

  /// @brief: Cancels the guard; the callable will not run on destruction.
  __forceinline void Dismiss() { dismiss_ = true; }

 private:
  lambda release_;
  bool dismiss_;
};

/// @brief: Convenience factory that deduces the callable type and returns a
/// ScopeGuard wrapping it.
/// @param: rel(Input), callable to run when the returned guard is destroyed.
/// @return: ScopeGuard<lambda>.
// NOTE(review): the temporary is returned by value while ScopeGuard only has
// a copy constructor taking a non-const reference, so this relies on the
// copy being elided (guaranteed from C++17 on).
template <typename lambda>
static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) {
  return ScopeGuard<lambda>(rel);
}

// MAKE_SCOPE_GUARD_HELPER(lname, sname, callable): declares a local `lname`
// holding the callable and a ScopeGuard named `sname` that runs it at scope
// exit.  Expands to two declarations, so it cannot be used as a single
// unbraced statement.
#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) \
  auto lname = __VA_ARGS__;                        \
  ScopeGuard<decltype(lname)> sname(lname);
// MAKE_SCOPE_GUARD(callable): anonymous guard; both identifiers are made
// unique with __COUNTER__.
#define MAKE_SCOPE_GUARD(...)                                   \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \
                          PASTE(scopeGuard, __COUNTER__), __VA_ARGS__)
// MAKE_NAMED_SCOPE_GUARD(name, callable): guard with a caller-chosen name, so
// it can be dismissed later via name.Dismiss().
#define MAKE_NAMED_SCOPE_GUARD(name, ...)                             \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \
                          __VA_ARGS__)

/// @brief: Returns the smaller of two inputs; the type must support the ">"
/// operator.  When neither is greater, the first argument is returned.
/// @param: a(Input), a reference to type T.
/// @param: b(Input), a reference to type T.
/// @return: T.
template <class T>
static __forceinline T Min(const T& a, const T& b) {
  if (a > b) return b;
  return a;
}

/// @brief: Variadic form: returns the smallest of three or more inputs by
/// folding the two-argument form from the right.
template <class T, class... Arg>
static __forceinline T Min(const T& a, const T& b, Arg... args) {
  const T smallest_of_rest = Min(b, args...);
  return Min(a, smallest_of_rest);
}

/// @brief: Returns the larger of two inputs; the type must support the ">"
/// operator.  When neither is greater, the first argument is returned.
/// @param: a(Input), a reference to type T.
/// @param: b(Input), a reference to type T.
/// @return: T.
template <class T>
static __forceinline T Max(const T& a, const T& b) {
  if (b > a) return b;
  return a;
}

/// @brief: Variadic form: returns the largest of three or more inputs by
/// folding the two-argument form from the right.
template <class T, class... Arg>
static __forceinline T Max(const T& a, const T& b, Arg... args) {
  const T largest_of_rest = Max(b, args...);
  return Max(a, largest_of_rest);
}

/// @brief: Function object that destroys an object previously created with
/// `new`; usable as a deleter in standard algorithms.
/// @param: ptr(Input), pointer to the object to delete.
/// @return: void.
struct DeleteObject {
  template <class T>
  void operator()(const T* ptr) const { delete ptr; }
};

/// @brief: Checks whether a value has at most one bit set.  Note that this
/// also reports true for 0, so exclude 0 beforehand if that matters.
/// @param: val(Input), the data to be checked.
/// @return: bool.
template <typename T>
static __forceinline bool IsPowerOfTwo(T val) {
  // Clearing the lowest set bit leaves nothing only for 0 and powers of two.
  return !(val & (val - 1));
}

/// @brief: Rounds a value down to the nearest multiple of the alignment.
/// A value that is already a multiple is returned unchanged.
/// @param: value(Input), value to be rounded.
/// @param: alignment(Input), alignment value.
/// @return: T.
template <typename T>
static __forceinline T AlignDown(T value, size_t alignment) {
  const auto whole_units = value / alignment;
  return (T)(whole_units * alignment);
}

/// @brief: Pointer overload of the function above: rounds the address held
/// by the pointer down to a multiple of the alignment.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, pointer to type T.
template <typename T>
static __forceinline T* AlignDown(T* value, size_t alignment) {
  const intptr_t address = (intptr_t)value;
  return (T*)AlignDown(address, alignment);
}

/// @brief: Rounds a value up to the nearest multiple of the alignment.
/// A value that is already a multiple is returned unchanged.
/// @param: value(Input), value to be rounded.
/// @param: alignment(Input), alignment value.
/// @return: T.
template <typename T>
static __forceinline T AlignUp(T value, size_t alignment) {
  // Bump past the next boundary, then round back down onto it.
  const T bumped = (T)(value + alignment - 1);
  return AlignDown(bumped, alignment);
}

/// @brief: Pointer overload of the function above: rounds the address held
/// by the pointer up to a multiple of the alignment.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, pointer to type T.
template <typename T>
static __forceinline T* AlignUp(T* value, size_t alignment) {
  const intptr_t bumped = (intptr_t)((uint8_t*)value + alignment - 1);
  return (T*)AlignDown(bumped, alignment);
}

/// @brief: Checks whether a value sits exactly on an alignment boundary,
/// i.e. rounding it up to the alignment leaves it unchanged.
/// @param: value(Input), value to be checked.
/// @param: alignment(Input), alignment value.
/// @return: bool.
template <typename T>
static __forceinline bool IsMultipleOf(T value, size_t alignment) {
  const T rounded = AlignUp(value, alignment);
  return rounded == value;
}

/// @brief: Pointer overload of the function above: checks whether the
/// address held by the pointer is a multiple of the alignment.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: bool.
template <typename T>
static __forceinline bool IsMultipleOf(T* value, size_t alignment) {
  T* const rounded = AlignUp(value, alignment);
  return rounded == value;
}

/// @brief: Returns the smallest power of two that is >= value (1 for 0).
/// The result wraps to 0 when it is not representable in 32 bits.
/// @param: value(Input), 32-bit input.
/// @return: uint32_t.
static __forceinline uint32_t NextPow2(uint32_t value) {
  if (value == 0) return 1;
  // Smear the highest set bit of (value - 1) into every lower position,
  // then add one to land on the next power of two.
  uint32_t smeared = value - 1;
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

/// @brief: 64-bit overload of the function above.
/// @param: value(Input), 64-bit input.
/// @return: uint64_t.
static __forceinline uint64_t NextPow2(uint64_t value) {
  if (value == 0) return 1;
  uint64_t smeared = value - 1;
  for (unsigned shift = 1; shift <= 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

/// @brief: Checks whether a C string has zero length.
/// @param: str(Input), NUL-terminated string; it is dereferenced, so it must
/// not be null.
/// @return: bool, true if the first character is the terminator.
static __forceinline bool strIsEmpty(const char* str) noexcept {
  return *str == '\0';
}

/// @brief: Removes leading whitespace (per the classic "C" locale) in place.
/// @param: s(Input/Output), string to trim.
/// @return: std::string&, the same string object.
static __forceinline std::string& ltrim(std::string& s) {
  const std::locale& c_locale = std::locale::classic();
  std::string::iterator first_kept = s.begin();
  while (first_kept != s.end() && std::isspace<char>(*first_kept, c_locale)) {
    ++first_kept;
  }
  s.erase(s.begin(), first_kept);
  return s;
}

/// @brief: Removes trailing whitespace (per the classic "C" locale) in place.
/// @param: s(Input/Output), string to trim.
/// @return: std::string&, the same string object.
static __forceinline std::string& rtrim(std::string& s) {
  const std::locale& c_locale = std::locale::classic();
  std::string::size_type kept = s.size();
  while (kept > 0 && std::isspace<char>(s[kept - 1], c_locale)) {
    --kept;
  }
  s.erase(kept);
  return s;
}

/// @brief: Removes whitespace from both ends of the string in place
/// (trailing first, then leading).
/// @param: s(Input/Output), string to trim.
/// @return: std::string&, the same string object.
static __forceinline std::string& trim(std::string& s) {
  std::string& right_trimmed = rtrim(s);
  return ltrim(right_trimmed);
}

/// @brief: Flush the cachelines associated with the
/// provided address, offset, and length.  Every cache line overlapping
/// [base + offset, base + offset + len) is flushed with clflush.  Does nothing
/// when len is 0 or when the L1 data cache line size cannot be determined.
/// @param: base(Input), base address to flush
/// @param: offset(Input), offset of base address to flush
/// @param: len(Input), length of buffer to flush
inline void FlushCpuCache(const void* base, size_t offset, size_t len) {
  // An empty range overlaps no cache line.  Without this guard the loop below
  // would still flush the line at base + offset, which faults if that address
  // is not mapped.
  if (len == 0) return;

  // Line size is queried once and cached for subsequent calls.
  static long cacheline_size = 0;

  if (!cacheline_size) {
#ifdef _SC_LEVEL1_DCACHE_LINESIZE
    long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
#else
    long sz = 0;
#endif
    if (sz <= 0) return;
    cacheline_size = sz;
  }

  const char* cur = (const char*)base;
  cur += offset;
  // Address of the last byte of the cache line that holds the final byte of
  // the range (assumes the line size is a power of two).
  uintptr_t lastline = (uintptr_t)(cur + len - 1) | (cacheline_size - 1);
  do {
    _mm_clflush((const void*)cur);
    cur += cacheline_size;
  } while (cur <= (const char*)lastline);
}

}  // namespace rocr

/// @brief: Extracts bits [lowBit, highBit] (inclusive, bit 0 = least
/// significant) of a value and returns them shifted down to bit 0.
/// The result is truncated to 32 bits.
/// @param: p(Input), value no wider than uintptr_t.
/// @return: uint32_t.
template <uint32_t lowBit, uint32_t highBit, typename T>
static __forceinline uint32_t BitSelect(T p) {
  static_assert(sizeof(T) <= sizeof(uintptr_t), "Type out of range.");
  static_assert(highBit < sizeof(uintptr_t) * 8, "Bit index out of range.");

  const uintptr_t bits = p;
  // When the field reaches the top bit there is nothing above it to mask off.
  if (highBit == (sizeof(uintptr_t) * 8 - 1)) {
    return (uint32_t)(bits >> lowBit);
  }
  return (uint32_t)((bits & ((1ull << (highBit + 1)) - 1)) >> lowBit);
}

/// @brief: Returns bits [8, 15] of the pointer's address.
inline uint32_t PtrLow16Shift8(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>((address >> 8) & 0xFFu);
}

/// @brief: Returns the pointer's address shifted right by 16 bits, truncated
/// to 32 bits (i.e. address bits [16, 47]).
inline uint32_t PtrHigh64Shift16(const void* p) {
  const unsigned long long address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 16);
}

/// @brief: Returns bits [8, 39] of the pointer's address.
inline uint32_t PtrLow40Shift8(const void* p) {
  const unsigned long long address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 8);
}

/// @brief: Returns bits [40, 63] of the pointer's address.
inline uint32_t PtrHigh64Shift40(const void* p) {
  // Widen first so the shift is well defined even if uintptr_t is 32 bits.
  const unsigned long long address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 40);
}

/// @brief: Returns the low 32 bits of the pointer's address.
inline uint32_t PtrLow32(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address);
}

/// @brief: Returns the high 32 bits of the pointer's address when built with
/// HSA_LARGE_MODEL; otherwise always returns 0.
inline uint32_t PtrHigh32(const void* p) {
#ifdef HSA_LARGE_MODEL
  return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p) >> 32);
#else
  (void)p;
  return 0;
#endif
}

/// @brief: Concatenates two numbers of type InType to a number of type OutType
/// Only participates in overload resolution when both types are integral and
/// OutType is at least twice as wide as InType.
/// @param: hi(Input), To be placed in the upper bits of the output
/// @param: lo(Input), To be placed in the lower bits of the output
/// @return: OutType, Concatenation of hi and lo
template <typename OutType, typename InType>
typename std::enable_if<std::is_integral<OutType>::value && std::is_integral<InType>::value &&
                            sizeof(OutType) >= 2 * sizeof(InType),
                        OutType>::type
Concat(InType hi, InType lo) {
  // Work in the unsigned counterpart of OutType so shifting is well defined.
  typedef typename std::make_unsigned<OutType>::type Wide;
  const unsigned shift = sizeof(InType) * 8;

  // Keep only the low sizeof(InType)*8 bits of `lo`.  Without the mask a
  // negative signed `lo` sign-extends and overwrites the bits owned by `hi`.
  const Wide lo_mask = static_cast<Wide>((static_cast<Wide>(1) << shift) - 1);
  const Wide lo_bits = static_cast<Wide>(static_cast<Wide>(lo) & lo_mask);
  const Wide hi_bits = static_cast<Wide>(static_cast<Wide>(hi) << shift);

  return static_cast<OutType>(hi_bits | lo_bits);
}


#include "atomic_helpers.h"

#endif  // HSA_RUNTIME_CORE_UTIL_UTILS_H_


================================================
FILE: runtime/hsa-runtime/core/util/win/os_win.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifdef _WIN32  // Are we compiling for windows?
#define NOMINMAX

#include "core/util/os.h"

#include <algorithm>
#include <process.h>
#include <string>
#include <windows.h>

#include <emmintrin.h>
#include <pmmintrin.h>
#include <xmmintrin.h>

#undef Yield
#undef CreateMutex

namespace rocr {
namespace os {

static_assert(sizeof(LibHandle) == sizeof(HMODULE),
              "OS abstraction size mismatch");
static_assert(sizeof(LibHandle) == sizeof(::HANDLE),
              "OS abstraction size mismatch");
static_assert(sizeof(Semaphore) == sizeof(::HANDLE),
              "OS abstraction size mismatch");
static_assert(sizeof(Mutex) == sizeof(::HANDLE),
              "OS abstraction size mismatch");
static_assert(sizeof(Thread) == sizeof(::HANDLE),
              "OS abstraction size mismatch");
static_assert(sizeof(EventHandle) == sizeof(::HANDLE),
              "OS abstraction size mismatch");

// Loads the library named by `filename` with LoadLibrary and hands the
// resulting HMODULE back reinterpreted as the OS-neutral LibHandle.
LibHandle LoadLib(std::string filename) {
  HMODULE module_handle = LoadLibrary(filename.c_str());
  LibHandle* as_lib_handle = (LibHandle*)&module_handle;
  return *as_lib_handle;
}

// Looks up `export_name` in the library identified by `lib` and returns the
// address reported by GetProcAddress as a plain void*.
void* GetExportAddress(LibHandle lib, std::string export_name) {
  // GetProcAddress yields a function pointer (FARPROC); converting that to
  // void* is not an implicit conversion in standard C++, so spell it out.
  return reinterpret_cast<void*>(
      GetProcAddress(*(HMODULE*)&lib, export_name.c_str()));
}

// Releases a library handle by passing it, viewed as an HMODULE, to
// FreeLibrary. The FreeLibrary result is not inspected.
void CloseLib(LibHandle lib) {
  ::HMODULE* module_handle = (::HMODULE*)&lib;
  FreeLibrary(*module_handle);
}

// Not implemented on Windows. A static_assert(false) here would make the whole
// translation unit ill-formed, so this follows the runtime stub pattern used by
// the other unimplemented functions in this file: assert, then abort.
std::vector<LibHandle> GetLoadedLibs() {
  // TODO: implement using EnumProcessModulesEx.
  assert(false && "Not implemented.");
  abort();
  return std::vector<LibHandle>();
}

// Not implemented on Windows. A static_assert(false) here would make the whole
// translation unit ill-formed, so this follows the runtime stub pattern used by
// the other unimplemented functions in this file: assert, then abort.
std::string GetLibraryName(LibHandle lib) {
  assert(false && "Not implemented.");
  abort();
  return std::string();
}

// Creates an unnamed Win32 semaphore with an initial count of 0 and a maximum
// count of LONG_MAX, returned as the OS-neutral Semaphore handle.
Semaphore CreateSemaphore() {
  // Call the Win32 API through its explicitly global, suffixed name: an
  // unqualified CreateSemaphore(...) inside this function resolves to this
  // zero-argument os function rather than to the system call.
  ::HANDLE sem = ::CreateSemaphoreA(NULL, 0, LONG_MAX, NULL);
  assert(sem != NULL && "CreateSemaphore failed");

  return *(Semaphore*)&sem;
}

// Blocks without timeout on the semaphore. Returns true only when
// WaitForSingleObject reports WAIT_OBJECT_0.
bool WaitSemaphore(Semaphore sem) {
  // Wait on the `sem` parameter (the previous body named a non-existent
  // `lock` variable).
  return WaitForSingleObject(*(::HANDLE*)&sem, INFINITE) == WAIT_OBJECT_0;
}

// Increments the semaphore count by one via ReleaseSemaphore. The previous
// count is not requested and the call's result is not inspected.
void PostSemaphore(Semaphore sem) {
  // The handle is an opaque value, not a pointer to one: reinterpret it the
  // same way the other wrappers in this file do instead of dereferencing it.
  ReleaseSemaphore(*(::HANDLE*)&sem, 1, NULL);
}

// Closes the semaphore handle. A CloseHandle failure trips an assert in builds
// where asserts are enabled and is otherwise ignored.
void DestroySemaphore(Semaphore sem) {
  if (!CloseHandle(*(::HANDLE*)&sem)) {
    // `false &&` is required: a bare string literal is always truthy, so
    // assert("...") can never fire.
    assert(false && "CloseHandle() failed");
  }
  // `sem` is received by value, so there is no caller-side handle to clear.
}

// The mutex is backed by an unnamed auto-reset event that starts signaled:
// acquiring waits on the event, releasing sets it again.
Mutex CreateMutex() {
  const BOOL manual_reset = FALSE;
  const BOOL initially_signaled = TRUE;
  return CreateEvent(NULL, manual_reset, initially_signaled, NULL);
}

// Non-blocking acquire: polls the underlying handle with a zero timeout and
// reports whether the wait completed with WAIT_OBJECT_0.
bool TryAcquireMutex(Mutex lock) {
  ::HANDLE* handle = (::HANDLE*)&lock;
  const DWORD wait_status = WaitForSingleObject(*handle, 0);
  return wait_status == WAIT_OBJECT_0;
}

// Blocking acquire: waits on the underlying handle with no timeout and reports
// whether the wait completed with WAIT_OBJECT_0.
bool AcquireMutex(Mutex lock) {
  ::HANDLE* handle = (::HANDLE*)&lock;
  const DWORD wait_status = WaitForSingleObject(*handle, INFINITE);
  return wait_status == WAIT_OBJECT_0;
}

// Releases the mutex by signaling the event that backs it.
void ReleaseMutex(Mutex lock) {
  ::HANDLE* handle = (::HANDLE*)&lock;
  SetEvent(*handle);
}

// Destroys the mutex by closing its underlying handle; the CloseHandle result
// is not checked.
void DestroyMutex(Mutex lock) {
  ::HANDLE* handle = (::HANDLE*)&lock;
  CloseHandle(*handle);
}

void Sleep(int delay_in_millisecond) { ::Sleep(delay_in_millisecond); }

void uSleep(int delayInUs) { ::Sleep(delayInUs / 1000); }

void YieldThread() { ::Sleep(0); }

// Heap-allocated bundle handed from CreateThread to ThreadTrampoline: the
// caller's entry point plus the argument to invoke it with. The trampoline
// deletes it after copying both fields out.
struct ThreadArgs {
  void* entry_args;            // Argument forwarded to entry_function.
  ThreadEntry entry_function;  // Caller-supplied thread entry point.
};

// Start routine given to _beginthreadex. Unpacks the ThreadArgs bundle, frees
// it, runs the caller's entry point and then ends the thread with exit code 0.
unsigned __stdcall ThreadTrampoline(void* arg) {
  ThreadArgs* bundle = (ThreadArgs*)arg;

  // Copy both fields out first so the bundle can be released before the
  // user's code starts running.
  void* user_data = bundle->entry_args;
  ThreadEntry user_entry = bundle->entry_function;
  delete bundle;

  user_entry(user_data);

  _endthreadex(0);
  return 0;
}

// Starts a new thread running entry_function(entry_argument).
//
// entry_function / entry_argument: packed into a heap-allocated ThreadArgs that
//   ThreadTrampoline unpacks and frees on the new thread.
// stack_size: forwarded to _beginthreadex.
// priority_unused: ignored.
//
// Returns the value produced by _beginthreadex reinterpreted as a Thread; this
// is a zero handle when thread creation failed.
Thread CreateThread(ThreadEntry entry_function, void* entry_argument,
                    uint stack_size, int priority_unused) {
  ThreadArgs* thread_args = new ThreadArgs();
  thread_args->entry_args = entry_argument;
  thread_args->entry_function = entry_function;
  uintptr_t ret =
      _beginthreadex(NULL, stack_size, ThreadTrampoline, thread_args, 0, NULL);
  if (ret == 0) {
    // _beginthreadex reports failure with 0; the trampoline will never run,
    // so nobody else is going to free the argument bundle.
    delete thread_args;
  }
  return *(Thread*)&ret;
}

// Closes the handle that refers to the thread; the CloseHandle result is not
// checked.
void CloseThread(Thread thread) {
  ::HANDLE* handle = (::HANDLE*)&thread;
  CloseHandle(*handle);
}

// Waits with no timeout on the thread handle and reports whether the wait
// completed with WAIT_OBJECT_0.
bool WaitForThread(Thread thread) {
  ::HANDLE* handle = (::HANDLE*)&thread;
  const DWORD wait_status = WaitForSingleObject(*handle, INFINITE);
  return wait_status == WAIT_OBJECT_0;
}

// Waits with no timeout on `thread_count` handles starting at `threads`, asking
// WaitForMultipleObjects to wait for all of them. Returns true only when the
// call reports exactly WAIT_OBJECT_0.
bool WaitForAllThreads(Thread* threads, uint thread_count) {
  const DWORD wait_status =
      WaitForMultipleObjects(thread_count, threads, TRUE, INFINITE);
  return wait_status == WAIT_OBJECT_0;
}

// Sets an environment variable of the current process through
// SetEnvironmentVariable; the call's result is not checked.
void SetEnvVar(std::string env_var_name, std::string env_var_value) {
  const char* name = env_var_name.c_str();
  const char* value = env_var_value.c_str();
  SetEnvironmentVariable(name, value);
}

std::string GetEnvVar(std::string env_var_name) {
  char* buff;
  DWORD char_count = GetEnvironmentVariable(env_var_name.c_str(), NULL, 0);
  if (char_count == 0) return "";
  buff = (char*)alloca(sizeof(char) * char_count);
  GetEnvironmentVariable(env_var_name.c_str(), buff, char_count);
  buff[char_count - 1] = '\0';
  std::string ret = buff;
  return ret;
}

size_t GetUserModeVirtualMemorySize() {
  SYSTEM_INFO system_info = {0};
  GetSystemInfo(&system_info);
  return ((size_t)system_info.lpMaximumApplicationAddress + 1);
}

size_t GetUsablePhysicalHostMemorySize() {
  MEMORYSTATUSEX memory_status = {0};
  memory_status.dwLength = sizeof(memory_status);
  if (GlobalMemoryStatusEx(&memory_status) == 0) {
    return 0;
  }

  const size_t physical_size = static_cast<size_t>(memory_status.ullTotalPhys);
  return std::min(GetUserModeVirtualMemorySize(), physical_size);
}

// The user-mode virtual address range is reported as starting at address 0.
uintptr_t GetUserModeVirtualMemoryBase() {
  const uintptr_t base_address = 0;
  return base_address;
}

// Os event wrappers
// Creates an unnamed Win32 event. `auto_reset` is inverted to obtain the
// manual-reset flag CreateEvent expects; `init_state` is passed as the initial
// signaled state.
EventHandle CreateOsEvent(bool auto_reset, bool init_state) {
  const BOOL manual_reset = (BOOL)(!auto_reset);
  const BOOL initially_signaled = (BOOL)init_state;
  return reinterpret_cast<EventHandle>(
      CreateEvent(NULL, manual_reset, initially_signaled, NULL));
}

// Closes the event handle. Returns -1 for a null event, otherwise whatever
// CloseHandle returns.
int DestroyOsEvent(EventHandle event) {
  return (event != NULL) ? CloseHandle(reinterpret_cast<::HANDLE>(event)) : -1;
}

// Waits on the event for up to `milli_seconds`. Returns -1 for a null event,
// 0x14003 when the wait timed out, and otherwise the WaitForSingleObject
// result unchanged.
int WaitForOsEvent(EventHandle event, unsigned int milli_seconds) {
  if (event == NULL) {
    return -1;
  }

  const int wait_result =
      WaitForSingleObject(reinterpret_cast<::HANDLE>(event), milli_seconds);
  // 0x14003 indicates timeout
  return (wait_result == WAIT_TIMEOUT) ? 0x14003 : wait_result;
}

// Signals the event. Returns -1 for a null event, otherwise whatever SetEvent
// returns.
int SetOsEvent(EventHandle event) {
  return (event != NULL) ? SetEvent(reinterpret_cast<::HANDLE>(event)) : -1;
}

// Resets the event to non-signaled. Returns -1 for a null event, otherwise
// whatever ResetEvent returns.
int ResetOsEvent(EventHandle event) {
  return (event != NULL) ? ResetEvent(reinterpret_cast<::HANDLE>(event)) : -1;
}

uint64_t ReadAccurateClock() {
  uint64_t ret;
  QueryPerformanceCounter((LARGE_INTEGER*)&ret);
  return ret;
}

uint64_t AccurateClockFrequency() {
  uint64_t ret;
  QueryPerformanceFrequency((LARGE_INTEGER*)&ret);
  return ret;
}

// Stub: shared mutexes are not implemented in this Windows backend. Trips an
// assert when asserts are enabled and then aborts; the return is never reached.
SharedMutex CreateSharedMutex() {
  assert(false && "Not implemented.");
  abort();
  return nullptr;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused and the return is never reached.
bool TryAcquireSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
  return false;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused and the return is never reached.
bool AcquireSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
  return false;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused.
void ReleaseSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused and the return is never reached.
bool TrySharedAcquireSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
  return false;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused and the return is never reached.
bool SharedAcquireSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
  return false;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused.
void SharedReleaseSharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `lock` is unused.
void DestroySharedMutex(SharedMutex lock) {
  assert(false && "Not implemented.");
  abort();
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; the return is never reached.
uint64_t ReadSystemClock() {
  assert(false && "Not implemented.");
  abort();
  return 0;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; the return is never reached.
uint64_t SystemClockFrequency() {
  assert(false && "Not implemented.");
  abort();
  return 0;
}

// Stub: not implemented in this Windows backend. Trips an assert when asserts
// are enabled and then aborts; `cpuinfo` is never written and the return is
// never reached.
bool ParseCpuID(cpuid_t* cpuinfo) {
  assert(false && "Not implemented.");
  abort();
  return false;
}

}   //  namespace os
}   //  namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/hsa-runtime64-config.cmake.in
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Placeholder substituted when this template is configured (presumably by
# configure_package_config_file(), which would also be what supplies
# check_required_components() used at the end of this file).
@PACKAGE_INIT@

# Provides find_dependency().
include( CMakeFindDependencyMacro )

# Client apps only need our private dependencies if rocr is a static lib.
set( _is_hsa_runtime_dynamic @BUILD_SHARED_LIBS@ )
if( NOT _is_hsa_runtime_dynamic )

  # Add this config file's own directory to the module search path
  # (presumably so find modules installed next to it can be located).
  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_LIST_DIR}")

  find_dependency(hsakmt 1.0)
  find_dependency(LibElf)

endif()

# Pull in the targets file located in the same directory as this config file.
include( "${CMAKE_CURRENT_LIST_DIR}/@CORE_RUNTIME_NAME@Targets.cmake" )

check_required_components(@CORE_RUNTIME_NAME@)


================================================
FILE: runtime/hsa-runtime/hsacore.so.def
================================================
ROCR_1
{
global:
	hsa_init;
	hsa_shut_down;
	hsa_system_get_info;
	hsa_extension_get_name;
	hsa_system_extension_supported;
	hsa_system_major_extension_supported;
	hsa_system_get_extension_table;
	hsa_system_get_major_extension_table;
	hsa_iterate_agents;
	hsa_agent_get_info;
	hsa_agent_get_exception_policies;
	hsa_cache_get_info;
	hsa_agent_iterate_caches;
	hsa_agent_extension_supported;
	hsa_agent_major_extension_supported;
	hsa_queue_create;
	hsa_soft_queue_create;
	hsa_queue_destroy;
	hsa_queue_inactivate;
	hsa_queue_load_read_index_scacquire;
	hsa_queue_load_read_index_acquire;
	hsa_queue_load_read_index_relaxed;
	hsa_queue_load_write_index_scacquire;
	hsa_queue_load_write_index_acquire;
	hsa_queue_load_write_index_relaxed;
	hsa_queue_store_write_index_relaxed;
	hsa_queue_store_write_index_screlease;
	hsa_queue_store_write_index_release;
	hsa_queue_cas_write_index_scacq_screl;
	hsa_queue_cas_write_index_acq_rel;
	hsa_queue_cas_write_index_scacquire;
	hsa_queue_cas_write_index_acquire;
	hsa_queue_cas_write_index_relaxed;
	hsa_queue_cas_write_index_screlease;
	hsa_queue_cas_write_index_release;
	hsa_queue_add_write_index_scacq_screl;
	hsa_queue_add_write_index_acq_rel;
	hsa_queue_add_write_index_scacquire;
	hsa_queue_add_write_index_acquire;
	hsa_queue_add_write_index_relaxed;
	hsa_queue_add_write_index_screlease;
	hsa_queue_add_write_index_release;
	hsa_queue_store_read_index_relaxed;
	hsa_queue_store_read_index_screlease;
	hsa_queue_store_read_index_release;
	hsa_agent_iterate_regions;
	hsa_region_get_info;
	hsa_memory_register;
	hsa_memory_deregister;
	hsa_memory_allocate;
	hsa_memory_free;
	hsa_memory_copy;
	hsa_memory_assign_agent;
	hsa_signal_create;
	hsa_signal_destroy;
	hsa_signal_load_relaxed;
	hsa_signal_load_scacquire;
	hsa_signal_load_acquire;
	hsa_signal_store_relaxed;
	hsa_signal_store_screlease;
	hsa_signal_store_release;
	hsa_signal_silent_store_relaxed;
	hsa_signal_silent_store_screlease;
	hsa_signal_wait_relaxed;
	hsa_signal_wait_scacquire;
	hsa_signal_wait_acquire;
	hsa_signal_group_create;
	hsa_signal_group_destroy;
	hsa_signal_group_wait_any_scacquire;
	hsa_signal_group_wait_any_relaxed;
	hsa_signal_and_relaxed;
	hsa_signal_and_scacquire;
	hsa_signal_and_acquire;
	hsa_signal_and_screlease;
	hsa_signal_and_release;
	hsa_signal_and_scacq_screl;
	hsa_signal_and_acq_rel;
	hsa_signal_or_relaxed;
	hsa_signal_or_scacquire;
	hsa_signal_or_acquire;
	hsa_signal_or_screlease;
	hsa_signal_or_release;
	hsa_signal_or_scacq_screl;
	hsa_signal_or_acq_rel;
	hsa_signal_xor_relaxed;
	hsa_signal_xor_scacquire;
	hsa_signal_xor_acquire;
	hsa_signal_xor_screlease;
	hsa_signal_xor_release;
	hsa_signal_xor_scacq_screl;
	hsa_signal_xor_acq_rel;
	hsa_signal_exchange_relaxed;
	hsa_signal_exchange_scacquire;
	hsa_signal_exchange_acquire;
	hsa_signal_exchange_screlease;
	hsa_signal_exchange_release;
	hsa_signal_exchange_scacq_screl;
	hsa_signal_exchange_acq_rel;
	hsa_signal_add_relaxed;
	hsa_signal_add_scacquire;
	hsa_signal_add_acquire;
	hsa_signal_add_screlease;
	hsa_signal_add_release;
	hsa_signal_add_scacq_screl;
	hsa_signal_add_acq_rel;
	hsa_signal_subtract_relaxed;
	hsa_signal_subtract_scacquire;
	hsa_signal_subtract_acquire;
	hsa_signal_subtract_screlease;
	hsa_signal_subtract_release;
	hsa_signal_subtract_scacq_screl;
	hsa_signal_subtract_acq_rel;
	hsa_signal_cas_relaxed;
	hsa_signal_cas_scacquire;
	hsa_signal_cas_acquire;
	hsa_signal_cas_screlease;
	hsa_signal_cas_release;
	hsa_signal_cas_scacq_screl;
	hsa_signal_cas_acq_rel;
	hsa_isa_from_name;
	hsa_agent_iterate_isas;
	hsa_isa_get_info;
	hsa_isa_get_info_alt;
	hsa_isa_get_exception_policies;
	hsa_isa_get_round_method;
	hsa_wavefront_get_info;
	hsa_isa_iterate_wavefronts;
	hsa_isa_compatible;
	hsa_code_object_serialize;
	hsa_code_object_deserialize;
	hsa_code_object_destroy;
	hsa_code_object_get_info;
	hsa_code_object_get_symbol;
	hsa_code_object_get_symbol_from_name;
	hsa_code_symbol_get_info;
	hsa_code_object_iterate_symbols;
	hsa_code_object_reader_create_from_file;
	hsa_code_object_reader_create_from_memory;
	hsa_code_object_reader_destroy;
	hsa_executable_create;
	hsa_executable_create_alt;
	hsa_executable_destroy;
	hsa_executable_load_code_object;
	hsa_executable_load_program_code_object;
	hsa_executable_load_agent_code_object;
	hsa_executable_freeze;
	hsa_executable_get_info;
	hsa_executable_global_variable_define;
	hsa_executable_agent_global_variable_define;
	hsa_executable_readonly_variable_define;
	hsa_executable_validate;
	hsa_executable_validate_alt;
	hsa_executable_get_symbol;
	hsa_executable_get_symbol_by_name;
	hsa_executable_symbol_get_info;
	hsa_executable_iterate_symbols;
	hsa_executable_iterate_agent_symbols;
	hsa_executable_iterate_program_symbols;
	hsa_status_string;
	hsa_ext_program_create;
	hsa_ext_program_destroy;
	hsa_ext_program_add_module;
	hsa_ext_program_iterate_modules;
	hsa_ext_program_get_info;
	hsa_ext_program_finalize;
	hsa_amd_coherency_get_type;
	hsa_amd_coherency_set_type;
	hsa_amd_profiling_set_profiler_enabled;
	hsa_amd_profiling_get_dispatch_time;
	hsa_amd_profiling_async_copy_enable;
	hsa_amd_profiling_get_async_copy_time;
	hsa_amd_profiling_convert_tick_to_system_domain;
	hsa_amd_signal_create;
	hsa_amd_signal_wait_any;
	hsa_amd_signal_async_handler;
	hsa_amd_async_function;
	hsa_amd_image_get_info_max_dim;
	hsa_amd_queue_cu_set_mask;
	hsa_amd_queue_cu_get_mask;
	hsa_amd_memory_fill;
	hsa_amd_memory_async_copy;
	hsa_amd_memory_async_copy_on_engine;
	hsa_amd_memory_copy_engine_status;
	hsa_amd_memory_get_preferred_copy_engine;
	hsa_amd_memory_async_copy_rect;
	hsa_amd_memory_lock;
	hsa_amd_memory_lock_to_pool;
	hsa_amd_memory_unlock;
	hsa_amd_agent_iterate_memory_pools;
	hsa_amd_agent_memory_pool_get_info;
	hsa_amd_agents_allow_access;
	hsa_amd_memory_pool_get_info;
	hsa_amd_memory_pool_allocate;
	hsa_amd_memory_pool_free;
	hsa_amd_memory_pool_can_migrate;
	hsa_amd_memory_migrate;
	hsa_amd_interop_map_buffer;
	hsa_amd_interop_unmap_buffer;
	hsa_amd_image_create;
	hsa_ext_image_get_capability;
	hsa_ext_image_data_get_info;
	hsa_ext_image_create;
	hsa_ext_image_import;
	hsa_ext_image_export;
	hsa_ext_image_copy;
	hsa_ext_image_clear;
	hsa_ext_image_destroy;
	hsa_ext_sampler_create;
	hsa_ext_sampler_create_v2;
	hsa_ext_sampler_destroy;
	hsa_ext_image_get_capability_with_layout;
	hsa_ext_image_data_get_info_with_layout;
	hsa_ext_image_create_with_layout;
	hsa_amd_pointer_info;
	hsa_amd_pointer_info_set_userdata;
	hsa_amd_ipc_memory_create;
	hsa_amd_ipc_memory_attach;
	hsa_amd_ipc_memory_detach;
	hsa_amd_ipc_signal_create;
	hsa_amd_ipc_signal_attach;
	hsa_amd_register_system_event_handler;
	hsa_amd_queue_set_priority;
	hsa_amd_register_deallocation_callback;
	hsa_amd_deregister_deallocation_callback;
	hsa_amd_signal_value_pointer;
	_amdgpu_r_debug;
	hsa_amd_svm_attributes_set;
	hsa_amd_svm_attributes_get;
	hsa_amd_svm_prefetch_async;
	hsa_amd_spm_acquire;
	hsa_amd_spm_release;
	hsa_amd_spm_set_dest_buffer;
	hsa_amd_portable_export_dmabuf;
	hsa_amd_portable_close_dmabuf;
	hsa_amd_vmem_address_reserve;
	hsa_amd_vmem_address_reserve_align;
	hsa_amd_vmem_address_free;
	hsa_amd_vmem_handle_create;
	hsa_amd_vmem_handle_release;
	hsa_amd_vmem_map;
	hsa_amd_vmem_unmap;
	hsa_amd_vmem_set_access;
	hsa_amd_vmem_get_access;
	hsa_amd_vmem_export_shareable_handle;
	hsa_amd_vmem_import_shareable_handle;
	hsa_amd_vmem_retain_alloc_handle;
	hsa_amd_vmem_get_alloc_properties_from_handle;
	hsa_amd_agent_set_async_scratch_limit;
	hsa_ven_amd_pcs_iterate_configuration;
	hsa_ven_amd_pcs_create;
	hsa_ven_amd_pcs_create_from_id;
	hsa_ven_amd_pcs_destroy;
	hsa_ven_amd_pcs_start;
	hsa_ven_amd_pcs_stop;
	hsa_ven_amd_pcs_flush;
	hsa_amd_queue_get_info;
	hsa_amd_enable_logging;
	hsa_amd_signal_wait_all;
	hsa_amd_portable_export_dmabuf_v2;
local:
    *;
};


================================================
FILE: runtime/hsa-runtime/hsacore.so.link
================================================
hsa_queue_load_read_index_acquire = hsa_queue_load_read_index_scacquire;
hsa_queue_load_write_index_acquire = hsa_queue_load_write_index_scacquire;
hsa_queue_store_write_index_release = hsa_queue_store_write_index_screlease;
hsa_queue_cas_write_index_acq_rel = hsa_queue_cas_write_index_scacq_screl;
hsa_queue_cas_write_index_acquire = hsa_queue_cas_write_index_scacquire;
hsa_queue_cas_write_index_release = hsa_queue_cas_write_index_screlease;
hsa_queue_add_write_index_acq_rel = hsa_queue_add_write_index_scacq_screl;
hsa_queue_add_write_index_acquire = hsa_queue_add_write_index_scacquire;
hsa_queue_add_write_index_release = hsa_queue_add_write_index_screlease;
hsa_queue_store_read_index_release = hsa_queue_store_read_index_screlease;
hsa_signal_load_acquire = hsa_signal_load_scacquire;
hsa_signal_store_release = hsa_signal_store_screlease;
hsa_signal_wait_acquire = hsa_signal_wait_scacquire;
hsa_signal_and_acquire = hsa_signal_and_scacquire;
hsa_signal_and_release = hsa_signal_and_screlease;
hsa_signal_and_acq_rel = hsa_signal_and_scacq_screl;
hsa_signal_or_acquire = hsa_signal_or_scacquire;
hsa_signal_or_release = hsa_signal_or_screlease;
hsa_signal_or_acq_rel = hsa_signal_or_scacq_screl;
hsa_signal_xor_acquire = hsa_signal_xor_scacquire;
hsa_signal_xor_release = hsa_signal_xor_screlease;
hsa_signal_xor_acq_rel = hsa_signal_xor_scacq_screl;
hsa_signal_exchange_acquire = hsa_signal_exchange_scacquire;
hsa_signal_exchange_release = hsa_signal_exchange_screlease;
hsa_signal_exchange_acq_rel = hsa_signal_exchange_scacq_screl;
hsa_signal_add_acquire = hsa_signal_add_scacquire;
hsa_signal_add_release = hsa_signal_add_screlease;
hsa_signal_add_acq_rel = hsa_signal_add_scacq_screl;
hsa_signal_subtract_acquire = hsa_signal_subtract_scacquire;
hsa_signal_subtract_release = hsa_signal_subtract_screlease;
hsa_signal_subtract_acq_rel = hsa_signal_subtract_scacq_screl;
hsa_signal_cas_acquire = hsa_signal_cas_scacquire;
hsa_signal_cas_release = hsa_signal_cas_screlease;
hsa_signal_cas_acq_rel = hsa_signal_cas_scacq_screl;


================================================
FILE: runtime/hsa-runtime/image/addrlib/inc/addrinterface.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrinterface.h
* @brief Contains the addrlib interfaces declaration and parameter defines
****************************************************************************************************
*/
#ifndef __ADDR_INTERFACE_H__
#define __ADDR_INTERFACE_H__

// Includes should be before extern "C"
#include "addrtypes.h"

namespace rocr {
// AddrLib interface version. ADDRLIB_VERSION combines the two parts into one
// value: the major number shifted left by 16 bits, OR-ed with the minor number.
#define ADDRLIB_VERSION_MAJOR 8
#define ADDRLIB_VERSION_MINOR 10
#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR)

/// Opaque library handle; virtually all interface functions need ADDR_HANDLE
/// as their first parameter.
typedef VOID*   ADDR_HANDLE;

/// Opaque client handle, carried in the callback input structures (hClient)
typedef VOID*   ADDR_CLIENT_HANDLE;

/// Three-dimensional extent expressed as three unsigned 32-bit sizes.
typedef struct _ADDR_EXTENT3D
{
    UINT_32  width;  // extent along the first dimension
    UINT_32  height; // extent along the second dimension
    UINT_32  depth;  // also slices for 2D images
} ADDR_EXTENT3D;

/**
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                                  Callback functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*    typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
*         const ADDR_ALLOCSYSMEM_INPUT* pInput);
*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
*         const ADDR_FREESYSMEM_INPUT* pInput);
*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
*         const ADDR_DEBUGPRINT_INPUT* pInput);
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                               Create/Destroy/Config functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     AddrCreate()
*     AddrDestroy()
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                                  Surface functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     AddrComputeSurfaceInfo()
*     AddrComputeSurfaceAddrFromCoord()
*     AddrComputeSurfaceCoordFromAddr()
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                                   HTile functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     AddrComputeHtileInfo()
*     AddrComputeHtileAddrFromCoord()
*     AddrComputeHtileCoordFromAddr()
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                                   C-mask functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     AddrComputeCmaskInfo()
*     AddrComputeCmaskAddrFromCoord()
*     AddrComputeCmaskCoordFromAddr()
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                                   F-mask functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     AddrComputeFmaskInfo()
*     AddrComputeFmaskAddrFromCoord()
*     AddrComputeFmaskCoordFromAddr()
*
* /////////////////////////////////////////////////////////////////////////////////////////////////
* //                               Element/Utility functions
* /////////////////////////////////////////////////////////////////////////////////////////////////
*     ElemFlt32ToDepthPixel()
*     ElemFlt32ToColorPixel()
*     AddrExtractBankPipeSwizzle()
*     AddrCombineBankPipeSwizzle()
*     AddrComputeSliceSwizzle()
*     AddrConvertTileInfoToHW()
*     AddrConvertTileIndex()
*     AddrConvertTileIndex1()
*     AddrGetTileIndex()
*     AddrComputeBaseSwizzle()
*     AddrUseTileIndex()
*     AddrUseCombinedSwizzle()
*
**/

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                      Callback functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
* @brief channel setting structure
*
*   One byte describing a single channel bit: the three bitfields (1 + 2 + 5 bits) overlay the
*   raw `value` member of the union.
****************************************************************************************************
*/
typedef union _ADDR_CHANNEL_SETTING
{
    struct
    {
        UINT_8 valid   : 1;    ///< Indicate whether this channel setting is valid
        UINT_8 channel : 2;    ///< 0 for x channel, 1 for y channel, 2 for z channel, 3 for MSAA sample index
        UINT_8 index   : 5;    ///< Channel index
    };
    UINT_8 value;              ///< Raw 8-bit value overlaying the bitfields above
} ADDR_CHANNEL_SETTING;

/**
****************************************************************************************************
* @brief address equation key structure
*
*   The bitfields in `fields` add up to 32 bits and overlay `value`, so a key can be handled
*   either field by field or as a single 32-bit number.
****************************************************************************************************
*/
typedef union _ADDR_EQUATION_KEY
{
    struct
    {
        UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel
        UINT_32 tileMode         : 5; ///< Tile mode
        UINT_32 microTileType    : 3; ///< Micro tile type
        UINT_32 pipeConfig       : 5; ///< pipe config
        UINT_32 numBanksLog2     : 3; ///< Number of banks log2
        UINT_32 bankWidth        : 4; ///< Bank width
        UINT_32 bankHeight       : 4; ///< Bank height
        UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio
        UINT_32 prt              : 1; ///< SI only, indicate whether this equation is for prt
        UINT_32 reserved         : 1; ///< Reserved bit
    } fields;
    UINT_32 value;                    ///< All key fields viewed as one 32-bit value
} ADDR_EQUATION_KEY;

/**
****************************************************************************************************
* @brief address equation structure
****************************************************************************************************
*/
// Equation size limits. ADDR_MAX_EQUATION_COMP and ADDR_MAX_EQUATION_BIT are the two dimensions
// of ADDR_EQUATION::comps (components per bit x bits). ADDR_MAX_LEGACY_EQUATION_COMP is not
// referenced in this part of the header (presumably the component count of older equations).
#define ADDR_MAX_LEGACY_EQUATION_COMP 3u
#define ADDR_MAX_EQUATION_COMP        5u
#define ADDR_MAX_EQUATION_BIT         20u

// Invalid equation index
#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF

// Address equation: for every address bit, up to ADDR_MAX_EQUATION_COMP channel settings.
// The anonymous union exposes the same storage two ways: as five named arrays
// (addr, xor1..xor4) or as the two-dimensional comps[component][bit] table.
typedef struct _ADDR_EQUATION
{
    union
    {
        struct {
            ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT];  ///< addr setting
            ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT];  ///< xor setting
            ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT];  ///< xor2 setting
            ADDR_CHANNEL_SETTING xor3[ADDR_MAX_EQUATION_BIT];  ///< xor3 setting
            ADDR_CHANNEL_SETTING xor4[ADDR_MAX_EQUATION_BIT];  ///< xor4 setting
        };
        ///< Components showing the sources of each bit; each bit is result of addr ^ xor ^ xor2...
        ADDR_CHANNEL_SETTING comps[ADDR_MAX_EQUATION_COMP][ADDR_MAX_EQUATION_BIT];
    };
    UINT_32              numBits;                      ///< The number of bits in equation
    UINT_32              numBitComponents;             ///< The max number of channels contributing to a bit
    BOOL_32              stackedDepthSlices;           ///< TRUE if depth slices are treated as being
                                                       ///< stacked vertically prior to swizzling
} ADDR_EQUATION;


/**
****************************************************************************************************
* @brief Alloc system memory flags.
* @note These flags are reserved for future use; defining the structure now minimizes the impact
*       on the client if flags are added later.
****************************************************************************************************
*/
typedef union _ADDR_ALLOCSYSMEM_FLAGS
{
    struct
    {
        UINT_32 reserved    : 32;  ///< Reserved for future use.
    } fields;
    UINT_32 value;                 ///< All flag bits viewed as one 32-bit value

} ADDR_ALLOCSYSMEM_FLAGS;

/**
****************************************************************************************************
* @brief Alloc system memory input structure
*
*   Argument block passed to the ADDR_ALLOCSYSMEM callback.
****************************************************************************************************
*/
typedef struct _ADDR_ALLOCSYSMEM_INPUT
{
    UINT_32                 size;           ///< Size of this structure in bytes

    ADDR_ALLOCSYSMEM_FLAGS  flags;          ///< System memory flags.
    UINT_32                 sizeInBytes;    ///< System memory allocation size in bytes.
    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
} ADDR_ALLOCSYSMEM_INPUT;

/**
****************************************************************************************************
* ADDR_ALLOCSYSMEM
*   @brief
*       Allocate system memory callback function. Returns valid pointer on success.
*   @param pInput
*       Allocation request (size in bytes, flags and client handle).
****************************************************************************************************
*/
typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
    const ADDR_ALLOCSYSMEM_INPUT* pInput);

/**
****************************************************************************************************
* @brief Free system memory input structure
*
*   Argument block passed to the ADDR_FREESYSMEM callback.
****************************************************************************************************
*/
typedef struct _ADDR_FREESYSMEM_INPUT
{
    UINT_32                 size;           ///< Size of this structure in bytes

    VOID*                   pVirtAddr;      ///< Virtual address
    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
} ADDR_FREESYSMEM_INPUT;

/**
****************************************************************************************************
* ADDR_FREESYSMEM
*   @brief
*       Free system memory callback function.
*       Returns ADDR_OK on success.
*   @param pInput
*       Free request (virtual address and client handle).
****************************************************************************************************
*/
typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
    const ADDR_FREESYSMEM_INPUT* pInput);

/**
****************************************************************************************************
* @brief Print debug message input structure
*
*   Argument block passed to the ADDR_DEBUGPRINT callback.
****************************************************************************************************
*/
typedef struct _ADDR_DEBUGPRINT_INPUT
{
    UINT_32             size;           ///< Size of this structure in bytes

    CHAR*               pDebugString;   ///< Debug print string
    va_list             ap;             ///< Variable argument list
    ADDR_CLIENT_HANDLE  hClient;        ///< Client handle
} ADDR_DEBUGPRINT_INPUT;

/**
****************************************************************************************************
* ADDR_DEBUGPRINT
*   @brief
*       Print debug message callback function.
*       Returns ADDR_OK on success.
*   @param pInput
*       Message to print (debug string, variable argument list and client handle).
****************************************************************************************************
*/
typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
    const ADDR_DEBUGPRINT_INPUT* pInput);

/**
****************************************************************************************************
* ADDR_CALLBACKS
*
*   @brief
*       Address Library needs client to provide system memory alloc/free routines.
*       The structure also carries a routine for printing debug messages.
****************************************************************************************************
*/
typedef struct _ADDR_CALLBACKS
{
    ADDR_ALLOCSYSMEM allocSysMem;   ///< Routine to allocate system memory
    ADDR_FREESYSMEM  freeSysMem;    ///< Routine to free system memory
    ADDR_DEBUGPRINT  debugPrint;    ///< Routine to print debug message
} ADDR_CALLBACKS;

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Create/Destroy functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
* ADDR_CREATE_FLAGS
*
*   @brief
*       This structure is used to pass some setup in creation of AddrLib
*   @note
****************************************************************************************************
*/
// Creation-time option flags. The anonymous bit-field struct and `value` share storage,
// so the flags can be set individually by name or all at once as a 32-bit word.
typedef union _ADDR_CREATE_FLAGS
{
    // Named view: 10 single-bit options + 22 reserved bits = 32 bits.
    struct
    {
        UINT_32 noCubeMipSlicesPad     : 1;    ///< Turn cubemap faces padding off
        UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
                                               ///  output structure
        UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
        UINT_32 useCombinedSwizzle     : 1;    ///< Use combined tile swizzle
        UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
        UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
        UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
        UINT_32 forceDccAndTcCompat    : 1;    ///< Force enable DCC and TC compatibility
        UINT_32 nonPower2MemConfig     : 1;    ///< Video memory bit width is not power of 2
        UINT_32 enableAltTiling        : 1;    ///< Enable alt tile mode
        UINT_32 reserved               : 22;   ///< Reserved bits for future use
    };

    UINT_32 value;                             ///< All flag bits viewed as one 32-bit word
} ADDR_CREATE_FLAGS;

/**
****************************************************************************************************
*   ADDR_REGISTER_VALUE
*
*   @brief
*       Data from registers to setup AddrLib global data, used in AddrCreate
****************************************************************************************************
*/
// Register-derived configuration supplied by the client; embedded by value in
// ADDR_CREATE_INPUT::regValue. Fields are grouped by the hardware generation that uses them.
typedef struct _ADDR_REGISTER_VALUE
{
    UINT_32  gbAddrConfig;       ///< For R8xx, use GB_ADDR_CONFIG register value.
                                 ///  For R6xx/R7xx, use GB_TILING_CONFIG.
                                 ///  But they can be treated as the same.
                                 ///  If this value is 0, use chip to set default value
    UINT_32  backendDisables;    ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled.
                                 ///  Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE

                                 //   R800 registers-----------------------------------------------
    UINT_32  noOfBanks;          ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK
                                 ///  No enums for this value in h/w header files
                                 ///  0: 4
                                 ///  1: 8
                                 ///  2: 16
    UINT_32  noOfRanks;          ///< MC_ARB_RAMCFG.NOOFRANK
                                 ///  0: 1
                                 ///  1: 2
                                 //   SI (R1000) registers-----------------------------------------
    const UINT_32* pTileConfig;  ///< Global tile setting tables
    UINT_32  noOfEntries;        ///< Number of entries in pTileConfig

                                 //   CI registers-------------------------------------------------
    const UINT_32* pMacroTileConfig;    ///< Global macro tile mode table
    UINT_32  noOfMacroEntries;   ///< Number of entries in pMacroTileConfig
} ADDR_REGISTER_VALUE;

/**
****************************************************************************************************
* ADDR_CREATE_INPUT
*
*   @brief
*       Parameters used to create an AddrLib object. Caller must provide all fields.
*
****************************************************************************************************
*/
// Input descriptor for AddrCreate(): chip identification, client callbacks, option flags
// and register-derived configuration.
typedef struct _ADDR_CREATE_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_32             chipEngine;          ///< Chip Engine
    UINT_32             chipFamily;          ///< Chip Family
    UINT_32             chipRevision;        ///< Chip Revision
    ADDR_CALLBACKS      callbacks;           ///< Callbacks for sysmem alloc/free/print
    ADDR_CREATE_FLAGS   createFlags;         ///< Flags to setup AddrLib
    ADDR_REGISTER_VALUE regValue;            ///< Data from registers to setup AddrLib global data
    ADDR_CLIENT_HANDLE  hClient;             ///< Client handle
    UINT_32             minPitchAlignPixels; ///< Minimum pitch alignment in pixels
} ADDR_CREATE_INPUT;

/**
****************************************************************************************************
* ADDR_CREATE_OUTPUT
*
*   @brief
*       Return AddrLib handle to client driver
*
****************************************************************************************************
*/
// Output descriptor for AddrCreate(): the library handle plus a read-only equation table.
typedef struct _ADDR_CREATE_OUTPUT
{
    UINT_32              size;            ///< Size of this structure in bytes

    ADDR_HANDLE          hLib;            ///< Address lib handle

    UINT_32              numEquations;    ///< Number of equations in the table
    const ADDR_EQUATION* pEquationTable;  ///< Pointer to the equation table (numEquations entries)
} ADDR_CREATE_OUTPUT;

/**
****************************************************************************************************
*   AddrCreate
*
*   @brief
*       Create AddrLib object, must be called before any interface calls
*
*   @return
*       ADDR_OK if successful
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrCreate(
    const ADDR_CREATE_INPUT*    pAddrCreateIn,      ///< [in]  Creation parameters
    ADDR_CREATE_OUTPUT*         pAddrCreateOut);    ///< [out] Receives the library handle (hLib)


/**
****************************************************************************************************
*   AddrDestroy
*
*   @brief
*       Destroy AddrLib object, must be called to free internally allocated resources.
*
*   @return
*      ADDR_OK if successful
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrDestroy(
    ADDR_HANDLE hLib);      ///< [in] Address lib handle (as returned in ADDR_CREATE_OUTPUT::hLib)


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                    Surface functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
* @brief
*       Bank/tiling parameters. On function input, these can be set as desired or
*       left 0 for AddrLib to calculate/default. On function output, these are the actual
*       parameters used.
* @note
*       Valid bankWidth/bankHeight value:
*       1,2,4,8. They are factors instead of pixels or bytes.
*
*       The bank number remains constant across each row of the
*       macro tile as each pipe is selected, so the number of
*       tiles in the x direction with the same bank number will
*       be bank_width * num_pipes.
****************************************************************************************************
*/
// Bank/tiling parameter set; referenced through pTileInfo pointers in the surface
// input/output structures.
typedef struct _ADDR_TILEINFO
{
    ///  Any of these parameters can be set to 0 to use the HW default.
    UINT_32     banks;              ///< Number of banks, numerical value
    UINT_32     bankWidth;          ///< Number of tiles in the X direction in the same bank
    UINT_32     bankHeight;         ///< Number of tiles in the Y direction in the same bank
    UINT_32     macroAspectRatio;   ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1
    UINT_32     tileSplitBytes;     ///< Tile split size, in bytes
    AddrPipeCfg pipeConfig;         ///< Pipe Config = HW enum + 1
} ADDR_TILEINFO;

// Type alias kept so existing clients that still use the R800-specific name need no change.
// The "R800" prefix was dropped from the struct because SI is planned to be implemented within
// the 800 HWL - an AddrPipeCfg member was added to the data structure above for that purpose.
typedef ADDR_TILEINFO ADDR_R800_TILEINFO;

/**
****************************************************************************************************
* @brief
*       Information needed by quad buffer stereo support
****************************************************************************************************
*/
// Right-eye placement data for quad buffer stereo surfaces; referenced via
// ADDR_COMPUTE_SURFACE_INFO_OUTPUT::pStereoInfo.
typedef struct _ADDR_QBSTEREOINFO
{
    UINT_32         eyeHeight;          ///< Height (in pixel rows) to right eye
    UINT_32         rightOffset;        ///< Offset (in bytes) to right eye
    UINT_32         rightSwizzle;       ///< TileSwizzle for right eyes
} ADDR_QBSTEREOINFO;

/**
****************************************************************************************************
*   ADDR_SURFACE_FLAGS
*
*   @brief
*       Surface flags
****************************************************************************************************
*/
// Surface usage/behaviour flags. The anonymous bit-field struct and `value` share storage,
// so the flags can be set individually by name or all at once as a 32-bit word.
typedef union _ADDR_SURFACE_FLAGS
{
    // Named view: 31 single-bit flags + 1 reserved bit = 32 bits.
    struct
    {
        UINT_32 color                : 1; ///< Flag indicates this is a color buffer
        UINT_32 depth                : 1; ///< Flag indicates this is a depth/stencil buffer
        UINT_32 stencil              : 1; ///< Flag indicates this is a stencil buffer
        UINT_32 texture              : 1; ///< Flag indicates this is a texture
        UINT_32 cube                 : 1; ///< Flag indicates this is a cubemap
        UINT_32 volume               : 1; ///< Flag indicates this is a volume texture
        UINT_32 fmask                : 1; ///< Flag indicates this is an fmask
        UINT_32 cubeAsArray          : 1; ///< Flag indicates if treat cubemap as arrays
        UINT_32 compressZ            : 1; ///< Flag indicates z buffer is compressed
        UINT_32 overlay              : 1; ///< Flag indicates this is an overlay surface
        UINT_32 noStencil            : 1; ///< Flag indicates this depth has no separate stencil
        UINT_32 display              : 1; ///< Flag indicates this should match display controller req.
        UINT_32 opt4Space            : 1; ///< Flag indicates this surface should be optimized for space
                                          ///  i.e. save some memory but may lose performance
        UINT_32 prt                  : 1; ///< Flag for partially resident texture
        UINT_32 qbStereo             : 1; ///< Quad buffer stereo surface
        UINT_32 pow2Pad              : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
        UINT_32 interleaved          : 1; ///< Special flag for interleaved YUV surface padding
        UINT_32 tcCompatible         : 1; ///< Flag indicates surface needs to be shader readable
        UINT_32 dispTileType         : 1; ///< NI: force display Tiling for 128 bit shared resource
        UINT_32 dccCompatible        : 1; ///< VI: whether to make MSAA surface support dcc fast clear
        UINT_32 dccPipeWorkaround    : 1; ///< VI: whether to workaround the HW limit that
                                          ///  dcc can't be enabled if pipe config of tile mode
                                          ///  is different from that of ASIC, this flag
                                          ///  is address lib internal flag, client should ignore it
        UINT_32 czDispCompatible     : 1; ///< SI+: CZ family has a HW bug needs special alignment.
                                          ///  This flag indicates we need to follow the
                                          ///  alignment with CZ families or other ASICs under
                                          ///  PX configuration + CZ.
        UINT_32 nonSplit             : 1; ///< CI: depth texture should not be split
        UINT_32 disableLinearOpt     : 1; ///< Disable tile mode optimization to linear
        UINT_32 needEquation         : 1; ///< Make the surface tile setting equation compatible.
                                          ///  This flag indicates we need to override tile
                                          ///  mode to PRT_* tile mode to disable slice rotation,
                                          ///  which is needed by swizzle pattern equation.
        UINT_32 skipIndicesOutput    : 1; ///< Skipping indices in output.
        UINT_32 rotateDisplay        : 1; ///< Rotate micro tile type
        UINT_32 minimizeAlignment    : 1; ///< Minimize alignment
        UINT_32 preferEquation       : 1; ///< Return equation index without adjusting tile mode
        UINT_32 matchStencilTileCfg  : 1; ///< Select tile index of stencil as well as depth surface
                                          ///  to make sure they share same tile config parameters
        UINT_32 disallowLargeThickDegrade   : 1;    ///< Disallow large thick tile degrade
        UINT_32 reserved             : 1; ///< Reserved bits
    };

    UINT_32 value;                        ///< All flag bits viewed as one 32-bit word
} ADDR_SURFACE_FLAGS;

/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_INFO_INPUT
*
*   @brief
*       Input structure for AddrComputeSurfaceInfo
****************************************************************************************************
*/
// Describes the surface (format, dimensions, mip/slice selection, usage flags and optional
// tiling hints) for which AddrComputeSurfaceInfo() should compute layout information.
typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    AddrTileMode        tileMode;           ///< Tile mode
    AddrFormat          format;             ///< If format is set to valid one, bpp/width/height
                                            ///  might be overwritten
    UINT_32             bpp;                ///< Bits per pixel
    UINT_32             numSamples;         ///< Number of samples
    UINT_32             width;              ///< Width, in pixels
    UINT_32             height;             ///< Height, in pixels
    UINT_32             numSlices;          ///< Number of surface slices or depth
    UINT_32             slice;              ///< Slice index
    UINT_32             mipLevel;           ///< Current mipmap level
    UINT_32             numMipLevels;       ///< Number of mips in mip chain
    ADDR_SURFACE_FLAGS  flags;              ///< Surface type flags
    UINT_32             numFrags;           ///< Number of fragments, leave it zero or the same as
                                            ///  number of samples for normal AA; Set it to the
                                            ///  number of fragments for EQAA
    /// r800 and later HWL parameters
    // Needed by 2D tiling; for linear and 1D tiling, just keep them zero
    ADDR_TILEINFO*      pTileInfo;          ///< 2D tile parameters. Set to 0 to default/calculate
    AddrTileType        tileType;           ///< Micro tiling type, not needed when tileIndex != -1
    INT_32              tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
                                            ///  while the global useTileIndex is set to 1
    UINT_32             basePitch;          ///< Base level pitch in pixels, 0 means ignored, is a
                                            ///  must for mip levels from SI+.
                                            ///  Don't use pitch in blocks for compressed formats!
    UINT_32             maxBaseAlign;       ///< Max base alignment request from client
    UINT_32             pitchAlign;         ///< Pitch alignment request from client
    UINT_32             heightAlign;        ///< Height alignment request from client
} ADDR_COMPUTE_SURFACE_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_INFO_OUTPUT
*
*   @brief
*       Output structure for AddrComputeSurfaceInfo
*   @note
*       Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
*       Pixel: Original pixel
****************************************************************************************************
*/
// Results of AddrComputeSurfaceInfo(): padded dimensions, sizes, alignments, the tile
// configuration actually used, h/w register helper values and status flags.
typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    UINT_32         pitch;          ///< Pitch in elements (in blocks for compressed formats)
    UINT_32         height;         ///< Height in elements (in blocks for compressed formats)
    UINT_32         depth;          ///< Number of slice/depth
    UINT_64         surfSize;       ///< Surface size in bytes
    AddrTileMode    tileMode;       ///< Actual tile mode. May differ from that in input
    UINT_32         baseAlign;      ///< Base address alignment
    UINT_32         pitchAlign;     ///< Pitch alignment, in elements
    UINT_32         heightAlign;    ///< Height alignment, in elements
    UINT_32         depthAlign;     ///< Depth alignment, aligned to thickness, for 3d texture
    UINT_32         bpp;            ///< Bits per elements (e.g. blocks for BCn, 1/3 for 96bit)
    UINT_32         pixelPitch;     ///< Pitch in original pixels
    UINT_32         pixelHeight;    ///< Height in original pixels
    UINT_32         pixelBits;      ///< Original bits per pixel, passed from input
    UINT_64         sliceSize;      ///< Size of slice specified by input's slice
                                    ///  The result is controlled by surface flags & createFlags
                                    ///  By default this value equals to surfSize for volume
    UINT_32         pitchTileMax;   ///< PITCH_TILE_MAX value for h/w register
    UINT_32         heightTileMax;  ///< HEIGHT_TILE_MAX value for h/w register
    UINT_32         sliceTileMax;   ///< SLICE_TILE_MAX value for h/w register

    UINT_32         numSamples;     ///< Pass the effective numSamples processed in this call

    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Filled in if 0 on input
    AddrTileType    tileType;       ///< Micro tiling type, only valid when tileIndex != -1
    INT_32          tileIndex;      ///< Tile index, MAY be "downgraded"

    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
    /// Output flags (4 named bits + 28 reserved bits = 32 bits)
    struct
    {
        /// Special information to work around SI mipmap swizzle bug UBTS #317508
        UINT_32     last2DLevel  : 1;  ///< TRUE if this is the last 2D(3D) tiled
                                       ///  Only meaningful when create flag checkLast2DLevel is set
        UINT_32     tcCompatible : 1;  ///< If the surface can be shader compatible
        UINT_32     dccUnsupport : 1;  ///< If the surface can support DCC compressed rendering
                                       ///  NOTE(review): the field name suggests the bit is set when
                                       ///  DCC is NOT supported - confirm polarity in the implementation
        UINT_32     prtTileIndex : 1;  ///< SI only, indicate the returned tile index is for PRT
                                       ///  If address lib return true for mip 0, client should set prt flag
                                       ///  for child mips in subsequent compute surface info calls
        UINT_32     reserved     :28;  ///< Reserved bits
    };

    UINT_32         equationIndex;     ///< Equation index in the equation table;

    UINT_32         blockWidth;        ///< Width in element inside one block(1D->Micro, 2D->Macro)
    UINT_32         blockHeight;       ///< Height in element inside one block(1D->Micro, 2D->Macro)
    UINT_32         blockSlices;       ///< Slice number inside one block(1D->Micro, 2D->Macro)

    /// Stereo info
    ADDR_QBSTEREOINFO*  pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE

    INT_32          stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set
} ADDR_COMPUTE_SURFACE_INFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeSurfaceInfo
*
*   @brief
*       Compute surface width/height/depth/alignments and suitable tiling mode
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
    ADDR_HANDLE                             hLib,   ///< [in]  Address lib handle
    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in]  Surface description
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut);  ///< [out] Computed surface information


/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for AddrComputeSurfaceAddrFromCoord
****************************************************************************************************
*/
// Coordinate (x, y, slice, sample) plus the surface layout description needed by
// AddrComputeSurfaceAddrFromCoord() to translate it into a byte address.
typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    UINT_32         x;                  ///< X coordinate
    UINT_32         y;                  ///< Y coordinate
    UINT_32         slice;              ///< Slice index
    UINT_32         sample;             ///< Sample index, use fragment index for EQAA

    UINT_32         bpp;                ///< Bits per pixel
    UINT_32         pitch;              ///< Surface pitch, in pixels
    UINT_32         height;             ///< Surface height, in pixels
    UINT_32         numSlices;          ///< Surface depth
    UINT_32         numSamples;         ///< Number of samples

    AddrTileMode    tileMode;           ///< Tile mode
    BOOL_32         isDepth;            ///< TRUE if the surface uses depth sample ordering within
                                        ///  micro tile. Textures can also choose depth sample order
    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
                                        ///  the case that components are stored separately
    UINT_32         compBits;           ///< The component bits actually needed(for planar surface)

    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
                                        ///  number of samples for normal AA; Set it to the
                                        ///  number of fragments for EQAA
    /// r800 and later HWL parameters
    // Used for 1D tiling above
    AddrTileType    tileType;           ///< See definition of AddrTileType
    struct
    {
        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is texture
                                        ///  only flag. Only non-RT texture can set this to TRUE
        UINT_32     reserved :31;       ///< Reserved for future use.
    };
    // 2D tiling needs following structure
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    // Swizzle: either separate bank/pipe values or one combined value. Both views share
    // storage, so tileSwizzle overlays the start of the {bankSwizzle, pipeSwizzle} pair.
    union
    {
        struct
        {
            UINT_32  bankSwizzle;       ///< Bank swizzle
            UINT_32  pipeSwizzle;       ///< Pipe swizzle
        };
        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
    };
} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for AddrComputeSurfaceAddrFromCoord
****************************************************************************************************
*/
// Result of AddrComputeSurfaceAddrFromCoord(): the byte address plus a bit position for
// sub-byte formats.
typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_64 addr;           ///< Byte address
    UINT_32 bitPosition;    ///< Bit position within the byte at addr, 0-7.
                            ///  For surface bpp < 8, e.g. FMT_1.
    UINT_32 prtBlockIndex;  ///< Index of a PRT tile (64K block)
} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeSurfaceAddrFromCoord
*
*   @brief
*       Compute surface address from a given coordinate.
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                     hLib,   ///< [in]  Address lib handle
    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in]  Coordinate and surface layout
    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut);  ///< [out] Computed address


/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for AddrComputeSurfaceCoordFromAddr
****************************************************************************************************
*/
// Byte address (plus bit position) and the surface layout description needed by
// AddrComputeSurfaceCoordFromAddr() to translate it back into a coordinate.
typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    UINT_64         addr;               ///< Address in bytes
    UINT_32         bitPosition;        ///< Bit position in addr. 0-7. for surface bpp < 8,
                                        ///  e.g. FMT_1;
    UINT_32         bpp;                ///< Bits per pixel
    UINT_32         pitch;              ///< Pitch, in pixels
    UINT_32         height;             ///< Height in pixels
    UINT_32         numSlices;          ///< Surface depth
    UINT_32         numSamples;         ///< Number of samples

    AddrTileMode    tileMode;           ///< Tile mode
    BOOL_32         isDepth;            ///< Surface uses depth sample ordering within micro tile.
                                        ///  Note: Textures can choose depth sample order as well.
    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
                                        ///  the case that components are stored separately
    UINT_32         compBits;           ///< The component bits actually needed(for planar surface)

    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
                                        ///  number of samples for normal AA; Set it to the
                                        ///  number of fragments for EQAA
    /// r800 and later HWL parameters
    // Used for 1D tiling above
    AddrTileType    tileType;           ///< See definition of AddrTileType
    struct
    {
        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is texture
                                        ///  only flag. Only non-RT texture can set this to TRUE
        UINT_32     reserved :31;       ///< Reserved for future use.
    };
    // 2D tiling needs following structure
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    // Swizzle: either separate bank/pipe values or one combined value. Both views share
    // storage, so tileSwizzle overlays the start of the {bankSwizzle, pipeSwizzle} pair.
    union
    {
        struct
        {
            UINT_32  bankSwizzle;       ///< Bank swizzle
            UINT_32  pipeSwizzle;       ///< Pipe swizzle
        };
        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
    };
} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for AddrComputeSurfaceCoordFromAddr
****************************************************************************************************
*/
// Result of AddrComputeSurfaceCoordFromAddr(): the coordinate corresponding to the address.
typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
{
    UINT_32 size;   ///< Size of this structure in bytes

    UINT_32 x;      ///< X coordinate
    UINT_32 y;      ///< Y coordinate
    UINT_32 slice;  ///< Index of slices
    UINT_32 sample; ///< Index of samples, means fragment index for EQAA
} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeSurfaceCoordFromAddr
*
*   @brief
*       Compute coordinate from a given surface address
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
    ADDR_HANDLE                                     hLib,   ///< [in]  Address lib handle
    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in]  Address and surface layout
    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut);  ///< [out] Computed coordinate

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                   HTile functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR_HTILE_FLAGS
*
*   @brief
*       HTILE flags
****************************************************************************************************
*/
typedef union _ADDR_HTILE_FLAGS
{
    struct
    {
        UINT_32 tcCompatible          : 1;  ///< Flag indicates surface needs to be shader readable
        UINT_32 skipTcCompatSizeAlign : 1;  ///< When set, addrLib will not align the htile
                                            ///  size to 256xBankxPipe when computing tc-compatible
                                            ///  htile info.
        UINT_32 reserved              : 30; ///< Reserved bits
    };

    UINT_32 value;                          ///< All of the flag bits above viewed as one 32-bit word
} ADDR_HTILE_FLAGS;

/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_INFO_INPUT
*
*   @brief
*       Input structure of AddrComputeHtileInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT
{
    UINT_32            size;            ///< Size of this structure in bytes

    ADDR_HTILE_FLAGS   flags;           ///< HTILE flags
    UINT_32            pitch;           ///< Surface pitch, in pixels
    UINT_32            height;          ///< Surface height, in pixels
    UINT_32            numSlices;       ///< Number of slices
    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
    AddrHtileBlockSize blockWidth;      ///< Block width, 4 or 8. EG and above only support 8
    AddrHtileBlockSize blockHeight;     ///< Block height, 4 or 8. EG and above only support 8
    ADDR_TILEINFO*     pTileInfo;       ///< Tile info

    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                        ///  README: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_HTILE_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_INFO_OUTPUT
*
*   @brief
*       Output structure of AddrComputeHtileInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT
{
    UINT_32 size;               ///< Size of this structure in bytes

    UINT_32 pitch;              ///< Pitch, in pixels, of the depth buffer represented by this
                                ///  HTile buffer. This might be larger than the original depth
                                ///  buffer pitch when called with an unaligned pitch.
    UINT_32 height;             ///< Height in pixels, as above
    UINT_64 htileBytes;         ///< Size of HTILE buffer, in bytes
    UINT_32 baseAlign;          ///< Base alignment
    UINT_32 bpp;                ///< Bits per pixel; for HTILE this is the number of bits for an
                                ///  8x8 block!
    UINT_32 macroWidth;         ///< Macro width in pixels, actually squared cache shape
    UINT_32 macroHeight;        ///< Macro height in pixels
    UINT_64 sliceSize;          ///< Slice size, in bytes.
    BOOL_32 sliceInterleaved;   ///< Flag to indicate whether the htile of different slices is
                                ///  interleaved. Compute engine clear can't be used if htile is
                                ///  interleaved
    BOOL_32 nextMipLevelCompressible;   ///< Flag to indicate whether HTILE can be enabled in the
                                        ///  next mip level; it also indicates whether memory-set
                                        ///  based fast clear can be used for the current mip level.
} ADDR_COMPUTE_HTILE_INFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeHtileInfo
*
*   @brief
*       Compute Htile pitch, height, base alignment and size in bytes
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_HTILE_INFO_INPUT)
*   @param pOut  [out] Output structure receiving the computed Htile info
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,
    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for AddrComputeHtileAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
{
    UINT_32            size;            ///< Size of this structure in bytes

    UINT_32            pitch;           ///< Pitch, in pixels
    UINT_32            height;          ///< Height, in pixels
    UINT_32            x;               ///< X coordinate
    UINT_32            y;               ///< Y coordinate
    UINT_32            slice;           ///< Slice index
    UINT_32            numSlices;       ///< Number of slices
    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
    ADDR_HTILE_FLAGS   flags;           ///< HTILE flags
    AddrHtileBlockSize blockWidth;      ///< Block width, 4 or 8. 1 means 8, 0 means 4.
                                        ///  EG and above only support 8
    AddrHtileBlockSize blockHeight;     ///< Block height, 4 or 8. 1 means 8, 0 means 4.
                                        ///  EG and above only support 8
    ADDR_TILEINFO*     pTileInfo;       ///< Tile info

    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                        ///  README: When tileIndex is not -1, this must be valid
    UINT_32            bpp;             ///< Bits per pixel of the depth/stencil buffer
    UINT_32            zStencilAddr;    ///< tcCompatible Z/Stencil surface address.
                                        ///  NOTE(review): only 32 bits wide, whereas the addr
                                        ///  fields of the neighbouring structures are UINT_64
} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for AddrComputeHtileAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_64 addr;           ///< Address in bytes
    UINT_32 bitPosition;    ///< Bit position, 0 or 4. CMASK and HTILE share some library
                            ///  methods, so bitPosition is kept for HTILE as well
} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeHtileAddrFromCoord
*
*   @brief
*       Compute Htile address according to coordinates (of depth buffer)
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)
*   @param pOut  [out] Output structure receiving the address and bit position
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,
    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for AddrComputeHtileCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
{
    UINT_32            size;            ///< Size of this structure in bytes

    UINT_64            addr;            ///< Address
    UINT_32            bitPosition;     ///< Bit position, 0 or 4. CMASK and HTILE share some
                                        ///  methods, so bitPosition is kept for HTILE as well
    UINT_32            pitch;           ///< Pitch, in pixels
    UINT_32            height;          ///< Height, in pixels
    UINT_32            numSlices;       ///< Number of slices
    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
    AddrHtileBlockSize blockWidth;      ///< Block width, 4 or 8. 1 means 8, 0 means 4.
                                        ///  R8xx/R9xx only support 8
    AddrHtileBlockSize blockHeight;     ///< Block height, 4 or 8. 1 means 8, 0 means 4.
                                        ///  R8xx/R9xx only support 8
    ADDR_TILEINFO*     pTileInfo;       ///< Tile info

    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                        ///  README: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for AddrComputeHtileCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
{
    UINT_32 size;   ///< Size of this structure in bytes

    // Coordinate within the depth buffer that corresponds to the given Htile address.
    UINT_32 x;      ///< X coordinate
    UINT_32 y;      ///< Y coordinate
    UINT_32 slice;  ///< Slice index
} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeHtileCoordFromAddr
*
*   @brief
*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
*       Htile address
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)
*   @param pOut  [out] Output structure receiving x, y and slice
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,
    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     C-mask functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR_CMASK_FLAGS
*
*   @brief
*       CMASK flags
****************************************************************************************************
*/
typedef union _ADDR_CMASK_FLAGS
{
    struct
    {
        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
        UINT_32 reserved      :31; ///< Reserved bits
    };

    UINT_32 value;                 ///< All of the flag bits above viewed as one 32-bit word
} ADDR_CMASK_FLAGS;

/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_INFO_INPUT
*
*   @brief
*       Input structure of AddrComputeCmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT
{
    // NOTE(review): the struct tag is spelled "CMASKINFO" (no underscore) while the typedef name
    // is ADDR_COMPUTE_CMASK_INFO_INPUT; the tag is left as-is so existing references keep working.
    UINT_32             size;            ///< Size of this structure in bytes

    ADDR_CMASK_FLAGS    flags;           ///< CMASK flags
    UINT_32             pitch;           ///< Pitch, in pixels, of color buffer
    UINT_32             height;          ///< Height, in pixels, of color buffer
    UINT_32             numSlices;       ///< Number of slices, of color buffer
    BOOL_32             isLinear;        ///< Linear or tiled layout. Only SI can be linear
    ADDR_TILEINFO*      pTileInfo;       ///< Tile info

    INT_32              tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
                                         ///  while the global useTileIndex is set to 1
    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                         ///  README: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_CMASK_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_INFO_OUTPUT
*
*   @brief
*       Output structure of AddrComputeCmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_32 pitch;          ///< Pitch, in pixels, of the color buffer which
                            ///  this Cmask matches. This might be larger than the
                            ///  original color buffer pitch when called with
                            ///  an unaligned pitch.
    UINT_32 height;         ///< Height in pixels, as above
    UINT_64 cmaskBytes;     ///< Size in bytes of CMask buffer
    UINT_32 baseAlign;      ///< Base alignment
    UINT_32 blockMax;       ///< Cmask block size. Needed to set the CB_COLORn_MASK register
    UINT_32 macroWidth;     ///< Macro width in pixels, actually squared cache shape
    UINT_32 macroHeight;    ///< Macro height in pixels
    UINT_64 sliceSize;      ///< Slice size, in bytes.
} ADDR_COMPUTE_CMASK_INFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeCmaskInfo
*
*   @brief
*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
*       info
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_CMASK_INFO_INPUT)
*   @param pOut  [out] Output structure receiving the computed Cmask info
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,
    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for AddrComputeCmaskAddrFromCoord
*
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
{
    UINT_32          size;           ///< Size of this structure in bytes
    UINT_32          x;              ///< X coordinate
    UINT_32          y;              ///< Y coordinate
    UINT_64          fmaskAddr;      ///< Fmask address, for tc-compatible Cmask
    UINT_32          slice;          ///< Slice index
    UINT_32          pitch;          ///< Pitch in pixels, of color buffer
    UINT_32          height;         ///< Height in pixels, of color buffer
    UINT_32          numSlices;      ///< Number of slices
    UINT_32          bpp;            ///< Bits per pixel. NOTE(review): undocumented in the
                                     ///  original; presumably of the color buffer -- confirm
    BOOL_32          isLinear;       ///< Linear or tiled layout. Only SI can be linear
    ADDR_CMASK_FLAGS flags;          ///< CMASK flags
    ADDR_TILEINFO*   pTileInfo;      ///< Tile info

    INT_32           tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
                                     ///  while the global useTileIndex is set to 1
    INT_32           macroModeIndex; ///< Index in macro tile mode table if there is one (CI).
                                     ///  README: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for AddrComputeCmaskAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_64 addr;           ///< CMASK address in bytes
    UINT_32 bitPosition;    ///< Bit position within addr, 0-7. CMASK is 4 bpp,
                            ///  so the position is either bit 0 (0) or bit 4 (4)
} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeCmaskAddrFromCoord
*
*   @brief
*       Compute Cmask address according to coordinates (of MSAA color buffer)
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)
*   @param pOut  [out] Output structure receiving the address and bit position
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,
    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for AddrComputeCmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
{
    UINT_32        size;            ///< Size of this structure in bytes

    UINT_64        addr;            ///< CMASK address in bytes
    UINT_32        bitPosition;     ///< Bit position within addr, 0-7. CMASK is 4 bpp,
                                    ///  so the position is either bit 0 (0) or bit 4 (4)
    UINT_32        pitch;           ///< Pitch, in pixels
    UINT_32        height;          ///< Height, in pixels
    UINT_32        numSlices;       ///< Number of slices
    BOOL_32        isLinear;        ///< Linear or tiled layout. Only SI can be linear
    ADDR_TILEINFO* pTileInfo;       ///< Tile info

    INT_32         tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
                                    ///  while the global useTileIndex is set to 1
    INT_32         macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                    ///  README: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for AddrComputeCmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
{
    UINT_32 size;   ///< Size of this structure in bytes

    // Coordinate within the color buffer that corresponds to the given Cmask address.
    UINT_32 x;      ///< X coordinate
    UINT_32 y;      ///< Y coordinate
    UINT_32 slice;  ///< Slice index
} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeCmaskCoordFromAddr
*
*   @brief
*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
*       Cmask address
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)
*   @param pOut  [out] Output structure receiving x, y and slice
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,
    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     F-mask functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_INFO_INPUT
*
*   @brief
*       Input structure for AddrComputeFmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    AddrTileMode    tileMode;           ///< Tile mode
    UINT_32         pitch;              ///< Surface pitch, in pixels
    UINT_32         height;             ///< Surface height, in pixels
    UINT_32         numSlices;          ///< Number of slices/depth
    UINT_32         numSamples;         ///< Number of samples
    UINT_32         numFrags;           ///< Number of fragments. Leave it zero or the same as the
                                        ///  number of samples for normal AA; set it to the
                                        ///  number of fragments for EQAA
    /// r800 and later HWL parameters
    struct
    {
        UINT_32 resolved:   1;          ///< TRUE if the surface is for resolved fmask. Only used
                                        ///  by H/W clients; S/W should always set it to FALSE.
        UINT_32 reserved:  31;          ///< Reserved for future use.
    };
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Clients must give valid data
    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
} ADDR_COMPUTE_FMASK_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_INFO_OUTPUT
*
*   @brief
*       Output structure for AddrComputeFmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    UINT_32         pitch;          ///< Pitch of fmask in pixels
    UINT_32         height;         ///< Height of fmask in pixels
    UINT_32         numSlices;      ///< Slices of fmask
    UINT_64         fmaskBytes;     ///< Size of fmask in bytes
    UINT_32         baseAlign;      ///< Base address alignment
    UINT_32         pitchAlign;     ///< Pitch alignment
    UINT_32         heightAlign;    ///< Height alignment
    UINT_32         bpp;            ///< Bits per pixel of FMASK, i.e. the number of bit planes
    UINT_32         numSamples;     ///< Number of samples, used for dump. Exported because the
                                    ///  input may be changed in 9xx and above
    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Fmask can have a different
                                    ///  bank_height from the color buffer
    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
                                    ///  while the global useTileIndex is set to 1
    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
    UINT_64         sliceSize;      ///< Size of slice in bytes
} ADDR_COMPUTE_FMASK_INFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeFmaskInfo
*
*   @brief
*       Compute Fmask pitch/height/depth/alignments and size in bytes
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_FMASK_INFO_INPUT)
*   @param pOut  [out] Output structure receiving the computed Fmask info
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,
    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for AddrComputeFmaskAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    UINT_32         x;                  ///< X coordinate
    UINT_32         y;                  ///< Y coordinate
    UINT_32         slice;              ///< Slice index
    UINT_32         plane;              ///< Plane number
    UINT_32         sample;             ///< Sample index (fragment index for EQAA)

    UINT_32         pitch;              ///< Surface pitch, in pixels
    UINT_32         height;             ///< Surface height, in pixels
    UINT_32         numSamples;         ///< Number of samples
    UINT_32         numFrags;           ///< Number of fragments. Leave it zero or the same as the
                                        ///  number of samples for normal AA; set it to the
                                        ///  number of fragments for EQAA

    AddrTileMode    tileMode;           ///< Tile mode
    union
    {
        struct
        {
            UINT_32  bankSwizzle;       ///< Bank swizzle
            UINT_32  pipeSwizzle;       ///< Pipe swizzle
        };
        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE.
                                        ///  Shares storage with bankSwizzle (same union)
    };

    /// r800 and later HWL parameters
    struct
    {
        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by H/W clients
        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
        UINT_32 reserved:  30;          ///< Reserved for future use.
    };
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Client must provide all data

} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for AddrComputeFmaskAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_64 addr;           ///< Fmask address
    UINT_32 bitPosition;    ///< Bit position within addr, 0-7.
} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeFmaskAddrFromCoord
*
*   @brief
*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)
*   @param pOut  [out] Output structure receiving the address and bit position
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,
    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for AddrComputeFmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    UINT_64         addr;               ///< Address
    UINT_32         bitPosition;        ///< Bit position within addr, 0-7.

    UINT_32         pitch;              ///< Pitch, in pixels
    UINT_32         height;             ///< Height, in pixels
    UINT_32         numSamples;         ///< Number of samples
    UINT_32         numFrags;           ///< Number of fragments
    AddrTileMode    tileMode;           ///< Tile mode
    union
    {
        struct
        {
            UINT_32  bankSwizzle;       ///< Bank swizzle
            UINT_32  pipeSwizzle;       ///< Pipe swizzle
        };
        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE.
                                        ///  Shares storage with bankSwizzle (same union)
    };

    /// r800 and later HWL parameters
    struct
    {
        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by HW components
        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
        UINT_32 reserved:  30;          ///< Reserved for future use.
    };
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Client must provide all data

} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for AddrComputeFmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
{
    UINT_32 size;       ///< Size of this structure in bytes

    // FMASK coordinate that corresponds to the given address.
    UINT_32 x;          ///< X coordinate
    UINT_32 y;          ///< Y coordinate
    UINT_32 slice;      ///< Slice index
    UINT_32 plane;      ///< Plane number
    UINT_32 sample;     ///< Sample index (fragment index for EQAA)
} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeFmaskCoordFromAddr
*
*   @brief
*       Compute FMASK coordinate from a given address
*
*   @param hLib  Address library handle
*   @param pIn   [in]  Input structure (ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)
*   @param pOut  [out] Output structure receiving x, y, slice, plane and sample
*
*   @return
*       Result code of type ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
    ADDR_HANDLE                                     hLib,
    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,
    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                          Element/utility functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrGetVersion
*
*   @brief
*       Get AddrLib version number
*   @param [in]  hLib  Address library handle
*   @return
*       Version number as a UINT_32
****************************************************************************************************
*/
UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);

/**
****************************************************************************************************
*   AddrUseTileIndex
*
*   @brief
*       Query whether tileIndex is enabled in this address library
*   @param [in]  hLib  Address library handle
*   @return
*       TRUE if tileIndex is enabled in this address library
****************************************************************************************************
*/
BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib);

/**
****************************************************************************************************
*   AddrUseCombinedSwizzle
*
*   @brief
*       Query whether combined swizzle is enabled in this address library
*   @param [in]  hLib  Address library handle
*   @return
*       TRUE if combined swizzle is enabled in this address library
****************************************************************************************************
*/
BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib);

/**
****************************************************************************************************
*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
*
*   @brief
*       Input structure of AddrExtractBankPipeSwizzle
*   @note
*       macroModeIndex must be valid whenever tileIndex is not -1.
****************************************************************************************************
*/
typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    UINT_32         base256b;       ///< Base256b value the swizzles are extracted from

    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data

    INT_32          tileIndex;      ///< Tile index; MUST be -1 if you don't want to use it
                                    ///  while the global useTileIndex is set to 1
    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI).
                                    ///  NOTE: When tileIndex is not -1, this must be valid
} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT;

/**
****************************************************************************************************
*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
*
*   @brief
*       Output structure of AddrExtractBankPipeSwizzle
*   @note
*       This is the type of the pOut argument of AddrExtractBankPipeSwizzle.
****************************************************************************************************
*/
typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_32 bankSwizzle;    ///< Extracted bank swizzle
    UINT_32 pipeSwizzle;    ///< Extracted pipe swizzle
} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT;

/**
****************************************************************************************************
*   AddrExtractBankPipeSwizzle
*
*   @brief
*       Extract Bank and Pipe swizzle from base256b
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
*   @param [out] pOut  Extracted swizzles, see ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
*   @return
*       ADDR_OK if no error
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
    ADDR_HANDLE                                 hLib,
    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,
    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut);


/**
****************************************************************************************************
*   ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
*
*   @brief
*       Input structure of AddrCombineBankPipeSwizzle
*   @note
*       macroModeIndex must be valid whenever tileIndex is not -1.
****************************************************************************************************
*/
typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    UINT_32         bankSwizzle;    ///< Bank swizzle to combine
    UINT_32         pipeSwizzle;    ///< Pipe swizzle to combine
    UINT_64         baseAddr;       ///< Base address (leave it zero for driver clients)

    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data

    INT_32          tileIndex;      ///< Tile index; MUST be -1 if you don't want to use it
                                    ///  while the global useTileIndex is set to 1
    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI).
                                    ///  NOTE: When tileIndex is not -1, this must be valid
} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT;

/**
****************************************************************************************************
*   ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
*
*   @brief
*       Output structure of AddrCombineBankPipeSwizzle
*   @note
*       This is the type of the pOut argument of AddrCombineBankPipeSwizzle.
****************************************************************************************************
*/
typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_32 tileSwizzle;    ///< Combined (bank and pipe) swizzle
} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT;

/**
****************************************************************************************************
*   AddrCombineBankPipeSwizzle
*
*   @brief
*       Combine Bank and Pipe swizzle
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
*   @param [out] pOut  Combined swizzle, see ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
*   @return
*       ADDR_OK if no error
*   @note
*       baseAddr here is full MCAddress instead of base256b
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
    ADDR_HANDLE                                 hLib,
    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut);


/**
****************************************************************************************************
*   ADDR_COMPUTE_SLICESWIZZLE_INPUT
*
*   @brief
*       Input structure of AddrComputeSliceSwizzle
*   @note
*       macroModeIndex must be valid whenever tileIndex is not -1.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    AddrTileMode    tileMode;           ///< Tile Mode
    UINT_32         baseSwizzle;        ///< Base tile swizzle
    UINT_32         slice;              ///< Slice index
    UINT_64         baseAddr;           ///< Base address, driver should leave it 0 in most cases

    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Actually banks needed here!

    INT_32          tileIndex;          ///< Tile index; MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI).
                                        ///  NOTE: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_SLICESWIZZLE_INPUT;


/**
****************************************************************************************************
*   ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
*
*   @brief
*       Output structure of AddrComputeSliceSwizzle
*   @note
*       This is the type of the pOut argument of AddrComputeSliceSwizzle.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
{
    UINT_32  size;           ///< Size of this structure in bytes

    UINT_32  tileSwizzle;    ///< Recalculated tileSwizzle value
} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeSliceSwizzle
*
*   @brief
*       Recalculate the tile swizzle for a slice, given a base tile swizzle and a slice index
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_COMPUTE_SLICESWIZZLE_INPUT
*   @param [out] pOut  Recalculated swizzle, see ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
*   @return
*       ADDR_OK if no error
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut);


/**
****************************************************************************************************
*   AddrSwizzleGenOption
*
*   @brief
*       Swizzle generating options: legacy (default) or linear
*   @note
*       ADDR_SWIZZLE_OPTION::genOption is documented as taking one of these values.
****************************************************************************************************
*/
typedef enum _AddrSwizzleGenOption
{
    ADDR_SWIZZLE_GEN_DEFAULT    = 0,    ///< As is in client driver implementation for swizzle
    ADDR_SWIZZLE_GEN_LINEAR     = 1,    ///< Using a linear increment of swizzle
} AddrSwizzleGenOption;

/**
****************************************************************************************************
*   AddrBlockType
*
*   @brief
*       Resource block type
*   @note
*       AddrBlockThin256KB and AddrBlockThick256KB are aliases sharing the numeric values of
*       AddrBlockThinVar and AddrBlockThickVar respectively.
****************************************************************************************************
*/
typedef enum
{
    AddrBlockLinear = 0, ///< Resource uses linear swizzle mode
    AddrBlockMicro = 1, ///< Resource uses 256B block
    AddrBlockThin4KB = 2, ///< Resource uses thin 4KB block
    AddrBlockThick4KB = 3, ///< Resource uses thick 4KB block
    AddrBlockThin64KB = 4, ///< Resource uses thin 64KB block
    AddrBlockThick64KB = 5, ///< Resource uses thick 64KB block
    AddrBlockThinVar = 6, ///< Resource uses thin var block
    AddrBlockThickVar = 7, ///< Resource uses thick var block
    AddrBlockMaxTiledType, ///< One past the last block type above (implicitly 8)

    AddrBlockThin256KB = AddrBlockThinVar, ///< Alias of AddrBlockThinVar (6)
    AddrBlockThick256KB = AddrBlockThickVar, ///< Alias of AddrBlockThickVar (7)
} AddrBlockType;

/**
****************************************************************************************************
*   ADDR_SWIZZLE_OPTION
*
*   @brief
*       Controls how swizzle is generated
*   @note
*       The bit-fields and value overlay the same 32 bits.
****************************************************************************************************
*/
typedef union _ADDR_SWIZZLE_OPTION
{
    struct
    {
        UINT_32 genOption       : 1;    ///< The way swizzle is generated, see AddrSwizzleGenOption
        UINT_32 reduceBankBit   : 1;    ///< TRUE if we need reduce swizzle bits
        UINT_32 reserved        :30;    ///< Reserved bits
    };

    UINT_32 value;                      ///< All option bits accessed as one 32-bit value

} ADDR_SWIZZLE_OPTION;

/**
****************************************************************************************************
*   ADDR_COMPUTE_BASE_SWIZZLE_INPUT
*
*   @brief
*       Input structure of AddrComputeBaseSwizzle
*   @note
*       macroModeIndex must be valid whenever tileIndex is not -1.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT
{
    UINT_32             size;           ///< Size of this structure in bytes

    ADDR_SWIZZLE_OPTION option;         ///< Swizzle option, see ADDR_SWIZZLE_OPTION
    UINT_32             surfIndex;      ///< Index of this surface type
    AddrTileMode        tileMode;       ///< Tile Mode

    /// r800 and later HWL parameters
    ADDR_TILEINFO*      pTileInfo;      ///< 2D tile parameters. Actually banks needed here!

    INT_32              tileIndex;      ///< Tile index; MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32              macroModeIndex; ///< Index in macro tile mode table if there is one (CI).
                                        ///  NOTE: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_BASE_SWIZZLE_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
*
*   @brief
*       Output structure of AddrComputeBaseSwizzle
*   @note
*       This is the type of the pOut argument of AddrComputeBaseSwizzle.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_32 tileSwizzle;    ///< Combined (bank and pipe) swizzle
} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeBaseSwizzle
*
*   @brief
*       Return a combined bank and pipe swizzle based on surface type/index
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_COMPUTE_BASE_SWIZZLE_INPUT
*   @param [out] pOut  Combined swizzle, see ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
*   @return
*       ADDR_OK if no error
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut);


/**
****************************************************************************************************
*   ELEM_GETEXPORTNORM_INPUT
*
*   @brief
*       Input structure for ElemGetExportNorm
*
****************************************************************************************************
*/
typedef struct _ELEM_GETEXPORTNORM_INPUT
{
    UINT_32             size;       ///< Size of this structure in bytes

    AddrColorFormat     format;     ///< Color buffer format; Client should use ColorFormat
    AddrSurfaceNumber   num;        ///< Surface number type; Client should use NumberType
    AddrSurfaceSwap     swap;       ///< Surface byte swap; Client should use SurfaceSwap
    UINT_32             numSamples; ///< Number of samples
} ELEM_GETEXPORTNORM_INPUT;

/**
****************************************************************************************************
*   ElemGetExportNorm
*
*   @brief
*       Helper function to check whether a format can be EXPORT_NORM, which is a register
*       CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600
*       family
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ELEM_GETEXPORTNORM_INPUT
*   @return
*       BOOL_32; presumably TRUE when the format can be EXPORT_NORM (confirm against the
*       implementation)
*   @note
*       The implementation is only for r600.
*       00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two
*       clocks per export)
*       01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one
*       clock per export)
*
****************************************************************************************************
*/
BOOL_32 ADDR_API ElemGetExportNorm(
    ADDR_HANDLE                     hLib,
    const ELEM_GETEXPORTNORM_INPUT* pIn);


/**
****************************************************************************************************
*   ELEM_FLT32TODEPTHPIXEL_INPUT
*
*   @brief
*       Input structure for ElemFlt32ToDepthPixel
*
****************************************************************************************************
*/
typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    AddrDepthFormat format;         ///< Depth buffer format
    ADDR_FLT_32     comps[2];       ///< Component values (Z/stencil)
} ELEM_FLT32TODEPTHPIXEL_INPUT;

/**
****************************************************************************************************
*   ELEM_FLT32TODEPTHPIXEL_OUTPUT
*
*   @brief
*       Output structure for ElemFlt32ToDepthPixel
*   @note
*       pPixel points to client-provided storage; the client must make it large enough for the
*       depth buffer data type.
*
****************************************************************************************************
*/
typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT
{
    UINT_32 size;           ///< Size of this structure in bytes

    UINT_8* pPixel;         ///< Real depth value. Same data type as depth buffer.
                            ///  Client must provide enough storage for this type.
    UINT_32 depthBase;      ///< Tile base in bits for depth bits
    UINT_32 stencilBase;    ///< Tile base in bits for stencil bits
    UINT_32 depthBits;      ///< Bits for depth
    UINT_32 stencilBits;    ///< Bits for stencil
} ELEM_FLT32TODEPTHPIXEL_OUTPUT;

/**
****************************************************************************************************
*   ElemFlt32ToDepthPixel
*
*   @brief
*       Convert a FLT_32 value to a depth/stencil pixel value
*
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ELEM_FLT32TODEPTHPIXEL_INPUT
*   @param [out] pOut  Converted pixel and bit layout, see ELEM_FLT32TODEPTHPIXEL_OUTPUT
*
*   @return
*       Return code
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
    ADDR_HANDLE                         hLib,
    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut);


/**
****************************************************************************************************
*   ELEM_FLT32TOCOLORPIXEL_INPUT
*
*   @brief
*       Input structure for ElemFlt32ToColorPixel
*
****************************************************************************************************
*/
typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT
{
    UINT_32            size;           ///< Size of this structure in bytes

    AddrColorFormat    format;         ///< Color buffer format
    AddrSurfaceNumber  surfNum;        ///< Surface number
    AddrSurfaceSwap    surfSwap;       ///< Surface swap
    ADDR_FLT_32        comps[4];       ///< Component values (r/g/b/a)
} ELEM_FLT32TOCOLORPIXEL_INPUT;

/**
****************************************************************************************************
*   ELEM_FLT32TOCOLORPIXEL_OUTPUT
*
*   @brief
*       Output structure for ElemFlt32ToColorPixel
*   @note
*       pPixel points to client-provided storage; the client must make it large enough for the
*       color buffer data type.
*
****************************************************************************************************
*/
typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT
{
    UINT_32 size;       ///< Size of this structure in bytes

    UINT_8* pPixel;     ///< Real color value. Same data type as color buffer.
                        ///  Client must provide enough storage for this type.
} ELEM_FLT32TOCOLORPIXEL_OUTPUT;

/**
****************************************************************************************************
*   ElemFlt32ToColorPixel
*
*   @brief
*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
*
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ELEM_FLT32TOCOLORPIXEL_INPUT
*   @param [out] pOut  Converted pixel, see ELEM_FLT32TOCOLORPIXEL_OUTPUT
*
*   @return
*       Return code
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
    ADDR_HANDLE                         hLib,
    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ElemSize
*
*   @brief
*       Get bits-per-element for specified format
*
*   @param [in]  hLib    Address library handle
*   @param [in]  format  Format to query
*
*   @return
*       Bits-per-element of specified format
*
****************************************************************************************************
*/
UINT_32 ADDR_API ElemSize(
    ADDR_HANDLE hLib,
    AddrFormat  format);

/**
****************************************************************************************************
*   ADDR_CONVERT_TILEINFOTOHW_INPUT
*
*   @brief
*       Input structure for AddrConvertTileInfoToHW
*   @note
*       When reverse is TRUE, indices are ignored
****************************************************************************************************
*/
typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes
    BOOL_32         reverse;            ///< Convert control flag.
                                        ///  FALSE: convert from real value to HW value;
                                        ///  TRUE: convert from HW value to real value.

    /// r800 and later HWL parameters
    ADDR_TILEINFO*  pTileInfo;          ///< Tile parameters with real value

    INT_32          tileIndex;          ///< Tile index; MUST be -1 if you don't want to use it
                                        ///  while the global useTileIndex is set to 1
    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI).
                                        ///  NOTE: When tileIndex is not -1, this must be valid
    UINT_32         bpp;                ///< Bits per pixel
} ADDR_CONVERT_TILEINFOTOHW_INPUT;

/**
****************************************************************************************************
*   ADDR_CONVERT_TILEINFOTOHW_OUTPUT
*
*   @brief
*       Output structure for AddrConvertTileInfoToHW
*   @note
*       This is the type of the pOut argument of AddrConvertTileInfoToHW.
****************************************************************************************************
*/
typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    /// r800 and later HWL parameters
    ADDR_TILEINFO*      pTileInfo;          ///< Tile parameters with hardware register value

} ADDR_CONVERT_TILEINFOTOHW_OUTPUT;

/**
****************************************************************************************************
*   AddrConvertTileInfoToHW
*
*   @brief
*       Convert tile info from real value to hardware register value
*       (or the other way round when ADDR_CONVERT_TILEINFOTOHW_INPUT::reverse is TRUE)
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_CONVERT_TILEINFOTOHW_INPUT
*   @param [out] pOut  Converted tile info, see ADDR_CONVERT_TILEINFOTOHW_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
    ADDR_HANDLE                             hLib,
    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,
    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut);


/**
****************************************************************************************************
*   ADDR_CONVERT_TILEINDEX_INPUT
*
*   @brief
*       Input structure for AddrConvertTileIndex
****************************************************************************************************
*/
typedef struct _ADDR_CONVERT_TILEINDEX_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    INT_32          tileIndex;          ///< Tile index
    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
    UINT_32         bpp;                ///< Bits per pixel
    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
                                        ///  values
} ADDR_CONVERT_TILEINDEX_INPUT;

/**
****************************************************************************************************
*   ADDR_CONVERT_TILEINDEX_OUTPUT
*
*   @brief
*       Output structure for AddrConvertTileIndex
*   @note
*       Also used as the pOut type of AddrConvertTileIndex1.
****************************************************************************************************
*/
typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT
{
    UINT_32             size;           ///< Size of this structure in bytes

    AddrTileMode        tileMode;       ///< Tile mode
    AddrTileType        tileType;       ///< Tile type
    ADDR_TILEINFO*      pTileInfo;      ///< Tile info

} ADDR_CONVERT_TILEINDEX_OUTPUT;

/**
****************************************************************************************************
*   AddrConvertTileIndex
*
*   @brief
*       Convert tile index to tile mode/type/info
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_CONVERT_TILEINDEX_INPUT
*   @param [out] pOut  Tile mode/type/info, see ADDR_CONVERT_TILEINDEX_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
    ADDR_HANDLE                         hLib,
    const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
    ADDR_CONVERT_TILEINDEX_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR_GET_MACROMODEINDEX_INPUT
*
*   @brief
*       Input structure for AddrGetMacroModeIndex
****************************************************************************************************
*/
typedef struct _ADDR_GET_MACROMODEINDEX_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    ADDR_SURFACE_FLAGS  flags;              ///< Surface flags
    INT_32              tileIndex;          ///< Tile index
    UINT_32             bpp;                ///< Bits per pixel
    UINT_32             numFrags;           ///< Number of color fragments
} ADDR_GET_MACROMODEINDEX_INPUT;

/**
****************************************************************************************************
*   ADDR_GET_MACROMODEINDEX_OUTPUT
*
*   @brief
*       Output structure for AddrGetMacroModeIndex
*   @note
*       This is the type of the pOut argument of AddrGetMacroModeIndex.
****************************************************************************************************
*/
typedef struct _ADDR_GET_MACROMODEINDEX_OUTPUT
{
    UINT_32             size;            ///< Size of this structure in bytes
    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
} ADDR_GET_MACROMODEINDEX_OUTPUT;

/**
****************************************************************************************************
*   AddrGetMacroModeIndex
*
*   @brief
*       Get macro mode index based on input parameters
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_GET_MACROMODEINDEX_INPUT
*   @param [out] pOut  Macro mode index, see ADDR_GET_MACROMODEINDEX_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex(
    ADDR_HANDLE                          hLib,
    const ADDR_GET_MACROMODEINDEX_INPUT* pIn,
    ADDR_GET_MACROMODEINDEX_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR_CONVERT_TILEINDEX1_INPUT
*
*   @brief
*       Input structure for AddrConvertTileIndex1 (without macro mode index)
*   @note
*       Compared with ADDR_CONVERT_TILEINDEX_INPUT, this structure has numSamples and has no
*       macroModeIndex.
****************************************************************************************************
*/
typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT
{
    UINT_32         size;               ///< Size of this structure in bytes

    INT_32          tileIndex;          ///< Tile index
    UINT_32         bpp;                ///< Bits per pixel
    UINT_32         numSamples;         ///< Number of samples
    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
                                        ///  values
} ADDR_CONVERT_TILEINDEX1_INPUT;

/**
****************************************************************************************************
*   AddrConvertTileIndex1
*
*   @brief
*       Convert tile index to tile mode/type/info (input carries no macro mode index)
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_CONVERT_TILEINDEX1_INPUT
*   @param [out] pOut  Tile mode/type/info, see ADDR_CONVERT_TILEINDEX_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
    ADDR_HANDLE                             hLib,
    const ADDR_CONVERT_TILEINDEX1_INPUT*    pIn,
    ADDR_CONVERT_TILEINDEX_OUTPUT*          pOut);


/**
****************************************************************************************************
*   ADDR_GET_TILEINDEX_INPUT
*
*   @brief
*       Input structure for AddrGetTileIndex
****************************************************************************************************
*/
typedef struct _ADDR_GET_TILEINDEX_INPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    AddrTileMode    tileMode;       ///< Tile mode
    AddrTileType    tileType;       ///< Tile type: disp/non-disp/...
    ADDR_TILEINFO*  pTileInfo;      ///< Pointer to tile-info structure, can be NULL for linear/1D
} ADDR_GET_TILEINDEX_INPUT;

/**
****************************************************************************************************
*   ADDR_GET_TILEINDEX_OUTPUT
*
*   @brief
*       Output structure for AddrGetTileIndex
*   @note
*       This is the type of the pOut argument of AddrGetTileIndex.
****************************************************************************************************
*/
typedef struct _ADDR_GET_TILEINDEX_OUTPUT
{
    UINT_32         size;           ///< Size of this structure in bytes

    INT_32          index;          ///< Tiling mode index in table
} ADDR_GET_TILEINDEX_OUTPUT;

/**
****************************************************************************************************
*   AddrGetTileIndex
*
*   @brief
*       Get the tiling mode index in table
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_GET_TILEINDEX_INPUT
*   @param [out] pOut  Resulting index, see ADDR_GET_TILEINDEX_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
    ADDR_HANDLE                     hLib,
    const ADDR_GET_TILEINDEX_INPUT* pIn,
    ADDR_GET_TILEINDEX_OUTPUT*      pOut);


/**
****************************************************************************************************
*   ADDR_PRT_INFO_INPUT
*
*   @brief
*       Input structure for AddrComputePrtInfo
*   @note
*       Unlike most input structures in this interface, this one has no leading size field.
****************************************************************************************************
*/
typedef struct _ADDR_PRT_INFO_INPUT
{
    AddrFormat          format;        ///< Surface format
    UINT_32             baseMipWidth;  ///< Base mipmap width
    UINT_32             baseMipHeight; ///< Base mipmap height
    UINT_32             baseMipDepth;  ///< Base mipmap depth
    UINT_32             numFrags;      ///< Number of fragments
} ADDR_PRT_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR_PRT_INFO_OUTPUT
*
*   @brief
*       Output structure for AddrComputePrtInfo
*   @note
*       Unlike most output structures in this interface, this one has no leading size field.
****************************************************************************************************
*/
typedef struct _ADDR_PRT_INFO_OUTPUT
{
    UINT_32             prtTileWidth;   ///< PRT tile width (units not stated here - TODO confirm)
    UINT_32             prtTileHeight;  ///< PRT tile height (units not stated here - TODO confirm)
} ADDR_PRT_INFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputePrtInfo
*
*   @brief
*       Compute PRT surface related information
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_PRT_INFO_INPUT
*   @param [out] pOut  PRT tile dimensions, see ADDR_PRT_INFO_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
    ADDR_HANDLE                 hLib,
    const ADDR_PRT_INFO_INPUT*  pIn,
    ADDR_PRT_INFO_OUTPUT*       pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     DCC key functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR_COMPUTE_DCCINFO_INPUT
*
*   @brief
*       Input structure of AddrComputeDccInfo
*   @note
*       tileInfo is embedded by value here, whereas most other input structures in this
*       interface carry an ADDR_TILEINFO pointer.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_DCCINFO_INPUT
{
    UINT_32             size;            ///< Size of this structure in bytes
    UINT_32             bpp;             ///< Bits per pixel of color surface
    UINT_32             numSamples;      ///< Sample number of color surface
    UINT_64             colorSurfSize;   ///< Size of color surface to which dcc key is bound
    AddrTileMode        tileMode;        ///< Tile mode of color surface
    ADDR_TILEINFO       tileInfo;        ///< Tile info of color surface
    UINT_32             tileSwizzle;     ///< Tile swizzle
    INT_32              tileIndex;       ///< Tile index of color surface;
                                         ///  MUST be -1 if you don't want to use it
                                         ///  while the global useTileIndex is set to 1
    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI).
                                         ///  NOTE: When tileIndex is not -1, this must be valid
} ADDR_COMPUTE_DCCINFO_INPUT;

/**
****************************************************************************************************
*   ADDR_COMPUTE_DCCINFO_OUTPUT
*
*   @brief
*       Output structure of AddrComputeDccInfo
*   @note
*       This is the type of the pOut argument of AddrComputeDccInfo.
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
{
    UINT_32 size;                 ///< Size of this structure in bytes
    UINT_32 dccRamBaseAlign;      ///< Base alignment of dcc key
    UINT_64 dccRamSize;           ///< Size of dcc key
    UINT_64 dccFastClearSize;     ///< Size of dcc key portion that can be fast cleared
    BOOL_32 subLvlCompressible;   ///< Whether sub resource is compressible
    BOOL_32 dccRamSizeAligned;    ///< Whether the dcc key size is aligned
} ADDR_COMPUTE_DCCINFO_OUTPUT;

/**
****************************************************************************************************
*   AddrComputeDccInfo
*
*   @brief
*       Compute DCC key size and base alignment info
*   @param [in]  hLib  Address library handle
*   @param [in]  pIn   Input parameters, see ADDR_COMPUTE_DCCINFO_INPUT
*   @param [out] pOut  DCC key info, see ADDR_COMPUTE_DCCINFO_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
    ADDR_HANDLE                             hLib,
    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,
    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut);


/**
****************************************************************************************************
*   ADDR_GET_MAX_ALIGNMENTS_OUTPUT
*
*   @brief
*       Output structure of AddrGetMaxAlignments
*   @note
*       Also used as the pOut type of AddrGetMaxMetaAlignments.
*       The struct tag is spelled ADDR_GET_MAX_ALINGMENTS_OUTPUT (sic) and, unlike the other
*       structures here, has no leading underscore; refer to the type through the typedef name
*       ADDR_GET_MAX_ALIGNMENTS_OUTPUT.
****************************************************************************************************
*/
typedef struct ADDR_GET_MAX_ALINGMENTS_OUTPUT
{
    UINT_32 size;                   ///< Size of this structure in bytes
    UINT_32 baseAlign;              ///< Maximum base alignment in bytes
} ADDR_GET_MAX_ALIGNMENTS_OUTPUT;

/**
****************************************************************************************************
*   AddrGetMaxAlignments
*
*   @brief
*       Gets maximum alignments
*   @param [in]  hLib  Address library handle
*   @param [out] pOut  Maximum alignments, see ADDR_GET_MAX_ALIGNMENTS_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments(
    ADDR_HANDLE                     hLib,
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut);

/**
****************************************************************************************************
*   AddrGetMaxMetaAlignments
*
*   @brief
*       Gets maximum alignments for metadata
*   @param [in]  hLib  Address library handle
*   @param [out] pOut  Maximum alignments, see ADDR_GET_MAX_ALIGNMENTS_OUTPUT
*   @return
*       Return code (ADDR_E_RETURNCODE)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments(
    ADDR_HANDLE                     hLib,
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut);

/**
****************************************************************************************************
*                                Address library interface version 2
*                                    available from Gfx9 hardware
****************************************************************************************************
*     Addr2ComputeSurfaceInfo()
*     Addr2ComputeSurfaceAddrFromCoord()
*     Addr2ComputeSurfaceCoordFromAddr()
*
*     Addr2ComputeHtileInfo()
*     Addr2ComputeHtileAddrFromCoord()
*     Addr2ComputeHtileCoordFromAddr()
*
*     Addr2ComputeCmaskInfo()
*     Addr2ComputeCmaskAddrFromCoord()
*     Addr2ComputeCmaskCoordFromAddr()
*
*     Addr2ComputeFmaskInfo()
*     Addr2ComputeFmaskAddrFromCoord()
*     Addr2ComputeFmaskCoordFromAddr()
*
*     Addr2ComputeDccInfo()
*
**/


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                    Surface functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_SURFACE_FLAGS
*
*   @brief
*       Surface flags
*   @note
*       The 20 single-bit flags plus the 12 reserved bits add up to 32 bits; the same storage
*       can be read or written as a whole through the value member.
****************************************************************************************************
*/
typedef union _ADDR2_SURFACE_FLAGS
{
    struct
    {
        UINT_32 color             :  1; ///< This resource is a color buffer, can be used with RTV
        UINT_32 depth             :  1; ///< This resource is a depth buffer, can be used with DSV
        UINT_32 stencil           :  1; ///< This resource is a stencil buffer, can be used with DSV
        UINT_32 fmask             :  1; ///< This is an fmask surface
        UINT_32 overlay           :  1; ///< This is an overlay surface
        UINT_32 display           :  1; ///< This resource is displayable, can be used with DRV
        UINT_32 prt               :  1; ///< This is a partially resident texture
        UINT_32 qbStereo          :  1; ///< This is a quad buffer stereo surface
        UINT_32 interleaved       :  1; ///< Special flag for interleaved YUV surface padding
        UINT_32 texture           :  1; ///< This resource can be used with SRV
        UINT_32 unordered         :  1; ///< This resource can be used with UAV
        UINT_32 rotated           :  1; ///< This resource is rotated and displayable
        UINT_32 needEquation      :  1; ///< This resource needs equation to be generated if possible
        UINT_32 opt4space         :  1; ///< This resource should be optimized for space
        UINT_32 minimizeAlign     :  1; ///< This resource should use minimum alignment
        UINT_32 noMetadata        :  1; ///< This resource has no metadata
        UINT_32 metaRbUnaligned   :  1; ///< This resource has rb unaligned metadata
        UINT_32 metaPipeUnaligned :  1; ///< This resource has pipe unaligned metadata
        UINT_32 view3dAs2dArray   :  1; ///< This resource is a 3D resource viewed as 2D array
        UINT_32 allowExtEquation  :  1; ///< If unset, only legacy DX eqs are allowed (2 XORs)
        UINT_32 reserved          : 12; ///< Reserved bits
    };

    UINT_32 value;                      ///< All flags viewed as a single word
} ADDR2_SURFACE_FLAGS;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_INFO_INPUT
*
*   @brief
*       Input structure for Addr2ComputeSurfaceInfo
*   @note
*       width, height and numSlices describe mip level 0.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes

    ADDR2_SURFACE_FLAGS   flags;             ///< Surface flags
    AddrSwizzleMode       swizzleMode;       ///< Swizzle Mode for Gfx9
    AddrResourceType      resourceType;      ///< Surface type
    AddrFormat            format;            ///< Surface format
    UINT_32               bpp;               ///< bits per pixel
    UINT_32               width;             ///< Width (of mip0), in pixels
    UINT_32               height;            ///< Height (of mip0), in pixels
    UINT_32               numSlices;         ///< Number of surface slices/depth (of mip0)
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               numSamples;        ///< Number of samples
    UINT_32               numFrags;          ///< Number of fragments, leave it zero or the same as
                                             ///  number of samples for normal AA; Set it to the
                                             ///  number of fragments for EQAA
    UINT_32               pitchInElement;    ///< Pitch in elements (blocks for compressed formats)
    UINT_32               sliceAlign;        ///< Required slice size in bytes
} ADDR2_COMPUTE_SURFACE_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR2_MIP_INFO
*
*   @brief
*       Structure that contains information for one mip level
*   @note
*       ADDR2_COMPUTE_SURFACE_INFO_OUTPUT::pMipInfo points to an array of these entries.
*       "Element" and "pixel" sizes are reported separately (pitch/height vs.
*       pixelPitch/pixelHeight).
****************************************************************************************************
*/
typedef struct _ADDR2_MIP_INFO
{
    UINT_32             pitch;              ///< Pitch in elements
    UINT_32             height;             ///< Padded height in elements
    UINT_32             depth;              ///< Padded depth
    UINT_32             pixelPitch;         ///< Pitch in pixels
    UINT_32             pixelHeight;        ///< Padded height in pixels
    UINT_32             equationIndex;      ///< Equation index in the equation table
    UINT_64             offset;             ///< Offset in bytes from mip base, should only be used
                                            ///  to setup vam surface descriptor, can't be used
                                            ///  to setup swizzle pattern
    UINT_64             macroBlockOffset;   ///< macro block offset in bytes from mip base
    UINT_32             mipTailOffset;      ///< mip tail offset in bytes
    UINT_32             mipTailCoordX;      ///< mip tail coord x
    UINT_32             mipTailCoordY;      ///< mip tail coord y
    UINT_32             mipTailCoordZ;      ///< mip tail coord z
} ADDR2_MIP_INFO;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeSurfaceInfo
*   @note
*       Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
*       Pixel: Original pixel
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_INFO_OUTPUT
{
    UINT_32             size;                 ///< Size of this structure in bytes

    UINT_32             pitch;                ///< Pitch in elements (blocks for compressed formats)
    UINT_32             height;               ///< Padded height (of mip0) in elements
    UINT_32             numSlices;            ///< Padded depth for 3d resource
                                              ///  or padded number of slices for 2d array resource
    UINT_32             mipChainPitch;        ///< Pitch (of total mip chain) in elements
    UINT_32             mipChainHeight;       ///< Padded height (of total mip chain) in elements
    UINT_32             mipChainSlice;        ///< Padded depth (of total mip chain)
    UINT_64             sliceSize;            ///< Slice (total mip chain) size in bytes
    UINT_64             surfSize;             ///< Surface (total mip chain) size in bytes
    UINT_32             baseAlign;            ///< Base address alignment
    UINT_32             bpp;                  ///< Bits per element
                                              ///  (e.g. blocks for BCn, 1/3 for 96bit)
    UINT_32             pixelMipChainPitch;   ///< Mip chain pitch in original pixels
    UINT_32             pixelMipChainHeight;  ///< Mip chain height in original pixels
    UINT_32             pixelPitch;           ///< Pitch in original pixels
    UINT_32             pixelHeight;          ///< Height in original pixels
    UINT_32             pixelBits;            ///< Original bits per pixel, passed from input

    UINT_32             blockWidth;           ///< Width in element inside one block
    UINT_32             blockHeight;          ///< Height in element inside one block
    UINT_32             blockSlices;          ///< Slice number inside one block
                                              ///  Prt tile is one block, its width/height/slice
                                              ///  equals to block width/height/slice

    BOOL_32             epitchIsHeight;       ///< Whether to use height to program epitch register
    /// Stereo info
    ADDR_QBSTEREOINFO*  pStereoInfo;          ///< Stereo info, needed if qbStereo flag is TRUE
    /// Mip info
    ADDR2_MIP_INFO*     pMipInfo;             ///< Pointer to mip information array
                                              ///  if it is not NULL, the array is assumed to
                                              ///  contain numMipLevels entries

    UINT_32             equationIndex;        ///< Equation index in the equation table of mip0
    BOOL_32             mipChainInTail;       ///< If whole mipchain falls into mip tail block
    UINT_32             firstMipIdInTail;     ///< The id of first mip in tail, if there is no mip
                                              ///  in tail, it will be set to number of mip levels
} ADDR2_COMPUTE_SURFACE_INFO_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeSurfaceInfo
*
*   @brief
*       Compute surface width/height/slices/alignments and suitable tiling mode
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Surface description (read-only)
*   @param pOut
*       Receives the computed surface information
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo(
    ADDR_HANDLE                                hLib,
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT*    pIn,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr2ComputeSurfaceAddrFromCoord
*   @note
*       x, y, slice, sample and mipId select the location to translate; the remaining members
*       describe the surface.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
{
    UINT_32             size;            ///< Size of this structure in bytes

    UINT_32             x;               ///< X coordinate
    UINT_32             y;               ///< Y coordinate
    UINT_32             slice;           ///< Slice index
    UINT_32             sample;          ///< Sample index, use fragment index for EQAA
    UINT_32             mipId;           ///< the mip ID in mip chain

    AddrSwizzleMode     swizzleMode;     ///< Swizzle mode for Gfx9
    ADDR2_SURFACE_FLAGS flags;           ///< Surface flags
    AddrResourceType    resourceType;    ///< Surface type
    UINT_32             bpp;             ///< Bits per pixel
    UINT_32             unalignedWidth;  ///< Surface original width (of mip0)
    UINT_32             unalignedHeight; ///< Surface original height (of mip0)
    UINT_32             numSlices;       ///< Surface original slices (of mip0)
    UINT_32             numMipLevels;    ///< Total mipmap levels
    UINT_32             numSamples;      ///< Number of samples
    UINT_32             numFrags;        ///< Number of fragments, leave it zero or the same as
                                         ///  number of samples for normal AA; Set it to the
                                         ///  number of fragments for EQAA

    UINT_32             pipeBankXor;     ///< Combined swizzle used to do bank/pipe rotation
    UINT_32             pitchInElement;  ///< Pitch in elements (blocks for compressed formats)
} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeSurfaceAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;             ///< Size of this structure in bytes

    UINT_64    addr;             ///< Byte offset from the image starting address
    UINT_32    bitPosition;      ///< Bit position within addr, 0-7.
                                 ///  For surface bpp < 8, e.g. FMT_1.
    UINT_32    prtBlockIndex;    ///< Index of a PRT tile (64K block)
} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeSurfaceAddrFromCoord
*
*   @brief
*       Compute surface address from a given coordinate.
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Coordinate and surface description (read-only)
*   @param pOut
*       Receives the byte offset, bit position and PRT block index
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                         hLib,
    const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT*    pIn,
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for Addr2ComputeSurfaceCoordFromAddr
*   @note
*       addr and bitPosition identify the location to translate; the remaining members
*       describe the surface.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT
{
    UINT_32             size;            ///< Size of this structure in bytes

    UINT_64             addr;            ///< Address in bytes
    UINT_32             bitPosition;     ///< Bit position in addr, 0-7. For surface bpp < 8,
                                         ///  e.g. FMT_1.

    AddrSwizzleMode     swizzleMode;     ///< Swizzle mode for Gfx9
    ADDR2_SURFACE_FLAGS flags;           ///< Surface flags
    AddrResourceType    resourceType;    ///< Surface type
    UINT_32             bpp;             ///< Bits per pixel
    UINT_32             unalignedWidth;  ///< Surface original width (of mip0)
    UINT_32             unalignedHeight; ///< Surface original height (of mip0)
    UINT_32             numSlices;       ///< Surface original slices (of mip0)
    UINT_32             numMipLevels;    ///< Total mipmap levels.
    UINT_32             numSamples;      ///< Number of samples
    UINT_32             numFrags;        ///< Number of fragments, leave it zero or the same as
                                         ///  number of samples for normal AA; Set it to the
                                         ///  number of fragments for EQAA

    UINT_32             pipeBankXor;     ///< Combined swizzle used to do bank/pipe rotation
    UINT_32             pitchInElement;  ///< Pitch in elements (blocks for compressed formats)
} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeSurfaceCoordFromAddr
*   @note
*       Carries the same location members (x, y, slice, sample, mipId) that
*       ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT takes as input.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
{
    UINT_32    size;       ///< Size of this structure in bytes

    UINT_32    x;          ///< X coordinate
    UINT_32    y;          ///< Y coordinate
    UINT_32    slice;      ///< Index of slices
    UINT_32    sample;     ///< Index of samples, means fragment index for EQAA
    UINT_32    mipId;      ///< mipmap level id
} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeSurfaceCoordFromAddr
*
*   @brief
*       Compute coordinate from a given surface address
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Address and surface description (read-only)
*   @param pOut
*       Receives x/y/slice/sample/mipId
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr(
    ADDR_HANDLE                                         hLib,
    const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT*    pIn,
    ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*         pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                   HTile functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_META_FLAGS
*
*   @brief
*       Metadata flags
*   @note
*       Three single-bit flags plus 29 reserved bits; the same storage can be accessed as a
*       whole through the value member. Used as hTileFlags / cMaskFlags in the HTILE and CMASK
*       input structures.
****************************************************************************************************
*/
typedef union _ADDR2_META_FLAGS
{
    struct
    {
        UINT_32 pipeAligned :  1;    ///< if Metadata being pipe aligned
        UINT_32 rbAligned   :  1;    ///< if Metadata being RB aligned
        UINT_32 linear      :  1;    ///< if Metadata linear, GFX9 does not support this!
        UINT_32 reserved    : 29;    ///< Reserved bits
    };

    UINT_32 value;                   ///< All flags viewed as a single word
} ADDR2_META_FLAGS;

/**
****************************************************************************************************
*   ADDR2_META_MIP_INFO
*
*   @brief
*       Structure to store per mip metadata information
*   @note
*       The two anonymous structs share storage (union); only the second one is labelled
*       (GFX10). Pointed to by pMipInfo in the HTILE and CMASK info output structures.
****************************************************************************************************
*/
typedef struct _ADDR2_META_MIP_INFO
{
    BOOL_32    inMiptail;           // NOTE(review): presumably TRUE when this mip lives in the
                                    // mip tail - confirm against the implementation
    union
    {
        // Unlabelled variant. NOTE(review): presumably the pre-GFX10 layout; member names
        // suggest a start coordinate (startX/Y/Z) and an extent (width/height/depth) of the
        // mip's metadata - units are not stated here, confirm.
        struct
        {
            UINT_32    startX;
            UINT_32    startY;
            UINT_32    startZ;
            UINT_32    width;
            UINT_32    height;
            UINT_32    depth;
        };

        // GFX10
        struct
        {
            UINT_32    offset;      ///< Metadata offset within one slice,
                                    ///  the thickness of a slice is meta block depth.
            UINT_32    sliceSize;   ///< Metadata size within one slice,
                                    ///  the thickness of a slice is meta block depth.
        };
    };
} ADDR2_META_MIP_INFO;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_INFO_INPUT
*
*   @brief
*       Input structure of Addr2ComputeHtileInfo
*   @note
*       NOTE(review): numMipLevels is described as levels "of color surface" although every
*       other member here refers to the depth surface - presumably a copy/paste leftover,
*       confirm before relying on the wording.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    ADDR2_META_FLAGS    hTileFlags;         ///< HTILE flags
    ADDR2_SURFACE_FLAGS depthFlags;         ///< Depth surface flags
    AddrSwizzleMode     swizzleMode;        ///< Depth surface swizzle mode
    UINT_32             unalignedWidth;     ///< Depth surface original width (of mip0)
    UINT_32             unalignedHeight;    ///< Depth surface original height (of mip0)
    UINT_32             numSlices;          ///< Number of slices of depth surface (of mip0)
    UINT_32             numMipLevels;       ///< Total mipmap levels of color surface
    UINT_32             firstMipIdInTail;   ///< Id of the first mip in tail,
                                            ///  if no mip is in tail, it should be set to
                                            ///  number of mip levels
                                            ///  Only for GFX10
} ADDR2_COMPUTE_HTILE_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_INFO_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeHtileInfo
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_INFO_OUTPUT
{
    UINT_32    size;                ///< Size of this structure in bytes

    UINT_32    pitch;               ///< Pitch in pixels of depth buffer represented in this
                                    ///  HTile buffer. This might be larger than original depth
                                    ///  buffer pitch when called with an unaligned pitch.
    UINT_32    height;              ///< Height in pixels, as above
    UINT_32    baseAlign;           ///< Base alignment
    UINT_32    sliceSize;           ///< Slice size, in bytes.
    UINT_32    htileBytes;          ///< Size of HTILE buffer, in bytes
    UINT_32    metaBlkWidth;        ///< Meta block width
    UINT_32    metaBlkHeight;       ///< Meta block height
    UINT_32    metaBlkNumPerSlice;  ///< Number of metablock within one slice

    ADDR2_META_MIP_INFO* pMipInfo;  ///< HTILE mip information

    /* NOTE(review): presumably the HTILE counterpart of the `equation` member of
     * ADDR2_COMPUTE_CMASK_INFO_OUTPUT (address equation for use in shaders); only a
     * GFX10 variant exists here. Who owns the pointed-to array is not visible in this
     * header - confirm against the implementation. */
    struct {
      UINT_16* gfx10_bits; /* 72 2-byte elements */
   } equation;
} ADDR2_COMPUTE_HTILE_INFO_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeHtileInfo
*
*   @brief
*       Compute Htile pitch, height, base alignment and size in bytes
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Depth surface / HTILE description (read-only)
*   @param pOut
*       Receives the computed HTILE information
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo(
    ADDR_HANDLE                              hLib,
    const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr2ComputeHtileAddrFromCoord
*   @note
*       The depth surface flags member is spelled depthflags (lower-case 'f') in this
*       structure, unlike depthFlags in ADDR2_COMPUTE_HTILE_INFO_INPUT and
*       ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT. The name is part of the public interface
*       and is kept as is.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_32             x;                   ///< X coordinate
    UINT_32             y;                   ///< Y coordinate
    UINT_32             slice;               ///< Index of slices
    UINT_32             mipId;               ///< mipmap level id

    ADDR2_META_FLAGS    hTileFlags;          ///< HTILE flags
    ADDR2_SURFACE_FLAGS depthflags;          ///< Depth surface flags
    AddrSwizzleMode     swizzleMode;         ///< Depth surface swizzle mode
    UINT_32             bpp;                 ///< Depth surface bits per pixel
    UINT_32             unalignedWidth;      ///< Depth surface original width (of mip0)
    UINT_32             unalignedHeight;     ///< Depth surface original height (of mip0)
    UINT_32             numSlices;           ///< Depth surface original depth (of mip0)
    UINT_32             numMipLevels;        ///< Depth surface total mipmap levels
    UINT_32             numSamples;          ///< Depth surface number of samples
    UINT_32             pipeXor;             ///< Pipe xor setting
} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeHtileAddrFromCoord
*   @note
*       Unlike the surface and CMASK AddrFromCoord outputs, this structure has no
*       bitPosition member; only a byte address is reported.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;    ///< Size of this structure in bytes

    UINT_64    addr;    ///< Address in bytes
} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeHtileAddrFromCoord
*
*   @brief
*       Compute Htile address according to coordinates (of depth buffer)
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Depth buffer coordinate and surface description (read-only)
*   @param pOut
*       Receives the HTILE address in bytes
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*    pIn,
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for Addr2ComputeHtileCoordFromAddr
*   @note
*       addr identifies the HTILE location to translate; the remaining members describe the
*       depth surface.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_64             addr;                ///< Address

    ADDR2_META_FLAGS    hTileFlags;          ///< HTILE flags
    ADDR2_SURFACE_FLAGS depthFlags;          ///< Depth surface flags
    AddrSwizzleMode     swizzleMode;         ///< Depth surface swizzle mode
    UINT_32             bpp;                 ///< Depth surface bits per pixel
    UINT_32             unalignedWidth;      ///< Depth surface original width (of mip0)
    UINT_32             unalignedHeight;     ///< Depth surface original height (of mip0)
    UINT_32             numSlices;           ///< Depth surface original depth (of mip0)
    UINT_32             numMipLevels;        ///< Depth surface total mipmap levels
    UINT_32             numSamples;          ///< Depth surface number of samples
    UINT_32             pipeXor;             ///< Pipe xor setting
} ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeHtileCoordFromAddr
*   @note
*       Per the description of Addr2ComputeHtileCoordFromAddr, the coordinates refer to the
*       1st pixel of a micro tile within the depth buffer.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
{
    UINT_32    size;        ///< Size of this structure in bytes

    UINT_32    x;           ///< X coordinate
    UINT_32    y;           ///< Y coordinate
    UINT_32    slice;       ///< Index of slices
    UINT_32    mipId;       ///< mipmap level id
} ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeHtileCoordFromAddr
*
*   @brief
*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
*       Htile address
*
*   @param hLib
*       Address library handle
*   @param pIn
*       HTILE address and depth surface description (read-only)
*   @param pOut
*       Receives x/y/slice/mipId
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*    pIn,
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*         pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     C-mask functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_INFO_INPUT
*
*   @brief
*       Input structure of Addr2ComputeCmaskInfo
*   @note
*       The struct tag is _ADDR2_COMPUTE_CMASKINFO_INPUT (no underscore between CMASK and
*       INFO), which differs from the typedef name; it is kept as is for compatibility.
*       NOTE(review): swizzleMode is described as the "FMask surface" swizzle mode while the
*       other members refer to the color surface - confirm which surface is meant.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASKINFO_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    ADDR2_META_FLAGS    cMaskFlags;         ///< CMASK flags
    ADDR2_SURFACE_FLAGS colorFlags;         ///< Color surface flags
    AddrResourceType    resourceType;       ///< Color surface type
    AddrSwizzleMode     swizzleMode;        ///< FMask surface swizzle mode
    UINT_32             unalignedWidth;     ///< Color surface original width
    UINT_32             unalignedHeight;    ///< Color surface original height
    UINT_32             numSlices;          ///< Number of slices of color buffer
    UINT_32             numMipLevels;       ///< Number of mip levels
    UINT_32             firstMipIdInTail;   ///< The id of first mip in tail, if no mip is in tail,
                                            ///  it should be number of mip levels
                                            ///  Only for GFX10
} ADDR2_COMPUTE_CMASK_INFO_INPUT;

/* DCC addr meta equation for GFX9.
 *
 * Although labelled "DCC", the same type is embedded as the gfx9 variant of the
 * `equation` member of ADDR2_COMPUTE_CMASK_INFO_OUTPUT.
 *
 * Layout: up to 32 equation bits (bit[]), each described by up to 8 coordinate terms
 * (coord[]), where every term is a (dim, ord) pair.
 * NOTE(review): how the terms of one bit are combined (presumably XOR) is not stated
 * in this header - confirm against the implementation.
 * NOTE(review): the "0..num_coords" comment refers to a count that is not a member of
 * this struct; presumably unused terms are marked with dim == 5 (invalid) - confirm.
 */
struct gfx9_addr_meta_equation {
   UINT_8 num_bits; /* number of valid entries in bit[] (see "0..num_bits" below) */

   struct {
      struct {
         UINT_8 dim; /* 0..4 as index, 5 means invalid */
         UINT_8 ord; /* 0..31 */
      } coord[8]; /* 0..num_coords */
   } bit[32]; /* 0..num_bits */

   UINT_8 numPipeBits;
};

/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_INFO_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeCmaskInfo
*   @note
*       The equation member is a union: the gfx9 variant is stored inline, the gfx10 variant
*       is a pointer to an array of 2-byte elements.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASK_INFO_OUTPUT
{
    UINT_32    size;          ///< Size of this structure in bytes

    UINT_32    pitch;         ///< Pitch in pixels of color buffer which
                              ///  this Cmask matches. The size might be larger than
                              ///  original color buffer pitch when called with
                              ///  an unaligned pitch.
    UINT_32    height;        ///< Height in pixels, as above
    UINT_32    baseAlign;     ///< Base alignment
    UINT_32    sliceSize;     ///< Slice size, in bytes.
    UINT_32    cmaskBytes;    ///< Size in bytes of CMask buffer
    UINT_32    metaBlkWidth;  ///< Meta block width
    UINT_32    metaBlkHeight; ///< Meta block height

    UINT_32    metaBlkNumPerSlice;  ///< Number of metablock within one slice

    ADDR2_META_MIP_INFO* pMipInfo;  ///< CMASK mip information

    /* The equation for doing CMASK address computations in shaders. */
    union {
       /* This is chip-specific, and it varies with:
        * - resource type
        * - swizzle_mode
        * - bpp
        * - pipe_aligned
        * - rb_aligned
        */
       struct gfx9_addr_meta_equation gfx9;

       /* This is chip-specific, it requires 64KB_Z_X. */
       UINT_16 *gfx10_bits; /* 68 2-byte elements */
    } equation;
} ADDR2_COMPUTE_CMASK_INFO_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeCmaskInfo
*
*   @brief
*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
*       info
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Color surface / CMASK description (read-only)
*   @param pOut
*       Receives the computed CMASK information
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo(
    ADDR_HANDLE                              hLib,
    const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr2ComputeCmaskAddrFromCoord
*   @note
*       x, y and slice select the location to translate; the remaining members describe the
*       color surface and its CMASK. There is no mipId / numMipLevels member here.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_32             x;                   ///< X coordinate
    UINT_32             y;                   ///< Y coordinate
    UINT_32             slice;               ///< Index of slices

    ADDR2_META_FLAGS    cMaskFlags;          ///< CMASK flags
    ADDR2_SURFACE_FLAGS colorFlags;          ///< Color surface flags
    AddrResourceType    resourceType;        ///< Color surface type
    AddrSwizzleMode     swizzleMode;         ///< FMask surface swizzle mode

    UINT_32             unalignedWidth;      ///< Color surface original width (of mip0)
    UINT_32             unalignedHeight;     ///< Color surface original height (of mip0)
    UINT_32             numSlices;           ///< Color surface original slices (of mip0)

    UINT_32             numSamples;          ///< Color surface sample number
    UINT_32             numFrags;            ///< Color surface fragment number

    UINT_32             pipeXor;             ///< pipe Xor setting
} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeCmaskAddrFromCoord
*   @note
*       The location is reported as a byte address plus a bit position inside that byte.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;           ///< Size of this structure in bytes

    UINT_64    addr;           ///< CMASK address in bytes
    UINT_32    bitPosition;    ///< Bit position within addr, 0 or 4
} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeCmaskAddrFromCoord
*
*   @brief
*       Compute Cmask address according to coordinates (of MSAA color buffer)
*
*   @param hLib
*       Address library handle
*   @param pIn
*       Color buffer coordinate and surface description (read-only)
*   @param pOut
*       Receives the CMASK address in bytes and the bit position within it
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord(
    ADDR_HANDLE                                      hLib,
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for Addr2ComputeCmaskCoordFromAddr
*   @note
*       addr and bitPosition identify the CMASK location to translate; the remaining members
*       describe the color surface and its CMASK.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_64             addr;                ///< CMASK address in bytes
    UINT_32             bitPosition;         ///< Bit position within addr, 0 or 4

    ADDR2_META_FLAGS    cMaskFlags;          ///< CMASK flags
    ADDR2_SURFACE_FLAGS colorFlags;          ///< Color surface flags
    AddrResourceType    resourceType;        ///< Color surface type
    AddrSwizzleMode     swizzleMode;         ///< FMask surface swizzle mode

    UINT_32             unalignedWidth;      ///< Color surface original width (of mip0)
    UINT_32             unalignedHeight;     ///< Color surface original height (of mip0)
    UINT_32             numSlices;           ///< Color surface original slices (of mip0)
    UINT_32             numMipLevels;        ///< Color surface total mipmap levels.
} ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeCmaskCoordFromAddr
*   @note
*       Per the description of Addr2ComputeCmaskCoordFromAddr, the coordinates refer to the
*       1st pixel of a micro tile within the color buffer.
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
{
    UINT_32    size;        ///< Size of this structure in bytes

    UINT_32    x;           ///< X coordinate
    UINT_32    y;           ///< Y coordinate
    UINT_32    slice;       ///< Index of slices
    UINT_32    mipId;       ///< mipmap level id
} ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeCmaskCoordFromAddr
*
*   @brief
*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
*       Cmask address
*
*   @param hLib
*       Address library handle
*   @param pIn
*       CMASK address and color surface description (read-only)
*   @param pOut
*       Receives x/y/slice/mipId
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT*    pIn,
    ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*         pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     F-mask functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_FMASK_FLAGS
*
*   @brief
*       FMASK flags
****************************************************************************************************
*/
typedef union _ADDR2_FMASK_FLAGS
{
    struct
    {
        UINT_32 resolved :  1;    ///< TRUE if this is a resolved fmask, used by H/W clients.
                                  ///  S/W should always set it to FALSE.
        UINT_32 reserved : 31;    ///< Reserved for future use.
    };

    UINT_32 value;                ///< All flag bits accessed as a single 32-bit value
} ADDR2_FMASK_FLAGS;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_INFO_INPUT
*
*   @brief
*       Input structure for Addr2ComputeFmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_INFO_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    AddrSwizzleMode     swizzleMode;        ///< FMask surface swizzle mode
    UINT_32             unalignedWidth;     ///< Color surface original width
    UINT_32             unalignedHeight;    ///< Color surface original height
    UINT_32             numSlices;          ///< Number of slices/depth
    UINT_32             numSamples;         ///< Number of samples
    UINT_32             numFrags;           ///< Number of fragments. Leave it zero or equal to
                                            ///  the number of samples for normal AA; set it to
                                            ///  the number of fragments for EQAA
    ADDR2_FMASK_FLAGS   fMaskFlags;         ///< FMASK flags
} ADDR2_COMPUTE_FMASK_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_INFO_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeFmaskInfo
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_INFO_OUTPUT
{
    UINT_32    size;           ///< Size of this structure in bytes

    UINT_32    pitch;          ///< Pitch of fmask in pixels
    UINT_32    height;         ///< Height of fmask in pixels
    UINT_32    baseAlign;      ///< Base alignment
    UINT_32    numSlices;      ///< Slices of fmask
    UINT_32    fmaskBytes;     ///< Size of fmask in bytes
    UINT_32    bpp;            ///< Bits per pixel of FMASK, i.e. the number of bit planes
    UINT_32    numSamples;     ///< Number of samples
    UINT_32    sliceSize;      ///< Size of slice in bytes
} ADDR2_COMPUTE_FMASK_INFO_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeFmaskInfo
*
*   @brief
*       Compute Fmask pitch/height/slices/alignments and size in bytes
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_FMASK_INFO_INPUT)
*   @param pOut  Output structure (ADDR2_COMPUTE_FMASK_INFO_OUTPUT)
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo(
    ADDR_HANDLE                              hLib,
    const ADDR2_COMPUTE_FMASK_INFO_INPUT*    pIn,
    ADDR2_COMPUTE_FMASK_INFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr2ComputeFmaskAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
{
    UINT_32            size;               ///< Size of this structure in bytes

    AddrSwizzleMode    swizzleMode;        ///< FMask surface swizzle mode
    UINT_32            x;                  ///< X coordinate
    UINT_32            y;                  ///< Y coordinate
    UINT_32            slice;              ///< Slice index
    UINT_32            sample;             ///< Sample index (fragment index for EQAA)
    UINT_32            plane;              ///< Plane number

    UINT_32            unalignedWidth;     ///< Color surface original width
    UINT_32            unalignedHeight;    ///< Color surface original height
    UINT_32            numSamples;         ///< Number of samples
    UINT_32            numFrags;           ///< Number of fragments. Leave it zero or equal to
                                           ///  the number of samples for normal AA; set it to
                                           ///  the number of fragments for EQAA
    UINT_32            tileSwizzle;        ///< Combined swizzle used to do bank/pipe rotation

    ADDR2_FMASK_FLAGS  fMaskFlags; ///< FMASK flags
} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeFmaskAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;           ///< Size of this structure in bytes

    UINT_64    addr;           ///< Fmask address
    UINT_32    bitPosition;    ///< Bit position within addr, 0-7.
} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeFmaskAddrFromCoord
*
*   @brief
*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)
*   @param pOut  Output structure receiving the address and bit position
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*    pIn,
    ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT
*
*   @brief
*       Input structure for Addr2ComputeFmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT
{
    UINT_32            size;               ///< Size of this structure in bytes

    UINT_64            addr;               ///< Fmask address to convert
    UINT_32            bitPosition;        ///< Bit position within addr, 0-7.
    AddrSwizzleMode    swizzleMode;        ///< FMask surface swizzle mode

    UINT_32            unalignedWidth;     ///< Color surface original width
    UINT_32            unalignedHeight;    ///< Color surface original height
    UINT_32            numSamples;         ///< Number of samples
    UINT_32            numFrags;           ///< Number of fragments

    UINT_32            tileSwizzle;        ///< Combined swizzle used to do bank/pipe rotation

    ADDR2_FMASK_FLAGS  fMaskFlags; ///< FMASK flags
} ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeFmaskCoordFromAddr
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
{
    UINT_32    size;      ///< Size of this structure in bytes

    // Same coordinate tuple (x, y, slice, sample, plane) that
    // ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT takes as input.
    UINT_32    x;         ///< X coordinate
    UINT_32    y;         ///< Y coordinate
    UINT_32    slice;     ///< Slice index
    UINT_32    sample;    ///< Sample index (fragment index for EQAA)
    UINT_32    plane;     ///< Plane number
} ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeFmaskCoordFromAddr
*
*   @brief
*       Compute FMASK coordinate from a given address
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT)
*   @param pOut  Output structure receiving x / y / slice / sample / plane
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT*    pIn,
    ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*         pOut);


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     DCC key functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_COMPUTE_DCCINFO_INPUT
*
*   @brief
*       Input structure of Addr2ComputeDccInfo
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes

    ADDR2_META_FLAGS    dccKeyFlags;        ///< DCC key flags
    ADDR2_SURFACE_FLAGS colorFlags;         ///< Color surface flags
    AddrResourceType    resourceType;       ///< Color surface type
    AddrSwizzleMode     swizzleMode;        ///< Color surface swizzle mode
    UINT_32             bpp;                ///< Bits per pixel
    UINT_32             unalignedWidth;     ///< Color surface original width (of mip0)
    UINT_32             unalignedHeight;    ///< Color surface original height (of mip0)
    UINT_32             numSlices;          ///< Number of slices, of color surface (of mip0)
    UINT_32             numFrags;           ///< Fragment number of color surface
    UINT_32             numMipLevels;       ///< Total mipmap levels of color surface
    UINT_32             dataSurfaceSize;    ///< The padded size of all slices and mip levels,
                                            ///  useful in meta linear case
    UINT_32             firstMipIdInTail;   ///< The id of first mip in tail, if no mip is in tail,
                                            ///  it should be number of mip levels
                                            ///  Only for GFX10
} ADDR2_COMPUTE_DCCINFO_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_DCCINFO_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeDccInfo
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT
{
    UINT_32    size;               ///< Size of this structure in bytes

    UINT_32    dccRamBaseAlign;    ///< Base alignment of dcc key
    UINT_32    dccRamSize;         ///< Size of dcc key

    UINT_32    pitch;              ///< DCC surface mip chain pitch
    UINT_32    height;             ///< DCC surface mip chain height
    UINT_32    depth;              ///< DCC surface mip chain depth

    UINT_32    compressBlkWidth;   ///< DCC compress block width
    UINT_32    compressBlkHeight;  ///< DCC compress block height
    UINT_32    compressBlkDepth;   ///< DCC compress block depth

    UINT_32    metaBlkWidth;       ///< DCC meta block width
    UINT_32    metaBlkHeight;      ///< DCC meta block height
    UINT_32    metaBlkDepth;       ///< DCC meta block depth
    UINT_32    metaBlkSize;        ///< DCC meta block size in bytes
    UINT_32    metaBlkNumPerSlice; ///< Number of metablock within one slice

    // The two members below are alternatives sharing the same storage.
    union
    {
        UINT_32 fastClearSizePerSlice;  ///< Size of DCC within a slice should be fast cleared
        UINT_32 dccRamSliceSize;        ///< DCC ram size per slice. For mipmap, it's
                                        ///  the slice size of a mip chain, the thickness of
                                        ///  a slice is meta block depth
                                        ///  Only for GFX10
    };

    ADDR2_META_MIP_INFO* pMipInfo;      ///< DCC mip information

    /* The equation for doing DCC address computations in shaders. */
    union {
       /* This is chip-specific, and it varies with:
        * - resource type
        * - swizzle_mode
        * - bpp
        * - number of fragments
        * - pipe_aligned
        * - rb_aligned
        */
       struct gfx9_addr_meta_equation gfx9;

       /* This is chip-specific, it requires 64KB_R_X, and it varies with:
        * - bpp
        * - pipe_aligned
        */
       UINT_16 *gfx10_bits; /* 68 2-byte elements */
    } equation;
} ADDR2_COMPUTE_DCCINFO_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeDccInfo
*
*   @brief
*       Compute DCC key size and base alignment info
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_DCCINFO_INPUT)
*   @param pOut  Output structure (ADDR2_COMPUTE_DCCINFO_OUTPUT)
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo(
    ADDR_HANDLE                           hLib,
    const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,
    ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut);


/**
****************************************************************************************************
*   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr2ComputeDccAddrFromCoord
*
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
{
    UINT_32             size;                ///< Size of this structure in bytes

    UINT_32             x;                   ///< X coordinate
    UINT_32             y;                   ///< Y coordinate
    UINT_32             slice;               ///< Slice index
    UINT_32             sample;              ///< Sample index (fragment index for EQAA)
    UINT_32             mipId;               ///< Mipmap level id

    ADDR2_META_FLAGS    dccKeyFlags;         ///< DCC flags
    ADDR2_SURFACE_FLAGS colorFlags;          ///< Color surface flags
    AddrResourceType    resourceType;        ///< Color surface type
    AddrSwizzleMode     swizzleMode;         ///< Color surface swizzle mode
    UINT_32             bpp;                 ///< Color surface bits per pixel
    UINT_32             unalignedWidth;      ///< Color surface original width (of mip0)
    UINT_32             unalignedHeight;     ///< Color surface original height (of mip0)
    UINT_32             numSlices;           ///< Color surface original slices (of mip0)
    UINT_32             numMipLevels;        ///< Color surface mipmap levels
    UINT_32             numFrags;            ///< Color surface fragment number

    UINT_32             pipeXor;             ///< Pipe xor setting
    // The remaining members mirror same-named fields of ADDR2_COMPUTE_DCCINFO_OUTPUT.
    UINT_32             pitch;               ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::pitch
    UINT_32             height;              ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::height
    UINT_32             compressBlkWidth;    ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkWidth
    UINT_32             compressBlkHeight;   ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkHeight
    UINT_32             compressBlkDepth;    ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::compressBlkDepth
    UINT_32             metaBlkWidth;        ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkWidth
    UINT_32             metaBlkHeight;       ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkHeight
    UINT_32             metaBlkDepth;        ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::metaBlkDepth
    UINT_32             dccRamSliceSize;     ///< ADDR2_COMPUTE_DCCINFO_OUTPUT::dccRamSliceSize
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr2ComputeDccAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;           ///< Size of this structure in bytes

    UINT_64    addr;           ///< DCC address, in bytes
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeDccAddrFromCoord
*
*   @brief
*       Compute DCC address according to coordinates (of MSAA color buffer)
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)
*   @param pOut  Output structure receiving the DCC address
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord(
    ADDR_HANDLE                                    hLib,
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*   pIn,
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*        pOut);

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     Misc functions for Gfx9
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   ADDR2_COMPUTE_PIPEBANKXOR_INPUT
*
*   @brief
*       Input structure of Addr2ComputePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             surfIndex;          ///< Input surface index
    ADDR2_SURFACE_FLAGS flags;              ///< Surface flags
    AddrSwizzleMode     swizzleMode;        ///< Surface swizzle mode
    AddrResourceType    resourceType;       ///< Surface resource type
    AddrFormat          format;             ///< Surface format
    UINT_32             numSamples;         ///< Number of samples
    UINT_32             numFrags;           ///< Number of fragments. Leave it zero or equal to
                                            ///  the number of samples for normal AA; set it to
                                            ///  the number of fragments for EQAA
} ADDR2_COMPUTE_PIPEBANKXOR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             pipeBankXor;        ///< Pipe bank xor
} ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputePipeBankXor
*
*   @brief
*       Calculate a valid pipe bank xor value for client to use.
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_PIPEBANKXOR_INPUT)
*   @param pOut  Output structure receiving pipeBankXor
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor(
    ADDR_HANDLE                            hLib,
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
*
*   @brief
*       Input structure of Addr2ComputeSlicePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    AddrSwizzleMode     swizzleMode;        ///< Surface swizzle mode
    AddrResourceType    resourceType;       ///< Surface resource type
    UINT_32             bpe;                ///< Bits per element (e.g. block size for BCn format)
    UINT_32             basePipeBankXor;    ///< Base pipe bank xor
    UINT_32             slice;              ///< Slice id
    UINT_32             numSamples;         ///< Number of samples
} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeSlicePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             pipeBankXor;        ///< Slice pipe bank xor
} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeSlicePipeBankXor
*
*   @brief
*       Calculate slice pipe bank xor value based on base pipe bank xor and slice id.
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)
*   @param pOut  Output structure receiving pipeBankXor
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor(
    ADDR_HANDLE                                  hLib,
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
*
*   @brief
*       Input structure of Addr2ComputeSubResourceOffsetForSwizzlePattern
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    AddrSwizzleMode     swizzleMode;        ///< Surface swizzle mode
    AddrResourceType    resourceType;       ///< Surface resource type
    UINT_32             pipeBankXor;        ///< Per-resource xor
    UINT_32             slice;              ///< Slice id
    UINT_64             sliceSize;          ///< Slice size of a mip chain
    UINT_64             macroBlockOffset;   ///< Macro block offset, returned in ADDR2_MIP_INFO
    UINT_32             mipTailOffset;      ///< Mip tail offset, returned in ADDR2_MIP_INFO
} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeSubResourceOffsetForSwizzlePattern
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_64             offset;             ///< Sub resource offset
} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Calculate sub resource offset to support swizzle pattern.
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)
*   @param pOut  Output structure receiving the offset
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern(
    ADDR_HANDLE                                                     hLib,
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
*
*   @brief
*       Input structure of Addr2ComputeNonBlockCompressedView
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes
    ADDR2_SURFACE_FLAGS   flags;             ///< Surface flags
    AddrSwizzleMode       swizzleMode;       ///< Swizzle Mode for Gfx9
    AddrResourceType      resourceType;      ///< Surface type
    AddrFormat            format;            ///< Surface format
    UINT_32               width;             ///< Width of mip0 in texels (not in compressed block)
    UINT_32               height;            ///< Height of mip0 in texels (not in compressed block)
    UINT_32               numSlices;         ///< Number of surface slices/depth of mip0
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               pipeBankXor;       ///< Combined swizzle used to do bank/pipe rotation
    UINT_32               slice;             ///< Index of slice to view
    UINT_32               mipId;             ///< Id of mip to view
} ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT;

/**
****************************************************************************************************
*   ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT
*
*   @brief
*       Output structure of Addr2ComputeNonBlockCompressedView
****************************************************************************************************
*/
typedef struct _ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_64             offset;             ///< Offset shifted from resource base for the view
    UINT_32             pipeBankXor;        ///< Pipe bank xor for the view
    UINT_32             unalignedWidth;     ///< Mip0 width (in elements) for the view
    UINT_32             unalignedHeight;    ///< Mip0 height (in elements) for the view
    UINT_32             numMipLevels;       ///< Total mipmap levels for the view
    UINT_32             mipId;              ///< Mip ID for the view
} ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT;

/**
****************************************************************************************************
*   Addr2ComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice
*
*   @param hLib  Address library handle
*   @param pIn   Input structure (ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT)
*   @param pOut  Output structure (ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT)
*
*   @return
*       ADDR_E_RETURNCODE status code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeNonBlockCompressedView(
    ADDR_HANDLE                                       hLib,
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR2_BLOCK_SET
*
*   @brief
*       Bit field that defines block type
****************************************************************************************************
*/
typedef union _ADDR2_BLOCK_SET
{
    // Common view of the block-type bits.
    struct
    {
        UINT_32 micro          : 1;   // 256B block for 2D resource
        UINT_32 macroThin4KB   : 1;   // Thin 4KB for 2D/3D resource
        UINT_32 macroThick4KB  : 1;   // Thick 4KB for 3D resource
        UINT_32 macroThin64KB  : 1;   // Thin 64KB for 2D/3D resource
        UINT_32 macroThick64KB : 1;   // Thick 64KB for 3D resource
        UINT_32 var            : 1;   // VAR block
        UINT_32                : 1;   // Unnamed here; named thick256KB in the gfx11 view
        UINT_32 linear         : 1;   // Linear block
        UINT_32 reserved       : 24;  // Remaining bits
    };

    // gfx11 view of the same storage. Its two named bits are declared at the same
    // positions as `var` and the unnamed bit that follows it in the struct above.
    struct
    {
        UINT_32                : 5;   // Skips micro .. macroThick64KB
        UINT_32 thin256KB      : 1;   // Thin 256KB block
        UINT_32 thick256KB     : 1;   // Thick 256KB block
        UINT_32                : 25;  // Skips linear and the reserved bits
    } gfx11;

    UINT_32 value;                    // All bits accessed as a single 32-bit value
} ADDR2_BLOCK_SET;

/**
****************************************************************************************************
*   ADDR2_SWTYPE_SET
*
*   @brief
*       Bit field that defines swizzle type
****************************************************************************************************
*/
typedef union _ADDR2_SWTYPE_SET
{
    struct
    {
        UINT_32 sw_Z     : 1;   // SW_*_Z_*
        UINT_32 sw_S     : 1;   // SW_*_S_*
        UINT_32 sw_D     : 1;   // SW_*_D_*
        UINT_32 sw_R     : 1;   // SW_*_R_*
        UINT_32 reserved : 28;  // Remaining bits
    };

    UINT_32 value;              // All bits accessed as a single 32-bit value
} ADDR2_SWTYPE_SET;

/**
****************************************************************************************************
*   ADDR2_SWMODE_SET
*
*   @brief
*       Bit field that defines swizzle mode
****************************************************************************************************
*/
typedef union _ADDR2_SWMODE_SET
{
    // Common view: one bit per swizzle mode, 32 bits in total. The swMiscDefNN members
    // are placeholders named after their position; the gfx10/gfx11 views below give
    // positions 28..31 generation-specific names.
    struct
    {
        UINT_32 swLinear    : 1;
        UINT_32 sw256B_S    : 1;
        UINT_32 sw256B_D    : 1;
        UINT_32 sw256B_R    : 1;
        UINT_32 sw4KB_Z     : 1;
        UINT_32 sw4KB_S     : 1;
        UINT_32 sw4KB_D     : 1;
        UINT_32 sw4KB_R     : 1;
        UINT_32 sw64KB_Z    : 1;
        UINT_32 sw64KB_S    : 1;
        UINT_32 sw64KB_D    : 1;
        UINT_32 sw64KB_R    : 1;
        UINT_32 swMiscDef12 : 1;
        UINT_32 swMiscDef13 : 1;
        UINT_32 swMiscDef14 : 1;
        UINT_32 swMiscDef15 : 1;
        UINT_32 sw64KB_Z_T  : 1;
        UINT_32 sw64KB_S_T  : 1;
        UINT_32 sw64KB_D_T  : 1;
        UINT_32 sw64KB_R_T  : 1;
        UINT_32 sw4KB_Z_X   : 1;
        UINT_32 sw4KB_S_X   : 1;
        UINT_32 sw4KB_D_X   : 1;
        UINT_32 sw4KB_R_X   : 1;
        UINT_32 sw64KB_Z_X  : 1;
        UINT_32 sw64KB_S_X  : 1;
        UINT_32 sw64KB_D_X  : 1;
        UINT_32 sw64KB_R_X  : 1;
        UINT_32 swMiscDef28 : 1;
        UINT_32 swMiscDef29 : 1;
        UINT_32 swMiscDef30 : 1;
        UINT_32 swMiscDef31 : 1;
    };

    // gfx10 view: names the bits declared at the positions of swMiscDef28 and swMiscDef31.
    struct
    {
        UINT_32             : 28;
        UINT_32 swVar_Z_X   : 1;
        UINT_32             : 2;
        UINT_32 swVar_R_X   : 1;
    } gfx10;

    // gfx11 view: names the bits declared at the positions of swMiscDef28..swMiscDef31.
    struct
    {
        UINT_32             : 28;
        UINT_32 sw256KB_Z_X : 1;
        UINT_32 sw256KB_S_X : 1;
        UINT_32 sw256KB_D_X : 1;
        UINT_32 sw256KB_R_X : 1;
    } gfx11;

    UINT_32 value;          // All bits accessed as a single 32-bit value
} ADDR2_SWMODE_SET;

/**
****************************************************************************************************
*   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
*
*   @brief
*       Input structure of Addr2GetPreferredSurfaceSetting
****************************************************************************************************
*/
typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes

    ADDR2_SURFACE_FLAGS   flags;             ///< Surface flags
    AddrResourceType      resourceType;      ///< Surface type
    AddrFormat            format;            ///< Surface format
    // NOTE(review): "AddrResrouceLocation" / "resourceLoction" look misspelled, but both
    // are API-visible identifiers, so they are left untouched here.
    AddrResrouceLocation  resourceLoction;   ///< Surface heap choice
    ADDR2_BLOCK_SET       forbiddenBlock;    ///< Client can use it to disable some block setting
                                             ///  such as linear for DXTn, tiled for YUV
    ADDR2_SWTYPE_SET      preferredSwSet;    ///< Client can use it to specify sw type(s) wanted
    BOOL_32               noXor;             ///< Do not use xor mode for this resource
    UINT_32               bpp;               ///< Bits per pixel
    UINT_32               width;             ///< Width (of mip0), in pixels
    UINT_32               height;            ///< Height (of mip0), in pixels
    UINT_32               numSlices;         ///< Number of surface slices/depth (of mip0)
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               numSamples;        ///< Number of samples
    UINT_32               numFrags;          ///< Number of fragments. Leave it zero or equal to
                                             ///  the number of samples for normal AA; set it to
                                             ///  the number of fragments for EQAA
    UINT_32               maxAlign;          ///< Maximum base/size alignment requested by client
    UINT_32               minSizeAlign;      ///< Memory allocated for surface in client driver will
                                             ///  be padded to multiple of this value (in bytes)
    DOUBLE                memoryBudget;      ///< Memory consumption ratio based on minimum possible
                                             ///  size.
} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT;

/**
****************************************************************************************************
*   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
*
*   @brief
*       Output structure of Addr2GetPreferredSurfaceSetting
*   @note
*       Addr2GetPossibleSwizzleModes is declared with this same structure as its output.
****************************************************************************************************
*/
typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT
{
    UINT_32               size;                 ///< Size of this structure in bytes

    AddrSwizzleMode       swizzleMode;          ///< Suggested swizzle mode to be used
    AddrResourceType      resourceType;         ///< Suggested resource type to program HW
    ADDR2_BLOCK_SET       validBlockSet;        ///< Valid block type bit combination
    BOOL_32               canXor;               ///< If client can use xor on a valid macro block
                                                ///  type
    ADDR2_SWTYPE_SET      validSwTypeSet;       ///< Valid swizzle type bit combination
    ADDR2_SWTYPE_SET      clientPreferredSwSet; ///< Client-preferred swizzle type bit combination
    ADDR2_SWMODE_SET      validSwModeSet;       ///< Valid swizzle mode bit combination
} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT;

/**
****************************************************************************************************
*   Addr2GetPreferredSurfaceSetting
*
*   @brief
*       Suggest a preferred setting for client driver to program HW register
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting(
    ADDR_HANDLE                                   hLib,
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut);

/**
****************************************************************************************************
*   Addr2GetPossibleSwizzleModes
*
*   @brief
*       Returns a list of swizzle modes that are valid from the hardware's perspective for the
*       client to choose from
*   @note
*       Uses the same input/output structure types as Addr2GetPreferredSurfaceSetting.
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes(
    ADDR_HANDLE                                   hLib,
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut);

/**
****************************************************************************************************
*   Addr2IsValidDisplaySwizzleMode
*
*   @brief
*       Return whether the swizzle mode is supported by display engine
*   @param hLib        [in]  Library handle
*   @param swizzleMode [in]  Swizzle mode to query
*   @param bpp         [in]  Bits per pixel
*   @param pResult     [out] NOTE(review): presumably receives the yes/no answer, since the
*                            function's own return value is a status code - confirm against the
*                            implementation
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
    ADDR_HANDLE     hLib,
    AddrSwizzleMode swizzleMode,
    UINT_32         bpp,
    BOOL_32         *pResult);

/**
****************************************************************************************************
*   Addr2GetAllowedBlockSet
*
*   @brief
*       Returns the set of allowed block sizes given the allowed swizzle modes and resource type
*   @param hLib             [in]  Library handle
*   @param allowedSwModeSet [in]  Allowed swizzle modes (bit set, passed by value)
*   @param rsrcType         [in]  Resource type
*   @param pAllowedBlockSet [out] Resulting set of allowed blocks
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedBlockSet(
    ADDR_HANDLE      hLib,
    ADDR2_SWMODE_SET allowedSwModeSet,
    AddrResourceType rsrcType,
    ADDR2_BLOCK_SET* pAllowedBlockSet);

/**
****************************************************************************************************
*   Addr2GetAllowedSwSet
*
*   @brief
*       Returns the set of allowed swizzle types given the allowed swizzle modes
*   @param hLib             [in]  Library handle
*   @param allowedSwModeSet [in]  Allowed swizzle modes (bit set, passed by value)
*   @param pAllowedSwSet    [out] Resulting set of allowed swizzle types
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedSwSet(
    ADDR_HANDLE       hLib,
    ADDR2_SWMODE_SET  allowedSwModeSet,
    ADDR2_SWTYPE_SET* pAllowedSwSet);

/**
****************************************************************************************************
*   Addr2IsBlockTypeAvailable
*
*   @brief
*       Determine whether a block type is allowed in a given blockSet
*   @note
*       Unlike the surrounding entry points, this one takes no library handle, is declared
*       without ADDR_API, and returns BOOL_32 rather than ADDR_E_RETURNCODE.
*   @param blockSet  [in] Block set to test against (passed by value)
*   @param blockType [in] Block type to look for
****************************************************************************************************
*/
BOOL_32 Addr2IsBlockTypeAvailable(ADDR2_BLOCK_SET blockSet, AddrBlockType blockType);

/**
****************************************************************************************************
*   Addr2BlockTypeWithinMemoryBudget
*
*   @brief
*       Determine whether a new block type is acceptable based on memory waste ratio. Will favor
*       larger block types.
*   @note
*       Declared without ADDR_API and without a library handle.
*       When compiled as C++, memoryBudget and newBlockTypeBigger have default arguments (0.0f,
*       which converts to a DOUBLE zero, and TRUE). C has no default arguments, so the C branch of
*       the #if below requires callers to pass both explicitly.
*   @param minSize            [in] NOTE(review): presumably the smallest candidate size so far
*   @param newBlockTypeSize   [in] NOTE(review): presumably the size with the new block type
*   @param ratioLow           [in] NOTE(review): meaning not documented here - confirm against the
*   @param ratioHi            [in] implementation how ratioLow/ratioHi bound the waste ratio
*   @param memoryBudget       [in] Memory budget ratio
*   @param newBlockTypeBigger [in] NOTE(review): presumably whether the new block type is the
*                                  larger one - confirm
****************************************************************************************************
*/
BOOL_32 Addr2BlockTypeWithinMemoryBudget(
    UINT_64 minSize,
    UINT_64 newBlockTypeSize,
    UINT_32 ratioLow,
    UINT_32 ratioHi,
#if defined(__cplusplus)
    DOUBLE  memoryBudget = 0.0f,
    BOOL_32 newBlockTypeBigger = TRUE);
#else
    DOUBLE  memoryBudget,
    BOOL_32 newBlockTypeBigger);
#endif

/**
****************************************************************************************************
*   ADDR3_SURFACE_FLAGS
*
*   @brief
*       Surface flags
*   @note
*       Eleven single-bit flags plus 21 reserved bits, overlaid with a 32-bit 'value' so the whole
*       set can be read or written at once. hiZHiS, blockCompressed, nv12, p010 and
*       view3dAs2dArray carry no description in this header; consult the implementation for their
*       exact meaning.
****************************************************************************************************
*/
typedef union _ADDR3_SURFACE_FLAGS
{
    struct
    {
        UINT_32 color              : 1; ///< This resource is a color buffer, can be used with RTV
        UINT_32 depth              : 1; ///< This resource is a depth buffer, can be used with DSV
        UINT_32 stencil            : 1; ///< This resource is a stencil buffer, can be used with DSV
        UINT_32 texture            : 1; ///< This resource can be used with SRV
        UINT_32 unordered          : 1; ///< This resource can be used with UAV
        UINT_32 hiZHiS             : 1;
        UINT_32 blockCompressed    : 1;
        UINT_32 nv12               : 1;
        UINT_32 p010               : 1;
        UINT_32 view3dAs2dArray    : 1;
        UINT_32 isVrsImage         : 1; ///< This resource is a VRS source image
        UINT_32 reserved           : 21; ///< Reserved bits
    };

    UINT_32 value; ///< All flag bits viewed as one 32-bit word
} ADDR3_SURFACE_FLAGS;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SURFACE_INFO_INPUT
*
*   @brief
*       Input structure for Addr3ComputeSurfaceInfo
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SURFACE_INFO_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes

    ADDR3_SURFACE_FLAGS   flags;             ///< Surface flags
    Addr3SwizzleMode      swizzleMode;       ///< Swizzle Mode for Gfx12
    AddrResourceType      resourceType;      ///< Surface type
    AddrFormat            format;            ///< Surface format
    UINT_32               bpp;               ///< bits per pixel
    UINT_32               width;             ///< Width (of mip0), in pixels
    UINT_32               height;            ///< Height (of mip0), in pixels
    UINT_32               numSlices;         ///< Number of surface slices/depth (of mip0)
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               numSamples;        ///< Number of samples
    UINT_32               pitchInElement;    ///< Pitch in elements (blocks for compressed formats)
    UINT_32               sliceAlign;        ///< Required slice size in bytes
} ADDR3_COMPUTE_SURFACE_INFO_INPUT;

/**
****************************************************************************************************
*   ADDR3_MIP_INFO
*
*   @brief
*       Structure that contains information for mip level
*   @note
*       ADDR3_COMPUTE_SURFACE_INFO_OUTPUT::pMipInfo points at entries of this type, and
*       ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT takes macroBlockOffset and
*       mipTailOffset values taken from it.
****************************************************************************************************
*/
typedef struct _ADDR3_MIP_INFO
{
    UINT_32             pitch;              ///< Pitch in elements
    UINT_32             height;             ///< Padded height in elements
    UINT_32             depth;              ///< Padded depth
    UINT_32             pixelPitch;         ///< Pitch in pixels
    UINT_32             pixelHeight;        ///< Padded height in pixels
    UINT_32             equationIndex;      ///< Equation index in the equation table
    UINT_64             offset;             ///< Offset in bytes from mip base, should only be used
                                            ///< to setup vam surface descriptor, can't be used
                                            ///< to setup swizzle pattern
    UINT_64             macroBlockOffset;   ///< macro block offset in bytes from mip base
    UINT_32             mipTailOffset;      ///< mip tail offset in bytes
    UINT_32             mipTailCoordX;      ///< mip tail coord x
    UINT_32             mipTailCoordY;      ///< mip tail coord y
    UINT_32             mipTailCoordZ;      ///< mip tail coord z
} ADDR3_MIP_INFO;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SURFACE_INFO_OUTPUT
*
*   @brief
*       Output structure for Addr3ComputeSurfaceInfo
*   @note
*       Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
*       Pixel: Original pixel
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT
{
    UINT_32             size;                 ///< Size of this structure in bytes
    UINT_32             pitch;                ///< Pitch in elements (blocks for compressed formats)
    UINT_32             pixelPitch;           ///< Pitch in original pixels
    UINT_32             pixelHeight;          ///< Height in original pixels
    UINT_32             pixelBits;            ///< Original bits per pixel, passed from input
    UINT_32             bpp;                  ///< Bits per elements
                                              ///  (e.g. blocks for BCn, 1/3 for 96bit)
    UINT_32             numSlices;            ///< Padded depth for 3d resource
                                              ///  or padded number of slices for 2d array resource
    UINT_32             height;               ///< Padded height (of mip0) in elements
    UINT_64             sliceSize;            ///< Slice (total mip chain) size in bytes
    UINT_64             surfSize;             ///< Surface (total mip chain) size in bytes
    UINT_32             baseAlign;            ///< Base address alignment
    ADDR_EXTENT3D       blockExtent;          ///< Dimensions in element inside one block
    UINT_32             pixelMipChainPitch;   ///< Mip chain pitch in original pixels
    UINT_32             pixelMipChainHeight;  ///< Mip chain height in original pixels
    ADDR3_MIP_INFO*     pMipInfo;             ///< Info regarding the start, sizes of the mip levels
    BOOL_32             mipChainInTail;       ///< If whole mipchain falls into mip tail block
    UINT_32             firstMipIdInTail;     ///< The id of first mip in tail, if there is no mip
                                              ///  in tail, it will be set to number of mip levels
} ADDR3_COMPUTE_SURFACE_INFO_OUTPUT;

/**
****************************************************************************************************
*   ADDR3_SWMODE_SET
*
*   @brief
*       Bit field with one bit per swizzle mode (eight mode bits plus 24 reserved bits), overlaid
*       with a 32-bit 'value' for whole-set access
****************************************************************************************************
*/
// The bit order MUST be the same as Addr3SwizzleMode enumerations, otherwise using bitset to enable
// or disable swizzle modes will be problematic.
typedef union _ADDR3_SWMODE_SET
{
    struct
    {
        UINT_32 swLinear    :  1;
        UINT_32 sw2d256B    :  1;
        UINT_32 sw2d4kB     :  1;
        UINT_32 sw2d64kB    :  1;
        UINT_32 sw2d256kB   :  1;
        UINT_32 sw3d4kB     :  1;
        UINT_32 sw3d64kB    :  1;
        UINT_32 sw3d256kB   :  1;
        UINT_32 reserved    : 24;
    };

    UINT_32 value; ///< All mode bits viewed as one 32-bit word
} ADDR3_SWMODE_SET;

/**
****************************************************************************************************
*   ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT
*
*   @brief
*       Input structure of Addr3GetPossibleSwizzleModes
****************************************************************************************************
*/
typedef struct _ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes

    ADDR3_SURFACE_FLAGS   flags;             ///< Surface flags
    AddrResourceType      resourceType;      ///< Surface type
    UINT_32               bpp;               ///< bits per pixel
    UINT_32               width;             ///< Width (of mip0), in pixels
    UINT_32               height;            ///< Height (of mip0), in pixels
    UINT_32               numSlices;         ///< Number of surface slices/depth (of mip0)
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               numSamples;        ///< Number of samples
    UINT_32               maxAlign;          ///< maximum base/size alignment requested by client
} ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT;

/**
****************************************************************************************************
*   ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT
*
*   @brief
*       Output structure of Addr3GetPossibleSwizzleModes
****************************************************************************************************
*/
typedef struct _ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT
{
    UINT_32           size;             ///< Size of this structure in bytes
    ADDR3_SWMODE_SET  validModes;       ///< Bit set of valid swizzle modes for this function.
} ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputeSurfaceInfo
*
*   @brief
*       Compute surface width/height/slices/alignments and suitable tiling mode
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceInfo(
    ADDR_HANDLE                              hLib,
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn,
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut);

/**
****************************************************************************************************
*   Addr3GetPossibleSwizzleModes
*
*   @brief
*       Returns a list of swizzle modes that are valid from the hardware's perspective for the
*       client to choose from
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure; its validModes member is the ADDR3_SWMODE_SET bit set
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3GetPossibleSwizzleModes(
    ADDR_HANDLE                                  hLib,
    const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,
    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
*
*   @brief
*       Input structure for Addr3ComputeSurfaceAddrFromCoord
*   @note
*       The first group of members (x, y, slice, sample, mipId) selects the location to look up;
*       the second group describes the surface itself.
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
{
    UINT_32             size;            ///< Size of this structure in bytes

    UINT_32             x;               ///< X coordinate
    UINT_32             y;               ///< Y coordinate
    UINT_32             slice;           ///< Slice index
    UINT_32             sample;          ///< Sample index, use fragment index for EQAA
    UINT_32             mipId;           ///< the mip ID in mip chain

    Addr3SwizzleMode    swizzleMode;     ///< Swizzle mode for Gfx12
    ADDR3_SURFACE_FLAGS flags;           ///< Surface flags
    AddrResourceType    resourceType;    ///< Surface type
    UINT_32             bpp;             ///< Bits per pixel
    ADDR_EXTENT3D       unAlignedDims;   ///< Surface original dimensions (of mip0)
    UINT_32             numMipLevels;    ///< Total mipmap levels
    UINT_32             numSamples;      ///< Number of samples
    UINT_32             pitchInElement;  ///< Pitch in elements (blocks for compressed formats)
} ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
*
*   @brief
*       Output structure for Addr3ComputeSurfaceAddrFromCoord
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
{
    UINT_32    size;             ///< Size of this structure in bytes

    UINT_64    addr;             ///< Byte offset from the image starting address
    UINT_32    bitPosition;      ///< Bit position within addr, 0-7.
                                 ///  For surface bpp < 8, e.g. FMT_1.
    UINT_32    prtBlockIndex;    ///< Index of a PRT tile (64K block)
} ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputeSurfaceAddrFromCoord
*
*   @brief
*       Compute surface address from a given coordinate.
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                         hLib,
    const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT*    pIn,
    ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*         pOut);

/**
****************************************************************************************************
*   ADDR3_COMPUTE_PIPEBANKXOR_INPUT
*
*   @brief
*       Input structure of Addr3ComputePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_PIPEBANKXOR_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             surfIndex;          ///< Input surface index
    Addr3SwizzleMode    swizzleMode;        ///< Surface swizzle mode
} ADDR3_COMPUTE_PIPEBANKXOR_INPUT;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT
*
*   @brief
*       Output structure of Addr3ComputePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             pipeBankXor;        ///< Pipe bank xor
} ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputePipeBankXor
*
*   @brief
*       Calculate a valid bank pipe xor value for client to use.
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputePipeBankXor(
    ADDR_HANDLE                            hLib,
    const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
*
*   @brief
*       Input structure of Addr3ComputeNonBlockCompressedView
*   @note
*       Describes the surface (flags through pipeBankXor) and then selects which slice and mip of
*       it to view (slice, mipId).
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT
{
    UINT_32               size;              ///< Size of this structure in bytes
    ADDR3_SURFACE_FLAGS   flags;             ///< Surface flags
    Addr3SwizzleMode      swizzleMode;       ///< Swizzle Mode for Gfx12
    AddrResourceType      resourceType;      ///< Surface type
    AddrFormat            format;            ///< Surface format
    ADDR_EXTENT3D         unAlignedDims;     ///< Surface original dimensions (of mip0)
    UINT_32               numMipLevels;      ///< Total mipmap levels.
    UINT_32               pipeBankXor;       ///< Combined swizzle used to do bank/pipe rotation
    UINT_32               slice;             ///< Index of slice to view
    UINT_32               mipId;             ///< Id of mip to view
} ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT
*
*   @brief
*       Output structure of Addr3ComputeNonBlockCompressedView
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_64             offset;             ///< Offset from resource base for the view
    UINT_32             pipeBankXor;        ///< Pipe bank xor for the view
    ADDR_EXTENT3D       unAlignedDims;      ///< Mip0 dimensions (in elements) for the view
    UINT_32             numMipLevels;       ///< Total mipmap levels for the view
    UINT_32             mipId;              ///< Mip ID for the view
} ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeNonBlockCompressedView(
    ADDR_HANDLE                                       hLib,
    const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
*
*   @brief
*       Input structure of Addr3ComputeSubResourceOffsetForSwizzlePattern
*   @note
*       macroBlockOffset and mipTailOffset are the values reported in ADDR3_MIP_INFO for the mip
*       level of interest.
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    Addr3SwizzleMode    swizzleMode;        ///< Surface swizzle mode
    AddrResourceType    resourceType;       ///< Surface resource type
    UINT_32             pipeBankXor;        ///< Per resource xor
    UINT_32             slice;              ///< Slice id
    UINT_64             sliceSize;          ///< Slice size of a mip chain
    UINT_64             macroBlockOffset;   ///< Macro block offset, returned in ADDR3_MIP_INFO
    UINT_32             mipTailOffset;      ///< Mip tail offset, returned in ADDR3_MIP_INFO
} ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
*
*   @brief
*       Output structure of Addr3ComputeSubResourceOffsetForSwizzlePattern
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_64             offset;             ///< Computed sub resource offset
} ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Calculate sub resource offset to support swizzle pattern.
*   @note
*       Declared as returning VOID, so unlike the neighbouring Addr3* entry points it reports no
*       ADDR_E_RETURNCODE to the caller.
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
****************************************************************************************************
*/
VOID ADDR_API Addr3ComputeSubResourceOffsetForSwizzlePattern(
    ADDR_HANDLE                                                     hLib,
    const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
    ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut);

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT
*
*   @brief
*       Input structure of Addr3ComputeSlicePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    Addr3SwizzleMode    swizzleMode;        ///< Surface swizzle mode
    AddrResourceType    resourceType;       ///< Surface resource type
    UINT_32             bpe;                ///< bits per element (e.g. block size for BCn format)
    UINT_32             basePipeBankXor;    ///< Base pipe bank xor
    UINT_32             slice;              ///< Slice id
    UINT_32             numSamples;         ///< Number of samples
} ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT;

/**
****************************************************************************************************
*   ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
*
*   @brief
*       Output structure of Addr3ComputeSlicePipeBankXor
****************************************************************************************************
*/
typedef struct _ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT
{
    UINT_32             size;               ///< Size of this structure in bytes
    UINT_32             pipeBankXor;        ///< Pipe bank xor computed for the requested slice
} ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT;

/**
****************************************************************************************************
*   Addr3ComputeSlicePipeBankXor
*
*   @brief
*       Calculate slice pipe bank xor value based on base pipe bank xor and slice id.
*   @param hLib [in]  Library handle
*   @param pIn  [in]  Input structure (passed as pointer-to-const)
*   @param pOut [out] Output structure
*   @return
*       An ADDR_E_RETURNCODE value
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSlicePipeBankXor(
    ADDR_HANDLE                                  hLib,
    const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut);

} // namespace rocr
#endif // __ADDR_INTERFACE_H__


================================================
FILE: runtime/hsa-runtime/image/addrlib/inc/addrtypes.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrtypes.h
* @brief Contains the helper function and constants
****************************************************************************************************
*/
#ifndef __ADDR_TYPES_H__
#define __ADDR_TYPES_H__

#if defined(__APPLE__) && !defined(HAVE_TSERVER)
// On Apple builds (excluding the diag team's HAVE_TSERVER configuration) the basic definitions come
// from an external header maintained by the Apple driver team. This helps with compilation issues
// and reduces the amount of code covered by NDA.
#include "addrExtDef.h"

#else

// Windows and/or Linux: provide each basic type alias unless a macro of that name already exists.
#ifndef VOID
typedef void           VOID;
#endif

#ifndef FLOAT
typedef float          FLOAT;
#endif

#ifndef DOUBLE
typedef double         DOUBLE;
#endif

#ifndef CHAR
typedef char           CHAR;
#endif

#ifndef INT
typedef int            INT;
#endif

#include <stdarg.h> // required for va_list and related macros

#endif // defined (__APPLE__) && !defined(HAVE_TSERVER)

/**
****************************************************************************************************
*   Calling conventions
*
*   Every macro in this section is only given a default when the client has not defined it first.
****************************************************************************************************
*/
#ifndef ADDR_CDECL
    #if defined(__GNUC__) && defined(__i386__)
        #define ADDR_CDECL __attribute__((cdecl))
    #elif defined(__GNUC__)
        #define ADDR_CDECL
    #else
        #define ADDR_CDECL __cdecl
    #endif
#endif

#ifndef ADDR_STDCALL
    #if defined(__GNUC__) && defined(__i386__)
        #define ADDR_STDCALL __attribute__((stdcall))
    #elif defined(__GNUC__)
        #define ADDR_STDCALL
    #else
        #define ADDR_STDCALL __stdcall
    #endif
#endif

#ifndef ADDR_FASTCALL
    #if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
        #define ADDR_FASTCALL __attribute__((regparm(0)))
    #elif defined(__GNUC__)
        #define ADDR_FASTCALL
    #else
        #define ADDR_FASTCALL __fastcall
    #endif
#endif

// GC_* names map onto the ADDR_* conventions unless the client supplied its own.
#if !defined(GC_CDECL)
    #define GC_CDECL  ADDR_CDECL
#endif

#if !defined(GC_STDCALL)
    #define GC_STDCALL  ADDR_STDCALL
#endif

#if !defined(GC_FASTCALL)
    #define GC_FASTCALL  ADDR_FASTCALL
#endif


#if !defined(__GNUC__)
    // win32, win64, other platforms
    #define ADDR_INLINE   __inline
#else
    #define ADDR_INLINE static inline   // must be static as well as inline in order to link
#endif // #if !defined(__GNUC__)

#define ADDR_API ADDR_FASTCALL // the default calling convention for the API is fast call

/**
****************************************************************************************************
* Global tile-index constants used by other modules; each one is defined only when the client has
* not already provided it
****************************************************************************************************
*/
#ifndef TILEINDEX_INVALID
#define TILEINDEX_INVALID                -1
#endif

#ifndef TILEINDEX_LINEAR_GENERAL
#define TILEINDEX_LINEAR_GENERAL         -2
#endif

#ifndef TILEINDEX_LINEAR_ALIGNED
#define TILEINDEX_LINEAR_ALIGNED          8
#endif

/**
****************************************************************************************************
* Return codes
****************************************************************************************************
*/
typedef enum _ADDR_E_RETURNCODE
{
    // General Return
    ADDR_OK    = 0,             ///< Success
    ADDR_ERROR = 1,             ///< Generic, unspecified failure

    // Specific Errors (no explicit values: numbered 2..7, continuing from ADDR_ERROR)
    ADDR_OUTOFMEMORY,           ///< = 2
    ADDR_INVALIDPARAMS,         ///< = 3
    ADDR_NOTSUPPORTED,          ///< = 4
    ADDR_NOTIMPLEMENTED,        ///< = 5
    ADDR_PARAMSIZEMISMATCH,     ///< = 6
    ADDR_INVALIDGBREGVALUES,    ///< = 7

} ADDR_E_RETURNCODE;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define tile modes for all H/W
* @note
*   R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from
*   ADDR_TM_2D_TILED_XTHICK
*
****************************************************************************************************
*/
typedef enum _AddrTileMode
{
    ADDR_TM_LINEAR_GENERAL      = 0,    ///< Least restrictions, pitch: multiple of 8 if not buffer
    ADDR_TM_LINEAR_ALIGNED      = 1,    ///< Requests pitch or slice to be multiple of 64 pixels
    ADDR_TM_1D_TILED_THIN1      = 2,    ///< Linear array of 8x8 tiles
    ADDR_TM_1D_TILED_THICK      = 3,    ///< Linear array of 8x8x4 tiles
    ADDR_TM_2D_TILED_THIN1      = 4,    ///< A set of macro tiles consist of 8x8 tiles
    ADDR_TM_2D_TILED_THIN2      = 5,    ///< 600 HWL only, macro tile ratio is 1:4
    ADDR_TM_2D_TILED_THIN4      = 6,    ///< 600 HWL only, macro tile ratio is 1:16
    ADDR_TM_2D_TILED_THICK      = 7,    ///< A set of macro tiles consist of 8x8x4 tiles
    ADDR_TM_2B_TILED_THIN1      = 8,    ///< 600 HWL only, with bank swap
    ADDR_TM_2B_TILED_THIN2      = 9,    ///< 600 HWL only, with bank swap and ratio is 1:4
    ADDR_TM_2B_TILED_THIN4      = 10,   ///< 600 HWL only, with bank swap and ratio is 1:16
    ADDR_TM_2B_TILED_THICK      = 11,   ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles
    ADDR_TM_3D_TILED_THIN1      = 12,   ///< Macro tiling w/ pipe rotation between slices
    ADDR_TM_3D_TILED_THICK      = 13,   ///< Macro tiling w/ pipe rotation between slices, thick
    ADDR_TM_3B_TILED_THIN1      = 14,   ///< 600 HWL only, with bank swap
    ADDR_TM_3B_TILED_THICK      = 15,   ///< 600 HWL only, with bank swap, thick
    ADDR_TM_2D_TILED_XTHICK     = 16,   ///< Tile is 8x8x8, valid from NI
    ADDR_TM_3D_TILED_XTHICK     = 17,   ///< Tile is 8x8x8, valid from NI
    ADDR_TM_POWER_SAVE          = 18,   ///< Power save mode, only used by KMD on NI
    ADDR_TM_PRT_TILED_THIN1     = 19,   ///< No bank/pipe rotation or hashing beyond macrotile size
    ADDR_TM_PRT_2D_TILED_THIN1  = 20,   ///< Same as 2D_TILED_THIN1, PRT only
    ADDR_TM_PRT_3D_TILED_THIN1  = 21,   ///< Same as 3D_TILED_THIN1, PRT only
    ADDR_TM_PRT_TILED_THICK     = 22,   ///< No bank/pipe rotation or hashing beyond macrotile size
    ADDR_TM_PRT_2D_TILED_THICK  = 23,   ///< Same as 2D_TILED_THICK, PRT only
    ADDR_TM_PRT_3D_TILED_THICK  = 24,   ///< Same as 3D_TILED_THICK, PRT only
    ADDR_TM_UNKNOWN             = 25,   ///< Unknown tile mode, should be decided by address lib
    ADDR_TM_COUNT               = 26,   ///< Must be the value of the last tile mode
} AddrTileMode;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define swizzle modes for Gfx9+ ASIC
* @note
*
*   ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resource
*   ADDR_SW_256B_* addressing block aligned size is 256B, for 2D resource
*   ADDR_SW_4KB_*  addressing block aligned size is 4KB, for 2D/3D resource
*   ADDR_SW_64KB_* addressing block aligned size is 64KB, for 1D/2D/3D resource
*   ADDR_SW_VAR_*  addressing block aligned size is ASIC specific
*
*   ADDR_SW_*_Z    For GFX9:
                   - for 2D resource, represents Z-order swizzle mode for depth/stencil/FMask
                   - for 3D resource, represents a swizzle mode similar to legacy thick tile mode
                   For GFX10:
                   - represents Z-order swizzle mode for depth/stencil/FMask
*   ADDR_SW_*_S    For GFX9+:
                   - represents standard swizzle mode defined by MS
*   ADDR_SW_*_D    For GFX9:
                   - for 2D resource, represents a swizzle mode for displayable resource
                   - for 3D resource, represents a swizzle mode which places each slice in order, with
                     pixels within a slice placed as in 2D ADDR_SW_*_S. Don't use this combination if
                     possible!
                   For GFX10:
                   - for 2D resource, represents a swizzle mode for displayable resource
                   - for 3D resource, represents a swizzle mode similar to legacy thick tile mode
*   ADDR_SW_*_R    For GFX9:
                   - 2D resource only, represents a swizzle mode for rotated displayable resource
                   For GFX10:
                   - represents a swizzle mode for render target resource
*
****************************************************************************************************
*/
typedef enum _AddrSwizzleMode
{
    // Primary enumerators: one per numeric value 0..32, followed by the type count.
    ADDR_SW_LINEAR          = 0,
    ADDR_SW_256B_S          = 1,
    ADDR_SW_256B_D          = 2,
    ADDR_SW_256B_R          = 3,
    ADDR_SW_4KB_Z           = 4,
    ADDR_SW_4KB_S           = 5,
    ADDR_SW_4KB_D           = 6,
    ADDR_SW_4KB_R           = 7,
    ADDR_SW_64KB_Z          = 8,
    ADDR_SW_64KB_S          = 9,
    ADDR_SW_64KB_D          = 10,
    ADDR_SW_64KB_R          = 11,
    ADDR_SW_MISCDEF12       = 12,
    ADDR_SW_MISCDEF13       = 13,
    ADDR_SW_MISCDEF14       = 14,
    ADDR_SW_MISCDEF15       = 15,
    ADDR_SW_64KB_Z_T        = 16,
    ADDR_SW_64KB_S_T        = 17,
    ADDR_SW_64KB_D_T        = 18,
    ADDR_SW_64KB_R_T        = 19,
    ADDR_SW_4KB_Z_X         = 20,
    ADDR_SW_4KB_S_X         = 21,
    ADDR_SW_4KB_D_X         = 22,
    ADDR_SW_4KB_R_X         = 23,
    ADDR_SW_64KB_Z_X        = 24,
    ADDR_SW_64KB_S_X        = 25,
    ADDR_SW_64KB_D_X        = 26,
    ADDR_SW_64KB_R_X        = 27,
    ADDR_SW_MISCDEF28       = 28,
    ADDR_SW_MISCDEF29       = 29,
    ADDR_SW_MISCDEF30       = 30,
    ADDR_SW_MISCDEF31       = 31,
    ADDR_SW_LINEAR_GENERAL  = 32,
    ADDR_SW_MAX_TYPE        = 33,

    // The MISCDEF slots carry several alternative names. All names below are aliases, so
    // enumerators sharing a slot compare equal (e.g. ADDR_SW_RESERVED4 == ADDR_SW_256KB_S_X).

    // "Reserved" names for slots 12..15, 29 and 30.
    ADDR_SW_RESERVED0       = ADDR_SW_MISCDEF12,
    ADDR_SW_RESERVED1       = ADDR_SW_MISCDEF13,
    ADDR_SW_RESERVED2       = ADDR_SW_MISCDEF14,
    ADDR_SW_RESERVED3       = ADDR_SW_MISCDEF15,
    ADDR_SW_RESERVED4       = ADDR_SW_MISCDEF29,
    ADDR_SW_RESERVED5       = ADDR_SW_MISCDEF30,

    // Variable-block-size names for slots 28 and 31.
    ADDR_SW_VAR_Z_X         = ADDR_SW_MISCDEF28,
    ADDR_SW_VAR_R_X         = ADDR_SW_MISCDEF31,

    // 256KB-block names for slots 28..31.
    ADDR_SW_256KB_Z_X       = ADDR_SW_MISCDEF28,
    ADDR_SW_256KB_S_X       = ADDR_SW_MISCDEF29,
    ADDR_SW_256KB_D_X       = ADDR_SW_MISCDEF30,
    ADDR_SW_256KB_R_X       = ADDR_SW_MISCDEF31,
} AddrSwizzleMode;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define swizzle modes for Gfx12+ ASIC
*
****************************************************************************************************
*/
typedef enum _Addr3SwizzleMode
{
    // Names combine a block size (256B / 4KB / 64KB / 256KB) with a 2D or 3D suffix.
    ADDR3_LINEAR    = 0,
    ADDR3_256B_2D   = 1,
    ADDR3_4KB_2D    = 2,
    ADDR3_64KB_2D   = 3,
    ADDR3_256KB_2D  = 4,
    ADDR3_4KB_3D    = 5,
    ADDR3_64KB_3D   = 6,
    ADDR3_256KB_3D  = 7,
    ADDR3_MAX_TYPE  = 8,    // Number of swizzle modes listed above
} Addr3SwizzleMode;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define image type
* @note
*   this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrResourceType
{
    ADDR_RSRC_TEX_1D = 0,       // 1D texture resource
    ADDR_RSRC_TEX_2D = 1,       // 2D texture resource
    ADDR_RSRC_TEX_3D = 2,       // 3D texture resource
    ADDR_RSRC_MAX_TYPE = 3,     // Number of resource types listed above
} AddrResourceType;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define resource heap location
* @note
*   this is new for address library interface version 2
*
****************************************************************************************************
*/
// Note: the "Resrouce" spelling in the type name is part of the public interface and is kept
// as-is so existing callers continue to compile.
typedef enum _AddrResrouceLocation
{
    ADDR_RSRC_LOC_UNDEF  = 0,   // Resource heap is undefined/unknown
    ADDR_RSRC_LOC_LOCAL  = 1,   // CPU visible and CPU invisible local heap
    ADDR_RSRC_LOC_USWC   = 2,   // CPU write-combined non-cached nonlocal heap
    ADDR_RSRC_LOC_CACHED = 3,   // CPU cached nonlocal heap
    ADDR_RSRC_LOC_INVIS  = 4,   // CPU invisible local heap only
    ADDR_RSRC_LOC_MAX_TYPE = 5, // Number of heap locations listed above
} AddrResrouceLocation;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define resource basic swizzle mode
* @note
*   this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrSwType
{
    ADDR_SW_Z  = 0,   // Resource basic swizzle mode is ZOrder
    ADDR_SW_S  = 1,   // Resource basic swizzle mode is Standard
    ADDR_SW_D  = 2,   // Resource basic swizzle mode is Display
    ADDR_SW_R  = 3,   // Resource basic swizzle mode is Rotated/Render optimized
    ADDR_SW_L  = 4,   // Resource basic swizzle mode is Linear
    ADDR_SW_MAX_SWTYPE  // Implicitly 5: number of basic swizzle types listed above
} AddrSwType;

/**
****************************************************************************************************
* @brief
*   Neutral enums that define mipmap major mode
* @note
*   this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrMajorMode
{
    ADDR_MAJOR_X = 0,           // X-major
    ADDR_MAJOR_Y = 1,           // Y-major
    ADDR_MAJOR_Z = 2,           // Z-major
    ADDR_MAJOR_MAX_TYPE = 3,    // Number of major modes listed above
} AddrMajorMode;

/**
****************************************************************************************************
*   AddrFormat
*
*   @brief
*       Neutral enum for SurfaceFormat
*
****************************************************************************************************
*/
// Every *_FLOAT enumerator in this enum is an alias of the same-sized non-float format
// (e.g. ADDR_FMT_16_FLOAT == ADDR_FMT_16). As a result the numeric slot directly after each
// aliased base format (0x06, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1e, 0x20, 0x23, 0x2e, 0x30)
// is not assigned to any enumerator.
typedef enum _AddrFormat {
    ADDR_FMT_INVALID                              = 0x00000000,
    ADDR_FMT_8                                    = 0x00000001,
    ADDR_FMT_4_4                                  = 0x00000002,
    ADDR_FMT_3_3_2                                = 0x00000003,
    ADDR_FMT_RESERVED_4                           = 0x00000004,
    ADDR_FMT_16                                   = 0x00000005,
    ADDR_FMT_16_FLOAT                             = ADDR_FMT_16,
    ADDR_FMT_8_8                                  = 0x00000007,
    ADDR_FMT_5_6_5                                = 0x00000008,
    ADDR_FMT_6_5_5                                = 0x00000009,
    ADDR_FMT_1_5_5_5                              = 0x0000000a,
    ADDR_FMT_4_4_4_4                              = 0x0000000b,
    ADDR_FMT_5_5_5_1                              = 0x0000000c,
    ADDR_FMT_32                                   = 0x0000000d,
    ADDR_FMT_32_FLOAT                             = ADDR_FMT_32,
    ADDR_FMT_16_16                                = 0x0000000f,
    ADDR_FMT_16_16_FLOAT                          = ADDR_FMT_16_16,
    ADDR_FMT_8_24                                 = 0x00000011,
    ADDR_FMT_8_24_FLOAT                           = ADDR_FMT_8_24,
    ADDR_FMT_24_8                                 = 0x00000013,
    ADDR_FMT_24_8_FLOAT                           = ADDR_FMT_24_8,
    ADDR_FMT_10_11_11                             = 0x00000015,
    ADDR_FMT_10_11_11_FLOAT                       = ADDR_FMT_10_11_11,
    ADDR_FMT_11_11_10                             = 0x00000017,
    ADDR_FMT_11_11_10_FLOAT                       = ADDR_FMT_11_11_10,
    ADDR_FMT_2_10_10_10                           = 0x00000019,
    ADDR_FMT_8_8_8_8                              = 0x0000001a,
    ADDR_FMT_10_10_10_2                           = 0x0000001b,
    ADDR_FMT_X24_8_32_FLOAT                       = 0x0000001c,
    ADDR_FMT_32_32                                = 0x0000001d,
    ADDR_FMT_32_32_FLOAT                          = ADDR_FMT_32_32,
    ADDR_FMT_16_16_16_16                          = 0x0000001f,
    ADDR_FMT_16_16_16_16_FLOAT                    = ADDR_FMT_16_16_16_16,
    ADDR_FMT_RESERVED_33                          = 0x00000021,
    ADDR_FMT_32_32_32_32                          = 0x00000022,
    ADDR_FMT_32_32_32_32_FLOAT                    = ADDR_FMT_32_32_32_32,
    ADDR_FMT_RESERVED_36                          = 0x00000024,
    ADDR_FMT_1                                    = 0x00000025,
    ADDR_FMT_1_REVERSED                           = 0x00000026,
    ADDR_FMT_GB_GR                                = 0x00000027,
    ADDR_FMT_BG_RG                                = 0x00000028,
    ADDR_FMT_32_AS_8                              = 0x00000029,
    ADDR_FMT_32_AS_8_8                            = 0x0000002a,
    ADDR_FMT_5_9_9_9_SHAREDEXP                    = 0x0000002b,
    ADDR_FMT_8_8_8                                = 0x0000002c,
    ADDR_FMT_16_16_16                             = 0x0000002d,
    ADDR_FMT_16_16_16_FLOAT                       = ADDR_FMT_16_16_16,
    ADDR_FMT_32_32_32                             = 0x0000002f,
    ADDR_FMT_32_32_32_FLOAT                       = ADDR_FMT_32_32_32,
    // Block-compressed formats
    ADDR_FMT_BC1                                  = 0x00000031,
    ADDR_FMT_BC2                                  = 0x00000032,
    ADDR_FMT_BC3                                  = 0x00000033,
    ADDR_FMT_BC4                                  = 0x00000034,
    ADDR_FMT_BC5                                  = 0x00000035,
    ADDR_FMT_BC6                                  = 0x00000036,
    ADDR_FMT_BC7                                  = 0x00000037,
    ADDR_FMT_32_AS_32_32_32_32                    = 0x00000038,
    ADDR_FMT_APC3                                 = 0x00000039,
    ADDR_FMT_APC4                                 = 0x0000003a,
    ADDR_FMT_APC5                                 = 0x0000003b,
    ADDR_FMT_APC6                                 = 0x0000003c,
    ADDR_FMT_APC7                                 = 0x0000003d,
    ADDR_FMT_CTX1                                 = 0x0000003e,
    ADDR_FMT_RESERVED_63                          = 0x0000003f,
    // ASTC formats; the suffix is the block footprint (width x height)
    ADDR_FMT_ASTC_4x4                             = 0x00000040,
    ADDR_FMT_ASTC_5x4                             = 0x00000041,
    ADDR_FMT_ASTC_5x5                             = 0x00000042,
    ADDR_FMT_ASTC_6x5                             = 0x00000043,
    ADDR_FMT_ASTC_6x6                             = 0x00000044,
    ADDR_FMT_ASTC_8x5                             = 0x00000045,
    ADDR_FMT_ASTC_8x6                             = 0x00000046,
    ADDR_FMT_ASTC_8x8                             = 0x00000047,
    ADDR_FMT_ASTC_10x5                            = 0x00000048,
    ADDR_FMT_ASTC_10x6                            = 0x00000049,
    ADDR_FMT_ASTC_10x8                            = 0x0000004a,
    ADDR_FMT_ASTC_10x10                           = 0x0000004b,
    ADDR_FMT_ASTC_12x10                           = 0x0000004c,
    ADDR_FMT_ASTC_12x12                           = 0x0000004d,
    // ETC2 formats
    ADDR_FMT_ETC2_64BPP                           = 0x0000004e,
    ADDR_FMT_ETC2_128BPP                          = 0x0000004f,
    ADDR_FMT_BG_RG_16_16_16_16                    = 0x00000050,
} AddrFormat;

/**
****************************************************************************************************
*   AddrDepthFormat
*
*   @brief
*       Neutral enum for addrFlt32ToDepthPixel
*
****************************************************************************************************
*/
typedef enum _AddrDepthFormat
{
    // Values are contiguous from 0 (invalid) to 7.
    ADDR_DEPTH_INVALID                            = 0x00000000,
    ADDR_DEPTH_16                                 = 0x00000001,
    ADDR_DEPTH_X8_24                              = 0x00000002,
    ADDR_DEPTH_8_24                               = 0x00000003,
    ADDR_DEPTH_X8_24_FLOAT                        = 0x00000004,
    ADDR_DEPTH_8_24_FLOAT                         = 0x00000005,
    ADDR_DEPTH_32_FLOAT                           = 0x00000006,
    ADDR_DEPTH_X24_8_32_FLOAT                     = 0x00000007,

} AddrDepthFormat;

/**
****************************************************************************************************
*   AddrColorFormat
*
*   @brief
*       Neutral enum for ColorFormat
*
****************************************************************************************************
*/
typedef enum _AddrColorFormat
{
    // Values are contiguous from 0x00 to 0x23. Unlike AddrFormat, each *_FLOAT enumerator here
    // has its own value (the one directly after the matching non-float format).
    ADDR_COLOR_INVALID                            = 0x00000000,
    ADDR_COLOR_8                                  = 0x00000001,
    ADDR_COLOR_4_4                                = 0x00000002,
    ADDR_COLOR_3_3_2                              = 0x00000003,
    ADDR_COLOR_RESERVED_4                         = 0x00000004,
    ADDR_COLOR_16                                 = 0x00000005,
    ADDR_COLOR_16_FLOAT                           = 0x00000006,
    ADDR_COLOR_8_8                                = 0x00000007,
    ADDR_COLOR_5_6_5                              = 0x00000008,
    ADDR_COLOR_6_5_5                              = 0x00000009,
    ADDR_COLOR_1_5_5_5                            = 0x0000000a,
    ADDR_COLOR_4_4_4_4                            = 0x0000000b,
    ADDR_COLOR_5_5_5_1                            = 0x0000000c,
    ADDR_COLOR_32                                 = 0x0000000d,
    ADDR_COLOR_32_FLOAT                           = 0x0000000e,
    ADDR_COLOR_16_16                              = 0x0000000f,
    ADDR_COLOR_16_16_FLOAT                        = 0x00000010,
    ADDR_COLOR_8_24                               = 0x00000011,
    ADDR_COLOR_8_24_FLOAT                         = 0x00000012,
    ADDR_COLOR_24_8                               = 0x00000013,
    ADDR_COLOR_24_8_FLOAT                         = 0x00000014,
    ADDR_COLOR_10_11_11                           = 0x00000015,
    ADDR_COLOR_10_11_11_FLOAT                     = 0x00000016,
    ADDR_COLOR_11_11_10                           = 0x00000017,
    ADDR_COLOR_11_11_10_FLOAT                     = 0x00000018,
    ADDR_COLOR_2_10_10_10                         = 0x00000019,
    ADDR_COLOR_8_8_8_8                            = 0x0000001a,
    ADDR_COLOR_10_10_10_2                         = 0x0000001b,
    ADDR_COLOR_X24_8_32_FLOAT                     = 0x0000001c,
    ADDR_COLOR_32_32                              = 0x0000001d,
    ADDR_COLOR_32_32_FLOAT                        = 0x0000001e,
    ADDR_COLOR_16_16_16_16                        = 0x0000001f,
    ADDR_COLOR_16_16_16_16_FLOAT                  = 0x00000020,
    ADDR_COLOR_RESERVED_33                        = 0x00000021,
    ADDR_COLOR_32_32_32_32                        = 0x00000022,
    ADDR_COLOR_32_32_32_32_FLOAT                  = 0x00000023,
} AddrColorFormat;

/**
****************************************************************************************************
*   AddrSurfaceNumber
*
*   @brief
*       Neutral enum for SurfaceNumber
*
****************************************************************************************************
*/
typedef enum _AddrSurfaceNumber {
    // Numeric interpretation of surface data; values are contiguous from 0 to 7.
    ADDR_NUMBER_UNORM                             = 0x00000000,
    ADDR_NUMBER_SNORM                             = 0x00000001,
    ADDR_NUMBER_USCALED                           = 0x00000002,
    ADDR_NUMBER_SSCALED                           = 0x00000003,
    ADDR_NUMBER_UINT                              = 0x00000004,
    ADDR_NUMBER_SINT                              = 0x00000005,
    ADDR_NUMBER_SRGB                              = 0x00000006,
    ADDR_NUMBER_FLOAT                             = 0x00000007,
} AddrSurfaceNumber;

/**
****************************************************************************************************
*   AddrSurfaceSwap
*
*   @brief
*       Neutral enum for SurfaceSwap
*
****************************************************************************************************
*/
typedef enum _AddrSurfaceSwap {
    // Two base swap modes (STD, ALT); the *_REV variants are the same value plus 2.
    ADDR_SWAP_STD                                 = 0x00000000,
    ADDR_SWAP_ALT                                 = 0x00000001,
    ADDR_SWAP_STD_REV                             = 0x00000002,
    ADDR_SWAP_ALT_REV                             = 0x00000003,
} AddrSurfaceSwap;

/**
****************************************************************************************************
*   AddrHtileBlockSize
*
*   @brief
*       Size of HTILE blocks, valid values are 4 or 8 for now
****************************************************************************************************
*/
typedef enum _AddrHtileBlockSize
{
    // Each enumerator's value equals the block size it names.
    ADDR_HTILE_BLOCKSIZE_4 = 4,
    ADDR_HTILE_BLOCKSIZE_8 = 8,
} AddrHtileBlockSize;


/**
****************************************************************************************************
*   AddrPipeCfg
*
*   @brief
*       The pipe configuration field specifies both the number of pipes and
*       how pipes are interleaved on the surface.
*       The expression of number of pipes, the shader engine tile size, and packer tile size
*       is encoded in a PIPE_CONFIG register field.
*       In general the number of pipes usually matches the number of memory channels of the
*       hardware configuration.
*       For hw configurations w/ a non-power-of-two number of memory channels, it usually matches
*       the number of ROP units(? TODO: which registers??)
*       The enum value = hw enum + 1 which is to reserve 0 for requesting default.
****************************************************************************************************
*/
typedef enum _AddrPipeCfg
{
    // Values 2, 3, 4 and 16 are not assigned to any enumerator.
    ADDR_PIPECFG_INVALID              = 0,
    ADDR_PIPECFG_P2                   = 1, ///< 2 pipes
    ADDR_PIPECFG_P4_8x16              = 5, ///< 4 pipes (through ADDR_PIPECFG_P4_32x32)
    ADDR_PIPECFG_P4_16x16             = 6,
    ADDR_PIPECFG_P4_16x32             = 7,
    ADDR_PIPECFG_P4_32x32             = 8,
    ADDR_PIPECFG_P8_16x16_8x16        = 9, ///< 8 pipes (through ADDR_PIPECFG_P8_32x64_32x32)
    ADDR_PIPECFG_P8_16x32_8x16        = 10,
    ADDR_PIPECFG_P8_32x32_8x16        = 11,
    ADDR_PIPECFG_P8_16x32_16x16       = 12,
    ADDR_PIPECFG_P8_32x32_16x16       = 13,
    ADDR_PIPECFG_P8_32x32_16x32       = 14,
    ADDR_PIPECFG_P8_32x64_32x32       = 15,
    ADDR_PIPECFG_P16_32x32_8x16       = 17, ///< 16 pipes (through ADDR_PIPECFG_P16_32x32_16x16)
    ADDR_PIPECFG_P16_32x32_16x16      = 18,
    ADDR_PIPECFG_UNUSED               = 19,
    ADDR_PIPECFG_MAX                  = 20,
} AddrPipeCfg;

/**
****************************************************************************************************
* AddrTileType
*
*   @brief
*       Neutral enums that specifies micro tile type (MICRO_TILE_MODE)
****************************************************************************************************
*/
typedef enum _AddrTileType
{
    // Values are contiguous from 0 to 4.
    ADDR_DISPLAYABLE        = 0,    ///< Displayable tiling
    ADDR_NON_DISPLAYABLE    = 1,    ///< Non-displayable tiling, a.k.a thin micro tiling
    ADDR_DEPTH_SAMPLE_ORDER = 2,    ///< Same as non-displayable plus depth-sample-order
    ADDR_ROTATED            = 3,    ///< Rotated displayable tiling
    ADDR_THICK              = 4,    ///< Thick micro-tiling, only valid for THICK and XTHICK
} AddrTileType;

////////////////////////////////////////////////////////////////////////////////////////////////////
//
//  Type definitions: short system-independent names for address library types
//
////////////////////////////////////////////////////////////////////////////////////////////////////

// The type and constant macros in this section are skipped on Apple builds unless
// HAVE_TSERVER is defined. Each of the guarded macros (BOOL_32 through FALSE) can be supplied
// by the including project, in which case the definition here is not used.
#if !defined(__APPLE__) || defined(HAVE_TSERVER)

#ifndef BOOL_32        // no bool type in C
/// @brief Boolean type, since none is defined in C
/// @ingroup type
#define BOOL_32 int
#endif

// Sized integer aliases.
// NOTE(review): the 32/16/8 in the names assume int is 32 bits, short is 16 bits and char is
// 8 bits on every supported target -- confirm for new platforms.
#ifndef INT_32
#define INT_32  int
#endif

#ifndef UINT_32
#define UINT_32 unsigned int
#endif

#ifndef INT_16
#define INT_16  short
#endif

#ifndef UINT_16
#define UINT_16 unsigned short
#endif

#ifndef INT_8
#define INT_8   signed char // signed must be used because of aarch64
#endif

#ifndef UINT_8
#define UINT_8  unsigned char
#endif

// Fallback definitions of NULL / TRUE / FALSE for translation units that have none.
#ifndef NULL
#define NULL 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

//
//  64-bit integer types depend on the compiler
//  (unlike the macros above, INT_64 / UINT_64 are defined without an #ifndef guard)
//
#if defined( __GNUC__ ) || defined( __WATCOMC__ )
#define INT_64   long long
#define UINT_64  unsigned long long

#elif defined( _WIN32 )
#define INT_64   __int64
#define UINT_64  unsigned __int64

#else
#error Unsupported compiler and/or operating system for 64-bit integers

// Compilation stops at the #error above, so the two defines below are never used by a build.
// NOTE(review): they appear to exist only to carry the documentation comments -- confirm.

/// @brief 64-bit signed integer type (compiler dependent)
/// @ingroup type
///
/// The addrlib defines a 64-bit signed integer type for either
/// Gnu/Watcom compilers (which use the first syntax) or for
/// the Windows VCC compiler (which uses the second syntax).
#define INT_64  long long OR __int64

/// @brief 64-bit unsigned integer type (compiler dependent)
/// @ingroup type
///
/// The addrlib defines a 64-bit unsigned integer type for either
/// Gnu/Watcom compilers (which use the first syntax) or for
/// the Windows VCC compiler (which uses the second syntax).
///
#define UINT_64  unsigned long long OR unsigned __int64
#endif

#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER)

//  ADDR64X is used to print addresses in hex form on both Windows and Linux
//
// ADDR64X / ADDR64D: printf length-modifier-plus-conversion strings for 64-bit values in
// hexadecimal and decimal form. They are written without the leading '%', so they are meant to
// be concatenated after a "%" in the format string.
#if defined( __GNUC__ ) || defined( __WATCOMC__ )
#define ADDR64X "llx"
#define ADDR64D "lld"

#elif defined( _WIN32 )
#define ADDR64X "I64x"
#define ADDR64D "I64d"

#else
#error Unsupported compiler and/or operating system for 64-bit integers

// Compilation stops at the #error above, so the two defines below are never used by a build.

/// @brief Addrlib device address 64-bit printf tag  (compiler dependent)
/// @ingroup type
///
/// This allows printf to display an ADDR_64 for either the Windows VCC compiler
/// (which uses "I64x") or the Gnu/Watcom compilers (which use "llx").
/// An example of use is printf("addr 0x%"ADDR64X"\n", address);
///
#define ADDR64X "llx" OR "I64x"
#define ADDR64D "lld" OR "I64d"
#endif


/// @brief Union for storing a 32-bit float or 32-bit integer
/// @ingroup type
///
/// This union provides a simple way to convert between a 32-bit float
/// and a 32-bit integer. It also prevents the compiler from producing
/// code that alters NaN values when assigning or copying floats.
/// Therefore, all address library routines that pass or return 32-bit
/// floating point data do so by passing or returning an ADDR_FLT_32.
///
typedef union {
    INT_32   i;     ///< The 32 bits viewed as a signed integer
    UINT_32  u;     ///< The 32 bits viewed as an unsigned integer
    float    f;     ///< The 32 bits viewed as a float
} ADDR_FLT_32;


////////////////////////////////////////////////////////////////////////////////////////////////////
//
//  Macros for controlling linking and building on multiple systems
//
////////////////////////////////////////////////////////////////////////////////////////////////////
// va_copy compatibility shim.
// On MSVC any existing va_copy macro is removed first, so the fallback below is always the
// definition in effect there.
#if defined(_MSC_VER)
#if defined(va_copy)
#undef va_copy  //redefine va_copy to support VC2013
#endif
#endif

// Fallback used whenever va_copy is not a macro at this point: copy the raw bytes of the
// va_list object from src into dst.
// NOTE(review): this relies on memcpy and va_list being declared by whoever expands the macro;
// no <string.h> / <stdarg.h> include is visible in this part of the header -- confirm.
#if !defined(va_copy)
#define va_copy(dst, src) \
    ((void) memcpy(&(dst), &(src), sizeof(va_list)))
#endif

#endif // __ADDR_TYPES_H__


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/addrinterface.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrinterface.cpp
* @brief Contains the addrlib interface functions
****************************************************************************************************
*/
#include "addrinterface.h"
#include "addrlib1.h"
#include "addrlib2.h"
#include "addrlib3.h"

#include "addrcommon.h"

namespace rocr {
using namespace Addr;

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Create/Destroy/Config functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrCreate
*
*   @brief
*       Create address lib object
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrCreate(
    const ADDR_CREATE_INPUT*    pAddrCreateIn,  ///< [in] information for creating address lib object
    ADDR_CREATE_OUTPUT*         pAddrCreateOut) ///< [out] address lib handle
{
    // Creation is delegated entirely to Lib::Create; its status is handed back unchanged.
    return Lib::Create(pAddrCreateIn, pAddrCreateOut);
}


/**
****************************************************************************************************
*   AddrDestroy
*
*   @brief
*       Destroy address lib object
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrDestroy(
    ADDR_HANDLE hLib) ///< address lib handle
{
    // A null handle has nothing to destroy and is reported as a generic error.
    if (!hLib)
    {
        return ADDR_ERROR;
    }

    // Resolve the handle to its library object and tear it down.
    Lib::GetLib(hLib)->Destroy();

    return ADDR_OK;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                    Surface functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrComputeSurfaceInfo
*
*   @brief
*       Calculate surface width/height/depth/alignments and suitable tiling mode
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
    ADDR_HANDLE                             hLib, ///< address lib handle
    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,  ///< [in] surface information
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut) ///< [out] surface parameters and alignments
{
    V1::Lib* const pAddrLib = V1::Lib::GetLib(hLib);

    // Without a library object behind the handle the request cannot be served.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // Forward to the V1 library and pass its status straight through.
    return pAddrLib->ComputeSurfaceInfo(pIn, pOut);
}


/**
****************************************************************************************************
*   AddrComputeSurfaceAddrFromCoord
*
*   @brief
*       Compute surface address according to coordinates
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                     hLib, ///< address lib handle
    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,  ///< [in] surface info and coordinates
    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] surface address
{
    V1::Lib* const pAddrLib = V1::Lib::GetLib(hLib);

    // Either the V1 library's own status, or ADDR_ERROR when the handle yields no library.
    return (pAddrLib != NULL) ? pAddrLib->ComputeSurfaceAddrFromCoord(pIn, pOut)
                              : ADDR_ERROR;
}

/**
****************************************************************************************************
*   AddrComputeSurfaceCoordFromAddr
*
*   @brief
*       Compute coordinates according to surface address
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
    ADDR_HANDLE                                     hLib, ///< address lib handle
    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,  ///< [in] surface info and address
    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) ///< [out] coordinates
{
    V1::Lib* const pAddrLib = V1::Lib::GetLib(hLib);

    // Without a library object behind the handle the request cannot be served.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // Forward to the V1 library and pass its status straight through.
    return pAddrLib->ComputeSurfaceCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                   HTile functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrComputeHtileInfo
*
*   @brief
*       Compute Htile pitch, height, base alignment and size in bytes
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
    ADDR_HANDLE                             hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,  ///< [in] Htile description
    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut) ///< [out] receives Htile pitch, height and byte size
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeHtileInfo(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeHtileAddrFromCoord
*
*   @brief
*       Compute Htile address according to coordinates (of depth buffer)
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Htile description plus coordinates
    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] receives the Htile address
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeHtileAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeHtileCoordFromAddr
*
*   @brief
*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
*       Htile address
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,  ///< [in] Htile description plus address
    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut) ///< [out] receives the Htile coordinates
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeHtileCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     C-mask functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrComputeCmaskInfo
*
*   @brief
*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
*       info
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
    ADDR_HANDLE                             hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,  ///< [in] Cmask pitch and height
    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut) ///< [out] receives Cmask pitch, height and byte size
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeCmaskInfo(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeCmaskAddrFromCoord
*
*   @brief
*       Compute Cmask address according to coordinates (of MSAA color buffer)
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Cmask description plus coordinates
    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] receives the Cmask address
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeCmaskAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeCmaskCoordFromAddr
*
*   @brief
*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
*       Cmask address
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Cmask description plus address
    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] receives the Cmask coordinates
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeCmaskCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     F-mask functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrComputeFmaskInfo
*
*   @brief
*       Compute Fmask pitch/height/depth/alignments and size in bytes
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
    ADDR_HANDLE                             hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,  ///< [in] Fmask description
    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut) ///< [out] receives Fmask pitch and height
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeFmaskInfo(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeFmaskAddrFromCoord
*
*   @brief
*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Fmask description plus coordinates
    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] receives the Fmask address
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeFmaskAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeFmaskCoordFromAddr
*
*   @brief
*       Compute coordinates (x,y,slice,sample,plane) according to Fmask address
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
    ADDR_HANDLE                                     hLib, ///< handle identifying the address library
    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Fmask description plus address
    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] receives the Fmask coordinates
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeFmaskCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     DCC key functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrComputeDccInfo
*
*   @brief
*       Compute DCC key size, base alignment based on color surface size, tile info or tile index
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
    ADDR_HANDLE                             hLib,   ///< handle identifying the address library
    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,    ///< [in] input structure
    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut)   ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Returns ADDR_ERROR when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Otherwise the status produced by ComputeDccInfo is returned as-is.
    return pV1Lib->ComputeDccInfo(pIn, pOut);
}


///////////////////////////////////////////////////////////////////////////////
// Below functions are element related or helper functions
///////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   AddrGetVersion
*
*   @brief
*       Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION
*       defined in addrinterface.h to see if there is a mismatch.
****************************************************************************************************
*/
UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
{
    // Look up the library object behind the handle.
    Addr::Lib* pLib = Lib::GetLib(hLib);

    // A missing library object is flagged through the assert macro...
    ADDR_ASSERT(pLib != NULL);

    // ...and, if execution continues, reported to the caller as version 0.
    if (!pLib)
    {
        return 0;
    }

    return pLib->GetVersion();
}

/**
****************************************************************************************************
*   AddrUseTileIndex
*
*   @brief
*       Return TRUE if tileIndex is enabled in this address library
****************************************************************************************************
*/
BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib)
{
    // Look up the V1 library object behind the handle.
    V1::Lib* pLib = V1::Lib::GetLib(hLib);

    // A missing library object is flagged through the assert macro...
    ADDR_ASSERT(pLib != NULL);

    // ...and, if execution continues, answered with FALSE.
    if (!pLib)
    {
        return FALSE;
    }

    // The library is queried with index 0.
    return pLib->UseTileIndex(0);
}

/**
****************************************************************************************************
*   AddrUseCombinedSwizzle
*
*   @brief
*       Return TRUE if combined swizzle is enabled in this address library
****************************************************************************************************
*/
BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib)
{
    // Look up the V1 library object behind the handle.
    V1::Lib* pLib = V1::Lib::GetLib(hLib);

    // A missing library object is flagged through the assert macro...
    ADDR_ASSERT(pLib != NULL);

    // ...and, if execution continues, answered with FALSE.
    if (!pLib)
    {
        return FALSE;
    }

    return pLib->UseCombinedSwizzle();
}

/**
****************************************************************************************************
*   AddrExtractBankPipeSwizzle
*
*   @brief
*       Extract Bank and Pipe swizzle from base256b
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
    ADDR_HANDLE                                 hLib,     ///< handle identifying the address library
    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,      ///< [in] input structure
    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut)     ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the extraction; its status code is the result of this call.
    return pV1Lib->ExtractBankPipeSwizzle(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrCombineBankPipeSwizzle
*
*   @brief
*       Combine Bank and Pipe swizzle
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
    ADDR_HANDLE                                 hLib,     ///< handle identifying the address library
    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,      ///< [in] input structure
    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut)     ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the combination; its status code is the result of this call.
    return pV1Lib->CombineBankPipeSwizzle(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeSliceSwizzle
*
*   @brief
*       Compute a swizzle for slice from a base swizzle
*   @return
*       ADDR_OK if no error
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
    ADDR_HANDLE                                 hLib,     ///< handle identifying the address library
    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*      pIn,      ///< [in] input structure
    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*           pOut)     ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Note the name difference: the library method is ComputeSliceTileSwizzle.
    return pV1Lib->ComputeSliceTileSwizzle(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputeBaseSwizzle
*
*   @brief
*       Return a combined bank and pipe swizzle based on surface type/index
*   @return
*       ADDR_OK if no error
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
    ADDR_HANDLE                             hLib,     ///< handle identifying the address library
    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,      ///< [in] input structure
    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut)     ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV1Lib->ComputeBaseSwizzle(pIn, pOut);
}

/**
****************************************************************************************************
*   ElemFlt32ToDepthPixel
*
*   @brief
*       Convert a FLT_32 value to a depth/stencil pixel value
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
    ADDR_HANDLE                         hLib,    ///< handle identifying the address library
    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,     ///< [in] per-component value
    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut)    ///< [out] receives the final pixel value
{
    // Look up the library object behind the handle.
    Lib* const pAddrLib = Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // NOTE(review): any status produced by Flt32ToDepthPixel is not propagated, so a
    // resolvable handle always yields ADDR_OK - confirm this is intended.
    pAddrLib->Flt32ToDepthPixel(pIn, pOut);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   ElemFlt32ToColorPixel
*
*   @brief
*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
    ADDR_HANDLE                         hLib,    ///< handle identifying the address library
    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,     ///< [in] format, surface number and swap value
    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut)    ///< [out] receives the final pixel value
{
    // Look up the library object behind the handle.
    Lib* const pAddrLib = Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // NOTE(review): any status produced by Flt32ToColorPixel is not propagated, so a
    // resolvable handle always yields ADDR_OK - confirm this is intended.
    pAddrLib->Flt32ToColorPixel(pIn, pOut);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   ElemGetExportNorm
*
*   @brief
*       Helper function to check whether a format can be EXPORT_NORM,
*       which is a register setting of CB_COLOR_INFO.SURFACE_FORMAT.
*       FP16 can be reported as EXPORT_NORM for rv770 in the r600
*       family
*
****************************************************************************************************
*/
BOOL_32 ADDR_API ElemGetExportNorm(
    ADDR_HANDLE                     hLib, ///< handle identifying the address library
    const ELEM_GETEXPORTNORM_INPUT* pIn)  ///< [in] input structure
{
    // Pessimistic defaults: both are overwritten only when the handle resolves.
    BOOL_32           enabled    = FALSE;
    ADDR_E_RETURNCODE returnCode = ADDR_ERROR;

    Addr::Lib* pLib = Lib::GetLib(hLib);

    if (pLib != NULL)
    {
        enabled    = pLib->GetExportNorm(pIn);
        returnCode = ADDR_OK;
    }

    // The status is never returned to the caller; it only feeds this assert.
    ADDR_ASSERT(returnCode == ADDR_OK);

    return enabled;
}

/**
****************************************************************************************************
*   ElemSize
*
*   @brief
*       Get bits-per-element for specified format
*
*   @return
*       Bits-per-element of specified format
*
****************************************************************************************************
*/
UINT_32 ADDR_API ElemSize(
    ADDR_HANDLE hLib,   ///< handle identifying the address library
    AddrFormat  format) ///< format whose bits-per-element is requested
{
    // Look up the library object behind the handle.
    Addr::Lib* const pAddrLib = Lib::GetLib(hLib);

    // Without a library object the answer is 0; no assert is raised here.
    if (pAddrLib == NULL)
    {
        return 0;
    }

    return pAddrLib->GetBpe(format);
}

/**
****************************************************************************************************
*   AddrConvertTileInfoToHW
*
*   @brief
*       Convert tile info from real value to hardware register value
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
    ADDR_HANDLE                             hLib, ///< handle identifying the address library
    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,  ///< [in] tile info holding real values
    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut) ///< [out] receives tile info as HW register values
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the conversion; its status code is the result of this call.
    return pV1Lib->ConvertTileInfoToHW(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrConvertTileIndex
*
*   @brief
*       Convert tile index to tile mode/type/info
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
    ADDR_HANDLE                          hLib, ///< handle identifying the address library
    const ADDR_CONVERT_TILEINDEX_INPUT*  pIn,  ///< [in] tile index to convert
    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] receives tile mode/type/info
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the conversion; its status code is the result of this call.
    return pV1Lib->ConvertTileIndex(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrGetMacroModeIndex
*
*   @brief
*       Get macro mode index based on input parameters
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex(
    ADDR_HANDLE                          hLib, ///< handle identifying the address library
    const ADDR_GET_MACROMODEINDEX_INPUT* pIn,  ///< [in] input structure
    ADDR_GET_MACROMODEINDEX_OUTPUT*      pOut) ///< [out] receives the macro mode index
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the lookup; its status code is the result of this call.
    return pV1Lib->GetMacroModeIndex(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrConvertTileIndex1
*
*   @brief
*       Convert tile index to tile mode/type/info
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
    ADDR_HANDLE                          hLib, ///< handle identifying the address library
    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,  ///< [in] tile index to convert
    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] receives tile mode/type/info
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the conversion; its status code is the result of this call.
    return pV1Lib->ConvertTileIndex1(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrGetTileIndex
*
*   @brief
*       Get tile index from tile mode/type/info
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
*
*   @note
*       Only meaningful for SI (and above)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
    ADDR_HANDLE                     hLib, ///< handle identifying the address library
    const ADDR_GET_TILEINDEX_INPUT* pIn,  ///< [in] input structure
    ADDR_GET_TILEINDEX_OUTPUT*      pOut) ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the lookup; its status code is the result of this call.
    return pV1Lib->GetTileIndex(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrComputePrtInfo
*
*   @brief
*       Interface function for ComputePrtInfo
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
    ADDR_HANDLE                 hLib, ///< handle identifying the address library
    const ADDR_PRT_INFO_INPUT*  pIn,  ///< [in] input structure
    ADDR_PRT_INFO_OUTPUT*       pOut) ///< [out] output structure
{
    // Translate the opaque handle into the V1 library object.
    V1::Lib* const pV1Lib = V1::Lib::GetLib(hLib);

    // Returns ADDR_ERROR when no library object is available.
    if (pV1Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Otherwise the status produced by ComputePrtInfo is returned as-is.
    return pV1Lib->ComputePrtInfo(pIn, pOut);
}

/**
****************************************************************************************************
*   AddrGetMaxAlignments
*
*   @brief
*       Get maximum alignments
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments(
    ADDR_HANDLE                     hLib, ///< handle identifying the address library
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) ///< [out] output structure
{
    // Look up the library object behind the handle.
    Addr::Lib* const pAddrLib = Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the query; its status code is the result of this call.
    return pAddrLib->GetMaxAlignments(pOut);
}

/**
****************************************************************************************************
*   AddrGetMaxMetaAlignments
*
*   @brief
*       Get maximum alignments for metadata
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments(
    ADDR_HANDLE                     hLib, ///< handle identifying the address library
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) ///< [out] output structure
{
    // Look up the library object behind the handle.
    Addr::Lib* const pAddrLib = Lib::GetLib(hLib);

    // Bail out with a generic error when no library object is available.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the query; its status code is the result of this call.
    return pAddrLib->GetMaxMetaAlignments(pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                    Surface functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr2ComputeSurfaceInfo
*
*   @brief
*       Calculate surface width/height/depth/alignments and suitable tiling mode
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo(
    ADDR_HANDLE                                hLib, ///< handle identifying the address library
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT*    pIn,  ///< [in] surface description
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*         pOut) ///< [out] receives surface parameters and alignments
{
    // Translate the opaque handle into the V2 library object.
    V2::Lib* const pV2Lib = V2::Lib::GetLib(hLib);

    // Bail out with a generic error when no V2 library object is available.
    if (pV2Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV2Lib->ComputeSurfaceInfo(pIn, pOut);
}


/**
****************************************************************************************************
*   Addr2ComputeSurfaceAddrFromCoord
*
*   @brief
*       Compute surface address according to coordinates
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                         hLib, ///< handle identifying the address library
    const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] surface description plus coordinates
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] receives the surface address
{
    // Translate the opaque handle into the V2 library object.
    V2::Lib* const pV2Lib = V2::Lib::GetLib(hLib);

    // Bail out with a generic error when no V2 library object is available.
    if (pV2Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV2Lib->ComputeSurfaceAddrFromCoord(pIn, pOut);
}


/**
****************************************************************************************************
*   Addr2ComputeSurfaceCoordFromAddr
*
*   @brief
*       Compute coordinates according to surface address
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr(
    ADDR_HANDLE                                         hLib, ///< handle identifying the address library
    const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT*    pIn,  ///< [in] surface description plus address
    ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*         pOut) ///< [out] receives the coordinates
{
    // Translate the opaque handle into the V2 library object.
    V2::Lib* const pV2Lib = V2::Lib::GetLib(hLib);

    // Bail out with a generic error when no V2 library object is available.
    if (pV2Lib == NULL)
    {
        return ADDR_ERROR;
    }

    // Delegate the computation; its status code is the result of this call.
    return pV2Lib->ComputeSurfaceCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                   HTile functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr2ComputeHtileInfo
*
*   @brief
*       C entry point for Htile sizing (pitch, height, base alignment, size in bytes); the work
*       is done by V2::Lib::ComputeHtileInfo
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo(
    ADDR_HANDLE                              hLib, ///< address lib handle
    const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,  ///< [in] Htile information
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut) ///< [out] Htile pitch, height and size in bytes
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeHtileInfo(pIn, pOut);
}


/**
****************************************************************************************************
*   Addr2ComputeHtileAddrFromCoord
*
*   @brief
*       C entry point that turns depth-buffer coordinates into an Htile address by forwarding to
*       V2::Lib::ComputeHtileAddrFromCoord
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] Htile info and coordinates
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] Htile address
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeHtileAddrFromCoord(pIn, pOut) : ADDR_ERROR;
}


/**
****************************************************************************************************
*   Addr2ComputeHtileCoordFromAddr
*
*   @brief
*       C entry point that maps an Htile address to depth-buffer coordinates (1st pixel of a
*       micro tile) by forwarding to V2::Lib::ComputeHtileCoordFromAddr
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*    pIn,  ///< [in] Htile info and address
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*         pOut) ///< [out] Htile coordinates
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeHtileCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     C-mask functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr2ComputeCmaskInfo
*
*   @brief
*       C entry point for Cmask sizing (pitch, height, base alignment, size in bytes) derived
*       from color buffer info; the work is done by V2::Lib::ComputeCmaskInfo
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo(
    ADDR_HANDLE                              hLib, ///< address lib handle
    const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,  ///< [in] Cmask pitch and height
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut) ///< [out] Cmask pitch, height and size in bytes
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeCmaskInfo(pIn, pOut) : ADDR_ERROR;
}


/**
****************************************************************************************************
*   Addr2ComputeCmaskAddrFromCoord
*
*   @brief
*       C entry point that turns MSAA color-buffer coordinates into a Cmask address by
*       forwarding to V2::Lib::ComputeCmaskAddrFromCoord
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] Cmask info and coordinates
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] Cmask address
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeCmaskAddrFromCoord(pIn, pOut);
}


/**
****************************************************************************************************
*   Addr2ComputeCmaskCoordFromAddr
*
*   @brief
*       C entry point that maps a Cmask address to color-buffer coordinates (1st pixel of a
*       micro tile) by forwarding to V2::Lib::ComputeCmaskCoordFromAddr
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT*    pIn,  ///< [in] Cmask info and address
    ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*         pOut) ///< [out] Cmask coordinates
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeCmaskCoordFromAddr(pIn, pOut) : ADDR_ERROR;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     F-mask functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr2ComputeFmaskInfo
*
*   @brief
*       C entry point for Fmask sizing (pitch/height/depth/alignments and size in bytes); the
*       work is done by V2::Lib::ComputeFmaskInfo
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo(
    ADDR_HANDLE                              hLib, ///< address lib handle
    const ADDR2_COMPUTE_FMASK_INFO_INPUT*    pIn,  ///< [in] Fmask information
    ADDR2_COMPUTE_FMASK_INFO_OUTPUT*         pOut) ///< [out] Fmask pitch and height
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeFmaskInfo(pIn, pOut);
}


/**
****************************************************************************************************
*   Addr2ComputeFmaskAddrFromCoord
*
*   @brief
*       C entry point that turns coordinates (x,y,slice,sample,plane) into an Fmask address by
*       forwarding to V2::Lib::ComputeFmaskAddrFromCoord
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] Fmask info and coordinates
    ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] Fmask address
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeFmaskAddrFromCoord(pIn, pOut) : ADDR_ERROR;
}


/**
****************************************************************************************************
*   Addr2ComputeFmaskCoordFromAddr
*
*   @brief
*       C entry point that maps an Fmask address back to coordinates (x,y,slice,sample,plane)
*       by forwarding to V2::Lib::ComputeFmaskCoordFromAddr
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr(
    ADDR_HANDLE                                       hLib, ///< address lib handle
    const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT*    pIn,  ///< [in] Fmask info and address
    ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*         pOut) ///< [out] Fmask coordinates
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeFmaskCoordFromAddr(pIn, pOut);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                                     DCC key functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr2ComputeDccInfo
*
*   @brief
*       C entry point for DCC key sizing (key size and base alignment from color surface size,
*       tile info or tile index); the work is done by V2::Lib::ComputeDccInfo
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo(
    ADDR_HANDLE                           hLib,   ///< handle of addrlib
    const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input
    ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut)   ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeDccInfo(pIn, pOut) : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr2ComputeDccAddrFromCoord
*
*   @brief
*       C entry point that turns coordinates into a DCC key address by forwarding to
*       V2::Lib::ComputeDccAddrFromCoord
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord(
    ADDR_HANDLE                                     hLib, ///< address lib handle
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] Dcc info and coordinates
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] Dcc address
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeDccAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr2ComputePipeBankXor
*
*   @brief
*       C entry point that asks V2::Lib::ComputePipeBankXor for a valid bank pipe xor value the
*       client can use
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor(
    ADDR_HANDLE                            hLib, ///< handle of addrlib
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,  ///< [in] input
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputePipeBankXor(pIn, pOut) : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr2ComputeSlicePipeBankXor
*
*   @brief
*       C entry point that derives a per-slice pipe bank xor value from the base pipe bank xor
*       and slice id by forwarding to V2::Lib::ComputeSlicePipeBankXor
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor(
    ADDR_HANDLE                                  hLib, ///< handle of addrlib
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,  ///< [in] input
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeSlicePipeBankXor(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr2ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       C entry point that computes the sub resource offset for a swizzle pattern by forwarding
*       to V2::Lib::ComputeSubResourceOffsetForSwizzlePattern
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern(
    ADDR_HANDLE                                                     hLib, ///< handle of addrlib
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,  ///< [in] input
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut)
                              : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr2ComputeNonBlockCompressedView
*
*   @brief
*       C entry point that computes the non-block-compressed view of a given mipmap level/slice
*       by forwarding to V2::Lib::ComputeNonBlockCompressedView
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2ComputeNonBlockCompressedView(
    ADDR_HANDLE                                       hLib, ///< handle of addrlib
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,  ///< [in] input
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeNonBlockCompressedView(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr2GetPreferredSurfaceSetting
*
*   @brief
*       C entry point that asks V2::Lib::Addr2GetPreferredSurfaceSetting to suggest a preferred
*       setting for the client driver to program HW registers with
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting(
    ADDR_HANDLE                                   hLib, ///< handle of addrlib
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->Addr2GetPreferredSurfaceSetting(pIn, pOut) : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr2IsValidDisplaySwizzleMode
*
*   @brief
*       Report through *pResult whether the swizzle mode is supported by the display engine.
*       The query is issued for a zero-initialized surface description whose resource type is
*       ADDR_RSRC_TEX_2D and whose swizzle mode and bpp come from the arguments.
*
*   @return
*       ADDR_OK once *pResult has been written; ADDR_ERROR (with *pResult left untouched) if
*       hLib yields no V2 library object
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
    ADDR_HANDLE     hLib,
    AddrSwizzleMode swizzleMode,
    UINT_32         bpp,
    BOOL_32         *pResult)
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: without a library object there is nothing to query.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    // Only these three fields are filled in; every other field stays zero.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT surfIn = {};
    surfIn.resourceType = ADDR_RSRC_TEX_2D;
    surfIn.swizzleMode  = swizzleMode;
    surfIn.bpp          = bpp;

    *pResult = pAddrLib->IsValidDisplaySwizzleMode(&surfIn);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Addr2GetPossibleSwizzleModes
*
*   @brief
*       C entry point that asks V2::Lib::GetPossibleSwizzleModes for the swizzle modes that are
*       valid from the hardware's perspective, so the client can choose among them
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes(
    ADDR_HANDLE                                   hLib, ///< handle of addrlib
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) ///< [out] output
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->GetPossibleSwizzleModes(pIn, pOut);
}
/**
****************************************************************************************************
*   Addr2GetAllowedBlockSet
*
*   @brief
*       C entry point that asks V2::Lib::GetAllowedBlockSet for the set of allowed block sizes
*       given the allowed swizzle modes and the resource type
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedBlockSet(
    ADDR_HANDLE      hLib,              ///< handle of addrlib
    ADDR2_SWMODE_SET allowedSwModeSet,  ///< [in] allowed swizzle modes
    AddrResourceType rsrcType,          ///< [in] resource type
    ADDR2_BLOCK_SET* pAllowedBlockSet)  ///< [out] allowed block sizes
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL)
        ? pAddrLib->GetAllowedBlockSet(allowedSwModeSet, rsrcType, pAllowedBlockSet)
        : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr2GetAllowedSwSet
*
*   @brief
*       C entry point that asks V2::Lib::GetAllowedSwSet for the set of allowed swizzle types
*       given the allowed swizzle modes
*
*   @return
*       ADDR_ERROR if hLib yields no V2 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedSwSet(
    ADDR_HANDLE       hLib,              ///< handle of addrlib
    ADDR2_SWMODE_SET  allowedSwModeSet,  ///< [in] allowed swizzle modes
    ADDR2_SWTYPE_SET* pAllowedSwSet)     ///< [out] allowed swizzle types
{
    V2::Lib* const pAddrLib = V2::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->GetAllowedSwSet(allowedSwModeSet, pAllowedSwSet);
}

/**
****************************************************************************************************
*   Addr2IsBlockTypeAvailable
*
*   @brief
*       Tell whether blockType is present in blockSet. AddrBlockLinear is answered from the
*       set's `linear` member; any other type is answered from bit (blockType - 1) of the
*       set's `value` member.
*
*   @return
*       TRUE if the block type is available in the set, FALSE otherwise
****************************************************************************************************
*/
BOOL_32 Addr2IsBlockTypeAvailable(
    ADDR2_BLOCK_SET blockSet,
    AddrBlockType   blockType)
{
    // Linear has its own flag and is not looked up through the shifted mask below.
    if (blockType == AddrBlockLinear)
    {
        return blockSet.linear ? TRUE : FALSE;
    }

    const UINT_32 bitIndex = static_cast<UINT_32>(blockType) - 1;

    return (blockSet.value & (1 << bitIndex)) ? TRUE : FALSE;
}

/**
****************************************************************************************************
*   Addr2BlockTypeWithinMemoryBudget
*
*   @brief
*       Decide whether a candidate block type is acceptable in terms of memory waste, favoring
*       larger block types.
*
*       When memoryBudget >= 1.0 the floating-point size ratio is compared with the budget:
*         - bigger candidate : accepted if newBlockTypeSize / minSize <= memoryBudget
*         - smaller candidate: accepted if minSize / newBlockTypeSize >  memoryBudget
*       Otherwise the integer ratio ratioLow:ratioHi is used instead:
*         - bigger candidate : accepted if newBlockTypeSize * ratioHi  <= minSize * ratioLow
*         - smaller candidate: accepted if newBlockTypeSize * ratioLow <  minSize * ratioHi
*
*   @return
*       TRUE if the candidate block type is accepted, FALSE otherwise
****************************************************************************************************
*/
BOOL_32 Addr2BlockTypeWithinMemoryBudget(
    UINT_64 minSize,
    UINT_64 newBlockTypeSize,
    UINT_32 ratioLow,
    UINT_32 ratioHi,
    DOUBLE  memoryBudget,
    BOOL_32 newBlockTypeBigger)
{
    bool withinBudget;

    if (memoryBudget >= 1.0)
    {
        // Budget given as a floating-point multiplier.
        withinBudget = newBlockTypeBigger
            ? ((static_cast<DOUBLE>(newBlockTypeSize) / minSize) <= memoryBudget)
            : ((static_cast<DOUBLE>(minSize) / newBlockTypeSize) > memoryBudget);
    }
    else
    {
        // Budget given as the integer ratio ratioLow:ratioHi, compared by cross-multiplication.
        withinBudget = newBlockTypeBigger
            ? ((newBlockTypeSize * ratioHi) <= (minSize * ratioLow))
            : ((newBlockTypeSize * ratioLow) < (minSize * ratioHi));
    }

    return withinBudget ? TRUE : FALSE;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
//                                    Surface functions for Addr3
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Addr3ComputeSurfaceInfo
*
*   @brief
*       Calculate surface width/height/depth/alignments and suitable tiling mode by forwarding
*       the request to V3::Lib::ComputeSurfaceInfo
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE. ADDR_ERROR is
*       returned when hLib yields no V3 library object, matching the other entry points here;
*       previously that case fell through and reported ADDR_OK without doing any work.
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceInfo(
    ADDR_HANDLE                                hLib, ///< address lib handle
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*    pIn,  ///< [in] surface information
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*         pOut) ///< [out] surface parameters and alignments
{
    V3::Lib* pLib = V3::Lib::GetLib(hLib);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pLib != NULL)
    {
        returnCode = pLib->ComputeSurfaceInfo(pIn, pOut);
    }
    else
    {
        // No library object: pOut was never written, so the call must not report success.
        returnCode = ADDR_ERROR;
    }

    return returnCode;
}

/**
****************************************************************************************************
*   Addr3GetPossibleSwizzleModes
*
*   @brief
*       Get valid swizzle mode options given image input for further optimal selection by
*       forwarding the request to V3::Lib::GetPossibleSwizzleModes
*
*   @return
*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE (e.g.
*       ADDR_PARAMSIZEMISMATCH). ADDR_ERROR is returned when hLib yields no V3 library object,
*       matching the other entry points here; previously that case reported ADDR_OK without
*       doing any work.
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3GetPossibleSwizzleModes(
    ADDR_HANDLE                                    hLib, ///< address lib handle
    const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT*   pIn,  ///< [in] surface information
    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*        pOut) ///< [out] allowable swizzle modes
{
    V3::Lib* pLib = V3::Lib::GetLib(hLib);

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pLib != NULL)
    {
        returnCode = pLib->GetPossibleSwizzleModes(pIn, pOut);
    }
    else
    {
        // No library object: pOut was never written, so the call must not report success.
        returnCode = ADDR_ERROR;
    }

    return returnCode;
}

/**
****************************************************************************************************
*   Addr3ComputeSurfaceAddrFromCoord
*
*   @brief
*       C entry point that turns coordinates into a surface address by forwarding to
*       V3::Lib::ComputeSurfaceAddrFromCoord
*
*   @return
*       ADDR_ERROR if hLib yields no V3 library object, otherwise the library's return code
*       (ADDR_OK on success)
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord(
    ADDR_HANDLE                                         hLib, ///< address lib handle
    const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT*    pIn,  ///< [in] surface info and coordinates
    ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*         pOut) ///< [out] surface address
{
    V3::Lib* const pAddrLib = V3::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeSurfaceAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr3ComputePipeBankXor
*
*   @brief
*       C entry point that asks V3::Lib::ComputePipeBankXor for a valid bank pipe xor value the
*       client can use
*
*   @return
*       ADDR_ERROR if hLib yields no V3 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputePipeBankXor(
    ADDR_HANDLE                            hLib, ///< handle of addrlib
    const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,  ///< [in] input
    ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) ///< [out] output
{
    V3::Lib* const pAddrLib = V3::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputePipeBankXor(pIn, pOut) : ADDR_ERROR;
}

/**
****************************************************************************************************
*   Addr3ComputeNonBlockCompressedView
*
*   @brief
*       C entry point that computes the non-block-compressed view of a given mipmap level/slice
*       by forwarding to V3::Lib::ComputeNonBlockCompressedView
*
*   @return
*       ADDR_ERROR if hLib yields no V3 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeNonBlockCompressedView(
    ADDR_HANDLE                                       hLib, ///< handle of addrlib
    const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,  ///< [in] input
    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) ///< [out] output
{
    V3::Lib* const pAddrLib = V3::Lib::GetLib(hLib);

    // Guard: nothing to forward to when the handle has no library object.
    if (pAddrLib == NULL)
    {
        return ADDR_ERROR;
    }

    return pAddrLib->ComputeNonBlockCompressedView(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr3ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       C entry point that computes the sub resource offset for a swizzle pattern by forwarding
*       to V3::Lib::ComputeSubResourceOffsetForSwizzlePattern. Nothing is returned; if hLib
*       yields no V3 library object the call does nothing and pOut is left untouched.
****************************************************************************************************
*/
VOID ADDR_API Addr3ComputeSubResourceOffsetForSwizzlePattern(
    ADDR_HANDLE                                                     hLib, ///< handle of addrlib
    const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,  ///< [in] input
    ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) ///< [out] output
{
    V3::Lib* const pAddrLib = V3::Lib::GetLib(hLib);

    // Silent no-op without a library object: this entry point has no status to report.
    if (pAddrLib == NULL)
    {
        return;
    }

    pAddrLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut);
}

/**
****************************************************************************************************
*   Addr3ComputeSlicePipeBankXor
*
*   @brief
*       C entry point that derives a per-slice pipe bank xor value from the base pipe bank xor
*       and slice id by forwarding to V3::Lib::ComputeSlicePipeBankXor
*
*   @return
*       ADDR_ERROR if hLib yields no V3 library object, otherwise the library's return code
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API Addr3ComputeSlicePipeBankXor(
    ADDR_HANDLE                                  hLib, ///< handle of addrlib
    const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,  ///< [in] input
    ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) ///< [out] output
{
    V3::Lib* const pAddrLib = V3::Lib::GetLib(hLib);

    // Delegate when GetLib() produced a library object; otherwise report a generic error.
    return (pAddrLib != NULL) ? pAddrLib->ComputeSlicePipeBankXor(pIn, pOut) : ADDR_ERROR;
}
} //namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2017-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#ifndef _AMDGPU_ASIC_ADDR_H
#define _AMDGPU_ASIC_ADDR_H

#define ATI_VENDOR_ID         0x1002
#define AMD_VENDOR_ID         0x1022

// AMDGPU_VENDOR_IS_AMD(vendorId)
// Non-zero when vendorId equals ATI_VENDOR_ID or AMD_VENDOR_ID.
// The argument is parenthesized so that an expression such as (a | b) is compared as a whole
// rather than being split by operator precedence. It is still evaluated up to twice, so do
// not pass an expression with side effects.
#define AMDGPU_VENDOR_IS_AMD(v) (((v) == ATI_VENDOR_ID) || ((v) == AMD_VENDOR_ID))

// Family identifiers. The trailing comment gives the decimal value and, where known, the
// product line. Entries are not listed in strictly ascending numeric order.
#define FAMILY_UNKNOWN 0x00
#define FAMILY_TN      0x69 //# 105 / Trinity APUs
#define FAMILY_SI      0x6E //# 110 / Southern Islands: Tahiti, Pitcairn, CapeVerde, Oland, Hainan
#define FAMILY_CI      0x78 //# 120 / Sea Islands: Bonaire, Hawaii
#define FAMILY_KV      0x7D //# 125 / Kaveri APUs: Spectre, Spooky, Kalindi, Godavari
#define FAMILY_VI      0x82 //# 130 / Volcanic Islands: Iceland, Tonga, Fiji
#define FAMILY_CZ      0x87 //# 135 / Carrizo APUs: Carrizo, Stoney
#define FAMILY_AI      0x8D //# 141 / Vega: 10, 20
#define FAMILY_RV      0x8E //# 142 / Raven
#define FAMILY_NV      0x8F //# 143 / Navi: 10
#define FAMILY_VGH     0x90 //# 144 / Van Gogh
#define FAMILY_NV3     0x91 //# 145 / Navi: 3x
#define FAMILY_GFX1150 0x96 //# 150
#define FAMILY_GFX1103 0x94 //# 148
#define FAMILY_RMB     0x92 //# 146 / Rembrandt
#define FAMILY_RPL     0x95 //# 149 / Raphael
#define FAMILY_MDN     0x97 //# 151 / Mendocino
#define FAMILY_GFX12   0x98 //# 152

// AMDGPU_FAMILY_IS(familyId, familyName)
// FAMILY_IS(f, fn) pastes fn onto FAMILY_ and compares the result with f. The argument f is
// parenthesized so that an expression argument is compared as a whole instead of being split
// by operator precedence.
#define FAMILY_IS(f, fn)     ((f) == FAMILY_##fn)
#define FAMILY_IS_TN(f)      FAMILY_IS(f, TN)
#define FAMILY_IS_SI(f)      FAMILY_IS(f, SI)
#define FAMILY_IS_CI(f)      FAMILY_IS(f, CI)
#define FAMILY_IS_KV(f)      FAMILY_IS(f, KV)
#define FAMILY_IS_VI(f)      FAMILY_IS(f, VI)
// NOTE(review): no FAMILY_POLARIS is defined in the list above, so FAMILY_IS_POLARIS only
// compiles where that macro is supplied elsewhere -- confirm before using it.
#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS)
#define FAMILY_IS_CZ(f)      FAMILY_IS(f, CZ)
#define FAMILY_IS_AI(f)      FAMILY_IS(f, AI)
#define FAMILY_IS_RV(f)      FAMILY_IS(f, RV)
#define FAMILY_IS_NV(f)      FAMILY_IS(f, NV)
#define FAMILY_IS_NV3(f)     FAMILY_IS(f, NV3)
#define FAMILY_IS_RMB(f)     FAMILY_IS(f, RMB)
#define FAMILY_IS_GFX12(f)   FAMILY_IS(f, GFX12)

#define AMDGPU_UNKNOWN          0xFF

// Per-chip id ranges. Every AMDGPU_<chip>_RANGE macro expands to two comma-separated values,
// "min, max"; the trailing comment restates the interval in decimal as min <= x < max
// ("max" in a comment stands for the 0xFF upper bound).
// The pair has the shape of the (min, max) arguments taken by AMDGPU_RANGE_HELPER, which
// tests val >= min && val < max.
// NOTE(review): ranges in different groups overlap numerically (e.g. ICELAND and VEGA10 are
// both 0x01..0x14), so a range test presumably has to be combined with a family check --
// confirm at the call sites.
#define AMDGPU_TAHITI_RANGE     0x05, 0x14 //#  5 <= x < 20
#define AMDGPU_PITCAIRN_RANGE   0x15, 0x28 //# 21 <= x < 40
#define AMDGPU_CAPEVERDE_RANGE  0x29, 0x3C //# 41 <= x < 60
#define AMDGPU_OLAND_RANGE      0x3C, 0x46 //# 60 <= x < 70
#define AMDGPU_HAINAN_RANGE     0x46, 0xFF //# 70 <= x < max

#define AMDGPU_BONAIRE_RANGE    0x14, 0x28 //# 20 <= x < 40
#define AMDGPU_HAWAII_RANGE     0x28, 0x3C //# 40 <= x < 60

#define AMDGPU_SPECTRE_RANGE    0x01, 0x41 //#   1 <= x < 65
#define AMDGPU_SPOOKY_RANGE     0x41, 0x81 //#  65 <= x < 129
#define AMDGPU_KALINDI_RANGE    0x81, 0xA1 //# 129 <= x < 161
#define AMDGPU_GODAVARI_RANGE   0xA1, 0xFF //# 161 <= x < max

#define AMDGPU_ICELAND_RANGE    0x01, 0x14 //#  1 <= x < 20
#define AMDGPU_TONGA_RANGE      0x14, 0x28 //# 20 <= x < 40
#define AMDGPU_FIJI_RANGE       0x3C, 0x50 //# 60 <= x < 80

#define AMDGPU_POLARIS10_RANGE  0x50, 0x5A //#  80 <= x < 90
#define AMDGPU_POLARIS11_RANGE  0x5A, 0x64 //#  90 <= x < 100
#define AMDGPU_POLARIS12_RANGE  0x64, 0x6E //# 100 <= x < 110
#define AMDGPU_VEGAM_RANGE      0x6E, 0xFF //# 110 <= x < max

// Note: the BRISTOL range below lies entirely inside the CARRIZO range.
#define AMDGPU_CARRIZO_RANGE    0x01, 0x21 //#  1 <= x < 33
#define AMDGPU_BRISTOL_RANGE    0x10, 0x21 //# 16 <= x < 33
#define AMDGPU_STONEY_RANGE     0x61, 0xFF //# 97 <= x < max

#define AMDGPU_VEGA10_RANGE     0x01, 0x14 //#  1 <= x < 20
#define AMDGPU_VEGA12_RANGE     0x14, 0x28 //# 20 <= x < 40
#define AMDGPU_VEGA20_RANGE     0x28, 0xFF //# 40 <= x < max

#define AMDGPU_RAVEN_RANGE      0x01, 0x81 //#   1 <= x < 129
#define AMDGPU_RAVEN2_RANGE     0x81, 0x90 //# 129 <= x < 144
#define AMDGPU_RENOIR_RANGE     0x91, 0xFF //# 145 <= x < max

#define AMDGPU_NAVI10_RANGE     0x01, 0x0A //# 1  <= x < 10
#define AMDGPU_NAVI12_RANGE     0x0A, 0x14 //# 10 <= x < 20
#define AMDGPU_NAVI14_RANGE     0x14, 0x28 //# 20 <= x < 40
#define AMDGPU_NAVI21_RANGE     0x28, 0x32 //# 40  <= x < 50
#define AMDGPU_NAVI22_RANGE     0x32, 0x3C //# 50  <= x < 60
#define AMDGPU_NAVI23_RANGE     0x3C, 0x46 //# 60  <= x < 70
#define AMDGPU_NAVI24_RANGE     0x46, 0x50 //# 70  <= x < 80

#define AMDGPU_VANGOGH_RANGE    0x01, 0xFF //# 1 <= x < max

// Note: NAVI33 (0x10..0x20) sits numerically between NAVI31 and NAVI32.
#define AMDGPU_NAVI31_RANGE     0x01, 0x10 //# 01 <= x < 16
#define AMDGPU_NAVI32_RANGE     0x20, 0xFF //# 32 <= x < 255
#define AMDGPU_NAVI33_RANGE     0x10, 0x20 //# 16 <= x < 32
#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128
#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xC0 //# 128 <= x < 192

#define AMDGPU_GFX1150_RANGE    0x01, 0xFF //# 1 <= x < max

#define AMDGPU_REMBRANDT_RANGE  0x01, 0xFF //# 01 <= x < 255

#define AMDGPU_RAPHAEL_RANGE    0x01, 0xFF //# 1 <= x < max

#define AMDGPU_MENDOCINO_RANGE  0x01, 0xFF //# 1 <= x < max

#define AMDGPU_GFX12_TBD1_RANGE 0x40, 0xFF //# 64 <= x < max

// AMDGPU_EXPAND_FIX(x): identity macro. It forces one more rescan so that a
// range macro that expands to "min, max" is split into two separate arguments
// before AMDGPU_RANGE_HELPER is invoked.
#define AMDGPU_EXPAND_FIX(x) x

// AMDGPU_RANGE_HELPER(val, min, max): true when min <= val < max (max exclusive).
// Every argument is parenthesized so that expression arguments keep their own
// precedence; e.g. a conditional expression passed as `val` previously absorbed
// the comparison into its else-branch. `val` is still evaluated twice, so do
// not pass an expression with side effects.
#define AMDGPU_RANGE_HELPER(val, min, max) (((val) >= (min)) && ((val) < (max)))

// AMDGPU_IN_RANGE(val, RANGE): RANGE is either an AMDGPU_*_RANGE macro or an
// explicit "min, max" pair.
#define AMDGPU_IN_RANGE(val, ...)   AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))


// ASICREV_IS(eRevisionId, revisionName)
//
// Tests whether revision id `r` lies inside AMDGPU_<rn>_RANGE (min <= r < max).
// `rn` is token-pasted, so it must be the bare range name. The wrappers below
// only bind a fixed range name; the _P/_M/_V/_PM suffixes are part of the macro
// names and do not change the test that is performed.
#define ASICREV_IS(r, rn)              AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
#define ASICREV_IS_TAHITI_P(r)         ASICREV_IS(r, TAHITI)
#define ASICREV_IS_PITCAIRN_PM(r)      ASICREV_IS(r, PITCAIRN)
#define ASICREV_IS_CAPEVERDE_M(r)      ASICREV_IS(r, CAPEVERDE)
#define ASICREV_IS_OLAND_M(r)          ASICREV_IS(r, OLAND)
#define ASICREV_IS_HAINAN_V(r)         ASICREV_IS(r, HAINAN)

#define ASICREV_IS_BONAIRE_M(r)        ASICREV_IS(r, BONAIRE)
#define ASICREV_IS_HAWAII_P(r)         ASICREV_IS(r, HAWAII)

#define ASICREV_IS_SPECTRE(r)          ASICREV_IS(r, SPECTRE)
#define ASICREV_IS_SPOOKY(r)           ASICREV_IS(r, SPOOKY)
#define ASICREV_IS_KALINDI(r)          ASICREV_IS(r, KALINDI)
#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI)

#define ASICREV_IS_ICELAND_M(r)        ASICREV_IS(r, ICELAND)
#define ASICREV_IS_TONGA_P(r)          ASICREV_IS(r, TONGA)
#define ASICREV_IS_FIJI_P(r)           ASICREV_IS(r, FIJI)

#define ASICREV_IS_POLARIS10_P(r)      ASICREV_IS(r, POLARIS10)
#define ASICREV_IS_POLARIS11_M(r)      ASICREV_IS(r, POLARIS11)
#define ASICREV_IS_POLARIS12_V(r)      ASICREV_IS(r, POLARIS12)
#define ASICREV_IS_VEGAM_P(r)          ASICREV_IS(r, VEGAM)

#define ASICREV_IS_CARRIZO(r)          ASICREV_IS(r, CARRIZO)
#define ASICREV_IS_CARRIZO_BRISTOL(r)  ASICREV_IS(r, BRISTOL)
#define ASICREV_IS_STONEY(r)           ASICREV_IS(r, STONEY)

// VEGA10_M and VEGA10_P are aliases for the same range, as are VEGA12_P and
// VEGA12_p (the latter pair differs only in the case of the final letter).
#define ASICREV_IS_VEGA10_M(r)         ASICREV_IS(r, VEGA10)
#define ASICREV_IS_VEGA10_P(r)         ASICREV_IS(r, VEGA10)
#define ASICREV_IS_VEGA12_P(r)         ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA12_p(r)         ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA20_P(r)         ASICREV_IS(r, VEGA20)

#define ASICREV_IS_RAVEN(r)            ASICREV_IS(r, RAVEN)
#define ASICREV_IS_RAVEN2(r)           ASICREV_IS(r, RAVEN2)
#define ASICREV_IS_RENOIR(r)           ASICREV_IS(r, RENOIR)

#define ASICREV_IS_NAVI10_P(r)         ASICREV_IS(r, NAVI10)

#define ASICREV_IS_NAVI12_P(r)         ASICREV_IS(r, NAVI12)

#define ASICREV_IS_NAVI14_M(r)         ASICREV_IS(r, NAVI14)

#define ASICREV_IS_NAVI21_M(r)         ASICREV_IS(r, NAVI21)

#define ASICREV_IS_NAVI22_P(r)         ASICREV_IS(r, NAVI22)

#define ASICREV_IS_NAVI23_P(r)         ASICREV_IS(r, NAVI23)

#define ASICREV_IS_NAVI24_P(r)         ASICREV_IS(r, NAVI24)

#define ASICREV_IS_VANGOGH(r)          ASICREV_IS(r, VANGOGH)

#define ASICREV_IS_NAVI31_P(r)         ASICREV_IS(r, NAVI31)
#define ASICREV_IS_NAVI32_P(r)         ASICREV_IS(r, NAVI32)
#define ASICREV_IS_NAVI33_P(r)         ASICREV_IS(r, NAVI33)
#define ASICREV_IS_GFX1103_R1(r)       ASICREV_IS(r, GFX1103_R1)
#define ASICREV_IS_GFX1103_R2(r)       ASICREV_IS(r, GFX1103_R2)
#define ASICREV_IS_GFX1150(r)          ASICREV_IS(r, GFX1150)

#define ASICREV_IS_REMBRANDT(r)        ASICREV_IS(r, REMBRANDT)

#define ASICREV_IS_RAPHAEL(r)          ASICREV_IS(r, RAPHAEL)

#define ASICREV_IS_MENDOCINO(r)        ASICREV_IS(r, MENDOCINO)

#define ASICREV_IS_GFX12_TBD1_P(r)     ASICREV_IS(r, GFX12_TBD1)

#endif // _AMDGPU_ASIC_ADDR_H


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/chip/gfx10/gfx10_gb_reg.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#if !defined (__GFX10_GB_REG_H__)
#define __GFX10_GB_REG_H__

/*
*    gfx10_gb_reg.h
*
*    Register Spec Release:  1.0
*
*/

//
// Make sure the necessary endian defines are there.
//
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

// GFX10 view of the GB_ADDR_CONFIG register.
//
// The same 32-bit value can be read as named bit-fields (through `bitfields`
// or `bits`, two union members of one struct type) or as a whole word in
// unsigned, signed or float form. The two preprocessor branches list the same
// fields in opposite order; the widths in each branch add up to 32.
union GB_ADDR_CONFIG_GFX10
{
    struct
    {
#if defined(LITTLEENDIAN_CPU)
        unsigned NUM_PIPES            : 3;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned NUM_PKRS             : 3;
        unsigned                      : 21;   // unnamed padding
#elif defined(BIGENDIAN_CPU)
        unsigned                      : 21;   // unnamed padding
        unsigned NUM_PKRS             : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned NUM_PIPES            : 3;
#endif
    } bitfields, bits;

    unsigned u32All;   // whole register, unsigned
    int      i32All;   // whole register, signed
    float    f32All;   // whole register, float
};

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/chip/gfx11/gfx11_gb_reg.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#if !defined (__GFX11_GB_REG_H__)
#define __GFX11_GB_REG_H__

/*
*    gfx11_gb_reg.h
*
*    Register Spec Release:  1.0
*
*/

//
// Make sure the necessary endian defines are there.
//
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

// GFX11 view of the GB_ADDR_CONFIG register.
//
// `bitfields` and `bits` are two union members of the same struct type, so
// either name reaches the same fields; u32All / i32All / f32All expose the
// whole word. The two preprocessor branches list the fields in opposite order
// and each adds up to 32 bits.
union GB_ADDR_CONFIG_GFX11
{
    struct
    {
#if defined(LITTLEENDIAN_CPU)
        unsigned NUM_PIPES            : 3;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned NUM_PKRS             : 3;
        unsigned                      : 8;    // unnamed padding
        unsigned NUM_SHADER_ENGINES   : 2;
        unsigned                      : 5;    // unnamed padding
        unsigned NUM_RB_PER_SE        : 2;
        unsigned                      : 4;    // unnamed padding
#elif defined(BIGENDIAN_CPU)
        unsigned                      : 4;    // unnamed padding
        unsigned NUM_RB_PER_SE        : 2;
        unsigned                      : 5;    // unnamed padding
        unsigned NUM_SHADER_ENGINES   : 2;
        unsigned                      : 8;    // unnamed padding
        unsigned NUM_PKRS             : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned NUM_PIPES            : 3;
#endif
    } bitfields, bits;

    unsigned u32All;   // whole register, unsigned
    int      i32All;   // whole register, signed
    float    f32All;   // whole register, float
};

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/chip/gfx12/gfx12_gb_reg.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2023 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#if !defined (__GFX12_GB_REG_H__)
#define __GFX12_GB_REG_H__

/*
*    gfx12_gb_reg.h
*
*    Register Spec Release:  1.0
*
*/

//
// Make sure the necessary endian defines are there.
//
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

// GFX12 view of the GB_ADDR_CONFIG register.
//
// Compared with the GFX11 union declared in gfx11_gb_reg.h, NUM_SHADER_ENGINES
// is 4 bits wide here and the padding after it is 3 bits. `bitfields` and
// `bits` are two union members of the same struct type; u32All / i32All /
// f32All expose the whole word. Each preprocessor branch adds up to 32 bits.
union GB_ADDR_CONFIG_GFX12
{
    struct
    {
#if defined(LITTLEENDIAN_CPU)
        unsigned NUM_PIPES            : 3;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned NUM_PKRS             : 3;
        unsigned                      : 8;    // unnamed padding
        unsigned NUM_SHADER_ENGINES   : 4;
        unsigned                      : 3;    // unnamed padding
        unsigned NUM_RB_PER_SE        : 2;
        unsigned                      : 4;    // unnamed padding
#elif defined(BIGENDIAN_CPU)
        unsigned                      : 4;    // unnamed padding
        unsigned NUM_RB_PER_SE        : 2;
        unsigned                      : 3;    // unnamed padding
        unsigned NUM_SHADER_ENGINES   : 4;
        unsigned                      : 8;    // unnamed padding
        unsigned NUM_PKRS             : 3;
        unsigned MAX_COMPRESSED_FRAGS : 2;
        unsigned PIPE_INTERLEAVE_SIZE : 3;
        unsigned NUM_PIPES            : 3;
#endif
    } bitfields, bits;

    unsigned u32All;   // whole register, unsigned
    int      i32All;   // whole register, signed
    float    f32All;   // whole register, float
};

#endif

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#if !defined (__GFX9_GB_REG_H__)
#define __GFX9_GB_REG_H__

/*
*    gfx9_gb_reg.h
*
*    Register Spec Release:  1.0
*
*/

//
// Make sure the necessary endian defines are there.
//
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

// GFX9 view of the GB_ADDR_CONFIG register.
//
// All 32 bits are assigned: fifteen named or padding fields whose widths add
// up to 32 in either preprocessor branch. `bitfields` and `bits` are two union
// members of the same struct type; u32All / i32All / f32All expose the whole
// word.
union GB_ADDR_CONFIG_GFX9
{
    struct
    {
#if defined(LITTLEENDIAN_CPU)
        unsigned NUM_PIPES               : 3;
        unsigned PIPE_INTERLEAVE_SIZE    : 3;
        unsigned MAX_COMPRESSED_FRAGS    : 2;
        unsigned BANK_INTERLEAVE_SIZE    : 3;
        unsigned                         : 1;   // unnamed padding
        unsigned NUM_BANKS               : 3;
        unsigned                         : 1;   // unnamed padding
        unsigned SHADER_ENGINE_TILE_SIZE : 3;
        unsigned NUM_SHADER_ENGINES      : 2;
        unsigned NUM_GPUS                : 3;
        unsigned MULTI_GPU_TILE_SIZE     : 2;
        unsigned NUM_RB_PER_SE           : 2;
        unsigned ROW_SIZE                : 2;
        unsigned NUM_LOWER_PIPES         : 1;
        unsigned SE_ENABLE               : 1;
#elif defined(BIGENDIAN_CPU)
        unsigned SE_ENABLE               : 1;
        unsigned NUM_LOWER_PIPES         : 1;
        unsigned ROW_SIZE                : 2;
        unsigned NUM_RB_PER_SE           : 2;
        unsigned MULTI_GPU_TILE_SIZE     : 2;
        unsigned NUM_GPUS                : 3;
        unsigned NUM_SHADER_ENGINES      : 2;
        unsigned SHADER_ENGINE_TILE_SIZE : 3;
        unsigned                         : 1;   // unnamed padding
        unsigned NUM_BANKS               : 3;
        unsigned                         : 1;   // unnamed padding
        unsigned BANK_INTERLEAVE_SIZE    : 3;
        unsigned MAX_COMPRESSED_FRAGS    : 2;
        unsigned PIPE_INTERLEAVE_SIZE    : 3;
        unsigned NUM_PIPES               : 3;
#endif
    } bitfields, bits;

    unsigned u32All;   // whole register, unsigned
    int      i32All;   // whole register, signed
    float    f32All;   // whole register, float
};

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/chip/r800/si_gb_reg.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

#if !defined (__SI_GB_REG_H__)
#define __SI_GB_REG_H__

/*****************************************************************************************************************
 *
 *  si_gb_reg.h
 *
 *  Register Spec Release:  Chip Spec 0.28
 *
 *****************************************************************************************************************/

//
// Make sure the necessary endian defines are there.
//
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

/*
 * GB_ADDR_CONFIG struct
 */

// GB_ADDR_CONFIG_T: bit-field layout of GB_ADDR_CONFIG in which every named
// field except row_size / num_lower_pipes is followed by unnamed padding.
// The two definitions list the same fields in opposite order and each adds up
// to 32 bits; exactly one is compiled, selected by the endian macro.
#if defined(LITTLEENDIAN_CPU)

typedef struct _GB_ADDR_CONFIG_T
{
    unsigned num_pipes               : 3;
    unsigned                         : 1;
    unsigned pipe_interleave_size    : 3;
    unsigned                         : 1;
    unsigned bank_interleave_size    : 3;
    unsigned                         : 1;
    unsigned num_shader_engines      : 2;
    unsigned                         : 2;
    unsigned shader_engine_tile_size : 3;
    unsigned                         : 1;
    unsigned num_gpus                : 3;
    unsigned                         : 1;
    unsigned multi_gpu_tile_size     : 2;
    unsigned                         : 2;
    unsigned row_size                : 2;
    unsigned num_lower_pipes         : 1;
    unsigned                         : 1;
} GB_ADDR_CONFIG_T;

#elif defined(BIGENDIAN_CPU)

typedef struct _GB_ADDR_CONFIG_T
{
    unsigned                         : 1;
    unsigned num_lower_pipes         : 1;
    unsigned row_size                : 2;
    unsigned                         : 2;
    unsigned multi_gpu_tile_size     : 2;
    unsigned                         : 1;
    unsigned num_gpus                : 3;
    unsigned                         : 1;
    unsigned shader_engine_tile_size : 3;
    unsigned                         : 2;
    unsigned num_shader_engines      : 2;
    unsigned                         : 1;
    unsigned bank_interleave_size    : 3;
    unsigned                         : 1;
    unsigned pipe_interleave_size    : 3;
    unsigned                         : 1;
    unsigned num_pipes               : 3;
} GB_ADDR_CONFIG_T;

#endif

// GB_ADDR_CONFIG_N: second bit-field layout of GB_ADDR_CONFIG. Its field list
// (in lower-case) matches the GFX9 GB_ADDR_CONFIG union: fields are packed
// with only two 1-bit pads and the widths add up to 32. Exactly one of the
// two mirrored definitions is compiled, selected by the endian macro.
#if defined(LITTLEENDIAN_CPU)

typedef struct _GB_ADDR_CONFIG_N
{
    unsigned num_pipes               : 3;
    unsigned pipe_interleave_size    : 3;
    unsigned max_compressed_frags    : 2;
    unsigned bank_interleave_size    : 3;
    unsigned                         : 1;
    unsigned num_banks               : 3;
    unsigned                         : 1;
    unsigned shader_engine_tile_size : 3;
    unsigned num_shader_engines      : 2;
    unsigned num_gpus                : 3;
    unsigned multi_gpu_tile_size     : 2;
    unsigned num_rb_per_se           : 2;
    unsigned row_size                : 2;
    unsigned num_lower_pipes         : 1;
    unsigned se_enable               : 1;
} GB_ADDR_CONFIG_N;

#elif defined(BIGENDIAN_CPU)

typedef struct _GB_ADDR_CONFIG_N
{
    unsigned se_enable               : 1;
    unsigned num_lower_pipes         : 1;
    unsigned row_size                : 2;
    unsigned num_rb_per_se           : 2;
    unsigned multi_gpu_tile_size     : 2;
    unsigned num_gpus                : 3;
    unsigned num_shader_engines      : 2;
    unsigned shader_engine_tile_size : 3;
    unsigned                         : 1;
    unsigned num_banks               : 3;
    unsigned                         : 1;
    unsigned bank_interleave_size    : 3;
    unsigned max_compressed_frags    : 2;
    unsigned pipe_interleave_size    : 3;
    unsigned num_pipes               : 3;
} GB_ADDR_CONFIG_N;

#endif

// GB_ADDR_CONFIG: one 32-bit register value viewed three ways -- as a raw
// word (`val`) or through either of the two bit-field layouts (`f`, `n`).
typedef union
{
    unsigned         val : 32;   // raw register value
    GB_ADDR_CONFIG_T f;          // layout declared as GB_ADDR_CONFIG_T
    GB_ADDR_CONFIG_N n;          // layout declared as GB_ADDR_CONFIG_N
} GB_ADDR_CONFIG;

// Bit-field layouts for the tile-mode and macro-tile-mode registers.
// GB_TILE_MODE_T uses all 32 bits; GB_MACROTILE_MODE_T uses seven 2-bit fields
// plus 18 bits of unnamed padding. Each endian branch declares both structs
// with the fields in mirrored order; exactly one branch is compiled.
#if defined(LITTLEENDIAN_CPU)

typedef struct _GB_TILE_MODE_T
{
    unsigned micro_tile_mode     : 2;
    unsigned array_mode          : 4;
    unsigned pipe_config         : 5;
    unsigned tile_split          : 3;
    unsigned bank_width          : 2;
    unsigned bank_height         : 2;
    unsigned macro_tile_aspect   : 2;
    unsigned num_banks           : 2;
    unsigned micro_tile_mode_new : 3;
    unsigned sample_split        : 2;
    unsigned alt_pipe_config     : 5;
} GB_TILE_MODE_T;

typedef struct _GB_MACROTILE_MODE_T
{
    unsigned bank_width            : 2;
    unsigned bank_height           : 2;
    unsigned macro_tile_aspect     : 2;
    unsigned num_banks             : 2;
    unsigned alt_bank_height       : 2;
    unsigned alt_macro_tile_aspect : 2;
    unsigned alt_num_banks         : 2;
    unsigned                       : 18;
} GB_MACROTILE_MODE_T;

#elif defined(BIGENDIAN_CPU)

typedef struct _GB_TILE_MODE_T
{
    unsigned alt_pipe_config     : 5;
    unsigned sample_split        : 2;
    unsigned micro_tile_mode_new : 3;
    unsigned num_banks           : 2;
    unsigned macro_tile_aspect   : 2;
    unsigned bank_height         : 2;
    unsigned bank_width          : 2;
    unsigned tile_split          : 3;
    unsigned pipe_config         : 5;
    unsigned array_mode          : 4;
    unsigned micro_tile_mode     : 2;
} GB_TILE_MODE_T;

typedef struct _GB_MACROTILE_MODE_T
{
    unsigned                       : 18;
    unsigned alt_num_banks         : 2;
    unsigned alt_macro_tile_aspect : 2;
    unsigned alt_bank_height       : 2;
    unsigned num_banks             : 2;
    unsigned macro_tile_aspect     : 2;
    unsigned bank_height           : 2;
    unsigned bank_width            : 2;
} GB_MACROTILE_MODE_T;

#endif

// GB_TILE_MODE: a tile-mode register value, readable as a raw 32-bit word
// (`val`) or field by field (`f`).
typedef union
{
    unsigned       val : 32;
    GB_TILE_MODE_T f;
} GB_TILE_MODE;

// GB_MACROTILE_MODE: a macro-tile-mode register value, readable as a raw
// 32-bit word (`val`) or field by field (`f`).
typedef union
{
    unsigned            val : 32;
    GB_MACROTILE_MODE_T f;
} GB_MACROTILE_MODE;

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrcommon.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrcommon.h
* @brief Contains the helper function and constants.
****************************************************************************************************
*/

#ifndef __ADDR_COMMON_H__
#define __ADDR_COMMON_H__

#include "addrinterface.h"


#if !defined(__APPLE__) || defined(HAVE_TSERVER)
    #include <stdlib.h>
    #include <string.h>
#endif

#if defined(__GNUC__)
    #include <signal.h>
    #include <assert.h>
#endif

#if defined(_WIN32)
#include <intrin.h>
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
////////////////////////////////////////////////////////////////////////////////////////////////////
// DEBUG is derived from NDEBUG only when the build has not defined it already:
// NDEBUG defined -> DEBUG 0, otherwise DEBUG 1.
#if !defined(DEBUG)
    #ifdef NDEBUG
        #define DEBUG 0
    #else
        #define DEBUG 1
    #endif
#endif

// ADDR_DBG_BREAK(): stop in the debugger / abort when DEBUG is non-zero, and
// expand to nothing otherwise. The mechanism is chosen per toolchain:
// assert(false) for __GNUC__, IOPanic("") for __APPLE__, __debugbreak() for
// everything else.
// NOTE(review): __GNUC__ is tested before __APPLE__, so the IOPanic branch is
// only reached by an Apple toolchain that does not define __GNUC__ -- confirm
// that this ordering is intended.
#if DEBUG
    #if defined(__GNUC__)
        #define ADDR_DBG_BREAK()    { assert(false); }
    #elif defined(__APPLE__)
        #define ADDR_DBG_BREAK()    { IOPanic("");}
    #else
        #define ADDR_DBG_BREAK()    { __debugbreak(); }
    #endif
#else
    #define ADDR_DBG_BREAK()
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////////////////////////
// Debug assertions used in AddrLib
////////////////////////////////////////////////////////////////////////////////////////////////////
// ADDR_ANALYSIS_ASSUME(expr): passes expr to __analysis_assume() when building
// with _WIN32 and _MSC_VER >= 1400. Everywhere else it evaluates expr and
// discards the result.
#if defined(_WIN32) && (_MSC_VER >= 1400)
    #define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr)
#else
    #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0)
#endif

// Assertion macros.
//
//   ADDR_ASSERT(e)          - DEBUG: calls ADDR_DBG_BREAK() when e is false.
//   ADDR_ASSERT_ALWAYS()    - DEBUG: unconditional ADDR_DBG_BREAK(), unless
//                             ADDR_SILENCE_ASSERT_ALWAYS is non-zero.
//   ADDR_UNHANDLED_CASE()   - DEBUG: always-failing ADDR_ASSERT.
//   ADDR_NOT_IMPLEMENTED()  - DEBUG: always-failing ADDR_ASSERT.
//
// With DEBUG == 0 all of them expand to nothing, except that on _WIN32
// ADDR_ASSERT still forwards its argument to ADDR_ANALYSIS_ASSUME. On other
// platforms the asserted expression is therefore not evaluated at all in
// non-debug builds, so it must not carry required side effects.
//
// NOTE(review): the DEBUG forms of ADDR_ASSERT expand to a bare `if (...) {...}`
// or `{...}` statement rather than do { } while (0), so
// `if (x) ADDR_ASSERT(y); else ...` does not parse as intended. Switching to
// do/while would require every call site to end with ';' -- check call sites
// before changing.
// NOTE(review): ADDR_NOT_IMPLEMENTED() carries its own trailing ';' in DEBUG
// builds, unlike ADDR_UNHANDLED_CASE().
#if DEBUG
    #if defined( _WIN32 )
        #define ADDR_ASSERT(__e)                                \
        {                                                       \
            ADDR_ANALYSIS_ASSUME(__e);                          \
            if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } \
        }
    #else
        #define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); }
    #endif

    #if ADDR_SILENCE_ASSERT_ALWAYS
        #define ADDR_ASSERT_ALWAYS()
    #else
        #define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
    #endif

    #define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
    #define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented");
#else //DEBUG
    #if defined( _WIN32 )
        #define ADDR_ASSERT(__e) { ADDR_ANALYSIS_ASSUME(__e); }
    #else
        #define ADDR_ASSERT(__e)
    #endif
    #define ADDR_ASSERT_ALWAYS()
    #define ADDR_UNHANDLED_CASE()
    #define ADDR_NOT_IMPLEMENTED()
#endif //DEBUG
////////////////////////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////////////////////////
// Debug print macro from legacy address library
////////////////////////////////////////////////////////////////////////////////////////////////////
#if DEBUG

/// @brief Forwards a parenthesis-enclosed argument list to Object::DebugPrint.
/// @ingroup util
///
/// The argument must already include its own parentheses, e.g.
/// ADDR_PRNT(("test %d", 3)); expands to Object::DebugPrint ("test %d", 3);
#define ADDR_PRNT(a)    Object::DebugPrint a

/// @brief Macro for reporting informational messages
/// @ingroup util
///
/// This macro optionally prints an informational message.
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to ADDR_PRNT (Object::DebugPrint)
/// in order to format the informational message. For example,
/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3".
///
#define ADDR_INFO(cond, a)         \
{ if (!(cond)) { ADDR_PRNT(a); } }


/// @brief Macro for reporting error warning messages
/// @ingroup util
///
/// This macro optionally prints an error warning message,
/// followed by the file name and line number where the macro was called.
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to ADDR_PRNT (Object::DebugPrint)
/// in order to format the informational message. For example,
/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by
/// a second line with the file name and line number.
///
#define ADDR_WARN(cond, a)         \
{ if (!(cond))                     \
  { ADDR_PRNT(a);                  \
    ADDR_PRNT(("  WARNING in file %s, line %d\n", __FILE__, __LINE__)); \
} }


/// @brief Macro for reporting fatal error conditions
/// @ingroup util
///
/// This macro optionally stops execution of the current routine
/// after printing an error message. Unlike ADDR_WARN it does NOT append the
/// file name and line number: it prints the message and then invokes
/// ADDR_DBG_BREAK().
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to ADDR_PRNT (Object::DebugPrint)
/// in order to format the informational message. For example,
/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3", then breaks.
///
#define ADDR_EXIT(cond, a)         \
{ if (!(cond))                     \
  { ADDR_PRNT(a); ADDR_DBG_BREAK();\
} }

#else // DEBUG

// Non-debug build: every reporting macro below expands to nothing, so neither
// the condition nor the message arguments are evaluated.

// ADDRDPF followed by a parenthesised argument list forms
// `1 ? (void)0 : (void)(args)`: the arguments are parsed but never evaluated.
#define ADDRDPF 1 ? (void)0 : (void)

#define ADDR_PRNT(a)

// Repeats the empty definition that the platform debug-break section gives
// ADDR_DBG_BREAK() when DEBUG is 0.
#define ADDR_DBG_BREAK()

#define ADDR_INFO(cond, a)

#define ADDR_WARN(cond, a)

#define ADDR_EXIT(cond, a)

#endif // DEBUG
////////////////////////////////////////////////////////////////////////////////////////////////////

// ADDR_C_ASSERT(e): compile-time assertion.
//
// When a `static_assert` macro is visible it is used directly. Otherwise the
// fallback below is used; it is a do/while statement, so in that form the
// macro can only appear where a statement is allowed (inside a function body).
// NOTE(review): `defined(static_assert)` only detects a preprocessor macro of
// that name; where static_assert is a language keyword rather than a macro the
// fallback branch is the one that gets compiled -- confirm this is intended.
#if defined(static_assert)
#define ADDR_C_ASSERT(__e) static_assert(__e, "")
#else
   /* This fallback relies on an array type with a computed size: when __e is
    * true the size is 1; when __e is false/zero the size is 1 - 2 = -1, which
    * is not a valid array size, so we get a compile error.
    */
#  define ADDR_C_ASSERT(__e) do {         \
      (void) sizeof(char [1 - 2*!(__e)]); \
   } while (0)
#endif

namespace rocr {
namespace Addr
{

namespace V1
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Constants shared by the V1 code
////////////////////////////////////////////////////////////////////////////////////////////////////

// Micro tile footprint, for 1D and 2D tiling.
static const UINT_32 MicroTileWidth  = 8;
static const UINT_32 MicroTileHeight = 8;
static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight;

// Tile depth in the thick modes.
static const UINT_32 ThickTileThickness  = 4;   ///< Micro tile thickness, for THICK modes
static const UINT_32 XThickTileThickness = 8;   ///< Extra thick tiling thickness

static const UINT_32 PowerSaveTileBytes = 64;   ///< Number of bytes per tile for power save 64

// Cache / element sizes for the CMASK and HTILE surfaces, in bits.
static const UINT_32 CmaskCacheBits = 1024;     ///< Number of bits for CMASK cache
static const UINT_32 CmaskElemBits  = 4;        ///< Number of bits for CMASK element
static const UINT_32 HtileCacheBits = 16384;    ///< Number of bits for HTILE cache 512*32

// Special tile index values.
static const INT_32 TileIndexInvalid       = TILEINDEX_INVALID;
static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL;
static const INT_32 TileIndexNoMacroIndex  = -3;

} // V1

namespace V2
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Constants shared by the V2 code
////////////////////////////////////////////////////////////////////////////////////////////////////

/// Surface height constant for V2 (16384).
static const UINT_32 MaxSurfaceHeight = 16384;

} // V2

////////////////////////////////////////////////////////////////////////////////////////////////////
// Common macros
////////////////////////////////////////////////////////////////////////////////////////////////////
#define BITS_PER_BYTE 8

/// Number of whole bytes needed to hold x bits (rounds up).
#define BITS_TO_BYTES(x) (((x) + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE)

/// Number of bits contained in x bytes.
#define BYTES_TO_BITS(x) (BITS_PER_BYTE * (x))

/// Helper macro that extracts bit b of integer v as 0 or 1 (undefined later in section)
#define _BIT(v,b)      (1 & ((v) >> (b)))

/**
****************************************************************************************************
* ChipFamily
*
*   @brief
*       Neutral enum that names a chip family. The enumerators are consecutive, starting at 0
*       with the invalid family; the values are written out explicitly below.
*
****************************************************************************************************
*/
enum ChipFamily
{
    ADDR_CHIP_FAMILY_IVLD = 0,    ///< Invalid family
    ADDR_CHIP_FAMILY_R6XX = 1,
    ADDR_CHIP_FAMILY_R7XX = 2,
    ADDR_CHIP_FAMILY_R8XX = 3,
    ADDR_CHIP_FAMILY_NI   = 4,
    ADDR_CHIP_FAMILY_SI   = 5,
    ADDR_CHIP_FAMILY_CI   = 6,
    ADDR_CHIP_FAMILY_VI   = 7,
    ADDR_CHIP_FAMILY_AI   = 8,
    ADDR_CHIP_FAMILY_NAVI = 9,
};

/**
****************************************************************************************************
* ConfigFlags
*
*   @brief
*       Configuration flags, stored as fourteen single-bit switches plus 18 reserved bits and
*       readable as one 32-bit word through `value`.
****************************************************************************************************
*/
union ConfigFlags
{
    /// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS
    struct
    {
        UINT_32 optimalBankSwap     : 1;   ///< New bank tiling for RV770 only
        UINT_32 noCubeMipSlicesPad  : 1;   ///< Disables faces padding for cubemap mipmaps
        UINT_32 fillSizeFields      : 1;   ///< If clients fill size fields in all input and
                                           ///  output structure
        UINT_32 ignoreTileInfo      : 1;   ///< Don't use tile info structure
        UINT_32 useTileIndex        : 1;   ///< Make tileIndex field in input valid
        UINT_32 useCombinedSwizzle  : 1;   ///< Use combined swizzle
        UINT_32 checkLast2DLevel    : 1;   ///< Check the last 2D mip sub level
        UINT_32 useHtileSliceAlign  : 1;   ///< Do htile single slice alignment
        UINT_32 allowLargeThickTile : 1;   ///< Allow 64*thickness*bytesPerPixel > rowSize
        UINT_32 disableLinearOpt    : 1;   ///< Disallow tile modes to be optimized to linear
        UINT_32 use32bppFor422Fmt   : 1;   ///< View 422 formats as 32 bits per pixel element
        UINT_32 forceDccAndTcCompat : 1;   ///< Force enable DCC and TC compatibility
        UINT_32 nonPower2MemConfig  : 1;   ///< Video memory bit width is not power of 2
        UINT_32 enableAltTiling     : 1;   ///< Enable alt tile mode
        UINT_32 reserved            : 18;  ///< Reserved bits for future use
    };

    UINT_32 value;   ///< All flags as a single word
};

////////////////////////////////////////////////////////////////////////////////////////////////////
// Misc helper functions
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   XorReduce
*
*   @brief
*       Xor the right-side numberOfBits bits of x.
****************************************************************************************************
*/
static inline UINT_32 XorReduce(
    UINT_32 x,              ///< [in] Value whose low-order bits are folded together
    UINT_32 numberOfBits)   ///< [in] Number of low-order bits of x to fold
{
    // Bit 0 always seeds the result, even when numberOfBits is 0.
    UINT_32 parity = x & 1;

    // Fold in bits 1 .. numberOfBits-1 one at a time.
    for (UINT_32 bit = 1; bit < numberOfBits; ++bit)
    {
        const UINT_32 current = (x >> bit) & 1;
        parity ^= current;
    }

    return parity;
}

/**
****************************************************************************************************
*   UnsetLeastBit
*
*   @brief
*       Returns a copy of the value with the least-significant '1' bit unset
****************************************************************************************************
*/
static inline UINT_32 UnsetLeastBit(
    UINT_32 val)    ///< [in] Value whose lowest set bit is to be cleared
{
    // Subtracting one flips the lowest '1' bit and every bit below it, so ANDing the
    // result with the original value keeps only the bits above that position.
    const UINT_32 lowestFlipped = val - 1;

    return val & lowestFlipped;
}

/**
****************************************************************************************************
*   BitScanForward
*
*   @brief
*       Returns the index-position of the least-significant '1' bit. Must not be 0.
****************************************************************************************************
*/
static inline UINT_32 BitScanForward(
    UINT_32 mask) ///< [in] Bitmask to scan
{
    // Every path below expects at least one bit to be set; the portable fallback loop
    // would never terminate for mask == 0.
    ADDR_ASSERT(mask > 0);
    // 'out' is passed by address to ::_BitScanForward on the Windows path below.
    unsigned long out = 0;
    // NOTE(review): _M_IX64 does not look like a standard MSVC predefined macro (_M_IX86 and
    // _M_X64 are) - confirm which 32-bit target the second half of this condition is meant for.
#if (defined(_WIN64) && defined(_M_X64)) || (defined(_WIN32) && defined(_M_IX64))
    // Trailing-zero-count intrinsic.
    out = ::_tzcnt_u32(mask);
#elif (defined(_WIN32) || defined(_WIN64))
    // Remaining Windows builds: bit-scan intrinsic writes the index into 'out'.
    ::_BitScanForward(&out, mask);
#elif defined(__GNUC__)
    // GCC-compatible compilers: count-trailing-zeros builtin.
    out = __builtin_ctz(mask);
#else
    // Portable fallback: shift right until bit 0 is set, counting the shifts.
    while ((mask & 1) == 0)
    {
        mask >>= 1;
        out++;
    }
#endif
    return out;
}

/**
****************************************************************************************************
*   IsPow2
*
*   @brief
*       Check if the size (UINT_32) is pow 2
****************************************************************************************************
*/
static inline UINT_32 IsPow2(
    UINT_32 dim)        ///< [in] dimension of miplevel
{
    ADDR_ASSERT(dim > 0);

    // A power of two has a single bit set, so clearing its lowest set bit leaves nothing.
    const UINT_32 remainingBits = dim & (dim - 1);

    return (remainingBits == 0) ? 1 : 0;
}

/**
****************************************************************************************************
*   IsPow2
*
*   @brief
*       Check if the size (UINT_64) is pow 2
****************************************************************************************************
*/
static inline UINT_64 IsPow2(
    UINT_64 dim)        ///< [in] dimension of miplevel
{
    ADDR_ASSERT(dim > 0);

    // Clearing the lowest set bit of a power of two leaves zero.
    const UINT_64 remainingBits = dim & (dim - 1);

    if (remainingBits == 0)
    {
        return 1;
    }

    return 0;
}

/**
****************************************************************************************************
*   PowTwoAlign
*
*   @brief
*       Align UINT_32 "x" to "align" alignment, "align" should be power of 2
****************************************************************************************************
*/
static inline UINT_32 PowTwoAlign(
    UINT_32 x,      ///< [in] Value to round up
    UINT_32 align)  ///< [in] Alignment; must be a power of two
{
    // The mask arithmetic below is only meaningful when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_32 lowBits = align - 1;

    // Bump x past the next boundary, then drop the bits below the alignment.
    return (x + lowBits) & ~lowBits;
}

/**
****************************************************************************************************
*   PowTwoAlign
*
*   @brief
*       Align UINT_64 "x" to "align" alignment, "align" should be power of 2
****************************************************************************************************
*/
static inline UINT_64 PowTwoAlign(
    UINT_64 x,      ///< [in] Value to round up
    UINT_64 align)  ///< [in] Alignment; must be a power of two
{
    // The mask arithmetic below is only meaningful when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_64 lowBits = align - 1;
    const UINT_64 bumped  = x + lowBits;

    // Drop the bits below the alignment.
    return bumped & ~lowBits;
}

/**
****************************************************************************************************
*   Min
*
*   @brief
*       Get the min value between two unsigned values
****************************************************************************************************
*/
static inline UINT_32 Min(
    UINT_32 value1, ///< [in] First candidate
    UINT_32 value2) ///< [in] Second candidate
{
    if (value1 < value2)
    {
        return value1;
    }

    return value2;
}

/**
****************************************************************************************************
*   Min
*
*   @brief
*       Get the min value between two signed values
****************************************************************************************************
*/
static inline INT_32 Min(
    INT_32 value1,  ///< [in] First candidate
    INT_32 value2)  ///< [in] Second candidate
{
    // Start from value2 and replace it only when value1 is strictly smaller.
    INT_32 smaller = value2;

    if (value1 < value2)
    {
        smaller = value1;
    }

    return smaller;
}

/**
****************************************************************************************************
*   Max
*
*   @brief
*       Get the max value between two unsigned values
****************************************************************************************************
*/
static inline UINT_32 Max(
    UINT_32 value1, ///< [in] First candidate
    UINT_32 value2) ///< [in] Second candidate
{
    if (value1 > value2)
    {
        return value1;
    }

    return value2;
}

/**
****************************************************************************************************
*   Max
*
*   @brief
*       Get the max value between two signed values
****************************************************************************************************
*/
static inline INT_32 Max(
    INT_32 value1,  ///< [in] First candidate
    INT_32 value2)  ///< [in] Second candidate
{
    // Start from value2 and replace it only when value1 is strictly larger.
    INT_32 larger = value2;

    if (value1 > value2)
    {
        larger = value1;
    }

    return larger;
}

/**
****************************************************************************************************
*   RoundUpQuotient
*
*   @brief
*       Divides two numbers, rounding up any remainder.
****************************************************************************************************
*/
static inline UINT_32 RoundUpQuotient(
    UINT_32 numerator,      ///< [in] Value to be divided
    UINT_32 denominator)    ///< [in] Divisor; must be non-zero
{
    ADDR_ASSERT(denominator > 0);

    // Do not form (numerator + denominator - 1): that sum wraps around for numerators near
    // the top of the 32-bit range and then yields a quotient that is far too small.
    const UINT_32 quotient  = numerator / denominator;
    const UINT_32 remainder = numerator % denominator;

    // Any non-zero remainder rounds the result up by one.
    return (remainder != 0) ? (quotient + 1) : quotient;
}

/**
****************************************************************************************************
*   RoundUpQuotient
*
*   @brief
*       Divides two numbers, rounding up any remainder.
****************************************************************************************************
*/
static inline UINT_64 RoundUpQuotient(
    UINT_64 numerator,      ///< [in] Value to be divided
    UINT_64 denominator)    ///< [in] Divisor; must be non-zero
{
    ADDR_ASSERT(denominator > 0);

    // Do not form (numerator + denominator - 1): that sum wraps around for numerators near
    // the top of the 64-bit range and then yields a quotient that is far too small.
    const UINT_64 quotient  = numerator / denominator;
    const UINT_64 remainder = numerator % denominator;

    // Any non-zero remainder rounds the result up by one.
    return (remainder != 0) ? (quotient + 1) : quotient;
}

/**
****************************************************************************************************
*   NextPow2
*
*   @brief
*       Return the smallest power of two that is >= dim (clamped to 0x80000000)
****************************************************************************************************
*/
static inline UINT_32 NextPow2(
    UINT_32 dim)        ///< [in] dimension of miplevel
{
    // Inputs with the top bit set are asserted on and clamped to 2^31; for anything larger
    // than 2^31 the doubling loop below would wrap to zero.
    if (dim > 0x7fffffff)
    {
        ADDR_ASSERT_ALWAYS();
        return 0x80000000;
    }

    // Double a candidate, starting at 1, until it reaches or passes dim.
    UINT_32 candidate = 1;

    while (candidate < dim)
    {
        candidate <<= 1;
    }

    return candidate;
}

/**
****************************************************************************************************
*   Log2NonPow2
*
*   @brief
*       Compute log of base 2 no matter the target is power of 2 or not
****************************************************************************************************
*/
static inline UINT_32 Log2NonPow2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    UINT_32 exponent = 0;

    // Count how many times the value can be halved before it drops to 1 (or 0).
    for (UINT_32 remaining = x; remaining > 1; remaining >>= 1)
    {
        ++exponent;
    }

    return exponent;
}

/**
****************************************************************************************************
*   Log2
*
*   @brief
*       Compute log of base 2
****************************************************************************************************
*/
static inline UINT_32 Log2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    // The caller is expected to pass a power of two.
    ADDR_ASSERT(IsPow2(x));

    const UINT_32 exponent = Log2NonPow2(x);

    return exponent;
}

/**
****************************************************************************************************
*   QLog2
*
*   @brief
*       Compute log of base 2 quickly (<= 16)
****************************************************************************************************
*/
static inline UINT_32 QLog2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    ADDR_ASSERT(x <= 16);

    // Stays 0 for any input that is not one of the five handled powers of two.
    UINT_32 exponent = 0;

    if (x == 1)
    {
        exponent = 0;
    }
    else if (x == 2)
    {
        exponent = 1;
    }
    else if (x == 4)
    {
        exponent = 2;
    }
    else if (x == 8)
    {
        exponent = 3;
    }
    else if (x == 16)
    {
        exponent = 4;
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
    }

    return exponent;
}

/**
****************************************************************************************************
*   SafeAssign
*
*   @brief
*       NULL pointer safe assignment
****************************************************************************************************
*/
static inline VOID SafeAssign(
    UINT_32*    pLVal,  ///< [out] Destination; nothing is written when it is null
    UINT_32     rVal)   ///< [in] Value to store
{
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}

/**
****************************************************************************************************
*   SafeAssign
*
*   @brief
*       NULL pointer safe assignment for 64bit values
****************************************************************************************************
*/
static inline VOID SafeAssign(
    UINT_64*    pLVal,  ///< [out] Destination; nothing is written when it is null
    UINT_64     rVal)   ///< [in] Value to store
{
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}

/**
****************************************************************************************************
*   SafeAssign
*
*   @brief
*       NULL pointer safe assignment for AddrTileMode
****************************************************************************************************
*/
static inline VOID SafeAssign(
    AddrTileMode*    pLVal, ///< [out] Destination; nothing is written when it is null
    AddrTileMode     rVal)  ///< [in] Value to store
{
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}

/**
****************************************************************************************************
*   RoundHalf
*
*   @brief
*       return (x + 1) / 2
****************************************************************************************************
*/
static inline UINT_32 RoundHalf(
    UINT_32     x)     ///< [in] input value
{
    ADDR_ASSERT(x != 0);

    // Halve, then add back the low bit that the shift dropped. Unlike (x + 1) >> 1 this
    // form cannot wrap when x is the largest 32-bit value.
    const UINT_32 half     = x >> 1;
    const UINT_32 leftover = x & 1;

    return half + leftover;
}

/**
****************************************************************************************************
*   SumGeo
*
*   @brief
*       Calculate sum of a geometric progression whose ratio is 1/2
****************************************************************************************************
*/
static inline UINT_32 SumGeo(
    UINT_32     base,   ///< [in] First term in the geometric progression
    UINT_32     num)    ///< [in] Number of terms to be added into sum
{
    ADDR_ASSERT(base > 0);

    UINT_32 total = 0;
    UINT_32 taken = 0;

    // Accumulate terms for as long as they are still larger than 1.
    while ((taken < num) && (base > 1))
    {
        total += base;
        base   = RoundHalf(base);
        ++taken;
    }

    // Each term that was not consumed above contributes exactly 1.
    return total + (num - taken);
}

/**
****************************************************************************************************
*   GetBit
*
*   @brief
*       Extract bit N value (0 or 1) of a UINT32 value.
****************************************************************************************************
*/
static inline UINT_32 GetBit(
    UINT_32     u32,   ///< [in] UINT32 value
    UINT_32     pos)   ///< [in] bit position from LSB, valid range is [0..31]
{
    ADDR_ASSERT(pos <= 31);

    // Bring the requested bit down to position 0 and discard everything else.
    const UINT_32 shifted = u32 >> pos;

    return shifted & 0x1;
}

/**
****************************************************************************************************
*   GetBits
*
*   @brief
*       Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos
*       srcStartPos: 0~31 for UINT_32
*       bitsNum    : 1~32 for UINT_32
*       dstStartPos: 0~31 for UINT_32
*                                                                 src start position
*                                                                          |
*       src : b[31] b[30] b[29] ... ... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0]
*                                   || Bits num || copy length  || Bits num ||
*       dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0]
*                                              |
*                                     dst start position
****************************************************************************************************
*/
static inline UINT_32 GetBits(
    UINT_32 src,            ///< [in] Value the bit field is taken from
    UINT_32 srcStartPos,    ///< [in] Lowest bit of the field inside src
    UINT_32 bitsNum,        ///< [in] Width of the field in bits
    UINT_32 dstStartPos)    ///< [in] Lowest bit of the field inside the returned value
{
    ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0));
    ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32));

    // Number of bits sitting above the field once it has been moved to the bottom.
    const UINT_32 spareBits = 32 - bitsNum;

    // Push the field against the top of the word so that everything above it falls off...
    const UINT_32 topAligned = (src >> srcStartPos) << spareBits;

    // ...then bring it back down so that its lowest bit lands on dstStartPos.
    return topAligned >> (spareBits - dstStartPos);
}

/**
****************************************************************************************************
*   MortonGen2d
*
*   @brief
*       Generate 2D Morton interleave code with num lowest bits in each channel
****************************************************************************************************
*/
static inline UINT_32 MortonGen2d(
    UINT_32     x,     ///< [in] First channel
    UINT_32     y,     ///< [in] Second channel
    UINT_32     num)   ///< [in] Number of bits extracted from each channel
{
    UINT_32 code = 0;

    for (UINT_32 bit = 0; bit < num; ++bit)
    {
        // y occupies the even positions, x the odd ones.
        const UINT_32 evenPos = 2 * bit;

        code |= GetBit(y, bit) << evenPos;
        code |= GetBit(x, bit) << (evenPos + 1);
    }

    return code;
}

/**
****************************************************************************************************
*   MortonGen3d
*
*   @brief
*       Generate 3D Morton interleave code with num lowest bits in each channel
****************************************************************************************************
*/
static inline UINT_32 MortonGen3d(
    UINT_32     x,     ///< [in] First channel
    UINT_32     y,     ///< [in] Second channel
    UINT_32     z,     ///< [in] Third channel
    UINT_32     num)   ///< [in] Number of bits extracted from each channel
{
    UINT_32 code = 0;

    for (UINT_32 bit = 0; bit < num; ++bit)
    {
        // Each source bit index maps to a group of three output bits: z, then y, then x.
        const UINT_32 groupPos = 3 * bit;

        code |= GetBit(z, bit) << groupPos;
        code |= GetBit(y, bit) << (groupPos + 1);
        code |= GetBit(x, bit) << (groupPos + 2);
    }

    return code;
}

/**
****************************************************************************************************
*   ReverseBitVector
*
*   @brief
*       Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1]
****************************************************************************************************
*/
static inline UINT_32 ReverseBitVector(
    UINT_32     v,     ///< [in] Reverse operation base value
    UINT_32     num)   ///< [in] Number of bits used in reverse operation
{
    UINT_32 flipped = 0;

    // Read the source from bit (num - 1) downwards while filling the result from bit 0 upwards.
    for (UINT_32 dstBit = 0, srcBit = num; dstBit < num; ++dstBit)
    {
        --srcBit;
        flipped |= GetBit(v, srcBit) << dstBit;
    }

    return flipped;
}

/**
****************************************************************************************************
*   FoldXor2d
*
*   @brief
*       Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1]
****************************************************************************************************
*/
static inline UINT_32 FoldXor2d(
    UINT_32     v,     ///< [in] Xor operation base value
    UINT_32     num)   ///< [in] Number of bits used in fold xor operation
{
    // Lowest num bits, kept in their original order.
    const UINT_32 lowPart = v & ((1 << num) - 1);

    // The num bits directly above them, in reversed order.
    const UINT_32 highPartReversed = ReverseBitVector(v >> num, num);

    return lowPart ^ highPartReversed;
}

/**
****************************************************************************************************
*   DeMort
*
*   @brief
*       Return v[0] | v[2] | v[4] | v[6]... | v[2*num - 2]
****************************************************************************************************
*/
static inline UINT_32 DeMort(
    UINT_32     v,     ///< [in] DeMort operation base value
    UINT_32     num)   ///< [in] Number of bits used in fold DeMort operation
{
    UINT_32 packed = 0;

    for (UINT_32 i = 0; i < num; ++i)
    {
        // Isolate bit 2*i of the input and move it down to bit i of the result.
        const UINT_32 evenBit = v & (1 << (2 * i));

        packed |= evenBit >> i;
    }

    return packed;
}

/**
****************************************************************************************************
*   FoldXor3d
*
*   @brief
*       v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1]
****************************************************************************************************
*/
static inline UINT_32 FoldXor3d(
    UINT_32     v,     ///< [in] Xor operation base value
    UINT_32     num)   ///< [in] Number of bits used in fold xor operation
{
    // Lowest num bits, kept in their original order.
    const UINT_32 lowPart = v & ((1 << num) - 1);

    // Every second bit starting at bit num, compacted and reversed.
    const UINT_32 firstFold = ReverseBitVector(DeMort(v >> num, num), num);

    // Every second bit starting at bit (num + 1), compacted and reversed.
    const UINT_32 secondFold = ReverseBitVector(DeMort(v >> (num + 1), num), num);

    return lowPart ^ firstFold ^ secondFold;
}

/**
****************************************************************************************************
*   InitChannel
*
*   @brief
*       Set channel initialization value via a return value
****************************************************************************************************
*/
static inline ADDR_CHANNEL_SETTING InitChannel(
    UINT_32     valid,     ///< [in] valid setting
    UINT_32     channel,   ///< [in] channel setting
    UINT_32     index)     ///< [in] index setting
{
    // Build the setting field by field and hand it back by value.
    ADDR_CHANNEL_SETTING setting;

    setting.index   = index;
    setting.channel = channel;
    setting.valid   = valid;

    return setting;
}

/**
****************************************************************************************************
*   InitChannel
*
*   @brief
*       Set channel initialization value via channel pointer
****************************************************************************************************
*/
static inline VOID InitChannel(
    UINT_32     valid,              ///< [in] valid setting
    UINT_32     channel,            ///< [in] channel setting
    UINT_32     index,              ///< [in] index setting
    ADDR_CHANNEL_SETTING *pChanSet) ///< [out] channel setting to be initialized
{
    // Write the three fields straight through the caller's pointer.
    pChanSet->index   = index;
    pChanSet->channel = channel;
    pChanSet->valid   = valid;
}


/**
****************************************************************************************************
*   InitChannel
*
*   @brief
*       Set channel initialization value via another channel
****************************************************************************************************
*/
static inline VOID InitChannel(
    ADDR_CHANNEL_SETTING *pChanDst, ///< [out] channel setting to be initialized
    ADDR_CHANNEL_SETTING *pChanSrc) ///< [in] channel setting to be copied from
{
    // Copy field by field from the source setting into the destination.
    pChanDst->index   = pChanSrc->index;
    pChanDst->channel = pChanSrc->channel;
    pChanDst->valid   = pChanSrc->valid;
}

/**
****************************************************************************************************
*   GetMaxValidChannelIndex
*
*   @brief
*       Get max valid index for a specific channel
****************************************************************************************************
*/
static inline UINT_32 GetMaxValidChannelIndex(
    const ADDR_CHANNEL_SETTING *pChanSet,   ///< [in] channel settings to be searched
    UINT_32                     searchCount,///< [in] number of channel setting to be searched
    UINT_32                     channel)    ///< [in] channel to be searched
{
    // Stays 0 when no valid entry uses the requested channel.
    UINT_32 highest = 0;

    for (UINT_32 i = 0; i < searchCount; ++i)
    {
        const ADDR_CHANNEL_SETTING& entry = pChanSet[i];

        // Skip entries that are unused or belong to another channel.
        if (!entry.valid || (entry.channel != channel))
        {
            continue;
        }

        const UINT_32 entryIndex = static_cast<UINT_32>(entry.index);

        if (entryIndex > highest)
        {
            highest = entryIndex;
        }
    }

    return highest;
}

/**
****************************************************************************************************
*   GetCoordActiveMask
*
*   @brief
*       Get bit mask which indicates which positions in the equation match the target coord
****************************************************************************************************
*/
static inline UINT_32 GetCoordActiveMask(
    const ADDR_CHANNEL_SETTING *pChanSet,   ///< [in] channel settings to be searched
    UINT_32                     searchCount,///< [in] number of channel setting to be searched
    UINT_32                     channel,    ///< [in] channel to be searched
    UINT_32                     index)      ///< [in] index to be searched
{
    UINT_32 activeMask = 0;

    for (UINT_32 pos = 0; pos < searchCount; ++pos)
    {
        const ADDR_CHANNEL_SETTING& entry = pChanSet[pos];

        // Only valid entries that match both the channel and the index contribute a bit.
        if ((entry.valid   != TRUE)    ||
            (entry.channel != channel) ||
            (entry.index   != index))
        {
            continue;
        }

        activeMask |= (1 << pos);
    }

    return activeMask;
}

/**
****************************************************************************************************
*   FillEqBitComponents
*
*   @brief
*       Fill the 'numBitComponents' field based on the equation.
****************************************************************************************************
*/
static inline void FillEqBitComponents(
    ADDR_EQUATION *pEquation) // [in,out] Equation to calculate bit components for
{
    // Component 0 (the address) always counts.
    UINT_32 componentCount = 1;

    for (UINT_32 xorN = 1; xorN < ADDR_MAX_EQUATION_COMP; ++xorN)
    {
        // Does this xor component have a valid setting on any bit?
        bool inUse = false;

        for (UINT_32 bit = 0; (bit < ADDR_MAX_EQUATION_BIT) && !inUse; ++bit)
        {
            if (pEquation->comps[xorN][bit].valid)
            {
                inUse = true;
            }
        }

        // The first unused component ends the scan; later components are not examined.
        if (!inUse)
        {
            break;
        }

        componentCount = xorN + 1;
    }

    pEquation->numBitComponents = componentCount;
}

/**
****************************************************************************************************
*   ShiftCeil
*
*   @brief
*       Apply right-shift with ceiling
****************************************************************************************************
*/
static inline UINT_32 ShiftCeil(
    UINT_32 a,  ///< [in] value to be right-shifted
    UINT_32 b)  ///< [in] number of bits to shift
{
    const UINT_32 quotient    = a >> b;
    const UINT_32 droppedBits = a & ((1 << b) - 1);

    // Round up whenever the shift discarded a non-zero bit.
    return (droppedBits != 0) ? (quotient + 1) : quotient;
}

/**
****************************************************************************************************
*   ShiftRight
*
*   @brief
*       Return right-shift value and minimum is 1
****************************************************************************************************
*/
static inline UINT_32 ShiftRight(
    UINT_32 a,  ///< [in] value to be right-shifted
    UINT_32 b)  ///< [in] number of bits to shift
{
    const UINT_32 shifted = a >> b;

    // Never report less than 1.
    return (shifted > 1u) ? shifted : 1u;
}

} // Addr
} // namespace rocr

#endif // __ADDR_COMMON_H__


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrelemlib.cpp
* @brief Contains the class implementation for element/pixel related functions.
****************************************************************************************************
*/

#include "addrelemlib.h"
#include "addrlib.h"

namespace rocr {
namespace Addr
{

/**
****************************************************************************************************
*   ElemLib::ElemLib
*
*   @brief
*       constructor
*
*   @return
*       N/A
****************************************************************************************************
*/
ElemLib::ElemLib(
    Lib* pAddrLib)  ///< [in] Parent addrlib instance pointer
    :
    Object(pAddrLib->GetClient()),
    m_pAddrLib(pAddrLib)
{
    // Start from the settings shared by R8xx, NI and every family not listed below...
    m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
    m_fp16ExportNorm  = 1;

    // ...then override them for the two older families.
    switch (m_pAddrLib->GetChipFamily())
    {
        case ADDR_CHIP_FAMILY_R6XX:
            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
            m_fp16ExportNorm  = 0;
            break;
        case ADDR_CHIP_FAMILY_R7XX:
            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
            break;
        default:
            break;
    }

    // All configuration flags start cleared.
    m_configFlags.value = 0;
}

/**
****************************************************************************************************
*   ElemLib::~ElemLib
*
*   @brief
*       destructor
*
*   @return
*       N/A
****************************************************************************************************
*/
ElemLib::~ElemLib()
{
    // Intentionally empty: the destructor body performs no explicit cleanup.
}

/**
****************************************************************************************************
*   ElemLib::Create
*
*   @brief
*       Allocates (through the parent's client allocator) and constructs an ElemLib object.
*
*   @return
*       Pointer to the new ElemLib if successful, NULL otherwise.
****************************************************************************************************
*/
ElemLib* ElemLib::Create(
    const Lib* pAddrLib)   ///< [in] Pointer of parent AddrLib instance
{
    // Without a parent there is no client to allocate from.
    if (pAddrLib == NULL)
    {
        return NULL;
    }

    VOID* pMem = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient());

    if (pMem == NULL)
    {
        return NULL;
    }

    // Construct in place inside the client-provided storage. The constructor takes a
    // non-const parent pointer, hence the const_cast.
    return new(pMem) ElemLib(const_cast<Lib* const>(pAddrLib));
}

/**************************************************************************************************
*   ElemLib::Flt32sToInt32s
*
*   @brief
*       Convert a ADDR_FLT_32 value to Int32 value
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::Flt32sToInt32s(
    ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
    UINT_32         bits,       ///< [in] number of bits in value
    NumberType      numberType, ///< [in] the type of number
    UINT_32*        pResult)    ///< [out] Int32 value; left untouched for zero-bit and
                                ///        unrecognized number types
{
    UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
    UINT_32 uscale;
    UINT_32 sign;

    //convert each component to an INT_32
    switch ( numberType )
    {
        case ADDR_NO_NUMBER:    //fall through
        case ADDR_ZERO:         //fall through
        case ADDR_ONE:          //fall through
        case ADDR_EPSILON:      //fall through
            return;        // these are zero-bit components, so don't set result

        case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
            if (bits == 32)               // special case unsigned 32-bit int
            {
                *pResult = value.i;
            }
            else
            {
                // Only form the clamp limit here: (1<<bits) is not a valid shift for bits == 32.
                uscale = (1<<bits) - 1;
                if ((value.i < 0) || (value.u > uscale))
                {
                    *pResult = uscale;
                }
                else
                {
                    *pResult = value.i;
                }
            }
            // Both branches are complete. Returning here keeps the 32-bit case from falling
            // through into the unorm conversion below, which would overwrite the result.
            return;

        // The algorithm used in the DB and TX differs at one value for 24-bit unorms
        case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
            if ((bits==24) && (value.i == 0x33000000))
            {
                *pResult = 1;
                return;
            }              // Else treat like ADDR_UNORM_R6XX
            // fall through (intentional)

        case ADDR_UNORM_R6XX:            // unsigned repeating fraction
            if (value.f <= 0)
            {
                *pResult = 0;            // first clamp to [0..1]
            }
            else
            {
                if (value.f >= 1)
                {
                     *pResult = (1<<bits) - 1;
                }
                else
                {
                    if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
                    {
                        *pResult = 0;                        // NaN, so force to 0
                    }
                    else
                    {
                        // Floating point formulation, for documentation:
                        //     FLOAT f = value.f * ((1<<bits) - 1);
                        //     *pResult = static_cast<INT_32>(f + (round/256.0f));
                        // The code below does the scaling in floating point and the
                        // rounding with integer arithmetic on the float's bit pattern.
                        ADDR_FLT_32 scaled;
                        ADDR_FLT_32 shifted;
                        UINT_64 truncated, rounded;
                        UINT_32 altShift;

                        scaled.f  = value.f * ((1<<bits) - 1);
                        shifted.f = (scaled.f * 256);
                        truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
                        altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
                        truncated = (altShift > 60) ? 0 : truncated >> altShift;
                        rounded   = static_cast<INT_32>((round + truncated) >> 8);
                        //if (rounded > ((1<<bits) - 1))
                        //    rounded = ((1<<bits) - 1);
                        *pResult = static_cast<INT_32>(rounded);
                    }
                }
            }

            return;

        case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
            *pResult = value.i;
            return;

        // @@ FIX ROUNDING in this code, fix the denorm case
        case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
            sign = (value.i >> 31) & 1;
            if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
            {
                if ((value.i&0x007FFFFF) != 0)             // then if NaN
                {
                    *pResult = 0;                       // return 0
                }
                else
                {
                    *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
                }
                return;
            }
            if (value.f <= 0)
            {
                *pResult = 0;
            }
            else
            {
                if (value.f>=1)
                {
                    *pResult = 0xF << (bits-4);
                }
                else
                {
                    if ((value.i>>23) > 112 )
                    {
                        // 24-bit float: normalized
                        // value.i += 1 << (22-bits+4);
                        // round the IEEE mantissa to mantissa size
                        // @@ NOTE: add code to support rounding
                        value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
                        *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
                    }
                    else
                    {
                        // 24-bit float: denormalized
                        value.f = value.f / (1<<28) / (1<<28);
                        value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
                        // value.i += 1 << (22-bits+4);
                        // round the IEEE mantissa to mantissa size
                        // @@ NOTE: add code to support rounding
                        *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
                    }
                }
            }

            return;

        default:                    // invalid number mode
            //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
            break;

    }
}

/**
****************************************************************************************************
*   ElemLib::Int32sToPixel
*
*   @brief
*       Pack 32-bit integer values into an uncompressed pixel,
*       in the proper order
*
*   @return
*       N/A
*
*   @note
*       This entry point packs four 32-bit integer values into
*       an uncompressed pixel. The pixel values are specified in
*       standard order, e.g. depth/stencil. This routine asserts
*       if called on a compressed pixel.
****************************************************************************************************
*/
VOID ElemLib::Int32sToPixel(
    UINT_32              numComps,      ///< [in] number of components
    UINT_32*             pComps,        ///< [in] component values, one per component
    UINT_32*             pCompBits,     ///< [in] total bits in each component
    UINT_32*             pCompStart,    ///< [in] the first bit position of each component
    ComponentFlags       properties,    ///< [in] properties about byteAligned, exportNorm
    UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
    UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
{
    // Xor applied to every byte index into the element; it is always zero here.
    const UINT_32 byteXor = 0;

    // @@ NOTE: assert if called on a compressed format!

    if (!properties.byteAligned)
    {
        // Bit-field path: the element is 32 bits or less. Gather all components
        // into one 32-bit word together with a mask of the bits they occupy.
        UINT_32 packedValue = 0;
        UINT_32 packedMask  = 0;

        for (UINT_32 comp = 0; comp < numComps; ++comp)
        {
            const UINT_32 fieldMask = (1 << pCompBits[comp]) - 1;

            packedMask  |= fieldMask << pCompStart[comp];
            packedValue |= (pComps[comp] & fieldMask) << pCompStart[comp];
        }

        // Merge the word into the destination byte by byte, leaving any
        // destination bits that are not covered by a component untouched.
        const UINT_32 numBytes = (resultBits + 7) / 8;

        for (UINT_32 b = 0; b < numBytes; ++b)
        {
            const UINT_32 shift = 8 * b;
            const UINT_32 kept  = pPixel[b ^ byteXor] & ~(packedMask >> shift);

            pPixel[b ^ byteXor] =
                static_cast<UINT_8>(kept | ((packedMask & packedValue) >> shift));
        }
    }
    else
    {
        // Byte-aligned path: every component is a whole number of bytes, so each
        // one is copied into the element least-significant byte first.
        for (UINT_32 comp = 0; comp < numComps; ++comp)
        {
            const UINT_32 firstByte = pCompStart[comp] / 8;
            const UINT_32 numBytes  = pCompBits[comp]  / 8;

            for (UINT_32 b = 0; b < numBytes; ++b)
            {
                pPixel[(b + firstByte) ^ byteXor] =
                    static_cast<UINT_8>(pComps[comp] >> (8 * b));
            }
        }
    }
}

/**
****************************************************************************************************
*   ElemLib::Flt32ToDepthPixel
*
*   @brief
*       Convert two FLT_32 components to a packed depth/stencil pixel value
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::Flt32ToDepthPixel(
    AddrDepthFormat     format,     ///< [in] Depth format
    const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
    UINT_8*             pPixel      ///< [out] depth pixel value
    ) const
{
    // Per-component bit width, start bit and number type for this depth format
    PixelFormatInfo depthInfo;
    PixGetDepthCompInfo(format, &depthInfo);

    // Start from the most permissive flags and downgrade them per component
    ComponentFlags flags;           // byteAligned, exportNorm, floatComp
    flags.byteAligned = TRUE;
    flags.exportNorm  = TRUE;
    flags.floatComp   = FALSE;

    UINT_32 totalBits = 0;          // one past the highest bit used by any component
    UINT_32 packed[2];              // integer encoding of each input component

    for (UINT_32 c = 0; c < 2; c++)
    {
        const UINT_32 compEnd = depthInfo.compStart[c] + depthInfo.compBit[c];

        // A width or start position that is not a multiple of 8 rules out byte copies
        if (((depthInfo.compBit[c] | depthInfo.compStart[c]) & 7) != 0)
        {
            flags.byteAligned = FALSE;
        }

        if (compEnd > totalBits)
        {
            totalBits = compEnd;
        }

        // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
        if ((depthInfo.compBit[c] > 11) || (depthInfo.numType[c] >= ADDR_USCALED))
        {
            flags.exportNorm = FALSE;
        }

        // Remember whether any component is a floating point type
        if ((depthInfo.numType[c] >= ADDR_S8FLOAT) || (depthInfo.numType[c] == ADDR_U4FLOATC))
        {
            flags.floatComp = TRUE;
        }
    }

    // Encode each float component as an integer of the component's width and type
    for (UINT_32 c = 0; c < 2; c++)
    {
        Flt32sToInt32s(comps[c], depthInfo.compBit[c], depthInfo.numType[c], &packed[c]);
    }

    // Finally place both encoded components at their bit positions in the pixel
    Int32sToPixel(2, packed, depthInfo.compBit, depthInfo.compStart, flags, totalBits, pPixel);
}

/**
****************************************************************************************************
*   ElemLib::Flt32ToColorPixel
*
*   @brief
*       Convert four FLT_32 components to a packed red/green/blue/alpha pixel value
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::Flt32ToColorPixel(
    AddrColorFormat     format,     ///< [in] Color format
    AddrSurfaceNumber   surfNum,    ///< [in] Surface number
    AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
    const ADDR_FLT_32   comps[4],   ///< [in] four components of color
    UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
    ) const
{
    // Per-component bit width, start bit and number type for this color format
    PixelFormatInfo colorInfo;
    memset(&colorInfo, 0, sizeof(PixelFormatInfo));
    PixGetColorCompInfo(format, surfNum, surfSwap, &colorInfo);

    // Start from the most permissive flags and downgrade them per component
    ComponentFlags flags;           // byteAligned, exportNorm, floatComp
    flags.byteAligned = TRUE;
    flags.exportNorm  = TRUE;
    flags.floatComp   = FALSE;

    UINT_32 totalBits = 0;          // one past the highest bit used by any component
    UINT_32 packed[4];              // integer encoding of each input component

    for (UINT_32 c = 0; c < 4; c++)
    {
        const UINT_32 compEnd = colorInfo.compStart[c] + colorInfo.compBit[c];

        // A width or start position that is not a multiple of 8 rules out byte copies
        if (((colorInfo.compBit[c] | colorInfo.compStart[c]) & 7) != 0)
        {
            flags.byteAligned = FALSE;
        }

        if (compEnd > totalBits)
        {
            totalBits = compEnd;
        }

        // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format.
        // When m_fp16ExportNorm is set, ADDR_U4FLOATC components are exempt from this rule.
        const BOOL_32 notNormExportable =
            (colorInfo.compBit[c] > 11) || (colorInfo.numType[c] >= ADDR_USCALED);

        if (notNormExportable &&
            (!m_fp16ExportNorm || (colorInfo.numType[c] != ADDR_U4FLOATC)))
        {
            flags.exportNorm = FALSE;
        }

        // Remember whether any component is a floating point type
        if ((colorInfo.numType[c] >= ADDR_S8FLOAT) || (colorInfo.numType[c] == ADDR_U4FLOATC))
        {
            flags.floatComp = TRUE;
        }
    }

    // Encode each float component as an integer of the component's width and type
    for (UINT_32 c = 0; c < 4; c++)
    {
        Flt32sToInt32s(comps[c], colorInfo.compBit[c], colorInfo.numType[c], &packed[c]);
    }

    // Finally place the encoded components at their bit positions in the pixel
    Int32sToPixel(4, packed, &colorInfo.compBit[0], &colorInfo.compStart[0],
                  flags, totalBits, pPixel);
}

/**
****************************************************************************************************
*   ElemLib::GetCompType
*
*   @brief
*       Fill per component info
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID ElemLib::GetCompType(
    AddrColorFormat   format,     ///< [in] surface format
    AddrSurfaceNumber numType,  ///< [in] number type
    PixelFormatInfo*  pInfo)       ///< [in][out] per component info out
{
    // Some formats dictate the number type regardless of what the caller asked for
    switch (format)
    {
        // Pure floating point formats: force the float number type and continue below
        case ADDR_COLOR_16_FLOAT:
        case ADDR_COLOR_16_16_FLOAT:
        case ADDR_COLOR_16_16_16_16_FLOAT:
        case ADDR_COLOR_32_FLOAT:
        case ADDR_COLOR_32_32_FLOAT:
        case ADDR_COLOR_32_32_32_32_FLOAT:
        case ADDR_COLOR_10_11_11_FLOAT:
        case ADDR_COLOR_11_11_10_FLOAT:
            numType = ADDR_NUMBER_FLOAT;
            break;

        // Integer depth/stencil layouts: type is chosen purely by component width
        case ADDR_COLOR_8_24:
        case ADDR_COLOR_24_8:
            for (UINT_32 comp = 0; comp < 4; comp++)
            {
                switch (pInfo->compBit[comp])
                {
                    case 8:
                        pInfo->numType[comp] = ADDR_UINT_BITS;
                        break;
                    case 24:
                        pInfo->numType[comp] = ADDR_UNORM_R6XX;
                        break;
                    default:
                        pInfo->numType[comp] = ADDR_NO_NUMBER;
                        break;
                }
            }
            return; // fully handled, the generic assignment below is skipped

        // Float depth/stencil layouts: type is chosen purely by component width
        case ADDR_COLOR_8_24_FLOAT:
        case ADDR_COLOR_24_8_FLOAT:
        case ADDR_COLOR_X24_8_32_FLOAT:
            for (UINT_32 comp = 0; comp < 4; comp++)
            {
                switch (pInfo->compBit[comp])
                {
                    case 8:
                        pInfo->numType[comp] = ADDR_UINT_BITS;
                        break;
                    case 24:
                        pInfo->numType[comp] = ADDR_U4FLOATC;
                        break;
                    case 32:
                        pInfo->numType[comp] = ADDR_S8FLOAT32;
                        break;
                    default:
                        pInfo->numType[comp] = ADDR_NO_NUMBER;
                        break;
                }
            }
            return; // fully handled, the generic assignment below is skipped

        default:
            break;
    }

    // Generic path: derive each component's type from the surface number type
    const BOOL_32 integerSurface =
        (numType == ADDR_NUMBER_UINT) || (numType == ADDR_NUMBER_SINT);

    for (UINT_32 comp = 0; comp < 4; comp++)
    {
        // Absent components get a constant default instead of a real number type
        if (pInfo->compBit[comp] == 0)
        {
            if (comp < 3)
            {
                pInfo->numType[comp] = ADDR_ZERO;      // Default is zero for RGB
            }
            else if (integerSurface)
            {
                pInfo->numType[comp] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
            }
            else
            {
                pInfo->numType[comp] = ADDR_ONE;       // Alpha normal default is float 1.0
            }
            continue;
        }

        // Number type used for this component; 1-bit components are restricted
        // to UINT (integer surfaces) or UNORM (everything else)
        AddrSurfaceNumber compNumber = numType;

        if (pInfo->compBit[comp] == 1)
        {
            if (integerSurface)
            {
                compNumber = ADDR_NUMBER_UINT;
            }
            else
            {
                compNumber = ADDR_NUMBER_UNORM;
            }
        }

        // Translate the surface number type into the per-component number type
        switch (compNumber)
        {
            case ADDR_NUMBER_UNORM:
                pInfo->numType[comp] = ADDR_UNORM_R6XX;
                break;
            case ADDR_NUMBER_SNORM:
                pInfo->numType[comp] = ADDR_SNORM_R6XX;
                break;
            case ADDR_NUMBER_USCALED:
                pInfo->numType[comp] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
                break;
            case ADDR_NUMBER_SSCALED:
                pInfo->numType[comp] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
                break;
            case ADDR_NUMBER_UINT:
                pInfo->numType[comp] = ADDR_UINT_BITS;
                break;
            case ADDR_NUMBER_SINT:
                pInfo->numType[comp] = ADDR_SINT_BITS;
                break;
            case ADDR_NUMBER_SRGB:
                // Only the first three components are gamma encoded; the fourth stays UNORM
                if (comp < 3)
                {
                    pInfo->numType[comp] = ADDR_GAMMA8_R6XX;
                }
                else
                {
                    pInfo->numType[comp] = ADDR_UNORM_R6XX;
                }
                break;
            case ADDR_NUMBER_FLOAT:
                // The float flavour depends on the component width
                if (pInfo->compBit[comp] == 32)
                {
                    pInfo->numType[comp] = ADDR_S8FLOAT32;
                }
                else if (pInfo->compBit[comp] == 16)
                {
                    pInfo->numType[comp] = ADDR_S5FLOAT;
                }
                else if (pInfo->compBit[comp] >= 10)
                {
                    pInfo->numType[comp] = ADDR_U5FLOAT;
                }
                else
                {
                    // Narrower float components are not expected; numType is left as is
                    ADDR_ASSERT_ALWAYS();
                }
                break;

            default:
                ADDR_ASSERT(!"Invalid number type");
                pInfo->numType[comp] = ADDR_NO_NUMBER;
                break;
        }
    }
}

/**
****************************************************************************************************
*   ElemLib::GetCompSwap
*
*   @brief
*       Get components swapped for color surface
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID ElemLib::GetCompSwap(
    AddrSurfaceSwap  swap,   ///< [in] swap mode
    PixelFormatInfo* pInfo)  ///< [in,out] output per component info
{
    // The permutation depends on both the swap mode and how many components the
    // format has (pInfo->comps). Any other swap mode, or a component count
    // outside 1..4, leaves the component order untouched.
    switch (swap)
    {
        case ADDR_SWAP_ALT:
            switch (pInfo->comps)
            {
                case 4:     // BGRA
                    SwapComps( 0, 2, pInfo );
                    break;
                case 3:     // RGA
                    SwapComps( 2, 3, pInfo );
                    break;
                case 2:     // RA
                    SwapComps( 1, 3, pInfo );
                    break;
                case 1:     // G
                    SwapComps( 0, 1, pInfo );
                    break;
                default:
                    break;
            }
            break;

        case ADDR_SWAP_STD_REV:
            switch (pInfo->comps)
            {
                case 4:     // ABGR
                    SwapComps( 0, 3, pInfo );
                    SwapComps( 1, 2, pInfo );
                    break;
                case 3:     // BGR
                    SwapComps( 0, 2, pInfo );
                    break;
                case 2:     // GR
                    SwapComps( 0, 1, pInfo );
                    break;
                case 1:     // B
                    SwapComps( 0, 2, pInfo );
                    break;
                default:
                    break;
            }
            break;

        case ADDR_SWAP_ALT_REV:
            switch (pInfo->comps)
            {
                case 4:     // ARGB
                    SwapComps( 0, 3, pInfo );
                    SwapComps( 0, 2, pInfo );
                    SwapComps( 0, 1, pInfo );
                    break;
                case 3:     // AGR
                    SwapComps( 0, 3, pInfo );
                    SwapComps( 0, 2, pInfo );
                    break;
                case 2:     // AR
                    SwapComps( 0, 1, pInfo );
                    SwapComps( 1, 3, pInfo );
                    break;
                case 1:     // A
                    SwapComps( 0, 3, pInfo );
                    break;
                default:
                    break;
            }
            break;

        default:            // RGBA / RGB / RG / R: nothing to swap
            break;
    }
}

/**
****************************************************************************************************
*   ElemLib::SwapComps
*
*   @brief
*       Swap the start bit and bit count of two components
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID ElemLib::SwapComps(
    UINT_32          c0,     ///< [in] component index 0
    UINT_32          c1,     ///< [in] component index 1
    PixelFormatInfo* pInfo)  ///< [in,out] output per component info
{
    // Save component c0's layout before it is overwritten
    const UINT_32 savedStart = pInfo->compStart[c0];
    const UINT_32 savedBits  = pInfo->compBit[c0];

    // Exchange start bit and bit count only; numType[] is not touched here
    pInfo->compStart[c0] = pInfo->compStart[c1];
    pInfo->compBit[c0]   = pInfo->compBit[c1];

    pInfo->compStart[c1] = savedStart;
    pInfo->compBit[c1]   = savedBits;
}

/**
****************************************************************************************************
*   ElemLib::PixGetColorCompInfo
*
*   @brief
*       Get per component info for color surface
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID ElemLib::PixGetColorCompInfo(
    AddrColorFormat   format, ///< [in] surface format, read from register
    AddrSurfaceNumber number, ///< [in] pixel number type
    AddrSurfaceSwap   swap,   ///< [in] component swap mode
    PixelFormatInfo*  pInfo   ///< [out] output per component info
    ) const
{
    // 1. Get component bits
    //
    // The widths passed to GetCompBits are for components 0..3 in that order,
    // i.e. the reverse of the order in which they appear in the format name.
    switch (format)
    {
        case ADDR_COLOR_8:
            GetCompBits(8, 0, 0, 0, pInfo);
            break;
        case ADDR_COLOR_1_5_5_5:
            GetCompBits(5, 5, 5, 1, pInfo);
            break;
        case ADDR_COLOR_5_6_5:
            // 16-bit format: 5 + 6 + 5 bits. The first width used to be 8, which
            // described 19 bits and misplaced the second and third components.
            GetCompBits(5, 6, 5, 0, pInfo);
            break;
        case ADDR_COLOR_6_5_5:
            GetCompBits(5, 5, 6, 0, pInfo);
            break;
        case ADDR_COLOR_8_8:
            GetCompBits(8, 8, 0, 0, pInfo);
            break;
        case ADDR_COLOR_4_4_4_4:
            GetCompBits(4, 4, 4, 4, pInfo);
            break;
        case ADDR_COLOR_16:
            GetCompBits(16, 0, 0, 0, pInfo);
            break;
        case ADDR_COLOR_8_8_8_8:
            GetCompBits(8, 8, 8, 8, pInfo);
            break;
        case ADDR_COLOR_2_10_10_10:
            GetCompBits(10, 10, 10, 2, pInfo);
            break;
        case ADDR_COLOR_10_11_11:
            GetCompBits(11, 11, 10, 0, pInfo);
            break;
        case ADDR_COLOR_11_11_10:
            GetCompBits(10, 11, 11, 0, pInfo);
            break;
        case ADDR_COLOR_16_16:
            GetCompBits(16, 16, 0, 0, pInfo);
            break;
        case ADDR_COLOR_16_16_16_16:
            GetCompBits(16, 16, 16, 16, pInfo);
            break;
        case ADDR_COLOR_16_FLOAT:
            GetCompBits(16, 0, 0, 0, pInfo);
            break;
        case ADDR_COLOR_16_16_FLOAT:
            GetCompBits(16, 16, 0, 0, pInfo);
            break;
        case ADDR_COLOR_32_FLOAT:
            GetCompBits(32, 0, 0, 0, pInfo);
            break;
        case ADDR_COLOR_32_32_FLOAT:
            GetCompBits(32, 32, 0, 0, pInfo);
            break;
        case ADDR_COLOR_16_16_16_16_FLOAT:
            GetCompBits(16, 16, 16, 16, pInfo);
            break;
        case ADDR_COLOR_32_32_32_32_FLOAT:
            GetCompBits(32, 32, 32, 32, pInfo);
            break;

        case ADDR_COLOR_32:
            GetCompBits(32, 0, 0, 0, pInfo);
            break;
        case ADDR_COLOR_32_32:
            GetCompBits(32, 32, 0, 0, pInfo);
            break;
        case ADDR_COLOR_32_32_32_32:
            GetCompBits(32, 32, 32, 32, pInfo);
            break;
        case ADDR_COLOR_10_10_10_2:
            GetCompBits(2, 10, 10, 10, pInfo);
            break;
        case ADDR_COLOR_10_11_11_FLOAT:
            GetCompBits(11, 11, 10, 0, pInfo);
            break;
        case ADDR_COLOR_11_11_10_FLOAT:
            GetCompBits(10, 11, 11, 0, pInfo);
            break;
        case ADDR_COLOR_5_5_5_1:
            GetCompBits(1, 5, 5, 5, pInfo);
            break;
        case ADDR_COLOR_3_3_2:
            GetCompBits(2, 3, 3, 0, pInfo);
            break;
        case ADDR_COLOR_4_4:
            GetCompBits(4, 4, 0, 0, pInfo);
            break;
        case ADDR_COLOR_8_24:
        case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
            GetCompBits(24, 8, 0, 0, pInfo);
            break;
        case ADDR_COLOR_24_8:
        case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
            GetCompBits(8, 24, 0, 0, pInfo);
            break;
        case ADDR_COLOR_X24_8_32_FLOAT:
            GetCompBits(32, 8, 0, 0, pInfo);
            break;

        case ADDR_COLOR_INVALID:
            GetCompBits(0, 0, 0, 0, pInfo);
            break;
        default:
            // Unknown format: flag it in debug builds and report no components
            ADDR_ASSERT(0);
            GetCompBits(0, 0, 0, 0, pInfo);
            break;
    }

    // 2. Get component number type

    GetCompType(format, number, pInfo);

    // 3. Swap components if needed

    GetCompSwap(swap, pInfo);
}

/**
****************************************************************************************************
*   ElemLib::PixGetDepthCompInfo
*
*   @brief
*       Get per component info for depth surface
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID ElemLib::PixGetDepthCompInfo(
    AddrDepthFormat  format,     ///< [in] surface format, read from register
    PixelFormatInfo* pInfo       ///< [out] output per component bits and type
    ) const
{
    // With R800 planar depth, two of the 24-bit float formats are described
    // through their 32-bit float counterparts.
    if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
    {
        if (format == ADDR_DEPTH_8_24_FLOAT)
        {
            format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
        }
        else if (format == ADDR_DEPTH_X8_24_FLOAT)
        {
            format = ADDR_DEPTH_32_FLOAT;
        }
    }

    // For every format: first the component bit widths, then the number types
    // of component 0 and component 1.
    switch (format)
    {
        case ADDR_DEPTH_16:
            GetCompBits(16, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_UNORM_R6XX;
            pInfo->numType[1] = ADDR_ZERO;
            break;

        case ADDR_DEPTH_8_24:
            GetCompBits(24, 8, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_UNORM_R6XXDB;
            pInfo->numType[1] = ADDR_UINT_BITS;
            break;

        case ADDR_DEPTH_8_24_FLOAT:
            GetCompBits(24, 8, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_U4FLOATC;
            pInfo->numType[1] = ADDR_UINT_BITS;
            break;

        case ADDR_DEPTH_X8_24:
            GetCompBits(24, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_UNORM_R6XXDB;
            pInfo->numType[1] = ADDR_ZERO;
            break;

        case ADDR_DEPTH_X8_24_FLOAT:
            GetCompBits(24, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_U4FLOATC;
            pInfo->numType[1] = ADDR_ZERO;
            break;

        case ADDR_DEPTH_32_FLOAT:
            GetCompBits(32, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_S8FLOAT32;
            pInfo->numType[1] = ADDR_ZERO;
            break;

        case ADDR_DEPTH_X24_8_32_FLOAT:
            GetCompBits(32, 8, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_S8FLOAT32;
            pInfo->numType[1] = ADDR_UINT_BITS;
            break;

        case ADDR_DEPTH_INVALID:
            GetCompBits(0, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_NO_NUMBER;
            pInfo->numType[1] = ADDR_NO_NUMBER;
            break;

        default:
            // Unknown format: flag it in debug builds and report no components
            ADDR_ASSERT(0);
            GetCompBits(0, 0, 0, 0, pInfo);
            pInfo->numType[0] = ADDR_NO_NUMBER;
            pInfo->numType[1] = ADDR_NO_NUMBER;
            break;
    }

    // Components 2 and 3 never carry a number type for depth formats
    pInfo->numType[2] = ADDR_NO_NUMBER;
    pInfo->numType[3] = ADDR_NO_NUMBER;
}

/**
****************************************************************************************************
*   ElemLib::PixGetExportNorm
*
*   @brief
*       Check if fp16 export norm can be enabled.
*
*   @return
*       TRUE if this can be enabled.
*
****************************************************************************************************
*/
BOOL_32 ElemLib::PixGetExportNorm(
    AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
    AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
    AddrSurfaceSwap     swap            ///< [in] components swap type
    ) const
{
    PixelFormatInfo info;

    PixGetColorCompInfo(colorFmt, numberFmt, swap, &info);

    // Export norm stays enabled only if every one of the four components qualifies
    for (UINT_32 comp = 0; comp < 4; comp++)
    {
        // Components of at most 11 bits whose type is not beyond ADDR_USCALED always qualify
        if ((info.compBit[comp] <= 11) && (info.numType[comp] <= ADDR_USCALED))
        {
            continue;
        }

        // Without m_fp16ExportNorm there are no exceptions to the rule above
        if (!m_fp16ExportNorm)
        {
            return FALSE;
        }

        // With m_fp16ExportNorm, only this list of float types is still acceptable
        if ((info.numType[comp] != ADDR_U4FLOATC)    &&
            (info.numType[comp] != ADDR_S5FLOAT)     &&
            (info.numType[comp] != ADDR_S5FLOATM)    &&
            (info.numType[comp] != ADDR_U5FLOAT)     &&
            (info.numType[comp] != ADDR_U3FLOATM))
        {
            return FALSE;
        }
    }

    return TRUE;
}

/**
****************************************************************************************************
*   ElemLib::AdjustSurfaceInfo
*
*   @brief
*       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::AdjustSurfaceInfo(
    ElemMode        elemMode,       ///< [in] element mode
    UINT_32         expandX,        ///< [in] decompression expansion factor in X
    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
    UINT_32*        pBpp,           ///< [in,out] bpp
    UINT_32*        pBasePitch,     ///< [in,out] base pitch
    UINT_32*        pWidth,         ///< [in,out] width
    UINT_32*        pHeight)        ///< [in,out] height
{
    BOOL_32 isBcn = FALSE;  // set for the BC1..BC5 element modes

    ADDR_ASSERT(pBpp != NULL);
    ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);

    // Part 1: bits per element as stored in memory
    if (pBpp != NULL)
    {
        const UINT_32 inBpp  = *pBpp;
        UINT_32       outBpp = inBpp;   // modes that do not repack keep the incoming bpp

        switch (elemMode)
        {
            case ADDR_EXPANDED:
                outBpp = inBpp / expandX / expandY;
                break;

            case ADDR_PACKED_STD: // Different bit order
            case ADDR_PACKED_REV:
                outBpp = inBpp * expandX * expandY;
                break;

            case ADDR_PACKED_BC1:
            case ADDR_PACKED_BC4:
                outBpp = 64;
                isBcn  = TRUE;
                break;

            case ADDR_PACKED_BC2:
            case ADDR_PACKED_BC3:
            case ADDR_PACKED_BC5:
                outBpp = 128;
                isBcn  = TRUE;
                break;

            case ADDR_PACKED_ASTC:
            case ADDR_PACKED_ETC2_128BPP:
                outBpp = 128;
                break;

            case ADDR_PACKED_ETC2_64BPP:
                outBpp = 64;
                break;

            case ADDR_PACKED_GBGR:      // 32-bit packed ==> 2 32-bit result
            case ADDR_PACKED_BGRG:
            case ADDR_ROUND_BY_HALF:
            case ADDR_ROUND_TRUNCATE:
            case ADDR_ROUND_DITHER:
            case ADDR_UNCOMPRESSED:
                break;

            default:
                // Unknown element mode: keep the incoming bpp and flag it
                ADDR_ASSERT_ALWAYS();
                break;
        }

        *pBpp = outBpp;
    }

    // Part 2: dimensions. They are only rewritten when all three pointers are
    // valid and at least one expansion factor is greater than 1.
    if ((pWidth != NULL) && (pHeight != NULL) && (pBasePitch != NULL) &&
        ((expandX > 1) || (expandY > 1)))
    {
        UINT_32 pitch = *pBasePitch;
        UINT_32 w     = *pWidth;
        UINT_32 h     = *pHeight;

        if (elemMode == ADDR_EXPANDED)
        {
            // Expanded elements: dimensions grow by the expansion factors
            pitch *= expandX;
            w     *= expandX;
            h     *= expandY;
        }
        else if (isBcn && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX))
        {
            // Evergreen family workaround
            // For BCn we now pad it to POW2 at the beginning so it is safe to
            // divide by 4 directly
            pitch /= expandX;
            w     /= expandX;
            h     /= expandY;
#if DEBUG
            w = (w == 0) ? 1 : w;
            h = (h == 0) ? 1 : h;

            if ((*pWidth > PowTwoAlign(w, 8) * expandX) ||
                (*pHeight > PowTwoAlign(h, 8) * expandY)) // 8 is 1D tiling alignment
            {
                // if this assertion is hit we may have issues if app samples
                // rightmost/bottommost pixels
                ADDR_ASSERT_ALWAYS();
            }
#endif
        }
        else
        {
            // Not BCn format we still keep old way (FMT_1? No real test yet):
            // divide rounding up
            pitch = (pitch + expandX - 1) / expandX;
            w     = (w + expandX - 1) / expandX;
            h     = (h + expandY - 1) / expandY;
        }

        *pBasePitch = pitch; // 0 is legal value for base pitch.
        *pWidth     = (w == 0) ? 1 : w;
        *pHeight    = (h == 0) ? 1 : h;
    }
}

/**
****************************************************************************************************
*   ElemLib::RestoreSurfaceInfo
*
*   @brief
*       Reverse operation of AdjustSurfaceInfo
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::RestoreSurfaceInfo(
    ElemMode        elemMode,       ///< [in] element mode
    UINT_32         expandX,        ///< [in] decompression expansion factor in X
    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
    UINT_32*        pBpp,           ///< [in,out] bpp
    UINT_32*        pWidth,         ///< [in,out] width
    UINT_32*        pHeight)        ///< [in,out] height
{
    ADDR_ASSERT(pBpp != NULL);
    ADDR_ASSERT(pWidth != NULL && pHeight != NULL);

    // The pointers are still tested below, so a NULL argument only skips the
    // corresponding half of the restore instead of being dereferenced.
    if (pBpp)
    {
        const UINT_32 bpp = *pBpp;
        UINT_32       originalBits;

        switch (elemMode)
        {
        case ADDR_EXPANDED:
            // One original element was split over expandX * expandY data elements
            originalBits = bpp * expandX * expandY;
            break;
        case ADDR_PACKED_STD: // Different bit order
        case ADDR_PACKED_REV:
            // expandX * expandY original elements were packed into one data element
            originalBits = bpp / expandX / expandY;
            break;
        case ADDR_PACKED_GBGR:
        case ADDR_PACKED_BGRG:
            originalBits = bpp; // 32-bit packed ==> 2 32-bit result
            break;
        case ADDR_PACKED_BC1: // Fall through
        case ADDR_PACKED_BC4: // Fall through
        case ADDR_PACKED_ETC2_64BPP:
            // Block formats stored as one 64-bit element
            originalBits = 64;
            break;
        case ADDR_PACKED_BC2: // Fall through
        case ADDR_PACKED_BC3: // Fall through
        case ADDR_PACKED_BC5: // Fall through
        case ADDR_PACKED_ASTC: // Fall through
        case ADDR_PACKED_ETC2_128BPP:
            // Block formats stored as one 128-bit element
            originalBits = 128;
            break;
        case ADDR_ROUND_BY_HALF:  // Fall through
        case ADDR_ROUND_TRUNCATE: // Fall through
        case ADDR_ROUND_DITHER:   // Fall through
        case ADDR_UNCOMPRESSED:
            originalBits = bpp;
            break;
        default:
            // Unknown element mode: pass bpp through unchanged and flag it
            originalBits = bpp;
            ADDR_ASSERT_ALWAYS();
            break;
        }

        *pBpp = originalBits;
    }

    if (pWidth && pHeight)
    {
        UINT_32 width  = *pWidth;
        UINT_32 height = *pHeight;

        if ((expandX > 1) || (expandY > 1))
        {
            if (elemMode == ADDR_EXPANDED)
            {
                // Expanded surfaces were widened, so shrink back
                width  /= expandX;
                height /= expandY;
            }
            else
            {
                // Packed/compressed surfaces were shrunk, so grow back
                width  *= expandX;
                height *= expandY;
            }
        }

        // Never report a zero dimension
        *pWidth  = (width == 0) ? 1 : width;
        *pHeight = (height == 0) ? 1 : height;
    }
}

/**
****************************************************************************************************
*   ElemLib::GetBitsPerPixel
*
*   @brief
*       Compute the total bits per element according to a format
*       code. For compressed formats, this is not the same as
*       the number of bits per decompressed element.
*
*   @return
*       Bits per pixel
****************************************************************************************************
*/
UINT_32 ElemLib::GetBitsPerPixel(
    AddrFormat          format,         ///< [in] surface format code
    ElemMode*           pElemMode,      ///< [out] element mode
    UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
    UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
    UINT_32*            pUnusedBits)    ///< [out] bits unused
{
    // Defaults describe a plain uncompressed format: 1x1 expansion, no unused bits.
    // Individual cases below override only what differs.
    UINT_32 bpp;
    UINT_32 expandX = 1;
    UINT_32 expandY = 1;
    UINT_32 bitUnused = 0;
    ElemMode elemMode = ADDR_UNCOMPRESSED; // default value

    switch (format)
    {
        case ADDR_FMT_8:
            bpp = 8;
            break;
        case ADDR_FMT_1_5_5_5:
        case ADDR_FMT_5_6_5:
        case ADDR_FMT_6_5_5:
        case ADDR_FMT_8_8:
        case ADDR_FMT_4_4_4_4:
        case ADDR_FMT_16:
            bpp = 16;
            break;
        case ADDR_FMT_GB_GR:
            // 4:2:2 packed; the config flag selects 32bpp with a 2x X expansion
            elemMode = ADDR_PACKED_GBGR;
            bpp      = m_configFlags.use32bppFor422Fmt ? 32 : 16;
            expandX  = m_configFlags.use32bppFor422Fmt ? 2 : 1;
            break;
        case ADDR_FMT_BG_RG:
            // 4:2:2 packed; the config flag selects 32bpp with a 2x X expansion
            elemMode = ADDR_PACKED_BGRG;
            bpp      = m_configFlags.use32bppFor422Fmt ? 32 : 16;
            expandX  = m_configFlags.use32bppFor422Fmt ? 2 : 1;
            break;
        case ADDR_FMT_8_8_8_8:
        case ADDR_FMT_2_10_10_10:
        case ADDR_FMT_10_11_11:
        case ADDR_FMT_11_11_10:
        case ADDR_FMT_16_16:
        case ADDR_FMT_32:
        case ADDR_FMT_24_8:
            bpp = 32;
            break;
        case ADDR_FMT_BG_RG_16_16_16_16:
            elemMode = ADDR_PACKED_BGRG;
            bpp = 32;
            break;
        case ADDR_FMT_16_16_16_16:
        case ADDR_FMT_32_32:
        case ADDR_FMT_CTX1:
            bpp = 64;
            break;
        case ADDR_FMT_32_32_32_32:
            bpp = 128;
            break;
        case ADDR_FMT_INVALID:
            bpp = 0;
            break;
        case ADDR_FMT_1_REVERSED:
            // 1-bit format, 8 pixels per data element in X
            elemMode = ADDR_PACKED_REV;
            expandX = 8;
            bpp = 1;
            break;
        case ADDR_FMT_1:
            // 1-bit format, 8 pixels per data element in X
            elemMode = ADDR_PACKED_STD;
            expandX = 8;
            bpp = 1;
            break;
        case ADDR_FMT_4_4:
        case ADDR_FMT_3_3_2:
            bpp = 8;
            break;
        case ADDR_FMT_5_5_5_1:
            bpp = 16;
            break;
        case ADDR_FMT_32_AS_8:
        case ADDR_FMT_32_AS_8_8:
        case ADDR_FMT_8_24:
        case ADDR_FMT_10_10_10_2:
        case ADDR_FMT_5_9_9_9_SHAREDEXP:
            bpp = 32;
            break;
        case ADDR_FMT_X24_8_32_FLOAT:
            bpp = 64;
            bitUnused = 24;
            break;
        case ADDR_FMT_8_8_8:
            elemMode = ADDR_EXPANDED;
            bpp = 24;//@@ 8;      // read 3 elements per pixel
            expandX = 3;
            break;
        case ADDR_FMT_16_16_16:
            elemMode = ADDR_EXPANDED;
            bpp = 48;//@@ 16;      // read 3 elements per pixel
            expandX = 3;
            break;
        case ADDR_FMT_32_32_32:
            elemMode = ADDR_EXPANDED;
            expandX = 3;
            bpp = 96;//@@ 32;      // read 3 elements per pixel
            break;
        case ADDR_FMT_BC1:
            elemMode = ADDR_PACKED_BC1;
            expandX = 4;
            expandY = 4;
            bpp = 64;
            break;
        case ADDR_FMT_BC4:
            elemMode = ADDR_PACKED_BC4;
            expandX = 4;
            expandY = 4;
            bpp = 64;
            break;
        case ADDR_FMT_BC2:
            elemMode = ADDR_PACKED_BC2;
            expandX = 4;
            expandY = 4;
            bpp = 128;
            break;
        case ADDR_FMT_BC3:
            elemMode = ADDR_PACKED_BC3;
            expandX = 4;
            expandY = 4;
            bpp = 128;
            break;
        case ADDR_FMT_BC5:
        case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
        case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
            elemMode = ADDR_PACKED_BC5;
            expandX = 4;
            expandY = 4;
            bpp = 128;
            break;

        case ADDR_FMT_ETC2_64BPP:
            elemMode = ADDR_PACKED_ETC2_64BPP;
            expandX  = 4;
            expandY  = 4;
            bpp      = 64;
            break;

        case ADDR_FMT_ETC2_128BPP:
            elemMode = ADDR_PACKED_ETC2_128BPP;
            expandX  = 4;
            expandY  = 4;
            bpp      = 128;
            break;

        // All ASTC variants are 128 bits per block; only the block footprint differs.
        case ADDR_FMT_ASTC_4x4:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 4;
            expandY  = 4;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_5x4:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 5;
            expandY  = 4;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_5x5:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 5;
            expandY  = 5;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_6x5:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 6;
            expandY  = 5;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_6x6:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 6;
            expandY  = 6;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_8x5:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 8;
            expandY  = 5;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_8x6:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 8;
            expandY  = 6;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_8x8:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 8;
            expandY  = 8;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_10x5:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 10;
            expandY  = 5;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_10x6:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 10;
            expandY  = 6;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_10x8:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 10;
            expandY  = 8;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_10x10:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 10;
            expandY  = 10;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_12x10:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 12;
            expandY  = 10;
            bpp      = 128;
            break;

        case ADDR_FMT_ASTC_12x12:
            elemMode = ADDR_PACKED_ASTC;
            expandX  = 12;
            expandY  = 12;
            bpp      = 128;
            break;

        default:
            // Unrecognized format code: report 0 bpp and flag it
            bpp = 0;
            ADDR_ASSERT_ALWAYS();
            break;
            // @@ or should this be an error?
    }

    // Every output pointer is optional; only the non-NULL ones are written.
    SafeAssign(pExpandX, expandX);
    SafeAssign(pExpandY, expandY);
    SafeAssign(pUnusedBits, bitUnused);

    // Store the element mode through its own type rather than punning the
    // ElemMode* to UINT_32*, which would depend on the enum's size and aliasing.
    if (pElemMode)
    {
        *pElemMode = elemMode;
    }

    return bpp;
}

/**
****************************************************************************************************
*   ElemLib::GetCompBits
*
*   @brief
*       Set each component's bit size and bit start. And set element mode and number type
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::GetCompBits(
    UINT_32          c0,        ///< [in] bits of component 0
    UINT_32          c1,        ///< [in] bits of component 1
    UINT_32          c2,        ///< [in] bits of component 2
    UINT_32          c3,        ///< [in] bits of component 3
    PixelFormatInfo* pInfo,     ///< [out] per component info out
    ElemMode         elemMode)  ///< [in] element mode
{
    const UINT_32 widths[4] = { c0, c1, c2, c3 };

    UINT_32 bitOffset = 0;  // sum of the widths of all preceding components
    UINT_32 present   = 0;  // count of components with a non-zero width

    pInfo->elemMode = elemMode;

    for (UINT_32 idx = 0; idx < 4; idx++)
    {
        const UINT_32 bits = widths[idx];

        pInfo->compBit[idx] = bits;

        if (bits != 0)
        {
            // Real component: it starts right after the previous components.
            // The count is still needed since component swap may depend on it.
            pInfo->compStart[idx] = bitOffset;
            present++;
        }
        else
        {
            pInfo->compStart[idx] = 0;              // all null components start at bit 0
            pInfo->numType[idx]   = ADDR_NO_NUMBER; // and have no number type
        }

        bitOffset += bits;
    }

    pInfo->comps = present;
}

/**
****************************************************************************************************
*   ElemLib::SetClearComps
*
*   @brief
*       Set the clear color (or clear depth/stencil) for a surface
*
*   @note
*       If clearColor is zero, a default clear value is used in place of comps[4].
*       If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID ElemLib::SetClearComps(
    ADDR_FLT_32 comps[4],   ///< [in,out] components
    BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
    BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
{
    // Clear color disabled: overwrite with the default (0, 0, 0, 1)
    if (clearColor == FALSE)
    {
        comps[0].f = 0.0;
        comps[1].f = 0.0;
        comps[2].f = 0.0;
        comps[3].f = 1.0;
        return;
    }

    // Full precision requested: the caller's values are used unchanged
    if (float32)
    {
        return;
    }

    // Reduced precision: canonicalize NaNs and drop the low 12 mantissa bits
    for (UINT_32 idx = 0; idx < 4; idx++)
    {
        // Magnitude bits above the infinity pattern mean the value is a NaN
        if ((comps[idx].u & 0x7FFFFFFF) > 0x7F800000)
        {
            comps[idx].u = 0xFFC00000;
        }
        else
        {
            comps[idx].u = comps[idx].u & 0xFFFFF000;
        }
    }
}

/**
****************************************************************************************************
*   ElemLib::IsBlockCompressed
*
*   @brief
*       TRUE if this is block compressed format
*
*   @note
*
*   @return
*       BOOL_32
****************************************************************************************************
*/
BOOL_32 ElemLib::IsBlockCompressed(
    AddrFormat format)  ///< [in] Format
{
    // Two inclusive ranges of format codes qualify; membership relies on the
    // numeric ordering of the AddrFormat enumerators.
    const BOOL_32 inBcRange       = (format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7);
    const BOOL_32 inAstcEtc2Range = (format >= ADDR_FMT_ASTC_4x4) &&
                                    (format <= ADDR_FMT_ETC2_128BPP);

    return inBcRange || inAstcEtc2Range;
}


/**
****************************************************************************************************
*   ElemLib::IsCompressed
*
*   @brief
*       TRUE if this is block compressed format or 1 bit format
*
*   @note
*
*   @return
*       BOOL_32
****************************************************************************************************
*/
BOOL_32 ElemLib::IsCompressed(
    AddrFormat format)  ///< [in] Format
{
    const BOOL_32 isBlock = IsBlockCompressed(format);

    // NOTE(review): the function header mentions "1 bit format", yet the extra
    // comparisons below test BC1/BC7, which IsBlockCompressed's BC1..BC7 range
    // check already looks at. Behavior is kept as-is; confirm whether
    // ADDR_FMT_1 / ADDR_FMT_1_REVERSED were intended here.
    return isBlock || (format == ADDR_FMT_BC1) || (format == ADDR_FMT_BC7);
}

/**
****************************************************************************************************
*   ElemLib::IsExpand3x
*
*   @brief
*       TRUE if this is 3x expand format
*
*   @note
*
*   @return
*       BOOL_32
****************************************************************************************************
*/
BOOL_32 ElemLib::IsExpand3x(
    AddrFormat format)  ///< [in] Format
{
    // Only the three-component 8/16/32-bit formats are 3x expanded
    const bool threeComponent = (format == ADDR_FMT_8_8_8)    ||
                                (format == ADDR_FMT_16_16_16) ||
                                (format == ADDR_FMT_32_32_32);

    return threeComponent ? TRUE : FALSE;
}

/**
****************************************************************************************************
*   ElemLib::IsMacroPixelPacked
*
*   @brief
*       TRUE if this is a macro-pixel-packed format.
*
*   @note
*
*   @return
*       BOOL_32
****************************************************************************************************
*/
BOOL_32 ElemLib::IsMacroPixelPacked(
    AddrFormat format)  ///< [in] Format
{
    // Guard-style switch: the listed formats answer TRUE, everything else FALSE
    switch (format)
    {
        case ADDR_FMT_BG_RG:
        case ADDR_FMT_GB_GR:
        case ADDR_FMT_BG_RG_16_16_16_16:
            return TRUE;
        default:
            return FALSE;
    }
}

}
} //namespace rocr


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
****************************************************************************************************
* @file  addrelemlib.h
* @brief Contains the class for element/pixel related functions.
****************************************************************************************************
*/

#ifndef __ELEM_LIB_H__
#define __ELEM_LIB_H__

#include "addrinterface.h"
#include "addrobject.h"
#include "addrcommon.h"

namespace rocr {
namespace Addr
{

class Lib;

// The masks for property bits within the Properties INT_32
// Per-format component property flags. The union lets the individual one-bit
// flags also be read or written together through `value`.
union ComponentFlags
{
    struct
    {
        UINT_32 byteAligned    : 1;    ///< all components are byte aligned
        UINT_32 exportNorm     : 1;    ///< components support R6xx NORM compression
        UINT_32 floatComp      : 1;    ///< there is at least one floating point component
    };

    UINT_32 value;                     ///< all flag bits viewed as a single 32-bit word
};

// Copy from legacy lib's NumberType
// Numeric interpretation of a single pixel component. Stored per component in
// PixelFormatInfo::numType; ElemLib::GetCompBits assigns ADDR_NO_NUMBER to
// components that have zero bits.
// Several comments below state ordering requirements between enumerators, so
// new values should not be inserted without checking them.
enum NumberType
{
    // The following number types have the range [-1..1]
    ADDR_NO_NUMBER,         // This component doesn't exist and has no default value
    ADDR_EPSILON,           // Force component value to integer 0x00000001
    ADDR_ZERO,              // Force component value to integer 0x00000000
    ADDR_ONE,               // Force component value to floating point 1.0
    // Above values don't have any bits per component (keep ADDR_ONE the last of these)

    ADDR_UNORM,             // Unsigned normalized (repeating fraction) full precision
    ADDR_SNORM,             // Signed normalized (repeating fraction) full precision
    ADDR_GAMMA,             // Gamma-corrected, full precision

    ADDR_UNORM_R5XXRB,      // Unsigned normalized (repeating fraction) for r5xx RB
    ADDR_SNORM_R5XXRB,      // Signed normalized (repeating fraction) for r5xx RB
    ADDR_GAMMA_R5XXRB,      // Gamma-corrected for r5xx RB (note: unnormalized value)
    ADDR_UNORM_R5XXBC,      // Unsigned normalized (repeating fraction) for r5xx BC
    ADDR_SNORM_R5XXBC,      // Signed normalized (repeating fraction) for r5xx BC
    ADDR_GAMMA_R5XXBC,      // Gamma-corrected for r5xx BC (note: unnormalized value)

    ADDR_UNORM_R6XX,        // Unsigned normalized (repeating fraction) for R6xx
    ADDR_UNORM_R6XXDB,      // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX
    ADDR_SNORM_R6XX,        // Signed normalized (repeating fraction) for R6xx
    ADDR_GAMMA8_R6XX,       // Gamma-corrected for r6xx
    ADDR_GAMMA8_R7XX_TP,    // Gamma-corrected for r7xx TP 12bit unorm 8.4.

    ADDR_U4FLOATC,          // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1]
    ADDR_GAMMA_4SEG,        // Gamma-corrected, four segment approximation
    ADDR_U0FIXED,           // Unsigned 0.N-bit fixed point

    // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine)
    ADDR_USCALED,           // Unsigned integer converted to/from floating point
    ADDR_SSCALED,           // Signed integer converted to/from floating point
    ADDR_USCALED_R5XXRB,    // Unsigned integer to/from floating point for r5xx RB
    ADDR_SSCALED_R5XXRB,    // Signed integer to/from floating point for r5xx RB
    ADDR_UINT_BITS,         // Keep in unsigned integer form, clamped to specified range
    ADDR_SINT_BITS,         // Keep in signed integer form, clamped to specified range
    ADDR_UINTBITS,          // @@ remove Keep in unsigned integer form, use modulus to reduce bits
    ADDR_SINTBITS,          // @@ remove Keep in signed integer form, use modulus to reduce bits

    // The following number types and ADDR_U4FLOATC have exponents
    // (LEAVE ADDR_S8FLOAT first or fix Finish routine)
    ADDR_S8FLOAT,           // Signed floating point with 8-bit exponent, bias=127
    ADDR_S8FLOAT32,         // 32-bit IEEE float, passes through NaN values
    ADDR_S5FLOAT,           // Signed floating point with 5-bit exponent, bias=15
    ADDR_S5FLOATM,          // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf
    ADDR_U5FLOAT,           // Floating point with 5-bit exponent, bias=15. NOTE(review): the
                            // original comment said "Signed"; the U prefix suggests unsigned
                            // -- TODO confirm
    ADDR_U3FLOATM,          // Unsigned floating point with 3-bit exponent, bias=3

    ADDR_S5FIXED,           // Signed 5.N-bit fixed point, with rounding

    ADDR_END_NUMBER         // Used for range comparisons
};

// Copy from legacy lib's AddrElement
// How the data elements of a format relate to pixels/texels. Produced by
// ElemLib::GetBitsPerPixel and consumed by AdjustSurfaceInfo / RestoreSurfaceInfo,
// which scale bpp, width and height according to the mode.
enum ElemMode
{
    // These formats allow both packing and unpacking
    ADDR_ROUND_BY_HALF,      // add 1/2 and truncate when packing this element
    ADDR_ROUND_TRUNCATE,     // truncate toward 0 for sign/mag, else toward neg
    ADDR_ROUND_DITHER,       // Pack by dithering -- requires (x,y) position

    // These formats only allow unpacking, no packing
    ADDR_UNCOMPRESSED,       // Elements are not compressed: one data element per pixel/texel
    ADDR_EXPANDED,           // Elements are split up and stored in multiple data elements
    ADDR_PACKED_STD,         // Elements are compressed into ExpandX by ExpandY data elements
    ADDR_PACKED_REV,         // Like ADDR_PACKED_STD, but X order of pixels is reversed
    ADDR_PACKED_GBGR,        // Elements are compressed 4:2:2 in G1B_G0R order (high to low)
    ADDR_PACKED_BGRG,        // Elements are compressed 4:2:2 in BG1_RG0 order (high to low)
    ADDR_PACKED_BC1,         // Each data element is uncompressed to a 4x4 pixel/texel array
    ADDR_PACKED_BC2,         // Each data element is uncompressed to a 4x4 pixel/texel array
    ADDR_PACKED_BC3,         // Each data element is uncompressed to a 4x4 pixel/texel array
    ADDR_PACKED_BC4,         // Each data element is uncompressed to a 4x4 pixel/texel array
    ADDR_PACKED_BC5,         // Each data element is uncompressed to a 4x4 pixel/texel array
    ADDR_PACKED_ETC2_64BPP,  // ETC2 formats that use 64bpp to represent each 4x4 block
    ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block
    ADDR_PACKED_ASTC,        // Various ASTC formats, all are 128bpp with varying block sizes

    // These formats provide various kinds of compression
    ADDR_ZPLANE_R5XX,        // Compressed Zplane using r5xx architecture format
    ADDR_ZPLANE_R6XX,        // Compressed Zplane using r6xx architecture format
    //@@ Fill in the compression modes

    ADDR_END_ELEMENT         // Used for range comparisons
};

// Layout of depth and stencil data. ElemLib::IsDepthStencilTilePlanar() reports
// TRUE only for ADDR_DEPTH_PLANAR_R600.
enum DepthPlanarType
{
    ADDR_DEPTH_PLANAR_NONE = 0, // No planar z/stencil
    ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are stored within a tile
    ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes
};

/**
****************************************************************************************************
*   PixelFormatInfo
*
*   @brief
*       Per component info
*
****************************************************************************************************
*/
struct PixelFormatInfo
{
    UINT_32             compBit[4];     ///< Bit width of each component (0 = component absent)
    NumberType          numType[4];     ///< Number type of each component
    UINT_32             compStart[4];   ///< Start bit of each component within the element
    ElemMode            elemMode;       ///< Element mode of the format
    UINT_32             comps;          ///< Number of components
};

/**
****************************************************************************************************
* @brief This class contains ASIC-independent element related attributes and operations
****************************************************************************************************
*/
class ElemLib : public Object
{
protected:
    /// Protected constructor: instances are obtained through Create()
    ElemLib(Lib* pAddrLib);

public:

    /// Makes this class virtual
    virtual ~ElemLib();

    /// Factory for an ElemLib tied to the given address library
    static ElemLib* Create(
        const Lib* pAddrLib);

    /// The implementation is only for R6xx/R7xx.
    /// NOTE(review): the original comment said "so make it virtual in case we need
    /// for R8xx", but the method is declared non-virtual.
    BOOL_32 PixGetExportNorm(
        AddrColorFormat colorFmt,
        AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const;

    /// The methods below are ASIC independent, so some of them are static.
    /// Remove static if we need different operation in hwl.

    /// Writes a depth pixel to pPixel from float components
    /// (implementation not shown in this header)
    VOID    Flt32ToDepthPixel(
        AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const;

    /// Writes a color pixel to pPixel from float components
    /// (implementation not shown in this header)
    VOID    Flt32ToColorPixel(
        AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap,
        const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const;

    /// Converts one float component to an integer of the given bit count and number type
    /// (implementation not shown in this header)
    static VOID    Flt32sToInt32s(
        ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult);

    /// Packs integer components into pPixel using the per-component bit sizes and start bits
    /// (implementation not shown in this header)
    static VOID    Int32sToPixel(
        UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart,
        ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel);

    /// Fills pInfo with per-component info for a color format
    VOID    PixGetColorCompInfo(
        AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap,
        PixelFormatInfo* pInfo) const;

    /// Fills pInfo with per-component info for a depth format
    VOID    PixGetDepthCompInfo(
        AddrDepthFormat format, PixelFormatInfo* pInfo) const;

    /// Returns the total bits per element of a format. The optional outputs receive
    /// the element mode, the X/Y expansion factors and the unused bit count; NULL
    /// outputs are skipped.
    UINT_32 GetBitsPerPixel(
        AddrFormat format, ElemMode* pElemMode = NULL,
        UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL);

    /// Sets default or precision-reduced clear components in place
    static VOID    SetClearComps(
        ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32);

    /// Adjusts bpp / base pitch / width / height in place for the element mode and
    /// expansion factors
    VOID    AdjustSurfaceInfo(
        ElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
        UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight);

    /// Reverse operation of AdjustSurfaceInfo (no base pitch argument)
    VOID    RestoreSurfaceInfo(
        ElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
        UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight);

    /// Checks if depth and stencil are planar inside a tile
    BOOL_32 IsDepthStencilTilePlanar()
    {
        return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE;
    }

    /// Sets m_configFlags, copied from AddrLib
    VOID    SetConfigFlags(ConfigFlags flags)
    {
        m_configFlags = flags;
    }

    /// Format classification helpers; each takes only the format code
    static BOOL_32 IsCompressed(AddrFormat format);
    static BOOL_32 IsBlockCompressed(AddrFormat format);
    static BOOL_32 IsExpand3x(AddrFormat format);
    static BOOL_32 IsMacroPixelPacked(AddrFormat format);

protected:

    /// Fills pInfo with each component's bit size and start bit, the element mode and
    /// the count of non-zero-width components
    static VOID    GetCompBits(
        UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3,
        PixelFormatInfo* pInfo,
        ElemMode elemMode = ADDR_ROUND_BY_HALF);

    /// Presumably fills the per-component number types in pInfo -- implementation
    /// not shown here, TODO confirm
    static VOID    GetCompType(
        AddrColorFormat format, AddrSurfaceNumber numType,
        PixelFormatInfo* pInfo);

    /// Presumably applies the surface swap mode to the components in pInfo --
    /// implementation not shown here, TODO confirm
    static VOID    GetCompSwap(
        AddrSurfaceSwap swap, PixelFormatInfo* pInfo);

    /// Presumably exchanges components c0 and c1 within pInfo -- implementation
    /// not shown here, TODO confirm
    static VOID    SwapComps(
        UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo);

private:

    UINT_32             m_fp16ExportNorm;   ///< If allow FP16 to be reported as EXPORT_NORM
    DepthPlanarType     m_depthPlanarType;  ///< Read by IsDepthStencilTilePlanar()

    ConfigFlags         m_configFlags;      ///< Copy of AddrLib's configFlags
    Addr::Lib* const    m_pAddrLib;         ///< Pointer to parent addrlib instance
};

} //Addr
} //namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrlib.cpp
* @brief Contains the implementation for the Addr::Lib class.
****************************************************************************************************
*/

#include "addrinterface.h"
#include "addrlib.h"
#include "addrcommon.h"

#if defined(__APPLE__)

// Returns the remainder of n divided by base using only shifts, compares and
// subtraction on the 64-bit values (the single division is 32-bit).
// Despite the name, only the remainder is produced: n is passed by value, so a
// quotient could never reach the caller and is not computed.
// NOTE(review): presumably written this way because __umoddi3 below forwards
// here, so a 64-bit '%' must not be used in this function -- confirm.
// base == 0 is not handled (the 32-bit division below would divide by zero).
UINT_32 div64_32(UINT_64 n, UINT_32 base)
{
    UINT_64 rem  = n;
    UINT_64 b    = base;
    UINT_64 d    = 1;   // tracks how far b was doubled; ends the subtraction loop
    UINT_32 high = static_cast<UINT_32>(rem >> 32);

    // Reduce the upper 32 bits first so the loops below run on a smaller value
    if (high >= base)
    {
        high /= base;
        rem -= static_cast<UINT_64>(high * base) << 32;
    }

    // Double b until it reaches rem or its top bit is about to be set
    while ((static_cast<INT_64>(b) > 0) && (b < rem))
    {
        b = b + b;
        d = d + d;
    }

    // Subtract the doubled divisors back down, one halving per iteration
    do
    {
        if (rem >= b)
        {
            rem -= b;
        }
        b >>= 1;
        d >>= 1;
    } while (d);

    // The remainder is below base, so it fits in 32 bits
    return static_cast<UINT_32>(rem);
}

// C-linkage definition of __umoddi3 for the __APPLE__ build; it simply forwards
// to div64_32 and hands back its result.
// NOTE(review): presumably this stands in for the compiler's 64-bit unsigned
// modulo helper -- confirm against the toolchain's expected signature.
extern "C"
UINT_32 __umoddi3(UINT_64 n, UINT_32 base)
{
    const UINT_32 remainder = div64_32(n, base);
    return remainder;
}

#endif // __APPLE__

namespace rocr {
namespace Addr
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Constructor/Destructor
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the AddrLib class
*
****************************************************************************************************
*/
// Default constructor: no explicit base-class initializer is given, and every
// member starts from the fixed defaults below.
Lib::Lib() :
    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),    // no chip family selected yet
    m_chipRevision(0),
    m_version(ADDRLIB_VERSION),             // library version constant
    m_pipes(0),
    m_banks(0),
    m_pipeInterleaveBytes(0),
    m_rowSize(0),
    m_minPitchAlignPixels(1),
    m_maxSamples(8),
    m_maxBaseAlign(0),
    m_maxMetaBaseAlign(0),
    m_pElemLib(NULL)                        // no element library yet; ~Lib deletes it when set
{
    // Clear all configuration flags at once through the combined value
    m_configFlags.value = 0;
}

/**
****************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the AddrLib class with hClient as parameter
*
****************************************************************************************************
*/
// Same defaults as the default constructor, except that the Object base is
// constructed from the supplied client.
Lib::Lib(const Client* pClient) :
    Object(pClient),                        // hand the client to the base class
    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),    // no chip family selected yet
    m_chipRevision(0),
    m_version(ADDRLIB_VERSION),             // library version constant
    m_pipes(0),
    m_banks(0),
    m_pipeInterleaveBytes(0),
    m_rowSize(0),
    m_minPitchAlignPixels(1),
    m_maxSamples(8),
    m_maxBaseAlign(0),
    m_maxMetaBaseAlign(0),
    m_pElemLib(NULL)                        // no element library yet; ~Lib deletes it when set
{
    // Clear all configuration flags at once through the combined value
    m_configFlags.value = 0;
}

/**
****************************************************************************************************
*   Lib::~Lib
*
*   @brief
*       Destructor for the AddrLib class
*
****************************************************************************************************
*/
Lib::~Lib()
{
    // Nothing to release when no element library was ever attached.
    // The explicit check is kept on purpose: the delete below is only
    // reached with a non-NULL pointer.
    if (m_pElemLib == NULL)
    {
        return;
    }

    delete m_pElemLib;
    m_pElemLib = NULL;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Initialization/Helper
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Lib::Create
*
*   @brief
*       Creates and initializes AddrLib object.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Create(
    const ADDR_CREATE_INPUT* pCreateIn,     ///< [in] pointer to ADDR_CREATE_INPUT
    ADDR_CREATE_OUTPUT*      pCreateOut)    ///< [out] pointer to ADDR_CREATE_OUTPUT
{
    // Overview:
    //   1. Optionally validate the caller-filled size fields.
    //   2. Instantiate the hardware layer that matches chipEngine/chipFamily.
    //   3. Copy the creation flags, set chip family and pitch alignment, let the hardware
    //      layer initialize its global parameters, then create the element library.
    //   4. Publish the handle and, on success, the equation table and max alignments.
    //
    // On failure pCreateOut->hLib is NULL and the FIRST, most specific error is returned:
    //   ADDR_PARAMSIZEMISMATCH   - size fields do not match the structure sizes
    //   ADDR_INVALIDGBREGVALUES  - HwlInitGlobalParams() rejected the input
    //   ADDR_OUTOFMEMORY         - any other failure to obtain a library/element library
    //                              (including missing callbacks or an unsupported chip)
    Lib* pLib = NULL;
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pCreateIn->createFlags.fillSizeFields == TRUE)
    {
        if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) ||
            (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    // Both memory callbacks are mandatory: without them no hardware layer is created.
    if ((returnCode == ADDR_OK)                    &&
        (pCreateIn->callbacks.allocSysMem != NULL) &&
        (pCreateIn->callbacks.freeSysMem != NULL))
    {
        Client client = {
            pCreateIn->hClient,
            pCreateIn->callbacks
        };

        switch (pCreateIn->chipEngine)
        {
            case CIASICIDGFXENGINE_ARCTICISLAND:
                switch (pCreateIn->chipFamily)
                {
                    case FAMILY_AI:
                    case FAMILY_RV:
                        pLib = Gfx9HwlInit(&client);
                        break;
                    case FAMILY_NV:
                    case FAMILY_VGH:
                    case FAMILY_RMB:
                    case FAMILY_RPL:
                    case FAMILY_MDN:
                        pLib = Gfx10HwlInit(&client);
                        break;
                    case FAMILY_NV3:
                    case FAMILY_GFX1150:
                    case FAMILY_GFX1103:
                        pLib = Gfx11HwlInit(&client);
                        break;
                    case FAMILY_GFX12:
                        pLib = Gfx12HwlInit(&client);
                        break;
                    default:
                        ADDR_ASSERT_ALWAYS();
                        break;
                }
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                break;
        }
    }

    if (pLib == NULL)
    {
        // Do not clobber an error that was already diagnosed above (e.g. a size mismatch);
        // only fall back to the generic code when nothing more specific is known.
        if (returnCode == ADDR_OK)
        {
            returnCode = ADDR_OUTOFMEMORY;
        }
    }
    else
    {
        BOOL_32 initValid;

        // Pass createFlags to configFlags first since these flags may be overwritten
        pLib->m_configFlags.noCubeMipSlicesPad  = pCreateIn->createFlags.noCubeMipSlicesPad;
        pLib->m_configFlags.fillSizeFields      = pCreateIn->createFlags.fillSizeFields;
        pLib->m_configFlags.useTileIndex        = pCreateIn->createFlags.useTileIndex;
        pLib->m_configFlags.useCombinedSwizzle  = pCreateIn->createFlags.useCombinedSwizzle;
        pLib->m_configFlags.checkLast2DLevel    = pCreateIn->createFlags.checkLast2DLevel;
        pLib->m_configFlags.useHtileSliceAlign  = pCreateIn->createFlags.useHtileSliceAlign;
        pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
        pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat;
        pLib->m_configFlags.nonPower2MemConfig  = pCreateIn->createFlags.nonPower2MemConfig;
        pLib->m_configFlags.enableAltTiling     = pCreateIn->createFlags.enableAltTiling;
        pLib->m_configFlags.disableLinearOpt    = FALSE;

        pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);

        pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);

        // Global parameters initialized and remaining configFlags bits are set as well
        initValid = pLib->HwlInitGlobalParams(pCreateIn);

        if (initValid)
        {
            pLib->m_pElemLib = ElemLib::Create(pLib);

            if (pLib->m_pElemLib == NULL)
            {
                // The element library itself could not be created.
                returnCode = ADDR_OUTOFMEMORY;
            }
        }
        else
        {
            pLib->m_pElemLib = NULL; // Don't go on allocating element lib
            returnCode = ADDR_INVALIDGBREGVALUES;
        }

        if (pLib->m_pElemLib == NULL)
        {
            // Tear down the partially initialized library; returnCode already carries the
            // reason (ADDR_INVALIDGBREGVALUES or ADDR_OUTOFMEMORY).
            delete pLib;
            pLib = NULL;
            ADDR_ASSERT_ALWAYS();
        }
        else
        {
            pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags);
        }
    }

    // The handle is always written, so callers observe NULL on every failure path.
    pCreateOut->hLib = pLib;

    if ((pLib != NULL) &&
        (returnCode == ADDR_OK))
    {
        pCreateOut->numEquations =
            pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable);

        pLib->SetMaxAlignments();

    }

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::SetChipFamily
*
*   @brief
*       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
*   @return
*      N/A
****************************************************************************************************
*/
VOID Lib::SetChipFamily(
    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
{
    // The hardware layer translates the external family/revision pair into a ChipFamily.
    const ChipFamily convertedFamily = HwlConvertChipFamily(uChipFamily, uChipRevision);

    // An unrecognized chip is asserted on, but the values are still stored below.
    ADDR_ASSERT(convertedFamily != ADDR_CHIP_FAMILY_IVLD);

    m_chipRevision = uChipRevision;
    m_chipFamily   = convertedFamily;
}

/**
****************************************************************************************************
*   Lib::SetMinPitchAlignPixels
*
*   @brief
*       Set m_minPitchAlignPixels with input param
*
*   @return
*      N/A
****************************************************************************************************
*/
VOID Lib::SetMinPitchAlignPixels(
    UINT_32 minPitchAlignPixels)    ///< [in] minimum pitch alignment in pixels
{
    // Zero is replaced by 1; any other value is stored unchanged.
    if (minPitchAlignPixels == 0)
    {
        m_minPitchAlignPixels = 1;
    }
    else
    {
        m_minPitchAlignPixels = minPitchAlignPixels;
    }
}

/**
****************************************************************************************************
*   Lib::SetMaxAlignments
*
*   @brief
*       Set max alignments
*
*   @return
*      N/A
****************************************************************************************************
*/
VOID Lib::SetMaxAlignments()
{
    // Cache the data-surface maximum first, then the metadata maximum; both values come
    // from the hardware layer.
    const UINT_32 dataSurfaceMaxAlign = HwlComputeMaxBaseAlignments();
    m_maxBaseAlign = dataSurfaceMaxAlign;

    const UINT_32 metadataMaxAlign = HwlComputeMaxMetaBaseAlignments();
    m_maxMetaBaseAlign = metadataMaxAlign;
}

/**
****************************************************************************************************
*   Lib::GetLib
*
*   @brief
*       Get AddrLib pointer
*
*   @return
*      An AddrLib class pointer
****************************************************************************************************
*/
Lib* Lib::GetLib(
    ADDR_HANDLE hLib)   ///< [in] handle of ADDR_HANDLE
{
    // The handle is simply the library pointer in disguise; no validation is performed.
    Addr::Lib* const pAddrLib = static_cast<Addr::Lib*>(hLib);

    return pAddrLib;
}

/**
****************************************************************************************************
*   Lib::GetMaxAlignments
*
*   @brief
*       Gets maximum alignments for data surface (include FMask)
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMaxAlignments(
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    ) const
{
    // The caller-filled size field is only validated when fillSizeFields is enabled.
    const bool sizeMismatch = (GetFillSizeFieldsFlags() == TRUE) &&
                              (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT));

    if (sizeMismatch)
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // A cached value of zero is reported as "not implemented" and pOut is left untouched.
    if (m_maxBaseAlign == 0)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    pOut->baseAlign = m_maxBaseAlign;

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::GetMaxMetaAlignments
*
*   @brief
*       Gets maximum alignments for metadata (CMask, DCC and HTile)
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments(
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    ) const
{
    // The caller-filled size field is only validated when fillSizeFields is enabled.
    const bool sizeMismatch = (GetFillSizeFieldsFlags() == TRUE) &&
                              (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT));

    if (sizeMismatch)
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // A cached value of zero is reported as "not implemented" and pOut is left untouched.
    if (m_maxMetaBaseAlign == 0)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    pOut->baseAlign = m_maxMetaBaseAlign;

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::Bits2Number
*
*   @brief
*       Concatenate an array of binary bits into a number
*
*   @return
*       The number combined with the array of bits
****************************************************************************************************
*/
UINT_32 Lib::Bits2Number(
    UINT_32 bitNum,     ///< [in] how many bits
    ...)                ///< [in] variable bits value starting from MSB
{
    // Packs bitNum variadic arguments into one value, first argument most significant.
    // Every variadic argument is read as a UINT_32 and is expected to be 0 or 1.
    // Returns 0 when bitNum is 0.
    UINT_32 number = 0;
    va_list bits_ptr;

    va_start(bits_ptr, bitNum);

    for (UINT_32 i = 0; i < bitNum; i++)
    {
        // Shift BEFORE merging the next bit. Shifting after the merge and undoing the
        // surplus shift at the end would push the first bit out of the 32-bit value when
        // bitNum is 32, silently dropping the MSB.
        number = (number << 1) | va_arg(bits_ptr, UINT_32);
    }

    va_end(bits_ptr);

    return number;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Element lib
////////////////////////////////////////////////////////////////////////////////////////////////////


/**
****************************************************************************************************
*   Lib::Flt32ToDepthPixel
*
*   @brief
*       Convert a FLT_32 value to a depth/stencil pixel value
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel(
    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
    ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const
{
    // The caller-filled size fields are only validated when fillSizeFields is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) ||
         (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // The element library performs the actual conversion into pOut->pPixel.
    GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel);

    // Per-format layout. The two plane offsets are scaled by 64 before being reported;
    // formats not listed below report all zeros.
    UINT_32 depthPlaneOffset   = 0;
    UINT_32 stencilPlaneOffset = 0;
    UINT_32 numDepthBits       = 0;
    UINT_32 numStencilBits     = 0;

    switch (pIn->format)
    {
        case ADDR_DEPTH_16:
            numDepthBits = 16;
            break;

        case ADDR_DEPTH_32_FLOAT:
            numDepthBits = 32;
            break;

        case ADDR_DEPTH_X8_24:
        case ADDR_DEPTH_8_24:
        case ADDR_DEPTH_X8_24_FLOAT:
        case ADDR_DEPTH_8_24_FLOAT:
            depthPlaneOffset = 8;
            numDepthBits     = 24;
            numStencilBits   = 8;
            break;

        case ADDR_DEPTH_X24_8_32_FLOAT:
            depthPlaneOffset = 8;
            numDepthBits     = 32;
            numStencilBits   = 8;
            break;

        default:
            break;
    }

    // Overwrite base since R800 has no "tileBase"
    if (GetElemLib()->IsDepthStencilTilePlanar() == FALSE)
    {
        depthPlaneOffset   = 0;
        stencilPlaneOffset = 0;
    }

    pOut->stencilBase = stencilPlaneOffset * 64;
    pOut->depthBase   = depthPlaneOffset * 64;
    pOut->depthBits   = numDepthBits;
    pOut->stencilBits = numStencilBits;

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::Flt32ToColorPixel
*
*   @brief
*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Flt32ToColorPixel(
    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
    ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const
{
    // The caller-filled size fields are only validated when fillSizeFields is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) ||
         (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // The element library does all the work; the result is written to pOut->pPixel.
    GetElemLib()->Flt32ToColorPixel(
        pIn->format, pIn->surfNum, pIn->surfSwap, pIn->comps, pOut->pPixel);

    return ADDR_OK;
}


/**
****************************************************************************************************
*   Lib::GetExportNorm
*
*   @brief
*       Check whether a format can use EXPORT_NORM
*   @return
*       TRUE if EXPORT_NORM can be used
****************************************************************************************************
*/
BOOL_32 Lib::GetExportNorm(
    const ELEM_GETEXPORTNORM_INPUT* pIn) const
{
    // A size mismatch (checked only when fillSizeFields is enabled) cannot be reported
    // through the boolean return value, so it simply yields FALSE.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)))
    {
        return FALSE;
    }

    // The decision itself is delegated to the element library.
    const BOOL_32 canExportNorm = GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap);

    return canExportNorm;
}

/**
****************************************************************************************************
*   Lib::GetBpe
*
*   @brief
*       Get bits-per-element for specified format
*   @return
*       bits-per-element of specified format
****************************************************************************************************
*/
UINT_32 Lib::GetBpe(AddrFormat format) const
{
    // Thin forwarder: the element library knows the bit width of every format.
    const UINT_32 bitsPerElement = GetElemLib()->GetBitsPerPixel(format);

    return bitsPerElement;
}

} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrlib.h
* @brief Contains the Addr::Lib base class definition.
****************************************************************************************************
*/

#ifndef __ADDR_LIB_H__
#define __ADDR_LIB_H__

#include "addrinterface.h"
#include "addrtypes.h"
#include "addrobject.h"
#include "addrelemlib.h"

#include "amdgpu_asic_addr.h"

// Fallback graphics-engine identifiers: each one is defined here only when no header
// included earlier has already provided it.
#if !defined(CIASICIDGFXENGINE_R600)
#define CIASICIDGFXENGINE_R600 0x00000006
#endif

#if !defined(CIASICIDGFXENGINE_R800)
#define CIASICIDGFXENGINE_R800 0x00000008
#endif

#if !defined(CIASICIDGFXENGINE_SOUTHERNISLAND)
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif

#if !defined(CIASICIDGFXENGINE_ARCTICISLAND)
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif

namespace rocr {
namespace Addr
{

/**
****************************************************************************************************
* @brief Neutral enums that define pipeinterleave
****************************************************************************************************
*/
enum PipeInterleave
{
    ADDR_PIPEINTERLEAVE_256B = 0x100,   ///< 256 bytes
    ADDR_PIPEINTERLEAVE_512B = 0x200,   ///< 512 bytes
    ADDR_PIPEINTERLEAVE_1KB  = 0x400,   ///< 1024 bytes
    ADDR_PIPEINTERLEAVE_2KB  = 0x800,   ///< 2048 bytes
};

/**
****************************************************************************************************
* @brief Neutral enums that define DRAM row size
****************************************************************************************************
*/
enum RowSize
{
    ADDR_ROWSIZE_1KB = 0x0400,  ///< 1024 bytes
    ADDR_ROWSIZE_2KB = 0x0800,  ///< 2048 bytes
    ADDR_ROWSIZE_4KB = 0x1000,  ///< 4096 bytes
    ADDR_ROWSIZE_8KB = 0x2000,  ///< 8192 bytes
};

/**
****************************************************************************************************
* @brief Neutral enums that define bank interleave
****************************************************************************************************
*/
enum BankInterleave
{
    ADDR_BANKINTERLEAVE_1 = 1 << 0,     ///< value 1
    ADDR_BANKINTERLEAVE_2 = 1 << 1,     ///< value 2
    ADDR_BANKINTERLEAVE_4 = 1 << 2,     ///< value 4
    ADDR_BANKINTERLEAVE_8 = 1 << 3,     ///< value 8
};

/**
****************************************************************************************************
* @brief Neutral enums that define shader engine tile size
****************************************************************************************************
*/
enum ShaderEngineTileSize
{
    ADDR_SE_TILESIZE_16 = 0x10,     ///< value 16
    ADDR_SE_TILESIZE_32 = 0x20,     ///< value 32
};

/**
****************************************************************************************************
* @brief Neutral enums that define bank swap size
****************************************************************************************************
*/
enum BankSwapSize
{
    ADDR_BANKSWAP_128B = 0x080,     ///< 128 bytes
    ADDR_BANKSWAP_256B = 0x100,     ///< 256 bytes
    ADDR_BANKSWAP_512B = 0x200,     ///< 512 bytes
    ADDR_BANKSWAP_1KB  = 0x400,     ///< 1024 bytes
};

/**
****************************************************************************************************
* @brief Enums that define max compressed fragments config
****************************************************************************************************
*/
enum NumMaxCompressedFragmentsConfig
{
    // The enumerator value is log2 of the fragment count in its name.
    ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0,
    ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 1,
    ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 2,
    ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 3,
};

/**
****************************************************************************************************
* @brief Enums that define num pipes config
****************************************************************************************************
*/
enum NumPipesConfig
{
    // The enumerator value is log2 of the pipe count in its name.
    ADDR_CONFIG_1_PIPE  = 0,
    ADDR_CONFIG_2_PIPE  = 1,
    ADDR_CONFIG_4_PIPE  = 2,
    ADDR_CONFIG_8_PIPE  = 3,
    ADDR_CONFIG_16_PIPE = 4,
    ADDR_CONFIG_32_PIPE = 5,
    ADDR_CONFIG_64_PIPE = 6,
};

/**
****************************************************************************************************
* @brief Enums that define num banks config
****************************************************************************************************
*/
enum NumBanksConfig
{
    // The enumerator value is log2 of the bank count in its name.
    ADDR_CONFIG_1_BANK  = 0,
    ADDR_CONFIG_2_BANK  = 1,
    ADDR_CONFIG_4_BANK  = 2,
    ADDR_CONFIG_8_BANK  = 3,
    ADDR_CONFIG_16_BANK = 4,
};

/**
****************************************************************************************************
* @brief Enums that define num rb per shader engine config
****************************************************************************************************
*/
enum NumRbPerShaderEngineConfig
{
    // The enumerator value is log2 of the RB count in its name.
    ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0,
    ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 1,
    ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 2,
};

/**
****************************************************************************************************
* @brief Enums that define num shader engines config
****************************************************************************************************
*/
enum NumShaderEnginesConfig
{
    // The enumerator value is log2 of the shader-engine count in its name.
    ADDR_CONFIG_1_SHADER_ENGINE = 0,
    ADDR_CONFIG_2_SHADER_ENGINE = 1,
    ADDR_CONFIG_4_SHADER_ENGINE = 2,
    ADDR_CONFIG_8_SHADER_ENGINE = 3,
};

/**
****************************************************************************************************
* @brief Enums that define pipe interleave size config
****************************************************************************************************
*/
enum PipeInterleaveSizeConfig
{
    // Consecutive codes 0..3 for the interleave sizes 256B, 512B, 1KB and 2KB.
    ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0,
    ADDR_CONFIG_PIPE_INTERLEAVE_512B = 1,
    ADDR_CONFIG_PIPE_INTERLEAVE_1KB  = 2,
    ADDR_CONFIG_PIPE_INTERLEAVE_2KB  = 3,
};

/**
****************************************************************************************************
* @brief Enums that define row size config
****************************************************************************************************
*/
enum RowSizeConfig
{
    // Consecutive codes 0..2 for the row sizes 1KB, 2KB and 4KB.
    ADDR_CONFIG_1KB_ROW = 0,
    ADDR_CONFIG_2KB_ROW = 1,
    ADDR_CONFIG_4KB_ROW = 2,
};

/**
****************************************************************************************************
* @brief Enums that define bank interleave size config
****************************************************************************************************
*/
enum BankInterleaveSizeConfig
{
    // The enumerator value is log2 of the interleave factor in its name.
    ADDR_CONFIG_BANK_INTERLEAVE_1 = 0,
    ADDR_CONFIG_BANK_INTERLEAVE_2 = 1,
    ADDR_CONFIG_BANK_INTERLEAVE_4 = 2,
    ADDR_CONFIG_BANK_INTERLEAVE_8 = 3,
};

/**
****************************************************************************************************
* @brief Enums that define engine tile size config
****************************************************************************************************
*/
enum ShaderEngineTileSizeConfig
{
    // Codes 0 and 1 for the tile sizes 16 and 32.
    ADDR_CONFIG_SE_TILE_16 = 0,
    ADDR_CONFIG_SE_TILE_32 = 1,
};

/**
****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
****************************************************************************************************
*/
class Lib : public Object
{
public:
    virtual ~Lib();

    /// Creates and initializes the library object matching the chip described by
    /// pCreateInfo; the resulting handle is returned through pCreateOut->hLib
    static ADDR_E_RETURNCODE Create(
        const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut);

    /// Pair of Create
    VOID Destroy()
    {
        delete this;
    }

    /// Converts an ADDR_HANDLE back into a Lib pointer
    static Lib* GetLib(ADDR_HANDLE hLib);

    /// Returns AddrLib version (from compiled binary instead include file)
    UINT_32 GetVersion()
    {
        return m_version;
    }

    /// Returns asic chip family name defined by AddrLib
    ChipFamily GetChipFamily() const
    {
        return m_chipFamily;
    }

    /// Converts a FLT_32 value to a depth/stencil pixel value and reports the depth/stencil
    /// base and bit counts for the format
    ADDR_E_RETURNCODE Flt32ToDepthPixel(
        const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
        ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;

    /// Converts a FLT_32 value to a red/green/blue/alpha pixel value
    ADDR_E_RETURNCODE Flt32ToColorPixel(
        const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
        ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;

    /// Returns TRUE if EXPORT_NORM can be used for the given format
    BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const;

    /// Gets the cached maximum base alignment for data surfaces (m_maxBaseAlign)
    ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;

    /// Gets the cached maximum base alignment for metadata (m_maxMetaBaseAlign)
    ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;

    /// Returns bits-per-element of the specified format
    UINT_32 GetBpe(AddrFormat format) const;

protected:
    Lib();  // Constructor is protected
    Lib(const Client* pClient);

    /// Pure virtual function to get max base alignments
    virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0;

    /// Gets maximum alignments for metadata
    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const
    {
        ADDR_NOT_IMPLEMENTED();

        return 0;
    }

    /// Debug-only check (compiled in when DEBUG is non-zero) that a data surface alignment
    /// does not exceed the cached maximum
    VOID ValidBaseAlignments(UINT_32 alignment) const
    {
#if DEBUG
        ADDR_ASSERT(alignment <= m_maxBaseAlign);
#endif
    }

    /// Debug-only check (compiled in when DEBUG is non-zero) that a metadata alignment
    /// does not exceed the cached maximum
    VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const
    {
#if DEBUG
        ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign);
#endif
    }

    /// Returns TRUE if the resource type is ADDR_RSRC_TEX_1D
    static BOOL_32 IsTex1d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_1D);
    }

    /// Returns TRUE if the resource type is ADDR_RSRC_TEX_2D
    static BOOL_32 IsTex2d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_2D);
    }

    /// Returns TRUE if the resource type is ADDR_RSRC_TEX_3D
    static BOOL_32 IsTex3d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_3D);
    }

    //
    // Initialization
    //
    /// Pure Virtual function for Hwl computing internal global parameters from h/w registers
    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0;

    /// Pure Virtual function for Hwl converting chip family
    virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;

    /// Get equation table pointer and number of equations
    /// (this default implementation reports an empty table: NULL pointer, 0 equations)
    virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
    {
        *ppEquationTable = NULL;

        return 0;
    }

    //
    // Misc helper
    //
    /// Combines bitNum variadic bit values (given MSB first) into a single number
    static UINT_32 Bits2Number(UINT_32 bitNum, ...);

    /// Returns numFrags when it is non-zero, otherwise numSamples clamped to at least 1
    static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags)
    {
        return (numFrags != 0) ? numFrags : Max(1u, numSamples);
    }

    /// Returns pointer of ElemLib
    ElemLib* GetElemLib() const
    {
        return m_pElemLib;
    }

    /// Returns fillSizeFields flag
    UINT_32 GetFillSizeFieldsFlags() const
    {
        return m_configFlags.fillSizeFields;
    }

private:
    // Disallow the copy constructor
    Lib(const Lib& a);

    // Disallow the assignment operator
    Lib& operator=(const Lib& a);

    /// Converts the external family/revision pair and stores m_chipFamily/m_chipRevision
    VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

    /// Stores the minimum pitch alignment; 0 is replaced by 1
    VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);

    /// Caches the Hwl-computed maximum alignments in m_maxBaseAlign/m_maxMetaBaseAlign
    VOID SetMaxAlignments();

protected:
    ChipFamily  m_chipFamily;   ///< Chip family translated from the one in atiid.h

    UINT_32     m_chipRevision; ///< Revision id from xxx_id.h

    UINT_32     m_version;      ///< Current version

    //
    // Global parameters
    //
    ConfigFlags m_configFlags;          ///< Global configuration flags. Note this is setup by
                                        ///  AddrLib instead of Client except forceLinearAligned

    UINT_32     m_pipes;                ///< Number of pipes
    UINT_32     m_banks;                ///< Number of banks
                                        ///  For r800 this is MC_ARB_RAMCFG.NOOFBANK
                                        ///  Keep it here to do default parameter calculation

    UINT_32     m_pipeInterleaveBytes;
                                        ///< Specifies the size of contiguous address space
                                        ///  within each tiling pipe when making linear
                                        ///  accesses. (Formerly Group Size)

    UINT_32     m_rowSize;              ///< DRAM row size, in bytes

    UINT_32     m_minPitchAlignPixels;  ///< Minimum pitch alignment in pixels
    UINT_32     m_maxSamples;           ///< Max numSamples

    UINT_32     m_maxBaseAlign;         ///< Max base alignment for data surface
    UINT_32     m_maxMetaBaseAlign;     ///< Max base alignment for metadata

private:
    ElemLib*    m_pElemLib;             ///< Element Lib pointer
};

// Per-hardware-generation factory functions (defined elsewhere). Lib::Create() picks one
// of them according to the chip family and passes it the client descriptor it has built.
Lib* Gfx9HwlInit (const Client* pClient);
Lib* Gfx10HwlInit(const Client* pClient);
Lib* Gfx11HwlInit(const Client* pClient);
Lib* Gfx12HwlInit(const Client* pClient);
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib1.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
****************************************************************************************************
* @file  addrlib1.cpp
* @brief Contains the implementation for the Addr::V1::Lib base class.
****************************************************************************************************
*/

#include "addrinterface.h"
#include "addrlib1.h"
#include "addrcommon.h"

namespace rocr {
namespace Addr
{
namespace V1
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// Static property table with one row per tile mode; the trailing comment on each row names
// the ADDR_TM_* mode it describes (presumably the table is indexed by that enum value).
// Column legend for the abbreviated header row below. NOTE(review): the field meanings are
// presumed from the abbreviations; TileModeFlags is declared elsewhere -- confirm:
//   T  = thickness          L  = linear
//   1  = 1D (micro) tiled   2  = 2D (macro) tiled   3 = 3D macro tiled
//   P  = PRT                Pr = PRT without rotation
//   B  = bank swapped
const TileModeFlags Lib::ModeFlags[ADDR_TM_COUNT] =
{// T   L  1  2  3  P  Pr B
    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL
    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED
    {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1
    {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK
    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1
    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2
    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4
    {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK
    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1
    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2
    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4
    {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK
    {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1
    {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK
    {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1
    {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK
    {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK
    {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK
    {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE
    {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1
    {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1
    {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1
    {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK
    {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK
    {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK
    {0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_UNKNOWN
};

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Constructor/Destructor
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Default constructor for the Addr::V1::Lib class
*
****************************************************************************************************
*/
Lib::Lib() : Addr::Lib()
{
    // The constructor body has nothing to add on top of the base-class defaults.
}

/**
****************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the Addr::V1::Lib class with hClient as parameter
*
****************************************************************************************************
*/
Lib::Lib(const Client* pClient) : Addr::Lib(pClient)
{
    // The client descriptor is forwarded to the base class; nothing else is done here.
}

/**
****************************************************************************************************
*   Lib::~Lib
*
*   @brief
*       Destructor for the Addr::V1::Lib class
*
****************************************************************************************************
*/
Lib::~Lib()
{
    // Intentionally empty: this destructor body performs no cleanup of its own.
}

/**
****************************************************************************************************
*   Lib::GetLib
*
*   @brief
*       Get AddrLib1 pointer
*
*   @return
*      An Addr::V1::Lib class pointer
****************************************************************************************************
*/
Lib* Lib::GetLib(
    ADDR_HANDLE hLib)   ///< [in] handle of ADDR_HANDLE
{
    Addr::Lib* const pBaseLib = Addr::Lib::GetLib(hLib);

    // A NULL handle is passed straight through (the result is NULL).
    if (pBaseLib == NULL)
    {
        return static_cast<Lib*>(hLib);
    }

    // Only a library with a valid chip family no newer than VI may use the V1 interface.
    const bool familyInvalid = (pBaseLib->GetChipFamily() == ADDR_CHIP_FAMILY_IVLD);
    const bool familyPostVi  = (pBaseLib->GetChipFamily() > ADDR_CHIP_FAMILY_VI);

    if (familyInvalid || familyPostVi)
    {
        // Reject the handle: assert and hand back NULL instead of a V1 pointer.
        ADDR_ASSERT_ALWAYS();
        hLib = NULL;
    }

    return static_cast<Lib*>(hLib);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Surface Methods
////////////////////////////////////////////////////////////////////////////////////////////////////


/**
****************************************************************************************************
*   Lib::ComputeSurfaceInfo
*
*   @brief
*       Interface function stub of AddrComputeSurfaceInfo.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
     const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // Overview:
    //   1. Validate the caller's structures and a few basic parameters.
    //   2. Work on a local copy of the input (localIn) so that *pIn is never modified.
    //   3. Adjust the copy for mip level and element format, resolve the tile index, and let the
    //      HWL select/override the tile mode.
    //   4. Call HwlComputeSurfaceInfo() and post-process its results into pOut.
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Structure sizes are only verified when GetFillSizeFieldsFlags() reports TRUE
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) ||
            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    // NOTE: the three checks below run unconditionally, so a failing one replaces an earlier
    // ADDR_PARAMSIZEMISMATCH with ADDR_INVALIDPARAMS.

    // We suggest client do sanity check but a check here is also good
    if (pIn->bpp > 128)
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    // An unknown tile mode is only accepted for mip level 0
    if ((pIn->tileMode == ADDR_TM_UNKNOWN) && (pIn->mipLevel > 0))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    // Thick modes don't support multisample
    if ((Thickness(pIn->tileMode) > 1) && (pIn->numSamples > 1))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    if (returnCode == ADDR_OK)
    {
        // Get a local copy of input structure and only reference pIn for unadjusted values
        ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
        ADDR_TILEINFO tileInfoNull = {0};

        if (UseTileInfo())
        {
            // If the original input has a valid ADDR_TILEINFO pointer then copy its contents.
            // Otherwise the default 0's in tileInfoNull are used.
            if (pIn->pTileInfo)
            {
                tileInfoNull = *pIn->pTileInfo;
            }
            // From here on localIn always points at the local tile info, never at the caller's
            localIn.pTileInfo  = &tileInfoNull;
        }

        // A sample count of 0 is treated as single-sampled
        localIn.numSamples = (pIn->numSamples == 0) ? 1 : pIn->numSamples;

        // Do mipmap check first
        // If format is BCn, pre-pad dimension to power-of-two according to HWL
        ComputeMipLevel(&localIn);

        if (m_configFlags.checkLast2DLevel)
        {
            // Save this level's original height in pixels
            pOut->height = pIn->height;
        }

        UINT_32 expandX = 1;
        UINT_32 expandY = 1;
        // Left uninitialized here; it is passed to GetBitsPerPixel() as an out argument below and
        // is only used inside blocks guarded by (localIn.format != ADDR_FMT_INVALID).
        ElemMode elemMode;

        // Save outputs that may not go through HWL
        pOut->pixelBits = localIn.bpp;
        pOut->numSamples = localIn.numSamples;
        pOut->last2DLevel = FALSE;
        pOut->tcCompatible = FALSE;

#if !ALT_TEST
        // Multisampled surfaces are expected to have a single mip level
        if (localIn.numSamples > 1)
        {
            ADDR_ASSERT(localIn.mipLevel == 0);
        }
#endif

        if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion
        {
            // Get compression/expansion factors and element mode
            // (which indicates compression/expansion)
            localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format,
                                                        &elemMode,
                                                        &expandX,
                                                        &expandY);

            // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is
            // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear-
            // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw
            // restrictions are different.
            // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround
            // but we use this flag to skip RestoreSurfaceInfo below

            if ((elemMode == ADDR_EXPANDED) && (expandX > 1))
            {
                ADDR_ASSERT(IsLinear(localIn.tileMode));
            }

            // Adjusts bpp, basePitch, width and height of the local copy in place
            GetElemLib()->AdjustSurfaceInfo(elemMode,
                                            expandX,
                                            expandY,
                                            &localIn.bpp,
                                            &localIn.basePitch,
                                            &localIn.width,
                                            &localIn.height);

            // Overwrite these parameters if we have a valid format
        }
        else if (localIn.bpp != 0)
        {
            // No format given but bpp is valid: clamp zero dimensions to 1
            localIn.width  = (localIn.width != 0) ? localIn.width : 1;
            localIn.height = (localIn.height != 0) ? localIn.height : 1;
        }
        else // Rule out some invalid parameters
        {
            // Neither a valid format nor a non-zero bpp was supplied
            ADDR_ASSERT_ALWAYS();

            returnCode = ADDR_INVALIDPARAMS;
        }

        // Check mipmap after surface expansion
        if (returnCode == ADDR_OK)
        {
            returnCode = PostComputeMipLevel(&localIn, pOut);
        }

        if (returnCode == ADDR_OK)
        {
            if (UseTileIndex(localIn.tileIndex))
            {
                // Make sure pTileInfo is not NULL
                ADDR_ASSERT(localIn.pTileInfo);

                UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags);

                INT_32 macroModeIndex = TileIndexNoMacroIndex;

                // TileIndexLinearGeneral skips the macro mode lookup and keeps
                // TileIndexNoMacroIndex
                if (localIn.tileIndex != TileIndexLinearGeneral)
                {
                    // Try finding a macroModeIndex
                    macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
                                                              localIn.flags,
                                                              localIn.bpp,
                                                              numSamples,
                                                              localIn.pTileInfo,
                                                              &localIn.tileMode,
                                                              &localIn.tileType);
                }

                // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
                if (macroModeIndex == TileIndexNoMacroIndex)
                {
                    returnCode = HwlSetupTileCfg(localIn.bpp,
                                                 localIn.tileIndex, macroModeIndex,
                                                 localIn.pTileInfo,
                                                 &localIn.tileMode, &localIn.tileType);
                }
                // If macroModeIndex is invalid, then assert this is not macro tiled
                else if (macroModeIndex == TileIndexInvalid)
                {
                    ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
                }

                // Reported to the caller even when HwlSetupTileCfg failed above
                pOut->macroModeIndex = macroModeIndex;
            }
        }

        if (returnCode == ADDR_OK)
        {
            // dccPipeWorkaround simply mirrors dccCompatible on the local copy
            localIn.flags.dccPipeWorkaround = localIn.flags.dccCompatible;

            if (localIn.tileMode == ADDR_TM_UNKNOWN)
            {
                // HWL layer may override tile mode if necessary
                HwlSelectTileMode(&localIn);
            }
            else
            {
                // HWL layer may override tile mode if necessary
                HwlOverrideTileMode(&localIn);

                // Optimize tile mode if possible
                OptimizeTileMode(&localIn);
            }
        }

        // Call main function to compute surface info
        if (returnCode == ADDR_OK)
        {
            returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
        }

        if (returnCode == ADDR_OK)
        {
            // Since bpp might be changed we just pass it through
            pOut->bpp  = localIn.bpp;

            // Also original width/height/bpp
            pOut->pixelPitch    = pOut->pitch;
            pOut->pixelHeight   = pOut->height;

#if DEBUG
            // Debug-only check: display surfaces are expected to have a pitch alignment that is
            // a multiple of 32
            if (localIn.flags.display)
            {
                ADDR_ASSERT((pOut->pitchAlign % 32) == 0);
            }
#endif //DEBUG

            if (localIn.format != ADDR_FMT_INVALID)
            {
                //
                // Note: For 96 bit surface, the pixelPitch returned might be an odd number, but it
                // is okay to program texture pitch as HW's mip calculator would multiply 3 first,
                // then do the appropriate paddings (linear alignment requirement and possible the
                // nearest power-of-two for mipmaps), which results in the original pitch.
                //
                GetElemLib()->RestoreSurfaceInfo(elemMode,
                                                 expandX,
                                                 expandY,
                                                 &localIn.bpp,
                                                 &pOut->pixelPitch,
                                                 &pOut->pixelHeight);
            }

            // Stereo info is only computed when the caller supplied a pStereoInfo buffer
            if (localIn.flags.qbStereo)
            {
                if (pOut->pStereoInfo)
                {
                    ComputeQbStereoInfo(pOut);
                }
            }

            if (localIn.flags.volume) // For volume sliceSize equals to all z-slices
            {
                pOut->sliceSize = pOut->surfSize;
            }
            else // For array: sliceSize is likely to have slice-padding (the last one)
            {
                pOut->sliceSize = pOut->surfSize / pOut->depth;

                // array or cubemap
                if (pIn->numSlices > 1)
                {
                    // If this is the last slice then add the padding size to this slice
                    // (padding = one slice size for every slice in depth beyond numSlices)
                    if (pIn->slice == (pIn->numSlices - 1))
                    {
                        pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices);
                    }
                    else if (m_configFlags.checkLast2DLevel)
                    {
                        // Reset last2DLevel flag if this is not the last array slice
                        pOut->last2DLevel = FALSE;
                    }
                }
            }

            // Pitch and height are divided by 8 and the pitch*height area by 64; each result is
            // stored minus one
            pOut->pitchTileMax = pOut->pitch / 8 - 1;
            pOut->heightTileMax = pOut->height / 8 - 1;
            pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1;
        }
    }

    // Runs on every path, including the failure paths above
    ValidBaseAlignments(pOut->baseAlign);

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoord
*
*   @brief
*       Interface function stub of ComputeSurfaceAddrFromCoord.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch =
            (pIn->size == sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) &&
            (pOut->size == sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                            scratchTileInfo;
    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Work on a private copy whose tile info points at local scratch storage.
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_SURFACE_FLAGS noFlags   = {{0}};
        const UINT_32            fragCount = GetNumFragments(pIn->numSamples, pIn->numFrags);

        // First attempt: look for a macro mode index.
        const INT_32 macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
                                                               noFlags,
                                                               localIn.bpp,
                                                               fragCount,
                                                               localIn.pTileInfo,
                                                               &localIn.tileMode,
                                                               &localIn.tileType);

        if (macroModeIndex == TileIndexNoMacroIndex)
        {
            // No macro mode index is needed: fetch the tile configuration directly.
            status = HwlSetupTileCfg(localIn.bpp, localIn.tileIndex, macroModeIndex,
                                     localIn.pTileInfo, &localIn.tileMode, &localIn.tileType);
        }
        else if (macroModeIndex == TileIndexInvalid)
        {
            // An invalid macro mode index is only acceptable for non-macro-tiled modes.
            ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
        }

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status != ADDR_OK)
    {
        return status;
    }

    status = HwlComputeSurfaceAddrFromCoord(pIn, pOut);

    if (status == ADDR_OK)
    {
        // Index of the 64KB block that contains the computed address.
        pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddr
*
*   @brief
*       Interface function stub of ComputeSurfaceCoordFromAddr.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr(
    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch =
            (pIn->size == sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) &&
            (pOut->size == sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                            scratchTileInfo;
    ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Work on a private copy whose tile info points at local scratch storage.
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_SURFACE_FLAGS noFlags   = {{0}};
        const UINT_32            fragCount = GetNumFragments(pIn->numSamples, pIn->numFrags);

        // First attempt: look for a macro mode index.
        const INT_32 macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
                                                               noFlags,
                                                               localIn.bpp,
                                                               fragCount,
                                                               localIn.pTileInfo,
                                                               &localIn.tileMode,
                                                               &localIn.tileType);

        if (macroModeIndex == TileIndexNoMacroIndex)
        {
            // No macro mode index is needed: fetch the tile configuration directly.
            status = HwlSetupTileCfg(localIn.bpp, localIn.tileIndex, macroModeIndex,
                                     localIn.pTileInfo, &localIn.tileMode, &localIn.tileType);
        }
        else if (macroModeIndex == TileIndexInvalid)
        {
            // An invalid macro mode index is only acceptable for non-macro-tiled modes.
            ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
        }

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status == ADDR_OK)
    {
        status = HwlComputeSurfaceCoordFromAddr(pIn, pOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputeSliceTileSwizzle
*
*   @brief
*       Interface function stub of ComputeSliceTileSwizzle.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSliceTileSwizzle(
    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) &&
                                (pOut->size == sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                   scratchTileInfo;
    ADDR_COMPUTE_SLICESWIZZLE_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into a private copy of the input.
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        status = HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex,
                                 localIn.pTileInfo, &localIn.tileMode);

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status == ADDR_OK)
    {
        status = HwlComputeSliceTileSwizzle(pIn, pOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::ExtractBankPipeSwizzle
*
*   @brief
*       Interface function stub of AddrExtractBankPipeSwizzle.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ExtractBankPipeSwizzle(
    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) &&
                                (pOut->size == sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                       scratchTileInfo;
    ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into a private copy of the input.
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        status = HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status == ADDR_OK)
    {
        status = HwlExtractBankPipeSwizzle(pIn, pOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::CombineBankPipeSwizzle
*
*   @brief
*       Interface function stub of AddrCombineBankPipeSwizzle.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::CombineBankPipeSwizzle(
    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        // Compare against this function's own structures. The check used to compare against the
        // FMASK info structures, which rejected correctly filled combine-swizzle structures.
        if ((pIn->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT)) ||
            (pOut->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        // Scratch storage; only referenced when a tile index has to be resolved.
        ADDR_TILEINFO tileInfoNull;
        ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input;

        if (UseTileIndex(pIn->tileIndex))
        {
            input = *pIn;
            // Use temp tile info for calculation
            input.pTileInfo = &tileInfoNull;

            returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo);
            // Change the input structure
            pIn = &input;
        }

        if (returnCode == ADDR_OK)
        {
            // Combine bank/pipe swizzle (and base address) into pOut->tileSwizzle
            returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle,
                                                   pIn->pipeSwizzle,
                                                   pIn->pTileInfo,
                                                   pIn->baseAddr,
                                                   &pOut->tileSwizzle);
        }
    }

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::ComputeBaseSwizzle
*
*   @brief
*       Interface function stub of AddrComputeBaseSwizzle.
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeBaseSwizzle(
    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) &&
                                (pOut->size == sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                   scratchTileInfo;
    ADDR_COMPUTE_BASE_SWIZZLE_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into a private copy of the input.
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        status = HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status != ADDR_OK)
    {
        return status;
    }

    if (!IsMacroTiled(pIn->tileMode))
    {
        // Modes that are not macro tiled always report a swizzle of 0.
        pOut->tileSwizzle = 0;
    }
    else
    {
        status = HwlComputeBaseSwizzle(pIn, pOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputeFmaskInfo
*
*   @brief
*       Interface function stub of ComputeFmaskInfo.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskInfo(
    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    )
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) &&
                                (pOut->size == sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT));

        if (!sizesMatch)
        {
            status = ADDR_PARAMSIZEMISMATCH;
        }
    }

    // Thick tile modes cannot be combined with MSAA
    if (Thickness(pIn->tileMode) > 1)
    {
        status = ADDR_INVALIDPARAMS;
    }

    if (status == ADDR_OK)
    {
        // Scratch storage; only referenced when a tile index has to be resolved.
        ADDR_TILEINFO                 scratchTileInfo;
        ADDR_COMPUTE_FMASK_INFO_INPUT localIn;

        if (UseTileIndex(pIn->tileIndex))
        {
            localIn = *pIn;

            // Prefer the caller's output tile info buffer; fall back to local scratch storage.
            localIn.pTileInfo = (pOut->pTileInfo) ? pOut->pTileInfo : &scratchTileInfo;

            ADDR_SURFACE_FLAGS fmaskFlags = {{0}};
            fmaskFlags.fmask = 1;

            // First attempt: look for a macro mode index.
            const INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex,
                                                                   fmaskFlags,
                                                                   HwlComputeFmaskBits(pIn, NULL),
                                                                   pIn->numSamples,
                                                                   localIn.pTileInfo,
                                                                   &localIn.tileMode);

            if (macroModeIndex == TileIndexNoMacroIndex)
            {
                // No macro mode index is needed: fetch the tile configuration directly.
                status = HwlSetupTileCfg(0, localIn.tileIndex, macroModeIndex,
                                         localIn.pTileInfo, &localIn.tileMode);
            }

            ADDR_ASSERT(macroModeIndex != TileIndexInvalid);

            // Continue with the resolved copy instead of the caller's structure.
            pIn = &localIn;
        }

        if (status == ADDR_OK)
        {
            if (pIn->numSamples <= 1)
            {
                // Single-sampled input: zero the whole output and report invalid parameters.
                memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT));

                status = ADDR_INVALIDPARAMS;
            }
            else
            {
                status = HwlComputeFmaskInfo(pIn, pOut);
            }
        }
    }

    // Runs on every path, including the failure paths above.
    ValidBaseAlignments(pOut->baseAlign);

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputeFmaskAddrFromCoord
*
*   @brief
*       Interface function stub of ComputeFmaskAddrFromCoord.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord(
    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) &&
                                (pOut->size == sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    // More than one sample is required; anything else asserts and is rejected.
    ADDR_ASSERT(pIn->numSamples > 1);

    if (pIn->numSamples <= 1)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeFmaskAddrFromCoord(pIn, pOut);
}

/**
****************************************************************************************************
*   Lib::ComputeFmaskCoordFromAddr
*
*   @brief
*       Interface function stub of ComputeFmaskCoordFromAddr.
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr(
    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,     ///< [in] input structure
    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut           ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) &&
                                (pOut->size == sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    // More than one sample is required; anything else asserts and is rejected.
    ADDR_ASSERT(pIn->numSamples > 1);

    if (pIn->numSamples <= 1)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeFmaskCoordFromAddr(pIn, pOut);
}

/**
****************************************************************************************************
*   Lib::ConvertTileInfoToHW
*
*   @brief
*       Convert tile info from real value to HW register value in HW layer
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ConvertTileInfoToHW(
    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) &&
                                (pOut->size == sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // Scratch storage; only referenced when a tile index has to be resolved.
    ADDR_TILEINFO                   scratchTileInfo;
    ADDR_CONVERT_TILEINFOTOHW_INPUT localIn;

    // Tile indices are ignored for the reverse conversion (pIn->reverse is TRUE)
    if ((pIn->reverse == FALSE) && UseTileIndex(pIn->tileIndex))
    {
        localIn           = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        status = HwlSetupTileCfg(localIn.bpp, localIn.tileIndex,
                                 localIn.macroModeIndex, localIn.pTileInfo);

        // Continue with the resolved copy instead of the caller's structure.
        pIn = &localIn;
    }

    if (status == ADDR_OK)
    {
        status = HwlConvertTileInfoToHW(pIn, pOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::ConvertTileIndex
*
*   @brief
*       Convert tile index to tile mode/type/info
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ConvertTileIndex(
    const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure
    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) &&
                                (pOut->size == sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    // Fill the caller's tile info, tile mode and tile type straight from the tile index.
    ADDR_E_RETURNCODE status = HwlSetupTileCfg(pIn->bpp, pIn->tileIndex, pIn->macroModeIndex,
                                               pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);

    if ((status == ADDR_OK) && pIn->tileInfoHw)
    {
        // The caller requested HW values: convert pOut->pTileInfo in place. tileIndex is set
        // to -1 in the conversion input.
        ADDR_CONVERT_TILEINFOTOHW_INPUT  toHwIn  = {0};
        ADDR_CONVERT_TILEINFOTOHW_OUTPUT toHwOut = {0};

        toHwIn.pTileInfo  = pOut->pTileInfo;
        toHwIn.tileIndex  = -1;
        toHwOut.pTileInfo = pOut->pTileInfo;

        status = HwlConvertTileInfoToHW(&toHwIn, &toHwOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::GetMacroModeIndex
*
*   @brief
*       Get macro mode index based on input info
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMacroModeIndex(
    const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input structure
    ADDR_GET_MACROMODEINDEX_OUTPUT*      pOut ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports a non-zero value.
    if (GetFillSizeFieldsFlags())
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_GET_MACROMODEINDEX_INPUT)) &&
                                (pOut->size == sizeof(ADDR_GET_MACROMODEINDEX_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    // The tile info filled by the HWL is discarded; only the returned index is reported.
    ADDR_TILEINFO scratchTileInfo = {0};

    pOut->macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, pIn->flags, pIn->bpp,
                                                    pIn->numFrags, &scratchTileInfo);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::ConvertTileIndex1
*
*   @brief
*       Convert tile index to tile mode/type/info
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ConvertTileIndex1(
    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,   ///< [in] input structure
    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut         ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) &&
                                (pOut->size == sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    ADDR_E_RETURNCODE status = ADDR_OK;

    // The returned macro mode index is not used here; the call is made for the tile info,
    // tile mode and tile type it writes into pOut.
    ADDR_SURFACE_FLAGS noFlags = {{0}};

    HwlComputeMacroModeIndex(pIn->tileIndex, noFlags, pIn->bpp, pIn->numSamples,
                             pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);

    if (pIn->tileInfoHw)
    {
        // The caller requested HW values: convert pOut->pTileInfo in place. tileIndex is set
        // to -1 in the conversion input.
        ADDR_CONVERT_TILEINFOTOHW_INPUT  toHwIn  = {0};
        ADDR_CONVERT_TILEINFOTOHW_OUTPUT toHwOut = {0};

        toHwIn.pTileInfo  = pOut->pTileInfo;
        toHwIn.tileIndex  = -1;
        toHwOut.pTileInfo = pOut->pTileInfo;

        status = HwlConvertTileInfoToHW(&toHwIn, &toHwOut);
    }

    return status;
}

/**
****************************************************************************************************
*   Lib::GetTileIndex
*
*   @brief
*       Get tile index from tile mode/type/info
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetTileIndex(
    const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure
    ADDR_GET_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
    ) const
{
    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const bool sizesMatch = (pIn->size == sizeof(ADDR_GET_TILEINDEX_INPUT)) &&
                                (pOut->size == sizeof(ADDR_GET_TILEINDEX_OUTPUT));

        if (!sizesMatch)
        {
            return ADDR_PARAMSIZEMISMATCH;
        }
    }

    // The lookup itself is implemented by the hardware layer.
    return HwlGetTileIndex(pIn, pOut);
}

/**
****************************************************************************************************
*   Lib::Thickness
*
*   @brief
*       Get tile mode thickness
*
*   @return
*       Tile mode thickness
****************************************************************************************************
*/
UINT_32 Lib::Thickness(
    AddrTileMode tileMode)    ///< [in] tile mode
{
    // Direct table lookup; tileMode is used as the index without any range check.
    const UINT_32 modeThickness = ModeFlags[tileMode].thickness;

    return modeThickness;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               CMASK/HTILE
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Lib::ComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // The htile block dimensions are forwarded as "is 8" flags; any value other than 8 yields FALSE.
    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;

    // Optional validation of the size fields of both caller-supplied structures.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) ||
            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        ADDR_TILEINFO tileInfoNull;
        ADDR_COMPUTE_HTILE_INFO_INPUT input;

        if (UseTileIndex(pIn->tileIndex))
        {
            // Work on a private copy so the caller's (const) input is never modified.
            input = *pIn;
            // Use temp tile info for calculation
            input.pTileInfo = &tileInfoNull;

            returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo);

            // Change the input structure
            pIn = &input;
        }

        if (returnCode == ADDR_OK)
        {
            if (pIn->flags.tcCompatible)
            {
                // 4 bytes per 8x8 block, consistent with the bpp of 32 reported below.
                // NOTE(review): this product is evaluated in the width of pitch/height
                // (presumably 32-bit) before being stored - confirm that very large
                // surfaces cannot overflow it.
                const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8);
                const UINT_32 align     = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes;

                if (pIn->numSlices > 1)
                {
                    const UINT_32 surfBytes = (sliceSize * pIn->numSlices);

                    // Only the whole surface is padded; a slice that is not a multiple
                    // of the alignment is flagged as interleaved.
                    pOut->sliceSize        = sliceSize;
                    pOut->htileBytes       = pIn->flags.skipTcCompatSizeAlign ?
                                             surfBytes : PowTwoAlign(surfBytes, align);
                    pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE;
                }
                else
                {
                    // Single slice: slice size and total size are the same (optionally padded) value.
                    pOut->sliceSize        = pIn->flags.skipTcCompatSizeAlign ?
                                             sliceSize : PowTwoAlign(sliceSize, align);
                    pOut->htileBytes       = pOut->sliceSize;
                    pOut->sliceInterleaved = FALSE;
                }

                pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? TRUE : FALSE;

                // Pitch and height are passed through unchanged; no macro-tile
                // dimensions are reported on this path.
                pOut->pitch       = pIn->pitch;
                pOut->height      = pIn->height;
                pOut->baseAlign   = align;
                pOut->macroWidth  = 0;
                pOut->macroHeight = 0;
                pOut->bpp         = 32;
            }
            else
            {
                // Non tc-compatible path: the internal overload fills every output field
                // and returns the htile bpp.
                pOut->bpp = ComputeHtileInfo(pIn->flags,
                                             pIn->pitch,
                                             pIn->height,
                                             pIn->numSlices,
                                             pIn->isLinear,
                                             isWidth8,
                                             isHeight8,
                                             pIn->pTileInfo,
                                             &pOut->pitch,
                                             &pOut->height,
                                             &pOut->htileBytes,
                                             &pOut->macroWidth,
                                             &pOut->macroHeight,
                                             &pOut->sliceSize,
                                             &pOut->baseAlign);
            }

            // Validate the base alignment only once it has actually been produced.
            // On the failure paths above pOut->baseAlign was never written by this
            // function, so checking it there would inspect stale caller data.
            ValidMetaBaseAlignments(pOut->baseAlign);
        }
    }

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskInfo
*
*   @brief
*       Interface function stub of AddrComputeCmaskInfo
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Optional validation of the size fields of both caller-supplied structures.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) ||
            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        ADDR_TILEINFO tileInfoNull;
        ADDR_COMPUTE_CMASK_INFO_INPUT input;

        if (UseTileIndex(pIn->tileIndex))
        {
            // Work on a private copy so the caller's (const) input is never modified.
            input = *pIn;
            // Use temp tile info for calculation
            input.pTileInfo = &tileInfoNull;

            returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo);

            // Change the input structure
            pIn = &input;
        }

        if (returnCode == ADDR_OK)
        {
            // The internal overload fills all output fields and reports whether the
            // resulting block max had to be clamped.
            returnCode = ComputeCmaskInfo(pIn->flags,
                                          pIn->pitch,
                                          pIn->height,
                                          pIn->numSlices,
                                          pIn->isLinear,
                                          pIn->pTileInfo,
                                          &pOut->pitch,
                                          &pOut->height,
                                          &pOut->cmaskBytes,
                                          &pOut->macroWidth,
                                          &pOut->macroHeight,
                                          &pOut->sliceSize,
                                          &pOut->baseAlign,
                                          &pOut->blockMax);

            // Validate the base alignment only once the computation above has run.
            // On the earlier failure paths pOut->baseAlign was never written by this
            // function, so checking it there would inspect stale caller data.
            ValidMetaBaseAlignments(pOut->baseAlign);
        }
    }

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::ComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info
*
*   @return
*       return code of HwlComputeDccInfo
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeDccInfo(
    const ADDR_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
    ADDR_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    // Reject structures whose size fields do not match, when that check is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) ||
         (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    ADDR_COMPUTE_DCCINFO_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Expand the tile index into tile info / tile mode on a private copy of the input.
        localIn = *pIn;

        const ADDR_E_RETURNCODE cfgStatus =
            HwlSetupTileCfg(localIn.bpp, localIn.tileIndex, localIn.macroModeIndex,
                            &localIn.tileInfo, &localIn.tileMode);

        if (cfgStatus != ADDR_OK)
        {
            return cfgStatus;
        }

        pIn = &localIn;
    }

    const ADDR_E_RETURNCODE status = HwlComputeDccInfo(pIn, pOut);

    // The alignment is validated whatever the hardware layer returned.
    ValidMetaBaseAlignments(pOut->dccRamBaseAlign);

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputeHtileAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeHtileAddrFromCoord
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord(
    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    // Translate the requested htile block dimensions into "is 8" flags up front.
    const BOOL_32 blockWidthIs8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
    const BOOL_32 blockHeightIs8 = (pIn->blockHeight == 8) ? TRUE : FALSE;

    // Reject structures whose size fields do not match, when that check is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    ADDR_TILEINFO scratchTileInfo;
    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into scratch tile info on a private copy of the input.
        localIn = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_E_RETURNCODE cfgStatus =
            HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        if (cfgStatus != ADDR_OK)
        {
            return cfgStatus;
        }

        pIn = &localIn;
    }

    if (pIn->flags.tcCompatible)
    {
        // NOTE(review): whatever this call returns (if anything) is not propagated,
        // whereas ComputeCmaskAddrFromCoord forwards the result of its Hwl call -
        // confirm that this difference is intended.
        HwlComputeHtileAddrFromCoord(pIn, pOut);
    }
    else
    {
        // Shared cmask/htile address path; the literal 1 is the "factor" argument used for htile.
        pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
                                                  pIn->height,
                                                  pIn->x,
                                                  pIn->y,
                                                  pIn->slice,
                                                  pIn->numSlices,
                                                  1,
                                                  pIn->isLinear,
                                                  blockWidthIs8,
                                                  blockHeightIs8,
                                                  pIn->pTileInfo,
                                                  &pOut->bitPosition);
    }

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::ComputeHtileCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeHtileCoordFromAddr
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr(
    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    // Translate the requested htile block dimensions into "is 8" flags up front.
    const BOOL_32 blockWidthIs8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
    const BOOL_32 blockHeightIs8 = (pIn->blockHeight == 8) ? TRUE : FALSE;

    // Reject structures whose size fields do not match, when that check is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) ||
         (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    ADDR_TILEINFO scratchTileInfo;
    ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into scratch tile info on a private copy of the input.
        localIn = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_E_RETURNCODE cfgStatus =
            HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        if (cfgStatus != ADDR_OK)
        {
            return cfgStatus;
        }

        pIn = &localIn;
    }

    // Shared cmask/htile coordinate path; the literal 1 is the "factor" argument used for htile.
    HwlComputeXmaskCoordFromAddr(pIn->addr,
                                 pIn->bitPosition,
                                 pIn->pitch,
                                 pIn->height,
                                 pIn->numSlices,
                                 1,
                                 pIn->isLinear,
                                 blockWidthIs8,
                                 blockHeightIs8,
                                 pIn->pTileInfo,
                                 &pOut->x,
                                 &pOut->y,
                                 &pOut->slice);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeCmaskAddrFromCoord
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord(
    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    // Reject structures whose size fields do not match, when that check is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    ADDR_TILEINFO scratchTileInfo;
    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into scratch tile info on a private copy of the input.
        localIn = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_E_RETURNCODE cfgStatus =
            HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        if (cfgStatus != ADDR_OK)
        {
            return cfgStatus;
        }

        pIn = &localIn;
    }

    if (pIn->flags.tcCompatible == TRUE)
    {
        // The tc-compatible layout is handled entirely by the hardware layer.
        return HwlComputeCmaskAddrFromCoord(pIn, pOut);
    }

    // Shared cmask/htile address path; the literal 2 is the "factor" argument used for cmask,
    // and the two FALSE arguments are the htile-only isWidth8 / isHeight8 flags.
    pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
                                              pIn->height,
                                              pIn->x,
                                              pIn->y,
                                              pIn->slice,
                                              pIn->numSlices,
                                              2,
                                              pIn->isLinear,
                                              FALSE,
                                              FALSE,
                                              pIn->pTileInfo,
                                              &pOut->bitPosition);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeCmaskCoordFromAddr
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr(
    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    // Reject structures whose size fields do not match, when that check is enabled.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) ||
         (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    ADDR_TILEINFO scratchTileInfo;
    ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT localIn;

    if (UseTileIndex(pIn->tileIndex))
    {
        // Resolve the tile index into scratch tile info on a private copy of the input.
        localIn = *pIn;
        localIn.pTileInfo = &scratchTileInfo;

        const ADDR_E_RETURNCODE cfgStatus =
            HwlSetupTileCfg(0, localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);

        if (cfgStatus != ADDR_OK)
        {
            return cfgStatus;
        }

        pIn = &localIn;
    }

    // Shared cmask/htile coordinate path; the literal 2 is the "factor" argument used for cmask,
    // and the two FALSE arguments fill the isWidth8 / isHeight8 slots.
    HwlComputeXmaskCoordFromAddr(pIn->addr,
                                 pIn->bitPosition,
                                 pIn->pitch,
                                 pIn->height,
                                 pIn->numSlices,
                                 2,
                                 pIn->isLinear,
                                 FALSE,
                                 FALSE,
                                 pIn->pTileInfo,
                                 &pOut->x,
                                 &pOut->y,
                                 &pOut->slice);

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::ComputeTileDataWidthAndHeight
*
*   @brief
*       Compute the squared cache shape for per-tile data (CMASK and HTILE)
*
*   @return
*       N/A
*
*   @note
*       MacroWidth and macroHeight are measured in pixels
****************************************************************************************************
*/
VOID Lib::ComputeTileDataWidthAndHeight(
    UINT_32         bpp,             ///< [in] bits per pixel
    UINT_32         cacheBits,       ///< [in] bits of cache
    ADDR_TILEINFO*  pTileInfo,       ///< [in] Tile info
    UINT_32*        pMacroWidth,     ///< [out] macro tile width
    UINT_32*        pMacroHeight     ///< [out] macro tile height
    ) const
{
    UINT_32 rows           = 1;
    UINT_32 elemsPerRow    = cacheBits / bpp;
    const UINT_32 numPipes = HwlGetPipes(pTileInfo);

    // Start from a single row holding every element that fits in the cache, then keep
    // folding the row in half (doubling the row count) until the shape is close to
    // square. A fold is only possible while the row length is even.
    while ((elemsPerRow > rows * 2 * numPipes) && ((elemsPerRow & 1) == 0))
    {
        elemsPerRow >>= 1;
        rows        <<= 1;
    }

    // Scale both dimensions by 8; the height is additionally multiplied by the pipe count.
    *pMacroWidth  = 8 * elemsPerRow;
    *pMacroHeight = 8 * rows * numPipes;

    // Note: The above iterative computation is equivalent to the following
    //
    //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2);
    //int macroHeight = pow2( 3+log2(pipes)+log2_height );
}

/**
****************************************************************************************************
*   Lib::HwlComputeTileDataWidthAndHeightLinear
*
*   @brief
*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
*
*   @return
*       N/A
*
*   @note
*       MacroWidth and macroHeight are measured in pixels
****************************************************************************************************
*/
VOID Lib::HwlComputeTileDataWidthAndHeightLinear(
    UINT_32*        pMacroWidth,     ///< [out] macro tile width
    UINT_32*        pMacroHeight,    ///< [out] macro tile height
    UINT_32         bpp,             ///< [in] bits per pixel
    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info
    ) const
{
    // Cmask does not support linear layout prior to SI
    ADDR_ASSERT(bpp != 4);

    // Width is aligned to 512-bit memory accesses.
    const UINT_32 memAccessBits = 512;
    *pMacroWidth = 8 * memAccessBits / bpp;

    // Height is aligned to the number of pipes; pTileInfo is not consulted here.
    *pMacroHeight = 8 * m_pipes;
}

/**
****************************************************************************************************
*   Lib::ComputeHtileInfo
*
*   @brief
*       Compute htile pitch,width, bytes per 2D slice
*
*   @return
*       Htile bpp i.e. How many bits for an 8x8 tile
*       Also returns by output parameters:
*       *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size*
****************************************************************************************************
*/
UINT_32 Lib::ComputeHtileInfo(
    ADDR_HTILE_FLAGS flags,             ///< [in] htile flags
    UINT_32          pitchIn,           ///< [in] pitch input
    UINT_32          heightIn,          ///< [in] height input
    UINT_32          numSlices,         ///< [in] number of slices
    BOOL_32          isLinear,          ///< [in] if it is linear mode
    BOOL_32          isWidth8,          ///< [in] if htile block width is 8
    BOOL_32          isHeight8,         ///< [in] if htile block height is 8
    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
    UINT_32*         pPitchOut,         ///< [out] pitch output
    UINT_32*         pHeightOut,        ///< [out] height output
    UINT_64*         pHtileBytes,       ///< [out] htile size in bytes as returned by HwlComputeHtileBytes
    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels (optional)
    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels (optional)
    UINT_64*         pSliceSize,        ///< [out] slice size in bytes (optional)
    UINT_32*         pBaseAlign         ///< [out] base alignment (optional)
    ) const
{
    UINT_32 tileMacroWidth;
    UINT_32 tileMacroHeight;
    UINT_64 bytesPerSlice;

    // A slice count of zero is treated as a single slice.
    numSlices = Max(1u, numSlices);

    const UINT_32 htileBpp       = HwlComputeHtileBpp(isWidth8, isHeight8);
    const UINT_32 htileCacheBits = HtileCacheBits;

    // Pick the macro-tile shape: tiled layouts derive it from the cache size,
    // linear layouts use the dedicated linear helper.
    if (!isLinear)
    {
        ComputeTileDataWidthAndHeight(htileBpp,
                                      htileCacheBits,
                                      pTileInfo,
                                      &tileMacroWidth,
                                      &tileMacroHeight);
    }
    else
    {
        HwlComputeTileDataWidthAndHeightLinear(&tileMacroWidth,
                                               &tileMacroHeight,
                                               htileBpp,
                                               pTileInfo);
    }

    // Pad the surface dimensions up to whole macro tiles.
    *pPitchOut  = PowTwoAlign(pitchIn, tileMacroWidth);
    *pHeightOut = PowTwoAlign(heightIn, tileMacroHeight);

    const UINT_32 alignment = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo);

    // The hardware layer returns the surface size and fills in the per-slice size.
    const UINT_64 totalBytes = HwlComputeHtileBytes(*pPitchOut,
                                                    *pHeightOut,
                                                    htileBpp,
                                                    isLinear,
                                                    numSlices,
                                                    &bytesPerSlice,
                                                    alignment);

    *pHtileBytes = totalBytes;

    //
    // The remaining outputs are optional, hence SafeAssign
    //
    SafeAssign(pMacroWidth, tileMacroWidth);
    SafeAssign(pMacroHeight, tileMacroHeight);
    SafeAssign(pSliceSize, bytesPerSlice);
    SafeAssign(pBaseAlign, alignment);

    return htileBpp;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskBaseAlign
*
*   @brief
*       Compute cmask base alignment
*
*   @return
*       Cmask base alignment
****************************************************************************************************
*/
UINT_32 Lib::ComputeCmaskBaseAlign(
    ADDR_CMASK_FLAGS flags,           ///< [in] Cmask flags
    ADDR_TILEINFO*   pTileInfo        ///< [in] Tile info
    ) const
{
    // Default alignment: pipe interleave size times the number of pipes.
    const UINT_32 pipeAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);

    if (!flags.tcCompatible)
    {
        return pipeAlign;
    }

    // The tc-compatible case additionally scales by the bank count, which needs tile info.
    ADDR_ASSERT(pTileInfo != NULL);

    UINT_32 scaledAlign = pipeAlign;

    if (pTileInfo != NULL)
    {
        scaledAlign *= pTileInfo->banks;
    }

    return scaledAlign;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskBytes
*
*   @brief
*       Compute cmask size in bytes
*
*   @return
*       Cmask size in bytes
****************************************************************************************************
*/
UINT_64 Lib::ComputeCmaskBytes(
    UINT_32 pitch,        ///< [in] pitch
    UINT_32 height,       ///< [in] height
    UINT_32 numSlices     ///< [in] number of slices
    ) const
{
    // Widen to 64 bits before multiplying so the product is not evaluated in 32 bits.
    const UINT_64 totalBits = static_cast<UINT_64>(pitch) * height * numSlices * CmaskElemBits;

    // Convert bits to bytes, then divide by MicroTilePixels.
    return BITS_TO_BYTES(totalBits) / MicroTilePixels;
}

/**
****************************************************************************************************
*   Lib::ComputeCmaskInfo
*
*   @brief
*       Compute cmask pitch,width, bytes per 2D slice
*
*   @return
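*       ADDR_OK, or ADDR_INVALIDPARAMS when the computed block max exceeds
*       HwlGetMaxCmaskBlockMax() (the reported value is then clamped to that maximum).
*       Also by output parameters: Cmask pitch, height, total size in bytes,
*       macro-tile dimensions, slice size, base alignment and block max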
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
    ADDR_CMASK_FLAGS flags,            ///< [in] cmask flags
    UINT_32          pitchIn,           ///< [in] pitch input
    UINT_32          heightIn,          ///< [in] height input
    UINT_32          numSlices,         ///< [in] number of slices
    BOOL_32          isLinear,          ///< [in] is linear mode
    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
    UINT_32*         pPitchOut,         ///< [out] pitch output
    UINT_32*         pHeightOut,        ///< [out] height output
    UINT_64*         pCmaskBytes,       ///< [out] cmask size in bytes for all slices
    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels (optional)
    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels (optional)
    UINT_64*         pSliceSize,        ///< [out] slice size in bytes (optional)
    UINT_32*         pBaseAlign,        ///< [out] base alignment (optional)
    UINT_32*         pBlockMax          ///< [out] block max == slice / 128 / 128 - 1 (optional)
    ) const
{
    UINT_32 tileMacroWidth;
    UINT_32 tileMacroHeight;

    // A slice count of zero is treated as a single slice.
    numSlices = Max(1u, numSlices);

    const UINT_32 cmaskBpp       = CmaskElemBits;
    const UINT_32 cmaskCacheBits = CmaskCacheBits;

    // Pick the macro-tile shape: tiled layouts derive it from the cache size,
    // linear layouts use the dedicated linear helper.
    if (!isLinear)
    {
        ComputeTileDataWidthAndHeight(cmaskBpp,
                                      cmaskCacheBits,
                                      pTileInfo,
                                      &tileMacroWidth,
                                      &tileMacroHeight);
    }
    else
    {
        HwlComputeTileDataWidthAndHeightLinear(&tileMacroWidth,
                                               &tileMacroHeight,
                                               cmaskBpp,
                                               pTileInfo);
    }

    // Round the dimensions up to whole macro tiles (mask arithmetic, so the
    // macro-tile dimensions are expected to be powers of two).
    *pPitchOut  = (pitchIn + tileMacroWidth - 1) & ~(tileMacroWidth - 1);
    *pHeightOut = (heightIn + tileMacroHeight - 1) & ~(tileMacroHeight - 1);

    UINT_64 bytesPerSlice = ComputeCmaskBytes(*pPitchOut, *pHeightOut, 1);

    const UINT_32 alignment = ComputeCmaskBaseAlign(flags, pTileInfo);

    // Grow the height one macro-tile row at a time until a slice is a multiple
    // of the base alignment.
    while ((bytesPerSlice % alignment) != 0)
    {
        *pHeightOut += tileMacroHeight;

        bytesPerSlice = ComputeCmaskBytes(*pPitchOut, *pHeightOut, 1);
    }

    const UINT_64 totalBytes = bytesPerSlice * numSlices;

    *pCmaskBytes = totalBytes;

    //
    // The remaining outputs are optional, hence SafeAssign
    //
    SafeAssign(pMacroWidth, tileMacroWidth);
    SafeAssign(pMacroHeight, tileMacroHeight);
    SafeAssign(pBaseAlign, alignment);
    SafeAssign(pSliceSize, bytesPerSlice);

    // Block max is the number of 128x128 blocks in a slice, minus one.
    UINT_32 slice = (*pPitchOut) * (*pHeightOut);
    UINT_32 blockMax = slice / 128 / 128 - 1;

#if DEBUG
    if (slice % (64*256) != 0)
    {
        ADDR_ASSERT_ALWAYS();
    }
#endif //DEBUG

    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Clamp to the hardware limit and report the overflow to the caller.
    const UINT_32 blockMaxLimit = HwlGetMaxCmaskBlockMax();

    if (blockMax > blockMaxLimit)
    {
        blockMax   = blockMaxLimit;
        returnCode = ADDR_INVALIDPARAMS;
    }

    SafeAssign(pBlockMax, blockMax);

    return returnCode;
}

/**
****************************************************************************************************
*   Lib::ComputeXmaskCoordYFromPipe
*
*   @brief
*       Compute the Y coord from pipe number for cmask/htile
*
*   @return
*       Y coordinate
*
****************************************************************************************************
*/
UINT_32 Lib::ComputeXmaskCoordYFromPipe(
    UINT_32         pipe,       ///< [in] pipe number
    UINT_32         x           ///< [in] x coordinate
    ) const
{
    const UINT_32 numPipes = m_pipes; // SI has its implementation

    // With 1 pipe (p0 = 0), or any pipe count not handled below, y stays 0.
    UINT_32 y = 0;

    //
    // Invert the pipe equations to recover y from pipe + x.
    //
    if (numPipes == 2)
    {
        //
        // p0 = x0 ^ y0   =>   y0 = p0 ^ x0
        //
        y = (pipe ^ x) & 0x1;
    }
    else if (numPipes == 4)
    {
        //
        // p0 = x1 ^ y0   =>   y0 = p0 ^ x1
        // p1 = x0 ^ y1   =>   y1 = p1 ^ x0
        //
        const UINT_32 y0 = (pipe ^ (x >> 1)) & 0x1;
        const UINT_32 y1 = ((pipe >> 1) ^ x) & 0x1;

        y = y0 | (y1 << 1);
    }
    else if (numPipes == 8)
    {
        //
        // r600 and r800 have different method
        //
        y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x);
    }

    return y;
}

/**
****************************************************************************************************
*   Lib::HwlComputeXmaskCoordFromAddr
*
*   @brief
*       Compute the coord from an address of a cmask/htile
*
*   @return
*       N/A
*
*   @note
*       This method is reused by htile, so rename to Xmask
****************************************************************************************************
*/
VOID Lib::HwlComputeXmaskCoordFromAddr(
    UINT_64         addr,           ///< [in] address
    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
    UINT_32         pitch,          ///< [in] pitch
    UINT_32         height,         ///< [in] height
    UINT_32         numSlices,      ///< [in] number of slices
    UINT_32         factor,         ///< [in] factor that indicates cmask (2) or htile (any other value)
    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
    BOOL_32         isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
    BOOL_32         isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
    UINT_32*        pX,             ///< [out] x coord
    UINT_32*        pY,             ///< [out] y coord
    UINT_32*        pSlice          ///< [out] slice index
    ) const
{
    //
    // The pipe index is recovered from the raw address before the pipe bits are stripped.
    //
    const UINT_32 numPipes = HwlGetPipes(pTileInfo);
    const UINT_32 pipe     = ComputePipeFromAddr(addr, numPipes);

    //
    // Group/pipe bit counts. numGroupBits is not consumed further down; the Log2 call is
    // retained exactly as it was.
    //
    UINT_32 numGroupBits;
    numGroupBits = Log2(m_pipeInterleaveBytes);
    const UINT_32 numPipeBits = Log2(numPipes);

    // One pipe-interleave group, measured in bits.
    const UINT_32 bitsPerGroup = 8 * m_pipeInterleaveBytes;

    //
    // Query element size, aligned dimensions and macro tile dimensions for the mask type.
    //
    UINT_32 elemBits;
    UINT_32 alignedPitch  = pitch;
    UINT_32 alignedHeight = height;
    UINT_64 totalBytes;
    UINT_32 macroTilePitch;
    UINT_32 macroTileHeight;

    if (factor != 2) // HTILE
    {
        ADDR_HTILE_FLAGS htileFlags = {{0}};

        // Anything that is not the cmask factor is treated as htile (factor 1).
        factor = 1;

        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);

        ComputeHtileInfo(htileFlags,
                         pitch,
                         height,
                         numSlices,
                         isLinear,
                         isWidth8,
                         isHeight8,
                         pTileInfo,
                         &alignedPitch,
                         &alignedHeight,
                         &totalBytes,
                         &macroTilePitch,
                         &macroTileHeight);
    }
    else // CMASK
    {
        ADDR_CMASK_FLAGS cmaskFlags = {{0}};

        elemBits = CmaskElemBits;

        ComputeCmaskInfo(cmaskFlags,
                         pitch,
                         height,
                         numSlices,
                         isLinear,
                         pTileInfo,
                         &alignedPitch,
                         &alignedHeight,
                         &totalBytes,
                         &macroTilePitch,
                         &macroTileHeight);
    }

    //
    // Byte address -> bit address, then drop the pipe bits that sit above each group.
    //
    const UINT_64 rawBitAddr = BYTES_TO_BITS(addr) + bitPosition;
    const UINT_64 bitAddr    = (rawBitAddr % bitsPerGroup) +
                               ((rawBitAddr / bitsPerGroup / numPipes) * bitsPerGroup);

    const UINT_64 elemOffset = bitAddr / elemBits;

    //
    // All remaining math works on the aligned dimensions and on the macro tile pitch divided
    // by factor.
    //
    const UINT_32 subTilePitch = macroTilePitch / factor;

    const UINT_32 tilesPerMacro  = subTilePitch * macroTileHeight / MicroTilePixels >> numPipeBits;
    const UINT_32 macrosPerPitch = alignedPitch / subTilePitch;
    const UINT_32 macrosPerSlice = macrosPerPitch * alignedHeight / macroTileHeight;

    //
    // Split the element offset into a macro tile number and a micro tile number.
    //
    const UINT_64 macroIndex = elemOffset / factor / tilesPerMacro;
    const UINT_32 microIndex = static_cast<UINT_32>(elemOffset % (tilesPerMacro * factor));

    const UINT_64 macroNumber = macroIndex * factor + microIndex % factor;
    const UINT_32 microNumber = microIndex / factor;

    const UINT_32 macroX = static_cast<UINT_32>(macroNumber % macrosPerPitch);
    const UINT_32 macroY = static_cast<UINT_32>((macroNumber % macrosPerSlice) / macrosPerPitch);
    const UINT_32 macroZ = static_cast<UINT_32>(macroNumber / macrosPerSlice);

    const UINT_32 microX = microNumber % (subTilePitch / MicroTileWidth);
    const UINT_32 microY = microNumber / (subTilePitch / MicroTileHeight);

    *pX     = macroX * subTilePitch + microX * MicroTileWidth;
    *pY     = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits);
    *pSlice = macroZ;

    //
    // The pipe contributes the remaining micro tile rows of y.
    //
    const UINT_32 microTileCoordY = ComputeXmaskCoordYFromPipe(pipe,
                                                               *pX / MicroTileWidth);

    *pY += microTileCoordY * MicroTileHeight;
}

/**
****************************************************************************************************
*   Lib::HwlComputeXmaskAddrFromCoord
*
*   @brief
*       Compute the address of a cmask/htile element from its coordinates (prior to si)
*
*   @return
*       Address in bytes
*
****************************************************************************************************
*/
UINT_64 Lib::HwlComputeXmaskAddrFromCoord(
    UINT_32        pitch,          ///< [in] pitch
    UINT_32        height,         ///< [in] height
    UINT_32        x,              ///< [in] x coord
    UINT_32        y,              ///< [in] y coord
    UINT_32        slice,          ///< [in] slice/depth index
    UINT_32        numSlices,      ///< [in] number of slices
    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
    ) const
{
    UINT_32 newPitch = 0;
    UINT_32 newHeight = 0;
    UINT_64 sliceBytes = 0;
    UINT_64 totalBytes = 0;
    UINT_32 macroTileWidth;
    UINT_32 macroTileHeight;

    UINT_32 elemBits = 0;

    const UINT_32 numPipes = m_pipes; // This function is accessed prior to si only

    if (factor == 2) //CMASK
    {
        elemBits = CmaskElemBits;

        // For asics before SI, cmask is always tiled
        isLinear = FALSE;
    }
    else //HTILE
    {
        // Any factor other than 2 is handled as htile; normalize it so the later
        // "macroTileWidth / factor" uses 1.
        factor = 1;

        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
    }

    //
    // Compute the number of group bits and pipe bits.
    //
    const UINT_32 numGroupBits = Log2(m_pipeInterleaveBytes);
    const UINT_32 numPipeBits  = Log2(numPipes);

    //
    // Compute macro tile dimensions, aligned dimensions and the per-slice size.
    //
    if (factor == 2) // CMASK
    {
        ADDR_CMASK_FLAGS flags = {{0}};

        ComputeCmaskInfo(flags,
                         pitch,
                         height,
                         numSlices,
                         isLinear,
                         pTileInfo,
                         &newPitch,
                         &newHeight,
                         &totalBytes,
                         &macroTileWidth,
                         &macroTileHeight);

        // ComputeCmaskInfo only reports the total size; derive the slice size from it.
        sliceBytes = totalBytes / numSlices;
    }
    else // HTILE
    {
        ADDR_HTILE_FLAGS flags = {{0}};

        ComputeHtileInfo(flags,
                         pitch,
                         height,
                         numSlices,
                         isLinear,
                         isWidth8,
                         isHeight8,
                         pTileInfo,
                         &newPitch,
                         &newHeight,
                         &totalBytes,
                         &macroTileWidth,
                         &macroTileHeight,
                         &sliceBytes);
    }

    const UINT_64 sliceOffset = slice * sliceBytes;

    //
    // Get the pipe.  Note that neither slice rotation nor pipe swizzling apply for CMASK.
    //
    const UINT_32 pipe = ComputePipeFromCoord(x,
                                              y,
                                              0,
                                              ADDR_TM_2D_TILED_THIN1,
                                              0,
                                              FALSE,
                                              pTileInfo);

    //
    // Compute the number of macro tiles per row.
    //
    const UINT_32 macroTilesPerRow = newPitch / macroTileWidth;

    //
    // Compute the number of bytes per macro tile.
    //
    const UINT_32 macroTileBytes =
        BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels);

    //
    // Compute the offset to the macro tile containing the specified coordinate.
    // The product is evaluated in 64 bits so that it cannot wrap at 32 bits before being
    // stored in the 64-bit offset.
    //
    const UINT_32 macroTileIndexX = x / macroTileWidth;
    const UINT_32 macroTileIndexY = y / macroTileHeight;
    const UINT_64 macroTileOffset =
        ((static_cast<UINT_64>(macroTileIndexY) * macroTilesPerRow) + macroTileIndexX) *
        macroTileBytes;

    //
    // Compute the pixel offset within the macro tile.
    //
    const UINT_32 pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth;

    //
    // The nibbles are interleaved (see below), so the part of the offset relative to the x
    // coordinate repeats halfway across the row. (Not for HTILE)
    //
    UINT_32 pixelOffsetX;

    if (factor == 2)
    {
        pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth;
    }
    else
    {
        pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits);
    }

    //
    // Compute the y offset within the macro tile.
    //
    const UINT_32 pixelOffsetY =
        (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow;

    const UINT_32 pixelOffset = pixelOffsetX + pixelOffsetY;

    //
    // Combine the slice offset and macro tile offset with the pixel offset, accounting for the
    // pipe bits in the middle of the address.
    //
    const UINT_64 totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset;

    //
    // Split the offset to put some bits below the pipe bits and some above.
    // The mask is built in 64 bits to match the width of the offset it is applied to.
    //
    const UINT_64 groupMask = (static_cast<UINT_64>(1) << numGroupBits) - 1;
    const UINT_64 offsetLo  = totalOffset &  groupMask;
    const UINT_64 offsetHi  = (totalOffset & ~groupMask) << numPipeBits;

    //
    // Assemble the address from its components.
    //
    UINT_64 addr = offsetLo;
    addr |= offsetHi;
    // This is to remove warning with /analyze option
    const UINT_32 pipeBits = pipe << numGroupBits;
    addr |= pipeBits;

    //
    // Compute the bit position.  The lower nibble is used when the x coordinate within the macro
    // tile is less than half of the macro tile width, and the upper nibble is used when the x
    // coordinate within the macro tile is greater than or equal to half the macro tile width.
    // With factor 1 (HTILE) the comparison is always true, so the position is always 0.
    //
    *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 0 : 4;

    return addr;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Surface Addressing Shared
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
****************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoordLinear
*
*   @brief
*       Compute address from coord for linear surface
*
*   @return
*       Address in bytes
*
****************************************************************************************************
*/
UINT_64 Lib::ComputeSurfaceAddrFromCoordLinear(
    UINT_32  x,              ///< [in] x coord
    UINT_32  y,              ///< [in] y coord
    UINT_32  slice,          ///< [in] slice/depth index
    UINT_32  sample,         ///< [in] sample index
    UINT_32  bpp,            ///< [in] bits per pixel
    UINT_32  pitch,          ///< [in] pitch
    UINT_32  height,         ///< [in] height
    UINT_32  numSlices,      ///< [in] number of slices
    UINT_32* pBitPosition    ///< [out] bit position inside a byte
    ) const
{
    // Number of elements in one slice; widened first so pitch * height is a 64-bit product.
    const UINT_64 elemsPerSlice = static_cast<UINT_64>(pitch) * height;

    // Linear element index: samples are the outermost dimension, then slices, rows, columns.
    const UINT_64 elemIndex = ((slice + sample * numSlices) * elemsPerSlice) +
                              (static_cast<UINT_64>(y) * pitch) +
                              x;

    // Scale to bits, then split into a whole-byte address and the leftover bit position.
    const UINT_64 bitOffset = elemIndex * bpp;

    *pBitPosition = static_cast<UINT_32>(bitOffset % 8);

    return bitOffset / 8;
}

/**
****************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddrLinear
*
*   @brief
*       Compute the coord from an address of a linear surface
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID Lib::ComputeSurfaceCoordFromAddrLinear(
    UINT_64  addr,           ///< [in] address
    UINT_32  bitPosition,    ///< [in] bitPosition in a byte
    UINT_32  bpp,            ///< [in] bits per pixel
    UINT_32  pitch,          ///< [in] pitch
    UINT_32  height,         ///< [in] height
    UINT_32  numSlices,      ///< [in] number of slices
    UINT_32* pX,             ///< [out] x coord
    UINT_32* pY,             ///< [out] y coord
    UINT_32* pSlice,         ///< [out] slice/depth index
    UINT_32* pSample         ///< [out] sample index
    ) const
{
    const UINT_64 elemsPerSlice = static_cast<UINT_64>(pitch) * height;

    // Bit address divided by the element size gives the linear element index.
    const UINT_64 elemIndex = (BYTES_TO_BITS(addr) + bitPosition) / bpp;

    // Position inside the slice, and the combined slice/sample counter above it.
    const UINT_64 indexInSlice  = elemIndex % elemsPerSlice;
    const UINT_64 sliceAndSample = elemIndex / elemsPerSlice;

    *pX      = static_cast<UINT_32>(indexInSlice % pitch);
    *pY      = static_cast<UINT_32>(indexInSlice / pitch % height);
    *pSlice  = static_cast<UINT_32>(sliceAndSample % numSlices);
    *pSample = static_cast<UINT_32>(sliceAndSample / numSlices);
}

/**
****************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddrMicroTiled
*
*   @brief
*       Compute the coord from an address of a micro tiled surface
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled(
    UINT_64         addr,               ///< [in] address
    UINT_32         bitPosition,        ///< [in] bitPosition in a byte
    UINT_32         bpp,                ///< [in] bits per pixel
    UINT_32         pitch,              ///< [in] pitch
    UINT_32         height,             ///< [in] height
    UINT_32         numSamples,         ///< [in] number of samples
    AddrTileMode    tileMode,           ///< [in] tile mode
    UINT_32         tileBase,           ///< [in] base offset within a tile
    UINT_32         compBits,           ///< [in] component bits actually needed(for planar surface)
    UINT_32*        pX,                 ///< [out] x coord
    UINT_32*        pY,                 ///< [out] y coord
    UINT_32*        pSlice,             ///< [out] slice/depth index
    UINT_32*        pSample,            ///< [out] sample index,
    AddrTileType    microTileType,      ///< [in] micro tiling order
    BOOL_32         isDepthSampleOrder  ///< [in] TRUE if in depth sample order
    ) const
{
    UINT_64 bitAddr;
    UINT_32 microTileThickness;
    UINT_32 microTileBits;
    UINT_64 sliceBits;
    UINT_64 rowBits;
    UINT_32 sliceIndex;
    UINT_32 microTileCoordX;
    UINT_32 microTileCoordY;
    UINT_32 pixelOffset;
    UINT_32 pixelCoordX = 0;
    UINT_32 pixelCoordY = 0;
    UINT_32 pixelCoordZ = 0;
    UINT_32 pixelCoordS = 0;

    //
    // Convert byte address to bit address.
    //
    bitAddr = BYTES_TO_BITS(addr) + bitPosition;

    //
    // Compute the micro tile size, in bits. Only the 1D thick mode uses a thick micro tile
    // here; every other mode is treated as one slice thick.
    //
    switch (tileMode)
    {
        case ADDR_TM_1D_TILED_THICK:
            microTileThickness = ThickTileThickness;
            break;
        default:
            microTileThickness = 1;
            break;
    }

    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;

    //
    // Compute number of bits per slice and number of bits per row of micro tiles.
    // Both are 64-bit quantities, so both products are widened before multiplying; rowBits
    // was previously evaluated in 32 bits and only then stored into the 64-bit variable.
    //
    sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples;

    rowBits   = static_cast<UINT_64>(pitch / MicroTileWidth) * microTileBits;

    //
    // Extract the slice index.
    //
    sliceIndex = static_cast<UINT_32>(bitAddr / sliceBits);
    bitAddr -= sliceIndex * sliceBits;

    //
    // Extract the y coordinate of the micro tile.
    //
    microTileCoordY = static_cast<UINT_32>(bitAddr / rowBits) * MicroTileHeight;
    bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits;

    //
    // Extract the x coordinate of the micro tile.
    //
    microTileCoordX = static_cast<UINT_32>(bitAddr / microTileBits) * MicroTileWidth;

    //
    // Compute the pixel offset within the micro tile.
    //
    pixelOffset = static_cast<UINT_32>(bitAddr % microTileBits);

    //
    // Extract pixel coordinates from the offset.
    //
    HwlComputePixelCoordFromOffset(pixelOffset,
                                   bpp,
                                   numSamples,
                                   tileMode,
                                   tileBase,
                                   compBits,
                                   &pixelCoordX,
                                   &pixelCoordY,
                                   &pixelCoordZ,
                                   &pixelCoordS,
                                   microTileType,
                                   isDepthSampleOrder);

    //
    // Assemble final coordinates.
    //
    *pX     = microTileCoordX + pixelCoordX;
    *pY     = microTileCoordY + pixelCoordY;
    *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ;
    *pSample = pixelCoordS;

    // For a thick micro tile the reported sample index is forced to 0.
    if (microTileThickness > 1)
    {
        *pSample = 0;
    }
}

/**
****************************************************************************************************
*   Lib::ComputePipeFromAddr
*
*   @brief
*       Compute the pipe number from an address
*
*   @return
*       Pipe number
*
****************************************************************************************************
*/
UINT_32 Lib::ComputePipeFromAddr(
    UINT_64 addr,        ///< [in] address
    UINT_32 numPipes     ///< [in] number of pipes
    ) const
{
    // Address LSB layout, most significant field first:
    //   R600:  bank | pipe | group
    //   R800:  bank | bankInterleave | pipe | pipeInterleave
    //
    // "group" and "pipeInterleave" are two names for the same field, so in both cases the
    // pipe number sits directly above m_pipeInterleaveBytes worth of address bits.
    const UINT_32 interleaveShift = Log2(m_pipeInterleaveBytes);

    // numPipes - 1 is used as the mask that keeps only the pipe bits.
    const UINT_32 pipeMask = numPipes - 1;

    return static_cast<UINT_32>(addr >> interleaveShift) & pipeMask;
}

/**
****************************************************************************************************
*   Lib::ComputeMicroTileEquation
*
*   @brief
*       Compute micro tile equation
*
*   @return
*       If equation can be computed
*
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation(
    UINT_32         log2BytesPP,    ///< [in] log2 of bytes per pixel
    AddrTileMode    tileMode,       ///< [in] tile mode
    AddrTileType    microTileType,  ///< [in] pixel order in display/non-display mode
    ADDR_EQUATION*  pEquation       ///< [out] equation
    ) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    // The address bits below log2BytesPP are filled one-to-one: valid, channel 0, index equal
    // to the bit number.
    for (UINT_32 bit = 0; bit < log2BytesPP; bit++)
    {
        pEquation->addr[bit].valid   = 1;
        pEquation->addr[bit].channel = 0;
        pEquation->addr[bit].index   = bit;
    }

    // Every table below is written relative to the first address bit above those.
    ADDR_CHANNEL_SETTING* const pBit = &pEquation->addr[log2BytesPP];

    // Channel settings named after the coordinate bit they stand for: channel 0 for the x
    // settings (index offset by log2BytesPP), channel 1 for y, channel 2 for z.
    const ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0);
    const ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1);
    const ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2);
    const ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0);
    const ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1);
    const ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2);
    const ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0);
    const ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1);
    const ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2);

    const UINT_32 thickness = Thickness(tileMode);

    // Bits per pixel: bytes per pixel (1 << log2BytesPP) times 8.
    const UINT_32 bpp = 1 << (log2BytesPP + 3);

    if (microTileType == ADDR_THICK)
    {
        ADDR_ASSERT(thickness > 1);

        switch (bpp)
        {
            case 8:
            case 16:
                pBit[0] = x0; pBit[1] = y0; pBit[2] = x1; pBit[3] = y1; pBit[4] = z0; pBit[5] = z1;
                break;
            case 32:
                pBit[0] = x0; pBit[1] = y0; pBit[2] = x1; pBit[3] = z0; pBit[4] = y1; pBit[5] = z1;
                break;
            case 64:
            case 128:
                pBit[0] = x0; pBit[1] = y0; pBit[2] = z0; pBit[3] = x1; pBit[4] = y1; pBit[5] = z1;
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                break;
        }

        // The top x/y bits always follow the six table entries in the thick order.
        pBit[6] = x2;
        pBit[7] = y2;
        pEquation->numBits = 8 + log2BytesPP;
    }
    else
    {
        if (microTileType == ADDR_DISPLAYABLE)
        {
            switch (bpp)
            {
                case 8:
                    pBit[0] = x0; pBit[1] = x1; pBit[2] = x2; pBit[3] = y1; pBit[4] = y0; pBit[5] = y2;
                    break;
                case 16:
                    pBit[0] = x0; pBit[1] = x1; pBit[2] = x2; pBit[3] = y0; pBit[4] = y1; pBit[5] = y2;
                    break;
                case 32:
                    pBit[0] = x0; pBit[1] = x1; pBit[2] = y0; pBit[3] = x2; pBit[4] = y1; pBit[5] = y2;
                    break;
                case 64:
                    pBit[0] = x0; pBit[1] = y0; pBit[2] = x1; pBit[3] = x2; pBit[4] = y1; pBit[5] = y2;
                    break;
                case 128:
                    pBit[0] = y0; pBit[1] = x0; pBit[2] = x1; pBit[3] = x2; pBit[4] = y1; pBit[5] = y2;
                    break;
                default:
                    ADDR_ASSERT_ALWAYS();
                    break;
            }
        }
        else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
        {
            // Same interleave for every bpp.
            pBit[0] = x0; pBit[1] = y0; pBit[2] = x1; pBit[3] = y1; pBit[4] = x2; pBit[5] = y2;
        }
        else if (microTileType == ADDR_ROTATED)
        {
            ADDR_ASSERT(thickness == 1);

            switch (bpp)
            {
                case 8:
                    pBit[0] = y0; pBit[1] = y1; pBit[2] = y2; pBit[3] = x1; pBit[4] = x0; pBit[5] = x2;
                    break;
                case 16:
                    pBit[0] = y0; pBit[1] = y1; pBit[2] = y2; pBit[3] = x0; pBit[4] = x1; pBit[5] = x2;
                    break;
                case 32:
                    pBit[0] = y0; pBit[1] = y1; pBit[2] = x0; pBit[3] = y2; pBit[4] = x1; pBit[5] = x2;
                    break;
                case 64:
                    pBit[0] = y0; pBit[1] = x0; pBit[2] = y1; pBit[3] = x1; pBit[4] = x2; pBit[5] = y2;
                    break;
                default:
                    // No rotated table exists for this bpp; report it instead of asserting.
                    status = ADDR_NOTSUPPORTED;
                    break;
            }
        }

        if (thickness > 1)
        {
            pBit[6] = z0;
            pBit[7] = z1;
            pEquation->numBits = 8 + log2BytesPP;
        }
        else
        {
            pEquation->numBits = 6 + log2BytesPP;
        }
    }

    // A thickness of 8 needs one more z bit on top of either layout.
    if (thickness == 8)
    {
        pBit[8] = z2;
        pEquation->numBits = 9 + log2BytesPP;
    }

    // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices,
    // which is not supported by our address lib
    pEquation->stackedDepthSlices = FALSE;
    pEquation->numBitComponents   = 1;

    return status;
}

/**
****************************************************************************************************
*   Lib::ComputePixelIndexWithinMicroTile
*
*   @brief
*       Compute the pixel index inside a micro tile of surface
*
*   @return
*       Pixel index
*
****************************************************************************************************
*/
UINT_32 Lib::ComputePixelIndexWithinMicroTile(
    UINT_32         x,              ///< [in] x coord
    UINT_32         y,              ///< [in] y coord
    UINT_32         z,              ///< [in] slice/depth index
    UINT_32         bpp,            ///< [in] bits per pixel
    AddrTileMode    tileMode,       ///< [in] tile mode
    AddrTileType    microTileType   ///< [in] pixel order in display/non-display mode
    ) const
{
    // Only the low three bits of each coordinate take part in the index.
    const UINT_32 x0 = _BIT(x, 0);
    const UINT_32 x1 = _BIT(x, 1);
    const UINT_32 x2 = _BIT(x, 2);
    const UINT_32 y0 = _BIT(y, 0);
    const UINT_32 y1 = _BIT(y, 1);
    const UINT_32 y2 = _BIT(y, 2);
    const UINT_32 z0 = _BIT(z, 0);
    const UINT_32 z1 = _BIT(z, 1);
    const UINT_32 z2 = _BIT(z, 2);

    const UINT_32 thickness = Thickness(tileMode);

    // bit[i] becomes bit i of the returned index; positions no table assigns stay 0.
    UINT_32 bit[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};

    if (microTileType == ADDR_THICK)
    {
        ADDR_ASSERT(thickness > 1);

        switch (bpp)
        {
            case 8:
            case 16:
                bit[0] = x0; bit[1] = y0; bit[2] = x1; bit[3] = y1; bit[4] = z0; bit[5] = z1;
                break;
            case 32:
                bit[0] = x0; bit[1] = y0; bit[2] = x1; bit[3] = z0; bit[4] = y1; bit[5] = z1;
                break;
            case 64:
            case 128:
                bit[0] = x0; bit[1] = y0; bit[2] = z0; bit[3] = x1; bit[4] = y1; bit[5] = z1;
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                break;
        }

        bit[6] = x2;
        bit[7] = y2;
    }
    else
    {
        switch (microTileType)
        {
            case ADDR_DISPLAYABLE:
                switch (bpp)
                {
                    case 8:
                        bit[0] = x0; bit[1] = x1; bit[2] = x2; bit[3] = y1; bit[4] = y0; bit[5] = y2;
                        break;
                    case 16:
                        bit[0] = x0; bit[1] = x1; bit[2] = x2; bit[3] = y0; bit[4] = y1; bit[5] = y2;
                        break;
                    case 32:
                        bit[0] = x0; bit[1] = x1; bit[2] = y0; bit[3] = x2; bit[4] = y1; bit[5] = y2;
                        break;
                    case 64:
                        bit[0] = x0; bit[1] = y0; bit[2] = x1; bit[3] = x2; bit[4] = y1; bit[5] = y2;
                        break;
                    case 128:
                        bit[0] = y0; bit[1] = x0; bit[2] = x1; bit[3] = x2; bit[4] = y1; bit[5] = y2;
                        break;
                    default:
                        ADDR_ASSERT_ALWAYS();
                        break;
                }
                break;

            case ADDR_NON_DISPLAYABLE:
            case ADDR_DEPTH_SAMPLE_ORDER:
                // Same interleave for every bpp.
                bit[0] = x0; bit[1] = y0; bit[2] = x1; bit[3] = y1; bit[4] = x2; bit[5] = y2;
                break;

            case ADDR_ROTATED:
                ADDR_ASSERT(thickness == 1);

                switch (bpp)
                {
                    case 8:
                        bit[0] = y0; bit[1] = y1; bit[2] = y2; bit[3] = x1; bit[4] = x0; bit[5] = x2;
                        break;
                    case 16:
                        bit[0] = y0; bit[1] = y1; bit[2] = y2; bit[3] = x0; bit[4] = x1; bit[5] = x2;
                        break;
                    case 32:
                        bit[0] = y0; bit[1] = y1; bit[2] = x0; bit[3] = y2; bit[4] = x1; bit[5] = x2;
                        break;
                    case 64:
                        bit[0] = y0; bit[1] = x0; bit[2] = y1; bit[3] = x1; bit[4] = x2; bit[5] = y2;
                        break;
                    default:
                        ADDR_ASSERT_ALWAYS();
                        break;
                }
                break;

            default:
                // Any other micro tile type leaves the low six bits at 0.
                break;
        }

        if (thickness > 1)
        {
            bit[6] = z0;
            bit[7] = z1;
        }
    }

    // A thickness of 8 needs one more z bit on top of either layout.
    if (thickness == 8)
    {
        bit[8] = z2;
    }

    // Pack the individual bits into the pixel index.
    UINT_32 pixelNumber = 0;

    for (UINT_32 i = 0; i < 9; i++)
    {
        pixelNumber |= (bit[i] << i);
    }

    return pixelNumber;
}

/**
****************************************************************************************************
*   Lib::AdjustPitchAlignment
*
*   @brief
*       Adjusts pitch alignment for flipping surface
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID Lib::AdjustPitchAlignment(
    ADDR_SURFACE_FLAGS  flags,      ///< [in] Surface flags
    UINT_32*            pPitchAlign ///< [in,out] Pointer to pitch alignment
    ) const
{
    // Only display and overlay surfaces need the extra alignment; leave everything else alone.
    if (!flags.display && !flags.overlay)
    {
        return;
    }

    // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment
    // Maybe it will be fixed in future but let's make it general for now.
    UINT_32 adjustedAlign = PowTwoAlign(*pPitchAlign, 32);

    // Display surfaces additionally honor the minimum pitch alignment held by this object.
    if (flags.display)
    {
        adjustedAlign = Max(m_minPitchAlignPixels, adjustedAlign);
    }

    *pPitchAlign = adjustedAlign;
}

/**
****************************************************************************************************
*   Lib::PadDimensions
*
*   @brief
*       Helper function to pad dimensions
*
*   @return
*       N/A
*
****************************************************************************************************
*/
VOID Lib::PadDimensions(
    AddrTileMode        tileMode,    ///< [in] tile mode
    UINT_32             bpp,         ///< [in] bits per pixel
    ADDR_SURFACE_FLAGS  flags,       ///< [in] surface flags
    UINT_32             numSamples,  ///< [in] number of samples
    ADDR_TILEINFO*      pTileInfo,   ///< [in,out] bank structure.
    UINT_32             padDims,     ///< [in] number of dimensions to pad: 1, 2 or 3 (0 acts as 3)
    UINT_32             mipLevel,    ///< [in] mip level
    UINT_32*            pPitch,      ///< [in,out] pitch in pixels
    UINT_32*            pPitchAlign, ///< [in,out] pitch alignment; may be changed by HwlPadDimensions
    UINT_32*            pHeight,     ///< [in,out] height in pixels
    UINT_32             heightAlign, ///< [in] height alignment
    UINT_32*            pSlices,     ///< [in,out] number of slices
    UINT_32             sliceAlign   ///< [in] slice count alignment
    ) const
{
    const UINT_32 alignForPitch = *pPitchAlign;
    const UINT_32 tileThickness = Thickness(tileMode);

    ADDR_ASSERT(padDims <= 3);

    // Cubemap sub-levels override the caller's choice: when the client passes more than one
    // slice (the faces as a whole) the level is padded like a 3D texture, otherwise only in 2D.
    if ((mipLevel > 0) && flags.cube)
    {
        padDims = (*pSlices > 1) ? 3 : 2;
    }

    // A request for zero dimensions is treated as "pad everything".
    if (padDims == 0)
    {
        padDims = 3;
    }

    // The pitch is padded unconditionally.
    if (IsPow2(alignForPitch))
    {
        *pPitch = PowTwoAlign((*pPitch), alignForPitch);
    }
    else
    {
        // Generic round-up for alignments that are not a power of two (kept to pass the unit
        // test: r600 linear mode does not align bpp to pow2).
        *pPitch = ((*pPitch + alignForPitch - 1) / alignForPitch) * alignForPitch;
    }

    // The height is padded only when at least two dimensions are requested.
    if (padDims > 1)
    {
        if (IsPow2(heightAlign))
        {
            *pHeight = PowTwoAlign((*pHeight), heightAlign);
        }
        else
        {
            *pHeight = ((*pHeight + heightAlign - 1) / heightAlign) * heightAlign;
        }
    }

    // Slices are touched for three-dimensional padding or for any thick tile mode.
    const BOOL_32 adjustSlices = (padDims > 2) || (tileThickness > 1);

    if (adjustSlices)
    {
        // A single cubemap face is not padded; otherwise the slice count goes to the next
        // power of two unless the configuration disables it (cubeAsArray re-enables it).
        if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray))
        {
            *pSlices = NextPow2(*pSlices);
        }

        // Thick modes additionally align the slice count to the requested slice alignment.
        if (tileThickness > 1)
        {
            *pSlices = PowTwoAlign((*pSlices), sliceAlign);
        }
    }

    // Finally let the hardware layer apply its own pitch adjustments.
    HwlPadDimensions(tileMode,
                     bpp,
                     flags,
                     numSamples,
                     pTileInfo,
                     mipLevel,
                     pPitch,
                     pPitchAlign,
                     *pHeight,
                     heightAlign);
}


/**
****************************************************************************************************
*   Lib::HwlPreHandleBaseLvl3xPitch
*
*   @brief
*       Pre-handler of 3x pitch (96 bit) adjustment
*
*   @return
*       Expected pitch
****************************************************************************************************
*/
UINT_32 Lib::HwlPreHandleBaseLvl3xPitch(
    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] surface description
    UINT_32                                 expPitch    ///< [in] pitch, asserted to equal pIn->width
    ) const
{
    ADDR_ASSERT(pIn->width == expPitch);

    // Only the base level of a linear-aligned, 3x-expanded format carries a pre-multiplied pitch.
    const BOOL_32 isExpandedLinearBase = ElemLib::IsExpand3x(pIn->format) &&
                                         (pIn->mipLevel == 0) &&
                                         (pIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

    if (isExpandedLinearBase)
    {
        // Undo the multiplication by 3 and round up to a power of two so that the mip level
        // sizes derived from this pitch come out right.
        expPitch = NextPow2(expPitch / 3);
    }

    return expPitch;
}

/**
****************************************************************************************************
*   Lib::HwlPostHandleBaseLvl3xPitch
*
*   @brief
*       Post-handler of 3x pitch adjustment
*
*   @return
*       Expected pitch
****************************************************************************************************
*/
UINT_32 Lib::HwlPostHandleBaseLvl3xPitch(
    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] surface description
    UINT_32                                 expPitch    ///< [in] pitch
    ) const
{
    // Sub-levels of a 96-bit surface use an element pitch of 32 bits, so for them the pitch is
    // returned in 32-bit pixels without multiplying by 3. Only the base level of a
    // linear-aligned, 3x-expanded format gets the factor applied.
    const BOOL_32 isExpandedLinearBase = ElemLib::IsExpand3x(pIn->format) &&
                                         (pIn->mipLevel == 0) &&
                                         (pIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

    return isExpandedLinearBase ? (expPitch * 3) : expPitch;
}


/**
****************************************************************************************************
*   Lib::IsMacroTiled
*
*   @brief
*       Check if the tile mode is macro tiled
*
*   @return
*       TRUE if it is macro tiled (2D/2B/3D/3B)
****************************************************************************************************
*/
BOOL_32 Lib::IsMacroTiled(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isMacro;
}

/**
****************************************************************************************************
*   Lib::IsMacro3dTiled
*
*   @brief
*       Check if the tile mode is 3D macro tiled
*
*   @return
*       TRUE if it is 3D macro tiled
****************************************************************************************************
*/
BOOL_32 Lib::IsMacro3dTiled(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isMacro3d;
}

/**
****************************************************************************************************
*   Lib::IsMicroTiled
*
*   @brief
*       Check if the tile mode is micro tiled
*
*   @return
*       TRUE if micro tiled
****************************************************************************************************
*/
BOOL_32 Lib::IsMicroTiled(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isMicro;
}

/**
****************************************************************************************************
*   Lib::IsLinear
*
*   @brief
*       Check if the tile mode is linear
*
*   @return
*       TRUE if linear
****************************************************************************************************
*/
BOOL_32 Lib::IsLinear(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isLinear;
}

/**
****************************************************************************************************
*   Lib::IsPrtNoRotationTileMode
*
*   @brief
*       Return TRUE if it is prt tile without rotation
*   @note
*       This function is only used by CI
****************************************************************************************************
*/
BOOL_32 Lib::IsPrtNoRotationTileMode(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isPrtNoRotation;
}

/**
****************************************************************************************************
*   Lib::IsPrtTileMode
*
*   @brief
*       Return TRUE if it is prt tile
*   @note
*       This function is only used by CI
****************************************************************************************************
*/
BOOL_32 Lib::IsPrtTileMode(
    AddrTileMode tileMode)  ///< [in] tile mode, used as the index into the ModeFlags table
{
    // Plain table lookup; no range check is performed on tileMode.
    const TileModeFlags& modeAttrs = ModeFlags[tileMode];

    return modeAttrs.isPrt;
}

/**
****************************************************************************************************
*   Lib::ComputeMipLevel
*
*   @brief
*       Compute mipmap level width/height/slices
*   @return
*      N/A
****************************************************************************************************
*/
VOID Lib::ComputeMipLevel(
    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure; width/height may be padded
    ) const
{
    if (ElemLib::IsBlockCompressed(pIn->format) && (pIn->mipLevel == 0))
    {
        // DXTn's level 0 must be a multiple of 4, but there are exceptions:
        // 1. Internal surface creation in hostblt/vsblt/etc...
        // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not a multiple of 4
        // so the base level is padded here instead of being rejected.
        pIn->width  = PowTwoAlign(pIn->width, 4);
        pIn->height = PowTwoAlign(pIn->height, 4);
    }

    // The hardware layer performs the per-level computation. Its return value (whether it
    // handled the level) is deliberately not stored: this function has no generic fallback
    // that would consume it, and keeping it only produced a set-but-unused local.
    HwlComputeMipLevel(pIn);
}

/**
****************************************************************************************************
*   Lib::DegradeTo1D
*
*   @brief
*       Check if surface can be degraded to 1D
*   @return
*       TRUE if degraded
****************************************************************************************************
*/
BOOL_32 Lib::DegradeTo1D(
    UINT_32 width,                  ///< [in] surface width
    UINT_32 height,                 ///< [in] surface height
    UINT_32 macroTilePitchAlign,    ///< [in] macro tile pitch align
    UINT_32 macroTileHeightAlign    ///< [in] macro tile height align
    )
{
    // A surface smaller than one macro tile in either direction is always degraded.
    BOOL_32 degrade = ((width < macroTilePitchAlign) || (height < macroTileHeightAlign));

    // Check whether 2D tiling still has too much footprint
    if (degrade == FALSE)
    {
        // Only check width and height as slices are aligned to thickness.
        // Widen one operand BEFORE multiplying: a UINT_32 * UINT_32 product is computed in
        // 32 bits and would wrap before being stored in the 64-bit result.
        const UINT_64 unalignedSize = static_cast<UINT_64>(width) * height;

        const UINT_32 alignedPitch  = PowTwoAlign(width, macroTilePitchAlign);
        const UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign);
        const UINT_64 alignedSize   = static_cast<UINT_64>(alignedPitch) * alignedHeight;

        // alignedSize > 1.5 * unalignedSize, written without fractions
        if (2 * alignedSize > 3 * unalignedSize)
        {
            degrade = TRUE;
        }
    }

    return degrade;
}

/**
****************************************************************************************************
*   Lib::OptimizeTileMode
*
*   @brief
*       Check if base level's tile mode can be optimized (degraded)
*   @return
*       N/A
****************************************************************************************************
*/
VOID Lib::OptimizeTileMode(
    ADDR_COMPUTE_SURFACE_INFO_INPUT*  pInOut     ///< [in, out] structure for surface info
    ) const
{
    // Working copy of the tile mode; pInOut->tileMode is only written at the very end.
    AddrTileMode tileMode = pInOut->tileMode;

    // Any of these three client requests enables the optimization path below.
    BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) ||
                    (pInOut->flags.minimizeAlignment == TRUE) ||
                    (pInOut->maxBaseAlign != 0);

    // Set when the maxBaseAlign constraint asks for a PRT (or matching 1D) mode instead.
    BOOL_32 convertToPrt = FALSE;

    // Optimization is only attempted on level 0 and never for PRT tile modes or PRT surfaces.
    // Sample-count restrictions are applied per optimization further down.
    if ((doOpt == TRUE)                     &&
        (pInOut->mipLevel == 0)             &&
        (IsPrtTileMode(tileMode) == FALSE)  &&
        (pInOut->flags.prt == FALSE))
    {
        UINT_32 width = pInOut->width;
        UINT_32 height = pInOut->height;
        UINT_32 thickness = Thickness(tileMode);
        BOOL_32 macroTiledOK = TRUE;
        UINT_32 macroWidthAlign = 0;
        UINT_32 macroHeightAlign = 0;
        UINT_32 macroSizeAlign = 0;

        // Macro tile alignments are only queried (and only meaningful) for macro tiled modes;
        // for every other mode they stay 0 and macroTiledOK stays TRUE.
        if (IsMacroTiled(tileMode))
        {
            macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut,
                                                         &macroWidthAlign,
                                                         &macroHeightAlign,
                                                         &macroSizeAlign);
        }

        if (macroTiledOK)
        {
            // opt4Space: trade tiling for footprint on non-display, single-sample surfaces.
            if ((pInOut->flags.display == FALSE) &&
                (pInOut->flags.opt4Space == TRUE) &&
                (pInOut->numSamples <= 1))
            {
                // Check if linear mode is optimal
                if ((pInOut->height == 1) &&
                    (IsLinear(tileMode) == FALSE) &&
                    (ElemLib::IsBlockCompressed(pInOut->format) == FALSE) &&
                    (pInOut->flags.depth == FALSE) &&
                    (pInOut->flags.stencil == FALSE) &&
                    (m_configFlags.disableLinearOpt == FALSE) &&
                    (pInOut->flags.disableLinearOpt == FALSE))
                {
                    tileMode = ADDR_TM_LINEAR_ALIGNED;
                }
                else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE))
                {
                    // Macro tiled and not TC compatible: fall back to the 1D mode of the same
                    // thickness class when DegradeTo1D says the macro tile padding is too costly.
                    if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign))
                    {
                        tileMode = (thickness == 1) ?
                                   ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
                    }
                    else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0))
                    {
                        // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
                        // thinner modes, we should re-evaluate whether the corresponding
                        // thinner modes should be degraded. If so, we choose 1D thick mode instead.
                        tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp);

                        if (tileMode != pInOut->tileMode)
                        {
                            // Get thickness again after large thick degrade
                            thickness = Thickness(tileMode);

                            // Query the alignments of the degraded mode on a copy so that the
                            // caller's structure is not modified by this probe.
                            ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pInOut;
                            input.tileMode = tileMode;

                            macroTiledOK = HwlGetAlignmentInfoMacroTiled(&input,
                                                                         &macroWidthAlign,
                                                                         &macroHeightAlign,
                                                                         &macroSizeAlign);

                            if (macroTiledOK &&
                                DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign))
                            {
                                tileMode = ADDR_TM_1D_TILED_THICK;
                            }
                        }
                    }
                }
            }

            // macroTiledOK may have been cleared by the re-query above, hence the second check.
            if (macroTiledOK)
            {
                // minimizeAlignment: drop to 1D when macro tile padding covers more pixels than
                // micro tile padding would.
                if ((pInOut->flags.minimizeAlignment == TRUE) &&
                    (pInOut->numSamples <= 1) &&
                    (IsMacroTiled(tileMode) == TRUE))
                {
                    // NOTE(review): both products are computed in 32 bits; presumably surface
                    // dimensions are small enough that they cannot wrap - confirm.
                    UINT_32 macroSize = PowTwoAlign(width, macroWidthAlign) *
                                        PowTwoAlign(height, macroHeightAlign);
                    UINT_32 microSize = PowTwoAlign(width, MicroTileWidth) *
                                        PowTwoAlign(height, MicroTileHeight);

                    if (macroSize > microSize)
                    {
                        tileMode = (thickness == 1) ?
                                   ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
                    }
                }

                // maxBaseAlign: the client caps the base alignment. If the macro tiled size
                // alignment exceeds the cap, use 1D (single-sample, cap below 64KB) or request
                // a PRT conversion (multi-sample, or cap of at least 64KB).
                if ((pInOut->maxBaseAlign != 0) &&
                    (IsMacroTiled(tileMode) == TRUE))
                {
                    if (macroSizeAlign > pInOut->maxBaseAlign)
                    {
                        if (pInOut->numSamples > 1)
                        {
                            ADDR_ASSERT(pInOut->maxBaseAlign >= Block64K);

                            convertToPrt = TRUE;
                        }
                        else if (pInOut->maxBaseAlign < Block64K)
                        {
                            tileMode = (thickness == 1) ?
                                       ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
                        }
                        else
                        {
                            convertToPrt = TRUE;
                        }
                    }
                }
            }
        }
    }

    // Commit the decision. A PRT conversion takes precedence over the locally chosen tileMode.
    if (convertToPrt)
    {
        // Single-sample surfaces that must match the stencil tile config get 1D thin instead
        // of a PRT mode; everything else is handed to the hardware layer.
        if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1))
        {
            pInOut->tileMode = ADDR_TM_1D_TILED_THIN1;
        }
        else
        {
            HwlSetPrtTileMode(pInOut);
        }
    }
    else if (tileMode != pInOut->tileMode)
    {
        pInOut->tileMode = tileMode;
    }

    // The hardware layer hook always runs last, whether or not anything changed above.
    HwlOptimizeTileMode(pInOut);
}

/**
****************************************************************************************************
*   Lib::DegradeLargeThickTile
*
*   @brief
*       Check if the thickness needs to be reduced if a tile is too large
*   @return
*       The degraded tile mode (unchanged if not degraded)
****************************************************************************************************
*/
AddrTileMode Lib::DegradeLargeThickTile(
    AddrTileMode tileMode,  ///< [in] tile mode to examine
    UINT_32 bpp) const      ///< [in] bits per pixel
{
    const UINT_32 tileThickness = Thickness(tileMode);

    // Thin modes, or a configuration that allows large thick tiles, are returned unchanged.
    if ((tileThickness <= 1) || (m_configFlags.allowLargeThickTile != 0))
    {
        return tileMode;
    }

    // When tile_width (8) * tile_height (8) * thickness * element_bytes exceeds the row size,
    // it is better to use a thinner mode.
    const UINT_32 tileBytes = MicroTilePixels * tileThickness * (bpp >> 3);

    if (tileBytes <= m_rowSize)
    {
        return tileMode;
    }

    // XTHICK modes step down to THICK when half of the tile size fits into a row, and all the
    // way to THIN1 otherwise.
    const BOOL_32 halfTileFits = ((tileBytes >> 1) <= m_rowSize);

    AddrTileMode degradedMode = tileMode;

    switch (tileMode)
    {
        case ADDR_TM_2D_TILED_XTHICK:
            degradedMode = halfTileFits ? ADDR_TM_2D_TILED_THICK : ADDR_TM_2D_TILED_THIN1;
            break;

        case ADDR_TM_2D_TILED_THICK:
            degradedMode = ADDR_TM_2D_TILED_THIN1;
            break;

        case ADDR_TM_3D_TILED_XTHICK:
            degradedMode = halfTileFits ? ADDR_TM_3D_TILED_THICK : ADDR_TM_3D_TILED_THIN1;
            break;

        case ADDR_TM_3D_TILED_THICK:
            degradedMode = ADDR_TM_3D_TILED_THIN1;
            break;

        case ADDR_TM_PRT_TILED_THICK:
            degradedMode = ADDR_TM_PRT_TILED_THIN1;
            break;

        case ADDR_TM_PRT_2D_TILED_THICK:
            degradedMode = ADDR_TM_PRT_2D_TILED_THIN1;
            break;

        case ADDR_TM_PRT_3D_TILED_THICK:
            degradedMode = ADDR_TM_PRT_3D_TILED_THIN1;
            break;

        default:
            // Every other mode is left as it is.
            break;
    }

    return degradedMode;
}

/**
****************************************************************************************************
*   Lib::PostComputeMipLevel
*   @brief
*       Compute MipLevel info (including level 0) after surface adjustment
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::PostComputeMipLevel(
    ADDR_COMPUTE_SURFACE_INFO_INPUT*    pIn,   ///< [in,out] Input structure; dimensions padded in place
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut   ///< [out] Output structure (not used here)
    ) const
{
    // Mipmaps including level 0 must be pow2 padded when the client asks for it (either SI hw
    // expects so or CFX requires it for hw compatibility between NI and SI). Otherwise padding
    // is only needed for mipLevel > 0. Hardware with different requirements should implement
    // its own virtual function.
    const BOOL_32 forcePow2  = pIn->flags.pow2Pad;
    const BOOL_32 isSubLevel = (pIn->mipLevel > 0);

    if (forcePow2 || isSubLevel)
    {
        pIn->width  = NextPow2(pIn->width);
        pIn->height = NextPow2(pIn->height);

        // Without an explicit pow2Pad request a cubemap keeps its slice count for now.
        if (forcePow2 || !pIn->flags.cube)
        {
            pIn->numSlices = NextPow2(pIn->numSlices);
        }
    }

    return ADDR_OK;
}

/**
****************************************************************************************************
*   Lib::HwlSetupTileCfg
*
*   @brief
*       Map tile index to tile setting.
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::HwlSetupTileCfg(
    UINT_32         bpp,              ///< [in] Bits per pixel (ignored)
    INT_32          index,            ///< [in] Tile index (ignored)
    INT_32          macroModeIndex,   ///< [in] Index in macro tile mode table(CI) (ignored)
    ADDR_TILEINFO*  pInfo,            ///< [out] Tile Info (left untouched)
    AddrTileMode*   pMode,            ///< [out] Tile mode (left untouched)
    AddrTileType*   pType             ///< [out] Tile type (left untouched)
    ) const
{
    // This implementation performs no mapping at all: it reads none of its arguments, writes
    // none of its outputs and always reports that the operation is not supported.
    const ADDR_E_RETURNCODE status = ADDR_NOTSUPPORTED;

    return status;
}

/**
****************************************************************************************************
*   Lib::HwlGetPipes
*
*   @brief
*       Get number pipes
*   @return
*       num pipes
****************************************************************************************************
*/
UINT_32 Lib::HwlGetPipes(
    const ADDR_TILEINFO* pTileInfo    ///< [in] Tile info; never dereferenced here
    ) const
{
    // pTileInfo can be NULL when asic is 6xx and 8xx, so the answer comes from the pipe count
    // stored in the library object rather than from the tile info.
    const UINT_32 numPipes = m_pipes;

    return numPipes;
}

/**
****************************************************************************************************
*   Lib::ComputeQbStereoInfo
*
*   @brief
*       Get quad buffer stereo information
*   @return
*       N/A
****************************************************************************************************
*/
VOID Lib::ComputeQbStereoInfo(
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [in,out] updated pOut+pStereoInfo
    ) const
{
    ADDR_ASSERT(pOut->bpp >= 8);
    ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0);

    // Record the single-eye layout first: the original height, and the byte offset of the
    // right eye, which starts right after the current surface size.
    pOut->pStereoInfo->eyeHeight   = pOut->height;
    pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize);

    // The swizzle hook is invoked before any of the dimensions below are doubled.
    pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut);

    // The surface now holds both eyes: twice the height and twice the size.
    pOut->height      = pOut->height * 2;
    pOut->pixelHeight = pOut->pixelHeight * 2;
    pOut->surfSize    = pOut->surfSize * 2;

    // Right start address meets the base align since it is guaranteed by AddrLib1.
    // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo.
}


/**
****************************************************************************************************
*   Lib::ComputePrtInfo
*
*   @brief
*       Compute prt surface related info
*
*   @return
*       ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputePrtInfo(
    const ADDR_PRT_INFO_INPUT*  pIn,        ///< [in] format, base mip dimensions and fragment count
    ADDR_PRT_INFO_OUTPUT*       pOut) const ///< [out] receives the PRT tile width and height
{
    ADDR_ASSERT(pOut != NULL);

    UINT_32     expandX = 1;
    UINT_32     expandY = 1;
    ElemMode    elemMode;

    const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
                                                      &elemMode,
                                                      &expandX,
                                                      &expandY);

    // Element sizes below 8 bits and the 24/48/96 bit sizes are rejected.
    const BOOL_32 bppRejected = (bpp < 8) || (bpp == 24) || (bpp == 48) || (bpp == 96);

    const ADDR_E_RETURNCODE returnCode = bppRejected ? ADDR_INVALIDPARAMS : ADDR_OK;

    const UINT_32 numFrags = pIn->numFrags;
    ADDR_ASSERT(numFrags <= 8);

    // Both stay 0 when the input is rejected or the element size has no table entry.
    UINT_32 tileWidth  = 0;
    UINT_32 tileHeight = 0;

    if (returnCode == ADDR_OK)
    {
        const BOOL_32 is1d = (pIn->baseMipDepth <= 1) && (pIn->baseMipHeight <= 1);

        if (is1d)
        {
            // 1D: a single row whose width depends on the element size.
            tileHeight = 1;

            switch (bpp)
            {
                case 8:   tileWidth = 65536; break;
                case 16:  tileWidth = 32768; break;
                case 32:  tileWidth = 16384; break;
                case 64:  tileWidth = 8192;  break;
                case 128: tileWidth = 4096;  break;
                default:                     break;
            }
        }
        else
        {
            // 2D or 3D texture: base tile shape per element size.
            switch (bpp)
            {
                case 8:
                    tileWidth  = 256;
                    tileHeight = 256;
                    break;

                case 16:
                    tileWidth  = 256;
                    tileHeight = 128;
                    break;

                case 32:
                    tileWidth  = 128;
                    tileHeight = 128;
                    break;

                case 64:
                    // Anything other than an uncompressed element is assumed to be BC1/4.
                    if (elemMode == ADDR_UNCOMPRESSED)
                    {
                        tileWidth  = 128;
                        tileHeight = 64;
                    }
                    else
                    {
                        tileWidth  = 512;
                        tileHeight = 256;
                    }
                    break;

                case 128:
                    // Anything other than an uncompressed element is assumed to be
                    // BC2/3/5/6H/7.
                    if (elemMode == ADDR_UNCOMPRESSED)
                    {
                        tileWidth  = 64;
                        tileHeight = 64;
                    }
                    else
                    {
                        tileWidth  = 256;
                        tileHeight = 256;
                    }
                    break;

                default:
                    break;
            }

            // Multi-fragment surfaces shrink the tile; other fragment counts leave it as is.
            switch (numFrags)
            {
                case 2:
                    tileWidth = tileWidth / 2;
                    break;

                case 4:
                    tileWidth  = tileWidth / 2;
                    tileHeight = tileHeight / 2;
                    break;

                case 8:
                    tileWidth  = tileWidth / 4;
                    tileHeight = tileHeight / 2;
                    break;

                default:
                    break;
            }
        }
    }

    // The outputs are written even on failure (as zeros).
    pOut->prtTileWidth  = tileWidth;
    pOut->prtTileHeight = tileHeight;

    return returnCode;
}

} // V1
} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib1.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
****************************************************************************************************
* @file  addrlib1.h
* @brief Contains the Addr::V1::Lib class definition.
****************************************************************************************************
*/

#ifndef __ADDR_LIB1_H__
#define __ADDR_LIB1_H__

#include "addrlib.h"

namespace rocr {
namespace Addr
{
namespace V1
{

/**
****************************************************************************************************
* @brief Neutral enums that define sample split size
****************************************************************************************************
*/
enum SampleSplitSize
{
    // Each enumerator's value is the size in bytes that its name spells out.
    ADDR_SAMPLESPLIT_1KB = 1 * 1024,
    ADDR_SAMPLESPLIT_2KB = 2 * 1024,
    ADDR_SAMPLESPLIT_4KB = 4 * 1024,
    ADDR_SAMPLESPLIT_8KB = 8 * 1024,
};

/**
****************************************************************************************************
* @brief Flags for AddrTileMode
****************************************************************************************************
*/
// Per-tile-mode attribute record, packed into bit-fields. Lib indexes a table of these
// (ModeFlags) by AddrTileMode. The order and width of the fields are part of the layout;
// presumably the table is filled positionally, so do not reorder - confirm against the table.
struct TileModeFlags
{
    UINT_32 thickness       : 4; // presumably the tile thickness reported by Thickness() - confirm
    UINT_32 isLinear        : 1; // returned by Lib::IsLinear
    UINT_32 isMicro         : 1; // returned by Lib::IsMicroTiled
    UINT_32 isMacro         : 1; // returned by Lib::IsMacroTiled
    UINT_32 isMacro3d       : 1; // returned by Lib::IsMacro3dTiled
    UINT_32 isPrt           : 1; // returned by Lib::IsPrtTileMode
    UINT_32 isPrtNoRotation : 1; // returned by Lib::IsPrtNoRotationTileMode
    UINT_32 isBankSwapped   : 1; // presumably marks bank-swapped modes - confirm against users
};

// 64KB expressed in bytes; the PRT tile size is defined as exactly one such block.
static const UINT_32 Block64K    = 64 * 1024;
static const UINT_32 PrtTileSize = Block64K;

/**
****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
****************************************************************************************************
*/
class Lib : public Addr::Lib
{
public:
    virtual ~Lib();

    /// Converts an ADDR_HANDLE into a V1 Lib pointer (defined out of line)
    static Lib* GetLib(
        ADDR_HANDLE hLib);

    /// Returns tileIndex support: TRUE only when the useTileIndex config flag is set and the
    /// supplied index is not TileIndexInvalid
    BOOL_32 UseTileIndex(INT_32 index) const
    {
        return m_configFlags.useTileIndex && (index != TileIndexInvalid);
    }

    /// Returns combined swizzle support (the useCombinedSwizzle config flag)
    BOOL_32 UseCombinedSwizzle() const
    {
        return m_configFlags.useCombinedSwizzle;
    }

    //
    // Interface stubs
    //
    // Public entry points. Each takes a const input structure plus an output structure and
    // returns an ADDR_E_RETURNCODE. They are only declared here; the bodies live out of line.
    // Note that ComputeFmaskInfo is the one stub that is not const (its Hwl counterpart,
    // HwlComputeFmaskInfo, is non-const as well).
    //
    ADDR_E_RETURNCODE ComputeSurfaceInfo(
        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT*  pIn,
        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeSliceTileSwizzle(
        const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ExtractBankPipeSwizzle(
        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE CombineBankPipeSwizzle(
        const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
        ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeBaseSwizzle(
        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeFmaskInfo(
        const ADDR_COMPUTE_FMASK_INFO_INPUT*  pIn,
        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);

    ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*  pIn,
        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,
        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ConvertTileInfoToHW(
        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ConvertTileIndex(
        const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE GetMacroModeIndex(
        const ADDR_GET_MACROMODEINDEX_INPUT* pIn,
        ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ConvertTileIndex1(
        const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE GetTileIndex(
        const ADDR_GET_TILEINDEX_INPUT* pIn,
        ADDR_GET_TILEINDEX_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeHtileInfo(
        const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn,
        ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeCmaskInfo(
        const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn,
        ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeDccInfo(
        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
        const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*  pIn,
        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*  pIn,
        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
        const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*  pIn,
        ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
        const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*  pIn,
        ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;

    ADDR_E_RETURNCODE ComputePrtInfo(
        const ADDR_PRT_INFO_INPUT*  pIn,
        ADDR_PRT_INFO_OUTPUT*       pOut) const;
protected:
    Lib();  // Constructor is protected
    Lib(const Client* pClient);

    /// Pure Virtual function for Hwl computing surface info
    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl computing surface address from coord
    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl computing surface coord from address
    virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl computing surface tile swizzle
    virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
        const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b
    virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl combining bank/pipe swizzle
    virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO*  pTileInfo,
        UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0;

    /// Pure Virtual function for Hwl computing base swizzle
    virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl computing HTILE base align
    virtual UINT_32 HwlComputeHtileBaseAlign(
        BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0;

    /// Pure Virtual function for Hwl computing HTILE bpp
    virtual UINT_32 HwlComputeHtileBpp(
        BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0;

    /// Pure Virtual function for Hwl computing HTILE bytes
    virtual UINT_64 HwlComputeHtileBytes(
        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0;

    /// Pure Virtual function for Hwl computing FMASK info (non-const, unlike its siblings)
    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0;

    /// Pure Virtual function for Hwl FMASK address from coord
    virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl FMASK coord from address
    virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl convert tile info from real value to HW value
    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0;

    /// Pure Virtual function for Hwl compute mipmap info
    virtual BOOL_32 HwlComputeMipLevel(
        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;

    /// Pure Virtual function for Hwl compute max cmask blockMax value
    /// NOTE(review): declared as returning BOOL_32 although the comment describes a value --
    /// confirm the intended return type against the implementations.
    virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0;

    /// Pure Virtual function for Hwl compute fmask bits
    virtual UINT_32 HwlComputeFmaskBits(
        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
        UINT_32* pNumSamples) const = 0;

    /// Virtual function to get index (not pure, so there is no need to implement this in all
    /// hwls); the default reports ADDR_NOTSUPPORTED
    virtual ADDR_E_RETURNCODE HwlGetTileIndex(
        const ADDR_GET_TILEINDEX_INPUT* pIn,
        ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
    {
        return ADDR_NOTSUPPORTED;
    }

    /// Virtual function for Hwl to compute Dcc info; the default reports ADDR_NOTSUPPORTED
    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
    {
        return ADDR_NOTSUPPORTED;
    }

    /// Virtual function to get cmask address for tc compatible cmask; the default reports
    /// ADDR_NOTSUPPORTED
    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
    {
        return ADDR_NOTSUPPORTED;
    }

    /// Virtual function to get htile address for tc compatible htile; the default reports
    /// ADDR_NOTSUPPORTED
    virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
        const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const
    {
        return ADDR_NOTSUPPORTED;
    }

    // Compute attributes

    // HTILE
    // Parameter-list overload of the public ComputeHtileInfo stub. The four trailing output
    // pointers default to NULL.
    UINT_32    ComputeHtileInfo(
        ADDR_HTILE_FLAGS flags,
        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices,
        BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
        ADDR_TILEINFO*  pTileInfo,
        UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes,
        UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL,
        UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const;

    // CMASK
    // Parameter-list overload of the public ComputeCmaskInfo stub. pMacroWidth/pMacroHeight
    // have no default here; only the last three output pointers default to NULL.
    ADDR_E_RETURNCODE ComputeCmaskInfo(
        ADDR_CMASK_FLAGS flags,
        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear,
        ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes,
        UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL,
        UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const;

    // Virtual with an out-of-line default implementation (not visible in this header)
    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;

    // CMASK & HTILE addressing
    virtual UINT_64 HwlComputeXmaskAddrFromCoord(
        UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice,
        UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8,
        BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo,
        UINT_32* bitPosition) const;

    virtual VOID HwlComputeXmaskCoordFromAddr(
        UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
        ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;

    // Surface mipmap
    VOID    ComputeMipLevel(
        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    /// Pure Virtual function for Hwl to get macro tiled alignment info
    virtual BOOL_32 HwlGetAlignmentInfoMacroTiled(
        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
        UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0;


    /// Tile mode override hook; the default does nothing
    virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
    {
        // not supported in hwl layer
    }

    /// Tile mode optimization hook; the default does nothing
    virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
    {
        // not supported in hwl layer
    }

    /// Tile mode selection hook; the default does nothing
    virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
    {
        // not supported in hwl layer
    }

    AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const;

    VOID PadDimensions(
        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
        UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
        UINT_32* pSlices, UINT_32 sliceAlign) const;

    /// Hwl hook related to PadDimensions; the default does nothing
    virtual VOID HwlPadDimensions(
        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel,
        UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const
    {
    }

    //
    // Addressing shared for linear/1D tiling
    //
    UINT_64 ComputeSurfaceAddrFromCoordLinear(
        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
        UINT_32* pBitPosition) const;

    VOID    ComputeSurfaceCoordFromAddrLinear(
        UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp,
        UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;

    VOID    ComputeSurfaceCoordFromAddrMicroTiled(
        UINT_64 addr, UINT_32 bitPosition,
        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;

    ADDR_E_RETURNCODE ComputeMicroTileEquation(
        UINT_32 bpp, AddrTileMode tileMode,
        AddrTileType microTileType, ADDR_EQUATION* pEquation) const;

    UINT_32 ComputePixelIndexWithinMicroTile(
        UINT_32 x, UINT_32 y, UINT_32 z,
        UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const;

    /// Pure Virtual function for Hwl computing coord from offset inside micro tile
    virtual VOID HwlComputePixelCoordFromOffset(
        UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0;

    //
    // Addressing shared by all
    //
    virtual UINT_32 HwlGetPipes(
        const ADDR_TILEINFO* pTileInfo) const;

    UINT_32 ComputePipeFromAddr(
        UINT_64 addr, UINT_32 numPipes) const;

    /// Virtual function for computing the pipe equation; the default reports ADDR_NOTSUPPORTED
    virtual ADDR_E_RETURNCODE ComputePipeEquation(
        UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const
    {
        return ADDR_NOTSUPPORTED;
    }

    /// Pure Virtual function for Hwl computing pipe from coord
    virtual UINT_32 ComputePipeFromCoord(
        UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode,
        UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0;

    /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile
    virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
        UINT_32 pipe, UINT_32 x) const = 0;

    //
    // Misc helper
    //
    /// Per-tile-mode attribute table with ADDR_TM_COUNT entries (defined out of line)
    static const TileModeFlags ModeFlags[ADDR_TM_COUNT];

    static UINT_32 Thickness(
        AddrTileMode tileMode);

    // Checking tile mode
    static BOOL_32 IsMacroTiled(AddrTileMode tileMode);
    static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode);
    static BOOL_32 IsLinear(AddrTileMode tileMode);
    static BOOL_32 IsMicroTiled(AddrTileMode tileMode);
    static BOOL_32 IsPrtTileMode(AddrTileMode tileMode);
    static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode);

    /// Return TRUE if tile info is needed (i.e. the ignoreTileInfo config flag is clear)
    BOOL_32 UseTileInfo() const
    {
        return !m_configFlags.ignoreTileInfo;
    }

    /// Adjusts pitch alignment for flipping surface
    VOID    AdjustPitchAlignment(
        ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const;

    /// Overwrite tile config according to tile index
    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
        UINT_32 bpp, INT_32 index, INT_32 macroModeIndex,
        ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const;

    /// Overwrite macro tile config according to tile index; the default reports
    /// TileIndexNoMacroIndex
    virtual INT_32 HwlComputeMacroModeIndex(
        INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
        ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL
        ) const
    {
        return TileIndexNoMacroIndex;
    }

    /// Pre-handler of 3x pitch (96 bit) adjustment
    virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
    /// Post-handler of 3x pitch adjustment
    virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
    /// Check miplevel after surface adjustment
    ADDR_E_RETURNCODE PostComputeMipLevel(
        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

    /// Quad buffer stereo support, has its implementation in ind. layer
    VOID ComputeQbStereoInfo(
        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

    /// Pure virtual function to compute stereo bank swizzle for right eye
    virtual UINT_32 HwlComputeQbStereoRightSwizzle(
        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;

    VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;

    /// Overwrite tile setting to PRT; the default does nothing
    virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
    {
    }

    static BOOL_32 DegradeTo1D(
        UINT_32 width, UINT_32 height,
        UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign);

private:
    // Disallow the copy constructor (declared private so outside code cannot copy a Lib)
    Lib(const Lib& a);

    // Disallow the assignment operator (declared private so outside code cannot assign a Lib)
    Lib& operator=(const Lib& a);

    UINT_32 ComputeCmaskBaseAlign(
        ADDR_CMASK_FLAGS flags, ADDR_TILEINFO*  pTileInfo) const;

    UINT_64 ComputeCmaskBytes(
        UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const;

    //
    // CMASK/HTILE shared methods
    //
    VOID    ComputeTileDataWidthAndHeight(
        UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
        UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;

    UINT_32 ComputeXmaskCoordYFromPipe(
        UINT_32 pipe, UINT_32 x) const;
};

} // V1
} // Addr
} // namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib2.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
************************************************************************************************************************
* @file  addrlib2.cpp
* @brief Contains the implementation for the AddrLib2 base class.
************************************************************************************************************************
*/

#include "addrinterface.h"
#include "addrlib2.h"
#include "addrcommon.h"

namespace rocr {
namespace Addr
{
namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// Element dimensions of a 2D block, five entries. Entry i's area times (1 << i) is always 256
// (16*16*1, 16*8*2, 8*8*4, 8*4*8, 4*4*16), so the index is presumably log2(bytes per element)
// for a 256-byte block -- confirm against the code that indexes this table.
const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}};

// Element dimensions of a 3D block, five entries. Entry i's volume times (1 << i) is always
// 1024, so the index is presumably log2(bytes per element) for a 1 KB block -- confirm.
const Dim3d Lib::Block1K_3d[]  = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}};

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Constructor/Destructor
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
************************************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the Addr::V2::Lib class
*
************************************************************************************************************************
*/
Lib::Lib()
    : Addr::Lib()
    , m_se(0)
    , m_rbPerSe(0)
    , m_maxCompFrag(0)
    , m_banksLog2(0)
    , m_pipesLog2(0)
    , m_seLog2(0)
    , m_rbPerSeLog2(0)
    , m_maxCompFragLog2(0)
    , m_pipeInterleaveLog2(0)
    , m_blockVarSizeLog2(0)
    , m_numEquations(0)
{
    // Every member listed above starts at zero; nothing else to do here.
}

/**
************************************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the AddrLib2 class with pClient as parameter
*
************************************************************************************************************************
*/
Lib::Lib(const Client* pClient)
    : Addr::Lib(pClient)
    , m_se(0)
    , m_rbPerSe(0)
    , m_maxCompFrag(0)
    , m_banksLog2(0)
    , m_pipesLog2(0)
    , m_seLog2(0)
    , m_rbPerSeLog2(0)
    , m_maxCompFragLog2(0)
    , m_pipeInterleaveLog2(0)
    , m_blockVarSizeLog2(0)
    , m_numEquations(0)
{
    // The client is forwarded to the base class; every member listed above starts at zero.
}

/**
************************************************************************************************************************
*   Lib::~Lib
*
*   @brief
*       Destructor for the AddrLib2 class
*
************************************************************************************************************************
*/
Lib::~Lib()
{
    // Intentionally empty: this class performs no cleanup of its own in the destructor.
}

/**
************************************************************************************************************************
*   Lib::GetLib
*
*   @brief
*       Get Addr::V2::Lib pointer
*
*   @return
*      An Addr::V2::Lib class pointer
************************************************************************************************************************
*/
Lib* Lib::GetLib(
    ADDR_HANDLE hLib)   ///< [in] handle of ADDR_HANDLE
{
    Addr::Lib* pBaseLib = Addr::Lib::GetLib(hLib);

    // Only a valid, GFX9+ ASIC may use the AddrLib2 functions: a library whose chip family is
    // VI or older is rejected.
    const bool isPreGfx9 = (pBaseLib != NULL) &&
                           (pBaseLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI);

    if (isPreGfx9)
    {
        ADDR_ASSERT_ALWAYS();
        return NULL;
    }

    // Otherwise the handle itself is reinterpreted as the V2 library object.
    return static_cast<Lib*>(hLib);
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Surface Methods
////////////////////////////////////////////////////////////////////////////////////////////////////


/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfo
*
*   @brief
*       Interface function stub of AddrComputeSurfaceInfo.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    // Structure sizes are only checked when the fill-size-fields flag is on.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT))))
    {
        status = ADDR_PARAMSIZEMISMATCH;
    }

    // Work on a private copy of the input so that zero dimensions/counts can be raised to 1.
    ADDR2_COMPUTE_SURFACE_INFO_INPUT surfIn = *pIn;
    surfIn.width        = Max(pIn->width, 1u);
    surfIn.height       = Max(pIn->height, 1u);
    surfIn.numMipLevels = Max(pIn->numMipLevels, 1u);
    surfIn.numSlices    = Max(pIn->numSlices, 1u);
    surfIn.numSamples   = Max(pIn->numSamples, 1u);

    // A fragment count of zero means "same as the (already clamped) sample count".
    if (surfIn.numFrags == 0)
    {
        surfIn.numFrags = surfIn.numSamples;
    }

    ElemMode elemMode = ADDR_UNCOMPRESSED;
    UINT_32  expandX  = 1;
    UINT_32  expandY  = 1;

    if (status == ADDR_OK)
    {
        // ADDR_FMT_INVALID skips the format-driven conversion; the caller's bpp is then used.
        if (surfIn.format != ADDR_FMT_INVALID)
        {
            // Derive bpp plus the compression/expansion factors and element mode.
            surfIn.bpp = GetElemLib()->GetBitsPerPixel(surfIn.format, &elemMode, &expandX, &expandY);

            // Expanded formats (e.g. 96 bit, where width is pre-multiplied by 3 and bpp divided
            // by 3) are expected on linear surfaces only. The element mode also tells the
            // restore step further down how to undo the adjustment.
            if ((elemMode == ADDR_EXPANDED) && (expandX > 1))
            {
                ADDR_ASSERT(IsLinear(surfIn.swizzleMode));
            }

            UINT_32 basePitch = 0;
            GetElemLib()->AdjustSurfaceInfo(
                elemMode, expandX, expandY, &surfIn.bpp, &basePitch, &surfIn.width, &surfIn.height);
        }

        if (surfIn.bpp == 0)
        {
            // Rule out invalid parameters: no usable bits-per-pixel value.
            ADDR_ASSERT_ALWAYS();

            status = ADDR_INVALIDPARAMS;
        }
        else
        {
            surfIn.width  = Max(surfIn.width, 1u);
            surfIn.height = Max(surfIn.height, 1u);
        }
    }

    if (status == ADDR_OK)
    {
        status = ComputeSurfaceInfoSanityCheck(&surfIn);
    }

    if (status == ADDR_OK)
    {
        VerifyMipLevelInfo(pIn);

        // Dispatch on the caller's swizzle mode: linear vs. tiled.
        status = IsLinear(pIn->swizzleMode) ? ComputeSurfaceInfoLinear(&surfIn, pOut)
                                            : ComputeSurfaceInfoTiled(&surfIn, pOut);
    }

    if (status == ADDR_OK)
    {
        // Seed the pixel-unit outputs from the element-unit results.
        pOut->bpp                 = surfIn.bpp;
        pOut->pixelPitch          = pOut->pitch;
        pOut->pixelHeight         = pOut->height;
        pOut->pixelMipChainPitch  = pOut->mipChainPitch;
        pOut->pixelMipChainHeight = pOut->mipChainHeight;
        pOut->pixelBits           = surfIn.bpp;

        if (surfIn.format != ADDR_FMT_INVALID)
        {
            // Scratch copy of the bit count for the restore calls that must not touch
            // pOut->pixelBits again.
            UINT_32 pixelBits = pOut->pixelBits;

            GetElemLib()->RestoreSurfaceInfo(
                elemMode, expandX, expandY, &pOut->pixelBits, &pOut->pixelPitch, &pOut->pixelHeight);

            GetElemLib()->RestoreSurfaceInfo(
                elemMode, expandX, expandY, &pixelBits, &pOut->pixelMipChainPitch, &pOut->pixelMipChainHeight);

            if ((surfIn.numMipLevels > 1) && (pOut->pMipInfo != NULL))
            {
                for (UINT_32 mip = 0; mip < surfIn.numMipLevels; mip++)
                {
                    pOut->pMipInfo[mip].pixelPitch  = pOut->pMipInfo[mip].pitch;
                    pOut->pMipInfo[mip].pixelHeight = pOut->pMipInfo[mip].height;

                    GetElemLib()->RestoreSurfaceInfo(
                        elemMode, expandX, expandY, &pixelBits,
                        &pOut->pMipInfo[mip].pixelPitch, &pOut->pMipInfo[mip].pixelHeight);
                }
            }
        }

        // An equation is looked up only on request and only when Log2(numFrags) is 0.
        if (surfIn.flags.needEquation && (Log2(surfIn.numFrags) == 0))
        {
            pOut->equationIndex = GetEquationIndex(&surfIn, pOut);

            // Without allowExtEquation, equations with more bit components than the legacy
            // limit are reported as invalid. The validity test must precede the table access.
            if ((surfIn.flags.allowExtEquation == 0) &&
                (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) &&
                (m_equationTable[pOut->equationIndex].numBitComponents > ADDR_MAX_LEGACY_EQUATION_COMP))
            {
                pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX;
            }
        }

        // Stereo info is produced only when requested and a destination was supplied.
        if (surfIn.flags.qbStereo && (pOut->pStereoInfo != NULL))
        {
            ComputeQbStereoInfo(pOut);
#if DEBUG
            ValidateStereoInfo(pIn, pOut);
#endif
        }
    }

    // These checks run regardless of the status computed above.
    ADDR_ASSERT(pOut->surfSize != 0);

    ValidBaseAlignments(pOut->baseAlign);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeSurfaceAddrFromCoord.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
    const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    // Structure sizes are only checked when the fill-size-fields flag is on.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))))
    {
        status = ADDR_PARAMSIZEMISMATCH;
    }

    // Private copy of the input with every zero dimension/count raised to 1.
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT coordIn = *pIn;
    coordIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    coordIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    coordIn.numMipLevels    = Max(pIn->numMipLevels, 1u);
    coordIn.numSlices       = Max(pIn->numSlices, 1u);
    coordIn.numSamples      = Max(pIn->numSamples, 1u);
    coordIn.numFrags        = Max(pIn->numFrags, 1u);

    // bpp must be a multiple of 8 within [8, 128].
    const bool bppOutOfRange = (coordIn.bpp < 8)   ||
                               (coordIn.bpp > 128) ||
                               ((coordIn.bpp % 8) != 0);

    // The sample, slice and mip indices must lie inside the (clamped) counts.
    const bool indexOutOfRange = (coordIn.sample >= coordIn.numSamples) ||
                                 (coordIn.slice >= coordIn.numSlices)   ||
                                 (coordIn.mipId >= coordIn.numMipLevels);

    // The 3D mip/slice constraint is consulted only if the cheaper checks passed. Note that a
    // failure here replaces any size-mismatch status set above.
    if (bppOutOfRange   ||
        indexOutOfRange ||
        (IsTex3d(coordIn.resourceType) &&
         (Valid3DMipSliceIdConstraint(coordIn.numSlices, coordIn.mipId, coordIn.slice) == FALSE)))
    {
        status = ADDR_INVALIDPARAMS;
    }

    if (status == ADDR_OK)
    {
        status = IsLinear(coordIn.swizzleMode) ? ComputeSurfaceAddrFromCoordLinear(&coordIn, pOut)
                                               : ComputeSurfaceAddrFromCoordTiled(&coordIn, pOut);
    }

    if (status == ADDR_OK)
    {
        // Index of the 64 * 1024 sized block that contains the computed address.
        pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
    }

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddr
*
*   @brief
*       Interface function stub of ComputeSurfaceCoordFromAddr.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr(
    const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE status = ADDR_OK;

    // Structure sizes are only checked when the fill-size-fields flag is on.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))))
    {
        status = ADDR_PARAMSIZEMISMATCH;
    }

    // bpp must be a multiple of 8 within [8, 128]; the bit position must be below 8.
    // A failure here replaces any size-mismatch status set above.
    const bool bppOutOfRange = (pIn->bpp < 8) || (pIn->bpp > 128) || ((pIn->bpp % 8) != 0);

    if (bppOutOfRange || (pIn->bitPosition >= 8))
    {
        status = ADDR_INVALIDPARAMS;
    }

    if (status == ADDR_OK)
    {
        status = IsLinear(pIn->swizzleMode) ? ComputeSurfaceCoordFromAddrLinear(pIn, pOut)
                                            : ComputeSurfaceCoordFromAddrTiled(pIn, pOut);
    }

    return status;
}


////////////////////////////////////////////////////////////////////////////////////////////////////
//                               CMASK/HTILE
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
************************************************************************************************************************
*   Lib::ComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo. Checks the structure sizes (when size checking is
*       enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeHtileInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    const ADDR_E_RETURNCODE status = HwlComputeHtileInfo(pIn, pOut);

    // The alignment check runs regardless of the hardware layer's return code.
    ValidMetaBaseAlignments(pOut->baseAlign);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeHtileAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeHtileAddrFromCoord. Checks the structure sizes (when size checking
*       is enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeHtileAddrFromCoord
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord(
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeHtileAddrFromCoord(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeHtileCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeHtileCoordFromAddr. Checks the structure sizes (when size checking
*       is enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeHtileCoordFromAddr
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr(
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeHtileCoordFromAddr(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeCmaskInfo
*
*   @brief
*       Interface function stub of AddrComputeCmaskInfo. Rejects size mismatches (when size checking is enabled)
*       and requests with the linear cmask flag set, then forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS for rejected input, otherwise the return code of HwlComputeCmaskInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
    const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    // Linear cmask is rejected up front.
    if (pIn->cMaskFlags.linear)
    {
        return ADDR_INVALIDPARAMS;
    }

    const ADDR_E_RETURNCODE status = HwlComputeCmaskInfo(pIn, pOut);

    // The alignment check runs regardless of the hardware layer's return code.
    ValidMetaBaseAlignments(pOut->baseAlign);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeCmaskAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeCmaskAddrFromCoord. Checks the structure sizes (when size checking
*       is enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeCmaskAddrFromCoord
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord(
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeCmaskAddrFromCoord(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeCmaskCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeCmaskCoordFromAddr. Not implemented: the arguments are ignored.
*
*   @return
*       Always ADDR_NOTIMPLEMENTED
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr(
    const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    ADDR_NOT_IMPLEMENTED();

    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Lib::ComputeFmaskInfo
*
*   @brief
*       Interface function stub of ComputeFmaskInfo. The fmask is described as a single-sample 2D texture whose
*       bits-per-pixel comes from GetFmaskBpp(numSamples, numFrags), and the layout is obtained from
*       ComputeSurfaceInfo.
*
*   @note
*       The request is rejected unless the swizzle mode is a Z-order mode and at least one of numSamples/numFrags
*       is non-zero. pOut is only written, and its base alignment only checked, when ComputeSurfaceInfo succeeds.
*
*   @return
*       ADDR_INVALIDPARAMS for rejected input, otherwise the return code of ComputeSurfaceInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskInfo(
    const ADDR2_COMPUTE_FMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_FMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    )
{
    ADDR_E_RETURNCODE returnCode;

    BOOL_32 valid = (IsZOrderSwizzle(pIn->swizzleMode) == TRUE) &&
                    ((pIn->numSamples > 0) || (pIn->numFrags > 0));

    if (GetFillSizeFieldsFlags())
    {
        if ((pIn->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT)) ||
            (pOut->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT)))
        {
            valid = FALSE;
        }
    }

    if (valid == FALSE)
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn = {0};
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};

        localIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
        localOut.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);

        // Dimensions are clamped to at least 1 so a zeroed field still describes a 1x1x1 surface.
        localIn.swizzleMode  = pIn->swizzleMode;
        localIn.numSlices    = Max(pIn->numSlices, 1u);
        localIn.width        = Max(pIn->unalignedWidth, 1u);
        localIn.height       = Max(pIn->unalignedHeight, 1u);
        localIn.bpp          = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
        localIn.flags.fmask  = 1;
        localIn.numFrags     = 1;
        localIn.numSamples   = 1;
        localIn.resourceType = ADDR_RSRC_TEX_2D;

        // Pick the format matching the fmask element size; anything other than 8/16/32 bpp uses the 64-bit format.
        switch (localIn.bpp)
        {
            case 8:
                localIn.format = ADDR_FMT_8;
                break;
            case 16:
                localIn.format = ADDR_FMT_16;
                break;
            case 32:
                localIn.format = ADDR_FMT_32;
                break;
            default:
                localIn.format = ADDR_FMT_32_32;
                break;
        }

        returnCode = ComputeSurfaceInfo(&localIn, &localOut);

        if (returnCode == ADDR_OK)
        {
            pOut->pitch      = localOut.pitch;
            pOut->height     = localOut.height;
            pOut->baseAlign  = localOut.baseAlign;
            pOut->numSlices  = localOut.numSlices;
            pOut->fmaskBytes = static_cast<UINT_32>(localOut.surfSize);
            pOut->sliceSize  = static_cast<UINT_32>(localOut.sliceSize);
            pOut->bpp        = localIn.bpp;
            pOut->numSamples = 1;

            // Check the alignment only after it has been written. On the failure paths pOut->baseAlign still
            // holds whatever the caller left there, so checking it would be meaningless.
            ValidBaseAlignments(pOut->baseAlign);
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeFmaskAddrFromCoord
*
*   @brief
*       Interface function stub of ComputeFmaskAddrFromCoord. Not implemented: the arguments are ignored.
*
*   @return
*       Always ADDR_NOTIMPLEMENTED
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord(
    const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
    ) const
{
    ADDR_NOT_IMPLEMENTED();

    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Lib::ComputeFmaskCoordFromAddr
*
*   @brief
*       Interface function stub of ComputeFmaskCoordFromAddr. Not implemented: the arguments are ignored.
*
*   @return
*       Always ADDR_NOTIMPLEMENTED
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr(
    const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,     ///< [in] input structure
    ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*       pOut     ///< [out] output structure
    ) const
{
    ADDR_NOT_IMPLEMENTED();

    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Lib::ComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info. Checks the structure sizes (when size checking is enabled)
*       and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeDccInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    const ADDR_E_RETURNCODE status = HwlComputeDccInfo(pIn, pOut);

    // The alignment check runs regardless of the hardware layer's return code.
    ValidMetaBaseAlignments(pOut->dccRamBaseAlign);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeDccAddrFromCoord
*
*   @brief
*       Interface function stub of ComputeDccAddrFromCoord. After the optional size check, the hardware layer is
*       first asked whether it supports the request; the address is only computed when it does.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlSupportComputeDccAddrFromCoord
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    const ADDR_E_RETURNCODE supported = HwlSupportComputeDccAddrFromCoord(pIn);

    if (supported == ADDR_OK)
    {
        HwlComputeDccAddrFromCoord(pIn, pOut);
    }

    return supported;
}

/**
************************************************************************************************************************
*   Lib::ComputePipeBankXor
*
*   @brief
*       Interface function stub of Addr2ComputePipeBankXor. Checks the structure sizes (when size checking is
*       enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputePipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut)
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputePipeBankXor(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeSlicePipeBankXor
*
*   @brief
*       Interface function stub of Addr2ComputeSlicePipeBankXor. Checks are applied in this order: structure
*       sizes (when size checking is enabled), swizzle/sample support, then the bpe value.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch or an unexpected bpe, ADDR_NOTSUPPORTED for non-thin, non
*       "non-PRT xor" or multi-sampled input, otherwise the return code of HwlComputeSlicePipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut)
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    // Only single-sampled thin surfaces using a non-PRT xor swizzle mode are handled.
    if ((IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) ||
        (IsNonPrtXor(pIn->swizzleMode) == FALSE) ||
        (pIn->numSamples > 1))
    {
        return ADDR_NOTSUPPORTED;
    }

    // bpe may be left as 0 or must be one of the power-of-two element sizes from 8 to 128.
    const BOOL_32 bpeAccepted = (pIn->bpe == 0)  ||
                                (pIn->bpe == 8)  ||
                                (pIn->bpe == 16) ||
                                (pIn->bpe == 32) ||
                                (pIn->bpe == 64) ||
                                (pIn->bpe == 128);

    if (bpeAccepted == FALSE)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeSlicePipeBankXor(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Interface function stub of Addr2ComputeSubResourceOffsetForSwizzlePattern. Checks the structure sizes
*       (when size checking is enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of
*       HwlComputeSubResourceOffsetForSwizzlePattern
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern(
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut)
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ComputeNonBlockCompressedView
*
*   @brief
*       Interface function stub of Addr2ComputeNonBlockCompressedView. Checks the structure sizes (when size
*       checking is enabled) and forwards to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeNonBlockCompressedView
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeNonBlockCompressedView(
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut)
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT)) ||
         (pOut->size != sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeNonBlockCompressedView(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::ExtractPipeBankXor
*
*   @brief
*       Internal function to extract bank and pipe xor bits from combined xor bits. The low pipeBits bits of
*       pipeBankXor are returned through pPipeX and the remaining upper bits through pBankX.
*
*   @note
*       pipeBankXor must fit in (pipeBits + bankBits) bits, and that bit count must be smaller than 32 so every
*       shift below is well defined. Outputs are left untouched when the input is rejected.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS (after asserting) when the input is rejected
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ExtractPipeBankXor(
    UINT_32  pipeBankXor,
    UINT_32  bankBits,
    UINT_32  pipeBits,
    UINT_32* pBankX,
    UINT_32* pPipeX)
{
    ADDR_E_RETURNCODE returnCode;

    // Shifting a 32-bit value by 32 or more is undefined, so reject such bit counts before shifting.
    // Checking each operand first also keeps the sum itself from wrapping around.
    const UINT_32 maxShift     = 32;
    const BOOL_32 bitsInRange  = (pipeBits < maxShift) &&
                                 (bankBits < maxShift) &&
                                 ((pipeBits + bankBits) < maxShift);

    if (bitsInRange && (pipeBankXor < (1u << (pipeBits + bankBits))))
    {
        // Unsigned mask: same result as the modulo by 2^pipeBits, without a signed shift.
        *pPipeX = pipeBankXor & ((1u << pipeBits) - 1u);
        *pBankX = pipeBankXor >> pipeBits;
        returnCode = ADDR_OK;
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
        returnCode = ADDR_INVALIDPARAMS;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfoSanityCheck
*
*   @brief
*       Internal function to do basic sanity check before compute surface info. Only the input structure size is
*       checked here (when size checking is enabled); everything else is delegated to the hardware layer.
*
*   @return
*       ADDR_INVALIDPARAMS on a size mismatch, otherwise the return code of HwlComputeSurfaceInfoSanityCheck
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT*  pIn   ///< [in] input structure
    ) const
{
    const BOOL_32 sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        (pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT));

    if (sizeMismatch)
    {
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeSurfaceInfoSanityCheck(pIn);
}

/**
************************************************************************************************************************
*   Lib::ApplyCustomizedPitchHeight
*
*   @brief
*       Helper function to override hw required row pitch/slice pitch by customized one. Nothing is changed for
*       surfaces with more than one mip level.
*
*   @note
*       A client pitch must be a multiple of pitchAlignInElement and not smaller than the incoming *pPitch. A
*       client slice size (sliceAlign) must be a whole number of rows of the resulting pitch and, for
*       multi-slice surfaces, must yield exactly the incoming *pHeight.
*
*   @return
*       ADDR_OK, or ADDR_INVALIDPARAMS when a client override violates the rules above
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    UINT_32  elementBytes,                          ///< [in] element bytes per element
    UINT_32  pitchAlignInElement,                   ///< [in] pitch alignment in element
    UINT_32* pPitch,                                ///< [in,out] pitch
    UINT_32* pHeight                                ///< [in,out] height
    ) const
{
    // Overrides are honoured for single-mip surfaces only.
    if (pIn->numMipLevels > 1)
    {
        return ADDR_OK;
    }

    // --- Row pitch override ---
    if (pIn->pitchInElement > 0)
    {
        if ((pIn->pitchInElement % pitchAlignInElement) != 0)
        {
            return ADDR_INVALIDPARAMS;
        }

        if (pIn->pitchInElement < (*pPitch))
        {
            return ADDR_INVALIDPARAMS;
        }

        *pPitch = pIn->pitchInElement;
    }

    // --- Slice size override (uses the possibly updated pitch) ---
    if (pIn->sliceAlign > 0)
    {
        const UINT_32 rowsPerSlice = pIn->sliceAlign / elementBytes / (*pPitch);

        // The requested slice size has to be an exact multiple of one row.
        if (rowsPerSlice * elementBytes * (*pPitch) != pIn->sliceAlign)
        {
            return ADDR_INVALIDPARAMS;
        }

        // With several slices the height cannot differ from the incoming one.
        if ((pIn->numSlices > 1) && ((*pHeight) != rowsPerSlice))
        {
            return ADDR_INVALIDPARAMS;
        }

        *pHeight = rowsPerSlice;
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfoLinear
*
*   @brief
*       Internal function to calculate alignment for linear swizzle surface; a direct pass-through to the
*       hardware layer.
*
*   @return
*       Return code of HwlComputeSurfaceInfoLinear
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    const ADDR_E_RETURNCODE status = HwlComputeSurfaceInfoLinear(pIn, pOut);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfoTiled
*
*   @brief
*       Internal function to calculate alignment for tiled swizzle surface; a direct pass-through to the
*       hardware layer.
*
*   @return
*       Return code of HwlComputeSurfaceInfoTiled
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    const ADDR_E_RETURNCODE status = HwlComputeSurfaceInfoTiled(pIn, pOut);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoordLinear
*
*   @brief
*       Internal function to calculate address from coord for linear swizzle surface. The surface layout is
*       obtained from ComputeSurfaceInfoLinear and the address is
*       sliceSize * slice + mipOffset + (y * mipPitch + x) * bytesPerElement.
*
*   @note
*       Multi-sample / multi-fragment input, a non-zero pipeBankXor, or a non-zero y on a 1D resource is
*       rejected. pIn->mipId is used to index the per-mip table without a range check in this function.
*       NOTE(review): presumably the caller validates mipId against numMipLevels - confirm.
*
*   @return
*       ADDR_OK on success; ADDR_INVALIDPARAMS for rejected input or when ComputeSurfaceInfoLinear fails
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;
    BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1) && (pIn->pipeBankXor == 0);

    if (valid)
    {
        // A 1D resource only has row 0.
        if (IsTex1d(pIn->resourceType))
        {
            valid = (pIn->y == 0);
        }
    }

    if (valid)
    {
        ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
        ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
        ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

        // Dimensions and counts are clamped to at least 1 so zeroed fields describe a 1x1x1, single-mip surface.
        localIn.bpp          = pIn->bpp;
        localIn.flags        = pIn->flags;
        localIn.width        = Max(pIn->unalignedWidth, 1u);
        localIn.height       = Max(pIn->unalignedHeight, 1u);
        localIn.numSlices    = Max(pIn->numSlices, 1u);
        localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
        localIn.resourceType = pIn->resourceType;

        // A client-provided pitch is only forwarded for single-mip surfaces.
        if (localIn.numMipLevels <= 1)
        {
            localIn.pitchInElement = pIn->pitchInElement;
        }

        localOut.pMipInfo = mipInfo;

        returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut);

        if (returnCode == ADDR_OK)
        {
            const ADDR2_MIP_INFO& mip = mipInfo[pIn->mipId];

            // Widen before multiplying: y * pitch * bytesPerElement can exceed 32 bits on large surfaces, and
            // the product would otherwise wrap before being added to the 64-bit address.
            const UINT_64 elemIndex   = (static_cast<UINT_64>(pIn->y) * mip.pitch) + pIn->x;
            const UINT_64 offsetInMip = elemIndex * (pIn->bpp >> 3);

            pOut->addr        = (localOut.sliceSize * pIn->slice) + mip.offset + offsetInMip;
            pOut->bitPosition = 0;
        }
        else
        {
            valid = FALSE;
        }
    }

    // Any failure, including one reported by ComputeSurfaceInfoLinear, is surfaced as ADDR_INVALIDPARAMS.
    if (valid == FALSE)
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface; a direct pass-through to
*       the hardware layer.
*
*   @return
*       Return code of HwlComputeSurfaceAddrFromCoordTiled
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    const ADDR_E_RETURNCODE status = HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut);

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddrLinear
*
*   @brief
*       Internal function to calculate coord from address for linear swizzle surface. The surface layout is
*       obtained from ComputeSurfaceInfoLinear; the address is then split into slice, mip level, row (y) and
*       column (x).
*
*   @note
*       Multi-sample / multi-fragment input is rejected, as is a 1D resource whose unalignedHeight is not 1.
*       The mip walk and the final range check use pIn->numMipLevels as given (not the clamped local copy), so
*       a numMipLevels of 0 ends up rejected by the final range check.
*
*   @return
*       ADDR_OK on success; ADDR_INVALIDPARAMS for rejected input, when ComputeSurfaceInfoLinear fails, or when
*       the address does not map to a coordinate inside the surface
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear(
     const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1);

    if (valid)
    {
        // A 1D resource must be exactly one row high.
        if (IsTex1d(pIn->resourceType))
        {
            valid = (pIn->unalignedHeight == 1);
        }
    }

    if (valid)
    {
        ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
        // Dimensions and counts are clamped to at least 1 before querying the layout.
        localIn.bpp          = pIn->bpp;
        localIn.flags        = pIn->flags;
        localIn.width        = Max(pIn->unalignedWidth, 1u);
        localIn.height       = Max(pIn->unalignedHeight, 1u);
        localIn.numSlices    = Max(pIn->numSlices, 1u);
        localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
        localIn.resourceType = pIn->resourceType;
        // A client-provided pitch is only forwarded for single-mip surfaces.
        if (localIn.numMipLevels <= 1)
        {
            localIn.pitchInElement = pIn->pitchInElement;
        }
        returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut);

        if (returnCode == ADDR_OK)
        {
            // Whole slices come first in memory: the quotient selects the slice, the remainder is the byte
            // offset inside that slice (narrowed to 32 bits).
            pOut->slice = static_cast<UINT_32>(pIn->addr / localOut.sliceSize);
            pOut->sample = 0;

            UINT_32 offsetInSlice = static_cast<UINT_32>(pIn->addr % localOut.sliceSize);
            UINT_32 elementBytes = pIn->bpp >> 3;
            UINT_32 mipOffsetInSlice = 0;
            UINT_32 mipSize = 0;
            UINT_32 mipId = 0;
            // Walk the mip levels, accumulating their sizes, until the level containing offsetInSlice is found.
            // On exit mipId is the candidate level and mipOffsetInSlice is the byte offset where it starts.
            for (; mipId < pIn->numMipLevels ; mipId++)
            {
                if (IsTex1d(pIn->resourceType))
                {
                    // 1D: every level occupies one row of the base pitch.
                    mipSize = localOut.pitch * elementBytes;
                }
                else
                {
                    // Level height = base height rounded up to a multiple of 2^mipId, then divided by 2^mipId;
                    // every level uses the base pitch.
                    UINT_32 currentMipHeight = (PowTwoAlign(localIn.height, (1 << mipId))) >> mipId;
                    mipSize = currentMipHeight * localOut.pitch * elementBytes;
                }

                if (mipSize == 0)
                {
                    valid = FALSE;
                    break;
                }
                else if ((mipSize + mipOffsetInSlice) > offsetInSlice)
                {
                    // offsetInSlice falls inside this level.
                    break;
                }
                else
                {
                    // Skip this level. Running past the last level or past the end of the slice marks the
                    // address invalid; the loop is not broken here, but valid stays FALSE afterwards.
                    mipOffsetInSlice += mipSize;
                    if ((mipId == (pIn->numMipLevels - 1)) ||
                        (mipOffsetInSlice >= localOut.sliceSize))
                    {
                        valid = FALSE;
                    }
                }
            }

            if (valid)
            {
                pOut->mipId = mipId;

                // Convert the byte offset inside the level into an element index, then into (x, y).
                UINT_32 elemOffsetInMip = (offsetInSlice - mipOffsetInSlice) / elementBytes;
                if (IsTex1d(pIn->resourceType))
                {
                    if (elemOffsetInMip < localOut.pitch)
                    {
                        pOut->x = elemOffsetInMip;
                        pOut->y = 0;
                    }
                    else
                    {
                        valid = FALSE;
                    }
                }
                else
                {
                    pOut->y = elemOffsetInMip / localOut.pitch;
                    pOut->x = elemOffsetInMip % localOut.pitch;
                }

                // Final range check against the unaligned surface: slice and mip counts, the level's
                // width/height (base size shifted right by mipId, at least 1), and for 3D resources the
                // mip/slice constraint.
                if ((pOut->slice >= pIn->numSlices)    ||
                    (pOut->mipId >= pIn->numMipLevels) ||
                    (pOut->x >= Max((pIn->unalignedWidth >> pOut->mipId), 1u))  ||
                    (pOut->y >= Max((pIn->unalignedHeight >> pOut->mipId), 1u)) ||
                    (IsTex3d(pIn->resourceType) &&
                     (FALSE == Valid3DMipSliceIdConstraint(pIn->numSlices,
                                                           pOut->mipId,
                                                           pOut->slice))))
                {
                    valid = FALSE;
                }
            }
        }
        else
        {
            valid = FALSE;
        }
    }

    // Any failure, including one reported by ComputeSurfaceInfoLinear, is surfaced as ADDR_INVALIDPARAMS.
    if (valid == FALSE)
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceCoordFromAddrTiled
*
*   @brief
*       Internal function to calculate coord from address for tiled swizzle surface. Not implemented: the
*       arguments are ignored.
*
*   @return
*       Always ADDR_NOTIMPLEMENTED
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled(
     const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_NOT_IMPLEMENTED();

    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Lib::ComputeBlockDimensionForSurf
*
*   @brief
*       Internal function to get block width/height/depth in element from surface input params. Thick
*       combinations are tried first, then thin ones; the outputs are left untouched when neither applies.
*
*   @return
*       ADDR_OK, or ADDR_INVALIDPARAMS (after asserting) when the resource type / swizzle mode pair is neither
*       thick nor thin
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    if (IsThick(resourceType, swizzleMode))
    {
        // numSamples is not an input of the thick block computation.
        ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
        return ADDR_OK;
    }

    if (IsThin(resourceType, swizzleMode))
    {
        ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, numSamples, resourceType, swizzleMode);
        return ADDR_OK;
    }

    ADDR_ASSERT_ALWAYS();
    return ADDR_INVALIDPARAMS;
}

/**
************************************************************************************************************************
*   Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal function to get thin block width/height/depth in element from surface input params.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    // GFX9 and GFX10 amplify the block dimension differently: a 128KB block with 1xAA and 1BPE is [256W * 512H]
    // on GFX9 ASICs but [512W * 256H] on GFX10 ASICs. The GFX10 behaviour lives in this base class because it is
    // the newer HWL, which saves changes for future HWLs.
    const UINT_32 blkSizeLog2   = GetBlockSizeLog2(swizzleMode);
    const UINT_32 elemBytesLog2 = Log2(bpp >> 3);
    const UINT_32 samplesLog2   = Log2(Max(numSamples, 1u));   // a sample count of 0 is treated as 1

    // log2 of the number of elements that fit into one block
    const UINT_32 elemCountLog2 = blkSizeLog2 - elemBytesLog2 - samplesLog2;

    // Width takes the extra power of two (width == height or width == 2 * height) for "1xAA/4xAA" and for
    // "2xAA/8xAA with an odd log2 block size"; otherwise height takes it (height == width or height == 2 * width)
    const BOOL_32 evenSampleLog2 = ((samplesLog2 & 1) == 0);
    const BOOL_32 oddBlkSizeLog2 = ((blkSizeLog2 & 1) != 0);

    UINT_32 widthLog2 = elemCountLog2;

    if (evenSampleLog2 || oddBlkSizeLog2)
    {
        widthLog2 += 1;
    }

    widthLog2 /= 2;

    const UINT_32 heightLog2 = elemCountLog2 - widthLog2;

    *pWidth  = 1u << widthLog2;
    *pHeight = 1u << heightLog2;
    *pDepth  = 1;
}

/**
************************************************************************************************************************
*   Lib::ComputeBlockDimension
*
*   @brief
*       Internal function to get block width/height/depth in element without considering MSAA case
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    // Classify once; the thin check only runs when the mode is not thick
    const BOOL_32 thickMode = IsThick(resourceType, swizzleMode);
    const BOOL_32 thinMode  = (thickMode == FALSE) && IsThin(resourceType, swizzleMode);

    ADDR_E_RETURNCODE status = ADDR_INVALIDPARAMS;

    if (thickMode)
    {
        ComputeThickBlockDimension(pWidth, pHeight, pDepth, bpp, resourceType, swizzleMode);
        status = ADDR_OK;
    }
    else if (thinMode)
    {
        // MSAA is not considered here, so 0 is passed as the sample count
        ComputeThinBlockDimension(pWidth, pHeight, pDepth, bpp, 0, resourceType, swizzleMode);
        status = ADDR_OK;
    }
    else
    {
        // Neither thick nor thin: the outputs are left untouched
        ADDR_ASSERT_ALWAYS();
    }

    return status;
}

/**
************************************************************************************************************************
*   Lib::ComputeThickBlockDimension
*
*   @brief
*       Internal function to get block width/height/depth in element for thick swizzle mode
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeThickBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThick(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);

    // Block1K_3d is indexed by log2 of the element size in bytes
    const UINT_32 tableIdx = Log2(bpp >> 3);

    ADDR_ASSERT(tableIdx < sizeof(Block1K_3d) / sizeof(Block1K_3d[0]));

    // Powers of two by which the block exceeds the 1KB (2^10) table entry, spread over the three axes:
    // every axis gets the common share, depth gets one more if anything is left over, and height gets
    // one more only when two are left over.
    const UINT_32 extraLog2   = blkSizeLog2 - 10;
    const UINT_32 commonShift = extraLog2 / 3;
    const UINT_32 leftover    = extraLog2 % 3;

    const UINT_32 widthShift  = commonShift;
    const UINT_32 heightShift = commonShift + (leftover / 2);
    const UINT_32 depthShift  = commonShift + ((leftover != 0) ? 1 : 0);

    *pWidth  = Block1K_3d[tableIdx].w << widthShift;
    *pHeight = Block1K_3d[tableIdx].h << heightShift;
    *pDepth  = Block1K_3d[tableIdx].d << depthShift;
}

/**
************************************************************************************************************************
*   Lib::GetMipTailDim
*
*   @brief
*       Internal function to get the max dimension of the first level in the mip tail
*
*   @return
*       Max Width/Height/Depth value of the first mip fitted in mip tail
************************************************************************************************************************
*/
Dim3d Lib::GetMipTailDim(
    AddrResourceType  resourceType,
    AddrSwizzleMode   swizzleMode,
    UINT_32           blockWidth,
    UINT_32           blockHeight,
    UINT_32           blockDepth) const
{
    // Start from the full block; exactly one axis gets halved below
    Dim3d         tailDim     = {blockWidth, blockHeight, blockDepth};
    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);

    if (IsThick(resourceType, swizzleMode))
    {
        // The axis to halve depends on log2(block size) modulo 3
        switch (blkSizeLog2 % 3)
        {
            case 0:
                tailDim.h >>= 1;
                break;
            case 1:
                tailDim.w >>= 1;
                break;
            default:
                tailDim.d >>= 1;
                break;
        }
    }
    else
    {
        ADDR_ASSERT(IsThin(resourceType, swizzleMode));

        BOOL_32 halveHeight = FALSE;

#if DEBUG
        // GFX9 and GFX10 shrink the mipmap tail differently: for a 128KB block with 2BPE the largest mip tail
        // level is [256W * 128H] on GFX9 ASICs and [128W * 256H] on GFX10 ASICs. The GFX10 behaviour lives in
        // this base class because it is the newer HWL. log2 block size is expected to always be even on GFX9,
        // so this DEBUG-only branch should never be taken.
        if ((blkSizeLog2 & 1) && (m_chipFamily == ADDR_CHIP_FAMILY_AI))
        {
            // Should never go here...
            ADDR_ASSERT_ALWAYS();

            halveHeight = TRUE;
        }
#endif

        if (halveHeight)
        {
            tailDim.h >>= 1;
        }
        else
        {
            tailDim.w >>= 1;
        }
    }

    return tailDim;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurface2DMicroBlockOffset
*
*   @brief
*       Internal function to calculate micro block (256B) offset from coord for 2D resource
*
*   @return
*       micro block (256B) offset for 2D resource
************************************************************************************************************************
*/
UINT_32 Lib::ComputeSurface2DMicroBlockOffset(
    const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const
{
    // Only meaningful for thin swizzle modes
    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));

    // bpp is in bits; convert to bytes before taking log2
    UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
    // Stays 0 when the swizzle mode is none of standard / display / rotate
    UINT_32 microBlockOffset = 0;
    if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode))
    {
        // x scaled by the element size, i.e. x as a byte offset
        UINT_32 xBits = pIn->x << log2ElementBytes;
        // Offset bits [3:0] <- xBits[3:0], offset bits [5:4] <- y[1:0]
        microBlockOffset = (xBits & 0xf) | ((pIn->y & 0x3) << 4);
        if (log2ElementBytes < 3)
        {
            // Elements of 1/2/4 bytes: offset bit 6 <- y[2]
            microBlockOffset |= (pIn->y & 0x4) << 4;
            if (log2ElementBytes == 0)
            {
                // 1-byte elements: offset bit 7 <- y[3]
                microBlockOffset |= (pIn->y & 0x8) << 4;
            }
            else
            {
                // 2/4-byte elements: offset bit 7 <- xBits[4]
                microBlockOffset |= (xBits & 0x10) << 3;
            }
        }
        else
        {
            // Elements of 8 bytes and up: offset bits [7:6] <- xBits[5:4]
            microBlockOffset |= (xBits & 0x30) << 2;
        }
    }
    else if (IsDisplaySwizzle(pIn->resourceType, pIn->swizzleMode))
    {
        if (log2ElementBytes == 4)
        {
            // 16-byte elements: x[0], y[0], x[1], y[1] are shifted to offset bits 4, 5, 6, 7
            // (assumes GetBit yields 0 or 1 -- TODO confirm against its definition)
            microBlockOffset = (GetBit(pIn->x, 0) << 4) |
                               (GetBit(pIn->y, 0) << 5) |
                               (GetBit(pIn->x, 1) << 6) |
                               (GetBit(pIn->y, 1) << 7);
        }
        else
        {
            // NOTE(review): GetBits is defined elsewhere; its arguments are presumed to be
            // (value, source start bit, bit count, destination start bit) -- confirm.
            // First pass: gather x and y bits (skipping y[0]) above the element-size bits.
            microBlockOffset = GetBits(pIn->x, 0, 3, log2ElementBytes)     |
                               GetBits(pIn->y, 1, 2, 3 + log2ElementBytes) |
                               GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) |
                               GetBits(pIn->y, 3, 1, 6 + log2ElementBytes);
            // Second pass: y[0] is placed at bit 4, with pieces of the first-pass value on either side of it
            microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) |
                               (GetBit(pIn->y, 0) << 4) |
                               GetBits(microBlockOffset, 4, 3, 5);
        }
    }
    else if (IsRotateSwizzle(pIn->swizzleMode))
    {
        // Same two-pass construction as the display case, with the roles of x and y exchanged
        // in the first, second and fourth terms and x[0] inserted at bit 4
        microBlockOffset = GetBits(pIn->y, 0, 3, log2ElementBytes) |
                           GetBits(pIn->x, 1, 2, 3 + log2ElementBytes) |
                           GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) |
                           GetBits(pIn->y, 3, 1, 6 + log2ElementBytes);
        microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) |
                           (GetBit(pIn->x, 0) << 4) |
                           GetBits(microBlockOffset, 4, 3, 5);
        if (log2ElementBytes == 3)
        {
           // 8-byte elements: the upper part of the offset is rebuilt from x bits starting at x[1]
           microBlockOffset = GetBits(microBlockOffset, 0, 6, 0) |
                              GetBits(pIn->x, 1, 2, 6);
        }
    }

    return microBlockOffset;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurface3DMicroBlockOffset
*
*   @brief
*       Internal function to calculate micro block (1KB) offset from coord for 3D resource
*
*   @return
*       micro block (1KB) offset for 3D resource
************************************************************************************************************************
*/
UINT_32 Lib::ComputeSurface3DMicroBlockOffset(
    const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const
{
    // Only meaningful for thick swizzle modes
    ADDR_ASSERT(IsThick(pIn->resourceType, pIn->swizzleMode));

    // bpp is in bits; convert to bytes before taking log2
    UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
    // Stays 0 when the swizzle mode is neither standard nor Z order
    UINT_32 microBlockOffset = 0;
    if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode))
    {
        // Step 1: pick the two coordinate bits that end up as offset bits [9:8];
        // which coordinates supply them depends on the element size.
        if (log2ElementBytes == 0)
        {
            // 1-byte elements: slice[2] and y[2]
            microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1);
        }
        else if (log2ElementBytes == 1)
        {
            // 2-byte elements: same selection as the 1-byte case
            microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1);
        }
        else if (log2ElementBytes == 2)
        {
            // 4-byte elements: y[2] and x[2]
            microBlockOffset = ((pIn->y & 4) >> 2) | ((pIn->x & 4) >> 1);
        }
        else if (log2ElementBytes == 3)
        {
            // 8-byte elements: x[2:1]
            microBlockOffset = (pIn->x & 6) >> 1;
        }
        else
        {
            // Larger elements: x[1:0]
            microBlockOffset = pIn->x & 3;
        }

        // Step 2: move those two bits above the low 8 offset bits
        microBlockOffset <<= 8;

        // Step 3: low 8 bits: [3:0] <- x as a byte offset, [5:4] <- y[1:0], [7:6] <- slice[1:0]
        UINT_32 xBits = pIn->x << log2ElementBytes;
        microBlockOffset |= (xBits & 0xf) | ((pIn->y & 0x3) << 4) | ((pIn->slice & 0x3) << 6);
    }
    else if (IsZOrderSwizzle(pIn->swizzleMode))
    {
        // Coordinate bits left over after the low bits below have been consumed;
        // every branch assigns all three before they are read.
        UINT_32 xh, yh, zh;

        if (log2ElementBytes == 0)
        {
            // 1-byte elements: interleave x[0], y[0], x[1], y[1] into bits [3:0],
            // then slice[1:0] into bits [5:4] and x[2] into bit 6
            microBlockOffset =
                (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2);
            microBlockOffset = microBlockOffset | ((pIn->slice & 3) << 4) | ((pIn->x & 4) << 4);

            xh = pIn->x >> 3;
            yh = pIn->y >> 2;
            zh = pIn->slice >> 2;
        }
        else if (log2ElementBytes == 1)
        {
            // 2-byte elements: same x/y interleave shifted up by one bit, slice[1:0] into bits [6:5]
            microBlockOffset =
                (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2);
            microBlockOffset = (microBlockOffset << 1) | ((pIn->slice & 3) << 5);

            xh = pIn->x >> 2;
            yh = pIn->y >> 2;
            zh = pIn->slice >> 2;
        }
        else if (log2ElementBytes == 2)
        {
            // 4-byte elements: x[0], y[0], x[1], slice[0] shifted up by two bits, y[1] into bit 6
            microBlockOffset =
                (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->slice & 1) << 3);
            microBlockOffset = (microBlockOffset << 2) | ((pIn->y & 2) << 5);

            xh = pIn->x >> 2;
            yh = pIn->y >> 2;
            zh = pIn->slice >> 1;
        }
        else if (log2ElementBytes == 3)
        {
            // 8-byte elements: x[0], y[0], slice[0], x[1] shifted up by three bits
            microBlockOffset =
                (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2) | ((pIn->x & 2) << 2);
            microBlockOffset <<= 3;

            xh = pIn->x >> 2;
            yh = pIn->y >> 1;
            zh = pIn->slice >> 1;
        }
        else
        {
            // Larger elements: x[0], y[0], slice[0] shifted up by four bits
            microBlockOffset =
                (((pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2)) << 4);

            xh = pIn->x >> 1;
            yh = pIn->y >> 1;
            zh = pIn->slice >> 1;
        }

        // Offset bits [9:7] (mask 0x380) come from MortonGen3d over the leftover coordinate bits.
        // NOTE(review): MortonGen3d is defined elsewhere; presumably it interleaves one bit of each
        // coordinate here -- confirm.
        microBlockOffset |= ((MortonGen3d(xh, yh, zh, 1) << 7) & 0x380);
    }

    return microBlockOffset;
}

/**
************************************************************************************************************************
*   Lib::GetPipeXorBits
*
*   @brief
*       Internal function to get the number of bits available for the pipe/SE xor operation
*
*   @return
*       Number of pipe/SE xor bits
************************************************************************************************************************
*/
UINT_32 Lib::GetPipeXorBits(
    UINT_32 macroBlockBits) const
{
    ADDR_ASSERT(macroBlockBits >= m_pipeInterleaveLog2);

    // Bits of the macro block that sit above the pipe interleave are available for xor
    const UINT_32 availableBits = macroBlockBits - m_pipeInterleaveLog2;

    // The pipe/SE xor can use at most pipesLog2 + seLog2 of them
    const UINT_32 pipeSeBits = m_pipesLog2 + m_seLog2;

    return Min(availableBits, pipeSeBits);
}

/**
************************************************************************************************************************
*   Lib::Addr2GetPreferredSurfaceSetting
*
*   @brief
*       Internal function to get suggested surface information for the client to use
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
{
    // When size-field checking is enabled, both structures must carry their exact size
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        const BOOL_32 sizesMatch = (pIn->size == sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT)) &&
                                   (pOut->size == sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT));

        if (sizesMatch == FALSE)
        {
            return ADDR_INVALIDPARAMS;
        }
    }

    // The actual selection is done by the hardware layer
    return HwlGetPreferredSurfaceSetting(pIn, pOut);
}

/**
************************************************************************************************************************
*   Lib::GetPossibleSwizzleModes
*
*   @brief
*       Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetPossibleSwizzleModes(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
{
    // Pass-through: no validation is done here, the hardware layer produces the result
    const ADDR_E_RETURNCODE hwlStatus = HwlGetPossibleSwizzleModes(pIn, pOut);

    return hwlStatus;
}

/**
************************************************************************************************************************
*   Lib::GetAllowedBlockSet
*
*   @brief
*       Returns the set of allowed block sizes given the allowed swizzle modes and resource type
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetAllowedBlockSet(
    ADDR2_SWMODE_SET allowedSwModeSet,
    AddrResourceType rsrcType,
    ADDR2_BLOCK_SET* pAllowedBlockSet) const
{
    // Pass-through: the hardware layer fills in pAllowedBlockSet
    const ADDR_E_RETURNCODE hwlStatus = HwlGetAllowedBlockSet(allowedSwModeSet, rsrcType, pAllowedBlockSet);

    return hwlStatus;
}

/**
************************************************************************************************************************
*   Lib::GetAllowedSwSet
*
*   @brief
*       Returns the set of allowed swizzle types given the allowed swizzle modes
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetAllowedSwSet(
    ADDR2_SWMODE_SET  allowedSwModeSet,
    ADDR2_SWTYPE_SET* pAllowedSwSet) const
{
    // Pass-through: the hardware layer fills in pAllowedSwSet
    const ADDR_E_RETURNCODE hwlStatus = HwlGetAllowedSwSet(allowedSwModeSet, pAllowedSwSet);

    return hwlStatus;
}

/**
************************************************************************************************************************
*   Lib::ComputeBlock256Equation
*
*   @brief
*       Compute equation for block 256B
*
*   @return
*       If equation computed successfully
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeBlock256Equation(
    AddrResourceType rsrcType,
    AddrSwizzleMode swMode,
    UINT_32 elementBytesLog2,
    ADDR_EQUATION* pEquation) const
{
    // Reject anything that is not a 256B block mode before reaching the hardware layer
    if (IsBlock256b(swMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
}

/**
************************************************************************************************************************
*   Lib::ComputeThinEquation
*
*   @brief
*       Compute equation for 2D/3D resource which use THIN mode
*
*   @return
*       If equation computed successfully
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeThinEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode swMode,
    UINT_32 elementBytesLog2,
    ADDR_EQUATION* pEquation) const
{
    // Reject anything that is not a thin mode before reaching the hardware layer
    if (IsThin(rsrcType, swMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeThinEquation(rsrcType, swMode, elementBytesLog2, pEquation);
}

/**
************************************************************************************************************************
*   Lib::ComputeThickEquation
*
*   @brief
*       Compute equation for 3D resource which use THICK mode
*
*   @return
*       If equation computed successfully
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeThickEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode swMode,
    UINT_32 elementBytesLog2,
    ADDR_EQUATION* pEquation) const
{
    // Reject anything that is not a thick mode before reaching the hardware layer
    if (IsThick(rsrcType, swMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        return ADDR_INVALIDPARAMS;
    }

    return HwlComputeThickEquation(rsrcType, swMode, elementBytesLog2, pEquation);
}

/**
************************************************************************************************************************
*   Lib::ComputeQbStereoInfo
*
*   @brief
*       Get quad buffer stereo information
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ComputeQbStereoInfo(
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut    ///< [in,out] updated pOut+pStereoInfo
    ) const
{
    ADDR_ASSERT(pOut->bpp >= 8);
    ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0);

    // Record the single-eye layout before anything is doubled: the per-eye height, and the
    // right eye's offset, which is the current surface size
    pOut->pStereoInfo->eyeHeight   = pOut->height;
    pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize);

    // Both heights are doubled
    pOut->height      *= 2;
    pOut->pixelHeight *= 2;

    ADDR_ASSERT(pOut->height <= MaxSurfaceHeight);

    // ...and so are the sizes
    pOut->surfSize  *= 2;
    pOut->sliceSize *= 2;
}

/**
************************************************************************************************************************
*   Lib::FilterInvalidEqSwizzleMode
*
*   @brief
*       Filter out swizzle mode(s) if it doesn't have valid equation index
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::FilterInvalidEqSwizzleMode(
    ADDR2_SWMODE_SET& allowedSwModeSet,   ///< [in,out] candidate swizzle modes, one bit per mode
    AddrResourceType  resourceType,       ///< [in] resource type; 1D resources are never filtered
    UINT_32           elemLog2,           ///< [in] log2 of element bytes, last index of the equation lookup table
    UINT_32           maxComponents       ///< [in] largest numBitComponents an equation may have to be kept
    ) const
{
    if (resourceType != ADDR_RSRC_TEX_1D)
    {
        UINT_32       allowedSwModeSetVal = allowedSwModeSet.value;

        // The lookup table's first index is the resource type minus one
        const UINT_32 rsrcTypeIdx         = static_cast<UINT_32>(resourceType) - 1;

        // The walk starts at swizzle mode 1, so bit 0 is shifted out first. This keeps the bit tested in each
        // iteration, the table row that is read and the bit that is cleared all tied to the same swModeIdx.
        // Without the shift, bit N of the set would be judged by (and would clear) mode N + 1, and a set with
        // bit 31 would index one row past mode 31. Bit 0 itself is never filtered.
        UINT_32       validSwModeSet      = allowedSwModeSetVal >> 1;

        for (UINT_32 swModeIdx = 1; validSwModeSet != 0; swModeIdx++)
        {
            if (validSwModeSet & 1)
            {
                const UINT_32 equation = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];

                // Drop the mode if it has no equation, or if its equation uses more bit components than allowed.
                // The first test short-circuits, so m_equationTable is never indexed with the invalid marker.
                if ((equation == ADDR_INVALID_EQUATION_INDEX) ||
                    (m_equationTable[equation].numBitComponents > maxComponents))
                {
                    allowedSwModeSetVal &= ~(1u << swModeIdx);
                }
            }

            validSwModeSet >>= 1;
        }

        // Only apply the filtering if at least one valid swizzle mode remains
        if (allowedSwModeSetVal != 0)
        {
            allowedSwModeSet.value = allowedSwModeSetVal;
        }
    }
}

#if DEBUG
/**
************************************************************************************************************************
*   Lib::ValidateStereoInfo
*
*   @brief
*       Validate stereo info by checking a few typical cases
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Lib::ValidateStereoInfo(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT*  pIn,   ///< [in] input structure
    const ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut   ///< [in] output structure
    ) const
{
    // Address query for the same surface, with the stereo flag cleared
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT coordQuery = {};
    coordQuery.size           = sizeof(coordQuery);
    coordQuery.swizzleMode    = pIn->swizzleMode;
    coordQuery.resourceType   = pIn->resourceType;
    coordQuery.flags          = pIn->flags;
    coordQuery.flags.qbStereo = 0;
    coordQuery.bpp            = pIn->bpp;
    coordQuery.unalignedWidth = pIn->width;
    coordQuery.numSlices      = pIn->numSlices;
    coordQuery.numMipLevels   = pIn->numMipLevels;
    coordQuery.numSamples     = pIn->numSamples;
    coordQuery.numFrags       = pIn->numFrags;

    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT coordResult = {};
    coordResult.size = sizeof(coordResult);

    // Call Addr2ComputePipeBankXor() and validate different pbXor value if necessary...
    const UINT_32 pbXor = 0;

    // Make the array to be {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096} for full test
    const UINT_32 TestCoord[]   = {0};
    const UINT_32 numTestCoords = sizeof(TestCoord) / sizeof(TestCoord[0]);

    for (UINT_32 xIdx = 0; xIdx < numTestCoords; xIdx++)
    {
        // Skip x coordinates outside the surface
        if (TestCoord[xIdx] >= pIn->width)
        {
            continue;
        }

        coordQuery.x = TestCoord[xIdx];

        for (UINT_32 yIdx = 0; yIdx < numTestCoords; yIdx++)
        {
            // Skip y coordinates outside the surface
            if (TestCoord[yIdx] >= pIn->height)
            {
                continue;
            }

            // First query: the pixel addressed through the doubled-height surface, eyeHeight rows down,
            // with the right eye's swizzle folded into the pipe/bank xor
            coordQuery.y               = TestCoord[yIdx] + pOut->pStereoInfo->eyeHeight;
            coordQuery.pipeBankXor     = pbXor ^ pOut->pStereoInfo->rightSwizzle;
            coordQuery.unalignedHeight = pIn->height + pOut->pStereoInfo->eyeHeight;

            const ADDR_E_RETURNCODE retFromBase = ComputeSurfaceAddrFromCoord(&coordQuery, &coordResult);
            ADDR_ASSERT(retFromBase == ADDR_OK);

            const UINT_64 rightEyeOffsetFromBase = coordResult.addr;

            // Second query: the same pixel addressed within a single-eye surface
            coordQuery.y               = TestCoord[yIdx];
            coordQuery.pipeBankXor     = pbXor;
            coordQuery.unalignedHeight = pIn->height;

            const ADDR_E_RETURNCODE retRelative = ComputeSurfaceAddrFromCoord(&coordQuery, &coordResult);
            ADDR_ASSERT(retRelative == ADDR_OK);

            const UINT_64 rightEyeOffsetRelative = coordResult.addr;

            // The two addresses must differ by exactly the reported right-eye offset
            ADDR_ASSERT(rightEyeOffsetFromBase == rightEyeOffsetRelative + pOut->pStereoInfo->rightOffset);
        }
    }
}
#endif

} // V2
} // Addr
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib2.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
************************************************************************************************************************
* @file  addrlib2.h
* @brief Contains the Addr::V2::Lib class definition.
************************************************************************************************************************
*/

#ifndef __ADDR2_LIB2_H__
#define __ADDR2_LIB2_H__

#include "addrlib.h"

namespace rocr {
namespace Addr
{
namespace V2
{

/**
************************************************************************************************************************
* @brief Flags for SwizzleModeTable
************************************************************************************************************************
*/
// Per-swizzle-mode property flags. The bit fields (12 one-bit flags plus 20 reserved bits) add up
// to 32 bits and are overlaid with u32All so the whole set can be read or written at once.
union SwizzleModeFlags
{
    struct
    {
        // Swizzle mode
        UINT_32 isLinear        : 1;    // Linear

        // Block size
        UINT_32 is256b          : 1;    // Block size is 256B
        UINT_32 is4kb           : 1;    // Block size is 4KB
        UINT_32 is64kb          : 1;    // Block size is 64KB
        UINT_32 isVar           : 1;    // Block size is variable

        // Swizzle type
        UINT_32 isZ             : 1;    // Z order swizzle mode
        UINT_32 isStd           : 1;    // Standard swizzle mode
        UINT_32 isDisp          : 1;    // Display swizzle mode
        UINT_32 isRot           : 1;    // Rotate swizzle mode

        // XOR mode
        UINT_32 isXor           : 1;    // XOR after swizzle if set

        UINT_32 isT             : 1;    // T mode

        // GFX10
        UINT_32 isRtOpt         : 1;    // mode opt for render target

        UINT_32 reserved        : 20;   // Reserved bits
    };

    UINT_32 u32All;                     // All flags viewed as one 32-bit value
};

// Two-component dimension (presumably width and height, going by the member names)
struct Dim2d
{
    UINT_32 w;
    UINT_32 h;
};

// Three-component dimension: width, height and depth
struct Dim3d
{
    UINT_32 w;  // Width
    UINT_32 h;  // Height
    UINT_32 d;  // Depth
};

// Macro define resource block type
// Resource block type
enum AddrBlockType
{
    AddrBlockLinear    = 0, // Resource uses linear swizzle mode
    AddrBlockMicro     = 1, // Resource uses 256B block
    AddrBlockThin4KB   = 2, // Resource uses thin 4KB block
    AddrBlockThick4KB  = 3, // Resource uses thick 4KB block
    AddrBlockThin64KB  = 4, // Resource uses thin 64KB block
    AddrBlockThick64KB = 5, // Resource uses thick 64KB block
    AddrBlockThinVar   = 6, // Resource uses thin var block
    AddrBlockThickVar  = 7, // Resource uses thick var block
    AddrBlockMaxTiledType,  // One past the last block type above

    // 256KB names share the values of the var block types
    AddrBlockThin256KB  = AddrBlockThinVar,
    AddrBlockThick256KB = AddrBlockThickVar,
};

// Bit masks for sets of swizzle types: each mask has the single bit numbered by the matching ADDR_SW_* value
enum AddrSwSet
{
    AddrSwSetZ = 1 << ADDR_SW_Z,
    AddrSwSetS = 1 << ADDR_SW_S,
    AddrSwSetD = 1 << ADDR_SW_D,
    AddrSwSetR = 1 << ADDR_SW_R,

    // Union of the four masks above
    AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
};

// Sizes in bytes: 256B, 4KB and 64KB
const UINT_32 Size256 = 256u;
const UINT_32 Size4K  = 4096u;
const UINT_32 Size64K = 65536u;

// log2 of the sizes above (2^8 = 256, 2^12 = 4096, 2^16 = 65536)
const UINT_32 Log2Size256 = 8u;
const UINT_32 Log2Size4K  = 12u;
const UINT_32 Log2Size64K = 16u;

/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/
// Four 16-bit channel masks (x, y, z, s) overlaid with a single 64-bit value, so a whole
// setting can be built, combined or compared through 'value'.
// NOTE(review): which 16 bits of 'value' each member occupies depends on the target's struct
// layout and byte order -- confirm before relying on a specific mapping.
union ADDR_BIT_SETTING
{
    struct
    {
        UINT_16 x;
        UINT_16 y;
        UINT_16 z;
        UINT_16 s;
    };
    UINT_64 value;
};

/**
************************************************************************************************************************
* @brief Swizzle pattern information
************************************************************************************************************************
*/
// Accessed by an index representing the log base 2 of the element size (8bpp/16bpp/32bpp/64bpp/128bpp).
// Contains the indices that map into the 2D arrays SW_PATTERN_NIBBLE[0-9], each of which holds a section of an
// index equation. They are dependent on the pipe count and the bytes per element.
struct ADDR_SW_PATINFO
{
    UINT_8  maxItemCount;
    UINT_8  nibble01Idx;    // Index for the nibble 0-1 section
    UINT_16 nibble2Idx;     // Index for the nibble 2 section
    UINT_16 nibble3Idx;     // Index for the nibble 3 section
    UINT_8  nibble4Idx;     // Index for the nibble 4 section
};

/**
************************************************************************************************************************
*   InitBit
*
*   @brief
*       Produce a 64-bit bit-setting value with exactly one bit set: bit number (c * 16 + index).
*       Both arguments are parenthesized in the expansion, so expressions such as (a | b) or (i & 3) can be
*       passed safely.
************************************************************************************************************************
*/
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))

// Single-bit values built with InitBit(channel, index): channel 0 is named X, 1 is Y, 2 is Z and 3 is S,
// and the numeric suffix is the bit index within that channel.

// X channel (channel 0), bits 0-11
const UINT_64 X0  = InitBit(0,  0);
const UINT_64 X1  = InitBit(0,  1);
const UINT_64 X2  = InitBit(0,  2);
const UINT_64 X3  = InitBit(0,  3);
const UINT_64 X4  = InitBit(0,  4);
const UINT_64 X5  = InitBit(0,  5);
const UINT_64 X6  = InitBit(0,  6);
const UINT_64 X7  = InitBit(0,  7);
const UINT_64 X8  = InitBit(0,  8);
const UINT_64 X9  = InitBit(0,  9);
const UINT_64 X10 = InitBit(0, 10);
const UINT_64 X11 = InitBit(0, 11);

// Y channel (channel 1), bits 0-11
const UINT_64 Y0  = InitBit(1,  0);
const UINT_64 Y1  = InitBit(1,  1);
const UINT_64 Y2  = InitBit(1,  2);
const UINT_64 Y3  = InitBit(1,  3);
const UINT_64 Y4  = InitBit(1,  4);
const UINT_64 Y5  = InitBit(1,  5);
const UINT_64 Y6  = InitBit(1,  6);
const UINT_64 Y7  = InitBit(1,  7);
const UINT_64 Y8  = InitBit(1,  8);
const UINT_64 Y9  = InitBit(1,  9);
const UINT_64 Y10 = InitBit(1, 10);
const UINT_64 Y11 = InitBit(1, 11);

// Z channel (channel 2), bits 0-8
const UINT_64 Z0  = InitBit(2,  0);
const UINT_64 Z1  = InitBit(2,  1);
const UINT_64 Z2  = InitBit(2,  2);
const UINT_64 Z3  = InitBit(2,  3);
const UINT_64 Z4  = InitBit(2,  4);
const UINT_64 Z5  = InitBit(2,  5);
const UINT_64 Z6  = InitBit(2,  6);
const UINT_64 Z7  = InitBit(2,  7);
const UINT_64 Z8  = InitBit(2,  8);

// S channel (channel 3), bits 0-2
const UINT_64 S0  = InitBit(3,  0);
const UINT_64 S1  = InitBit(3,  1);
const UINT_64 S2  = InitBit(3,  2);

/**
************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities
************************************************************************************************************************
*/
class Lib : public Addr::Lib
{
public:
    virtual ~Lib();

    // Converts a client handle into a V2 library pointer.
    static Lib* GetLib(
        ADDR_HANDLE hLib);

    //
    // Interface stubs
    //

    // For data surface
    ADDR_E_RETURNCODE ComputeSurfaceInfo(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
        const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) const;

    // For HTile
    ADDR_E_RETURNCODE ComputeHtileInfo(
        const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
        const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
        const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut);

    // For CMask
    ADDR_E_RETURNCODE ComputeCmaskInfo(
        const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
        const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
        const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*      pOut) const;

    // For FMask
    ADDR_E_RETURNCODE ComputeFmaskInfo(
        const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn,
        ADDR2_COMPUTE_FMASK_INFO_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
        const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
        const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*      pOut) const;

    // For DCC key
    ADDR_E_RETURNCODE ComputeDccInfo(
        const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut);

    // Misc
    ADDR_E_RETURNCODE ComputePipeBankXor(
        const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeSlicePipeBankXor(
        const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern(
        const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeNonBlockCompressedView(
        const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut);

    ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE GetPossibleSwizzleModes(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;

    // Default for ASIC layers that do not provide this query: flags the call as
    // not implemented and reports the mode as not valid. The return type is a
    // boolean, so FALSE (not an ADDR_E_RETURNCODE value) is the correct "no" answer.
    virtual BOOL_32 IsValidDisplaySwizzleMode(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    ADDR_E_RETURNCODE GetAllowedBlockSet(
        ADDR2_SWMODE_SET allowedSwModeSet,
        AddrResourceType rsrcType,
        ADDR2_BLOCK_SET* pAllowedBlockSet) const;

    ADDR_E_RETURNCODE GetAllowedSwSet(
        ADDR2_SWMODE_SET  allowedSwModeSet,
        ADDR2_SWTYPE_SET* pAllowedSwSet) const;

protected:
    Lib();  // Constructor is protected
    Lib(const Client* pClient);

    static const UINT_32 MaxNumOfBpp = 5;
    static const UINT_32 MaxNumOfBppCMask = 4;
    static const UINT_32 MaxNumOfAA  = 4;

    static const Dim2d Block256_2d[MaxNumOfBpp];
    static const Dim3d Block1K_3d[MaxNumOfBpp];

    static const UINT_32 PrtAlignment = 64 * 1024;
    static const UINT_32 MaxMacroBits = 20;

    static const UINT_32 MaxMipLevels = 16;

    //
    // Swizzle mode queries. All of them index m_swizzleModeTable directly with the
    // given mode and perform no range check, so callers must pass a mode that is a
    // valid index into that table.
    //

    // A swizzle mode is treated as valid when its table entry has at least one flag set.
    BOOL_32 IsValidSwMode(AddrSwizzleMode swizzleMode) const
    {
        return (m_swizzleModeTable[swizzleMode].u32All != 0);
    }

    // Checking block size
    BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is256b;
    }

    BOOL_32 IsBlock4kb(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is4kb;
    }

    BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is64kb;
    }

    BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isVar;
    }

    // Checking swizzle mode
    BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isLinear;
    }

    BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isRtOpt;
    }

    BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isZ;
    }

    BOOL_32 IsStandardSwizzle(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isStd;
    }

    BOOL_32 IsDisplaySwizzle(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isDisp;
    }

    BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isRot;
    }

    // Resource-type aware variants: the answer is delegated to the hardware layer.
    BOOL_32 IsStandardSwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
    {
        return HwlIsStandardSwizzle(resourceType, swizzleMode);
    }

    BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
    {
        return HwlIsDisplaySwizzle(resourceType, swizzleMode);
    }

    BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isXor;
    }

    BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isT;
    }

    // TRUE for modes that have the xor flag set but not the PRT ("T") flag.
    BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const
    {
        return (IsXor(swizzleMode) && (IsPrt(swizzleMode) == FALSE));
    }

    // Checking resource type
    static BOOL_32 IsTex1d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_1D);
    }

    static BOOL_32 IsTex2d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_2D);
    }

    static BOOL_32 IsTex3d(AddrResourceType resourceType)
    {
        return (resourceType == ADDR_RSRC_TEX_3D);
    }

    BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
    {
        return HwlIsThick(resourceType, swizzleMode);
    }

    BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
    {
        return HwlIsThin(resourceType, swizzleMode);
    }

    // Returns log2 of the block size in bytes for a swizzle mode:
    //   256B block or linear -> 8, 4KB -> 12, 64KB -> 16,
    //   variable-size block  -> m_blockVarSizeLog2 (only when it is non-zero).
    // Any other mode trips an assertion and yields 0.
    UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const
    {
        UINT_32 blockSizeLog2 = 0;

        if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode))
        {
            blockSizeLog2 = 8;
        }
        else if (IsBlock4kb(swizzleMode))
        {
            blockSizeLog2 = 12;
        }
        else if (IsBlock64kb(swizzleMode))
        {
            blockSizeLog2 = 16;
        }
        else if (IsBlockVariable(swizzleMode) && (m_blockVarSizeLog2 != 0))
        {
            blockSizeLog2 = m_blockVarSizeLog2;
        }
        else
        {
            ADDR_ASSERT_ALWAYS();
        }

        return blockSizeLog2;
    }

    // Returns the block size in bytes, i.e. 2^GetBlockSizeLog2(). The shift is done on
    // an unsigned operand so the result stays well defined for every log2 below 32.
    UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const
    {
        return (1u << GetBlockSizeLog2(swizzleMode));
    }

    // Computes the FMask bits per pixel for a sample/fragment count pair.
    //   - sample == 0 is treated as 1; frag == 0 is treated as "same as sample".
    //   - Bits per sample start at QLog2(frag), plus one when sample > frag.
    //   - A 3-bit per-sample index is widened to 4 bits.
    //   - The result is bits-per-sample * sample, but never less than 8.
    static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag)
    {
        sample = (sample == 0) ? 1 : sample;
        frag   = (frag   == 0) ? sample : frag;

        UINT_32 fmaskBpp = QLog2(frag);

        if (sample > frag)
        {
            fmaskBpp++;
        }

        if (fmaskBpp == 3)
        {
            fmaskBpp = 4;
        }

        fmaskBpp = Max(8u, fmaskBpp * sample);

        return fmaskBpp;
    }

    //
    // Hardware layer hooks. Each default below only flags the call as not implemented
    // and returns a failure/FALSE value; an ASIC-specific subclass is expected to
    // override the hooks it supports.
    //

    virtual BOOL_32 HwlIsStandardSwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    virtual BOOL_32 HwlIsDisplaySwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    virtual BOOL_32 HwlIsThin(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    virtual BOOL_32 HwlIsThick(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
        const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,
        ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
        const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,
        ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
        const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,
        ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlSupportComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual VOID HwlComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut)
    {
        ADDR_NOT_IMPLEMENTED();
    }

    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
        const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
        const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
        const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeThinEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeThickEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual UINT_32 HwlGetEquationIndex(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_INVALID_EQUATION_INDEX;
    }

    // Non-virtual front end for HwlGetEquationIndex().
    UINT_32 GetEquationIndex(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const
    {
        return HwlGetEquationIndex(pIn, pOut);
    }

    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView(
        const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlGetAllowedBlockSet(
        ADDR2_SWMODE_SET allowedSwModeSet,
        AddrResourceType rsrcType,
        ADDR2_BLOCK_SET* pAllowedBlockSet) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    virtual ADDR_E_RETURNCODE HwlGetAllowedSwSet(
        ADDR2_SWMODE_SET  allowedSwModeSet,
        ADDR2_SWTYPE_SET* pAllowedSwSet) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    ADDR_E_RETURNCODE ComputeBlock256Equation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    ADDR_E_RETURNCODE ComputeThinEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    ADDR_E_RETURNCODE ComputeThickEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    ADDR_E_RETURNCODE ComputeSurfaceInfoLinear(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceInfoTiled(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear(
        const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled(
        const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) const;

    UINT_32 ComputeSurface2DMicroBlockOffset(
        const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const;

    UINT_32 ComputeSurface3DMicroBlockOffset(
        const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const;

    // Misc
    ADDR_E_RETURNCODE ComputeBlockDimensionForSurf(
        UINT_32*         pWidth,
        UINT_32*         pHeight,
        UINT_32*         pDepth,
        UINT_32          bpp,
        UINT_32          numSamples,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    ADDR_E_RETURNCODE ComputeBlockDimension(
        UINT_32*         pWidth,
        UINT_32*         pHeight,
        UINT_32*         pDepth,
        UINT_32          bpp,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    virtual VOID ComputeThinBlockDimension(
        UINT_32*         pWidth,
        UINT_32*         pHeight,
        UINT_32*         pDepth,
        UINT_32          bpp,
        UINT_32          numSamples,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    VOID ComputeThickBlockDimension(
        UINT_32*         pWidth,
        UINT_32*         pHeight,
        UINT_32*         pDepth,
        UINT_32          bpp,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    // Aligns width/height/numSlices up to the block dimensions (via PowTwoAlign),
    // stores the padded dimensions in *pPadDim and returns their product, computed
    // in 64 bits.
    static UINT_64 ComputePadSize(
        const Dim3d*      pBlkDim,
        UINT_32           width,
        UINT_32           height,
        UINT_32           numSlices,
        Dim3d*            pPadDim)
    {
        pPadDim->w = PowTwoAlign(width ,pBlkDim->w);
        pPadDim->h = PowTwoAlign(height ,pBlkDim->h);
        pPadDim->d = PowTwoAlign(numSlices, pBlkDim->d);
        return static_cast<UINT_64>(pPadDim->w) * pPadDim->h * pPadDim->d;
    }

    static ADDR_E_RETURNCODE ExtractPipeBankXor(
        UINT_32  pipeBankXor,
        UINT_32  bankBits,
        UINT_32  pipeBits,
        UINT_32* pBankX,
        UINT_32* pPipeX);

    // TRUE when 'slice' is below the slice count of mip level 'mipId', where that
    // count is numSlices halved once per mip level and clamped to at least 1.
    static BOOL_32 Valid3DMipSliceIdConstraint(
        UINT_32 numSlices,
        UINT_32 mipId,
        UINT_32 slice)
    {
        return (Max((numSlices >> mipId), 1u) > slice);
    }

    Dim3d GetMipTailDim(
        AddrResourceType  resourceType,
        AddrSwizzleMode   swizzleMode,
        UINT_32           blockWidth,
        UINT_32           blockHeight,
        UINT_32           blockDepth) const;

    // Heap classification helpers. Note that the parameter is a resource *location*
    // despite being named resourceType.
    static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType)
    {
        return ((resourceType == ADDR_RSRC_LOC_LOCAL) ||
                (resourceType == ADDR_RSRC_LOC_INVIS));
    }

    static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType)
    {
        return (resourceType == ADDR_RSRC_LOC_INVIS);
    }

    static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType)
    {
        return ((resourceType == ADDR_RSRC_LOC_USWC) ||
                (resourceType == ADDR_RSRC_LOC_CACHED));
    }

    // Returns log2 of the pipe count used for metadata addressing:
    //   - 0 when the surface is not pipe aligned, otherwise
    //     min(m_pipesLog2 + m_seLog2, 5);
    //   - for xor modes, additionally capped at (block size log2 - pipe interleave log2).
    // NOTE(review): the subtraction is unsigned, so it presumably relies on xor modes
    // always having a block at least as large as the pipe interleave - confirm.
    UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const
    {
        UINT_32 numPipeLog2 = pipeAligned ? Min(m_pipesLog2 + m_seLog2, 5u) : 0;

        if (IsXor(swizzleMode))
        {
            UINT_32 maxPipeLog2 = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2;

            numPipeLog2 = Min(numPipeLog2, maxPipeLog2);
        }

        return numPipeLog2;
    }

    // Returns the pipe count used for metadata addressing, i.e.
    // 2^GetPipeLog2ForMetaAddressing(). Shifts an unsigned operand to match the
    // UINT_32 return type.
    UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const
    {
        return (1u << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode));
    }

    // Debug-only sanity check: asserts that the requested number of mip levels does
    // not exceed what the largest relevant dimension allows. Compiles to an empty
    // function when DEBUG is not set.
    VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
    {
#if DEBUG
        if (pIn->numMipLevels > 1)
        {
            UINT_32 actualMipLevels = 1;

            // The cases deliberately cascade: 3D also considers height and width,
            // 2D also considers width.
            switch (pIn->resourceType)
            {
                case ADDR_RSRC_TEX_3D:
                    actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1);
                    // Fall through to share 2D case
                case ADDR_RSRC_TEX_2D:
                    actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1);
                    // Fall through to share 1D case
                case ADDR_RSRC_TEX_1D:
                    // Base 1D case
                    actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1);
                    break;
                default:
                    ADDR_ASSERT_ALWAYS();
                    break;
            }
            // Client pass wrong number of MipLevels to addrlib and result will be bad.
            // Not sure if we should fail this calling instead of putting an assertion here.
            ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels);
        }
#endif
    }

    // For xor swizzle modes, splits the client supplied pipeBankXor into its bank and
    // pipe parts and xors them into *pBlockOffset: the pipe part is placed at
    // m_pipeInterleaveLog2 and the bank part directly above the pipe bits.
    // Non-xor modes leave *pBlockOffset untouched and return ADDR_OK.
    // Returns the result of ExtractPipeBankXor() for xor modes.
    ADDR_E_RETURNCODE ApplyCustomerPipeBankXor(
        AddrSwizzleMode swizzleMode,
        UINT_32         pipeBankXor,
        UINT_32         bankBits,
        UINT_32         pipeBits,
        UINT_32*        pBlockOffset) const
    {
        ADDR_E_RETURNCODE returnCode = ADDR_OK;

        if (IsXor(swizzleMode))
        {
            // Apply driver set bankPipeXor
            UINT_32 bankX = 0;
            UINT_32 pipeX = 0;
            returnCode = ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankX, &pipeX);
            *pBlockOffset ^= (pipeX << m_pipeInterleaveLog2);
            *pBlockOffset ^= (bankX << (m_pipeInterleaveLog2 + pipeBits));
        }

        return returnCode;
    }

    UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const;

    ADDR_E_RETURNCODE ApplyCustomizedPitchHeight(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        UINT_32                                 elementBytes,
        UINT_32                                 pitchAlignInElement,
        UINT_32*                                pPitch,
        UINT_32*                                pHeight) const;

    VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;

    VOID FilterInvalidEqSwizzleMode(
        ADDR2_SWMODE_SET& allowedSwModeSet,
        AddrResourceType  resourceType,
        UINT_32           elemLog2,
        UINT_32           maxComponents) const;

#if DEBUG
    VOID ValidateStereoInfo(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        const ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
#endif

    UINT_32 m_se;                       ///< Number of shader engine
    UINT_32 m_rbPerSe;                  ///< Number of render backend per shader engine
    UINT_32 m_maxCompFrag;              ///< Number of max compressed fragment

    UINT_32 m_banksLog2;                ///< Number of bank Log2
    UINT_32 m_pipesLog2;                ///< Number of pipe per shader engine Log2
    UINT_32 m_seLog2;                   ///< Number of shader engine Log2
    UINT_32 m_rbPerSeLog2;              ///< Number of render backend per shader engine Log2
    UINT_32 m_maxCompFragLog2;          ///< Number of max compressed fragment Log2

    UINT_32 m_pipeInterleaveLog2;       ///< Log2 of pipe interleave bytes

    UINT_32 m_blockVarSizeLog2;         ///< Log2 of block var size

    SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE];  ///< Swizzle mode table

    // Max number of swizzle mode supported for equation
    static const UINT_32    MaxSwModeType = 32;
    // Max number of resource type (2D/3D) supported for equation
    static const UINT_32    MaxRsrcType = 2;
    // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
    static const UINT_32    MaxElementBytesLog2  = 5;
    // Almost all swizzle mode + resource type support equation
    static const UINT_32    EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
    // Equation table
    ADDR_EQUATION           m_equationTable[EquationTableSize];

    // Number of equation entries in the table
    UINT_32                 m_numEquations;
    // Equation lookup table according to bpp and tile index
    UINT_32                 m_equationLookupTable[MaxRsrcType][MaxSwModeType][MaxElementBytesLog2];

private:
    // Disallow the copy constructor
    Lib(const Lib& a);

    // Disallow the assignment operator
    Lib& operator=(const Lib& a);
};

} // V2
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib3.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
************************************************************************************************************************
* @file  addrlib3.cpp
* @brief Contains the implementation for the AddrLib3 base class.
************************************************************************************************************************
*/

#include "addrinterface.h"
#include "addrlib3.h"
#include "addrcommon.h"

namespace rocr {
namespace Addr
{
namespace V3
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// 2D block dimensions. Entry i covers (256 >> i) elements: 16x16, 16x8, 8x8, 8x4, 4x4.
// NOTE(review): presumably indexed by log2(bytes per element), which would make every
// entry exactly 256 bytes - confirm against the users of this table.
const Dim2d         Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}};

// 3D block dimensions. Entry i covers (1024 >> i) elements: 16x8x8 down to 4x4x4.
// NOTE(review): presumably indexed the same way, giving 1 KB per entry - confirm.
const ADDR_EXTENT3D Lib::Block1K_3d[]  = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}};

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Constructor/Destructor
////////////////////////////////////////////////////////////////////////////////////////////////////

/**
************************************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the Addr::V3::Lib class
*
************************************************************************************************************************
*/
Lib::Lib()
    : Addr::Lib(),
      m_pipesLog2(0),
      m_pipeInterleaveLog2(0),
      m_numEquations(0)
{
    // Scalar members start at zero; the equation tables are prepared by Init().
    Init();
}

/**
************************************************************************************************************************
*   Lib::Lib
*
*   @brief
*       Constructor for the Addr::V3::Lib class with pClient as parameter
*
************************************************************************************************************************
*/
Lib::Lib(const Client* pClient)
    : Addr::Lib(pClient),
      m_pipesLog2(0),
      m_pipeInterleaveLog2(0),
      m_numEquations(0)
{
    // Same setup as the default constructor, except that the client is handed to the base class.
    Init();
}

/**
************************************************************************************************************************
*   Lib::Init
*
*   @brief
*       Initialization of class
*
************************************************************************************************************************
*/
void Lib::Init()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    // There is no equation table entry for linear, so start at the "next" swizzle mode entry.
    for (UINT_32  swizzleModeIdx = ADDR3_LINEAR + 1; swizzleModeIdx < ADDR3_MAX_TYPE; swizzleModeIdx++)
    {
        for (UINT_32  msaaRateIdx = 0; msaaRateIdx < MaxMsaaRateLog2; msaaRateIdx++)
        {
            for (UINT_32  log2BytesIdx = 0; log2BytesIdx < MaxElementBytesLog2; log2BytesIdx++)
            {
                SetEquationTableEntry(static_cast<Addr3SwizzleMode>(swizzleModeIdx),
                                      msaaRateIdx,
                                      log2BytesIdx,
                                      ADDR_INVALID_EQUATION_INDEX);
            }
        }
    }
}

/**
************************************************************************************************************************
*   Lib::~Lib
*
*   @brief
*       Destructor for the Addr::V3::Lib class. The body is empty; nothing is released at this level.
*
************************************************************************************************************************
*/
Lib::~Lib()
{
}

/**
************************************************************************************************************************
*   Lib::GetLib
*
*   @brief
*       Get Addr::V3::Lib pointer
*
*   @return
*      An Addr::V3::Lib class pointer
************************************************************************************************************************
*/
Lib* Lib::GetLib(
    ADDR_HANDLE hLib)   ///< [in] handle of ADDR_HANDLE
{
    // Let the base class turn the opaque handle into an Addr::Lib pointer, then downcast that pointer. Casting the
    // raw handle straight to the derived type skipped the base-class conversion and left pAddrLib unused.
    Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib);

    return static_cast<Lib*>(pAddrLib);
}

/**
************************************************************************************************************************
*   Lib::GetBlockSize
*
*   @brief
*       Returns the byte size of a block for the swizzle mode, derived from GetBlockSizeLog2().
*
*   @return
*       1 << GetBlockSizeLog2(swizzleMode, forPitch).
************************************************************************************************************************
*/
UINT_32  Lib::GetBlockSize(
    Addr3SwizzleMode  swizzleMode,
    BOOL_32           forPitch
    ) const
{
    const UINT_32 log2Bytes = GetBlockSizeLog2(swizzleMode, forPitch);

    return  (1 << log2Bytes);
}

/**
************************************************************************************************************************
*   Lib::GetBlockSizeLog2
*
*   @brief
*       Returns the log2 of the byte size of a block for the swizzle mode. For ADDR3_LINEAR the value depends on
*       forPitch (7 when set, 8 otherwise).
*
*   @return
*       Log2 of the block byte size; zero (after ADDR_ASSERT_ALWAYS) if the swizzle mode is not handled.
************************************************************************************************************************
*/
UINT_32  Lib::GetBlockSizeLog2(
    Addr3SwizzleMode  swizzleMode,
    BOOL_32           forPitch
    ) const
{
    UINT_32  log2Bytes = 0;

    switch (swizzleMode)
    {
        case ADDR3_LINEAR:
            // Linear is the only mode whose result depends on forPitch.
            if (forPitch)
            {
                log2Bytes = 7;
            }
            else
            {
                log2Bytes = 8;
            }
            break;
        case ADDR3_256B_2D:
            log2Bytes = 8;
            break;
        case ADDR3_4KB_2D:
        case ADDR3_4KB_3D:
            log2Bytes = 12;
            break;
        case ADDR3_64KB_2D:
        case ADDR3_64KB_3D:
            log2Bytes = 16;
            break;
        case ADDR3_256KB_2D:
        case ADDR3_256KB_3D:
            log2Bytes = 18;
            break;
        default:
            // Unknown swizzle mode: flag it and fall through with the zero default.
            ADDR_ASSERT_ALWAYS();
            break;
    }

    return  log2Bytes;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceInfo
*
*   @brief
*       Interface function stub of ComputeSurfaceInfo. Validates the structure sizes (when size checking is enabled),
*       normalizes a local copy of the input, hands it to HwlComputeSurfaceInfo and then fills in the pixel-unit
*       fields of the output.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_PARAMSIZEMISMATCH on a structure size mismatch, ADDR_INVALIDPARAMS when the resulting
*       bpp is zero, otherwise whatever HwlComputeSurfaceInfo returns.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // The size fields are only checked when the library was configured to do so.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size != sizeof(ADDR3_COMPUTE_SURFACE_INFO_INPUT)) ||
            (pOut->size != sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    // Adjust incoming parameters.
    // Work on a local copy so the caller's input stays untouched; zero dimensions/counts are clamped up to 1.
    // Note that this copy is made regardless of the outcome of the size check above.
    ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
    localIn.width        = Max(pIn->width, 1u);
    localIn.height       = Max(pIn->height, 1u);
    localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
    localIn.numSlices    = Max(pIn->numSlices, 1u);
    localIn.numSamples   = Max(pIn->numSamples, 1u);

    // Defaults used when no format conversion takes place (format == ADDR_FMT_INVALID).
    UINT_32  expandX  = 1;
    UINT_32  expandY  = 1;
    ElemMode elemMode = ADDR_UNCOMPRESSED;

    if (returnCode == ADDR_OK)
    {
        // Set format to INVALID will skip this conversion
        if (localIn.format != ADDR_FMT_INVALID)
        {
            // Get compression/expansion factors and element mode which indicates compression/expansion
            localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format,
                                                        &elemMode,
                                                        &expandX,
                                                        &expandY);

            // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is
            // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear-
            // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw
            // restrictions are different.
            // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround
            // but we use this flag to skip RestoreSurfaceInfo below
            if ((elemMode == ADDR_EXPANDED) && (expandX > 1))
            {
                ADDR_ASSERT(IsLinear(localIn.swizzleMode));
            }

            // basePitch is only a scratch in/out argument here; its value is not read afterwards.
            UINT_32 basePitch = 0;
            GetElemLib()->AdjustSurfaceInfo(elemMode,
                                            expandX,
                                            expandY,
                                            &localIn.bpp,
                                            &basePitch,
                                            &localIn.width,
                                            &localIn.height);

            // Overwrite these parameters if we have a valid format
        }

        if (localIn.bpp != 0)
        {
            // Re-clamp: width/height were passed by pointer to AdjustSurfaceInfo above.
            localIn.width  = Max(localIn.width, 1u);
            localIn.height = Max(localIn.height, 1u);
        }
        else // Rule out some invalid parameters
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
    }

    if (returnCode == ADDR_OK)
    {
        // The hardware layer computes the surface layout from the normalized input.
        returnCode = HwlComputeSurfaceInfo(&localIn, pOut);

        if (returnCode == ADDR_OK)
        {
            // Seed the pixel-unit outputs from the values the hardware layer produced.
            pOut->bpp         = localIn.bpp;
            pOut->pixelPitch  = pOut->pitch;
            pOut->pixelHeight = pOut->height;

            if (localIn.format != ADDR_FMT_INVALID)
            {
                // Snapshot taken before the first RestoreSurfaceInfo call, which receives &pOut->pixelBits;
                // the later calls are handed this local instead.
                UINT_32 pixelBits = pOut->pixelBits;

                GetElemLib()->RestoreSurfaceInfo(elemMode,
                                                 expandX,
                                                 expandY,
                                                 &pOut->pixelBits,
                                                 &pOut->pixelPitch,
                                                 &pOut->pixelHeight);

                GetElemLib()->RestoreSurfaceInfo(elemMode,
                                                 expandX,
                                                 expandY,
                                                 &pixelBits,
                                                 &pOut->pixelMipChainPitch,
                                                 &pOut->pixelMipChainHeight);

                // Per-mip pixel dimensions are only produced for mip chains and when the caller supplied storage.
                if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL))
                {
                    for (UINT_32 i = 0; i < localIn.numMipLevels; i++)
                    {
                        pOut->pMipInfo[i].pixelPitch  = pOut->pMipInfo[i].pitch;
                        pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height;

                        GetElemLib()->RestoreSurfaceInfo(elemMode,
                                                         expandX,
                                                         expandY,
                                                         &pixelBits,
                                                         &pOut->pMipInfo[i].pixelPitch,
                                                         &pOut->pMipInfo[i].pixelHeight);
                    }
                }
            }
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::GetPossibleSwizzleModes
*
*   @brief
*       Interface function stub of Addr3GetPossibleSwizzleModes. Sets the bits in pOut->validModes for every swizzle
*       mode allowed by the surface flags, bpp, sample count and resource type, then removes the modes whose block
*       size exceeds pIn->maxAlign. The output is only touched when the structure size check passes.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_PARAMSIZEMISMATCH on a structure size mismatch, ADDR_OK otherwise.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetPossibleSwizzleModes(
     const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,    ///< [in] input structure
     ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // The size fields are only checked when the library was configured to do so.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size  != sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT)) ||
            (pOut->size != sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        const ADDR3_SURFACE_FLAGS flags = pIn->flags;

        // VRS images can only be 2D from the client API rules.
        ADDR_ASSERT((flags.isVrsImage == 0) || IsTex2d(pIn->resourceType));

        if (pIn->bpp == 96)
        {
            pOut->validModes.swLinear = 1;
        }
        // Depth/Stencil images can't be linear and must be 2D swizzle modes.
        // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV.
        else if (flags.depth || flags.stencil)
        {
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        // The organization of elements in the hierarchical surface is the same as any other surface, and it can support
        // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D).  The swizzle mode can be selected
        // orthogonally to the underlying z or stencil surface.
        else if (flags.hiZHiS)
        {
            pOut->validModes.sw2d256B  = 1;
            pOut->validModes.sw2d4kB   = 1;
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        // MSAA can't be linear and must be 2D swizzle modes.
        else if (pIn->numSamples > 1)
        {
            // NOTE: SW_256B_2D still supports MSAA. The removal of 256B for MSAA is reverted in HW Doc.
            pOut->validModes.sw2d256B  = 1;
            pOut->validModes.sw2d4kB   = 1;
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        // Block-compressed images need to be either using 2D or linear swizzle modes.
        else if (flags.blockCompressed)
        {
            pOut->validModes.swLinear = 1;

            // We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited.
            if (IsTex3d(pIn->resourceType) == FALSE)
            {
                pOut->validModes.sw2d256B = 1;
            }
            pOut->validModes.sw2d4kB   = 1;
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        else if (IsTex1d(pIn->resourceType))
        {
            pOut->validModes.swLinear  = 1;
            pOut->validModes.sw2d256B  = 1;
            pOut->validModes.sw2d4kB   = 1;
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray)
        {
            //      NV12 and P010 support
            //      SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
            // There could be more multimedia formats that require more hw specific tiling modes...

            // The exception is VRS images.
            // Linear is not allowed and the VRS surface needs to be 8BPP format.
            if (flags.isVrsImage)
            {
                ADDR_ASSERT(pIn->bpp == 8);
            }
            else
            {
                pOut->validModes.swLinear = 1;
            }
            if (flags.view3dAs2dArray == 0)
            {
                // ADDR3_256B_2D can't support 3D images.
                pOut->validModes.sw2d256B = 1;
            }
            pOut->validModes.sw2d4kB   = 1;
            pOut->validModes.sw2d64kB  = 1;
            pOut->validModes.sw2d256kB = 1;
        }
        else if (IsTex3d(pIn->resourceType))
        {
            // An eventual determination would be based on pal setting of height_watermark and depth_watermark.
            // However, we just adopt the simpler logic currently.
            // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred.
            // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above.
            // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray.
            pOut->validModes.swLinear  = 1;
            pOut->validModes.sw3d4kB   = 1;
            pOut->validModes.sw3d64kB  = 1;
            pOut->validModes.sw3d256kB = 1;
        }

        // Drop every mode whose block size is larger than the maximum alignment the client accepts. This runs only
        // after the size check has passed, so neither pIn->maxAlign is read nor pOut written for a mismatched struct.
        constexpr UINT_32 Size256  = 256u;
        constexpr UINT_32 Size4K   = 4 * 1024;
        constexpr UINT_32 Size64K  = 64 * 1024;
        constexpr UINT_32 Size256K = 256 * 1024;

        ADDR_ASSERT(pIn->maxAlign != 0);

        if (pIn->maxAlign < Size256K)
        {
            pOut->validModes.value &= ~Gfx12Blk256KBSwModeMask;
        }

        if (pIn->maxAlign < Size64K)
        {
            pOut->validModes.value &= ~Gfx12Blk64KBSwModeMask;
        }

        if (pIn->maxAlign < Size4K)
        {
            pOut->validModes.value &= ~Gfx12Blk4KBSwModeMask;
        }

        if (pIn->maxAlign < Size256)
        {
            pOut->validModes.value &= ~Gfx12Blk256BSwModeMask;
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::HwlConvertChipFamily
*
*   @brief
*       Convert familyID defined in atiid.h to ChipFamily. This implementation ignores both arguments, always
*       reports ADDR_CHIP_FAMILY_NAVI and does not itself modify any member.
*   @return
*       ChipFamily (always ADDR_CHIP_FAMILY_NAVI here)
************************************************************************************************************************
*/
ChipFamily Lib::HwlConvertChipFamily(
    UINT_32 chipFamily,        ///< [in] chip family defined in atiid.h (unused here)
    UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h (unused here)
{
    return ADDR_CHIP_FAMILY_NAVI;
}

/**
************************************************************************************************************************
*   Lib::ComputeBlockDimensionForSurf
*
*   @brief
*       Internal function to get block width/height/depth in elements from surface input params. Linear modes get a
*       one-row block, 3D swizzle modes spread the block's log2 size over x/y/z, and every other mode spreads it over
*       x/y after accounting for the sample count.
*
*   @return
*       VOID (result is written to *pExtent)
************************************************************************************************************************
*/
VOID Lib::ComputeBlockDimensionForSurf(
    ADDR_EXTENT3D*    pExtent,
    UINT_32           bpp,
    UINT_32           numSamples,
    Addr3SwizzleMode  swizzleMode
    ) const
{
    const UINT_32 bytesPerElem = bpp >> 3;
    const UINT_32 elemLog2     = Log2(bytesPerElem);
    const UINT_32 blockLog2    = GetBlockSizeLog2(swizzleMode);

    // Every branch below overrides only the dimensions that differ from 1.
    UINT_32 blockWidth  = 1;
    UINT_32 blockHeight = 1;
    UINT_32 blockDepth  = 1;

    if (IsLinear(swizzleMode))
    {
        blockWidth = 1 << (blockLog2 - elemLog2);
    }
    else if (Is3dSwizzle(swizzleMode))
    {
        const UINT_32 blockRem = blockLog2 % 3;
        const UINT_32 elemRem  = elemLog2 % 3;
        const UINT_32 shared   = (blockLog2 / 3) - (elemLog2 / 3);

        UINT_32 xLog2 = shared;
        UINT_32 zLog2 = shared;

        // Leftover block bits go to x first, then z; leftover element bits are taken back in the same order.
        xLog2 += (blockRem > 0) ? 1 : 0;
        zLog2 += (blockRem > 1) ? 1 : 0;
        xLog2 -= (elemRem > 0) ? 1 : 0;
        zLog2 -= (elemRem > 1) ? 1 : 0;

        blockWidth  = 1u << xLog2;
        blockHeight = 1u << shared;
        blockDepth  = 1u << zLog2;
    }
    else
    {
        const UINT_32 sampleLog2 = Log2(Max(numSamples, 1u));
        const UINT_32 halfLog2   = (blockLog2 >> 1) - (elemLog2 >> 1) - (sampleLog2 >> 1);

        // Width loses a bit only when both element and sample log2 are odd; height when either one is odd.
        const UINT_32 widthLog2  = halfLog2 - (elemLog2 & sampleLog2 & 1);
        const UINT_32 heightLog2 = halfLog2 - ((elemLog2 | sampleLog2) & 1);

        // Convert from log2 to actual units.
        blockWidth  = 1u << widthLog2;
        blockHeight = 1u << heightLog2;
    }

    pExtent->width  = blockWidth;
    pExtent->height = blockHeight;
    pExtent->depth  = blockDepth;
}

/**
************************************************************************************************************************
*   Lib::GetMipTailDim
*
*   @brief
*       Internal function to get the max dimension of the first level in the mip tail: the block dimensions with
*       exactly one axis halved, the axis being chosen from the block's log2 size and the swizzle kind.
*
*   @return
*       Max Width/Height/Depth value of the first mip fitted in mip tail
************************************************************************************************************************
*/
ADDR_EXTENT3D Lib::GetMipTailDim(
    Addr3SwizzleMode      swizzleMode,
    const ADDR_EXTENT3D&  blockDims
    ) const
{
    const UINT_32 blockLog2 = GetBlockSizeLog2(swizzleMode);

    ADDR_EXTENT3D  tailDims = blockDims;

    if (Is3dSwizzle(swizzleMode))
    {
        // 3D swizzle: remainder 0 halves height, 1 halves width, 2 halves depth.
        switch (blockLog2 % 3)
        {
            case 0:
                tailDims.height >>= 1;
                break;
            case 1:
                tailDims.width >>= 1;
                break;
            default:
                tailDims.depth >>= 1;
                break;
        }
    }
    else if ((blockLog2 % 2) == 0)
    {
        // Even log2 block size: halve the width.
        tailDims.width >>= 1;
    }
    else
    {
        // Odd log2 block size: halve the height.
        tailDims.height >>= 1;
    }

    return tailDims;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoord
*
*   @brief
*       Interface function stub of ComputeSurfaceAddrFromCoord. Checks the structure sizes (when size checking is
*       enabled), validates a normalized local copy of the input and dispatches to the linear or tiled address
*       computation. The input is only copied and validated once the size check has passed, so a size mismatch is
*       always reported as ADDR_PARAMSIZEMISMATCH.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_PARAMSIZEMISMATCH on a structure size mismatch, ADDR_INVALIDPARAMS for rejected
*       parameters, otherwise the result of the linear/tiled computation.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord(
    const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // The size fields are only checked when the library was configured to do so.
    if (GetFillSizeFieldsFlags() == TRUE)
    {
        if ((pIn->size != sizeof(ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) ||
            (pOut->size != sizeof(ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT)))
        {
            returnCode = ADDR_PARAMSIZEMISMATCH;
        }
    }

    if (returnCode == ADDR_OK)
    {
        // Work on a local copy with zero dimensions/counts clamped up to 1.
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn;
        localIn.unAlignedDims.width  = Max(pIn->unAlignedDims.width,  1u);
        localIn.unAlignedDims.height = Max(pIn->unAlignedDims.height, 1u);
        localIn.unAlignedDims.depth  = Max(pIn->unAlignedDims.depth,  1u);
        localIn.numMipLevels         = Max(pIn->numMipLevels,         1u);
        localIn.numSamples           = Max(pIn->numSamples,           1u);

        // bpp must be a whole number of bytes in [8, 128]; sample/slice/mip indices must be in range; 3D resources
        // additionally have to satisfy the mip/slice constraint.
        if ((localIn.bpp < 8)                               ||
            (localIn.bpp > 128)                             ||
            ((localIn.bpp % 8) != 0)                        ||
            (localIn.sample >= localIn.numSamples)          ||
            (localIn.slice >= localIn.unAlignedDims.depth)  ||
            (localIn.mipId >= localIn.numMipLevels)         ||
            (IsTex3d(localIn.resourceType)                  &&
            (Valid3DMipSliceIdConstraint(localIn.unAlignedDims.depth, localIn.mipId, localIn.slice) == FALSE)))
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            if (IsLinear(localIn.swizzleMode))
            {
                returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut);
            }
            else
            {
                returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut);
            }

            if (returnCode == ADDR_OK)
            {
                // Index of the 64KB block that contains the computed address.
                pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
            }
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputePipeBankXor
*
*   @brief
*       Interface function stub of Addr3ComputePipeBankXor. Checks the structure sizes (when size checking is
*       enabled) and forwards to HwlComputePipeBankXor.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS on a structure size mismatch, otherwise the hardware layer's result.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputePipeBankXor(
    const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut)
{
    // The size fields are only inspected when the library was configured to check them.
    const bool sizeMismatch = (GetFillSizeFieldsFlags() == TRUE) &&
                              ((pIn->size  != sizeof(ADDR3_COMPUTE_PIPEBANKXOR_INPUT)) ||
                               (pOut->size != sizeof(ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT)));

    ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;

    if (sizeMismatch == false)
    {
        returnCode = HwlComputePipeBankXor(pIn, pOut);
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoordLinear
*
*   @brief
*       Internal function to calculate address from coord for linear swizzle surface. The surface layout is obtained
*       by calling ComputeSurfaceInfo for an ADDR3_LINEAR surface with the same dimensions; the address is then
*       slice offset + mip offset + byte offset of (x, y) inside the mip, all computed in 64 bits.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS for multisampled input, a non-zero y on a 1D resource, more than
*       MaxMipLevels mip levels, or a failing ComputeSurfaceInfo call; ADDR_OK otherwise.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear(
     const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Only single-sampled surfaces are accepted on this path.
    BOOL_32 valid = (pIn->numSamples <= 1);

    if (valid)
    {
        if (IsTex1d(pIn->resourceType))
        {
            // A 1D resource has a single row.
            valid = (pIn->y == 0);
        }
    }

    if (valid)
    {
        // The per-mip array below has MaxMipLevels entries and is indexed by pIn->mipId; reject larger mip chains
        // instead of relying on the assert alone, which is presumably compiled out of release builds.
        ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
        valid = (pIn->numMipLevels <= MaxMipLevels);
    }

    if (valid)
    {
        ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn  = {0};
        ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
        ADDR3_MIP_INFO                    mipInfo[MaxMipLevels];

        localIn.size         = sizeof(localIn);
        localIn.flags        = pIn->flags;
        localIn.swizzleMode  = ADDR3_LINEAR;
        localIn.resourceType = pIn->resourceType;
        // ADDR_FMT_INVALID makes ComputeSurfaceInfo use bpp as given instead of deriving it from a format.
        localIn.format       = ADDR_FMT_INVALID;
        localIn.bpp          = pIn->bpp;
        localIn.width        = Max(pIn->unAlignedDims.width,  1u);
        localIn.height       = Max(pIn->unAlignedDims.height, 1u);
        localIn.numSlices    = Max(pIn->unAlignedDims.depth,  1u);
        localIn.numMipLevels = Max(pIn->numMipLevels,         1u);
        localIn.numSamples   = Max(pIn->numSamples,           1u);

        // A caller-provided pitch is only forwarded for surfaces without a mip chain.
        if (localIn.numMipLevels <= 1)
        {
            localIn.pitchInElement = pIn->pitchInElement;
        }

        localOut.size     = sizeof(localOut);
        localOut.pMipInfo = mipInfo;

        returnCode = ComputeSurfaceInfo(&localIn, &localOut);

        if (returnCode == ADDR_OK)
        {
            // Widen before multiplying so that y * pitch * bytes cannot wrap in 32 bits on large surfaces.
            const UINT_64 elementBytes = pIn->bpp >> 3;
            const UINT_64 elementIndex = (static_cast<UINT_64>(pIn->y) * mipInfo[pIn->mipId].pitch) + pIn->x;

            pOut->addr        = (localOut.sliceSize * pIn->slice) +
                                mipInfo[pIn->mipId].offset +
                                (elementIndex * elementBytes);
            pOut->bitPosition = 0;
        }
        else
        {
            valid = FALSE;
        }
    }

    if (valid == FALSE)
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface. All work is delegated to
*       HwlComputeSurfaceAddrFromCoordTiled.
*
*   @return
*       ADDR_E_RETURNCODE as returned by the hardware layer
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled(
     const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    const ADDR_E_RETURNCODE hwlResult = HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut);

    return hwlResult;
}

/**
************************************************************************************************************************
*   Lib::ComputeNonBlockCompressedView
*
*   @brief
*       Interface function stub of Addr3ComputeNonBlockCompressedView. Checks the structure sizes (when size
*       checking is enabled), rejects 3D swizzle modes and otherwise forwards to the hardware layer.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS on a structure size mismatch, ADDR_NOTSUPPORTED for 3D swizzle modes,
*       otherwise the result of HwlComputeNonBlockCompressedView.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeNonBlockCompressedView(
    const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut)
{
    // The size fields are only inspected when the library was configured to check them.
    const bool sizeMismatch = (GetFillSizeFieldsFlags() == TRUE) &&
                              ((pIn->size  != sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT)) ||
                               (pOut->size != sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT)));

    ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;

    if (sizeMismatch == false)
    {
        if (Is3dSwizzle(pIn->swizzleMode))
        {
            // 3D volume images using ADDR3_XX_3D is currently not supported.
            returnCode = ADDR_NOTSUPPORTED;
        }
        else
        {
            returnCode = HwlComputeNonBlockCompressedView(pIn, pOut);
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Interface function stub of Addr3ComputeSubResourceOffsetForSwizzlePattern. Checks the structure sizes (when
*       size checking is enabled) against the ADDR3 structures this function actually receives, then forwards to the
*       hardware layer.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS on a structure size mismatch, ADDR_OK otherwise.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern(
    const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
    ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut)
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Compare against the ADDR3 structure sizes: those are the types of pIn/pOut. The ADDR2 structures previously
    // named here are different types whose sizes need not match.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size  != sizeof(ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) ||
         (pOut->size != sizeof(ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT))))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        // The hardware-layer call's result (if any) is not propagated; returnCode stays ADDR_OK.
        HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut);
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::ComputeSlicePipeBankXor
*
*   @brief
*       Interface function stub of Addr3ComputeSlicePipeBankXor. Checks the structure sizes (when size checking is
*       enabled) and the bpe value before forwarding to the hardware layer.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS on a structure size mismatch or when bpe is not one of
*       0/8/16/32/64/128, otherwise the result of HwlComputeSlicePipeBankXor.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor(
    const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut)
{
    ADDR_E_RETURNCODE returnCode;

    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size  != sizeof(ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) ||
         (pOut->size != sizeof(ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT))))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    // This must be chained with "else": as a separate "if" its if/else pair always overwrote the size-mismatch
    // result above, so mismatched structures were still handed to the hardware layer.
    else if ((pIn->bpe != 0) &&
             (pIn->bpe != 8) &&
             (pIn->bpe != 16) &&
             (pIn->bpe != 32) &&
             (pIn->bpe != 64) &&
             (pIn->bpe != 128))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        returnCode = HwlComputeSlicePipeBankXor(pIn, pOut);
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Lib::UseCustomHeight
*
*   @brief
*       Determines if the height calculation for this surface should use the user-specified slice alignment instead
*       of the minimal HW value.
*
*   @return
*       Returns TRUE only for a linear surface with at most one mip level and a non-zero sliceAlign
************************************************************************************************************************
*/
BOOL_32 Lib::UseCustomHeight(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn
    ) const
{
    BOOL_32 useCustom = FALSE;

    // Custom values are only honored for single-mip linear surfaces.
    if ((pIn->numMipLevels <= 1) && IsLinear(pIn->swizzleMode))
    {
        useCustom = (pIn->sliceAlign > 0);
    }

    return useCustom;
}

/**
************************************************************************************************************************
*   Lib::UseCustomPitch
*
*   @brief
*       Determines if the pitch calculation for this surface should use the user-specified pitch instead of the
*       minimal HW value.
*
*   @return
*       Returns TRUE only for a linear surface with at most one mip level and a non-zero pitchInElement
************************************************************************************************************************
*/
BOOL_32 Lib::UseCustomPitch(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn
    ) const
{
    BOOL_32 useCustom = FALSE;

    // Custom values are only honored for single-mip linear surfaces.
    if ((pIn->numMipLevels <= 1) && IsLinear(pIn->swizzleMode))
    {
        useCustom = (pIn->pitchInElement > 0);
    }

    return useCustom;
}

/**
************************************************************************************************************************
*   Lib::CanTrimLinearPadding
*
*   @brief
*       Determines if the calculations for this surface can omit extra trailing padding for linear surfaces.
*
*   @return
*       Returns TRUE only for a non-3D resource with at most one slice and a linear swizzle mode.
************************************************************************************************************************
*/
BOOL_32 Lib::CanTrimLinearPadding(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn
    ) const
{
    BOOL_32 canTrim = FALSE;

    // 3D resources never qualify; everything else needs a single slice and a linear layout.
    if (IsTex3d(pIn->resourceType) == FALSE)
    {
        canTrim = ((pIn->numSlices <= 1) && IsLinear(pIn->swizzleMode));
    }

    return canTrim;
}

/**
************************************************************************************************************************
*   Lib::ApplyCustomizedPitchHeight
*
*   @brief
*       Helper function to override the hw required row pitch/slice pitch by a customized one. The default
*       pitch/height (input size aligned to pOut->blockExtent) is always written first; a caller-supplied pitch or
*       slice alignment then replaces it when UseCustomPitch()/UseCustomHeight() allow it and the value is legal.
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_INVALIDPARAMS when a custom pitch/slice alignment is requested but is misaligned,
*       too small, inconsistent with a multi-slice surface, or cannot be evaluated because the element size or
*       pitch is zero; ADDR_OK otherwise.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [in,out] blockExtent is read; pitch/height are written
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // Zero for any bpp below 8; every division by this value below is guarded.
    const UINT_32  elementBytes = pIn->bpp >> 3;

    // Calculate the default pitch/height without any user inputs
    pOut->pitch  = PowTwoAlign(pIn->width,  pOut->blockExtent.width);
    pOut->height = PowTwoAlign(pIn->height, pOut->blockExtent.height);

    // Custom pitches / alignments are only possible with single mip level / linear images; otherwise,
    // ignore those parameters.
    if (UseCustomPitch(pIn))
    {
        const UINT_32  pitchAlignmentBytes    = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE);
        const UINT_32  pitchAlignmentElements = (elementBytes != 0) ? (pitchAlignmentBytes / elementBytes) : 0;

        // A zero alignment (element size of zero, or larger than the alignment itself) cannot be checked against;
        // reject it instead of dividing by zero.
        if (pitchAlignmentElements == 0)
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        // Their requested pitch has to meet the pitch alignment constraints applied by the HW.
        else if ((pIn->pitchInElement % pitchAlignmentElements) != 0)
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        // And their pitch can't be less than the minimum
        else if (pIn->pitchInElement < pOut->pitch)
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            pOut->pitch = pIn->pitchInElement;
        }
    }

    if ((returnCode == ADDR_OK) && UseCustomHeight(pIn))
    {
        // The height is derived by dividing the slice alignment by the row size, so both factors must be non-zero.
        if ((elementBytes == 0) || (pOut->pitch == 0))
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / pOut->pitch;

            // The slice alignment has to be a whole number of rows.
            if (customizedHeight * elementBytes * pOut->pitch != pIn->sliceAlign)
            {
                returnCode = ADDR_INVALIDPARAMS;
            }
            // With several slices the custom height must equal the default height.
            else if ((pIn->numSlices > 1) && (pOut->height != customizedHeight))
            {
                returnCode = ADDR_INVALIDPARAMS;
            }
            else
            {
                pOut->height = customizedHeight;
            }
        }
    }

    return returnCode;
}

} // V3
} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrlib3.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2023 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
************************************************************************************************************************
* @file  addrlib3.h
* @brief Contains the Addr::V3::Lib class definition.
************************************************************************************************************************
*/

#ifndef __ADDR3_LIB3_H__
#define __ADDR3_LIB3_H__

#include "addrlib.h"

namespace rocr {
namespace Addr
{
namespace V3
{

/**
************************************************************************************************************************
* @brief Bitmasks for swizzle mode determination on GFX12
************************************************************************************************************************
*/
// Each mask has one bit set per swizzle-mode enumerator; the bit position is the enumerator's value.

// Bits for the ADDR3_256KB_2D and ADDR3_256KB_3D swizzle modes.
const UINT_32 Gfx12Blk256KBSwModeMask = (1u << ADDR3_256KB_2D)  |
                                        (1u << ADDR3_256KB_3D);

// Bits for the ADDR3_64KB_2D and ADDR3_64KB_3D swizzle modes.
const UINT_32 Gfx12Blk64KBSwModeMask  = (1u << ADDR3_64KB_2D)   |
                                        (1u << ADDR3_64KB_3D);

// Bits for the ADDR3_4KB_2D and ADDR3_4KB_3D swizzle modes.
const UINT_32 Gfx12Blk4KBSwModeMask   = (1u << ADDR3_4KB_2D)    |
                                        (1u << ADDR3_4KB_3D);

// Bit for ADDR3_256B_2D; no 3D enumerator is included in this mask.
const UINT_32 Gfx12Blk256BSwModeMask  = (1u << ADDR3_256B_2D);

/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/
// Overlays four 16-bit per-channel fields with a single 64-bit word, so a setting can be accessed
// either channel by channel or all at once through `value`.
union ADDR_BIT_SETTING
{
    struct
    {
        UINT_16 x;  // 16-bit field for the x channel
        UINT_16 y;  // 16-bit field for the y channel
        UINT_16 z;  // 16-bit field for the z channel
        UINT_16 s;  // 16-bit field for the s channel (presumably "sample" -- TODO confirm)
    };
    UINT_64 value;  // All four channel fields viewed as one 64-bit word
};

/**
************************************************************************************************************************
* @brief Flags for SwizzleModeTable
************************************************************************************************************************
*/
// One-bit flags describing a swizzle mode, overlaid with a 32-bit word (`u32All`) so that all
// flags can be read or tested in a single access.
union SwizzleModeFlags
{
    struct
    {
        // Swizzle mode
        UINT_32 isLinear        : 1;    // Linear
        UINT_32 is2d            : 1;    // 2d mode
        UINT_32 is3d            : 1;    // 3d mode

        // Block size
        UINT_32 is256b          : 1;    // Block size is 256B
        UINT_32 is4kb           : 1;    // Block size is 4KB
        UINT_32 is64kb          : 1;    // Block size is 64KB
        UINT_32 is256kb         : 1;    // Block size is 256KB

        UINT_32 reserved        : 25;   // Reserved bits (pads the bitfield to 32 bits)
    };

    UINT_32 u32All;                     // All flag bits as one 32-bit value
};

// A pair of 32-bit extents for a two-dimensional quantity.
struct Dim2d
{
    UINT_32 w;  // presumably width -- TODO confirm units against users of this type
    UINT_32 h;  // presumably height -- TODO confirm units against users of this type
};

// Base-2 logarithms of the sizes named in each constant.
const UINT_32 Log2Size256  = 8u;    // 2^8  = 256
const UINT_32 Log2Size4K   = 12u;   // 2^12 = 4096
const UINT_32 Log2Size64K  = 16u;   // 2^16 = 65536
const UINT_32 Log2Size256K = 18u;   // 2^18 = 262144

/**
************************************************************************************************************************
* @brief Swizzle pattern information
************************************************************************************************************************
*/
// Accessed by index representing the logbase2 of (8bpp/16bpp/32bpp/64bpp/128bpp)
// contains the indices which map to 2D arrays SW_PATTERN_NIBBLE[1-4] which contain sections of an index equation.
// Four 8-bit indices, one per "nibble" section of a swizzle pattern.
// NOTE(review): the tables these indices select from are not visible here -- confirm against the
// SW_PATTERN_NIBBLE[1-4] definitions.
struct ADDR_SW_PATINFO
{
    UINT_8 nibble1Idx;  // index for nibble section 1
    UINT_8 nibble2Idx;  // index for nibble section 2
    UINT_8 nibble3Idx;  // index for nibble section 3
    UINT_8 nibble4Idx;  // index for nibble section 4
};

/**
************************************************************************************************************************
*   InitBit
*
*   @brief
*       Initialize bit setting value via a return value
************************************************************************************************************************
*/
// Produces a 64-bit value with exactly one bit set: bit `index` inside the 16-bit lane owned by
// channel `c` (the lane starts at bit c * 16, written as c << 4).
// Both arguments are parenthesized so that expression arguments (e.g. `a | b`, `a & b`) are
// evaluated as a unit instead of being re-associated by operator precedence after expansion.
#define InitBit(c, index) (1ull << (((c) << 4) + (index)))

// Single-bit constants built with InitBit(channel, bit). The letter selects the channel
// (X = 0, Y = 1, Z = 2, S = 3) and the number is the bit index within that channel.

// Channel 0 (X), bits 0..8
const UINT_64 X0  = InitBit(0,  0);
const UINT_64 X1  = InitBit(0,  1);
const UINT_64 X2  = InitBit(0,  2);
const UINT_64 X3  = InitBit(0,  3);
const UINT_64 X4  = InitBit(0,  4);
const UINT_64 X5  = InitBit(0,  5);
const UINT_64 X6  = InitBit(0,  6);
const UINT_64 X7  = InitBit(0,  7);
const UINT_64 X8  = InitBit(0,  8);

// Channel 1 (Y), bits 0..8
const UINT_64 Y0  = InitBit(1,  0);
const UINT_64 Y1  = InitBit(1,  1);
const UINT_64 Y2  = InitBit(1,  2);
const UINT_64 Y3  = InitBit(1,  3);
const UINT_64 Y4  = InitBit(1,  4);
const UINT_64 Y5  = InitBit(1,  5);
const UINT_64 Y6  = InitBit(1,  6);
const UINT_64 Y7  = InitBit(1,  7);
const UINT_64 Y8  = InitBit(1,  8);

// Channel 2 (Z), bits 0..5
const UINT_64 Z0  = InitBit(2,  0);
const UINT_64 Z1  = InitBit(2,  1);
const UINT_64 Z2  = InitBit(2,  2);
const UINT_64 Z3  = InitBit(2,  3);
const UINT_64 Z4  = InitBit(2,  4);
const UINT_64 Z5  = InitBit(2,  5);

// Channel 3 (S), bits 0..2
const UINT_64 S0  = InitBit(3,  0);
const UINT_64 S1  = InitBit(3,  1);
const UINT_64 S2  = InitBit(3,  2);

/**
************************************************************************************************************************
* @brief Bit setting for swizzle pattern
************************************************************************************************************************
*/

/**
************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities
************************************************************************************************************************
*/
class Lib : public Addr::Lib
{
public:
    virtual ~Lib();

    static Lib* GetLib(
        ADDR_HANDLE hLib);

    //
    // Interface stubs
    //

    // For data surface
    ADDR_E_RETURNCODE ComputeSurfaceInfo(
        const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE GetPossibleSwizzleModes(
        const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT*   pIn,
        ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*        pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
        const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    // Misc
    ADDR_E_RETURNCODE ComputePipeBankXor(
        const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeNonBlockCompressedView(
        const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern(
        const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut);

    ADDR_E_RETURNCODE ComputeSlicePipeBankXor(
        const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut);

protected:
    Lib();  // Constructor is protected
    Lib(const Client* pClient);

    static const UINT_32 MaxImageDim  = 65536;
    static const UINT_32 MaxMipLevels = 17; // Max image size is 64k
    static const UINT_32 MaxNumOfBpp  = 5;
    static const UINT_32 MaxNumOfAA   = 4;
    UINT_32 m_pipesLog2;                ///< Number of pipe per shader engine Log2
    UINT_32 m_pipeInterleaveLog2;       ///< Log2 of pipe interleave bytes

    static const Dim2d         Block256_2d[MaxNumOfBpp];
    static const ADDR_EXTENT3D Block1K_3d[MaxNumOfBpp];
    SwizzleModeFlags m_swizzleModeTable[ADDR3_MAX_TYPE];  ///< Swizzle mode table

    // Number of unique MSAA sample rates (1/2/4/8)
    static const UINT_32 MaxMsaaRateLog2     = 4;
    // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
    static const UINT_32 MaxElementBytesLog2 = 5;
    // Number of unique swizzle patterns (one entry per swizzle mode + MSAA + bpp configuration)
    static const UINT_32 NumSwizzlePatterns  = 19 * MaxElementBytesLog2;

    // Number of equation entries in the table
    UINT_32              m_numEquations;
    // Equation lookup table according to swizzle mode, MSAA sample rate, and bpp
    UINT_32              m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxMsaaRateLog2][MaxElementBytesLog2];

    // Equation table
    ADDR_EQUATION        m_equationTable[NumSwizzlePatterns];

    void SetEquationTableEntry(
        Addr3SwizzleMode addrType,
        UINT_32          msaaLog2,
        UINT_32          elementLog2,
        UINT_32          value)
    {
        m_equationLookupTable[addrType - 1][msaaLog2][elementLog2] = value;
    }

    const UINT_32 GetEquationTableEntry(
        Addr3SwizzleMode addrType,
        UINT_32          msaaLog2,
        UINT_32          elementLog2) const
    {
        return m_equationLookupTable[addrType - 1][msaaLog2][elementLog2];
    }

    static BOOL_32 Valid3DMipSliceIdConstraint(
        UINT_32 numSlices,
        UINT_32 mipId,
        UINT_32 slice)
    {
        return (Max((numSlices >> mipId), 1u) > slice);
    }

    UINT_32 GetBlockSize(
        Addr3SwizzleMode  swizzleMode,
        BOOL_32           forPitch = FALSE) const;

    UINT_32 GetBlockSizeLog2(
        Addr3SwizzleMode  swizzleMode,
        BOOL_32           forPitch = FALSE) const;

    BOOL_32 IsValidSwMode(Addr3SwizzleMode swizzleMode) const
    {
        return (m_swizzleModeTable[swizzleMode].u32All != 0);
    }

    UINT_32 IsLinear(Addr3SwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isLinear;
    }

    // Checking block size
    BOOL_32 IsBlock256b(Addr3SwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is256b;
    }

    // Checking block size
    BOOL_32 IsBlock4kb(Addr3SwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is4kb;
    }

    // Checking block size
    BOOL_32 IsBlock64kb(Addr3SwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is64kb;
    }

    // Checking block size
    BOOL_32 IsBlock256kb(Addr3SwizzleMode swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is256kb;
    }

    BOOL_32  Is2dSwizzle(Addr3SwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is2d;
    }

    BOOL_32  Is3dSwizzle(Addr3SwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].is3d;
    }

    virtual UINT_32 HwlComputeMaxBaseAlignments() const  { return 256 * 1024; }

    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn)
    {
        ADDR_NOT_IMPLEMENTED();
        // Although GFX12 addressing should be consistent regardless of the configuration, we still need to
        // call some initialization for member variables.
        return TRUE;
    }

    virtual ChipFamily HwlConvertChipFamily(
        UINT_32 chipFamily,
        UINT_32 chipRevision);

    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const { return 0; }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
         const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    VOID ComputeBlockDimensionForSurf(
        ADDR_EXTENT3D*    pExtent,
        UINT_32           bpp,
        UINT_32           numSamples,
        Addr3SwizzleMode  swizzleMode) const;

    ADDR_EXTENT3D GetMipTailDim(
        Addr3SwizzleMode      swizzleMode,
        const ADDR_EXTENT3D&  blockDims) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
        const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled(
        const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTIMPLEMENTED;
    }

    virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView(
        const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    virtual VOID HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
    }

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
    {
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    ADDR_E_RETURNCODE ApplyCustomizedPitchHeight(
        const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    BOOL_32 UseCustomHeight(const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn) const;
    BOOL_32 UseCustomPitch(const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn) const;
    BOOL_32 CanTrimLinearPadding(const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pIn) const;

private:
    // Disallow the copy constructor
    Lib(const Lib& a);

    // Disallow the assignment operator
    Lib& operator=(const Lib& a);

    void Init();
};

} // V3
} // Addr
} // namespace rocr

#endif

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrobject.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
****************************************************************************************************
* @file  addrobject.cpp
* @brief Contains the Object base class implementation.
****************************************************************************************************
*/

#include "addrinterface.h"
#include "addrobject.h"

namespace rocr {
namespace Addr
{

/**
****************************************************************************************************
*   Object::Object
*
*   @brief
*       Constructor for the Object class.
****************************************************************************************************
*/
Object::Object()
{
    // Start with no client attached: every callback slot and the handle are cleared.
    m_client.callbacks.debugPrint  = NULL;
    m_client.callbacks.freeSysMem  = NULL;
    m_client.callbacks.allocSysMem = NULL;
    m_client.handle                = NULL;
}

/**
****************************************************************************************************
*   Object::Object
*
*   @brief
*       Constructor for the Object class.
****************************************************************************************************
*/
Object::Object(const Client* pClient)
    : m_client(*pClient)    // take a private copy of the caller's client description
{
}

/**
****************************************************************************************************
*   Object::~Object
*
*   @brief
*       Destructor for the Object class.
****************************************************************************************************
*/
// Nothing to release here; the compiler-generated body is sufficient.
Object::~Object() = default;

/**
****************************************************************************************************
*   Object::ClientAlloc
*
*   @brief
*       Calls instanced allocSysMem inside Client
****************************************************************************************************
*/
VOID* Object::ClientAlloc(
    size_t         objSize,    ///< [in] Size to allocate
    const Client*  pClient)    ///< [in] Client pointer
{
    // Without an allocation callback nothing can be allocated; report that as NULL.
    if (pClient->callbacks.allocSysMem == NULL)
    {
        return NULL;
    }

    // Describe the request in the structure the client callback expects.
    ADDR_ALLOCSYSMEM_INPUT request = {0};

    request.size        = sizeof(ADDR_ALLOCSYSMEM_INPUT);
    request.flags.value = 0;
    request.sizeInBytes = static_cast<UINT_32>(objSize);   // size is narrowed to 32 bits
    request.hClient     = pClient->handle;

    return pClient->callbacks.allocSysMem(&request);
}

/**
****************************************************************************************************
*   Object::Alloc
*
*   @brief
*       A wrapper of ClientAlloc
****************************************************************************************************
*/
VOID* Object::Alloc(
    size_t objSize      ///< [in] Size to allocate
    ) const
{
    // Allocate through the client stored in this object.
    const Client* const pOwnClient = &m_client;

    return ClientAlloc(objSize, pOwnClient);
}

/**
****************************************************************************************************
*   Object::ClientFree
*
*   @brief
*       Calls freeSysMem inside Client
****************************************************************************************************
*/
VOID Object::ClientFree(
    VOID*          pObjMem,    ///< [in] User virtual address to free.
    const Client*  pClient)    ///< [in] Client pointer
{
    // Nothing to do when there is no free callback or no memory to release.
    if ((pClient->callbacks.freeSysMem == NULL) || (pObjMem == NULL))
    {
        return;
    }

    // Describe the release in the structure the client callback expects.
    ADDR_FREESYSMEM_INPUT request = {0};

    request.size      = sizeof(ADDR_FREESYSMEM_INPUT);
    request.pVirtAddr = pObjMem;
    request.hClient   = pClient->handle;

    pClient->callbacks.freeSysMem(&request);
}

/**
****************************************************************************************************
*   Object::Free
*
*   @brief
*       A wrapper of ClientFree
****************************************************************************************************
*/
VOID Object::Free(
    VOID* pObjMem       ///< [in] User virtual address to free.
    ) const
{
    // Release through the client stored in this object.
    const Client* const pOwnClient = &m_client;

    ClientFree(pObjMem, pOwnClient);
}

/**
****************************************************************************************************
*   Object::operator new
*
*   @brief
*       Placement new operator. (with pre-allocated memory pointer)
*
*   @return
*       Returns pre-allocated memory pointer.
****************************************************************************************************
*/
VOID* Object::operator new(
    size_t objSize,     ///< [in] Size to allocate
    VOID*  pMem        ///< [in] Pre-allocated pointer
    ) noexcept
{
    // Placement form: the caller supplies the storage, so the requested size is not consulted.
    (void)objSize;

    VOID* const pStorage = pMem;
    return pStorage;
}

/**
****************************************************************************************************
*   Object::operator delete
*
*   @brief
*       Frees Object object memory.
****************************************************************************************************
*/
VOID Object::operator delete(
    VOID* pObjMem)      ///< [in] User virtual address to free.
{
    // The client record embedded in the object being deleted supplies the free callback, so it
    // is read from the very memory that is about to be handed back.
    const Client* const pOwner = &static_cast<Object*>(pObjMem)->m_client;

    ClientFree(pObjMem, pOwner);
}

/**
****************************************************************************************************
*   Object::DebugPrint
*
*   @brief
*       Print debug message
*
*   @return
*       N/A
****************************************************************************************************
*/
VOID Object::DebugPrint(
    const CHAR* pDebugString,     ///< [in] Debug string
    ...
    ) const
{
#if DEBUG
    // Only forward the message when the client registered a print callback.
    if (m_client.callbacks.debugPrint == NULL)
    {
        return;
    }

    va_list args;
    va_start(args, pDebugString);

    ADDR_DEBUGPRINT_INPUT printInput = {0};

    printInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
    printInput.hClient      = m_client.handle;
    printInput.pDebugString = const_cast<CHAR*>(pDebugString);
    // The callback receives its own copy of the argument list.
    va_copy(printInput.ap, args);

    m_client.callbacks.debugPrint(&printInput);

    // Both the original list and the copy must be ended.
    va_end(args);
    va_end(printInput.ap);
#endif
}

} // Addr
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/addrobject.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/


/**
****************************************************************************************************
* @file  addrobject.h
* @brief Contains the Object base class definition.
****************************************************************************************************
*/

#ifndef __ADDR_OBJECT_H__
#define __ADDR_OBJECT_H__

#include "addrtypes.h"
#include "addrcommon.h"

namespace rocr {
namespace Addr
{

/**
****************************************************************************************************
* @brief This structure contains client specific data
****************************************************************************************************
*/
// Bundles what the library needs to call back into its client.
struct Client
{
    ADDR_CLIENT_HANDLE  handle;     // Opaque client handle, passed back as hClient in callback inputs
    ADDR_CALLBACKS      callbacks;  // Client-provided callbacks (allocSysMem / freeSysMem / debugPrint)
};
/**
****************************************************************************************************
* @brief This class is the base class for all ADDR class objects.
****************************************************************************************************
*/
// Base class that carries a copy of the client description and routes memory allocation,
// release and debug printing through the client's callbacks. Objects are copy-disabled.
class Object
{
public:
    // Default constructor: clears the client handle and all callbacks.
    Object();
    // Copies *pClient into this object (pClient is dereferenced unconditionally).
    Object(const Client* pClient);
    virtual ~Object();

    // Placement new: returns pMem unchanged.
    VOID* operator new(size_t size, VOID* pMem) noexcept;
    // Frees the object's memory through the client stored inside the object itself.
    VOID  operator delete(VOID* pObj);
    /// Microsoft compiler requires a matching delete implementation, which seems to be called when
    /// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is
    /// added to eliminate the warning.
    VOID  operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); }

    // Allocate / free through this object's client (wrappers around ClientAlloc / ClientFree).
    VOID* Alloc(size_t size) const;
    VOID  Free(VOID* pObj) const;

    // Forwards a printf-style message to the client's debugPrint callback (DEBUG builds only).
    VOID DebugPrint(const CHAR* pDebugString, ...) const;

    // Returns a pointer to the client description stored in this object.
    const Client* GetClient() const {return &m_client;}

protected:
    Client m_client;    // Client handle and callbacks used by this object

    // Returns NULL when the client has no allocSysMem callback.
    static VOID* ClientAlloc(size_t size, const Client* pClient);
    // Does nothing when the client has no freeSysMem callback or pObj is NULL.
    static VOID  ClientFree(VOID* pObj, const Client* pClient);

private:
    // disallow the copy constructor
    Object(const Object& a);

    // disallow the assignment operator
    Object& operator=(const Object& a);
};

} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/coord.cpp
================================================

/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

// Coordinate class implementation
#include "addrcommon.h"
#include "coord.h"

namespace rocr {
namespace Addr
{
namespace V2
{

// Default coordinate: bit 0 of the X dimension.
Coordinate::Coordinate()
    : dim(DIM_X),
      ord(0)
{
}

// Builds the coordinate for bit `n` of dimension `dim`; `n` is narrowed to INT_8 as in set().
Coordinate::Coordinate(enum Dim dim, INT_32 n)
    : dim(dim),
      ord(static_cast<INT_8>(n))
{
}

// Re-targets this coordinate to bit `n` of dimension `d`; `n` is narrowed to INT_8.
VOID Coordinate::set(enum Dim d, INT_32 n)
{
    ord = static_cast<INT_8>(n);
    dim = d;
}

// Returns 1 when bit `ord` of coords[dim] is set, otherwise 0.
// The mask is built in 64 bits and truncated to 32, so an ord of 32..63 yields a zero mask.
UINT_32 Coordinate::ison(const UINT_32 *coords) const
{
    const UINT_32 bitIndex = static_cast<UINT_32>(ord);
    const UINT_32 mask     = static_cast<UINT_32>(1ull << bitIndex);

    if ((coords[dim] & mask) != 0)
    {
        return 1;
    }
    return 0;
}

// Accessor for the dimension this coordinate belongs to.
enum Dim Coordinate::getdim()
{
    const enum Dim current = dim;
    return current;
}

// Accessor for the bit index within the dimension.
INT_8 Coordinate::getord()
{
    const INT_8 current = ord;
    return current;
}

// Two coordinates are equal when both the dimension and the bit index match.
BOOL_32 Coordinate::operator==(const Coordinate& b)
{
    const bool sameDim = (dim == b.dim);
    const bool sameOrd = (ord == b.ord);

    return sameDim && sameOrd;
}

// Ordering used to keep coordinate lists sorted:
//  - same dimension: compare bit indices;
//  - otherwise DIM_S sorts before everything and DIM_M after everything;
//  - otherwise compare bit indices, breaking ties by dimension.
BOOL_32 Coordinate::operator<(const Coordinate& b)
{
    if (dim == b.dim)
    {
        return ord < b.ord;
    }

    if (dim == DIM_S || b.dim == DIM_M)
    {
        return TRUE;
    }

    if (b.dim == DIM_S || dim == DIM_M)
    {
        return FALSE;
    }

    return (ord == b.ord) ? (dim < b.dim) : (ord < b.ord);
}

// Greater-than is defined as "neither less-than nor equal".
BOOL_32 Coordinate::operator>(const Coordinate& b)
{
    const bool lessOrEqual = (*this < b) || (*this == b);

    return !lessOrEqual;
}

// Less-or-equal: true when either the ordering or the equality test holds.
BOOL_32 Coordinate::operator<=(const Coordinate& b)
{
    const bool isLess = (*this < b);
    const bool isSame = (*this == b);

    return isLess || isSame;
}

// Greater-or-equal is the negation of less-than.
BOOL_32 Coordinate::operator>=(const Coordinate& b)
{
    const bool isLess = (*this < b);

    return !isLess;
}

// Inequality is the negation of operator==.
BOOL_32 Coordinate::operator!=(const Coordinate& b)
{
    const bool isSame = (*this == b);

    return !isSame;
}

// Postfix-form increment that advances to the next bit of the same dimension.
// Note that it returns a reference to the updated object, not a copy of the old value.
Coordinate& Coordinate::operator++(INT_32)
{
    ++ord;
    return *this;
}

// CoordTerm

// A new term holds no coordinates.
CoordTerm::CoordTerm()
    : num_coords(0)
{
}

// Empties the term by resetting the count; stored entries are simply ignored afterwards.
VOID CoordTerm::Clear()
{
    num_coords = 0u;
}

// Adds a coordinate into the list.
// A coordinate that is already present is not added again, and the list is kept ordered from
// smallest to largest. If the term already holds MaxCoords entries the coordinate is not added
// (and a debug assertion fires) instead of writing past the end of m_coord.
VOID CoordTerm::add(Coordinate& co)
{
    // Locate the first entry that is not smaller than co: either co itself or its insertion slot.
    UINT_32 pos = 0;

    while ((pos < num_coords) && (m_coord[pos] < co))
    {
        pos++;
    }

    if ((pos < num_coords) && (m_coord[pos] == co))
    {
        // Already present: nothing to do.
        return;
    }

    if (num_coords >= MaxCoords)
    {
        // No room left; inserting would overflow the fixed-size array.
        ADDR_ASSERT(num_coords < MaxCoords);
        return;
    }

    // Open a gap at pos by moving the larger entries up one slot, then store co there.
    for (UINT_32 j = num_coords; j > pos; j--)
    {
        m_coord[j] = m_coord[j - 1];
    }

    m_coord[pos] = co;
    num_coords++;
}

// Merges every coordinate of `cl` into this term, one at a time via add(Coordinate&).
VOID CoordTerm::add(CoordTerm& cl)
{
    UINT_32 idx = 0;

    while (idx < cl.num_coords)
    {
        add(cl.m_coord[idx]);
        idx++;
    }
}

// Removes `co` from the term, closing the gap by moving later entries down one slot.
// Returns TRUE when a matching coordinate was found, FALSE otherwise.
BOOL_32 CoordTerm::remove(Coordinate& co)
{
    BOOL_32 remove = FALSE;
    for (UINT_32 i = 0; i < num_coords; i++)
    {
        if (m_coord[i] == co)
        {
            remove = TRUE;
            num_coords--;
        }

        // Once a match has been seen, each remaining slot takes its successor's value.
        // The extra bound check matters in the iteration that just decremented num_coords:
        // when the match was the last entry there is no successor, and reading m_coord[i + 1]
        // would step one past the valid entries (past the array itself when the term is full).
        if (remove && (i < num_coords))
        {
            m_coord[i] = m_coord[i + 1];
        }
    }
    return remove;
}

// Linear search for `co`; returns TRUE on the first match, FALSE when none is found.
BOOL_32 CoordTerm::Exists(Coordinate& co)
{
    for (UINT_32 idx = 0; idx < num_coords; idx++)
    {
        if (m_coord[idx] == co)
        {
            return TRUE;
        }
    }

    return FALSE;
}

// Copies this term's coordinates and count into `cl`.
VOID CoordTerm::copyto(CoordTerm& cl)
{
    UINT_32 idx = 0;

    while (idx < num_coords)
    {
        cl.m_coord[idx] = m_coord[idx];
        idx++;
    }

    cl.num_coords = num_coords;
}

// Number of coordinates currently held by the term.
UINT_32 CoordTerm::getsize()
{
    const UINT_32 count = num_coords;
    return count;
}

// XORs together the selected bit (0 or 1) of every coordinate in the term, evaluated against
// the per-dimension values in `coords`.
UINT_32 CoordTerm::getxor(const UINT_32 *coords) const
{
    UINT_32 parity = 0;

    for (UINT_32 idx = 0; idx < num_coords; idx++)
    {
        parity ^= m_coord[idx].ison(coords);
    }

    return parity;
}

// Writes the first stored coordinate to `co` (the list is kept sorted smallest-first by add()).
// The slot is read regardless of how many coordinates the term currently holds.
VOID CoordTerm::getsmallest(Coordinate& co)
{
    const Coordinate& first = m_coord[0];
    co = first;
}

// Removes, starting at index `start`, every coordinate that satisfies the relation `f`
// ('<', '>' or '=') against `co` and that lies on `axis` (NUM_DIMS matches any axis).
// Returns the number of coordinates left in the term.
UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, enum Dim axis)
{
    UINT_32 idx = start;

    while (idx < num_coords)
    {
        Coordinate& current = m_coord[idx];

        // Evaluate only the comparison selected by f; any other f value matches nothing.
        BOOL_32 relationHolds = FALSE;
        if (f == '<')
        {
            relationHolds = (current < co);
        }
        else if (f == '>')
        {
            relationHolds = (current > co);
        }
        else if (f == '=')
        {
            relationHolds = (current == co);
        }

        const bool axisMatches = (axis == NUM_DIMS) || (axis == current.getdim());

        if (relationHolds && axisMatches)
        {
            // Drop the entry by shifting the tail down; idx stays put to examine the new occupant.
            for (UINT_32 j = idx; j < num_coords - 1; j++)
            {
                m_coord[j] = m_coord[j + 1];
            }
            num_coords--;
        }
        else
        {
            idx++;
        }
    }

    return num_coords;
}

// Unchecked access to the i-th stored coordinate.
Coordinate& CoordTerm::operator[](UINT_32 i)
{
    Coordinate& entry = m_coord[i];
    return entry;
}

// Two terms are equal when they hold the same number of coordinates and match entry by entry.
BOOL_32 CoordTerm::operator==(const CoordTerm& b)
{
    if (num_coords != b.num_coords)
    {
        return FALSE;
    }

    // Both lists are kept in order, so a position-wise comparison is sufficient.
    for (UINT_32 idx = 0; idx < num_coords; idx++)
    {
        if (m_coord[idx] != b.m_coord[idx])
        {
            return FALSE;
        }
    }

    return TRUE;
}

// Inequality is the negation of operator==.
BOOL_32 CoordTerm::operator!=(const CoordTerm& b)
{
    const bool isSame = (*this == b);

    return !isSame;
}

// Scans the coordinates in order and stops at the first one whose bit weight (1 << ord) is
// less than or equal to ranges[dim]; the result of that last comparison is returned.
BOOL_32 CoordTerm::exceedRange(const UINT_32 *ranges)
{
    BOOL_32 exceed = FALSE;
    UINT_32 idx    = 0;

    while ((exceed == FALSE) && (idx < num_coords))
    {
        Coordinate& current = m_coord[idx];

        exceed = ((1u << current.getord()) <= ranges[current.getdim()]);
        idx++;
    }

    return exceed;
}

// coordeq
// A new equation has no bits.
CoordEq::CoordEq()
    : m_numBits(0)
{
}

// Removes `co` from the term of every bit in the equation.
VOID CoordEq::remove(Coordinate& co)
{
    UINT_32 bit = 0;

    while (bit < m_numBits)
    {
        m_eq[bit].remove(co);
        bit++;
    }
}

// Returns TRUE when `co` appears in the term of at least one bit of the equation.
BOOL_32 CoordEq::Exists(Coordinate& co)
{
    for (UINT_32 bit = 0; bit < m_numBits; bit++)
    {
        if (m_eq[bit].Exists(co))
        {
            // The lookups have no side effects, so the answer is final at the first hit.
            return TRUE;
        }
    }

    return FALSE;
}

// Sets the number of bits to `n`. When growing, the newly exposed terms are cleared first;
// when shrinking, nothing is cleared (the loop below does not execute).
VOID CoordEq::resize(UINT_32 n)
{
    for (UINT_32 bit = m_numBits; bit < n; bit++)
    {
        m_eq[bit].Clear();
    }

    m_numBits = n;
}

// Number of bits in the equation.
UINT_32 CoordEq::getsize()
{
    const UINT_32 bits = m_numBits;
    return bits;
}

// Evaluates the equation: output bit i is the XOR of term i's coordinates taken from `coords`.
UINT_64 CoordEq::solve(const UINT_32 *coords) const
{
    UINT_64 result = 0;

    for (UINT_32 bit = 0; bit < m_numBits; bit++)
    {
        const UINT_64 bitValue = static_cast<UINT_64>(m_eq[bit].getxor(coords));

        result |= (bitValue << bit);
    }

    return result;
}

// Inverts the equation: recovers per-dimension coordinate values (written to coords[0..NUM_DIMS))
// from an address.
//   addr     - address whose bits were produced by this equation
//   sliceInM - when non-zero, used to derive coords[DIM_Z] from coords[DIM_M]
//   coords   - output array with NUM_DIMS entries; it is zeroed first
// Works on a private copy of the equation. Bits whose term has a single coordinate are read
// straight from the address; terms with several coordinates are reduced iteratively by XOR-ing
// out the coordinates that are already known.
// NOTE(review): the reduction loop has no iteration cap; it ends only when every remaining
// multi-coordinate term has been reduced.
VOID CoordEq::solveAddr(
    UINT_64 addr, UINT_32 sliceInM,
    UINT_32 *coords) const
{
    // Per-dimension mask of coordinate bits whose value has been determined so far.
    UINT_32 BitsValid[NUM_DIMS] = {0};

    CoordEq temp = *this;

    memset(coords, 0, NUM_DIMS * sizeof(coords[0]));

    UINT_32 bitsLeft = 0;

    // First pass: resolve every address bit that maps to exactly one coordinate bit.
    for (UINT_32 i = 0; i < temp.m_numBits; i++)
    {
        UINT_32 termSize = temp.m_eq[i].getsize();

        if (termSize == 1)
        {
            INT_8 bit = (addr >> i) & 1;
            enum Dim dim = temp.m_eq[i][0].getdim();
            INT_8 ord = temp.m_eq[i][0].getord();

            ADDR_ASSERT((ord < 32) || (bit == 0));

            BitsValid[dim] |= 1u << ord;
            coords[dim] |= bit << ord;

            temp.m_eq[i].Clear();
        }
        else if (termSize > 1)
        {
            bitsLeft++;
        }
    }

    if (bitsLeft > 0)
    {
        if (sliceInM != 0)
        {
            // Derive Z from M and treat all of its bits as known.
            coords[DIM_Z] = coords[DIM_M] / sliceInM;
            BitsValid[DIM_Z] = 0xffffffff;
        }

        do
        {
            bitsLeft = 0;

            for (UINT_32 i = 0; i < temp.m_numBits; i++)
            {
                UINT_32 termSize = temp.m_eq[i].getsize();

                if (termSize == 1)
                {
                    // The term was reduced to one coordinate: the (adjusted) address bit is its value.
                    INT_8 bit = (addr >> i) & 1;
                    enum Dim dim = temp.m_eq[i][0].getdim();
                    INT_8 ord = temp.m_eq[i][0].getord();

                    ADDR_ASSERT((ord < 32) || (bit == 0));
                    ADDR_ASSERT(dim < DIM_S);

                    BitsValid[dim] |= 1u << ord;
                    coords[dim] |= bit << ord;

                    temp.m_eq[i].Clear();
                }
                else if (termSize > 1)
                {
                    // Edit a copy so that indexing temp.m_eq[i] stays stable while entries are removed.
                    CoordTerm tmpTerm = temp.m_eq[i];

                    for (UINT_32 j = 0; j < termSize; j++)
                    {
                        enum Dim dim = temp.m_eq[i][j].getdim();
                        INT_8 ord = temp.m_eq[i][j].getord();

                        ADDR_ASSERT(dim < DIM_S);

                        if (BitsValid[dim] & (1u << ord))
                        {
                            // XOR the known coordinate bit out of address bit i. The value is widened
                            // to 64 bits BEFORE shifting so that address bits 32 and above are handled
                            // correctly.
                            const UINT_64 knownBit =
                                static_cast<UINT_64>((coords[dim] >> ord) & 1) << i;
                            addr ^= knownBit;
                            tmpTerm.remove(temp.m_eq[i][j]);
                        }
                    }

                    temp.m_eq[i] = tmpTerm;

                    // Counted even if the term just shrank, so one more pass picks up the result.
                    bitsLeft++;
                }
            }
        } while (bitsLeft > 0);
    }
}

// Copies `num` terms, beginning at bit `start` of this equation, into bits 0.. of `o`.
// Passing 0xFFFFFFFF for `num` means "use this equation's bit count".
VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num)
{
    if (num == 0xFFFFFFFF)
    {
        o.m_numBits = m_numBits;
    }
    else
    {
        o.m_numBits = num;
    }

    UINT_32 bit = 0;

    while (bit < o.m_numBits)
    {
        m_eq[start + bit].copyto(o.m_eq[bit]);
        bit++;
    }
}

// Reverses the order of `num` terms beginning at bit `start`.
// Passing 0xFFFFFFFF for `num` means "use this equation's bit count".
VOID CoordEq::reverse(UINT_32 start, UINT_32 num)
{
    UINT_32 count = num;
    if (num == 0xFFFFFFFF)
    {
        count = m_numBits;
    }

    const UINT_32 half = count / 2;

    for (UINT_32 step = 0; step < half; step++)
    {
        CoordTerm& front = m_eq[start + step];
        CoordTerm& back  = m_eq[start + count - 1 - step];

        // Three-way swap through a scratch term.
        CoordTerm saved;
        front.copyto(saved);
        back.copyto(front);
        saved.copyto(back);
    }
}

// Merges the terms of `x` into this equation, with bit i of `x` added to bit (start + i) here.
// The number of bits processed is the smaller of (m_numBits - start) and x's bit count.
VOID CoordEq::xorin(CoordEq& x, UINT_32 start)
{
    const UINT_32 available = m_numBits - start;

    UINT_32 count = x.m_numBits;
    if (available < x.m_numBits)
    {
        count = available;
    }

    for (UINT_32 bit = 0; bit < count; bit++)
    {
        m_eq[start + bit].add(x.m_eq[bit]);
    }
}

// Applies CoordTerm::Filter to every bit from `start` onward and deletes any bit whose term
// becomes empty, shifting the following bits down. Returns the remaining number of bits.
UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, enum Dim axis)
{
    UINT_32 bit = start;

    while (bit < m_numBits)
    {
        const UINT_32 remaining = m_eq[bit].Filter(f, co, 0, axis);

        if (remaining != 0)
        {
            bit++;
            continue;
        }

        // The term is now empty: close the gap and re-examine the same index.
        for (UINT_32 j = bit; j < m_numBits - 1; j++)
        {
            m_eq[j] = m_eq[j + 1];
        }
        m_numBits--;
    }

    return m_numBits;
}

// Shifts the terms in bits [start, m_numBits) by `amount` positions: the term at bit k ends up
// at bit k + amount. Positions whose source would fall outside [start, m_numBits) are cleared.
// A zero amount leaves the equation untouched.
VOID CoordEq::shift(INT_32 amount, INT_32 start)
{
    if (amount != 0)
    {
        INT_32 numBits = static_cast<INT_32>(m_numBits);
        // After negation, `amount` is the offset from a destination bit to its source bit.
        amount = -amount;
        // Walk in the direction that reads each source before it is overwritten:
        // from the top down when sources lie below, from `start` up when they lie above.
        INT_32 inc = (amount < 0) ? -1 : 1;
        INT_32 i = (amount < 0) ? numBits - 1 : start;
        INT_32 end = (amount < 0) ? start - 1 : numBits;
        for (; (inc > 0) ? i < end : i > end; i += inc)
        {
            if ((i + amount < start) || (i + amount >= numBits))
            {
                // No source inside the shifted window: this bit becomes empty.
                m_eq[i].Clear();
            }
            else
            {
                m_eq[i + amount].copyto(m_eq[i]);
            }
        }
    }
}

// Unchecked access to the term of bit i.
CoordTerm& CoordEq::operator[](UINT_32 i)
{
    CoordTerm& term = m_eq[i];
    return term;
}

// Interleaves two coordinates over bits [start, end]: c0 is added to the first bit, c1 to the
// next, and so on alternately; each coordinate is advanced to its next bit after being used.
// An `end` of 0 means "through the last bit of the equation". c0 and c1 are modified.
VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end)
{
    if (end == 0)
    {
        ADDR_ASSERT(m_numBits > 0);
        end = m_numBits - 1;
    }

    for (UINT_32 bit = start; bit <= end; bit++)
    {
        const bool useFirst = (((bit - start) % 2) == 0);
        Coordinate& next = useFirst ? c0 : c1;

        m_eq[bit].add(next);
        next++;
    }
}

// Interleaves three coordinates over bits [start, end] in the repeating order c0, c1, c2; each
// coordinate is advanced to its next bit after being used.
// An `end` of 0 means "through the last bit of the equation". c0, c1 and c2 are modified.
VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end)
{
    if (end == 0)
    {
        ADDR_ASSERT(m_numBits > 0);
        end = m_numBits - 1;
    }

    Coordinate* const rotation[3] = { &c0, &c1, &c2 };

    for (UINT_32 bit = start; bit <= end; bit++)
    {
        Coordinate& next = *rotation[(bit - start) % 3];

        m_eq[bit].add(next);
        next++;
    }
}

// Two equations are equal when they have the same number of bits and equal terms bit by bit.
BOOL_32 CoordEq::operator==(const CoordEq& b)
{
    if (m_numBits != b.m_numBits)
    {
        return FALSE;
    }

    for (UINT_32 bit = 0; bit < m_numBits; bit++)
    {
        if (m_eq[bit] != b.m_eq[bit])
        {
            return FALSE;
        }
    }

    return TRUE;
}

// Inequality is the negation of operator==.
BOOL_32 CoordEq::operator!=(const CoordEq& b)
{
    const bool isSame = (*this == b);

    return !isSame;
}

} // V2
} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/core/coord.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

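// Classes used to define a coordinate bit (Coordinate), a term made of coordinate bits (CoordTerm)
// and an equation made of terms (CoordEq)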

#ifndef __COORD_H
#define __COORD_H

namespace rocr {
namespace Addr
{
namespace V2
{
// Detect C++11 support: ADDR_CPP11_COMPILER is defined when building as C++ with either
// MSVC reporting _MSC_VER >= 1900 or any other compiler reporting __cplusplus >= 201103L.
#if defined(__cplusplus)
#if defined(_MSC_VER)
    #if _MSC_VER >= 1900
        #define ADDR_CPP11_COMPILER TRUE
    #endif
#else
    #if __cplusplus >= 201103L
        #define ADDR_CPP11_COMPILER TRUE
    #endif
#endif
#endif

// Dimension tag carried by a Coordinate.
// With a C++11 compiler the enum is given a fixed INT_8 underlying type; otherwise the
// compiler chooses the underlying type.
// Enumerators take the default values 0..5, so NUM_DIMS equals the number of real dimensions.
// NUM_DIMS is also the default `axis` argument of the Filter() methods declared in this header.
// NOTE(review): the meaning of DIM_S and DIM_M is not evident from this header - confirm.
#if defined(ADDR_CPP11_COMPILER)
enum Dim : INT_8
#else
enum Dim
#endif
{
   DIM_X,
   DIM_Y,
   DIM_Z,
   DIM_S,
   DIM_M,
   NUM_DIMS
};

// A single coordinate bit: a dimension tag (dim) paired with an 8-bit signed ordinal (ord).
// NOTE(review): the member definitions are not in this header; the remarks below are read
// off the declarations only.
class Coordinate
{
public:
    Coordinate();
    // n is INT_32 here while the stored ord is INT_8 - presumably narrowed on store; confirm.
    Coordinate(enum Dim dim, INT_32 n);

    VOID set(enum Dim dim, INT_32 n);
    // Presumably tests this bit against the per-dimension values in coords - confirm.
    UINT_32 ison(const UINT_32 *coords) const;
    enum Dim getdim();
    INT_8   getord();

    // The comparison operators are declared non-const, so they cannot be called on a
    // const Coordinate (the right-hand operand may be const).
    BOOL_32 operator==(const Coordinate& b);
    BOOL_32 operator<(const Coordinate& b);
    BOOL_32 operator>(const Coordinate& b);
    BOOL_32 operator<=(const Coordinate& b);
    BOOL_32 operator>=(const Coordinate& b);
    BOOL_32 operator!=(const Coordinate& b);
    // Postfix form of ++ (dummy INT_32 parameter); note that it returns a reference, not a copy.
    Coordinate& operator++(INT_32);

private:
    enum Dim dim;   // dimension tag
    INT_8 ord;      // ordinal within the dimension
};

// A term: a fixed-capacity collection of at most MaxCoords (8) Coordinate entries.
// NOTE(review): the member definitions are not in this header; method names such as getxor()
// suggest the entries are XOR-ed together when the term is evaluated - confirm.
class CoordTerm
{
public:
    CoordTerm();
    VOID Clear();
    VOID add(Coordinate& co);
    VOID add(CoordTerm& cl);
    BOOL_32 remove(Coordinate& co);
    BOOL_32 Exists(Coordinate& co);
    VOID copyto(CoordTerm& cl);
    UINT_32 getsize();
    UINT_32 getxor(const UINT_32 *coords) const;

    VOID getsmallest(Coordinate& co);
    // axis defaults to NUM_DIMS, i.e. a value that is not one of the real dimensions.
    UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, enum Dim axis = NUM_DIMS);
    Coordinate& operator[](UINT_32 i);
    // Declared non-const: usable only on a non-const left-hand operand.
    BOOL_32 operator==(const CoordTerm& b);
    BOOL_32 operator!=(const CoordTerm& b);
    BOOL_32 exceedRange(const UINT_32 *ranges);

private:
    static const UINT_32 MaxCoords = 8;   // capacity of m_coord
    UINT_32 num_coords;                   // presumably the number of m_coord entries in use - confirm
    Coordinate m_coord[MaxCoords];
};

// An equation: an array of up to MaxEqBits (64) CoordTerm entries, of which the first
// m_numBits are in use (operator== compares exactly that many terms).
// NOTE(review): the class declares virtual functions but no virtual destructor - confirm that
// no object is ever deleted through a CoordEq pointer to a derived type.
class CoordEq
{
public:
    CoordEq();
    VOID remove(Coordinate& co);
    BOOL_32 Exists(Coordinate& co);
    VOID resize(UINT_32 n);
    UINT_32 getsize();
    virtual UINT_64 solve(const UINT_32 *coords) const;
    virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM,
                           UINT_32 *coords) const;

    // num defaults to 0xFFFFFFFF - presumably meaning "everything from start"; confirm.
    VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
    VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
    VOID xorin(CoordEq& x, UINT_32 start = 0);
    UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, enum Dim axis = NUM_DIMS);
    VOID shift(INT_32 amount, INT_32 start = 0);
    // Unchecked access to term i; returns a mutable reference.
    virtual CoordTerm& operator[](UINT_32 i);
    // mort2d / mort3d add the given coordinates in rotation to terms [start, end] (inclusive),
    // post-incrementing each coordinate after it is added (the arguments are modified).
    // end == 0 is a sentinel for the last bit, m_numBits - 1.
    VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0);
    VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0);

    // Equal when both have the same m_numBits and all terms in use compare equal.
    // Declared non-const: usable only on a non-const left-hand operand.
    BOOL_32 operator==(const CoordEq& b);
    BOOL_32 operator!=(const CoordEq& b);

private:
    static const UINT_32 MaxEqBits = 64;  // capacity of m_eq
    UINT_32 m_numBits;                    // number of terms of m_eq in use

    CoordTerm m_eq[MaxEqBits];
};

} // V2
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10SwizzlePattern.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx10SwizzlePattern.h
* @brief swizzle pattern for gfx10.
************************************************************************************************************************
*/

#ifndef __GFX10_SWIZZLE_PATTERN_H__
#define __GFX10_SWIZZLE_PATTERN_H__

namespace rocr {
namespace Addr
{
namespace V2
{
// Pattern-info table labelled SW_256_S for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_256_S_PATINFO[] =
{
    {   1,    0,    0,    0,    0, } , // 1 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 1 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 1 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 1 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 1 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 2 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 2 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 2 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 2 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 2 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 4 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 4 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 4 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 4 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 4 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 8 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 8 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 8 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 8 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 8 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 16 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 16 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 16 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 16 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 16 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 32 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 32 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 32 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 32 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 32 pipes 16 bpe @ SW_256_S @ Navi1x
    {   1,    0,    0,    0,    0, } , // 64 pipes 1 bpe @ SW_256_S @ Navi1x
    {   1,    1,    0,    0,    0, } , // 64 pipes 2 bpe @ SW_256_S @ Navi1x
    {   1,    2,    0,    0,    0, } , // 64 pipes 4 bpe @ SW_256_S @ Navi1x
    {   1,    3,    0,    0,    0, } , // 64 pipes 8 bpe @ SW_256_S @ Navi1x
    {   1,    4,    0,    0,    0, } , // 64 pipes 16 bpe @ SW_256_S @ Navi1x
};

// Pattern-info table labelled SW_256_D for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_256_D_PATINFO[] =
{
    {   1,    5,    0,    0,    0, } , // 1 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 1 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 1 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 1 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 1 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 2 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 2 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 2 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 2 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 2 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 4 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 4 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 4 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 4 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 4 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 8 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 8 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 8 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 8 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 8 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 16 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 16 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 16 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 16 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 16 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 32 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 32 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 32 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 32 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 32 pipes 16 bpe @ SW_256_D @ Navi1x
    {   1,    5,    0,    0,    0, } , // 64 pipes 1 bpe @ SW_256_D @ Navi1x
    {   1,    1,    0,    0,    0, } , // 64 pipes 2 bpe @ SW_256_D @ Navi1x
    {   1,    2,    0,    0,    0, } , // 64 pipes 4 bpe @ SW_256_D @ Navi1x
    {   1,    6,    0,    0,    0, } , // 64 pipes 8 bpe @ SW_256_D @ Navi1x
    {   1,    7,    0,    0,    0, } , // 64 pipes 16 bpe @ SW_256_D @ Navi1x
};

// Pattern-info table labelled SW_4K_S for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_S_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_S @ Navi1x
    {   1,    0,    1,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_S @ Navi1x
    {   1,    1,    2,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_S @ Navi1x
    {   1,    2,    3,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_S @ Navi1x
    {   1,    3,    4,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_S @ Navi1x
    {   1,    4,    5,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_S @ Navi1x
};

// Pattern-info table labelled SW_4K_D for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_D_PATINFO[] =
{
    {   1,    5,    1,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_D @ Navi1x
    {   1,    5,    1,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_D @ Navi1x
    {   1,    1,    2,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_D @ Navi1x
    {   1,    2,    3,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_D @ Navi1x
    {   1,    6,    4,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_D @ Navi1x
    {   1,    7,    5,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_D @ Navi1x
};

// Pattern-info table labelled SW_4K_S_X for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. Rows differ per pipe count up to 16 pipes; the rows for
// 32 and 64 pipes repeat the 16-pipe rows.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_S_X_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   1,    1,    2,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   1,    2,    3,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   1,    3,    4,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   1,    4,    5,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,    6,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,    7,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,    8,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,    9,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   10,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,   11,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,   12,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,   13,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,   14,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   15,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,   16,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,   17,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,   18,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,   19,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   20,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,   21,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,   24,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   25,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,   21,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,   24,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   25,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_S_X @ Navi1x
    {   3,    0,   21,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_S_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_S_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_S_X @ Navi1x
    {   3,    3,   24,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_S_X @ Navi1x
    {   3,    4,   25,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_S_X @ Navi1x
};

// Pattern-info table labelled SW_4K_D_X for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. Rows differ per pipe count up to 16 pipes; the rows for
// 32 and 64 pipes repeat the 16-pipe rows.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_D_X_PATINFO[] =
{
    {   1,    5,    1,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   1,    1,    2,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   1,    2,    3,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   1,    6,    4,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   1,    7,    5,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,    6,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,    7,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,    8,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,    9,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   10,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,   11,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,   12,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,   13,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,   14,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   15,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,   16,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,   17,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,   18,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,   19,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   20,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,   21,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,   24,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   25,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,   21,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,   24,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   25,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_D_X @ Navi1x
    {   3,    5,   21,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_D_X @ Navi1x
    {   3,    1,   22,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_D_X @ Navi1x
    {   3,    2,   23,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_D_X @ Navi1x
    {   3,    6,   24,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_D_X @ Navi1x
    {   3,    7,   25,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_D_X @ Navi1x
};

// Pattern-info table labelled SW_4K_S3 for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_S3_PATINFO[] =
{
    {   1,   29,  131,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_S3 @ Navi1x
    {   1,   29,  131,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_S3 @ Navi1x
    {   1,   30,  132,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_S3 @ Navi1x
    {   1,   31,  133,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_S3 @ Navi1x
    {   1,   32,  134,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_S3 @ Navi1x
    {   1,   33,  135,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_S3 @ Navi1x
};

// Pattern-info table labelled SW_4K_S3_X for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. Rows differ per pipe count up to 16 pipes; the rows for
// 32 and 64 pipes repeat the 16-pipe rows.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_4K_S3_X_PATINFO[] =
{
    {   1,   29,  131,    0,    0, } , // 1 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   1,   30,  132,    0,    0, } , // 1 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   1,   31,  133,    0,    0, } , // 1 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   1,   32,  134,    0,    0, } , // 1 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   1,   33,  135,    0,    0, } , // 1 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  136,    0,    0, } , // 2 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  137,    0,    0, } , // 2 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  138,    0,    0, } , // 2 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  139,    0,    0, } , // 2 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  140,    0,    0, } , // 2 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  141,    0,    0, } , // 4 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  142,    0,    0, } , // 4 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  143,    0,    0, } , // 4 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  144,    0,    0, } , // 4 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  145,    0,    0, } , // 4 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  146,    0,    0, } , // 8 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  147,    0,    0, } , // 8 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  148,    0,    0, } , // 8 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  149,    0,    0, } , // 8 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  150,    0,    0, } , // 8 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  151,    0,    0, } , // 16 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  152,    0,    0, } , // 16 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  153,    0,    0, } , // 16 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  154,    0,    0, } , // 16 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  155,    0,    0, } , // 16 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  151,    0,    0, } , // 32 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  152,    0,    0, } , // 32 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  153,    0,    0, } , // 32 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  154,    0,    0, } , // 32 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  155,    0,    0, } , // 32 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   29,  151,    0,    0, } , // 64 pipes 1 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   30,  152,    0,    0, } , // 64 pipes 2 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   31,  153,    0,    0, } , // 64 pipes 4 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   32,  154,    0,    0, } , // 64 pipes 8 bpe @ SW_4K_S3_X @ Navi1x
    {   3,   33,  155,    0,    0, } , // 64 pipes 16 bpe @ SW_4K_S3_X @ Navi1x
};

// Pattern-info table labelled SW_64K_S for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_64K_S_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 32 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 32 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 32 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 32 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 32 pipes 16 bpe @ SW_64K_S @ Navi1x
    {   1,    0,    1,    1,    0, } , // 64 pipes 1 bpe @ SW_64K_S @ Navi1x
    {   1,    1,    2,    2,    0, } , // 64 pipes 2 bpe @ SW_64K_S @ Navi1x
    {   1,    2,    3,    3,    0, } , // 64 pipes 4 bpe @ SW_64K_S @ Navi1x
    {   1,    3,    4,    4,    0, } , // 64 pipes 8 bpe @ SW_64K_S @ Navi1x
    {   1,    4,    5,    5,    0, } , // 64 pipes 16 bpe @ SW_64K_S @ Navi1x
};

// Pattern-info table labelled SW_64K_D for Navi1x (per the row comments).
// 35 rows: 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes (1, 2, 4, 8, 16 bpe),
// with the pipe count varying slowest. The five bpe rows are identical for every pipe count.
// NOTE(review): ADDR_SW_PATINFO is declared elsewhere; the meaning of the five values in each
// row is not visible here.
const ADDR_SW_PATINFO GFX10_SW_64K_D_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 32 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 32 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 32 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 32 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 32 pipes 16 bpe @ SW_64K_D @ Navi1x
    {   1,    5,    1,    1,    0, } , // 64 pipes 1 bpe @ SW_64K_D @ Navi1x
    {   1,    1,    2,    2,    0, } , // 64 pipes 2 bpe @ SW_64K_D @ Navi1x
    {   1,    2,    3,    3,    0, } , // 64 pipes 4 bpe @ SW_64K_D @ Navi1x
    {   1,    6,    4,    4,    0, } , // 64 pipes 8 bpe @ SW_64K_D @ Navi1x
    {   1,    7,    5,    5,    0, } , // 64 pipes 16 bpe @ SW_64K_D @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_S_T mode on Navi1x (per the row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 2 for all others, the
// second field runs 0..4 with the bpe step, and the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_S_T_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   1,    3,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   1,    4,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,   36,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,   37,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,   38,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,   39,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,   40,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,   41,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,   42,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,   43,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,   44,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,   45,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,   46,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,   47,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,   48,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,   49,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,   50,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,   51,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,   52,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,   53,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,   54,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,   55,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,   56,   16,    0, } , // 32 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,   57,   17,    0, } , // 32 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,   58,   18,    0, } , // 32 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,   59,   19,    0, } , // 32 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,   60,   20,    0, } , // 32 pipes 16 bpe @ SW_64K_S_T @ Navi1x
    {   2,    0,    1,   21,    0, } , // 64 pipes 1 bpe @ SW_64K_S_T @ Navi1x
    {   2,    1,    2,   22,    0, } , // 64 pipes 2 bpe @ SW_64K_S_T @ Navi1x
    {   2,    2,    3,   23,    0, } , // 64 pipes 4 bpe @ SW_64K_S_T @ Navi1x
    {   2,    3,    4,   24,    0, } , // 64 pipes 8 bpe @ SW_64K_S_T @ Navi1x
    {   2,    4,    5,   25,    0, } , // 64 pipes 16 bpe @ SW_64K_S_T @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_D_T mode on Navi1x (per the row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 2 for all others, the
// second field repeats 5, 1, 2, 6, 7 across the bpe steps, and the fifth field is 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_D_T_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   1,    6,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   1,    7,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,   36,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,   37,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,   38,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,   39,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,   40,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,   41,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,   42,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,   43,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,   44,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,   45,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,   46,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,   47,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,   48,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,   49,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,   50,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,   51,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,   52,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,   53,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,   54,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,   55,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,   56,   16,    0, } , // 32 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,   57,   17,    0, } , // 32 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,   58,   18,    0, } , // 32 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,   59,   19,    0, } , // 32 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,   60,   20,    0, } , // 32 pipes 16 bpe @ SW_64K_D_T @ Navi1x
    {   2,    5,    1,   21,    0, } , // 64 pipes 1 bpe @ SW_64K_D_T @ Navi1x
    {   2,    1,    2,   22,    0, } , // 64 pipes 2 bpe @ SW_64K_D_T @ Navi1x
    {   2,    2,    3,   23,    0, } , // 64 pipes 4 bpe @ SW_64K_D_T @ Navi1x
    {   2,    6,    4,   24,    0, } , // 64 pipes 8 bpe @ SW_64K_D_T @ Navi1x
    {   2,    7,    5,   25,    0, } , // 64 pipes 16 bpe @ SW_64K_D_T @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_S_X mode on Navi1x (per the row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 3 for all others, the
// second field runs 0..4 with the bpe step, the third field counts 1..35 row by row,
// and the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_S_X_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   1,    3,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   1,    4,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,    6,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,    7,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,    8,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,    9,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   10,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,   11,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,   12,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,   13,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,   14,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   15,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,   16,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,   17,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,   18,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,   19,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   20,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,   21,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,   22,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,   23,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,   24,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   25,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,   26,    6,    0, } , // 32 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,   27,    7,    0, } , // 32 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,   28,    8,    0, } , // 32 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,   29,    9,    0, } , // 32 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   30,   10,    0, } , // 32 pipes 16 bpe @ SW_64K_S_X @ Navi1x
    {   3,    0,   31,   11,    0, } , // 64 pipes 1 bpe @ SW_64K_S_X @ Navi1x
    {   3,    1,   32,   12,    0, } , // 64 pipes 2 bpe @ SW_64K_S_X @ Navi1x
    {   3,    2,   33,   13,    0, } , // 64 pipes 4 bpe @ SW_64K_S_X @ Navi1x
    {   3,    3,   34,   14,    0, } , // 64 pipes 8 bpe @ SW_64K_S_X @ Navi1x
    {   3,    4,   35,   15,    0, } , // 64 pipes 16 bpe @ SW_64K_S_X @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_D_X mode on Navi1x (per the row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 3 for all others, the
// second field repeats 5, 1, 2, 6, 7 across the bpe steps, the third field counts
// 1..35 row by row, and the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_D_X_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   1,    6,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   1,    7,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,    6,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,    7,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,    8,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,    9,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   10,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,   11,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,   12,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,   13,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,   14,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   15,    5,    0, } , // 4 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,   16,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,   17,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,   18,    3,    0, } , // 8 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,   19,    4,    0, } , // 8 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   20,    5,    0, } , // 8 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,   21,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,   22,    2,    0, } , // 16 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,   23,    3,    0, } , // 16 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,   24,    4,    0, } , // 16 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   25,    5,    0, } , // 16 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,   26,    6,    0, } , // 32 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,   27,    7,    0, } , // 32 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,   28,    8,    0, } , // 32 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,   29,    9,    0, } , // 32 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   30,   10,    0, } , // 32 pipes 16 bpe @ SW_64K_D_X @ Navi1x
    {   3,    5,   31,   11,    0, } , // 64 pipes 1 bpe @ SW_64K_D_X @ Navi1x
    {   3,    1,   32,   12,    0, } , // 64 pipes 2 bpe @ SW_64K_D_X @ Navi1x
    {   3,    2,   33,   13,    0, } , // 64 pipes 4 bpe @ SW_64K_D_X @ Navi1x
    {   3,    6,   34,   14,    0, } , // 64 pipes 8 bpe @ SW_64K_D_X @ Navi1x
    {   3,    7,   35,   15,    0, } , // 64 pipes 16 bpe @ SW_64K_D_X @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_R_X mode at 1xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 3 for all others; the
// second field is 5, 1, 2, 6, 7 for 1 pipe and 28, 1, 2, 6, 7 for every multi-pipe
// group; the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_1xaa_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   1,    1,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   1,    2,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   1,    6,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   1,    7,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   61,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   62,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,    8,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   63,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   64,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   65,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   66,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,   67,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   68,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   69,   26,    0, } , // 4 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   70,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   71,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,   72,   27,    0, } , // 8 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   72,   28,    0, } , // 8 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   73,   29,    0, } , // 8 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   74,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   74,   30,    0, } , // 16 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,   74,   31,    0, } , // 16 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   74,   32,    0, } , // 16 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   74,   33,    0, } , // 16 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   75,    6,    0, } , // 32 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   75,   34,    0, } , // 32 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,   75,   35,    0, } , // 32 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   75,   36,    0, } , // 32 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   76,   37,    0, } , // 32 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,   28,   77,   11,    0, } , // 64 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    1,   77,   38,    0, } , // 64 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    2,   77,   39,    0, } , // 64 pipes 4 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    6,   78,   40,    0, } , // 64 pipes 8 bpe @ SW_64K_R_X 1xaa @ Navi1x
    {   3,    7,   79,   41,    0, } , // 64 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_R_X mode at 2xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 2 for the 1-pipe rows and 3 for all others; the
// second field is 5, 1, 2, 6, 7 for 1 pipe and 28, 1, 2, 6, 7 for every multi-pipe
// group; the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_2xaa_PATINFO[] =
{
    {   2,    5,    1,   99,    0, } , // 1 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   2,    1,    2,  100,    0, } , // 1 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   2,    2,    3,  101,    0, } , // 1 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   2,    6,    4,  102,    0, } , // 1 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   2,    7,    5,  103,    0, } , // 1 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   61,   99,    0, } , // 2 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   62,  100,    0, } , // 2 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,    8,  101,    0, } , // 2 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   63,  102,    0, } , // 2 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,   64,  103,    0, } , // 2 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   65,   99,    0, } , // 4 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   66,  100,    0, } , // 4 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,   67,  101,    0, } , // 4 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   68,  102,    0, } , // 4 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,   69,  104,    0, } , // 4 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   70,   99,    0, } , // 8 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   71,  100,    0, } , // 8 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,   72,  105,    0, } , // 8 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   72,  106,    0, } , // 8 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,   73,  107,    0, } , // 8 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   74,   99,    0, } , // 16 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   74,  108,    0, } , // 16 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,   74,  109,    0, } , // 16 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   74,  107,    0, } , // 16 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,  113,   33,    0, } , // 16 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   75,  110,    0, } , // 32 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   75,  111,    0, } , // 32 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,   75,  112,    0, } , // 32 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   76,  113,    0, } , // 32 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,  114,   37,    0, } , // 32 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,   28,   78,  114,    0, } , // 64 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    1,   78,  115,    0, } , // 64 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    2,   78,  116,    0, } , // 64 pipes 4 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    6,   79,  117,    0, } , // 64 pipes 8 bpe @ SW_64K_R_X 2xaa @ Navi1x
    {   3,    7,  115,   41,    0, } , // 64 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_R_X mode at 4xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 2 for the 1-pipe rows and 3 for all others; the
// second field is 5, 1, 2, 6, 7 for 1 pipe and 28, 1, 2, 6, 7 for every multi-pipe
// group; the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_4xaa_PATINFO[] =
{
    {   2,    5,    1,  118,    0, } , // 1 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   2,    1,    2,  119,    0, } , // 1 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   2,    2,    3,  120,    0, } , // 1 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   2,    6,    4,  121,    0, } , // 1 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   2,    7,    5,  122,    0, } , // 1 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   61,  118,    0, } , // 2 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   62,  119,    0, } , // 2 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,    8,  120,    0, } , // 2 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,   63,  121,    0, } , // 2 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,   64,  122,    0, } , // 2 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   65,  118,    0, } , // 4 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   66,  119,    0, } , // 4 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,   67,  120,    0, } , // 4 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,   68,  121,    0, } , // 4 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,   69,  123,    0, } , // 4 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   70,  118,    0, } , // 8 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   71,  119,    0, } , // 8 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,   72,  124,    0, } , // 8 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,   93,  125,    0, } , // 8 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,  116,  107,    0, } , // 8 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   74,  118,    0, } , // 16 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   74,  126,    0, } , // 16 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,   74,  127,    0, } , // 16 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,  117,  107,    0, } , // 16 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,  118,   33,    0, } , // 16 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   76,  128,    0, } , // 32 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   76,  129,    0, } , // 32 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,   76,  130,    0, } , // 32 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,  119,  113,    0, } , // 32 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,  120,   37,    0, } , // 32 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,   28,   79,  131,    0, } , // 64 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    1,   79,  132,    0, } , // 64 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    2,   79,  133,    0, } , // 64 pipes 4 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    6,  121,  117,    0, } , // 64 pipes 8 bpe @ SW_64K_R_X 4xaa @ Navi1x
    {   3,    7,  122,   41,    0, } , // 64 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_R_X mode at 8xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 2 for the 1-pipe rows and 3 for all others; the
// second field is 5, 1, 2, 6, 7 for 1 pipe and 28, 1, 2, 6, 7 for every multi-pipe
// group; the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_8xaa_PATINFO[] =
{
    {   2,    5,    1,  134,    0, } , // 1 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   2,    1,    2,  135,    0, } , // 1 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   2,    2,    3,  135,    0, } , // 1 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   2,    6,    4,  136,    0, } , // 1 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   2,    7,    5,  136,    0, } , // 1 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,   61,  134,    0, } , // 2 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,   62,  135,    0, } , // 2 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,    8,  135,    0, } , // 2 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,   63,  136,    0, } , // 2 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,   64,  136,    0, } , // 2 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,   65,  134,    0, } , // 4 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,   66,  135,    0, } , // 4 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,   67,  135,    0, } , // 4 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,   68,  136,    0, } , // 4 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,  102,  137,    0, } , // 4 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,   70,  134,    0, } , // 8 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,   71,  135,    0, } , // 8 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,   72,  138,    0, } , // 8 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,  123,  139,    0, } , // 8 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,  124,  140,    0, } , // 8 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,  105,  134,    0, } , // 16 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,  105,  138,    0, } , // 16 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,  125,  127,    0, } , // 16 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,  126,  107,    0, } , // 16 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,  126,  141,    0, } , // 16 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,  107,  142,    0, } , // 32 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,  108,  143,    0, } , // 32 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,  127,  130,    0, } , // 32 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,  128,  113,    0, } , // 32 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,  128,  144,    0, } , // 32 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,   28,  110,  145,    0, } , // 64 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    1,  111,  146,    0, } , // 64 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    2,  129,  133,    0, } , // 64 pipes 4 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    6,  130,  117,    0, } , // 64 pipes 8 bpe @ SW_64K_R_X 8xaa @ Navi1x
    {   3,    7,  130,  147,    0, } , // 64 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_Z_X mode at 1xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1 for the 1-pipe rows and 3 for all others; the
// second field is 8, 9, 10, 11, 7 for 1 pipe and 12, 9, 10, 11, 7 for every
// multi-pipe group; the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_1xaa_PATINFO[] =
{
    {   1,    8,    1,    1,    0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   1,    9,    2,    2,    0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   1,   10,    3,    3,    0, } , // 1 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   1,   11,    4,    4,    0, } , // 1 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   1,    7,    5,    5,    0, } , // 1 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   61,    1,    0, } , // 2 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   62,    2,    0, } , // 2 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,    8,    3,    0, } , // 2 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   63,    4,    0, } , // 2 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   64,    5,    0, } , // 2 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   65,    1,    0, } , // 4 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   66,    2,    0, } , // 4 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,   67,    3,    0, } , // 4 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   68,    4,    0, } , // 4 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   69,   26,    0, } , // 4 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   70,    1,    0, } , // 8 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   71,    2,    0, } , // 8 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,   72,   27,    0, } , // 8 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   72,   28,    0, } , // 8 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   73,   29,    0, } , // 8 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   74,    1,    0, } , // 16 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   74,   30,    0, } , // 16 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,   74,   31,    0, } , // 16 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   74,   32,    0, } , // 16 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   74,   33,    0, } , // 16 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   75,    6,    0, } , // 32 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   75,   34,    0, } , // 32 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,   75,   35,    0, } , // 32 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   75,   36,    0, } , // 32 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   76,   37,    0, } , // 32 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   12,   77,   11,    0, } , // 64 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    9,   77,   38,    0, } , // 64 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   10,   77,   39,    0, } , // 64 pipes 4 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,   11,   78,   40,    0, } , // 64 pipes 8 bpe @ SW_64K_Z_X 1xaa @ Navi1x
    {   3,    7,   79,   41,    0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_Z_X mode at 2xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1, 1, 2, 2, 2 across the 1-pipe rows and 3 for
// all others; the second field runs 13..17 with the bpe step in every pipe group;
// the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_2xaa_PATINFO[] =
{
    {   1,   13,   80,   42,    0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   1,   14,    3,    3,    0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   2,   15,    3,   43,    0, } , // 1 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   2,   16,   81,   44,    0, } , // 1 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   2,   17,    5,   45,    0, } , // 1 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   82,   42,    0, } , // 2 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,    8,    3,    0, } , // 2 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,    8,   43,    0, } , // 2 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   83,   44,    0, } , // 2 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   64,   45,    0, } , // 2 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   84,   42,    0, } , // 4 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,   67,    3,    0, } , // 4 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,   67,   43,    0, } , // 4 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   85,   44,    0, } , // 4 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   69,   46,    0, } , // 4 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   86,   42,    0, } , // 8 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,   72,   27,    0, } , // 8 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,   72,   47,    0, } , // 8 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   73,   48,    0, } , // 8 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   73,   49,    0, } , // 8 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   74,   50,    0, } , // 16 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,   74,   31,    0, } , // 16 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,   74,   51,    0, } , // 16 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   74,   52,    0, } , // 16 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   87,   53,    0, } , // 16 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   75,   54,    0, } , // 32 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,   75,   35,    0, } , // 32 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,   75,   55,    0, } , // 32 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   76,   56,    0, } , // 32 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   88,   57,    0, } , // 32 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   13,   78,   58,    0, } , // 64 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   14,   78,   59,    0, } , // 64 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   15,   78,   60,    0, } , // 64 pipes 4 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   16,   79,   41,    0, } , // 64 pipes 8 bpe @ SW_64K_Z_X 2xaa @ Navi1x
    {   3,   17,   89,   61,    0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x
};

// Swizzle-pattern lookup table for the SW_64K_Z_X mode at 4xaa on Navi1x (per the
// row labels).
// Holds 35 rows: seven pipe counts (1, 2, 4, 8, 16, 32, 64) in ascending order, each
// with five rows for 1, 2, 4, 8 and 16 bpe, so row = 5 * log2(pipes) + log2(bpe).
// In this table the first field is 1, 2, 2, 2, 2 across the 1-pipe rows and 3 for
// all others; the second field runs 18..22 with the bpe step in every pipe group;
// the fifth field is always 0.
// NOTE(review): the meaning of the five fields comes from ADDR_SW_PATINFO, which is
// declared outside this view -- presumably an item count followed by pattern-table
// indices; confirm against the struct definition before editing any value.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_4xaa_PATINFO[] =
{
    {   1,   18,    3,    3,    0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   2,   19,   90,   62,    0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   2,   20,    3,   63,    0, } , // 1 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   2,   21,    4,   64,    0, } , // 1 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   2,   22,    5,   65,    0, } , // 1 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,    8,    3,    0, } , // 2 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   91,   62,    0, } , // 2 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,    8,   66,    0, } , // 2 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   63,   67,    0, } , // 2 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,   64,   68,    0, } , // 2 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,   67,    3,    0, } , // 4 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   92,   62,    0, } , // 4 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,   67,   63,    0, } , // 4 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   68,   64,    0, } , // 4 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,   69,   69,    0, } , // 4 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,   72,   27,    0, } , // 8 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   72,   70,    0, } , // 8 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,   72,   71,    0, } , // 8 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   93,   72,    0, } , // 8 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,   94,   73,    0, } , // 8 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,   74,   31,    0, } , // 16 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   74,   74,    0, } , // 16 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,   74,   75,    0, } , // 16 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   95,   76,    0, } , // 16 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,   96,   76,    0, } , // 16 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,   76,   77,    0, } , // 32 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   76,   78,    0, } , // 32 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,   76,   56,    0, } , // 32 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   97,   79,    0, } , // 32 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,   98,   79,    0, } , // 32 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   18,   79,   80,    0, } , // 64 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   19,   79,   81,    0, } , // 64 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   20,   79,   41,    0, } , // 64 pipes 4 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   21,   99,   82,    0, } , // 64 pipes 8 bpe @ SW_64K_Z_X 4xaa @ Navi1x
    {   3,   22,  100,   82,    0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x
};

// Pattern-info lookup table for the SW_64K_Z_X swizzle mode with 8xaa on Navi1x
// (as labelled by the per-row comments).
//
// Layout: 35 rows = 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes
// (1, 2, 4, 8, 16 bpe), with bpe varying fastest inside each pipe-count group.
//
// Observations from the data below: the second column cycles 23..27 with bpe in
// every group, the first column is 2 for the 1-pipe rows and 3 otherwise, and the
// last column is 0 in every row.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_8xaa_PATINFO[] =
{
    {   2,   23,    3,   43,    0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   2,   24,    3,   63,    0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   2,   25,    3,   83,    0, } , // 1 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   2,   26,   81,   84,    0, } , // 1 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   2,   27,    5,   85,    0, } , // 1 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,    8,   43,    0, } , // 2 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,    8,   66,    0, } , // 2 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,    8,   86,    0, } , // 2 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,  101,   87,    0, } , // 2 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,   64,   88,    0, } , // 2 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,   67,   43,    0, } , // 4 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,   67,   63,    0, } , // 4 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,   67,   83,    0, } , // 4 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,   85,   84,    0, } , // 4 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,  102,   89,    0, } , // 4 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,   72,   47,    0, } , // 8 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,   72,   71,    0, } , // 8 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,   72,   90,    0, } , // 8 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,  103,   91,    0, } , // 8 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,  104,   92,    0, } , // 8 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,  105,   51,    0, } , // 16 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,  105,   75,    0, } , // 16 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,   87,   93,    0, } , // 16 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,   96,   76,    0, } , // 16 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,  106,   94,    0, } , // 16 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,  107,   95,    0, } , // 32 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,  108,   56,    0, } , // 32 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,   88,   57,    0, } , // 32 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,   98,   79,    0, } , // 32 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,  109,   96,    0, } , // 32 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   23,  110,   97,    0, } , // 64 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   24,  111,   41,    0, } , // 64 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   25,   89,   61,    0, } , // 64 pipes 4 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   26,  100,   82,    0, } , // 64 pipes 8 bpe @ SW_64K_Z_X 8xaa @ Navi1x
    {   3,   27,  112,   98,    0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x
};

// Pattern-info lookup table for the SW_64K_S3 swizzle mode on Navi1x
// (as labelled by the per-row comments).
//
// Layout: 35 rows = 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes
// (1, 2, 4, 8, 16 bpe), with bpe varying fastest inside each pipe-count group.
//
// Observation from the data below: the same five rows repeat for every pipe count,
// so the entry selected here depends only on bpe.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 1 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 1 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 1 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 1 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 2 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 2 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 2 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 2 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 2 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 4 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 4 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 4 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 4 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 4 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 8 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 8 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 8 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 8 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 8 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 16 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 16 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 16 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 16 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 16 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 32 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 32 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 32 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 32 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 32 pipes 16 bpe @ SW_64K_S3 @ Navi1x
    {   1,   29,  131,  148,    0, } , // 64 pipes 1 bpe @ SW_64K_S3 @ Navi1x
    {   1,   30,  132,  149,    0, } , // 64 pipes 2 bpe @ SW_64K_S3 @ Navi1x
    {   1,   31,  133,  150,    0, } , // 64 pipes 4 bpe @ SW_64K_S3 @ Navi1x
    {   1,   32,  134,  151,    0, } , // 64 pipes 8 bpe @ SW_64K_S3 @ Navi1x
    {   1,   33,  135,  152,    0, } , // 64 pipes 16 bpe @ SW_64K_S3 @ Navi1x
};

// Pattern-info lookup table for the SW_64K_S3_X swizzle mode on Navi1x
// (as labelled by the per-row comments).
//
// Layout: 35 rows = 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes
// (1, 2, 4, 8, 16 bpe), with bpe varying fastest inside each pipe-count group.
//
// Observations from the data below: the second column cycles 29..33 with bpe in
// every group, the first column is 1 for the 1-pipe rows and 3 otherwise, and the
// last column is 0 in every row.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_X_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   1,   30,  132,  149,    0, } , // 1 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   1,   31,  133,  150,    0, } , // 1 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   1,   32,  134,  151,    0, } , // 1 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   1,   33,  135,  152,    0, } , // 1 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  136,  148,    0, } , // 2 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  137,  149,    0, } , // 2 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  138,  150,    0, } , // 2 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  139,  151,    0, } , // 2 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  140,  152,    0, } , // 2 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  141,  148,    0, } , // 4 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  142,  149,    0, } , // 4 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  143,  150,    0, } , // 4 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  144,  151,    0, } , // 4 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  145,  152,    0, } , // 4 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  146,  148,    0, } , // 8 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  147,  149,    0, } , // 8 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  148,  150,    0, } , // 8 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  149,  151,    0, } , // 8 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  150,  152,    0, } , // 8 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  151,  148,    0, } , // 16 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  152,  149,    0, } , // 16 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  153,  150,    0, } , // 16 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  154,  151,    0, } , // 16 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  155,  152,    0, } , // 16 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  156,  153,    0, } , // 32 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  157,  154,    0, } , // 32 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  158,  155,    0, } , // 32 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  159,  156,    0, } , // 32 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  160,  157,    0, } , // 32 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   29,  161,  158,    0, } , // 64 pipes 1 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   30,  162,  159,    0, } , // 64 pipes 2 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   31,  163,  160,    0, } , // 64 pipes 4 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   32,  164,  161,    0, } , // 64 pipes 8 bpe @ SW_64K_S3_X @ Navi1x
    {   3,   33,  165,  162,    0, } , // 64 pipes 16 bpe @ SW_64K_S3_X @ Navi1x
};

// Pattern-info lookup table for the SW_64K_S3_T swizzle mode on Navi1x
// (as labelled by the per-row comments).
//
// Layout: 35 rows = 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes
// (1, 2, 4, 8, 16 bpe), with bpe varying fastest inside each pipe-count group.
//
// Observations from the data below: the second column cycles 29..33 with bpe in
// every group, the first column is 1 for the 1-pipe rows and 3 otherwise, and the
// last column is 0 in every row.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_T_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   1,   30,  132,  149,    0, } , // 1 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   1,   31,  133,  150,    0, } , // 1 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   1,   32,  134,  151,    0, } , // 1 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   1,   33,  135,  152,    0, } , // 1 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  136,  148,    0, } , // 2 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  137,  149,    0, } , // 2 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  138,  150,    0, } , // 2 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  139,  151,    0, } , // 2 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  140,  152,    0, } , // 2 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  141,  148,    0, } , // 4 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  142,  149,    0, } , // 4 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  143,  150,    0, } , // 4 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  144,  151,    0, } , // 4 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  145,  152,    0, } , // 4 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  166,  148,    0, } , // 8 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  167,  149,    0, } , // 8 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  168,  150,    0, } , // 8 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  169,  151,    0, } , // 8 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  170,  152,    0, } , // 8 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  171,  148,    0, } , // 16 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  172,  149,    0, } , // 16 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  173,  150,    0, } , // 16 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  174,  151,    0, } , // 16 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  175,  152,    0, } , // 16 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  176,  153,    0, } , // 32 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  177,  154,    0, } , // 32 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  178,  155,    0, } , // 32 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  179,  156,    0, } , // 32 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  180,  157,    0, } , // 32 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   29,  131,  163,    0, } , // 64 pipes 1 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   30,  132,  164,    0, } , // 64 pipes 2 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   31,  133,  165,    0, } , // 64 pipes 4 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   32,  134,  166,    0, } , // 64 pipes 8 bpe @ SW_64K_S3_T @ Navi1x
    {   3,   33,  135,  167,    0, } , // 64 pipes 16 bpe @ SW_64K_S3_T @ Navi1x
};

// Pattern-info lookup table for the SW_64K_D3_X swizzle mode on Navi1x
// (as labelled by the per-row comments).
//
// Layout: 35 rows = 7 pipe counts (1, 2, 4, 8, 16, 32, 64) x 5 element sizes
// (1, 2, 4, 8, 16 bpe), with bpe varying fastest inside each pipe-count group.
//
// Observations from the data below: the second column cycles 34..38 with bpe in
// every group, the first column takes the values 1, 2 or 3 depending on the row,
// and the last column is 0 in every row.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_64K_D3_X_PATINFO[] =
{
    {   1,   34,  131,  148,    0, } , // 1 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   1,   35,  132,  149,    0, } , // 1 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   1,   36,  133,  150,    0, } , // 1 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   1,   37,  134,  151,    0, } , // 1 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   1,   38,  135,  152,    0, } , // 1 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   34,  181,  148,    0, } , // 2 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   35,  182,  149,    0, } , // 2 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   36,  183,  150,    0, } , // 2 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   37,  184,  168,    0, } , // 2 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   38,  185,  169,    0, } , // 2 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   34,  186,  170,    0, } , // 4 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   35,  186,  171,    0, } , // 4 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   36,  187,  172,    0, } , // 4 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   37,  188,  169,    0, } , // 4 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   38,  189,  169,    0, } , // 4 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   2,   34,  190,  173,    0, } , // 8 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   35,  191,  171,    0, } , // 8 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   36,  192,  172,    0, } , // 8 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   37,  193,  169,    0, } , // 8 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   38,  194,  169,    0, } , // 8 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   34,  195,  174,    0, } , // 16 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   35,  196,  171,    0, } , // 16 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   36,  197,  172,    0, } , // 16 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   37,  198,  169,    0, } , // 16 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   38,  199,  169,    0, } , // 16 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   34,  200,  175,    0, } , // 32 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   35,  201,  176,    0, } , // 32 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   36,  202,  177,    0, } , // 32 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   37,  203,  178,    0, } , // 32 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   38,  204,  178,    0, } , // 32 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   34,  205,  179,    0, } , // 64 pipes 1 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   35,  206,  180,    0, } , // 64 pipes 2 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   36,  207,  181,    0, } , // 64 pipes 4 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   37,  208,  182,    0, } , // 64 pipes 8 bpe @ SW_64K_D3_X @ Navi1x
    {   3,   38,  209,  182,    0, } , // 64 pipes 16 bpe @ SW_64K_D3_X @ Navi1x
};

// Pattern-info lookup table for the SW_256_S swizzle mode on RbPlus parts
// (as labelled by the per-row comments).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8,
// 16 bpe), with bpe varying fastest inside each configuration group. The
// configurations appear in the order given by the row comments, from
// "1 pipes (1 PKRs)" through "64 pipes (32 PKRs)".
//
// Observation from the data below: every group holds the same five rows
// ({1, 0..4, 0, 0, 0}), so the entry selected here depends only on bpe.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_256_S_RBPLUS_PATINFO[] =
{
    {   1,    0,    0,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256_S @ RbPlus
    {   1,    0,    0,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256_S @ RbPlus
    {   1,    1,    0,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256_S @ RbPlus
    {   1,    2,    0,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256_S @ RbPlus
    {   1,    3,    0,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256_S @ RbPlus
    {   1,    4,    0,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256_S @ RbPlus
};

// Pattern-info lookup table for the SW_256_D swizzle mode on RbPlus parts
// (as labelled by the per-row comments).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8,
// 16 bpe), with bpe varying fastest inside each configuration group. The
// configurations appear in the order given by the row comments, from
// "1 pipes (1 PKRs)" through "64 pipes (32 PKRs)".
//
// Observation from the data below: every group holds the same five rows; the
// second column is 5, 1, 39, 6, 7 for 1, 2, 4, 8, 16 bpe respectively and the
// last three columns are always 0, so the entry selected depends only on bpe.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_256_D_RBPLUS_PATINFO[] =
{
    {   1,    5,    0,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256_D @ RbPlus
    {   1,    5,    0,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256_D @ RbPlus
    {   1,    1,    0,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256_D @ RbPlus
    {   1,   39,    0,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256_D @ RbPlus
    {   1,    6,    0,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256_D @ RbPlus
    {   1,    7,    0,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256_D @ RbPlus
};

// Pattern-info lookup table for the SW_4K_S swizzle mode on RbPlus parts
// (as labelled by the per-row comments).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8,
// 16 bpe), with bpe varying fastest inside each configuration group. The
// configurations appear in the order given by the row comments, from
// "1 pipes (1 PKRs)" through "64 pipes (32 PKRs)".
//
// Observation from the data below: every group holds the same five rows; the
// second column runs 0..4 and the third 1..5 with bpe, and the last two columns
// are always 0, so the entry selected depends only on bpe.
//
// NOTE(review): the five columns are the ADDR_SW_PATINFO fields, declared elsewhere;
// presumably they are indices into shared pattern tables -- confirm against the
// struct definition before editing any value by hand.
const ADDR_SW_PATINFO GFX10_SW_4K_S_RBPLUS_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S @ RbPlus
    {   1,    0,    1,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S @ RbPlus
    {   1,    1,    2,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S @ RbPlus
    {   1,    2,    3,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S @ RbPlus
    {   1,    3,    4,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S @ RbPlus
    {   1,    4,    5,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S @ RbPlus
};

// Pattern-info table for swizzle mode SW_4K_D, RbPlus variant (per the row comments).
// Layout: 15 pipe/PKR configurations, each contributing five consecutive rows for
// 1, 2, 4, 8 and 16 bpe (75 rows in total). Presumably consumers index this table by
// configuration and bpe, so the row order must be preserved.
// Every configuration repeats the same five rows here, i.e. the entries of this table
// do not vary with the pipe/PKR count.
// NOTE(review): the five values in each row follow the member order of ADDR_SW_PATINFO,
// which is declared outside this section -- confirm the field meanings there before
// editing any value.
const ADDR_SW_PATINFO GFX10_SW_4K_D_RBPLUS_PATINFO[] =
{
    {   1,    5,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_D @ RbPlus
    {   1,    5,    1,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_D @ RbPlus
    {   1,    1,    2,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_D @ RbPlus
    {   1,   39,    3,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_D @ RbPlus
    {   1,    6,    4,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_D @ RbPlus
    {   1,    7,    5,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D @ RbPlus
};

// Pattern-info table for swizzle mode SW_4K_S_X, RbPlus variant (per the row comments).
// Layout: 15 pipe/PKR configurations, each contributing five consecutive rows for
// 1, 2, 4, 8 and 16 bpe (75 rows in total). Presumably consumers index this table by
// configuration and bpe, so the row order must be preserved.
// Unlike a table whose rows are identical for every configuration, the first and third
// values here change with the pipe/PKR configuration: the single-pipe group starts with 1,
// all other groups start with 3. The "32 pipes (32 PKRs)" and "64 pipes (32 PKRs)" groups
// reuse the rows of the "16 pipes (16 PKRs)" and "32 pipes (16 PKRs)" groups respectively.
// NOTE(review): the five values in each row follow the member order of ADDR_SW_PATINFO,
// which is declared outside this section -- confirm the field meanings there before
// editing any value.
const ADDR_SW_PATINFO GFX10_SW_4K_S_X_RBPLUS_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   1,    2,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   1,    3,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   1,    4,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,    6,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,    7,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,    8,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,    9,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,   10,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  210,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  211,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  212,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  213,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  214,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  215,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  216,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  217,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  218,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  219,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,   11,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,   12,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,   13,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,   14,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,   15,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  220,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  221,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  222,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  223,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  224,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  225,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  226,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  227,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  228,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  229,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,   16,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,   17,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,   18,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,   19,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,   20,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  230,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  231,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  232,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  233,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  234,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  235,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  236,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  237,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  238,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  239,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,   21,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,   22,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,   23,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,   24,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,   25,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  240,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  241,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  242,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  243,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  244,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  245,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  246,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  247,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  248,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  249,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,   21,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,   22,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,   23,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,   24,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,   25,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
    {   3,    0,  240,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus
    {   3,    1,  241,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus
    {   3,    2,  242,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S_X @ RbPlus
    {   3,    3,  243,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S_X @ RbPlus
    {   3,    4,  244,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus
};

// Pattern-info table for swizzle mode SW_4K_D_X, RbPlus variant (per the row comments).
// Layout: 15 pipe/PKR configurations, each contributing five consecutive rows for
// 1, 2, 4, 8 and 16 bpe (75 rows in total). Presumably consumers index this table by
// configuration and bpe, so the row order must be preserved.
// The second value of each row depends only on bpe (5, 1, 39, 6, 7 for 1..16 bpe), while
// the first and third values change with the pipe/PKR configuration: the single-pipe group
// starts with 1, all other groups start with 3. The "32 pipes (32 PKRs)" and
// "64 pipes (32 PKRs)" groups reuse the rows of the "16 pipes (16 PKRs)" and
// "32 pipes (16 PKRs)" groups respectively.
// NOTE(review): the five values in each row follow the member order of ADDR_SW_PATINFO,
// which is declared outside this section -- confirm the field meanings there before
// editing any value.
const ADDR_SW_PATINFO GFX10_SW_4K_D_X_RBPLUS_PATINFO[] =
{
    {   1,    5,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   1,   39,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   1,    6,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   1,    7,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,    6,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,    7,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,    8,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,    9,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,   10,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  210,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  211,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  212,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  213,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  214,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  215,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  216,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  217,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  218,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  219,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,   11,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,   12,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,   13,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,   14,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,   15,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  220,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  221,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  222,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  223,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  224,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  225,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  226,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  227,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  228,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  229,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,   16,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,   17,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,   18,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,   19,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,   20,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  230,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  231,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  232,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  233,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  234,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  235,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  236,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  237,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  238,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  239,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,   21,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,   22,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,   23,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,   24,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,   25,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  240,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  241,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  242,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  243,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  244,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  245,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  246,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  247,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  248,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  249,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,   21,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,   22,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,   23,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,   24,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,   25,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
    {   3,    5,  240,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus
    {   3,    1,  241,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus
    {   3,   39,  242,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_D_X @ RbPlus
    {   3,    6,  243,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_D_X @ RbPlus
    {   3,    7,  244,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus
};

// Pattern-info table for swizzle mode SW_4K_S3, RbPlus variant (per the row comments).
// Layout: 15 pipe/PKR configurations, each contributing five consecutive rows for
// 1, 2, 4, 8 and 16 bpe (75 rows in total). Presumably consumers index this table by
// configuration and bpe, so the row order must be preserved.
// Every configuration repeats the same five rows here ({1, 29..33, 131..135, 0, 0}),
// i.e. the entries of this table do not vary with the pipe/PKR count.
// NOTE(review): the five values in each row follow the member order of ADDR_SW_PATINFO,
// which is declared outside this section -- confirm the field meanings there before
// editing any value.
const ADDR_SW_PATINFO GFX10_SW_4K_S3_RBPLUS_PATINFO[] =
{
    {   1,   29,  131,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
    {   1,   29,  131,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus
    {   1,   30,  132,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus
    {   1,   31,  133,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S3 @ RbPlus
    {   1,   32,  134,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S3 @ RbPlus
    {   1,   33,  135,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus
};

// Pattern-info table for swizzle mode SW_4K_S3_X, RbPlus variant (per the row comments).
// Layout: 15 pipe/PKR configurations, each contributing five consecutive rows for
// 1, 2, 4, 8 and 16 bpe (75 rows in total). Presumably consumers index this table by
// configuration and bpe, so the row order must be preserved.
// The second value of each row depends only on bpe (29..33 for 1..16 bpe). The single-pipe
// group starts with 1, all other groups start with 3, and the third value steps through
// 131.., 136.., 141.., 146.. and 151.. by group; every group from "16 pipes (4 PKRs)" onward,
// except "8 pipes (8 PKRs)", shares the 151..155 rows.
// NOTE(review): the five values in each row follow the member order of ADDR_SW_PATINFO,
// which is declared outside this section -- confirm the field meanings there before
// editing any value.
const ADDR_SW_PATINFO GFX10_SW_4K_S3_X_RBPLUS_PATINFO[] =
{
    {   1,   29,  131,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   1,   30,  132,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   1,   31,  133,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   1,   32,  134,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   1,   33,  135,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  136,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  137,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  138,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  139,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  140,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  141,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  142,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  143,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  144,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  145,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  146,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  147,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  148,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  149,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  150,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  141,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  142,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  143,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  144,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  145,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  146,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  147,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  148,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  149,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  150,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  146,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  147,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  148,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  149,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  150,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   29,  151,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   30,  152,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   31,  153,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   32,  154,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S3_X @ RbPlus
    {   3,   33,  155,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus
};

// Pattern-info table for the SW_64K_S swizzle mode on RbPlus configurations
// (mode and variant as labelled in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes.
// Within each configuration the rows are ordered by bytes per element:
// 1, 2, 4, 8, 16 bpe. The configurations appear in the order given by the row
// comments, from "1 pipes (1 PKRs)" up to "64 pipes (32 PKRs)".
//
// In this table every configuration carries the same five rows; the values
// vary only with bpe, not with the pipe/PKR count.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO,
// which is declared elsewhere -- presumably an item count followed by indices
// into shared pattern tables; confirm against the struct definition before
// editing any value. Row order presumably must match the index computed by the
// lookup code; do not reorder.
const ADDR_SW_PATINFO GFX10_SW_64K_S_RBPLUS_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S @ RbPlus
    {   1,    0,    1,    1,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S @ RbPlus
    {   1,    1,    2,    2,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S @ RbPlus
    {   1,    2,    3,    3,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S @ RbPlus
    {   1,    3,    4,    4,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S @ RbPlus
    {   1,    4,    5,    5,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S @ RbPlus
};

// Pattern-info table for the SW_64K_D swizzle mode on RbPlus configurations
// (mode and variant as labelled in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes.
// Within each configuration the rows are ordered by bytes per element:
// 1, 2, 4, 8, 16 bpe. The configurations appear in the order given by the row
// comments, from "1 pipes (1 PKRs)" up to "64 pipes (32 PKRs)".
//
// In this table every configuration carries the same five rows; only the
// second column (5, 1, 39, 6, 7) and the third/fourth columns (1..5) change
// with bpe, and nothing changes with the pipe/PKR count.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO,
// which is declared elsewhere -- presumably an item count followed by indices
// into shared pattern tables; confirm against the struct definition before
// editing any value. Row order presumably must match the index computed by the
// lookup code; do not reorder.
const ADDR_SW_PATINFO GFX10_SW_64K_D_RBPLUS_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D @ RbPlus
    {   1,    5,    1,    1,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D @ RbPlus
    {   1,    1,    2,    2,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D @ RbPlus
    {   1,   39,    3,    3,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D @ RbPlus
    {   1,    6,    4,    4,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D @ RbPlus
    {   1,    7,    5,    5,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D @ RbPlus
};

// Pattern-info table for the SW_64K_S_T swizzle mode on RbPlus configurations
// (mode and variant as labelled in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes.
// Within each configuration the rows are ordered by bytes per element:
// 1, 2, 4, 8, 16 bpe. The configurations appear in the order given by the row
// comments, from "1 pipes (1 PKRs)" up to "64 pipes (32 PKRs)".
//
// Observations from the data:
//  - The first column is 1 for the single-pipe configuration and 2 for all
//    others.
//  - The second column depends only on bpe (0..4).
//  - The third and fourth columns vary with the pipe count; configurations
//    with the same pipe count but different PKR counts carry identical rows.
//  - The last column is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO,
// which is declared elsewhere -- presumably an item count followed by indices
// into shared pattern tables; confirm against the struct definition before
// editing any value. Row order presumably must match the index computed by the
// lookup code; do not reorder.
const ADDR_SW_PATINFO GFX10_SW_64K_S_T_RBPLUS_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   36,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   37,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   38,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   39,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   40,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   41,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   42,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   43,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   44,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   45,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   46,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   48,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   49,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   50,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   41,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   42,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   43,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   44,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   45,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   46,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   48,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   49,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   50,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   51,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   53,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   54,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   55,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   46,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   48,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   49,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   50,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   51,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   53,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   54,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   55,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   56,   16,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   58,   18,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   59,   19,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   60,   20,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   51,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   53,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   54,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   55,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   56,   16,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   58,   18,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   59,   19,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   60,   20,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,    1,   21,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,    2,   22,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,    3,   23,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,    4,   24,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,    5,   25,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,   56,   16,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,   58,   18,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,   59,   19,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,   60,   20,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
    {   2,    0,    1,   21,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus
    {   2,    1,    2,   22,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus
    {   2,    2,    3,   23,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S_T @ RbPlus
    {   2,    3,    4,   24,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S_T @ RbPlus
    {   2,    4,    5,   25,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus
};

// Pattern-info table for the SW_64K_D_T swizzle mode on RbPlus configurations
// (mode and variant as labelled in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes.
// Within each configuration the rows are ordered by bytes per element:
// 1, 2, 4, 8, 16 bpe. The configurations appear in the order given by the row
// comments, from "1 pipes (1 PKRs)" up to "64 pipes (32 PKRs)".
//
// Observations from the data:
//  - The first column is 1 for the single-pipe configuration and 2 for all
//    others.
//  - The second column depends only on bpe (5, 1, 39, 6, 7).
//  - The third and fourth columns vary with the pipe count; configurations
//    with the same pipe count but different PKR counts carry identical rows.
//  - The last column is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO,
// which is declared elsewhere -- presumably an item count followed by indices
// into shared pattern tables; confirm against the struct definition before
// editing any value. Row order presumably must match the index computed by the
// lookup code; do not reorder.
const ADDR_SW_PATINFO GFX10_SW_64K_D_T_RBPLUS_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   1,   39,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   1,    6,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   1,    7,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   36,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   37,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   38,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   39,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   40,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   41,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   42,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   43,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   44,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   45,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   46,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   48,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   49,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   50,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   41,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   42,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   43,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   44,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   45,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   46,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   48,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   49,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   50,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   51,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   53,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   54,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   55,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   46,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   47,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   48,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   49,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   50,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   51,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   53,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   54,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   55,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   56,   16,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   58,   18,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   59,   19,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   60,   20,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   51,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   52,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   53,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   54,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   55,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   56,   16,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   58,   18,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   59,   19,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   60,   20,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,    1,   21,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,    2,   22,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,    3,   23,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,    4,   24,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,    5,   25,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,   56,   16,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,   57,   17,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,   58,   18,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,   59,   19,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,   60,   20,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
    {   2,    5,    1,   21,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus
    {   2,    1,    2,   22,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus
    {   2,   39,    3,   23,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D_T @ RbPlus
    {   2,    6,    4,   24,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D_T @ RbPlus
    {   2,    7,    5,   25,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus
};

// Pattern-info table for the SW_64K_S_X swizzle mode on RbPlus configurations
// (mode and variant as labelled in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes.
// Within each configuration the rows are ordered by bytes per element:
// 1, 2, 4, 8, 16 bpe. The configurations appear in the order given by the row
// comments, from "1 pipes (1 PKRs)" up to "64 pipes (32 PKRs)".
//
// Observations from the data:
//  - The first column is 1 for the single-pipe configuration and 3 for all
//    others.
//  - The second column depends only on bpe (0..4).
//  - The third column differs for every multi-pipe pipe/PKR combination
//    (unlike the _T tables, PKR count matters here).
//  - The fourth column is 1..5 up to 16 pipes, 6..10 for 32 pipes and
//    11..15 for 64 pipes.
//  - The last column is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO,
// which is declared elsewhere -- presumably an item count followed by indices
// into shared pattern tables; confirm against the struct definition before
// editing any value. Row order presumably must match the index computed by the
// lookup code; do not reorder.
const ADDR_SW_PATINFO GFX10_SW_64K_S_X_RBPLUS_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,    6,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,    7,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,    8,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,    9,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,   10,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  210,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  211,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  212,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  213,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  214,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  215,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  216,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  217,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  218,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  219,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,   11,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,   12,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,   13,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,   14,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,   15,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  220,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  221,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  222,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  223,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  224,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  225,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  226,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  227,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  228,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  229,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,   16,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,   17,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,   18,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,   19,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,   20,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  230,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  231,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  232,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  233,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  234,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  250,    6,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  251,    7,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  252,    8,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  253,    9,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  254,   10,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,   21,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,   22,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,   23,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,   24,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,   25,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  255,    6,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  256,    7,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  257,    8,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  258,    9,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  259,   10,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  260,   11,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  261,   12,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  262,   13,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  263,   14,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  264,   15,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,   26,    6,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,   27,    7,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,   28,    8,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,   29,    9,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,   30,   10,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
    {   3,    0,  265,   11,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus
    {   3,    1,  266,   12,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus
    {   3,    2,  267,   13,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S_X @ RbPlus
    {   3,    3,  268,   14,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S_X @ RbPlus
    {   3,    4,  269,   15,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus
};

// Pattern-info table for the SW_64K_D_X swizzle mode on RbPlus configurations
// (mode and configuration names are taken from the per-row comments).
//
// Layout: 75 entries = 15 pipe/PKR configurations x 5 element sizes. Within
// each configuration the rows run 1, 2, 4, 8, 16 bpe; the configurations
// appear in the order given by the row comments, from "1 pipes (1 PKRs)" to
// "64 pipes (32 PKRs)".
//
// Each entry holds five integers. NOTE(review): they look like indices into
// pattern tables defined elsewhere; confirm the field meanings against the
// ADDR_SW_PATINFO declaration. In this table the first field is 1 for the
// 1-pipe rows and 3 for every other row, and the last field is always 0.
// NOTE(review): consumers presumably select a row by position, so the row
// order should not be changed without checking the lookup code.
const ADDR_SW_PATINFO GFX10_SW_64K_D_X_RBPLUS_PATINFO[] =
{
    {   1,    5,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   1,   39,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   1,    6,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   1,    7,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,    6,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,    7,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,    8,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,    9,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,   10,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  210,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  211,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  212,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  213,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  214,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  215,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  216,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  217,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  218,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  219,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,   11,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,   12,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,   13,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,   14,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,   15,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  220,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  221,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  222,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  223,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  224,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  225,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  226,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  227,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  228,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  229,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,   16,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,   17,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,   18,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,   19,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,   20,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  230,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  231,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  232,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  233,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  234,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  250,    6,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  251,    7,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  252,    8,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  253,    9,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  254,   10,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,   21,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,   22,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,   23,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,   24,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,   25,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  255,    6,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  256,    7,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  257,    8,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  258,    9,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  259,   10,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  260,   11,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  261,   12,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  262,   13,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  263,   14,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  264,   15,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,   26,    6,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,   27,    7,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,   28,    8,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,   29,    9,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,   30,   10,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
    {   3,    5,  265,   11,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus
    {   3,    1,  266,   12,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus
    {   3,   39,  267,   13,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D_X @ RbPlus
    {   3,    6,  268,   14,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D_X @ RbPlus
    {   3,    7,  269,   15,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus
};

// Pattern-info table for the SW_64K_R_X swizzle mode, 1xaa variant, on RbPlus
// configurations (names are taken from the per-row comments).
//
// Layout: 75 entries = 15 pipe/PKR configurations x 5 element sizes. Within
// each configuration the rows run 1, 2, 4, 8, 16 bpe; the configurations
// appear in the order given by the row comments, from "1 pipes (1 PKRs)" to
// "64 pipes (32 PKRs)".
//
// Each entry holds five integers. NOTE(review): they look like indices into
// pattern tables defined elsewhere; confirm the field meanings against the
// ADDR_SW_PATINFO declaration. In this table the first field is 2 for the
// 1-pipe rows and 3 for every other row, and the last field is always 0.
// NOTE(review): consumers presumably select a row by position, so the row
// order should not be changed without checking the lookup code.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO[] =
{
    {   2,    0,  347,  193,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   2,    1,  348,  366,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   2,   39,  349,  195,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   2,    6,  350,  367,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   2,    7,  351,  368,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  352,  193,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  353,  194,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  354,  195,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  355,  369,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  356,  370,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  280,  193,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  281,  194,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  283,  196,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  284,  197,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  394,  219,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  395,  371,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  396,  372,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  397,  373,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  398,  374,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  290,  203,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  291,  204,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  292,  205,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  293,  206,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  294,  207,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  295,  219,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  296,  375,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  297,  376,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  298,  377,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  299,  378,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  399,  379,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  399,  380,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  399,  381,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  399,  382,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  399,  383,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  400,  669,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  401,  670,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  402,  671,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  304,  387,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  305,  388,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  307,  379,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  307,  389,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  307,  381,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  307,  382,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  307,  390,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  307,  672,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  307,  673,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  307,  674,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  307,  675,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  307,  676,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  309,  677,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  309,  678,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  309,  679,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  309,  399,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  323,  400,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  309,  680,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  309,  681,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  309,  682,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  309,  404,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  323,  405,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  309,  505,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  309,  506,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  309,  507,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  309,  683,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  323,  684,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  311,  685,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  311,  686,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  311,  687,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  318,  411,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  324,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    0,  311,  513,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    1,  311,  514,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,   39,  311,  515,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    6,  318,  413,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 1xaa @ RbPlus
    {   3,    7,  324,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus
};

// Pattern-info table for the SW_64K_R_X swizzle mode, 2xaa variant, on RbPlus
// configurations (names are taken from the per-row comments).
//
// Layout: 75 entries = 15 pipe/PKR configurations x 5 element sizes. Within
// each configuration the rows run 1, 2, 4, 8, 16 bpe; the configurations
// appear in the order given by the row comments, from "1 pipes (1 PKRs)" to
// "64 pipes (32 PKRs)".
//
// Each entry holds five integers. NOTE(review): they look like indices into
// pattern tables defined elsewhere; confirm the field meanings against the
// ADDR_SW_PATINFO declaration. In this table the first field is 3 and the
// last field is 0 in every row.
// NOTE(review): consumers presumably select a row by position, so the row
// order should not be changed without checking the lookup code.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  424,  526,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  348,  527,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  358,  528,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  350,  688,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  359,  689,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  352,  526,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  353,  527,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  354,  528,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  355,  688,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  356,  690,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  280,  526,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  281,  527,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  282,  528,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  283,  529,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  284,  530,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  394,  691,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  395,  692,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  396,  693,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  397,  694,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  425,  695,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  290,  534,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  291,  535,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  292,  536,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  293,  537,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  294,  538,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  295,  691,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  296,  696,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  297,  697,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  298,  698,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  299,  699,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  399,  700,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  399,  701,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  399,  702,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  399,  703,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  426,  429,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  400,  704,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  401,  705,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  402,  706,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  304,  707,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  364,  708,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  307,  700,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  307,  701,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  307,  702,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  307,  703,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  427,  390,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  307,  709,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  307,  710,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  307,  711,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  307,  712,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  427,  676,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  309,  713,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  309,  714,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  309,  715,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  323,  716,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  428,  400,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  309,  717,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  309,  718,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  309,  719,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  323,  720,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  428,  405,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  309,  721,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  309,  722,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  309,  723,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  323,  724,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  428,  684,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  318,  725,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  318,  726,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  318,  727,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  324,  728,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  429,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    0,  318,  729,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    1,  318,  730,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,   39,  318,  731,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    6,  324,  732,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 2xaa @ RbPlus
    {   3,    7,  429,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus
};

// Pattern-info table for the SW_64K_R_X swizzle mode, 4xaa variant, on RbPlus
// configurations (names are taken from the per-row comments).
//
// Layout: 75 entries = 15 pipe/PKR configurations x 5 element sizes. Within
// each configuration the rows run 1, 2, 4, 8, 16 bpe; the configurations
// appear in the order given by the row comments, from "1 pipes (1 PKRs)" to
// "64 pipes (32 PKRs)".
//
// Each entry holds five integers. NOTE(review): they look like indices into
// pattern tables defined elsewhere; confirm the field meanings against the
// ADDR_SW_PATINFO declaration. In this table the first field is 3 and the
// last field is 0 in every row.
// NOTE(review): consumers presumably select a row by position, so the row
// order should not be changed without checking the lookup code.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  347,  566,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  348,  733,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  349,  568,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  350,  734,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  351,  735,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  352,  566,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  353,  567,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  354,  568,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  355,  736,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  356,  737,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  280,  566,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  281,  567,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  282,  568,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  283,  569,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  284,  570,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  394,  587,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  395,  738,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  396,  739,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  397,  740,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  430,  741,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  290,  576,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  291,  577,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  292,  578,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  293,  579,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  405,  580,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  295,  587,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  296,  742,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  297,  743,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  298,  740,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  431,  699,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  399,  744,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  399,  745,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  399,  746,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  432,  747,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  433,  429,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  400,  748,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  401,  749,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  402,  750,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  434,  707,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  435,  708,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  307,  744,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  307,  751,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  307,  746,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  436,  703,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  437,  390,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  307,  752,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  307,  753,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  307,  754,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  436,  712,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  437,  676,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  323,  755,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  323,  756,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  323,  757,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  438,  716,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  439,  400,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  323,  758,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  323,  759,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  323,  760,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  438,  720,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  439,  405,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  323,  761,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  323,  762,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  323,  763,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  438,  724,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  439,  684,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  324,  764,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  324,  765,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  324,  766,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  440,  728,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  441,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    0,  324,  767,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    1,  324,  768,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,   39,  324,  769,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    6,  440,  732,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 4xaa @ RbPlus
    {   3,    7,  441,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus
};

// Pattern-info table for the SW_64K_R_X swizzle mode, 8xaa variant, on RbPlus
// configurations (names are taken from the per-row comments).
//
// Layout: 75 entries = 15 pipe/PKR configurations x 5 element sizes. Within
// each configuration the rows run 1, 2, 4, 8, 16 bpe; the configurations
// appear in the order given by the row comments, from "1 pipes (1 PKRs)" to
// "64 pipes (32 PKRs)".
//
// Each entry holds five integers. NOTE(review): they look like indices into
// pattern tables defined elsewhere; confirm the field meanings against the
// ADDR_SW_PATINFO declaration. In this table the first field is 3 and the
// last field is 0 in every row.
// NOTE(review): consumers presumably select a row by position, so the row
// order should not be changed without checking the lookup code.
const ADDR_SW_PATINFO GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  424,  619,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  348,  620,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  358,  621,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  350,  770,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  359,  771,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  352,  619,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  353,  620,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  354,  621,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  355,  770,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  378,  772,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  280,  619,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  281,  620,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  282,  621,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  283,  622,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  413,  623,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  394,  773,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  395,  774,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  442,  775,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  443,  776,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  444,  777,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  415,  629,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  291,  630,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  292,  631,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  416,  632,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  417,  580,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  295,  773,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  296,  778,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  297,  779,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  445,  780,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  446,  699,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  399,  781,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  399,  782,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  447,  783,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  448,  784,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  449,  429,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  450,  785,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  302,  786,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  303,  787,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  420,  788,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  451,  708,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  339,  781,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  339,  782,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  422,  746,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  452,  703,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  453,  390,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  339,  789,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  339,  790,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  422,  754,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  452,  712,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  453,  676,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  343,  791,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  341,  792,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  423,  757,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  454,  716,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  455,  400,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  343,  793,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  341,  794,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  423,  760,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  454,  720,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  455,  405,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  343,  795,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  341,  796,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  423,  763,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  454,  724,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  455,  684,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  344,  797,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  345,  798,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  456,  766,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  457,  728,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  458,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    0,  344,  799,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    1,  345,  800,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,   39,  456,  769,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    6,  457,  732,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_R_X 8xaa @ RbPlus
    {   3,    7,  458,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus
};

// Pattern-info lookup table for the SW_64K_Z_X swizzle mode at 1xaa, RbPlus variant
// (as labelled by the per-row comments).
//
// Layout: 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8, 16 bpe) = 75 rows,
// with bpe varying fastest inside each pipe/PKR group. The trailing comment on each row
// names the configuration that row belongs to, so row order is significant.
//
// Each row is an ADDR_SW_PATINFO aggregate initializer of five integers. The last
// integer is 0 for every row in this table.
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk; the values presumably
// are an item count followed by indices into shared pattern tables -- confirm against
// the struct definition before relying on that reading.
// NOTE(review): the data looks machine-generated; prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO[] =
{
    {   2,    8,  347,  193,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   2,    9,  348,  366,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   2,   10,  349,  195,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   2,   11,  350,  367,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   2,    7,  351,  368,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  352,  193,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  353,  194,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  354,  195,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  355,  369,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  356,  370,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  280,  193,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  281,  194,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  283,  196,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  284,  197,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  285,  219,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  286,  371,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  287,  372,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  288,  373,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  289,  374,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  290,  203,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  291,  204,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  292,  205,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  293,  206,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  294,  207,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  295,  219,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  296,  375,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  297,  376,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  298,  377,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  299,  378,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  300,  379,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  300,  380,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  300,  381,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  300,  382,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  300,  383,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  301,  384,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  302,  385,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  303,  386,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  304,  387,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  305,  388,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  306,  379,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  306,  389,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  306,  381,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  307,  382,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  307,  390,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  306,  391,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  306,  392,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  306,  393,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  307,  394,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  307,  395,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  396,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  397,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  398,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  399,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  323,  400,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  401,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  402,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  403,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  404,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  323,  405,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  240,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  241,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  242,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  406,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  323,  407,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  310,  408,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  310,  409,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  310,  410,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  318,  411,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  324,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    8,  310,  250,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    9,  310,  251,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   10,  310,  252,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,   11,  318,  413,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 1xaa @ RbPlus
    {   3,    7,  324,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus
};

// Pattern-info lookup table for the SW_64K_Z_X swizzle mode at 2xaa, RbPlus variant
// (as labelled by the per-row comments).
//
// Layout: 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8, 16 bpe) = 75 rows,
// with bpe varying fastest inside each pipe/PKR group. The trailing comment on each row
// names the configuration that row belongs to, so row order is significant.
//
// Each row is an ADDR_SW_PATINFO aggregate initializer of five integers. The last
// integer is 0 for every row in this table.
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk; the values presumably
// are an item count followed by indices into shared pattern tables -- confirm against
// the struct definition before relying on that reading.
// NOTE(review): the data looks machine-generated; prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO[] =
{
    {   2,   13,  357,  415,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   2,   14,  349,  195,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  358,  263,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  350,  416,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  359,  417,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  360,  415,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  354,  195,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  354,  263,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  361,  418,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  356,  419,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  281,  262,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  282,  263,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  317,  264,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  284,  265,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  286,  420,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  287,  376,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  287,  421,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  289,  422,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  289,  423,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  291,  268,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  292,  205,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  292,  269,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  293,  270,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  294,  271,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  296,  420,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  297,  376,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  297,  421,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  298,  424,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  299,  423,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  300,  425,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  300,  426,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  300,  427,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  362,  428,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  363,  429,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  302,  430,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  303,  386,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  303,  431,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  305,  432,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  364,  433,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  306,  380,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  306,  381,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  306,  434,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  307,  435,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  365,  435,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  306,  402,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  306,  403,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  306,  436,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  307,  405,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  365,  405,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  397,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  398,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  437,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  323,  438,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  366,  438,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  402,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  403,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  436,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  323,  439,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  366,  439,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  440,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  242,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  441,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  323,  442,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  366,  442,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  310,  443,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  310,  410,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  310,  444,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  324,  412,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  367,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   13,  310,  445,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   14,  310,  252,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   15,  310,  446,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   16,  324,  414,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 2xaa @ RbPlus
    {   3,   17,  367,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus
};

// Pattern-info lookup table for the SW_64K_Z_X swizzle mode at 4xaa, RbPlus variant
// (as labelled by the per-row comments).
//
// Layout: 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8, 16 bpe) = 75 rows,
// with bpe varying fastest inside each pipe/PKR group. The trailing comment on each row
// names the configuration that row belongs to, so row order is significant.
//
// Each row is an ADDR_SW_PATINFO aggregate initializer of five integers. The last
// integer is 0 for every row in this table.
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk; the values presumably
// are an item count followed by indices into shared pattern tables -- confirm against
// the struct definition before relying on that reading.
// NOTE(review): the data looks machine-generated; prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO[] =
{
    {   2,   18,  349,  195,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  349,  447,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  349,  448,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  350,  449,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  351,  450,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  354,  195,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  368,  451,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  354,  299,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  355,  452,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  356,  453,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  282,  298,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  282,  299,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  283,  300,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  284,  301,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  287,  372,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  287,  454,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  287,  455,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  288,  456,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  331,  457,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  292,  205,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  292,  306,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  292,  307,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  320,  308,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  321,  309,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  297,  376,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  297,  458,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  297,  459,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  299,  460,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  369,  461,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  300,  381,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  300,  462,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  300,  463,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  363,  464,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  370,  465,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  303,  386,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  303,  466,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  303,  467,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  371,  468,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  337,  469,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  306,  381,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  306,  462,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  306,  470,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  372,  470,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  373,  470,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  306,  393,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  306,  471,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  306,  472,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  372,  472,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  373,  472,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  398,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  473,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  438,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  374,  438,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  375,  438,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  403,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  471,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  439,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  374,  439,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  375,  439,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  242,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  441,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  442,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  374,  442,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  375,  442,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  310,  410,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  310,  474,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  310,  412,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  376,  412,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  377,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   18,  310,  252,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   19,  310,  475,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   20,  310,  414,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   21,  376,  414,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 4xaa @ RbPlus
    {   3,   22,  377,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus
};

// Pattern-info lookup table for the SW_64K_Z_X swizzle mode at 8xaa, RbPlus variant
// (as labelled by the per-row comments).
//
// Layout: 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8, 16 bpe) = 75 rows,
// with bpe varying fastest inside each pipe/PKR group. The trailing comment on each row
// names the configuration that row belongs to, so row order is significant.
//
// Each row is an ADDR_SW_PATINFO aggregate initializer of five integers. The first
// integer is 3 and the last is 0 for every row in this table.
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk; the values presumably
// are an item count followed by indices into shared pattern tables -- confirm against
// the struct definition before relying on that reading.
// NOTE(review): the data looks machine-generated; prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO[] =
{
    {   3,   23,  358,  263,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  349,  448,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  358,  332,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  350,  476,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  359,  477,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  354,  263,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  354,  299,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  354,  332,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  361,  478,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  378,  479,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  282,  263,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  282,  299,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  282,  332,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  317,  333,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  329,  334,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  287,  421,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  287,  480,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  287,  481,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  379,  482,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  380,  483,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  292,  269,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  292,  307,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  292,  339,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  332,  340,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  333,  341,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  297,  421,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  297,  459,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  297,  481,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  381,  484,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  382,  485,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  300,  434,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  300,  463,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  383,  486,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  384,  487,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  385,  488,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  303,  431,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  303,  467,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  303,  489,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  337,  469,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  386,  469,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  306,  434,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  306,  470,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  387,  490,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  373,  470,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  388,  470,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  306,  436,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  306,  472,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  387,  491,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  373,  472,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  388,  492,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  437,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  438,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  389,  493,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  375,  438,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  390,  438,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  436,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  439,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  391,  494,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  375,  439,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  390,  439,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  441,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  442,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  391,  495,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  375,  442,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  390,  442,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  310,  444,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  310,  412,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  392,  496,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  377,  412,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  393,  412,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   23,  310,  446,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   24,  310,  414,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   25,  367,  414,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   26,  377,  414,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_Z_X 8xaa @ RbPlus
    {   3,   27,  393,  414,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus
};

// Pattern-info lookup table for the SW_64K_S3 swizzle mode, RbPlus variant
// (as labelled by the per-row comments).
//
// Layout: 15 pipe/PKR configurations x 5 element sizes (1, 2, 4, 8, 16 bpe) = 75 rows,
// with bpe varying fastest inside each pipe/PKR group.
//
// Every pipe/PKR group in this table holds the same five rows: the values depend only
// on bpe, not on the pipe/PKR configuration. The duplication keeps the row layout
// uniform across groups.
//
// Each row is an ADDR_SW_PATINFO aggregate initializer of five integers. The first
// integer is 1 and the last is 0 for every row in this table.
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk; the values presumably
// are an item count followed by indices into shared pattern tables -- confirm against
// the struct definition before relying on that reading.
// NOTE(review): the data looks machine-generated; prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_RBPLUS_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
    {   1,   29,  131,  148,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus
    {   1,   30,  132,  149,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus
    {   1,   31,  133,  150,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3 @ RbPlus
    {   1,   32,  134,  151,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3 @ RbPlus
    {   1,   33,  135,  152,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus
};

// Pattern-info table for swizzle mode SW_64K_S3_X on RbPlus parts.
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. Within each
// configuration the rows are ordered by bpe 1, 2, 4, 8, 16; the configurations
// appear in this order (see the per-row comments):
//   1 pipes (1 PKRs),  2 pipes (1-2 PKRs), 4 pipes (1-2 PKRs), 8 pipes (2 PKRs),
//   4 pipes (4 PKRs),  8 pipes (4 PKRs),   16 pipes (4 PKRs),  8 pipes (8 PKRs),
//   16 pipes (8 PKRs), 32 pipes (8 PKRs),  16 pipes (16 PKRs), 32 pipes (16 PKRs),
//   64 pipes (16 PKRs), 32 pipes (32 PKRs), 64 pipes (32 PKRs).
//
// In this table the second column follows bpe (29..33), the last column is 0 in
// every row, and the first column is 1 for the 1-pipe rows and 3 otherwise.
//
// NOTE(review): the five initializers presumably map to the ADDR_SW_PATINFO
// members (an item count followed by nibble-pattern table indices) -- confirm
// against the struct declaration, which is not visible here.
// NOTE(review): row order is presumably relied upon by the lookup code
// (configuration * 5 + log2(bpe)); do not reorder rows without checking it.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_X_RBPLUS_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   1,   30,  132,  149,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   1,   31,  133,  150,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   1,   32,  134,  151,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   1,   33,  135,  152,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  136,  148,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  137,  149,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  138,  150,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  139,  151,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  140,  152,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  141,  148,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  142,  149,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  143,  150,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  144,  151,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  145,  152,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  146,  148,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  147,  149,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  148,  150,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  149,  151,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  150,  152,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  141,  148,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  142,  149,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  143,  150,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  144,  151,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  145,  152,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  146,  148,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  147,  149,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  148,  150,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  149,  151,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  150,  152,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  151,  148,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  152,  149,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  153,  150,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  154,  151,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  155,  152,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  146,  148,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  147,  149,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  148,  150,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  149,  151,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  150,  152,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  151,  148,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  152,  149,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  153,  150,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  154,  151,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  155,  152,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  156,  153,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  157,  154,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  158,  155,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  159,  156,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  160,  157,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  151,  148,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  152,  149,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  153,  150,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  154,  151,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  155,  152,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  156,  153,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  157,  154,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  158,  155,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  159,  156,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  160,  157,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  161,  158,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  162,  159,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  163,  160,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  164,  161,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  165,  162,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  156,  153,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  157,  154,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  158,  155,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  159,  156,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  160,  157,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   29,  161,  158,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   30,  162,  159,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   31,  163,  160,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   32,  164,  161,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3_X @ RbPlus
    {   3,   33,  165,  162,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus
};

// Pattern-info table for swizzle mode SW_64K_S3_T on RbPlus parts.
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. Within each
// configuration the rows are ordered by bpe 1, 2, 4, 8, 16; the configurations
// appear in this order (see the per-row comments):
//   1 pipes (1 PKRs),  2 pipes (1-2 PKRs), 4 pipes (1-2 PKRs), 8 pipes (2 PKRs),
//   4 pipes (4 PKRs),  8 pipes (4 PKRs),   16 pipes (4 PKRs),  8 pipes (8 PKRs),
//   16 pipes (8 PKRs), 32 pipes (8 PKRs),  16 pipes (16 PKRs), 32 pipes (16 PKRs),
//   64 pipes (16 PKRs), 32 pipes (32 PKRs), 64 pipes (32 PKRs).
//
// In this table the second column follows bpe (29..33), the last column is 0 in
// every row, and the first column is 1 for the 1-pipe rows and 3 otherwise.
// The two 64-pipe groups reuse the 1-pipe third-column values (131..135) but
// pair them with different fourth-column values (163..167).
//
// NOTE(review): the five initializers presumably map to the ADDR_SW_PATINFO
// members (an item count followed by nibble-pattern table indices) -- confirm
// against the struct declaration, which is not visible here.
// NOTE(review): row order is presumably relied upon by the lookup code
// (configuration * 5 + log2(bpe)); do not reorder rows without checking it.
const ADDR_SW_PATINFO GFX10_SW_64K_S3_T_RBPLUS_PATINFO[] =
{
    {   1,   29,  131,  148,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   1,   30,  132,  149,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   1,   31,  133,  150,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   1,   32,  134,  151,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   1,   33,  135,  152,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  136,  148,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  137,  149,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  138,  150,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  139,  151,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  140,  152,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  141,  148,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  142,  149,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  143,  150,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  144,  151,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  145,  152,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  166,  148,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  167,  149,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  168,  150,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  169,  151,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  170,  152,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  141,  148,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  142,  149,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  143,  150,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  144,  151,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  145,  152,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  166,  148,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  167,  149,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  168,  150,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  169,  151,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  170,  152,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  171,  148,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  172,  149,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  173,  150,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  174,  151,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  175,  152,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  166,  148,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  167,  149,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  168,  150,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  169,  151,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  170,  152,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  171,  148,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  172,  149,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  173,  150,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  174,  151,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  175,  152,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  176,  153,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  177,  154,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  178,  155,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  179,  156,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  180,  157,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  171,  148,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  172,  149,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  173,  150,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  174,  151,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  175,  152,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  176,  153,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  177,  154,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  178,  155,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  179,  156,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  180,  157,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  131,  163,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  132,  164,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  133,  165,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  134,  166,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  135,  167,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  176,  153,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  177,  154,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  178,  155,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  179,  156,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  180,  157,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   29,  131,  163,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   30,  132,  164,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   31,  133,  165,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   32,  134,  166,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3_T @ RbPlus
    {   3,   33,  135,  167,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus
};

// Pattern-info table for swizzle mode SW_64K_D3_X on RbPlus parts.
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. Within each
// configuration the rows are ordered by bpe 1, 2, 4, 8, 16; the configurations
// appear in this order (see the per-row comments):
//   1 pipes (1 PKRs),  2 pipes (1-2 PKRs), 4 pipes (1-2 PKRs), 8 pipes (2 PKRs),
//   4 pipes (4 PKRs),  8 pipes (4 PKRs),   16 pipes (4 PKRs),  8 pipes (8 PKRs),
//   16 pipes (8 PKRs), 32 pipes (8 PKRs),  16 pipes (16 PKRs), 32 pipes (16 PKRs),
//   64 pipes (16 PKRs), 32 pipes (32 PKRs), 64 pipes (32 PKRs).
//
// In this table the second column follows bpe (34..38) and the last column is 0
// in every row. Unlike the S3 tables, the first column is not constant across a
// configuration: it ranges from 1 to 4 and rises with bpe in most groups.
//
// NOTE(review): the five initializers presumably map to the ADDR_SW_PATINFO
// members (an item count followed by nibble-pattern table indices) -- confirm
// against the struct declaration, which is not visible here.
// NOTE(review): row order is presumably relied upon by the lookup code
// (configuration * 5 + log2(bpe)); do not reorder rows without checking it.
const ADDR_SW_PATINFO GFX10_SW_64K_D3_X_RBPLUS_PATINFO[] =
{
    {   1,   34,  131,  148,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   1,   35,  132,  149,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   1,   36,  133,  150,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   1,   37,  134,  151,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   1,   38,  135,  152,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   2,   34,  459,  170,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   2,   35,  459,  801,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   2,   36,  460,  802,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   2,   37,  461,  152,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   38,  462,  152,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  463,  803,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  463,  804,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  464,  805,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  465,  806,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  466,  806,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  467,  803,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  467,  804,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  468,  805,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  469,  806,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  470,  806,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  471,  807,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  472,  808,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  473,  809,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  474,  810,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  475,  811,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  476,  812,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  477,  804,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  478,  805,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  479,  806,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  480,  806,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  481,  813,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  482,  804,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  483,  805,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  484,  806,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  485,  806,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  486,  814,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  486,  815,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  486,  816,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  487,  817,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  488,  817,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  489,  812,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  490,  804,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  491,  805,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  492,  806,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  493,  806,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  489,  818,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  494,  819,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  494,  820,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  495,  821,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  496,  821,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  497,  822,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  498,  823,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  499,  824,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  500,  825,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  501,  825,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  497,  826,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  498,  827,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  499,  828,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  500,  829,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  501,  829,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  497,  830,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  502,  831,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  502,  832,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  503,  833,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  504,  833,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  505,  834,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  506,  835,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  507,  836,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  508,  837,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  509,  837,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   34,  505,  838,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   35,  506,  839,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus
    {   3,   36,  507,  840,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   37,  508,  841,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D3_X @ RbPlus
    {   4,   38,  509,  841,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus
};

// Pattern-info table for swizzle mode SW_VAR_R_X at 1xaa on RbPlus parts.
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. Within each
// configuration the rows are ordered by bpe 1, 2, 4, 8, 16; the configurations
// appear in this order (see the per-row comments):
//   1 pipes (1 PKRs),  2 pipes (1-2 PKRs), 4 pipes (1-2 PKRs), 8 pipes (2 PKRs),
//   4 pipes (4 PKRs),  8 pipes (4 PKRs),   16 pipes (4 PKRs),  8 pipes (8 PKRs),
//   16 pipes (8 PKRs), 32 pipes (8 PKRs),  16 pipes (16 PKRs), 32 pipes (16 PKRs),
//   64 pipes (16 PKRs), 32 pipes (32 PKRs), 64 pipes (32 PKRs).
//
// In this table the second column follows bpe (0, 1, 39, 6, 7), the first column
// is 2 for the 1-pipe rows and 3 otherwise, and the last column is 0 for every
// 1-, 2- and 4-pipe row and nonzero for every row with 8 or more pipes.
//
// NOTE(review): the five initializers presumably map to the ADDR_SW_PATINFO
// members (an item count followed by nibble-pattern table indices) -- confirm
// against the struct declaration, which is not visible here.
// NOTE(review): row order is presumably relied upon by the lookup code
// (configuration * 5 + log2(bpe)); do not reorder rows without checking it.
const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO[] =
{
    {   2,    0,  270,  183,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   2,    1,  271,  184,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   2,   39,  272,  185,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   2,    6,  273,  186,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   2,    7,  274,  187,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  275,  188,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  276,  189,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  277,  190,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  278,  191,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  279,  192,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  280,  193,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  281,  194,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  283,  196,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  284,  197,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  394,  198,    1, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  395,  199,    2, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  396,  200,    3, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  397,  201,    4, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  398,  202,    5, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  290,  203,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  291,  204,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  292,  205,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  293,  206,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  294,  207,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  295,  208,    6, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  296,  209,    2, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  297,  210,    7, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  298,  211,    4, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  299,  212,    8, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  399,  213,    9, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  399,  214,   10, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  399,  215,   11, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  399,  216,   12, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  399,  217,   13, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  400,  218,   15, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  401,  219,   15, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  402,  220,   15, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  304,  221,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  305,  222,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  307,  213,    9, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  307,  223,   16, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  307,  215,   11, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  307,  216,   17, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  307,  224,   13, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  307,  497,   18, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  307,  498,   19, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  307,  499,   20, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  307,  500,   21, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  307,  501,   22, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  309,  230,  125, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  309,  231,  126, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  309,  232,  127, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  309,  233,   26, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  309,  234,   27, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  309,  502,   28, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  309,  503,   19, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  309,  504,   29, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  309,  238,   30, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  309,  239,   31, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  309,  505,   32, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  309,  506,   33, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  309,  507,   34, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  309,  508,   35, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  309,  509,   36, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  311,  510,  128, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  311,  511,  129, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  311,  512,  130, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  311,  248,   40, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  311,  249,   41, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    0,  311,  513,   32, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    1,  311,  514,   42, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,   39,  311,  515,   34, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    6,  311,  253,   43, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 1xaa @ RbPlus
    {   3,    7,  311,  254,   44, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus
};

// Pattern-info table for swizzle mode SW_VAR_R_X at 2xaa on RbPlus parts.
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. Within each
// configuration the rows are ordered by bpe 1, 2, 4, 8, 16; the configurations
// appear in this order (see the per-row comments):
//   1 pipes (1 PKRs),  2 pipes (1-2 PKRs), 4 pipes (1-2 PKRs), 8 pipes (2 PKRs),
//   4 pipes (4 PKRs),  8 pipes (4 PKRs),   16 pipes (4 PKRs),  8 pipes (8 PKRs),
//   16 pipes (8 PKRs), 32 pipes (8 PKRs),  16 pipes (16 PKRs), 32 pipes (16 PKRs),
//   64 pipes (16 PKRs), 32 pipes (32 PKRs), 64 pipes (32 PKRs).
//
// In this table the second column follows bpe (0, 1, 39, 6, 7), the first column
// is 3 in every row, and the last column is 0 for every 1-, 2- and 4-pipe row
// and nonzero for every row with 8 or more pipes.
//
// NOTE(review): the five initializers presumably map to the ADDR_SW_PATINFO
// members (an item count followed by nibble-pattern table indices) -- confirm
// against the struct declaration, which is not visible here.
// NOTE(review): row order is presumably relied upon by the lookup code
// (configuration * 5 + log2(bpe)); do not reorder rows without checking it.
const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  403,  516,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  271,  517,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  313,  518,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  273,  519,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  314,  520,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  404,  521,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  276,  522,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  315,  523,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  278,  524,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  316,  525,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  280,  526,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  281,  527,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  282,  528,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  283,  529,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  284,  530,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  394,  208,  131, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  395,  531,  132, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  396,  302,  133, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  397,  532,  134, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  398,  533,  135, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  290,  534,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  291,  535,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  292,  536,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  293,  537,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  294,  538,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  295,  208,  131, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  296,  209,  132, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  297,  210,  133, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  298,  211,  134, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  299,  212,  135, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  399,  539,  136, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  399,  214,  137, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  399,  280,  138, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  399,  216,  139, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  399,  224,  140, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  400,  540,   15, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  401,  541,   15, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  402,  542,   15, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  304,  543,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  305,  544,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  307,  539,  136, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  307,  214,  137, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  307,  280,  138, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  307,  216,  139, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  307,  224,  140, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  307,  545,  141, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  307,  498,  142, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  307,  546,  143, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  307,  500,  144, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  307,  547,  145, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  309,  548,  146, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  309,  231,  147, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  309,  285,  148, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  309,  233,  149, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  309,  286,  150, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  309,  502,  141, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  309,  503,  151, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  309,  504,  143, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  309,  238,  152, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  309,  239,  153, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  309,  505,  154, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  309,  506,  155, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  309,  507,  156, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  309,  508,  157, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  309,  509,  158, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  318,  549,  159, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  318,  550,  160, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  318,  551,  161, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  318,  287,  162, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  318,  288,  163, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    0,  318,  552,  154, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    1,  318,  553,  155, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,   39,  318,  554,  156, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    6,  318,  555,  157, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 2xaa @ RbPlus
    {   3,    7,  318,  290,  158, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus
};

// Pattern-info lookup table for the SW_VAR_R_X swizzle mode at 4xaa in the RbPlus
// configuration (as named by the trailing comment on every row).
//
// Row layout: 15 pipe/packer configurations, listed from "1 pipes (1 PKRs)" through
// "64 pipes (32 PKRs)", each contributing five consecutive rows for 1, 2, 4, 8 and
// 16 bpe -- 75 rows in total.
//
// Observed in this table: the first column is 3 on every row, the second column
// repeats 0, 1, 39, 6, 7 across the five bpe rows of each configuration, and the last
// column is 0 for every configuration with 4 pipes or fewer.
//
// NOTE(review): the five columns are presumably the ADDR_SW_PATINFO members (an item
// count followed by indices into shared pattern tables) -- confirm against the struct
// declaration, which is not visible from this part of the file.
const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  270,  556,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  271,  557,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  272,  558,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  273,  559,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  274,  560,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  275,  561,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  276,  562,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  277,  563,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  278,  564,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  279,  565,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  280,  566,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  281,  567,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  282,  568,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  283,  569,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  284,  570,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  394,  571,  164, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  395,  572,  165, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  396,  573,  166, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  397,  574,  167, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  398,  575,  168, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  290,  576,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  291,  577,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  292,  578,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  293,  579,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  405,  580,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  295,  581,  169, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  296,  582,  165, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  297,  583,  170, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  298,  584,  167, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  299,  585,  168, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  399,  213,  171, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  399,  214,  172, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  399,  215,  173, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  399,  216,  174, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  399,  217,  175, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  400,  586,   15, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  401,  587,   15, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  402,  588,   15, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  304,  589,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  406,  544,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  307,  213,  171, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  307,  223,  176, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  307,  215,  173, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  307,  216,  177, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  307,  224,  175, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  307,  497,  178, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  307,  498,  179, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  307,  499,  180, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  307,  500,  181, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  307,  501,  182, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  323,  590,  183, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  323,  591,  184, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  323,  592,  185, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  323,  593,  186, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  323,  286,  187, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  323,  594,  188, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  323,  595,  179, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  323,  596,  189, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  323,  321,  190, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  323,  322,  191, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  323,  597,  192, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  323,  598,  193, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  323,  599,  194, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  323,  600,  195, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  323,  601,  196, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  324,  602,  197, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  324,  603,  198, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  324,  604,  199, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  324,  605,  200, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  324,  606,  201, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    0,  324,  607,  192, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    1,  324,  608,  202, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,   39,  324,  609,  194, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    6,  324,  327,  203, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 4xaa @ RbPlus
    {   3,    7,  324,  328,  204, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus
};

// Pattern-info lookup table for the SW_VAR_R_X swizzle mode at 8xaa in the RbPlus
// configuration (as named by the trailing comment on every row).
//
// Row layout: 15 pipe/packer configurations, listed from "1 pipes (1 PKRs)" through
// "64 pipes (32 PKRs)", each contributing five consecutive rows for 1, 2, 4, 8 and
// 16 bpe -- 75 rows in total.
//
// Observed in this table: the first column is 3 on every row, the second column
// repeats 0, 1, 39, 6, 7 across the five bpe rows of each configuration, and the last
// column is 0 for every configuration with 4 pipes or fewer.
//
// NOTE(review): the five columns are presumably the ADDR_SW_PATINFO members (an item
// count followed by indices into shared pattern tables) -- confirm against the struct
// declaration, which is not visible from this part of the file.
const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO[] =
{
    {   3,    0,  407,  610,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  408,  611,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  409,  612,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  410,  613,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  411,  614,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  404,  615,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  276,  616,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  315,  617,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  278,  618,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  412,  565,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  280,  619,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  281,  620,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  282,  621,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  283,  622,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  413,  623,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  394,  624,  205, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  395,  625,  206, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  396,  626,  207, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  397,  627,  208, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  414,  628,  209, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  415,  629,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  291,  630,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  292,  631,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  416,  632,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  417,  580,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  295,  624,  205, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  296,  633,  206, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  297,  634,  207, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  298,  627,  208, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  418,  635,  210, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  399,  636,  211, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  399,  637,  212, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  399,  638,  213, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  399,  639,  214, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  419,  640,  215, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  301,  641,  216, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  302,  642,  216, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  303,  643,  216, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  420,  589,  105, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  421,  544,  217, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  339,  636,  211, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  339,  637,  212, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  339,  638,  213, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  339,  639,  214, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  422,  224,  175, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  339,  545,  218, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  339,  498,  219, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  339,  546,  220, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  339,  500,  221, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  339,  644,  222, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  343,  645,  223, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  343,  646,  224, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  343,  647,  225, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  341,  648,  226, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  423,  286,  187, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  343,  649,  218, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  343,  650,  227, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  343,  651,  220, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  343,  652,  221, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  341,  653,  228, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  343,  654,  229, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  343,  655,  230, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  343,  656,  231, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  343,  657,  232, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  343,  658,  233, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  346,  659,  234, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  346,  660,  235, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  346,  661,  236, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  344,  662,  237, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  345,  663,  238, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    0,  346,  664,  229, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    1,  346,  665,  230, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,   39,  346,  666,  231, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    6,  346,  667,  232, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_R_X 8xaa @ RbPlus
    {   3,    7,  344,  668,  204, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus
};

// Pattern-info lookup table for the SW_VAR_Z_X swizzle mode at 1xaa in the RbPlus
// configuration (as named by the trailing comment on every row).
//
// Row layout: 15 pipe/packer configurations, listed from "1 pipes (1 PKRs)" through
// "64 pipes (32 PKRs)", each contributing five consecutive rows for 1, 2, 4, 8 and
// 16 bpe -- 75 rows in total.
//
// Observed in this table: the first column is 2 for the five "1 pipes" rows and 3
// everywhere else, the second column repeats 8, 9, 10, 11, 7 across the five bpe rows
// of each configuration, and the last column is 0 for every configuration with
// 4 pipes or fewer.
//
// NOTE(review): the five columns are presumably the ADDR_SW_PATINFO members (an item
// count followed by indices into shared pattern tables) -- confirm against the struct
// declaration, which is not visible from this part of the file.
const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO[] =
{
    {   2,    8,  270,  183,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   2,    9,  271,  184,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   2,   10,  272,  185,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   2,   11,  273,  186,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   2,    7,  274,  187,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  275,  188,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  276,  189,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  277,  190,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  278,  191,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  279,  192,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  280,  193,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  281,  194,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  283,  196,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  284,  197,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  285,  198,    1, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  286,  199,    2, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  287,  200,    3, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  288,  201,    4, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  289,  202,    5, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  290,  203,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  291,  204,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  292,  205,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  293,  206,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  294,  207,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  295,  208,    6, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  296,  209,    2, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  297,  210,    7, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  298,  211,    4, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  299,  212,    8, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  300,  213,    9, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  300,  214,   10, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  300,  215,   11, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  300,  216,   12, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  300,  217,   13, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  301,  218,   14, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  302,  219,   14, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  303,  220,   14, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  304,  221,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  305,  222,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  306,  213,    9, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  306,  223,   16, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  306,  215,   11, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  307,  216,   17, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  307,  224,   13, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  306,  225,   18, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  306,  226,   19, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  306,  227,   20, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  307,  228,   21, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  307,  229,   22, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  230,   23, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  231,   24, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  232,   25, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  233,   26, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  309,  234,   27, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  235,   28, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  236,   19, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  237,   29, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  238,   30, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  309,  239,   31, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  308,  240,   32, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  308,  241,   33, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  308,  242,   34, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  309,  243,   35, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  309,  244,   36, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  310,  245,   37, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  310,  246,   38, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  310,  247,   39, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  311,  248,   40, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  311,  249,   41, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    8,  310,  250,   32, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    9,  310,  251,   42, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   10,  310,  252,   34, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,   11,  311,  253,   43, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
    {   3,    7,  311,  254,   44, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus
};

// Pattern-info lookup table for the SW_VAR_Z_X swizzle mode at 2xaa in the RbPlus
// configuration (as named by the trailing comment on every row).
//
// Row layout: 15 pipe/packer configurations, listed from "1 pipes (1 PKRs)" through
// "64 pipes (32 PKRs)", each contributing five consecutive rows for 1, 2, 4, 8 and
// 16 bpe -- 75 rows in total.
//
// Observed in this table: the first column is 2 for the 1-pipe 1 bpe and 2 bpe rows
// and 3 everywhere else, the second column repeats 13, 14, 15, 16, 17 across the five
// bpe rows of each configuration, and the last column is 0 for every configuration
// with 4 pipes or fewer.
//
// NOTE(review): the five columns are presumably the ADDR_SW_PATINFO members (an item
// count followed by indices into shared pattern tables) -- confirm against the struct
// declaration, which is not visible from this part of the file.
const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO[] =
{
    {   2,   13,  312,  255,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   2,   14,  272,  185,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  313,  256,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  273,  257,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  314,  258,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  276,  189,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  277,  190,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  315,  259,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  278,  260,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  316,  261,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  281,  262,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  282,  263,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  317,  264,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  284,  265,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  286,  209,    2, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  287,  266,    3, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  287,  210,   45, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  288,  211,   46, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  289,  267,   47, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  291,  268,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  292,  205,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  292,  269,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  293,  270,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  294,  271,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  296,  209,    2, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  297,  210,    7, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  297,  210,   45, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  298,  211,   46, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  299,  212,   47, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  300,  272,   48, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  300,  273,   11, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  300,  273,   49, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  300,  274,   50, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  300,  275,   51, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  302,  219,   14, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  303,  220,   14, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  303,  276,   14, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  304,  277,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  305,  278,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  306,  279,   48, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  306,  215,   11, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  306,  280,   49, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  307,  281,   52, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  307,  224,   53, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  306,  236,   19, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  306,  237,   54, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  306,  237,   55, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  307,  282,   56, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  307,  283,   57, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  284,   24, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  232,   25, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  285,   58, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  309,  233,   59, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  309,  286,   60, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  236,   19, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  237,   29, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  237,   55, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  309,  238,   56, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  309,  239,   61, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  308,  241,   62, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  308,  242,   34, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  308,  242,   63, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  309,  243,   64, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  309,  244,   65, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  310,  246,   38, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  310,  247,   39, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  310,  247,   66, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  318,  287,   67, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  318,  288,   68, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   13,  310,  251,   62, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   14,  310,  252,   34, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   15,  310,  252,   63, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   16,  318,  289,   69, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
    {   3,   17,  318,  290,   65, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus
};

// Pattern-info lookup table for the SW_VAR_Z_X swizzle mode at 4xaa in the RbPlus
// configuration (as named by the trailing comment on every row).
//
// Row layout: 15 pipe/packer configurations, listed from "1 pipes (1 PKRs)" through
// "64 pipes (32 PKRs)", each contributing five consecutive rows for 1, 2, 4, 8 and
// 16 bpe -- 75 rows in total.
//
// Observed in this table: the first column is 2 only for the very first row (1 pipe,
// 1 bpe) and 3 everywhere else, the second column repeats 18, 19, 20, 21, 22 across
// the five bpe rows of each configuration, and the last column is 0 for every
// configuration with 4 pipes or fewer.
//
// NOTE(review): the five columns are presumably the ADDR_SW_PATINFO members (an item
// count followed by indices into shared pattern tables) -- confirm against the struct
// declaration, which is not visible from this part of the file.
const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO[] =
{
    {   2,   18,  272,  185,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  272,  291,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  272,  292,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  273,  293,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  274,  294,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  277,  190,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  315,  259,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  277,  295,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  319,  296,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  279,  297,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  282,  195,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  282,  298,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  282,  299,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  283,  300,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  284,  301,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  287,  200,    3, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  287,  302,   45, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  287,  303,   70, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  289,  304,   71, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  289,  305,   72, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  292,  205,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  292,  306,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  292,  307,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  320,  308,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  321,  309,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  297,  210,    7, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  297,  210,   45, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  297,  310,   45, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  298,  311,   71, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  299,  312,   47, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  300,  215,   11, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  300,  215,   73, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  300,  215,   74, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  300,  216,   75, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  300,  217,   76, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  303,  220,   14, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  303,  276,   14, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  303,  313,   14, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  305,  314,   15, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  322,  315,   15, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  306,  215,   11, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  306,  232,   77, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  306,  215,   78, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  307,  216,   79, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  307,  224,   80, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  306,  227,   20, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  306,  316,   55, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  306,  227,   81, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  307,  317,   82, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  307,  229,   83, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  232,   25, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  232,   84, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  318,   84, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  323,  319,   85, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  323,  320,   86, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  237,   29, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  237,   55, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  237,   87, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  323,  321,   88, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  323,  322,   89, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  308,  242,   34, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  308,  242,   90, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  308,  242,   91, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  323,  323,   92, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  323,  324,   93, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  310,  247,   39, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  310,  247,   66, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  310,  247,   94, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  324,  325,   95, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  324,  326,   96, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   18,  310,  252,   34, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   19,  310,  252,   97, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   20,  310,  252,   98, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   21,  324,  327,   99, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
    {   3,   22,  324,  328,  100, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus
};

// Pattern-info table for the "SW_VAR_Z_X 8xaa @ RbPlus" case named in the
// per-row annotations below.
//
// Layout visible in this table:
//   * 75 rows = 15 pipe/PKR configurations x 5 bpe values.
//   * Within each configuration the rows are ordered by the bpe value in the
//     row comment (1, 2, 4, 8, 16); the second field steps 23, 24, 25, 26, 27
//     in lock-step with that ordering.
//   * The first field is 3 in every row.
//   * The last field is 0 for every 1-, 2- and 4-pipe row (and non-zero for
//     all rows with 8 or more pipes).
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk, so the field
// meanings cannot be confirmed here. The fields look like a count followed by
// row indices into the GFX10_SW_PATTERN_NIBBLE* tables (the second field's
// 23..27 range matches the S0/S1/S2 rows of GFX10_SW_PATTERN_NIBBLE01) --
// confirm against the struct declaration before relying on this.
//
// Row order is significant: presumably callers index this table by
// configuration and bpe, so rows must not be reordered -- TODO confirm.
const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] =
{
    {   3,   23,  313,  256,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  272,  292,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  325,  292,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  326,  329,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  327,  294,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  315,  259,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  277,  295,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  315,  330,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  278,  331,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  328,  331,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  282,  263,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  282,  299,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  282,  332,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  317,  333,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  329,  334,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  287,  210,   45, } , // 8 pipes (2 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  287,  335,   70, } , // 8 pipes (2 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  287,  336,   70, } , // 8 pipes (2 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  330,  337,   72, } , // 8 pipes (2 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  331,  338,  101, } , // 8 pipes (2 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  292,  269,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  292,  307,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  292,  339,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  332,  340,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  333,  341,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  297,  210,   45, } , // 8 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  297,  310,   45, } , // 8 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  297,  342,   45, } , // 8 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  299,  343,  102, } , // 8 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  334,  344,  103, } , // 8 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  300,  273,   49, } , // 16 pipes (4 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  300,  273,   74, } , // 16 pipes (4 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  300,  345,   74, } , // 16 pipes (4 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  335,  346,   76, } , // 16 pipes (4 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  336,  286,  104, } , // 16 pipes (4 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  303,  276,   14, } , // 8 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  303,  313,   14, } , // 8 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  303,  347,   14, } , // 8 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  337,  348,  105, } , // 8 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  338,  349,  106, } , // 8 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  306,  280,   49, } , // 16 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  306,  215,   78, } , // 16 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  306,  350,   74, } , // 16 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  339,  351,  107, } , // 16 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  340,  351,  108, } , // 16 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  306,  237,   55, } , // 32 pipes (8 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  306,  237,  109, } , // 32 pipes (8 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  306,  237,  110, } , // 32 pipes (8 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  339,  352,  111, } , // 32 pipes (8 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  339,  353,  112, } , // 32 pipes (8 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  285,   58, } , // 16 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  318,   84, } , // 16 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  308,  354,   84, } , // 16 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  341,  355,  113, } , // 16 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  342,  356,  114, } , // 16 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  237,   55, } , // 32 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  237,   87, } , // 32 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  308,  237,  115, } , // 32 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  343,  357,  116, } , // 32 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  341,  358,  117, } , // 32 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  308,  242,   63, } , // 64 pipes (16 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  308,  242,   91, } , // 64 pipes (16 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  308,  242,  118, } , // 64 pipes (16 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  343,  359,  119, } , // 64 pipes (16 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  343,  360,  120, } , // 64 pipes (16 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  310,  247,   66, } , // 32 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  310,  247,   94, } , // 32 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  310,  361,   94, } , // 32 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  344,  362,  121, } , // 32 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  345,  363,  122, } , // 32 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   23,  310,  252,   63, } , // 64 pipes (32 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   24,  310,  252,   98, } , // 64 pipes (32 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   25,  310,  252,  118, } , // 64 pipes (32 PKRs) 4 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   26,  346,  364,  123, } , // 64 pipes (32 PKRs) 8 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
    {   3,   27,  344,  365,  124, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
};

// Pattern table with 40 rows (indices 0..39, repeated in each row's trailing
// comment) of 8 UINT_64 entries each.
//
// Every entry is either the literal 0 or a single X*/Y*/Z*/S* symbol; those
// symbols are defined outside this chunk. Unlike GFX10_SW_PATTERN_NIBBLE2,
// no entry in this table combines several symbols with '^'.
//
// NOTE(review): going by the name and the row width, each row presumably
// describes the source of address bits 0..7 (two nibbles), with a leading 0
// meaning "no coordinate bit contributes here" -- confirm against the code
// that consumes this table.
//
// Rows are referenced by position (the PATINFO tables appear to store row
// indices into this array -- TODO confirm), so new rows must be appended and
// existing rows must not be reordered.
const UINT_64 GFX10_SW_PATTERN_NIBBLE01[][8] =
{
    {X0,            X1,            X2,            X3,            Y0,            Y1,            Y2,            Y3,            }, // 0
    {0,             X0,            X1,            X2,            Y0,            Y1,            Y2,            X3,            }, // 1
    {0,             0,             X0,            X1,            Y0,            Y1,            Y2,            X2,            }, // 2
    {0,             0,             0,             X0,            Y0,            Y1,            X1,            X2,            }, // 3
    {0,             0,             0,             0,             Y0,            Y1,            X0,            X1,            }, // 4
    {X0,            X1,            X2,            Y1,            Y0,            Y2,            X3,            Y3,            }, // 5
    {0,             0,             0,             X0,            Y0,            X1,            X2,            Y1,            }, // 6
    {0,             0,             0,             0,             X0,            Y0,            X1,            Y1,            }, // 7
    {X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            Y3,            }, // 8
    {0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 9
    {0,             0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 10
    {0,             0,             0,             X0,            Y0,            X1,            Y1,            X2,            }, // 11
    {X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            Y4,            }, // 12
    {S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 13
    {0,             S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 14
    {0,             0,             S0,            X0,            Y0,            X1,            Y1,            X2,            }, // 15
    {0,             0,             0,             S0,            X0,            Y0,            X1,            Y1,            }, // 16
    {0,             0,             0,             0,             S0,            X0,            Y0,            X1,            }, // 17
    {S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 18
    {0,             S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            }, // 19
    {0,             0,             S0,            S1,            X0,            Y0,            X1,            Y1,            }, // 20
    {0,             0,             0,             S0,            S1,            X0,            Y0,            X1,            }, // 21
    {0,             0,             0,             0,             S0,            S1,            X0,            Y0,            }, // 22
    {S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            X2,            }, // 23
    {0,             S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            }, // 24
    {0,             0,             S0,            S1,            S2,            X0,            Y0,            X1,            }, // 25
    {0,             0,             0,             S0,            S1,            S2,            X0,            Y0,            }, // 26
    {0,             0,             0,             0,             S0,            S1,            S2,            X0,            }, // 27
    {X0,            X1,            X2,            Y1,            Y0,            Y2,            X3,            Y4,            }, // 28
    {X0,            X1,            Z0,            Y0,            Z1,            Y1,            X2,            Z2,            }, // 29
    {0,             X0,            Z0,            Y0,            Z1,            Y1,            X1,            Z2,            }, // 30
    {0,             0,             X0,            Y0,            Z0,            Y1,            X1,            Z1,            }, // 31
    {0,             0,             0,             X0,            Z0,            Y0,            X1,            Z1,            }, // 32
    {0,             0,             0,             0,             Z0,            Y0,            X0,            Z1,            }, // 33
    {X0,            X1,            Z0,            Y0,            Y1,            Z1,            X2,            Z2,            }, // 34
    {0,             X0,            Z0,            Y0,            X1,            Z1,            Y1,            Z2,            }, // 35
    {0,             0,             X0,            Y0,            X1,            Z0,            Y1,            Z1,            }, // 36
    {0,             0,             0,             X0,            Y0,            Z0,            X1,            Z1,            }, // 37
    {0,             0,             0,             0,             X0,            Z0,            Y0,            Z1,            }, // 38
    {0,             0,             X0,            X1,            Y0,            Y1,            X2,            Y2,            }, // 39
};

const UINT_64 GFX10_SW_PATTERN_NIBBLE2[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y4,            X4,            Y5,            X5,            }, // 1
    {Y3,            X4,            Y4,            X5,            }, // 2
    {Y3,            X3,            Y4,            X4,            }, // 3
    {Y2,            X3,            Y3,            X4,            }, // 4
    {Y2,            X2,            Y3,            X3,            }, // 5
    {Z0^X4^Y4,      X4,            Y5,            X5,            }, // 6
    {Z0^Y3^X4,      X4,            Y4,            X5,            }, // 7
    {Z0^X3^Y3,      X3,            Y4,            X4,            }, // 8
    {Z0^Y2^X3,      X3,            Y3,            X4,            }, // 9
    {Z0^X2^Y2,      X2,            Y3,            X3,            }, // 10
    {Z1^Y4^X5,      Z0^X4^Y5,      Y5,            X5,            }, // 11
    {Z1^Y3^X5,      Z0^X4^Y4,      Y4,            X5,            }, // 12
    {Z1^Y3^X4,      Z0^X3^Y4,      Y4,            X4,            }, // 13
    {Z1^Y2^X4,      Z0^X3^Y3,      Y3,            X4,            }, // 14
    {Z1^Y2^X3,      Z0^X2^Y3,      Y3,            X3,            }, // 15
    {Z2^Y4^X6,      Z1^X4^Y6,      Z0^X5^Y5,      X5,            }, // 16
    {Z2^Y3^X6,      Z1^X4^Y5,      Z0^Y4^X5,      X5,            }, // 17
    {Z2^Y3^X5,      Z1^X3^Y5,      Z0^X4^Y4,      X4,            }, // 18
    {Y2^Z2^X5,      Z1^X3^Y4,      Z0^Y3^X4,      X4,            }, // 19
    {Y2^Z2^X4,      Z1^X2^Y4,      Z0^X3^Y3,      X3,            }, // 20
    {Z3^Y4^X7,      Z2^X4^Y7,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 21
    {Y3^Z3^X7,      Z2^X4^Y6,      Z1^Y4^X6,      Z0^X5^Y5,      }, // 22
    {Y3^Z3^X6,      Z2^X3^Y6,      Z1^Y4^X5,      Z0^X4^Y5,      }, // 23
    {Y2^Z3^X6,      Z2^X3^Y5,      Z1^Y3^X5,      Z0^X4^Y4,      }, // 24
    {Y2^Z3^X5,      X2^Z2^Y5,      Z1^Y3^X4,      Z0^X3^Y4,      }, // 25
    {Y4^Z4^X8,      Z3^X4^Y8,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 26
    {Y3^Z4^X8,      Z3^X4^Y7,      Z2^Y4^X7,      Z1^X5^Y6,      }, // 27
    {Y3^Z4^X7,      X3^Z3^Y7,      Z2^Y4^X6,      Z1^X4^Y6,      }, // 28
    {Y2^Z4^X7,      X3^Z3^Y6,      Z2^Y3^X6,      Z1^X4^Y5,      }, // 29
    {Y2^Z4^X6,      X2^Z3^Y6,      Z2^Y3^X5,      Z1^X3^Y5,      }, // 30
    {Y4^Z5^X9,      X4^Z4^Y9,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 31
    {Y3^Z5^X9,      X4^Z4^Y8,      Z3^Y4^X8,      Z2^X5^Y7,      }, // 32
    {Y3^Z5^X8,      X3^Z4^Y8,      Z3^Y4^X7,      Z2^X4^Y7,      }, // 33
    {Y2^Z5^X8,      X3^Z4^Y7,      Y3^Z3^X7,      Z2^X4^Y6,      }, // 34
    {Y2^Z5^X7,      X2^Z4^Y7,      Y3^Z3^X6,      Z2^X3^Y6,      }, // 35
    {X4^Y4,         X4,            Y5,            X5,            }, // 36
    {Y3^X4,         X4,            Y4,            X5,            }, // 37
    {X3^Y3,         X3,            Y4,            X4,            }, // 38
    {Y2^X3,         X3,            Y3,            X4,            }, // 39
    {X2^Y2,         X2,            Y3,            X3,            }, // 40
    {Y4^X5,         X4^Y5,         Y5,            X5,            }, // 41
    {Y3^X5,         X4^Y4,         Y4,            X5,            }, // 42
    {Y3^X4,         X3^Y4,         Y4,            X4,            }, // 43
    {Y2^X4,         X3^Y3,         Y3,            X4,            }, // 44
    {Y2^X3,         X2^Y3,         Y3,            X3,            }, // 45
    {Y4^X6,         X4^Y6,         X5^Y5,         X5,            }, // 46
    {Y3^X6,         X4^Y5,         Y4^X5,         X5,            }, // 47
    {Y3^X5,         X3^Y5,         X4^Y4,         X4,            }, // 48
    {Y2^X5,         X3^Y4,         Y3^X4,         X4,            }, // 49
    {Y2^X4,         X2^Y4,         X3^Y3,         X3,            }, // 50
    {Y4^X7,         X4^Y7,         Y5^X6,         X5^Y6,         }, // 51
    {Y3^X7,         X4^Y6,         Y4^X6,         X5^Y5,         }, // 52
    {Y3^X6,         X3^Y6,         Y4^X5,         X4^Y5,         }, // 53
    {Y2^X6,         X3^Y5,         Y3^X5,         X4^Y4,         }, // 54
    {Y2^X5,         X2^Y5,         Y3^X4,         X3^Y4,         }, // 55
    {Y4,            X4,            Y5^X7,         X5^Y7,         }, // 56
    {Y3,            X4,            Y4^X7,         X5^Y6,         }, // 57
    {Y3,            X3,            Y4^X6,         X4^Y6,         }, // 58
    {Y2,            X3,            Y3^X6,         X4^Y5,         }, // 59
    {Y2,            X2,            Y3^X5,         X3^Y5,         }, // 60
    {Z0^X3^Y3,      X4,            Y5,            X5,            }, // 61
    {Z0^X3^Y3,      X4,            Y4,            X5,            }, // 62
    {Z0^X3^Y3,      X3,            Y2,            X4,            }, // 63
    {Z0^X3^Y3,      X2,            Y2,            X3,            }, // 64
    {Z1^X3^Y3,      Z0^X4^Y4,      Y5,            X5,            }, // 65
    {Z1^X3^Y3,      Z0^X4^Y4,      Y4,            X5,            }, // 66
    {Z1^X3^Y3,      Z0^X4^Y4,      Y3,            X4,            }, // 67
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2,            X3,            }, // 68
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2,            X2,            }, // 69
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X5,            }, // 70
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X4,            }, // 71
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X3,            }, // 72
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X2,            }, // 73
    {X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 74
    {X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 75
    {X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      }, // 76
    {X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 77
    {X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      }, // 78
    {X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      }, // 79
    {Y3,            Y4,            X4,            Y5,            }, // 80
    {X2,            Y3,            X3,            Y4,            }, // 81
    {Z0^X3^Y3,      Y4,            X4,            Y5,            }, // 82
    {Z0^X3^Y3,      X2,            X3,            Y4,            }, // 83
    {Z1^X3^Y3,      Z0^X4^Y4,      Y4,            Y5,            }, // 84
    {Z1^X3^Y3,      Z0^X4^Y4,      X2,            Y3,            }, // 85
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y4,            }, // 86
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X6,      Y2^X5^Y6,      }, // 87
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      Y2^X5^Y7,      }, // 88
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      Y2^X5^Y8,      }, // 89
    {X3,            Y3,            X4,            Y4,            }, // 90
    {Z0^X3^Y3,      X3,            X4,            Y4,            }, // 91
    {Z1^X3^Y3,      Z0^X4^Y4,      X3,            Y4,            }, // 92
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y2,            }, // 93
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2^X5^Y5,      X2,            }, // 94
    {Z2^X3^Y3,      Z1^X4^Y4,      Y2^Y5^X6,      Z0^X5^Y6,      }, // 95
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2^Y5^X6,      X1^X5^Y6,      }, // 96
    {Z2^X3^Y3,      Z1^X4^Y4,      Y2^Y5^X7,      Z0^X5^Y7,      }, // 97
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2^Y5^X7,      X1^X5^Y7,      }, // 98
    {Z2^X3^Y3,      Z1^X4^Y4,      Y2^Y5^X8,      Z0^X5^Y8,      }, // 99
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2^Y5^X8,      X1^X5^Y8,      }, // 100
    {Z0^X3^Y3,      Y2,            X3,            Y4,            }, // 101
    {Z1^X3^Y3,      Z0^X4^Y4,      X2,            Y2,            }, // 102
    {Z1^X3^Y3,      Z0^X4^Y4,      Y2^X5^Y5,      Y3,            }, // 103
    {Z1^X3^Y3,      Z0^X4^Y4,      Y0^X5^Y5,      Y2,            }, // 104
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X6,      Z3^X5^Y6,      }, // 105
    {Z1^X3^Y3,      Z0^X4^Y4,      Y0^Y5^X6,      X1^X5^Y6,      }, // 106
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      Z4^X5^Y7,      }, // 107
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      Z3^X5^Y7,      }, // 108
    {Z1^X3^Y3,      Z0^X4^Y4,      Y0^Y5^X7,      X1^X5^Y7,      }, // 109
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      Z4^X5^Y8,      }, // 110
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      Z3^X5^Y8,      }, // 111
    {Z1^X3^Y3,      Z0^X4^Y4,      Y0^Y5^X8,      X1^X5^Y8,      }, // 112
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X6,      S0^X5^Y6,      }, // 113
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      S0^X5^Y7,      }, // 114
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      S0^X5^Y8,      }, // 115
    {Z1^X3^Y3,      Z0^X4^Y4,      S1^X5^Y5,      X2,            }, // 116
    {Z2^X3^Y3,      Z1^X4^Y4,      S1^Y5^X6,      Z0^X5^Y6,      }, // 117
    {Z1^X3^Y3,      Z0^X4^Y4,      S1^Y5^X6,      S0^X5^Y6,      }, // 118
    {Z2^X3^Y3,      Z1^X4^Y4,      S1^Y5^X7,      Z0^X5^Y7,      }, // 119
    {Z1^X3^Y3,      Z0^X4^Y4,      S1^Y5^X7,      S0^X5^Y7,      }, // 120
    {Z2^X3^Y3,      Z1^X4^Y4,      S1^Y5^X8,      Z0^X5^Y8,      }, // 121
    {Z1^X3^Y3,      Z0^X4^Y4,      S1^Y5^X8,      S0^X5^Y8,      }, // 122
    {Z1^X3^Y3,      Z0^X4^Y4,      S2^X5^Y5,      Y2,            }, // 123
    {Z1^X3^Y3,      Z0^X4^Y4,      S2^X5^Y5,      X2,            }, // 124
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X6,      S2^X5^Y6,      }, // 125
    {Z1^X3^Y3,      Z0^X4^Y4,      S2^Y5^X6,      S1^X5^Y6,      }, // 126
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      S2^X5^Y7,      }, // 127
    {Z1^X3^Y3,      Z0^X4^Y4,      S2^Y5^X7,      S1^X5^Y7,      }, // 128
    {Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      S2^X5^Y8,      }, // 129
    {Z1^X3^Y3,      Z0^X4^Y4,      S2^Y5^X8,      S1^X5^Y8,      }, // 130
    {Y2,            X3,            Z3,            Y3,            }, // 131
    {Y2,            X2,            Z3,            Y3,            }, // 132
    {Y2,            X2,            Z2,            Y3,            }, // 133
    {Y1,            X2,            Z2,            Y2,            }, // 134
    {Y1,            X1,            Z2,            Y2,            }, // 135
    {Y2^X3^Z3,      X3,            Z3,            Y3,            }, // 136
    {X2^Y2^Z3,      X2,            Z3,            Y3,            }, // 137
    {X2^Y2^Z2,      X2,            Z2,            Y3,            }, // 138
    {Y1^X2^Z2,      X2,            Z2,            Y2,            }, // 139
    {X1^Y1^Z2,      X1,            Z2,            Y2,            }, // 140
    {Y2^X4^Z4,      X3^Y3^Z3,      Z3,            Y3,            }, // 141
    {Y2^X3^Z4,      X2^Y3^Z3,      Z3,            Y3,            }, // 142
    {Y2^X3^Z3,      X2^Z2^Y3,      Z2,            Y3,            }, // 143
    {Y1^X3^Z3,      X2^Y2^Z2,      Z2,            Y2,            }, // 144
    {Y1^X2^Z3,      X1^Y2^Z2,      Z2,            Y2,            }, // 145
    {Y2^X5^Z5,      X3^Y4^Z4,      Y3^Z3^X4,      Y3,            }, // 146
    {Y2^X4^Z5,      X2^Y4^Z4,      X3^Y3^Z3,      Y3,            }, // 147
    {Y2^X4^Z4,      X2^Z3^Y4,      Z2^X3^Y3,      Y3,            }, // 148
    {Y1^X4^Z4,      X2^Y3^Z3,      Y2^Z2^X3,      Y2,            }, // 149
    {Y1^X3^Z4,      X1^Y3^Z3,      X2^Y2^Z2,      Y2,            }, // 150
    {Y2^X6^Z6,      X3^Y5^Z5,      Z3^Y4^X5,      Y3^X4^Z4,      }, // 151
    {Y2^X5^Z6,      X2^Y5^Z5,      Z3^X4^Y4,      X3^Y3^Z4,      }, // 152
    {Y2^X5^Z5,      X2^Z4^Y5,      Z2^X4^Y4,      X3^Y3^Z3,      }, // 153
    {Y1^X5^Z5,      X2^Y4^Z4,      Z2^Y3^X4,      Y2^X3^Z3,      }, // 154
    {Y1^X4^Z5,      X1^Y4^Z4,      Z2^X3^Y3,      X2^Y2^Z3,      }, // 155
    {Y2^X7^Z7,      X3^Y6^Z6,      Z3^Y5^X6,      Y3^X5^Z5,      }, // 156
    {Y2^X6^Z7,      X2^Y6^Z6,      Z3^X5^Y5,      Y3^X4^Z5,      }, // 157
    {Y2^X6^Z6,      X2^Z5^Y6,      Z2^X5^Y5,      Y3^X4^Z4,      }, // 158
    {Y1^X6^Z6,      X2^Y5^Z5,      Z2^Y4^X5,      Y2^X4^Z4,      }, // 159
    {Y1^X5^Z6,      X1^Y5^Z5,      Z2^X4^Y4,      Y2^X3^Z4,      }, // 160
    {Y2^X8^Z8,      X3^Y7^Z7,      Z3^Y6^X7,      Y3^X6^Z6,      }, // 161
    {Y2^X7^Z8,      X2^Y7^Z7,      Z3^X6^Y6,      Y3^X5^Z6,      }, // 162
    {Y2^X7^Z7,      X2^Z6^Y7,      Z2^X6^Y6,      Y3^X5^Z5,      }, // 163
    {Y1^X7^Z7,      X2^Y6^Z6,      Z2^Y5^X6,      Y2^X5^Z5,      }, // 164
    {Y1^X6^Z7,      X1^Y6^Z6,      Z2^X5^Y5,      Y2^X4^Z5,      }, // 165
    {Y2^X5,         X3^Y4^Z4,      Y3^Z3^X4,      Y3,            }, // 166
    {Y2^X4,         X2^Y4^Z4,      X3^Y3^Z3,      Y3,            }, // 167
    {Y2^X4,         X2^Z3^Y4,      Z2^X3^Y3,      Y3,            }, // 168
    {Y1^X4,         X2^Y3^Z3,      Y2^Z2^X3,      Y2,            }, // 169
    {Y1^X3,         X1^Y3^Z3,      X2^Y2^Z2,      Y2,            }, // 170
    {Y2,            X3,            Z3^Y4^X5,      Y3^X4^Z4,      }, // 171
    {Y2,            X2,            Z3^X4^Y4,      X3^Y3^Z4,      }, // 172
    {Y2,            X2,            Z2^X4^Y4,      X3^Y3^Z3,      }, // 173
    {Y1,            X2,            Z2^Y3^X4,      Y2^X3^Z3,      }, // 174
    {Y1,            X1,            Z2^X3^Y3,      X2^Y2^Z3,      }, // 175
    {Y2,            X3,            Z3,            Y3^X5,         }, // 176
    {Y2,            X2,            Z3,            Y3^X4,         }, // 177
    {Y2,            X2,            Z2,            Y3^X4,         }, // 178
    {Y1,            X2,            Z2,            Y2^X4,         }, // 179
    {Y1,            X1,            Z2,            Y2^X3,         }, // 180
    {X3^Y3,         X3,            Z3,            Y2,            }, // 181
    {X3^Y3,         X2,            Z3,            Y2,            }, // 182
    {X3^Y3,         X2,            Z2,            Y2,            }, // 183
    {X3^Y3,         X2,            Z2,            Y1,            }, // 184
    {X3^Y3,         X1,            Z2,            Y1,            }, // 185
    {X3^Y3,         X4^Y4,         Z3,            Y2,            }, // 186
    {X3^Y3,         X4^Y4,         Z2,            Y2,            }, // 187
    {X3^Y3,         X4^Y4,         Z2,            Y1,            }, // 188
    {X3^Y3,         X1^X4^Y4,      Z2,            Y1,            }, // 189
    {X3^Y3,         X4^Y4,         X5^Y5,         Z3,            }, // 190
    {X3^Y3,         X4^Y4,         Z3^X5^Y5,      Y2,            }, // 191
    {X3^Y3,         X4^Y4,         Z2^X5^Y5,      Y2,            }, // 192
    {X3^Y3,         X4^Y4,         Z2^X5^Y5,      Y1,            }, // 193
    {X3^Y3,         X1^X4^Y4,      Z2^X5^Y5,      Y1,            }, // 194
    {X3^Y3,         X4^Y4,         Y2^Y5^X6,      X5^Y6,         }, // 195
    {X3^Y3,         X4^Y4,         Z3^Y5^X6,      Y2^X5^Y6,      }, // 196
    {X3^Y3,         X4^Y4,         Z2^Y5^X6,      Y2^X5^Y6,      }, // 197
    {X3^Y3,         X4^Y4,         Z2^Y5^X6,      Y1^X5^Y6,      }, // 198
    {X3^Y3,         X1^X4^Y4,      Z2^Y5^X6,      Y1^X5^Y6,      }, // 199
    {X3^Y3,         X4^Y4,         Y2^Y5^X7,      X5^Y7,         }, // 200
    {X3^Y3,         X4^Y4,         Z3^Y5^X7,      Y2^X5^Y7,      }, // 201
    {X3^Y3,         X4^Y4,         Z2^Y5^X7,      Y2^X5^Y7,      }, // 202
    {X3^Y3,         X4^Y4,         Z2^Y5^X7,      Y1^X5^Y7,      }, // 203
    {X3^Y3,         X1^X4^Y4,      Z2^Y5^X7,      Y1^X5^Y7,      }, // 204
    {X3^Y3,         X4^Y4,         Y2^Y5^X8,      X5^Y8,         }, // 205
    {X3^Y3,         X4^Y4,         Z3^Y5^X8,      Y2^X5^Y8,      }, // 206
    {X3^Y3,         X4^Y4,         Z2^Y5^X8,      Y2^X5^Y8,      }, // 207
    {X3^Y3,         X4^Y4,         Z2^Y5^X8,      Y1^X5^Y8,      }, // 208
    {X3^Y3,         X1^X4^Y4,      Z2^Y5^X8,      Y1^X5^Y8,      }, // 209
    {Y4^X5,         Z0^X4^Y5,      Y5,            X5,            }, // 210
    {Y3^X5,         Z0^X4^Y4,      Y4,            X5,            }, // 211
    {Y3^X4,         Z0^X3^Y4,      Y4,            X4,            }, // 212
    {Y2^X4,         Z0^X3^Y3,      Y3,            X4,            }, // 213
    {Y2^X3,         Z0^X2^Y3,      Y3,            X3,            }, // 214
    {Y4^X6,         X4^Y6,         Z0^X5^Y5,      X5,            }, // 215
    {Y3^X6,         X4^Y5,         Z0^Y4^X5,      X5,            }, // 216
    {Y3^X5,         X3^Y5,         Z0^X4^Y4,      X4,            }, // 217
    {Y2^X5,         X3^Y4,         Z0^Y3^X4,      X4,            }, // 218
    {Y2^X4,         X2^Y4,         Z0^X3^Y3,      X3,            }, // 219
    {Y4^X6,         Z1^X4^Y6,      Z0^X5^Y5,      X5,            }, // 220
    {Y3^X6,         Z1^X4^Y5,      Z0^Y4^X5,      X5,            }, // 221
    {Y3^X5,         Z1^X3^Y5,      Z0^X4^Y4,      X4,            }, // 222
    {Y2^X5,         Z1^X3^Y4,      Z0^Y3^X4,      X4,            }, // 223
    {Y2^X4,         Z1^X2^Y4,      Z0^X3^Y3,      X3,            }, // 224
    {Y4^X7,         X4^Y7,         Z1^Y5^X6,      Z0^X5^Y6,      }, // 225
    {Y3^X7,         X4^Y6,         Z1^Y4^X6,      Z0^X5^Y5,      }, // 226
    {Y3^X6,         X3^Y6,         Z1^Y4^X5,      Z0^X4^Y5,      }, // 227
    {Y2^X6,         X3^Y5,         Z1^Y3^X5,      Z0^X4^Y4,      }, // 228
    {Y2^X5,         X2^Y5,         Z1^Y3^X4,      Z0^X3^Y4,      }, // 229
    {Y4^X7,         Z2^X4^Y7,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 230
    {Y3^X7,         Z2^X4^Y6,      Z1^Y4^X6,      Z0^X5^Y5,      }, // 231
    {Y3^X6,         Z2^X3^Y6,      Z1^Y4^X5,      Z0^X4^Y5,      }, // 232
    {Y2^X6,         Z2^X3^Y5,      Z1^Y3^X5,      Z0^X4^Y4,      }, // 233
    {Y2^X5,         X2^Z2^Y5,      Z1^Y3^X4,      Z0^X3^Y4,      }, // 234
    {Y4^X7,         X4^Y7,         Z2^Y5^X6,      Z1^X5^Y6,      }, // 235
    {Y3^X7,         X4^Y6,         Z2^Y4^X6,      Z1^X5^Y5,      }, // 236
    {Y3^X6,         X3^Y6,         Z2^Y4^X5,      Z1^X4^Y5,      }, // 237
    {Y2^X6,         X3^Y5,         Z2^Y3^X5,      Z1^X4^Y4,      }, // 238
    {Y2^X5,         X2^Y5,         Z2^Y3^X4,      Z1^X3^Y4,      }, // 239
    {Y4^X7,         Z3^X4^Y7,      Z2^Y5^X6,      Z1^X5^Y6,      }, // 240
    {Y3^X7,         Z3^X4^Y6,      Z2^Y4^X6,      Z1^X5^Y5,      }, // 241
    {Y3^X6,         X3^Z3^Y6,      Z2^Y4^X5,      Z1^X4^Y5,      }, // 242
    {Y2^X6,         X3^Z3^Y5,      Z2^Y3^X5,      Z1^X4^Y4,      }, // 243
    {Y2^X5,         X2^Z3^Y5,      Z2^Y3^X4,      Z1^X3^Y4,      }, // 244
    {Y4^X7,         X4^Y7,         Z3^Y5^X6,      Z2^X5^Y6,      }, // 245
    {Y3^X7,         X4^Y6,         Z3^Y4^X6,      Z2^X5^Y5,      }, // 246
    {Y3^X6,         X3^Y6,         Z3^Y4^X5,      Z2^X4^Y5,      }, // 247
    {Y2^X6,         X3^Y5,         Y3^Z3^X5,      Z2^X4^Y4,      }, // 248
    {Y2^X5,         X2^Y5,         Y3^Z3^X4,      Z2^X3^Y4,      }, // 249
    {Y4^X8,         X4^Y8,         Z2^Y5^X7,      Z1^X5^Y7,      }, // 250
    {Y3^X8,         X4^Y7,         Z2^Y4^X7,      Z1^X5^Y6,      }, // 251
    {Y3^X7,         X3^Y7,         Z2^Y4^X6,      Z1^X4^Y6,      }, // 252
    {Y2^X7,         X3^Y6,         Z2^Y3^X6,      Z1^X4^Y5,      }, // 253
    {Y2^X6,         X2^Y6,         Z2^Y3^X5,      Z1^X3^Y5,      }, // 254
    {Y4^X8,         Z3^X4^Y8,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 255
    {Y3^X8,         Z3^X4^Y7,      Z2^Y4^X7,      Z1^X5^Y6,      }, // 256
    {Y3^X7,         X3^Z3^Y7,      Z2^Y4^X6,      Z1^X4^Y6,      }, // 257
    {Y2^X7,         X3^Z3^Y6,      Z2^Y3^X6,      Z1^X4^Y5,      }, // 258
    {Y2^X6,         X2^Z3^Y6,      Z2^Y3^X5,      Z1^X3^Y5,      }, // 259
    {Y4^X9,         X4^Y9,         Z3^Y5^X8,      Z2^X5^Y8,      }, // 260
    {Y3^X9,         X4^Y8,         Z3^Y4^X8,      Z2^X5^Y7,      }, // 261
    {Y3^X8,         X3^Y8,         Z3^Y4^X7,      Z2^X4^Y7,      }, // 262
    {Y2^X8,         X3^Y7,         Y3^Z3^X7,      Z2^X4^Y6,      }, // 263
    {Y2^X7,         X2^Y7,         Y3^Z3^X6,      Z2^X3^Y6,      }, // 264
    {Y4^X9,         X4^Z4^Y9,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 265
    {Y3^X9,         X4^Z4^Y8,      Z3^Y4^X8,      Z2^X5^Y7,      }, // 266
    {Y3^X8,         X3^Z4^Y8,      Z3^Y4^X7,      Z2^X4^Y7,      }, // 267
    {Y2^X8,         X3^Z4^Y7,      Y3^Z3^X7,      Z2^X4^Y6,      }, // 268
    {Y2^X7,         X2^Z4^Y7,      Y3^Z3^X6,      Z2^X3^Y6,      }, // 269
    {X4,            Y4,            X5^Y8,         Y5^X8,         }, // 270
    {Y3,            X4,            Y4^X8,         X5^Y7,         }, // 271
    {X3,            Y3,            X4^Y7,         Y4^X7,         }, // 272
    {Y2,            X3,            Y3^X7,         X4^Y6,         }, // 273
    {X2,            Y2,            X3^Y6,         Y3^X6,         }, // 274
    {Z0^X4^Y4,      Y4,            X5,            X6^Y8,         }, // 275
    {Z0^X4^Y4,      Y3,            Y4,            X5^Y8,         }, // 276
    {Z0^X4^Y4,      X3,            Y3,            X5^Y7,         }, // 277
    {Z0^X4^Y4,      Y2,            X3,            Y3^X8,         }, // 278
    {Z0^X4^Y4,      X2,            Y2,            X3^Y6,         }, // 279
    {Y4^X5^Y5,      Z0^X4^Y4,      X5,            Y5,            }, // 280
    {Y4^X5^Y5,      Z0^X4^Y4,      Y3,            X5,            }, // 281
    {Y4^X5^Y5,      Z0^X4^Y4,      X3,            Y3,            }, // 282
    {Y4^X5^Y5,      Z0^X4^Y4,      Y2,            X3,            }, // 283
    {Y4^X5^Y5,      Z0^X4^Y4,      X2,            Y2,            }, // 284
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y5,            }, // 285
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y3,            }, // 286
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X3,            }, // 287
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y2,            }, // 288
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X2,            }, // 289
    {Y4^X6^Y6,      Z1^X4^Y4,      X5,            X6,            }, // 290
    {Y4^X6^Y6,      Z1^X4^Y4,      Y3,            X5,            }, // 291
    {Y4^X6^Y6,      Z1^X4^Y4,      X3,            Y3,            }, // 292
    {Y4^X6^Y6,      Z1^X4^Y4,      Y2,            X3,            }, // 293
    {Y4^X6^Y6,      Z1^X4^Y4,      X2,            Y2,            }, // 294
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5,            }, // 295
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y3,            }, // 296
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X3,            }, // 297
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y2,            }, // 298
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X2,            }, // 299
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         }, // 300
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X6,            }, // 301
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y3,            }, // 302
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X3,            }, // 303
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Y2,            }, // 304
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X2,            }, // 305
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         }, // 306
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 307
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         }, // 308
    {Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 309
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         }, // 310
    {Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 311
    {Y3,            X4,            Y4^X8,         Y5^X7,         }, // 312
    {X3,            Y3,            Y4^X7,         X4^Y7,         }, // 313
    {X2,            Y2,            Y3^X6,         X3^Y6,         }, // 314
    {Z0^X4^Y4,      X3,            Y3,            Y4^X8,         }, // 315
    {Z0^X4^Y4,      X2,            Y2,            Y3^X7,         }, // 316
    {Y4^X5^Y5,      Z0^X4^Y4,      X2,            X3,            }, // 317
    {Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      }, // 318
    {Z0^X4^Y4,      X2,            X3,            Y3^X8,         }, // 319
    {Y4^X6^Y6,      Z1^X4^Y4,      X2,            X3,            }, // 320
    {Y4^X6^Y6,      Z0^X4^Y4,      X2,            X3,            }, // 321
    {Y4^X7^Y7,      Z1^X4^Y4,      Y1^Y5^X6,      X2,            }, // 322
    {Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      }, // 323
    {Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      }, // 324
    {X3,            Y3,            Y4^X7,         Y1^X4^Y7,      }, // 325
    {Y2,            X3,            Y3^X7,         X1^X4^Y6,      }, // 326
    {X2,            Y2,            Y3^X6,         Y0^X3^Y6,      }, // 327
    {Y0^X4^Y4,      Y2,            X3,            Y3^X8,         }, // 328
    {Y4^X5^Y5,      Y0^X4^Y4,      X2,            X3,            }, // 329
    {Y4^X5^Y5,      Z0^X4^Y4,      X2^X5^Y5,      Y2,            }, // 330
    {Y4^X5^Y5,      Z0^X4^Y4,      Y1^X5^Y5,      X2,            }, // 331
    {Y4^X6^Y6,      Z0^X4^Y4,      X3,            Y3,            }, // 332
    {Y4^X6^Y6,      Y0^X4^Y4,      X3,            Y3,            }, // 333
    {Y4^X6^Y6,      Z0^X4^Y4,      Y0^X5^Y5,      X2,            }, // 334
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X2^X5^Y5,      }, // 335
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y1^X5^Y5,      }, // 336
    {Y4^X7^Y7,      Z0^X4^Y4,      Y1^Y5^X6,      X3,            }, // 337
    {Y4^X7^Y7,      Z0^X4^Y4,      Y0^Y5^X6,      X3,            }, // 338
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Z2^X5^Y6,      }, // 339
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y0^X5^Y6,      }, // 340
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Z2^X5^Y7,      }, // 341
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y0^X5^Y7,      }, // 342
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Z3^X5^Y7,      }, // 343
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Z3^X5^Y8,      }, // 344
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Z2^X5^Y8,      }, // 345
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Z4^X5^Y8,      }, // 346
    {X4,            Y4,            X5^Y10,        Y5^X10,        }, // 347
    {Y3,            X4,            Y4^X10,        X5^Y9,         }, // 348
    {X3,            Y3,            X4^Y9,         Y4^X9,         }, // 349
    {Y2,            X3,            Y3^X9,         X4^Y8,         }, // 350
    {X2,            Y2,            X3^Y8,         Y3^X8,         }, // 351
    {Z0^X4^Y4,      Y4,            X5,            Y5^X10,        }, // 352
    {Z0^X4^Y4,      Y3,            Y4,            X5^Y9,         }, // 353
    {Z0^X4^Y4,      X3,            Y3,            Y4^X9,         }, // 354
    {Z0^X4^Y4,      Y2,            X3,            Y3^X9,         }, // 355
    {Z0^X4^Y4,      X2,            Y2,            Y3^X8,         }, // 356
    {Y3,            X4,            Y4^X10,        Y5^X9,         }, // 357
    {X3,            Y3,            Y4^X9,         X4^Y9,         }, // 358
    {X2,            Y2,            Y3^X8,         X3^Y8,         }, // 359
    {Z0^X4^Y4,      Y3,            Y4,            Y5^X9,         }, // 360
    {Z0^X4^Y4,      X2,            X3,            Y3^X9,         }, // 361
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X2^X5^Y6,      }, // 362
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y1^X5^Y6,      }, // 363
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X2,            }, // 364
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y1^X5^Y6,      }, // 365
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y1^X5^Y7,      }, // 366
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Y1^X5^Y8,      }, // 367
    {Z0^X4^Y4,      X3,            Y3,            X5^Y8,         }, // 368
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X2,            }, // 369
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X1^X5^Y6,      }, // 370
    {Y4^X7^Y7,      Z1^X4^Y4,      Y1^Y5^X6,      X3,            }, // 371
    {Y4^X7^Y7,      Z1^X4^Y4,      Y1^Y5^X6,      Z0^X5^Y6,      }, // 372
    {Y4^X7^Y7,      Z0^X4^Y4,      Y1^Y5^X6,      X1^X5^Y6,      }, // 373
    {Y4^X8^Y8,      Z1^X4^Y4,      Y1^Y5^X7,      Z0^X5^Y7,      }, // 374
    {Y4^X8^Y8,      Z0^X4^Y4,      Y1^Y5^X7,      X1^X5^Y7,      }, // 375
    {Y4^X9^Y9,      Z1^X4^Y4,      Y1^Y5^X8,      Z0^X5^Y8,      }, // 376
    {Y4^X9^Y9,      Z0^X4^Y4,      Y1^Y5^X8,      X1^X5^Y8,      }, // 377
    {Z0^X4^Y4,      X2,            Y2,            X3^Y7,         }, // 378
    {Y4^X5^Y5,      Z0^X4^Y4,      Y2^X5^Y5,      X2,            }, // 379
    {Y4^X5^Y5,      Y0^X4^Y4,      X1^X5^Y5,      X2,            }, // 380
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X3,            }, // 381
    {Y4^X6^Y6,      Y0^X4^Y4,      Y1^X5^Y5,      X3,            }, // 382
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y2^X5^Y6,      }, // 383
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X2^X5^Y6,      }, // 384
    {Y4^X6^Y6,      Y0^X4^Y4,      Y1^X5^Y5,      Y2^X5^Y6,      }, // 385
    {Y4^X7^Y7,      Y0^X4^Y4,      Y1^Y5^X6,      X3,            }, // 386
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y2^X5^Y6,      }, // 387
    {Y4^X7^Y7,      Y0^X4^Y4,      Y1^Y5^X6,      X1^X5^Y6,      }, // 388
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y2^X5^Y7,      }, // 389
    {Y4^X8^Y8,      Y0^X4^Y4,      Y1^Y5^X7,      X1^X5^Y7,      }, // 390
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X2^X5^Y7,      }, // 391
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X2^X5^Y8,      }, // 392
    {Y4^X9^Y9,      Y0^X4^Y4,      Y1^Y5^X8,      X1^X5^Y8,      }, // 393
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y5,            }, // 394
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y3,            }, // 395
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      X3,            }, // 396
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y2,            }, // 397
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      X2,            }, // 398
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      }, // 399
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X6,            }, // 400
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Y3,            }, // 401
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X3,            }, // 402
    {X4,            Y4,            Y5^X8,         X5^Y8,         }, // 403
    {Z0^X4^Y4,      Y4,            X5,            Y5^X9,         }, // 404
    {Y4^X6^Y6,      Z0^X4^Y4,      X2,            Y2,            }, // 405
    {Y4^X7^Y7,      Z1^X4^Y4,      S1^Y5^X6,      X2,            }, // 406
    {X4,            Y4,            Y5^X8,         S0^X5^Y8,      }, // 407
    {Y3,            X4,            Y4^X8,         S0^X5^Y7,      }, // 408
    {X3,            Y3,            Y4^X7,         S0^X4^Y7,      }, // 409
    {Y2,            X3,            Y3^X7,         S0^X4^Y6,      }, // 410
    {X2,            Y2,            Y3^X6,         S0^X3^Y6,      }, // 411
    {S2^X4^Y4,      X2,            Y2,            X3^Y6,         }, // 412
    {Y4^X5^Y5,      S2^X4^Y4,      X2,            Y2,            }, // 413
    {Y4^X5^Y5,      Z0^X4^Y4,      X3^X6^Y6,      X2,            }, // 414
    {Y4^X6^Y6,      Z1^X4^Y4,      X5,            Y6,            }, // 415
    {Y4^X6^Y6,      Z0^X4^Y4,      Y2,            X3,            }, // 416
    {Y4^X6^Y6,      S2^X4^Y4,      X2,            Y2,            }, // 417
    {Y4^X6^Y6,      Z0^X4^Y4,      S2^X5^Y5,      X2,            }, // 418
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X3^X7^Y7,      }, // 419
    {Y4^X7^Y7,      Z0^X4^Y4,      S2^Y5^X6,      Y2,            }, // 420
    {Y4^X7^Y7,      Z0^X4^Y4,      S2^Y5^X6,      X2,            }, // 421
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      S2^X5^Y6,      }, // 422
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      S2^X5^Y7,      }, // 423
    {X4,            Y4,            Y5^X10,        X5^Y10,        }, // 424
    {Y4^X5^Y5,      Z0^X4^Y4,      S0^X6^Y6,      X2,            }, // 425
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      S0^X7^Y7,      }, // 426
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      S0^X5^Y6,      }, // 427
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      S0^X5^Y7,      }, // 428
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      S0^X5^Y8,      }, // 429
    {Y4^X5^Y5,      Z0^X4^Y4,      S1^X6^Y6,      X2,            }, // 430
    {Y4^X6^Y6,      Z0^X4^Y4,      S1^X5^Y5,      X2,            }, // 431
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      S1^X7^Y7,      }, // 432
    {Y4^X6^Y6,      Z0^X4^Y4,      S1^X5^Y5,      S0^X7^Y7,      }, // 433
    {Y4^X7^Y7,      Z1^X4^Y4,      S1^Y5^X6,      Y2,            }, // 434
    {Y4^X7^Y7,      Z0^X4^Y4,      S1^Y5^X6,      X2,            }, // 435
    {Y4^X7^Y7,      Z1^X4^Y4,      S1^Y5^X6,      Z0^X5^Y6,      }, // 436
    {Y4^X7^Y7,      Z0^X4^Y4,      S1^Y5^X6,      S0^X5^Y6,      }, // 437
    {Y4^X8^Y8,      Z1^X4^Y4,      S1^Y5^X7,      Z0^X5^Y7,      }, // 438
    {Y4^X8^Y8,      Z0^X4^Y4,      S1^Y5^X7,      S0^X5^Y7,      }, // 439
    {Y4^X9^Y9,      Z1^X4^Y4,      S1^Y5^X8,      Z0^X5^Y8,      }, // 440
    {Y4^X9^Y9,      Z0^X4^Y4,      S1^Y5^X8,      S0^X5^Y8,      }, // 441
    {Y4^X5^Y5,      Z0^X4^Y4,      S2^X6^Y6,      X3,            }, // 442
    {Y4^X5^Y5,      Z0^X4^Y4,      S2^X6^Y6,      Y2,            }, // 443
    {Y4^X5^Y5,      S2^X4^Y4,      S1^X6^Y6,      X2,            }, // 444
    {Y4^X6^Y6,      Z0^X4^Y4,      S2^X5^Y5,      Y2,            }, // 445
    {Y4^X6^Y6,      S2^X4^Y4,      S1^X5^Y5,      X2,            }, // 446
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      S2^X7^Y7,      }, // 447
    {Y4^X6^Y6,      Z0^X4^Y4,      S2^X5^Y5,      S1^X7^Y7,      }, // 448
    {Y4^X6^Y6,      S2^X4^Y4,      S1^X5^Y5,      S0^X7^Y7,      }, // 449
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y6,            }, // 450
    {Y4^X7^Y7,      S2^X4^Y4,      S1^Y5^X6,      X2,            }, // 451
    {Y4^X7^Y7,      Z0^X4^Y4,      S2^Y5^X6,      S1^X5^Y6,      }, // 452
    {Y4^X7^Y7,      S2^X4^Y4,      S1^Y5^X6,      S0^X5^Y6,      }, // 453
    {Y4^X8^Y8,      Z0^X4^Y4,      S2^Y5^X7,      S1^X5^Y7,      }, // 454
    {Y4^X8^Y8,      S2^X4^Y4,      S1^Y5^X7,      S0^X5^Y7,      }, // 455
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      S2^X5^Y8,      }, // 456
    {Y4^X9^Y9,      Z0^X4^Y4,      S2^Y5^X8,      S1^X5^Y8,      }, // 457
    {Y4^X9^Y9,      S2^X4^Y4,      S1^Y5^X8,      S0^X5^Y8,      }, // 458
    {X4^Y4,         Y2,            Z3,            Y3,            }, // 459
    {X4^Y4,         Y2,            Z2,            Y3,            }, // 460
    {X4^Y4,         Y1,            Z2,            Y2,            }, // 461
    {Y1^X4^Y4,      X1,            Z2,            Y2,            }, // 462
    {Y4^X5^Y5,      X4^Y4,         Y2,            Z3,            }, // 463
    {Y4^X5^Y5,      X4^Y4,         Y2,            Z2,            }, // 464
    {Z3^Y4^X5^Y5,   X4^Y4,         Y1,            Z2,            }, // 465
    {Z3^Y4^X5^Y5,   Y1^X4^Y4,      X1,            Z2,            }, // 466
    {Y4^X5^Y5,      X4^Y4,         Z3^X5,         Y2,            }, // 467
    {Y4^X5^Y5,      X4^Y4,         Z2^X5,         Y2,            }, // 468
    {Z3^Y4^X5^Y5,   X4^Y4,         Z2^X5,         Y1,            }, // 469
    {Z3^Y4^X5^Y5,   Y1^X4^Y4,      Z2^X5,         X1,            }, // 470
    {Y4^X6^Y6,      X4^Y4,         Y2,            Y3,            }, // 471
    {Y4^X6^Y6,      X4^Y4,         Z3,            Y3,            }, // 472
    {Y4^X6^Y6,      X4^Y4,         Z2,            Y3,            }, // 473
    {Z3^Y4^X6^Y6,   X4^Y4,         Z2,            Y2,            }, // 474
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      Z2,            Y2,            }, // 475
    {Y4^X6^Y6,      X4^Y4,         X5^Y5,         Y2,            }, // 476
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z3,            }, // 477
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z2,            }, // 478
    {Z3^Y4^X6^Y6,   X4^Y4,         Y1^X5^Y5,      Z2,            }, // 479
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      X1^X5^Y5,      Z2,            }, // 480
    {Y4^X6^Y6,      X4^Y4,         X5^Y5,         Z3^X6,         }, // 481
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z3^X6,         }, // 482
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z2^X6,         }, // 483
    {Z3^Y4^X6^Y6,   X4^Y4,         Y1^X5^Y5,      Z2^X6,         }, // 484
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      X1^X5^Y5,      Z2^X6,         }, // 485
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Y3,            }, // 486
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Y2,            }, // 487
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Y2,            }, // 488
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      X5^Y6,         }, // 489
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Z3^X5^Y6,      }, // 490
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Z2^X5^Y6,      }, // 491
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Z2^X5^Y6,      }, // 492
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Z2^X5^Y6,      }, // 493
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Y3^X5^Y6,      }, // 494
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Y2^X5^Y6,      }, // 495
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Y2^X5^Y6,      }, // 496
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      X5^Y7,         }, // 497
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Z3^X5^Y7,      }, // 498
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Z2^X5^Y7,      }, // 499
    {Z3^Y4^X8^Y8,   X4^Y4,         Y1^Y5^X7,      Z2^X5^Y7,      }, // 500
    {Z3^Y4^X8^Y8,   Y1^X4^Y4,      X1^Y5^X7,      Z2^X5^Y7,      }, // 501
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Y3^X5^Y7,      }, // 502
    {Z3^Y4^X8^Y8,   X4^Y4,         Y1^Y5^X7,      Y2^X5^Y7,      }, // 503
    {Z3^Y4^X8^Y8,   Y1^X4^Y4,      X1^Y5^X7,      Y2^X5^Y7,      }, // 504
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      X5^Y8,         }, // 505
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      Z3^X5^Y8,      }, // 506
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      Z2^X5^Y8,      }, // 507
    {Z3^Y4^X9^Y9,   X4^Y4,         Y1^Y5^X8,      Z2^X5^Y8,      }, // 508
    {Z3^Y4^X9^Y9,   Y1^X4^Y4,      X1^Y5^X8,      Z2^X5^Y8,      }, // 509
};

const UINT_64 GFX10_SW_PATTERN_NIBBLE3[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y6,            X6,            Y7,            X7,            }, // 1
    {Y5,            X6,            Y6,            X7,            }, // 2
    {Y5,            X5,            Y6,            X6,            }, // 3
    {Y4,            X5,            Y5,            X6,            }, // 4
    {Y4,            X4,            Y5,            X5,            }, // 5
    {Z0^X6^Y6,      X6,            Y7,            X7,            }, // 6
    {Z0^Y5^X6,      X6,            Y6,            X7,            }, // 7
    {Z0^X5^Y5,      X5,            Y6,            X6,            }, // 8
    {Z0^Y4^X5,      X5,            Y5,            X6,            }, // 9
    {Z0^X4^Y4,      X4,            Y5,            X5,            }, // 10
    {Z1^Y6^X7,      Z0^X6^Y7,      Y7,            X7,            }, // 11
    {Z1^Y5^X7,      Z0^X6^Y6,      Y6,            X7,            }, // 12
    {Z1^Y5^X6,      Z0^X5^Y6,      Y6,            X6,            }, // 13
    {Z1^Y4^X6,      Z0^X5^Y5,      Y5,            X6,            }, // 14
    {Z1^Y4^X5,      Z0^X4^Y5,      Y5,            X5,            }, // 15
    {X6^Y6,         X6,            Y7,            X7,            }, // 16
    {Y5^X6,         X6,            Y6,            X7,            }, // 17
    {X5^Y5,         X5,            Y6,            X6,            }, // 18
    {Y4^X5,         X5,            Y5,            X6,            }, // 19
    {X4^Y4,         X4,            Y5,            X5,            }, // 20
    {Y6^X7,         X6^Y7,         Y7,            X7,            }, // 21
    {Y5^X7,         X6^Y6,         Y6,            X7,            }, // 22
    {Y5^X6,         X5^Y6,         Y6,            X6,            }, // 23
    {Y4^X6,         X5^Y5,         Y5,            X6,            }, // 24
    {Y4^X5,         X4^Y5,         Y5,            X5,            }, // 25
    {Y3,            X4,            Y5,            X5,            }, // 26
    {Y4,            X5,            Y6,            X6,            }, // 27
    {Y2,            X4,            Y5,            X6,            }, // 28
    {Y2,            X3,            Y4,            X5,            }, // 29
    {Y4,            X6,            Y6,            X7,            }, // 30
    {Y3,            X4,            Y6,            X6,            }, // 31
    {Y2,            X3,            Y4,            X6,            }, // 32
    {Y2,            X2,            Y3,            X4,            }, // 33
    {Z0^X6^Y6,      X4,            Y6,            X7,            }, // 34
    {Z0^X6^Y6,      X3,            Y4,            X6,            }, // 35
    {Z0^X6^Y6,      Y2,            X3,            Y4,            }, // 36
    {Y2^X6^Y6,      X2,            Y3,            X4,            }, // 37
    {Z1^Y6^X7,      Z0^X6^Y7,      Y4,            X7,            }, // 38
    {Z1^Y6^X7,      Z0^X6^Y7,      Y3,            X4,            }, // 39
    {Y2^Y6^X7,      Z0^X6^Y7,      Y3,            X4,            }, // 40
    {Y2^Y6^X7,      X2^X6^Y7,      Y3,            X4,            }, // 41
    {X5,            Y6,            X6,            Y7,            }, // 42
    {Y5,            X5,            Y6,            Y2^Y7,         }, // 43
    {X4,            Y5,            X5,            Y2^Y6,         }, // 44
    {Y4,            X4,            Y5,            Y1^Y6,         }, // 45
    {Y3,            X4,            Y5,            Y1^Y6,         }, // 46
    {Y4,            X5,            Y6,            Y2^Y7,         }, // 47
    {X3,            Y4,            X5,            Y2^Y6,         }, // 48
    {Y2,            X3,            Y4,            Y1^Y6,         }, // 49
    {Y4,            Y6,            X6,            Y7,            }, // 50
    {Y3,            X4,            Y6,            Y2^Y7,         }, // 51
    {X2,            Y3,            X4,            Y2^Y6,         }, // 52
    {Y1,            X3,            Y4,            X2^Y6,         }, // 53
    {Z0^X6^Y6,      Y4,            X6,            Y7,            }, // 54
    {Z0^X6^Y6,      X3,            Y4,            Y2^Y7,         }, // 55
    {Y2^X6^Y6,      Y3,            X4,            X2^Y7,         }, // 56
    {X2^X6^Y6,      X3,            Y4,            Y1^Y7,         }, // 57
    {Z0^Y6^X7,      Z5^X6^Y7,      Y4,            Y7,            }, // 58
    {Z0^Y6^X7,      Z5^X6^Y7,      Y3,            X4,            }, // 59
    {Z0^Y6^X7,      Y2^X6^Y7,      X3,            Y4,            }, // 60
    {X2^Y6^X7,      Y1^X6^Y7,      X3,            Y4,            }, // 61
    {X5,            Y5,            X6,            Y2^Y6,         }, // 62
    {Y5,            X5,            Y2^Y6,         X2^Y7,         }, // 63
    {Y4,            X5,            Y1^Y5,         X2^Y6,         }, // 64
    {Y4,            X4,            Y1^Y5,         X1^Y6,         }, // 65
    {Y5,            X5,            X2^Y6,         Y2^Y7,         }, // 66
    {Y4,            X5,            X2^Y5,         Y1^Y6,         }, // 67
    {Y4,            X4,            X1^Y5,         Y1^Y6,         }, // 68
    {Y3,            X4,            Y1^Y5,         X1^Y6,         }, // 69
    {X4,            Y5,            X6,            Y2^Y6,         }, // 70
    {Y4,            X5,            X2^Y6,         Y2^Y7,         }, // 71
    {X3,            Y4,            Y1^Y5,         X2^Y6,         }, // 72
    {Y3,            X4,            X1^Y6,         Y1^Y7,         }, // 73
    {X3,            Y4,            X6,            Y2^Y6,         }, // 74
    {Y3,            X4,            Y2^Y6,         X2^Y7,         }, // 75
    {Y3,            X4,            Y1^Y6,         X2^Y7,         }, // 76
    {Z4^X6^Y6,      X3,            Y4,            X6,            }, // 77
    {Z4^X6^Y6,      X3,            Y4,            Y2^Y6,         }, // 78
    {Y1^X6^Y6,      Y3,            X4,            X2^Y7,         }, // 79
    {Z5^Y6^X7,      Z4^X6^Y7,      Y3,            X4,            }, // 80
    {Y2^Y6^X7,      Z4^X6^Y7,      Y3,            X4,            }, // 81
    {Y1^Y6^X7,      X2^X6^Y7,      Y3,            X4,            }, // 82
    {Y5,            Y1^Y6,         Y2^Y7,         X2^Y8,         }, // 83
    {X4,            Y1^Y5,         X1^Y6,         Y2^Y7,         }, // 84
    {Y4,            Y0^Y5,         Y1^Y6,         X1^Y7,         }, // 85
    {Y5,            Y1^Y6,         X2^Y7,         Y2^Y8,         }, // 86
    {X4,            X1^Y5,         Y1^Y6,         X2^Y7,         }, // 87
    {Y4,            Y0^Y5,         X1^Y6,         Y1^Y7,         }, // 88
    {X3,            Y0^Y5,         X1^Y6,         Y1^Y7,         }, // 89
    {Y4,            Y1^Y6,         X2^Y7,         Y2^Y8,         }, // 90
    {X4,            X1^Y6,         Y1^Y7,         X2^Y8,         }, // 91
    {X3,            X1^Y6,         Y1^Y7,         X2^Y8,         }, // 92
    {X3,            Y4,            X2^Y6,         Y1^Y7,         }, // 93
    {X3,            Y1^Y6,         X2^Y7,         Y2^Y8,         }, // 94
    {Z3^X6^Y6,      X3,            Y4,            Y2^Y7,         }, // 95
    {Y2^X6^Y6,      X3,            X2^Y7,         Y1^Y8,         }, // 96
    {Z3^Y6^X7,      Y2^X6^Y7,      X3,            Y4,            }, // 97
    {Y2^Y6^X7,      X2^X6^Y7,      X3,            Y1^Y7,         }, // 98
    {Y6,            X6,            Y7,            S0^Y8,         }, // 99
    {Y5,            X6,            Y6,            S0^Y7,         }, // 100
    {Y5,            X5,            Y6,            S0^Y7,         }, // 101
    {Y4,            X5,            Y5,            S0^Y6,         }, // 102
    {Y4,            X4,            Y5,            S0^Y6,         }, // 103
    {Y3,            X4,            Y5,            S0^Y6,         }, // 104
    {Y4,            X5,            Y6,            S0^Y7,         }, // 105
    {Y2,            X4,            Y5,            S0^Y6,         }, // 106
    {Y2,            X3,            Y4,            S0^Y6,         }, // 107
    {Y4,            X6,            Y6,            S0^Y7,         }, // 108
    {Y3,            X4,            Y6,            S0^Y7,         }, // 109
    {Z0^X6^Y6,      X6,            Y7,            S0^Y8,         }, // 110
    {Z0^X6^Y6,      X4,            Y6,            S0^Y7,         }, // 111
    {Z0^X6^Y6,      X3,            Y4,            S0^Y7,         }, // 112
    {S0^X6^Y6,      Y2,            X3,            Y4,            }, // 113
    {Z0^Y6^X7,      Z5^X6^Y7,      Y7,            S0^Y8,         }, // 114
    {Z0^Y6^X7,      Z5^X6^Y7,      Y4,            S0^Y7,         }, // 115
    {Z0^Y6^X7,      S0^X6^Y7,      Y3,            X4,            }, // 116
    {S0^Y6^X7,      Y2^X6^Y7,      X3,            Y4,            }, // 117
    {Y6,            X6,            S0^Y7,         S1^Y8,         }, // 118
    {Y5,            X6,            S0^Y6,         S1^Y7,         }, // 119
    {Y5,            X5,            S0^Y6,         S1^Y7,         }, // 120
    {Y4,            X5,            S0^Y5,         S1^Y6,         }, // 121
    {Y4,            X4,            S0^Y5,         S1^Y6,         }, // 122
    {Y3,            X4,            S0^Y5,         S1^Y6,         }, // 123
    {Y4,            X5,            S0^Y6,         S1^Y7,         }, // 124
    {X3,            Y4,            S0^Y5,         S1^Y6,         }, // 125
    {Y4,            X6,            S0^Y6,         S1^Y7,         }, // 126
    {Y3,            X4,            S0^Y6,         S1^Y7,         }, // 127
    {Z4^X6^Y6,      X6,            S0^Y7,         S1^Y8,         }, // 128
    {Z4^X6^Y6,      Y4,            S0^Y6,         S1^Y7,         }, // 129
    {S1^X6^Y6,      X3,            Y4,            S0^Y7,         }, // 130
    {Z5^Y6^X7,      Z4^X6^Y7,      S0^Y7,         S1^Y8,         }, // 131
    {S1^Y6^X7,      Z4^X6^Y7,      Y4,            S0^Y7,         }, // 132
    {S1^Y6^X7,      S0^X6^Y7,      Y3,            X4,            }, // 133
    {Y6,            S0^Y7,         S1^Y8,         S2^Y9,         }, // 134
    {Y5,            S0^Y6,         S1^Y7,         S2^Y8,         }, // 135
    {Y4,            S0^Y5,         S1^Y6,         S2^Y7,         }, // 136
    {X3,            S0^Y5,         S1^Y6,         S2^Y7,         }, // 137
    {Y4,            S0^Y6,         S1^Y7,         S2^Y8,         }, // 138
    {X3,            Y4,            S0^Y6,         S1^Y7,         }, // 139
    {Y2,            X3,            S0^Y6,         S1^Y7,         }, // 140
    {X2,            Y2,            X3,            S0^Y6,         }, // 141
    {Z3^X6^Y6,      S0^Y7,         S1^Y8,         S2^Y9,         }, // 142
    {S2^X6^Y6,      Y4,            S0^Y7,         S1^Y8,         }, // 143
    {S0^X6^Y6,      X2,            Y2,            X3,            }, // 144
    {Z3^Y6^X7,      S2^X6^Y7,      S0^Y7,         S1^Y8,         }, // 145
    {S2^Y6^X7,      S1^X6^Y7,      Y4,            S0^Y7,         }, // 146
    {S0^Y6^X7,      X2^X6^Y7,      Y2,            X3,            }, // 147
    {X4,            Z4,            Y4,            X5,            }, // 148
    {X3,            Z4,            Y4,            X4,            }, // 149
    {X3,            Z3,            Y4,            X4,            }, // 150
    {X3,            Z3,            Y3,            X4,            }, // 151
    {X2,            Z3,            Y3,            X3,            }, // 152
    {X4^Y4^Z4,      Z4,            Y4,            X5,            }, // 153
    {X3^Y4^Z4,      Z4,            Y4,            X4,            }, // 154
    {X3^Z3^Y4,      Z3,            Y4,            X4,            }, // 155
    {X3^Y3^Z3,      Z3,            Y3,            X4,            }, // 156
    {X2^Y3^Z3,      Z3,            Y3,            X3,            }, // 157
    {X4^Y5^Z5,      Y4^Z4^X5,      Y4,            X5,            }, // 158
    {X3^Y5^Z5,      X4^Y4^Z4,      Y4,            X4,            }, // 159
    {X3^Z4^Y5,      Z3^X4^Y4,      Y4,            X4,            }, // 160
    {X3^Y4^Z4,      Y3^Z3^X4,      Y3,            X4,            }, // 161
    {X2^Y4^Z4,      X3^Y3^Z3,      Y3,            X3,            }, // 162
    {X4,            Y4^Z4^X5,      Y4,            X5,            }, // 163
    {X3,            X4^Y4^Z4,      Y4,            X4,            }, // 164
    {X3,            Z3^X4^Y4,      Y4,            X4,            }, // 165
    {X3,            Y3^Z3^X4,      Y3,            X4,            }, // 166
    {X2,            X3^Y3^Z3,      Y3,            X3,            }, // 167
    {X3,            Z3,            Y2,            X4,            }, // 168
    {X2,            Z3,            Y2,            X3,            }, // 169
    {X3,            Z4,            Y4,            X5,            }, // 170
    {X2,            Z4,            Y3,            X4,            }, // 171
    {X2,            Z3,            Y3,            X4,            }, // 172
    {Y2,            X3,            Z4,            Y4,            }, // 173
    {Z3,            Y3,            X4,            Z4,            }, // 174
    {Z3^X6^Y6,      Y3,            X4,            Z4,            }, // 175
    {X2^X6^Y6,      Z4,            Y3,            X4,            }, // 176
    {X2^X6^Y6,      Z3,            Y3,            X4,            }, // 177
    {X2^X6^Y6,      Z3,            Y2,            X3,            }, // 178
    {Z3^Y6^X7,      Z4^X6^Y7,      Y3,            X4,            }, // 179
    {X2^Y6^X7,      Z4^X6^Y7,      Y3,            X4,            }, // 180
    {X2^Y6^X7,      Z3^X6^Y7,      Y3,            X4,            }, // 181
    {X2^Y6^X7,      Z3^X6^Y7,      Y2,            X3,            }, // 182
    {X6^Y7,         Y6^X7,         0,             0,             }, // 183
    {Y5^X7,         X6^Y6,         0,             0,             }, // 184
    {X5^Y6,         Y5^X6,         0,             0,             }, // 185
    {Y4^X6,         X5^Y5,         0,             0,             }, // 186
    {X4^Y5,         Y4^X5,         0,             0,             }, // 187
    {Y5^X9,         X7^Y7,         Y6^X8,         0,             }, // 188
    {Y5^X8,         X6^Y7,         Y6^X7,         0,             }, // 189
    {Y4^X8,         X6^Y6,         Y5^X7,         0,             }, // 190
    {Y4^X7,         X5^Y6,         Y5^X6,         0,             }, // 191
    {Y3^X7,         X5^Y5,         Y4^X6,         0,             }, // 192
    {X6^Y9,         Y6^X9,         X7^Y8,         Y7^X8,         }, // 193
    {X6^Y8,         Y5^X9,         X7^Y7,         Y6^X8,         }, // 194
    {X5^Y8,         Y5^X8,         X6^Y7,         Y6^X7,         }, // 195
    {Y3^X8,         X5^Y7,         X6^Y6,         Y5^X7,         }, // 196
    {Y3^X7,         X3^Y7,         X5^Y6,         Y5^X6,         }, // 197
    {X6,            X7^Y9,         Y6^X10,        X8^Y8,         }, // 198
    {Y5,            X6^Y9,         Y6^X9,         X7^Y8,         }, // 199
    {Y3,            X6^Y8,         Y5^X9,         X7^Y7,         }, // 200
    {X3,            Y3^X9,         Y5^X8,         X6^Y7,         }, // 201
    {Y2,            X3^Y7,         Y3^X8,         X6^Y6,         }, // 202
    {Y6^X9,         X7^Y8,         Y7^X8,         Z0^X5^Y5,      }, // 203
    {X6^Y8,         Y6^X8,         X7^Y7,         Z0^X5^Y5,      }, // 204
    {X5^Y8,         X6^Y7,         Y6^X7,         Z0^X5^Y5,      }, // 205
    {Y3^X7,         X5^Y7,         X6^Y6,         Z0^X5^Y5,      }, // 206
    {X3^Y7,         Y3^X6,         X5^Y6,         Z0^X5^Y5,      }, // 207
    {X6,            Y6^X10,        X7^Y9,         Y7^X9,         }, // 208
    {X5,            X6^Y9,         Y6^X9,         X7^Y8,         }, // 209
    {Y3,            X5^Y9,         X6^Y8,         Y6^X8,         }, // 210
    {X3,            Y3^X8,         X5^Y8,         X6^Y7,         }, // 211
    {Y2,            X3^Y8,         Y3^X7,         X5^Y7,         }, // 212
    {X6,            Y6,            X7^Y10,        Y7^X10,        }, // 213
    {Y3,            X6,            Y6^X10,        X7^Y9,         }, // 214
    {X3,            Y3,            X6^Y9,         Y6^X9,         }, // 215
    {Y2,            X3,            Y3^X9,         X6^Y8,         }, // 216
    {X2,            Y2,            X3^Y8,         Y3^X8,         }, // 217
    {Y6,            X7^Y9,         X8^Y8,         Y7^X9,         }, // 218
    {X6,            Y6^X9,         X7^Y8,         Y7^X8,         }, // 219
    {Y3,            X6^Y8,         X7^Y7,         Y6^X8,         }, // 220
    {X3,            Y3^X8,         X6^Y7,         Y6^X7,         }, // 221
    {Y2,            X3^Y7,         Y3^X7,         X6^Y6,         }, // 222
    {Y3,            X6,            X7^Y9,         Y6^X10,        }, // 223
    {X2,            Y2,            Y3^X8,         X3^Y8,         }, // 224
    {X6^Y6,         Y6,            X7,            X8^Y10,        }, // 225
    {X6^Y6,         Y3,            Y6,            X7^Y10,        }, // 226
    {X6^Y6,         X3,            Y3,            X7^Y9,         }, // 227
    {X6^Y6,         Y2,            X3,            Y3^X10,        }, // 228
    {X6^Y6,         X2,            Y2,            X3^Y8,         }, // 229
    {X6,            X7,            Y7^X10,        X8^Y9,         }, // 230
    {Y3,            X6,            X7^Y9,         Y7^X9,         }, // 231
    {X3,            Y3,            X6^Y9,         X7^Y8,         }, // 232
    {Y2,            X3,            Y3^X8,         X6^Y8,         }, // 233
    {X2,            Y2,            X3^Y8,         Y3^X7,         }, // 234
    {X6^Y6,         X6,            X7,            Y7^X11,        }, // 235
    {X6^Y6,         Y3,            X6,            X7^Y10,        }, // 236
    {X6^Y6,         X3,            Y3,            X6^Y10,        }, // 237
    {Z0^X6^Y6,      Y2,            X3,            Y3^X9,         }, // 238
    {Z0^X6^Y6,      X2,            Y2,            X3^Y9,         }, // 239
    {X6^Y6,         X6^Y8,         X7,            Y7,            }, // 240
    {X6^Y6,         X6^Y8,         Y3,            X7,            }, // 241
    {X6^Y6,         X6^Y8,         X3,            Y3,            }, // 242
    {Z0^X6^Y6,      X6^Y8,         Y2,            X3,            }, // 243
    {Z0^X6^Y6,      X6^Y8,         X2,            Y2,            }, // 244
    {Y6^X7,         X7,            Y7,            X8^Y10,        }, // 245
    {Y6^X7,         Y3,            X7,            Y7^X10,        }, // 246
    {Y6^X7,         X3,            Y3,            X7^Y9,         }, // 247
    {Z1^Y6^X7,      Y2,            X3,            Y3^X9,         }, // 248
    {Z1^Y6^X7,      X2,            Y2,            X3^Y8,         }, // 249
    {Y6^X7,         X6^Y7,         X7,            Y7,            }, // 250
    {Y6^X7,         X6^Y7,         Y3,            X7,            }, // 251
    {Y6^X7,         X6^Y7,         X3,            Y3,            }, // 252
    {Z1^Y6^X7,      Z0^X6^Y7,      Y2,            X3,            }, // 253
    {Z1^Y6^X7,      Z0^X6^Y7,      X2,            Y2,            }, // 254
    {X5^Y7,         X6^Y6,         0,             0,             }, // 255
    {Y5^X6,         Y2^X5^Y6,      0,             0,             }, // 256
    {Y4^X6,         X2^X5^Y5,      0,             0,             }, // 257
    {Y4^X5,         Y1^X4^Y5,      0,             0,             }, // 258
    {X5^Y7,         Y5^X7,         Y2^X6^Y6,      0,             }, // 259
    {X5^Y6,         Y4^X7,         X2^Y5^X6,      0,             }, // 260
    {X3^Y6,         Y4^X6,         Y1^X5^Y5,      0,             }, // 261
    {Y5^X9,         Y6^X8,         X6^Y8,         X7^Y7,         }, // 262
    {Y5^X8,         X5^Y8,         Y6^X7,         Y2^X6^Y7,      }, // 263
    {Y3^X8,         X5^Y7,         Y5^X7,         Y2^X6^Y6,      }, // 264
    {Y3^X7,         X3^Y7,         Y5^X6,         Y1^X5^Y6,      }, // 265
    {Y3,            X5^Y9,         X6^Y8,         X7^Y7,         }, // 266
    {Y2,            Y3^X7,         X3^Y8,         X5^Y7,         }, // 267
    {Y6^X8,         X6^Y8,         X7^Y7,         Z0^X5^Y5,      }, // 268
    {X5^Y8,         Y6^X7,         Y2^X6^Y7,      Z0^X5^Y5,      }, // 269
    {Y3^X7,         X5^Y7,         X2^X6^Y6,      Z0^X5^Y5,      }, // 270
    {Y3^X6,         X3^Y7,         Y1^X5^Y6,      Z0^X5^Y5,      }, // 271
    {Y3,            X5,            X6^Y10,        Y7^X9,         }, // 272
    {X3,            Y3,            X5^Y10,        X6^Y9,         }, // 273
    {Y2,            X3,            Y3^X8,         X5^Y9,         }, // 274
    {X2,            Y2,            Y3^X7,         X3^Y9,         }, // 275
    {Y3,            X6^Y8,         Y6^X8,         Y2^X7^Y7,      }, // 276
    {X3,            Y3^X8,         X6^Y7,         X2^Y6^X7,      }, // 277
    {Y2,            Y3^X7,         X3^Y7,         Y1^X6^Y6,      }, // 278
    {Y3,            X6,            Y6^X10,        Y7^X9,         }, // 279
    {X3,            Y3,            Y6^X9,         X6^Y9,         }, // 280
    {X2,            X3,            Y3^X9,         X6^Y8,         }, // 281
    {X6^Y6,         Y2,            X3,            Y3^X9,         }, // 282
    {X6^Y6,         X2,            Y2,            Y3^X8,         }, // 283
    {Y3,            X6,            Y7^X9,         X7^Y9,         }, // 284
    {X3,            Y3,            X6^Y9,         Y7^X8,         }, // 285
    {X2,            Y2,            Y3^X7,         X3^Y8,         }, // 286
    {Z0^Y6^X7,      Y2,            X3,            Y3^X9,         }, // 287
    {Z0^Y6^X7,      X2,            Y2,            Y3^X8,         }, // 288
    {Z0^Y6^X7,      Z4^X6^Y7,      X2,            X3,            }, // 289
    {Z0^Y6^X7,      Z4^X6^Y7,      X2,            Y2,            }, // 290
    {X5^Y6,         Y2^Y5^X6,      0,             0,             }, // 291
    {X2^X5^Y6,      Y2^Y5^X6,      0,             0,             }, // 292
    {X2^X5^Y5,      Y1^Y4^X6,      0,             0,             }, // 293
    {X1^X4^Y5,      Y1^Y4^X5,      0,             0,             }, // 294
    {Y4^X8,         X2^X6^Y6,      Y2^Y5^X7,      0,             }, // 295
    {Y4^X7,         Y2^Y5^X6,      Y1^X5^Y6,      0,             }, // 296
    {Y3^X7,         X1^X5^Y5,      Y1^Y4^X6,      0,             }, // 297
    {X5^Y8,         X6^Y7,         Y5^X8,         Y2^Y6^X7,      }, // 298
    {X5^Y8,         Y5^X8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 299
    {Y3^X8,         X5^Y7,         X2^Y5^X7,      Y1^X6^Y6,      }, // 300
    {Y3^X7,         X3^Y7,         X1^Y5^X6,      Y1^X5^Y6,      }, // 301
    {Y3,            Y5^X9,         X6^Y8,         Y6^X8,         }, // 302
    {Y3,            X6^Y8,         Y5^X9,         X2^X7^Y7,      }, // 303
    {X3,            Y3^X9,         Y5^X8,         Y2^Y6^X7,      }, // 304
    {Y2,            X3^Y7,         Y3^X8,         X1^X6^Y6,      }, // 305
    {X5^Y8,         X6^Y7,         Y2^Y6^X7,      Z0^X5^Y5,      }, // 306
    {X5^Y8,         X2^X6^Y7,      Y2^Y6^X7,      Z0^X5^Y5,      }, // 307
    {Y3^X8,         Y2^Y5^X7,      Y1^X6^Y6,      Z0^X5^Y5,      }, // 308
    {Y3^X7,         Y2^X6^Y6,      X1^X5^Y7,      Y1^X5^Y5,      }, // 309
    {Y3,            X5^Y9,         X6^Y8,         X2^Y6^X8,      }, // 310
    {X3,            Y3^X8,         X5^Y8,         X2^Y6^X7,      }, // 311
    {Y2,            Y3^X8,         X3^Y7,         X1^Y5^X7,      }, // 312
    {Y3,            X6^Y8,         X2^X7^Y7,      Y2^Y6^X8,      }, // 313
    {X3,            Y3^X8,         Y2^Y6^X7,      Y1^X6^Y7,      }, // 314
    {X3,            Y3^X8,         Y2^Y6^X7,      X1^X6^Y7,      }, // 315
    {X6^Y6,         X3,            Y3,            Y6^X10,        }, // 316
    {X6^Y6,         X2,            X3,            Y3^X10,        }, // 317
    {X3,            Y3,            X6^Y9,         X2^X7^Y8,      }, // 318
    {X2,            X3,            Y3^X9,         Y2^Y6^X8,      }, // 319
    {X2,            X3,            Y3^X8,         Y2^X7^Y7,      }, // 320
    {Z3^X6^Y6,      Y2,            X3,            Y3^X9,         }, // 321
    {Z3^X6^Y6,      X2,            Y2,            Y3^X9,         }, // 322
    {Z3^X6^Y6,      X6^Y8,         Y2,            X3,            }, // 323
    {Z3^X6^Y6,      X6^Y8,         X2,            Y2,            }, // 324
    {Z4^Y6^X7,      X2,            X3,            Y3^X9,         }, // 325
    {Y1^Y6^X7,      X2,            X3,            Y3^X9,         }, // 326
    {Z4^Y6^X7,      Z3^X6^Y7,      Y2,            X3,            }, // 327
    {Z4^Y6^X7,      Z3^X6^Y7,      X2,            Y2,            }, // 328
    {Y1^Y4^X6,      X2^X5^Y5,      0,             0,             }, // 329
    {Y1^X5^Y7,      X2^X6^Y6,      Y2^Y5^X7,      0,             }, // 330
    {X1^X5^Y6,      Y1^Y4^X7,      X2^Y5^X6,      0,             }, // 331
    {Y5^X8,         Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 332
    {Y3^X8,         Y1^X5^Y7,      X1^Y5^X7,      Y2^X6^Y6,      }, // 333
    {Y3^X7,         Y1^X4^Y7,      Y2^X5^Y6,      X1^Y5^X6,      }, // 334
    {Y3,            X5^Y9,         X6^Y8,         X2^X7^Y7,      }, // 335
    {Y3,            X5^Y9,         Y1^X6^Y8,      X2^X7^Y7,      }, // 336
    {X3,            Y3^X8,         X5^Y7,         X1^X6^Y6,      }, // 337
    {Y2,            Y3^X7,         X3^Y7,         Y0^X5^Y6,      }, // 338
    {Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      Z0^X5^Y5,      }, // 339
    {X1^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      Y1^X5^Y5,      }, // 340
    {X1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      Y1^X5^Y5,      }, // 341
    {Y3,            X5^Y9,         Y1^X6^Y8,      X2^Y6^X8,      }, // 342
    {X3,            Y3^X9,         Y1^X6^Y7,      X1^Y5^X8,      }, // 343
    {X3,            Y3^X8,         Y1^X5^Y8,      Y2^X6^Y7,      }, // 344
    {X3,            Y3,            X5^Y10,        Y1^X6^Y9,      }, // 345
    {Y2,            X3,            Y3^X8,         X5^Y8,         }, // 346
    {Y3,            Y1^X6^Y8,      X2^X7^Y7,      Y2^Y6^X8,      }, // 347
    {Y3,            X1^X6^Y8,      Y2^Y6^X8,      X2^X7^Y7,      }, // 348
    {Y3,            X1^X6^Y8,      X2^X7^Y7,      Y2^Y6^X8,      }, // 349
    {X3,            Y3,            Y6^X9,         Y1^X6^Y9,      }, // 350
    {X2,            X3,            Y3^X9,         Y1^X6^Y8,      }, // 351
    {X2^X6^Y6,      Y2,            X3,            Y3^X9,         }, // 352
    {Y1^X6^Y6,      X2,            Y2,            Y3^X8,         }, // 353
    {X3,            Y3,            Y1^X6^Y9,      X2^X7^Y8,      }, // 354
    {X3,            Y3,            X1^X6^Y9,      Y2^Y7^X8,      }, // 355
    {X3,            Y3,            X1^X6^Y9,      X2^X7^Y8,      }, // 356
    {Z2^X6^Y6,      X2,            X3,            Y3^X10,        }, // 357
    {Y0^X6^Y6,      X2,            X3,            Y3^X9,         }, // 358
    {Z2^X6^Y6,      X6^Y8,         Y2,            X3,            }, // 359
    {Z2^X6^Y6,      Y1^X6^Y8,      X2,            Y2,            }, // 360
    {Y6^X7,         X3,            Y3,            Y1^X7^Y9,      }, // 361
    {Y1^Y6^X7,      X3,            Y3,            X1^X7^Y9,      }, // 362
    {Y0^Y6^X7,      X3,            Y3,            X1^X7^Y9,      }, // 363
    {Z3^Y6^X7,      Z2^X6^Y7,      X2,            X3,            }, // 364
    {Z2^Y6^X7,      Y0^X6^Y7,      X2,            X3,            }, // 365
    {Y5^X9,         X6^Y8,         Y6^X8,         X7^Y7,         }, // 366
    {Y4^X8,         X5^Y7,         Y5^X7,         X6^Y6,         }, // 367
    {X4^Y7,         Y4^X7,         X5^Y6,         Y5^X6,         }, // 368
    {X5^Y7,         Y4^X8,         X6^Y6,         Y5^X7,         }, // 369
    {X3^Y7,         Y4^X7,         X5^Y6,         Y5^X6,         }, // 370
    {Y5,            X6^Y8,         X7^Y7,         Y6^X8,         }, // 371
    {Y3,            Y5^X8,         X6^Y7,         Y6^X7,         }, // 372
    {X3,            Y3^X8,         X6^Y6,         Y5^X7,         }, // 373
    {Y2,            Y3^X7,         X3^Y6,         Y5^X6,         }, // 374
    {X5,            X6^Y8,         Y6^X8,         X7^Y7,         }, // 375
    {Y3,            X5^Y8,         X6^Y7,         Y6^X7,         }, // 376
    {X3,            Y3^X7,         X5^Y7,         X6^Y6,         }, // 377
    {Y2,            X3^Y7,         Y3^X6,         X5^Y6,         }, // 378
    {X6,            Y6,            X7^Y8,         Y7^X8,         }, // 379
    {Y3,            X6,            Y6^X8,         X7^Y7,         }, // 380
    {X3,            Y3,            X6^Y7,         Y6^X7,         }, // 381
    {Y2,            X3,            Y3^X7,         X6^Y6,         }, // 382
    {X2,            Y2,            X3^Y6,         Y3^X6,         }, // 383
    {Y6,            X7^Y8,         Y7^X8,         X5^Y6,         }, // 384
    {X6,            X7^Y7,         Y6^X8,         X5^Y6,         }, // 385
    {Y3,            X6^Y7,         Y6^X7,         X5^Y6,         }, // 386
    {X3,            Y3^X7,         X6^Y6,         Z0^X5^Y6,      }, // 387
    {Y2,            Y3^X6,         X3^Y6,         Z0^X5^Y6,      }, // 388
    {Y3,            X6,            X7^Y7,         Y6^X8,         }, // 389
    {X2,            Y2,            Y3^X6,         X3^Y6,         }, // 390
    {X6^Y6,         Y6,            X7,            Y7^X8,         }, // 391
    {X6^Y6,         Y3,            Y6,            X7^Y7,         }, // 392
    {X6^Y6,         X3,            Y3,            Y6^X7,         }, // 393
    {X6^Y6,         Y2,            X3,            Y3^X7,         }, // 394
    {X3^Y6,         X2,            Y2,            Y3^X6,         }, // 395
    {X6,            X7,            Y7^X8,         X6^Y6,         }, // 396
    {Y3,            X6,            X7^Y7,         X6^Y6,         }, // 397
    {X3,            Y3,            X6^Y7,         X6^Y6,         }, // 398
    {Y2,            X3,            Y3^X7,         Z0^X6^Y6,      }, // 399
    {X2,            X3,            Y3^X6,         Y2^X6^Y6,      }, // 400
    {X6^Y6,         X6,            X7,            Y7^X8,         }, // 401
    {X6^Y6,         Y3,            X6,            X7^Y7,         }, // 402
    {X6^Y6,         X3,            Y3,            X6^Y7,         }, // 403
    {Z0^X6^Y6,      Y2,            X3,            Y3^X7,         }, // 404
    {Y2^X6^Y6,      X2,            X3,            Y3^X6,         }, // 405
    {Z0^X6^Y6,      X3^Y8,         Y2,            Y3,            }, // 406
    {Y2^X6^Y6,      X3^Y8,         X2,            Y3,            }, // 407
    {Y6^X7,         X7,            Y7,            X6^Y7,         }, // 408
    {Y6^X7,         Y3,            X7,            X6^Y7,         }, // 409
    {Y6^X7,         X3,            Y3,            X6^Y7,         }, // 410
    {Y2^Y6^X7,      X3,            Y3,            Z0^X6^Y7,      }, // 411
    {Y2^Y6^X7,      X3,            Y3,            X2^X6^Y7,      }, // 412
    {Y2^Y6^X7,      Z0^X6^Y7,      X3,            Y3,            }, // 413
    {Y2^Y6^X7,      X2^X6^Y7,      X3,            Y3,            }, // 414
    {X5^Y9,         Y6^X8,         X6^Y8,         X7^Y7,         }, // 415
    {Y4^X8,         X5^Y7,         Y5^X7,         X2^X6^Y6,      }, // 416
    {Y4^X7,         X4^Y7,         Y5^X6,         Y1^X5^Y6,      }, // 417
    {Y4^X8,         X5^Y7,         Y5^X7,         Y2^X6^Y6,      }, // 418
    {Y4^X7,         X3^Y7,         Y5^X6,         Y1^X5^Y6,      }, // 419
    {X5,            Y6^X8,         X6^Y8,         X7^Y7,         }, // 420
    {Y3,            X5^Y8,         Y6^X7,         Y2^X6^Y7,      }, // 421
    {X3,            Y3^X7,         X5^Y7,         Y2^X6^Y6,      }, // 422
    {Y2,            Y3^X6,         X3^Y7,         Y1^X5^Y6,      }, // 423
    {X3,            Y3^X7,         X5^Y7,         X2^X6^Y6,      }, // 424
    {Y3,            X5,            X6^Y8,         X7^Y7,         }, // 425
    {X3,            Y3,            X5^Y8,         X6^Y7,         }, // 426
    {X3,            Y3,            X5^Y8,         Y2^X6^Y7,      }, // 427
    {Y2,            X3,            Y3^X6,         X5^Y6,         }, // 428
    {X2,            Y2,            Y3^X5,         X3^Y6,         }, // 429
    {X6,            Y6^X8,         X7^Y7,         X5^Y6,         }, // 430
    {Y3,            Y6^X7,         Y2^X6^Y7,      X5^Y6,         }, // 431
    {X3,            Y3^X7,         Y2^X6^Y6,      Z0^X5^Y6,      }, // 432
    {X3,            Y3^X7,         Y2^X6^Y6,      Y1^X5^Y6,      }, // 433
    {X3,            Y3,            Y6^X7,         Y2^X6^Y7,      }, // 434
    {X2,            X3,            Y3^X7,         Y2^X6^Y6,      }, // 435
    {X6^Y6,         X3,            Y3,            Y2^X6^Y7,      }, // 436
    {X3,            Y3,            Y2^X6^Y7,      X6^Y6,         }, // 437
    {X3,            Y3,            X2^X6^Y7,      Y2^X6^Y6,      }, // 438
    {Y2^X6^Y6,      X3,            Y3,            X2^X6^Y7,      }, // 439
    {X6^Y6,         X6^Y8,         Y3,            Y7,            }, // 440
    {X6^Y6,         Y2^X6^Y8,      X3,            Y3,            }, // 441
    {Y2^X6^Y6,      X2^X6^Y8,      X3,            Y3,            }, // 442
    {Y6^X7,         Y3,            Y7,            X6^Y7,         }, // 443
    {Y6^X7,         X3,            Y3,            Y2^X6^Y7,      }, // 444
    {Y6^X7,         X6^Y7,         Y3,            Y7,            }, // 445
    {Y6^X7,         Y2^X6^Y7,      X3,            Y3,            }, // 446
    {X5^Y8,         Y5^X8,         X6^Y7,         Y2^Y6^X7,      }, // 447
    {X5^Y8,         Y5^X8,         X2^X6^Y7,      Y2^Y6^X7,      }, // 448
    {Y4^X8,         X5^Y7,         X2^X6^Y6,      Y1^Y5^X7,      }, // 449
    {X4^Y7,         Y4^X7,         X1^X5^Y6,      Y1^Y5^X6,      }, // 450
    {Y4^X9,         X6^Y7,         Y5^X8,         Y2^Y6^X7,      }, // 451
    {X5^Y7,         Y4^X8,         X2^Y5^X7,      Y1^X6^Y6,      }, // 452
    {X3^Y7,         Y4^X7,         X1^Y5^X6,      Y1^X5^Y6,      }, // 453
    {Y3,            X6^Y7,         Y5^X8,         Y2^Y6^X7,      }, // 454
    {Y3,            Y5^X8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 455
    {X3,            Y3^X8,         X2^Y5^X7,      Y1^X6^Y6,      }, // 456
    {Y2,            Y3^X6,         X3^Y6,         X1^X5^Y5,      }, // 457
    {Y3,            X5^Y8,         X6^Y7,         Y2^Y6^X7,      }, // 458
    {Y3,            X5^Y8,         X2^X6^Y7,      Y2^Y6^X7,      }, // 459
    {X3,            Y3^X8,         Y2^Y5^X7,      Y1^X6^Y6,      }, // 460
    {X3,            Y3^X7,         Y2^X6^Y6,      X1^X5^Y7,      }, // 461
    {X3,            Y3,            X6^Y7,         Y2^Y6^X7,      }, // 462
    {X3,            Y3,            X2^X6^Y7,      Y2^Y6^X7,      }, // 463
    {X2,            X3,            Y3^X7,         Y2^Y5^X6,      }, // 464
    {X2,            X3,            Y3^X6,         Y2^X5^Y6,      }, // 465
    {Y3,            X6^Y7,         Y2^Y6^X7,      X5^Y6,         }, // 466
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      X5^Y6,         }, // 467
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      Z0^X5^Y6,      }, // 468
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      X1^X5^Y6,      }, // 469
    {X3,            Y3,            X2^Y6^X7,      Y2^X6^Y7,      }, // 470
    {X6^Y6,         X3,            Y3,            Y2^Y6^X7,      }, // 471
    {Y2^X6^Y6,      X3,            Y3,            X2^X6^Y6,      }, // 472
    {X3,            Y3,            Y2^Y6^X7,      X6^Y6,         }, // 473
    {Y2^Y6^X7,      X3,            Y3,            X6^Y7,         }, // 474
    {Y2^Y6^X7,      X6^Y7,         X3,            Y3,            }, // 475
    {Y4^X8,         X1^X5^Y7,      Y1^Y5^X7,      X2^X6^Y6,      }, // 476
    {Y4^X7,         Y0^X4^Y7,      X1^X5^Y6,      Y1^Y5^X6,      }, // 477
    {Y4^X8,         Y1^X5^Y7,      X1^Y5^X7,      Y2^X6^Y6,      }, // 478
    {Y3^X7,         Y0^X4^Y6,      X1^Y4^X6,      Y1^X5^Y5,      }, // 479
    {Y3,            X5^Y8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 480
    {Y3,            Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 481
    {X3,            Y3^X7,         Y1^X5^Y6,      X1^Y5^X6,      }, // 482
    {X3,            Y3^X6,         Y1^X4^Y6,      Y2^X5^Y5,      }, // 483
    {Y3,            X1^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      }, // 484
    {Y3,            X1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 485
    {X3,            Y3,            Y1^X5^Y7,      X2^X6^Y6,      }, // 486
    {X3,            Y3,            X1^X5^Y7,      Y2^X6^Y6,      }, // 487
    {X3,            Y3,            X1^X5^Y7,      X2^X6^Y6,      }, // 488
    {Y3,            X2^Y6^X7,      Y1^X6^Y7,      Y2^X5^Y6,      }, // 489
    {X3,            Y3,            X2^Y6^X7,      Y1^X6^Y7,      }, // 490
    {X2^X6^Y6,      X3,            Y3,            Y1^X6^Y6,      }, // 491
    {X2^X6^Y6,      X3,            Y3,            Y2^X6^Y6,      }, // 492
    {X3,            Y3,            Y1^X6^Y7,      X2^X6^Y6,      }, // 493
    {Y2^X6^Y6,      X3,            Y3,            Y1^X6^Y7,      }, // 494
    {Y2^X6^Y6,      Y1^X6^Y8,      X3,            Y3,            }, // 495
    {Y2^Y6^X7,      X3,            Y3,            Y1^X6^Y7,      }, // 496
    {X6^X8^Y8,      Y6,            X7,            X8^Y10,        }, // 497
    {X6^X8^Y8,      Y3,            Y6,            X7^Y10,        }, // 498
    {X6^X8^Y8,      X3,            Y3,            X7^Y9,         }, // 499
    {X6^X8^Y8,      Y2,            X3,            Y3^X10,        }, // 500
    {X6^X8^Y8,      X2,            Y2,            X3^Y8,         }, // 501
    {Z0^X6^Y6,      X6,            X7,            Y7^X11,        }, // 502
    {Z0^X6^Y6,      Y3,            X6,            X7^Y10,        }, // 503
    {Z0^X6^Y6,      X3,            Y3,            X6^Y10,        }, // 504
    {Z0^X6^Y6,      X6^X9^Y9,      X7,            Y7,            }, // 505
    {Z0^X6^Y6,      X6^X9^Y9,      Y3,            X7,            }, // 506
    {Z0^X6^Y6,      X6^X9^Y9,      X3,            Y3,            }, // 507
    {Z0^X6^Y6,      X6^X9^Y9,      Y2,            X3,            }, // 508
    {Z0^X6^Y6,      X6^X9^Y9,      X2,            Y2,            }, // 509
    {Z1^Y6^X7,      X7,            Y7,            X8^Y10,        }, // 510
    {Z1^Y6^X7,      Y3,            X7,            Y7^X10,        }, // 511
    {Z1^Y6^X7,      X3,            Y3,            X7^Y9,         }, // 512
    {Z1^Y6^X7,      Z0^X6^Y7,      X7,            Y7,            }, // 513
    {Z1^Y6^X7,      Z0^X6^Y7,      Y3,            X7,            }, // 514
    {Z1^Y6^X7,      Z0^X6^Y7,      X3,            Y3,            }, // 515
    {Y6^X7,         S0^X6^Y7,      0,             0,             }, // 516
    {Y5^X7,         S0^X6^Y6,      0,             0,             }, // 517
    {Y5^X6,         S0^X5^Y6,      0,             0,             }, // 518
    {Y4^X6,         S0^X5^Y5,      0,             0,             }, // 519
    {Y4^X5,         S0^X4^Y5,      0,             0,             }, // 520
    {X6^Y8,         Y6^X8,         S0^X7^Y7,      0,             }, // 521
    {X6^Y7,         Y5^X8,         S0^Y6^X7,      0,             }, // 522
    {X5^Y7,         Y5^X7,         S0^X6^Y6,      0,             }, // 523
    {X5^Y6,         Y4^X7,         S0^Y5^X6,      0,             }, // 524
    {X3^Y6,         Y4^X6,         S0^X5^Y5,      0,             }, // 525
    {Y6^X9,         X6^Y9,         Y7^X8,         S0^X7^Y8,      }, // 526
    {Y5^X9,         X6^Y8,         Y6^X8,         S0^X7^Y7,      }, // 527
    {Y5^X8,         X5^Y8,         Y6^X7,         S0^X6^Y7,      }, // 528
    {Y3^X8,         X5^Y7,         Y5^X7,         S0^X6^Y6,      }, // 529
    {Y3^X7,         X3^Y7,         Y5^X6,         S0^X5^Y6,      }, // 530
    {Y5,            X6^Y9,         X7^Y8,         Y6^X9,         }, // 531
    {X3,            Y3^X9,         X6^Y7,         Y5^X8,         }, // 532
    {Y2,            Y3^X8,         X3^Y7,         Y5^X7,         }, // 533
    {Y6^X9,         Y7^X8,         S0^X7^Y8,      Z0^X5^Y5,      }, // 534
    {X6^Y8,         Y6^X8,         S0^X7^Y7,      Z0^X5^Y5,      }, // 535
    {X5^Y8,         Y6^X7,         S0^X6^Y7,      Z0^X5^Y5,      }, // 536
    {Y3^X7,         X5^Y7,         S0^X6^Y6,      Z0^X5^Y5,      }, // 537
    {Y3^X6,         X3^Y7,         S0^X5^Y6,      Z0^X5^Y5,      }, // 538
    {X6,            Y6,            Y7^X10,        X7^Y10,        }, // 539
    {Y6,            X7^Y9,         Y7^X9,         S0^X8^Y8,      }, // 540
    {X6,            X7^Y8,         Y6^X9,         S0^Y7^X8,      }, // 541
    {Y3,            X6^Y8,         Y6^X8,         S0^X7^Y7,      }, // 542
    {X3,            Y3^X8,         X6^Y7,         S0^Y6^X7,      }, // 543
    {Y2,            Y3^X7,         X3^Y7,         S0^X6^Y6,      }, // 544
    {X6^X8^Y8,      Y6,            X7,            Y7^X11,        }, // 545
    {X6^X8^Y8,      X3,            Y3,            Y6^X10,        }, // 546
    {X6^X8^Y8,      X2,            Y2,            Y3^X9,         }, // 547
    {X6,            X7,            Y7^X10,        Y8^X9,         }, // 548
    {Z0^Y6^X7,      X7,            Y7,            X8^Y10,        }, // 549
    {Z0^Y6^X7,      Y3,            X7,            X8^Y9,         }, // 550
    {Z0^Y6^X7,      X3,            Y3,            X7^Y9,         }, // 551
    {Z0^Y6^X7,      Z4^X6^Y7,      X7,            Y7,            }, // 552
    {Z0^Y6^X7,      Z4^X6^Y7,      Y3,            X7,            }, // 553
    {Z0^Y6^X7,      Z4^X6^Y7,      X3,            Y3,            }, // 554
    {Z0^Y6^X7,      Z4^X6^Y7,      Y2,            X3,            }, // 555
    {S0^X6^Y7,      S1^Y6^X7,      0,             0,             }, // 556
    {S0^Y5^X7,      S1^X6^Y6,      0,             0,             }, // 557
    {S0^X5^Y6,      S1^Y5^X6,      0,             0,             }, // 558
    {S0^Y4^X6,      S1^X5^Y5,      0,             0,             }, // 559
    {S0^X4^Y5,      S1^Y4^X5,      0,             0,             }, // 560
    {Y5^X9,         S0^X7^Y7,      S1^Y6^X8,      0,             }, // 561
    {Y5^X8,         S0^X6^Y7,      S1^Y6^X7,      0,             }, // 562
    {Y4^X8,         S0^X6^Y6,      S1^Y5^X7,      0,             }, // 563
    {Y4^X7,         S0^X5^Y6,      S1^Y5^X6,      0,             }, // 564
    {Y3^X7,         S0^X5^Y5,      S1^Y4^X6,      0,             }, // 565
    {X6^Y9,         Y6^X9,         S0^X7^Y8,      S1^Y7^X8,      }, // 566
    {X6^Y8,         Y5^X9,         S0^X7^Y7,      S1^Y6^X8,      }, // 567
    {X5^Y8,         Y5^X8,         S0^X6^Y7,      S1^Y6^X7,      }, // 568
    {Y3^X8,         X5^Y7,         S0^X6^Y6,      S1^Y5^X7,      }, // 569
    {Y3^X7,         X3^Y7,         S0^X5^Y6,      S1^Y5^X6,      }, // 570
    {X6,            X7^Y9,         Y6^X10,        S0^X8^Y8,      }, // 571
    {Y5,            X6^Y9,         Y6^X9,         S0^X7^Y8,      }, // 572
    {Y3,            X6^Y8,         Y5^X9,         S0^X7^Y7,      }, // 573
    {X3,            Y3^X9,         Y5^X8,         S0^X6^Y7,      }, // 574
    {Y2,            X3^Y7,         Y3^X8,         S0^X6^Y6,      }, // 575
    {Y6^X9,         S0^X7^Y8,      S1^Y7^X8,      Z0^X5^Y5,      }, // 576
    {X6^Y8,         S0^Y6^X8,      S1^X7^Y7,      Z0^X5^Y5,      }, // 577
    {X5^Y8,         S0^X6^Y7,      S1^Y6^X7,      Z0^X5^Y5,      }, // 578
    {Y3^X8,         S0^X6^Y6,      S1^Y5^X7,      Z0^X5^Y5,      }, // 579
    {Y3^X6,         X3^Y7,         S0^X5^Y6,      S1^X5^Y5,      }, // 580
    {X6,            Y6^X10,        X7^Y9,         S0^Y7^X9,      }, // 581
    {X5,            X6^Y9,         Y6^X9,         S0^X7^Y8,      }, // 582
    {Y3,            X5^Y9,         X6^Y8,         S0^Y6^X8,      }, // 583
    {X3,            Y3^X8,         X5^Y8,         S0^X6^Y7,      }, // 584
    {Y2,            Y3^X8,         X3^Y7,         S0^X6^Y6,      }, // 585
    {Y6,            X7^Y9,         S0^X8^Y8,      S1^Y7^X9,      }, // 586
    {X6,            Y6^X9,         S0^X7^Y8,      S1^Y7^X8,      }, // 587
    {Y3,            X6^Y8,         S0^X7^Y7,      S1^Y6^X8,      }, // 588
    {X3,            Y3^X8,         S0^X6^Y7,      S1^Y6^X7,      }, // 589
    {X6,            X7,            Y7^X10,        S0^X8^Y9,      }, // 590
    {Y3,            X6,            X7^Y9,         S0^Y7^X9,      }, // 591
    {X3,            Y3,            X6^Y9,         S0^X7^Y8,      }, // 592
    {Y2,            X3,            Y3^X9,         S0^X7^Y7,      }, // 593
    {Z3^X6^Y6,      X6,            X7,            Y7^X11,        }, // 594
    {Z3^X6^Y6,      Y3,            X6,            X7^Y10,        }, // 595
    {Z3^X6^Y6,      X3,            Y3,            X6^Y10,        }, // 596
    {Z3^X6^Y6,      X6^X9^Y9,      X7,            Y7,            }, // 597
    {Z3^X6^Y6,      X6^X9^Y9,      Y3,            X7,            }, // 598
    {Z3^X6^Y6,      X6^X9^Y9,      X3,            Y3,            }, // 599
    {Z3^X6^Y6,      X6^X9^Y9,      Y2,            X3,            }, // 600
    {Z3^X6^Y6,      X6^X9^Y9,      X2,            Y2,            }, // 601
    {Z4^Y6^X7,      X7,            Y7,            X8^Y10,        }, // 602
    {Z4^Y6^X7,      Y3,            X7,            Y7^X10,        }, // 603
    {Z4^Y6^X7,      X3,            Y3,            X7^Y9,         }, // 604
    {Z4^Y6^X7,      Y2,            X3,            Y3^X9,         }, // 605
    {S1^Y6^X7,      X2,            Y2,            Y3^X8,         }, // 606
    {Z4^Y6^X7,      Z3^X6^Y7,      X7,            Y7,            }, // 607
    {Z4^Y6^X7,      Z3^X6^Y7,      Y3,            X7,            }, // 608
    {Z4^Y6^X7,      Z3^X6^Y7,      X3,            Y3,            }, // 609
    {S1^Y6^X7,      S2^X6^Y7,      0,             0,             }, // 610
    {S1^Y5^X7,      S2^X6^Y6,      0,             0,             }, // 611
    {S1^Y5^X6,      S2^X5^Y6,      0,             0,             }, // 612
    {S1^Y4^X6,      S2^X5^Y5,      0,             0,             }, // 613
    {S1^Y4^X5,      S2^X4^Y5,      0,             0,             }, // 614
    {S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      0,             }, // 615
    {S0^X6^Y7,      S1^Y5^X8,      S2^Y6^X7,      0,             }, // 616
    {S0^X5^Y7,      S1^Y5^X7,      S2^X6^Y6,      0,             }, // 617
    {S0^X5^Y6,      S1^Y4^X7,      S2^Y5^X6,      0,             }, // 618
    {Y6^X9,         S0^X6^Y9,      S1^Y7^X8,      S2^X7^Y8,      }, // 619
    {Y5^X9,         S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      }, // 620
    {Y5^X8,         S0^X5^Y8,      S1^Y6^X7,      S2^X6^Y7,      }, // 621
    {Y3^X8,         S0^X5^Y7,      S1^Y5^X7,      S2^X6^Y6,      }, // 622
    {Y3^X6,         X3^Y7,         S0^X4^Y6,      S1^X5^Y5,      }, // 623
    {X6,            Y6^X10,        S0^X7^Y9,      S1^Y7^X9,      }, // 624
    {Y5,            X6^Y9,         S0^X7^Y8,      S1^Y6^X9,      }, // 625
    {Y3,            Y5^X9,         S0^X6^Y8,      S1^Y6^X8,      }, // 626
    {X3,            Y3^X9,         S0^X6^Y7,      S1^Y5^X8,      }, // 627
    {Y2,            Y3^X8,         S0^X5^Y7,      S1^Y5^X7,      }, // 628
    {S0^X6^Y9,      S1^Y7^X8,      S2^X7^Y8,      Z0^X5^Y5,      }, // 629
    {S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      Z0^X5^Y5,      }, // 630
    {S0^X5^Y8,      S1^Y6^X7,      S2^X6^Y7,      Z0^X5^Y5,      }, // 631
    {Y3^X7,         S0^X5^Y7,      S1^X6^Y6,      S2^X5^Y5,      }, // 632
    {X5,            X6^Y9,         S0^Y6^X9,      S1^X7^Y8,      }, // 633
    {Y3,            X5^Y9,         S0^X6^Y8,      S1^Y6^X8,      }, // 634
    {Y2,            Y3^X7,         X3^Y8,         S0^X5^Y7,      }, // 635
    {X6,            Y6,            Y7^X10,        S0^X7^Y10,     }, // 636
    {Y3,            X6,            Y6^X10,        S0^X7^Y9,      }, // 637
    {X3,            Y3,            Y6^X9,         S0^X6^Y9,      }, // 638
    {Y2,            X3,            Y3^X9,         S0^X6^Y8,      }, // 639
    {X2,            Y2,            Y3^X8,         S0^X5^Y8,      }, // 640
    {Y6,            S0^X7^Y9,      S1^Y7^X9,      S2^X8^Y8,      }, // 641
    {X6,            S0^X7^Y8,      S1^Y6^X9,      S2^Y7^X8,      }, // 642
    {Y3,            S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      }, // 643
    {X3^X8^Y8,      X2,            Y2,            Y3^X9,         }, // 644
    {X6,            Y7,            S0^X7^Y10,     S1^Y8^X9,      }, // 645
    {Y3,            X6,            S0^X7^Y9,      S1^Y7^X9,      }, // 646
    {X3,            Y3,            S0^X6^Y9,      S1^Y7^X8,      }, // 647
    {Y2,            X3,            Y3^X8,         S0^X6^Y8,      }, // 648
    {Z2^X6^Y6,      X6,            X7,            Y7^X11,        }, // 649
    {Z2^X6^Y6,      Y3,            X6,            X7^Y10,        }, // 650
    {Z2^X6^Y6,      X3,            Y3,            X6^Y10,        }, // 651
    {Z2^X6^Y6,      Y2,            X3,            Y3^X10,        }, // 652
    {S2^X6^Y6,      X2,            Y2,            Y3^X8,         }, // 653
    {Z2^X6^Y6,      X6^X9^Y9,      X7,            Y7,            }, // 654
    {Z2^X6^Y6,      X6^X9^Y9,      Y3,            X7,            }, // 655
    {Z2^X6^Y6,      X6^X9^Y9,      X3,            Y3,            }, // 656
    {Z2^X6^Y6,      X6^X9^Y9,      Y2,            X3,            }, // 657
    {Z2^X6^Y6,      X3^X9^Y9,      X2,            Y2,            }, // 658
    {Z3^Y6^X7,      X7,            Y7,            S0^X8^Y10,     }, // 659
    {Z3^Y6^X7,      Y3,            X7,            S0^X8^Y9,      }, // 660
    {Z3^Y6^X7,      X3,            Y3,            S0^X7^Y9,      }, // 661
    {S2^Y6^X7,      Y2,            X3,            Y3^X9,         }, // 662
    {S2^Y6^X7,      X2,            Y2,            Y3^X8,         }, // 663
    {Z3^Y6^X7,      Z2^X6^Y7,      X7,            Y7,            }, // 664
    {Z3^Y6^X7,      Z2^X6^Y7,      Y3,            X7,            }, // 665
    {Z3^Y6^X7,      Z2^X6^Y7,      X3,            Y3,            }, // 666
    {Z3^Y6^X7,      Z2^X6^Y7,      Y2,            X3,            }, // 667
    {Z2^Y6^X7,      S2^X6^Y7,      X2,            Y2,            }, // 668
    {Y6,            X7^Y8,         Y7^X8,         Z0^X5^Y6,      }, // 669
    {X6,            X7^Y7,         Y6^X8,         Z0^X5^Y6,      }, // 670
    {Y3,            X6^Y7,         Y6^X7,         Z0^X5^Y6,      }, // 671
    {X6^X8^Y8,      Y6,            X7,            Y7^X8,         }, // 672
    {X6^X8^Y8,      Y3,            Y6,            X7^Y7,         }, // 673
    {X6^X8^Y8,      X3,            Y3,            Y6^X7,         }, // 674
    {X6^X8^Y8,      Y2,            X3,            Y3^X7,         }, // 675
    {X3^X8^Y8,      X2,            Y2,            Y3^X6,         }, // 676
    {X6,            X7,            Y7^X8,         Z0^X6^Y6,      }, // 677
    {Y3,            X6,            X7^Y7,         Z0^X6^Y6,      }, // 678
    {X3,            Y3,            X6^Y7,         Z0^X6^Y6,      }, // 679
    {Z0^X6^Y6,      X6,            X7,            Y7^X8,         }, // 680
    {Z0^X6^Y6,      Y3,            X6,            X7^Y7,         }, // 681
    {Z0^X6^Y6,      X3,            Y3,            X6^Y7,         }, // 682
    {Z0^X6^Y6,      X3^X9^Y9,      Y2,            Y3,            }, // 683
    {Y2^X6^Y6,      X3^X9^Y9,      X2,            Y3,            }, // 684
    {Z1^Y6^X7,      X7,            Y7,            Z0^X6^Y7,      }, // 685
    {Z1^Y6^X7,      Y3,            X7,            Z0^X6^Y7,      }, // 686
    {Z1^Y6^X7,      X3,            Y3,            Z0^X6^Y7,      }, // 687
    {Y4^X8,         X5^Y7,         Y5^X7,         S0^X6^Y6,      }, // 688
    {Y4^X7,         X4^Y7,         Y5^X6,         S0^X5^Y6,      }, // 689
    {Y4^X7,         X3^Y7,         Y5^X6,         S0^X5^Y6,      }, // 690
    {X6,            Y6^X9,         Y7^X8,         S0^X7^Y8,      }, // 691
    {Y5,            X6^Y8,         Y6^X8,         S0^X7^Y7,      }, // 692
    {Y3,            Y5^X8,         Y6^X7,         S0^X6^Y7,      }, // 693
    {X3,            Y3^X8,         Y5^X7,         S0^X6^Y6,      }, // 694
    {Y2,            Y3^X6,         X3^Y6,         X5^Y5,         }, // 695
    {X5,            X6^Y8,         Y6^X8,         S0^X7^Y7,      }, // 696
    {Y3,            X5^Y8,         Y6^X7,         S0^X6^Y7,      }, // 697
    {X3,            Y3^X7,         X5^Y7,         S0^X6^Y6,      }, // 698
    {Y2,            Y3^X6,         X3^Y7,         S0^X5^Y6,      }, // 699
    {X6,            Y6,            Y7^X8,         S0^X7^Y8,      }, // 700
    {Y3,            X6,            Y6^X8,         S0^X7^Y7,      }, // 701
    {X3,            Y3,            Y6^X7,         S0^X6^Y7,      }, // 702
    {Y2,            X3,            Y3^X7,         S0^X6^Y6,      }, // 703
    {Y6,            Y7^X8,         S0^X7^Y8,      Z0^X5^Y6,      }, // 704
    {X6,            Y6^X8,         S0^X7^Y7,      Z0^X5^Y6,      }, // 705
    {Y3,            Y6^X7,         S0^X6^Y7,      Z0^X5^Y6,      }, // 706
    {X3,            Y3^X7,         S0^X6^Y6,      Z0^X5^Y6,      }, // 707
    {Y2,            Y3^X6,         X3^Y6,         S0^X5^Y6,      }, // 708
    {X6^X8^Y8,      Y6,            Y7,            S0^X7^Y8,      }, // 709
    {X6^X8^Y8,      Y3,            Y6,            S0^X7^Y7,      }, // 710
    {S0^X8^Y8,      X3,            Y3,            X6^Y6,         }, // 711
    {S0^X8^Y8,      Y2,            X3,            Y3^X6,         }, // 712
    {X6,            Y7,            S0^X7^Y8,      Z0^X6^Y6,      }, // 713
    {Y3,            X6,            S0^X7^Y7,      Z0^X6^Y6,      }, // 714
    {X3,            Y3,            S0^X6^Y7,      Z0^X6^Y6,      }, // 715
    {Y2,            X3,            Y3^X6,         S0^X6^Y6,      }, // 716
    {Z0^X6^Y6,      X6,            Y7,            S0^X7^Y8,      }, // 717
    {Z0^X6^Y6,      Y3,            X6,            S0^X7^Y7,      }, // 718
    {Z0^X6^Y6,      X3,            Y3,            S0^X6^Y7,      }, // 719
    {S0^X6^Y6,      Y2,            X3,            Y3^X6,         }, // 720
    {Z0^X6^Y6,      X6^X9^Y9,      Y7,            S0^X7,         }, // 721
    {Z0^X6^Y6,      X6^X9^Y9,      Y3,            S0^X7,         }, // 722
    {Z0^X6^Y6,      S0^X9^Y9,      X3,            Y3,            }, // 723
    {S0^X6^Y6,      X3^X9^Y9,      Y2,            Y3,            }, // 724
    {Z0^Y6^X7,      Y7,            S0^X7,         Z4^X6^Y7,      }, // 725
    {Z0^Y6^X7,      Y3,            S0^X7,         Z4^X6^Y7,      }, // 726
    {Z0^Y6^X7,      X3,            Y3,            S0^X6^Y7,      }, // 727
    {S0^Y6^X7,      X3,            Y3,            Y2^X6^Y7,      }, // 728
    {Z0^Y6^X7,      Z4^X6^Y7,      Y7,            S0^X7,         }, // 729
    {Z0^Y6^X7,      Z4^X6^Y7,      Y3,            S0^X7,         }, // 730
    {Z0^Y6^X7,      S0^X6^Y7,      X3,            Y3,            }, // 731
    {S0^Y6^X7,      Y2^X6^Y7,      X3,            Y3,            }, // 732
    {Y5^X9,         X6^Y8,         S0^Y6^X8,      S1^X7^Y7,      }, // 733
    {Y4^X8,         X5^Y7,         S0^Y5^X7,      S1^X6^Y6,      }, // 734
    {X4^Y7,         Y4^X7,         S0^X5^Y6,      S1^Y5^X6,      }, // 735
    {X5^Y7,         Y4^X8,         S0^X6^Y6,      S1^Y5^X7,      }, // 736
    {X3^Y7,         Y4^X7,         S0^X5^Y6,      S1^Y5^X6,      }, // 737
    {Y5,            X6^Y8,         S0^X7^Y7,      S1^Y6^X8,      }, // 738
    {Y3,            Y5^X8,         S0^X6^Y7,      S1^Y6^X7,      }, // 739
    {X3,            Y3^X8,         S0^X6^Y6,      S1^Y5^X7,      }, // 740
    {Y2,            Y3^X6,         X3^Y6,         S0^X5^Y5,      }, // 741
    {X5,            X6^Y8,         S0^Y6^X8,      S1^X7^Y7,      }, // 742
    {Y3,            X5^Y8,         S0^X6^Y7,      S1^Y6^X7,      }, // 743
    {X6,            Y6,            S0^X7^Y8,      S1^Y7^X8,      }, // 744
    {Y3,            X6,            S0^Y6^X8,      S1^X7^Y7,      }, // 745
    {X3,            Y3,            S0^X6^Y7,      S1^Y6^X7,      }, // 746
    {Y2,            X3,            Y3^X7,         S0^Y5^X6,      }, // 747
    {Y6,            S0^X7^Y8,      S1^Y7^X8,      Z0^X5^Y6,      }, // 748
    {X6,            S0^X7^Y7,      S1^Y6^X8,      Z0^X5^Y6,      }, // 749
    {Y3,            S0^X6^Y7,      S1^Y6^X7,      Z0^X5^Y6,      }, // 750
    {Y3,            X6,            S0^X7^Y7,      S1^Y6^X8,      }, // 751
    {X6^X8^Y8,      Y6,            S0^X7,         S1^Y7^X8,      }, // 752
    {X6^X8^Y8,      Y3,            S0^X7,         S1^Y6^X8,      }, // 753
    {S1^X8^Y8,      X3,            Y3,            S0^X6^Y6,      }, // 754
    {X6,            S0^X7,         S1^Y7^X8,      Z3^X6^Y6,      }, // 755
    {Y3,            S0^X7,         S1^Y6^X8,      Z3^X6^Y6,      }, // 756
    {X3,            Y3,            S0^X6^Y7,      S1^X6^Y6,      }, // 757
    {Z3^X6^Y6,      X6,            S0^X7,         S1^Y7^X8,      }, // 758
    {Z3^X6^Y6,      Y3,            S0^X7,         S1^Y6^X8,      }, // 759
    {S1^X6^Y6,      X3,            Y3,            S0^X6^Y7,      }, // 760
    {Z3^X6^Y6,      X6^X9^Y9,      S0^X7,         S1^Y7,         }, // 761
    {Z3^X6^Y6,      S1^X9^Y9,      Y3,            S0^X7,         }, // 762
    {S1^X6^Y6,      S0^X9^Y9,      X3,            Y3,            }, // 763
    {Z4^Y6^X7,      S0^X7,         S1^Y7,         Z3^X6^Y7,      }, // 764
    {S1^Y6^X7,      Y3,            S0^X7,         Z3^X6^Y7,      }, // 765
    {S1^Y6^X7,      X3,            Y3,            S0^X6^Y7,      }, // 766
    {Z4^Y6^X7,      Z3^X6^Y7,      S0^X7,         S1^Y7,         }, // 767
    {S1^Y6^X7,      Z3^X6^Y7,      Y3,            S0^X7,         }, // 768
    {S1^Y6^X7,      S0^X6^Y7,      X3,            Y3,            }, // 769
    {Y4^X8,         S0^X5^Y7,      S1^Y5^X7,      S2^X6^Y6,      }, // 770
    {Y4^X7,         S0^X4^Y7,      S1^Y5^X6,      S2^X5^Y6,      }, // 771
    {Y3^X7,         S0^X4^Y6,      S1^Y4^X6,      S2^X5^Y5,      }, // 772
    {Y6,            S0^X6^Y9,      S1^Y7^X8,      S2^X7^Y8,      }, // 773
    {Y5,            S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      }, // 774
    {Y3,            Y5^X7,         S0^X5^Y7,      S1^X6^Y6,      }, // 775
    {X3,            Y3^X7,         S0^X5^Y6,      S1^Y5^X6,      }, // 776
    {Y2,            Y3^X5,         X3^Y6,         S0^X4^Y5,      }, // 777
    {X5,            S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      }, // 778
    {Y3,            S0^X5^Y8,      S1^Y6^X7,      S2^X6^Y7,      }, // 779
    {X3,            Y3^X7,         S0^X5^Y7,      S1^X6^Y6,      }, // 780
    {Y6,            S0^X6,         S1^Y7^X8,      S2^X7^Y8,      }, // 781
    {Y3,            S0^X6,         S1^Y6^X8,      S2^X7^Y7,      }, // 782
    {X3,            Y3,            S0^X5^Y7,      S1^X6^Y6,      }, // 783
    {Y2,            X3,            Y3^X6,         S0^X5^Y6,      }, // 784
    {S0^X6,         S1^Y7^X8,      S2^X7^Y8,      Z2^X5^Y6,      }, // 785
    {S0^X6,         S1^Y6^X8,      S2^X7^Y7,      Z2^X5^Y6,      }, // 786
    {Y3,            S0^X6^Y7,      S1^Y6^X7,      S2^X5^Y6,      }, // 787
    {X3,            Y3^X7,         S0^X6^Y6,      S1^X5^Y6,      }, // 788
    {S2^X8^Y8,      Y6,            S0^X6,         S1^X7^Y7,      }, // 789
    {S2^X8^Y8,      Y3,            S0^X6,         S1^Y6^X7,      }, // 790
    {S0^X6,         S1^Y7,         S2^X7^Y8,      Z2^X6^Y6,      }, // 791
    {Y3,            S0^X6,         S1^X7^Y7,      S2^X6^Y6,      }, // 792
    {Z2^X6^Y6,      S0^X6,         S1^Y7,         S2^X7^Y8,      }, // 793
    {S2^X6^Y6,      Y3,            S0^X6,         S1^X7^Y7,      }, // 794
    {Z2^X6^Y6,      S2^X9^Y9,      S0^X6,         S1^Y7,         }, // 795
    {S2^X6^Y6,      S1^X9^Y9,      Y3,            S0^X6,         }, // 796
    {Z2^Y6^X7,      S0^X7,         S1^Y7,         S2^X6^Y7,      }, // 797
    {S2^Y6^X7,      Y3,            S0^X7,         S1^X6^Y7,      }, // 798
    {Z2^Y6^X7,      S2^X6^Y7,      S0^X7,         S1^Y7,         }, // 799
    {S2^Y6^X7,      S1^X6^Y7,      Y3,            S0^X7,         }, // 800
    {X2,            Z4,            Y4,            X3,            }, // 801
    {X2,            Z3,            Y4,            X3,            }, // 802
    {Y3,            X3,            Z4,            X5,            }, // 803
    {Y3,            X2,            Z4,            X3,            }, // 804
    {Y3,            X2,            Z3,            X3,            }, // 805
    {Y2,            X2,            Y3,            X3,            }, // 806
    {Z3,            X3,            Z4,            X5^Y5,         }, // 807
    {X2,            Z4,            X3,            Y2^X5^Y5,      }, // 808
    {X2,            Z3,            X3,            Y2^X5^Y5,      }, // 809
    {X2,            Y3,            X3,            Y1^X5^Y5,      }, // 810
    {X2,            Y3,            X3,            X1^X5^Y5,      }, // 811
    {Y3,            Z3,            X3,            Z4,            }, // 812
    {Y2,            Y3,            X3,            Z4,            }, // 813
    {Z3,            X3,            Z4,            X5^Y6,         }, // 814
    {X2,            Z4,            X3,            Z3^X5^Y6,      }, // 815
    {X2,            Z3,            X3,            Z2^X5^Y6,      }, // 816
    {X2,            Y3,            X3,            Z2^X5^Y6,      }, // 817
    {Z3^X7,         Y3,            X3,            Z4,            }, // 818
    {Z3^X7,         X2,            Z4,            X3,            }, // 819
    {Z2^X7,         X2,            Z3,            X3,            }, // 820
    {Z2^X7,         X2,            Y3,            X3,            }, // 821
    {Z3,            X3,            Z4,            Y3^X6^Y6,      }, // 822
    {X2,            Z4,            X3,            Y3^X6^Y6,      }, // 823
    {X2,            Z3,            X3,            Y3^X6^Y6,      }, // 824
    {X2,            Y3,            X3,            Y2^X6^Y6,      }, // 825
    {Y3^X6^Y6,      Z3,            X3,            Z4,            }, // 826
    {Y3^X6^Y6,      X2,            Z4,            X3,            }, // 827
    {Y3^X6^Y6,      X2,            Z3,            X3,            }, // 828
    {Y2^X6^Y6,      X2,            Y3,            X3,            }, // 829
    {Y3^X6^Y6,      Z3^X8,         X3,            Z4,            }, // 830
    {X2^X6^Y6,      Z3^X8,         Z4,            X3,            }, // 831
    {X2^X6^Y6,      Z2^X8,         Z3,            X3,            }, // 832
    {X2^X6^Y6,      Z2^X8,         Y3,            X3,            }, // 833
    {Y3^Y6^X7,      X3,            Z4,            Z3^X6^Y7,      }, // 834
    {Y3^Y6^X7,      Z4,            X3,            X2^X6^Y7,      }, // 835
    {Y3^Y6^X7,      Z3,            X3,            X2^X6^Y7,      }, // 836
    {Y2^Y6^X7,      Y3,            X3,            X2^X6^Y7,      }, // 837
    {Y3^Y6^X7,      Z3^X6^Y7,      X3,            Z4,            }, // 838
    {Y3^Y6^X7,      X2^X6^Y7,      Z4,            X3,            }, // 839
    {Y3^Y6^X7,      X2^X6^Y7,      Z3,            X3,            }, // 840
    {Y2^Y6^X7,      X2^X6^Y7,      Y3,            X3,            }, // 841
};

// GFX10_SW_PATTERN_NIBBLE4: constant table of 239 rows (indices 0..238, as given
// by the trailing comment on each row), each row holding four UINT_64 entries.
// Every entry is either 0 or an XOR of coordinate-bit symbols (X*, Y*, Z*, S*)
// that are defined outside this table.
// NOTE(review): presumably each row supplies the four address-bit equations of
// one nibble of a GFX10 swizzle pattern, and rows are selected by index from
// other tables -- confirm against the consumers before reordering, inserting or
// removing rows, because the per-row index comments would then no longer match.
const UINT_64 GFX10_SW_PATTERN_NIBBLE4[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y7^X9,         0,             0,             0,             }, // 1
    {Y7^X8,         0,             0,             0,             }, // 2
    {Y6^X8,         0,             0,             0,             }, // 3
    {Y6^X7,         0,             0,             0,             }, // 4
    {Y5^X7,         0,             0,             0,             }, // 5
    {X8^Y8,         0,             0,             0,             }, // 6
    {X7^Y7,         0,             0,             0,             }, // 7
    {X6^Y6,         0,             0,             0,             }, // 8
    {X8^Y9,         Y8^X9,         0,             0,             }, // 9
    {Y7^X9,         X8^Y8,         0,             0,             }, // 10
    {X7^Y8,         Y7^X8,         0,             0,             }, // 11
    {Y6^X8,         X7^Y7,         0,             0,             }, // 12
    {X6^Y7,         Y6^X7,         0,             0,             }, // 13
    {X5^Y6,         0,             0,             0,             }, // 14
    {Z0^X5^Y6,      0,             0,             0,             }, // 15
    {X8^Y8,         Y7^X9,         0,             0,             }, // 16
    {X7^Y7,         Y6^X8,         0,             0,             }, // 17
    {Y7^X11,        X9^Y9,         Y8^X10,        0,             }, // 18
    {Y7^X10,        X8^Y9,         Y8^X9,         0,             }, // 19
    {Y6^X10,        X8^Y8,         Y7^X9,         0,             }, // 20
    {Y6^X9,         X7^Y8,         Y7^X8,         0,             }, // 21
    {Y3^X9,         X7^Y7,         Y6^X8,         0,             }, // 22
    {Y8^X9,         X6^Y6,         0,             0,             }, // 23
    {X8^Y8,         X6^Y6,         0,             0,             }, // 24
    {Y7^X8,         X6^Y6,         0,             0,             }, // 25
    {X7^Y7,         Z0^X6^Y6,      0,             0,             }, // 26
    {X6^Y7,         Z0^X6^Y6,      0,             0,             }, // 27
    {X8^Y10,        Y8^X10,        X9^Y9,         0,             }, // 28
    {X7^Y9,         Y7^X9,         X8^Y8,         0,             }, // 29
    {X6^Y9,         X7^Y8,         Y7^X8,         0,             }, // 30
    {Y3^X8,         X6^Y8,         X7^Y7,         0,             }, // 31
    {X8^Y11,        Y8^X11,        X9^Y10,        Y9^X10,        }, // 32
    {Y7^X11,        X8^Y10,        Y8^X10,        X9^Y9,         }, // 33
    {X7^Y10,        Y7^X10,        X8^Y9,         Y8^X9,         }, // 34
    {Y3^X10,        X7^Y9,         Y7^X9,         X8^Y8,         }, // 35
    {X3^Y9,         Y3^X9,         X7^Y8,         Y7^X8,         }, // 36
    {X9^Y9,         Y8^X10,        X6^Y7,         0,             }, // 37
    {X8^Y9,         Y8^X9,         X6^Y7,         0,             }, // 38
    {X8^Y8,         Y7^X9,         X6^Y7,         0,             }, // 39
    {X7^Y8,         Y7^X8,         Z0^X6^Y7,      0,             }, // 40
    {Y3^X8,         X7^Y7,         Z0^X6^Y7,      0,             }, // 41
    {X8^Y10,        Y7^X11,        X9^Y9,         Y8^X10,        }, // 42
    {Y3^X10,        X7^Y9,         X8^Y8,         Y7^X9,         }, // 43
    {Y3^X9,         X3^Y9,         X7^Y8,         Y7^X8,         }, // 44
    {Y2^X7^Y7,      0,             0,             0,             }, // 45
    {X2^Y6^X7,      0,             0,             0,             }, // 46
    {Y1^X6^Y6,      0,             0,             0,             }, // 47
    {X7^Y9,         X8^Y8,         0,             0,             }, // 48
    {Y7^X8,         Y2^X7^Y8,      0,             0,             }, // 49
    {X6^Y8,         X2^X7^Y7,      0,             0,             }, // 50
    {X5^Y8,         Y1^X6^Y7,      0,             0,             }, // 51
    {Y6^X8,         Y2^X7^Y7,      0,             0,             }, // 52
    {Y6^X7,         Y1^X6^Y7,      0,             0,             }, // 53
    {X7^Y9,         X8^Y8,         Y7^X9,         0,             }, // 54
    {X7^Y9,         Y7^X9,         Y2^X8^Y8,      0,             }, // 55
    {X6^Y9,         X7^Y8,         X2^Y7^X8,      0,             }, // 56
    {X3^Y9,         X6^Y8,         Y1^X7^Y7,      0,             }, // 57
    {Y2^X7^Y8,      X6^Y6,         0,             0,             }, // 58
    {X2^X7^Y7,      Z0^X6^Y6,      0,             0,             }, // 59
    {Y1^X6^Y7,      Z0^X6^Y6,      0,             0,             }, // 60
    {Y3^X8,         X6^Y8,         Y1^X7^Y7,      0,             }, // 61
    {Y7^X11,        Y8^X10,        X8^Y10,        X9^Y9,         }, // 62
    {Y7^X10,        X7^Y10,        Y8^X9,         Y2^X8^Y9,      }, // 63
    {Y3^X10,        X7^Y9,         Y7^X9,         X2^X8^Y8,      }, // 64
    {Y3^X9,         X3^Y9,         Y7^X8,         Y1^X7^Y8,      }, // 65
    {Y7^X9,         Y2^X8^Y8,      X6^Y7,         0,             }, // 66
    {X7^Y8,         X2^Y7^X8,      Z4^X6^Y7,      0,             }, // 67
    {X3^Y8,         Y1^X7^Y7,      Z4^X6^Y7,      0,             }, // 68
    {Y3^X10,        X7^Y9,         Y7^X9,         Y2^X8^Y8,      }, // 69
    {Y2^Y6^X8,      0,             0,             0,             }, // 70
    {Y1^X6^Y7,      0,             0,             0,             }, // 71
    {Y1^Y5^X7,      0,             0,             0,             }, // 72
    {X7^Y8,         Y2^Y7^X8,      0,             0,             }, // 73
    {X2^X7^Y8,      Y2^Y7^X8,      0,             0,             }, // 74
    {X2^X7^Y7,      Y1^Y6^X8,      0,             0,             }, // 75
    {X1^X6^Y7,      Y1^Y6^X7,      0,             0,             }, // 76
    {Y6^X9,         Y2^Y7^X8,      0,             0,             }, // 77
    {X2^Y7^X8,      Y2^X7^Y8,      0,             0,             }, // 78
    {X2^Y6^X8,      Y1^X7^Y7,      0,             0,             }, // 79
    {X1^Y6^X7,      Y1^X6^Y7,      0,             0,             }, // 80
    {Y6^X10,        X2^X8^Y8,      Y2^Y7^X9,      0,             }, // 81
    {Y6^X9,         Y2^Y7^X8,      Y1^X7^Y8,      0,             }, // 82
    {Y3^X9,         X1^X7^Y7,      Y1^Y6^X8,      0,             }, // 83
    {Y2^Y7^X8,      X6^Y6,         0,             0,             }, // 84
    {Y1^X7^Y7,      Z3^X6^Y6,      0,             0,             }, // 85
    {X1^X6^Y8,      Y1^X6^Y6,      0,             0,             }, // 86
    {X7^Y9,         X2^Y7^X9,      Y2^X8^Y8,      0,             }, // 87
    {X6^Y9,         X2^Y7^X8,      Y1^X7^Y8,      0,             }, // 88
    {X3^Y8,         X1^Y6^X8,      Y1^X7^Y7,      0,             }, // 89
    {X7^Y10,        Y7^X10,        X8^Y9,         Y2^Y8^X9,      }, // 90
    {X7^Y10,        Y7^X10,        X2^X8^Y9,      Y2^Y8^X9,      }, // 91
    {Y3^X10,        X7^Y9,         X2^X8^Y8,      Y1^Y7^X9,      }, // 92
    {X3^Y9,         Y3^X9,         X1^X7^Y8,      Y1^Y7^X8,      }, // 93
    {X2^X8^Y8,      Y2^Y7^X9,      X6^Y7,         0,             }, // 94
    {Y2^Y7^X8,      Y1^X7^Y8,      Z3^X6^Y7,      0,             }, // 95
    {Y2^Y7^X8,      X1^X7^Y8,      Z3^X6^Y7,      0,             }, // 96
    {X7^Y10,        X8^Y9,         Y7^X10,        Y2^Y8^X9,      }, // 97
    {X7^Y10,        Y7^X10,        X2^Y8^X9,      Y2^X8^Y9,      }, // 98
    {Y3^X10,        X7^Y9,         X2^Y7^X9,      Y1^X8^Y8,      }, // 99
    {Y3^X9,         X3^Y9,         X1^Y7^X8,      Y1^X7^Y8,      }, // 100
    {X1^Y5^X6,      0,             0,             0,             }, // 101
    {Y2^Y6^X7,      0,             0,             0,             }, // 102
    {X1^Y6^X7,      0,             0,             0,             }, // 103
    {Y0^X5^Y7,      X1^X6^Y6,      0,             0,             }, // 104
    {Z1^X5^Y6,      0,             0,             0,             }, // 105
    {Y1^X5^Y6,      0,             0,             0,             }, // 106
    {X1^Y6^X8,      Y2^X7^Y7,      0,             0,             }, // 107
    {Y2^X7^Y7,      X1^Y6^X8,      0,             0,             }, // 108
    {X7^Y9,         X2^X8^Y8,      Y2^Y7^X9,      0,             }, // 109
    {Y1^X7^Y9,      X2^X8^Y8,      Y2^Y7^X9,      0,             }, // 110
    {X6^Y8,         X1^X7^Y7,      Y1^Y6^X8,      0,             }, // 111
    {X3^Y8,         Y0^X6^Y7,      X1^Y6^X7,      0,             }, // 112
    {X2^X7^Y8,      Y1^X6^Y6,      0,             0,             }, // 113
    {Y2^Y7^X8,      Y1^X6^Y6,      0,             0,             }, // 114
    {Y1^X7^Y9,      X2^Y7^X9,      Y2^X8^Y8,      0,             }, // 115
    {Y1^X7^Y8,      X1^Y6^X9,      Y2^Y7^X8,      0,             }, // 116
    {Y1^X6^Y9,      Y2^X7^Y8,      X1^Y7^X8,      0,             }, // 117
    {Y7^X10,        Y1^X7^Y10,     X2^X8^Y9,      Y2^Y8^X9,      }, // 118
    {Y3^X10,        X1^X7^Y9,      Y1^Y7^X9,      X2^X8^Y8,      }, // 119
    {Y3^X8,         X3^Y9,         Y0^X6^Y8,      X1^X7^Y7,      }, // 120
    {Y2^Y7^X9,      X2^X8^Y8,      Z2^X6^Y7,      0,             }, // 121
    {X2^X8^Y8,      Y2^Y7^X9,      Y1^X6^Y7,      0,             }, // 122
    {Y3^X10,        Y1^X7^Y9,      X1^Y7^X9,      Y2^X8^Y8,      }, // 123
    {Y3^X10,        Y1^X7^Y9,      Y2^X8^Y8,      X1^Y7^X9,      }, // 124
    {Y8^X9,         Z0^X6^Y6,      0,             0,             }, // 125
    {X8^Y8,         Z0^X6^Y6,      0,             0,             }, // 126
    {Y7^X8,         Z0^X6^Y6,      0,             0,             }, // 127
    {X9^Y9,         Y8^X10,        Z0^X6^Y7,      0,             }, // 128
    {X8^Y9,         Y8^X9,         Z0^X6^Y7,      0,             }, // 129
    {X8^Y8,         Y7^X9,         Z0^X6^Y7,      0,             }, // 130
    {S0^X8^Y8,      0,             0,             0,             }, // 131
    {S0^Y7^X8,      0,             0,             0,             }, // 132
    {S0^X7^Y7,      0,             0,             0,             }, // 133
    {S0^Y6^X7,      0,             0,             0,             }, // 134
    {S0^X6^Y6,      0,             0,             0,             }, // 135
    {Y8^X9,         S0^X8^Y9,      0,             0,             }, // 136
    {Y7^X9,         S0^X8^Y8,      0,             0,             }, // 137
    {Y7^X8,         S0^X7^Y8,      0,             0,             }, // 138
    {Y6^X8,         S0^X7^Y7,      0,             0,             }, // 139
    {Y6^X7,         S0^X6^Y7,      0,             0,             }, // 140
    {X8^Y10,        Y8^X10,        S0^X9^Y9,      0,             }, // 141
    {X8^Y9,         Y7^X10,        S0^Y8^X9,      0,             }, // 142
    {X7^Y9,         Y7^X9,         S0^X8^Y8,      0,             }, // 143
    {X7^Y8,         Y6^X9,         S0^Y7^X8,      0,             }, // 144
    {X3^Y8,         Y6^X8,         S0^X7^Y7,      0,             }, // 145
    {S0^X8^Y9,      Z0^X6^Y6,      0,             0,             }, // 146
    {S0^X8^Y8,      Z0^X6^Y6,      0,             0,             }, // 147
    {S0^X7^Y8,      Z0^X6^Y6,      0,             0,             }, // 148
    {S0^X7^Y7,      Z0^X6^Y6,      0,             0,             }, // 149
    {S0^X6^Y7,      Z0^X6^Y6,      0,             0,             }, // 150
    {Y7^X10,        X8^Y9,         S0^Y8^X9,      0,             }, // 151
    {X6^Y9,         X7^Y8,         S0^Y7^X8,      0,             }, // 152
    {Y3^X8,         X6^Y8,         S0^X7^Y7,      0,             }, // 153
    {Y8^X11,        X8^Y11,        Y9^X10,        S0^X9^Y10,     }, // 154
    {Y7^X11,        X8^Y10,        Y8^X10,        S0^X9^Y9,      }, // 155
    {Y7^X10,        X7^Y10,        Y8^X9,         S0^X8^Y9,      }, // 156
    {Y3^X10,        X7^Y9,         Y7^X9,         S0^X8^Y8,      }, // 157
    {Y3^X9,         X3^Y9,         Y7^X8,         S0^X7^Y8,      }, // 158
    {Y8^X10,        S0^X9^Y9,      Z4^X6^Y7,      0,             }, // 159
    {Y7^X10,        S0^Y8^X9,      Z4^X6^Y7,      0,             }, // 160
    {Y7^X9,         S0^X8^Y8,      Z4^X6^Y7,      0,             }, // 161
    {X7^Y8,         S0^Y7^X8,      Z4^X6^Y7,      0,             }, // 162
    {X3^Y8,         S0^X7^Y7,      Z4^X6^Y7,      0,             }, // 163
    {S1^Y7^X9,      0,             0,             0,             }, // 164
    {S1^Y7^X8,      0,             0,             0,             }, // 165
    {S1^Y6^X8,      0,             0,             0,             }, // 166
    {S1^Y6^X7,      0,             0,             0,             }, // 167
    {S1^Y5^X7,      0,             0,             0,             }, // 168
    {S1^X8^Y8,      0,             0,             0,             }, // 169
    {S1^X7^Y7,      0,             0,             0,             }, // 170
    {S0^X8^Y9,      S1^Y8^X9,      0,             0,             }, // 171
    {S0^Y7^X9,      S1^X8^Y8,      0,             0,             }, // 172
    {S0^X7^Y8,      S1^Y7^X8,      0,             0,             }, // 173
    {S0^Y6^X8,      S1^X7^Y7,      0,             0,             }, // 174
    {S0^X6^Y7,      S1^Y6^X7,      0,             0,             }, // 175
    {S0^X8^Y8,      S1^Y7^X9,      0,             0,             }, // 176
    {S0^X7^Y7,      S1^Y6^X8,      0,             0,             }, // 177
    {Y7^X11,        S0^X9^Y9,      S1^Y8^X10,     0,             }, // 178
    {Y7^X10,        S0^X8^Y9,      S1^Y8^X9,      0,             }, // 179
    {Y6^X10,        S0^X8^Y8,      S1^Y7^X9,      0,             }, // 180
    {Y6^X9,         S0^X7^Y8,      S1^Y7^X8,      0,             }, // 181
    {Y3^X9,         S0^X7^Y7,      S1^Y6^X8,      0,             }, // 182
    {S1^Y8^X9,      Z3^X6^Y6,      0,             0,             }, // 183
    {S1^X8^Y8,      Z3^X6^Y6,      0,             0,             }, // 184
    {S1^Y7^X8,      Z3^X6^Y6,      0,             0,             }, // 185
    {S1^Y6^X8,      Z3^X6^Y6,      0,             0,             }, // 186
    {S0^X6^Y7,      S1^X6^Y6,      0,             0,             }, // 187
    {X8^Y10,        S0^Y8^X10,     S1^X9^Y9,      0,             }, // 188
    {X7^Y9,         S0^Y7^X9,      S1^X8^Y8,      0,             }, // 189
    {X6^Y9,         S0^X7^Y8,      S1^Y7^X8,      0,             }, // 190
    {X3^Y8,         S0^X7^Y7,      S1^Y6^X8,      0,             }, // 191
    {X8^Y11,        Y8^X11,        S0^X9^Y10,     S1^Y9^X10,     }, // 192
    {Y7^X11,        X8^Y10,        S0^Y8^X10,     S1^X9^Y9,      }, // 193
    {X7^Y10,        Y7^X10,        S0^X8^Y9,      S1^Y8^X9,      }, // 194
    {Y3^X10,        X7^Y9,         S0^Y7^X9,      S1^X8^Y8,      }, // 195
    {X3^Y9,         Y3^X9,         S0^X7^Y8,      S1^Y7^X8,      }, // 196
    {S0^X9^Y9,      S1^Y8^X10,     Z3^X6^Y7,      0,             }, // 197
    {S0^X8^Y9,      S1^Y8^X9,      Z3^X6^Y7,      0,             }, // 198
    {S0^X8^Y8,      S1^Y7^X9,      Z3^X6^Y7,      0,             }, // 199
    {S0^X7^Y8,      S1^Y7^X8,      Z3^X6^Y7,      0,             }, // 200
    {X3^Y8,         S0^X7^Y7,      Z3^X6^Y7,      0,             }, // 201
    {X8^Y10,        Y7^X11,        S0^X9^Y9,      S1^Y8^X10,     }, // 202
    {Y3^X10,        X7^Y9,         S0^X8^Y8,      S1^Y7^X9,      }, // 203
    {Y3^X9,         X3^Y9,         S0^X7^Y8,      S1^Y7^X8,      }, // 204
    {S2^X8^Y8,      0,             0,             0,             }, // 205
    {S2^Y7^X8,      0,             0,             0,             }, // 206
    {S2^X7^Y7,      0,             0,             0,             }, // 207
    {S2^Y6^X7,      0,             0,             0,             }, // 208
    {S2^X6^Y6,      0,             0,             0,             }, // 209
    {S1^X6^Y6,      0,             0,             0,             }, // 210
    {S1^Y8^X9,      S2^X8^Y9,      0,             0,             }, // 211
    {S1^Y7^X9,      S2^X8^Y8,      0,             0,             }, // 212
    {S1^Y7^X8,      S2^X7^Y8,      0,             0,             }, // 213
    {S1^Y6^X8,      S2^X7^Y7,      0,             0,             }, // 214
    {S1^Y6^X7,      S2^X6^Y7,      0,             0,             }, // 215
    {Z2^X5^Y6,      0,             0,             0,             }, // 216
    {S1^X5^Y6,      0,             0,             0,             }, // 217
    {S0^X8^Y10,     S1^Y8^X10,     S2^X9^Y9,      0,             }, // 218
    {S0^X8^Y9,      S1^Y7^X10,     S2^Y8^X9,      0,             }, // 219
    {S0^X7^Y9,      S1^Y7^X9,      S2^X8^Y8,      0,             }, // 220
    {S0^X7^Y8,      S1^Y6^X9,      S2^Y7^X8,      0,             }, // 221
    {S0^X6^Y8,      S1^Y6^X8,      S2^X7^Y7,      0,             }, // 222
    {S2^X8^Y9,      Z2^X6^Y6,      0,             0,             }, // 223
    {S2^X8^Y8,      Z2^X6^Y6,      0,             0,             }, // 224
    {S2^X7^Y8,      Z2^X6^Y6,      0,             0,             }, // 225
    {S1^X7^Y7,      S2^X6^Y6,      0,             0,             }, // 226
    {S0^Y7^X10,     S1^X8^Y9,      S2^Y8^X9,      0,             }, // 227
    {X3^Y9,         S0^X6^Y8,      S1^X7^Y7,      0,             }, // 228
    {Y8^X11,        S0^X8^Y11,     S1^Y9^X10,     S2^X9^Y10,     }, // 229
    {Y7^X11,        S0^X8^Y10,     S1^Y8^X10,     S2^X9^Y9,      }, // 230
    {Y7^X10,        S0^X7^Y10,     S1^Y8^X9,      S2^X8^Y9,      }, // 231
    {Y3^X10,        S0^X7^Y9,      S1^Y7^X9,      S2^X8^Y8,      }, // 232
    {Y3^X9,         S0^X6^Y9,      S1^Y7^X8,      S2^X7^Y8,      }, // 233
    {S1^Y8^X10,     S2^X9^Y9,      Z2^X6^Y7,      0,             }, // 234
    {S1^Y7^X10,     S2^Y8^X9,      Z2^X6^Y7,      0,             }, // 235
    {S1^Y7^X9,      S2^X8^Y8,      Z2^X6^Y7,      0,             }, // 236
    {S0^X7^Y8,      S1^Y7^X8,      Z2^X6^Y7,      0,             }, // 237
    {X3^Y8,         S0^X7^Y7,      S1^X6^Y7,      0,             }, // 238
};

// Pattern-index table for DCC, SW_64K_R_X, 1xaa, Navi1x.
//
// Layout: one row per pipe configuration, five consecutive entries per row,
// one for each of 1, 2, 4, 8 and 16 bpe (in that order). The "ua" rows come
// first, followed by the "pa" rows; these tags, like "bpe", are carried over
// verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of a swizzle-pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_DCC_64K_R_X_PATIDX[] =
{
    // bpe:  1    2    4    8   16
    // ---- ua ----
     0,   1,   2,   3,   4,   // 1 pipes
     5,   6,   2,   3,   4,   // 2 pipes
     7,   6,   2,   3,   4,   // 4+ pipes
    // ---- pa ----
     0,   1,   2,   3,   4,   // 1 pipes
     8,   9,  10,  11,  12,   // 2 pipes
    13,  14,  15,  16,  17,   // 4 pipes
    18,  19,  20,  21,  22,   // 8 pipes
    23,  24,  25,  26,  27,   // 16 pipes
    28,  29,  30,  31,  32,   // 32 pipes
    33,  34,  35,  36,  37,   // 64 pipes
};

// Pattern-index table for HTILE_64K on Navi1x.
//
// Layout: one row per pipe configuration, four consecutive entries per row,
// one for each of 1xaa, 2xaa, 4xaa and 8xaa (in that order). The single "ua"
// row comes first, followed by the "pa" rows; the tags are carried over
// verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of an HTILE pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_HTILE_PATIDX[] =
{
    // aa:   1x   2x   4x   8x
    // ---- ua ----
     0,   0,   0,   0,   // (no pipe count given)
    // ---- pa ----
     0,   0,   0,   0,   // 1 pipes
     1,   1,   1,   1,   // 2 pipes
     2,   2,   2,   2,   // 4 pipes
     3,   3,   3,   3,   // 8 pipes
     4,   4,   4,   5,   // 16 pipes
     6,   6,   7,   8,   // 32 pipes
     9,  10,  11,  12,   // 64 pipes
};

// Pattern-index table for CMASK_64K on Navi1x.
//
// Layout: one row per pipe configuration, four consecutive entries per row,
// one for each of 1, 2, 4 and 8 bpe (in that order). The single "ua" row
// comes first, followed by the "pa" rows; the tags are carried over verbatim
// from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of a CMASK pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_CMASK_64K_PATIDX[] =
{
    // bpe:  1    2    4    8
    // ---- ua ----
     0,   0,   0,   0,   // (no pipe count given)
    // ---- pa ----
     0,   0,   0,   0,   // 1 pipes
     1,   1,   1,   1,   // 2 pipes
     2,   2,   2,   2,   // 4 pipes
     3,   3,   3,   3,   // 8 pipes
     4,   4,   4,   4,   // 16 pipes
     5,   5,   5,   5,   // 32 pipes
     6,   6,   6,   7,   // 64 pipes
};

// Pattern-index table for DCC, SW_64K_R_X, 1xaa, RbPlus.
//
// Layout: one row per pipe/PKR configuration, five consecutive entries per
// row, one for each of 1, 2, 4, 8 and 16 bpe (in that order). The single "ua"
// row comes first, followed by the "pa" rows; the tags (ua/pa, bpe, PKRs) are
// carried over verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of a swizzle-pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_DCC_64K_R_X_RBPLUS_PATIDX[] =
{
    // bpe:  1    2    4    8   16
    // ---- ua ----
      0,   1,   2,   3,   4,   // (no pipe count given)
    // ---- pa ----
      0,   1,   2,   3,   4,   // 1 pipes (1 PKRs)
     38,  39,  40,  41,  42,   // 2 pipes (1-2 PKRs)
     43,  44,  45,  46,  47,   // 4 pipes (1-2 PKRs)
     48,  49,  50,  51,  52,   // 8 pipes (2 PKRs)
     53,  54,  55,  56,  57,   // 4 pipes (4 PKRs)
     58,  59,  60,  61,  62,   // 8 pipes (4 PKRs)
     63,  64,  65,  66,  67,   // 16 pipes (4 PKRs)
     68,  69,  70,  71,  72,   // 8 pipes (8 PKRs)
     73,  74,  75,  76,  77,   // 16 pipes (8 PKRs)
     78,  79,  80,  81,  82,   // 32 pipes (8 PKRs)
     83,  84,  85,  86,  87,   // 16 pipes (16 PKRs)
     88,  89,  90,  91,  92,   // 32 pipes (16 PKRs)
     93,  94,  95,  96,  97,   // 64 pipes (16 PKRs)
     98,  99, 100, 101, 102,   // 32 pipes (32 PKRs)
    103, 104, 105, 106, 107,   // 64 pipes (32 PKRs)
};

// Pattern-index table for HTILE_64K on RbPlus.
//
// Layout: one row per pipe/PKR configuration, four consecutive entries per
// row, one for each of 1xaa, 2xaa, 4xaa and 8xaa (in that order). The single
// "ua" row comes first, followed by the "pa" rows; the tags (ua/pa, PKRs) are
// carried over verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of an HTILE pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_HTILE_RBPLUS_PATIDX[] =
{
    // aa:   1x   2x   4x   8x
    // ---- ua ----
     0,   0,   0,   0,   // (no pipe count given)
    // ---- pa ----
     0,   0,   0,   0,   // 1 pipes (1-2 PKRs)
    13,  13,  13,  13,   // 2 pipes (1-2 PKRs)
    14,  14,  14,  14,   // 4 pipes (1-2 PKRs)
    15,  15,  15,  15,   // 8 pipes (1-2 PKRs)
    13,  13,  13,  13,   // 2 pipes (4 PKRs)
    16,  16,  16,  16,   // 4 pipes (4 PKRs)
    17,  17,  17,  17,   // 8 pipes (4 PKRs)
    18,  18,  18,  18,   // 16 pipes (4 PKRs)
    19,  19,  19,  19,   // 4 pipes (8 PKRs)
    20,  20,  20,  20,   // 8 pipes (8 PKRs)
    21,  21,  21,  21,   // 16 pipes (8 PKRs)
    22,  22,  22,  22,   // 32 pipes (8 PKRs)
    23,  23,  23,  23,   // 8 pipes (16 PKRs)
    24,  24,  24,  24,   // 16 pipes (16 PKRs)
    25,  25,  25,  25,   // 32 pipes (16 PKRs)
    26,  26,  26,  26,   // 64 pipes (16 PKRs)
    27,  27,  27,  27,   // 16 pipes (32 PKRs)
    28,  28,  28,  28,   // 32 pipes (32 PKRs)
    29,  29,  29,  29,   // 64 pipes (32 PKRs)
};

// Pattern-index table for CMASK_64K on RbPlus.
//
// Layout: one row per pipe/PKR configuration, four consecutive entries per
// row, one for each of 1, 2, 4 and 8 bpe (in that order). The single "ua" row
// comes first, followed by the "pa" rows; the tags (ua/pa, bpe, PKRs) are
// carried over verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of a CMASK pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_CMASK_64K_RBPLUS_PATIDX[] =
{
    // bpe:  1    2    4    8
    // ---- ua ----
     0,   0,   0,   0,   // (no pipe count given)
    // ---- pa ----
     0,   0,   0,   0,   // 1 pipes (1-2 PKRs)
     8,   8,   8,   8,   // 2 pipes (1-2 PKRs)
     9,   9,   9,   9,   // 4 pipes (1-2 PKRs)
    10,  10,  10,  10,   // 8 pipes (1-2 PKRs)
     8,   8,   8,   8,   // 2 pipes (4 PKRs)
    11,  11,  11,  11,   // 4 pipes (4 PKRs)
    12,  12,  12,  12,   // 8 pipes (4 PKRs)
    13,  13,  13,  13,   // 16 pipes (4 PKRs)
    14,  14,  14,  14,   // 4 pipes (8 PKRs)
    15,  15,  15,  16,   // 8 pipes (8 PKRs)
    15,  15,  15,  17,   // 16 pipes (8 PKRs)
    18,  18,  18,  19,   // 32 pipes (8 PKRs)
    20,  20,  20,  21,   // 8 pipes (16 PKRs)
    22,  22,  22,  23,   // 16 pipes (16 PKRs)
    22,  22,  22,  24,   // 32 pipes (16 PKRs)
    25,  25,  25,  32,   // 64 pipes (16 PKRs)
    27,  27,  27,  28,   // 16 pipes (32 PKRs)
    29,  29,  29,  33,   // 32 pipes (32 PKRs)
    29,  29,  29,  34,   // 64 pipes (32 PKRs)
};

// Pattern-index table for CMASK_VAR on RbPlus.
//
// Layout: one row per pipe/PKR configuration, four consecutive entries per
// row, one for each of 1, 2, 4 and 8 bpe (in that order). The single "ua" row
// comes first, followed by the "pa" rows; the tags (ua/pa, bpe, PKRs) are
// carried over verbatim from the generated per-entry annotations.
// NOTE(review): presumably each value selects a row of a CMASK pattern
// table -- confirm against the code that consumes this array.
const UINT_8 GFX10_CMASK_VAR_RBPLUS_PATIDX[] =
{
    // bpe:  1    2    4    8
    // ---- ua ----
     0,   0,   0,   0,   // (no pipe count given)
    // ---- pa ----
     0,   0,   0,   0,   // 1 pipes (1-2 PKRs)
     8,   8,   8,   8,   // 2 pipes (1-2 PKRs)
     9,   9,   9,   9,   // 4 pipes (1-2 PKRs)
    10,  10,  10,  10,   // 8 pipes (1-2 PKRs)
     8,   8,   8,   8,   // 2 pipes (4 PKRs)
    11,  11,  11,  11,   // 4 pipes (4 PKRs)
    12,  12,  12,  12,   // 8 pipes (4 PKRs)
    13,  13,  13,  13,   // 16 pipes (4 PKRs)
    14,  14,  14,  14,   // 4 pipes (8 PKRs)
    15,  15,  15,  16,   // 8 pipes (8 PKRs)
    15,  15,  15,  17,   // 16 pipes (8 PKRs)
    18,  18,  18,  19,   // 32 pipes (8 PKRs)
    20,  20,  20,  21,   // 8 pipes (16 PKRs)
    22,  22,  22,  23,   // 16 pipes (16 PKRs)
    22,  22,  22,  24,   // 32 pipes (16 PKRs)
    25,  25,  25,  26,   // 64 pipes (16 PKRs)
    27,  27,  27,  28,   // 16 pipes (32 PKRs)
    29,  29,  29,  30,   // 32 pipes (32 PKRs)
    29,  29,  29,  31,   // 64 pipes (32 PKRs)
};

// GFX10_DCC_64K_R_X_SW_PATTERN
//
// Constant lookup table of 108 rows (numbered 0..107 in the trailing comment of each row),
// each row holding 17 UINT_64 entries.
//
// Every entry is one of:
//   - 0                          : no term at this position,
//   - a single coordinate symbol : X2..X10 or Y2..Y10,
//   - an XOR (^) of symbols      : combinations of X*, Y* and Z0..Z5 terms.
//
// In every row the first entry and the last two entries are 0; rows differ in how many of the
// positions in between carry a non-zero term.
//
// NOTE(review): the X*/Y*/Z* symbols are defined outside this chunk. Presumably they are
// coordinate-bit masks and each column describes one bit of the DCC address equation for the
// 64K_R_X swizzle mode -- confirm against the symbol definitions and the code that indexes
// this table.
// NOTE(review): rows are presumably selected by index from a companion index table; keep the
// row order and the trailing row-number comments in sync if this table is ever regenerated.
const UINT_64 GFX10_DCC_64K_R_X_SW_PATTERN[][17] =
{
    {0,             X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            0,             0,             0,             0,             }, //0
    {0,             Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            0,             0,             0,             0,             }, //1
    {0,             X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            0,             0,             0,             0,             }, //2
    {0,             Y2,            X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            0,             0,             0,             0,             }, //3
    {0,             X2,            Y2,            X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            0,             0,             0,             0,             }, //4
    {0,             X3^Y3,         X4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            0,             0,             0,             0,             }, //5
    {0,             X3^Y3,         X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            0,             0,             0,             0,             }, //6
    {0,             X3^Y3,         X4^Y4,         X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            0,             0,             0,             0,             }, //7
    {0,             X4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z0^X3^Y3,      Y8,            X9,            Y9,            0,             0,             0,             0,             }, //8
    {0,             Y4,            X4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z0^X3^Y3,      X8,            Y8,            X9,            0,             0,             0,             0,             }, //9
    {0,             X3,            Y4,            X4,            X5,            Y5,            X6,            Y6,            X7,            Z0^X3^Y3,      Y7,            X8,            Y8,            0,             0,             0,             0,             }, //10
    {0,             Y2,            X3,            Y4,            X4,            X5,            Y5,            X6,            Y6,            Z0^X3^Y3,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //11
    {0,             X2,            Y2,            X3,            Y4,            X4,            X5,            Y5,            X6,            Z0^X3^Y3,      Y6,            X7,            Y7,            0,             0,             0,             0,             }, //12
    {0,             X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Z1^X3^Y3,      Z0^X4^Y4,      X9,            Y9,            0,             0,             0,             0,             }, //13
    {0,             Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z1^X3^Y3,      Z0^X4^Y4,      Y8,            X9,            0,             0,             0,             0,             }, //14
    {0,             X3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z1^X3^Y3,      Z0^X4^Y4,      X8,            Y8,            0,             0,             0,             0,             }, //15
    {0,             Y2,            X3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Z1^X3^Y3,      Z0^X4^Y4,      Y7,            X8,            0,             0,             0,             0,             }, //16
    {0,             X2,            Y2,            X3,            Y4,            X5,            Y5,            X6,            Y6,            Z1^X3^Y3,      Z0^X4^Y4,      X7,            Y7,            0,             0,             0,             0,             }, //17
    {0,             Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y9,            0,             0,             0,             0,             }, //18
    {0,             Y4,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X9,            0,             0,             0,             0,             }, //19
    {0,             X3,            Y4,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y8,            0,             0,             0,             0,             }, //20
    {0,             Y2,            X3,            Y4,            Y5,            X6,            Y6,            X7,            Y7,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X8,            0,             0,             0,             0,             }, //21
    {0,             X2,            Y2,            X3,            Y4,            Y5,            X6,            Y6,            X7,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            0,             0,             0,             0,             }, //22
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //23
    {0,             Y4,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //24
    {0,             X3,            Y4,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //25
    {0,             Y2,            X3,            Y4,            X6,            Y6,            X7,            Y7,            X8,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //26
    {0,             X2,            Y2,            X3,            Y4,            X6,            Y6,            X7,            Y7,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //27
    {0,             Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //28
    {0,             Y4,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //29
    {0,             X3,            Y4,            Y6,            X7,            Y7,            X8,            Y8,            X9,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //30
    {0,             Y2,            X3,            Y4,            Y6,            X7,            Y7,            X8,            Y8,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //31
    {0,             X2,            X3,            Y4,            Y6,            X7,            Y7,            Y2,            X8,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      Y2^X6^Y6,      0,             0,             0,             }, //32
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //33
    {0,             Y4,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //34
    {0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //35
    {0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            Y2,            X9,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y2^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //36
    {0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            X2,            Y2,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      0,             0,             }, //37
    {0,             Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z0^X4^Y4,      Y8,            X9,            Y9,            0,             0,             0,             0,             }, //38
    {0,             Y3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z0^X4^Y4,      X8,            Y8,            X9,            0,             0,             0,             0,             }, //39
    {0,             X3,            Y3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Z0^X4^Y4,      Y7,            X8,            Y8,            0,             0,             0,             0,             }, //40
    {0,             Y2,            X3,            Y3,            Y4,            X5,            Y5,            X6,            Y6,            Z0^X4^Y4,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //41
    {0,             X2,            Y2,            X3,            Y3,            Y4,            X5,            Y5,            X6,            Z0^X4^Y4,      Y6,            X7,            Y7,            0,             0,             0,             0,             }, //42
    {0,             X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X5^Y5,      Z0^X4^Y4,      X9,            Y9,            0,             0,             0,             0,             }, //43
    {0,             Y3,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X5^Y5,      Z0^X4^Y4,      Y8,            X9,            0,             0,             0,             0,             }, //44
    {0,             X3,            Y3,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Y4^X5^Y5,      Z0^X4^Y4,      X8,            Y8,            0,             0,             0,             0,             }, //45
    {0,             Y2,            X3,            Y3,            X5,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      Y7,            X8,            0,             0,             0,             0,             }, //46
    {0,             X2,            Y2,            X3,            Y3,            X5,            Y5,            X6,            Y6,            Y4^X5^Y5,      Z0^X4^Y4,      X7,            Y7,            0,             0,             0,             0,             }, //47
    {0,             Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y9,            0,             0,             0,             0,             }, //48
    {0,             Y3,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      X9,            0,             0,             0,             0,             }, //49
    {0,             X3,            Y3,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y8,            0,             0,             0,             0,             }, //50
    {0,             Y2,            X3,            Y3,            Y5,            X6,            Y6,            X7,            Y7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      X8,            0,             0,             0,             0,             }, //51
    {0,             X2,            Y2,            X3,            Y3,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^X6^Y6,      Y7,            0,             0,             0,             0,             }, //52
    {0,             X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y9,            0,             0,             0,             0,             }, //53
    {0,             Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         X9,            0,             0,             0,             0,             }, //54
    {0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y8,            0,             0,             0,             0,             }, //55
    {0,             Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         X8,            0,             0,             0,             0,             }, //56
    {0,             X2,            Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y7,            0,             0,             0,             0,             }, //57
    {0,             X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y9,            0,             0,             0,             0,             }, //58
    {0,             Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X9,            0,             0,             0,             0,             }, //59
    {0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y8,            0,             0,             0,             0,             }, //60
    {0,             Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X8,            0,             0,             0,             0,             }, //61
    {0,             X2,            Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            0,             0,             0,             0,             }, //62
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      0,             0,             0,             0,             }, //63
    {0,             Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      0,             0,             0,             0,             }, //64
    {0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      0,             0,             0,             0,             }, //65
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      0,             0,             0,             0,             }, //66
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^X7^Y7,      0,             0,             0,             0,             }, //67
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //68
    {0,             Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //69
    {0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //70
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //71
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //72
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //73
    {0,             Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //74
    {0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //75
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //76
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //77
    {0,             Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^X8^Y8,      0,             0,             0,             }, //78
    {0,             Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^X8^Y8,      0,             0,             0,             }, //79
    {0,             X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^X8^Y8,      0,             0,             0,             }, //80
    {0,             Y2,            X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^X8^Y8,      0,             0,             0,             }, //81
    {0,             X2,            Y2,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X3^X8^Y8,      0,             0,             0,             }, //82
    {0,             X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //83
    {0,             Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //84
    {0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //85
    {0,             Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //86
    {0,             X2,            X3,            Y3,            X6,            X7,            Y7,            Y2,            X8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      X6^Y6,         0,             0,             0,             }, //87
    {0,             X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //88
    {0,             Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //89
    {0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //90
    {0,             Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //91
    {0,             X2,            X3,            Y3,            X6,            X7,            Y7,            Y2,            X8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      Y2^X6^Y6,      0,             0,             0,             }, //92
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^X9^Y9,      0,             0,             }, //93
    {0,             Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^X9^Y9,      0,             0,             }, //94
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^X9^Y9,      0,             0,             }, //95
    {0,             Y2,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X3^X9^Y9,      0,             0,             }, //96
    {0,             X2,            Y3,            X6,            X7,            Y7,            X8,            Y2,            Y8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      Y2^X6^Y6,      X3^X9^Y9,      0,             0,             }, //97
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //98
    {0,             Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //99
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //100
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y2,            X9,            Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y2^Y6^X7,      X6^Y7,         0,             0,             }, //101
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X2,            Y2,            Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y2^Y6^X7,      X6^Y7,         0,             0,             }, //102
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //103
    {0,             Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //104
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //105
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y2,            X9,            Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y2^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //106
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X2,            Y2,            Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      0,             0,             }, //107
};

// HTILE swizzle-pattern table.
// Every row holds 18 UINT_64 entries; an entry is either 0 or an XOR of the X*/Y*/Z* coordinate-bit identifiers.
// The trailing "//N" comment on each row is that row's index.
// Gfx10Lib::HwlComputeHtileInfo picks a row through GFX10_HTILE_PATIDX / GFX10_HTILE_RBPLUS_PATIDX, asserts that one
// row occupies 72 * 2 bytes, and hands the row out through a UINT_16 pointer.
// NOTE(review): presumably each entry lists the coordinate bits that are XOR-ed into one address bit -- confirm.
const UINT_64 GFX10_HTILE_SW_PATTERN[][18] =
{
    {0,             0,             0,             X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            0,             0,             0,             0,             0,             }, //0
    {0,             0,             0,             X3,            Y4,            X4,            X5,            Y5,            X6,            Z0^X3^Y3,      Y6,            X7,            Y7,            0,             0,             0,             0,             0,             }, //1
    {0,             0,             0,             X3,            Y4,            X5,            Y5,            X6,            Y6,            Z1^X3^Y3,      Z0^X4^Y4,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //2
    {0,             0,             0,             X3,            Y4,            Y5,            X6,            Y6,            X7,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            X8,            Y8,            0,             0,             0,             }, //3
    {0,             0,             0,             X3,            Y4,            X6,            Y6,            X7,            Y7,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X8,            Y8,            X9,            0,             0,             }, //4
    {0,             0,             0,             X3,            Y4,            X6,            Y6,            X7,            Y7,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X8,            Y8,            X9,            0,             0,             }, //5
    {0,             0,             0,             X3,            Y4,            Y6,            X7,            Y7,            X8,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      Y8,            X9,            Y9,            0,             }, //6
    {0,             0,             0,             X3,            Y4,            Y6,            X7,            Y7,            X8,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      X6^Y6,         Y8,            X9,            Y9,            0,             }, //7
    {0,             0,             0,             X3,            Y4,            Y6,            X7,            Y7,            X8,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         Y8,            X9,            Y9,            0,             }, //8
    {0,             0,             0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      X9,            Y9,            X10,           }, //9
    {0,             0,             0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Z0^Y6^X7,      X6^Y7,         X9,            Y9,            X10,           }, //10
    {0,             0,             0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y6^X7,         X6^Y7,         X9,            Y9,            X10,           }, //11
    {0,             0,             0,             X3,            Y4,            X7,            Y7,            X8,            Y8,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         X9,            Y9,            X10,           }, //12
    {0,             0,             0,             X3,            Y3,            Y4,            X5,            Y5,            X6,            Z0^X4^Y4,      Y6,            X7,            Y7,            0,             0,             0,             0,             0,             }, //13
    {0,             0,             0,             X3,            Y3,            X5,            Y5,            X6,            Y6,            Y4^X5^Y5,      Z0^X4^Y4,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //14
    {0,             0,             0,             X3,            Y3,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y7,            X8,            Y8,            0,             0,             0,             }, //15
    {0,             0,             0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Y7,            X8,            Y8,            X5^Y5,         0,             0,             0,             }, //16
    {0,             0,             0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            X8,            Y8,            0,             0,             0,             }, //17
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         X8,            Y8,            X9,            0,             0,             }, //18
    {0,             0,             0,             X3,            Y3,            Y4,            X5,            X6,            Y6,            Z1^X4^Y4,      Z0^X5^Y5,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //19
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X8,            Y8,            X9,            X5^Y6,         0,             0,             }, //20
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X8,            Y8,            X9,            0,             0,             }, //21
    {0,             0,             0,             X3,            Y3,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         Y8,            X9,            Y9,            0,             }, //22
    {0,             0,             0,             X3,            Y3,            Y4,            X6,            Y6,            X7,            Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         Y7,            X8,            Y8,            0,             0,             0,             }, //23
    {0,             0,             0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         Y8,            X9,            Y9,            X6^Y6,         0,             }, //24
    {0,             0,             0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         Y8,            X9,            Y9,            0,             }, //25
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         X9,            Y9,            X10,           }, //26
    {0,             0,             0,             X3,            Y3,            Y4,            X6,            X7,            Y7,            Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X8,            Y8,            X9,            0,             0,             }, //27
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X9,            Y9,            X10,           X6^Y7,         }, //28
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         X9,            Y9,            X10,           }, //29
};

// CMASK swizzle-pattern table.
// Every row holds 17 UINT_64 entries; an entry is either 0 or an XOR of the X*/Y*/Z* coordinate-bit identifiers.
// The trailing "//N" comment on each row is that row's index.
// Gfx10Lib::HwlComputeCmaskInfo picks a row through one of the GFX10_CMASK_*_PATIDX tables, asserts that one row
// occupies 68 * 2 bytes, and hands the row out through a UINT_16 pointer.
// NOTE(review): presumably each entry lists the coordinate bits that are XOR-ed into one address bit -- confirm.
const UINT_64 GFX10_CMASK_SW_PATTERN[][17] =
{
    {X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            0,             0,             0,             0,             }, //0
    {X3,            Y4,            X4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z0^X3^Y3,      X8,            Y8,            X9,            0,             0,             0,             0,             }, //1
    {X3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z1^X3^Y3,      Z0^X4^Y4,      Y8,            X9,            0,             0,             0,             0,             }, //2
    {X3,            Y4,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Z2^X3^Y3,      Z1^X4^Y4,      Z0^X5^Y5,      X9,            0,             0,             0,             0,             }, //3
    {X3,            Y4,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            X3^Y3^Z3,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //4
    {X3,            Y4,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //5
    {X3,            Y4,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           X3^Y3^Z5,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //6
    {X3,            Y4,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           X3^Y3^Z4,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y6^X7,         Z0^X6^Y7,      0,             0,             }, //7
    {X3,            Y3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z0^X4^Y4,      X8,            Y8,            X9,            0,             0,             0,             0,             }, //8
    {X3,            Y3,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X5^Y5,      Z0^X4^Y4,      Y8,            X9,            0,             0,             0,             0,             }, //9
    {X3,            Y3,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X9,            0,             0,             0,             0,             }, //10
    {X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         X9,            0,             0,             0,             0,             }, //11
    {X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X9,            0,             0,             0,             0,             }, //12
    {X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //13
    {X3,            Y3,            Y4,            X5,            X6,            Y6,            X7,            Y7,            X8,            Z1^X4^Y4,      Z0^X5^Y5,      Y8,            X9,            0,             0,             0,             0,             }, //14
    {X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //15
    {X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //16
    {X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //17
    {X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         0,             0,             0,             }, //18
    {X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^Y6,         0,             0,             0,             }, //19
    {X3,            Y3,            Y4,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X9,            0,             0,             0,             0,             }, //20
    {X3,            Y3,            Y4,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X9,            0,             0,             0,             0,             }, //21
    {X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         0,             0,             0,             }, //22
    {X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //23
    {X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //24
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         0,             0,             }, //25
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^Y8,         0,             0,             }, //26
    {X3,            Y3,            Y4,            X6,            X7,            Y7,            X8,            Y8,            X9,            Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         0,             0,             0,             0,             }, //27
    {X3,            Y3,            Y4,            X6,            X7,            Y7,            X8,            Y8,            X9,            Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             0,             }, //28
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         0,             0,             }, //29
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //30
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //31
    {X3,            Y3,            X6,            X7,            Y7,            X8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X3^Y8,         0,             0,             }, //32
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y6^X7,         X6^Y7,         0,             0,             }, //33
    {X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y6^X7,         Z0^X6^Y7,      0,             0,             }, //34
};

}// V2
} // Addr
} // namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx10addrlib.cpp
* @brief Contains the implementation of the Gfx10Lib class.
************************************************************************************************************************
*/

#include "gfx10addrlib.h"
#include "addrcommon.h"
#include "gfx10_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace rocr {
namespace Addr
{
/**
************************************************************************************************************************
*   Gfx10HwlInit
*
*   @brief
*       Factory entry point for the GFX10 address library: forwards the client to V2::Gfx10Lib::CreateObj.
*
*   @return
*       The pointer produced by V2::Gfx10Lib::CreateObj, returned as an Addr::Lib pointer.
************************************************************************************************************************
*/
Addr::Lib* Gfx10HwlInit(const Client* pClient)
{
    Addr::Lib* const pGfx10Lib = V2::Gfx10Lib::CreateObj(pClient);

    return pGfx10Lib;
}

namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// Static per-swizzle-mode flag table with ADDR_SW_MAX_TYPE rows; the trailing comment on each row names the
// swizzle mode it describes ("Reserved" rows are all zero). Each row carries 13 flag values whose meaning is given,
// column by column, by the header comment on the opening brace line below.
// The Gfx10Lib constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
    {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
    {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
    {{0,    0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_4KB_R_X

    {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
    {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X

    {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_VAR_Z_X
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_VAR_R_X
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
};

// Five three-component block shapes. For entry i, the product of the three components times (1 << i) is 256.
// NOTE(review): presumably indexed by log2(bytes per element), components ordered {w, h, d} -- confirm.
const Dim3d Gfx10Lib::Block256_3d[] =
{
    {8, 4, 8},
    {4, 4, 8},
    {4, 4, 4},
    {4, 2, 4},
    {2, 2, 4},
};

// Five three-component log2 block shapes. For entry i, the sum of the three components plus i is 16 (64K == 1 << 16).
// NOTE(review): presumably indexed by log2(bytes per element), components ordered {w, h, d} -- confirm.
const Dim3d Gfx10Lib::Block64K_Log2_3d[] =
{
    {6, 5, 5},
    {5, 5, 5},
    {5, 5, 4},
    {5, 4, 4},
    {4, 4, 4},
};
// Five three-component log2 block shapes. For entry i, the sum of the three components plus i is 12 (4K == 1 << 12).
// NOTE(review): presumably indexed by log2(bytes per element), components ordered {w, h, d} -- confirm.
const Dim3d Gfx10Lib::Block4K_Log2_3d[] =
{
    {4, 4, 4},
    {3, 4, 4},
    {3, 4, 3},
    {3, 3, 3},
    {2, 3, 3},
};

/**
************************************************************************************************************************
*   Gfx10Lib::Gfx10Lib
*
*   @brief
*       Constructor. Forwards the client to the Lib base class, zeroes the pipe-packer/shader-array log2 counts and
*       the pattern base indices, clears m_settings and seeds m_swizzleModeTable from the static SwizzleModeTable.
*
************************************************************************************************************************
*/
Gfx10Lib::Gfx10Lib(const Client* pClient)
    : Lib(pClient)
    , m_numPkrLog2(0)
    , m_numSaLog2(0)
    , m_colorBaseIndex(0)
    , m_xmaskBaseIndex(0)
    , m_htileBaseIndex(0)
    , m_dccBaseIndex(0)
{
    // Take a per-object copy of the static GFX10 swizzle-mode flags.
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));

    // All chip settings start out cleared.
    memset(&m_settings, 0, sizeof(m_settings));
}

/**
************************************************************************************************************************
*   Gfx10Lib::~Gfx10Lib
*
*   @brief
*       Destructor. The body is empty: no explicit cleanup is performed here.
************************************************************************************************************************
*/
Gfx10Lib::~Gfx10Lib()
{
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo. Fills in the HTILE pitch/height, meta block dimensions,
*       base alignment, per-slice and total sizes, the optional per-mip info array and the address equation.
*
*   @return
*       ADDR_OK on success. ADDR_INVALIDPARAMS unless the request is pipe-aligned and uses ADDR_SW_64KB_Z_X, or
*       ADDR_SW_VAR_Z_X with a non-zero m_blockVarSizeLog2.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Accepted swizzles: 64KB_Z_X always, VAR_Z_X only when a variable block size is configured.
    const bool swizzleSupported =
        (pIn->swizzleMode == ADDR_SW_64KB_Z_X) ||
        ((pIn->swizzleMode == ADDR_SW_VAR_Z_X) && (m_blockVarSizeLog2 != 0));

    if ((swizzleSupported == false) || (pIn->hTileFlags.pipeAligned != TRUE))
    {
        return ADDR_INVALIDPARAMS;
    }

    Dim3d         blkDim  = {};
    const UINT_32 blkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
                                           ADDR_RSRC_TEX_2D,
                                           pIn->swizzleMode,
                                           0,
                                           0,
                                           TRUE,
                                           &blkDim);

    pOut->metaBlkWidth  = blkDim.w;
    pOut->metaBlkHeight = blkDim.h;
    pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  blkDim.w);
    pOut->height        = PowTwoAlign(pIn->unalignedHeight, blkDim.h);
    pOut->baseAlign     = Max(blkSize, 1u << (m_pipesLog2 + 11u));

    if (pIn->numMipLevels <= 1)
    {
        // Single level: the slice is simply the aligned surface measured in meta blocks.
        const UINT_32 blksAcross = pOut->pitch  / blkDim.w;
        const UINT_32 blksDown   = pOut->height / blkDim.h;

        pOut->metaBlkNumPerSlice = blksAcross * blksDown;
        pOut->sliceSize          = pOut->metaBlkNumPerSlice * blkSize;

        if (pOut->pMipInfo != NULL)
        {
            pOut->pMipInfo[0].inMiptail = FALSE;
            pOut->pMipInfo[0].offset    = 0;
            pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
        }
    }
    else
    {
        ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

        const bool hasMipTail = (pIn->firstMipIdInTail != pIn->numMipLevels);

        // When a mip tail exists it occupies one meta block at offset 0, so the other levels start after it.
        UINT_32 sliceOffset = hasMipTail ? blkSize : 0;

        // Lay out the levels outside the tail, walking from the last of them down to mip 0.
        INT_32 mipId = static_cast<INT_32>(pIn->firstMipIdInTail);

        while (mipId > 0)
        {
            mipId--;

            UINT_32 levelWidth, levelHeight;

            GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, mipId, &levelWidth, &levelHeight);

            levelWidth  = PowTwoAlign(levelWidth,  blkDim.w);
            levelHeight = PowTwoAlign(levelHeight, blkDim.h);

            const UINT_32 blksAcross = levelWidth  / blkDim.w;
            const UINT_32 blksDown   = levelHeight / blkDim.h;
            const UINT_32 levelBytes = blksAcross * blksDown * blkSize;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[mipId].inMiptail = FALSE;
                pOut->pMipInfo[mipId].offset    = sliceOffset;
                pOut->pMipInfo[mipId].sliceSize = levelBytes;
            }

            sliceOffset += levelBytes;
        }

        pOut->sliceSize          = sliceOffset;
        pOut->metaBlkNumPerSlice = sliceOffset / blkSize;

        if (pOut->pMipInfo != NULL)
        {
            // Every level in the tail reports offset 0 and size 0 ...
            for (UINT_32 tailMip = pIn->firstMipIdInTail; tailMip < pIn->numMipLevels; tailMip++)
            {
                pOut->pMipInfo[tailMip].inMiptail = TRUE;
                pOut->pMipInfo[tailMip].offset    = 0;
                pOut->pMipInfo[tailMip].sliceSize = 0;
            }

            // ... except the first tail level, which is charged the whole tail meta block.
            if (hasMipTail)
            {
                pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = blkSize;
            }
        }
    }

    pOut->htileBytes = pOut->sliceSize * pIn->numSlices;

    // Get the HTILE address equation (copied from HtileAddrFromCoord).
    // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
    const UINT_32 patSlot       = m_xmaskBaseIndex;
    const UINT_8* pPatIdxLookup = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;

    ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[pPatIdxLookup[patSlot]]) == 72 * 2);
    pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[pPatIdxLookup[patSlot]];

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeCmaskInfo
*
*   @brief
*       Interface function stub of AddrComputeCmaskInfo. Fills in the CMASK pitch/height, meta block dimensions,
*       base alignment, per-slice and total sizes, the optional per-mip info array and the address equation.
*
*   @return
*       ADDR_OK on success. ADDR_INVALIDPARAMS unless the resource is ADDR_RSRC_TEX_2D, the request is pipe-aligned
*       and the swizzle is ADDR_SW_64KB_Z_X, or ADDR_SW_VAR_Z_X with a non-zero m_blockVarSizeLog2.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
    const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Accepted swizzles: 64KB_Z_X always, VAR_Z_X only when a variable block size is configured.
    const bool swizzleSupported =
        (pIn->swizzleMode == ADDR_SW_64KB_Z_X) ||
        ((pIn->swizzleMode == ADDR_SW_VAR_Z_X) && (m_blockVarSizeLog2 != 0));

    if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
        (pIn->cMaskFlags.pipeAligned != TRUE)   ||
        (swizzleSupported == false))
    {
        return ADDR_INVALIDPARAMS;
    }

    Dim3d         blkDim  = {};
    const UINT_32 blkSize = GetMetaBlkSize(Gfx10DataFmask,
                                           ADDR_RSRC_TEX_2D,
                                           pIn->swizzleMode,
                                           0,
                                           0,
                                           TRUE,
                                           &blkDim);

    pOut->metaBlkWidth  = blkDim.w;
    pOut->metaBlkHeight = blkDim.h;
    pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  blkDim.w);
    pOut->height        = PowTwoAlign(pIn->unalignedHeight, blkDim.h);
    pOut->baseAlign     = blkSize;

    if (pIn->numMipLevels <= 1)
    {
        // Single level: the slice is simply the aligned surface measured in meta blocks.
        const UINT_32 blksAcross = pOut->pitch  / blkDim.w;
        const UINT_32 blksDown   = pOut->height / blkDim.h;

        pOut->metaBlkNumPerSlice = blksAcross * blksDown;

        if (pOut->pMipInfo != NULL)
        {
            pOut->pMipInfo[0].inMiptail = FALSE;
            pOut->pMipInfo[0].offset    = 0;
            pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * blkSize;
        }
    }
    else
    {
        ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

        const bool hasMipTail = (pIn->firstMipIdInTail != pIn->numMipLevels);

        // When a mip tail exists it occupies the first meta block of the slice.
        UINT_32 blksInSlice = hasMipTail ? 1 : 0;

        // Lay out the levels outside the tail, walking from the last of them down to mip 0.
        INT_32 mipId = static_cast<INT_32>(pIn->firstMipIdInTail);

        while (mipId > 0)
        {
            mipId--;

            UINT_32 levelWidth, levelHeight;

            GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, mipId, &levelWidth, &levelHeight);

            levelWidth  = PowTwoAlign(levelWidth,  blkDim.w);
            levelHeight = PowTwoAlign(levelHeight, blkDim.h);

            const UINT_32 blksAcross = levelWidth  / blkDim.w;
            const UINT_32 blksDown   = levelHeight / blkDim.h;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[mipId].inMiptail = FALSE;
                pOut->pMipInfo[mipId].offset    = blksInSlice * blkSize;
                pOut->pMipInfo[mipId].sliceSize = blksAcross * blksDown * blkSize;
            }

            blksInSlice += blksAcross * blksDown;
        }

        pOut->metaBlkNumPerSlice = blksInSlice;

        if (pOut->pMipInfo != NULL)
        {
            // Every level in the tail reports offset 0 and size 0 ...
            for (UINT_32 tailMip = pIn->firstMipIdInTail; tailMip < pIn->numMipLevels; tailMip++)
            {
                pOut->pMipInfo[tailMip].inMiptail = TRUE;
                pOut->pMipInfo[tailMip].offset    = 0;
                pOut->pMipInfo[tailMip].sliceSize = 0;
            }

            // ... except the first tail level, which is charged the whole tail meta block.
            if (hasMipTail)
            {
                pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = blkSize;
            }
        }
    }

    pOut->sliceSize  = pOut->metaBlkNumPerSlice * blkSize;
    pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;

    // Get the CMASK address equation (copied from CmaskAddrFromCoord)
    const UINT_32  fmaskBitsPerPixel = GetFmaskBpp(1, 1);
    const UINT_32  fmaskBytesLog2    = Log2(fmaskBitsPerPixel >> 3);
    const UINT_32  patSlot           = m_xmaskBaseIndex + fmaskBytesLog2;
    const UINT_8*  pPatIdxLookup     =
        (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
        (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);

    ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[pPatIdxLookup[patSlot]]) == 68 * 2);
    pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[pPatIdxLookup[patSlot]];

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
    {
        // Hardware supports DCC for 256B swizzle modes, but address lib does not: 256B swizzle modes are only
        // selected for small surfaces, and enabling DCC for a small surface is not helpful.
        ret = ADDR_INVALIDPARAMS;
    }
    else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
    {
        // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
        ret = ADDR_INVALIDPARAMS;
    }
    else
    {
        // log2 of the element size in bytes; used by the compress block lookup, the meta block query and the
        // equation lookup below.
        const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);

        // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
        ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));

        const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);

        pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
        pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
        pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;

        Dim3d         metaBlk     = {};
        const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
        const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
                                                   pIn->resourceType,
                                                   pIn->swizzleMode,
                                                   elemLog2,
                                                   numFragLog2,
                                                   pIn->dccKeyFlags.pipeAligned,
                                                   &metaBlk);

        pOut->dccRamBaseAlign   = metaBlkSize;
        pOut->metaBlkWidth      = metaBlk.w;
        pOut->metaBlkHeight     = metaBlk.h;
        pOut->metaBlkDepth      = metaBlk.d;
        pOut->metaBlkSize       = metaBlkSize;

        // Surface dimensions padded up to whole meta blocks
        pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
        pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
        pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);

        if (pIn->numMipLevels > 1)
        {
            ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

            // When a mip tail exists it takes one meta block at offset 0; the mips outside the tail are then
            // laid out after it, walking from the smallest of them back to mip 0.
            UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;

            for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);

                mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
                mipHeight = PowTwoAlign(mipHeight, metaBlk.h);

                const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
                const UINT_32 heightInM    = mipHeight / metaBlk.h;
                const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].inMiptail = FALSE;
                    pOut->pMipInfo[i].offset    = offset;
                    pOut->pMipInfo[i].sliceSize = mipSliceSize;
                }

                offset += mipSliceSize;
            }

            pOut->dccRamSliceSize    = offset;
            pOut->metaBlkNumPerSlice = offset / metaBlkSize;
            pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);

            if (pOut->pMipInfo != NULL)
            {
                for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
                {
                    pOut->pMipInfo[i].inMiptail = TRUE;
                    pOut->pMipInfo[i].offset    = 0;
                    pOut->pMipInfo[i].sliceSize = 0;
                }

                // The whole tail is accounted for on its first mip level
                if (pIn->firstMipIdInTail != pIn->numMipLevels)
                {
                    pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
                }
            }
        }
        else
        {
            const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
            const UINT_32 heightInM = pOut->height / metaBlk.h;

            pOut->metaBlkNumPerSlice = pitchInM * heightInM;
            pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
            pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].inMiptail = FALSE;
                pOut->pMipInfo[0].offset    = 0;
                pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
            }
        }

        // Get the DCC address equation (copied from DccAddrFromCoord)
        const UINT_32 numPipeLog2 = m_pipesLog2;
        UINT_32       index       = m_dccBaseIndex + elemLog2;
        const UINT_8* patIdxTable;

        if (m_settings.supportRbPlus)
        {
            patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;

            if (pIn->dccKeyFlags.pipeAligned)
            {
                index += MaxNumOfBpp;

                if (m_numPkrLog2 < 2)
                {
                    index += m_pipesLog2 * MaxNumOfBpp;
                }
                else
                {
                    // 4 groups for "m_numPkrLog2 < 2" case
                    index += 4 * MaxNumOfBpp;

                    const UINT_32 dccPipePerPkr = 3;

                    index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
                             (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
                }
            }
        }
        else
        {
            patIdxTable = GFX10_DCC_64K_R_X_PATIDX;

            if (pIn->dccKeyFlags.pipeAligned)
            {
                index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
            }
            else
            {
                index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
            }
        }

        // The cast below hands out a table row as an array of UINT_16; make sure the row size is as expected
        ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
        pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeCmaskAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeCmaskAddrFromCoord
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
{
    // Only support pipe aligned CMask
    ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);

    // Query the CMASK layout (meta block dimensions, pitch, slice size) for this surface first
    ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
    input.size            = sizeof(input);
    input.cMaskFlags      = pIn->cMaskFlags;
    input.colorFlags      = pIn->colorFlags;
    input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
    input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    input.numSlices       = Max(pIn->numSlices,       1u);
    input.swizzleMode     = pIn->swizzleMode;
    input.resourceType    = pIn->resourceType;

    ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
    output.size = sizeof(output);

    ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);

    if (returnCode == ADDR_OK)
    {
        const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
        const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
        const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
        const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
        const UINT_8*  patIdxTable   =
            (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
            (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);

        const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
        const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
        const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
                                                                      blkSizeLog2 + 1, // +1 for nibble offset
                                                                      pIn->x,
                                                                      pIn->y,
                                                                      pIn->slice,
                                                                      0);
        const UINT_32 xb       = pIn->x / output.metaBlkWidth;
        const UINT_32 yb       = pIn->y / output.metaBlkHeight;
        const UINT_32 pb       = output.pitch / output.metaBlkWidth;
        const UINT_32 blkIndex = (yb * pb) + xb;
        const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;

        // Widen before multiplying so the slice base cannot wrap around in 32-bit arithmetic; this matches the
        // HTILE and DCC address computations.
        pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
                     (blkIndex * (1 << blkSizeLog2)) +
                     ((blkOffset >> 1) ^ pipeXor);

        // blkOffset counts nibbles: its low bit picks the nibble inside the addressed byte (bit 0 or bit 4)
        pOut->bitPosition = (blkOffset & 1) << 2;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeHtileAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeHtileAddrFromCoord
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
{
    // Mip-mapped HTILE is not handled by this routine
    if (pIn->numMipLevels > 1)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    // Query the HTILE layout of the (single level) surface: meta block dimensions, pitch and slice size
    ADDR2_COMPUTE_HTILE_INFO_INPUT  infoIn  = {};
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {};

    infoIn.size            = sizeof(infoIn);
    infoIn.hTileFlags      = pIn->hTileFlags;
    infoIn.depthFlags      = pIn->depthflags;
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices,       1u);
    infoIn.numMipLevels    = 1;
    infoOut.size           = sizeof(infoOut);

    const ADDR_E_RETURNCODE status = ComputeHtileInfo(&infoIn, &infoOut);

    if (status != ADDR_OK)
    {
        return status;
    }

    // Select the swizzle pattern for this pipe configuration and sample count
    const UINT_32 sampleLog2 = Log2(pIn->numSamples);
    const UINT_32 patSlot    = m_htileBaseIndex + sampleLog2;
    const UINT_8* pPatIdx    = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;

    // Meta block size in bytes, and the offset of the coordinate inside its meta block
    const UINT_32 blkBytesLog2 = Log2(infoOut.metaBlkWidth) + Log2(infoOut.metaBlkHeight) - 4;
    const UINT_32 blkBytes     = 1 << blkBytesLog2;
    const UINT_32 inBlkMask    = blkBytes - 1;
    const UINT_32 nibbleOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[pPatIdx[patSlot]],
                                                                 blkBytesLog2 + 1, // +1 for nibble offset
                                                                 pIn->x,
                                                                 pIn->y,
                                                                 pIn->slice,
                                                                 0);

    // Index of the meta block holding (x, y), counted row by row across the padded pitch
    const UINT_32 blkCol     = pIn->x / infoOut.metaBlkWidth;
    const UINT_32 blkRow     = pIn->y / infoOut.metaBlkHeight;
    const UINT_32 blksPerRow = infoOut.pitch / infoOut.metaBlkWidth;
    const UINT_32 blkNumber  = (blkRow * blksPerRow) + blkCol;

    // Pipe XOR bits sit above the pipe interleave and are confined to the meta block
    const UINT_32 pipeBits   = (1 << m_pipesLog2) - 1;
    const UINT_32 xorInBlk   = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlkMask;

    const UINT_64 sliceBase  = static_cast<UINT_64>(infoOut.sliceSize) * pIn->slice;
    const UINT_32 blkBase    = blkNumber * blkBytes;
    const UINT_32 byteInBlk  = (nibbleOffset >> 1) ^ xorInBlk;

    pOut->addr = sliceBase + blkBase + byteInBlk;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeHtileCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeHtileCoordFromAddr
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
{
    // The address -> coordinate conversion is not implemented here and pOut is never written, so report that to
    // the caller instead of claiming success with an untouched output structure.
    ADDR_NOT_IMPLEMENTED();

    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
*
*   @brief
*       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
{
    // Anything other than a single-level, single-fragment, non-linear 2D surface in 64KB_R_X is rejected
    const BOOL_32 surfaceRejected = (pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
                                    (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
                                    (pIn->dccKeyFlags.linear == TRUE)             ||
                                    (pIn->numFrags           >  1)                ||
                                    (pIn->numMipLevels       >  1)                ||
                                    (pIn->mipId              >  0);

    // The caller must supply the DCC layout: pitch and meta block size are used as divisors / log2 inputs, and
    // the slice size is needed as soon as a slice other than 0 is addressed
    const BOOL_32 layoutMissing   = (pIn->pitch         == 0) ||
                                    (pIn->metaBlkWidth  == 0) ||
                                    (pIn->metaBlkHeight == 0) ||
                                    ((pIn->slice > 0) && (pIn->dccRamSliceSize == 0));

    return (surfaceRejected || layoutMissing) ? ADDR_NOTSUPPORTED : ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeDccAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeDccAddrFromCoord
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    const UINT_32 bppLog2   = Log2(pIn->bpp >> 3);
    const UINT_32 pipesLog2 = m_pipesLog2;
    const UINT_8* pPatIdx   = NULL;

    // Number of MaxNumOfBpp-sized groups to skip in the pattern index table before the group that matches this
    // pipe / packer configuration
    UINT_32 groupsToSkip = 0;

    if (m_settings.supportRbPlus)
    {
        pPatIdx = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;

        if (pIn->dccKeyFlags.pipeAligned)
        {
            if (m_numPkrLog2 < 2)
            {
                groupsToSkip = 1 + pipesLog2;
            }
            else
            {
                const UINT_32 dccPipePerPkr = 3;

                // 1 leading group + 4 groups for "m_numPkrLog2 < 2" case
                groupsToSkip = 1 + 4 +
                               (m_numPkrLog2 - 2) * dccPipePerPkr +
                               (pipesLog2 - m_numPkrLog2);
            }
        }
    }
    else
    {
        pPatIdx = GFX10_DCC_64K_R_X_PATIDX;

        groupsToSkip = pIn->dccKeyFlags.pipeAligned ? (pipesLog2 + UnalignedDccType)
                                                    : Min(pipesLog2, UnalignedDccType - 1);
    }

    const UINT_32 patSlot = m_dccBaseIndex + bppLog2 + groupsToSkip * MaxNumOfBpp;

    // Meta block size in bytes, and the offset of the coordinate inside its meta block
    const UINT_32 blkBytesLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + bppLog2 - 8;
    const UINT_32 blkBytes     = 1 << blkBytesLog2;
    const UINT_32 inBlkMask    = blkBytes - 1;
    const UINT_32 nibbleOffset =
        ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[pPatIdx[patSlot]],
                                        blkBytesLog2 + 1, // +1 for nibble offset
                                        pIn->x,
                                        pIn->y,
                                        pIn->slice,
                                        0);

    // Index of the meta block holding (x, y), counted row by row across the pitch
    const UINT_32 blkCol     = pIn->x / pIn->metaBlkWidth;
    const UINT_32 blkRow     = pIn->y / pIn->metaBlkHeight;
    const UINT_32 blksPerRow = pIn->pitch / pIn->metaBlkWidth;
    const UINT_32 blkNumber  = (blkRow * blksPerRow) + blkCol;

    // Pipe XOR bits sit above the pipe interleave and are confined to the meta block
    const UINT_32 pipeBits   = (1 << pipesLog2) - 1;
    const UINT_32 xorInBlk   = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlkMask;

    const UINT_64 sliceBase  = static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice;
    const UINT_32 blkBase    = blkNumber * blkBytes;
    const UINT_32 byteInBlk  = (nibbleOffset >> 1) ^ xorInBlk;

    pOut->addr = sliceBase + blkBase + byteInBlk;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlInitGlobalParams
*
*   @brief
*       Initializes global parameters
*
*   @return
*       TRUE if all settings are valid
*
************************************************************************************************************************
*/
BOOL_32 Gfx10Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    GB_ADDR_CONFIG_GFX10 addrConfig;

    addrConfig.u32All = pCreateIn->regValue.gbAddrConfig;

    // Every register field is decoded on its own. A field holding an unrecognized encoding leaves the members it
    // feeds untouched and makes the whole call report failure.
    BOOL_32 pipesKnown      = TRUE;
    BOOL_32 interleaveKnown = TRUE;
    BOOL_32 compFragKnown   = TRUE;

    // These values are copied from CModel code
    switch (addrConfig.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:
            m_pipesLog2 = 0;
            break;
        case ADDR_CONFIG_2_PIPE:
            m_pipesLog2 = 1;
            break;
        case ADDR_CONFIG_4_PIPE:
            m_pipesLog2 = 2;
            break;
        case ADDR_CONFIG_8_PIPE:
            m_pipesLog2 = 3;
            break;
        case ADDR_CONFIG_16_PIPE:
            m_pipesLog2 = 4;
            break;
        case ADDR_CONFIG_32_PIPE:
            m_pipesLog2 = 5;
            break;
        case ADDR_CONFIG_64_PIPE:
            m_pipesLog2 = 6;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            pipesKnown = FALSE;
            break;
    }

    if (pipesKnown)
    {
        m_pipes = 1u << m_pipesLog2;
    }

    switch (addrConfig.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveLog2  = 8;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveLog2  = 9;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveLog2  = 10;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveLog2  = 11;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            interleaveKnown = FALSE;
            break;
    }

    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
    // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
    // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);

    switch (addrConfig.bits.MAX_COMPRESSED_FRAGS)
    {
        case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFragLog2 = 0;
            break;
        case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFragLog2 = 1;
            break;
        case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFragLog2 = 2;
            break;
        case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
            m_maxCompFragLog2 = 3;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            compFragKnown = FALSE;
            break;
    }

    if (compFragKnown)
    {
        m_maxCompFrag = 1u << m_maxCompFragLog2;
    }

    // Advance the base indices into the pattern index tables. CMASK and HTILE first skip the unaligned case (one
    // group), then all three tables skip one group per log2(pipes).
    m_xmaskBaseIndex += (1 + m_pipesLog2) * MaxNumOfBppCMask;
    m_htileBaseIndex += (1 + m_pipesLog2) * MaxNumOfAA;
    m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;

    if (m_settings.supportRbPlus)
    {
        m_numPkrLog2 = addrConfig.bits.NUM_PKRS;
        m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;

        ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));

        ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
                      sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));

        // Configurations with 4 or more packers use additional groups further into the tables
        if (m_numPkrLog2 >= 2)
        {
            m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
            m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
            m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
        }

        // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
        // corresponding SW_64KB_* mode
        m_blockVarSizeLog2 = m_pipesLog2 + 14;
    }
    else
    {
        const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
                                    static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
                                    1;

        // Compile-time check: each table holds one group per pipe configuration plus the unaligned group
        ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
        ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
                      (numPipeType + 1) * MaxNumOfBppCMask);
    }

    const BOOL_32 valid = (pipesKnown && interleaveKnown && compFragKnown) ? TRUE : FALSE;

    // The equation table is only built from a fully recognized configuration
    if (valid)
    {
        InitEquationTable();
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlConvertChipFamily
*
*   @brief
*       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
*   @return
*       ChipFamily
************************************************************************************************************************
*/
ChipFamily Gfx10Lib::HwlConvertChipFamily(
    UINT_32 chipFamily,        ///< [in] chip family defined in atiid.h
    UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
{
    // Every family handled here maps to the same ChipFamily value
    ChipFamily family = ADDR_CHIP_FAMILY_NAVI;

    // Defaults; the per-chip checks below override them where a chip differs
    m_settings.dccUnsup3DSwDis  = 1;
    m_settings.dsMipmapHtileFix = 1;

    switch (chipFamily)
    {
        case FAMILY_NV:
            if (ASICREV_IS_NAVI10_P(chipRevision))
            {
                m_settings.dsMipmapHtileFix = 0;
                m_settings.isDcn20          = 1;
            }

            if (ASICREV_IS_NAVI12_P(chipRevision))
            {
                m_settings.isDcn20 = 1;
            }

            if (ASICREV_IS_NAVI14_M(chipRevision))
            {
                m_settings.isDcn20 = 1;
            }

            if (ASICREV_IS_NAVI21_M(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_NAVI22_P(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_NAVI23_P(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }

            if (ASICREV_IS_NAVI24_P(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            break;

        case FAMILY_VGH:
            if (ASICREV_IS_VANGOGH(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            else
            {
                ADDR_ASSERT(!"Unknown chip revision");
            }
            break;
        case FAMILY_RMB:
            if (ASICREV_IS_REMBRANDT(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            else
            {
                ADDR_ASSERT(!"Unknown chip revision");
            }
            break;
        case FAMILY_RPL:
            if (ASICREV_IS_RAPHAEL(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            else
            {
                // Flag unrecognized revisions the same way the other single-chip families do
                ADDR_ASSERT(!"Unknown chip revision");
            }
            break;
        case FAMILY_MDN:
            if (ASICREV_IS_MENDOCINO(chipRevision))
            {
                m_settings.supportRbPlus   = 1;
                m_settings.dccUnsup3DSwDis = 0;
            }
            else
            {
                ADDR_ASSERT(!"Unknown chip revision");
            }
            break;
        default:
            ADDR_ASSERT(!"Unknown chip family");
            break;
    }

    m_configFlags.use32bppFor422Fmt = TRUE;

    return family;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetBlk256SizeLog2
*
*   @brief
*       Get block 256 size
*
*   @return
*       N/A
************************************************************************************************************************
*/
void Gfx10Lib::GetBlk256SizeLog2(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    // Number of log2 "bits" to distribute over the block dimensions: 8 (2^8 = 256) minus the element size
    UINT_32 bitsToSplit = 8 - elemLog2;

    if (IsThin(resourceType, swizzleMode))
    {
        // Z-order swizzles also take the sample count out of the budget
        if (IsZOrderSwizzle(swizzleMode))
        {
            bitsToSplit -= numSamplesLog2;
        }

        // Split between width and height; width gets the odd bit
        const UINT_32 halfBits = bitsToSplit >> 1;

        pBlock->w = halfBits + (bitsToSplit & 1);
        pBlock->h = halfBits;
        pBlock->d = 0;
    }
    else
    {
        ADDR_ASSERT(IsThick(resourceType, swizzleMode));

        // Split three ways; leftover bits go to depth first, then to width
        const UINT_32 thirdBits = bitsToSplit / 3;
        const UINT_32 leftover  = bitsToSplit % 3;

        pBlock->d = thirdBits + ((leftover > 0) ? 1 : 0);
        pBlock->w = thirdBits + ((leftover > 1) ? 1 : 0);
        pBlock->h = thirdBits;
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetCompressedBlockSizeLog2
*
*   @brief
*       Get compress block size
*
*   @return
*       N/A
************************************************************************************************************************
*/
void Gfx10Lib::GetCompressedBlockSizeLog2(
    Gfx10DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    if (dataType != Gfx10DataColor)
    {
        // Depth/stencil and fmask use a fixed 2^3 x 2^3 x 2^0 compressed block
        ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));

        pBlock->w = 3;
        pBlock->h = 3;
        pBlock->d = 0;
    }
    else
    {
        // Color data is compressed at 256B block granularity
        GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetMetaOverlapLog2
*
*   @brief
*       Get meta block overlap
*
*   @return
*       Log2 of the meta block overlap, clamped to a minimum of 0
************************************************************************************************************************
*/
INT_32 Gfx10Lib::GetMetaOverlapLog2(
    Gfx10DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2     ///< [in] number of samples
    ) const
{
    Dim3d compressedDim;
    Dim3d blk256Dim;

    GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compressedDim);
    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &blk256Dim);

    // Total log2 element counts of the compressed block and of the 256B block
    const INT_32 compressedLog2 = compressedDim.w + compressedDim.h + compressedDim.d;
    const INT_32 blk256Log2     = blk256Dim.w + blk256Dim.h + blk256Dim.d;
    const INT_32 largerLog2     = Max(compressedLog2, blk256Log2);
    const INT_32 pipesLog2      = GetEffectiveNumPipes();

    INT_32 overlapLog2 = pipesLog2 - largerLog2;

    // RB+ with more than two pipes contributes one extra overlap bit
    if (m_settings.supportRbPlus && (pipesLog2 > 1))
    {
        overlapLog2 += 1;
    }

    // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
    if ((numSamplesLog2 == 3) && (elemLog2 == 4))
    {
        overlapLog2 -= 1;
    }

    // The overlap is never reported as negative
    return Max(overlapLog2, 0);
}

/**
************************************************************************************************************************
*   Gfx10Lib::Get3DMetaOverlapLog2
*
*   @brief
*       Get 3d meta block overlap
*
*   @return
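*       Log2 of the 3D meta block overlap (0 for standard swizzle modes or when the computed value is negative)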
************************************************************************************************************************
*/
INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2           ///< [in] element size log2
    ) const
{
    // Only the width of the (single-sample) 256B block takes part in the overlap
    Dim3d blk256Dim;
    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &blk256Dim);

    INT_32 overlapLog2 = GetEffectiveNumPipes() - static_cast<INT_32>(blk256Dim.w);

    // RB+ contributes one extra overlap bit
    if (m_settings.supportRbPlus)
    {
        overlapLog2 += 1;
    }

    // No overlap is reported for standard swizzle modes, and the result is never negative
    const BOOL_32 noOverlap = (overlapLog2 < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE);

    return noOverlap ? 0 : overlapLog2;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetPipeRotateAmount
*
*   @brief
*       Get pipe rotate amount
*
*   @return
*       Pipe rotate amount
************************************************************************************************************************
*/

INT_32 Gfx10Lib::GetPipeRotateAmount(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
    ) const
{
    // Rotation only applies on RB+ configurations with more than two pipes and
    // at least twice as many pipes as SAs; everything else rotates by zero.
    INT_32 rotateLog2 = 0;

    if (m_settings.supportRbPlus && (m_pipesLog2 > 1) && (m_pipesLog2 >= (m_numSaLog2 + 1)))
    {
        if ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode))
        {
            // Exactly two pipes per SA with an RB-aligned swizzle mode: rotate by one bit
            rotateLog2 = 1;
        }
        else
        {
            // Otherwise rotate by the number of pipe bits above two-per-SA
            rotateLog2 = m_pipesLog2 - (m_numSaLog2 + 1);
        }
    }

    return rotateLog2;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetMetaBlkSize
*
*   @brief
*       Get metadata block size
*
*   @return
*       Meta block size
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::GetMetaBlkSize(
    Gfx10DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    BOOL_32          pipeAlign,         ///< [in] pipe align
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    const INT_32 metaElemLog2    = GetMetaElementSizeLog2(dataType);
    const INT_32 metaCacheLog2   = GetMetaCacheSizeLog2(dataType);
    const INT_32 compBlkLog2     = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
    const INT_32 metaSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
                                   numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
    const INT_32 dataBlkLog2     = GetBlockSizeLog2(swizzleMode);
    const INT_32 pipeIntlvLog2   = static_cast<INT_32>(m_pipeInterleaveLog2);

    // RB+ configuration with exactly two pipes per SA (and more than two pipes overall);
    // such configurations may count one extra pipe bit below.
    const BOOL_32 rbPlusTwoPipesPerSa = m_settings.supportRbPlus           &&
                                        (m_pipesLog2 == m_numSaLog2 + 1)   &&
                                        (m_pipesLog2 > 1);

    const BOOL_32 thin = IsThin(resourceType, swizzleMode);

    INT_32 pipesLog2 = m_pipesLog2;
    INT_32 sizeLog2;

    if (thin)
    {
        if (pipeAlign == FALSE)
        {
            // Not pipe aligned: at most 4KB, never larger than the data block
            sizeLog2 = Min(dataBlkLog2, 12);
        }
        else if ((IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
        {
            // Pipe aligned standard/display swizzle: one interleave per pipe (at least 4KB),
            // never larger than the data block
            sizeLog2 = Min(Max(pipeIntlvLog2 + pipesLog2, 12), dataBlkLog2);
        }
        else
        {
            if (rbPlusTwoPipesPerSa)
            {
                pipesLog2++;
            }

            const INT_32 rotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);

            if (pipesLog2 < 4)
            {
                sizeLog2 = Max(pipeIntlvLog2 + pipesLog2, 12);
            }
            else
            {
                INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);

                // In 16Bpe 8xaa, we have an extra overlap bit
                const BOOL_32 is16Bpe8xaa = (elemLog2 == 4) && (numSamplesLog2 == 3);

                if ((rotateLog2 > 0) &&
                    is16Bpe8xaa      &&
                    (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
                {
                    overlapLog2++;
                }

                sizeLog2 = Max(metaCacheLog2 + overlapLog2 + pipesLog2, pipeIntlvLog2 + pipesLog2);

                // Special case: RB+ RT-opt swizzle with 64 pipes and 8 compressed fragments needs at least 32KB
                if (m_settings.supportRbPlus    &&
                    IsRtOptSwizzle(swizzleMode) &&
                    (pipesLog2 == 6)            &&
                    (numSamplesLog2 == 3)       &&
                    (m_maxCompFragLog2 == 3))
                {
                    sizeLog2 = Max(sizeLog2, 15);
                }
            }

            if (dataType == Gfx10DataDepthStencil)
            {
                // For htile surfaces, pad meta block size to 2K * num_pipes
                sizeLog2 = Max(sizeLog2, 11 + pipesLog2);
            }

            const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);

            if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (rotateLog2 > 0))
            {
                const INT_32 rtOptMinLog2 = 8 + m_pipesLog2 + Max(rotateLog2, compFragLog2 - 1);

                sizeLog2 = Max(sizeLog2, rtOptMinLog2);
            }
        }
    }
    else
    {
        ADDR_ASSERT(IsThick(resourceType, swizzleMode));

        // Thick blocks are 4KB unless pipe alignment asks for more
        sizeLog2 = 12;

        if (pipeAlign)
        {
            if (rbPlusTwoPipesPerSa && IsRbAligned(resourceType, swizzleMode))
            {
                pipesLog2++;
            }

            const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);

            sizeLog2 = Max(metaCacheLog2 + overlapLog2 + pipesLog2, pipeIntlvLog2 + pipesLog2);
            sizeLog2 = Max(sizeLog2, 12);
        }
    }

    // Log2 of the number of data elements (per sample plane) covered by one meta block
    const INT_32 coveredLog2 = sizeLog2 + compBlkLog2 - elemLog2 - metaSamplesLog2 - metaElemLog2;

    if (thin)
    {
        // Split over width/height; an odd leftover bit goes to width
        const INT_32 half = coveredLog2 >> 1;

        pBlock->w = 1 << (half + (coveredLog2 & 1));
        pBlock->h = 1 << half;
        pBlock->d = 1;
    }
    else
    {
        // Split over width/height/depth; leftover bits go to width first, then height
        const INT_32 third     = coveredLog2 / 3;
        const INT_32 remainder = coveredLog2 % 3;

        pBlock->w = 1 << (third + ((remainder > 0) ? 1 : 0));
        pBlock->h = 1 << (third + ((remainder > 1) ? 1 : 0));
        pBlock->d = 1 << third;
    }

    return (1 << static_cast<UINT_32>(sizeLog2));
}

/**
************************************************************************************************************************
*   Gfx10Lib::ConvertSwizzlePatternToEquation
*
*   @brief
*       Convert swizzle pattern to equation.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
    UINT_32                elemLog2,  ///< [in] element bytes log2
    AddrResourceType       rsrcType,  ///< [in] resource type
    AddrSwizzleMode        swMode,    ///< [in] swizzle mode
    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
    const
{
    // Overview:
    //   Each address bit i of the swizzle pattern is a set of x/y/z coordinate bits. The output equation
    //   stores, per address bit, up to three single-coordinate-bit terms: addr[i], xor1[i] and xor2[i].
    //   Channels are encoded as 0 = x, 1 = y, 2 = z. Indices written for the x channel are offset by
    //   elemLog2, while y and z indices are written unshifted.
    //   Three cases are handled: non-XOR swizzle modes, thin XOR modes, and the remaining (thick) XOR modes.

    // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
    ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
    GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

    const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
    const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
    // Start from an all-zero equation, then fill in the header fields
    memset(pEquation, 0, sizeof(ADDR_EQUATION));
    pEquation->numBits            = blockSizeLog2;
    pEquation->numBitComponents   = pPatInfo->maxItemCount;
    pEquation->stackedDepthSlices = FALSE;

    // The lowest elemLog2 address bits are emitted as x-channel bits 0..elemLog2-1
    for (UINT_32 i = 0; i < elemLog2; i++)
    {
        pEquation->addr[i].channel = 0;
        pEquation->addr[i].valid   = 1;
        pEquation->addr[i].index   = i;
    }

    if (IsXor(swMode) == FALSE)
    {
        // Non-XOR modes: every pattern bit is expected to be exactly one coordinate bit,
        // so it maps directly to addr[i] and both xor terms stay empty.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            ADDR_ASSERT(IsPow2(pSwizzle[i].value));

            if (pSwizzle[i].x != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));

                pEquation->addr[i].channel = 0;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
            }
            else if (pSwizzle[i].y != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));

                pEquation->addr[i].channel = 1;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].y);
            }
            else
            {
                ADDR_ASSERT(pSwizzle[i].z != 0);
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                pEquation->addr[i].channel = 2;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].z);
            }

            pEquation->xor1[i].value = 0;
            pEquation->xor2[i].value = 0;
        }
    }
    else if (IsThin(rsrcType, swMode))
    {
        // Thin XOR modes: only x and y can become addr terms; a z term (if any) becomes an xor term.
        Dim3d dim;
        ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);

        const UINT_32 blkXLog2 = Log2(dim.w);
        const UINT_32 blkYLog2 = Log2(dim.h);
        const UINT_32 blkXMask = dim.w - 1;
        const UINT_32 blkYMask = dim.h - 1;

        // Working state:
        //   swizzle - per address bit, the in-block x/y bits that have not been emitted yet
        //   xMask   - x bits already emitted as the addr term of some address bit
        //   yMask   - y bits already emitted as the addr term of some address bit
        //   bMask   - address bits that are fully resolved (the element bits are resolved from the start)
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // Pass 1: resolve single-coordinate bits directly, and for multi-term bits emit the terms that
        // can be decided immediately (z, and x/y bits lying outside the block dimensions).
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                // Exactly one coordinate bit: it becomes the addr term and is recorded in xMask/yMask
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].y != 0);
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                // Multi-term bit. A z component goes straight into xor2.
                if (pSwizzle[i].z != 0)
                {
                    ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                    pEquation->xor2[i].channel = 2;
                    pEquation->xor2[i].valid   = 1;
                    pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
                }

                // Keep only the x/y components in the working copy
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // x bits at or above the block width cannot be an addr term of any bit: emit as xor1
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                // y bits at or above the block height: emit as xor1, or as xor2 when xor1 was taken by xHi
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    if (xHi == 0)
                    {
                        ADDR_ASSERT(pEquation->xor1[i].value == 0);
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                // Nothing left inside the block: this address bit is fully resolved
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by pass 1
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Pass 2: repeat until every address bit is resolved. For each unresolved bit:
        //   - if a single coordinate bit remains, it becomes the addr term;
        //   - otherwise, coordinate bits already used as some addr term (xMask/yMask) are emitted
        //     as xor terms and removed, which may leave a single bit for a later iteration.
        // NOTE(review): termination relies on every sweep making progress for the given pattern - confirm
        // that all pattern tables satisfy this.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].y != 0);
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this entry that are already the addr term of another bit
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;

                        // Emit each into the first free xor slot (xor1, then xor2)
                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        // Remove the emitted bits from the working copy
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                    }
                }
            }
        }

        // Every in-block x and y bit must have ended up as the addr term of exactly one address bit
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
    }
    else
    {
        // Remaining XOR modes (not thin): same two-pass scheme as above, extended to the z channel.
        // Block dimensions come from the 4KB or 64KB 3D block tables, selected by block size.
        const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
        const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
        const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
        const UINT_32 blkXMask = (1 << blkXLog2) - 1;
        const UINT_32 blkYMask = (1 << blkYLog2) - 1;
        const UINT_32 blkZMask = (1 << blkZLog2) - 1;

        // Working state, as in the thin case, plus zMask for z bits already emitted as addr terms
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          zMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // Pass 1: resolve single-coordinate bits and emit out-of-block x/y/z bits as xor terms
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else if (pSwizzle[i].y != 0)
                {
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].z != 0);
                    ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
                    zMask |= pSwizzle[i].z;

                    pEquation->addr[i].channel = 2;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].z);

                    ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                // Multi-term bit: copy x/y/z into the working entry (the sample component is dropped)
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = pSwizzle[i].z;
                swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // Coordinate bits lying outside the block dimensions
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
                const UINT_32 zHi = swizzle[i].z & (~blkZMask);

                // Only two xor slots are filled here, so at most two of the three may be non-zero
                ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                if (zHi != 0)
                {
                    ADDR_ASSERT(IsPow2(zHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 2;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(zHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 2;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(zHi);
                    }

                    swizzle[i].z &= blkZMask;
                }

                // Nothing left inside the block: this address bit is fully resolved
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by pass 1
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Pass 2: same iterative resolution as the thin case, with the z channel included.
        // NOTE(review): termination relies on every sweep making progress for the given pattern - confirm
        // that all pattern tables satisfy this.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        // A single coordinate bit remains: it becomes the addr term
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else if (swizzle[i].y != 0)
                        {
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].z != 0);
                            ADDR_ASSERT((zMask & swizzle[i].z) == 0);
                            zMask |= swizzle[i].z;

                            pEquation->addr[i].channel = 2;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].z);

                            ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this entry that are already the addr term of another bit
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;
                        const UINT_32 z = swizzle[i].z & zMask;

                        // Emit each into the first free xor slot (xor1, then xor2)
                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        if (z != 0)
                        {
                            ADDR_ASSERT(IsPow2(z));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 2;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(z);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 2;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(z);
                            }
                        }

                        // Remove the emitted bits from the working copy
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                        swizzle[i].z &= ~z;
                    }
                }
            }
        }

        // Every in-block x, y and z bit must have ended up as the addr term of exactly one address bit
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::InitEquationTable
*
*   @brief
*       Initialize Equation table.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx10Lib::InitEquationTable()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    // Walk every (resource type, swizzle mode, element size) combination. Resource type indices are
    // relative to ADDR_RSRC_TEX_2D, so index 0 is the 2D resource type.
    for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; ++rsrcTypeIdx)
    {
        const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);

        for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; ++swModeIdx)
        {
            const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);

            // Element sizes (8bpp/16bpp/32bpp/64bpp/128bpp)
            for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; ++elemLog2)
            {
                // Stays invalid unless an equation is generated for this combination
                UINT_32 lookupEntry = ADDR_INVALID_EQUATION_INDEX;

                // The returned ADDR_SW_PATINFO may or may not belong to a completely different swizzle mode,
                // essentially overwriting the choice.
                const ADDR_SW_PATINFO* pInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);

                if (pInfo != NULL)
                {
                    ADDR_ASSERT(IsValidSwMode(swMode));

                    if (pInfo->maxItemCount > 3)
                    {
                        // No equation for this pattern.
                        // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
                        ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
                        ADDR_ASSERT(rsrcTypeIdx == 1);
                        ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
                        ADDR_ASSERT(m_settings.supportRbPlus == 1);
                    }
                    else
                    {
                        // Build the equation for this pattern and append it to the table
                        ADDR_EQUATION newEquation = {};

                        ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pInfo, &newEquation);

                        lookupEntry = m_numEquations;
                        ADDR_ASSERT(lookupEntry < EquationTableSize);

                        m_equationTable[lookupEntry] = newEquation;
                        ++m_numEquations;
                    }
                }

                // Cache the table index (or the invalid marker) for this combination
                m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = lookupEntry;
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlGetEquationIndex
*
*   @brief
*       Interface function stub of GetEquationIndex
*
*   @return
*       Equation index for the given surface, or ADDR_INVALID_EQUATION_INDEX if there is none
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::HwlGetEquationIndex(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    UINT_32 result = ADDR_INVALID_EQUATION_INDEX;

    // Only 2D and 3D resources have entries in the lookup table; everything else keeps the invalid index.
    switch (pIn->resourceType)
    {
        case ADDR_RSRC_TEX_2D:
        case ADDR_RSRC_TEX_3D:
        {
            // Table is indexed by [resource type - 1][swizzle mode][log2(bytes per element)]
            const UINT_32 typeSlot  = static_cast<UINT_32>(pIn->resourceType) - 1;
            const UINT_32 modeSlot  = static_cast<UINT_32>(pIn->swizzleMode);
            const UINT_32 bytesLog2 = Log2(pIn->bpp >> 3);

            result = m_equationLookupTable[typeSlot][modeSlot][bytesLog2];
            break;
        }
        default:
            break;
    }

    // When the caller supplied a per-mip info array, every mip level reports the same equation index.
    ADDR2_MIP_INFO* const pMips = pOut->pMipInfo;

    if (pMips != NULL)
    {
        for (UINT_32 mip = 0; mip < pIn->numMipLevels; mip++)
        {
            pMips[mip].equationIndex = result;
        }
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetValidDisplaySwizzleModes
*
*   @brief
*       Get valid swizzle modes mask for displayable surface
*
*   @return
*       Valid swizzle modes mask for displayable surface
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
    UINT_32 bpp
    ) const
{
    // Nothing wider than 64 bits per pixel gets a displayable swizzle mode.
    if (bpp > 64)
    {
        return 0;
    }

    const BOOL_32 is64Bpp = (bpp == 64) ? TRUE : FALSE;
    UINT_32       validModes;

    // The mask depends on the display engine generation and on whether the format is exactly 64bpp.
    if (m_settings.isDcn20)
    {
        validModes = is64Bpp ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
    }
    else
    {
        validModes = is64Bpp ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
    }

    return validModes;
}

/**
************************************************************************************************************************
*   Gfx10Lib::IsValidDisplaySwizzleMode
*
*   @brief
*       Check if a swizzle mode is supported by display engine
*
*   @return
*       TRUE if swizzle mode is supported by display engine
************************************************************************************************************************
*/
BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    // Display surfaces are expected to be 2D resources.
    ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

    // Build the mode bit from an unsigned literal: shifting a signed 1 into bit 31 overflows a signed int,
    // while the mask it is tested against is an unsigned 32-bit value.
    const UINT_32 swModeBit = 1u << pIn->swizzleMode;

    // The mode is displayable when its bit is present in the mask of valid display modes for this bpp.
    return ((GetValidDisplaySwizzleModes(pIn->bpp) & swModeBit) != 0) ? TRUE : FALSE;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetMaxNumMipsInTail
*
*   @brief
*       Return max number of mips in tails
*
*   @return
*       Max number of mips in tails
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
    UINT_32 blockSizeLog2,     ///< block size log2
    BOOL_32 isThin             ///< is thin or thick
    ) const
{
    // Thick blocks use a reduced effective size: (blockSizeLog2 - 8) / 3 is subtracted from the block size log2.
    const UINT_32 thickAdjust   = (isThin == FALSE) ? ((blockSizeLog2 - 8) / 3) : 0;
    const UINT_32 effectiveLog2 = blockSizeLog2 - thickAdjust;

    UINT_32 maxMipsInTail;

    if (effectiveLog2 <= 11)
    {
        // Small effective sizes: 1 + 2^(effectiveLog2 - 9)
        maxMipsInTail = 1 + (1 << (effectiveLog2 - 9));
    }
    else
    {
        // Larger effective sizes grow linearly with the log2 size
        maxMipsInTail = effectiveLog2 - 4;
    }

    return maxMipsInTail;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputePipeBankXor
*
*   @brief
*       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
*
*   @return
*       ADDR_OK; the PipeBankXor value is written to pOut->pipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
    ) const
{
    // Bank rotation patterns: one row per number of bank xor bits (1..4), one column per surface index slot.
    static const UINT_32 RotPatternLen = 8;
    static const UINT_32 BankRotation[4][RotPatternLen] =
    {
        {0,  1,  0,  1,  0,  1,  0,  1},    // 1 bank bit
        {0,  2,  1,  3,  2,  0,  3,  1},    // 2 bank bits
        {0,  4,  2,  6,  1,  5,  3,  7},    // 3 bank bits
        {0,  8,  4, 12,  2, 10,  6, 14},    // 4 bank bits
    };

    // Swizzle modes that are not non-PRT xor modes always get a zero PipeBankXor.
    UINT_32 result = 0;

    if (IsNonPrtXor(pIn->swizzleMode))
    {
        const UINT_32 numBankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));

        if ((numBankBits >= 1) && (numBankBits <= 4))
        {
            // There is no pipe xor; only the bank part is generated. The pattern entry is selected by the
            // surface index and placed above the pipe and column bits.
            const UINT_32 rotation = BankRotation[numBankBits - 1][pIn->surfIndex % RotPatternLen];

            result = rotation << (m_pipesLog2 + ColumnBits);
        }
        else if (numBankBits != 0)
        {
            // valid bank bits should be 0~4
            ADDR_ASSERT_ALWAYS();
        }
    }

    pOut->pipeBankXor = result;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSlicePipeBankXor
*
*   @brief
*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
*
*   @return
*       ADDR_OK; the slice PipeBankXor value is written to pOut->pipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
    ) const
{
    // Swizzle modes that are not non-PRT xor modes have no slice xor at all.
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        pOut->pipeBankXor = 0;
        return ADDR_OK;
    }

    // Legacy approach: the slice xor is the bit-reversed slice index limited to the pipe xor bits.
    const UINT_32 blkSizeLog2    = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 numPipeXorBits = GetPipeXorBits(blkSizeLog2);
    const UINT_32 legacySliceXor = ReverseBitVector(pIn->slice, numPipeXorBits);

    pOut->pipeBankXor = pIn->basePipeBankXor ^ legacySliceXor;

    // When the element size is known, recompute the slice xor from the actual swizzle pattern instead.
    const ADDR_SW_PATINFO* pPatInfo = NULL;

    if (pIn->bpe != 0)
    {
        pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
                                         pIn->resourceType,
                                         Log2(pIn->bpe >> 3),
                                         1);
    }

    if (pPatInfo != NULL)
    {
        ADDR_BIT_SETTING fullSwizzlePattern[20];
        GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

        // Offset within the block of coordinate (x=0, y=0, z=slice, s=0)
        const UINT_32 sliceOffset =
            ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
                                            blkSizeLog2,
                                            0,
                                            0,
                                            pIn->slice,
                                            0);

        const UINT_32 patternSliceXor = sliceOffset >> m_pipeInterleaveLog2;

        // Should have no bit set under pipe interleave
        ADDR_ASSERT((patternSliceXor << m_pipeInterleaveLog2) == sliceOffset);

        // This assertion firing means old approach doesn't calculate a correct sliceXor value...
        ADDR_ASSERT(patternSliceXor == legacySliceXor);

        pOut->pipeBankXor = pIn->basePipeBankXor ^ patternSliceXor;
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Compute sub resource offset to support swizzle pattern
*
*   @return
*       ADDR_OK; the sub resource offset is written to pOut->offset
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Callers are expected to pass a thin resource type / swizzle mode combination.
    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));

    // Macro block offset of the mip, plus the size of all slices that precede the requested one.
    pOut->offset = pIn->macroBlockOffset + (pIn->sliceSize * pIn->slice);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
    {
        // Only thin swizzle mode can have a NonBC view...
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
             ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
    {
        // Only support BC1~BC7, ASTC, or ETC2 for now...
        returnCode = ADDR_NOTSUPPORTED;
    }
    else if ((pIn->numMipLevels > MaxMipLevels) || (pIn->mipId >= MaxMipLevels))
    {
        // The per-mip info below lives in a fixed-size stack array of MaxMipLevels entries that is filled for
        // numMipLevels levels and read at mipId. Reject requests that would index past it instead of relying
        // on a debug-only assertion.
        ADDR_ASSERT_ALWAYS();
        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        // Describe the surface in units of compressed blocks (elements) rather than texels.
        UINT_32 bcWidth, bcHeight;
        UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);

        ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
        infoIn.flags        = pIn->flags;
        infoIn.swizzleMode  = pIn->swizzleMode;
        infoIn.resourceType = pIn->resourceType;
        infoIn.bpp          = bpp;
        infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
        infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
        infoIn.numSlices    = pIn->numSlices;
        infoIn.numMipLevels = pIn->numMipLevels;
        infoIn.numSamples   = 1;
        infoIn.numFrags     = 1;

        ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
        ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
        infoOut.pMipInfo = mipInfo;

        const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;

        // Compute the layout of the whole mip chain so that per-mip offsets and block dimensions are known.
        if (tiled)
        {
            returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
        }
        else
        {
            returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
        }

        if (returnCode == ADDR_OK)
        {
            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
            subOffIn.swizzleMode      = infoIn.swizzleMode;
            subOffIn.resourceType     = infoIn.resourceType;
            subOffIn.slice            = pIn->slice;
            subOffIn.sliceSize        = infoOut.sliceSize;
            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;

            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};

            // For any mipmap level, move nonBc view base address by offset
            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
            pOut->offset = subOffOut.offset;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
            slicePbXorIn.bpe             = infoIn.bpp;
            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
            slicePbXorIn.resourceType    = infoIn.resourceType;
            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
            slicePbXorIn.slice           = pIn->slice;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};

            // For any mipmap level, nonBc view should use computed pbXor
            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;

            // Dimensions (in elements) of the requested mip level, derived from the API (texel) dimensions.
            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
            const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
            const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);

            if (inTail)
            {
                // For mipmap level that is in mip tail block, hack a lot of things...
                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
                // are fit in tail block:

                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;

                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);

                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);

                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
            }
            // This check should cover at least mipId == 0
            else if (requestMipWidth << pIn->mipId == infoIn.width)
            {
                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
                // - only one mipmap level and mipId = 0
                pOut->mipId        = 0;
                pOut->numMipLevels = 1;

                // (mip0) width = requestMipWidth
                pOut->unalignedWidth = requestMipWidth;

                // (mip0) height = requestMipHeight
                pOut->unalignedHeight = requestMipHeight;
            }
            else
            {
                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
                // because single mip view may have different pitch value than original (multiple) mip view...
                // A simple case would be:
                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
                //   mip0 width = 0x101/mip1 width = 0x80
                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.

                // - 2 levels and mipId = 1
                pOut->mipId        = 1;
                pOut->numMipLevels = 2;

                // Dimensions (in elements) of the mip level directly above the requested one.
                const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
                const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);

                // The requested level is small enough to fit the mip tail dimension threshold; the 2-level view
                // must be padded so that it does not end up in the tail.
                const BOOL_32 needToAvoidInTail =
                    tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
                    TRUE : FALSE;

                // Block-aligned dimensions of the requested level within the original mip chain.
                const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
                const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);

                const BOOL_32 needExtraWidth =
                    ((upperMipWidth < requestMipWidth * 2) ||
                     ((upperMipWidth == requestMipWidth * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;

                const BOOL_32 needExtraHeight =
                    ((upperMipHeight < requestMipHeight * 2) ||
                     ((upperMipHeight == requestMipHeight * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;

                // (mip0) width = requestLastMipLevelWidth
                pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);

                // (mip0) height = requestLastMipLevelHeight
                pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
            }

            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
            ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
            ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ValidateNonSwModeParams
*
*   @brief
*       Validate compute surface info params except swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 valid = TRUE;

    // Basic range checks: bpp within (0, 128], non-zero width, at most 8 fragments and 16 samples.
    const BOOL_32 badBasicParams = (pIn->bpp == 0)     ||
                                   (pIn->bpp > 128)    ||
                                   (pIn->width == 0)   ||
                                   (pIn->numFrags > 8) ||
                                   (pIn->numSamples > 16);

    if (badBasicParams)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const ADDR2_SURFACE_FLAGS surfFlags  = pIn->flags;
    const AddrResourceType    resType    = pIn->resourceType;
    const BOOL_32             hasMips    = (pIn->numMipLevels > 1);
    const BOOL_32             isMsaa     = (pIn->numFrags > 1);
    const BOOL_32             isDisplay  = surfFlags.display;
    const BOOL_32             is3d       = IsTex3d(resType);
    const BOOL_32             is2d       = IsTex2d(resType);
    const BOOL_32             is1d       = IsTex1d(resType);
    const BOOL_32             isStereo   = surfFlags.qbStereo;

    // 1D and 3D resources share one rule: no MSAA, no display, no stereo.
    const BOOL_32 violates1dOr3dRule = (isMsaa || isDisplay || isStereo);

    // 2D resources may use at most one of MSAA, mipmapping and stereo.
    const BOOL_32 violates2dRule = ((isMsaa && hasMips) || (isStereo && isMsaa) || (isStereo && hasMips));

    // Resource type check
    if (is1d)
    {
        if (violates1dOr3dRule)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (is2d)
    {
        if (violates2dRule)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (is3d)
    {
        if (violates1dOr3dRule)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else
    {
        // Not a 1D, 2D or 3D resource
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ValidateSwModeParams
*
*   @brief
*       Validate compute surface info related to swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx10Lib::ValidateSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 valid = TRUE;

    if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
    {
        // An out-of-range swizzle mode can never validate. Bail out right away so that the bogus value is
        // neither handed to the swizzle mode helpers nor used as a shift count below.
        ADDR_ASSERT_ALWAYS();
        return FALSE;
    }
    else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
    const AddrResourceType    rsrcType    = pIn->resourceType;
    const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
    const BOOL_32             msaa        = (pIn->numFrags > 1);
    const BOOL_32             zbuffer     = flags.depth || flags.stencil;
    const BOOL_32             color       = flags.color;
    const BOOL_32             display     = flags.display;
    const BOOL_32             tex3d       = IsTex3d(rsrcType);
    const BOOL_32             tex2d       = IsTex2d(rsrcType);
    const BOOL_32             tex1d       = IsTex1d(rsrcType);
    const BOOL_32             thin3d      = flags.view3dAs2dArray;
    const BOOL_32             linear      = IsLinear(swizzle);
    const BOOL_32             blk256B     = IsBlock256b(swizzle);
    const BOOL_32             blkVar      = IsBlockVariable(swizzle);
    const BOOL_32             prt         = flags.prt;
    const BOOL_32             fmask       = flags.fmask;

    // Misc check
    if ((pIn->numFrags > 1) &&
        (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
    {
        // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Displayable surfaces must use a swizzle mode the display engine supports
    if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // 96bpp is only allowed with linear swizzle
    if ((pIn->bpp == 96) && (linear == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Unsigned literal: the result is tested against unsigned masks, and shifting a signed 1 into bit 31
    // would overflow a signed int.
    const UINT_32 swizzleMask = 1u << swizzle;

    // Resource type check
    if (tex1d)
    {
        if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex2d)
    {
        if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
        else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
                 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex3d)
    {
        if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
            (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
            (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    // Swizzle type check
    if (linear)
    {
        if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsZOrderSwizzle(swizzle))
    {
        if ((pIn->bpp > 64)                         ||
            (msaa && (color || (pIn->bpp > 32)))    ||
            ElemLib::IsBlockCompressed(pIn->format) ||
            ElemLib::IsMacroPixelPacked(pIn->format))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsStandardSwizzle(rsrcType, swizzle))
    {
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swizzle))
    {
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsRtOptSwizzle(swizzle))
    {
        if (zbuffer)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else
    {
        // Swizzle mode does not belong to any known swizzle type
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Block type check
    if (blk256B)
    {
        if (zbuffer || tex3d || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (blkVar)
    {
        // Variable-size block modes require a configured variable block size
        if (m_blockVarSizeLog2 == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
*
*   @brief
*       Compute surface info sanity check
*
*   @return
*       ADDR_OK if all parameters are valid, ADDR_INVALIDPARAMS otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    // Swizzle mode independent parameters are checked first; the swizzle mode checks only run if those pass.
    if (ValidateNonSwModeParams(pIn) == FALSE)
    {
        return ADDR_INVALIDPARAMS;
    }

    if (ValidateSwModeParams(pIn) == FALSE)
    {
        return ADDR_INVALIDPARAMS;
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlGetPreferredSurfaceSetting
*
*   @brief
*       Internal function to get suggested surface information for client to use
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->flags.fmask)
    {
        const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
        const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));

        if (forbid64KbBlockType && forbidVarBlockType)
        {
            // Invalid combination...
            ADDR_ASSERT_ALWAYS();
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            pOut->resourceType                   = ADDR_RSRC_TEX_2D;
            pOut->validBlockSet.value            = 0;
            pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
            pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
            pOut->validSwModeSet.value           = 0;
            pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
            pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
            pOut->canXor                         = TRUE;
            pOut->validSwTypeSet.value           = AddrSwSetZ;
            pOut->clientPreferredSwSet           = pOut->validSwTypeSet;

            BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);

            if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
            {
                const UINT_8  maxFmaskSwizzleModeType = 2;
                const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
                const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
                const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
                const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
                const UINT_32 width                   = Max(pIn->width, 1u);
                const UINT_32 height                  = Max(pIn->height, 1u);
                const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);

                AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
                Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
                Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
                UINT_64         padSize[maxFmaskSwizzleModeType] = {};

                for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
                {
                    ComputeBlockDimensionForSurf(&blkDim[i].w,
                                                 &blkDim[i].h,
                                                 &blkDim[i].d,
                                                 fmaskBpp,
                                                 1,
                                                 pOut->resourceType,
                                                 swMode[i]);

                    padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
                    padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
                }

                if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
                                                padSize[1],
                                                ratioLow,
                                                ratioHi,
                                                pIn->memoryBudget,
                                                GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
                {
                    use64KbBlockType = FALSE;
                }
            }
            else if (forbidVarBlockType)
            {
                use64KbBlockType = TRUE;
            }

            if (use64KbBlockType)
            {
                pOut->swizzleMode = ADDR_SW_64KB_Z_X;
            }
            else
            {
                pOut->swizzleMode = ADDR_SW_VAR_Z_X;
            }
        }
    }
    else
    {
        UINT_32 bpp    = pIn->bpp;
        UINT_32 width  = Max(pIn->width, 1u);
        UINT_32 height = Max(pIn->height, 1u);

        // Set format to INVALID will skip this conversion
        if (pIn->format != ADDR_FMT_INVALID)
        {
            ElemMode elemMode = ADDR_UNCOMPRESSED;
            UINT_32 expandX, expandY;

            // Get compression/expansion factors and element mode which indicates compression/expansion
            bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
                                                &elemMode,
                                                &expandX,
                                                &expandY);

            UINT_32 basePitch = 0;
            GetElemLib()->AdjustSurfaceInfo(elemMode,
                                            expandX,
                                            expandY,
                                            &bpp,
                                            &basePitch,
                                            &width,
                                            &height);
        }

        const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
        const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
        const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
        const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
        const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);

        // Pre sanity check on non swizzle mode parameters
        ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
        localIn.flags        = pIn->flags;
        localIn.resourceType = pIn->resourceType;
        localIn.format       = pIn->format;
        localIn.bpp          = bpp;
        localIn.width        = width;
        localIn.height       = height;
        localIn.numSlices    = numSlices;
        localIn.numMipLevels = numMipLevels;
        localIn.numSamples   = numSamples;
        localIn.numFrags     = numFrags;

        if (ValidateNonSwModeParams(&localIn))
        {
            // Forbid swizzle mode(s) by client setting
            ADDR2_SWMODE_SET allowedSwModeSet = {};
            allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
            allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThin4KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThick4KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThin64KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThick64KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);

            if (pIn->preferredSwSet.value != 0)
            {
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
            }

            if (pIn->noXor)
            {
                allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
            }

            if (pIn->maxAlign > 0)
            {
                if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
                {
                    allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
                }

                if (pIn->maxAlign < Size64K)
                {
                    allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
                }

                if (pIn->maxAlign < Size4K)
                {
                    allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
                }

                if (pIn->maxAlign < Size256)
                {
                    allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
                }
            }

            // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
            switch (pIn->resourceType)
            {
                case ADDR_RSRC_TEX_1D:
                    allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
                    break;

                case ADDR_RSRC_TEX_2D:
                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
                    break;

                case ADDR_RSRC_TEX_3D:
                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;

                    if (pIn->flags.view3dAs2dArray)
                    {
                        // SW_LINEAR can be used for 3D thin images, including BCn image format.
                        allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
                    }
                    break;

                default:
                    ADDR_ASSERT_ALWAYS();
                    allowedSwModeSet.value = 0;
                    break;
            }

            if (ElemLib::IsBlockCompressed(pIn->format)  ||
                ElemLib::IsMacroPixelPacked(pIn->format) ||
                (bpp > 64)                               ||
                (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
            {
                allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
            }

            if (pIn->format == ADDR_FMT_32_32_32)
            {
                allowedSwModeSet.value &= Gfx10LinearSwModeMask;
            }

            if (msaa)
            {
                allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
            }

            if (pIn->flags.depth || pIn->flags.stencil)
            {
                allowedSwModeSet.value &= Gfx10ZSwModeMask;
            }

            if (pIn->flags.display)
            {
                allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
            }

            if (allowedSwModeSet.value != 0)
            {
#if DEBUG
                // Post sanity check, at least AddrLib should accept the output generated by its own
                UINT_32 validateSwModeSet = allowedSwModeSet.value;

                for (UINT_32 i = 0; validateSwModeSet != 0; i++)
                {
                    if (validateSwModeSet & 1)
                    {
                        localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
                        ADDR_ASSERT(ValidateSwModeParams(&localIn));
                    }

                    validateSwModeSet >>= 1;
                }
#endif

                pOut->resourceType   = pIn->resourceType;
                pOut->validSwModeSet = allowedSwModeSet;
                pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
                pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
                pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);

                pOut->clientPreferredSwSet = pIn->preferredSwSet;

                if (pOut->clientPreferredSwSet.value == 0)
                {
                    pOut->clientPreferredSwSet.value = AddrSwSetAll;
                }

                // Apply optional restrictions
                if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
                {
                    if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
                    {
                        // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
                        // the GL2 in VAR mode, so it should be avoided.
                        allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
                    }
                    else
                    {
                        // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
                        // But we have to suffer from low performance because there is no other choice...
                        ADDR_ASSERT_ALWAYS();
                    }
                }

                if (pIn->flags.needEquation)
                {
                    UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
                                                                        ADDR_MAX_LEGACY_EQUATION_COMP;
                    FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
                }

                if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
                {
                    pOut->swizzleMode = ADDR_SW_LINEAR;
                }
                else
                {
                    const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);

                    if ((height > 1) && (computeMinSize == FALSE))
                    {
                        // Always ignore linear swizzle mode if:
                        // 1. This is a (2D/3D) resource with height > 1
                        // 2. Client doesn't require computing minimize size
                        allowedSwModeSet.swLinear = 0;
                    }

                    // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
                    ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);

                    // Determine block size if there are 2 or more block type candidates
                    if (IsPow2(allowedBlockSet.value) == FALSE)
                    {
                        // Tracks a valid SwizzleMode for each valid block type
                        AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};

                        swMode[AddrBlockLinear] = ADDR_SW_LINEAR;

                        if (m_blockVarSizeLog2 != 0)
                        {
                            swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
                        }

                        if (pOut->resourceType == ADDR_RSRC_TEX_3D)
                        {
                            swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
                            swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
                            swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
                        }
                        else
                        {
                            swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
                            swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
                            swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
                        }

                        // Tracks the size of each valid swizzle mode's surface in bytes
                        UINT_64 padSize[AddrBlockMaxTiledType] = {};

                        const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
                        const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
                        const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
                        UINT_32       minSizeBlk         = AddrBlockMicro; // Tracks the most optimal block to use
                        UINT_64       minSize            = 0;              // Tracks the minimum acceptable block type

                        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};

                        // Iterate through all block types
                        for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
                        {
                            if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                            {
                                localIn.swizzleMode = swMode[i];

                                if (localIn.swizzleMode == ADDR_SW_LINEAR)
                                {
                                    returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
                                }
                                else
                                {
                                    returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
                                }

                                if (returnCode == ADDR_OK)
                                {
                                    padSize[i] = localOut.surfSize;

                                    if (minSize == 0)
                                    {
                                        minSize    = padSize[i];
                                        minSizeBlk = i;
                                    }
                                    else
                                    {
                                        // Checks if the block type is within the memory budget but favors larger blocks
                                        if (Addr2BlockTypeWithinMemoryBudget(
                                                minSize,
                                                padSize[i],
                                                ratioLow,
                                                ratioHi,
                                                0.0,
                                                GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
                                        {
                                            minSize    = padSize[i];
                                            minSizeBlk = i;
                                        }
                                    }
                                }
                                else
                                {
                                    ADDR_ASSERT_ALWAYS();
                                    break;
                                }
                            }
                        }

                        if (pIn->memoryBudget > 1.0)
                        {
                            // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
                            // smaller-block type again in coming loop
                            switch (minSizeBlk)
                            {
                                case AddrBlockThick64KB:
                                    allowedBlockSet.macroThin64KB = 0;
                                case AddrBlockThinVar:
                                case AddrBlockThin64KB:
                                    allowedBlockSet.macroThick4KB = 0;
                                case AddrBlockThick4KB:
                                    allowedBlockSet.macroThin4KB = 0;
                                case AddrBlockThin4KB:
                                    allowedBlockSet.micro  = 0;
                                case AddrBlockMicro:
                                    allowedBlockSet.linear = 0;
                                case AddrBlockLinear:
                                    break;

                                default:
                                    ADDR_ASSERT_ALWAYS();
                                    break;
                            }

                            for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
                            {
                                if ((i != minSizeBlk) &&
                                    Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                                {
                                    if (Addr2BlockTypeWithinMemoryBudget(
                                            minSize,
                                            padSize[i],
                                            0,
                                            0,
                                            pIn->memoryBudget,
                                            GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
                                    {
                                        // Clear the block type if the memory waste is unacceptable
                                        allowedBlockSet.value &= ~(1u << (i - 1));
                                    }
                                }
                            }

                            // Remove VAR block type if bigger block type is allowed
                            if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
                            {
                                if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
                                {
                                    allowedBlockSet.var = 0;
                                }
                            }

                            // Remove linear block type if 2 or more block types are allowed
                            if (IsPow2(allowedBlockSet.value) == FALSE)
                            {
                                allowedBlockSet.linear = 0;
                            }

                            // Select the biggest allowed block type
                            minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;

                            if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
                            {
                                minSizeBlk = AddrBlockLinear;
                            }
                        }

                        switch (minSizeBlk)
                        {
                            case AddrBlockLinear:
                                allowedSwModeSet.value &= Gfx10LinearSwModeMask;
                                break;

                            case AddrBlockMicro:
                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
                                break;

                            case AddrBlockThin4KB:
                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
                                break;

                            case AddrBlockThick4KB:
                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
                                break;

                            case AddrBlockThin64KB:
                                allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
                                                          Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
                                break;

                            case AddrBlockThick64KB:
                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
                                break;

                            case AddrBlockThinVar:
                                allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
                                break;

                            default:
                                ADDR_ASSERT_ALWAYS();
                                allowedSwModeSet.value = 0;
                                break;
                        }
                    }

                    // Block type should be determined.
                    ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));

                    ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);

                    // Determine swizzle type if there are 2 or more swizzle type candidates
                    if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
                    {
                        if (ElemLib::IsBlockCompressed(pIn->format))
                        {
                            if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
                            }
                            else if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_R);
                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
                            }
                        }
                        else if (ElemLib::IsMacroPixelPacked(pIn->format))
                        {
                            if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
                            }
                            else if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_R);
                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
                            }
                        }
                        else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
                        {
                            if (pIn->flags.color &&
                                GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
                                allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
                            }
                            else if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
                            }
                            else if (allowedSwSet.sw_R)
                            {
                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_Z);
                                allowedSwModeSet.value &= Gfx10ZSwModeMask;
                            }
                        }
                        else
                        {
                            if (allowedSwSet.sw_R)
                            {
                                allowedSwModeSet.value &= Gfx10RenderSwModeMask;
                            }
                            else if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
                            }
                            else if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx10StandardSwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_Z);
                                allowedSwModeSet.value &= Gfx10ZSwModeMask;
                            }
                        }

                        // Swizzle type should be determined.
                        ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
                    }

                    // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
                    // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
                    // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
                    pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
                }
            }
            else
            {
                // Invalid combination...
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
        else
        {
            // Invalid combination...
            ADDR_ASSERT_ALWAYS();
            returnCode = ADDR_INVALIDPARAMS;
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeStereoInfo
*
*   @brief
*       Derive the additional Y alignment and the right eye pipeBankXor of a stereo surface from the address
*       equation selected by resource type, swizzle mode and element size
*
*   @return
*       ADDR_OK on success (including swizzle modes that are not non-PRT xor, for which only *pRightXor is reset);
*       ADDR_INVALIDPARAMS if no equation is registered for the requested combination
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
    UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
    UINT_32*                                pRightXor   ///< Right eye xor
    ) const
{
    // The right eye xor is always reset; it is only overridden at the very end of the successful path
    *pRightXor = 0;

    // Nothing else to compute unless the swizzle mode is a non-PRT xor mode
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        return ADDR_OK;
    }

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
    const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
    const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];

    if (eqIndex == ADDR_INVALID_EQUATION_INDEX)
    {
        return ADDR_INVALIDPARAMS;
    }

    const auto& equation = m_equationTable[eqIndex];

    UINT_32 highestYBit     = 0; // Largest Y coordinate bit index referenced by the scanned address bits
    UINT_32 highestYBitMask = 0; // Address bit positions which reference that largest Y bit

    // Pass 1: find the "max y bit" among the address bits in [pipe interleave, block size).
    // Channel value 1 is the one treated as Y here.
    for (UINT_32 bit = m_pipeInterleaveLog2; bit < blkSizeLog2; bit++)
    {
        const auto& addrTerm = equation.addr[bit];
        const auto& xor1Term = equation.xor1[bit];
        const auto& xor2Term = equation.xor2[bit];

        ADDR_ASSERT(addrTerm.valid == 1);

        if ((addrTerm.channel == 1) && (addrTerm.index > highestYBit))
        {
            highestYBit = addrTerm.index;
        }

        if ((xor1Term.valid == 1) && (xor1Term.channel == 1) && (xor1Term.index > highestYBit))
        {
            highestYBit = xor1Term.index;
        }

        if ((xor2Term.valid == 1) && (xor2Term.channel == 1) && (xor2Term.index > highestYBit))
        {
            highestYBit = xor2Term.index;
        }
    }

    // Pass 2: build a position mask of every address bit in which the "max y bit" takes part
    for (UINT_32 bit = m_pipeInterleaveLog2; bit < blkSizeLog2; bit++)
    {
        const auto& addrTerm = equation.addr[bit];
        const auto& xor1Term = equation.xor1[bit];
        const auto& xor2Term = equation.xor2[bit];

        const bool usesHighestYBit =
            ((addrTerm.channel == 1) && (addrTerm.index == highestYBit)) ||
            ((xor1Term.valid == 1) && (xor1Term.channel == 1) && (xor1Term.index == highestYBit)) ||
            ((xor2Term.valid == 1) && (xor2Term.channel == 1) && (xor2Term.index == highestYBit));

        if (usesHighestYBit)
        {
            highestYBitMask |= 1u << bit;
        }
    }

    const UINT_32 additionalAlign = 1 << highestYBit;

    // Only raise the caller's Y alignment, never lower it
    if (additionalAlign >= *pAlignY)
    {
        *pAlignY = additionalAlign;

        const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);

        // The right eye xor is only needed when the aligned height has the "max y bit" set
        if ((alignedHeight >> highestYBit) & 1)
        {
            *pRightXor = highestYBitMask >> m_pipeInterleaveLog2;
        }
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSurfaceInfoTiled
*
*   @brief
*       Internal function to calculate alignment for tiled surface. Resets the mip chain related outputs to their
*       defaults and then dispatches to the micro tiled (256B block) or macro tiled implementation.
*
*   @return
*       ADDR_E_RETURNCODE of the selected implementation
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // Defaults meaning "no mip tail"; ComputeSurfaceInfoMacroTiled() provides the real values if necessary
    pOut->firstMipIdInTail = pIn->numMipLevels;
    pOut->mipChainInTail   = FALSE;

    // Mip chain dimension and epitch have no meaning in GFX10, so they are set to default values
    pOut->epitchIsHeight   = FALSE;
    pOut->mipChainSlice    = 0;
    pOut->mipChainHeight   = 0;
    pOut->mipChainPitch    = 0;

    // 256B blocks are handled by the micro tiled path, everything else by the macro tiled path
    return IsBlock256b(pIn->swizzleMode) ? ComputeSurfaceInfoMicroTiled(pIn, pOut)
                                         : ComputeSurfaceInfoMacroTiled(pIn, pOut);
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeSurfaceInfoMicroTiled
*
*   @brief
*       Internal function to calculate alignment for micro tiled surface. Fills pitch, height, slice count, base
*       alignment, slice size and surface size of pOut, plus the per-mip records when pOut->pMipInfo is provided.
*
*   @note
*       All byte sizes are computed in 64-bit arithmetic so that large mip levels cannot wrap around 32 bits.
*
*   @return
*       ADDR_E_RETURNCODE (the result of ComputeBlockDimensionForSurf; outputs are only written on ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                         &pOut->blockHeight,
                                                         &pOut->blockSlices,
                                                         pIn->bpp,
                                                         pIn->numFrags,
                                                         pIn->resourceType,
                                                         pIn->swizzleMode);

    if (ret == ADDR_OK)
    {
        const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);

        // Mip 0 dimensions are padded to whole blocks; the base alignment is one block
        pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
        pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
        pOut->numSlices = pIn->numSlices;
        pOut->baseAlign = blockSize;

        if (pIn->numMipLevels > 1)
        {
            const UINT_32 mip0Width    = pIn->width;
            const UINT_32 mip0Height   = pIn->height;
            UINT_64       mipSliceSize = 0; // Running byte size of the mips laid out so far within one slice

            // Walk from the last mip level down to mip 0, so the last level gets offset 0 and mip 0 gets the
            // largest offset within the slice.
            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);

                const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
                const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].pitch            = mipActualWidth;
                    pOut->pMipInfo[i].height           = mipActualHeight;
                    pOut->pMipInfo[i].depth            = 1;
                    pOut->pMipInfo[i].offset           = mipSliceSize;
                    pOut->pMipInfo[i].mipTailOffset    = 0;
                    pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
                }

                // Widen before multiplying: width * height * bytesPerElement can exceed 32 bits for large
                // surfaces, and the single-mip path below already computes its size in 64 bits.
                mipSliceSize += static_cast<UINT_64>(mipActualWidth) * mipActualHeight * (pIn->bpp >> 3);
            }

            pOut->sliceSize = mipSliceSize;
            pOut->surfSize  = mipSliceSize * pOut->numSlices;
        }
        else
        {
            pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
            pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].pitch            = pOut->pitch;
                pOut->pMipInfo[0].height           = pOut->height;
                pOut->pMipInfo[0].depth            = 1;
                pOut->pMipInfo[0].offset           = 0;
                pOut->pMipInfo[0].mipTailOffset    = 0;
                pOut->pMipInfo[0].macroBlockOffset = 0;
            }
        }
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeSurfaceInfoMacroTiled
*
*   @brief
*       Internal function to calculate alignment for macro tiled surface.
*
*       Fills pOut with the block dimensions, the padded pitch/height/numSlices, the base alignment and the slice and
*       surface sizes. For a mip chain it also determines the first mip level that is placed in the mip tail and,
*       when pOut->pMipInfo is not NULL, the per-mip pitch/height/depth, offsets and mip tail coordinates.
*
*   @return
*       ADDR_E_RETURNCODE - ADDR_OK, or the error returned by ComputeBlockDimensionForSurf() / ComputeStereoInfo()
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                                &pOut->blockHeight,
                                                                &pOut->blockSlices,
                                                                pIn->bpp,
                                                                pIn->numFrags,
                                                                pIn->resourceType,
                                                                pIn->swizzleMode);

    if (returnCode == ADDR_OK)
    {
        // Height is normally aligned to the block height; for stereo surfaces ComputeStereoInfo() receives the
        // alignment by pointer and may replace it.
        UINT_32 heightAlign = pOut->blockHeight;

        if (pIn->flags.qbStereo)
        {
            UINT_32 rightXor = 0;

            returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);

            if (returnCode == ADDR_OK)
            {
                // NOTE(review): pStereoInfo is dereferenced without a NULL check here - presumably validated by the
                // caller for qbStereo surfaces; confirm.
                pOut->pStereoInfo->rightSwizzle = rightXor;
            }
        }

        if (returnCode == ADDR_OK)
        {
            const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
            const UINT_32 blockSize     = 1 << blockSizeLog2;

            pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
            pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
            pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
            pOut->baseAlign = blockSize;

            if (pIn->numMipLevels > 1)
            {
                const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
                                                                pIn->swizzleMode,
                                                                pOut->blockWidth,
                                                                pOut->blockHeight,
                                                                pOut->blockSlices);
                const UINT_32 mip0Width         = pIn->width;
                const UINT_32 mip0Height        = pIn->height;
                const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
                const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
                const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
                // Log2 of the element size in bytes; used to index the Block256_2d/Block256_3d tables.
                const UINT_32 index             = Log2(pIn->bpp >> 3);
                UINT_32       firstMipInTail    = pIn->numMipLevels;
                UINT_64       mipChainSliceSize = 0;
                // Per-level sizes of the levels that are NOT in the tail (only entries below firstMipInTail are
                // written and later read).
                UINT_64       mipSize[MaxMipLevels];
                UINT_64       mipSliceSize[MaxMipLevels];

                ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
                Dim3d fixedTailMaxDim = tailMaxDim;

                // With the dsMipmapHtileFix setting, Z-order surfaces with 1- or 2-byte elements (index <= 1) use a
                // tail dimension scaled down by the ratio of their 256B block dimensions to those of index 2.
                // Only the "is this level in the tail" decision below uses the adjusted dimension.
                if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
                {
                    fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
                    fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
                }

                // Pass 1: walk from mip 0 upwards, accumulating the per-slice size, until a level fits in the tail.
                for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
                {
                    UINT_32 mipWidth, mipHeight, mipDepth;

                    GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);

                    if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
                    {
                        // This level and all following ones are accounted for by a single block: add one block's
                        // worth of bytes, expressed per slice, and stop.
                        firstMipInTail     = i;
                        mipChainSliceSize += blockSize / pOut->blockSlices;
                        break;
                    }
                    else
                    {
                        const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
                        const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
                        const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
                        const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);

                        // mipSize: bytes of the whole level (all padded depth slices);
                        // mipSliceSize: bytes of one layer that is blockSlices thick.
                        mipSize[i]         = sliceSize * depth;
                        mipSliceSize[i]    = sliceSize * pOut->blockSlices;
                        mipChainSliceSize += sliceSize;

                        if (pOut->pMipInfo != NULL)
                        {
                            pOut->pMipInfo[i].pitch  = pitch;
                            pOut->pMipInfo[i].height = height;
                            pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
                        }
                    }
                }

                pOut->sliceSize        = mipChainSliceSize;
                pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
                pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
                pOut->firstMipIdInTail = firstMipInTail;

                // Pass 2 (optional): per-mip offsets and mip tail coordinates.
                if (pOut->pMipInfo != NULL)
                {
                    UINT_64 offset         = 0;
                    UINT_64 macroBlkOffset = 0;
                    UINT_32 tailMaxDepth   = 0;

                    // If a tail exists it comes first in memory: the non-tail levels start after the tail's blocks
                    // (one block per blockSlices-aligned group of tail depth slices).
                    if (firstMipInTail != pIn->numMipLevels)
                    {
                        UINT_32 mipWidth, mipHeight;

                        GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
                                   &mipWidth, &mipHeight, &tailMaxDepth);

                        offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
                        macroBlkOffset = blockSize;
                    }

                    // Non-tail levels are laid out from the last one down to mip 0, so mip 0 has the highest offset.
                    for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
                    {
                        pOut->pMipInfo[i].offset           = offset;
                        pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
                        pOut->pMipInfo[i].mipTailOffset    = 0;

                        offset         += mipSize[i];
                        macroBlkOffset += mipSliceSize[i];
                    }

                    // Reported dimensions of the first tail level; halved for each following tail level below.
                    UINT_32 pitch  = tailMaxDim.w;
                    UINT_32 height = tailMaxDim.h;
                    UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);

                    // From here on tailMaxDepth is the tail depth counted in 256B-block depths (1 for thin surfaces).
                    tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);

                    for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
                    {
                        // Tail slot: the first level in the tail gets the highest slot (maxMipsInTail - 1), each
                        // following level the next lower one. Slots 0..6 are m * 256 bytes into the tail, higher
                        // slots are at 16 << m bytes.
                        const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
                        const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);

                        pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
                        pOut->pMipInfo[i].mipTailOffset    = mipOffset;
                        pOut->pMipInfo[i].macroBlockOffset = 0;

                        pOut->pMipInfo[i].pitch  = pitch;
                        pOut->pMipInfo[i].height = height;
                        pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;

                        // De-interleave the tail offset into block coordinates (in units of 256B blocks):
                        // mipX collects offset bits 9, 11, 13, 15, 17, 19; mipY collects bits 8, 10, 12, 14, 16, 18.
                        UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
                                       ((mipOffset >> 10) & 2)  |
                                       ((mipOffset >> 11) & 4)  |
                                       ((mipOffset >> 12) & 8)  |
                                       ((mipOffset >> 13) & 16) |
                                       ((mipOffset >> 14) & 32);
                        UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
                                       ((mipOffset >> 9)  & 2)  |
                                       ((mipOffset >> 10) & 4)  |
                                       ((mipOffset >> 11) & 8)  |
                                       ((mipOffset >> 12) & 16) |
                                       ((mipOffset >> 13) & 32);

                        // For block sizes with an odd log2 the roles of X and Y are exchanged; additionally, for an
                        // odd element-size index the lowest X bit is moved to become the lowest Y bit.
                        if (blockSizeLog2 & 1)
                        {
                            const UINT_32 temp = mipX;
                            mipX = mipY;
                            mipY = temp;

                            if (index & 1)
                            {
                                mipY = (mipY << 1) | (mipX & 1);
                                mipX = mipX >> 1;
                            }
                        }

                        // Convert block coordinates to element coordinates using the 256B block dimensions, then
                        // halve the reported pitch/height for the next tail level (never below one 256B block).
                        if (isThin)
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_2d[index].w);
                            height = Max(height >> 1, Block256_2d[index].h);
                        }
                        else
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_3d[index].w);
                            height = Max(height >> 1, Block256_3d[index].h);
                        }
                    }
                }
            }
            else
            {
                // Single level: the slice size is the padded mip 0 multiplied by the number of fragments.
                pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
                pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[0].pitch            = pOut->pitch;
                    pOut->pMipInfo[0].height           = pOut->height;
                    pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
                    pOut->pMipInfo[0].offset           = 0;
                    pOut->pMipInfo[0].mipTailOffset    = 0;
                    pOut->pMipInfo[0].macroBlockOffset = 0;
                    pOut->pMipInfo[0].mipTailCoordX    = 0;
                    pOut->pMipInfo[0].mipTailCoordY    = 0;
                    pOut->pMipInfo[0].mipTailCoordZ    = 0;
                }
            }
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface. Swizzle modes with a 256B block
*       are handled by the micro tiled path, every other swizzle mode by the macro tiled path.
*
*   @return
*       ADDR_E_RETURNCODE as reported by the selected path
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // Only one of the two helpers is evaluated, selected by the block size of the swizzle mode.
    const ADDR_E_RETURNCODE status = IsBlock256b(pIn->swizzleMode) ?
                                     ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut) :
                                     ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);

    return status;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeOffsetFromEquation
*
*   @brief
*       Compute offset from equation. Each output bit is the XOR of the coordinate bits selected by the valid
*       components of that equation bit (channel 0 selects x, channel 1 selects y, anything else selects z).
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
    const ADDR_EQUATION* pEq,   ///< Equation
    UINT_32              x,     ///< x coord in bytes
    UINT_32              y,     ///< y coord in pixel
    UINT_32              z      ///< z coord in slice
    ) const
{
    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < pEq->numBits; bit++)
    {
        UINT_32 parity = 0;

        for (UINT_32 comp = 0; comp < pEq->numBitComponents; comp++)
        {
            // Components that are not marked valid do not contribute to this bit.
            if (pEq->comps[comp][bit].valid)
            {
                UINT_32 coord;

                switch (pEq->comps[comp][bit].channel)
                {
                    case 0:
                        coord = x;
                        break;
                    case 1:
                        coord = y;
                        break;
                    default:
                        ADDR_ASSERT(pEq->comps[comp][bit].channel == 2);
                        coord = z;
                        break;
                }

                parity ^= (coord >> pEq->comps[comp][bit].index) & 1;
            }
        }

        result |= (parity << bit);
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeOffsetFromSwizzlePattern
*
*   @brief
*       Compute offset from swizzle pattern. The pattern is read as an array of ADDR_BIT_SETTING, one entry per
*       output bit; every entry holds one mask per channel (x, y, z, s). An output bit is the XOR of all coordinate
*       bits whose position is set in the mask of the matching channel.
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    ) const
{
    const UINT_32           numChannels = 4;
    const ADDR_BIT_SETTING* pBitSetting = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
    UINT_32                 result      = 0;
    UINT_32                 coord[4];

    // Channel order matches the mask order used below: x, y, z, s.
    coord[0] = x;
    coord[1] = y;
    coord[2] = z;
    coord[3] = s;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        UINT_16 mask[4];

        mask[0] = pBitSetting[bit].x;
        mask[1] = pBitSetting[bit].y;
        mask[2] = pBitSetting[bit].z;
        mask[3] = pBitSetting[bit].s;

        UINT_32 parity = 0;

        for (UINT_32 ch = 0; ch < numChannels; ch++)
        {
            UINT_32 coordBits = coord[ch];

            // Consume mask and coordinate in lock step, lowest bit first; an empty mask contributes nothing.
            for (UINT_16 pending = mask[ch]; pending != 0; pending >>= 1)
            {
                if ((pending & 1) != 0)
                {
                    parity ^= coordBits & 1;
                }

                coordBits >>= 1;
            }
        }

        result |= (parity << bit);
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetSwizzlePatternInfo
*
*   @brief
*       Get swizzle pattern. Selects one of the GFX10_SW_*_PATINFO tables from the swizzle mode, resource type and
*       fragment count (the *_RBPLUS_* variants are used when m_settings.supportRbPlus is set) and returns the
*       entry for the given element size.
*
*   @return
*       Swizzle pattern information, or NULL if no table was selected (linear swizzle mode, swizzle mode not in the
*       2D/3D mode mask for the resource type, ADDR_SW_4KB_R_X, or a variable block mode that is not handled here)
************************************************************************************************************************
*/
const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
    AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
    AddrResourceType resourceType,      ///< Resource type
    UINT_32          elemLog2,          ///< Element size in bytes log2
    UINT_32          numFrag            ///< Number of fragment
    ) const
{
    // Now elemLog2 is going to be used to access the correct index inside of the pPatInfo array so we will start from
    // the right location
    // (for XOR swizzle modes the index is additionally shifted by m_colorBaseIndex).
    const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
    const ADDR_SW_PATINFO* patInfo     = NULL;
    // Single-bit mask of the swizzle mode, tested against the per-resource-type mode masks below.
    const UINT_32          swizzleMask = 1 << swizzleMode;

    if (IsBlockVariable(swizzleMode))
    {
        // Variable-size blocks: a table is selected only when a variable block size is configured, and only for
        // render-target-optimized and Z-order swizzles. The table depends on the fragment count (1/2/4/8).
        if (m_blockVarSizeLog2 != 0)
        {
            ADDR_ASSERT(m_settings.supportRbPlus);

            if (IsRtOptSwizzle(swizzleMode))
            {
                if (numFrag == 1)
                {
                    patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
                }
                else if (numFrag == 2)
                {
                    patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
                }
                else if (numFrag == 4)
                {
                    patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
                }
                else
                {
                    ADDR_ASSERT(numFrag == 8);
                    patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
                }
            }
            else if (IsZOrderSwizzle(swizzleMode))
            {
                if (numFrag == 1)
                {
                    patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
                }
                else if (numFrag == 2)
                {
                    patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
                }
                else if (numFrag == 4)
                {
                    patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
                }
                else
                {
                    ADDR_ASSERT(numFrag == 8);
                    patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
                }
            }
        }
    }
    else if (IsLinear(swizzleMode) == FALSE)
    {
        if (resourceType == ADDR_RSRC_TEX_3D)
        {
            // 3D resources: single fragment only; the swizzle mode must be part of Gfx10Rsrc3dSwModeMask.
            ADDR_ASSERT(numFrag == 1);

            if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
            {
                if (IsRtOptSwizzle(swizzleMode))
                {
                    if (swizzleMode == ADDR_SW_4KB_R_X)
                    {
                        // No pattern table is selected for this mode; the function returns NULL.
                        patInfo = NULL;
                    }
                    else
                    {
                        patInfo = m_settings.supportRbPlus ?
                                  GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
                    }
                }
                else if (IsZOrderSwizzle(swizzleMode))
                {
                    patInfo = m_settings.supportRbPlus ?
                              GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
                }
                else if (IsDisplaySwizzle(resourceType, swizzleMode))
                {
                    ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
                    patInfo = m_settings.supportRbPlus ?
                              GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
                }
                else
                {
                    // Remaining 3D modes are standard swizzles; they use the 3D-specific "S3" tables.
                    ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));

                    if (IsBlock4kb(swizzleMode))
                    {
                        if (swizzleMode == ADDR_SW_4KB_S)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
                        }
                    }
                    else
                    {
                        if (swizzleMode == ADDR_SW_64KB_S)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
                        }
                        else if (swizzleMode == ADDR_SW_64KB_S_X)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
                        }
                    }
                }
            }
        }
        else
        {
            // Non-3D resources: the swizzle mode must be part of Gfx10Rsrc2dSwModeMask. Tables are chosen by block
            // size first (256B, 4KB, otherwise the 64K tables), then by swizzle type.
            if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
            {
                if (IsBlock256b(swizzleMode))
                {
                    if (swizzleMode == ADDR_SW_256B_S)
                    {
                        patInfo = m_settings.supportRbPlus ?
                                  GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
                    }
                    else
                    {
                        ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
                        patInfo = m_settings.supportRbPlus ?
                                  GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
                    }
                }
                else if (IsBlock4kb(swizzleMode))
                {
                    if (IsStandardSwizzle(resourceType, swizzleMode))
                    {
                        if (swizzleMode == ADDR_SW_4KB_S)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
                        }
                    }
                    else
                    {
                        if (swizzleMode == ADDR_SW_4KB_D)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
                        }
                        else if (swizzleMode == ADDR_SW_4KB_R_X)
                        {
                            // No pattern table is selected for this mode; the function returns NULL.
                            patInfo = NULL;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
                        }
                    }
                }
                else
                {
                    // Render-target-optimized and Z-order swizzles additionally depend on the fragment count.
                    if (IsRtOptSwizzle(swizzleMode))
                    {
                        if (numFrag == 1)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
                        }
                        else if (numFrag == 2)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
                        }
                        else if (numFrag == 4)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(numFrag == 8);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
                        }
                    }
                    else if (IsZOrderSwizzle(swizzleMode))
                    {
                        if (numFrag == 1)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
                        }
                        else if (numFrag == 2)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
                        }
                        else if (numFrag == 4)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(numFrag == 8);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
                        }
                    }
                    else if (IsDisplaySwizzle(resourceType, swizzleMode))
                    {
                        if (swizzleMode == ADDR_SW_64KB_D)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
                        }
                        else if (swizzleMode == ADDR_SW_64KB_D_X)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
                        }
                    }
                    else
                    {
                        if (swizzleMode == ADDR_SW_64KB_S)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
                        }
                        else if (swizzleMode == ADDR_SW_64KB_S_X)
                        {
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
                            patInfo = m_settings.supportRbPlus ?
                                      GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
                        }
                    }
                }
            }
        }
    }

    // Return the table entry for this element size, or NULL when no table applies.
    return (patInfo != NULL) ? &patInfo[index] : NULL;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
*
*   @brief
*       Internal function to calculate address from coord for micro tiled swizzle surface. The surface layout is
*       recomputed with ComputeSurfaceInfoMicroTiled(); the address is then the sum of the slice offset, the offset
*       of the mip level, the offset of the 256B block containing (x, y) and the offset inside that block, which is
*       evaluated from the equation table.
*
*   @return
*       ADDR_E_RETURNCODE - the error from ComputeSurfaceInfoMicroTiled(), ADDR_INVALIDPARAMS if the equation lookup
*       table has no equation for this resource type / swizzle mode / element size, ADDR_OK otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  surfIn  = {};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {};
    ADDR2_MIP_INFO                    perMipInfo[MaxMipLevels];
    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

    // Describe the surface for the info query; dimensions and counts of 0 are treated as 1.
    surfOut.pMipInfo    = perMipInfo;
    surfIn.numFrags     = Max(pIn->numFrags,        1u);
    surfIn.numSamples   = Max(pIn->numSamples,      1u);
    surfIn.numMipLevels = Max(pIn->numMipLevels,    1u);
    surfIn.numSlices    = Max(pIn->numSlices,       1u);
    surfIn.height       = Max(pIn->unalignedHeight, 1u);
    surfIn.width        = Max(pIn->unalignedWidth,  1u);
    surfIn.bpp          = pIn->bpp;
    surfIn.resourceType = pIn->resourceType;
    surfIn.flags        = pIn->flags;
    surfIn.swizzleMode  = pIn->swizzleMode;

    const ADDR_E_RETURNCODE status = ComputeSurfaceInfoMicroTiled(&surfIn, &surfOut);

    if (status != ADDR_OK)
    {
        return status;
    }

    // The equation lookup table is indexed by resource type minus one, swizzle mode and log2 of the element size.
    const UINT_32 bytesLog2  = Log2(pIn->bpp >> 3);
    const UINT_32 typeIndex  = static_cast<UINT_32>(pIn->resourceType) - 1;
    const UINT_32 modeIndex  = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 equationId = m_equationLookupTable[typeIndex][modeIndex][bytesLog2];

    if (equationId == ADDR_INVALID_EQUATION_INDEX)
    {
        return ADDR_INVALIDPARAMS;
    }

    // Locate the 256B block that contains (x, y) inside the requested mip level.
    const UINT_32 blocksPerRow    = perMipInfo[pIn->mipId].pitch / surfOut.blockWidth;
    const UINT_32 blockRow        = pIn->y / surfOut.blockHeight;
    const UINT_32 blockColumn     = pIn->x / surfOut.blockWidth;
    const UINT_32 bytesPerBlock   = 256;
    const UINT_32 blockByteOffset = (blockRow * blocksPerRow + blockColumn) * bytesPerBlock;

    // Offset inside the block; the equation takes x in bytes and y in elements.
    const UINT_32 inBlockOffset   = ComputeOffsetFromEquation(&m_equationTable[equationId],
                                                              pIn->x << bytesLog2,
                                                              pIn->y,
                                                              0);

    UINT_64 address = surfOut.sliceSize * pIn->slice;

    address += perMipInfo[pIn->mipId].macroBlockOffset;
    address += blockByteOffset;
    address += inBlockOffset;

    pOut->addr = address;

    return status;
}

/**
************************************************************************************************************************
*   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
*
*   @brief
*       Internal function to compute the byte address of a coordinate inside a macro tiled swizzle surface.
*       Multi-fragment surfaces are resolved through a swizzle pattern, all others through the equation table.
*
*   @return
*       ADDR_OK on success, the error reported by ComputeSurfaceInfoMacroTiled if the layout query fails, or
*       ADDR_INVALIDPARAMS when no swizzle pattern / equation exists for the requested combination
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  surfIn  = {};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {};
    ADDR2_MIP_INFO                    mips[MaxMipLevels];

    ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);

    // Describe the surface to the layout query; zero-valued dimensions/counts are treated as 1
    surfIn.swizzleMode  = pIn->swizzleMode;
    surfIn.flags        = pIn->flags;
    surfIn.resourceType = pIn->resourceType;
    surfIn.bpp          = pIn->bpp;
    surfIn.width        = Max(pIn->unalignedWidth,  1u);
    surfIn.height       = Max(pIn->unalignedHeight, 1u);
    surfIn.numSlices    = Max(pIn->numSlices,       1u);
    surfIn.numMipLevels = Max(pIn->numMipLevels,    1u);
    surfIn.numSamples   = Max(pIn->numSamples,      1u);
    surfIn.numFrags     = Max(pIn->numFrags,        1u);
    surfOut.pMipInfo    = mips;

    const ADDR_E_RETURNCODE infoStatus = ComputeSurfaceInfoMacroTiled(&surfIn, &surfOut);

    if (infoStatus != ADDR_OK)
    {
        return infoStatus;
    }

    const UINT_32 bytesLog2   = Log2(pIn->bpp >> 3);
    const UINT_32 blockLog2   = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 inBlockMask = (1 << blockLog2) - 1;
    const UINT_32 pipeBits    = (1 << m_pipesLog2) - 1;
    const UINT_32 bankBits    = ((1 << GetBankXorBits(blockLog2)) - 1) << (m_pipesLog2 + ColumnBits);

    // Only XOR swizzle modes fold the pipe/bank XOR value into the in-block offset
    UINT_32 xorBits = 0;

    if (IsXor(pIn->swizzleMode))
    {
        xorBits = ((pIn->pipeBankXor & (pipeBits | bankBits)) << m_pipeInterleaveLog2) & inBlockMask;
    }

    if (surfIn.numFrags > 1)
    {
        // Multi-fragment path: the in-block offset comes from the full swizzle pattern
        const ADDR_SW_PATINFO* pPattern = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                pIn->resourceType,
                                                                bytesLog2,
                                                                surfIn.numFrags);

        if (pPattern == NULL)
        {
            return ADDR_INVALIDPARAMS;
        }

        const UINT_32 blocksPerRow = surfOut.pitch / surfOut.blockWidth;
        const UINT_32 blockRow     = pIn->y / surfOut.blockHeight;
        const UINT_32 blockCol     = pIn->x / surfOut.blockWidth;
        const UINT_64 blockIdx     = blockRow * blocksPerRow + blockCol;

        ADDR_BIT_SETTING patternBits[20];
        GetSwizzlePatternFromPatternInfo(pPattern, patternBits);

        const UINT_32 inBlockOffset =
            ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(patternBits),
                                            blockLog2,
                                            pIn->x,
                                            pIn->y,
                                            pIn->slice,
                                            pIn->sample);

        // address = slice base + block base + (offset within the block, XOR-ed with pipe/bank bits)
        pOut->addr = (surfOut.sliceSize * pIn->slice) +
                     (blockIdx << blockLog2) +
                     (inBlockOffset ^ xorBits);

        return ADDR_OK;
    }

    // Single-fragment path: the in-block offset comes from the equation table
    const UINT_32 rsrcSlot  = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
    const UINT_32 modeSlot  = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 equation  = m_equationLookupTable[rsrcSlot][modeSlot][bytesLog2];

    if (equation == ADDR_INVALID_EQUATION_INDEX)
    {
        return ADDR_INVALIDPARAMS;
    }

    const ADDR2_MIP_INFO& mip    = mips[pIn->mipId];
    const BOOL_32         inTail = (mip.mipTailOffset != 0) ? TRUE : FALSE;
    const BOOL_32         thin   = IsThin(pIn->resourceType, pIn->swizzleMode);

    // Thick layouts advance one slice-size step per block of slices instead of per slice
    UINT_64 sliceStride = surfOut.sliceSize;
    UINT_32 sliceIndex  = pIn->slice;

    if (!thin)
    {
        sliceStride = surfOut.sliceSize * surfOut.blockSlices;
        sliceIndex  = pIn->slice / surfOut.blockSlices;
    }

    // Coordinates fed to the equation are shifted by the mip's position inside the mip tail
    UINT_32 eqX = pIn->x;
    UINT_32 eqY = pIn->y;
    UINT_32 eqZ = pIn->slice;

    if (inTail)
    {
        eqX = pIn->x     + mip.mipTailCoordX;
        eqY = pIn->y     + mip.mipTailCoordY;
        eqZ = pIn->slice + mip.mipTailCoordZ;
    }

    // The containing block is located with the unshifted coordinates
    const UINT_32 blocksPerRow  = mip.pitch / surfOut.blockWidth;
    const UINT_32 blockRow      = pIn->y / surfOut.blockHeight;
    const UINT_32 blockCol      = pIn->x / surfOut.blockWidth;
    const UINT_64 blockIdx      = blockRow * blocksPerRow + blockCol;
    const UINT_32 inBlockOffset = ComputeOffsetFromEquation(&m_equationTable[equation],
                                                            eqX << bytesLog2,
                                                            eqY,
                                                            eqZ);

    // address = slice base + mip base + block base + (offset within the block, XOR-ed with pipe/bank bits)
    pOut->addr = sliceStride * sliceIndex +
                 mip.macroBlockOffset +
                 (blockIdx << blockLog2) +
                 (inBlockOffset ^ xorBits);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeMaxBaseAlignments
*
*   @brief
*       Gets the maximum base alignment
*   @return
*       Size64K, or the variable block size (1 << m_blockVarSizeLog2) when one is configured and it is larger
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
{
    UINT_32 maxBaseAlign = Size64K;

    // A zero m_blockVarSizeLog2 leaves the 64KB default in place
    if (m_blockVarSizeLog2 != 0)
    {
        maxBaseAlign = Max(Size64K, 1u << m_blockVarSizeLog2);
    }

    return maxBaseAlign;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
*
*   @brief
*       Gets the maximum base alignment for metadata by taking the largest meta block size reported by
*       GetMetaBlkSize over the Htile, Cmask, 2D Dcc and 3D Dcc configurations enumerated below
*   @return
*       maximum alignments for metadata
************************************************************************************************************************
*/
UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
{
    Dim3d   metaBlkDim;        // Out-parameter required by GetMetaBlkSize; its contents are not used here
    UINT_32 maxMetaAlign = 0;  // Running maximum over every configuration probed below

    // The variable-size modes are only probed when a variable block size is configured; otherwise the
    // corresponding 64KB mode is probed a second time, which leaves the maximum unchanged.
    const AddrSwizzleMode zModeVarOr64K = m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X;
    const AddrSwizzleMode rModeVarOr64K = m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X;

    // Htile and Cmask
    const AddrSwizzleMode xmaskModes[] =
    {
        ADDR_SW_64KB_Z_X,
        zModeVarOr64K,
    };
    const UINT_32 numXmaskModes = static_cast<UINT_32>(sizeof(xmaskModes) / sizeof(xmaskModes[0]));

    for (UINT_32 modeIdx = 0; modeIdx < numXmaskModes; modeIdx++)
    {
        const AddrSwizzleMode swMode = xmaskModes[modeIdx];

        for (UINT_32 elemLog2 = 0; elemLog2 < 3; elemLog2++)
        {
            for (UINT_32 fragLog2 = 0; fragLog2 < 4; fragLog2++)
            {
                const UINT_32 htileBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
                                                            ADDR_RSRC_TEX_2D,
                                                            swMode,
                                                            elemLog2,
                                                            fragLog2,
                                                            TRUE,
                                                            &metaBlkDim);

                maxMetaAlign = Max(maxMetaAlign, htileBlkSize);
            }
        }

        const UINT_32 cmaskBlkSize = GetMetaBlkSize(Gfx10DataFmask,
                                                    ADDR_RSRC_TEX_2D,
                                                    swMode,
                                                    0,
                                                    0,
                                                    TRUE,
                                                    &metaBlkDim);

        maxMetaAlign = Max(maxMetaAlign, cmaskBlkSize);
    }

    // 2D Dcc
    const AddrSwizzleMode dcc2dModes[] =
    {
        ADDR_SW_64KB_S_X,
        ADDR_SW_64KB_D_X,
        ADDR_SW_64KB_R_X,
        rModeVarOr64K,
    };
    const UINT_32 numDcc2dModes = static_cast<UINT_32>(sizeof(dcc2dModes) / sizeof(dcc2dModes[0]));

    for (UINT_32 modeIdx = 0; modeIdx < numDcc2dModes; modeIdx++)
    {
        for (UINT_32 elemLog2 = 0; elemLog2 < MaxNumOfBpp; elemLog2++)
        {
            for (UINT_32 fragLog2 = 0; fragLog2 < 4; fragLog2++)
            {
                const UINT_32 dcc2dBlkSize = GetMetaBlkSize(Gfx10DataColor,
                                                            ADDR_RSRC_TEX_2D,
                                                            dcc2dModes[modeIdx],
                                                            elemLog2,
                                                            fragLog2,
                                                            TRUE,
                                                            &metaBlkDim);

                maxMetaAlign = Max(maxMetaAlign, dcc2dBlkSize);
            }
        }
    }

    // 3D Dcc (always probed with a fragment log2 of 0)
    const AddrSwizzleMode dcc3dModes[] =
    {
        ADDR_SW_64KB_Z_X,
        ADDR_SW_64KB_S_X,
        ADDR_SW_64KB_D_X,
        ADDR_SW_64KB_R_X,
        rModeVarOr64K,
    };
    const UINT_32 numDcc3dModes = static_cast<UINT_32>(sizeof(dcc3dModes) / sizeof(dcc3dModes[0]));

    for (UINT_32 modeIdx = 0; modeIdx < numDcc3dModes; modeIdx++)
    {
        for (UINT_32 elemLog2 = 0; elemLog2 < MaxNumOfBpp; elemLog2++)
        {
            const UINT_32 dcc3dBlkSize = GetMetaBlkSize(Gfx10DataColor,
                                                        ADDR_RSRC_TEX_3D,
                                                        dcc3dModes[modeIdx],
                                                        elemLog2,
                                                        0,
                                                        TRUE,
                                                        &metaBlkDim);

            maxMetaAlign = Max(maxMetaAlign, dcc3dBlkSize);
        }
    }

    return maxMetaAlign;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetMetaElementSizeLog2
*
*   @brief
*       Maps a data surface type to the log2 of its meta data element size
*   @return
*       0 for color, 2 for depth/stencil, -1 otherwise (expected to be Fmask)
************************************************************************************************************************
*/
INT_32 Gfx10Lib::GetMetaElementSizeLog2(
    Gfx10DataType dataType) ///< Data surface type
{
    INT_32 sizeLog2;

    switch (dataType)
    {
        case Gfx10DataColor:
            sizeLog2 = 0;
            break;
        case Gfx10DataDepthStencil:
            sizeLog2 = 2;
            break;
        default:
            // Fmask is the only remaining type
            ADDR_ASSERT(dataType == Gfx10DataFmask);
            sizeLog2 = -1;
            break;
    }

    return sizeLog2;
}

/**
************************************************************************************************************************
*   Gfx10Lib::GetMetaCacheSizeLog2
*
*   @brief
*       Maps a data surface type to the log2 of its meta data cache line size
*   @return
*       6 for color, 8 for depth/stencil, 8 otherwise (expected to be Fmask)
************************************************************************************************************************
*/
INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
    Gfx10DataType dataType) ///< Data surface type
{
    INT_32 lineSizeLog2;

    switch (dataType)
    {
        case Gfx10DataColor:
            lineSizeLog2 = 6;
            break;
        case Gfx10DataDepthStencil:
            lineSizeLog2 = 8;
            break;
        default:
            // Fmask is the only remaining type
            ADDR_ASSERT(dataType == Gfx10DataFmask);
            lineSizeLog2 = 8;
            break;
    }

    return lineSizeLog2;
}

/**
************************************************************************************************************************
*   Gfx10Lib::HwlComputeSurfaceInfoLinear
*
*   @brief
*       Internal function to calculate pitch, height, sizes and alignment for a linear surface, including the
*       per-mip layout when the caller supplies pOut->pMipInfo
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS for a 1D texture with height > 1, or the error reported by
*       ApplyCustomizedPitchHeight for a single-level surface
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // A 1D texture cannot have more than one row
    if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
    {
        return ADDR_INVALIDPARAMS;
    }

    const UINT_32 bytesPerElem = pIn->bpp >> 3;
    // ADDR_SW_LINEAR_GENERAL has no pitch alignment; every other mode aligns rows to 256 bytes
    const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / bytesPerElem);
    const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;

    UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign);
    UINT_32 surfHeight   = pIn->height;
    UINT_64 bytesInSlice = 0;

    if (pIn->numMipLevels > 1)
    {
        // Walk from the last mip level down to level 0 so that each level's offset is the accumulated size
        // of all levels after it
        INT_32 mipId = static_cast<INT_32>(pIn->numMipLevels) - 1;

        while (mipId >= 0)
        {
            UINT_32 levelWidth, levelHeight;

            GetMipSize(pIn->width, pIn->height, 1, mipId, &levelWidth, &levelHeight);

            const UINT_32 levelPitch = PowTwoAlign(levelWidth, pitchAlign);

            if (pOut->pMipInfo != NULL)
            {
                ADDR2_MIP_INFO& level = pOut->pMipInfo[mipId];

                level.pitch            = levelPitch;
                level.height           = levelHeight;
                level.depth            = mipDepth;
                level.offset           = bytesInSlice;
                level.mipTailOffset    = 0;
                level.macroBlockOffset = bytesInSlice;
            }

            bytesInSlice += static_cast<UINT_64>(levelPitch) * levelHeight * bytesPerElem;

            mipId--;
        }
    }
    else
    {
        // Single level: honor a caller-customized pitch/height
        const ADDR_E_RETURNCODE customStatus =
            ApplyCustomizedPitchHeight(pIn, bytesPerElem, pitchAlign, &alignedPitch, &surfHeight);

        if (customStatus != ADDR_OK)
        {
            return customStatus;
        }

        bytesInSlice = static_cast<UINT_64>(alignedPitch) * surfHeight * bytesPerElem;

        if (pOut->pMipInfo != NULL)
        {
            ADDR2_MIP_INFO& level = pOut->pMipInfo[0];

            level.pitch            = alignedPitch;
            level.height           = surfHeight;
            level.depth            = mipDepth;
            level.offset           = 0;
            level.mipTailOffset    = 0;
            level.macroBlockOffset = 0;
        }
    }

    pOut->pitch          = alignedPitch;
    pOut->height         = surfHeight;
    pOut->numSlices      = pIn->numSlices;
    pOut->sliceSize      = bytesInSlice;
    pOut->surfSize       = bytesInSlice * pOut->numSlices;
    pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? bytesPerElem : 256;
    // A linear "block" is one pitch-aligned run of elements on a single row of a single slice
    pOut->blockWidth     = pitchAlign;
    pOut->blockHeight    = 1;
    pOut->blockSlices    = 1;

    // The members below are not used on GFX10
    pOut->mipChainPitch  = 0;
    pOut->mipChainHeight = 0;
    pOut->mipChainSlice  = 0;
    pOut->epitchIsHeight = FALSE;

    // Post-calculation validation
    ADDR_ASSERT(pOut->sliceSize > 0);

    return ADDR_OK;
}

} // V2
} // Addr
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx10addrlib.h
* @brief Contains the Gfx10Lib class definition.
************************************************************************************************************************
*/

#ifndef __GFX10_ADDR_LIB_H__
#define __GFX10_ADDR_LIB_H__

#include "addrlib2.h"
#include "coord.h"
#include "gfx10SwizzlePattern.h"

namespace rocr {
namespace Addr
{
namespace V2
{

/**
************************************************************************************************************************
* @brief GFX10 specific settings structure.
*
*        Laid out as two 32-bit groups of bit-fields: the first group is entirely reserved, the second holds the
*        GFX10 configuration flags followed by padding and reserved bits.
************************************************************************************************************************
*/
struct Gfx10ChipSettings
{
    struct
    {
        UINT_32 reserved1           : 32; // First 32-bit group: no named flags

        // Misc configuration bits
        UINT_32 isDcn20             : 1; // If using DCN2.0
        UINT_32 supportRbPlus       : 1; // NOTE(review): presumably "RB+ is supported" - confirm where it is set
        UINT_32 dsMipmapHtileFix    : 1; // NOTE(review): presumably a depth/stencil mipmap Htile fix - confirm
        UINT_32 dccUnsup3DSwDis     : 1; // NOTE(review): meaning not visible here - confirm where it is set
        UINT_32                     : 4; // Unnamed padding bits
        UINT_32 reserved2           : 24; // Remaining bits of the second 32-bit group
    };
};

/**
************************************************************************************************************************
* @brief GFX10 data surface type.
*
*        Selects which kind of surface a meta data query (e.g. Gfx10Lib::GetMetaElementSizeLog2,
*        Gfx10Lib::GetMetaCacheSizeLog2) refers to.
************************************************************************************************************************
*/
enum Gfx10DataType
{
    Gfx10DataColor,        // Color data; used for the Dcc meta block size queries
    Gfx10DataDepthStencil, // Depth/stencil data; used for the Htile meta block size queries
    Gfx10DataFmask         // Fmask data; used for the Cmask meta block size queries
};

// Swizzle mode sets. Every constant below is a bit mask in which bit N is set when the swizzle mode whose
// AddrSwizzleMode enum value is N belongs to the set.

// Only ADDR_SW_LINEAR
const UINT_32 Gfx10LinearSwModeMask = (1u << ADDR_SW_LINEAR);

// Sets grouped by block size: the ADDR_SW_256B_*, ADDR_SW_4KB_*, ADDR_SW_64KB_* and ADDR_SW_VAR_* modes
const UINT_32 Gfx10Blk256BSwModeMask = (1u << ADDR_SW_256B_S) |
                                       (1u << ADDR_SW_256B_D);

const UINT_32 Gfx10Blk4KBSwModeMask = (1u << ADDR_SW_4KB_S)   |
                                      (1u << ADDR_SW_4KB_D)   |
                                      (1u << ADDR_SW_4KB_S_X) |
                                      (1u << ADDR_SW_4KB_D_X);

const UINT_32 Gfx10Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S)   |
                                       (1u << ADDR_SW_64KB_D)   |
                                       (1u << ADDR_SW_64KB_S_T) |
                                       (1u << ADDR_SW_64KB_D_T) |
                                       (1u << ADDR_SW_64KB_Z_X) |
                                       (1u << ADDR_SW_64KB_S_X) |
                                       (1u << ADDR_SW_64KB_D_X) |
                                       (1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx10BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z_X) |
                                      (1u << ADDR_SW_VAR_R_X);

// Sets grouped by swizzle type: the *_Z_*, *_S*, *_D* and *_R_* modes respectively
const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
                                 (1u << ADDR_SW_VAR_Z_X);

const UINT_32 Gfx10StandardSwModeMask = (1u << ADDR_SW_256B_S)   |
                                        (1u << ADDR_SW_4KB_S)    |
                                        (1u << ADDR_SW_64KB_S)   |
                                        (1u << ADDR_SW_64KB_S_T) |
                                        (1u << ADDR_SW_4KB_S_X)  |
                                        (1u << ADDR_SW_64KB_S_X);

const UINT_32 Gfx10DisplaySwModeMask = (1u << ADDR_SW_256B_D)   |
                                       (1u << ADDR_SW_4KB_D)    |
                                       (1u << ADDR_SW_64KB_D)   |
                                       (1u << ADDR_SW_64KB_D_T) |
                                       (1u << ADDR_SW_4KB_D_X)  |
                                       (1u << ADDR_SW_64KB_D_X);

const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
                                      (1u << ADDR_SW_VAR_R_X);

// Sets grouped by name suffix: the *_X modes, the *_T modes, and the union of both
const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X)  |
                                 (1u << ADDR_SW_4KB_D_X)  |
                                 (1u << ADDR_SW_64KB_Z_X) |
                                 (1u << ADDR_SW_64KB_S_X) |
                                 (1u << ADDR_SW_64KB_D_X) |
                                 (1u << ADDR_SW_64KB_R_X) |
                                 Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10TSwModeMask = (1u << ADDR_SW_64KB_S_T) |
                                 (1u << ADDR_SW_64KB_D_T);

const UINT_32 Gfx10XorSwModeMask = Gfx10XSwModeMask |
                                   Gfx10TSwModeMask;

// Per-resource-type sets.
// NOTE(review): presumably the swizzle modes accepted for 1D / 2D / 3D resources - confirm at the use sites.
const UINT_32 Gfx10Rsrc1dSwModeMask = Gfx10LinearSwModeMask |
                                      Gfx10RenderSwModeMask |
                                      Gfx10ZSwModeMask;

const UINT_32 Gfx10Rsrc2dSwModeMask = Gfx10LinearSwModeMask  |
                                      Gfx10Blk256BSwModeMask |
                                      Gfx10Blk4KBSwModeMask  |
                                      Gfx10Blk64KBSwModeMask |
                                      Gfx10BlkVarSwModeMask;

const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR)   |
                                      (1u << ADDR_SW_4KB_S)    |
                                      (1u << ADDR_SW_64KB_S)   |
                                      (1u << ADDR_SW_64KB_S_T) |
                                      (1u << ADDR_SW_4KB_S_X)  |
                                      (1u << ADDR_SW_64KB_Z_X) |
                                      (1u << ADDR_SW_64KB_S_X) |
                                      (1u << ADDR_SW_64KB_D_X) |
                                      (1u << ADDR_SW_64KB_R_X) |
                                      Gfx10BlkVarSwModeMask;

// 4KB and 64KB block modes that are not *_X modes
const UINT_32 Gfx10Rsrc2dPrtSwModeMask = (Gfx10Blk4KBSwModeMask | Gfx10Blk64KBSwModeMask) & ~Gfx10XSwModeMask;

// The 2D PRT set with the display modes removed
const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10DisplaySwModeMask;

// 3D sets split into "thin" modes (64KB Z_X / R_X plus the variable-size modes) and "thick" modes (every other
// non-linear 3D mode), the latter further split by block size
const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
                                              (1u << ADDR_SW_64KB_R_X);


const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask |
                                          Gfx10BlkVarSwModeMask;

// Thin 3D modes plus linear
const UINT_32 Gfx10Rsrc3dViewAs2dSwModeMask = Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask;

const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask);

const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask;

const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;

// Union of the Z and render sets
const UINT_32 Gfx10MsaaSwModeMask = (Gfx10ZSwModeMask       |
                                     Gfx10RenderSwModeMask)
                                    ;

// NOTE(review): the four Dcn* masks presumably list the swizzle modes usable with DCN 2.0 / DCN 2.1 for
// non-64bpp and 64bpp surfaces - confirm at the use sites. Each Bpp64 mask is its NonBpp64 counterpart plus
// additional display (*_D*) modes.
const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR)   |
                                        (1u << ADDR_SW_4KB_S)    |
                                        (1u << ADDR_SW_64KB_S)   |
                                        (1u << ADDR_SW_64KB_S_T) |
                                        (1u << ADDR_SW_4KB_S_X)  |
                                        (1u << ADDR_SW_64KB_S_X) |
                                        (1u << ADDR_SW_64KB_R_X);

const UINT_32 Dcn20Bpp64SwModeMask = (1u << ADDR_SW_4KB_D)    |
                                     (1u << ADDR_SW_64KB_D)   |
                                     (1u << ADDR_SW_64KB_D_T) |
                                     (1u << ADDR_SW_4KB_D_X)  |
                                     (1u << ADDR_SW_64KB_D_X) |
                                     Dcn20NonBpp64SwModeMask;

const UINT_32 Dcn21NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR)   |
                                        (1u << ADDR_SW_64KB_S)   |
                                        (1u << ADDR_SW_64KB_S_T) |
                                        (1u << ADDR_SW_64KB_S_X) |
                                        (1u << ADDR_SW_64KB_R_X);

const UINT_32 Dcn21Bpp64SwModeMask = (1u << ADDR_SW_64KB_D)   |
                                     (1u << ADDR_SW_64KB_D_T) |
                                     (1u << ADDR_SW_64KB_D_X) |
                                     Dcn21NonBpp64SwModeMask;

/**
************************************************************************************************************************
* @brief GFX10 flavour of the address library: supplies the virtual Hwl* entry points together with
*        GFX10-only helpers, constants and lookup tables.
************************************************************************************************************************
*/
class Gfx10Lib : public Lib
{
public:
    /// Factory. Obtains storage through Object::ClientAlloc and placement-constructs a Gfx10Lib in it.
    /// Yields NULL when no storage could be obtained.
    static Addr::Lib* CreateObj(const Client* pClient)
    {
        VOID* const pStorage = Object::ClientAlloc(sizeof(Gfx10Lib), pClient);

        if (pStorage == NULL)
        {
            return NULL;
        }

        return new (pStorage) Gfx10Lib(pClient);
    }

protected:
    Gfx10Lib(const Client* pClient);
    virtual ~Gfx10Lib();

    /// Reports the isStd flag stored for swizzleMode in the swizzle-mode table; resourceType is not consulted.
    virtual BOOL_32 HwlIsStandardSwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isStd;
    }

    /// Reports the isDisp flag stored for swizzleMode in the swizzle-mode table; resourceType is not consulted.
    virtual BOOL_32 HwlIsDisplaySwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isDisp;
    }

    /// 1D and 2D resources are always thin. A 3D resource is thin only when its swizzle mode is
    /// neither standard (isStd) nor display (isDisp).
    virtual BOOL_32 HwlIsThin(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        if ((IsTex1d(resourceType) == TRUE) || (IsTex2d(resourceType) == TRUE))
        {
            return TRUE;
        }

        if (IsTex3d(resourceType) != TRUE)
        {
            return FALSE;
        }

        return ((m_swizzleModeTable[swizzleMode].isStd  == FALSE) &&
                (m_swizzleModeTable[swizzleMode].isDisp == FALSE));
    }

    /// Only 3D resources can be thick, and only with a standard (isStd) or display (isDisp) swizzle mode.
    virtual BOOL_32 HwlIsThick(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        if (IsTex3d(resourceType) != TRUE)
        {
            return FALSE;
        }

        return (m_swizzleModeTable[swizzleMode].isStd || m_swizzleModeTable[swizzleMode].isDisp);
    }

    // ------------------------------------------------------------------------------------------------------------
    // Hardware-layer entry points whose bodies live outside this class definition.
    // ------------------------------------------------------------------------------------------------------------

    virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
        const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
        const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
        const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
        const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
        const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
        const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlSupportComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn);

    virtual VOID HwlComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual UINT_32 HwlGetEquationIndex(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    /// Hands out a pointer to the equation table through ppEquationTable and returns the number of
    /// equations (m_numEquations).
    virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
    {
        *ppEquationTable = m_equationTable;

        return m_numEquations;
    }

    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView(
        const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    virtual UINT_32 HwlComputeMaxBaseAlignments() const;

    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

    virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

private:
    // Initialize equation table
    VOID InitEquationTable();

    ADDR_E_RETURNCODE ComputeSurfaceInfoMacroTiled(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceInfoMicroTiled(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMacroTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMicroTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    UINT_32 ComputeOffsetFromSwizzlePattern(
        const UINT_64* pPattern,
        UINT_32        numBits,
        UINT_32        x,
        UINT_32        y,
        UINT_32        z,
        UINT_32        s) const;

    UINT_32 ComputeOffsetFromEquation(
        const ADDR_EQUATION* pEq,
        UINT_32              x,
        UINT_32              y,
        UINT_32              z) const;

    ADDR_E_RETURNCODE ComputeStereoInfo(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        UINT_32*                                pAlignY,
        UINT_32*                                pRightXor) const;

    /// Derives the dimensions of mip level mipId from the mip-0 dimensions.
    /// Each mip-0 dimension is first clamped to at least 1 and then passed to ShiftCeil with mipId.
    /// pMipDepth is optional: the depth is only written when a non-NULL pointer is supplied.
    static void GetMipSize(
        UINT_32  mip0Width,
        UINT_32  mip0Height,
        UINT_32  mip0Depth,
        UINT_32  mipId,
        UINT_32* pMipWidth,
        UINT_32* pMipHeight,
        UINT_32* pMipDepth = NULL)
    {
        const UINT_32 baseWidth  = Max(mip0Width, 1u);
        const UINT_32 baseHeight = Max(mip0Height, 1u);

        *pMipWidth  = ShiftCeil(baseWidth, mipId);
        *pMipHeight = ShiftCeil(baseHeight, mipId);

        if (pMipDepth == NULL)
        {
            return;
        }

        *pMipDepth = ShiftCeil(Max(mip0Depth, 1u), mipId);
    }

    const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
        AddrSwizzleMode  swizzleMode,
        AddrResourceType resourceType,
        UINT_32          log2Elem,
        UINT_32          numFrag) const;

    /**
     * Assembles the 20-entry swizzle pattern selected by a pattern-info record.
     *
     * The record's nibble indices each pick one row of a GFX10_SW_PATTERN_NIBBLE* table. The rows are copied
     * back to back into pSwizzle: nibble01 at entry 0, nibble2 at entry 8, nibble3 at entry 12 and
     * nibble4 at entry 16.
     *
     * @param pPatInfo Pattern-info record providing nibble01Idx, nibble2Idx, nibble3Idx and nibble4Idx.
     * @param pSwizzle Destination array that receives the assembled pattern.
     */
    VOID GetSwizzlePatternFromPatternInfo(
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_BIT_SETTING       (&pSwizzle)[20]) const
    {
        memcpy(&pSwizzle[0],
               GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
               sizeof(GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx]));

        memcpy(pSwizzle + 8,
               GFX10_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx],
               sizeof(GFX10_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx]));

        memcpy(pSwizzle + 12,
               GFX10_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx],
               sizeof(GFX10_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx]));

        memcpy(pSwizzle + 16,
               GFX10_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx],
               sizeof(GFX10_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx]));
    }

    VOID ConvertSwizzlePatternToEquation(
        UINT_32                elemLog2,
        AddrResourceType       rsrcType,
        AddrSwizzleMode        swMode,
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_EQUATION*         pEquation) const;

    static INT_32 GetMetaElementSizeLog2(Gfx10DataType dataType);

    static INT_32 GetMetaCacheSizeLog2(Gfx10DataType dataType);

    void GetBlk256SizeLog2(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        Dim3d*           pBlock) const;

    void GetCompressedBlockSizeLog2(
        Gfx10DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        Dim3d*           pBlock) const;

    INT_32 GetMetaOverlapLog2(
        Gfx10DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2) const;

    INT_32 Get3DMetaOverlapLog2(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2) const;

    UINT_32 GetMetaBlkSize(
        Gfx10DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        BOOL_32          pipeAlign,
        Dim3d*           pBlock) const;

    INT_32 GetPipeRotateAmount(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    /// Returns m_pipesLog2, unless RB+ is supported and (m_numSaLog2 + 1) is smaller, in which case
    /// (m_numSaLog2 + 1) is returned. Note that, despite the name, the result is assembled from log2 members.
    INT_32 GetEffectiveNumPipes() const
    {
        const UINT_32 numSaLog2PlusOne = m_numSaLog2 + 1;

        if ((m_settings.supportRbPlus == FALSE) || (numSaLog2PlusOne >= m_pipesLog2))
        {
            return m_pipesLog2;
        }

        return numSaLog2PlusOne;
    }

    /// TRUE for a 2D resource with an RT-opt or Z-order swizzle mode, or for a 3D resource with a
    /// display swizzle mode.
    BOOL_32 IsRbAligned(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        const BOOL_32 rtOpt   = IsRtOptSwizzle(swizzleMode);
        const BOOL_32 zOrder  = IsZOrderSwizzle(swizzleMode);
        const BOOL_32 display = IsDisplaySwizzle(swizzleMode);

        BOOL_32 rbAligned = FALSE;

        if (IsTex2d(resourceType) && (rtOpt || zOrder))
        {
            rbAligned = TRUE;
        }
        else if (IsTex3d(resourceType) && display)
        {
            rbAligned = TRUE;
        }

        return rbAligned;
    }

    UINT_32 GetValidDisplaySwizzleModes(UINT_32 bpp) const;

    BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const;

    /// Translates a set of allowed swizzle modes into the set of block types those modes cover.
    /// A block flag is TRUE when at least one allowed mode falls inside the corresponding mask.
    /// For 3D resources macroThin4KB is never assigned and therefore keeps its zero-initialized value;
    /// for all other resource types the two thick flags keep their zero-initialized value.
    static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
    {
        const UINT_32   modeBits = allowedSwModeSet.value;
        ADDR2_BLOCK_SET blocks   = {};

        blocks.micro  = ((modeBits & Gfx10Blk256BSwModeMask) != 0) ? TRUE : FALSE;
        blocks.linear = ((modeBits & Gfx10LinearSwModeMask)  != 0) ? TRUE : FALSE;
        blocks.var    = ((modeBits & Gfx10BlkVarSwModeMask)  != 0) ? TRUE : FALSE;

        if (rsrcType != ADDR_RSRC_TEX_3D)
        {
            blocks.macroThin4KB  = ((modeBits & Gfx10Blk4KBSwModeMask)  != 0) ? TRUE : FALSE;
            blocks.macroThin64KB = ((modeBits & Gfx10Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
        }
        else
        {
            blocks.macroThick4KB  = ((modeBits & Gfx10Rsrc3dThick4KBSwModeMask)  != 0) ? TRUE : FALSE;
            blocks.macroThin64KB  = ((modeBits & Gfx10Rsrc3dThin64KBSwModeMask)  != 0) ? TRUE : FALSE;
            blocks.macroThick64KB = ((modeBits & Gfx10Rsrc3dThick64KBSwModeMask) != 0) ? TRUE : FALSE;
        }

        return blocks;
    }

    /// Translates a set of allowed swizzle modes into the set of swizzle types (Z, S, D, R) those modes cover.
    /// A type flag is TRUE when at least one allowed mode falls inside the corresponding mask.
    static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
    {
        const UINT_32    modeBits = allowedSwModeSet.value;
        ADDR2_SWTYPE_SET swTypes  = {};

        swTypes.sw_Z = ((modeBits & Gfx10ZSwModeMask)        != 0) ? TRUE : FALSE;
        swTypes.sw_S = ((modeBits & Gfx10StandardSwModeMask) != 0) ? TRUE : FALSE;
        swTypes.sw_D = ((modeBits & Gfx10DisplaySwModeMask)  != 0) ? TRUE : FALSE;
        swTypes.sw_R = ((modeBits & Gfx10RenderSwModeMask)   != 0) ? TRUE : FALSE;

        return swTypes;
    }

    /// A mip is in the tail when it fits within the tail dimensions (width and height) and the number of
    /// mips from it to the end of the chain does not exceed maxNumMipsInTail.
    BOOL_32 IsInMipTail(
        Dim3d   mipTailDim,
        UINT_32 maxNumMipsInTail,
        UINT_32 mipWidth,
        UINT_32 mipHeight,
        UINT_32 numMipsToTheEnd) const
    {
        if ((mipWidth > mipTailDim.w) || (mipHeight > mipTailDim.h))
        {
            return FALSE;
        }

        return (numMipsToTheEnd <= maxNumMipsInTail);
    }

    /// Number of block-address bits left over once the pipe-interleave, pipe and column bits are taken out,
    /// capped at BankBits. Returns 0 when blockBits does not exceed the sum of those three.
    UINT_32 GetBankXorBits(UINT_32 blockBits) const
    {
        const UINT_32 bitsBelowBank = m_pipeInterleaveLog2 + m_pipesLog2 + ColumnBits;

        if (blockBits <= bitsBelowBank)
        {
            return 0;
        }

        return Min(blockBits - bitsBelowBank, BankBits);
    }

    BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
    BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    // Fixed bit counts used by GetBankXorBits, plus the DCC type value named "unaligned".
    static const UINT_32 ColumnBits       = 2;
    static const UINT_32 BankBits         = 4;
    static const UINT_32 UnalignedDccType = 3;

    // Per-element-size block dimension tables (one entry per supported bpp); defined out of line.
    static const Dim3d Block256_3d[MaxNumOfBpp];
    static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
    static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];

    // Flags for every swizzle mode, indexed by AddrSwizzleMode; defined out of line.
    static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

    // Log2 of the number of packers
    UINT_32 m_numPkrLog2;
    // Log2 of the number of shader arrays
    UINT_32 m_numSaLog2;

    Gfx10ChipSettings m_settings;

    // Base indices for the color, xmask, htile and DCC groups.
    // NOTE(review): presumably offsets into the swizzle pattern-info tables - confirm against the implementation.
    UINT_32 m_colorBaseIndex;
    UINT_32 m_xmaskBaseIndex;
    UINT_32 m_htileBaseIndex;
    UINT_32 m_dccBaseIndex;
};

} // V2
} // Addr
} // namespace rocr

#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11SwizzlePattern.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx11SwizzlePattern.h
* @brief swizzle pattern for gfx11.
************************************************************************************************************************
*/

#ifndef __GFX11_SWIZZLE_PATTERN_H__
#define __GFX11_SWIZZLE_PATTERN_H__

namespace rocr {
namespace Addr
{
namespace V2
{
// Swizzle pattern-info records for the SW_256_D mode (mode name as given in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes. Each configuration owns five
// consecutive rows ordered by element size (1, 2, 4, 8, 16 bpe); the configuration of every row is spelled out
// in its trailing comment.
//
// In this table only the second field changes (0..4, tracking the element size); all other fields are identical
// for every pipe/PKR configuration.
//
// NOTE(review): the field order is presumably {maxItemCount, nibble01Idx, nibble2Idx, nibble3Idx, nibble4Idx} -
// confirm against the ADDR_SW_PATINFO declaration, which is not part of this file.
const ADDR_SW_PATINFO GFX11_SW_256_D_PATINFO[] =
{
    {   1,    0,    0,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256_D
    {   1,    0,    0,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256_D
    {   1,    1,    0,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256_D
    {   1,    2,    0,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256_D
    {   1,    3,    0,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256_D
    {   1,    4,    0,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256_D
};

// Swizzle pattern-info records for the SW_4K_D mode (mode name as given in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes. Each configuration owns five
// consecutive rows ordered by element size (1, 2, 4, 8, 16 bpe); the configuration of every row is spelled out
// in its trailing comment.
//
// The second field runs 0..4 and the third field runs 1..5 with the element size. Both sequences repeat
// unchanged for every pipe/PKR configuration, so the records here depend on the element size only.
//
// NOTE(review): the field order is presumably {maxItemCount, nibble01Idx, nibble2Idx, nibble3Idx, nibble4Idx} -
// confirm against the ADDR_SW_PATINFO declaration, which is not part of this file.
const ADDR_SW_PATINFO GFX11_SW_4K_D_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_D
    {   1,    0,    1,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_D
    {   1,    1,    2,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_D
    {   1,    2,    3,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_D
    {   1,    3,    4,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_D
    {   1,    4,    5,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D
};

// Swizzle pattern-info records for the SW_4K_D_X mode (mode name as given in the per-row comments).
//
// Layout: 75 rows = 15 pipe/packer (PKR) configurations x 5 element sizes. Each configuration owns five
// consecutive rows ordered by element size (1, 2, 4, 8, 16 bpe); the configuration of every row is spelled out
// in its trailing comment.
//
// Unlike the non-X 4K_D table, the third field here depends on the pipe/PKR configuration:
//   - the single-pipe group uses first field 1 and third field 1..5;
//   - every other group uses first field 3, and the third field counts up contiguously from 6 (2 pipes, 1 bpe)
//     to 65 (64 pipes / 16 PKRs, 16 bpe);
//   - the two 32-PKR groups at the end reuse third-field values 51..55 and 56..60, i.e. the same values as the
//     "16 pipes (16 PKRs)" and "32 pipes (16 PKRs)" groups respectively.
// NOTE(review): the reuse by the 32-PKR groups looks intentional but cannot be verified from this file - confirm
// against the table generator or hardware documentation before changing it.
//
// NOTE(review): the field order is presumably {maxItemCount, nibble01Idx, nibble2Idx, nibble3Idx, nibble4Idx} -
// confirm against the ADDR_SW_PATINFO declaration, which is not part of this file.
const ADDR_SW_PATINFO GFX11_SW_4K_D_X_PATINFO[] =
{
    {   1,    0,    1,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D_X
    {   1,    1,    2,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D_X
    {   1,    2,    3,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_D_X
    {   1,    3,    4,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_D_X
    {   1,    4,    5,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,    6,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,    7,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,    8,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,    9,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   10,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   11,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   12,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   13,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   14,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   15,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   16,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   17,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   18,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   19,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   20,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   21,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   22,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   23,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   24,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   25,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   26,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   27,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   28,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   29,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   30,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   31,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   32,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   33,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   34,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   35,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   36,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   37,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   38,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   39,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   40,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   41,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   42,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   43,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   44,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   45,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   46,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   47,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   48,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   49,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   50,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   51,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   52,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   53,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   54,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   55,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   56,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   57,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   58,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   59,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   60,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   61,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   62,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   63,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   64,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   65,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   51,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   52,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   53,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   54,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   55,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_D_X
    {   3,    0,   56,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_D_X
    {   3,    1,   57,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_D_X
    {   3,    2,   58,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_D_X
    {   3,    3,   59,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_D_X
    {   3,    4,   60,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D_X
};

// Pattern-info lookup table for the GFX11 SW_64K_D swizzle mode.
//
// Row layout (as labelled by the per-row comments): 15 pipe/PKR configurations,
// each occupying five consecutive rows for 1, 2, 4, 8 and 16 bpe -- 75 rows total.
// In this table every configuration carries the same five rows, so an entry
// effectively depends only on the bpe slot, not on the pipe/PKR configuration.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO, which
// is declared outside this section -- presumably indices into shared pattern tables;
// confirm against that declaration before editing any value.
const ADDR_SW_PATINFO GFX11_SW_64K_D_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D
    {   1,    0,    1,    1,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D
    {   1,    1,    2,    2,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D
    {   1,    2,    3,    3,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D
    {   1,    3,    4,    4,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D
    {   1,    4,    5,    5,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D
};

// Pattern-info lookup table for the GFX11 SW_64K_D_X swizzle mode.
//
// Row layout (as labelled by the per-row comments): 15 pipe/PKR configurations,
// each occupying five consecutive rows for 1, 2, 4, 8 and 16 bpe -- 75 rows total.
// Observations from the data below:
//   - column 1 is 1 for the single-pipe rows and 3 for every other configuration;
//   - column 2 runs 0..4 within each configuration, following the bpe order;
//   - column 4 is 1..5 for up to 16 pipes, 6..10 for 32 pipes and 11..15 for 64 pipes;
//   - column 5 is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO, which
// is declared outside this section -- presumably indices into shared pattern tables;
// confirm against that declaration before editing any value.
const ADDR_SW_PATINFO GFX11_SW_64K_D_X_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_X
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_X
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D_X
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D_X
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,    6,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,    7,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,    8,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,    9,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   10,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   11,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   12,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   13,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   14,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   15,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   16,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   17,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   18,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   19,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   20,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   21,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   22,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   23,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   24,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   25,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   26,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   27,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   28,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   29,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   30,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   31,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   32,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   33,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   34,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   35,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   36,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   37,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   38,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   39,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   40,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   41,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   42,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   43,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   44,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   45,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   66,    6,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   67,    7,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   68,    8,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   69,    9,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   70,   10,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   51,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   52,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   53,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   54,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   55,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   71,    6,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   72,    7,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   73,    8,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   74,    9,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   75,   10,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   76,   11,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   77,   12,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   78,   13,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   79,   14,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   80,   15,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   81,    6,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   82,    7,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   83,    8,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   84,    9,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   85,   10,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D_X
    {   3,    0,   86,   11,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D_X
    {   3,    1,   87,   12,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D_X
    {   3,    2,   88,   13,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D_X
    {   3,    3,   89,   14,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D_X
    {   3,    4,   90,   15,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_X
};

// Pattern-info lookup table for the GFX11 SW_64K_D_T swizzle mode.
//
// Row layout (as labelled by the per-row comments): 15 pipe/PKR configurations,
// each occupying five consecutive rows for 1, 2, 4, 8 and 16 bpe -- 75 rows total.
// Observations from the data below:
//   - column 1 is 1 for the single-pipe rows and 2 for every other configuration;
//   - column 2 runs 0..4 within each configuration, following the bpe order;
//   - column 3 depends only on the pipe count (the PKR count does not change it);
//     the 64-pipe rows reuse the same 1..5 values as the single-pipe rows;
//   - column 4 is 1..5 for up to 16 pipes, 16..20 for 32 pipes and 21..25 for 64 pipes;
//   - column 5 is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO, which
// is declared outside this section -- presumably indices into shared pattern tables;
// confirm against that declaration before editing any value.
const ADDR_SW_PATINFO GFX11_SW_64K_D_T_PATINFO[] =
{
    {   1,    0,    1,    1,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_T
    {   1,    1,    2,    2,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_T
    {   1,    2,    3,    3,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D_T
    {   1,    3,    4,    4,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D_T
    {   1,    4,    5,    5,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,   91,    1,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,   92,    2,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,   93,    3,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,   94,    4,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,   95,    5,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,   96,    1,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,   97,    2,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,   98,    3,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,   99,    4,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  100,    5,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  101,    1,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  102,    2,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  103,    3,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  104,    4,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  105,    5,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,   96,    1,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,   97,    2,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,   98,    3,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,   99,    4,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  100,    5,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  101,    1,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  102,    2,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  103,    3,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  104,    4,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  105,    5,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  106,    1,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  107,    2,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  108,    3,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  109,    4,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  110,    5,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  101,    1,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  102,    2,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  103,    3,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  104,    4,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  105,    5,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  106,    1,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  107,    2,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  108,    3,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  109,    4,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  110,    5,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  111,   16,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  112,   17,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  113,   18,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  114,   19,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  115,   20,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  106,    1,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  107,    2,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  108,    3,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  109,    4,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  110,    5,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  111,   16,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  112,   17,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  113,   18,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  114,   19,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  115,   20,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,    1,   21,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,    2,   22,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,    3,   23,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,    4,   24,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,    5,   25,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,  111,   16,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,  112,   17,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,  113,   18,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,  114,   19,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,  115,   20,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D_T
    {   2,    0,    1,   21,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D_T
    {   2,    1,    2,   22,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D_T
    {   2,    2,    3,   23,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D_T
    {   2,    3,    4,   24,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D_T
    {   2,    4,    5,   25,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_T
};

// Pattern-info lookup table for the GFX11 SW_256K_D_X swizzle mode.
//
// Row layout (as labelled by the per-row comments): 15 pipe/PKR configurations,
// each occupying five consecutive rows for 1, 2, 4, 8 and 16 bpe -- 75 rows total.
// Observations from the data below:
//   - columns 1-4 are row-for-row identical to GFX11_SW_64K_D_X_PATINFO;
//   - column 5, which is 0 throughout the 64K tables, is populated here and runs
//     1..5 within each configuration, following the bpe order.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO, which
// is declared outside this section -- presumably indices into shared pattern tables;
// confirm against that declaration before editing any value.
const ADDR_SW_PATINFO GFX11_SW_256K_D_X_PATINFO[] =
{
    {   1,    0,    1,    1,    1, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_D_X
    {   1,    1,    2,    2,    2, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_D_X
    {   1,    2,    3,    3,    3, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_D_X
    {   1,    3,    4,    4,    4, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_D_X
    {   1,    4,    5,    5,    5, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,    6,    1,    1, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,    7,    2,    2, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,    8,    3,    3, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,    9,    4,    4, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   10,    5,    5, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   11,    1,    1, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   12,    2,    2, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   13,    3,    3, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   14,    4,    4, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   15,    5,    5, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   16,    1,    1, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   17,    2,    2, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   18,    3,    3, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   19,    4,    4, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   20,    5,    5, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   21,    1,    1, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   22,    2,    2, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   23,    3,    3, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   24,    4,    4, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   25,    5,    5, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   26,    1,    1, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   27,    2,    2, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   28,    3,    3, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   29,    4,    4, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   30,    5,    5, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   31,    1,    1, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   32,    2,    2, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   33,    3,    3, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   34,    4,    4, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   35,    5,    5, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   36,    1,    1, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   37,    2,    2, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   38,    3,    3, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   39,    4,    4, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   40,    5,    5, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   41,    1,    1, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   42,    2,    2, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   43,    3,    3, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   44,    4,    4, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   45,    5,    5, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   66,    6,    1, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   67,    7,    2, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   68,    8,    3, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   69,    9,    4, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   70,   10,    5, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   51,    1,    1, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   52,    2,    2, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   53,    3,    3, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   54,    4,    4, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   55,    5,    5, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   71,    6,    1, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   72,    7,    2, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   73,    8,    3, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   74,    9,    4, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   75,   10,    5, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   76,   11,    1, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   77,   12,    2, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   78,   13,    3, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   79,   14,    4, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   80,   15,    5, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   81,    6,    1, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   82,    7,    2, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   83,    8,    3, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   84,    9,    4, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   85,   10,    5, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_D_X
    {   3,    0,   86,   11,    1, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_D_X
    {   3,    1,   87,   12,    2, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_D_X
    {   3,    2,   88,   13,    3, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_D_X
    {   3,    3,   89,   14,    4, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_D_X
    {   3,    4,   90,   15,    5, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_D_X
};

// Pattern-info lookup table for the GFX11 SW_64K_Z_X / SW_64K_R_X swizzle modes at
// 1xaa (the row comments label these "SW_64K_{Z,R}_X 1xaa").
//
// Row layout (as labelled by the per-row comments): 15 pipe/PKR configurations,
// each occupying five consecutive rows for 1, 2, 4, 8 and 16 bpe -- 75 rows total.
// Observations from the data below:
//   - column 1 is 2 for the single-pipe rows and 3 for every other configuration;
//   - column 2 runs 0..4 within each configuration, following the bpe order;
//   - column 3 is sometimes shared by several bpe rows of one configuration
//     (e.g. all five "16 pipes (4 PKRs)" rows use 146), so repeated values are expected;
//   - column 5 is 0 in every row.
//
// NOTE(review): the meaning of the five columns is defined by ADDR_SW_PATINFO, which
// is declared outside this section -- presumably indices into shared pattern tables;
// confirm against that declaration before editing any value.
const ADDR_SW_PATINFO GFX11_SW_64K_ZR_X_1xaa_PATINFO[] =
{
    {   2,    0,  116,   26,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   2,    1,  117,   22,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   2,    2,  118,   27,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   2,    3,  119,   28,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   2,    4,  120,   29,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  121,   30,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  122,   31,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  123,   32,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  124,   33,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  125,   34,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  126,   35,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  127,   36,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  128,   37,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  129,   38,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  130,   39,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  131,   40,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  132,   41,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  133,   42,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  134,   43,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  135,   44,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  136,   45,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  137,   46,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  138,   47,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  139,   48,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  140,   49,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  141,   40,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  142,   50,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  143,   51,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  144,   52,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  145,   53,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  146,   54,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  146,   55,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  146,   56,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  146,   57,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  146,   58,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  147,   59,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  148,   60,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  149,   61,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  150,   62,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  151,   63,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  152,   54,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  152,   64,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  152,   56,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  153,   57,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  153,   65,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  152,   66,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  152,   67,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  152,   68,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  153,   69,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  153,   70,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  154,   71,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  154,   72,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  154,   73,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  155,   74,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  156,   75,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  154,   76,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  154,   77,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  154,   78,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  155,   79,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  156,   80,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  154,   81,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  154,   82,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  154,   83,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  155,   84,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  156,   85,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  157,   86,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  157,   87,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  157,   88,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  158,   89,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  159,   90,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    0,  157,   91,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    1,  157,   92,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    2,  157,   93,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    3,  158,   94,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 1xaa
    {   3,    4,  159,   95,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 1xaa
};

// Pattern-info table for the SW_64K_{Z,R}_X swizzle modes at 2xaa (as labelled by the
// per-row comments).
//
// Layout: 75 rows, ordered configuration-major. There are 15 pipe/PKR configurations
// (1 pipe/1 PKR up to 64 pipes/32 PKRs, in the order given by the row comments) and each
// contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Observable regularities in this table:
//   - column 1 is 2 for the five "1 pipes" rows and 3 for every other row;
//   - column 2 cycles 5..9 in step with the bpe slot;
//   - column 5 is 0 in every row.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. Columns 2-5 presumably are
// indices into shared nibble pattern tables and column 1 a max item count -- confirm
// against the struct declaration before changing any value. The data looks generated, so
// prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX11_SW_64K_ZR_X_2xaa_PATINFO[] =
{
    {   2,    5,  160,   96,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   2,    6,  118,   27,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   2,    7,  161,   97,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   2,    8,  119,   98,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   2,    9,  162,   99,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  163,  100,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  123,   32,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  123,  101,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  164,  102,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  125,  103,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  127,  104,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  128,   37,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  128,  105,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  165,  106,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  130,  107,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  132,  108,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  133,   51,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  133,  109,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  135,  110,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  135,  111,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  137,  112,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  138,   47,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  138,  113,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  139,  114,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  140,  115,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  142,  108,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  143,   51,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  143,  109,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  144,  116,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  145,  111,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  146,  117,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  146,  118,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  146,  119,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  166,  120,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  167,  121,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  148,  122,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  149,   61,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  149,  123,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  151,  124,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  168,  125,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  152,   55,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  152,   56,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  152,  126,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  153,  127,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  169,  127,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  152,   77,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  152,   78,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  152,  128,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  153,   80,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  169,   80,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  154,   72,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  154,   73,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  154,  129,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  156,  130,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  170,  130,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  154,   77,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  154,   78,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  154,  128,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  156,  131,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  170,  131,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  154,  132,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  154,   83,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  154,  133,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  156,  134,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  170,  134,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  157,  135,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  157,   88,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  157,  136,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  159,   90,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  171,   90,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    5,  157,  137,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    6,  157,   93,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    7,  157,  138,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    8,  159,   95,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 2xaa
    {   3,    9,  171,   95,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 2xaa
};

// Pattern-info table for the SW_64K_{Z,R}_X swizzle modes at 4xaa (as labelled by the
// per-row comments).
//
// Layout: 75 rows, ordered configuration-major. There are 15 pipe/PKR configurations
// (1 pipe/1 PKR up to 64 pipes/32 PKRs, in the order given by the row comments) and each
// contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Observable regularities in this table:
//   - column 1 is 2 for the five "1 pipes" rows and 3 for every other row;
//   - column 2 cycles 10..14 in step with the bpe slot;
//   - column 5 is 0 in every row.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. Columns 2-5 presumably are
// indices into shared nibble pattern tables and column 1 a max item count -- confirm
// against the struct declaration before changing any value. The data looks generated, so
// prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX11_SW_64K_ZR_X_4xaa_PATINFO[] =
{
    {   2,   10,  118,   27,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   2,   11,  118,  139,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   2,   12,  118,  140,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   2,   13,  119,  141,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   2,   14,  120,  142,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  123,   32,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  172,  143,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  123,  144,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  124,  145,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  125,  146,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  128,   37,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  128,  147,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  128,  148,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  129,  149,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  130,  150,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  133,   42,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  133,  151,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  133,  152,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  134,  153,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  173,  154,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  138,   47,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  138,  155,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  138,  156,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  174,  157,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  175,  158,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  143,   51,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  143,  159,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  143,  160,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  145,  161,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  176,  162,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  146,   56,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  146,  163,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  146,  164,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  167,  165,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  177,  166,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  149,   61,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  149,  167,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  149,  168,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  178,  169,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  179,  170,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  152,   56,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  152,  163,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  152,  171,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  180,  171,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  181,  171,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  152,   68,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  152,  172,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  152,  173,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  180,  173,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  181,  173,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  154,   73,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  154,  174,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  154,  130,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  182,  130,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  183,  130,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  154,   78,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  154,  172,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  154,  131,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  182,  131,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  183,  131,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  154,   83,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  154,  133,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  154,  134,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  182,  134,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  183,  134,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  157,   88,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  157,  175,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  157,   90,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  184,   90,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  185,   90,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   10,  157,   93,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   11,  157,  176,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   12,  157,   95,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   13,  184,   95,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 4xaa
    {   3,   14,  185,   95,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 4xaa
};

// Pattern-info table for the SW_64K_{Z,R}_X swizzle modes at 8xaa (as labelled by the
// per-row comments).
//
// Layout: 75 rows, ordered configuration-major. There are 15 pipe/PKR configurations
// (1 pipe/1 PKR up to 64 pipes/32 PKRs, in the order given by the row comments) and each
// contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Observable regularities in this table:
//   - column 1 is 2 only for the first two rows (1 pipe at 1 and 2 bpe) and 3 for every
//     other row, including the remaining "1 pipes" rows;
//   - column 2 cycles 15..19 in step with the bpe slot;
//   - column 5 is 0 in every row.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. Columns 2-5 presumably are
// indices into shared nibble pattern tables and column 1 a max item count -- confirm
// against the struct declaration before changing any value. The data looks generated, so
// prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX11_SW_64K_ZR_X_8xaa_PATINFO[] =
{
    {   2,   15,  161,   97,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   2,   16,  118,  140,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  186,  177,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  187,  178,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  162,  179,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  123,  101,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  123,  144,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  188,  180,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  189,  181,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  190,  182,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  128,  105,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  128,  148,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  128,  183,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  165,  184,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  191,  185,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  133,  109,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  133,  186,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  133,  187,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  192,  188,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  193,  189,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  138,  113,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  138,  156,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  138,  190,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  194,  191,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  195,  192,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  143,  109,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  143,  160,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  143,  187,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  196,  193,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  197,  194,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  146,  126,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  146,  164,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  198,  195,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  199,  196,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  200,  197,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  149,  123,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  149,  168,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  149,  198,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  179,  170,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  201,  170,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  152,  126,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  152,  171,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  202,  199,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  181,  171,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  203,  171,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  152,  128,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  152,  173,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  202,  200,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  181,  173,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  203,  201,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  154,  129,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  154,  130,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  204,  202,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  183,  130,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  205,  130,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  154,  128,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  154,  131,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  206,  203,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  183,  131,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  205,  131,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  154,  133,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  154,  134,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  206,  204,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  183,  134,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  205,  134,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  157,  136,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  157,   90,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  207,  205,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  185,   90,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  208,   90,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   15,  157,  138,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   16,  157,   95,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   17,  171,   95,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   18,  185,   95,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_{Z,R}_X 8xaa
    {   3,   19,  208,   95,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_{Z,R}_X 8xaa
};

// Pattern-info table for the SW_256K_{Z,R}_X swizzle modes at 1xaa (as labelled by the
// per-row comments).
//
// Layout: 75 rows, ordered configuration-major. There are 15 pipe/PKR configurations
// (1 pipe/1 PKR up to 64 pipes/32 PKRs, in the order given by the row comments) and each
// contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Observable regularities in this table:
//   - column 1 is 2 for the five "1 pipes" rows and 3 for every other row;
//   - column 2 cycles 0..4 in step with the bpe slot;
//   - column 5 is non-zero in every row.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. Columns 2-5 presumably are
// indices into shared nibble pattern tables and column 1 a max item count -- confirm
// against the struct declaration before changing any value. The data looks generated, so
// prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX11_SW_256K_ZR_X_1xaa_PATINFO[] =
{
    {   2,    0,  116,   26,    6, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   2,    1,  117,   22,    2, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   2,    2,  118,   27,    7, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   2,    3,  119,   28,    4, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   2,    4,  120,   29,    8, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  121,   30,    6, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  122,   31,    9, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  123,   32,    7, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  124,   33,   10, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  125,   34,    8, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  126,   35,    6, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  127,   36,    9, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  128,   37,    7, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  129,   38,   10, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  130,   39,    8, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  131,  206,   11, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  132,  207,   12, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  133,  208,   13, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  134,  209,   14, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  135,  210,   15, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  136,  211,   16, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  137,   35,   17, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  138,  212,   18, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  139,  213,   19, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  140,  214,   20, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  141,  206,   11, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  142,  215,   21, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  143,  216,   13, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  144,  217,   22, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  145,  218,   15, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  146,  219,   23, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  146,  220,   24, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  146,  221,   25, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  146,  222,   26, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  146,  223,   27, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  147,  224,   28, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  148,  225,   29, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  149,  226,   30, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  150,  227,   31, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  151,  228,   32, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  152,  219,   23, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  152,  229,   33, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  152,  221,   25, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  153,  222,   34, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  153,  230,   27, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  152,  231,   23, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  152,  232,   33, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  152,  233,   25, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  153,  234,   34, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  153,  235,   35, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  154,  236,   36, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  154,  237,   37, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  154,  238,   38, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  155,  239,   39, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  155,  240,   40, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  154,  241,   23, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  154,  242,   24, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  154,  243,   25, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  155,  244,   41, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  155,  245,   42, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  154,   81,   23, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  154,   82,   24, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  154,   83,   25, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  155,  246,   43, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  155,  247,   44, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  157,  248,   45, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  157,  249,   46, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  157,  250,   47, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  209,  251,   48, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  209,  252,   49, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    0,  157,   91,   23, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    1,  157,   92,   33, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    2,  157,   93,   25, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    3,  209,  253,   43, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 1xaa
    {   3,    4,  209,  254,   50, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 1xaa
};

// Pattern-info table for the SW_256K_{Z,R}_X swizzle modes at 2xaa (as labelled by the
// per-row comments).
//
// Layout: 75 rows, ordered configuration-major. There are 15 pipe/PKR configurations
// (1 pipe/1 PKR up to 64 pipes/32 PKRs, in the order given by the row comments) and each
// contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Observable regularities in this table:
//   - column 1 is 2 for the five "1 pipes" rows and 3 for every other row;
//   - column 2 cycles 5..9 in step with the bpe slot;
//   - column 5 is non-zero in every row.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. Columns 2-5 presumably are
// indices into shared nibble pattern tables and column 1 a max item count -- confirm
// against the struct declaration before changing any value. The data looks generated, so
// prefer regenerating over hand-editing.
const ADDR_SW_PATINFO GFX11_SW_256K_ZR_X_2xaa_PATINFO[] =
{
    {   2,    5,  160,   96,   51, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   2,    6,  118,   27,    7, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   2,    7,  210,  255,   52, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   2,    8,  120,   29,    8, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   2,    9,  211,  256,   53, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  163,  100,   51, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  123,   32,    7, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  212,  257,   52, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  125,   34,    8, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  213,  258,   53, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  127,  104,   51, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  128,   37,    7, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  129,  259,   52, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  130,   39,    8, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  214,  260,   53, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  132,  261,   54, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  133,  216,   13, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  134,  262,   55, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  135,  263,   15, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  215,  264,   56, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  137,  265,   16, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  138,  212,   18, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  139,  266,   18, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  140,  214,   20, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  216,  267,   20, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  142,  261,   54, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  143,  216,   13, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  144,  262,   55, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  145,  218,   15, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  217,  268,   56, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  146,  269,   57, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  146,  270,   25, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  146,  271,   41, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  146,  272,   58, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  146,  273,   59, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  148,  274,   60, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  149,  226,   30, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  218,  275,   61, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  151,  228,   32, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  219,  276,   62, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  152,  277,   57, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  152,  221,   25, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  152,  278,   41, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  153,  230,   27, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  153,  279,   63, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  152,  280,   57, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  152,  243,   25, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  152,  281,   41, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  153,  282,   64, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  153,  283,   65, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  154,  284,   37, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  154,  238,   38, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  154,  239,   66, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  155,  240,   40, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  155,  273,   67, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  154,  280,   57, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  154,  243,   25, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  154,  281,   41, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  155,  245,   42, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  155,  285,   68, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  154,   82,   24, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  154,   83,   25, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  154,  286,   43, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  155,  247,   44, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  155,  287,   69, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  157,  288,   70, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  157,  250,   47, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  157,  289,   71, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  158,  290,   72, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  158,  291,   73, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    5,  157,   92,   24, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    6,  157,   93,   25, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    7,  157,  292,   43, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    8,  158,  293,   50, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 2xaa
    {   3,    9,  158,  294,   74, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 2xaa
};

// Pattern-info table for the SW_256K_{Z,R}_X swizzle modes at 4xaa.
//
// Layout (as labelled by the per-row comments): 15 pipe/PKR configurations, each
// contributing five consecutive rows for 1, 2, 4, 8 and 16 bpe, for 75 rows in total.
// Row order is significant -- presumably consumers index this table by
// (configuration, bpe); confirm at the lookup site before reordering anything.
//
// Each row holds five integers. NOTE(review): these look like an item count followed by
// indices into separate per-nibble pattern tables -- confirm against the
// ADDR_SW_PATINFO declaration, which is not visible here.
// In this table the first field is 2 for the 1-pipe rows and 3 for all others, and the
// second field cycles 10..14 with bpe.
const ADDR_SW_PATINFO GFX11_SW_256K_ZR_X_4xaa_PATINFO[] =
{
    {   2,   10,  118,   27,    7, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   2,   11,  119,   28,    4, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   2,   12,  120,   29,    8, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   2,   13,  220,  295,   75, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   2,   14,  221,  296,   76, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  123,   32,    7, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  124,   33,   10, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  125,   34,    8, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  222,  297,   77, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  223,  298,   76, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  128,   37,    7, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  129,   38,   10, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  130,   39,    8, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  224,  299,   77, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  225,  300,   76, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  133,  208,   13, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  134,  209,   14, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  135,  210,   15, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  215,  301,   78, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  226,  302,   79, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  138,  212,   18, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  139,  213,   19, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  140,  214,   20, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  216,  299,   80, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  227,  303,   81, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  143,  216,   13, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  144,  217,   22, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  145,  218,   15, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  217,  304,   82, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  228,  305,   83, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  146,  221,   25, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  146,  222,   26, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  146,  223,   27, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  146,  306,   84, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  146,  307,   85, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  149,  226,   30, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  218,  227,   86, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  168,  228,   87, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  219,  301,   62, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  229,  308,   88, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  152,  221,   25, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  152,  222,   34, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  152,  230,   27, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  153,  306,   84, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  153,  309,   89, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  152,  233,   25, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  152,  234,   34, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  152,  235,   35, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  153,  310,   90, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  153,  311,   91, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  154,  238,   38, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  154,  239,   66, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  154,  240,   92, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  156,  312,   93, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  156,  313,   94, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  154,  243,   25, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  154,  281,   41, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  154,  314,   42, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  156,  315,   95, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  156,  316,   96, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  154,   83,   25, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  154,  286,   43, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  154,  317,   44, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  156,  318,   97, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  156,  319,   68, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  157,  250,   47, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  157,  289,   71, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  157,  320,   98, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  159,  321,   99, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  159,  322,  100, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   10,  157,   93,   25, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   11,  157,  292,   43, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   12,  157,  323,   50, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   13,  159,  324,   74, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 4xaa
    {   3,   14,  159,  325,  101, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 4xaa
};

// Pattern-info table for the SW_256K_{Z,R}_X swizzle modes at 8xaa.
//
// Layout (as labelled by the per-row comments): 15 pipe/PKR configurations, each
// contributing five consecutive rows for 1, 2, 4, 8 and 16 bpe, for 75 rows in total.
// Row order is significant -- presumably consumers index this table by
// (configuration, bpe); confirm at the lookup site before reordering anything.
//
// Each row holds five integers. NOTE(review): these look like an item count followed by
// indices into separate per-nibble pattern tables -- confirm against the
// ADDR_SW_PATINFO declaration, which is not visible here.
// In this table the first field is 2 for the 1-pipe rows and 3 for all others, and the
// second field cycles 15..19 with bpe.
const ADDR_SW_PATINFO GFX11_SW_256K_ZR_X_8xaa_PATINFO[] =
{
    {   2,   15,  210,  255,   52, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   2,   16,  120,   29,    8, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   2,   17,  211,  256,   53, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   2,   18,  221,  296,   76, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   2,   19,  230,  326,  102, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  212,  257,   52, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  125,   34,    8, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  213,  258,   53, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  223,  298,   76, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  231,  327,  103, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  129,  259,   52, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  130,   39,    8, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  214,  260,   53, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  225,  300,   76, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  232,  328,  103, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  134,  262,   55, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  135,  263,   15, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  215,  264,   56, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  226,  302,  104, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  233,  329,  105, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  139,  266,   18, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  140,  214,   20, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  216,  267,   20, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  227,  303,   81, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  234,  330,  106, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  144,  262,   55, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  145,  218,   15, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  217,  268,   56, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  228,  305,   83, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  235,  331,  107, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  146,  271,   41, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  146,  272,   58, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  146,  273,   59, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  236,  332,  108, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  237,  333,  109, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  218,  275,   61, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  168,  228,   87, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  238,  276,  110, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  239,  308,  111, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  239,  334,  112, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  152,  278,   41, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  152,  230,   27, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  152,  279,   63, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  240,  309,   89, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  241,  335,  113, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  152,  281,   41, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  152,  282,   64, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  152,  283,   65, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  240,  311,   91, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  241,  336,   89, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  154,  239,   66, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  154,  240,   92, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  154,  273,   63, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  242,  313,   94, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  243,  337,  114, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  154,  281,   41, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  154,  314,   42, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  154,  338,   68, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  242,  316,   96, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  243,  339,  115, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  154,  286,   43, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  154,  317,   44, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  154,  340,   68, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  242,  341,  116, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  243,  342,  115, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  157,  289,   71, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  157,  320,   98, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  157,  343,  117, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  244,  322,  100, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  245,  344,  118, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   15,  157,  292,   43, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   16,  157,  323,   50, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   17,  157,  345,  119, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   18,  244,  325,  101, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_{Z,R}_X 8xaa
    {   3,   19,  245,  346,  120, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_{Z,R}_X 8xaa
};

// Pattern-info table for the SW_4K_S3 swizzle mode.
//
// Layout (as labelled by the per-row comments): 15 pipe/PKR configurations, each
// contributing five consecutive rows for 1, 2, 4, 8 and 16 bpe, for 75 rows in total.
// Row order is significant -- presumably consumers index this table by
// (configuration, bpe); confirm at the lookup site before reordering anything.
//
// The same five rows are repeated verbatim for every configuration, so the entries in
// this table depend only on bpe. The first field is always 1 and the last two fields are
// always 0.
// NOTE(review): the five integers look like an item count followed by indices into
// separate per-nibble pattern tables -- confirm against the ADDR_SW_PATINFO declaration,
// which is not visible here.
const ADDR_SW_PATINFO GFX11_SW_4K_S3_PATINFO[] =
{
    {   1,   20,  246,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S3
    {   1,   20,  246,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S3
    {   1,   21,  247,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S3
    {   1,   22,  248,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S3
    {   1,   23,  249,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S3
    {   1,   24,  250,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3
};

// Pattern-info table for the SW_4K_S3_X swizzle mode.
//
// Layout (as labelled by the per-row comments): 15 pipe/PKR configurations, each
// contributing five consecutive rows for 1, 2, 4, 8 and 16 bpe, for 75 rows in total.
// Row order is significant -- presumably consumers index this table by
// (configuration, bpe); confirm at the lookup site before reordering anything.
//
// The first field is 1 for the 1-pipe rows and 3 for all others. The second field cycles
// 20..24 with bpe, the third field varies with the configuration, and the last two
// fields are always 0. Every configuration with 16 or more pipes uses the same five rows.
// NOTE(review): the five integers look like an item count followed by indices into
// separate per-nibble pattern tables -- confirm against the ADDR_SW_PATINFO declaration,
// which is not visible here.
const ADDR_SW_PATINFO GFX11_SW_4K_S3_X_PATINFO[] =
{
    {   1,   20,  246,    0,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3_X
    {   1,   21,  247,    0,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3_X
    {   1,   22,  248,    0,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_4K_S3_X
    {   1,   23,  249,    0,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_4K_S3_X
    {   1,   24,  250,    0,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  251,    0,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  252,    0,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  253,    0,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  254,    0,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  255,    0,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  256,    0,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  257,    0,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  258,    0,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  259,    0,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  260,    0,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  261,    0,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  262,    0,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  263,    0,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  264,    0,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  265,    0,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  256,    0,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  257,    0,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  258,    0,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  259,    0,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  260,    0,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  261,    0,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  262,    0,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  263,    0,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  264,    0,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  265,    0,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  261,    0,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  262,    0,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  263,    0,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  264,    0,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  265,    0,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_4K_S3_X
    {   3,   20,  266,    0,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_4K_S3_X
    {   3,   21,  267,    0,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_4K_S3_X
    {   3,   22,  268,    0,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_4K_S3_X
    {   3,   23,  269,    0,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_4K_S3_X
    {   3,   24,  270,    0,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3_X
};

// Pattern-info table for the SW_64K_S3 swizzle mode.
//
// Layout (as labelled by the per-row comments): 15 pipe/PKR configurations, each
// contributing five consecutive rows for 1, 2, 4, 8 and 16 bpe, for 75 rows in total.
// Row order is significant -- presumably consumers index this table by
// (configuration, bpe); confirm at the lookup site before reordering anything.
//
// The same five rows are repeated verbatim for every configuration, so the entries in
// this table depend only on bpe. The first field is always 1, the fourth field cycles
// 347..351 with bpe, and the last field is always 0.
// NOTE(review): the five integers look like an item count followed by indices into
// separate per-nibble pattern tables -- confirm against the ADDR_SW_PATINFO declaration,
// which is not visible here.
const ADDR_SW_PATINFO GFX11_SW_64K_S3_PATINFO[] =
{
    {   1,   20,  246,  347,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3
    {   1,   20,  246,  347,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3
    {   1,   21,  247,  348,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3
    {   1,   22,  248,  349,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3
    {   1,   23,  249,  350,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3
    {   1,   24,  250,  351,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3
};

// Pattern-info table for the SW_64K_S3_X swizzle mode (GFX11).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. The configurations appear in
// the order named by the trailing row comments (1 pipe/1 PKR ... 64 pipes/32 PKRs), and each
// configuration contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Each row carries five integers. In this table the fifth value is 0 on every row, and the second
// value only depends on bpe (20..24 for 1..16 bpe).
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. The five fields are presumably
// {max item count, nibble01 index, nibble2 index, nibble3 index, nibble4 index}, i.e. indices
// into the GFX11_SW_PATTERN_NIBBLE* tables -- confirm against the struct definition.
// NOTE(review): consumers presumably index this table by position, so rows must not be
// reordered, inserted or removed without checking the lookup code.
const ADDR_SW_PATINFO GFX11_SW_64K_S3_X_PATINFO[] =
{
    {   1,   20,  246,  347,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_X
    {   1,   21,  247,  348,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_X
    {   1,   22,  248,  349,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3_X
    {   1,   23,  249,  350,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3_X
    {   1,   24,  250,  351,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  251,  347,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  252,  348,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  253,  349,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  254,  350,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  255,  351,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  256,  347,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  257,  348,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  258,  349,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  259,  350,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  260,  351,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  261,  347,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  262,  348,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  263,  349,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  264,  350,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  265,  351,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  256,  347,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  257,  348,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  258,  349,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  259,  350,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  260,  351,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  261,  347,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  262,  348,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  263,  349,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  264,  350,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  265,  351,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  266,  347,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  267,  348,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  268,  349,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  269,  350,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  270,  351,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  261,  347,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  262,  348,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  263,  349,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  264,  350,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  265,  351,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  266,  347,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  267,  348,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  268,  349,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  269,  350,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  270,  351,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  271,  352,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  272,  353,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  273,  354,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  274,  355,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  275,  356,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  266,  347,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  267,  348,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  268,  349,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  269,  350,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  270,  351,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  271,  352,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  272,  353,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  273,  354,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  274,  355,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  275,  356,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  276,  357,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  277,  358,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  278,  359,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  279,  360,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  280,  361,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  271,  352,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  272,  353,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  273,  354,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  274,  355,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  275,  356,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3_X
    {   3,   20,  276,  357,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3_X
    {   3,   21,  277,  358,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3_X
    {   3,   22,  278,  359,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3_X
    {   3,   23,  279,  360,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3_X
    {   3,   24,  280,  361,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_X
};

// Pattern-info table for the SW_64K_S3_T swizzle mode (GFX11).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. The configurations appear in
// the order named by the trailing row comments (1 pipe/1 PKR ... 64 pipes/32 PKRs), and each
// configuration contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Each row carries five integers. In this table the fifth value is 0 on every row, and the second
// value only depends on bpe (20..24 for 1..16 bpe). The 64-pipe rows reuse the third-column
// values of the 1-pipe rows (246..250) but pair them with a distinct fourth-column range
// (362..366).
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. The five fields are presumably
// {max item count, nibble01 index, nibble2 index, nibble3 index, nibble4 index}, i.e. indices
// into the GFX11_SW_PATTERN_NIBBLE* tables -- confirm against the struct definition.
// NOTE(review): consumers presumably index this table by position, so rows must not be
// reordered, inserted or removed without checking the lookup code.
const ADDR_SW_PATINFO GFX11_SW_64K_S3_T_PATINFO[] =
{
    {   1,   20,  246,  347,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_T
    {   1,   21,  247,  348,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_T
    {   1,   22,  248,  349,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_S3_T
    {   1,   23,  249,  350,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_S3_T
    {   1,   24,  250,  351,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  251,  347,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  252,  348,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  253,  349,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  254,  350,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  255,  351,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  256,  347,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  257,  348,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  258,  349,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  259,  350,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  260,  351,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  281,  347,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  282,  348,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  283,  349,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  284,  350,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  285,  351,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  256,  347,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  257,  348,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  258,  349,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  259,  350,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  260,  351,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  281,  347,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  282,  348,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  283,  349,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  284,  350,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  285,  351,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  286,  347,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  287,  348,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  288,  349,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  289,  350,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  290,  351,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  281,  347,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  282,  348,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  283,  349,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  284,  350,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  285,  351,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  286,  347,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  287,  348,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  288,  349,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  289,  350,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  290,  351,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  291,  352,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  292,  353,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  293,  354,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  294,  355,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  295,  356,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  286,  347,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  287,  348,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  288,  349,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  289,  350,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  290,  351,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  291,  352,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  292,  353,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  293,  354,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  294,  355,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  295,  356,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  246,  362,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  247,  363,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  248,  364,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  249,  365,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  250,  366,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  291,  352,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  292,  353,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  293,  354,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  294,  355,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  295,  356,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_S3_T
    {   3,   20,  246,  362,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_S3_T
    {   3,   21,  247,  363,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_S3_T
    {   3,   22,  248,  364,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_S3_T
    {   3,   23,  249,  365,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_S3_T
    {   3,   24,  250,  366,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_T
};

// Pattern-info table for the SW_256K_S3_X swizzle mode (GFX11).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. The configurations appear in
// the order named by the trailing row comments (1 pipe/1 PKR ... 64 pipes/32 PKRs), and each
// configuration contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Each row carries five integers. The first four columns match the corresponding rows of
// GFX11_SW_64K_S3_X_PATINFO; the difference is the fifth column, which is non-zero here and only
// depends on bpe: 121 for 1 and 2 bpe, 122 for 4 bpe, 123 for 8 and 16 bpe.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. The five fields are presumably
// {max item count, nibble01 index, nibble2 index, nibble3 index, nibble4 index}, i.e. indices
// into the GFX11_SW_PATTERN_NIBBLE* tables -- confirm against the struct definition.
// NOTE(review): consumers presumably index this table by position, so rows must not be
// reordered, inserted or removed without checking the lookup code.
const ADDR_SW_PATINFO GFX11_SW_256K_S3_X_PATINFO[] =
{
    {   1,   20,  246,  347,  121, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256K_S3_X
    {   1,   21,  247,  348,  121, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256K_S3_X
    {   1,   22,  248,  349,  122, } , // 1 pipes (1 PKRs) 4 bpe @ SW_256K_S3_X
    {   1,   23,  249,  350,  123, } , // 1 pipes (1 PKRs) 8 bpe @ SW_256K_S3_X
    {   1,   24,  250,  351,  123, } , // 1 pipes (1 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  251,  347,  121, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  252,  348,  121, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  253,  349,  122, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  254,  350,  123, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  255,  351,  123, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  256,  347,  121, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  257,  348,  121, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  258,  349,  122, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  259,  350,  123, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  260,  351,  123, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  261,  347,  121, } , // 8 pipes (2 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  262,  348,  121, } , // 8 pipes (2 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  263,  349,  122, } , // 8 pipes (2 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  264,  350,  123, } , // 8 pipes (2 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  265,  351,  123, } , // 8 pipes (2 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  256,  347,  121, } , // 4 pipes (4 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  257,  348,  121, } , // 4 pipes (4 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  258,  349,  122, } , // 4 pipes (4 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  259,  350,  123, } , // 4 pipes (4 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  260,  351,  123, } , // 4 pipes (4 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  261,  347,  121, } , // 8 pipes (4 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  262,  348,  121, } , // 8 pipes (4 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  263,  349,  122, } , // 8 pipes (4 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  264,  350,  123, } , // 8 pipes (4 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  265,  351,  123, } , // 8 pipes (4 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  266,  347,  121, } , // 16 pipes (4 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  267,  348,  121, } , // 16 pipes (4 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  268,  349,  122, } , // 16 pipes (4 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  269,  350,  123, } , // 16 pipes (4 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  270,  351,  123, } , // 16 pipes (4 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  261,  347,  121, } , // 8 pipes (8 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  262,  348,  121, } , // 8 pipes (8 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  263,  349,  122, } , // 8 pipes (8 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  264,  350,  123, } , // 8 pipes (8 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  265,  351,  123, } , // 8 pipes (8 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  266,  347,  121, } , // 16 pipes (8 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  267,  348,  121, } , // 16 pipes (8 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  268,  349,  122, } , // 16 pipes (8 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  269,  350,  123, } , // 16 pipes (8 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  270,  351,  123, } , // 16 pipes (8 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  271,  352,  121, } , // 32 pipes (8 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  272,  353,  121, } , // 32 pipes (8 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  273,  354,  122, } , // 32 pipes (8 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  274,  355,  123, } , // 32 pipes (8 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  275,  356,  123, } , // 32 pipes (8 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  266,  347,  121, } , // 16 pipes (16 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  267,  348,  121, } , // 16 pipes (16 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  268,  349,  122, } , // 16 pipes (16 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  269,  350,  123, } , // 16 pipes (16 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  270,  351,  123, } , // 16 pipes (16 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  271,  352,  121, } , // 32 pipes (16 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  272,  353,  121, } , // 32 pipes (16 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  273,  354,  122, } , // 32 pipes (16 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  274,  355,  123, } , // 32 pipes (16 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  275,  356,  123, } , // 32 pipes (16 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  276,  357,  121, } , // 64 pipes (16 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  277,  358,  121, } , // 64 pipes (16 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  278,  359,  122, } , // 64 pipes (16 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  279,  360,  123, } , // 64 pipes (16 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  280,  361,  123, } , // 64 pipes (16 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  271,  352,  121, } , // 32 pipes (32 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  272,  353,  121, } , // 32 pipes (32 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  273,  354,  122, } , // 32 pipes (32 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  274,  355,  123, } , // 32 pipes (32 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  275,  356,  123, } , // 32 pipes (32 PKRs) 16 bpe @ SW_256K_S3_X
    {   3,   20,  276,  357,  121, } , // 64 pipes (32 PKRs) 1 bpe @ SW_256K_S3_X
    {   3,   21,  277,  358,  121, } , // 64 pipes (32 PKRs) 2 bpe @ SW_256K_S3_X
    {   3,   22,  278,  359,  122, } , // 64 pipes (32 PKRs) 4 bpe @ SW_256K_S3_X
    {   3,   23,  279,  360,  123, } , // 64 pipes (32 PKRs) 8 bpe @ SW_256K_S3_X
    {   3,   24,  280,  361,  123, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256K_S3_X
};

// Pattern-info table for the SW_64K_D3_X swizzle mode (GFX11).
//
// Layout: 75 rows = 15 pipe/PKR configurations x 5 element sizes. The configurations appear in
// the order named by the trailing row comments (1 pipe/1 PKR ... 64 pipes/32 PKRs), and each
// configuration contributes five consecutive rows for 1, 2, 4, 8 and 16 bpe.
//
// Each row carries five integers. In this table the fifth value is 0 on every row and the second
// value only depends on bpe (20..24 for 1..16 bpe). Unlike the S3 tables, the first column is not
// constant per configuration: it takes the values 1, 2, 3 and 4, and from the 4-pipe
// configurations onward it is 3 for 1/2/4 bpe and 4 for 8/16 bpe.
//
// NOTE(review): ADDR_SW_PATINFO is declared outside this chunk. The five fields are presumably
// {max item count, nibble01 index, nibble2 index, nibble3 index, nibble4 index}, i.e. indices
// into the GFX11_SW_PATTERN_NIBBLE* tables -- confirm against the struct definition.
// NOTE(review): consumers presumably index this table by position, so rows must not be
// reordered, inserted or removed without checking the lookup code.
const ADDR_SW_PATINFO GFX11_SW_64K_D3_X_PATINFO[] =
{
    {   1,   20,  246,  347,    0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D3_X
    {   1,   21,  247,  348,    0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D3_X
    {   1,   22,  248,  349,    0, } , // 1 pipes (1 PKRs) 4 bpe @ SW_64K_D3_X
    {   1,   23,  249,  350,    0, } , // 1 pipes (1 PKRs) 8 bpe @ SW_64K_D3_X
    {   1,   24,  250,  351,    0, } , // 1 pipes (1 PKRs) 16 bpe @ SW_64K_D3_X
    {   2,   20,  296,  367,    0, } , // 2 pipes (1-2 PKRs) 1 bpe @ SW_64K_D3_X
    {   2,   21,  296,  368,    0, } , // 2 pipes (1-2 PKRs) 2 bpe @ SW_64K_D3_X
    {   2,   22,  297,  369,    0, } , // 2 pipes (1-2 PKRs) 4 bpe @ SW_64K_D3_X
    {   2,   23,  298,  351,    0, } , // 2 pipes (1-2 PKRs) 8 bpe @ SW_64K_D3_X
    {   3,   24,  299,  351,    0, } , // 2 pipes (1-2 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  300,  370,    0, } , // 4 pipes (1-2 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  300,  371,    0, } , // 4 pipes (1-2 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  301,  372,    0, } , // 4 pipes (1-2 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  302,  373,    0, } , // 4 pipes (1-2 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  303,  373,    0, } , // 4 pipes (1-2 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  304,  370,    0, } , // 8 pipes (2 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  304,  371,    0, } , // 8 pipes (2 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  305,  372,    0, } , // 8 pipes (2 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  306,  373,    0, } , // 8 pipes (2 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  307,  373,    0, } , // 8 pipes (2 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  308,  374,    0, } , // 4 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  309,  375,    0, } , // 4 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  310,  376,    0, } , // 4 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  311,  377,    0, } , // 4 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  312,  378,    0, } , // 4 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  313,  379,    0, } , // 8 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  314,  371,    0, } , // 8 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  315,  372,    0, } , // 8 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  316,  373,    0, } , // 8 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  317,  373,    0, } , // 8 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  318,  380,    0, } , // 16 pipes (4 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  319,  371,    0, } , // 16 pipes (4 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  320,  372,    0, } , // 16 pipes (4 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  321,  373,    0, } , // 16 pipes (4 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  322,  373,    0, } , // 16 pipes (4 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  323,  381,    0, } , // 8 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  323,  382,    0, } , // 8 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  323,  383,    0, } , // 8 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  324,  384,    0, } , // 8 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  325,  384,    0, } , // 8 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  326,  379,    0, } , // 16 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  327,  371,    0, } , // 16 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  328,  372,    0, } , // 16 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  329,  373,    0, } , // 16 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  330,  373,    0, } , // 16 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  326,  385,    0, } , // 32 pipes (8 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  331,  386,    0, } , // 32 pipes (8 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  331,  387,    0, } , // 32 pipes (8 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  332,  388,    0, } , // 32 pipes (8 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  333,  388,    0, } , // 32 pipes (8 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  334,  389,    0, } , // 16 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  335,  390,    0, } , // 16 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  336,  391,    0, } , // 16 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  337,  392,    0, } , // 16 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  338,  392,    0, } , // 16 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  334,  393,    0, } , // 32 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  335,  394,    0, } , // 32 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  336,  395,    0, } , // 32 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  337,  396,    0, } , // 32 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  338,  396,    0, } , // 32 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  334,  397,    0, } , // 64 pipes (16 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  339,  398,    0, } , // 64 pipes (16 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  339,  399,    0, } , // 64 pipes (16 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  340,  400,    0, } , // 64 pipes (16 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  341,  400,    0, } , // 64 pipes (16 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  342,  401,    0, } , // 32 pipes (32 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  343,  402,    0, } , // 32 pipes (32 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  344,  403,    0, } , // 32 pipes (32 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  345,  404,    0, } , // 32 pipes (32 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  346,  404,    0, } , // 32 pipes (32 PKRs) 16 bpe @ SW_64K_D3_X
    {   3,   20,  342,  405,    0, } , // 64 pipes (32 PKRs) 1 bpe @ SW_64K_D3_X
    {   3,   21,  343,  406,    0, } , // 64 pipes (32 PKRs) 2 bpe @ SW_64K_D3_X
    {   3,   22,  344,  407,    0, } , // 64 pipes (32 PKRs) 4 bpe @ SW_64K_D3_X
    {   4,   23,  345,  408,    0, } , // 64 pipes (32 PKRs) 8 bpe @ SW_64K_D3_X
    {   4,   24,  346,  408,    0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D3_X
};


// Shared pattern table: 25 rows (numbered 0..24 by the trailing comments) of 8 UINT_64 entries.
//
// Each entry is either 0 or one of the coordinate-bit symbols X*, Y*, Z*, S* (defined outside
// this chunk). Within each group of five rows, every following row has one more leading 0 and
// one fewer trailing term than the row before it:
//   rows  0..4  : X/Y terms only
//   rows  5..9  : one S term (S0) followed by X/Y terms
//   rows 10..14 : two S terms (S0, S1) followed by X/Y terms
//   rows 15..19 : three S terms (S0..S2) followed by X/Y terms
//   rows 20..24 : X/Y/Z terms
//
// The row number is the value stored in the second column of the *_PATINFO tables in this file
// (the GFX11 3D swizzle tables above use rows 20..24).
//
// NOTE(review): presumably each of the 8 entries describes the coordinate bit(s) that form one
// address bit of the two lowest nibbles (bits 0..7), and the five rows of a group correspond to
// 1/2/4/8/16 bpe -- confirm against the code that consumes this table.
const UINT_64 GFX11_SW_PATTERN_NIBBLE01[][8] =
{
    {X0,            X1,            Y0,            X2,            Y1,            Y2,            X3,            Y3,            }, // 0
    {0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 1
    {0,             0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 2
    {0,             0,             0,             X0,            Y0,            X1,            X2,            Y1,            }, // 3
    {0,             0,             0,             0,             X0,            Y0,            X1,            Y1,            }, // 4
    {S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 5
    {0,             S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 6
    {0,             0,             S0,            X0,            Y0,            X1,            Y1,            X2,            }, // 7
    {0,             0,             0,             S0,            X0,            Y0,            X1,            Y1,            }, // 8
    {0,             0,             0,             0,             S0,            X0,            Y0,            X1,            }, // 9
    {S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 10
    {0,             S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            }, // 11
    {0,             0,             S0,            S1,            X0,            Y0,            X1,            Y1,            }, // 12
    {0,             0,             0,             S0,            S1,            X0,            Y0,            X1,            }, // 13
    {0,             0,             0,             0,             S0,            S1,            X0,            Y0,            }, // 14
    {S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            X2,            }, // 15
    {0,             S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            }, // 16
    {0,             0,             S0,            S1,            S2,            X0,            Y0,            X1,            }, // 17
    {0,             0,             0,             S0,            S1,            S2,            X0,            Y0,            }, // 18
    {0,             0,             0,             0,             S0,            S1,            S2,            X0,            }, // 19
    {X0,            X1,            Z0,            Y0,            Y1,            Z1,            X2,            Z2,            }, // 20
    {0,             X0,            Z0,            Y0,            X1,            Z1,            Y1,            Z2,            }, // 21
    {0,             0,             X0,            Y0,            X1,            Z0,            Y1,            Z1,            }, // 22
    {0,             0,             0,             X0,            Y0,            Z0,            X1,            Z1,            }, // 23
    {0,             0,             0,             0,             X0,            Z0,            Y0,            Z1,            }, // 24
};

// GFX11_SW_PATTERN_NIBBLE2
//
// Constant lookup table with 347 rows (indices 0..346; the index of each row is given by its
// trailing "// N" comment) and four UINT_64 entries per row.
//
// Every entry is either 0 or the bitwise XOR of one or more coordinate-bit symbols
// (X<n>, Y<n>, Z<n>); those symbols are defined outside this table. Row 0 is all zeros.
//
// NOTE(review): judging by the name and the four-column width, each row presumably holds the
// equations for the four address bits of "nibble 2" of a swizzled offset, and rows are presumably
// selected by an index kept in a swizzle-pattern descriptor -- confirm against the consumer.
// NOTE(review): the per-row index comments suggest the table is addressed by row number, so rows
// should not be inserted, removed or reordered without updating whatever stores those indices.
const UINT_64 GFX11_SW_PATTERN_NIBBLE2[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y4,            X4,            Y5,            X5,            }, // 1
    {Y3,            X4,            Y4,            X5,            }, // 2
    {Y3,            X3,            Y4,            X4,            }, // 3
    {Y2,            X3,            Y3,            X4,            }, // 4
    {Y2,            X2,            Y3,            X3,            }, // 5
    {Z0^X4^Y4,      X4,            Y5,            X5,            }, // 6
    {Z0^Y3^X4,      X4,            Y4,            X5,            }, // 7
    {Z0^X3^Y3,      X3,            Y4,            X4,            }, // 8
    {Z0^Y2^X3,      X3,            Y3,            X4,            }, // 9
    {Z0^X2^Y2,      X2,            Y3,            X3,            }, // 10
    {Y4^X5,         Z0^X4^Y5,      Y5,            X5,            }, // 11
    {Y3^X5,         Z0^X4^Y4,      Y4,            X5,            }, // 12
    {Y3^X4,         Z0^X3^Y4,      Y4,            X4,            }, // 13
    {Y2^X4,         Z0^X3^Y3,      Y3,            X4,            }, // 14
    {Y2^X3,         Z0^X2^Y3,      Y3,            X3,            }, // 15
    {Y4^X6,         X4^Y6,         Z0^X5^Y5,      X5,            }, // 16
    {Y3^X6,         X4^Y5,         Z0^Y4^X5,      X5,            }, // 17
    {Y3^X5,         X3^Y5,         Z0^X4^Y4,      X4,            }, // 18
    {Y2^X5,         X3^Y4,         Z0^Y3^X4,      X4,            }, // 19
    {Y2^X4,         X2^Y4,         Z0^X3^Y3,      X3,            }, // 20
    {Z1^Y4^X5,      Z0^X4^Y5,      Y5,            X5,            }, // 21
    {Z1^Y3^X5,      Z0^X4^Y4,      Y4,            X5,            }, // 22
    {Z1^Y3^X4,      Z0^X3^Y4,      Y4,            X4,            }, // 23
    {Z1^Y2^X4,      Z0^X3^Y3,      Y3,            X4,            }, // 24
    {Z1^Y2^X3,      Z0^X2^Y3,      Y3,            X3,            }, // 25
    {Y4^X6,         Z1^X4^Y6,      Z0^X5^Y5,      X5,            }, // 26
    {Y3^X6,         Z1^X4^Y5,      Z0^Y4^X5,      X5,            }, // 27
    {Y3^X5,         Z1^X3^Y5,      Z0^X4^Y4,      X4,            }, // 28
    {Y2^X5,         Z1^X3^Y4,      Z0^Y3^X4,      X4,            }, // 29
    {Y2^X4,         Z1^X2^Y4,      Z0^X3^Y3,      X3,            }, // 30
    {Y4^X7,         X4^Y7,         Z1^Y5^X6,      Z0^X5^Y6,      }, // 31
    {Y3^X7,         X4^Y6,         Z1^Y4^X6,      Z0^X5^Y5,      }, // 32
    {Y3^X6,         X3^Y6,         Z1^Y4^X5,      Z0^X4^Y5,      }, // 33
    {Y2^X6,         X3^Y5,         Z1^Y3^X5,      Z0^X4^Y4,      }, // 34
    {Y2^X5,         X2^Y5,         Z1^Y3^X4,      Z0^X3^Y4,      }, // 35
    {Z2^Y4^X6,      Z1^X4^Y6,      Z0^X5^Y5,      X5,            }, // 36
    {Z2^Y3^X6,      Z1^X4^Y5,      Z0^Y4^X5,      X5,            }, // 37
    {Z2^Y3^X5,      Z1^X3^Y5,      Z0^X4^Y4,      X4,            }, // 38
    {Y2^Z2^X5,      Z1^X3^Y4,      Z0^Y3^X4,      X4,            }, // 39
    {Y2^Z2^X4,      Z1^X2^Y4,      Z0^X3^Y3,      X3,            }, // 40
    {Y4^X7,         Z2^X4^Y7,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 41
    {Y3^X7,         Z2^X4^Y6,      Z1^Y4^X6,      Z0^X5^Y5,      }, // 42
    {Y3^X6,         Z2^X3^Y6,      Z1^Y4^X5,      Z0^X4^Y5,      }, // 43
    {Y2^X6,         Z2^X3^Y5,      Z1^Y3^X5,      Z0^X4^Y4,      }, // 44
    {Y2^X5,         X2^Z2^Y5,      Z1^Y3^X4,      Z0^X3^Y4,      }, // 45
    {Y4^X7,         X4^Y7,         Z2^Y5^X6,      Z1^X5^Y6,      }, // 46
    {Y3^X7,         X4^Y6,         Z2^Y4^X6,      Z1^X5^Y5,      }, // 47
    {Y3^X6,         X3^Y6,         Z2^Y4^X5,      Z1^X4^Y5,      }, // 48
    {Y2^X6,         X3^Y5,         Z2^Y3^X5,      Z1^X4^Y4,      }, // 49
    {Y2^X5,         X2^Y5,         Z2^Y3^X4,      Z1^X3^Y4,      }, // 50
    {Z3^Y4^X7,      Z2^X4^Y7,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 51
    {Y3^Z3^X7,      Z2^X4^Y6,      Z1^Y4^X6,      Z0^X5^Y5,      }, // 52
    {Y3^Z3^X6,      Z2^X3^Y6,      Z1^Y4^X5,      Z0^X4^Y5,      }, // 53
    {Y2^Z3^X6,      Z2^X3^Y5,      Z1^Y3^X5,      Z0^X4^Y4,      }, // 54
    {Y2^Z3^X5,      X2^Z2^Y5,      Z1^Y3^X4,      Z0^X3^Y4,      }, // 55
    {Y4^X7,         Z3^X4^Y7,      Z2^Y5^X6,      Z1^X5^Y6,      }, // 56
    {Y3^X7,         Z3^X4^Y6,      Z2^Y4^X6,      Z1^X5^Y5,      }, // 57
    {Y3^X6,         X3^Z3^Y6,      Z2^Y4^X5,      Z1^X4^Y5,      }, // 58
    {Y2^X6,         X3^Z3^Y5,      Z2^Y3^X5,      Z1^X4^Y4,      }, // 59
    {Y2^X5,         X2^Z3^Y5,      Z2^Y3^X4,      Z1^X3^Y4,      }, // 60
    {Y4^X7,         X4^Y7,         Z3^Y5^X6,      Z2^X5^Y6,      }, // 61
    {Y3^X7,         X4^Y6,         Z3^Y4^X6,      Z2^X5^Y5,      }, // 62
    {Y3^X6,         X3^Y6,         Z3^Y4^X5,      Z2^X4^Y5,      }, // 63
    {Y2^X6,         X3^Y5,         Y3^Z3^X5,      Z2^X4^Y4,      }, // 64
    {Y2^X5,         X2^Y5,         Y3^Z3^X4,      Z2^X3^Y4,      }, // 65
    {Y4^X8,         X4^Y8,         Z2^Y5^X7,      Z1^X5^Y7,      }, // 66
    {Y3^X8,         X4^Y7,         Z2^Y4^X7,      Z1^X5^Y6,      }, // 67
    {Y3^X7,         X3^Y7,         Z2^Y4^X6,      Z1^X4^Y6,      }, // 68
    {Y2^X7,         X3^Y6,         Z2^Y3^X6,      Z1^X4^Y5,      }, // 69
    {Y2^X6,         X2^Y6,         Z2^Y3^X5,      Z1^X3^Y5,      }, // 70
    {Y4^X8,         Z3^X4^Y8,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 71
    {Y3^X8,         Z3^X4^Y7,      Z2^Y4^X7,      Z1^X5^Y6,      }, // 72
    {Y3^X7,         X3^Z3^Y7,      Z2^Y4^X6,      Z1^X4^Y6,      }, // 73
    {Y2^X7,         X3^Z3^Y6,      Z2^Y3^X6,      Z1^X4^Y5,      }, // 74
    {Y2^X6,         X2^Z3^Y6,      Z2^Y3^X5,      Z1^X3^Y5,      }, // 75
    {Y4^X9,         X4^Y9,         Z3^Y5^X8,      Z2^X5^Y8,      }, // 76
    {Y3^X9,         X4^Y8,         Z3^Y4^X8,      Z2^X5^Y7,      }, // 77
    {Y3^X8,         X3^Y8,         Z3^Y4^X7,      Z2^X4^Y7,      }, // 78
    {Y2^X8,         X3^Y7,         Y3^Z3^X7,      Z2^X4^Y6,      }, // 79
    {Y2^X7,         X2^Y7,         Y3^Z3^X6,      Z2^X3^Y6,      }, // 80
    {Y4^Z4^X8,      Z3^X4^Y8,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 81
    {Y3^Z4^X8,      Z3^X4^Y7,      Z2^Y4^X7,      Z1^X5^Y6,      }, // 82
    {Y3^Z4^X7,      X3^Z3^Y7,      Z2^Y4^X6,      Z1^X4^Y6,      }, // 83
    {Y2^Z4^X7,      X3^Z3^Y6,      Z2^Y3^X6,      Z1^X4^Y5,      }, // 84
    {Y2^Z4^X6,      X2^Z3^Y6,      Z2^Y3^X5,      Z1^X3^Y5,      }, // 85
    {Y4^X9,         X4^Z4^Y9,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 86
    {Y3^X9,         X4^Z4^Y8,      Z3^Y4^X8,      Z2^X5^Y7,      }, // 87
    {Y3^X8,         X3^Z4^Y8,      Z3^Y4^X7,      Z2^X4^Y7,      }, // 88
    {Y2^X8,         X3^Z4^Y7,      Y3^Z3^X7,      Z2^X4^Y6,      }, // 89
    {Y2^X7,         X2^Z4^Y7,      Y3^Z3^X6,      Z2^X3^Y6,      }, // 90
    {X4^Y4,         X4,            Y5,            X5,            }, // 91
    {Y3^X4,         X4,            Y4,            X5,            }, // 92
    {X3^Y3,         X3,            Y4,            X4,            }, // 93
    {Y2^X3,         X3,            Y3,            X4,            }, // 94
    {X2^Y2,         X2,            Y3,            X3,            }, // 95
    {Y4^X5,         X4^Y5,         Y5,            X5,            }, // 96
    {Y3^X5,         X4^Y4,         Y4,            X5,            }, // 97
    {Y3^X4,         X3^Y4,         Y4,            X4,            }, // 98
    {Y2^X4,         X3^Y3,         Y3,            X4,            }, // 99
    {Y2^X3,         X2^Y3,         Y3,            X3,            }, // 100
    {Y4^X6,         X4^Y6,         X5^Y5,         X5,            }, // 101
    {Y3^X6,         X4^Y5,         Y4^X5,         X5,            }, // 102
    {Y3^X5,         X3^Y5,         X4^Y4,         X4,            }, // 103
    {Y2^X5,         X3^Y4,         Y3^X4,         X4,            }, // 104
    {Y2^X4,         X2^Y4,         X3^Y3,         X3,            }, // 105
    {Y4^X7,         X4^Y7,         Y5^X6,         X5^Y6,         }, // 106
    {Y3^X7,         X4^Y6,         Y4^X6,         X5^Y5,         }, // 107
    {Y3^X6,         X3^Y6,         Y4^X5,         X4^Y5,         }, // 108
    {Y2^X6,         X3^Y5,         Y3^X5,         X4^Y4,         }, // 109
    {Y2^X5,         X2^Y5,         Y3^X4,         X3^Y4,         }, // 110
    {Y4,            X4,            Y5^X7,         X5^Y7,         }, // 111
    {Y3,            X4,            Y4^X7,         X5^Y6,         }, // 112
    {Y3,            X3,            Y4^X6,         X4^Y6,         }, // 113
    {Y2,            X3,            Y3^X6,         X4^Y5,         }, // 114
    {Y2,            X2,            Y3^X5,         X3^Y5,         }, // 115
    {X4,            Y4,            X5^Y8,         Y5^X8,         }, // 116
    {Y3,            X4,            Y4^X8,         X5^Y7,         }, // 117
    {X3,            Y3,            X4^Y7,         Y4^X7,         }, // 118
    {Y2,            X3,            Y3^X7,         X4^Y7,         }, // 119
    {X2,            Y2,            X3^Y7,         Y3^X6,         }, // 120
    {Z0^X4^Y4,      Y4,            X5,            Y5^X9,         }, // 121
    {Z0^X4^Y4,      Y3,            Y4,            X5^Y8,         }, // 122
    {Z0^X4^Y4,      X3,            Y3,            Y4^X8,         }, // 123
    {Z0^X4^Y4,      Y2,            X3,            Y3^X8,         }, // 124
    {Z0^X4^Y4,      X2,            Y2,            Y3^X7,         }, // 125
    {Y4^X5^Y5,      Z0^X4^Y4,      X5,            Y5,            }, // 126
    {Y4^X5^Y5,      Z0^X4^Y4,      Y3,            X5,            }, // 127
    {Y4^X5^Y5,      Z0^X4^Y4,      X3,            Y3,            }, // 128
    {Y4^X5^Y5,      Z0^X4^Y4,      Y2,            X3,            }, // 129
    {Y4^X5^Y5,      Z0^X4^Y4,      X2,            Y2,            }, // 130
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y5,            }, // 131
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y3,            }, // 132
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X3,            }, // 133
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y2,            }, // 134
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X2,            }, // 135
    {Y4^X6^Y6,      Z1^X4^Y4,      X5,            X6,            }, // 136
    {Y4^X6^Y6,      Z1^X4^Y4,      Y3,            X5,            }, // 137
    {Y4^X6^Y6,      Z1^X4^Y4,      X3,            Y3,            }, // 138
    {Y4^X6^Y6,      Z1^X4^Y4,      Y2,            X3,            }, // 139
    {Y4^X6^Y6,      Z1^X4^Y4,      X2,            Y2,            }, // 140
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5,            }, // 141
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y3,            }, // 142
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X3,            }, // 143
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y2,            }, // 144
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X2,            }, // 145
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         }, // 146
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X6,            }, // 147
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y3,            }, // 148
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X3,            }, // 149
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Y2,            }, // 150
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X2,            }, // 151
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         }, // 152
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      }, // 153
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         }, // 154
    {Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      }, // 155
    {Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      }, // 156
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         }, // 157
    {Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      }, // 158
    {Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      }, // 159
    {Y3,            X4,            Y4^X8,         Y5^X7,         }, // 160
    {X3,            Y3,            Y4^X7,         X4^Y7,         }, // 161
    {X2,            Y2,            Y3^X6,         X3^Y7,         }, // 162
    {Z0^X4^Y4,      Y3,            Y4,            Y5^X8,         }, // 163
    {Z0^X4^Y4,      X2,            X3,            Y3^X8,         }, // 164
    {Y4^X5^Y5,      Z0^X4^Y4,      X2,            X3,            }, // 165
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X2^X5^Y6,      }, // 166
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y1^X5^Y6,      }, // 167
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X2,            }, // 168
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y1^X5^Y6,      }, // 169
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y1^X5^Y7,      }, // 170
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Y1^X5^Y8,      }, // 171
    {Z0^X4^Y4,      X3,            Y3,            X5^Y7,         }, // 172
    {Y4^X5^Y5,      Z0^X4^Y4,      Y1^X5^Y5,      X2,            }, // 173
    {Y4^X6^Y6,      Z1^X4^Y4,      X2,            X3,            }, // 174
    {Y4^X6^Y6,      Z0^X4^Y4,      X2,            X3,            }, // 175
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X2,            }, // 176
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X1^X5^Y6,      }, // 177
    {Y4^X7^Y7,      Z1^X4^Y4,      Y1^Y5^X6,      X3,            }, // 178
    {Y4^X7^Y7,      Z0^X4^Y4,      Y1^Y5^X6,      X3,            }, // 179
    {Y4^X7^Y7,      Z1^X4^Y4,      Y1^Y5^X6,      Z0^X5^Y6,      }, // 180
    {Y4^X7^Y7,      Z0^X4^Y4,      Y1^Y5^X6,      X1^X5^Y6,      }, // 181
    {Y4^X8^Y8,      Z1^X4^Y4,      Y1^Y5^X7,      Z0^X5^Y7,      }, // 182
    {Y4^X8^Y8,      Z0^X4^Y4,      Y1^Y5^X7,      X1^X5^Y7,      }, // 183
    {Y4^X9^Y9,      Z1^X4^Y4,      Y1^Y5^X8,      Z0^X5^Y8,      }, // 184
    {Y4^X9^Y9,      Z0^X4^Y4,      Y1^Y5^X8,      X1^X5^Y8,      }, // 185
    {X3,            Y3,            Y4^X6,         X4^Y7,         }, // 186
    {Y2,            X3,            Y3^X6,         X4^Y7,         }, // 187
    {Z0^X4^Y4,      X3,            Y3,            Y4^X6,         }, // 188
    {Z0^X4^Y4,      X2,            X3,            Y3^X7,         }, // 189
    {Z0^X4^Y4,      X2,            Y2,            X3^Y7,         }, // 190
    {Y4^X5^Y5,      Y0^X4^Y4,      X2,            X3,            }, // 191
    {Y4^X5^Y5,      Z0^X4^Y4,      Y2^X5^Y5,      X2,            }, // 192
    {Y4^X5^Y5,      Y0^X4^Y4,      X1^X5^Y5,      X2,            }, // 193
    {Y4^X6^Y6,      Z0^X4^Y4,      X3,            Y3,            }, // 194
    {Y4^X6^Y6,      Y0^X4^Y4,      X3,            Y3,            }, // 195
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X3,            }, // 196
    {Y4^X6^Y6,      Y0^X4^Y4,      Y1^X5^Y5,      X3,            }, // 197
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y2^X5^Y6,      }, // 198
    {Y4^X6^Y6,      Z0^X4^Y4,      Y1^X5^Y5,      X2^X5^Y6,      }, // 199
    {Y4^X6^Y6,      Y0^X4^Y4,      Y1^X5^Y5,      Y2^X5^Y6,      }, // 200
    {Y4^X7^Y7,      Y0^X4^Y4,      Y1^Y5^X6,      X3,            }, // 201
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y2^X5^Y6,      }, // 202
    {Y4^X7^Y7,      Y0^X4^Y4,      Y1^Y5^X6,      X1^X5^Y6,      }, // 203
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y2^X5^Y7,      }, // 204
    {Y4^X8^Y8,      Y0^X4^Y4,      Y1^Y5^X7,      X1^X5^Y7,      }, // 205
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X2^X5^Y7,      }, // 206
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X2^X5^Y8,      }, // 207
    {Y4^X9^Y9,      Y0^X4^Y4,      Y1^Y5^X8,      X1^X5^Y8,      }, // 208
    {Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      }, // 209
    {Y2,            X3,            Y3^X7,         Y4^X6,         }, // 210
    {Y1,            X2,            Y2^X7,         Y3^X6,         }, // 211
    {Z0^X4^Y4,      Y2,            Y3,            Y4^X7,         }, // 212
    {Z0^X4^Y4,      Y1,            Y2,            Y3^X6,         }, // 213
    {Y4^X5^Y5,      Z0^X4^Y4,      Y1,            Y2,            }, // 214
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y1,            }, // 215
    {Y4^X6^Y6,      Z1^X4^Y4,      Y1,            X2,            }, // 216
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y1,            }, // 217
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y2,            }, // 218
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Y1,            }, // 219
    {Y1,            X2,            Y2^X6,         X3^Y7,         }, // 220
    {X1,            Y1,            X2^Y7,         Y2^X6,         }, // 221
    {Z0^X4^Y4,      Y1,            X2,            Y2^X7,         }, // 222
    {Z0^X4^Y4,      X1,            Y1,            Y2^X6,         }, // 223
    {Y4^X5^Y5,      Z0^X4^Y4,      Y1,            X2,            }, // 224
    {Y4^X5^Y5,      Z0^X4^Y4,      X1,            Y1,            }, // 225
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X1,            }, // 226
    {Y4^X6^Y6,      Z1^X4^Y4,      X1,            Y1,            }, // 227
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X1,            }, // 228
    {Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X1,            }, // 229
    {Y0,            X1,            Y1^X7,         Y2^X6,         }, // 230
    {Z0^X4^Y4,      Y0,            Y1,            Y2^X6,         }, // 231
    {Y4^X5^Y5,      Z0^X4^Y4,      Y0,            Y1,            }, // 232
    {Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y0,            }, // 233
    {Y4^X6^Y6,      Z1^X4^Y4,      Y0,            X1,            }, // 234
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y0,            }, // 235
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y3^X5,         }, // 236
    {Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X3^Y5,         }, // 237
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y1,            }, // 238
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X1,            }, // 239
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Z2^X5^Y6,      }, // 240
    {Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      Y0^X5^Y6,      }, // 241
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Z2^X5^Y7,      }, // 242
    {Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      Y0^X5^Y7,      }, // 243
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Z2^X5^Y8,      }, // 244
    {Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      Y0^X5^Y8,      }, // 245
    {Y2,            X3,            Z3,            Y3,            }, // 246
    {Y2,            X2,            Z3,            Y3,            }, // 247
    {Y2,            X2,            Z2,            Y3,            }, // 248
    {Y1,            X2,            Z2,            Y2,            }, // 249
    {Y1,            X1,            Z2,            Y2,            }, // 250
    {Y2^X3^Z3,      X3,            Z3,            Y3,            }, // 251
    {X2^Y2^Z3,      X2,            Z3,            Y3,            }, // 252
    {X2^Y2^Z2,      X2,            Z2,            Y3,            }, // 253
    {Y1^X2^Z2,      X2,            Z2,            Y2,            }, // 254
    {X1^Y1^Z2,      X1,            Z2,            Y2,            }, // 255
    {Y2^X4^Z4,      X3^Y3^Z3,      Z3,            Y3,            }, // 256
    {Y2^X3^Z4,      X2^Y3^Z3,      Z3,            Y3,            }, // 257
    {Y2^X3^Z3,      X2^Z2^Y3,      Z2,            Y3,            }, // 258
    {Y1^X3^Z3,      X2^Y2^Z2,      Z2,            Y2,            }, // 259
    {Y1^X2^Z3,      X1^Y2^Z2,      Z2,            Y2,            }, // 260
    {Y2^X5^Z5,      X3^Y4^Z4,      Y3^Z3^X4,      Y3,            }, // 261
    {Y2^X4^Z5,      X2^Y4^Z4,      X3^Y3^Z3,      Y3,            }, // 262
    {Y2^X4^Z4,      X2^Z3^Y4,      Z2^X3^Y3,      Y3,            }, // 263
    {Y1^X4^Z4,      X2^Y3^Z3,      Y2^Z2^X3,      Y2,            }, // 264
    {Y1^X3^Z4,      X1^Y3^Z3,      X2^Y2^Z2,      Y2,            }, // 265
    {Y2^X6^Z6,      X3^Y5^Z5,      Z3^Y4^X5,      Y3^X4^Z4,      }, // 266
    {Y2^X5^Z6,      X2^Y5^Z5,      Z3^X4^Y4,      X3^Y3^Z4,      }, // 267
    {Y2^X5^Z5,      X2^Z4^Y5,      Z2^X4^Y4,      X3^Y3^Z3,      }, // 268
    {Y1^X5^Z5,      X2^Y4^Z4,      Z2^Y3^X4,      Y2^X3^Z3,      }, // 269
    {Y1^X4^Z5,      X1^Y4^Z4,      Z2^X3^Y3,      X2^Y2^Z3,      }, // 270
    {Y2^X7^Z7,      X3^Y6^Z6,      Z3^Y5^X6,      Y3^X5^Z5,      }, // 271
    {Y2^X6^Z7,      X2^Y6^Z6,      Z3^X5^Y5,      Y3^X4^Z5,      }, // 272
    {Y2^X6^Z6,      X2^Z5^Y6,      Z2^X5^Y5,      Y3^X4^Z4,      }, // 273
    {Y1^X6^Z6,      X2^Y5^Z5,      Z2^Y4^X5,      Y2^X4^Z4,      }, // 274
    {Y1^X5^Z6,      X1^Y5^Z5,      Z2^X4^Y4,      Y2^X3^Z4,      }, // 275
    {Y2^X8^Z8,      X3^Y7^Z7,      Z3^Y6^X7,      Y3^X6^Z6,      }, // 276
    {Y2^X7^Z8,      X2^Y7^Z7,      Z3^X6^Y6,      Y3^X5^Z6,      }, // 277
    {Y2^X7^Z7,      X2^Z6^Y7,      Z2^X6^Y6,      Y3^X5^Z5,      }, // 278
    {Y1^X7^Z7,      X2^Y6^Z6,      Z2^Y5^X6,      Y2^X5^Z5,      }, // 279
    {Y1^X6^Z7,      X1^Y6^Z6,      Z2^X5^Y5,      Y2^X4^Z5,      }, // 280
    {Y2^X5,         X3^Y4^Z4,      Y3^Z3^X4,      Y3,            }, // 281
    {Y2^X4,         X2^Y4^Z4,      X3^Y3^Z3,      Y3,            }, // 282
    {Y2^X4,         X2^Z3^Y4,      Z2^X3^Y3,      Y3,            }, // 283
    {Y1^X4,         X2^Y3^Z3,      Y2^Z2^X3,      Y2,            }, // 284
    {Y1^X3,         X1^Y3^Z3,      X2^Y2^Z2,      Y2,            }, // 285
    {Y2,            X3,            Z3^Y4^X5,      Y3^X4^Z4,      }, // 286
    {Y2,            X2,            Z3^X4^Y4,      X3^Y3^Z4,      }, // 287
    {Y2,            X2,            Z2^X4^Y4,      X3^Y3^Z3,      }, // 288
    {Y1,            X2,            Z2^Y3^X4,      Y2^X3^Z3,      }, // 289
    {Y1,            X1,            Z2^X3^Y3,      X2^Y2^Z3,      }, // 290
    {Y2,            X3,            Z3,            Y3^X5,         }, // 291
    {Y2,            X2,            Z3,            Y3^X4,         }, // 292
    {Y2,            X2,            Z2,            Y3^X4,         }, // 293
    {Y1,            X2,            Z2,            Y2^X4,         }, // 294
    {Y1,            X1,            Z2,            Y2^X3,         }, // 295
    {X4^Y4,         Y2,            Z3,            Y3,            }, // 296
    {X4^Y4,         Y2,            Z2,            Y3,            }, // 297
    {X4^Y4,         Y1,            Z2,            Y2,            }, // 298
    {Y1^X4^Y4,      X1,            Z2,            Y2,            }, // 299
    {Y4^X5^Y5,      X4^Y4,         Y2,            Z3,            }, // 300
    {Y4^X5^Y5,      X4^Y4,         Y2,            Z2,            }, // 301
    {Z3^Y4^X5^Y5,   X4^Y4,         Y1,            Z2,            }, // 302
    {Z3^Y4^X5^Y5,   Y1^X4^Y4,      X1,            Z2,            }, // 303
    {Y4^X5^Y5,      X4^Y4,         Z3^X5,         Y2,            }, // 304
    {Y4^X5^Y5,      X4^Y4,         Z2^X5,         Y2,            }, // 305
    {Z3^Y4^X5^Y5,   X4^Y4,         Z2^X5,         Y1,            }, // 306
    {Z3^Y4^X5^Y5,   Y1^X4^Y4,      Z2^X5,         X1,            }, // 307
    {Y4^X6^Y6,      X4^Y4,         Y2,            Y3,            }, // 308
    {Y4^X6^Y6,      X4^Y4,         Z3,            Y3,            }, // 309
    {Y4^X6^Y6,      X4^Y4,         Z2,            Y3,            }, // 310
    {Z3^Y4^X6^Y6,   X4^Y4,         Z2,            Y2,            }, // 311
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      Z2,            Y2,            }, // 312
    {Y4^X6^Y6,      X4^Y4,         X5^Y5,         Y2,            }, // 313
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z3,            }, // 314
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z2,            }, // 315
    {Z3^Y4^X6^Y6,   X4^Y4,         Y1^X5^Y5,      Z2,            }, // 316
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      X1^X5^Y5,      Z2,            }, // 317
    {Y4^X6^Y6,      X4^Y4,         X5^Y5,         Z3^X6,         }, // 318
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z3^X6,         }, // 319
    {Y4^X6^Y6,      X4^Y4,         Y2^X5^Y5,      Z2^X6,         }, // 320
    {Z3^Y4^X6^Y6,   X4^Y4,         Y1^X5^Y5,      Z2^X6,         }, // 321
    {Z3^Y4^X6^Y6,   Y1^X4^Y4,      X1^X5^Y5,      Z2^X6,         }, // 322
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Y3,            }, // 323
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Y2,            }, // 324
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Y2,            }, // 325
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      X5^Y6,         }, // 326
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Z3^X5^Y6,      }, // 327
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Z2^X5^Y6,      }, // 328
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Z2^X5^Y6,      }, // 329
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Z2^X5^Y6,      }, // 330
    {Y4^X7^Y7,      X4^Y4,         Y2^Y5^X6,      Y3^X5^Y6,      }, // 331
    {Z3^Y4^X7^Y7,   X4^Y4,         Y1^Y5^X6,      Y2^X5^Y6,      }, // 332
    {Z3^Y4^X7^Y7,   Y1^X4^Y4,      X1^Y5^X6,      Y2^X5^Y6,      }, // 333
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      X5^Y7,         }, // 334
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Z3^X5^Y7,      }, // 335
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Z2^X5^Y7,      }, // 336
    {Z3^Y4^X8^Y8,   X4^Y4,         Y1^Y5^X7,      Z2^X5^Y7,      }, // 337
    {Z3^Y4^X8^Y8,   Y1^X4^Y4,      X1^Y5^X7,      Z2^X5^Y7,      }, // 338
    {Y4^X8^Y8,      X4^Y4,         Y2^Y5^X7,      Y3^X5^Y7,      }, // 339
    {Z3^Y4^X8^Y8,   X4^Y4,         Y1^Y5^X7,      Y2^X5^Y7,      }, // 340
    {Z3^Y4^X8^Y8,   Y1^X4^Y4,      X1^Y5^X7,      Y2^X5^Y7,      }, // 341
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      X5^Y8,         }, // 342
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      Z3^X5^Y8,      }, // 343
    {Y4^X9^Y9,      X4^Y4,         Y2^Y5^X8,      Z2^X5^Y8,      }, // 344
    {Z3^Y4^X9^Y9,   X4^Y4,         Y1^Y5^X8,      Z2^X5^Y8,      }, // 345
    {Z3^Y4^X9^Y9,   Y1^X4^Y4,      X1^Y5^X8,      Z2^X5^Y8,      }, // 346
};

// GFX11_SW_PATTERN_NIBBLE3
//
// Constant lookup table of 409 rows (indices 0-408); each row holds four UINT_64 entries.
// The trailing "// N" comment on every row is that row's zero-based index in the table.
//
// Each entry is either 0 or a bitwise XOR (`^`) of X*/Y*/Z* symbols; those symbols are not
// defined in this section of the file. Row 0 is the all-zero row.
//
// NOTE(review): judging only by the table name, a row presumably describes one 4-bit group
// ("nibble") of a GFX11 swizzle pattern, with one entry per bit -- confirm against the code
// that consumes this table.
// NOTE(review): rows are presumably selected by index from elsewhere, so do not reorder,
// insert or delete rows without checking every user of those indices -- TODO confirm.
const UINT_64 GFX11_SW_PATTERN_NIBBLE3[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y6,            X6,            Y7,            X7,            }, // 1
    {Y5,            X6,            Y6,            X7,            }, // 2
    {Y5,            X5,            Y6,            X6,            }, // 3
    {Y4,            X5,            Y5,            X6,            }, // 4
    {Y4,            X4,            Y5,            X5,            }, // 5
    {Z0^X6^Y6,      X6,            Y7,            X7,            }, // 6
    {Z0^Y5^X6,      X6,            Y6,            X7,            }, // 7
    {Z0^X5^Y5,      X5,            Y6,            X6,            }, // 8
    {Z0^Y4^X5,      X5,            Y5,            X6,            }, // 9
    {Z0^X4^Y4,      X4,            Y5,            X5,            }, // 10
    {Z1^Y6^X7,      Z0^X6^Y7,      Y7,            X7,            }, // 11
    {Z1^Y5^X7,      Z0^X6^Y6,      Y6,            X7,            }, // 12
    {Z1^Y5^X6,      Z0^X5^Y6,      Y6,            X6,            }, // 13
    {Z1^Y4^X6,      Z0^X5^Y5,      Y5,            X6,            }, // 14
    {Z1^Y4^X5,      Z0^X4^Y5,      Y5,            X5,            }, // 15
    {X6^Y6,         X6,            Y7,            X7,            }, // 16
    {Y5^X6,         X6,            Y6,            X7,            }, // 17
    {X5^Y5,         X5,            Y6,            X6,            }, // 18
    {Y4^X5,         X5,            Y5,            X6,            }, // 19
    {X4^Y4,         X4,            Y5,            X5,            }, // 20
    {Y6^X7,         X6^Y7,         Y7,            X7,            }, // 21
    {Y5^X7,         X6^Y6,         Y6,            X7,            }, // 22
    {Y5^X6,         X5^Y6,         Y6,            X6,            }, // 23
    {Y4^X6,         X5^Y5,         Y5,            X6,            }, // 24
    {Y4^X5,         X4^Y5,         Y5,            X5,            }, // 25
    {X6^Y7,         Y6^X7,         X7,            Y7,            }, // 26
    {X5^Y6,         Y5^X6,         X6,            Y6,            }, // 27
    {Y4^X6,         X5^Y6,         Y5,            X6,            }, // 28
    {X4^Y6,         Y4^X5,         X5,            Y5,            }, // 29
    {X6^Y8,         Y6^X8,         X7^Y7,         Y7,            }, // 30
    {X6^Y7,         Y5^X8,         Y6^X7,         Y6,            }, // 31
    {X5^Y7,         Y5^X7,         X6^Y6,         Y6,            }, // 32
    {X5^Y7,         Y4^X7,         X6^Y6,         Y5,            }, // 33
    {X3^Y7,         Y4^X6,         X5^Y6,         Y5,            }, // 34
    {X6^Y9,         Y6^X9,         X7^Y8,         Y7^X8,         }, // 35
    {X6^Y8,         Y5^X9,         X7^Y7,         Y6^X8,         }, // 36
    {X5^Y8,         Y5^X8,         X6^Y7,         Y6^X7,         }, // 37
    {Y3^X8,         X5^Y7,         X6^Y6,         Y5^X7,         }, // 38
    {Y3^X7,         X3^Y7,         X5^Y6,         Y5^X6,         }, // 39
    {X6,            Y6^X9,         X7^Y8,         Y7^X8,         }, // 40
    {Y5,            X6^Y8,         X7^Y7,         Y6^X8,         }, // 41
    {Y3,            Y5^X8,         X6^Y7,         Y6^X7,         }, // 42
    {X3,            Y3^X8,         X6^Y6,         Y5^X7,         }, // 43
    {Y2,            Y3^X7,         X3^Y6,         Y5^X6,         }, // 44
    {Y6^X9,         X7^Y8,         Y7^X8,         Z0^X5^Y5,      }, // 45
    {X6^Y8,         Y6^X8,         X7^Y7,         Z0^X5^Y5,      }, // 46
    {X5^Y8,         X6^Y7,         Y6^X7,         Z0^X5^Y5,      }, // 47
    {Y3^X7,         X5^Y7,         X6^Y6,         Z0^X5^Y5,      }, // 48
    {X3^Y7,         Y3^X6,         X5^Y6,         Z0^X5^Y5,      }, // 49
    {X5,            X6^Y8,         Y6^X8,         X7^Y7,         }, // 50
    {Y3,            X5^Y8,         X6^Y7,         Y6^X7,         }, // 51
    {X3,            Y3^X7,         X5^Y7,         X6^Y6,         }, // 52
    {Y2,            X3^Y7,         Y3^X6,         X5^Y6,         }, // 53
    {X6,            Y6,            X7^Y8,         Y7^X8,         }, // 54
    {Y3,            X6,            Y6^X8,         X7^Y7,         }, // 55
    {X3,            Y3,            X6^Y7,         Y6^X7,         }, // 56
    {Y2,            X3,            Y3^X7,         X6^Y6,         }, // 57
    {X2,            Y2,            X3^Y6,         Y3^X6,         }, // 58
    {Y6,            X7^Y8,         Y7^X8,         X5^Y6,         }, // 59
    {X6,            X7^Y7,         Y6^X8,         X5^Y6,         }, // 60
    {Y3,            X6^Y7,         Y6^X7,         X5^Y6,         }, // 61
    {X3,            Y3^X7,         X6^Y6,         Z0^X5^Y6,      }, // 62
    {Y2,            Y3^X6,         X3^Y6,         Z0^X5^Y6,      }, // 63
    {Y3,            X6,            X7^Y7,         Y6^X8,         }, // 64
    {X2,            Y2,            Y3^X6,         X3^Y6,         }, // 65
    {X6^Y6,         Y6,            X7,            Y7^X8,         }, // 66
    {X6^Y6,         Y3,            Y6,            X7^Y7,         }, // 67
    {X6^Y6,         X3,            Y3,            Y6^X7,         }, // 68
    {X6^Y6,         Y2,            X3,            Y3^X7,         }, // 69
    {X3^Y6,         X2,            Y2,            Y3^X6,         }, // 70
    {X6,            X7,            Y7^X8,         X6^Y6,         }, // 71
    {Y3,            X6,            X7^Y7,         X6^Y6,         }, // 72
    {X3,            Y3,            X6^Y7,         X6^Y6,         }, // 73
    {Y2,            X3,            Y3^X7,         Z0^X6^Y6,      }, // 74
    {X2,            X3,            Y3^X6,         Y2^X6^Y6,      }, // 75
    {X6^Y6,         X6,            X7,            Y7^X8,         }, // 76
    {X6^Y6,         Y3,            X6,            X7^Y7,         }, // 77
    {X6^Y6,         X3,            Y3,            X6^Y7,         }, // 78
    {Z0^X6^Y6,      Y2,            X3,            Y3^X7,         }, // 79
    {Y2^X6^Y6,      X2,            X3,            Y3^X6,         }, // 80
    {X6^Y6,         X6^Y8,         X7,            Y7,            }, // 81
    {X6^Y6,         X6^Y8,         Y3,            X7,            }, // 82
    {X6^Y6,         X6^Y8,         X3,            Y3,            }, // 83
    {Z0^X6^Y6,      X3^Y8,         Y2,            Y3,            }, // 84
    {Y2^X6^Y6,      X3^Y8,         X2,            Y3,            }, // 85
    {Y6^X7,         X7,            Y7,            X6^Y7,         }, // 86
    {Y6^X7,         Y3,            X7,            X6^Y7,         }, // 87
    {Y6^X7,         X3,            Y3,            X6^Y7,         }, // 88
    {Y2^Y6^X7,      X3,            Y3,            Z0^X6^Y7,      }, // 89
    {Y2^Y6^X7,      X3,            Y3,            X2^X6^Y7,      }, // 90
    {Y6^X7,         X6^Y7,         X7,            Y7,            }, // 91
    {Y6^X7,         X6^Y7,         Y3,            X7,            }, // 92
    {Y6^X7,         X6^Y7,         X3,            Y3,            }, // 93
    {Y2^Y6^X7,      Z0^X6^Y7,      X3,            Y3,            }, // 94
    {Y2^Y6^X7,      X2^X6^Y7,      X3,            Y3,            }, // 95
    {X5^Y7,         X6^Y6,         X6,            Y7,            }, // 96
    {Y5^X6,         X5^Y6,         Y6,            Y2^X6,         }, // 97
    {Y4^X6,         X5^Y6,         Y5,            X2^X6,         }, // 98
    {Y4^X5,         X4^Y6,         Y5,            Y1^X5,         }, // 99
    {X5^Y8,         Y6^X7,         X6^Y7,         Y7,            }, // 100
    {Y5^X7,         X5^Y7,         X6^Y6,         Y2^X6,         }, // 101
    {Y4^X7,         X5^Y6,         Y5^X6,         Y2^X6,         }, // 102
    {Y4^X6,         X3^Y6,         X5^Y5,         Y1^X5,         }, // 103
    {Y5^X9,         Y6^X8,         X6^Y8,         X7^Y7,         }, // 104
    {Y5^X8,         X5^Y8,         Y6^X7,         Y2^X6^Y7,      }, // 105
    {Y3^X8,         X5^Y7,         Y5^X7,         Y2^X6^Y6,      }, // 106
    {Y3^X7,         X3^Y7,         Y5^X6,         Y1^X5^Y6,      }, // 107
    {X5,            Y6^X8,         X6^Y8,         X7^Y7,         }, // 108
    {Y3,            X5^Y8,         Y6^X7,         Y2^X6^Y7,      }, // 109
    {X3,            Y3^X7,         X5^Y7,         Y2^X6^Y6,      }, // 110
    {Y2,            Y3^X6,         X3^Y7,         Y1^X5^Y6,      }, // 111
    {Y6^X8,         X6^Y8,         X7^Y7,         Z0^X5^Y5,      }, // 112
    {X5^Y8,         Y6^X7,         Y2^X6^Y7,      Z0^X5^Y5,      }, // 113
    {Y3^X7,         X5^Y7,         X2^X6^Y6,      Z0^X5^Y5,      }, // 114
    {Y3^X6,         X3^Y7,         Y1^X5^Y6,      Z0^X5^Y5,      }, // 115
    {X3,            Y3^X7,         X5^Y7,         X2^X6^Y6,      }, // 116
    {Y3,            X5,            X6^Y8,         X7^Y7,         }, // 117
    {X3,            Y3,            X5^Y8,         X6^Y7,         }, // 118
    {X3,            Y3,            X5^Y8,         Y2^X6^Y7,      }, // 119
    {Y2,            X3,            Y3^X6,         X5^Y6,         }, // 120
    {X2,            Y2,            Y3^X5,         X3^Y6,         }, // 121
    {X6,            Y6^X8,         X7^Y7,         X5^Y6,         }, // 122
    {Y3,            Y6^X7,         Y2^X6^Y7,      X5^Y6,         }, // 123
    {X3,            Y3^X7,         Y2^X6^Y6,      Z0^X5^Y6,      }, // 124
    {X3,            Y3^X7,         Y2^X6^Y6,      Y1^X5^Y6,      }, // 125
    {X3,            Y3,            Y6^X7,         Y2^X6^Y7,      }, // 126
    {X2,            X3,            Y3^X7,         Y2^X6^Y6,      }, // 127
    {X6^Y6,         X3,            Y3,            Y2^X6^Y7,      }, // 128
    {X3,            Y3,            Y2^X6^Y7,      X6^Y6,         }, // 129
    {X3,            Y3,            X2^X6^Y7,      Y2^X6^Y6,      }, // 130
    {Y2^X6^Y6,      X3,            Y3,            X2^X6^Y7,      }, // 131
    {X6^Y6,         X6^Y8,         Y3,            Y7,            }, // 132
    {X6^Y6,         Y2^X6^Y8,      X3,            Y3,            }, // 133
    {Y2^X6^Y6,      X2^X6^Y8,      X3,            Y3,            }, // 134
    {Y6^X7,         Y3,            Y7,            X6^Y7,         }, // 135
    {Y6^X7,         X3,            Y3,            Y2^X6^Y7,      }, // 136
    {Y6^X7,         X6^Y7,         Y3,            Y7,            }, // 137
    {Y6^X7,         Y2^X6^Y7,      X3,            Y3,            }, // 138
    {X5^Y6,         Y5^X6,         X6,            Y2^Y6,         }, // 139
    {X5^Y6,         Y5^X6,         X2^X6,         Y2^Y6,         }, // 140
    {Y4^X6,         X5^Y6,         X2^X6,         Y1^Y5,         }, // 141
    {X4^Y6,         Y4^X5,         X1^X5,         Y1^Y5,         }, // 142
    {Y4^X8,         X6^Y6,         Y5^X7,         Y2^X7,         }, // 143
    {X5^Y6,         Y5^X7,         X2^X6^Y6,      Y2^X6,         }, // 144
    {X5^Y6,         Y4^X7,         X2^Y5^X6,      Y1^X6,         }, // 145
    {X3^Y6,         Y4^X6,         X1^X5^Y5,      Y1^X5,         }, // 146
    {X5^Y8,         X6^Y7,         Y5^X8,         Y2^Y6^X7,      }, // 147
    {X5^Y8,         Y5^X8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 148
    {Y3^X8,         X5^Y7,         X2^Y5^X7,      Y1^X6^Y6,      }, // 149
    {Y3^X7,         X3^Y7,         X1^Y5^X6,      Y1^X5^Y6,      }, // 150
    {Y3,            X6^Y7,         Y5^X8,         Y2^Y6^X7,      }, // 151
    {Y3,            Y5^X8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 152
    {X3,            Y3^X8,         X2^Y5^X7,      Y1^X6^Y6,      }, // 153
    {Y2,            Y3^X6,         X3^Y6,         X1^X5^Y5,      }, // 154
    {X5^Y8,         X6^Y7,         Y2^Y6^X7,      Z0^X5^Y5,      }, // 155
    {X5^Y8,         X2^X6^Y7,      Y2^Y6^X7,      Z0^X5^Y5,      }, // 156
    {Y3^X8,         Y2^Y5^X7,      Y1^X6^Y6,      Z0^X5^Y5,      }, // 157
    {Y3^X7,         Y2^X6^Y6,      X1^X5^Y7,      Y1^X5^Y5,      }, // 158
    {Y3,            X5^Y8,         X6^Y7,         Y2^Y6^X7,      }, // 159
    {Y3,            X5^Y8,         X2^X6^Y7,      Y2^Y6^X7,      }, // 160
    {X3,            Y3^X8,         Y2^Y5^X7,      Y1^X6^Y6,      }, // 161
    {X3,            Y3^X7,         Y2^X6^Y6,      X1^X5^Y7,      }, // 162
    {X3,            Y3,            X6^Y7,         Y2^Y6^X7,      }, // 163
    {X3,            Y3,            X2^X6^Y7,      Y2^Y6^X7,      }, // 164
    {X2,            X3,            Y3^X7,         Y2^Y5^X6,      }, // 165
    {X2,            X3,            Y3^X6,         Y2^X5^Y6,      }, // 166
    {Y3,            X6^Y7,         Y2^Y6^X7,      X5^Y6,         }, // 167
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      X5^Y6,         }, // 168
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      Z0^X5^Y6,      }, // 169
    {Y3,            X2^Y6^X7,      Y2^X6^Y7,      X1^X5^Y6,      }, // 170
    {X3,            Y3,            X2^Y6^X7,      Y2^X6^Y7,      }, // 171
    {X6^Y6,         X3,            Y3,            Y2^Y6^X7,      }, // 172
    {Y2^X6^Y6,      X3,            Y3,            X2^X6^Y6,      }, // 173
    {X3,            Y3,            Y2^Y6^X7,      X6^Y6,         }, // 174
    {Y2^Y6^X7,      X3,            Y3,            X6^Y7,         }, // 175
    {Y2^Y6^X7,      X6^Y7,         X3,            Y3,            }, // 176
    {X5^Y5,         Y1^X5^Y6,      X2^X6,         Y2^Y6,         }, // 177
    {Y4^X5,         X1^X5^Y6,      Y1^Y5,         X2^X6,         }, // 178
    {Y4^X5,         Y0^X4^Y6,      X1^X5,         Y1^Y5,         }, // 179
    {X5^Y5,         Y1^X5^Y7,      X2^X6^Y6,      Y2^Y6,         }, // 180
    {Y4^X6,         Y1^X5^Y6,      X1^X5^Y5,      Y2^X6,         }, // 181
    {Y3^X6,         Y0^X4^Y6,      X1^Y4^X5,      Y1^X5,         }, // 182
    {Y5^X8,         Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 183
    {Y3^X8,         Y1^X5^Y7,      X1^Y5^X7,      Y2^X6^Y6,      }, // 184
    {Y3^X7,         Y1^X4^Y7,      Y2^X5^Y6,      X1^Y5^X6,      }, // 185
    {Y3,            X5^Y8,         X2^Y6^X7,      Y2^X6^Y7,      }, // 186
    {Y3,            Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 187
    {X3,            Y3^X7,         Y1^X5^Y6,      X1^Y5^X6,      }, // 188
    {X3,            Y3^X6,         Y1^X4^Y6,      Y2^X5^Y5,      }, // 189
    {Y1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      Z0^X5^Y5,      }, // 190
    {X1^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      Y1^X5^Y5,      }, // 191
    {X1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      Y1^X5^Y5,      }, // 192
    {Y3,            X1^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      }, // 193
    {Y3,            X1^X5^Y8,      X2^X6^Y7,      Y2^Y6^X7,      }, // 194
    {X3,            Y3,            Y1^X5^Y7,      X2^X6^Y6,      }, // 195
    {X3,            Y3,            X1^X5^Y7,      Y2^X6^Y6,      }, // 196
    {X3,            Y3,            X1^X5^Y7,      X2^X6^Y6,      }, // 197
    {Y3,            X2^Y6^X7,      Y1^X6^Y7,      Y2^X5^Y6,      }, // 198
    {X3,            Y3,            X2^Y6^X7,      Y1^X6^Y7,      }, // 199
    {X2^X6^Y6,      X3,            Y3,            Y1^X6^Y6,      }, // 200
    {X2^X6^Y6,      X3,            Y3,            Y2^X6^Y6,      }, // 201
    {X3,            Y3,            Y1^X6^Y7,      X2^X6^Y6,      }, // 202
    {Y2^X6^Y6,      X3,            Y3,            Y1^X6^Y7,      }, // 203
    {Y2^X6^Y6,      Y1^X6^Y8,      X3,            Y3,            }, // 204
    {Y2^Y6^X7,      X3,            Y3,            Y1^X6^Y7,      }, // 205
    {X6,            Y6^X10,        X7^Y9,         Y7^X9,         }, // 206
    {Y5,            X6^Y9,         X7^Y8,         Y6^X9,         }, // 207
    {Y3,            Y5^X9,         X6^Y8,         Y6^X8,         }, // 208
    {X3,            Y3^X9,         X6^Y7,         Y5^X8,         }, // 209
    {Y2,            Y3^X8,         X3^Y7,         Y5^X7,         }, // 210
    {Y6^X10,        X7^Y9,         Y7^X9,         X8^Y8,         }, // 211
    {X5^Y9,         X6^Y8,         Y6^X8,         X7^Y7,         }, // 212
    {Y3^X8,         X5^Y8,         X6^Y7,         Y6^X7,         }, // 213
    {X3^Y8,         Y3^X7,         X5^Y7,         X6^Y6,         }, // 214
    {X5,            X6^Y9,         Y6^X9,         X7^Y8,         }, // 215
    {Y3,            X5^Y9,         X6^Y8,         Y6^X8,         }, // 216
    {X3,            Y3^X8,         X5^Y8,         X6^Y7,         }, // 217
    {Y2,            X3^Y8,         Y3^X7,         X5^Y7,         }, // 218
    {X6,            Y6,            X7^Y10,        Y7^X10,        }, // 219
    {Y3,            X6,            Y6^X10,        X7^Y9,         }, // 220
    {X3,            Y3,            X6^Y9,         Y6^X9,         }, // 221
    {Y2,            X3,            Y3^X9,         X6^Y8,         }, // 222
    {X2,            Y2,            X3^Y8,         Y3^X8,         }, // 223
    {Y6,            X7^Y10,        Y7^X10,        X8^Y9,         }, // 224
    {X6,            X7^Y9,         Y6^X10,        X8^Y8,         }, // 225
    {Y3,            X6^Y9,         Y6^X9,         X7^Y8,         }, // 226
    {X3,            Y3^X9,         X6^Y8,         X7^Y7,         }, // 227
    {Y2,            Y3^X8,         X3^Y8,         X6^Y7,         }, // 228
    {Y3,            X6,            X7^Y9,         Y6^X10,        }, // 229
    {X2,            Y2,            Y3^X8,         X3^Y8,         }, // 230
    {X6^Y6,         Y6,            X7,            Y7^X10,        }, // 231
    {X6^Y6,         Y3,            Y6,            X7^Y9,         }, // 232
    {X6^Y6,         X3,            Y3,            Y6^X9,         }, // 233
    {X6^Y6,         Y2,            X3,            Y3^X9,         }, // 234
    {X6^Y6,         X2,            Y2,            Y3^X8,         }, // 235
    {X6,            X7,            Y7^X10,        X8^Y9,         }, // 236
    {Y3,            X6,            X7^Y9,         Y7^X9,         }, // 237
    {X3,            Y3,            X6^Y9,         X7^Y8,         }, // 238
    {Y2,            X3,            Y3^X8,         X6^Y8,         }, // 239
    {X2,            Y2,            X3^Y8,         Y3^X7,         }, // 240
    {X6^Y6,         X6,            X7,            Y7^X10,        }, // 241
    {X6^Y6,         Y3,            X6,            X7^Y9,         }, // 242
    {X6^Y6,         X3,            Y3,            X6^Y9,         }, // 243
    {Z0^X6^Y6,      Y2,            X3,            Y3^X8,         }, // 244
    {Z0^X6^Y6,      X2,            Y2,            X3^Y8,         }, // 245
    {Z0^X6^Y6,      X6^Y8,         Y2,            X3,            }, // 246
    {Z0^X6^Y6,      X6^Y8,         X2,            Y2,            }, // 247
    {Y6^X7,         X7,            Y7,            X8^Y9,         }, // 248
    {Y6^X7,         Y3,            X7,            X8^Y8,         }, // 249
    {Y6^X7,         X3,            Y3,            X7^Y8,         }, // 250
    {Z1^Y6^X7,      Y2,            X3,            Y3^X8,         }, // 251
    {Z1^Y6^X7,      X2,            Y2,            Y3^X7,         }, // 252
    {Z1^Y6^X7,      Z0^X6^Y7,      Y2,            X3,            }, // 253
    {Z1^Y6^X7,      Z0^X6^Y7,      X2,            Y2,            }, // 254
    {X4^Y6,         X5^Y5,         X5,            Y6,            }, // 255
    {X3^Y6,         Y4^X5,         X4,            Y5,            }, // 256
    {X3^Y7,         Y5^X6,         X5^Y6,         Y6,            }, // 257
    {X2^Y7,         Y4^X5,         X3^Y6,         Y5,            }, // 258
    {Y3^X8,         Y5^X7,         X5^Y7,         X6^Y6,         }, // 259
    {Y3^X6,         X2^Y7,         X3^Y6,         X5^Y5,         }, // 260
    {X5,            Y6^X9,         X6^Y9,         Y7^X8,         }, // 261
    {X3,            Y3^X8,         X5^Y8,         Y6^X7,         }, // 262
    {Y2,            Y3^X7,         X3^Y8,         X5^Y7,         }, // 263
    {Y2,            Y3^X6,         X2^Y8,         X3^Y7,         }, // 264
    {Y6^X9,         X6^Y9,         Y7^X8,         X7^Y8,         }, // 265
    {Y3^X8,         X5^Y8,         Y6^X7,         X6^Y7,         }, // 266
    {Y2^X7,         Y3^X6,         X3^Y7,         X5^Y6,         }, // 267
    {X2,            Y2^X7,         Y3^X6,         X3^Y7,         }, // 268
    {Y3,            X5,            X6^Y10,        Y7^X9,         }, // 269
    {X3,            Y3,            X5^Y10,        X6^Y9,         }, // 270
    {Y2,            X3,            Y3^X8,         X5^Y9,         }, // 271
    {X2,            Y2,            X3^Y9,         Y3^X7,         }, // 272
    {Y1,            X2,            Y2^X7,         Y3^X6,         }, // 273
    {X6,            Y6^X10,        Y7^X9,         X7^Y9,         }, // 274
    {X3,            Y3^X9,         Y6^X8,         X6^Y8,         }, // 275
    {Y2,            Y3^X7,         X2^Y8,         X3^Y7,         }, // 276
    {Y3,            X6,            Y6^X10,        Y7^X9,         }, // 277
    {Y2,            X3,            Y3^X9,         Y6^X8,         }, // 278
    {Y1,            Y2,            Y3^X7,         X2^Y8,         }, // 279
    {X6^Y6,         Y3,            X6,            Y7^X9,         }, // 280
    {X6^Y6,         Y2,            X3,            Y3^X8,         }, // 281
    {X6^Y6,         X2,            Y2,            Y3^X7,         }, // 282
    {X6^Y6,         Y1,            Y2,            Y3^X6,         }, // 283
    {Y3,            X6,            Y7^X9,         X7^Y9,         }, // 284
    {Z0^X6^Y6,      Y1,            X2,            Y2^X7,         }, // 285
    {X6^Y6,         X6^Y8,         Y2,            X3,            }, // 286
    {Z0^X6^Y6,      X3^Y8,         Y1,            X2,            }, // 287
    {Y6^X7,         Y3,            X7,            Y7^X9,         }, // 288
    {Y6^X7,         Y2,            X3,            Y3^X8,         }, // 289
    {Z0^Y6^X7,      X2,            Y2,            Y3^X7,         }, // 290
    {Z0^Y6^X7,      X2,            X3,            Y3^X8,         }, // 291
    {Y6^X7,         X6^Y7,         Y2,            X3,            }, // 292
    {Z0^Y6^X7,      Z4^X6^Y7,      X2,            Y2,            }, // 293
    {Z0^Y6^X7,      Y1^X6^Y7,      X2,            X3,            }, // 294
    {Y3^X5,         X4^Y6,         Y4,            X5,            }, // 295
    {X3^Y6,         Y3^X5,         X4,            Y4,            }, // 296
    {X3^Y7,         Y3^X6,         X5^Y6,         Y4,            }, // 297
    {X2^Y7,         Y3^X5,         X3^Y6,         Y4,            }, // 298
    {Y2^X7,         X3^Y7,         Y3^X6,         X5^Y6,         }, // 299
    {Y2^X6,         X2^Y7,         Y3^X5,         X3^Y6,         }, // 300
    {X2,            Y2^X8,         X3^Y7,         Y3^X7,         }, // 301
    {Y1,            Y2^X6,         X2^Y7,         Y3^X5,         }, // 302
    {X2^Y7,         Y2^X6,         X3^Y6,         Y3^X5,         }, // 303
    {X2,            Y2^X7,         X3^Y7,         Y3^X6,         }, // 304
    {Y1,            X2^Y7,         Y2^X6,         X3^Y6,         }, // 305
    {Y1,            X2,            Y2^X8,         X3^Y7,         }, // 306
    {X1,            Y1,            X2^Y7,         Y2^X7,         }, // 307
    {Y1,            Y2^X7,         X2^Y7,         Y3^X6,         }, // 308
    {X1,            Y1,            Y2^X7,         X2^Y7,         }, // 309
    {X6^Y6,         Y1,            X2,            Y2^X8,         }, // 310
    {X3^Y6,         X1,            Y1,            Y2^X7,         }, // 311
    {Y1,            X2,            Y2^X8,         Y3^X7,         }, // 312
    {X2,            Y2,            Y3^X7,         X3^Y8,         }, // 313
    {X6^Y6,         X2,            Y2,            X3^Y8,         }, // 314
    {Z3^X6^Y6,      Y1,            X2,            Y2^X8,         }, // 315
    {Y1^X6^Y6,      X2,            Y2,            Y3^X7,         }, // 316
    {X6^Y6,         X6^Y8,         X2,            Y2,            }, // 317
    {Z3^X6^Y6,      X3^Y8,         Y1,            X2,            }, // 318
    {Y1^X6^Y6,      X1^X6^Y8,      X2,            Y2,            }, // 319
    {Y6^X7,         X2,            Y2,            Y3^X7,         }, // 320
    {Y1^Y6^X7,      X2,            X3,            Y3^X8,         }, // 321
    {Y1^Y6^X7,      X3,            Y3,            X2^Y7^X8,      }, // 322
    {Y6^X7,         X6^Y7,         X2,            Y2,            }, // 323
    {Y1^Y6^X7,      Z3^X6^Y7,      X2,            X3,            }, // 324
    {Y1^Y6^X7,      X1^X6^Y7,      X3,            Y3,            }, // 325
    {X2^Y6,         Y3^X5,         X3,            Y4,            }, // 326
    {X1^Y7,         Y3^X5,         X2^Y6,         Y4,            }, // 327
    {Y2^X6,         X1^Y7,         Y3^X5,         X2^Y6,         }, // 328
    {Y1,            Y2^X6,         X1^Y7,         Y3^X5,         }, // 329
    {Y1^X7,         Y2^X6,         X2^Y6,         Y3^X5,         }, // 330
    {X1,            Y1^X7,         Y2^X6,         X2^Y6,         }, // 331
    {X1,            Y1,            X2^Y8,         Y2^X6,         }, // 332
    {Y0,            X1,            Y1^X7,         Y2^X6,         }, // 333
    {X2,            Y2^X8,         Y3^X7,         X3^Y7,         }, // 334
    {X1,            X2,            Y2^X8,         Y3^X7,         }, // 335
    {Y1^X6^Y6,      X1,            X2,            Y2^X7,         }, // 336
    {X2,            X3,            Y3^X8,         Y2^X7^Y7,      }, // 337
    {X6^Y6,         Y1,            X2,            Y2^X7,         }, // 338
    {Y1^X6^Y6,      X2,            X3,            Y3^X8,         }, // 339
    {X6^Y6,         Y2^X6^Y8,      Y1,            X2,            }, // 340
    {Y1^X6^Y6,      X2^X6^Y8,      Y2,            X3,            }, // 341
    {Y1^X6^Y6,      Y3^X8,         X2,            X3,            }, // 342
    {Y6^X7,         X2,            X3,            Y3^X8,         }, // 343
    {Y1^Y6^X7,      X3,            Y2,            Y3^X8^Y8,      }, // 344
    {Y6^X7,         Y2^X6^Y7,      X2,            X3,            }, // 345
    {Y1^Y6^X7,      X1^X6^Y7,      X3,            Y2,            }, // 346
    {X4,            Z4,            Y4,            X5,            }, // 347
    {X3,            Z4,            Y4,            X4,            }, // 348
    {X3,            Z3,            Y4,            X4,            }, // 349
    {X3,            Z3,            Y3,            X4,            }, // 350
    {X2,            Z3,            Y3,            X3,            }, // 351
    {X4^Y4^Z4,      Z4,            Y4,            X5,            }, // 352
    {X3^Y4^Z4,      Z4,            Y4,            X4,            }, // 353
    {X3^Z3^Y4,      Z3,            Y4,            X4,            }, // 354
    {X3^Y3^Z3,      Z3,            Y3,            X4,            }, // 355
    {X2^Y3^Z3,      Z3,            Y3,            X3,            }, // 356
    {X4^Y5^Z5,      Y4^Z4^X5,      Y4,            X5,            }, // 357
    {X3^Y5^Z5,      X4^Y4^Z4,      Y4,            X4,            }, // 358
    {X3^Z4^Y5,      Z3^X4^Y4,      Y4,            X4,            }, // 359
    {X3^Y4^Z4,      Y3^Z3^X4,      Y3,            X4,            }, // 360
    {X2^Y4^Z4,      X3^Y3^Z3,      Y3,            X3,            }, // 361
    {X4,            Y4^Z4^X5,      Y4,            X5,            }, // 362
    {X3,            X4^Y4^Z4,      Y4,            X4,            }, // 363
    {X3,            Z3^X4^Y4,      Y4,            X4,            }, // 364
    {X3,            Y3^Z3^X4,      Y3,            X4,            }, // 365
    {X2,            X3^Y3^Z3,      Y3,            X3,            }, // 366
    {X3,            Z4,            Y4,            X5,            }, // 367
    {X2,            Z4,            Y4,            X3,            }, // 368
    {X2,            Z3,            Y4,            X3,            }, // 369
    {Y3,            X3,            Z4,            X5,            }, // 370
    {Y3,            X2,            Z4,            X3,            }, // 371
    {Y3,            X2,            Z3,            X3,            }, // 372
    {Y2,            X2,            Y3,            X3,            }, // 373
    {Z3,            X3,            Z4,            X5^Y5,         }, // 374
    {X2,            Z4,            X3,            Y2^X5^Y5,      }, // 375
    {X2,            Z3,            X3,            Y2^X5^Y5,      }, // 376
    {X2,            Y3,            X3,            Y1^X5^Y5,      }, // 377
    {X2,            Y3,            X3,            X1^X5^Y5,      }, // 378
    {Y3,            Z3,            X3,            Z4,            }, // 379
    {Y2,            Y3,            X3,            Z4,            }, // 380
    {Z3,            X3,            Z4,            X5^Y6,         }, // 381
    {X2,            Z4,            X3,            Z3^X5^Y6,      }, // 382
    {X2,            Z3,            X3,            Z2^X5^Y6,      }, // 383
    {X2,            Y3,            X3,            Z2^X5^Y6,      }, // 384
    {Z3^X7,         Y3,            X3,            Z4,            }, // 385
    {Z3^X7,         X2,            Z4,            X3,            }, // 386
    {Z2^X7,         X2,            Z3,            X3,            }, // 387
    {Z2^X7,         X2,            Y3,            X3,            }, // 388
    {Z3,            X3,            Z4,            Y3^X6^Y6,      }, // 389
    {X2,            Z4,            X3,            Y3^X6^Y6,      }, // 390
    {X2,            Z3,            X3,            Y3^X6^Y6,      }, // 391
    {X2,            Y3,            X3,            Y2^X6^Y6,      }, // 392
    {Y3^X6^Y6,      Z3,            X3,            Z4,            }, // 393
    {Y3^X6^Y6,      X2,            Z4,            X3,            }, // 394
    {Y3^X6^Y6,      X2,            Z3,            X3,            }, // 395
    {Y2^X6^Y6,      X2,            Y3,            X3,            }, // 396
    {Y3^X6^Y6,      Z3^X8,         X3,            Z4,            }, // 397
    {X2^X6^Y6,      Z3^X8,         Z4,            X3,            }, // 398
    {X2^X6^Y6,      Z2^X8,         Z3,            X3,            }, // 399
    {X2^X6^Y6,      Z2^X8,         Y3,            X3,            }, // 400
    {Y3^Y6^X7,      X3,            Z4,            Z3^X6^Y7,      }, // 401
    {Y3^Y6^X7,      Z4,            X3,            X2^X6^Y7,      }, // 402
    {Y3^Y6^X7,      Z3,            X3,            X2^X6^Y7,      }, // 403
    {Y2^Y6^X7,      Y3,            X3,            X2^X6^Y7,      }, // 404
    {Y3^Y6^X7,      Z3^X6^Y7,      X3,            Z4,            }, // 405
    {Y3^Y6^X7,      X2^X6^Y7,      Z4,            X3,            }, // 406
    {Y3^Y6^X7,      X2^X6^Y7,      Z3,            X3,            }, // 407
    {Y2^Y6^X7,      X2^X6^Y7,      Y3,            X3,            }, // 408
};

// GFX11_SW_PATTERN_NIBBLE4: table of 4-element UINT_64 rows. The trailing "// N"
// comment on each row is that row's index (0..123).
//
// Every element is either 0 or an XOR of coordinate-bit symbols (X*, Y*, Z*)
// that are defined outside this table. In every row of this table only the
// first two columns are ever non-zero; columns 2 and 3 are always 0.
//
// NOTE(review): presumably each row holds the bit equations for one 4-bit
// group ("nibble") of a swizzled address and is selected by row index from a
// separate pattern-info table -- confirm against the consuming code. Row order
// must not change if rows are referenced by index.
const UINT_64 GFX11_SW_PATTERN_NIBBLE4[][4] =
{
    {0,             0,             0,             0,             }, // 0
    {Y8,            X8,            0,             0,             }, // 1
    {Y7,            X8,            0,             0,             }, // 2
    {Y7,            X7,            0,             0,             }, // 3
    {Y6,            X7,            0,             0,             }, // 4
    {Y6,            X6,            0,             0,             }, // 5
    {X8,            Y8,            0,             0,             }, // 6
    {X7,            Y7,            0,             0,             }, // 7
    {X6,            Y6,            0,             0,             }, // 8
    {X8,            Y7,            0,             0,             }, // 9
    {X7,            Y6,            0,             0,             }, // 10
    {X8^Y8,         Y8,            0,             0,             }, // 11
    {Y7^X8,         Y7,            0,             0,             }, // 12
    {X7^Y7,         Y7,            0,             0,             }, // 13
    {Y6^X7,         Y6,            0,             0,             }, // 14
    {X6^Y6,         Y6,            0,             0,             }, // 15
    {Y8,            Z0^X5^Y5,      0,             0,             }, // 16
    {X8,            Z0^X5^Y5,      0,             0,             }, // 17
    {Y7,            Z0^X5^Y5,      0,             0,             }, // 18
    {X7,            Z0^X5^Y5,      0,             0,             }, // 19
    {Y6,            Z0^X5^Y5,      0,             0,             }, // 20
    {Y7^X8,         X8,            0,             0,             }, // 21
    {Y6^X7,         X7,            0,             0,             }, // 22
    {X8^Y9,         Y8^X9,         0,             0,             }, // 23
    {Y7^X9,         X8^Y8,         0,             0,             }, // 24
    {X7^Y8,         Y7^X8,         0,             0,             }, // 25
    {Y6^X8,         X7^Y7,         0,             0,             }, // 26
    {X6^Y7,         Y6^X7,         0,             0,             }, // 27
    {Y8^X9,         X5^Y6,         0,             0,             }, // 28
    {Y7^X9,         X5^Y6,         0,             0,             }, // 29
    {Y7^X8,         X5^Y6,         0,             0,             }, // 30
    {Y6^X8,         Z0^X5^Y6,      0,             0,             }, // 31
    {Y6^X7,         Z0^X5^Y6,      0,             0,             }, // 32
    {X8^Y8,         Y7^X9,         0,             0,             }, // 33
    {X7^Y7,         Y6^X8,         0,             0,             }, // 34
    {X3^Y7,         Y6^X7,         0,             0,             }, // 35
    {Y8^X9,         X6^Y6,         0,             0,             }, // 36
    {X8^Y8,         X6^Y6,         0,             0,             }, // 37
    {Y7^X8,         X6^Y6,         0,             0,             }, // 38
    {X7^Y7,         Z0^X6^Y6,      0,             0,             }, // 39
    {X6^Y7,         Z0^X6^Y6,      0,             0,             }, // 40
    {X6^Y8,         X7^Y7,         0,             0,             }, // 41
    {Y3^X7,         X6^Y7,         0,             0,             }, // 42
    {Y3^X8,         X7^Y7,         0,             0,             }, // 43
    {X3^Y7,         Y3^X7,         0,             0,             }, // 44
    {Y8^X9,         X6^Y7,         0,             0,             }, // 45
    {Y7^X9,         X6^Y7,         0,             0,             }, // 46
    {Y7^X8,         X6^Y7,         0,             0,             }, // 47
    {X7^Y7,         Z0^X6^Y7,      0,             0,             }, // 48
    {X3^Y7,         Z0^X6^Y7,      0,             0,             }, // 49
    {Y3^X7,         X3^Y7,         0,             0,             }, // 50
    {X7,            Y8,            0,             0,             }, // 51
    {X6,            Y7,            0,             0,             }, // 52
    {X5,            Y6,            0,             0,             }, // 53
    {X7^Y8,         Y8,            0,             0,             }, // 54
    {X6^Y7,         Y7,            0,             0,             }, // 55
    {X5^Y6,         Y6,            0,             0,             }, // 56
    {X7^Y9,         X8^Y8,         0,             0,             }, // 57
    {X5^Y8,         X6^Y7,         0,             0,             }, // 58
    {X3^Y8,         X5^Y7,         0,             0,             }, // 59
    {X8^Y8,         X5^Y6,         0,             0,             }, // 60
    {X7^Y7,         X5^Y6,         0,             0,             }, // 61
    {X6^Y6,         Z0^X5^Y6,      0,             0,             }, // 62
    {X3^Y7,         X6^Y6,         0,             0,             }, // 63
    {X3^Y8,         X6^Y7,         0,             0,             }, // 64
    {X2^Y8,         X3^Y7,         0,             0,             }, // 65
    {X7^Y7,         X6^Y6,         0,             0,             }, // 66
    {X3^Y7,         Z0^X6^Y6,      0,             0,             }, // 67
    {Y3^X6,         X3^Y7,         0,             0,             }, // 68
    {Y2^X7,         Y3^X6,         0,             0,             }, // 69
    {X8^Y8,         X6^Y7,         0,             0,             }, // 70
    {X7^Y7,         X6^Y7,         0,             0,             }, // 71
    {X3^Y7,         Z4^X6^Y7,      0,             0,             }, // 72
    {Y2^X7^Y7,      Y1^X6^Y7,      0,             0,             }, // 73
    {Y3^X8,         Y2^X7^Y7,      0,             0,             }, // 74
    {Y5,            X6,            0,             0,             }, // 75
    {X5,            Y5,            0,             0,             }, // 76
    {X6,            Y5,            0,             0,             }, // 77
    {X6^Y6,         Y5,            0,             0,             }, // 78
    {X3^Y6,         Y5,            0,             0,             }, // 79
    {X6,            Z0^X5^Y5,      0,             0,             }, // 80
    {X5,            Z0^X5^Y5,      0,             0,             }, // 81
    {X5^Y6,         X6,            0,             0,             }, // 82
    {Y3^X5,         X5,            0,             0,             }, // 83
    {Y3^X7,         X6^Y6,         0,             0,             }, // 84
    {X3^Y6,         Y3^X6,         0,             0,             }, // 85
    {Y6^X8,         X5^Y6,         0,             0,             }, // 86
    {Y6^X7,         X5^Y6,         0,             0,             }, // 87
    {X3^Y6,         Z0^X5^Y6,      0,             0,             }, // 88
    {Y3^X6,         X3^Y6,         0,             0,             }, // 89
    {X3^Y6,         Y3^X7,         0,             0,             }, // 90
    {X2^Y6,         Y3^X6,         0,             0,             }, // 91
    {X6^Y7,         X6^Y6,         0,             0,             }, // 92
    {X3^Y6,         Z3^X6^Y6,      0,             0,             }, // 93
    {X1^X6^Y7,      Y1^X6^Y6,      0,             0,             }, // 94
    {Y3^X7,         X3^Y6,         0,             0,             }, // 95
    {X3^Y8,         X1^X6^Y7,      0,             0,             }, // 96
    {Y2^X8,         Y3^X7,         0,             0,             }, // 97
    {X3^Y7,         X6^Y7,         0,             0,             }, // 98
    {Y2^X7^Y7,      Z3^X6^Y7,      0,             0,             }, // 99
    {Y2^X7^Y8,      X1^X6^Y7,      0,             0,             }, // 100
    {X2^Y7^X8,      Y2^X7^Y8,      0,             0,             }, // 101
    {X4,            Y5,            0,             0,             }, // 102
    {X3,            Y5,            0,             0,             }, // 103
    {X3^Y6,         X5,            0,             0,             }, // 104
    {X2^Y6,         X3,            0,             0,             }, // 105
    {X3,            Z0^X5^Y5,      0,             0,             }, // 106
    {Y3^X5,         X3,            0,             0,             }, // 107
    {X3^Y7,         X5^Y6,         0,             0,             }, // 108
    {X2^Y6,         Y3^X5,         0,             0,             }, // 109
    {X6^Y6,         X5^Y6,         0,             0,             }, // 110
    {X3^Y6,         Z2^X5^Y6,      0,             0,             }, // 111
    {Y1^X6^Y6,      Y0^X5^Y6,      0,             0,             }, // 112
    {X3^Y7,         Y1^X6^Y6,      0,             0,             }, // 113
    {X1^X6^Y8,      Y1^X6^Y6,      0,             0,             }, // 114
    {Y2^X7^Y7,      X1^X6^Y8,      0,             0,             }, // 115
    {Y3^X7,         X1^X6^Y7,      0,             0,             }, // 116
    {Y1^X7^Y7,      Y2^X6^Y7,      0,             0,             }, // 117
    {X2^Y7^X9,      X1^X6^Y7,      0,             0,             }, // 118
    {Y3^X8,         Y1^X7^Y7,      0,             0,             }, // 119
    {Y3^X8^Y8,      X2^Y7^X9,      0,             0,             }, // 120
    {Z5,            Y5,            0,             0,             }, // 121
    {Z4,            Y5,            0,             0,             }, // 122
    {Z4,            Y4,            0,             0,             }, // 123
};

// GFX11_DCC_64K_R_X_PATIDX: 80 UINT_8 values, one per configuration named in
// the trailing comment of each entry (all SW_64K_{Z,R}_X, 1xaa).
//
// Layout, in order:
//   - 5 "ua" entries, one per element size (1/2/4/8/16 bpe);
//   - 15 groups of 5 "pa" entries (1/2/4/8/16 bpe), one group per
//     pipes/PKRs combination in the order listed below.
// Values range from 0 to 65 and repeat where several configurations share the
// same value (e.g. 35..37 appear for both 8-pipe and 16-pipe 8-PKR configs).
//
// NOTE(review): the values look like row indices into a DCC pattern table
// (presumably GFX11_DCC_R_X_SW_PATTERN), and "ua"/"pa" presumably mean
// pipe-unaligned / pipe-aligned -- confirm against the consuming code. Entry
// order must match whatever index computation the consumer uses.
const UINT_8 GFX11_DCC_64K_R_X_PATIDX[] =
{
       0, // 1 bpe ua @ SW_64K_{Z,R}_X 1xaa
       1, // 2 bpe ua @ SW_64K_{Z,R}_X 1xaa
       2, // 4 bpe ua @ SW_64K_{Z,R}_X 1xaa
       3, // 8 bpe ua @ SW_64K_{Z,R}_X 1xaa
       4, // 16 bpe ua @ SW_64K_{Z,R}_X 1xaa
       0, // 1 pipes (1 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
       1, // 1 pipes (1 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
       2, // 1 pipes (1 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
       3, // 1 pipes (1 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
       4, // 1 pipes (1 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
       5, // 2 pipes (1-2 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
       6, // 2 pipes (1-2 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
       7, // 2 pipes (1-2 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
       8, // 2 pipes (1-2 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
       9, // 2 pipes (1-2 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      10, // 4 pipes (1-2 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      11, // 4 pipes (1-2 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      12, // 4 pipes (1-2 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      13, // 4 pipes (1-2 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      14, // 4 pipes (1-2 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      15, // 8 pipes (2 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      16, // 8 pipes (2 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      17, // 8 pipes (2 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      18, // 8 pipes (2 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      19, // 8 pipes (2 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      20, // 4 pipes (4 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      21, // 4 pipes (4 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      22, // 4 pipes (4 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      23, // 4 pipes (4 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      24, // 4 pipes (4 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      25, // 8 pipes (4 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      26, // 8 pipes (4 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      27, // 8 pipes (4 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      28, // 8 pipes (4 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      29, // 8 pipes (4 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      30, // 16 pipes (4 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      31, // 16 pipes (4 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      32, // 16 pipes (4 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      33, // 16 pipes (4 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      34, // 16 pipes (4 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      35, // 8 pipes (8 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      36, // 8 pipes (8 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      37, // 8 pipes (8 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      38, // 8 pipes (8 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      39, // 8 pipes (8 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      35, // 16 pipes (8 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      36, // 16 pipes (8 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      37, // 16 pipes (8 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      40, // 16 pipes (8 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      41, // 16 pipes (8 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      42, // 32 pipes (8 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      43, // 32 pipes (8 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      44, // 32 pipes (8 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      45, // 32 pipes (8 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      46, // 32 pipes (8 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      47, // 16 pipes (16 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      48, // 16 pipes (16 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      49, // 16 pipes (16 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      50, // 16 pipes (16 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      51, // 16 pipes (16 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      47, // 32 pipes (16 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      48, // 32 pipes (16 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      49, // 32 pipes (16 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      52, // 32 pipes (16 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      53, // 32 pipes (16 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      54, // 64 pipes (16 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      55, // 64 pipes (16 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      56, // 64 pipes (16 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      57, // 64 pipes (16 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      58, // 64 pipes (16 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      59, // 32 pipes (32 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      60, // 32 pipes (32 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      61, // 32 pipes (32 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      62, // 32 pipes (32 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      63, // 32 pipes (32 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
      59, // 64 pipes (32 PKRs) 1 bpe pa @ SW_64K_{Z,R}_X 1xaa
      60, // 64 pipes (32 PKRs) 2 bpe pa @ SW_64K_{Z,R}_X 1xaa
      61, // 64 pipes (32 PKRs) 4 bpe pa @ SW_64K_{Z,R}_X 1xaa
      64, // 64 pipes (32 PKRs) 8 bpe pa @ SW_64K_{Z,R}_X 1xaa
      65, // 64 pipes (32 PKRs) 16 bpe pa @ SW_64K_{Z,R}_X 1xaa
};

// GFX11_DCC_256K_R_X_PATIDX: 80 UINT_8 values, one per configuration named in
// the trailing comment of each entry (all SW_256K_{Z,R}_X, 1xaa).
//
// Layout, in order:
//   - 5 "ua" entries, one per element size (1/2/4/8/16 bpe);
//   - 15 groups of 5 "pa" entries (1/2/4/8/16 bpe), one group per
//     pipes/PKRs combination in the order listed below.
// Values range from 0 to 74. Most entries carry the same value as the
// same-position entry of GFX11_DCC_64K_R_X_PATIDX; the entries valued 66..74
// (larger bpe at higher pipe/PKR counts) are where the two tables differ.
//
// NOTE(review): the values look like row indices into a DCC pattern table
// (presumably GFX11_DCC_R_X_SW_PATTERN), and "ua"/"pa" presumably mean
// pipe-unaligned / pipe-aligned -- confirm against the consuming code. Entry
// order must match whatever index computation the consumer uses.
const UINT_8 GFX11_DCC_256K_R_X_PATIDX[] =
{
       0, // 1 bpe ua @ SW_256K_{Z,R}_X 1xaa
       1, // 2 bpe ua @ SW_256K_{Z,R}_X 1xaa
       2, // 4 bpe ua @ SW_256K_{Z,R}_X 1xaa
       3, // 8 bpe ua @ SW_256K_{Z,R}_X 1xaa
       4, // 16 bpe ua @ SW_256K_{Z,R}_X 1xaa
       0, // 1 pipes (1 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
       1, // 1 pipes (1 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
       2, // 1 pipes (1 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
       3, // 1 pipes (1 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
       4, // 1 pipes (1 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
       5, // 2 pipes (1-2 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
       6, // 2 pipes (1-2 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
       7, // 2 pipes (1-2 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
       8, // 2 pipes (1-2 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
       9, // 2 pipes (1-2 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      10, // 4 pipes (1-2 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      11, // 4 pipes (1-2 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      12, // 4 pipes (1-2 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      13, // 4 pipes (1-2 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      14, // 4 pipes (1-2 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      15, // 8 pipes (2 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      16, // 8 pipes (2 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      17, // 8 pipes (2 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      18, // 8 pipes (2 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      19, // 8 pipes (2 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      20, // 4 pipes (4 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      21, // 4 pipes (4 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      22, // 4 pipes (4 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      23, // 4 pipes (4 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      24, // 4 pipes (4 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      25, // 8 pipes (4 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      26, // 8 pipes (4 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      27, // 8 pipes (4 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      28, // 8 pipes (4 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      29, // 8 pipes (4 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      30, // 16 pipes (4 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      31, // 16 pipes (4 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      32, // 16 pipes (4 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      33, // 16 pipes (4 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      34, // 16 pipes (4 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      35, // 8 pipes (8 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      36, // 8 pipes (8 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      37, // 8 pipes (8 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      38, // 8 pipes (8 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      39, // 8 pipes (8 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      35, // 16 pipes (8 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      36, // 16 pipes (8 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      37, // 16 pipes (8 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      40, // 16 pipes (8 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      41, // 16 pipes (8 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      42, // 32 pipes (8 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      43, // 32 pipes (8 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      44, // 32 pipes (8 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      45, // 32 pipes (8 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      66, // 32 pipes (8 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      47, // 16 pipes (16 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      48, // 16 pipes (16 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      49, // 16 pipes (16 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      50, // 16 pipes (16 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      67, // 16 pipes (16 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      47, // 32 pipes (16 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      48, // 32 pipes (16 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      49, // 32 pipes (16 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      52, // 32 pipes (16 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      68, // 32 pipes (16 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      54, // 64 pipes (16 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      55, // 64 pipes (16 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      56, // 64 pipes (16 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      69, // 64 pipes (16 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      70, // 64 pipes (16 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      59, // 32 pipes (32 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      60, // 32 pipes (32 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      61, // 32 pipes (32 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      71, // 32 pipes (32 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      72, // 32 pipes (32 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
      59, // 64 pipes (32 PKRs) 1 bpe pa @ SW_256K_{Z,R}_X 1xaa
      60, // 64 pipes (32 PKRs) 2 bpe pa @ SW_256K_{Z,R}_X 1xaa
      61, // 64 pipes (32 PKRs) 4 bpe pa @ SW_256K_{Z,R}_X 1xaa
      73, // 64 pipes (32 PKRs) 8 bpe pa @ SW_256K_{Z,R}_X 1xaa
      74, // 64 pipes (32 PKRs) 16 bpe pa @ SW_256K_{Z,R}_X 1xaa
};

// GFX11_HTILE_PATIDX: 80 UINT_8 values, one per configuration named in the
// trailing comment of each entry (all HTILE_64K).
//
// Layout, in order:
//   - 4 "ua" entries, one per sample count (1x/2x/4x/8x aa);
//   - 19 groups of 4 "pa" entries (1x/2x/4x/8x aa), one group per
//     pipes/PKRs combination in the order listed below.
// Within every group all four sample counts carry the same value, so the
// value depends only on the pipes/PKRs combination. Values range from 0 to 17.
//
// NOTE(review): the values look like row indices into an HTILE pattern table
// defined elsewhere, and "ua"/"pa" presumably mean pipe-unaligned /
// pipe-aligned -- confirm against the consuming code. Entry order must match
// whatever index computation the consumer uses.
const UINT_8 GFX11_HTILE_PATIDX[] =
{
       0, // 1xaa ua @ HTILE_64K
       0, // 2xaa ua @ HTILE_64K
       0, // 4xaa ua @ HTILE_64K
       0, // 8xaa ua @ HTILE_64K
       0, // 1 pipes (1-2 PKRs) 1xaa pa @ HTILE_64K
       0, // 1 pipes (1-2 PKRs) 2xaa pa @ HTILE_64K
       0, // 1 pipes (1-2 PKRs) 4xaa pa @ HTILE_64K
       0, // 1 pipes (1-2 PKRs) 8xaa pa @ HTILE_64K
       1, // 2 pipes (1-2 PKRs) 1xaa pa @ HTILE_64K
       1, // 2 pipes (1-2 PKRs) 2xaa pa @ HTILE_64K
       1, // 2 pipes (1-2 PKRs) 4xaa pa @ HTILE_64K
       1, // 2 pipes (1-2 PKRs) 8xaa pa @ HTILE_64K
       2, // 4 pipes (1-2 PKRs) 1xaa pa @ HTILE_64K
       2, // 4 pipes (1-2 PKRs) 2xaa pa @ HTILE_64K
       2, // 4 pipes (1-2 PKRs) 4xaa pa @ HTILE_64K
       2, // 4 pipes (1-2 PKRs) 8xaa pa @ HTILE_64K
       3, // 8 pipes (1-2 PKRs) 1xaa pa @ HTILE_64K
       3, // 8 pipes (1-2 PKRs) 2xaa pa @ HTILE_64K
       3, // 8 pipes (1-2 PKRs) 4xaa pa @ HTILE_64K
       3, // 8 pipes (1-2 PKRs) 8xaa pa @ HTILE_64K
       1, // 2 pipes (4 PKRs) 1xaa pa @ HTILE_64K
       1, // 2 pipes (4 PKRs) 2xaa pa @ HTILE_64K
       1, // 2 pipes (4 PKRs) 4xaa pa @ HTILE_64K
       1, // 2 pipes (4 PKRs) 8xaa pa @ HTILE_64K
       4, // 4 pipes (4 PKRs) 1xaa pa @ HTILE_64K
       4, // 4 pipes (4 PKRs) 2xaa pa @ HTILE_64K
       4, // 4 pipes (4 PKRs) 4xaa pa @ HTILE_64K
       4, // 4 pipes (4 PKRs) 8xaa pa @ HTILE_64K
       5, // 8 pipes (4 PKRs) 1xaa pa @ HTILE_64K
       5, // 8 pipes (4 PKRs) 2xaa pa @ HTILE_64K
       5, // 8 pipes (4 PKRs) 4xaa pa @ HTILE_64K
       5, // 8 pipes (4 PKRs) 8xaa pa @ HTILE_64K
       6, // 16 pipes (4 PKRs) 1xaa pa @ HTILE_64K
       6, // 16 pipes (4 PKRs) 2xaa pa @ HTILE_64K
       6, // 16 pipes (4 PKRs) 4xaa pa @ HTILE_64K
       6, // 16 pipes (4 PKRs) 8xaa pa @ HTILE_64K
       7, // 4 pipes (8 PKRs) 1xaa pa @ HTILE_64K
       7, // 4 pipes (8 PKRs) 2xaa pa @ HTILE_64K
       7, // 4 pipes (8 PKRs) 4xaa pa @ HTILE_64K
       7, // 4 pipes (8 PKRs) 8xaa pa @ HTILE_64K
       8, // 8 pipes (8 PKRs) 1xaa pa @ HTILE_64K
       8, // 8 pipes (8 PKRs) 2xaa pa @ HTILE_64K
       8, // 8 pipes (8 PKRs) 4xaa pa @ HTILE_64K
       8, // 8 pipes (8 PKRs) 8xaa pa @ HTILE_64K
       9, // 16 pipes (8 PKRs) 1xaa pa @ HTILE_64K
       9, // 16 pipes (8 PKRs) 2xaa pa @ HTILE_64K
       9, // 16 pipes (8 PKRs) 4xaa pa @ HTILE_64K
       9, // 16 pipes (8 PKRs) 8xaa pa @ HTILE_64K
      10, // 32 pipes (8 PKRs) 1xaa pa @ HTILE_64K
      10, // 32 pipes (8 PKRs) 2xaa pa @ HTILE_64K
      10, // 32 pipes (8 PKRs) 4xaa pa @ HTILE_64K
      10, // 32 pipes (8 PKRs) 8xaa pa @ HTILE_64K
      11, // 8 pipes (16 PKRs) 1xaa pa @ HTILE_64K
      11, // 8 pipes (16 PKRs) 2xaa pa @ HTILE_64K
      11, // 8 pipes (16 PKRs) 4xaa pa @ HTILE_64K
      11, // 8 pipes (16 PKRs) 8xaa pa @ HTILE_64K
      12, // 16 pipes (16 PKRs) 1xaa pa @ HTILE_64K
      12, // 16 pipes (16 PKRs) 2xaa pa @ HTILE_64K
      12, // 16 pipes (16 PKRs) 4xaa pa @ HTILE_64K
      12, // 16 pipes (16 PKRs) 8xaa pa @ HTILE_64K
      13, // 32 pipes (16 PKRs) 1xaa pa @ HTILE_64K
      13, // 32 pipes (16 PKRs) 2xaa pa @ HTILE_64K
      13, // 32 pipes (16 PKRs) 4xaa pa @ HTILE_64K
      13, // 32 pipes (16 PKRs) 8xaa pa @ HTILE_64K
      14, // 64 pipes (16 PKRs) 1xaa pa @ HTILE_64K
      14, // 64 pipes (16 PKRs) 2xaa pa @ HTILE_64K
      14, // 64 pipes (16 PKRs) 4xaa pa @ HTILE_64K
      14, // 64 pipes (16 PKRs) 8xaa pa @ HTILE_64K
      15, // 16 pipes (32 PKRs) 1xaa pa @ HTILE_64K
      15, // 16 pipes (32 PKRs) 2xaa pa @ HTILE_64K
      15, // 16 pipes (32 PKRs) 4xaa pa @ HTILE_64K
      15, // 16 pipes (32 PKRs) 8xaa pa @ HTILE_64K
      16, // 32 pipes (32 PKRs) 1xaa pa @ HTILE_64K
      16, // 32 pipes (32 PKRs) 2xaa pa @ HTILE_64K
      16, // 32 pipes (32 PKRs) 4xaa pa @ HTILE_64K
      16, // 32 pipes (32 PKRs) 8xaa pa @ HTILE_64K
      17, // 64 pipes (32 PKRs) 1xaa pa @ HTILE_64K
      17, // 64 pipes (32 PKRs) 2xaa pa @ HTILE_64K
      17, // 64 pipes (32 PKRs) 4xaa pa @ HTILE_64K
      17, // 64 pipes (32 PKRs) 8xaa pa @ HTILE_64K
};

// GFX11_DCC_R_X_SW_PATTERN
//
// Table of 75 rows (0..74, the index is repeated in the trailing "//N" comment), each holding 17 UINT_64 entries.
// Every entry is either 0, a single coordinate-bit constant (Xn / Yn / Zn), or several such constants combined with ^.
// In every row the first entry and the last two entries are 0.
//
// NOTE(review): presumably each column describes one bit of the DCC address equation for the *_R_X swizzle modes and
// the row is selected through a separate index table -- confirm against the code that consumes this table.
// NOTE(review): the data looks machine-generated; verify any manual change against the hardware documentation.
const UINT_64 GFX11_DCC_R_X_SW_PATTERN[][17] =
{
    {0,             X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            0,             0,             0,             0,             }, //0
    {0,             Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            0,             0,             0,             0,             }, //1
    {0,             X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            0,             0,             0,             0,             }, //2
    {0,             Y2,            X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            0,             0,             0,             0,             }, //3
    {0,             X2,            Y2,            X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            0,             0,             0,             0,             }, //4
    {0,             Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Z0^X4^Y4,      Y8,            X9,            Y9,            0,             0,             0,             0,             }, //5
    {0,             Y3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Z0^X4^Y4,      X8,            Y8,            X9,            0,             0,             0,             0,             }, //6
    {0,             X3,            Y3,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Z0^X4^Y4,      Y7,            X8,            Y8,            0,             0,             0,             0,             }, //7
    {0,             Y2,            X3,            Y3,            Y4,            X5,            Y5,            X6,            Y6,            Z0^X4^Y4,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //8
    {0,             X2,            Y2,            X3,            Y3,            Y4,            X5,            Y5,            X6,            Z0^X4^Y4,      Y6,            X7,            Y7,            0,             0,             0,             0,             }, //9
    {0,             X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X5^Y5,      Z0^X4^Y4,      X9,            Y9,            0,             0,             0,             0,             }, //10
    {0,             Y3,            X5,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X5^Y5,      Z0^X4^Y4,      Y8,            X9,            0,             0,             0,             0,             }, //11
    {0,             X3,            Y3,            X5,            Y5,            X6,            Y6,            X7,            Y7,            Y4^X5^Y5,      Z0^X4^Y4,      X8,            Y8,            0,             0,             0,             0,             }, //12
    {0,             Y2,            X3,            Y3,            X5,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      Y7,            X8,            0,             0,             0,             0,             }, //13
    {0,             X2,            Y2,            X3,            Y3,            X5,            Y5,            X6,            Y6,            Y4^X5^Y5,      Z0^X4^Y4,      X7,            Y7,            0,             0,             0,             0,             }, //14
    {0,             Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y9,            0,             0,             0,             0,             }, //15
    {0,             Y3,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X9,            0,             0,             0,             0,             }, //16
    {0,             X3,            Y3,            Y5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y8,            0,             0,             0,             0,             }, //17
    {0,             Y2,            X3,            Y3,            Y5,            X6,            Y6,            X7,            Y7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         X8,            0,             0,             0,             0,             }, //18
    {0,             X2,            Y2,            X3,            Y3,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y7,            0,             0,             0,             0,             }, //19
    {0,             X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y9,            0,             0,             0,             0,             }, //20
    {0,             Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         X9,            0,             0,             0,             0,             }, //21
    {0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y8,            0,             0,             0,             0,             }, //22
    {0,             Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         X8,            0,             0,             0,             0,             }, //23
    {0,             X2,            Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      X5^Y5,         Y7,            0,             0,             0,             0,             }, //24
    {0,             X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y9,            0,             0,             0,             0,             }, //25
    {0,             Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X9,            0,             0,             0,             0,             }, //26
    {0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y8,            0,             0,             0,             0,             }, //27
    {0,             Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X8,            0,             0,             0,             0,             }, //28
    {0,             X2,            Y2,            X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            0,             0,             0,             0,             }, //29
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //30
    {0,             Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //31
    {0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //32
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //33
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         0,             0,             0,             0,             }, //34
    {0,             X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //35
    {0,             Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //36
    {0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //37
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //38
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      X5^Y6,         0,             0,             0,             0,             }, //39
    {0,             Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //40
    {0,             X2,            Y2,            X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      0,             0,             0,             0,             }, //41
    {0,             Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         0,             0,             0,             }, //42
    {0,             Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         0,             0,             0,             }, //43
    {0,             X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         0,             0,             0,             }, //44
    {0,             Y2,            X3,            Y3,            Y6,            X7,            Y7,            X8,            Y8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^Y6,         0,             0,             0,             }, //45
    {0,             X2,            Y2,            Y3,            X6,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X3^Y6,         0,             0,             0,             }, //46
    {0,             X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         0,             0,             0,             }, //47
    {0,             Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         0,             0,             0,             }, //48
    {0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         0,             0,             0,             }, //49
    {0,             Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //50
    {0,             X2,            X3,            Y3,            X6,            X7,            Y7,            Y2,            X8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      X6^Y6,         0,             0,             0,             }, //51
    {0,             Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //52
    {0,             X2,            X3,            Y3,            X6,            X7,            Y7,            Y2,            X8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      Y2^X6^Y6,      0,             0,             0,             }, //53
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         0,             0,             }, //54
    {0,             Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         0,             0,             }, //55
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         0,             0,             }, //56
    {0,             Y2,            Y3,            X6,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X3^Y8,         0,             0,             }, //57
    {0,             X2,            Y3,            X6,            X7,            Y7,            X8,            Y2,            Y8,            Y4^X8^Y8,      Z2^X4^Y4,      Z1^Y5^X7,      Z0^X5^Y7,      Y2^X6^Y6,      X3^Y8,         0,             0,             }, //58
    {0,             X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y10,           Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         0,             0,             }, //59
    {0,             Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            X10,           Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         0,             0,             }, //60
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y9,            Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         0,             0,             }, //61
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y2,            X9,            Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y2^Y6^X7,      X6^Y7,         0,             0,             }, //62
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X2,            Y2,            Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y2^Y6^X7,      X6^Y7,         0,             0,             }, //63
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y2,            X9,            Y4^X9^Y9,      Z3^X4^Y4,      Z2^Y5^X8,      Z1^X5^Y8,      Y2^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //64
    {0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            X2,            Y2,            Y4^X9^Y9,      Z2^X4^Y4,      Z1^Y5^X8,      Z0^X5^Y8,      Y2^Y6^X7,      X2^X6^Y7,      0,             0,             }, //65
    {0,             X2,            Y2,            X3,            Y3,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z2^X4^Y4,      Z1^Y5^X6,      Z0^X5^Y6,      X6^Y6,         0,             0,             0,             }, //66
    {0,             X2,            Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      X6^Y6,         0,             0,             0,             }, //67
    {0,             X2,            Y2,            X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      0,             0,             0,             }, //68
    {0,             Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^Y8,         0,             0,             }, //69
    {0,             X2,            Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z3^X4^Y4,      Z2^Y5^X7,      Z1^X5^Y7,      Z0^X6^Y6,      X6^Y8,         0,             0,             }, //70
    {0,             Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //71
    {0,             X2,            Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      X6^Y7,         0,             0,             }, //72
    {0,             Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            X9,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //73
    {0,             X2,            Y2,            X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      X4^Y4^Z4,      Z3^Y5^X8,      Z2^X5^Y8,      Z1^Y6^X7,      Z0^X6^Y7,      0,             0,             }, //74
};

// GFX11_HTILE_SW_PATTERN
//
// Table of 18 rows (0..17, the index is repeated in the trailing "//N" comment), each holding 18 UINT_64 entries.
// Every entry is either 0, a single coordinate-bit constant (Xn / Yn / Zn), or several such constants combined with ^.
// In every row the first three entries are 0 and the next two are X3, Y3.
//
// NOTE(review): presumably each column describes one bit of the HTILE address equation and the row is selected
// through a separate index table -- confirm against the code that consumes this table.
// NOTE(review): the data looks machine-generated; verify any manual change against the hardware documentation.
const UINT_64 GFX11_HTILE_SW_PATTERN[][18] =
{
    {0,             0,             0,             X3,            Y3,            X4,            Y4,            X5,            Y5,            X6,            Y6,            X7,            Y7,            0,             0,             0,             0,             0,             }, //0
    {0,             0,             0,             X3,            Y3,            Y4,            X5,            Y5,            X6,            Z0^X4^Y4,      Y6,            X7,            Y7,            0,             0,             0,             0,             0,             }, //1
    {0,             0,             0,             X3,            Y3,            X5,            Y5,            X6,            Y6,            Y4^X5^Y5,      Z0^X4^Y4,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //2
    {0,             0,             0,             X3,            Y3,            Y5,            X6,            Y6,            X7,            Y4^X5^Y5,      Z0^X4^Y4,      X5^Y5,         Y7,            X8,            Y8,            0,             0,             0,             }, //3
    {0,             0,             0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Y7,            X8,            Y8,            X5^Y5,         0,             0,             0,             }, //4
    {0,             0,             0,             X3,            Y3,            X5,            X6,            Y6,            X7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      Y7,            X8,            Y8,            0,             0,             0,             }, //5
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X6^Y6,      Z1^X4^Y4,      Z0^X5^Y5,      X5^Y6,         X8,            Y8,            X9,            0,             0,             }, //6
    {0,             0,             0,             X3,            Y3,            Y4,            X5,            X6,            Y6,            Z1^X4^Y4,      Z0^X5^Y5,      X7,            Y7,            X8,            0,             0,             0,             0,             }, //7
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X8,            Y8,            X9,            X5^Y6,         0,             0,             }, //8
    {0,             0,             0,             X3,            Y3,            X6,            Y6,            X7,            Y7,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X8,            Y8,            X9,            0,             0,             }, //9
    {0,             0,             0,             X3,            Y3,            Y6,            X7,            Y7,            X8,            Y4^X7^Y7,      Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         X6^Y6,         Y8,            X9,            Y9,            0,             }, //10
    {0,             0,             0,             X3,            Y3,            Y4,            X6,            Y6,            X7,            Z1^X4^Y4,      Z0^Y5^X6,      X5^Y6,         Y7,            X8,            Y8,            0,             0,             0,             }, //11
    {0,             0,             0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         Y8,            X9,            Y9,            X6^Y6,         0,             }, //12
    {0,             0,             0,             X3,            Y3,            X6,            X7,            Y7,            X8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         Y8,            X9,            Y9,            0,             }, //13
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X8^Y8,      Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X6^Y8,         X9,            Y9,            X10,           }, //14
    {0,             0,             0,             X3,            Y3,            Y4,            X6,            X7,            Y7,            Z1^X4^Y4,      Z0^Y5^X7,      X5^Y7,         X6^Y6,         X8,            Y8,            X9,            0,             0,             }, //15
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X9,            Y9,            X10,           X6^Y7,         }, //16
    {0,             0,             0,             X3,            Y3,            X7,            Y7,            X8,            Y8,            Y4^X9^Y9,      Z1^X4^Y4,      Z0^Y5^X8,      X5^Y8,         Y6^X7,         X6^Y7,         X9,            Y9,            X10,           }, //17
};

}// V2
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx11addrlib.cpp
* @brief Contains the implementation of the Gfx11Lib class.
************************************************************************************************************************
*/

#include "gfx11addrlib.h"
#include "gfx11_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace rocr {
namespace Addr
{
/**
************************************************************************************************************************
*   Gfx11HwlInit
*
*   @brief
*       Factory entry point for the GFX11 address library: forwards the client to V2::Gfx11Lib::CreateObj.
*
*   @return
*       The pointer produced by V2::Gfx11Lib::CreateObj, passed through unchanged.
************************************************************************************************************************
*/
Addr::Lib* Gfx11HwlInit(const Client* pClient)
{
    // No extra work is done here; construction is delegated entirely to the class-level factory.
    Addr::Lib* const pGfx11Lib = V2::Gfx11Lib::CreateObj(pClient);

    return pGfx11Lib;
}

namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// Static swizzle mode flag table with ADDR_SW_MAX_TYPE rows, one per swizzle mode value (the mode name is given in
// the trailing comment of each row). The 13 columns follow the header comment on the first line of the initializer:
// Linear, 256B, 4KB, 64KB, 256KB, Z, Std, Disp, Rot, XOR, T, RtOpt, Reserved.
// Rows marked "Reserved" are all zero. The Gfx11Lib constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB  256KB   Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
    {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X

    {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_Z_X
    {{0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_S_X
    {{0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_D_X
    {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_256KB_R_X
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
};

// Five Dim3d entries whose three components multiply to 256, 128, 64, 32 and 16 respectively.
// NOTE(review): presumably the dimensions (in elements) of a 256-byte 3D block for 1/2/4/8/16-byte elements, indexed
// by log2(bytes per element) -- confirm against the callers.
const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

// Log2 block dimension tables, five entries each. The three exponents of entry i sum to (18 - i) for the 256K table,
// (16 - i) for the 64K table and (12 - i) for the 4K table.
// NOTE(review): presumably indexed by log2(bytes per element) like Block256_3d, so that every entry describes a block
// of the named byte size -- confirm against the callers.
const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
const Dim3d Gfx11Lib::Block64K_Log2_3d[]  = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
const Dim3d Gfx11Lib::Block4K_Log2_3d[]   = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};

/**
************************************************************************************************************************
*   Gfx11Lib::Gfx11Lib
*
*   @brief
*       Constructor. Forwards the client to the base Lib, starts m_numPkrLog2, m_numSaLog2 and the three pattern base
*       indices at zero, clears m_settings and copies the static SwizzleModeTable into m_swizzleModeTable.
*
************************************************************************************************************************
*/
Gfx11Lib::Gfx11Lib(const Client* pClient)
    : Lib(pClient)
    , m_numPkrLog2(0)
    , m_numSaLog2(0)
    , m_colorBaseIndex(0)
    , m_htileBaseIndex(0)
    , m_dccBaseIndex(0)
{
    // Seed the per-instance swizzle mode flags from the static table.
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));

    // No chip-specific setting is enabled until HwlConvertChipFamily() sets one.
    memset(&m_settings, 0, sizeof(m_settings));
}

/**
************************************************************************************************************************
*   Gfx11Lib::~Gfx11Lib
*
*   @brief
*       Destructor. The body is empty: this class performs no explicit cleanup here.
************************************************************************************************************************
*/
Gfx11Lib::~Gfx11Lib()
{
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo. Fills in the aligned HTILE pitch/height, base alignment,
*       meta block dimensions, per-slice and total sizes, the optional per-mip table and the HTILE equation pointer.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS when the swizzle mode / pipeAligned combination is rejected
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    // NOTE(review): the three tests are AND-ed, so the request is rejected only when the swizzle mode is neither
    // 64KB_Z_X nor 256KB_Z_X *and* pipeAligned is not set. Any other combination is accepted -- confirm that this is
    // intended rather than requiring a Z_X mode together with pipeAligned.
    if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X)  &&
        (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
        (pIn->hTileFlags.pipeAligned != TRUE))
    {
        ret = ADDR_INVALIDPARAMS;
    }
    else
    {
        // Meta block size/dimensions are always queried as depth-stencil, 2D, elemLog2 = 0, samples = 0, pipe aligned.
        Dim3d         metaBlk     = {};
        const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
                                                   ADDR_RSRC_TEX_2D,
                                                   pIn->swizzleMode,
                                                   0,
                                                   0,
                                                   TRUE,
                                                   &metaBlk);

        // Surface extents rounded up to whole meta blocks.
        pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
        pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
        // Base alignment is the larger of one meta block and 2^(m_pipesLog2 + 11) bytes.
        pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
        pOut->metaBlkWidth  = metaBlk.w;
        pOut->metaBlkHeight = metaBlk.h;

        if (pIn->numMipLevels > 1)
        {
            ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

            // When a mip tail exists (firstMipIdInTail != numMipLevels) one meta block is placed at offset 0 for it;
            // the non-tail mips are laid out after it.
            UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;

            // Walk from the last non-tail mip down to mip 0, so smaller mips get the lower offsets.
            for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);

                mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
                mipHeight = PowTwoAlign(mipHeight, metaBlk.h);

                // Size of this mip's slice, in whole meta blocks.
                const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
                const UINT_32 heightInM    = mipHeight / metaBlk.h;
                const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;

                // The per-mip table is optional.
                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].inMiptail = FALSE;
                    pOut->pMipInfo[i].offset    = offset;
                    pOut->pMipInfo[i].sliceSize = mipSliceSize;
                }

                offset += mipSliceSize;
            }

            // The accumulated offset is the size of one slice covering all mips.
            pOut->sliceSize          = offset;
            pOut->metaBlkNumPerSlice = offset / metaBlkSize;
            pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;

            if (pOut->pMipInfo != NULL)
            {
                // Every tail mip reports offset 0 and size 0 ...
                for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
                {
                    pOut->pMipInfo[i].inMiptail = TRUE;
                    pOut->pMipInfo[i].offset    = 0;
                    pOut->pMipInfo[i].sliceSize = 0;
                }

                // ... except the first tail mip, which reports the single meta block reserved for the tail.
                if (pIn->firstMipIdInTail != pIn->numMipLevels)
                {
                    pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
                }
            }
        }
        else
        {
            // Single mip level: the slice is simply the aligned surface measured in meta blocks.
            const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
            const UINT_32 heightInM = pOut->height / metaBlk.h;

            pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
            pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
            pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].inMiptail = FALSE;
                pOut->pMipInfo[0].offset    = 0;
                pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
            }
        }

        // Get the HTILE address equation (copied from HtileAddrFromCoord).
        // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
        const UINT_32  index         = m_htileBaseIndex;
        const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;

        // Compile-time check that one pattern row occupies 72 * 2 bytes.
        ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
        pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info: compressed block dimensions, meta block dimensions and size,
*       aligned pitch/height/depth, per-slice and total DCC RAM sizes, the optional per-mip table and the DCC
*       equation pointer.
*
*   @return
*       ADDR_OK on success, ADDR_INVALIDPARAMS for linear or 256B-block swizzle modes
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE ret = ADDR_OK;

    if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
    {
        ret = ADDR_INVALIDPARAMS;
    }
    else
    {
        // Computed once and reused for the block-size queries and the equation lookup below.
        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
        const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
        Dim3d         compBlock   = {};

        GetCompressedBlockSizeLog2(Gfx11DataColor,
                                   pIn->resourceType,
                                   pIn->swizzleMode,
                                   elemLog2,
                                   numFragLog2,
                                   &compBlock);
        // compBlock holds log2 dimensions; report them as plain sizes.
        pOut->compressBlkWidth  = 1 << compBlock.w;
        pOut->compressBlkHeight = 1 << compBlock.h;
        pOut->compressBlkDepth  = 1 << compBlock.d;

        if (ret == ADDR_OK)
        {
            Dim3d         metaBlk     = {};
            const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
                                                       pIn->resourceType,
                                                       pIn->swizzleMode,
                                                       elemLog2,
                                                       numFragLog2,
                                                       pIn->dccKeyFlags.pipeAligned,
                                                       &metaBlk);

            pOut->dccRamBaseAlign   = metaBlkSize;
            pOut->metaBlkWidth      = metaBlk.w;
            pOut->metaBlkHeight     = metaBlk.h;
            pOut->metaBlkDepth      = metaBlk.d;
            pOut->metaBlkSize       = metaBlkSize;

            // Surface extents rounded up to whole meta blocks; a slice count of 0 is treated as 1.
            pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
            pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
            pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);

            if (pIn->numMipLevels > 1)
            {
                ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

                // When a mip tail exists one meta block is placed at offset 0 for it; the non-tail mips follow.
                UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;

                // Walk from the last non-tail mip down to mip 0, so smaller mips get the lower offsets.
                for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
                {
                    UINT_32 mipWidth, mipHeight;

                    GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);

                    mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
                    mipHeight = PowTwoAlign(mipHeight, metaBlk.h);

                    const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
                    const UINT_32 heightInM    = mipHeight / metaBlk.h;
                    const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;

                    // The per-mip table is optional.
                    if (pOut->pMipInfo != NULL)
                    {
                        pOut->pMipInfo[i].inMiptail = FALSE;
                        pOut->pMipInfo[i].offset    = offset;
                        pOut->pMipInfo[i].sliceSize = mipSliceSize;
                    }

                    offset += mipSliceSize;
                }

                // The accumulated offset is the size of one meta-block-deep slice covering all mips.
                pOut->dccRamSliceSize    = offset;
                pOut->metaBlkNumPerSlice = offset / metaBlkSize;
                pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);

                if (pOut->pMipInfo != NULL)
                {
                    // Every tail mip reports offset 0 and size 0 ...
                    for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
                    {
                        pOut->pMipInfo[i].inMiptail = TRUE;
                        pOut->pMipInfo[i].offset    = 0;
                        pOut->pMipInfo[i].sliceSize = 0;
                    }

                    // ... except the first tail mip, which reports the single meta block reserved for the tail.
                    if (pIn->firstMipIdInTail != pIn->numMipLevels)
                    {
                        pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
                    }
                }
            }
            else
            {
                // Single mip level: the slice is simply the aligned surface measured in meta blocks.
                const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
                const UINT_32 heightInM = pOut->height / metaBlk.h;

                pOut->metaBlkNumPerSlice = pitchInM * heightInM;
                pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
                pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[0].inMiptail = FALSE;
                    pOut->pMipInfo[0].offset    = 0;
                    pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
                }
            }

            // Get the DCC address equation (copied from DccAddrFromCoord)
            UINT_32       index       = m_dccBaseIndex + elemLog2;
            // Every swizzle mode other than 64KB_R_X selects the 256KB_R_X index table.
            const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
                                        GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;

            if (pIn->dccKeyFlags.pipeAligned)
            {
                // Skip the unaligned group
                index += MaxNumOfBpp;

                if (m_numPkrLog2 < 2)
                {
                    index += m_pipesLog2 * MaxNumOfBpp;
                }
                else
                {
                    // 4 groups for "m_numPkrLog2 < 2" case
                    index += 4 * MaxNumOfBpp;

                    const UINT_32 dccPipePerPkr = 3;

                    index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
                             (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
                }
            }

            // Compile-time check that one pattern row occupies 68 * 2 bytes.
            ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
            pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
        }
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeHtileAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeHtileAddrFromCoord. Derives the HTILE layout through ComputeHtileInfo()
*       for a single mip level and combines slice base, meta block base and the swizzled in-block offset.
*
*   @return
*       ADDR_NOTIMPLEMENTED for mip-mapped surfaces, otherwise the return code of ComputeHtileInfo()
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
{
    // Mip chains are not handled by this path.
    if (pIn->numMipLevels > 1)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    // Describe the surface as a single-level HTILE; each extent is clamped to at least 1.
    ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {};
    infoIn.size            = sizeof(infoIn);
    infoIn.hTileFlags      = pIn->hTileFlags;
    infoIn.depthFlags      = pIn->depthflags;
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices,       1u);
    infoIn.numMipLevels    = 1;

    ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {};
    infoOut.size = sizeof(infoOut);

    const ADDR_E_RETURNCODE status = ComputeHtileInfo(&infoIn, &infoOut);

    if (status != ADDR_OK)
    {
        return status;
    }

    // Pick the swizzle pattern row for this pipe configuration and sample count.
    const UINT_32 patIndex     = m_htileBaseIndex + Log2(pIn->numSamples);
    const UINT_8* pPatIdx      = GFX11_HTILE_PATIDX;

    // log2 of the meta block size in bytes, and the offset (in nibbles) of the coordinate inside its meta block.
    const UINT_32 metaBlkLog2  = Log2(infoOut.metaBlkWidth) + Log2(infoOut.metaBlkHeight) - 4;
    const UINT_32 inBlkNibbles = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[pPatIdx[patIndex]],
                                                                 metaBlkLog2 + 1, // +1 for nibble offset
                                                                 pIn->x,
                                                                 pIn->y,
                                                                 pIn->slice,
                                                                 0);

    // Pipe XOR bits, shifted to the pipe interleave position and confined to one meta block.
    const UINT_32 pipeBits   = (1 << m_pipesLog2) - 1;
    const UINT_32 inBlkMask  = (1 << metaBlkLog2) - 1;
    const UINT_32 xorBits    = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlkMask;

    // Linear index of the meta block that contains (x, y).
    const UINT_32 blksPerRow = infoOut.pitch / infoOut.metaBlkWidth;
    const UINT_32 blkCol     = pIn->x / infoOut.metaBlkWidth;
    const UINT_32 blkRow     = pIn->y / infoOut.metaBlkHeight;
    const UINT_32 blkLinear  = (blkRow * blksPerRow) + blkCol;

    const UINT_64 sliceBase  = static_cast<UINT_64>(infoOut.sliceSize) * pIn->slice;
    const UINT_32 blkBase    = blkLinear * (1 << metaBlkLog2);
    const UINT_32 inBlkBytes = (inBlkNibbles >> 1) ^ xorBits;

    pOut->addr = sliceBase + blkBase + inBlkBytes;

    return status;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeHtileCoordFromAddr
*
*   @brief
*       Interface function stub of AddrComputeHtileCoordFromAddr. Not implemented: neither argument is used and
*       pOut is left untouched.
*
*   @return
*       ADDR_NOTIMPLEMENTED
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure (unused)
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure (never written)
{
    ADDR_NOT_IMPLEMENTED();

    // pOut is never filled in, so report the missing implementation instead of claiming success.
    return ADDR_NOTIMPLEMENTED;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlSupportComputeDccAddrFromCoord
*
*   @brief
*       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
*
*   @return
*       ADDR_OK when the request is supported, ADDR_NOTSUPPORTED otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
{
    // Only non-linear 2D surfaces in a 64KB/256KB R_X mode with one fragment and one mip level are handled.
    const bool isRxMode         = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ||
                                  (pIn->swizzleMode == ADDR_SW_256KB_R_X);
    const bool supportedSurface = (pIn->resourceType == ADDR_RSRC_TEX_2D) &&
                                  isRxMode                                &&
                                  (pIn->dccKeyFlags.linear != TRUE)       &&
                                  !(pIn->numFrags > 1)                    &&
                                  !(pIn->numMipLevels > 1)                &&
                                  !(pIn->mipId > 0);

    // The layout values used as divisors / multipliers must be non-zero; the slice size only matters past slice 0.
    const bool missingSliceSize = (pIn->slice > 0) && (pIn->dccRamSliceSize == 0);
    const bool usableLayout     = (pIn->pitch != 0)         &&
                                  (pIn->metaBlkWidth != 0)  &&
                                  (pIn->metaBlkHeight != 0) &&
                                  !missingSliceSize;

    return (supportedSurface && usableLayout) ? ADDR_OK : ADDR_NOTSUPPORTED;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeDccAddrFromCoord
*
*   @brief
*       Interface function stub of AddrComputeDccAddrFromCoord. Combines slice base, meta block base and the
*       swizzled in-block offset into pOut->addr.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    const UINT_32 bppLog2  = Log2(pIn->bpp >> 3);
    const UINT_32 pipeBits = (1 << m_pipesLog2) - 1;
    // Every swizzle mode other than 64KB_R_X selects the 256KB_R_X index table.
    const UINT_8* pPatIdx  = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
                             GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;

    // Locate the pattern row: one group of MaxNumOfBpp entries per pipe/packer configuration.
    UINT_32 patIndex = m_dccBaseIndex + bppLog2;

    if (pIn->dccKeyFlags.pipeAligned)
    {
        // Skip the unaligned group
        patIndex += MaxNumOfBpp;

        if (m_numPkrLog2 >= 2)
        {
            const UINT_32 dccPipePerPkr = 3;

            // 4 groups for "m_numPkrLog2 < 2" case
            patIndex += 4 * MaxNumOfBpp;

            patIndex += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
                        (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
        }
        else
        {
            patIndex += m_pipesLog2 * MaxNumOfBpp;
        }
    }

    // log2 of the meta block size in bytes, and the offset (in nibbles) of the coordinate inside its meta block.
    const UINT_32 metaBlkLog2  = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + bppLog2 - 8;
    const UINT_32 inBlkMask    = (1 << metaBlkLog2) - 1;
    const UINT_32 inBlkNibbles = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[pPatIdx[patIndex]],
                                                                 metaBlkLog2 + 1, // +1 for nibble offset
                                                                 pIn->x,
                                                                 pIn->y,
                                                                 pIn->slice,
                                                                 0);

    // Linear index of the meta block that contains (x, y).
    const UINT_32 blkCol     = pIn->x / pIn->metaBlkWidth;
    const UINT_32 blkRow     = pIn->y / pIn->metaBlkHeight;
    const UINT_32 blksPerRow = pIn->pitch / pIn->metaBlkWidth;
    const UINT_32 blkLinear  = (blkRow * blksPerRow) + blkCol;

    // Pipe XOR bits, shifted to the pipe interleave position and confined to one meta block.
    const UINT_32 xorBits    = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlkMask;

    const UINT_64 sliceBase  = static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice;
    const UINT_32 blkBase    = blkLinear * (1 << metaBlkLog2);
    const UINT_32 inBlkBytes = (inBlkNibbles >> 1) ^ xorBits;

    pOut->addr = sliceBase + blkBase + inBlkBytes;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlInitGlobalParams
*
*   @brief
*       Initializes global parameters: decodes the pipe count and pipe interleave size from the GB_ADDR_CONFIG
*       register value, derives the packer counts and the HTILE/color pattern base indices, and builds the equation
*       table when the configuration is valid.
*
*   @return
*       TRUE if all settings are valid
*
************************************************************************************************************************
*/
BOOL_32 Gfx11Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    BOOL_32              valid = TRUE;
    GB_ADDR_CONFIG_GFX11 gbAddrConfig;

    // View the raw register value through its bit-field layout.
    gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;

    // Decode the pipe count; an unknown encoding asserts and marks the configuration invalid.
    switch (gbAddrConfig.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:
            m_pipes     = 1;
            m_pipesLog2 = 0;
            break;
        case ADDR_CONFIG_2_PIPE:
            m_pipes     = 2;
            m_pipesLog2 = 1;
            break;
        case ADDR_CONFIG_4_PIPE:
            m_pipes     = 4;
            m_pipesLog2 = 2;
            break;
        case ADDR_CONFIG_8_PIPE:
            m_pipes     = 8;
            m_pipesLog2 = 3;
            break;
        case ADDR_CONFIG_16_PIPE:
            m_pipes     = 16;
            m_pipesLog2 = 4;
            break;
        case ADDR_CONFIG_32_PIPE:
            m_pipes     = 32;
            m_pipesLog2 = 5;
            break;
        case ADDR_CONFIG_64_PIPE:
            m_pipes     = 64;
            m_pipesLog2 = 6;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
            break;
    }

    // Decode the pipe interleave size; an unknown encoding asserts and marks the configuration invalid.
    switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
            m_pipeInterleaveLog2  = 8;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
            m_pipeInterleaveLog2  = 9;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
            m_pipeInterleaveLog2  = 10;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
            m_pipeInterleaveLog2  = 11;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
            break;
    }

    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
    // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
    // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
    // Note that this is enforced by assertion only; 'valid' is not cleared for the larger interleave sizes.
    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);

    // These fields are deprecated on GFX11; they do nothing on HW.
    m_maxCompFrag     = 1;
    m_maxCompFragLog2 = 0;

    // The base indices below are accumulated with += onto their current values (the constructor starts them at 0).

    // Skip unaligned case
    m_htileBaseIndex += MaxNumOfAA;

    // One group of pattern entries per pipe count.
    m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
    m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;

    // m_numSaLog2 is one less than m_numPkrLog2, clamped at 0.
    m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
    m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;

    // Checked by assertion only: the packer count may not exceed the pipe count, nor trail it by more than 2 (log2).
    ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));

    // Configurations with at least 4 packers (log2 >= 2) select pattern groups further into the tables.
    if (m_numPkrLog2 >= 2)
    {
        m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
        m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
    }

    // There is no so-called VAR swizzle mode on GFX11 and instead there are 4 256KB swizzle modes. Here we treat 256KB
    // swizzle mode as "VAR" swizzle mode for reusing exising facilities (e.g GetBlockSizeLog2()) provided by base class
    m_blockVarSizeLog2 = 18;

    // NOTE(review): the derived state above is still written when 'valid' is FALSE; only the equation table build is
    // gated on it.
    if (valid)
    {
        InitEquationTable();
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlConvertChipFamily
*
*   @brief
*       Convert familyID defined in atiid.h to ChipFamily and record the chip-specific flags in m_settings and
*       m_configFlags
*   @return
*       ChipFamily (always ADDR_CHIP_FAMILY_NAVI)
************************************************************************************************************************
*/
ChipFamily Gfx11Lib::HwlConvertChipFamily(
    UINT_32 chipFamily,        ///< [in] chip family defined in atiid.h
    UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
{
    if (chipFamily == FAMILY_NV3)
    {
        // The Navi3x revisions are recognized but carry no dedicated setting; the branches stay empty.
        if (ASICREV_IS_NAVI31_P(chipRevision))
        {
        }
        if (ASICREV_IS_NAVI32_P(chipRevision))
        {
        }
        if (ASICREV_IS_NAVI33_P(chipRevision))
        {
        }
    }
    else if (chipFamily == FAMILY_GFX1150)
    {
        if (ASICREV_IS_GFX1150(chipRevision))
        {
            m_settings.isGfx1150 = 1;
        }
    }
    else if (chipFamily == FAMILY_GFX1103)
    {
        // Set for the whole family, regardless of revision.
        m_settings.isGfx1103 = 1;
    }
    else
    {
        ADDR_ASSERT(!"Unknown chip family");
    }

    // Applied for every family, including an unknown one.
    m_configFlags.use32bppFor422Fmt = TRUE;

    return ADDR_CHIP_FAMILY_NAVI;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetBlk256SizeLog2
*
*   @brief
*       Get block 256 size as log2 dimensions: the available bits (8 minus the element size log2, and for thin Z/R
*       modes also minus the sample count log2) are split across width/height (thin) or depth/width/height (thick)
*
*   @return
*       N/A
************************************************************************************************************************
*/
void Gfx11Lib::GetBlk256SizeLog2(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    UINT_32 bits = 8 - elemLog2;

    if (IsThin(resourceType, swizzleMode))
    {
        // On GFX11, Z and R modes are the same thing.
        if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
        {
            bits -= numSamplesLog2;
        }

        // Split evenly between width and height; width takes the odd bit.
        const UINT_32 half = bits >> 1;

        pBlock->d = 0;
        pBlock->h = half;
        pBlock->w = half + (bits & 1);
    }
    else
    {
        ADDR_ASSERT(IsThick(resourceType, swizzleMode));

        // Split evenly three ways; the first leftover bit goes to depth, the second to width.
        const UINT_32 third    = bits / 3;
        const UINT_32 leftover = bits % 3;

        pBlock->h = third;
        pBlock->w = third + ((leftover > 1) ? 1 : 0);
        pBlock->d = third + ((leftover > 0) ? 1 : 0);
    }
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetCompressedBlockSizeLog2
*
*   @brief
*       Get compress block size
*
*   @return
*       N/A
************************************************************************************************************************
*/
void Gfx11Lib::GetCompressedBlockSizeLog2(
    Gfx11DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    if (dataType != Gfx11DataColor)
    {
        // Anything that is not color is expected to be depth/stencil, which uses a fixed 2^3 x 2^3 block.
        ADDR_ASSERT(dataType == Gfx11DataDepthStencil);

        pBlock->d = 0;
        pBlock->h = 3;
        pBlock->w = 3;
        return;
    }

    // Color data: the compressed block is the 256B block.
    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetMetaOverlapLog2
*
*   @brief
*       Get meta block overlap
*
*   @return
*       Meta block overlap in log2, clamped to be non-negative
************************************************************************************************************************
*/
INT_32 Gfx11Lib::GetMetaOverlapLog2(
    Gfx11DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2     ///< [in] number of samples
    ) const
{
    Dim3d compressedDim;
    Dim3d blk256Dim;

    GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compressedDim);
    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &blk256Dim);

    // Total log2 size of each block is the sum of its per-dimension log2 sizes.
    const INT_32 compressedLog2 = compressedDim.w + compressedDim.h + compressedDim.d;
    const INT_32 blk256Log2     = blk256Dim.w     + blk256Dim.h     + blk256Dim.d;
    const INT_32 pipesLog2      = GetEffectiveNumPipes();

    // Start from the pipe bits that are not covered by the larger of the two blocks.
    INT_32 overlapLog2 = pipesLog2 - Max(compressedLog2, blk256Log2);

    // One extra overlap bit once there are more than two pipes.
    overlapLog2 += (pipesLog2 > 1) ? 1 : 0;

    // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
    const bool is16Bpp8xaa = (elemLog2 == 4) && (numSamplesLog2 == 3);

    overlapLog2 -= is16Bpp8xaa ? 1 : 0;

    // The overlap is never reported as negative.
    return Max(overlapLog2, 0);
}

/**
************************************************************************************************************************
*   Gfx11Lib::Get3DMetaOverlapLog2
*
*   @brief
*       Get 3d meta block overlap
*
*   @return
*       3D meta block overlap in log2; 0 for standard swizzle modes or when the computed overlap is negative
************************************************************************************************************************
*/
INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2           ///< [in] element size log2
    ) const
{
    // Only the width of the single-sample 256B block takes part in the computation.
    Dim3d blk256Dim;
    GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &blk256Dim);

    const INT_32 overlapLog2 = GetEffectiveNumPipes() - static_cast<INT_32>(blk256Dim.w) + 1;

    // A negative result means there is no overlap at all.
    if (overlapLog2 < 0)
    {
        return 0;
    }

    // Standard swizzle modes never get an overlap.
    if (IsStandardSwizzle(resourceType, swizzleMode) == TRUE)
    {
        return 0;
    }

    return overlapLog2;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetPipeRotateAmount
*
*   @brief
*       Get pipe rotate amount
*
*   @return
*       Pipe rotate amount
************************************************************************************************************************
*/

INT_32 Gfx11Lib::GetPipeRotateAmount(
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
    ) const
{
    // No rotation with at most two pipes, or when there are fewer pipe bits than (SA bits + 1).
    if ((m_pipesLog2 <= 1) || (m_pipesLog2 < (m_numSaLog2 + 1)))
    {
        return 0;
    }

    INT_32 rotateLog2;

    if ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode))
    {
        // RB-aligned modes with exactly (SA bits + 1) pipe bits rotate by one bit.
        rotateLog2 = 1;
    }
    else
    {
        // Otherwise rotate by the pipe bits in excess of (SA bits + 1).
        rotateLog2 = m_pipesLog2 - (m_numSaLog2 + 1);
    }

    return rotateLog2;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetMetaBlkSize
*
*   @brief
*       Get metadata block size
*
*   @return
*       Meta block size
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::GetMetaBlkSize(
    Gfx11DataType    dataType,          ///< [in] Data type
    AddrResourceType resourceType,      ///< [in] Resource type
    AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
    UINT_32          elemLog2,          ///< [in] element size log2
    UINT_32          numSamplesLog2,    ///< [in] number of samples
    BOOL_32          pipeAlign,         ///< [in] pipe align
    Dim3d*           pBlock             ///< [out] block size
    ) const
{
    const INT_32  metaElemLog2  = GetMetaElementSizeLog2(dataType);
    const INT_32  metaCacheLog2 = GetMetaCacheSizeLog2(dataType);
    const INT_32  compBlkLog2   = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
    const INT_32  samplesLog2   = numSamplesLog2;
    const INT_32  dataBlkLog2   = GetBlockSizeLog2(swizzleMode);
    const BOOL_32 thin          = IsThin(resourceType, swizzleMode);
    INT_32        pipesLog2     = m_pipesLog2;
    INT_32        sizeLog2      = 0;

    // Step 1: work out the meta block size (log2 bytes).
    if (thin)
    {
        if (pipeAlign == FALSE)
        {
            // Not pipe aligned: 4KB, but never larger than the data block.
            sizeLog2 = Min(dataBlkLog2, 12);
        }
        else if ((IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
        {
            // Pipe aligned S/D modes: at least 4KB, at least interleave * pipes, capped by the data block.
            sizeLog2 = Min(Max(static_cast<INT_32>(m_pipeInterleaveLog2) + pipesLog2, 12), dataBlkLog2);
        }
        else
        {
            if ((m_pipesLog2 > 1) && (m_pipesLog2 == m_numSaLog2 + 1))
            {
                pipesLog2 += 1;
            }

            const INT_32 rotateLog2      = GetPipeRotateAmount(resourceType, swizzleMode);
            const INT_32 interleavedLog2 = static_cast<INT_32>(m_pipeInterleaveLog2) + pipesLog2;

            if (pipesLog2 < 4)
            {
                sizeLog2 = Max(interleavedLog2, 12);
            }
            else
            {
                INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);

                // In 16Bpe 8xaa, we have an extra overlap bit
                const bool is16Bpe8xaa = (elemLog2 == 4) && (numSamplesLog2 == 3);

                if ((rotateLog2 > 0) &&
                    is16Bpe8xaa      &&
                    (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
                {
                    overlapLog2 += 1;
                }

                sizeLog2 = Max(metaCacheLog2 + overlapLog2 + pipesLog2, interleavedLog2);
            }

            if (dataType == Gfx11DataDepthStencil)
            {
                // For htile surfaces, pad meta block size to 2K * num_pipes
                sizeLog2 = Max(sizeLog2, 11 + pipesLog2);
            }

            // Multi-fragment RT-optimized modes with pipe rotation may need an even larger block.
            if (IsRtOptSwizzle(swizzleMode) && (samplesLog2 > 1) && (rotateLog2 >= 1))
            {
                const INT_32 rtOptLog2 = 8 + m_pipesLog2 + Max(rotateLog2, samplesLog2 - 1);

                sizeLog2 = Max(sizeLog2, rtOptLog2);
            }
        }
    }
    else
    {
        ADDR_ASSERT(IsThick(resourceType, swizzleMode));

        if (pipeAlign == FALSE)
        {
            sizeLog2 = 12;
        }
        else
        {
            if ((m_pipesLog2 > 1)                &&
                (m_pipesLog2 == m_numSaLog2 + 1) &&
                IsRbAligned(resourceType, swizzleMode))
            {
                pipesLog2 += 1;
            }

            const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);

            sizeLog2 = metaCacheLog2 + overlapLog2 + pipesLog2;
            sizeLog2 = Max(sizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + pipesLog2);
            sizeLog2 = Max(sizeLog2, 12);
        }
    }

    // Step 2: convert the byte size into the number of data elements covered (log2) and split it into dimensions.
    const INT_32 coverageLog2 = sizeLog2 + compBlkLog2 - elemLog2 - samplesLog2 - metaElemLog2;

    if (thin)
    {
        // 2D split: an odd leftover bit goes to the width.
        const INT_32 halfLog2 = coverageLog2 >> 1;

        pBlock->w = 1 << (halfLog2 + (coverageLog2 & 1));
        pBlock->h = 1 << halfLog2;
        pBlock->d = 1;
    }
    else
    {
        // 3D split: leftover bits go to the width first, then to the height.
        const INT_32 thirdLog2 = coverageLog2 / 3;
        const INT_32 leftover  = coverageLog2 % 3;

        pBlock->w = 1 << (thirdLog2 + ((leftover > 0) ? 1 : 0));
        pBlock->h = 1 << (thirdLog2 + ((leftover > 1) ? 1 : 0));
        pBlock->d = 1 << thirdLog2;
    }

    return (1 << static_cast<UINT_32>(sizeLog2));
}

/**
************************************************************************************************************************
*   Gfx11Lib::ConvertSwizzlePatternToEquation
*
*   @brief
*       Convert swizzle pattern to equation.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
    UINT_32                elemLog2,  ///< [in] element bytes log2
    AddrResourceType       rsrcType,  ///< [in] resource type
    AddrSwizzleMode        swMode,    ///< [in] swizzle mode
    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
    const
{
    // Overview: every address bit of the block is described by up to three coordinate bits, stored in
    // pEquation->addr[i], pEquation->xor1[i] and pEquation->xor2[i]. Throughout this function channel 0 is used
    // for x, channel 1 for y and channel 2 for z.
    //
    // Three cases are handled:
    //   1. non-XOR swizzle modes     - every bit is a single coordinate bit;
    //   2. XOR modes on thin blocks  - x/y (plus an optional z XOR term);
    //   3. XOR modes on other blocks - x/y/z.
    ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
    GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

    const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
    const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
    memset(pEquation, 0, sizeof(ADDR_EQUATION));
    pEquation->numBits            = blockSizeLog2;
    pEquation->numBitComponents   = pPatInfo->maxItemCount;
    pEquation->stackedDepthSlices = FALSE;

    // The lowest elemLog2 address bits map straight to channel 0 bits [0, elemLog2). This is why every x index
    // assigned below is biased by elemLog2.
    for (UINT_32 i = 0; i < elemLog2; i++)
    {
        pEquation->addr[i].channel = 0;
        pEquation->addr[i].valid   = 1;
        pEquation->addr[i].index   = i;
    }

    if (IsXor(swMode) == FALSE)
    {
        // Case 1: no XOR. Each pattern entry is expected to hold exactly one coordinate bit, which becomes the
        // addr term; the xor terms are cleared.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            ADDR_ASSERT(IsPow2(pSwizzle[i].value));

            if (pSwizzle[i].x != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));

                pEquation->addr[i].channel = 0;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
            }
            else if (pSwizzle[i].y != 0)
            {
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));

                pEquation->addr[i].channel = 1;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].y);
            }
            else
            {
                ADDR_ASSERT(pSwizzle[i].z != 0);
                ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                pEquation->addr[i].channel = 2;
                pEquation->addr[i].valid   = 1;
                pEquation->addr[i].index   = Log2(pSwizzle[i].z);
            }

            pEquation->xor1[i].value = 0;
            pEquation->xor2[i].value = 0;
        }
    }
    else if (IsThin(rsrcType, swMode))
    {
        // Case 2: XOR mode on a thin block.
        Dim3d dim;
        ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);

        const UINT_32 blkXLog2 = Log2(dim.w);
        const UINT_32 blkYLog2 = Log2(dim.h);
        const UINT_32 blkXMask = dim.w - 1;
        const UINT_32 blkYMask = dim.h - 1;

        // swizzle[] : per-bit scratch copy holding the coordinate bits that are still unassigned.
        // xMask/yMask: coordinate bits that already own an addr term.
        // bMask     : address bits that are fully resolved.
        // NOTE(review): unlike the 3D path below, this array is not zero-initialized. Every entry in
        // [elemLog2, blockSizeLog2) is written by the first pass, and the second pass only reads entries whose
        // bMask bit is clear, which excludes [0, elemLog2).
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: resolve every bit that can be decided from its own pattern entry alone.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                // Exactly one coordinate bit: it becomes the addr term and the bit is done.
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].y != 0);
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                // Several coordinate bits are XORed together for this address bit.
                // A z bit, if present, goes straight into xor2.
                if (pSwizzle[i].z != 0)
                {
                    ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));

                    pEquation->xor2[i].channel = 2;
                    pEquation->xor2[i].valid   = 1;
                    pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
                }

                // Only x and y are tracked in the scratch copy from here on.
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // x bits beyond the block width are taken as an XOR term (xor1) and dropped from the scratch copy.
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                // y bits beyond the block height use xor1 when no high x bit claimed it, otherwise xor2.
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    if (xHi == 0)
                    {
                        ADDR_ASSERT(pEquation->xor1[i].value == 0);
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                // Nothing left to place: the bit is resolved.
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by the first pass.
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Second pass: sweep the unresolved bits repeatedly until all are resolved. On each visit either
        //  - a single coordinate bit remains and becomes the addr term, or
        //  - coordinate bits that already own an addr term elsewhere (tracked in xMask/yMask) are moved into a
        //    free xor slot and removed from the scratch copy.
        // NOTE(review): the loop exits only when every bit is resolved; presumably the pattern tables guarantee
        // progress on each sweep - confirm.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].y != 0);
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this entry that are already used as an addr term elsewhere.
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        // Drop what was just placed; the remainder is handled on a later visit.
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                    }
                }
            }
        }

        // Every in-block x and y bit is expected to own exactly one addr term.
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
    }
    else
    {
        // Case 3: XOR mode on a non-thin block. Same two-pass scheme as above, extended with the z coordinate.
        // The log2 block dimensions come from the per-element-size tables for 256KB, 4KB or (otherwise) 64KB blocks.
        const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
                              Block256K_Log2_3d[elemLog2] :
                              ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);

        const UINT_32 blkXLog2 = blkDim.w;
        const UINT_32 blkYLog2 = blkDim.h;
        const UINT_32 blkZLog2 = blkDim.d;
        const UINT_32 blkXMask = (1 << blkXLog2) - 1;
        const UINT_32 blkYMask = (1 << blkYLog2) - 1;
        const UINT_32 blkZMask = (1 << blkZLog2) - 1;

        // Same bookkeeping as in the thin path, plus zMask for the z coordinate.
        ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
        UINT_32          xMask = 0;
        UINT_32          yMask = 0;
        UINT_32          zMask = 0;
        UINT_32          bMask = (1 << elemLog2) - 1;

        // First pass: resolve every bit that can be decided from its own pattern entry alone.
        for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
        {
            if (IsPow2(pSwizzle[i].value))
            {
                // Exactly one coordinate bit: it becomes the addr term and the bit is done.
                if (pSwizzle[i].x != 0)
                {
                    ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
                    xMask |= pSwizzle[i].x;

                    const UINT_32 xLog2 = Log2(pSwizzle[i].x);

                    ADDR_ASSERT(xLog2 < blkXLog2);

                    pEquation->addr[i].channel = 0;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = xLog2 + elemLog2;
                }
                else if (pSwizzle[i].y != 0)
                {
                    ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
                    yMask |= pSwizzle[i].y;

                    pEquation->addr[i].channel = 1;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].y);

                    ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                }
                else
                {
                    ADDR_ASSERT(pSwizzle[i].z != 0);
                    ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
                    zMask |= pSwizzle[i].z;

                    pEquation->addr[i].channel = 2;
                    pEquation->addr[i].valid   = 1;
                    pEquation->addr[i].index   = Log2(pSwizzle[i].z);

                    ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                }

                swizzle[i].value = 0;
                bMask |= 1 << i;
            }
            else
            {
                // Several coordinate bits are XORed together: keep x, y and z in the scratch copy.
                swizzle[i].x = pSwizzle[i].x;
                swizzle[i].y = pSwizzle[i].y;
                swizzle[i].z = pSwizzle[i].z;
                swizzle[i].s = 0;

                ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);

                // Coordinate bits that lie beyond the block dimensions.
                const UINT_32 xHi = swizzle[i].x & (~blkXMask);
                const UINT_32 yHi = swizzle[i].y & (~blkYMask);
                const UINT_32 zHi = swizzle[i].z & (~blkZMask);

                // Only xor1 and xor2 are available, so at most two of the three may be present.
                ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));

                if (xHi != 0)
                {
                    ADDR_ASSERT(IsPow2(xHi));
                    ADDR_ASSERT(pEquation->xor1[i].value == 0);

                    pEquation->xor1[i].channel = 0;
                    pEquation->xor1[i].valid   = 1;
                    pEquation->xor1[i].index   = Log2(xHi) + elemLog2;

                    swizzle[i].x &= blkXMask;
                }

                if (yHi != 0)
                {
                    ADDR_ASSERT(IsPow2(yHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 1;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(yHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 1;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(yHi);
                    }

                    swizzle[i].y &= blkYMask;
                }

                if (zHi != 0)
                {
                    ADDR_ASSERT(IsPow2(zHi));

                    if (pEquation->xor1[i].value == 0)
                    {
                        pEquation->xor1[i].channel = 2;
                        pEquation->xor1[i].valid   = 1;
                        pEquation->xor1[i].index   = Log2(zHi);
                    }
                    else
                    {
                        ADDR_ASSERT(pEquation->xor2[i].value == 0);
                        pEquation->xor2[i].channel = 2;
                        pEquation->xor2[i].valid   = 1;
                        pEquation->xor2[i].index   = Log2(zHi);
                    }

                    swizzle[i].z &= blkZMask;
                }

                // Nothing left to place: the bit is resolved.
                if (swizzle[i].value == 0)
                {
                    bMask |= 1 << i;
                }
            }
        }

        const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
        const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;

        // All bits below the pipe interleave are expected to be resolved by the first pass.
        ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);

        // Second pass: sweep the unresolved bits repeatedly until all are resolved (see the thin path above for
        // the scheme; z is handled the same way as x and y here).
        // NOTE(review): the loop exits only when every bit is resolved; presumably the pattern tables guarantee
        // progress on each sweep - confirm.
        while (bMask != blockMask)
        {
            for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
            {
                if ((bMask & (1 << i)) == 0)
                {
                    if (IsPow2(swizzle[i].value))
                    {
                        if (swizzle[i].x != 0)
                        {
                            ADDR_ASSERT((xMask & swizzle[i].x) == 0);
                            xMask |= swizzle[i].x;

                            const UINT_32 xLog2 = Log2(swizzle[i].x);

                            ADDR_ASSERT(xLog2 < blkXLog2);

                            pEquation->addr[i].channel = 0;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = xLog2 + elemLog2;
                        }
                        else if (swizzle[i].y != 0)
                        {
                            ADDR_ASSERT((yMask & swizzle[i].y) == 0);
                            yMask |= swizzle[i].y;

                            pEquation->addr[i].channel = 1;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].y);

                            ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
                        }
                        else
                        {
                            ADDR_ASSERT(swizzle[i].z != 0);
                            ADDR_ASSERT((zMask & swizzle[i].z) == 0);
                            zMask |= swizzle[i].z;

                            pEquation->addr[i].channel = 2;
                            pEquation->addr[i].valid   = 1;
                            pEquation->addr[i].index   = Log2(swizzle[i].z);

                            ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
                        }

                        swizzle[i].value = 0;
                        bMask |= 1 << i;
                    }
                    else
                    {
                        // Coordinate bits of this entry that are already used as an addr term elsewhere.
                        const UINT_32 x = swizzle[i].x & xMask;
                        const UINT_32 y = swizzle[i].y & yMask;
                        const UINT_32 z = swizzle[i].z & zMask;

                        if (x != 0)
                        {
                            ADDR_ASSERT(IsPow2(x));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 0;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(x) + elemLog2;
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 0;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(x) + elemLog2;
                            }
                        }

                        if (y != 0)
                        {
                            ADDR_ASSERT(IsPow2(y));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 1;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(y);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 1;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(y);
                            }
                        }

                        if (z != 0)
                        {
                            ADDR_ASSERT(IsPow2(z));

                            if (pEquation->xor1[i].value == 0)
                            {
                                pEquation->xor1[i].channel = 2;
                                pEquation->xor1[i].valid   = 1;
                                pEquation->xor1[i].index   = Log2(z);
                            }
                            else
                            {
                                ADDR_ASSERT(pEquation->xor2[i].value == 0);
                                pEquation->xor2[i].channel = 2;
                                pEquation->xor2[i].valid   = 1;
                                pEquation->xor2[i].index   = Log2(z);
                            }
                        }

                        // Drop what was just placed; the remainder is handled on a later visit.
                        swizzle[i].x &= ~x;
                        swizzle[i].y &= ~y;
                        swizzle[i].z &= ~z;
                    }
                }
            }
        }

        // Every in-block x, y and z bit is expected to own exactly one addr term.
        ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
    }
}

/**
************************************************************************************************************************
*   Gfx11Lib::InitEquationTable
*
*   @brief
*       Initialize Equation table.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx11Lib::InitEquationTable()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    // Walk every (resource type, swizzle mode, element size) combination. The resource type index is
    // relative to ADDR_RSRC_TEX_2D.
    for (UINT_32 typeIdx = 0; typeIdx < MaxRsrcType; ++typeIdx)
    {
        const AddrResourceType rsrcType = static_cast<AddrResourceType>(typeIdx + ADDR_RSRC_TEX_2D);

        for (UINT_32 modeIdx = 0; modeIdx < MaxSwModeType; ++modeIdx)
        {
            const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(modeIdx);

            for (UINT_32 bytesLog2 = 0; bytesLog2 < MaxElementBytesLog2; ++bytesLog2)
            {
                const ADDR_SW_PATINFO* pInfo = GetSwizzlePatternInfo(swMode, rsrcType, bytesLog2, 1);

                // Combinations without a pattern, or with a pattern that cannot be turned into an
                // equation, keep the invalid index.
                UINT_32 eqIdx = ADDR_INVALID_EQUATION_INDEX;

                if (pInfo != NULL)
                {
                    ADDR_ASSERT(IsValidSwMode(swMode));

                    const bool convertible = (pInfo->maxItemCount <= 3);

                    if (convertible == false)
                    {
                        // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
                        ADDR_ASSERT((bytesLog2 == 3) || (bytesLog2 == 4));
                        ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
                        ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
                    }
                    else
                    {
                        // Build the address equation for this pattern.
                        ADDR_EQUATION converted = {};

                        ConvertSwizzlePatternToEquation(bytesLog2, rsrcType, swMode, pInfo, &converted);

                        // Append it to the equation table; its slot number is the equation index.
                        eqIdx = m_numEquations;
                        ADDR_ASSERT(eqIdx < EquationTableSize);

                        m_equationTable[eqIdx] = converted;
                        m_numEquations         = eqIdx + 1;
                    }
                }

                m_equationLookupTable[typeIdx][modeIdx][bytesLog2] = eqIdx;
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlGetEquationIndex
*
*   @brief
*       Interface function stub of GetEquationIndex
*
*   @return
*       Equation index for the surface, or ADDR_INVALID_EQUATION_INDEX if no equation is available
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::HwlGetEquationIndex(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Only 2D and 3D resources are looked up in the equation table; anything else keeps the invalid marker.
    const bool hasTableEntry = (pIn->resourceType == ADDR_RSRC_TEX_2D) ||
                               (pIn->resourceType == ADDR_RSRC_TEX_3D);

    UINT_32 result = ADDR_INVALID_EQUATION_INDEX;

    if (hasTableEntry)
    {
        // Table coordinates: [resource type - 1][swizzle mode][log2(bytes per element)]
        const UINT_32 bytesLog2 = Log2(pIn->bpp >> 3);
        const UINT_32 modeSlot  = static_cast<UINT_32>(pIn->swizzleMode);
        const UINT_32 typeSlot  = static_cast<UINT_32>(pIn->resourceType) - 1;

        result = m_equationLookupTable[typeSlot][modeSlot][bytesLog2];
    }

    // When the caller supplied a per-mip array, every mip level receives the same equation index.
    if (pOut->pMipInfo != NULL)
    {
        UINT_32 mip = 0;

        while (mip < pIn->numMipLevels)
        {
            pOut->pMipInfo[mip].equationIndex = result;
            ++mip;
        }
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetValidDisplaySwizzleModes
*
*   @brief
*       Get valid swizzle modes mask for displayable surface
*
*   @return
*       Valid swizzle modes mask for displayable surface
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
    UINT_32 bpp     ///< [in] bits per pixel of the surface
    ) const
{
    // Surfaces wider than 64 bits per pixel get an empty mask (no displayable swizzle mode).
    UINT_32 swModeMask = 0;

    if (bpp <= 64)
    {
        // Start from the DCN 3.2 mask and strip what the current chip cannot scan out.
        swModeMask = Dcn32SwModeMask;

        if (false
            || (m_settings.isGfx1103)
            || (m_settings.isGfx1150)
           )
        {
            // Not all GPUs support displaying with 256kB swizzle modes.
            swModeMask &= ~((1u << ADDR_SW_256KB_D_X) |
                            (1u << ADDR_SW_256KB_R_X));
        }
    }

    return swModeMask;
}

/**
************************************************************************************************************************
*   Gfx11Lib::IsValidDisplaySwizzleMode
*
*   @brief
*       Check if a swizzle mode is supported by display engine
*
*   @return
*       TRUE if the swizzle mode is supported by display engine
************************************************************************************************************************
*/
BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

    // Build the mode bit with an unsigned shift so that the highest mode value never shifts into the sign bit of
    // a signed int; this matches how GetValidDisplaySwizzleModes() composes its UINT_32 mask.
    // NOTE(review): assumes pIn->swizzleMode is below 32 - callers are expected to have range-checked it.
    const UINT_32 swModeBit = 1u << pIn->swizzleMode;

    return ((GetValidDisplaySwizzleModes(pIn->bpp) & swModeBit) != 0) ? TRUE : FALSE;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetMaxNumMipsInTail
*
*   @brief
*       Return max number of mips in tails
*
*   @return
*       Max number of mips in tails
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
    UINT_32 blockSizeLog2,     ///< block size log2
    BOOL_32 isThin             ///< is thin or thick
    ) const
{
    // Thick blocks lose one unit of log2 size for every three bits above 256 bytes; thin blocks are used as-is.
    const UINT_32 thickReduction = (isThin == FALSE) ? ((blockSizeLog2 - 8) / 3) : 0;
    const UINT_32 sizeLog2       = blockSizeLog2 - thickReduction;

    UINT_32 maxMips;

    if (sizeLog2 > 11)
    {
        maxMips = sizeLog2 - 4;
    }
    else
    {
        // NOTE(review): the shift count assumes sizeLog2 >= 9 - confirm callers never pass smaller blocks.
        maxMips = 1 + (1 << (sizeLog2 - 9));
    }

    return maxMips;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputePipeBankXor
*
*   @brief
*       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
*
*   @return
*       ADDR_E_RETURNCODE (the PipeBankXor value is written to pOut->pipeBankXor)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
    ) const
{
    // The XOR value is zero for every swizzle mode: the former "non-PRT XOR" branch and its else branch
    // assigned the same constant, so the input no longer influences the result.
    static_cast<void>(pIn);

    pOut->pipeBankXor = 0;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSlicePipeBankXor
*
*   @brief
*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
*
*   @return
*       ADDR_E_RETURNCODE (the slice PipeBankXor value is written to pOut->pipeBankXor)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
    ) const
{
    // Swizzle modes that are not "non-PRT XOR" always report a zero XOR value.
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        pOut->pipeBankXor = 0;
        return ADDR_OK;
    }

    // The client must pass a valid bits-per-element value; pOut is left untouched on this error path.
    if (pIn->bpe == 0)
    {
        ADDR_ASSERT_ALWAYS();
        return ADDR_INVALIDPARAMS;
    }

    const UINT_32                elemBytesLog2 = Log2(pIn->bpe >> 3);
    const ADDR_SW_PATINFO* const pPattern      = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                       pIn->resourceType,
                                                                       elemBytesLog2,
                                                                       1);

    if (pPattern == NULL)
    {
        // No pattern is known for this combination; should never happen, pOut is left untouched.
        ADDR_NOT_IMPLEMENTED();
        return ADDR_NOTSUPPORTED;
    }

    // Expand the pattern description into per-bit settings, then evaluate it at (x = 0, y = 0, z = slice).
    ADDR_BIT_SETTING patternBits[20];
    GetSwizzlePatternFromPatternInfo(pPattern, patternBits);

    const UINT_32 blockBits   = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 sliceOffset = ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(patternBits),
                                                                blockBits,
                                                                0,
                                                                0,
                                                                pIn->slice,
                                                                0);

    // Drop the pipe-interleave bits; nothing is expected to be set below them.
    const UINT_32 sliceXor = sliceOffset >> m_pipeInterleaveLog2;
    ADDR_ASSERT((sliceXor << m_pipeInterleaveLog2) == sliceOffset);

    // Combine the per-slice contribution with the base value supplied by the caller.
    pOut->pipeBankXor = pIn->basePipeBankXor ^ sliceXor;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Compute sub resource offset to support swizzle pattern
*
*   @return
*       ADDR_E_RETURNCODE (the offset is written to pOut->offset)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // This computation is only meant for thin swizzle modes.
    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));

    // Start of the requested slice ...
    pOut->offset = pIn->slice * pIn->sliceSize;

    // ... plus the macro block offset of the mip level inside that slice.
    pOut->offset += pIn->macroBlockOffset;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice.
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Overview: for one mip level/slice of a block-compressed surface, compute the parameters (base offset,
    // pipeBankXor, mip id, mip count and mip0 dimensions) of a view that addresses the same memory in units of
    // compressed elements. pOut is only written when ADDR_OK is returned.
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
    {
        // Only thin swizzle mode can have a NonBC view...
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
             ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
    {
        // Only support BC1~BC7, ASTC, or ETC2 for now...
        returnCode = ADDR_NOTSUPPORTED;
    }
    else
    {
        // bcWidth/bcHeight are filled in by GetBitsPerPixel(); they are used below as divisors that convert
        // pixel dimensions into element dimensions (presumably the compression block size - confirm in ElemLib).
        UINT_32 bcWidth, bcHeight;
        UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);

        // Describe the whole mip chain in element units so the regular surface-info path can lay it out.
        ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
        infoIn.flags        = pIn->flags;
        infoIn.swizzleMode  = pIn->swizzleMode;
        infoIn.resourceType = pIn->resourceType;
        infoIn.bpp          = bpp;
        infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
        infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
        infoIn.numSlices    = pIn->numSlices;
        infoIn.numMipLevels = pIn->numMipLevels;
        infoIn.numSamples   = 1;
        infoIn.numFrags     = 1;

        // Per-mip results are collected in a local array sized for the maximum mip count.
        ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};

        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
        infoOut.pMipInfo = mipInfo;

        const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;

        if (tiled)
        {
            returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
        }
        else
        {
            returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
        }

        if (returnCode == ADDR_OK)
        {
            // NOTE(review): mipInfo[] is indexed with pIn->mipId without a range check here; this assumes
            // mipId < numMipLevels <= MaxMipLevels - confirm that callers guarantee it.
            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
            subOffIn.swizzleMode      = infoIn.swizzleMode;
            subOffIn.resourceType     = infoIn.resourceType;
            subOffIn.slice            = pIn->slice;
            subOffIn.sliceSize        = infoOut.sliceSize;
            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;

            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};

            // For any mipmap level, move nonBc view base address by offset
            // (the helper's return code is not inspected here)
            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
            pOut->offset = subOffOut.offset;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
            slicePbXorIn.bpe             = infoIn.bpp;
            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
            slicePbXorIn.resourceType    = infoIn.resourceType;
            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
            slicePbXorIn.slice           = pIn->slice;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};

            // For any mipmap level, nonBc view should use computed pbXor
            // NOTE(review): the return code is ignored; if the helper fails, slicePbXorOut keeps its
            // zero-initialized pipeBankXor and that zero is what gets reported.
            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;

            // Dimensions of the requested mip level in elements: pixel size shifted down by mipId (at least 1),
            // then divided by bcWidth/bcHeight, rounding up.
            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
            const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
            const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);

            if (inTail)
            {
                // For mipmap level that is in mip tail block, hack a lot of things...
                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
                // are fit in tail block:

                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;

                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);

                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);

                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
            }
            // This check should cover at least mipId == 0
            else if (requestMipWidth << pIn->mipId == infoIn.width)
            {
                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
                // - only one mipmap level and mipId = 0
                pOut->mipId        = 0;
                pOut->numMipLevels = 1;

                // (mip0) width = requestMipWidth
                pOut->unalignedWidth = requestMipWidth;

                // (mip0) height = requestMipHeight
                pOut->unalignedHeight = requestMipHeight;
            }
            else
            {
                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
                // because single mip view may have different pitch value than original (multiple) mip view...
                // A simple case would be:
                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
                //   mip0 width = 0x101/mip1 width = 0x80
                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.

                // - 2 levels and mipId = 1
                pOut->mipId        = 1;
                pOut->numMipLevels = 2;

                // Element dimensions of the mip level directly above the requested one.
                // The "mipId - 1" shifts rely on mipId >= 1 in this branch (mipId == 0 is expected to have been
                // handled by the branch above).
                const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
                const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);

                // TRUE when the requested level's dimensions are within the same thresholds the in-tail branch
                // above clamps to (blockWidth / 2 by blockHeight) on a tiled surface.
                const BOOL_32 needToAvoidInTail =
                    tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
                    TRUE : FALSE;

                // Block-aligned dimensions of the requested level as derived from the original (full) mip chain.
                const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
                const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);

                // One extra element is added to the reported mip0 size when the upper level is smaller than
                // twice the requested level, or exactly twice but either the tail must be avoided or the
                // original chain aligns the requested level to a larger size than a standalone view would.
                const BOOL_32 needExtraWidth =
                    ((upperMipWidth < requestMipWidth * 2) ||
                     ((upperMipWidth == requestMipWidth * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;

                const BOOL_32 needExtraHeight =
                    ((upperMipHeight < requestMipHeight * 2) ||
                     ((upperMipHeight == requestMipHeight * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;

                // (mip0) width = upperMipWidth, plus one element when needExtraWidth is set
                pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);

                // (mip0) height = upperMipHeight, plus one element when needExtraHeight is set
                pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
            }

            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
            ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
            ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ValidateNonSwModeParams
*
*   @brief
*       Validate compute surface info params except swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 valid = TRUE;

    // Basic range checks. The conditions are evaluated in order and the first hit rejects the input:
    // - bpp must be in [1, 128], width must be non-zero, at most 8 fragments
    // - there is no FMASK for GFX11 ASICs
    // - there is no EQAA support for GFX11 ASICs, so at most 8 samples, and the sample count has to match
    //   the fragment count whenever a fragment count is given
    const bool outOfRange     = (pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8);
    const bool wantsFmask     = (pIn->flags.fmask == 1);
    const bool tooManySamples = (pIn->numSamples > 8);
    const bool eqaaRequested  = (pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags);

    if (outOfRange || wantsFmask || tooManySamples || eqaaRequested)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const AddrResourceType resType       = pIn->resourceType;
    const BOOL_32          hasMipChain   = (pIn->numMipLevels > 1);
    const BOOL_32          multisampled  = (pIn->numSamples > 1);
    const BOOL_32          forDisplay    = pIn->flags.display;
    const BOOL_32          quadBufStereo = pIn->flags.qbStereo;

    // Resource type check: decide whether the attribute combination conflicts with the resource type
    bool conflict;

    if (IsTex1d(resType))
    {
        // 1D: no MSAA, display or stereo
        conflict = (multisampled || forDisplay || quadBufStereo);
    }
    else if (IsTex2d(resType))
    {
        // 2D: at most one of MSAA, mip chain and stereo
        conflict = (multisampled && (hasMipChain || quadBufStereo)) || (quadBufStereo && hasMipChain);
    }
    else if (IsTex3d(resType))
    {
        // 3D: no MSAA, display or stereo
        conflict = (multisampled || forDisplay || quadBufStereo);
    }
    else
    {
        // Resource type is none of 1D/2D/3D
        conflict = true;
    }

    if (conflict)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ValidateSwModeParams
*
*   @brief
*       Validate compute surface info related to swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx11Lib::ValidateSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Checks that pIn->swizzleMode is acceptable for the surface described by pIn (resource type, flags,
    // bpp, format and sample count). Every violated rule asserts in debug builds and makes the result FALSE.

    if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
    {
        // An out-of-range mode cannot be inspected any further: the checks below use the mode as a shift
        // count and hand it to per-mode helpers, neither of which is meaningful for such a value.
        ADDR_ASSERT_ALWAYS();
        return FALSE;
    }

    BOOL_32 valid = TRUE;

    if (IsValidSwMode(pIn->swizzleMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
    const AddrResourceType    rsrcType    = pIn->resourceType;
    const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
    const BOOL_32             msaa        = (pIn->numSamples > 1);
    const BOOL_32             zbuffer     = flags.depth || flags.stencil;
    const BOOL_32             color       = flags.color;
    const BOOL_32             display     = flags.display;
    const BOOL_32             tex3d       = IsTex3d(rsrcType);
    const BOOL_32             tex2d       = IsTex2d(rsrcType);
    const BOOL_32             tex1d       = IsTex1d(rsrcType);
    const BOOL_32             thin3d      = flags.view3dAs2dArray;
    const BOOL_32             linear      = IsLinear(swizzle);
    const BOOL_32             blk256B     = IsBlock256b(swizzle);
    const BOOL_32             prt         = flags.prt;

    // Misc check
    if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
    {
        // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
    {
        // Displayable surface must use a swizzle mode accepted by IsValidDisplaySwizzleMode()
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if ((pIn->bpp == 96) && (linear == FALSE))
    {
        // 96 bpp surfaces are only accepted with a linear swizzle mode
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Single-bit mask of the requested mode; unsigned so the top bit is usable without signed overflow
    const UINT_32 swizzleMask = 1u << swizzle;

    // Resource type check
    if (tex1d)
    {
        if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex2d)
    {
        if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
        else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex3d)
    {
        if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
            (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
            (thin3d && ((swizzleMask & Gfx11Rsrc3dThinSwModeMask) == 0)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    // Swizzle type check
    if (linear)
    {
        // Linear: no depth/stencil, no MSAA, bpp must be a non-zero multiple of 8
        if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsZOrderSwizzle(swizzle))
    {
        // Z order: at most 64 bpp, MSAA only for non-color surfaces of at most 32 bpp,
        // no block-compressed or macro-pixel-packed formats
        if ((pIn->bpp > 64)                         ||
            (msaa && (color || (pIn->bpp > 32)))    ||
            ElemLib::IsBlockCompressed(pIn->format) ||
            ElemLib::IsMacroPixelPacked(pIn->format))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsStandardSwizzle(rsrcType, swizzle))
    {
        // Standard: no depth/stencil, no MSAA
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swizzle))
    {
        // Display: no depth/stencil, no MSAA
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsRtOptSwizzle(swizzle))
    {
        // Render-target optimized: no depth/stencil
        if (zbuffer)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else
    {
        // Swizzle mode belongs to none of the known swizzle types
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Block type check
    if (blk256B)
    {
        // 256B blocks: no depth/stencil, no 3D resources, no MSAA
        if (zbuffer || tex3d || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
*
*   @brief
*       Compute surface info sanity check
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    // Assume failure until both validation stages have accepted the input.
    ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;

    // The swizzle-mode checks only run once the mode-independent parameters have passed.
    if (ValidateNonSwModeParams(pIn))
    {
        if (ValidateSwModeParams(pIn))
        {
            returnCode = ADDR_OK;
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlGetPreferredSurfaceSetting
*
*   @brief
*       Internal function to get suggested surface information for client to use
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->flags.fmask)
    {
        // There is no FMASK for GFX11 ASICs.
        ADDR_ASSERT_ALWAYS();

        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        UINT_32 bpp    = pIn->bpp;
        UINT_32 width  = Max(pIn->width, 1u);
        UINT_32 height = Max(pIn->height, 1u);

        // Set format to INVALID will skip this conversion
        if (pIn->format != ADDR_FMT_INVALID)
        {
            ElemMode elemMode = ADDR_UNCOMPRESSED;
            UINT_32 expandX, expandY;

            // Get compression/expansion factors and element mode which indicates compression/expansion
            bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
                                                &elemMode,
                                                &expandX,
                                                &expandY);

            UINT_32 basePitch = 0;
            GetElemLib()->AdjustSurfaceInfo(elemMode,
                                            expandX,
                                            expandY,
                                            &bpp,
                                            &basePitch,
                                            &width,
                                            &height);
        }

        const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
        const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
        const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
        const BOOL_32 msaa         = numSamples > 1;

        // Pre sanity check on non swizzle mode parameters
        ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
        localIn.flags        = pIn->flags;
        localIn.resourceType = pIn->resourceType;
        localIn.format       = pIn->format;
        localIn.bpp          = bpp;
        localIn.width        = width;
        localIn.height       = height;
        localIn.numSlices    = numSlices;
        localIn.numMipLevels = numMipLevels;
        localIn.numSamples   = numSamples;
        localIn.numFrags     = numSamples;

        if (ValidateNonSwModeParams(&localIn))
        {
            // Forbid swizzle mode(s) by client setting
            ADDR2_SWMODE_SET allowedSwModeSet = {};
            allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
            allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx11Blk256BSwModeMask;
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThin4KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThick4KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThin64KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.macroThick64KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
            allowedSwModeSet.value |=
                pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
                ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);

            if (pIn->preferredSwSet.value != 0)
            {
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
                allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
            }

            if (pIn->noXor)
            {
                allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
            }

            if (pIn->maxAlign > 0)
            {
                if (pIn->maxAlign < Size256K)
                {
                    allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
                }

                if (pIn->maxAlign < Size64K)
                {
                    allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
                }

                if (pIn->maxAlign < Size4K)
                {
                    allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
                }

                if (pIn->maxAlign < Size256)
                {
                    allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
                }
            }

            // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
            switch (pIn->resourceType)
            {
                case ADDR_RSRC_TEX_1D:
                    allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
                    break;

                case ADDR_RSRC_TEX_2D:
                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
                    break;

                case ADDR_RSRC_TEX_3D:
                    allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;

                    if (pIn->flags.view3dAs2dArray)
                    {
                        allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask;
                    }
                    break;

                default:
                    ADDR_ASSERT_ALWAYS();
                    allowedSwModeSet.value = 0;
                    break;
            }

            if (ElemLib::IsBlockCompressed(pIn->format)  ||
                ElemLib::IsMacroPixelPacked(pIn->format) ||
                (bpp > 64)                               ||
                (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
            {
                allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
            }

            if (pIn->format == ADDR_FMT_32_32_32)
            {
                allowedSwModeSet.value &= Gfx11LinearSwModeMask;
            }

            if (msaa)
            {
                allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
            }

            if (pIn->flags.depth || pIn->flags.stencil)
            {
                allowedSwModeSet.value &= Gfx11ZSwModeMask;
            }

            if (pIn->flags.display)
            {
                allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
            }

            if (allowedSwModeSet.value != 0)
            {
#if DEBUG
                // Post sanity check, at least AddrLib should accept the output generated by its own
                UINT_32 validateSwModeSet = allowedSwModeSet.value;

                for (UINT_32 i = 0; validateSwModeSet != 0; i++)
                {
                    if (validateSwModeSet & 1)
                    {
                        localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
                        ADDR_ASSERT(ValidateSwModeParams(&localIn));
                    }

                    validateSwModeSet >>= 1;
                }
#endif

                pOut->resourceType   = pIn->resourceType;
                pOut->validSwModeSet = allowedSwModeSet;
                pOut->canXor         = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;

                GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &(pOut->validBlockSet));
                GetAllowedSwSet(allowedSwModeSet, &(pOut->validSwTypeSet));

                pOut->clientPreferredSwSet = pIn->preferredSwSet;

                if (pOut->clientPreferredSwSet.value == 0)
                {
                    pOut->clientPreferredSwSet.value = AddrSwSetAll;
                }

                // Apply optional restrictions
                if (pIn->flags.needEquation)
                {
                    UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
                                                                        ADDR_MAX_LEGACY_EQUATION_COMP;
                    FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
                }

                if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
                {
                    pOut->swizzleMode = ADDR_SW_LINEAR;
                }
                else
                {
                    const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);

                    if ((height > 1) && (computeMinSize == FALSE))
                    {
                        // Always ignore linear swizzle mode if:
                        // 1. This is a (2D/3D) resource with height > 1
                        // 2. Client doesn't require computing minimize size
                        allowedSwModeSet.swLinear = 0;
                    }

                    ADDR2_BLOCK_SET allowedBlockSet = {};
                    GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);

                    // Determine block size if there are 2 or more block type candidates
                    if (IsPow2(allowedBlockSet.value) == FALSE)
                    {
                        AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};

                        swMode[AddrBlockLinear] = ADDR_SW_LINEAR;

                        if (pOut->resourceType == ADDR_RSRC_TEX_3D)
                        {
                            swMode[AddrBlockThick4KB]   = ADDR_SW_4KB_S_X;
                            swMode[AddrBlockThin64KB]   = ADDR_SW_64KB_R_X;
                            swMode[AddrBlockThick64KB]  = ADDR_SW_64KB_S_X;
                            swMode[AddrBlockThin256KB]  = ADDR_SW_256KB_R_X;
                            swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
                        }
                        else
                        {
                            swMode[AddrBlockMicro]     = ADDR_SW_256B_D;
                            swMode[AddrBlockThin4KB]   = ADDR_SW_4KB_D_X;
                            swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_D_X;
                            swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
                        }

                        UINT_64 padSize[AddrBlockMaxTiledType] = {};

                        const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
                        const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
                        const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
                        UINT_32       minSizeBlk         = AddrBlockMicro;
                        UINT_64       minSize            = 0;

                        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};

                        for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
                        {
                            if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                            {
                                localIn.swizzleMode = swMode[i];

                                if (localIn.swizzleMode == ADDR_SW_LINEAR)
                                {
                                    returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
                                }
                                else
                                {
                                    returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
                                }

                                if (returnCode == ADDR_OK)
                                {
                                    padSize[i] = localOut.surfSize;

                                    if ((minSize == 0) ||
                                        Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
                                    {
                                        minSize    = padSize[i];
                                        minSizeBlk = i;
                                    }
                                }
                                else
                                {
                                    ADDR_ASSERT_ALWAYS();
                                    break;
                                }
                            }
                        }

                        if (pIn->memoryBudget > 1.0)
                        {
                            // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
                            // smaller-block type again in coming loop
                            switch (minSizeBlk)
                            {
                                case AddrBlockThick256KB:
                                    allowedBlockSet.gfx11.thin256KB = 0;
                                case AddrBlockThin256KB:
                                    allowedBlockSet.macroThick64KB = 0;
                                case AddrBlockThick64KB:
                                    allowedBlockSet.macroThin64KB = 0;
                                case AddrBlockThin64KB:
                                    allowedBlockSet.macroThick4KB = 0;
                                case AddrBlockThick4KB:
                                    allowedBlockSet.macroThin4KB = 0;
                                case AddrBlockThin4KB:
                                    allowedBlockSet.micro  = 0;
                                case AddrBlockMicro:
                                    allowedBlockSet.linear = 0;
                                case AddrBlockLinear:
                                    break;

                                default:
                                    ADDR_ASSERT_ALWAYS();
                                    break;
                            }

                            for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
                            {
                                if ((i != minSizeBlk) &&
                                    Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                                {
                                    if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
                                    {
                                        // Clear the block type if the memory waste is unacceptable
                                        allowedBlockSet.value &= ~(1u << (i - 1));
                                    }
                                }
                            }

                            // Remove linear block type if 2 or more block types are allowed
                            if (IsPow2(allowedBlockSet.value) == FALSE)
                            {
                                allowedBlockSet.linear = 0;
                            }

                            // Select the biggest allowed block type
                            minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;

                            if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
                            {
                                minSizeBlk = AddrBlockLinear;
                            }
                        }

                        switch (minSizeBlk)
                        {
                            case AddrBlockLinear:
                                allowedSwModeSet.value &= Gfx11LinearSwModeMask;
                                break;

                            case AddrBlockMicro:
                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
                                break;

                            case AddrBlockThin4KB:
                                ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
                                break;

                            case AddrBlockThick4KB:
                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
                                break;

                            case AddrBlockThin64KB:
                                allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
                                                          Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
                                break;

                            case AddrBlockThick64KB:
                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
                                break;

                            case AddrBlockThin256KB:
                                allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
                                                          Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
                                break;

                            case AddrBlockThick256KB:
                                ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                                allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
                                break;

                            default:
                                ADDR_ASSERT_ALWAYS();
                                allowedSwModeSet.value = 0;
                                break;
                        }
                    }

                    // Block type should be determined.
                    GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
                    ADDR_ASSERT(IsPow2(allowedBlockSet.value));

                    ADDR2_SWTYPE_SET allowedSwSet = {};
                    GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);

                    // Determine swizzle type if there are 2 or more swizzle type candidates
                    if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
                    {
                        if (ElemLib::IsBlockCompressed(pIn->format))
                        {
                            if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
                            }
                            else if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx11StandardSwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_R);
                                allowedSwModeSet.value &= Gfx11RenderSwModeMask;
                            }
                        }
                        else if (ElemLib::IsMacroPixelPacked(pIn->format))
                        {
                            if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx11StandardSwModeMask;
                            }
                            else if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_R);
                                allowedSwModeSet.value &= Gfx11RenderSwModeMask;
                            }
                        }
                        else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
                        {
                            if (pIn->flags.color && allowedSwSet.sw_R)
                            {
                                allowedSwModeSet.value &= Gfx11RenderSwModeMask;
                            }
                            else if (allowedSwSet.sw_S)
                            {
                                allowedSwModeSet.value &= Gfx11StandardSwModeMask;
                            }
                            else if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT(allowedSwSet.sw_Z);
                                allowedSwModeSet.value &= Gfx11ZSwModeMask;
                            }
                        }
                        else
                        {
                            if (allowedSwSet.sw_R)
                            {
                                allowedSwModeSet.value &= Gfx11RenderSwModeMask;
                            }
                            else if (allowedSwSet.sw_D)
                            {
                                allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
                            }
                            else if (allowedSwSet.sw_Z)
                            {
                                allowedSwModeSet.value &= Gfx11ZSwModeMask;
                            }
                            else
                            {
                                ADDR_ASSERT_ALWAYS();
                            }
                        }

                        // Swizzle type should be determined.
                        GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
                        ADDR_ASSERT(IsPow2(allowedSwSet.value));
                    }

                    // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
                    // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
                    // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
                    pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
                }
            }
            else
            {
                // Invalid combination...
                ADDR_ASSERT_ALWAYS();
                returnCode = ADDR_INVALIDPARAMS;
            }
        }
        else
        {
            // Invalid combination...
            ADDR_ASSERT_ALWAYS();
            returnCode = ADDR_INVALIDPARAMS;
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlGetPossibleSwizzleModes
*
*   @brief
*       Builds the set of swizzle modes that are still usable for the described surface once the resource type, format,
*       sample count and usage flag restrictions below have been applied. No single mode is picked here; the surviving
*       set is reported through pOut->validSwModeSet and pOut->canXor.
*
*   @return
*       ADDR_OK on success. ADDR_INVALIDPARAMS if FMASK is requested, if the non swizzle mode parameters fail
*       validation, or if every swizzle mode has been filtered out before the optional equation filter runs.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    if (pIn->flags.fmask)
    {
        // There is no FMASK for GFX11 ASICs.
        ADDR_ASSERT_ALWAYS();

        return ADDR_INVALIDPARAMS;
    }

    UINT_32 elemBpp    = pIn->bpp;
    UINT_32 elemWidth  = Max(pIn->width, 1u);
    UINT_32 elemHeight = Max(pIn->height, 1u);

    // Passing ADDR_FMT_INVALID skips this conversion and keeps the caller supplied bpp/width/height
    if (pIn->format != ADDR_FMT_INVALID)
    {
        ElemMode elemMode = ADDR_UNCOMPRESSED;
        UINT_32 expandX, expandY;

        // Query compression/expansion factors plus the element mode describing them
        elemBpp = GetElemLib()->GetBitsPerPixel(pIn->format, &elemMode, &expandX, &expandY);

        UINT_32 basePitch = 0;
        GetElemLib()->AdjustSurfaceInfo(elemMode, expandX, expandY, &elemBpp, &basePitch, &elemWidth, &elemHeight);
    }

    const UINT_32 sliceCount  = Max(pIn->numSlices, 1u);
    const UINT_32 mipCount    = Max(pIn->numMipLevels, 1u);
    const UINT_32 sampleCount = Max(pIn->numSamples, 1u);
    const BOOL_32 isMsaa      = sampleCount > 1;

    // Describe the surface without a swizzle mode so the generic parameters can be checked first
    ADDR2_COMPUTE_SURFACE_INFO_INPUT surfIn = {};
    surfIn.flags        = pIn->flags;
    surfIn.resourceType = pIn->resourceType;
    surfIn.format       = pIn->format;
    surfIn.bpp          = elemBpp;
    surfIn.width        = elemWidth;
    surfIn.height       = elemHeight;
    surfIn.numSlices    = sliceCount;
    surfIn.numMipLevels = mipCount;
    surfIn.numSamples   = sampleCount;
    surfIn.numFrags     = sampleCount;

    if (!ValidateNonSwModeParams(&surfIn))
    {
        // Invalid combination...
        ADDR_ASSERT_ALWAYS();

        return ADDR_INVALIDPARAMS;
    }

    const BOOL_32 is3d = (pIn->resourceType == ADDR_RSRC_TEX_3D);

    // Start from every mode that is appropriate for the resource dimensionality
    ADDR2_SWMODE_SET candidates = {};
    candidates.value = Gfx11LinearSwModeMask | Gfx11Blk256BSwModeMask;

    if (is3d)
    {
        candidates.value |= Gfx11Rsrc3dThick4KBSwModeMask  |
                            Gfx11Rsrc3dThin64KBSwModeMask  |
                            Gfx11Rsrc3dThick64KBSwModeMask |
                            Gfx11Rsrc3dThin256KBSwModeMask |
                            Gfx11Rsrc3dThick256KBSwModeMask;
    }
    else
    {
        candidates.value |= Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask | Gfx11Blk256KBSwModeMask;
    }

    // Drop modes that are invalid for the image attributes or forbidden by HW restrictions
    if (pIn->resourceType == ADDR_RSRC_TEX_1D)
    {
        candidates.value &= Gfx11Rsrc1dSwModeMask;
    }
    else if (pIn->resourceType == ADDR_RSRC_TEX_2D)
    {
        candidates.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
    }
    else if (is3d)
    {
        candidates.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;

        if (pIn->flags.view3dAs2dArray)
        {
            candidates.value &= Gfx11Rsrc3dThinSwModeMask;
        }
    }
    else
    {
        // Unknown resource type: nothing can be allowed
        ADDR_ASSERT_ALWAYS();
        candidates.value = 0;
    }

    // TODO: figure out if following restrictions are correct on GFX11...
    if (ElemLib::IsBlockCompressed(pIn->format)  ||
        ElemLib::IsMacroPixelPacked(pIn->format) ||
        (elemBpp > 64)                           ||
        (isMsaa && ((elemBpp > 32) || pIn->flags.color || pIn->flags.unordered)))
    {
        candidates.value &= ~Gfx11ZSwModeMask;
    }

    // 96-bit three component format is restricted to linear
    if (pIn->format == ADDR_FMT_32_32_32)
    {
        candidates.value &= Gfx11LinearSwModeMask;
    }

    if (isMsaa)
    {
        candidates.value &= Gfx11MsaaSwModeMask;
    }

    if (pIn->flags.depth || pIn->flags.stencil)
    {
        candidates.value &= Gfx11ZSwModeMask;
    }

    if (pIn->flags.display)
    {
        candidates.value &= GetValidDisplaySwizzleModes(elemBpp);
    }

    if (candidates.value == 0)
    {
        // Invalid combination...
        ADDR_ASSERT_ALWAYS();

        return ADDR_INVALIDPARAMS;
    }

#if DEBUG
    // Post sanity check, at least AddrLib should accept the output generated by its own
    for (UINT_32 mode = 0, pending = candidates.value; pending != 0; mode++, pending >>= 1)
    {
        if (pending & 1)
        {
            surfIn.swizzleMode = static_cast<AddrSwizzleMode>(mode);
            ADDR_ASSERT(ValidateSwModeParams(&surfIn));
        }
    }
#endif

    pOut->resourceType         = pIn->resourceType;
    pOut->clientPreferredSwSet = pIn->preferredSwSet;

    // An empty client preference is reported back as "all swizzle types"
    if (pOut->clientPreferredSwSet.value == 0)
    {
        pOut->clientPreferredSwSet.value = AddrSwSetAll;
    }

    // Optional restriction: keep only modes that pass the equation filter
    if (pIn->flags.needEquation)
    {
        UINT_32 maxComponents = ADDR_MAX_LEGACY_EQUATION_COMP;

        if (pIn->flags.allowExtEquation)
        {
            maxComponents = ADDR_MAX_EQUATION_COMP;
        }

        FilterInvalidEqSwizzleMode(candidates, pIn->resourceType, Log2(elemBpp >> 3), maxComponents);
    }

    pOut->validSwModeSet = candidates;
    pOut->canXor         = ((candidates.value & Gfx11XorSwModeMask) != 0) ? TRUE : FALSE;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlGetAllowedBlockSet
*
*   @brief
*       Translates a set of allowed swizzle modes into the set of block types those modes cover. 3D resources are
*       tested against the Gfx11Rsrc3d* masks, every other resource type against the Gfx11Blk* masks.
*
*   @return
*       Always ADDR_OK
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedBlockSet(
    ADDR2_SWMODE_SET allowedSwModeSet,  ///< [in] allowed swizzle modes
    AddrResourceType rsrcType,          ///< [in] resource type
    ADDR2_BLOCK_SET* pAllowedBlockSet   ///< [out] allowed block sizes
    ) const
{
    const UINT_32 swModes = allowedSwModeSet.value;

    // Every bit starts cleared; a block type is flagged only when at least one of its swizzle modes is allowed
    ADDR2_BLOCK_SET blocks = {};

    if (swModes & Gfx11LinearSwModeMask)
    {
        blocks.linear = TRUE;
    }

    if (swModes & Gfx11Blk256BSwModeMask)
    {
        blocks.micro = TRUE;
    }

    if (rsrcType != ADDR_RSRC_TEX_3D)
    {
        if (swModes & Gfx11Blk4KBSwModeMask)
        {
            blocks.macroThin4KB = TRUE;
        }

        if (swModes & Gfx11Blk64KBSwModeMask)
        {
            blocks.macroThin64KB = TRUE;
        }

        if (swModes & Gfx11Blk256KBSwModeMask)
        {
            blocks.gfx11.thin256KB = TRUE;
        }
    }
    else
    {
        if (swModes & Gfx11Rsrc3dThick4KBSwModeMask)
        {
            blocks.macroThick4KB = TRUE;
        }

        if (swModes & Gfx11Rsrc3dThin64KBSwModeMask)
        {
            blocks.macroThin64KB = TRUE;
        }

        if (swModes & Gfx11Rsrc3dThick64KBSwModeMask)
        {
            blocks.macroThick64KB = TRUE;
        }

        if (swModes & Gfx11Rsrc3dThin256KBSwModeMask)
        {
            blocks.gfx11.thin256KB = TRUE;
        }

        if (swModes & Gfx11Rsrc3dThick256KBSwModeMask)
        {
            blocks.gfx11.thick256KB = TRUE;
        }
    }

    *pAllowedBlockSet = blocks;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlGetAllowedSwSet
*
*   @brief
*       Translates a set of allowed swizzle modes into the set of swizzle types (Z, S, D, R) those modes cover
*
*   @return
*       Always ADDR_OK
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedSwSet(
    ADDR2_SWMODE_SET  allowedSwModeSet, ///< [in] allowed swizzle modes
    ADDR2_SWTYPE_SET* pAllowedSwSet     ///< [out] allowed swizzle types
    ) const
{
    const UINT_32 swModes = allowedSwModeSet.value;

    // Every bit starts cleared; a swizzle type is flagged only when at least one of its modes is allowed
    ADDR2_SWTYPE_SET swTypes = {};

    if (swModes & Gfx11ZSwModeMask)
    {
        swTypes.sw_Z = TRUE;
    }

    if (swModes & Gfx11StandardSwModeMask)
    {
        swTypes.sw_S = TRUE;
    }

    if (swModes & Gfx11DisplaySwModeMask)
    {
        swTypes.sw_D = TRUE;
    }

    if (swModes & Gfx11RenderSwModeMask)
    {
        swTypes.sw_R = TRUE;
    }

    *pAllowedSwSet = swTypes;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeStereoInfo
*
*   @brief
*       Compute height alignment and right eye pipeBankXor for stereo surface. *pRightXor is always reset to 0 first;
*       swizzle modes for which IsNonPrtXor() is false leave it at 0 and succeed.
*
*   @return
*       ADDR_OK, or ADDR_INVALIDPARAMS when no equation exists for the resource type / swizzle mode / element size
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
    UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
    UINT_32*                                pRightXor   ///< Right eye xor
    ) const
{
    *pRightXor = 0;

    if (!IsNonPrtXor(pIn->swizzleMode))
    {
        // Nothing to adjust for this swizzle mode
        return ADDR_OK;
    }

    const UINT_32 blockBits   = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 bytesLog2   = Log2(pIn->bpp >> 3);
    const UINT_32 rsrcIdx     = static_cast<UINT_32>(pIn->resourceType) - 1;
    const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 eqIndex     = m_equationLookupTable[rsrcIdx][swModeIdx][bytesLog2];

    if (eqIndex == ADDR_INVALID_EQUATION_INDEX)
    {
        return ADDR_INVALIDPARAMS;
    }

    // Pass 1: find the highest index used by a channel-1 ("y") term in the address bits above the pipe interleave
    UINT_32 topYBit = 0;

    for (UINT_32 bit = m_pipeInterleaveLog2; bit < blockBits; bit++)
    {
        ADDR_ASSERT(m_equationTable[eqIndex].addr[bit].valid == 1);

        if ((m_equationTable[eqIndex].addr[bit].channel == 1) &&
            (m_equationTable[eqIndex].addr[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIndex].addr[bit].index;
        }

        if ((m_equationTable[eqIndex].xor1[bit].valid == 1) &&
            (m_equationTable[eqIndex].xor1[bit].channel == 1) &&
            (m_equationTable[eqIndex].xor1[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIndex].xor1[bit].index;
        }

        if ((m_equationTable[eqIndex].xor2[bit].valid == 1) &&
            (m_equationTable[eqIndex].xor2[bit].channel == 1) &&
            (m_equationTable[eqIndex].xor2[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIndex].xor2[bit].index;
        }
    }

    // Pass 2: record every address bit position to which that "max y bit" contributes
    UINT_32 topYPositions = 0;

    for (UINT_32 bit = m_pipeInterleaveLog2; bit < blockBits; bit++)
    {
        if (((m_equationTable[eqIndex].addr[bit].channel == 1) &&
             (m_equationTable[eqIndex].addr[bit].index == topYBit)) ||
            ((m_equationTable[eqIndex].xor1[bit].valid == 1) &&
             (m_equationTable[eqIndex].xor1[bit].channel == 1) &&
             (m_equationTable[eqIndex].xor1[bit].index == topYBit)) ||
            ((m_equationTable[eqIndex].xor2[bit].valid == 1) &&
             (m_equationTable[eqIndex].xor2[bit].channel == 1) &&
             (m_equationTable[eqIndex].xor2[bit].index == topYBit)))
        {
            topYPositions |= 1u << bit;
        }
    }

    const UINT_32 stereoAlign = 1 << topYBit;

    // Only take over when this alignment is at least as strict as the one already requested
    if (stereoAlign >= *pAlignY)
    {
        *pAlignY = stereoAlign;

        const UINT_32 paddedHeight = PowTwoAlign(pIn->height, stereoAlign);

        // The right eye xor is applied only when the aligned height has the "max y bit" set
        if (((paddedHeight >> topYBit) & 1) != 0)
        {
            *pRightXor = topYPositions >> m_pipeInterleaveLog2;
        }
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSurfaceInfoTiled
*
*   @brief
*       Internal function to calculate alignment for tiled surface. Resets the mip chain related outputs to their
*       defaults and then dispatches to the micro tiled path for 256B blocks or to the macro tiled path otherwise.
*
*   @return
*       ADDR_E_RETURNCODE of the selected micro/macro tiled computation
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // Mip chain dimension and epitch have no meaning in GFX11, so report default values
    pOut->epitchIsHeight   = FALSE;
    pOut->mipChainSlice    = 0;
    pOut->mipChainHeight   = 0;
    pOut->mipChainPitch    = 0;

    // Defaults for the mip tail; ComputeSurfaceInfoMacroTiled() provides the real values if necessary
    pOut->firstMipIdInTail = pIn->numMipLevels;
    pOut->mipChainInTail   = FALSE;

    return IsBlock256b(pIn->swizzleMode) ? ComputeSurfaceInfoMicroTiled(pIn, pOut)
                                         : ComputeSurfaceInfoMacroTiled(pIn, pOut);
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeSurfaceInfoMicroTiled
*
*   @brief
*       Internal function to calculate alignment for micro tiled surface. Pitch and height are aligned to the block
*       dimensions reported by ComputeBlockDimensionForSurf(); the slice count is passed through unchanged. For
*       multi-mip surfaces the per-mip offsets are accumulated starting from the highest mip index, so the last
*       mip level is placed at offset 0 and mip 0 at the largest offset.
*
*   @return
*       ADDR_E_RETURNCODE (the result of ComputeBlockDimensionForSurf(); outputs are only written on ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                         &pOut->blockHeight,
                                                         &pOut->blockSlices,
                                                         pIn->bpp,
                                                         pIn->numSamples,
                                                         pIn->resourceType,
                                                         pIn->swizzleMode);

    if (ret == ADDR_OK)
    {
        const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);

        pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
        pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
        pOut->numSlices = pIn->numSlices;
        pOut->baseAlign = blockSize;

        if (pIn->numMipLevels > 1)
        {
            const UINT_32 mip0Width    = pIn->width;
            const UINT_32 mip0Height   = pIn->height;
            UINT_64       mipSliceSize = 0;

            // Walk from the last mip level down to mip 0; the running total is each level's offset in the slice
            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);

                const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
                const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].pitch            = mipActualWidth;
                    pOut->pMipInfo[i].height           = mipActualHeight;
                    pOut->pMipInfo[i].depth            = 1;
                    pOut->pMipInfo[i].offset           = mipSliceSize;
                    pOut->pMipInfo[i].mipTailOffset    = 0;
                    pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
                }

                // Widen before multiplying: the product of two 32-bit dimensions and the element size can exceed
                // 32 bits, and the single-mip branch below already performs this computation in 64 bits.
                mipSliceSize += static_cast<UINT_64>(mipActualWidth) * mipActualHeight * (pIn->bpp >> 3);
            }

            pOut->sliceSize = mipSliceSize;
            pOut->surfSize  = mipSliceSize * pOut->numSlices;
        }
        else
        {
            pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
            pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].pitch            = pOut->pitch;
                pOut->pMipInfo[0].height           = pOut->height;
                pOut->pMipInfo[0].depth            = 1;
                pOut->pMipInfo[0].offset           = 0;
                pOut->pMipInfo[0].mipTailOffset    = 0;
                pOut->pMipInfo[0].macroBlockOffset = 0;
            }
        }

    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeSurfaceInfoMacroTiled
*
*   @brief
*       Internal function to calculate alignment for macro tiled surface. Computes the block-aligned pitch, height
*       and slice count, the slice/surface sizes and, when pOut->pMipInfo is provided, the per-mip layout. For
*       multi-mip surfaces the first mip level that satisfies IsInMipTail() and all later levels share one block
*       (the mip tail); the remaining levels are placed after the tail, with mip 0 at the largest offset.
*
*   @return
*       ADDR_E_RETURNCODE (from ComputeBlockDimensionForSurf() or, for qbStereo surfaces, ComputeStereoInfo())
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                                &pOut->blockHeight,
                                                                &pOut->blockSlices,
                                                                pIn->bpp,
                                                                pIn->numSamples,
                                                                pIn->resourceType,
                                                                pIn->swizzleMode);

    if (returnCode == ADDR_OK)
    {
        UINT_32 heightAlign = pOut->blockHeight;

        if (pIn->flags.qbStereo)
        {
            UINT_32 rightXor = 0;

            // May update heightAlign in addition to producing the right-eye swizzle
            returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);

            // pStereoInfo is optional: ComputeSurfaceAddrFromCoordMacroTiled() calls this function with a
            // zero-initialized output structure while forwarding the caller's flags, so guard the store.
            if ((returnCode == ADDR_OK) && (pOut->pStereoInfo != NULL))
            {
                pOut->pStereoInfo->rightSwizzle = rightXor;
            }
        }

        if (returnCode == ADDR_OK)
        {
            const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
            const UINT_32 blockSize     = 1 << blockSizeLog2;

            pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
            pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
            pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
            pOut->baseAlign = blockSize;

            if (pIn->numMipLevels > 1)
            {
                const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
                                                                pIn->swizzleMode,
                                                                pOut->blockWidth,
                                                                pOut->blockHeight,
                                                                pOut->blockSlices);
                const UINT_32 mip0Width         = pIn->width;
                const UINT_32 mip0Height        = pIn->height;
                const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
                const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
                const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
                const UINT_32 index             = Log2(pIn->bpp >> 3);
                UINT_32       firstMipInTail    = pIn->numMipLevels;
                UINT_64       mipChainSliceSize = 0;
                UINT_64       mipSize[MaxMipLevels];
                UINT_64       mipSliceSize[MaxMipLevels];

                // For htile, we need to make z16 and stencil enter the mip tail at the same time as z32 would.
                // Each dimension is scaled by the ratio of that same dimension between this element size's 256B
                // block and the 32bpp (index 2) 256B block; width and height must each use their own ratio.
                Dim3d fixedTailMaxDim = tailMaxDim;
                if (IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
                {
                    fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
                    fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
                }

                // Pass 1: size every mip level that is not in the tail and find where the tail starts
                for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
                {
                    UINT_32 mipWidth, mipHeight, mipDepth;

                    GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);

                    if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
                    {
                        // This level and all remaining ones share a single block
                        firstMipInTail     = i;
                        mipChainSliceSize += blockSize / pOut->blockSlices;
                        break;
                    }
                    else
                    {
                        const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
                        const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
                        const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
                        const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);

                        mipSize[i]         = sliceSize * depth;
                        mipSliceSize[i]    = sliceSize * pOut->blockSlices;
                        mipChainSliceSize += sliceSize;

                        if (pOut->pMipInfo != NULL)
                        {
                            pOut->pMipInfo[i].pitch  = pitch;
                            pOut->pMipInfo[i].height = height;
                            pOut->pMipInfo[i].depth  = depth;
                        }
                    }
                }

                pOut->sliceSize        = mipChainSliceSize;
                pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
                pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
                pOut->firstMipIdInTail = firstMipInTail;

                // Pass 2: per-mip offsets and mip tail coordinates, only when the caller asked for them
                if (pOut->pMipInfo != NULL)
                {
                    UINT_64 offset         = 0;
                    UINT_64 macroBlkOffset = 0;
                    UINT_32 tailMaxDepth   = 0;

                    if (firstMipInTail != pIn->numMipLevels)
                    {
                        UINT_32 mipWidth, mipHeight;

                        GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
                                   &mipWidth, &mipHeight, &tailMaxDepth);

                        // The tail occupies the start of the surface; non-tail levels begin after it
                        offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
                        macroBlkOffset = blockSize;
                    }

                    // Non-tail levels are laid out from the last one before the tail back to mip 0
                    for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
                    {
                        pOut->pMipInfo[i].offset           = offset;
                        pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
                        pOut->pMipInfo[i].mipTailOffset    = 0;

                        offset         += mipSize[i];
                        macroBlkOffset += mipSliceSize[i];
                    }

                    UINT_32 pitch  = tailMaxDim.w;
                    UINT_32 height = tailMaxDim.h;
                    UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);

                    // From here on tailMaxDepth counts 256B-block layers rather than slices
                    tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);

                    for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
                    {
                        // Tail slots are assigned in descending order: the first tail mip gets the highest slot
                        const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
                        const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);

                        pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
                        pOut->pMipInfo[i].mipTailOffset    = mipOffset;
                        pOut->pMipInfo[i].macroBlockOffset = 0;

                        pOut->pMipInfo[i].pitch  = pitch;
                        pOut->pMipInfo[i].height = height;
                        pOut->pMipInfo[i].depth  = depth;

                        // De-interleave mipOffset bits 8 and up: bits 9, 11, 13, ... form mipX and
                        // bits 8, 10, 12, ... form mipY (six bits each)
                        UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
                                       ((mipOffset >> 10) & 2)  |
                                       ((mipOffset >> 11) & 4)  |
                                       ((mipOffset >> 12) & 8)  |
                                       ((mipOffset >> 13) & 16) |
                                       ((mipOffset >> 14) & 32);
                        UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
                                       ((mipOffset >> 9)  & 2)  |
                                       ((mipOffset >> 10) & 4)  |
                                       ((mipOffset >> 11) & 8)  |
                                       ((mipOffset >> 12) & 16) |
                                       ((mipOffset >> 13) & 32);

                        // Odd block-size exponents swap the roles of X and Y
                        if (blockSizeLog2 & 1)
                        {
                            const UINT_32 temp = mipX;
                            mipX = mipY;
                            mipY = temp;

                            if (index & 1)
                            {
                                mipY = (mipY << 1) | (mipX & 1);
                                mipX = mipX >> 1;
                            }
                        }

                        // Convert 256B-block coordinates to element coordinates, then halve the dimensions
                        // (clamped to one 256B block) for the next tail level
                        if (isThin)
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_2d[index].w);
                            height = Max(height >> 1, Block256_2d[index].h);
                            depth  = 1;
                        }
                        else
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_3d[index].w);
                            height = Max(height >> 1, Block256_3d[index].h);
                            depth  = PowTwoAlign(Max(depth  >> 1, 1u), Block256_3d[index].d);
                        }
                    }
                }
            }
            else
            {
                // Single mip level: one slice is simply the aligned 2D extent times element size and sample count
                pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
                pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[0].pitch            = pOut->pitch;
                    pOut->pMipInfo[0].height           = pOut->height;
                    pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
                    pOut->pMipInfo[0].offset           = 0;
                    pOut->pMipInfo[0].mipTailOffset    = 0;
                    pOut->pMipInfo[0].macroBlockOffset = 0;
                    pOut->pMipInfo[0].mipTailCoordX    = 0;
                    pOut->pMipInfo[0].mipTailCoordY    = 0;
                    pOut->pMipInfo[0].mipTailCoordZ    = 0;
                }
            }
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface. Swizzle modes with 256B
*       blocks take the micro-tiled path; every other mode takes the macro-tiled path.
*
*   @return
*       ADDR_E_RETURNCODE forwarded from the selected path
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    return IsBlock256b(pIn->swizzleMode) ? ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut)
                                         : ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeOffsetFromEquation
*
*   @brief
*       Compute offset from equation. Each of the equation's numBits output bits is the XOR of one selected
*       coordinate bit per valid component; channel 0 selects from x, channel 1 from y and channel 2 from z.
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
    const ADDR_EQUATION* pEq,   ///< Equation
    UINT_32              x,     ///< x coord in bytes
    UINT_32              y,     ///< y coord in pixel
    UINT_32              z      ///< z coord in slice
    ) const
{
    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < pEq->numBits; bit++)
    {
        UINT_32 parity = 0;

        for (UINT_32 comp = 0; comp < pEq->numBitComponents; comp++)
        {
            // Components that are not marked valid contribute nothing to this output bit
            if (!pEq->comps[comp][bit].valid)
            {
                continue;
            }

            // Pick the coordinate this component reads from
            UINT_32 coord;

            if (pEq->comps[comp][bit].channel == 0)
            {
                coord = x;
            }
            else if (pEq->comps[comp][bit].channel == 1)
            {
                coord = y;
            }
            else
            {
                ADDR_ASSERT(pEq->comps[comp][bit].channel == 2);
                coord = z;
            }

            parity ^= (coord >> pEq->comps[comp][bit].index) & 1;
        }

        result |= (parity << bit);
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeOffsetFromSwizzlePattern
*
*   @brief
*       Compute offset from swizzle pattern. The pattern is read as an array of ADDR_BIT_SETTING entries, one per
*       output bit; each entry holds a 16-bit mask for x, y, z and s. Output bit i is the XOR of every coordinate
*       bit selected by the four masks of entry i.
*
*   @return
*       Offset
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    ) const
{
    const ADDR_BIT_SETTING* pBitSettings = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
    const UINT_32           coords[4]    = { x, y, z, s };
    UINT_32                 result       = 0;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        // Masks in the same channel order as coords[]
        const UINT_16 masks[4] =
        {
            pBitSettings[bit].x,
            pBitSettings[bit].y,
            pBitSettings[bit].z,
            pBitSettings[bit].s
        };

        UINT_32 parity = 0;

        for (UINT_32 channel = 0; channel < 4; channel++)
        {
            // XOR together all coordinate bits picked by this channel's mask
            for (UINT_32 selected = coords[channel] & masks[channel]; selected != 0; selected >>= 1)
            {
                parity ^= selected & 1;
            }
        }

        result |= (parity << bit);
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetSwizzlePatternInfo
*
*   @brief
*       Get swizzle pattern. Selects a pattern table from the resource type, swizzle mode, block size and (for
*       non-3D Z/R modes) fragment count, then returns the entry for the element size. For XOR swizzle modes the
*       entry index is additionally offset by m_colorBaseIndex.
*
*   @return
*       Swizzle pattern information, or NULL when the mode is linear, is not in the supported mode mask for the
*       resource type, or no table is selected for the combination
************************************************************************************************************************
*/
const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
    AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
    AddrResourceType resourceType,      ///< Resource type
    UINT_32          elemLog2,          ///< Element size in bytes log2
    UINT_32          numFrag            ///< Number of fragment
    ) const
{
    const UINT_32          entry     = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
    const ADDR_SW_PATINFO* pTable    = NULL;
    const UINT_32          modeBit   = 1 << swizzleMode;
    const BOOL_32          is256KBlk = IsBlock256kb(swizzleMode);
    const BOOL_32          is64KBlk  = IsBlock64kb(swizzleMode);

    // Linear surfaces have no swizzle pattern
    if (IsLinear(swizzleMode) != FALSE)
    {
        return NULL;
    }

    if (resourceType == ADDR_RSRC_TEX_3D)
    {
        ADDR_ASSERT(numFrag == 1);

        if ((modeBit & Gfx11Rsrc3dSwModeMask) == 0)
        {
            return NULL;
        }

        if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
        {
            if (is256KBlk)
            {
                ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
                pTable = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
            }
            else if (is64KBlk)
            {
                ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
                pTable = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
        else if (IsDisplaySwizzle(resourceType, swizzleMode))
        {
            if (is256KBlk)
            {
                // No table is selected for this mode (the assignment below is disabled), so NULL is returned
                ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
                // pTable = GFX11_SW_256K_D3_X_PATINFO;
            }
            else if (is64KBlk)
            {
                ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
                pTable = GFX11_SW_64K_D3_X_PATINFO;
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
        else
        {
            ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));

            if (is256KBlk)
            {
                ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
                pTable = GFX11_SW_256K_S3_X_PATINFO;
            }
            else if (is64KBlk)
            {
                if (swizzleMode == ADDR_SW_64KB_S)
                {
                    pTable = GFX11_SW_64K_S3_PATINFO;
                }
                else if (swizzleMode == ADDR_SW_64KB_S_X)
                {
                    pTable = GFX11_SW_64K_S3_X_PATINFO;
                }
                else if (swizzleMode == ADDR_SW_64KB_S_T)
                {
                    pTable = GFX11_SW_64K_S3_T_PATINFO;
                }
                else
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
            else if (IsBlock4kb(swizzleMode))
            {
                if (swizzleMode == ADDR_SW_4KB_S)
                {
                    pTable = GFX11_SW_4K_S3_PATINFO;
                }
                else if (swizzleMode == ADDR_SW_4KB_S_X)
                {
                    pTable = GFX11_SW_4K_S3_X_PATINFO;
                }
                else
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
    }
    else
    {
        if ((modeBit & Gfx11Rsrc2dSwModeMask) == 0)
        {
            return NULL;
        }

        if (IsBlock256b(swizzleMode))
        {
            ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
            pTable = GFX11_SW_256_D_PATINFO;
        }
        else if (IsBlock4kb(swizzleMode))
        {
            if (swizzleMode == ADDR_SW_4KB_D)
            {
                pTable = GFX11_SW_4K_D_PATINFO;
            }
            else if (swizzleMode == ADDR_SW_4KB_D_X)
            {
                pTable = GFX11_SW_4K_D_X_PATINFO;
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
        else if (is64KBlk)
        {
            if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
            {
                // Z/R modes have one table per fragment count
                switch (numFrag)
                {
                    case 1:
                        pTable = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
                        break;
                    case 2:
                        pTable = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
                        break;
                    case 4:
                        pTable = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
                        break;
                    case 8:
                        pTable = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
                        break;
                    default:
                        ADDR_ASSERT_ALWAYS();
                        break;
                }
            }
            else if (IsDisplaySwizzle(resourceType, swizzleMode))
            {
                if (swizzleMode == ADDR_SW_64KB_D)
                {
                    pTable = GFX11_SW_64K_D_PATINFO;
                }
                else if (swizzleMode == ADDR_SW_64KB_D_X)
                {
                    pTable = GFX11_SW_64K_D_X_PATINFO;
                }
                else if (swizzleMode == ADDR_SW_64KB_D_T)
                {
                    pTable = GFX11_SW_64K_D_T_PATINFO;
                }
                else
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
        else if (is256KBlk)
        {
            if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
            {
                // Z/R modes have one table per fragment count
                switch (numFrag)
                {
                    case 1:
                        pTable = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
                        break;
                    case 2:
                        pTable = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
                        break;
                    case 4:
                        pTable = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
                        break;
                    case 8:
                        pTable = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
                        break;
                    default:
                        ADDR_ASSERT_ALWAYS();
                        break;
                }
            }
            else if (IsDisplaySwizzle(resourceType, swizzleMode))
            {
                ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
                pTable = GFX11_SW_256K_D_X_PATINFO;
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }
        }
        else
        {
            ADDR_ASSERT_ALWAYS();
        }
    }

    return (pTable != NULL) ? &pTable[entry] : NULL;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
*
*   @brief
*       Internal function to calculate address from coord for micro tiled swizzle surface. Recomputes the surface
*       layout with ComputeSurfaceInfoMicroTiled(), locates the 256B block that contains (x, y) within the
*       requested mip level, and adds the in-block offset produced by the swizzle equation.
*
*   @return
*       ADDR_E_RETURNCODE: the layout computation's error if it fails, ADDR_INVALIDPARAMS when no equation exists
*       for the resource type / swizzle mode / element size combination, ADDR_OK otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];

    // Rebuild a surface-info request from the address query; zero-valued counts and sizes are clamped to 1
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.flags        = pIn->flags;
    localIn.resourceType = pIn->resourceType;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unalignedWidth,  1u);
    localIn.height       = Max(pIn->unalignedHeight, 1u);
    localIn.numSlices    = Max(pIn->numSlices,       1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
    localIn.numSamples   = Max(pIn->numSamples,      1u);
    localIn.numFrags     = localIn.numSamples;
    localOut.pMipInfo    = mipInfo;

    ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);

    if (ret == ADDR_OK)
    {
        const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
        const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
        const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
        const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];

        if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
        {
            // pb = blocks per row of this mip; (xb, yb) = block coordinates of the requested element
            const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
            const UINT_32 yb           = pIn->y / localOut.blockHeight;
            const UINT_32 xb           = pIn->x / localOut.blockWidth;
            const UINT_32 blockIndex   = yb * pb + xb;
            const UINT_32 blockSize    = 256;
            // The equation takes x in bytes, hence the shift by the element size
            const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
                                                                   pIn->x << elemLog2,
                                                                   pIn->y,
                                                                   0);
            // Widen the block byte offset before multiplying so it cannot wrap at 32 bits; the macro-tiled
            // path keeps its block index in 64 bits for the same reason.
            pOut->addr = localOut.sliceSize * pIn->slice +
                         mipInfo[pIn->mipId].macroBlockOffset +
                         (static_cast<UINT_64>(blockIndex) * blockSize) +
                         blk256Offset;
        }
        else
        {
            ret = ADDR_INVALIDPARAMS;
        }
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
*
*   @brief
*       Internal function to calculate address from coord for macro tiled swizzle surface.
*
*       The surface layout is recomputed with ComputeSurfaceInfoMacroTiled() and the address is then assembled
*       from the slice base, the base of the block that contains (x, y) and the offset inside that block, the
*       latter XORed with the pipe/bank XOR bits. The single-sample path additionally adds the mip level's
*       macro block offset and derives the in-block offset from the equation table; the multi-sample path
*       derives it from the swizzle pattern.
*
*   @note
*       pIn->mipId indexes a local array of MaxMipLevels entries and is not range-checked in this function.
*
*   @return
*       ADDR_OK on success, the error reported by ComputeSurfaceInfoMacroTiled() if the layout query fails, or
*       ADDR_INVALIDPARAMS when no swizzle pattern (multi-sample) or no equation (single-sample) is available.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
    ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];

    // Describe the surface again so that its layout can be queried; zero dimensions/counts are clamped to 1.
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.flags        = pIn->flags;
    localIn.resourceType = pIn->resourceType;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unalignedWidth,  1u);
    localIn.height       = Max(pIn->unalignedHeight, 1u);
    localIn.numSlices    = Max(pIn->numSlices,       1u);
    localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
    localIn.numSamples   = Max(pIn->numSamples,      1u);
    localIn.numFrags     = localIn.numSamples;
    localOut.pMipInfo    = mipInfo;

    ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);

    if (ret == ADDR_OK)
    {
        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
        const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);

        // The masks are built with unsigned literals so that no signed left shift is involved.
        const UINT_32 blkMask     = (1u << blkSizeLog2) - 1u;
        const UINT_32 pipeMask    = (1u << m_pipesLog2) - 1u;
        const UINT_32 bankMask    = ((1u << GetBankXorBits(blkSizeLog2)) - 1u) << (m_pipesLog2 + ColumnBits);

        // Only XOR swizzle modes apply the pipe/bank XOR; it is shifted above the pipe interleave bits and
        // confined to the bits that address within one block.
        const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
                                    (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;

        if (localIn.numSamples > 1)
        {
            // Multi-sample: the in-block offset comes from the swizzle pattern.
            const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                    pIn->resourceType,
                                                                    elemLog2,
                                                                    localIn.numSamples);

            if (pPatInfo != NULL)
            {
                const UINT_32 pb     = localOut.pitch / localOut.blockWidth;
                const UINT_32 yb     = pIn->y / localOut.blockHeight;
                const UINT_32 xb     = pIn->x / localOut.blockWidth;

                // Widen before multiplying so the block index cannot wrap at 32 bits.
                const UINT_64 blkIdx = static_cast<UINT_64>(yb) * pb + xb;

                ADDR_BIT_SETTING fullSwizzlePattern[20];
                GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);

                const UINT_32 blkOffset =
                    ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
                                                    blkSizeLog2,
                                                    pIn->x,
                                                    pIn->y,
                                                    pIn->slice,
                                                    pIn->sample);

                pOut->addr = (localOut.sliceSize * pIn->slice) +
                             (blkIdx << blkSizeLog2) +
                             (blkOffset ^ pipeBankXor);
            }
            else
            {
                ret = ADDR_INVALIDPARAMS;
            }
        }
        else
        {
            // Single-sample: the in-block offset comes from the equation table.
            const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
            const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
            const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];

            if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
            {
                // A non-zero mip tail offset marks this mip as living in the mip tail; in that case the
                // coordinates fed to the equation are shifted by the mip's tail coordinates.
                const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
                const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);

                // For non-thin surfaces one block spans blockSlices slices, so slices are counted in blocks.
                const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
                const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
                const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
                const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
                const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
                const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
                const UINT_32 yb        = pIn->y / localOut.blockHeight;
                const UINT_32 xb        = pIn->x / localOut.blockWidth;

                // Widen before multiplying so the block index cannot wrap at 32 bits.
                const UINT_64 blkIdx    = static_cast<UINT_64>(yb) * pb + xb;
                const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
                                                                    x << elemLog2,
                                                                    y,
                                                                    z);
                pOut->addr = sliceSize * sliceId +
                             mipInfo[pIn->mipId].macroBlockOffset +
                             (blkIdx << blkSizeLog2) +
                             (blkOffset ^ pipeBankXor);
            }
            else
            {
                ret = ADDR_INVALIDPARAMS;
            }
        }
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeMaxBaseAlignments
*
*   @brief
*       Reports the largest base alignment of a surface
*   @return
*       Size256K
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
{
    // The value is a fixed constant; nothing is derived from the chip configuration here.
    const UINT_32 maxBaseAlign = Size256K;

    return maxBaseAlign;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeMaxMetaBaseAlignments
*
*   @brief
*       Reports the largest base alignment of metadata by querying GetMetaBlkSize() for every combination of
*       swizzle mode, element size and fragment count listed below (Htile, 2D DCC and 3D DCC)
*   @return
*       The largest meta block size returned by any of those queries
************************************************************************************************************************
*/
UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
{
    // GetMetaBlkSize() needs somewhere to write block dimensions; only its return value is consumed here.
    Dim3d blkDim;

    // Running maximum across all three metadata flavors.
    UINT_32 largestMetaBlk = 0;

    // ---- Htile: element sizes log2 0..2, fragment counts log2 0..3 ----
    const AddrSwizzleMode HtileSwModes[] =
    {
        ADDR_SW_64KB_Z_X,
        ADDR_SW_256KB_Z_X,
    };
    const UINT_32 numHtileSwModes = sizeof(HtileSwModes) / sizeof(HtileSwModes[0]);

    for (UINT_32 modeIdx = 0; modeIdx < numHtileSwModes; modeIdx++)
    {
        for (UINT_32 elemLog2 = 0; elemLog2 < 3; elemLog2++)
        {
            for (UINT_32 fragLog2 = 0; fragLog2 < 4; fragLog2++)
            {
                const UINT_32 htileBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
                                                            ADDR_RSRC_TEX_2D,
                                                            HtileSwModes[modeIdx],
                                                            elemLog2,
                                                            fragLog2,
                                                            TRUE,
                                                            &blkDim);

                largestMetaBlk = Max(largestMetaBlk, htileBlkSize);
            }
        }
    }

    // ---- 2D DCC: every element size, fragment counts log2 0..3 ----
    const AddrSwizzleMode Dcc2dSwModes[] =
    {
        ADDR_SW_64KB_R_X,
        ADDR_SW_256KB_R_X,
    };
    const UINT_32 numDcc2dSwModes = sizeof(Dcc2dSwModes) / sizeof(Dcc2dSwModes[0]);

    for (UINT_32 modeIdx = 0; modeIdx < numDcc2dSwModes; modeIdx++)
    {
        for (UINT_32 elemLog2 = 0; elemLog2 < MaxNumOfBpp; elemLog2++)
        {
            for (UINT_32 fragLog2 = 0; fragLog2 < 4; fragLog2++)
            {
                const UINT_32 dcc2dBlkSize = GetMetaBlkSize(Gfx11DataColor,
                                                            ADDR_RSRC_TEX_2D,
                                                            Dcc2dSwModes[modeIdx],
                                                            elemLog2,
                                                            fragLog2,
                                                            TRUE,
                                                            &blkDim);

                largestMetaBlk = Max(largestMetaBlk, dcc2dBlkSize);
            }
        }
    }

    // ---- 3D DCC: every element size, always a single fragment ----
    const AddrSwizzleMode Dcc3dSwModes[] =
    {
        ADDR_SW_64KB_S_X,
        ADDR_SW_64KB_D_X,
        ADDR_SW_64KB_R_X,
        ADDR_SW_256KB_S_X,
        ADDR_SW_256KB_D_X,
        ADDR_SW_256KB_R_X,
    };
    const UINT_32 numDcc3dSwModes = sizeof(Dcc3dSwModes) / sizeof(Dcc3dSwModes[0]);

    for (UINT_32 modeIdx = 0; modeIdx < numDcc3dSwModes; modeIdx++)
    {
        for (UINT_32 elemLog2 = 0; elemLog2 < MaxNumOfBpp; elemLog2++)
        {
            const UINT_32 dcc3dBlkSize = GetMetaBlkSize(Gfx11DataColor,
                                                        ADDR_RSRC_TEX_3D,
                                                        Dcc3dSwModes[modeIdx],
                                                        elemLog2,
                                                        0,
                                                        TRUE,
                                                        &blkDim);

            largestMetaBlk = Max(largestMetaBlk, dcc3dBlkSize);
        }
    }

    return largestMetaBlk;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetMetaElementSizeLog2
*
*   @brief
*       Looks up the log2 of the metadata element size for a data surface type
*   @return
*       0 for Gfx11DataColor, 2 otherwise (the type is asserted to be Gfx11DataDepthStencil)
************************************************************************************************************************
*/
INT_32 Gfx11Lib::GetMetaElementSizeLog2(
    Gfx11DataType dataType) ///< Data surface type
{
    if (dataType == Gfx11DataColor)
    {
        return 0;
    }

    // Anything that is not color is expected to be depth/stencil.
    ADDR_ASSERT(dataType == Gfx11DataDepthStencil);

    return 2;
}

/**
************************************************************************************************************************
*   Gfx11Lib::GetMetaCacheSizeLog2
*
*   @brief
*       Looks up the log2 of the metadata cache line size for a data surface type
*   @return
*       6 for Gfx11DataColor, 8 otherwise (the type is asserted to be Gfx11DataDepthStencil)
************************************************************************************************************************
*/
INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
    Gfx11DataType dataType) ///< Data surface type
{
    if (dataType == Gfx11DataColor)
    {
        return 6;
    }

    // Anything that is not color is expected to be depth/stencil.
    ADDR_ASSERT(dataType == Gfx11DataDepthStencil);

    return 8;
}

/**
************************************************************************************************************************
*   Gfx11Lib::HwlComputeSurfaceInfoLinear
*
*   @brief
*       Internal function to compute pitch, height, sizes and alignment of a linear surface.
*
*       With more than one mip level the levels are laid out from the last level to level 0 and the slice size
*       is the sum of all levels. With a single level the pitch/height may be adjusted by
*       ApplyCustomizedPitchHeight().
*
*   @return
*       ADDR_INVALIDPARAMS for a 1D resource taller than one row, the error from ApplyCustomizedPitchHeight()
*       if it fails, otherwise ADDR_OK
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // A 1D resource can only be one row tall.
    if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
    {
        return ADDR_INVALIDPARAMS;
    }

    const bool    isLinearGeneral = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL);
    const UINT_32 bytesPerElem    = pIn->bpp >> 3;

    // LINEAR_GENERAL imposes no pitch alignment; otherwise a row is aligned to 256 bytes, expressed in elements.
    const UINT_32 pitchAlign      = isLinearGeneral ? 1 : (256 / bytesPerElem);
    const UINT_32 depthPerMip     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;

    UINT_32 alignedPitch  = PowTwoAlign(pIn->width, pitchAlign);
    UINT_32 finalHeight   = pIn->height;
    UINT_64 bytesPerSlice = 0;

    if (pIn->numMipLevels <= 1)
    {
        const ADDR_E_RETURNCODE status =
            ApplyCustomizedPitchHeight(pIn, bytesPerElem, pitchAlign, &alignedPitch, &finalHeight);

        if (status != ADDR_OK)
        {
            // pOut is left untouched on failure.
            return status;
        }

        bytesPerSlice = static_cast<UINT_64>(alignedPitch) * finalHeight * bytesPerElem;

        if (pOut->pMipInfo != NULL)
        {
            ADDR2_MIP_INFO* const pMip = &pOut->pMipInfo[0];

            pMip->pitch            = alignedPitch;
            pMip->height           = finalHeight;
            pMip->depth            = depthPerMip;
            pMip->offset           = 0;
            pMip->mipTailOffset    = 0;
            pMip->macroBlockOffset = 0;
        }
    }
    else
    {
        // Walk from the last mip level down to level 0; each level starts where the previous ones ended.
        for (INT_32 level = static_cast<INT_32>(pIn->numMipLevels) - 1; level >= 0; level--)
        {
            UINT_32 levelWidth;
            UINT_32 levelHeight;

            GetMipSize(pIn->width, pIn->height, 1, level, &levelWidth, &levelHeight);

            const UINT_32 levelPitch = PowTwoAlign(levelWidth, pitchAlign);

            if (pOut->pMipInfo != NULL)
            {
                ADDR2_MIP_INFO* const pMip = &pOut->pMipInfo[level];

                pMip->pitch            = levelPitch;
                pMip->height           = levelHeight;
                pMip->depth            = depthPerMip;
                pMip->offset           = bytesPerSlice;
                pMip->mipTailOffset    = 0;
                pMip->macroBlockOffset = bytesPerSlice;
            }

            bytesPerSlice += static_cast<UINT_64>(levelPitch) * levelHeight * bytesPerElem;
        }
    }

    pOut->pitch          = alignedPitch;
    pOut->height         = finalHeight;
    pOut->numSlices      = pIn->numSlices;
    pOut->sliceSize      = bytesPerSlice;
    pOut->surfSize       = bytesPerSlice * pOut->numSlices;
    pOut->baseAlign      = isLinearGeneral ? bytesPerElem : 256;
    pOut->blockWidth     = pitchAlign;
    pOut->blockHeight    = 1;
    pOut->blockSlices    = 1;

    // The mip chain members carry no information on GFX11 and are cleared.
    pOut->mipChainPitch  = 0;
    pOut->mipChainHeight = 0;
    pOut->mipChainSlice  = 0;
    pOut->epitchIsHeight = FALSE;

    // Sanity check of the computed result
    ADDR_ASSERT(pOut->sliceSize > 0);

    return ADDR_OK;
}

} // V2
} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx11addrlib.h
* @brief Contains the Gfx11Lib class definition.
************************************************************************************************************************
*/

#ifndef __GFX11_ADDR_LIB_H__
#define __GFX11_ADDR_LIB_H__

#include "addrlib2.h"
#include "coord.h"
#include "gfx11SwizzlePattern.h"

namespace rocr {
namespace Addr
{
namespace V2
{

/**
************************************************************************************************************************
* @brief GFX11 specific settings structure.
*        Holds two single-bit chip flags followed by reserved bits. The widths of the first three bit-fields sum
*        to 32 and the last bit-field is 32 bits wide; the order and widths of the fields must not be changed
*        casually since they determine the structure's layout.
************************************************************************************************************************
*/
struct Gfx11ChipSettings
{
    struct
    {
        // Chip identification flags.
        // NOTE(review): presumably set during chip family detection, which is not visible here - confirm.
        UINT_32 isGfx1150           :  1;
        UINT_32 isGfx1103           :  1;
        UINT_32 reserved1           : 30;

        // Misc configuration bits
        UINT_32 reserved2           : 32;
    };
};

/**
************************************************************************************************************************
* @brief Kind of data surface on GFX11: color or depth/stencil.
************************************************************************************************************************
*/
enum Gfx11DataType
{
    Gfx11DataColor        = 0,  ///< Color surface
    Gfx11DataDepthStencil = 1,  ///< Depth/stencil surface
};

// Swizzle mode bit masks. In every mask below, bit N is set when the swizzle mode whose AddrSwizzleMode
// enumerator has the value N belongs to the set. Later masks are composed from earlier ones, so the
// definition order matters.

// Linear swizzle mode only.
const UINT_32 Gfx11LinearSwModeMask = (1u << ADDR_SW_LINEAR);

// Swizzle modes grouped by the block size named in their enumerator (256B, 4KB, 64KB, 256KB).
const UINT_32 Gfx11Blk256BSwModeMask = (1u << ADDR_SW_256B_D);

const UINT_32 Gfx11Blk4KBSwModeMask = (1u << ADDR_SW_4KB_S)   |
                                      (1u << ADDR_SW_4KB_D)   |
                                      (1u << ADDR_SW_4KB_S_X) |
                                      (1u << ADDR_SW_4KB_D_X);

const UINT_32 Gfx11Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S)   |
                                       (1u << ADDR_SW_64KB_D)   |
                                       (1u << ADDR_SW_64KB_S_T) |
                                       (1u << ADDR_SW_64KB_D_T) |
                                       (1u << ADDR_SW_64KB_Z_X) |
                                       (1u << ADDR_SW_64KB_S_X) |
                                       (1u << ADDR_SW_64KB_D_X) |
                                       (1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx11Blk256KBSwModeMask = (1u << ADDR_SW_256KB_Z_X) |
                                        (1u << ADDR_SW_256KB_S_X) |
                                        (1u << ADDR_SW_256KB_D_X) |
                                        (1u << ADDR_SW_256KB_R_X);

// Swizzle modes grouped by the swizzle type letter in their enumerator (Z, S, D, R).
const UINT_32 Gfx11ZSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
                                 (1u << ADDR_SW_256KB_Z_X);

const UINT_32 Gfx11StandardSwModeMask = (1u << ADDR_SW_4KB_S)    |
                                        (1u << ADDR_SW_64KB_S)   |
                                        (1u << ADDR_SW_64KB_S_T) |
                                        (1u << ADDR_SW_4KB_S_X)  |
                                        (1u << ADDR_SW_64KB_S_X) |
                                        (1u << ADDR_SW_256KB_S_X);

const UINT_32 Gfx11DisplaySwModeMask = (1u << ADDR_SW_256B_D)   |
                                       (1u << ADDR_SW_4KB_D)    |
                                       (1u << ADDR_SW_64KB_D)   |
                                       (1u << ADDR_SW_64KB_D_T) |
                                       (1u << ADDR_SW_4KB_D_X)  |
                                       (1u << ADDR_SW_64KB_D_X) |
                                       (1u << ADDR_SW_256KB_D_X);

const UINT_32 Gfx11RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
                                      (1u << ADDR_SW_256KB_R_X);

// Swizzle modes with the _X suffix; every 256KB mode is included.
const UINT_32 Gfx11XSwModeMask = (1u << ADDR_SW_4KB_S_X)  |
                                 (1u << ADDR_SW_4KB_D_X)  |
                                 (1u << ADDR_SW_64KB_Z_X) |
                                 (1u << ADDR_SW_64KB_S_X) |
                                 (1u << ADDR_SW_64KB_D_X) |
                                 (1u << ADDR_SW_64KB_R_X) |
                                 Gfx11Blk256KBSwModeMask;

// Swizzle modes with the _T suffix.
const UINT_32 Gfx11TSwModeMask = (1u << ADDR_SW_64KB_S_T) |
                                 (1u << ADDR_SW_64KB_D_T);

// Union of the _X and _T swizzle modes.
const UINT_32 Gfx11XorSwModeMask = Gfx11XSwModeMask |
                                   Gfx11TSwModeMask;

// Swizzle mode sets per resource dimensionality (1D, 2D, 3D).
const UINT_32 Gfx11Rsrc1dSwModeMask = (1u << ADDR_SW_LINEAR)   |
                                      (1u << ADDR_SW_64KB_R_X) |
                                      (1u << ADDR_SW_64KB_Z_X) ;

const UINT_32 Gfx11Rsrc2dSwModeMask = Gfx11LinearSwModeMask  |
                                      Gfx11DisplaySwModeMask |
                                      Gfx11ZSwModeMask       |
                                      Gfx11RenderSwModeMask;

const UINT_32 Gfx11Rsrc3dSwModeMask = Gfx11LinearSwModeMask    |
                                      Gfx11StandardSwModeMask  |
                                      Gfx11ZSwModeMask         |
                                      Gfx11RenderSwModeMask    |
                                      (1u << ADDR_SW_64KB_D_X) |
                                      (1u << ADDR_SW_256KB_D_X);

// PRT sets: 4KB and 64KB modes of the 2D/3D sets with every _X mode removed.
const UINT_32 Gfx11Rsrc2dPrtSwModeMask =
    (Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask) & ~Gfx11XSwModeMask & Gfx11Rsrc2dSwModeMask;

const UINT_32 Gfx11Rsrc3dPrtSwModeMask =
    (Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask) & ~Gfx11XSwModeMask & Gfx11Rsrc3dSwModeMask;

// 3D "thin" sets: the Z and R modes, split by block size and then combined.
const UINT_32 Gfx11Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
                                              (1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx11Rsrc3dThin256KBSwModeMask = (1u << ADDR_SW_256KB_Z_X) |
                                               (1u << ADDR_SW_256KB_R_X);

const UINT_32 Gfx11Rsrc3dThinSwModeMask = Gfx11Rsrc3dThin64KBSwModeMask | Gfx11Rsrc3dThin256KBSwModeMask;

// 3D "thick" set: every 3D mode that is neither thin nor linear, then split by block size.
const UINT_32 Gfx11Rsrc3dThickSwModeMask = Gfx11Rsrc3dSwModeMask & ~(Gfx11Rsrc3dThinSwModeMask | Gfx11LinearSwModeMask);

const UINT_32 Gfx11Rsrc3dThick4KBSwModeMask = Gfx11Rsrc3dThickSwModeMask & Gfx11Blk4KBSwModeMask;

const UINT_32 Gfx11Rsrc3dThick64KBSwModeMask = Gfx11Rsrc3dThickSwModeMask & Gfx11Blk64KBSwModeMask;

const UINT_32 Gfx11Rsrc3dThick256KBSwModeMask = Gfx11Rsrc3dThickSwModeMask & Gfx11Blk256KBSwModeMask;

// MSAA set: the Z and R modes.
const UINT_32 Gfx11MsaaSwModeMask = Gfx11ZSwModeMask |
                                    Gfx11RenderSwModeMask;

// NOTE(review): presumably the swizzle modes accepted by the DCN 3.2 display engine - confirm against its users.
const UINT_32 Dcn32SwModeMask = (1u << ADDR_SW_LINEAR)    |
                                (1u << ADDR_SW_64KB_D)    |
                                (1u << ADDR_SW_64KB_D_T)  |
                                (1u << ADDR_SW_64KB_D_X)  |
                                (1u << ADDR_SW_64KB_R_X)  |
                                (1u << ADDR_SW_256KB_D_X) |
                                (1u << ADDR_SW_256KB_R_X);

// 256 KiB in bytes (2^18) and its log2.
const UINT_32 Size256K     = 262144u;
const UINT_32 Log2Size256K = 18u;

/**
************************************************************************************************************************
* @brief This class is the GFX11 specific address library
*        function set.
*
*        Instances are created through CreateObj(); the constructor and destructor are protected. Members such
*        as m_swizzleModeTable and m_pipesLog2 used by the inline functions below are not declared in this
*        class (presumably inherited from Lib - confirm).
************************************************************************************************************************
*/
class Gfx11Lib : public Lib
{
public:
    /// Creates Gfx11Lib object
    ///
    /// Requests sizeof(Gfx11Lib) bytes through Object::ClientAlloc() and constructs the object in that storage
    /// with placement new. Returns NULL when the allocation returns NULL.
    static Addr::Lib* CreateObj(const Client* pClient)
    {
        VOID* pMem = Object::ClientAlloc(sizeof(Gfx11Lib), pClient);
        return (pMem != NULL) ? new (pMem) Gfx11Lib(pClient) : NULL;
    }

protected:
    Gfx11Lib(const Client* pClient);
    virtual ~Gfx11Lib();

    // NOTE(review): the Hwl* virtuals below are presumably overrides of hooks declared by the base class,
    // which is not visible here - confirm.

    /// Returns the isStd flag of the swizzle mode's table entry; resourceType is not consulted.
    virtual BOOL_32 HwlIsStandardSwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isStd;
    }

    /// Returns the isDisp flag of the swizzle mode's table entry; resourceType is not consulted.
    virtual BOOL_32 HwlIsDisplaySwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isDisp;
    }

    /// A resource is thin when it is 1D or 2D, or when it is 3D and the swizzle mode is neither standard
    /// (isStd) nor display (isDisp).
    virtual BOOL_32 HwlIsThin(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return ((IsTex1d(resourceType)  == TRUE) ||
                (IsTex2d(resourceType)  == TRUE) ||
                ((IsTex3d(resourceType) == TRUE)                  &&
                 (m_swizzleModeTable[swizzleMode].isStd  == FALSE) &&
                 (m_swizzleModeTable[swizzleMode].isDisp == FALSE)));
    }

    /// A resource is thick when it is 3D and the swizzle mode is standard (isStd) or display (isDisp).
    virtual BOOL_32 HwlIsThick(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return ((IsTex3d(resourceType) == TRUE) &&
                (m_swizzleModeTable[swizzleMode].isStd || m_swizzleModeTable[swizzleMode].isDisp));
    }

    // Metadata (Htile / DCC) queries.
    virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
        const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
        const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
        const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
        const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlSupportComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn);

    virtual VOID HwlComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut);

    // Equation table access.
    virtual UINT_32 HwlGetEquationIndex(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    /// Stores the address of the equation table in *ppEquationTable and returns m_numEquations.
    virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
    {
        *ppEquationTable = m_equationTable;

        return m_numEquations;
    }

    // Pipe/bank XOR and sub-resource helpers.
    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView(
        const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) const;

    // Swizzle mode selection.
    virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlGetAllowedBlockSet(
        ADDR2_SWMODE_SET allowedSwModeSet,
        AddrResourceType rsrcType,
        ADDR2_BLOCK_SET* pAllowedBlockSet) const;

    virtual ADDR_E_RETURNCODE HwlGetAllowedSwSet(
        ADDR2_SWMODE_SET  allowedSwModeSet,
        ADDR2_SWTYPE_SET* pAllowedSwSet) const;

    // Surface info and addressing.
    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    virtual UINT_32 HwlComputeMaxBaseAlignments() const;

    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

    // Initialization.
    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

    virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

private:
    // Initialize equation table
    VOID InitEquationTable();

    ADDR_E_RETURNCODE ComputeSurfaceInfoMacroTiled(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceInfoMicroTiled(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMacroTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMicroTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    UINT_32 ComputeOffsetFromSwizzlePattern(
        const UINT_64* pPattern,
        UINT_32        numBits,
        UINT_32        x,
        UINT_32        y,
        UINT_32        z,
        UINT_32        s) const;

    UINT_32 ComputeOffsetFromEquation(
        const ADDR_EQUATION* pEq,
        UINT_32              x,
        UINT_32              y,
        UINT_32              z) const;

    ADDR_E_RETURNCODE ComputeStereoInfo(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        UINT_32*                                pAlignY,
        UINT_32*                                pRightXor) const;

    /// Computes the dimensions of mip level mipId from the mip 0 dimensions.
    /// Each mip 0 dimension is clamped to at least 1 and then passed to ShiftCeil() with mipId.
    /// The depth is only computed when pMipDepth is not NULL.
    static void GetMipSize(
        UINT_32  mip0Width,
        UINT_32  mip0Height,
        UINT_32  mip0Depth,
        UINT_32  mipId,
        UINT_32* pMipWidth,
        UINT_32* pMipHeight,
        UINT_32* pMipDepth = NULL)
    {
        *pMipWidth  = ShiftCeil(Max(mip0Width, 1u),  mipId);
        *pMipHeight = ShiftCeil(Max(mip0Height, 1u), mipId);

        if (pMipDepth != NULL)
        {
            *pMipDepth = ShiftCeil(Max(mip0Depth, 1u),  mipId);
        }
    }

    const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
        AddrSwizzleMode  swizzleMode,
        AddrResourceType resourceType,
        UINT_32          log2Elem,
        UINT_32          numFrag) const;

    /// Assembles a 20-entry swizzle pattern from the four nibble tables selected by pPatInfo.
    /// The selected rows are copied to pSwizzle entries 0, 8, 12 and 16 respectively; the number of bytes
    /// copied is the size of one row of each table.
    /// NOTE(review): this presumably means 8 entries for NIBBLE01 and 4 for the others - confirm against
    /// the table declarations in gfx11SwizzlePattern.h.
    VOID GetSwizzlePatternFromPatternInfo(
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_BIT_SETTING       (&pSwizzle)[20]) const
    {
        memcpy(pSwizzle,
               GFX11_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
               sizeof(GFX11_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx]));

        memcpy(&pSwizzle[8],
               GFX11_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx],
               sizeof(GFX11_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx]));

        memcpy(&pSwizzle[12],
               GFX11_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx],
               sizeof(GFX11_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx]));

        memcpy(&pSwizzle[16],
               GFX11_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx],
               sizeof(GFX11_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx]));
    }

    VOID ConvertSwizzlePatternToEquation(
        UINT_32                elemLog2,
        AddrResourceType       rsrcType,
        AddrSwizzleMode        swMode,
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_EQUATION*         pEquation) const;

    // Metadata sizing helpers.
    static INT_32 GetMetaElementSizeLog2(Gfx11DataType dataType);

    static INT_32 GetMetaCacheSizeLog2(Gfx11DataType dataType);

    void GetBlk256SizeLog2(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        Dim3d*           pBlock) const;

    void GetCompressedBlockSizeLog2(
        Gfx11DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        Dim3d*           pBlock) const;

    INT_32 GetMetaOverlapLog2(
        Gfx11DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2) const;

    INT_32 Get3DMetaOverlapLog2(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2) const;

    UINT_32 GetMetaBlkSize(
        Gfx11DataType    dataType,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          elemLog2,
        UINT_32          numSamplesLog2,
        BOOL_32          pipeAlign,
        Dim3d*           pBlock) const;

    INT_32 GetPipeRotateAmount(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

    /// Returns the smaller of m_pipesLog2 and (m_numSaLog2 + 1).
    INT_32 GetEffectiveNumPipes() const
    {
        return ((m_numSaLog2 + 1) >= m_pipesLog2) ? m_pipesLog2 : m_numSaLog2 + 1;
    }

    /// True for a 2D resource with an RT-opt or Z-order swizzle mode, or for a 3D resource with a display
    /// swizzle mode.
    BOOL_32 IsRbAligned(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        const BOOL_32 isRtopt   = IsRtOptSwizzle(swizzleMode);
        const BOOL_32 isZ       = IsZOrderSwizzle(swizzleMode);
        const BOOL_32 isDisplay = IsDisplaySwizzle(swizzleMode);

        return (IsTex2d(resourceType) && (isRtopt || isZ)) ||
               (IsTex3d(resourceType) && isDisplay);

    }

    UINT_32 GetValidDisplaySwizzleModes(UINT_32 bpp) const;

    BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const;

    /// A mip level is in the mip tail when its width and height both fit within the mip tail dimensions and
    /// the number of mips to the end does not exceed maxNumMipsInTail.
    BOOL_32 IsInMipTail(
        Dim3d   mipTailDim,
        UINT_32 maxNumMipsInTail,
        UINT_32 mipWidth,
        UINT_32 mipHeight,
        UINT_32 numMipsToTheEnd) const
    {
        BOOL_32 inTail = ((mipWidth <= mipTailDim.w) &&
                          (mipHeight <= mipTailDim.h) &&
                          (numMipsToTheEnd <= maxNumMipsInTail));

        return inTail;
    }

    /// Number of bank XOR bits for a block of 2^blockBits bytes: the bits left over above the pipe
    /// interleave, pipe and column bits, capped at BankBits; 0 when there are no bits left over.
    UINT_32 GetBankXorBits(UINT_32 blockBits) const
    {
        return (blockBits > m_pipeInterleaveLog2 + m_pipesLog2 + ColumnBits) ?
               Min(blockBits - m_pipeInterleaveLog2 - m_pipesLog2 - ColumnBits, BankBits) : 0;
    }

    BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
    BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    /// Forwards to IsBlockVariable().
    BOOL_32 IsBlock256kb(AddrSwizzleMode swizzleMode) const { return IsBlockVariable(swizzleMode); }

    // TODO: figure out if there is any Column bits on GFX11...
    static const UINT_32 ColumnBits       = 2;
    static const UINT_32 BankBits         = 4;
    static const UINT_32 UnalignedDccType = 3;

    // Per-element-size block dimension tables (one entry per supported bpp); defined out of line.
    static const Dim3d Block256_3d[MaxNumOfBpp];
    static const Dim3d Block256K_Log2_3d[MaxNumOfBpp];
    static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
    static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];

    // One flags entry per swizzle mode; defined out of line.
    static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

    // Number of packers log2
    UINT_32 m_numPkrLog2;
    // Number of shader array log2
    UINT_32 m_numSaLog2;

    Gfx11ChipSettings m_settings;

    // NOTE(review): presumably the starting indices of the color, Htile and DCC entries in a shared
    // table - their use is not visible in this header; confirm.
    UINT_32 m_colorBaseIndex;
    UINT_32 m_htileBaseIndex;
    UINT_32 m_dccBaseIndex;
};

} // V2
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12SwizzlePattern.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2023 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx12SwizzlePattern.h
* @brief swizzle pattern for gfx12.
************************************************************************************************************************
*/

#ifndef __GFX12_SWIZZLE_PATTERN_H__
#define __GFX12_SWIZZLE_PATTERN_H__

namespace rocr {
namespace Addr
{
namespace V3
{
    // Pattern descriptors for the 256-byte 2D swizzle modes, one table per sample count (1x/2x/4x/8x).
    // Each table has five rows, ordered by element size (1, 2, 4, 8, 16 bytes per element) as noted in the
    // trailing comments. Only the first column is non-zero in these tables.
    // NOTE(review): the four columns look like row indices into GFX12_SW_PATTERN_NIBBLE1..4 (the value ranges
    // match the row counts of those tables) -- confirm against the ADDR_SW_PATINFO declaration.
    const ADDR_SW_PATINFO GFX12_SW_256B_2D_1xAA_PATINFO[] =
    {
        {   0,    0,    0,    0, } , // 1 BPE @ SW_256B_2D_1xAA
        {   1,    0,    0,    0, } , // 2 BPE @ SW_256B_2D_1xAA
        {   2,    0,    0,    0, } , // 4 BPE @ SW_256B_2D_1xAA
        {   3,    0,    0,    0, } , // 8 BPE @ SW_256B_2D_1xAA
        {   4,    0,    0,    0, } , // 16 BPE @ SW_256B_2D_1xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256B_2D_2xAA_PATINFO[] =
    {
        {   5,    0,    0,    0, } , // 1 BPE @ SW_256B_2D_2xAA
        {   6,    0,    0,    0, } , // 2 BPE @ SW_256B_2D_2xAA
        {   7,    0,    0,    0, } , // 4 BPE @ SW_256B_2D_2xAA
        {   8,    0,    0,    0, } , // 8 BPE @ SW_256B_2D_2xAA
        {   9,    0,    0,    0, } , // 16 BPE @ SW_256B_2D_2xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256B_2D_4xAA_PATINFO[] =
    {
        {  10,    0,    0,    0, } , // 1 BPE @ SW_256B_2D_4xAA
        {  11,    0,    0,    0, } , // 2 BPE @ SW_256B_2D_4xAA
        {  12,    0,    0,    0, } , // 4 BPE @ SW_256B_2D_4xAA
        {  13,    0,    0,    0, } , // 8 BPE @ SW_256B_2D_4xAA
        {  14,    0,    0,    0, } , // 16 BPE @ SW_256B_2D_4xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256B_2D_8xAA_PATINFO[] =
    {
        {  15,    0,    0,    0, } , // 1 BPE @ SW_256B_2D_8xAA
        {  16,    0,    0,    0, } , // 2 BPE @ SW_256B_2D_8xAA
        {  17,    0,    0,    0, } , // 4 BPE @ SW_256B_2D_8xAA
        {  18,    0,    0,    0, } , // 8 BPE @ SW_256B_2D_8xAA
        {  19,    0,    0,    0, } , // 16 BPE @ SW_256B_2D_8xAA
    };

    // Pattern descriptors for the 4KB 2D swizzle modes, one table per sample count (1x/2x/4x/8x).
    // Rows are ordered by element size (1, 2, 4, 8, 16 bytes per element). The first column repeats the values
    // used by the 256B tables for the same sample count; the second column is additionally populated, while the
    // last two columns stay 0.
    // NOTE(review): columns presumably index GFX12_SW_PATTERN_NIBBLE1..4 -- confirm against ADDR_SW_PATINFO.
    const ADDR_SW_PATINFO GFX12_SW_4KB_2D_1xAA_PATINFO[] =
    {
        {   0,    1,    0,    0, } , // 1 BPE @ SW_4KB_2D_1xAA
        {   1,    2,    0,    0, } , // 2 BPE @ SW_4KB_2D_1xAA
        {   2,    3,    0,    0, } , // 4 BPE @ SW_4KB_2D_1xAA
        {   3,    4,    0,    0, } , // 8 BPE @ SW_4KB_2D_1xAA
        {   4,    5,    0,    0, } , // 16 BPE @ SW_4KB_2D_1xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_4KB_2D_2xAA_PATINFO[] =
    {
        {   5,    2,    0,    0, } , // 1 BPE @ SW_4KB_2D_2xAA
        {   6,    3,    0,    0, } , // 2 BPE @ SW_4KB_2D_2xAA
        {   7,    4,    0,    0, } , // 4 BPE @ SW_4KB_2D_2xAA
        {   8,    5,    0,    0, } , // 8 BPE @ SW_4KB_2D_2xAA
        {   9,    6,    0,    0, } , // 16 BPE @ SW_4KB_2D_2xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_4KB_2D_4xAA_PATINFO[] =
    {
        {  10,    3,    0,    0, } , // 1 BPE @ SW_4KB_2D_4xAA
        {  11,    4,    0,    0, } , // 2 BPE @ SW_4KB_2D_4xAA
        {  12,    5,    0,    0, } , // 4 BPE @ SW_4KB_2D_4xAA
        {  13,    6,    0,    0, } , // 8 BPE @ SW_4KB_2D_4xAA
        {  14,    7,    0,    0, } , // 16 BPE @ SW_4KB_2D_4xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_4KB_2D_8xAA_PATINFO[] =
    {
        {  15,    4,    0,    0, } , // 1 BPE @ SW_4KB_2D_8xAA
        {  16,    5,    0,    0, } , // 2 BPE @ SW_4KB_2D_8xAA
        {  17,    6,    0,    0, } , // 4 BPE @ SW_4KB_2D_8xAA
        {  18,    7,    0,    0, } , // 8 BPE @ SW_4KB_2D_8xAA
        {  19,    8,    0,    0, } , // 16 BPE @ SW_4KB_2D_8xAA
    };

    // Pattern descriptors for the 64KB 2D swizzle modes, one table per sample count (1x/2x/4x/8x).
    // Rows are ordered by element size (1, 2, 4, 8, 16 bytes per element). The first two columns match the 4KB
    // tables for the same sample count; the third column is additionally populated and the fourth stays 0.
    // NOTE(review): columns presumably index GFX12_SW_PATTERN_NIBBLE1..4 -- confirm against ADDR_SW_PATINFO.
    const ADDR_SW_PATINFO GFX12_SW_64KB_2D_1xAA_PATINFO[] =
    {
        {   0,    1,    1,    0, } , // 1 BPE @ SW_64KB_2D_1xAA
        {   1,    2,    2,    0, } , // 2 BPE @ SW_64KB_2D_1xAA
        {   2,    3,    3,    0, } , // 4 BPE @ SW_64KB_2D_1xAA
        {   3,    4,    4,    0, } , // 8 BPE @ SW_64KB_2D_1xAA
        {   4,    5,    5,    0, } , // 16 BPE @ SW_64KB_2D_1xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_64KB_2D_2xAA_PATINFO[] =
    {
        {   5,    2,    2,    0, } , // 1 BPE @ SW_64KB_2D_2xAA
        {   6,    3,    3,    0, } , // 2 BPE @ SW_64KB_2D_2xAA
        {   7,    4,    4,    0, } , // 4 BPE @ SW_64KB_2D_2xAA
        {   8,    5,    5,    0, } , // 8 BPE @ SW_64KB_2D_2xAA
        {   9,    6,    6,    0, } , // 16 BPE @ SW_64KB_2D_2xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_64KB_2D_4xAA_PATINFO[] =
    {
        {  10,    3,    3,    0, } , // 1 BPE @ SW_64KB_2D_4xAA
        {  11,    4,    4,    0, } , // 2 BPE @ SW_64KB_2D_4xAA
        {  12,    5,    5,    0, } , // 4 BPE @ SW_64KB_2D_4xAA
        {  13,    6,    6,    0, } , // 8 BPE @ SW_64KB_2D_4xAA
        {  14,    7,    7,    0, } , // 16 BPE @ SW_64KB_2D_4xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_64KB_2D_8xAA_PATINFO[] =
    {
        {  15,    4,    4,    0, } , // 1 BPE @ SW_64KB_2D_8xAA
        {  16,    5,    5,    0, } , // 2 BPE @ SW_64KB_2D_8xAA
        {  17,    6,    6,    0, } , // 4 BPE @ SW_64KB_2D_8xAA
        {  18,    7,    7,    0, } , // 8 BPE @ SW_64KB_2D_8xAA
        {  19,    8,    8,    0, } , // 16 BPE @ SW_64KB_2D_8xAA
    };

    // Pattern descriptors for the 256KB 2D swizzle modes, one table per sample count (1x/2x/4x/8x).
    // Rows are ordered by element size (1, 2, 4, 8, 16 bytes per element). The first three columns match the
    // 64KB tables for the same sample count; here the fourth column is populated as well.
    // NOTE(review): columns presumably index GFX12_SW_PATTERN_NIBBLE1..4 -- confirm against ADDR_SW_PATINFO.
    const ADDR_SW_PATINFO GFX12_SW_256KB_2D_1xAA_PATINFO[] =
    {
        {   0,    1,    1,    1, } , // 1 BPE @ SW_256KB_2D_1xAA
        {   1,    2,    2,    2, } , // 2 BPE @ SW_256KB_2D_1xAA
        {   2,    3,    3,    3, } , // 4 BPE @ SW_256KB_2D_1xAA
        {   3,    4,    4,    4, } , // 8 BPE @ SW_256KB_2D_1xAA
        {   4,    5,    5,    5, } , // 16 BPE @ SW_256KB_2D_1xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256KB_2D_2xAA_PATINFO[] =
    {
        {   5,    2,    2,    2, } , // 1 BPE @ SW_256KB_2D_2xAA
        {   6,    3,    3,    3, } , // 2 BPE @ SW_256KB_2D_2xAA
        {   7,    4,    4,    4, } , // 4 BPE @ SW_256KB_2D_2xAA
        {   8,    5,    5,    5, } , // 8 BPE @ SW_256KB_2D_2xAA
        {   9,    6,    6,    6, } , // 16 BPE @ SW_256KB_2D_2xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256KB_2D_4xAA_PATINFO[] =
    {
        {  10,    3,    3,    3, } , // 1 BPE @ SW_256KB_2D_4xAA
        {  11,    4,    4,    4, } , // 2 BPE @ SW_256KB_2D_4xAA
        {  12,    5,    5,    5, } , // 4 BPE @ SW_256KB_2D_4xAA
        {  13,    6,    6,    6, } , // 8 BPE @ SW_256KB_2D_4xAA
        {  14,    7,    7,    7, } , // 16 BPE @ SW_256KB_2D_4xAA
    };

    const ADDR_SW_PATINFO GFX12_SW_256KB_2D_8xAA_PATINFO[] =
    {
        {  15,    4,    4,    4, } , // 1 BPE @ SW_256KB_2D_8xAA
        {  16,    5,    5,    5, } , // 2 BPE @ SW_256KB_2D_8xAA
        {  17,    6,    6,    6, } , // 4 BPE @ SW_256KB_2D_8xAA
        {  18,    7,    7,    7, } , // 8 BPE @ SW_256KB_2D_8xAA
        {  19,    8,    8,    8, } , // 16 BPE @ SW_256KB_2D_8xAA
    };

    // Pattern descriptors for the 3D swizzle modes (4KB, 64KB, 256KB). There is a single table per block size
    // (no per-sample-count variants), with rows ordered by element size (1, 2, 4, 8, 16 bytes per element).
    // All three tables share the same first column (20..24); larger block sizes populate more columns.
    // Note that the fourth column of the 256KB table is not a simple copy of the third (9, 9, 10, 11, 11).
    // NOTE(review): columns presumably index GFX12_SW_PATTERN_NIBBLE1..4 -- confirm against ADDR_SW_PATINFO.
    const ADDR_SW_PATINFO GFX12_SW_4KB_3D_PATINFO[] =
    {
        {  20,    9,    0,    0, } , // 1 BPE @ SW_4KB_3D
        {  21,   10,    0,    0, } , // 2 BPE @ SW_4KB_3D
        {  22,   11,    0,    0, } , // 4 BPE @ SW_4KB_3D
        {  23,   12,    0,    0, } , // 8 BPE @ SW_4KB_3D
        {  24,   13,    0,    0, } , // 16 BPE @ SW_4KB_3D
    };

    const ADDR_SW_PATINFO GFX12_SW_64KB_3D_PATINFO[] =
    {
        {  20,    9,    9,    0, } , // 1 BPE @ SW_64KB_3D
        {  21,   10,   10,    0, } , // 2 BPE @ SW_64KB_3D
        {  22,   11,   11,    0, } , // 4 BPE @ SW_64KB_3D
        {  23,   12,   12,    0, } , // 8 BPE @ SW_64KB_3D
        {  24,   13,   13,    0, } , // 16 BPE @ SW_64KB_3D
    };

    const ADDR_SW_PATINFO GFX12_SW_256KB_3D_PATINFO[] =
    {
        {  20,    9,    9,    9, } , // 1 BPE @ SW_256KB_3D
        {  21,   10,   10,    9, } , // 2 BPE @ SW_256KB_3D
        {  22,   11,   11,   10, } , // 4 BPE @ SW_256KB_3D
        {  23,   12,   12,   11, } , // 8 BPE @ SW_256KB_3D
        {  24,   13,   13,   11, } , // 16 BPE @ SW_256KB_3D
    };


    // First pattern fragment table: 25 rows of 8 entries each. Every entry is either 0 or one of the
    // X*/Y*/Z*/S* coordinate-bit constants (defined outside this file). The trailing comment on each row is
    // its row number.
    //   rows  0..4  : X/Y bits only
    //   rows  5..9  : X/Y bits plus S0
    //   rows 10..14 : X/Y bits plus S0, S1
    //   rows 15..19 : X/Y bits plus S0, S1, S2
    //   rows 20..24 : X/Y/Z bits
    // Within each group of five, every successive row has one more leading 0 entry.
    // NOTE(review): presumably these 8 entries describe the lowest 8 address bits of a block and the leading
    // zeros are the bits taken by the element size -- confirm against GetSwizzlePatternFromPatternInfo().
    const UINT_64 GFX12_SW_PATTERN_NIBBLE1[][8] =
    {
        {X0,            X1,            Y0,            X2,            Y1,            Y2,            X3,            Y3,            }, // 0
        {0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 1
        {0,             0,             X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 2
        {0,             0,             0,             X0,            Y0,            X1,            X2,            Y1,            }, // 3
        {0,             0,             0,             0,             X0,            Y0,            X1,            Y1,            }, // 4
        {S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            X3,            }, // 5
        {0,             S0,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 6
        {0,             0,             S0,            X0,            Y0,            X1,            Y1,            X2,            }, // 7
        {0,             0,             0,             S0,            X0,            Y0,            X1,            Y1,            }, // 8
        {0,             0,             0,             0,             S0,            X0,            Y0,            X1,            }, // 9
        {S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            Y2,            }, // 10
        {0,             S0,            S1,            X0,            Y0,            X1,            Y1,            X2,            }, // 11
        {0,             0,             S0,            S1,            X0,            Y0,            X1,            Y1,            }, // 12
        {0,             0,             0,             S0,            S1,            X0,            Y0,            X1,            }, // 13
        {0,             0,             0,             0,             S0,            S1,            X0,            Y0,            }, // 14
        {S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            X2,            }, // 15
        {0,             S0,            S1,            S2,            X0,            Y0,            X1,            Y1,            }, // 16
        {0,             0,             S0,            S1,            S2,            X0,            Y0,            X1,            }, // 17
        {0,             0,             0,             S0,            S1,            S2,            X0,            Y0,            }, // 18
        {0,             0,             0,             0,             S0,            S1,            S2,            X0,            }, // 19
        {X0,            X1,            Z0,            Y0,            Y1,            Z1,            X2,            Z2,            }, // 20
        {0,             X0,            Z0,            Y0,            X1,            Z1,            Y1,            Z2,            }, // 21
        {0,             0,             X0,            Y0,            X1,            Z0,            Y1,            Z1,            }, // 22
        {0,             0,             0,             X0,            Y0,            Z0,            X1,            Z1,            }, // 23
        {0,             0,             0,             0,             X0,            Z0,            Y0,            Z1,            }, // 24
    };

    // Second pattern fragment table: 14 rows of 4 entries each. Row 0 is all zeros; rows 1..8 use X/Y bits only
    // and rows 9..13 also use Z bits. The trailing comment on each row is its row number.
    // NOTE(review): presumably these 4 entries follow the 8 entries of GFX12_SW_PATTERN_NIBBLE1 in the assembled
    // pattern (address bits 8..11) -- confirm against GetSwizzlePatternFromPatternInfo().
    const UINT_64 GFX12_SW_PATTERN_NIBBLE2[][4] =
    {
        {0,             0,             0,             0,             }, // 0
        {Y4,            X4,            Y5,            X5,            }, // 1
        {Y3,            X4,            Y4,            X5,            }, // 2
        {Y3,            X3,            Y4,            X4,            }, // 3
        {Y2,            X3,            Y3,            X4,            }, // 4
        {Y2,            X2,            Y3,            X3,            }, // 5
        {Y1,            X2,            Y2,            X3,            }, // 6
        {Y1,            X1,            Y2,            X2,            }, // 7
        {Y0,            X1,            Y1,            X2,            }, // 8
        {Y2,            X3,            Z3,            Y3,            }, // 9
        {Y2,            X2,            Z3,            Y3,            }, // 10
        {Y2,            X2,            Z2,            Y3,            }, // 11
        {Y1,            X2,            Z2,            Y2,            }, // 12
        {Y1,            X1,            Z2,            Y2,            }, // 13
    };

    // Third pattern fragment table: 14 rows of 4 entries each. Row 0 is all zeros; rows 1..8 use X/Y bits only
    // and rows 9..13 also use Z bits. The trailing comment on each row is its row number.
    // NOTE(review): presumably these 4 entries follow GFX12_SW_PATTERN_NIBBLE2 in the assembled pattern
    // (address bits 12..15) -- confirm against GetSwizzlePatternFromPatternInfo().
    const UINT_64 GFX12_SW_PATTERN_NIBBLE3[][4] =
    {
        {0,             0,             0,             0,             }, // 0
        {Y6,            X6,            Y7,            X7,            }, // 1
        {Y5,            X6,            Y6,            X7,            }, // 2
        {Y5,            X5,            Y6,            X6,            }, // 3
        {Y4,            X5,            Y5,            X6,            }, // 4
        {Y4,            X4,            Y5,            X5,            }, // 5
        {Y3,            X4,            Y4,            X5,            }, // 6
        {Y3,            X3,            Y4,            X4,            }, // 7
        {Y2,            X3,            Y3,            X4,            }, // 8
        {X4,            Z4,            Y4,            X5,            }, // 9
        {X3,            Z4,            Y4,            X4,            }, // 10
        {X3,            Z3,            Y4,            X4,            }, // 11
        {X3,            Z3,            Y3,            X4,            }, // 12
        {X2,            Z3,            Y3,            X3,            }, // 13
    };

    // Fourth pattern fragment table: 12 rows of only 2 entries each. Row 0 is all zeros; rows 1..8 use X/Y bits
    // only and rows 9..11 use Z/Y bits. The trailing comment on each row is its row number.
    // NOTE(review): presumably these 2 entries are the topmost bits of the assembled pattern (address bits
    // 16..17, giving 8 + 4 + 4 + 2 = 18 bits in total) -- confirm against GetSwizzlePatternFromPatternInfo().
    const UINT_64 GFX12_SW_PATTERN_NIBBLE4[][2] =
    {
        {0,             0,             }, // 0
        {Y8,            X8,            }, // 1
        {Y7,            X8,            }, // 2
        {Y7,            X7,            }, // 3
        {Y6,            X7,            }, // 4
        {Y6,            X6,            }, // 5
        {Y5,            X6,            }, // 6
        {Y5,            X5,            }, // 7
        {Y4,            X5,            }, // 8
        {Z5,            Y5,            }, // 9
        {Z4,            Y5,            }, // 10
        {Z4,            Y4,            }, // 11
    };

} // V3
} // Addr
} // namespace
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2023 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx12addrlib.cpp
* @brief Contain the implementation for the Gfx12Lib class.
************************************************************************************************************************
*/

#include "gfx12addrlib.h"
#include "gfx12_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace rocr {
namespace Addr
{
/**
************************************************************************************************************************
*   Gfx12HwlInit
*
*   @brief
*       Creates a Gfx12Lib object for the given client by delegating to V3::Gfx12Lib::CreateObj().
*
*   @return
*       The pointer handed back by V3::Gfx12Lib::CreateObj(), as an Addr::Lib pointer.
************************************************************************************************************************
*/
Addr::Lib* Gfx12HwlInit(
    const Client* pClient)
{
    Addr::Lib* const pLib = V3::Gfx12Lib::CreateObj(pClient);

    return pLib;
}

namespace V3
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////
// Per-swizzle-mode flag rows, one row per ADDR3_* mode in the order given by the trailing comments.
// Each row sets exactly one of the Linear/2d/3d columns; every non-linear row additionally sets exactly one
// block-size column (256B/4KB/64KB/256KB). The constructor copies this table into m_swizzleModeTable.
const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
{//Linear 2d   3d  256B  4KB  64KB  256KB  Reserved
    {{1,   0,   0,    0,   0,    0,     0,    0}}, // ADDR3_LINEAR
    {{0,   1,   0,    1,   0,    0,     0,    0}}, // ADDR3_256B_2D
    {{0,   1,   0,    0,   1,    0,     0,    0}}, // ADDR3_4KB_2D
    {{0,   1,   0,    0,   0,    1,     0,    0}}, // ADDR3_64KB_2D
    {{0,   1,   0,    0,   0,    0,     1,    0}}, // ADDR3_256KB_2D
    {{0,   0,   1,    0,   1,    0,     0,    0}}, // ADDR3_4KB_3D
    {{0,   0,   1,    0,   0,    1,     0,    0}}, // ADDR3_64KB_3D
    {{0,   0,   1,    0,   0,    0,     1,    0}}, // ADDR3_256KB_3D
};

// Log2 block dimensions for the 3D swizzle modes. GetBlockPixelDimensions() indexes these tables by
// log2(bytes per pixel) (five entries: 1, 2, 4, 8, 16 bytes) and converts each field with "1u << value".
// For every entry the three exponents plus the log2 element size add up to the log2 block size
// (12 for 4KB, 16 for 64KB, 18 for 256KB).
// NOTE(review): the initializer order is assumed to be {width, height, depth} -- confirm against the
// ADDR_EXTENT3D declaration.
const ADDR_EXTENT3D Gfx12Lib::Block4K_Log2_3d[]   = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
const ADDR_EXTENT3D Gfx12Lib::Block64K_Log2_3d[]  = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
const ADDR_EXTENT3D Gfx12Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};

/**
************************************************************************************************************************
*   Gfx12Lib::Gfx12Lib
*
*   @brief
*       Constructor. Passes the client on to the Lib base class, starts with zero swizzle bits, seeds the
*       per-instance swizzle mode table from the static SwizzleModeTable and zero-fills the chip settings.
*
************************************************************************************************************************
*/
Gfx12Lib::Gfx12Lib(const Client* pClient)
    : Lib(pClient),
      m_numSwizzleBits(0)
{
    // Start from the static, compile-time description of every swizzle mode.
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));

    // No chip-specific settings are known yet.
    memset(&m_settings, 0, sizeof(m_settings));
}

/**
************************************************************************************************************************
*   Gfx12Lib::~Gfx12Lib
*
*   @brief
*       Destructor. The body is empty.
************************************************************************************************************************
*/
Gfx12Lib::~Gfx12Lib()
{
    // Intentionally empty: this destructor performs no work of its own.
}

/**
************************************************************************************************************************
*   Gfx12Lib::ConvertSwizzlePatternToEquation
*
*   @brief
*       Expands a swizzle pattern descriptor into its full per-bit pattern and rewrites it as an address equation.
*       Address bits below elemLog2 map straight onto channel 0 with the same bit index. Every higher bit, up to
*       the block size of the swizzle mode, takes its channel from whichever of x/y/z/s is set in the pattern
*       (channels 0/1/2/3 respectively); x indices are additionally offset by elemLog2.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx12Lib::ConvertSwizzlePatternToEquation(
    UINT_32                elemLog2,  ///< [in] element bytes log2
    Addr3SwizzleMode       swMode,    ///< [in] swizzle mode
    const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
    ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
    const
{
    ADDR_BIT_SETTING expandedPattern[Log2Size256K];
    GetSwizzlePatternFromPatternInfo(pPatInfo, expandedPattern);

    const UINT_32 numAddrBits = GetBlockSizeLog2(swMode, TRUE);

    pEquation->numBits            = numAddrBits;
    pEquation->stackedDepthSlices = FALSE;

    // Bits that address bytes inside a single element.
    for (UINT_32 bit = 0; bit < elemLog2; bit++)
    {
        pEquation->addr[bit].channel = 0;
        pEquation->addr[bit].valid   = 1;
        pEquation->addr[bit].index   = bit;
    }

    // Remaining bits come from the swizzle pattern; each pattern entry is expected to name one coordinate bit.
    for (UINT_32 bit = elemLog2; bit < numAddrBits; bit++)
    {
        const ADDR_BIT_SETTING& bitSetting = expandedPattern[bit];

        ADDR_ASSERT(IsPow2(bitSetting.value));

        UINT_32 channel  = 0;
        UINT_32 coordBit = 0;

        if (bitSetting.x != 0)
        {
            ADDR_ASSERT(IsPow2(static_cast<UINT_32>(bitSetting.x)));

            channel  = 0;
            coordBit = Log2(bitSetting.x) + elemLog2;
        }
        else if (bitSetting.y != 0)
        {
            ADDR_ASSERT(IsPow2(static_cast<UINT_32>(bitSetting.y)));

            channel  = 1;
            coordBit = Log2(bitSetting.y);
        }
        else if (bitSetting.z != 0)
        {
            ADDR_ASSERT(IsPow2(static_cast<UINT_32>(bitSetting.z)));

            channel  = 2;
            coordBit = Log2(bitSetting.z);
        }
        else if (bitSetting.s != 0)
        {
            ADDR_ASSERT(IsPow2(static_cast<UINT_32>(bitSetting.s)));

            channel  = 3;
            coordBit = Log2(bitSetting.s);
        }
        else
        {
            // Empty pattern entry: flag it and leave this equation bit untouched.
            ADDR_ASSERT_ALWAYS();
            continue;
        }

        pEquation->addr[bit].channel = channel;
        pEquation->addr[bit].valid   = 1;
        pEquation->addr[bit].index   = coordBit;
    }
}

/**
************************************************************************************************************************
*   Gfx12Lib::InitEquationTable
*
*   @brief
*       Clears the equation table and then, for every non-linear swizzle mode, every MSAA index and every element
*       size, converts the matching swizzle pattern (when one exists) into an equation, appends it to the table and
*       records its index. Combinations without a pattern are recorded with ADDR_INVALID_EQUATION_INDEX.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx12Lib::InitEquationTable()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    for (UINT_32 modeIdx = 0; modeIdx < ADDR3_MAX_TYPE; modeIdx++)
    {
        const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(modeIdx);

        // Linear is skipped: its data table is not useful for 2D/3D images (it only holds x-coordinate bits).
        if (!IsLinear(swMode))
        {
            // Only 2D swizzle modes are walked across several MSAA indices.
            const UINT_32 numMsaaIdx = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;

            for (UINT_32 msaaIdx = 0; msaaIdx < numMsaaIdx; msaaIdx++)
            {
                for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
                {
                    const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx);
                    UINT_32                eqIndex  = ADDR_INVALID_EQUATION_INDEX;

                    if (pPatInfo != NULL)
                    {
                        ADDR_ASSERT(IsValidSwMode(swMode));

                        ADDR_EQUATION newEquation = {};

                        ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &newEquation);

                        // Append at the current end of the table.
                        eqIndex = m_numEquations;
                        ADDR_ASSERT(eqIndex < NumSwizzlePatterns);

                        m_equationTable[eqIndex] = newEquation;
                        m_numEquations++;
                    }

                    SetEquationTableEntry(swMode, msaaIdx, elemLog2, eqIndex);
                }
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetBlockPixelDimensions
*
*   @brief
*       Returns the pixel dimensions of one block for the 3D swizzle modes (4KB, 64KB, 256KB), looked up by
*       log2 of the bytes per pixel. Any other swizzle mode asserts and yields a 1x1x1 extent.
*
************************************************************************************************************************
*/
ADDR_EXTENT3D  Gfx12Lib::GetBlockPixelDimensions(
    Addr3SwizzleMode  swizzleMode,
    UINT_32           log2BytesPerPixel
    ) const
{
    // Pick the log2 dimension table that belongs to the swizzle mode, if there is one.
    const ADDR_EXTENT3D* pLog2Table = NULL;

    if (swizzleMode == ADDR3_4KB_3D)
    {
        pLog2Table = Block4K_Log2_3d;
    }
    else if (swizzleMode == ADDR3_64KB_3D)
    {
        pLog2Table = Block64K_Log2_3d;
    }
    else if (swizzleMode == ADDR3_256KB_3D)
    {
        pLog2Table = Block256K_Log2_3d;
    }

    ADDR_EXTENT3D log2Extent = {};

    if (pLog2Table != NULL)
    {
        log2Extent = pLog2Table[log2BytesPerPixel];
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
    }

    // Convert the log2 values into actual pixel counts.
    ADDR_EXTENT3D blockExtent = {};

    blockExtent.width  = 1u << log2Extent.width;
    blockExtent.height = 1u << log2Extent.height;
    blockExtent.depth  = 1u << log2Extent.depth;

    return blockExtent;
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetMipOrigin
*
*   @brief
*       Internal function to calculate origins of the mip levels. For every mip from pOut->firstMipIdInTail up to
*       pIn->numMipLevels - 1 it fills pOut->pMipInfo[i] with the offset within the mip tail, the pitch/height/depth
*       and the mip tail X/Y/Z coordinates.
*
*   @note
*       Reads pOut->blockExtent and pOut->firstMipIdInTail, and dereferences pOut->pMipInfo without a NULL check.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx12Lib::GetMipOrigin(
     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< [in] input structure
     const ADDR_EXTENT3D&                    mipExtentFirstInTail, ///< [in] extent of the first mip in the tail
     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut        ///< [out] output structure
     ) const
{
    const BOOL_32        is3d           = Is3dSwizzle(pIn->swizzleMode);
    const UINT_32        bytesPerPixel  = pIn->bpp >> 3;
    const UINT_32        log2Bpp        = Log2(bytesPerPixel);
    // NOTE(review): the block dimensions are always queried for ADDR3_4KB_3D, independent of pIn->swizzleMode;
    // they are only used on the 3D paths below -- confirm this is intended.
    const ADDR_EXTENT3D  pixelBlockDims = GetBlockPixelDimensions(ADDR3_4KB_3D, log2Bpp);
    const ADDR_EXTENT3D  tailMaxDim     = GetMipTailDim(pIn->swizzleMode,
                                                        pOut->blockExtent);
    const UINT_32        blockSizeLog2  = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32        maxMipsInTail  = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2);

    // Running dimensions reported for the current mip; they start at the mip tail maximum and are halved
    // (with a lower bound) after each iteration.
    UINT_32 pitch  = tailMaxDim.width;
    UINT_32 height = tailMaxDim.height;

    UINT_32 depth  = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, pixelBlockDims.depth) : 1);

    // Number of block-depth units covered by the first mip in the tail; scales every mip's offset below.
    const UINT_32 tailMaxDepth   = (is3d ? (depth / pixelBlockDims.depth) : 1);

    for (UINT_32 i = pOut->firstMipIdInTail; i < pIn->numMipLevels; i++)
    {
        // Position of this mip relative to the first mip in the tail. A single-level chain is forced to
        // MaxMipLevels, which drives "m" below to 0.
        INT_32  mipInTail = static_cast<INT_32>(i) - static_cast<INT_32>(pOut->firstMipIdInTail);
        if ((mipInTail < 0) || (pIn->numMipLevels == 1))
        {
            mipInTail = MaxMipLevels;
        }

        // "m" can be negative
        const INT_32  signedM   = static_cast<INT_32>(maxMipsInTail) - static_cast<INT_32>(1) - mipInTail;
        const UINT_32 m         = Max(0, signedM);
        // Byte offset inside the tail: m * 256 for m <= 6, and 16 << m (2KB at m == 7, doubling after) above.
        const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);

        pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
        pOut->pMipInfo[i].mipTailOffset    = mipOffset;
        pOut->pMipInfo[i].macroBlockOffset = 0;

        pOut->pMipInfo[i].pitch  = pitch;
        pOut->pMipInfo[i].height = height;
        pOut->pMipInfo[i].depth  = depth;

        if (IsLinear(pIn->swizzleMode))
        {
            // Linear: the X coordinate is the offset in 256-byte units; only the pitch shrinks per mip.
            pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8;
            pOut->pMipInfo[i].mipTailCoordY = 0;
            pOut->pMipInfo[i].mipTailCoordZ = 0;

            pitch = Max(pitch >> 1, 1u);
        }
        else
        {
            // De-interleave mipOffset starting at bit 8: odd bits (9, 11, 13, 15, 17, 19) form mipX and
            // even bits (8, 10, 12, 14, 16, 18) form mipY.
            UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
                           ((mipOffset >> 10) & 2)  |
                           ((mipOffset >> 11) & 4)  |
                           ((mipOffset >> 12) & 8)  |
                           ((mipOffset >> 13) & 16) |
                           ((mipOffset >> 14) & 32);
            UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
                           ((mipOffset >> 9)  & 2)  |
                           ((mipOffset >> 10) & 4)  |
                           ((mipOffset >> 11) & 8)  |
                           ((mipOffset >> 12) & 16) |
                           ((mipOffset >> 13) & 32);

            if (is3d == FALSE)
            {
                // 2D: scale the coordinates by the Block256_2d dimensions for this element size, and never let
                // pitch/height shrink below those dimensions.
                pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[log2Bpp].w;
                pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[log2Bpp].h;
                pOut->pMipInfo[i].mipTailCoordZ = 0;

                pitch  = Max(pitch  >> 1, Block256_2d[log2Bpp].w);
                height = Max(height >> 1, Block256_2d[log2Bpp].h);
                depth  = 1;
            }
            else
            {
                // 3D: same scheme, using pixelBlockDims; depth is halved and re-aligned to the block depth.
                pOut->pMipInfo[i].mipTailCoordX = mipX * pixelBlockDims.width;
                pOut->pMipInfo[i].mipTailCoordY = mipY * pixelBlockDims.height;
                pOut->pMipInfo[i].mipTailCoordZ = 0;

                pitch  = Max(pitch  >> 1, pixelBlockDims.width);
                height = Max(height >> 1, pixelBlockDims.height);
                depth  = PowTwoAlign(Max(depth >> 1, 1u), pixelBlockDims.depth);
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetMipOffset
*
*   @brief
*       Internal function to calculate alignment for a surface
*
*   @return
*       N/A (the function is declared VOID; its results are written to pOut)
************************************************************************************************************************
*/
VOID Gfx12Lib::GetMipOffset(
     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    const UINT_32        bytesPerPixel = pIn->bpp >> 3;
    const UINT_32        log2Bpp       = Log2(bytesPerPixel);
    const UINT_32        blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32        blockSize     = 1 << blockSizeLog2;
    const ADDR_EXTENT3D  tailMaxDim    = GetMipTailDim(pIn->swizzleMode,
                                                       pOut->blockExtent);
    const ADDR_EXTENT3D  mip0Dims      = GetBaseMipExtents(pIn);
    const UINT_32        maxMipsInTail = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2);

    UINT_32       firstMipInTail    = pIn->numMipLevels;
    UINT_64       mipChainSliceSize = 0;
    UINT_64       mipSize[MaxMipLevels];
    UINT_64       mipSliceSize[MaxMipLevels];

    const ADDR_EXTENT3D fixedTailMaxDim = tailMaxDim;

    for (UINT_32 mipIdx = 0; mipIdx < pIn->numMipLevels; mipIdx++)
    {
        const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);

        if (SupportsMipTail(pIn->swizzleMode) &&
            IsInMipTail(fixedTailMaxDim, mipExtents, maxMipsInTail, pIn->numMipLevels - mipIdx))
        {
            firstMipInTail     = mipIdx;
            mipChainSliceSize += blockSize / pOut->blockExtent.depth;
            break;
        }
        else
        {
            const UINT_32 pitch  = UseCustomPitch(pIn)
                                        ? pOut->pitch
                                        : ((mipIdx == 0) && CanTrimLinearPadding(pIn))
                                          ? PowTwoAlign(mipExtents.width,  128u / bytesPerPixel)
                                          : PowTwoAlign(mipExtents.width,  pOut->blockExtent.width);
            const UINT_32 height = UseCustomHeight(pIn)
                                        ? pOut->height
                                        : PowTwoAlign(mipExtents.height, pOut->blockExtent.height);
            const UINT_32 depth  = PowTwoAlign(mipExtents.depth,  pOut->blockExtent.depth);

            // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the
            // sizes.  We aligned our pitch and height to those sizes, which means we need to multiply the various
            // factors back together to get back to the slice size.
            const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * pIn->numSamples * (pIn->bpp >> 3);

            mipSize[mipIdx]       = sliceSize * depth;
            mipSliceSize[mipIdx]  = sliceSize * pOut->blockExtent.depth;
            mipChainSliceSize    += sliceSize;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[mipIdx].pitch  = pitch;
                pOut->pMipInfo[mipIdx].height = height;
                pOut->pMipInfo[mipIdx].depth  = depth;

                // The slice size of a linear image was calculated above as if the "pitch" is 256 byte aligned.
                // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported
                // to our clients.
                if (IsLinear(pIn->swizzleMode))
                {
                    pOut->pMipInfo[mipIdx].pitch = PowTwoAlign(mipExtents.width,  128u / bytesPerPixel);
                }
            }
        }
    }

    pOut->sliceSize        = mipChainSliceSize;
    pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
    pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
    pOut->firstMipIdInTail = firstMipInTail;

    if (pOut->pMipInfo != NULL)
    {
       if (IsLinear(pIn->swizzleMode))
        {
            // 1. Linear swizzle mode doesn't have miptails.
            // 2. The organization of linear 3D mipmap resource is same as GFX11, we should use mip slice size to
            // calculate mip offset.
            ADDR_ASSERT(firstMipInTail == pIn->numMipLevels);

            UINT_64 sliceSize = 0;

            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
            {
                pOut->pMipInfo[i].offset           = sliceSize;
                pOut->pMipInfo[i].macroBlockOffset = sliceSize;
                pOut->pMipInfo[i].mipTailOffset    = 0;

                sliceSize += mipSliceSize[i];
            }
        }
        else
        {
           UINT_64 offset         = 0;
           UINT_64 macroBlkOffset = 0;
           UINT_32 tailMaxDepth   = 0;

           ADDR_EXTENT3D  mipExtentFirstInTail = {};
           if (firstMipInTail != pIn->numMipLevels)
           {
              mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail);

              offset         = blockSize *
                 PowTwoAlign(mipExtentFirstInTail.depth,
                             pOut->blockExtent.depth) / pOut->blockExtent.depth;
              macroBlkOffset = blockSize;
           }

           for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
           {
              pOut->pMipInfo[i].offset           = offset;
              pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
              pOut->pMipInfo[i].mipTailOffset    = 0;

              offset         += mipSize[i];
              macroBlkOffset += mipSliceSize[i];
           }

           GetMipOrigin(pIn, mipExtentFirstInTail, pOut);
        }
    }
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeSurfaceInfo
*
*   @brief
*       Internal function to calculate the block extent, alignment and mip layout of a surface
*
*   @return
*       ADDR_E_RETURNCODE; anything other than ADDR_OK is the result of ApplyCustomizedPitchHeight
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // The block extent is filled in first; the slice alignment and the checks below read it back from pOut.
    ComputeBlockDimensionForSurf(&pOut->blockExtent, pIn->bpp, pIn->numSamples, pIn->swizzleMode);

    const ADDR_E_RETURNCODE status = ApplyCustomizedPitchHeight(pIn, pOut);

    if (status != ADDR_OK)
    {
        // Nothing else is computed when the customized pitch/height step fails.
        return status;
    }

    // The slice count is padded to a multiple of the block depth; the base alignment is one full block.
    pOut->baseAlign = 1 << GetBlockSizeLog2(pIn->swizzleMode);
    pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockExtent.depth);

    GetMipOffset(pIn, pOut);

    SanityCheckSurfSize(pIn, pOut);

    // Slices must be exact multiples of the block sizes.  However:
    // - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
    // - with linear images that have only one slice, we may trim and use the pitch alignment for size.
    ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) %
                 GetBlockSize(pIn->swizzleMode, CanTrimLinearPadding(pIn))) == 0);

    return status;
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetBaseMipExtents
*
*   @brief
*       Pack the dimensions of the base mip level (width, height, depth) into an ADDR_EXTENT3D.
*
*   @return
*       Base mip extents; the third component is numSlices for 3D resources and 1 for everything else
************************************************************************************************************************
*/
ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn
    ) const
{
    // Only 3D resources treat the slice count as depth.
    const UINT_32 baseDepth = IsTex3d(pIn->resourceType) ? pIn->numSlices : 1;

    const ADDR_EXTENT3D baseExtents = { pIn->width, pIn->height, baseDepth };

    return baseExtents;
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetMaxNumMipsInTail
*
*   @brief
*       Return the maximum number of mip levels that can be packed into the mip tail for a swizzle mode / block size
*
*   @return
*       Max number of mips in tail (at least 1)
************************************************************************************************************************
*/
UINT_32 Gfx12Lib::GetMaxNumMipsInTail(
    Addr3SwizzleMode  swizzleMode,
    UINT_32           blockSizeLog2     ///< block size log2
    ) const
{
    // 3D swizzle modes reduce the block size log2 by a third of its excess over 8 (256B) before the lookup below.
    const UINT_32 effectiveLog2 = Is3dSwizzle(swizzleMode) ? (blockSizeLog2 - ((blockSizeLog2 - 8) / 3))
                                                           : blockSizeLog2;

    UINT_32 mipsInTail;

    if (effectiveLog2 <= 8)
    {
        // An effective size of 256B or less: a single mip level.
        mipsInTail = 1;
    }
    else if (effectiveLog2 <= 11)
    {
        // Effective log2 of 9, 10, 11 yields 2, 3, 5 mips respectively.
        mipsInTail = 1 + (1 << (effectiveLog2 - 9));
    }
    else
    {
        // From an effective log2 of 12 upwards the count grows by one per doubling of the block size.
        mipsInTail = effectiveLog2 - 4;
    }

    return mipsInTail;
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface.
*
*       The surface layout is recomputed through ComputeSurfaceInfo() using the unaligned dimensions from the input,
*       then the address is assembled as:
*           slice base + macro block offset of the mip + (block index * block size) + offset inside the block
*       where the offset inside the block comes from the swizzle equation of the mode / sample count / element size.
*
*   @return
*       ADDR_E_RETURNCODE: the ComputeSurfaceInfo() result, or ADDR_INVALIDPARAMS when no equation exists for the
*       requested combination. pOut->addr is only written on success.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
     const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    // 256B block cannot support 3D image.
    ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE);

    ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn = {};
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
    ADDR3_MIP_INFO                    mipInfo[MaxMipLevels];

    // Zero-valued dimensions / counts are clamped to 1 before the layout is computed.
    localIn.size         = sizeof(localIn);
    localIn.flags        = pIn->flags;
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.resourceType = pIn->resourceType;
    localIn.format       = ADDR_FMT_INVALID;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unAlignedDims.width, 1u);
    localIn.height       = Max(pIn->unAlignedDims.height, 1u);
    localIn.numSlices    = Max(pIn->unAlignedDims.depth, 1u);
    localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
    localIn.numSamples   = Max(pIn->numSamples, 1u);

    localOut.size        = sizeof(localOut);
    localOut.pMipInfo    = mipInfo;

    ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut);

    if (ret == ADDR_OK)
    {
        const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
        const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2);

        if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
        {
            // A mip is treated as living in the mip tail when it has a non-zero tail offset; 256B blocks never do.
            const BOOL_32 inTail     = ((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256));
            // For single-sample 3D images one block spans blockExtent.depth slices, so both the slice stride and
            // the slice index are expressed in units of whole blocks.
            const BOOL_32 is3dNoMsaa = ((IsTex3d(pIn->resourceType) == TRUE) && (localIn.numSamples == 1));
            const UINT_64 sliceSize  = is3dNoMsaa ? (localOut.sliceSize * localOut.blockExtent.depth)
                                                  : localOut.sliceSize;
            const UINT_32 sliceId    = is3dNoMsaa ? (pIn->slice / localOut.blockExtent.depth) : pIn->slice;
            // Coordinates fed to the equation are shifted by the mip's origin inside the tail block.
            const UINT_32 x          = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
            const UINT_32 y          = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
            const UINT_32 z          = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
            // Pitch in blocks and the block coordinates of the requested pixel.
            const UINT_32 pb         = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width;
            const UINT_32 yb         = pIn->y / localOut.blockExtent.height;
            const UINT_32 xb         = pIn->x / localOut.blockExtent.width;
            // Widen before multiplying so the block index cannot wrap at 32 bits ahead of the 64-bit shift below.
            const UINT_64 blkIdx     = static_cast<UINT_64>(yb) * pb + xb;
            const UINT_32 blkOffset  = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
                                                                 x << elemLog2,
                                                                 y,
                                                                 z,
                                                                 pIn->sample);
            pOut->addr = sliceSize * sliceId +
                         mipInfo[pIn->mipId].macroBlockOffset +
                         (blkIdx << blkSizeLog2) +
                         blkOffset;
        }
        else
        {
            ret = ADDR_INVALIDPARAMS;
        }
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputePipeBankXor
*
*   @brief
*       Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address.
*       The value is written to pOut->pipeBankXor; it is 0 whenever the XOR does not apply.
*
*   @return
*       ADDR_E_RETURNCODE (always ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
    const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
    ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
    ) const
{
    // Base address XOR in GFX12 is applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes; linear and 256B
    // are excluded.  It also requires that this configuration supports swizzling at all.
    const BOOL_32 xorApplies = (m_numSwizzleBits != 0)               &&
                               (IsLinear(pIn->swizzleMode) == FALSE) &&
                               (IsBlock256b(pIn->swizzleMode) == FALSE);

    // The surface index is wrapped into the range representable by the available swizzle bits.
    pOut->pipeBankXor = xorApplies ? (pIn->surfIndex % (1 << m_numSwizzleBits)) : 0;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx12Lib::ComputeOffsetFromEquation
*
*   @brief
*       Evaluate an address equation: each valid output bit is taken from one bit of the x, y, z or sample
*       coordinate, as selected by the equation's per-bit channel and index.
*
*   @return
*       Offset assembled from the pEq->numBits equation bits
************************************************************************************************************************
*/
UINT_32 Gfx12Lib::ComputeOffsetFromEquation(
    const ADDR_EQUATION* pEq,   ///< Equation
    UINT_32              x,     ///< x coord in bytes
    UINT_32              y,     ///< y coord in pixel
    UINT_32              z,     ///< z coord in slice
    UINT_32              s      ///< MSAA sample index
    ) const
{
    UINT_32 result = 0;

    for (UINT_32 bit = 0; bit < pEq->numBits; bit++)
    {
        // Bits without a valid source contribute zero.
        if (!pEq->addr[bit].valid)
        {
            continue;
        }

        // Select the coordinate this address bit is sourced from; an unknown channel contributes zero.
        UINT_32 coord = 0;

        switch (pEq->addr[bit].channel)
        {
        case 0:
            coord = x;
            break;
        case 1:
            coord = y;
            break;
        case 2:
            coord = z;
            break;
        case 3:
            coord = s;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
        }

        const UINT_32 bitValue = (coord >> pEq->addr[bit].index) & 1;

        result |= (bitValue << bit);
    }

    return result;
}

/**
************************************************************************************************************************
*   Gfx12Lib::GetSwizzlePatternInfo
*
*   @brief
*       Look up the swizzle pattern entry for a swizzle mode, element size and fragment count.
*       2D modes have one pattern table per fragment count (1, 2, 4, 8); 3D modes have a single table.
*
*   @return
*       Pointer to the pattern entry for elemLog2, or NULL when the swizzle mode / fragment count has no table
************************************************************************************************************************
*/
const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
    Addr3SwizzleMode swizzleMode,       ///< Swizzle mode
    UINT_32          elemLog2,          ///< Element size in bytes log2
    UINT_32          numFrag            ///< Number of fragment
    ) const
{
    // Pattern tables of the 2D swizzle modes, one row per block size, ordered 1xAA, 2xAA, 4xAA, 8xAA.
    static const ADDR_SW_PATINFO* const Tables256KB2d[] =
    {
        GFX12_SW_256KB_2D_1xAA_PATINFO,
        GFX12_SW_256KB_2D_2xAA_PATINFO,
        GFX12_SW_256KB_2D_4xAA_PATINFO,
        GFX12_SW_256KB_2D_8xAA_PATINFO,
    };
    static const ADDR_SW_PATINFO* const Tables64KB2d[] =
    {
        GFX12_SW_64KB_2D_1xAA_PATINFO,
        GFX12_SW_64KB_2D_2xAA_PATINFO,
        GFX12_SW_64KB_2D_4xAA_PATINFO,
        GFX12_SW_64KB_2D_8xAA_PATINFO,
    };
    static const ADDR_SW_PATINFO* const Tables4KB2d[] =
    {
        GFX12_SW_4KB_2D_1xAA_PATINFO,
        GFX12_SW_4KB_2D_2xAA_PATINFO,
        GFX12_SW_4KB_2D_4xAA_PATINFO,
        GFX12_SW_4KB_2D_8xAA_PATINFO,
    };
    static const ADDR_SW_PATINFO* const Tables256B2d[] =
    {
        GFX12_SW_256B_2D_1xAA_PATINFO,
        GFX12_SW_256B_2D_2xAA_PATINFO,
        GFX12_SW_256B_2D_4xAA_PATINFO,
        GFX12_SW_256B_2D_8xAA_PATINFO,
    };

    // Only 2D swizzle modes may be asked for more than one fragment.
    ADDR_ASSERT((Is2dSwizzle(swizzleMode)) || (numFrag == 1));

    const ADDR_SW_PATINFO* const* ppFragTables    = NULL;  // set for 2D modes: per-fragment-count tables
    const ADDR_SW_PATINFO*        pPatTable       = NULL;  // the table finally selected
    BOOL_32                       assertOnBadFrag = TRUE;

    switch (swizzleMode)
    {
    case ADDR3_256KB_2D:
        ppFragTables = Tables256KB2d;
        break;
    case ADDR3_256KB_3D:
        pPatTable = GFX12_SW_256KB_3D_PATINFO;
        break;
    case ADDR3_64KB_2D:
        ppFragTables = Tables64KB2d;
        break;
    case ADDR3_64KB_3D:
        pPatTable = GFX12_SW_64KB_3D_PATINFO;
        break;
    case ADDR3_4KB_2D:
        ppFragTables = Tables4KB2d;
        break;
    case ADDR3_4KB_3D:
        pPatTable = GFX12_SW_4KB_3D_PATINFO;
        break;
    case ADDR3_256B_2D:
        // Unlike the larger block sizes, an unsupported fragment count here yields NULL without asserting.
        ppFragTables    = Tables256B2d;
        assertOnBadFrag = FALSE;
        break;
    default:
        ADDR_ASSERT_ALWAYS();
        break;
    }

    if (ppFragTables != NULL)
    {
        switch (numFrag)
        {
        case 1:
            pPatTable = ppFragTables[0];
            break;
        case 2:
            pPatTable = ppFragTables[1];
            break;
        case 4:
            pPatTable = ppFragTables[2];
            break;
        case 8:
            pPatTable = ppFragTables[3];
            break;
        default:
            if (assertOnBadFrag)
            {
                ADDR_ASSERT_ALWAYS();
            }
            break;
        }
    }

    // Each table is indexed by the element size log2.
    return (pPatTable != NULL) ? &pPatTable[elemLog2] : NULL;
}
/**
************************************************************************************************************************
*   Gfx12Lib::HwlInitGlobalParams
*
*   @brief
*       Initializes global parameters (pipe count, pipe interleave size, number of swizzle bits) from the
*       GB_ADDR_CONFIG register value in the create input, then builds the equation table.
*
*   @return
*       TRUE if all settings are valid
*
************************************************************************************************************************
*/
BOOL_32 Gfx12Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    GB_ADDR_CONFIG_GFX12 addrConfigReg;
    addrConfigReg.u32All = pCreateIn->regValue.gbAddrConfig;

    BOOL_32 isValid = TRUE;

    // Decode the pipe count into its log2.
    switch (addrConfigReg.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:
            m_pipesLog2 = 0;
            break;
        case ADDR_CONFIG_2_PIPE:
            m_pipesLog2 = 1;
            break;
        case ADDR_CONFIG_4_PIPE:
            m_pipesLog2 = 2;
            break;
        case ADDR_CONFIG_8_PIPE:
            m_pipesLog2 = 3;
            break;
        case ADDR_CONFIG_16_PIPE:
            m_pipesLog2 = 4;
            break;
        case ADDR_CONFIG_32_PIPE:
            m_pipesLog2 = 5;
            break;
        case ADDR_CONFIG_64_PIPE:
            m_pipesLog2 = 6;
            break;
        default:
            // Unknown encoding: m_pipesLog2 is left untouched and the configuration is rejected.
            ADDR_ASSERT_ALWAYS();
            isValid = FALSE;
            break;
    }

    // Decode the pipe interleave size into its log2 (256B .. 2KB).
    switch (addrConfigReg.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveLog2 = 8;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveLog2 = 9;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveLog2 = 10;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveLog2 = 11;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            isValid = FALSE;
            break;
    }

    // Configurations with fewer than 8 pipes get no swizzle bits; otherwise there are (pipesLog2 - 2) of them.
    m_numSwizzleBits = ((m_pipesLog2 < 3) ? 0 : m_pipesLog2 - 2);

    // The equation table is only built for a fully valid configuration.
    if (isValid)
    {
        InitEquationTable();
    }

    return isValid;
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeNonBlockCompressedView
*
*   @brief
*       Compute non-block-compressed view for a given mipmap level/slice.
*
*       The surface layout is computed on element dimensions (the unaligned pixel dimensions divided by the
*       bcWidth/bcHeight values returned by GetBitsPerPixel, rounded up).  From that layout the function derives:
*       - pOut->offset       : byte offset of the requested slice/mip
*       - pOut->pipeBankXor  : slice pipe-bank XOR
*       - pOut->mipId, pOut->numMipLevels, pOut->unAlignedDims.width/height : description of the view, chosen by
*         one of three cases (mip in tail / mip width shifts back exactly to mip0 width / everything else).
*
*   @return
*       ADDR_E_RETURNCODE: ADDR_NOTSUPPORTED for formats outside BC1..BC7 and ASTC_4x4..ETC2_128BPP, otherwise the
*       result of HwlComputeSurfaceInfo.  The outputs are only written when that result is ADDR_OK.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView(
    const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // The format must fall inside one of the two supported enum ranges.
    if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
        ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
    {
        // Only support BC1~BC7, ASTC, or ETC2 for now...
        returnCode = ADDR_NOTSUPPORTED;
    }
    else
    {
        // bcWidth/bcHeight are outputs of GetBitsPerPixel (presumably the compressed block footprint in pixels).
        UINT_32 bcWidth, bcHeight;
        const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);

        // Describe the surface in units of elements rather than pixels, always single-sampled.
        ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
        infoIn.size         = sizeof(infoIn);
        infoIn.flags        = pIn->flags;
        infoIn.swizzleMode  = pIn->swizzleMode;
        infoIn.resourceType = pIn->resourceType;
        infoIn.format       = pIn->format;
        infoIn.bpp          = bpp;
        infoIn.width        = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth);
        infoIn.height       = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight);
        infoIn.numSlices    = pIn->unAlignedDims.depth;
        infoIn.numMipLevels = pIn->numMipLevels;
        infoIn.numSamples   = 1;

        ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};

        ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
        infoOut.size     = sizeof(infoOut);
        infoOut.pMipInfo = mipInfo;

        returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut);

        if (returnCode == ADDR_OK)
        {
            ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
            subOffIn.size             = sizeof(subOffIn);
            subOffIn.swizzleMode      = infoIn.swizzleMode;
            subOffIn.resourceType     = infoIn.resourceType;
            subOffIn.pipeBankXor      = pIn->pipeBankXor;
            subOffIn.slice            = pIn->slice;
            subOffIn.sliceSize        = infoOut.sliceSize;
            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;

            ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
            subOffOut.size = sizeof(subOffOut);

            // For any mipmap level, move nonBc view base address by offset
            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
            pOut->offset = subOffOut.offset;

            ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
            slicePbXorIn.size            = sizeof(slicePbXorIn);
            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
            slicePbXorIn.resourceType    = infoIn.resourceType;
            slicePbXorIn.bpe             = infoIn.bpp;
            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
            slicePbXorIn.slice           = pIn->slice;
            slicePbXorIn.numSamples      = 1;

            ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
            slicePbXorOut.size = sizeof(slicePbXorOut);

            // For any mipmap level, nonBc view should use computed pbXor
            // NOTE(review): the return code of HwlComputeSlicePipeBankXor is not checked here; if it fails,
            // the zero-initialized slicePbXorOut.pipeBankXor is what gets reported.
            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;

            // Dimensions of the requested mip level, in elements (each at least 1 pixel before the division).
            const BOOL_32 tiled            = (pIn->swizzleMode != ADDR3_LINEAR);
            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail);
            const UINT_32 requestMipWidth  =
                    RoundUpQuotient(Max(pIn->unAlignedDims.width  >> pIn->mipId, 1u), bcWidth);
            const UINT_32 requestMipHeight =
                    RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight);

            if (inTail)
            {
                // For mipmap level that is in mip tail block, hack a lot of things...
                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
                // are fit in tail block:

                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;

                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);

                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
                pOut->unAlignedDims.width  = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2);

                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
                pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height);
            }
            // This check should cover at least mipId == 0
            else if ((requestMipWidth << pIn->mipId) == infoIn.width)
            {
                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
                // - only one mipmap level and mipId = 0
                pOut->mipId        = 0;
                pOut->numMipLevels = 1;

                // (mip0) width = requestMipWidth
                pOut->unAlignedDims.width  = requestMipWidth;

                // (mip0) height = requestMipHeight
                pOut->unAlignedDims.height = requestMipHeight;
            }
            else
            {
                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
                // because single mip view may have different pitch value than original (multiple) mip view...
                // A simple case would be:
                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
                //   mip0 width = 0x101/mip1 width = 0x80
                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.

                // - 2 levels and mipId = 1
                pOut->mipId        = 1;
                pOut->numMipLevels = 2;

                // Dimensions of the mip level directly above the requested one, in elements.  The (mipId - 1)
                // shifts rely on mipId == 0 having been handled by the branch above.
                const UINT_32 upperMipWidth  =
                    RoundUpQuotient(Max(pIn->unAlignedDims.width  >> (pIn->mipId - 1), 1u), bcWidth);
                const UINT_32 upperMipHeight =
                    RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight);

                // TRUE when the requested level would fit within half a block width and one block height on a
                // tiled surface; in that case the extra element below is forced when the upper level is exactly
                // twice the requested one.
                const BOOL_32 needToAvoidInTail = tiled                                              &&
                                                  (requestMipWidth <= infoOut.blockExtent.width / 2) &&
                                                  (requestMipHeight <= infoOut.blockExtent.height);

                // Block-aligned dimensions of the requested level as derived from the full mip chain.
                const UINT_32 hwMipWidth  =
                    PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width);
                const UINT_32 hwMipHeight =
                    PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height);

                // An extra element is added to the upper level when it is less than twice the requested level,
                // or when it is exactly twice and either the tail must be avoided or the chain-derived aligned
                // size is larger than the aligned requested size.
                const BOOL_32 needExtraWidth =
                    ((upperMipWidth < requestMipWidth * 2) ||
                     ((upperMipWidth == requestMipWidth * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width)))));

                const BOOL_32 needExtraHeight =
                    ((upperMipHeight < requestMipHeight * 2) ||
                     ((upperMipHeight == requestMipHeight * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height)))));

                // (mip0) width = requestLastMipLevelWidth
                pOut->unAlignedDims.width  = upperMipWidth + (needExtraWidth ? 1: 0);

                // (mip0) height = requestLastMipLevelHeight
                pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 1: 0);
            }

            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
            ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId)  == requestMipWidth);
            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
            ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight);
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Compute sub resource offset to support swizzle pattern: the macro block offset of the mip plus the start
*       of the requested slice (slice index times slice size).  The result is written to pOut->offset.
*
*   @return
*       VOID
************************************************************************************************************************
*/
VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
    const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
    ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Only slice, sliceSize and macroBlockOffset of the input are consumed here.
    pOut->offset = pIn->macroBlockOffset + (pIn->slice * pIn->sliceSize);
}

/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeSlicePipeBankXor
*
*   @brief
*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id.
*       The value is written to pOut->pipeBankXor; it is 0 for linear and 256B swizzle modes.
*
*   @return
*       ADDR_E_RETURNCODE:
*       - ADDR_OK            on success
*       - ADDR_INVALIDPARAMS when pIn->bpe is 0 or no equation exists for the swizzle mode / sample count /
*                            element size
*       - ADDR_NOTSUPPORTED  when the swizzle mode has no pattern table
*       pOut->pipeBankXor is left untouched on failure.
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
    const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
    ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12.
    if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE))
    {
        if (pIn->bpe == 0)
        {
            // Require a valid bits-per-element value passed from client...
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            // bpe is in bits; convert to the log2 of the element size in bytes once and reuse it.
            const UINT_32          elemLog2 = Log2(pIn->bpe >> 3);
            const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
                                                                    elemLog2,
                                                                    1);

            if (pPatInfo != NULL)
            {
                const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2);

                if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
                {
                    // Evaluate the equation at x = y = sample = 0 so that only the slice bits contribute.
                    const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
                                                                                0,
                                                                                0,
                                                                                pIn->slice,
                                                                                0);

                    const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;

                    // Should have no bit set under pipe interleave
                    ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);

                    pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
                }
                else
                {
                    // No equation for this combination: do not index m_equationTable with the invalid marker.
                    returnCode = ADDR_INVALIDPARAMS;
                }
            }
            else
            {
                // Should never come here...
                ADDR_NOT_IMPLEMENTED();

                returnCode = ADDR_NOTSUPPORTED;
            }
        }
    }
    else
    {
        pOut->pipeBankXor = 0;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx12Lib::SanityCheckSurfSize
*
*   @brief
*       Calculate the surface size via the exact hardware algorithm to see if it matches.  The whole check is
*       compiled only when DEBUG is set; in other builds this function does nothing.
*
*   @return
*       N/A.  A mismatch is reported through ADDR_ASSERT.
************************************************************************************************************************
*/
void Gfx12Lib::SanityCheckSurfSize(
    const ADDR3_COMPUTE_SURFACE_INFO_INPUT*   pIn,
    const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*  pOut
    ) const
{
#if DEBUG
    // Verify that the requested image size is valid for the below algorithm.  The below code includes
    // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't
    // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly.
    //
    // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of
    // the "maximum" image dimensions.
    if ((pIn->width  <= MaxImageDim)        &&
        (pIn->height <= MaxImageDim)        &&
        (pIn->numMipLevels <= MaxMipLevels) &&
        (UseCustomPitch(pIn)  == FALSE)     &&
        (UseCustomHeight(pIn) == FALSE)     &&
        // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent
        // image, at least for single samples) but they still have the same number of mip levels as the
        // parent image.  This disconnect produces false assertions below as the image size doesn't apparently
        // support the specified number of mip levels.
        ((pIn->flags.hiZHiS == 0) || (pIn->numMipLevels == 1))   &&
        !(pIn->flags.view3dAs2dArray))
    {
        // Sizes below are counted in blocks; they are converted to bytes by the final "<< blockSizeLog2".
        UINT_32  lastMipSize   = 1;
        UINT_32  dataChainSize = 0;

        const ADDR_EXTENT3D  mip0Dims      = GetBaseMipExtents(pIn);
        const UINT_32        blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
        const ADDR_EXTENT3D  tailMaxDim    = GetMipTailDim(pIn->swizzleMode, pOut->blockExtent);
        const UINT_32        maxMipsInTail = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2);

        // Walk from the smallest candidate mip towards mip 0; the last assignment wins, so this ends up holding
        // the lowest mip index that satisfies the tail criteria.
        UINT_32  firstMipInTail = 0;
        for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--)
        {
            const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);

            if ((mipExtents.width  <= tailMaxDim.width)  &&
                (mipExtents.height <= tailMaxDim.height) &&
                ((static_cast<INT_32>(pIn->numMipLevels) - mipIdx) < static_cast<INT_32>(maxMipsInTail)))
            {
                firstMipInTail = mipIdx;
            }
        }

        for (INT_32  mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--)
        {
            // Accumulate the size computed on the previous iteration, but only if that mip actually exists.
            if (mipIdx < (static_cast<INT_32>(pIn->numMipLevels) - 1))
            {
                dataChainSize += lastMipSize;
            }

            // The loop intentionally runs one step past mip 0 (mipIdx == -1) so that the last computed size is
            // folded into the total above.  Mip extents are only needed for real mip levels, so they are computed
            // inside this guard; GetMipExtent takes an unsigned mip id and must never be handed -1.
            if (mipIdx >= 0)
            {
                const ADDR_EXTENT3D  mipExtents     = GetMipExtent(mip0Dims, mipIdx);
                const UINT_32        mipBlockWidth  = ShiftCeil(mipExtents.width,  Log2(pOut->blockExtent.width));
                const UINT_32        mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height));

                lastMipSize = 4 * lastMipSize
                    - ((mipBlockWidth  & 1) ? mipBlockHeight : 0)
                    - ((mipBlockHeight & 1) ? mipBlockWidth  : 0)
                    - ((mipBlockWidth  & mipBlockHeight & 1) ? 1 : 0);
            }
        }

        // When linear padding may be trimmed the reported size is allowed to be smaller than the computed one;
        // otherwise the two must match exactly.
        if (CanTrimLinearPadding(pIn))
        {
            ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2));
        }
        else
        {
            ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2));
        }
    }
#endif
}

} // V3
} // Addr
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2023 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx12addrlib.h
* @brief Contains the Gfx12Lib class definition.
************************************************************************************************************************
*/

#ifndef __GFX12_ADDR_LIB_H__
#define __GFX12_ADDR_LIB_H__

#include "addrlib3.h"
#include "coord.h"
#include "gfx12SwizzlePattern.h"

namespace rocr {
namespace Addr
{
namespace V3
{

/**
************************************************************************************************************************
* @brief GFX12 specific settings structure.
*
*        Currently holds no named settings: the only member is a 32-bit reserved bitfield.
************************************************************************************************************************
*/
struct Gfx12ChipSettings
{
    struct
    {
        // Misc configuration bits
        // All 32 bits are reserved; no individual flag is defined here.
        UINT_32 reserved : 32;
    };
};

/**
************************************************************************************************************************
* @brief GFX12 data surface type.
************************************************************************************************************************
*/

/**
************************************************************************************************************************
* @brief Address library implementation for GFX12; supplies the GFX12-specific hooks and helpers on top of
*        the Lib base class.
************************************************************************************************************************
*/
class Gfx12Lib : public Lib
{
public:
    /// Factory: obtains storage through the client allocator and placement-constructs a Gfx12Lib in it.
    /// Returns NULL when the allocation fails.
    static Addr::Lib* CreateObj(const Client* pClient)
    {
        Addr::Lib*  pLib     = NULL;
        VOID* const pStorage = Object::ClientAlloc(sizeof(Gfx12Lib), pClient);

        if (pStorage != NULL)
        {
            pLib = new (pStorage) Gfx12Lib(pClient);
        }

        return pLib;
    }

protected:
    Gfx12Lib(const Client* pClient);
    virtual ~Gfx12Lib();

    // On GFX12, meta surfaces such as Hi-S/Z are essentially images, so the maximum image alignment
    // (256KB) is what gets reported for them.
    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const { return 256 * 1024; }

    UINT_32 GetMaxNumMipsInTail(
        Addr3SwizzleMode  swizzleMode,
        UINT_32           blockSizeLog2) const;

    /// A mip belongs to the mip tail when both its width and height fit inside the tail dimensions and it is
    /// close enough to the end of the chain (numMipsToTheEnd does not exceed maxNumMipsInTail).
    BOOL_32 IsInMipTail(
        const ADDR_EXTENT3D&  mipTailDim,
        const ADDR_EXTENT3D&  mipDims,
        UINT_32               maxNumMipsInTail,
        UINT_32               numMipsToTheEnd) const
    {
        return ((mipDims.width   <= mipTailDim.width)  &&
                (mipDims.height  <= mipTailDim.height) &&
                (numMipsToTheEnd <= maxNumMipsInTail));
    }

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView(
        const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,
        ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut) const;

    virtual VOID HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const;

    /// Hands back the equation table through ppEquationTable and returns how many equations it holds.
    virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
    {
        const UINT_32 equationCount = m_numEquations;

        *ppEquationTable = m_equationTable;

        return equationCount;
    }

private:
    Gfx12ChipSettings m_settings;
    static const SwizzleModeFlags SwizzleModeTable[ADDR3_MAX_TYPE];

    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const override;

    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) override;

    void SanityCheckSurfSize(
        const ADDR3_COMPUTE_SURFACE_INFO_INPUT*   pIn,
        const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*  pOut) const;

    UINT_32           m_numSwizzleBits;

    static const ADDR_EXTENT3D Block4K_Log2_3d[];
    static const ADDR_EXTENT3D Block64K_Log2_3d[];
    static const ADDR_EXTENT3D Block256K_Log2_3d[];

    // Builds the equation table.
    VOID InitEquationTable();

    /// Assembles a full swizzle pattern from the four nibble tables referenced by pPatInfo.  The pieces are
    /// copied to element offsets 0, 8, 12 and 16 of pSwizzle, each sized by its own table row.
    VOID GetSwizzlePatternFromPatternInfo(
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_BIT_SETTING       (&pSwizzle)[Log2Size256K]) const
    {
        const auto& nibble1 = GFX12_SW_PATTERN_NIBBLE1[pPatInfo->nibble1Idx];
        const auto& nibble2 = GFX12_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx];
        const auto& nibble3 = GFX12_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx];
        const auto& nibble4 = GFX12_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx];

        memcpy(pSwizzle,      nibble1, sizeof(nibble1));
        memcpy(&pSwizzle[8],  nibble2, sizeof(nibble2));
        memcpy(&pSwizzle[12], nibble3, sizeof(nibble3));
        memcpy(&pSwizzle[16], nibble4, sizeof(nibble4));
    }

    VOID ConvertSwizzlePatternToEquation(
        UINT_32                elemLog2,
        Addr3SwizzleMode       swMode,
        const ADDR_SW_PATINFO* pPatInfo,
        ADDR_EQUATION* pEquation) const;

    ADDR_EXTENT3D GetBaseMipExtents(
        const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    ADDR_EXTENT3D GetBlockPixelDimensions(
        Addr3SwizzleMode  swizzleMode,
        UINT_32           log2BytesPerPixel) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
         const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const override;

    /// Extents of mip level mipId derived from the mip 0 extents: every dimension is clamped to at least 1
    /// and then passed through ShiftCeil with mipId as the shift count.
    static ADDR_EXTENT3D GetMipExtent(
        const ADDR_EXTENT3D&  mip0,
        UINT_32               mipId)
    {
        const auto mipWidth  = ShiftCeil(Max(mip0.width, 1u),  mipId);
        const auto mipHeight = ShiftCeil(Max(mip0.height, 1u), mipId);
        const auto mipDepth  = ShiftCeil(Max(mip0.depth, 1u),  mipId);

        return { mipWidth, mipHeight, mipDepth };
    }

    //# See 6.3 in //gfxip/gfx10/doc/architecture/ImageAddressing/gfx10_image_addressing.docx
    // A mip tail only exists for the larger block sizes (4kb, 64kb, 256kb).  Linear and 256b_2d addressing
    // both use 256b blocks and therefore have no mip tail.
    BOOL_32 SupportsMipTail(Addr3SwizzleMode swizzleMode) const
    {
        const auto blockBytes = GetBlockSize(swizzleMode);

        return blockBytes > 256u;
    }

    UINT_32 ComputeOffsetFromEquation(
        const ADDR_EQUATION* pEq,
        UINT_32              x,
        UINT_32              y,
        UINT_32              z,
        UINT_32              s) const;

    const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
        Addr3SwizzleMode swizzleMode,
        UINT_32          log2Elem,
        UINT_32          numFrag) const;

    VOID GetMipOffset(
         const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    VOID GetMipOrigin(
         const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,
         const ADDR_EXTENT3D&                    mipExtentFirstInTail,
         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;
};

} // V3
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx9addrlib.cpp
* @brief Contains the implementation for the Gfx9Lib class.
************************************************************************************************************************
*/

#include "gfx9addrlib.h"

#include "gfx9_gb_reg.h"

#include "amdgpu_asic_addr.h"

////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
namespace rocr {
namespace Addr
{

/**
************************************************************************************************************************
*   Gfx9HwlInit
*
*   @brief
*       Factory entry point for the GFX9 address library; forwards to V2::Gfx9Lib::CreateObj.
*
*   @return
*       Whatever V2::Gfx9Lib::CreateObj returns for the given client.
************************************************************************************************************************
*/
Addr::Lib* Gfx9HwlInit(const Client* pClient)
{
    Addr::Lib* const pGfx9Lib = V2::Gfx9Lib::CreateObj(pClient);

    return pGfx9Lib;
}

namespace V2
{

////////////////////////////////////////////////////////////////////////////////////////////////////
//                               Static Const Member
////////////////////////////////////////////////////////////////////////////////////////////////////

// Per-swizzle-mode flag table.  Each row holds 13 flag values in the column order given by the header comment
// on the first line; the trailing comment on each row names the swizzle mode the row describes.  Rows marked
// "Reserved" have every flag cleared.
const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
    {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
    {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
    {{0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_256B_R

    {{0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_Z
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
    {{0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_4KB_R

    {{0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_Z
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
    {{0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_64KB_R

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved

    {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_Z_T
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
    {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}}, // ADDR_SW_64KB_R_T

    {{0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_Z_X
    {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
    {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
    {{0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_4KB_R_X

    {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
    {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
    {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
    {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_64KB_R_X

    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
    {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
};

// 16-entry lookup table of mip tail offsets.
// NOTE(review): presumably indexed by position within the mip tail and expressed in 256B units -- confirm
// against the code that consumes it.
const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};

// 3D block dimensions, five entries each.  The element counts (w * h * d) of successive entries are
// 256, 128, 64, 32 and 16.
// NOTE(review): presumably indexed by log2(bytes per element), so that each entry covers 256 bytes -- confirm.
const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};

// Same element counts as Block256_3dS, with different per-axis shapes.
const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};

/**
************************************************************************************************************************
*   Gfx9Lib::Gfx9Lib
*
*   @brief
*       Constructor.  Copies the static swizzle mode table into the instance and zero-initializes the chip
*       settings, the cached meta equation keys and the meta equation override index.
*
************************************************************************************************************************
*/
Gfx9Lib::Gfx9Lib(const Client* pClient)
    :
    Lib(pClient)
{
    // Seed the per-instance swizzle mode flags from the static table.
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));

    // Everything else starts out cleared.
    m_metaEqOverrideIndex = 0;
    memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
    memset(&m_settings, 0, sizeof(m_settings));
}

/**
************************************************************************************************************************
*   Gfx9Lib::~Gfx9Lib
*
*   @brief
*       Destructor.  Performs no work of its own.
************************************************************************************************************************
*/
Gfx9Lib::~Gfx9Lib() = default;

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeHtileInfo
*
*   @brief
*       Interface function stub of AddrComputeHtileInfo.  Derives the HTILE meta block dimensions, the number
*       of meta blocks covering the surface, and the resulting pitch/height/slice size/alignment/total size.
*
*   @return
*       ADDR_E_RETURNCODE (always ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    const UINT_32 pipeCount = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
                                                          pIn->swizzleMode);

    const UINT_32 rbCount = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;

    // log2 of the number of compressed blocks in one meta block.  The single-pipe, single-RB case uses a
    // fixed value of 10; otherwise the value grows with the SE / RB-per-SE counts.
    UINT_32 cmpBlkPerMetaBlkLog2 = 10;

    if ((pipeCount != 1) || (rbCount != 1))
    {
        const UINT_32 baseLog2 = m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10u;

        cmpBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + baseLog2;
    }

    const UINT_32 cmpBlkPerMetaBlk = 1 << cmpBlkPerMetaBlkLog2;

    // Start from an 8x8x1 block and distribute the amplification bits between width and height.
    UINT_32 widthShift;

    if (pIn->numMipLevels > 1)
    {
        widthShift = cmpBlkPerMetaBlkLog2 >> 1;
    }
    else
    {
        widthShift = RoundHalf(cmpBlkPerMetaBlkLog2);
    }

    const UINT_32 heightShift = cmpBlkPerMetaBlkLog2 - widthShift;

    Dim3d blkDim = {8, 8, 1};
    blkDim.w <<= widthShift;
    blkDim.h <<= heightShift;

#if DEBUG
    // Cross-check the closed form above against doubling one dimension per amplification bit.
    Dim3d blkDimCheck = {8, 8, 1};
    for (UINT_32 bitIdx = 0; bitIdx < cmpBlkPerMetaBlkLog2; bitIdx++)
    {
        const bool growHeight = (blkDimCheck.h < blkDimCheck.w) ||
                                ((pIn->numMipLevels > 1) && (blkDimCheck.h == blkDimCheck.w));

        if (growHeight)
        {
            blkDimCheck.h <<= 1;
        }
        else
        {
            blkDimCheck.w <<= 1;
        }
    }
    ADDR_ASSERT((blkDimCheck.w == blkDim.w) && (blkDimCheck.h == blkDim.h));
#endif

    // Meta block counts along each axis; always written by GetMetaMipInfo.
    UINT_32 blkCountX = 0;
    UINT_32 blkCountY = 0;
    UINT_32 blkCountZ = 0;

    GetMetaMipInfo(pIn->numMipLevels, &blkDim, FALSE, pOut->pMipInfo,
                   pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
                   &blkCountX, &blkCountY, &blkCountZ);

    // Each compressed block contributes 4 units (<< 2) to the meta block size.
    const UINT_32 metaBlkBytes = cmpBlkPerMetaBlk << 2;
    UINT_32       baseAlign    = pipeCount * rbCount * m_pipeInterleaveBytes;

    if ((IsXor(pIn->swizzleMode) == FALSE) && (pipeCount > 2))
    {
        baseAlign *= (pipeCount >> 1);
    }

    baseAlign = Max(baseAlign, metaBlkBytes);

    if (m_settings.metaBaseAlignFix)
    {
        baseAlign = Max(baseAlign, GetBlockSize(pIn->swizzleMode));
    }

    if (m_settings.htileAlignFix)
    {
        const INT_32 metaBlkBytesLog2  = cmpBlkPerMetaBlkLog2 + 2;
        const INT_32 cachelineLog2     = 11;
        const INT_32 maxRbMaskBits     = 1 + Log2(pipeCount) + Log2(rbCount);
        const INT_32 rbMaskPaddingBits = cachelineLog2 - (metaBlkBytesLog2 - maxRbMaskBits);

        // Only a positive shortfall widens the alignment.
        if (rbMaskPaddingBits > 0)
        {
            baseAlign <<= rbMaskPaddingBits;
        }
    }

    const UINT_32 blkPerSlice = blkCountX * blkCountY;

    pOut->metaBlkWidth       = blkDim.w;
    pOut->metaBlkHeight      = blkDim.h;
    pOut->metaBlkNumPerSlice = blkPerSlice;

    pOut->pitch      = blkCountX * blkDim.w;
    pOut->height     = blkCountY * blkDim.h;
    pOut->sliceSize  = blkPerSlice * metaBlkBytes;

    pOut->baseAlign  = baseAlign;
    pOut->htileBytes = PowTwoAlign(pOut->sliceSize * blkCountZ, baseAlign);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeCmaskInfo
*
*   @brief
*       Interface function stub of AddrComputeCmaskInfo.  Derives the CMASK meta block dimensions, the number of
*       meta blocks covering the surface, the resulting sizes/alignments, and exports the CMASK address equation
*       into pOut->equation.gfx9.
*
*   @return
*       ADDR_E_RETURNCODE (always ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
    const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

    UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
                                                       pIn->swizzleMode);

    UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;

    UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;

    if ((numPipeTotal == 1) && (numRbTotal == 1))
    {
        numCompressBlkPerMetaBlkLog2 = 13;
    }
    else
    {
        if (m_settings.applyAliasFix)
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
        }
        else
        {
            numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
        }

        // Never go below the single-pipe, single-RB value.
        numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
    }

    numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;

    // Start from an 8x8 block and split the amplification bits; width receives the extra bit when the total is odd.
    Dim2d metaBlkDim = {8, 8};
    UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
    UINT_32 heightAmp = totalAmpBits >> 1;
    UINT_32 widthAmp = totalAmpBits - heightAmp;
    metaBlkDim.w <<= widthAmp;
    metaBlkDim.h <<= heightAmp;

#if DEBUG
    // Cross-check the closed form above against doubling one dimension per amplification bit.
    Dim2d metaBlkDimDbg = {8, 8};
    for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
    {
        if (metaBlkDimDbg.h < metaBlkDimDbg.w)
        {
            metaBlkDimDbg.h <<= 1;
        }
        else
        {
            metaBlkDimDbg.w <<= 1;
        }
    }
    ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
#endif

    // Number of meta blocks needed to cover the surface in X/Y (rounded up); one layer of blocks per slice.
    UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
    UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
    UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);

    UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;

    if (m_settings.metaBaseAlignFix)
    {
        sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
    }

    pOut->pitch      = numMetaBlkX * metaBlkDim.w;
    pOut->height     = numMetaBlkY * metaBlkDim.h;
    // Each compressed block contributes half a unit of slice size (>> 1).
    pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
    pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
    pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);

    pOut->metaBlkWidth = metaBlkDim.w;
    pOut->metaBlkHeight = metaBlkDim.h;

    pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;

    // Get the CMASK address equation (copied from CmaskAddrFromCoord)
    UINT_32 fmaskBpp              = GetFmaskBpp(1, 1);
    UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
    UINT_32 metaBlkWidthLog2      = Log2(pOut->metaBlkWidth);
    UINT_32 metaBlkHeightLog2     = Log2(pOut->metaBlkHeight);

    MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
                                Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
                                metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};

    // GetMetaEquation is invoked through a non-const view of this object from a const method.
    CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);

    // Generate the CMASK address equation: export at most 32 bits, copying each bit's coordinates and marking
    // the remaining coordinate slots (up to 5 per bit) as invalid.
    pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
    for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++)
    {
        CoordTerm &bit = (*eq)[b];

        unsigned c;
        for (c = 0; c < bit.getsize(); c++)
        {
            Coordinate &coord = bit[c];
            pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
            pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
        }
        for (; c < 5; c++)
        {
            pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
        }
    }

    // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
    for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--)
    {
        CoordTerm &prev = (*eq)[b - 1];
        CoordTerm &cur = (*eq)[b];

        if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
            prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
            prev[0].getord() + 1 == cur[0].getord())
        {
            pOut->equation.gfx9.num_bits = b;
        }
        else
        {
            break;
        }
    }

    pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
                                                                   pIn->swizzleMode);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaMipInfo
*
*   @brief
*       Get meta mip info.  Computes how many meta blocks are needed along each axis to hold the whole mip
*       chain and, when pInfo is not NULL, fills in the placement (start coordinate and aligned size) of every
*       mip level inside that meta block grid.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetMetaMipInfo(
    UINT_32 numMipLevels,           ///< [in]  number of mip levels
    Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
    BOOL_32 dataThick,              ///< [in]  data surface is thick
    ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
    UINT_32 mip0Width,              ///< [in]  mip0 width
    UINT_32 mip0Height,             ///< [in]  mip0 height
    UINT_32 mip0Depth,              ///< [in]  mip0 depth
    UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
    UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
    UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
    const
{
    // Meta blocks needed to cover mip 0 along each axis (rounded up).
    UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
    UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
    UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
    // A mip is "in the tail" once it fits within a full meta block width, half a meta block height and
    // (for thick data only) a full meta block depth.
    UINT_32 tailWidth   = pMetaBlkDim->w;
    UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
    UINT_32 tailDepth   = pMetaBlkDim->d;
    BOOL_32 inTail      = FALSE;
    AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;

    if (numMipLevels > 1)
    {
        // Pick the major axis from the mip 0 block counts: Z only for thick data where Z strictly dominates
        // both X and Y, otherwise X when it is at least as large as Y, else Y.
        if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
        {
            // Z major
            major = ADDR_MAJOR_Z;
        }
        else if (numMetaBlkX >= numMetaBlkY)
        {
            // X major
            major = ADDR_MAJOR_X;
        }
        else
        {
            // Y major
            major = ADDR_MAJOR_Y;
        }

        inTail = ((mip0Width <= tailWidth) &&
                  (mip0Height <= tailHeight) &&
                  ((dataThick == FALSE) || (mip0Depth <= tailDepth)));

        // When mip 0 itself is not in the tail, one block count (*pMipDim) is enlarged to make room for the
        // lower mips; which one depends on the major axis.
        if (inTail == FALSE)
        {
            UINT_32 orderLimit;
            UINT_32 *pMipDim;
            UINT_32 *pOrderDim;

            if (major == ADDR_MAJOR_Z)
            {
                // Z major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkZ;
                orderLimit = 4;
            }
            else if (major == ADDR_MAJOR_X)
            {
                // X major
                pMipDim = &numMetaBlkY;
                pOrderDim = &numMetaBlkX;
                orderLimit = 4;
            }
            else
            {
                // Y major
                pMipDim = &numMetaBlkX;
                pOrderDim = &numMetaBlkY;
                orderLimit = 2;
            }

            // Either add a fixed 2 blocks (small minor count, long major count, more than 3 mips) or grow the
            // minor count by half of itself, rounded up.
            if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
            {
                *pMipDim += 2;
            }
            else
            {
                *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
            }
        }
    }

    // Per-mip placement is optional; callers pass NULL when they only need the block counts.
    if (pInfo != NULL)
    {
        UINT_32 mipWidth  = mip0Width;
        UINT_32 mipHeight = mip0Height;
        UINT_32 mipDepth  = mip0Depth;
        Dim3d   mipCoord  = {0};

        for (UINT_32 mip = 0; mip < numMipLevels; mip++)
        {
            if (inTail)
            {
                // The first mip found to be in the tail hands this and all remaining mips to
                // GetMetaMiptailInfo, which is passed the count of mips left; the loop then ends.
                GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
                                   pMetaBlkDim);
                break;
            }
            else
            {
                // Outside the tail every mip is padded up to whole meta blocks.
                mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
                mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
                mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);

                pInfo[mip].inMiptail = FALSE;
                pInfo[mip].startX = mipCoord.w;
                pInfo[mip].startY = mipCoord.h;
                pInfo[mip].startZ = mipCoord.d;
                pInfo[mip].width  = mipWidth;
                pInfo[mip].height = mipHeight;
                pInfo[mip].depth  = dataThick ? mipDepth : 1;

                // Advance the start coordinate for the next mip.  Odd mips and every mip from 3 onwards step
                // along the major axis; mips 0 and 2 step along the other axis chosen below.
                if ((mip >= 3) || (mip & 1))
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.d += mipDepth;
                            break;
                        default:
                            break;
                    }
                }
                else
                {
                    switch (major)
                    {
                        case ADDR_MAJOR_X:
                            mipCoord.h += mipHeight;
                            break;
                        case ADDR_MAJOR_Y:
                            mipCoord.w += mipWidth;
                            break;
                        case ADDR_MAJOR_Z:
                            mipCoord.h += mipHeight;
                            break;
                        default:
                            break;
                    }
                }

                // Next mip: halve each (aligned) dimension, never dropping below 1.
                mipWidth  = Max(mipWidth >> 1, 1u);
                mipHeight = Max(mipHeight >> 1, 1u);
                mipDepth = Max(mipDepth >> 1, 1u);

                inTail = ((mipWidth <= tailWidth) &&
                          (mipHeight <= tailHeight) &&
                          ((dataThick == FALSE) || (mipDepth <= tailDepth)));
            }
        }
    }

    *pNumMetaBlkX = numMetaBlkX;
    *pNumMetaBlkY = numMetaBlkY;
    *pNumMetaBlkZ = numMetaBlkZ;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeDccInfo
*
*   @brief
*       Interface function to compute DCC key info: meta block / compress block dimensions, DCC RAM size and
*       base alignment, per-mip layout (via GetMetaMipInfo) and the exported GFX9 DCC address equation.
*
*   @note
*       Linear metadata trips ADDR_ASSERT_ALWAYS(); on that path only dccRamBaseAlign and dccRamSize are written.
*
*   @return
*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
    ) const
{
    BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
    BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
    BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;

    // A linear data surface forces linear metadata; linear metadata in turn is never pipe aligned.
    if (dataLinear)
    {
        metaLinear = TRUE;
    }
    else if (metaLinear == TRUE)
    {
        pipeAligned = FALSE;
    }

    UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);

    if (metaLinear)
    {
        // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
        ADDR_ASSERT_ALWAYS();

        pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
        pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
    }
    else
    {
        BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);

        UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;

        // Treat zero fragment / slice counts as one.
        UINT_32 numFrags = Max(pIn->numFrags, 1u);
        UINT_32 numSlices = Max(pIn->numSlices, 1u);

        minMetaBlkSize /= numFrags;

        UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;

        UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;

        if ((numPipeTotal > 1) || (numRbTotal > 1))
        {
            const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);

            numCompressBlkPerMetaBlk =
                Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));

            // Cap the meta block at 65536 * bpp compress blocks.
            if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
            {
                numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
            }
        }

        Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
        Dim3d metaBlkDim = compressBlkDim;

        // Start from one compress block and double exactly one dimension per iteration; the iteration count
        // is driven by numCompressBlkPerMetaBlk (index doubles each pass).
        for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
        {
            if ((metaBlkDim.h < metaBlkDim.w) ||
                ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
            {
                // Height is the candidate; thick data may grow depth instead when depth is smaller.
                if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
                {
                    metaBlkDim.h <<= 1;
                }
                else
                {
                    metaBlkDim.d <<= 1;
                }
            }
            else
            {
                // Width is the candidate; thick data may grow depth instead when depth is smaller.
                if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
                {
                    metaBlkDim.w <<= 1;
                }
                else
                {
                    metaBlkDim.d <<= 1;
                }
            }
        }

        UINT_32 numMetaBlkX;
        UINT_32 numMetaBlkY;
        UINT_32 numMetaBlkZ;

        // Fills pOut->pMipInfo and returns the number of meta blocks along each axis.
        GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
                       pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
                       &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);

        UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;

        if (numFrags > m_maxCompFrag)
        {
            sizeAlign *= (numFrags / m_maxCompFrag);
        }

        if (m_settings.metaBaseAlignFix)
        {
            sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
        }

        pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
                           numCompressBlkPerMetaBlk * numFrags;
        pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
        pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);

        pOut->pitch = numMetaBlkX * metaBlkDim.w;
        pOut->height = numMetaBlkY * metaBlkDim.h;
        pOut->depth = numMetaBlkZ * metaBlkDim.d;

        pOut->compressBlkWidth = compressBlkDim.w;
        pOut->compressBlkHeight = compressBlkDim.h;
        pOut->compressBlkDepth = compressBlkDim.d;

        pOut->metaBlkWidth = metaBlkDim.w;
        pOut->metaBlkHeight = metaBlkDim.h;
        pOut->metaBlkDepth = metaBlkDim.d;
        pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;

        pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
        pOut->fastClearSizePerSlice =
            pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);

        // Get the DCC address equation (copied from DccAddrFromCoord)
        // NOTE(review): numSamplesLog2 is derived from pIn->numFrags, not from the clamped numFrags above;
        // confirm that Log2(0) is acceptable when the caller passes numFrags == 0.
        UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
        UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
        UINT_32 metaBlkWidthLog2  = Log2(pOut->metaBlkWidth);
        UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
        UINT_32 metaBlkDepthLog2  = Log2(pOut->metaBlkDepth);
        UINT_32 compBlkWidthLog2  = Log2(pOut->compressBlkWidth);
        UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
        UINT_32 compBlkDepthLog2  = Log2(pOut->compressBlkDepth);

        MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
                                     Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
                                     metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
                                     compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};

        // The casts drop constness from `this` and from the returned equation so that it can be
        // indexed through non-const references below.
        CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);

        // Generate the DCC address equation: export at most 32 equation bits.
        pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
        for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++)
        {
            CoordTerm &bit = (*eq)[b];

            // Copy every coordinate of this bit's term, then mark the remaining slots (up to 5) invalid.
            unsigned c;
            for (c = 0; c < bit.getsize(); c++)
            {
                Coordinate &coord = bit[c];
                pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
                pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
            }
            for (; c < 5; c++)
            {
                pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
            }
        }

        // Reduce num_bits because DIM_M fills the rest of the bits monotonically: walk down from the top bit
        // while both the bit and its predecessor are single DIM_M terms with consecutive ordinals.
        for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--)
        {
            CoordTerm &prev = (*eq)[b - 1];
            CoordTerm &cur = (*eq)[b];

            if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
                prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
                prev[0].getord() + 1 == cur[0].getord())
            {
                pOut->equation.gfx9.num_bits = b;
            }
            else
            {
                break;
            }
        }

        pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
                                                                       pIn->swizzleMode);
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeMaxBaseAlignments
*
*   @brief
*       Reports the maximum base alignment; this implementation returns the fixed Size64K constant
*   @return
*       Size64K
************************************************************************************************************************
*/
UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{
    // The bound does not depend on any per-chip state in this implementation.
    const UINT_32 maxBaseAlign = Size64K;

    return maxBaseAlign;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
*
*   @brief
*       Computes an upper bound for metadata base alignment as the largest of the Htile, 3D DCC and MSAA DCC
*       bounds
*   @return
*       maximum alignments for metadata
************************************************************************************************************************
*/
UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
{
    const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
    const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;

    // Product shared by the Htile and MSAA DCC bounds below.
    const UINT_32 pipeRbInterleave = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;

    // With applyAliasFix the extra bits ought to be MAX(10u, m_pipeInterleaveLog2). No ASIC with
    // m_pipeInterleaveLog2 != 8 has been seen, so assert on it and simplify the logic to a constant 10.
    ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
    const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);

    // --- Htile ---
    UINT_32 htileAlign = pipeRbInterleave;

    if (maxNumPipeTotal > 2)
    {
        htileAlign *= (maxNumPipeTotal >> 1);
    }

    htileAlign = Max(maxNumCompressBlkPerMetaBlk << 2, htileAlign);

    if (m_settings.metaBaseAlignFix)
    {
        htileAlign = Max(htileAlign, Size64K);
    }

    if (m_settings.htileAlignFix)
    {
        htileAlign *= maxNumPipeTotal;
    }

    // Cmask never needs a larger base alignment than Htile, so it is not computed separately.

    // --- DCC, 3D (2D DCC never needs more than 3D, so it is not computed separately) ---
    const bool multiPipeOrRb = (maxNumPipeTotal > 1) || (maxNumRbTotal > 1);
    const UINT_32 dcc3dAlign = multiPipeOrRb ? Min(m_se * m_rbPerSe * 262144, 65536 * 128u) : 65536u;

    // --- DCC, MSAA ---
    UINT_32 dccMsaaAlign = pipeRbInterleave * (8 / m_maxCompFrag);

    if (m_settings.metaBaseAlignFix)
    {
        dccMsaaAlign = Max(dccMsaaAlign, Size64K);
    }

    const UINT_32 dccAlign = Max(dccMsaaAlign, dcc3dAlign);

    return Max(htileAlign, dccAlign);
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeCmaskAddrFromCoord
*
*   @brief
*       Computes the CMASK address and bit position for a coordinate: queries ComputeCmaskInfo for the meta
*       block layout, solves the meta equation and applies the pipe XOR
*
*   @return
*       ADDR_E_RETURNCODE (the result of ComputeCmaskInfo; pOut is only written on ADDR_OK)
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
{
    // Build the info query; zero dimensions / slice counts are treated as one.
    ADDR2_COMPUTE_CMASK_INFO_INPUT infoIn = {0};
    infoIn.size            = sizeof(infoIn);
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.resourceType    = pIn->resourceType;
    infoIn.cMaskFlags      = pIn->cMaskFlags;
    infoIn.colorFlags      = pIn->colorFlags;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices, 1u);

    ADDR2_COMPUTE_CMASK_INFO_OUTPUT infoOut = {0};
    infoOut.size = sizeof(infoOut);

    const ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&infoIn, &infoOut);

    if (returnCode != ADDR_OK)
    {
        return returnCode;
    }

    const UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
    const UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
    const UINT_32 metaBlkWidthLog2      = Log2(infoOut.metaBlkWidth);
    const UINT_32 metaBlkHeightLog2     = Log2(infoOut.metaBlkHeight);

    MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
                                 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};

    const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);

    // Index of the meta block containing (x, y, slice), counted row by row, slice by slice.
    const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
    const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;
    const UINT_32 blockCol       = pIn->x / infoOut.metaBlkWidth;
    const UINT_32 blockRow       = pIn->y / infoOut.metaBlkHeight;
    const UINT_32 blockIndex     = pIn->slice * blocksPerSlice + blockRow * blocksPerRow + blockCol;

    UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
    const UINT_64 address = pMetaEq->solve(coords);

    // Only the low numPipeBits bits of the caller's pipeXor take part in the address XOR.
    const UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
                                                             pIn->swizzleMode);
    const UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));

    // The solved value is halved to form the address; its low bit selects bit position 0 or 4.
    pOut->addr        = (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
    pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeHtileAddrFromCoord
*
*   @brief
*       Computes the HTILE address for a coordinate: queries ComputeHtileInfo for the meta block layout,
*       solves the meta equation and applies the pipe XOR. Surfaces with more than one mip level are rejected.
*
*   @return
*       ADDR_NOTIMPLEMENTED when numMipLevels > 1, otherwise the result of ComputeHtileInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
{
    if (pIn->numMipLevels > 1)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    // Build the info query; zero dimensions / counts are treated as one.
    ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {0};
    infoIn.size            = sizeof(infoIn);
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.hTileFlags      = pIn->hTileFlags;
    infoIn.depthFlags      = pIn->depthflags;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices, 1u);
    infoIn.numMipLevels    = Max(pIn->numMipLevels, 1u);

    ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {0};
    infoOut.size = sizeof(infoOut);

    const ADDR_E_RETURNCODE returnCode = ComputeHtileInfo(&infoIn, &infoOut);

    if (returnCode != ADDR_OK)
    {
        return returnCode;
    }

    const UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    const UINT_32 metaBlkWidthLog2  = Log2(infoOut.metaBlkWidth);
    const UINT_32 metaBlkHeightLog2 = Log2(infoOut.metaBlkHeight);
    const UINT_32 numSamplesLog2    = Log2(pIn->numSamples);

    MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
                                 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};

    const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);

    // Index of the meta block containing (x, y, slice), counted row by row, slice by slice.
    const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
    const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;
    const UINT_32 blockCol       = pIn->x / infoOut.metaBlkWidth;
    const UINT_32 blockRow       = pIn->y / infoOut.metaBlkHeight;
    const UINT_32 blockIndex     = pIn->slice * blocksPerSlice + blockRow * blocksPerRow + blockCol;

    UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
    const UINT_64 address = pMetaEq->solve(coords);

    // Only the low numPipeBits bits of the caller's pipeXor take part in the address XOR.
    const UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
                                                             pIn->swizzleMode);
    const UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));

    // The solved value is halved to form the address before the pipe XOR is folded in.
    pOut->addr = (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeHtileCoordFromAddr
*
*   @brief
*       Recovers the (x, y, slice) coordinate from an HTILE address: queries ComputeHtileInfo for the meta
*       block layout, undoes the pipe XOR and inverts the meta equation. Surfaces with more than one mip
*       level are rejected.
*
*   @return
*       ADDR_NOTIMPLEMENTED when numMipLevels > 1, otherwise the result of ComputeHtileInfo
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
{
    if (pIn->numMipLevels > 1)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    // Build the info query; zero dimensions / counts are treated as one.
    ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {0};
    infoIn.size            = sizeof(infoIn);
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.hTileFlags      = pIn->hTileFlags;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices, 1u);
    infoIn.numMipLevels    = Max(pIn->numMipLevels, 1u);

    ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {0};
    infoOut.size = sizeof(infoOut);

    const ADDR_E_RETURNCODE returnCode = ComputeHtileInfo(&infoIn, &infoOut);

    if (returnCode != ADDR_OK)
    {
        return returnCode;
    }

    const UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    const UINT_32 metaBlkWidthLog2  = Log2(infoOut.metaBlkWidth);
    const UINT_32 metaBlkHeightLog2 = Log2(infoOut.metaBlkHeight);
    const UINT_32 numSamplesLog2    = Log2(pIn->numSamples);

    MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
                                 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};

    const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);

    // Undo the pipe XOR (low numPipeBits bits only), then double the address to get the value the
    // equation is solved against.
    const UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
                                                             pIn->swizzleMode);
    const UINT_64 pipeXor       = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
    const UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;

    const UINT_32 blocksPerRow   = infoOut.pitch / infoOut.metaBlkWidth;
    const UINT_32 blocksPerSlice = (infoOut.height / infoOut.metaBlkHeight) * blocksPerRow;

    UINT_32 coords[NUM_DIMS];
    pMetaEq->solveAddr(nibbleAddress, blocksPerSlice, coords);

    // coords[DIM_M] is the meta block index; split it into slice / block row / block column and add the
    // in-block offsets from coords[DIM_X] and coords[DIM_Y].
    const UINT_32 blockIndex = coords[DIM_M];

    pOut->x     = (blockIndex % blocksPerRow) * infoOut.metaBlkWidth + coords[DIM_X];
    pOut->y     = ((blockIndex % blocksPerSlice) / blocksPerRow) * infoOut.metaBlkHeight + coords[DIM_Y];
    pOut->slice = blockIndex / blocksPerSlice;

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlSupportComputeDccAddrFromCoord
*
*   @brief
*       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
*
*   @return
*       ADDR_NOTSUPPORTED for mipmapped or linear DCC and for zero-sized dimension inputs, ADDR_OK otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
{
    // Mip chains (and mipId above 1) as well as linear DCC keys are rejected outright.
    const bool unsupportedLayout =
        (pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear;

    // Every surface / block dimension must be non-zero.
    const bool hasZeroDim =
        (pIn->pitch == 0)             ||
        (pIn->height == 0)            ||
        (pIn->compressBlkWidth == 0)  ||
        (pIn->compressBlkHeight == 0) ||
        (pIn->compressBlkDepth == 0)  ||
        (pIn->metaBlkWidth == 0)      ||
        (pIn->metaBlkHeight == 0)     ||
        (pIn->metaBlkDepth == 0);

    // A non-zero slice needs a non-zero per-slice DCC RAM size.
    const bool missingSliceSize = (pIn->slice > 0) && (pIn->dccRamSliceSize == 0);

    return (unsupportedLayout || hasZeroDim || missingSliceSize) ? ADDR_NOTSUPPORTED : ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeDccAddrFromCoord
*
*   @brief
*       Computes the DCC address for a coordinate from caller-supplied meta / compress block dimensions:
*       solves the meta equation and applies the pipe XOR
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
    const UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
    const UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
    const UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
    const UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
    const UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
    const UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
    const UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
    const UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);

    MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
                                 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
                                 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
                                 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};

    const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);

    // Index of the meta block containing (x, y, slice), counted row by row, layer by layer.
    const UINT_32 blocksPerRow   = pIn->pitch / pIn->metaBlkWidth;
    const UINT_32 blocksPerSlice = (pIn->height / pIn->metaBlkHeight) * blocksPerRow;
    const UINT_32 blockCol       = pIn->x / pIn->metaBlkWidth;
    const UINT_32 blockRow       = pIn->y / pIn->metaBlkHeight;
    const UINT_32 blockLayer     = pIn->slice / pIn->metaBlkDepth;
    const UINT_32 blockIndex     = blockLayer * blocksPerSlice + blockRow * blocksPerRow + blockCol;

    UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
    const UINT_64 address = pMetaEq->solve(coords);

    // Only the low numPipeBits bits of the caller's pipeXor take part in the address XOR.
    const UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
                                                             pIn->swizzleMode);
    const UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));

    // The solved value is halved to form the address before the pipe XOR is folded in.
    pOut->addr = (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlInitGlobalParams
*
*   @brief
*       Initializes global parameters by decoding the GB_ADDR_CONFIG register value (pipes, pipe interleave,
*       banks, shader engines, RBs per SE, max compressed fragments) and then building the equation table
*
*   @return
*       TRUE if all settings are valid; FALSE when the chip is not flagged as Arctic Island
*
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    // Only Arctic Island parts are handled here.
    if (!m_settings.isArcticIsland)
    {
        ADDR_NOT_IMPLEMENTED();
        return FALSE;
    }

    GB_ADDR_CONFIG_GFX9 addrConfig;
    addrConfig.u32All = pCreateIn->regValue.gbAddrConfig;

    // Each switch below sets a count together with its log2; unknown encodings assert and leave the
    // members untouched. These values are copied from CModel code.
    switch (addrConfig.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:  m_pipesLog2 = 0; m_pipes = 1;  break;
        case ADDR_CONFIG_2_PIPE:  m_pipesLog2 = 1; m_pipes = 2;  break;
        case ADDR_CONFIG_4_PIPE:  m_pipesLog2 = 2; m_pipes = 4;  break;
        case ADDR_CONFIG_8_PIPE:  m_pipesLog2 = 3; m_pipes = 8;  break;
        case ADDR_CONFIG_16_PIPE: m_pipesLog2 = 4; m_pipes = 16; break;
        case ADDR_CONFIG_32_PIPE: m_pipesLog2 = 5; m_pipes = 32; break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    switch (addrConfig.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveLog2  = 8;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveLog2  = 9;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveLog2  = 10;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveLog2  = 11;
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly
    // 8 bits; any larger value would need a post-process (left shift) on the output pipeBankXor bits.
    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);

    switch (addrConfig.bits.NUM_BANKS)
    {
        case ADDR_CONFIG_1_BANK:  m_banksLog2 = 0; m_banks = 1;  break;
        case ADDR_CONFIG_2_BANK:  m_banksLog2 = 1; m_banks = 2;  break;
        case ADDR_CONFIG_4_BANK:  m_banksLog2 = 2; m_banks = 4;  break;
        case ADDR_CONFIG_8_BANK:  m_banksLog2 = 3; m_banks = 8;  break;
        case ADDR_CONFIG_16_BANK: m_banksLog2 = 4; m_banks = 16; break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    switch (addrConfig.bits.NUM_SHADER_ENGINES)
    {
        case ADDR_CONFIG_1_SHADER_ENGINE: m_seLog2 = 0; m_se = 1; break;
        case ADDR_CONFIG_2_SHADER_ENGINE: m_seLog2 = 1; m_se = 2; break;
        case ADDR_CONFIG_4_SHADER_ENGINE: m_seLog2 = 2; m_se = 4; break;
        case ADDR_CONFIG_8_SHADER_ENGINE: m_seLog2 = 3; m_se = 8; break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    switch (addrConfig.bits.NUM_RB_PER_SE)
    {
        case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: m_rbPerSeLog2 = 0; m_rbPerSe = 1; break;
        case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: m_rbPerSeLog2 = 1; m_rbPerSe = 2; break;
        case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: m_rbPerSeLog2 = 2; m_rbPerSe = 4; break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    switch (addrConfig.bits.MAX_COMPRESSED_FRAGS)
    {
        case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: m_maxCompFragLog2 = 0; m_maxCompFrag = 1; break;
        case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: m_maxCompFragLog2 = 1; m_maxCompFrag = 2; break;
        case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: m_maxCompFragLog2 = 2; m_maxCompFrag = 4; break;
        case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: m_maxCompFragLog2 = 3; m_maxCompFrag = 8; break;
        default:
            ADDR_ASSERT_ALWAYS();
            break;
    }

    // Configurations with 2 RBs per SE and either (2 pipes, 4 or 8 SEs) or (4 pipes, 2 or 4 SEs) are only
    // expected on Vega12, where they enable the htileCacheRbConflict setting.
    const bool twoPipeCfg  = (m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3));
    const bool fourPipeCfg = (m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2));

    if ((m_rbPerSeLog2 == 1) && (twoPipeCfg || fourPipeCfg))
    {
        ADDR_ASSERT(m_settings.isVega10 == FALSE);

        ADDR_ASSERT(m_settings.isRaven == FALSE);

        ADDR_ASSERT(m_settings.isVega20 == FALSE);

        if (m_settings.isVega12)
        {
            m_settings.htileCacheRbConflict = 1;
        }
    }

    // For simplicity VAR swizzle mode is never allowed on GFX9 (the actual value on GFX9 is 18).
    m_blockVarSizeLog2 = 0;

    InitEquationTable();

    return TRUE;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlConvertChipFamily
*
*   @brief
*       Sets the per-chip m_settings flags from the family and revision IDs; unknown families assert
*   @return
*       ChipFamily (always ADDR_CHIP_FAMILY_AI in this implementation)
************************************************************************************************************************
*/
ChipFamily Gfx9Lib::HwlConvertChipFamily(
    UINT_32 uChipFamily,        ///< [in] chip family ID
    UINT_32 uChipRevision)      ///< [in] chip revision ID
{
    if (uChipFamily == FAMILY_AI)
    {
        m_settings.isArcticIsland = 1;
        m_settings.isDce12        = 1;

        m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
        m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
        m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);

        // The htile alignment and alias fixes apply to everything except Vega10.
        if (m_settings.isVega10 == 0)
        {
            m_settings.htileAlignFix = 1;
            m_settings.applyAliasFix = 1;
        }

        m_settings.metaBaseAlignFix    = 1;
        m_settings.depthPipeXorDisable = 1;
    }
    else if (uChipFamily == FAMILY_RV)
    {
        m_settings.isArcticIsland = 1;

        const bool revIsRaven  = ASICREV_IS_RAVEN(uChipRevision);
        const bool revIsRaven2 = ASICREV_IS_RAVEN2(uChipRevision);

        if (revIsRaven || revIsRaven2)
        {
            m_settings.isRaven = 1;
        }

        // Only the first Raven revision range disables the depth pipe XOR.
        if (revIsRaven)
        {
            m_settings.depthPipeXorDisable = 1;
        }

        // Evaluated before the Renoir check below, so Renoir still receives both fixes
        // (unless isRaven was already set).
        if (m_settings.isRaven == 0)
        {
            m_settings.htileAlignFix = 1;
            m_settings.applyAliasFix = 1;
        }

        // Likewise captured before Renoir sets isRaven.
        m_settings.isDcn1 = m_settings.isRaven;

        if (ASICREV_IS_RENOIR(uChipRevision))
        {
            m_settings.isRaven = 1;
            m_settings.isDcn2  = 1;
        }

        m_settings.metaBaseAlignFix = 1;
    }
    else
    {
        ADDR_ASSERT(!"No Chip found");
    }

    return ADDR_CHIP_FAMILY_AI;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetRbEquation
*
*   @brief
*       Builds the RB equation: resizes pRbEq to (numRbPerSeLog2 + numSeLog2) bits and fills each bit with
*       X / Y coordinate terms
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetRbEquation(
    CoordEq* pRbEq,             ///< [out] rb equation
    UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
    UINT_32  numSeLog2)         ///< [in] number of shader engine
    const
{
    // RBs are distributed on 16x16 regions, except with 1 RB per SE, where the region is 32x32.
    const UINT_32 regionLog2  = (numRbPerSeLog2 == 0) ? 5 : 4;
    const UINT_32 totalRbLog2 = numRbPerSeLog2 + numSeLog2;

    Coordinate xBit(DIM_X, regionLog2);
    Coordinate yBit(DIM_Y, regionLog2);

    // Reset the equation, then size it to one bit per RB index bit.
    pRbEq->resize(0);
    pRbEq->resize(totalRbLog2);

    UINT_32 firstBit = 0;

    if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
    {
        // Special case: more than one SE with 2 RBs per SE. Bit 0 gets x, y and the next y.
        CoordTerm& bit0 = (*pRbEq)[0];

        bit0.add(xBit);
        bit0.add(yBit);
        xBit++;
        yBit++;

        // Without the alias fix the advanced y coordinate is added a second time.
        if (!m_settings.applyAliasFix)
        {
            bit0.add(yBit);
        }

        bit0.add(yBit);
        firstBit++;
    }

    // Two coordinates per remaining bit: the first pass walks the bits upwards, the second pass walks
    // them back downwards. Even steps contribute y, odd steps contribute x.
    const UINT_32 numBits = 2 * (totalRbLog2 - firstBit);

    for (UINT_32 step = 0; step < numBits; step++)
    {
        UINT_32 offset = step;

        if ((firstBit + step) >= totalRbLog2)
        {
            offset = numBits - step - 1;
        }

        CoordTerm& term = (*pRbEq)[firstBit + offset];

        if ((step & 1u) != 0)
        {
            term.add(xBit);
            xBit++;
        }
        else
        {
            term.add(yBit);
            yBit++;
        }
    }
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetDataEquation
*
*   @brief
*       Get data equation for color, fmask and Z (depth) surfaces
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetDataEquation(
    CoordEq* pDataEq,               ///< [out] data surface equation
    Gfx9DataType dataSurfaceType,   ///< [in] data surface type
    AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
    AddrResourceType resourceType,  ///< [in] data surface resource type
    UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
    UINT_32 numSamplesLog2)         ///< [in] data surface sample count
    const
{
    Coordinate xBit(DIM_X, 0);
    Coordinate yBit(DIM_Y, 0);
    Coordinate zBit(DIM_Z, 0);
    Coordinate sBit(DIM_S, 0);

    CoordEq& eq = *pDataEq;

    // Start from an empty 27-bit equation
    eq.resize(0);
    eq.resize(27);

    if (dataSurfaceType != Gfx9DataColor)
    {
        // Fmask or depth: sample bits sit directly above the element bytes
        const UINT_32 firstPixelBit  = elementBytesLog2 + numSamplesLog2;
        const UINT_32 firstYMajorBit = 6 + numSamplesLog2;

        for (UINT_32 sample = 0; sample < numSamplesLog2; sample++)
        {
            sBit.set(DIM_S, sample);
            eq[elementBytesLog2 + sample].add(sBit);
        }

        // x-major pixel bits first, then the y-major pixel bits above them
        eq.mort2d(xBit, yBit, firstPixelBit, firstYMajorBit - 1);
        eq.mort2d(yBit, xBit, firstYMajorBit);
        return;
    }

    if (IsLinear(swizzleMode))
    {
        // Linear color: 49 consecutive DIM_M bits
        Coordinate mBit(DIM_M, 0);

        eq.resize(49);

        for (UINT_32 bit = 0; bit < 49; bit++)
        {
            eq[bit].add(mBit);
            mBit++;
        }
    }
    else if (IsThick(resourceType, swizzleMode))
    {
        // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
        if (IsStandardSwizzle(resourceType, swizzleMode))
        {
            // Standard 3d swizzle: x bits below bit 4
            for (UINT_32 bit = elementBytesLog2; bit < 4; bit++)
            {
                eq[bit].add(xBit);
                xBit++;
            }

            // Two y bits at 4..5 followed by two z bits at 6..7
            eq[4].add(yBit);
            yBit++;
            eq[5].add(yBit);
            yBit++;
            eq[6].add(zBit);
            zBit++;
            eq[7].add(zBit);
            zBit++;

            // Bits 8 and 9 depend on the element size
            switch (elementBytesLog2)
            {
                case 0:
                case 1:
                    // z then y
                    eq[8].add(zBit);
                    eq[9].add(yBit);
                    zBit++;
                    yBit++;
                    break;
                case 2:
                    // y then x
                    eq[8].add(yBit);
                    eq[9].add(xBit);
                    yBit++;
                    xBit++;
                    break;
                default:
                    // two x bits
                    eq[8].add(xBit);
                    xBit++;
                    eq[9].add(xBit);
                    xBit++;
                    break;
            }
        }
        else
        {
            // Z 3d swizzle: end of the 2D morton section and number of z bits above it,
            // both selected by element size
            UINT_32 mortonEnd = 0;
            UINT_32 zCount    = 0;

            switch (elementBytesLog2)
            {
                case 0:
                    mortonEnd = 3;
                    zCount    = 2;
                    break;
                case 1:
                    mortonEnd = 4;
                    zCount    = 3;
                    break;
                case 2:
                case 3:
                    mortonEnd = 4;
                    zCount    = 1;
                    break;
                case 4:
                    mortonEnd = 5;
                    zCount    = 2;
                    break;
                default:
                    mortonEnd = 5;
                    zCount    = 1;
                    break;
            }

            eq.mort2d(xBit, yBit, elementBytesLog2, mortonEnd);

            for (UINT_32 bit = mortonEnd + 1; bit <= mortonEnd + zCount; bit++)
            {
                eq[bit].add(zBit);
                zBit++;
            }

            if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
            {
                // add an x and z
                eq[6].add(xBit);
                eq[7].add(zBit);
                xBit++;
                zBit++;
            }
            else if (elementBytesLog2 == 2)
            {
                // add a y and z
                eq[6].add(yBit);
                eq[7].add(zBit);
                yBit++;
                zBit++;
            }

            // add y and x
            eq[8].add(yBit);
            eq[9].add(xBit);
            yBit++;
            xBit++;
        }

        // Bit 10 and up are a 3D morton interleave
        eq.mort3d( zBit, yBit, xBit, 10 );
    }
    else if (IsThin(resourceType, swizzleMode))
    {
        // Color 2D
        const UINT_32 blkLog2     = GetBlockSizeLog2(swizzleMode);
        const UINT_32 microYEnd   = 4 + ((8 - elementBytesLog2) / 2);
        const UINT_32 sampleSplit = blkLog2 - numSamplesLog2;
        UINT_32       bit;

        // Bottom x bits
        for (bit = elementBytesLog2; bit < 4; bit++)
        {
            eq[bit].add(xBit);
            xBit++;
        }

        // Bottom y bits
        for (bit = 4; bit < microYEnd; bit++)
        {
            eq[bit].add(yBit);
            yBit++;
        }

        // Remaining micro x bits up to bit 7
        for (bit = microYEnd; bit < 8; bit++)
        {
            eq[bit].add(xBit);
            xBit++;
        }

        // x/y bits below the sample split
        eq.mort2d(yBit, xBit, 8, sampleSplit - 1);

        // Sample bits
        for (bit = 0; bit < numSamplesLog2; bit++)
        {
            sBit.set(DIM_S, bit);
            eq[sampleSplit + bit].add(sBit);
        }

        // x/y bits above the sample split; the leading dimension depends on whether
        // sample count and block size have different parity
        if (((numSamplesLog2 ^ blkLog2) & 1) != 0)
        {
            eq.mort2d(xBit, yBit, blkLog2);
        }
        else
        {
            eq.mort2d(yBit, xBit, blkLog2);
        }
    }
    else
    {
        ADDR_ASSERT_ALWAYS();
    }
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetPipeEquation
*
*   @brief
*       Get pipe equation
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetPipeEquation(
    CoordEq*         pPipeEq,            ///< [out] pipe equation
    CoordEq*         pDataEq,            ///< [in] data equation
    UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
    UINT_32          numPipeLog2,        ///< [in] number of pipes
    UINT_32          numSamplesLog2,     ///< [in] data surface sample count
    Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
    AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
    AddrResourceType resourceType        ///< [in] data surface resource type
    ) const
{
    const UINT_32 blkLog2 = GetBlockSizeLog2(swizzleMode);

    // All further edits are made on this local copy of the data equation
    CoordEq workEq;
    pDataEq->copy(workEq);

    const bool isColor = (dataSurfaceType == Gfx9DataColor);

    if (isColor)
    {
        workEq.shift(-static_cast<INT_32>(numSamplesLog2), blkLog2 - numSamplesLog2);
    }

    // Initial pipe equation: the numPipeLog2 bits right above the pipe interleave
    workEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);

    // Offset (above the pipe interleave) of the first usable pipe bit.
    // Only z/stencil (and maybe fmask) can end up with a non-zero value here.
    UINT_32 firstPipeBit = 0;

    if (isColor == false)
    {
        const Coordinate tileMin(DIM_X, 3);

        // Skip address bits whose first coordinate is below the comp block size
        for (; workEq[pipeInterleaveLog2 + firstPipeBit][0] < tileMin; firstPipeBit++)
        {
        }

        // With no skipped bits the pipe equation copied above is already correct
        if (firstPipeBit != 0)
        {
            const UINT_32 srcBase = pipeInterleaveLog2 + firstPipeBit;

            for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
            {
                workEq[srcBase + bit].copyto((*pPipeEq)[bit]);
            }
        }
    }

    if (IsPrt(swizzleMode))
    {
        // Clear out bits above the block size if prt's are enabled
        workEq.resize(blkLog2);
        workEq.resize(48);
    }

    if (!IsXor(swizzleMode))
    {
        return;
    }

    CoordEq pipeXor;

    if (IsThick(resourceType, swizzleMode))
    {
        // Thick: fold pairs of bits above the pipe bits into one xor term per pipe bit
        CoordEq pairedBits;

        workEq.copy(pairedBits, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);

        pipeXor.resize(numPipeLog2);

        for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
        {
            pipeXor[bit].add(pairedBits[2 * bit]);
            pipeXor[bit].add(pairedBits[2 * bit + 1]);
        }
    }
    else
    {
        // Xor in the bits above the pipe+gpu bits
        workEq.copy(pipeXor, pipeInterleaveLog2 + firstPipeBit + numPipeLog2, numPipeLog2);

        if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
        {
            // 1xaa and not prt: additionally xor in the z bits, highest z bit first
            Coordinate zCoord;
            CoordEq    zMask;

            zMask.resize(0);
            zMask.resize(numPipeLog2);

            for (UINT_32 bit = 0; bit < numPipeLog2; bit++)
            {
                zCoord.set(DIM_Z, numPipeLog2 - 1 - bit);
                zMask[bit].add(zCoord);
            }

            pPipeEq->xorin(zMask);
        }
    }

    pipeXor.reverse();
    pPipeEq->xorin(pipeXor);
}
/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaEquation
*
*   @brief
*       Get meta equation for cmask/htile/DCC
*   @return
*       Pointer to a calculated meta equation
************************************************************************************************************************
*/
const CoordEq* Gfx9Lib::GetMetaEquation(
    const MetaEqParams& metaEqParams)
{
    // Look for an equation already generated with byte-identical parameters
    for (UINT_32 slot = 0; slot < MaxCachedMetaEq; slot++)
    {
        const INT_32 diff = memcmp(&metaEqParams,
                                   &m_cachedMetaEqKey[slot],
                                   static_cast<UINT_32>(sizeof(metaEqParams)));

        if (diff == 0)
        {
            return &m_cachedMetaEq[slot];
        }
    }

    // Cache miss: take over the slot at the override index and advance that index
    // round-robin through the cache
    const UINT_32 victim = m_metaEqOverrideIndex;

    m_cachedMetaEqKey[victim] = metaEqParams;
    m_metaEqOverrideIndex     = (victim + 1) % MaxCachedMetaEq;

    CoordEq* const pGenerated = &m_cachedMetaEq[victim];

    GenMetaEquation(pGenerated,
                    metaEqParams.maxMip,
                    metaEqParams.elementBytesLog2,
                    metaEqParams.numSamplesLog2,
                    metaEqParams.metaFlag,
                    metaEqParams.dataSurfaceType,
                    metaEqParams.swizzleMode,
                    metaEqParams.resourceType,
                    metaEqParams.metaBlkWidthLog2,
                    metaEqParams.metaBlkHeightLog2,
                    metaEqParams.metaBlkDepthLog2,
                    metaEqParams.compBlkWidthLog2,
                    metaEqParams.compBlkHeightLog2,
                    metaEqParams.compBlkDepthLog2);

    return pGenerated;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GenMetaEquation
*
*   @brief
*       Generate meta equation for cmask/htile/DCC into the caller-provided equation
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GenMetaEquation(
    CoordEq*         pMetaEq,               ///< [out] meta equation
    UINT_32          maxMip,                ///< [in] max mip Id
    UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
    UINT_32          numSamplesLog2,        ///< [in] data surface sample count
    ADDR2_META_FLAGS metaFlag,              ///< [in] meta flag
    Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
    AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
    AddrResourceType resourceType,          ///< [in] data surface resource type
    UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
    UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
    UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
    UINT_32          compBlkWidthLog2,      ///< [in] compress block width
    UINT_32          compBlkHeightLog2,     ///< [in] compress block height
    UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
    const
{
    // Overview of the non-linear path below:
    //   1. build the data and pipe equations for the data surface
    //   2. build a morton-ordered meta address and filter it to the range between the
    //      compressed block size and the meta block size
    //   3. remove from the meta address the coordinates consumed by pipe and rb bits
    //   4. append DIM_M bits up to 49 bits, scale by the meta element size, and re-insert
    //      the pipe bits, the remaining rb bits and the uncompressed fragment bits above
    //      the pipe interleave
    UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
    UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;

    // Get the correct data address and rb equation
    CoordEq dataEq;
    GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
                    elementBytesLog2, numSamplesLog2);

    // Get pipe and rb equations
    CoordEq pipeEquation;
    GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
                    numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
    // From here on the pipe count is whatever size the generated pipe equation reports
    numPipeTotalLog2 = pipeEquation.getsize();

    if (metaFlag.linear)
    {
        // Linear metadata supporting was removed for GFX9! No one can use this feature.
        ADDR_ASSERT_ALWAYS();

        ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);

        dataEq.copy(*pMetaEq);

        if (IsLinear(swizzleMode))
        {
            if (metaFlag.pipeAligned)
            {
                // Remove the pipe bits
                INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
                pMetaEq->shift(-shift, pipeInterleaveLog2);
            }
            // Divide by comp block size, which for linear (which is always color) is 256 B
            pMetaEq->shift(-8);

            if (metaFlag.pipeAligned)
            {
                // Put pipe bits back in
                pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);

                for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
                {
                    pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
                }
            }
        }

        pMetaEq->shift(1);
    }
    else
    {
        // NOTE(review): the value is cast to INT_32 but stored in a UINT_32 - presumably
        // harmless; confirm against the declared type of m_maxCompFragLog2.
        UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
        // For color surfaces the compressed fragment count is capped at maxCompFragLog2;
        // other surface types use the full sample count
        UINT_32 compFragLog2 =
            ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
            maxCompFragLog2 : numSamplesLog2;

        UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;

        // Make sure the metaaddr is cleared
        pMetaEq->resize(0);
        pMetaEq->resize(27);

        if (IsThick(resourceType, swizzleMode))
        {
            Coordinate cx(DIM_X, 0);
            Coordinate cy(DIM_Y, 0);
            Coordinate cz(DIM_Z, 0);

            // The order of the first two dimensions is swapped when mip levels are present
            if (maxMip > 0)
            {
                pMetaEq->mort3d(cy, cx, cz);
            }
            else
            {
                pMetaEq->mort3d(cx, cy, cz);
            }
        }
        else
        {
            Coordinate cx(DIM_X, 0);
            Coordinate cy(DIM_Y, 0);
            Coordinate cs;

            // The x/y order is swapped when mip levels are present; the morton bits start
            // above the compressed fragment bits
            if (maxMip > 0)
            {
                pMetaEq->mort2d(cy, cx, compFragLog2);
            }
            else
            {
                pMetaEq->mort2d(cx, cy, compFragLog2);
            }

            //------------------------------------------------------------------------------------------------------------------------
            // Put the compressible fragments at the lsb
            // the uncompressible frags will be at the msb of the micro address
            //------------------------------------------------------------------------------------------------------------------------
            for (UINT_32 s = 0; s < compFragLog2; s++)
            {
                cs.set(DIM_S, s);
                (*pMetaEq)[s].add(cs);
            }
        }

        // Keep a copy of the pipe equations
        CoordEq origPipeEquation;
        pipeEquation.copy(origPipeEquation);

        Coordinate co;
        // filter out everything under the compressed block size
        co.set(DIM_X, compBlkWidthLog2);
        pMetaEq->Filter('<', co, 0, DIM_X);
        co.set(DIM_Y, compBlkHeightLog2);
        pMetaEq->Filter('<', co, 0, DIM_Y);
        co.set(DIM_Z, compBlkDepthLog2);
        pMetaEq->Filter('<', co, 0, DIM_Z);

        // For non-color, filter out sample bits
        if (dataSurfaceType != Gfx9DataColor)
        {
            co.set(DIM_X, 0);
            pMetaEq->Filter('<', co, 0, DIM_S);
        }

        // filter out everything above the metablock size
        co.set(DIM_X, metaBlkWidthLog2 - 1);
        pMetaEq->Filter('>', co, 0, DIM_X);
        co.set(DIM_Y, metaBlkHeightLog2 - 1);
        pMetaEq->Filter('>', co, 0, DIM_Y);
        co.set(DIM_Z, metaBlkDepthLog2 - 1);
        pMetaEq->Filter('>', co, 0, DIM_Z);

        // filter out everything above the metablock size for the channel bits
        co.set(DIM_X, metaBlkWidthLog2 - 1);
        pipeEquation.Filter('>', co, 0, DIM_X);
        co.set(DIM_Y, metaBlkHeightLog2 - 1);
        pipeEquation.Filter('>', co, 0, DIM_Y);
        co.set(DIM_Z, metaBlkDepthLog2 - 1);
        pipeEquation.Filter('>', co, 0, DIM_Z);

        // Make sure we still have the same number of channel bits
        if (pipeEquation.getsize() != numPipeTotalLog2)
        {
            ADDR_ASSERT_ALWAYS();
        }

        // Loop through all channel and rb bits,
        // and make sure these components exist in the metadata address
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
            {
                if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
        }

        // RB alignment is optional: without it the rb equation is generated for zero bits
        const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
        const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
        const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
        CoordEq       origRbEquation;

        GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);

        // Working copy that is reduced below; origRbEquation is what finally gets written
        // into the meta equation
        CoordEq rbEquation = origRbEquation;

        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
            {
                if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
                {
                    ADDR_ASSERT_ALWAYS();
                }
            }
        }

        // With the alias fix, co is set to z index -1 and used as the bound for the
        // DIM_Z '>' filter applied to each pipe term in the comparison loop below
        if (m_settings.applyAliasFix)
        {
            co.set(DIM_Z, -1);
        }

        // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
            {
                BOOL_32 isRbEquationInPipeEquation = FALSE;

                if (m_settings.applyAliasFix)
                {
                    // Compare against a z-filtered copy of the pipe term
                    CoordTerm filteredPipeEq;
                    filteredPipeEq = pipeEquation[j];

                    filteredPipeEq.Filter('>', co, 0, DIM_Z);

                    isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
                }
                else
                {
                    isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
                }

                if (isRbEquationInPipeEquation)
                {
                    rbEquation[i].Clear();
                }
            }
        }

         // Per-rb-bit flag: set when coordinates from a pipe term were appended to that rb term
         bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};

        // Loop through each bit of the channel, get the smallest coordinate,
        // and remove it from the metaaddr, and rb_equation
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            pipeEquation[i].getsmallest(co);

            // Filtering on '=' is expected to shrink the meta address by exactly one bit
            UINT_32 old_size = pMetaEq->getsize();
            pMetaEq->Filter('=', co);
            UINT_32 new_size = pMetaEq->getsize();
            if (new_size != old_size-1)
            {
                ADDR_ASSERT_ALWAYS();
            }
            pipeEquation.remove(co);
            for (UINT_32 j = 0; j < numRbTotalLog2; j++)
            {
                if (rbEquation[j].remove(co))
                {
                    // if we actually removed something from this bit, then add the remaining
                    // channel bits, as these can be removed for this bit
                    for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
                    {
                        if (pipeEquation[i][k] != co)
                        {
                            rbEquation[j].add(pipeEquation[i][k]);
                            rbAppendedWithPipeBits[j] = true;
                        }
                    }
                }
            }
        }

        // Loop through the rb bits and see what remain;
        // filter out the smallest coordinate if it remains
        UINT_32 rbBitsLeft = 0;
        for (UINT_32 i = 0; i < numRbTotalLog2; i++)
        {
            BOOL_32 isRbEqAppended = FALSE;

            // With the alias fix, an rb term that received pipe bits needs more than one
            // coordinate left to count; otherwise any non-empty term counts
            if (m_settings.applyAliasFix)
            {
                isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
            }
            else
            {
                isRbEqAppended = (rbEquation[i].getsize() > 0);
            }

            if (isRbEqAppended)
            {
                rbBitsLeft++;
                rbEquation[i].getsmallest(co);
                UINT_32 old_size = pMetaEq->getsize();
                pMetaEq->Filter('=', co);
                UINT_32 new_size = pMetaEq->getsize();
                if (new_size != old_size - 1)
                {
                    // assert warning
                    // (intentionally empty: unlike the channel-bit loop above, a size
                    // mismatch here does not raise an assert)
                }
                for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
                {
                    if (rbEquation[j].remove(co))
                    {
                        // if we actually removed something from this bit, then add the remaining
                        // rb bits, as these can be removed for this bit
                        for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
                        {
                            if (rbEquation[i][k] != co)
                            {
                                rbEquation[j].add(rbEquation[i][k]);
                                rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
                            }
                        }
                    }
                }
            }
        }

        // capture the size of the metaaddr
        UINT_32 metaSize = pMetaEq->getsize();
        // resize to 49 bits...make this a nibble address
        pMetaEq->resize(49);
        // Concatenate the macro address above the current address
        for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
        {
            co.set(DIM_M, j);
            (*pMetaEq)[i].add(co);
        }

        // Multiply by meta element size (in nibbles)
        if (dataSurfaceType == Gfx9DataColor)
        {
            pMetaEq->shift(1);
        }
        else if (dataSurfaceType == Gfx9DataDepthStencil)
        {
            pMetaEq->shift(3);
        }

        //------------------------------------------------------------------------------------------
        // Note the pipeInterleaveLog2+1 is because address is a nibble address
        // Shift up from pipe interleave number of channel
        // and rb bits left, and uncompressed fragments
        //------------------------------------------------------------------------------------------

        pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);

        // Put in the channel bits
        for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
        {
            origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
        }

        // Put in remaining rb bits
        // (i scans the rb bits cyclically; j counts how many have been placed. The selection
        // test is the same one used when rbBitsLeft was counted above.)
        for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
        {
            BOOL_32 isRbEqAppended = FALSE;

            if (m_settings.applyAliasFix)
            {
                isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
            }
            else
            {
                isRbEqAppended = (rbEquation[i].getsize() > 0);
            }

            if (isRbEqAppended)
            {
                origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
                // Mark any rb bit we add in to the rb mask
                j++;
            }
        }

        //------------------------------------------------------------------------------------------
        // Put in the uncompressed fragment bits
        //------------------------------------------------------------------------------------------
        for (UINT_32 i = 0; i < uncompFragLog2; i++)
        {
            co.set(DIM_S, compFragLog2 + i);
            (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
        }
    }
}

/**
************************************************************************************************************************
*   Gfx9Lib::IsEquationSupported
*
*   @brief
*       Check if equation is supported for given swizzle mode and resource type.
*
*   @return
*       TRUE if supported
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::IsEquationSupported(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2) const
{
    // Common requirements: element size within the table, a valid swizzle mode, and not linear.
    // The checks run in this order so the swizzle mode is validated before it is queried further.
    if ((elementBytesLog2 >= MaxElementBytesLog2) ||
        (IsValidSwMode(swMode) != TRUE) ||
        (IsLinear(swMode) != FALSE))
    {
        return FALSE;
    }

    BOOL_32 supported = FALSE;

    // 2D: elements below 16 bytes are always fine; larger ones only when the mode is
    // neither rotated nor Z-order
    if (IsTex2d(rsrcType) == TRUE)
    {
        supported = (elementBytesLog2 < 4) ||
                    ((IsRotateSwizzle(swMode) == FALSE) &&
                     (IsZOrderSwizzle(swMode) == FALSE));
    }

    // 3D: rotated and 256B block modes are excluded
    if (supported == FALSE)
    {
        supported = (IsTex3d(rsrcType) == TRUE) &&
                    (IsRotateSwizzle(swMode) == FALSE) &&
                    (IsBlock256b(swMode) == FALSE);
    }

    return supported;
}

/**
************************************************************************************************************************
*   Gfx9Lib::InitEquationTable
*
*   @brief
*       Initialize Equation table.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::InitEquationTable()
{
    memset(m_equationTable, 0, sizeof(m_equationTable));

    // Visit every (resource type, swizzle mode, element size) combination. Supported ones
    // get an equation appended to m_equationTable; the lookup table records either that
    // equation's index or ADDR_INVALID_EQUATION_INDEX.
    for (UINT_32 typeSlot = 0; typeSlot < MaxRsrcType; typeSlot++)
    {
        // Lookup-table slot 0 corresponds to ADDR_RSRC_TEX_2D
        const AddrResourceType resType = static_cast<AddrResourceType>(typeSlot + ADDR_RSRC_TEX_2D);

        for (UINT_32 modeSlot = 0; modeSlot < MaxSwModeType; modeSlot++)
        {
            const AddrSwizzleMode mode = static_cast<AddrSwizzleMode>(modeSlot);

            for (UINT_32 bytesLog2 = 0; bytesLog2 < MaxElementBytesLog2; bytesLog2++)
            {
                UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;

                if (IsEquationSupported(resType, mode, bytesLog2))
                {
                    ADDR_EQUATION     generated;
                    ADDR_E_RETURNCODE status;

                    memset(&generated, 0, sizeof(generated));

                    // Pick the generator: 256B 2D blocks, thin blocks, or thick blocks
                    if (IsBlock256b(mode) && IsTex2d(resType))
                    {
                        status = ComputeBlock256Equation(resType, mode, bytesLog2, &generated);
                    }
                    else if (IsThin(resType, mode))
                    {
                        status = ComputeThinEquation(resType, mode, bytesLog2, &generated);
                    }
                    else
                    {
                        status = ComputeThickEquation(resType, mode, bytesLog2, &generated);
                    }

                    if (status != ADDR_OK)
                    {
                        // A supported combination failed to generate; the lookup entry
                        // keeps the invalid index
                        ADDR_ASSERT_ALWAYS();
                    }
                    else
                    {
                        // Append the equation and remember where it went
                        equationIndex = m_numEquations;
                        ADDR_ASSERT(equationIndex < EquationTableSize);

                        m_equationTable[equationIndex] = generated;

                        m_numEquations++;
                    }
                }

                m_equationLookupTable[typeSlot][modeSlot][bytesLog2] = equationIndex;
            }
        }
    }
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlGetEquationIndex
*
*   @brief
*       Interface function stub of GetEquationIndex
*
*   @return
*       Equation index, or ADDR_INVALID_EQUATION_INDEX if the input is not supported
************************************************************************************************************************
*/
UINT_32 Gfx9Lib::HwlGetEquationIndex(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
    ) const
{
    const AddrResourceType resType   = pIn->resourceType;
    const AddrSwizzleMode  mode      = pIn->swizzleMode;
    const UINT_32          bytesLog2 = Log2(pIn->bpp >> 3);

    // Unsupported combinations report the invalid index; supported ones are read from the
    // lookup table, whose first dimension is the resource type value minus one.
    const UINT_32 eqIndex =
        IsEquationSupported(resType, mode, bytesLog2) ?
        m_equationLookupTable[static_cast<UINT_32>(resType) - 1][static_cast<UINT_32>(mode)][bytesLog2] :
        ADDR_INVALID_EQUATION_INDEX;

    // Without a per-mip info array there is nothing more to fill in
    if (pOut->pMipInfo == NULL)
    {
        return eqIndex;
    }

    // Every mip level gets the same equation index
    for (UINT_32 mip = 0; mip < pIn->numMipLevels; mip++)
    {
        pOut->pMipInfo[mip].equationIndex = eqIndex;
    }

    return eqIndex;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeBlock256Equation
*
*   @brief
*       Interface function stub of ComputeBlock256Equation
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    // Builds the 8-bit address equation of a 256-byte block into pEquation.
    // Returns ADDR_OK on success, or ADDR_INVALIDPARAMS when the swizzle type /
    // element size combination has no table entry below.
    ADDR_E_RETURNCODE ret = ADDR_OK;

    pEquation->numBits = 8;
    pEquation->numBitComponents = 1;

    // The lowest elementBytesLog2 address bits are filled with channel 0, index i.
    // NOTE(review): InitChannel arguments look like (valid, channel, index, dst) - confirm against its definition.
    UINT_32 i = 0;
    for (; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pEquation->addr[i]);
    }

    // pixelBit[n] aliases address bit (elementBytesLog2 + n), i.e. the bits above the in-element byte bits.
    ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];

    // Pre-built channel settings that the tables below copy from:
    // x[i] uses channel 0 with index (elementBytesLog2 + i), y[i] uses channel 1 with index i.
    const UINT_32 maxBitsUsed = 4;
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};

    for (i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
    }

    // Each case fills exactly (8 - elementBytesLog2) pixel bits so the equation totals 8 bits.
    if (IsStandardSwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = x[3];
                pixelBit[4] = y[0];
                pixelBit[5] = y[1];
                pixelBit[6] = y[2];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = y[1];
                pixelBit[4] = y[2];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            case 4:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = x[1];
                break;
            default:
                // Element sizes above 16 bytes have no 256B equation.
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[1];
                pixelBit[4] = y[0];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = x[2];
                pixelBit[3] = y[0];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = x[0];
                pixelBit[1] = x[1];
                pixelBit[2] = y[0];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                pixelBit[5] = y[2];
                break;
            case 3:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = x[2];
                pixelBit[4] = y[1];
                break;
            case 4:
                pixelBit[0] = x[0];
                pixelBit[1] = y[0];
                pixelBit[2] = x[1];
                pixelBit[3] = y[1];
                break;
            default:
                // Element sizes above 16 bytes have no 256B equation.
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsRotateSwizzle(swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[1];
                pixelBit[4] = x[0];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                pixelBit[7] = y[3];
                break;
            case 1:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = y[2];
                pixelBit[3] = x[0];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                pixelBit[6] = x[3];
                break;
            case 2:
                pixelBit[0] = y[0];
                pixelBit[1] = y[1];
                pixelBit[2] = x[0];
                pixelBit[3] = y[2];
                pixelBit[4] = x[1];
                pixelBit[5] = x[2];
                break;
            case 3:
                pixelBit[0] = y[0];
                pixelBit[1] = x[0];
                pixelBit[2] = y[1];
                pixelBit[3] = x[1];
                pixelBit[4] = x[2];
                break;
            default:
                ADDR_ASSERT_ALWAYS();
                // Intentional fall-through: unexpected sizes assert first, then share the
                // rejection path with case 4.
            case 4:
                // 16-byte elements are rejected for rotate swizzle without asserting.
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else
    {
        // Swizzle mode is neither standard, display nor rotate: no 256B equation is defined here.
        ADDR_ASSERT_ALWAYS();
        ret = ADDR_INVALIDPARAMS;
    }

    // Post validation
    // Debug-only consistency check of the generated equation against the Block256_2d
    // micro block dimensions for this element size.
    if (ret == ADDR_OK)
    {
        Dim2d microBlockDim = Block256_2d[elementBytesLog2];
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
                    (microBlockDim.w * (1 << elementBytesLog2)));
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeThinEquation
*
*   @brief
*       Interface function stub of ComputeThinEquation
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    // Builds the address equation of a thin block for the given swizzle mode and element
    // size into pEquation. Returns ADDR_OK on success; ADDR_INVALIDPARAMS when the Z-order
    // path gets an element larger than 8 bytes or when HwlComputeBlock256Equation rejects
    // the input.
    ADDR_E_RETURNCODE ret = ADDR_OK;

    UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);

    // Number of address-bit positions that must be populated so every XOR source exists.
    UINT_32 maxXorBits = blockSizeLog2;
    if (IsNonPrtXor(swMode))
    {
        // For non-prt-xor, maybe need to initialize some more bits for xor
        // The highest xor bit used in equation will be max the following 3 items:
        // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
        // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
        // 3. blockSizeLog2

        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
                                     GetPipeXorBits(blockSizeLog2) +
                                     2 * GetBankXorBits(blockSizeLog2));
    }

    // Pre-built channel settings consumed in order through xIdx / yIdx below:
    // x[i] uses channel 0 with index (elementBytesLog2 + i), y[i] uses channel 1 with index i.
    const UINT_32 maxBitsUsed = 14;
    ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};

    // Holds the settings for bit positions at or above blockSizeLog2; they are not part of
    // the equation's addr bits but can be referenced as XOR sources.
    const UINT_32 extraXorBits = 16;
    ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
    ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};

    for (UINT_32 i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
    }

    // Unlike the 256B helper, pixelBit here aliases the equation from address bit 0.
    ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;

    // The lowest elementBytesLog2 address bits use channel 0 with index i.
    for (UINT_32 i = 0; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pixelBit[i]);
    }

    UINT_32 xIdx = 0;
    UINT_32 yIdx = 0;
    UINT_32 lowBits = 0;

    if (IsZOrderSwizzle(swMode))
    {
        if (elementBytesLog2 <= 3)
        {
            // Bits [elementBytesLog2, 6) alternate x, y, x, ... starting with x.
            for (UINT_32 i = elementBytesLog2; i < 6; i++)
            {
                pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
            }

            lowBits = 6;
        }
        else
        {
            ret = ADDR_INVALIDPARAMS;
        }
    }
    else
    {
        // Non-Z-order modes reuse the 256B block equation for the low 8 bits.
        ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);

        if (ret == ADDR_OK)
        {
            // Continue consuming x / y right after the bits the 256B micro block already used.
            Dim2d microBlockDim = Block256_2d[elementBytesLog2];
            xIdx = Log2(microBlockDim.w);
            yIdx = Log2(microBlockDim.h);
            lowBits = 8;
        }
    }

    if (ret == ADDR_OK)
    {
        // Remaining in-block bits: even bit positions take the next y, odd positions the next x.
        for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
        {
            pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
        }

        // Same alternation continues past the block size, stored only as potential XOR sources.
        for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
        {
            xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
        }

        if (IsXor(swMode))
        {
            // Fill XOR bits
            UINT_32 pipeStart = m_pipeInterleaveLog2;
            UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);

            UINT_32 bankStart = pipeStart + pipeXorBits;
            UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);

            // Pipe bit i is XORed with the setting at bit (pipeStart + 2 * pipeXorBits - 1 - i),
            // read from addr if it lies inside the block and from xorExtra otherwise.
            for (UINT_32 i = 0; i < pipeXorBits; i++)
            {
                UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
            }

            // Bank bits follow the same scheme, mirrored around the bank bit range.
            for (UINT_32 i = 0; i < bankXorBits; i++)
            {
                UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
            }

            if (IsPrt(swMode) == FALSE)
            {
                // Non-PRT modes add a second XOR term on channel 2, with indices in
                // descending order across the pipe bits and then the bank bits.
                // NOTE(review): channel 2 is the one HwlComputeThickEquation uses for its z[] settings.
                for (UINT_32 i = 0; i < pipeXorBits; i++)
                {
                    InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
                }

                for (UINT_32 i = 0; i < bankXorBits; i++)
                {
                    InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
                }
            }
        }

        // numBits set by the 256B helper (8) is replaced by the full block size.
        FillEqBitComponents(pEquation);
        pEquation->numBits = blockSizeLog2;
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeThickEquation
*
*   @brief
*       Interface function stub of ComputeThickEquation
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
    AddrResourceType rsrcType,
    AddrSwizzleMode  swMode,
    UINT_32          elementBytesLog2,
    ADDR_EQUATION*   pEquation) const
{
    // Builds the address equation of a thick (3D) block for the given swizzle mode and
    // element size into pEquation. Returns ADDR_OK on success; ADDR_INVALIDPARAMS when the
    // swizzle mode is neither Z-order nor standard, or the element is larger than 16 bytes.
    ADDR_E_RETURNCODE ret = ADDR_OK;

    ADDR_ASSERT(IsTex3d(rsrcType));

    UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);

    // Number of address-bit positions that must be populated so every XOR source exists.
    UINT_32 maxXorBits = blockSizeLog2;
    if (IsNonPrtXor(swMode))
    {
        // For non-prt-xor, maybe need to initialize some more bits for xor
        // The highest xor bit used in equation will be max the following 3:
        // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
        // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
        // 3. blockSizeLog2

        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
        maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
                                     GetPipeXorBits(blockSizeLog2) +
                                     3 * GetBankXorBits(blockSizeLog2));
    }

    // The lowest elementBytesLog2 address bits use channel 0 with index i.
    for (UINT_32 i = 0; i < elementBytesLog2; i++)
    {
        InitChannel(1, 0 , i, &pEquation->addr[i]);
    }

    // pixelBit[n] aliases address bit (elementBytesLog2 + n) while the tables below are filled.
    ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];

    // Pre-built channel settings the tables and loops below copy from:
    // x[i] uses channel 0 with index (elementBytesLog2 + i), y[i] channel 1 and z[i] channel 2 with index i.
    const UINT_32 maxBitsUsed = 12;
    ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
    ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
    ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};

    // Holds the settings for bit positions at or above blockSizeLog2; they are not part of
    // the equation's addr bits but can be referenced as XOR sources.
    const UINT_32 extraXorBits = 24;
    ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
    ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};

    for (UINT_32 i = 0; i < maxBitsUsed; i++)
    {
        InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
        InitChannel(1, 1, i, &y[i]);
        InitChannel(1, 2, i, &z[i]);
    }

    // Each case fills exactly (10 - elementBytesLog2) pixel bits, so the low 10 address bits
    // are fully defined afterwards (checked by the asserts further down).
    if (IsZOrderSwizzle(swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = x[1];
                pixelBit[3]  = y[1];
                pixelBit[4]  = z[0];
                pixelBit[5]  = z[1];
                pixelBit[6]  = x[2];
                pixelBit[7]  = z[2];
                pixelBit[8]  = y[2];
                pixelBit[9]  = x[3];
                break;
            case 1:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = x[1];
                pixelBit[3]  = y[1];
                pixelBit[4]  = z[0];
                pixelBit[5]  = z[1];
                pixelBit[6]  = z[2];
                pixelBit[7]  = y[2];
                pixelBit[8]  = x[2];
                break;
            case 2:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = x[1];
                pixelBit[3]  = z[0];
                pixelBit[4]  = y[1];
                pixelBit[5]  = z[1];
                pixelBit[6]  = y[2];
                pixelBit[7]  = x[2];
                break;
            case 3:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = z[0];
                pixelBit[3]  = x[1];
                pixelBit[4]  = z[1];
                pixelBit[5]  = y[1];
                pixelBit[6]  = x[2];
                break;
            case 4:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = z[0];
                pixelBit[3]  = z[1];
                pixelBit[4]  = y[1];
                pixelBit[5]  = x[1];
                break;
            default:
                // Element sizes above 16 bytes have no thick equation.
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else if (IsStandardSwizzle(rsrcType, swMode))
    {
        switch (elementBytesLog2)
        {
            case 0:
                pixelBit[0]  = x[0];
                pixelBit[1]  = x[1];
                pixelBit[2]  = x[2];
                pixelBit[3]  = x[3];
                pixelBit[4]  = y[0];
                pixelBit[5]  = y[1];
                pixelBit[6]  = z[0];
                pixelBit[7]  = z[1];
                pixelBit[8]  = z[2];
                pixelBit[9]  = y[2];
                break;
            case 1:
                pixelBit[0]  = x[0];
                pixelBit[1]  = x[1];
                pixelBit[2]  = x[2];
                pixelBit[3]  = y[0];
                pixelBit[4]  = y[1];
                pixelBit[5]  = z[0];
                pixelBit[6]  = z[1];
                pixelBit[7]  = z[2];
                pixelBit[8]  = y[2];
                break;
            case 2:
                pixelBit[0]  = x[0];
                pixelBit[1]  = x[1];
                pixelBit[2]  = y[0];
                pixelBit[3]  = y[1];
                pixelBit[4]  = z[0];
                pixelBit[5]  = z[1];
                pixelBit[6]  = y[2];
                pixelBit[7]  = x[2];
                break;
            case 3:
                pixelBit[0]  = x[0];
                pixelBit[1]  = y[0];
                pixelBit[2]  = y[1];
                pixelBit[3]  = z[0];
                pixelBit[4]  = z[1];
                pixelBit[5]  = x[1];
                pixelBit[6]  = x[2];
                break;
            case 4:
                pixelBit[0]  = y[0];
                pixelBit[1]  = y[1];
                pixelBit[2]  = z[0];
                pixelBit[3]  = z[1];
                pixelBit[4]  = x[0];
                pixelBit[5]  = x[1];
                break;
            default:
                // Element sizes above 16 bytes have no thick equation.
                ADDR_ASSERT_ALWAYS();
                ret = ADDR_INVALIDPARAMS;
                break;
        }
    }
    else
    {
        // Only Z-order and standard swizzles have thick equations here.
        ADDR_ASSERT_ALWAYS();
        ret = ADDR_INVALIDPARAMS;
    }

    if (ret == ADDR_OK)
    {
        // Continue consuming x / y / z right after the bits covered by the Block1K_3d
        // micro block dimensions for this element size.
        Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
        UINT_32 xIdx = Log2(microBlockDim.w);
        UINT_32 yIdx = Log2(microBlockDim.h);
        UINT_32 zIdx = Log2(microBlockDim.d);

        // From here on pixelBit is indexed by absolute address bit position.
        pixelBit = pEquation->addr;

        // The tables above must have filled address bits 0..9 and nothing beyond.
        const UINT_32 lowBits = 10;
        ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
        ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);

        // Remaining in-block bits rotate by bit position modulo 3: 0 -> x, 1 -> z, 2 -> y.
        for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
        {
            if ((i % 3) == 0)
            {
                pixelBit[i] = x[xIdx++];
            }
            else if ((i % 3) == 1)
            {
                pixelBit[i] = z[zIdx++];
            }
            else
            {
                pixelBit[i] = y[yIdx++];
            }
        }

        // Same rotation continues past the block size, stored only as potential XOR sources.
        for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
        {
            if ((i % 3) == 0)
            {
                xorExtra[i - blockSizeLog2] = x[xIdx++];
            }
            else if ((i % 3) == 1)
            {
                xorExtra[i - blockSizeLog2] = z[zIdx++];
            }
            else
            {
                xorExtra[i - blockSizeLog2] = y[yIdx++];
            }
        }

        if (IsXor(swMode))
        {
            // Fill XOR bits
            // Each pipe / bank bit i gets two XOR sources taken from adjacent bit positions,
            // stepping down by two positions per bit; a source comes from addr if it lies
            // inside the block and from xorExtra otherwise.
            UINT_32 pipeStart = m_pipeInterleaveLog2;
            UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
            for (UINT_32 i = 0; i < pipeXorBits; i++)
            {
                UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);

                UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
            }

            // Bank bits start right above the pipe bits and use the same two-source scheme.
            UINT_32 bankStart = pipeStart + pipeXorBits;
            UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
            for (UINT_32 i = 0; i < bankXorBits; i++)
            {
                UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);

                UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
                ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
                                                   &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];

                InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
            }
        }

        // Finalize: the equation covers blockSizeLog2 address bits.
        FillEqBitComponents(pEquation);
        pEquation->numBits = blockSizeLog2;
    }

    return ret;
}

/**
************************************************************************************************************************
*   Gfx9Lib::IsValidDisplaySwizzleMode
*
*   @brief
*       Check if a swizzle mode is supported by display engine
*
*   @return
*       TRUE if swizzle mode is supported by display engine
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Checks pIn->swizzleMode against the per-display-engine, per-bpp swizzle mode bit masks.
    // Returns TRUE only when the bit for the swizzle mode is set in the applicable mask;
    // bpp values not covered by any branch leave the result at FALSE.
    BOOL_32 support = FALSE;

    // Build the one-bit mask with an unsigned literal: "1 << mode" is a signed shift that
    // reaches the sign bit for mode 31 and is undefined for shift counts of 32 or more.
    // Modes that do not fit in the 32-bit mask get an empty mask, so they are never reported
    // as supported.
    const UINT_32 maskBitCount = 32;
    const UINT_32 swizzleIndex = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 swizzleMask  = (swizzleIndex < maskBitCount) ? (1u << swizzleIndex) : 0;

    if (m_settings.isDce12)
    {
        // DCE12: 32 bpp has its own mask; every other bpp up to 64 shares one.
        if (pIn->bpp == 32)
        {
            support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
        else if (pIn->bpp <= 64)
        {
            support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
    }
    else if (m_settings.isDcn1)
    {
        // DCN1: 64 bpp has its own mask; smaller bpp values share one.
        if (pIn->bpp < 64)
        {
            support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
        else if (pIn->bpp == 64)
        {
            support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
    }
    else if (m_settings.isDcn2)
    {
        // DCN2: same split as DCN1 with its own masks.
        if (pIn->bpp < 64)
        {
            support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
        else if (pIn->bpp == 64)
        {
            support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
        }
    }
    else
    {
        // No known display engine flag is set.
        ADDR_NOT_IMPLEMENTED();
    }

    return support;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputePipeBankXor
*
*   @brief
*       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
*
*   @return
*       ADDR_OK; the PipeBankXor value is written to pOut->pipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
{
    // Swizzle modes without XOR always get a zero pipe/bank XOR.
    if (IsXor(pIn->swizzleMode) == FALSE)
    {
        pOut->pipeBankXor = 0;
        return ADDR_OK;
    }

    const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 numPipeBits   = GetPipeXorBits(blockSizeLog2);
    const UINT_32 numBankBits   = GetBankXorBits(blockSizeLog2);

    // The surface index, truncated to the bank bit count, selects the bank XOR.
    const UINT_32 bankBitMask = (1 << numBankBits) - 1;
    const UINT_32 surfSlot    = pIn->surfIndex & bankBitMask;

    UINT_32 bitsPerPixel;
    if (pIn->flags.fmask)
    {
        bitsPerPixel = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
    }
    else
    {
        bitsPerPixel = GetElemLib()->GetBitsPerPixel(pIn->format);
    }

    // This function only produces a bank component; the pipe component is always zero.
    const UINT_32 pipeXorValue = 0;
    UINT_32       bankXorValue = 0;

    if (numBankBits == 4)
    {
        // With 4 bank bits the XOR comes from a fixed 16-entry table chosen by bpp.
        static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
        static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};

        if (bitsPerPixel <= 32)
        {
            bankXorValue = BankXorSmallBpp[surfSlot];
        }
        else
        {
            bankXorValue = BankXorLargeBpp[surfSlot];
        }
    }
    else if (numBankBits > 0)
    {
        // Otherwise step through the bank values with a stride of 2^(bankBits - 1) - 1,
        // but never a stride of zero.
        UINT_32 bankStride = (1 << (numBankBits - 1)) - 1;
        if (bankStride == 0)
        {
            bankStride = 1;
        }

        bankXorValue = (surfSlot * bankStride) & bankBitMask;
    }

    pOut->pipeBankXor = (bankXorValue << numPipeBits) | pipeXorValue;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSlicePipeBankXor
*
*   @brief
*       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
*
*   @return
*       ADDR_OK; the slice PipeBankXor value is written to pOut->pipeBankXor
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
{
    const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 numPipeBits   = GetPipeXorBits(blockSizeLog2);
    const UINT_32 numBankBits   = GetBankXorBits(blockSizeLog2);

    // The slice id is split in two: ReverseBitVector over numPipeBits of the slice gives the
    // pipe part, and over numBankBits of (slice >> numPipeBits) gives the bank part.
    const UINT_32 slicePipeXor = ReverseBitVector(pIn->slice, numPipeBits);
    const UINT_32 sliceBankXor = ReverseBitVector(pIn->slice >> numPipeBits, numBankBits);

    // Bank part sits directly above the pipe part; the result is XORed onto the base value.
    const UINT_32 sliceXor = slicePipeXor | (sliceBankXor << numPipeBits);

    pOut->pipeBankXor = pIn->basePipeBankXor ^ sliceXor;

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
*
*   @brief
*       Compute sub resource offset to support swizzle pattern
*
*   @return
*       ADDR_OK; the sub resource offset is written to pOut->offset
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
{
    // Only thin resources are expected here.
    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));

    const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 numPipeBits   = GetPipeXorBits(blockSizeLog2);
    const UINT_32 numBankBits   = GetBankXorBits(blockSizeLog2);

    // Per-slice XOR built the same way as in HwlComputeSlicePipeBankXor.
    const UINT_32 slicePipeXor = ReverseBitVector(pIn->slice, numPipeBits);
    const UINT_32 sliceBankXor = ReverseBitVector(pIn->slice >> numPipeBits, numBankBits);
    const UINT_32 sliceXor     = slicePipeXor | (sliceBankXor << numPipeBits);

    // Combine with the resource's own XOR and move it above the pipe interleave bits.
    const UINT_32 addrXor = (sliceXor ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;

    // Slice base + macro block offset + XORed mip tail offset, then subtract the XOR value itself.
    pOut->offset = pIn->slice * pIn->sliceSize +
                   pIn->macroBlockOffset +
                   (pIn->mipTailOffset ^ addrXor) -
                   static_cast<UINT_64>(addrXor);

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ValidateNonSwModeParams
*
*   @brief
*       Validate compute surface info params except swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 valid = TRUE;

    // Basic range checks: bpp in [1, 128], non-zero width, at most 8 fragments / 16 samples.
    const BOOL_32 badBpp         = (pIn->bpp == 0) || (pIn->bpp > 128);
    const BOOL_32 badSampleCount = (pIn->numFrags > 8) || (pIn->numSamples > 16);

    if (badBpp || (pIn->width == 0) || badSampleCount)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Surface properties derived from the input.
    const BOOL_32 hasMips        = (pIn->numMipLevels > 1);
    const BOOL_32 multiSampled   = (pIn->numFrags > 1);
    const BOOL_32 blockCompessed = ElemLib::IsBlockCompressed(pIn->format);

    const AddrResourceType resourceType = pIn->resourceType;
    const BOOL_32          is3d         = IsTex3d(resourceType);
    const BOOL_32          is2d         = IsTex2d(resourceType);
    const BOOL_32          is1d         = IsTex1d(resourceType);

    const ADDR2_SURFACE_FLAGS surfFlags      = pIn->flags;
    const BOOL_32             depthStencil   = surfFlags.depth || surfFlags.stencil;
    const BOOL_32             displayable    = surfFlags.display || surfFlags.rotated;
    const BOOL_32             quadBufStereo  = surfFlags.qbStereo;
    const BOOL_32             isFmask        = surfFlags.fmask;

    // Resource type check: work out whether the surface breaks the rules of its type,
    // then report the violation once.
    BOOL_32 typeRuleBroken;

    if (is1d)
    {
        typeRuleBroken = multiSampled || depthStencil || displayable ||
                         quadBufStereo || blockCompessed || isFmask;
    }
    else if (is2d)
    {
        // MSAA, mipmapping and stereo are mutually exclusive for 2D resources.
        typeRuleBroken = (multiSampled && hasMips) ||
                         (quadBufStereo && multiSampled) ||
                         (quadBufStereo && hasMips);
    }
    else if (is3d)
    {
        typeRuleBroken = multiSampled || depthStencil || displayable || quadBufStereo || isFmask;
    }
    else
    {
        // Not a 1D, 2D or 3D resource at all.
        typeRuleBroken = TRUE;
    }

    if (typeRuleBroken)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ValidateSwModeParams
*
*   @brief
*       Validate compute surface info related to swizzle mode
*
*   @return
*       TRUE if parameters are valid, FALSE otherwise
************************************************************************************************************************
*/
BOOL_32 Gfx9Lib::ValidateSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Validates the swizzle mode of pIn against the rest of the surface description
    // (resource type, bpp, sample count, surface flags). Returns TRUE when every check
    // passes and FALSE otherwise; each failed check also raises ADDR_ASSERT_ALWAYS().

    // An out-of-range swizzle mode is rejected immediately. Continuing would hand the bad
    // value to the swizzle-mode helpers below and to IsValidDisplaySwizzleMode(), which
    // shifts by the swizzle mode value. The result was already FALSE for this input.
    // NOTE(review): the Is*/GetBlockSize helpers are presumably table lookups indexed by
    // swizzle mode - confirm; if so this also avoids out-of-bounds reads.
    if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
    {
        ADDR_ASSERT_ALWAYS();
        return FALSE;
    }

    BOOL_32 valid = TRUE;

    // In-range but unsupported modes still run the remaining checks, as before.
    if (IsValidSwMode(pIn->swizzleMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const BOOL_32 mipmap = (pIn->numMipLevels > 1);
    const BOOL_32 msaa   = (pIn->numFrags > 1);
    const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
    const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);

    const AddrResourceType rsrcType = pIn->resourceType;
    const BOOL_32          tex3d    = IsTex3d(rsrcType);
    const BOOL_32          tex2d    = IsTex2d(rsrcType);
    const BOOL_32          tex1d    = IsTex1d(rsrcType);

    const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
    const BOOL_32          linear      = IsLinear(swizzle);
    const BOOL_32          blk256B     = IsBlock256b(swizzle);
    const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);

    const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
    const BOOL_32             zbuffer = flags.depth || flags.stencil;
    const BOOL_32             color   = flags.color;
    const BOOL_32             texture = flags.texture;
    const BOOL_32             display = flags.display || flags.rotated;
    const BOOL_32             prt     = flags.prt;
    const BOOL_32             fmask   = flags.fmask;

    // A 3D resource viewed as a 2D array, and a mipmapped 3D resource whose depth is its
    // largest dimension, get extra restrictions below.
    const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
    const BOOL_32             zMaxMip = tex3d && mipmap &&
                                        (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);

    // Misc check
    if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
    {
        // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // 96 bpp is only allowed with linear swizzle.
    if ((pIn->bpp == 96) && (linear == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // A PRT surface cannot use a non-PRT XOR swizzle mode.
    if (prt && isNonPrtXor)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Resource type check
    if (tex1d)
    {
        // 1D resources must be linear.
        if (linear == FALSE)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    // Swizzle type check
    if (linear)
    {
        if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
            ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsZOrderSwizzle(swizzle))
    {
        if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsStandardSwizzle(swizzle))
    {
        if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsDisplaySwizzle(swizzle))
    {
        if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsRotateSwizzle(swizzle))
    {
        if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else
    {
        // The swizzle mode belongs to none of the known swizzle types.
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Block type check
    if (blk256B)
    {
        // 256B blocks support none of PRT, depth/stencil, 3D, mipmaps or MSAA.
        if (prt || zbuffer || tex3d || mipmap || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    return valid;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
*
*   @brief
*       Compute surface info sanity check
*
*   @return
*       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Assume the worst; only a surface passing both validators is reported as ADDR_OK.
    ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;

    // Short-circuit: the swizzle mode specific checks only run once the swizzle mode independent
    // checks have passed.
    if (ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn))
    {
        returnCode = ADDR_OK;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlGetPreferredSurfaceSetting
*
*   @brief
*       Internal function to get suggested surface information for client to use
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
    const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
{
    // Overview:
    //   1. Normalize the input (fmask / format conversion, clamp zero dimensions to 1).
    //   2. Build the set of swizzle modes the client allows, then strip every mode that the resource
    //      attributes or HW restrictions rule out.
    //   3. Report the surviving set through pOut, then narrow it down to one block type, one swizzle
    //      type and finally one swizzle mode, which is returned in pOut->swizzleMode.
    // ADDR_OK is returned only if at least one swizzle mode survives step 2; otherwise the function
    // asserts and returns ADDR_INVALIDPARAMS.
    ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
    ElemLib*          pElemLib   = GetElemLib();

    UINT_32 bpp        = pIn->bpp;
    UINT_32 width      = Max(pIn->width, 1u);
    UINT_32 height     = Max(pIn->height, 1u);
    UINT_32 numSamples = Max(pIn->numSamples, 1u);
    UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;

    if (pIn->flags.fmask)
    {
        // Fmask is treated as a single-sample 2D surface whose bpp is derived from the sample/fragment count
        bpp                = GetFmaskBpp(numSamples, numFrags);
        numFrags           = 1;
        numSamples         = 1;
        pOut->resourceType = ADDR_RSRC_TEX_2D;
    }
    else
    {
        // Set format to INVALID will skip this conversion
        if (pIn->format != ADDR_FMT_INVALID)
        {
            UINT_32 expandX, expandY;

            // Don't care for this case
            ElemMode elemMode = ADDR_UNCOMPRESSED;

            // Get compression/expansion factors and element mode which indicates compression/expansion
            bpp = pElemLib->GetBitsPerPixel(pIn->format,
                                            &elemMode,
                                            &expandX,
                                            &expandY);

            // bpp, width and height are adjusted in place; basePitch is a scratch value that is not used afterwards
            UINT_32 basePitch = 0;
            GetElemLib()->AdjustSurfaceInfo(elemMode,
                                            expandX,
                                            expandY,
                                            &bpp,
                                            &basePitch,
                                            &width,
                                            &height);
        }

        // The output may get changed for volume(3D) texture resource in future
        pOut->resourceType = pIn->resourceType;
    }

    const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
    const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
    const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
    const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;

    // Pre sanity check on non swizzle mode parameters
    ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
    localIn.flags        = pIn->flags;
    localIn.resourceType = pOut->resourceType;
    localIn.format       = pIn->format;
    localIn.bpp          = bpp;
    localIn.width        = width;
    localIn.height       = height;
    localIn.numSlices    = numSlices;
    localIn.numMipLevels = numMipLevels;
    localIn.numSamples   = numSamples;
    localIn.numFrags     = numFrags;

    if (ValidateNonSwModeParams(&localIn))
    {
        // Forbid swizzle mode(s) by client setting
        ADDR2_SWMODE_SET allowedSwModeSet = {};
        allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
        allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
        allowedSwModeSet.value |=
            pIn->forbiddenBlock.macroThin4KB ? 0 :
            ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
        allowedSwModeSet.value |=
            pIn->forbiddenBlock.macroThick4KB ? 0 :
            ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
        allowedSwModeSet.value |=
            pIn->forbiddenBlock.macroThin64KB ? 0 :
            ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
        allowedSwModeSet.value |=
            pIn->forbiddenBlock.macroThick64KB ? 0 :
            ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);

        // A non-empty preferred set restricts the candidates to the swizzle types the client listed
        if (pIn->preferredSwSet.value != 0)
        {
            allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
            allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
            allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
            allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
        }

        if (pIn->noXor)
        {
            allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
        }

        // A non-zero maxAlign removes every block size that is bigger than the requested alignment
        if (pIn->maxAlign > 0)
        {
            if (pIn->maxAlign < Size64K)
            {
                allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
            }

            if (pIn->maxAlign < Size4K)
            {
                allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
            }

            if (pIn->maxAlign < Size256)
            {
                allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
            }
        }

        // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
        switch (pOut->resourceType)
        {
            case ADDR_RSRC_TEX_1D:
                allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
                break;

            case ADDR_RSRC_TEX_2D:
                allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;

                if (bpp > 64)
                {
                    allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
                }
                break;

            case ADDR_RSRC_TEX_3D:
                allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;

                if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
                {
                    // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
                    // When depth (Z) is the maximum dimension then must use one of the SW_*_S
                    // or SW_*_Z modes if mipmapping is desired on a 3D surface
                    allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
                }

                if ((bpp == 128) && pIn->flags.color)
                {
                    allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
                }

                if (pIn->flags.view3dAs2dArray)
                {
                    allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
                }
                break;

            default:
                ADDR_ASSERT_ALWAYS();
                allowedSwModeSet.value = 0;
                break;
        }

        // 96 bpp format is only allowed to be linear
        if (pIn->format == ADDR_FMT_32_32_32)
        {
            allowedSwModeSet.value &= Gfx9LinearSwModeMask;
        }

        // Block compressed formats: only S/D swizzle types, plus linear when not used as a texture
        if (ElemLib::IsBlockCompressed(pIn->format))
        {
            if (pIn->flags.texture)
            {
                allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
            }
            else
            {
                allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
            }
        }

        if (ElemLib::IsMacroPixelPacked(pIn->format) ||
            (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
        {
            allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
        }

        // Fmask, depth and stencil surfaces are restricted to Z swizzle modes
        if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
        {
            allowedSwModeSet.value &= Gfx9ZSwModeMask;

            if (pIn->flags.noMetadata == FALSE)
            {
                if (pIn->flags.depth &&
                    pIn->flags.texture &&
                    (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
                {
                    // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
                    // equation from wrong address within memory range a tile covered and use the
                    // garbage data for compressed Z reading which finally leads to corruption.
                    allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
                }

                if (m_settings.htileCacheRbConflict &&
                    (pIn->flags.depth || pIn->flags.stencil) &&
                    (numSlices > 1) &&
                    (pIn->flags.metaRbUnaligned == FALSE) &&
                    (pIn->flags.metaPipeUnaligned == FALSE))
                {
                    // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
                    allowedSwModeSet.value &= ~Gfx9XSwModeMask;
                }
            }
        }

        if (msaa)
        {
            allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
        }

        if ((numFrags > 1) &&
            (Size4K < (m_pipeInterleaveBytes * numFrags)))
        {
            // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
            allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
        }

        // Mipmapped surfaces can not use 256B blocks
        if (numMipLevels > 1)
        {
            allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
        }

        // Displayable surfaces are further limited by what the display engine generation supports
        if (displayRsrc)
        {
            if (m_settings.isDce12)
            {
                allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
            }
            else if (m_settings.isDcn1)
            {
                allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
            }
            else if (m_settings.isDcn2)
            {
                allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
            }
            else
            {
                ADDR_NOT_IMPLEMENTED();
            }
        }

        if (allowedSwModeSet.value != 0)
        {
#if DEBUG
            // Post sanity check, at least AddrLib should accept the output generated by its own
            UINT_32 validateSwModeSet = allowedSwModeSet.value;

            for (UINT_32 i = 0; validateSwModeSet != 0; i++)
            {
                if (validateSwModeSet & 1)
                {
                    localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
                    ADDR_ASSERT(ValidateSwModeParams(&localIn));
                }

                validateSwModeSet >>= 1;
            }
#endif

            // Report every valid candidate to the client before narrowing down to a single swizzle mode
            pOut->validSwModeSet = allowedSwModeSet;
            pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
            pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
            pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);

            pOut->clientPreferredSwSet = pIn->preferredSwSet;

            if (pOut->clientPreferredSwSet.value == 0)
            {
                pOut->clientPreferredSwSet.value = AddrSwSetAll;
            }

            // Apply optional restrictions
            if (pIn->flags.needEquation)
            {
                UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
                                                                    ADDR_MAX_LEGACY_EQUATION_COMP;
                // NOTE(review): this passes pIn->resourceType while the rest of this function uses
                // pOut->resourceType (the two differ when fmask forces ADDR_RSRC_TEX_2D) - confirm this is intended.
                FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
            }

            if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
            {
                pOut->swizzleMode = ADDR_SW_LINEAR;
            }
            else
            {
                const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);

                if ((height > 1) && (computeMinSize == FALSE))
                {
                    // Always ignore linear swizzle mode if:
                    // 1. This is a (2D/3D) resource with height > 1
                    // 2. Client doesn't require computing the minimum size
                    allowedSwModeSet.swLinear = 0;
                }

                ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);

                // Determine block size if there are 2 or more block type candidates
                if (IsPow2(allowedBlockSet.value) == FALSE)
                {
                    // One representative swizzle mode per block type, used only to estimate the padded surface size
                    AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};

                    swMode[AddrBlockLinear]   = ADDR_SW_LINEAR;
                    swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
                    swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
                    swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;

                    if (pOut->resourceType == ADDR_RSRC_TEX_3D)
                    {
                        swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
                        swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
                    }

                    UINT_64 padSize[AddrBlockMaxTiledType] = {};

                    const UINT_32 ratioLow   = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
                    const UINT_32 ratioHi    = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
                    UINT_32       minSizeBlk = AddrBlockMicro;
                    UINT_64       minSize    = 0;

                    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};

                    // Compute the padded size for each candidate block type and track the preferred one
                    for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
                    {
                        if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                        {
                            localIn.swizzleMode = swMode[i];

                            if (localIn.swizzleMode == ADDR_SW_LINEAR)
                            {
                                returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
                            }
                            else
                            {
                                returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
                            }

                            if (returnCode == ADDR_OK)
                            {
                                padSize[i] = localOut.surfSize;

                                if ((minSize == 0) ||
                                    Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
                                {
                                    minSize    = padSize[i];
                                    minSizeBlk = i;
                                }
                            }
                            else
                            {
                                ADDR_ASSERT_ALWAYS();
                                break;
                            }
                        }
                    }

                    if (pIn->memoryBudget > 1.0)
                    {
                        // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
                        // smaller-block type again in coming loop
                        switch (minSizeBlk)
                        {
                            case AddrBlockThick64KB:
                                allowedBlockSet.macroThin64KB = 0;
                                // Intentional fall through: also drop every smaller block type
                            case AddrBlockThin64KB:
                                allowedBlockSet.macroThick4KB = 0;
                                // Intentional fall through
                            case AddrBlockThick4KB:
                                allowedBlockSet.macroThin4KB = 0;
                                // Intentional fall through
                            case AddrBlockThin4KB:
                                allowedBlockSet.micro  = 0;
                                // Intentional fall through
                            case AddrBlockMicro:
                                allowedBlockSet.linear = 0;
                                // Intentional fall through
                            case AddrBlockLinear:
                                break;

                            default:
                                ADDR_ASSERT_ALWAYS();
                                break;
                        }

                        for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
                        {
                            if ((i != minSizeBlk) &&
                                Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<rocr::AddrBlockType>(i)))
                            {
                                if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
                                {
                                    // Clear the block type if the memory waste is unacceptable
                                    allowedBlockSet.value &= ~(1u << (i - 1));
                                }
                            }
                        }

                        // Remove linear block type if 2 or more block types are allowed
                        if (IsPow2(allowedBlockSet.value) == FALSE)
                        {
                            allowedBlockSet.linear = 0;
                        }

                        // Select the biggest allowed block type
                        minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;

                        if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
                        {
                            minSizeBlk = AddrBlockLinear;
                        }
                    }

                    // Keep only the swizzle modes that belong to the chosen block type
                    switch (minSizeBlk)
                    {
                        case AddrBlockLinear:
                            allowedSwModeSet.value &= Gfx9LinearSwModeMask;
                            break;

                        case AddrBlockMicro:
                            ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
                            allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
                            break;

                        case AddrBlockThin4KB:
                            allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
                                                      Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
                            break;

                        case AddrBlockThick4KB:
                            ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                            allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
                            break;

                        case AddrBlockThin64KB:
                            allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
                                                      Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
                            break;

                        case AddrBlockThick64KB:
                            ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
                            allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
                            break;

                        default:
                            ADDR_ASSERT_ALWAYS();
                            allowedSwModeSet.value = 0;
                            break;
                    }
                }

                // Block type should be determined.
                ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));

                ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);

                // Determine swizzle type if there are 2 or more swizzle type candidates
                if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
                {
                    if (ElemLib::IsBlockCompressed(pIn->format))
                    {
                        // Block compressed: prefer D, then S
                        if (allowedSwSet.sw_D)
                        {
                            allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
                        }
                        else
                        {
                            ADDR_ASSERT(allowedSwSet.sw_S);
                            allowedSwModeSet.value &= Gfx9StandardSwModeMask;
                        }
                    }
                    else if (ElemLib::IsMacroPixelPacked(pIn->format))
                    {
                        // Macro pixel packed: prefer S, then D, then R
                        if (allowedSwSet.sw_S)
                        {
                            allowedSwModeSet.value &= Gfx9StandardSwModeMask;
                        }
                        else if (allowedSwSet.sw_D)
                        {
                            allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
                        }
                        else
                        {
                            ADDR_ASSERT(allowedSwSet.sw_R);
                            allowedSwModeSet.value &= Gfx9RotateSwModeMask;
                        }
                    }
                    else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
                    {
                        // 3D: prefer D for color surfaces, otherwise Z, then S
                        if (pIn->flags.color && allowedSwSet.sw_D)
                        {
                            allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
                        }
                        else if (allowedSwSet.sw_Z)
                        {
                            allowedSwModeSet.value &= Gfx9ZSwModeMask;
                        }
                        else
                        {
                            ADDR_ASSERT(allowedSwSet.sw_S);
                            allowedSwModeSet.value &= Gfx9StandardSwModeMask;
                        }
                    }
                    else
                    {
                        // Everything else: prefer R for rotated surfaces, otherwise D, then S, then Z
                        if (pIn->flags.rotated && allowedSwSet.sw_R)
                        {
                            allowedSwModeSet.value &= Gfx9RotateSwModeMask;
                        }
                        else if (allowedSwSet.sw_D)
                        {
                            allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
                        }
                        else if (allowedSwSet.sw_S)
                        {
                            allowedSwModeSet.value &= Gfx9StandardSwModeMask;
                        }
                        else
                        {
                            ADDR_ASSERT(allowedSwSet.sw_Z);
                            allowedSwModeSet.value &= Gfx9ZSwModeMask;
                        }
                    }

                    // Swizzle type should be determined.
                    ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
                }

                // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
                // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
                // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
                pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
            }

            returnCode = ADDR_OK;
        }
        else
        {
            // Invalid combination...
            ADDR_ASSERT_ALWAYS();
        }
    }
    else
    {
        // Invalid combination...
        ADDR_ASSERT_ALWAYS();
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ComputeStereoInfo
*
*   @brief
*       Compute height alignment and right eye pipeBankXor for stereo surface
*
*   @return
*       Error code
*
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
    UINT_32*                                pHeightAlign
    ) const
{
    const UINT_32 equationIdx = HwlGetEquationIndex(pIn, pOut);

    // An out-of-range equation index is reported as an error
    if (equationIdx >= m_numEquations)
    {
        ADDR_ASSERT_ALWAYS();
        return ADDR_ERROR;
    }

    // Non-xor swizzle modes leave *pHeightAlign and the stereo info untouched
    if (IsXor(pIn->swizzleMode) == FALSE)
    {
        return ADDR_OK;
    }

    const UINT_32        blockLog2  = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32        pipeBits   = GetPipeXorBits(blockLog2);
    const UINT_32        bankBits   = GetBankXorBits(blockLog2);
    const UINT_32        elemLog2   = Log2(pIn->bpp >> 3);
    const UINT_32        yMaxBlk256 = Log2(Block256_2d[elemLog2].h) - 1;
    const ADDR_EQUATION* pEquation  = &m_equationTable[equationIdx];

    ADDR_ASSERT(yMaxBlk256 ==
                GetMaxValidChannelIndex(&pEquation->addr[0], Log2Size256, 1));

    // Highest y coordinate bit used by the address bits of the whole block
    const UINT_32 yMaxBase = (blockLog2 - Log2Size256) / 2 + yMaxBlk256;

    ADDR_ASSERT(yMaxBase ==
                GetMaxValidChannelIndex(&pEquation->addr[0], blockLog2, 1));

    // Highest y coordinate bit used by the pipe xor bits (0 when there are none)
    const UINT_32 yMaxPipeXor = (pipeBits != 0) ? (yMaxBlk256 + pipeBits) : 0;

    ADDR_ASSERT(yMaxPipeXor ==
                GetMaxValidChannelIndex(&pEquation->xor1[m_pipeInterleaveLog2], pipeBits, 1));

    // Highest y coordinate bit used by the bank xor bits (0 when there are none)
    const UINT_32 yMaxBankXor = (bankBits != 0) ? (yMaxBlk256 + (pipeBits + 1) / 2 + bankBits) : 0;

    ADDR_ASSERT(yMaxBankXor ==
                GetMaxValidChannelIndex(&pEquation->xor1[m_pipeInterleaveLog2 + pipeBits], bankBits, 1));

    const UINT_32 yMaxXor = Max(yMaxPipeXor, yMaxBankXor);

    // Nothing to do unless the xor bits reach further in y than the base equation does
    if (yMaxXor <= yMaxBase)
    {
        return ADDR_OK;
    }

    *pHeightAlign = 1u << yMaxXor;

    // The right eye swizzle is only reported when the client supplied a stereo info structure
    if (pOut->pStereoInfo == NULL)
    {
        return ADDR_OK;
    }

    pOut->pStereoInfo->rightSwizzle = 0;

    // The swizzle bits are only set when the aligned height is an odd multiple of the height alignment
    if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
    {
        if (yMaxPipeXor == yMaxXor)
        {
            pOut->pStereoInfo->rightSwizzle |= (1u << 1);
        }

        if (yMaxBankXor == yMaxXor)
        {
            // Bit position is pipeBits when that is odd, pipeBits + 1 when it is even
            const UINT_32 bankSwizzleBit = ((pipeBits % 2) == 0) ? (pipeBits + 1) : pipeBits;

            pOut->pStereoInfo->rightSwizzle |= 1u << bankSwizzleBit;
        }

        ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
                    GetCoordActiveMask(&pEquation->xor1[m_pipeInterleaveLog2],
                                       pipeBits + bankBits, 1, yMaxXor));
    }

    return ADDR_OK;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSurfaceInfoTiled
*
*   @brief
*       Internal function to calculate alignment for tiled surface
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                                &pOut->blockHeight,
                                                                &pOut->blockSlices,
                                                                pIn->bpp,
                                                                pIn->numFrags,
                                                                pIn->resourceType,
                                                                pIn->swizzleMode);

    if (returnCode == ADDR_OK)
    {
        UINT_32 pitchAlignInElement = pOut->blockWidth;

        if ((IsTex2d(pIn->resourceType) == TRUE) &&
            (pIn->flags.display || pIn->flags.rotated) &&
            (pIn->numMipLevels <= 1) &&
            (pIn->numSamples <= 1) &&
            (pIn->numFrags <= 1))
        {
            // Display engine needs pitch align to be at least 32 pixels.
            pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
        }

        pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);

        if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
        {
            if ((pIn->pitchInElement % pitchAlignInElement) != 0)
            {
                returnCode = ADDR_INVALIDPARAMS;
            }
            else if (pIn->pitchInElement < pOut->pitch)
            {
                returnCode = ADDR_INVALIDPARAMS;
            }
            else
            {
                pOut->pitch = pIn->pitchInElement;
            }
        }

        UINT_32 heightAlign = 0;

        if (pIn->flags.qbStereo)
        {
            returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
        }

        if (returnCode == ADDR_OK)
        {
            pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);

            if (heightAlign > 1)
            {
                pOut->height = PowTwoAlign(pOut->height, heightAlign);
            }

            pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);

            pOut->epitchIsHeight   = FALSE;
            pOut->mipChainInTail   = FALSE;
            pOut->firstMipIdInTail = pIn->numMipLevels;

            pOut->mipChainPitch    = pOut->pitch;
            pOut->mipChainHeight   = pOut->height;
            pOut->mipChainSlice    = pOut->numSlices;

            if (pIn->numMipLevels > 1)
            {
                pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
                                                         pIn->swizzleMode,
                                                         pIn->bpp,
                                                         pIn->width,
                                                         pIn->height,
                                                         pIn->numSlices,
                                                         pOut->blockWidth,
                                                         pOut->blockHeight,
                                                         pOut->blockSlices,
                                                         pIn->numMipLevels,
                                                         pOut->pMipInfo);

                const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);

                if (endingMipId == 0)
                {
                    const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
                                                           pIn->swizzleMode,
                                                           pOut->blockWidth,
                                                           pOut->blockHeight,
                                                           pOut->blockSlices);

                    pOut->epitchIsHeight = TRUE;
                    pOut->pitch          = tailMaxDim.w;
                    pOut->height         = tailMaxDim.h;
                    pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
                                           tailMaxDim.d : pIn->numSlices;
                    pOut->mipChainInTail = TRUE;
                }
                else
                {
                    UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
                    UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;

                    AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
                                                           pIn->swizzleMode,
                                                           mip0WidthInBlk,
                                                           mip0HeightInBlk,
                                                           pOut->numSlices / pOut->blockSlices);
                    if (majorMode == ADDR_MAJOR_Y)
                    {
                        UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);

                        if ((mip1WidthInBlk == 1) && (endingMipId > 2))
                        {
                            mip1WidthInBlk++;
                        }

                        pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);

                        pOut->epitchIsHeight = FALSE;
                    }
                    else
                    {
                        UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);

                        if ((mip1HeightInBlk == 1) && (endingMipId > 2))
                        {
                            mip1HeightInBlk++;
                        }

                        pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);

                        pOut->epitchIsHeight = TRUE;
                    }
                }

                if (pOut->pMipInfo != NULL)
                {
                    UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);

                    for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
                    {
                        Dim3d   mipStartPos          = {0};
                        UINT_32 mipTailOffsetInBytes = 0;

                        mipStartPos = GetMipStartPos(pIn->resourceType,
                                                     pIn->swizzleMode,
                                                     pOut->pitch,
                                                     pOut->height,
                                                     pOut->numSlices,
                                                     pOut->blockWidth,
                                                     pOut->blockHeight,
                                                     pOut->blockSlices,
                                                     i,
                                                     elementBytesLog2,
                                                     &mipTailOffsetInBytes);

                        UINT_32 pitchInBlock     =
                            pOut->mipChainPitch / pOut->blockWidth;
                        UINT_32 sliceInBlock     =
                            (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
                        UINT_64 blockIndex       =
                            mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
                        UINT_64 macroBlockOffset =
                            blockIndex << GetBlockSizeLog2(pIn->swizzleMode);

                        pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
                        pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
                    }
                }
            }
            else if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].pitch  = pOut->pitch;
                pOut->pMipInfo[0].height = pOut->height;
                pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
                pOut->pMipInfo[0].offset = 0;
            }

            pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
                              (pIn->bpp >> 3) * pIn->numFrags;
            pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
            pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);

            if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
                (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
                (pIn->flags.texture == TRUE) &&
                (pIn->flags.noMetadata == FALSE) &&
                (pIn->flags.metaPipeUnaligned == FALSE))
            {
                // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
                // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
                // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
                // them, which may cause invalid metadata to be fetched.
                pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
            }

            if (pIn->flags.prt)
            {
                pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
            }
        }
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSurfaceInfoLinear
*
*   @brief
*       Internal function to calculate alignment for linear surface
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR_E_RETURNCODE returnCode   = ADDR_OK;
    UINT_32           pitch        = 0;
    UINT_32           actualHeight = 0;
    UINT_32           elementBytes = pIn->bpp >> 3;
    const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;

    if (IsTex1d(pIn->resourceType))
    {
        // A 1D resource must not have more than one row
        if (pIn->height > 1)
        {
            returnCode = ADDR_INVALIDPARAMS;
        }
        else
        {
            const UINT_32 pitchAlignInElement = alignment / elementBytes;

            // Every mip level occupies one row of the same padded pitch, so the mip chain height is the mip count
            pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
            actualHeight = pIn->numMipLevels;

            // Client supplied pitch/height are only honored for non-PRT surfaces
            if (pIn->flags.prt == FALSE)
            {
                returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
                                                        &pitch, &actualHeight);
            }

            if (returnCode == ADDR_OK)
            {
                if (pOut->pMipInfo != NULL)
                {
                    for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
                    {
                        pOut->pMipInfo[i].offset = pitch * elementBytes * i;
                        pOut->pMipInfo[i].pitch  = pitch;
                        pOut->pMipInfo[i].height = 1;
                        pOut->pMipInfo[i].depth  = 1;
                    }
                }
            }
        }
    }
    else
    {
        returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
    }

    // A zero pitch or height means no usable layout was produced (this also covers the early error paths above,
    // which leave both values at their initial 0)
    if ((pitch == 0) || (actualHeight == 0))
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    if (returnCode == ADDR_OK)
    {
        pOut->pitch          = pitch;
        pOut->height         = pIn->height;
        pOut->numSlices      = pIn->numSlices;
        pOut->mipChainPitch  = pitch;
        pOut->mipChainHeight = actualHeight;
        pOut->mipChainSlice  = pOut->numSlices;
        pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
        pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
        pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
        pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
        pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
        pOut->blockHeight    = 1;
        pOut->blockSlices    = 1;

        // Post calculation validate. Only meaningful on success: on the failure paths pOut->sliceSize is not
        // written by this function, so checking it there would test whatever the caller left in the structure.
        ADDR_ASSERT(pOut->sliceSize > 0);
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetMipChainInfo
*
*   @brief
*       Internal function to get out information about mip chain
*
*   @return
*       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
************************************************************************************************************************
*/
UINT_32 Gfx9Lib::GetMipChainInfo(
    AddrResourceType  resourceType,   ///< [in] resource type
    AddrSwizzleMode   swizzleMode,    ///< [in] swizzle mode
    UINT_32           bpp,            ///< [in] bits per element
    UINT_32           mip0Width,      ///< [in] starting width used for mip 0
    UINT_32           mip0Height,     ///< [in] starting height used for mip 0
    UINT_32           mip0Depth,      ///< [in] starting depth used for mip 0 (only used for 3D resources)
    UINT_32           blockWidth,     ///< [in] block width
    UINT_32           blockHeight,    ///< [in] block height
    UINT_32           blockDepth,     ///< [in] block depth
    UINT_32           numMipLevel,    ///< [in] number of mip levels to walk
    ADDR2_MIP_INFO*   pMipInfo        ///< [out] optional per-mip info array (may be NULL)
    ) const
{
    const Dim3d tailMaxDim =
        GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);

    UINT_32 mipPitch         = mip0Width;
    UINT_32 mipHeight        = mip0Height;
    UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
    // Running byte offset of the current mip. It is accumulated in 64 bits because each step adds the product
    // of four 32-bit values, and a 32-bit accumulator would silently wrap for large mip chains.
    UINT_64 offset           = 0;
    UINT_32 firstMipIdInTail = numMipLevel;
    BOOL_32 inTail           = FALSE;
    BOOL_32 finalDim         = FALSE;
    BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
    BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);

    for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
    {
        if (inTail)
        {
            // Already inside the mip tail: once a mip is 256 bytes or smaller, snap its dimensions to the
            // 256-byte block table and stop shrinking pitch/height from then on (finalDim)
            if (finalDim == FALSE)
            {
                UINT_32 mipSize;

                if (is3dThick)
                {
                    mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
                }
                else
                {
                    mipSize = mipPitch * mipHeight * (bpp >> 3);
                }

                if (mipSize <= 256)
                {
                    UINT_32 index = Log2(bpp >> 3);

                    if (is3dThick)
                    {
                        mipPitch  = Block256_3dZ[index].w;
                        mipHeight = Block256_3dZ[index].h;
                        mipDepth  = Block256_3dZ[index].d;
                    }
                    else
                    {
                        mipPitch  = Block256_2d[index].w;
                        mipHeight = Block256_2d[index].h;
                    }

                    finalDim = TRUE;
                }
            }
        }
        else
        {
            inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
                                 mipPitch, mipHeight, mipDepth);

            if (inTail)
            {
                // First mip that fits in the tail: it is reported with the tail's maximum dimensions
                firstMipIdInTail = mipId;
                mipPitch         = tailMaxDim.w;
                mipHeight        = tailMaxDim.h;

                if (is3dThick)
                {
                    mipDepth = tailMaxDim.d;
                }
            }
            else
            {
                // Mip outside the tail: pad its dimensions to whole blocks
                mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
                mipHeight = PowTwoAlign(mipHeight, blockHeight);

                if (is3dThick)
                {
                    mipDepth = PowTwoAlign(mipDepth,  blockDepth);
                }
            }
        }

        if (pMipInfo != NULL)
        {
            pMipInfo[mipId].pitch  = mipPitch;
            pMipInfo[mipId].height = mipHeight;
            pMipInfo[mipId].depth  = mipDepth;
            pMipInfo[mipId].offset = offset;
        }

        // Widen before multiplying so the per-mip size itself cannot overflow 32 bits
        offset += static_cast<UINT_64>(mipPitch) * mipHeight * mipDepth * (bpp >> 3);

        // Derive the next mip's dimensions
        if (finalDim)
        {
            if (is3dThin)
            {
                mipDepth = Max(mipDepth >> 1, 1u);
            }
        }
        else
        {
            mipPitch  = Max(mipPitch >> 1, 1u);
            mipHeight = Max(mipHeight >> 1, 1u);

            if (is3dThick || is3dThin)
            {
                mipDepth = Max(mipDepth >> 1, 1u);
            }
        }
    }

    return firstMipIdInTail;
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetMetaMiptailInfo
*
*   @brief
*       Get mip tail coordinate information.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::GetMetaMiptailInfo(
    ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
    Dim3d                   mipCoord,       ///< [in] mip tail base coord
    UINT_32                 numMipInTail,   ///< [in] number of mips in tail
    Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
    ) const
{
    // {x, y} step added to the start coordinate of the first mip whose width is <= 32, indexed by the number of
    // mips emitted since that mip. The trailing comment names the mip that follows each step.
    static const UINT_32 Blk32Step[][2] =
    {
        { 32,  0 },     // 16x16
        {  0, 32 },     // 8x8
        { 16, 32 },     // 4x4
        { 32, 32 },     // 2x2
        { 48, 32 },     // 1x1
        // The following are for BC/ASTC formats
        {  0, 48 },     // 1/2 x 1/2
        { 16, 48 },     // 1/4 x 1/4
        { 32, 48 },     // 1/8 x 1/8
        { 48, 48 },     // 1/16 x 1/16
    };
    static const UINT_32 NumBlk32Steps = sizeof(Blk32Step) / sizeof(Blk32Step[0]);
    static const UINT_32 InvalidMipId  = 0xFFFFFFFF;

    const BOOL_32 isThick  = (pMetaBlkDim->d > 1);
    const UINT_32 metaBlkH = pMetaBlkDim->h;

    // Minimal coordinate increment, chosen from the meta block height
    UINT_32 minInc = 0;

    if (isThick)
    {
        if (metaBlkH >= 512)
        {
            minInc = 128;
        }
        else if (metaBlkH == 256)
        {
            minInc = 64;
        }
        else
        {
            minInc = 32;
        }
    }
    else
    {
        minInc = (metaBlkH >= 1024) ? 256 : ((metaBlkH == 512) ? 128 : 64);
    }

    // Dimensions of the mip currently being emitted
    UINT_32 curWidth  = pMetaBlkDim->w;
    UINT_32 curHeight = metaBlkH >> 1;
    UINT_32 curDepth  = pMetaBlkDim->d;

    // Index of the first mip whose width is <= 32; unknown until one is seen
    UINT_32 firstBlk32Mip = InvalidMipId;

    for (UINT_32 mip = 0; mip < numMipInTail; mip++)
    {
        ADDR2_META_MIP_INFO& info = pInfo[mip];

        info.inMiptail = TRUE;
        info.startX    = mipCoord.w;
        info.startY    = mipCoord.h;
        info.startZ    = mipCoord.d;
        info.width     = curWidth;
        info.height    = curHeight;
        info.depth     = curDepth;

        if (curWidth <= 32)
        {
            if (firstBlk32Mip == InvalidMipId)
            {
                firstBlk32Mip = mip;
            }

            // Positions of the remaining small mips are all relative to the first <= 32 wide mip
            const ADDR2_META_MIP_INFO& anchor = pInfo[firstBlk32Mip];
            const UINT_32              step   = mip - firstBlk32Mip;

            mipCoord.w = anchor.startX;
            mipCoord.h = anchor.startY;
            mipCoord.d = anchor.startZ;

            if (step < NumBlk32Steps)
            {
                mipCoord.w += Blk32Step[step][0];
                mipCoord.h += Blk32Step[step][1];
            }
            else
            {
                ADDR_ASSERT_ALWAYS();
            }

            curWidth = (step == 0) ? 16 : 8;
        }
        else
        {
            if (curWidth > minInc)
            {
                // On even mip, go down, otherwise, go across
                if ((mip & 1) != 0)
                {
                    mipCoord.w += curWidth;
                }
                else
                {
                    mipCoord.h += curHeight;
                }
            }
            else if (isThick)
            {
                // Below the minimal increment, 3d: just go in z direction
                mipCoord.d += curDepth;
            }
            else if ((curWidth * 2) == minInc)
            {
                // Below the minimal increment, 2d, 2 mips below: go back in x, and down in y
                mipCoord.w -= minInc;
                mipCoord.h += minInc;
            }
            else
            {
                // Below the minimal increment, 2d otherwise: just go across in x
                mipCoord.w += minInc;
            }

            // Divide the width by 2
            curWidth >>= 1;
        }

        // After the first mip in tail, the mip is always a square...
        curHeight = curWidth;

        // ...or for 3d, a cube
        if (isThick)
        {
            curDepth = curWidth;
        }
    }
}

/**
************************************************************************************************************************
*   Gfx9Lib::GetMipStartPos
*
*   @brief
*       Internal function to get out information about mip logical start position
*
*   @return
*       logical start position in macro block width/height/depth of one mip level within one slice
************************************************************************************************************************
*/
Dim3d Gfx9Lib::GetMipStartPos(
    AddrResourceType  resourceType,
    AddrSwizzleMode   swizzleMode,
    UINT_32           width,
    UINT_32           height,
    UINT_32           depth,
    UINT_32           blockWidth,
    UINT_32           blockHeight,
    UINT_32           blockDepth,
    UINT_32           mipId,
    UINT_32           log2ElementBytes,
    UINT_32*          pMipTailBytesOffset) const
{
    Dim3d       startPos   = {0};
    const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);

    // If mip 0 already fits in the mip tail, every mip is reported as in-tail and indexed by its own id
    BOOL_32       inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
    const UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
    UINT_32       mipIndexInTail = mipId;

    if (inMipTail == FALSE)
    {
        // Dimensions of the mip being stepped over, in blocks; starts at mip 0
        UINT_32 widthInBlk  = width  / blockWidth;
        UINT_32 heightInBlk = height / blockHeight;
        UINT_32 depthInBlk  = depth  / blockDepth;

        const AddrMajorMode majorMode = GetMajorMode(resourceType,
                                                     swizzleMode,
                                                     widthInBlk,
                                                     heightInBlk,
                                                     depthInBlk);

        // Id of the mip at which the tail starts; mipId + 1 means "not reached"
        UINT_32 endingMip = mipId + 1;

        for (UINT_32 level = 1; level <= mipId; level++)
        {
            const BOOL_32 isTurnLevel = (level == 1) || (level == 3);

            if (isTurnLevel)
            {
                // Levels 1 and 3 advance across the major direction
                if (majorMode == ADDR_MAJOR_Y)
                {
                    startPos.w += widthInBlk;
                }
                else
                {
                    startPos.h += heightInBlk;
                }
            }
            else
            {
                // All other levels advance along the major direction
                switch (majorMode)
                {
                    case ADDR_MAJOR_X:
                        startPos.w += widthInBlk;
                        break;
                    case ADDR_MAJOR_Y:
                        startPos.h += heightInBlk;
                        break;
                    default:
                        startPos.d += depthInBlk;
                        break;
                }
            }

            // Decide whether the mip just stepped over is small enough for the tail to begin at this level
            BOOL_32 reachedTail = FALSE;

            if (IsThick(resourceType, swizzleMode))
            {
                switch (log2BlkSize % 3)
                {
                    case 0:
                        reachedTail = (widthInBlk <= 2) && (heightInBlk == 1) && (depthInBlk <= 2);
                        break;
                    case 1:
                        reachedTail = (widthInBlk == 1) && (heightInBlk <= 2) && (depthInBlk <= 2);
                        break;
                    default:
                        reachedTail = (widthInBlk <= 2) && (heightInBlk <= 2) && (depthInBlk == 1);
                        break;
                }
            }
            else if ((log2BlkSize & 1) != 0)
            {
                reachedTail = (widthInBlk <= 2) && (heightInBlk == 1);
            }
            else
            {
                reachedTail = (widthInBlk == 1) && (heightInBlk <= 2);
            }

            if (reachedTail)
            {
                endingMip = level;
                break;
            }

            widthInBlk  = RoundHalf(widthInBlk);
            heightInBlk = RoundHalf(heightInBlk);
            depthInBlk  = RoundHalf(depthInBlk);
        }

        if (mipId >= endingMip)
        {
            inMipTail      = TRUE;
            mipIndexInTail = mipId - endingMip;
        }
    }

    if (inMipTail)
    {
        // The table holds offsets in units of 256 bytes, hence the shift by 8
        UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
        ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
        *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
    }

    return startPos;
}

/**
************************************************************************************************************************
*   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
*
*   @brief
*       Internal function to calculate address from coord for tiled swizzle surface
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
     const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
     ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
     ) const
{
    ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
    localIn.swizzleMode  = pIn->swizzleMode;
    localIn.flags        = pIn->flags;
    localIn.resourceType = pIn->resourceType;
    localIn.bpp          = pIn->bpp;
    localIn.width        = Max(pIn->unalignedWidth, 1u);
    localIn.height       = Max(pIn->unalignedHeight, 1u);
    localIn.numSlices    = Max(pIn->numSlices, 1u);
    localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
    localIn.numSamples   = Max(pIn->numSamples, 1u);
    localIn.numFrags     = Max(pIn->numFrags, 1u);
    if (localIn.numMipLevels <= 1)
    {
        localIn.pitchInElement = pIn->pitchInElement;
    }

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
    ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);

    BOOL_32 valid = (returnCode == ADDR_OK) &&
                    (IsThin(pIn->resourceType, pIn->swizzleMode) ||
                     IsThick(pIn->resourceType, pIn->swizzleMode)) &&
                    ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));

    if (valid)
    {
        UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
        Dim3d   mipStartPos        = {0};
        UINT_32 mipTailBytesOffset = 0;

        if (pIn->numMipLevels > 1)
        {
            // Mip-map chain cannot be MSAA surface
            ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));

            mipStartPos = GetMipStartPos(pIn->resourceType,
                                         pIn->swizzleMode,
                                         localOut.pitch,
                                         localOut.height,
                                         localOut.numSlices,
                                         localOut.blockWidth,
                                         localOut.blockHeight,
                                         localOut.blockSlices,
                                         pIn->mipId,
                                         log2ElementBytes,
                                         &mipTailBytesOffset);
        }

        UINT_32 interleaveOffset = 0;
        UINT_32 pipeBits = 0;
        UINT_32 pipeXor = 0;
        UINT_32 bankBits = 0;
        UINT_32 bankXor = 0;

        if (IsThin(pIn->resourceType, pIn->swizzleMode))
        {
            UINT_32 blockOffset = 0;
            UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);

            if (IsZOrderSwizzle(pIn->swizzleMode))
            {
                // Morton generation
                if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
                {
                    UINT_32 totalLowBits = 6 - log2ElementBytes;
                    UINT_32 mortBits = totalLowBits / 2;
                    UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
                    // Are 9 bits enough?
                    UINT_32 highBitsValue =
                        MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
                    blockOffset = lowBitsValue | highBitsValue;
                    ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
                }
                else
                {
                    blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
                }

                // Fill LSBs with sample bits
                if (pIn->numSamples > 1)
                {
                    blockOffset *= pIn->numSamples;
                    blockOffset |= pIn->sample;
                }

                // Shift according to BytesPP
                blockOffset <<= log2ElementBytes;
            }
            else
            {
                // Micro block offset
                UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
                blockOffset = microBlockOffset;

                // Micro block dimension
                ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
                Dim2d microBlockDim = Block256_2d[log2ElementBytes];
                // Morton generation, does 12 bit enough?
                blockOffset |=
                    MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;

                // Sample bits start location
                UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
                // Join sample bits information to the highest Macro block bits
                if (IsNonPrtXor(pIn->swizzleMode))
                {
                    // Non-prt-Xor : xor highest Macro block bits with sample bits
                    blockOffset = blockOffset ^ (pIn->sample << sampleStart);
                }
                else
                {
                    // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
                    // after this op, the blockOffset only contains log2 Macro block size bits
                    blockOffset %= (1 << sampleStart);
                    blockOffset |= (pIn->sample << sampleStart);
                    ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
                }
            }

            if (IsXor(pIn->swizzleMode))
            {
                // Mask off bits above Macro block bits to keep page synonyms working for prt
                if (IsPrt(pIn->swizzleMode))
                {
                    blockOffset &= ((1 << log2BlkSize) - 1);
                }

                // Preserve offset inside pipe interleave
                interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
                blockOffset >>= m_pipeInterleaveLog2;

                // Pipe/Se xor bits
                pipeBits = GetPipeXorBits(log2BlkSize);
                // Pipe xor
                pipeXor = FoldXor2d(blockOffset, pipeBits);
                blockOffset >>= pipeBits;

                // Bank xor bits
                bankBits = GetBankXorBits(log2BlkSize);
                // Bank Xor
                bankXor = FoldXor2d(blockOffset, bankBits);
                blockOffset >>= bankBits;

                // Put all the part back together
                blockOffset <<= bankBits;
                blockOffset |= bankXor;
                blockOffset <<= pipeBits;
                blockOffset |= pipeXor;
                blockOffset <<= m_pipeInterleaveLog2;
                blockOffset |= interleaveOffset;
            }

            ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
            ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));

            blockOffset |= mipTailBytesOffset;

            if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
            {
                // Apply slice xor if not MSAA/PRT
                blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
                blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
                                (m_pipeInterleaveLog2 + pipeBits));
            }

            returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
                                                  bankBits, pipeBits, &blockOffset);

            blockOffset %= (1 << log2BlkSize);

            UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
            UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
            UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
            UINT_64 macroBlockIndex =
                (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
                ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
                ((pIn->x / localOut.blockWidth) + mipStartPos.w);

            pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
        }
        else
        {
            UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);

            Dim3d microBlockDim = Block1K_3d[log2ElementBytes];

            UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
                                              (pIn->y / microBlockDim.h),
                                              (pIn->slice / microBlockDim.d),
                                              8);

            blockOffset <<= 10;
            blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);

            if (IsXor(pIn->swizzleMode))
            {
                // Mask off bits above Macro block bits to keep page synonyms working for prt
                if (IsPrt(pIn->swizzleMode))
                {
                    blockOffset &= ((1 << log2BlkSize) - 1);
                }

                // Preserve offset inside pipe interleave
                interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
                blockOffset >>= m_pipeInterleaveLog2;

                // Pipe/Se xor bits
                pipeBits = GetPipeXorBits(log2BlkSize);
                // Pipe xor
                pipeXor = FoldXor3d(blockOffset, pipeBits);
                blockOffset >>= pipeBits;

                // Bank xor bits
                bankBits = GetBankXorBits(log2BlkSize);
                // Bank Xor
                bankXor = FoldXor3d(blockOffset, bankBits);
                blockOffset >>= bankBits;

                // Put all the part back together
                blockOffset <<= bankBits;
                blockOffset |= bankXor;
                blockOffset <<= pipeBits;
                blockOffset |= pipeXor;
                blockOffset <<= m_pipeInterleaveLog2;
                blockOffset |= interleaveOffset;
            }

            ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
            ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
            blockOffset |= mipTailBytesOffset;

            returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
                                                  bankBits, pipeBits, &blockOffset);

            blockOffset %= (1 << log2BlkSize);

            UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
            UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
            UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;

            UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
            UINT_32 sliceSizeInBlock =
                (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
            UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;

            pOut->addr = blockOffset | (blockIndex << log2BlkSize);
        }
    }
    else
    {
        returnCode = ADDR_INVALIDPARAMS;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ComputeSurfaceLinearPadding
*
*   @brief
*       Internal function to calculate padding for linear swizzle 2D/3D surface
*
*   @return
*       The return code of ApplyCustomizedPitchHeight; the outputs are only written when it is ADDR_OK
************************************************************************************************************************
*/
ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input structure
    UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
    UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
    ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
    ) const
{
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    // NOTE(review): assumes pIn->bpp >= 8, otherwise elementBytes is 0 and the division below is invalid.
    UINT_32 elementBytes        = pIn->bpp >> 3;
    UINT_32 pitchAlignInElement = 0;

    if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
    {
        // Linear-general is only expected for a single mip / single slice and needs no pitch alignment
        ADDR_ASSERT(pIn->numMipLevels <= 1);
        ADDR_ASSERT(pIn->numSlices <= 1);
        pitchAlignInElement = 1;
    }
    else
    {
        // 256 bytes of pitch alignment, expressed in elements
        pitchAlignInElement = (256 / elementBytes);
    }

    UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
    UINT_32 slice0PaddedHeight = pIn->height;

    // Gives the client-supplied pitch/height a chance to override the computed values
    returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
                                            &mipChainWidth, &slice0PaddedHeight);

    if (returnCode == ADDR_OK)
    {
        UINT_32 mipChainHeight = 0;
        UINT_32 mipHeight      = pIn->height;
        UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;

        // Mips are stacked vertically: each level starts at the accumulated height of the previous levels
        for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
        {
            if (pMipInfo != NULL)
            {
                // Widen before multiplying so that a large mip chain cannot wrap around in 32-bit arithmetic
                pMipInfo[i].offset = static_cast<UINT_64>(mipChainWidth) * mipChainHeight * elementBytes;
                pMipInfo[i].pitch  = mipChainWidth;
                pMipInfo[i].height = mipHeight;
                pMipInfo[i].depth  = mipDepth;
            }

            mipChainHeight += mipHeight;
            mipHeight = RoundHalf(mipHeight);
            mipHeight = Max(mipHeight, 1u);
        }

        *pMipmap0PaddedWidth = mipChainWidth;
        // With a mip chain the slice height is the whole stacked chain; otherwise the (possibly customized) height
        *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
    }

    return returnCode;
}

/**
************************************************************************************************************************
*   Gfx9Lib::ComputeThinBlockDimension
*
*   @brief
*       Internal function to get thin block width/height/depth in element from surface input params.
*       The 256B micro block dimension for the element size is scaled up to the block size of the swizzle mode,
*       then scaled back down by the sample count for multisampled surfaces. Depth is always 1.
*
*   @return
*       N/A
************************************************************************************************************************
*/
VOID Gfx9Lib::ComputeThinBlockDimension(
    UINT_32*         pWidth,
    UINT_32*         pHeight,
    UINT_32*         pDepth,
    UINT_32          bpp,
    UINT_32          numSamples,
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    ADDR_ASSERT(IsThin(resourceType, swizzleMode));

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(swizzleMode);
    const UINT_32 bppIndex    = Log2(bpp >> 3);

    // Number of doublings from a 256B block to the full block; width takes the smaller half, height the rest
    const UINT_32 totalShift  = blkSizeLog2 - 8;
    const UINT_32 widthShift  = totalShift / 2;
    const UINT_32 heightShift = totalShift - widthShift;

    ADDR_ASSERT(bppIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));

    *pWidth  = (Block256_2d[bppIndex].w << widthShift);
    *pHeight = (Block256_2d[bppIndex].h << heightShift);
    *pDepth  = 1;

    if (numSamples > 1)
    {
        const UINT_32 samplesLog2 = Log2(numSamples);
        const UINT_32 floorHalf   = samplesLog2 >> 1;
        const UINT_32 ceilHalf    = floorHalf + (samplesLog2 & 1);
        const UINT_32 oddBlkSize  = blkSizeLog2 & 1;

        // The dimension that received the extra doubling above gives back the larger share of the sample bits
        *pWidth  >>= (oddBlkSize != 0) ? floorHalf : ceilHalf;
        *pHeight >>= (oddBlkSize != 0) ? ceilHalf  : floorHalf;
    }
}

} // V2
} // Addr
} // namespace rocr

================================================
FILE: runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.h
================================================
/*
************************************************************************************************************************
*
*  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
*  SPDX-License-Identifier: MIT
*
***********************************************************************************************************************/

/**
************************************************************************************************************************
* @file  gfx9addrlib.h
* @brief Contains the Gfx9Lib class definition.
************************************************************************************************************************
*/

#ifndef __GFX9_ADDR_LIB_H__
#define __GFX9_ADDR_LIB_H__

#include "addrlib2.h"
#include "coord.h"

namespace rocr {
namespace Addr
{
namespace V2
{

/**
************************************************************************************************************************
* @brief GFX9 specific settings structure.
*
* All flags are 1-bit fields held in an unnamed struct member. They are split into three groups, and each group is
* padded by a reserved field so that the group adds up to 32 bits.
************************************************************************************************************************
*/
struct Gfx9ChipSettings
{
    struct
    {
        // Asic/Generation name
        UINT_32 isArcticIsland      : 1;
        UINT_32 isVega10            : 1;
        UINT_32 isRaven             : 1;
        UINT_32 isVega12            : 1;
        UINT_32 isVega20            : 1;
        UINT_32 reserved0           : 27;   // pads the group above to 32 bits

        // Display engine IP version name
        UINT_32 isDce12             : 1;
        UINT_32 isDcn1              : 1;
        UINT_32 isDcn2              : 1;
        UINT_32 reserved1           : 29;   // pads the group above to 32 bits

        // Misc configuration bits
        UINT_32 metaBaseAlignFix    : 1;
        UINT_32 depthPipeXorDisable : 1;
        UINT_32 htileAlignFix       : 1;
        UINT_32 applyAliasFix       : 1;
        UINT_32 htileCacheRbConflict: 1;
        UINT_32 reserved2           : 27;   // pads the group above to 32 bits
    };
};

/**
************************************************************************************************************************
* @brief GFX9 data surface type (color, depth/stencil or fmask).
************************************************************************************************************************
*/
enum Gfx9DataType
{
    Gfx9DataColor        = 0,
    Gfx9DataDepthStencil = 1,
    Gfx9DataFmask        = 2
};

// Swizzle-mode bit masks: bit N of a mask is set when the mode whose ADDR_SW_* value is N belongs to the set.

// ---- Modes grouped by block size ----
const UINT_32 Gfx9LinearSwModeMask = (1u << ADDR_SW_LINEAR);

const UINT_32 Gfx9Blk256BSwModeMask =
    (1u << ADDR_SW_256B_S) | (1u << ADDR_SW_256B_D) | (1u << ADDR_SW_256B_R);

const UINT_32 Gfx9Blk4KBSwModeMask =
    (1u << ADDR_SW_4KB_Z)   | (1u << ADDR_SW_4KB_S)   | (1u << ADDR_SW_4KB_D)   | (1u << ADDR_SW_4KB_R)   |
    (1u << ADDR_SW_4KB_Z_X) | (1u << ADDR_SW_4KB_S_X) | (1u << ADDR_SW_4KB_D_X) | (1u << ADDR_SW_4KB_R_X);

const UINT_32 Gfx9Blk64KBSwModeMask =
    (1u << ADDR_SW_64KB_Z)   | (1u << ADDR_SW_64KB_S)   | (1u << ADDR_SW_64KB_D)   | (1u << ADDR_SW_64KB_R)   |
    (1u << ADDR_SW_64KB_Z_T) | (1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_R_T) |
    (1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_R_X);

// ---- Modes grouped by swizzle type (Z / S / D / R) ----
const UINT_32 Gfx9ZSwModeMask =
    (1u << ADDR_SW_4KB_Z)  | (1u << ADDR_SW_4KB_Z_X)  |
    (1u << ADDR_SW_64KB_Z) | (1u << ADDR_SW_64KB_Z_T) | (1u << ADDR_SW_64KB_Z_X);

const UINT_32 Gfx9StandardSwModeMask =
    (1u << ADDR_SW_256B_S) |
    (1u << ADDR_SW_4KB_S)  | (1u << ADDR_SW_4KB_S_X)  |
    (1u << ADDR_SW_64KB_S) | (1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_X);

const UINT_32 Gfx9DisplaySwModeMask =
    (1u << ADDR_SW_256B_D) |
    (1u << ADDR_SW_4KB_D)  | (1u << ADDR_SW_4KB_D_X)  |
    (1u << ADDR_SW_64KB_D) | (1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_D_X);

const UINT_32 Gfx9RotateSwModeMask =
    (1u << ADDR_SW_256B_R) |
    (1u << ADDR_SW_4KB_R)  | (1u << ADDR_SW_4KB_R_X)  |
    (1u << ADDR_SW_64KB_R) | (1u << ADDR_SW_64KB_R_T) | (1u << ADDR_SW_64KB_R_X);

// ---- Modes grouped by the _X / _T suffix ----
const UINT_32 Gfx9XSwModeMask =
    (1u << ADDR_SW_4KB_Z_X)  | (1u << ADDR_SW_4KB_S_X)  | (1u << ADDR_SW_4KB_D_X)  | (1u << ADDR_SW_4KB_R_X)  |
    (1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_S_X) | (1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_R_X);

const UINT_32 Gfx9TSwModeMask =
    (1u << ADDR_SW_64KB_Z_T) | (1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_R_T);

const UINT_32 Gfx9XorSwModeMask = Gfx9TSwModeMask | Gfx9XSwModeMask;

// ---- Union of every mode listed above ----
const UINT_32 Gfx9AllSwModeMask =
    Gfx9LinearSwModeMask | Gfx9ZSwModeMask | Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9RotateSwModeMask;

// ---- Per resource type sets, derived from the groups above ----
const UINT_32 Gfx9Rsrc1dSwModeMask = Gfx9LinearSwModeMask;

const UINT_32 Gfx9Rsrc2dSwModeMask = Gfx9AllSwModeMask;

// 3D: everything except the 256B and rotated modes
const UINT_32 Gfx9Rsrc3dSwModeMask = Gfx9AllSwModeMask & ~(Gfx9Blk256BSwModeMask | Gfx9RotateSwModeMask);

// 2D PRT: 4KB/64KB modes without the _X variants
const UINT_32 Gfx9Rsrc2dPrtSwModeMask = ~Gfx9XSwModeMask & (Gfx9Blk4KBSwModeMask | Gfx9Blk64KBSwModeMask);

// 3D PRT: the 2D PRT set minus the rotated and display modes
const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~(Gfx9RotateSwModeMask | Gfx9DisplaySwModeMask);

// 3D thin: display modes other than the 256B one, split by block size
const UINT_32 Gfx9Rsrc3dThinSwModeMask = ~Gfx9Blk256BSwModeMask & Gfx9DisplaySwModeMask;

const UINT_32 Gfx9Rsrc3dThin4KBSwModeMask = Gfx9Blk4KBSwModeMask & Gfx9Rsrc3dThinSwModeMask;

const UINT_32 Gfx9Rsrc3dThin64KBSwModeMask = Gfx9Blk64KBSwModeMask & Gfx9Rsrc3dThinSwModeMask;

// 3D thick: the 3D modes that are neither thin nor linear, split by block size
const UINT_32 Gfx9Rsrc3dThickSwModeMask = Gfx9Rsrc3dSwModeMask & ~Gfx9Rsrc3dThinSwModeMask & ~Gfx9LinearSwModeMask;

const UINT_32 Gfx9Rsrc3dThick4KBSwModeMask = Gfx9Blk4KBSwModeMask & Gfx9Rsrc3dThickSwModeMask;

const UINT_32 Gfx9Rsrc3dThick64KBSwModeMask = Gfx9Blk64KBSwModeMask & Gfx9Rsrc3dThickSwModeMask;

// MSAA: everything except the 256B and linear modes
const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~(Gfx9Blk256BSwModeMask | Gfx9LinearSwModeMask);

// ---- Display engine sets (DCE12 / DCN1 / DCN2); each "Bpp" set extends the matching "NonBpp" set ----
const UINT_32 Dce12NonBpp32SwModeMask =
    (1u << ADDR_SW_LINEAR)   |
    (1u << ADDR_SW_4KB_D)    | (1u << ADDR_SW_4KB_R)    |
    (1u << ADDR_SW_64KB_D)   | (1u << ADDR_SW_64KB_R)   |
    (1u << ADDR_SW_4KB_D_X)  | (1u << ADDR_SW_4KB_R_X)  |
    (1u << ADDR_SW_64KB_D_X) | (1u << ADDR_SW_64KB_R_X);

const UINT_32 Dce12Bpp32SwModeMask =
    Dce12NonBpp32SwModeMask | (1u << ADDR_SW_256B_D) | (1u << ADDR_SW_256B_R);

const UINT_32 Dcn1NonBpp64SwModeMask =
    (1u << ADDR_SW_LINEAR)  |
    (1u << ADDR_SW_4KB_S)   | (1u << ADDR_SW_4KB_S_X)  |
    (1u << ADDR_SW_64KB_S)  | (1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_X);

const UINT_32 Dcn1Bpp64SwModeMask =
    Dcn1NonBpp64SwModeMask  |
    (1u << ADDR_SW_4KB_D)   | (1u << ADDR_SW_4KB_D_X)  |
    (1u << ADDR_SW_64KB_D)  | (1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_D_X);

const UINT_32 Dcn2NonBpp64SwModeMask =
    (1u << ADDR_SW_LINEAR)  |
    (1u << ADDR_SW_64KB_S)  | (1u << ADDR_SW_64KB_S_T) | (1u << ADDR_SW_64KB_S_X);

const UINT_32 Dcn2Bpp64SwModeMask =
    Dcn2NonBpp64SwModeMask  |
    (1u << ADDR_SW_64KB_D)  | (1u << ADDR_SW_64KB_D_T) | (1u << ADDR_SW_64KB_D_X);

/**
************************************************************************************************************************
* @brief GFX9 meta equation parameters
*
* Bundles, in the same order, the values that Gfx9Lib::GenMetaEquation takes as individual arguments. Gfx9Lib keeps
* an array of these (m_cachedMetaEqKey) next to its cached meta equations and GetMetaEquation takes one by reference.
* NOTE(review): presumably instances are compared to look up a cached equation -- confirm in GetMetaEquation before
* reordering or adding fields.
************************************************************************************************************************
*/
struct MetaEqParams
{
    UINT_32          maxMip;
    UINT_32          elementBytesLog2;
    UINT_32          numSamplesLog2;
    ADDR2_META_FLAGS metaFlag;
    Gfx9DataType     dataSurfaceType;
    AddrSwizzleMode  swizzleMode;
    AddrResourceType resourceType;
    // Meta block dimensions (log2)
    UINT_32          metaBlkWidthLog2;
    UINT_32          metaBlkHeightLog2;
    UINT_32          metaBlkDepthLog2;
    // Compress block dimensions (log2)
    UINT_32          compBlkWidthLog2;
    UINT_32          compBlkHeightLog2;
    UINT_32          compBlkDepthLog2;
};

/**
************************************************************************************************************************
* @brief This class is the GFX9 specific address library
*        function set.
*
* Instances are created through CreateObj(); the constructor and destructor are protected. Most members are
* virtual Hwl* hooks declared here and defined in the matching source file; the small helpers are defined inline.
************************************************************************************************************************
*/
class Gfx9Lib : public Lib
{
public:
    /// Creates Gfx9Lib object.
    /// Storage is requested from Object::ClientAlloc and the object is placement-constructed in it;
    /// NULL is returned when the allocation fails.
    static Addr::Lib* CreateObj(const Client* pClient)
    {
        VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient);
        return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL;
    }

protected:
    Gfx9Lib(const Client* pClient);
    virtual ~Gfx9Lib();

    /// A mode counts as standard swizzle when its table entry has isStd set, or when the resource is 3D and
    /// the entry has isDisp set.
    virtual BOOL_32 HwlIsStandardSwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return m_swizzleModeTable[swizzleMode].isStd ||
               (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp);
    }

    /// A mode counts as display swizzle only for 2D resources whose table entry has isDisp set.
    virtual BOOL_32 HwlIsDisplaySwizzle(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return IsTex2d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp;
    }

    /// 2D resources are always thin; 3D resources are thin when the mode is neither isZ nor isStd.
    virtual BOOL_32 HwlIsThin(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return ((IsTex2d(resourceType)  == TRUE) ||
                ((IsTex3d(resourceType) == TRUE)                  &&
                 (m_swizzleModeTable[swizzleMode].isZ   == FALSE) &&
                 (m_swizzleModeTable[swizzleMode].isStd == FALSE)));
    }

    /// Only 3D resources can be thick, and only with an isZ or isStd mode.
    virtual BOOL_32 HwlIsThick(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const
    {
        return (IsTex3d(resourceType) &&
                (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd));
    }

    // ---- Meta surface (htile / cmask / dcc) hooks ----

    virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
        const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
        const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
        const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
        ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut) const;

    // Note: the address/coordinate conversions below are non-const, unlike the *Info hooks above.
    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
        const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
        const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
        const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
        ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut);

    virtual ADDR_E_RETURNCODE HwlSupportComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn);

    virtual VOID HwlComputeDccAddrFromCoord(
        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut);

    // ---- Equation hooks ----

    virtual UINT_32 HwlGetEquationIndex(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    virtual ADDR_E_RETURNCODE HwlComputeThinEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    virtual ADDR_E_RETURNCODE HwlComputeThickEquation(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2,
        ADDR_EQUATION* pEquation) const;

    // Get equation table pointer and number of equations
    // (stores m_equationTable through ppEquationTable and returns m_numEquations)
    virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
    {
        *ppEquationTable = m_equationTable;

        return m_numEquations;
    }

    virtual BOOL_32 IsEquationSupported(
        AddrResourceType rsrcType,
        AddrSwizzleMode swMode,
        UINT_32 elementBytesLog2) const;

    // ---- Pipe/bank xor and sub-resource hooks ----

    virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
        const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
        const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
        ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
        const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
        ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const;

    // ---- Surface info / address hooks ----

    virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
        const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
        ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
         const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut) const;

    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
        const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) const;

    virtual UINT_32 HwlComputeMaxBaseAlignments() const;

    virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;

    // ---- Initialization hooks ----

    virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);

    virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);

    /// Writes the thin block width/height/depth (in elements) for the given format and swizzle mode.
    virtual VOID ComputeThinBlockDimension(
        UINT_32*         pWidth,
        UINT_32*         pHeight,
        UINT_32*         pDepth,
        UINT_32          bpp,
        UINT_32          numSamples,
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode) const;

private:
    // ---- Coordinate-equation builders used for meta surfaces ----

    VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;

    VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
                         AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
                         UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;

    VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
                         UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
                         UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
                         AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;

    VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
                         UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
                         ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
                         AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
                         UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
                         UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
                         UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;

    // Non-const: NOTE(review) presumably because it updates the m_cachedMetaEq cache -- confirm in the source file.
    const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);

    VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
                        BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
                        UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
                        UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;

    BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    /// Computes the padded mip0 width and slice0 height of a linear surface; pMipInfo is optional.
    ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        UINT_32*                                pMipmap0PaddedWidth,
        UINT_32*                                pSlice0PaddedHeight,
        ADDR2_MIP_INFO*                         pMipInfo = NULL) const;

    /// Translates a set of allowed swizzle modes into the set of block kinds those modes cover.
    /// For 3D resources the macro blocks are classified as thin/thick via the Gfx9Rsrc3d* masks; for all other
    /// resource types only the thin 4KB/64KB entries are assigned and the thick entries keep their `= {}` state.
    static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
    {
        ADDR2_BLOCK_SET allowedBlockSet = {};

        allowedBlockSet.micro  = (allowedSwModeSet.value & Gfx9Blk256BSwModeMask) ? TRUE : FALSE;
        allowedBlockSet.linear = (allowedSwModeSet.value & Gfx9LinearSwModeMask)  ? TRUE : FALSE;

        if (rsrcType == ADDR_RSRC_TEX_3D)
        {
            allowedBlockSet.macroThin4KB   = (allowedSwModeSet.value & Gfx9Rsrc3dThin4KBSwModeMask)   ? TRUE : FALSE;
            allowedBlockSet.macroThick4KB  = (allowedSwModeSet.value & Gfx9Rsrc3dThick4KBSwModeMask)  ? TRUE : FALSE;
            allowedBlockSet.macroThin64KB  = (allowedSwModeSet.value & Gfx9Rsrc3dThin64KBSwModeMask)  ? TRUE : FALSE;
            allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx9Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE;
        }
        else
        {
            allowedBlockSet.macroThin4KB  = (allowedSwModeSet.value & Gfx9Blk4KBSwModeMask)  ? TRUE : FALSE;
            allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) ? TRUE : FALSE;
        }

        return allowedBlockSet;
    }

    /// Translates a set of allowed swizzle modes into the set of swizzle types (Z/S/D/R) those modes cover.
    static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
    {
        ADDR2_SWTYPE_SET allowedSwSet = {};

        allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx9ZSwModeMask)        ? TRUE : FALSE;
        allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx9StandardSwModeMask) ? TRUE : FALSE;
        allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx9DisplaySwModeMask)  ? TRUE : FALSE;
        allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx9RotateSwModeMask)   ? TRUE : FALSE;

        return allowedSwSet;
    }

    /// A mip is in the tail when its width and height fit within mipTailDim; the depth is only checked
    /// when the surface is not thin.
    BOOL_32 IsInMipTail(
        AddrResourceType  resourceType,
        AddrSwizzleMode   swizzleMode,
        Dim3d             mipTailDim,
        UINT_32           width,
        UINT_32           height,
        UINT_32           depth) const
    {
        BOOL_32 inTail = ((width <= mipTailDim.w) &&
                          (height <= mipTailDim.h) &&
                          (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d)));

        return inTail;
    }

    BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
    BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;

    /// Number of bank xor bits for a macro block of 2^macroBlockBits bytes: whatever is left of the block bits
    /// after the pipe xor bits and the pipe interleave bits, capped at m_banksLog2.
    UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
    {
        UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);

        // Bank xor bits
        UINT_32 bankBits = Min(macroBlockBits - pipeBits - m_pipeInterleaveLog2, m_banksLog2);

        return bankBits;
    }

    /// Base alignment of a tiled surface: the block size for xor swizzle modes, 256 for every other mode.
    UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
    {
        UINT_32 baseAlign;

        if (IsXor(swizzleMode))
        {
            baseAlign = GetBlockSize(swizzleMode);
        }
        else
        {
            baseAlign = 256;
        }

        return baseAlign;
    }

    // Initialize equation table
    VOID InitEquationTable();

    ADDR_E_RETURNCODE ComputeStereoInfo(
        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
        UINT_32*                                pHeightAlign) const;

    UINT_32 GetMipChainInfo(
        AddrResourceType  resourceType,
        AddrSwizzleMode   swizzleMode,
        UINT_32           bpp,
        UINT_32           mip0Width,
        UINT_32           mip0Height,
        UINT_32           mip0Depth,
        UINT_32           blockWidth,
        UINT_32           blockHeight,
        UINT_32           blockDepth,
        UINT_32           numMipLevel,
        ADDR2_MIP_INFO*   pMipInfo) const;

    VOID GetMetaMiptailInfo(
        ADDR2_META_MIP_INFO*    pInfo,
        Dim3d                   mipCoord,
        UINT_32                 numMipInTail,
        Dim3d*                  pMetaBlkDim) const;

    Dim3d GetMipStartPos(
        AddrResourceType  resourceType,
        AddrSwizzleMode   swizzleMode,
        UINT_32           width,
        UINT_32           height,
        UINT_32           depth,
        UINT_32           blockWidth,
        UINT_32           blockHeight,
        UINT_32           blockDepth,
        UINT_32           mipId,
        UINT_32           log2ElementBytes,
        UINT_32*          pMipTailBytesOffset) const;

    /// Picks the major axis from the mip0 size in blocks.
    /// X is chosen when width >= height, Y when width < height. For thick surfaces the chosen axis must also be
    /// at least as large as the depth; if it is not, Z is returned.
    AddrMajorMode GetMajorMode(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          mip0WidthInBlk,
        UINT_32          mip0HeightInBlk,
        UINT_32          mip0DepthInBlk) const
    {
        BOOL_32 yMajor = (mip0WidthInBlk < mip0HeightInBlk);
        BOOL_32 xMajor = (yMajor == FALSE);

        if (IsThick(resourceType, swizzleMode))
        {
            yMajor = yMajor && (mip0HeightInBlk >= mip0DepthInBlk);
            xMajor = xMajor && (mip0WidthInBlk >= mip0DepthInBlk);
        }

        AddrMajorMode majorMode;
        if (xMajor)
        {
            majorMode = ADDR_MAJOR_X;
        }
        else if (yMajor)
        {
            majorMode = ADDR_MAJOR_Y;
        }
        else
        {
            majorMode = ADDR_MAJOR_Z;
        }

        return majorMode;
    }

    /// Returns the DCC compress block dimension for the given element size (table index is Log2(bpp / 8)).
    /// Thin surfaces use the Block256_2d entry with a depth of 1; otherwise the Block256_3dS entry is used for
    /// standard swizzle and the Block256_3dZ entry for everything else.
    Dim3d GetDccCompressBlk(
        AddrResourceType resourceType,
        AddrSwizzleMode  swizzleMode,
        UINT_32          bpp) const
    {
        UINT_32 index = Log2(bpp >> 3);
        Dim3d   compressBlkDim;

        if (IsThin(resourceType, swizzleMode))
        {
            compressBlkDim.w = Block256_2d[index].w;
            compressBlkDim.h = Block256_2d[index].h;
            compressBlkDim.d = 1;
        }
        else if (IsStandardSwizzle(resourceType, swizzleMode))
        {
            compressBlkDim = Block256_3dS[index];
        }
        else
        {
            compressBlkDim = Block256_3dZ[index];
        }

        return compressBlkDim;
    }

    static const UINT_32 MaxSeLog2      = 3;
    static const UINT_32 MaxRbPerSeLog2 = 2;

    // 256B block dimensions for 3D resources, indexed by Log2 of the element size in bytes
    // (see GetDccCompressBlk): one table for standard swizzle, one for the remaining thick modes.
    static const Dim3d   Block256_3dS[MaxNumOfBpp];
    static const Dim3d   Block256_3dZ[MaxNumOfBpp];

    static const UINT_32 MipTailOffset256B[];

    static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];

    // Number of entries in the meta equation cache below
    static const UINT_32 MaxCachedMetaEq = 2;

    Gfx9ChipSettings m_settings;

    // Cached meta equations and the parameters each one was associated with (parallel arrays)
    CoordEq      m_cachedMetaEq[MaxCachedMetaEq];
    MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq];
    // NOTE(review): presumably the cache slot to overwrite next -- confirm in GetMetaEquation
    UINT_32      m_metaEqOverrideIndex;
};

} // V2
} // Addr
} // namespace rocr
#endif


================================================
FILE: runtime/hsa-runtime/image/blit_kernel.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "blit_kernel.h"

#if (defined(WIN32) || defined(_WIN32))
#define NOMINMAX
#endif

#include <algorithm>
#include <atomic>
#include <sstream>
#include <string>

#include "image_manager.h"
#include "image_runtime.h"
#include "util.h"

#undef HSA_ARGUMENT_ALIGN_BYTES
#define HSA_ARGUMENT_ALIGN_BYTES 16

#include "core/inc/hsa_internal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/hsa_table_interface.h"

namespace rocr {
namespace image {

// Blit code object images with fixed sizes, defined in another translation
// unit. NOTE(review): not referenced in the visible part of this file;
// presumably legacy objects -- confirm.
extern uint8_t blit_object_gfx7xx[14608];
extern uint8_t blit_object_gfx8xx[15424];
extern uint8_t blit_object_gfx9xx[15432];

// Blit code object images, one per gfx target, defined in another translation
// unit. NOTE(review): presumably selected by GetPatchedBlitObject from the
// agent name -- confirm.
extern uint8_t ocl_blit_object_gfx700[];
extern uint8_t ocl_blit_object_gfx701[];
extern uint8_t ocl_blit_object_gfx702[];
extern uint8_t ocl_blit_object_gfx801[];
extern uint8_t ocl_blit_object_gfx802[];
extern uint8_t ocl_blit_object_gfx803[];
extern uint8_t ocl_blit_object_gfx805[];
extern uint8_t ocl_blit_object_gfx810[];
extern uint8_t ocl_blit_object_gfx900[];
extern uint8_t ocl_blit_object_gfx902[];
extern uint8_t ocl_blit_object_gfx904[];
extern uint8_t ocl_blit_object_gfx906[];
extern uint8_t ocl_blit_object_gfx908[];
extern uint8_t ocl_blit_object_gfx909[];
extern uint8_t ocl_blit_object_gfx90a[];
extern uint8_t ocl_blit_object_gfx90c[];
extern uint8_t ocl_blit_object_gfx942[];
extern uint8_t ocl_blit_object_gfx950[];
extern uint8_t ocl_blit_object_gfx1010[];
extern uint8_t ocl_blit_object_gfx1011[];
extern uint8_t ocl_blit_object_gfx1012[];
extern uint8_t ocl_blit_object_gfx1013[];
extern uint8_t ocl_blit_object_gfx1030[];
extern uint8_t ocl_blit_object_gfx1031[];
extern uint8_t ocl_blit_object_gfx1032[];
extern uint8_t ocl_blit_object_gfx1033[];
extern uint8_t ocl_blit_object_gfx1034[];
extern uint8_t ocl_blit_object_gfx1035[];
extern uint8_t ocl_blit_object_gfx1036[];
extern uint8_t ocl_blit_object_gfx1100[];
extern uint8_t ocl_blit_object_gfx1101[];
extern uint8_t ocl_blit_object_gfx1102[];
extern uint8_t ocl_blit_object_gfx1103[];
extern uint8_t ocl_blit_object_gfx1150[];
extern uint8_t ocl_blit_object_gfx1151[];
extern uint8_t ocl_blit_object_gfx1152[];
extern uint8_t ocl_blit_object_gfx1153[];
extern uint8_t ocl_blit_object_gfx1200[];
extern uint8_t ocl_blit_object_gfx1201[];

// Arguments inserted by OCL compiler, all zero here.
// This struct is the trailing member of every KernelArgs struct in this file;
// it is never written explicitly and stays zero because each caller memsets
// its whole KernelArgs buffer before filling in the explicit arguments.
struct OCLHiddenArgs {
  uint64_t offset_x;
  uint64_t offset_y;
  uint64_t offset_z;
  void* printf_buffer;
  void* enqueue;
  void* enqueue2;
  void* multi_grid;
};

// Allocates `size` bytes from the image runtime's kernarg pool and grants
// `agent` access to the allocation.
// Returns the allocation, or NULL when either step fails (the memory is
// released again if only the access grant fails).
static void* Allocate(hsa_agent_t agent, size_t size) {
  // Kernel arguments come from the host accessible kernarg pool.
  hsa_amd_memory_pool_t kernarg_pool = ImageRuntime::instance()->kernarg_pool();

  void* ptr = NULL;
  hsa_status_t status =
      AMD::hsa_amd_memory_pool_allocate(kernarg_pool, size, 0, &ptr);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  status = AMD::hsa_amd_agents_allow_access(1, &agent, NULL, ptr);
  assert(status == HSA_STATUS_SUCCESS);
  if (status == HSA_STATUS_SUCCESS) {
    return ptr;
  }

  // The agent cannot use the buffer; give it back to the pool.
  AMD::hsa_amd_memory_pool_free(ptr);
  return NULL;
}

// Construction performs no work; members are default-initialized.
BlitKernel::BlitKernel() = default;

// Destruction performs no explicit work; executables are released by Cleanup().
BlitKernel::~BlitKernel() = default;

// Nothing to set up ahead of time; always reports success.
hsa_status_t BlitKernel::Initialize() {
  return HSA_STATUS_SUCCESS;
}

// Destroys every cached executable and empties both the executable and the
// code object maps. Always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::Cleanup() {
  for (const auto& isa_and_executable : code_executable_map_) {
    HSA::hsa_executable_destroy(isa_and_executable.second);
  }

  code_executable_map_.clear();
  code_object_map_.clear();

  return HSA_STATUS_SUCCESS;
}

// Builds (or reuses) the blit kernel executable for `agent` and fills
// `blit_code_catalog` with one BlitCodeInfo per kernel operation.
//
// An executable already cached for an ISA compatible with the agent's ISA is
// reused. Otherwise the patched code object for the agent's name is loaded
// into a new executable, which is cached only once it has been loaded and
// frozen successfully; a failed executable is destroyed instead of being
// left in the cache.
//
// Returns HSA_STATUS_SUCCESS, or the status of the first failing HSA call.
hsa_status_t BlitKernel::BuildBlitCode(
    hsa_agent_t agent, std::vector<BlitCodeInfo>& blit_code_catalog) {
  // Find existing kernels in the list that have compatible ISA.
  hsa_isa_t agent_isa = {0};
  hsa_status_t status = HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  std::lock_guard<std::mutex> lock(lock_);

  for (const auto& entry : code_executable_map_) {
    bool isa_compatible = false;
    hsa_isa_t code_isa = {entry.first};

    status = HSA::hsa_isa_compatible(code_isa, agent_isa, &isa_compatible);
    if (HSA_STATUS_SUCCESS != status) {
      return status;
    }

    if (isa_compatible) {
      return PopulateKernelCode(agent, entry.second, blit_code_catalog);
    }
  }

  // No existing compatible kernels. Build new kernels.
  hsa_code_object_t code_object = {0};

  // Get the target name
  char agent_name[64] = {0};
  status = HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, &agent_name);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Get the patched code object
  uint8_t* patched_code_object;
  status = BlitKernel::GetPatchedBlitObject(agent_name, &patched_code_object);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Pass the patched code object
  code_object.handle = reinterpret_cast<uint64_t>(patched_code_object);

  code_object_map_[agent_isa.handle] = code_object;

  // Create executable.
  hsa_executable_t executable = {0};
  status =
      HSA::hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Load the code object, then freeze the executable.
  status = HSA::hsa_executable_load_code_object(executable, agent, code_object, "");
  if (HSA_STATUS_SUCCESS == status) {
    status = HSA::hsa_executable_freeze(executable, "");
  }

  if (HSA_STATUS_SUCCESS != status) {
    // Do not cache a half-built executable: a later call would pick it up as
    // "compatible" instead of rebuilding, and it would stay alive until
    // Cleanup().
    HSA::hsa_executable_destroy(executable);
    return status;
  }

  code_executable_map_[agent_isa.handle] = executable;

  return PopulateKernelCode(agent, executable, blit_code_catalog);
}

// Copies pixels from a linear buffer into a region of an image.
//
// A 1DB destination is written with a plain hsa_memory_copy into the image's
// backing memory. Every other geometry is handled by dispatching the
// KERNEL_OP_COPY_BUFFER_TO_IMAGE kernel from `blit_code_catalog` on
// `blit_queue`.
//
// src_row_pitch / src_slice_pitch are the buffer pitches in bytes; they are
// converted to pixel pitches before being handed to the kernel.
//
// Returns the status of the copy or dispatch, the failure status of
// ConvertImage, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when the kernel argument
// buffer cannot be allocated.
hsa_status_t BlitKernel::CopyBufferToImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  if (dst_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    ImageManager* manager = ImageRuntime::instance()->image_manager(dst_image.component);

    const uint32_t element_size =
        manager->GetImageProperty(dst_image.component, dst_image.desc.format,
                                  dst_image.desc.geometry).element_size;

    const size_t dst_origin = image_region.offset.x * element_size;
    char* dst_memory = reinterpret_cast<char*>(dst_image.data) + dst_origin;
    const size_t size = image_region.range.x * element_size;

    return HSA::hsa_memory_copy(dst_memory, src_memory, size);
  }

  const Image* dst_image_view = NULL;

  hsa_status_t status = ConvertImage(dst_image, &dst_image_view);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  assert(dst_image_view != NULL);

  hsa_kernel_dispatch_packet_t packet = { };

  const BlitCodeInfo& blit_code =
      blit_code_catalog.at(KERNEL_OP_COPY_BUFFER_TO_IMAGE);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout:
  //   buffer        start of the source pixels in the input buffer
  //   image         destination image handle (same value in every slot)
  //   pixelOrigin   first destination pixel (x, y, z)
  //   format[0]     number of channels
  //   format[1]     size of one channel
  //   format[2]     max(DWORDs per pixel, 1)
  //   format[3]     image geometry
  //   pitch         buffer row pitch in pixels
  //   slice_pitch   buffer slice pitch in pixels
  //   ocl           compiler-inserted hidden arguments, left zeroed
  struct KernelArgs {
    const void* buffer;
    uint64_t image[5];
    int32_t pixelOrigin[4];
    uint32_t format[4];
    uint64_t pitch;
    uint64_t slice_pitch;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args =
      static_cast<KernelArgs*>(Allocate(dst_image_view->component, sizeof(KernelArgs)));
  assert(args != NULL);
  if (args == NULL) {
    // Release builds compile the assert away; fail cleanly instead of writing
    // through a null pointer, and do not leak the converted view.
    if (&dst_image != dst_image_view) {
      Image::Destroy(dst_image_view);
    }
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  memset(args, 0, sizeof(KernelArgs));
  args->buffer = src_memory;
  for (auto& img : args->image)
    img = dst_image_view->Convert();
  args->pixelOrigin[0] = image_region.offset.x;
  args->pixelOrigin[1] = image_region.offset.y;
  args->pixelOrigin[2] = image_region.offset.z;

  ImageManager* manager = ImageRuntime::instance()->image_manager(dst_image_view->component);

  const uint32_t element_size =
      manager->GetImageProperty(dst_image_view->component,
                                dst_image_view->desc.format,
                                dst_image_view->desc.geometry).element_size;

  // Number of DWORD reads needed per pixel; elements smaller than a DWORD
  // still take one read.
  uint32_t buffer_read_count = element_size / sizeof(uint32_t);
  buffer_read_count = (buffer_read_count == 0) ? 1 : buffer_read_count;

  const uint32_t num_channel = GetNumChannel(*dst_image_view);
  const uint32_t size_per_channel = element_size / num_channel;

  args->format[0] = num_channel;
  args->format[1] = size_per_channel;
  args->format[2] = buffer_read_count;
  args->format[3] = dst_image_view->desc.geometry;

  unsigned long buffer_pitch[2] = {0, 0};
  CalcBufferRowSlicePitchesInPixel(dst_image_view->desc.geometry, element_size,
                                   image_region.range, src_row_pitch,
                                   src_slice_pitch, buffer_pitch);

  args->pitch = buffer_pitch[0];
  args->slice_pitch = buffer_pitch[1];

  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*dst_image_view, image_region.range, packet);

  status = LaunchKernel(blit_queue, packet);

  // ConvertImage may hand back the original image; only destroy a new view.
  if (&dst_image != dst_image_view) {
    Image::Destroy(dst_image_view);
  }
  AMD::hsa_amd_memory_pool_free(args);

  return status;
}

// Copies a region of an image into a linear buffer.
//
// A 1DB source is read with a plain hsa_memory_copy from the image's backing
// memory. Every other geometry is handled by dispatching the
// KERNEL_OP_COPY_IMAGE_TO_BUFFER kernel from `blit_code_catalog` on
// `blit_queue`.
//
// dst_row_pitch / dst_slice_pitch are the buffer pitches in bytes; they are
// converted to pixel pitches before being handed to the kernel.
//
// Returns the status of the copy or dispatch, the failure status of
// ConvertImage, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when the kernel argument
// buffer cannot be allocated.
hsa_status_t BlitKernel::CopyImageToBuffer(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  if (src_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    ImageManager* manager = ImageRuntime::instance()->image_manager(src_image.component);

    const uint32_t element_size =
        manager->GetImageProperty(src_image.component, src_image.desc.format,
                                  src_image.desc.geometry).element_size;

    const size_t src_origin = image_region.offset.x * element_size;
    const char* src_memory =
        reinterpret_cast<const char*>(src_image.data) + src_origin;
    const size_t size = image_region.range.x * element_size;

    return HSA::hsa_memory_copy(dst_memory, src_memory, size);
  }

  const Image* src_image_view = NULL;

  hsa_status_t status = ConvertImage(src_image, &src_image_view);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  assert(src_image_view != NULL);

  hsa_kernel_dispatch_packet_t packet = { };

  const BlitCodeInfo& blit_code =
      blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_TO_BUFFER);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout:
  //   image         source image handle (same value in every slot)
  //   buffer        start of the output pixels in the destination buffer
  //   pixelOrigin   first source pixel (x, y, z)
  //   format[0]     number of channels
  //   format[1]     size of one channel
  //   format[2]     max(DWORDs per pixel, 1)
  //   format[3]     image geometry
  //   pitch         buffer row pitch in pixels
  //   slice_pitch   buffer slice pitch in pixels
  //   ocl           compiler-inserted hidden arguments, left zeroed
  struct KernelArgs {
    uint64_t image[5];
    void* buffer;
    int32_t pixelOrigin[4];
    uint32_t format[4];
    uint64_t pitch;
    uint64_t slice_pitch;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args =
      static_cast<KernelArgs*>(Allocate(src_image_view->component, sizeof(KernelArgs)));
  assert(args != NULL);
  if (args == NULL) {
    // Release builds compile the assert away; fail cleanly instead of writing
    // through a null pointer, and do not leak the converted view.
    if (&src_image != src_image_view) {
      Image::Destroy(src_image_view);
    }
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  memset(args, 0, sizeof(KernelArgs));
  for (auto& img : args->image)
    img = src_image_view->Convert();
  args->buffer = dst_memory;
  args->pixelOrigin[0] = image_region.offset.x;
  args->pixelOrigin[1] = image_region.offset.y;
  args->pixelOrigin[2] = image_region.offset.z;

  ImageManager* manager = ImageRuntime::instance()->image_manager(src_image_view->component);

  const uint32_t element_size =
      manager->GetImageProperty(src_image_view->component,
                                src_image_view->desc.format,
                                src_image_view->desc.geometry).element_size;

  // Number of DWORD writes needed per pixel; elements smaller than a DWORD
  // still take one write.
  uint32_t buffer_write_count = element_size / sizeof(uint32_t);
  buffer_write_count = (buffer_write_count == 0) ? 1 : buffer_write_count;

  const uint32_t num_channel = GetNumChannel(*src_image_view);
  const uint32_t size_per_channel = element_size / num_channel;

  args->format[0] = num_channel;
  args->format[1] = size_per_channel;
  args->format[2] = buffer_write_count;
  args->format[3] = src_image_view->desc.geometry;

  unsigned long buffer_pitch[2] = {0, 0};
  CalcBufferRowSlicePitchesInPixel(src_image_view->desc.geometry, element_size,
                                   image_region.range, dst_row_pitch,
                                   dst_slice_pitch, buffer_pitch);

  args->pitch = buffer_pitch[0];
  args->slice_pitch = buffer_pitch[1];

  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*src_image_view, image_region.range, packet);

  status = LaunchKernel(blit_queue, packet);

  // ConvertImage may hand back the original image; only destroy a new view.
  if (&src_image != src_image_view) {
    Image::Destroy(src_image_view);
  }
  AMD::hsa_amd_memory_pool_free(args);

  return status;
}

// Copies a `size` region from `src_image` (at `src_origin`) to `dst_image`
// (at `dst_origin`) by dispatching a blit kernel on `blit_queue`.
//
// With KERNEL_OP_COPY_IMAGE_DEFAULT both images are first passed through
// ConvertImage and the kernel is picked from whether each converted view is a
// 1DB image. Any other `copy_type` selects that catalog entry directly and
// uses the images unconverted.
//
// Both images must belong to the same component (asserted).
//
// Returns the dispatch status, the failure status of ConvertImage, or
// HSA_STATUS_ERROR_OUT_OF_RESOURCES when the kernel argument buffer cannot be
// allocated.
hsa_status_t BlitKernel::CopyImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& dst_image, const Image& src_image,
    const hsa_dim3_t& dst_origin, const hsa_dim3_t& src_origin,
    const hsa_dim3_t size, KernelOp copy_type) {
  assert(src_image.component.handle == dst_image.component.handle);

  const Image* src_image_view = &src_image;
  const Image* dst_image_view = &dst_image;
  const BlitCodeInfo* blit_code = NULL;

  if (copy_type == KERNEL_OP_COPY_IMAGE_DEFAULT) {
    // Linear to linear image copy.

    hsa_status_t status = ConvertImage(src_image, &src_image_view);
    if (HSA_STATUS_SUCCESS != status) {
      return status;
    }

    assert(src_image_view != NULL);

    status = ConvertImage(dst_image, &dst_image_view);
    if (HSA_STATUS_SUCCESS != status) {
      // The source view was already created; release it before bailing out.
      if (&src_image != src_image_view) {
        Image::Destroy(src_image_view);
      }
      return status;
    }

    assert(dst_image_view != NULL);

    const hsa_ext_image_geometry_t src_geometry = src_image_view->desc.geometry;
    const hsa_ext_image_geometry_t dst_geometry = dst_image_view->desc.geometry;

    if (src_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
        dst_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_DEFAULT);
    } else if (src_geometry == HSA_EXT_IMAGE_GEOMETRY_1DB &&
               dst_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_1DB_TO_REG);
    } else if (src_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
               dst_geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_REG_TO_1DB);
    } else {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_1DB);
    }
  } else {
    blit_code = &blit_code_catalog.at(copy_type);
  }

  hsa_kernel_dispatch_packet_t packet = { };

  packet.kernel_object = blit_code->code_handle_;
  packet.group_segment_size = blit_code->group_segment_size_;
  packet.private_segment_size = blit_code->private_segment_size_;

  // Kernel argument layout: image handles (same value in every slot), the
  // copy origins, the geometry of each image, then the zeroed hidden args.
  struct KernelArgs {
    uint64_t src[5];
    uint64_t dst[5];
    int32_t srcOrigin[4];
    int32_t dstOrigin[4];
    int32_t srcFormat;
    int32_t dstFormat;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args =
      static_cast<KernelArgs*>(Allocate(dst_image_view->component, sizeof(KernelArgs)));
  assert(args != NULL);
  if (args == NULL) {
    // Release builds compile the assert away; fail cleanly instead of writing
    // through a null pointer, and do not leak the converted views.
    if (&src_image != src_image_view) {
      Image::Destroy(src_image_view);
    }
    if (&dst_image != dst_image_view) {
      Image::Destroy(dst_image_view);
    }
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  memset(args, 0, sizeof(KernelArgs));

  for (auto& img : args->src)
    img = src_image_view->Convert();
  args->srcFormat = src_image_view->desc.geometry;
  args->srcOrigin[0] = src_origin.x;
  args->srcOrigin[1] = src_origin.y;
  args->srcOrigin[2] = src_origin.z;

  for (auto& img : args->dst)
    img = dst_image_view->Convert();
  args->dstFormat = dst_image_view->desc.geometry;
  args->dstOrigin[0] = dst_origin.x;
  args->dstOrigin[1] = dst_origin.y;
  args->dstOrigin[2] = dst_origin.z;

  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*src_image_view, *dst_image_view, size, packet);

  hsa_status_t status = LaunchKernel(blit_queue, packet);

  // Only destroy views that ConvertImage created.
  if (&src_image != src_image_view) {
    Image::Destroy(src_image_view);
  }

  if (&dst_image != dst_image_view) {
    Image::Destroy(dst_image_view);
  }

  AMD::hsa_amd_memory_pool_free(args);

  return status;
}

// Fills `region` of `image` with `pattern` by dispatching a clear kernel on
// `blit_queue` (KERNEL_OP_CLEAR_IMAGE_1DB for 1DB images, otherwise
// KERNEL_OP_CLEAR_IMAGE).
//
// `pattern` must point to at least four 32-bit values; all four are copied
// into the kernel arguments.
//
// Returns the dispatch status, or HSA_STATUS_ERROR_OUT_OF_RESOURCES when the
// kernel argument buffer cannot be allocated.
hsa_status_t BlitKernel::FillImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& image, const void* pattern,
    const hsa_ext_image_region_t& region) {
  hsa_kernel_dispatch_packet_t packet = { };

  const BlitCodeInfo& blit_code =
      (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
          ? blit_code_catalog.at(KERNEL_OP_CLEAR_IMAGE)
          : blit_code_catalog.at(KERNEL_OP_CLEAR_IMAGE_1DB);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout: image handle (same value in every slot), image
  // geometry, access type, fill pattern, region origin, zeroed hidden args.
  struct KernelArgs {
    uint64_t image[5];
    int32_t format;
    uint32_t type;
    uint32_t data[4];
    int32_t origin[4];
    OCLHiddenArgs ocl;
  };

  KernelArgs* args =
      static_cast<KernelArgs*>(Allocate(image.component, sizeof(KernelArgs)));
  assert(args != NULL);
  if (args == NULL) {
    // Release builds compile the assert away; fail cleanly instead of writing
    // through a null pointer.
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  memset(args, 0, sizeof(KernelArgs));

  for (auto& img : args->image)
    img = image.Convert();
  args->format = image.desc.geometry;

  const uint32_t* pattern_words = static_cast<const uint32_t*>(pattern);
  for (int i = 0; i < 4; i++)
    args->data[i] = pattern_words[i];

  args->origin[0] = region.offset.x;
  args->origin[1] = region.offset.y;
  args->origin[2] = region.offset.z;
  args->type = GetImageAccessType(image);

  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(image, region.range, packet);

  hsa_status_t status = LaunchKernel(blit_queue, packet);

  AMD::hsa_amd_memory_pool_free(args);

  return status;
}

// Kernel symbol names in the "&__<name>_kernel" form, one per kernel
// operation (KERNEL_OP_COUNT entries).
// NOTE(review): not referenced in the visible part of this file, where
// PopulateKernelCode uses ocl_kernel_name_ instead -- confirm whether this
// table is still needed.
const char *BlitKernel::kernel_name_[KERNEL_OP_COUNT] = {
      "&__copy_image_to_buffer_kernel",
      "&__copy_buffer_to_image_kernel",
      "&__copy_image_default_kernel",
      "&__copy_image_linear_to_standard_kernel",
      "&__copy_image_standard_to_linear_kernel",
      "&__copy_image_1db_kernel",
      "&__copy_image_1db_to_reg_kernel",
      "&__copy_image_reg_to_1db_kernel",
      "&__clear_image_kernel",
      "&__clear_image_1db_kernel"};

// Kernel descriptor symbol names that PopulateKernelCode looks up in the
// loaded executable. Entry i becomes element i of the blit code catalog, and
// the copy/fill routines index that catalog with KERNEL_OP_* values, so the
// order here has to follow the kernel operation enumeration.
const char *BlitKernel::ocl_kernel_name_[KERNEL_OP_COUNT] = {
      "copy_image_to_buffer.kd",
      "copy_buffer_to_image.kd",
      "copy_image_default.kd",
      "copy_image_linear_to_standard.kd",
      "copy_image_standard_to_linear.kd",
      "copy_image_1db.kd",
      "copy_image_1db_to_reg.kd",
      "copy_image_reg_to_1db.kd",
      "clear_image.kd",
      "clear_image_1db.kd"};

// Rebuilds `blit_code_catalog` from `executable`: for every kernel operation
// the kernel symbol is looked up by its ocl_kernel_name_ entry and its kernel
// object handle, group segment size and private segment size are recorded.
//
// On the first failing query the catalog is emptied and that status is
// returned; otherwise the catalog holds KERNEL_OP_COUNT entries and
// HSA_STATUS_SUCCESS is returned.
hsa_status_t BlitKernel::PopulateKernelCode(
    hsa_agent_t agent, hsa_executable_t executable,
    std::vector<BlitCodeInfo>& blit_code_catalog) {
  blit_code_catalog.clear();

  for (int op = 0; op < KERNEL_OP_COUNT; ++op) {
    hsa_executable_symbol_t kernel_symbol = {0};
    BlitCodeInfo blit_code = {0};

    // Each query runs only if everything before it succeeded.
    hsa_status_t status = HSA::hsa_executable_get_symbol_by_name(
        executable, ocl_kernel_name_[op], &agent, &kernel_symbol);

    if (HSA_STATUS_SUCCESS == status) {
      status = HSA::hsa_executable_symbol_get_info(
          kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
          &blit_code.code_handle_);
    }

    if (HSA_STATUS_SUCCESS == status) {
      status = HSA::hsa_executable_symbol_get_info(
          kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
          &blit_code.group_segment_size_);
    }

    if (HSA_STATUS_SUCCESS == status) {
      status = HSA::hsa_executable_symbol_get_info(
          kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
          &blit_code.private_segment_size_);
    }

    if (HSA_STATUS_SUCCESS != status) {
      // Never hand back a partially filled catalog.
      blit_code_catalog.clear();
      return status;
    }

    blit_code_catalog.push_back(blit_code);
  }

  assert(blit_code_catalog.size() == KERNEL_OP_COUNT);
  return HSA_STATUS_SUCCESS;
}

// Converts buffer row/slice pitches from bytes to pixels.
//
// out_pitch_pixel[0] receives the row pitch: the larger of copy_size.x and
// in_row_pitch_byte / element_size.
// out_pitch_pixel[1] receives the slice pitch: equal to the row pitch for 1DA
// images, otherwise the larger of (row pitch * copy_size.y) and
// in_slice_pitch_byte / element_size.
void BlitKernel::CalcBufferRowSlicePitchesInPixel(
    hsa_ext_image_geometry_t geometry, uint32_t element_size,
    const hsa_dim3_t& copy_size, size_t in_row_pitch_byte,
    size_t in_slice_pitch_byte, unsigned long* out_pitch_pixel) {
  const unsigned long min_row_pixels = static_cast<unsigned long>(copy_size.x);
  const unsigned long given_row_pixels =
      static_cast<unsigned long>(in_row_pitch_byte / element_size);
  out_pitch_pixel[0] = std::max(min_row_pixels, given_row_pixels);

  if (geometry == HSA_EXT_IMAGE_GEOMETRY_1DA) {
    // Each array layer of a 1D array is a single row.
    out_pitch_pixel[1] = out_pitch_pixel[0];
  } else {
    const unsigned long min_slice_pixels =
        static_cast<unsigned long>(out_pitch_pixel[0] * copy_size.y);
    const unsigned long given_slice_pixels =
        static_cast<unsigned long>(in_slice_pitch_byte / element_size);
    out_pitch_pixel[1] = std::max(min_slice_pixels, given_slice_pixels);
  }

  assert((out_pitch_pixel[0] <= out_pitch_pixel[1]));
}

// Returns the number of dimensions used for the image's geometry, taken from
// a table indexed by the geometry enumeration value.
uint32_t BlitKernel::GetDimSize(const Image& image) {
  static const uint32_t kDimsByGeometry[] = {
      /* HSA_EXT_IMAGE_GEOMETRY_1D       */ 1,
      /* HSA_EXT_IMAGE_GEOMETRY_2D       */ 2,
      /* HSA_EXT_IMAGE_GEOMETRY_3D       */ 3,
      /* HSA_EXT_IMAGE_GEOMETRY_1DA      */ 2,
      /* HSA_EXT_IMAGE_GEOMETRY_2DA      */ 3,
      /* HSA_EXT_IMAGE_GEOMETRY_1DB      */ 1,
      /* HSA_EXT_IMAGE_GEOMETRY_2DDEPTH  */ 2,
      /* HSA_EXT_IMAGE_GEOMETRY_2DADEPTH */ 3,
  };

  const auto geometry = image.desc.geometry;
  return kDimsByGeometry[geometry];
}

// Returns the channel count for the image's channel order, taken from a table
// indexed by the channel order enumeration value.
uint32_t BlitKernel::GetNumChannel(const Image& image) {
  static const uint32_t kChannelsByOrder[] = {
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_A             */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_R             */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RX            */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RG            */ 2,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RGX           */ 2,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RA            */ 2,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RGB           */ 3,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX          */ 3,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA          */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA          */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB          */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR          */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB          */ 3,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX         */ 3,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA         */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA         */ 4,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY     */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE     */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH         */ 1,
      /* HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL */ 1,
  };

  const auto order = image.desc.format.channel_order;
  return kChannelsByOrder[order];
}

// Classifies the image's channel type as float (0), signed integer (1) or
// unsigned integer (2) access, using a table indexed by the channel type
// enumeration value.
uint32_t BlitKernel::GetImageAccessType(const Image& image) {
  enum AccessType {
    ACCESS_TYPE_F = 0,
    ACCESS_TYPE_I = 1,
    ACCESS_TYPE_UI = 2,
  };

  static const uint32_t kAccessByChannelType[] = {
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8          */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16         */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8          */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16         */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24         */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555     */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565     */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010  */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8         */ ACCESS_TYPE_I,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16        */ ACCESS_TYPE_I,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32        */ ACCESS_TYPE_I,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8       */ ACCESS_TYPE_UI,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16      */ ACCESS_TYPE_UI,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32      */ ACCESS_TYPE_UI,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT          */ ACCESS_TYPE_F,
      /* HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT               */ ACCESS_TYPE_F,
  };

  const auto channel_type = image.desc.format.channel_type;
  return kAccessByChannelType[channel_type];
}

// Fills in the dispatch dimensions, grid size and workgroup size of `packet`
// for a blit over `range`, based on the image geometry:
//   1D / 1DA / 1DB       setup 2, grid (x, y, 1),  workgroup 64x1x1
//   2D family (incl. 2DA) setup 3, grid (x, y, z), workgroup 8x8x1
//   3D                   setup 3, grid (x, y, z),  workgroup 4x4x4
// Any other geometry value leaves `packet` untouched.
void BlitKernel::CalcWorkingSize(const Image& image, const hsa_dim3_t& range,
                                 hsa_kernel_dispatch_packet_t& packet) {
  switch (image.desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      packet.setup = 2;
      packet.workgroup_size_x = 64;
      packet.workgroup_size_y = 1;
      packet.workgroup_size_z = 1;
      packet.grid_size_x = range.x;
      packet.grid_size_y = range.y;
      packet.grid_size_z = 1;
      return;

    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
      packet.setup = 3;
      packet.workgroup_size_x = 8;
      packet.workgroup_size_y = 8;
      packet.workgroup_size_z = 1;
      packet.grid_size_x = range.x;
      packet.grid_size_y = range.y;
      packet.grid_size_z = range.z;
      return;

    case HSA_EXT_IMAGE_GEOMETRY_3D:
      packet.setup = 3;
      packet.workgroup_size_x = 4;
      packet.workgroup_size_y = 4;
      packet.workgroup_size_z = 4;
      packet.grid_size_x = range.x;
      packet.grid_size_y = range.y;
      packet.grid_size_z = range.z;
      return;
  }
}

void BlitKernel::CalcWorkingSize(const Image& src_image, const Image& dst_image,
                                 const hsa_dim3_t& range,
                                 hsa_kernel_dispatch_packet_t& packet) {
  if (GetDimSize(src_image) < GetDimSize(dst_image)) {
    CalcWorkingSize(src_image, range, packet);
  } else {
    CalcWorkingSize(dst_image, range, packet);
  }
}

// Produces a view of |original_image| whose channel type/order (and, for
// depth images, geometry) are remapped to a form the blit kernels can access
// uniformly, while preserving the per-pixel size.
//
// On success |*new_image| is either
//   - &original_image, when neither channel type nor channel order needs to
//     change (nothing is allocated in that case), or
//   - a newly created Image obtained from Image::Create() that the caller
//     owns and must release once it is done with it.
//
// Returns:
//   HSA_STATUS_SUCCESS                    on success.
//   HSA_STATUS_ERROR_INVALID_ARGUMENT     if the image's channel type or
//                                         order is outside the tables below.
//   HSA_STATUS_ERROR_OUT_OF_RESOURCES     if the image view cannot be created.
//   the status from ModifyImageSrd()      if rewriting the SRD fails.
// |*new_image| is not written on any failure path.
hsa_status_t BlitKernel::ConvertImage(const Image& original_image,
                                      const Image** new_image) {
  // To simplify the kernel, some particular image channel types are converted
  // to a new channel type, while preserving the actual per pixel size.
  // E.g.: a SNORM INT8 is converted into UNSIGNED INT8. This way the
  // kernel can just use read_imageui on all images.

  static const uint32_t kTypeConvertTable[] = {
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32  // HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT
  };

  // To simplify the kernel, some particular image channel orders are converted
  // to a new channel order, while preserving the actual per pixel size.
  // E.g.: a CHANNEL ORDER A is converted into CHANNEL ORDER R. This way the
  // kernel can just read the first components of vector4 on all images.
  static const uint32_t kOrderConvertTable[] = {
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_A
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_R
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,   // HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,   // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,  // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,  // HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
  };

  static const size_t kNumChannelTypes =
      sizeof(kTypeConvertTable) / sizeof(kTypeConvertTable[0]);
  static const size_t kNumChannelOrders =
      sizeof(kOrderConvertTable) / sizeof(kOrderConvertTable[0]);

  const uint32_t current_type = original_image.desc.format.channel_type;
  const uint32_t current_order = original_image.desc.format.channel_order;

  // Reject values the tables do not cover instead of reading past their end.
  if ((current_type >= kNumChannelTypes) ||
      (current_order >= kNumChannelOrders)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  uint32_t converted_type = kTypeConvertTable[current_type];
  uint32_t converted_order = kOrderConvertTable[current_order];

  // Nothing to convert: hand back the original image, no allocation needed.
  if ((current_type == converted_type) && (current_order == converted_order)) {
    *new_image = &original_image;
    return HSA_STATUS_SUCCESS;
  }

  // Handle formats that drop channels on conversion, only usable with RGB(X)
  if ((current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) ||
      (current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) ||
      (current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010)) {
    converted_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
  }

  // For internal book keeping, depth isn't a HW type.
  const hsa_ext_image_geometry_t current_geometry =
      original_image.desc.geometry;
  hsa_ext_image_geometry_t converted_geometry = current_geometry;
  if (converted_geometry == HSA_EXT_IMAGE_GEOMETRY_2DDEPTH) {
    converted_geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
  } else if (converted_geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH) {
    converted_geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
  }

  hsa_ext_image_format_t new_format = {
      static_cast<hsa_ext_image_channel_type_t>(converted_type),
      static_cast<hsa_ext_image_channel_order_t>(converted_order)};

  Image* new_image_handle = Image::Create(original_image.component);
  if (new_image_handle == NULL) {
    // Do not dereference a failed allocation.
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Start from a copy of the original image, then patch geometry and SRD.
  *new_image_handle = original_image;
  new_image_handle->desc.geometry = converted_geometry;

  const hsa_status_t status =
      ImageRuntime::instance()
          ->image_manager(new_image_handle->component)
          ->ModifyImageSrd(*new_image_handle, new_format);
  if (status != HSA_STATUS_SUCCESS) {
    // The caller never sees this handle on failure, so release it here;
    // otherwise the Image allocated above would be leaked.
    // NOTE(review): assumes Image::Destroy() is the release counterpart of
    // Image::Create() -- confirm against the Image declaration.
    Image::Destroy(new_image_handle);
    return status;
  }

  *new_image = new_image_handle;

  return HSA_STATUS_SUCCESS;
}

// Submits |packet| as a kernel dispatch on |blit_queue.queue_| and blocks
// until the dispatch's completion signal drops below 1.
//
// |packet| is modified: its header is overwritten with the invalid packet
// type and its completion_signal is replaced by a signal created here. The
// signal is destroyed before returning on every path that created it.
//
// Returns:
//   the status from hsa_signal_create()  if the completion signal cannot be
//                                        created (nothing is enqueued).
//   HSA_STATUS_ERROR                     if the signal wait returns a
//                                        non-zero value.
//   HSA_STATUS_SUCCESS                   otherwise.
hsa_status_t BlitKernel::LaunchKernel(BlitQueue& blit_queue,
                                      hsa_kernel_dispatch_packet_t& packet) {
  static const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;

  // Header written last to publish the packet: kernel dispatch type, barrier
  // bit cleared, system-scope acquire and release fences.
  static const uint16_t kDispatchPacketHeader =
      (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
      (0 << HSA_PACKET_HEADER_BARRIER) |
      (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
      (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);

  // Copying the packet content to the queue buffer is not atomic, so it is
  // possible that the packet has a valid packet type but invalid content.
  // To make sure the packet processor does not read an invalid packet, we
  // first initialize the packet type to invalid.
  packet.header = kInvalidPacketHeader;

  // Setup completion signal (initial value 1; waited on below until < 1).
  hsa_signal_t kernel_signal = {0};
  hsa_status_t status = HSA::hsa_signal_create(1, 0, NULL, &kernel_signal);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }
  packet.completion_signal = kernel_signal;

  // Populate the queue.
  hsa_queue_t* queue = blit_queue.queue_;
  // Used to wrap the write index into the ring buffer.
  // NOTE(review): masking with size - 1 assumes queue->size is a power of
  // two -- confirm against the queue creation code.
  const uint32_t bitmask = queue->size - 1;

  // Reserve write index.
  uint64_t write_index = HSA::hsa_queue_add_write_index_scacq_screl(queue, 1);

  // Busy-wait (no sleep/yield) until the reserved slot is within one queue
  // length of the read index, i.e. the slot is free to be overwritten.
  while (true) {
    // Wait until we have room in the queue;
    const uint64_t read_index = HSA::hsa_queue_load_read_index_relaxed(queue);
    if ((write_index - read_index) < queue->size) {
      break;
    }
  }

  // Populate queue buffer with AQL packet. The copy still carries the
  // invalid header set above.
  hsa_kernel_dispatch_packet_t* queue_buffer =
      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
  queue_buffer[write_index & bitmask] = packet;

  // Order the packet body copy before the header store that enables it.
  std::atomic_thread_fence(std::memory_order_release);

  // Enable packet.
  queue_buffer[write_index & bitmask].header = kDispatchPacketHeader;

  // Update doorbell register.
  HSA::hsa_signal_store_screlease(queue->doorbell_signal, write_index);

  // Wait for the packet to finish. The timeout is uint64_t(-1), the largest
  // representable value; a non-zero return value is treated as a failure.
  if (HSA::hsa_signal_wait_scacquire(kernel_signal, HSA_SIGNAL_CONDITION_LT, 1, uint64_t(-1),
                                     HSA_WAIT_STATE_ACTIVE) != 0) {
    status = HSA::hsa_signal_destroy(kernel_signal);
    assert(status == HSA_STATUS_SUCCESS);
    // Signal wait returned unexpected value.
    return HSA_STATUS_ERROR;
  }

  // Cleanup
  status = HSA::hsa_signal_destroy(kernel_signal);
  assert(status == HSA_STATUS_SUCCESS);

  return HSA_STATUS_SUCCESS;
}

// Looks up the embedded blit code object that matches |agent_name| (an exact,
// case-sensitive ISA name such as "gfx906") and stores its address in
// |*blit_code_object|.
//
// Returns HSA_STATUS_SUCCESS when a match is found. Otherwise returns
// HSA_STATUS_ERROR_INVALID_ISA_NAME and leaves |*blit_code_object| unchanged.
hsa_status_t BlitKernel::GetPatchedBlitObject(const char* agent_name,
                                              uint8_t** blit_code_object) {
  struct BlitObjectEntry {
    const char* isa_name;
    uint8_t* code_object;
  };

  // One row per supported ISA name.
  static const BlitObjectEntry kBlitObjects[] = {
      {"gfx700", ocl_blit_object_gfx700},
      {"gfx701", ocl_blit_object_gfx701},
      {"gfx702", ocl_blit_object_gfx702},
      {"gfx801", ocl_blit_object_gfx801},
      {"gfx802", ocl_blit_object_gfx802},
      {"gfx803", ocl_blit_object_gfx803},
      {"gfx805", ocl_blit_object_gfx805},
      {"gfx810", ocl_blit_object_gfx810},
      {"gfx900", ocl_blit_object_gfx900},
      {"gfx902", ocl_blit_object_gfx902},
      {"gfx904", ocl_blit_object_gfx904},
      {"gfx906", ocl_blit_object_gfx906},
      {"gfx908", ocl_blit_object_gfx908},
      {"gfx909", ocl_blit_object_gfx909},
      {"gfx90a", ocl_blit_object_gfx90a},
      {"gfx90c", ocl_blit_object_gfx90c},
      {"gfx942", ocl_blit_object_gfx942},
      {"gfx950", ocl_blit_object_gfx950},
      {"gfx1010", ocl_blit_object_gfx1010},
      {"gfx1011", ocl_blit_object_gfx1011},
      {"gfx1012", ocl_blit_object_gfx1012},
      {"gfx1013", ocl_blit_object_gfx1013},
      {"gfx1030", ocl_blit_object_gfx1030},
      {"gfx1031", ocl_blit_object_gfx1031},
      {"gfx1032", ocl_blit_object_gfx1032},
      {"gfx1033", ocl_blit_object_gfx1033},
      {"gfx1034", ocl_blit_object_gfx1034},
      {"gfx1035", ocl_blit_object_gfx1035},
      {"gfx1036", ocl_blit_object_gfx1036},
      {"gfx1100", ocl_blit_object_gfx1100},
      {"gfx1101", ocl_blit_object_gfx1101},
      {"gfx1102", ocl_blit_object_gfx1102},
      {"gfx1103", ocl_blit_object_gfx1103},
      {"gfx1150", ocl_blit_object_gfx1150},
      {"gfx1151", ocl_blit_object_gfx1151},
      {"gfx1152", ocl_blit_object_gfx1152},
      {"gfx1153", ocl_blit_object_gfx1153},
      {"gfx1200", ocl_blit_object_gfx1200},
      {"gfx1201", ocl_blit_object_gfx1201},
  };

  const std::string requested_isa(agent_name);

  for (const BlitObjectEntry& entry : kBlitObjects) {
    if (requested_isa == entry.isa_name) {
      *blit_code_object = entry.code_object;
      return HSA_STATUS_SUCCESS;
    }
  }

  return HSA_STATUS_ERROR_INVALID_ISA_NAME;
}

}  // namespace image
}  // namespace rocr
#undef HSA_ARGUMENT_ALIGN_BYTES


================================================
FILE: runtime/hsa-runtime/image/blit_kernel.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#define HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#include <assert.h>
#include <atomic>
#include <mutex>
#include <unordered_map>
#include <vector>

#include "inc/hsa.h"
#include "resource.h"

namespace rocr {
namespace image {

// Pairs an HSA queue used for blit dispatches with an atomic 64-bit index.
struct BlitQueue {
  // Queue the blit packets are written to.
  hsa_queue_t* queue_;
  // NOTE(review): presumably a cached queue index; its exact use is not
  // visible from this header -- confirm against the users of BlitQueue.
  volatile std::atomic<uint64_t> cached_index_;
};

// Describes one blit kernel: a 64-bit code handle plus the group and private
// segment sizes that accompany it.
struct BlitCodeInfo {
  uint64_t code_handle_;
  uint32_t group_segment_size_;
  uint32_t private_segment_size_;
};

// Declares the image blit operations (buffer<->image copy, image<->image copy
// and image fill) together with the helpers used to build and launch the
// corresponding kernels.
class BlitKernel {
 public:
  // Identifies a blit kernel. The values are contiguous from 0 so they can be
  // used as array indices; KERNEL_OP_COUNT is the number of kernels and sizes
  // the kernel name tables below.
  enum KernelOp {
    KERNEL_OP_COPY_IMAGE_TO_BUFFER = 0,
    KERNEL_OP_COPY_BUFFER_TO_IMAGE = 1,
    KERNEL_OP_COPY_IMAGE_DEFAULT = 2,
    KERNEL_OP_COPY_IMAGE_LINEAR_TO_STANDARD = 3,
    KERNEL_OP_COPY_IMAGE_STANDARD_TO_LINEAR = 4,
    KERNEL_OP_COPY_IMAGE_1DB = 5,
    KERNEL_OP_COPY_IMAGE_1DB_TO_REG = 6,
    KERNEL_OP_COPY_IMAGE_REG_TO_1DB = 7,
    KERNEL_OP_CLEAR_IMAGE = 8,
    KERNEL_OP_CLEAR_IMAGE_1DB = 9,
    KERNEL_OP_COUNT = 10
  };

  explicit BlitKernel();
  ~BlitKernel();

  // Lifetime management of the blit kernel state.
  hsa_status_t Initialize();
  hsa_status_t Cleanup();

  // Builds the blit kernels for |agent| and fills |blit_code_catalog| with
  // their BlitCodeInfo entries.
  hsa_status_t BuildBlitCode(hsa_agent_t agent,
                             std::vector<BlitCodeInfo>& blit_code_catalog);

  // Copies from the buffer |src_memory| (with the given row/slice pitches)
  // into |image_region| of |dst_image|.
  hsa_status_t CopyBufferToImage(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
      const Image& dst_image, const hsa_ext_image_region_t& image_region);

  // Copies |image_region| of |src_image| into the buffer |dst_memory| (with
  // the given row/slice pitches).
  hsa_status_t CopyImageToBuffer(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const Image& src_image, void* dst_memory, size_t dst_row_pitch,
      size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);

  // Copies a |size| region from |src_image| at |src_origin| to |dst_image| at
  // |dst_origin| using the kernel selected by |copy_type|.
  hsa_status_t CopyImage(BlitQueue& blit_queue,
                         const std::vector<BlitCodeInfo>& blit_code_catalog,
                         const Image& dst_image, const Image& src_image,
                         const hsa_dim3_t& dst_origin,
                         const hsa_dim3_t& src_origin, const hsa_dim3_t size,
                         KernelOp copy_type);

  // Fills |region| of |image| with |pattern|.
  hsa_status_t FillImage(BlitQueue& blit_queue,
                         const std::vector<BlitCodeInfo>& blit_code_catalog,
                         const Image& image, const void* pattern,
                         const hsa_ext_image_region_t& region);

 private:
  // Fills |blit_code_catalog| from the kernels found in |executable|.
  hsa_status_t PopulateKernelCode(
      hsa_agent_t agent, hsa_executable_t executable,
      std::vector<BlitCodeInfo>& blit_code_catalog);

  // Get the patched code object
  hsa_status_t GetPatchedBlitObject(const char* agent_name, uint8_t** code_object_handle);

  // Converts byte row/slice pitches of a buffer into pixel units, written to
  // |out_pitch_pixel|.
  inline void CalcBufferRowSlicePitchesInPixel(
      hsa_ext_image_geometry_t geometry, uint32_t element_size,
      const hsa_dim3_t& copy_size, size_t in_row_pitch_byte,
      size_t in_slice_pitch_byte, unsigned long* out_pitch_pixel);

  // Per-image queries used when setting up a dispatch.
  inline uint32_t GetDimSize(const Image& image);
  inline uint32_t GetNumChannel(const Image& image);
  inline uint32_t GetImageAccessType(const Image& image);

  // Sets the grid and workgroup dimensions of |packet| from a single image,
  // or from a source/destination pair.
  void CalcWorkingSize(const Image& image, const hsa_dim3_t& range,
                       hsa_kernel_dispatch_packet_t& packet);
  void CalcWorkingSize(const Image& src_image, const Image& dst_image,
                       const hsa_dim3_t& range,
                       hsa_kernel_dispatch_packet_t& packet);

  // Produces, through |new_image|, the image the blit kernels should access
  // in place of |original_image|.
  hsa_status_t ConvertImage(const Image& original_image,
                            const Image** new_image);

  // Enqueues |packet| on |queue|.
  hsa_status_t LaunchKernel(BlitQueue& queue,
                            hsa_kernel_dispatch_packet_t& packet);

  // The kernels' name.
  static const char* kernel_name_[KERNEL_OP_COUNT];
  static const char* ocl_kernel_name_[KERNEL_OP_COUNT];

  // Mapping of ISA and kernel object.
  std::unordered_map<uint64_t, hsa_code_object_t> code_object_map_;

  // Mapping of ISA and kernel executable.
  std::unordered_map<uint64_t, hsa_executable_t> code_executable_map_;

  std::mutex lock_;

  DISALLOW_COPY_AND_ASSIGN(BlitKernel);
};

}  // namespace image
}  // namespace rocr

#endif  // HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H


================================================
FILE: runtime/hsa-runtime/image/blit_object_gfx7xx.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <stdint.h>
namespace rocr {
namespace image {
uint8_t blit_object_gfx7xx[] = {127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 224, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 16, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0, 2, 0, 64, 0, 8, 0, 1, 0, 2, 0, 0, 96, 6, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 96, 5, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220, 37, 0, 0, 0, 0, 0, 0, 220, 37, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 46, 115, 116, 114, 116, 97, 98, 0, 46, 110, 111, 116, 101, 0, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 114, 101, 108, 97, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 69, 88, 80, 95, 69, 80, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 69, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 95, 73, 78, 86, 95, 69, 80, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 116, 111, 95, 98, 117, 102, 102, 101, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 98, 117, 102, 102, 101, 114, 95, 116, 111, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 100, 101, 102, 97, 117, 108, 116, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 108, 105, 110, 101, 
97, 114, 95, 116, 111, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 116, 111, 95, 108, 105, 110, 101, 97, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 116, 111, 95, 114, 101, 103, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 114, 101, 103, 95, 116, 111, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 4, 0, 0, 0, 26, 0, 0, 0, 3, 0, 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, 71, 80, 85, 0, 0, 4, 0, 0, 0, 41, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, 25, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 32, 72, 83, 65, 32, 82, 117, 110, 116, 105, 109, 101, 32, 70, 105, 110, 97, 108, 105, 122, 101, 114, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 26, 0, 0, 0, 5, 0, 0, 0, 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108, 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 48, 0, 37, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 96, 129, 63, 119, 62, 26, 57, 0, 192, 130, 63, 138, 105, 216, 57, 0, 32, 
132, 63, 29, 70, 81, 58, 0, 160, 133, 63, 124, 54, 172, 57, 0, 0, 135, 63, 180, 12, 123, 58, 0, 128, 136, 63, 4, 116, 64, 58, 0, 0, 138, 63, 170, 171, 38, 58, 0, 128, 139, 63, 31, 15, 46, 58, 0, 0, 141, 63, 219, 250, 86, 58, 0, 160, 142, 63, 104, 49, 7, 57, 0, 32, 144, 63, 24, 226, 14, 58, 0, 192, 145, 63, 234, 220, 244, 56, 0, 64, 147, 63, 120, 89, 81, 58, 0, 224, 148, 63, 71, 125, 39, 58, 0, 128, 150, 63, 185, 105, 33, 58, 0, 32, 152, 63, 140, 130, 63, 58, 0, 224, 153, 63, 65, 38, 11, 55, 0, 128, 155, 63, 157, 155, 211, 57, 0, 32, 157, 63, 57, 205, 118, 58, 0, 224, 158, 63, 4, 147, 41, 58, 0, 160, 160, 63, 125, 136, 2, 58, 0, 96, 162, 63, 24, 24, 2, 58, 0, 32, 164, 63, 112, 173, 40, 58, 0, 224, 165, 63, 77, 181, 118, 58, 0, 192, 167, 63, 78, 59, 217, 57, 0, 160, 169, 63, 117, 90, 45, 56, 0, 96, 171, 63, 173, 205, 81, 58, 0, 64, 173, 63, 82, 247, 65, 58, 0, 32, 175, 63, 107, 197, 91, 58, 0, 32, 177, 63, 116, 96, 253, 56, 0, 0, 179, 63, 149, 32, 14, 58, 0, 0, 181, 63, 127, 102, 30, 57, 0, 224, 182, 63, 25, 143, 108, 58, 0, 224, 184, 63, 59, 122, 93, 58, 0, 224, 186, 63, 144, 213, 122, 58, 0, 0, 189, 63, 245, 57, 138, 57, 0, 0, 191, 63, 179, 205, 60, 58, 0, 32, 193, 63, 166, 204, 196, 57, 0, 64, 195, 63, 68, 155, 89, 57, 0, 96, 197, 63, 42, 66, 101, 57, 0, 128, 199, 63, 138, 76, 215, 57, 0, 160, 201, 63, 51, 236, 77, 58, 0, 224, 203, 63, 239, 79, 193, 57, 0, 32, 206, 63, 163, 130, 17, 57, 0, 96, 208, 63, 187, 246, 204, 56, 0, 160, 210, 63, 31, 217, 129, 57, 0, 224, 212, 63, 94, 213, 26, 58, 0, 64, 215, 63, 90, 153, 31, 57, 0, 128, 217, 63, 19, 174, 104, 58, 0, 224, 219, 63, 190, 188, 93, 58, 0, 96, 222, 63, 94, 130, 244, 55, 0, 192, 224, 63, 194, 238, 205, 57, 0, 32, 227, 63, 149, 75, 124, 58, 0, 160, 229, 63, 59, 55, 72, 58, 0, 32, 232, 63, 129, 82, 75, 58, 0, 192, 234, 63, 221, 231, 198, 55, 0, 64, 237, 63, 237, 1, 243, 57, 0, 224, 239, 63, 123, 51, 23, 57, 0, 128, 242, 63, 44, 158, 59, 56, 0, 32, 245, 63, 164, 162, 47, 57, 0, 192, 247, 63, 152, 251, 6, 58, 0, 
128, 250, 63, 220, 182, 236, 56, 0, 32, 253, 63, 103, 96, 112, 58, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 59, 65, 172, 41, 52, 0, 0, 126, 60, 252, 176, 168, 53, 0, 192, 189, 60, 234, 131, 141, 54, 0, 16, 252, 60, 120, 14, 27, 54, 0, 240, 28, 61, 254, 185, 135, 54, 0, 160, 59, 61, 101, 236, 49, 54, 0, 16, 90, 61, 25, 113, 221, 54, 0, 80, 120, 61, 69, 0, 195, 53, 0, 32, 139, 61, 81, 119, 155, 55, 0, 0, 154, 61, 13, 203, 235, 55, 0, 208, 168, 61, 131, 159, 131, 55, 0, 128, 183, 61, 229, 138, 82, 55, 0, 16, 198, 61, 24, 235, 162, 55, 0, 144, 212, 61, 149, 116, 218, 54, 0, 240, 226, 61, 183, 30, 169, 54, 0, 48, 241, 61, 21, 183, 131, 55, 0, 96, 255, 61, 219, 49, 17, 55, 0, 176, 6, 62, 104, 62, 63, 56, 0, 176, 13, 62, 151, 106, 21, 56, 0, 160, 20, 62, 15, 124, 41, 56, 0, 128, 27, 62, 15, 16, 126, 56, 0, 96, 34, 62, 101, 182, 21, 56, 0, 48, 41, 62, 161, 227, 229, 55, 0, 240, 47, 62, 83, 56, 24, 56, 0, 176, 54, 62, 157, 113, 254, 53, 0, 80, 61, 62, 8, 129, 68, 56, 0, 240, 67, 62, 144, 50, 80, 56, 0, 144, 74, 62, 232, 57, 53, 55, 0, 16, 81, 62, 241, 15, 94, 56, 0, 144, 87, 62, 64, 167, 100, 56, 0, 16, 94, 62, 45, 116, 134, 55, 0, 112, 100, 62, 205, 227, 123, 56, 0, 224, 106, 62, 62, 173, 133, 54, 0, 48, 113, 62, 21, 183, 3, 56, 0, 128, 119, 62, 220, 203, 173, 55, 0, 192, 125, 62, 175, 54, 12, 56, 0, 0, 130, 62, 211, 82, 22, 55, 0, 16, 133, 62, 57, 113, 146, 56, 0, 32, 136, 62, 215, 252, 197, 56, 0, 48, 139, 62, 213, 85, 174, 56, 0, 64, 142, 62, 105, 193, 24, 56, 0, 64, 145, 62, 231, 253, 160, 56, 0, 64, 148, 62, 239, 9, 173, 56, 0, 64, 151, 62, 225, 186, 98, 56, 0, 48, 154, 62, 76, 205, 238, 56, 0, 48, 157, 62, 210, 170, 152, 55, 0, 32, 160, 62, 26, 26, 66, 55, 0, 0, 163, 62, 14, 225, 197, 56, 0, 240, 165, 62, 238, 42, 191, 55, 0, 208, 168, 62, 45, 135, 45, 56, 0, 176, 171, 62, 138, 46, 238, 55, 0, 128, 174, 62, 172, 223, 222, 56, 0, 96, 177, 62, 185, 242, 2, 56, 0, 48, 180, 62, 155, 30, 72, 56, 0, 0, 183, 62, 43, 170, 14, 56, 0, 192, 185, 62, 93, 251, 
235, 56, 0, 144, 188, 62, 221, 95, 37, 56, 0, 80, 191, 62, 130, 59, 120, 56, 0, 16, 194, 62, 30, 218, 81, 56, 0, 208, 196, 62, 5, 27, 78, 55, 0, 128, 199, 62, 155, 67, 143, 56, 0, 48, 202, 62, 16, 14, 202, 56, 0, 224, 204, 62, 139, 192, 202, 56, 0, 144, 207, 62, 95, 246, 145, 56, 0, 64, 210, 62, 203, 33, 129, 55, 0, 224, 212, 62, 154, 154, 108, 56, 0, 128, 215, 62, 35, 153, 148, 56, 0, 32, 218, 62, 204, 123, 119, 56, 0, 192, 220, 62, 38, 45, 177, 55, 0, 80, 223, 62, 211, 206, 166, 56, 0, 224, 225, 62, 230, 211, 235, 56, 0, 112, 228, 62, 205, 227, 251, 56, 0, 0, 231, 62, 194, 133, 215, 56, 0, 144, 233, 62, 0, 126, 126, 56, 0, 16, 236, 62, 197, 146, 243, 56, 0, 160, 238, 62, 131, 9, 212, 55, 0, 32, 241, 62, 124, 26, 8, 56, 0, 160, 243, 62, 173, 195, 132, 55, 0, 16, 246, 62, 35, 233, 204, 56, 0, 144, 248, 62, 175, 95, 15, 56, 0, 0, 251, 62, 56, 253, 145, 56, 0, 112, 253, 62, 188, 71, 172, 56, 0, 224, 255, 62, 43, 4, 151, 56, 0, 32, 1, 63, 210, 82, 41, 57, 0, 80, 2, 63, 212, 206, 111, 57, 0, 144, 3, 63, 115, 112, 249, 55, 0, 192, 4, 63, 174, 158, 94, 56, 0, 240, 5, 63, 74, 200, 101, 56, 0, 32, 7, 63, 163, 11, 19, 56, 0, 64, 8, 63, 22, 207, 121, 57, 0, 112, 9, 63, 201, 202, 56, 57, 0, 160, 10, 63, 244, 210, 195, 56, 0, 192, 11, 63, 236, 93, 117, 57, 0, 240, 12, 63, 103, 180, 230, 56, 0, 16, 14, 63, 184, 15, 92, 57, 0, 64, 15, 63, 224, 188, 62, 56, 0, 96, 16, 63, 146, 209, 220, 56, 0, 128, 17, 63, 223, 107, 24, 57, 0, 160, 18, 63, 76, 231, 45, 57, 0, 192, 19, 63, 68, 9, 47, 57, 0, 224, 20, 63, 97, 255, 27, 57, 0, 0, 22, 63, 68, 237, 233, 56, 0, 32, 23, 63, 200, 109, 104, 56, 0, 48, 24, 63, 167, 153, 107, 57, 0, 80, 25, 63, 137, 156, 9, 57, 0, 112, 26, 63, 115, 118, 162, 55, 0, 128, 27, 63, 163, 218, 11, 57, 0, 144, 28, 63, 171, 105, 112, 57, 0, 176, 29, 63, 255, 73, 132, 56, 0, 192, 30, 63, 56, 53, 1, 57, 0, 208, 31, 63, 104, 194, 45, 57, 0, 224, 32, 63, 35, 244, 71, 57, 0, 240, 33, 63, 124, 241, 79, 57, 0, 0, 35, 63, 14, 225, 69, 57, 0, 16, 36, 63, 245, 232, 41, 57, 0, 
32, 37, 63, 176, 93, 248, 56, 0, 48, 38, 63, 153, 95, 115, 56, 0, 48, 39, 63, 219, 8, 108, 57, 0, 64, 40, 63, 0, 230, 9, 57, 0, 80, 41, 63, 111, 153, 180, 55, 0, 80, 42, 63, 204, 51, 18, 57, 0, 80, 43, 63, 217, 234, 124, 57, 0, 96, 44, 63, 205, 181, 173, 56, 0, 96, 45, 63, 26, 38, 32, 57, 0, 96, 46, 63, 54, 238, 88, 57, 0, 112, 47, 63, 5, 73, 170, 53, 0, 112, 48, 63, 30, 209, 203, 55, 0, 112, 49, 63, 244, 253, 5, 56, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 254, 63, 248, 3, 254, 56, 0, 0, 252, 63, 193, 15, 252, 57, 0, 0, 250, 63, 201, 179, 140, 58, 0, 0, 248, 63, 16, 62, 248, 58, 0, 0, 246, 63, 48, 123, 64, 59, 0, 0, 244, 63, 96, 141, 137, 59, 0, 0, 242, 63, 72, 214, 185, 59, 0, 0, 240, 63, 241, 240, 240, 59, 0, 0, 239, 63, 127, 220, 186, 58, 0, 0, 237, 63, 108, 7, 102, 59, 0, 0, 235, 63, 166, 178, 189, 59, 0, 0, 234, 63, 161, 14, 234, 57, 0, 0, 232, 63, 247, 88, 75, 59, 0, 0, 230, 63, 72, 180, 194, 59, 0, 0, 229, 63, 172, 96, 150, 58, 0, 0, 227, 63, 228, 56, 142, 59, 0, 0, 225, 63, 14, 120, 252, 59, 0, 0, 224, 63, 56, 112, 96, 59, 0, 0, 222, 63, 77, 92, 233, 59, 0, 0, 221, 63, 76, 145, 79, 59, 0, 0, 219, 63, 239, 97, 235, 59, 0, 0, 218, 63, 79, 27, 104, 59, 0, 0, 217, 63, 178, 1, 89, 56, 0, 0, 215, 63, 229, 53, 148, 59, 0, 0, 214, 63, 89, 3, 174, 58, 0, 0, 212, 63, 3, 123, 199, 59, 0, 0, 211, 63, 109, 26, 80, 59, 0, 0, 210, 63, 33, 13, 210, 57, 0, 0, 208, 63, 204, 159, 182, 59, 0, 0, 207, 63, 81, 233, 72, 59, 0, 0, 206, 63, 185, 83, 52, 58, 0, 0, 204, 63, 205, 204, 204, 59, 0, 0, 203, 63, 192, 39, 135, 59, 0, 0, 202, 63, 205, 15, 11, 59, 0, 0, 201, 63, 209, 73, 123, 57, 0, 0, 199, 63, 125, 12, 206, 59, 0, 0, 198, 63, 106, 12, 152, 59, 0, 0, 197, 63, 247, 144, 75, 59, 0, 0, 196, 63, 21, 190, 220, 58, 0, 0, 195, 63, 49, 12, 195, 57, 0, 0, 193, 63, 214, 187, 228, 59, 0, 0, 192, 63, 193, 192, 192, 59, 0, 0, 191, 63, 232, 47, 160, 59, 0, 0, 190, 63, 12, 250, 130, 59, 0, 0, 189, 63, 142, 32, 82, 59, 0, 0, 188, 63, 24, 200, 36, 59, 0, 0, 187, 63, 135, 156, 251, 58, 0, 0, 186, 63, 
140, 46, 186, 58, 0, 0, 185, 63, 233, 15, 133, 58, 0, 0, 184, 63, 3, 23, 56, 58, 0, 0, 183, 63, 162, 181, 251, 57, 0, 0, 182, 63, 97, 11, 182, 57, 0, 0, 181, 63, 170, 104, 158, 57, 0, 0, 180, 63, 65, 11, 180, 57, 0, 0, 179, 63, 41, 53, 246, 57, 0, 0, 178, 63, 67, 22, 50, 58, 0, 0, 177, 63, 192, 157, 126, 58, 0, 0, 176, 63, 11, 44, 176, 58, 0, 0, 175, 63, 26, 119, 235, 58, 0, 0, 174, 63, 185, 130, 24, 59, 0, 0, 173, 63, 176, 86, 64, 59, 0, 0, 172, 63, 8, 35, 109, 59, 0, 0, 171, 63, 227, 105, 143, 59, 0, 0, 170, 63, 171, 170, 170, 59, 0, 0, 169, 63, 72, 74, 200, 59, 0, 0, 168, 63, 87, 63, 232, 59, 0, 0, 168, 63, 129, 10, 168, 57, 0, 0, 167, 63, 230, 20, 188, 58, 0, 0, 166, 63, 114, 136, 43, 59, 0, 0, 165, 63, 5, 106, 125, 59, 0, 0, 164, 63, 30, 207, 169, 59, 0, 0, 163, 63, 61, 10, 215, 59, 0, 0, 163, 63, 246, 199, 75, 57, 0, 0, 162, 63, 172, 12, 223, 58, 0, 0, 161, 63, 93, 98, 86, 59, 0, 0, 160, 63, 161, 160, 160, 59, 0, 0, 159, 63, 254, 9, 216, 59, 0, 0, 159, 63, 57, 47, 11, 58, 0, 0, 158, 63, 72, 90, 25, 59, 0, 0, 157, 63, 158, 216, 137, 59, 0, 0, 156, 63, 97, 225, 200, 59, 0, 0, 156, 63, 193, 9, 156, 57, 0, 0, 155, 63, 62, 223, 24, 59, 0, 0, 154, 63, 217, 231, 144, 59, 0, 0, 153, 63, 219, 34, 215, 59, 0, 0, 153, 63, 139, 210, 120, 58, 0, 0, 152, 63, 19, 144, 81, 59, 0, 0, 151, 63, 237, 37, 180, 59, 0, 0, 151, 63, 46, 1, 23, 56, 0, 0, 150, 63, 216, 180, 31, 59, 0, 0, 149, 63, 104, 37, 160, 59, 0, 0, 148, 63, 79, 9, 242, 59, 0, 0, 148, 63, 41, 1, 11, 59, 0, 0, 147, 63, 196, 133, 154, 59, 0, 0, 146, 63, 132, 19, 241, 59, 0, 0, 146, 63, 37, 73, 18, 59, 0, 0, 145, 63, 197, 179, 162, 59, 0, 0, 144, 63, 9, 188, 253, 59, 0, 0, 144, 63, 198, 112, 52, 59, 0, 0, 143, 63, 238, 35, 184, 59, 0, 0, 143, 63, 208, 206, 59, 58, 0, 0, 142, 63, 218, 106, 112, 59, 0, 0, 141, 63, 2, 82, 218, 59, 0, 0, 141, 63, 35, 44, 247, 58, 0, 0, 140, 63, 4, 156, 162, 59, 0, 0, 140, 63, 193, 8, 140, 57, 0, 0, 139, 63, 148, 104, 96, 59, 0, 0, 138, 63, 252, 242, 216, 59, 0, 0, 138, 63, 225, 240, 5, 
59, 0, 0, 137, 63, 138, 64, 174, 59, 0, 0, 137, 63, 215, 57, 86, 58, 0, 0, 136, 63, 137, 136, 136, 59, 0, 0, 135, 63, 136, 128, 247, 59, 0, 0, 135, 63, 190, 86, 79, 59, 0, 0, 134, 63, 68, 5, 217, 59, 0, 0, 134, 63, 252, 20, 23, 59, 0, 0, 133, 63, 97, 55, 191, 59, 0, 0, 133, 63, 77, 33, 208, 58, 0, 0, 132, 63, 200, 249, 169, 59, 0, 0, 132, 63, 8, 33, 132, 58, 0, 0, 131, 63, 82, 48, 153, 59, 0, 0, 131, 63, 188, 116, 19, 58, 0, 0, 130, 63, 191, 191, 140, 59, 0, 0, 130, 63, 33, 8, 130, 57, 0, 0, 129, 63, 169, 141, 132, 59, 0, 0, 129, 63, 4, 2, 129, 56, 0, 0, 128, 63, 129, 128, 128, 59, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 11, 0, 11, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 26, 7, 70, 192, 28, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 2, 209, 1, 27, 0, 0, 8, 0, 2, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 135, 3, 4, 8, 125, 0, 106, 128, 135, 0, 36, 128, 190, 
188, 0, 136, 191, 6, 7, 65, 192, 14, 7, 66, 192, 16, 7, 4, 192, 18, 7, 69, 192, 32, 135, 4, 192, 34, 7, 134, 192, 159, 0, 6, 48, 159, 2, 8, 48, 127, 0, 140, 191, 5, 0, 212, 210, 1, 25, 0, 0, 4, 0, 210, 210, 4, 25, 0, 0, 4, 11, 8, 74, 5, 0, 210, 210, 1, 27, 0, 0, 5, 9, 8, 74, 5, 0, 210, 210, 1, 25, 0, 0, 5, 106, 74, 210, 5, 1, 2, 0, 4, 7, 6, 80, 4, 0, 14, 74, 5, 2, 16, 74, 8, 4, 18, 74, 159, 4, 12, 48, 0, 3, 200, 192, 128, 2, 20, 126, 127, 0, 140, 191, 0, 95, 0, 240, 7, 7, 4, 0, 0, 0, 212, 210, 2, 29, 0, 0, 1, 0, 210, 210, 6, 29, 0, 0, 1, 1, 0, 74, 1, 0, 210, 210, 2, 31, 0, 0, 1, 1, 0, 74, 1, 0, 210, 210, 2, 29, 0, 0, 1, 106, 74, 210, 1, 11, 2, 0, 0, 7, 0, 80, 2, 0, 212, 210, 1, 19, 0, 0, 0, 0, 210, 210, 0, 19, 0, 0, 0, 5, 0, 74, 1, 0, 210, 210, 1, 19, 0, 0, 3, 106, 74, 210, 1, 21, 0, 0, 11, 2, 4, 126, 0, 5, 8, 80, 30, 7, 65, 192, 8, 7, 66, 192, 127, 0, 140, 191, 2, 132, 0, 191, 83, 0, 133, 191, 10, 7, 68, 192, 2, 130, 0, 191, 41, 0, 132, 191, 3, 132, 0, 191, 29, 0, 133, 191, 3, 130, 0, 191, 12, 0, 132, 191, 0, 0, 194, 210, 3, 5, 1, 0, 112, 15, 140, 191, 144, 16, 4, 52, 0, 106, 74, 210, 4, 0, 2, 0, 5, 2, 6, 126, 3, 3, 2, 80, 2, 15, 4, 56, 0, 0, 112, 220, 0, 2, 0, 0, 109, 0, 130, 191, 3, 129, 0, 191, 107, 0, 132, 191, 0, 0, 194, 210, 3, 3, 1, 0, 112, 15, 140, 191, 136, 16, 4, 52, 127, 0, 140, 191, 0, 106, 74, 210, 8, 0, 2, 0, 9, 2, 6, 126, 3, 3, 2, 80, 2, 15, 4, 56, 0, 0, 104, 220, 0, 2, 0, 0, 94, 0, 130, 191, 0, 0, 194, 210, 3, 5, 1, 0, 0, 106, 74, 210, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 80, 112, 15, 140, 191, 0, 0, 116, 220, 0, 7, 0, 0, 84, 0, 130, 191, 2, 129, 0, 191, 82, 0, 132, 191, 3, 132, 0, 191, 25, 0, 133, 191, 3, 130, 0, 191, 11, 0, 132, 191, 0, 0, 194, 210, 3, 3, 1, 0, 127, 0, 140, 191, 0, 106, 74, 210, 8, 0, 2, 0, 9, 2, 4, 126, 2, 3, 2, 80, 112, 15, 140, 191, 0, 0, 104, 220, 0, 7, 0, 0, 67, 0, 130, 191, 3, 129, 0, 191, 65, 0, 132, 191, 12, 7, 65, 192, 127, 0, 140, 191, 0, 106, 74, 210, 2, 6, 2, 0, 3, 2, 4, 126, 2, 9, 2, 80, 112, 15, 140, 191, 0, 0, 96, 
220, 0, 7, 0, 0, 55, 0, 130, 191, 0, 0, 194, 210, 3, 5, 1, 0, 0, 106, 74, 210, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 80, 112, 15, 140, 191, 0, 0, 112, 220, 0, 7, 0, 0, 45, 0, 130, 191, 3, 132, 0, 191, 34, 0, 133, 191, 3, 130, 0, 191, 14, 0, 132, 191, 112, 15, 140, 191, 144, 16, 0, 52, 0, 15, 10, 56, 1, 0, 194, 210, 3, 5, 1, 0, 1, 106, 74, 210, 4, 2, 2, 0, 5, 2, 6, 126, 3, 5, 4, 80, 144, 20, 6, 52, 3, 19, 12, 56, 0, 0, 116, 220, 1, 5, 0, 0, 27, 0, 130, 191, 3, 129, 0, 191, 25, 0, 132, 191, 112, 15, 140, 191, 136, 16, 0, 52, 1, 0, 194, 210, 3, 5, 1, 0, 0, 15, 0, 56, 144, 18, 6, 52, 0, 7, 0, 56, 152, 20, 6, 52, 1, 106, 74, 210, 4, 2, 2, 0, 5, 2, 8, 126, 4, 5, 4, 80, 0, 7, 0, 56, 0, 0, 112, 220, 1, 0, 0, 0, 9, 0, 130, 191, 0, 0, 194, 210, 3, 5, 1, 0, 0, 106, 74, 210, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 80, 112, 15, 140, 191, 0, 0, 120, 220, 0, 7, 0, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 132, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 19, 0, 19, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 22, 7, 70, 192, 24, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 2, 209, 1, 27, 0, 0, 8, 0, 2, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 135, 3, 4, 8, 125, 0, 106, 128, 135, 0, 36, 128, 190, 193, 0, 136, 191, 10, 7, 65, 192, 18, 7, 66, 192, 20, 7, 4, 192, 28, 135, 4, 192, 30, 7, 134, 192, 159, 0, 6, 48, 159, 2, 8, 48, 127, 0, 140, 191, 5, 0, 212, 210, 1, 25, 0, 0, 4, 0, 210, 210, 4, 25, 0, 0, 4, 11, 8, 74, 5, 0, 210, 210, 1, 27, 0, 0, 5, 9, 8, 74, 5, 0, 210, 210, 1, 25, 0, 0, 5, 106, 74, 210, 5, 1, 2, 0, 4, 7, 6, 80, 159, 4, 8, 48, 6, 0, 212, 210, 2, 29, 0, 0, 4, 0, 210, 210, 4, 29, 0, 0, 4, 13, 8, 74, 6, 0, 210, 210, 2, 31, 0, 0, 6, 9, 8, 74, 6, 0, 210, 210, 2, 29, 0, 0, 5, 106, 74, 210, 6, 11, 2, 0, 4, 7, 6, 80, 4, 0, 212, 210, 5, 19, 0, 0, 3, 0, 210, 210, 3, 19, 0, 0, 3, 9, 6, 74, 4, 0, 210, 210, 5, 19, 0, 0, 6, 106, 74, 210, 4, 5, 0, 0, 3, 2, 10, 126, 3, 11, 14, 80, 4, 0, 30, 74, 5, 2, 32, 74, 8, 4, 34, 74, 26, 7, 65, 192, 6, 7, 68, 192, 127, 0, 140, 191, 2, 132, 0, 191, 77, 0, 133, 191, 2, 130, 0, 191, 39, 0, 132, 191, 3, 130, 0, 191, 13, 0, 132, 191, 3, 0, 194, 210, 6, 5, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 48, 220, 3, 0, 0, 3, 112, 0, 140, 191, 144, 6, 12, 44, 5, 0, 144, 210, 3, 1, 65, 2, 57, 0, 130, 191, 3, 129, 0, 191, 13, 0, 132, 191, 3, 0, 194, 210, 6, 3, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 40, 220, 3, 0, 0, 3, 112, 0, 140, 191, 136, 6, 12, 44, 5, 0, 144, 210, 3, 1, 33, 2, 42, 0, 130, 191, 3, 0, 194, 210, 6, 5, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 52, 220, 3, 0, 0, 5, 33, 0, 130, 191, 2, 129, 0, 191, 29, 0, 
132, 191, 3, 130, 0, 191, 9, 0, 132, 191, 3, 0, 194, 210, 6, 3, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 40, 220, 3, 0, 0, 5, 19, 0, 130, 191, 3, 129, 0, 191, 7, 0, 132, 191, 3, 106, 74, 210, 8, 12, 2, 0, 9, 2, 10, 126, 5, 15, 8, 80, 0, 0, 32, 220, 3, 0, 0, 5, 10, 0, 130, 191, 3, 0, 194, 210, 6, 5, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 48, 220, 3, 0, 0, 5, 1, 0, 130, 191, 2, 2, 10, 126, 3, 2, 12, 126, 5, 2, 16, 126, 4, 2, 14, 126, 55, 0, 130, 191, 3, 129, 0, 191, 17, 0, 132, 191, 3, 0, 194, 210, 6, 5, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 48, 220, 3, 0, 0, 3, 112, 0, 140, 191, 152, 6, 16, 44, 7, 0, 144, 210, 3, 33, 33, 2, 6, 0, 144, 210, 3, 17, 33, 2, 5, 0, 144, 210, 3, 1, 33, 2, 36, 0, 130, 191, 3, 0, 194, 210, 6, 5, 1, 0, 3, 106, 74, 210, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 80, 0, 0, 48, 220, 3, 0, 0, 5, 3, 130, 0, 191, 14, 0, 132, 191, 3, 106, 74, 210, 3, 9, 1, 0, 4, 106, 80, 210, 4, 1, 169, 1, 0, 0, 48, 220, 3, 0, 0, 3, 112, 0, 140, 191, 144, 6, 16, 44, 7, 0, 144, 210, 3, 1, 65, 2, 144, 10, 12, 44, 5, 0, 144, 210, 5, 1, 65, 2, 12, 0, 130, 191, 6, 106, 74, 210, 3, 25, 1, 0, 7, 106, 80, 210, 4, 1, 169, 1, 0, 0, 48, 220, 6, 0, 0, 8, 3, 106, 74, 210, 3, 9, 1, 0, 4, 106, 80, 210, 4, 1, 169, 1, 0, 0, 52, 220, 3, 0, 0, 6, 8, 7, 65, 192, 127, 0, 140, 191, 0, 3, 194, 192, 128, 2, 36, 126, 112, 0, 140, 191, 0, 95, 32, 240, 15, 5, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 144, 19, 
0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 18, 7, 70, 192, 20, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 12, 209, 1, 27, 0, 0, 8, 0, 12, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 136, 3, 4, 6, 125, 0, 106, 234, 136, 126, 4, 128, 190, 0, 106, 254, 138, 22, 0, 136, 191, 6, 7, 132, 192, 10, 7, 65, 192, 12, 7, 2, 192, 127, 0, 140, 191, 0, 9, 198, 192, 2, 0, 6, 74, 3, 2, 8, 74, 4, 4, 10, 74, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 14, 7, 130, 192, 0, 11, 196, 192, 127, 0, 140, 191, 4, 0, 14, 74, 5, 2, 16, 74, 6, 4, 18, 74, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 133, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 21, 0, 21, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 18, 7, 70, 192, 20, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 12, 209, 1, 27, 0, 0, 8, 0, 12, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 136, 3, 4, 6, 125, 0, 106, 234, 136, 126, 4, 128, 190, 0, 106, 254, 138, 212, 2, 136, 191, 6, 7, 65, 192, 10, 7, 66, 192, 12, 7, 4, 192, 127, 0, 140, 191, 0, 3, 198, 192, 4, 0, 6, 74, 5, 2, 8, 74, 8, 4, 10, 74, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 112, 15, 140, 191, 106, 0, 16, 209, 3, 7, 1, 0, 3, 0, 0, 210, 3, 1, 169, 1, 242, 6, 28, 124, 106, 36, 130, 190, 223, 0, 136, 191, 128, 6, 8, 124, 106, 36, 132, 190, 128, 2, 6, 126, 4, 126, 254, 138, 217, 0, 136, 191, 255, 3, 136, 190, 28, 46, 77, 59, 8, 6, 8, 124, 106, 36, 136, 190, 255, 6, 6, 16, 82, 184, 78, 65, 8, 126, 254, 138, 242, 6, 6, 16, 208, 0, 136, 191, 255, 6, 14, 54, 255, 255, 255, 127, 242, 14, 16, 8, 255, 3, 138, 190, 0, 0, 128, 61, 106, 1, 22, 208, 8, 21, 0, 0, 126, 4, 138, 190, 10, 106, 254, 138, 7, 129, 16, 126, 69, 0, 136, 191, 129, 16, 18, 52, 255, 16, 16, 74, 0, 0, 128, 0, 255, 18, 18, 74, 0, 0, 0, 1, 255, 16, 20, 54, 0, 0, 127, 0, 255, 18, 18, 54, 0, 0, 1, 0, 9, 21, 18, 74, 144, 18, 20, 44, 128, 2, 22, 126, 10, 0, 194, 210, 10, 7, 1, 0, 255, 3, 141, 190, 85, 85, 85, 85, 255, 3, 140, 190, 85, 85, 85, 85, 12, 106, 74, 210, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 80, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 141, 190, 85, 85, 85, 85, 255, 
3, 140, 190, 85, 85, 85, 85, 10, 106, 74, 210, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 80, 0, 0, 52, 220, 10, 0, 0, 10, 255, 16, 16, 54, 255, 255, 127, 0, 240, 18, 18, 56, 240, 16, 16, 56, 9, 17, 16, 8, 113, 1, 140, 191, 13, 17, 18, 16, 12, 17, 18, 62, 255, 2, 28, 126, 171, 170, 170, 62, 255, 3, 140, 190, 0, 0, 128, 62, 7, 127, 30, 126, 12, 18, 28, 62, 12, 0, 130, 210, 12, 17, 38, 132, 193, 30, 30, 74, 14, 0, 130, 210, 9, 29, 194, 3, 9, 19, 32, 16, 13, 17, 24, 62, 15, 11, 16, 126, 14, 33, 24, 62, 255, 3, 140, 190, 244, 253, 5, 56, 12, 0, 130, 210, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 6, 8, 21, 16, 64, 0, 112, 49, 63, 12, 19, 30, 8, 255, 18, 28, 58, 0, 0, 0, 128, 8, 31, 26, 6, 10, 126, 254, 138, 8, 17, 18, 16, 21, 0, 136, 191, 8, 19, 20, 16, 255, 2, 22, 126, 171, 170, 42, 62, 255, 3, 140, 190, 37, 73, 18, 62, 12, 16, 22, 62, 8, 23, 22, 66, 205, 204, 76, 62, 8, 23, 22, 66, 0, 0, 128, 62, 8, 23, 22, 66, 171, 170, 170, 62, 10, 23, 20, 16, 241, 18, 28, 16, 15, 0, 130, 210, 9, 227, 41, 132, 15, 17, 26, 8, 255, 20, 24, 58, 0, 0, 0, 128, 255, 16, 16, 58, 0, 0, 0, 128, 10, 4, 254, 190, 8, 27, 20, 8, 15, 29, 18, 8, 15, 21, 20, 6, 12, 19, 18, 8, 255, 26, 22, 54, 0, 240, 255, 255, 9, 21, 18, 6, 13, 23, 16, 8, 9, 17, 16, 6, 255, 16, 18, 16, 0, 160, 42, 56, 11, 19, 18, 64, 0, 160, 42, 56, 8, 19, 16, 64, 0, 80, 213, 62, 11, 17, 18, 64, 0, 80, 213, 62, 255, 18, 20, 16, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 54, 131, 24, 24, 52, 255, 3, 139, 190, 85, 85, 85, 85, 255, 3, 138, 190, 85, 85, 85, 85, 12, 106, 74, 210, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 80, 210, 13, 1, 169, 1, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 138, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 130, 210, 10, 22, 38, 132, 14, 19, 30, 64, 0, 0, 49, 188, 8, 23, 16, 6, 14, 31, 22, 64, 239, 47, 228, 183, 8, 23, 22, 6, 255, 2, 28, 126, 171, 170, 42, 62, 255, 3, 138, 190, 171, 170, 42, 61, 10, 22, 28, 62, 14, 0, 130, 210, 14, 23, 194, 3, 11, 23, 30, 16, 14, 31, 22, 62, 255, 3, 138, 190, 8, 227, 130, 
180, 255, 3, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 23, 26, 62, 12, 0, 8, 208, 8, 21, 0, 0, 11, 18, 4, 124, 12, 23, 26, 62, 106, 12, 140, 135, 11, 18, 2, 124, 134, 20, 16, 48, 12, 27, 20, 6, 106, 12, 234, 136, 10, 17, 16, 86, 255, 2, 20, 126, 0, 0, 128, 127, 255, 3, 138, 190, 208, 142, 206, 194, 8, 21, 16, 0, 10, 18, 22, 124, 128, 16, 16, 0, 3, 15, 10, 125, 242, 16, 16, 16, 255, 2, 18, 126, 0, 0, 192, 127, 255, 3, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 10, 125, 10, 0, 4, 209, 3, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 210, 8, 19, 42, 0, 3, 19, 4, 125, 8, 19, 16, 0, 7, 19, 136, 125, 8, 7, 14, 0, 242, 6, 10, 125, 242, 14, 6, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 3, 138, 190, 61, 10, 135, 63, 3, 0, 130, 210, 3, 21, 28, 4, 4, 4, 254, 190, 2, 126, 254, 138, 242, 2, 6, 126, 2, 4, 254, 190, 106, 0, 16, 209, 4, 7, 1, 0, 4, 0, 0, 210, 4, 1, 169, 1, 242, 8, 28, 124, 2, 106, 254, 135, 223, 0, 136, 191, 128, 8, 8, 124, 106, 36, 132, 190, 128, 2, 8, 126, 4, 126, 254, 138, 217, 0, 136, 191, 255, 3, 136, 190, 28, 46, 77, 59, 8, 8, 8, 124, 106, 36, 136, 190, 255, 8, 8, 16, 82, 184, 78, 65, 8, 126, 254, 138, 242, 8, 8, 16, 208, 0, 136, 191, 255, 8, 14, 54, 255, 255, 255, 127, 242, 14, 16, 8, 255, 3, 138, 190, 0, 0, 128, 61, 106, 1, 22, 208, 8, 21, 0, 0, 126, 4, 138, 190, 10, 106, 254, 138, 7, 129, 16, 126, 69, 0, 136, 191, 129, 16, 18, 52, 255, 16, 16, 74, 0, 0, 128, 0, 255, 18, 18, 74, 0, 0, 0, 1, 255, 16, 20, 54, 0, 0, 127, 0, 255, 18, 18, 54, 0, 0, 1, 0, 9, 21, 18, 74, 144, 18, 20, 44, 128, 2, 22, 126, 10, 0, 194, 210, 10, 7, 1, 0, 255, 3, 141, 190, 85, 85, 85, 85, 255, 3, 140, 190, 85, 85, 85, 85, 12, 106, 74, 210, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 80, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 141, 190, 85, 85, 85, 85, 255, 3, 140, 190, 85, 85, 85, 85, 10, 106, 74, 210, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 80, 0, 0, 52, 220, 10, 0, 0, 10, 255, 16, 16, 54, 255, 255, 127, 0, 240, 18, 18, 56, 240, 16, 16, 56, 9, 17, 16, 
8, 113, 1, 140, 191, 13, 17, 18, 16, 12, 17, 18, 62, 255, 2, 28, 126, 171, 170, 170, 62, 255, 3, 140, 190, 0, 0, 128, 62, 7, 127, 30, 126, 12, 18, 28, 62, 12, 0, 130, 210, 12, 17, 38, 132, 193, 30, 30, 74, 14, 0, 130, 210, 9, 29, 194, 3, 9, 19, 32, 16, 13, 17, 24, 62, 15, 11, 16, 126, 14, 33, 24, 62, 255, 3, 140, 190, 244, 253, 5, 56, 12, 0, 130, 210, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 6, 8, 21, 16, 64, 0, 112, 49, 63, 12, 19, 30, 8, 255, 18, 28, 58, 0, 0, 0, 128, 8, 31, 26, 6, 10, 126, 254, 138, 8, 17, 18, 16, 21, 0, 136, 191, 8, 19, 20, 16, 255, 2, 22, 126, 171, 170, 42, 62, 255, 3, 140, 190, 37, 73, 18, 62, 12, 16, 22, 62, 8, 23, 22, 66, 205, 204, 76, 62, 8, 23, 22, 66, 0, 0, 128, 62, 8, 23, 22, 66, 171, 170, 170, 62, 10, 23, 20, 16, 241, 18, 28, 16, 15, 0, 130, 210, 9, 227, 41, 132, 15, 17, 26, 8, 255, 20, 24, 58, 0, 0, 0, 128, 255, 16, 16, 58, 0, 0, 0, 128, 10, 4, 254, 190, 8, 27, 20, 8, 15, 29, 18, 8, 15, 21, 20, 6, 12, 19, 18, 8, 255, 26, 22, 54, 0, 240, 255, 255, 9, 21, 18, 6, 13, 23, 16, 8, 9, 17, 16, 6, 255, 16, 18, 16, 0, 160, 42, 56, 11, 19, 18, 64, 0, 160, 42, 56, 8, 19, 16, 64, 0, 80, 213, 62, 11, 17, 18, 64, 0, 80, 213, 62, 255, 18, 20, 16, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 54, 131, 24, 24, 52, 255, 3, 139, 190, 85, 85, 85, 85, 255, 3, 138, 190, 85, 85, 85, 85, 12, 106, 74, 210, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 80, 210, 13, 1, 169, 1, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 138, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 130, 210, 10, 22, 38, 132, 14, 19, 30, 64, 0, 0, 49, 188, 8, 23, 16, 6, 14, 31, 22, 64, 239, 47, 228, 183, 8, 23, 22, 6, 255, 2, 28, 126, 171, 170, 42, 62, 255, 3, 138, 190, 171, 170, 42, 61, 10, 22, 28, 62, 14, 0, 130, 210, 14, 23, 194, 3, 11, 23, 30, 16, 14, 31, 22, 62, 255, 3, 138, 190, 8, 227, 130, 180, 255, 3, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 23, 26, 62, 12, 0, 8, 208, 8, 21, 0, 0, 11, 18, 4, 124, 12, 23, 26, 62, 106, 12, 140, 135, 11, 18, 2, 124, 134, 20, 16, 48, 12, 27, 20, 6, 
106, 12, 234, 136, 10, 17, 16, 86, 255, 2, 20, 126, 0, 0, 128, 127, 255, 3, 138, 190, 208, 142, 206, 194, 8, 21, 16, 0, 10, 18, 22, 124, 128, 16, 16, 0, 4, 15, 10, 125, 242, 16, 16, 16, 255, 2, 18, 126, 0, 0, 192, 127, 255, 3, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 10, 125, 10, 0, 4, 209, 4, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 210, 8, 19, 42, 0, 4, 19, 4, 125, 8, 19, 16, 0, 7, 19, 136, 125, 8, 9, 14, 0, 242, 8, 10, 125, 242, 14, 8, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 3, 138, 190, 61, 10, 135, 63, 4, 0, 130, 210, 4, 21, 28, 4, 4, 4, 254, 190, 2, 126, 254, 138, 242, 2, 8, 126, 2, 4, 254, 190, 14, 7, 132, 192, 106, 0, 16, 209, 5, 7, 1, 0, 5, 0, 0, 210, 5, 1, 169, 1, 127, 0, 140, 191, 8, 0, 34, 74, 9, 2, 36, 74, 10, 4, 38, 74, 242, 10, 28, 124, 106, 36, 130, 190, 223, 0, 136, 191, 128, 10, 8, 124, 106, 36, 132, 190, 128, 2, 10, 126, 4, 126, 254, 138, 217, 0, 136, 191, 255, 3, 136, 190, 28, 46, 77, 59, 8, 10, 8, 124, 106, 36, 136, 190, 255, 10, 10, 16, 82, 184, 78, 65, 8, 126, 254, 138, 242, 10, 10, 16, 208, 0, 136, 191, 255, 10, 14, 54, 255, 255, 255, 127, 242, 14, 16, 8, 255, 3, 138, 190, 0, 0, 128, 61, 106, 1, 22, 208, 8, 21, 0, 0, 126, 4, 138, 190, 10, 106, 254, 138, 7, 129, 16, 126, 69, 0, 136, 191, 129, 16, 18, 52, 255, 16, 16, 74, 0, 0, 128, 0, 255, 18, 18, 74, 0, 0, 0, 1, 255, 16, 20, 54, 0, 0, 127, 0, 255, 18, 18, 54, 0, 0, 1, 0, 9, 21, 18, 74, 144, 18, 20, 44, 128, 2, 22, 126, 10, 0, 194, 210, 10, 7, 1, 0, 255, 3, 141, 190, 85, 85, 85, 85, 255, 3, 140, 190, 85, 85, 85, 85, 12, 106, 74, 210, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 80, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 141, 190, 85, 85, 85, 85, 255, 3, 140, 190, 85, 85, 85, 85, 10, 106, 74, 210, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 80, 0, 0, 52, 220, 10, 0, 0, 10, 255, 16, 16, 54, 255, 255, 127, 0, 240, 18, 18, 56, 240, 16, 16, 56, 9, 17, 16, 8, 113, 1, 140, 191, 13, 17, 18, 16, 12, 17, 18, 62, 255, 2, 28, 126, 171, 170, 170, 62, 255, 3, 140, 190, 0, 0, 
128, 62, 7, 127, 30, 126, 12, 18, 28, 62, 12, 0, 130, 210, 12, 17, 38, 132, 193, 30, 30, 74, 14, 0, 130, 210, 9, 29, 194, 3, 9, 19, 32, 16, 13, 17, 24, 62, 15, 11, 16, 126, 14, 33, 24, 62, 255, 3, 140, 190, 244, 253, 5, 56, 12, 0, 130, 210, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 6, 8, 21, 16, 64, 0, 112, 49, 63, 12, 19, 26, 8, 255, 18, 28, 58, 0, 0, 0, 128, 8, 27, 30, 6, 10, 126, 254, 138, 8, 17, 18, 16, 21, 0, 136, 191, 8, 19, 20, 16, 255, 2, 22, 126, 171, 170, 42, 62, 255, 3, 140, 190, 37, 73, 18, 62, 12, 16, 22, 62, 8, 23, 22, 66, 205, 204, 76, 62, 8, 23, 22, 66, 0, 0, 128, 62, 8, 23, 22, 66, 171, 170, 170, 62, 10, 23, 20, 16, 241, 18, 28, 16, 13, 0, 130, 210, 9, 227, 41, 132, 13, 17, 30, 8, 255, 20, 24, 58, 0, 0, 0, 128, 255, 16, 16, 58, 0, 0, 0, 128, 10, 4, 254, 190, 8, 31, 16, 8, 13, 29, 20, 8, 13, 17, 16, 6, 12, 21, 18, 8, 255, 30, 20, 54, 0, 240, 255, 255, 9, 17, 16, 6, 15, 21, 18, 8, 8, 19, 16, 6, 255, 16, 18, 16, 0, 160, 42, 56, 10, 19, 18, 64, 0, 160, 42, 56, 8, 19, 16, 64, 0, 80, 213, 62, 10, 17, 18, 64, 0, 80, 213, 62, 255, 18, 22, 16, 59, 170, 184, 66, 11, 17, 22, 126, 191, 22, 24, 54, 131, 24, 24, 52, 255, 3, 139, 190, 85, 85, 85, 85, 255, 3, 138, 190, 85, 85, 85, 85, 12, 106, 74, 210, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 80, 210, 13, 1, 169, 1, 0, 0, 52, 220, 12, 0, 0, 12, 255, 3, 138, 190, 0, 80, 213, 62, 11, 11, 28, 126, 10, 0, 130, 210, 10, 20, 38, 132, 14, 19, 30, 64, 0, 0, 49, 188, 8, 21, 16, 6, 14, 31, 20, 64, 239, 47, 228, 183, 8, 21, 20, 6, 255, 2, 28, 126, 171, 170, 42, 62, 255, 3, 138, 190, 171, 170, 42, 61, 10, 20, 28, 62, 14, 0, 130, 210, 14, 21, 194, 3, 10, 21, 30, 16, 14, 31, 20, 62, 255, 3, 138, 190, 8, 227, 130, 180, 255, 3, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 21, 26, 62, 12, 0, 8, 208, 8, 21, 0, 0, 11, 18, 4, 124, 12, 21, 26, 62, 106, 12, 140, 135, 11, 18, 2, 124, 134, 22, 16, 48, 12, 27, 20, 6, 106, 12, 234, 136, 10, 17, 16, 86, 255, 2, 20, 126, 0, 0, 128, 127, 255, 3, 138, 190, 208, 142, 206, 194, 8, 21, 16, 
0, 10, 18, 22, 124, 128, 16, 16, 0, 5, 15, 10, 125, 242, 16, 16, 16, 255, 2, 18, 126, 0, 0, 192, 127, 255, 3, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 10, 125, 10, 0, 4, 209, 5, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 210, 8, 19, 42, 0, 5, 19, 4, 125, 8, 19, 16, 0, 7, 19, 136, 125, 8, 11, 14, 0, 242, 10, 10, 125, 242, 14, 10, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 3, 138, 190, 61, 10, 135, 63, 5, 0, 130, 210, 5, 21, 28, 4, 4, 4, 254, 190, 2, 126, 254, 138, 242, 2, 10, 126, 2, 4, 254, 190, 8, 7, 65, 192, 127, 0, 140, 191, 0, 3, 194, 192, 128, 2, 40, 126, 127, 0, 140, 191, 0, 95, 32, 240, 17, 3, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 18, 7, 70, 192, 20, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 12, 209, 1, 27, 0, 0, 8, 0, 12, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 136, 3, 4, 6, 125, 
0, 106, 234, 136, 126, 4, 128, 190, 0, 106, 254, 138, 22, 0, 136, 191, 6, 7, 132, 192, 10, 7, 65, 192, 12, 7, 2, 192, 127, 0, 140, 191, 0, 9, 198, 192, 2, 0, 6, 74, 3, 2, 8, 74, 4, 4, 10, 74, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 14, 7, 130, 192, 0, 11, 196, 192, 127, 0, 140, 191, 4, 0, 14, 74, 5, 2, 16, 74, 6, 4, 18, 74, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129, 0, 172, 0, 144, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 5, 0, 5, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 192, 127, 0, 140, 191, 0, 255, 0, 135, 255, 255, 0, 0, 0, 8, 0, 147, 0, 7, 65, 192, 18, 135, 0, 192, 127, 0, 140, 191, 0, 2, 0, 128, 0, 0, 0, 74, 1, 0, 8, 125, 106, 36, 128, 190, 15, 0, 136, 191, 6, 7, 132, 192, 10, 7, 1, 192, 127, 0, 140, 191, 0, 9, 134, 192, 2, 0, 2, 74, 127, 0, 140, 191, 0, 32, 12, 224, 1, 1, 3, 128, 14, 7, 1, 192, 0, 11, 130, 192, 127, 0, 140, 191, 2, 0, 0, 74, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 66, 192, 18, 135, 1, 192, 127, 0, 140, 191, 2, 4, 2, 128, 2, 0, 0, 74, 3, 0, 8, 125, 106, 36, 130, 190, 22, 0, 136, 191, 2, 7, 196, 192, 10, 7, 2, 192, 127, 0, 140, 191, 0, 13, 136, 192, 4, 0, 6, 74, 127, 0, 140, 191, 0, 32, 12, 224, 3, 3, 4, 128, 14, 7, 130, 192, 0, 15, 198, 192, 1, 10, 1, 128, 0, 8, 0, 128, 127, 0, 140, 191, 1, 6, 1, 128, 0, 5, 0, 128, 4, 0, 14, 74, 1, 4, 18, 74, 0, 2, 16, 74, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 3, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 7, 0, 7, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 66, 192, 18, 135, 1, 192, 127, 0, 140, 191, 2, 4, 2, 128, 2, 0, 0, 74, 3, 0, 8, 125, 106, 36, 130, 190, 23, 0, 136, 191, 2, 7, 196, 192, 10, 7, 66, 192, 127, 0, 140, 191, 12, 135, 4, 192, 0, 13, 200, 192, 1, 10, 1, 128, 0, 8, 0, 128, 127, 0, 140, 191, 1, 9, 1, 128, 0, 5, 0, 128, 4, 0, 6, 74, 1, 4, 10, 74, 0, 2, 8, 74, 128, 2, 12, 126, 0, 95, 0, 240, 3, 1, 4, 0, 14, 7, 0, 192, 0, 15, 130, 192, 127, 0, 140, 191, 0, 0, 0, 74, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 64, 192, 127, 0, 140, 191, 0, 255, 2, 135, 255, 255, 0, 0, 0, 255, 128, 147, 16, 0, 16, 0, 1, 255, 1, 135, 255, 255, 0, 0, 2, 8, 2, 147, 0, 9, 0, 147, 1, 10, 1, 147, 0, 7, 132, 192, 4, 7, 66, 192, 24, 7, 70, 192, 26, 135, 1, 192, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 74, 0, 2, 2, 74, 1, 4, 0, 128, 4, 0, 2, 209, 1, 27, 0, 0, 8, 0, 2, 209, 0, 25, 0, 0, 0, 4, 4, 74, 8, 4, 128, 135, 3, 4, 8, 125, 0, 106, 128, 135, 0, 36, 128, 190, 46, 0, 136, 191, 20, 7, 132, 192, 127, 0, 140, 191, 8, 0, 14, 74, 9, 2, 16, 74, 10, 4, 18, 74, 28, 7, 1, 192, 6, 7, 66, 192, 127, 0, 140, 191, 2, 130, 0, 191, 26, 0, 133, 191, 2, 129, 0, 191, 11, 0, 132, 191, 12, 7, 132, 192, 0, 5, 198, 192, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 23, 0, 130, 191, 2, 128, 0, 191, 21, 0, 132, 191, 8, 7, 132, 192, 0, 5, 198, 192, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 10, 0, 130, 191, 16, 7, 132, 192, 0, 5, 198, 192, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 172, 0, 144, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 5, 0, 5, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 192, 127, 0, 140, 191, 0, 255, 0, 135, 255, 255, 0, 0, 0, 8, 0, 147, 0, 7, 65, 192, 24, 135, 0, 192, 127, 0, 140, 191, 0, 2, 0, 128, 0, 0, 0, 74, 1, 0, 8, 125, 106, 36, 128, 190, 41, 0, 136, 191, 20, 7, 1, 192, 127, 0, 140, 191, 2, 0, 0, 74, 28, 7, 1, 192, 6, 7, 66, 192, 127, 0, 140, 191, 2, 130, 0, 191, 24, 0, 133, 191, 2, 129, 0, 191, 10, 0, 132, 191, 12, 7, 132, 192, 0, 5, 130, 192, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 21, 0, 130, 191, 2, 128, 0, 191, 19, 0, 132, 191, 8, 7, 132, 192, 0, 5, 130, 192, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 9, 0, 130, 191, 16, 7, 132, 192, 0, 5, 130, 192, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 0, 0, 0, 0, 40, 0, 0, 
0, 1, 0, 4, 0, 8, 2, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 1, 0, 4, 0, 16, 6, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 26, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 4, 0, 0, 0, 0, 0, 0, 149, 0, 0, 0, 26, 0, 5, 0, 0, 5, 0, 0, 0, 0, 0, 0, 132, 4, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 26, 0, 5, 0, 0, 10, 0, 0, 0, 0, 0, 0, 220, 1, 0, 0, 0, 0, 0, 0, 209, 0, 0, 0, 26, 0, 5, 0, 0, 12, 0, 0, 0, 0, 0, 0, 212, 12, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 26, 0, 5, 0, 0, 25, 0, 0, 0, 0, 0, 0, 220, 1, 0, 0, 0, 0, 0, 0, 33, 1, 0, 0, 26, 0, 5, 0, 0, 27, 0, 0, 0, 0, 0, 0, 116, 1, 0, 0, 0, 0, 0, 0, 58, 1, 0, 0, 26, 0, 5, 0, 0, 29, 0, 0, 0, 0, 0, 0, 168, 1, 0, 0, 0, 0, 0, 0, 90, 1, 0, 0, 26, 0, 5, 0, 0, 31, 0, 0, 0, 0, 0, 0, 172, 1, 0, 0, 0, 0, 0, 0, 122, 1, 0, 0, 26, 0, 5, 0, 0, 33, 0, 0, 0, 0, 0, 0, 56, 2, 0, 0, 0, 0, 0, 0, 144, 1, 0, 0, 26, 0, 5, 0, 0, 36, 0, 0, 0, 0, 0, 0, 220, 1, 0, 0, 0, 0, 0, 0, 170, 1, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207, 1, 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 14, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 14, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 14, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 14, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 16, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 16, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 18, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 18, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 18, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 18, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 172, 19, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180, 19, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 21, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212, 21, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 244, 21, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252, 21, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 23, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 23, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 229, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 2, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 1, 0, 0, 0, 3, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 1, 0, 0, 0, 7, 0, 192, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 220, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 51, 0, 0, 0, 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 53, 0, 0, 0, 0, 0, 0, 176, 1, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 5, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0};
}
}


================================================
FILE: runtime/hsa-runtime/image/blit_object_gfx8xx.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <stdint.h>
namespace rocr {
namespace image {
uint8_t blit_object_gfx8xx[] = {127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 224, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0, 2, 0, 64, 0, 8, 0, 1, 0, 2, 0, 0, 96, 6, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 96, 5, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 41, 0, 0, 0, 0, 0, 0, 12, 41, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 46, 115, 116, 114, 116, 97, 98, 0, 46, 110, 111, 116, 101, 0, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 114, 101, 108, 97, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 69, 88, 80, 95, 69, 80, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 69, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 95, 73, 78, 86, 95, 69, 80, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 116, 111, 95, 98, 117, 102, 102, 101, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 98, 117, 102, 102, 101, 114, 95, 116, 111, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 100, 101, 102, 97, 117, 108, 116, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 108, 105, 110, 101, 97, 
114, 95, 116, 111, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 116, 111, 95, 108, 105, 110, 101, 97, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 116, 111, 95, 114, 101, 103, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 114, 101, 103, 95, 116, 111, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 4, 0, 0, 0, 26, 0, 0, 0, 3, 0, 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, 71, 80, 85, 0, 0, 4, 0, 0, 0, 41, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, 25, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 32, 72, 83, 65, 32, 82, 117, 110, 116, 105, 109, 101, 32, 70, 105, 110, 97, 108, 105, 122, 101, 114, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 26, 0, 0, 0, 5, 0, 0, 0, 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108, 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 48, 0, 197, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 96, 129, 63, 119, 62, 26, 57, 0, 192, 130, 63, 138, 105, 216, 57, 0, 32, 132, 
63, 29, 70, 81, 58, 0, 160, 133, 63, 124, 54, 172, 57, 0, 0, 135, 63, 180, 12, 123, 58, 0, 128, 136, 63, 4, 116, 64, 58, 0, 0, 138, 63, 170, 171, 38, 58, 0, 128, 139, 63, 31, 15, 46, 58, 0, 0, 141, 63, 219, 250, 86, 58, 0, 160, 142, 63, 104, 49, 7, 57, 0, 32, 144, 63, 24, 226, 14, 58, 0, 192, 145, 63, 234, 220, 244, 56, 0, 64, 147, 63, 120, 89, 81, 58, 0, 224, 148, 63, 71, 125, 39, 58, 0, 128, 150, 63, 185, 105, 33, 58, 0, 32, 152, 63, 140, 130, 63, 58, 0, 224, 153, 63, 65, 38, 11, 55, 0, 128, 155, 63, 157, 155, 211, 57, 0, 32, 157, 63, 57, 205, 118, 58, 0, 224, 158, 63, 4, 147, 41, 58, 0, 160, 160, 63, 125, 136, 2, 58, 0, 96, 162, 63, 24, 24, 2, 58, 0, 32, 164, 63, 112, 173, 40, 58, 0, 224, 165, 63, 77, 181, 118, 58, 0, 192, 167, 63, 78, 59, 217, 57, 0, 160, 169, 63, 117, 90, 45, 56, 0, 96, 171, 63, 173, 205, 81, 58, 0, 64, 173, 63, 82, 247, 65, 58, 0, 32, 175, 63, 107, 197, 91, 58, 0, 32, 177, 63, 116, 96, 253, 56, 0, 0, 179, 63, 149, 32, 14, 58, 0, 0, 181, 63, 127, 102, 30, 57, 0, 224, 182, 63, 25, 143, 108, 58, 0, 224, 184, 63, 59, 122, 93, 58, 0, 224, 186, 63, 144, 213, 122, 58, 0, 0, 189, 63, 245, 57, 138, 57, 0, 0, 191, 63, 179, 205, 60, 58, 0, 32, 193, 63, 166, 204, 196, 57, 0, 64, 195, 63, 68, 155, 89, 57, 0, 96, 197, 63, 42, 66, 101, 57, 0, 128, 199, 63, 138, 76, 215, 57, 0, 160, 201, 63, 51, 236, 77, 58, 0, 224, 203, 63, 239, 79, 193, 57, 0, 32, 206, 63, 163, 130, 17, 57, 0, 96, 208, 63, 187, 246, 204, 56, 0, 160, 210, 63, 31, 217, 129, 57, 0, 224, 212, 63, 94, 213, 26, 58, 0, 64, 215, 63, 90, 153, 31, 57, 0, 128, 217, 63, 19, 174, 104, 58, 0, 224, 219, 63, 190, 188, 93, 58, 0, 96, 222, 63, 94, 130, 244, 55, 0, 192, 224, 63, 194, 238, 205, 57, 0, 32, 227, 63, 149, 75, 124, 58, 0, 160, 229, 63, 59, 55, 72, 58, 0, 32, 232, 63, 129, 82, 75, 58, 0, 192, 234, 63, 221, 231, 198, 55, 0, 64, 237, 63, 237, 1, 243, 57, 0, 224, 239, 63, 123, 51, 23, 57, 0, 128, 242, 63, 44, 158, 59, 56, 0, 32, 245, 63, 164, 162, 47, 57, 0, 192, 247, 63, 152, 251, 6, 58, 0, 128, 
250, 63, 220, 182, 236, 56, 0, 32, 253, 63, 103, 96, 112, 58, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 59, 65, 172, 41, 52, 0, 0, 126, 60, 252, 176, 168, 53, 0, 192, 189, 60, 234, 131, 141, 54, 0, 16, 252, 60, 120, 14, 27, 54, 0, 240, 28, 61, 254, 185, 135, 54, 0, 160, 59, 61, 101, 236, 49, 54, 0, 16, 90, 61, 25, 113, 221, 54, 0, 80, 120, 61, 69, 0, 195, 53, 0, 32, 139, 61, 81, 119, 155, 55, 0, 0, 154, 61, 13, 203, 235, 55, 0, 208, 168, 61, 131, 159, 131, 55, 0, 128, 183, 61, 229, 138, 82, 55, 0, 16, 198, 61, 24, 235, 162, 55, 0, 144, 212, 61, 149, 116, 218, 54, 0, 240, 226, 61, 183, 30, 169, 54, 0, 48, 241, 61, 21, 183, 131, 55, 0, 96, 255, 61, 219, 49, 17, 55, 0, 176, 6, 62, 104, 62, 63, 56, 0, 176, 13, 62, 151, 106, 21, 56, 0, 160, 20, 62, 15, 124, 41, 56, 0, 128, 27, 62, 15, 16, 126, 56, 0, 96, 34, 62, 101, 182, 21, 56, 0, 48, 41, 62, 161, 227, 229, 55, 0, 240, 47, 62, 83, 56, 24, 56, 0, 176, 54, 62, 157, 113, 254, 53, 0, 80, 61, 62, 8, 129, 68, 56, 0, 240, 67, 62, 144, 50, 80, 56, 0, 144, 74, 62, 232, 57, 53, 55, 0, 16, 81, 62, 241, 15, 94, 56, 0, 144, 87, 62, 64, 167, 100, 56, 0, 16, 94, 62, 45, 116, 134, 55, 0, 112, 100, 62, 205, 227, 123, 56, 0, 224, 106, 62, 62, 173, 133, 54, 0, 48, 113, 62, 21, 183, 3, 56, 0, 128, 119, 62, 220, 203, 173, 55, 0, 192, 125, 62, 175, 54, 12, 56, 0, 0, 130, 62, 211, 82, 22, 55, 0, 16, 133, 62, 57, 113, 146, 56, 0, 32, 136, 62, 215, 252, 197, 56, 0, 48, 139, 62, 213, 85, 174, 56, 0, 64, 142, 62, 105, 193, 24, 56, 0, 64, 145, 62, 231, 253, 160, 56, 0, 64, 148, 62, 239, 9, 173, 56, 0, 64, 151, 62, 225, 186, 98, 56, 0, 48, 154, 62, 76, 205, 238, 56, 0, 48, 157, 62, 210, 170, 152, 55, 0, 32, 160, 62, 26, 26, 66, 55, 0, 0, 163, 62, 14, 225, 197, 56, 0, 240, 165, 62, 238, 42, 191, 55, 0, 208, 168, 62, 45, 135, 45, 56, 0, 176, 171, 62, 138, 46, 238, 55, 0, 128, 174, 62, 172, 223, 222, 56, 0, 96, 177, 62, 185, 242, 2, 56, 0, 48, 180, 62, 155, 30, 72, 56, 0, 0, 183, 62, 43, 170, 14, 56, 0, 192, 185, 62, 93, 251, 235, 
56, 0, 144, 188, 62, 221, 95, 37, 56, 0, 80, 191, 62, 130, 59, 120, 56, 0, 16, 194, 62, 30, 218, 81, 56, 0, 208, 196, 62, 5, 27, 78, 55, 0, 128, 199, 62, 155, 67, 143, 56, 0, 48, 202, 62, 16, 14, 202, 56, 0, 224, 204, 62, 139, 192, 202, 56, 0, 144, 207, 62, 95, 246, 145, 56, 0, 64, 210, 62, 203, 33, 129, 55, 0, 224, 212, 62, 154, 154, 108, 56, 0, 128, 215, 62, 35, 153, 148, 56, 0, 32, 218, 62, 204, 123, 119, 56, 0, 192, 220, 62, 38, 45, 177, 55, 0, 80, 223, 62, 211, 206, 166, 56, 0, 224, 225, 62, 230, 211, 235, 56, 0, 112, 228, 62, 205, 227, 251, 56, 0, 0, 231, 62, 194, 133, 215, 56, 0, 144, 233, 62, 0, 126, 126, 56, 0, 16, 236, 62, 197, 146, 243, 56, 0, 160, 238, 62, 131, 9, 212, 55, 0, 32, 241, 62, 124, 26, 8, 56, 0, 160, 243, 62, 173, 195, 132, 55, 0, 16, 246, 62, 35, 233, 204, 56, 0, 144, 248, 62, 175, 95, 15, 56, 0, 0, 251, 62, 56, 253, 145, 56, 0, 112, 253, 62, 188, 71, 172, 56, 0, 224, 255, 62, 43, 4, 151, 56, 0, 32, 1, 63, 210, 82, 41, 57, 0, 80, 2, 63, 212, 206, 111, 57, 0, 144, 3, 63, 115, 112, 249, 55, 0, 192, 4, 63, 174, 158, 94, 56, 0, 240, 5, 63, 74, 200, 101, 56, 0, 32, 7, 63, 163, 11, 19, 56, 0, 64, 8, 63, 22, 207, 121, 57, 0, 112, 9, 63, 201, 202, 56, 57, 0, 160, 10, 63, 244, 210, 195, 56, 0, 192, 11, 63, 236, 93, 117, 57, 0, 240, 12, 63, 103, 180, 230, 56, 0, 16, 14, 63, 184, 15, 92, 57, 0, 64, 15, 63, 224, 188, 62, 56, 0, 96, 16, 63, 146, 209, 220, 56, 0, 128, 17, 63, 223, 107, 24, 57, 0, 160, 18, 63, 76, 231, 45, 57, 0, 192, 19, 63, 68, 9, 47, 57, 0, 224, 20, 63, 97, 255, 27, 57, 0, 0, 22, 63, 68, 237, 233, 56, 0, 32, 23, 63, 200, 109, 104, 56, 0, 48, 24, 63, 167, 153, 107, 57, 0, 80, 25, 63, 137, 156, 9, 57, 0, 112, 26, 63, 115, 118, 162, 55, 0, 128, 27, 63, 163, 218, 11, 57, 0, 144, 28, 63, 171, 105, 112, 57, 0, 176, 29, 63, 255, 73, 132, 56, 0, 192, 30, 63, 56, 53, 1, 57, 0, 208, 31, 63, 104, 194, 45, 57, 0, 224, 32, 63, 35, 244, 71, 57, 0, 240, 33, 63, 124, 241, 79, 57, 0, 0, 35, 63, 14, 225, 69, 57, 0, 16, 36, 63, 245, 232, 41, 57, 0, 32, 
37, 63, 176, 93, 248, 56, 0, 48, 38, 63, 153, 95, 115, 56, 0, 48, 39, 63, 219, 8, 108, 57, 0, 64, 40, 63, 0, 230, 9, 57, 0, 80, 41, 63, 111, 153, 180, 55, 0, 80, 42, 63, 204, 51, 18, 57, 0, 80, 43, 63, 217, 234, 124, 57, 0, 96, 44, 63, 205, 181, 173, 56, 0, 96, 45, 63, 26, 38, 32, 57, 0, 96, 46, 63, 54, 238, 88, 57, 0, 112, 47, 63, 5, 73, 170, 53, 0, 112, 48, 63, 30, 209, 203, 55, 0, 112, 49, 63, 244, 253, 5, 56, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 254, 63, 248, 3, 254, 56, 0, 0, 252, 63, 193, 15, 252, 57, 0, 0, 250, 63, 201, 179, 140, 58, 0, 0, 248, 63, 16, 62, 248, 58, 0, 0, 246, 63, 48, 123, 64, 59, 0, 0, 244, 63, 96, 141, 137, 59, 0, 0, 242, 63, 72, 214, 185, 59, 0, 0, 240, 63, 241, 240, 240, 59, 0, 0, 239, 63, 127, 220, 186, 58, 0, 0, 237, 63, 108, 7, 102, 59, 0, 0, 235, 63, 166, 178, 189, 59, 0, 0, 234, 63, 161, 14, 234, 57, 0, 0, 232, 63, 247, 88, 75, 59, 0, 0, 230, 63, 72, 180, 194, 59, 0, 0, 229, 63, 172, 96, 150, 58, 0, 0, 227, 63, 228, 56, 142, 59, 0, 0, 225, 63, 14, 120, 252, 59, 0, 0, 224, 63, 56, 112, 96, 59, 0, 0, 222, 63, 77, 92, 233, 59, 0, 0, 221, 63, 76, 145, 79, 59, 0, 0, 219, 63, 239, 97, 235, 59, 0, 0, 218, 63, 79, 27, 104, 59, 0, 0, 217, 63, 178, 1, 89, 56, 0, 0, 215, 63, 229, 53, 148, 59, 0, 0, 214, 63, 89, 3, 174, 58, 0, 0, 212, 63, 3, 123, 199, 59, 0, 0, 211, 63, 109, 26, 80, 59, 0, 0, 210, 63, 33, 13, 210, 57, 0, 0, 208, 63, 204, 159, 182, 59, 0, 0, 207, 63, 81, 233, 72, 59, 0, 0, 206, 63, 185, 83, 52, 58, 0, 0, 204, 63, 205, 204, 204, 59, 0, 0, 203, 63, 192, 39, 135, 59, 0, 0, 202, 63, 205, 15, 11, 59, 0, 0, 201, 63, 209, 73, 123, 57, 0, 0, 199, 63, 125, 12, 206, 59, 0, 0, 198, 63, 106, 12, 152, 59, 0, 0, 197, 63, 247, 144, 75, 59, 0, 0, 196, 63, 21, 190, 220, 58, 0, 0, 195, 63, 49, 12, 195, 57, 0, 0, 193, 63, 214, 187, 228, 59, 0, 0, 192, 63, 193, 192, 192, 59, 0, 0, 191, 63, 232, 47, 160, 59, 0, 0, 190, 63, 12, 250, 130, 59, 0, 0, 189, 63, 142, 32, 82, 59, 0, 0, 188, 63, 24, 200, 36, 59, 0, 0, 187, 63, 135, 156, 251, 58, 0, 0, 186, 63, 140, 
46, 186, 58, 0, 0, 185, 63, 233, 15, 133, 58, 0, 0, 184, 63, 3, 23, 56, 58, 0, 0, 183, 63, 162, 181, 251, 57, 0, 0, 182, 63, 97, 11, 182, 57, 0, 0, 181, 63, 170, 104, 158, 57, 0, 0, 180, 63, 65, 11, 180, 57, 0, 0, 179, 63, 41, 53, 246, 57, 0, 0, 178, 63, 67, 22, 50, 58, 0, 0, 177, 63, 192, 157, 126, 58, 0, 0, 176, 63, 11, 44, 176, 58, 0, 0, 175, 63, 26, 119, 235, 58, 0, 0, 174, 63, 185, 130, 24, 59, 0, 0, 173, 63, 176, 86, 64, 59, 0, 0, 172, 63, 8, 35, 109, 59, 0, 0, 171, 63, 227, 105, 143, 59, 0, 0, 170, 63, 171, 170, 170, 59, 0, 0, 169, 63, 72, 74, 200, 59, 0, 0, 168, 63, 87, 63, 232, 59, 0, 0, 168, 63, 129, 10, 168, 57, 0, 0, 167, 63, 230, 20, 188, 58, 0, 0, 166, 63, 114, 136, 43, 59, 0, 0, 165, 63, 5, 106, 125, 59, 0, 0, 164, 63, 30, 207, 169, 59, 0, 0, 163, 63, 61, 10, 215, 59, 0, 0, 163, 63, 246, 199, 75, 57, 0, 0, 162, 63, 172, 12, 223, 58, 0, 0, 161, 63, 93, 98, 86, 59, 0, 0, 160, 63, 161, 160, 160, 59, 0, 0, 159, 63, 254, 9, 216, 59, 0, 0, 159, 63, 57, 47, 11, 58, 0, 0, 158, 63, 72, 90, 25, 59, 0, 0, 157, 63, 158, 216, 137, 59, 0, 0, 156, 63, 97, 225, 200, 59, 0, 0, 156, 63, 193, 9, 156, 57, 0, 0, 155, 63, 62, 223, 24, 59, 0, 0, 154, 63, 217, 231, 144, 59, 0, 0, 153, 63, 219, 34, 215, 59, 0, 0, 153, 63, 139, 210, 120, 58, 0, 0, 152, 63, 19, 144, 81, 59, 0, 0, 151, 63, 237, 37, 180, 59, 0, 0, 151, 63, 46, 1, 23, 56, 0, 0, 150, 63, 216, 180, 31, 59, 0, 0, 149, 63, 104, 37, 160, 59, 0, 0, 148, 63, 79, 9, 242, 59, 0, 0, 148, 63, 41, 1, 11, 59, 0, 0, 147, 63, 196, 133, 154, 59, 0, 0, 146, 63, 132, 19, 241, 59, 0, 0, 146, 63, 37, 73, 18, 59, 0, 0, 145, 63, 197, 179, 162, 59, 0, 0, 144, 63, 9, 188, 253, 59, 0, 0, 144, 63, 198, 112, 52, 59, 0, 0, 143, 63, 238, 35, 184, 59, 0, 0, 143, 63, 208, 206, 59, 58, 0, 0, 142, 63, 218, 106, 112, 59, 0, 0, 141, 63, 2, 82, 218, 59, 0, 0, 141, 63, 35, 44, 247, 58, 0, 0, 140, 63, 4, 156, 162, 59, 0, 0, 140, 63, 193, 8, 140, 57, 0, 0, 139, 63, 148, 104, 96, 59, 0, 0, 138, 63, 252, 242, 216, 59, 0, 0, 138, 63, 225, 240, 5, 59, 0, 
0, 137, 63, 138, 64, 174, 59, 0, 0, 137, 63, 215, 57, 86, 58, 0, 0, 136, 63, 137, 136, 136, 59, 0, 0, 135, 63, 136, 128, 247, 59, 0, 0, 135, 63, 190, 86, 79, 59, 0, 0, 134, 63, 68, 5, 217, 59, 0, 0, 134, 63, 252, 20, 23, 59, 0, 0, 133, 63, 97, 55, 191, 59, 0, 0, 133, 63, 77, 33, 208, 58, 0, 0, 132, 63, 200, 249, 169, 59, 0, 0, 132, 63, 8, 33, 132, 58, 0, 0, 131, 63, 82, 48, 153, 59, 0, 0, 131, 63, 188, 116, 19, 58, 0, 0, 130, 63, 191, 191, 140, 59, 0, 0, 130, 63, 33, 8, 130, 57, 0, 0, 129, 63, 169, 141, 132, 59, 0, 0, 129, 63, 4, 2, 129, 56, 0, 0, 128, 63, 129, 128, 128, 59, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 11, 0, 11, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 104, 0, 0, 0, 195, 0, 2, 192, 112, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 193, 208, 1, 27, 0, 0, 8, 0, 193, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 
134, 3, 4, 136, 125, 0, 106, 128, 134, 0, 32, 128, 190, 199, 0, 136, 191, 131, 0, 6, 192, 24, 0, 0, 0, 3, 1, 6, 192, 56, 0, 0, 0, 3, 2, 2, 192, 64, 0, 0, 0, 131, 2, 6, 192, 72, 0, 0, 0, 67, 2, 2, 192, 128, 0, 0, 0, 3, 3, 10, 192, 136, 0, 0, 0, 159, 0, 6, 34, 159, 2, 8, 34, 127, 0, 140, 191, 5, 0, 134, 210, 1, 25, 0, 0, 4, 0, 133, 210, 4, 25, 0, 0, 4, 11, 8, 50, 5, 0, 133, 210, 1, 27, 0, 0, 5, 9, 8, 50, 5, 0, 133, 210, 1, 25, 0, 0, 5, 106, 25, 209, 5, 1, 2, 0, 4, 7, 6, 56, 4, 0, 14, 50, 5, 2, 16, 50, 8, 4, 18, 50, 159, 4, 12, 34, 1, 4, 14, 192, 0, 0, 0, 0, 128, 2, 20, 126, 127, 0, 140, 191, 0, 95, 0, 240, 7, 7, 4, 0, 0, 0, 134, 210, 2, 29, 0, 0, 1, 0, 133, 210, 6, 29, 0, 0, 1, 1, 0, 50, 1, 0, 133, 210, 2, 31, 0, 0, 1, 1, 0, 50, 1, 0, 133, 210, 2, 29, 0, 0, 1, 106, 25, 209, 1, 11, 2, 0, 0, 7, 0, 56, 2, 0, 134, 210, 1, 19, 0, 0, 0, 0, 133, 210, 0, 19, 0, 0, 0, 5, 0, 50, 1, 0, 133, 210, 1, 19, 0, 0, 3, 106, 25, 209, 1, 21, 0, 0, 11, 2, 4, 126, 0, 5, 8, 56, 131, 0, 6, 192, 120, 0, 0, 0, 3, 1, 6, 192, 32, 0, 0, 0, 127, 0, 140, 191, 2, 132, 0, 191, 85, 0, 133, 191, 3, 2, 6, 192, 40, 0, 0, 0, 2, 130, 0, 191, 41, 0, 132, 191, 3, 132, 0, 191, 29, 0, 133, 191, 3, 130, 0, 191, 12, 0, 132, 191, 0, 0, 143, 210, 130, 6, 2, 0, 112, 15, 140, 191, 144, 16, 4, 36, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 6, 126, 3, 3, 2, 56, 2, 15, 4, 40, 0, 0, 112, 220, 0, 2, 0, 0, 110, 0, 130, 191, 3, 129, 0, 191, 108, 0, 132, 191, 0, 0, 143, 210, 129, 6, 2, 0, 112, 15, 140, 191, 136, 16, 4, 36, 127, 0, 140, 191, 0, 106, 25, 209, 8, 0, 2, 0, 9, 2, 6, 126, 3, 3, 2, 56, 2, 15, 4, 40, 0, 0, 104, 220, 0, 2, 0, 0, 95, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 0, 116, 220, 0, 7, 0, 0, 85, 0, 130, 191, 2, 129, 0, 191, 83, 0, 132, 191, 3, 132, 0, 191, 26, 0, 133, 191, 3, 130, 0, 191, 11, 0, 132, 191, 0, 0, 143, 210, 129, 6, 2, 0, 127, 0, 140, 191, 0, 106, 25, 209, 8, 0, 2, 0, 9, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 0, 104, 
220, 0, 7, 0, 0, 68, 0, 130, 191, 3, 129, 0, 191, 66, 0, 132, 191, 131, 0, 6, 192, 48, 0, 0, 0, 127, 0, 140, 191, 0, 106, 25, 209, 2, 6, 2, 0, 3, 2, 4, 126, 2, 9, 2, 56, 112, 15, 140, 191, 0, 0, 96, 220, 0, 7, 0, 0, 55, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 0, 112, 220, 0, 7, 0, 0, 45, 0, 130, 191, 3, 132, 0, 191, 34, 0, 133, 191, 3, 130, 0, 191, 14, 0, 132, 191, 112, 15, 140, 191, 144, 16, 0, 36, 0, 15, 10, 40, 1, 0, 143, 210, 130, 6, 2, 0, 1, 106, 25, 209, 4, 2, 2, 0, 5, 2, 6, 126, 3, 5, 4, 56, 144, 20, 6, 36, 3, 19, 12, 40, 0, 0, 116, 220, 1, 5, 0, 0, 27, 0, 130, 191, 3, 129, 0, 191, 25, 0, 132, 191, 112, 15, 140, 191, 136, 16, 0, 36, 0, 15, 0, 40, 144, 18, 2, 36, 2, 0, 143, 210, 130, 6, 2, 0, 0, 3, 0, 40, 152, 20, 2, 36, 2, 106, 25, 209, 4, 4, 2, 0, 5, 2, 8, 126, 4, 7, 6, 56, 0, 3, 0, 40, 0, 0, 112, 220, 2, 0, 0, 0, 9, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 0, 124, 220, 0, 7, 0, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 19, 0, 19, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 88, 0, 0, 0, 195, 0, 2, 192, 96, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 193, 208, 1, 27, 0, 0, 8, 0, 193, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 134, 3, 4, 136, 125, 0, 106, 128, 134, 0, 32, 128, 190, 206, 0, 136, 191, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 6, 192, 72, 0, 0, 0, 3, 2, 2, 192, 80, 0, 0, 0, 67, 2, 2, 192, 112, 0, 0, 0, 3, 3, 10, 192, 120, 0, 0, 0, 159, 0, 6, 34, 159, 2, 8, 34, 127, 0, 140, 191, 5, 0, 134, 210, 1, 25, 0, 0, 4, 0, 133, 210, 4, 25, 0, 0, 4, 11, 8, 50, 5, 0, 133, 210, 1, 27, 0, 0, 5, 9, 8, 50, 5, 0, 133, 210, 1, 25, 0, 0, 5, 106, 25, 209, 5, 1, 2, 0, 4, 7, 6, 56, 159, 4, 8, 34, 6, 0, 134, 210, 2, 29, 0, 0, 4, 0, 133, 210, 4, 29, 0, 0, 4, 13, 8, 50, 6, 0, 133, 210, 2, 31, 0, 0, 6, 9, 8, 50, 6, 0, 133, 210, 2, 29, 0, 0, 5, 106, 25, 209, 6, 11, 2, 0, 4, 7, 6, 56, 4, 0, 134, 210, 5, 19, 0, 0, 3, 0, 133, 210, 3, 19, 0, 0, 3, 9, 6, 50, 4, 0, 133, 210, 5, 19, 0, 0, 6, 106, 25, 209, 4, 5, 0, 0, 3, 2, 10, 126, 3, 11, 14, 56, 4, 0, 30, 50, 5, 2, 32, 50, 8, 4, 34, 50, 131, 0, 6, 192, 104, 0, 0, 0, 3, 2, 6, 192, 24, 0, 0, 0, 127, 0, 140, 191, 2, 132, 0, 191, 78, 0, 133, 191, 2, 130, 0, 191, 40, 0, 132, 191, 3, 130, 0, 191, 14, 0, 132, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 80, 220, 3, 0, 0, 3, 112, 0, 140, 191, 249, 2, 12, 126, 3, 6, 5, 0, 249, 2, 10, 126, 3, 6, 4, 0, 57, 0, 130, 191, 3, 129, 0, 191, 13, 0, 132, 191, 3, 0, 143, 210, 129, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 72, 220, 3, 0, 0, 3, 112, 0, 140, 191, 136, 6, 12, 32, 249, 2, 10, 126, 
3, 6, 0, 0, 42, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 84, 220, 3, 0, 0, 5, 33, 0, 130, 191, 2, 129, 0, 191, 29, 0, 132, 191, 3, 130, 0, 191, 9, 0, 132, 191, 3, 0, 143, 210, 129, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 72, 220, 3, 0, 0, 5, 19, 0, 130, 191, 3, 129, 0, 191, 7, 0, 132, 191, 3, 106, 25, 209, 8, 12, 2, 0, 9, 2, 10, 126, 5, 15, 8, 56, 0, 0, 64, 220, 3, 0, 0, 5, 10, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 80, 220, 3, 0, 0, 5, 1, 0, 130, 191, 2, 2, 10, 126, 3, 2, 12, 126, 5, 2, 16, 126, 4, 2, 14, 126, 58, 0, 130, 191, 3, 129, 0, 191, 18, 0, 132, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 80, 220, 3, 0, 0, 3, 112, 0, 140, 191, 249, 2, 16, 126, 3, 6, 3, 0, 249, 2, 14, 126, 3, 6, 2, 0, 249, 2, 12, 126, 3, 6, 1, 0, 249, 2, 10, 126, 3, 6, 0, 0, 38, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 0, 80, 220, 3, 0, 0, 5, 3, 130, 0, 191, 16, 0, 132, 191, 3, 106, 25, 209, 3, 9, 1, 0, 4, 106, 28, 209, 4, 1, 169, 1, 0, 0, 80, 220, 3, 0, 0, 3, 112, 0, 140, 191, 249, 2, 16, 126, 3, 6, 5, 0, 249, 2, 14, 126, 3, 6, 4, 0, 249, 2, 12, 126, 5, 6, 5, 0, 249, 2, 10, 126, 5, 6, 4, 0, 12, 0, 130, 191, 6, 106, 25, 209, 3, 25, 1, 0, 7, 106, 28, 209, 4, 1, 169, 1, 0, 0, 80, 220, 6, 0, 0, 8, 3, 106, 25, 209, 3, 9, 1, 0, 4, 106, 28, 209, 4, 1, 169, 1, 0, 0, 84, 220, 3, 0, 0, 6, 131, 0, 6, 192, 32, 0, 0, 0, 127, 0, 140, 191, 1, 1, 14, 192, 0, 0, 0, 0, 128, 2, 36, 126, 112, 0, 140, 191, 0, 95, 32, 240, 15, 5, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 194, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 195, 0, 2, 192, 80, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 198, 208, 1, 27, 0, 0, 8, 0, 198, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 135, 3, 4, 134, 125, 0, 106, 234, 135, 126, 1, 128, 190, 0, 106, 254, 137, 28, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 2, 192, 48, 0, 0, 0, 127, 0, 140, 191, 4, 3, 14, 192, 0, 0, 0, 0, 2, 0, 6, 50, 3, 2, 8, 50, 4, 4, 10, 50, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 3, 1, 10, 192, 56, 0, 0, 0, 5, 2, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 4, 0, 14, 50, 5, 2, 16, 50, 6, 4, 18, 50, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 21, 0, 21, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 195, 0, 2, 192, 80, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 198, 208, 1, 27, 0, 0, 8, 0, 198, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 135, 3, 4, 134, 125, 0, 106, 234, 135, 126, 1, 128, 190, 0, 106, 254, 137, 225, 2, 136, 191, 131, 0, 6, 192, 24, 0, 0, 0, 3, 1, 6, 192, 40, 0, 0, 0, 3, 2, 2, 192, 48, 0, 0, 0, 127, 0, 140, 191, 1, 3, 14, 192, 0, 0, 0, 0, 4, 0, 6, 50, 5, 2, 8, 50, 8, 4, 10, 50, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 112, 15, 140, 191, 106, 0, 16, 208, 3, 7, 1, 0, 3, 0, 
0, 209, 3, 1, 169, 1, 242, 6, 156, 124, 106, 32, 130, 190, 225, 0, 136, 191, 128, 6, 136, 124, 106, 32, 132, 190, 128, 2, 6, 126, 4, 126, 254, 137, 219, 0, 136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 8, 6, 136, 124, 106, 32, 136, 190, 255, 6, 6, 10, 82, 184, 78, 65, 8, 126, 254, 137, 242, 6, 6, 10, 210, 0, 136, 191, 255, 6, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 138, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 21, 0, 0, 126, 1, 138, 190, 10, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 50, 0, 0, 128, 0, 255, 18, 18, 50, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 50, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 0, 84, 220, 10, 0, 0, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 1, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 50, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 30, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 31, 26, 2, 10, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 22, 48, 171, 170, 170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 15, 0, 193, 209, 9, 227, 41, 132, 15, 17, 26, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 
16, 42, 0, 0, 0, 128, 10, 1, 254, 190, 8, 27, 20, 4, 15, 29, 18, 4, 15, 21, 20, 2, 12, 19, 18, 4, 255, 26, 22, 38, 0, 240, 255, 255, 9, 21, 18, 2, 13, 23, 16, 4, 9, 17, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 11, 19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 11, 17, 18, 46, 0, 80, 213, 62, 255, 18, 20, 10, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 38, 131, 24, 24, 36, 255, 0, 139, 190, 85, 85, 85, 85, 255, 0, 138, 190, 85, 85, 85, 85, 12, 106, 25, 209, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 138, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 193, 209, 10, 22, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 23, 16, 2, 14, 31, 22, 46, 239, 47, 228, 183, 8, 23, 22, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 138, 190, 171, 170, 42, 61, 10, 22, 28, 44, 14, 0, 193, 209, 14, 23, 194, 3, 11, 23, 30, 10, 14, 31, 22, 44, 255, 0, 138, 190, 8, 227, 130, 180, 255, 0, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 23, 26, 44, 12, 0, 68, 208, 8, 21, 0, 0, 11, 18, 132, 124, 12, 23, 26, 44, 106, 12, 140, 134, 11, 18, 130, 124, 134, 20, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 138, 190, 208, 142, 206, 194, 8, 21, 16, 0, 10, 18, 150, 124, 128, 16, 16, 0, 3, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 10, 0, 194, 208, 3, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 42, 0, 3, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 7, 14, 0, 242, 6, 138, 125, 242, 14, 6, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 138, 190, 61, 10, 135, 63, 3, 0, 193, 209, 3, 21, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 6, 126, 2, 1, 254, 190, 106, 0, 16, 208, 4, 7, 1, 0, 4, 0, 0, 209, 4, 1, 169, 1, 242, 8, 156, 124, 2, 106, 254, 134, 225, 0, 136, 191, 128, 8, 136, 124, 106, 32, 132, 190, 128, 2, 8, 126, 4, 126, 254, 137, 219, 0, 
136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 8, 8, 136, 124, 106, 32, 136, 190, 255, 8, 8, 10, 82, 184, 78, 65, 8, 126, 254, 137, 242, 8, 8, 10, 210, 0, 136, 191, 255, 8, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 138, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 21, 0, 0, 126, 1, 138, 190, 10, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 50, 0, 0, 128, 0, 255, 18, 18, 50, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 50, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 0, 84, 220, 10, 0, 0, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 1, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 50, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 30, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 31, 26, 2, 10, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 22, 48, 171, 170, 170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 15, 0, 193, 209, 9, 227, 41, 132, 15, 17, 26, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 16, 42, 0, 0, 0, 128, 10, 1, 254, 190, 8, 27, 20, 4, 15, 29, 18, 4, 15, 21, 20, 2, 12, 19, 18, 4, 255, 26, 22, 38, 0, 240, 255, 255, 9, 21, 18, 2, 13, 23, 
16, 4, 9, 17, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 11, 19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 11, 17, 18, 46, 0, 80, 213, 62, 255, 18, 20, 10, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 38, 131, 24, 24, 36, 255, 0, 139, 190, 85, 85, 85, 85, 255, 0, 138, 190, 85, 85, 85, 85, 12, 106, 25, 209, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 138, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 193, 209, 10, 22, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 23, 16, 2, 14, 31, 22, 46, 239, 47, 228, 183, 8, 23, 22, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 138, 190, 171, 170, 42, 61, 10, 22, 28, 44, 14, 0, 193, 209, 14, 23, 194, 3, 11, 23, 30, 10, 14, 31, 22, 44, 255, 0, 138, 190, 8, 227, 130, 180, 255, 0, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 23, 26, 44, 12, 0, 68, 208, 8, 21, 0, 0, 11, 18, 132, 124, 12, 23, 26, 44, 106, 12, 140, 134, 11, 18, 130, 124, 134, 20, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 138, 190, 208, 142, 206, 194, 8, 21, 16, 0, 10, 18, 150, 124, 128, 16, 16, 0, 4, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 10, 0, 194, 208, 4, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 42, 0, 4, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 9, 14, 0, 242, 8, 138, 125, 242, 14, 8, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 138, 190, 61, 10, 135, 63, 4, 0, 193, 209, 4, 21, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 8, 126, 2, 1, 254, 190, 3, 2, 10, 192, 56, 0, 0, 0, 106, 0, 16, 208, 5, 7, 1, 0, 5, 0, 0, 209, 5, 1, 169, 1, 127, 0, 140, 191, 8, 0, 34, 50, 9, 2, 36, 50, 10, 4, 38, 50, 242, 10, 156, 124, 106, 32, 130, 190, 225, 0, 136, 191, 128, 10, 136, 124, 106, 32, 132, 190, 128, 2, 10, 126, 4, 126, 254, 137, 219, 0, 136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 8, 10, 136, 124, 106, 
32, 136, 190, 255, 10, 10, 10, 82, 184, 78, 65, 8, 126, 254, 137, 242, 10, 10, 10, 210, 0, 136, 191, 255, 10, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 138, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 21, 0, 0, 126, 1, 138, 190, 10, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 50, 0, 0, 128, 0, 255, 18, 18, 50, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 50, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 0, 84, 220, 10, 0, 0, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 1, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 50, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 0, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 26, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 27, 30, 2, 10, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 22, 48, 171, 170, 170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 13, 0, 193, 209, 9, 227, 41, 132, 13, 17, 30, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 16, 42, 0, 0, 0, 128, 10, 1, 254, 190, 8, 31, 16, 4, 13, 29, 20, 4, 13, 17, 16, 2, 12, 21, 18, 4, 255, 30, 20, 38, 0, 240, 255, 255, 9, 17, 16, 2, 15, 21, 18, 4, 8, 19, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 10, 
19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 10, 17, 18, 46, 0, 80, 213, 62, 255, 18, 22, 10, 59, 170, 184, 66, 11, 17, 22, 126, 191, 22, 24, 38, 131, 24, 24, 36, 255, 0, 139, 190, 85, 85, 85, 85, 255, 0, 138, 190, 85, 85, 85, 85, 12, 106, 25, 209, 10, 24, 2, 0, 11, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 0, 84, 220, 12, 0, 0, 12, 255, 0, 138, 190, 0, 80, 213, 62, 11, 11, 28, 126, 10, 0, 193, 209, 10, 20, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 21, 16, 2, 14, 31, 20, 46, 239, 47, 228, 183, 8, 21, 20, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 138, 190, 171, 170, 42, 61, 10, 20, 28, 44, 14, 0, 193, 209, 14, 21, 194, 3, 10, 21, 30, 10, 14, 31, 20, 44, 255, 0, 138, 190, 8, 227, 130, 180, 255, 0, 139, 190, 24, 114, 177, 66, 112, 0, 140, 191, 13, 21, 26, 44, 12, 0, 68, 208, 8, 21, 0, 0, 11, 18, 132, 124, 12, 21, 26, 44, 106, 12, 140, 134, 11, 18, 130, 124, 134, 22, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 138, 190, 208, 142, 206, 194, 8, 21, 16, 0, 10, 18, 150, 124, 128, 16, 16, 0, 5, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 138, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 10, 0, 194, 208, 5, 21, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 42, 0, 5, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 11, 14, 0, 242, 10, 138, 125, 242, 14, 10, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 138, 190, 61, 10, 135, 63, 5, 0, 193, 209, 5, 21, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 10, 126, 2, 1, 254, 190, 131, 0, 6, 192, 32, 0, 0, 0, 127, 0, 140, 191, 1, 1, 14, 192, 0, 0, 0, 0, 128, 2, 40, 126, 127, 0, 140, 191, 0, 95, 32, 240, 17, 3, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 195, 0, 2, 192, 80, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 198, 208, 1, 27, 0, 0, 8, 0, 198, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 135, 3, 4, 134, 125, 0, 106, 234, 135, 126, 1, 128, 190, 0, 106, 254, 137, 28, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 2, 192, 48, 0, 0, 0, 127, 0, 140, 191, 4, 3, 14, 192, 0, 0, 0, 0, 2, 0, 6, 50, 3, 2, 8, 50, 4, 4, 10, 50, 128, 2, 12, 126, 127, 0, 140, 191, 0, 95, 0, 240, 3, 3, 3, 
0, 3, 1, 10, 192, 56, 0, 0, 0, 5, 2, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 4, 0, 14, 50, 5, 2, 16, 50, 6, 4, 18, 50, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 2, 172, 0, 144, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 5, 0, 5, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 0, 134, 255, 255, 0, 0, 0, 8, 0, 146, 131, 0, 6, 192, 0, 0, 0, 0, 67, 0, 2, 192, 72, 0, 0, 0, 127, 0, 140, 191, 0, 2, 0, 128, 0, 0, 0, 50, 1, 0, 136, 125, 106, 32, 128, 190, 20, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 131, 0, 
2, 192, 40, 0, 0, 0, 127, 0, 140, 191, 4, 3, 10, 192, 0, 0, 0, 0, 2, 0, 2, 50, 127, 0, 140, 191, 0, 32, 12, 224, 1, 1, 3, 128, 131, 0, 2, 192, 56, 0, 0, 0, 5, 1, 10, 192, 0, 0, 0, 0, 127, 0, 140, 191, 2, 0, 0, 50, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 11, 0, 11, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 1, 6, 192, 0, 0, 0, 0, 195, 0, 2, 192, 72, 0, 0, 0, 127, 0, 140, 191, 2, 4, 2, 128, 2, 0, 0, 50, 3, 0, 136, 125, 106, 32, 130, 190, 29, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 3, 1, 2, 192, 40, 0, 0, 0, 127, 0, 140, 191, 4, 3, 10, 192, 0, 0, 0, 0, 4, 0, 6, 50, 127, 0, 140, 191, 0, 32, 12, 224, 3, 3, 3, 128, 3, 3, 10, 192, 8, 0, 0, 0, 3, 1, 10, 192, 56, 0, 0, 0, 5, 4, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 1, 14, 1, 128, 0, 
12, 0, 128, 1, 6, 1, 128, 0, 5, 0, 128, 4, 0, 14, 50, 1, 4, 18, 50, 0, 2, 16, 50, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 4, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 7, 0, 7, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 1, 6, 192, 0, 0, 0, 0, 195, 0, 2, 192, 72, 0, 0, 0, 127, 0, 140, 191, 2, 4, 2, 128, 2, 0, 0, 50, 3, 0, 136, 125, 106, 32, 130, 190, 31, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 3, 3, 10, 192, 8, 0, 0, 0, 3, 1, 6, 192, 40, 0, 0, 0, 127, 0, 140, 191, 67, 3, 2, 192, 48, 0, 0, 0, 4, 4, 14, 192, 0, 0, 0, 0, 1, 14, 1, 128, 0, 12, 0, 128, 127, 0, 140, 191, 1, 13, 1, 128, 0, 5, 0, 128, 4, 0, 6, 50, 1, 4, 10, 50, 0, 2, 8, 50, 128, 2, 12, 126, 0, 95, 0, 240, 3, 1, 4, 0, 3, 0, 2, 192, 56, 0, 0, 0, 5, 1, 10, 192, 0, 0, 0, 0, 127, 0, 140, 191, 0, 0, 0, 50, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 2, 172, 0, 144, 19, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 2, 134, 255, 255, 0, 0, 0, 255, 128, 146, 16, 0, 16, 0, 1, 255, 1, 134, 255, 255, 0, 0, 2, 8, 2, 146, 0, 9, 0, 146, 1, 10, 1, 146, 3, 2, 10, 192, 0, 0, 0, 0, 3, 1, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 96, 0, 0, 0, 195, 0, 2, 192, 104, 0, 0, 0, 127, 0, 140, 191, 2, 8, 2, 128, 0, 10, 0, 128, 2, 0, 0, 50, 0, 2, 2, 50, 1, 4, 0, 128, 4, 0, 193, 208, 1, 27, 0, 0, 8, 0, 193, 208, 0, 25, 0, 0, 0, 4, 4, 50, 8, 4, 128, 134, 3, 4, 136, 125, 0, 106, 128, 134, 0, 32, 128, 190, 55, 0, 136, 191, 3, 2, 10, 192, 80, 0, 0, 0, 127, 0, 140, 191, 8, 0, 14, 50, 9, 2, 16, 50, 10, 4, 18, 50, 131, 0, 2, 192, 112, 0, 0, 0, 3, 1, 6, 192, 24, 0, 0, 0, 127, 0, 140, 191, 2, 130, 0, 191, 30, 0, 133, 191, 2, 129, 0, 191, 13, 0, 132, 191, 3, 2, 10, 192, 48, 0, 0, 0, 2, 3, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 27, 0, 130, 191, 2, 128, 0, 191, 25, 0, 132, 191, 3, 2, 10, 192, 32, 0, 0, 0, 2, 3, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 12, 0, 130, 191, 3, 2, 10, 192, 64, 0, 
0, 0, 2, 3, 14, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193, 2, 172, 0, 144, 0, 0, 0, 11, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 5, 0, 5, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 192, 4, 0, 0, 0, 127, 0, 140, 191, 0, 255, 0, 134, 255, 255, 0, 0, 0, 8, 0, 146, 131, 0, 6, 192, 0, 0, 0, 0, 67, 0, 2, 192, 96, 0, 0, 0, 127, 0, 140, 191, 0, 2, 0, 128, 0, 0, 0, 50, 1, 0, 136, 125, 106, 32, 128, 190, 50, 0, 136, 191, 131, 0, 2, 192, 80, 0, 0, 0, 127, 0, 140, 191, 2, 0, 0, 50, 131, 0, 2, 192, 112, 0, 0, 0, 3, 1, 6, 192, 24, 0, 0, 0, 127, 0, 140, 191, 2, 130, 0, 191, 28, 0, 133, 191, 2, 129, 0, 191, 12, 0, 132, 191, 3, 2, 10, 192, 48, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 25, 0, 130, 
191, 2, 128, 0, 191, 23, 0, 132, 191, 3, 2, 10, 192, 32, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 11, 0, 130, 191, 3, 2, 10, 192, 64, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 0, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 0, 0, 0, 0, 40, 0, 0, 0, 1, 0, 4, 0, 8, 2, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 1, 0, 4, 0, 16, 6, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 26, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 4, 0, 0, 0, 0, 0, 0, 149, 0, 0, 0, 26, 0, 5, 0, 0, 5, 0, 0, 0, 0, 0, 0, 204, 4, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 26, 0, 5, 0, 0, 10, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 0, 0, 0, 0, 209, 0, 0, 0, 26, 0, 5, 0, 0, 13, 0, 0, 0, 0, 0, 0, 28, 13, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 26, 0, 5, 0, 0, 27, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 0, 0, 0, 0, 33, 1, 0, 0, 26, 0, 5, 0, 0, 30, 0, 0, 0, 0, 0, 0, 148, 1, 0, 0, 0, 0, 0, 0, 58, 1, 0, 0, 26, 0, 5, 0, 0, 32, 0, 0, 0, 0, 0, 0, 208, 1, 0, 0, 0, 0, 0, 0, 90, 1, 0, 0, 26, 0, 5, 0, 0, 34, 0, 0, 0, 0, 0, 0, 216, 1, 0, 0, 0, 0, 0, 0, 122, 1, 0, 0, 26, 0, 5, 0, 0, 36, 0, 0, 0, 0, 0, 0, 112, 2, 0, 0, 0, 0, 0, 0, 144, 1, 0, 0, 26, 0, 5, 0, 0, 39, 0, 0, 0, 0, 0, 0, 12, 2, 0, 0, 0, 0, 0, 0, 170, 1, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207, 1, 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 15, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 15, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 15, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 200, 15, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 17, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 17, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 19, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 19, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 19, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 19, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220, 20, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 228, 20, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 23, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 23, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 23, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 23, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 24, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 168, 24, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 229, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 2, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 1, 0, 0, 0, 3, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 1, 0, 0, 0, 7, 0, 192, 0, 0, 0, 0, 0, 0, 11, 0, 0, 
0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 12, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 55, 0, 0, 0, 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144, 56, 0, 0, 0, 0, 0, 0, 176, 1, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 5, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0};
}
}


================================================
FILE: runtime/hsa-runtime/image/blit_object_gfx9xx.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <stdint.h>
namespace rocr {
namespace image {
uint8_t blit_object_gfx9xx[] = {127, 69, 76, 70, 2, 1, 1, 64, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 224, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 72, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0, 2, 0, 64, 0, 8, 0, 1, 0, 2, 0, 0, 96, 6, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 96, 5, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 41, 0, 0, 0, 0, 0, 0, 24, 41, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 46, 115, 104, 115, 116, 114, 116, 97, 98, 0, 46, 115, 116, 114, 116, 97, 98, 0, 46, 110, 111, 116, 101, 0, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 46, 104, 115, 97, 116, 101, 120, 116, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 115, 121, 109, 116, 97, 98, 0, 46, 114, 101, 108, 97, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 69, 88, 80, 95, 69, 80, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 69, 0, 38, 104, 115, 97, 95, 101, 120, 116, 95, 105, 109, 97, 103, 101, 58, 58, 38, 95, 95, 111, 99, 109, 108, 116, 98, 108, 95, 77, 51, 50, 95, 76, 79, 71, 95, 73, 78, 86, 95, 69, 80, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 116, 111, 95, 98, 117, 102, 102, 101, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 98, 117, 102, 102, 101, 114, 95, 116, 111, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 100, 101, 102, 97, 117, 108, 116, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 108, 105, 110, 101, 97, 
114, 95, 116, 111, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 115, 116, 97, 110, 100, 97, 114, 100, 95, 116, 111, 95, 108, 105, 110, 101, 97, 114, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 116, 111, 95, 114, 101, 103, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 111, 112, 121, 95, 105, 109, 97, 103, 101, 95, 114, 101, 103, 95, 116, 111, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 107, 101, 114, 110, 101, 108, 0, 38, 95, 95, 99, 108, 101, 97, 114, 95, 105, 109, 97, 103, 101, 95, 49, 100, 98, 95, 107, 101, 114, 110, 101, 108, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 100, 97, 116, 97, 95, 114, 101, 97, 100, 111, 110, 108, 121, 95, 97, 103, 101, 110, 116, 0, 95, 95, 104, 115, 97, 95, 115, 101, 99, 116, 105, 111, 110, 46, 104, 115, 97, 116, 101, 120, 116, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 12, 0, 0, 0, 2, 0, 0, 0, 65, 77, 68, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 4, 0, 0, 0, 26, 0, 0, 0, 3, 0, 0, 0, 65, 77, 68, 0, 4, 0, 7, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 0, 65, 77, 68, 71, 80, 85, 0, 0, 4, 0, 0, 0, 41, 0, 0, 0, 4, 0, 0, 0, 65, 77, 68, 0, 25, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 65, 77, 68, 32, 72, 83, 65, 32, 82, 117, 110, 116, 105, 109, 101, 32, 70, 105, 110, 97, 108, 105, 122, 101, 114, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 26, 0, 0, 0, 5, 0, 0, 0, 65, 77, 68, 0, 22, 0, 45, 104, 115, 97, 95, 99, 97, 108, 108, 95, 99, 111, 110, 118, 101, 110, 116, 105, 111, 110, 61, 48, 0, 5, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 96, 129, 63, 119, 62, 26, 57, 0, 192, 130, 63, 138, 105, 216, 57, 0, 32, 132, 63, 
29, 70, 81, 58, 0, 160, 133, 63, 124, 54, 172, 57, 0, 0, 135, 63, 180, 12, 123, 58, 0, 128, 136, 63, 4, 116, 64, 58, 0, 0, 138, 63, 170, 171, 38, 58, 0, 128, 139, 63, 31, 15, 46, 58, 0, 0, 141, 63, 219, 250, 86, 58, 0, 160, 142, 63, 104, 49, 7, 57, 0, 32, 144, 63, 24, 226, 14, 58, 0, 192, 145, 63, 234, 220, 244, 56, 0, 64, 147, 63, 120, 89, 81, 58, 0, 224, 148, 63, 71, 125, 39, 58, 0, 128, 150, 63, 185, 105, 33, 58, 0, 32, 152, 63, 140, 130, 63, 58, 0, 224, 153, 63, 65, 38, 11, 55, 0, 128, 155, 63, 157, 155, 211, 57, 0, 32, 157, 63, 57, 205, 118, 58, 0, 224, 158, 63, 4, 147, 41, 58, 0, 160, 160, 63, 125, 136, 2, 58, 0, 96, 162, 63, 24, 24, 2, 58, 0, 32, 164, 63, 112, 173, 40, 58, 0, 224, 165, 63, 77, 181, 118, 58, 0, 192, 167, 63, 78, 59, 217, 57, 0, 160, 169, 63, 117, 90, 45, 56, 0, 96, 171, 63, 173, 205, 81, 58, 0, 64, 173, 63, 82, 247, 65, 58, 0, 32, 175, 63, 107, 197, 91, 58, 0, 32, 177, 63, 116, 96, 253, 56, 0, 0, 179, 63, 149, 32, 14, 58, 0, 0, 181, 63, 127, 102, 30, 57, 0, 224, 182, 63, 25, 143, 108, 58, 0, 224, 184, 63, 59, 122, 93, 58, 0, 224, 186, 63, 144, 213, 122, 58, 0, 0, 189, 63, 245, 57, 138, 57, 0, 0, 191, 63, 179, 205, 60, 58, 0, 32, 193, 63, 166, 204, 196, 57, 0, 64, 195, 63, 68, 155, 89, 57, 0, 96, 197, 63, 42, 66, 101, 57, 0, 128, 199, 63, 138, 76, 215, 57, 0, 160, 201, 63, 51, 236, 77, 58, 0, 224, 203, 63, 239, 79, 193, 57, 0, 32, 206, 63, 163, 130, 17, 57, 0, 96, 208, 63, 187, 246, 204, 56, 0, 160, 210, 63, 31, 217, 129, 57, 0, 224, 212, 63, 94, 213, 26, 58, 0, 64, 215, 63, 90, 153, 31, 57, 0, 128, 217, 63, 19, 174, 104, 58, 0, 224, 219, 63, 190, 188, 93, 58, 0, 96, 222, 63, 94, 130, 244, 55, 0, 192, 224, 63, 194, 238, 205, 57, 0, 32, 227, 63, 149, 75, 124, 58, 0, 160, 229, 63, 59, 55, 72, 58, 0, 32, 232, 63, 129, 82, 75, 58, 0, 192, 234, 63, 221, 231, 198, 55, 0, 64, 237, 63, 237, 1, 243, 57, 0, 224, 239, 63, 123, 51, 23, 57, 0, 128, 242, 63, 44, 158, 59, 56, 0, 32, 245, 63, 164, 162, 47, 57, 0, 192, 247, 63, 152, 251, 6, 58, 0, 128, 250, 
63, 220, 182, 236, 56, 0, 32, 253, 63, 103, 96, 112, 58, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 59, 65, 172, 41, 52, 0, 0, 126, 60, 252, 176, 168, 53, 0, 192, 189, 60, 234, 131, 141, 54, 0, 16, 252, 60, 120, 14, 27, 54, 0, 240, 28, 61, 254, 185, 135, 54, 0, 160, 59, 61, 101, 236, 49, 54, 0, 16, 90, 61, 25, 113, 221, 54, 0, 80, 120, 61, 69, 0, 195, 53, 0, 32, 139, 61, 81, 119, 155, 55, 0, 0, 154, 61, 13, 203, 235, 55, 0, 208, 168, 61, 131, 159, 131, 55, 0, 128, 183, 61, 229, 138, 82, 55, 0, 16, 198, 61, 24, 235, 162, 55, 0, 144, 212, 61, 149, 116, 218, 54, 0, 240, 226, 61, 183, 30, 169, 54, 0, 48, 241, 61, 21, 183, 131, 55, 0, 96, 255, 61, 219, 49, 17, 55, 0, 176, 6, 62, 104, 62, 63, 56, 0, 176, 13, 62, 151, 106, 21, 56, 0, 160, 20, 62, 15, 124, 41, 56, 0, 128, 27, 62, 15, 16, 126, 56, 0, 96, 34, 62, 101, 182, 21, 56, 0, 48, 41, 62, 161, 227, 229, 55, 0, 240, 47, 62, 83, 56, 24, 56, 0, 176, 54, 62, 157, 113, 254, 53, 0, 80, 61, 62, 8, 129, 68, 56, 0, 240, 67, 62, 144, 50, 80, 56, 0, 144, 74, 62, 232, 57, 53, 55, 0, 16, 81, 62, 241, 15, 94, 56, 0, 144, 87, 62, 64, 167, 100, 56, 0, 16, 94, 62, 45, 116, 134, 55, 0, 112, 100, 62, 205, 227, 123, 56, 0, 224, 106, 62, 62, 173, 133, 54, 0, 48, 113, 62, 21, 183, 3, 56, 0, 128, 119, 62, 220, 203, 173, 55, 0, 192, 125, 62, 175, 54, 12, 56, 0, 0, 130, 62, 211, 82, 22, 55, 0, 16, 133, 62, 57, 113, 146, 56, 0, 32, 136, 62, 215, 252, 197, 56, 0, 48, 139, 62, 213, 85, 174, 56, 0, 64, 142, 62, 105, 193, 24, 56, 0, 64, 145, 62, 231, 253, 160, 56, 0, 64, 148, 62, 239, 9, 173, 56, 0, 64, 151, 62, 225, 186, 98, 56, 0, 48, 154, 62, 76, 205, 238, 56, 0, 48, 157, 62, 210, 170, 152, 55, 0, 32, 160, 62, 26, 26, 66, 55, 0, 0, 163, 62, 14, 225, 197, 56, 0, 240, 165, 62, 238, 42, 191, 55, 0, 208, 168, 62, 45, 135, 45, 56, 0, 176, 171, 62, 138, 46, 238, 55, 0, 128, 174, 62, 172, 223, 222, 56, 0, 96, 177, 62, 185, 242, 2, 56, 0, 48, 180, 62, 155, 30, 72, 56, 0, 0, 183, 62, 43, 170, 14, 56, 0, 192, 185, 62, 93, 251, 235, 56, 
0, 144, 188, 62, 221, 95, 37, 56, 0, 80, 191, 62, 130, 59, 120, 56, 0, 16, 194, 62, 30, 218, 81, 56, 0, 208, 196, 62, 5, 27, 78, 55, 0, 128, 199, 62, 155, 67, 143, 56, 0, 48, 202, 62, 16, 14, 202, 56, 0, 224, 204, 62, 139, 192, 202, 56, 0, 144, 207, 62, 95, 246, 145, 56, 0, 64, 210, 62, 203, 33, 129, 55, 0, 224, 212, 62, 154, 154, 108, 56, 0, 128, 215, 62, 35, 153, 148, 56, 0, 32, 218, 62, 204, 123, 119, 56, 0, 192, 220, 62, 38, 45, 177, 55, 0, 80, 223, 62, 211, 206, 166, 56, 0, 224, 225, 62, 230, 211, 235, 56, 0, 112, 228, 62, 205, 227, 251, 56, 0, 0, 231, 62, 194, 133, 215, 56, 0, 144, 233, 62, 0, 126, 126, 56, 0, 16, 236, 62, 197, 146, 243, 56, 0, 160, 238, 62, 131, 9, 212, 55, 0, 32, 241, 62, 124, 26, 8, 56, 0, 160, 243, 62, 173, 195, 132, 55, 0, 16, 246, 62, 35, 233, 204, 56, 0, 144, 248, 62, 175, 95, 15, 56, 0, 0, 251, 62, 56, 253, 145, 56, 0, 112, 253, 62, 188, 71, 172, 56, 0, 224, 255, 62, 43, 4, 151, 56, 0, 32, 1, 63, 210, 82, 41, 57, 0, 80, 2, 63, 212, 206, 111, 57, 0, 144, 3, 63, 115, 112, 249, 55, 0, 192, 4, 63, 174, 158, 94, 56, 0, 240, 5, 63, 74, 200, 101, 56, 0, 32, 7, 63, 163, 11, 19, 56, 0, 64, 8, 63, 22, 207, 121, 57, 0, 112, 9, 63, 201, 202, 56, 57, 0, 160, 10, 63, 244, 210, 195, 56, 0, 192, 11, 63, 236, 93, 117, 57, 0, 240, 12, 63, 103, 180, 230, 56, 0, 16, 14, 63, 184, 15, 92, 57, 0, 64, 15, 63, 224, 188, 62, 56, 0, 96, 16, 63, 146, 209, 220, 56, 0, 128, 17, 63, 223, 107, 24, 57, 0, 160, 18, 63, 76, 231, 45, 57, 0, 192, 19, 63, 68, 9, 47, 57, 0, 224, 20, 63, 97, 255, 27, 57, 0, 0, 22, 63, 68, 237, 233, 56, 0, 32, 23, 63, 200, 109, 104, 56, 0, 48, 24, 63, 167, 153, 107, 57, 0, 80, 25, 63, 137, 156, 9, 57, 0, 112, 26, 63, 115, 118, 162, 55, 0, 128, 27, 63, 163, 218, 11, 57, 0, 144, 28, 63, 171, 105, 112, 57, 0, 176, 29, 63, 255, 73, 132, 56, 0, 192, 30, 63, 56, 53, 1, 57, 0, 208, 31, 63, 104, 194, 45, 57, 0, 224, 32, 63, 35, 244, 71, 57, 0, 240, 33, 63, 124, 241, 79, 57, 0, 0, 35, 63, 14, 225, 69, 57, 0, 16, 36, 63, 245, 232, 41, 57, 0, 32, 37, 
63, 176, 93, 248, 56, 0, 48, 38, 63, 153, 95, 115, 56, 0, 48, 39, 63, 219, 8, 108, 57, 0, 64, 40, 63, 0, 230, 9, 57, 0, 80, 41, 63, 111, 153, 180, 55, 0, 80, 42, 63, 204, 51, 18, 57, 0, 80, 43, 63, 217, 234, 124, 57, 0, 96, 44, 63, 205, 181, 173, 56, 0, 96, 45, 63, 26, 38, 32, 57, 0, 96, 46, 63, 54, 238, 88, 57, 0, 112, 47, 63, 5, 73, 170, 53, 0, 112, 48, 63, 30, 209, 203, 55, 0, 112, 49, 63, 244, 253, 5, 56, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 254, 63, 248, 3, 254, 56, 0, 0, 252, 63, 193, 15, 252, 57, 0, 0, 250, 63, 201, 179, 140, 58, 0, 0, 248, 63, 16, 62, 248, 58, 0, 0, 246, 63, 48, 123, 64, 59, 0, 0, 244, 63, 96, 141, 137, 59, 0, 0, 242, 63, 72, 214, 185, 59, 0, 0, 240, 63, 241, 240, 240, 59, 0, 0, 239, 63, 127, 220, 186, 58, 0, 0, 237, 63, 108, 7, 102, 59, 0, 0, 235, 63, 166, 178, 189, 59, 0, 0, 234, 63, 161, 14, 234, 57, 0, 0, 232, 63, 247, 88, 75, 59, 0, 0, 230, 63, 72, 180, 194, 59, 0, 0, 229, 63, 172, 96, 150, 58, 0, 0, 227, 63, 228, 56, 142, 59, 0, 0, 225, 63, 14, 120, 252, 59, 0, 0, 224, 63, 56, 112, 96, 59, 0, 0, 222, 63, 77, 92, 233, 59, 0, 0, 221, 63, 76, 145, 79, 59, 0, 0, 219, 63, 239, 97, 235, 59, 0, 0, 218, 63, 79, 27, 104, 59, 0, 0, 217, 63, 178, 1, 89, 56, 0, 0, 215, 63, 229, 53, 148, 59, 0, 0, 214, 63, 89, 3, 174, 58, 0, 0, 212, 63, 3, 123, 199, 59, 0, 0, 211, 63, 109, 26, 80, 59, 0, 0, 210, 63, 33, 13, 210, 57, 0, 0, 208, 63, 204, 159, 182, 59, 0, 0, 207, 63, 81, 233, 72, 59, 0, 0, 206, 63, 185, 83, 52, 58, 0, 0, 204, 63, 205, 204, 204, 59, 0, 0, 203, 63, 192, 39, 135, 59, 0, 0, 202, 63, 205, 15, 11, 59, 0, 0, 201, 63, 209, 73, 123, 57, 0, 0, 199, 63, 125, 12, 206, 59, 0, 0, 198, 63, 106, 12, 152, 59, 0, 0, 197, 63, 247, 144, 75, 59, 0, 0, 196, 63, 21, 190, 220, 58, 0, 0, 195, 63, 49, 12, 195, 57, 0, 0, 193, 63, 214, 187, 228, 59, 0, 0, 192, 63, 193, 192, 192, 59, 0, 0, 191, 63, 232, 47, 160, 59, 0, 0, 190, 63, 12, 250, 130, 59, 0, 0, 189, 63, 142, 32, 82, 59, 0, 0, 188, 63, 24, 200, 36, 59, 0, 0, 187, 63, 135, 156, 251, 58, 0, 0, 186, 63, 140, 46, 
186, 58, 0, 0, 185, 63, 233, 15, 133, 58, 0, 0, 184, 63, 3, 23, 56, 58, 0, 0, 183, 63, 162, 181, 251, 57, 0, 0, 182, 63, 97, 11, 182, 57, 0, 0, 181, 63, 170, 104, 158, 57, 0, 0, 180, 63, 65, 11, 180, 57, 0, 0, 179, 63, 41, 53, 246, 57, 0, 0, 178, 63, 67, 22, 50, 58, 0, 0, 177, 63, 192, 157, 126, 58, 0, 0, 176, 63, 11, 44, 176, 58, 0, 0, 175, 63, 26, 119, 235, 58, 0, 0, 174, 63, 185, 130, 24, 59, 0, 0, 173, 63, 176, 86, 64, 59, 0, 0, 172, 63, 8, 35, 109, 59, 0, 0, 171, 63, 227, 105, 143, 59, 0, 0, 170, 63, 171, 170, 170, 59, 0, 0, 169, 63, 72, 74, 200, 59, 0, 0, 168, 63, 87, 63, 232, 59, 0, 0, 168, 63, 129, 10, 168, 57, 0, 0, 167, 63, 230, 20, 188, 58, 0, 0, 166, 63, 114, 136, 43, 59, 0, 0, 165, 63, 5, 106, 125, 59, 0, 0, 164, 63, 30, 207, 169, 59, 0, 0, 163, 63, 61, 10, 215, 59, 0, 0, 163, 63, 246, 199, 75, 57, 0, 0, 162, 63, 172, 12, 223, 58, 0, 0, 161, 63, 93, 98, 86, 59, 0, 0, 160, 63, 161, 160, 160, 59, 0, 0, 159, 63, 254, 9, 216, 59, 0, 0, 159, 63, 57, 47, 11, 58, 0, 0, 158, 63, 72, 90, 25, 59, 0, 0, 157, 63, 158, 216, 137, 59, 0, 0, 156, 63, 97, 225, 200, 59, 0, 0, 156, 63, 193, 9, 156, 57, 0, 0, 155, 63, 62, 223, 24, 59, 0, 0, 154, 63, 217, 231, 144, 59, 0, 0, 153, 63, 219, 34, 215, 59, 0, 0, 153, 63, 139, 210, 120, 58, 0, 0, 152, 63, 19, 144, 81, 59, 0, 0, 151, 63, 237, 37, 180, 59, 0, 0, 151, 63, 46, 1, 23, 56, 0, 0, 150, 63, 216, 180, 31, 59, 0, 0, 149, 63, 104, 37, 160, 59, 0, 0, 148, 63, 79, 9, 242, 59, 0, 0, 148, 63, 41, 1, 11, 59, 0, 0, 147, 63, 196, 133, 154, 59, 0, 0, 146, 63, 132, 19, 241, 59, 0, 0, 146, 63, 37, 73, 18, 59, 0, 0, 145, 63, 197, 179, 162, 59, 0, 0, 144, 63, 9, 188, 253, 59, 0, 0, 144, 63, 198, 112, 52, 59, 0, 0, 143, 63, 238, 35, 184, 59, 0, 0, 143, 63, 208, 206, 59, 58, 0, 0, 142, 63, 218, 106, 112, 59, 0, 0, 141, 63, 2, 82, 218, 59, 0, 0, 141, 63, 35, 44, 247, 58, 0, 0, 140, 63, 4, 156, 162, 59, 0, 0, 140, 63, 193, 8, 140, 57, 0, 0, 139, 63, 148, 104, 96, 59, 0, 0, 138, 63, 252, 242, 216, 59, 0, 0, 138, 63, 225, 240, 5, 59, 0, 0, 
137, 63, 138, 64, 174, 59, 0, 0, 137, 63, 215, 57, 86, 58, 0, 0, 136, 63, 137, 136, 136, 59, 0, 0, 135, 63, 136, 128, 247, 59, 0, 0, 135, 63, 190, 86, 79, 59, 0, 0, 134, 63, 68, 5, 217, 59, 0, 0, 134, 63, 252, 20, 23, 59, 0, 0, 133, 63, 97, 55, 191, 59, 0, 0, 133, 63, 77, 33, 208, 58, 0, 0, 132, 63, 200, 249, 169, 59, 0, 0, 132, 63, 8, 33, 132, 58, 0, 0, 131, 63, 82, 48, 153, 59, 0, 0, 131, 63, 188, 116, 19, 58, 0, 0, 130, 63, 191, 191, 140, 59, 0, 0, 130, 63, 33, 8, 130, 57, 0, 0, 129, 63, 169, 141, 132, 59, 0, 0, 129, 63, 4, 2, 129, 56, 0, 0, 128, 63, 129, 128, 128, 59, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 11, 0, 11, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 104, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 112, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 193, 208, 1, 27, 0, 0, 12, 0, 136, 125, 
2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 134, 127, 192, 140, 191, 1, 4, 136, 125, 2, 106, 128, 134, 0, 32, 128, 190, 199, 0, 136, 191, 131, 0, 6, 192, 24, 0, 0, 0, 3, 1, 6, 192, 56, 0, 0, 0, 3, 2, 2, 192, 64, 0, 0, 0, 131, 2, 6, 192, 72, 0, 0, 0, 67, 2, 2, 192, 128, 0, 0, 0, 3, 3, 10, 192, 136, 0, 0, 0, 159, 0, 6, 34, 159, 2, 8, 34, 127, 192, 140, 191, 5, 0, 134, 210, 1, 25, 0, 0, 4, 0, 133, 210, 4, 25, 0, 0, 6, 0, 133, 210, 1, 27, 0, 0, 4, 0, 255, 209, 4, 11, 26, 4, 5, 0, 133, 210, 1, 25, 0, 0, 5, 106, 25, 209, 5, 1, 2, 0, 4, 7, 6, 56, 4, 0, 14, 104, 5, 2, 16, 104, 8, 4, 18, 104, 159, 4, 12, 34, 1, 4, 14, 192, 0, 0, 0, 0, 128, 2, 20, 126, 127, 192, 140, 191, 0, 95, 0, 240, 7, 7, 4, 0, 0, 0, 134, 210, 2, 29, 0, 0, 1, 0, 133, 210, 6, 29, 0, 0, 4, 0, 133, 210, 2, 31, 0, 0, 0, 0, 255, 209, 1, 1, 18, 4, 1, 0, 133, 210, 2, 29, 0, 0, 1, 106, 25, 209, 1, 11, 2, 0, 0, 7, 0, 56, 2, 0, 134, 210, 1, 19, 0, 0, 0, 0, 133, 210, 0, 19, 0, 0, 0, 5, 0, 104, 1, 0, 133, 210, 1, 19, 0, 0, 3, 106, 25, 209, 1, 21, 0, 0, 11, 2, 4, 126, 0, 5, 8, 56, 131, 0, 6, 192, 120, 0, 0, 0, 3, 1, 6, 192, 32, 0, 0, 0, 127, 192, 140, 191, 2, 132, 0, 191, 85, 0, 133, 191, 3, 2, 6, 192, 40, 0, 0, 0, 2, 130, 0, 191, 41, 0, 132, 191, 3, 132, 0, 191, 29, 0, 133, 191, 3, 130, 0, 191, 12, 0, 132, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 2, 0, 0, 210, 8, 33, 29, 4, 0, 128, 112, 220, 0, 2, 127, 0, 110, 0, 130, 191, 3, 129, 0, 191, 108, 0, 132, 191, 0, 0, 143, 210, 129, 6, 2, 0, 127, 192, 140, 191, 0, 106, 25, 209, 8, 0, 2, 0, 9, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 2, 0, 0, 210, 8, 17, 29, 4, 0, 128, 104, 220, 0, 2, 127, 0, 95, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 128, 116, 220, 0, 7, 127, 0, 85, 0, 130, 191, 2, 129, 0, 191, 83, 0, 132, 191, 3, 132, 0, 191, 26, 0, 133, 191, 3, 130, 0, 191, 11, 0, 132, 191, 0, 0, 143, 210, 129, 6, 2, 0, 127, 192, 140, 
191, 0, 106, 25, 209, 8, 0, 2, 0, 9, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 128, 104, 220, 0, 7, 127, 0, 68, 0, 130, 191, 3, 129, 0, 191, 66, 0, 132, 191, 131, 0, 6, 192, 48, 0, 0, 0, 127, 192, 140, 191, 0, 106, 25, 209, 2, 6, 2, 0, 3, 2, 4, 126, 2, 9, 2, 56, 112, 15, 140, 191, 0, 128, 96, 220, 0, 7, 127, 0, 55, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 128, 112, 220, 0, 7, 127, 0, 45, 0, 130, 191, 3, 132, 0, 191, 34, 0, 133, 191, 3, 130, 0, 191, 14, 0, 132, 191, 112, 15, 140, 191, 5, 0, 0, 210, 8, 33, 29, 4, 1, 0, 143, 210, 130, 6, 2, 0, 1, 106, 25, 209, 4, 2, 2, 0, 5, 2, 6, 126, 3, 5, 4, 56, 6, 0, 0, 210, 10, 33, 37, 4, 0, 128, 116, 220, 1, 5, 127, 0, 27, 0, 130, 191, 3, 129, 0, 191, 25, 0, 132, 191, 112, 15, 140, 191, 0, 0, 0, 210, 8, 17, 29, 4, 1, 0, 143, 210, 130, 6, 2, 0, 0, 0, 0, 210, 9, 33, 1, 4, 1, 106, 25, 209, 4, 2, 2, 0, 5, 2, 6, 126, 3, 5, 4, 56, 0, 0, 0, 210, 10, 49, 1, 4, 0, 128, 112, 220, 1, 0, 127, 0, 9, 0, 130, 191, 0, 0, 143, 210, 130, 6, 2, 0, 0, 106, 25, 209, 4, 0, 2, 0, 5, 2, 4, 126, 2, 3, 2, 56, 112, 15, 140, 191, 0, 128, 124, 220, 0, 7, 127, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 132, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 19, 0, 19, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 88, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 96, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 193, 208, 1, 27, 0, 0, 12, 0, 136, 125, 2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 134, 127, 192, 140, 191, 1, 4, 136, 125, 2, 106, 128, 134, 0, 32, 128, 190, 194, 0, 136, 191, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 6, 192, 72, 0, 0, 0, 3, 2, 2, 192, 80, 0, 0, 0, 67, 2, 2, 192, 112, 0, 0, 0, 3, 3, 10, 192, 120, 0, 0, 0, 159, 0, 6, 34, 159, 2, 8, 34, 127, 192, 140, 191, 5, 0, 134, 210, 1, 25, 0, 0, 4, 0, 133, 210, 4, 25, 0, 0, 6, 0, 133, 210, 1, 27, 0, 0, 4, 0, 255, 209, 4, 11, 26, 4, 5, 0, 133, 210, 1, 25, 0, 0, 5, 106, 25, 209, 5, 1, 2, 0, 4, 7, 6, 56, 159, 4, 8, 34, 6, 0, 134, 210, 2, 29, 0, 0, 4, 0, 133, 210, 4, 29, 0, 0, 7, 0, 133, 210, 2, 31, 0, 0, 4, 0, 255, 209, 4, 13, 30, 4, 6, 0, 133, 210, 2, 29, 0, 0, 5, 106, 25, 209, 6, 11, 2, 0, 4, 7, 6, 56, 4, 0, 134, 210, 5, 19, 0, 0, 3, 0, 133, 210, 3, 19, 0, 0, 3, 9, 6, 104, 4, 0, 133, 210, 5, 19, 0, 0, 6, 106, 25, 209, 4, 5, 0, 0, 3, 2, 10, 126, 3, 11, 14, 56, 4, 0, 30, 104, 5, 2, 32, 104, 8, 4, 34, 104, 131, 0, 6, 192, 104, 0, 0, 0, 3, 2, 6, 192, 24, 0, 0, 0, 127, 192, 140, 191, 2, 132, 0, 191, 78, 0, 133, 191, 2, 130, 0, 191, 40, 0, 132, 191, 3, 130, 0, 191, 14, 0, 132, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 80, 220, 3, 0, 127, 3, 112, 15, 140, 191, 249, 2, 12, 126, 3, 6, 5, 0, 249, 2, 10, 126, 3, 6, 4, 0, 57, 0, 130, 191, 3, 129, 0, 191, 13, 0, 132, 191, 3, 0, 143, 210, 
129, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 72, 220, 3, 0, 127, 3, 112, 15, 140, 191, 136, 6, 12, 32, 249, 2, 10, 126, 3, 6, 0, 0, 42, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 84, 220, 3, 0, 127, 5, 33, 0, 130, 191, 2, 129, 0, 191, 29, 0, 132, 191, 3, 130, 0, 191, 9, 0, 132, 191, 3, 0, 143, 210, 129, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 72, 220, 3, 0, 127, 5, 19, 0, 130, 191, 3, 129, 0, 191, 7, 0, 132, 191, 3, 106, 25, 209, 8, 12, 2, 0, 9, 2, 10, 126, 5, 15, 8, 56, 0, 128, 64, 220, 3, 0, 127, 5, 10, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 80, 220, 3, 0, 127, 5, 1, 0, 130, 191, 2, 2, 10, 126, 3, 2, 12, 126, 5, 2, 16, 126, 4, 2, 14, 126, 46, 0, 130, 191, 3, 129, 0, 191, 18, 0, 132, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 80, 220, 3, 0, 127, 3, 112, 15, 140, 191, 249, 2, 16, 126, 3, 6, 3, 0, 249, 2, 14, 126, 3, 6, 2, 0, 249, 2, 12, 126, 3, 6, 1, 0, 249, 2, 10, 126, 3, 6, 0, 0, 26, 0, 130, 191, 3, 0, 143, 210, 130, 12, 2, 0, 3, 106, 25, 209, 8, 6, 2, 0, 9, 2, 10, 126, 5, 9, 8, 56, 0, 128, 80, 220, 3, 0, 127, 5, 3, 130, 0, 191, 12, 0, 132, 191, 4, 128, 80, 220, 3, 0, 127, 3, 112, 15, 140, 191, 249, 2, 16, 126, 3, 6, 5, 0, 249, 2, 14, 126, 3, 6, 4, 0, 249, 2, 12, 126, 5, 6, 5, 0, 249, 2, 10, 126, 5, 6, 4, 0, 4, 0, 130, 191, 12, 128, 80, 220, 3, 0, 127, 8, 4, 128, 84, 220, 3, 0, 127, 6, 131, 0, 6, 192, 32, 0, 0, 0, 127, 192, 140, 191, 1, 1, 14, 192, 0, 0, 0, 0, 128, 2, 36, 126, 112, 0, 140, 191, 0, 95, 32, 240, 15, 5, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 80, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 198, 208, 1, 27, 0, 0, 12, 0, 134, 125, 2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 135, 127, 192, 140, 191, 1, 4, 134, 125, 2, 106, 234, 135, 126, 1, 128, 190, 0, 106, 254, 137, 28, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 2, 192, 48, 0, 0, 0, 127, 192, 140, 191, 4, 3, 14, 192, 0, 0, 0, 0, 2, 0, 6, 104, 3, 2, 8, 104, 4, 4, 10, 104, 128, 2, 12, 126, 127, 192, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 3, 1, 10, 192, 56, 0, 0, 0, 5, 2, 14, 192, 0, 0, 0, 0, 127, 192, 140, 191, 4, 0, 14, 104, 5, 2, 16, 104, 6, 4, 18, 104, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 133, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 21, 0, 21, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 80, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 198, 208, 1, 27, 0, 0, 12, 0, 134, 125, 2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 135, 127, 192, 140, 191, 1, 4, 134, 125, 2, 106, 234, 135, 126, 1, 128, 190, 0, 106, 254, 137, 233, 2, 136, 191, 131, 0, 6, 192, 24, 0, 0, 0, 3, 1, 6, 192, 40, 0, 0, 0, 3, 
2, 2, 192, 48, 0, 0, 0, 127, 192, 140, 191, 1, 3, 14, 192, 0, 0, 0, 0, 4, 0, 6, 104, 5, 2, 8, 104, 8, 4, 10, 104, 128, 2, 12, 126, 127, 192, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 112, 15, 140, 191, 106, 0, 16, 208, 3, 7, 1, 0, 3, 0, 0, 209, 3, 1, 169, 1, 126, 1, 130, 190, 4, 0, 91, 208, 3, 229, 1, 0, 227, 0, 136, 191, 126, 1, 132, 190, 8, 0, 81, 208, 3, 1, 1, 0, 128, 2, 6, 126, 4, 126, 254, 137, 220, 0, 136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 126, 1, 138, 190, 8, 0, 81, 208, 3, 17, 0, 0, 255, 6, 6, 10, 82, 184, 78, 65, 10, 126, 254, 137, 242, 6, 6, 10, 210, 0, 136, 191, 255, 6, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 136, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 17, 0, 0, 126, 1, 136, 190, 8, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 104, 0, 0, 128, 0, 255, 18, 18, 104, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 104, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 128, 84, 220, 10, 0, 127, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 15, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 104, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 15, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 30, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 31, 26, 2, 8, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 
42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 22, 48, 171, 170, 170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 15, 0, 193, 209, 9, 227, 41, 132, 15, 17, 26, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 16, 42, 0, 0, 0, 128, 8, 1, 254, 190, 8, 27, 20, 4, 15, 29, 18, 4, 15, 21, 20, 2, 12, 19, 18, 4, 255, 26, 22, 38, 0, 240, 255, 255, 9, 21, 18, 2, 13, 23, 16, 4, 9, 17, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 11, 19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 11, 17, 18, 46, 0, 80, 213, 62, 255, 18, 20, 10, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 38, 131, 24, 24, 36, 255, 0, 137, 190, 85, 85, 85, 85, 255, 0, 136, 190, 85, 85, 85, 85, 12, 106, 25, 209, 8, 24, 2, 0, 9, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 136, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 193, 209, 8, 22, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 23, 16, 2, 14, 31, 22, 46, 239, 47, 228, 183, 8, 23, 22, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 136, 190, 171, 170, 42, 61, 8, 22, 28, 44, 14, 0, 193, 209, 14, 23, 194, 3, 11, 23, 30, 10, 14, 31, 22, 44, 255, 0, 136, 190, 8, 227, 130, 180, 255, 0, 137, 190, 24, 114, 177, 66, 112, 15, 140, 191, 13, 23, 26, 44, 12, 0, 68, 208, 8, 17, 0, 0, 9, 18, 132, 124, 12, 23, 26, 44, 106, 12, 140, 134, 9, 18, 130, 124, 134, 20, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 136, 190, 208, 142, 206, 194, 8, 21, 16, 0, 8, 18, 150, 124, 128, 16, 16, 0, 3, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 136, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 8, 0, 194, 208, 3, 17, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 34, 0, 3, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 7, 14, 0, 242, 6, 138, 125, 242, 14, 6, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 136, 190, 61, 10, 135, 63, 3, 0, 193, 209, 
3, 17, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 6, 126, 2, 1, 254, 190, 106, 0, 16, 208, 4, 7, 1, 0, 4, 0, 0, 209, 4, 1, 169, 1, 242, 8, 156, 124, 2, 106, 254, 134, 227, 0, 136, 191, 126, 1, 132, 190, 8, 0, 81, 208, 4, 1, 1, 0, 128, 2, 8, 126, 4, 126, 254, 137, 220, 0, 136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 126, 1, 138, 190, 8, 0, 81, 208, 4, 17, 0, 0, 255, 8, 8, 10, 82, 184, 78, 65, 10, 126, 254, 137, 242, 8, 8, 10, 210, 0, 136, 191, 255, 8, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 136, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 17, 0, 0, 126, 1, 136, 190, 8, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 104, 0, 0, 128, 0, 255, 18, 18, 104, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 104, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 128, 84, 220, 10, 0, 127, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 15, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 104, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 15, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 30, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 31, 26, 2, 8, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 
22, 48, 171, 170, 170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 15, 0, 193, 209, 9, 227, 41, 132, 15, 17, 26, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 16, 42, 0, 0, 0, 128, 8, 1, 254, 190, 8, 27, 20, 4, 15, 29, 18, 4, 15, 21, 20, 2, 12, 19, 18, 4, 255, 26, 22, 38, 0, 240, 255, 255, 9, 21, 18, 2, 13, 23, 16, 4, 9, 17, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 11, 19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 11, 17, 18, 46, 0, 80, 213, 62, 255, 18, 20, 10, 59, 170, 184, 66, 10, 17, 20, 126, 191, 20, 24, 38, 131, 24, 24, 36, 255, 0, 137, 190, 85, 85, 85, 85, 255, 0, 136, 190, 85, 85, 85, 85, 12, 106, 25, 209, 8, 24, 2, 0, 9, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 136, 190, 0, 80, 213, 62, 10, 11, 28, 126, 11, 0, 193, 209, 8, 22, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 23, 16, 2, 14, 31, 22, 46, 239, 47, 228, 183, 8, 23, 22, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 136, 190, 171, 170, 42, 61, 8, 22, 28, 44, 14, 0, 193, 209, 14, 23, 194, 3, 11, 23, 30, 10, 14, 31, 22, 44, 255, 0, 136, 190, 8, 227, 130, 180, 255, 0, 137, 190, 24, 114, 177, 66, 112, 15, 140, 191, 13, 23, 26, 44, 12, 0, 68, 208, 8, 17, 0, 0, 9, 18, 132, 124, 12, 23, 26, 44, 106, 12, 140, 134, 9, 18, 130, 124, 134, 20, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 136, 190, 208, 142, 206, 194, 8, 21, 16, 0, 8, 18, 150, 124, 128, 16, 16, 0, 4, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 136, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 8, 0, 194, 208, 4, 17, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 34, 0, 4, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 9, 14, 0, 242, 8, 138, 125, 242, 14, 8, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 136, 190, 61, 10, 135, 63, 4, 0, 193, 209, 4, 17, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 8, 126, 2, 1, 254, 190, 3, 2, 10, 192, 56, 0, 0, 0, 106, 0, 16, 208, 5, 
7, 1, 0, 5, 0, 0, 209, 5, 1, 169, 1, 127, 192, 140, 191, 8, 0, 34, 104, 9, 2, 36, 104, 10, 4, 38, 104, 126, 1, 130, 190, 4, 0, 91, 208, 5, 229, 1, 0, 227, 0, 136, 191, 126, 1, 132, 190, 8, 0, 81, 208, 5, 1, 1, 0, 128, 2, 10, 126, 4, 126, 254, 137, 220, 0, 136, 191, 255, 0, 136, 190, 28, 46, 77, 59, 126, 1, 138, 190, 8, 0, 81, 208, 5, 17, 0, 0, 255, 10, 10, 10, 82, 184, 78, 65, 10, 126, 254, 137, 242, 10, 10, 10, 210, 0, 136, 191, 255, 10, 14, 38, 255, 255, 255, 127, 242, 14, 16, 4, 255, 0, 136, 190, 0, 0, 128, 61, 106, 1, 75, 208, 8, 17, 0, 0, 126, 1, 136, 190, 8, 106, 254, 137, 7, 105, 16, 126, 70, 0, 136, 191, 129, 16, 18, 36, 255, 16, 16, 104, 0, 0, 128, 0, 255, 18, 18, 104, 0, 0, 0, 1, 255, 16, 20, 38, 0, 0, 127, 0, 255, 18, 18, 38, 0, 0, 1, 0, 9, 21, 18, 104, 249, 2, 20, 126, 9, 6, 5, 0, 128, 2, 22, 126, 10, 0, 143, 210, 131, 20, 2, 0, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 12, 106, 25, 209, 12, 20, 2, 0, 13, 2, 26, 126, 13, 23, 26, 56, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 141, 190, 85, 85, 85, 85, 255, 0, 140, 190, 85, 85, 85, 85, 10, 106, 25, 209, 12, 20, 2, 0, 13, 2, 28, 126, 14, 23, 22, 56, 0, 128, 84, 220, 10, 0, 127, 10, 255, 16, 16, 38, 255, 255, 127, 0, 240, 18, 18, 40, 240, 16, 16, 40, 9, 17, 16, 4, 113, 15, 140, 191, 13, 17, 18, 10, 12, 17, 18, 44, 255, 2, 28, 126, 171, 170, 170, 62, 255, 0, 140, 190, 0, 0, 128, 62, 7, 103, 30, 126, 12, 18, 28, 44, 12, 0, 193, 209, 12, 17, 38, 132, 193, 30, 30, 104, 14, 0, 193, 209, 9, 29, 194, 3, 9, 19, 32, 10, 13, 17, 24, 44, 15, 11, 16, 126, 14, 33, 24, 44, 255, 0, 140, 190, 244, 253, 5, 56, 12, 0, 193, 209, 8, 25, 48, 132, 112, 15, 140, 191, 12, 23, 24, 2, 8, 21, 16, 46, 0, 112, 49, 63, 12, 19, 26, 4, 255, 18, 28, 42, 0, 0, 0, 128, 8, 27, 30, 2, 8, 126, 254, 137, 8, 17, 18, 10, 21, 0, 136, 191, 8, 19, 20, 10, 255, 2, 22, 126, 171, 170, 42, 62, 255, 0, 140, 190, 37, 73, 18, 62, 12, 16, 22, 44, 8, 23, 22, 48, 205, 204, 76, 62, 8, 23, 22, 48, 0, 0, 128, 62, 8, 23, 22, 48, 171, 170, 
170, 62, 10, 23, 20, 10, 241, 18, 28, 10, 13, 0, 193, 209, 9, 227, 41, 132, 13, 17, 30, 4, 255, 20, 24, 42, 0, 0, 0, 128, 255, 16, 16, 42, 0, 0, 0, 128, 8, 1, 254, 190, 8, 31, 16, 4, 13, 29, 20, 4, 13, 17, 16, 2, 12, 21, 18, 4, 255, 30, 20, 38, 0, 240, 255, 255, 9, 17, 16, 2, 15, 21, 18, 4, 8, 19, 16, 2, 255, 16, 18, 10, 0, 160, 42, 56, 10, 19, 18, 46, 0, 160, 42, 56, 8, 19, 16, 46, 0, 80, 213, 62, 10, 17, 18, 46, 0, 80, 213, 62, 255, 18, 22, 10, 59, 170, 184, 66, 11, 17, 22, 126, 191, 22, 24, 38, 131, 24, 24, 36, 255, 0, 137, 190, 85, 85, 85, 85, 255, 0, 136, 190, 85, 85, 85, 85, 12, 106, 25, 209, 8, 24, 2, 0, 9, 2, 26, 126, 13, 106, 28, 209, 13, 1, 169, 1, 0, 128, 84, 220, 12, 0, 127, 12, 255, 0, 136, 190, 0, 80, 213, 62, 11, 11, 28, 126, 10, 0, 193, 209, 8, 20, 38, 132, 14, 19, 30, 46, 0, 0, 49, 188, 8, 21, 16, 2, 14, 31, 20, 46, 239, 47, 228, 183, 8, 21, 20, 2, 255, 2, 28, 126, 171, 170, 42, 62, 255, 0, 136, 190, 171, 170, 42, 61, 8, 20, 28, 44, 14, 0, 193, 209, 14, 21, 194, 3, 10, 21, 30, 10, 14, 31, 20, 44, 255, 0, 136, 190, 8, 227, 130, 180, 255, 0, 137, 190, 24, 114, 177, 66, 112, 15, 140, 191, 13, 21, 26, 44, 12, 0, 68, 208, 8, 17, 0, 0, 9, 18, 132, 124, 12, 21, 26, 44, 106, 12, 140, 134, 9, 18, 130, 124, 134, 22, 16, 34, 12, 27, 20, 2, 106, 12, 234, 135, 8, 0, 136, 210, 10, 17, 2, 0, 255, 2, 20, 126, 0, 0, 128, 127, 255, 0, 136, 190, 208, 142, 206, 194, 8, 21, 16, 0, 8, 18, 150, 124, 128, 16, 16, 0, 5, 15, 138, 125, 242, 16, 16, 10, 255, 2, 18, 126, 0, 0, 192, 127, 255, 0, 136, 190, 0, 0, 128, 255, 8, 19, 16, 0, 128, 14, 138, 125, 8, 0, 194, 208, 5, 17, 0, 0, 128, 16, 16, 0, 255, 2, 18, 126, 0, 0, 128, 127, 8, 0, 0, 209, 8, 19, 34, 0, 5, 19, 132, 125, 8, 19, 16, 0, 7, 19, 152, 125, 8, 11, 14, 0, 242, 10, 138, 125, 242, 14, 10, 0, 255, 2, 14, 126, 174, 71, 97, 189, 255, 0, 136, 190, 61, 10, 135, 63, 5, 0, 193, 209, 5, 17, 28, 4, 4, 1, 254, 190, 2, 126, 254, 137, 242, 2, 10, 126, 2, 1, 254, 190, 131, 0, 6, 192, 32, 0, 0, 0, 127, 192, 140, 191, 1, 1, 14, 
192, 0, 0, 0, 0, 128, 2, 40, 126, 127, 192, 140, 191, 0, 95, 32, 240, 17, 3, 1, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 72, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 80, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 198, 208, 1, 27, 0, 0, 12, 0, 134, 125, 2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 135, 127, 192, 140, 191, 1, 4, 134, 125, 2, 106, 234, 135, 
126, 1, 128, 190, 0, 106, 254, 137, 28, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 131, 0, 6, 192, 40, 0, 0, 0, 3, 1, 2, 192, 48, 0, 0, 0, 127, 192, 140, 191, 4, 3, 14, 192, 0, 0, 0, 0, 2, 0, 6, 104, 3, 2, 8, 104, 4, 4, 10, 104, 128, 2, 12, 126, 127, 192, 140, 191, 0, 95, 0, 240, 3, 3, 3, 0, 3, 1, 10, 192, 56, 0, 0, 0, 5, 2, 14, 192, 0, 0, 0, 0, 127, 192, 140, 191, 4, 0, 14, 104, 5, 2, 16, 104, 6, 4, 18, 104, 128, 2, 20, 126, 112, 15, 140, 191, 0, 95, 32, 240, 7, 3, 2, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129, 0, 172, 0, 148, 0, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 5, 0, 5, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 192, 4, 0, 
0, 0, 127, 192, 140, 191, 4, 255, 4, 134, 255, 255, 0, 0, 4, 10, 4, 146, 3, 0, 6, 192, 0, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 72, 0, 0, 0, 0, 2, 2, 126, 0, 0, 255, 209, 4, 2, 2, 4, 126, 1, 130, 190, 127, 192, 140, 191, 0, 0, 209, 208, 0, 3, 0, 0, 20, 0, 136, 191, 3, 2, 10, 192, 24, 0, 0, 0, 3, 0, 2, 192, 40, 0, 0, 0, 127, 192, 140, 191, 4, 3, 10, 192, 0, 0, 0, 0, 0, 0, 2, 104, 127, 192, 140, 191, 0, 32, 12, 224, 1, 1, 3, 128, 3, 0, 2, 192, 56, 0, 0, 0, 5, 1, 10, 192, 0, 0, 0, 0, 127, 192, 140, 191, 0, 0, 0, 104, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 195, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 13, 0, 13, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 6, 192, 0, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 72, 0, 0, 0, 0, 2, 6, 126, 0, 0, 255, 209, 8, 6, 2, 4, 126, 1, 130, 190, 127, 192, 140, 
191, 0, 0, 209, 208, 0, 3, 0, 0, 29, 0, 136, 191, 3, 2, 14, 192, 8, 0, 0, 0, 3, 0, 2, 192, 40, 0, 0, 0, 127, 192, 140, 191, 6, 4, 10, 192, 0, 0, 0, 0, 0, 0, 6, 104, 127, 192, 140, 191, 0, 32, 12, 224, 3, 3, 4, 128, 3, 4, 10, 192, 56, 0, 0, 0, 7, 5, 14, 192, 0, 0, 0, 0, 5, 10, 0, 128, 127, 192, 140, 191, 18, 2, 14, 126, 4, 8, 1, 128, 17, 2, 16, 126, 16, 0, 18, 104, 11, 0, 255, 209, 0, 14, 10, 4, 10, 0, 255, 209, 1, 16, 6, 4, 128, 2, 24, 126, 112, 15, 140, 191, 0, 95, 32, 240, 9, 3, 5, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 9, 0, 9, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 6, 192, 0, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 72, 0, 0, 0, 0, 2, 6, 126, 0, 0, 255, 209, 8, 6, 2, 4, 126, 1, 130, 190, 127, 192, 140, 191, 0, 0, 209, 208, 0, 3, 0, 0, 31, 0, 136, 191, 3, 2, 14, 192, 8, 0, 0, 0, 3, 0, 6, 192, 40, 0, 0, 0, 127, 192, 140, 191, 67, 2, 2, 192, 48, 0, 0, 0, 6, 4, 14, 192, 0, 0, 0, 0, 5, 10, 5, 128, 127, 192, 140, 191, 9, 2, 6, 126, 4, 8, 4, 128, 1, 2, 8, 126, 0, 0, 10, 
104, 7, 0, 255, 209, 5, 6, 10, 4, 6, 0, 255, 209, 4, 8, 6, 4, 128, 2, 16, 126, 0, 95, 0, 240, 5, 1, 4, 0, 3, 0, 2, 192, 56, 0, 0, 0, 7, 1, 10, 192, 0, 0, 0, 0, 127, 192, 140, 191, 0, 0, 0, 104, 112, 15, 140, 191, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130, 0, 172, 0, 148, 19, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 11, 0, 11, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 6, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 8, 134, 255, 255, 0, 0, 4, 255, 132, 146, 16, 0, 16, 0, 5, 255, 5, 134, 255, 255, 0, 0, 8, 10, 8, 146, 4, 11, 4, 146, 5, 12, 5, 146, 3, 0, 10, 192, 0, 0, 0, 0, 131, 2, 6, 192, 16, 0, 0, 0, 3, 3, 6, 192, 96, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 104, 0, 0, 0, 0, 2, 6, 126, 2, 2, 8, 126, 0, 0, 255, 209, 8, 6, 2, 4, 1, 0, 255, 209, 4, 8, 6, 4, 10, 2, 6, 126, 2, 0, 193, 208, 1, 27, 0, 0, 12, 0, 136, 125, 2, 0, 255, 209, 5, 6, 10, 4, 106, 2, 130, 134, 127, 192, 140, 191, 1, 4, 136, 125, 2, 106, 128, 134, 0, 32, 128, 190, 55, 0, 136, 191, 3, 2, 10, 192, 80, 0, 0, 0, 127, 192, 140, 191, 8, 0, 14, 104, 9, 2, 16, 104, 10, 4, 18, 104, 131, 0, 2, 192, 112, 0, 0, 0, 3, 1, 6, 192, 24, 0, 0, 0, 127, 192, 140, 191, 2, 130, 0, 191, 30, 0, 133, 191, 2, 129, 0, 191, 13, 0, 132, 191, 3, 2, 10, 192, 48, 0, 0, 0, 2, 3, 14, 
192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 27, 0, 130, 191, 2, 128, 0, 191, 25, 0, 132, 191, 3, 2, 10, 192, 32, 0, 0, 0, 2, 3, 14, 192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 12, 0, 130, 191, 3, 2, 10, 192, 64, 0, 0, 0, 2, 3, 14, 192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 0, 126, 9, 2, 2, 126, 10, 2, 4, 126, 11, 2, 6, 126, 128, 2, 20, 126, 0, 95, 32, 240, 7, 0, 3, 0, 0, 0, 129, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 172, 0, 148, 0, 0, 0, 43, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 5, 0, 5, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 192, 4, 0, 0, 0, 127, 192, 140, 191, 4, 255, 4, 134, 255, 255, 0, 0, 4, 10, 4, 146, 3, 0, 6, 192, 0, 0, 0, 0, 127, 192, 140, 191, 67, 0, 2, 192, 96, 0, 0, 0, 0, 2, 2, 126, 0, 0, 255, 209, 4, 2, 2, 4, 126, 1, 130, 190, 127, 192, 140, 
191, 0, 0, 209, 208, 0, 3, 0, 0, 50, 0, 136, 191, 3, 0, 2, 192, 80, 0, 0, 0, 127, 192, 140, 191, 0, 0, 0, 104, 3, 0, 2, 192, 112, 0, 0, 0, 3, 1, 6, 192, 24, 0, 0, 0, 127, 192, 140, 191, 0, 130, 0, 191, 28, 0, 133, 191, 0, 129, 0, 191, 12, 0, 132, 191, 3, 2, 10, 192, 48, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 25, 0, 130, 191, 0, 128, 0, 191, 23, 0, 132, 191, 3, 2, 10, 192, 32, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 11, 0, 130, 191, 3, 2, 10, 192, 64, 0, 0, 0, 2, 1, 10, 192, 0, 0, 0, 0, 127, 192, 140, 191, 8, 2, 2, 126, 9, 2, 4, 126, 10, 2, 6, 126, 11, 2, 8, 126, 0, 32, 28, 224, 0, 1, 1, 128, 0, 0, 129, 191, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 2, 0, 0, 0, 0, 0, 0, 40, 0, 0, 0, 1, 0, 4, 0, 8, 2, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 1, 0, 4, 0, 16, 6, 0, 0, 0, 0, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 26, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 4, 0, 0, 0, 0, 0, 0, 149, 0, 0, 0, 26, 0, 5, 0, 0, 5, 0, 0, 0, 0, 0, 0, 168, 4, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 26, 0, 5, 0, 0, 10, 0, 0, 0, 0, 0, 0, 20, 2, 0, 0, 0, 0, 0, 0, 209, 0, 0, 0, 26, 0, 5, 0, 0, 13, 0, 0, 0, 0, 0, 0, 72, 13, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 26, 0, 5, 0, 0, 27, 0, 0, 0, 0, 0, 0, 20, 2, 0, 0, 0, 0, 0, 0, 33, 1, 0, 0, 26, 0, 5, 0, 0, 30, 0, 0, 0, 0, 0, 0, 160, 1, 0, 0, 0, 0, 0, 0, 58, 1, 0, 0, 26, 0, 5, 0, 0, 32, 0, 0, 0, 0, 0, 0, 220, 1, 0, 0, 0, 0, 0, 0, 90, 1, 0, 0, 26, 0, 5, 0, 0, 34, 0, 0, 0, 0, 0, 0, 228, 1, 0, 0, 0, 0, 0, 0, 122, 1, 0, 0, 26, 0, 5, 0, 0, 36, 0, 0, 0, 0, 0, 0, 124, 2, 0, 0, 0, 0, 0, 0, 144, 1, 0, 0, 26, 0, 5, 0, 0, 39, 0, 0, 0, 0, 0, 0, 24, 2, 0, 0, 0, 0, 0, 0, 170, 1, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207, 1, 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 15, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 15, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216, 15, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 15, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 17, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 17, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 19, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 19, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 140, 19, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 148, 19, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252, 20, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 23, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 23, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 23, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 23, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 24, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212, 24, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 3, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 229, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 2, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 1, 0, 0, 0, 3, 0, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 3, 0, 0, 0, 0, 0, 0, 24, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 1, 0, 0, 0, 7, 0, 192, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 24, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 55, 0, 0, 0, 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 56, 0, 0, 0, 0, 0, 0, 176, 1, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 5, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0};
}
}


================================================
FILE: runtime/hsa-runtime/image/blit_src/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

## Top-level configuration for the image BLIT kernel code object build:
## locates clang, fixes the list of GPU targets to build for, and (in verbose
## mode) reports the settings in use.
cmake_minimum_required ( VERSION 3.7 )

# Import target 'clang'.
# REQUIRED makes configuration fail if the Clang package cannot be found.
# The package is searched for under ${CMAKE_PREFIX_PATH}/llvm (HINTS) and
# /opt/rocm/llvm (PATHS).
find_package(Clang REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )

# Determine the target devices if not specified.
# The default is given as several quoted ";"-separated groups; set() joins
# them into a single list.
if (NOT DEFINED TARGET_DEVICES)
  set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx805;gfx810"
                      "gfx900;gfx902;gfx904;gfx906;gfx908;gfx909;gfx90a;gfx90c;gfx942;gfx950"
                      "gfx1010;gfx1011;gfx1012;gfx1013;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
                      "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")
endif()
# Store the effective list (user-supplied or default) in the cache; FORCE
# overwrites any existing cache entry with the current value.
set( TARGET_DEVICES ${TARGET_DEVICES} CACHE STRING "Build targets" FORCE )

# Diagnostic output, printed only when CMAKE_VERBOSE_MAKEFILE evaluates true.
# The clang location is read from the imported 'clang' target.
if(${CMAKE_VERBOSE_MAKEFILE})
  get_property(clang_path TARGET clang PROPERTY LOCATION)
  message("Using clang from: ${clang_path}")
  message("Build Setting:")
  message("  Target Devices*: ${TARGET_DEVICES}")
  message("  (Specify \";\" separated list of target IDs.)")
  message("       Clang path: ${clang_path}")
endif()

##==========================================
##  Add custom command to generate a kernel code object file
##==========================================
function(gen_kernel_bc TARGET_ID INPUT_FILE OUTPUT_FILE)

  ## Registers a build rule that compiles an OpenCL kernel source with the
  ## imported 'clang' target.
  ##   TARGET_ID   - GPU target passed to clang via -mcpu
  ##   INPUT_FILE  - OpenCL C source file to compile
  ##   OUTPUT_FILE - file produced by the rule (also passed to clang via -o)

  ## Build the clang arguments as a CMake list, one element per argument.
  ## Re-splitting a flat string on whitespace would break INPUT_FILE or
  ## OUTPUT_FILE into several arguments when the path contains spaces.
  set(CLANG_ARG_LIST
    -O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0
    -target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -mcode-object-version=4
    -o ${OUTPUT_FILE} ${INPUT_FILE})

  ## Add custom command to produce a code object file.
  ## This depends on the kernel source file & compiler, so the rule re-runs
  ## when either of them changes.
  add_custom_command(OUTPUT ${OUTPUT_FILE} COMMAND clang ${CLANG_ARG_LIST}
    DEPENDS ${INPUT_FILE} clang
    COMMENT "BUILDING bitcode for ${OUTPUT_FILE}..."
    VERBATIM)

  ## Test the variable by name so its value is interpreted as a boolean
  ## rather than being expanded and evaluated a second time.
  if(CMAKE_VERBOSE_MAKEFILE)
    message("      Kernel Source: " ${INPUT_FILE})
    message("     Kernel Bitcode: " ${OUTPUT_FILE})
  endif()

endfunction(gen_kernel_bc)

##==========================================
## Find device code object name and forward to custom command
##==========================================
function(build_kernel BLIT_NAME TARGET_ID)

  ## The code object is named "<BLIT_NAME>_<TARGET_ID>" and is compiled from
  ## imageblit_kernels.cl in the current source directory.
  set(hsaco_name "${BLIT_NAME}_${TARGET_ID}")
  set(kernel_source ${CMAKE_CURRENT_SOURCE_DIR}/imageblit_kernels.cl)
  gen_kernel_bc(${TARGET_ID} ${kernel_source} ${hsaco_name})

  ## Append the new code object name to the running list and publish the
  ## extended list to the caller's scope.
  list(APPEND HSACO_TARG_LIST "${hsaco_name}")
  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)

endfunction()

##==========================================
## Build the kernel for a list of devices
##==========================================
function(build_kernel_for_devices BLIT_NAME)

  ## Start from an empty list; each build_kernel() call below extends
  ## HSACO_TARG_LIST in this function's scope.
  set(HSACO_TARG_LIST "")

  ## One code object rule per entry of TARGET_DEVICES.
  foreach(dev ${TARGET_DEVICES})
    if(${CMAKE_VERBOSE_MAKEFILE})
      message("\n  Generating: ${dev} ...")
    endif()
    build_kernel(${BLIT_NAME} ${dev})
  endforeach()

  ## Publish the collected code object names to the caller's scope.
  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)

endfunction()

##==========================================
## Create BLIT Code Object blobs file
##==========================================
function(generate_blit_file BFILE)

  ## Location of the generated C++ source in the current binary directory.
  set(blit_cpp ${CMAKE_CURRENT_BINARY_DIR}/${BFILE}.cpp)

  ## Rule that runs the generator script, handing it the output file path.
  ## It depends on every code object named in HSACO_TARG_LIST and on the
  ## generator script itself.
  add_custom_command(
    OUTPUT ${blit_cpp}
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/create_hsaco_ascii_file.sh ${blit_cpp}
    DEPENDS ${HSACO_TARG_LIST} create_hsaco_ascii_file.sh)

  ## Target named after BFILE that builds (and depends on) the generated file.
  add_custom_target(${BFILE} DEPENDS ${blit_cpp})

endfunction()

## Register one code object rule per target device, then the rule and target
## that produce opencl_blit_objects.cpp from them. The order matters:
## generate_blit_file() reads HSACO_TARG_LIST, which
## build_kernel_for_devices() sets in this scope.
build_kernel_for_devices("ocl_blit_object")
generate_blit_file("opencl_blit_objects")


================================================
FILE: runtime/hsa-runtime/image/blit_src/README.md
================================================
## OVERVIEW

This directory contains the CMakeLists.txt for automatically generating
the ASCII code object file, "opencl_blit_objects.cpp", which contains the
code object blobs of the Image BLIT kernels for the devices supported
on ROCm.  The blobs are loaded by the image library and must be regenerated
whenever a new device is introduced.


## ADD NEW DEVICE

To add a new supported device, the following steps are required:

  1. Declare an extern variable for the new device gfxNNN by adding the line
     "extern uint32_t ocl_blit_object_gfxNNN[];" in "blit_kernel.cpp".
  2. Update the BlitKernel::GetPatchedBlitObject() function to support the
     device by assigning "blit_code_object" to "ocl_blit_object_gfxNNN[]".
  3. Add the target to the TARGET_DEVICES list in CMakeLists.txt. Specify using
     the target ID syntax which is the target GFX IP name, optionally followed
     by the settings for the target features such as XNACK and SRAMECC. If
     omitted, a target feature defaults to producing code that will execute on
     any setting. For example, "gfx908" for code that will run on any setting,
     or "gfx908:sramecc+:xnack-" for code that will only run if SRAMECC is
     enabled and XNACK is disabled.
  4. Rebuild the image library.


## REQUIREMENT

In order to create the code object file, the bitcodes of the kernels are
generated by the compiler and the following bitcode libraries are required,

   opencl.bc
   ocml.bc
   irif.bc
   oclc_correctly_rounded_sqrt_off.bc
   oclc_daz_opt_on.bc
   oclc_finite_only_off.bc
   oclc_isa_version_<GFXIP>.bc
   oclc_unsafe_math_off.bc

where <GFXIP> is the gfxip number of the GPU. The directory containing the
bitcode libraries is specified in a CMake variable.

Several variables are required for CMake to build the code object file.
All of them have default values and are defined as follows:

      OPENCL_DIR - the location of installed OpenCL
                   (Default: /opt/rocm/opencl)
     BITCODE_DIR - the directory containing the bitcode libraries
                   (Default: /opt/rocm/amdgcn/bitcode)
        LLVM_DIR - the directory containing the clang, llvm-link and llvm-dis
                   executables
                   (Default: ${PROJECT_BUILD_DIR}/../lightning/bin)
  TARGET_DEVICES - list of gpu types for kernel builds (eg. "gfx900;gfx902")
                   (Default: "gfx900;gfx902;gfx904")


## STEPS TO BUILD

  $ mkdir build
  $ cd build
  $ cmake -D${OPENCL_DIR} -D${BITCODE_DIR} -D${LLVM_DIR} -D${TARGET_DEVICES} ..
  $ make opencl_blit_objects.cpp


================================================
FILE: runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh
================================================
#!/bin/bash -e
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

# Usage: create_hsaco_ascii_file.sh <output-file>
#
# Writes a C++ source file to <output-file> that embeds, via "xxd -i", every
# file in the current working directory whose name starts with
# "ocl_blit_object", wrapped in the rocr::image namespace.

# The shebang already passes -e; repeat it here so the script still aborts on
# the first failing command when it is launched as "bash <script>".
set -e

opencl_blit_file="${1:-}"

if [ -z "$opencl_blit_file" ]
then
    echo "usage: $0 <output-file>" >&2
    exit 1
fi

if ! command -v xxd >/dev/null
then
    echo "xxd not found!" >&2
    exit 1
fi

# Create the file in a temporary location and then move it in atomically, so
# that a failed run never leaves a truncated file at the final path.  The
# temporary file sits next to the destination so the rename stays on one
# filesystem; it is removed on any exit path that did not rename it.
tmp_blit_file="${opencl_blit_file}.tmp.$$"
trap 'rm -f "$tmp_blit_file"' EXIT

{
cat <<EOF
//==============================================================================
//  This file is automatically generated during build process, don't modify it
//==============================================================================

namespace rocr {
namespace image {

EOF

for file in ocl_blit_object*
do
    # An unmatched glob is left as the literal pattern; report that clearly
    # instead of letting xxd fail on a non-existent file name.
    if [ ! -e "$file" ]
    then
        echo "no ocl_blit_object* files found in $(pwd)" >&2
        exit 1
    fi
    xxd -i "$file"
    echo -e '\n'
done

cat <<EOF
} // namespace image
} // namespace rocr

EOF

} > "$tmp_blit_file"

mv -f "$tmp_blit_file" "$opencl_blit_file"


================================================
FILE: runtime/hsa-runtime/image/blit_src/imageblit_kernels.cl
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

/// Kernel code for HSA image import/export/copy/clear in OpenCL C form.

/// Reads one unsigned-integer texel from the source image selected by
/// `format`.
///
/// The same image is passed once per geometry because the handle cannot be
/// cast between image types; `format` picks which parameter is actually read:
///   0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array.
/// Only as many components of `coords` as the geometry needs are used.
/// Any other `format` value (including 1D buffer images, which are not
/// handled here) yields an all-zero texel.
uint4 read_image(__read_only image1d_t src1d,
                 __read_only image2d_t src2d,
                 __read_only image3d_t src3d,
                 __read_only image1d_array_t src1da,
                 __read_only image2d_array_t src2da,
                 uint format,
                 int4 coords) {
  if (format == 0) {  // 1D
    return read_imageui(src1d, coords.x);
  }
  if (format == 1) {  // 2D
    return read_imageui(src2d, coords.xy);
  }
  if (format == 2) {  // 3D
    return read_imageui(src3d, coords);
  }
  if (format == 3) {  // 1DA
    return read_imageui(src1da, coords.xy);
  }
  if (format == 4) {  // 2DA
    return read_imageui(src2da, coords);
  }

  // Critical failure: unknown geometry.
  return (uint4)(0);
}

/// Writes one unsigned-integer texel to the destination image selected by
/// `format` (0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array).
///
/// The image is passed once per geometry; only the parameter matching
/// `format` is written.  Any other `format` value (including 1D buffer
/// images) is treated as a critical failure and nothing is written.
void write_image(__write_only image1d_t src1d,
                 __write_only image2d_t src2d,
                 __write_only image3d_t src3d,
                 __write_only image1d_array_t src1da,
                 __write_only image2d_array_t src2da,
                 uint format,
                 int4 coords,
                 uint4 texel) {
  if (format == 0) {         // 1D
    write_imageui(src1d, coords.x, texel);
  } else if (format == 1) {  // 2D
    write_imageui(src2d, coords.xy, texel);
  } else if (format == 2) {  // 3D
    write_imageui(src3d, coords, texel);
  } else if (format == 3) {  // 1DA
    write_imageui(src1da, coords.xy, texel);
  } else if (format == 4) {  // 2DA
    write_imageui(src2da, coords, texel);
  }
  // Anything else: critical failure, no write.
}

/// Floating-point counterpart of read_image(): reads one texel with
/// read_imagef() from the source image selected by `format`
/// (0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array).
/// Any other `format` value yields an all-zero texel.
float4 read_image_float(__read_only image1d_t src1d,
                        __read_only image2d_t src2d,
                        __read_only image3d_t src3d,
                        __read_only image1d_array_t src1da,
                        __read_only image2d_array_t src2da,
                        uint format,
                        int4 coords) {
  if (format == 0) {  // 1D
    return read_imagef(src1d, coords.x);
  }
  if (format == 1) {  // 2D
    return read_imagef(src2d, coords.xy);
  }
  if (format == 2) {  // 3D
    return read_imagef(src3d, coords);
  }
  if (format == 3) {  // 1DA
    return read_imagef(src1da, coords.xy);
  }
  if (format == 4) {  // 2DA
    return read_imagef(src2da, coords);
  }

  // Critical failure: unknown geometry.
  return (float4)(0.0f);
}

/// Floating-point counterpart of write_image(): writes `texel` with
/// write_imagef() to the destination image selected by `format`
/// (0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array).
/// Any other `format` value is a critical failure and nothing is written.
void write_image_float(__write_only image1d_t src1d,
                       __write_only image2d_t src2d,
                       __write_only image3d_t src3d,
                       __write_only image1d_array_t src1da,
                       __write_only image2d_array_t src2da,
                       uint format,
                       int4 coords,
                       float4 texel) {
  if (format == 0) {         // 1D
    write_imagef(src1d, coords.x, texel);
  } else if (format == 1) {  // 2D
    write_imagef(src2d, coords.xy, texel);
  } else if (format == 2) {  // 3D
    write_imagef(src3d, coords, texel);
  } else if (format == 3) {  // 1DA
    write_imagef(src1da, coords.xy, texel);
  } else if (format == 4) {  // 2DA
    write_imagef(src2da, coords, texel);
  }
  // Anything else: critical failure, no write.
}

/// Signed-integer counterpart of write_image(): writes `texel` with
/// write_imagei() to the destination image selected by `format`
/// (0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array).
/// Any other `format` value is a critical failure and nothing is written.
void write_image_int(__write_only image1d_t src1d,
                     __write_only image2d_t src2d,
                     __write_only image3d_t src3d,
                     __write_only image1d_array_t src1da,
                     __write_only image2d_array_t src2da,
                     uint format,
                     int4 coords,
                     int4 texel) {
  if (format == 0) {         // 1D
    write_imagei(src1d, coords.x, texel);
  } else if (format == 1) {  // 2D
    write_imagei(src2d, coords.xy, texel);
  } else if (format == 2) {  // 3D
    write_imagei(src3d, coords, texel);
  } else if (format == 3) {  // 1DA
    write_imagei(src1da, coords.xy, texel);
  } else if (format == 4) {  // 2DA
    write_imagei(src2da, coords, texel);
  }
  // Anything else: critical failure, no write.
}

// Copies one texel per work-item from an image into a linear buffer.
//
// The image handle is repeated once per geometry since OCL doesn't allow
// pointers to, or casting of, images.
//   dst         - start of the destination buffer
//   srcOrigin   - first pixel of the source region (x, y, z are used)
//   format.x    - element (component) count: 1, 2 or 4
//   format.y    - element size in bytes: 1, 2 or 4
//   format.z    - max(dwords per pixel, 1); scales the destination index
//   format.w    - texture type forwarded to read_image()
//   pitch       - row pitch used to compute the destination index
//   slice_pitch - slice pitch used to compute the destination index
// Unsupported count/size combinations write nothing.
// NOTE(review): an earlier comment here said "No export for 64, 96, 128 bit
// formats", yet the 2x4, 4x2 and 4x4 cases below do store 64/128-bit pixels;
// confirm which is intended.
__kernel void copy_image_to_buffer(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    __global void* const dst,
    int4        srcOrigin,
    uint4       format,
    ulong       pitch,
    ulong       slice_pitch)
{
    // Typed views of the destination; which one is used depends on how many
    // bytes a packed pixel occupies.
    __global uchar* const out8 = (__global uchar* const)dst;
    __global ushort* const out16 = (__global ushort* const)dst;
    __global uint* const out32 = (__global uint* const)dst;

    // Position of this work-item inside the copied region.
    int4 pos = (int4)((int)get_global_id(0),
                      (int)get_global_id(1),
                      (int)get_global_id(2),
                      0);

    // The destination index is computed from the region-relative position,
    // before the source origin is applied.
    const ulong base = (pos.z * slice_pitch + pos.y * pitch +
        pos.x) * format.z;

    pos.xyz += srcOrigin.xyz;

    const uint4 px = read_image(src1d, src2d, src3d, src1da, src2da, format.w, pos);

    const uint elemCount = format.x;
    const uint elemSize = format.y;

    if (elemCount == 1) {
        if (elemSize == 1) {
            out8[base] = px.x;
        } else if (elemSize == 2) {
            out16[base] = px.x;
        } else if (elemSize == 4) {
            out32[base] = px.x;
        }
    } else if (elemCount == 2) {
        if (elemSize == 1) {
            // Two bytes packed into one 16-bit store.
            out16[base] = px.x | (px.y << 8);
        } else if (elemSize == 2) {
            // Two 16-bit values packed into one 32-bit store.
            out32[base] = px.x | (px.y << 16);
        } else if (elemSize == 4) {
            out32[base] = px.x;
            out32[base + 1] = px.y;
        }
    } else if (elemCount == 4) {
        if (elemSize == 1) {
            // Four bytes packed into one 32-bit store.
            out32[base] = px.x |
               (px.y << 8) |
               (px.z << 16) |
               (px.w << 24);
        } else if (elemSize == 2) {
            out32[base] = px.x | (px.y << 16);
            out32[base + 1] = px.z | (px.w << 16);
        } else if (elemSize == 4) {
            out32[base] = px.x;
            out32[base + 1] = px.y;
            out32[base + 2] = px.z;
            out32[base + 3] = px.w;
        }
    }
}

// Copies one texel per work-item from a linear buffer into an image.
//
// The image handle is repeated once per geometry since OCL doesn't allow
// pointers to, or casting of, images.
//   src         - start of the source buffer
//   dstOrigin   - first pixel of the destination region (x, y, z are used)
//   format.x    - element (component) count: 1, 2 or 4
//   format.y    - element size in bytes: 1, 2 or 4
//   format.z    - scales the source index (same meaning as in
//                 copy_image_to_buffer)
//   format.w    - texture type forwarded to write_image()
//   pitch       - row pitch used to compute the source index
//   slice_pitch - slice pitch used to compute the source index
__kernel void copy_buffer_to_image(__global uint* src,
                                   __write_only image1d_t dst1d,
                                   __write_only image2d_t dst2d,
                                   __write_only image3d_t dst3d,
                                   __write_only image1d_array_t dst1da,
                                   __write_only image2d_array_t dst2da,
                                   int4 dstOrigin,
                                   uint4 format,
                                   ulong pitch,
                                   ulong slice_pitch) {
  // Typed views of the source; which one is used depends on how many bytes a
  // packed pixel occupies.
  __global uint* srcUInt = src;
  __global ushort* srcUShort = (__global ushort*)src;
  __global uchar* srcUChar = (__global uchar*)src;

  // Start from a fully defined texel: the cases below only fill as many
  // components as the format has, and an unsupported count/size combination
  // fills none, so without this the write at the end would read
  // uninitialized components.
  uint4 texel = (uint4)(0, 0, 0, 0);

  ushort tmpUShort;
  uint tmpUInt;

  // Position of this work-item inside the copied region.
  int4 coordsDst;
  coordsDst.x = get_global_id(0);
  coordsDst.y = get_global_id(1);
  coordsDst.z = get_global_id(2);
  coordsDst.w = 0;

  // The source index is computed from the region-relative position, before
  // the destination origin is applied.
  const ulong idxSrc =
      (coordsDst.z * slice_pitch + coordsDst.y * pitch + coordsDst.x) * format.z;

  coordsDst.x += dstOrigin.x;
  coordsDst.y += dstOrigin.y;
  coordsDst.z += dstOrigin.z;

  // Check components
  switch (format.x) {
    case 1:
      // Check size
      switch (format.y) {
        case 1:
          texel.x = (uint)srcUChar[idxSrc];
          break;
        case 2:
          texel.x = (uint)srcUShort[idxSrc];
          break;
        case 4:
          texel.x = srcUInt[idxSrc];
          break;
      }
      break;
    case 2:
      // Check size
      switch (format.y) {
        case 1:
          // Two bytes unpacked from one 16-bit load.
          tmpUShort = srcUShort[idxSrc];
          texel.x = (uint)(tmpUShort & 0xff);
          texel.y = (uint)(tmpUShort >> 8);
          break;
        case 2:
          // Two 16-bit values unpacked from one 32-bit load.
          tmpUInt = srcUInt[idxSrc];
          texel.x = (tmpUInt & 0xffff);
          texel.y = (tmpUInt >> 16);
          break;
        case 4:
          texel.x = srcUInt[idxSrc];
          texel.y = srcUInt[idxSrc + 1];
          break;
      }
      break;
    case 4:
      // Check size
      switch (format.y) {
        case 1:
          // Four bytes unpacked from one 32-bit load.
          tmpUInt = srcUInt[idxSrc];
          texel.x = tmpUInt & 0xff;
          texel.y = (tmpUInt >> 8) & 0xff;
          texel.z = (tmpUInt >> 16) & 0xff;
          texel.w = (tmpUInt >> 24) & 0xff;
          break;
        case 2:
          tmpUInt = srcUInt[idxSrc];
          texel.x = tmpUInt & 0xffff;
          texel.y = (tmpUInt >> 16);
          tmpUInt = srcUInt[idxSrc + 1];
          texel.z = tmpUInt & 0xffff;
          texel.w = (tmpUInt >> 16);
          break;
        case 4:
          texel.x = srcUInt[idxSrc];
          texel.y = srcUInt[idxSrc + 1];
          texel.z = srcUInt[idxSrc + 2];
          texel.w = srcUInt[idxSrc + 3];
          break;
      }
      break;
  }

  // Write the final pixel
  write_image(dst1d, dst2d, dst3d, dst1da, dst2da, format.w, coordsDst, texel);
}

// Image-to-image copy of raw unsigned-integer texels, one texel per
// work-item.  Source and destination are each passed once per geometry;
// srcFormat / dstFormat select which handle read_image() / write_image() use.
// The work-item's global id is offset by srcOrigin for the read and by
// dstOrigin for the write.
__kernel void copy_image_default(__read_only image1d_t src1d,
                                 __read_only image2d_t src2d,
                                 __read_only image3d_t src3d,
                                 __read_only image1d_array_t src1da,
                                 __read_only image2d_array_t src2da,
                                 __write_only image1d_t dst1d,
                                 __write_only image2d_t dst2d,
                                 __write_only image3d_t dst3d,
                                 __write_only image1d_array_t dst1da,
                                 __write_only image2d_array_t dst2da,
                                 int4 srcOrigin,
                                 int4 dstOrigin,
                                 int srcFormat,
                                 int dstFormat) {
  // Region-relative position of this work-item.
  const int4 gid = (int4)((int)get_global_id(0),
                          (int)get_global_id(1),
                          (int)get_global_id(2),
                          0);

  const int4 srcPos = srcOrigin + gid;
  const int4 dstPos = gid + dstOrigin;

  const uint4 px = read_image(src1d, src2d, src3d, src1da, src2da, srcFormat, srcPos);
  write_image(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, dstPos, px);
}

// Converts one linear colour component to its sRGB-encoded value.
//   NaN           -> 0
//   above 1       -> 1
//   below 0       -> 0
//   < 0.0031308   -> 12.92 * c            (linear segment)
//   otherwise     -> 1.055 * c^(5/12) - 0.055
float linear_to_standard_rgba(float l_val) {
  // NaN is treated as zero.
  if (isnan(l_val)) {
    return 0.0f;
  }

  // Clamp to the [0, 1] range.
  if (l_val > 1.0f) {
    return 1.0f;
  }
  if (l_val < 0.0f) {
    return 0.0f;
  }

  // Linear segment close to black.
  if (l_val < 0.0031308f) {
    return 12.92f * l_val;
  }

  // Power-law segment.
  return (1.055f * pow(l_val, 5.0f / 12.0f)) - 0.055f;
}

// Image-to-image copy that sRGB-encodes the colour channels on the way.
// Each work-item reads one float texel, passes x, y and z through
// linear_to_standard_rgba() and writes the result; the w component is copied
// unchanged.  srcFormat / dstFormat select the geometry handle used for the
// read and the write.
__kernel void copy_image_linear_to_standard(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin) {
  // Region-relative position of this work-item.
  const int4 gid = (int4)((int)get_global_id(0),
                          (int)get_global_id(1),
                          (int)get_global_id(2),
                          0);

  const int4 srcPos = srcOrigin + gid;
  const int4 dstPos = gid + dstOrigin;

  float4 px = read_image_float(src1d, src2d, src3d, src1da, src2da, srcFormat, srcPos);

  // Only the colour channels are encoded; w is left as read.
  px.xyz = (float3)(linear_to_standard_rgba(px.x),
                    linear_to_standard_rgba(px.y),
                    linear_to_standard_rgba(px.z));

  write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, dstPos, px);
}

// Image-to-image copy through the floating-point read/write path.  Each
// work-item reads one float texel and writes it unchanged; no conversion is
// performed in the kernel code itself.
// NOTE(review): presumably the sRGB decode happens inside read_imagef() for an
// sRGB source image - confirm against the host-side dispatch.
__kernel void copy_image_standard_to_linear(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin) {
  // Region-relative position of this work-item.
  const int4 gid = (int4)((int)get_global_id(0),
                          (int)get_global_id(1),
                          (int)get_global_id(2),
                          0);

  const int4 srcPos = srcOrigin + gid;
  const int4 dstPos = gid + dstOrigin;

  const float4 px =
      read_image_float(src1d, src2d, src3d, src1da, src2da, srcFormat, srcPos);
  write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, dstPos, px);
}

// Copies unsigned-integer texels from a 1D buffer image (src1d) to a 1D image
// (dst1d), one texel per work-item along dimension 0.  Only the x components
// of the origins are used; the remaining image and format parameters are not
// referenced by this kernel.
__kernel void copy_image_1db(
    __read_only image1d_buffer_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
    // Region-relative index of this work-item.
    const int gid = get_global_id(0);

    const int srcX = srcOrigin.x + gid;
    const int dstX = gid + dstOrigin.x;

    const uint4 px = read_imageui(src1d, srcX);
    write_imageui(dst1d, dstX, px);
}

// Copies unsigned-integer texels from a 1D buffer image (src1d) to a 1D image
// (dst1d).  The destination position is built as a full int4 from the global
// id plus dstOrigin, but only its x component is used for the write; the
// source index uses srcOrigin.x only.  The remaining image and format
// parameters are not referenced by this kernel.
__kernel void copy_image_1db_to_reg(
    __read_only image1d_buffer_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
    // Region-relative position of this work-item.
    const int4 gid = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    const int srcX = srcOrigin.x + gid.x;
    const int4 dstPos = gid + dstOrigin;

    const uint4 px = read_imageui(src1d, srcX);
    write_imageui(dst1d, dstPos.x, px);
}

// Copies unsigned-integer texels from a 1D image (src1d) to a 1D buffer image
// (dst1d).  The source position is built as a full int4 from the global id
// plus srcOrigin, but only its x component is used for the read; the
// destination index uses dstOrigin.x only.  The remaining image and format
// parameters are not referenced by this kernel.
__kernel void copy_image_reg_to_1db(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_buffer_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
    // Region-relative position of this work-item.
    const int4 gid = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    const int dstX = dstOrigin.x + gid.x;
    const int4 srcPos = gid + srcOrigin;

    const uint4 px = read_imageui(src1d, srcPos.x);
    write_imageui(dst1d, dstX, px);
}

// Fills one texel per work-item with a constant value.
//
// The image is passed once per geometry; dstFormat selects which handle the
// write helpers use.
//   type      - how the 128 bits of fill_data are interpreted:
//               0 = float4, 1 = int4, 2 = uint4; other values write nothing
//   fill_data - raw bits of the fill value
//   origin    - first pixel of the region to clear
__kernel void clear_image(__write_only image1d_t dst1d,
                          __write_only image2d_t dst2d,
                          __write_only image3d_t dst3d,
                          __write_only image1d_array_t dst1da,
                          __write_only image2d_array_t dst2da,
                          int dstFormat,
                          uint type,
                          uint4 fill_data,
                          int4 origin) {
  int4 coords;

  coords.x = get_global_id(0);
  coords.y = get_global_id(1);
  coords.z = get_global_id(2);
  coords.w = 0;

  coords += origin;

  // Check components.  The bits are reinterpreted with as_typen(), the
  // OpenCL C facility for this, rather than by dereferencing a pointer cast
  // to an unrelated vector type.
  switch (type) {
    case 0:
      write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, coords, as_float4(fill_data));
      break;
    case 1:
      write_image_int(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, coords, as_int4(fill_data));
      break;
    case 2:
      write_image(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, coords, fill_data);
      break;
    }
}

// Fills one texel per work-item of a 1D buffer image (dst1d) with a constant
// value.  Only dimension 0 of the global id and origin.x are used; the other
// image parameters and dstFormat are not referenced by this kernel.
//   fill_data - raw bits of the fill value
//   origin    - first pixel of the region to clear (x only)
//   type      - how fill_data is interpreted:
//               0 = float4, 1 = int4, 2 = uint4; other values write nothing
__kernel void clear_image_1db(__write_only image1d_buffer_t dst1d,
                              __write_only image2d_t dst2d,
                              __write_only image3d_t dst3d,
                              __write_only image1d_array_t dst1da,
                              __write_only image2d_array_t dst2da,
                              int dstFormat,
                              uint4 fill_data,
                              int4 origin,
                              uint type) {
  // A scalar coordinate is sufficient here; building an int4 and adding the
  // whole origin would read vector components that were never initialized.
  const int x = (int)get_global_id(0) + origin.x;

  // Check components.  The bits are reinterpreted with as_typen() rather than
  // by dereferencing a pointer cast to an unrelated vector type.
  switch (type) {
    case 0:
      write_imagef(dst1d, x, as_float4(fill_data));
      break;
    case 1:
      write_imagei(dst1d, x, as_int4(fill_data));
      break;
    case 2:
      write_imageui(dst1d, x, fill_data);
      break;
    }
}


================================================
FILE: runtime/hsa-runtime/image/device_info.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <assert.h>
#include <string>

#include "core/inc/hsa_internal.h"
#include "device_info.h"
#include "addrlib/src/amdgpu_asic_addr.h"

namespace rocr {
namespace image {

/// Returns the major version stored in a packed device id, i.e. every bit
/// above the low byte.
uint32_t MajorVerFromDevID(uint32_t dev_id) {
  constexpr uint32_t kMajorShift = 8;
  return dev_id >> kMajorShift;
}

/// Returns the minor version stored in a packed device id, i.e. the upper
/// nibble of the low byte.
uint32_t MinorVerFromDevID(uint32_t dev_id) {
  constexpr uint32_t kMinorMask = 0xF0u;
  constexpr uint32_t kMinorShift = 4;
  return (dev_id & kMinorMask) >> kMinorShift;
}

/// Returns the stepping stored in a packed device id, i.e. the lowest nibble.
uint32_t StepFromDevID(uint32_t dev_id) {
  constexpr uint32_t kStepModulus = 16u;
  return dev_id % kStepModulus;
}

/// @brief Derives a packed chip id from the agent's "gfx..." name.
///
/// The name is queried with HSA_AGENT_INFO_NAME. After the "gfx" prefix the
/// last two characters are read as hexadecimal digits (minor version and
/// stepping) and everything before them as a decimal major version. The
/// result is laid out as (major << 8) | (minor << 4) | stepping, matching
/// MajorVerFromDevID / MinorVerFromDevID / StepFromDevID.
///
/// @param agent    Agent whose name is parsed.
/// @param chip_id  Receives the packed id; must not be null.
/// @return HSA_STATUS_SUCCESS on success, the status of the name query if it
///         fails, HSA_STATUS_ERROR_INVALID_ARGUMENT if @p chip_id is null, or
///         HSA_STATUS_ERROR if the name does not have the expected form.
hsa_status_t GetGPUAsicID(hsa_agent_t agent, uint32_t *chip_id) {
  assert(chip_id != nullptr);
  if (chip_id == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  char asic_name[64] = {0};
  hsa_status_t status = HSA::hsa_agent_get_info(
      agent, static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NAME), &asic_name);
  assert(status == HSA_STATUS_SUCCESS);

  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Guard against a name that fills the whole buffer without a terminator.
  asic_name[sizeof(asic_name) - 1] = '\0';
  const std::string name(asic_name);
  const std::string prefix("gfx");

  assert(name.compare(0, prefix.length(), prefix) == 0);

  // Validate at run time as well: with assertions compiled out, a name that
  // is too short or malformed would otherwise make substr()/stoi() throw.
  // At least one major digit plus the minor and stepping digits must follow
  // the prefix.
  if (name.length() < prefix.length() + 3 ||
      name.compare(0, prefix.length(), prefix) != 0) {
    return HSA_STATUS_ERROR;
  }

  const std::string version = name.substr(prefix.length());
  const std::string::size_type major_len = version.length() - 2;

  // Major version: decimal digits only.
  uint32_t major = 0;
  for (std::string::size_type i = 0; i < major_len; ++i) {
    const char c = version[i];
    if (c < '0' || c > '9') {
      return HSA_STATUS_ERROR;
    }
    major = major * 10 + static_cast<uint32_t>(c - '0');
    // The major version is shifted above the low byte, so it must fit in the
    // remaining 24 bits.
    if (major > 0x00FFFFFFu) {
      return HSA_STATUS_ERROR;
    }
  }

  // Minor version and stepping: one hexadecimal digit each.
  const auto hex_digit = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };
  const int minor = hex_digit(version[major_len]);
  const int step = hex_digit(version[major_len + 1]);
  if (minor < 0 || step < 0) {
    return HSA_STATUS_ERROR;
  }

  // Load chip_id accounting for stepping and minor in hex and major in dec.
  *chip_id = (major << 8) | (static_cast<uint32_t>(minor) << 4) |
             static_cast<uint32_t>(step);
  return HSA_STATUS_SUCCESS;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/device_info.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_
#define HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_

#include "stdint.h"
#include "inc/hsa.h"

namespace rocr {
namespace image {

// Accessors that extract one version field from a packed device ID.
// NOTE(review): the bit layout is defined in device_info.cpp; presumably it
// matches the chip ID encoding produced by GetGPUAsicID() -- confirm there.
uint32_t MajorVerFromDevID(uint32_t dev_id);
uint32_t MinorVerFromDevID(uint32_t dev_id);
uint32_t StepFromDevID(uint32_t dev_id);
// Derives a numeric chip ID from the agent's name and stores it in *chip_id.
// The name is expected to start with "gfx"; the last two characters are parsed
// as hexadecimal into the low 8 bits and the leading characters are parsed as
// decimal and shifted left by 8. Returns the failing status if the agent name
// query fails, HSA_STATUS_SUCCESS otherwise.
hsa_status_t GetGPUAsicID(hsa_agent_t agent, uint32_t *chip_id);

}  // namespace image
}  // namespace rocr

#endif  // HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_


================================================
FILE: runtime/hsa-runtime/image/hsa_ext_image.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "image_runtime.h"
#include "image/inc/hsa_ext_image_impl.h"
#include "core/inc/exceptions.h"

namespace rocr {

namespace AMD {
// Declared here, defined elsewhere. It is invoked from inside the catch(...)
// handler generated by the CATCH macro, and its return value becomes the
// status returned by the API function.
hsa_status_t handleException();

// Counterpart of handleException() for functions returning T (used by the
// CATCHRET macro). It calls handleException() and then abort(), so the process
// terminates; the trailing `return T()` is never reached and only satisfies
// the function's return type.
template <class T> static __forceinline T handleExceptionT() {
  handleException();
  abort();
  return T();
}
}   // namespace AMD

// Exception guard used by the hsa_* API functions below. A function body is
// written as `TRY; ...statements... CATCH;`:
//   TRY                    opens a try block.
//   CATCH                  closes it and returns AMD::handleException() from a
//                          catch(...) handler.
//   CATCHRET(RETURN_TYPE)  closes it and returns
//                          AMD::handleExceptionT<RETURN_TYPE>() instead, for
//                          functions whose return type is RETURN_TYPE.
#define TRY try {
#define CATCH } catch(...) { return AMD::handleException(); }
#define CATCHRET(RETURN_TYPE) } catch(...) { return AMD::handleExceptionT<RETURN_TYPE>(); }

namespace image {

//---------------------------------------------------------------------------//
//  Utility routines
//---------------------------------------------------------------------------//
// Replaces zero-valued pitches with defaults derived from the descriptor.
//
// image_data_row_pitch:   when 0, becomes width * element size of the format.
// image_data_slice_pitch: when 0 and the descriptor has a non-zero depth or
//                         array size, becomes one row (1DA) or row pitch *
//                         height (3D, 2DA, 2DADEPTH). Any other geometry only
//                         produces a diagnostic on stderr and the pitch is
//                         left at 0.
static void enforceDefaultPitch(hsa_agent_t agent,
                                const hsa_ext_image_descriptor_t* image_descriptor,
                                size_t& image_data_row_pitch, size_t& image_data_slice_pitch) {
  // Default row pitch.
  if (image_data_row_pitch == 0) {
    auto image_mgr = ImageRuntime::instance()->image_manager(agent);
    assert((image_mgr != nullptr) && "Image manager should already exit.");
    const auto element_bytes =
        image_mgr->GetImageProperty(agent, image_descriptor->format, image_descriptor->geometry)
            .element_size;
    image_data_row_pitch = image_descriptor->width * element_bytes;
  }

  // Default slice pitch: only relevant when the caller left it unset and the
  // descriptor describes more than one layer.
  const bool has_layers = (image_descriptor->depth != 0) || (image_descriptor->array_size != 0);
  if ((image_data_slice_pitch != 0) || !has_layers) {
    return;
  }

  switch (image_descriptor->geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      // A slice is a full 2D plane.
      image_data_slice_pitch = image_data_row_pitch * image_descriptor->height;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      // A slice is a single row.
      image_data_slice_pitch = image_data_row_pitch;
      break;
    default:
      // Reported but not treated as fatal; the slice pitch is not modified.
      fprintf(stderr, "Depth set on single layer image geometry.\n");
      break;
  }
}

//---------------------------------------------------------------------------//
//  APIs that implement Image functionality
//---------------------------------------------------------------------------//

// Forwards an image max-dimension query for `agent` to the image runtime.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when `value` is null; otherwise the status
// of ImageRuntime::GetImageInfoMaxDimension().
hsa_status_t hsa_amd_image_get_info_max_dim(hsa_agent_t agent, hsa_agent_info_t attribute,
                                            void* value) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;
  if (value == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->GetImageInfoMaxDimension(agent, attribute, value);
  CATCH;
}

// Reports the capability mask for an image format/geometry pair on `agent`.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer or a geometry outside
// [HSA_EXT_IMAGE_GEOMETRY_1D, HSA_EXT_IMAGE_GEOMETRY_2DADEPTH]; otherwise the
// status of ImageRuntime::GetImageCapability().
hsa_status_t hsa_ext_image_get_capability(hsa_agent_t agent,
                                          hsa_ext_image_geometry_t image_geometry,
                                          const hsa_ext_image_format_t* image_format,
                                          uint32_t* capability_mask) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument = (image_format == nullptr) || (capability_mask == nullptr);
  const bool geometry_out_of_range = (image_geometry < HSA_EXT_IMAGE_GEOMETRY_1D) ||
                                     (image_geometry > HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
  if (null_argument || geometry_out_of_range) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->GetImageCapability(agent, *image_format, image_geometry,
                                                      *capability_mask);
  CATCH;
}

// Queries size and alignment requirements for an opaque-layout image.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer or an access permission
// outside [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW]; otherwise the
// status of ImageRuntime::GetImageSizeAndAlignment().
hsa_status_t hsa_ext_image_data_get_info(hsa_agent_t agent,
                                         const hsa_ext_image_descriptor_t* image_descriptor,
                                         hsa_access_permission_t access_permission,
                                         hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument = (image_descriptor == nullptr) || (image_data_info == nullptr);
  const bool permission_out_of_range = (access_permission < HSA_ACCESS_PERMISSION_RO) ||
                                       (access_permission > HSA_ACCESS_PERMISSION_RW);
  if (null_argument || permission_out_of_range) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Opaque layout: row pitch and slice pitch are both passed as 0.
  return ImageRuntime::instance()->GetImageSizeAndAlignment(
      agent, *image_descriptor, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image_data_info);
  CATCH;
}

// Creates an opaque-layout image handle over caller-provided `image_data`.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when any pointer argument is null;
// otherwise the status of ImageRuntime::CreateImageHandle().
hsa_status_t hsa_ext_image_create(hsa_agent_t agent,
                                  const hsa_ext_image_descriptor_t* image_descriptor,
                                  const void* image_data, hsa_access_permission_t access_permission,
                                  hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument =
      (image_descriptor == nullptr) || (image_data == nullptr) || (image == nullptr);
  if (null_argument) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Opaque layout: row pitch and slice pitch are both passed as 0.
  return ImageRuntime::instance()->CreateImageHandle(agent, *image_descriptor, image_data,
                                                     access_permission,
                                                     HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0,
                                                     *image);
  CATCH;
}

// Destroys an image handle. Only the agent handle is validated here (zero
// yields HSA_STATUS_ERROR_INVALID_AGENT); the image handle is passed straight
// to ImageRuntime::DestroyImageHandle(), whose status is returned.
hsa_status_t hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  return ImageRuntime::instance()->DestroyImageHandle(image);
  CATCH;
}

// Copies a region between two images.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when either image handle is zero or any
// pointer is null; otherwise the status of ImageRuntime::CopyImage().
hsa_status_t hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image,
                                const hsa_dim3_t* src_offset, hsa_ext_image_t dst_image,
                                const hsa_dim3_t* dst_offset, const hsa_dim3_t* range) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool zero_handle = (src_image.handle == 0) || (dst_image.handle == 0);
  const bool null_pointer =
      (src_offset == nullptr) || (dst_offset == nullptr) || (range == nullptr);
  if (zero_handle || null_pointer) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->CopyImage(src_image, dst_image, *src_offset, *dst_offset,
                                             *range);
  CATCH;
}

// Copies data from linear memory into a region of `dst_image`.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null source, a zero image handle or
// a null region; otherwise the status of ImageRuntime::CopyBufferToImage().
hsa_status_t hsa_ext_image_import(hsa_agent_t agent, const void* src_memory, size_t src_row_pitch,
                                  size_t src_slice_pitch, hsa_ext_image_t dst_image,
                                  const hsa_ext_image_region_t* image_region) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool invalid_argument =
      (src_memory == nullptr) || (dst_image.handle == 0) || (image_region == nullptr);
  if (invalid_argument) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->CopyBufferToImage(src_memory, src_row_pitch, src_slice_pitch,
                                                     dst_image, *image_region);
  CATCH;
}

// Copies a region of `src_image` out to linear memory.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null destination, a zero image
// handle or a null region; otherwise the status of
// ImageRuntime::CopyImageToBuffer().
hsa_status_t hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image, void* dst_memory,
                                  size_t dst_row_pitch, size_t dst_slice_pitch,
                                  const hsa_ext_image_region_t* image_region) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool invalid_argument =
      (dst_memory == nullptr) || (src_image.handle == 0) || (image_region == nullptr);
  if (invalid_argument) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->CopyImageToBuffer(src_image, dst_memory, dst_row_pitch,
                                                     dst_slice_pitch, *image_region);
  CATCH;
}

// Fills a region of `image` with the value pointed to by `data`.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a zero image handle, a null region or
// null fill data; otherwise the status of ImageRuntime::FillImage().
hsa_status_t hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image, const void* data,
                                 const hsa_ext_image_region_t* image_region) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  if (image.handle == 0 || image_region == NULL || data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ImageRuntime::instance()->FillImage(image, data, *image_region);
  CATCH;
}

// Creates a sampler from a v1 descriptor by widening it to the v2 form: the
// single address mode of the v1 descriptor is used for all three address-mode
// slots. Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer; otherwise the status
// of ImageRuntime::CreateSamplerHandle().
hsa_status_t hsa_ext_sampler_create(hsa_agent_t agent,
                                    const hsa_ext_sampler_descriptor_t* sampler_descriptor,
                                    hsa_ext_sampler_t* sampler) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;
  if ((sampler_descriptor == nullptr) || (sampler == nullptr)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_ext_sampler_descriptor_t& v1 = *sampler_descriptor;
  hsa_ext_sampler_descriptor_v2_t widened = {
      v1.coordinate_mode, v1.filter_mode, {v1.address_mode, v1.address_mode, v1.address_mode}};

  return ImageRuntime::instance()->CreateSamplerHandle(agent, widened, *sampler);
  CATCH;
}

// Creates a sampler from a v2 descriptor.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer; otherwise the status
// of ImageRuntime::CreateSamplerHandle().
hsa_status_t hsa_ext_sampler_create_v2(hsa_agent_t agent,
                                    const hsa_ext_sampler_descriptor_v2_t* sampler_descriptor,
                                    hsa_ext_sampler_t* sampler) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument = (sampler_descriptor == nullptr) || (sampler == nullptr);
  if (null_argument) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->CreateSamplerHandle(agent, *sampler_descriptor, *sampler);
  CATCH;
}

// Destroys a sampler handle. Only the agent handle is validated here (zero
// yields HSA_STATUS_ERROR_INVALID_AGENT); the sampler handle is passed
// straight to ImageRuntime::DestroySamplerHandle(), whose status is returned.
hsa_status_t hsa_ext_sampler_destroy(hsa_agent_t agent, hsa_ext_sampler_t sampler) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  return ImageRuntime::instance()->DestroySamplerHandle(sampler);
  CATCH;
}

// Layout-aware variant of the capability query. Only
// HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR is accepted; the layout is validated here
// and is not forwarded to ImageRuntime::GetImageCapability().
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer, an out-of-range
// geometry or any other layout.
hsa_status_t hsa_ext_image_get_capability_with_layout(hsa_agent_t agent,
                                                      hsa_ext_image_geometry_t image_geometry,
                                                      const hsa_ext_image_format_t* image_format,
                                                      hsa_ext_image_data_layout_t image_data_layout,
                                                      uint32_t* capability_mask) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument = (image_format == nullptr) || (capability_mask == nullptr);
  const bool geometry_out_of_range = (image_geometry < HSA_EXT_IMAGE_GEOMETRY_1D) ||
                                     (image_geometry > HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
  const bool unsupported_layout = (image_data_layout != HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (null_argument || geometry_out_of_range || unsupported_layout) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ImageRuntime::instance()->GetImageCapability(agent, *image_format, image_geometry,
                                                      *capability_mask);
  CATCH;
}

// Queries size and alignment requirements for a linear-layout image with
// explicit pitches. Zero pitches are replaced by defaults via
// enforceDefaultPitch() before the query is forwarded.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer, an access permission
// outside [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW] or a layout
// other than HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
hsa_status_t hsa_ext_image_data_get_info_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument = (image_descriptor == nullptr) || (image_data_info == nullptr);
  const bool permission_out_of_range = (access_permission < HSA_ACCESS_PERMISSION_RO) ||
                                       (access_permission > HSA_ACCESS_PERMISSION_RW);
  const bool unsupported_layout = (image_data_layout != HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (null_argument || permission_out_of_range || unsupported_layout) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Updates the two by-value pitch parameters in place.
  enforceDefaultPitch(agent, image_descriptor, image_data_row_pitch, image_data_slice_pitch);

  return ImageRuntime::instance()->GetImageSizeAndAlignment(agent, *image_descriptor,
                                                            image_data_layout,
                                                            image_data_row_pitch,
                                                            image_data_slice_pitch,
                                                            *image_data_info);
  CATCH;
}

// Creates a linear-layout image handle over caller-provided `image_data` with
// explicit pitches. Zero pitches are replaced by defaults via
// enforceDefaultPitch() before the handle is created.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer or a layout other than
// HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR; otherwise the status of
// ImageRuntime::CreateImageHandle().
hsa_status_t hsa_ext_image_create_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, const void* image_data,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument =
      (image_descriptor == nullptr) || (image_data == nullptr) || (image == nullptr);
  const bool unsupported_layout = (image_data_layout != HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (null_argument || unsupported_layout) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Updates the two by-value pitch parameters in place.
  enforceDefaultPitch(agent, image_descriptor, image_data_row_pitch, image_data_slice_pitch);

  return ImageRuntime::instance()->CreateImageHandle(agent, *image_descriptor, image_data,
                                                     access_permission, image_data_layout,
                                                     image_data_row_pitch,
                                                     image_data_slice_pitch, *image);
  CATCH;
}

// Creates an image handle using an AMD-specific layout descriptor.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when `image_descriptor`, `image_data` or
// `image` is null. `image_layout` is not checked here; it is passed as a
// pointer to ImageRuntime::CreateImageHandleWithLayout(), whose status is
// returned.
hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
                                  const hsa_ext_image_descriptor_t* image_descriptor,
                                  const hsa_amd_image_descriptor_t* image_layout,
                                  const void* image_data, hsa_access_permission_t access_permission,
                                  hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool null_argument =
      (image_descriptor == nullptr) || (image_data == nullptr) || (image == nullptr);
  if (null_argument) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ImageRuntime::instance()->CreateImageHandleWithLayout(agent, *image_descriptor,
                                                               image_layout, image_data,
                                                               access_permission, *image);
  CATCH;
}

// Publishes this file's image/sampler implementations: every *_fn slot of
// `image_api` written below is pointed at the matching function defined in
// this namespace, and `*interface_api` receives hsa_amd_image_create.
void LoadImage(core::ImageExtTableInternal* image_api,
               decltype(::hsa_amd_image_create)** interface_api) {
  // Capability and size queries.
  image_api->hsa_ext_image_get_capability_fn = &hsa_ext_image_get_capability;
  image_api->hsa_ext_image_data_get_info_fn = &hsa_ext_image_data_get_info;
  image_api->hsa_ext_image_get_capability_with_layout_fn =
      &hsa_ext_image_get_capability_with_layout;
  image_api->hsa_ext_image_data_get_info_with_layout_fn =
      &hsa_ext_image_data_get_info_with_layout;
  image_api->hsa_amd_image_get_info_max_dim_fn = &hsa_amd_image_get_info_max_dim;

  // Image lifetime.
  image_api->hsa_ext_image_create_fn = &hsa_ext_image_create;
  image_api->hsa_ext_image_create_with_layout_fn = &hsa_ext_image_create_with_layout;
  image_api->hsa_ext_image_destroy_fn = &hsa_ext_image_destroy;

  // Image data movement.
  image_api->hsa_ext_image_import_fn = &hsa_ext_image_import;
  image_api->hsa_ext_image_export_fn = &hsa_ext_image_export;
  image_api->hsa_ext_image_copy_fn = &hsa_ext_image_copy;
  image_api->hsa_ext_image_clear_fn = &hsa_ext_image_clear;

  // Sampler lifetime.
  image_api->hsa_ext_sampler_create_fn = &hsa_ext_sampler_create;
  image_api->hsa_ext_sampler_create_v2_fn = &hsa_ext_sampler_create_v2;
  image_api->hsa_ext_sampler_destroy_fn = &hsa_ext_sampler_destroy;

  // The AMD image-create entry point is handed back through a separate pointer.
  *interface_api = &hsa_amd_image_create;
}

// Tears down the image runtime by destroying the ImageRuntime singleton.
void ReleaseImageRsrcs() {
  ImageRuntime::DestroySingleton();
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_lut.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_EXT_IMAGE_IMAGE_LUT_H
#define AMD_HSA_EXT_IMAGE_IMAGE_LUT_H

#include <stdint.h>

#include "inc/hsa_ext_image.h"
#include "resource.h"
#include "util.h"

namespace rocr {
namespace image {

// Abstract lookup-table interface for image properties. Every query below is
// pure virtual and const; concrete subclasses supply the actual tables.
// NOTE(review): the returned uint32_t values and the ImageProperty/Swizzle
// contents are presumably hardware-specific encodings -- confirm against
// resource.h and the concrete implementations.
class ImageLut {
 public:
  ImageLut() {}

  // Virtual so that subclasses can be destroyed through an ImageLut pointer.
  virtual ~ImageLut() {}

  // Maps an HSA image geometry to a uint32_t value.
  virtual uint32_t MapGeometry(hsa_ext_image_geometry_t geometry) const = 0;

  // Returns the ImageProperty for `format` when used with `geometry`.
  virtual ImageProperty MapFormat(const hsa_ext_image_format_t& format,
                                  hsa_ext_image_geometry_t geometry) const = 0;

  // Returns the Swizzle associated with a channel order.
  virtual Swizzle MapSwizzle(hsa_ext_image_channel_order32_t order) const = 0;

  // Per-geometry limits for width, height, depth and array size.
  virtual uint32_t GetMaxWidth(hsa_ext_image_geometry_t geometry) const = 0;

  virtual uint32_t GetMaxHeight(hsa_ext_image_geometry_t geometry) const = 0;

  virtual uint32_t GetMaxDepth(hsa_ext_image_geometry_t geometry) const = 0;

  virtual uint32_t GetMaxArraySize(hsa_ext_image_geometry_t geometry) const = 0;

 private:
   // Macro defined outside this header; by its name it disables copy
   // construction and assignment for this class.
   DISALLOW_COPY_AND_ASSIGN(ImageLut);
};

}  // namespace image
}  // namespace rocr
#endif  // AMD_HSA_EXT_IMAGE_IMAGE_LUT_H


================================================
FILE: runtime/hsa-runtime/image/image_lut_gfx11.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "image_lut_gfx11.h"
#include "resource_gfx11.h"

namespace rocr {
namespace image {

  /*
   * The type table has changed for gfx11, so we need a separate instance for
   * the Property LUT.
   *
   * Layout: the first index is the channel order (one row per order, named in
   * the comment that opens each row) and the second index is the channel
   * type. MapFormat() reads the table as
   *   kPropLutGfx11_[format.channel_order][format.channel_type].
   *
   * Populated entries list an access constant (RW, RO, ROWO or 0), a byte
   * count, a FMT_* constant and a TYPE_* constant.
   * NOTE(review): presumably these are {capability, element size in bytes,
   * hardware data format, hardware number type} -- confirm against the
   * ImageProperty definition.
   *
   * Rows written as {} and entries written as {0, 0, 0, 0} are all-zero, the
   * same value MapFormat() returns for combinations it rejects.
   */
  const ImageProperty ImageLutGfx11::kPropLutGfx11_[ORDER_COUNT][TYPE_COUNT] = {
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_A
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 1, FMT_8, TYPE_SINT},
     {RW, 2, FMT_16, TYPE_SINT},
     {RW, 4, FMT_32, TYPE_SINT},
     {RW, 1, FMT_8, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_UINT},
     {RW, 4, FMT_32, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_R
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 1, FMT_8, TYPE_SINT},
     {RW, 2, FMT_16, TYPE_SINT},
     {RW, 4, FMT_32, TYPE_SINT},
     {RW, 1, FMT_8, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_UINT},
     {RW, 4, FMT_32, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
     {RW, 2, FMT_8_8, TYPE_SNORM},
     {RW, 4, FMT_16_16, TYPE_SNORM},
     {RW, 2, FMT_8_8, TYPE_UNORM},
     {RW, 4, FMT_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_8_8, TYPE_SINT},
     {RW, 4, FMT_16_16, TYPE_SINT},
     {RW, 8, FMT_32_32, TYPE_SINT},
     {RW, 2, FMT_8_8, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_UINT},
     {RW, 8, FMT_32_32, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_FLOAT},
     {RW, 8, FMT_32_32, TYPE_FLOAT}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
     {RW, 2, FMT_8_8, TYPE_SNORM},
     {RW, 4, FMT_16_16, TYPE_SNORM},
     {RW, 2, FMT_8_8, TYPE_UNORM},
     {RW, 4, FMT_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_8_8, TYPE_SINT},
     {RW, 4, FMT_16_16, TYPE_SINT},
     {RW, 8, FMT_32_32, TYPE_SINT},
     {RW, 2, FMT_8_8, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_UINT},
     {RW, 8, FMT_32_32, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_FLOAT},
     {RW, 8, FMT_32_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_1_5_5_5, TYPE_UNORM},
     {RW, 2, FMT_5_6_5, TYPE_UNORM},
     {RW, 4, FMT_2_10_10_10, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {RW, 8, FMT_16_16_16_16, TYPE_SNORM},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {RW, 8, FMT_16_16_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {RW, 8, FMT_16_16_16_16, TYPE_SINT},
     {RW, 16, FMT_32_32_32_32, TYPE_SINT},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {RW, 8, FMT_16_16_16_16, TYPE_UINT},
     {RW, 16, FMT_32_32_32_32, TYPE_UINT},
     {RW, 8, FMT_16_16_16_16, TYPE_FLOAT},
     {RW, 16, FMT_32_32_32_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RO, 4, FMT_8_8_8_8, TYPE_SRGB},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {ROWO, 2, FMT_16, TYPE_UNORM},
     // TODO: 24 bit (entry is present but its first field is left at 0)
     {0, 3, FMT_32, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {ROWO, 4, FMT_32, TYPE_FLOAT}},
    {}  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
};

// Returns the gfx11 image property for `format` when used with `geometry`.
//
// The result comes from kPropLutGfx11_[channel_order][channel_type], subject
// to these geometry-specific restrictions:
//   - 1D, 2D, 3D, 1DA, 2DA: no extra restriction.
//   - 1DB: sRGB channel orders and the 555/565 packed channel types are
//     rejected (no hardware buffer access for them).
//   - 2DDEPTH, 2DADEPTH: only the DEPTH and DEPTH_STENCIL orders are accepted.
//
// Rejected combinations, and formats whose channel order or channel type lies
// outside the table, yield a zero-initialized ImageProperty. An unknown
// geometry additionally trips an assert in debug builds.
ImageProperty ImageLutGfx11::MapFormat(const hsa_ext_image_format_t& format,
                                    hsa_ext_image_geometry_t geometry) const {
  // Value reported for every unsupported or invalid combination.
  const ImageProperty unsupported = {0};

  // Bounds-checked table read. The format comes from the API caller, so its
  // fields must not be trusted to be valid table indices.
  const auto lookup = [&]() -> ImageProperty {
    if (static_cast<uint32_t>(format.channel_order) >= static_cast<uint32_t>(ORDER_COUNT) ||
        static_cast<uint32_t>(format.channel_type) >= static_cast<uint32_t>(TYPE_COUNT)) {
      return unsupported;
    }
    return kPropLutGfx11_[format.channel_order][format.channel_type];
  };

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
      return lookup();
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
      switch (format.channel_order) {
        // Hardware does not support buffer access to srgb image.
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
          break;
        default:
          switch (format.channel_type) {
            // Hardware does not support buffer access to 555/565 packed image.
            case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555:
            case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565:
              break;
            default:
              return lookup();
          }
      }
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      switch (format.channel_order) {
        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
          return lookup();
        default:
          break;
      }
      break;
    default:
      assert(false && "Should not reach here");
      break;
  }

  return unsupported;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_lut_gfx11.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_EXT_IMAGE_IMAGE_LUT_GFX11_H
#define AMD_HSA_EXT_IMAGE_IMAGE_LUT_GFX11_H

#include "image_lut.h"
#include "image_lut_kv.h"

namespace rocr {
namespace image {

// Image lookup table variant that inherits every lookup from ImageLutKv and
// replaces only the channel-format mapping with its own property table.
class ImageLutGfx11 : public ImageLutKv {
 public:
  ImageLutGfx11() { }

  // Returns the image property for |format| when used with |geometry|.
  // Overrides the virtual ImageLutKv::MapFormat; `override` makes the compiler
  // reject this declaration if the base signature ever changes.
  ImageProperty MapFormat(const hsa_ext_image_format_t& format,
                          hsa_ext_image_geometry_t geometry) const override;

 private:
  // Lookup table of channel format property. Based on HSA Programmer's
  // Reference Manual 1.0P Table 9-4 Channel Order, Channel type and Image
  // Geometry Combinations.
  static const ImageProperty kPropLutGfx11_[ORDER_COUNT][TYPE_COUNT];

  DISALLOW_COPY_AND_ASSIGN(ImageLutGfx11);
};

}  // namespace image
}  // namespace rocr
#endif  // AMD_HSA_EXT_IMAGE_IMAGE_LUT_GFX11_H


================================================
FILE: runtime/hsa-runtime/image/image_lut_kv.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "image_lut_kv.h"
#include "resource_kv.h"

namespace rocr {
namespace image {

// Device resource type for each HSA image geometry, indexed by the
// hsa_ext_image_geometry_t value (see MapGeometry). The 1DB entry has no
// SQ_RSRC_IMG_* value in this table and is stored as 0.
const uint32_t ImageLutKv::kGeometryLut_[GEOMETRY_COUNT] = {
    SQ_RSRC_IMG_1D,        // HSA_EXT_IMAGE_GEOMETRY_1D
    SQ_RSRC_IMG_2D,        // HSA_EXT_IMAGE_GEOMETRY_2D
    SQ_RSRC_IMG_3D,        // HSA_EXT_IMAGE_GEOMETRY_3D
    SQ_RSRC_IMG_1D_ARRAY,  // HSA_EXT_IMAGE_GEOMETRY_1DA
    SQ_RSRC_IMG_2D_ARRAY,  // HSA_EXT_IMAGE_GEOMETRY_2DA
    0,                     // HSA_EXT_IMAGE_GEOMETRY_1DB
    SQ_RSRC_IMG_2D,        // HSA_EXT_IMAGE_GEOMETRY_2DDEPTH
    SQ_RSRC_IMG_2D_ARRAY   // HSA_EXT_IMAGE_GEOMETRY_2DADEPTH
};

// Channel format property table. The first index is the channel order (one
// row per order, named in the row comments) and the second index is the
// channel type (see MapFormat). Rows written as `{}` and entries written as
// `{0, 0, 0, 0}` are zero-initialized.
// NOTE(review): each entry appears to be {capability, element size in bytes,
// device data format, device data type}; confirm against the ImageProperty
// declaration, which is not part of this file.
const ImageProperty ImageLutKv::kPropLut_[ORDER_COUNT][TYPE_COUNT] = {
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_A
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 1, FMT_8, TYPE_SINT},
     {RW, 2, FMT_16, TYPE_SINT},
     {RW, 4, FMT_32, TYPE_SINT},
     {RW, 1, FMT_8, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_UINT},
     {RW, 4, FMT_32, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_R
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 1, FMT_8, TYPE_SINT},
     {RW, 2, FMT_16, TYPE_SINT},
     {RW, 4, FMT_32, TYPE_SINT},
     {RW, 1, FMT_8, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_UINT},
     {RW, 4, FMT_32, TYPE_UINT},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
     {RW, 2, FMT_8_8, TYPE_SNORM},
     {RW, 4, FMT_16_16, TYPE_SNORM},
     {RW, 2, FMT_8_8, TYPE_UNORM},
     {RW, 4, FMT_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_8_8, TYPE_SINT},
     {RW, 4, FMT_16_16, TYPE_SINT},
     {RW, 8, FMT_32_32, TYPE_SINT},
     {RW, 2, FMT_8_8, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_UINT},
     {RW, 8, FMT_32_32, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_FLOAT},
     {RW, 8, FMT_32_32, TYPE_FLOAT}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
     {RW, 2, FMT_8_8, TYPE_SNORM},
     {RW, 4, FMT_16_16, TYPE_SNORM},
     {RW, 2, FMT_8_8, TYPE_UNORM},
     {RW, 4, FMT_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_8_8, TYPE_SINT},
     {RW, 4, FMT_16_16, TYPE_SINT},
     {RW, 8, FMT_32_32, TYPE_SINT},
     {RW, 2, FMT_8_8, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_UINT},
     {RW, 8, FMT_32_32, TYPE_UINT},
     {RW, 4, FMT_16_16, TYPE_FLOAT},
     {RW, 8, FMT_32_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_1_5_5_5, TYPE_UNORM},
     {RW, 2, FMT_5_6_5, TYPE_UNORM},
     {RW, 4, FMT_2_10_10_10, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {RW, 8, FMT_16_16_16_16, TYPE_SNORM},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {RW, 8, FMT_16_16_16_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {RW, 8, FMT_16_16_16_16, TYPE_SINT},
     {RW, 16, FMT_32_32_32_32, TYPE_SINT},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {RW, 8, FMT_16_16_16_16, TYPE_UINT},
     {RW, 16, FMT_32_32_32_32, TYPE_UINT},
     {RW, 8, FMT_16_16_16_16, TYPE_FLOAT},
     {RW, 16, FMT_32_32_32_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
     {RW, 4, FMT_8_8_8_8, TYPE_SNORM},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_SINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 4, FMT_8_8_8_8, TYPE_UINT},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RO, 4, FMT_8_8_8_8, TYPE_SRGB},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0}},
    {},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
    {     // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
     {RW, 1, FMT_8, TYPE_SNORM},
     {RW, 2, FMT_16, TYPE_SNORM},
     {RW, 1, FMT_8, TYPE_UNORM},
     {RW, 2, FMT_16, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {RW, 2, FMT_16, TYPE_FLOAT},
     {RW, 4, FMT_32, TYPE_FLOAT}},
    {// HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {ROWO, 2, FMT_16, TYPE_UNORM},
     // TODO: 24 bit
     {0, 3, FMT_32, TYPE_UNORM},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {0, 0, 0, 0},
     {ROWO, 4, FMT_32, TYPE_FLOAT}},
    {}  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
};

// Channel swizzle for each channel order, indexed by the channel order value
// (see MapSwizzle). Each record holds four SEL_* selectors.
// NOTE(review): the selectors presumably describe the destination components
// in x, y, z, w order; confirm against the Swizzle declaration.
const Swizzle ImageLutKv::kSwizzleLut_[ORDER_COUNT] = {
    {SEL_0, SEL_0, SEL_0, SEL_X},  // HSA_EXT_IMAGE_CHANNEL_ORDER_A
    {SEL_X, SEL_0, SEL_0, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_R
    {SEL_X, SEL_0, SEL_0, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
    {SEL_X, SEL_Y, SEL_0, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
    {SEL_X, SEL_Y, SEL_0, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
    {SEL_X, SEL_0, SEL_0, SEL_Y},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
    {SEL_Z, SEL_Y, SEL_X, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
    {SEL_Z, SEL_Y, SEL_X, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
    {SEL_X, SEL_Y, SEL_Z, SEL_W},  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
    {SEL_Z, SEL_Y, SEL_X, SEL_W},  // HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
    {SEL_Y, SEL_Z, SEL_W, SEL_X},  // HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
    {SEL_Y, SEL_X, SEL_W, SEL_Z},  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
    {SEL_X, SEL_Y, SEL_Z, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
    {SEL_X, SEL_Y, SEL_Z, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
    {SEL_X, SEL_Y, SEL_Z, SEL_W},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
    {SEL_Z, SEL_Y, SEL_X, SEL_W},  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
    {SEL_X, SEL_X, SEL_X, SEL_X},  // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
    {SEL_X, SEL_X, SEL_X, SEL_1},  // HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
    {SEL_X, SEL_0, SEL_0, SEL_0},  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
    {SEL_Y, SEL_0, SEL_0, SEL_0}   // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
};

// Maximum image dimensions per geometry, indexed by the geometry value.
// Columns are read by GetMaxWidth (0), GetMaxHeight (1), GetMaxDepth (2) and
// GetMaxArraySize (3).
const uint32_t ImageLutKv::kMaxDimensionLut_[GEOMETRY_COUNT][4] = {
    {16384, 1, 1, 1},         // HSA_EXT_IMAGE_GEOMETRY_1D
    {16384, 16384, 1, 1},     // HSA_EXT_IMAGE_GEOMETRY_2D
    {16384, 16384, 8192, 1},  // HSA_EXT_IMAGE_GEOMETRY_3D
    {16384, 1, 1, 8192},      // HSA_EXT_IMAGE_GEOMETRY_1DA
    {16384, 16384, 1, 8192},  // HSA_EXT_IMAGE_GEOMETRY_2DA
    {4294967295, 1, 1, 1},    // HSA_EXT_IMAGE_GEOMETRY_1DB
    {16384, 16384, 1, 1},     // HSA_EXT_IMAGE_GEOMETRY_2DDEPTH
    {16384, 16384, 1, 8192}   // HSA_EXT_IMAGE_GEOMETRY_2DADEPTH
};

// Translates an HSA image geometry into the device resource type stored in
// kGeometryLut_. A value that is not one of the listed geometries trips the
// assert in debug builds and yields 0xFFFFFFFF otherwise.
uint32_t ImageLutKv::MapGeometry(hsa_ext_image_geometry_t geometry) const {
  bool is_known_geometry = false;

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      is_known_geometry = true;
      break;
    default:
      break;
  }

  if (!is_known_geometry) {
    assert(false && "Should not reach here");
    return static_cast<uint32_t>(-1);
  }

  return kGeometryLut_[geometry];
}

// Looks up the image property for |format| when used with |geometry|.
//
// Returns the kPropLut_ entry for the (channel order, channel type) pair when
// the geometry accepts it, and a zero-initialized ImageProperty otherwise:
//  - 1DB rejects the sRGB channel orders and the 555/565 packed types;
//  - 2DDEPTH/2DADEPTH accept only the DEPTH and DEPTH_STENCIL orders;
//  - a channel order or type outside the table bounds is rejected.
// An unknown geometry trips the assert in debug builds.
ImageProperty ImageLutKv::MapFormat(const hsa_ext_image_format_t& format,
                                    hsa_ext_image_geometry_t geometry) const {
  const ImageProperty unsupported = {0};

  // channel_order and channel_type are 32-bit integer fields supplied by the
  // caller and are used directly as table indices below, so reject values
  // that would read past the end of kPropLut_.
  if (static_cast<uint32_t>(format.channel_order) >= static_cast<uint32_t>(ORDER_COUNT) ||
      static_cast<uint32_t>(format.channel_type) >= static_cast<uint32_t>(TYPE_COUNT)) {
    return unsupported;
  }

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
      return kPropLut_[format.channel_order][format.channel_type];
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
      switch (format.channel_order) {
        // Hardware does not support buffer access to srgb image.
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
          break;
        default:
          switch (format.channel_type) {
            // Hardware does not support buffer access to 555/565 packed image.
            case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555:
            case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565:
              break;
            default:
              return kPropLut_[format.channel_order][format.channel_type];
          }
      }
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      switch (format.channel_order) {
        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
        case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
          return kPropLut_[format.channel_order][format.channel_type];
        default:
          break;
      }
      break;
    default:
      assert(false && "Should not reach here");
      break;
  }

  return unsupported;
}

// Returns the kSwizzleLut_ record for |order|. A value that is not one of the
// listed channel orders trips the assert in debug builds and yields a swizzle
// whose four selectors are all 0xff otherwise.
Swizzle ImageLutKv::MapSwizzle(hsa_ext_image_channel_order32_t order) const {
  bool is_known_order = false;

  switch (order) {
    case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
      is_known_order = true;
      break;
    default:
      break;
  }

  if (is_known_order) {
    return kSwizzleLut_[order];
  }

  assert(false && "Should not reach here");
  const Swizzle invalid_swizzle = {0xff, 0xff, 0xff, 0xff};
  return invalid_swizzle;
}

// Maximum width for |geometry|: column 0 of its kMaxDimensionLut_ record.
uint32_t ImageLutKv::GetMaxWidth(hsa_ext_image_geometry_t geometry) const {
  const int kWidthColumn = 0;
  const uint32_t max_width = kMaxDimensionLut_[geometry][kWidthColumn];
  return max_width;
}

// Maximum height for |geometry|: column 1 of its kMaxDimensionLut_ record.
uint32_t ImageLutKv::GetMaxHeight(hsa_ext_image_geometry_t geometry) const {
  const int kHeightColumn = 1;
  const uint32_t max_height = kMaxDimensionLut_[geometry][kHeightColumn];
  return max_height;
}

// Maximum depth for |geometry|: column 2 of its kMaxDimensionLut_ record.
uint32_t ImageLutKv::GetMaxDepth(hsa_ext_image_geometry_t geometry) const {
  const int kDepthColumn = 2;
  const uint32_t max_depth = kMaxDimensionLut_[geometry][kDepthColumn];
  return max_depth;
}

// Maximum array size for |geometry|: column 3 of its kMaxDimensionLut_ record.
uint32_t ImageLutKv::GetMaxArraySize(hsa_ext_image_geometry_t geometry) const {
  const int kArraySizeColumn = 3;
  const uint32_t max_array_size = kMaxDimensionLut_[geometry][kArraySizeColumn];
  return max_array_size;
}

// Returns the size in bytes of one pixel stored in the device |data_format|,
// or 0 for a format that is not listed. Only the formats that appear in
// kPropLut_ are covered.
uint32_t ImageLutKv::GetPixelSize(uint8_t data_format, uint8_t data_type) const {
  switch (data_format) {
    case FMT_8:
      return 1;

    case FMT_8_8:
    case FMT_16:
    case FMT_5_6_5:
    case FMT_1_5_5_5:
      return 2;

    case FMT_32:
      // FMT_32 combined with TYPE_UNORM reports 3 bytes, which matches the
      // {0, 3, FMT_32, TYPE_UNORM} entry of the DEPTH row in kPropLut_.
      if (data_type == TYPE_UNORM) {
        return 3;
      }
      return 4;

    case FMT_8_8_8_8:
    case FMT_16_16:
    case FMT_2_10_10_10:
      return 4;

    case FMT_32_32:
    case FMT_16_16_16_16:
      return 8;

    case FMT_32_32_32_32:
      return 16;

    default:
      return 0;
  }
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_lut_kv.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H
#define AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H

#include "image_lut.h"

namespace rocr {
namespace image {

// Table-driven ImageLut implementation: every query is answered from the
// static lookup tables declared in the private section below.
class ImageLutKv : public ImageLut {
 public:
  ImageLutKv() {}

  virtual ~ImageLutKv() {}

  // Maps an HSA image geometry to the device geometry enum (kGeometryLut_).
  virtual uint32_t MapGeometry(hsa_ext_image_geometry_t geometry) const;

  // Returns the channel format property for |format| used with |geometry|.
  virtual ImageProperty MapFormat(const hsa_ext_image_format_t& format,
                                  hsa_ext_image_geometry_t geometry) const;

  // Returns the channel swizzle for |order| (kSwizzleLut_).
  virtual Swizzle MapSwizzle(hsa_ext_image_channel_order32_t order) const;

  // The four getters below each return one column of kMaxDimensionLut_.
  virtual uint32_t GetMaxWidth(hsa_ext_image_geometry_t geometry) const;

  virtual uint32_t GetMaxHeight(hsa_ext_image_geometry_t geometry) const;

  virtual uint32_t GetMaxDepth(hsa_ext_image_geometry_t geometry) const;

  virtual uint32_t GetMaxArraySize(hsa_ext_image_geometry_t geometry) const;

  // Returns the pixel size in bytes for a device data format / data type
  // pair, or 0 when the format is not recognized.
  uint32_t GetPixelSize(uint8_t data_format, uint8_t data_type) const;

 private:
  // Lookup table of image geometry to device geometry enum.
  static const uint32_t kGeometryLut_[GEOMETRY_COUNT];

  // Lookup table of channel format property. Based on HSA Programmer's
  // Reference Manual 1.0P Table 9-4 Channel Order, Channel type and Image
  // Geometry Combinations.
  static const ImageProperty kPropLut_[ORDER_COUNT][TYPE_COUNT];

  // Lookup table of channel order swizzle.
  static const Swizzle kSwizzleLut_[ORDER_COUNT];

  // Lookup table of image geometry to max dimension.
  // Each record contains four values: width, height, depth, array_size.
  static const uint32_t kMaxDimensionLut_[GEOMETRY_COUNT][4];

  DISALLOW_COPY_AND_ASSIGN(ImageLutKv);
};

}  // namespace image
}  // namespace rocr
#endif  // AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H


================================================
FILE: runtime/hsa-runtime/image/image_manager.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_image.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "image_manager.h"
#include "image_runtime.h"

#include <assert.h>

#include <algorithm>
#include <climits>
#include <cmath>

// Windows-only shim: supplies lrintf() through the SSE scalar
// float-to-integer conversion intrinsic.
// NOTE(review): NOMINMAX is defined here, after the #include lines above;
// confirm it still takes effect where it is needed.
#if (defined(WIN32) || defined(_WIN32))
#define NOMINMAX
__inline long int lrintf(float f) { return _mm_cvtss_si32(_mm_load_ss(&f)); }
#endif

namespace rocr {
namespace image {

// Allocates storage for an Image from the image runtime's kernarg pool,
// constructs the object in place and grants |agent| access to it.
// Returns NULL when the allocation or the access grant fails; a failed
// allocation additionally trips an assert in debug builds.
Image* Image::Create(hsa_agent_t agent) {
  const hsa_amd_memory_pool_t kernarg_pool = ImageRuntime::instance()->kernarg_pool();

  void* storage = NULL;
  const hsa_status_t alloc_status =
      AMD::hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(Image), 0, &storage);
  assert(alloc_status == HSA_STATUS_SUCCESS);
  if (alloc_status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  Image* image = new (storage) Image();

  const hsa_status_t access_status = AMD::hsa_amd_agents_allow_access(1, &agent, NULL, image);
  if (access_status == HSA_STATUS_SUCCESS) {
    return image;
  }

  // Access could not be granted: undo the construction and the allocation.
  Image::Destroy(image);
  return NULL;
}

void Image::Destroy(const Image* image) {
  assert(image != NULL);
  image->~Image();

  hsa_status_t status = AMD::hsa_amd_memory_pool_free(const_cast<Image*>(image));

  assert(status == HSA_STATUS_SUCCESS);
}

// Allocates storage for a Sampler from the image runtime's kernarg pool,
// constructs the object in place and grants |agent| access to it.
// Returns NULL when the allocation or the access grant fails.
Sampler* Sampler::Create(hsa_agent_t agent) {
  const hsa_amd_memory_pool_t kernarg_pool = ImageRuntime::instance()->kernarg_pool();

  void* storage = NULL;
  const hsa_status_t alloc_status =
      AMD::hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(Sampler), 0, &storage);
  if (alloc_status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  Sampler* sampler = new (storage) Sampler();

  const hsa_status_t access_status = AMD::hsa_amd_agents_allow_access(1, &agent, NULL, sampler);
  if (access_status == HSA_STATUS_SUCCESS) {
    return sampler;
  }

  // Access could not be granted: undo the construction and the allocation.
  Sampler::Destroy(sampler);
  return NULL;
}

void Sampler::Destroy(const Sampler* sampler) {
  assert(sampler != NULL);
  sampler->~Sampler();

  hsa_status_t status = AMD::hsa_amd_memory_pool_free(const_cast<Sampler*>(sampler));

  assert(status == HSA_STATUS_SUCCESS);
}

// Constructor: performs no work.
ImageManager::ImageManager() {}

// Destructor: performs no work.
ImageManager::~ImageManager() {}

// Copies pixels from a linear host buffer into |image_region| of |dst_image|.
//
// The buffer is wrapped in a temporary read-only Image that borrows
// |dst_image|'s descriptor and uses the supplied pitches, so the copy itself
// is delegated to CopyImage with a zero source origin.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the temporary image cannot
// be created, otherwise the status reported by CopyImage.
hsa_status_t ImageManager::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  Image* src_image = Image::Create(dst_image.component);

  // Image::Create returns NULL on allocation or access-grant failure; bail
  // out instead of dereferencing a null pointer.
  if (src_image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  src_image->component = dst_image.component;
  src_image->desc = dst_image.desc;
  src_image->data = const_cast<void*>(src_memory);
  src_image->permission = HSA_ACCESS_PERMISSION_RO;
  src_image->row_pitch = src_row_pitch;
  src_image->slice_pitch = src_slice_pitch;

  const hsa_dim3_t dst_origin = image_region.offset;
  const hsa_dim3_t src_origin = {0};
  const hsa_dim3_t copy_size = image_region.range;

  hsa_status_t status = ImageManager::CopyImage(
      dst_image, *src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(src_image);

  return status;
}

// Copies |image_region| of |src_image| into a linear host buffer.
//
// The buffer is wrapped in a temporary write-only Image that borrows
// |src_image|'s descriptor and uses the supplied pitches, so the copy itself
// is delegated to CopyImage with a zero destination origin.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the temporary image cannot
// be created, otherwise the status reported by CopyImage.
hsa_status_t ImageManager::CopyImageToBuffer(
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  // Treat buffer as image since we don't tile our image anyway.
  Image* dst_image = Image::Create(src_image.component);

  // Image::Create returns NULL on allocation or access-grant failure; bail
  // out instead of dereferencing a null pointer.
  if (dst_image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  dst_image->component = src_image.component;
  dst_image->desc = src_image.desc;  // the width, height, depth is ignored.
  dst_image->data = dst_memory;
  dst_image->permission = HSA_ACCESS_PERMISSION_WO;
  dst_image->row_pitch = dst_row_pitch;
  dst_image->slice_pitch = dst_slice_pitch;

  const hsa_dim3_t dst_origin = {0};
  const hsa_dim3_t src_origin = image_region.offset;
  const hsa_dim3_t copy_size = image_region.range;

  hsa_status_t status = ImageManager::CopyImage(
      *dst_image, src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(dst_image);

  return status;
}

// Copies a |size| region from |src_image| (starting at |src_origin|) to
// |dst_image| (starting at |dst_origin|) on the host, row by row.
//
// Both images must share the same channel order and channel type. The only
// accepted mismatch is UNORM_INT8 SRGBA <-> RGBA, in which case the R, G and
// B channels are converted per pixel and alpha is copied unchanged.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for any other format mismatch and
// HSA_STATUS_SUCCESS otherwise. Unsupported formats are caught only by the
// asserts below.
hsa_status_t ImageManager::CopyImage(const Image& dst_image,
                                     const Image& src_image,
                                     const hsa_dim3_t& dst_origin,
                                     const hsa_dim3_t& src_origin,
                                     const hsa_dim3_t size) {
  ImageProperty dst_image_prop = GetImageProperty(
      dst_image.component, dst_image.desc.format, dst_image.desc.geometry);
  assert(dst_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);

  const size_t dst_element_size = dst_image_prop.element_size;
  assert(dst_element_size != 0);

  ImageProperty src_image_prop = GetImageProperty(
      src_image.component, src_image.desc.format, src_image.desc.geometry);
  assert(src_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);

  const size_t src_element_size = src_image_prop.element_size;
  assert(src_element_size != 0);

  const hsa_ext_image_format_t src_format = src_image.desc.format;
  const hsa_ext_image_channel_order32_t src_order = src_format.channel_order;
  const hsa_ext_image_channel_type32_t src_type = src_format.channel_type;

  const hsa_ext_image_format_t dst_format = dst_image.desc.format;
  const hsa_ext_image_channel_order32_t dst_order = dst_format.channel_order;
  const hsa_ext_image_channel_type32_t dst_type = dst_format.channel_type;

  // At most one of these is set, and only for the SRGBA <-> RGBA case.
  bool linear_to_standard_rgb = false;
  bool standard_to_linear_rgb = false;

  if ((src_order != dst_order) || (src_type != dst_type)) {
    // Source and destination format must be the same, except for
    // SRGBA <--> RGBA images.
    if ((src_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8) &&
        (dst_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8)) {
      if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) &&
          (dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA)) {
        standard_to_linear_rgb = true;
      } else if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA) &&
                 (dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA)) {
        linear_to_standard_rgb = true;
      } else {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    } else {
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  }

  // Source and destination format should be the same so the element size
  // should be same too.
  const size_t element_size = src_element_size;

  // row_pitch and slice_pitch in bytes.
  // Each pitch is at least the tightly packed size of the copied region, so
  // an image whose stored pitch is smaller (e.g. zero) is treated as packed.
  const size_t dst_row_pitch =
      std::max(dst_image.row_pitch, size.x * element_size);
  // For 1DA images a slice is a single row, hence the row count of 1.
  const size_t dst_slice_pitch = std::max(
      dst_image.slice_pitch,
      dst_row_pitch *
          (dst_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));

  const size_t src_row_pitch =
      std::max(src_image.row_pitch, size.x * element_size);
  const size_t src_slice_pitch = std::max(
      src_image.slice_pitch,
      src_row_pitch *
          (src_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));

  // Start with the x origin / width in pixels; converted to bytes below.
  size_t src_offset = src_origin.x;
  size_t dst_offset = dst_origin.x;
  size_t copy_size = size.x;

  // Calculate the source offset in bytes.
  src_offset *= element_size;
  src_offset += src_row_pitch * src_origin.y;
  src_offset += src_slice_pitch * src_origin.z;

  // Calculate the destination offset in bytes.
  dst_offset *= element_size;
  dst_offset += dst_row_pitch * dst_origin.y;
  dst_offset += dst_slice_pitch * dst_origin.z;

  // Bytes copied per row.
  copy_size *= element_size;

  // Get destination and source memory.
  unsigned char* dst = static_cast<unsigned char*>(dst_image.data);
  const unsigned char* src = static_cast<const unsigned char*>(src_image.data);

  if (!linear_to_standard_rgb && !standard_to_linear_rgb) {
    // Copy the memory by row.
    for (size_t slice = 0; slice < size.z; ++slice) {
      size_t src_offset_temp = src_offset + slice * src_slice_pitch;
      size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;

      for (size_t rows = 0; rows < size.y; ++rows) {
        std::memcpy((dst + dst_offset_temp), (src + src_offset_temp),
                    copy_size);
        src_offset_temp += src_row_pitch;
        dst_offset_temp += dst_row_pitch;
      }
    }
  } else {
    // Copy per pixel between RGBA-SRGBA images.
    for (size_t slice = 0; slice < size.z; ++slice) {
      size_t src_offset_temp = src_offset + slice * src_slice_pitch;
      size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;

      for (size_t rows = 0; rows < size.y; ++rows) {
        const uint8_t* src_pixel = src + src_offset_temp;
        uint8_t* dst_pixel = dst + dst_offset_temp;

        // Each color channel goes 8-bit -> normalized float -> color space
        // conversion -> 8-bit; alpha is copied as-is.
        if (linear_to_standard_rgb) {
          for (size_t cols = 0; cols < size.x; ++cols) {
            dst_pixel[0] =
                Denormalize(LinearToStandardRGB(Normalize(src_pixel[0])));  // R
            dst_pixel[1] =
                Denormalize(LinearToStandardRGB(Normalize(src_pixel[1])));  // G
            dst_pixel[2] =
                Denormalize(LinearToStandardRGB(Normalize(src_pixel[2])));  // B
            dst_pixel[3] = src_pixel[3];                                    // A

            src_pixel += element_size;
            dst_pixel += element_size;
          }
        } else {
          assert(standard_to_linear_rgb);
          for (size_t cols = 0; cols < size.x; ++cols) {
            dst_pixel[0] =
                Denormalize(StandardToLinearRGB(Normalize(src_pixel[0])));  // R
            dst_pixel[1] =
                Denormalize(StandardToLinearRGB(Normalize(src_pixel[1])));  // G
            dst_pixel[2] =
                Denormalize(StandardToLinearRGB(Normalize(src_pixel[2])));  // B
            dst_pixel[3] = src_pixel[3];                                    // A

            src_pixel += element_size;
            dst_pixel += element_size;
          }
        }

        src_offset_temp += src_row_pitch;
        dst_offset_temp += dst_row_pitch;
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Converts a 32-bit float to the bit pattern of a 16-bit half float.
//
// The mantissa is truncated (not rounded). Values too large for a normal
// half are clamped to the largest finite half (0x7bff), values too small for
// a half subnormal become signed zero, and Inf/NaN map to the half Inf,
// quiet NaN (0x7e00) or signalling NaN (0x7c01) patterns.
uint16_t ImageManager::FloatToHalf(float in) {
  volatile union {
    float f;
    uint32_t u;
  } bits;

  bits.f = in;

  const uint16_t sign = (bits.u >> 16) & 0x8000;
  const uint32_t exponent = (bits.u >> 23) & 0xff;
  const uint32_t mantissa = (bits.u) & 0x7fffff;

  // Inf and NaN.
  if (exponent == 0xff) {
    if (mantissa == 0) {
      return (sign | 0x7c00);
    }
    const bool is_quiet = (mantissa & 0x400000) != 0;
    return is_quiet ? (sign | 0x7e00) : (sign | 0x7c01);
  }

  // Signed zero.
  if (exponent == 0 && mantissa == 0) {
    return sign;
  }

  // Biased float exponents that bound the half float ranges.
  const uint32_t kMaxExpNormal = 0x477fe000 >> 23;     // 65504.
  const uint32_t kMinExpNormal = 0x38800000 >> 23;     // 2^-14;
  const uint32_t kMinExpSubnormal = 0x33800000 >> 23;  // 2^-24.

  if (exponent > kMaxExpNormal) {
    // Half overflow.
    // TODO: clamp it to max half float or +Inf.
    return (sign | 0x7bff);
  }

  if (exponent < kMinExpSubnormal) {
    // Half underflow.
    return (sign);
  }

  const uint32_t half_mantissa = mantissa >> 13;

  if (exponent < kMinExpNormal) {
    // Half subnormal: make the implicit leading one explicit, then shift it
    // down by the distance to the smallest normal exponent.
    const uint32_t shift = 127 - exponent - 14;
    return (sign | ((0x0400 | half_mantissa) >> shift));
  }

  // Half normal: rebias the exponent from 127 to 15.
  const uint32_t half_exponent = (exponent - 127 + 15) << 10;
  return (sign | (half_exponent | half_mantissa));
}

/// @brief Map an 8-bit unsigned normalized channel value onto [0.0, 1.0].
///
/// @param u_val Channel value in the range [0, UINT8_MAX].
/// @return u_val / UINT8_MAX, clamped to [0.0, 1.0].
float ImageManager::Normalize(uint8_t u_val) {
  // Both endpoints are returned as exact constants.
  if (u_val == 0) {
    return 0.0f;
  }
  if (u_val == UINT8_MAX) {
    return 1.0f;
  }

  const float ratio =
      static_cast<float>(u_val) / static_cast<float>(UINT8_MAX);
  const float not_below_zero = std::max(ratio, 0.0f);
  return std::min(not_below_zero, 1.0f);
}

/// @brief Quantize a normalized float to an 8-bit unsigned channel value.
///
/// The input is scaled by UINT8_MAX, rounded with lrintf, and clamped to
/// [0, UINT8_MAX].
///
/// @param f_val Normalized value, nominally in [0.0, 1.0].
/// @return The quantized channel value.
uint8_t ImageManager::Denormalize(float f_val) {
  const unsigned long kMaxCode = UINT8_MAX;

  const long rounded = lrintf(kMaxCode * f_val);
  const long not_negative = std::max(rounded, 0l);

  return std::min(static_cast<unsigned long>(not_negative), kMaxCode);
}

/// @brief Convert an sRGB encoded component to linear RGB.
///
/// Follows the HSA Programmers Reference Manual version 1.0 Provisional,
/// chapter 7.1.4.1.2 Standard RGB (s-Form). The arithmetic is carried out in
/// double precision with single precision constants.
///
/// @param s_val sRGB encoded component.
/// @return Linear component.
float ImageManager::StandardToLinearRGB(float s_val) {
  const double encoded = static_cast<double>(s_val);

  double linear;
  if (encoded <= 0.04045f) {
    // Linear segment near black.
    linear = encoded / 12.92f;
  } else {
    // Power-law segment.
    linear = pow(((encoded + 0.055f) / 1.055f), 2.4f);
  }

  return linear;
}

/// @brief Convert a linear RGB component to its sRGB encoding.
///
/// Follows the HSA Programmers Reference Manual version 1.0 Provisional,
/// chapter 7.1.4.1.2 Standard RGB (s-Form). NaN is treated as 0.0 and the
/// input is clamped to [0.0, 1.0] before encoding.
///
/// @param l_val Linear component.
/// @return sRGB encoded component.
float ImageManager::LinearToStandardRGB(float l_val) {
  double linear = static_cast<double>(l_val);

  // NaN is mapped to zero before any range checks.
#if (defined(WIN32) || defined(_WIN32))
  if (_isnan(linear)) linear = 0.0;
#else
  if (std::isnan(linear)) linear = 0.0;
#endif

  // Out-of-range inputs saturate.
  if (linear > 1.0) {
    return 1.0f;
  }
  if (linear < 0.0) {
    return 0.0f;
  }

  // Linear segment near black.
  if (linear < 0.0031308) {
    return static_cast<float>(12.92 * linear);
  }

  // Power-law segment.
  return static_cast<float>((1.055 * pow(linear, 5.0 / 12.0)) - 0.055);
}

/// @brief Convert a fill pattern into the in-memory layout of one image
/// element of the given format.
///
/// The input pattern is indexed as R, G, B, A (elements 0..3) of 32-bit
/// values. Depending on the channel type those values are read as float,
/// int32_t or uint32_t. The channel order selects which input components are
/// written and in which order. For sRGB channel orders the first three
/// components are converted with LinearToStandardRGB before quantization and
/// the fourth is passed through unchanged.
///
/// Normalized types scale the float component, round with lrintf and clamp
/// to the representable range. Integer types are stored with a plain
/// narrowing conversion. Packed types (555, 565, 101010) write bit-fields of
/// a 32-bit word and leave the remaining bits of that word untouched.
///
/// @param format Channel order and channel type of the destination image.
/// @param pattern_in Source pattern; read as up to four 32-bit components.
/// @param pattern_out Destination for the formatted element. Only the bytes
/// or bit-fields that belong to the format's channels are written.
void ImageManager::FormatPattern(const hsa_ext_image_format_t& format,
                                 const void* pattern_in, void* pattern_out) {
  const int kR = 0;
  const int kG = 1;
  const int kB = 2;
  const int kA = 3;

  // index[c] is the input component stored in output channel c.
  int index[4] = {0};
  int num_channel = 0;

  switch (format.channel_order) {
    case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
      index[0] = kA;
      num_channel = 1;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
      index[0] = kR;
      num_channel = 1;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
      index[0] = kR;
      index[1] = kG;
      num_channel = 2;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
      index[0] = kR;
      index[1] = kA;
      num_channel = 2;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
      index[0] = kR;
      index[1] = kG;
      index[2] = kB;
      num_channel = 3;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
      index[0] = kR;
      index[1] = kG;
      index[2] = kB;
      index[3] = kA;
      num_channel = 4;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
      index[0] = kB;
      index[1] = kG;
      index[2] = kR;
      index[3] = kA;
      num_channel = 4;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
      index[0] = kA;
      index[1] = kR;
      index[2] = kG;
      index[3] = kB;
      num_channel = 4;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
      index[0] = kA;
      index[1] = kB;
      index[2] = kG;
      index[3] = kR;
      num_channel = 4;
      break;
    case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
      index[0] = kR;
      num_channel = 1;
      break;
    default:
      assert(false && "Should not reach here.");
      break;
  }

  // Views of the caller's pattern. The integer views are set up
  // unconditionally so that an sRGB channel order combined with an integer
  // channel type reads the raw pattern instead of dereferencing a null
  // pointer.
  const float* pattern_in_f = reinterpret_cast<const float*>(pattern_in);
  const int32_t* pattern_in_i32 = reinterpret_cast<const int32_t*>(pattern_in);
  const uint32_t* pattern_in_ui32 =
      reinterpret_cast<const uint32_t*>(pattern_in);

  // For sRGB orders the float view is redirected to a converted copy.
  float new_pattern_in_f[4] = {0};
  const bool is_srgb_order =
      (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
      (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
      (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
      (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);
  if (is_srgb_order) {
    new_pattern_in_f[0] = LinearToStandardRGB(pattern_in_f[0]);
    new_pattern_in_f[1] = LinearToStandardRGB(pattern_in_f[1]);
    new_pattern_in_f[2] = LinearToStandardRGB(pattern_in_f[2]);
    new_pattern_in_f[3] = pattern_in_f[3];

    pattern_in_f = new_pattern_in_f;
  }

  // Scale, round and clamp a float to [0, scale].
  const auto to_unorm = [](float value, unsigned long scale) -> unsigned long {
    const long conv = lrintf(scale * value);
    return std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
  };

  // Scale, round and clamp a float to [-scale - 1, scale].
  const auto to_snorm = [](float value, long scale) -> long {
    const long conv = lrintf(scale * value);
    return std::min(std::max(conv, -scale - 1l), scale);
  };

  for (int c = 0; c < num_channel; ++c) {
    const int src = index[c];

    switch (format.channel_type) {
      case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8: {
        int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
        pattern_out_i8[c] =
            static_cast<int8_t>(to_snorm(pattern_in_f[src], INT8_MAX));
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16: {
        int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
        pattern_out_i16[c] =
            static_cast<int16_t>(to_snorm(pattern_in_f[src], INT16_MAX));
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8: {
        uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
        pattern_out_ui8[c] =
            static_cast<uint8_t>(to_unorm(pattern_in_f[src], UINT8_MAX));
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16: {
        uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
        pattern_out_ui16[c] =
            static_cast<uint16_t>(to_unorm(pattern_in_f[src], UINT16_MAX));
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24: {
        struct Order24 {
          uint32_t r : 24;
        };

        Order24* pattern_out_u24 = reinterpret_cast<Order24*>(pattern_out);
        pattern_out_u24[c].r = to_unorm(pattern_in_f[src], 0xffffff);
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: {
        struct Order555 {
          uint32_t b : 5;
          uint32_t g : 5;
          uint32_t r : 5;
        };

        // Packed format: all three channels are written at once, so the
        // per-channel loop ends here.
        Order555* pattern_out_u555 = reinterpret_cast<Order555*>(pattern_out);
        pattern_out_u555->r = to_unorm(pattern_in_f[index[0]], 0x1f);
        pattern_out_u555->g = to_unorm(pattern_in_f[index[1]], 0x1f);
        pattern_out_u555->b = to_unorm(pattern_in_f[index[2]], 0x1f);
        return;
      }
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: {
        struct Order565 {
          uint32_t b : 5;
          uint32_t g : 6;
          uint32_t r : 5;
        };

        // Packed format: green has six bits, red and blue have five.
        Order565* pattern_out_u565 = reinterpret_cast<Order565*>(pattern_out);
        pattern_out_u565->r = to_unorm(pattern_in_f[index[0]], 0x1f);
        pattern_out_u565->g = to_unorm(pattern_in_f[index[1]], 0x3f);
        pattern_out_u565->b = to_unorm(pattern_in_f[index[2]], 0x1f);
        return;
      }
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010: {
        struct Order101010 {
          uint32_t b : 10;
          uint32_t g : 10;
          uint32_t r : 10;
        };

        // Packed format: all three channels are written at once.
        Order101010* pattern_out_u101010 =
            reinterpret_cast<Order101010*>(pattern_out);
        pattern_out_u101010->r = to_unorm(pattern_in_f[index[0]], 0x3ff);
        pattern_out_u101010->g = to_unorm(pattern_in_f[index[1]], 0x3ff);
        pattern_out_u101010->b = to_unorm(pattern_in_f[index[2]], 0x3ff);
        return;
      }
      case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8: {
        int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
        pattern_out_i8[c] = pattern_in_i32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16: {
        int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
        pattern_out_i16[c] = pattern_in_i32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32: {
        int32_t* pattern_out_i32 = reinterpret_cast<int32_t*>(pattern_out);
        pattern_out_i32[c] = pattern_in_i32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: {
        uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
        pattern_out_ui8[c] = pattern_in_ui32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: {
        uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
        pattern_out_ui16[c] = pattern_in_ui32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: {
        uint32_t* pattern_out_ui32 = reinterpret_cast<uint32_t*>(pattern_out);
        pattern_out_ui32[c] = pattern_in_ui32[src];
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT: {
        uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
        pattern_out_ui16[c] = FloatToHalf(pattern_in_f[src]);
      } break;
      case HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT: {
        float* pattern_out_f = reinterpret_cast<float*>(pattern_out);
        pattern_out_f[c] = pattern_in_f[src];
      } break;
      default:
        assert(false && "Should not reach here.");
        break;
    }
  }
}

/// @brief Fill a region of an image's backing storage from the host.
///
/// The pattern is converted once to the image's element layout with
/// FormatPattern and then copied element by element into every pixel of the
/// region, using the image's row and slice pitches to step through memory.
///
/// @param image Image whose data pointer, pitches and format are used.
/// @param pattern Fill pattern, passed unchanged to FormatPattern.
/// @param region Offset and range (in elements) of the area to fill.
/// @return HSA_STATUS_SUCCESS.
hsa_status_t ImageManager::FillImage(const Image& image, const void* pattern,
                                     const hsa_ext_image_region_t& region) {
  const hsa_dim3_t origin = region.offset;
  const hsa_dim3_t size = region.range;

  ImageProperty image_prop =
      GetImageProperty(image.component, image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);

  const size_t element_size = image_prop.element_size;
  assert(element_size != 0);

  const size_t row_pitch = image.row_pitch;
  const size_t slice_pitch = image.slice_pitch;

  // Map memory.
  unsigned char* fill_mem = static_cast<unsigned char*>(image.data);

  // Staging buffer for one formatted element. FormatPattern writes it through
  // float / 32-bit integer / bit-field pointers, so it must be aligned for
  // 32-bit access rather than only for char.
  alignas(uint32_t) char fill_value[4 * sizeof(int)] = {0};
  assert(element_size <= sizeof(fill_value));
  FormatPattern(image.desc.format, pattern, fill_value);

  // Byte offset of the first element of the region.
  size_t offset = origin.x * element_size;
  offset += row_pitch * origin.y;
  offset += slice_pitch * origin.z;

  // Fill the image memory with the pattern.
  for (size_t slice = 0; slice < size.z; ++slice) {
    size_t row_offset = offset + slice * slice_pitch;

    for (size_t row = 0; row < size.y; ++row) {
      size_t pix_offset = row_offset;

      // Copy pattern per pixel.
      for (size_t column = 0; column < size.x; ++column) {
        memcpy((fill_mem + pix_offset), fill_value, element_size);
        pix_offset += element_size;
      }

      row_offset += row_pitch;
    }
  }

  return HSA_STATUS_SUCCESS;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H
#define AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H

#include <cstring>
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "resource.h"
#include "util.h"

namespace rocr {
namespace image {

/// @brief Abstract class for creating AMD agent specific image / sampler
/// resources and data transfer.
///
/// Device specific managers implement the pure virtual functions. The copy
/// and fill functions are virtual but not pure, so a derived class may
/// override them. The protected static helpers implement the pixel format
/// conversions used by those functions.
class ImageManager {
 public:
  explicit ImageManager();
  virtual ~ImageManager();

  /// @brief Prepare the manager for the agent identified by @p agent_handle.
  virtual hsa_status_t Initialize(hsa_agent_t agent_handle) = 0;

  /// @brief Counterpart of Initialize.
  virtual void Cleanup() = 0;

  /// @brief Retrieve device specific image property of a certain format
  /// and geometry.
  virtual ImageProperty GetImageProperty(
      hsa_agent_t component, const hsa_ext_image_format_t& format,
      hsa_ext_image_geometry_t geometry) const = 0;

  /// @brief Retrieve device specific supported max width, height, depth,
  /// and array size of an image geometry.
  virtual void GetImageInfoMaxDimension(hsa_agent_t component,
                                        hsa_ext_image_geometry_t geometry,
                                        uint32_t& width, uint32_t& height,
                                        uint32_t& depth,
                                        uint32_t& array_size) const = 0;

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch,
      size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const = 0;

  /// @brief Fill image structure with device specific image object.
  virtual hsa_status_t PopulateImageSrd(Image& image) const = 0;

  /// @brief Fill image structure with device specific image object using the given format.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const = 0;

  /// @brief Modify device specific image object according to the specified
  /// new format.
  virtual hsa_status_t ModifyImageSrd(
      Image& image, hsa_ext_image_format_t& new_format) const = 0;

  /// @brief Fill sampler structure with device specific sampler object.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const = 0;

  /// @brief Copy the content of a linear memory to an image object.
  virtual hsa_status_t CopyBufferToImage(
      const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
      const Image& dst_image, const hsa_ext_image_region_t& image_region);

  /// @brief Copy the content of an image object to a linear memory.
  virtual hsa_status_t CopyImageToBuffer(
      const Image& src_image, void* dst_memory, size_t dst_row_pitch,
      size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);

  /// @brief Transfer images backing storage.
  virtual hsa_status_t CopyImage(const Image& dst_image, const Image& src_image,
                                 const hsa_dim3_t& dst_origin,
                                 const hsa_dim3_t& src_origin,
                                 const hsa_dim3_t size);

  /// @brief Fill image backing storage using host copy.
  virtual hsa_status_t FillImage(const Image& image, const void* pattern,
                                 const hsa_ext_image_region_t& region);

 protected:
  /// @brief Convert a single precision float to a half precision bit pattern.
  static uint16_t FloatToHalf(float in);

  // Normalize and Denormalize are defined out of line in a source file, so
  // they are deliberately not declared inline: an inline function has to be
  // defined in every translation unit that uses it.

  /// @brief Map an 8-bit unsigned normalized value to a float in [0, 1].
  static float Normalize(uint8_t u_val);

  /// @brief Map a normalized float back to an 8-bit unsigned value.
  static uint8_t Denormalize(float f_val);

  /// @brief Convert an sRGB encoded component to linear RGB.
  static float StandardToLinearRGB(float s_val);

  /// @brief Convert a linear RGB component to its sRGB encoding.
  static float LinearToStandardRGB(float l_val);

  /// @brief Convert a fill pattern to the element layout of @p format.
  static void FormatPattern(const hsa_ext_image_format_t& format,
                            const void* pattern_in, void* pattern_out);

  /// @brief Translate the three HSA sampler addressing modes into the
  /// hardware clamp fields of a sampler word.
  ///
  /// @tparam dstT Sampler word type exposing bits.CLAMP_X / CLAMP_Y /
  /// CLAMP_Z.
  /// @tparam srcT Enumeration providing SQ_TEX_CLAMP_LAST_TEXEL,
  /// SQ_TEX_CLAMP_BORDER, SQ_TEX_MIRROR and SQ_TEX_WRAP.
  /// @param word Sampler word to update. It is only written when all three
  /// modes are recognized.
  /// @param address_mode Addressing modes for the X, Y and Z coordinates.
  /// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT if any
  /// mode is not recognized.
  template <typename dstT, typename srcT>
  static inline hsa_status_t convertAddressMode(dstT &word,
                            const hsa_ext_sampler_addressing_mode32_t address_mode[3]) {
    srcT clamp[3];
    for (int i = 0; i < 3; i++) {
      switch (address_mode[i]) {
        case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
          clamp[i] = srcT::SQ_TEX_CLAMP_LAST_TEXEL;
          break;
        case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
          clamp[i] = srcT::SQ_TEX_CLAMP_BORDER;
          break;
        case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
          clamp[i] = srcT::SQ_TEX_MIRROR;
          break;
        // UNDEFINED is treated the same as REPEAT.
        case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
        case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
          clamp[i] = srcT::SQ_TEX_WRAP;
          break;
        default:
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }
    }
    word.bits.CLAMP_X = static_cast<unsigned int>(clamp[0]);
    word.bits.CLAMP_Y = static_cast<unsigned int>(clamp[1]);
    word.bits.CLAMP_Z = static_cast<unsigned int>(clamp[2]);
    return HSA_STATUS_SUCCESS;
  }
 private:
  DISALLOW_COPY_AND_ASSIGN(ImageManager);
};

}  // namespace image
}  // namespace rocr
#endif  // AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H


================================================
FILE: runtime/hsa-runtime/image/image_manager_ai.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_manager_ai.h"

#include <assert.h>

#include <algorithm>
#include <climits>

#include "core/inc/runtime.h"
#include "hsakmt/hsakmt.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_ai.h"
#include "util.h"
#include "device_info.h"

namespace rocr {
namespace image {

// Constructs the manager by initializing the ImageManagerKv base; the
// constructor body itself does nothing.
ImageManagerAi::ImageManagerAi() : ImageManagerKv() {}

// Empty destructor body: nothing is released at this level.
ImageManagerAi::~ImageManagerAi() {}

// Size checks for the resource descriptor word types.
// NOTE(review): ASSERT_SIZE_UINT32 is defined elsewhere; presumably it is a
// compile-time check that each type is exactly 32 bits wide - confirm.

// Buffer resource words.
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD3)

// Image resource words.
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD3)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD4)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD5)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD6)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD7)

// Image sampler words.
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)

/// @brief Compute the size and alignment of the backing storage for an image.
///
/// The agent profile is queried first. With an opaque data layout, tiling is
/// selected only for a base profile agent and a geometry other than 1DB;
/// everything else uses a linear layout. The surface is then computed by
/// GetAddrlibSurfaceInfoAi and its size and base alignment are reported.
///
/// @param component Agent the image is created for.
/// @param desc Image descriptor.
/// @param image_data_layout Requested data layout.
/// @param image_data_row_pitch Requested row pitch in bytes, 0 if unspecified.
/// @param image_data_slice_pitch Requested slice pitch in bytes, 0 if
/// unspecified.
/// @param image_info Receives the surface size and alignment.
/// @return HSA_STATUS_SUCCESS on success; the status of the profile query if
/// it fails; HSA_STATUS_ERROR if the surface computation fails;
/// HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED if a requested pitch of a
/// linear, non-1DB image differs from the computed pitch.
hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR2_COMPUTE_SURFACE_INFO_OUTPUT surface = {0};

  hsa_profile_t agent_profile;
  const hsa_status_t query_status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  const bool is_buffer_image = (desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  // Tiling applies only to opaque layouts on base profile agents, and never
  // to 1DB images.
  Image::TileMode tile_mode = Image::TileMode::LINEAR;
  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE &&
      agent_profile == HSA_PROFILE_BASE && !is_buffer_image) {
    tile_mode = Image::TileMode::TILED;
  }

  if (GetAddrlibSurfaceInfoAi(component, desc, tile_mode, image_data_row_pitch,
                              image_data_slice_pitch, surface) == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  // Pitches in bytes as computed for the surface.
  const size_t computed_row_pitch = (surface.bpp >> 3) * surface.pitch;
  const size_t computed_slice_pitch = computed_row_pitch * surface.height;

  // A caller supplied (non-zero) pitch of a linear image must match exactly.
  if (!is_buffer_image &&
      image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    const bool row_pitch_mismatch =
        (image_data_row_pitch != 0) &&
        (computed_row_pitch != image_data_row_pitch);
    const bool slice_pitch_mismatch =
        (image_data_slice_pitch != 0) &&
        (computed_slice_pitch != image_data_slice_pitch);
    if (row_pitch_mismatch || slice_pitch_mismatch) {
      return static_cast<hsa_status_t>(
          HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
    }
  }

  image_info.size = surface.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = surface.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}

/// @brief Report whether @p address is local memory.
///
/// This implementation does not inspect the address and always answers true.
bool ImageManagerAi::IsLocalMemory(const void* address) const {
  return true;
}

/// @brief Build the image SRD from an imported metadata descriptor.
///
/// The eight descriptor words are copied into image.srd and then patched
/// with the image's own address, format and swizzle. 1DB images are patched
/// as a buffer resource, all other geometries as an image resource.
///
/// @param image Image to populate; srd, row_pitch and slice_pitch are written.
/// @param descriptor Imported metadata, interpreted as metadata_amd_ai_t.
/// @return HSA_STATUS_SUCCESS, or
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED if the format/geometry is
/// not supported, has a zero element size, or (non-1DB only) its element
/// size differs from the pixel size implied by the descriptor.
hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image, const metadata_amd_t* descriptor) const {
  metadata_amd_ai_t* desc = (metadata_amd_ai_t*)descriptor;
  const void* image_data_addr = image.data;

  // Look up the hardware format for the image's HSA format and geometry.
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  if((image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
     (image_prop.element_size == 0))
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;

  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // For local memory the address written to the SRD is relative to
  // local_memory_base_address_.
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the imported descriptor words.
  image.srd[0]=desc->word0.u32All;
  image.srd[1]=desc->word1.u32All;
  image.srd[2]=desc->word2.u32All;
  image.srd[3]=desc->word3.u32All;
  image.srd[4]=desc->word4.u32All;
  image.srd[5]=desc->word5.u32All;
  image.srd[6]=desc->word6.u32All;
  image.srd[7]=desc->word7.u32All;

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Buffer resource: rewrite words 0, 1 and 3; word 2 is left as imported.
    sq_buf_rsrc_word0_u word0;
    sq_buf_rsrc_word1_u word1;
    sq_buf_rsrc_word3_u word3;

    // Word 0 is rebuilt from scratch; words 1 and 3 keep their imported bits
    // apart from the fields assigned below.
    word0.val = 0;
    word0.f.base_address = PtrLow32(image_data_addr);

    word1.val = image.srd[1];
    word1.f.base_address_hi = PtrHigh32(image_data_addr);
    word1.f.stride = image_prop.element_size;

    word3.val = image.srd[3];
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.num_format = image_prop.data_type;
    word3.f.data_format = image_prop.data_format;
    word3.f.index_stride = image_prop.element_size;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[3] = word3.val;
  } else {
    // Image resource: the element size of the requested format must match
    // the pixel size of the format recorded in the imported descriptor.
    uint32_t hwPixelSize = ImageLut().GetPixelSize(desc->word1.bitfields.DATA_FORMAT,
                                                   desc->word1.bitfields.NUM_FORMAT);
    if(image_prop.element_size!=hwPixelSize)
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;

    // Patch address, format and swizzle fields in place in image.srd.
    ((SQ_IMG_RSRC_WORD0*)(&image.srd[0]))->bits.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    ((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    ((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.DATA_FORMAT = image_prop.data_format;
    ((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.NUM_FORMAT = image_prop.data_type;
    ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_X = swizzle.x;
    ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_Y = swizzle.y;
    ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_Z = swizzle.z;
    ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_W = swizzle.w;
    // Only the 1D and 1DA geometries override the imported resource type.
    if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
        image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
      ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.TYPE =
          ImageLut().MapGeometry(image.desc.geometry);
    }
    
    // Imported metadata holds the offset to metadata, add the image base address.
    // The offset is reassembled from its high part (word 5, shifted by 40)
    // and low part (word 7, shifted by 8) before the base is added.
    uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI) << 40;
    meta |= uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS) << 8;
    meta += reinterpret_cast<uintptr_t>(image_data_addr);

    // Write the resulting absolute metadata address back into words 7 and 5.
    ((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS = PtrLow40Shift8((void*)meta);
    ((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI =
        PtrHigh64Shift40((void*)meta);
  }
  //Looks like this is only used for CPU copies.
  // Both pitches are left at zero for images populated from metadata.
  image.row_pitch = 0;//desc->word4.bits.pitch+1*desc->word3.bits.element_size;
  image.slice_pitch = 0;//desc->;

  //Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Selects the border-color swizzle (TEX_BC_SWIZZLE) matching an image's
// destination-select swizzle. The decision is driven by which output channel
// (checked in the order w, x, y, z) selects SEL_X; if none does, the identity
// swizzle TEX_BC_Swizzle_XYZW is returned.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
    const SEL red = static_cast<SEL>(swizzle.x);
    const SEL green = static_cast<SEL>(swizzle.y);
    const SEL blue = static_cast<SEL>(swizzle.z);
    const SEL alpha = static_cast<SEL>(swizzle.w);

    if (alpha == SEL_X) {
        // Candidates here are TEX_BC_Swizzle_WZYX and TEX_BC_Swizzle_WXYZ.
        //
        // For the pre-defined border colors (white, opaque black, transparent
        // black) only the position of the alpha channel matters, because the
        // RGB channels are all equal, so either enumeration works. Behavior
        // with border color palettes is not known.
        if (blue == SEL_Y) {
            return TEX_BC_Swizzle_WZYX;  // ABGR
        }
        if ((red == SEL_X) && (green == SEL_X) && (blue == SEL_X)) {
            return TEX_BC_Swizzle_XYZW;  // RGBA
        }
        return TEX_BC_Swizzle_WXYZ;  // ARGB
    }

    if (red == SEL_X) {
        // Candidates here are TEX_BC_Swizzle_XYZW and TEX_BC_Swizzle_XWYZ.
        const bool replicated_x = (green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W);
        if ((green == SEL_Y) || replicated_x) {
            return TEX_BC_Swizzle_XYZW;  // RGBA
        }
        return TEX_BC_Swizzle_XWYZ;  // RAGB
    }

    if (green == SEL_X) {
        return TEX_BC_Swizzle_YXWZ;  // GRAB
    }

    if (blue == SEL_X) {
        return TEX_BC_Swizzle_ZYXW;  // BGRA
    }

    return TEX_BC_Swizzle_XYZW;
}


/// @brief Build the shader resource descriptor (SRD) for @p image from its
/// HSA image descriptor.
///
/// 1DB images are described with a 4-dword buffer resource (srd[0..3]); every
/// other geometry is described with an 8-dword image resource (srd[0..7])
/// whose layout comes from GetAddrlibSurfaceInfoAi. In both cases srd[8..10]
/// receive the channel type, channel order and width, and image.row_pitch /
/// image.slice_pitch are updated.
///
/// @param image Image whose desc, data, component and tile_mode are read and
/// whose srd, row_pitch and slice_pitch are written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface layout
/// query fails.
hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const void* image_data_addr = image.data;

  // Addresses reported as local memory are rebased against the local memory
  // base before being written into the descriptor.
  if (IsLocalMemory(image.data))
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    sq_buf_rsrc_word0_u word0;
    sq_buf_rsrc_word1_u word1;
    sq_buf_rsrc_word2_u word2;
    sq_buf_rsrc_word3_u word3;

    word0.val = 0;
    word0.f.base_address = PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = PtrHigh32(image_data_addr);
    word1.f.stride = image_prop.element_size;
    word1.f.swizzle_enable = false;
    word1.f.cache_swizzle = false;

    // Zero the whole dword first, like the other words, so no indeterminate
    // bits can leak into the descriptor.
    word2.val = 0;
    word2.f.num_records = image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.num_format = image_prop.data_type;
    word3.f.data_format = image_prop.data_format;
    word3.f.index_stride = image_prop.element_size;
    word3.f.type = ImageLut().MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    sq_img_rsrc_word0_u word0;
    sq_img_rsrc_word1_u word1;
    sq_img_rsrc_word2_u word2;
    sq_img_rsrc_word3_u word3;
    sq_img_rsrc_word4_u word4;
    sq_img_rsrc_word5_u word5;
    sq_img_rsrc_word6_u word6;
    sq_img_rsrc_word7_u word7;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    // (uint32_t)(-1) is the failure value of GetAddrlibSurfaceInfoAi.
    uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc, image.tile_mode,
          image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero the whole dword first, like the other words, so no indeterminate
    // bits can leak into the descriptor.
    word0.val = 0;
    word0.f.base_address = PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = PtrHigh64Shift40(image_data_addr);
    word1.f.min_lod = 0;
    word1.f.data_format = image_prop.data_format;
    word1.f.num_format = image_prop.data_type;

    // Width, height, depth and pitch are stored as (value - 1).
    word2.val = 0;
    word2.f.width = image.desc.width - 1;
    word2.f.height = image.desc.height - 1;
    word2.f.perf_mod = 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.sw_mode = swizzleMode;
    word3.f.type = ImageLut().MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // Depth holds the last array slice for array images, the last depth
    // slice for 3D images, and 0 otherwise.
    word4.val = 0;
    word4.f.depth =
        (image_array)
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    word4.f.pitch = out.pitch - 1;
    word4.f.bc_swizzle = GetBcSwizzle(swizzle);

    word5.val = 0;
    word6.val = 0;
    word7.val = 0;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    image.srd[5] = word5.val;
    image.srd[6] = word6.val;
    image.srd[7] = word7.val;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Extra dwords following the hardware descriptor.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Re-target an already populated SRD to @p new_format.
///
/// Stores @p new_format in image.desc.format, then patches only the
/// format-dependent descriptor fields in place: destination selects plus
/// num/data format in buffer word 3 for 1DB images, or data/num format in
/// image word 1 and destination selects in image word 3 for all other
/// geometries. srd[8..10] are refreshed as well.
///
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerAi::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Both descriptor flavors take their channel selects from the same lookup.
  const Swizzle channel_sel = ImageLut().MapSwizzle(image.desc.format.channel_order);

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Image resource: format lives in word 1, channel selects in word 3.
    SQ_IMG_RSRC_WORD1* img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1->bits.DATA_FORMAT = image_prop.data_format;
    img_word1->bits.NUM_FORMAT = image_prop.data_type;

    SQ_IMG_RSRC_WORD3* img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3->bits.DST_SEL_X = channel_sel.x;
    img_word3->bits.DST_SEL_Y = channel_sel.y;
    img_word3->bits.DST_SEL_Z = channel_sel.z;
    img_word3->bits.DST_SEL_W = channel_sel.w;
  } else {
    // Buffer resource: format and channel selects all live in word 3.
    SQ_BUF_RSRC_WORD3* buf_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3->bits.DST_SEL_X = channel_sel.x;
    buf_word3->bits.DST_SEL_Y = channel_sel.y;
    buf_word3->bits.DST_SEL_Z = channel_sel.z;
    buf_word3->bits.DST_SEL_W = channel_sel.w;
    buf_word3->bits.NUM_FORMAT = image_prop.data_type;
    buf_word3->bits.DATA_FORMAT = image_prop.data_format;
  }

  // Extra dwords following the hardware descriptor.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Build the 4-dword sampler descriptor for @p sampler from its HSA
/// sampler descriptor.
///
/// @return HSA_STATUS_SUCCESS on success; the status reported by
/// convertAddressMode if the address modes cannot be converted; or
/// HSA_STATUS_ERROR_INVALID_ARGUMENT for a filter mode other than NEAREST or
/// LINEAR. sampler.srd is only written on success.
hsa_status_t ImageManagerAi::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_v2_t& samp_desc = sampler.desc;

  // Word 0: addressing (clamp) modes and coordinate normalization.
  SQ_IMG_SAMP_WORD0 samp_word0;
  samp_word0.u32All = 0;
  const hsa_status_t address_status =
      convertAddressMode<SQ_IMG_SAMP_WORD0, SQ_TEX_CLAMP>(samp_word0, samp_desc.address_modes);
  if (address_status != HSA_STATUS_SUCCESS) return address_status;
  samp_word0.bits.FORCE_UNNORMALIZED =
      (samp_desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  // Word 2: the same XY filter is used for magnification and minification;
  // Z and mip filtering are disabled.
  SQ_IMG_SAMP_WORD2 samp_word2;
  samp_word2.u32All = 0;
  if (samp_desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_NEAREST) {
    samp_word2.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
  } else if (samp_desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_LINEAR) {
    samp_word2.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
  } else {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  samp_word2.bits.XY_MIN_FILTER = samp_word2.bits.XY_MAG_FILTER;
  samp_word2.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  samp_word2.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  // Word 1: only the maximum LOD is set.
  SQ_IMG_SAMP_WORD1 samp_word1;
  samp_word1.u32All = 0;
  samp_word1.bits.MAX_LOD = 4095;

  // Word 3: border color.
  // TODO: check this bit with HSAIL spec.
  SQ_IMG_SAMP_WORD3 samp_word3;
  samp_word3.u32All = 0;
  samp_word3.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = samp_word0.u32All;
  sampler.srd[1] = samp_word1.u32All;
  sampler.srd[2] = samp_word2.u32All;
  sampler.srd[3] = samp_word3.u32All;

  return HSA_STATUS_SUCCESS;
}

/// @brief Ask addrlib for the preferred swizzle mode and the resulting
/// surface layout of an image.
///
/// @param component Agent used to look up the image format properties.
/// @param desc Image descriptor (format, geometry, dimensions).
/// @param tileMode Image::TileMode::LINEAR forbids all macro block swizzles;
/// any other value leaves them available to addrlib.
/// @param image_data_row_pitch Row pitch in bytes; converted to elements and
/// passed to addrlib as pitchInElement.
/// @param image_data_slice_pitch Not used by this function.
/// @param out Receives the computed surface information.
/// @return The selected swizzle mode, or (uint32_t)(-1) if the element size
/// is zero, an addrlib call fails, or the computed surface size is zero.
uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);

  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  // A zero element size would make the pitch computation below divide by
  // zero; report failure through the normal error value instead.
  if (image_prop.element_size == 0) {
    return (uint32_t)(-1);
  }

  const uint32_t width = static_cast<uint32_t>(desc.width);
  const uint32_t height = static_cast<uint32_t>(desc.height);
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast<uint32_t>(
      std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));

  ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  in.pitchInElement = image_data_row_pitch / image_prop.element_size;
  switch(desc.geometry) {
  case HSA_EXT_IMAGE_GEOMETRY_1D:
  case HSA_EXT_IMAGE_GEOMETRY_1DB:
  case HSA_EXT_IMAGE_GEOMETRY_1DA:
    in.resourceType = ADDR_RSRC_TEX_1D;
    break;
  case HSA_EXT_IMAGE_GEOMETRY_2D:
  case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
  case HSA_EXT_IMAGE_GEOMETRY_2DA:
  case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
    in.resourceType = ADDR_RSRC_TEX_2D;
    break;
  case HSA_EXT_IMAGE_GEOMETRY_3D:
    {
      in.resourceType = ADDR_RSRC_TEX_3D;
      /*
       * 3D swizzle modes enforce alignment
       * of the number of slices to the block depth.
       * If numSlices = 3 then the 3 slices are
       * interleaved for 3D locality among the 8 slices
       * that make up each block. This causes the memory
       * footprint to jump to a 3x size of the ideal size
       *
       * 'enable3DSwizzleMode' flag tests for env variable
       * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
       * 3D swizzle:
       * true: Keep view3dAs2dArray = 0 for real 3D interleaving.
       * false: Use view3dAs2dArray = 1 to avoid the alignment
       *       expansion.
       * 2D swizzle modes can lower size overhead but may yield
       * suboptimal cache behavior for fully 3D volumetric
       * operations.
       */
      const bool enable3DSwizzleMode =
          core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
      in.flags.view3dAs2dArray = enable3DSwizzleMode ? 0 : 1;
      break;
    }
  }
  in.flags.texture = 1;

  ADDR2_GET_PREFERRED_SURF_SETTING_INPUT  prefSettingsInput = { 0 };
  ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = { 0 };

  // Mirror the surface description into the preferred-setting query.
  prefSettingsInput.size            = sizeof(prefSettingsInput);
  prefSettingsInput.flags           = in.flags;
  prefSettingsInput.bpp             = in.bpp;
  prefSettingsInput.format          = in.format;
  prefSettingsInput.width           = in.width;
  prefSettingsInput.height          = in.height;
  prefSettingsInput.numFrags        = in.numFrags;
  prefSettingsInput.numSamples      = in.numSamples;
  prefSettingsInput.numMipLevels    = in.numMipLevels;
  prefSettingsInput.numSlices       = in.numSlices;
  prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
  prefSettingsInput.resourceType    = in.resourceType;

  // Disallow all swizzles but linear.
  if (tileMode == Image::TileMode::LINEAR)
  {
      prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
  }

  prefSettingsInput.forbiddenBlock.micro = 1; // but don't ever allow the 256b swizzle modes
  prefSettingsInput.forbiddenBlock.var = 1; // and don't allow variable-size block modes

  if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_, &prefSettingsInput, &prefSettingsOutput)) {
    return (uint32_t)(-1);
  }

  in.swizzleMode = prefSettingsOutput.swizzleMode;

  out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
  if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return (uint32_t)(-1);
  }
  // A zero-sized surface is treated as a failure.
  if (out.surfSize == 0) {
    return (uint32_t)(-1);
  }

  return in.swizzleMode;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager_ai.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H

#include "addrlib/inc/addrinterface.h"
#include "image_manager_kv.h"

namespace rocr {
namespace image {

/// @brief Image manager built on top of ImageManagerKv that provides its own
/// image and sampler descriptor (SRD) construction and uses the ADDR2 addrlib
/// interface for surface layout queries.
class ImageManagerAi : public ImageManagerKv {
 public:
  explicit ImageManagerAi();
  virtual ~ImageManagerAi();

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch, size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const;

  /// @brief Fill image structure with device specific image object.
  virtual hsa_status_t PopulateImageSrd(Image& image) const;

  /// @brief Fill image structure with device specific image object using the given format.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;

  /// @brief Modify device specific image object according to the specified
  /// new format.
  virtual hsa_status_t ModifyImageSrd(Image& image,
                                      hsa_ext_image_format_t& new_format) const;

  /// @brief Fill sampler structure with device specific sampler object.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;

 protected:
  /// @brief Query addrlib for the preferred swizzle mode and the surface
  /// layout of an image; the layout is written to @p out.
  /// @return The selected swizzle mode, or (uint32_t)(-1) on failure.
  uint32_t GetAddrlibSurfaceInfoAi(hsa_agent_t component,
                             const hsa_ext_image_descriptor_t& desc,
                             Image::TileMode tileMode,
                             size_t image_data_row_pitch,
                             size_t image_data_slice_pitch,
                             ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;

  /// @brief Report whether @p address is treated as local memory. The SRD
  /// builders subtract the local memory base address from such addresses
  /// before encoding them.
  bool IsLocalMemory(const void* address) const;

 private:
  DISALLOW_COPY_AND_ASSIGN(ImageManagerAi);
};

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H


================================================
FILE: runtime/hsa-runtime/image/image_manager_gfx11.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2021, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_manager_gfx11.h"

#include <assert.h>

#include <algorithm>
#include <climits>

#include "core/inc/runtime.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_gfx11.h"
#include "util.h"
#include "device_info.h"

namespace rocr {
namespace image {

ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD3)

ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD3)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD4)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD5)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD6)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD7)

ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)

//-----------------------------------------------------------------------------
// Workaround switch to combined format/type codes and missing gfx11
// specific look up table.  Only covers types used in image_lut_gfx11.cpp.
//-----------------------------------------------------------------------------
// One row of the format conversion table: associates a separate data format
// code and numeric type code with the matching combined format code.
struct formatconverstion_t {
  FMT fmt;        // Separate data format code.
  type type;      // Separate numeric type code.
  FORMAT format;  // Combined format code for this (fmt, type) pair.
};

// Format/Type to combined format code table.
// Entries that share a FMT are stored contiguously so a lookup can start at
// the group's first entry (see FormatEntryPoint) and scan forward. The
// trailing comment on each row is its index in the table.
static const formatconverstion_t FormatLUT[] = {
    {FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM},              // 0
    {FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM},        // 1
    {FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM},        // 2
    {FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT},          // 3
    {FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT},          // 4
    {FMT_16, TYPE_UNORM, CFMT_16_UNORM},                        // 5
    {FMT_16, TYPE_SNORM, CFMT_16_SNORM},                        // 6
    {FMT_16, TYPE_UINT, CFMT_16_UINT},                          // 7
    {FMT_16, TYPE_SINT, CFMT_16_SINT},                          // 8
    {FMT_16, TYPE_FLOAT, CFMT_16_FLOAT},                        // 9
    {FMT_16, TYPE_USCALED, CFMT_16_USCALED},                    // 10
    {FMT_16, TYPE_SSCALED, CFMT_16_SSCALED},                    // 11
    {FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM},                  // 12
    {FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM},                  // 13
    {FMT_16_16, TYPE_UINT, CFMT_16_16_UINT},                    // 14
    {FMT_16_16, TYPE_SINT, CFMT_16_16_SINT},                    // 15
    {FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT},                  // 16
    {FMT_16_16, TYPE_USCALED, CFMT_16_16_USCALED},              // 17
    {FMT_16_16, TYPE_SSCALED, CFMT_16_16_SSCALED},              // 18
    {FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM},      // 19
    {FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM},      // 20
    {FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT},        // 21
    {FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT},        // 22
    {FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT},      // 23
    {FMT_16_16_16_16, TYPE_USCALED, CFMT_16_16_16_16_USCALED},  // 24
    {FMT_16_16_16_16, TYPE_SSCALED, CFMT_16_16_16_16_SSCALED},  // 25
    {FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM},        // 26
    {FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM},        // 27
    {FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT},          // 28
    {FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT},          // 29
    {FMT_2_10_10_10, TYPE_USCALED, CFMT_2_10_10_10_USCALED},    // 30
    {FMT_2_10_10_10, TYPE_SSCALED, CFMT_2_10_10_10_SSCALED},    // 31
    {FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM},                    // 32
    {FMT_24_8, TYPE_UINT, CFMT_24_8_UINT},                      // 33
    {FMT_32, TYPE_UINT, CFMT_32_UINT},                          // 34
    {FMT_32, TYPE_SINT, CFMT_32_SINT},                          // 35
    {FMT_32, TYPE_FLOAT, CFMT_32_FLOAT},                        // 36
    {FMT_32_32, TYPE_UINT, CFMT_32_32_UINT},                    // 37
    {FMT_32_32, TYPE_SINT, CFMT_32_32_SINT},                    // 38
    {FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT},                  // 39
    {FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT},              // 40
    {FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT},              // 41
    {FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT},            // 42
    {FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT},        // 43
    {FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT},        // 44
    {FMT_32_32_32_32, TYPE_FLOAT, CFMT_32_32_32_32_FLOAT},      // 45
    {FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM},              // 46
    {FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM},                  // 47
    {FMT_8, TYPE_UNORM, CFMT_8_UNORM},                          // 48
    {FMT_8, TYPE_SNORM, CFMT_8_SNORM},                          // 49
    {FMT_8, TYPE_UINT, CFMT_8_UINT},                            // 50
    {FMT_8, TYPE_SINT, CFMT_8_SINT},                            // 51
    {FMT_8, TYPE_SRGB, CFMT_8_SRGB},                            // 52
    {FMT_8, TYPE_USCALED, CFMT_8_USCALED},                      // 53
    {FMT_8, TYPE_SSCALED, CFMT_8_SSCALED},                      // 54
    {FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM},                    // 55
    {FMT_8_24, TYPE_UINT, CFMT_8_24_UINT},                      // 56
    {FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM},                      // 57
    {FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM},                      // 58
    {FMT_8_8, TYPE_UINT, CFMT_8_8_UINT},                        // 59
    {FMT_8_8, TYPE_SINT, CFMT_8_8_SINT},                        // 60
    {FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB},                        // 61
    {FMT_8_8, TYPE_USCALED, CFMT_8_8_USCALED},                  // 62
    {FMT_8_8, TYPE_SSCALED, CFMT_8_8_SSCALED},                  // 63
    {FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM},              // 64
    {FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM},              // 65
    {FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT},                // 66
    {FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT},                // 67
    {FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB},                // 68
    {FMT_8_8_8_8, TYPE_USCALED, CFMT_8_8_8_8_USCALED},          // 69
    {FMT_8_8_8_8, TYPE_SSCALED, CFMT_8_8_8_8_SSCALED}           // 70
};
// Number of rows in FormatLUT.
static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t);

// Index in FormatLUT at which to start a search, indexed by FMT enum value.
// Each value is the index of the first FormatLUT row for that FMT. A value of
// 71 is one past the last FormatLUT row (index 70) and marks a format that
// has no rows in the table.
static const int FormatEntryPoint[] = {
  71, // FMT_INVALID
  48, // FMT_8
  5,  // FMT_16
  57, // FMT_8_8
  34, // FMT_32
  12, // FMT_16_16
  71, // FMT_10_11_11
  71, // FMT_11_11_10
  1,  // FMT_10_10_10_2
  26, // FMT_2_10_10_10
  64, // FMT_8_8_8_8
  37, // FMT_32_32
  19, // FMT_16_16_16_16
  40, // FMT_32_32_32
  43, // FMT_32_32_32_32
  71, // RESERVED
  47, // FMT_5_6_5
  0,  // FMT_1_5_5_5
  46, // FMT_5_5_5_1
  71, // FMT_4_4_4_4
  55, // FMT_8_24
  32  // FMT_24_8
};

// Translates a separate (data format, numeric type) pair into the combined
// format code by searching FormatLUT.
//
// fmt:  data format code, used as an index into FormatEntryPoint.
// type: numeric type code.
// Returns the matching combined format, or CFMT_INVALID when fmt is out of
// range or the table has no row for the pair.
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  const size_t entry_point_count = sizeof(FormatEntryPoint) / sizeof(FormatEntryPoint[0]);
  assert(fmt < entry_point_count && "FMT out of range.");
  // Keep release builds (where the assert is compiled out) from indexing
  // past the end of FormatEntryPoint.
  if (fmt >= entry_point_count) return CFMT_INVALID;

  // Rows for one FMT are contiguous in FormatLUT, so scan the whole group
  // rather than a fixed-size window: some groups hold seven rows, and a
  // six-row window never reaches their last entry.
  for (int i = FormatEntryPoint[fmt]; i < FormatLUTSize && FormatLUT[i].fmt == fmt; ++i) {
    if (FormatLUT[i].type == type) return FormatLUT[i].format;
  }
  return CFMT_INVALID;
}
//-----------------------------------------------------------------------------
// End workaround
//-----------------------------------------------------------------------------

// Constructs the manager; all setup is delegated to the ImageManagerKv base.
ImageManagerGfx11::ImageManagerGfx11() : ImageManagerKv() {}

// Nothing to release at this level of the hierarchy.
ImageManagerGfx11::~ImageManagerGfx11() {}

// TODO(cfreehil) remove from class, make it a utility function
/// @brief Compute the size and base alignment of an image's backing storage.
///
/// The tile mode is TILED only for an opaque layout on a base-profile agent
/// with a non-1DB geometry; otherwise it is LINEAR. For linear layouts (other
/// than 1DB) a caller-supplied non-zero row or slice pitch must match the
/// pitch computed by addrlib.
///
/// @return HSA_STATUS_SUCCESS on success; the agent query status if the
/// profile lookup fails; HSA_STATUS_ERROR if the surface query fails; or
/// HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED on a pitch mismatch.
hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

  hsa_profile_t profile;
  const hsa_status_t query_status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  if (query_status != HSA_STATUS_SUCCESS) return query_status;

  const bool is_buffer_image = (desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  const bool use_tiling = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) &&
                          (profile == HSA_PROFILE_BASE) && !is_buffer_image;
  const Image::TileMode tileMode =
      use_tiling ? Image::TileMode::TILED : Image::TileMode::LINEAR;

  // (uint32_t)(-1) is the failure value of the surface query.
  if (GetAddrlibSurfaceInfoNv(component, desc, tileMode, image_data_row_pitch,
                              image_data_slice_pitch, out) == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  // Pitches in bytes as computed by addrlib.
  const size_t rowPitch = (out.bpp >> 3) * out.pitch;
  const size_t slicePitch = rowPitch * out.height;

  // A zero caller pitch means "no constraint".
  const bool row_mismatch = image_data_row_pitch && (rowPitch != image_data_row_pitch);
  const bool slice_mismatch = image_data_slice_pitch && (slicePitch != image_data_slice_pitch);
  if (!is_buffer_image && image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
      (row_mismatch || slice_mismatch)) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
  }

  image_info.size = out.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = out.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}

// Reports whether `address` is treated as local memory. This implementation
// ignores the address and always answers true.
bool ImageManagerGfx11::IsLocalMemory(const void* address) const {
  return true;
}

/// @brief Build the SRD for @p image starting from imported descriptor
/// metadata.
///
/// The eight descriptor dwords are copied from @p descriptor (interpreted as
/// metadata_amd_gfx11_t) and then patched with this image's base address,
/// combined format code and channel selects. srd[8..10] receive the channel
/// type, channel order and width; row_pitch and slice_pitch are set to 0.
///
/// @return HSA_STATUS_SUCCESS, or
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
/// supported, has a zero element size, or (non-1DB only) its element size
/// differs from the pixel size reported by the lookup table.
hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image,
                                     const metadata_amd_t* descriptor) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  const bool format_unusable = (image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
                               (image_prop.element_size == 0);
  if (format_unusable) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle channel_sel = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // Addresses reported as local memory are rebased against the local memory
  // base before being written into the descriptor.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the imported descriptor words.
  const metadata_amd_gfx11_t* imported = reinterpret_cast<const metadata_amd_gfx11_t*>(descriptor);
  const uint32_t imported_words[8] = {
      imported->word0.u32All, imported->word1.u32All, imported->word2.u32All,
      imported->word3.u32All, imported->word4.u32All, imported->word5.u32All,
      imported->word6.u32All, imported->word7.u32All};
  for (int idx = 0; idx < 8; ++idx) {
    image.srd[idx] = imported_words[idx];
  }

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    const uint32_t hwPixelSize =
        ImageLut().GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hwPixelSize) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }

    // Typed views over the descriptor dwords that get patched in place.
    SQ_IMG_RSRC_WORD0* img_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
    SQ_IMG_RSRC_WORD1* img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    SQ_IMG_RSRC_WORD6* img_word6 = reinterpret_cast<SQ_IMG_RSRC_WORD6*>(&image.srd[6]);
    SQ_IMG_RSRC_WORD7* img_word7 = reinterpret_cast<SQ_IMG_RSRC_WORD7*>(&image.srd[7]);

    img_word0->bits.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    img_word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    img_word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    img_word3->bits.DST_SEL_X = channel_sel.x;
    img_word3->bits.DST_SEL_Y = channel_sel.y;
    img_word3->bits.DST_SEL_Z = channel_sel.z;
    img_word3->bits.DST_SEL_W = channel_sel.w;

    // Only the 1D geometries override the imported resource type.
    const bool one_dimensional = image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
                                 image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D;
    if (one_dimensional) {
      img_word3->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    }

    // Imported metadata holds the offset to metadata, add the image base address.
    uintptr_t meta = uintptr_t(img_word7->bits.META_DATA_ADDRESS_HI) << 16;
    meta |= uintptr_t(img_word6->bits.META_DATA_ADDRESS) << 8;
    meta += reinterpret_cast<uintptr_t>(image_data_addr);

    img_word6->bits.META_DATA_ADDRESS = PtrLow16Shift8(reinterpret_cast<void*>(meta));
    img_word7->bits.META_DATA_ADDRESS_HI = PtrHigh64Shift16(reinterpret_cast<void*>(meta));
  } else {
    // Buffer resource: word 0 is rebuilt from scratch, words 1 and 3 keep
    // the imported bits outside the patched fields.
    SQ_BUF_RSRC_WORD0 buf_word0;
    buf_word0.val = 0;
    buf_word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    SQ_BUF_RSRC_WORD1 buf_word1;
    buf_word1.val = image.srd[1];
    buf_word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    buf_word1.f.STRIDE = image_prop.element_size;

    SQ_BUF_RSRC_WORD3 buf_word3;
    buf_word3.val = image.srd[3];
    buf_word3.f.DST_SEL_X = channel_sel.x;
    buf_word3.f.DST_SEL_Y = channel_sel.y;
    buf_word3.f.DST_SEL_Z = channel_sel.z;
    buf_word3.f.DST_SEL_W = channel_sel.w;
    buf_word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    buf_word3.f.INDEX_STRIDE = image_prop.element_size;

    image.srd[0] = buf_word0.val;
    image.srd[1] = buf_word1.val;
    image.srd[3] = buf_word3.val;
  }

  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Pick the border-color swizzle that matches a destination swizzle.
///
/// The decision is driven by which output channel selects SEL_X, checked in
/// the order alpha, red, green, blue. If none of them selects SEL_X the
/// identity swizzle TEX_BC_Swizzle_XYZW is returned.
///
/// @param swizzle Destination channel selects (x, y, z, w).
/// @return The TEX_BC_SWIZZLE value to program alongside @p swizzle.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
    const SEL red = static_cast<SEL>(swizzle.x);
    const SEL green = static_cast<SEL>(swizzle.y);
    const SEL blue = static_cast<SEL>(swizzle.z);
    const SEL alpha = static_cast<SEL>(swizzle.w);

    if (alpha == SEL_X) {
        // Candidates are TEX_BC_Swizzle_WZYX and TEX_BC_Swizzle_WXYZ.
        //
        // For the pre-defined border colors (white, opaque black, transparent
        // black) the RGB channels are identical, so only the placement of
        // alpha matters and either candidate works. Behavior with border
        // color palettes is not known.
        if (blue == SEL_Y) {
            return TEX_BC_Swizzle_WZYX;  // ABGR
        }
        const bool every_channel_is_x =
            (red == SEL_X) && (green == SEL_X) && (blue == SEL_X);
        // All-X selects keep the identity (RGBA); anything else is ARGB.
        return every_channel_is_x ? TEX_BC_Swizzle_XYZW : TEX_BC_Swizzle_WXYZ;
    }

    if (red == SEL_X) {
        // Candidates are TEX_BC_Swizzle_XYZW and TEX_BC_Swizzle_XWYZ.
        if (green == SEL_Y) {
            return TEX_BC_Swizzle_XYZW;  // RGBA
        }
        const bool replicated_x_with_alpha =
            (green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W);
        // Replicated X with alpha in W is still RGBA; otherwise RAGB.
        return replicated_x_with_alpha ? TEX_BC_Swizzle_XYZW : TEX_BC_Swizzle_XWYZ;
    }

    if (green == SEL_X) {
        return TEX_BC_Swizzle_YXWZ;  // GRAB
    }

    if (blue == SEL_X) {
        return TEX_BC_Swizzle_ZYXW;  // BGRA
    }

    return TEX_BC_Swizzle_XYZW;
}


/// @brief Build the device image descriptor (SRD) for @p image from its HSA
/// image descriptor.
///
/// For HSA_EXT_IMAGE_GEOMETRY_1DB a four-word buffer descriptor is written to
/// srd[0..3]. For every other geometry an eight-word image descriptor is
/// written to srd[0..7], using the swizzle mode and pitch reported by
/// GetAddrlibSurfaceInfoNv(). In both cases srd[8..10] receive the channel
/// type, channel order and width, and image.row_pitch / image.slice_pitch are
/// updated.
///
/// @param image Image whose desc, data, component and tile_mode are read and
///              whose srd, row_pitch and slice_pitch are written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface info
///         query fails.
hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const void* image_data_addr = image.data;

  // Addresses that IsLocalMemory() accepts are rebased against
  // local_memory_base_address_ before being encoded into the descriptor.
  if (IsLocalMemory(image.data))
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word1.f.SWIZZLE_ENABLE = 0;

    // Zero the whole word first, like every other word, so no bit is left
    // indeterminate regardless of how wide NUM_RECORDS is.
    word2.val = 0;
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
         image.component, image.desc, image.tile_mode,
                                     image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero before the bit-field write, matching the other descriptor words.
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    word1.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(image.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;
    word4.f.DEPTH =
        (image_array) // Doesn't hurt but isn't array_size already >0?
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;

    // For 1d, 2d and 2d-msaa in gfx11 this is pitch-1
    if (!image_array && !image_3d) word4.f.PITCH = out.pitch - 1;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    // Words 5-7 carry no settings on this path and are written as zero.
    image.srd[5] = 0;
    image.srd[6] = 0;
    image.srd[7] = 0;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Extra words appended after the hardware descriptor: channel type,
  // channel order and image width.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Re-encode the format-dependent fields of an existing descriptor.
///
/// Stores @p new_format in image.desc.format, then rewrites the combined
/// format code and the four destination selects inside the already populated
/// SRD. Buffer images (1DB) keep both in word 3; all other geometries keep
/// the format in word 1 and the selects in word 3. srd[8..10] are refreshed
/// as well.
///
/// @param image      Image whose descriptor is patched in place.
/// @param new_format Format to apply.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerGfx11::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  const ImageProperty props = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(props.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(props.element_size != 0);

  // Both descriptor layouts need the same channel selects.
  const Swizzle selects = ImageLut().MapSwizzle(image.desc.format.channel_order);

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_IMG_RSRC_WORD1* img_w1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_w3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);

    img_w1->bits.FORMAT = GetCombinedFormat(props.data_format, props.data_type);
    img_w3->bits.DST_SEL_X = selects.x;
    img_w3->bits.DST_SEL_Y = selects.y;
    img_w3->bits.DST_SEL_Z = selects.z;
    img_w3->bits.DST_SEL_W = selects.w;
  } else {
    SQ_BUF_RSRC_WORD3* buf_w3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);

    buf_w3->bits.DST_SEL_X = selects.x;
    buf_w3->bits.DST_SEL_Y = selects.y;
    buf_w3->bits.DST_SEL_Z = selects.z;
    buf_w3->bits.DST_SEL_W = selects.w;
    buf_w3->bits.FORMAT = GetCombinedFormat(props.data_format, props.data_type);
  }

  // Trailing words mirror the (new) channel type/order and the width.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Encode sampler.desc into the four-word sampler descriptor.
///
/// Word 0 carries the address modes and the unnormalized-coordinate flag,
/// word 1 the maximum LOD, word 2 the filters and word 3 the border color
/// type.
///
/// @param sampler Sampler whose desc is read and whose srd[0..3] is written.
/// @return HSA_STATUS_SUCCESS; the status from convertAddressMode() if that
///         fails; HSA_STATUS_ERROR_INVALID_ARGUMENT for an unrecognized
///         filter mode. On failure sampler.srd is left untouched.
hsa_status_t ImageManagerGfx11::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_v2_t& desc = sampler.desc;

  // Word 0: addressing.
  SQ_IMG_SAMP_WORD0 addressing;
  addressing.u32All = 0;
  const hsa_status_t address_status =
      convertAddressMode<SQ_IMG_SAMP_WORD0, SQ_TEX_CLAMP>(addressing, desc.address_modes);
  if (address_status != HSA_STATUS_SUCCESS) {
    return address_status;
  }
  addressing.bits.FORCE_UNNORMALIZED =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  // Word 2: filtering. Magnification and minification share one setting.
  SQ_IMG_SAMP_WORD2 filtering;
  filtering.u32All = 0;
  if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_NEAREST) {
    filtering.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
  } else if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_LINEAR) {
    filtering.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
  } else {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  filtering.bits.XY_MIN_FILTER = filtering.bits.XY_MAG_FILTER;
  filtering.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  filtering.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  // Word 1: level of detail.
  SQ_IMG_SAMP_WORD1 lod;
  lod.u32All = 0;
  lod.bits.MAX_LOD = 4095;

  // Word 3: border color.
  SQ_IMG_SAMP_WORD3 border;
  border.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  border.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = addressing.u32All;
  sampler.srd[1] = lod.u32All;
  sampler.srd[2] = filtering.u32All;
  sampler.srd[3] = border.u32All;

  return HSA_STATUS_SUCCESS;
}

/// @brief Ask addrlib for the preferred swizzle mode and surface layout of an
/// image.
///
/// Runs two addrlib queries: Addr2GetPreferredSurfaceSetting() to choose a
/// swizzle mode, then Addr2ComputeSurfaceInfo() to fill @p out for that mode.
///
/// @param component              Agent used to look up the image property.
/// @param desc                   HSA image descriptor.
/// @param tileMode               Image::TileMode::LINEAR forbids every
///                               non-linear block type.
/// @param image_data_row_pitch   Row pitch in bytes; converted to elements
///                               and passed to addrlib.
/// @param image_data_slice_pitch Not used by this function.
/// @param out                    Receives the computed surface info.
/// @return The chosen swizzle mode, or (uint32_t)(-1) if either addrlib call
///         fails or the computed surface size is zero.
uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  // Value handed back whenever the surface cannot be described.
  const uint32_t kNoSwizzleMode = static_cast<uint32_t>(-1);

  const ImageProperty props = GetImageProperty(component, desc.format, desc.geometry);

  // Slices: the larger of array_size and depth, but never fewer than one.
  const size_t slice_count =
      std::max(static_cast<size_t>(1), std::max(desc.array_size, desc.depth));

  ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
  in.format = GetAddrlibFormat(props);
  in.bpp = static_cast<unsigned int>(props.element_size) * 8;
  in.width = static_cast<uint32_t>(desc.width);
  in.height = static_cast<uint32_t>(desc.height);
  in.numSlices = static_cast<uint32_t>(slice_count);
  in.pitchInElement = image_data_row_pitch / props.element_size;

  switch (desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_3D: {
      in.resourceType = ADDR_RSRC_TEX_3D;
      // 3D swizzle modes align the slice count to the block depth. With
      // numSlices = 3, the three slices are interleaved for 3D locality
      // among the 8 slices that make up each block, which inflates the
      // memory footprint to roughly 3x the ideal size.
      //
      // The enable_3d_swizzle() runtime flag (env variable
      // HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG) selects the behavior:
      //   true:  view3dAs2dArray = 0, real 3D interleaving.
      //   false: view3dAs2dArray = 1, avoids the alignment expansion.
      // 2D swizzle modes lower the size overhead but may give suboptimal
      // cache behavior for fully 3D volumetric operations.
      const bool use_3d_swizzle =
          core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
      in.flags.view3dAs2dArray = use_3d_swizzle ? 0 : 1;
      break;
    }
  }
  in.flags.texture = 1;

  // First query: let addrlib choose the swizzle mode.
  ADDR2_GET_PREFERRED_SURF_SETTING_INPUT pref_in = {0};
  ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT pref_out = {0};

  pref_in.size = sizeof(pref_in);
  pref_in.resourceType = in.resourceType;
  pref_in.resourceLoction = ADDR_RSRC_LOC_UNDEF;
  pref_in.format = in.format;
  pref_in.flags = in.flags;
  pref_in.bpp = in.bpp;
  pref_in.width = in.width;
  pref_in.height = in.height;
  pref_in.numSlices = in.numSlices;
  pref_in.numMipLevels = in.numMipLevels;
  pref_in.numSamples = in.numSamples;
  pref_in.numFrags = in.numFrags;

  // A linear image forbids every block type other than linear.
  if (tileMode == Image::TileMode::LINEAR) {
    pref_in.forbiddenBlock.micro = 1;
    pref_in.forbiddenBlock.macroThin4KB = 1;
    pref_in.forbiddenBlock.macroThick4KB = 1;
    pref_in.forbiddenBlock.macroThin64KB = 1;
    pref_in.forbiddenBlock.macroThick64KB = 1;
    pref_in.forbiddenBlock.var = 1;
  }

  // Left disabled: unconditionally forbidding the 256b swizzle modes
  // (forbiddenBlock.micro) and the variable-size block modes
  // (forbiddenBlock.var).

  if (Addr2GetPreferredSurfaceSetting(addr_lib_, &pref_in, &pref_out) != ADDR_OK) {
    return kNoSwizzleMode;
  }
  in.swizzleMode = pref_out.swizzleMode;

  // Second query: compute the layout for the chosen mode. A zero-sized
  // surface is treated as a failure too.
  out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
  if (Addr2ComputeSurfaceInfo(addr_lib_, &in, &out) != ADDR_OK || out.surfSize == 0) {
    return kNoSwizzleMode;
  }

  return in.swizzleMode;
}

/// @brief Fill a region of @p image with @p pattern using the blit kernel.
///
/// The image descriptor is temporarily patched for two cases and restored
/// after the blit has been issued:
///  - UNORM_SHORT_101010 channel type: DST_SEL_W is forced to SEL_0.
///  - SRGBA / SRGB / SRGBX / SBGRA channel order: the pattern's first three
///    floats are converted with LinearToStandardRGB() and the descriptor
///    format is switched to the TYPE_UNORM variant.
///
/// @param image   Image to fill; its SRD is modified and restored in place.
/// @param pattern Fill value; read as four floats for the sRGB orders.
/// @param region  Region of the image to fill.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES if no blit queue is available,
///         otherwise the status returned by the blit kernel.
hsa_status_t ImageManagerGfx11::FillImage(const Image& image, const void* pattern,
                                       const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Mutable alias: the descriptor is edited for the duration of the fill.
  Image& target = const_cast<Image&>(image);

  // --- Patch 1: drop the alpha select for 10-10-10 images. ---
  SQ_BUF_RSRC_WORD3* buf_w3 = NULL;
  SQ_IMG_RSRC_WORD3* img_w3 = NULL;
  uint32_t saved_dst_sel_w = 0;
  const bool is_short_101010 =
      (target.desc.format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010);
  if (is_short_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (target.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      img_w3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&target.srd[3]);
      saved_dst_sel_w = img_w3->bits.DST_SEL_W;
      img_w3->bits.DST_SEL_W = SEL_0;
    } else {
      buf_w3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&target.srd[3]);
      saved_dst_sel_w = buf_w3->bits.DST_SEL_W;
      buf_w3->bits.DST_SEL_W = SEL_0;
    }
  }

  // --- Patch 2: write sRGB images through an UNORM view. ---
  bool srgb_order = false;
  switch (target.desc.format.channel_order) {
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
      srgb_order = true;
      break;
    default:
      break;
  }

  SQ_IMG_RSRC_WORD1* img_w1 = NULL;
  uint32_t saved_format = 0;
  float converted[4] = {0};
  const void* fill_pattern = pattern;
  if (srgb_order) {
    // There is no write support for SRGBA images, so the pattern is converted
    // to standard form and the image is treated as an RGBA image.
    const float* linear = reinterpret_cast<const float*>(pattern);
    for (int channel = 0; channel < 3; ++channel) {
      converted[channel] = LinearToStandardRGB(linear[channel]);
    }
    converted[3] = linear[3];  // alpha is passed through unchanged
    fill_pattern = converted;

    ImageProperty props = ImageLut().MapFormat(image.desc.format, image.desc.geometry);

    img_w1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&target.srd[1]);
    saved_format = img_w1->bits.FORMAT;
    img_w1->bits.FORMAT = GetCombinedFormat(props.data_format, TYPE_UNORM);
  }

  const hsa_status_t fill_status = ImageRuntime::instance()->blit_kernel().FillImage(
      blit_queue_, blit_code_catalog_, target, fill_pattern, region);

  // --- Undo whichever patches were applied. ---
  if (buf_w3 != NULL) {
    buf_w3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (img_w3 != NULL) {
    img_w3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (img_w1 != NULL) {
    img_w1->bits.FORMAT = saved_format;
  }

  return fill_status;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager_gfx11.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2021, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_IMAGE_MANAGER_GFX11_H_
#define EXT_IMAGE_IMAGE_MANAGER_GFX11_H_

#include "addrlib/inc/addrinterface.h"
#include "image_lut_gfx11.h"
#include "image_manager_kv.h"

namespace rocr {
namespace image {

/// @brief Image manager for gfx11 devices.
///
/// Derives from ImageManagerKv and declares gfx11 versions of the descriptor
/// (SRD) population, modification, sizing and fill entry points. Format and
/// geometry look-ups go through the ImageLutGfx11 instance owned by this
/// object.
class ImageManagerGfx11 : public ImageManagerKv {
 public:
  ImageManagerGfx11();
  virtual ~ImageManagerGfx11();

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  /// @param component Agent the image is created for.
  /// @param desc Image descriptor (geometry, dimensions, format).
  /// @param image_data_layout Requested data layout.
  /// @param image_data_row_pitch Requested row pitch in bytes.
  /// @param image_data_slice_pitch Requested slice pitch in bytes.
  /// @param image_info Receives the size and alignment.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch, size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const;

  /// @brief Fill image structure with device specific image object.
  /// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface
  /// information for the image cannot be computed.
  virtual hsa_status_t PopulateImageSrd(Image& image) const;

  /// @brief Fill image structure with device specific image object using the
  /// supplied metadata descriptor @p desc.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;

  /// @brief Modify device specific image object according to the specified
  /// new format. The new format is also stored in the image's descriptor.
  virtual hsa_status_t ModifyImageSrd(Image& image,
                                      hsa_ext_image_format_t& new_format) const;

  /// @brief Fill sampler structure with device specific sampler object.
  /// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an unrecognized filter
  /// mode.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;

  /// @brief Fill image backing storage using agent copy.
  /// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit queue is
  /// available, otherwise the status of the blit kernel fill.
  virtual hsa_status_t FillImage(const Image& image, const void* pattern,
                                 const hsa_ext_image_region_t& region);
 protected:
  /// @brief Query addrlib for the preferred swizzle mode of an image and
  /// compute its surface info into @p out.
  /// @return The selected swizzle mode, or (uint32_t)(-1) on failure.
  uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
                             const hsa_ext_image_descriptor_t& desc,
                             Image::TileMode tileMode,
                             size_t image_data_row_pitch,
                             size_t image_data_slice_pitch,
                             ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;

  /// @brief Report whether @p address is treated as local memory; when it is,
  /// PopulateImageSrd(Image&) rebases the address before encoding it.
  bool IsLocalMemory(const void* address) const;

  /// @brief Look-up table used for format, swizzle and geometry mapping.
  virtual const ImageLutGfx11& ImageLut() const { return image_lut_gfx11; };

 private:
  // gfx11 look-up table instance returned by ImageLut().
  ImageLutGfx11 image_lut_gfx11;
  DISALLOW_COPY_AND_ASSIGN(ImageManagerGfx11);
};

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_IMAGE_MANAGER_GFX11_H_


================================================
FILE: runtime/hsa-runtime/image/image_manager_gfx12.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_manager_gfx12.h"

#include <assert.h>

#include <algorithm>
#include <climits>

#include "core/inc/runtime.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "core/util/utils.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_gfx12.h"
#include "util.h"
#include "device_info.h"

namespace rocr {
namespace image {

static_assert(sizeof(SQ_BUF_RSRC_WORD0) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_BUF_RSRC_WORD1) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_BUF_RSRC_WORD2) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_BUF_RSRC_WORD3) == sizeof(uint32_t), "struct size is invalid");

static_assert(sizeof(SQ_IMG_RSRC_WORD0) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD1) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD2) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD3) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD4) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD5) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD6) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_RSRC_WORD7) == sizeof(uint32_t), "struct size is invalid");

static_assert(sizeof(SQ_IMG_SAMP_WORD0) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_SAMP_WORD1) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_SAMP_WORD2) == sizeof(uint32_t), "struct size is invalid");
static_assert(sizeof(SQ_IMG_SAMP_WORD3) == sizeof(uint32_t), "struct size is invalid");

//-----------------------------------------------------------------------------
// Workaround for the switch to combined format/type codes and for the missing
// gfx11-specific look-up table.  Only covers types used in image_lut_gfx11.cpp.
//-----------------------------------------------------------------------------
// One row of the format conversion table: maps a (data format, numeric type)
// pair to the single combined format code.
struct formatconverstion_t {
  FMT fmt;        // Data format half of the lookup key.
  type type;      // Numeric type half of the lookup key.
  FORMAT format;  // Combined format code for (fmt, type).
};

// Format/Type to combined format code table.
// Rows that share the same FMT are stored contiguously; FormatEntryPoint[]
// holds the index of the first row of each FMT group, so a lookup only has to
// scan that one group. The trailing "// N" comments are the row indices and
// must stay in sync with FormatEntryPoint[] whenever rows are added, removed
// or reordered.
static const formatconverstion_t FormatLUT[] = {
    {FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM},              // 0
    {FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM},        // 1
    {FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM},        // 2
    {FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT},          // 3
    {FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT},          // 4
    {FMT_16, TYPE_UNORM, CFMT_16_UNORM},                        // 5
    {FMT_16, TYPE_SNORM, CFMT_16_SNORM},                        // 6
    {FMT_16, TYPE_UINT, CFMT_16_UINT},                          // 7
    {FMT_16, TYPE_SINT, CFMT_16_SINT},                          // 8
    {FMT_16, TYPE_FLOAT, CFMT_16_FLOAT},                        // 9
    {FMT_16, TYPE_USCALED, CFMT_16_USCALED},                    // 10
    {FMT_16, TYPE_SSCALED, CFMT_16_SSCALED},                    // 11
    {FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM},                  // 12
    {FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM},                  // 13
    {FMT_16_16, TYPE_UINT, CFMT_16_16_UINT},                    // 14
    {FMT_16_16, TYPE_SINT, CFMT_16_16_SINT},                    // 15
    {FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT},                  // 16
    {FMT_16_16, TYPE_USCALED, CFMT_16_16_USCALED},              // 17
    {FMT_16_16, TYPE_SSCALED, CFMT_16_16_SSCALED},              // 18
    {FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM},      // 19
    {FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM},      // 20
    {FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT},        // 21
    {FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT},        // 22
    {FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT},      // 23
    {FMT_16_16_16_16, TYPE_USCALED, CFMT_16_16_16_16_USCALED},  // 24
    {FMT_16_16_16_16, TYPE_SSCALED, CFMT_16_16_16_16_SSCALED},  // 25
    {FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM},        // 26
    {FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM},        // 27
    {FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT},          // 28
    {FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT},          // 29
    {FMT_2_10_10_10, TYPE_USCALED, CFMT_2_10_10_10_USCALED},    // 30
    {FMT_2_10_10_10, TYPE_SSCALED, CFMT_2_10_10_10_SSCALED},    // 31
    {FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM},                    // 32
    {FMT_24_8, TYPE_UINT, CFMT_24_8_UINT},                      // 33
    {FMT_32, TYPE_UINT, CFMT_32_UINT},                          // 34
    {FMT_32, TYPE_SINT, CFMT_32_SINT},                          // 35
    {FMT_32, TYPE_FLOAT, CFMT_32_FLOAT},                        // 36
    {FMT_32_32, TYPE_UINT, CFMT_32_32_UINT},                    // 37
    {FMT_32_32, TYPE_SINT, CFMT_32_32_SINT},                    // 38
    {FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT},                  // 39
    {FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT},              // 40
    {FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT},              // 41
    {FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT},            // 42
    {FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT},        // 43
    {FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT},        // 44
    {FMT_32_32_32_32, TYPE_FLOAT, CFMT_32_32_32_32_FLOAT},      // 45
    {FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM},              // 46
    {FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM},                  // 47
    {FMT_8, TYPE_UNORM, CFMT_8_UNORM},                          // 48
    {FMT_8, TYPE_SNORM, CFMT_8_SNORM},                          // 49
    {FMT_8, TYPE_UINT, CFMT_8_UINT},                            // 50
    {FMT_8, TYPE_SINT, CFMT_8_SINT},                            // 51
    {FMT_8, TYPE_SRGB, CFMT_8_SRGB},                            // 52
    {FMT_8, TYPE_USCALED, CFMT_8_USCALED},                      // 53
    {FMT_8, TYPE_SSCALED, CFMT_8_SSCALED},                      // 54
    {FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM},                    // 55
    {FMT_8_24, TYPE_UINT, CFMT_8_24_UINT},                      // 56
    {FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM},                      // 57
    {FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM},                      // 58
    {FMT_8_8, TYPE_UINT, CFMT_8_8_UINT},                        // 59
    {FMT_8_8, TYPE_SINT, CFMT_8_8_SINT},                        // 60
    {FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB},                        // 61
    {FMT_8_8, TYPE_USCALED, CFMT_8_8_USCALED},                  // 62
    {FMT_8_8, TYPE_SSCALED, CFMT_8_8_SSCALED},                  // 63
    {FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM},              // 64
    {FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM},              // 65
    {FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT},                // 66
    {FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT},                // 67
    {FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB},                // 68
    {FMT_8_8_8_8, TYPE_USCALED, CFMT_8_8_8_8_USCALED},          // 69
    {FMT_8_8_8_8, TYPE_SSCALED, CFMT_8_8_8_8_SSCALED}           // 70
};
// Number of rows in FormatLUT (71); also used in FormatEntryPoint[] as the
// "no rows for this FMT" marker, since it is one past the last valid index.
static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t);

// Index in FormatLUT at which to start a search, indexed by FMT enum value.
// Each entry is the row number of the first FormatLUT row for that FMT.
// An entry of 71 (== FormatLUTSize, one past the last row) marks an FMT that
// has no rows in FormatLUT, so a search starting there finds nothing.
static const int FormatEntryPoint[] = {
  71, // FMT_INVALID
  48, // FMT_8
  5,  // FMT_16
  57, // FMT_8_8
  34, // FMT_32
  12, // FMT_16_16
  71, // FMT_10_11_11
  71, // FMT_11_11_10
  1,  // FMT_10_10_10_2
  26, // FMT_2_10_10_10
  64, // FMT_8_8_8_8
  37, // FMT_32_32
  19, // FMT_16_16_16_16
  40, // FMT_32_32_32
  43, // FMT_32_32_32_32
  71, // RESERVED
  47, // FMT_5_6_5
  0,  // FMT_1_5_5_5
  46, // FMT_5_5_5_1
  71, // FMT_4_4_4_4
  55, // FMT_8_24
  32  // FMT_24_8
};

/// @brief Translate a (data format, numeric type) pair into the combined
/// format code.
///
/// FormatEntryPoint[fmt] gives the first FormatLUT row for @p fmt; rows of
/// one FMT are contiguous, so the scan runs until the FMT changes or the
/// table ends. Scanning the whole group (rather than a fixed window of six
/// rows) matters because several groups hold seven rows, with TYPE_SSCALED
/// last.
///
/// @param fmt  FMT enum value.
/// @param type Numeric type enum value.
/// @return The matching combined format, or CFMT_INVALID when @p fmt is out
///         of range or the pair has no row in FormatLUT.
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  const size_t entry_point_count = sizeof(FormatEntryPoint) / sizeof(FormatEntryPoint[0]);
  assert(fmt < entry_point_count && "FMT out of range.");
  // Keep release builds (asserts compiled out) from indexing past the table.
  if (fmt >= entry_point_count) return CFMT_INVALID;

  for (int i = FormatEntryPoint[fmt]; i < FormatLUTSize && FormatLUT[i].fmt == fmt; i++) {
    if (FormatLUT[i].type == type) return FormatLUT[i].format;
  }
  return CFMT_INVALID;
}
//-----------------------------------------------------------------------------
// End workaround
//-----------------------------------------------------------------------------

// Constructor: only runs the ImageManagerKv base constructor; no work of its
// own.
ImageManagerGfx12::ImageManagerGfx12() : ImageManagerKv() {}

// Destructor: empty body, nothing to release here.
ImageManagerGfx12::~ImageManagerGfx12() {}

// TODO(cfreehil) remove from class, make it a utility function
//
// Computes the size and base alignment of the backing store for an image.
//
// The tile mode is TILED only for an opaque layout on a base-profile agent
// with a non-1DB geometry; everything else is LINEAR. The surface is then
// sized through GetAddrlibSurfaceInfoNv().
//
// @return The status of the profile query if it fails, HSA_STATUS_ERROR if
//         addrlib cannot describe the surface,
//         HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED if a caller-supplied
//         pitch of a linear, non-1DB image differs from the computed one,
//         otherwise HSA_STATUS_SUCCESS with `image_info` filled in.
hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  hsa_profile_t agent_profile;
  const hsa_status_t query_status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  const bool is_buffer_image = (desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  const bool use_tiling =
      (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) &&
      (agent_profile == HSA_PROFILE_BASE) && !is_buffer_image;
  const Image::TileMode tile_mode =
      use_tiling ? Image::TileMode::TILED : Image::TileMode::LINEAR;

  ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surf = {0};
  const uint32_t swizzle_mode = GetAddrlibSurfaceInfoNv(
      component, desc, tile_mode, image_data_row_pitch, image_data_slice_pitch, surf);
  if (swizzle_mode == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  // Pitches addrlib settled on, in bytes.
  const size_t computed_row_pitch = (surf.bpp >> 3) * surf.pitch;
  const size_t computed_slice_pitch = computed_row_pitch * surf.height;

  // A non-zero caller pitch is only honoured if it equals what addrlib chose.
  if (!is_buffer_image &&
      image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    const bool row_mismatch =
        image_data_row_pitch && (computed_row_pitch != image_data_row_pitch);
    const bool slice_mismatch =
        image_data_slice_pitch && (computed_slice_pitch != image_data_slice_pitch);
    if (row_mismatch || slice_mismatch) {
      return static_cast<hsa_status_t>(
          HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
    }
  }

  image_info.size = surf.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = surf.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}

// Always answers true, whatever `address` is; the argument is not inspected.
bool ImageManagerGfx12::IsLocalMemory(const void* address) const {
  static_cast<void>(address);
  return true;
}

// Builds the SRD of `image` starting from a caller-supplied metadata block.
//
// The eight descriptor words in `descriptor` (interpreted as
// metadata_amd_gfx12_t) are copied into image.srd[0..7]; the base address,
// format and channel swizzle are then patched for this image. srd[8..10]
// receive the channel type, channel order and width. row_pitch and
// slice_pitch are reset to 0.
//
// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
//         supported, has element size 0, or (non-1DB only) its element size
//         differs from the hardware pixel size; HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image,
                                     const metadata_amd_t* descriptor) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  const bool format_unsupported =
      (image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
      (image_prop.element_size == 0);
  if (format_unsupported) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // Address written into the descriptor: rebased against the local memory
  // base whenever IsLocalMemory() says so.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Seed the SRD with the words supplied by the caller.
  const metadata_amd_gfx12_t* desc = reinterpret_cast<const metadata_amd_gfx12_t*>(descriptor);
  image.srd[0] = desc->word0.u32All;
  image.srd[1] = desc->word1.u32All;
  image.srd[2] = desc->word2.u32All;
  image.srd[3] = desc->word3.u32All;
  image.srd[4] = desc->word4.u32All;
  image.srd[5] = desc->word5.u32All;
  image.srd[6] = desc->word6.u32All;
  image.srd[7] = desc->word7.u32All;

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    const uint32_t hwPixelSize =
        ImageLut().GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hwPixelSize) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }

    SQ_IMG_RSRC_WORD0* img_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
    SQ_IMG_RSRC_WORD1* img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);

    img_word0->bits.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    img_word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);

    // New to GFX12...
    //img_word1->bits.MAX_MIP = 0;

    img_word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    img_word3->bits.DST_SEL_X = swizzle.x;
    img_word3->bits.DST_SEL_Y = swizzle.y;
    img_word3->bits.DST_SEL_Z = swizzle.z;
    img_word3->bits.DST_SEL_W = swizzle.w;

    // Only the 1D flavours get their TYPE rewritten from the geometry.
    const bool is_1d_image = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
                              image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D);
    if (is_1d_image) {
      img_word3->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    }
  } else {
    // 1DB images use a buffer descriptor: words 0, 1 and 3 are rebuilt.
    SQ_BUF_RSRC_WORD0 buf_word0;
    buf_word0.val = 0;
    buf_word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    SQ_BUF_RSRC_WORD1 buf_word1;
    buf_word1.val = image.srd[1];
    buf_word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    buf_word1.f.STRIDE = image_prop.element_size;

    SQ_BUF_RSRC_WORD3 buf_word3;
    buf_word3.val = image.srd[3];
    buf_word3.f.DST_SEL_X = swizzle.x;
    buf_word3.f.DST_SEL_Y = swizzle.y;
    buf_word3.f.DST_SEL_Z = swizzle.z;
    buf_word3.f.DST_SEL_W = swizzle.w;
    buf_word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    buf_word3.f.INDEX_STRIDE = image_prop.element_size;

    // New to GFX12
    //buf_word3.f.WRITE_COMPRESS_ENABLE = 0;
    //buf_word3.f.COMPRESSION_EN = 0;
    //buf_word3.f.COMPRESSION_ACCESS_MODE = 0;

    image.srd[0] = buf_word0.val;
    image.srd[1] = buf_word1.val;
    image.srd[3] = buf_word3.val;
  }

  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Picks the border-colour swizzle that corresponds to a channel swizzle.
//
// The decision is driven by which destination channel selects SEL_X, tested
// in the order alpha, red, green, blue. When none of them does, the identity
// swizzle TEX_BC_Swizzle_XYZW is returned.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  const SEL red = static_cast<SEL>(swizzle.x);
  const SEL green = static_cast<SEL>(swizzle.y);
  const SEL blue = static_cast<SEL>(swizzle.z);
  const SEL alpha = static_cast<SEL>(swizzle.w);

  if (alpha == SEL_X) {
    // Have to use either TEX_BC_Swizzle_WZYX or TEX_BC_Swizzle_WXYZ
    //
    // For the pre-defined border color values (white, opaque black,
    // transparent black), the only thing that matters is that the alpha
    // channel winds up in the correct place (because the RGB channels are
    // all the same) so either of these TEX_BC_Swizzle enumerations will
    // work.  Not sure what happens with border color palettes.
    if (blue == SEL_Y) {
      return TEX_BC_Swizzle_WZYX;  // ABGR
    }
    if ((red == SEL_X) && (green == SEL_X) && (blue == SEL_X)) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    return TEX_BC_Swizzle_WXYZ;  // ARGB
  }

  if (red == SEL_X) {
    // Have to use either TEX_BC_Swizzle_XYZW or TEX_BC_Swizzle_XWYZ
    if (green == SEL_Y) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    if ((green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W)) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    return TEX_BC_Swizzle_XWYZ;  // RAGB
  }

  if (green == SEL_X) {
    // GRAB, have to use TEX_BC_Swizzle_YXWZ
    return TEX_BC_Swizzle_YXWZ;
  }

  if (blue == SEL_X) {
    // BGRA, have to use TEX_BC_Swizzle_ZYXW
    return TEX_BC_Swizzle_ZYXW;
  }

  return TEX_BC_Swizzle_XYZW;
}


// Builds the hardware SRD for `image` from its own descriptor (no
// caller-supplied metadata).
//
// 1DB images get a buffer descriptor in srd[0..3]. Every other geometry gets
// an image descriptor in srd[0..7] whose swizzle mode and pitch come from
// GetAddrlibSurfaceInfoNv(). In both cases srd[8..10] receive the channel
// type, channel order and width, and image.row_pitch / image.slice_pitch are
// updated.
//
// Every descriptor word is zeroed before its bitfields are assigned so that
// no indeterminate bits can reach the SRD.
//
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when addrlib cannot
//         describe the surface.
hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const void* image_data_addr = image.data;

  // Rebase the address against the local memory base when applicable.
  if (IsLocalMemory(image.data))
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;

    word1.f.SWIZZLE_ENABLE = 0;

    // Zero first, like the other words, so bits outside NUM_RECORDS (if any)
    // are well defined.
    word2.val = 0;
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    word3.f.INDEX_STRIDE = image_prop.element_size;

    // New to GFX12
    //word3.f.WRITE_COMPRESS_ENABLE = 0;
    //word3.f.COMPRESSION_EN = 0;
    //word3.f.COMPRESSION_ACCESS_MODE = 0;

    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    // NOTE(review): word6/word7 are declared with the WORD5 type; they are
    // only ever written as 0 through .val, so the bit layout is not used.
    SQ_IMG_RSRC_WORD5 word6;
    SQ_IMG_RSRC_WORD5 word7;

    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
         image.component, image.desc, image.tile_mode,
                                     image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero first, like the other words, so bits outside BASE_ADDRESS (if any)
    // are well defined.
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);

    // New to GFX12
    //word1.f.MAX_MIP = 0;
    //word1.f.BASE_LEVEL = 0;

    word1.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(image.desc.width - 1);

    word2.val = 0;
    // Take the high 14 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 15>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    //word3.f.NO_EDGE_CLAMP = 0;  // New to GFX12
    //word3.f.LAST_LEVEL = 0;     // New to GFX12
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;

    // For 1d, 2d and 2d-msaa, fields DEPTH+PITCH_MSB encode pitch-1
    if (!image_array && !image_3d) {
      uint32_t encPitch = out.pitch - 1;
      word4.f.DEPTH = encPitch & 0x3fff;           // first 14 bits
      word4.f.PITCH_MSB = (encPitch >> 14) & 0x3;  // last 2 bits
    } else {
      word4.f.DEPTH =
        (image_array) // Doesn't hurt but isn't array_size already >0?
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    }

    word5.val = 0;
    word6.val = 0;
    word7.val = 0;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    image.srd[5] = word5.val;
    image.srd[6] = word6.val;
    image.srd[7] = word7.val;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Trailing words: channel type, channel order and width.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Re-targets an already populated SRD to `new_format`.
//
// Stores the new format in image.desc, then rewrites the FORMAT field and the
// four DST_SEL_* channel selectors in place: buffer word 3 for 1DB images,
// image words 1 and 3 for everything else. srd[8..10] are refreshed with the
// channel type, channel order and width.
//
// @return Always HSA_STATUS_SUCCESS (format support is only asserted).
hsa_status_t ImageManagerGfx12::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const Swizzle channel_sel = ImageLut().MapSwizzle(image.desc.format.channel_order);

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);

    img_word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    img_word3->bits.DST_SEL_X = channel_sel.x;
    img_word3->bits.DST_SEL_Y = channel_sel.y;
    img_word3->bits.DST_SEL_Z = channel_sel.z;
    img_word3->bits.DST_SEL_W = channel_sel.w;
  } else {
    // Buffer descriptors keep both the selectors and the format in word 3.
    SQ_BUF_RSRC_WORD3* buf_word3 =
        reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);

    buf_word3->bits.DST_SEL_X = channel_sel.x;
    buf_word3->bits.DST_SEL_Y = channel_sel.y;
    buf_word3->bits.DST_SEL_Z = channel_sel.z;
    buf_word3->bits.DST_SEL_W = channel_sel.w;
    buf_word3->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Fills sampler.srd[0..3] from the sampler descriptor.
//
// Word 0 carries the address modes (via convertAddressMode) and the
// unnormalized-coordinate flag, word 1 sets MAX_LOD to 4095, word 2 selects
// point or bilinear XY filtering with Z and MIP filtering disabled, and
// word 3 selects the transparent-black border colour.
//
// @return The convertAddressMode() status if it fails,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown filter mode,
//         otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerGfx12::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_v2_t& samp_desc = sampler.desc;

  // --- word 0: addressing ---
  SQ_IMG_SAMP_WORD0 word0;
  word0.u32All = 0;
  const hsa_status_t addr_status =
      convertAddressMode<SQ_IMG_SAMP_WORD0, SQ_TEX_CLAMP>(word0, samp_desc.address_modes);
  if (addr_status != HSA_STATUS_SUCCESS) {
    return addr_status;
  }
  word0.bits.FORCE_UNNORMALIZED =
      (samp_desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  // --- word 1: LOD range ---
  SQ_IMG_SAMP_WORD1 word1;
  word1.u32All = 0;
  word1.bits.MAX_LOD = 4095;

  // --- word 2: filtering ---
  SQ_IMG_SAMP_WORD2 word2;
  word2.u32All = 0;
  if (samp_desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_NEAREST) {
    word2.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
  } else if (samp_desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_LINEAR) {
    word2.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
  } else {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  // Minification uses the same filter as magnification.
  word2.bits.XY_MIN_FILTER = word2.bits.XY_MAG_FILTER;
  word2.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  word2.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  // --- word 3: border colour ---
  SQ_IMG_SAMP_WORD3 word3;
  word3.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  word3.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = word0.u32All;
  sampler.srd[1] = word1.u32All;
  sampler.srd[2] = word2.u32All;
  sampler.srd[3] = word3.u32All;

  return HSA_STATUS_SUCCESS;
}

// Runs addrlib (Addr3) on the surface described by `desc`, fills `out` with
// the resulting layout and returns the swizzle mode that was used.
//
// @param component               Agent whose image property table is queried.
// @param desc                    Image descriptor (format, geometry, extents).
// @param tileMode                LINEAR forces ADDR3_LINEAR; any other value
//                                makes this function choose among the modes
//                                addrlib reports as possible.
// @param image_data_row_pitch    Row pitch in bytes; converted to elements and
//                                passed to addrlib as pitchInElement.
// @param image_data_slice_pitch  Not used by this implementation.
// @param out                     Receives the Addr3ComputeSurfaceInfo output.
// @return The selected swizzle mode, or (uint32_t)-1 when the format has a
//         zero element size, addrlib fails, no swizzle mode is usable, or the
//         computed surface size is 0.
uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);

  // element_size == 0 is treated as "unsupported format" elsewhere in this
  // file; report failure here instead of dividing by zero when computing
  // pitchInElement below.
  if (image_prop.element_size == 0) {
    return (uint32_t)(-1);
  }

  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  const uint32_t width = static_cast<uint32_t>(desc.width);
  const uint32_t height = static_cast<uint32_t>(desc.height);
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast<uint32_t>(
      std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));

  ADDR3_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_INPUT);
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  in.pitchInElement = image_data_row_pitch / image_prop.element_size;

  switch (desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_3D: {
      in.resourceType = ADDR_RSRC_TEX_3D;
      /*
       * 3D swizzle modes on GFX12 enforces alignment
       * of the number of slices  to the block depth.
       * If numSlices = 3 then the 3 slices are
       * interleaved for 3D locality among the 8 slices
       * that make up each block. This causes the memory
       * footprint to jump from an ideal size of ~12 GB
       * to ~32 GB.
       * 'enable3DSwizzleMode' flag tests for env variable
       * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
       * 3D swizzle:
       * true: Keep view3dAs2dArray = 0 for real 3D interleaving.
       * false: Use view3dAs2dArray = 1 to avoid the alignment
       *       expansion.
       * 2D swizzle modes can lower size overhead but may yield
       * suboptimal cache behavior for fully 3D volumetric
       * operations.
       */
      const bool enable3DSwizzleMode =
          core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
      in.flags.view3dAs2dArray = enable3DSwizzleMode ? 0 : 1;
      break;
    }
  }

  in.flags.texture = 1;

  if (tileMode == Image::TileMode::LINEAR) {
    in.swizzleMode = ADDR3_LINEAR;
  } else {
    /*
     * AddrLib3 does not provide the best swizzle mode (unlike AddrLib2).
     * Instead, client has to request the list of possible swizzle mode and
     * then pick the best one for its needs (i.e. performance/space tradeoffs).
     *
     */
    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT swOut = { 0 };
    swOut.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT);

    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT swIn = { 0 };
    swIn.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT);
    swIn.flags = in.flags;
    swIn.resourceType = in.resourceType;
    swIn.bpp = in.bpp;
    swIn.width = in.width;
    swIn.height = in.height;
    swIn.numSlices = in.numSlices;
    swIn.numMipLevels = in.numMipLevels;
    swIn.numSamples = in.numSamples;
    /*
     * Cannot leave it to 0 like GFX11 Addr2GetPreferredSurfaceSetting method
     * as it triggers an ASSERT in AddrLib3 code.
     *
     * Setting it to 256K to allow for maximum number of swizzle mode in set
     * returned (similar behaviour as GFX11).
     *
     */
    swIn.maxAlign = 256 * 1024;

    if (ADDR_OK != Addr3GetPossibleSwizzleModes(addr_lib_, &swIn, &swOut)) {
      debug_print("Addr3GetPossibleSwizzleModes failed!\n");
      return (uint32_t) -1;
    }

    /*
     * Remove any modes that the client does not want (if any).
     */
    //swOut.validModes.sw***** = 0;

    /*
     * Pick the "best" swizzle mode.
     *
     * This algorithm is based on behaviour in GFX11 AddrLib and on
     * GFX12 code in PAL (that is also based on the GFX11 behaviour).
     *
     * Ratio variables control the extra space that can be used to get a larger
     * swizzle mode.
     *
     * ratioLow:ratioHi meanings:
     *
     *   2:1 ratio - same behaviour as GFX11.
     *   3:2 ratio - would be equivalent if flag opt4space in GFX11 (not used in ROCr)
     *   1:1 ratio - minimum size, not necessary best for performance
     *
     */
    const UINT_32 ratioLow = 2;
    const UINT_32 ratioHigh = 1;

    // Same behaviour as GFX11, remove linear if height is 1.
    if (in.height > 1) {
      swOut.validModes.swLinear = 0;
    }

    UINT_64 minSize = 0;
    Addr3SwizzleMode bestSwizzle = ADDR3_MAX_TYPE;

    for (uint32_t i = ADDR3_LINEAR; i < ADDR3_MAX_TYPE; i++) {
      // Unsigned literal keeps the mask arithmetic unsigned for every i.
      if (swOut.validModes.value & (1u << i)) {
        ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
        localOut.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);

        in.swizzleMode = (Addr3SwizzleMode) i;

        if (ADDR_OK != Addr3ComputeSurfaceInfo(addr_lib_, &in, &localOut)) {
          // Should not happen, if it does, ignore this swizzle mode.
          debug_print("Addr3ComputeSurfaceInfo failed!\n");
          continue;
        }

        UINT_64 surfaceSize = localOut.surfSize;

        // The first usable mode is always taken; a later mode replaces it
        // when its size stays within ratioLow:ratioHigh of the current pick.
        if (bestSwizzle == ADDR3_MAX_TYPE) {
          minSize = surfaceSize;
          bestSwizzle = (Addr3SwizzleMode) i;
        } else if ((surfaceSize * ratioHigh) <= (minSize * ratioLow)) {
          minSize = surfaceSize;
          bestSwizzle = (Addr3SwizzleMode) i;
        }
      }
    }

    if (bestSwizzle < ADDR3_MAX_TYPE) {
      in.swizzleMode = (Addr3SwizzleMode) bestSwizzle;
    } else {
      debug_print("Unable to find a valid swizzleMode for the surface!\n");
      return (uint32_t) -1;
    }
  }

  // Final computation with the chosen swizzle mode, written to the caller.
  out.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);

  if (ADDR_OK != Addr3ComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return (uint32_t)(-1);
  }
  if (out.surfSize == 0) {
    return (uint32_t)(-1);
  }

  return in.swizzleMode;
}

// Fills `region` of `image` with `pattern` using the blit kernel.
//
// Two temporary SRD tweaks are applied around the blit and undone afterwards:
//  - UNORM_SHORT_101010 images get DST_SEL_W forced to SEL_0;
//  - sRGB channel orders get their pattern converted with
//    LinearToStandardRGB() and the image FORMAT switched to the TYPE_UNORM
//    variant.
//
// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit queue is available,
//         otherwise the status returned by the blit kernel.
hsa_status_t ImageManagerGfx12::FillImage(const Image& image, const void* pattern,
                                       const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // The SRD is patched in place, so a mutable view of the image is needed.
  Image* target = const_cast<Image*>(&image);

  SQ_BUF_RSRC_WORD3* buf_word3 = NULL;
  SQ_IMG_RSRC_WORD3* img_word3 = NULL;
  uint32_t saved_dst_sel_w = 0;
  const bool is_101010 = (target->desc.format.channel_type ==
                          HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010);
  if (is_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (target->desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      buf_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = buf_word3->bits.DST_SEL_W;
      buf_word3->bits.DST_SEL_W = SEL_0;
    } else {
      img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = img_word3->bits.DST_SEL_W;
      img_word3->bits.DST_SEL_W = SEL_0;
    }
  }

  SQ_IMG_RSRC_WORD1* img_word1 = NULL;
  uint32_t saved_format = 0;
  const void* fill_pattern = pattern;
  float srgb_pattern[4] = {0};

  const auto order = target->desc.format.channel_order;
  const bool is_srgb_order = (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);
  if (is_srgb_order) {
    // We do not have write support for SRGBA image, so convert pattern
    // to standard form and treat the image as RGBA image.
    const float* linear = reinterpret_cast<const float*>(pattern);
    srgb_pattern[0] = LinearToStandardRGB(linear[0]);
    srgb_pattern[1] = LinearToStandardRGB(linear[1]);
    srgb_pattern[2] = LinearToStandardRGB(linear[2]);
    srgb_pattern[3] = linear[3];  // Alpha is passed through unchanged.
    fill_pattern = srgb_pattern;

    ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);

    img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&target->srd[1]);
    saved_format = img_word1->bits.FORMAT;
    img_word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, TYPE_UNORM);
  }

  const hsa_status_t blit_status = ImageRuntime::instance()->blit_kernel().FillImage(
      blit_queue_, blit_code_catalog_, *target, fill_pattern, region);

  // Revert back original configuration.
  if (buf_word3 != NULL) {
    buf_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (img_word3 != NULL) {
    img_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (img_word1 != NULL) {
    img_word1->bits.FORMAT = saved_format;
  }

  return blit_status;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager_gfx12.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_IMAGE_MANAGER_GFX12_H_
#define EXT_IMAGE_IMAGE_MANAGER_GFX12_H_

#include "addrlib/inc/addrinterface.h"
#include "image_lut_gfx11.h"
#include "image_manager_kv.h"

namespace rocr {
namespace image {

/// @brief Image manager for GFX12 devices. Derives from ImageManagerKv and
/// provides its own SRD population, surface sizing (through the ADDR3
/// addrlib interface) and image fill.
class ImageManagerGfx12 : public ImageManagerKv {
 public:
  ImageManagerGfx12();
  virtual ~ImageManagerGfx12();

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch, size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const;

  /// @brief Fill image structure with device specific image object.
  virtual hsa_status_t PopulateImageSrd(Image& image) const;

  /// @brief Fill image structure with device specific image object using the given format.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;

  /// @brief Modify device specific image object according to the specified
  /// new format.
  virtual hsa_status_t ModifyImageSrd(Image& image,
                                      hsa_ext_image_format_t& new_format) const;

  /// @brief Fill sampler structure with device specific sampler object.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;

  /// @brief Fill image backing storage using agent copy.
  virtual hsa_status_t FillImage(const Image& image, const void* pattern,
                                 const hsa_ext_image_region_t& region);
 protected:
  /// @brief Compute the addrlib surface layout for an image.
  /// @param[out] out Receives the ADDR3 surface info.
  /// @return The swizzle mode used, or (uint32_t)-1 on failure.
  uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
                             const hsa_ext_image_descriptor_t& desc,
                             Image::TileMode tileMode,
                             size_t image_data_row_pitch,
                             size_t image_data_slice_pitch,
                             ADDR3_COMPUTE_SURFACE_INFO_OUTPUT& out) const;

  /// @brief Report whether @p address is treated as local memory. The GFX12
  /// implementation returns true for every address.
  bool IsLocalMemory(const void* address) const;

  /// @brief Accessor for the format/swizzle lookup table used by this manager
  /// (the GFX11 table is reused here).
  virtual const ImageLutGfx11& ImageLut() const { return image_lut_gfx11; };

 private:
  /// Lookup table instance returned by ImageLut().
  ImageLutGfx11 image_lut_gfx11;
  DISALLOW_COPY_AND_ASSIGN(ImageManagerGfx12);
};

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_IMAGE_MANAGER_GFX12_H_


================================================
FILE: runtime/hsa-runtime/image/image_manager_kv.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_manager_kv.h"

#include <assert.h>

#include <algorithm>
#include <climits>

#include "core/inc/runtime.h"
#include "hsakmt/hsakmt.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/runtime.h"
#include "addrlib/inc/addrinterface.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_kv.h"
#include "util.h"
#include "device_info.h"

namespace rocr {
namespace image {

// Layout checks for the hardware descriptor words that this file overlays onto
// the uint32_t SRD arrays of images and samplers.
// NOTE(review): ASSERT_SIZE_UINT32 is defined outside this file chunk;
// presumably it is a compile-time check that each type is exactly one 32-bit
// word -- confirm against its definition.

// Buffer resource descriptor words (used below for 1DB images).
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD3)

// Image resource descriptor words (used below for all non-1DB geometries).
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD3)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD4)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD5)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD6)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD7)

// Sampler descriptor words (used by PopulateSamplerSrd).
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)

// Constructs the manager. The body is empty: the per-agent members (agent_,
// addr_lib_, blit_queue_, ...) are assigned later by Initialize().
ImageManagerKv::ImageManagerKv() : ImageManager() {}

// The destructor body is empty; it does not release anything itself. The blit
// queue and the address library handle are destroyed by Cleanup().
ImageManagerKv::~ImageManagerKv() {}

// Binds this image manager to |agent_handle| and sets up the per-agent state:
// ASIC id and family, the address library instance built from the tile
// configuration, the default memory type derived from the coherency mode, and
// the base address of the coarse-grained (local) region.
//
// Every runtime query is still asserted in debug builds, but a failure is now
// also propagated to the caller so that release builds (where assert() is
// compiled out) do not continue with partially initialized state.
//
// Returns HSA_STATUS_SUCCESS on success, the failing query's status if an
// agent/region query fails, or HSA_STATUS_ERROR if the address library cannot
// be created.
hsa_status_t ImageManagerKv::Initialize(hsa_agent_t agent_handle) {
  agent_ = agent_handle;

  // Put the lazily created / optional members into a known state up front so
  // that Cleanup() stays safe even if initialization bails out early.
  addr_lib_ = NULL;
  local_memory_base_address_ = 0;
  // Zeroed the queue object so it can be created on demand.
  blit_queue_.queue_ = NULL;
  blit_queue_.cached_index_ = 0;

  hsa_status_t status = GetGPUAsicID(agent_, &chip_id_);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return status;

  // Only derive the major version once chip_id_ is known to be valid.
  const uint32_t major_ver = MajorVerFromDevID(chip_id_);

  status = HSA::hsa_agent_get_info(
      agent_, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID), &family_type_);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return status;

  // Caller-provided storage for the tile / macro-tile tables; the address
  // library copies what it needs from these during AddrCreate.
  static const unsigned int kMaxTileConfigEntries = 40;
  HsaGpuTileConfig tileConfig = {0};
  unsigned int tc[kMaxTileConfigEntries];
  unsigned int mtc[kMaxTileConfigEntries];
  tileConfig.TileConfig = &tc[0];
  tileConfig.NumTileConfigs = kMaxTileConfigEntries;
  tileConfig.MacroTileConfig = &mtc[0];
  tileConfig.NumMacroTileConfigs = kMaxTileConfigEntries;
  status = HSA::hsa_get_tile_config(agent_handle, &tileConfig);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return status;

  // The ImageManagerKv::Initialize is called on the first call to
  // hsa_ext_image_*, so checking the coherency mode here is fine as long as
  // the change to the coherency mode happens before a call to
  // hsa_ext_image_create.
  hsa_amd_coherency_type_t coherency_type;
  status = AMD::hsa_amd_coherency_get_type(agent_, &coherency_type);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return status;
  mtype_ = (coherency_type == HSA_AMD_COHERENCY_TYPE_COHERENT) ? 3 : 1;

  // TODO: handle the case where the call to hsa_set_memory_type happens after
  // hsa_ext_image_create.

  // Locate the coarse-grained global region (if any) and remember its base;
  // the SRD builders subtract it from addresses classified as local memory.
  hsa_region_t local_region = {0};
  status = HSA::hsa_agent_iterate_regions(agent_, GetLocalMemoryRegion, &local_region);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return status;

  if (local_region.handle != 0) {
    status = HSA::hsa_region_get_info(local_region,
                                      static_cast<hsa_region_info_t>(HSA_AMD_REGION_INFO_BASE),
                                      &local_memory_base_address_);
    assert(status == HSA_STATUS_SUCCESS);
    if (status != HSA_STATUS_SUCCESS) return status;
  }

  // Initialize address library. This is done last so that no error path above
  // can leave a created library handle behind.
  // TODO(bwicakso) hard coded based on UGL parameters.
  // Need to get this information from KMD.
  ADDR_CREATE_INPUT addr_create_input = {0};
  ADDR_CREATE_OUTPUT addr_create_output = {0};

  if (major_ver >= 9) {
    addr_create_input.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
  } else {
    addr_create_input.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
  }

  addr_create_input.chipFamily = family_type_;
  addr_create_input.chipRevision = 0;  // TODO(bwicakso): find how to get this.

  ADDR_CREATE_FLAGS create_flags = {};
  create_flags.value = 0;
  create_flags.useTileIndex = 1;
  addr_create_input.createFlags = create_flags;

  addr_create_input.callbacks.allocSysMem = AllocSysMem;
  addr_create_input.callbacks.freeSysMem = FreeSysMem;
  addr_create_input.callbacks.debugPrint = 0;

  ADDR_REGISTER_VALUE reg_val = {0};
  reg_val.gbAddrConfig = tileConfig.GbAddrConfig;
  reg_val.noOfBanks = tileConfig.NumBanks;
  reg_val.noOfRanks = tileConfig.NumRanks;
  reg_val.pTileConfig = tileConfig.TileConfig;
  reg_val.noOfEntries = tileConfig.NumTileConfigs;
  reg_val.noOfMacroEntries = tileConfig.NumMacroTileConfigs;
  reg_val.pMacroTileConfig = tileConfig.MacroTileConfig;

  addr_create_input.regValue = reg_val;

  addr_create_input.minPitchAlignPixels = 0;

  const ADDR_E_RETURNCODE addr_ret =
      AddrCreate(&addr_create_input, &addr_create_output);
  if (addr_ret != ADDR_OK) {
    return HSA_STATUS_ERROR;
  }
  addr_lib_ = addr_create_output.hLib;

  return HSA_STATUS_SUCCESS;
}

// Releases the resources owned by this manager: the on-demand blit queue and
// the address library instance. Each handle is reset after it is destroyed so
// that a repeated Cleanup() call is a no-op instead of a double destroy.
void ImageManagerKv::Cleanup() {
  if (blit_queue_.queue_ != NULL) {
    HSA::hsa_queue_destroy(blit_queue_.queue_);
    blit_queue_.queue_ = NULL;
  }

  if (addr_lib_ != NULL) {
    AddrDestroy(addr_lib_);
    addr_lib_ = NULL;
  }
}

// Looks up the hardware property of |format| for |geometry| in this manager's
// image lookup table. |component| is not consulted.
ImageProperty ImageManagerKv::GetImageProperty(
    hsa_agent_t component, const hsa_ext_image_format_t& format,
    hsa_ext_image_geometry_t geometry) const {
  const ImageProperty property = ImageLut().MapFormat(format, geometry);
  return property;
}

// Reports the maximum width, height, depth and array size that the lookup
// table advertises for |geometry|. |component| is not consulted.
void ImageManagerKv::GetImageInfoMaxDimension(hsa_agent_t component,
                                              hsa_ext_image_geometry_t geometry,
                                              uint32_t& width, uint32_t& height,
                                              uint32_t& depth,
                                              uint32_t& array_size) const {
  auto&& limits = ImageLut();

  width = limits.GetMaxWidth(geometry);
  height = limits.GetMaxHeight(geometry);
  depth = limits.GetMaxDepth(geometry);
  array_size = limits.GetMaxArraySize(geometry);
}

// Computes the backing-store size and base alignment for an image described
// by |desc|, writing them to |image_info|.
//
// Tiling is requested only for an opaque layout on a base-profile agent with a
// geometry other than 1DB; everything else is laid out linearly. For an
// explicit linear layout of a non-1DB image, a caller-supplied row or slice
// pitch that differs from what the address library computes is rejected with
// HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED.
hsa_status_t ImageManagerKv::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  hsa_profile_t agent_profile;
  const hsa_status_t query_status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  const bool buffer_image = (desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);
  const bool opaque_layout = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE);
  const bool use_tiling =
      opaque_layout && (agent_profile == HSA_PROFILE_BASE) && !buffer_image;
  const Image::TileMode tile_mode =
      use_tiling ? Image::TileMode::TILED : Image::TileMode::LINEAR;

  ADDR_COMPUTE_SURFACE_INFO_OUTPUT surface = {0};
  const bool have_surface = GetAddrlibSurfaceInfo(
      component, desc, tile_mode, image_data_row_pitch, image_data_slice_pitch, surface);
  if (!have_surface) {
    return HSA_STATUS_ERROR;
  }

  // Pitches, in bytes, as computed by the address library.
  const size_t computed_row_pitch = (surface.bpp >> 3) * surface.pitch;
  const size_t computed_slice_pitch = computed_row_pitch * surface.height;

  if (!buffer_image && image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    // A pitch of zero means "not specified" and is never a mismatch.
    const bool row_mismatch =
        (image_data_row_pitch != 0) && (computed_row_pitch != image_data_row_pitch);
    const bool slice_mismatch =
        (image_data_slice_pitch != 0) && (computed_slice_pitch != image_data_slice_pitch);
    if (row_mismatch || slice_mismatch) {
      return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
    }
  }

  image_info.size = surface.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = surface.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}

static const uint64_t kLimitSystem = 1ULL << 48;

// Classifies |address| as device-local memory or not.
//  - Devices whose major version is 8 or higher: always reported as local.
//  - Older devices, large model: local iff the address is at or above
//    kLimitSystem (user mode system memory addressable by the CPU is
//    0 to 2^48), which avoids querying local memory region info.
//  - Older devices, 32 bit: never local.
bool ImageManagerKv::IsLocalMemory(const void* address) const {
  if (MajorVerFromDevID(chip_id_) >= 8) {
    return true;
  }

#ifdef HSA_LARGE_MODEL
  const uintptr_t address_bits = reinterpret_cast<uintptr_t>(address);
  return address_bits >= kLimitSystem;
#else
  // No local memory on 32 bit.
  return false;
#endif
}

// Builds the image SRD from a vendor metadata descriptor.
//
// The eight descriptor words are copied from |descriptor| (interpreted with the
// CI/VI layout) and then the address, format, memory type, ATC and swizzle
// fields are overwritten from |image|. Returns
// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the image format is not
// supported or its element size disagrees with the pixel size implied by the
// metadata's data/num format.
hsa_status_t ImageManagerKv::PopulateImageSrd(Image& image, const metadata_amd_t* descriptor) const {
  metadata_amd_ci_vi_t* vendor_desc = (metadata_amd_ci_vi_t*)descriptor;

  const ImageProperty fmt_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  const bool format_unsupported =
      (fmt_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) || (fmt_prop.element_size == 0);
  if (format_unsupported) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  const uint32_t metadata_pixel_size = ImageLut().GetPixelSize(
      vendor_desc->word1.bitfields.data_format, vendor_desc->word1.bitfields.num_format);
  if (fmt_prop.element_size != metadata_pixel_size) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  const Swizzle channel_sel = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // Local memory is addressed relative to the local region base, without ATC
  // and with memory type 1; anything else keeps the manager-wide defaults.
  bool use_atc = true;
  uint32_t memory_type = mtype_;
  const void* gpu_address = image.data;
  if (IsLocalMemory(image.data)) {
    use_atc = false;
    memory_type = 1;
    gpu_address = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the raw words supplied by the metadata.
  const uint32_t metadata_words[8] = {
      vendor_desc->word0.u32_all, vendor_desc->word1.u32_all,
      vendor_desc->word2.u32_all, vendor_desc->word3.u32_all,
      vendor_desc->word4.u32_all, vendor_desc->word5.u32_all,
      vendor_desc->word6.u32_all, vendor_desc->word7.u32_all};
  for (int i = 0; i < 8; ++i) {
    image.srd[i] = metadata_words[i];
  }

  // Patch the copied words in place through typed views of the SRD.
  SQ_IMG_RSRC_WORD0* srd_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
  SQ_IMG_RSRC_WORD1* srd_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
  SQ_IMG_RSRC_WORD3* srd_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
  SQ_IMG_RSRC_WORD7* srd_word7 = reinterpret_cast<SQ_IMG_RSRC_WORD7*>(&image.srd[7]);

  srd_word0->bits.base_address = PtrLow40Shift8(gpu_address);

  srd_word1->bits.base_address_hi = PtrHigh64Shift40(gpu_address);
  srd_word1->bits.data_format = fmt_prop.data_format;
  srd_word1->bits.num_format = fmt_prop.data_type;
  srd_word1->bits.mtype = memory_type;

  srd_word3->bits.atc = use_atc;
  srd_word3->bits.dst_sel_x = channel_sel.x;
  srd_word3->bits.dst_sel_y = channel_sel.y;
  srd_word3->bits.dst_sel_z = channel_sel.z;
  srd_word3->bits.dst_sel_w = channel_sel.w;

  // The metadata address taken from the descriptor is offset by the image
  // address (same low-40-bits >> 8 encoding as the base address).
  srd_word7->bits.meta_data_address += PtrLow40Shift8(gpu_address);

  //Looks like this is only used for CPU copies.
  image.row_pitch = (vendor_desc->word4.bits.pitch+1)*fmt_prop.element_size;
  image.slice_pitch = image.row_pitch * (vendor_desc->word2.bits.height+1);

  //Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Builds the SRD for |image| from its descriptor.
//
// 1DB images get a 4-dword buffer resource descriptor; every other geometry
// gets an 8-dword image resource descriptor whose pitch, tiling index and
// slice size come from the address library. In both cases srd[8..10] carry the
// channel type, channel order and width, and image.row_pitch /
// image.slice_pitch are updated.
//
// Every descriptor word is zeroed before its bitfields are written so the
// emitted dword never depends on indeterminate bits.
//
// Returns HSA_STATUS_ERROR if the address library cannot describe the surface,
// HSA_STATUS_SUCCESS otherwise. The format is expected to be supported (this is
// only asserted).
hsa_status_t ImageManagerKv::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  bool atc_access = true;
  uint32_t mtype = mtype_;
  const void* image_data_addr = image.data;

  // Local memory is addressed relative to the local region base, without ATC
  // and with memory type 1.
  if (IsLocalMemory(image.data)) {
    atc_access = false;
    mtype = 1;
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.u32_all = 0;
    word0.bits.base_address = PtrLow32(image_data_addr);

    word1.u32_all = 0;
    word1.bits.base_address_hi = PtrHigh32(image_data_addr);
    word1.bits.stride = image_prop.element_size;
    word1.bits.swizzle_enable = false;
    word1.bits.cache_swizzle = false;

    // num_records is the element count before major version 8 and the size in
    // bytes from major version 8 onwards.
    const uint32_t major_ver = MajorVerFromDevID(chip_id_);
    word2.u32_all = 0;
    word2.bits.num_records = (major_ver < 8) ?
                image.desc.width : image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.u32_all = 0;
    word3.bits.dst_sel_x = swizzle.x;
    word3.bits.dst_sel_y = swizzle.y;
    word3.bits.dst_sel_z = swizzle.z;
    word3.bits.dst_sel_w = swizzle.w;
    word3.bits.num_format = image_prop.data_type;
    word3.bits.data_format = image_prop.data_format;
    word3.bits.atc = atc_access;
    word3.bits.element_size = image_prop.element_size;
    word3.bits.type = ImageLut().MapGeometry(image.desc.geometry);
    word3.bits.mtype = mtype;

    image.srd[0] = word0.u32_all;
    image.srd[1] = word1.u32_all;
    image.srd[2] = word2.u32_all;
    image.srd[3] = word3.u32_all;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    SQ_IMG_RSRC_WORD6 word6;
    SQ_IMG_RSRC_WORD7 word7;

    ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    if (!GetAddrlibSurfaceInfo(image.component, image.desc, image.tile_mode,
          image.row_pitch, image.slice_pitch, out)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    word0.u32_all = 0;
    word0.bits.base_address = PtrLow40Shift8(image_data_addr);

    word1.u32_all = 0;
    word1.bits.base_address_hi = PtrHigh64Shift40(image_data_addr);
    word1.bits.min_lod = 0;
    word1.bits.data_format = image_prop.data_format;
    word1.bits.num_format = image_prop.data_type;
    word1.bits.mtype = mtype;

    // Dimensions are stored minus one.
    word2.u32_all = 0;
    word2.bits.width = image.desc.width - 1;
    word2.bits.height = image.desc.height - 1;
    word2.bits.perf_mod = 0;
    word2.bits.interlaced = 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.u32_all = 0;
    word3.bits.dst_sel_x = swizzle.x;
    word3.bits.dst_sel_y = swizzle.y;
    word3.bits.dst_sel_z = swizzle.z;
    word3.bits.dst_sel_w = swizzle.w;
    word3.bits.tiling_index = out.tileIndex;
    word3.bits.pow2_pad = (IsPowerOfTwo(row_pitch_size) && IsPowerOfTwo(image.desc.height)) ? 1 : 0;
    word3.bits.type = ImageLut().MapGeometry(image.desc.geometry);
    word3.bits.atc = atc_access;

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // Depth holds (array_size - 1) for arrays, (depth - 1) for 3D images and
    // zero otherwise; an array_size of zero is treated as one layer.
    word4.u32_all = 0;
    word4.bits.depth =
        (image_array)
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    word4.bits.pitch = out.pitch - 1;

    word5.u32_all = 0;
    word5.bits.last_array =
        (image_array)
            ? (std::max(image.desc.array_size, static_cast<size_t>(1)) - 1)
            : 0;

    word6.u32_all = 0;
    word7.u32_all = 0;

    image.srd[0] = word0.u32_all;
    image.srd[1] = word1.u32_all;
    image.srd[2] = word2.u32_all;
    image.srd[3] = word3.u32_all;
    image.srd[4] = word4.u32_all;
    image.srd[5] = word5.u32_all;
    image.srd[6] = word6.u32_all;
    image.srd[7] = word7.u32_all;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Trailing words carry format and width alongside the hardware descriptor.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Re-targets an already populated SRD to |new_format|: the image descriptor's
// format is replaced and the format / swizzle fields of the SRD are rewritten
// in place. For 1DB images these live in buffer word 3; for every other
// geometry the formats are in image word 1 and the swizzle in image word 3.
// The new format is expected to be supported (this is only asserted).
hsa_status_t ImageManagerKv::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  const ImageProperty fmt_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(fmt_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(fmt_prop.element_size != 0);

  const Swizzle channel_sel = ImageLut().MapSwizzle(image.desc.format.channel_order);

  const bool buffer_image = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);
  if (buffer_image) {
    SQ_BUF_RSRC_WORD3& buf_word3 = *reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3.bits.dst_sel_x = channel_sel.x;
    buf_word3.bits.dst_sel_y = channel_sel.y;
    buf_word3.bits.dst_sel_z = channel_sel.z;
    buf_word3.bits.dst_sel_w = channel_sel.w;
    buf_word3.bits.num_format = fmt_prop.data_type;
    buf_word3.bits.data_format = fmt_prop.data_format;
  } else {
    SQ_IMG_RSRC_WORD1& img_word1 = *reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1.bits.data_format = fmt_prop.data_format;
    img_word1.bits.num_format = fmt_prop.data_type;

    SQ_IMG_RSRC_WORD3& img_word3 = *reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3.bits.dst_sel_x = channel_sel.x;
    img_word3.bits.dst_sel_y = channel_sel.y;
    img_word3.bits.dst_sel_z = channel_sel.z;
    img_word3.bits.dst_sel_w = channel_sel.w;
  }

  // Keep the trailing format/width words in sync with the new format.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Encodes |sampler.desc| into the four sampler SRD words.
//
// Word 0 takes the address modes and the unnormalized-coordinate flag, word 1
// a fixed max LOD of 4095, word 2 the min/mag filter (no Z or mip filtering)
// and word 3 a transparent-black border color. Returns the status from
// convertAddressMode if it fails, or HSA_STATUS_ERROR_INVALID_ARGUMENT for a
// filter mode other than NEAREST / LINEAR.
hsa_status_t ImageManagerKv::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_v2_t& desc = sampler.desc;

  // Word 0: addressing.
  SQ_IMG_SAMP_WORD0 addressing;
  addressing.u32_all = 0;
  const hsa_status_t address_status =
      convertAddressMode<SQ_IMG_SAMP_WORD0, SQ_TEX_CLAMP>(addressing, desc.address_modes);
  if (address_status != HSA_STATUS_SUCCESS) {
    return address_status;
  }
  addressing.bits.force_unormalized =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  // Word 1: level of detail.
  SQ_IMG_SAMP_WORD1 lod;
  lod.u32_all = 0;
  lod.bits.max_lod = 4095;

  // Word 2: filtering. The same filter is used for minification and
  // magnification.
  SQ_IMG_SAMP_WORD2 filtering;
  filtering.u32_all = 0;
  if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_NEAREST) {
    filtering.bits.xy_mag_filter = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
  } else if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_LINEAR) {
    filtering.bits.xy_mag_filter = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
  } else {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  filtering.bits.xy_min_filter = filtering.bits.xy_mag_filter;
  filtering.bits.z_filter = SQ_TEX_Z_FILTER_NONE;
  filtering.bits.mip_filter = SQ_TEX_MIP_FILTER_NONE;

  // Word 3: border color.
  // TODO: check this bit with HSAIL spec.
  SQ_IMG_SAMP_WORD3 border;
  border.u32_all = 0;
  border.bits.border_color_type = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = addressing.u32_all;
  sampler.srd[1] = lod.u32_all;
  sampler.srd[2] = filtering.u32_all;
  sampler.srd[3] = border.u32_all;

  return HSA_STATUS_SUCCESS;
}

// Copies from a linear buffer into |image_region| of |dst_image| using the
// blit kernel. The blit queue is created on first use; if it cannot be
// created HSA_STATUS_ERROR_OUT_OF_RESOURCES is returned.
hsa_status_t ImageManagerKv::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  const bool queue_ready = (BlitQueueInit().queue_ != NULL);
  if (!queue_ready) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_status_t copy_status =
      ImageRuntime::instance()->blit_kernel().CopyBufferToImage(
          blit_queue_, blit_code_catalog_, src_memory, src_row_pitch, src_slice_pitch,
          dst_image, image_region);
  return copy_status;
}

// Copies |image_region| of |src_image| into a linear buffer using the blit
// kernel. The blit queue is created on first use; if it cannot be created
// HSA_STATUS_ERROR_OUT_OF_RESOURCES is returned.
hsa_status_t ImageManagerKv::CopyImageToBuffer(
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  const bool queue_ready = (BlitQueueInit().queue_ != NULL);
  if (!queue_ready) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_status_t copy_status =
      ImageRuntime::instance()->blit_kernel().CopyImageToBuffer(
          blit_queue_, blit_code_catalog_, src_image, dst_memory, dst_row_pitch,
          dst_slice_pitch, image_region);
  return copy_status;
}

// Copies a region between two images with the blit kernel.
//
// Source and destination must have the same channel order and type, with one
// exception: UNORM_INT8 SRGBA <-> RGBA copies are converted on the fly. Any
// other format mismatch yields HSA_STATUS_ERROR_INVALID_ARGUMENT, and a blit
// queue that cannot be created yields HSA_STATUS_ERROR_OUT_OF_RESOURCES.
hsa_status_t ImageManagerKv::CopyImage(const Image& dst_image,
                                       const Image& src_image,
                                       const hsa_dim3_t& dst_origin,
                                       const hsa_dim3_t& src_origin,
                                       const hsa_dim3_t size) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_ext_image_channel_order32_t from_order = src_image.desc.format.channel_order;
  const hsa_ext_image_channel_type32_t from_type = src_image.desc.format.channel_type;
  const hsa_ext_image_channel_order32_t to_order = dst_image.desc.format.channel_order;
  const hsa_ext_image_channel_type32_t to_type = dst_image.desc.format.channel_type;

  // Identical formats: plain copy.
  const bool identical_format = (from_order == to_order) && (from_type == to_type);
  if (identical_format) {
    return ImageRuntime::instance()->blit_kernel().CopyImage(
        blit_queue_, blit_code_catalog_, dst_image, src_image, dst_origin, src_origin, size,
        BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT);
  }

  // Source and destination format must be the same, except for
  // SRGBA <--> RGBA images.
  const bool both_unorm_int8 = (from_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8) &&
                               (to_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8);
  if (!both_unorm_int8) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  BlitKernel::KernelOp conversion_op = BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT;
  if ((from_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) &&
      (to_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA)) {
    conversion_op = BlitKernel::KERNEL_OP_COPY_IMAGE_STANDARD_TO_LINEAR;
  } else if ((from_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA) &&
             (to_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA)) {
    conversion_op = BlitKernel::KERNEL_OP_COPY_IMAGE_LINEAR_TO_STANDARD;
  }

  if (conversion_op == BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // KV and CZ don't have write support for SRGBA image, so treat the
  // destination image as RGBA image for the duration of the copy.
  SQ_IMG_RSRC_WORD1* dst_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(
      &const_cast<Image&>(dst_image).srd[1]);

  // Destination can be linear or standard, preserve the original value.
  const uint32_t saved_num_format = dst_word1->bits.num_format;
  dst_word1->bits.num_format = TYPE_UNORM;

  const hsa_status_t copy_status = ImageRuntime::instance()->blit_kernel().CopyImage(
      blit_queue_, blit_code_catalog_, dst_image, src_image, dst_origin, src_origin, size,
      conversion_op);

  // Revert to the original format after the copy operation is finished.
  dst_word1->bits.num_format = saved_num_format;

  return copy_status;
}

// Fills |region| of |image| with |pattern| using the blit kernel.
//
// Two formats need the SRD to be patched temporarily around the blit:
//  - UNORM_SHORT_101010: the W channel select is forced to SEL_0 so the GPU
//    ignores the last two (alpha) bits.
//  - sRGB channel orders: the pattern (read as four floats) has its RGB
//    components converted with LinearToStandardRGB and the image is written as
//    a UNORM image.
// The original SRD fields are restored before returning. Returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES if the blit queue cannot be created,
// otherwise the status of the blit.
hsa_status_t ImageManagerKv::FillImage(const Image& image, const void* pattern,
                                       const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  Image& target = const_cast<Image&>(image);

  // --- Temporary override of the W channel select ---------------------------
  SQ_BUF_RSRC_WORD3* patched_buf_word3 = NULL;
  SQ_IMG_RSRC_WORD3* patched_img_word3 = NULL;
  uint32_t saved_dst_sel_w = 0;
  const bool packed_101010 =
      (target.desc.format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010);
  if (packed_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (target.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      patched_buf_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&target.srd[3]);
      saved_dst_sel_w = patched_buf_word3->bits.dst_sel_w;
      patched_buf_word3->bits.dst_sel_w = SEL_0;
    } else {
      patched_img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&target.srd[3]);
      saved_dst_sel_w = patched_img_word3->bits.dst_sel_w;
      patched_img_word3->bits.dst_sel_w = SEL_0;
    }
  }

  // --- Temporary override of the number format for sRGB orders --------------
  SQ_IMG_RSRC_WORD1* patched_word1 = NULL;
  uint32_t saved_num_format = 0;
  const void* effective_pattern = pattern;
  float converted_pattern[4] = {0};

  const hsa_ext_image_channel_order32_t order = target.desc.format.channel_order;
  const bool srgb_order = (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
                          (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
                          (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
                          (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);
  if (srgb_order) {
    // KV and CZ don't have write support for SRGBA image, so convert pattern
    // to standard form and treat the image as RGBA image.
    const float* linear = reinterpret_cast<const float*>(pattern);
    converted_pattern[0] = LinearToStandardRGB(linear[0]);
    converted_pattern[1] = LinearToStandardRGB(linear[1]);
    converted_pattern[2] = LinearToStandardRGB(linear[2]);
    converted_pattern[3] = linear[3];  // Alpha is passed through unchanged.
    effective_pattern = converted_pattern;

    patched_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&target.srd[1]);
    saved_num_format = patched_word1->bits.num_format;
    patched_word1->bits.num_format = TYPE_UNORM;
  }

  const hsa_status_t fill_status = ImageRuntime::instance()->blit_kernel().FillImage(
      blit_queue_, blit_code_catalog_, target, effective_pattern, region);

  // Revert back original configuration.
  if (patched_buf_word3 != NULL) {
    patched_buf_word3->bits.dst_sel_w = saved_dst_sel_w;
  }
  if (patched_img_word3 != NULL) {
    patched_img_word3->bits.dst_sel_w = saved_dst_sel_w;
  }
  if (patched_word1 != NULL) {
    patched_word1->bits.num_format = saved_num_format;
  }

  return fill_status;
}

// Region-iteration callback that records the coarse-grained global region.
//
// |data| must point to an hsa_region_t; it is overwritten with |region| when
// the region is in the global segment and has the coarse-grained flag set.
// The callback returns HSA_STATUS_SUCCESS for every region it can query, so
// it never signals a match through its return value; if several regions
// qualify, the last one visited is the one left in |data|.
hsa_status_t ImageManagerKv::GetLocalMemoryRegion(hsa_region_t region,
                                                  void* data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_segment_t region_segment;
  hsa_status_t query = HSA::hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &region_segment);
  if (query != HSA_STATUS_SUCCESS) {
    return query;
  }

  if (region_segment == HSA_REGION_SEGMENT_GLOBAL) {
    uint32_t global_flags = 0;
    query = HSA::hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &global_flags);
    if (query != HSA_STATUS_SUCCESS) {
      return query;
    }

    const bool coarse_grained = (global_flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) != 0;
    if (coarse_grained) {
      *static_cast<hsa_region_t*>(data) = region;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Maps the hardware data format (and, where a float variant exists, the data
// type) of |image_prop| to the corresponding address library format. A data
// format without a mapping asserts in debug builds and yields
// ADDR_FMT_INVALID.
AddrFormat ImageManagerKv::GetAddrlibFormat(const ImageProperty& image_prop) {
  // Several formats come in an integer and a float flavour.
  const bool float_type = (image_prop.data_type == TYPE_FLOAT);

  switch (image_prop.data_format) {
    // Formats that map one-to-one.
    case FMT_8:
      return ADDR_FMT_8;
    case FMT_8_8:
      return ADDR_FMT_8_8;
    case FMT_2_10_10_10:
      return ADDR_FMT_2_10_10_10;
    case FMT_8_8_8_8:
      return ADDR_FMT_8_8_8_8;
    case FMT_5_6_5:
      return ADDR_FMT_5_6_5;
    case FMT_1_5_5_5:
      return ADDR_FMT_1_5_5_5;
    case FMT_8_24:
      return ADDR_FMT_8_24;

    // Formats with a dedicated float variant.
    case FMT_16:
      return float_type ? ADDR_FMT_16_FLOAT : ADDR_FMT_16;
    case FMT_32:
      return float_type ? ADDR_FMT_32_FLOAT : ADDR_FMT_32;
    case FMT_16_16:
      return float_type ? ADDR_FMT_16_16_FLOAT : ADDR_FMT_16_16;
    case FMT_32_32:
      return float_type ? ADDR_FMT_32_32_FLOAT : ADDR_FMT_32_32;
    case FMT_16_16_16_16:
      return float_type ? ADDR_FMT_16_16_16_16_FLOAT : ADDR_FMT_16_16_16_16;
    case FMT_32_32_32_32:
      return float_type ? ADDR_FMT_32_32_32_32_FLOAT : ADDR_FMT_32_32_32_32;

    default:
      assert(false && "Should not reach here");
      return ADDR_FMT_INVALID;
  }

  // Every switch path returns; this only keeps the function well-formed.
  return ADDR_FMT_INVALID;
}

// System-memory allocation callback registered with the address library in
// Initialize(); forwards the requested byte count to malloc.
VOID* ADDR_API
    ImageManagerKv::AllocSysMem(const ADDR_ALLOCSYSMEM_INPUT* input) {
  VOID* const block = malloc(input->sizeInBytes);
  return block;
}

// System-memory release callback registered with the address library in
// Initialize(); frees the block with free() and always reports ADDR_OK.
ADDR_E_RETURNCODE ADDR_API
    ImageManagerKv::FreeSysMem(const ADDR_FREESYSMEM_INPUT* input) {
  VOID* const block = input->pVirtAddr;
  free(block);
  return ADDR_OK;
}

// Asks the address library for the memory layout of the image described by
// |desc| and returns it through |out|.
//
// Devices with major version >= 9 use the Addr2 interface: a preferred swizzle
// mode is selected first, the surface is then computed, and the fields this
// file consumes (pitch, height, surfSize, bpp, baseAlign, sliceSize) are
// copied into the legacy output struct, with tileIndex carrying the chosen
// swizzle mode. Older devices use the legacy interface directly.
//
// |image_data_row_pitch| is a byte pitch requested by the caller (0 means
// none); |image_data_slice_pitch| is currently not consumed.
//
// Returns false if the format has a zero element size, if the address library
// reports an error, or (legacy path) if no tile index was produced.
bool ImageManagerKv::GetAddrlibSurfaceInfo(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);

  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  // The element size is used as a divisor below; an unsupported format would
  // otherwise cause a division by zero in builds where asserts are disabled.
  if (image_prop.element_size == 0) {
    return false;
  }

  const uint32_t width = static_cast<uint32_t>(desc.width);
  const uint32_t height = static_cast<uint32_t>(desc.height);
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast<uint32_t>(
      std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));

  const uint32_t major_ver = MajorVerFromDevID(chip_id_);

  if (major_ver >= 9) {
    ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
    in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
    in.format = addrlib_format;
    in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
    in.width = width;
    in.height = height;
    in.numSlices = num_slice;
    in.pitchInElement = image_data_row_pitch / image_prop.element_size;
    switch(desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      {
        in.resourceType = ADDR_RSRC_TEX_3D;
        /*
         * 3D swizzle modes enforce alignment
         * of the number of slices  to the block depth.
         * If numSlices = 3 then the 3 slices are
         * interleaved for 3D locality among the 8 slices
         * that make up each block. This causes the memory
         * footprint to jump to a 3x size of the ideal size
         * 'enable3DSwizzleMode' flag tests for env variable
         * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
         * 3D swizzle:
         * true: Keep view3dAs2dArray = 0 for real 3D interleaving.
         * false: Use view3dAs2dArray = 1 to avoid the alignment
         *       expansion.
         * 2D swizzle modes can lower size overhead but may yield
         * suboptimal cache behavior for fully 3D volumetric
         * operations.
         */
        const bool enable3DSwizzleMode =
            core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
        in.flags.view3dAs2dArray = enable3DSwizzleMode ? 0 : 1;
        break;
      }
    }
    in.flags.texture = 1;

    ADDR2_GET_PREFERRED_SURF_SETTING_INPUT  prefSettingsInput = { 0 };
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = { 0 };

    prefSettingsInput.size            = sizeof(prefSettingsInput);
    prefSettingsInput.flags           = in.flags;
    prefSettingsInput.bpp             = in.bpp;
    prefSettingsInput.format          = in.format;
    prefSettingsInput.width           = in.width;
    prefSettingsInput.height          = in.height;
    prefSettingsInput.numFrags        = in.numFrags;
    prefSettingsInput.numSamples      = in.numSamples;
    prefSettingsInput.numMipLevels    = in.numMipLevels;
    prefSettingsInput.numSlices       = in.numSlices;
    prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
    prefSettingsInput.resourceType    = in.resourceType;

    // Disallow all swizzles but linear.
    if (tileMode == Image::TileMode::LINEAR)
    {
      prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
    }

    prefSettingsInput.forbiddenBlock.micro = 1; // but don't ever allow the 256b swizzle modes
    prefSettingsInput.forbiddenBlock.var = 1; // and don't allow variable-size block modes

    if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_, &prefSettingsInput, &prefSettingsOutput)) {
      return false;
    }

    in.swizzleMode = prefSettingsOutput.swizzleMode;

    // The size field belongs to the Addr2 output struct that is actually handed
    // to the library, not to the legacy struct owned by the caller.
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out2 = {0};
    out2.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
    if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out2)) {
      return false;
    }

    // Translate the Addr2 result into the legacy output the callers consume.
    out.pitch = out2.pitch;
    out.height = out2.height;
    out.surfSize = out2.surfSize;
    out.bpp = out2.bpp;
    out.baseAlign = out2.baseAlign;
    out.tileIndex = in.swizzleMode;
    out.sliceSize = out2.sliceSize;
    return true;
  }

  ADDR_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
  in.tileMode = (tileMode == Image::TileMode::LINEAR)?
    ADDR_TM_LINEAR_ALIGNED : ADDR_TM_2D_TILED_THIN1;
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.numSamples = 1;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  in.flags.texture = 1;
  in.flags.noStencil = 1;
  in.flags.opt4Space = 0;
  in.tileType = ADDR_NON_DISPLAYABLE;
  in.tileIndex = -1;

  // A caller-supplied row pitch is expressed by widening the surface to that
  // many elements.
  if (image_data_row_pitch != 0) {
    in.width = image_data_row_pitch / image_prop.element_size;
//    in.pitchAlign  = image_data_row_pitch / image_prop.element_size;
//    in.heightAlign = image_data_slice_pitch / image_data_row_pitch;
  }

  if (ADDR_OK != AddrComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return false;
  }

  assert(out.tileIndex != -1);

  return out.tileIndex != -1;
}

/// @brief Compute the number of bytes covered by a region of pixels.
///
/// The extents that take part in the product depend on the geometry:
///  - 1D and 1DB use only the x extent,
///  - 2D, 2DDEPTH and 1DA use x * y,
///  - every other geometry uses x * y * z.
///
/// The multiplication is carried out in size_t. Multiplying the 32-bit
/// operands directly would wrap around for large regions before the result
/// is widened to the return type.
///
/// @param geometry Image geometry selecting which extents are used.
/// @param size_pixel Region extents, in pixels.
/// @param element_size Size of one pixel, in bytes.
/// @return Region size in bytes.
size_t ImageManagerKv::CalWorkingSizeBytes(hsa_ext_image_geometry_t geometry,
                                           hsa_dim3_t size_pixel,
                                           uint32_t element_size) const {
  const size_t extent_x = static_cast<size_t>(size_pixel.x);
  const size_t extent_y = static_cast<size_t>(size_pixel.y);
  const size_t extent_z = static_cast<size_t>(size_pixel.z);
  const size_t pixel_bytes = static_cast<size_t>(element_size);

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
      return extent_x * pixel_bytes;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      return extent_x * extent_y * pixel_bytes;
    default:
      return extent_x * extent_y * extent_z * pixel_bytes;
  }
}

/// @brief Return the blit queue, creating it and the blit kernels on first use.
///
/// The queue is created lazily while holding lock_. If any step fails
/// (querying the maximum queue size, creating the queue, or building the
/// blit kernels) blit_queue_.queue_ is left NULL and the object is returned
/// in that state.
///
/// @return Reference to blit_queue_; its queue_ member is NULL on failure.
BlitQueue& ImageManagerKv::BlitQueueInit() {
  if (blit_queue_.queue_ == NULL) {
    // Queue is a precious resource, so only create it when it is needed.
    std::lock_guard<std::mutex> lock(lock_);
    if (blit_queue_.queue_ == NULL) {
      // Create the kernel queue.
      blit_queue_.cached_index_ = 0;

      uint32_t max_queue_size = 0;
      hsa_status_t status =
          HSA::hsa_agent_get_info(agent_, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size);

      // Do not try to create a queue from a size that was never reported.
      if (HSA_STATUS_SUCCESS != status) {
        return blit_queue_;
      }

      status = HSA::hsa_queue_create(agent_, max_queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                                     UINT_MAX, UINT_MAX, &blit_queue_.queue_);

      if (HSA_STATUS_SUCCESS != status) {
        blit_queue_.queue_ = NULL;
        return blit_queue_;
      }

      // Get the kernel handles.
      status = ImageRuntime::instance()->blit_kernel().BuildBlitCode(agent_, blit_code_catalog_);

      if (HSA_STATUS_SUCCESS != status) {
        // Roll back so that a later call can retry from scratch.
        blit_code_catalog_.clear();
        HSA::hsa_queue_destroy(blit_queue_.queue_);
        blit_queue_.queue_ = NULL;
        return blit_queue_;
      }
    }
  }

  assert(blit_queue_.queue_ != NULL &&
         blit_code_catalog_.size() == BlitKernel::KERNEL_OP_COUNT);

  return blit_queue_;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager_kv.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H

#include "addrlib/inc/addrinterface.h"
#include "blit_kernel.h"
#include "image_lut_kv.h"
#include "image_manager.h"

namespace rocr {
namespace image {

/// @brief Device specific image manager derived from ImageManager.
///
/// Declares the image/sampler descriptor (SRD) population, size and
/// alignment calculation, and copy/fill entry points, together with the
/// addrlib and blit-queue state they rely on.
class ImageManagerKv : public ImageManager {
 public:
  explicit ImageManagerKv();
  virtual ~ImageManagerKv();

  /// @brief Initialize the manager for the agent @p agent_handle.
  /// NOTE(review): the definition is outside this header; confirm exactly
  /// which members are set up here.
  virtual hsa_status_t Initialize(hsa_agent_t agent_handle);

  /// @brief Counterpart of Initialize().
  /// NOTE(review): presumably releases the blit queue and addrlib handle;
  /// confirm against the definition.
  virtual void Cleanup();

  /// @brief Retrieve device specific image property of a certain format
  /// and geometry.
  virtual ImageProperty GetImageProperty(
      hsa_agent_t component, const hsa_ext_image_format_t& format,
      hsa_ext_image_geometry_t geometry) const;

  /// @brief Retrieve device specific supported max width, height, depth,
  /// and array size of an image geometry.
  virtual void GetImageInfoMaxDimension(hsa_agent_t component,
                                        hsa_ext_image_geometry_t geometry,
                                        uint32_t& width, uint32_t& height,
                                        uint32_t& depth,
                                        uint32_t& array_size) const;

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch, size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const;

  /// @brief Fill image structure with device specific image object.
  virtual hsa_status_t PopulateImageSrd(Image& image) const;

  /// @brief Fill image structure with device specific image object using the given format.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;

  /// @brief Modify device specific image object according to the specified
  /// new format.
  virtual hsa_status_t ModifyImageSrd(Image& image,
                                      hsa_ext_image_format_t& new_format) const;

  /// @brief Fill sampler structure with device specific sampler object.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;

  /// @brief Copy the content of a linear memory to an image object.
  virtual hsa_status_t CopyBufferToImage(
      const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
      const Image& dst_image, const hsa_ext_image_region_t& image_region);

  /// @brief Copy the content of an image object to a linear memory.
  virtual hsa_status_t CopyImageToBuffer(
      const Image& src_image, void* dst_memory, size_t dst_row_pitch,
      size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);

  /// @brief Transfer images backing storage using agent copy.
  virtual hsa_status_t CopyImage(const Image& dst_image, const Image& src_image,
                                 const hsa_dim3_t& dst_origin,
                                 const hsa_dim3_t& src_origin,
                                 const hsa_dim3_t size);

  /// @brief Fill image backing storage using agent copy.
  virtual hsa_status_t FillImage(const Image& image, const void* pattern,
                                 const hsa_ext_image_region_t& region);

 protected:
  /// @brief Region callback taking a region and an opaque @p data pointer.
  /// NOTE(review): presumably used to locate the agent's local memory
  /// region; confirm against the definition.
  static hsa_status_t GetLocalMemoryRegion(hsa_region_t region, void* data);

  /// @brief Map an image property to the corresponding addrlib format.
  static AddrFormat GetAddrlibFormat(const ImageProperty& image_prop);

  /// @brief System memory allocation callback with the addrlib signature.
  static VOID* ADDR_API AllocSysMem(const ADDR_ALLOCSYSMEM_INPUT* input);

  /// @brief System memory release callback with the addrlib signature.
  static ADDR_E_RETURNCODE ADDR_API
      FreeSysMem(const ADDR_FREESYSMEM_INPUT* input);

  /// @brief Query addrlib for the surface layout of the described image.
  /// @return true when the surface information in @p out is valid.
  bool GetAddrlibSurfaceInfo(hsa_agent_t component,
                             const hsa_ext_image_descriptor_t& desc,
                             Image::TileMode tileMode,
                             size_t image_data_row_pitch,
                             size_t image_data_slice_pitch,
                             ADDR_COMPUTE_SURFACE_INFO_OUTPUT& out) const;

  /// @brief Size in bytes of a pixel region; the extents multiplied depend
  /// on @p geometry.
  size_t CalWorkingSizeBytes(hsa_ext_image_geometry_t geometry,
                             hsa_dim3_t size_pixel,
                             uint32_t element_size) const;

  /// @brief Tell whether @p address should be treated as device local memory.
  virtual bool IsLocalMemory(const void* address) const;

  /// @brief Return blit_queue_, creating the queue and blit kernels on
  /// first use. The returned object's queue_ is NULL if creation failed.
  BlitQueue& BlitQueueInit();

  /// @brief Access the format/swizzle/geometry lookup table of this manager.
  virtual const ImageLutKv& ImageLut() const { return image_lut_; };

  // Handle passed to the addrlib surface-info queries.
  ADDR_HANDLE addr_lib_;

  // Agent used when creating the blit queue and building the blit kernels.
  hsa_agent_t agent_;

  // NOTE(review): presumably the addrlib chip family of agent_; not set or
  // read in the visible code, confirm against Initialize().
  uint32_t family_type_;

  // Device id; ImageManagerNv derives the gfx minor version from it.
  uint32_t chip_id_;

  // Lazily created by BlitQueueInit().
  BlitQueue blit_queue_;

  // Filled by BlitKernel::BuildBlitCode() in BlitQueueInit(); expected to
  // hold BlitKernel::KERNEL_OP_COUNT entries once built.
  std::vector<BlitCodeInfo> blit_code_catalog_;

  // NOTE(review): presumably the memory type programmed into descriptors;
  // not referenced in the visible code, confirm.
  uint32_t mtype_;

  // Subtracted from an image's data address when IsLocalMemory() reports
  // the address as local (see ImageManagerNv::PopulateImageSrd).
  uintptr_t local_memory_base_address_;

  // Serializes the lazy creation performed by BlitQueueInit().
  std::mutex lock_;

 private:
  // Lookup table returned by ImageLut().
  ImageLutKv image_lut_;
  DISALLOW_COPY_AND_ASSIGN(ImageManagerKv);
};

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H


================================================
FILE: runtime/hsa-runtime/image/image_manager_nv.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_manager_nv.h"

#include <assert.h>

#include <algorithm>
#include <climits>

#include "core/inc/runtime.h"
#include "inc/hsa_ext_amd.h"
#include "core/inc/hsa_internal.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_nv.h"
#include "util.h"
#include "device_info.h"

namespace rocr {
namespace image {

// Layout checks for the descriptor word types used in this file. Each word
// is copied into a single Image::srd[] / Sampler::srd[] slot.
// NOTE(review): ASSERT_SIZE_UINT32 is defined elsewhere; presumably a
// compile-time check that the type is exactly 32 bits wide.

// Buffer resource descriptor words (used for 1DB images).
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD3)

// Image resource descriptor words.
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD3)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD4)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD5)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD6)
ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD7)

// Sampler descriptor words.
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD0)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD1)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD2)
ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)

//-----------------------------------------------------------------------------
// Workaround for the switch to combined format/type codes and for the missing
// gfx10-specific lookup table. Only covers the types used in image_lut_kv.cpp.
//-----------------------------------------------------------------------------
// One row of the conversion table: a (data format, numeric type) pair and the
// combined format code that represents both.
struct formatconverstion_t {
  FMT fmt;        // Data format half of the pair.
  type type;      // Numeric type half of the pair.
  FORMAT format;  // Combined code returned for this pair.
};

// Format/Type to combined format code table.
// Sorted and indexed to allow fast searches: all rows that share one FMT are
// contiguous, and FormatEntryPoint holds the index of the first row for each
// FMT. The trailing comment on every row is its index in the table.
static const formatconverstion_t FormatLUT[] = {
    {FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM},          // 0
    {FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM},    // 1
    {FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM},    // 2
    {FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT},      // 3
    {FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT},      // 4
    {FMT_16, TYPE_UNORM, CFMT_16_UNORM},                    // 5
    {FMT_16, TYPE_SNORM, CFMT_16_SNORM},                    // 6
    {FMT_16, TYPE_UINT, CFMT_16_UINT},                      // 7
    {FMT_16, TYPE_SINT, CFMT_16_SINT},                      // 8
    {FMT_16, TYPE_FLOAT, CFMT_16_FLOAT},                    // 9
    {FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM},              // 10
    {FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM},              // 11
    {FMT_16_16, TYPE_UINT, CFMT_16_16_UINT},                // 12
    {FMT_16_16, TYPE_SINT, CFMT_16_16_SINT},                // 13
    {FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT},              // 14
    {FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM},  // 15
    {FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM},  // 16
    {FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT},    // 17
    {FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT},    // 18
    {FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT},  // 19
    {FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM},    // 20
    {FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM},    // 21
    {FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT},      // 22
    {FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT},      // 23
    {FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM},                // 24
    {FMT_24_8, TYPE_UINT, CFMT_24_8_UINT},                  // 25
    {FMT_32, TYPE_UINT, CFMT_32_UINT},                      // 26
    {FMT_32, TYPE_SINT, CFMT_32_SINT},                      // 27
    {FMT_32, TYPE_FLOAT, CFMT_32_FLOAT},                    // 28
    {FMT_32_32, TYPE_UINT, CFMT_32_32_UINT},                // 29
    {FMT_32_32, TYPE_SINT, CFMT_32_32_SINT},                // 30
    {FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT},              // 31
    {FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT},          // 32
    {FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT},          // 33
    {FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT},        // 34
    {FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT},    // 35
    {FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT},    // 36
    {FMT_32_32_32_32, TYPE_FLOAT, CFMT_32_32_32_32_FLOAT},  // 37
    {FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM},          // 38
    {FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM},              // 39
    {FMT_8, TYPE_UNORM, CFMT_8_UNORM},                      // 40
    {FMT_8, TYPE_SNORM, CFMT_8_SNORM},                      // 41
    {FMT_8, TYPE_UINT, CFMT_8_UINT},                        // 42
    {FMT_8, TYPE_SINT, CFMT_8_SINT},                        // 43
    {FMT_8, TYPE_SRGB, CFMT_8_SRGB},                        // 44
    {FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM},                // 45
    {FMT_8_24, TYPE_UINT, CFMT_8_24_UINT},                  // 46
    {FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM},                  // 47
    {FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM},                  // 48
    {FMT_8_8, TYPE_UINT, CFMT_8_8_UINT},                    // 49
    {FMT_8_8, TYPE_SINT, CFMT_8_8_SINT},                    // 50
    {FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB},                    // 51
    {FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM},          // 52
    {FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM},          // 53
    {FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT},            // 54
    {FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT},            // 55
    {FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB}             // 56
};
// Number of rows in FormatLUT (57). Also used in FormatEntryPoint as the
// "no rows for this FMT" marker.
static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t);

// Index in FormatLUT to start search, indexed by FMT enum.
// A value of 57 equals FormatLUTSize, so the search loop in
// GetCombinedFormat() runs zero iterations and CFMT_INVALID is returned.
// The trailing comment gives the slot number and the FormatLUT rows it
// points at.
static const int FormatEntryPoint[] = {
  57,  // 0:  no rows
  40,  // 1:  FMT_8 rows start at 40
  5,   // 2:  FMT_16 rows start at 5
  47,  // 3:  FMT_8_8 rows start at 47
  26,  // 4:  FMT_32 rows start at 26
  10,  // 5:  FMT_16_16 rows start at 10
  57,  // 6:  no rows
  57,  // 7:  no rows
  1,   // 8:  FMT_10_10_10_2 rows start at 1
  20,  // 9:  FMT_2_10_10_10 rows start at 20
  52,  // 10: FMT_8_8_8_8 rows start at 52
  29,  // 11: FMT_32_32 rows start at 29
  15,  // 12: FMT_16_16_16_16 rows start at 15
  32,  // 13: FMT_32_32_32 rows start at 32
  35,  // 14: FMT_32_32_32_32 rows start at 35
  57,  // 15: no rows
  39,  // 16: FMT_5_6_5 row is 39
  0,   // 17: FMT_1_5_5_5 row is 0
  38,  // 18: FMT_5_5_5_1 row is 38
  57,  // 19: no rows
  45,  // 20: FMT_8_24 rows start at 45
  24   // 21: FMT_24_8 rows start at 24
};

// Translate a (data format, numeric type) pair into the combined format code.
//
// The search starts at the FormatLUT row recorded in FormatEntryPoint for
// |fmt| and inspects at most six rows, since no format has more type
// variants than that in the table.
//
// Returns CFMT_INVALID when the pair is not in the table or when |fmt| lies
// outside FormatEntryPoint. The range check is also enforced at run time so
// that builds with asserts disabled never index past the end of the array.
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  const size_t entry_count = sizeof(FormatEntryPoint) / sizeof(FormatEntryPoint[0]);
  assert(fmt < entry_count && "FMT out of range.");
  if (fmt >= entry_count) {
    return CFMT_INVALID;
  }

  const int start = FormatEntryPoint[fmt];
  const int stop = std::min(start + 6, FormatLUTSize); // Only 6 types are used in image_lut_kv.cpp

  for (int i = start; i < stop; i++) {
    if ((FormatLUT[i].fmt == fmt) && (FormatLUT[i].type == type))
      return FormatLUT[i].format;
  }
  return CFMT_INVALID;
}
//-----------------------------------------------------------------------------
// End workaround 
//-----------------------------------------------------------------------------

// Nothing to set up beyond what the ImageManagerKv default constructor does;
// the base class is default-constructed implicitly.
ImageManagerNv::ImageManagerNv() {}

// No resources of its own to release.
ImageManagerNv::~ImageManagerNv() = default;

// TODO(cfreehil) remove from class, make it a utility function
/// @brief Compute size and base alignment of an image's backing storage.
///
/// The tile mode is TILED only for an opaque layout on a base-profile agent
/// with a geometry other than 1DB; everything else is laid out linearly.
/// For a linear layout (again excluding 1DB) any caller supplied row or slice
/// pitch must match the pitch computed by addrlib.
///
/// @return HSA_STATUS_SUCCESS and a filled @p image_info, the status of the
/// agent query if that fails, HSA_STATUS_ERROR if addrlib fails, or
/// HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED on a pitch mismatch.
hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR2_COMPUTE_SURFACE_INFO_OUTPUT surface = {0};

  hsa_profile_t agent_profile;
  const hsa_status_t query_status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  const bool buffer_image = (desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  Image::TileMode tileMode = Image::TileMode::LINEAR;
  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE &&
      agent_profile == HSA_PROFILE_BASE && !buffer_image) {
    tileMode = Image::TileMode::TILED;
  }

  const uint32_t swizzle_mode = GetAddrlibSurfaceInfoNv(
      component, desc, tileMode, image_data_row_pitch, image_data_slice_pitch,
      surface);
  if (swizzle_mode == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  const size_t computed_row_pitch = (surface.bpp >> 3) * surface.pitch;
  const size_t computed_slice_pitch = computed_row_pitch * surface.height;

  if (!buffer_image && image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    // A pitch of zero means the caller did not request a specific value.
    const bool row_mismatch =
        image_data_row_pitch && (computed_row_pitch != image_data_row_pitch);
    const bool slice_mismatch =
        image_data_slice_pitch && (computed_slice_pitch != image_data_slice_pitch);
    if (row_mismatch || slice_mismatch) {
      return static_cast<hsa_status_t>(
          HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
    }
  }

  image_info.size = surface.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = surface.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}

/// @brief Every address is reported as local memory by this manager.
/// @param address Ignored.
/// @return Always true.
bool ImageManagerNv::IsLocalMemory(const void* address) const {
  (void)address;
  return true;
}

/// @brief Build the image SRD starting from imported vendor metadata.
///
/// The eight descriptor words are first copied from @p descriptor and then
/// patched with the address, format and channel swizzle of @p image.
///
/// @return HSA_STATUS_SUCCESS, or
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
/// supported or its element size differs from the hardware pixel size.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image,
                                     const metadata_amd_t* descriptor) const {
  const metadata_amd_nv_t* nv_meta =
      reinterpret_cast<const metadata_amd_nv_t*>(descriptor);

  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  const bool format_usable =
      (image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) &&
      (image_prop.element_size != 0);
  if (!format_usable) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // Address written into the descriptor: relative to the local memory base
  // when the data is reported as local.
  const void* device_addr = image.data;
  if (IsLocalMemory(image.data)) {
    device_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the imported descriptor words.
  image.srd[0] = nv_meta->word0.u32All;
  image.srd[1] = nv_meta->word1.u32All;
  image.srd[2] = nv_meta->word2.u32All;
  image.srd[3] = nv_meta->word3.u32All;
  image.srd[4] = nv_meta->word4.u32All;
  image.srd[5] = nv_meta->word5.u32All;
  image.srd[6] = nv_meta->word6.u32All;
  image.srd[7] = nv_meta->word7.u32All;

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB images use a buffer descriptor.
    SQ_BUF_RSRC_WORD0 buf_word0;
    buf_word0.val = 0;
    buf_word0.f.BASE_ADDRESS = PtrLow32(device_addr);

    SQ_BUF_RSRC_WORD1 buf_word1;
    buf_word1.val = image.srd[1];
    buf_word1.f.BASE_ADDRESS_HI = PtrHigh32(device_addr);
    buf_word1.f.STRIDE = image_prop.element_size;

    SQ_BUF_RSRC_WORD3 buf_word3;
    buf_word3.val = image.srd[3];
    buf_word3.f.DST_SEL_X = swizzle.x;
    buf_word3.f.DST_SEL_Y = swizzle.y;
    buf_word3.f.DST_SEL_Z = swizzle.z;
    buf_word3.f.DST_SEL_W = swizzle.w;
    buf_word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    buf_word3.f.INDEX_STRIDE = image_prop.element_size;

    image.srd[0] = buf_word0.val;
    image.srd[1] = buf_word1.val;
    image.srd[3] = buf_word3.val;
  } else {
    const uint32_t hw_pixel_size =
        ImageLut().GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hw_pixel_size) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }

    // Patch the copied words in place.
    SQ_IMG_RSRC_WORD0* img_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
    SQ_IMG_RSRC_WORD1* img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    SQ_IMG_RSRC_WORD6* img_word6 = reinterpret_cast<SQ_IMG_RSRC_WORD6*>(&image.srd[6]);
    SQ_IMG_RSRC_WORD7* img_word7 = reinterpret_cast<SQ_IMG_RSRC_WORD7*>(&image.srd[7]);

    img_word0->bits.BASE_ADDRESS = PtrLow40Shift8(device_addr);
    img_word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(device_addr);
    img_word1->bits.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    img_word3->bits.DST_SEL_X = swizzle.x;
    img_word3->bits.DST_SEL_Y = swizzle.y;
    img_word3->bits.DST_SEL_Z = swizzle.z;
    img_word3->bits.DST_SEL_W = swizzle.w;

    const bool one_dimensional =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D);
    if (one_dimensional) {
      img_word3->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    }

    // Imported metadata holds the offset to metadata, add the image base address.
    uintptr_t meta = uintptr_t(img_word7->bits.META_DATA_ADDRESS_HI) << 16;
    meta |= uintptr_t(img_word6->bits.META_DATA_ADDRESS) << 8;
    meta += reinterpret_cast<uintptr_t>(device_addr);

    img_word6->bits.META_DATA_ADDRESS = PtrLow16Shift8(reinterpret_cast<void*>(meta));
    img_word7->bits.META_DATA_ADDRESS_HI = PtrHigh64Shift16(reinterpret_cast<void*>(meta));
  }

  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

// Pick the border color swizzle that matches a channel swizzle.
//
// The decision is driven by which destination channel selects SEL_X, checked
// in the order alpha, red, green, blue. When none does, the identity swizzle
// TEX_BC_Swizzle_XYZW is returned.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  const SEL red = static_cast<SEL>(swizzle.x);
  const SEL green = static_cast<SEL>(swizzle.y);
  const SEL blue = static_cast<SEL>(swizzle.z);
  const SEL alpha = static_cast<SEL>(swizzle.w);

  if (alpha == SEL_X) {
    // Candidates are TEX_BC_Swizzle_WZYX and TEX_BC_Swizzle_WXYZ.
    //
    // For the pre-defined border colors (white, opaque black, transparent
    // black) only the position of the alpha channel matters because the RGB
    // channels are identical, so either candidate works. Not sure what
    // happens with border color palettes.
    if (blue == SEL_Y) {
      return TEX_BC_Swizzle_WZYX;  // ABGR
    }
    const bool every_channel_x =
        (red == SEL_X) && (green == SEL_X) && (blue == SEL_X);
    // RGBA when every channel reads X, ARGB otherwise.
    return every_channel_x ? TEX_BC_Swizzle_XYZW : TEX_BC_Swizzle_WXYZ;
  }

  if (red == SEL_X) {
    // Candidates are TEX_BC_Swizzle_XYZW and TEX_BC_Swizzle_XWYZ.
    if (green == SEL_Y) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    const bool x_replicated_with_w_alpha =
        (green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W);
    // RGBA for the replicated case, RAGB otherwise.
    return x_replicated_with_w_alpha ? TEX_BC_Swizzle_XYZW : TEX_BC_Swizzle_XWYZ;
  }

  if (green == SEL_X) {
    return TEX_BC_Swizzle_YXWZ;  // GRAB
  }

  if (blue == SEL_X) {
    return TEX_BC_Swizzle_ZYXW;  // BGRA
  }

  return TEX_BC_Swizzle_XYZW;
}


/// @brief Build the image SRD from the image's own descriptor.
///
/// 1DB images get a four-word buffer descriptor. Every other geometry gets an
/// eight-word image descriptor whose layout comes from addrlib. The function
/// also stores the row/slice pitch in @p image and the channel type, channel
/// order and width in srd[8..10].
///
/// Every descriptor word is zeroed before its bitfields are written, so bits
/// that are not explicitly set never carry indeterminate stack contents.
///
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when addrlib cannot compute
/// the surface layout.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const void* image_data_addr = image.data;

  // Addresses reported as local are made relative to the local memory base.
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word1.f.SWIZZLE_ENABLE = false;
    word1.f.CACHE_SWIZZLE = false;

    word2.val = 0;
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.RESOURCE_LEVEL = 1;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
         image.component, image.desc, image.tile_mode,
                                     image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    word1.f.MIN_LOD = 0;
    word1.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(image.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;
    word2.f.RESOURCE_LEVEL = 1;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;
    word4.f.DEPTH =
        (image_array) // Doesn't hurt but isn't array_size already >0?
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    uint32_t minor_ver = MinorVerFromDevID(chip_id_);
    // For 1d, 2d and 2d-msaa in gfx1030 and beyond this is pitch-1
    if ((minor_ver >= 3) && !image_array && !image_3d)
      word4.f.PITCH = out.pitch - 1;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    // Words 5..7 carry no information here and are written as zero.
    image.srd[5] = 0;
    image.srd[6] = 0;
    image.srd[7] = 0;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Re-target an already populated SRD to @p new_format.
///
/// Stores the new format in the image descriptor, then rewrites the combined
/// format code and channel swizzle in place. For 1DB images both live in
/// buffer word 3; for all other geometries the format is in image word 1 and
/// the swizzle in image word 3. srd[8..10] are refreshed as well.
///
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  const bool buffer_image = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  if (!buffer_image) {
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1->bits.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    const Swizzle channel_map = ImageLut().MapSwizzle(image.desc.format.channel_order);
    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3->bits.DST_SEL_X = channel_map.x;
    img_word3->bits.DST_SEL_Y = channel_map.y;
    img_word3->bits.DST_SEL_Z = channel_map.z;
    img_word3->bits.DST_SEL_W = channel_map.w;
  } else {
    const Swizzle channel_map = ImageLut().MapSwizzle(image.desc.format.channel_order);
    SQ_BUF_RSRC_WORD3* buf_word3 =
        reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3->bits.DST_SEL_X = channel_map.x;
    buf_word3->bits.DST_SEL_Y = channel_map.y;
    buf_word3->bits.DST_SEL_Z = channel_map.z;
    buf_word3->bits.DST_SEL_W = channel_map.w;
    buf_word3->bits.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);
  }

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}

/// @brief Build the four-word sampler SRD from the sampler descriptor.
///
/// Word 0 receives the address modes and the unnormalized-coordinate flag,
/// word 1 the maximum LOD, word 2 the filters and word 3 the border color
/// type. sampler.srd is only written once all inputs have been accepted.
///
/// @return HSA_STATUS_SUCCESS, the status reported by convertAddressMode if
/// it fails, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown filter mode.
hsa_status_t ImageManagerNv::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_v2_t& desc = sampler.desc;

  SQ_IMG_SAMP_WORD0 addr_word;
  addr_word.u32All = 0;
  const hsa_status_t addr_status =
      convertAddressMode<SQ_IMG_SAMP_WORD0, SQ_TEX_CLAMP>(addr_word, desc.address_modes);
  if (addr_status != HSA_STATUS_SUCCESS) {
    return addr_status;
  }
  addr_word.bits.FORCE_UNNORMALIZED =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  SQ_IMG_SAMP_WORD1 lod_word;
  lod_word.u32All = 0;
  lod_word.bits.MAX_LOD = 4095;

  SQ_IMG_SAMP_WORD2 filter_word;
  filter_word.u32All = 0;
  if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_NEAREST) {
    filter_word.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
  } else if (desc.filter_mode == HSA_EXT_SAMPLER_FILTER_MODE_LINEAR) {
    filter_word.bits.XY_MAG_FILTER = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
  } else {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  // Minification uses the same filter as magnification.
  filter_word.bits.XY_MIN_FILTER = filter_word.bits.XY_MAG_FILTER;
  filter_word.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  filter_word.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  SQ_IMG_SAMP_WORD3 border_word;
  border_word.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  border_word.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = addr_word.u32All;
  sampler.srd[1] = lod_word.u32All;
  sampler.srd[2] = filter_word.u32All;
  sampler.srd[3] = border_word.u32All;

  return HSA_STATUS_SUCCESS;
}

/// @brief Ask addrlib for the preferred swizzle mode and the resulting surface
/// layout of an image.
///
/// @param component               Agent used to resolve the image property.
/// @param desc                    Image descriptor (format, geometry, extent).
/// @param tileMode                LINEAR forbids every non-linear block type.
/// @param image_data_row_pitch    Row pitch in bytes; only forwarded to addrlib
///                                when the device minor version is >= 3.
/// @param image_data_slice_pitch  Not used by this function.
/// @param out                     Receives the computed surface information.
/// @return The selected swizzle mode, or (uint32_t)(-1) when addrlib reports an
///         error or a zero-sized surface.
uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const uint32_t kFailure = static_cast<uint32_t>(-1);

  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);
  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  // At least one slice; otherwise whichever of array size / depth is larger.
  const uint32_t slice_count = static_cast<uint32_t>(
      std::max<size_t>(1, std::max(desc.array_size, desc.depth)));

  const uint32_t minor_ver = MinorVerFromDevID(chip_id_);

  ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.width = static_cast<uint32_t>(desc.width);
  in.height = static_cast<uint32_t>(desc.height);
  in.numSlices = slice_count;

  // Custom Pitch is supported in gfx1030 and beyond
  if (minor_ver >= 3) {
    in.pitchInElement = image_data_row_pitch / image_prop.element_size;
  }

  switch (desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_3D:
      in.resourceType = ADDR_RSRC_TEX_3D;
      // 3D swizzle modes enforce alignment of the number of slices to the
      // block depth. If numSlices = 3 then the 3 slices are interleaved for
      // 3D locality among the 8 slices that make up each block. This causes
      // the memory footprint to jump from an ideal size to 3x the size.
      // The enable_3d_swizzle flag (env variable
      // HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG) selects the behaviour:
      //   true:  keep view3dAs2dArray = 0 for real 3D interleaving.
      //   false: use view3dAs2dArray = 1 to avoid the alignment expansion.
      // 2D swizzle modes can lower size overhead but may yield suboptimal
      // cache behavior for fully 3D volumetric operations.
      in.flags.view3dAs2dArray =
          core::Runtime::runtime_singleton_->flag().enable_3d_swizzle() ? 0 : 1;
      break;
  }
  in.flags.texture = 1;

  // Mirror the surface description into the preferred-setting query.
  ADDR2_GET_PREFERRED_SURF_SETTING_INPUT pref_in = {0};
  ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT pref_out = {0};

  pref_in.size = sizeof(pref_in);
  pref_in.flags = in.flags;
  pref_in.bpp = in.bpp;
  pref_in.format = in.format;
  pref_in.width = in.width;
  pref_in.height = in.height;
  pref_in.numFrags = in.numFrags;
  pref_in.numSamples = in.numSamples;
  pref_in.numMipLevels = in.numMipLevels;
  pref_in.numSlices = in.numSlices;
  pref_in.resourceLoction = ADDR_RSRC_LOC_UNDEF;
  pref_in.resourceType = in.resourceType;

  // Disallow all swizzles but linear.
  if (tileMode == Image::TileMode::LINEAR) {
    pref_in.forbiddenBlock.macroThin4KB = 1;
    pref_in.forbiddenBlock.macroThick4KB = 1;
    pref_in.forbiddenBlock.macroThin64KB = 1;
    pref_in.forbiddenBlock.macroThick64KB = 1;
    pref_in.forbiddenBlock.micro = 1;
    pref_in.forbiddenBlock.var = 1;
  }

  // but don't ever allow the 256b swizzle modes
  //pref_in.forbiddenBlock.micro = 1;
  // and don't allow variable-size block modes
  //pref_in.forbiddenBlock.var = 1;

  if (Addr2GetPreferredSurfaceSetting(addr_lib_, &pref_in, &pref_out) != ADDR_OK) {
    return kFailure;
  }
  in.swizzleMode = pref_out.swizzleMode;

  out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
  if (Addr2ComputeSurfaceInfo(addr_lib_, &in, &out) != ADDR_OK || out.surfSize == 0) {
    return kFailure;
  }

  return in.swizzleMode;
}

/// @brief Fill a region of an image with a pattern using the blit kernel.
///
/// Two SRD fields are patched for the duration of the blit and restored
/// afterwards:
///  - UNORM_SHORT_101010 images get DST_SEL_W forced to SEL_0;
///  - sRGB channel orders get their format switched to the UNORM variant, with
///    the RGB pattern components passed through LinearToStandardRGB first.
///
/// @param image    Image to fill; its SRD is modified and then restored.
/// @param pattern  Fill value; read as four floats for sRGB channel orders.
/// @param region   Region of the image to fill.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES if no blit queue is available,
///         otherwise the status returned by the blit kernel.
hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern,
                                       const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // The SRD has to be patched in place, so drop constness for the blit.
  Image* target = const_cast<Image*>(&image);

  SQ_BUF_RSRC_WORD3* buffer_word3 = NULL;
  SQ_IMG_RSRC_WORD3* image_word3 = NULL;
  uint32_t saved_dst_sel_w = 0;
  if (target->desc.format.channel_type ==
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (target->desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      image_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = image_word3->bits.DST_SEL_W;
      image_word3->bits.DST_SEL_W = SEL_0;
    } else {
      buffer_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = buffer_word3->bits.DST_SEL_W;
      buffer_word3->bits.DST_SEL_W = SEL_0;
    }
  }

  const auto order = target->desc.format.channel_order;
  const bool srgb_order = order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA ||
                          order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB ||
                          order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX ||
                          order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA;

  SQ_IMG_RSRC_WORD1* format_word = NULL;
  uint32_t saved_format = 0;
  const void* blit_pattern = pattern;
  float srgb_pattern[4] = {0};
  if (srgb_order) {
    // KV and CZ don't have write support for SRGBA image, so convert pattern
    // to standard form and treat the image as RGBA image.
    const float* linear = reinterpret_cast<const float*>(pattern);
    for (int channel = 0; channel < 3; ++channel) {
      srgb_pattern[channel] = LinearToStandardRGB(linear[channel]);
    }
    srgb_pattern[3] = linear[3];  // Alpha is passed through unchanged.
    blit_pattern = srgb_pattern;

    ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);

    format_word = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&target->srd[1]);
    saved_format = format_word->bits.FORMAT;
    format_word->bits.FORMAT = GetCombinedFormat(image_prop.data_format, TYPE_UNORM);
  }

  const hsa_status_t status = ImageRuntime::instance()->blit_kernel().FillImage(
      blit_queue_, blit_code_catalog_, *target, blit_pattern, region);

  // Revert back original configuration.
  if (buffer_word3 != NULL) {
    buffer_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (image_word3 != NULL) {
    image_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }
  if (format_word != NULL) {
    format_word->bits.FORMAT = saved_format;
  }

  return status;
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_manager_nv.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_ 
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_ 

#include "addrlib/inc/addrinterface.h"
#include "image_manager_kv.h"

namespace rocr {
namespace image {

/// @brief Image manager variant layered on top of ImageManagerKv.
///
/// ImageRuntime::CreateImageManager instantiates this class for agents whose
/// major version is 10. It re-declares the SRD population, sampler and fill
/// entry points and adds an ADDR2-based surface query helper.
class ImageManagerNv : public ImageManagerKv {
 public:
  ImageManagerNv();
  virtual ~ImageManagerNv();

  /// @brief Calculate the size and alignment of the backing storage of an
  /// image.
  /// @param component               Agent the image is created for.
  /// @param desc                    Image descriptor.
  /// @param image_data_layout       Requested data layout.
  /// @param image_data_row_pitch    Row pitch in bytes.
  /// @param image_data_slice_pitch  Slice pitch in bytes.
  /// @param image_info              Receives the computed size and alignment.
  virtual hsa_status_t CalculateImageSizeAndAlignment(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch, size_t image_data_slice_pitch,
      hsa_ext_image_data_info_t& image_info) const;

  /// @brief Fill image structure with device specific image object.
  virtual hsa_status_t PopulateImageSrd(Image& image) const;

  /// @brief Fill image structure with device specific image object using the
  /// given vendor metadata descriptor.
  virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;

  /// @brief Modify device specific image object according to the specified
  /// new format. The image's descriptor format is overwritten as well.
  virtual hsa_status_t ModifyImageSrd(Image& image,
                                      hsa_ext_image_format_t& new_format) const;

  /// @brief Fill sampler structure with device specific sampler object.
  /// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown filter mode.
  virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;

  /// @brief Fill image backing storage using agent copy.
  /// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when no blit queue is available.
  virtual hsa_status_t FillImage(const Image& image, const void* pattern,
                                 const hsa_ext_image_region_t& region);
 protected:
  /// @brief Query addrlib for the preferred swizzle mode and surface layout.
  /// @param out  Receives the computed surface information.
  /// @return The selected swizzle mode, or (uint32_t)(-1) on failure.
  uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
                             const hsa_ext_image_descriptor_t& desc,
                             Image::TileMode tileMode,
                             size_t image_data_row_pitch,
                             size_t image_data_slice_pitch,
                             ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;

  // NOTE(review): presumably reports whether `address` lies in device-local
  // memory; the definition is not visible here -- confirm against the .cpp.
  bool IsLocalMemory(const void* address) const;

 private:
  DISALLOW_COPY_AND_ASSIGN(ImageManagerNv);
};

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_IMAGE_MANAGER_NV_H_ 


================================================
FILE: runtime/hsa-runtime/image/image_runtime.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#define NOMINMAX
#include "image_runtime.h"

#include <assert.h>
#include <climits>
#include <mutex>

#include "core/inc/runtime.h"
#include "core/inc/hsa_internal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "resource.h"
#include "image_manager_kv.h"
#include "image_manager_ai.h"
#include "image_manager_nv.h"
#include "image_manager_gfx11.h"
#include "image_manager_gfx12.h"
#include "device_info.h"

namespace rocr {
namespace image {

/// @brief Memory-pool iteration callback that records the first non-empty
/// global pool advertising the kernarg flag.
///
/// @param pool  Pool currently visited by the iteration.
/// @param data  Must point to an hsa_amd_memory_pool_t; it is overwritten with
///              @p pool when a match is found.
/// @return HSA_STATUS_INFO_BREAK once a kernarg pool has been stored (stops the
///         iteration), HSA_STATUS_SUCCESS to continue with the next pool, or
///         the error reported by a failed pool-info query.
hsa_status_t FindKernelArgPool(hsa_amd_memory_pool_t pool, void* data) {
  assert(data != nullptr);

  // Each query is still asserted for debug builds, but is also checked at run
  // time so that release builds never read an indeterminate result.
  hsa_amd_segment_t segment;
  hsa_status_t err =
      AMD::hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  assert(err == HSA_STATUS_SUCCESS);
  if (err != HSA_STATUS_SUCCESS) return err;

  // Only global-segment pools can carry the kernarg flag.
  if (segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;

  uint32_t flag = 0;
  err = AMD::hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
  assert(err == HSA_STATUS_SUCCESS);
  if (err != HSA_STATUS_SUCCESS) return err;

  size_t size = 0;
  err = AMD::hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
  assert(err == HSA_STATUS_SUCCESS);
  if (err != HSA_STATUS_SUCCESS) return err;

  // Test the bit itself rather than comparing the masked value against 1, so
  // the check does not depend on the numeric value of the flag.
  const bool is_kernarg = (flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0;
  if (is_kernarg && size != 0) {
    *(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
    // Found the kernarg pool, stop the iteration.
    return HSA_STATUS_INFO_BREAK;
  }

  return HSA_STATUS_SUCCESS;
}

/// @brief Agent iteration callback that sets up per-agent image state.
///
/// For a GPU agent an image manager matching the agent's major version is
/// created, initialized and registered under the agent handle. For a CPU agent
/// the L2 cache size is recorded and, if no kernarg pool is known yet, the
/// agent's memory pools are searched for one.
///
/// @param agent  Agent being visited.
/// @param data   The ImageRuntime instance being populated.
/// @return HSA_STATUS_SUCCESS, or the first error reported while querying the
///         agent or initializing its image manager.
hsa_status_t ImageRuntime::CreateImageManager(hsa_agent_t agent, void* data) {
  ImageRuntime* runtime = reinterpret_cast<ImageRuntime*>(data);

  hsa_device_type_t hsa_device_type;
  hsa_status_t hsa_error_code =
      HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type);
  if (hsa_error_code != HSA_STATUS_SUCCESS) {
    return hsa_error_code;
  }

  if (hsa_device_type == HSA_DEVICE_TYPE_GPU) {
    uint32_t chip_id = 0;
    hsa_error_code = GetGPUAsicID(agent, &chip_id);
    // Do not pick a manager from a chip id that was never written.
    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      return hsa_error_code;
    }
    const uint32_t major_ver = MajorVerFromDevID(chip_id);

    ImageManager* image_manager = nullptr;

    switch (major_ver) {
    case 12:
      image_manager = new ImageManagerGfx12();
      break;
    case 11:
      image_manager = new ImageManagerGfx11();
      break;
    case 10:
      image_manager = new ImageManagerNv();
      break;
    case  9:
      image_manager = new ImageManagerAi();
      break;
    default:
      image_manager = new ImageManagerKv();
      break;
    }
    hsa_error_code = image_manager->Initialize(agent);

    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      delete image_manager;
      return hsa_error_code;
    }

    runtime->image_managers_[agent.handle] = image_manager;
  } else if (hsa_device_type == HSA_DEVICE_TYPE_CPU) {
    uint32_t caches[4] = {0};
    hsa_error_code = HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, caches);

    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      return hsa_error_code;
    }

    // Index 1 of the cache-size array is used as the L2 size.
    runtime->cpu_l2_cache_size_ = caches[1];

    // The iteration result is intentionally not inspected: success is
    // observed through kernarg_pool_ being filled in by the callback.
    if (runtime->kernarg_pool_.handle == 0)
      hsa_amd_agent_iterate_memory_pools(agent, FindKernelArgPool, &runtime->kernarg_pool_);
  }

  return HSA_STATUS_SUCCESS;
}

/// @brief Return the ImageRuntime singleton, creating it on first use.
/// @return The singleton, or NULL if creation failed.
ImageRuntime* ImageRuntime::instance() {
  // Fast path: the singleton has already been published.
  ImageRuntime* runtime = get_instance().load(std::memory_order_acquire);
  if (runtime != NULL) {
    return runtime;
  }

  // Protect the initialization from multi threaded access.
  std::lock_guard<std::mutex> guard(instance_mutex());

  // Another thread may have created it while we waited for the lock.
  runtime = get_instance().load(std::memory_order_relaxed);
  if (runtime == NULL) {
    // CreateSingleton publishes the instance itself and yields NULL on failure.
    runtime = CreateSingleton();
    // UnloadCallback = &ext_image::ImageRuntime::DestroySingleton;
  }

  return runtime;
}

/// @brief Build, initialize and publish the ImageRuntime singleton.
///
/// The blit kernel is initialized first, then every agent is visited with
/// CreateImageManager. If either step fails the partially built instance is
/// cleaned up and destroyed.
///
/// @return The published instance, or NULL on failure.
ImageRuntime* ImageRuntime::CreateSingleton() {
  ImageRuntime* runtime = new ImageRuntime();

  // Short-circuit evaluation keeps the original order: agents are only
  // iterated when the blit kernel came up successfully.
  const bool initialized =
      runtime->blit_kernel_.Initialize() == HSA_STATUS_SUCCESS &&
      HSA::hsa_iterate_agents(CreateImageManager, runtime) == HSA_STATUS_SUCCESS;

  if (!initialized) {
    runtime->Cleanup();
    delete runtime;
    return NULL;
  }

  assert(runtime->kernarg_pool_.handle != 0);
  assert(runtime->image_managers_.size() != 0);

  get_instance().store(runtime, std::memory_order_release);
  return runtime;
}

void ImageRuntime::DestroySingleton() {
  ImageRuntime* instance = get_instance().load(std::memory_order_acquire);
  if (instance == NULL) {
    return;
  }

  instance->Cleanup();

  get_instance().store(NULL, std::memory_order_release);
  delete instance;
}

/// @brief Report the maximum image extent for an image-related agent attribute.
///
/// @param component  Agent to query.
/// @param attribute  One of the HSA_EXT_AGENT_INFO_IMAGE_*_MAX_ELEMENTS
///                   attributes or HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS.
/// @param value      Output buffer of uint32_t: one element for the 1D family
///                   and the array-layer query, two (width, height) for the 2D
///                   family, three (width, height, depth) for 3D.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for any other attribute, the
///         error of the device-type query if it fails, otherwise
///         HSA_STATUS_SUCCESS. Non-GPU agents report zeros.
hsa_status_t ImageRuntime::GetImageInfoMaxDimension(hsa_agent_t component,
                                                    hsa_agent_info_t attribute,
                                                    void* value) {
  // Number of uint32_t values written to `value` for the attribute.
  uint32_t num_values = 1;
  hsa_ext_image_geometry_t geometry;

  const size_t image_attribute = static_cast<size_t>(attribute);
  switch (image_attribute) {
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1DB;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
      num_values = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
      num_values = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DDEPTH;
      num_values = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DADEPTH;
      num_values = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
      num_values = 3;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      // The layer limit is taken from the 2D-array geometry.
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  const hsa_status_t status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Image is only supported on a GPU device; other agents keep the zeros.
  uint32_t width = 0;
  uint32_t height = 0;
  uint32_t depth = 0;
  uint32_t array_size = 0;
  if (device_type == HSA_DEVICE_TYPE_GPU) {
    image_manager(component)->GetImageInfoMaxDimension(
        component, geometry, width, height, depth, array_size);
  }

  uint32_t* const result = static_cast<uint32_t*>(value);
  if (num_values == 3) {
    result[0] = width;
    result[1] = height;
    result[2] = depth;
  } else if (num_values == 2) {
    result[0] = width;
    result[1] = height;
  } else if (image_attribute == HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS) {
    *result = array_size;
  } else {
    *result = width;
  }
  return HSA_STATUS_SUCCESS;
}

/// @brief Look up the capability mask of a format/geometry pair on an agent.
///
/// @param component   Agent to query.
/// @param format      Image format.
/// @param geometry    Image geometry.
/// @param capability  Receives the capability mask; 0 for non-GPU agents.
///                    Left untouched when the device-type query fails.
/// @return The error of the device-type query, otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::GetImageCapability(
    hsa_agent_t component, const hsa_ext_image_format_t& format,
    hsa_ext_image_geometry_t geometry, uint32_t& capability) {
  hsa_device_type_t device_type;
  const hsa_status_t status =
      HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  if (device_type != HSA_DEVICE_TYPE_GPU) {
    // Image is only supported on a GPU device.
    capability = 0;
    return HSA_STATUS_SUCCESS;
  }

  capability =
      image_manager(component)->GetImageProperty(component, format, geometry).cap;
  return HSA_STATUS_SUCCESS;
}

/// @brief Validate an image descriptor and compute its storage requirements.
///
/// @param component               Agent the image is intended for.
/// @param desc                    Image descriptor to validate.
/// @param image_data_layout       Requested data layout.
/// @param image_data_row_pitch    Row pitch in bytes.
/// @param image_data_slice_pitch  Slice pitch in bytes.
/// @param image_info              Zeroed on entry; receives size and alignment.
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the agent reports
///         no capability for the format/geometry,
///         HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED when any extent exceeds
///         the agent's maximum, otherwise the image manager's result.
hsa_status_t ImageRuntime::GetImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) {
  image_info.alignment = 0;
  image_info.size = 0;

  // Validate the image format and geometry.
  uint32_t capability = 0;
  const hsa_status_t cap_status =
      GetImageCapability(component, desc.format, desc.geometry, capability);
  if (cap_status != HSA_STATUS_SUCCESS) {
    return cap_status;
  }
  if (capability == 0) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  ImageManager* manager = image_manager(component);

  // Validate the image dimension against the per-geometry limits.
  uint32_t width_limit = 0;
  uint32_t height_limit = 0;
  uint32_t depth_limit = 0;
  uint32_t layers_limit = 0;
  manager->GetImageInfoMaxDimension(component, desc.geometry, width_limit,
                                    height_limit, depth_limit, layers_limit);

  const bool fits = !(desc.width > width_limit) && !(desc.height > height_limit) &&
                    !(desc.depth > depth_limit) && !(desc.array_size > layers_limit);
  if (!fits) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
  }

  return manager->CalculateImageSizeAndAlignment(
      component, desc, image_data_layout, image_data_row_pitch,
      image_data_slice_pitch, image_info);
}

/// @brief Create an image object over caller-provided backing storage.
///
/// @param component               Agent the image is created for.
/// @param image_descriptor        Format, geometry and extent of the image.
/// @param image_data              Backing storage; must be non-NULL and aligned
///                                to the alignment computed for the image.
/// @param access_permission       Access permission recorded in the image.
/// @param image_data_layout       LINEAR forces a linear tile mode; otherwise
///                                the mode is derived from the agent profile
///                                and the geometry.
/// @param image_data_row_pitch    Row pitch in bytes.
/// @param image_data_slice_pitch  Slice pitch in bytes.
/// @param image_handle            Receives the handle; 0 on failure.
/// @return HSA_STATUS_SUCCESS, a validation error from
///         GetImageSizeAndAlignment, HSA_STATUS_ERROR_INVALID_ARGUMENT for
///         misaligned storage, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the image
///         object cannot be allocated, or the error reported by the profile
///         query or by SRD population.
hsa_status_t ImageRuntime::CreateImageHandle(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
    const void* image_data, const hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;

  assert(image_data != NULL);

  // Validate image dimension.
  hsa_ext_image_data_info_t image_info = {0};
  hsa_status_t status =
      GetImageSizeAndAlignment(component, image_descriptor,
        image_data_layout, image_data_row_pitch, image_data_slice_pitch,
        image_info);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Validate image address alignment.
  if (!IsMultipleOf(reinterpret_cast<size_t>(image_data),
                    image_info.alignment)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Query the profile before allocating anything so that a failure cannot
  // leak the image and `profile` is never read uninitialized.
  hsa_profile_t profile;
  status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  Image* image = Image::Create(component);
  if (image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  image->component = component;
  image->desc = image_descriptor;
  image->permission = access_permission;
  image->data = const_cast<void*>(image_data);
  image->row_pitch = image_data_row_pitch;
  image->slice_pitch = image_data_slice_pitch;

  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    image->tile_mode = Image::TileMode::LINEAR;
  } else {
    // Opaque layout: tile only on base-profile agents, and never for 1DB.
    Image::TileMode tileMode =
        (profile == HSA_PROFILE_BASE && image_descriptor.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
        ? Image::TileMode::TILED
        : Image::TileMode::LINEAR;
    image->tile_mode = tileMode;
  }

  // Do not hand out a handle whose SRD could not be populated.
  status = image_manager(component)->PopulateImageSrd(*image);
  if (status != HSA_STATUS_SUCCESS) {
    Image::Destroy(image);
    return status;
  }

  if (core::Runtime::runtime_singleton_->flag().image_print_srd()) image->printSRD();

  image_handle.handle = image->Convert();

  return HSA_STATUS_SUCCESS;
}

/// @brief Create a tiled image object from a vendor layout descriptor.
///
/// @param component          Agent the image is created for.
/// @param image_descriptor   Format, geometry and extent of the image.
/// @param image_layout       Vendor layout descriptor; must be version 1 and
///                           carry a device id of (0x1002 << 16 | chip id).
/// @param image_data         Backing storage; must be 256-byte aligned.
/// @param access_permission  Access permission recorded in the image.
/// @param image_handle       Receives the handle on success.
/// @return HSA_STATUS_SUCCESS; HSA_STATUS_ERROR_INVALID_ALLOCATION for
///         misaligned storage; HSA_STATUS_ERROR_INVALID_ARGUMENT for a NULL
///         layout; HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED for a version
///         or device mismatch; HSA_STATUS_ERROR_OUT_OF_RESOURCES if the image
///         object cannot be allocated; or the error reported by the chip-id
///         query or by SRD population.
hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
  hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
  const hsa_amd_image_descriptor_t* image_layout,
  const void* image_data, const hsa_access_permission_t access_permission,
  hsa_ext_image_t& image_handle)
{
  if (!IsMultipleOf(image_data, 256))
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  if (image_layout == NULL)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  if (image_layout->version != 1)
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);

  // The layout is only valid for the device it was produced for. Check the
  // query status so the comparison never uses an unwritten chip id.
  uint32_t id = 0;
  hsa_status_t err = HSA::hsa_agent_get_info(
      component, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID), &id);
  if (err != HSA_STATUS_SUCCESS)
    return err;

  // Upper 16 bits: 0x1002; lower bits: the agent's chip id.
  if (image_layout->deviceID != ((0x1002u << 16) | id))
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);

  const metadata_amd_t* desc = reinterpret_cast<const metadata_amd_t*>(image_layout);

  Image* image = Image::Create(component);
  if (image == NULL)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  image->component = component;
  image->desc = image_descriptor;
  image->permission = access_permission;
  image->data = const_cast<void*>(image_data);
  image->tile_mode = Image::TILED;

  err = image_manager(component)->PopulateImageSrd(*image, desc);
  if (err != HSA_STATUS_SUCCESS) {
    Image::Destroy(image);
    return err;
  }

  if (core::Runtime::runtime_singleton_->flag().image_print_srd()) image->printSRD();

  image_handle.handle = image->Convert();
  return HSA_STATUS_SUCCESS;
}

/// @brief Destroy the image object behind a handle.
/// @param image_handle  Handle previously produced by an image-create call.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle does not convert to
///         an image, otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::DestroyImageHandle(
    const hsa_ext_image_t& image_handle) {
  const Image* doomed = Image::Convert(image_handle.handle);
  if (doomed != NULL) {
    Image::Destroy(const_cast<Image*>(doomed));
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// @brief Copy from linear memory into a region of an image.
///
/// @param src_memory        Source buffer.
/// @param src_row_pitch     Source row pitch in bytes.
/// @param src_slice_pitch   Source slice pitch in bytes.
/// @param dst_image_handle  Destination image handle.
/// @param image_region      Destination region.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an invalid handle, otherwise
///         the result of the owning image manager's copy.
hsa_status_t ImageRuntime::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const hsa_ext_image_t& dst_image_handle,
    const hsa_ext_image_region_t& image_region) {
  const Image* destination = Image::Convert(dst_image_handle.handle);
  if (destination == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The copy is delegated to the manager of the agent that owns the image.
  return image_manager(destination->component)
      ->CopyBufferToImage(src_memory, src_row_pitch, src_slice_pitch,
                          *destination, image_region);
}

/// @brief Copy a region of an image into linear memory.
///
/// @param src_image_handle  Source image handle.
/// @param dst_memory        Destination buffer.
/// @param dst_row_pitch     Destination row pitch in bytes.
/// @param dst_slice_pitch   Destination slice pitch in bytes.
/// @param image_region      Source region.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an invalid handle, otherwise
///         the result of the owning image manager's copy.
hsa_status_t ImageRuntime::CopyImageToBuffer(
    const hsa_ext_image_t& src_image_handle, void* dst_memory,
    size_t dst_row_pitch, size_t dst_slice_pitch,
    const hsa_ext_image_region_t& image_region) {
  const Image* source = Image::Convert(src_image_handle.handle);
  if (source == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The copy is delegated to the manager of the agent that owns the image.
  return image_manager(source->component)
      ->CopyImageToBuffer(*source, dst_memory, dst_row_pitch, dst_slice_pitch,
                          image_region);
}

/// @brief Copy a region between two images owned by the same agent.
///
/// @param src_image_handle  Source image handle.
/// @param dst_image_handle  Destination image handle.
/// @param src_origin        Origin of the region in the source image.
/// @param dst_origin        Origin of the region in the destination image.
/// @param size              Extent of the region.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if either handle is invalid or
///         the images belong to different agents, otherwise the result of the
///         image manager's copy.
hsa_status_t ImageRuntime::CopyImage(const hsa_ext_image_t& src_image_handle,
                                     const hsa_ext_image_t& dst_image_handle,
                                     const hsa_dim3_t& src_origin,
                                     const hsa_dim3_t& dst_origin,
                                     const hsa_dim3_t size) {
  const Image* source = Image::Convert(src_image_handle.handle);
  if (source == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const Image* destination = Image::Convert(dst_image_handle.handle);
  if (destination == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Both images must live on the same agent.
  const bool same_agent =
      source->component.handle == destination->component.handle;
  if (!same_agent) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Note the manager takes destination first, then source.
  return image_manager(source->component)
      ->CopyImage(*destination, *source, dst_origin, src_origin, size);
}

/// @brief Fill a region of an image with a pattern.
///
/// @param image_handle  Image to fill.
/// @param pattern       Fill value, interpreted by the image manager.
/// @param image_region  Region to fill.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an invalid handle, otherwise
///         the result of the owning image manager's fill.
hsa_status_t ImageRuntime::FillImage(
    const hsa_ext_image_t& image_handle, const void* pattern,
    const hsa_ext_image_region_t& image_region) {
  const Image* target = Image::Convert(image_handle.handle);
  if (target == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The fill is delegated to the manager of the agent that owns the image.
  return image_manager(target->component)
      ->FillImage(*target, pattern, image_region);
}

/// @brief Create a sampler object for a GPU agent.
///
/// @param component           Agent the sampler is created for.
/// @param sampler_descriptor  Addressing, coordinate and filter settings.
/// @param sampler_handle      Receives the handle; 0 on failure.
/// @return HSA_STATUS_SUCCESS; the error of the device-type query;
///         HSA_STATUS_ERROR_INVALID_AGENT for a non-GPU agent;
///         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the sampler cannot be
///         allocated; or the error reported while encoding the sampler SRD
///         (for example an unsupported filter mode).
hsa_status_t ImageRuntime::CreateSamplerHandle(
    hsa_agent_t component,
    const hsa_ext_sampler_descriptor_v2_t& sampler_descriptor,
    hsa_ext_sampler_t& sampler_handle) {
  sampler_handle.handle = 0;

  hsa_device_type_t device_type;
  hsa_status_t status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Sampler is only supported on a GPU device.
  if (device_type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  Sampler* sampler = Sampler::Create(component);
  if (sampler == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  sampler->component = component;
  sampler->desc = sampler_descriptor;

  // SRD encoding rejects invalid descriptors; do not return a handle to a
  // sampler whose SRD was never written.
  status = image_manager(component)->PopulateSamplerSrd(*sampler);
  if (status != HSA_STATUS_SUCCESS) {
    Sampler::Destroy(sampler);
    return status;
  }

  sampler_handle.handle = sampler->Convert();

  return HSA_STATUS_SUCCESS;
}

/// @brief Destroy the sampler object behind a handle.
/// @param sampler_handle  Handle previously produced by CreateSamplerHandle.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle does not convert to
///         a sampler, otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::DestroySamplerHandle(
    hsa_ext_sampler_t& sampler_handle) {
  const Sampler* doomed = Sampler::Convert(sampler_handle.handle);
  if (doomed != NULL) {
    Sampler::Destroy(doomed);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// @brief Construct an empty runtime: no CPU L2 cache size recorded and a
/// zero kernarg pool handle. Both are filled in later by CreateImageManager
/// while the agents are iterated.
ImageRuntime::ImageRuntime()
    : cpu_l2_cache_size_(0), kernarg_pool_({0}) {}

/// @brief The destructor releases nothing itself; owned resources are
/// released by Cleanup(), which callers invoke before deleting the instance.
ImageRuntime::~ImageRuntime() {}

/// @brief Release every per-agent image manager and the blit kernel.
///
/// Each registered manager is cleaned up and deleted, and the registry is then
/// emptied so that no dangling pointers remain and a repeated call does not
/// touch already-deleted managers.
void ImageRuntime::Cleanup() {
  for (auto& entry : image_managers_) {
    entry.second->Cleanup();
    delete entry.second;
  }
  // Drop the now-dangling pointers.
  image_managers_.clear();

  blit_kernel_.Cleanup();
}

}  // namespace image
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/image/image_runtime.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H

#include <atomic>
#include <map>
#include <mutex>

#include "inc/hsa.h"

#include "inc/hsa_ext_image.h"
#include "inc/hsa_ext_amd.h"
#include "blit_kernel.h"
#include "image_manager.h"
#include "util.h"

namespace rocr {
namespace image {

/// @brief Runtime object of the image extension.
///
/// Owns one ImageManager per agent handle (image_managers_) and the
/// BlitKernel used for accessing images. Construction and destruction are
/// private; the object is reached through instance() and released through
/// DestroySingleton().
class ImageRuntime {
 public:
  /// @brief Getter for the ImageRuntime singleton object.
  static ImageRuntime* instance();

  /// @brief Destroy singleton object.
  static void DestroySingleton();

  /// @brief Retrieve maximum size of width, height, depth, array size in pixels
  /// for a particular geometry on a component.
  /// @param component Agent being queried.
  /// @param attribute Agent attribute selecting which limit is returned.
  /// @param value     Output location for the queried value.
  hsa_status_t GetImageInfoMaxDimension(hsa_agent_t component,
                                        hsa_agent_info_t attribute,
                                        void* value);

  /// @brief Query image support with particular format and geometry.
  /// @param capability Receives the capability mask.
  hsa_status_t GetImageCapability(hsa_agent_t component,
                                  const hsa_ext_image_format_t& format,
                                  hsa_ext_image_geometry_t geometry,
                                  uint32_t& capability);

  /// @brief Query the size and address alignment of the backing storage of
  /// the image.
  /// @param image_info Receives the size and alignment information.
  hsa_status_t GetImageSizeAndAlignment(hsa_agent_t component,
                                        const hsa_ext_image_descriptor_t& desc,
                                        hsa_ext_image_data_layout_t image_data_layout,
                                        size_t image_data_row_pitch,
                                        size_t image_data_slice_pitch,
                                        hsa_ext_image_data_info_t& image_info);

  /// @brief Create device image object and return its handle.
  /// @param image_data Backing storage supplied by the caller.
  /// @param image      Receives the handle of the created image.
  hsa_status_t CreateImageHandle(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
      const void* image_data, const hsa_access_permission_t access_permission,
      hsa_ext_image_data_layout_t image_data_layout,
      size_t image_data_row_pitch,
      size_t image_data_slice_pitch,
      hsa_ext_image_t& image);

  /// @brief Create device image object from a caller supplied
  /// hsa_amd_image_descriptor_t layout and return its handle.
  /// @param image_layout Vendor specific layout description of the image.
  /// @param image        Receives the handle of the created image.
  hsa_status_t CreateImageHandleWithLayout(
      hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
      const hsa_amd_image_descriptor_t* image_layout,
      const void* image_data, const hsa_access_permission_t access_permission,
      hsa_ext_image_t& image);

  /// @brief Destroy the device image object referenced by the handle.
  hsa_status_t DestroyImageHandle(const hsa_ext_image_t& image);

  /// @brief Copy the content of a linear memory to an image object.
  hsa_status_t CopyBufferToImage(const void* src_memory, size_t src_row_pitch,
                                 size_t src_slice_pitch,
                                 const hsa_ext_image_t& dst_image,
                                 const hsa_ext_image_region_t& image_region);

  /// @brief Copy the content of an image object to a linear memory.
  hsa_status_t CopyImageToBuffer(const hsa_ext_image_t& src_image,
                                 void* dst_memory, size_t dst_row_pitch,
                                 size_t dst_slice_pitch,
                                 const hsa_ext_image_region_t& image_region);

  /// @brief Copy the content of an image object to another image object.
  /// @note @p size is taken by value while the origins are taken by
  /// reference.
  hsa_status_t CopyImage(const hsa_ext_image_t& src_image,
                         const hsa_ext_image_t& dst_image,
                         const hsa_dim3_t& src_origin,
                         const hsa_dim3_t& dst_origin, const hsa_dim3_t size);

  /// @brief Fill the content of an image object with a pattern.
  hsa_status_t FillImage(const hsa_ext_image_t& image, const void* pattern,
                         const hsa_ext_image_region_t& image_region);

  /// @brief Create device sampler object and return its handle.
  /// @param sampler Receives the handle of the created sampler.
  hsa_status_t CreateSamplerHandle(
      hsa_agent_t component,
      const hsa_ext_sampler_descriptor_v2_t& sampler_descriptor,
      hsa_ext_sampler_t& sampler);

  /// @brief Destroy the device sampler object referenced by the handle.
  hsa_status_t DestroySamplerHandle(hsa_ext_sampler_t& sampler);

  /// @brief Look up the ImageManager registered for @p agent.
  /// @return The manager stored under agent.handle, or NULL when the map has
  /// no entry for it. Callers must check for NULL before dereferencing.
  ImageManager* image_manager(hsa_agent_t agent) {
    std::map<uint64_t, ImageManager*>::iterator it = image_managers_.find(agent.handle);
    return (it != image_managers_.end()) ? it->second : NULL;
  }

  /// @brief Access the blit kernel object owned by this runtime.
  BlitKernel& blit_kernel() { return blit_kernel_; }

  /// @brief Return the stored CPU L2 cache size value.
  size_t cpu_l2_cache_size() const { return cpu_l2_cache_size_; }

  /// @brief Return (by value) the stored kernarg memory pool handle.
  hsa_amd_memory_pool_t kernarg_pool() const {
    return kernarg_pool_;
  }

 private:
  /// @brief Initialize singleton object, must be called once.
  static ImageRuntime* CreateSingleton();

  /// @brief Static callback taking an agent and an opaque @p data pointer.
  /// NOTE(review): presumably invoked once per agent during initialization to
  /// populate image_managers_ - confirm against the definition.
  static hsa_status_t CreateImageManager(hsa_agent_t agent, void* data);

  ImageRuntime();

  ~ImageRuntime();

  /// @brief Release the per-agent managers and the blit kernel.
  void Cleanup();

  /// Pointer to singleton object.
  static __forceinline std::atomic<ImageRuntime*>& get_instance() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::atomic<ImageRuntime*>* instance_ = new std::atomic<ImageRuntime*>();
    return *instance_;
  }

  /// Mutex paired with the singleton pointer above.
  static __forceinline std::mutex& instance_mutex() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::mutex* instance_mutex_ = new std::mutex();
    return *instance_mutex_;
  }

  /// @brief Contains mapping of agent and its corresponding ::ImageManager
  ///        object. Keyed by hsa_agent_t::handle.
  std::map<uint64_t, ImageManager*> image_managers_;

  /// @brief Manages kernel for accessing images.
  BlitKernel blit_kernel_;

  /// @brief Value returned by cpu_l2_cache_size(); zero after construction.
  size_t cpu_l2_cache_size_;

  /// @brief Value returned by kernarg_pool(); zeroed after construction.
  hsa_amd_memory_pool_t kernarg_pool_;

  DISALLOW_COPY_AND_ASSIGN(ImageRuntime);
};

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H


================================================
FILE: runtime/hsa-runtime/image/inc/hsa_ext_image_impl.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_H
#define HSA_RUNTIME_EXT_IMAGE_H

#include "inc/hsa.h"
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_image.h"
#include "core/inc/hsa_ext_interface.h"

//---------------------------------------------------------------------------//
//  APIs that implement Image functionality
//---------------------------------------------------------------------------//

namespace rocr {
namespace image {

// NOTE(review): the functions below are declared inside rocr::image and, going
// by their names and by LoadImage() further down, are presumably the
// implementations installed behind the public entry points of the same name.
// Confirm against the definitions before relying on that.

// Agent attribute query; the result is written through `value`.
hsa_status_t hsa_amd_image_get_info_max_dim(hsa_agent_t agent, hsa_agent_info_t attribute,
                                            void* value);

// Capability query for a geometry/format pair; the mask is written through
// `capability_mask`.
hsa_status_t hsa_ext_image_get_capability(hsa_agent_t agent,
                                          hsa_ext_image_geometry_t image_geometry,
                                          const hsa_ext_image_format_t* image_format,
                                          uint32_t* capability_mask);

// Backing storage query for an image descriptor; the result is written through
// `image_data_info`.
hsa_status_t hsa_ext_image_data_get_info(hsa_agent_t agent,
                                         const hsa_ext_image_descriptor_t* image_descriptor,
                                         hsa_access_permission_t access_permission,
                                         hsa_ext_image_data_info_t* image_data_info);

// Image creation; the new handle is written through `image`.
hsa_status_t hsa_ext_image_create(hsa_agent_t agent,
                                  const hsa_ext_image_descriptor_t* image_descriptor,
                                  const void* image_data, hsa_access_permission_t access_permission,
                                  hsa_ext_image_t* image);

// Image destruction; the handle is passed by value.
hsa_status_t hsa_ext_image_destroy(hsa_agent_t agent, hsa_ext_image_t image);

// Image to image copy described by two offsets and a range.
hsa_status_t hsa_ext_image_copy(hsa_agent_t agent, hsa_ext_image_t src_image,
                                const hsa_dim3_t* src_offset, hsa_ext_image_t dst_image,
                                const hsa_dim3_t* dst_offset, const hsa_dim3_t* range);

// Linear memory to image transfer for the given region.
hsa_status_t hsa_ext_image_import(hsa_agent_t agent, const void* src_memory, size_t src_row_pitch,
                                  size_t src_slice_pitch, hsa_ext_image_t dst_image,
                                  const hsa_ext_image_region_t* image_region);

// Image to linear memory transfer for the given region.
hsa_status_t hsa_ext_image_export(hsa_agent_t agent, hsa_ext_image_t src_image, void* dst_memory,
                                  size_t dst_row_pitch, size_t dst_slice_pitch,
                                  const hsa_ext_image_region_t* image_region);

// Clears the given region of an image using the value pointed to by `data`.
hsa_status_t hsa_ext_image_clear(hsa_agent_t agent, hsa_ext_image_t image, const void* data,
                                 const hsa_ext_image_region_t* image_region);

// Sampler creation from the original hsa_ext_sampler_descriptor_t.
hsa_status_t hsa_ext_sampler_create(hsa_agent_t agent,
                                    const hsa_ext_sampler_descriptor_t* sampler_descriptor,
                                    hsa_ext_sampler_t* sampler);

// Sampler creation from the hsa_ext_sampler_descriptor_v2_t descriptor.
hsa_status_t hsa_ext_sampler_create_v2(hsa_agent_t agent,
                                    const hsa_ext_sampler_descriptor_v2_t* sampler_descriptor,
                                    hsa_ext_sampler_t* sampler);

// Sampler destruction; the handle is passed by value.
hsa_status_t hsa_ext_sampler_destroy(hsa_agent_t agent, hsa_ext_sampler_t sampler);

// Same parameters as hsa_ext_image_get_capability plus an explicit data layout.
hsa_status_t hsa_ext_image_get_capability_with_layout(hsa_agent_t agent,
                                                      hsa_ext_image_geometry_t image_geometry,
                                                      const hsa_ext_image_format_t* image_format,
                                                      hsa_ext_image_data_layout_t image_data_layout,
                                                      uint32_t* capability_mask);

// Same parameters as hsa_ext_image_data_get_info plus an explicit data layout
// and row/slice pitches.
hsa_status_t hsa_ext_image_data_get_info_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info);

// Same parameters as hsa_ext_image_create plus an explicit data layout and
// row/slice pitches.
hsa_status_t hsa_ext_image_create_with_layout(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, const void* image_data,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_t* image);

// Image creation from a caller supplied hsa_amd_image_descriptor_t layout.
hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
                                  const hsa_ext_image_descriptor_t* image_descriptor,
                                  const hsa_amd_image_descriptor_t* image_layout,
                                  const void* image_data, hsa_access_permission_t access_permission,
                                  hsa_ext_image_t* image);

// Update Api table with func pointers that implement functionality.
// `interface_api` points at a function pointer whose type is that of the
// global ::hsa_amd_image_create.
void LoadImage(core::ImageExtTableInternal* image_api,
               decltype(::hsa_amd_image_create)** interface_api);

// Release resources acquired by Image implementation
void ReleaseImageRsrcs();

}  // namespace image
}  // namespace rocr

#endif  //  HSA_RUNTIME_EXT_IMAGE_H


================================================
FILE: runtime/hsa-runtime/image/resource.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
#define HSA_RUNTIME_EXT_IMAGE_RESOURCE_H

#include <stdint.h>

#include <cstring>

#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"

#include "util.h"

// Number of 32-bit words in Image::srd and the byte alignment applied to it.
#define HSA_IMAGE_OBJECT_SIZE_DWORD 12
#define HSA_IMAGE_OBJECT_ALIGNMENT 16

// Number of 32-bit words in Sampler::srd and the byte alignment applied to it.
#define HSA_SAMPLER_OBJECT_SIZE_DWORD 8
#define HSA_SAMPLER_OBJECT_ALIGNMENT 16

// NOTE(review): presumably the number of image geometries, channel orders and
// channel types used to size lookup tables elsewhere - confirm at the use
// sites, none are visible in this header.
#define GEOMETRY_COUNT 8
#define ORDER_COUNT 20
#define TYPE_COUNT 16
// Shorthand capability masks: read-only; read-only or write-only; and
// read-only, write-only or read-write.
#define RO HSA_EXT_IMAGE_CAPABILITY_READ_ONLY
#define ROWO \
  (HSA_EXT_IMAGE_CAPABILITY_READ_ONLY | HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY)
#define RW                                                                    \
  (HSA_EXT_IMAGE_CAPABILITY_READ_ONLY | HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY | \
  HSA_EXT_IMAGE_CAPABILITY_READ_WRITE)

namespace rocr {
namespace image {

/// @brief Metadata header: a version, a vendor id and eight descriptor words,
/// followed in memory by a variable number of mip level offsets.
struct metadata_amd_s {
  // Must be 1
  uint32_t version;
  // AMD | CZ
  uint32_t vendorID;
  uint32_t words[8];
  // Mip level offset bits [39:8] for each level (if any)
  uint32_t mip_offsets[0];
};
typedef metadata_amd_s metadata_amd_t;

/// @brief Structure to represent image access component.
/// @brief Per-channel selection for image access: one byte for each of the
/// x, y, z and w components.
struct Swizzle {
  uint8_t x;
  uint8_t y;
  uint8_t z;
  uint8_t w;
};

/// @brief Structure to contain the property of an image with a particular
/// format and geometry.
/// @brief Properties of an image for one format/geometry combination.
struct ImageProperty {
  // hsa_ext_image_format_capability_t mask.
  uint8_t cap;
  // size per pixel in bytes.
  uint8_t element_size;
  // device specific channel ordering.
  uint8_t data_format;
  // device specific channel type.
  uint8_t data_type;
};

/// @brief Structure to represent an HSA image object.
/// @brief Structure to represent an HSA image object.
///
/// Instances are created and destroyed only through Create() / Destroy(); the
/// constructor and destructor are private. The HSA handle of an image is the
/// address of its srd member (see Convert()).
typedef struct Image {
private:
  /// Zero-initialise every member; the image starts read-only and LINEAR.
  Image() {
    component.handle = 0;
    permission = HSA_ACCESS_PERMISSION_RO;
    data = NULL;
    std::memset(srd, 0, sizeof(srd));
    std::memset(&desc, 0, sizeof(desc));
    row_pitch = slice_pitch = 0;
    tile_mode = LINEAR;
  }

  ~Image() {}

public:
  typedef enum TileMode {
    LINEAR,
    TILED
  } TileMode;

  /// @brief Create an Image.
  static Image* Create(hsa_agent_t agent);

  /// @brief Destroy an Image.
  static void Destroy(const Image* image);

  /// @brief Convert from vendor representation to HSA handle.
  /// The handle is the address of the srd array.
  uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }

  /// @brief Convert from HSA handle to vendor representation.
  /// Undoes Convert() by subtracting the offset of srd inside Image.
  static Image* Convert(uint64_t handle) {
    return reinterpret_cast<Image*>(handle - offsetof(Image, srd));
  }

  // Vendor specific image object.
  __ALIGNED__(
      HSA_IMAGE_OBJECT_ALIGNMENT) uint32_t srd[HSA_IMAGE_OBJECT_SIZE_DWORD];

  /// @brief Print the srd words to stdout as space separated hexadecimal
  /// values.
  void const printSRD() const {
    char hexStr[200];
    hexStr[0] = '\0';
    size_t hexStrLen = 0;
    const size_t dwordCount = sizeof(srd) / sizeof(srd[0]);
    // Every write is bounded by the space left in hexStr, so enlarging srd
    // can truncate the output but never overrun the buffer.
    for (size_t i = 0; i < dwordCount && hexStrLen < sizeof(hexStr); i++) {
      const int written = snprintf(&hexStr[hexStrLen], sizeof(hexStr) - hexStrLen,
                                   "0x%08x ", srd[i]);
      if (written < 0) break;
      hexStrLen += static_cast<size_t>(written);
    }

    printf("\nSRD:%s\n\n", hexStr);
  }

  // HSA component of the image object.
  hsa_agent_t component;

  // HSA image descriptor of the image object.
  hsa_ext_image_descriptor_t desc;

  // HSA image access permission of the image object.
  hsa_access_permission_t permission;

  // Backing storage of the image object.
  void* data;

  // Device specific row pitch of the image object in size.
  size_t row_pitch;

  // Device specific slice pitch of the image object in size.
  size_t slice_pitch;

  // Device specific tile mode
  TileMode tile_mode;
} Image;

/// @brief Structure to represent an HSA sampler object.
/// @brief Structure to represent an HSA sampler object.
///
/// Instances are created and destroyed only through Create() / Destroy(); the
/// constructor and destructor are private. The HSA handle of a sampler is the
/// address of its srd member (see Convert()).
typedef struct Sampler {
private:
  /// Zero-initialise the agent handle, the sampler words and the descriptor.
  Sampler() {
    component.handle = 0;
    std::memset(srd, 0, sizeof(srd));
    std::memset(&desc, 0, sizeof(desc));
  }

  ~Sampler() {}

public:
  /// @brief Create a Sampler.
  static Sampler* Create(hsa_agent_t agent);

  /// @brief Destroy a Sampler.
  static void Destroy(const Sampler* sampler);

  /// @brief Convert from vendor representation to HSA handle.
  /// Const-qualified, like Image::Convert(), so that it can also be called
  /// through a const Sampler; the handle is the address of the srd array.
  uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }

  /// @brief Convert from HSA handle to vendor representation.
  /// Undoes Convert() by subtracting the offset of srd inside Sampler.
  static Sampler* Convert(uint64_t handle) {
    return reinterpret_cast<Sampler*>(handle - offsetof(Sampler, srd));
  }

  // Vendor specific sampler object.
  __ALIGNED__(HSA_SAMPLER_OBJECT_ALIGNMENT)
  uint32_t srd[HSA_SAMPLER_OBJECT_SIZE_DWORD];

  // HSA component of the sampler object.
  hsa_agent_t component;

  // HSA sampler descriptor of the sampler object.
  hsa_ext_sampler_descriptor_v2_t desc;
} Sampler;

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_RESOURCE_H


================================================
FILE: runtime/hsa-runtime/image/resource_ai.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_RESOURCE_AI_H
#define HSA_RUNTIME_EXT_IMAGE_RESOURCE_AI_H

#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

namespace rocr {
namespace image {

        // 32-bit word holding a single 32-bit BASE_ADDRESS field. The same
        // storage can be read as bitfields (`bitfields` / `bits`) or as a raw
        // unsigned, signed or float value.
        // NOTE(review): presumably dword 0 of the SQ buffer resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_BUF_RSRC_WORD0 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                    BASE_ADDRESS : 32;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                    BASE_ADDRESS : 32;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into BASE_ADDRESS_HI (16), STRIDE (14),
        // CACHE_SWIZZLE (1) and SWIZZLE_ENABLE (1). The big-endian branch
        // declares the same fields in reverse order.
        // NOTE(review): presumably dword 1 of the SQ buffer resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_BUF_RSRC_WORD1 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                 BASE_ADDRESS_HI : 16;
                unsigned int                          STRIDE : 14;
                unsigned int                   CACHE_SWIZZLE : 1;
                unsigned int                  SWIZZLE_ENABLE : 1;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                  SWIZZLE_ENABLE : 1;
                unsigned int                   CACHE_SWIZZLE : 1;
                unsigned int                          STRIDE : 14;
                unsigned int                 BASE_ADDRESS_HI : 16;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word holding a single 32-bit NUM_RECORDS field, also
        // readable as a raw unsigned, signed or float value.
        // NOTE(review): presumably dword 2 of the SQ buffer resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_BUF_RSRC_WORD2 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                     NUM_RECORDS : 32;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                     NUM_RECORDS : 32;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into four 3-bit DST_SEL_{X,Y,Z,W} selectors,
        // NUM_FORMAT (3), DATA_FORMAT (4), USER_VM_ENABLE (1),
        // USER_VM_MODE (1), INDEX_STRIDE (2), ADD_TID_ENABLE (1), NV (1) and
        // TYPE (2). The unnamed bitfields are padding that keeps the named
        // fields at their bit positions; the widths add up to 32. The
        // big-endian branch declares the same fields in reverse order.
        // NOTE(review): presumably dword 3 of the SQ buffer resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_BUF_RSRC_WORD3 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                       DST_SEL_X : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                      NUM_FORMAT : 3;
                unsigned int                     DATA_FORMAT : 4;
                unsigned int                  USER_VM_ENABLE : 1;
                unsigned int                    USER_VM_MODE : 1;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                                 : 3;
                unsigned int                              NV : 1;
                unsigned int                                 : 2;
                unsigned int                            TYPE : 2;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                            TYPE : 2;
                unsigned int                                 : 2;
                unsigned int                              NV : 1;
                unsigned int                                 : 3;
                unsigned int                  ADD_TID_ENABLE : 1;
                unsigned int                    INDEX_STRIDE : 2;
                unsigned int                    USER_VM_MODE : 1;
                unsigned int                  USER_VM_ENABLE : 1;
                unsigned int                     DATA_FORMAT : 4;
                unsigned int                      NUM_FORMAT : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_X : 3;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word holding a single 32-bit BASE_ADDRESS field, also
        // readable as a raw unsigned, signed or float value.
        // NOTE(review): presumably dword 0 of the SQ image resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_IMG_RSRC_WORD0 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                    BASE_ADDRESS : 32;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                    BASE_ADDRESS : 32;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into BASE_ADDRESS_HI (8), MIN_LOD (12),
        // DATA_FORMAT (6), NUM_FORMAT (4), NV (1) and META_DIRECT (1). The
        // big-endian branch declares the same fields in reverse order.
        // NOTE(review): presumably dword 1 of the SQ image resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_IMG_RSRC_WORD1 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                 BASE_ADDRESS_HI : 8;
                unsigned int                         MIN_LOD : 12;
                unsigned int                     DATA_FORMAT : 6;
                unsigned int                      NUM_FORMAT : 4;
                unsigned int                              NV : 1;
                unsigned int                     META_DIRECT : 1;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                     META_DIRECT : 1;
                unsigned int                              NV : 1;
                unsigned int                      NUM_FORMAT : 4;
                unsigned int                     DATA_FORMAT : 6;
                unsigned int                         MIN_LOD : 12;
                unsigned int                 BASE_ADDRESS_HI : 8;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into WIDTH (14), HEIGHT (14) and PERF_MOD (3),
        // plus one unnamed padding bit. The big-endian branch declares the
        // same fields in reverse order.
        // NOTE(review): presumably dword 2 of the SQ image resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_IMG_RSRC_WORD2 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                           WIDTH : 14;
                unsigned int                          HEIGHT : 14;
                unsigned int                        PERF_MOD : 3;
                unsigned int                                 : 1;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                                 : 1;
                unsigned int                        PERF_MOD : 3;
                unsigned int                          HEIGHT : 14;
                unsigned int                           WIDTH : 14;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into four 3-bit DST_SEL_{X,Y,Z,W} selectors,
        // BASE_LEVEL (4), LAST_LEVEL (4), SW_MODE (5), three unnamed padding
        // bits and TYPE (4). The big-endian branch declares the same fields
        // in reverse order.
        // NOTE(review): presumably dword 3 of the SQ image resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_IMG_RSRC_WORD3 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                       DST_SEL_X : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                      BASE_LEVEL : 4;
                unsigned int                      LAST_LEVEL : 4;
                unsigned int                         SW_MODE : 5;
                unsigned int                                 : 3;
                unsigned int                            TYPE : 4;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                            TYPE : 4;
                unsigned int                                 : 3;
                unsigned int                         SW_MODE : 5;
                unsigned int                      LAST_LEVEL : 4;
                unsigned int                      BASE_LEVEL : 4;
                unsigned int                       DST_SEL_W : 3;
                unsigned int                       DST_SEL_Z : 3;
                unsigned int                       DST_SEL_Y : 3;
                unsigned int                       DST_SEL_X : 3;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        // 32-bit word split into DEPTH (13), PITCH (16) and BC_SWIZZLE (3).
        // The big-endian branch declares the same fields in reverse order.
        // NOTE(review): presumably dword 4 of the SQ image resource
        // descriptor, going by the name - confirm against the register spec.
        union SQ_IMG_RSRC_WORD4 {
        struct {
#if             defined(LITTLEENDIAN_CPU)
                unsigned int                           DEPTH : 13;
                unsigned int                           PITCH : 16;
                unsigned int                      BC_SWIZZLE : 3;
#elif           defined(BIGENDIAN_CPU)
                unsigned int                      BC_SWIZZLE : 3;
                unsigned int                           PITCH : 16;
                unsigned int                           DEPTH : 13;
#endif
        } bitfields, bits;
        unsigned int    u32All;
        signed int      i32All;
        float   f32All;
        };


        /*
         * SQ_IMG_RSRC_WORD5: seven bit-fields totalling 32 bits, overlaid with
         * raw unsigned / signed / float views of the same word.
         */
        union SQ_IMG_RSRC_WORD5 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned BASE_ARRAY           : 13;
                unsigned ARRAY_PITCH          : 4;
                unsigned META_DATA_ADDRESS_HI : 8;
                unsigned META_LINEAR          : 1;
                unsigned META_PIPE_ALIGNED    : 1;
                unsigned META_RB_ALIGNED      : 1;
                unsigned MAX_MIP              : 4;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned MAX_MIP              : 4;
                unsigned META_RB_ALIGNED      : 1;
                unsigned META_PIPE_ALIGNED    : 1;
                unsigned META_LINEAR          : 1;
                unsigned META_DATA_ADDRESS_HI : 8;
                unsigned ARRAY_PITCH          : 4;
                unsigned BASE_ARRAY           : 13;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_RSRC_WORD6: eight bit-fields totalling 32 bits, overlaid with
         * raw unsigned / signed / float views of the same word.
         */
        union SQ_IMG_RSRC_WORD6 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned MIN_LOD_WARN    : 12;
                unsigned COUNTER_BANK_ID : 8;
                unsigned LOD_HDW_CNT_EN  : 1;
                unsigned COMPRESSION_EN  : 1;
                unsigned ALPHA_IS_ON_MSB : 1;
                unsigned COLOR_TRANSFORM : 1;
                unsigned LOST_ALPHA_BITS : 4;
                unsigned LOST_COLOR_BITS : 4;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned LOST_COLOR_BITS : 4;
                unsigned LOST_ALPHA_BITS : 4;
                unsigned COLOR_TRANSFORM : 1;
                unsigned ALPHA_IS_ON_MSB : 1;
                unsigned COMPRESSION_EN  : 1;
                unsigned LOD_HDW_CNT_EN  : 1;
                unsigned COUNTER_BANK_ID : 8;
                unsigned MIN_LOD_WARN    : 12;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_RSRC_WORD7: a single 32-bit field overlaid with raw
         * unsigned / signed / float views. The field declaration is the same
         * for either byte order, but it is still only present when one of the
         * endianness macros is defined.
         */
        union SQ_IMG_RSRC_WORD7 {
            struct {
#if defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU)
                unsigned META_DATA_ADDRESS : 32;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_SAMP_WORD0: fourteen bit-fields totalling 32 bits, overlaid
         * with raw unsigned / signed / float views of the same word.
         */
        union SQ_IMG_SAMP_WORD0 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned CLAMP_X            : 3;
                unsigned CLAMP_Y            : 3;
                unsigned CLAMP_Z            : 3;
                unsigned MAX_ANISO_RATIO    : 3;
                unsigned DEPTH_COMPARE_FUNC : 3;
                unsigned FORCE_UNNORMALIZED : 1;
                unsigned ANISO_THRESHOLD    : 3;
                unsigned MC_COORD_TRUNC     : 1;
                unsigned FORCE_DEGAMMA      : 1;
                unsigned ANISO_BIAS         : 6;
                unsigned TRUNC_COORD        : 1;
                unsigned DISABLE_CUBE_WRAP  : 1;
                unsigned FILTER_MODE        : 2;
                unsigned COMPAT_MODE        : 1;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned COMPAT_MODE        : 1;
                unsigned FILTER_MODE        : 2;
                unsigned DISABLE_CUBE_WRAP  : 1;
                unsigned TRUNC_COORD        : 1;
                unsigned ANISO_BIAS         : 6;
                unsigned FORCE_DEGAMMA      : 1;
                unsigned MC_COORD_TRUNC     : 1;
                unsigned ANISO_THRESHOLD    : 3;
                unsigned FORCE_UNNORMALIZED : 1;
                unsigned DEPTH_COMPARE_FUNC : 3;
                unsigned MAX_ANISO_RATIO    : 3;
                unsigned CLAMP_Z            : 3;
                unsigned CLAMP_Y            : 3;
                unsigned CLAMP_X            : 3;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_SAMP_WORD1: four bit-fields (12 + 12 + 4 + 4 = 32 bits)
         * overlaid with raw unsigned / signed / float views of the same word.
         */
        union SQ_IMG_SAMP_WORD1 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned MIN_LOD  : 12;
                unsigned MAX_LOD  : 12;
                unsigned PERF_MIP : 4;
                unsigned PERF_Z   : 4;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned PERF_Z   : 4;
                unsigned PERF_MIP : 4;
                unsigned MAX_LOD  : 12;
                unsigned MIN_LOD  : 12;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_SAMP_WORD2: ten bit-fields totalling 32 bits, overlaid with
         * raw unsigned / signed / float views of the same word.
         */
        union SQ_IMG_SAMP_WORD2 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned LOD_BIAS           : 14;
                unsigned LOD_BIAS_SEC       : 6;
                unsigned XY_MAG_FILTER      : 2;
                unsigned XY_MIN_FILTER      : 2;
                unsigned Z_FILTER           : 2;
                unsigned MIP_FILTER         : 2;
                unsigned MIP_POINT_PRECLAMP : 1;
                unsigned BLEND_ZERO_PRT     : 1;
                unsigned FILTER_PREC_FIX    : 1;
                unsigned ANISO_OVERRIDE     : 1;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned ANISO_OVERRIDE     : 1;
                unsigned FILTER_PREC_FIX    : 1;
                unsigned BLEND_ZERO_PRT     : 1;
                unsigned MIP_POINT_PRECLAMP : 1;
                unsigned MIP_FILTER         : 2;
                unsigned Z_FILTER           : 2;
                unsigned XY_MIN_FILTER      : 2;
                unsigned XY_MAG_FILTER      : 2;
                unsigned LOD_BIAS_SEC       : 6;
                unsigned LOD_BIAS           : 14;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


        /*
         * SQ_IMG_SAMP_WORD3: three named bit-fields plus a 17-bit unnamed gap
         * (12 + 1 + 17 + 2 = 32 bits), overlaid with raw unsigned / signed /
         * float views of the same word.
         */
        union SQ_IMG_SAMP_WORD3 {
            struct {
#if defined(LITTLEENDIAN_CPU)
                unsigned BORDER_COLOR_PTR  : 12;
                unsigned SKIP_DEGAMMA      : 1;
                unsigned                   : 17;  /* unnamed gap */
                unsigned BORDER_COLOR_TYPE : 2;
#elif defined(BIGENDIAN_CPU)
                /* Same fields, declared in the reverse order. */
                unsigned BORDER_COLOR_TYPE : 2;
                unsigned                   : 17;  /* unnamed gap */
                unsigned SKIP_DEGAMMA      : 1;
                unsigned BORDER_COLOR_PTR  : 12;
#endif
            } bitfields, bits;

            unsigned u32All;  /* whole word as unsigned */
            int      i32All;  /* whole word as signed */
            float    f32All;  /* whole word as float */
        };


/* Bit widths for SQ_BUF_RSRC_WORD0: the whole word and its single field. */
#define SQ_BUF_RSRC_WORD0_REG_SIZE 32
#define SQ_BUF_RSRC_WORD0_BASE_ADDRESS_SIZE 32

/*
 * Field struct for SQ_BUF_RSRC_WORD0. With only one full-width field the
 * declaration is the same for both byte orders; the type is still declared
 * only when one of the endianness macros is defined.
 */
#if defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU)
typedef struct _sq_buf_rsrc_word0_t {
  unsigned base_address : SQ_BUF_RSRC_WORD0_BASE_ADDRESS_SIZE;
} sq_buf_rsrc_word0_t;
#endif

/* SQ_BUF_RSRC_WORD0 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_buf_rsrc_word0_t f;
} sq_buf_rsrc_word0_u;

/* Bit widths for SQ_BUF_RSRC_WORD1; the four field widths add up to REG_SIZE. */
#define SQ_BUF_RSRC_WORD1_REG_SIZE 32
#define SQ_BUF_RSRC_WORD1_BASE_ADDRESS_HI_SIZE 16
#define SQ_BUF_RSRC_WORD1_STRIDE_SIZE 14
#define SQ_BUF_RSRC_WORD1_CACHE_SWIZZLE_SIZE 1
#define SQ_BUF_RSRC_WORD1_SWIZZLE_ENABLE_SIZE 1

/*
 * Field struct for SQ_BUF_RSRC_WORD1. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_buf_rsrc_word1_t {
  unsigned base_address_hi : SQ_BUF_RSRC_WORD1_BASE_ADDRESS_HI_SIZE;
  unsigned stride          : SQ_BUF_RSRC_WORD1_STRIDE_SIZE;
  unsigned cache_swizzle   : SQ_BUF_RSRC_WORD1_CACHE_SWIZZLE_SIZE;
  unsigned swizzle_enable  : SQ_BUF_RSRC_WORD1_SWIZZLE_ENABLE_SIZE;
} sq_buf_rsrc_word1_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_buf_rsrc_word1_t {
  unsigned swizzle_enable  : SQ_BUF_RSRC_WORD1_SWIZZLE_ENABLE_SIZE;
  unsigned cache_swizzle   : SQ_BUF_RSRC_WORD1_CACHE_SWIZZLE_SIZE;
  unsigned stride          : SQ_BUF_RSRC_WORD1_STRIDE_SIZE;
  unsigned base_address_hi : SQ_BUF_RSRC_WORD1_BASE_ADDRESS_HI_SIZE;
} sq_buf_rsrc_word1_t;
#endif

/* SQ_BUF_RSRC_WORD1 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_buf_rsrc_word1_t f;
} sq_buf_rsrc_word1_u;

/* Bit widths for SQ_BUF_RSRC_WORD2: the whole word and its single field. */
#define SQ_BUF_RSRC_WORD2_REG_SIZE 32
#define SQ_BUF_RSRC_WORD2_NUM_RECORDS_SIZE 32

/*
 * Field struct for SQ_BUF_RSRC_WORD2. With only one full-width field the
 * declaration is the same for both byte orders; the type is still declared
 * only when one of the endianness macros is defined.
 */
#if defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU)
typedef struct _sq_buf_rsrc_word2_t {
  unsigned num_records : SQ_BUF_RSRC_WORD2_NUM_RECORDS_SIZE;
} sq_buf_rsrc_word2_t;
#endif

/* SQ_BUF_RSRC_WORD2 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_buf_rsrc_word2_t f;
} sq_buf_rsrc_word2_u;

/*
 * Bit widths for SQ_BUF_RSRC_WORD3. The named fields cover 27 bits; the struct
 * below fills the remaining 5 with two unnamed gaps (3 + 2).
 */
#define SQ_BUF_RSRC_WORD3_REG_SIZE 32
#define SQ_BUF_RSRC_WORD3_DST_SEL_X_SIZE 3
#define SQ_BUF_RSRC_WORD3_DST_SEL_Y_SIZE 3
#define SQ_BUF_RSRC_WORD3_DST_SEL_Z_SIZE 3
#define SQ_BUF_RSRC_WORD3_DST_SEL_W_SIZE 3
#define SQ_BUF_RSRC_WORD3_NUM_FORMAT_SIZE 3
#define SQ_BUF_RSRC_WORD3_DATA_FORMAT_SIZE 4
#define SQ_BUF_RSRC_WORD3_USER_VM_ENABLE_SIZE 1
#define SQ_BUF_RSRC_WORD3_USER_VM_MODE_SIZE 1
#define SQ_BUF_RSRC_WORD3_INDEX_STRIDE_SIZE 2
#define SQ_BUF_RSRC_WORD3_ADD_TID_ENABLE_SIZE 1
#define SQ_BUF_RSRC_WORD3_NV_SIZE 1
#define SQ_BUF_RSRC_WORD3_TYPE_SIZE 2

/*
 * Field struct for SQ_BUF_RSRC_WORD3. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_buf_rsrc_word3_t {
  unsigned dst_sel_x      : SQ_BUF_RSRC_WORD3_DST_SEL_X_SIZE;
  unsigned dst_sel_y      : SQ_BUF_RSRC_WORD3_DST_SEL_Y_SIZE;
  unsigned dst_sel_z      : SQ_BUF_RSRC_WORD3_DST_SEL_Z_SIZE;
  unsigned dst_sel_w      : SQ_BUF_RSRC_WORD3_DST_SEL_W_SIZE;
  unsigned num_format     : SQ_BUF_RSRC_WORD3_NUM_FORMAT_SIZE;
  unsigned data_format    : SQ_BUF_RSRC_WORD3_DATA_FORMAT_SIZE;
  unsigned user_vm_enable : SQ_BUF_RSRC_WORD3_USER_VM_ENABLE_SIZE;
  unsigned user_vm_mode   : SQ_BUF_RSRC_WORD3_USER_VM_MODE_SIZE;
  unsigned index_stride   : SQ_BUF_RSRC_WORD3_INDEX_STRIDE_SIZE;
  unsigned add_tid_enable : SQ_BUF_RSRC_WORD3_ADD_TID_ENABLE_SIZE;
  unsigned                : 3; /* unnamed gap */
  unsigned nv             : SQ_BUF_RSRC_WORD3_NV_SIZE;
  unsigned                : 2; /* unnamed gap */
  unsigned type           : SQ_BUF_RSRC_WORD3_TYPE_SIZE;
} sq_buf_rsrc_word3_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_buf_rsrc_word3_t {
  unsigned type           : SQ_BUF_RSRC_WORD3_TYPE_SIZE;
  unsigned                : 2; /* unnamed gap */
  unsigned nv             : SQ_BUF_RSRC_WORD3_NV_SIZE;
  unsigned                : 3; /* unnamed gap */
  unsigned add_tid_enable : SQ_BUF_RSRC_WORD3_ADD_TID_ENABLE_SIZE;
  unsigned index_stride   : SQ_BUF_RSRC_WORD3_INDEX_STRIDE_SIZE;
  unsigned user_vm_mode   : SQ_BUF_RSRC_WORD3_USER_VM_MODE_SIZE;
  unsigned user_vm_enable : SQ_BUF_RSRC_WORD3_USER_VM_ENABLE_SIZE;
  unsigned data_format    : SQ_BUF_RSRC_WORD3_DATA_FORMAT_SIZE;
  unsigned num_format     : SQ_BUF_RSRC_WORD3_NUM_FORMAT_SIZE;
  unsigned dst_sel_w      : SQ_BUF_RSRC_WORD3_DST_SEL_W_SIZE;
  unsigned dst_sel_z      : SQ_BUF_RSRC_WORD3_DST_SEL_Z_SIZE;
  unsigned dst_sel_y      : SQ_BUF_RSRC_WORD3_DST_SEL_Y_SIZE;
  unsigned dst_sel_x      : SQ_BUF_RSRC_WORD3_DST_SEL_X_SIZE;
} sq_buf_rsrc_word3_t;
#endif

/* SQ_BUF_RSRC_WORD3 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_buf_rsrc_word3_t f;
} sq_buf_rsrc_word3_u;


/* Bit widths for SQ_IMG_RSRC_WORD0: the whole word and its single field. */
#define SQ_IMG_RSRC_WORD0_REG_SIZE 32
#define SQ_IMG_RSRC_WORD0_BASE_ADDRESS_SIZE 32

/*
 * Field struct for SQ_IMG_RSRC_WORD0. With only one full-width field the
 * declaration is the same for both byte orders; the type is still declared
 * only when one of the endianness macros is defined.
 */
#if defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word0_t {
  unsigned base_address : SQ_IMG_RSRC_WORD0_BASE_ADDRESS_SIZE;
} sq_img_rsrc_word0_t;
#endif

/* SQ_IMG_RSRC_WORD0 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word0_t f;
} sq_img_rsrc_word0_u;

/* Bit widths for SQ_IMG_RSRC_WORD1; the six field widths add up to REG_SIZE. */
#define SQ_IMG_RSRC_WORD1_REG_SIZE 32
#define SQ_IMG_RSRC_WORD1_BASE_ADDRESS_HI_SIZE 8
#define SQ_IMG_RSRC_WORD1_MIN_LOD_SIZE 12
#define SQ_IMG_RSRC_WORD1_DATA_FORMAT_SIZE 6
#define SQ_IMG_RSRC_WORD1_NUM_FORMAT_SIZE 4
#define SQ_IMG_RSRC_WORD1_NV_SIZE 1
#define SQ_IMG_RSRC_WORD1_META_DIRECT_SIZE 1

/*
 * Field struct for SQ_IMG_RSRC_WORD1. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word1_t {
  unsigned base_address_hi : SQ_IMG_RSRC_WORD1_BASE_ADDRESS_HI_SIZE;
  unsigned min_lod         : SQ_IMG_RSRC_WORD1_MIN_LOD_SIZE;
  unsigned data_format     : SQ_IMG_RSRC_WORD1_DATA_FORMAT_SIZE;
  unsigned num_format      : SQ_IMG_RSRC_WORD1_NUM_FORMAT_SIZE;
  unsigned nv              : SQ_IMG_RSRC_WORD1_NV_SIZE;
  unsigned meta_direct     : SQ_IMG_RSRC_WORD1_META_DIRECT_SIZE;
} sq_img_rsrc_word1_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word1_t {
  unsigned meta_direct     : SQ_IMG_RSRC_WORD1_META_DIRECT_SIZE;
  unsigned nv              : SQ_IMG_RSRC_WORD1_NV_SIZE;
  unsigned num_format      : SQ_IMG_RSRC_WORD1_NUM_FORMAT_SIZE;
  unsigned data_format     : SQ_IMG_RSRC_WORD1_DATA_FORMAT_SIZE;
  unsigned min_lod         : SQ_IMG_RSRC_WORD1_MIN_LOD_SIZE;
  unsigned base_address_hi : SQ_IMG_RSRC_WORD1_BASE_ADDRESS_HI_SIZE;
} sq_img_rsrc_word1_t;
#endif

/* SQ_IMG_RSRC_WORD1 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word1_t f;
} sq_img_rsrc_word1_u;

/*
 * Bit widths for SQ_IMG_RSRC_WORD2. The named fields cover 31 bits; the struct
 * below adds one unnamed bit to reach REG_SIZE.
 */
#define SQ_IMG_RSRC_WORD2_REG_SIZE 32
#define SQ_IMG_RSRC_WORD2_WIDTH_SIZE 14
#define SQ_IMG_RSRC_WORD2_HEIGHT_SIZE 14
#define SQ_IMG_RSRC_WORD2_PERF_MOD_SIZE 3

/*
 * Field struct for SQ_IMG_RSRC_WORD2. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word2_t {
  unsigned width    : SQ_IMG_RSRC_WORD2_WIDTH_SIZE;
  unsigned height   : SQ_IMG_RSRC_WORD2_HEIGHT_SIZE;
  unsigned perf_mod : SQ_IMG_RSRC_WORD2_PERF_MOD_SIZE;
  unsigned          : 1; /* unnamed gap */
} sq_img_rsrc_word2_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word2_t {
  unsigned          : 1; /* unnamed gap */
  unsigned perf_mod : SQ_IMG_RSRC_WORD2_PERF_MOD_SIZE;
  unsigned height   : SQ_IMG_RSRC_WORD2_HEIGHT_SIZE;
  unsigned width    : SQ_IMG_RSRC_WORD2_WIDTH_SIZE;
} sq_img_rsrc_word2_t;
#endif

/* SQ_IMG_RSRC_WORD2 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word2_t f;
} sq_img_rsrc_word2_u;

/*
 * Bit widths for SQ_IMG_RSRC_WORD3. The named fields cover 29 bits; the struct
 * below adds a 3-bit unnamed gap to reach REG_SIZE.
 */
#define SQ_IMG_RSRC_WORD3_REG_SIZE 32
#define SQ_IMG_RSRC_WORD3_DST_SEL_X_SIZE 3
#define SQ_IMG_RSRC_WORD3_DST_SEL_Y_SIZE 3
#define SQ_IMG_RSRC_WORD3_DST_SEL_Z_SIZE 3
#define SQ_IMG_RSRC_WORD3_DST_SEL_W_SIZE 3
#define SQ_IMG_RSRC_WORD3_BASE_LEVEL_SIZE 4
#define SQ_IMG_RSRC_WORD3_LAST_LEVEL_SIZE 4
#define SQ_IMG_RSRC_WORD3_SW_MODE_SIZE 5
#define SQ_IMG_RSRC_WORD3_TYPE_SIZE 4

/*
 * Field struct for SQ_IMG_RSRC_WORD3. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word3_t {
  unsigned dst_sel_x  : SQ_IMG_RSRC_WORD3_DST_SEL_X_SIZE;
  unsigned dst_sel_y  : SQ_IMG_RSRC_WORD3_DST_SEL_Y_SIZE;
  unsigned dst_sel_z  : SQ_IMG_RSRC_WORD3_DST_SEL_Z_SIZE;
  unsigned dst_sel_w  : SQ_IMG_RSRC_WORD3_DST_SEL_W_SIZE;
  unsigned base_level : SQ_IMG_RSRC_WORD3_BASE_LEVEL_SIZE;
  unsigned last_level : SQ_IMG_RSRC_WORD3_LAST_LEVEL_SIZE;
  unsigned sw_mode    : SQ_IMG_RSRC_WORD3_SW_MODE_SIZE;
  unsigned            : 3; /* unnamed gap */
  unsigned type       : SQ_IMG_RSRC_WORD3_TYPE_SIZE;
} sq_img_rsrc_word3_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word3_t {
  unsigned type       : SQ_IMG_RSRC_WORD3_TYPE_SIZE;
  unsigned            : 3; /* unnamed gap */
  unsigned sw_mode    : SQ_IMG_RSRC_WORD3_SW_MODE_SIZE;
  unsigned last_level : SQ_IMG_RSRC_WORD3_LAST_LEVEL_SIZE;
  unsigned base_level : SQ_IMG_RSRC_WORD3_BASE_LEVEL_SIZE;
  unsigned dst_sel_w  : SQ_IMG_RSRC_WORD3_DST_SEL_W_SIZE;
  unsigned dst_sel_z  : SQ_IMG_RSRC_WORD3_DST_SEL_Z_SIZE;
  unsigned dst_sel_y  : SQ_IMG_RSRC_WORD3_DST_SEL_Y_SIZE;
  unsigned dst_sel_x  : SQ_IMG_RSRC_WORD3_DST_SEL_X_SIZE;
} sq_img_rsrc_word3_t;
#endif

/* SQ_IMG_RSRC_WORD3 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word3_t f;
} sq_img_rsrc_word3_u;

/* Bit widths for SQ_IMG_RSRC_WORD4; the three field widths add up to REG_SIZE. */
#define SQ_IMG_RSRC_WORD4_REG_SIZE 32
#define SQ_IMG_RSRC_WORD4_DEPTH_SIZE 13
#define SQ_IMG_RSRC_WORD4_PITCH_SIZE 16
#define SQ_IMG_RSRC_WORD4_BC_SWIZZLE_SIZE 3

/*
 * Field struct for SQ_IMG_RSRC_WORD4. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word4_t {
  unsigned depth      : SQ_IMG_RSRC_WORD4_DEPTH_SIZE;
  unsigned pitch      : SQ_IMG_RSRC_WORD4_PITCH_SIZE;
  unsigned bc_swizzle : SQ_IMG_RSRC_WORD4_BC_SWIZZLE_SIZE;
} sq_img_rsrc_word4_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word4_t {
  unsigned bc_swizzle : SQ_IMG_RSRC_WORD4_BC_SWIZZLE_SIZE;
  unsigned pitch      : SQ_IMG_RSRC_WORD4_PITCH_SIZE;
  unsigned depth      : SQ_IMG_RSRC_WORD4_DEPTH_SIZE;
} sq_img_rsrc_word4_t;
#endif

/* SQ_IMG_RSRC_WORD4 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word4_t f;
} sq_img_rsrc_word4_u;

/* Bit widths for SQ_IMG_RSRC_WORD5; the seven field widths add up to REG_SIZE. */
#define SQ_IMG_RSRC_WORD5_REG_SIZE 32
#define SQ_IMG_RSRC_WORD5_BASE_ARRAY_SIZE 13
#define SQ_IMG_RSRC_WORD5_ARRAY_PITCH_SIZE 4
#define SQ_IMG_RSRC_WORD5_META_DATA_ADDRESS_SIZE 8
#define SQ_IMG_RSRC_WORD5_META_LINEAR_SIZE 1
#define SQ_IMG_RSRC_WORD5_META_PIPE_ALIGNED_SIZE 1
#define SQ_IMG_RSRC_WORD5_META_RB_ALIGNED_SIZE 1
#define SQ_IMG_RSRC_WORD5_MAX_MIP_SIZE 4

/*
 * Field struct for SQ_IMG_RSRC_WORD5. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word5_t {
  unsigned base_array        : SQ_IMG_RSRC_WORD5_BASE_ARRAY_SIZE;
  unsigned array_pitch       : SQ_IMG_RSRC_WORD5_ARRAY_PITCH_SIZE;
  unsigned meta_data_address : SQ_IMG_RSRC_WORD5_META_DATA_ADDRESS_SIZE;
  unsigned meta_linear       : SQ_IMG_RSRC_WORD5_META_LINEAR_SIZE;
  unsigned meta_pipe_aligned : SQ_IMG_RSRC_WORD5_META_PIPE_ALIGNED_SIZE;
  unsigned meta_rb_aligned   : SQ_IMG_RSRC_WORD5_META_RB_ALIGNED_SIZE;
  unsigned max_mip           : SQ_IMG_RSRC_WORD5_MAX_MIP_SIZE;
} sq_img_rsrc_word5_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word5_t {
  unsigned max_mip           : SQ_IMG_RSRC_WORD5_MAX_MIP_SIZE;
  unsigned meta_rb_aligned   : SQ_IMG_RSRC_WORD5_META_RB_ALIGNED_SIZE;
  unsigned meta_pipe_aligned : SQ_IMG_RSRC_WORD5_META_PIPE_ALIGNED_SIZE;
  unsigned meta_linear       : SQ_IMG_RSRC_WORD5_META_LINEAR_SIZE;
  unsigned meta_data_address : SQ_IMG_RSRC_WORD5_META_DATA_ADDRESS_SIZE;
  unsigned array_pitch       : SQ_IMG_RSRC_WORD5_ARRAY_PITCH_SIZE;
  unsigned base_array        : SQ_IMG_RSRC_WORD5_BASE_ARRAY_SIZE;
} sq_img_rsrc_word5_t;
#endif

/* SQ_IMG_RSRC_WORD5 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word5_t f;
} sq_img_rsrc_word5_u;

/* Bit widths for SQ_IMG_RSRC_WORD6; the eight field widths add up to REG_SIZE. */
#define SQ_IMG_RSRC_WORD6_REG_SIZE 32
#define SQ_IMG_RSRC_WORD6_MIN_LOD_WARN_SIZE 12
#define SQ_IMG_RSRC_WORD6_COUNTER_BANK_ID_SIZE 8
#define SQ_IMG_RSRC_WORD6_LOD_HDW_CNT_EN_SIZE 1
#define SQ_IMG_RSRC_WORD6_COMPRESSION_EN_SIZE 1
#define SQ_IMG_RSRC_WORD6_ALPHA_IS_ON_MSB_SIZE 1
#define SQ_IMG_RSRC_WORD6_COLOR_TRANSFORM_SIZE 1
#define SQ_IMG_RSRC_WORD6_LOST_ALPHA_BITS_SIZE 4
#define SQ_IMG_RSRC_WORD6_LOST_COLOR_BITS_SIZE 4

/*
 * Field struct for SQ_IMG_RSRC_WORD6. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_rsrc_word6_t {
  unsigned min_lod_warn    : SQ_IMG_RSRC_WORD6_MIN_LOD_WARN_SIZE;
  unsigned counter_bank_id : SQ_IMG_RSRC_WORD6_COUNTER_BANK_ID_SIZE;
  unsigned lod_hdw_cnt_en  : SQ_IMG_RSRC_WORD6_LOD_HDW_CNT_EN_SIZE;
  unsigned compression_en  : SQ_IMG_RSRC_WORD6_COMPRESSION_EN_SIZE;
  unsigned alpha_is_on_msb : SQ_IMG_RSRC_WORD6_ALPHA_IS_ON_MSB_SIZE;
  unsigned color_transform : SQ_IMG_RSRC_WORD6_COLOR_TRANSFORM_SIZE;
  unsigned lost_alpha_bits : SQ_IMG_RSRC_WORD6_LOST_ALPHA_BITS_SIZE;
  unsigned lost_color_bits : SQ_IMG_RSRC_WORD6_LOST_COLOR_BITS_SIZE;
} sq_img_rsrc_word6_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word6_t {
  unsigned lost_color_bits : SQ_IMG_RSRC_WORD6_LOST_COLOR_BITS_SIZE;
  unsigned lost_alpha_bits : SQ_IMG_RSRC_WORD6_LOST_ALPHA_BITS_SIZE;
  unsigned color_transform : SQ_IMG_RSRC_WORD6_COLOR_TRANSFORM_SIZE;
  unsigned alpha_is_on_msb : SQ_IMG_RSRC_WORD6_ALPHA_IS_ON_MSB_SIZE;
  unsigned compression_en  : SQ_IMG_RSRC_WORD6_COMPRESSION_EN_SIZE;
  unsigned lod_hdw_cnt_en  : SQ_IMG_RSRC_WORD6_LOD_HDW_CNT_EN_SIZE;
  unsigned counter_bank_id : SQ_IMG_RSRC_WORD6_COUNTER_BANK_ID_SIZE;
  unsigned min_lod_warn    : SQ_IMG_RSRC_WORD6_MIN_LOD_WARN_SIZE;
} sq_img_rsrc_word6_t;
#endif

/* SQ_IMG_RSRC_WORD6 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word6_t f;
} sq_img_rsrc_word6_u;

/* Bit widths for SQ_IMG_RSRC_WORD7: the whole word and its single field. */
#define SQ_IMG_RSRC_WORD7_REG_SIZE 32
#define SQ_IMG_RSRC_WORD7_META_DATA_ADDRESS_SIZE 32

/*
 * Field struct for SQ_IMG_RSRC_WORD7. With only one full-width field the
 * declaration is the same for both byte orders; the type is still declared
 * only when one of the endianness macros is defined.
 */
#if defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU)
typedef struct _sq_img_rsrc_word7_t {
  unsigned meta_data_address : SQ_IMG_RSRC_WORD7_META_DATA_ADDRESS_SIZE;
} sq_img_rsrc_word7_t;
#endif

/* SQ_IMG_RSRC_WORD7 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_rsrc_word7_t f;
} sq_img_rsrc_word7_u;

/* Bit widths for SQ_IMG_SAMP_WORD0; the fourteen field widths add up to REG_SIZE. */
#define SQ_IMG_SAMP_WORD0_REG_SIZE 32
#define SQ_IMG_SAMP_WORD0_CLAMP_X_SIZE 3
#define SQ_IMG_SAMP_WORD0_CLAMP_Y_SIZE 3
#define SQ_IMG_SAMP_WORD0_CLAMP_Z_SIZE 3
#define SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SIZE 3
#define SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SIZE 3
#define SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SIZE 1
#define SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SIZE 3
#define SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SIZE 1
#define SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SIZE 1
#define SQ_IMG_SAMP_WORD0_ANISO_BIAS_SIZE 6
#define SQ_IMG_SAMP_WORD0_TRUNC_COORD_SIZE 1
#define SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SIZE 1
#define SQ_IMG_SAMP_WORD0_FILTER_MODE_SIZE 2
#define SQ_IMG_SAMP_WORD0_COMPAT_MODE_SIZE 1

/*
 * Field struct for SQ_IMG_SAMP_WORD0. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_samp_word0_t {
  unsigned clamp_x            : SQ_IMG_SAMP_WORD0_CLAMP_X_SIZE;
  unsigned clamp_y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SIZE;
  unsigned clamp_z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SIZE;
  unsigned max_aniso_ratio    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SIZE;
  unsigned depth_compare_func : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SIZE;
  unsigned force_unnormalized : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SIZE;
  unsigned aniso_threshold    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SIZE;
  unsigned mc_coord_trunc     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SIZE;
  unsigned force_degamma      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SIZE;
  unsigned aniso_bias         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SIZE;
  unsigned trunc_coord        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SIZE;
  unsigned disable_cube_wrap  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SIZE;
  unsigned filter_mode        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SIZE;
  unsigned compat_mode        : SQ_IMG_SAMP_WORD0_COMPAT_MODE_SIZE;
} sq_img_samp_word0_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_samp_word0_t {
  unsigned compat_mode        : SQ_IMG_SAMP_WORD0_COMPAT_MODE_SIZE;
  unsigned filter_mode        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SIZE;
  unsigned disable_cube_wrap  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SIZE;
  unsigned trunc_coord        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SIZE;
  unsigned aniso_bias         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SIZE;
  unsigned force_degamma      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SIZE;
  unsigned mc_coord_trunc     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SIZE;
  unsigned aniso_threshold    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SIZE;
  unsigned force_unnormalized : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SIZE;
  unsigned depth_compare_func : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SIZE;
  unsigned max_aniso_ratio    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SIZE;
  unsigned clamp_z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SIZE;
  unsigned clamp_y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SIZE;
  unsigned clamp_x            : SQ_IMG_SAMP_WORD0_CLAMP_X_SIZE;
} sq_img_samp_word0_t;
#endif

/* SQ_IMG_SAMP_WORD0 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_samp_word0_t f;
} sq_img_samp_word0_u;

/* Bit widths for SQ_IMG_SAMP_WORD1; the four field widths add up to REG_SIZE. */
#define SQ_IMG_SAMP_WORD1_REG_SIZE 32
#define SQ_IMG_SAMP_WORD1_MIN_LOD_SIZE 12
#define SQ_IMG_SAMP_WORD1_MAX_LOD_SIZE 12
#define SQ_IMG_SAMP_WORD1_PERF_MIP_SIZE 4
#define SQ_IMG_SAMP_WORD1_PERF_Z_SIZE 4

/*
 * Field struct for SQ_IMG_SAMP_WORD1. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_samp_word1_t {
  unsigned min_lod  : SQ_IMG_SAMP_WORD1_MIN_LOD_SIZE;
  unsigned max_lod  : SQ_IMG_SAMP_WORD1_MAX_LOD_SIZE;
  unsigned perf_mip : SQ_IMG_SAMP_WORD1_PERF_MIP_SIZE;
  unsigned perf_z   : SQ_IMG_SAMP_WORD1_PERF_Z_SIZE;
} sq_img_samp_word1_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_samp_word1_t {
  unsigned perf_z   : SQ_IMG_SAMP_WORD1_PERF_Z_SIZE;
  unsigned perf_mip : SQ_IMG_SAMP_WORD1_PERF_MIP_SIZE;
  unsigned max_lod  : SQ_IMG_SAMP_WORD1_MAX_LOD_SIZE;
  unsigned min_lod  : SQ_IMG_SAMP_WORD1_MIN_LOD_SIZE;
} sq_img_samp_word1_t;
#endif

/* SQ_IMG_SAMP_WORD1 as either a 32-bit value (`val`) or its field struct (`f`). */
typedef union {
  unsigned val : 32;
  sq_img_samp_word1_t f;
} sq_img_samp_word1_u;

/* Bit widths for SQ_IMG_SAMP_WORD2; the ten field widths add up to REG_SIZE. */
#define SQ_IMG_SAMP_WORD2_REG_SIZE 32
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SIZE 14
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SIZE 6
#define SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SIZE 2
#define SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SIZE 2
#define SQ_IMG_SAMP_WORD2_Z_FILTER_SIZE 2
#define SQ_IMG_SAMP_WORD2_MIP_FILTER_SIZE 2
#define SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SIZE 1
#define SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SIZE 1
#define SQ_IMG_SAMP_WORD2_FILTER_PREC_FIX_SIZE 1
#define SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SIZE 1

/*
 * Field struct for SQ_IMG_SAMP_WORD2. The two branches declare the same
 * members in opposite order; no type is declared if neither macro is set.
 */
#if defined(LITTLEENDIAN_CPU)
typedef struct _sq_img_samp_word2_t {
  unsigned lod_bias           : SQ_IMG_SAMP_WORD2_LOD_BIAS_SIZE;
  unsigned lod_bias_sec       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SIZE;
  unsigned xy_mag_filter      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SIZE;
  unsigned xy_min_filter      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SIZE;
  unsigned z_filter           : SQ_IMG_SAMP_WORD2_Z_FILTER_SIZE;
  unsigned mip_filter         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SIZE;
  unsigned mip_point_preclamp : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SIZE;
  unsigned blend_zero_prt     : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SIZE;
  unsigned filter_prec_fix    : SQ_IMG_SAMP_WORD2_FILTER_PREC_FIX_SIZE;
  unsigned aniso_override     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SIZE;
} sq_img_samp_word2_t;
#elif defined(BIGENDIAN_CPU)
typedef struct _sq_img_samp_word2_t {
  unsigned aniso_override     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SIZE;
  unsigned filter_prec_fix    : SQ_IMG_SAMP_WORD2_FILTER_PREC_FIX_SIZE;
  unsigned blend_zero_prt     : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SIZE;
  unsigned mip_point_preclamp : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SIZE;
  unsigned mip_filter         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SIZE;
  unsigned z_filter           : SQ_IMG_SAMP_WORD2_Z_FILTER_SIZE;
  unsigned xy_min_filter      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SIZE;
  unsigned xy_mag_filter      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SIZE;
  unsigned lod_bias_sec       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SIZE;
  unsigned lod_bias           : SQ_IMG_SAMP_WORD2_LOD_BIAS_SIZE;
} sq_img_samp_word2_t;
#endif

typedef union {
     unsigned int val : 32;
     sq_img_samp_word2_t f;
} sq_img_samp_word2_u;

// Sampler word 3: width in bits of the register and of its three named fields.
// The named fields cover 15 bits; the struct adds a 17-bit unnamed gap so the
// total is SQ_IMG_SAMP_WORD3_REG_SIZE (32).
#define SQ_IMG_SAMP_WORD3_REG_SIZE     32
#define SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SIZE 12
#define SQ_IMG_SAMP_WORD3_SKIP_DEGAMMA_SIZE 1
#define SQ_IMG_SAMP_WORD3_BORDER_COLOR_TYPE_SIZE 2

// The two branches declare the same fields in opposite order; which one is
// compiled is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU.
#if             defined(LITTLEENDIAN_CPU)

     typedef struct _sq_img_samp_word3_t {
          unsigned int border_color_ptr               : SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SIZE;
          unsigned int skip_degamma                   : SQ_IMG_SAMP_WORD3_SKIP_DEGAMMA_SIZE;
          // Unnamed 17-bit gap between skip_degamma and border_color_type.
          unsigned int                                : 17;
          unsigned int border_color_type              : SQ_IMG_SAMP_WORD3_BORDER_COLOR_TYPE_SIZE;
     } sq_img_samp_word3_t;

#elif           defined(BIGENDIAN_CPU)

     // Mirror image of the little-endian declaration order above.
     typedef struct _sq_img_samp_word3_t {
          unsigned int border_color_type              : SQ_IMG_SAMP_WORD3_BORDER_COLOR_TYPE_SIZE;
          unsigned int                                : 17;
          unsigned int skip_degamma                   : SQ_IMG_SAMP_WORD3_SKIP_DEGAMMA_SIZE;
          unsigned int border_color_ptr               : SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SIZE;
     } sq_img_samp_word3_t;

#endif

// Overlay for sampler word 3: `val` is the whole 32-bit word, `f` the
// per-field view of the same storage.
typedef union {
     unsigned int val : 32;
     sq_img_samp_word3_t f;
} sq_img_samp_word3_u;

// Numeric encodings of the FMT enumerators. The values are contiguous and
// cover 0x00 through 0x3f (64 entries), including the RESERVED_* slots.
typedef enum FMT {
  FMT_INVALID           = 0x00,
  FMT_8                 = 0x01,
  FMT_16                = 0x02,
  FMT_8_8               = 0x03,
  FMT_32                = 0x04,
  FMT_16_16             = 0x05,
  FMT_10_11_11          = 0x06,
  FMT_11_11_10          = 0x07,
  FMT_10_10_10_2        = 0x08,
  FMT_2_10_10_10        = 0x09,
  FMT_8_8_8_8           = 0x0a,
  FMT_32_32             = 0x0b,
  FMT_16_16_16_16       = 0x0c,
  FMT_32_32_32          = 0x0d,
  FMT_32_32_32_32       = 0x0e,
  FMT_RESERVED_4        = 0x0f,
  FMT_5_6_5             = 0x10,
  FMT_1_5_5_5           = 0x11,
  FMT_5_5_5_1           = 0x12,
  FMT_4_4_4_4           = 0x13,
  FMT_8_24              = 0x14,
  FMT_24_8              = 0x15,
  FMT_X24_8_32_FLOAT    = 0x16,
  FMT_RESERVED_33       = 0x17,
  FMT_11_11_10_FLOAT    = 0x18,
  FMT_16_FLOAT          = 0x19,
  FMT_32_FLOAT          = 0x1a,
  FMT_16_16_FLOAT       = 0x1b,
  FMT_8_24_FLOAT        = 0x1c,
  FMT_24_8_FLOAT        = 0x1d,
  FMT_32_32_FLOAT       = 0x1e,
  FMT_10_11_11_FLOAT    = 0x1f,
  FMT_16_16_16_16_FLOAT = 0x20,
  FMT_3_3_2             = 0x21,
  FMT_6_5_5             = 0x22,
  FMT_32_32_32_32_FLOAT = 0x23,
  FMT_RESERVED_36       = 0x24,
  FMT_1                 = 0x25,
  FMT_1_REVERSED        = 0x26,
  FMT_GB_GR             = 0x27,
  FMT_BG_RG             = 0x28,
  FMT_32_AS_8           = 0x29,
  FMT_32_AS_8_8         = 0x2a,
  FMT_5_9_9_9_SHAREDEXP = 0x2b,
  FMT_8_8_8             = 0x2c,
  FMT_16_16_16          = 0x2d,
  FMT_16_16_16_FLOAT    = 0x2e,
  FMT_4_4               = 0x2f,
  FMT_32_32_32_FLOAT    = 0x30,
  FMT_BC1               = 0x31,
  FMT_BC2               = 0x32,
  FMT_BC3               = 0x33,
  FMT_BC4               = 0x34,
  FMT_BC5               = 0x35,
  FMT_BC6               = 0x36,
  FMT_BC7               = 0x37,
  FMT_32_AS_32_32_32_32 = 0x38,
  FMT_APC3              = 0x39,
  FMT_APC4              = 0x3a,
  FMT_APC5              = 0x3b,
  FMT_APC6              = 0x3c,
  FMT_APC7              = 0x3d,
  FMT_CTX1              = 0x3e,
  FMT_RESERVED_63       = 0x3f,
} FMT;

// Numeric encodings of the TYPE_* enumerators; contiguous from 0x0 to 0xa,
// including the two RESERVED slots.
typedef enum type {
  TYPE_UNORM      = 0x0,
  TYPE_SNORM      = 0x1,
  TYPE_USCALED    = 0x2,
  TYPE_SSCALED    = 0x3,
  TYPE_UINT       = 0x4,
  TYPE_SINT       = 0x5,
  TYPE_RESERVED_6 = 0x6,
  TYPE_FLOAT      = 0x7,
  TYPE_RESERVED_8 = 0x8,
  TYPE_SRGB       = 0x9,
  TYPE_UNORM_UINT = 0xa,
} type;

// Numeric encodings of the SEL_* enumerators. Values 2 and 3 are not assigned
// to any enumerator.
typedef enum SEL {
  SEL_0 = 0x0,
  SEL_1 = 0x1,
  SEL_X = 0x4,
  SEL_Y = 0x5,
  SEL_Z = 0x6,
  SEL_W = 0x7,
} SEL;

// Numeric encodings of the SQ_RSRC_IMG_* enumerators. The values start at 0x8
// and skip 0xb.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D       = 0x8,
  SQ_RSRC_IMG_2D       = 0x9,
  SQ_RSRC_IMG_3D       = 0xa,
  SQ_RSRC_IMG_1D_ARRAY = 0xc,
  SQ_RSRC_IMG_2D_ARRAY = 0xd,
} SQ_RSRC_IMG_TYPE;

// Numeric encodings of the SQ_TEX_XY_FILTER_* enumerators (0x0 - 0x3).
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0x0,
  SQ_TEX_XY_FILTER_BILINEAR       = 0x1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 0x2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x3,
} SQ_TEX_XY_FILTER;

// Numeric encodings of the SQ_TEX_Z_FILTER_* enumerators (0x0 - 0x2).
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0x0,
  SQ_TEX_Z_FILTER_POINT  = 0x1,
  SQ_TEX_Z_FILTER_LINEAR = 0x2,
} SQ_TEX_Z_FILTER;

// Numeric encodings of the SQ_TEX_MIP_FILTER_* enumerators (0x0 - 0x3).
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0x0,
  SQ_TEX_MIP_FILTER_POINT               = 0x1,
  SQ_TEX_MIP_FILTER_LINEAR              = 0x2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 0x3,
} SQ_TEX_MIP_FILTER;

// Numeric encodings of the SQ_TEX_CLAMP enumerators (0x0 - 0x7, contiguous).
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0x0,
  SQ_TEX_MIRROR                  = 0x1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 0x2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 0x3,
  SQ_TEX_CLAMP_HALF_BORDER       = 0x4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x5,
  SQ_TEX_CLAMP_BORDER            = 0x6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 0x7,
} SQ_TEX_CLAMP;

// Numeric encodings of the SQ_TEX_BORDER_COLOR_* enumerators (0x0 - 0x3).
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0x0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 0x3,
} SQ_TEX_BORDER_COLOR;

// Numeric encodings of the TEX_BC_Swizzle_* enumerators (0x0 - 0x5).
typedef enum TEX_BC_SWIZZLE {
  TEX_BC_Swizzle_XYZW = 0x0,
  TEX_BC_Swizzle_XWYZ = 0x1,
  TEX_BC_Swizzle_WZYX = 0x2,
  TEX_BC_Swizzle_WXYZ = 0x3,
  TEX_BC_Swizzle_ZYXW = 0x4,
  TEX_BC_Swizzle_YXWZ = 0x5,
} TEX_BC_SWIZZLE;

// Image metadata record: a version/vendor header, the eight SQ_IMG_RSRC
// descriptor words, and a trailing array of per-level mip offsets.
typedef struct metadata_amd_ai_s {
    uint32_t version; // Must be 1
    uint32_t vendorID; // AMD
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    SQ_IMG_RSRC_WORD6 word6;
    SQ_IMG_RSRC_WORD7 word7;
    // Zero-length trailing array (a compiler extension in C++). The number of
    // entries is not stored in this struct -- presumably it follows from the
    // image's mip level count; confirm against the code that fills this in.
    uint32_t mip_offsets[0]; //Mip level offset bits [39:8] for each level (if any)
} metadata_amd_ai_t;

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_RESOURCE_AI_H


================================================
FILE: runtime/hsa-runtime/image/resource_gfx11.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_RESOURCE_GFX11_H_
#define EXT_IMAGE_RESOURCE_GFX11_H_

#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

namespace rocr {
namespace image {

/**********************************************************/
/**********************************************************/
// Buffer resource word 0: register width and field width, in bits.
#define SQ_BUF_RSC_WRD0_REG_SZ 32
#define SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ 32

// Bit-field view of buffer resource word 0. The word holds a single 32-bit
// field, so both endianness branches are identical.
struct sq_buf_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};

// Overlay for buffer resource word 0. `bitfields`, `bits` and `f` are three
// names for the same per-field view; `val`, `u32All`, `i32All` and `f32All`
// alias the whole word as unsigned, signed and float values.
union SQ_BUF_RSRC_WORD0 {
  sq_buf_rsrc_word0_t bitfields, bits, f;
  uint32_t val : SQ_BUF_RSC_WRD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};

/***********/

/* Note: These registers are also defined/used in registers.h
 * in SQ_BUF_RSRC_WORD1_GFX11
 */
// Buffer resource word 1: register width and field widths, in bits.
// 16 + 14 + 2 = 32, so the struct has no padding fields.
#define SQ_BUF_RSC_WRD1_REG_SZ 32
#define SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ  16
#define SQ_BUF_RSC_WRD1_STRIDE_SZ           14
#define SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ   2
// Bit-field view of buffer resource word 1. The big-endian branch declares the
// same fields in reverse order.
struct sq_buf_rsrc_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};

// Overlay for buffer resource word 1: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_BUF_RSRC_WORD1 {
  sq_buf_rsrc_word1_t bitfields, bits, f;
  uint32_t val : SQ_BUF_RSC_WRD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Buffer resource word 2: register width and field width, in bits.
#define SQ_BUF_RSC_WRD2_REG_SZ 32
#define SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ 32
// Bit-field view of buffer resource word 2. The word holds a single 32-bit
// field, so both endianness branches are identical.
struct sq_buf_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#endif
};
// Overlay for buffer resource word 2: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_BUF_RSRC_WORD2 {
  sq_buf_rsrc_word2_t bitfields, bits, f;
  uint32_t val : SQ_BUF_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Buffer resource word 3: register width and field widths, in bits.
// The named fields cover 27 bits; the struct adds unnamed gaps of 3 and 2 bits
// to reach 32.
// Note: the OOB_SELECT macro is spelled SQ_BUF_RSC_WORD3_... whereas its
// siblings use the SQ_BUF_RSC_WRD3_... prefix.
#define SQ_BUF_RSC_WRD3_REG_SZ 32
#define SQ_BUF_RSC_WRD3_DST_SEL_X_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_W_SZ        3
#define SQ_BUF_RSC_WRD3_FORMAT_SZ           6
#define SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ     2
#define SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ   1
#define SQ_BUF_RSC_WRD3_LLC_NOALLOC_SZ      2
#define SQ_BUF_RSC_WORD3_OOB_SELECT_SZ      2
#define SQ_BUF_RSC_WRD3_TYPE_SZ             2
// Bit-field view of buffer resource word 3. The big-endian branch declares the
// same fields and gaps in reverse order.
struct sq_buf_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X      : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;
  unsigned int DST_SEL_Y      : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_Z      : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_W      : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int FORMAT         : SQ_BUF_RSC_WRD3_FORMAT_SZ;
  unsigned int                : 3;
  unsigned int INDEX_STRIDE   : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
  unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
  unsigned int                : 2;
  unsigned int LLC_NOALLOC    : SQ_BUF_RSC_WRD3_LLC_NOALLOC_SZ;
  unsigned int OOB_SELECT     : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
  unsigned int TYPE           : SQ_BUF_RSC_WRD3_TYPE_SZ;

#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE           : SQ_BUF_RSC_WRD3_TYPE_SZ;
  unsigned int OOB_SELECT     : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
  unsigned int LLC_NOALLOC    : SQ_BUF_RSC_WRD3_LLC_NOALLOC_SZ;
  unsigned int                : 2;
  unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
  unsigned int INDEX_STRIDE   : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
  unsigned int                : 3;
  unsigned int FORMAT         : SQ_BUF_RSC_WRD3_FORMAT_SZ;
  unsigned int DST_SEL_W      : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z      : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y      : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X      : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;

#endif
};
// Overlay for buffer resource word 3: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_BUF_RSRC_WORD3 {
  sq_buf_rsrc_word3_t bitfields, bits, f;
  uint32_t val : SQ_BUF_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**********************************************************/
/**********************************************************/
// Image resource word 0: register width and field width, in bits.
#define SQ_IMG_RSC_WRD0_REG_SZ 32
#define SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ 32
// Bit-field view of image resource word 0. The word holds a single 32-bit
// field, so both endianness branches are identical.
struct sq_img_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};
// Overlay for image resource word 0: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD0 {
  sq_img_rsrc_word0_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 1: register width and field widths, in bits.
// The named fields cover 25 bits; the struct adds unnamed gaps of 5 and 2 bits
// to reach 32.
// Note: SQ_IMG_RSC_WRD1_WIDTH_LO is a width like the others but lacks the _SZ
// suffix.
#define SQ_IMG_RSC_WRD1_REG_SZ 32
#define SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ  8
#define SQ_IMG_RSC_WRD1_LLC_NOALLOC_SZ      2
#define SQ_IMG_RSC_WRD1_BIG_PAGE_SZ         1
#define SQ_IMG_RSC_WRD1_MAX_MIP_SZ          4
#define SQ_IMG_RSC_WRD1_FORMAT_SZ           8
#define SQ_IMG_RSC_WRD1_WIDTH_LO            2

// Bit-field view of image resource word 1. The big-endian branch declares the
// same fields and gaps in reverse order.
// WIDTH is only 2 bits wide here; presumably it holds the low bits of the
// image width, continued by WIDTH_HI in word 2 -- confirm against users.
struct sq_img_rsrc_word1_t{
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
  unsigned int                 : 5;
  unsigned int LLC_NOALLOC     : SQ_IMG_RSC_WRD1_LLC_NOALLOC_SZ;
  unsigned int BIG_PAGE        : SQ_IMG_RSC_WRD1_BIG_PAGE_SZ;
  unsigned int MAX_MIP         : SQ_IMG_RSC_WRD1_MAX_MIP_SZ;
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;
  unsigned int                 : 2;
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;
#elif defined(BIGENDIAN_CPU)
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;
  unsigned int                 : 2;
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;
  unsigned int MAX_MIP         : SQ_IMG_RSC_WRD1_MAX_MIP_SZ;
  unsigned int BIG_PAGE        : SQ_IMG_RSC_WRD1_BIG_PAGE_SZ;
  unsigned int LLC_NOALLOC     : SQ_IMG_RSC_WRD1_LLC_NOALLOC_SZ;
  unsigned int                 : 5;
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};
// Overlay for image resource word 1: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD1 {
  sq_img_rsrc_word1_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 2: register width and field widths, in bits.
// The named fields cover 26 bits; the struct adds three unnamed 2-bit gaps to
// reach 32.
#define SQ_IMG_RSC_WRD2_REG_SZ 32
#define SQ_IMG_RSC_WRD2_WIDTH_HI_SZ        12
#define SQ_IMG_RSC_WRD2_HEIGHT_SZ          14
// Bit-field view of image resource word 2. The big-endian branch declares the
// same fields and gaps in reverse order.
struct sq_img_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int WIDTH_HI       : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
  unsigned int                : 2;
  unsigned int HEIGHT         : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int                : 2;
  unsigned int                : 2;
#elif defined(BIGENDIAN_CPU)
  unsigned int                : 2;
  unsigned int                : 2;
  unsigned int HEIGHT         : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int                : 2;
  // Uses the same width macro as the little-endian branch. The macro named
  // here before (SQ_IMG_RSC_WRD2_WIDTH_SZ) is not among the WRD2 macros defined
  // above, so this branch could not compile.
  unsigned int WIDTH_HI       : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
#endif
};
// Overlay for image resource word 2: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD2 {
  sq_img_rsrc_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 3: register width and field widths, in bits.
// 3 + 3 + 3 + 3 + 4 + 4 + 5 + 3 + 4 = 32, so the struct has no padding fields.
#define SQ_IMG_RSC_WRD3_REG_SZ 32
#define SQ_IMG_RSC_WRD3_DST_SEL_X_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_W_SZ  3
#define SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_SW_MODE_SZ    5
#define SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ 3
#define SQ_IMG_RSC_WRD3_TYPE_SZ       4
// Bit-field view of image resource word 3. The big-endian branch declares the
// same fields, under the same names, in reverse order.
struct sq_img_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X  : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
  unsigned int DST_SEL_Y  : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_Z  : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_W  : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int SW_MODE    : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  unsigned int TYPE       : SQ_IMG_RSC_WRD3_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE       : SQ_IMG_RSC_WRD3_TYPE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  // Named SW_MODE to match the little-endian branch; it was misspelled W_MODE,
  // which made `.SW_MODE` accesses fail to compile on big-endian builds.
  unsigned int SW_MODE    : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int DST_SEL_W  : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z  : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y  : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X  : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
#endif
};
// Overlay for image resource word 3: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD3 {
  sq_img_rsrc_word3_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 4: register width and field widths, in bits.
// DEPTH (13 bits) and PITCH (14 bits) belong to two alternative views of the
// same word and therefore overlap.
#define SQ_IMG_RSC_WRD4_REG_SZ 32
#define SQ_IMG_RSC_WRD4_DEPTH_SZ    13
#define SQ_IMG_RSC_WRD4_PITCH_SZ    14
#define SQ_IMG_RSC_WRD4_BASE_ARR_SZ 13
// Two anonymous-struct views over the same 32 bits: one exposing DEPTH and
// BASE_ARRAY, the other exposing PITCH. Each view adds up to 32 bits
// (13 + 1 + 2 + 13 + 3, and 14 + 18 respectively).
union sq_img_rsrc_word4_t {
  struct {
#if defined(LITTLEENDIAN_CPU)
    // For arrays this is last slice in view, for 3D this is depth-1, For remaining this is pitch-1
    unsigned int DEPTH      : SQ_IMG_RSC_WRD4_DEPTH_SZ;
    unsigned int            : 1;  // Pitch[13]
    unsigned int            : 2;
    unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
    unsigned int            : 3;
#elif defined(BIGENDIAN_CPU)
    unsigned int            : 3;
    unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
    unsigned int            : 2;
    unsigned int            : 1;  // Pitch[13]
    unsigned int DEPTH      : SQ_IMG_RSC_WRD4_DEPTH_SZ;
#endif
  };

  struct {
#if defined(LITTLEENDIAN_CPU)
    // For 1d, 2d and 2d-msaa in gfx1030 this is pitch-1
    unsigned int PITCH      : SQ_IMG_RSC_WRD4_PITCH_SZ;
    // Remaining bits of the word (register width minus the PITCH width).
    unsigned int            : SQ_IMG_RSC_WRD4_REG_SZ-SQ_IMG_RSC_WRD4_PITCH_SZ;
#elif defined(BIGENDIAN_CPU)
    unsigned int            : SQ_IMG_RSC_WRD4_REG_SZ-SQ_IMG_RSC_WRD4_PITCH_SZ;
    unsigned int PITCH      : SQ_IMG_RSC_WRD4_PITCH_SZ;
#endif
  };
};
// Overlay for image resource word 4: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD4 {
  sq_img_rsrc_word4_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD4_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 5: register width and field widths, in bits.
// The named fields cover 28 bits; the struct adds one unnamed 4-bit gap to
// reach 32.
// Note: SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN is a width like the others but lacks
// the _SZ suffix.
#define SQ_IMG_RSC_WRD5_REG_SZ 32
#define SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ               4
#define SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ               4
#define SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ              4
#define SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ               4
#define SQ_IMG_RSC_WRD5_PERF_MOD_SZ                  3
#define SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ            1
#define SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ           1
#define SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN               1
#define SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ               1
#define SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ                5


// Bit-field view of image resource word 5. The big-endian branch declares the
// same fields and gap in reverse order.
struct sq_img_rsrc_word5_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int ARRAY_PITCH          : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
  unsigned int                      : 4;
  unsigned int DEPTH_SCALE          : SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ;
  unsigned int HEIGHT_SCALE         : SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ;
  unsigned int WIDTH_SCALE          : SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ;
  unsigned int PERF_MOD             : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
  unsigned int CORNER_SAMPLES       : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
  unsigned int LINKED_RESOURCE      : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
  unsigned int LOD_HWD_CNT          : SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN;
  unsigned int PRT_DEFAULT          : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
  unsigned int MIN_LOD_LO           : SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ;

#elif defined(BIGENDIAN_CPU)
  unsigned int MIN_LOD_LO           : SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ;
  unsigned int PRT_DEFAULT          : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
  unsigned int LOD_HWD_CNT          : SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN;
  unsigned int LINKED_RESOURCE      : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
  unsigned int CORNER_SAMPLES       : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
  unsigned int PERF_MOD             : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
  unsigned int WIDTH_SCALE          : SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ;
  unsigned int HEIGHT_SCALE         : SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ;
  unsigned int DEPTH_SCALE          : SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ;
  unsigned int                      : 4;
  unsigned int ARRAY_PITCH          : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
#endif
};

// Overlay for image resource word 5: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD5 {
  sq_img_rsrc_word5_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD5_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 6: register width and field widths, in bits.
// The named fields cover 29 bits; the struct adds one unnamed 3-bit gap to
// reach 32.
// Note: ITERATE_256 and SAMPLE_PATTERN_OFFSET are widths like the others but
// their macros lack the _SZ suffix.
#define SQ_IMG_RSC_WRD6_REG_SZ 32
#define SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ             7
#define SQ_IMG_RSC_WRD6_ITERATE_256               1
#define SQ_IMG_RSC_WRD6_SAMPLE_PATTERN_OFFSET     4
#define SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ      2
#define SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ        2
#define SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ      1
#define SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ      1
#define SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ     1
#define SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ        1
#define SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ        1
#define SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ         8
// Bit-field view of image resource word 6. The big-endian branch declares the
// same fields and gap in reverse order.
struct sq_img_rsrc_word6_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int MIN_LOD_HI            : SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ;
  unsigned int                       : 3;
  unsigned int ITERATE_256           : SQ_IMG_RSC_WRD6_ITERATE_256;
  unsigned int SAMPLE_PATTERN_OFFSET : SQ_IMG_RSC_WRD6_SAMPLE_PATTERN_OFFSET;
  unsigned int MAX_UNCOMP_BLK_SZ     : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
  unsigned int MAX_COMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
  unsigned int META_PIPE_ALIGNED     : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
  unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
  unsigned int COMPRESSION_ENABLE    : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
  unsigned int ALPHA_IS_ON_MSB       : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
  unsigned int COLOR_TRANSFORM       : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
  unsigned int META_DATA_ADDRESS     : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int META_DATA_ADDRESS     : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
  unsigned int COLOR_TRANSFORM       : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
  unsigned int ALPHA_IS_ON_MSB       : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
  unsigned int COMPRESSION_ENABLE    : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
  unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
  unsigned int META_PIPE_ALIGNED     : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
  unsigned int MAX_COMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
  unsigned int MAX_UNCOMP_BLK_SZ     : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
  unsigned int SAMPLE_PATTERN_OFFSET : SQ_IMG_RSC_WRD6_SAMPLE_PATTERN_OFFSET;
  unsigned int ITERATE_256           : SQ_IMG_RSC_WRD6_ITERATE_256;
  unsigned int                       : 3;
  unsigned int MIN_LOD_HI            : SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ;
#endif
};
// Overlay for image resource word 6: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD6 {
  sq_img_rsrc_word6_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD6_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource word 7: register width and field width, in bits.
#define SQ_IMG_RSC_WRD7_REG_SZ 32
#define SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ 32
// Bit-field view of image resource word 7. The word holds a single 32-bit
// field, so both endianness branches are identical.
struct sq_img_rsrc_word7_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#endif
};
// Overlay for image resource word 7: per-field view (`bitfields`/`bits`/`f`)
// and whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_RSRC_WORD7 {
  sq_img_rsrc_word7_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD7_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/
/**********************************************************/
/**********************************************************/

// Sampler word 0: register width and field widths, in bits.
// The fourteen field widths add up to 32, so the struct has no padding fields.
#define SQ_IMG_SAMP_WORD0_REG_SZ 32
#define SQ_IMG_SAMP_WORD0_CLAMP_X_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ            3
#define SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ    3
#define SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ 3
#define SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ 1
#define SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ    3
#define SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ     1
#define SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ      1
#define SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ         6
#define SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ        1
#define SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ  1
#define SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ        2
#define SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ       1
// Bit-field view of sampler word 0. The big-endian branch declares the same
// fields in reverse order.
struct sq_img_samp_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
#endif
};

// Overlay for sampler word 0: per-field view (`bitfields`/`bits`/`f`) and
// whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_SAMP_WORD0 {
  sq_img_samp_word0_t bitfields, bits, f;
  uint32_t val : SQ_IMG_SAMP_WORD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Sampler word 1: register width and field widths, in bits.
// 12 + 12 + 4 + 4 = 32, so the struct has no padding fields.
#define SQ_IMG_SAMP_WORD1_REG_SZ 32
#define SQ_IMG_SAMP_WORD1_MIN_LOD_SZ  12
#define SQ_IMG_SAMP_WORD1_MAX_LOD_SZ  12
#define SQ_IMG_SAMP_WORD1_PERF_MIP_SZ 4
#define SQ_IMG_SAMP_WORD1_PERF_Z_SZ   4
// Bit-field view of sampler word 1. The big-endian branch declares the same
// fields in reverse order.
struct sq_img_samp_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int MIN_LOD  : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
  unsigned int MAX_LOD  : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
  unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
  unsigned int PERF_Z   : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int PERF_Z   : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
  unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
  unsigned int MAX_LOD  : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
  unsigned int MIN_LOD  : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
#endif
};

// Overlay for sampler word 1: per-field view (`bitfields`/`bits`/`f`) and
// whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_SAMP_WORD1 {
  sq_img_samp_word1_t bitfields, bits, f;
  uint32_t val : SQ_IMG_SAMP_WORD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Sampler word 2: register width and field widths, in bits.
// The named fields cover 31 bits; the struct adds one unnamed 1-bit gap to
// reach 32.
// Note: the width macro for the BLEND_PRT field is spelled ..._BLEND_PTR_SZ.
#define SQ_IMG_SAMP_WORD2_REG_SZ 32
#define SQ_IMG_SAMP_WORD2_BC_PTR_SZ               12
#define SQ_IMG_SAMP_WORD2_BC_TYPE_SZ              2
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ         6
#define SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ        2
#define SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ        2
#define SQ_IMG_SAMP_WORD2_Z_FILTER_SZ             2
#define SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ           2
#define SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ       1
#define SQ_IMG_SAMP_WORD2_BLEND_PTR_SZ            1
#define SQ_IMG_SAMP_WORD2_DERIV_ADJUST_EN_SZ      1
// Bit-field view of sampler word 2. The big-endian branch declares the same
// fields and gap in reverse order.
struct sq_img_samp_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BC_PTR             : SQ_IMG_SAMP_WORD2_BC_PTR_SZ;
  unsigned int BC_TYPE            : SQ_IMG_SAMP_WORD2_BC_TYPE_SZ;
  unsigned int LOD_BIAS_SEC       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int XY_MAG_FILTER      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int XY_MIN_FILTER      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int Z_FILTER           : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int MIP_FILTER         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int                    : 1;
  unsigned int ANISO_OVERRIDE     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int BLEND_PRT          : SQ_IMG_SAMP_WORD2_BLEND_PTR_SZ;
  unsigned int DERIV_ADJUST_EN    : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_EN_SZ;
#elif defined(BIGENDIAN_CPU)
  // This branch previously lacked the ';' after DERIV_ADJUST_EN and used an
  // undefined width macro for BLEND_PRT, so it could not compile.
  unsigned int DERIV_ADJUST_EN    : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_EN_SZ;
  unsigned int BLEND_PRT          : SQ_IMG_SAMP_WORD2_BLEND_PTR_SZ;
  unsigned int ANISO_OVERRIDE     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int                    : 1;
  unsigned int MIP_FILTER         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int Z_FILTER           : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int XY_MIN_FILTER      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int XY_MAG_FILTER      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int LOD_BIAS_SEC       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int BC_TYPE            : SQ_IMG_SAMP_WORD2_BC_TYPE_SZ;
  unsigned int BC_PTR             : SQ_IMG_SAMP_WORD2_BC_PTR_SZ;
#endif
};

// Overlay for sampler word 2: per-field view (`bitfields`/`bits`/`f`) and
// whole-word views (`val`, `u32All`, `i32All`, `f32All`) of one storage.
union SQ_IMG_SAMP_WORD2 {
  sq_img_samp_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_SAMP_WORD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_SAMP word 3 (gfx11).  The *_SZ macros are field widths in bits and
// add up to the register size: 16 + 2 + 12 + 2 = 32.
#define SQ_IMG_SAMP_WORD3_REG_SZ             32
#define SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ 16
#define SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ      2
#define SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ     12
#define SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ 2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_samp_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int GRAD_ADJ_OR_DAV   : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;  // [15:0]
  unsigned int RES_OR_DAV        : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;       // [17:16]
  unsigned int BCP_LRS_DAV       : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;      // [29:18]
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;  // [31:30]
#elif defined(BIGENDIAN_CPU)
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
  unsigned int BCP_LRS_DAV       : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;
  unsigned int RES_OR_DAV        : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;
  unsigned int GRAD_ADJ_OR_DAV   : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_SAMP_WORD3 {
  sq_img_samp_word3_t bitfields;
  sq_img_samp_word3_t bits;
  sq_img_samp_word3_t f;

  uint32_t val : SQ_IMG_SAMP_WORD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**************************************************************/
/**************************************************************/
/**************************************************************/

// FMT_* encodings, numbered contiguously from 0x0 to 0x17.
typedef enum FMT {
  FMT_INVALID      = 0x0,
  FMT_8            = 0x1,
  FMT_16           = 0x2,
  FMT_8_8          = 0x3,
  FMT_32           = 0x4,
  FMT_16_16        = 0x5,
  FMT_10_11_11     = 0x6,
  FMT_11_11_10     = 0x7,
  FMT_10_10_10_2   = 0x8,
  FMT_2_10_10_10   = 0x9,
  FMT_8_8_8_8      = 0xa,
  FMT_32_32        = 0xb,
  FMT_16_16_16_16  = 0xc,
  FMT_32_32_32     = 0xd,
  FMT_32_32_32_32  = 0xe,
  FMT_RESERVED_78  = 0xf,
  FMT_5_6_5        = 0x10,
  FMT_1_5_5_5      = 0x11,
  FMT_5_5_5_1      = 0x12,
  FMT_4_4_4_4      = 0x13,
  FMT_8_24         = 0x14,
  FMT_24_8         = 0x15,
  FMT_X24_8_32     = 0x16,
  FMT_RESERVED_155 = 0x17,
} FMT;

// TYPE_* encodings, numbered contiguously from 0x0 to 0xc.
typedef enum type {
  TYPE_UNORM          = 0x0,
  TYPE_SNORM          = 0x1,
  TYPE_USCALED        = 0x2,
  TYPE_SSCALED        = 0x3,
  TYPE_UINT           = 0x4,
  TYPE_SINT           = 0x5,
  TYPE_SRGB           = 0x6,
  TYPE_FLOAT          = 0x7,
  TYPE_RESERVED_8     = 0x8,
  TYPE_RESERVED_9     = 0x9,
  TYPE_UNORM_UINT     = 0xa,
  TYPE_REVERSED_UNORM = 0xb,
  TYPE_FLOAT_CLAMP    = 0xc,
} type;

// CFMT_* encodings, numbered contiguously from 0 to 81.  Each name pairs a
// component layout with a numeric-type suffix (for example 8_8_8_8 + UNORM).
enum FORMAT {
  CFMT_INVALID             = 0,
  CFMT_8_UNORM             = 1,
  CFMT_8_SNORM             = 2,
  CFMT_8_USCALED           = 3,
  CFMT_8_SSCALED           = 4,
  CFMT_8_UINT              = 5,
  CFMT_8_SINT              = 6,
  CFMT_16_UNORM            = 7,
  CFMT_16_SNORM            = 8,
  CFMT_16_USCALED          = 9,
  CFMT_16_SSCALED          = 10,
  CFMT_16_UINT             = 11,
  CFMT_16_SINT             = 12,
  CFMT_16_FLOAT            = 13,
  CFMT_8_8_UNORM           = 14,
  CFMT_8_8_SNORM           = 15,
  CFMT_8_8_USCALED         = 16,
  CFMT_8_8_SSCALED         = 17,
  CFMT_8_8_UINT            = 18,
  CFMT_8_8_SINT            = 19,
  CFMT_32_UINT             = 20,
  CFMT_32_SINT             = 21,
  CFMT_32_FLOAT            = 22,
  CFMT_16_16_UNORM         = 23,
  CFMT_16_16_SNORM         = 24,
  CFMT_16_16_USCALED       = 25,
  CFMT_16_16_SSCALED       = 26,
  CFMT_16_16_UINT          = 27,
  CFMT_16_16_SINT          = 28,
  CFMT_16_16_FLOAT         = 29,
  CFMT_10_11_11_FLOAT      = 30,
  CFMT_11_11_10_FLOAT      = 31,
  CFMT_10_10_10_2_UNORM    = 32,
  CFMT_10_10_10_2_SNORM    = 33,
  CFMT_10_10_10_2_UINT     = 34,
  CFMT_10_10_10_2_SINT     = 35,
  CFMT_2_10_10_10_UNORM    = 36,
  CFMT_2_10_10_10_SNORM    = 37,
  CFMT_2_10_10_10_USCALED  = 38,
  CFMT_2_10_10_10_SSCALED  = 39,
  CFMT_2_10_10_10_UINT     = 40,
  CFMT_2_10_10_10_SINT     = 41,
  CFMT_8_8_8_8_UNORM       = 42,
  CFMT_8_8_8_8_SNORM       = 43,
  CFMT_8_8_8_8_USCALED     = 44,
  CFMT_8_8_8_8_SSCALED     = 45,
  CFMT_8_8_8_8_UINT        = 46,
  CFMT_8_8_8_8_SINT        = 47,
  CFMT_32_32_UINT          = 48,
  CFMT_32_32_SINT          = 49,
  CFMT_32_32_FLOAT         = 50,
  CFMT_16_16_16_16_UNORM   = 51,
  CFMT_16_16_16_16_SNORM   = 52,
  CFMT_16_16_16_16_USCALED = 53,
  CFMT_16_16_16_16_SSCALED = 54,
  CFMT_16_16_16_16_UINT    = 55,
  CFMT_16_16_16_16_SINT    = 56,
  CFMT_16_16_16_16_FLOAT   = 57,
  CFMT_32_32_32_UINT       = 58,
  CFMT_32_32_32_SINT       = 59,
  CFMT_32_32_32_FLOAT      = 60,
  CFMT_32_32_32_32_UINT    = 61,
  CFMT_32_32_32_32_SINT    = 62,
  CFMT_32_32_32_32_FLOAT   = 63,
  CFMT_8_SRGB              = 64,
  CFMT_8_8_SRGB            = 65,
  CFMT_8_8_8_8_SRGB        = 66,
  CFMT_5_9_9_9_FLOAT       = 67,
  CFMT_5_6_5_UNORM         = 68,
  CFMT_1_5_5_5_UNORM       = 69,
  CFMT_5_5_5_1_UNORM       = 70,
  CFMT_4_4_4_4_UNORM       = 71,
  CFMT_4_4_UNORM           = 72,
  CFMT_1_UNORM             = 73,
  CFMT_1_REVERSED_UNORM    = 74,
  CFMT_32_FLOAT_CLAMP      = 75,
  CFMT_8_24_UNORM          = 76,
  CFMT_8_24_UINT           = 77,
  CFMT_24_8_UNORM          = 78,
  CFMT_24_8_UINT           = 79,
  CFMT_X24_8_32_UINT       = 80,
  CFMT_X24_8_32_FLOAT      = 81,
};

// SEL_* encodings: two constants (0, 1) and four component selectors (X..W).
// Values 2 and 3 are not assigned.
typedef enum SEL {
  SEL_0 = 0x0,
  SEL_1 = 0x1,
  SEL_X = 0x4,
  SEL_Y = 0x5,
  SEL_Z = 0x6,
  SEL_W = 0x7,
} SEL;

// SQ_RSRC_IMG_* encodings, numbered contiguously from 0x8 to 0xf.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D            = 0x8,
  SQ_RSRC_IMG_2D            = 0x9,
  SQ_RSRC_IMG_3D            = 0xa,
  SQ_RSRC_IMG_CUBE_ARRAY    = 0xb,
  SQ_RSRC_IMG_1D_ARRAY      = 0xc,
  SQ_RSRC_IMG_2D_ARRAY      = 0xd,
  SQ_RSRC_IMG_2D_MSAA       = 0xe,
  SQ_RSRC_IMG_2D_MSAA_ARRAY = 0xf,
} SQ_RSRC_IMG_TYPE;

// SQ_TEX_XY_FILTER_* encodings (0x0 .. 0x3).
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0x0,
  SQ_TEX_XY_FILTER_BILINEAR       = 0x1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 0x2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x3,
} SQ_TEX_XY_FILTER;

// SQ_TEX_Z_FILTER_* encodings (0x0 .. 0x2).
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0x0,
  SQ_TEX_Z_FILTER_POINT  = 0x1,
  SQ_TEX_Z_FILTER_LINEAR = 0x2,
} SQ_TEX_Z_FILTER;

// SQ_TEX_MIP_FILTER_* encodings (0x0 .. 0x3).
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0x0,
  SQ_TEX_MIP_FILTER_POINT               = 0x1,
  SQ_TEX_MIP_FILTER_LINEAR              = 0x2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 0x3,
} SQ_TEX_MIP_FILTER;

// SQ_TEX_* wrap / mirror / clamp encodings (0x0 .. 0x7).
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0x0,
  SQ_TEX_MIRROR                  = 0x1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 0x2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 0x3,
  SQ_TEX_CLAMP_HALF_BORDER       = 0x4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x5,
  SQ_TEX_CLAMP_BORDER            = 0x6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 0x7,
} SQ_TEX_CLAMP;

// SQ_TEX_BORDER_COLOR_* encodings (0x0 .. 0x3).
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0x0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 0x3,
} SQ_TEX_BORDER_COLOR;

// TEX_BC_Swizzle_* encodings (0x0 .. 0x5); each name spells a component order.
typedef enum TEX_BC_SWIZZLE {
  TEX_BC_Swizzle_XYZW = 0x0,
  TEX_BC_Swizzle_XWYZ = 0x1,
  TEX_BC_Swizzle_WZYX = 0x2,
  TEX_BC_Swizzle_WXYZ = 0x3,
  TEX_BC_Swizzle_ZYXW = 0x4,
  TEX_BC_Swizzle_YXWZ = 0x5,
} TEX_BC_SWIZZLE;

// Metadata record for gfx11: a two-dword header (version, vendor id), the
// eight SQ_IMG_RSRC words in order, and a trailing array of mip offsets.
// Member order and sizes define the record layout, so do not reorder them.
typedef struct metadata_amd_gfx11_s {
  uint32_t version;   // Must be 1
  uint32_t vendorID;  // AMD
  SQ_IMG_RSRC_WORD0 word0;
  SQ_IMG_RSRC_WORD1 word1;
  SQ_IMG_RSRC_WORD2 word2;
  SQ_IMG_RSRC_WORD3 word3;
  SQ_IMG_RSRC_WORD4 word4;
  SQ_IMG_RSRC_WORD5 word5;
  SQ_IMG_RSRC_WORD6 word6;
  SQ_IMG_RSRC_WORD7 word7;
  // Zero-length trailing array (a compiler extension): it adds no storage to
  // the struct itself.  NOTE(review): the number of entries is not recorded
  // here - presumably implied by the image's mip count; confirm with callers.
  uint32_t mip_offsets[0];
} metadata_amd_gfx11_t;

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_RESOURCE_GFX11_H_


================================================
FILE: runtime/hsa-runtime/image/resource_gfx12.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_RESOURCE_GFX12_H_
#define EXT_IMAGE_RESOURCE_GFX12_H_

#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

namespace rocr {
namespace image {

/**********************************************************/
/**********************************************************/
// SQ_BUF_RSRC word 0 (gfx12): a single 32-bit BASE_ADDRESS field.
#define SQ_BUF_RSC_WRD0_REG_SZ          32
#define SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ 32

// Bit-field view of the word.  With only one full-width field the two
// endianness branches are identical.
struct sq_buf_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;  // [31:0]
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_BUF_RSRC_WORD0 {
  sq_buf_rsrc_word0_t bitfields;
  sq_buf_rsrc_word0_t bits;
  sq_buf_rsrc_word0_t f;

  uint32_t val : SQ_BUF_RSC_WRD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};

/***********/

/* Note: These registers are also defined/used in registers.h
 * in SQ_BUF_RSRC_WORD*_GFX12
 */
// SQ_BUF_RSRC word 1 (gfx12).  Field widths in bits: 16 + 14 + 2 = 32.
#define SQ_BUF_RSC_WRD1_REG_SZ             32
#define SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ 16
#define SQ_BUF_RSC_WRD1_STRIDE_SZ          14
#define SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ  2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_buf_rsrc_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;  // [15:0]
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;           // [29:16]
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;   // [31:30]
#elif defined(BIGENDIAN_CPU)
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_BUF_RSRC_WORD1 {
  sq_buf_rsrc_word1_t bitfields;
  sq_buf_rsrc_word1_t bits;
  sq_buf_rsrc_word1_t f;

  uint32_t val : SQ_BUF_RSC_WRD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_BUF_RSRC word 2 (gfx12): a single 32-bit NUM_RECORDS field.
#define SQ_BUF_RSC_WRD2_REG_SZ         32
#define SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ 32

// Bit-field view of the word.  With only one full-width field the two
// endianness branches are identical.
struct sq_buf_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;  // [31:0]
#elif defined(BIGENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_BUF_RSRC_WORD2 {
  sq_buf_rsrc_word2_t bitfields;
  sq_buf_rsrc_word2_t bits;
  sq_buf_rsrc_word2_t f;

  uint32_t val : SQ_BUF_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_BUF_RSRC word 3 (gfx12).  Field widths in bits; with the 3 unnamed bits
// they add up to 32: 3 + 3 + 3 + 3 + 6 + 3 (unnamed) + 2 + 1 + 1 + 1 + 2 + 2 + 2.
// Note that the OOB_SELECT macro is spelled "WORD3" while the others use "WRD3".
#define SQ_BUF_RSC_WRD3_REG_SZ                     32
#define SQ_BUF_RSC_WRD3_DST_SEL_X_SZ               3
#define SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ               3
#define SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ               3
#define SQ_BUF_RSC_WRD3_DST_SEL_W_SZ               3
#define SQ_BUF_RSC_WRD3_FORMAT_SZ                  6
#define SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ            2
#define SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ          1
#define SQ_BUF_RSC_WRD3_WRITE_COMPRESS_ENABLE_SZ   1
#define SQ_BUF_RSC_WRD3_COMPRESSION_EN_SZ          1
#define SQ_BUF_RSC_WRD3_COMPRESSION_ACCESS_MODE_SZ 2
#define SQ_BUF_RSC_WORD3_OOB_SELECT_SZ             2
#define SQ_BUF_RSC_WRD3_TYPE_SZ                    2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_buf_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X               : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;                // [2:0]
  unsigned int DST_SEL_Y               : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;                // [5:3]
  unsigned int DST_SEL_Z               : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;                // [8:6]
  unsigned int DST_SEL_W               : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;                // [11:9]
  unsigned int FORMAT                  : SQ_BUF_RSC_WRD3_FORMAT_SZ;                   // [17:12]
  unsigned int                         : 3;                                           // [20:18] unnamed
  unsigned int INDEX_STRIDE            : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;             // [22:21]
  unsigned int ADD_TID_ENABLE          : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;           // [23]
  unsigned int WRITE_COMPRESS_ENABLE   : SQ_BUF_RSC_WRD3_WRITE_COMPRESS_ENABLE_SZ;    // [24]
  unsigned int COMPRESSION_EN          : SQ_BUF_RSC_WRD3_COMPRESSION_EN_SZ;           // [25]
  unsigned int COMPRESSION_ACCESS_MODE : SQ_BUF_RSC_WRD3_COMPRESSION_ACCESS_MODE_SZ;  // [27:26]
  unsigned int OOB_SELECT              : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;              // [29:28]
  unsigned int TYPE                    : SQ_BUF_RSC_WRD3_TYPE_SZ;                     // [31:30]
#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE                    : SQ_BUF_RSC_WRD3_TYPE_SZ;
  unsigned int OOB_SELECT              : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
  unsigned int COMPRESSION_ACCESS_MODE : SQ_BUF_RSC_WRD3_COMPRESSION_ACCESS_MODE_SZ;
  unsigned int COMPRESSION_EN          : SQ_BUF_RSC_WRD3_COMPRESSION_EN_SZ;
  unsigned int WRITE_COMPRESS_ENABLE   : SQ_BUF_RSC_WRD3_WRITE_COMPRESS_ENABLE_SZ;
  unsigned int ADD_TID_ENABLE          : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
  unsigned int INDEX_STRIDE            : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
  unsigned int                         : 3;
  unsigned int FORMAT                  : SQ_BUF_RSC_WRD3_FORMAT_SZ;
  unsigned int DST_SEL_W               : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z               : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y               : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X               : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_BUF_RSRC_WORD3 {
  sq_buf_rsrc_word3_t bitfields;
  sq_buf_rsrc_word3_t bits;
  sq_buf_rsrc_word3_t f;

  uint32_t val : SQ_BUF_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**********************************************************/
/**********************************************************/
// SQ_IMG_RSRC word 0 (gfx12): a single 32-bit BASE_ADDRESS field.
#define SQ_IMG_RSC_WRD0_REG_SZ          32
#define SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ 32

// Bit-field view of the word.  With only one full-width field the two
// endianness branches are identical.
struct sq_img_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;  // [31:0]
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD0 {
  sq_img_rsrc_word0_t bitfields;
  sq_img_rsrc_word0_t bits;
  sq_img_rsrc_word0_t f;

  uint32_t val : SQ_IMG_RSC_WRD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 1 (gfx12).  Field widths in bits; with the 4 unnamed bits
// they add up to 32: 8 + 4 (unnamed) + 5 + 8 + 5 + 2.
// The width macro for the WIDTH field is named WIDTH_LO and has no _SZ suffix.
#define SQ_IMG_RSC_WRD1_REG_SZ             32
#define SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ 8
#define SQ_IMG_RSC_WRD1_MAX_MIP_SZ         5
#define SQ_IMG_RSC_WRD1_FORMAT_SZ          8
#define SQ_IMG_RSC_WRD1_BASE_LEVEL_SZ      5
#define SQ_IMG_RSC_WRD1_WIDTH_LO           2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;  // [7:0]
  unsigned int                 : 4;                                   // [11:8] unnamed
  unsigned int MAX_MIP         : SQ_IMG_RSC_WRD1_MAX_MIP_SZ;          // [16:12]
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;           // [24:17]
  unsigned int BASE_LEVEL      : SQ_IMG_RSC_WRD1_BASE_LEVEL_SZ;       // [29:25]
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;            // [31:30]
#elif defined(BIGENDIAN_CPU)
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;
  unsigned int BASE_LEVEL      : SQ_IMG_RSC_WRD1_BASE_LEVEL_SZ;
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;
  unsigned int MAX_MIP         : SQ_IMG_RSC_WRD1_MAX_MIP_SZ;
  unsigned int                 : 4;
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD1 {
  sq_img_rsrc_word1_t bitfields;
  sq_img_rsrc_word1_t bits;
  sq_img_rsrc_word1_t f;

  uint32_t val : SQ_IMG_RSC_WRD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 2 (gfx12).  Field widths in bits; with the 2 unnamed bits
// they add up to 32: 14 + 16 + 2 (unnamed).
#define SQ_IMG_RSC_WRD2_REG_SZ      32
#define SQ_IMG_RSC_WRD2_WIDTH_HI_SZ 14
#define SQ_IMG_RSC_WRD2_HEIGHT_SZ   16

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int WIDTH_HI : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;  // [13:0]
  unsigned int HEIGHT   : SQ_IMG_RSC_WRD2_HEIGHT_SZ;    // [29:14]
  unsigned int          : 2;                            // [31:30] unnamed
#elif defined(BIGENDIAN_CPU)
  unsigned int          : 2;
  unsigned int HEIGHT   : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int WIDTH_HI : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD2 {
  sq_img_rsrc_word2_t bitfields;
  sq_img_rsrc_word2_t bits;
  sq_img_rsrc_word2_t f;

  uint32_t val : SQ_IMG_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 3 (gfx12).  Field widths in bits; with the 2 unnamed bits
// they add up to 32: 3 + 3 + 3 + 3 + 1 + 2 (unnamed) + 5 + 5 + 3 + 4.
#define SQ_IMG_RSC_WRD3_REG_SZ           32
#define SQ_IMG_RSC_WRD3_DST_SEL_X_SZ     3
#define SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ     3
#define SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ     3
#define SQ_IMG_RSC_WRD3_DST_SEL_W_SZ     3
#define SQ_IMG_RSC_WRD3_NO_EDGE_CLAMP_SZ 1
#define SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ    5
#define SQ_IMG_RSC_WRD3_SW_MODE_SZ       5
#define SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ    3
#define SQ_IMG_RSC_WRD3_TYPE_SZ          4

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X     : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;      // [2:0]
  unsigned int DST_SEL_Y     : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;      // [5:3]
  unsigned int DST_SEL_Z     : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;      // [8:6]
  unsigned int DST_SEL_W     : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;      // [11:9]
  unsigned int NO_EDGE_CLAMP : SQ_IMG_RSC_WRD3_NO_EDGE_CLAMP_SZ;  // [12]
  unsigned int               : 2;                                 // [14:13] unnamed
  unsigned int LAST_LEVEL    : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;     // [19:15]
  unsigned int SW_MODE       : SQ_IMG_RSC_WRD3_SW_MODE_SZ;        // [24:20]
  unsigned int BC_SWIZZLE    : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;     // [27:25]
  unsigned int TYPE          : SQ_IMG_RSC_WRD3_TYPE_SZ;           // [31:28]
#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE          : SQ_IMG_RSC_WRD3_TYPE_SZ;
  unsigned int BC_SWIZZLE    : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  unsigned int SW_MODE       : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int LAST_LEVEL    : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int               : 2;
  unsigned int NO_EDGE_CLAMP : SQ_IMG_RSC_WRD3_NO_EDGE_CLAMP_SZ;
  unsigned int DST_SEL_W     : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z     : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y     : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X     : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD3 {
  sq_img_rsrc_word3_t bitfields;
  sq_img_rsrc_word3_t bits;
  sq_img_rsrc_word3_t f;

  uint32_t val : SQ_IMG_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 4 (gfx12).  Field widths in bits; with the 2 unnamed bits
// they add up to 32: 14 + 2 + 13 + 1 + 2 (unnamed).
#define SQ_IMG_RSC_WRD4_REG_SZ            32
#define SQ_IMG_RSC_WRD4_DEPTH_SZ          14
#define SQ_IMG_RSC_WRD4_PITCH_MSB_SZ      2
#define SQ_IMG_RSC_WRD4_BASE_ARR_SZ       13
#define SQ_IMG_RSC_WRD4_BASE_ARRAY_MSB_SZ 1

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word4_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DEPTH          : SQ_IMG_RSC_WRD4_DEPTH_SZ;           // [13:0]
  unsigned int PITCH_MSB      : SQ_IMG_RSC_WRD4_PITCH_MSB_SZ;       // [15:14]
  unsigned int BASE_ARRAY     : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;        // [28:16]
  unsigned int BASE_ARRAY_MSB : SQ_IMG_RSC_WRD4_BASE_ARRAY_MSB_SZ;  // [29]
  unsigned int                : 2;                                  // [31:30] unnamed
#elif defined(BIGENDIAN_CPU)
  unsigned int                : 2;
  unsigned int BASE_ARRAY_MSB : SQ_IMG_RSC_WRD4_BASE_ARRAY_MSB_SZ;
  unsigned int BASE_ARRAY     : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
  unsigned int PITCH_MSB      : SQ_IMG_RSC_WRD4_PITCH_MSB_SZ;
  unsigned int DEPTH          : SQ_IMG_RSC_WRD4_DEPTH_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD4 {
  sq_img_rsrc_word4_t bitfields;
  sq_img_rsrc_word4_t bits;
  sq_img_rsrc_word4_t f;

  uint32_t val : SQ_IMG_RSC_WRD4_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 5 (gfx12).  Field widths in bits; with the 4 unnamed bits
// they add up to 32: 4 (unnamed) + 1 + 5 + 5 + 5 + 3 + 1 + 1 + 1 + 6.
#define SQ_IMG_RSC_WRD5_REG_SZ             32
#define SQ_IMG_RSC_WRD5_UAV3D_SZ           1
#define SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ     5
#define SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ    5
#define SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ     5  // Combined two consecutive separate fields width[0:2] and width[3:4].
#define SQ_IMG_RSC_WRD5_PERF_MOD_SZ        3
#define SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ  1
#define SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ 1
#define SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN_SZ  1
#define SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ      6  // lowest 6 bits of MIN_LOD (13 bit total)

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word5_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int                 : 4;                                   // [3:0] unnamed
  unsigned int UAV3D           : SQ_IMG_RSC_WRD5_UAV3D_SZ;            // [4]
  unsigned int DEPTH_SCALE     : SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ;      // [9:5]
  unsigned int HEIGHT_SCALE    : SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ;     // [14:10]
  unsigned int WIDTH_SCALE     : SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ;      // [19:15]
  unsigned int PERF_MOD        : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;         // [22:20]
  unsigned int CORNER_SAMPLES  : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;   // [23]
  unsigned int LINKED_RESOURCE : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;  // [24]
  unsigned int LOD_HWD_CNT_EN  : SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN_SZ;   // [25]
  unsigned int MIN_LOD_LO      : SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ;       // [31:26]
#elif defined(BIGENDIAN_CPU)
  unsigned int MIN_LOD_LO      : SQ_IMG_RSC_WRD5_MIN_LOD_LO_SZ;
  unsigned int LOD_HWD_CNT_EN  : SQ_IMG_RSC_WRD5_LOD_HWD_CNT_EN_SZ;
  unsigned int LINKED_RESOURCE : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
  unsigned int CORNER_SAMPLES  : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
  unsigned int PERF_MOD        : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
  unsigned int WIDTH_SCALE     : SQ_IMG_RSC_WRD5_WIDTH_SCALE_SZ;
  unsigned int HEIGHT_SCALE    : SQ_IMG_RSC_WRD5_HEIGHT_SCALE_SZ;
  unsigned int DEPTH_SCALE     : SQ_IMG_RSC_WRD5_DEPTH_SCALE_SZ;
  unsigned int UAV3D           : SQ_IMG_RSC_WRD5_UAV3D_SZ;
  unsigned int                 : 4;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD5 {
  sq_img_rsrc_word5_t bitfields;
  sq_img_rsrc_word5_t bits;
  sq_img_rsrc_word5_t f;

  uint32_t val : SQ_IMG_RSC_WRD5_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 6 (gfx12).  Field widths in bits; with the unnamed bits
// they add up to 32: 7 + 8 + 1 + 1 (unnamed) + 2 + 1 (unnamed) + 1 + 1 + 2 + 2
// + 6 (unnamed).
// The COUNTER_BANK_ID width macro carries a "WRD5" prefix although the field
// is declared in this word.
#define SQ_IMG_RSC_WRD6_REG_SZ                     32

#define SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ              7
#define SQ_IMG_RSC_WRD5_COUNTER_BANK_ID_SZ         8  // 3 fields combined into bank_id
#define SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ       1
#define SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ         2
#define SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ       1
#define SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ      1
#define SQ_IMG_RSC_WRD6_COMPRESSION_ACCESS_MODE_SZ 2
#define SQ_IMG_RSC_WRD6_SPECULATIVE_READ_SZ        2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_rsrc_word6_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int MIN_LOD_HI              : SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ;               // [6:0]
  unsigned int COUNTER_BANK_ID         : SQ_IMG_RSC_WRD5_COUNTER_BANK_ID_SZ;          // [14:7]
  unsigned int MAX_UNCOMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;        // [15]
  unsigned int                         : 1;                                           // [16] unnamed
  unsigned int MAX_COMP_BLK_SZ         : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;          // [18:17]
  unsigned int                         : 1;                                           // [19] unnamed
  unsigned int WRITE_COMPRESS_ENABLE   : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;        // [20]
  unsigned int COMPRESSION_ENABLE      : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;       // [21]
  unsigned int COMPRESSION_ACCESS_MODE : SQ_IMG_RSC_WRD6_COMPRESSION_ACCESS_MODE_SZ;  // [23:22]
  unsigned int SPECULATIVE_READ        : SQ_IMG_RSC_WRD6_SPECULATIVE_READ_SZ;         // [25:24]
  unsigned int                         : 6;                                           // [31:26] unnamed
#elif defined(BIGENDIAN_CPU)
  unsigned int                         : 6;
  unsigned int SPECULATIVE_READ        : SQ_IMG_RSC_WRD6_SPECULATIVE_READ_SZ;
  unsigned int COMPRESSION_ACCESS_MODE : SQ_IMG_RSC_WRD6_COMPRESSION_ACCESS_MODE_SZ;
  unsigned int COMPRESSION_ENABLE      : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
  unsigned int WRITE_COMPRESS_ENABLE   : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
  unsigned int                         : 1;
  unsigned int MAX_COMP_BLK_SZ         : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
  unsigned int                         : 1;
  unsigned int MAX_UNCOMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
  unsigned int COUNTER_BANK_ID         : SQ_IMG_RSC_WRD5_COUNTER_BANK_ID_SZ;
  unsigned int MIN_LOD_HI              : SQ_IMG_RSC_WRD6_MIN_LOD_HI_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD6 {
  sq_img_rsrc_word6_t bitfields;
  sq_img_rsrc_word6_t bits;
  sq_img_rsrc_word6_t f;

  uint32_t val : SQ_IMG_RSC_WRD6_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_RSRC word 7 (gfx12): no named fields, only 32 unnamed bits.
#define SQ_IMG_RSC_WRD7_REG_SZ 32

// Bit-field view of the word; both endianness branches are the same single
// unnamed 32-bit field.
struct sq_img_rsrc_word7_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int : 32;  // [31:0] unnamed
#elif defined(BIGENDIAN_CPU)
  unsigned int : 32;
#endif
};

// One dword, reachable through the (field-less) struct under three alias
// names or as a whole 32-bit value.
union SQ_IMG_RSRC_WORD7 {
  sq_img_rsrc_word7_t bitfields;
  sq_img_rsrc_word7_t bits;
  sq_img_rsrc_word7_t f;

  uint32_t val : SQ_IMG_RSC_WRD7_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/
/**********************************************************/
/**********************************************************/


// SQ_IMG_SAMP word 0 (gfx12).  Field widths in bits; they add up to 32:
// 3 + 3 + 3 + 3 + 3 + 1 + 3 + 1 + 1 + 6 + 1 + 1 + 2 + 1.
#define SQ_IMG_SAMP_WORD0_REG_SZ                32
#define SQ_IMG_SAMP_WORD0_CLAMP_X_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ            3
#define SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ    3
#define SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ 3
#define SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ 1
#define SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ    3
#define SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ     1
#define SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ      1
#define SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ         6
#define SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ        1
#define SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ  1
#define SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ        2
#define SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ       1

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_samp_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;             // [2:0]
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;             // [5:3]
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;             // [8:6]
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;     // [11:9]
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;  // [14:12]
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;  // [15]
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;     // [18:16]
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;      // [19]
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;       // [20]
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;          // [26:21]
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;         // [27]
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;   // [28]
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;         // [30:29]
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;        // [31]
#elif defined(BIGENDIAN_CPU)
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_SAMP_WORD0 {
  sq_img_samp_word0_t bitfields;
  sq_img_samp_word0_t bits;
  sq_img_samp_word0_t f;

  uint32_t val : SQ_IMG_SAMP_WORD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_SAMP word 1 (gfx12).  Field widths in bits; with the 2 unnamed bits
// they add up to 32: 13 + 13 + 2 (unnamed) + 4.
#define SQ_IMG_SAMP_WORD1_REG_SZ     32
#define SQ_IMG_SAMP_WORD1_MIN_LOD_SZ 13
#define SQ_IMG_SAMP_WORD1_MAX_LOD_SZ 13
#define SQ_IMG_SAMP_WORD1_PERF_Z_SZ  4

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_samp_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int MIN_LOD : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;  // [12:0]
  unsigned int MAX_LOD : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;  // [25:13]
  unsigned int         : 2;                             // [27:26] unnamed
  unsigned int PERF_Z  : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;   // [31:28]
#elif defined(BIGENDIAN_CPU)
  unsigned int PERF_Z  : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
  unsigned int         : 2;
  unsigned int MAX_LOD : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
  unsigned int MIN_LOD : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_SAMP_WORD1 {
  sq_img_samp_word1_t bitfields;
  sq_img_samp_word1_t bits;
  sq_img_samp_word1_t f;

  uint32_t val : SQ_IMG_SAMP_WORD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// SQ_IMG_SAMP word 2 (gfx12).  Field widths in bits; with the unnamed bit
// they add up to 32: 14 + 6 + 2 + 2 + 2 + 2 + 1 (unnamed) + 1 + 2.
#define SQ_IMG_SAMP_WORD2_REG_SZ            32
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SZ       14
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ   6
#define SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ  2
#define SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ  2
#define SQ_IMG_SAMP_WORD2_Z_FILTER_SZ       2
#define SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ     2
#define SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ 1
#define SQ_IMG_SAMP_WORD2_PERF_MIP_LO_SZ    2

// Bit-field view of the word.  Both branches declare the same fields; the
// big-endian branch lists them in reverse order.  Bit ranges in the comments
// assume the first field of the little-endian branch lands on bit 0.
struct sq_img_samp_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int LOD_BIAS       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SZ;        // [13:0]
  unsigned int LOD_BIAS_SEC   : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;    // [19:14]
  unsigned int XY_MAG_FILTER  : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;   // [21:20]
  unsigned int XY_MIN_FILTER  : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;   // [23:22]
  unsigned int Z_FILTER       : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;        // [25:24]
  unsigned int MIP_FILTER     : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;      // [27:26]
  unsigned int                : 1;                                    // [28] unnamed
  unsigned int ANISO_OVERRIDE : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;  // [29]
  unsigned int PERF_MIP_LO    : SQ_IMG_SAMP_WORD2_PERF_MIP_LO_SZ;     // [31:30]
#elif defined(BIGENDIAN_CPU)
  unsigned int PERF_MIP_LO    : SQ_IMG_SAMP_WORD2_PERF_MIP_LO_SZ;
  unsigned int ANISO_OVERRIDE : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int                : 1;
  unsigned int MIP_FILTER     : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int Z_FILTER       : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int XY_MIN_FILTER  : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int XY_MAG_FILTER  : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int LOD_BIAS_SEC   : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int LOD_BIAS       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SZ;
#endif
};

// One dword, reachable through the bit-field struct (under three alias
// names) or as a whole 32-bit value.
union SQ_IMG_SAMP_WORD2 {
  sq_img_samp_word2_t bitfields;
  sq_img_samp_word2_t bits;
  sq_img_samp_word2_t f;

  uint32_t val : SQ_IMG_SAMP_WORD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/


// Word 3 of the SQ_IMG_SAMP descriptor: widths, in bits, of the whole word
// (REG_SZ) and of each named field.
// Note: the width macro for the BORDER_COLOR_TYPE field is spelled
// BORD_COLOR_TYPE_SZ (not BORDER_...).
#define SQ_IMG_SAMP_WORD3_REG_SZ 32
#define SQ_IMG_SAMP_WORD3_PERF_MIP_HI_SZ       2
#define SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SZ 12
#define SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ   2

// Bit-field layout of word 3. The widths add up to
// 2 + 16 (unnamed) + 12 + 2 = 32 bits.
// The BIGENDIAN_CPU branch declares the same members in the opposite order.
struct sq_img_samp_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int PERF_MIP_HI       : SQ_IMG_SAMP_WORD3_PERF_MIP_HI_SZ;
  unsigned int                   : 16;
  unsigned int BORDER_COLOR_PTR  : SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SZ;
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
  unsigned int BORDER_COLOR_PTR  : SQ_IMG_SAMP_WORD3_BORDER_COLOR_PTR_SZ;
  unsigned int                   : 16;
  unsigned int PERF_MIP_HI       : SQ_IMG_SAMP_WORD3_PERF_MIP_HI_SZ;
#endif
};

// Word 3 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_IMG_SAMP_WORD3 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_img_samp_word3_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_IMG_SAMP_WORD3_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**************************************************************/
/**************************************************************/
/**************************************************************/

// Channel-layout codes. Values run consecutively from 0 to 23; two of the
// slots (15 and 23) are named as reserved.
typedef enum FMT {
  FMT_INVALID      = 0,
  FMT_8            = 1,
  FMT_16           = 2,
  FMT_8_8          = 3,
  FMT_32           = 4,
  FMT_16_16        = 5,
  FMT_10_11_11     = 6,
  FMT_11_11_10     = 7,
  FMT_10_10_10_2   = 8,
  FMT_2_10_10_10   = 9,
  FMT_8_8_8_8      = 10,
  FMT_32_32        = 11,
  FMT_16_16_16_16  = 12,
  FMT_32_32_32     = 13,
  FMT_32_32_32_32  = 14,
  FMT_RESERVED_78  = 15,
  FMT_5_6_5        = 16,
  FMT_1_5_5_5      = 17,
  FMT_5_5_5_1      = 18,
  FMT_4_4_4_4      = 19,
  FMT_8_24         = 20,
  FMT_24_8         = 21,
  FMT_X24_8_32     = 22,
  FMT_RESERVED_155 = 23,
} FMT;

// Numeric-type codes. Values run consecutively from 0 to 12; slots 8 and 9
// are named as reserved.
typedef enum type {
  TYPE_UNORM          = 0,
  TYPE_SNORM          = 1,
  TYPE_USCALED        = 2,
  TYPE_SSCALED        = 3,
  TYPE_UINT           = 4,
  TYPE_SINT           = 5,
  TYPE_SRGB           = 6,
  TYPE_FLOAT          = 7,
  TYPE_RESERVED_8     = 8,
  TYPE_RESERVED_9     = 9,
  TYPE_UNORM_UINT     = 10,
  TYPE_REVERSED_UNORM = 11,
  TYPE_FLOAT_CLAMP    = 12,
} type;

// Combined format codes. Enumerators are numbered explicitly and
// consecutively from 0 (CFMT_INVALID) to 81 (CFMT_X24_8_32_FLOAT).
// NOTE(review): each name looks like a channel layout (as in FMT) joined with
// a numeric type (as in `type`) - confirm against the hardware documentation.
// Keep the explicit values: they are presumably written into descriptor
// fields, so they must not shift if entries are added or reordered.
enum FORMAT {
CFMT_INVALID             = 0,
CFMT_8_UNORM             = 1,
CFMT_8_SNORM             = 2,
CFMT_8_USCALED           = 3,
CFMT_8_SSCALED           = 4,
CFMT_8_UINT              = 5,
CFMT_8_SINT              = 6,
CFMT_16_UNORM            = 7,
CFMT_16_SNORM            = 8,
CFMT_16_USCALED          = 9,
CFMT_16_SSCALED          = 10,
CFMT_16_UINT             = 11,
CFMT_16_SINT             = 12,
CFMT_16_FLOAT            = 13,
CFMT_8_8_UNORM           = 14,
CFMT_8_8_SNORM           = 15,
CFMT_8_8_USCALED         = 16,
CFMT_8_8_SSCALED         = 17,
CFMT_8_8_UINT            = 18,
CFMT_8_8_SINT            = 19,
CFMT_32_UINT             = 20,
CFMT_32_SINT             = 21,
CFMT_32_FLOAT            = 22,
CFMT_16_16_UNORM         = 23,
CFMT_16_16_SNORM         = 24,
CFMT_16_16_USCALED       = 25,
CFMT_16_16_SSCALED       = 26,
CFMT_16_16_UINT          = 27,
CFMT_16_16_SINT          = 28,
CFMT_16_16_FLOAT         = 29,
CFMT_10_11_11_FLOAT      = 30,
CFMT_11_11_10_FLOAT      = 31,
CFMT_10_10_10_2_UNORM    = 32,
CFMT_10_10_10_2_SNORM    = 33,
CFMT_10_10_10_2_UINT     = 34,
CFMT_10_10_10_2_SINT     = 35,
CFMT_2_10_10_10_UNORM    = 36,
CFMT_2_10_10_10_SNORM    = 37,
CFMT_2_10_10_10_USCALED  = 38,
CFMT_2_10_10_10_SSCALED  = 39,
CFMT_2_10_10_10_UINT     = 40,
CFMT_2_10_10_10_SINT     = 41,
CFMT_8_8_8_8_UNORM       = 42,
CFMT_8_8_8_8_SNORM       = 43,
CFMT_8_8_8_8_USCALED     = 44,
CFMT_8_8_8_8_SSCALED     = 45,
CFMT_8_8_8_8_UINT        = 46,
CFMT_8_8_8_8_SINT        = 47,
CFMT_32_32_UINT          = 48,
CFMT_32_32_SINT          = 49,
CFMT_32_32_FLOAT         = 50,
CFMT_16_16_16_16_UNORM   = 51,
CFMT_16_16_16_16_SNORM   = 52,
CFMT_16_16_16_16_USCALED = 53,
CFMT_16_16_16_16_SSCALED = 54,
CFMT_16_16_16_16_UINT    = 55,
CFMT_16_16_16_16_SINT    = 56,
CFMT_16_16_16_16_FLOAT   = 57,
CFMT_32_32_32_UINT       = 58,
CFMT_32_32_32_SINT       = 59,
CFMT_32_32_32_FLOAT      = 60,
CFMT_32_32_32_32_UINT    = 61,
CFMT_32_32_32_32_SINT    = 62,
CFMT_32_32_32_32_FLOAT   = 63,
CFMT_8_SRGB              = 64,
CFMT_8_8_SRGB            = 65,
CFMT_8_8_8_8_SRGB        = 66,
CFMT_5_9_9_9_FLOAT       = 67,
CFMT_5_6_5_UNORM         = 68,
CFMT_1_5_5_5_UNORM       = 69,
CFMT_5_5_5_1_UNORM       = 70,
CFMT_4_4_4_4_UNORM       = 71,
CFMT_4_4_UNORM           = 72,
CFMT_1_UNORM             = 73,
CFMT_1_REVERSED_UNORM    = 74,
CFMT_32_FLOAT_CLAMP      = 75,
CFMT_8_24_UNORM          = 76,
CFMT_8_24_UINT           = 77,
CFMT_24_8_UNORM          = 78,
CFMT_24_8_UINT           = 79,
CFMT_X24_8_32_UINT       = 80,
CFMT_X24_8_32_FLOAT      = 81,
};

// Component-select codes. Values 2 and 3 are not assigned here.
typedef enum SEL {
  SEL_0 = 0,
  SEL_1 = 1,
  SEL_X = 4,
  SEL_Y = 5,
  SEL_Z = 6,
  SEL_W = 7,
} SEL;

// Image resource dimensionality codes; the values occupy 8 through 15.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D            = 8,
  SQ_RSRC_IMG_2D            = 9,
  SQ_RSRC_IMG_3D            = 10,
  SQ_RSRC_IMG_CUBE_ARRAY    = 11,
  SQ_RSRC_IMG_1D_ARRAY      = 12,
  SQ_RSRC_IMG_2D_ARRAY      = 13,
  SQ_RSRC_IMG_2D_MSAA       = 14,
  SQ_RSRC_IMG_2D_MSAA_ARRAY = 15,
} SQ_RSRC_IMG_TYPE;

// XY filter mode codes (four values, 0 through 3).
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0,
  SQ_TEX_XY_FILTER_BILINEAR       = 1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 3,
} SQ_TEX_XY_FILTER;

// Z filter mode codes (three values, 0 through 2).
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0,
  SQ_TEX_Z_FILTER_POINT  = 1,
  SQ_TEX_Z_FILTER_LINEAR = 2,
} SQ_TEX_Z_FILTER;

// Mip filter mode codes (four values, 0 through 3).
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0,
  SQ_TEX_MIP_FILTER_POINT               = 1,
  SQ_TEX_MIP_FILTER_LINEAR              = 2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 3,
} SQ_TEX_MIP_FILTER;

// Addressing (wrap / mirror / clamp) mode codes (eight values, 0 through 7).
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0,
  SQ_TEX_MIRROR                  = 1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 3,
  SQ_TEX_CLAMP_HALF_BORDER       = 4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
  SQ_TEX_CLAMP_BORDER            = 6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 7,
} SQ_TEX_CLAMP;

// Border colour selection codes (four values, 0 through 3).
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 3,
} SQ_TEX_BORDER_COLOR;

// BC swizzle codes (six values, 0 through 5); each name spells a component
// ordering.
typedef enum TEX_BC_SWIZZLE {
  TEX_BC_Swizzle_XYZW = 0,
  TEX_BC_Swizzle_XWYZ = 1,
  TEX_BC_Swizzle_WZYX = 2,
  TEX_BC_Swizzle_WXYZ = 3,
  TEX_BC_Swizzle_ZYXW = 4,
  TEX_BC_Swizzle_YXWZ = 5,
} TEX_BC_SWIZZLE;

// Image metadata record for gfx12: a version/vendor header, the eight
// SQ_IMG_RSRC descriptor words in order, then a variable-length tail.
// Member order defines the record layout; do not reorder.
typedef struct metadata_amd_gfx12_s {
  uint32_t version;   // Must be 1
  uint32_t vendorID;  // AMD
  SQ_IMG_RSRC_WORD0 word0;
  SQ_IMG_RSRC_WORD1 word1;
  SQ_IMG_RSRC_WORD2 word2;
  SQ_IMG_RSRC_WORD3 word3;
  SQ_IMG_RSRC_WORD4 word4;
  SQ_IMG_RSRC_WORD5 word5;
  SQ_IMG_RSRC_WORD6 word6;
  SQ_IMG_RSRC_WORD7 word7;
  // Zero-length trailing array (a compiler extension): adds no size of its own
  // and names whatever storage follows the fixed part. Presumably one entry
  // per mip level - verify against the code that fills it in.
  uint32_t mip_offsets[0];
} metadata_amd_gfx12_t;

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_RESOURCE_GFX12_H_


================================================
FILE: runtime/hsa-runtime/image/resource_kv.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H
#define HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H

#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

namespace rocr {
namespace image {

// Word 0 of the SQ_BUF_RSRC (buffer resource) descriptor.
// The bit-field view (`bitfields` / `bits`) overlays the raw unsigned, signed
// and float views of the same word.
union SQ_BUF_RSRC_WORD0 {
  struct {
    // A single 32-bit field, so both endian branches are identical.
#if defined(LITTLEENDIAN_CPU)
    unsigned int base_address : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int base_address : 32;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 1 of the SQ_BUF_RSRC (buffer resource) descriptor.
// Field widths add up to 16 + 14 + 1 + 1 = 32 bits; the BIGENDIAN_CPU branch
// declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_BUF_RSRC_WORD1 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int base_address_hi : 16;
    unsigned int stride : 14;
    unsigned int cache_swizzle : 1;
    unsigned int swizzle_enable : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int swizzle_enable : 1;
    unsigned int cache_swizzle : 1;
    unsigned int stride : 14;
    unsigned int base_address_hi : 16;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 2 of the SQ_BUF_RSRC (buffer resource) descriptor.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_BUF_RSRC_WORD2 {
  struct {
    // A single 32-bit field, so both endian branches are identical.
#if defined(LITTLEENDIAN_CPU)
    unsigned int num_records : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int num_records : 32;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 3 of the SQ_BUF_RSRC (buffer resource) descriptor.
// Field widths add up to 4*3 + 3 + 4 + 2 + 2 + 1 + 1 + 1 + 1 + 3 + 2 = 32 bits;
// the BIGENDIAN_CPU branch declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_BUF_RSRC_WORD3 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    // The dst_sel_* fields are 3 bits wide; presumably they hold SEL codes.
    unsigned int dst_sel_x : 3;
    unsigned int dst_sel_y : 3;
    unsigned int dst_sel_z : 3;
    unsigned int dst_sel_w : 3;
    unsigned int num_format : 3;
    unsigned int data_format : 4;
    unsigned int element_size : 2;
    unsigned int index_stride : 2;
    unsigned int add_tid_enable : 1;
    unsigned int atc : 1;
    unsigned int hash_enable : 1;
    unsigned int heap : 1;
    unsigned int mtype : 3;
    unsigned int type : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int type : 2;
    unsigned int mtype : 3;
    unsigned int heap : 1;
    unsigned int hash_enable : 1;
    unsigned int atc : 1;
    unsigned int add_tid_enable : 1;
    unsigned int index_stride : 2;
    unsigned int element_size : 2;
    unsigned int data_format : 4;
    unsigned int num_format : 3;
    unsigned int dst_sel_w : 3;
    unsigned int dst_sel_z : 3;
    unsigned int dst_sel_y : 3;
    unsigned int dst_sel_x : 3;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 0 of the SQ_IMG_RSRC (image resource) descriptor.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD0 {
  struct {
    // A single 32-bit field, so both endian branches are identical.
#if defined(LITTLEENDIAN_CPU)
    unsigned int base_address : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int base_address : 32;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 1 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 8 + 12 + 6 + 4 + 2 = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD1 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int base_address_hi : 8;
    unsigned int min_lod : 12;
    // Presumably hold FMT and `type` codes respectively - confirm with callers.
    unsigned int data_format : 6;
    unsigned int num_format : 4;
    unsigned int mtype : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int mtype : 2;
    unsigned int num_format : 4;
    unsigned int data_format : 6;
    unsigned int min_lod : 12;
    unsigned int base_address_hi : 8;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 2 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 14 + 14 + 3 + 1 = 32 bits; the BIGENDIAN_CPU branch
// declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD2 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int width : 14;
    unsigned int height : 14;
    unsigned int perf_mod : 3;
    unsigned int interlaced : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int interlaced : 1;
    unsigned int perf_mod : 3;
    unsigned int height : 14;
    unsigned int width : 14;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 3 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 4*3 + 4 + 4 + 5 + 1 + 1 + 1 + 4 = 32 bits; the
// BIGENDIAN_CPU branch declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD3 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    // The dst_sel_* fields are 3 bits wide; presumably they hold SEL codes.
    unsigned int dst_sel_x : 3;
    unsigned int dst_sel_y : 3;
    unsigned int dst_sel_z : 3;
    unsigned int dst_sel_w : 3;
    unsigned int base_level : 4;
    unsigned int last_level : 4;
    unsigned int tiling_index : 5;
    unsigned int pow2_pad : 1;
    unsigned int mtype : 1;
    unsigned int atc : 1;
    // 4 bits wide; presumably holds an SQ_RSRC_IMG_TYPE code.
    unsigned int type : 4;
#elif defined(BIGENDIAN_CPU)
    unsigned int type : 4;
    unsigned int atc : 1;
    unsigned int mtype : 1;
    unsigned int pow2_pad : 1;
    unsigned int tiling_index : 5;
    unsigned int last_level : 4;
    unsigned int base_level : 4;
    unsigned int dst_sel_w : 3;
    unsigned int dst_sel_z : 3;
    unsigned int dst_sel_y : 3;
    unsigned int dst_sel_x : 3;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 4 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 13 + 14 + 5 (unnamed padding) = 32 bits; the
// BIGENDIAN_CPU branch declares the same members in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD4 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int depth : 13;
    unsigned int pitch : 14;
    unsigned int : 5;
#elif defined(BIGENDIAN_CPU)
    unsigned int : 5;
    unsigned int pitch : 14;
    unsigned int depth : 13;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 5 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 13 + 13 + 6 (unnamed padding) = 32 bits; the
// BIGENDIAN_CPU branch declares the same members in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD5 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int base_array : 13;
    unsigned int last_array : 13;
    unsigned int : 6;
#elif defined(BIGENDIAN_CPU)
    unsigned int : 6;
    unsigned int last_array : 13;
    unsigned int base_array : 13;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 6 of the SQ_IMG_RSRC (image resource) descriptor.
// Field widths add up to 12 + 8 + 1 + 1 + 1 + 1 + 4 + 4 = 32 bits; the
// BIGENDIAN_CPU branch declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD6 {
  struct {
#if	defined(LITTLEENDIAN_CPU)
    unsigned int min_lod_warn : 12;
    unsigned int counter_bank_id : 8;
    unsigned int lod_hdw_cnt_en : 1;
    unsigned int compression_en : 1;
    unsigned int alpha_is_on_msb : 1;
    unsigned int color_transform : 1;
    unsigned int lost_alpha_bits : 4;
    unsigned int lost_color_bits : 4;
#elif	defined(BIGENDIAN_CPU)
    unsigned int lost_color_bits : 4;
    unsigned int lost_alpha_bits : 4;
    unsigned int color_transform : 1;
    unsigned int alpha_is_on_msb : 1;
    unsigned int compression_en : 1;
    unsigned int lod_hdw_cnt_en : 1;
    unsigned int counter_bank_id : 8;
    unsigned int min_lod_warn : 12;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 7 of the SQ_IMG_RSRC (image resource) descriptor.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_RSRC_WORD7 {
  struct {
    // A single 32-bit field, so both endian branches are identical.
#if		defined(LITTLEENDIAN_CPU)
    unsigned int meta_data_address : 32;
#elif		defined(BIGENDIAN_CPU)
    unsigned int meta_data_address : 32;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 0 of the SQ_IMG_SAMP descriptor (presumably the image sampler state).
// Field widths add up to 3*3 + 3 + 3 + 1 + 3 + 1 + 1 + 6 + 1 + 1 + 2 + 1 = 32
// bits; the BIGENDIAN_CPU branch declares the same fields in the opposite
// order. The bit-field view overlays the raw unsigned, signed and float views.
// Note: the CLAMP_* members are upper-case while the rest are lower-case, and
// `force_unormalized` is spelled with a single 'n'; both are part of the
// interface used by callers, so they are left as they are.
union SQ_IMG_SAMP_WORD0 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    // 3 bits each; presumably hold SQ_TEX_CLAMP codes.
    unsigned int CLAMP_X : 3;
    unsigned int CLAMP_Y : 3;
    unsigned int CLAMP_Z : 3;
    unsigned int max_aniso_ratio : 3;
    unsigned int depth_compare_func : 3;
    unsigned int force_unormalized : 1;
    unsigned int aniso_threshold : 3;
    unsigned int mc_coord_trunc : 1;
    unsigned int force_degamma : 1;
    unsigned int aniso_bias : 6;
    unsigned int trunc_coord : 1;
    unsigned int disable_cube_wrap : 1;
    unsigned int filter_mode : 2;
    unsigned int compat_mode : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int compat_mode : 1;
    unsigned int filter_mode : 2;
    unsigned int disable_cube_wrap : 1;
    unsigned int trunc_coord : 1;
    unsigned int aniso_bias : 6;
    unsigned int force_degamma : 1;
    unsigned int mc_coord_trunc : 1;
    unsigned int aniso_threshold : 3;
    unsigned int force_unormalized : 1;
    unsigned int depth_compare_func : 3;
    unsigned int max_aniso_ratio : 3;
    unsigned int CLAMP_Z : 3;
    unsigned int CLAMP_Y : 3;
    unsigned int CLAMP_X : 3;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 1 of the SQ_IMG_SAMP descriptor.
// Field widths add up to 12 + 12 + 4 + 4 = 32 bits; the BIGENDIAN_CPU branch
// declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_SAMP_WORD1 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int min_lod : 12;
    unsigned int max_lod : 12;
    unsigned int perf_mip : 4;
    unsigned int perf_z : 4;
#elif defined(BIGENDIAN_CPU)
    unsigned int perf_z : 4;
    unsigned int perf_mip : 4;
    unsigned int max_lod : 12;
    unsigned int min_lod : 12;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 2 of the SQ_IMG_SAMP descriptor.
// Field widths add up to 14 + 6 + 2 + 2 + 2 + 2 + 1 + 1 + 1 + 1 = 32 bits; the
// BIGENDIAN_CPU branch declares the same fields in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_SAMP_WORD2 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int lod_bias : 14;
    unsigned int lod_bias_sec : 6;
    // 2 bits each; presumably hold SQ_TEX_XY_FILTER, SQ_TEX_Z_FILTER and
    // SQ_TEX_MIP_FILTER codes.
    unsigned int xy_mag_filter : 2;
    unsigned int xy_min_filter : 2;
    unsigned int z_filter : 2;
    unsigned int mip_filter : 2;
    unsigned int mip_point_preclamp : 1;
    unsigned int disable_lsb_ceil : 1;
    unsigned int filter_prec_fix : 1;
    unsigned int aniso_override_vi : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int aniso_override_vi : 1;
    unsigned int filter_prec_fix : 1;
    unsigned int disable_lsb_ceil : 1;
    unsigned int mip_point_preclamp : 1;
    unsigned int mip_filter : 2;
    unsigned int z_filter : 2;
    unsigned int xy_min_filter : 2;
    unsigned int xy_mag_filter : 2;
    unsigned int lod_bias_sec : 6;
    unsigned int lod_bias : 14;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Word 3 of the SQ_IMG_SAMP descriptor.
// Field widths add up to 12 + 18 (unnamed gap) + 2 = 32 bits; the
// BIGENDIAN_CPU branch declares the same members in the opposite order.
// The bit-field view overlays the raw unsigned, signed and float views.
union SQ_IMG_SAMP_WORD3 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int border_color_ptr : 12;
    unsigned int : 18;
    // 2 bits; presumably holds an SQ_TEX_BORDER_COLOR code.
    unsigned int border_color_type : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int border_color_type : 2;
    unsigned int : 18;
    unsigned int border_color_ptr : 12;
#endif
  } bitfields, bits;
  unsigned int u32_all;
  signed int i32_all;
  float f32_all;
};

// Data-format codes. The numbering has gaps: 6, 7, 15, 19 and 23 are not
// assigned here.
typedef enum FMT {
  FMT_INVALID             = 0,
  FMT_8                   = 1,
  FMT_16                  = 2,
  FMT_8_8                 = 3,
  FMT_32                  = 4,
  FMT_16_16               = 5,
  FMT_10_10_10_2          = 8,
  FMT_2_10_10_10          = 9,
  FMT_8_8_8_8             = 10,
  FMT_32_32               = 11,
  FMT_16_16_16_16         = 12,
  FMT_32_32_32            = 13,
  FMT_32_32_32_32         = 14,
  FMT_5_6_5               = 16,
  FMT_1_5_5_5             = 17,
  FMT_5_5_5_1             = 18,
  FMT_8_24                = 20,
  FMT_24_8                = 21,
  FMT_X24_8_32            = 22,
  FMT_RESERVED_24__SI__CI = 24
} FMT;

// Numeric-format codes. Only six values are assigned; note that TYPE_SRGB is
// 9 here.
typedef enum type {
  TYPE_UNORM = 0,
  TYPE_SNORM = 1,
  TYPE_UINT  = 4,
  TYPE_SINT  = 5,
  TYPE_FLOAT = 7,
  TYPE_SRGB  = 9
} type;

// Component-select codes. Values 2 and 3 are not assigned here.
typedef enum SEL {
  SEL_0 = 0,
  SEL_1 = 1,
  SEL_X = 4,
  SEL_Y = 5,
  SEL_Z = 6,
  SEL_W = 7,
} SEL;

// Image resource dimensionality codes. Value 11 is not assigned here.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D       = 8,
  SQ_RSRC_IMG_2D       = 9,
  SQ_RSRC_IMG_3D       = 10,
  SQ_RSRC_IMG_1D_ARRAY = 12,
  SQ_RSRC_IMG_2D_ARRAY = 13,
} SQ_RSRC_IMG_TYPE;

// XY filter mode codes (four values, 0 through 3).
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0,
  SQ_TEX_XY_FILTER_BILINEAR       = 1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 3,
} SQ_TEX_XY_FILTER;

// Z filter mode codes (three values, 0 through 2).
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0,
  SQ_TEX_Z_FILTER_POINT  = 1,
  SQ_TEX_Z_FILTER_LINEAR = 2,
} SQ_TEX_Z_FILTER;

// Mip filter mode codes (four values, 0 through 3).
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0,
  SQ_TEX_MIP_FILTER_POINT               = 1,
  SQ_TEX_MIP_FILTER_LINEAR              = 2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 3,
} SQ_TEX_MIP_FILTER;

// Addressing (wrap / mirror / clamp) mode codes (eight values, 0 through 7).
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0,
  SQ_TEX_MIRROR                  = 1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 3,
  SQ_TEX_CLAMP_HALF_BORDER       = 4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
  SQ_TEX_CLAMP_BORDER            = 6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 7,
} SQ_TEX_CLAMP;

// Border colour selection codes (four values, 0 through 3).
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 3,
} SQ_TEX_BORDER_COLOR;

// Image metadata record for the CI/VI layout: a version/vendor header, the
// eight SQ_IMG_RSRC descriptor words in order, then a variable-length tail.
// Member order defines the record layout; do not reorder.
typedef struct metadata_amd_ci_vi_s {
    uint32_t version; // Must be 1
    uint32_t vendorID; // AMD | CZ
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    SQ_IMG_RSRC_WORD6 word6;
    SQ_IMG_RSRC_WORD7 word7;
    // Zero-length trailing array (a compiler extension): adds no size of its
    // own and names whatever storage follows the fixed part.
    uint32_t mip_offsets[0]; //Mip level offset bits [39:8] for each level (if any)
} metadata_amd_ci_vi_t;

}  // namespace image
}  // namespace rocr
#endif  // HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H


================================================
FILE: runtime/hsa-runtime/image/resource_nv.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef EXT_IMAGE_RESOURCE_NV_H_
#define EXT_IMAGE_RESOURCE_NV_H_

#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif

namespace rocr {
namespace image {

/**********************************************************/
/**********************************************************/
// Word 0 of the SQ_BUF_RSRC (buffer resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of its single field.
#define SQ_BUF_RSC_WRD0_REG_SZ 32
#define SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ 32

// Bit-field layout of word 0. There is only one 32-bit field, so both endian
// branches are identical.
struct sq_buf_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};

// Word 0 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_BUF_RSRC_WORD0 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_buf_rsrc_word0_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_BUF_RSC_WRD0_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};

/***********/

// Word 1 of the SQ_BUF_RSRC (buffer resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of each field.
#define SQ_BUF_RSC_WRD1_REG_SZ 32
#define SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ  16
#define SQ_BUF_RSC_WRD1_STRIDE_SZ           14
#define SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ    1
#define SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ   1
// Bit-field layout of word 1. The widths add up to 16 + 14 + 1 + 1 = 32 bits.
// The BIGENDIAN_CPU branch declares the same fields in the opposite order.
struct sq_buf_rsrc_word1_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;
  unsigned int CACHE_SWIZZLE   : SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ;
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int SWIZZLE_ENABLE  : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
  unsigned int CACHE_SWIZZLE   : SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ;
  unsigned int STRIDE          : SQ_BUF_RSC_WRD1_STRIDE_SZ;
  unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};

// Word 1 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_BUF_RSRC_WORD1 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_buf_rsrc_word1_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_BUF_RSC_WRD1_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Word 2 of the SQ_BUF_RSRC (buffer resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of its single field.
#define SQ_BUF_RSC_WRD2_REG_SZ 32
#define SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ 32
// Bit-field layout of word 2. There is only one 32-bit field, so both endian
// branches are identical.
struct sq_buf_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#endif
};
// Word 2 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_BUF_RSRC_WORD2 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_buf_rsrc_word2_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_BUF_RSC_WRD2_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Word 3 of the SQ_BUF_RSRC (buffer resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of each field.
// Note the irregular macro names: RESOURCE_LEVEL and RESERVED_1 have no _SZ
// suffix, and OOB_SELECT uses the prefix SQ_BUF_RSC_WORD3_ rather than
// SQ_BUF_RSC_WRD3_.
#define SQ_BUF_RSC_WRD3_REG_SZ 32
#define SQ_BUF_RSC_WRD3_DST_SEL_X_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ        3
#define SQ_BUF_RSC_WRD3_DST_SEL_W_SZ        3
#define SQ_BUF_RSC_WRD3_FORMAT_SZ           7
#define SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ     2
#define SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ   1
#define SQ_BUF_RSC_WRD3_RESOURCE_LEVEL      1
#define SQ_BUF_RSC_WRD3_RESERVED_1          2
#define SQ_BUF_RSC_WORD3_OOB_SELECT_SZ      2
#define SQ_BUF_RSC_WRD3_TYPE_SZ             2
// Bit-field layout of word 3. The widths add up to
// 4*3 + 7 + 2 (unnamed) + 2 + 1 + 1 + 1 (unnamed) + 2 + 2 + 2 = 32 bits.
// The BIGENDIAN_CPU branch declares the same members in the opposite order.
struct sq_buf_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X      : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;
  unsigned int DST_SEL_Y      : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_Z      : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_W      : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int FORMAT         : SQ_BUF_RSC_WRD3_FORMAT_SZ;
  unsigned int                : 2;
  unsigned int INDEX_STRIDE   : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
  unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
  unsigned int RESOURCE_LEVEL : SQ_BUF_RSC_WRD3_RESOURCE_LEVEL;
  unsigned int                : 1;
  unsigned int RESERVED_1     : SQ_BUF_RSC_WRD3_RESERVED_1;
  unsigned int OOB_SELECT     : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
  unsigned int TYPE           : SQ_BUF_RSC_WRD3_TYPE_SZ;

#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE           : SQ_BUF_RSC_WRD3_TYPE_SZ;
  unsigned int OOB_SELECT     : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
  unsigned int RESERVED_1     : SQ_BUF_RSC_WRD3_RESERVED_1;
  unsigned int                : 1;
  unsigned int RESOURCE_LEVEL : SQ_BUF_RSC_WRD3_RESOURCE_LEVEL;
  unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
  unsigned int INDEX_STRIDE   : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
  unsigned int                : 2;
  unsigned int FORMAT         : SQ_BUF_RSC_WRD3_FORMAT_SZ;
  unsigned int DST_SEL_W      : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z      : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y      : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X      : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;

#endif
};
// Word 3 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_BUF_RSRC_WORD3 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_buf_rsrc_word3_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_BUF_RSC_WRD3_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**********************************************************/
/**********************************************************/
// Word 0 of the SQ_IMG_RSRC (image resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of its single field.
#define SQ_IMG_RSC_WRD0_REG_SZ 32
#define SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ 32
// Bit-field layout of word 0. There is only one 32-bit field, so both endian
// branches are identical.
struct sq_img_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};
// Word 0 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_IMG_RSRC_WORD0 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_img_rsrc_word0_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_IMG_RSC_WRD0_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Word 1 of the SQ_IMG_RSRC (image resource) descriptor: widths, in bits, of
// the whole word (REG_SZ) and of each field.
// SQ_IMG_RSC_WRD1_WIDTH_LO (no _SZ suffix) is the width of the WIDTH field
// below.
#define SQ_IMG_RSC_WRD1_REG_SZ 32
#define SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ  8
#define SQ_IMG_RSC_WRD1_MIN_LOD_SZ          12
#define SQ_IMG_RSC_WRD1_FORMAT_SZ           9
#define SQ_IMG_RSC_WRD1_WIDTH_LO            2

// Bit-field layout of word 1. The widths add up to
// 8 + 12 + 9 + 1 (unnamed) + 2 = 32 bits.
// The BIGENDIAN_CPU branch declares the same members in the opposite order.
// NOTE(review): WIDTH here is only 2 bits wide and word 2 carries a 12-bit
// WIDTH_HI, so the image width is presumably split across the two words -
// confirm against the code that programs the descriptor.
struct sq_img_rsrc_word1_t{
#if defined(LITTLEENDIAN_CPU)
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
  unsigned int MIN_LOD         : SQ_IMG_RSC_WRD1_MIN_LOD_SZ;
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;
  unsigned int                 : 1;
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;
#elif defined(BIGENDIAN_CPU)
  unsigned int WIDTH           : SQ_IMG_RSC_WRD1_WIDTH_LO;
  unsigned int                 : 1;
  unsigned int FORMAT          : SQ_IMG_RSC_WRD1_FORMAT_SZ;
  unsigned int MIN_LOD         : SQ_IMG_RSC_WRD1_MIN_LOD_SZ;
  unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};
// Word 1 accessible either through its bit-fields or as a raw value; all
// members overlay the same storage.
union SQ_IMG_RSRC_WORD1 {
  // `bitfields`, `bits` and `f` are three names for the same bit-field view.
  sq_img_rsrc_word1_t bitfields, bits, f;
  // Raw value declared as a REG_SZ-wide bit-field.
  uint32_t val : SQ_IMG_RSC_WRD1_REG_SZ;
  // Raw word viewed as unsigned, signed and float respectively.
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 2.
// Field widths: 12 + 2 (unnamed) + 14 + 2 (unnamed) + 1 (unnamed) + 1 = 32 bits.
#define SQ_IMG_RSC_WRD2_REG_SZ 32
#define SQ_IMG_RSC_WRD2_WIDTH_HI_SZ        12
#define SQ_IMG_RSC_WRD2_HEIGHT_SZ          14
#define SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ  1
struct sq_img_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int WIDTH_HI       : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
  unsigned int                : 2;
  unsigned int HEIGHT         : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int                : 2;
  unsigned int                : 1;
  unsigned int RESOURCE_LEVEL : SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ;
#elif defined(BIGENDIAN_CPU)
  // Exact mirror of the little-endian branch. The padding bits are unnamed
  // (two members named RESERVED would be a redeclaration error) and WIDTH_HI
  // uses SQ_IMG_RSC_WRD2_WIDTH_HI_SZ, the only width macro defined for it.
  unsigned int RESOURCE_LEVEL : SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ;
  unsigned int                : 1;
  unsigned int                : 2;
  unsigned int HEIGHT         : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int                : 2;
  unsigned int WIDTH_HI       : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
#endif
};
// Word 2 viewed either through the bitfield struct (`bitfields`, `bits` and
// `f` are aliases of each other) or as one raw word.
union SQ_IMG_RSRC_WORD2 {
  sq_img_rsrc_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 3.
// Field widths: 3 + 3 + 3 + 3 + 4 + 4 + 5 + 3 + 4 = 32 bits.
#define SQ_IMG_RSC_WRD3_REG_SZ 32
#define SQ_IMG_RSC_WRD3_DST_SEL_X_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ  3
#define SQ_IMG_RSC_WRD3_DST_SEL_W_SZ  3
#define SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_SW_MODE_SZ    5
#define SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ 3
#define SQ_IMG_RSC_WRD3_TYPE_SZ       4
struct sq_img_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X  : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
  unsigned int DST_SEL_Y  : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_Z  : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_W  : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int SW_MODE    : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  unsigned int TYPE       : SQ_IMG_RSC_WRD3_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
  // Same fields in reverse order. The member is named SW_MODE here as well so
  // that code accessing the field compiles against either branch.
  unsigned int TYPE       : SQ_IMG_RSC_WRD3_TYPE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  unsigned int SW_MODE    : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int DST_SEL_W  : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z  : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y  : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X  : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
#endif
};
// Word 3 viewed either through the bitfield struct (`bitfields`, `bits` and
// `f` are aliases of each other) or as one raw word.
union SQ_IMG_RSRC_WORD3 {
  sq_img_rsrc_word3_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 4. The bitfield type is a union of two
// anonymous structs, so the same word can be read either as
// DEPTH / BASE_ARRAY or as a single 14-bit PITCH field.
#define SQ_IMG_RSC_WRD4_REG_SZ 32
#define SQ_IMG_RSC_WRD4_DEPTH_SZ    13
#define SQ_IMG_RSC_WRD4_BASE_ARR_SZ 13
#define SQ_IMG_RSC_WRD4_PITCH_SZ 14
union sq_img_rsrc_word4_t {
  // View 1: DEPTH and BASE_ARRAY (13 + 1 + 2 + 13 + 3 = 32 bits).
  struct {
#if defined(LITTLEENDIAN_CPU)
    // For arrays this is the last slice in view, for 3D images it is depth-1,
    // and for the remaining image types it is pitch-1.
    unsigned int DEPTH      : SQ_IMG_RSC_WRD4_DEPTH_SZ;
    unsigned int            : 1; // Pitch[13] in gfx1030
    unsigned int            : 2;
    unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
    unsigned int            : 3;
#elif defined(BIGENDIAN_CPU)
    unsigned int            : 3;
    unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
    unsigned int            : 2;
    unsigned int            : 1; // Pitch[13] in gfx1030
    unsigned int DEPTH      : SQ_IMG_RSC_WRD4_DEPTH_SZ; // Pitch[0:12] in gfx1030
#endif
  };
  // View 2: PITCH occupies 14 bits; the rest of the word is unnamed padding.
  struct {
#if defined(LITTLEENDIAN_CPU)
    // For 1d, 2d and 2d-msaa images in gfx1030 this is pitch-1.
    unsigned int PITCH      : SQ_IMG_RSC_WRD4_PITCH_SZ;
    unsigned int            : SQ_IMG_RSC_WRD4_REG_SZ-SQ_IMG_RSC_WRD4_PITCH_SZ;
#elif defined(BIGENDIAN_CPU)
    unsigned int            : SQ_IMG_RSC_WRD4_REG_SZ-SQ_IMG_RSC_WRD4_PITCH_SZ;
    unsigned int PITCH      : SQ_IMG_RSC_WRD4_PITCH_SZ;
#endif
  };
};
// Word 4 viewed either through the bitfield union (`bitfields`, `bits` and
// `f` are aliases of each other) or as one raw word.
union SQ_IMG_RSRC_WORD4 {
  sq_img_rsrc_word4_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD4_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 5.
// Field widths: 4 + 4 + 12 + 3 + 1 + 1 + 1 + 1 + 4 (unnamed) + 1 = 32 bits.
#define SQ_IMG_RSC_WRD5_REG_SZ                       32
#define SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ               4
#define SQ_IMG_RSC_WRD5_MAX_MIP_SZ                   4
// Disabled alternative: three 4-bit fields in place of the 12-bit MID_LOD_WRN.
//#define SQ_IMG_RSC_WRD5_DSCAL_OR_MID_LOD_WRN_SZ      4
//#define SQ_IMG_RSC_WRD5_HSCAL_OR_MID_LOD_WRN_SZ      4
//#define SQ_IMG_RSC_WRD5_WSCAL_OR_MID_LOD_WRN_SZ      4
#define SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ               12
#define SQ_IMG_RSC_WRD5_PERF_MOD_SZ                  3
#define SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ            1
#define SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ           1
#define SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ            1
#define SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ               1
#define SQ_IMG_RSC_WRD5_BIG_PAGE_SZ                  1

struct sq_img_rsrc_word5_t {
  // The big-endian branch declares the same fields in reverse order.
  // MID_LOD_WRN stands in for the disabled DSCAL/HSCAL/WSCAL_OR_MID_LOD_WRN
  // fields (three 4-bit fields, 12 bits in total).
#if defined(LITTLEENDIAN_CPU)
  unsigned int ARRAY_PITCH          : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
  unsigned int MAX_MIP              : SQ_IMG_RSC_WRD5_MAX_MIP_SZ;
  unsigned int MID_LOD_WRN          : SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ;
  unsigned int PERF_MOD             : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
  unsigned int CORNER_SAMPLES       : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
  unsigned int LINKED_RESOURCE      : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
  unsigned int LOD_HDW_CNT_EN       : SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ;
  unsigned int PRT_DEFAULT          : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
  unsigned int                      : 4;  // unnamed padding
  unsigned int BIG_PAGE             : SQ_IMG_RSC_WRD5_BIG_PAGE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BIG_PAGE             : SQ_IMG_RSC_WRD5_BIG_PAGE_SZ;
  unsigned int                      : 4;  // unnamed padding
  unsigned int PRT_DEFAULT          : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
  unsigned int LOD_HDW_CNT_EN       : SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ;
  unsigned int LINKED_RESOURCE      : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
  unsigned int CORNER_SAMPLES       : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
  unsigned int PERF_MOD             : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
  unsigned int MID_LOD_WRN          : SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ;
  unsigned int MAX_MIP              : SQ_IMG_RSC_WRD5_MAX_MIP_SZ;
  unsigned int ARRAY_PITCH          : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
#endif
};

// Word 5 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_RSRC_WORD5 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_rsrc_word5_t bitfields;
  sq_img_rsrc_word5_t bits;
  sq_img_rsrc_word5_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_RSC_WRD5_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 6.
// Field widths: 8 + 2 + 1 + 4 (unnamed) + 2 + 2 + 1 + 1 + 1 + 1 + 1 + 8 = 32 bits.
#define SQ_IMG_RSC_WRD6_REG_SZ                    32
#define SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ        8
#define SQ_IMG_RSC_WRD6_RESERVED_2_SZ             2
#define SQ_IMG_RSC_WRD6_ITERATE_256_SZ            1
#define SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ      2
#define SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ        2
#define SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ      1
#define SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ      1
#define SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ     1
#define SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ        1
#define SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ        1
#define SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ         8
struct sq_img_rsrc_word6_t {
  // The big-endian branch declares the same fields in reverse order.
#if defined(LITTLEENDIAN_CPU)
  unsigned int COUNTER_BANK_ID       : SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ;
  unsigned int RESERVED_2            : SQ_IMG_RSC_WRD6_RESERVED_2_SZ;
  unsigned int ITERATE_256           : SQ_IMG_RSC_WRD6_ITERATE_256_SZ;
  unsigned int                       : 4;  // unnamed padding
  unsigned int MAX_UNCOMP_BLK_SZ     : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
  unsigned int MAX_COMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
  unsigned int META_PIPE_ALIGNED     : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
  unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
  unsigned int COMPRESSION_ENABLE    : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
  unsigned int ALPHA_IS_ON_MSB       : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
  unsigned int COLOR_TRANSFORM       : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
  unsigned int META_DATA_ADDRESS     : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int META_DATA_ADDRESS     : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
  unsigned int COLOR_TRANSFORM       : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
  unsigned int ALPHA_IS_ON_MSB       : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
  unsigned int COMPRESSION_ENABLE    : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
  unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
  unsigned int META_PIPE_ALIGNED     : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
  unsigned int MAX_COMP_BLK_SZ       : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
  unsigned int MAX_UNCOMP_BLK_SZ     : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
  unsigned int                       : 4;  // unnamed padding
  unsigned int ITERATE_256           : SQ_IMG_RSC_WRD6_ITERATE_256_SZ;
  unsigned int RESERVED_2            : SQ_IMG_RSC_WRD6_RESERVED_2_SZ;
  unsigned int COUNTER_BANK_ID       : SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ;
#endif
};
// Word 6 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_RSRC_WORD6 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_rsrc_word6_t bitfields;
  sq_img_rsrc_word6_t bits;
  sq_img_rsrc_word6_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_RSC_WRD6_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image resource descriptor, word 7: a single META_DATA_ADDRESS_HI field that
// spans the whole 32-bit word.
#define SQ_IMG_RSC_WRD7_REG_SZ                  32
#define SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ 32
struct sq_img_rsrc_word7_t {
  // Only one field exists, so both endian branches declare the same layout.
#if defined(LITTLEENDIAN_CPU)
  unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#endif
};
// Word 7 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_RSRC_WORD7 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_rsrc_word7_t bitfields;
  sq_img_rsrc_word7_t bits;
  sq_img_rsrc_word7_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_RSC_WRD7_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/
/**********************************************************/
/**********************************************************/

// Image sampler descriptor, word 0.
// Field widths: 3 + 3 + 3 + 3 + 3 + 1 + 3 + 1 + 1 + 6 + 1 + 1 + 2 + 1 = 32 bits.
#define SQ_IMG_SAMP_WORD0_REG_SZ                32
#define SQ_IMG_SAMP_WORD0_CLAMP_X_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ            3
#define SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ            3
#define SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ    3
#define SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ 3
#define SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ 1
#define SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ    3
#define SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ     1
#define SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ      1
#define SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ         6
#define SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ        1
#define SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ  1
#define SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ        2
#define SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ       1
struct sq_img_samp_word0_t {
  // The big-endian branch declares the same fields in reverse order.
#if defined(LITTLEENDIAN_CPU)
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int SKIP_DEGAMMA       : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
  unsigned int FILTER_MODE        : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
  unsigned int DISABLE_CUBE_WRAP  : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
  unsigned int TRUNC_COORD        : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
  unsigned int ANISO_BIAS         : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
  unsigned int FORCE_DEGAMMA      : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
  unsigned int MC_COORD_TRUNC     : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
  unsigned int ANISO_THRESHOLD    : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
  unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
  unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
  unsigned int MAX_ANISO_RATIO    : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
  unsigned int CLAMP_Z            : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
  unsigned int CLAMP_Y            : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
  unsigned int CLAMP_X            : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
#endif
};

// Sampler word 0 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_SAMP_WORD0 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_samp_word0_t bitfields;
  sq_img_samp_word0_t bits;
  sq_img_samp_word0_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_SAMP_WORD0_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image sampler descriptor, word 1.
// Field widths: 12 + 12 + 4 + 4 = 32 bits.
#define SQ_IMG_SAMP_WORD1_REG_SZ      32
#define SQ_IMG_SAMP_WORD1_MIN_LOD_SZ  12
#define SQ_IMG_SAMP_WORD1_MAX_LOD_SZ  12
#define SQ_IMG_SAMP_WORD1_PERF_MIP_SZ 4
#define SQ_IMG_SAMP_WORD1_PERF_Z_SZ   4
struct sq_img_samp_word1_t {
  // The big-endian branch declares the same fields in reverse order.
#if defined(LITTLEENDIAN_CPU)
  unsigned int MIN_LOD  : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
  unsigned int MAX_LOD  : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
  unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
  unsigned int PERF_Z   : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int PERF_Z   : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
  unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
  unsigned int MAX_LOD  : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
  unsigned int MIN_LOD  : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
#endif
};

// Sampler word 1 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_SAMP_WORD1 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_samp_word1_t bitfields;
  sq_img_samp_word1_t bits;
  sq_img_samp_word1_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_SAMP_WORD1_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image sampler descriptor, word 2.
// Field widths: 12 + 2 + 6 + 2 + 2 + 2 + 2 + 1 + 1 + 1 + 1 = 32 bits.
#define SQ_IMG_SAMP_WORD2_REG_SZ 32
#define SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ            12
#define SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ            2
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ         6
#define SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ        2
#define SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ        2
#define SQ_IMG_SAMP_WORD2_Z_FILTER_SZ             2
#define SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ           2
#define SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ   1
#define SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ       1
#define SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ       1
#define SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ  1
struct sq_img_samp_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BC_LRS_LB          : SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ;
  unsigned int BC_OR_BCT          : SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ;
  unsigned int LOD_BIAS_SEC       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int XY_MAG_FILTER      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int XY_MIN_FILTER      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int Z_FILTER           : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int MIP_FILTER         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int MIP_POINT_PRECLAMP : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ;
  unsigned int ANISO_OVERRIDE     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int BLEND_ZERO_PRT     : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ;
  unsigned int DERIV_ADJUST_EN    : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ;
#elif defined(BIGENDIAN_CPU)
  // Same fields in reverse order. The 12-bit field is named BC_LRS_LB here as
  // well so that code accessing it compiles against either branch.
  unsigned int DERIV_ADJUST_EN    : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ;
  unsigned int BLEND_ZERO_PRT     : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ;
  unsigned int ANISO_OVERRIDE     : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int MIP_POINT_PRECLAMP : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ;
  unsigned int MIP_FILTER         : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int Z_FILTER           : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int XY_MIN_FILTER      : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int XY_MAG_FILTER      : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int LOD_BIAS_SEC       : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int BC_OR_BCT          : SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ;
  unsigned int BC_LRS_LB          : SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ;
#endif
};

// Sampler word 2 viewed either through the bitfield struct (`bitfields`,
// `bits` and `f` are aliases of each other) or as one raw word.
union SQ_IMG_SAMP_WORD2 {
  sq_img_samp_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_SAMP_WORD2_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

// Image sampler descriptor, word 3.
// Field widths: 12 + 16 + 2 + 2 = 32 bits.
#define SQ_IMG_SAMP_WORD3_REG_SZ              32
#define SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ      12
#define SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ  16
#define SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ       2
#define SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ  2

struct sq_img_samp_word3_t {
  // The big-endian branch declares the same fields in reverse order.
#if defined(LITTLEENDIAN_CPU)
  unsigned int BCP_LRS_DAV       : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;
  unsigned int GRAD_ADJ_OR_DAV   : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;
  unsigned int RES_OR_DAV        : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
  unsigned int RES_OR_DAV        : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;
  unsigned int GRAD_ADJ_OR_DAV   : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;
  unsigned int BCP_LRS_DAV       : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;
#endif
};

// Sampler word 3 viewed either through the bitfield struct or as one raw word.
union SQ_IMG_SAMP_WORD3 {
  // Bitfield view; `bitfields`, `bits` and `f` are aliases of each other.
  sq_img_samp_word3_t bitfields;
  sq_img_samp_word3_t bits;
  sq_img_samp_word3_t f;
  // Whole-register views.
  uint32_t val : SQ_IMG_SAMP_WORD3_REG_SZ;
  uint32_t u32All;
  int32_t  i32All;
  float    f32All;
};
/***********/

/**************************************************************/
/**************************************************************/
/**************************************************************/

// Numeric codes of the FMT_* data formats. Values run contiguously from
// 0x00 to 0x44; each one is written out explicitly.
typedef enum FMT {
  FMT_INVALID           = 0x00,
  FMT_8                 = 0x01,
  FMT_16                = 0x02,
  FMT_8_8               = 0x03,
  FMT_32                = 0x04,
  FMT_16_16             = 0x05,
  FMT_10_11_11          = 0x06,
  FMT_11_11_10          = 0x07,
  FMT_10_10_10_2        = 0x08,
  FMT_2_10_10_10        = 0x09,
  FMT_8_8_8_8           = 0x0a,
  FMT_32_32             = 0x0b,
  FMT_16_16_16_16       = 0x0c,
  FMT_32_32_32          = 0x0d,
  FMT_32_32_32_32       = 0x0e,
  FMT_RESERVED_78       = 0x0f,
  FMT_5_6_5             = 0x10,
  FMT_1_5_5_5           = 0x11,
  FMT_5_5_5_1           = 0x12,
  FMT_4_4_4_4           = 0x13,
  FMT_8_24              = 0x14,
  FMT_24_8              = 0x15,
  FMT_X24_8_32          = 0x16,
  FMT_RESERVED_155      = 0x17,
  FMT_1                 = 0x18,
  FMT_1_REVERSED        = 0x19,
  FMT_GB_GR             = 0x1a,
  FMT_BG_RG             = 0x1b,
  FMT_4_4               = 0x1c,
  FMT_BC1               = 0x1d,
  FMT_BC2               = 0x1e,
  FMT_BC3               = 0x1f,
  FMT_BC4               = 0x20,
  FMT_BC5               = 0x21,
  FMT_BC6               = 0x22,
  FMT_BC7               = 0x23,
  FMT_6E4               = 0x24,
  FMT_5_9_9_9           = 0x25,
  FMT_FMASK8_S2         = 0x26,
  FMT_FMASK8_S4         = 0x27,
  FMT_FMASK8_S8         = 0x28,
  FMT_FMASK16_S16       = 0x29,
  FMT_FMASK16_S8        = 0x2a,
  FMT_FMASK32_S16       = 0x2b,
  FMT_FMASK32_S8        = 0x2c,
  FMT_FMASK64_S16       = 0x2d,
  FMT_ETC2_RGB          = 0x2e,
  FMT_ETC2_RGBA         = 0x2f,
  FMT_ETC2_R            = 0x30,
  FMT_ETC2_RG           = 0x31,
  FMT_ETC2_RGBA1        = 0x32,
  FMT_ASTC_2D_LDR       = 0x33,
  FMT_ASTC_2D_HDR       = 0x34,
  FMT_ASTC_2D_LDR_SRGB  = 0x35,
  FMT_ASTC_3D_LDR       = 0x36,
  FMT_ASTC_3D_HDR       = 0x37,
  FMT_ASTC_3D_LDR_SRGB  = 0x38,
  FMT_MM_8              = 0x39,
  FMT_MM_8_8            = 0x3a,
  FMT_MM_8_8_8_8        = 0x3b,
  FMT_MM_VYUY8          = 0x3c,
  FMT_MM_10_11_11       = 0x3d,
  FMT_MM_2_10_10_10     = 0x3e,
  FMT_MM_16_16_16_16    = 0x3f,
  FMT_10_IN_16          = 0x40,
  FMT_10_IN_16_16       = 0x41,
  FMT_10_IN_16_16_16_16 = 0x42,
  FMT_7E3               = 0x43,
  FMT_YCBCR             = 0x44,
} FMT;

// Numeric codes of the TYPE_* number types. Values run contiguously from
// 0x00 to 0x0c; each one is written out explicitly.
typedef enum type {
  TYPE_UNORM          = 0x00,
  TYPE_SNORM          = 0x01,
  TYPE_USCALED        = 0x02,
  TYPE_SSCALED        = 0x03,
  TYPE_UINT           = 0x04,
  TYPE_SINT           = 0x05,
  TYPE_RESERVED_6     = 0x06,
  TYPE_FLOAT          = 0x07,
  TYPE_RESERVED_8     = 0x08,
  TYPE_SRGB           = 0x09,
  TYPE_UNORM_UINT     = 0x0a,
  TYPE_REVERSED_UNORM = 0x0b,
  TYPE_FLOAT_CLAMP    = 0x0c,
} type;

// Combined format codes (CFMT_<component layout>_<number type>). The numbering
// is sparse: gaps between listed values are intentional and preserved.
enum FORMAT {
  CFMT_INVALID           = 0,

  // 8-bit single channel
  CFMT_8_UNORM           = 1,
  CFMT_8_SNORM           = 2,
  CFMT_8_UINT            = 5,
  CFMT_8_SINT            = 6,

  // 16-bit single channel
  CFMT_16_UNORM          = 7,
  CFMT_16_SNORM          = 8,
  CFMT_16_UINT           = 11,
  CFMT_16_SINT           = 12,
  CFMT_16_FLOAT          = 13,

  // 8_8
  CFMT_8_8_UNORM         = 14,
  CFMT_8_8_SNORM         = 15,
  CFMT_8_8_UINT          = 18,
  CFMT_8_8_SINT          = 19,

  // 32-bit single channel
  CFMT_32_UINT           = 20,
  CFMT_32_SINT           = 21,
  CFMT_32_FLOAT          = 22,

  // 16_16
  CFMT_16_16_UNORM       = 23,
  CFMT_16_16_SNORM       = 24,
  CFMT_16_16_UINT        = 27,
  CFMT_16_16_SINT        = 28,
  CFMT_16_16_FLOAT       = 29,

  // 10_10_10_2
  CFMT_10_10_10_2_UNORM  = 44,
  CFMT_10_10_10_2_SNORM  = 45,
  CFMT_10_10_10_2_UINT   = 48,
  CFMT_10_10_10_2_SINT   = 49,

  // 2_10_10_10
  CFMT_2_10_10_10_UNORM  = 50,
  CFMT_2_10_10_10_SNORM  = 51,
  CFMT_2_10_10_10_UINT   = 54,
  CFMT_2_10_10_10_SINT   = 55,

  // 8_8_8_8
  CFMT_8_8_8_8_UNORM     = 56,
  CFMT_8_8_8_8_SNORM     = 57,
  CFMT_8_8_8_8_UINT      = 60,
  CFMT_8_8_8_8_SINT      = 61,

  // 32_32
  CFMT_32_32_UINT        = 62,
  CFMT_32_32_SINT        = 63,
  CFMT_32_32_FLOAT       = 64,

  // 16_16_16_16
  CFMT_16_16_16_16_UNORM = 65,
  CFMT_16_16_16_16_SNORM = 66,
  CFMT_16_16_16_16_UINT  = 69,
  CFMT_16_16_16_16_SINT  = 70,
  CFMT_16_16_16_16_FLOAT = 71,

  // 32_32_32
  CFMT_32_32_32_UINT     = 72,
  CFMT_32_32_32_SINT     = 73,
  CFMT_32_32_32_FLOAT    = 74,

  // 32_32_32_32
  CFMT_32_32_32_32_UINT  = 75,
  CFMT_32_32_32_32_SINT  = 76,
  CFMT_32_32_32_32_FLOAT = 77,

  // sRGB variants
  CFMT_8_SRGB            = 128,
  CFMT_8_8_SRGB          = 129,
  CFMT_8_8_8_8_SRGB      = 130,

  // Packed 16-bit layouts
  CFMT_5_6_5_UNORM       = 133,
  CFMT_1_5_5_5_UNORM     = 134,
  CFMT_5_5_5_1_UNORM     = 135,

  // 8_24 / 24_8
  CFMT_8_24_UNORM        = 141,
  CFMT_8_24_UINT         = 142,
  CFMT_24_8_UNORM        = 143,
  CFMT_24_8_UINT         = 144
};

// SEL_* selector codes: two constants (0, 1) and four components (X..W).
// Values 2 and 3 are not assigned.
typedef enum SEL {
  SEL_0 = 0x0,
  SEL_1 = 0x1,
  SEL_X = 0x4,
  SEL_Y = 0x5,
  SEL_Z = 0x6,
  SEL_W = 0x7,
} SEL;

// Image resource type codes; the listed values occupy 0x8 through 0xf.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D            = 0x8,
  SQ_RSRC_IMG_2D            = 0x9,
  SQ_RSRC_IMG_3D            = 0xa,
  SQ_RSRC_IMG_CUBE_ARRAY    = 0xb,
  SQ_RSRC_IMG_1D_ARRAY      = 0xc,
  SQ_RSRC_IMG_2D_ARRAY      = 0xd,
  SQ_RSRC_IMG_2D_MSAA       = 0xe,
  SQ_RSRC_IMG_2D_MSAA_ARRAY = 0xf,
} SQ_RSRC_IMG_TYPE;

// XY filter codes (0..3).
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0x0,
  SQ_TEX_XY_FILTER_BILINEAR       = 0x1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 0x2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x3,
} SQ_TEX_XY_FILTER;

// Z filter codes (0..2).
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0x0,
  SQ_TEX_Z_FILTER_POINT  = 0x1,
  SQ_TEX_Z_FILTER_LINEAR = 0x2,
} SQ_TEX_Z_FILTER;

// Mip filter codes (0..3).
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0x0,
  SQ_TEX_MIP_FILTER_POINT               = 0x1,
  SQ_TEX_MIP_FILTER_LINEAR              = 0x2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 0x3,
} SQ_TEX_MIP_FILTER;

// Texture coordinate wrap / mirror / clamp mode codes (0..7).
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0x0,
  SQ_TEX_MIRROR                  = 0x1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 0x2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 0x3,
  SQ_TEX_CLAMP_HALF_BORDER       = 0x4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x5,
  SQ_TEX_CLAMP_BORDER            = 0x6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 0x7,
} SQ_TEX_CLAMP;

// Border color selection codes (0..3).
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0x0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 0x3,
} SQ_TEX_BORDER_COLOR;

// Border-color swizzle codes (0..5); each name spells the component order.
typedef enum TEX_BC_SWIZZLE {
  TEX_BC_Swizzle_XYZW = 0x0,
  TEX_BC_Swizzle_XWYZ = 0x1,
  TEX_BC_Swizzle_WZYX = 0x2,
  TEX_BC_Swizzle_WXYZ = 0x3,
  TEX_BC_Swizzle_ZYXW = 0x4,
  TEX_BC_Swizzle_YXWZ = 0x5,
} TEX_BC_SWIZZLE;

// Image metadata record: a version, a vendor id, the eight image resource
// descriptor words (word0..word7), and a trailing mip_offsets array.
typedef struct metadata_amd_nv_s {
    uint32_t version;  // Must be 1
    uint32_t vendorID;  // AMD
    // Image resource descriptor words 0 through 7, in order.
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    SQ_IMG_RSRC_WORD6 word6;
    SQ_IMG_RSRC_WORD7 word7;
    // Zero-length trailing array (a compiler extension). Presumably the
    // per-mip offsets are stored directly after the struct - TODO confirm
    // against the code that fills in or parses this record.
    uint32_t mip_offsets[0];
} metadata_amd_nv_t;

}  // namespace image
}  // namespace rocr
#endif  // EXT_IMAGE_RESOURCE_NV_H_


================================================
FILE: runtime/hsa-runtime/image/util.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_EXT_IMAGE_UTIL_H
#define HSA_RUNTIME_EXT_IMAGE_UTIL_H

#include "stdint.h"
#include "stddef.h"
#include "stdlib.h"
#include <assert.h>
#include <iostream>
#include <string>
#include <algorithm>

#include "inc/hsa.h"

namespace rocr {
namespace image {

// ALIGNED_(x): compiler-specific alignment attribute. Expands to
// __declspec(align(x)) under MSVC and __attribute__((aligned(x))) under GCC
// compatible compilers; it is left undefined for any other compiler.
#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED_(x) __attribute__((aligned(x)))
#endif  // __GNUC__
#endif // _MSC_VER

// MULTILINE(...): turns its whole argument list into one string literal using
// the preprocessor stringizing operator.
#define MULTILINE(...) # __VA_ARGS__

// ASSERT_SIZE_UINT32(desc): compile-time check that sizeof(desc) equals
// sizeof(uint32_t). The expansion already ends with a semicolon.
#define ASSERT_SIZE_UINT32(desc)                                                                   \
  static_assert(sizeof(desc) == sizeof(uint32_t), #desc " size should be 32-bits");

}  // namespace image
}  // namespace rocr


#if defined(__GNUC__)
#include "mm_malloc.h"
#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#elif defined(__loongarch64)
#else
#error                                                                                             \
    "Processor not identified.  " \
            "Need to provide a lightweight approximate clock interface (aka __rdtsc())."
#endif

namespace rocr {
namespace image {

// GCC-compatible stand-ins for MSVC keywords and CRT helpers used by this code.
#define __forceinline __inline__ __attribute__((always_inline))
/// @brief: Stops execution by calling __builtin_trap().
static __forceinline void __debugbreak() { __builtin_trap(); }
#define __declspec(x) __attribute__((x))
#undef __stdcall
#define __stdcall  // __attribute__((__stdcall__))
#define __ALIGNED__(x) __attribute__((aligned(x)))

/// @brief: Allocates a block of memory with the requested alignment.
/// Uses aligned_alloc() when _ISOC11_SOURCE is defined, otherwise
/// posix_memalign().
/// @param: size(Input), number of bytes to allocate.
/// @param: alignment(Input), required alignment of the block in bytes.
/// @return: void*, the allocated block, or NULL when allocation fails.
static __forceinline void* _aligned_malloc(size_t size, size_t alignment) {
#ifdef _ISOC11_SOURCE
  void* block = aligned_alloc(alignment, size);
  return block;
#else
  void* block = NULL;
  const int status = posix_memalign(&block, alignment, size);
  return (status == 0) ? block : NULL;
#endif
}

/// @brief: Frees a block obtained from _aligned_malloc() by calling free().
/// @param: ptr(Input), block to free.
static __forceinline void _aligned_free(void* ptr) {
  free(ptr);
}
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
#include "intrin.h"
#define __ALIGNED__(x) __declspec(align(x))

namespace rocr {
namespace image {
#else
#error "Compiler and/or processor not identified."
#endif

// Declares the copy constructor, move constructor, copy assignment and move
// assignment of TypeName as deleted. Intended for use inside the class body;
// the expansion already ends with a semicolon.
#define DISALLOW_COPY_AND_ASSIGN(TypeName)                                                         \
  TypeName(const TypeName&) = delete;                                                              \
  TypeName(TypeName&&) = delete;                                                                   \
  void operator=(const TypeName&) = delete;                                                        \
  void operator=(TypeName&&) = delete;

/// @brief: Invokes a stored callable when the guard is destroyed, unless the
/// guard has been dismissed or its responsibility was handed to another guard.
template <typename lambda> class ScopeGuard {
 public:
  /// @brief: Arms the guard with a copy of @p release.
  explicit __forceinline ScopeGuard(const lambda& release) : release_(release), dismiss_(false) {}

  /// @brief: Transfers the pending release from @p rhs to the new guard and
  /// dismisses @p rhs. Members are initialized directly (rather than through
  /// operator=) so this also works for closure types, which are neither
  /// default-constructible nor assignable before C++20.
  ScopeGuard(ScopeGuard& rhs) : release_(rhs.release_), dismiss_(rhs.dismiss_) {
    rhs.dismiss_ = true;
  }

  /// @brief: Same transfer for temporaries, so a guard can be returned by
  /// value even where copy elision is not guaranteed.
  ScopeGuard(ScopeGuard&& rhs)
      : release_(static_cast<lambda&&>(rhs.release_)), dismiss_(rhs.dismiss_) {
    rhs.dismiss_ = true;
  }

  /// @brief: Runs the callable unless the guard was dismissed.
  __forceinline ~ScopeGuard() {
    if (!dismiss_) release_();
  }

  /// @brief: Takes over the callable and dismissed state of @p rhs and
  /// dismisses @p rhs. The callable previously held by this guard is replaced
  /// without being invoked. Only usable when the callable type is
  /// copy-assignable (e.g. a function pointer).
  __forceinline ScopeGuard& operator=(ScopeGuard& rhs) {
    // Self-assignment must not dismiss the guard.
    if (this == &rhs) return *this;
    dismiss_ = rhs.dismiss_;
    release_ = rhs.release_;
    rhs.dismiss_ = true;
    return *this;
  }

  /// @brief: Disarms the guard; the callable will not run on destruction.
  __forceinline void Dismiss() { dismiss_ = true; }

 private:
  lambda release_;  // Callable invoked on destruction.
  bool dismiss_;    // True once the guard must no longer invoke release_.
};

/// @brief: Builds a ScopeGuard around @p rel, deducing the callable type.
/// @param: rel(Input), callable to run at scope exit.
/// @return: ScopeGuard<lambda>, an armed guard.
template <typename lambda> static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) {
  return ScopeGuard<lambda>(rel);
}

// Declares two locals: `lname`, holding the callable given as the variadic argument, and
// `sname`, a ScopeGuard constructed from it. Expands to two statements.
#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...)                                                 \
  auto lname = __VA_ARGS__;                                                                        \
  ScopeGuard<decltype(lname)> sname(lname);
// Creates an anonymous guard: both local names are built with PASTE and __COUNTER__.
// NOTE(review): PASTE is not defined in this part of the file; it is presumably a
// token-pasting helper - confirm.
#define MAKE_SCOPE_GUARD(...)                                                                      \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), PASTE(scopeGuard, __COUNTER__),    \
                          __VA_ARGS__)
// Same as MAKE_SCOPE_GUARD, but the guard variable is called `name`, so the caller can refer
// to it later (for example to call Dismiss()).
#define MAKE_NAMED_SCOPE_GUARD(name, ...)                                                          \
  MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, __VA_ARGS__)

/// @brief: Returns the smaller of two values. The type must support the ">"
/// operator; when neither value compares greater, the first one is returned.
/// @param: a(Input), a reference to type T.
/// @param: b(Input), a reference to type T.
/// @return: T, a copy of the smaller value.
template <class T> static __forceinline T Min(const T& a, const T& b) {
  if (a > b) return b;
  return a;
}

/// @brief: Variadic form of Min for three or more values.
/// @return: T, a copy of the smallest value.
template <class T, class... Arg> static __forceinline T Min(const T& a, const T& b, Arg... args) {
  const T smallestOfRest = Min(b, args...);
  return Min(a, smallestOfRest);
}

/// @brief: Returns the larger of two values. The type must support the ">"
/// operator; when neither value compares greater, the first one is returned.
/// @param: a(Input), a reference to type T.
/// @param: b(Input), a reference to type T.
/// @return: T, a copy of the larger value.
template <class T> static __forceinline T Max(const T& a, const T& b) {
  if (b > a) return b;
  return a;
}

/// @brief: Variadic form of Max for three or more values.
/// @return: T, a copy of the largest value.
template <class T, class... Arg> static __forceinline T Max(const T& a, const T& b, Arg... args) {
  const T largestOfRest = Max(b, args...);
  return Max(a, largestOfRest);
}

/// @brief: Function object that applies `delete` to the pointer it is given.
/// @param: ptr(Input), pointer to an object created with `new`.
/// @return: void.
struct DeleteObject {
  template <typename T>
  void operator()(const T* object) const {
    delete object;
  }
};

/// @brief: Reports whether a value has no bit in common with (value - 1).
/// This holds for every power of two and also for 0, so be careful when
/// passing 0.
/// @param: val(Input), the data to be checked.
/// @return: bool.
template <typename T>
static __forceinline bool IsPowerOfTwo(T val) {
  const auto oneLess = val - 1;
  return !(val & oneLess);
}

/// @brief: Rounds a value down to a multiple of the alignment. A value that
/// is already on an alignment boundary is returned unchanged.
/// @param: value(Input), value to be calculated.
/// @param: alignment(Input), alignment value; asserted to pass IsPowerOfTwo.
/// @return: T.
template <typename T>
static __forceinline T AlignDown(T value, size_t alignment) {
  assert(IsPowerOfTwo(alignment));
  const size_t lowBits = alignment - 1;
  return (T)(value & ~lowBits);
}

/// @brief: Pointer form of AlignDown: rounds the address down to a multiple
/// of the alignment.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, pointer to type T.
template <typename T>
static __forceinline T* AlignDown(T* value, size_t alignment) {
  const intptr_t address = (intptr_t)value;
  return (T*)AlignDown(address, alignment);
}

/// @brief: Rounds a value up to a multiple of the alignment. A value that is
/// already on an alignment boundary is returned unchanged.
/// @param: value(Input), value to be calculated.
/// @param: alignment(Input), alignment value.
/// @return: T.
template <typename T>
static __forceinline T AlignUp(T value, size_t alignment) {
  const T bumped = (T)(value + alignment - 1);
  return AlignDown(bumped, alignment);
}

/// @brief: Pointer form of AlignUp: rounds the address up to a multiple of
/// the alignment.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, pointer to type T.
template <typename T>
static __forceinline T* AlignUp(T* value, size_t alignment) {
  uint8_t* const bumped = (uint8_t*)value + alignment - 1;
  return (T*)AlignDown((intptr_t)bumped, alignment);
}

/// @brief: Checks whether the input value lies on an alignment boundary,
/// i.e. rounding it up leaves it unchanged.
/// @param: value(Input), value to be checked.
/// @param: alignment(Input), alignment value.
/// @return: bool.
template <typename T>
static __forceinline bool IsMultipleOf(T value, size_t alignment) {
  const T roundedUp = AlignUp(value, alignment);
  return roundedUp == value;
}

/// @brief: Pointer form of IsMultipleOf: checks whether the address lies on
/// an alignment boundary.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: bool.
template <typename T>
static __forceinline bool IsMultipleOf(T* value, size_t alignment) {
  T* const roundedUp = AlignUp(value, alignment);
  return roundedUp == value;
}

/// @brief: Rounds a 32-bit value up to a power of two. 0 yields 1 and a value
/// that is already a power of two is returned unchanged; the result wraps to
/// 0 when the value exceeds 2^31.
/// @param: value(Input), value to round up.
/// @return: uint32_t.
static __forceinline uint32_t NextPow2(uint32_t value) {
  if (value == 0) return 1;
  uint32_t smeared = value - 1;
  // Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

/// @brief: Rounds a 64-bit value up to a power of two. 0 yields 1 and a value
/// that is already a power of two is returned unchanged; the result wraps to
/// 0 when the value exceeds 2^63.
/// @param: value(Input), value to round up.
/// @return: uint64_t.
static __forceinline uint64_t NextPow2(uint64_t value) {
  if (value == 0) return 1;
  uint64_t smeared = value - 1;
  // Copy the highest set bit into every lower position (shifts 1, 2, ..., 32).
  for (uint32_t shift = 1; shift < 64; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

/// @brief: Reports whether a C string starts with its terminator. The pointer
/// is dereferenced without a null check.
static __forceinline bool strIsEmpty(const char* str) noexcept {
  return *str == '\0';
}

/// @brief: Removes leading whitespace (as classified by the classic "C"
/// locale) from the string in place.
/// @return: std::string&, the same string that was passed in.
static __forceinline std::string& ltrim(std::string& s) {
  const std::locale& classic = std::locale::classic();
  auto firstKept = s.begin();
  while (firstKept != s.end() && std::isspace<char>(*firstKept, classic)) {
    ++firstKept;
  }
  s.erase(s.begin(), firstKept);
  return s;
}

/// @brief: Removes trailing whitespace (as classified by the classic "C"
/// locale) from the string in place.
/// @return: std::string&, the same string that was passed in.
static __forceinline std::string& rtrim(std::string& s) {
  const std::locale& classic = std::locale::classic();
  auto pastLastKept = s.end();
  while (pastLastKept != s.begin() && std::isspace<char>(*(pastLastKept - 1), classic)) {
    --pastLastKept;
  }
  s.erase(pastLastKept, s.end());
  return s;
}

/// @brief: Removes trailing, then leading whitespace from the string in place.
/// @return: std::string&, the same string that was passed in.
static __forceinline std::string& trim(std::string& s) {
  rtrim(s);
  return ltrim(s);
}

/// @brief: Extracts bits [lowBit, highBit] of an integer value, shifted down
/// so that lowBit lands at bit 0, and truncates the result to 32 bits.
/// @param: p(Input), value no wider than uintptr_t.
/// @return: uint32_t, the selected bit field.
template<uint32_t lowBit, uint32_t highBit, typename T>
static __forceinline uint32_t BitSelect(T p) {
  static_assert(sizeof(T) <= sizeof(uintptr_t), "Type out of range.");
  static_assert(highBit < sizeof(uintptr_t)*8, "Bit index out of range.");

  const uintptr_t bits = p;
  const bool reachesTopBit = (highBit == (sizeof(uintptr_t)*8-1));
  if (reachesTopBit) {
    // No higher bits exist, so nothing has to be masked off.
    return (uint32_t)(bits >> lowBit);
  } else {
    const unsigned long long keepMask = (1ull<<(highBit+1))-1;
    return (uint32_t)((bits & keepMask) >> lowBit);
  }
}

/// @brief: Returns bits 8..15 of the pointer's address.
inline uint32_t PtrLow16Shift8(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>((address >> 8) & 0xFFu);
}

/// @brief: Returns the pointer's address shifted right by 16 bits and
/// truncated to 32 bits (address bits 16..47).
inline uint32_t PtrHigh64Shift16(const void* p) {
  const uint64_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 16);
}

/// @brief: Returns bits 8..39 of the pointer's address.
inline uint32_t PtrLow40Shift8(const void* p) {
  const uint64_t address = reinterpret_cast<uintptr_t>(p);
  // Bits 8..39 are exactly the 32 bits that survive the truncation.
  return static_cast<uint32_t>(address >> 8);
}

/// @brief: Returns bits 40..63 of the pointer's address.
inline uint32_t PtrHigh64Shift40(const void* p) {
  // Widen first so the shift is always performed on a 64-bit value.
  const uint64_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 40);
}

/// @brief: Returns the low 32 bits of the pointer's address.
inline uint32_t PtrLow32(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address & 0xFFFFFFFFu);
}

/// @brief: Returns bits 32..63 of the pointer's address when HSA_LARGE_MODEL
/// is defined, and 0 otherwise.
inline uint32_t PtrHigh32(const void* p) {
#ifdef HSA_LARGE_MODEL
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address >> 32);
#else
  (void)p;
  return 0;
#endif
}

}  // namespace image
}  // namespace rocr

#endif  // HSA_RUNTIME_EXT_IMAGE_UTIL_H


================================================
FILE: runtime/hsa-runtime/inc/Brig.h
================================================
// University of Illinois/NCSA
// Open Source License
//
// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
// All rights reserved.
//
// Developed by:
//
//     HSA Team
//
//     Advanced Micro Devices, Inc
//
//     www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
//
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
//
//     * Neither the names of the LLVM Team, University of Illinois at
//       Urbana-Champaign, nor the names of its contributors may be used to
//       endorse or promote products derived from this Software without specific
//       prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.

#ifndef INCLUDED_BRIG_H
#define INCLUDED_BRIG_H

#include <stddef.h>   /* size_t */
#include <stdint.h>   /* uintXX_t */

#ifdef __cplusplus
extern "C" {
#endif  /* __cplusplus */

/*========================================================================================*/
/* =======================================================================================*/
/* =======================================================================================*/
/* =======================================================================================*/

/* 32-bit offset typedefs. Going by their names these are offsets into the code,
 * operand and data sections (see BrigSectionIndex) - NOTE(review): confirm
 * against the BRIG specification. */
typedef uint32_t BrigCodeOffset32_t;
typedef uint32_t BrigOperandOffset32_t;
typedef uint32_t BrigDataOffset32_t;

/* Aliases of BrigDataOffset32_t whose names record what the referenced data
 * entry is expected to hold (code list, operand list, string). */
typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
typedef BrigDataOffset32_t BrigDataOffsetString32_t;

/* Version constants: HSAIL major.minor = 1.0 and BRIG major.minor = 1.0.
 * BrigVersion32_t is the uint32_t typedef declared alongside them. */
typedef uint32_t BrigVersion32_t;
enum BrigVersion {
    BRIG_VERSION_HSAIL_MAJOR = 1,
    BRIG_VERSION_HSAIL_MINOR = 0,
    BRIG_VERSION_BRIG_MAJOR  = 1,
    BRIG_VERSION_BRIG_MINOR  = 0
};

/* Entry kind codes; BrigKind16_t is the type of BrigBase::kind.
 * Codes are grouped by range: directives 0x1000.., instructions 0x2000..,
 * operands 0x3000... In each group *_BEGIN equals the first code and *_END is
 * one past the last code, so [BEGIN, END) is a half-open range. */
typedef uint16_t BrigKind16_t;
enum BrigKind {
    BRIG_KIND_NONE = 0x0000,

    BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
        BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
        BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
        BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
        BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
        BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
        BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
        BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
        BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
        BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
        BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
        BRIG_KIND_DIRECTIVE_LOC = 0x100a,
        BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
        BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
        BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
        BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
    BRIG_KIND_DIRECTIVE_END = 0x100f,

    BRIG_KIND_INST_BEGIN = 0x2000,
        BRIG_KIND_INST_ADDR = 0x2000,
        BRIG_KIND_INST_ATOMIC = 0x2001,
        BRIG_KIND_INST_BASIC = 0x2002,
        BRIG_KIND_INST_BR = 0x2003,
        BRIG_KIND_INST_CMP = 0x2004,
        BRIG_KIND_INST_CVT = 0x2005,
        BRIG_KIND_INST_IMAGE = 0x2006,
        BRIG_KIND_INST_LANE = 0x2007,
        BRIG_KIND_INST_MEM = 0x2008,
        BRIG_KIND_INST_MEM_FENCE = 0x2009,
        BRIG_KIND_INST_MOD = 0x200a,
        BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
        BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
        BRIG_KIND_INST_QUEUE = 0x200d,
        BRIG_KIND_INST_SEG = 0x200e,
        BRIG_KIND_INST_SEG_CVT = 0x200f,
        BRIG_KIND_INST_SIGNAL = 0x2010,
        BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
    BRIG_KIND_INST_END = 0x2012,

    BRIG_KIND_OPERAND_BEGIN = 0x3000,
        BRIG_KIND_OPERAND_ADDRESS = 0x3000,
        BRIG_KIND_OPERAND_ALIGN = 0x3001,
        BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
        BRIG_KIND_OPERAND_CODE_REF = 0x3003,
        BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
        BRIG_KIND_OPERAND_RESERVED = 0x3005,
        BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
        BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
        BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
        BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
        BRIG_KIND_OPERAND_REGISTER = 0x300a,
        BRIG_KIND_OPERAND_STRING = 0x300b,
        BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
    BRIG_KIND_OPERAND_END = 0x300d
};

/* Alignment codes. 0 means none; a code k > 0 names an alignment of 2^(k-1)
 * (BRIG_ALIGNMENT_1 = 1 ... BRIG_ALIGNMENT_256 = 9). BRIG_ALIGNMENT_MAX aliases
 * the largest code. BrigAlignment8_t is the uint8_t typedef declared alongside
 * (presumably the storage type for these codes - confirm). */
typedef uint8_t BrigAlignment8_t;
enum BrigAlignment {
    BRIG_ALIGNMENT_NONE = 0,
    BRIG_ALIGNMENT_1 = 1,
    BRIG_ALIGNMENT_2 = 2,
    BRIG_ALIGNMENT_4 = 3,
    BRIG_ALIGNMENT_8 = 4,
    BRIG_ALIGNMENT_16 = 5,
    BRIG_ALIGNMENT_32 = 6,
    BRIG_ALIGNMENT_64 = 7,
    BRIG_ALIGNMENT_128 = 8,
    BRIG_ALIGNMENT_256 = 9,
    BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_256
};

/* Allocation codes, numbered 0-3. BrigAllocation8_t is the uint8_t typedef
 * declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigAllocation8_t;
enum BrigAllocation {
    BRIG_ALLOCATION_NONE = 0,
    BRIG_ALLOCATION_PROGRAM = 1,
    BRIG_ALLOCATION_AGENT = 2,
    BRIG_ALLOCATION_AUTOMATIC = 3
};

/* Bit mask for the ALU modifier byte: a single flag, FTZ, in bit 0. */
typedef uint8_t BrigAluModifier8_t;
enum BrigAluModifierMask {
    BRIG_ALU_FTZ = 1
};

/* Atomic operation codes, numbered consecutively from 0 (ADD) to 20
 * (WAITTIMEOUT_GTE). BrigAtomicOperation8_t is the uint8_t typedef declared
 * alongside (presumably their storage type - confirm). */
typedef uint8_t BrigAtomicOperation8_t;
enum BrigAtomicOperation {
    BRIG_ATOMIC_ADD = 0,
    BRIG_ATOMIC_AND = 1,
    BRIG_ATOMIC_CAS = 2,
    BRIG_ATOMIC_EXCH = 3,
    BRIG_ATOMIC_LD = 4,
    BRIG_ATOMIC_MAX = 5,
    BRIG_ATOMIC_MIN = 6,
    BRIG_ATOMIC_OR = 7,
    BRIG_ATOMIC_ST = 8,
    BRIG_ATOMIC_SUB = 9,
    BRIG_ATOMIC_WRAPDEC = 10,
    BRIG_ATOMIC_WRAPINC = 11,
    BRIG_ATOMIC_XOR = 12,
    BRIG_ATOMIC_WAIT_EQ = 13,
    BRIG_ATOMIC_WAIT_NE = 14,
    BRIG_ATOMIC_WAIT_LT = 15,
    BRIG_ATOMIC_WAIT_GTE = 16,
    BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
    BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
    BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
    BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
};

/* Compare operation codes, numbered consecutively from 0 to 27. The values are
 * explicit and do not follow the mnemonic pattern of the earlier entries
 * (e.g. SGEU = 20 comes before SEQU = 21, and SGTU = 27 is last), so entries
 * must not be reordered or renumbered. BrigCompareOperation8_t is the uint8_t
 * typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigCompareOperation8_t;
enum BrigCompareOperation {
    BRIG_COMPARE_EQ = 0,
    BRIG_COMPARE_NE = 1,
    BRIG_COMPARE_LT = 2,
    BRIG_COMPARE_LE = 3,
    BRIG_COMPARE_GT = 4,
    BRIG_COMPARE_GE = 5,
    BRIG_COMPARE_EQU = 6,
    BRIG_COMPARE_NEU = 7,
    BRIG_COMPARE_LTU = 8,
    BRIG_COMPARE_LEU = 9,
    BRIG_COMPARE_GTU = 10,
    BRIG_COMPARE_GEU = 11,
    BRIG_COMPARE_NUM = 12,
    BRIG_COMPARE_NAN = 13,
    BRIG_COMPARE_SEQ = 14,
    BRIG_COMPARE_SNE = 15,
    BRIG_COMPARE_SLT = 16,
    BRIG_COMPARE_SLE = 17,
    BRIG_COMPARE_SGT = 18,
    BRIG_COMPARE_SGE = 19,
    BRIG_COMPARE_SGEU = 20,
    BRIG_COMPARE_SEQU = 21,
    BRIG_COMPARE_SNEU = 22,
    BRIG_COMPARE_SLTU = 23,
    BRIG_COMPARE_SLEU = 24,
    BRIG_COMPARE_SNUM = 25,
    BRIG_COMPARE_SNAN = 26,
    BRIG_COMPARE_SGTU = 27
};

/* Control directive codes, numbered consecutively from 0 to 9.
 * BrigControlDirective16_t is the type of BrigDirectiveControl::control. */
typedef uint16_t BrigControlDirective16_t;
enum BrigControlDirective {
    BRIG_CONTROL_NONE = 0,
    BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
    BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
    BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
    BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
    BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
    BRIG_CONTROL_REQUIREDDIM = 6,
    BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
    BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
    BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
};

/* Bit mask for BrigDirectiveExecutable::modifier (type
 * BrigExecutableModifier8_t): a single flag, DEFINITION, in bit 0. */
typedef uint8_t BrigExecutableModifier8_t;
enum BrigExecutableModifierMask {
    BRIG_EXECUTABLE_DEFINITION = 1
};

/* Image channel order codes, numbered consecutively from 0 to 19;
 * BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 marks the start of the
 * user-defined range. BrigImageChannelOrder8_t is the uint8_t typedef declared
 * alongside (presumably their storage type - confirm). */
typedef uint8_t BrigImageChannelOrder8_t;
enum BrigImageChannelOrder {
    BRIG_CHANNEL_ORDER_A = 0,
    BRIG_CHANNEL_ORDER_R = 1,
    BRIG_CHANNEL_ORDER_RX = 2,
    BRIG_CHANNEL_ORDER_RG = 3,
    BRIG_CHANNEL_ORDER_RGX = 4,
    BRIG_CHANNEL_ORDER_RA = 5,
    BRIG_CHANNEL_ORDER_RGB = 6,
    BRIG_CHANNEL_ORDER_RGBX = 7,
    BRIG_CHANNEL_ORDER_RGBA = 8,
    BRIG_CHANNEL_ORDER_BGRA = 9,
    BRIG_CHANNEL_ORDER_ARGB = 10,
    BRIG_CHANNEL_ORDER_ABGR = 11,
    BRIG_CHANNEL_ORDER_SRGB = 12,
    BRIG_CHANNEL_ORDER_SRGBX = 13,
    BRIG_CHANNEL_ORDER_SRGBA = 14,
    BRIG_CHANNEL_ORDER_SBGRA = 15,
    BRIG_CHANNEL_ORDER_INTENSITY = 16,
    BRIG_CHANNEL_ORDER_LUMINANCE = 17,
    BRIG_CHANNEL_ORDER_DEPTH = 18,
    BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,

    BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
};

/* Image channel type codes, numbered consecutively from 0 to 15;
 * BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 marks the start of the
 * user-defined range. BrigImageChannelType8_t is the uint8_t typedef declared
 * alongside (presumably their storage type - confirm). */
typedef uint8_t BrigImageChannelType8_t;
enum BrigImageChannelType {
    BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
    BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
    BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
    BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
    BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
    BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
    BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
    BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
    BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
    BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
    BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
    BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
    BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
    BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
    BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
    BRIG_CHANNEL_TYPE_FLOAT = 15,

    BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
};

/* Image geometry codes, numbered consecutively from 0 to 7;
 * BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 marks the start of the user-defined
 * range. BrigImageGeometry8_t is the uint8_t typedef declared alongside
 * (presumably their storage type - confirm). */
typedef uint8_t BrigImageGeometry8_t;
enum BrigImageGeometry {
    BRIG_GEOMETRY_1D = 0,
    BRIG_GEOMETRY_2D = 1,
    BRIG_GEOMETRY_3D = 2,
    BRIG_GEOMETRY_1DA = 3,
    BRIG_GEOMETRY_2DA = 4,
    BRIG_GEOMETRY_1DB = 5,
    BRIG_GEOMETRY_2DDEPTH = 6,
    BRIG_GEOMETRY_2DADEPTH = 7,

    BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
};

/* Image query codes, numbered consecutively from 0 to 5. Unlike the other
 * image enums, the user-defined range starts directly after the last
 * predefined code (FIRST_USER_DEFINED = 6, not 128). BrigImageQuery8_t is the
 * uint8_t typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigImageQuery8_t;
enum BrigImageQuery {
    BRIG_IMAGE_QUERY_WIDTH = 0,
    BRIG_IMAGE_QUERY_HEIGHT = 1,
    BRIG_IMAGE_QUERY_DEPTH = 2,
    BRIG_IMAGE_QUERY_ARRAY = 3,
    BRIG_IMAGE_QUERY_CHANNELORDER = 4,
    BRIG_IMAGE_QUERY_CHANNELTYPE = 5,

    BRIG_IMAGE_QUERY_FIRST_USER_DEFINED = 6
};

/* Linkage codes, numbered consecutively from 0 to 4. BrigLinkage8_t is the
 * type of BrigDirectiveExecutable::linkage. */
typedef uint8_t BrigLinkage8_t;
enum BrigLinkage {
    BRIG_LINKAGE_NONE = 0,
    BRIG_LINKAGE_PROGRAM = 1,
    BRIG_LINKAGE_MODULE = 2,
    BRIG_LINKAGE_FUNCTION = 3,
    BRIG_LINKAGE_ARG = 4
};

/* Machine model codes: SMALL = 0, LARGE = 1. BrigMachineModel8_t is the
 * uint8_t typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigMachineModel8_t;
enum BrigMachineModel {
    BRIG_MACHINE_SMALL = 0,
    BRIG_MACHINE_LARGE = 1,
};

/* Bit mask for the memory modifier byte: a single flag, CONST, in bit 0. */
typedef uint8_t BrigMemoryModifier8_t;
enum BrigMemoryModifierMask {
    BRIG_MEMORY_CONST = 1
};

/* Memory order codes, numbered consecutively from 0 to 4. BrigMemoryOrder8_t
 * is the uint8_t typedef declared alongside (presumably their storage type -
 * confirm). */
typedef uint8_t BrigMemoryOrder8_t;
enum BrigMemoryOrder {
    BRIG_MEMORY_ORDER_NONE = 0,
    BRIG_MEMORY_ORDER_RELAXED = 1,
    BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
    BRIG_MEMORY_ORDER_SC_RELEASE = 3,
    BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,
};

/* Memory scope codes, numbered consecutively from 0 (NONE) to 5 (SYSTEM).
 * BrigMemoryScope8_t is the uint8_t typedef declared alongside (presumably
 * their storage type - confirm). */
typedef uint8_t BrigMemoryScope8_t;
enum BrigMemoryScope {
    BRIG_MEMORY_SCOPE_NONE = 0,
    BRIG_MEMORY_SCOPE_WORKITEM = 1,
    BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
    BRIG_MEMORY_SCOPE_WORKGROUP = 3,
    BRIG_MEMORY_SCOPE_AGENT = 4,
    BRIG_MEMORY_SCOPE_SYSTEM = 5,
};

/* Instruction opcodes, numbered consecutively from 0 (NOP) to 136 (WAVEID);
 * BRIG_OPCODE_FIRST_USER_DEFINED = 32768 marks the start of the user-defined
 * range. BrigOpcode16_t is the uint16_t typedef declared alongside (presumably
 * the storage type for opcodes - confirm). */
typedef uint16_t BrigOpcode16_t;
enum BrigOpcode {
    BRIG_OPCODE_NOP = 0,
    BRIG_OPCODE_ABS = 1,
    BRIG_OPCODE_ADD = 2,
    BRIG_OPCODE_BORROW = 3,
    BRIG_OPCODE_CARRY = 4,
    BRIG_OPCODE_CEIL = 5,
    BRIG_OPCODE_COPYSIGN = 6,
    BRIG_OPCODE_DIV = 7,
    BRIG_OPCODE_FLOOR = 8,
    BRIG_OPCODE_FMA = 9,
    BRIG_OPCODE_FRACT = 10,
    BRIG_OPCODE_MAD = 11,
    BRIG_OPCODE_MAX = 12,
    BRIG_OPCODE_MIN = 13,
    BRIG_OPCODE_MUL = 14,
    BRIG_OPCODE_MULHI = 15,
    BRIG_OPCODE_NEG = 16,
    BRIG_OPCODE_REM = 17,
    BRIG_OPCODE_RINT = 18,
    BRIG_OPCODE_SQRT = 19,
    BRIG_OPCODE_SUB = 20,
    BRIG_OPCODE_TRUNC = 21,
    BRIG_OPCODE_MAD24 = 22,
    BRIG_OPCODE_MAD24HI = 23,
    BRIG_OPCODE_MUL24 = 24,
    BRIG_OPCODE_MUL24HI = 25,
    BRIG_OPCODE_SHL = 26,
    BRIG_OPCODE_SHR = 27,
    BRIG_OPCODE_AND = 28,
    BRIG_OPCODE_NOT = 29,
    BRIG_OPCODE_OR = 30,
    BRIG_OPCODE_POPCOUNT = 31,
    BRIG_OPCODE_XOR = 32,
    BRIG_OPCODE_BITEXTRACT = 33,
    BRIG_OPCODE_BITINSERT = 34,
    BRIG_OPCODE_BITMASK = 35,
    BRIG_OPCODE_BITREV = 36,
    BRIG_OPCODE_BITSELECT = 37,
    BRIG_OPCODE_FIRSTBIT = 38,
    BRIG_OPCODE_LASTBIT = 39,
    BRIG_OPCODE_COMBINE = 40,
    BRIG_OPCODE_EXPAND = 41,
    BRIG_OPCODE_LDA = 42,
    BRIG_OPCODE_MOV = 43,
    BRIG_OPCODE_SHUFFLE = 44,
    BRIG_OPCODE_UNPACKHI = 45,
    BRIG_OPCODE_UNPACKLO = 46,
    BRIG_OPCODE_PACK = 47,
    BRIG_OPCODE_UNPACK = 48,
    BRIG_OPCODE_CMOV = 49,
    BRIG_OPCODE_CLASS = 50,
    BRIG_OPCODE_NCOS = 51,
    BRIG_OPCODE_NEXP2 = 52,
    BRIG_OPCODE_NFMA = 53,
    BRIG_OPCODE_NLOG2 = 54,
    BRIG_OPCODE_NRCP = 55,
    BRIG_OPCODE_NRSQRT = 56,
    BRIG_OPCODE_NSIN = 57,
    BRIG_OPCODE_NSQRT = 58,
    BRIG_OPCODE_BITALIGN = 59,
    BRIG_OPCODE_BYTEALIGN = 60,
    BRIG_OPCODE_PACKCVT = 61,
    BRIG_OPCODE_UNPACKCVT = 62,
    BRIG_OPCODE_LERP = 63,
    BRIG_OPCODE_SAD = 64,
    BRIG_OPCODE_SADHI = 65,
    BRIG_OPCODE_SEGMENTP = 66,
    BRIG_OPCODE_FTOS = 67,
    BRIG_OPCODE_STOF = 68,
    BRIG_OPCODE_CMP = 69,
    BRIG_OPCODE_CVT = 70,
    BRIG_OPCODE_LD = 71,
    BRIG_OPCODE_ST = 72,
    BRIG_OPCODE_ATOMIC = 73,
    BRIG_OPCODE_ATOMICNORET = 74,
    BRIG_OPCODE_SIGNAL = 75,
    BRIG_OPCODE_SIGNALNORET = 76,
    BRIG_OPCODE_MEMFENCE = 77,
    BRIG_OPCODE_RDIMAGE = 78,
    BRIG_OPCODE_LDIMAGE = 79,
    BRIG_OPCODE_STIMAGE = 80,
    BRIG_OPCODE_IMAGEFENCE = 81,
    BRIG_OPCODE_QUERYIMAGE = 82,
    BRIG_OPCODE_QUERYSAMPLER = 83,
    BRIG_OPCODE_CBR = 84,
    BRIG_OPCODE_BR = 85,
    BRIG_OPCODE_SBR = 86,
    BRIG_OPCODE_BARRIER = 87,
    BRIG_OPCODE_WAVEBARRIER = 88,
    BRIG_OPCODE_ARRIVEFBAR = 89,
    BRIG_OPCODE_INITFBAR = 90,
    BRIG_OPCODE_JOINFBAR = 91,
    BRIG_OPCODE_LEAVEFBAR = 92,
    BRIG_OPCODE_RELEASEFBAR = 93,
    BRIG_OPCODE_WAITFBAR = 94,
    BRIG_OPCODE_LDF = 95,
    BRIG_OPCODE_ACTIVELANECOUNT = 96,
    BRIG_OPCODE_ACTIVELANEID = 97,
    BRIG_OPCODE_ACTIVELANEMASK = 98,
    BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
    BRIG_OPCODE_CALL = 100,
    BRIG_OPCODE_SCALL = 101,
    BRIG_OPCODE_ICALL = 102,
    BRIG_OPCODE_RET = 103,
    BRIG_OPCODE_ALLOCA = 104,
    BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
    BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
    BRIG_OPCODE_DIM = 107,
    BRIG_OPCODE_GRIDGROUPS = 108,
    BRIG_OPCODE_GRIDSIZE = 109,
    BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
    BRIG_OPCODE_PACKETID = 111,
    BRIG_OPCODE_WORKGROUPID = 112,
    BRIG_OPCODE_WORKGROUPSIZE = 113,
    BRIG_OPCODE_WORKITEMABSID = 114,
    BRIG_OPCODE_WORKITEMFLATABSID = 115,
    BRIG_OPCODE_WORKITEMFLATID = 116,
    BRIG_OPCODE_WORKITEMID = 117,
    BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
    BRIG_OPCODE_GETDETECTEXCEPT = 119,
    BRIG_OPCODE_SETDETECTEXCEPT = 120,
    BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
    BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
    BRIG_OPCODE_LDQUEUEREADINDEX = 123,
    BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
    BRIG_OPCODE_STQUEUEREADINDEX = 125,
    BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
    BRIG_OPCODE_CLOCK = 127,
    BRIG_OPCODE_CUID = 128,
    BRIG_OPCODE_DEBUGTRAP = 129,
    BRIG_OPCODE_GROUPBASEPTR = 130,
    BRIG_OPCODE_KERNARGBASEPTR = 131,
    BRIG_OPCODE_LANEID = 132,
    BRIG_OPCODE_MAXCUID = 133,
    BRIG_OPCODE_MAXWAVEID = 134,
    BRIG_OPCODE_NULLPTR = 135,
    BRIG_OPCODE_WAVEID = 136,

    BRIG_OPCODE_FIRST_USER_DEFINED = 32768,
};

/* Packing control codes, numbered consecutively from 0 to 12. Codes 7-12 are
 * the *SAT counterparts of codes 1-6, in the same order. BrigPack8_t is the
 * uint8_t typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigPack8_t;
enum BrigPack {
    BRIG_PACK_NONE = 0,
    BRIG_PACK_PP = 1,
    BRIG_PACK_PS = 2,
    BRIG_PACK_SP = 3,
    BRIG_PACK_SS = 4,
    BRIG_PACK_S = 5,
    BRIG_PACK_P = 6,
    BRIG_PACK_PPSAT = 7,
    BRIG_PACK_PSSAT = 8,
    BRIG_PACK_SPSAT = 9,
    BRIG_PACK_SSSAT = 10,
    BRIG_PACK_SSAT = 11,
    BRIG_PACK_PSAT = 12
};

/* Profile codes: BASE = 0, FULL = 1. BrigProfile8_t is the uint8_t typedef
 * declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigProfile8_t;
enum BrigProfile {
    BRIG_PROFILE_BASE = 0,
    BRIG_PROFILE_FULL = 1,
};

/* Register kind codes, numbered consecutively from 0 to 3.
 * BrigRegisterKind16_t is the uint16_t typedef declared alongside (presumably
 * their storage type - confirm). */
typedef uint16_t BrigRegisterKind16_t;
enum BrigRegisterKind {
    BRIG_REGISTER_KIND_CONTROL = 0,
    BRIG_REGISTER_KIND_SINGLE = 1,
    BRIG_REGISTER_KIND_DOUBLE = 2,
    BRIG_REGISTER_KIND_QUAD = 3
};

/* Rounding mode codes, numbered consecutively from 0 to 21: NONE, the FLOAT
 * modes (1-5), the INTEGER modes and their _SAT forms (6-13), then the
 * INTEGER_SIGNALING modes and their _SAT forms (14-21). BrigRound8_t is the
 * uint8_t typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigRound8_t;
enum BrigRound {
    BRIG_ROUND_NONE = 0,
    BRIG_ROUND_FLOAT_DEFAULT = 1,
    BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
    BRIG_ROUND_FLOAT_ZERO = 3,
    BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
    BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
    BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
    BRIG_ROUND_INTEGER_ZERO = 7,
    BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
    BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
    BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
    BRIG_ROUND_INTEGER_ZERO_SAT = 11,
    BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
    BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
    BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
    BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
};

/* Sampler addressing mode codes, numbered consecutively from 0 to 4;
 * BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 marks the start of the user-defined
 * range. BrigSamplerAddressing8_t is the uint8_t typedef declared alongside
 * (presumably their storage type - confirm). */
typedef uint8_t BrigSamplerAddressing8_t;
enum BrigSamplerAddressing {
    BRIG_ADDRESSING_UNDEFINED = 0,
    BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
    BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
    BRIG_ADDRESSING_REPEAT = 3,
    BRIG_ADDRESSING_MIRRORED_REPEAT = 4,

    BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
};

/* Sampler coordinate normalization codes: UNNORMALIZED = 0, NORMALIZED = 1.
 * BrigSamplerCoordNormalization8_t is the uint8_t typedef declared alongside
 * (presumably their storage type - confirm). */
typedef uint8_t BrigSamplerCoordNormalization8_t;
enum BrigSamplerCoordNormalization {
    BRIG_COORD_UNNORMALIZED = 0,
    BRIG_COORD_NORMALIZED = 1
};

/* Sampler filter codes: NEAREST = 0, LINEAR = 1;
 * BRIG_FILTER_FIRST_USER_DEFINED = 128 marks the start of the user-defined
 * range. BrigSamplerFilter8_t is the uint8_t typedef declared alongside
 * (presumably their storage type - confirm). */
typedef uint8_t BrigSamplerFilter8_t;
enum BrigSamplerFilter {
    BRIG_FILTER_NEAREST = 0,
    BRIG_FILTER_LINEAR = 1,

    BRIG_FILTER_FIRST_USER_DEFINED = 128
};

/* Sampler query codes, numbered consecutively from 0 to 2; one per sampler
 * property enum above (addressing, coord, filter). BrigSamplerQuery8_t is the
 * uint8_t typedef declared alongside (presumably their storage type - confirm). */
typedef uint8_t BrigSamplerQuery8_t;
enum BrigSamplerQuery {
    BRIG_SAMPLER_QUERY_ADDRESSING = 0,
    BRIG_SAMPLER_QUERY_COORD = 1,
    BRIG_SAMPLER_QUERY_FILTER = 2
};

/* Section indices: DATA = 0, CODE = 1, OPERAND = 2. Indices from
 * BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED (3) upward are
 * implementation-defined. BrigSectionIndex32_t is the uint32_t typedef
 * declared alongside (presumably their storage type - confirm). */
typedef uint32_t BrigSectionIndex32_t;
enum BrigSectionIndex {
    BRIG_SECTION_INDEX_DATA = 0,
    BRIG_SECTION_INDEX_CODE = 1,
    BRIG_SECTION_INDEX_OPERAND = 2,

    BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
};

/* Bit mask for the segment-conversion modifier byte: a single flag, NONULL,
 * in bit 0. */
typedef uint8_t BrigSegCvtModifier8_t;
enum BrigSegCvtModifierMask {
    BRIG_SEG_CVT_NONULL = 1
};

/* Segment codes, numbered consecutively from 0 (NONE) to 8 (ARG);
 * BRIG_SEGMENT_FIRST_USER_DEFINED = 128 marks the start of the user-defined
 * range. BrigSegment8_t is the uint8_t typedef declared alongside (presumably
 * their storage type - confirm). */
typedef uint8_t BrigSegment8_t;
enum BrigSegment {
    BRIG_SEGMENT_NONE = 0,
    BRIG_SEGMENT_FLAT = 1,
    BRIG_SEGMENT_GLOBAL = 2,
    BRIG_SEGMENT_READONLY = 3,
    BRIG_SEGMENT_KERNARG = 4,
    BRIG_SEGMENT_GROUP = 5,
    BRIG_SEGMENT_PRIVATE = 6,
    BRIG_SEGMENT_SPILL = 7,
    BRIG_SEGMENT_ARG = 8,

    BRIG_SEGMENT_FIRST_USER_DEFINED = 128
};

/* Bit layout of a type code (these constants are OR-ed together by enum
 * BrigType below), as it follows from the sizes and shifts defined here:
 *   bits 0-4 : base type   BRIG_TYPE_BASE_MASK  = 0x1f
 *   bits 5-6 : packing     BRIG_TYPE_PACK_MASK  = 0x60
 *   bit  7   : array flag  BRIG_TYPE_ARRAY_MASK = 0x80
 * Packing values: NONE = 0x00, 32 = 0x20, 64 = 0x40, 128 = 0x60.
 * BRIG_TYPE_ARRAY = 0x80. */
enum {
    BRIG_TYPE_BASE_SIZE  = 5,
    BRIG_TYPE_PACK_SIZE  = 2,
    BRIG_TYPE_ARRAY_SIZE = 1,

    BRIG_TYPE_BASE_SHIFT  = 0,
    BRIG_TYPE_PACK_SHIFT  = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
    BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,

    BRIG_TYPE_BASE_MASK  = ((1 << BRIG_TYPE_BASE_SIZE)  - 1) << BRIG_TYPE_BASE_SHIFT,
    BRIG_TYPE_PACK_MASK  = ((1 << BRIG_TYPE_PACK_SIZE)  - 1) << BRIG_TYPE_PACK_SHIFT,
    BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,

    BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
    BRIG_TYPE_PACK_32   = 1 << BRIG_TYPE_PACK_SHIFT,
    BRIG_TYPE_PACK_64   = 2 << BRIG_TYPE_PACK_SHIFT,
    BRIG_TYPE_PACK_128  = 3 << BRIG_TYPE_PACK_SHIFT,

    BRIG_TYPE_ARRAY     = 1 << BRIG_TYPE_ARRAY_SHIFT
};

/* Type codes, in three groups:
 *   - base types, numbered consecutively from 0 (NONE) to 23 (SIG64);
 *   - packed types, formed as <base type> | BRIG_TYPE_PACK_{32,64,128};
 *   - array types, formed as <base or packed type> | BRIG_TYPE_ARRAY.
 * No array variant is defined for BRIG_TYPE_NONE or BRIG_TYPE_B1.
 * BrigType16_t is the uint16_t typedef declared alongside (presumably the
 * storage type for these codes - confirm). */
typedef uint16_t BrigType16_t;
enum BrigType {
    BRIG_TYPE_NONE  = 0,
    BRIG_TYPE_U8    = 1,
    BRIG_TYPE_U16   = 2,
    BRIG_TYPE_U32   = 3,
    BRIG_TYPE_U64   = 4,
    BRIG_TYPE_S8    = 5,
    BRIG_TYPE_S16   = 6,
    BRIG_TYPE_S32   = 7,
    BRIG_TYPE_S64   = 8,
    BRIG_TYPE_F16   = 9,
    BRIG_TYPE_F32   = 10,
    BRIG_TYPE_F64   = 11,
    BRIG_TYPE_B1    = 12,
    BRIG_TYPE_B8    = 13,
    BRIG_TYPE_B16   = 14,
    BRIG_TYPE_B32   = 15,
    BRIG_TYPE_B64   = 16,
    BRIG_TYPE_B128  = 17,
    BRIG_TYPE_SAMP  = 18,
    BRIG_TYPE_ROIMG = 19,
    BRIG_TYPE_WOIMG = 20,
    BRIG_TYPE_RWIMG = 21,
    BRIG_TYPE_SIG32 = 22,
    BRIG_TYPE_SIG64 = 23,

    BRIG_TYPE_U8X4  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_32,
    BRIG_TYPE_U8X8  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_64,
    BRIG_TYPE_U8X16 = BRIG_TYPE_U8  | BRIG_TYPE_PACK_128,
    BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
    BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_S8X4  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_32,
    BRIG_TYPE_S8X8  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_64,
    BRIG_TYPE_S8X16 = BRIG_TYPE_S8  | BRIG_TYPE_PACK_128,
    BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
    BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
    BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
    BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
    BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,

    BRIG_TYPE_U8_ARRAY    = BRIG_TYPE_U8    | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U16_ARRAY   = BRIG_TYPE_U16   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U32_ARRAY   = BRIG_TYPE_U32   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U64_ARRAY   = BRIG_TYPE_U64   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S8_ARRAY    = BRIG_TYPE_S8    | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S16_ARRAY   = BRIG_TYPE_S16   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S32_ARRAY   = BRIG_TYPE_S32   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S64_ARRAY   = BRIG_TYPE_S64   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F16_ARRAY   = BRIG_TYPE_F16   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F32_ARRAY   = BRIG_TYPE_F32   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F64_ARRAY   = BRIG_TYPE_F64   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_B8_ARRAY    = BRIG_TYPE_B8    | BRIG_TYPE_ARRAY,
    BRIG_TYPE_B16_ARRAY   = BRIG_TYPE_B16   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_B32_ARRAY   = BRIG_TYPE_B32   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_B64_ARRAY   = BRIG_TYPE_B64   | BRIG_TYPE_ARRAY,
    BRIG_TYPE_B128_ARRAY  = BRIG_TYPE_B128  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_SAMP_ARRAY  = BRIG_TYPE_SAMP  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
    BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
    BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
    BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U8X4_ARRAY  = BRIG_TYPE_U8X4  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U8X8_ARRAY  = BRIG_TYPE_U8X8  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S8X4_ARRAY  = BRIG_TYPE_S8X4  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S8X8_ARRAY  = BRIG_TYPE_S8X8  | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
    BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,
};

/* Bit masks for the variable modifier byte: DEFINITION in bit 0, CONST in
 * bit 1. */
typedef uint8_t BrigVariableModifier8_t;
enum BrigVariableModifierMask {
    BRIG_VARIABLE_DEFINITION = 1,
    BRIG_VARIABLE_CONST = 2
};

// Width values. BrigWidth8_t is the 8-bit type used to store a BrigWidth (see
// the `width` members of BrigInstBr, BrigInstLane and BrigInstMem).
//
// Encoding: BRIG_WIDTH_NONE is 0; for a power-of-two width 2^k the enumerator
// BRIG_WIDTH_<2^k> has the value k + 1 (BRIG_WIDTH_1 = 1 through
// BRIG_WIDTH_2147483648 = 32). BRIG_WIDTH_WAVESIZE and BRIG_WIDTH_ALL are the
// two symbolic values that follow the numeric range.
typedef uint8_t BrigWidth8_t;
enum BrigWidth {
    BRIG_WIDTH_NONE = 0,
    BRIG_WIDTH_1 = 1,
    BRIG_WIDTH_2 = 2,
    BRIG_WIDTH_4 = 3,
    BRIG_WIDTH_8 = 4,
    BRIG_WIDTH_16 = 5,
    BRIG_WIDTH_32 = 6,
    BRIG_WIDTH_64 = 7,
    BRIG_WIDTH_128 = 8,
    BRIG_WIDTH_256 = 9,
    BRIG_WIDTH_512 = 10,
    BRIG_WIDTH_1024 = 11,
    BRIG_WIDTH_2048 = 12,
    BRIG_WIDTH_4096 = 13,
    BRIG_WIDTH_8192 = 14,
    BRIG_WIDTH_16384 = 15,
    BRIG_WIDTH_32768 = 16,
    BRIG_WIDTH_65536 = 17,
    BRIG_WIDTH_131072 = 18,
    BRIG_WIDTH_262144 = 19,
    BRIG_WIDTH_524288 = 20,
    BRIG_WIDTH_1048576 = 21,
    BRIG_WIDTH_2097152 = 22,
    BRIG_WIDTH_4194304 = 23,
    BRIG_WIDTH_8388608 = 24,
    BRIG_WIDTH_16777216 = 25,
    BRIG_WIDTH_33554432 = 26,
    BRIG_WIDTH_67108864 = 27,
    BRIG_WIDTH_134217728 = 28,
    BRIG_WIDTH_268435456 = 29,
    BRIG_WIDTH_536870912 = 30,
    BRIG_WIDTH_1073741824 = 31,
    BRIG_WIDTH_2147483648 = 32,
    BRIG_WIDTH_WAVESIZE = 33,
    BRIG_WIDTH_ALL = 34,
};

// 64-bit quantity stored as two 32-bit words. Used by this header for
// BrigDirectiveVariable::dim, BrigOperandAddress::offset and the dimension
// members of BrigOperandConstantImage.
// NOTE(review): `lo`/`hi` are presumably the low and high halves of the
// value - confirm against the BRIG specification.
struct BrigUInt64 {
    uint32_t lo;
    uint32_t hi;
};

// Common header that is the first member of every BrigDirective* and
// BrigOperand* struct declared below and, through BrigInstBase, of every
// BrigInst* struct.
// NOTE(review): `byteCount` is presumably the size in bytes of the whole
// record and `kind` the discriminator identifying which record type follows -
// confirm against the BRIG specification.
struct BrigBase {
    uint16_t byteCount;
    BrigKind16_t kind;
};

// Length-prefixed byte blob: a 32-bit `byteCount` followed by `bytes`.
// NOTE(review): `bytes` is declared with one element; this is presumably the
// "trailing array" idiom where `byteCount` bytes actually follow the header,
// so sizeof(BrigData) does not describe a real entry - confirm against the
// BRIG specification.
struct BrigData {
    uint32_t byteCount;
    uint8_t bytes[1];
};

// Directive record that consists solely of the common BrigBase header.
// NOTE(review): presumably marks an argument-block boundary, with start/end
// distinguished by base.kind - confirm against the BRIG specification.
struct BrigDirectiveArgBlock {
    BrigBase base;
};

// Directive record: common header plus `name`, an offset of type
// BrigDataOffsetString32_t.
// NOTE(review): `name` presumably locates the comment text as a string in the
// data section - confirm against the BRIG specification.
struct BrigDirectiveComment {
    BrigBase base;
    BrigDataOffsetString32_t name;
};

// Directive record: common header, a 16-bit control-directive code, a 16-bit
// reserved member and an operand-list offset.
// NOTE(review): `reserved` presumably pads `operands` to a 4-byte boundary
// and is expected to be zero - confirm against the BRIG specification.
struct BrigDirectiveControl {
    BrigBase base;
    BrigControlDirective16_t control;
    uint16_t reserved;
    BrigDataOffsetOperandList32_t operands;
};

// Directive record describing an executable: its name, the number of output
// and input arguments, three offsets into the code section, and its
// modifier/linkage bytes.
// NOTE(review): the exact meaning of firstInArg, firstCodeBlockEntry and
// nextModuleEntry (and which executable kinds share this layout via
// base.kind) is defined by the BRIG specification, not by this header -
// confirm there. `reserved` is presumably zero padding.
struct BrigDirectiveExecutable {
    BrigBase base;
    BrigDataOffsetString32_t name;
    uint16_t outArgCount;
    uint16_t inArgCount;
    BrigCodeOffset32_t firstInArg;
    BrigCodeOffset32_t firstCodeBlockEntry;
    BrigCodeOffset32_t nextModuleEntry;
    BrigExecutableModifier8_t modifier;
    BrigLinkage8_t linkage;
    uint16_t reserved;
};

// Directive record: common header plus `name`, an offset of type
// BrigDataOffsetString32_t.
// NOTE(review): `name` presumably identifies the extension by string -
// confirm against the BRIG specification.
struct BrigDirectiveExtension {
    BrigBase base;
    BrigDataOffsetString32_t name;
};

// Directive record for an fbarrier: name offset, a BrigVariableModifier8_t
// flag byte (see BrigVariableModifierMask), a linkage byte and a 16-bit
// reserved member.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigDirectiveFbarrier {
    BrigBase base;
    BrigDataOffsetString32_t name;
    BrigVariableModifier8_t modifier;
    BrigLinkage8_t linkage;
    uint16_t reserved;
};

// Directive record: common header plus `name`, an offset of type
// BrigDataOffsetString32_t.
// NOTE(review): `name` presumably locates the label's name string - confirm
// against the BRIG specification.
struct BrigDirectiveLabel {
    BrigBase base;
    BrigDataOffsetString32_t name;
};

// Directive record holding a source position: a file-name string offset plus
// 32-bit `line` and `column` numbers.
// NOTE(review): whether line/column are 0- or 1-based is not visible here -
// confirm against the BRIG specification.
struct BrigDirectiveLoc {
    BrigBase base;
    BrigDataOffsetString32_t filename;
    uint32_t line;
    uint32_t column;
};

// Directive record that consists solely of the common BrigBase header.
// NOTE(review): presumably a placeholder/no-op entry - confirm against the
// BRIG specification.
struct BrigDirectiveNone {
    BrigBase base;
};

// Directive record: common header plus `operands`, an offset of type
// BrigDataOffsetOperandList32_t.
// NOTE(review): `operands` presumably locates the list of operands that make
// up the pragma - confirm against the BRIG specification.
struct BrigDirectivePragma {
    BrigBase base;
    BrigDataOffsetOperandList32_t operands;
};

// Directive record describing a variable: name, initializer operand offset,
// type, segment, alignment, a 64-bit `dim` (stored as BrigUInt64), a
// BrigVariableModifier8_t flag byte (see BrigVariableModifierMask), linkage,
// allocation and one reserved byte.
// NOTE(review): the interpretation of `init` and `dim` (e.g. what values
// denote "no initializer" or "not an array") is defined by the BRIG
// specification, not by this header - confirm there.
struct BrigDirectiveVariable {
    BrigBase base;
    BrigDataOffsetString32_t name;
    BrigOperandOffset32_t init;
    BrigType16_t type;
    BrigSegment8_t segment;
    BrigAlignment8_t align;
    BrigUInt64 dim;
    BrigVariableModifier8_t modifier;
    BrigLinkage8_t linkage;
    BrigAllocation8_t allocation;
    uint8_t reserved;
};

// Directive record describing a module: name, HSAIL major/minor version,
// profile, machine model, default floating-point rounding mode and one
// reserved byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigDirectiveModule {
    BrigBase base;
    BrigDataOffsetString32_t name;
    BrigVersion32_t hsailMajor;
    BrigVersion32_t hsailMinor;
    BrigProfile8_t profile;
    BrigMachineModel8_t machineModel;
    BrigRound8_t defaultFloatRound;
    uint8_t reserved;
};

// Common prefix that is the first member of every BrigInst* struct declared
// below: the BrigBase header, a 16-bit opcode, a 16-bit type and an
// operand-list offset.
struct BrigInstBase {
    BrigBase base;
    BrigOpcode16_t opcode;
    BrigType16_t type;
    BrigDataOffsetOperandList32_t operands;
};

// Instruction record: common instruction prefix plus a segment byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstAddr {
    BrigInstBase base;
    BrigSegment8_t segment;
    uint8_t reserved[3];
};

// Instruction record for atomic operations: common instruction prefix plus
// segment, memory order, memory scope, the atomic operation code and an
// equivalence-class byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstAtomic {
    BrigInstBase base;
    BrigSegment8_t segment;
    BrigMemoryOrder8_t memoryOrder;
    BrigMemoryScope8_t memoryScope;
    BrigAtomicOperation8_t atomicOperation;
    uint8_t equivClass;
    uint8_t reserved[3];
};

// Instruction record that consists solely of the common BrigInstBase prefix
// (header, opcode, type, operands) with no additional modifiers.
struct BrigInstBasic {
    BrigInstBase base;
};

// Instruction record: common instruction prefix plus a width byte (stored as
// BrigWidth8_t; see enum BrigWidth for the encoding).
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstBr {
    BrigInstBase base;
    BrigWidth8_t width;
    uint8_t reserved[3];
};

// Instruction record for compares: common instruction prefix plus the source
// operand type, an ALU modifier byte, the compare operation and a packing
// control byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstCmp {
    BrigInstBase base;
    BrigType16_t sourceType;
    BrigAluModifier8_t modifier;
    BrigCompareOperation8_t compare;
    BrigPack8_t pack;
    uint8_t reserved[3];
};

// Instruction record for conversions: common instruction prefix plus the
// source type, an ALU modifier byte and a rounding-mode byte. No reserved
// member is declared for this record.
struct BrigInstCvt {
    BrigInstBase base;
    BrigType16_t sourceType;
    BrigAluModifier8_t modifier;
    BrigRound8_t round;
};

// Instruction record for image operations: common instruction prefix plus the
// image type, the coordinate type, the image geometry and an
// equivalence-class byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstImage {
    BrigInstBase base;
    BrigType16_t imageType;
    BrigType16_t coordType;
    BrigImageGeometry8_t geometry;
    uint8_t equivClass;
    uint16_t reserved;
};

// Instruction record: common instruction prefix plus the source type and a
// width byte (stored as BrigWidth8_t; see enum BrigWidth for the encoding).
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstLane {
    BrigInstBase base;
    BrigType16_t sourceType;
    BrigWidth8_t width;
    uint8_t reserved;
};

// Instruction record for memory accesses: common instruction prefix plus
// segment, alignment, equivalence class, width (BrigWidth8_t; see enum
// BrigWidth) and a memory-modifier byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstMem {
    BrigInstBase base;
    BrigSegment8_t segment;
    BrigAlignment8_t align;
    uint8_t equivClass;
    BrigWidth8_t width;
    BrigMemoryModifier8_t modifier;
    uint8_t reserved[3];
};

// Instruction record for memory fences: common instruction prefix plus a
// memory order and one memory-scope byte for each of the global, group and
// image segments.
struct BrigInstMemFence {
    BrigInstBase base;
    BrigMemoryOrder8_t memoryOrder;
    BrigMemoryScope8_t globalSegmentMemoryScope;
    BrigMemoryScope8_t groupSegmentMemoryScope;
    BrigMemoryScope8_t imageSegmentMemoryScope;
};

// Instruction record carrying modifiers: common instruction prefix plus an
// ALU modifier byte, a rounding-mode byte and a packing-control byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstMod {
    BrigInstBase base;
    BrigAluModifier8_t modifier;
    BrigRound8_t round;
    BrigPack8_t pack;
    uint8_t reserved;
};

// Instruction record for image queries: common instruction prefix plus the
// image type, the image geometry and the query selector.
struct BrigInstQueryImage {
    BrigInstBase base;
    BrigType16_t imageType;
    BrigImageGeometry8_t geometry;
    BrigImageQuery8_t query;
};

// Instruction record for sampler queries: common instruction prefix plus the
// query selector byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstQuerySampler {
    BrigInstBase base;
    BrigSamplerQuery8_t query;
    uint8_t reserved[3];
};

// Instruction record for queue operations: common instruction prefix plus a
// segment byte and a memory-order byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstQueue {
    BrigInstBase base;
    BrigSegment8_t segment;
    BrigMemoryOrder8_t memoryOrder;
    uint16_t reserved;
};

// Instruction record: common instruction prefix plus a segment byte. The
// member list is the same as BrigInstAddr's.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstSeg {
    BrigInstBase base;
    BrigSegment8_t segment;
    uint8_t reserved[3];
};

// Instruction record for segment conversions: common instruction prefix plus
// the source type, a segment byte and a segment-conversion modifier byte.
struct BrigInstSegCvt {
    BrigInstBase base;
    BrigType16_t sourceType;
    BrigSegment8_t segment;
    BrigSegCvtModifier8_t modifier;
};

// Instruction record for signal operations: common instruction prefix plus
// the signal type, a memory-order byte and the operation to perform (encoded
// with the same BrigAtomicOperation8_t type that BrigInstAtomic uses).
struct BrigInstSignal {
    BrigInstBase base;
    BrigType16_t signalType;
    BrigMemoryOrder8_t memoryOrder;
    BrigAtomicOperation8_t signalOperation;
};

// Instruction record: common instruction prefix plus the source operand type.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigInstSourceType {
    BrigInstBase base;
    BrigType16_t sourceType;
    uint16_t reserved;
};

// Operand record describing an address: a code-section offset (`symbol`), an
// operand-section offset (`reg`) and a 64-bit `offset` stored as BrigUInt64.
// NOTE(review): presumably the address is symbol + register + offset with
// absent components encoded as zero offsets - confirm against the BRIG
// specification.
struct BrigOperandAddress {
    BrigBase base;
    BrigCodeOffset32_t symbol;
    BrigOperandOffset32_t reg;
    BrigUInt64 offset;
};

// Operand record: common header plus an alignment byte.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigOperandAlign {
    BrigBase base;
    BrigAlignment8_t align;
    uint8_t reserved[3];
};

// Operand record: common header plus `elements`, an offset of type
// BrigDataOffsetCodeList32_t.
// NOTE(review): `elements` presumably locates a list of code-section offsets
// in the data section - confirm against the BRIG specification.
struct BrigOperandCodeList {
    BrigBase base;
    BrigDataOffsetCodeList32_t elements;
};

// Operand record: common header plus `ref`, a single offset of type
// BrigCodeOffset32_t.
// NOTE(review): `ref` presumably refers to an entry in the code section -
// confirm against the BRIG specification.
struct BrigOperandCodeRef {
    BrigBase base;
    BrigCodeOffset32_t ref;
};

// Operand record for a constant given as raw bytes: the value's type, a
// 16-bit reserved member and `bytes`, an offset of type
// BrigDataOffsetString32_t.
// NOTE(review): `bytes` presumably locates the constant's byte image in the
// data section; `reserved` is presumably zero - confirm against the BRIG
// specification.
struct BrigOperandConstantBytes {
    BrigBase base;
    BrigType16_t type;
    uint16_t reserved;
    BrigDataOffsetString32_t bytes;
};

// Operand record for a constant built from other operands: the value's type,
// a 16-bit reserved member and `elements`, an offset of type
// BrigDataOffsetOperandList32_t.
// NOTE(review): `reserved` is presumably zero - confirm against the BRIG
// specification.
struct BrigOperandConstantOperandList {
    BrigBase base;
    BrigType16_t type;
    uint16_t reserved;
    BrigDataOffsetOperandList32_t elements;
};

// Operand record for an image constant: type, geometry, channel order,
// channel type, three reserved bytes, and four 64-bit dimensions (`width`,
// `height`, `depth`, `array`) each stored as BrigUInt64.
// NOTE(review): which dimensions are meaningful for a given geometry is
// defined by the BRIG specification, not by this header - confirm there.
struct BrigOperandConstantImage {
    BrigBase base;
    BrigType16_t type;
    BrigImageGeometry8_t geometry;
    BrigImageChannelOrder8_t channelOrder;
    BrigImageChannelType8_t channelType;
    uint8_t reserved[3];
    BrigUInt64 width;
    BrigUInt64 height;
    BrigUInt64 depth;
    BrigUInt64 array;
};

// Operand record: common header plus `elements`, an offset of type
// BrigDataOffsetOperandList32_t.
// NOTE(review): `elements` presumably locates a list of operand-section
// offsets in the data section - confirm against the BRIG specification.
struct BrigOperandOperandList {
    BrigBase base;
    BrigDataOffsetOperandList32_t elements;
};

// Operand record naming a register: a 16-bit register kind and a 16-bit
// register number.
struct BrigOperandRegister {
    BrigBase base;
    BrigRegisterKind16_t regKind;
    uint16_t regNum;
};

// Operand record for a sampler constant: type, coordinate-normalization mode,
// filter mode, addressing mode and three reserved bytes.
// NOTE(review): `reserved` is presumably zero padding to a 4-byte multiple -
// confirm against the BRIG specification.
struct BrigOperandConstantSampler {
    BrigBase base;
    BrigType16_t type;
    BrigSamplerCoordNormalization8_t coord;
    BrigSamplerFilter8_t filter;
    BrigSamplerAddressing8_t addressing;
    uint8_t reserved[3];
};

// Operand record: common header plus `string`, an offset of type
// BrigDataOffsetString32_t.
// NOTE(review): `string` presumably locates the text in the data section -
// confirm against the BRIG specification.
struct BrigOperandString {
    BrigBase base;
    BrigDataOffsetString32_t string;
};

// Operand record that consists solely of the common BrigBase header.
// NOTE(review): presumably stands for the wavefront-size value, which needs
// no payload - confirm against the BRIG specification.
struct BrigOperandWavesize {
    BrigBase base;
};

// Exception bit flags; BrigExceptions32_t is the 32-bit type used to hold a
// combination of them. The five named exceptions occupy bits 0-4;
// BRIG_EXCEPTIONS_FIRST_USER_DEFINED is bit 16, leaving bits 5-15 unassigned
// by this enum.
typedef uint32_t BrigExceptions32_t;
enum BrigExceptionsMask {
    BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
    BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
    BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
    BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
    BRIG_EXCEPTIONS_INEXACT = 1 << 4,

    BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
};

// Header of a BRIG section: a 64-bit `byteCount`, the 32-bit size of the
// header itself, the length of the section name, and the name bytes.
// NOTE(review): `name` is declared with one element; presumably `nameLength`
// bytes actually follow (trailing-array idiom), and `byteCount` presumably
// covers the whole section including this header - confirm against the BRIG
// specification.
struct BrigSectionHeader {
    uint64_t byteCount;
    uint32_t headerByteCount;
    uint32_t nameLength;
    uint8_t name[1];
};

// Header of a BRIG module: an 8-byte identification string, the BRIG
// major/minor version, a 64-bit `byteCount`, a 64-byte hash, a reserved word,
// the number of sections and a 64-bit `sectionIndex`.
// NOTE(review): the expected contents of `identification`, the hash
// algorithm, and whether `sectionIndex` is an offset to a table of section
// offsets are defined by the BRIG specification, not by this header - confirm
// there.
struct BrigModuleHeader {
    char identification[8];
    BrigVersion32_t brigMajor;
    BrigVersion32_t brigMinor;
    uint64_t byteCount;
    uint8_t hash[64];
    uint32_t reserved;
    uint32_t sectionCount;
    uint64_t sectionIndex;
};

// Handle type for a BRIG module: a pointer to its BrigModuleHeader.
typedef BrigModuleHeader* BrigModule_t;

#ifdef __cplusplus
}
#endif  /*__cplusplus*/

#endif // defined(INCLUDED_BRIG_H)


================================================
FILE: runtime/hsa-runtime/inc/amd_hsa_common.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// The following set of header files provides definitions for AMD GPU
// Architecture:
//   - amd_hsa_common.h
//   - amd_hsa_elf.h
//   - amd_hsa_kernel_code.h
//   - amd_hsa_queue.h
//   - amd_hsa_signal.h
//
// Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more
// information.

#ifndef AMD_HSA_COMMON_H
#define AMD_HSA_COMMON_H

#include <stddef.h>
#include <stdint.h>

// Descriptive version of the HSA Application Binary Interface. This is a
// human-readable string literal (name, version and date), not a value meant
// for numeric comparison.
#define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)"

// Alignment attribute that specifies a minimum alignment (in bytes) for
// variables of the specified type.
//
// Expands to `__attribute__((aligned(x)))` for compilers that define
// __GNUC__, to `__declspec(align(x))` for compilers that define _MSC_VER, and
// to nothing when RC_INVOKED is defined (so the header can still be
// preprocessed in that mode). Any other toolchain is rejected at
// preprocessing time with an explanatory message.
#if defined(__GNUC__)
#  define __ALIGNED__(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
#  define __ALIGNED__(x) __declspec(align(x))
#elif defined(RC_INVOKED)
#  define __ALIGNED__(x)
#else
#  error "amd_hsa_common.h: unsupported compiler, cannot define __ALIGNED__"
#endif

// Creates enumeration entries for packed types. Enumeration entries include
// bit shift amount, bit width, and bit mask.
//
// For a field `name` the expansion defines three enumerators:
//   name##_SHIFT - position of the field's least significant bit,
//   name##_WIDTH - number of bits in the field,
//   name         - the mask with `width` one-bits starting at bit `shift`.
// The mask is computed from the signed literal `1`, so `width` must be smaller
// than the number of bits in `int`. The expansion deliberately has no trailing
// comma; the user supplies the separator between entries.
#define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width)                   \
  name##_SHIFT = (shift),                                                      \
  name##_WIDTH = (width),                                                      \
  name = (((1 << (width)) - 1) << (shift))                                     \

// Gets bits for specified mask from specified src packed instance.
//
// `mask` must be the name of a mask enumerator that has a matching
// `<mask>_SHIFT` companion (as produced by AMD_HSA_BITS_CREATE_ENUM_ENTRIES),
// because the shift amount is obtained by token-pasting `_SHIFT` onto it.
// `src` may be any integer expression: both arguments are parenthesized so
// that an argument containing a low-precedence operator (e.g. `a | b`) is
// masked as a whole rather than being split by `&`.
#define AMD_HSA_BITS_GET(src, mask)                                            \
  (((src) & (mask)) >> mask ## _SHIFT)

// Sets val bits for specified mask in specified dst packed instance.
//
// `mask` must be the name of a mask enumerator that has a matching
// `<mask>_SHIFT` companion (as produced by AMD_HSA_BITS_CREATE_ENUM_ENTRIES).
// The field is first cleared in `dst`, then `val` is shifted into position
// and truncated to the field by the mask. `dst` is evaluated twice, so it must
// not have side effects.
//
// The two assignments are wrapped in do { } while (0) so the macro behaves as
// a single statement (e.g. as the body of an unbraced `if`); invoke it with a
// trailing semicolon as before.
#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
  do {                                                                         \
    (dst) &= (~(1 << mask##_SHIFT) & ~(mask));                                 \
    (dst) |= (((val) << mask##_SHIFT) & (mask));                               \
  } while (0)

#endif // AMD_HSA_COMMON_H


================================================
FILE: runtime/hsa-runtime/inc/amd_hsa_elf.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// Undefine the macro in case it is defined in the system elf.h.
#undef EM_AMDGPU

#ifndef AMD_HSA_ELF_H
#define AMD_HSA_ELF_H

// AMD GPU Specific ELF Header Enumeration Values.
//
// Values are copied from LLVM BinaryFormat/ELF.h . This file also contains
// code object V1 definitions which are not part of the LLVM header. Code object
// V1 was only supported by the Finalizer which is now deprecated and removed.
//
// TODO: Deprecate and remove V1 support and replace this header with using the
// LLVM header.
namespace ELF {

// Machine architectures
// See current registered ELF machine architectures at:
//    http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
enum {
  EM_AMDGPU = 224,        // AMD GPU architecture
};

// OS ABI identification.
enum {
  ELFOSABI_AMDGPU_HSA = 64,    // AMD HSA runtime
};

// AMDGPU OS ABI Version identification.
// The numeric value is the code object version minus two (V2 = 0 ... V6 = 4).
enum {
  // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification
  // was never defined for V1.
  ELFABIVERSION_AMDGPU_HSA_V2 = 0,
  ELFABIVERSION_AMDGPU_HSA_V3 = 1,
  ELFABIVERSION_AMDGPU_HSA_V4 = 2,
  ELFABIVERSION_AMDGPU_HSA_V5 = 3,
  ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};

// AMDGPU specific e_flags.
enum : unsigned {
  // Processor selection mask for EF_AMDGPU_MACH_* values.
  EF_AMDGPU_MACH = 0x0ff,

  // Not specified processor.
  EF_AMDGPU_MACH_NONE = 0x000,

  // AMDGCN-based processors.
  // Values are assigned in registration order, not in processor-name order.
  // clang-format off
  EF_AMDGPU_MACH_AMDGCN_GFX600          = 0x020,
  EF_AMDGPU_MACH_AMDGCN_GFX601          = 0x021,
  EF_AMDGPU_MACH_AMDGCN_GFX700          = 0x022,
  EF_AMDGPU_MACH_AMDGCN_GFX701          = 0x023,
  EF_AMDGPU_MACH_AMDGCN_GFX702          = 0x024,
  EF_AMDGPU_MACH_AMDGCN_GFX703          = 0x025,
  EF_AMDGPU_MACH_AMDGCN_GFX704          = 0x026,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27   = 0x027,
  EF_AMDGPU_MACH_AMDGCN_GFX801          = 0x028,
  EF_AMDGPU_MACH_AMDGCN_GFX802          = 0x029,
  EF_AMDGPU_MACH_AMDGCN_GFX803          = 0x02a,
  EF_AMDGPU_MACH_AMDGCN_GFX810          = 0x02b,
  EF_AMDGPU_MACH_AMDGCN_GFX900          = 0x02c,
  EF_AMDGPU_MACH_AMDGCN_GFX902          = 0x02d,
  EF_AMDGPU_MACH_AMDGCN_GFX904          = 0x02e,
  EF_AMDGPU_MACH_AMDGCN_GFX906          = 0x02f,
  EF_AMDGPU_MACH_AMDGCN_GFX908          = 0x030,
  EF_AMDGPU_MACH_AMDGCN_GFX909          = 0x031,
  EF_AMDGPU_MACH_AMDGCN_GFX90C          = 0x032,
  EF_AMDGPU_MACH_AMDGCN_GFX1010         = 0x033,
  EF_AMDGPU_MACH_AMDGCN_GFX1011         = 0x034,
  EF_AMDGPU_MACH_AMDGCN_GFX1012         = 0x035,
  EF_AMDGPU_MACH_AMDGCN_GFX1030         = 0x036,
  EF_AMDGPU_MACH_AMDGCN_GFX1031         = 0x037,
  EF_AMDGPU_MACH_AMDGCN_GFX1032         = 0x038,
  EF_AMDGPU_MACH_AMDGCN_GFX1033         = 0x039,
  EF_AMDGPU_MACH_AMDGCN_GFX602          = 0x03a,
  EF_AMDGPU_MACH_AMDGCN_GFX705          = 0x03b,
  EF_AMDGPU_MACH_AMDGCN_GFX805          = 0x03c,
  EF_AMDGPU_MACH_AMDGCN_GFX1035         = 0x03d,
  EF_AMDGPU_MACH_AMDGCN_GFX1034         = 0x03e,
  EF_AMDGPU_MACH_AMDGCN_GFX90A          = 0x03f,
  EF_AMDGPU_MACH_AMDGCN_GFX940          = 0x040,
  EF_AMDGPU_MACH_AMDGCN_GFX1100         = 0x041,
  EF_AMDGPU_MACH_AMDGCN_GFX1013         = 0x042,
  EF_AMDGPU_MACH_AMDGCN_GFX1150         = 0x043,
  EF_AMDGPU_MACH_AMDGCN_GFX1103         = 0x044,
  EF_AMDGPU_MACH_AMDGCN_GFX1036         = 0x045,
  EF_AMDGPU_MACH_AMDGCN_GFX1101         = 0x046,
  EF_AMDGPU_MACH_AMDGCN_GFX1102         = 0x047,
  EF_AMDGPU_MACH_AMDGCN_GFX1200         = 0x048,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49   = 0x049,
  EF_AMDGPU_MACH_AMDGCN_GFX1151         = 0x04a,
  EF_AMDGPU_MACH_AMDGCN_GFX941          = 0x04b,
  EF_AMDGPU_MACH_AMDGCN_GFX942          = 0x04c,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D   = 0x04d,
  EF_AMDGPU_MACH_AMDGCN_GFX1201         = 0x04e,
  EF_AMDGPU_MACH_AMDGCN_GFX950          = 0x04f,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50   = 0x050,
  EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC    = 0x051,
  EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
  EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
  EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC   = 0x054,
  EF_AMDGPU_MACH_AMDGCN_GFX1152         = 0x055,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56   = 0x056,
  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57   = 0x057,
  EF_AMDGPU_MACH_AMDGCN_GFX1153         = 0x058,
  EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC   = 0x059,
  // Values 0x05a-0x05e are not defined in this header.
  EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC  = 0x05f,
  // clang-format on

  // First/last AMDGCN-based processors.
  // LAST names the enumerator with the largest value above and has to be
  // updated whenever a processor with a larger value is added.
  EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,

  // Indicates if the "xnack" target feature is enabled for all code contained
  // in the object.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
  EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
  // Indicates if the trap handler is enabled for all code contained
  // in the object.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
  EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,

  // Indicates if the "xnack" target feature is enabled for all code contained
  // in the object.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
  EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
  // Indicates if the "sramecc" target feature is enabled for all code
  // contained in the object.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
  EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,

  // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
  // Note that the mask (0x300) is numerically equal to the "on" value below,
  // so a test for "on" must compare the masked field for equality rather than
  // test it for non-zero.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
  EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
  // XNACK is not supported.
  EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
  // XNACK is any/default/unspecified.
  EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
  // XNACK is off.
  EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
  // XNACK is on.
  EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,

  // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
  // As with XNACK, the mask (0xc00) is numerically equal to the "on" value.
  //
  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
  EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
  // SRAMECC is not supported.
  EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
  // SRAMECC is any/default/unspecified.
  EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
  // SRAMECC is off.
  EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
  // SRAMECC is on.
  EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,

  // Generic target versioning. This is contained in the most significant byte
  // of EFLAGS: EF_AMDGPU_GENERIC_VERSION is the mask and
  // EF_AMDGPU_GENERIC_VERSION_OFFSET the bit position of that byte.
  EF_AMDGPU_GENERIC_VERSION = 0xff000000,
  EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
  EF_AMDGPU_GENERIC_VERSION_MIN = 1,
  EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};

// ELF Relocation types for AMDGPU.
// Only the relocation types listed here are declared by this header; the
// numbering is not contiguous.
enum : unsigned {
  R_AMDGPU_ABS32_LO = 1,
  R_AMDGPU_ABS32_HI = 2,
  R_AMDGPU_ABS64 = 3,
  R_AMDGPU_ABS32 = 6,
  R_AMDGPU_RELATIVE64 = 13,
};

} // end namespace ELF

// ELF Section Header Flag Enumeration Values.
// Each flag is a single bit (bits 20-23) ANDed with SHF_MASKOS. SHF_MASKOS is
// not defined by this header; it must already be defined (e.g. by an ELF
// header) wherever these macros are expanded.
#define SHF_AMDGPU_HSA_GLOBAL   (0x00100000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_READONLY (0x00200000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_CODE     (0x00400000 & SHF_MASKOS)
#define SHF_AMDGPU_HSA_AGENT    (0x00800000 & SHF_MASKOS)

// AMD GPU HSA ELF Segment Enumeration Values.
// AMDGPU_HSA_SEGMENT_LAST is a count/sentinel: it takes the value following
// the last real segment (4 with the entries below).
typedef enum {
  AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM = 0,
  AMDGPU_HSA_SEGMENT_GLOBAL_AGENT = 1,
  AMDGPU_HSA_SEGMENT_READONLY_AGENT = 2,
  AMDGPU_HSA_SEGMENT_CODE_AGENT = 3,
  AMDGPU_HSA_SEGMENT_LAST,
} amdgpu_hsa_elf_segment_t;

// ELF Program Header Type Enumeration Values.
// Each type is PT_LOOS plus the matching amdgpu_hsa_elf_segment_t value.
// PT_LOOS is not defined by this header; it must already be defined wherever
// these macros are expanded.
#define PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM)
#define PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT   (PT_LOOS + AMDGPU_HSA_SEGMENT_GLOBAL_AGENT)
#define PT_AMDGPU_HSA_LOAD_READONLY_AGENT (PT_LOOS + AMDGPU_HSA_SEGMENT_READONLY_AGENT)
#define PT_AMDGPU_HSA_LOAD_CODE_AGENT     (PT_LOOS + AMDGPU_HSA_SEGMENT_CODE_AGENT)

// ELF Symbol Type Enumeration Values.
// Defined as offsets from STT_LOOS, which is not defined by this header and
// must already be defined wherever these macros are expanded.
#define STT_AMDGPU_HSA_KERNEL            (STT_LOOS + 0)
#define STT_AMDGPU_HSA_INDIRECT_FUNCTION (STT_LOOS + 1)
#define STT_AMDGPU_HSA_METADATA          (STT_LOOS + 2)

// ELF Symbol Binding Enumeration Values.
// Defined as an offset from STB_LOOS, which is not defined by this header and
// must already be defined wherever this macro is expanded.
#define STB_AMDGPU_HSA_EXTERNAL (STB_LOOS + 0)

// ELF Symbol Other Information Creation/Retrieval.
//
// Bit layout of the packed value `o` as implemented by these macros:
//   bits 1:0 - `v`, passed through from ELF64_ST_AMDGPU_OTHER's third argument
//   bits 3:2 - allocation (`a`), read back by ELF64_ST_AMDGPU_ALLOCATION
//   bits 4+  - flags (`f`), read back by ELF64_ST_AMDGPU_FLAGS
// The three fields never overlap, so they are combined with bitwise OR.
#define ELF64_ST_AMDGPU_ALLOCATION(o)  (0x3 & ((o) >> 2))
#define ELF64_ST_AMDGPU_FLAGS(o)       ((o) >> 4)
#define ELF64_ST_AMDGPU_OTHER(f, a, v) (((f) << 4) | ((0x3 & (a)) << 2) | (0x3 & (v)))

// AMD GPU HSA Symbol Allocation Enumeration Values.
// The real values are 0-3 and therefore fit the two-bit allocation field
// extracted by ELF64_ST_AMDGPU_ALLOCATION. AMDGPU_HSA_SYMBOL_ALLOCATION_LAST
// is a count/sentinel taking the value after the last real entry.
typedef enum {
  AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT = 0,
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM = 1,
  AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT = 2,
  AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT = 3,
  AMDGPU_HSA_SYMBOL_ALLOCATION_LAST,
} amdgpu_hsa_symbol_allocation_t;

// ELF Symbol Allocation Enumeration Values.
// Short STA_* aliases for the amdgpu_hsa_symbol_allocation_t enumerators.
#define STA_AMDGPU_HSA_DEFAULT        AMDGPU_HSA_SYMBOL_ALLOCATION_DEFAULT
#define STA_AMDGPU_HSA_GLOBAL_PROGRAM AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_PROGRAM
#define STA_AMDGPU_HSA_GLOBAL_AGENT   AMDGPU_HSA_SYMBOL_ALLOCATION_GLOBAL_AGENT
#define STA_AMDGPU_HSA_READONLY_AGENT AMDGPU_HSA_SYMBOL_ALLOCATION_READONLY_AGENT

// AMD GPU HSA Symbol Flag Enumeration Values.
// AMDGPU_HSA_SYMBOL_FLAG_LAST is a count/sentinel taking the value after the
// last real entry.
typedef enum {
  AMDGPU_HSA_SYMBOL_FLAG_DEFAULT = 0,
  AMDGPU_HSA_SYMBOL_FLAG_CONST = 1,
  AMDGPU_HSA_SYMBOL_FLAG_LAST,
} amdgpu_hsa_symbol_flag_t;

// ELF Symbol Flag Enumeration Values.
// Short STF_* alias for the amdgpu_hsa_symbol_flag_t enumerator.
#define STF_AMDGPU_HSA_CONST AMDGPU_HSA_SYMBOL_FLAG_CONST

// Legacy/V1 AMD GPU Relocation Type Enumeration Values.
// Values 1, 2, 3 and 13 are numerically equal to ELF::R_AMDGPU_ABS32_LO,
// ELF::R_AMDGPU_ABS32_HI, ELF::R_AMDGPU_ABS64 and ELF::R_AMDGPU_RELATIVE64
// declared earlier in this header; 4 and 5 exist only in the V1 numbering.
#define R_AMDGPU_V1_NONE         0
#define R_AMDGPU_V1_32_LOW       1
#define R_AMDGPU_V1_32_HIGH      2
#define R_AMDGPU_V1_64           3
#define R_AMDGPU_V1_INIT_SAMPLER 4
#define R_AMDGPU_V1_INIT_IMAGE   5
#define R_AMDGPU_V1_RELATIVE64   13

// AMD GPU Note Type Enumeration Values.
// The numbering is not contiguous (1-6, 11, 33, 101, 102). Payload layouts for
// several of these notes are given by the amdgpu_hsa_note_*_t structs declared
// later in this header.
#define NT_AMD_HSA_CODE_OBJECT_VERSION 1
#define NT_AMD_HSA_HSAIL               2
#define NT_AMD_HSA_ISA_VERSION         3
#define NT_AMD_HSA_PRODUCER            4
#define NT_AMD_HSA_PRODUCER_OPTIONS    5
#define NT_AMD_HSA_EXTENSION           6
#define NT_AMD_HSA_ISA_NAME            11
/* AMDGPU snapshots of runtime, agent and queues state for use in core dump */
#define NT_AMDGPU_CORE_STATE           33
#define NT_AMD_HSA_HLDEBUG_DEBUG       101
#define NT_AMD_HSA_HLDEBUG_TARGET      102

// AMD GPU Metadata Kind Enumeration Values.
// amdgpu_hsa_metadata_kind16_t is the 16-bit storage type used for the `kind`
// member of the sampler and image descriptors declared in this header.
// uint16_t is not declared by this header and must come from an earlier
// include (e.g. <stdint.h>).
typedef uint16_t amdgpu_hsa_metadata_kind16_t;
typedef enum {
  AMDGPU_HSA_METADATA_KIND_NONE = 0,
  AMDGPU_HSA_METADATA_KIND_INIT_SAMP = 1,
  AMDGPU_HSA_METADATA_KIND_INIT_ROIMG = 2,
  AMDGPU_HSA_METADATA_KIND_INIT_WOIMG = 3,
  AMDGPU_HSA_METADATA_KIND_INIT_RWIMG = 4
} amdgpu_hsa_metadata_kind_t;

// AMD GPU Sampler Coordinate Normalization Enumeration Values.
// amdgpu_hsa_sampler_coord8_t is the 8-bit storage type used for the `coord`
// member of amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_coord8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_COORD_UNNORMALIZED = 0,
  AMDGPU_HSA_SAMPLER_COORD_NORMALIZED = 1
} amdgpu_hsa_sampler_coord_t;

// AMD GPU Sampler Filter Enumeration Values.
// amdgpu_hsa_sampler_filter8_t is the 8-bit storage type used for the
// `filter` member of amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_filter8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_FILTER_NEAREST = 0,
  AMDGPU_HSA_SAMPLER_FILTER_LINEAR = 1
} amdgpu_hsa_sampler_filter_t;

// AMD GPU Sampler Addressing Enumeration Values.
// amdgpu_hsa_sampler_addressing8_t is the 8-bit storage type used for the
// `addressing` member of amdgpu_hsa_sampler_descriptor_t.
typedef uint8_t amdgpu_hsa_sampler_addressing8_t;
typedef enum {
  AMDGPU_HSA_SAMPLER_ADDRESSING_UNDEFINED = 0,
  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_EDGE = 1,
  AMDGPU_HSA_SAMPLER_ADDRESSING_CLAMP_TO_BORDER = 2,
  AMDGPU_HSA_SAMPLER_ADDRESSING_REPEAT = 3,
  AMDGPU_HSA_SAMPLER_ADDRESSING_MIRRORED_REPEAT = 4
} amdgpu_hsa_sampler_addressing_t;

// AMD GPU Sampler Descriptor.
// Begins with the same `size`/`kind` pair as amdgpu_hsa_image_descriptor_t,
// followed by the three sampler properties in their 8-bit storage types and
// one reserved byte.
// NOTE(review): `size` is presumably the descriptor size in bytes and `kind`
// presumably AMDGPU_HSA_METADATA_KIND_INIT_SAMP for this record - confirm
// against the producer/consumer of this metadata.
typedef struct amdgpu_hsa_sampler_descriptor_s {
  uint16_t size;
  amdgpu_hsa_metadata_kind16_t kind;
  amdgpu_hsa_sampler_coord8_t coord;
  amdgpu_hsa_sampler_filter8_t filter;
  amdgpu_hsa_sampler_addressing8_t addressing;
  uint8_t reserved1;
} amdgpu_hsa_sampler_descriptor_t;

// AMD GPU Image Geometry Enumeration Values.
// amdgpu_hsa_image_geometry8_t is the 8-bit storage type used for the
// `geometry` member of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_geometry8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_GEOMETRY_1D = 0,
  AMDGPU_HSA_IMAGE_GEOMETRY_2D = 1,
  AMDGPU_HSA_IMAGE_GEOMETRY_3D = 2,
  AMDGPU_HSA_IMAGE_GEOMETRY_1DA = 3,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DA = 4,
  AMDGPU_HSA_IMAGE_GEOMETRY_1DB = 5,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DDEPTH = 6,
  AMDGPU_HSA_IMAGE_GEOMETRY_2DADEPTH = 7
} amdgpu_hsa_image_geometry_t;

// AMD GPU Image Channel Order Enumeration Values.
// amdgpu_hsa_image_channel_order8_t is the 8-bit storage type used for the
// `channel_order` member of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_channel_order8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_A = 0,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_R = 1,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RX = 2,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RG = 3,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGX = 4,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RA = 5,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGB = 6,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBX = 7,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_RGBA = 8,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_BGRA = 9,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ARGB = 10,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_ABGR = 11,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGB = 12,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBX = 13,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SRGBA = 14,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_SBGRA = 15,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH = 18,
  AMDGPU_HSA_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
} amdgpu_hsa_image_channel_order_t;

// AMD GPU Image Channel Type Enumeration Values.
// amdgpu_hsa_image_channel_type8_t is the 8-bit storage type used for the
// `channel_type` member of amdgpu_hsa_image_descriptor_t.
typedef uint8_t amdgpu_hsa_image_channel_type8_t;
typedef enum {
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_555 = 5,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SHORT_565 = 6,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_INT_101010 = 7,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
  AMDGPU_HSA_IMAGE_CHANNEL_TYPE_FLOAT = 15
} amdgpu_hsa_image_channel_type_t;

// AMD GPU Image Descriptor.
// Begins with the same `size`/`kind` pair as amdgpu_hsa_sampler_descriptor_t,
// followed by geometry, channel order and channel type in their 8-bit storage
// types, one reserved byte, and four 64-bit dimensions.
// NOTE(review): `size` is presumably the descriptor size in bytes and `kind`
// presumably one of the AMDGPU_HSA_METADATA_KIND_INIT_*IMG values; which of
// width/height/depth/array are meaningful presumably depends on `geometry` -
// confirm against the producer/consumer of this metadata.
typedef struct amdgpu_hsa_image_descriptor_s {
  uint16_t size;
  amdgpu_hsa_metadata_kind16_t kind;
  amdgpu_hsa_image_geometry8_t geometry;
  amdgpu_hsa_image_channel_order8_t channel_order;
  amdgpu_hsa_image_channel_type8_t channel_type;
  uint8_t reserved1;
  uint64_t width;
  uint64_t height;
  uint64_t depth;
  uint64_t array;
} amdgpu_hsa_image_descriptor_t;

// Code object version as a major/minor pair of 32-bit values.
// NOTE(review): presumably the payload of an NT_AMD_HSA_CODE_OBJECT_VERSION
// note - confirm against the code that reads/writes the note.
typedef struct amdgpu_hsa_note_code_object_version_s {
  uint32_t major_version;
  uint32_t minor_version;
} amdgpu_hsa_note_code_object_version_t;

// HSAIL properties: major/minor version followed by three 8-bit values
// (profile, machine model, default float rounding mode). No reserved member is
// declared after the three bytes, so any tail padding is inserted by the
// compiler rather than named by this struct.
// NOTE(review): presumably the payload of an NT_AMD_HSA_HSAIL note - confirm
// against the code that reads/writes the note.
typedef struct amdgpu_hsa_note_hsail_s {
  uint32_t hsail_major_version;
  uint32_t hsail_minor_version;
  uint8_t profile;
  uint8_t machine_model;
  uint8_t default_float_round;
} amdgpu_hsa_note_hsail_t;

// ISA identification: sizes of the vendor and architecture names, the
// major/minor/stepping version triple, and the name characters.
// NOTE(review): `vendor_and_architecture_name` is declared with one element;
// presumably it is a trailing variable-length area holding the vendor name
// followed by the architecture name, with lengths given by the two size
// members (whether those lengths include a terminating NUL is not visible
// here). Presumably the payload of an NT_AMD_HSA_ISA_VERSION note - confirm
// against the code that reads/writes the note.
typedef struct amdgpu_hsa_note_isa_s {
  uint16_t vendor_name_size;
  uint16_t architecture_name_size;
  uint32_t major;
  uint32_t minor;
  uint32_t stepping;
  char vendor_and_architecture_name[1];
} amdgpu_hsa_note_isa_t;

// Producer identification: name size, a reserved 16-bit member, the
// producer's major/minor version, and the name characters.
// NOTE(review): `producer_name` is declared with one element; presumably it
// is a trailing variable-length area of `producer_name_size` characters.
// Presumably the payload of an NT_AMD_HSA_PRODUCER note - confirm against the
// code that reads/writes the note.
typedef struct amdgpu_hsa_note_producer_s {
  uint16_t producer_name_size;
  uint16_t reserved;
  uint32_t producer_major_version;
  uint32_t producer_minor_version;
  char producer_name[1];
} amdgpu_hsa_note_producer_t;

// Producer options: a 16-bit size followed by the option characters.
// NOTE(review): `producer_options` is declared with one element; presumably
// it is a trailing variable-length area of `producer_options_size`
// characters. Presumably the payload of an NT_AMD_HSA_PRODUCER_OPTIONS note -
// confirm against the code that reads/writes the note.
typedef struct amdgpu_hsa_note_producer_options_s {
  uint16_t producer_options_size;
  char producer_options[1];
} amdgpu_hsa_note_producer_options_t;

// AMD GPU HSA ELF Section Enumeration Values.
// Three groups (RODATA, DATA, BSS) of three entries each, in the order
// GLOBAL_PROGRAM, GLOBAL_AGENT, READONLY_AGENT. Values are spelled out
// explicitly, matching the other enumerations in this header.
// AMDGPU_HSA_SECTION_LAST is a count/sentinel taking the value after the last
// real entry.
typedef enum {
  AMDGPU_HSA_RODATA_GLOBAL_PROGRAM = 0,
  AMDGPU_HSA_RODATA_GLOBAL_AGENT = 1,
  AMDGPU_HSA_RODATA_READONLY_AGENT = 2,
  AMDGPU_HSA_DATA_GLOBAL_PROGRAM = 3,
  AMDGPU_HSA_DATA_GLOBAL_AGENT = 4,
  AMDGPU_HSA_DATA_READONLY_AGENT = 5,
  AMDGPU_HSA_BSS_GLOBAL_PROGRAM = 6,
  AMDGPU_HSA_BSS_GLOBAL_AGENT = 7,
  AMDGPU_HSA_BSS_READONLY_AGENT = 8,
  AMDGPU_HSA_SECTION_LAST,
} amdgpu_hsa_elf_section_t;

#endif // AMD_HSA_ELF_H


================================================
FILE: runtime/hsa-runtime/inc/amd_hsa_kernel_code.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_KERNEL_CODE_H
#define AMD_HSA_KERNEL_CODE_H

#include "amd_hsa_common.h"
#include "hsa.h"

// AMD kernel code version numbers.
// amd_kernel_code_version32_t is the 32-bit storage type used for the
// amd_kernel_code_version_major / amd_kernel_code_version_minor members of
// amd_kernel_code_t.
typedef uint32_t amd_kernel_code_version32_t;
enum amd_kernel_code_version_t {
  AMD_KERNEL_CODE_VERSION_MAJOR = 0x1,
  AMD_KERNEL_CODE_VERSION_MINOR = 0x1
};

// AMD machine kinds.
// amd_machine_kind16_t is the 16-bit storage type used for the
// amd_machine_kind member of amd_kernel_code_t.
typedef uint16_t amd_machine_kind16_t;
enum amd_machine_kind_t {
  AMD_MACHINE_KIND_UNDEFINED = 0x0,
  AMD_MACHINE_KIND_AMDGPU = 0x1
};

// AMD Machine Version.
// 16-bit storage type shared by the amd_machine_version_major, _minor and
// _stepping members of amd_kernel_code_t.
typedef uint16_t amd_machine_version16_t;

// AMD floating-point rounding modes (2-bit encoding, values 0..3).
enum amd_float_round_mode_t {
  AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,  // round to nearest, ties to even
  AMD_FLOAT_ROUND_MODE_PLUS_INFINITY,     // = 1, round toward +infinity
  AMD_FLOAT_ROUND_MODE_MINUS_INFINITY,    // = 2, round toward -infinity
  AMD_FLOAT_ROUND_MODE_ZERO               // = 3, round toward zero
};

// AMD floating-point denormal handling modes (2-bit encoding, values 0..3).
enum amd_float_denorm_mode_t {
  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,  // flush both source and output
  AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT,             // = 1, flush output only
  AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE,             // = 2, flush source only
  AMD_FLOAT_DENORM_MODE_NO_FLUSH                  // = 3, no flushing
};

// AMD Compute Program Resource Register One.
//
// amd_compute_pgm_rsrc_one32_t is the 32-bit storage type of the
// compute_pgm_rsrc1 member of amd_kernel_code_t. The enum entries are
// generated by AMD_HSA_BITS_CREATE_ENUM_ENTRIES, which is not defined in this
// header (presumably it comes from amd_hsa_common.h).
// NOTE(review): the two numeric arguments look like (bit offset, bit width):
// every offset equals the previous offset plus the previous width and the
// last field ends at bit 32 -- confirm against the macro definition.
typedef uint32_t amd_compute_pgm_rsrc_one32_t;
enum amd_compute_pgm_rsrc_one_t {
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
  // The four 2-bit mode fields below presumably take amd_float_round_mode_t /
  // amd_float_denorm_mode_t values (inferred from the names; confirm).
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
};

// AMD system VGPR work-item ID selections (2-bit encoding, values 0..3).
enum amd_system_vgpr_workitem_id_t {
  AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,      // X only
  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y,        // = 1, X and Y
  AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z,      // = 2, X, Y and Z
  AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED   // = 3, no defined meaning
};

// AMD Compute Program Resource Register Two.
//
// amd_compute_pgm_rsrc_two32_t is the 32-bit storage type of the
// compute_pgm_rsrc2 member of amd_kernel_code_t. The enum entries are
// generated by AMD_HSA_BITS_CREATE_ENUM_ENTRIES, which is not defined in this
// header (presumably it comes from amd_hsa_common.h).
// NOTE(review): the two numeric arguments look like (bit offset, bit width):
// the fields are contiguous and the last one ends at bit 32 -- confirm
// against the macro definition.
typedef uint32_t amd_compute_pgm_rsrc_two32_t;
enum amd_compute_pgm_rsrc_two_t {
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
  // 2-bit field; presumably holds an amd_system_vgpr_workitem_id_t value
  // (inferred from the names; confirm).
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
};

// AMD element byte sizes (2-bit encoding).
// The encoded value is log2(size in bytes) - 1.
enum amd_element_byte_size_t {
  AMD_ELEMENT_BYTE_SIZE_2 = 0,   // 2 bytes
  AMD_ELEMENT_BYTE_SIZE_4,       // = 1, 4 bytes
  AMD_ELEMENT_BYTE_SIZE_8,       // = 2, 8 bytes
  AMD_ELEMENT_BYTE_SIZE_16       // = 3, 16 bytes
};

// AMD Kernel Code Properties.
//
// amd_kernel_code_properties32_t is the 32-bit storage type of the
// kernel_code_properties member of amd_kernel_code_t. The enum entries are
// generated by AMD_HSA_BITS_CREATE_ENUM_ENTRIES, which is not defined in this
// header (presumably it comes from amd_hsa_common.h).
// NOTE(review): the two numeric arguments look like (bit offset, bit width):
// the fields are contiguous and the last one ends at bit 32 -- confirm
// against the macro definition.
typedef uint32_t amd_kernel_code_properties32_t;
enum amd_kernel_code_properties_t {
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_WAVEFRONT_SIZE32, 10, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 11, 5),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
  // 2-bit field; presumably holds an amd_element_byte_size_t value
  // (inferred from the names; confirm).
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
};

// AMD power-of-two encoding: AMD_POWERTWO_<N> has the value log2(N).
// amd_powertwo8_t is the 8-bit storage type used for the alignment and
// wavefront_size members of amd_kernel_code_t.
typedef uint8_t amd_powertwo8_t;
enum amd_powertwo_t {
  AMD_POWERTWO_1 = 0,  // 2^0
  AMD_POWERTWO_2,      // 2^1
  AMD_POWERTWO_4,      // 2^2
  AMD_POWERTWO_8,      // 2^3
  AMD_POWERTWO_16,     // 2^4
  AMD_POWERTWO_32,     // 2^5
  AMD_POWERTWO_64,     // 2^6
  AMD_POWERTWO_128,    // 2^7
  AMD_POWERTWO_256     // 2^8
};

// AMD enabled-control-directive flags: one distinct bit per directive.
// amd_enabled_control_directive64_t is the 64-bit storage type of the
// enabled_control_directives member of amd_control_directives_t.
typedef uint64_t amd_enabled_control_directive64_t;
enum amd_enabled_control_directive_t {
  AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1 << 0,
  AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 1 << 1,
  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 1 << 2,
  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 1 << 3,
  AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 1 << 4,
  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 1 << 5,
  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 1 << 6,
  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 1 << 7,
  AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 1 << 8
};

// AMD exception kind flags: one distinct bit per exception kind.
// amd_exception_kind16_t is the matching 16-bit storage type.
typedef uint16_t amd_exception_kind16_t;
enum amd_exception_kind_t {
  AMD_EXCEPTION_KIND_INVALID_OPERATION = 1 << 0,
  AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 1 << 1,
  AMD_EXCEPTION_KIND_OVERFLOW = 1 << 2,
  AMD_EXCEPTION_KIND_UNDERFLOW = 1 << 3,
  AMD_EXCEPTION_KIND_INEXACT = 1 << 4
};

// AMD Control Directives.
//
// Fixed-layout record embedded as the last member of amd_kernel_code_t.
// With natural alignment the members below add up to exactly 128 bytes
// (8 + 2 + 2 + 4 + 8 + 4 + 1 + 3 + 24 + 12 + 60), a multiple of the requested
// 64-byte alignment. Do not reorder, resize or rename members.
// __ALIGNED__ is not defined in this header (presumably amd_hsa_common.h).
#define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
#define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
  // Presumably a bit set of amd_enabled_control_directive_t flags (inferred
  // from the type name; confirm).
  amd_enabled_control_directive64_t enabled_control_directives;
  // NOTE(review): 16-bit masks, presumably of amd_exception_kind_t bits --
  // confirm.
  uint16_t enable_break_exceptions;
  uint16_t enable_detect_exceptions;
  uint32_t max_dynamic_group_size;
  uint64_t max_flat_grid_size;
  uint32_t max_flat_workgroup_size;
  uint8_t required_dim;
  uint8_t reserved1[3];  // pads required_dim up to the next 8-byte boundary
  uint64_t required_grid_size[3];       // three entries, presumably x, y, z
  uint32_t required_workgroup_size[3];  // three entries, presumably x, y, z
  uint8_t reserved2[60];  // fills the record up to 128 bytes
} amd_control_directives_t;

// AMD Kernel Code.
//
// Fixed-layout kernel descriptor. With natural alignment the members up to
// and including runtime_loader_kernel_symbol occupy 128 bytes and are followed
// by the 128-byte amd_control_directives_t, 256 bytes in total. Do not
// reorder, resize or rename members.
// __ALIGNED__ is not defined in this header (presumably amd_hsa_common.h).
// NOTE(review): AMD_ISA_ALIGN_BYTES is not used in this header; presumably it
// is the alignment required for the ISA code itself -- confirm at the users.
#define AMD_ISA_ALIGN_BYTES 256
#define AMD_KERNEL_CODE_ALIGN_BYTES 64
#define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
  // Format version; see amd_kernel_code_version_t.
  amd_kernel_code_version32_t amd_kernel_code_version_major;
  amd_kernel_code_version32_t amd_kernel_code_version_minor;
  // Target machine; see amd_machine_kind_t for the kind values.
  amd_machine_kind16_t amd_machine_kind;
  amd_machine_version16_t amd_machine_version_major;
  amd_machine_version16_t amd_machine_version_minor;
  amd_machine_version16_t amd_machine_version_stepping;
  // Signed byte offsets. NOTE(review): presumably relative to the start of
  // this structure -- confirm.
  int64_t kernel_code_entry_byte_offset;
  int64_t kernel_code_prefetch_byte_offset;
  uint64_t kernel_code_prefetch_byte_size;
  uint64_t max_scratch_backing_memory_byte_size;
  // Bit fields; the typedef names pair these with amd_compute_pgm_rsrc_one_t,
  // amd_compute_pgm_rsrc_two_t and amd_kernel_code_properties_t.
  amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
  amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
  amd_kernel_code_properties32_t kernel_code_properties;
  // Segment sizes in bytes (per the member names).
  uint32_t workitem_private_segment_byte_size;
  uint32_t workgroup_group_segment_byte_size;
  uint32_t gds_segment_byte_size;
  uint64_t kernarg_segment_byte_size;
  uint32_t workgroup_fbarrier_count;
  // Register usage and reserved register ranges.
  uint16_t wavefront_sgpr_count;
  uint16_t workitem_vgpr_count;
  uint16_t reserved_vgpr_first;
  uint16_t reserved_vgpr_count;
  uint16_t reserved_sgpr_first;
  uint16_t reserved_sgpr_count;
  uint16_t debug_wavefront_private_segment_offset_sgpr;
  uint16_t debug_private_segment_buffer_sgpr;
  // Stored as amd_powertwo8_t; see amd_powertwo_t for the log2 encoding.
  amd_powertwo8_t kernarg_segment_alignment;
  amd_powertwo8_t group_segment_alignment;
  amd_powertwo8_t private_segment_alignment;
  amd_powertwo8_t wavefront_size;
  int32_t call_convention;
  uint8_t reserved1[12];
  uint64_t runtime_loader_kernel_symbol;
  // Starts at byte 128, which satisfies its 64-byte alignment.
  amd_control_directives_t control_directives;
} amd_kernel_code_t;

// Debug information record for the runtime loader.
// TODO: remove this struct entirely once the debugger designs/implements
// Debugger APIs.
typedef struct amd_runtime_loader_debug_info_s {
  // NOTE(review): presumably the raw ELF image and its size in bytes -- confirm.
  const void* elf_raw;
  size_t elf_size;
  // NOTE(review): presumably a NUL-terminated kernel name -- confirm.
  const char* kernel_name;
  const void* owning_segment;
} amd_runtime_loader_debug_info_t;

#endif // AMD_HSA_KERNEL_CODE_H


================================================
FILE: runtime/hsa-runtime/inc/amd_hsa_queue.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_QUEUE_H
#define AMD_HSA_QUEUE_H

#include "amd_hsa_common.h"
#include "hsa.h"

// AMD Queue Properties.
//
// amd_queue_properties32_t is the 32-bit storage type of the queue_properties
// member of amd_queue_t / amd_queue_v2_t. The enum entries are generated by
// AMD_HSA_BITS_CREATE_ENUM_ENTRIES, which is not defined in this header
// (presumably it comes from amd_hsa_common.h).
// NOTE(review): the two numeric arguments look like (bit offset, bit width):
// the fields are contiguous and the last one ends at bit 32 -- confirm
// against the macro definition.
typedef uint32_t amd_queue_properties32_t;
enum amd_queue_properties_t {
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE, 4, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 5, 27)
};

// AMD Queue.
// Alignment, in bytes, requested for amd_queue_t and amd_queue_v2_t.
// AMD_QUEUE_ALIGN wraps it in __ALIGNED__, which is not defined in this
// header (presumably amd_hsa_common.h).
#define AMD_QUEUE_ALIGN_BYTES 64
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)

// AMD Queue Capabilities.
//
// Capability bits exchanged between ROCr and CP FW.
// NOTE(review): presumably stored in the caps member of amd_queue_t /
// amd_queue_v2_t, which is declared as plain uint32_t -- confirm. The entries
// are generated by AMD_HSA_BITS_CREATE_ENUM_ENTRIES (not defined in this
// header); its numeric arguments look like (bit offset, bit width).
typedef uint32_t amd_queue_capabilities32_t;
enum amd_queue_capabilities_t {
  /* This version of CP FW supports dual-scratch and async-reclaim */
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_CP_ASYNC_RECLAIM, 0, 1),

  /*
   * This version of ROCr supports async-reclaim and CP FW may access the
   * V2 fields.
   */
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM, 1, 1),
};

/* This is the original amd_queue_t structure. The definition is only kept
 * for reference purposes. This structure should not be used.
 *
 * amd_queue_v2_t repeats this layout member for member. Three reserved
 * ranges here line up with named V2 members of the same total size:
 *   reserved3[2]  (8 bytes)  <-> scratch_backing_memory_byte_size
 *   reserved4[2]  (8 bytes)  <-> scratch_max_use_index
 *   reserved5[14] (56 bytes) <-> alt_scratch_max_use_index .. reserved5
 */
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
  hsa_queue_t hsa_queue;  /* embedded HSA queue; hsa_queue_t is declared in hsa.h */
  uint32_t caps;  /* NOTE(review): presumably amd_queue_capabilities_t bits -- confirm */
  uint32_t reserved1[3];
  volatile uint64_t write_dispatch_id;
  uint32_t group_segment_aperture_base_hi;
  uint32_t private_segment_aperture_base_hi;
  uint32_t max_cu_id;
  uint32_t max_wave_id;
  volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
  volatile uint32_t legacy_doorbell_lock;
  uint32_t reserved2[9];
  volatile uint64_t read_dispatch_id;
  uint32_t read_dispatch_id_field_base_byte_offset;
  uint32_t compute_tmpring_size;
  uint32_t scratch_resource_descriptor[4];
  uint64_t scratch_backing_memory_location;
  uint32_t reserved3[2];
  uint32_t scratch_wave64_lane_byte_size;
  amd_queue_properties32_t queue_properties;  /* see amd_queue_properties_t */
  uint32_t reserved4[2];
  hsa_signal_t queue_inactive_signal;
  uint32_t reserved5[14];
} amd_queue_t;

/*
 * AMD_QUEUE Version 2
 * amd_queue_v2_t is backwards compatible with the amd_queue_t structure and
 * can be used with previous versions of CP FW. The added fields tagged as V2
 * are ignored when running previous versions of CP FW.
 * CP FW will not try to access elements beyond the original structure
 * (sizeof(amd_queue_t)) unless the AMD_QUEUE_CAPS_SW_ASYNC_RECLAIM bit is set.
 */

/* Number of entries in amd_queue_v2_t::scratch_last_used_index. */
#define MAX_NUM_XCC 128

/* One pair of last-used scratch indices.
 * NOTE(review): presumably one pair per XCC, with `main` and `alt` matching
 * the main and alt_* scratch members of amd_queue_v2_t -- confirm. */
typedef struct scratch_last_used_index_xcc_s {
  volatile uint64_t main; /* index for the main scratch */
  volatile uint64_t alt;  /* index for the alternate scratch */
} scratch_last_used_index_xcc_t;

/* Version 2 queue descriptor.
 * Every member up to and including reserved5 sits where the same-sized
 * member of amd_queue_t sits; the V2 members reuse ranges that amd_queue_t
 * declares as reserved. Only scratch_last_used_index extends the structure
 * past the amd_queue_t layout. Do not reorder, resize or rename members. */
typedef struct AMD_QUEUE_ALIGN amd_queue_v2_s {
  hsa_queue_t hsa_queue;  /* embedded HSA queue; hsa_queue_t is declared in hsa.h */
  uint32_t caps;  /* NOTE(review): presumably amd_queue_capabilities_t bits -- confirm */
  uint32_t reserved1[3];
  volatile uint64_t write_dispatch_id;
  uint32_t group_segment_aperture_base_hi;
  uint32_t private_segment_aperture_base_hi;
  uint32_t max_cu_id;
  uint32_t max_wave_id;
  volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
  volatile uint32_t legacy_doorbell_lock;
  uint32_t reserved2[9];
  volatile uint64_t read_dispatch_id;
  uint32_t read_dispatch_id_field_base_byte_offset;
  uint32_t compute_tmpring_size;
  uint32_t scratch_resource_descriptor[4];
  uint64_t scratch_backing_memory_location;
  uint64_t scratch_backing_memory_byte_size;  /* takes the place of amd_queue_t::reserved3 */
  uint32_t scratch_wave64_lane_byte_size;
  amd_queue_properties32_t queue_properties;  /* see amd_queue_properties_t */
  volatile uint64_t scratch_max_use_index;       /* V2 */
  hsa_signal_t queue_inactive_signal;
  /* The members from here through reserved5 total 56 bytes, the size of
   * amd_queue_t::reserved5[14]. */
  volatile uint64_t alt_scratch_max_use_index;  /* V2 */
  uint32_t alt_scratch_resource_descriptor[4];   /* V2 */
  uint64_t alt_scratch_backing_memory_location;  /* V2 */
  uint32_t alt_scratch_dispatch_limit_x;         /* V2 */
  uint32_t alt_scratch_dispatch_limit_y;         /* V2 */
  uint32_t alt_scratch_dispatch_limit_z;         /* V2 */
  uint32_t alt_scratch_wave64_lane_byte_size;    /* V2 */
  uint32_t alt_compute_tmpring_size;             /* V2 */
  uint32_t reserved5;

  /* MAX_NUM_XCC entries appended after the amd_queue_t-compatible part. */
  scratch_last_used_index_xcc_t scratch_last_used_index[MAX_NUM_XCC];
} amd_queue_v2_t;

#endif // AMD_HSA_QUEUE_H


================================================
FILE: runtime/hsa-runtime/inc/amd_hsa_signal.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_SIGNAL_H
#define AMD_HSA_SIGNAL_H

#include "amd_hsa_common.h"
#include "amd_hsa_queue.h"

// AMD signal kinds.
// amd_signal_kind64_t is the signed 64-bit storage type of amd_signal_t::kind.
// User signals use a positive value, both doorbell kinds use negative values.
typedef int64_t amd_signal_kind64_t;
enum amd_signal_kind_t {
  AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2,
  AMD_SIGNAL_KIND_DOORBELL = -1,
  AMD_SIGNAL_KIND_INVALID = 0,
  AMD_SIGNAL_KIND_USER = 1
};

// AMD Signal.
//
// Fixed-layout signal record. The members add up to 64 bytes
// (8 + 8 + 8 + 4 + 4 + 8 + 8 + 8 + 8), matching AMD_SIGNAL_ALIGN_BYTES.
// Do not reorder, resize or rename members.
// __ALIGNED__ is not defined in this header (presumably amd_hsa_common.h).
#define AMD_SIGNAL_ALIGN_BYTES 64
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
  amd_signal_kind64_t kind;  // see amd_signal_kind_t
  // NOTE(review): presumably `value` is used for user signals and
  // `hardware_doorbell_ptr` for doorbell signals, selected by `kind` -- confirm.
  union {
    volatile int64_t value;
    volatile uint64_t* hardware_doorbell_ptr;
  };
  uint64_t event_mailbox_ptr;
  uint32_t event_id;
  uint32_t reserved1;
  uint64_t start_ts;
  uint64_t end_ts;
  // The uint64_t alternative keeps this slot 8 bytes wide regardless of the
  // pointer size.
  union {
    amd_queue_v2_t* queue_ptr;
    uint64_t reserved2;
  };
  uint32_t reserved3[2];
} amd_signal_t;

#endif // AMD_HSA_SIGNAL_H


================================================
FILE: runtime/hsa-runtime/inc/hsa.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_INC_HSA_H_
#define HSA_RUNTIME_INC_HSA_H_

#include <stddef.h>   /* size_t */
#include <stdint.h>   /* uintXX_t */

#ifndef __cplusplus
#include <stdbool.h>  /* bool */
#endif /* __cplusplus */

// Calling convention and import/export macros.
//
// HSA_CALL: calling-convention decoration. It may be pre-defined by the
// including project; it defaults to nothing.
#ifndef HSA_CALL
#define HSA_CALL
#endif

// HSA_EXPORT_DECORATOR: decoration for exported symbols. It may be pre-defined
// by the including project; otherwise GCC-compatible compilers get default
// symbol visibility and all other compilers get nothing.
#ifndef HSA_EXPORT_DECORATOR
#ifdef __GNUC__
#define HSA_EXPORT_DECORATOR __attribute__ ((visibility ("default")))
#else
#define HSA_EXPORT_DECORATOR
#endif
#endif
#define HSA_API_EXPORT HSA_EXPORT_DECORATOR HSA_CALL
#define HSA_API_IMPORT HSA_CALL

// HSA_API: decoration placed on every API declaration. A definition supplied
// by the including project is left untouched; previously it was silently
// replaced by HSA_API_IMPORT, which also triggered a macro-redefinition
// diagnostic. Otherwise the export form is selected when HSA_EXPORT is
// defined and the import form when it is not.
#ifndef HSA_API
#ifdef HSA_EXPORT
#define HSA_API HSA_API_EXPORT
#else
#define HSA_API HSA_API_IMPORT
#endif
#endif

// HSA_LARGE_MODEL is (re)defined here only: it is set on LP64 targets and on
// MSVC x64 builds, and cleared everywhere else.
#undef HSA_LARGE_MODEL
#if defined(_M_X64) || defined(__LP64__)
#define HSA_LARGE_MODEL
#endif

// CPU endianness detection. It is skipped when the including project already
// defined LITTLEENDIAN_CPU or BIGENDIAN_CPU. The compiler-provided
// __BYTE_ORDER__ is consulted first; a list of known little-endian
// architectures is the fallback.
#if !(defined(LITTLEENDIAN_CPU) || defined(BIGENDIAN_CPU))
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define BIGENDIAN_CPU
#elif (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
      defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||              \
      defined(_M_X64) || defined(__loongarch64) || defined(__riscv)
#define LITTLEENDIAN_CPU
#endif
#endif

// HSA_LITTLE_ENDIAN mirrors LITTLEENDIAN_CPU. The build stops if neither
// endianness macro is known at this point.
#undef HSA_LITTLE_ENDIAN
#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif
#ifdef LITTLEENDIAN_CPU
#define HSA_LITTLE_ENDIAN
#endif

// HSA_DEPRECATED expands to nothing unless the including project pre-defines
// it. The compiler-specific spellings are currently disabled:
//   GCC-compatible compilers: __attribute__((deprecated))
//   other compilers:          __declspec(deprecated)
#if !defined(HSA_DEPRECATED)
#define HSA_DEPRECATED
#endif

#define HSA_VERSION_1_0 1

#ifdef __cplusplus
extern "C" {
#endif  /* __cplusplus */

/** \addtogroup error-codes Error codes
 *  @{
 */

/**
 * @brief Status codes.
 *
 * Enumerators are listed in ascending numeric order.
 */
typedef enum {
  /** The function has been executed successfully. */
  HSA_STATUS_SUCCESS = 0x0,
  /**
   * The application interrupted a traversal over a list of elements before
   * the traversal completed.
   */
  HSA_STATUS_INFO_BREAK = 0x1,

  /** A generic error has occurred. */
  HSA_STATUS_ERROR = 0x1000,
  /**
   * An actual argument violates a precondition stated in the documentation
   * of the corresponding formal argument.
   */
  HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
  /** The requested queue creation is not valid. */
  HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
  /** The requested allocation is not valid. */
  HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
  /** The agent is invalid. */
  HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
  /** The memory region is invalid. */
  HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
  /** The signal is invalid. */
  HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
  /** The queue is invalid. */
  HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
  /**
   * The HSA runtime could not allocate the necessary resources. This error
   * may also be reported when the HSA runtime needs to spawn threads or
   * create internal OS-specific events.
   */
  HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
  /** The AQL packet is malformed. */
  HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
  /** An error was detected while releasing a resource. */
  HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
  /**
   * An API other than ::hsa_init was invoked while the reference count of
   * the HSA runtime is 0.
   */
  HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
  /** The maximum reference count for the object has been reached. */
  HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
  /** The arguments passed to a function are not compatible. */
  HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
  /** The index is invalid. */
  HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
  /** The instruction set architecture is invalid. */
  HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
  /** The code object is invalid. */
  HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
  /** The executable is invalid. */
  HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
  /** The executable is frozen. */
  HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
  /** There is no symbol with the given name. */
  HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
  /** The variable is already defined. */
  HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
  /** The variable is undefined. */
  HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
  /** An HSAIL operation resulted in a hardware exception. */
  HSA_STATUS_ERROR_EXCEPTION = 0x1016,
  /** The instruction set architecture name is invalid. */
  HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
  /** The code object symbol is invalid. */
  HSA_STATUS_ERROR_INVALID_CODE_SYMBOL = 0x1018,
  /** The executable symbol is invalid. */
  HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL = 0x1019,

  /** The file descriptor is invalid. */
  HSA_STATUS_ERROR_INVALID_FILE = 0x1020,
  /** The code object reader is invalid. */
  HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER = 0x1021,
  /** The cache is invalid. */
  HSA_STATUS_ERROR_INVALID_CACHE = 0x1022,
  /** The wavefront is invalid. */
  HSA_STATUS_ERROR_INVALID_WAVEFRONT = 0x1023,
  /** The signal group is invalid. */
  HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP = 0x1024,
  /** The HSA runtime is not in the configuration state. */
  HSA_STATUS_ERROR_INVALID_RUNTIME_STATE = 0x1025,
  /** The queue received an error that may require process termination. */
  HSA_STATUS_ERROR_FATAL = 0x1026
} hsa_status_t;

/**
 * @brief Obtain a textual description of a status code.
 *
 * @param[in] status Status code to describe.
 *
 * @param[out] status_string Receives a NUL-terminated string that describes
 * the error status.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is not a valid status
 * code, or @p status_string is NULL.
 */
hsa_status_t HSA_API hsa_status_string(hsa_status_t status, const char** status_string);

/** @} */

/** \defgroup common Common Definitions
 *  @{
 */

/**
 * @brief Three-dimensional coordinate.
 */
typedef struct hsa_dim3_s {
  /** X dimension. */
  uint32_t x;

  /** Y dimension. */
  uint32_t y;

  /** Z dimension. */
  uint32_t z;
} hsa_dim3_t;

/**
 * @brief Access permissions.
 */
typedef enum {
  /** No access; used to remove existing access. */
  HSA_ACCESS_PERMISSION_NONE = 0x0,
  /** Read-only access. */
  HSA_ACCESS_PERMISSION_RO = 0x1,
  /** Write-only access. */
  HSA_ACCESS_PERMISSION_WO = 0x2,
  /** Read and write access (numerically RO | WO). */
  HSA_ACCESS_PERMISSION_RW = 0x3
} hsa_access_permission_t;

/**
 * @brief POSIX file descriptor.
 *
 * Plain @c int, the type POSIX uses for file descriptors.
 */
typedef int hsa_file_t;

/** @} **/


/** \defgroup initshutdown Initialization and Shut Down
 *  @{
 */

/**
 * @brief Initialize the HSA runtime.
 *
 * @details Initializes the HSA runtime if it is not already initialized, and
 * increases the reference counter associated with the HSA runtime for the
 * current process. Invocation of any HSA function other than ::hsa_init results
 * in undefined behavior if the current HSA runtime reference counter is less
 * than one.
 *
 * @note The parameter list is spelled @c (void) so that C translation units
 * see a real prototype; in C before C23 an empty list leaves the parameters
 * unspecified. The declaration is unchanged for C++.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference
 * count reaches INT32_MAX.
 */
hsa_status_t HSA_API hsa_init(void);

/**
 * @brief Shut down the HSA runtime.
 *
 * @details Decreases the reference count of the HSA runtime instance. When the
 * reference count reaches 0, the HSA runtime is no longer considered valid
 * but the application might call ::hsa_init to initialize the HSA runtime
 * again.
 *
 * Once the reference count of the HSA runtime reaches 0, all the resources
 * associated with it (queues, signals, agent information, etc.) are
 * considered invalid and any attempt to reference them in subsequent API calls
 * results in undefined behavior. When the reference count reaches 0, the HSA
 * runtime may release resources associated with it.
 *
 * @note The parameter list is spelled @c (void) so that C translation units
 * see a real prototype; in C before C23 an empty list leaves the parameters
 * unspecified. The declaration is unchanged for C++.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 */
hsa_status_t HSA_API hsa_shut_down(void);

/** @} **/

/** \defgroup agentinfo System and Agent Information
 *  @{
 */

/**
 * @brief Endianness. A convention used to interpret the bytes making up a data
 * word.
 */
typedef enum {
  /** Little endian: the least significant byte is at the smallest address. */
  HSA_ENDIANNESS_LITTLE = 0,
  /** Big endian: the most significant byte is at the smallest address. */
  HSA_ENDIANNESS_BIG = 1
} hsa_endianness_t;

/**
 * @brief Machine model. A machine model determines the size of certain data
 * types in HSA runtime and an agent.
 */
typedef enum {
  /** Small machine model: addresses use 32 bits. */
  HSA_MACHINE_MODEL_SMALL = 0,
  /** Large machine model: addresses use 64 bits. */
  HSA_MACHINE_MODEL_LARGE = 1
} hsa_machine_model_t;

/**
 * @brief Profile. A profile identifies a particular level of feature support.
 * For example, in the base profile the application must use the HSA runtime
 * allocator to reserve shared virtual memory, whereas in the full profile any
 * host pointer can be shared across all the agents.
 */
typedef enum {
  /**
   * Base profile.
   */
  HSA_PROFILE_BASE = 0,
  /**
   * Full profile.
   */
  HSA_PROFILE_FULL = 1
} hsa_profile_t;

/**
 * @brief System attributes. Values below 0x200 are the standard attributes;
 * the HSA_AMD_* attributes start at 0x200. Each attribute is queried with
 * ::hsa_system_get_info, and the buffer passed to that function must match the
 * type documented for the attribute.
 */
typedef enum {
  /**
   * Major version of the HSA runtime specification supported by the
   * implementation. The type of this attribute is uint16_t.
   */
  HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
  /**
   * Minor version of the HSA runtime specification supported by the
   * implementation. The type of this attribute is uint16_t.
   */
  HSA_SYSTEM_INFO_VERSION_MINOR = 1,
  /**
   * Current timestamp. The value of this attribute monotonically increases at a
   * constant rate. The type of this attribute is uint64_t.
   */
  HSA_SYSTEM_INFO_TIMESTAMP = 2,
  /**
   * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
   * in the range 1-400MHz. The type of this attribute is uint64_t.
   */
  HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
  /**
   * Maximum duration of a signal wait operation. Expressed as a count based on
   * the timestamp frequency. The type of this attribute is uint64_t.
   */
  HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
  /**
   * Endianness of the system. The type of this attribute is ::hsa_endianness_t.
   */
  HSA_SYSTEM_INFO_ENDIANNESS = 5,
  /**
   * Machine model supported by the HSA runtime. The type of this attribute is
   * ::hsa_machine_model_t.
   */
  HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
  /**
   * Bit-mask indicating which extensions are supported by the
   * implementation. An extension with an ID of @p i is supported if the bit at
   * position @p i is set. The type of this attribute is uint8_t[128].
   */
  HSA_SYSTEM_INFO_EXTENSIONS = 7,
  /**
   * String containing the ROCr build identifier.
   */
  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
  /**
   * Returns true if hsa_amd_svm_* APIs are supported by the driver.  The type of
   * this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201,
  /* TODO: Should this be per Agent? */
  /**
   * Returns true if all Agents have access to system allocated memory (such as
   * that allocated by mmap, malloc, or new) by default.
   * If false then system allocated memory may only be made SVM accessible to
   * an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
   * The type of this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202,
  /**
   * Returns true if mwaitx is enabled on this system.
   * The type of this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203,
  /**
   * Returns true if DMABUF APIs are supported by the driver.  The type of
   * this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED = 0x204,
  /**
   * Returns true if Virtual Memory APIs are supported by the driver.  The type of
   * this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_VIRTUAL_MEM_API_SUPPORTED = 0x205,
  /**
   * Returns true if XNACK is enabled on this system.  The type of
   * this attribute is bool.
   */
  HSA_AMD_SYSTEM_INFO_XNACK_ENABLED = 0x206,
  /**
   * Major version of the HSA runtime extension specification supported by the
   * implementation. The type of this attribute is uint16_t.
   */
  HSA_AMD_SYSTEM_INFO_EXT_VERSION_MAJOR = 0x207,
  /**
   * Minor version of the HSA runtime extension specification supported by the
   * implementation. The type of this attribute is uint16_t.
   */
  HSA_AMD_SYSTEM_INFO_EXT_VERSION_MINOR = 0x208
} hsa_system_info_t;

/**
 * @brief Get the current value of a system attribute.
 *
 * @param[in] attribute Attribute to query. The documentation of each
 * ::hsa_system_info_t enumerator states the type of the corresponding value.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * system attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_system_get_info(
    hsa_system_info_t attribute,
    void* value);

/**
 * @brief HSA extensions. Standard extensions are numbered from 0 up to
 * ::HSA_EXTENSION_STD_LAST; AMD extensions are numbered from
 * ::HSA_AMD_FIRST_EXTENSION up to ::HSA_AMD_LAST_EXTENSION.
 */
typedef enum {
  /**
   * Finalizer extension.
   */
  HSA_EXTENSION_FINALIZER = 0,
  /**
   * Images extension.
   */
  HSA_EXTENSION_IMAGES = 1,

  /**
   * Performance counter extension.
   */
  HSA_EXTENSION_PERFORMANCE_COUNTERS = 2,

  /**
   * Profiling events extension.
   */
  HSA_EXTENSION_PROFILING_EVENTS = 3,
  /**
   * Last standard extension number (equal to
   * ::HSA_EXTENSION_PROFILING_EVENTS). Note that this is the highest standard
   * ID, not the number of standard extensions.
   */
  HSA_EXTENSION_STD_LAST = 3,
  /**
   * First AMD extension number.
   */
  HSA_AMD_FIRST_EXTENSION = 0x200,
  /**
   * Profiler extension.
   */
  HSA_EXTENSION_AMD_PROFILER = 0x200,
  /**
   * Loader extension.
   */
  HSA_EXTENSION_AMD_LOADER = 0x201,
  /**
   * AqlProfile extension.
   */
  HSA_EXTENSION_AMD_AQLPROFILE = 0x202,
  /**
   * PC Sampling extension.
   */
  HSA_EXTENSION_AMD_PC_SAMPLING = 0x203,
  /**
   * Last AMD extension number (equal to ::HSA_EXTENSION_AMD_PC_SAMPLING).
   */
  HSA_AMD_LAST_EXTENSION = 0x203
} hsa_extension_t;

/**
 * @brief Query the name of a given extension.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t). If the
 * extension is not supported by the implementation (see
 * ::HSA_SYSTEM_INFO_EXTENSIONS), the behavior is undefined.
 *
 * @param[out] name Pointer to a memory location where the HSA runtime stores
 * the extension name. The extension name is a NUL-terminated string.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p name is NULL.
 */
hsa_status_t HSA_API hsa_extension_get_name(
    uint16_t extension,
    const char **name);

/**
 * @deprecated See ::hsa_system_major_extension_supported for the
 * non-deprecated query, which is keyed on the major version only.
 *
 * @brief Query if a given version of an extension is supported by the HSA
 * implementation.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] version_major Major version number.
 *
 * @param[in] version_minor Minor version number.
 *
 * @param[out] result Pointer to a memory location where the HSA runtime stores
 * the result of the check. The result is true if the specified version of the
 * extension is supported, and false otherwise.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p result is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_system_extension_supported(
    uint16_t extension,
    uint16_t version_major,
    uint16_t version_minor,
    bool* result);

/**
 * @brief Query if a given major version of an extension is supported by the
 * HSA implementation. All minor versions from 0 up to the returned
 * @p version_minor must be supported by the implementation.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] version_major Major version number.
 *
 * @param[out] version_minor Pointer to a memory location where the HSA runtime
 * stores a minor version number. All minor versions from 0 up to this value
 * are supported for @p version_major.
 *
 * @param[out] result Pointer to a memory location where the HSA runtime stores
 * the result of the check. The result is true if the specified version of the
 * extension is supported, and false otherwise.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p version_minor is NULL, or @p result is NULL.
 */
hsa_status_t HSA_API hsa_system_major_extension_supported(
    uint16_t extension,
    uint16_t version_major,
    uint16_t *version_minor,
    bool* result);


/**
 * @deprecated See ::hsa_system_get_major_extension_table for the
 * non-deprecated variant, which takes the size of the table instead of a minor
 * version number.
 *
 * @brief Retrieve the function pointers corresponding to a given version of an
 * extension. Portable applications are expected to invoke the extension API
 * using the returned function pointers.
 *
 * @details The application is responsible for verifying that the given version
 * of the extension is supported by the HSA implementation (see
 * ::hsa_system_extension_supported). If the given combination of extension,
 * major version, and minor version is not supported by the implementation, the
 * behavior is undefined.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] version_major Major version number for which to retrieve the
 * function pointer table.
 *
 * @param[in] version_minor Minor version number for which to retrieve the
 * function pointer table.
 *
 * @param[out] table Pointer to an application-allocated function pointer table
 * that is populated by the HSA runtime. Must not be NULL. The memory associated
 * with table can be reused or freed after the function returns.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p table is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_system_get_extension_table(
    uint16_t extension,
    uint16_t version_major,
    uint16_t version_minor,
    void *table);

/**
 * @brief Retrieve the function pointers corresponding to a given major version
 * of an extension. Portable applications are expected to invoke the extension
 * API using the returned function pointers.
 *
 * @details The application is responsible for verifying that the given major
 * version of the extension is supported by the HSA implementation (see
 * ::hsa_system_major_extension_supported). If the given combination of extension
 * and major version is not supported by the implementation, the behavior is
 * undefined. Additionally, if @p table_length does not allow space for a full
 * minor version, it is implementation-defined whether only some of the function
 * pointers for that minor version get written.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] version_major Major version number for which to retrieve the
 * function pointer table.
 *
 * @param[in] table_length Size in bytes of the function pointer table to be
 * populated. The implementation will not write more than this many bytes to the
 * table.
 *
 * @param[out] table Pointer to an application-allocated function pointer table
 * that is populated by the HSA runtime. Must not be NULL. The memory associated
 * with table can be reused or freed after the function returns.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p table is NULL.
 */
hsa_status_t HSA_API hsa_system_get_major_extension_table(
    uint16_t extension,
    uint16_t version_major,
    size_t table_length,
    void *table);

/**
 * @brief Struct containing an opaque handle to an agent, a device that
 * participates in the HSA memory model. An agent can submit AQL packets for
 * execution, and may also accept AQL packets for execution (agent dispatch
 * packets or kernel dispatch packets launching HSAIL-derived binaries).
 *
 * The struct wraps a single 64-bit value and is passed by value throughout
 * the API (see, for example, ::hsa_agent_get_info).
 */
typedef struct hsa_agent_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_agent_t;

/**
 * @brief Agent features. Reported through the ::HSA_AGENT_INFO_FEATURE
 * attribute.
 */
typedef enum {
  /**
   * The agent supports AQL packets of kernel dispatch type. An agent with
   * this feature enabled is also a kernel agent.
   */
  HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
  /**
   * The agent supports AQL packets of agent dispatch type.
   */
  HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
} hsa_agent_feature_t;

/**
 * @brief Hardware device type. Reported through the ::HSA_AGENT_INFO_DEVICE
 * attribute.
 */
typedef enum {
  /**
   * CPU device.
   */
  HSA_DEVICE_TYPE_CPU = 0,
  /**
   * GPU device.
   */
  HSA_DEVICE_TYPE_GPU = 1,
  /**
   * DSP device.
   */
  HSA_DEVICE_TYPE_DSP = 2,
  /**
   * AI Engine (AIE) device.
   */
  HSA_DEVICE_TYPE_AIE = 3
} hsa_device_type_t;

/**
 * @brief Default floating-point rounding mode.
 */
typedef enum {
  /**
   * Use a default floating-point rounding mode specified elsewhere.
   */
  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
  /**
   * Operations that specify the default floating-point mode are rounded to zero
   * by default.
   */
  HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
  /**
   * Operations that specify the default floating-point mode are rounded to the
   * nearest representable number, with ties broken by selecting the value
   * with an even least significant bit.
   */
  HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
} hsa_default_float_rounding_mode_t;

/**
 * @brief Agent attributes. Each attribute is queried with
 * ::hsa_agent_get_info. Note that the enumerator values are fixed and do not
 * follow declaration order (values 23 and 24 are declared between 5 and 6).
 */
typedef enum {
  /**
   * Agent name. The type of this attribute is a NUL-terminated char[64]. The
   * name must be at most 63 characters long (not including the NUL terminator)
   * and all array elements not used for the name must be NUL.
   */
  HSA_AGENT_INFO_NAME = 0,
  /**
   * Name of vendor. The type of this attribute is a NUL-terminated char[64].
   * The name must be at most 63 characters long (not including the NUL
   * terminator) and all array elements not used for the name must be NUL.
   */
  HSA_AGENT_INFO_VENDOR_NAME = 1,
  /**
   * Agent capability. The type of this attribute is ::hsa_agent_feature_t.
   */
  HSA_AGENT_INFO_FEATURE = 2,
  /**
   * @deprecated Query ::HSA_ISA_INFO_MACHINE_MODELS for a given instruction set
   * architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Machine model supported by the agent. The type of this attribute is
   * ::hsa_machine_model_t.
   */
  HSA_AGENT_INFO_MACHINE_MODEL = 3,
  /**
   * @deprecated Query ::HSA_ISA_INFO_PROFILES for a given instruction set
   * architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Profile supported by the agent. The type of this attribute is
   * ::hsa_profile_t.
   */
  HSA_AGENT_INFO_PROFILE = 4,
  /**
   * @deprecated Query ::HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES for a given
   * instruction set architecture supported by the agent instead.  If more than
   * one ISA is supported by the agent, the returned value corresponds to the
   * first ISA enumerated by ::hsa_agent_iterate_isas.
   *
   * Default floating-point rounding mode. The type of this attribute is
   * ::hsa_default_float_rounding_mode_t, but the value
   * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed.
   */
  HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
  /**
   * @deprecated Query ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES
   * for a given instruction set architecture supported by the agent instead.  If
   * more than one ISA is supported by the agent, the returned value corresponds
   * to the first ISA enumerated by ::hsa_agent_iterate_isas.
   *
   * A bit-mask of ::hsa_default_float_rounding_mode_t values, representing the
   * default floating-point rounding modes supported by the agent in the Base
   * profile. The type of this attribute is uint32_t. The default floating-point
   * rounding mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not
   * be set.
   */
  HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
  /**
   * @deprecated Query ::HSA_ISA_INFO_FAST_F16_OPERATION for a given instruction
   * set architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Flag indicating that the f16 HSAIL operation is at least as fast as the
   * f32 operation in the current agent. The value of this attribute is
   * undefined if the agent is not a kernel agent. The type of this
   * attribute is bool.
   */
  HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
  /**
   * @deprecated Query ::HSA_WAVEFRONT_INFO_SIZE for a given wavefront and
   * instruction set architecture supported by the agent instead.  If more than
   * one ISA is supported by the agent, the returned value corresponds to the
   * first ISA enumerated by ::hsa_agent_iterate_isas and the first wavefront
   * enumerated by ::hsa_isa_iterate_wavefronts for that ISA.
   *
   * Number of work-items in a wavefront. Must be a power of 2 in the range
   * [1,256]. The value of this attribute is undefined if the agent is not
   * a kernel agent. The type of this attribute is uint32_t.
   */
  HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
  /**
   * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_DIM for a given instruction
   * set architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Maximum number of work-items of each dimension of a work-group.  Each
   * maximum must be greater than 0. No maximum can exceed the value of
   * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is
   * undefined if the agent is not a kernel agent. The type of this
   * attribute is uint16_t[3].
   */
  HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
  /**
   * @deprecated Query ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE for a given instruction
   * set architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Maximum total number of work-items in a work-group. The value of this
   * attribute is undefined if the agent is not a kernel agent. The type
   * of this attribute is uint32_t.
   */
  HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
  /**
   * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_DIM for a given instruction set
   * architecture supported by the agent instead.
   *
   * Maximum number of work-items of each dimension of a grid. Each maximum must
   * be greater than 0, and must not be smaller than the corresponding value in
   * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
   * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined
   * if the agent is not a kernel agent. The type of this attribute is
   * ::hsa_dim3_t.
   */
  HSA_AGENT_INFO_GRID_MAX_DIM = 9,
  /**
   * @deprecated Query ::HSA_ISA_INFO_GRID_MAX_SIZE for a given instruction set
   * architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Maximum total number of work-items in a grid. The value of this attribute
   * is undefined if the agent is not a kernel agent. The type of this
   * attribute is uint32_t.
   */
  HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
  /**
   * @deprecated Query ::HSA_ISA_INFO_FBARRIER_MAX_SIZE for a given instruction
   * set architecture supported by the agent instead.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Maximum number of fbarriers per work-group. Must be at least 32. The value
   * of this attribute is undefined if the agent is not a kernel agent. The
   * type of this attribute is uint32_t.
   */
  HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
  /**
   * @deprecated The maximum number of queues is not statically determined.
   *
   * Maximum number of queues that can be active (created but not destroyed) at
   * one time in the agent. The type of this attribute is uint32_t.
   */
  HSA_AGENT_INFO_QUEUES_MAX = 12,
  /**
   * Minimum number of packets that a queue created in the agent
   * can hold. Must be a power of 2 greater than 0. Must not exceed
   * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this
   * attribute is uint32_t.
   */
  HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
  /**
   * Maximum number of packets that a queue created in the agent can
   * hold. Must be a power of 2 greater than 0. The type of this attribute
   * is uint32_t.
   */
  HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
  /**
   * Type of a queue created in the agent. The type of this attribute is
   * ::hsa_queue_type32_t.
   */
  HSA_AGENT_INFO_QUEUE_TYPE = 15,
  /**
   * @deprecated NUMA information is not exposed anywhere else in the API.
   *
   * Identifier of the NUMA node associated with the agent. The type of this
   * attribute is uint32_t.
   */
  HSA_AGENT_INFO_NODE = 16,
  /**
   * Type of hardware device associated with the agent. The type of this
   * attribute is ::hsa_device_type_t.
   */
  HSA_AGENT_INFO_DEVICE = 17,
  /**
   * @deprecated Query ::hsa_agent_iterate_caches to retrieve information about
   * the caches present in a given agent.
   *
   * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size
   * of 0 for a particular level indicates that there is no cache information
   * for that level. The type of this attribute is uint32_t[4].
   */
  HSA_AGENT_INFO_CACHE_SIZE = 18,
  /**
   * @deprecated An agent may support multiple instruction set
   * architectures. See ::hsa_agent_iterate_isas.  If more than one ISA is
   * supported by the agent, the returned value corresponds to the first ISA
   * enumerated by ::hsa_agent_iterate_isas.
   *
   * Instruction set architecture of the agent. The type of this attribute
   * is ::hsa_isa_t.
   */
  HSA_AGENT_INFO_ISA = 19,
  /**
   * Bit-mask indicating which extensions are supported by the agent. An
   * extension with an ID of @p i is supported if the bit at position @p i is
   * set. The type of this attribute is uint8_t[128].
   */
  HSA_AGENT_INFO_EXTENSIONS = 20,
  /**
   * Major version of the HSA runtime specification supported by the
   * agent. The type of this attribute is uint16_t.
   */
  HSA_AGENT_INFO_VERSION_MAJOR = 21,
  /**
   * Minor version of the HSA runtime specification supported by the
   * agent. The type of this attribute is uint16_t.
   */
  HSA_AGENT_INFO_VERSION_MINOR = 22,
  /**
   * Not a queryable attribute; it exists only to widen the range of the enum.
   *
   * This enum does not have a fixed underlying type, thus in C++ post D2338:
   * If the enumeration type does not have a fixed underlying type, the value is
   * unchanged if the original value is within the range of the enumeration
   * values (9.7.1 [dcl.enum]), and otherwise, the behavior is
   * undefined.
   * Thus increase the range of this enum to encompass vendor extensions.
   */
  HSA_AGENT_INFO_LAST = INT32_MAX
} hsa_agent_info_t;

/**
 * @brief Get the current value of an attribute for a given agent.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] attribute Attribute to query. The documentation of each
 * ::hsa_agent_info_t enumerator states the type of the corresponding value.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * agent attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_agent_get_info(
    hsa_agent_t agent,
    hsa_agent_info_t attribute,
    void* value);

/**
 * @brief Iterate over the available agents, and invoke an
 * application-defined callback on every iteration.
 *
 * @param[in] callback Callback to be invoked once per agent. The HSA
 * runtime passes two arguments to the callback: the agent and the
 * application data.  If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_iterate_agents returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_iterate_agents(
    hsa_status_t (*callback)(hsa_agent_t agent, void* data),
    void* data);

/*

// If we do not know the size of an attribute, we need to query it first
// Note: this API will not be in the spec unless needed
hsa_status_t HSA_API hsa_agent_get_info_size(
    hsa_agent_t agent,
    hsa_agent_info_t attribute,
    size_t* size);

// Set the value of an agent's attribute
// Note: this API will not be in the spec unless needed
hsa_status_t HSA_API hsa_agent_set_info(
    hsa_agent_t agent,
    hsa_agent_info_t attribute,
    void* value);

*/

/**
 * @brief Exception policies that can be applied when a hardware exception
 * occurs.
 */
typedef enum {
  /**
   * When a hardware exception is detected, a work-item signals an exception.
   */
  HSA_EXCEPTION_POLICY_BREAK = 1,
  /**
   * When a hardware exception is detected, a hardware status bit is set.
   */
  HSA_EXCEPTION_POLICY_DETECT = 2
} hsa_exception_policy_t;

/**
 * @deprecated Use ::hsa_isa_get_exception_policies for a given instruction set
 * architecture supported by the agent instead. If more than one ISA is
 * supported by the agent, this function uses the first value returned by
 * ::hsa_agent_iterate_isas.
 *
 * @brief Retrieve the exception policy support for a given combination of
 * agent and profile.
 *
 * @param[in] agent Agent.
 *
 * @param[in] profile Profile.
 *
 * @param[out] mask Pointer to a memory location where the HSA runtime stores a
 * mask of ::hsa_exception_policy_t values. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
 * profile, or @p mask is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_get_exception_policies(
    hsa_agent_t agent,
    hsa_profile_t profile,
    uint16_t *mask);

/**
 * @brief Cache handle. Cache handles are passed to the callback of
 * ::hsa_agent_iterate_caches and can be queried with ::hsa_cache_get_info.
 */
typedef struct hsa_cache_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_cache_t;

/**
 * @brief Cache attributes. Each attribute is queried with
 * ::hsa_cache_get_info.
 */
typedef enum {
  /**
   * The length of the cache name in bytes, not including the NUL terminator.
   * The type of this attribute is uint32_t.
   */
  HSA_CACHE_INFO_NAME_LENGTH = 0,
  /**
   * Human-readable description.  The type of this attribute is a NUL-terminated
   * character array with the length equal to the value of
   * ::HSA_CACHE_INFO_NAME_LENGTH attribute.
   */
  HSA_CACHE_INFO_NAME = 1,
  /**
   * Cache level. An L1 cache must return a value of 1, an L2 must return a
   * value of 2, and so on.  The type of this attribute is uint8_t.
   */
  HSA_CACHE_INFO_LEVEL = 2,
  /**
   * Cache size, in bytes. A value of 0 indicates that there is no size
   * information available. The type of this attribute is uint32_t.
   */
  HSA_CACHE_INFO_SIZE = 3
} hsa_cache_info_t;

/**
 * @brief Get the current value of an attribute for a given cache object.
 *
 * @param[in] cache Cache.
 *
 * @param[in] attribute Attribute to query. The documentation of each
 * ::hsa_cache_info_t enumerator states the type of the corresponding value.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CACHE The cache is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * cache attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_cache_get_info(
    hsa_cache_t cache,
    hsa_cache_info_t attribute,
    void* value);

/**
 * @brief Iterate over the memory caches of a given agent, and
 * invoke an application-defined callback on every iteration.
 *
 * @details Caches are visited in ascending order according to the value of the
 * ::HSA_CACHE_INFO_LEVEL attribute.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] callback Callback to be invoked once per cache that is present in
 * the agent.  The HSA runtime passes two arguments to the callback: the cache
 * and the application data.  If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_agent_iterate_caches returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_agent_iterate_caches(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_cache_t cache, void* data),
    void* data);

/**
 * @deprecated See ::hsa_agent_major_extension_supported for the
 * non-deprecated query, which is keyed on the major version only.
 *
 * @brief Query if a given version of an extension is supported by an agent.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] agent Agent.
 *
 * @param[in] version_major Major version number.
 *
 * @param[in] version_minor Minor version number.
 *
 * @param[out] result Pointer to a memory location where the HSA runtime stores
 * the result of the check. The result is true if the specified version of the
 * extension is supported, and false otherwise. The result must be false if
 * ::hsa_system_extension_supported returns false for the same extension
 * version.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p result is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_agent_extension_supported(
    uint16_t extension,
    hsa_agent_t agent,
    uint16_t version_major,
    uint16_t version_minor,
    bool* result);

/**
 * @brief Query if a given major version of an extension is supported by an
 * agent. All minor versions from 0 up to the returned @p version_minor must be
 * supported.
 *
 * @param[in] extension Extension identifier (see ::hsa_extension_t).
 *
 * @param[in] agent Agent.
 *
 * @param[in] version_major Major version number.
 *
 * @param[out] version_minor Pointer to a memory location where the HSA runtime
 * stores a minor version number. All minor versions from 0 up to this value
 * are supported for @p version_major.
 *
 * @param[out] result Pointer to a memory location where the HSA runtime stores
 * the result of the check. The result is true if the specified version of the
 * extension is supported, and false otherwise. The result must be false if
 * ::hsa_system_extension_supported returns false for the same extension
 * version.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
 * extension, or @p version_minor is NULL, or @p result is NULL.
 */
hsa_status_t HSA_API hsa_agent_major_extension_supported(
    uint16_t extension,
    hsa_agent_t agent,
    uint16_t version_major,
    uint16_t *version_minor,
    bool* result);


/** @} */


/** \defgroup signals Signals
 *  @{
 */

/**
 * @brief Signal handle. Signals are created with ::hsa_signal_create and
 * released with ::hsa_signal_destroy.
 */
typedef struct hsa_signal_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal. The value 0 is reserved.
   */
  uint64_t handle;
} hsa_signal_t;

/**
 * @brief Signal value. The value occupies 32 bits in the small machine model,
 * and 64 bits in the large machine model. The width is selected at compile
 * time: the 64-bit type is used when the HSA_LARGE_MODEL macro is defined.
 */
#ifdef HSA_LARGE_MODEL
  typedef int64_t hsa_signal_value_t;
#else
  typedef int32_t hsa_signal_value_t;
#endif

/**
 * @brief Create a signal.
 *
 * @details A signal created by this function is destroyed with
 * ::hsa_signal_destroy.
 *
 * @param[in] initial_value Initial value of the signal.
 *
 * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
 * any agent might wait on the signal.
 *
 * @param[in] consumers List of agents that might consume (wait on) the
 * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
 * HSA runtime might use the list to optimize the handling of the signal
 * object. If an agent not listed in @p consumers waits on the returned
 * signal, the behavior is undefined. The memory associated with @p consumers
 * can be reused or freed after the function returns.
 *
 * @param[out] signal Pointer to a memory location where the HSA runtime will
 * store the newly created signal handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
 * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
 * contains duplicates.
 */
hsa_status_t HSA_API hsa_signal_create(
    hsa_signal_value_t initial_value,
    uint32_t num_consumers,
    const hsa_agent_t *consumers,
    hsa_signal_t *signal);

/**
 * @brief Destroy a signal previously created by ::hsa_signal_create.
 *
 * @param[in] signal Signal to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0.
 */
hsa_status_t HSA_API hsa_signal_destroy(
    hsa_signal_t signal);

/**
 * @brief Atomically read the current value of a signal.
 *
 * @details The variants of this function perform the same read; the suffix of
 * the function name indicates the memory order used.
 *
 * @param[in] signal Signal.
 *
 * @return Value of the signal.
 */
hsa_signal_value_t HSA_API hsa_signal_load_scacquire(
    hsa_signal_t signal);

/**
 * @copydoc hsa_signal_load_scacquire
 */
hsa_signal_value_t HSA_API hsa_signal_load_relaxed(
    hsa_signal_t signal);

/**
 * @deprecated Renamed as ::hsa_signal_load_scacquire.
 *
 * @copydoc hsa_signal_load_scacquire
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_load_acquire(
    hsa_signal_t signal);

/**
 * @brief Atomically set the value of a signal.
 *
 * @details If the value of the signal is changed, all the agents waiting
 * on @p signal for which @p value satisfies their wait condition are awakened.
 *
 * The variants of this function perform the same store; the suffix of the
 * function name indicates the memory order used. See
 * ::hsa_signal_silent_store_relaxed for a store that does not necessarily
 * notify waiting agents.
 *
 * @param[in] signal Signal.
 *
 * @param[in] value New signal value.
 */
void HSA_API hsa_signal_store_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_store_relaxed
 */
void HSA_API hsa_signal_store_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_store_screlease.
 *
 * @copydoc hsa_signal_store_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_store_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically set the value of a signal without necessarily notifying
 * the agents waiting on it.
 *
 * @details The agents waiting on @p signal may not wake up even when the new
 * value satisfies their wait condition. If the application wants to update the
 * signal and there is no need to notify any agent, invoking this function can
 * be more efficient than calling the non-silent counterpart
 * (::hsa_signal_store_relaxed or ::hsa_signal_store_screlease).
 *
 * @param[in] signal Signal.
 *
 * @param[in] value New signal value.
 */
void HSA_API hsa_signal_silent_store_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_silent_store_relaxed
 */
void HSA_API hsa_signal_silent_store_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically set the value of a signal and return its previous value.
 *
 * @details If the value of the signal is changed, all the agents waiting
 * on @p signal for which @p value satisfies their wait condition are awakened.
 *
 * The variants of this function perform the same exchange; the suffix of the
 * function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value New value.
 *
 * @return Value of the signal prior to the exchange.
 */
hsa_signal_value_t HSA_API hsa_signal_exchange_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_exchange_scacq_screl.
 *
 * @copydoc hsa_signal_exchange_scacq_screl
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_exchange_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_exchange_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_exchange_scacquire.
 *
 * @copydoc hsa_signal_exchange_scacquire
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_exchange_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_exchange_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_exchange_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_exchange_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_exchange_screlease.
 *
 * @copydoc hsa_signal_exchange_screlease
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_exchange_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically set the value of a signal if the observed value is equal to
 * the expected value. The observed value is returned regardless of whether the
 * replacement was done.
 *
 * @details If the value of the signal is changed, all the agents waiting
 * on @p signal for which @p value satisfies their wait condition are awakened.
 *
 * The variants of this function perform the same compare-and-swap; the suffix
 * of the function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue
 * doorbell signal, the behavior is undefined.
 *
 * @param[in] expected Value to compare with.
 *
 * @param[in] value New value.
 *
 * @return Observed value of the signal. The replacement was done if and only
 * if the returned value is equal to @p expected.
 */
hsa_signal_value_t HSA_API hsa_signal_cas_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_cas_scacq_screl.
 *
 * @copydoc hsa_signal_cas_scacq_screl
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_cas_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_cas_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_cas_scacquire.
 *
 * @copydoc hsa_signal_cas_scacquire
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_cas_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_cas_scacq_screl
 */
hsa_signal_value_t HSA_API hsa_signal_cas_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_cas_screlease.
 *
 * @copydoc hsa_signal_cas_screlease
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_cas_release(
    hsa_signal_t signal,
    hsa_signal_value_t expected,
    hsa_signal_value_t value);

/**
 * @brief Atomically increment the value of a signal by a given amount.
 *
 * @details If the value of the signal is changed, all the agents waiting on
 * @p signal for which the new signal value satisfies their wait condition are
 * awakened.
 *
 * The variants of this function perform the same addition; the suffix of the
 * function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value Value to add to the value of the signal.
 */
void HSA_API hsa_signal_add_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_add_scacq_screl.
 *
 * @copydoc hsa_signal_add_scacq_screl
 */
void HSA_API HSA_DEPRECATED hsa_signal_add_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_add_scacq_screl
 */
void HSA_API hsa_signal_add_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_add_scacquire.
 *
 * @copydoc hsa_signal_add_scacquire
 */
void HSA_API HSA_DEPRECATED hsa_signal_add_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_add_scacq_screl
 */
void HSA_API hsa_signal_add_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_add_scacq_screl
 */
void HSA_API hsa_signal_add_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_add_screlease.
 *
 * @copydoc hsa_signal_add_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_add_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically decrement the value of a signal by a given amount.
 *
 * @details If the value of the signal is changed, all the agents waiting on
 * @p signal for which the new signal value satisfies their wait condition are
 * awakened.
 *
 * The variants of this function perform the same subtraction; the suffix of
 * the function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value Value to subtract from the value of the signal.
 */
void HSA_API hsa_signal_subtract_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_subtract_scacq_screl.
 *
 * @copydoc hsa_signal_subtract_scacq_screl
 */
void HSA_API HSA_DEPRECATED hsa_signal_subtract_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_subtract_scacq_screl
 */
void HSA_API hsa_signal_subtract_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_subtract_scacquire.
 *
 * @copydoc hsa_signal_subtract_scacquire
 */
void HSA_API HSA_DEPRECATED hsa_signal_subtract_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_subtract_scacq_screl
 */
void HSA_API hsa_signal_subtract_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_subtract_scacq_screl
 */
void HSA_API hsa_signal_subtract_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_subtract_screlease.
 *
 * @copydoc hsa_signal_subtract_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_subtract_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically perform a bitwise AND operation between the value of a
 * signal and a given value.
 *
 * @details If the value of the signal is changed, all the agents waiting on
 * @p signal for which the new signal value satisfies their wait condition are
 * awakened.
 *
 * The variants of this function perform the same operation; the suffix of the
 * function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value Value to AND with the value of the signal.
 */
void HSA_API hsa_signal_and_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_and_scacq_screl.
 *
 * @copydoc hsa_signal_and_scacq_screl
 */
void HSA_API HSA_DEPRECATED hsa_signal_and_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_and_scacq_screl
 */
void HSA_API hsa_signal_and_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_and_scacquire.
 *
 * @copydoc hsa_signal_and_scacquire
 */
void HSA_API HSA_DEPRECATED hsa_signal_and_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_and_scacq_screl
 */
void HSA_API hsa_signal_and_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_and_scacq_screl
 */
void HSA_API hsa_signal_and_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_and_screlease.
 *
 * @copydoc hsa_signal_and_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_and_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically perform a bitwise OR operation between the value of a
 * signal and a given value.
 *
 * @details If the value of the signal is changed, all the agents waiting on
 * @p signal for which the new signal value satisfies their wait condition are
 * awakened.
 *
 * The variants of this function perform the same operation; the suffix of the
 * function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value Value to OR with the value of the signal.
 */
void HSA_API hsa_signal_or_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_or_scacq_screl.
 *
 * @copydoc hsa_signal_or_scacq_screl
 */
void HSA_API HSA_DEPRECATED hsa_signal_or_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_or_scacq_screl
 */
void HSA_API hsa_signal_or_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_or_scacquire.
 *
 * @copydoc hsa_signal_or_scacquire
 */
void HSA_API HSA_DEPRECATED hsa_signal_or_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_or_scacq_screl
 */
void HSA_API hsa_signal_or_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_or_scacq_screl
 */
void HSA_API hsa_signal_or_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_or_screlease.
 *
 * @copydoc hsa_signal_or_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_or_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Atomically perform a bitwise XOR operation between the value of a
 * signal and a given value.
 *
 * @details If the value of the signal is changed, all the agents waiting on
 * @p signal for which the new signal value satisfies their wait condition are
 * awakened.
 *
 * The variants of this function perform the same operation; the suffix of the
 * function name indicates the memory order used.
 *
 * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
 * behavior is undefined.
 *
 * @param[in] value Value to XOR with the value of the signal.
 */
void HSA_API hsa_signal_xor_scacq_screl(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_xor_scacq_screl.
 *
 * @copydoc hsa_signal_xor_scacq_screl
 */
void HSA_API HSA_DEPRECATED hsa_signal_xor_acq_rel(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_xor_scacq_screl
 */
void HSA_API hsa_signal_xor_scacquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_xor_scacquire.
 *
 * @copydoc hsa_signal_xor_scacquire
 */
void HSA_API HSA_DEPRECATED hsa_signal_xor_acquire(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_xor_scacq_screl
 */
void HSA_API hsa_signal_xor_relaxed(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @copydoc hsa_signal_xor_scacq_screl
 */
void HSA_API hsa_signal_xor_screlease(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @deprecated Renamed as ::hsa_signal_xor_screlease.
 *
 * @copydoc hsa_signal_xor_screlease
 */
void HSA_API HSA_DEPRECATED hsa_signal_xor_release(
    hsa_signal_t signal,
    hsa_signal_value_t value);

/**
 * @brief Wait condition operator.
 *
 * @details Selects the comparison that a wait operation applies between its
 * two operands (the signal wait functions compare the signal value with a
 * caller-supplied comparison value).
 */
typedef enum {
  /** The two operands are equal. */
  HSA_SIGNAL_CONDITION_EQ = 0,
  /** The two operands are not equal. */
  HSA_SIGNAL_CONDITION_NE = 1,
  /** The first operand is less than the second operand. */
  HSA_SIGNAL_CONDITION_LT = 2,
  /** The first operand is greater than or equal to the second operand. */
  HSA_SIGNAL_CONDITION_GTE = 3
} hsa_signal_condition_t;

/**
 * @brief State of the application thread during a signal wait.
 *
 * @details Passed to the wait functions as a hint only.
 */
typedef enum {
  /** The application thread may be rescheduled while waiting on the signal. */
  HSA_WAIT_STATE_BLOCKED = 0,
  /** The application thread stays active while waiting on a signal. */
  HSA_WAIT_STATE_ACTIVE = 1
} hsa_wait_state_t;


/**
 * @brief Wait until a signal value satisfies a specified condition, or a
 * certain amount of time has elapsed.
 *
 * @details A wait operation can spuriously resume at any time sooner than the
 * timeout (for example, due to system or other external factors) even when the
 * condition has not been met.
 *
 * The function is guaranteed to return if the signal value satisfies the
 * condition at some point in time during the wait, but the value returned to
 * the application might not satisfy the condition. The application must ensure
 * that signals are used in such a way that wait wakeup conditions are not
 * invalidated before dependent threads have woken up.
 *
 * When the wait operation internally loads the value of the passed signal, it
 * uses the memory order indicated in the function name.
 *
 * @param[in] signal Signal.
 *
 * @param[in] condition Condition used to compare the signal value with @p
 * compare_value.
 *
 * @param[in] compare_value Value to compare with.
 *
 * @param[in] timeout_hint Maximum duration of the wait. Specified in the same
 * unit as the system timestamp. The operation might block for a shorter or
 * longer time even if the condition is not met. A value of UINT64_MAX indicates
 * no maximum.
 *
 * @param[in] wait_state_hint Hint used by the application to indicate the
 * preferred waiting state. The actual waiting state is ultimately decided by
 * the HSA runtime and may not match the provided hint. A value of
 * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal
 * update by avoiding rescheduling overhead.
 *
 * @return Observed value of the signal, which might not satisfy the specified
 * condition. Callers should therefore re-check the returned value against the
 * condition instead of assuming that it was met.
 */
hsa_signal_value_t HSA_API hsa_signal_wait_scacquire(
    hsa_signal_t signal,
    hsa_signal_condition_t condition,
    hsa_signal_value_t compare_value,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_state_hint);

/**
 * @copydoc hsa_signal_wait_scacquire
 */
hsa_signal_value_t HSA_API hsa_signal_wait_relaxed(
    hsa_signal_t signal,
    hsa_signal_condition_t condition,
    hsa_signal_value_t compare_value,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_state_hint);

/**
 * @deprecated Renamed as ::hsa_signal_wait_scacquire.
 *
 * @copydoc hsa_signal_wait_scacquire
 */
hsa_signal_value_t HSA_API HSA_DEPRECATED hsa_signal_wait_acquire(
    hsa_signal_t signal,
    hsa_signal_condition_t condition,
    hsa_signal_value_t compare_value,
    uint64_t timeout_hint,
    hsa_wait_state_t wait_state_hint);

/**
 * @brief Group of signals.
 *
 * @details Created by ::hsa_signal_group_create and passed by value to the
 * signal group functions.
 */
typedef struct hsa_signal_group_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_signal_group_t;

/**
 * @brief Create a signal group.
 *
 * @details A signal group created by this function is destroyed with
 * ::hsa_signal_group_destroy.
 *
 * @param[in] num_signals Number of elements in @p signals. Must not be 0.
 *
 * @param[in] signals List of signals in the group. The list must not contain
 * any repeated elements. Must not be NULL.
 *
 * @param[in] num_consumers Number of elements in @p consumers. Must not be 0.
 *
 * @param[in] consumers List of agents that might consume (wait on) the signal
 * group. The list must not contain repeated elements, and must be a subset of
 * the set of agents that are allowed to wait on all the signals in the
 * group. If an agent not listed in @p consumers waits on the returned group,
 * the behavior is undefined. The memory associated with @p consumers can be
 * reused or freed after the function returns. Must not be NULL.
 *
 * @param[out] signal_group Pointer to a memory location where the HSA runtime
 * stores the newly created signal group. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_signals is 0, @p signals
 * is NULL, @p num_consumers is 0, @p consumers is NULL, or @p signal_group is
 * NULL.
 */
hsa_status_t HSA_API hsa_signal_group_create(
    uint32_t num_signals,
    const hsa_signal_t *signals,
    uint32_t num_consumers,
    const hsa_agent_t *consumers,
    hsa_signal_group_t *signal_group);

/**
 * @brief Destroy a signal group previously created by
 * ::hsa_signal_group_create.
 *
 * @param[in] signal_group Signal group to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
 */
hsa_status_t HSA_API hsa_signal_group_destroy(
    hsa_signal_group_t signal_group);

/**
 * @brief Wait until the value of at least one of the signals in a signal group
 * satisfies its associated condition.
 *
 * @details The function is guaranteed to return if the value of at least one of
 * the signals in the group satisfies its associated condition at some point in
 * time during the wait, but the signal value returned to the application may no
 * longer satisfy the condition. The application must ensure that signals in the
 * group are used in such a way that wait wakeup conditions are not invalidated
 * before dependent threads have woken up.
 *
 * When this operation internally loads the value of the passed signal, it uses
 * the memory order indicated in the function name.
 *
 * @param[in] signal_group Signal group.
 *
 * @param[in] conditions List of conditions. Each condition, and the value at
 * the same index in @p compare_values, is used to compare the value of the
 * signal at that index in @p signal_group (the signal passed by the application
 * to ::hsa_signal_group_create at that particular index). The size of @p
 * conditions must not be smaller than the number of signals in @p signal_group;
 * any extra elements are ignored. Must not be NULL.
 *
 * @param[in] compare_values List of comparison values. The size of @p
 * compare_values must not be smaller than the number of signals in @p
 * signal_group; any extra elements are ignored. Must not be NULL.
 *
 * @param[in] wait_state_hint Hint used by the application to indicate the
 * preferred waiting state. The actual waiting state is decided by the HSA runtime
 * and may not match the provided hint. A value of ::HSA_WAIT_STATE_ACTIVE may
 * improve the latency of response to a signal update by avoiding rescheduling
 * overhead.
 *
 * @param[out] signal Signal in the group that satisfied the associated
 * condition. If several signals satisfied their condition, the function can
 * return any of those signals. Must not be NULL.
 *
 * @param[out] value Observed value for @p signal, which might no longer satisfy
 * the specified condition. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL_GROUP @p signal_group is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p conditions is NULL, @p
 * compare_values is NULL, @p signal is NULL, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_signal_group_wait_any_scacquire(
    hsa_signal_group_t signal_group,
    const hsa_signal_condition_t *conditions,
    const hsa_signal_value_t *compare_values,
    hsa_wait_state_t wait_state_hint,
    hsa_signal_t *signal,
    hsa_signal_value_t *value);

/**
 * @copydoc hsa_signal_group_wait_any_scacquire
 */
hsa_status_t HSA_API hsa_signal_group_wait_any_relaxed(
    hsa_signal_group_t signal_group,
    const hsa_signal_condition_t *conditions,
    const hsa_signal_value_t *compare_values,
    hsa_wait_state_t wait_state_hint,
    hsa_signal_t *signal,
    hsa_signal_value_t *value);

/** @} */

/** \defgroup memory Memory
 *  @{
 */

/**
 * @brief Memory region handle.
 *
 * @details A memory region represents a block of virtual memory with certain
 * properties. For example, the HSA runtime represents fine-grained memory in
 * the global segment using a region. A region might be associated with more
 * than one agent.
 */
typedef struct hsa_region_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_region_t;

/** @} */


/** \defgroup queue Queues
 *  @{
 */

/**
 * @brief Queue type. Intended to be used for dynamic queue protocol
 * determination.
 *
 * @details Constants of this type are stored in queue structures using the
 * fixed-size ::hsa_queue_type32_t representation.
 */
typedef enum {
  /**
   * Queue supports multiple producers. Use of multiproducer queue mechanics is
   * required.
   */
  HSA_QUEUE_TYPE_MULTI = 0,
  /**
   * Queue only supports a single producer. In some scenarios, the application
   * may want to limit the submission of AQL packets to a single agent. Queues
   * that support a single producer may be more efficient than queues supporting
   * multiple producers. Use of multiproducer queue mechanics is not supported.
   */
  HSA_QUEUE_TYPE_SINGLE = 1,
  /**
   * Queue supports multiple producers and cooperative dispatches. Cooperative
   * dispatches are able to use GWS synchronization. Queues of this type may be
   * limited in number. The runtime may return the same queue to serve multiple
   * ::hsa_queue_create calls when this type is given. Callers must inspect the
   * returned queue to discover queue size. Queues of this type are reference
   * counted and require a matching number of ::hsa_queue_destroy calls to
   * release. Use of multiproducer queue mechanics is required. See
   * ::HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES to query agent support for this
   * type.
   */
  HSA_QUEUE_TYPE_COOPERATIVE = 2
} hsa_queue_type_t;

/**
 * @brief A fixed-size type used to represent ::hsa_queue_type_t constants.
 *
 * @details Used as the type of the @a type field of ::hsa_queue_t.
 */
typedef uint32_t hsa_queue_type32_t;

/**
 * @brief Queue features.
 *
 * @details The enumerators are distinct bit values; the @a features field of
 * ::hsa_queue_t holds a bit-field of them.
 */
typedef enum {
  /**
   * Queue supports kernel dispatch packets.
   */
  HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,

  /**
   * Queue supports agent dispatch packets.
   */
  HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
} hsa_queue_feature_t;

/**
 * @brief User mode queue.
 *
 * @details The queue structure is read-only and allocated by the HSA runtime,
 * but agents can directly modify the contents of the buffer pointed by @a
 * base_address, or use HSA runtime APIs to access the doorbell signal.
 *
 */
typedef struct hsa_queue_s {
  /**
   * Queue type, stored as a fixed-size ::hsa_queue_type32_t.
   */
  hsa_queue_type32_t type;

  /**
   * Queue features mask. This is a bit-field of ::hsa_queue_feature_t
   * values. Applications should ignore any unknown set bits.
   */
  uint32_t features;

  /*
   * The declaration of base_address (and of reserved0, when present) depends
   * on the HSA_LARGE_MODEL and HSA_LITTLE_ENDIAN macros. The field
   * descriptions are attached to the HSA_LITTLE_ENDIAN branch.
   */
#ifdef HSA_LARGE_MODEL
  /* Large model: only base_address is declared; there is no reserved0. */
  void* base_address;
#elif defined HSA_LITTLE_ENDIAN
  /**
   * Starting address of the HSA runtime-allocated buffer used to store the AQL
   * packets. Must be aligned to the size of an AQL packet.
   */
  void* base_address;
  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved0;
#else
  /* Neither macro is defined: reserved0 is declared before base_address. */
  uint32_t reserved0;
  void* base_address;
#endif

  /**
   * Signal object used by the application to indicate the ID of a packet that
   * is ready to be processed. The HSA runtime manages the doorbell signal. If
   * the application tries to replace or destroy this signal, the behavior is
   * undefined.
   *
   * If @a type is ::HSA_QUEUE_TYPE_SINGLE, the doorbell signal value must be
   * updated in a monotonically increasing fashion. If @a type is
   * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any
   * value.
   */
  hsa_signal_t doorbell_signal;

  /**
   * Maximum number of packets the queue can hold. Must be a power of 2.
   */
  uint32_t size;
  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;
  /**
   * Queue identifier, which is unique over the lifetime of the application.
   */
  uint64_t id;

} hsa_queue_t;

/**
 * @brief Create a user mode queue.
 *
 * @details The HSA runtime creates the queue structure, the underlying packet
 * buffer, the doorbell signal, and the write and read indexes. The initial
 * value of the write and read indexes is 0. The type of every packet in the
 * buffer is initialized to ::HSA_PACKET_TYPE_INVALID.
 *
 * The application should only rely on the error code returned to determine if
 * the queue is valid.
 *
 * @param[in] agent Agent where to create the queue.
 *
 * @param[in] size Number of packets the queue is expected to
 * hold. Must be a power of 2 between 1 and the value of
 * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly
 * created queue is the maximum of @p size and the value of
 * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
 *
 * @param[in] type Type of the queue, a bitwise OR of ::hsa_queue_type_t values.
 * If the value of ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE,
 * then @p type must also be ::HSA_QUEUE_TYPE_SINGLE.
 *
 * @param[in] callback Callback invoked by the HSA runtime for every
 * asynchronous event related to the newly created queue. May be NULL. The HSA
 * runtime passes three arguments to the callback: a code identifying the event
 * that triggered the invocation, a pointer to the queue where the event
 * originated, and the application data.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * invocation. May be NULL.
 *
 * @param[in] private_segment_size Hint indicating the maximum
 * expected private segment usage per work-item, in bytes. There may
 * be performance degradation if the application places a kernel
 * dispatch packet in the queue and the corresponding private segment
 * usage exceeds @p private_segment_size. If the application does not
 * want to specify any particular value for this argument, @p
 * private_segment_size must be UINT32_MAX. If the queue does not
 * support kernel dispatch packets, this argument is ignored.
 *
 * @param[in] group_segment_size Hint indicating the maximum expected
 * group segment usage per work-group, in bytes. There may be
 * performance degradation if the application places a kernel dispatch
 * packet in the queue and the corresponding group segment usage
 * exceeds @p group_segment_size. If the application does not want to
 * specify any particular value for this argument, @p
 * group_segment_size must be UINT32_MAX. If the queue does not
 * support kernel dispatch packets, this argument is ignored.
 *
 * @param[out] queue Memory location where the HSA runtime stores a pointer to
 * the newly created queue.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not
 * support queues of the given type.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two,
 * @p size is 0, @p type is an invalid queue type, or @p queue is NULL.
 *
 */
hsa_status_t HSA_API hsa_queue_create(
    hsa_agent_t agent,
    uint32_t size,
    hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
    void *data,
    uint32_t private_segment_size,
    uint32_t group_segment_size,
    hsa_queue_t **queue);

/**
 * @brief Create a queue for which the application or a kernel is responsible
 * for processing the AQL packets.
 *
 * @details The application can use this function to create queues where AQL
 * packets are not parsed by the packet processor associated with an agent,
 * but rather by a unit of execution running on that agent (for example, a
 * thread in the host application).
 *
 * The application is responsible for ensuring that all the producers and
 * consumers of the resulting queue can access the provided doorbell signal
 * and memory region. The application is also responsible for ensuring that the
 * unit of execution processing the queue packets supports the indicated
 * features (AQL packet types).
 *
 * When the queue is created, the HSA runtime allocates the packet buffer using
 * @p region, and the write and read indexes. The initial value of the write and
 * read indexes is 0, and the type of every packet in the buffer is initialized
 * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features,
 * and @e doorbell_signal fields in the returned queue match the values passed
 * by the application.
 *
 * @param[in] region Memory region that the HSA runtime should use to allocate
 * the AQL packet buffer and any other queue metadata.
 *
 * @param[in] size Number of packets the queue is expected to hold. Must be a
 * power of 2 greater than 0.
 *
 * @param[in] type Queue type.
 *
 * @param[in] features Supported queue features. This is a bit-field of
 * ::hsa_queue_feature_t values.
 *
 * @param[in] doorbell_signal Doorbell signal that the HSA runtime must
 * associate with the returned queue. The signal handle must not be 0.
 *
 * @param[out] queue Memory location where the HSA runtime stores a pointer to
 * the newly created queue. The application should not rely on the value
 * returned for this argument but only on the status code to determine if the
 * queue is valid. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p
 * size is 0, @p type is an invalid queue type, the doorbell signal handle is
 * 0, or @p queue is NULL.
 *
 */
hsa_status_t HSA_API hsa_soft_queue_create(
    hsa_region_t region,
    uint32_t size,
    hsa_queue_type32_t type,
    uint32_t features,
    hsa_signal_t doorbell_signal,
    hsa_queue_t **queue);

/**
 * @brief Destroy a user mode queue.
 *
 * @details When a queue is destroyed, the state of the AQL packets that have
 * not yet been fully processed (their completion phase has not finished)
 * becomes undefined. It is the responsibility of the application to ensure that
 * all pending queue operations are finished if their results are required.
 *
 * The resources allocated by the HSA runtime during queue creation (queue
 * structure, ring buffer, doorbell signal) are released. The queue should not
 * be accessed after being destroyed.
 *
 * @param[in] queue Pointer to a queue created using ::hsa_queue_create.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
 */
hsa_status_t HSA_API hsa_queue_destroy(
    hsa_queue_t *queue);

/**
 * @brief Inactivate a queue.
 *
 * @details Inactivating the queue aborts any pending executions and prevents
 * any new packets from being processed. Any further packets written to the
 * queue once it is inactivated will be ignored by the packet processor.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
 */
hsa_status_t HSA_API hsa_queue_inactivate(
    hsa_queue_t *queue);

/**
 * @deprecated Renamed as ::hsa_queue_load_read_index_scacquire. New code
 * should call the replacement, which takes the same argument and returns the
 * same type.
 *
 * @copydoc hsa_queue_load_read_index_scacquire
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_read_index_acquire(
    const hsa_queue_t *queue);

/**
 * @brief Atomically load the read index of a queue.
 *
 * @details The read index is not a member of ::hsa_queue_t, so it is obtained
 * through this function (or one of its variants) rather than by reading the
 * queue structure.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @return Read index of the queue pointed by @p queue.
 */
uint64_t HSA_API hsa_queue_load_read_index_scacquire(
    const hsa_queue_t *queue);

/**
 * @copydoc hsa_queue_load_read_index_scacquire
 *
 * @note This is the @c relaxed variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_load_read_index_relaxed(
    const hsa_queue_t *queue);

/**
 * @deprecated Renamed as ::hsa_queue_load_write_index_scacquire. New code
 * should call the replacement, which takes the same argument and returns the
 * same type.
 *
 * @copydoc hsa_queue_load_write_index_scacquire
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_load_write_index_acquire(
    const hsa_queue_t *queue);

/**
 * @brief Atomically load the write index of a queue.
 *
 * @details The write index is not a member of ::hsa_queue_t, so it is
 * obtained through this function (or one of its variants) rather than by
 * reading the queue structure.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @return Write index of the queue pointed by @p queue.
 */
uint64_t HSA_API hsa_queue_load_write_index_scacquire(
    const hsa_queue_t *queue);

/**
 * @copydoc hsa_queue_load_write_index_scacquire
 *
 * @note This is the @c relaxed variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_load_write_index_relaxed(
    const hsa_queue_t *queue);

/**
 * @brief Atomically set the write index of a queue.
 *
 * @details It is recommended that the application uses this function to update
 * the write index when there is a single agent submitting work to the queue
 * (the queue type is ::HSA_QUEUE_TYPE_SINGLE).
 *
 * The queue is taken as a pointer to const: the write index is not a member of
 * ::hsa_queue_t, so none of the structure's fields are modified through
 * @p queue.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @param[in] value Value to assign to the write index.
 *
 */
void HSA_API hsa_queue_store_write_index_relaxed(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_store_write_index_screlease. New code
 * should call the replacement, which takes the same arguments.
 *
 * @copydoc hsa_queue_store_write_index_screlease
 */
void HSA_API HSA_DEPRECATED hsa_queue_store_write_index_release(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @copydoc hsa_queue_store_write_index_relaxed
 *
 * @note This is the @c screlease variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
void HSA_API hsa_queue_store_write_index_screlease(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_cas_write_index_scacq_screl. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_cas_write_index_scacq_screl
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acq_rel(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @brief Atomically set the write index of a queue if the observed value is
 * equal to the expected value. The application can inspect the returned value
 * to determine if the replacement was done.
 *
 * @details Because the previous write index is returned and the store only
 * happens when that value matches @p expected, the replacement took place
 * exactly when the return value equals @p expected.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @param[in] expected Expected value.
 *
 * @param[in] value Value to assign to the write index if @p expected matches
 * the observed write index. Must be greater than @p expected.
 *
 * @return Previous value of the write index.
 */
uint64_t HSA_API hsa_queue_cas_write_index_scacq_screl(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_cas_write_index_scacquire. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_cas_write_index_scacquire
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_acquire(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @copydoc hsa_queue_cas_write_index_scacq_screl
 *
 * @note This is the @c scacquire variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_cas_write_index_scacquire(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @copydoc hsa_queue_cas_write_index_scacq_screl
 *
 * @note This is the @c relaxed variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_cas_write_index_relaxed(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_cas_write_index_screlease. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_cas_write_index_screlease
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_cas_write_index_release(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @copydoc hsa_queue_cas_write_index_scacq_screl
 *
 * @note This is the @c screlease variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_cas_write_index_screlease(
    const hsa_queue_t *queue,
    uint64_t expected,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_add_write_index_scacq_screl. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_add_write_index_scacq_screl
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acq_rel(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @brief Atomically increment the write index of a queue by an offset.
 *
 * @details The queue is taken as a pointer to const: the write index is not a
 * member of ::hsa_queue_t, so none of the structure's fields are modified
 * through @p queue.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @param[in] value Value to add to the write index.
 *
 * @return Previous value of the write index, that is, the index before
 * @p value was added.
 */
uint64_t HSA_API hsa_queue_add_write_index_scacq_screl(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_add_write_index_scacquire. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_add_write_index_scacquire
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_acquire(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @copydoc hsa_queue_add_write_index_scacq_screl
 *
 * @note This is the @c scacquire variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_add_write_index_scacquire(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @copydoc hsa_queue_add_write_index_scacq_screl
 *
 * @note This is the @c relaxed variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_add_write_index_relaxed(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_add_write_index_screlease. New code
 * should call the replacement, which takes the same arguments and returns the
 * same type.
 *
 * @copydoc hsa_queue_add_write_index_screlease
 */
uint64_t HSA_API HSA_DEPRECATED hsa_queue_add_write_index_release(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @copydoc hsa_queue_add_write_index_scacq_screl
 *
 * @note This is the @c screlease variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
uint64_t HSA_API hsa_queue_add_write_index_screlease(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @brief Atomically set the read index of a queue.
 *
 * @details Modifications of the read index are not allowed and result in
 * undefined behavior if the queue is associated with an agent for which
 * only the corresponding packet processor is permitted to update the read
 * index.
 *
 * The queue is taken as a pointer to const: the read index is not a member of
 * ::hsa_queue_t, so none of the structure's fields are modified through
 * @p queue.
 *
 * @param[in] queue Pointer to a queue.
 *
 * @param[in] value Value to assign to the read index.
 *
 */
void HSA_API hsa_queue_store_read_index_relaxed(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @deprecated Renamed as ::hsa_queue_store_read_index_screlease. New code
 * should call the replacement, which takes the same arguments.
 *
 * @copydoc hsa_queue_store_read_index_screlease
 */
void HSA_API HSA_DEPRECATED hsa_queue_store_read_index_release(
    const hsa_queue_t *queue,
    uint64_t value);

/**
 * @copydoc hsa_queue_store_read_index_relaxed
 *
 * @note This is the @c screlease variant of the operation, as its name suffix
 * indicates. The memory-ordering guarantees of each variant are not described
 * in this comment; consult the HSA runtime specification.
 */
void HSA_API hsa_queue_store_read_index_screlease(
   const hsa_queue_t *queue,
   uint64_t value);
/** @} */


/** \defgroup aql Architected Queuing Language
 *  @{
 */

/**
 * @brief Packet type.
 *
 * @details One of these values is stored in the ::HSA_PACKET_HEADER_TYPE
 * sub-field of an AQL packet header.
 */
typedef enum {
  /**
   * Vendor-specific packet.
   */
  HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
  /**
   * The packet has been processed in the past, but has not been reassigned to
   * the packet processor. A packet processor must not process a packet of this
   * type. All queues support this packet type.
   */
  HSA_PACKET_TYPE_INVALID = 1,
  /**
   * Packet used by agents for dispatching jobs to kernel agents. Not all
   * queues support packets of this type (see ::hsa_queue_feature_t).
   */
  HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
  /**
   * Packet used by agents to delay processing of subsequent packets, and to
   * express complex dependencies between multiple packets. All queues support
   * this packet type.
   */
  HSA_PACKET_TYPE_BARRIER_AND = 3,
  /**
   * Packet used by agents for dispatching jobs to agents. Not all
   * queues support packets of this type (see ::hsa_queue_feature_t).
   */
  HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
  /**
   * Packet used by agents to delay processing of subsequent packets, and to
   * express complex dependencies between multiple packets. All queues support
   * this packet type.
   */
  HSA_PACKET_TYPE_BARRIER_OR = 5
} hsa_packet_type_t;

/**
 * @brief Scope of the memory fence operation associated with a packet.
 *
 * @details These values are used in the
 * ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE and
 * ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE sub-fields of a packet header.
 */
typedef enum {
  /**
   * No scope (no fence is applied). The packet relies on external fences to
   * ensure visibility of memory updates.
   */
  HSA_FENCE_SCOPE_NONE = 0,
  /**
   * The fence is applied with agent scope for the global segment.
   */
  HSA_FENCE_SCOPE_AGENT = 1,
  /**
   * The fence is applied across both agent and system scope for the global
   * segment.
   */
  HSA_FENCE_SCOPE_SYSTEM = 2
} hsa_fence_scope_t;

/**
 * @brief Bit offsets of the sub-fields packed into the @a header field found
 * in every AQL packet.
 *
 * @details Each enumeration constant is the offset, in bits and relative to
 * the address of @a header, at which the sub-field starts. The matching
 * entry of ::hsa_packet_header_width_t gives the sub-field width, also in
 * bits.
 */
typedef enum {
  /**
   * Packet type; must hold one of the ::hsa_packet_type_t values. When the
   * type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC the layout of the packet is
   * vendor-specific.
   */
  HSA_PACKET_HEADER_TYPE = 0,

  /**
   * Barrier bit. When set, processing of this packet only launches once all
   * preceding packets in the same queue are complete.
   */
  HSA_PACKET_HEADER_BARRIER = 8,

  /**
   * Acquire fence scope; must hold one of the ::hsa_fence_scope_t values. It
   * selects the scope and type of the memory fence operation applied before
   * the packet enters the active phase. An acquire fence ensures that any
   * subsequent global segment or image loads by any unit of execution that
   * belongs to a dispatch that has not yet entered the active phase on any
   * queue of the same kernel agent, sees any data previously released at the
   * scopes specified by the acquire fence.
   */
  HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE = 9,

  /**
   * @deprecated Renamed as ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE.
   */
  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,

  /**
   * Release fence scope; must hold one of the ::hsa_fence_scope_t values. It
   * selects the scope and type of the memory fence operation applied after
   * kernel completion but before the packet is completed. A release fence
   * makes any global segment or image data that was stored by any unit of
   * execution that belonged to a dispatch that has completed the active phase
   * on any queue of the same kernel agent visible in all the scopes specified
   * by the release fence.
   */
  HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE = 11,

  /**
   * @deprecated Renamed as ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE.
   */
  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE
} hsa_packet_header_t;

/**
 * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t.
 *
 * @details Each constant here pairs with the ::hsa_packet_header_t constant
 * of the same suffix, which supplies the bit offset of that sub-field.
 */
typedef enum {
  /** Width of the ::HSA_PACKET_HEADER_TYPE sub-field. */
  HSA_PACKET_HEADER_WIDTH_TYPE = 8,

  /** Width of the ::HSA_PACKET_HEADER_BARRIER sub-field. */
  HSA_PACKET_HEADER_WIDTH_BARRIER = 1,

  /** Width of the ::HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE sub-field. */
  HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE = 2,

  /**
   * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE.
   */
  HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE,

  /** Width of the ::HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE sub-field. */
  HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE = 2,

  /**
   * @deprecated Use HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE.
   */
  HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE
} hsa_packet_header_width_t;

/**
 * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset
 * (with respect to the address of @a setup) of a sub-field is identical to its
 * enumeration constant. The width of each sub-field is determined by the
 * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The
 * offset and the width are expressed in bits.
 *
 * @details The @a setup field itself is the 16-bit member of
 * ::hsa_kernel_dispatch_packet_t.
 */
 typedef enum {
  /**
   * Number of dimensions of the grid. Valid values are 1, 2, or 3. The
   * sub-field starts at bit 0 of @a setup.
   */
   HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
 } hsa_kernel_dispatch_packet_setup_t;

/**
 * @brief Width (in bits) of the sub-fields in
 * ::hsa_kernel_dispatch_packet_setup_t.
 */
 typedef enum {
   /**
    * Width of the ::HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS sub-field.
    */
   HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
 } hsa_kernel_dispatch_packet_setup_width_t;

/**
 * @brief AQL kernel dispatch packet
 *
 * @details Whether @a reserved1 is declared, and on which side of
 * @a kernarg_address, depends on the HSA_LARGE_MODEL and HSA_LITTLE_ENDIAN
 * macros.
 */
typedef struct hsa_kernel_dispatch_packet_s {
  union {
    struct {
        /**
         * Packet header. Used to configure multiple packet parameters such as the
         * packet type. The parameters are described by ::hsa_packet_header_t.
         */
        uint16_t header;

        /**
         * Dispatch setup parameters. Used to configure kernel dispatch parameters
         * such as the number of dimensions in the grid. The parameters are described
         * by ::hsa_kernel_dispatch_packet_setup_t.
         */
        uint16_t setup;
    };
    /**
     * 32-bit view that shares storage with @a header and @a setup.
     * NOTE(review): presumably provided so both fields can be accessed with a
     * single 32-bit operation -- confirm with the runtime authors.
     */
    uint32_t full_header;
  };

  /**
   * X dimension of work-group, in work-items. Must be greater than 0.
   */
  uint16_t workgroup_size_x;

  /**
   * Y dimension of work-group, in work-items. Must be greater than
   * 0. If the grid has 1 dimension, the only valid value is 1.
   */
  uint16_t workgroup_size_y;

  /**
   * Z dimension of work-group, in work-items. Must be greater than
   * 0. If the grid has 1 or 2 dimensions, the only valid value is 1.
   */
  uint16_t workgroup_size_z;

  /**
   * Reserved. Must be 0.
   */
  uint16_t reserved0;

  /**
   * X dimension of grid, in work-items. Must be greater than 0. Must
   * not be smaller than @a workgroup_size_x.
   */
  uint32_t grid_size_x;

  /**
   * Y dimension of grid, in work-items. Must be greater than 0. If the grid has
   * 1 dimension, the only valid value is 1. Must not be smaller than @a
   * workgroup_size_y.
   */
  uint32_t grid_size_y;

  /**
   * Z dimension of grid, in work-items. Must be greater than 0. If the grid has
   * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a
   * workgroup_size_z.
   */
  uint32_t grid_size_z;

  /**
   * Size in bytes of private memory allocation request (per work-item).
   */
  uint32_t private_segment_size;

  /**
   * Size in bytes of group memory allocation request (per work-group). Must not
   * be less than the sum of the group memory used by the kernel (and the
   * functions it calls directly or indirectly) and the dynamically allocated
   * group segment variables.
   */
  uint32_t group_segment_size;

  /**
   * Opaque handle to a code object that includes an implementation-defined
   * executable code for the kernel.
   */
  uint64_t kernel_object;

#ifdef HSA_LARGE_MODEL
  /**
   * Pointer to a buffer containing the kernel arguments. May be NULL.
   *
   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
   * modified once the kernel dispatch packet is enqueued until the dispatch has
   * completed execution. No @a reserved1 field is declared in this
   * configuration.
   */
  void* kernarg_address;
#elif defined HSA_LITTLE_ENDIAN
  /**
   * Pointer to a buffer containing the kernel arguments. May be NULL.
   *
   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
   * modified once the kernel dispatch packet is enqueued until the dispatch has
   * completed execution.
   */
  void* kernarg_address;
  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;
#else
  /**
   * Reserved. Must be 0. In this configuration the reserved word is declared
   * before @a kernarg_address.
   */
  uint32_t reserved1;
  /**
   * Pointer to a buffer containing the kernel arguments. May be NULL.
   *
   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
   * modified once the kernel dispatch packet is enqueued until the dispatch has
   * completed execution.
   */
  void* kernarg_address;
#endif

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;

  /**
   * Signal used to indicate completion of the job. The application can use the
   * special signal handle 0 to indicate that no signal is used.
   */
  hsa_signal_t completion_signal;

} hsa_kernel_dispatch_packet_t;

/**
 * @brief Agent dispatch packet.
 *
 * @details Whether @a reserved1 is declared, and on which side of
 * @a return_address, depends on the HSA_LARGE_MODEL and HSA_LITTLE_ENDIAN
 * macros.
 */
typedef struct hsa_agent_dispatch_packet_s {
  /**
   * Packet header. Used to configure multiple packet parameters such as the
   * packet type. The parameters are described by ::hsa_packet_header_t.
   */
  uint16_t header;

  /**
   * Application-defined function to be performed by the destination agent.
   */
  uint16_t type;

  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved0;

#ifdef HSA_LARGE_MODEL
  /**
   * Address where to store the function return values, if any. No
   * @a reserved1 field is declared in this configuration.
   */
  void* return_address;
#elif defined HSA_LITTLE_ENDIAN
  /**
   * Address where to store the function return values, if any.
   */
  void* return_address;
  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;
#else
  /**
   * Reserved. Must be 0. In this configuration the reserved word is declared
   * before @a return_address.
   */
  uint32_t reserved1;
  /**
   * Address where to store the function return values, if any.
   */
  void* return_address;
#endif

  /**
   * Function arguments (four 64-bit values).
   */
  uint64_t arg[4];

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;

  /**
   * Signal used to indicate completion of the job. The application can use the
   * special signal handle 0 to indicate that no signal is used.
   */
  hsa_signal_t completion_signal;

} hsa_agent_dispatch_packet_t;

/**
 * @brief Barrier-AND packet.
 *
 * @details Carries up to five dependent signals in @a dep_signal. Unlike
 * ::hsa_barrier_or_packet_t, a signal handle of 0 counts as a satisfied
 * dependency.
 */
typedef struct hsa_barrier_and_packet_s {
  /**
   * Packet header. Used to configure multiple packet parameters such as the
   * packet type. The parameters are described by ::hsa_packet_header_t.
   */
  uint16_t header;

  /**
   * Reserved. Must be 0.
   */
  uint16_t reserved0;

  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;

  /**
   * Array of dependent signal objects. Signals with a handle value of 0 are
   * allowed and are interpreted by the packet processor as satisfied
   * dependencies.
   */
  hsa_signal_t dep_signal[5];

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;

  /**
   * Signal used to indicate completion of the job. The application can use the
   * special signal handle 0 to indicate that no signal is used.
   */
  hsa_signal_t completion_signal;

} hsa_barrier_and_packet_t;

/**
 * @brief Barrier-OR packet.
 *
 * @details Carries up to five dependent signals in @a dep_signal. Unlike
 * ::hsa_barrier_and_packet_t, a signal handle of 0 counts as a dependency
 * that is not satisfied.
 */
typedef struct hsa_barrier_or_packet_s {
  /**
   * Packet header. Used to configure multiple packet parameters such as the
   * packet type. The parameters are described by ::hsa_packet_header_t.
   */
  uint16_t header;

  /**
   * Reserved. Must be 0.
   */
  uint16_t reserved0;

  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;

  /**
   * Array of dependent signal objects. Signals with a handle value of 0 are
   * allowed and are interpreted by the packet processor as dependencies not
   * satisfied.
   */
  hsa_signal_t dep_signal[5];

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;

  /**
   * Signal used to indicate completion of the job. The application can use the
   * special signal handle 0 to indicate that no signal is used.
   */
  hsa_signal_t completion_signal;

} hsa_barrier_or_packet_t;

/** @} */

/** \addtogroup memory Memory
 *  @{
 */

/**
 * @brief Memory segments associated with a region.
 *
 * @details This is the type of the ::HSA_REGION_INFO_SEGMENT region
 * attribute.
 */
typedef enum {
  /**
   * Global segment. Used to hold data that is shared by all agents.
   */
  HSA_REGION_SEGMENT_GLOBAL = 0,
  /**
   * Read-only segment. Used to hold data that remains constant during the
   * execution of a kernel.
   */
  HSA_REGION_SEGMENT_READONLY = 1,
  /**
   * Private segment. Used to hold data that is local to a single work-item.
   */
  HSA_REGION_SEGMENT_PRIVATE = 2,
  /**
   * Group segment. Used to hold data that is shared by the work-items of a
   * work-group.
   */
  HSA_REGION_SEGMENT_GROUP = 3,
  /**
   * Kernarg segment. Used to store kernel arguments.
   */
  HSA_REGION_SEGMENT_KERNARG = 4
} hsa_region_segment_t;

/**
 * @brief Global region flags.
 *
 * @details Every enumerator occupies its own bit; the
 * ::HSA_REGION_INFO_GLOBAL_FLAGS attribute reports a bit-field built from
 * these values.
 */
typedef enum {
  /**
   * Memory in the region may be used by the application to hold kernel
   * arguments and to supply the values for the kernarg segment of a kernel
   * dispatch. Whenever this flag is set,
   * ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set as well.
   */
  HSA_REGION_GLOBAL_FLAG_KERNARG = 1 << 0,

  /**
   * Updates to memory in this region are immediately visible to all the
   * agents under the terms of the HSA memory model. Whenever this flag is
   * set, ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set.
   */
  HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 1 << 1,

  /**
   * Updates to memory in this region can be performed by a single agent at
   * a time. If a different agent in the system is allowed to access the
   * region, the application must explicitly invoke ::hsa_memory_assign_agent
   * in order to transfer ownership to that agent for a particular buffer.
   */
  HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 1 << 2,

  /**
   * Updates to memory in this region have extended scope, where the
   * device-scope atomics to this memory type act as system-scope with respect
   * to all variables located in memory regions of this type.
   * Note: On non-compliant systems, the application may still be responsible
   * for performing device-specific actions necessary to achieve system-scope
   * coherence.
   */
  HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED = 1 << 3
} hsa_region_global_flag_t;

/**
 * @brief Attributes of a memory region.
 *
 * @details When compiled as C++, the underlying type of the enumeration is
 * explicitly fixed to int; C builds use a plain enum. The enumerator values
 * are assigned explicitly and are not declared in numeric order (0, 1, 2, 4,
 * 8, 5, 6, 7); value 3 is not assigned in this enumeration.
 */

#ifdef __cplusplus
typedef enum : int {
#else
typedef enum {
#endif
  /**
   * Segment where memory in the region can be used. The type of this
   * attribute is ::hsa_region_segment_t.
   */
  HSA_REGION_INFO_SEGMENT = 0,
  /**
   * Flag mask. The value of this attribute is undefined if the value of
   * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of
   * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t
   * values.
   */
  HSA_REGION_INFO_GLOBAL_FLAGS = 1,
  /**
   * Size of this region, in bytes. The type of this attribute is size_t.
   */
  HSA_REGION_INFO_SIZE = 2,
  /**
   * Maximum allocation size in this region, in bytes. Must not exceed the value
   * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t.
   *
   * If the region is in the global or readonly segments, this is the maximum
   * size that the application can pass to ::hsa_memory_allocate.
   *
   * If the region is in the group segment, this is the maximum size (per
   * work-group) that can be requested for a given kernel dispatch. If the
   * region is in the private segment, this is the maximum size (per work-item)
   * that can be requested for a specific kernel dispatch, and must be at least
   * 256 bytes.
   */
  HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
  /**
   * Maximum size (per work-group) of private memory that can be requested for a
   * specific kernel dispatch. Must be at least 65536 bytes. The type of this
   * attribute is uint32_t. The value of this attribute is undefined if the
   * region is not in the private segment.
   */
  HSA_REGION_INFO_ALLOC_MAX_PRIVATE_WORKGROUP_SIZE = 8,
  /**
   * Indicates whether memory in this region can be allocated using
   * ::hsa_memory_allocate. The type of this attribute is bool.
   *
   * The value of this flag is always false for regions in the group and private
   * segments.
   */
  HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
  /**
   * Allocation granularity of buffers allocated by ::hsa_memory_allocate in
   * this region. The size of a buffer allocated in this region is a multiple of
   * the value of this attribute. The value of this attribute is only defined if
   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type
   * of this attribute is size_t.
   */
  HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
  /**
   * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The
   * value of this attribute is only defined if
   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must be
   * a power of 2. The type of this attribute is size_t.
   */
  HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7
} hsa_region_info_t;

/**
 * @brief Get the current value of an attribute of a region.
 *
 * @param[in] region A valid region.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * region attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_region_get_info(
    hsa_region_t region,
    hsa_region_info_t attribute,
    void* value);

/**
 * @brief Iterate over the memory regions associated with a given agent, and
 * invoke an application-defined callback on every iteration.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] callback Callback to be invoked once per region that is
 * accessible from the agent. The HSA runtime passes two arguments to the
 * callback: the region and the application data. If @p callback returns a
 * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
 * traversal stops and ::hsa_agent_iterate_regions returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_agent_iterate_regions(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_region_t region, void* data),
    void* data);

/**
 * @brief Allocate a block of memory in a given region.
 *
 * @param[in] region Region from which to allocate memory. The region must have
 * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set.
 *
 * @param[in] size Allocation size, in bytes. Must not be zero. This value is
 * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE
 * in @p region.
 *
 * @param[out] ptr Pointer to the location where to store the base address of
 * the allocated block. The returned base address is aligned to the value of
 * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation
 * fails, the returned value is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
 * allocate memory in @p region, or @p size is greater than the value of
 * ::HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
 */
hsa_status_t HSA_API hsa_memory_allocate(hsa_region_t region,
    size_t size,
    void** ptr);

/**
 * @brief Deallocate a block of memory previously allocated using
 * ::hsa_memory_allocate.
 *
 * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
 * previously returned by ::hsa_memory_allocate, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 */
hsa_status_t HSA_API hsa_memory_free(void* ptr);

/**
 * @brief Copy a block of memory from the location pointed to by @p src to the
 * memory block pointed to by @p dst.
 *
 * @param[out] dst Buffer where the content is to be copied. If @p dst is in
 * coarse-grained memory, the copied data is only visible to the agent currently
 * assigned (::hsa_memory_assign_agent) to @p dst.
 *
 * @param[in] src A valid pointer to the source of data to be copied. The source
 * buffer must not overlap with the destination buffer. If the source buffer is
 * in coarse-grained memory then it must be assigned to an agent, from which the
 * data will be retrieved.
 *
 * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
 * performed and the function returns success. Copying a number of bytes larger
 * than the size of the buffers pointed to by @p dst or @p src results in
 * undefined behavior.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
 * pointers are NULL.
 */
hsa_status_t HSA_API hsa_memory_copy(
    void *dst,
    const void *src,
    size_t size);

/**
 * @brief Change the ownership of a global, coarse-grained buffer.
 *
 * @details The contents of a coarse-grained buffer are visible to an agent
 * only after ownership has been explicitly transferred to that agent. Once the
 * operation completes, the previous owner can no longer access the data in the
 * buffer.
 *
 * An implementation of the HSA runtime is allowed, but not required, to change
 * the physical location of the buffer when ownership is transferred to a
 * different agent. In general the application must not assume this
 * behavior. The virtual location (address) of the passed buffer is never
 * modified.
 *
 * @param[in] ptr Base address of a global buffer. The pointer must match an
 * address previously returned by ::hsa_memory_allocate. The size of the buffer
 * affected by the ownership change is identical to the size of that previous
 * allocation. If @p ptr points to a fine-grained global buffer, no operation is
 * performed and the function returns success. If @p ptr does not point to
 * global memory, the behavior is undefined.
 *
 * @param[in] agent Agent that becomes the owner of the buffer. The
 * application is responsible for ensuring that @p agent has access to the
 * region that contains the buffer. It is allowed to change ownership to an
 * agent that is already the owner of the buffer, with the same or different
 * access permissions.
 *
 * @param[in] access Access permissions requested for the new owner.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is
 * not a valid access value.
 */
hsa_status_t HSA_API hsa_memory_assign_agent(
    void *ptr,
    hsa_agent_t agent,
    hsa_access_permission_t access);

/**
 * @brief Register a global, fine-grained buffer.
 *
 * @details Registering a buffer serves as an indication to the HSA runtime that
 * the memory might be accessed from a kernel agent other than the
 * host. Registration is a performance hint that allows the HSA runtime
 * implementation to know which buffers will be accessed by some of the kernel
 * agents ahead of time.
 *
 * Registration is only recommended for buffers in the global segment that have
 * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS
 * allocator instead. Registering an OS-allocated buffer in the base profile is
 * equivalent to a no-op.
 *
 * Registrations should not overlap.
 *
 * @param[in] ptr A buffer in global, fine-grained memory. If a NULL pointer is
 * passed, no operation is performed. If the buffer has been allocated using
 * ::hsa_memory_allocate, or has already been registered, no operation is
 * performed.
 *
 * @param[in] size Requested registration size in bytes. A size of 0 is
 * only allowed if @p ptr is NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr
 * is not NULL.
 */
hsa_status_t HSA_API hsa_memory_register(
    void *ptr,
    size_t size);

/**
 * @brief Deregister memory previously registered using ::hsa_memory_register.
 *
 * @details If the memory interval being deregistered does not match a previous
 * registration (start and end addresses), the behavior is undefined.
 *
 * @param[in] ptr A pointer to the base of the buffer to be deregistered. If
 * a NULL pointer is passed, no operation is performed.
 *
 * @param[in] size Size of the buffer to be deregistered.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 */
hsa_status_t HSA_API hsa_memory_deregister(
    void *ptr,
    size_t size);

/** @} */


/** \defgroup instruction-set-architecture Instruction Set Architecture.
 *  @{
 */

/**
 * @brief Handle identifying an instruction set architecture (ISA).
 */
typedef struct hsa_isa_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_isa_t;

/**
 * @brief Retrieve a reference to an instruction set architecture handle out of
 * a symbolic name.
 *
 * @param[in] name Vendor-specific name associated with a particular
 * instruction set architecture. @p name must start with the vendor name and a
 * colon (for example, "AMD:"). The rest of the name is vendor-specific. Must be
 * a NUL-terminated string.
 *
 * @param[out] isa Memory location where the HSA runtime stores the ISA handle
 * corresponding to the given name. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not
 * correspond to any instruction set architecture.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is
 * NULL.
 */
hsa_status_t HSA_API hsa_isa_from_name(
    const char *name,
    hsa_isa_t *isa);

/**
 * @brief Iterate over the instruction sets supported by the given agent, and
 * invoke an application-defined callback on every iteration. The iterator is
 * deterministic: if an agent supports several instruction set architectures,
 * they are traversed in the same order in every invocation of this function.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] callback Callback to be invoked once per instruction set
 * architecture. The HSA runtime passes two arguments to the callback: the
 * ISA and the application data. If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * that status value is returned.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_agent_iterate_isas(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_isa_t isa, void *data),
    void *data);

/**
 * @brief Instruction set architecture attributes.
 *
 * @details Enumerator values are assigned explicitly and are not contiguous:
 * values 10, 11 and 15 are not assigned in this enumeration.
 */
typedef enum {
  /**
   * The length of the ISA name in bytes, not including the NUL terminator. The
   * type of this attribute is uint32_t.
   */
  HSA_ISA_INFO_NAME_LENGTH = 0,
  /**
   * Human-readable description. The type of this attribute is character array
   * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute.
   */
  HSA_ISA_INFO_NAME = 1,
  /**
   * @deprecated
   *
   * Number of call conventions supported by the instruction set architecture.
   * Must be greater than zero. The type of this attribute is uint32_t.
   */
  HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
  /**
   * @deprecated
   *
   * Number of work-items in a wavefront for a given call convention. Must be a
   * power of 2 in the range [1,256]. The type of this attribute is uint32_t.
   */
  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
  /**
   * @deprecated
   *
   * Number of wavefronts per compute unit for a given call convention. In
   * practice, other factors (for example, the amount of group memory used by a
   * work-group) may further limit the number of wavefronts per compute
   * unit. The type of this attribute is uint32_t.
   */
  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4,
  /**
   * Machine models supported by the instruction set architecture. The type of
   * this attribute is a bool[2]. If the ISA supports the small machine model,
   * the element at index ::HSA_MACHINE_MODEL_SMALL is true. If the ISA supports
   * the large model, the element at index ::HSA_MACHINE_MODEL_LARGE is true.
   */
  HSA_ISA_INFO_MACHINE_MODELS = 5,
  /**
   * Profiles supported by the instruction set architecture. The type of this
   * attribute is a bool[2]. If the ISA supports the base profile, the element
   * at index ::HSA_PROFILE_BASE is true. If the ISA supports the full profile,
   * the element at index ::HSA_PROFILE_FULL is true.
   */
  HSA_ISA_INFO_PROFILES = 6,
  /**
   * Default floating-point rounding modes supported by the instruction set
   * architecture. The type of this attribute is a bool[3]. The value at a given
   * index is true if the corresponding rounding mode in
   * ::hsa_default_float_rounding_mode_t is supported. At least one default mode
   * has to be supported.
   *
   * If the default mode is supported, then
   * ::HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES must report that
   * both the zero and the near rounding modes are supported.
   */
  HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES = 7,
  /**
   * Default floating-point rounding modes supported by the instruction set
   * architecture in the Base profile. The type of this attribute is a
   * bool[3]. The value at a given index is true if the corresponding rounding
   * mode in ::hsa_default_float_rounding_mode_t is supported. The value at
   * index ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT must be false. At least one
   * of the values at indexes ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO or
   * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR must be true.
   */
  HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 8,
  /**
   * Flag indicating that the f16 HSAIL operation is at least as fast as the
   * f32 operation in the instruction set architecture. The type of this
   * attribute is bool.
   */
  HSA_ISA_INFO_FAST_F16_OPERATION = 9,
  /**
   * Maximum number of work-items of each dimension of a work-group. Each
   * maximum must be greater than 0. No maximum can exceed the value of
   * ::HSA_ISA_INFO_WORKGROUP_MAX_SIZE. The type of this attribute is
   * uint16_t[3].
   */
  HSA_ISA_INFO_WORKGROUP_MAX_DIM = 12,
  /**
   * Maximum total number of work-items in a work-group. The type
   * of this attribute is uint32_t.
   */
  HSA_ISA_INFO_WORKGROUP_MAX_SIZE = 13,
  /**
   * Maximum number of work-items of each dimension of a grid. Each maximum must
   * be greater than 0, and must not be smaller than the corresponding value in
   * ::HSA_ISA_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
   * ::HSA_ISA_INFO_GRID_MAX_SIZE. The type of this attribute is
   * ::hsa_dim3_t.
   */
  HSA_ISA_INFO_GRID_MAX_DIM = 14,
  /**
   * Maximum total number of work-items in a grid. The type of this
   * attribute is uint64_t.
   */
  HSA_ISA_INFO_GRID_MAX_SIZE = 16,
  /**
   * Maximum number of fbarriers per work-group. Must be at least 32. The
   * type of this attribute is uint32_t.
   */
  HSA_ISA_INFO_FBARRIER_MAX_SIZE = 17
} hsa_isa_info_t;

/**
 * @deprecated The concept of call convention has been deprecated. If the
 * application wants to query the value of an attribute for a given instruction
 * set architecture, use ::hsa_isa_get_info_alt instead. If the application
 * wants to query an attribute that is specific to a given combination of ISA
 * and wavefront, use ::hsa_wavefront_get_info.
 *
 * @brief Get the current value of an attribute for a given instruction set
 * architecture (ISA).
 *
 * @param[in] isa A valid instruction set architecture.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[in] index Call convention index. Used only for call convention
 * attributes, otherwise ignored. Must have a value between 0 (inclusive) and
 * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not
 * inclusive) in @p isa.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_INDEX The index is out of range.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * instruction set architecture attribute, or @p value is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_get_info(
    hsa_isa_t isa,
    hsa_isa_info_t attribute,
    uint32_t index,
    void *value);

/**
 * @brief Get the current value of an attribute for a given instruction set
 * architecture (ISA).
 *
 * @param[in] isa A valid instruction set architecture.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * instruction set architecture attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_isa_get_info_alt(
    hsa_isa_t isa,
    hsa_isa_info_t attribute,
    void *value);

/**
 * @brief Retrieve the exception policy support for a given combination of
 * instruction set architecture and profile.
 *
 * @param[in] isa A valid instruction set architecture.
 *
 * @param[in] profile Profile.
 *
 * @param[out] mask Pointer to a memory location where the HSA runtime stores a
 * mask of ::hsa_exception_policy_t values. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
 * profile, or @p mask is NULL.
 */
hsa_status_t HSA_API hsa_isa_get_exception_policies(
    hsa_isa_t isa,
    hsa_profile_t profile,
    uint16_t *mask);

/**
 * @brief Floating-point types.
 *
 * @details Each enumerator has a distinct single-bit value (1, 2, 4).
 */
typedef enum {
  /**
   * 16-bit floating-point type.
   */
  HSA_FP_TYPE_16 = 1,
  /**
   * 32-bit floating-point type.
   */
  HSA_FP_TYPE_32 = 2,
  /**
   * 64-bit floating-point type.
   */
  HSA_FP_TYPE_64 = 4
} hsa_fp_type_t;

/**
 * @brief Flush-to-zero modes.
 */
typedef enum {
  /**
   * Flush to zero.
   */
  HSA_FLUSH_MODE_FTZ = 1,
  /**
   * Do not flush to zero.
   */
  HSA_FLUSH_MODE_NON_FTZ = 2
} hsa_flush_mode_t;

/**
 * @brief Round methods reported by ::hsa_isa_get_round_method.
 */
typedef enum {
  /**
   * Single round method.
   */
  HSA_ROUND_METHOD_SINGLE = 1,
  /**
   * Double round method.
   */
  HSA_ROUND_METHOD_DOUBLE = 2
} hsa_round_method_t;

/**
 * @brief Retrieve the round method (single or double) used to implement the
 * floating-point multiply add instruction (mad) for a given combination of
 * instruction set architecture, floating-point type, and flush-to-zero
 * modifier.
 *
 * @param[in] isa Instruction set architecture.
 *
 * @param[in] fp_type Floating-point type.
 *
 * @param[in] flush_mode Flush-to-zero modifier.
 *
 * @param[out] round_method Pointer to a memory location where the HSA
 * runtime stores the round method used by the implementation. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p fp_type is not a valid
 * floating-point type, or @p flush_mode is not a valid flush-to-zero modifier,
 * or @p round_method is NULL.
 */
hsa_status_t HSA_API hsa_isa_get_round_method(
    hsa_isa_t isa,
    hsa_fp_type_t fp_type,
    hsa_flush_mode_t flush_mode,
    hsa_round_method_t *round_method);

/**
 * @brief Wavefront handle.
 */
typedef struct hsa_wavefront_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_wavefront_t;

/**
 * @brief Wavefront attributes, queried with ::hsa_wavefront_get_info.
 */
typedef enum {
  /**
   * Number of work-items in the wavefront. Must be a power of 2 in the range
   * [1,256]. The type of this attribute is uint32_t.
   */
  HSA_WAVEFRONT_INFO_SIZE = 0
} hsa_wavefront_info_t;

/**
 * @brief Get the current value of a wavefront attribute.
 *
 * @param[in] wavefront A wavefront.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_WAVEFRONT The wavefront is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * wavefront attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_wavefront_get_info(
    hsa_wavefront_t wavefront,
    hsa_wavefront_info_t attribute,
    void *value);

/**
 * @brief Iterate over the different wavefronts supported by an instruction set
 * architecture, and invoke an application-defined callback on every iteration.
 *
 * @param[in] isa Instruction set architecture.
 *
 * @param[in] callback Callback to be invoked once per wavefront that is
 * supported by the instruction set architecture. The HSA runtime passes two
 * arguments to the callback: the wavefront handle and the application data.
 * If @p callback returns a status other than ::HSA_STATUS_SUCCESS for a
 * particular iteration, the traversal stops and that value is returned.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_isa_iterate_wavefronts(
    hsa_isa_t isa,
    hsa_status_t (*callback)(hsa_wavefront_t wavefront, void *data),
    void *data);

/**
 * @deprecated Use ::hsa_agent_iterate_isas to query which instruction set
 * architectures are supported by a given agent.
 *
 * @brief Check if the instruction set architecture of a code object can be
 * executed on an agent associated with another architecture.
 *
 * @param[in] code_object_isa Instruction set architecture associated with a
 * code object.
 *
 * @param[in] agent_isa Instruction set architecture associated with an agent.
 *
 * @param[out] result Pointer to a memory location where the HSA runtime stores
 * the result of the check. If the two architectures are compatible, the result
 * is true; if they are incompatible, the result is false.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_isa_compatible(
    hsa_isa_t code_object_isa,
    hsa_isa_t agent_isa,
    bool *result);

/** @} */


/** \defgroup executable Executable
 *  @{
 */

/**
 * @brief Code object reader handle. A code object reader is used to
 * load a code object from a file (when created using
 * ::hsa_code_object_reader_create_from_file), or from memory (if created using
 * ::hsa_code_object_reader_create_from_memory).
 */
typedef struct hsa_code_object_reader_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_code_object_reader_t;

/**
 * @brief Create a code object reader to operate on a file.
 *
 * @param[in] file File descriptor. The file must have been opened by the
 * application with at least read permissions prior to calling this function.
 * The file must contain a vendor-specific code object.
 *
 * The file is owned and managed by the application; the lifetime of the file
 * descriptor must exceed that of any associated code object reader.
 *
 * @param[out] code_object_reader Memory location to store the newly created
 * code object reader handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
 */
hsa_status_t HSA_API hsa_code_object_reader_create_from_file(
    hsa_file_t file,
    hsa_code_object_reader_t *code_object_reader);

/**
 * @brief Create a code object reader to operate on memory.
 *
 * @param[in] code_object Memory buffer that contains a vendor-specific code
 * object. The buffer is owned and managed by the application; the lifetime of
 * the buffer must exceed that of any associated code object reader.
 *
 * @param[in] size Size of the buffer pointed to by @p code_object. Must not be
 * 0.
 *
 * @param[out] code_object_reader Memory location to store the newly created
 * code object reader handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object is NULL, @p size
 * is zero, or @p code_object_reader is NULL.
 */
hsa_status_t HSA_API hsa_code_object_reader_create_from_memory(
    const void *code_object,
    size_t size,
    hsa_code_object_reader_t *code_object_reader);

/**
 * @brief Destroy a code object reader.
 *
 * @details The code object reader handle becomes invalid after completion of
 * this function. Any file or memory used to create the code object reader is
 * not closed, removed, or deallocated by this function.
 *
 * @param[in] code_object_reader Code object reader to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
 * is invalid.
 */
hsa_status_t HSA_API hsa_code_object_reader_destroy(
    hsa_code_object_reader_t code_object_reader);

/**
 * @brief Struct containing an opaque handle to an executable. An executable
 * contains ISA for finalized kernels and indirect functions together with the
 * allocated global or readonly segment variables they reference.
 */
typedef struct hsa_executable_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_executable_t;

/**
 * @brief Executable state.
 *
 * @details The state of an executable can be queried with
 * ::hsa_executable_get_info using ::HSA_EXECUTABLE_INFO_STATE. An unfrozen
 * executable becomes frozen through ::hsa_executable_freeze.
 */
typedef enum {
  /**
   * Executable state, which allows the user to load code objects and define
   * external variables. Variable addresses, kernel code handles, and
   * indirect function code handles are not available in query operations until
   * the executable is frozen (zero is always returned).
   */
  HSA_EXECUTABLE_STATE_UNFROZEN = 0,
  /**
   * Executable state, which allows the user to query variable addresses,
   * kernel code handles, and indirect function code handles using query
   * operations. Loading new code objects, as well as defining external
   * variables, is not allowed in this state.
   */
  HSA_EXECUTABLE_STATE_FROZEN = 1
} hsa_executable_state_t;

/**
 * @deprecated Use ::hsa_executable_create_alt instead, which allows the
 * application to specify the default floating-point rounding mode of the
 * executable and assumes an unfrozen initial state.
 *
 * @brief Create an empty executable.
 *
 * @param[in] profile Profile used in the executable.
 *
 * @param[in] executable_state Executable state. If the state is
 * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no
 * code objects can be loaded, and no variables can be defined.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] executable Memory location where the HSA runtime stores the newly
 * created executable handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
 * @p executable is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_create(
    hsa_profile_t profile,
    hsa_executable_state_t executable_state,
    const char *options,
    hsa_executable_t *executable);

/**
 * @brief Create an empty executable.
 *
 * @param[in] profile Profile used in the executable.
 *
 * @param[in] default_float_rounding_mode Default floating-point rounding mode
 * used in the executable. Allowed rounding modes are near and zero (default is
 * not allowed).
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] executable Memory location where the HSA runtime stores the newly
 * created executable handle. The initial state of the executable is
 * ::HSA_EXECUTABLE_STATE_UNFROZEN. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
 * @p executable is NULL.
 */
hsa_status_t HSA_API hsa_executable_create_alt(
    hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char *options,
    hsa_executable_t *executable);

/**
 * @brief Destroy an executable.
 *
 * @details An executable handle becomes invalid after the executable has been
 * destroyed. Code object handles that were loaded into this executable are
 * still valid after the executable has been destroyed, and can be used as
 * intended. Resources allocated outside and associated with this executable
 * (such as external global or readonly variables) can be released after the
 * executable has been destroyed.
 *
 * An executable should not be destroyed while kernels are in flight.
 *
 * @param[in] executable Executable to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 */
hsa_status_t HSA_API hsa_executable_destroy(
    hsa_executable_t executable);

/**
 * @brief Loaded code object handle.
 *
 * @details A handle of this type is optionally returned by
 * ::hsa_executable_load_program_code_object and
 * ::hsa_executable_load_agent_code_object through their
 * @a loaded_code_object output argument.
 */
typedef struct hsa_loaded_code_object_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_loaded_code_object_t;

/**
 * @brief Load a program code object into an executable.
 *
 * @details A program code object contains information about resources that are
 * accessible by all kernel agents that run the executable, and can be loaded
 * at most once into an executable.
 *
 * If the program code object uses extensions, the implementation must support
 * them for this operation to return successfully.
 *
 * @param[in] executable Executable. Must not be in frozen state.
 *
 * @param[in] code_object_reader A code object reader that holds the program
 * code object to load. If a code object reader is destroyed before all the
 * associated executables are destroyed, the behavior is undefined.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] loaded_code_object Pointer to a memory location where the HSA
 * runtime stores the loaded code object handle. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
 * is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The program code object is
 * not compatible with the executable or the implementation (for example, the
 * code object uses an extension that is not supported by the implementation).
 */
hsa_status_t HSA_API hsa_executable_load_program_code_object(
    hsa_executable_t executable,
    hsa_code_object_reader_t code_object_reader,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object);

/**
 * @brief Load an agent code object into an executable.
 *
 * @details The agent code object contains all defined agent
 * allocation variables, functions, indirect functions, and kernels in a given
 * program for a given instruction set architecture.
 *
 * Any module linkage declaration must have been defined either by a define
 * variable or by loading a code object that has a symbol with module linkage
 * definition.
 *
 * The default floating-point rounding mode of the code object associated with
 * @p code_object_reader must match that of the executable
 * (::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE), or be default (in which
 * case the value of ::HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE is used).
 * If the agent code object uses extensions, the implementation and the agent
 * must support them for this operation to return successfully.
 *
 * @param[in] executable Executable. Must not be in frozen state.
 *
 * @param[in] agent Agent to load code object for. A code object can be loaded
 * into an executable at most once for a given agent. The instruction set
 * architecture of the code object must be supported by the agent.
 *
 * @param[in] code_object_reader A code object reader that holds the code object
 * to load. If a code object reader is destroyed before all the associated
 * executables are destroyed, the behavior is undefined.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] loaded_code_object Pointer to a memory location where the HSA
 * runtime stores the loaded code object handle. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE The executable is frozen.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER @p code_object_reader
 * is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS The code object read by @p
 * code_object_reader is not compatible with the agent (for example, the agent
 * does not support the instruction set architecture of the code object), the
 * executable (for example, there is a default floating-point rounding mode
 * mismatch between the two), or the implementation.
 */
hsa_status_t HSA_API hsa_executable_load_agent_code_object(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_code_object_reader_t code_object_reader,
    const char *options,
    hsa_loaded_code_object_t *loaded_code_object);

/**
 * @brief Freeze the executable.
 *
 * @details No modifications to the executable can be made after freezing: no
 * code objects can be loaded into the executable, and no external variables can
 * be defined. Freezing the executable does not prevent querying the
 * executable's attributes. The application must define all the external
 * variables in an executable before freezing it.
 *
 * @param[in] executable Executable to freeze.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are
 * undefined in the executable.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen.
 */
hsa_status_t HSA_API hsa_executable_freeze(
    hsa_executable_t executable,
    const char *options);

/**
 * @brief Executable attributes.
 *
 * @details These attributes are queried with ::hsa_executable_get_info.
 */
typedef enum {
  /**
   * Profile this executable is created for. The type of this attribute is
   * ::hsa_profile_t.
   */
  HSA_EXECUTABLE_INFO_PROFILE = 1,
  /**
   * Executable state. The type of this attribute is ::hsa_executable_state_t.
   */
  HSA_EXECUTABLE_INFO_STATE = 2,
  /**
   * Default floating-point rounding mode specified when the executable was
   * created. The type of this attribute is
   * ::hsa_default_float_rounding_mode_t.
   */
  HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 3
} hsa_executable_info_t;

/**
 * @brief Get the current value of an attribute for a given executable.
 *
 * @param[in] executable Executable.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. Must not be NULL. If the buffer passed by the
 * application is not large enough to hold the value of @p attribute, the
 * behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * executable attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_executable_get_info(
    hsa_executable_t executable,
    hsa_executable_info_t attribute,
    void *value);

/**
 * @brief Define an external global variable with program allocation.
 *
 * @details This function allows the application to provide the definition
 * of a variable in the global segment memory with program allocation. The
 * variable must be defined before loading a code object into an executable.
 * In addition, code objects loaded must not define the variable.
 *
 * @param[in] executable Executable. Must not be in frozen state.
 *
 * @param[in] variable_name Name of the variable. Must not be NULL. The
 * Programmer's Reference Manual describes the standard name mangling scheme.
 *
 * @param[in] address Address where the variable is defined. This address must
 * be in global memory and can be read and written by any agent in the
 * system. The application cannot deallocate the buffer pointed to by
 * @p address before @p executable is destroyed.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
 * already defined.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
 * @p variable_name.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
 */
hsa_status_t HSA_API hsa_executable_global_variable_define(
    hsa_executable_t executable,
    const char *variable_name,
    void *address);

/**
 * @brief Define an external global variable with agent allocation.
 *
 * @details This function allows the application to provide the definition
 * of a variable in the global segment memory with agent allocation. The
 * variable must be defined before loading a code object into an executable.
 * In addition, code objects loaded must not define the variable.
 *
 * @param[in] executable Executable. Must not be in frozen state.
 *
 * @param[in] agent Agent for which the variable is being defined.
 *
 * @param[in] variable_name Name of the variable. Must not be NULL. The
 * Programmer's Reference Manual describes the standard name mangling scheme.
 *
 * @param[in] address Address where the variable is defined. This address must
 * have been previously allocated using ::hsa_memory_allocate in a global region
 * that is only visible to @p agent. The application cannot deallocate the
 * buffer pointed to by @p address before @p executable is destroyed.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
 * already defined.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
 * @p variable_name.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
 */
hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
    hsa_executable_t executable,
    hsa_agent_t agent,
    const char *variable_name,
    void *address);

/**
 * @brief Define an external readonly variable.
 *
 * @details This function allows the application to provide the definition
 * of a variable in the readonly segment memory. The variable must be defined
 * before loading a code object into an executable. In addition, code objects
 * loaded must not define the variable.
 *
 * @param[in] executable Executable. Must not be in frozen state.
 *
 * @param[in] agent Agent for which the variable is being defined.
 *
 * @param[in] variable_name Name of the variable. Must not be NULL. The
 * Programmer's Reference Manual describes the standard name mangling scheme.
 *
 * @param[in] address Address where the variable is defined. This address must
 * have been previously allocated using ::hsa_memory_allocate in a readonly
 * region associated with @p agent. The buffer pointed to by @p address is owned
 * by the application, and cannot be deallocated before @p executable is
 * destroyed.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
 * already defined.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
 * @p variable_name.
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
 */
hsa_status_t HSA_API hsa_executable_readonly_variable_define(
    hsa_executable_t executable,
    hsa_agent_t agent,
    const char *variable_name,
    void *address);

/**
 * @brief Validate an executable.
 *
 * @details Checks that all code objects have matching machine model, profile,
 * and default floating-point rounding mode. Checks that all declarations have
 * definitions. Checks declaration-definition compatibility (see the HSA
 * Programmer's Reference Manual for compatibility rules). Invoking this
 * function is equivalent to invoking ::hsa_executable_validate_alt with no
 * options.
 *
 * @param[in] executable Executable. Must be in frozen state.
 *
 * @param[out] result Memory location where the HSA runtime stores the
 * validation result. If the executable passes validation, the result is 0.
 * Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
 */
hsa_status_t HSA_API hsa_executable_validate(
    hsa_executable_t executable,
    uint32_t *result);

/**
 * @brief Validate an executable.
 *
 * @details Checks that all code objects have matching machine model, profile,
 * and default floating-point rounding mode. Checks that all declarations have
 * definitions. Checks declaration-definition compatibility (see the HSA
 * Programmer's Reference Manual for compatibility rules).
 *
 * @param[in] executable Executable. Must be in frozen state.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] result Memory location where the HSA runtime stores the
 * validation result. If the executable passes validation, the result is 0.
 * Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
 */
hsa_status_t HSA_API hsa_executable_validate_alt(
    hsa_executable_t executable,
    const char *options,
    uint32_t *result);

/**
 * @brief Executable symbol handle.
 *
 * @details The lifetime of an executable symbol matches that of the executable
 * associated with it. An operation on a symbol whose associated executable has
 * been destroyed results in undefined behavior.
 */
typedef struct hsa_executable_symbol_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_executable_symbol_t;

/**
 * @deprecated Use ::hsa_executable_get_symbol_by_name instead.
 *
 * @brief Get the symbol handle for a given symbol name.
 *
 * @param[in] executable Executable.
 *
 * @param[in] module_name Module name. Must be NULL if the symbol has
 * program linkage.
 *
 * @param[in] symbol_name Symbol name. Must not be NULL.
 *
 * @param[in] agent Agent associated with the symbol. If the symbol is
 * independent of any agent (for example, a variable with program
 * allocation), this argument is ignored.
 *
 * @param[in] call_convention Call convention associated with the symbol. If the
 * symbol does not correspond to an indirect function, this argument is ignored.
 *
 * @param[out] symbol Memory location where the HSA runtime stores the symbol
 * handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
 * that matches @p symbol_name.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
 * @p symbol is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_get_symbol(
    hsa_executable_t executable,
    const char *module_name,
    const char *symbol_name,
    hsa_agent_t agent,
    int32_t call_convention,
    hsa_executable_symbol_t *symbol);

/**
 * @brief Retrieve the symbol handle corresponding to a given symbol name.
 *
 * @param[in] executable Executable.
 *
 * @param[in] symbol_name Symbol name. Must be a NUL-terminated character
 * array. Must not be NULL. The Programmer's Reference Manual describes the
 * standard name mangling scheme.
 *
 * @param[in] agent Pointer to the agent for which the symbol with the given
 * name is defined. If the symbol corresponding to the given name has program
 * allocation, @p agent must be NULL.
 *
 * @param[out] symbol Memory location where the HSA runtime stores the symbol
 * handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
 * that matches @p symbol_name.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or @p
 * symbol is NULL.
 */
hsa_status_t HSA_API hsa_executable_get_symbol_by_name(
    hsa_executable_t executable,
    const char *symbol_name,
    const hsa_agent_t *agent,
    hsa_executable_symbol_t *symbol);

/**
 * @brief Symbol type.
 *
 * @details This is the type of the ::HSA_EXECUTABLE_SYMBOL_INFO_TYPE
 * attribute.
 */
typedef enum {
  /**
   * Variable.
   */
  HSA_SYMBOL_KIND_VARIABLE = 0,
  /**
   * Kernel.
   */
  HSA_SYMBOL_KIND_KERNEL = 1,
  /**
   * Indirect function.
   */
  HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
} hsa_symbol_kind_t;

/**
 * @brief Linkage type of a symbol.
 *
 * @details This is the type of the ::HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE
 * attribute.
 */
typedef enum {
  /**
   * Module linkage.
   */
  HSA_SYMBOL_LINKAGE_MODULE = 0,
  /**
   * Program linkage.
   */
  HSA_SYMBOL_LINKAGE_PROGRAM = 1
} hsa_symbol_linkage_t;

/**
 * @brief Allocation type of a variable.
 *
 * @details This is the type of the
 * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION attribute.
 */
typedef enum {
  /**
   * Agent allocation.
   */
  HSA_VARIABLE_ALLOCATION_AGENT = 0,
  /**
   * Program allocation.
   */
  HSA_VARIABLE_ALLOCATION_PROGRAM = 1
} hsa_variable_allocation_t;

/**
 * @brief Memory segment associated with a variable.
 *
 * @details This is the type of the
 * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT attribute.
 */
typedef enum {
  /**
   * Global memory segment.
   */
  HSA_VARIABLE_SEGMENT_GLOBAL = 0,
  /**
   * Readonly memory segment.
   */
  HSA_VARIABLE_SEGMENT_READONLY = 1
} hsa_variable_segment_t;

/**
 * @brief Executable symbol attributes.
 *
 * @details These attributes are queried with
 * ::hsa_executable_symbol_get_info. Note that the numeric values of the
 * enumerators are not contiguous and do not follow declaration order.
 */
typedef enum {
  /**
   * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
  /**
   * The length of the symbol name in bytes, not including the NUL terminator.
   * The type of this attribute is uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
  /**
   * The name of the symbol. The type of this attribute is character array with
   * the length equal to the value of ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH
   * attribute.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
  /**
   * @deprecated
   *
   * The length of the module name in bytes (not including the NUL terminator)
   * to which this symbol belongs if this symbol has module linkage, otherwise 0
   * is returned. The type of this attribute is uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
  /**
   * @deprecated
   *
   * The module name to which this symbol belongs if this symbol has module
   * linkage, otherwise an empty string is returned. The type of this attribute
   * is character array with the length equal to the value of
   * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
  /**
   * @deprecated
   *
   * Agent associated with this symbol. If the symbol is a variable, the
   * value of this attribute is only defined if
   * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is
   * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is
   * ::hsa_agent_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
  /**
   * The address of the variable. The value of this attribute is undefined if
   * the symbol is not a variable. The type of this attribute is uint64_t.
   *
   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
   * is returned.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
  /**
   * The linkage kind of the symbol. The type of this attribute is
   * ::hsa_symbol_linkage_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
  /**
   * Indicates whether the symbol corresponds to a definition. The type of this
   * attribute is bool.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
  /**
   * @deprecated
   *
   * The allocation kind of the variable. The value of this attribute is
   * undefined if the symbol is not a variable. The type of this attribute is
   * ::hsa_variable_allocation_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
  /**
   * @deprecated
   *
   * The segment kind of the variable. The value of this attribute is undefined
   * if the symbol is not a variable. The type of this attribute is
   * ::hsa_variable_segment_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
  /**
   * @deprecated
   *
   * Alignment of the symbol in memory. The value of this attribute is undefined
   * if the symbol is not a variable. The type of this attribute is uint32_t.
   *
   * The current alignment of the variable in memory may be greater than the
   * value specified in the source program variable declaration.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
  /**
   * @deprecated
   *
   * Size of the variable. The value of this attribute is undefined if
   * the symbol is not a variable. The type of this attribute is uint32_t.
   *
   * A value of 0 is returned if the variable is an external variable and has an
   * unknown dimension.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
  /**
   * @deprecated
   *
   * Indicates whether the variable is constant. The value of this attribute is
   * undefined if the symbol is not a variable. The type of this attribute is
   * bool.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
  /**
   * Kernel object handle, used in the kernel dispatch packet. The value of this
   * attribute is undefined if the symbol is not a kernel. The type of this
   * attribute is uint64_t.
   *
   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
   * is returned.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
  /**
   * Size of kernarg segment memory that is required to hold the values of the
   * kernel arguments, in bytes. Must be a multiple of 16. The value of this
   * attribute is undefined if the symbol is not a kernel. The type of this
   * attribute is uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
  /**
   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
   * which is the maximum of 16 and the maximum alignment of any of the kernel
   * arguments. The value of this attribute is undefined if the symbol is not a
   * kernel. The type of this attribute is uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
  /**
   * Size of static group segment memory required by the kernel (per
   * work-group), in bytes. The value of this attribute is undefined
   * if the symbol is not a kernel. The type of this attribute is uint32_t.
   *
   * The reported amount does not include any dynamically allocated group
   * segment memory that may be requested by the application when a kernel is
   * dispatched.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
  /**
   * Size of static private, spill, and arg segment memory required by
   * this kernel (per work-item), in bytes. The value of this attribute is
   * undefined if the symbol is not a kernel. The type of this attribute is
   * uint32_t.
   *
   * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is
   * true, the kernel may use more private memory than the reported value, and
   * the application must add the dynamic call stack usage to @a
   * private_segment_size when populating a kernel dispatch packet.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
  /**
   * Dynamic callstack flag. The value of this attribute is undefined if the
   * symbol is not a kernel. The type of this attribute is bool.
   *
   * If this flag is set (the value is true), the kernel uses a dynamically
   * sized call stack. This can happen if recursive calls, calls to indirect
   * functions, or the HSAIL alloca instruction are present in the kernel.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
  /**
   * @deprecated
   *
   * Call convention of the kernel. The value of this attribute is undefined if
   * the symbol is not a kernel. The type of this attribute is uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
  /**
   * Indirect function object handle. The value of this attribute is undefined
   * if the symbol is not an indirect function, or the associated agent does
   * not support the Full Profile. The type of this attribute depends on the
   * machine model: the type is uint32_t for small machine model, and uint64_t
   * for large model.
   *
   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
   * is returned.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
  /**
   * @deprecated
   *
   * Call convention of the indirect function. The value of this attribute is
   * undefined if the symbol is not an indirect function, or the associated
   * agent does not support the Full Profile. The type of this attribute is
   * uint32_t.
   */
  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
} hsa_executable_symbol_info_t;

/**
 * @brief Get the current value of an attribute for a given executable symbol.
 *
 * @param[in] executable_symbol Executable symbol.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE_SYMBOL The executable symbol is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * executable symbol attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_executable_symbol_get_info(
    hsa_executable_symbol_t executable_symbol,
    hsa_executable_symbol_info_t attribute,
    void *value);

/**
 * @deprecated
 *
 * @brief Iterate over the symbols in an executable, and invoke an
 * application-defined callback on every iteration.
 *
 * @param[in] executable Executable.
 *
 * @param[in] callback Callback to be invoked once per executable symbol. The
 * HSA runtime passes three arguments to the callback: the executable, a symbol,
 * and the application data.  If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_executable_iterate_symbols returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_iterate_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data);

/**
 * @brief Iterate over the kernels, indirect functions, and agent allocation
 * variables in an executable for a given agent, and invoke an application-
 * defined callback on every iteration.
 *
 * @param[in] executable Executable.
 *
 * @param[in] agent Agent.
 *
 * @param[in] callback Callback to be invoked once per executable symbol. The
 * HSA runtime passes four arguments to the callback: the executable, the
 * agent, a symbol, and the application data.  If @p callback returns a status
 * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
 * stops and ::hsa_executable_iterate_agent_symbols returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_executable_iterate_agent_symbols(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_agent_t agent,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data);

/**
 * @brief Iterate over the program allocation variables in an executable, and
 * invoke an application-defined callback on every iteration.
 *
 * @param[in] executable Executable.
 *
 * @param[in] callback Callback to be invoked once per executable symbol. The
 * HSA runtime passes three arguments to the callback: the executable, a symbol,
 * and the application data.  If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_executable_iterate_program_symbols returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_executable_iterate_program_symbols(
    hsa_executable_t executable,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data);

/** @} */


/** \defgroup code-object Code Objects (deprecated).
 *  @{
 */

/**
 * @deprecated
 *
 * @brief Struct containing an opaque handle to a code object, which contains
 * ISA for finalized kernels and indirect functions together with information
 * about the global or readonly segment variables they reference.
 */
typedef struct hsa_code_object_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_code_object_t;

/**
 * @deprecated
 *
 * @brief Application data handle that is passed to the serialization
 * and deserialization functions.
 */
typedef struct hsa_callback_data_s {
  /**
   * Opaque handle.
   */
  uint64_t handle;
} hsa_callback_data_t;

/**
 * @deprecated
 *
 * @brief Serialize a code object. Can be used for offline finalization,
 * install-time finalization, disk code caching, etc.
 *
 * @param[in] code_object Code object.
 *
 * @param[in] alloc_callback Callback function for memory allocation. Must not
 * be NULL. The HSA runtime passes three arguments to the callback: the
 * allocation size, the application data, and a pointer to a memory location
 * where the application stores the allocation result. The HSA runtime invokes
 * @p alloc_callback once to allocate a buffer that contains the serialized
 * version of @p code_object.  If the callback returns a status code other than
 * ::HSA_STATUS_SUCCESS, this function returns the same code.
 *
 * @param[in] callback_data Application data that is passed to @p
 * alloc_callback. May be NULL.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] serialized_code_object Memory location where the HSA runtime
 * stores a pointer to the serialized code object. Must not be NULL.
 *
 * @param[out] serialized_code_object_size Memory location where the HSA runtime
 * stores the size (in bytes) of @p serialized_code_object. The returned value
 * matches the allocation size passed by the HSA runtime to @p
 * alloc_callback. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p
 * serialized_code_object, or @p serialized_code_object_size is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_serialize(
    hsa_code_object_t code_object,
    hsa_status_t (*alloc_callback)(size_t size,
                                   hsa_callback_data_t data,
                                   void **address),
    hsa_callback_data_t callback_data,
    const char *options,
    void **serialized_code_object,
    size_t *serialized_code_object_size);

/**
 * @deprecated
 *
 * @brief Deserialize a code object.
 *
 * @param[in] serialized_code_object A serialized code object. Must not be NULL.
 *
 * @param[in] serialized_code_object_size The size (in bytes) of @p
 * serialized_code_object. Must not be 0.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @param[out] code_object Memory location where the HSA runtime stores the
 * deserialized code object.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object or @p
 * code_object is NULL, or @p serialized_code_object_size is 0.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_deserialize(
    void *serialized_code_object,
    size_t serialized_code_object_size,
    const char *options,
    hsa_code_object_t *code_object);

/**
 * @deprecated
 *
 * @brief Destroy a code object.
 *
 * @details The lifetime of a code object must exceed that of any executable
 * where it has been loaded. If an executable that loaded @p code_object has not
 * been destroyed, the behavior is undefined.
 *
 * @param[in] code_object Code object. The handle becomes invalid after it has
 * been destroyed.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_destroy(
    hsa_code_object_t code_object);

/**
 * @deprecated
 *
 * @brief Code object type.
 */
typedef enum {
  /**
   * Produces code object that contains ISA for all kernels and indirect
   * functions in HSA source.
   */
  HSA_CODE_OBJECT_TYPE_PROGRAM = 0
} hsa_code_object_type_t;

/**
 * @deprecated
 *
 * @brief Code object attributes.
 */
typedef enum {
  /**
   * The version of the code object. The type of this attribute is a
   * NUL-terminated char[64]. The version string must be at most 63 characters
   * long (not including the NUL terminator) and all array elements not used
   * for the version string must be NUL.
   */
  HSA_CODE_OBJECT_INFO_VERSION = 0,
  /**
   * Type of code object. The type of this attribute is
   * ::hsa_code_object_type_t.
   */
  HSA_CODE_OBJECT_INFO_TYPE = 1,
  /**
   * Instruction set architecture this code object is produced for. The type of
   * this attribute is ::hsa_isa_t.
   */
  HSA_CODE_OBJECT_INFO_ISA = 2,
  /**
   * Machine model this code object is produced for. The type of this attribute
   * is ::hsa_machine_model_t.
   */
  HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
  /**
   * Profile this code object is produced for. The type of this attribute is
   * ::hsa_profile_t.
   */
  HSA_CODE_OBJECT_INFO_PROFILE = 4,
  /**
   * Default floating-point rounding mode used when the code object is
   * produced. The type of this attribute is
   * ::hsa_default_float_rounding_mode_t.
   */
  HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
} hsa_code_object_info_t;

/**
 * @deprecated
 *
 * @brief Get the current value of an attribute for a given code object.
 *
 * @param[in] code_object Code object.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * code object attribute, or @p value is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_info(
    hsa_code_object_t code_object,
    hsa_code_object_info_t attribute,
    void *value);

/**
 * @deprecated
 *
 * @brief Load code object into the executable.
 *
 * @details Every global or readonly variable that is external must be defined
 * before loading the code object. An internal global or readonly variable is
 * allocated when the code object being loaded references it and it has not
 * already been allocated.
 *
 * Any module linkage declaration must have been defined either by a define
 * variable or by loading a code object that has a symbol with module linkage
 * definition.
 *
 * @param[in] executable Executable.
 *
 * @param[in] agent Agent to load code object for. The agent must support the
 * default floating-point rounding mode used by @p code_object.
 *
 * @param[in] code_object Code object to load.  The lifetime of the code object
 * must exceed that of the executable: if @p code_object is destroyed before @p
 * executable, the behavior is undefined.
 *
 * @param[in] options Standard and vendor-specific options. Unknown options are
 * ignored. A standard option begins with the "-hsa_" prefix. Options beginning
 * with the "-hsa_ext_<extension_name>_" prefix are reserved for extensions. A
 * vendor-specific option begins with the "-<vendor_name>_" prefix. Must be a
 * NUL-terminated string. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible
 * with @p code_object (for example, @p agent does not support the default
 * floating-point rounding mode specified by @p code_object), or @p code_object
 * is not compatible with @p executable (for example, @p code_object and @p
 * executable have different machine models or profiles).
 *
 * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_executable_load_code_object(
    hsa_executable_t executable,
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options);

/**
 * @deprecated
 *
 * @brief Code object symbol handle.
 *
 * The lifetime of a code object symbol matches that of the code object
 * associated with it. An operation on a symbol whose associated code object has
 * been destroyed results in undefined behavior.
 */
typedef struct hsa_code_symbol_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_code_symbol_t;

/**
 * @deprecated
 *
 * @brief Get the symbol handle within a code object for a given symbol name.
 *
 * @param[in] code_object Code object.
 *
 * @param[in] symbol_name Symbol name.
 *
 * @param[out] symbol Memory location where the HSA runtime stores the symbol
 * handle.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
 * that matches @p symbol_name.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
 * @p symbol is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol(
    hsa_code_object_t code_object,
    const char *symbol_name,
    hsa_code_symbol_t *symbol);

/**
 * @deprecated
 *
 * @brief Get the symbol handle within a code object for a given module name
 * and symbol name.
 *
 * @param[in] code_object Code object.
 *
 * @param[in] module_name Module name. Must be NULL if the symbol has
 * program linkage.
 *
 * @param[in] symbol_name Symbol name.
 *
 * @param[out] symbol Memory location where the HSA runtime stores the symbol
 * handle.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
 * that matches @p symbol_name.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
 * @p symbol is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_get_symbol_from_name(
    hsa_code_object_t code_object,
    const char *module_name,
    const char *symbol_name,
    hsa_code_symbol_t *symbol);

/**
 * @deprecated
 *
 * @brief Code object symbol attributes.
 */
typedef enum {
  /**
   * The type of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
   */
  HSA_CODE_SYMBOL_INFO_TYPE = 0,
  /**
   * The length of the symbol name in bytes, not including the NUL terminator.
   * The type of this attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
  /**
   * The name of the symbol. The type of this attribute is character array with
   * the length equal to the value of ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH
   * attribute.
   */
  HSA_CODE_SYMBOL_INFO_NAME = 2,
  /**
   * The length of the module name in bytes (not including the NUL terminator)
   * to which this symbol belongs if this symbol has module linkage, otherwise 0
   * is returned. The type of this attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
  /**
   * The module name to which this symbol belongs if this symbol has module
   * linkage, otherwise an empty string is returned. The type of this attribute
   * is character array with the length equal to the value of
   * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
   */
  HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
  /**
   * The linkage kind of the symbol. The type of this attribute is
   * ::hsa_symbol_linkage_t.
   */
  HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
  /**
   * Indicates whether the symbol corresponds to a definition. The type of this
   * attribute is bool.
   */
  HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
  /**
   * The allocation kind of the variable. The value of this attribute is
   * undefined if the symbol is not a variable. The type of this attribute is
   * ::hsa_variable_allocation_t.
   */
  HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
  /**
   * The segment kind of the variable. The value of this attribute is
   * undefined if the symbol is not a variable. The type of this attribute is
   * ::hsa_variable_segment_t.
   */
  HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
  /**
   * Alignment of the symbol in memory. The value of this attribute is undefined
   * if the symbol is not a variable. The type of this attribute is uint32_t.
   *
   * The current alignment of the variable in memory may be greater than the
   * value specified in the source program variable declaration.
   */
  HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
  /**
   * Size of the variable. The value of this attribute is undefined if the
   * symbol is not a variable. The type of this attribute is uint32_t.
   *
   * A size of 0 is returned if the variable is an external variable and has an
   * unknown dimension.
   */
  HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
  /**
   * Indicates whether the variable is constant. The value of this attribute is
   * undefined if the symbol is not a variable. The type of this attribute is
   * bool.
   */
  HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
  /**
   * Size of kernarg segment memory that is required to hold the values of the
   * kernel arguments, in bytes. Must be a multiple of 16. The value of this
   * attribute is undefined if the symbol is not a kernel. The type of this
   * attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
  /**
   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
   * which is the maximum of 16 and the maximum alignment of any of the kernel
   * arguments. The value of this attribute is undefined if the symbol is not a
   * kernel. The type of this attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
  /**
   * Size of static group segment memory required by the kernel (per
   * work-group), in bytes. The value of this attribute is undefined
   * if the symbol is not a kernel. The type of this attribute is uint32_t.
   *
   * The reported amount does not include any dynamically allocated group
   * segment memory that may be requested by the application when a kernel is
   * dispatched.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
  /**
   * Size of static private, spill, and arg segment memory required by
   * this kernel (per work-item), in bytes. The value of this attribute is
   * undefined if the symbol is not a kernel. The type of this attribute is
   * uint32_t.
   *
   * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true,
   * the kernel may use more private memory than the reported value, and the
   * application must add the dynamic call stack usage to @a
   * private_segment_size when populating a kernel dispatch packet.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
  /**
   * Dynamic callstack flag. The value of this attribute is undefined if the
   * symbol is not a kernel. The type of this attribute is bool.
   *
   * If this flag is set (the value is true), the kernel uses a dynamically
   * sized call stack. This can happen if recursive calls, calls to indirect
   * functions, or the HSAIL alloca instruction are present in the kernel.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
  /**
   * Call convention of the kernel. The value of this attribute is undefined if
   * the symbol is not a kernel. The type of this attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_CALL_CONVENTION = 18,
  /**
   * Call convention of the indirect function. The value of this attribute is
   * undefined if the symbol is not an indirect function. The type of this
   * attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16,
  /**
   * Wavefront size used by the kernel. The value of this attribute is either
   * 32 or 64. The type of this attribute is uint32_t.
   */
  HSA_CODE_SYMBOL_INFO_KERNEL_WAVEFRONT_SIZE = 19
} hsa_code_symbol_info_t;

/**
 * @deprecated
 *
 * @brief Get the current value of an attribute for a given code symbol.
 *
 * @param[in] code_symbol Code symbol.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_SYMBOL The code symbol is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * code symbol attribute, or @p value is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_symbol_get_info(
    hsa_code_symbol_t code_symbol,
    hsa_code_symbol_info_t attribute,
    void *value);

/**
 * @deprecated
 *
 * @brief Iterate over the symbols in a code object, and invoke an
 * application-defined callback on every iteration.
 *
 * @param[in] code_object Code object.
 *
 * @param[in] callback Callback to be invoked once per code object symbol. The
 * HSA runtime passes three arguments to the callback: the code object, a
 * symbol, and the application data.  If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_code_object_iterate_symbols returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API HSA_DEPRECATED hsa_code_object_iterate_symbols(
    hsa_code_object_t code_object,
    hsa_status_t (*callback)(hsa_code_object_t code_object,
                             hsa_code_symbol_t symbol,
                             void *data),
    void *data);

/** @} */

#ifdef __cplusplus
}  // end extern "C" block
#endif

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/inc/hsa_amd_tool.h
================================================
#ifndef HSA_RUNTIME_AMD_TOOL_EVENTS_H_
#define HSA_RUNTIME_AMD_TOOL_EVENTS_H_

// Insert license header

#include <stddef.h>
#include <stdint.h>
#include "hsa.h"


// Bit flags describing a scratch allocation. This type is used for the
// `flags` member of the scratch event payload structs in this header.
typedef enum {
  // No flag bits set.
  HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE = 0,
  HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE =
      (1 << 0),  // This scratch allocation is only valid for 1 dispatch.
  HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT =
      (1 << 1),  // Used alternate scratch instead of main scratch
} hsa_amd_event_scratch_alloc_flag_t;

// Kinds of tool events. Enumerators after HSA_AMD_TOOL_EVENT_MIN (0) are
// numbered implicitly, so HSA_AMD_TOOL_EVENT_MAX always equals the last real
// kind plus one and its value changes whenever a new kind is added.
// NOTE(review): MIN and MAX look like range sentinels rather than events that
// are actually delivered -- confirm against the code that emits events.
typedef enum {
  HSA_AMD_TOOL_EVENT_MIN = 0,

  // Scratch memory tracking
  HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START,
  HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END,
  HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START,
  HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END,
  HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START,
  HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END,

  // Add new events above ^
  HSA_AMD_TOOL_EVENT_MAX
} hsa_amd_tool_event_kind_t;

// Minimal event payload that carries only the event kind. Every payload
// struct in this header begins with the same `kind` member.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
} hsa_amd_tool_event_none_t;

// Payload type behind hsa_amd_tool_event_t::scratch_alloc_start.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_START -- confirm against the emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Flags for the scratch allocation (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
  uint64_t dispatch_id;  // Dispatch ID of the AQL packet that needs more scratch memory
} hsa_amd_event_scratch_alloc_start_t;

// Payload type behind hsa_amd_tool_event_t::scratch_alloc_end. Carries the
// same members as the alloc-start payload plus the resulting allocation size
// and slot count.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_ALLOC_END -- confirm against the emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Flags for the scratch allocation (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
  uint64_t dispatch_id;  // Dispatch ID of the AQL packet that needs more scratch memory
  size_t size;           // Amount of scratch allocated - in bytes
  size_t num_slots;      // limit of number of waves
} hsa_amd_event_scratch_alloc_end_t;

// Payload type behind hsa_amd_tool_event_t::scratch_free_start.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_FREE_START -- confirm against the emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Scratch flags (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_free_start_t;

// Payload type behind hsa_amd_tool_event_t::scratch_free_end. Has the same
// layout as the free-start payload.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_FREE_END -- confirm against the emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Scratch flags (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_free_end_t;

// Payload type behind hsa_amd_tool_event_t::scratch_async_reclaim_start.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_START -- confirm against the
// emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Scratch flags (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_async_reclaim_start_t;

// Payload type behind hsa_amd_tool_event_t::scratch_async_reclaim_end. Has the
// same layout as the async-reclaim-start payload.
// NOTE(review): presumably delivered with kind ==
// HSA_AMD_TOOL_EVENT_SCRATCH_ASYNC_RECLAIM_END -- confirm against the emitter.
typedef struct {
  hsa_amd_tool_event_kind_t kind;
  // Queue associated with the event; exposed as a pointer to const.
  const hsa_queue_t* queue;
  // Scratch flags (see hsa_amd_event_scratch_alloc_flag_t).
  hsa_amd_event_scratch_alloc_flag_t flags;
} hsa_amd_event_scratch_async_reclaim_end_t;

// Event handle passed to tool callbacks: a union of const pointers, one per
// payload struct type declared in this header. All payload structs start with
// a `kind` member.
// NOTE(review): presumably a handler reads `none->kind` first and then uses
// the member matching that kind -- confirm against the runtime's usage.
typedef union {
  const hsa_amd_tool_event_none_t* none;
  const hsa_amd_event_scratch_alloc_start_t* scratch_alloc_start;
  const hsa_amd_event_scratch_alloc_end_t* scratch_alloc_end;
  const hsa_amd_event_scratch_free_start_t* scratch_free_start;
  const hsa_amd_event_scratch_free_end_t* scratch_free_end;
  const hsa_amd_event_scratch_async_reclaim_start_t* scratch_async_reclaim_start;
  const hsa_amd_event_scratch_async_reclaim_end_t* scratch_async_reclaim_end;
} hsa_amd_tool_event_t;

// Tool event callback type: receives an hsa_amd_tool_event_t by value and
// returns an hsa_status_t. How the returned status is interpreted is not
// visible in this header.
typedef hsa_status_t (*hsa_amd_tool_event)(hsa_amd_tool_event_t);


#endif

================================================
FILE: runtime/hsa-runtime/inc/hsa_api_trace.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_H
#define HSA_RUNTIME_INC_HSA_API_TRACE_H

#include "hsa.h"
#include "hsa_api_trace_version.h"
#ifdef AMD_INTERNAL_BUILD
#include "hsa_ext_image.h"
#include "hsa_ext_amd.h"
#include "hsa_ext_finalize.h"
#include "hsa_amd_tool.h"
#include "hsa_ven_amd_pc_sampling.h"
#else
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_finalize.h"
#include "inc/hsa_amd_tool.h"
#include "inc/hsa_ven_amd_pc_sampling.h"
#endif

#include <string.h>
#include <assert.h>
#include <stddef.h>

// Table MAJOR_VERSION and STEP_VERSION defines have moved to hsa_api_trace_version.h

// Returns the smaller of two unsigned 32-bit values (a when they are equal).
// The table copy helpers use it to pick the smaller of two table sizes.
static inline uint32_t Min(const uint32_t a, const uint32_t b) {
  if (b < a) {
    return b;
  }
  return a;
}

// Declarations of APIs intended for use only by tools.

// An AQL packet that can be put in an intercept queue to cause a callback to
// be invoked when the packet is about to be submitted to the underlying
// hardware queue. These packets are not copied to the underlying hardware
// queue. These packets should come immediately before the regular AQL packet
// they relate to. This implies that packet rewriters should always keep these
// packets adjacent to the regular AQL packet that follows them.
//
// This constant is the value the format field of amd_aql_intercept_marker_t
// must hold to identify such a packet.
const uint32_t AMD_AQL_FORMAT_INTERCEPT_MARKER = 0xFE;

// Forward declaration so the handler typedef below can name the packet type
// before the struct itself is defined.
struct amd_aql_intercept_marker_s;

// When an intercept queue is processing rewritten packets to put them on the
// underlying hardware queue, if it encounters an
// AMD_AQL_FORMAT_INTERCEPT_MARKER vendor AQL packet it will call the following
// handler. packet points to the packet, queue is the underlying hardware
// queue, and packet_id is the packet id of the next packet to be put on the
// underlying hardware queue. The intercept queue does not put these packets
// onto the underlying hardware queue.
typedef void (*amd_intercept_marker_handler)(const struct amd_aql_intercept_marker_s* packet,
                                             hsa_queue_t* queue, uint64_t packet_id);
// An AQL vendor packet used by the intercept queue to mark the following
// packet. The callback will be invoked to allow a tool to know where in the
// underlying hardware queue the following packet will be placed. user_data can
// be used to hold any data useful to the tool.
//
// Layout: header (2 bytes), format (1 byte) and reserved (5 bytes) make up the
// first 8 bytes. The preprocessor branches below then place the callback
// pointer, accompanied by a 32-bit pad word (reserved1) whenever
// HSA_LARGE_MODEL is not defined, ahead of the 48 bytes of user_data.
// NOTE(review): presumably the pad keeps the callback slot 64 bits wide so the
// packet has the same size in both machine models - confirm against the AQL
// packet definition.
typedef struct amd_aql_intercept_marker_s {
  uint16_t header; // Must have a packet type of HSA_PACKET_TYPE_VENDOR_SPECIFIC.
  uint8_t format; // Must be AMD_AQL_FORMAT_INTERCEPT_MARKER.
  uint8_t reserved[5]; // Must be 0.
#ifdef HSA_LARGE_MODEL
  // HSA_LARGE_MODEL: the pointer is stored without an explicit pad word.
  amd_intercept_marker_handler callback;
#elif defined HSA_LITTLE_ENDIAN
  // No HSA_LARGE_MODEL, little endian: pointer first, then the pad word.
  amd_intercept_marker_handler callback;
  uint32_t reserved1; // Must be 0.
#else
  // No HSA_LARGE_MODEL, not little endian: pad word first, then the pointer.
  uint32_t reserved1; // Must be 0.
  amd_intercept_marker_handler callback;
#endif
  // Six 64-bit words that are free for the tool's own use.
  uint64_t user_data[6];
} amd_aql_intercept_marker_t;

// Callback type that accepts a block of pkt_count packets starting at pkts.
// An instance of it is handed to hsa_amd_queue_intercept_handler as `writer`.
// NOTE(review): presumably the handler calls it to forward (possibly
// rewritten) packets towards the underlying queue - confirm in the runtime.
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* pkts, uint64_t pkt_count);

// Callback type registered with hsa_amd_queue_intercept_register. It receives
// a block of pkt_count packets at pkts, an index (user_pkt_index), the
// user_data pointer supplied at registration (as `data`, presumably - confirm)
// and a writer callback of the type declared above.
typedef void (*hsa_amd_queue_intercept_handler)(const void* pkts, uint64_t pkt_count,
                                                uint64_t user_pkt_index, void* data,
                                                hsa_amd_queue_intercept_packet_writer writer);

// Registers an intercept handler and its user_data on a queue. Exposed to
// tools through AmdExtTable::hsa_amd_queue_intercept_register_fn.
hsa_status_t hsa_amd_queue_intercept_register(hsa_queue_t* queue,
                                              hsa_amd_queue_intercept_handler callback,
                                              void* user_data);

// Creates a queue for use with interception and returns it through *queue.
// Exposed to tools through AmdExtTable::hsa_amd_queue_intercept_create_fn.
// NOTE(review): the meaning of the individual arguments is not visible in this
// header; they look like the usual queue-creation parameters - confirm.
hsa_status_t hsa_amd_queue_intercept_create(
    hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
    uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);

// Callback type that is told about a queue, the agent it belongs to and the
// user data pointer given at registration time.
typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t* queue, hsa_agent_t agent,
                                               void* data);

// Registers a notifier of the type above together with its user_data. Exposed
// to tools through AmdExtTable::hsa_amd_runtime_queue_create_register_fn.
// NOTE(review): judging by the name, the notifier fires when the runtime
// creates a queue for its own use - confirm in the runtime.
hsa_status_t hsa_amd_runtime_queue_create_register(hsa_amd_runtime_queue_notifier callback,
                                                   void* user_data);

// Structure of Version used to identify an instance of Api table
// Must be the first member (offsetof == 0) of all API tables.
// This is the root of the table passing ABI.
struct ApiTableVersion {
  // Major id of the table. copyElement()/copyTables() only copy a table when
  // the source and destination major ids are equal.
  uint32_t major_id;
  // Size in bytes of the table this header belongs to (HsaApiTableContainer
  // sets it with sizeof). The copy helpers lower it to the smaller of the
  // source and destination sizes.
  uint32_t minor_id;
  // Step id of the table. The copy helpers overwrite the destination's value
  // with the source's.
  uint32_t step_id;
  // Reserved; carries no meaning for the copy helpers in this header.
  uint32_t reserved;
};

// Table of function pointers for tools to use. Every entry has the type
// hsa_amd_tool_event (presumably declared in hsa_amd_tool.h, included above).
// The entries form start/end pairs for three scratch events: alloc, free and
// async reclaim.
// NOTE(review): who invokes these entries and when is not visible in this
// header - confirm against the runtime.
struct ToolsApiTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;

  hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_start_fn;
  hsa_amd_tool_event hsa_amd_tool_scratch_event_alloc_end_fn;
  hsa_amd_tool_event hsa_amd_tool_scratch_event_free_start_fn;
  hsa_amd_tool_event hsa_amd_tool_scratch_event_free_end_fn;
  hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_start_fn;
  hsa_amd_tool_event hsa_amd_tool_scratch_event_async_reclaim_end_fn;
};

// Function-pointer table for the HSA Finalizer extension. Each entry is named
// after the API it points to (with an _fn suffix) and takes its type from
// that API via decltype. The member order is part of the table ABI.
struct FinalizerExtTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;

  decltype(hsa_ext_program_create)* hsa_ext_program_create_fn;
  decltype(hsa_ext_program_destroy)* hsa_ext_program_destroy_fn;
  decltype(hsa_ext_program_add_module)* hsa_ext_program_add_module_fn;
  decltype(hsa_ext_program_iterate_modules)* hsa_ext_program_iterate_modules_fn;
  decltype(hsa_ext_program_get_info)* hsa_ext_program_get_info_fn;
  decltype(hsa_ext_program_finalize)* hsa_ext_program_finalize_fn;
};

// Function-pointer table for the HSA Image extension. Each entry is named
// after the API it points to (with an _fn suffix) and takes its type from
// that API via decltype. The member order is part of the table ABI.
struct ImageExtTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;

  decltype(hsa_ext_image_get_capability)* hsa_ext_image_get_capability_fn;
  decltype(hsa_ext_image_data_get_info)* hsa_ext_image_data_get_info_fn;
  decltype(hsa_ext_image_create)* hsa_ext_image_create_fn;
  decltype(hsa_ext_image_import)* hsa_ext_image_import_fn;
  decltype(hsa_ext_image_export)* hsa_ext_image_export_fn;
  decltype(hsa_ext_image_copy)* hsa_ext_image_copy_fn;
  decltype(hsa_ext_image_clear)* hsa_ext_image_clear_fn;
  decltype(hsa_ext_image_destroy)* hsa_ext_image_destroy_fn;
  decltype(hsa_ext_sampler_create)* hsa_ext_sampler_create_fn;
  decltype(hsa_ext_sampler_destroy)* hsa_ext_sampler_destroy_fn;
  decltype(hsa_ext_image_get_capability_with_layout)* hsa_ext_image_get_capability_with_layout_fn;
  decltype(hsa_ext_image_data_get_info_with_layout)* hsa_ext_image_data_get_info_with_layout_fn;
  decltype(hsa_ext_image_create_with_layout)* hsa_ext_image_create_with_layout_fn;
  decltype(hsa_ext_sampler_create_v2)* hsa_ext_sampler_create_v2_fn;
};

// Table to export HSA PC Sampling Extension Apis
// Each entry is named after the hsa_ven_amd_pcs_* API it points to (with an
// _fn suffix) and takes its type from that API via decltype. The member order
// is part of the table ABI.
struct PcSamplingExtTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;
  decltype(hsa_ven_amd_pcs_iterate_configuration)* hsa_ven_amd_pcs_iterate_configuration_fn;
  decltype(hsa_ven_amd_pcs_create)* hsa_ven_amd_pcs_create_fn;
  decltype(hsa_ven_amd_pcs_create_from_id)* hsa_ven_amd_pcs_create_from_id_fn;
  decltype(hsa_ven_amd_pcs_destroy)* hsa_ven_amd_pcs_destroy_fn;
  decltype(hsa_ven_amd_pcs_start)* hsa_ven_amd_pcs_start_fn;
  decltype(hsa_ven_amd_pcs_stop)* hsa_ven_amd_pcs_stop_fn;
  decltype(hsa_ven_amd_pcs_flush)* hsa_ven_amd_pcs_flush_fn;
};


// Table to export AMD Extension Apis
// Each entry is named after the hsa_amd_* API it points to (with an _fn
// suffix) and takes its type from that API via decltype.
// copyElement() copies only the leading Min(dest size, src size) bytes of a
// table, so a member's offset is part of the ABI: do not reorder or remove
// entries.
struct AmdExtTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;
	decltype(hsa_amd_coherency_get_type)* hsa_amd_coherency_get_type_fn;
	decltype(hsa_amd_coherency_set_type)* hsa_amd_coherency_set_type_fn;
  decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled_fn;
  decltype(hsa_amd_profiling_async_copy_enable) *hsa_amd_profiling_async_copy_enable_fn;
  decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time_fn;
  decltype(hsa_amd_profiling_get_async_copy_time) *hsa_amd_profiling_get_async_copy_time_fn;
  decltype(hsa_amd_profiling_convert_tick_to_system_domain)* hsa_amd_profiling_convert_tick_to_system_domain_fn;
  decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler_fn;
  decltype(hsa_amd_async_function)* hsa_amd_async_function_fn;
  decltype(hsa_amd_signal_wait_any)* hsa_amd_signal_wait_any_fn;
  decltype(hsa_amd_queue_cu_set_mask)* hsa_amd_queue_cu_set_mask_fn;
  decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info_fn;
  decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools_fn;
  decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate_fn;
  decltype(hsa_amd_memory_pool_free)* hsa_amd_memory_pool_free_fn;
  decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
  decltype(hsa_amd_memory_async_copy_on_engine)* hsa_amd_memory_async_copy_on_engine_fn;
  decltype(hsa_amd_memory_copy_engine_status)* hsa_amd_memory_copy_engine_status_fn;
  decltype(hsa_amd_agent_memory_pool_get_info)* hsa_amd_agent_memory_pool_get_info_fn;
  decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access_fn;
  decltype(hsa_amd_memory_pool_can_migrate)* hsa_amd_memory_pool_can_migrate_fn;
  decltype(hsa_amd_memory_migrate)* hsa_amd_memory_migrate_fn;
  decltype(hsa_amd_memory_lock)* hsa_amd_memory_lock_fn;
  decltype(hsa_amd_memory_unlock)* hsa_amd_memory_unlock_fn;
  decltype(hsa_amd_memory_fill)* hsa_amd_memory_fill_fn;
  decltype(hsa_amd_interop_map_buffer)* hsa_amd_interop_map_buffer_fn;
  decltype(hsa_amd_interop_unmap_buffer)* hsa_amd_interop_unmap_buffer_fn;
  decltype(hsa_amd_image_create)* hsa_amd_image_create_fn;
  decltype(hsa_amd_pointer_info)* hsa_amd_pointer_info_fn;
  decltype(hsa_amd_pointer_info_set_userdata)* hsa_amd_pointer_info_set_userdata_fn;
  decltype(hsa_amd_ipc_memory_create)* hsa_amd_ipc_memory_create_fn;
  decltype(hsa_amd_ipc_memory_attach)* hsa_amd_ipc_memory_attach_fn;
  decltype(hsa_amd_ipc_memory_detach)* hsa_amd_ipc_memory_detach_fn;
  decltype(hsa_amd_signal_create)* hsa_amd_signal_create_fn;
  decltype(hsa_amd_ipc_signal_create)* hsa_amd_ipc_signal_create_fn;
  decltype(hsa_amd_ipc_signal_attach)* hsa_amd_ipc_signal_attach_fn;
  decltype(hsa_amd_register_system_event_handler)* hsa_amd_register_system_event_handler_fn;
  // The two queue-intercept entries below refer to prototypes declared
  // earlier in this header (tool-only APIs).
  decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
  decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
  decltype(hsa_amd_queue_set_priority)* hsa_amd_queue_set_priority_fn;
  decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
  // Prototype declared earlier in this header (tool-only API).
  decltype(hsa_amd_runtime_queue_create_register)* hsa_amd_runtime_queue_create_register_fn;
  decltype(hsa_amd_memory_lock_to_pool)* hsa_amd_memory_lock_to_pool_fn;
  decltype(hsa_amd_register_deallocation_callback)* hsa_amd_register_deallocation_callback_fn;
  decltype(hsa_amd_deregister_deallocation_callback)* hsa_amd_deregister_deallocation_callback_fn;
  decltype(hsa_amd_signal_value_pointer)* hsa_amd_signal_value_pointer_fn;
  decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
  decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
  decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
  decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
  decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
  decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
  decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
  decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
  decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
  decltype(hsa_amd_vmem_address_reserve)* hsa_amd_vmem_address_reserve_fn;
  decltype(hsa_amd_vmem_address_free)* hsa_amd_vmem_address_free_fn;
  decltype(hsa_amd_vmem_handle_create)* hsa_amd_vmem_handle_create_fn;
  decltype(hsa_amd_vmem_handle_release)* hsa_amd_vmem_handle_release_fn;
  decltype(hsa_amd_vmem_map)* hsa_amd_vmem_map_fn;
  decltype(hsa_amd_vmem_unmap)* hsa_amd_vmem_unmap_fn;
  decltype(hsa_amd_vmem_set_access)* hsa_amd_vmem_set_access_fn;
  decltype(hsa_amd_vmem_get_access)* hsa_amd_vmem_get_access_fn;
  decltype(hsa_amd_vmem_export_shareable_handle)* hsa_amd_vmem_export_shareable_handle_fn;
  decltype(hsa_amd_vmem_import_shareable_handle)* hsa_amd_vmem_import_shareable_handle_fn;
  decltype(hsa_amd_vmem_retain_alloc_handle)* hsa_amd_vmem_retain_alloc_handle_fn;
  decltype(hsa_amd_vmem_get_alloc_properties_from_handle)*
      hsa_amd_vmem_get_alloc_properties_from_handle_fn;
  decltype(hsa_amd_agent_set_async_scratch_limit)* hsa_amd_agent_set_async_scratch_limit_fn;
  decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
  decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
  decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
  decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
  decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
  decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
};

// Table to export HSA Core Runtime Apis
// Each entry is named after the core hsa_* API it points to (with an _fn
// suffix) and takes its type from that API via decltype.
// copyElement() copies only the leading Min(dest size, src size) bytes of a
// table, so a member's offset is part of the ABI: do not reorder or remove
// entries.
struct CoreApiTable {
  // Must stay first: the copy helpers treat every table as starting with it.
  ApiTableVersion version;

  //===--- Runtime, System and Agent --------------------------------------===//

  decltype(hsa_init)* hsa_init_fn;
  decltype(hsa_shut_down)* hsa_shut_down_fn;
  decltype(hsa_system_get_info)* hsa_system_get_info_fn;
  decltype(hsa_system_extension_supported)* hsa_system_extension_supported_fn;
  decltype(hsa_system_get_extension_table)* hsa_system_get_extension_table_fn;
  decltype(hsa_iterate_agents)* hsa_iterate_agents_fn;
  decltype(hsa_agent_get_info)* hsa_agent_get_info_fn;

  //===--- Queues ---------------------------------------------------------===//

  decltype(hsa_queue_create)* hsa_queue_create_fn;
  decltype(hsa_soft_queue_create)* hsa_soft_queue_create_fn;
  decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
  decltype(hsa_queue_inactivate)* hsa_queue_inactivate_fn;
  decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
  decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
  decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
  decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
  decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
  decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
  decltype(hsa_queue_cas_write_index_scacq_screl)* hsa_queue_cas_write_index_scacq_screl_fn;
  decltype(hsa_queue_cas_write_index_scacquire)* hsa_queue_cas_write_index_scacquire_fn;
  decltype(hsa_queue_cas_write_index_relaxed)* hsa_queue_cas_write_index_relaxed_fn;
  decltype(hsa_queue_cas_write_index_screlease)* hsa_queue_cas_write_index_screlease_fn;
  decltype(hsa_queue_add_write_index_scacq_screl)* hsa_queue_add_write_index_scacq_screl_fn;
  decltype(hsa_queue_add_write_index_scacquire)* hsa_queue_add_write_index_scacquire_fn;
  decltype(hsa_queue_add_write_index_relaxed)* hsa_queue_add_write_index_relaxed_fn;
  decltype(hsa_queue_add_write_index_screlease)* hsa_queue_add_write_index_screlease_fn;
  decltype(hsa_queue_store_read_index_relaxed)* hsa_queue_store_read_index_relaxed_fn;
  decltype(hsa_queue_store_read_index_screlease)* hsa_queue_store_read_index_screlease_fn;

  //===--- Regions and Memory ---------------------------------------------===//

  decltype(hsa_agent_iterate_regions)* hsa_agent_iterate_regions_fn;
  decltype(hsa_region_get_info)* hsa_region_get_info_fn;
  decltype(hsa_agent_get_exception_policies)* hsa_agent_get_exception_policies_fn;
  decltype(hsa_agent_extension_supported)* hsa_agent_extension_supported_fn;
  decltype(hsa_memory_register)* hsa_memory_register_fn;
  decltype(hsa_memory_deregister)* hsa_memory_deregister_fn;
  decltype(hsa_memory_allocate)* hsa_memory_allocate_fn;
  decltype(hsa_memory_free)* hsa_memory_free_fn;
  decltype(hsa_memory_copy)* hsa_memory_copy_fn;
  decltype(hsa_memory_assign_agent)* hsa_memory_assign_agent_fn;

  //===--- Signals --------------------------------------------------------===//

  decltype(hsa_signal_create)* hsa_signal_create_fn;
  decltype(hsa_signal_destroy)* hsa_signal_destroy_fn;
  decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed_fn;
  decltype(hsa_signal_load_scacquire)* hsa_signal_load_scacquire_fn;
  decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
  decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease_fn;
  decltype(hsa_signal_wait_relaxed)* hsa_signal_wait_relaxed_fn;
  decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire_fn;
  decltype(hsa_signal_and_relaxed)* hsa_signal_and_relaxed_fn;
  decltype(hsa_signal_and_scacquire)* hsa_signal_and_scacquire_fn;
  decltype(hsa_signal_and_screlease)* hsa_signal_and_screlease_fn;
  decltype(hsa_signal_and_scacq_screl)* hsa_signal_and_scacq_screl_fn;
  decltype(hsa_signal_or_relaxed)* hsa_signal_or_relaxed_fn;
  decltype(hsa_signal_or_scacquire)* hsa_signal_or_scacquire_fn;
  decltype(hsa_signal_or_screlease)* hsa_signal_or_screlease_fn;
  decltype(hsa_signal_or_scacq_screl)* hsa_signal_or_scacq_screl_fn;
  decltype(hsa_signal_xor_relaxed)* hsa_signal_xor_relaxed_fn;
  decltype(hsa_signal_xor_scacquire)* hsa_signal_xor_scacquire_fn;
  decltype(hsa_signal_xor_screlease)* hsa_signal_xor_screlease_fn;
  decltype(hsa_signal_xor_scacq_screl)* hsa_signal_xor_scacq_screl_fn;
  decltype(hsa_signal_exchange_relaxed)* hsa_signal_exchange_relaxed_fn;
  decltype(hsa_signal_exchange_scacquire)* hsa_signal_exchange_scacquire_fn;
  decltype(hsa_signal_exchange_screlease)* hsa_signal_exchange_screlease_fn;
  decltype(hsa_signal_exchange_scacq_screl)* hsa_signal_exchange_scacq_screl_fn;
  decltype(hsa_signal_add_relaxed)* hsa_signal_add_relaxed_fn;
  decltype(hsa_signal_add_scacquire)* hsa_signal_add_scacquire_fn;
  decltype(hsa_signal_add_screlease)* hsa_signal_add_screlease_fn;
  decltype(hsa_signal_add_scacq_screl)* hsa_signal_add_scacq_screl_fn;
  decltype(hsa_signal_subtract_relaxed)* hsa_signal_subtract_relaxed_fn;
  decltype(hsa_signal_subtract_scacquire)* hsa_signal_subtract_scacquire_fn;
  decltype(hsa_signal_subtract_screlease)* hsa_signal_subtract_screlease_fn;
  decltype(hsa_signal_subtract_scacq_screl)* hsa_signal_subtract_scacq_screl_fn;
  decltype(hsa_signal_cas_relaxed)* hsa_signal_cas_relaxed_fn;
  decltype(hsa_signal_cas_scacquire)* hsa_signal_cas_scacquire_fn;
  decltype(hsa_signal_cas_screlease)* hsa_signal_cas_screlease_fn;
  decltype(hsa_signal_cas_scacq_screl)* hsa_signal_cas_scacq_screl_fn;

  //===--- Instruction Set Architecture -----------------------------------===//

  decltype(hsa_isa_from_name)* hsa_isa_from_name_fn;
  // Deprecated since v1.1.
  decltype(hsa_isa_get_info)* hsa_isa_get_info_fn;
  // Deprecated since v1.1.
  decltype(hsa_isa_compatible)* hsa_isa_compatible_fn;

  //===--- Code Objects (deprecated) --------------------------------------===//

  // Deprecated since v1.1.
  decltype(hsa_code_object_serialize)* hsa_code_object_serialize_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_object_deserialize)* hsa_code_object_deserialize_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_object_destroy)* hsa_code_object_destroy_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_object_get_info)* hsa_code_object_get_info_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_object_get_symbol)* hsa_code_object_get_symbol_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_symbol_get_info)* hsa_code_symbol_get_info_fn;
  // Deprecated since v1.1.
  decltype(hsa_code_object_iterate_symbols)* hsa_code_object_iterate_symbols_fn;

  //===--- Executable -----------------------------------------------------===//

  // Deprecated since v1.1.
  decltype(hsa_executable_create)* hsa_executable_create_fn;
  decltype(hsa_executable_destroy)* hsa_executable_destroy_fn;
  // Deprecated since v1.1.
  decltype(hsa_executable_load_code_object)* hsa_executable_load_code_object_fn;
  decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
  decltype(hsa_executable_get_info)* hsa_executable_get_info_fn;
  decltype(hsa_executable_global_variable_define)*
      hsa_executable_global_variable_define_fn;
  decltype(hsa_executable_agent_global_variable_define)*
      hsa_executable_agent_global_variable_define_fn;
  decltype(hsa_executable_readonly_variable_define)*
      hsa_executable_readonly_variable_define_fn;
  decltype(hsa_executable_validate)* hsa_executable_validate_fn;
  // Deprecated since v1.1.
  decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol_fn;
  decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info_fn;
  // Deprecated since v1.1.
  decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols_fn;

  //===--- Runtime Notifications ------------------------------------------===//

  decltype(hsa_status_string)* hsa_status_string_fn;

  // Start HSA v1.1 additions
  decltype(hsa_extension_get_name)* hsa_extension_get_name_fn;
  decltype(hsa_system_major_extension_supported)* hsa_system_major_extension_supported_fn;
  decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table_fn;
  decltype(hsa_agent_major_extension_supported)* hsa_agent_major_extension_supported_fn;
  decltype(hsa_cache_get_info)* hsa_cache_get_info_fn;
  decltype(hsa_agent_iterate_caches)* hsa_agent_iterate_caches_fn;
  decltype(hsa_signal_silent_store_relaxed)* hsa_signal_silent_store_relaxed_fn;
  decltype(hsa_signal_silent_store_screlease)* hsa_signal_silent_store_screlease_fn;
  decltype(hsa_signal_group_create)* hsa_signal_group_create_fn;
  decltype(hsa_signal_group_destroy)* hsa_signal_group_destroy_fn;
  decltype(hsa_signal_group_wait_any_scacquire)* hsa_signal_group_wait_any_scacquire_fn;
  decltype(hsa_signal_group_wait_any_relaxed)* hsa_signal_group_wait_any_relaxed_fn;

  //===--- Instruction Set Architecture - HSA v1.1 additions --------------===//

  decltype(hsa_agent_iterate_isas)* hsa_agent_iterate_isas_fn;
  decltype(hsa_isa_get_info_alt)* hsa_isa_get_info_alt_fn;
  decltype(hsa_isa_get_exception_policies)* hsa_isa_get_exception_policies_fn;
  decltype(hsa_isa_get_round_method)* hsa_isa_get_round_method_fn;
  decltype(hsa_wavefront_get_info)* hsa_wavefront_get_info_fn;
  decltype(hsa_isa_iterate_wavefronts)* hsa_isa_iterate_wavefronts_fn;

  //===--- Code Objects (deprecated) - HSA v1.1 additions -----------------===//

  // Deprecated since v1.1.
  decltype(hsa_code_object_get_symbol_from_name)*
      hsa_code_object_get_symbol_from_name_fn;

  //===--- Executable - HSA v1.1 additions --------------------------------===//

  decltype(hsa_code_object_reader_create_from_file)*
      hsa_code_object_reader_create_from_file_fn;
  decltype(hsa_code_object_reader_create_from_memory)*
      hsa_code_object_reader_create_from_memory_fn;
  decltype(hsa_code_object_reader_destroy)* hsa_code_object_reader_destroy_fn;
  decltype(hsa_executable_create_alt)* hsa_executable_create_alt_fn;
  decltype(hsa_executable_load_program_code_object)*
      hsa_executable_load_program_code_object_fn;
  decltype(hsa_executable_load_agent_code_object)*
      hsa_executable_load_agent_code_object_fn;
  decltype(hsa_executable_validate_alt)* hsa_executable_validate_alt_fn;
  decltype(hsa_executable_get_symbol_by_name)*
      hsa_executable_get_symbol_by_name_fn;
  decltype(hsa_executable_iterate_agent_symbols)*
      hsa_executable_iterate_agent_symbols_fn;
  decltype(hsa_executable_iterate_program_symbols)*
      hsa_executable_iterate_program_symbols_fn;
};

// Root table handed to tools: a version header followed by one pointer per
// child table (core runtime, AMD extensions, finalizer, images, tools and PC
// sampling). copyTables() uses version.minor_id together with the offset of
// each pointer to decide which child tables to copy, so the member order is
// part of the ABI.
struct HsaApiTable {
  // Version of the root table; must stay first.
  ApiTableVersion version;

  // HSA Core Runtime function pointers.
  CoreApiTable* core_;

  // AMD extension function pointers.
  AmdExtTable* amd_ext_;

  // HSA Finalizer extension function pointers.
  FinalizerExtTable* finalizer_ext_;

  // HSA Image extension function pointers.
  ImageExtTable* image_ext_;

  // Function pointers for tools to use.
  ToolsApiTable* tools_;

  // AMD PC Sampling extension function pointers.
  PcSamplingExtTable* pc_sampling_ext_;
};

// Structure containing instances of different api tables.
//
// Bundles the root HsaApiTable with one instance of every child table. The
// constructor fills in each table's version header and points the root
// table's child pointers at the sibling members of this object, so those
// pointers are only valid while this container object is alive and at the
// same address.
//
// The function-pointer entries of the tables are NOT assigned by the
// constructor; they must be populated separately.
struct HsaApiTableContainer {
  HsaApiTable root;
  CoreApiTable core;
  AmdExtTable amd_ext;
  FinalizerExtTable finalizer_ext;
  ImageExtTable image_ext;
  ToolsApiTable tools;
  PcSamplingExtTable pc_sampling_ext;

  // Default initialization of a container instance. For every table:
  //   major_id <- the table's *_MAJOR_VERSION macro
  //   minor_id <- sizeof the table struct (the size doubles as the minor id)
  //   step_id  <- the table's *_STEP_VERSION macro
  //   reserved <- 0, so the header never carries an indeterminate value
  HsaApiTableContainer() {
    root.version.major_id = HSA_API_TABLE_MAJOR_VERSION;
    root.version.minor_id = sizeof(HsaApiTable);
    root.version.step_id = HSA_API_TABLE_STEP_VERSION;
    root.version.reserved = 0;

    core.version.major_id = HSA_CORE_API_TABLE_MAJOR_VERSION;
    core.version.minor_id = sizeof(CoreApiTable);
    core.version.step_id = HSA_CORE_API_TABLE_STEP_VERSION;
    core.version.reserved = 0;
    root.core_ = &core;

    amd_ext.version.major_id = HSA_AMD_EXT_API_TABLE_MAJOR_VERSION;
    amd_ext.version.minor_id = sizeof(AmdExtTable);
    amd_ext.version.step_id = HSA_AMD_EXT_API_TABLE_STEP_VERSION;
    amd_ext.version.reserved = 0;
    root.amd_ext_ = &amd_ext;

    finalizer_ext.version.major_id = HSA_FINALIZER_API_TABLE_MAJOR_VERSION;
    finalizer_ext.version.minor_id = sizeof(FinalizerExtTable);
    finalizer_ext.version.step_id = HSA_FINALIZER_API_TABLE_STEP_VERSION;
    finalizer_ext.version.reserved = 0;
    root.finalizer_ext_ = &finalizer_ext;

    image_ext.version.major_id = HSA_IMAGE_API_TABLE_MAJOR_VERSION;
    image_ext.version.minor_id = sizeof(ImageExtTable);
    image_ext.version.step_id = HSA_IMAGE_API_TABLE_STEP_VERSION;
    image_ext.version.reserved = 0;
    root.image_ext_ = &image_ext;

    tools.version.major_id = HSA_TOOLS_API_TABLE_MAJOR_VERSION;
    tools.version.minor_id = sizeof(ToolsApiTable);
    tools.version.step_id = HSA_TOOLS_API_TABLE_STEP_VERSION;
    tools.version.reserved = 0;
    root.tools_ = &tools;

    pc_sampling_ext.version.major_id = HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION;
    pc_sampling_ext.version.minor_id = sizeof(PcSamplingExtTable);
    pc_sampling_ext.version.step_id = HSA_PC_SAMPLING_API_TABLE_STEP_VERSION;
    pc_sampling_ext.version.reserved = 0;
    root.pc_sampling_ext_ = &pc_sampling_ext;
  }
};

// Api to copy function pointers of a table.
//
// Copies the bytes that follow the leading ApiTableVersion header from one
// table into another; the header of the written table is left untouched.
//
// NOTE: the parameter names are historical and read backwards. Bytes are
// written INTO the table passed as `src` (first argument) and read FROM the
// table passed as `dest` (second argument); copyElement() accordingly calls
// copyApi(dest, src, size). The names are kept so the signature is unchanged.
//
// size is the number of bytes of the table to consider, INCLUDING the
// ApiTableVersion header; size - sizeof(ApiTableVersion) bytes are copied.
// A size smaller than the header is a caller error (asserted in debug
// builds) and copies nothing.
static
void inline copyApi(void* src, void* dest, size_t size) {
  assert(size >= sizeof(ApiTableVersion));

  // The assert vanishes under NDEBUG. Without this guard a too-small size
  // would make the unsigned subtraction below wrap around and memcpy would
  // run far past both tables. size == sizeof(ApiTableVersion) means there is
  // no payload, so returning early is equivalent to a zero-length copy.
  if (size <= sizeof(ApiTableVersion)) {
    return;
  }

  char* const write_to = static_cast<char*>(src) + sizeof(ApiTableVersion);
  const char* const read_from = static_cast<const char*>(dest) + sizeof(ApiTableVersion);
  memcpy(write_to, read_from, size - sizeof(ApiTableVersion));
}

// Copies one child table (addressed through its leading version header) from
// src into dest when the two are compatible.
//
// Compatible means src has a non-zero major id equal to dest's. In that case
// dest takes src's step id, dest's minor id (the table size) becomes the
// smaller of the two sizes, and that many bytes of function pointers are
// copied. Otherwise dest's major, minor and step ids are zeroed and nothing
// is copied.
static void inline copyElement(ApiTableVersion* dest, ApiTableVersion* src) {
  const bool compatible = (src->major_id != 0) && (dest->major_id == src->major_id);

  if (!compatible) {
    dest->major_id = 0;
    dest->minor_id = 0;
    dest->step_id = 0;
    return;
  }

  dest->step_id = src->step_id;
  dest->minor_id = Min(dest->minor_id, src->minor_id);
  copyApi(dest, src, dest->minor_id);
}

// Copy constructor for all Api tables. The function assumes the
// user has initialized an instance of tables container correctly
// for the Major, Minor and Stepping Ids of Root and Child Api tables.
// The function will overwrite the value of Minor Id by taking the
// minimum of source and destination parameters. It will also overwrite
// the stepping Id with value from source parameter.
static void inline copyTables(const HsaApiTable* src, HsaApiTable* dest) {
  // Verify Major Id of source and destination tables match
  if (dest->version.major_id != src->version.major_id) {
    dest->version.major_id = 0;
    dest->version.minor_id = 0;
    dest->version.step_id = 0;
    return;
  }

  // Initialize the stepping id and minor id of root table. For the
  // minor id which encodes struct size, take the minimum of source
  // and destination parameters
  dest->version.step_id = src->version.step_id;
  dest->version.minor_id = Min(dest->version.minor_id, src->version.minor_id);

  // Copy child tables if present
  if ((offsetof(HsaApiTable, core_) < dest->version.minor_id))
    copyElement(&dest->core_->version, &src->core_->version);
  if ((offsetof(HsaApiTable, amd_ext_) < dest->version.minor_id))
    copyElement(&dest->amd_ext_->version, &src->amd_ext_->version);
  if ((offsetof(HsaApiTable, finalizer_ext_) < dest->version.minor_id))
    copyElement(&dest->finalizer_ext_->version, &src->finalizer_ext_->version);
  if ((offsetof(HsaApiTable, image_ext_) < dest->version.minor_id))
    copyElement(&dest->image_ext_->version, &src->image_ext_->version);
  if ((offsetof(HsaApiTable, tools_) < dest->version.minor_id))
    copyElement(&dest->tools_->version, &src->tools_->version);
  if ((offsetof(HsaApiTable, pc_sampling_ext_) < dest->version.minor_id))
    copyElement(&dest->pc_sampling_ext_->version, &src->pc_sampling_ext_->version);
}
#endif


================================================
FILE: runtime/hsa-runtime/inc/hsa_api_trace_version.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H
#define HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H

// CODE IN THIS FILE **MUST** BE C-COMPATIBLE

// Major Ids of the Api tables exported by Hsa Core Runtime
// hsa_api_trace.h stores these in ApiTableVersion::major_id and only copies a
// table when the source and destination major ids are equal.
#define HSA_API_TABLE_MAJOR_VERSION                 0x03
#define HSA_CORE_API_TABLE_MAJOR_VERSION            0x02
#define HSA_AMD_EXT_API_TABLE_MAJOR_VERSION         0x02
#define HSA_FINALIZER_API_TABLE_MAJOR_VERSION       0x02
#define HSA_IMAGE_API_TABLE_MAJOR_VERSION           0x02
#define HSA_AQLPROFILE_API_TABLE_MAJOR_VERSION      0x01
#define HSA_TOOLS_API_TABLE_MAJOR_VERSION           0x01
#define HSA_PC_SAMPLING_API_TABLE_MAJOR_VERSION     0x01

// Step Ids of the Api tables exported by Hsa Core Runtime
// hsa_api_trace.h stores these in ApiTableVersion::step_id; when tables are
// copied the destination takes the source's step id.
#define HSA_API_TABLE_STEP_VERSION                  0x01
#define HSA_CORE_API_TABLE_STEP_VERSION             0x00
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION          0x07
#define HSA_FINALIZER_API_TABLE_STEP_VERSION        0x00
#define HSA_IMAGE_API_TABLE_STEP_VERSION            0x01
// Rocprofiler just checks HSA_IMAGE_EXT_API_TABLE_STEP_VERSION, so keep it as
// an alias of HSA_IMAGE_API_TABLE_STEP_VERSION.
#define HSA_IMAGE_EXT_API_TABLE_STEP_VERSION        HSA_IMAGE_API_TABLE_STEP_VERSION
#define HSA_AQLPROFILE_API_TABLE_STEP_VERSION       0x00
#define HSA_TOOLS_API_TABLE_STEP_VERSION            0x00
#define HSA_PC_SAMPLING_API_TABLE_STEP_VERSION      0x00

#endif  // HSA_RUNTIME_INC_HSA_API_TRACE_VERSION_H


================================================
FILE: runtime/hsa-runtime/inc/hsa_ext_amd.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA AMD extension.

#ifndef HSA_RUNTIME_EXT_AMD_H_
#define HSA_RUNTIME_EXT_AMD_H_

#include "hsa.h"
#include "hsa_ext_image.h"
#include "hsa_ven_amd_pc_sampling.h"

/**
 * - 1.0 - initial version
 * - 1.1 - dmabuf export
 * - 1.2 - hsa_amd_memory_async_copy_on_engine
 * - 1.3 - HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED pool
 * - 1.4 - Virtual Memory API
 * - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES
 * - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
 * - 1.7 - hsa_amd_signal_wait_all
 * - 1.8 - hsa_amd_memory_get_preferred_copy_engine
 * - 1.9 - hsa_amd_portable_export_dmabuf_v2
 * - 1.10 - hsa_amd_vmem_address_reserve: HSA_AMD_VMEM_ADDRESS_NO_REGISTER
 * - 1.11 - hsa_amd_agent_info_t: HSA_AMD_AGENT_INFO_CLOCK_COUNTERS
 */
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
#define HSA_AMD_INTERFACE_VERSION_MINOR 11

#ifdef __cplusplus
extern "C" {
#endif

/** \addtogroup aql Architected Queuing Language
 *  @{
 */

/**
 * @brief Inline helper to set a flag within uint8_t[8] types.
 *
 * Flag @p bit is stored in byte (@p bit / 8) at bit position (@p bit % 8) of
 * that byte.
 *
 * @param[in,out] value Pointer to the first byte of the uint8_t[8] flag array.
 *
 * @param[in] bit Zero-based index of the flag to set. An index of 64 or more
 * lies outside the 8-byte array and is ignored, as is a NULL @p value.
 */
static inline void hsa_flag_set64(uint8_t* value, uint32_t bit) {
  /* Never touch memory beyond the 8 bytes the flag array is documented to have. */
  if (!value || bit >= 64) return;
  value[bit / 8] |= (uint8_t)(1u << (bit % 8));
}

/**
 * @brief Inline helper to determine whether a flag is set within uint8_t[8]
 * types.
 *
 * Flag @p bit is read from byte (@p bit / 8) at bit position (@p bit % 8) of
 * that byte.
 *
 * @param[in] value Pointer to the first byte of the uint8_t[8] flag array.
 *
 * @param[in] bit Zero-based index of the flag to test.
 *
 * @retval true The flag is set.
 *
 * @retval false The flag is clear, @p bit is 64 or more (outside the 8-byte
 * array), or @p value is NULL.
 */
static inline bool hsa_flag_isset64(uint8_t* value, uint32_t bit) {
  /* Never read memory beyond the 8 bytes the flag array is documented to have. */
  if (!value || bit >= 64) return false;
  return (value[bit / 8] & (1u << (bit % 8))) != 0;
}

/**
 * @brief A fixed-size (32-bit) type used to represent
 * ::hsa_signal_condition_t constants.
 */
typedef uint32_t hsa_signal_condition32_t;

/**
 * @brief AMD vendor specific packet type.
 *
 * ::hsa_amd_packet_type8_t is the fixed-size representation of these
 * constants; it is the type of the AmdFormat field of
 * ::hsa_amd_vendor_packet_header_t.
 */
typedef enum {
  /**
   * Packet used by agents to delay processing of subsequent packets until a
   * configurable condition is satisfied by an HSA signal.  Only kernel dispatch
   * queues created from AMD GPU Agents support this packet.
   */
  HSA_AMD_PACKET_TYPE_BARRIER_VALUE = 2,
  /**
   * Packet used to send commands to an AIE agent's embedded runtime (ERT). The
   * ERT is responsible for, among other things, handling dispatches. Only
   * queues created on AIE agents support this packet.
   */
  HSA_AMD_PACKET_TYPE_AIE_ERT = 3
} hsa_amd_packet_type_t;

/**
 * @brief A fixed-size (8-bit) type used to represent ::hsa_amd_packet_type_t
 * constants.
 */
typedef uint8_t hsa_amd_packet_type8_t;

/**
 * @brief AMD vendor specific AQL packet header.
 *
 * Note that the struct tag is hsa_amd_packet_header_s while the typedef name
 * is hsa_amd_vendor_packet_header_t.
 */
typedef struct hsa_amd_packet_header_s {
  /**
   * Packet header. Used to configure multiple packet parameters such as the
   * packet type. The parameters are described by ::hsa_packet_header_t.
   */
  uint16_t header;

  /**
   * Format of the vendor specific packet. Holds an ::hsa_amd_packet_type_t
   * constant in its fixed-size 8-bit representation.
   */
  hsa_amd_packet_type8_t AmdFormat;

  /**
   * Reserved. Must be 0.
   */
  uint8_t reserved;
} hsa_amd_vendor_packet_header_t;

/**
 * @brief AMD barrier value packet.  Halts packet processing and waits for
 * (signal_value & ::mask) ::cond ::value to be satisfied, where signal_value
 * is the value of the signal ::signal.
 */
typedef struct hsa_amd_barrier_value_packet_s {
  /**
   * AMD vendor specific packet header.
   */
  hsa_amd_vendor_packet_header_t header;

  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved0;

  /**
   * Dependent signal object. A signal with a handle value of 0 is
   * allowed and is interpreted by the packet processor as a satisfied
   * dependency.
   */
  hsa_signal_t signal;

  /**
   * Value to compare against.
   */
  hsa_signal_value_t value;

  /**
   * Bit mask to be combined by bitwise AND with ::signal's value.
   */
  hsa_signal_value_t mask;

  /**
   * Comparison operation, stored as a fixed-size 32-bit value.
   * See ::hsa_signal_condition_t.
   */
  hsa_signal_condition32_t cond;

  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;

  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved3;

  /**
   * Signal used to indicate completion of the job. The application can use the
   * special signal handle 0 to indicate that no signal is used.
   */
  hsa_signal_t completion_signal;
} hsa_amd_barrier_value_packet_t;

/**
 * @brief State of an AIE ERT command.
 *
 * Values start at 1; 0 is not assigned to any state. Note that the typedef
 * name carries no _t suffix.
 */
typedef enum {
  /**
   * Set by the host before submitting a command to the scheduler.
   */
  HSA_AMD_AIE_ERT_STATE_NEW = 1,
  /**
   * Internal scheduler state.
   */
  HSA_AMD_AIE_ERT_STATE_QUEUED = 2,
  /**
   * Internal scheduler state.
   */
  HSA_AMD_AIE_ERT_STATE_RUNNING = 3,
  /**
   * Set by the scheduler when a command completes.
   */
  HSA_AMD_AIE_ERT_STATE_COMPLETED = 4,
  /**
   * Set by the scheduler if a command failed.
   */
  HSA_AMD_AIE_ERT_STATE_ERROR = 5,
  /**
   * Set by the scheduler if a command aborted.
   */
  HSA_AMD_AIE_ERT_STATE_ABORT = 6,
  /**
   * Internal scheduler state.
   */
  HSA_AMD_AIE_ERT_STATE_SUBMITTED = 7,
  /**
   * Set by the scheduler on a timeout and reset.
   */
  HSA_AMD_AIE_ERT_STATE_TIMEOUT = 8,
  /**
   * Set by the scheduler on a timeout and fail to reset.
   */
  HSA_AMD_AIE_ERT_STATE_NORESPONSE = 9,
  /* NOTE(review): undocumented; presumably a soft-kernel error - confirm. */
  HSA_AMD_AIE_ERT_STATE_SKERROR = 10,
  /* NOTE(review): undocumented; presumably a soft-kernel crash - confirm. */
  HSA_AMD_AIE_ERT_STATE_SKCRASHED = 11,
  /* Not given an explicit value; evaluates to one more than the last state above. */
  HSA_AMD_AIE_ERT_STATE_MAX
} hsa_amd_aie_ert_state;

/**
 * @brief Opcode types for HSA AIE ERT commands.
 *
 * Value 1 is not assigned to any opcode, and values 0 are shared by
 * HSA_AMD_AIE_ERT_START_CU and HSA_AMD_AIE_ERT_START_KERNEL. Enumerators
 * without a description below are not documented in this header.
 */
typedef enum {
  /**
   * Start a workgroup on a compute unit (CU).
   */
  HSA_AMD_AIE_ERT_START_CU = 0,
  /**
   * Currently aliased to HSA_AMD_AIE_ERT_START_CU.
   */
  HSA_AMD_AIE_ERT_START_KERNEL = 0,
  /**
   * Configure command scheduler.
   */
  HSA_AMD_AIE_ERT_CONFIGURE = 2,
  HSA_AMD_AIE_ERT_EXIT = 3,
  HSA_AMD_AIE_ERT_ABORT = 4,
  /**
   * Execute a specified CU after writing.
   */
  HSA_AMD_AIE_ERT_EXEC_WRITE = 5,
  /**
   * Get stats about a CU's execution.
   */
  HSA_AMD_AIE_ERT_CU_STAT = 6,
  /**
   * Start KDMA CU or P2P.
   */
  HSA_AMD_AIE_ERT_START_COPYBO = 7,
  /**
   * Configure a soft kernel.
   */
  HSA_AMD_AIE_ERT_SK_CONFIG = 8,
  /**
   * Start a soft kernel.
   */
  HSA_AMD_AIE_ERT_SK_START = 9,
  /**
   * Unconfigure a soft kernel.
   */
  HSA_AMD_AIE_ERT_SK_UNCONFIG = 10,
  /**
   * Initialize a CU.
   */
  HSA_AMD_AIE_ERT_INIT_CU = 11,
  HSA_AMD_AIE_ERT_START_FA = 12,
  HSA_AMD_AIE_ERT_CLK_CALIB = 13,
  HSA_AMD_AIE_ERT_MB_VALIDATE = 14,
  /**
   * Same as HSA_AMD_AIE_ERT_START_CU but with a key-value pair.
   */
  HSA_AMD_AIE_ERT_START_KEY_VAL = 15,
  HSA_AMD_AIE_ERT_ACCESS_TEST_C = 16,
  HSA_AMD_AIE_ERT_ACCESS_TEST = 17,
  /**
   * Instruction buffer command format.
   */
  HSA_AMD_AIE_ERT_START_DPU = 18,
  /**
   * Command chain.
   */
  HSA_AMD_AIE_ERT_CMD_CHAIN = 19,
  /**
   * Instruction buffer command format on NPU.
   */
  HSA_AMD_AIE_ERT_START_NPU = 20,
  /**
   * Instruction buffer command with pre-emption format on the NPU.
   */
  HSA_AMD_AIE_ERT_START_NPU_PREEMPT = 21
} hsa_amd_aie_ert_cmd_opcode_t;

/**
 * @brief Payload data for AIE ERT start kernel packets (i.e., when the opcode
 * is HSA_AMD_AIE_ERT_START_KERNEL).
 *
 * The structure ends in an array declared without a size, so the amount of
 * storage following @p pdi_addr is determined by whoever allocates the
 * payload rather than by this type.
 */
typedef struct hsa_amd_aie_ert_start_kernel_data_s {
  /**
   * Address to the PDI.
   */
  void* pdi_addr;
  /**
   * Opcode, instructions and kernel arguments, as a sequence of 32-bit words.
   */
  uint32_t data[];
} hsa_amd_aie_ert_start_kernel_data_t;

/**
 * @brief AMD AIE ERT packet. Used for sending a command to an AIE agent.
 *
 * The bit-fields of the anonymous struct below add up to 32 bits
 * (4 + 8 + 11 + 5 + 4).
 */
typedef struct hsa_amd_aie_ert_packet_s {
  /**
   * AMD vendor specific packet header.
   */
  hsa_amd_vendor_packet_header_t header;
  /**
   * Format for packets interpreted by the ERT to understand the command and
   * payload data.
   */
  struct {
    /**
     * Current state of a command (4 bits).
     * NOTE(review): presumably an ::hsa_amd_aie_ert_state value - confirm.
     */
    uint32_t state : 4;
    /**
     * Flexible field that can be interpreted on a per-command basis (8 bits).
     */
    uint32_t custom : 8;
    /**
     * Number of DWORDs in the payload data (11 bits, so at most 2047).
     */
    uint32_t count : 11;
    /**
     * Opcode identifying the command (5 bits).
     * NOTE(review): presumably an ::hsa_amd_aie_ert_cmd_opcode_t value - confirm.
     */
    uint32_t opcode : 5;
    /**
     * Type of a command (currently 0).
     */
    uint32_t type : 4;
  };
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved0;
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved1;
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved2;
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved3;
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved4;
  /**
   * Reserved. Must be 0.
   */
  uint64_t reserved5;
  /**
   * Address of packet data payload. ERT commands contain arbitrarily sized
   * data payloads.
   */
  uint64_t payload_data;
} hsa_amd_aie_ert_packet_t;

/** @} */

/** \defgroup error-codes Error codes
 *  @{
 */

/**
 * @brief Enumeration constants added to ::hsa_status_t.
 *
 * The enumeration itself is anonymous; the constants occupy values 40-47.
 *
 * @remark Additions to hsa_status_t
 */
enum {
  /**
   * The memory pool is invalid.
   */
  HSA_STATUS_ERROR_INVALID_MEMORY_POOL = 40,

  /**
   * Agent accessed memory beyond the maximum legal address.
   */
  HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION = 41,

  /**
   * Agent executed an invalid shader instruction.
   */
  HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION = 42,

  /**
   * Agent attempted to access an inaccessible address.
   * See hsa_amd_register_system_event_handler and
   * HSA_AMD_GPU_MEMORY_FAULT_EVENT for more information on illegal accesses.
   */
  HSA_STATUS_ERROR_MEMORY_FAULT = 43,

  /**
   * The CU mask was successfully set but the mask attempted to enable a CU
   * which was disabled for the process.  CUs disabled for the process remain
   * disabled.  Unlike its neighbours this constant reports a successful
   * operation, which is why its name carries no ERROR component.
   */
  HSA_STATUS_CU_MASK_REDUCED = 44,

  /**
   * Exceeded number of VGPRs available on this agent.
   */
  HSA_STATUS_ERROR_OUT_OF_REGISTERS = 45,

  /**
   * Resource is busy or temporarily unavailable.
   */
  HSA_STATUS_ERROR_RESOURCE_BUSY = 46,

  /**
   * Request is not supported by this system.
   */
  HSA_STATUS_ERROR_NOT_SUPPORTED = 47,
};

/** @} */

/** \addtogroup memory Memory
 *  @{
 */

/**
 * @brief IOMMU version supported.
 *
 * This is the type of the ::HSA_AMD_AGENT_INFO_IOMMU_SUPPORT agent attribute.
 */
typedef enum {
  /**
   * IOMMU not supported.
   */
  HSA_IOMMU_SUPPORT_NONE = 0,
  /* IOMMU V1 support is not relevant to user applications, so it is not reported. */
  /**
   * IOMMU V2 supported.
   */
  HSA_IOMMU_SUPPORT_V2 = 1,
} hsa_amd_iommu_version_t;

/**
 * @brief Structure containing information on the agent's clock counters.
 *
 * This is the type of the ::HSA_AMD_AGENT_INFO_CLOCK_COUNTERS agent attribute.
 * NOTE(review): the individual fields are not described in this header;
 * units (ticks vs. Hz) are presumably implied by the field names - confirm
 * against the driver documentation.
 */
typedef struct hsa_amd_clock_counters_s {
  uint64_t gpu_clock_counter;
  uint64_t cpu_clock_counter;
  uint64_t system_clock_counter;
  uint64_t system_clock_frequency;
} hsa_amd_clock_counters_t;

/**
 * @brief Agent attributes.
 *
 * NOTE(review): the numeric values are not contiguous. They run from 0xA000
 * to 0xA016, continue at 0xA107-0xA10B and then at 0xA110-0xA118. The gaps are
 * presumably historical - do not assume a dense range.
 */
typedef enum hsa_amd_agent_info_s {
  /**
   * Chip identifier. The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_CHIP_ID = 0xA000,
  /**
   * Size of a cacheline in bytes. The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
  /**
   * The number of compute units available in the agent. The type of this
   * attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
  /**
   * The maximum clock frequency of the agent in MHz. The type of this
   * attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
  /**
   * Internal driver node identifier. The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_DRIVER_NODE_ID = 0xA004,
  /**
   * Max number of watch points on memory address ranges to generate exception
   * events when the watched addresses are accessed.  The type of this
   * attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS = 0xA005,
  /**
   * Agent BDF_ID, named LocationID in thunk. The type of this attribute is
   * uint32_t.
   */
  HSA_AMD_AGENT_INFO_BDFID = 0xA006,
  /**
   * Memory Interface width, the return value type is uint32_t.
   * This attribute is deprecated.
   */
  HSA_AMD_AGENT_INFO_MEMORY_WIDTH = 0xA007,
  /**
   * Max Memory Clock, the return value type is uint32_t.
   */
  HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008,
  /**
   * Board name of Agent - populated from MarketingName of Kfd Node.
   * The value is an ASCII string of 64 chars.
   */
  HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
  /**
   * Maximum number of waves possible in a Compute Unit.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
  /**
   * Number of SIMDs per compute unit (CU).
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
  /**
   * Number of Shader Engines (SE) in the GPU.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES = 0xA00C,
  /**
   * Number of Shader Arrays per Shader Engine in the GPU.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE = 0xA00D,
  /**
   * Address of the HDP flush registers.  Use of these registers does not conform to the HSA memory
   * model and should be treated with caution.
   * The type of this attribute is hsa_amd_hdp_flush_t.
   */
  HSA_AMD_AGENT_INFO_HDP_FLUSH = 0xA00E,
  /**
   * PCIe domain for the agent.  Pairs with HSA_AMD_AGENT_INFO_BDFID
   * to give the full physical location of the Agent.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_DOMAIN = 0xA00F,
  /**
   * Queries for support of cooperative queues.  See ::HSA_QUEUE_TYPE_COOPERATIVE.
   * The type of this attribute is bool.
   */
  HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
  /**
   * Queries UUID of an agent. The value is an ASCII string with a maximum
   * of 21 chars including NUL. The string value consists of two parts: header
   * and body. The header identifies device type (GPU, CPU, DSP) while body
   * encodes UUID as a 16 digit hex string.
   *
   * Agents that do not support UUID will return the string "GPU-XX" or
   * "CPU-XX" or "DSP-XX" depending upon their device type ::hsa_device_type_t.
   */
  HSA_AMD_AGENT_INFO_UUID = 0xA011,
  /**
   * Queries for the ASIC revision of an agent. The value is an integer that
   * increments for each revision. This can be used by user-level software to
   * change how it operates, depending on the hardware version. This allows
   * selective workarounds for hardware errata.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012,
  /**
   * Queries whether or not the host can directly access SVM memory that is
   * physically resident in the agent's local memory.
   * The type of this attribute is bool.
   */
  HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS = 0xA013,
  /**
   * Some processors support more CUs than can reliably be used in a cooperative
   * dispatch.  This queries the count of CUs which are fully enabled for
   * cooperative dispatch.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT = 0xA014,
  /**
   * Queries the amount of memory available in bytes across all global pools
   * owned by the agent.
   * The type of this attribute is uint64_t.
   */
  HSA_AMD_AGENT_INFO_MEMORY_AVAIL = 0xA015,
  /**
   * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
   * in the range 1-400MHz.
   * The type of this attribute is uint64_t.
   */
  HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY = 0xA016,
  /**
   * Queries for the ASIC family ID of an agent.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID = 0xA107,
  /**
   * Queries for the Packet Processor (CP Firmware) ucode version of an agent.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_UCODE_VERSION = 0xA108,
  /**
   * Queries for the SDMA engine ucode of an agent.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION = 0xA109,
  /**
   * Queries the number of SDMA engines.
   * If HSA_AMD_AGENT_INFO_NUM_SDMA_XGMI_ENG query returns non-zero,
   * this query returns the number of SDMA engines optimized for
   * host to device bidirectional traffic.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_SDMA_ENG = 0xA10A,
  /**
   * Queries the number of additional SDMA engines optimized for D2D xGMI copies.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_SDMA_XGMI_ENG = 0xA10B,
  /**
   * Queries for version of IOMMU supported by agent.
   * The type of this attribute is hsa_amd_iommu_version_t.
   */
  HSA_AMD_AGENT_INFO_IOMMU_SUPPORT = 0xA110,
  /**
   * Queries for number of XCCs within the agent.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_NUM_XCC = 0xA111,
  /**
   * Queries for driver unique identifier.
   * The type of this attribute is uint32_t.
   */
  HSA_AMD_AGENT_INFO_DRIVER_UID = 0xA112,
  /**
   * Returns the hsa_agent_t of the nearest CPU agent.
   * The type of this attribute is hsa_agent_t.
   */
  HSA_AMD_AGENT_INFO_NEAREST_CPU = 0xA113,
  /**
   * Bit-mask indicating memory properties of this agent. A memory property is set if the flag bit
   * is set at that position. User may use the hsa_flag_isset64 helper to verify whether a flag
   * is set. The type of this attribute is uint8_t[8].
   */
  HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES = 0xA114,
  /**
   * Bit-mask indicating AQL Extensions supported by this agent. An AQL extension is set if the flag
   * bit is set at that position. User may use the hsa_flag_isset64 helper to verify whether a flag
   * is set. The type of this attribute is uint8_t[8].
   */
  HSA_AMD_AGENT_INFO_AQL_EXTENSIONS = 0xA115, /* Not implemented yet */
  /**
   * Maximum allowed value in bytes for scratch limit for this agent. This amount
   * is shared across all queues created on this agent.
   * The type of this attribute is uint64_t.
   */
  HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_MAX = 0xA116,
  /**
   * Current scratch limit threshold in bytes for this agent. This limit can be
   * modified using the hsa_amd_agent_set_async_scratch_limit call.
   * - AQL dispatches that require scratch-memory above this threshold will trigger a
   *   scratch use-once.
   * - For AQL dispatches using less scratch-memory than this threshold, ROCr will
   *   permanently assign the allocated scratch memory to the queue handling the dispatch.
   *   This memory can be reclaimed by calling hsa_amd_agent_set_async_scratch_limit
   *   with a threshold lower than the current value.
   *
   * The type of this attribute is uint64_t.
   */
  HSA_AMD_AGENT_INFO_SCRATCH_LIMIT_CURRENT = 0xA117,
  /**
   * Queries the driver for clock counters of the agent.
   * The type of this attribute is hsa_amd_clock_counters_t.
   */
  HSA_AMD_AGENT_INFO_CLOCK_COUNTERS = 0xA118
} hsa_amd_agent_info_t;

/**
 * @brief Agent memory properties attributes.
 *
 * NOTE(review): presumably these are the flags reported through
 * ::HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES. The enumerator below is written as
 * a shifted value (1 << 0), whereas hsa_flag_isset64 takes a bit index -
 * confirm which form callers are expected to pass.
 */
typedef enum hsa_amd_agent_memory_properties_s {
  /* NOTE(review): by its name, indicates that the agent is an APU - confirm. */
  HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU = (1 << 0),
} hsa_amd_agent_memory_properties_t;

/**
 * @brief Identifiers for individual SDMA engines.
 *
 * Each identifier is a one-hot value: engine N is represented by bit N alone,
 * so every engine ID has exactly one bit set and no two IDs share a bit.
 */
typedef enum hsa_amd_sdma_engine_id {
  HSA_AMD_SDMA_ENGINE_0 = 1 << 0,
  HSA_AMD_SDMA_ENGINE_1 = 1 << 1,
  HSA_AMD_SDMA_ENGINE_2 = 1 << 2,
  HSA_AMD_SDMA_ENGINE_3 = 1 << 3,
  HSA_AMD_SDMA_ENGINE_4 = 1 << 4,
  HSA_AMD_SDMA_ENGINE_5 = 1 << 5,
  HSA_AMD_SDMA_ENGINE_6 = 1 << 6,
  HSA_AMD_SDMA_ENGINE_7 = 1 << 7,
  HSA_AMD_SDMA_ENGINE_8 = 1 << 8,
  HSA_AMD_SDMA_ENGINE_9 = 1 << 9,
  HSA_AMD_SDMA_ENGINE_10 = 1 << 10,
  HSA_AMD_SDMA_ENGINE_11 = 1 << 11,
  HSA_AMD_SDMA_ENGINE_12 = 1 << 12,
  HSA_AMD_SDMA_ENGINE_13 = 1 << 13,
  HSA_AMD_SDMA_ENGINE_14 = 1 << 14,
  HSA_AMD_SDMA_ENGINE_15 = 1 << 15
} hsa_amd_sdma_engine_id_t;

/**
 * @brief Addresses of the HDP flush registers.
 *
 * This is the type of the ::HSA_AMD_AGENT_INFO_HDP_FLUSH agent attribute. Use
 * of these registers does not conform to the HSA memory model and should be
 * treated with caution.
 */
typedef struct hsa_amd_hdp_flush_s {
  uint32_t* HDP_MEM_FLUSH_CNTL;
  uint32_t* HDP_REG_FLUSH_CNTL;
} hsa_amd_hdp_flush_t;

/**
 * @brief Region attributes.
 *
 * When compiled as C++ the enumeration is given a fixed underlying type of
 * int; when compiled as C it is a plain enum.
 */
#ifdef __cplusplus
typedef enum hsa_amd_region_info_s : int {
#else
typedef enum hsa_amd_region_info_s {
#endif
  /**
   * Determine if host can access the region. The type of this attribute
   * is bool.
   */
  HSA_AMD_REGION_INFO_HOST_ACCESSIBLE = 0xA000,
  /**
   * Base address of the region in flat address space.
   */
  HSA_AMD_REGION_INFO_BASE = 0xA001,
  /**
   * Memory Interface width, the return value type is uint32_t.
   * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_WIDTH
   * (which is itself marked deprecated in ::hsa_amd_agent_info_t).
   */
  HSA_AMD_REGION_INFO_BUS_WIDTH = 0xA002,
  /**
   * Max Memory Clock, the return value type is uint32_t.
   * This attribute is deprecated. Use HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY.
   */
  HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
} hsa_amd_region_info_t;

/**
 * @brief Coherency attributes of a fine grain region.
 *
 * Read with hsa_amd_coherency_get_type and written with
 * hsa_amd_coherency_set_type.
 */
typedef enum hsa_amd_coherency_type_s {
  /**
   * Coherent region.
   */
  HSA_AMD_COHERENCY_TYPE_COHERENT = 0,
  /**
   * Non coherent region.
   */
  HSA_AMD_COHERENCY_TYPE_NONCOHERENT = 1
} hsa_amd_coherency_type_t;


/**
 * @brief dmabuf attributes
 */
#ifdef __cplusplus
typedef enum hsa_amd_dma_buf_mapping_type_s : int {
#else
typedef enum hsa_amd_dma_buf_mapping_type_s {
#endif
  HSA_AMD_DMABUF_MAPPING_TYPE_NONE = 0,
  HSA_AMD_DMABUF_MAPPING_TYPE_PCIE = 1
} hsa_amd_dma_buf_mapping_type_t;
/**
 * @brief Get the coherency type of the fine grain region of an agent.
 *
 * @param[in] agent A valid agent.
 *
 * @param[out] type Pointer to a memory location where the HSA runtime will
 * store the coherency type (an ::hsa_amd_coherency_type_t value) of the fine
 * grain region.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is NULL.
 */
hsa_status_t HSA_API hsa_amd_coherency_get_type(hsa_agent_t agent,
                                                hsa_amd_coherency_type_t* type);

/**
 * @brief Set the coherency type of the fine grain region of an agent.
 *
 * @deprecated This is supported on KV platforms.  For backward compatibility
 * other platforms will spuriously succeed.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] type The coherency type to be set.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p type is invalid.
 */
hsa_status_t HSA_API hsa_amd_coherency_set_type(hsa_agent_t agent,
                                                hsa_amd_coherency_type_t type);

/** @} */

/** \defgroup profile Profiling
 *  @{
 */

/**
 * @brief Structure containing profiling dispatch time information.
 *
 * Times are reported as ticks in the domain of the HSA system clock.
 * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
 * This is the output type of hsa_amd_profiling_get_dispatch_time.
 */
typedef struct hsa_amd_profiling_dispatch_time_s {
  /**
   * Dispatch packet processing start time.
   */
  uint64_t start;
  /**
   * Dispatch packet completion time.
   */
  uint64_t end;
} hsa_amd_profiling_dispatch_time_t;

/**
 * @brief Structure containing profiling async copy time information.
 *
 * Times are reported as ticks in the domain of the HSA system clock.
 * The HSA system clock tick and frequency is obtained via hsa_system_get_info.
 * This is the output type of hsa_amd_profiling_get_async_copy_time.
 */
typedef struct hsa_amd_profiling_async_copy_time_s {
  /**
   * Async copy processing start time.
   */
  uint64_t start;
  /**
   * Async copy completion time.
   */
  uint64_t end;
} hsa_amd_profiling_async_copy_time_t;

/**
 * @brief Enable or disable profiling capability of a queue.
 *
 * @param[in] queue A valid queue.
 *
 * @param[in] enable 1 to enable profiling. 0 to disable profiling. Note that
 * this parameter is an int, unlike the bool taken by
 * hsa_amd_profiling_async_copy_enable.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
 */
hsa_status_t HSA_API
    hsa_amd_profiling_set_profiler_enabled(hsa_queue_t* queue, int enable);

/**
 * @brief Enable or disable asynchronous memory copy profiling.
 *
 * @details The runtime will provide the copy processing start timestamp and
 * completion timestamp of each call to hsa_amd_memory_async_copy if the
 * async copy profiling is enabled prior to the call to
 * hsa_amd_memory_async_copy. The completion signal object is used to
 * hold the last async copy start and end timestamp. The client can retrieve
 * these timestamps via a call to hsa_amd_profiling_get_async_copy_time.
 *
 * @param[in] enable True to enable profiling. False to disable profiling.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed on allocating resources
 * needed to profile the asynchronous copy.
 */
hsa_status_t HSA_API
    hsa_amd_profiling_async_copy_enable(bool enable);

/**
 * @brief Retrieve packet processing time stamps.
 *
 * @param[in] agent The agent with which the signal was last used.  For
 * instance, if the profiled dispatch packet is dispatched onto queue Q,
 * which was created on agent A, then this parameter must be A.
 *
 * @param[in] signal A signal used as the completion signal of the dispatch
 * packet to retrieve time stamps from.  This dispatch packet must have been
 * issued to a queue with profiling enabled and have already completed.  Also
 * the signal must not have yet been used in any other packet following the
 * completion of the profiled dispatch packet.
 *
 * @param[out] time Packet processing timestamps (start and end) in the HSA
 * system clock domain.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
 */
hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent, hsa_signal_t signal,
    hsa_amd_profiling_dispatch_time_t* time);

/**
 * @brief Retrieve asynchronous copy timestamps.
 *
 * @details Async copy profiling is enabled via a call to
 * hsa_amd_profiling_async_copy_enable.
 *
 * @param[in] signal A signal used as the completion signal of the call to
 * hsa_amd_memory_async_copy.
 *
 * @param[out] time Async copy processing timestamps (start and end) in the
 * HSA system clock domain.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL The signal is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p time is NULL.
 */
hsa_status_t HSA_API hsa_amd_profiling_get_async_copy_time(
    hsa_signal_t signal, hsa_amd_profiling_async_copy_time_t* time);

/**
 * @brief Computes the frequency ratio and offset between the agent clock and
 * HSA system clock and converts the agent's tick to HSA system domain tick.
 *
 * @param[in] agent The agent used to retrieve the agent_tick. It is the user's
 * responsibility to make sure the tick number is from this agent, otherwise,
 * the behavior is undefined.
 *
 * @param[in] agent_tick The tick count retrieved from the specified @p agent.
 *
 * @param[out] system_tick The translated HSA system domain clock counter tick.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p system_tick is NULL.
 */
hsa_status_t HSA_API
    hsa_amd_profiling_convert_tick_to_system_domain(hsa_agent_t agent,
                                                    uint64_t agent_tick,
                                                    uint64_t* system_tick);

/** @} */

/** \defgroup status Runtime notifications
 *  @{
 */

/**
 * @brief Signal attribute flags.
 *
 * The flags are passed in the @p attributes argument of
 * hsa_amd_signal_create and may be combined with bitwise OR.
 */
typedef enum {
  /**
   * Signal will only be consumed by AMD GPUs.  Limits signal consumption to
   * AMD GPU agents only.  Ignored if the @p num_consumers argument of
   * hsa_amd_signal_create is not zero (zero meaning all agents).
   */
  HSA_AMD_SIGNAL_AMD_GPU_ONLY = 1,
  /**
   * Signal may be used for interprocess communication.
   * IPC signals can be read, written, and waited on from any process.
   * Profiling using an IPC enabled signal is only supported in a single process
   * at a time.  Producing profiling data in one process and consuming it in
   * another process is undefined.
   */
  HSA_AMD_SIGNAL_IPC = 2,
} hsa_amd_signal_attribute_t;

/**
 * @brief Create a signal with specific attributes.
 *
 * @param[in] initial_value Initial value of the signal.
 *
 * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
 * any agent might wait on the signal.
 *
 * @param[in] consumers List of agents that might consume (wait on) the
 * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
 * HSA runtime might use the list to optimize the handling of the signal
 * object. If an agent not listed in @p consumers waits on the returned
 * signal, the behavior is undefined. The memory associated with @p consumers
 * can be reused or freed after the function returns.
 *
 * @param[in] attributes Requested signal attributes
 * (see ::hsa_amd_signal_attribute_t).  Multiple signal attributes may be
 * requested by combining them with bitwise OR.  Requesting no attributes
 * (@p attributes == 0) results in the same signal as would have been obtained
 * via hsa_signal_create.
 *
 * @param[out] signal Pointer to a memory location where the HSA runtime will
 * store the newly created signal handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
 * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
 * contains duplicates.
 */
hsa_status_t HSA_API hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
                                           const hsa_agent_t* consumers, uint64_t attributes,
                                           hsa_signal_t* signal);

/**
 * @brief Returns a pointer to the value of a signal.
 *
 * Use of this API does not modify the lifetime of @p signal and any
 * hsa_signal_value_t retrieved by this API has lifetime equal to that of
 * @p signal.
 *
 * This API is intended for partial interoperability with non-HSA compatible
 * devices and should not be used where HSA interfaces are available.
 *
 * Use of the signal value must comply with use restrictions of @p signal.
 * Use may result in data races if the operations performed are not platform
 * atomic.  Use with HSA_AMD_SIGNAL_AMD_GPU_ONLY or HSA_AMD_SIGNAL_IPC
 * attributed signals is required.
 *
 * @param[in] signal Signal handle to extract the signal value pointer from.
 *
 * @param[out] value_ptr Location where the extracted signal value pointer
 * will be placed.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is not a valid
 * hsa_signal_t.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p value_ptr is NULL.
 */
hsa_status_t HSA_API hsa_amd_signal_value_pointer(hsa_signal_t signal,
                                                  volatile hsa_signal_value_t** value_ptr);

/**
 * @brief Asynchronous signal handler function type.
 *
 * @details Type definition of callback function to be used with
 * hsa_amd_signal_async_handler. This callback is invoked if the associated
 * signal and condition are met. The callback receives the value of the signal
 * which satisfied the associated wait condition and a user provided value. If
 * the callback returns true then the callback will be called again if the
 * associated signal and condition are satisfied again. If the callback returns
 * false then it will not be called again.
 *
 * @param[in] value Contains the value of the signal observed by
 * hsa_amd_signal_async_handler which caused the signal handler to be invoked.
 *
 * @param[in] arg Contains the user provided value given when the signal handler
 * was registered with hsa_amd_signal_async_handler.
 *
 * @retval true Resumes monitoring the signal with this handler (as if calling
 * hsa_amd_signal_async_handler again with identical parameters).
 *
 * @retval false Stops monitoring the signal with this handler (the handler
 * will not be called again for this signal).
 */
typedef bool (*hsa_amd_signal_handler)(hsa_signal_value_t value, void* arg);

/**
 * @brief Register asynchronous signal handler function.
 *
 * @details Allows registering a callback function and user provided value with
 * a signal and wait condition. The callback will be invoked if the associated
 * signal and wait condition are satisfied. Callbacks will be invoked serially
 * but in an arbitrary order, so callbacks should be independent of each other.
 * After being invoked a callback may continue to wait for its associated signal
 * and condition and, possibly, be invoked again. Or the callback may stop
 * waiting. If the callback returns true then it will continue waiting and may
 * be called again. If false then the callback will not wait again and will not
 * be called again for the associated signal and condition. It is possible to
 * register the same callback multiple times with the same or different signals
 * and/or conditions. Each registration of the callback will be treated entirely
 * independently.
 *
 * @param[in] signal HSA signal to be asynchronously monitored.
 *
 * @param[in] cond Condition to monitor for.
 *
 * @param[in] value Signal value used in the condition expression.
 *
 * @param[in] handler Asynchronous signal handler invoked when the signal's
 * condition is met.
 *
 * @param[in] arg User provided value which is passed to @p handler when
 * @p handler is invoked.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is not a valid
 * hsa_signal_t.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p handler is invalid (NULL).
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
 * resources or blocking signals are not supported by the HSA driver component.
 */
hsa_status_t HSA_API
    hsa_amd_signal_async_handler(hsa_signal_t signal,
                                 hsa_signal_condition_t cond,
                                 hsa_signal_value_t value,
                                 hsa_amd_signal_handler handler, void* arg);

/**
 * @brief Wait for all signal-condition pairs to be satisfied.
 *
 * @details Allows waiting for all of several signal and condition pairs to be
 * satisfied. The function returns 0 if all signals met their conditions and -1
 * on a timeout (the return type is uint32_t, so -1 is observed as the maximum
 * uint32_t value). Each signal's satisfying value is returned in
 * @p satisfying_values unless @p satisfying_values is NULL. NULL and invalid
 * signals are considered to have value 0 and their conditions already
 * satisfied. This function provides only relaxed memory semantics.
 *
 * NOTE(review): the individual parameters are not documented here. @p signals,
 * @p conds and @p values look like parallel arrays of @p signal_count entries,
 * as for ::hsa_amd_signal_wait_any - confirm against the implementation.
 */
uint32_t HSA_API hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* signals,
                                         hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                         uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                         hsa_signal_value_t* satisfying_values);

/**
 * @brief Wait for any signal-condition pair to be satisfied.
 *
 * @details Allows waiting for any of several signal and condition pairs to be
 * satisfied. The function returns the index into the list of signals of the
 * first satisfying signal-condition pair. The function returns
 * std::numeric_limits<uint32_t>::max() (i.e. the maximum uint32_t value) if no
 * valid signal is provided. The value of the satisfying signal is returned in
 * @p satisfying_value, unless @p satisfying_value is NULL or there is no valid
 * signal in the signal-condition pairs. NULL and invalid signals are ignored.
 * This function provides only relaxed memory semantics.
 */
uint32_t HSA_API
    hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
                            hsa_signal_condition_t* conds,
                            hsa_signal_value_t* values, uint64_t timeout_hint,
                            hsa_wait_state_t wait_hint,
                            hsa_signal_value_t* satisfying_value);

/** @} */

/**
 * @brief Call a function asynchronously.
 *
 * @details Provides access to the runtime's asynchronous event handling thread
 * for general asynchronous functions.  Functions queued this way are executed
 * in the same manner as if they were a signal handler whose signal is
 * satisfied.
 *
 * @param[in] callback Asynchronous function to be invoked.
 *
 * @param[in] arg User provided value which is passed to @p callback when
 * @p callback is invoked.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is invalid (NULL).
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is out of
 * resources or blocking signals are not supported by the HSA driver component.
 */
hsa_status_t HSA_API
    hsa_amd_async_function(void (*callback)(void* arg), void* arg);

/** \addtogroup ext-images Images and samplers
 *  @{
 */

/**
 * @brief Encodes an opaque vendor specific image format.  The length of data
 * depends on the underlying format.  This structure must not be copied as its
 * true length can not be determined.
 */
typedef struct hsa_amd_image_descriptor_s {
  /**
   * Version number of the descriptor.
   */
  uint32_t version;

  /**
   * Vendor and device PCI IDs for the format, packed as
   * VENDOR_ID<<16|DEVICE_ID.
   */
  uint32_t deviceID;

  /**
   * Start of vendor specific data. Declared with a single element, but the
   * real length depends on the underlying format (see the structure
   * description), which is why the structure must not be copied.
   */
  uint32_t data[1];
} hsa_amd_image_descriptor_t;

/**
 * @brief Creates an image from an opaque vendor specific image format.
 * Does not modify data at @p image_data.  Intended initially for
 * accessing interop images.
 *
 * @param[in] agent Agent on which to create the image.
 *
 * @param[in] image_descriptor Image descriptor
 * (::hsa_ext_image_descriptor_t) of the image to create.
 *
 * @param[in] image_layout Opaque vendor specific image format (see
 * ::hsa_amd_image_descriptor_t).
 *
 * @param[in] image_data Pointer to image backing store.
 *
 * @param[in] access_permission Access permissions for the image object.
 *
 * @param[out] image Created image object.
 *
 * @retval ::HSA_STATUS_SUCCESS Image created successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime is not
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in allocating
 * necessary resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Bad or mismatched descriptor,
 * null @p image_data, or mismatched @p access_permission.
 */
hsa_status_t HSA_API hsa_amd_image_create(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const hsa_amd_image_descriptor_t *image_layout,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t *image
);

/**
 * @brief Query image limits.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] attribute HSA image info attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p value is NULL or @p attribute <
 * HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS or @p attribute >
 * HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS.
 */
/* NOTE(review): ::HSA_STATUS_ERROR_INVALID_QUEUE is an unusual status for a
 * NULL value or an out-of-range attribute in a function that takes no queue.
 * Confirm against the implementation whether
 * ::HSA_STATUS_ERROR_INVALID_ARGUMENT is what is actually returned. */
hsa_status_t HSA_API hsa_amd_image_get_info_max_dim(hsa_agent_t agent,
                                                    hsa_agent_info_t attribute,
                                                    void* value);

/** @} */

/** \addtogroup queue Queues
 *  @{
 */

/**
 * @brief Set a queue's CU affinity mask.
 *
 * @details Enables the queue to run on only selected CUs.  The given mask is
 * combined by bitwise AND with any device wide mask in HSA_CU_MASK before
 * being applied.
 * If @p num_cu_mask_count is 0 then the request is interpreted as a request to
 * enable all CUs and no @p cu_mask array need be given.
 *
 * @param[in] queue A pointer to HSA queue.
 *
 * @param[in] num_cu_mask_count Size of CUMask bit array passed in, in bits.
 *
 * @param[in] cu_mask Bit-vector representing the CU mask.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_CU_MASK_REDUCED The function was successfully executed
 * but the given mask attempted to enable a CU which was disabled by
 * HSA_CU_MASK.  CUs disabled by HSA_CU_MASK remain disabled.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p queue is NULL or invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_cu_mask_count is not
 * a multiple of 32 or @p num_cu_mask_count is not 0 and @p cu_mask is NULL.
 * On devices with work group processors, CUs must be enabled in contiguous
 * pairs that start at an even index, e.g. 0x33 (b'110011) is valid while
 * 0x5 (b'0101) and 0x6 (b'0110) are invalid.
 */
hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
                                               uint32_t num_cu_mask_count,
                                               const uint32_t* cu_mask);

/**
 * @brief Retrieve a queue's CU affinity mask.
 *
 * @details Returns the first @p num_cu_mask_count bits of a queue's CU mask.
 * Ensure that @p num_cu_mask_count is at least as large as
 * HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT to retrieve the entire mask.
 *
 * @param[in] queue A pointer to HSA queue.
 *
 * @param[in] num_cu_mask_count Size of CUMask bit array passed in, in bits.
 *
 * @param[out] cu_mask Bit-vector representing the CU mask.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE @p queue is NULL or invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_cu_mask_count is 0 or is
 * not a multiple of 32, or @p cu_mask is NULL.
 */
hsa_status_t HSA_API hsa_amd_queue_cu_get_mask(const hsa_queue_t* queue, uint32_t num_cu_mask_count,
                                               uint32_t* cu_mask);

/** @} */

/** \addtogroup memory Memory
 *  @{
 */

/**
 * @brief Memory segments associated with a memory pool.
 *
 * This is the type of the ::HSA_AMD_MEMORY_POOL_INFO_SEGMENT attribute.
 */
typedef enum {
  /**
   * Global segment. Used to hold data that is shared by all agents.
   */
  HSA_AMD_SEGMENT_GLOBAL = 0,
  /**
   * Read-only segment. Used to hold data that remains constant during the
   * execution of a kernel.
   */
  HSA_AMD_SEGMENT_READONLY = 1,
  /**
   * Private segment. Used to hold data that is local to a single work-item.
   */
  HSA_AMD_SEGMENT_PRIVATE = 2,
  /**
   * Group segment. Used to hold data that is shared by the work-items of a
   * work-group.
   */
  HSA_AMD_SEGMENT_GROUP = 3,
} hsa_amd_segment_t;

/**
 * @brief A memory pool encapsulates physical storage on an agent
 * along with a memory access model.
 *
 * @details A memory pool encapsulates a physical partition of an agent's
 * memory system along with a memory access model.  Division of a single
 * memory system into separate pools allows querying each partition's access
 * path properties (see ::hsa_amd_agent_memory_pool_get_info). Allocations
 * from a pool are preferentially bound to that pool's physical partition.
 * Binding to the pool's preferential physical partition may not be
 * possible or persistent depending on the system's memory policy
 * and/or state which is beyond the scope of HSA APIs.
 *
 * For example, a multi-node NUMA memory system may be represented by multiple
 * pools, with each pool providing size and access path information for the
 * partition it represents.  Allocations from a pool are preferentially bound
 * to the pool's partition (which in this example is a NUMA node) while
 * following its memory access model. The actual placement may vary or migrate
 * due to the system's NUMA policy and state, which is beyond the scope of
 * HSA APIs.
 */
typedef struct hsa_amd_memory_pool_s {
  /**
   * Opaque handle.
   */
  uint64_t handle;
} hsa_amd_memory_pool_t;

/**
 * @brief Flags describing a memory pool in the global segment.
 *
 * Values are distinct bits; the ::HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS
 * attribute is a uint32_t bit-field of these values.
 */
typedef enum hsa_amd_memory_pool_global_flag_s {
  /**
   * The application can use allocations in the memory pool to store kernel
   * arguments, and provide the values for the kernarg segment of
   * a kernel dispatch.
   */
  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT = 1,
  /**
   * Updates to memory in this pool conform to HSA memory consistency model.
   * If this flag is set, then ::HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED
   * must not be set.
   */
  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED = 2,
  /**
   * Writes to memory in this pool can be performed by a single agent at a time.
   */
  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED = 4,

  /**
   * Updates to memory in this memory pool have extended scope, acting as
   * system-scope atomics for variables in memory regions of this type.
   * Note: On non-compliant systems, device-specific actions may be required
   * for system-scope coherence.
   */
  HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED = 8,

} hsa_amd_memory_pool_global_flag_t;

/**
 * @brief Location of a memory pool.
 *
 * This is the type of the ::HSA_AMD_MEMORY_POOL_INFO_LOCATION attribute.
 */
typedef enum hsa_amd_memory_pool_location_s {
    /**
     * This memory pool resides on the host (CPU).
     */
    HSA_AMD_MEMORY_POOL_LOCATION_CPU = 0,
    /**
     * This memory pool resides on a GPU.
     */
    HSA_AMD_MEMORY_POOL_LOCATION_GPU = 1
} hsa_amd_memory_pool_location_t;

/**
 * @brief Memory pool features.
 *
 * These are the attributes accepted by ::hsa_amd_memory_pool_get_info.
 */
typedef enum {
  /**
   * Segment where the memory pool resides. The type of this attribute is
   * ::hsa_amd_segment_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0,
  /**
   * Flag mask. The value of this attribute is undefined if the value of
   * ::HSA_AMD_MEMORY_POOL_INFO_SEGMENT is not ::HSA_AMD_SEGMENT_GLOBAL. The
   * type of this attribute is uint32_t, a bit-field of
   * ::hsa_amd_memory_pool_global_flag_t values.
   */
  HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1,
  /**
   * Size of this pool, in bytes. The type of this attribute is size_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_SIZE = 2,
  /**
   * Indicates whether memory in this pool can be allocated using
   * ::hsa_amd_memory_pool_allocate. The type of this attribute is bool.
   *
   * The value of this flag is always false for memory pools in the group and
   * private segments.
   */
  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5,
  /**
   * Allocation granularity of buffers allocated by
   * ::hsa_amd_memory_pool_allocate in this memory pool. The size of a buffer
   * allocated in this pool is a multiple of the value of this attribute. While
   * this is the minimum size of allocation allowed, it is recommended to use
   * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE to obtain the
   * recommended allocation granularity size for this pool.
   * The value of this attribute is only defined if
   * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for
   * this pool. The type of this attribute is size_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
  /**
   * Alignment of buffers allocated by ::hsa_amd_memory_pool_allocate in this
   * pool. The value of this attribute is only defined if
   * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool, and
   * must be a power of 2. The type of this attribute is size_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
  /**
   * This memory_pool can be made directly accessible by all the agents in the
   * system (::hsa_amd_agent_memory_pool_get_info does not return
   * ::HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED for any agent). The type of this
   * attribute is bool.
   */
  HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
  /**
   * Maximum aggregate allocation size in bytes. The type of this attribute
   * is size_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
  /**
   * Location of this memory pool. The type of this attribute
   * is ::hsa_amd_memory_pool_location_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_LOCATION = 17,
  /**
   * Internal block size for allocations. This would also be the recommended
   * granularity size for allocations as this prevents internal fragmentation.
   * The value of this attribute is only defined if
   * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is true for this pool.
   * The type of this attribute is size_t.
   */
  HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE = 18,
} hsa_amd_memory_pool_info_t;

/**
 * @brief Memory pool flag used to specify allocation directives.
 *
 * The non-zero values are distinct bits. NOTE(review): presumably these are
 * combined into the @p flags argument of ::hsa_amd_memory_pool_allocate -
 * confirm against the implementation.
 */
typedef enum hsa_amd_memory_pool_flag_s {
  /**
   * Allocates memory that conforms to standard HSA memory consistency model.
   */
  HSA_AMD_MEMORY_POOL_STANDARD_FLAG = 0,
  /**
   * Allocates fine grain memory type where memory ordering is per point to point
   * connection. Atomic memory operations on these memory buffers are not
   * guaranteed to be visible at system scope.
   */
  HSA_AMD_MEMORY_POOL_PCIE_FLAG = (1 << 0),
  /**
   * Allocates physically contiguous memory.
   */
  HSA_AMD_MEMORY_POOL_CONTIGUOUS_FLAG = (1 << 1),
  /**
   * Allocates executable memory.
   */
  HSA_AMD_MEMORY_POOL_EXECUTABLE_FLAG = (1 << 2),

} hsa_amd_memory_pool_flag_t;

/**
 * @brief Get the current value of an attribute of a memory pool.
 *
 * @param[in] memory_pool A valid memory pool.
 *
 * @param[in] attribute Attribute to query (see ::hsa_amd_memory_pool_info_t
 * for the type of each attribute's value).
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 */
hsa_status_t HSA_API
    hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
                                 hsa_amd_memory_pool_info_t attribute,
                                 void* value);

/**
 * @brief Iterate over the memory pools associated with a given agent, and
 * invoke an application-defined callback on every iteration.
 *
 * @details An agent can directly access buffers located in some memory pool, or
 * be enabled to access them by the application (see
 * ::hsa_amd_agents_allow_access), yet that memory pool may not be returned by
 * this function for that given agent.
 *
 * A memory pool of fine-grained type must be associated only with the host.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] callback Callback to be invoked on the same thread that called
 * ::hsa_amd_agent_iterate_memory_pools, serially, once per memory pool that is
 * associated with the agent.  The HSA runtime passes two arguments to the
 * callback: the memory pool, and the application data.  If @p callback
 * returns a status other than ::HSA_STATUS_SUCCESS for a particular iteration,
 * the traversal stops and ::hsa_amd_agent_iterate_memory_pools returns that
 * status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_amd_agent_iterate_memory_pools(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_amd_memory_pool_t memory_pool, void* data),
    void* data);

/**
 * @brief Allocate a block of memory (or buffer) in the specified pool.
 *
 * @param[in] memory_pool Memory pool where to allocate memory from. The memory
 * pool must have the ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED flag set.
 *
 * @param[in] size Allocation size, in bytes. Must not be zero. This value is
 * rounded up to the nearest multiple of
 * ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE in @p memory_pool.
 *
 * @param[in] flags A bit-field that is used to specify allocation
 * directives.
 *
 * @param[out] ptr Pointer to the location where to store the base virtual
 * address of the allocated block. The returned base address is aligned to the
 * value of ::HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT in
 * @p memory_pool. If the allocation fails, the returned value is undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The memory pool is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
 * allocate memory in @p memory_pool, or @p size is greater than
 * the value of ::HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE in @p memory_pool.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0,
 * or @p flags is not 0.
 */
/* NOTE(review): ::hsa_amd_memory_pool_flag_t defines non-zero allocation
 * directive flags, so the documented "flags is not 0" error condition may be
 * outdated - confirm against the implementation. */
hsa_status_t HSA_API
    hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool, size_t size,
                                 uint32_t flags, void** ptr);

/**
 * @brief Deallocate a block of memory previously allocated using
 * ::hsa_amd_memory_pool_allocate.
 *
 * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
 * previously returned by ::hsa_amd_memory_pool_allocate, the behavior is
 * undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 */
hsa_status_t HSA_API hsa_amd_memory_pool_free(void* ptr);

/**
 * @brief Asynchronously copy a block of memory from the location pointed to by
 * @p src on the @p src_agent to the memory block pointed to by @p dst on the @p
 * dst_agent.
 * Because the DMA engines used may not be in the same coherency domain, the caller must ensure
 * that buffers are system-level coherent. In general this requires the sending device to have
 * released the buffer to system scope prior to executing the copy API and the receiving device
 * must execute a system scope acquire fence prior to use of the destination buffer.
 *
 * @param[out] dst Buffer where the content is to be copied.
 *
 * @param[in] dst_agent Agent associated with the @p dst. The agent must be able to directly
 * access both the source and destination buffers in their current locations.
 * May be zero in which case the runtime will attempt to discover the destination agent.
 * Discovery may have variable and/or high latency.
 *
 * @param[in] src A valid pointer to the source of data to be copied. The source
 * buffer must not overlap with the destination buffer, otherwise the copy will succeed
 * but the contents of @p dst are undefined.
 *
 * @param[in] src_agent Agent associated with the @p src. The agent must be able to directly
 * access both the source and destination buffers in their current locations.
 * May be zero in which case the runtime will attempt to discover the source agent.
 * Discovery may have variable and/or high latency.
 *
 * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
 * performed and the function returns success. Copying a number of bytes larger
 * than the size of the buffers pointed by @p dst or @p src results in undefined
 * behavior.
 *
 * @param[in] num_dep_signals Number of dependent signals. Can be 0.
 *
 * @param[in] dep_signals List of signals that must be waited on before the copy
 * operation starts. The copy will start after every signal has been observed with
 * the value 0. The dependent signals should not include the completion signal of
 * an hsa_amd_memory_async_copy operation that is yet to be issued, as that can
 * result in a deadlock. If @p num_dep_signals is 0, this argument is ignored.
 *
 * @param[in] completion_signal Signal used to indicate completion of the copy
 * operation. When the copy operation is finished, the value of the signal is
 * decremented. The runtime indicates that an error has occurred during the copy
 * operation by setting the value of the completion signal to a negative
 * number. The signal handle must not be 0.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. The
 * application is responsible for checking for asynchronous error conditions
 * (see the description of @p completion_signal).
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT An agent is invalid or no discovered agent has access.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p completion_signal is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
 * pointers are NULL, or the completion signal is 0.
 */
hsa_status_t HSA_API
    hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src,
                              hsa_agent_t src_agent, size_t size,
                              uint32_t num_dep_signals,
                              const hsa_signal_t* dep_signals,
                              hsa_signal_t completion_signal);

/**
 * @brief Asynchronously copy a block of memory from the location pointed to by
 * @p src on the @p src_agent to the memory block pointed to by @p dst on the @p
 * dst_agent on @p engine_id.
 *
 * WARNING: Concurrent use of this call with hsa_amd_memory_async_copy can result
 * in resource conflicts as HSA runtime will auto assign engines with the latter
 * call.  Approach using both calls concurrently with caution.
 *
 * All param definitions are identical to hsa_amd_memory_async_copy with the
 * exception of @p engine_id and @p force_copy_on_sdma.
 *
 * @param[in] engine_id Target engine defined by hsa_amd_sdma_engine_id_t.
 * Client should use hsa_amd_memory_copy_engine_status first to get the ID
 * availability.
 *
 * @param[in] force_copy_on_sdma By default, blit kernel copies are used when
 * dst_agent == src_agent.  Setting this to true will force the copy over SDMA1.
 *
 * All return definitions are identical to hsa_amd_memory_async_copy with the
 * following amendments:
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
 * pointers are NULL, or the completion signal is 0 or @p engine_id is
 * improperly bounded.
 */
hsa_status_t HSA_API
    hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent, const void* src,
                              hsa_agent_t src_agent, size_t size,
                              uint32_t num_dep_signals,
                              const hsa_signal_t* dep_signals,
                              hsa_signal_t completion_signal,
                              hsa_amd_sdma_engine_id_t engine_id,
                              bool force_copy_on_sdma);
/**
 * @brief Reports the availability of SDMA copy engines.
 *
 * @param[in] dst_agent Destination agent of copy status direction.
 *
 * @param[in] src_agent Source agent of copy status direction.
 *
 * @param[out] engine_ids_mask Returns the available SDMA engine IDs as a mask
 * that can be tested against hsa_amd_sdma_engine_id_t values.
 *
 * @retval ::HSA_STATUS_SUCCESS Agent has available SDMA engines.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Agent does not have available
 * SDMA engines.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p dst_agent and @p src_agent are
 * the same, as dst_agent == src_agent is generally used for shader copies.
 */
hsa_status_t HSA_API
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
                                      uint32_t *engine_ids_mask);
/**
 * @brief Returns the preferred SDMA engine mask.
 *
 * @param[in] dst_agent Destination agent of copy status direction.
 *
 * @param[in] src_agent Source agent of copy status direction.
 *
 * @param[out] recommended_ids_mask Returns the available SDMA engine IDs for
 * max bandwidth as a mask that can be tested against hsa_amd_sdma_engine_id_t
 * values. Can be 0 if there is no preference.
 *
 * @retval ::HSA_STATUS_SUCCESS For mask returned.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p dst_agent and @p src_agent are
 * the same, as dst_agent == src_agent is generally used for shader copies.
 */
hsa_status_t HSA_API
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
                                         uint32_t* recommended_ids_mask);

/*
[Provisional API]
Pitched memory descriptor.
All elements must be 4 byte aligned.  Pitch and slice are in bytes.
*/
typedef struct hsa_pitched_ptr_s {
  /* NOTE(review): presumably the base address of the pitched memory - confirm. */
  void* base;
  /* Pitch, in bytes. */
  size_t pitch;
  /* Slice, in bytes. */
  size_t slice;
} hsa_pitched_ptr_t;

/*
[Provisional API]
Copy direction flag.
Passed as the dir argument of hsa_amd_memory_async_copy_rect.
NOTE(review): going by the enumerator names, each value names the source and
destination side of the copy as <source>To<destination> - confirm.
*/
typedef enum {
  hsaHostToHost = 0,
  hsaHostToDevice = 1,
  hsaDeviceToHost = 2,
  hsaDeviceToDevice = 3
} hsa_amd_copy_direction_t;

/*
[Provisional API]
SDMA 3D memory copy API.

- The same requirements must be met by src and dst as in
  hsa_amd_memory_async_copy.
- Both src and dst must be directly accessible to the copy_agent during the
  copy.
- The src and dst rects must not overlap.
- CPU agents are not supported.
- The API requires SDMA and will return an error if SDMA is not available.
- Offsets and range carry x in bytes, y and z in rows and layers.
*/
hsa_status_t HSA_API hsa_amd_memory_async_copy_rect(
    const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
    const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
    hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
    hsa_signal_t completion_signal);

/**
 * @brief Type of access to a memory pool from a given agent.
 */
typedef enum {
  /**
   * The agent cannot directly access any buffer in the memory pool.
   */
  HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED = 0,
  /**
   * The agent can directly access a buffer located in the pool; the application
   * does not need to invoke ::hsa_amd_agents_allow_access.
   */
  HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT = 1,
  /**
   * The agent can directly access a buffer located in the pool, but only if the
   * application has previously requested access to that buffer using
   * ::hsa_amd_agents_allow_access.
   */
  HSA_AMD_MEMORY_POOL_ACCESS_DISALLOWED_BY_DEFAULT = 2
} hsa_amd_memory_pool_access_t;

/**
 * @brief Type of bus/link between an agent and a memory pool, as reported in
 * ::hsa_amd_memory_pool_link_info_t::link_type.
 */
typedef enum {
  /**
  * Hyper-transport bus type.
  */
  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,

  /**
  * QPI bus type.
  */
  HSA_AMD_LINK_INFO_TYPE_QPI = 1,

  /**
  * PCIe bus type.
  */
  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,

  /**
  * Infiniband bus type. Note that the enumerator name is spelled "INFINBAND";
  * it is a public identifier and is kept as-is.
  */
  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,

  /**
  * xGMI link type.
  */
  HSA_AMD_LINK_INFO_TYPE_XGMI = 4

} hsa_amd_link_info_type_t;

/**
 * @brief Link properties when accessing the memory pool from the specified
 * agent.
 *
 * This is the element type of the array returned for the
 * ::HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO attribute.
 */
typedef struct hsa_amd_memory_pool_link_info_s {
  /**
  * Minimum transfer latency (rounded to ns).
  */
  uint32_t min_latency;

  /**
  * Maximum transfer latency (rounded to ns).
  */
  uint32_t max_latency;

  /**
  * Minimum link interface bandwidth in MB/s.
  */
  uint32_t min_bandwidth;

  /**
  * Maximum link interface bandwidth in MB/s.
  */
  uint32_t max_bandwidth;

  /**
  * Support for 32-bit atomic transactions.
  */
  bool atomic_support_32bit;

  /**
  * Support for 64-bit atomic transactions.
  */
  bool atomic_support_64bit;

  /**
  * Support for cache coherent transactions.
  */
  bool coherent_support;

  /**
  * The type of bus/link.
  */
  hsa_amd_link_info_type_t link_type;

  /**
   * NUMA distance of the memory pool relative to the querying agent.
   */
  uint32_t numa_distance;
} hsa_amd_memory_pool_link_info_t;

/**
 * @brief Properties of the relationship between an agent and a memory pool.
 *
 * Each enumerator selects the attribute returned by
 * ::hsa_amd_agent_memory_pool_get_info.
 */
typedef enum {
  /**
  * Access to buffers located in the memory pool. The type of this attribute
  * is ::hsa_amd_memory_pool_access_t.
  *
  * An agent can always directly access buffers currently located in a memory
  * pool that is associated (the memory_pool is one of the values returned by
  * ::hsa_amd_agent_iterate_memory_pools on the agent) with that agent. If the
  * buffer is currently located in a memory pool that is not associated with
  * the agent, and the value returned by this function for the given
  * combination of agent and memory pool is not
  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED, the application still needs to invoke
  * ::hsa_amd_agents_allow_access in order to gain direct access to the buffer.
  *
  * If the given agent can directly access buffers in the pool, the result is not
  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is associated with
  * the agent, or it is of fine-grained type, the result must not be
  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. If the memory pool is not associated
  * with the agent, and does not reside in the global segment, the result must
  * be HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED.
  */
  HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS = 0,

  /**
  * Number of links to hop when accessing the memory pool from the specified
  * agent. The value of this attribute is zero if the memory pool is associated
  * with the agent, or if the access type is
  * HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED. The type of this attribute is
  * uint32_t.
  */
  HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS = 1,

  /**
  * Details of each link hop when accessing the memory pool starting from the
  * specified agent. The type of this attribute is an array of
  * HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS elements, each of type
  * ::hsa_amd_memory_pool_link_info_t.
  */
  HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO = 2

} hsa_amd_agent_memory_pool_info_t;

/**
 * @brief Get the current value of an attribute of the relationship between an
 * agent and a memory pool.
 *
 * @param[in] agent Agent.
 *
 * @param[in] memory_pool Memory pool.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer where to store
 * the value of the attribute. If the buffer passed by the application is not
 * large enough to hold the value of @p attribute, the behavior is undefined.
 * The type stored for each attribute is documented on
 * ::hsa_amd_agent_memory_pool_info_t.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 */
hsa_status_t HSA_API hsa_amd_agent_memory_pool_get_info(
    hsa_agent_t agent, hsa_amd_memory_pool_t memory_pool,
    hsa_amd_agent_memory_pool_info_t attribute, void* value);

/**
 * @brief Enable direct access to a buffer from a given set of agents.
 *
 * @details
 *
 * Upon return, only the listed agents and the agent associated with the
 * buffer's memory pool have direct access to the @p ptr.
 *
 * Any agent that has access to the buffer before and after the call to
 * ::hsa_amd_agents_allow_access will also have access while
 * ::hsa_amd_agents_allow_access is in progress.
 *
 * The caller is responsible for ensuring that each agent in the list
 * can access the memory pool containing @p ptr
 * (using ::hsa_amd_agent_memory_pool_get_info with ::HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS attribute),
 * otherwise an error code is returned.
 *
 * @param[in] num_agents Size of @p agents.
 *
 * @param[in] agents List of agents. If @p num_agents is 0, this argument is
 * ignored.
 * NOTE(review): the ::HSA_STATUS_ERROR_INVALID_ARGUMENT description below lists
 * @p num_agents == 0 as an error, which conflicts with "ignored" here - confirm
 * which one reflects the implementation.
 *
 * @param[in] flags A list of bit-fields used to specify access
 * information on a per-agent basis. This is currently reserved and must be NULL.
 *
 * @param[in] ptr A buffer previously allocated using ::hsa_amd_memory_pool_allocate.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p num_agents is 0, @p agents
 * is NULL, @p flags is not NULL, or access was requested for agent(s) that
 * cannot access the pool from which @p ptr was allocated.
 *
 */
hsa_status_t HSA_API
    hsa_amd_agents_allow_access(uint32_t num_agents, const hsa_agent_t* agents,
                                const uint32_t* flags, const void* ptr);

/**
 * @brief Query if buffers currently located in some memory pool can be
 * relocated to a destination memory pool.
 *
 * @details Even if @p result is reported as true, a migration of a buffer to
 * @p dst_memory_pool using ::hsa_amd_memory_migrate may nevertheless fail due
 * to resource limitations.
 *
 * @param[in] src_memory_pool Source memory pool.
 *
 * @param[in] dst_memory_pool Destination memory pool.
 *
 * @param[out] result Pointer to a memory location where the result of the query
 * is stored. Must not be NULL. If buffers currently located in @p
 * src_memory_pool can be relocated to @p dst_memory_pool, the result is
 * true.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL One of the memory pools is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
 */
hsa_status_t HSA_API
    hsa_amd_memory_pool_can_migrate(hsa_amd_memory_pool_t src_memory_pool,
                                    hsa_amd_memory_pool_t dst_memory_pool,
                                    bool* result);

/**
 * @brief Relocate a buffer to a new memory pool.
 *
 * @details When a buffer is migrated, its virtual address remains the same but
 * its physical contents are moved to the indicated memory pool.
 *
 * After migration, only the agent associated with the destination pool will have access.
 *
 * The caller is also responsible for ensuring that the allocation in the
 * source memory pool where the buffer is currently located can be migrated to the
 * specified destination memory pool (i.e. ::hsa_amd_memory_pool_can_migrate reports true
 * for the source and destination memory pools), otherwise behavior is undefined.
 *
 * The caller must ensure that the buffer is not accessed while it is migrated.
 *
 * @param[in] ptr Buffer to be relocated. The buffer must have been released to system
 * prior to calling this API.  The buffer will be released to system upon completion.
 *
 * @param[in] memory_pool Memory pool where to place the buffer.
 *
 * @param[in] flags A bit-field that is used to specify migration
 * information. Must be zero.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL The destination memory pool is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
 * allocating the necessary resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p flags is not 0.
 */
hsa_status_t HSA_API hsa_amd_memory_migrate(const void* ptr,
                                            hsa_amd_memory_pool_t memory_pool,
                                            uint32_t flags);

/**
 *
 * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
 * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
 * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted). In
 * this case, the same input @p host_ptr may give different locked @p agent_ptr and when it does,
 * they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
 * Accesses to @p agent_ptr are coarse grained.
 *
 * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
 *
 * @param[in] size The size to be locked.
 *
 * @param[in] agents Array of agent handles to gain access to the @p host_ptr.
 * If this parameter is NULL and the @p num_agent is 0, all agents
 * in the platform will gain access to the @p host_ptr.
 *
 * @param[in] num_agent Number of agent handles in @p agents. Must be 0 if and
 * only if @p agents is NULL.
 *
 * @param[out] agent_ptr Pointer to the location where to store the new address.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
 * allocating the necessary resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agents in @p agents are
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
 * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
 * is NULL but @p num_agent is not 0.
 */
hsa_status_t HSA_API hsa_amd_memory_lock(void* host_ptr, size_t size,
                                         hsa_agent_t* agents, int num_agent,
                                         void** agent_ptr);

/**
 *
 * @brief Pin a host pointer allocated by C/C++ or OS allocator (i.e. ordinary system DRAM) and
 * return a new pointer accessible by the @p agents. If the @p host_ptr overlaps with previously
 * locked memory, then the overlap area is kept locked (i.e. multiple mappings are permitted).
 * In this case, the same input @p host_ptr may give different locked @p agent_ptr and when it
 * does, they are not necessarily coherent (i.e. accessing either @p agent_ptr is not equivalent).
 * Accesses to the memory via @p agent_ptr have the same access properties as memory allocated from
 * @p pool as determined by ::hsa_amd_memory_pool_get_info and ::hsa_amd_agent_memory_pool_get_info
 * (ex. coarse/fine grain, platform atomic support, link info).  Physical composition and placement
 * of the memory (ex. page size, NUMA binding) is not changed.
 *
 * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator.
 *
 * @param[in] size The size to be locked.
 *
 * @param[in] agents Array of agent handles to gain access to the @p host_ptr.
 * If this parameter is NULL and the @p num_agent is 0, all agents
 * in the platform will gain access to the @p host_ptr.
 *
 * @param[in] num_agent Number of agent handles in @p agents. Must be 0 if and
 * only if @p agents is NULL.
 *
 * @param[in] pool Global memory pool owned by a CPU agent.
 *
 * @param[in] flags A bit-field that is used to specify allocation
 * directives. Reserved parameter, must be 0.
 *
 * @param[out] agent_ptr Pointer to the location where to store the new address.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
 * allocating the necessary resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT One or more agents in @p agents are
 * invalid or can not access @p pool.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_MEMORY_POOL @p pool is invalid or not owned
 * by a CPU agent.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 or @p host_ptr or
 * @p agent_ptr is NULL or @p agents not NULL but @p num_agent is 0 or @p agents
 * is NULL but @p num_agent is not 0 or @p flags is not 0.
 */
hsa_status_t HSA_API hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_t* agents,
                                                 int num_agent, hsa_amd_memory_pool_t pool,
                                                 uint32_t flags, void** agent_ptr);

/**
 *
 * @brief Unpin the host pointer previously pinned via ::hsa_amd_memory_lock or
 * ::hsa_amd_memory_lock_to_pool.
 *
 * @details The behavior is undefined if the host pointer being unpinned does not
 * match a previously pinned address or if the host pointer was already deallocated.
 *
 * @param[in] host_ptr A buffer allocated by C/C++ or OS allocator that was
 * pinned previously via ::hsa_amd_memory_lock or ::hsa_amd_memory_lock_to_pool.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 */
hsa_status_t HSA_API hsa_amd_memory_unlock(void* host_ptr);

/**
 * @brief Sets the first @p count uint32_t elements of the block of memory
 * pointed to by @p ptr to the specified @p value.
 *
 * @param[in] ptr Pointer to the block of memory to fill.
 *
 * @param[in] value Value to be set.
 *
 * @param[in] count Number of uint32_t elements to be set to the value.
 *
 * @retval HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL or
 * not 4-byte aligned.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ALLOCATION The given memory
 * region was not allocated with HSA runtime APIs.
 *
 */
hsa_status_t HSA_API
    hsa_amd_memory_fill(void* ptr, uint32_t value, size_t count);

/**
 * @brief Maps an interop object into the HSA flat address space and establishes
 * memory residency.  The metadata pointer is valid during the lifetime of the
 * map (until ::hsa_amd_interop_unmap_buffer is called).
 * Multiple calls to ::hsa_amd_interop_map_buffer with the same @p interop_handle
 * result in multiple mappings with potentially different addresses and
 * different metadata pointers.  Concurrent operations on these addresses are
 * not coherent.  Memory must be fenced to system scope to ensure consistency,
 * between mappings and with any views of this buffer in the originating
 * software stack.
 *
 * @param[in] num_agents Number of agents which require access to the memory
 *
 * @param[in] agents List of accessing agents.
 *
 * @param[in] interop_handle Handle of interop buffer (dmabuf handle in Linux)
 *
 * @param[in] flags Reserved, must be 0
 *
 * @param[out] size Size in bytes of the mapped object
 *
 * @param[out] ptr Base address of the mapped object
 *
 * @param[out] metadata_size Size of metadata in bytes, may be NULL
 *
 * @param[out] metadata Pointer to metadata, may be NULL
 *
 * @retval HSA_STATUS_SUCCESS if successfully mapped
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT all other errors
 */
hsa_status_t HSA_API hsa_amd_interop_map_buffer(uint32_t num_agents,
                                        hsa_agent_t* agents,
                                        int interop_handle,
                                        uint32_t flags,
                                        size_t* size,
                                        void** ptr,
                                        size_t* metadata_size,
                                        const void** metadata);

/**
 * @brief Removes a previously mapped interop object from HSA's flat address space.
 * Ends lifetime for the mapping's associated metadata pointer.
 *
 * @param[in] ptr Address identifying the mapping to remove; presumably the base
 * address returned through the @p ptr output of ::hsa_amd_interop_map_buffer
 * (TODO confirm).
 */
hsa_status_t HSA_API hsa_amd_interop_unmap_buffer(void* ptr);

/**
 * @brief Denotes the type of memory in a pointer info query.
 * Reported in the type field of hsa_amd_pointer_info_t.
 */
typedef enum {
  /*
  Memory is not known to the HSA driver.  Unallocated or unlocked system memory.
  */
  HSA_EXT_POINTER_TYPE_UNKNOWN = 0,
  /*
  Memory was allocated with an HSA memory allocator.
  */
  HSA_EXT_POINTER_TYPE_HSA = 1,
  /*
  System memory which has been locked for use with an HSA agent.

  Memory of this type is normal malloc'd memory and is always accessible to
  the CPU.  Pointer info queries may not include CPU agents in the accessible
  agents list as the CPU has implicit access.
  */
  HSA_EXT_POINTER_TYPE_LOCKED = 2,
  /*
  Memory originated in a graphics component and is shared with ROCr.
  */
  HSA_EXT_POINTER_TYPE_GRAPHICS = 3,
  /*
  Memory has been shared with the local process via ROCr IPC APIs.
  */
  HSA_EXT_POINTER_TYPE_IPC = 4,
  /*
  Virtual address only; there is no backing memory behind it.
  */
  HSA_EXT_POINTER_TYPE_RESERVED_ADDR = 5
} hsa_amd_pointer_type_t;

/**
 * @brief Describes a memory allocation known to ROCr.
 * Within a ROCr major version this structure can only grow.
 */
typedef struct hsa_amd_pointer_info_s {
  /*
  Size in bytes of this structure.  Used for version control within a major ROCr
  revision.  Set to sizeof(hsa_amd_pointer_info_t) prior to calling
  hsa_amd_pointer_info.  If the runtime supports an older version of pointer
  info then size will be smaller on return.  Members starting after the return
  value of size will not be updated by hsa_amd_pointer_info.
  */
  uint32_t size;
  /*
  The type of allocation referenced.
  */
  hsa_amd_pointer_type_t type;
  /*
  Base address at which non-host agents may access the allocation. This field is
  not meaningful if the type of the allocation is HSA_EXT_POINTER_TYPE_UNKNOWN.
  */
  void* agentBaseAddress;
  /*
  Base address at which the host agent may access the allocation. This field is
  not meaningful if the type of the allocation is HSA_EXT_POINTER_TYPE_UNKNOWN.
  */
  void* hostBaseAddress;
  /*
  Size of the allocation. This field is not meaningful if the type of the allocation
  is HSA_EXT_POINTER_TYPE_UNKNOWN.
  */
  size_t sizeInBytes;
  /*
  Application provided value (see hsa_amd_pointer_info_set_userdata). This field
  is not meaningful if the type of the allocation is HSA_EXT_POINTER_TYPE_UNKNOWN.
  */
  void* userData;
  /*
  Reports an agent which "owns" (i.e. has preferred access to) the pool in which the
  allocation was made.  When multiple agents share equal access to a pool (ex: multiple
  CPU agents, or multi-die GPU boards) any such agent may be returned. This field is not
  meaningful if the type of the allocation is HSA_EXT_POINTER_TYPE_UNKNOWN or if this
  agent is not available in this process, e.g. if this agent is masked using
  ROCR_VISIBLE_DEVICES.
  */
  hsa_agent_t agentOwner;
  /*
  Contains a bitfield of hsa_amd_memory_pool_global_flag_t values.
  Reports the effective global flags bitmask for the allocation.  This field is not
  meaningful if the type of the allocation is HSA_EXT_POINTER_TYPE_UNKNOWN.
  */
  uint32_t global_flags;
} hsa_amd_pointer_info_t;

/**
 * @brief Retrieves information about the allocation referenced by the given
 * pointer.  Optionally returns the number and list of agents which can
 * directly access the allocation. In case this virtual address is unknown, the
 * pointer type returned will be HSA_EXT_POINTER_TYPE_UNKNOWN and the only fields
 * that are valid after hsa_amd_pointer_info returns are size and type.
 *
 * @param[in] ptr Pointer which references the allocation to retrieve info for.
 *
 * @param[in, out] info Pointer to structure to be filled with allocation info.
 * Data member size must be set to the size of the structure prior to calling
 * hsa_amd_pointer_info.  On return size will be set to the size of the
 * pointer info structure supported by the runtime, if smaller.  Members
 * beyond the returned value of size will not be updated by the API.
 * Must not be NULL.
 *
 * @param[in] alloc Function pointer to an allocator used to allocate the
 * @p accessible array.  If NULL @p accessible will not be returned.
 *
 * @param[out] num_agents_accessible Receives the count of agents in
 * @p accessible.  If NULL @p accessible will not be returned.
 *
 * @param[out] accessible Receives a pointer to the array, allocated by @p alloc,
 * holding the list of agents which may directly access the allocation.
 * May be NULL.
 *
 * @retval HSA_STATUS_SUCCESS Info retrieved successfully
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT NULL in @p ptr or @p info.
 */
hsa_status_t HSA_API hsa_amd_pointer_info(const void* ptr,
                                          hsa_amd_pointer_info_t* info,
                                          void* (*alloc)(size_t),
                                          uint32_t* num_agents_accessible,
                                          hsa_agent_t** accessible);

/**
 * @brief Associates an arbitrary pointer with an allocation known to ROCr.
 * The pointer can be fetched by hsa_amd_pointer_info in the userData field.
 *
 * @param[in] ptr Pointer to the first byte of an allocation known to ROCr
 * with which to associate @p userdata.
 *
 * @param[in] userdata Arbitrary pointer to associate with the allocation.
 *
 * @retval HSA_STATUS_SUCCESS @p userdata successfully stored.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is not known to ROCr.
 */
hsa_status_t HSA_API hsa_amd_pointer_info_set_userdata(const void* ptr,
                                                       void* userdata);

/**
 * @brief 256-bit process-independent identifier for a ROCr shared memory
 * allocation.
 *
 * The same representation is reused for IPC signals through the
 * hsa_amd_ipc_signal_t typedef.
 */
typedef struct hsa_amd_ipc_memory_s {
  /* Handle storage: 8 x 32-bit words = 256 bits. */
  uint32_t handle[8];
} hsa_amd_ipc_memory_t;

/**
 * @brief Prepares an allocation for interprocess sharing and creates a
 * handle of type hsa_amd_ipc_memory_t uniquely identifying the allocation.  A
 * handle is valid while the allocation it references remains accessible in
 * any process.  In general applications should confirm that a shared memory
 * region has been attached (via hsa_amd_ipc_memory_attach) in the remote
 * process prior to releasing that memory in the local process.
 * Repeated calls for the same allocation may, but are not required to, return
 * unique handles. The allocation must reside in memory of an agent of type
 * HSA_DEVICE_TYPE_GPU.
 *
 * @param[in] ptr Pointer to device memory allocated via ROCr APIs to prepare for
 * sharing.
 *
 * @param[in] len Length in bytes of the allocation to share.
 *
 * @param[out] handle Process independent identifier referencing the shared
 * allocation.
 *
 * @retval HSA_STATUS_SUCCESS allocation is prepared for interprocess sharing.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr does not point to the
 * first byte of an allocation made through ROCr, or @p len is not the full
 * length of the allocation, or @p handle is NULL.
 */
hsa_status_t HSA_API hsa_amd_ipc_memory_create(void* ptr, size_t len,
                                               hsa_amd_ipc_memory_t* handle);

/**
 * @brief Imports shared memory into the local process and makes it accessible
 * by the given agents.  If a shared memory handle is attached multiple times
 * in a process each attach may return a different address.  Each returned
 * address is refcounted and requires a matching number of calls to
 * hsa_amd_ipc_memory_detach to release the shared memory mapping.
 *
 * @param[in] handle Pointer to the identifier for the shared memory.
 *
 * @param[in] len Length of the shared memory to import.
 * Reserved.  Must be the full length of the shared allocation in this version.
 *
 * @param[in] num_agents Count of agents in @p mapping_agents.
 * May be zero if all agents are to be allowed access.
 *
 * @param[in] mapping_agents List of agents to access the shared memory.
 * Ignored if @p num_agents is zero.
 *
 * @param[out] mapped_ptr Receives a process local pointer to the shared memory.
 *
 * @retval HSA_STATUS_SUCCESS if memory is successfully imported.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid, @p len is
 * incorrect, @p mapped_ptr is NULL, or some agent for which access was
 * requested can not access the shared memory.
 */
hsa_status_t HSA_API hsa_amd_ipc_memory_attach(
    const hsa_amd_ipc_memory_t* handle, size_t len,
    uint32_t num_agents,
    const hsa_agent_t* mapping_agents,
    void** mapped_ptr);

/**
 * @brief Decrements the reference count for the shared memory mapping and
 * releases access to shared memory imported with hsa_amd_ipc_memory_attach.
 *
 * Per the hsa_amd_ipc_memory_attach documentation, each address returned by an
 * attach is refcounted and needs a matching number of detach calls before the
 * mapping is released.
 *
 * @param[in] mapped_ptr Pointer to the first byte of a shared allocation
 * imported with hsa_amd_ipc_memory_attach.
 *
 * @retval HSA_STATUS_SUCCESS if @p mapped_ptr was imported with
 * hsa_amd_ipc_memory_attach.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p mapped_ptr was not imported
 * with hsa_amd_ipc_memory_attach.
 */
hsa_status_t HSA_API hsa_amd_ipc_memory_detach(void* mapped_ptr);

/** @} */

/** \addtogroup status Runtime notifications
 *  @{
 */

/**
 * @brief 256-bit process-independent identifier for a ROCr IPC signal.
 *
 * Shares its representation with hsa_amd_ipc_memory_t.
 */
typedef hsa_amd_ipc_memory_t hsa_amd_ipc_signal_t;

/**
 * @brief Obtains an interprocess sharing handle for a signal.  The handle is
 * valid while the signal it references remains valid in any process.  In
 * general applications should confirm that the signal has been attached (via
 * hsa_amd_ipc_signal_attach) in the remote process prior to destroying that
 * signal in the local process.
 * Repeated calls for the same signal may, but are not required to, return
 * unique handles.
 *
 * @param[in] signal Signal created with attribute HSA_AMD_SIGNAL_IPC.
 *
 * @param[out] handle Process independent identifier referencing the shared
 * signal.
 *
 * @retval HSA_STATUS_SUCCESS @p handle is ready to use for interprocess sharing.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is not a valid signal
 * created with attribute HSA_AMD_SIGNAL_IPC or @p handle is NULL.
 */
hsa_status_t HSA_API hsa_amd_ipc_signal_create(hsa_signal_t signal, hsa_amd_ipc_signal_t* handle);

/**
 * @brief Imports an IPC capable signal into the local process.  If an IPC
 * signal handle is attached multiple times in a process each attach may return
 * a different signal handle.  Each returned signal handle is refcounted and
 * requires a matching number of calls to hsa_signal_destroy to release the
 * shared signal.
 *
 * @param[in] handle Pointer to the identifier for the shared signal.
 *
 * @param[out] signal Receives a process local signal handle to the shared signal.
 *
 * @retval HSA_STATUS_SUCCESS if the signal is successfully imported.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED if HSA is not initialized
 *
 * @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES if there is a failure in allocating
 * necessary resources
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is not valid.
 */
hsa_status_t HSA_API hsa_amd_ipc_signal_attach(const hsa_amd_ipc_signal_t* handle,
                                               hsa_signal_t* signal);

/**
 * @brief GPU system event type.
 */
typedef enum hsa_amd_event_type_s {
  /*
   AMD GPU memory fault.
   */
  HSA_AMD_GPU_MEMORY_FAULT_EVENT = 0,
  /*
   AMD GPU HW Exception.
   */
  HSA_AMD_GPU_HW_EXCEPTION_EVENT,
  /*
   AMD GPU memory error.
   */
  HSA_AMD_GPU_MEMORY_ERROR_EVENT,
} hsa_amd_event_type_t;

/**
 * @brief Flags denoting the cause of a memory fault.
 *
 * Each enumerator is a single bit; several may be combined in
 * hsa_amd_gpu_memory_fault_info_t::fault_reason_mask.
 */
typedef enum {
  // Page not present or supervisor privilege.
  HSA_AMD_MEMORY_FAULT_PAGE_NOT_PRESENT = 1 << 0,
  // Write access to a read-only page.
  HSA_AMD_MEMORY_FAULT_READ_ONLY = 1 << 1,
  // Execute access to a page marked NX.
  HSA_AMD_MEMORY_FAULT_NX = 1 << 2,
  // GPU attempted access to a host only page.
  HSA_AMD_MEMORY_FAULT_HOST_ONLY = 1 << 3,
  // DRAM ECC failure.
  HSA_AMD_MEMORY_FAULT_DRAMECC = 1 << 4,
  // Can't determine the exact fault address.
  HSA_AMD_MEMORY_FAULT_IMPRECISE = 1 << 5,
  // SRAM ECC failure (ie registers, no fault address).
  HSA_AMD_MEMORY_FAULT_SRAMECC = 1 << 6,
  // GPU reset following unspecified hang.
  // Uses an unsigned literal: shifting a signed 1 into bit 31 would overflow int.
  HSA_AMD_MEMORY_FAULT_HANG = 1U << 31
} hsa_amd_memory_fault_reason_t;

/**
 * @brief AMD GPU memory fault event data.
 *
 * Carried in hsa_amd_event_t::memory_fault.
 */
typedef struct hsa_amd_gpu_memory_fault_info_s {
  /*
  The agent where the memory fault occurred.
  */
  hsa_agent_t agent;
  /*
  Virtual address accessed.
  */
  uint64_t virtual_address;
  /*
  Bit field encoding the memory access failure reasons. There could be multiple bits set
  for one fault.  Bits are defined in hsa_amd_memory_fault_reason_t.
  */
  uint32_t fault_reason_mask;
} hsa_amd_gpu_memory_fault_info_t;

/**
 * @brief Flags denoting the cause of a memory error.
 *
 * These bits are reported in
 * hsa_amd_gpu_memory_error_info_t::error_reason_mask.
 */
typedef enum {
  // Memory was in use by low-level HW component and cannot be released
  HSA_AMD_MEMORY_ERROR_MEMORY_IN_USE = (1 << 0),
} hsa_amd_memory_error_reason_t;

/**
 * @brief AMD GPU memory error event data.
 *
 * Carried in hsa_amd_event_t::memory_error.
 */
typedef struct hsa_amd_gpu_memory_error_info_s {
  /*
  The agent where the memory error occurred.
  */
  hsa_agent_t agent;
  /*
  Virtual address involved.
  */
  uint64_t virtual_address;
  /*
  Bit field encoding the memory error failure reasons. There could be multiple bits set
  for one error.  Bits are defined in hsa_amd_memory_error_reason_t.
  */
  uint32_t error_reason_mask;
} hsa_amd_gpu_memory_error_info_t;

/**
 * @brief Flags denoting the type of a HW exception.
 *
 * Type of hsa_amd_gpu_hw_exception_info_t::reset_type.
 */
typedef enum {
  // Unused for now
  HSA_AMD_HW_EXCEPTION_RESET_TYPE_OTHER = 1 << 0,
} hsa_amd_hw_exception_reset_type_t;

/**
 * @brief Flags denoting the cause of a HW exception.
 *
 * Type of hsa_amd_gpu_hw_exception_info_t::reset_cause.
 */
typedef enum {
  // GPU Hang
  HSA_AMD_HW_EXCEPTION_CAUSE_GPU_HANG = 1 << 0,
  // SRAM ECC
  HSA_AMD_HW_EXCEPTION_CAUSE_ECC = 1 << 1,
} hsa_amd_hw_exception_reset_cause_t;

/**
 * @brief AMD GPU HW Exception event data.
 *
 * Carried in hsa_amd_event_t::hw_exception.
 */
typedef struct hsa_amd_gpu_hw_exception_info_s {
  /*
  The agent where the HW exception occurred.
  */
  hsa_agent_t agent;
  /*
  Type of the HW exception; values are defined in hsa_amd_hw_exception_reset_type_t.
  */
  hsa_amd_hw_exception_reset_type_t reset_type;
  /*
  Cause of the HW exception; values are defined in hsa_amd_hw_exception_reset_cause_t.
  */
  hsa_amd_hw_exception_reset_cause_t reset_cause;
} hsa_amd_gpu_hw_exception_info_t;

/**
 * @brief AMD GPU event data passed to event handler.
 *
 * @p event_type selects which member of the anonymous union is valid.
 */
typedef struct hsa_amd_event_s {
  /*
  The event type.
  */
  hsa_amd_event_type_t event_type;
  union {
    /*
    The memory fault info, only valid when @p event_type is HSA_AMD_GPU_MEMORY_FAULT_EVENT.
    */
    hsa_amd_gpu_memory_fault_info_t memory_fault;
    /*
    The HW exception info, only valid when @p event_type is HSA_AMD_GPU_HW_EXCEPTION_EVENT.
    */
    hsa_amd_gpu_hw_exception_info_t hw_exception;
    /*
    The memory error info, only valid when @p event_type is HSA_AMD_GPU_MEMORY_ERROR_EVENT.
    */
    hsa_amd_gpu_memory_error_info_t memory_error;
  };
} hsa_amd_event_t;

/**
 * @brief AMD GPU system event handler function type, registered with
 * hsa_amd_register_system_event_handler.
 *
 * @param[in] event Event data supplied by the HSA runtime.
 *
 * @param[in] data User data given at registration time. May be NULL.
 *
 * NOTE(review): how the runtime interprets the returned hsa_status_t is not
 * described in this header - confirm against the runtime implementation.
 */
typedef hsa_status_t (*hsa_amd_system_event_callback_t)(const hsa_amd_event_t* event, void* data);

/**
 * @brief Register AMD GPU event handler.
 *
 * @param[in] callback Callback to be invoked when an event is triggered.
 * The HSA runtime passes two arguments to the callback: @p event
 * is defined per event by the HSA runtime, and @p data is the user data.
 *
 * @param[in] data User data that is passed to @p callback. May be NULL.
 *
 * @retval HSA_STATUS_SUCCESS The handler has been registered successfully.
 *
 * @retval HSA_STATUS_ERROR An event handler has already been registered.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is invalid.
 */
hsa_status_t HSA_API hsa_amd_register_system_event_handler(hsa_amd_system_event_callback_t callback,
                                                   void* data);

/** @} */

/** \addtogroup queue Queues
 *  @{
 */

/**
 * @brief Per-queue dispatch and wavefront scheduling priority.
 *
 * Applied to a compute queue with hsa_amd_queue_set_priority.
 */
typedef enum hsa_amd_queue_priority_s {
  /*
  Below normal/high priority compute and all graphics
  */
  HSA_AMD_QUEUE_PRIORITY_LOW = 0,
  /*
  Above low priority compute, below high priority compute and all graphics
  */
  HSA_AMD_QUEUE_PRIORITY_NORMAL = 1,
  /*
  Above low/normal priority compute and all graphics
  */
  HSA_AMD_QUEUE_PRIORITY_HIGH = 2,
} hsa_amd_queue_priority_t;

/**
 * @brief Modifies the dispatch and wavefront scheduling priority for a
 * given compute queue. The default is HSA_AMD_QUEUE_PRIORITY_NORMAL.
 *
 * @param[in] queue Compute queue to apply new priority to.
 *
 * @param[in] priority Priority to associate with queue.
 *
 * @retval HSA_STATUS_SUCCESS if priority was changed successfully.
 *
 * @retval HSA_STATUS_ERROR_INVALID_QUEUE if queue is not a valid
 * compute queue handle.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if priority is not a valid
 * value from hsa_amd_queue_priority_t.
 */
hsa_status_t HSA_API hsa_amd_queue_set_priority(hsa_queue_t* queue,
                                                hsa_amd_queue_priority_t priority);

/**
 * @brief Queue creation attributes.
 */
typedef enum {
  /**
   * The queue's packet buffer and queue descriptor struct should be
   * allocated in system memory (default). Mutually exclusive with
   * HSA_AMD_QUEUE_CREATE_DEVICE_MEM_RING_BUF and
   * HSA_AMD_QUEUE_CREATE_DEVICE_MEM_QUEUE_DESCRIPTOR.
   */
  HSA_AMD_QUEUE_CREATE_SYSTEM_MEM = 0,
  /**
   * The queue's packet buffer should be allocated in the agent's
   * fine-grain device memory region.
   */
  HSA_AMD_QUEUE_CREATE_DEVICE_MEM_RING_BUF = (1 << 0),
  /**
   * The queue descriptor struct should be allocated in the agent's
   * fine-grain device memory region. Not supported for devices
   * connected via PCIe because the CPU's atomic read-modify-write
   * operations cannot be promoted to PCIe atomic read-modify-write
   * operations.
   */
  HSA_AMD_QUEUE_CREATE_DEVICE_MEM_QUEUE_DESCRIPTOR = (1 << 1),
} hsa_amd_queue_create_flag_t;

/** @} */

/** \addtogroup memory Memory
 *  @{
 */

/**
 * @brief Deallocation notifier function type.
 *
 * @param[in] ptr The monitored address that was given to
 * ::hsa_amd_register_deallocation_callback.
 *
 * @param[in] user_data User provided value given at registration time.
 * May be NULL.
 */
typedef void (*hsa_amd_deallocation_callback_t)(void* ptr, void* user_data);

/**
 * @brief Registers a deallocation notifier monitoring for release of agent
 * accessible address @p ptr.  If successful, @p callback will be invoked when
 * @p ptr is removed from accessibility from all agents.
 *
 * Notification callbacks are automatically deregistered when they are invoked.
 *
 * Note: The current version supports notifications of address release
 * originating from ::hsa_amd_memory_pool_free.  Support for other address
 * release APIs will follow.
 *
 * @param[in] ptr Agent accessible address to monitor for deallocation.  Passed
 * to @p callback.
 *
 * @param[in] callback Notifier to be invoked when @p ptr is released from
 * agent accessibility.
 *
 * @param[in] user_data User provided value passed to @p callback.  May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The notifier registered successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p ptr does not refer to a valid agent accessible
 * address.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL or @p ptr is NULL.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in allocating
 * necessary resources.
 */
hsa_status_t HSA_API hsa_amd_register_deallocation_callback(void* ptr,
                                                    hsa_amd_deallocation_callback_t callback,
                                                    void* user_data);

/**
 * @brief Removes a deallocation notifier previously registered with
 * ::hsa_amd_register_deallocation_callback.  The @p ptr and @p callback
 * arguments must be identical to those given in
 * ::hsa_amd_register_deallocation_callback.
 *
 * @param[in] ptr Agent accessible address which was monitored for deallocation.
 *
 * @param[in] callback Notifier to be removed.
 *
 * @retval ::HSA_STATUS_SUCCESS The notifier has been removed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The given notifier was not registered.
 */
hsa_status_t HSA_API hsa_amd_deregister_deallocation_callback(void* ptr,
                                                      hsa_amd_deallocation_callback_t callback);

/**
 * @brief SVM memory model values.  This is the value type of the
 * HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG attribute.
 */
typedef enum hsa_amd_svm_model_s {
  /**
   * Updates to memory with this attribute conform to HSA memory consistency
   * model.
   */
  HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED = 0,
  /**
   * Writes to memory with this attribute can be performed by a single agent
   * at a time.
   */
  HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1,
  /**
   * Memory region queried contains subregions with both
   * HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED and
   * HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED attributes.
   *
   * This attribute can not be used in hsa_amd_svm_attributes_set.  It is a
   * possible return from hsa_amd_svm_attributes_get indicating that the query
   * region contains both coarse and fine grained memory.
   */
  HSA_AMD_SVM_GLOBAL_FLAG_INDETERMINATE = 2
} hsa_amd_svm_model_t;

/**
 * @brief SVM attribute identifiers used in hsa_amd_svm_attribute_pair_t with
 * ::hsa_amd_svm_attributes_set and ::hsa_amd_svm_attributes_get.  Each
 * enumerator documents the type of its associated value.
 */
typedef enum hsa_amd_svm_attribute_s {
  // Memory model attribute.
  // Type of this attribute is hsa_amd_svm_model_t.
  HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0,
  // Marks the range read only.  This allows multiple physical copies to be
  // placed local to each accessing device.
  // Type of this attribute is bool.
  HSA_AMD_SVM_ATTRIB_READ_ONLY = 1,
  // Automatic migrations should attempt to keep the memory within the xgmi hive
  // containing accessible agents.
  // Type of this attribute is bool.
  HSA_AMD_SVM_ATTRIB_HIVE_LOCAL = 2,
  // Page granularity to migrate at once.  Page granularity is specified as
  // log2(page_count).
  // Type of this attribute is uint64_t.
  HSA_AMD_SVM_ATTRIB_MIGRATION_GRANULARITY = 3,
  // Physical location to prefer when automatic migration occurs.
  // Set to the null agent handle (handle == 0) to indicate there
  // is no preferred location.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_PREFERRED_LOCATION = 4,
  // This attribute can not be used in ::hsa_amd_svm_attributes_set (see
  // ::hsa_amd_svm_prefetch_async).
  // Queries the physical location of most recent prefetch command.
  // If the prefetch location has not been set or is not uniform across the
  // address range then returned hsa_agent_t::handle will be 0.
  // Querying this attribute will return the destination agent of the most
  // recent ::hsa_amd_svm_prefetch_async targeting the address range.  If
  // multiple async prefetches have been issued targeting the region and the
  // most recently issued prefetch has completed then the query will return
  // the location of the most recently completed prefetch.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION = 5,
  // Optimizes with the anticipation that the majority of operations to the
  // range will be read operations.
  // Type of this attribute is bool.
  HSA_AMD_SVM_ATTRIB_READ_MOSTLY = 6,
  // Allows the execution on GPU.
  // Type of this attribute is bool.
  HSA_AMD_SVM_ATTRIB_GPU_EXEC = 7,
  // This attribute can not be used in ::hsa_amd_svm_attributes_get.
  // Enables an agent for access to the range.  Access may incur a page fault
  // and associated memory migration.  Either this or
  // HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE is required prior to SVM
  // access if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE = 0x200,
  // This attribute can not be used in ::hsa_amd_svm_attributes_get.
  // Enables an agent for access to the range without page faults.  Access
  // will not incur a page fault and will not cause access based migration.
  // Either this or HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE is required prior to
  // SVM access if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is false.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE = 0x201,
  // This attribute can not be used in ::hsa_amd_svm_attributes_get.
  // Denies an agent access to the memory range.  Access will cause a terminal
  // segfault.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS = 0x202,
  // This attribute can not be used in ::hsa_amd_svm_attributes_set.
  // Returns the access attribute associated with the agent.
  // The agent to query must be set in the attribute value field.
  // The attribute enum will be replaced with the agent's current access
  // attribute for the address range.
  // TODO: Clarify KFD return value for non-uniform access attribute.
  // Type of this attribute is hsa_agent_t.
  HSA_AMD_SVM_ATTRIB_ACCESS_QUERY = 0x203,
} hsa_amd_svm_attribute_t;

// List element type for hsa_amd_svm_attributes_set/get: one attribute
// identifier together with its value.
typedef struct hsa_amd_svm_attribute_pair_s {
  // hsa_amd_svm_attribute_t value, stored as a 64-bit integer.
  uint64_t attribute;
  // Attribute value.  Bit values should be interpreted according to the type
  // given in the associated attribute description.
  uint64_t value;
} hsa_amd_svm_attribute_pair_t;

/**
 * @brief Sets SVM memory attributes.
 *
 * If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT returns false then enabling
 * access to an Agent via this API (setting HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE
 * or HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE) is required prior to SVM
 * memory access by that Agent.
 *
 * Attributes HSA_AMD_SVM_ATTRIB_ACCESS_QUERY and HSA_AMD_SVM_ATTRIB_PREFETCH_LOCATION
 * may not be used with this API.
 *
 * @param[in] ptr Start of the address range.  Will be aligned down to nearest
 * page boundary.
 *
 * @param[in] size Size of the address range.  Will be aligned up to nearest
 * page boundary.
 *
 * @param[in] attribute_list List of attributes to set for the address range.
 *
 * @param[in] attribute_count Length of @p attribute_list.
 */
hsa_status_t hsa_amd_svm_attributes_set(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count);

/**
 * @brief Gets SVM memory attributes.
 *
 * Attributes HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE,
 * HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE and
 * HSA_AMD_SVM_ATTRIB_AGENT_NO_ACCESS may not be used with this API (see
 * hsa_amd_svm_attribute_t).
 *
 * Note that attribute HSA_AMD_SVM_ATTRIB_ACCESS_QUERY takes as input an
 * hsa_agent_t and returns the current access type through its attribute field.
 *
 * @param[in] ptr Start of the address range.  Will be aligned down to nearest
 * page boundary.
 *
 * @param[in] size Size of the address range.  Will be aligned up to nearest
 * page boundary.
 *
 * @param[in,out] attribute_list List of attributes to query for the address
 * range.  Results are returned through the entries of this list.
 *
 * @param[in] attribute_count Length of @p attribute_list.
 */
hsa_status_t hsa_amd_svm_attributes_get(void* ptr, size_t size,
                                        hsa_amd_svm_attribute_pair_t* attribute_list,
                                        size_t attribute_count);

/**
 * @brief Asynchronously migrates memory to an agent.
 *
 * Schedules memory migration to @p agent when @p dep_signals have been observed equal to zero.
 * @p completion_signal will decrement when the migration is complete.
 *
 * @param[in] ptr Start of the range to migrate.  Will be aligned down to nearest page boundary.
 *
 * @param[in] size Size of the range to migrate.  Will be aligned up to nearest page boundary.
 *
 * @param[in] agent Agent to migrate to.
 *
 * @param[in] num_dep_signals Number of dependent signals. Can be 0.
 *
 * @param[in] dep_signals List of signals that must be waited on before the migration
 * operation starts. The migration will start after every signal has been observed with
 * the value 0. If @p num_dep_signals is 0, this argument is ignored.
 *
 * @param[in] completion_signal Signal used to indicate completion of the migration
 * operation. When the migration operation is finished, the value of the signal is
 * decremented. The runtime indicates that an error has occurred during the migration
 * operation by setting the value of the completion signal to a negative
 * number. If no completion signal is required this handle may be null.
 */
hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agent,
                                        uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                        hsa_signal_t completion_signal);

/** @} */

/** \addtogroup profile Profiling
 *  @{
 */

/**
 * @brief Acquire Stream Performance Monitor on an agent
 *
 * Acquire exclusive use of SPM on @p preferred_agent.
 * See hsa_amd_spm_set_dest_buffer to provide a destination buffer to KFD to start recording and
 * retrieve this data.  Exclusive use is given back with hsa_amd_spm_release.
 *
 * @param[in] preferred_agent Agent on which to acquire SPM
 */
hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent);

/**
 * @brief Release Stream Performance Monitor on an agent
 *
 * Release exclusive use of SPM on @p preferred_agent. This will stop KFD writing SPM data.
 * If a destination buffer is set, then data in the destination buffer is available to the user
 * when this function returns.
 *
 * @param[in] preferred_agent Agent on which to release SPM
 */
hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent);

/**
 * @brief  Set up the current destination user mode buffer for stream performance
 * counter data. KFD will start writing SPM data into the destination buffer. KFD will continue
 * to copy data into the current destination buffer until any of the following functions are called
 * - hsa_amd_spm_release
 * - hsa_amd_spm_set_dest_buffer with dest set to NULL
 * - hsa_amd_spm_set_dest_buffer with dest set to a new buffer
 *
 * If @p timeout is non-0, the call will wait for up to @p timeout ms for the previous
 * buffer to be filled. If the previous buffer is filled before the timeout expires, @p timeout
 * will be updated with the time remaining. If the timeout is exceeded, the function
 * copies any partial data available into the previous user buffer and returns success.
 * User should not access destination data while KFD is copying data.
 * If the previous destination buffer was full, then @p is_data_loss flag is set.
 * @p dest is CPU accessible memory. It could be malloc'ed memory or host allocated memory
 *
 * @param[in] preferred_agent Agent on which to set the dest buffer
 *
 * @param[in] size_in_bytes size of the buffer
 *
 * @param[in,out] timeout timeout in milliseconds
 *
 * @param[out] size_copied number of bytes copied
 *
 * @param[in] dest destination address. Set to NULL to stop copy on previous buffer
 *
 * @param[out] is_data_loss true if data was lost
 */
hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes,
                                         uint32_t* timeout, uint32_t* size_copied, void* dest,
                                         bool* is_data_loss);

/** @} */

/** \addtogroup memory Memory
 *  @{
 */

/**
 * @brief Older version of export dmabuf
 *
 * This is the same as calling ::hsa_amd_portable_export_dmabuf_v2 (the v2
 * version of export dmabuf) with the flags argument set to
 * HSA_AMD_DMABUF_MAPPING_TYPE_NONE.
 *
 * @param[in] ptr Pointer to the allocation being exported.
 *
 * @param[in] size Size in bytes to export following @p ptr.  The entire range
 * being exported must be contained within a single allocation.
 *
 * @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the
 * allocation.  Contents will not be altered in the event of failure.
 *
 * @param[out] offset Offset in bytes into the memory referenced by the dma-buf
 * object at which @p ptr resides.  Contents will not be altered in the event
 * of failure.
 *
 * @retval ::HSA_STATUS_SUCCESS Export completed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT One or more arguments is NULL.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by
 * @p ptr and @p size is not contained within a single allocation.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr
 * and @p size was allocated on a device which can not export memory.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The return file descriptor,
 * @p dmabuf, could not be created.
 */
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
                                            uint64_t* offset);

                                            /**
 * @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
 *
 * Obtains an OS specific handle to GPU agent memory.  The memory must be part
 * of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
 * The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
 * to the allocation.
 *
 * Shared access to the memory is not guaranteed to be fine grain coherent even
 * if the allocation exported is from a fine grain pool.  The shared memory
 * consistency model will be no stronger than the model exported from; consult
 * the importing API to determine the final consistency model.
 *
 * The allocation's memory remains valid as long as the handle and any mapping
 * of the handle remains valid.  When the handle and all mappings are closed
 * the backing memory will be released for reuse.  Handles are closed with
 * ::hsa_amd_portable_close_dmabuf.
 *
 * @param[in] ptr Pointer to the allocation being exported.
 *
 * @param[in] size Size in bytes to export following @p ptr.  The entire range
 * being exported must be contained within a single allocation.
 *
 * @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the
 * allocation.  Contents will not be altered in the event of failure.
 *
 * @param[out] offset Offset in bytes into the memory referenced by the dma-buf
 * object at which @p ptr resides.  Contents will not be altered in the event
 * of failure.
 *
 * @param[in] flags Bitmask of hsa_amd_dma_buf_mapping_type_t flags.
 *
 * @retval ::HSA_STATUS_SUCCESS Export completed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT One or more arguments is NULL.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by
 * @p ptr and @p size is not contained within a single allocation.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr
 * and @p size was allocated on a device which can not export memory.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The return file descriptor,
 * @p dmabuf, could not be created.
 */
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
                               int* dmabuf, uint64_t* offset, uint64_t flags);

/**
 * @brief Closes an OS specific, vendor neutral, handle to a memory allocation.
 *
 * Closes an OS specific handle to GPU agent memory.
 *
 * Applications should close a handle after imports are complete.  The handle
 * is not required to remain open for the lifetime of imported mappings.  The
 * referenced allocation will remain valid until all handles and mappings
 * are closed.
 *
 * @param[in] dmabuf Handle to be closed.
 *
 * @retval ::HSA_STATUS_SUCCESS Handle closed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_RESOURCE_FREE A generic error was encountered
 * when closing the handle.  The handle may have been closed already or an
 * async IO error may have occurred.
 */
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);

/**
 * @brief Optional flags accepted by hsa_amd_vmem_address_reserve and
 * hsa_amd_vmem_address_reserve_align.
 */
typedef enum hsa_amd_vmem_address_reserve_flag_s {
  // Only reserve a VA range without registering it to the underlying driver
  HSA_AMD_VMEM_ADDRESS_NO_REGISTER = (1UL << 0),
} hsa_amd_vmem_address_reserve_flag_t;

/**
 * @brief Allocate a reserved address range
 *
 * Reserve a virtual address range. The size must be a multiple of the system page size.
 * If it is not possible to allocate the address specified by @p address, then @p va will be
 * a different address range.
 * Address range should be released by calling hsa_amd_vmem_address_free.
 *
 * @param[out] va Virtual address allocated
 * @param[in] size Size of the address range requested
 * @param[in] address Requested address
 * @param[in] flags Optional hsa_amd_vmem_address_reserve_flag_t flags
 *
 * @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate an address
 * range of this size.
 *
 * Note that this API will be deprecated in a future release and replaced by
 * hsa_amd_vmem_address_reserve_align
 */
hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t address,
                                          uint64_t flags);

/**
 * @brief Allocate a reserved address range with a requested alignment
 *
 * Reserve a virtual address range. The size must be a multiple of the system page size.
 * If it is not possible to allocate the address specified by @p address, then @p va will be
 * a different address range.
 * Address range should be released by calling hsa_amd_vmem_address_free.
 *
 * @param[out] va Virtual address allocated
 * @param[in] size Size of the address range requested
 * @param[in] address Requested address
 * @param[in] alignment Requested alignment. 0 for default. Must be >= page-size and a power of 2
 * @param[in] flags Optional hsa_amd_vmem_address_reserve_flag_t flags
 *
 * @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate an address
 * range of this size.
 */
hsa_status_t hsa_amd_vmem_address_reserve_align(void** va, size_t size, uint64_t address,
                                          uint64_t alignment, uint64_t flags);

/**
 * @brief Free a reserved address range
 *
 * Free a previously allocated address range. The size must match the size of a previously
 * allocated address range.
 *
 * @param[in] va Virtual address to be freed
 * @param[in] size Size of the address range
 *
 * @retval ::HSA_STATUS_SUCCESS Address range released successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid va specified
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid size specified
 * @retval ::HSA_STATUS_ERROR_RESOURCE_FREE Address range is still in use
 * @retval ::HSA_STATUS_ERROR Internal unexpected error
 */
hsa_status_t hsa_amd_vmem_address_free(void* va, size_t size);

/**
 * @brief Struct containing an opaque handle to a memory allocation.
 *
 * Handles are produced by hsa_amd_vmem_handle_create and released with
 * hsa_amd_vmem_handle_release.
 */
typedef struct hsa_amd_vmem_alloc_handle_s {
  /**
   * Opaque handle. Two handles reference the same object of the enclosing type
   * if and only if they are equal.
   */
  uint64_t handle;
} hsa_amd_vmem_alloc_handle_t;

typedef enum {
  MEMORY_TYPE_NONE,
  MEMORY_TYPE_PINNED,
} hsa_amd_memory_type_t;

/**
 * @brief Create a virtual memory handle
 *
 * Create a virtual memory handle within this pool.
 * @p size must be aligned to the allocation granule size for this memory pool, see
 * HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE
 * To minimize internal memory fragmentation, align the size to the recommended allocation granule
 * size, see HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE
 *
 * @param[in] pool Memory pool to use
 * @param[in] size Size of the memory allocation
 * @param[in] type Type of memory
 * @param[in] flags Currently unsupported
 * @param[out] memory_handle Handle for the allocation
 *
 * @retval ::HSA_STATUS_SUCCESS memory allocated successfully
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid arguments
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION This memory pool does not support allocations
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate this memory
 */
hsa_status_t hsa_amd_vmem_handle_create(hsa_amd_memory_pool_t pool, size_t size,
                                        hsa_amd_memory_type_t type, uint64_t flags,
                                        hsa_amd_vmem_alloc_handle_t* memory_handle);

/**
 * @brief Release a virtual memory handle
 *
 * @param[in] memory_handle Memory handle that was previously allocated
 *
 * @retval ::HSA_STATUS_SUCCESS Memory handle released successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory handle
 */
hsa_status_t hsa_amd_vmem_handle_release(hsa_amd_vmem_alloc_handle_t memory_handle);

/**
 * @brief Map a virtual memory handle
 *
 * Map a virtual memory handle to a reserved address range. The virtual address requested must be
 * within a previously reserved address range. @p va and (@p va + @p size) must be within
 * (va + size) of the previously reserved address range.
 * @p size must be equal to size of the @p memory_handle
 * hsa_amd_vmem_set_access needs to be called to make the memory accessible to specific agents
 *
 * @param[in] va Virtual address range where memory will be mapped
 * @param[in] size Size of memory mapping
 * @param[in] in_offset Offset into memory. Currently unsupported
 * @param[in] memory_handle Virtual memory handle to be mapped
 * @param[in] flags Currently unsupported
 *
 * @retval ::HSA_STATUS_SUCCESS Memory mapped successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT va, size or memory_handle are invalid
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_map(void* va, size_t size, size_t in_offset,
                              hsa_amd_vmem_alloc_handle_t memory_handle, uint64_t flags);

/**
 * @brief Unmap a virtual memory handle
 *
 * Unmap previously mapped virtual address range
 *
 * @param[in] va Virtual address range that was previously mapped
 * @param[in] size Size of memory mapping
 *
 * @retval ::HSA_STATUS_SUCCESS Memory backing unmapped successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION memory_handle is invalid
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT size is invalid
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_unmap(void* va, size_t size);

/**
 * @brief Access permission entry for one agent, used as the element type of
 * the @p desc list given to hsa_amd_vmem_set_access.
 */
typedef struct hsa_amd_memory_access_desc_s {
  // Access permissions for agent_handle.
  hsa_access_permission_t permissions;
  // Agent the permissions apply to.
  hsa_agent_t agent_handle;
} hsa_amd_memory_access_desc_t;

/**
 * @brief Make a memory mapping accessible
 *
 * Make previously mapped virtual address accessible to specific agents. @p size must be equal to
 * size of previously mapped virtual memory handle.
 * Calling hsa_amd_vmem_set_access multiple times on the same @p va:
 *  - Will overwrite permissions for agents specified in @p desc
 *  - Will leave permissions unchanged for agents not specified in @p desc
 *
 * @param[in] va Previously mapped virtual address
 * @param[in] size Size of memory mapping
 * @param[in] desc List of access permissions for each agent
 * @param[in] desc_cnt Number of elements in @p desc
 *
 * @retval ::HSA_STATUS_SUCCESS Access permissions set successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT va, size or memory_handle are invalid
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION memory_handle is invalid
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT Invalid agent in desc
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_set_access(void* va, size_t size,
                                     const hsa_amd_memory_access_desc_t* desc,
                                     size_t desc_cnt);

/**
 * @brief Get current access permissions for memory mapping
 *
 * Get access permissions for memory mapping for specific agent.
 *
 * @param[in] va Previously mapped virtual address
 * @param[out] perms Current permissions
 * @param[in] agent_handle Agent whose permissions are queried
 *
 * @retval ::HSA_STATUS_SUCCESS Permissions retrieved successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT Invalid agent
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION va is not mapped or permissions never set for this
 * agent
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_get_access(void* va, hsa_access_permission_t* perms,
                                     hsa_agent_t agent_handle);

/**
 * @brief Get an exportable shareable handle
 *
 * Get an exportable shareable handle for a memory_handle. This shareable handle can then be used to
 * re-create a virtual memory handle using hsa_amd_vmem_import_shareable_handle. The shareable
 * handle can be transferred using mechanisms that support posix file descriptors. Once all
 * shareable handles are closed, the memory_handle is released.
 *
 * @param[out] dmabuf_fd Shareable handle
 * @param[in] handle Previously allocated virtual memory handle
 * @param[in] flags Currently unsupported
 *
 * @retval ::HSA_STATUS_SUCCESS Shareable handle exported successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory handle
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Out of resources
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_export_shareable_handle(int* dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t handle,
                                                  uint64_t flags);
/**
 * @brief Import a shareable handle
 *
 * Import a shareable handle for a memory handle. Importing a shareable handle that has been closed
 * and released results in undefined behavior.
 *
 * @param[in] dmabuf_fd Shareable handle exported with hsa_amd_vmem_export_shareable_handle
 * @param[out] handle Virtual memory handle
 *
 * @retval ::HSA_STATUS_SUCCESS Shareable handle imported successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory handle
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Out of resources
 *
 * @retval ::HSA_STATUS_ERROR Unexpected internal error
 */
hsa_status_t hsa_amd_vmem_import_shareable_handle(int dmabuf_fd,
                                                  hsa_amd_vmem_alloc_handle_t* handle);

/**
 * @brief Returns memory handle for mapped memory
 *
 * Return a memory handle for previously mapped memory. The handle will be the same value of handle
 * used to map the memory. The returned handle must be released with corresponding number of calls
 * to hsa_amd_vmem_handle_release.
 *
 * @param[out] memory_handle Memory handle for this mapped address
 * @param[in] addr Mapped address
 *
 * @retval ::HSA_STATUS_SUCCESS Memory handle returned successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid address
 */
hsa_status_t hsa_amd_vmem_retain_alloc_handle(hsa_amd_vmem_alloc_handle_t* memory_handle,
                                              void* addr);

/**
 * @brief Returns the current allocation properties of a handle
 *
 * Returns the allocation properties of an existing handle
 *
 * @param[in] memory_handle Memory handle to be queried
 * @param[out] pool Memory pool that owns this handle
 * @param[out] type Memory type
 *
 * @retval ::HSA_STATUS_SUCCESS Properties returned successfully
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION Invalid memory_handle
 */
hsa_status_t hsa_amd_vmem_get_alloc_properties_from_handle(
    hsa_amd_vmem_alloc_handle_t memory_handle, hsa_amd_memory_pool_t* pool,
    hsa_amd_memory_type_t* type);

/** @} */

/** \addtogroup queue Queues
 *  @{
 */

/**
 * @brief Set the asynchronous scratch limit threshold on all the queues for this agent.
 * Dispatches that are enqueued on HW queues on this agent that are smaller than threshold will not
 * result in a scratch use-once method.
 *
 * Increasing this threshold will only increase the internal limit and not cause immediate allocation
 * of additional scratch memory. Decreasing this threshold will result in a release of scratch memory
 * on queues where the current amount of allocated scratch exceeds the new limit.
 *
 * If this API call would result in a release of scratch memory and there are dispatches that are
 * currently using scratch memory on this agent, this call will block until the
 * current dispatches are completed.
 *
 * This API is only supported on devices that support asynchronous scratch reclaim.
 *
 * @param[in] agent A valid agent.
 *
 * @param[in] threshold Threshold size in bytes
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT This agent does not support asynchronous scratch
 * reclaim
 */
hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, size_t threshold);

typedef enum {
  /*
   * Returns the agent that owns the underlying HW queue.
   * The type of this attribute is hsa_agent_t.
   */
  HSA_AMD_QUEUE_INFO_AGENT,
  /*
   * Returns the doorbell ID of the completion signal of the queue
   * The type of this attribute is uint64_t.
   */
  HSA_AMD_QUEUE_INFO_DOORBELL_ID,
} hsa_queue_info_attribute_t;

/**
 * @brief Query an attribute of a queue.
 *
 * @param[in] queue Queue to query.
 *
 * @param[in] attribute Attribute to query, see hsa_queue_info_attribute_t.
 *
 * @param[out] value Location receiving the attribute value.  Presumably this
 * must point to storage of the type documented for @p attribute - TODO confirm
 * against the runtime implementation.
 *
 * NOTE(review): return codes are not documented in this header.
 */
hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute,
                                    void* value);

/**
 * @brief Logging types, used to build the @p flags argument of
 * hsa_amd_enable_logging.
 */
typedef enum hsa_amd_log_flag_s {
  /* Log AQL packets internally enqueued by HSA for Blit Kernels */
  HSA_AMD_LOG_FLAG_BLIT_KERNEL_PKTS = 0,
} hsa_amd_log_flag_t;

/**
 * @brief Enable logging via external file
 *
 * If this function is called multiple times, the last call to this function will overwrite the
 * previous @p flags and @p file.
 *
 * @param[in] flags Used to filter types of logging, see hsa_amd_log_flag_t. Type is uint8_t[8].
 * Can be set using the hsa_flag_set64 macro. Setting @p flags to 0 will disable logging.
 * @param[in] file File stream to output logging. If file is NULL, prints are sent to stderr.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 */
hsa_status_t hsa_amd_enable_logging(uint8_t* flags, void* file);

/** @} */

#ifdef __cplusplus
}  // end extern "C" block
#endif

#endif  // header guard


================================================
FILE: runtime/hsa-runtime/inc/hsa_ext_finalize.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_

#include "hsa.h"

// Rebind HSA_API for the declarations in this header: any earlier definition
// is discarded, then HSA_API becomes HSA_API_EXPORT when HSA_EXPORT_FINALIZER
// is defined and HSA_API_IMPORT otherwise. The new binding is not restored at
// the end of this header, so it stays in effect for code that follows the
// include.
#undef HSA_API
#ifdef HSA_EXPORT_FINALIZER
#define HSA_API HSA_API_EXPORT
#else
#define HSA_API HSA_API_IMPORT
#endif

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

// Forward declaration only: BrigModuleHeader is left incomplete in this
// header, so BrigModule_t is a pointer to an opaque type here. It is the
// underlying type of hsa_ext_module_t.
struct BrigModuleHeader;
typedef struct BrigModuleHeader* BrigModule_t;

/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
 *  @{
 */

/**
 * @brief Enumeration constants added to ::hsa_status_t by this extension.
 *
 * The constants occupy the contiguous range 0x2000 to 0x2006.
 */
enum {
  /**
   * The HSAIL program is invalid.
   */
  HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
  /**
   * The HSAIL module is invalid.
   */
  HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
  /**
   * The machine model or profile of the HSAIL module does not match the
   * machine model or profile of the HSAIL program.
   */
  HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
  /**
   * The HSAIL module is already a part of the HSAIL program.
   */
  HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
  /**
   * Compatibility mismatch between symbol declaration and symbol definition.
   */
  HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
  /**
   * The finalization encountered an error while finalizing a kernel or
   * indirect function.
   */
  HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
  /**
   * Mismatch between a directive in the control directive structure and in
   * the HSAIL kernel.
   */
  HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
};

/** @} */

/** \defgroup ext-alt-finalizer-program Finalization Program
 *  @{
 */

/**
 * @brief HSAIL (BRIG) module. This is an alias of BrigModule_t; the HSA
 * Programmer's Reference Manual contains the definition of the BrigModule_t
 * type.
 */
typedef BrigModule_t hsa_ext_module_t;

/**
 * @brief An opaque handle to an HSAIL program, which groups a set of HSAIL
 * modules that collectively define functions and variables used by kernels and
 * indirect functions.
 */
typedef struct hsa_ext_program_s {
  /**
   * Opaque handle.
   */
  uint64_t handle;
} hsa_ext_program_t;

/**
 * @brief Create an empty HSAIL program.
 *
 * @param[in] machine_model Machine model used in the HSAIL program.
 *
 * @param[in] profile Profile used in the HSAIL program.
 *
 * @param[in] default_float_rounding_mode Default float rounding mode used in
 * the HSAIL program.
 *
 * @param[in] options Vendor-specific options. May be NULL.
 *
 * @param[out] program Memory location where the HSA runtime stores the newly
 * created HSAIL program handle.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
 * resources required for the operation.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
 * @p profile is invalid, @p default_float_rounding_mode is invalid, or
 * @p program is NULL.
 *
 * @see ::hsa_ext_program_destroy
 */
hsa_status_t HSA_API hsa_ext_program_create(
    hsa_machine_model_t machine_model,
    hsa_profile_t profile,
    hsa_default_float_rounding_mode_t default_float_rounding_mode,
    const char *options,
    hsa_ext_program_t *program);

/**
 * @brief Destroy an HSAIL program.
 *
 * @details The HSAIL program handle becomes invalid after it has been
 * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
 * still valid after the HSAIL program has been destroyed, and can be used as
 * intended. Resources allocated outside and associated with the HSAIL program
 * (such as HSAIL modules that are added to the HSAIL program) can be released
 * after the HSAIL program has been destroyed.
 *
 * @param[in] program HSAIL program.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
 * invalid.
 */
hsa_status_t HSA_API hsa_ext_program_destroy(
    hsa_ext_program_t program);

/**
 * @brief Add an HSAIL module to an existing HSAIL program.
 *
 * @details The HSA runtime does not perform a deep copy of the HSAIL module
 * upon addition. Instead, it stores a pointer to the HSAIL module. The
 * ownership of the HSAIL module belongs to the application, which must ensure
 * that @p module is not released before destroying the HSAIL program.
 *
 * The HSAIL module is successfully added to the HSAIL program if @p module is
 * valid, if all the declarations and definitions for the same symbol are
 * compatible, and if @p module specifies a machine model and profile that
 * match those of the HSAIL program.
 *
 * @param[in] program HSAIL program.
 *
 * @param[in] module HSAIL module. The application can add the same HSAIL module
 * to @p program at most once. The HSAIL module must specify the same machine
 * model and profile as @p program. If the float rounding mode of @p module is
 * not default, then it should match that of @p program.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
 * resources required for the operation.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
 * module does not match the machine model of @p program, or the profile of @p
 * module does not match the profile of @p program.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
 * already a part of the HSAIL program.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
 * definition compatibility mismatch. See the symbol compatibility rules in the
 * HSA Programming Reference Manual.
 */
hsa_status_t HSA_API hsa_ext_program_add_module(
    hsa_ext_program_t program,
    hsa_ext_module_t module);

/**
 * @brief Iterate over the HSAIL modules in a program, and invoke an
 * application-defined callback on every iteration.
 *
 * @param[in] program HSAIL program.
 *
 * @param[in] callback Callback to be invoked once per HSAIL module in the
 * program. The HSA runtime passes three arguments to the callback: the program,
 * an HSAIL module, and the application data. If @p callback returns a status
 * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
 * stops and ::hsa_ext_program_iterate_modules returns that status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
    hsa_ext_program_t program,
    hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
                             void* data),
    void* data);

/**
 * @brief HSAIL program attributes, queried with ::hsa_ext_program_get_info.
 */
typedef enum {
  /**
   * Machine model specified when the HSAIL program was created. The type
   * of this attribute is ::hsa_machine_model_t.
   */
  HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
  /**
   * Profile specified when the HSAIL program was created. The type of
   * this attribute is ::hsa_profile_t.
   */
  HSA_EXT_PROGRAM_INFO_PROFILE = 1,
  /**
   * Default float rounding mode specified when the HSAIL program was
   * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
   */
  HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
} hsa_ext_program_info_t;

/**
 * @brief Get the current value of an attribute for a given HSAIL program.
 *
 * @param[in] program HSAIL program.
 *
 * @param[in] attribute Attribute to query. The type of the value associated
 * with each attribute is listed in ::hsa_ext_program_info_t.
 *
 * @param[out] value Pointer to an application-allocated buffer in which to
 * store the value of the attribute. If the buffer passed by the application is
 * not large enough to hold the value of @p attribute, the behaviour is
 * undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * HSAIL program attribute, or @p value is NULL.
 */
hsa_status_t HSA_API hsa_ext_program_get_info(
    hsa_ext_program_t program,
    hsa_ext_program_info_t attribute,
    void *value);

/**
 * @brief Special call convention values accepted by
 * ::hsa_ext_program_finalize.
 */
typedef enum {
  /**
   * Finalizer-determined call convention.
   */
  HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
} hsa_ext_finalizer_call_convention_t;

/**
 * @brief Control directives specify low-level information about the
 * finalization process.
 *
 * NOTE(review): the explicit reserved fields suggest a fixed binary layout.
 * Assuming ::hsa_dim3_t is three uint32_t values, the fields add up to 144
 * bytes -- confirm before changing any member.
 */
typedef struct hsa_ext_control_directives_s {
  /**
   * Bitset indicating which control directives are enabled. The bit assigned to
   * a control directive is determined by the corresponding value in
   * BrigControlDirective.
   *
   * If a control directive is disabled, its corresponding field value (if any)
   * must be 0. Control directives that are only present or absent (such as
   * partial workgroups) have no corresponding field as the presence of the bit
   * in this mask is sufficient.
   */
  uint64_t control_directives_mask;
  /**
   * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
   * assigned to an HSAIL exception is determined by the corresponding value
   * in BrigExceptionsMask. If the kernel contains an enablebreakexceptions
   * control directive, the finalizer uses the union of the two masks.
   */
  uint16_t break_exceptions_mask;
  /**
   * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
   * bit assigned to an HSAIL exception is determined by the corresponding value
   * in BrigExceptionsMask. If the kernel contains an enabledetectexceptions
   * control directive, the finalizer uses the union of the two masks.
   */
  uint16_t detect_exceptions_mask;
  /**
   * Maximum size (in bytes) of dynamic group memory that will be allocated by
   * the application for any dispatch of the kernel. If the kernel contains a
   * maxdynamicsize control directive, the two values should match.
   */
  uint32_t max_dynamic_group_size;
  /**
   * Maximum number of grid work-items that will be used by the application to
   * launch the kernel. If the kernel contains a maxflatgridsize control
   * directive, the value of @a max_flat_grid_size must not be greater than the
   * value of the directive, and takes precedence.
   *
   * The value specified for maximum absolute grid size must be greater than or
   * equal to the product of the values specified by @a required_grid_size.
   *
   * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
   * control_directives_mask, this field must be greater than 0.
   */
  uint64_t max_flat_grid_size;
  /**
   * Maximum number of work-group work-items that will be used by the
   * application to launch the kernel. If the kernel contains a
   * maxflatworkgroupsize control directive, the value of @a
   * max_flat_workgroup_size must not be greater than the value of the
   * directive, and takes precedence.
   *
   * The value specified for maximum absolute work-group size must be greater
   * than or equal to the product of the values specified by @a
   * required_workgroup_size.
   *
   * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
   * control_directives_mask, this field must be greater than 0.
   */
  uint32_t max_flat_workgroup_size;
  /**
   * Reserved. Must be 0.
   */
  uint32_t reserved1;
  /**
   * Grid size that will be used by the application in any dispatch of the
   * kernel. If the kernel contains a requiredgridsize control directive, the
   * dimensions should match.
   *
   * The specified grid size must be consistent with @a required_workgroup_size
   * and @a required_dim. Also, the product of the three dimensions must not
   * exceed @a max_flat_grid_size. Note that the listed invariants must hold
   * only if all the corresponding control directives are enabled.
   *
   * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
   * control_directives_mask, the three dimension values must be greater than 0.
   */
  uint64_t required_grid_size[3];
  /**
   * Work-group size that will be used by the application in any dispatch of the
   * kernel. If the kernel contains a requiredworkgroupsize control directive,
   * the dimensions should match.
   *
   * The specified work-group size must be consistent with @a required_grid_size
   * and @a required_dim. Also, the product of the three dimensions must not
   * exceed @a max_flat_workgroup_size. Note that the listed invariants must
   * hold only if all the corresponding control directives are enabled.
   *
   * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
   * control_directives_mask, the three dimension values must be greater than 0.
   */
  hsa_dim3_t required_workgroup_size;
  /**
   * Number of dimensions that will be used by the application to launch the
   * kernel. If the kernel contains a requireddim control directive, the two
   * values should match.
   *
   * The specified dimensions must be consistent with @a required_grid_size and
   * @a required_workgroup_size. This invariant must hold only if all the
   * corresponding control directives are enabled.
   *
   * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
   * control_directives_mask, this field must be 1, 2, or 3.
   */
  uint8_t required_dim;
  /**
   * Reserved. Must be 0.
   */
  uint8_t reserved2[75];
} hsa_ext_control_directives_t;

/**
 * @brief Finalize an HSAIL program for a given instruction set architecture.
 *
 * @details Finalize all of the kernels and indirect functions that belong to
 * the same HSAIL program for a specific instruction set architecture (ISA). The
 * transitive closure of all functions specified by call or scall must be
 * defined. Kernels and indirect functions that are being finalized must be
 * defined. Kernels and indirect functions that are referenced in kernels and
 * indirect functions being finalized may or may not be defined, but must be
 * declared. All the global/readonly segment variables that are referenced in
 * kernels and indirect functions being finalized may or may not be defined, but
 * must be declared.
 *
 * @param[in] program HSAIL program.
 *
 * @param[in] isa Instruction set architecture to finalize for.
 *
 * @param[in] call_convention A call convention used in a finalization. Must
 * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
 * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
 * isa (not inclusive).
 *
 * @param[in] control_directives Low-level control directives that influence
 * the finalization process. The structure is passed by value.
 *
 * @param[in] options Vendor-specific options. May be NULL.
 *
 * @param[in] code_object_type Type of code object to produce.
 *
 * @param[out] code_object Code object generated by the Finalizer, which
 * contains the machine code for the kernels and indirect functions in the HSAIL
 * program. The code object is independent of the HSAIL module that was used to
 * generate it.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
 * resources required for the operation.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH A directive in the
 * control directive structure does not match the one in the HSAIL kernel, or
 * the same directive is used with a different value in one of the functions
 * used by this kernel.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
 * encountered an error while compiling a kernel or an indirect function.
 */
hsa_status_t HSA_API hsa_ext_program_finalize(
    hsa_ext_program_t program,
    hsa_isa_t isa,
    int32_t call_convention,
    hsa_ext_control_directives_t control_directives,
    const char *options,
    hsa_code_object_type_t code_object_type,
    hsa_code_object_t *code_object);

/** @} */

// Marker macro defined with an empty replacement list.
// NOTE(review): presumably lets clients detect at compile time that version
// 1.00 of the finalizer extension API is declared -- confirm intended use.
#define hsa_ext_finalizer_1_00

/**
 * @brief Function-pointer table for version 1.00 of the finalizer extension.
 *
 * @details Holds one member per finalizer function declared in this header.
 * Each member has the same name, parameter list and return type as the
 * function it mirrors; refer to that function for the full contract.
 *
 * NOTE(review): presumably populated by the runtime's extension-table query
 * API -- confirm against the caller.
 */
typedef struct hsa_ext_finalizer_1_00_pfn_s {
  /** Pointer with the signature of ::hsa_ext_program_create. */
  hsa_status_t (*hsa_ext_program_create)(
      hsa_machine_model_t machine_model, hsa_profile_t profile,
      hsa_default_float_rounding_mode_t default_float_rounding_mode,
      const char *options, hsa_ext_program_t *program);

  /** Pointer with the signature of ::hsa_ext_program_destroy. */
  hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);

  /** Pointer with the signature of ::hsa_ext_program_add_module. */
  hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
                                                 hsa_ext_module_t module);

  /** Pointer with the signature of ::hsa_ext_program_iterate_modules. */
  hsa_status_t (*hsa_ext_program_iterate_modules)(
      hsa_ext_program_t program,
      hsa_status_t (*callback)(hsa_ext_program_t program,
                               hsa_ext_module_t module, void *data),
      void *data);

  /** Pointer with the signature of ::hsa_ext_program_get_info. */
  hsa_status_t (*hsa_ext_program_get_info)(
      hsa_ext_program_t program, hsa_ext_program_info_t attribute,
      void *value);

  /** Pointer with the signature of ::hsa_ext_program_finalize. */
  hsa_status_t (*hsa_ext_program_finalize)(
      hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
      hsa_ext_control_directives_t control_directives, const char *options,
      hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
} hsa_ext_finalizer_1_00_pfn_t;

#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus

#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_


================================================
FILE: runtime/hsa-runtime/inc/hsa_ext_image.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
// 
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// 
// Developed by:
// 
//                 AMD Research and AMD HSA Software Development
// 
//                 Advanced Micro Devices, Inc.
// 
//                 www.amd.com
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// 
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_EXT_IMAGE_H
#define HSA_EXT_IMAGE_H

#include "hsa.h"

// Rebind HSA_API for the declarations in this header: any earlier definition
// is discarded, then HSA_API becomes HSA_API_EXPORT when HSA_EXPORT_IMAGES is
// defined and HSA_API_IMPORT otherwise.
#undef HSA_API
#ifdef HSA_EXPORT_IMAGES
#define HSA_API HSA_API_EXPORT
#else
#define HSA_API HSA_API_IMPORT
#endif

#ifdef __cplusplus
extern "C" {
#endif /*__cplusplus*/ 

/** \defgroup ext-images Images and Samplers
 *  @{
 */

/**
 * @brief Enumeration constants added to ::hsa_status_t by this extension.
 *
 * The constants occupy the contiguous range 0x3000 to 0x3003.
 *
 * @remark Additions to hsa_status_t
 */
enum {
    /**
     * Image format is not supported.
     */
    HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
    /**
     * Image size is not supported.
     */
    HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001,
    /**
     * Image pitch is not supported or invalid.
     */
    HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED = 0x3002,
    /**
     * Sampler descriptor is not supported or invalid.
     */
    HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED = 0x3003
};

/**
 * @brief Enumeration constants added to ::hsa_agent_info_t by this
 * extension.
 *
 * The constants occupy the contiguous range 0x3000 to 0x300C.
 *
 * @remark Additions to hsa_agent_info_t
 */
enum {
  /**
   * Maximum number of elements in 1D images. Must be at least 16384. The type
   * of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
  /**
   * Maximum number of elements in 1DA images. Must be at least 16384. The type
   * of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
  /**
   * Maximum number of elements in 1DB images. Must be at least 65536. The type
   * of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
  /**
   * Maximum dimensions (width, height) of 2D images, in image elements. The X
   * and Y maximums must be at least 16384. The type of this attribute is
   * size_t[2].
   */
  HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
  /**
   * Maximum dimensions (width, height) of 2DA images, in image elements. The X
   * and Y maximums must be at least 16384. The type of this attribute is
   * size_t[2].
   */
  HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
  /**
   * Maximum dimensions (width, height) of 2DDEPTH images, in image
   * elements. The X and Y maximums must be at least 16384. The type of this
   * attribute is size_t[2].
   */
  HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
  /**
   * Maximum dimensions (width, height) of 2DADEPTH images, in image
   * elements. The X and Y maximums must be at least 16384. The type of this
   * attribute is size_t[2].
   */
  HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
  /**
   * Maximum dimensions (width, height, depth) of 3D images, in image
   * elements. The maximum along any dimension must be at least 2048. The type
   * of this attribute is size_t[3].
   */
  HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
  /**
   * Maximum number of image layers in an image array. Must be at least 2048.
   * The type of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
  /**
   * Maximum number of read-only image handles that can be created for an agent
   * at any one time. Must be at least 128. The type of this attribute is
   * size_t.
   */
  HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
  /**
   * Maximum number of write-only and read-write image handles (combined) that
   * can be created for an agent at any one time. Must be at least 64. The type
   * of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
  /**
   * Maximum number of sampler handles that can be created for an agent at any
   * one time. Must be at least 16. The type of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B,
  /**
   * Image pitch alignment. The agent only supports linear image data
   * layouts with a row pitch that is a multiple of this value. Must be
   * a power of 2. The type of this attribute is size_t.
   */
  HSA_EXT_AGENT_INFO_IMAGE_LINEAR_ROW_PITCH_ALIGNMENT = 0x300C
};

/**
 * @brief Image handle, populated by ::hsa_ext_image_create or
 * ::hsa_ext_image_create_with_layout. Image handles are only unique within an
 * agent, not across agents.
 */
typedef struct hsa_ext_image_s {
    /**
     * Opaque handle. For a given agent, two handles reference the same object
     * of the enclosing type if and only if they are equal.
     */
    uint64_t handle;

} hsa_ext_image_t;

/**
 * @brief Geometry associated with the image. This specifies the
 * number of image dimensions and whether the image is an image
 * array. See the <em>Image Geometry</em> section in the <em>HSA
 * Programming Reference Manual</em> for definitions on each
 * geometry. The enumeration values match the BRIG type @p
 * hsa_ext_brig_image_geometry_t.
 */
typedef enum {
  /**
   * One-dimensional image addressed by width coordinate.
   */
  HSA_EXT_IMAGE_GEOMETRY_1D = 0,

  /**
   * Two-dimensional image addressed by width and height coordinates.
   */
  HSA_EXT_IMAGE_GEOMETRY_2D = 1,

  /**
   * Three-dimensional image addressed by width, height, and depth coordinates.
   */
  HSA_EXT_IMAGE_GEOMETRY_3D = 2,

  /**
   * Array of one-dimensional images with the same size and format. 1D arrays
   * are addressed by width and index coordinate.
   */
  HSA_EXT_IMAGE_GEOMETRY_1DA = 3,

  /**
   * Array of two-dimensional images with the same size and format. 2D arrays
   * are addressed by width, height, and index coordinates.
   */
  HSA_EXT_IMAGE_GEOMETRY_2DA = 4,

  /**
   * One-dimensional image addressed by width coordinate. It has
   * specific restrictions compared to ::HSA_EXT_IMAGE_GEOMETRY_1D. An
   * image with an opaque image data layout will always use a linear
   * image data layout, and one with an explicit image data layout
   * must specify ::HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
   */
  HSA_EXT_IMAGE_GEOMETRY_1DB = 5,

  /**
   * Two-dimensional depth image addressed by width and height coordinates.
   */
  HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,

  /**
   * Array of two-dimensional depth images with the same size and format. 2D
   * arrays are addressed by width, height, and index coordinates.
   */
  HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
} hsa_ext_image_geometry_t;

/**
 * @brief Channel type associated with the elements of an image. See
 * the <em>Channel Type</em> section in the <em>HSA Programming Reference
 * Manual</em> for definitions on each channel type. The
 * enumeration values and definition match the BRIG type @p
 * hsa_ext_brig_image_channel_type_t.
 *
 * The enumerators are numbered explicitly and contiguously from 0 to 15.
 */
typedef enum {
    HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
    HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
    HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
    HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
    HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
} hsa_ext_image_channel_type_t;

/**
 * @brief A fixed-size (32-bit unsigned) type used to represent
 * ::hsa_ext_image_channel_type_t constants. It is the type of the
 * @a channel_type field of ::hsa_ext_image_format_t.
 */
typedef uint32_t hsa_ext_image_channel_type32_t;
    
/**
 * @brief Channel order associated with the elements of an image. See
 * the <em>Channel Order</em> section in the <em>HSA Programming Reference
 * Manual</em> for definitions on each channel order. The
 * enumeration values match the BRIG type @p
 * hsa_ext_brig_image_channel_order_t.
 *
 * The enumerators are numbered explicitly and contiguously from 0 to 19.
 */
typedef enum {
    HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
    HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
    HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
    HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
    HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
    HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
    HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
    HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
    HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
    HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
    HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
    HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
} hsa_ext_image_channel_order_t;

/**
 * @brief A fixed-size (32-bit unsigned) type used to represent
 * ::hsa_ext_image_channel_order_t constants. It is the type of the
 * @a channel_order field of ::hsa_ext_image_format_t.
 */
typedef uint32_t hsa_ext_image_channel_order32_t;
    

/**
 * @brief Image format: the channel type and channel order of the image
 * elements.
 */
typedef struct hsa_ext_image_format_s {
    /**
     * Channel type, stored as a fixed-size representation of an
     * ::hsa_ext_image_channel_type_t constant.
     */
    hsa_ext_image_channel_type32_t channel_type;

    /**
     * Channel order, stored as a fixed-size representation of an
     * ::hsa_ext_image_channel_order_t constant.
     */
    hsa_ext_image_channel_order32_t channel_order;
} hsa_ext_image_format_t;

/**
 * @brief Implementation independent image descriptor.
 */
typedef struct hsa_ext_image_descriptor_s {
    /**
     * Image geometry.
     */
    hsa_ext_image_geometry_t geometry;
    /**
     * Width of the image, in components.
     */
    size_t width;
    /**
     * Height of the image, in components. Only used if the geometry is
     * ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D,
     * ::HSA_EXT_IMAGE_GEOMETRY_2DA, ::HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or
     * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
     */
    size_t height;
    /**
     * Depth of the image, in components. Only used if the geometry is
     * ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0.
     */
    size_t depth;
    /**
     * Number of image layers in the image array. Only used if the geometry is
     * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
     * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
     */
    size_t array_size;
    /**
     * Image format.
     */
    hsa_ext_image_format_t format;
} hsa_ext_image_descriptor_t;

/**
 * @brief Image capability.
 *
 * @details The non-zero values are distinct bits, so several capabilities
 * can be reported together in the bit-mask written by
 * ::hsa_ext_image_get_capability and
 * ::hsa_ext_image_get_capability_with_layout.
 */
typedef enum  {
    /**
     * Images of this geometry, format, and layout are not supported by
     * the agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
    /**
     * Read-only images of this geometry, format, and layout are
     * supported by the agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
    /**
     * Write-only images of this geometry, format, and layout are
     * supported by the agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
    /**
     * Read-write images of this geometry, format, and layout are
     * supported by the agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
    /**
     * @deprecated Images of this geometry, format, and layout can be accessed from
     * read-modify-write atomic operations in the agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
    /**
     * Images of this geometry, format, and layout are guaranteed to
     * have a consistent data layout regardless of how they are
     * accessed by the associated agent.
     */
    HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
} hsa_ext_image_capability_t;

/**
 * @brief Image data layout.
 *
 * @details An image data layout denotes such aspects of image data
 * layout as tiling and organization of channels in memory. Some image
 * data layouts may only apply to specific image geometries, formats,
 * and access permissions. Different agents may support different
 * image layout identifiers, including vendor specific layouts. Note
 * that an agent may not support the same image data layout for
 * different access permissions to images with the same image
 * geometry, size, and format. If multiple agents support the same
 * image data layout then it is possible to use separate image handles
 * for each agent that references the same image data.
 */

typedef enum  {
    /**
     * An implementation specific opaque image data layout which can
     * vary depending on the agent, geometry, image format, image size,
     * and access permissions.
     *
     * This value is not accepted by the explicit-layout entry points
     * (::hsa_ext_image_get_capability_with_layout,
     * ::hsa_ext_image_data_get_info_with_layout, and
     * ::hsa_ext_image_create_with_layout); use the corresponding functions
     * without the @c _with_layout suffix instead.
     */
    HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE = 0x0,
    /**
     * The image data layout is specified by the following rules in
     * ascending byte address order. For a 3D image, 2DA image array,
     * or 1DA image array, the image data is stored as a linear sequence
     * of adjacent 2D image slices, 2D images, or 1D images
     * respectively, spaced according to the slice pitch. Each 2D image
     * is stored as a linear sequence of adjacent image rows, spaced
     * according to the row pitch. Each 1D or 1DB image is stored as a
     * single image row. Each image row is stored as a linear sequence
     * of image elements. Each image element is stored as a linear
     * sequence of image components specified by the left to right
     * channel order definition. Each image component is stored using
     * the memory type specified by the channel type.
     *
     * The 1DB image geometry always uses the linear image data layout.
     */
    HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR = 0x1
} hsa_ext_image_data_layout_t;

/**
 * @brief Retrieve the supported image capabilities for a given combination of
 * agent, geometry, and image format for an image created with an opaque image
 * data layout.
 *
 * @details For images created with an explicit image data layout, use
 * ::hsa_ext_image_get_capability_with_layout instead.
 *
 * @param[in] agent Agent to be associated with the image handle.
 *
 * @param[in] geometry Image geometry to query.
 *
 * @param[in] image_format Pointer to an image format. Must not be NULL.
 *
 * @param[out] capability_mask Pointer to a memory location where the HSA
 * runtime stores a bit-mask of supported image capability
 * (::hsa_ext_image_capability_t) values. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
 * NULL, or @p capability_mask is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_get_capability(
    hsa_agent_t agent,
    hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t *image_format,
    uint32_t *capability_mask);

/**
 * @brief Retrieve the supported image capabilities for a given combination of
 * agent, geometry, image format, and image layout for an image created with
 * an explicit image data layout.
 *
 * @param[in] agent Agent to be associated with the image handle.
 *
 * @param[in] geometry Image geometry to query.
 *
 * @param[in] image_format Pointer to an image format. Must not be NULL.
 *
 * @param[in] image_data_layout The image data layout.
 * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
 * ::hsa_ext_image_get_capability instead.
 *
 * @param[out] capability_mask Pointer to a memory location where the HSA
 * runtime stores a bit-mask of supported image capability
 * (::hsa_ext_image_capability_t) values. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_format is
 * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
 * or @p capability_mask is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_get_capability_with_layout(
    hsa_agent_t agent,
    hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t *image_format,
    hsa_ext_image_data_layout_t image_data_layout,
    uint32_t *capability_mask);

/**
 * @brief Agent specific image size and alignment requirements, populated by
 * ::hsa_ext_image_data_get_info and ::hsa_ext_image_data_get_info_with_layout.
 *
 * @details The image data buffer passed to ::hsa_ext_image_create or
 * ::hsa_ext_image_create_with_layout must be allocated according to these
 * requirements.
 */
typedef struct hsa_ext_image_data_info_s {
  /**
   * Image data size, in bytes.
   */
  size_t size;

  /**
   * Image data alignment, in bytes. Must always be a power of 2.
   */
  size_t alignment;

} hsa_ext_image_data_info_t;

/**
 * @brief Retrieve the image data requirements for a given combination of agent, image
 * descriptor, and access permission for an image created with an opaque image
 * data layout.
 *
 * @details The optimal image data size and alignment requirements may
 * vary depending on the image attributes specified in @p
 * image_descriptor, the @p access_permission, and the @p agent. Also,
 * different implementations of the HSA runtime may return different
 * requirements for the same input values.
 *
 * The implementation must return the same image data requirements for
 * different access permissions with matching image descriptors as long
 * as ::hsa_ext_image_get_capability reports
 * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
 * descriptors match if they have the same values, with the exception
 * that s-form channel orders match the corresponding non-s-form
 * channel order and vice versa.
 *
 * For images created with an explicit image data layout, use
 * ::hsa_ext_image_data_get_info_with_layout instead.
 *
 * @param[in] agent Agent to be associated with the image handle.
 *
 * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
 *
 * @param[in] access_permission Access permission of the image when
 * accessed by @p agent. The access permission defines how the agent
 * is allowed to access the image and must match the corresponding
 * HSAIL image handle type. The @p agent must support the image format
 * specified in @p image_descriptor for the given @p
 * access_permission.
 *
 * @param[out] image_data_info Memory location where the runtime stores the
 * size and alignment requirements. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The @p
 * agent does not support the image format specified by @p
 * image_descriptor with the specified @p access_permission.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
 * does not support the image dimensions specified by @p
 * image_descriptor with the specified @p access_permission.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
 * access_permission is not a valid access permission value, or @p
 * image_data_info is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_data_get_info(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t *image_data_info);

/**
 * @brief Retrieve the image data requirements for a given combination of
 * image descriptor, access permission, image data layout, image data row pitch,
 * and image data slice pitch for an image created with an explicit image
 * data layout.
 *
 * @details The image data size and alignment requirements may vary
 * depending on the image attributes specified in @p image_descriptor,
 * the @p access_permission, and the image layout. However, different
 * implementations of the HSA runtime will return the same
 * requirements for the same input values.
 *
 * The implementation must return the same image data requirements for
 * different access permissions with matching image descriptors and
 * matching image layouts as long as ::hsa_ext_image_get_capability
 * reports
 * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT. Image
 * descriptors match if they have the same values, with the exception
 * that s-form channel orders match the corresponding non-s-form
 * channel order and vice versa. Image layouts match if they are the
 * same image data layout and use the same image row and slice pitch
 * values.
 *
 * @param[in] agent Agent handle. NOTE(review): this parameter was previously
 * undocumented; presumably it is the agent to be associated with the image
 * handle, as for ::hsa_ext_image_data_get_info -- confirm, and confirm
 * whether ::HSA_STATUS_ERROR_INVALID_AGENT can be returned.
 *
 * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
 *
 * @param[in] access_permission Access permission of the image when
 * accessed by an agent. The access permission defines how the agent
 * is allowed to access the image and must match the corresponding
 * HSAIL image handle type.
 *
 * @param[in] image_data_layout The image data layout to use.
 * It is invalid to use ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use
 * ::hsa_ext_image_data_get_info instead.
 *
 * @param[in] image_data_row_pitch The size in bytes for a single row
 * of the image in the image data. If 0 is specified then the default
 * row pitch value is used: image width * image element byte size.
 * The value used must be greater than or equal to the default row
 * pitch, and be a multiple of the image element byte size. For the
 * linear image layout it must also be a multiple of the image linear
 * row pitch alignment for the agents that will access the image data
 * using image instructions.
 *
 * @param[in] image_data_slice_pitch The size in bytes of a single
 * slice of a 3D image, or the size in bytes of each image layer in an
 * image array in the image data. If 0 is specified then the default
 * slice pitch value is used: row pitch * height if geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
 * be 0 if the default slice pitch is 0, be greater than or equal to
 * the default slice pitch, and be a multiple of the row pitch.
 *
 * @param[out] image_data_info Memory location where the runtime stores the
 * size and alignment requirements. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image
 * format specified by @p image_descriptor is not supported for the
 * @p access_permission and @p image_data_layout specified.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image
 * dimensions specified by @p image_descriptor are not supported for
 * the @p access_permission and @p image_data_layout specified.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The row and
 * slice pitch specified by @p image_data_row_pitch and @p
 * image_data_slice_pitch are invalid or not supported.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is
 * NULL, @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
 * or @p image_data_info is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t *image_data_info);

/**
 * @brief Creates an agent specific image handle to an image with an
 * opaque image data layout.
 *
 * @details Images with an opaque image data layout created with
 * different access permissions but matching image descriptors and
 * same agent can share the same image data if
 * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
 * by ::hsa_ext_image_get_capability for the image format specified in
 * the image descriptor. Image descriptors match if they have the same
 * values, with the exception that s-form channel orders match the
 * corresponding non-s-form channel order and vice versa.
 *
 * If necessary, an application can use image operations (import,
 * export, copy, clear) to prepare the image for the intended use
 * regardless of the access permissions.
 *
 * @param[in] agent Agent to be associated with the image handle created.
 *
 * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
 *
 * @param[in] image_data Image data buffer that must have been allocated
 * according to the size and alignment requirements dictated by
 * ::hsa_ext_image_data_get_info. Must not be NULL.
 *
 * Any previous memory contents are preserved upon creation. The application is
 * responsible for ensuring that the lifetime of the image data exceeds that of
 * all the associated images.
 *
 * @param[in] access_permission Access permission of the image when
 * accessed by agent. The access permission defines how the agent
 * is allowed to access the image using the image handle created and
 * must match the corresponding HSAIL image handle type. The agent
 * must support the image format specified in @p image_descriptor for
 * the given @p access_permission.
 *
 * @param[out] image Pointer to a memory location where the HSA runtime stores
 * the newly created image handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent
 * does not have the capability to support the image format contained
 * in @p image_descriptor using the specified @p access_permission.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
 * does not support the image dimensions specified by @p
 * image_descriptor using the specified @p access_permission.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources (for example, the agent does not support the creation
 * of more image handles with the given @p access_permission).
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
 * image_data is NULL, @p image_data does not have a valid alignment,
 * @p access_permission is not a valid access permission
 * value, or @p image is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_create(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t *image);

/**
 * @brief Creates an agent specific image handle to an image with an explicit
 * image data layout.
 *
 * @details Images with an explicit image data layout created with
 * different access permissions but matching image descriptors and
 * matching image layout can share the same image data if
 * ::HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT is reported
 * by ::hsa_ext_image_get_capability_with_layout for the image format
 * specified in the image descriptor and specified image data
 * layout. Image descriptors match if they have the same values, with
 * the exception that s-form channel orders match the corresponding
 * non-s-form channel order and vice versa. Image layouts match if
 * they are the same image data layout and use the same image row and
 * slice values.
 *
 * If necessary, an application can use image operations (import, export, copy,
 * clear) to prepare the image for the intended use regardless of the access
 * permissions.
 *
 * @param[in] agent Agent to be associated with the image handle created.
 *
 * @param[in] image_descriptor Pointer to an image descriptor. Must not be NULL.
 *
 * @param[in] image_data Image data buffer that must have been allocated
 * according to the size and alignment requirements dictated by
 * ::hsa_ext_image_data_get_info_with_layout. Must not be NULL.
 *
 * Any previous memory contents are preserved upon creation. The application is
 * responsible for ensuring that the lifetime of the image data exceeds that of
 * all the associated images.
 *
 * @param[in] access_permission Access permission of the image when
 * accessed by the agent. The access permission defines how the agent
 * is allowed to access the image and must match the corresponding
 * HSAIL image handle type. The agent must support the image format
 * specified in @p image_descriptor for the given @p access_permission
 * and @p image_data_layout.
 *
 * @param[in] image_data_layout The image data layout to use for the
 * @p image_data. It is invalid to use
 * ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE; use ::hsa_ext_image_create
 * instead.
 *
 * @param[in] image_data_row_pitch The size in bytes for a single row
 * of the image in the image data. If 0 is specified then the default
 * row pitch value is used: image width * image element byte size.
 * The value used must be greater than or equal to the default row
 * pitch, and be a multiple of the image element byte size. For the
 * linear image layout it must also be a multiple of the image linear
 * row pitch alignment for the agents that will access the image data
 * using image instructions.
 *
 * @param[in] image_data_slice_pitch The size in bytes of a single
 * slice of a 3D image, or the size in bytes of each image layer in an
 * image array in the image data. If 0 is specified then the default
 * slice pitch value is used: row pitch * height if geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH; row pitch if geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_1DA; and 0 otherwise. The value used must
 * be 0 if the default slice pitch is 0, be greater than or equal to
 * the default slice pitch, and be a multiple of the row pitch.
 *
 * @param[out] image Pointer to a memory location where the HSA runtime stores
 * the newly created image handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does
 * not have the capability to support the image format contained in the image
 * descriptor using the specified @p access_permission and @p image_data_layout.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent
 * does not support the image dimensions specified by @p
 * image_descriptor using the specified @p access_permission and @p
 * image_data_layout.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED The agent does
 * not support the row and slice pitch specified by @p image_data_row_pitch
 * and @p image_data_slice_pitch, or the values are invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources (for example, the agent does not support the creation
 * of more image handles with the given @p access_permission).
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p
 * image_data is NULL, @p image_data does not have a valid alignment,
 * @p image_data_layout is ::HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE,
 * or @p image is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_create_with_layout(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_t *image);

/**
 * @brief Destroy an image handle previously created using ::hsa_ext_image_create or
 * ::hsa_ext_image_create_with_layout.
 *
 * @details Destroying the image handle does not free the associated image data,
 * or modify its contents; the image data buffer supplied at creation remains
 * the application's to manage. The application should not destroy an image
 * handle while there are references to it queued for execution or currently
 * being used in a kernel dispatch.
 *
 * @param[in] agent Agent associated with the image handle.
 *
 * @param[in] image Image handle to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 */
hsa_status_t HSA_API hsa_ext_image_destroy(
    hsa_agent_t agent,
    hsa_ext_image_t image);

/**
 * @brief Copies a portion of one image (the source) to another image (the
 * destination).
 *
 * @details The source and destination image formats should be the
 * same, with the exception that s-form channel orders match the
 * corresponding non-s-form channel order and vice versa. For example,
 * it is allowed to copy a source image with a channel order of
 * ::HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB to a destination image with a
 * channel order of ::HSA_EXT_IMAGE_CHANNEL_ORDER_RGB.
 *
 * The source and destination images do not have to be of the same geometry and
 * appropriate scaling is performed by the HSA runtime. It is possible to copy
 * subregions between any combinations of source and destination geometries, provided
 * that the dimensions of the subregions are the same. For example, it is
 * allowed to copy a rectangular region from a 2D image to a slice of a 3D
 * image.
 *
 * If the source and destination image data overlap, or the combination of
 * offset and range references an out-of-bounds element in any of the images,
 * the behavior is undefined.
 *
 * @param[in] agent Agent associated with both the source and destination image handles.
 *
 * @param[in] src_image Image handle of source image. The agent associated with the source
 * image handle must be identical to that of the destination image.
 *
 * @param[in] src_offset Pointer to the offset within the source image where to
 * copy the data from. Must not be NULL.
 *
 * @param[in] dst_image Image handle of destination image.
 *
 * @param[in] dst_offset Pointer to the offset within the destination
 * image where to copy the data. Must not be NULL.
 *
 * @param[in] range Dimensions of the image portion to be copied. The HSA
 * runtime computes the size of the image data to be copied using this
 * argument. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_offset is
 * NULL, @p dst_offset is NULL, or @p range is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_copy(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    const hsa_dim3_t* src_offset,
    hsa_ext_image_t dst_image,
    const hsa_dim3_t* dst_offset,
    const hsa_dim3_t* range);

/**
 * @brief Image region.
 *
 * @details Describes the portion of an image operated on by
 * ::hsa_ext_image_import, ::hsa_ext_image_export, and ::hsa_ext_image_clear.
 */
typedef struct hsa_ext_image_region_s {
    /**
     * Offset within an image (in coordinates).
     */
    hsa_dim3_t offset;

    /**
     * Dimension size of the image range (in coordinates). The x, y, and z dimensions
     * correspond to width, height, and depth or index respectively.
     */
    hsa_dim3_t range;
} hsa_ext_image_region_t;

/**
 * @brief Import linearly organized image data from memory directly to an
 * image handle.
 *
 * @details This operation updates the image data referenced by the image handle
 * from the source memory. The size of the data imported from memory is
 * implicitly derived from the image region.
 *
 * It is the application's responsibility to avoid out of bounds memory access.
 *
 * None of the source memory or destination image data memory can
 * overlap. Overlapping of any of the source and destination image
 * data memory within the import operation produces undefined results.
 *
 * @param[in] agent Agent associated with the image handle.
 *
 * @param[in] src_memory Source memory. Must not be NULL.
 *
 * @param[in] src_row_pitch The size in bytes of a single row of the image in the
 * source memory. If the value is smaller than the destination image region
 * width * image element byte size, then region width * image element byte
 * size is used.
 *
 * @param[in] src_slice_pitch The size in bytes of a single 2D slice of a 3D image,
 * or the size in bytes of each image layer in an image array in the source memory.
 * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
 * value used for @p src_row_pitch, then the value used for @p src_row_pitch is used.
 * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
 * @p src_row_pitch * destination image region height, then the value used for
 * @p src_row_pitch * destination image region height is used.
 * Otherwise, the value is not used.
 *
 * @param[in] dst_image Image handle of destination image.
 *
 * @param[in] image_region Pointer to the image region to be updated. Must not
 * be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p src_memory is NULL, or @p
 * image_region is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_import(
    hsa_agent_t agent,
    const void *src_memory,
    size_t src_row_pitch,
    size_t src_slice_pitch,
    hsa_ext_image_t dst_image,
    const hsa_ext_image_region_t *image_region);

/**
 * @brief Export the image data to linearly organized memory.
 *
 * @details The operation updates the destination memory with the image data of
 * @p src_image. The size of the data exported to memory is implicitly derived
 * from the image region.
 *
 * It is the application's responsibility to avoid out of bounds memory access.
 *
 * None of the destination memory or source image data memory can
 * overlap. Overlapping of any of the source and destination image
 * data memory within the export operation produces undefined results.
 *
 * @param[in] agent Agent associated with the image handle.
 *
 * @param[in] src_image Image handle of source image.
 *
 * @param[out] dst_memory Destination memory, updated by this operation with
 * the exported image data. Must not be NULL.
 *
 * @param[in] dst_row_pitch The size in bytes of a single row of the image in the
 * destination memory. If the value is smaller than the source image region
 * width * image element byte size, then region width * image element byte
 * size is used.
 *
 * @param[in] dst_slice_pitch The size in bytes of a single 2D slice of a 3D image,
 * or the size in bytes of each image in an image array in the destination memory.
 * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_1DA and the value is smaller than the
 * value used for @p dst_row_pitch, then the value used for @p dst_row_pitch is used.
 * If the geometry is ::HSA_EXT_IMAGE_GEOMETRY_3D, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH and the value is smaller than the value used for
 * @p dst_row_pitch * source image region height, then the value used for
 * @p dst_row_pitch * source image region height is used.
 * Otherwise, the value is not used.
 *
 * @param[in] image_region Pointer to the image region to be exported. Must not
 * be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p dst_memory is NULL, or @p
 * image_region is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_export(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    void *dst_memory,
    size_t dst_row_pitch,
    size_t dst_slice_pitch,
    const hsa_ext_image_region_t *image_region);

/**
 * @brief Clear a region of an image so that every image element has
 * the specified value.
 *
 * @param[in] agent Agent associated with the image handle.
 *
 * @param[in] image Image handle for image to be cleared.
 *
 * @param[in] data The value to which to set each image element being
 * cleared. It is specified as an array of image component values. The
 * number of array elements must match the number of access components
 * for the image channel order. The type of each array element must
 * match the image access type of the image channel type. When the
 * value is used to set the value of an image element, the conversion
 * method corresponding to the image channel type is used. See the
 * <em>Channel Order</em> section and <em>Channel Type</em> section in
 * the <em>HSA Programming Reference Manual</em> for more
 * information. Must not be NULL.
 *
 * @param[in] image_region Pointer to the image region to clear. Must not be
 * NULL. If the region references an out-of-bounds element, the behavior is
 * undefined.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p data is NULL, or @p
 * image_region is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_clear(
    hsa_agent_t agent,
    hsa_ext_image_t image,
    const void* data,
    const hsa_ext_image_region_t *image_region);

/**
 * @brief Sampler handle. Samplers are populated by
 * ::hsa_ext_sampler_create or ::hsa_ext_sampler_create_v2. Sampler handles
 * are only unique within an agent, not across agents.
 */
typedef struct hsa_ext_sampler_s {
    /**
     * Opaque handle. For a given agent, two handles reference the same object of
     * the enclosing type if and only if they are equal.
     */
    uint64_t handle;
} hsa_ext_sampler_t;

/**
 * @brief Sampler address modes.
 *
 * @details The address mode selects how out-of-range image coordinates are
 * processed. Each mode is defined in the <em>Addressing Mode</em> section of
 * the <em>HSA Programming Reference Manual</em>. The values match the BRIG
 * type @p hsa_ext_brig_sampler_addressing_t.
 */
typedef enum {
  /** Out-of-range coordinates are not handled. */
  HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
  /** Clamp out-of-range coordinates to the image edge. */
  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
  /** Clamp out-of-range coordinates to the image border color. */
  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
  /** Wrap out-of-range coordinates back into the valid coordinate range so
   *  the image appears as repeated tiles. */
  HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
  /** Mirror out-of-range coordinates back into the valid coordinate range so
   *  the image appears as repeated tiles with every other tile a
   *  reflection. */
  HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
} hsa_ext_sampler_addressing_mode_t;

/**
 * @brief A fixed-size (32-bit unsigned) type used to represent
 * ::hsa_ext_sampler_addressing_mode_t constants.
 */
typedef uint32_t hsa_ext_sampler_addressing_mode32_t;

/**
 * @brief Sampler coordinate normalization modes.
 *
 * @details Each mode is defined in the <em>Coordinate Normalization Mode</em>
 * section of the <em>HSA Programming Reference Manual</em>. The values match
 * the BRIG type @p hsa_ext_brig_sampler_coord_normalization_t.
 */
typedef enum {
  /** Coordinates are used to directly address an image element. */
  HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
  /** Coordinates are scaled by the image dimension size before being used to
   *  address an image element. */
  HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
} hsa_ext_sampler_coordinate_mode_t;

/**
 * @brief A fixed-size (32-bit unsigned) type used to represent
 * ::hsa_ext_sampler_coordinate_mode_t constants.
 */
typedef uint32_t hsa_ext_sampler_coordinate_mode32_t;
    

/**
 * @brief Sampler filter modes.
 *
 * @details Each filter mode is defined in the <em>Filter Mode</em> section of
 * the <em>HSA Programming Reference Manual</em>. The enumeration values match
 * the BRIG type @p hsa_ext_brig_sampler_filter_t.
 */
typedef enum {
  /** Filter to the image element nearest (in Manhattan distance) to the
   *  specified coordinate. */
  HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
  /** Filter to the image element calculated by combining the elements in a
   *  2x2 square block or 2x2x2 cube block around the specified coordinate.
   *  The elements are combined using linear interpolation. */
  HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
} hsa_ext_sampler_filter_mode_t;

/**
 * @brief A fixed-size (32-bit unsigned) type used to represent
 * ::hsa_ext_sampler_filter_mode_t constants.
 */
typedef uint32_t hsa_ext_sampler_filter_mode32_t;

/**
 * @brief Implementation independent sampler descriptor.
 *
 * @details One address mode is applied to the sampler as a whole; see
 * ::hsa_ext_sampler_descriptor_v2_t for per-axis address modes.
 */
typedef struct hsa_ext_sampler_descriptor_s {
  /** Coordinate mode: describes the normalization of image coordinates. */
  hsa_ext_sampler_coordinate_mode32_t coordinate_mode;
  /** Filter mode: describes the type of sampling performed. */
  hsa_ext_sampler_filter_mode32_t filter_mode;
  /** Address mode: describes the processing of out-of-range image
   *  coordinates. */
  hsa_ext_sampler_addressing_mode32_t address_mode;
} hsa_ext_sampler_descriptor_t;

/**
 * @brief Implementation independent sampler descriptor v2, which supports a
 * different address mode for each of the X, Y and Z axes.
 */
typedef struct hsa_ext_sampler_descriptor_v2_s {
  /** Coordinate mode: describes the normalization of image coordinates. */
  hsa_ext_sampler_coordinate_mode32_t coordinate_mode;
  /** Filter mode: describes the type of sampling performed. */
  hsa_ext_sampler_filter_mode32_t filter_mode;
  /** Address modes: describe the processing of out-of-range image
   *  coordinates, one entry per axis in X, Y, Z order. */
  hsa_ext_sampler_addressing_mode32_t address_modes[3];
} hsa_ext_sampler_descriptor_v2_t;

/**
 * @brief Create an agent specific sampler handle for a given agent
 * independent sampler descriptor and agent.
 *
 * @param[in] agent Agent to be associated with the sampler handle created.
 *
 * @param[in] sampler_descriptor Pointer to a sampler descriptor. Must not be
 * NULL.
 *
 * @param[out] sampler Memory location where the HSA runtime stores the newly
 * created sampler handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED The
 * @p agent does not have the capability to support the properties
 * specified by @p sampler_descriptor or it is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or
 * @p sampler is NULL.
 *
 * @see ::hsa_ext_sampler_create_v2, ::hsa_ext_sampler_destroy
 */
hsa_status_t HSA_API hsa_ext_sampler_create(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler);

/**
 * @brief Create an agent specific sampler handle for a given agent
 * independent sampler descriptor v2 and agent.
 *
 * @param[in] agent Agent to be associated with the sampler handle created.
 *
 * @param[in] sampler_descriptor Pointer to a v2 sampler descriptor. Must not
 * be NULL.
 *
 * @param[out] sampler Memory location where the HSA runtime stores the newly
 * created sampler handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_SAMPLER_DESCRIPTOR_UNSUPPORTED The
 * @p agent does not have the capability to support the properties
 * specified by @p sampler_descriptor or it is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
 * the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p sampler_descriptor is NULL, or
 * @p sampler is NULL.
 *
 * @see ::hsa_ext_sampler_create, ::hsa_ext_sampler_destroy
 */
hsa_status_t HSA_API hsa_ext_sampler_create_v2(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_v2_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler);

/**
 * @brief Destroy a sampler handle previously created using
 * ::hsa_ext_sampler_create or ::hsa_ext_sampler_create_v2.
 *
 * @details The sampler handle should not be destroyed while there are
 * references to it queued for execution or currently being used in a
 * kernel dispatch.
 *
 * @param[in] agent Agent associated with the sampler handle.
 *
 * @param[in] sampler Sampler handle to destroy.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
 */
hsa_status_t HSA_API hsa_ext_sampler_destroy(
    hsa_agent_t agent,
    hsa_ext_sampler_t sampler);


/* NOTE(review): presumably a marker macro announcing that the images v1.00
 * function table below is available - confirm against users of this header. */
#define hsa_ext_images_1_00

/**
 * @brief The function pointer table for the images v1.00 extension. Can be
 * returned by ::hsa_system_get_extension_table or
 * ::hsa_system_get_major_extension_table.
 *
 * @details Each member is named after the API function it points to. The
 * order of the members defines the layout of the table, so reordering them
 * would change the layout seen by existing users.
 */
typedef struct hsa_ext_images_1_00_pfn_s {

  hsa_status_t (*hsa_ext_image_get_capability)(
    hsa_agent_t agent,
    hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t *image_format,
    uint32_t *capability_mask);

  hsa_status_t (*hsa_ext_image_data_get_info)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t *image_data_info);

  hsa_status_t (*hsa_ext_image_create)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t *image);

  hsa_status_t (*hsa_ext_image_destroy)(
    hsa_agent_t agent,
    hsa_ext_image_t image);

  hsa_status_t (*hsa_ext_image_copy)(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    const hsa_dim3_t* src_offset,
    hsa_ext_image_t dst_image,
    const hsa_dim3_t* dst_offset,
    const hsa_dim3_t* range);

  hsa_status_t (*hsa_ext_image_import)(
    hsa_agent_t agent,
    const void *src_memory,
    size_t src_row_pitch,
    size_t src_slice_pitch,
    hsa_ext_image_t dst_image,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_image_export)(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    void *dst_memory,
    size_t dst_row_pitch,
    size_t dst_slice_pitch,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_image_clear)(
    hsa_agent_t agent,
    hsa_ext_image_t image,
    const void* data,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_sampler_create)(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler);

  hsa_status_t (*hsa_ext_sampler_destroy)(
    hsa_agent_t agent,
    hsa_ext_sampler_t sampler);

} hsa_ext_images_1_00_pfn_t;

/* NOTE(review): presumably a marker macro announcing that the images v1
 * function table below is available - confirm against users of this header. */
#define hsa_ext_images_1

/**
 * @brief The function pointer table for the images v1 extension. Can be
 * returned by ::hsa_system_get_extension_table or
 * ::hsa_system_get_major_extension_table.
 *
 * @details The first ten members repeat the members of
 * ::hsa_ext_images_1_00_pfn_t in the same order; they are followed by the
 * three *_with_layout entry points and ::hsa_ext_sampler_create_v2. The order
 * of the members defines the layout of the table, so reordering them would
 * change the layout seen by existing users.
 */
typedef struct hsa_ext_images_1_pfn_s {

  hsa_status_t (*hsa_ext_image_get_capability)(
    hsa_agent_t agent,
    hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t *image_format,
    uint32_t *capability_mask);

  hsa_status_t (*hsa_ext_image_data_get_info)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t *image_data_info);

  hsa_status_t (*hsa_ext_image_create)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t *image);

  hsa_status_t (*hsa_ext_image_destroy)(
    hsa_agent_t agent,
    hsa_ext_image_t image);

  hsa_status_t (*hsa_ext_image_copy)(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    const hsa_dim3_t* src_offset,
    hsa_ext_image_t dst_image,
    const hsa_dim3_t* dst_offset,
    const hsa_dim3_t* range);

  hsa_status_t (*hsa_ext_image_import)(
    hsa_agent_t agent,
    const void *src_memory,
    size_t src_row_pitch,
    size_t src_slice_pitch,
    hsa_ext_image_t dst_image,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_image_export)(
    hsa_agent_t agent,
    hsa_ext_image_t src_image,
    void *dst_memory,
    size_t dst_row_pitch,
    size_t dst_slice_pitch,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_image_clear)(
    hsa_agent_t agent,
    hsa_ext_image_t image,
    const void* data,
    const hsa_ext_image_region_t *image_region);

  hsa_status_t (*hsa_ext_sampler_create)(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler);

  hsa_status_t (*hsa_ext_sampler_destroy)(
    hsa_agent_t agent,
    hsa_ext_sampler_t sampler);

  /* Entry points present only in the v1 table (not in v1.00). */
  hsa_status_t (*hsa_ext_image_get_capability_with_layout)(
    hsa_agent_t agent,
    hsa_ext_image_geometry_t geometry,
    const hsa_ext_image_format_t *image_format,
    hsa_ext_image_data_layout_t image_data_layout,
    uint32_t *capability_mask);

  hsa_status_t (*hsa_ext_image_data_get_info_with_layout)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t *image_data_info);

  hsa_status_t (*hsa_ext_image_create_with_layout)(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_t *image);

  hsa_status_t (*hsa_ext_sampler_create_v2)(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_v2_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler);

} hsa_ext_images_1_pfn_t;
/** @} */
    
#ifdef __cplusplus
}  // end extern "C" block
#endif /*__cplusplus*/ 

#endif


================================================
FILE: runtime/hsa-runtime/inc/hsa_ven_amd_aqlprofile.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_

#include <stdint.h>
#include "hsa.h"

// Major/minor version constants for the aqlprofile API.
// NOTE(review): presumably these are the values reported by the
// hsa_ven_amd_aqlprofile_version_major/minor functions - confirm.
#define HSA_AQLPROFILE_VERSION_MAJOR 2
#define HSA_AQLPROFILE_VERSION_MINOR 0

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

////////////////////////////////////////////////////////////////////////////////
// Library version
// Return the major / minor number of the library version.
// The parameter lists are spelled (void) so that C translation units see real
// prototypes instead of declarations with unspecified arguments; for C++ the
// function types are unchanged.
uint32_t hsa_ven_amd_aqlprofile_version_major(void);
uint32_t hsa_ven_amd_aqlprofile_version_minor(void);

///////////////////////////////////////////////////////////////////////
// Library API:
// The library provides helper methods for instantiating the
// profile context object and for populating the start and stop
// AQL packets. The profile object contains a list of profiling
// events and the descriptors of the buffers needed for profiling:
// a command buffer and an output data buffer. The library methods
// return a status code that indicates whether an error occurred.
// The library also provides methods for querying the required
// buffer attributes, validating event attributes and retrieving
// the profiling output data.
//
// Returned status:
//     hsa_status_t - HSA status codes from the hsa.h header are used
//
// Supported profiling features:
//
// Kinds of profiling events supported by the library
typedef enum {
  HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC   = 0,  // performance counters (PMC)
  HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1   // trace
} hsa_ven_amd_aqlprofile_event_type_t;

// Blocks for which performance counters (PMC) are supported.
// All instances of a block share one block ID: for example every TCC
// instance (TCC0, TCC1, ..., TCCN) is identified by
// HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC.
typedef enum {
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC     = 0,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF     = 1,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS     = 2,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM    = 3,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE  = 4,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI     = 5,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ      = 6,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS    = 7,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM    = 8,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX      = 9,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA      = 10,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA     = 11,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC     = 12,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP     = 13,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD      = 14,

  // Memory related blocks
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB   = 15,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB   = 16,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ   = 18,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2  = 19,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR  = 20,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC     = 21,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2   = 22,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA    = 23,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB     = 24,

  // System blocks
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA    = 25,

  // GFX10 added blocks
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A    = 26,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C    = 27,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A    = 28,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C    = 29,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR     = 30,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS     = 31,

  // UMC & MMEA System Blocks
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC     = 32,
  HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA    = 33,

  // Implicitly one past the last block ID, i.e. the number of block IDs
  HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
} hsa_ven_amd_aqlprofile_block_name_t;

// PMC event object structure.
// The 'counter_id' value is the counter select value specified in the
// "Performance Counters Selection" chapter of the GFXIP perfcounter user
// guides.
typedef struct {
  // Block the counter belongs to
  hsa_ven_amd_aqlprofile_block_name_t block_name;
  // Index of the block instance
  uint32_t block_index;
  // Counter select value
  uint32_t counter_id;
} hsa_ven_amd_aqlprofile_event_t;

// Check whether an event is valid for the specific GPU.
// The verdict is stored through 'result'.
hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
    hsa_agent_t agent,                            // [in] HSA handle for the profiling GPU
    const hsa_ven_amd_aqlprofile_event_t* event,  // [in] Pointer to the event to validate
    bool* result);                                // [out] True if the event is valid, false otherwise

// Profiling parameters.
// Every parameter is generic; an error status is returned when a parameter
// is not applicable to the specific profile configuration.
typedef enum {
  /** Select the target compute unit (wgp) for profiling. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
  /** VMID Mask */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
  /** Legacy. Deprecated. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
  /** Legacy. Deprecated. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
  /** Legacy. Deprecated. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
  /** Shader engine mask for selection. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
  /** Legacy. Deprecated. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
  /** Legacy. Deprecated. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
  /** Set SIMD Mask (GFX9) or SIMD ID for collection (Navi) */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
  /** Set true for occupancy collection only. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
  /** ATT collection max data size, in MB. Shared among shader engines. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,

  /** Mask of which compute units to generate perfcounters. GFX9 only. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
  /** Select collection period for perfcounters. GFX9 only. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
  /** Select perfcounter ID (SQ block) for collection. GFX9 only. */
  HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242
} hsa_ven_amd_aqlprofile_parameter_name_t;

// Profile parameter object: a parameter name together with its value
typedef struct {
  // Which parameter is being set
  hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
  // Value assigned to that parameter
  uint32_t value;
} hsa_ven_amd_aqlprofile_parameter_t;

// Channels accepted by hsa_ven_amd_aqlprofile_att_marker
typedef enum {
  HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
  HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1 = 1,
  HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2 = 2,
  HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3 = 3
} hsa_ven_amd_aqlprofile_att_marker_channel_t;

//
// Profile context object:
// The library provides a profile object structure which contains
// the events array, a buffer for the profiling start/stop commands
// and a buffer for the output data.
// The buffers are specified by buffer descriptors and are allocated
// by the application. The buffer allocation attributes, the command
// buffer size, the PMC output buffer size as well as the profiling output
// data can be obtained using the generic profile info helper _get_info.
//
// Buffer descriptor: pointer to a buffer together with its size
typedef struct {
  // Start of the buffer
  void* ptr;
  // Size of the buffer
  uint32_t size;
} hsa_ven_amd_aqlprofile_descriptor_t;

// Profile context object structure.
// Holds the list of profiling events, the profiling parameters and the
// descriptors of the buffers needed for profiling: a command buffer and
// an output data buffer.
typedef struct {
  // GFXIP handle
  hsa_agent_t agent;
  // Events type
  hsa_ven_amd_aqlprofile_event_type_t type;
  // Events array
  const hsa_ven_amd_aqlprofile_event_t* events;
  // Events count
  uint32_t event_count;
  // Parameters array
  const hsa_ven_amd_aqlprofile_parameter_t* parameters;
  // Parameters count
  uint32_t parameter_count;
  // Output buffer
  hsa_ven_amd_aqlprofile_descriptor_t output_buffer;
  // PM4 commands
  hsa_ven_amd_aqlprofile_descriptor_t command_buffer;
} hsa_ven_amd_aqlprofile_profile_t;

//
// AQL packets populating methods:
// Helper methods that populate the START and STOP AQL packets provided
// by the application. The application is required to submit these packets
// before and after the profiled GPU task packets, respectively.
//
// AQL Vendor Specific packet which carries a PM4 command
typedef struct {
  // Packet header
  uint16_t header;
  // PM4 command payload, 27 16-bit words
  uint16_t pm4_command[27];
  // Completion signal of the packet
  hsa_signal_t completion_signal;
} hsa_ext_amd_aql_pm4_packet_t;

// Method to populate the provided AQL packet with profiling start commands.
// Only the 'pm4_command' fields of the packet are set; the application
// is responsible for setting the Vendor Specific header type and a completion signal.
hsa_status_t hsa_ven_amd_aqlprofile_start(
    hsa_ven_amd_aqlprofile_profile_t* profile,        // [in,out] profile context object
    hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);  // [out] profile start AQL packet

// Method to populate the provided AQL packet with profiling stop commands.
// Only the 'pm4_command' fields of the packet are set; the application
// is responsible for setting the Vendor Specific header type and a completion signal.
hsa_status_t hsa_ven_amd_aqlprofile_stop(
    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
    hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);   // [out] profile stop AQL packet

// Method to populate the provided AQL packet with profiling read commands.
// Only the 'pm4_command' fields of the packet are set; the application
// is responsible for setting the Vendor Specific header type and a completion signal.
hsa_status_t hsa_ven_amd_aqlprofile_read(
    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
    hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);   // [out] profile read AQL packet

// Legacy devices, PM4 profiling packet size.
// Declared 'static' so that every translation unit including this header gets
// its own private copy. Without it, a C translation unit would emit an
// external definition, and two such units could not be linked together.
// C++ already gives namespace-scope const objects internal linkage, so
// nothing changes for C++ users.
static const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
// Legacy devices: convert the profiling AQL packet into a PM4 packet blob.
// NOTE(review): 'data' presumably has to provide storage of
// HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE - confirm with the library.
hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
    const hsa_ext_amd_aql_pm4_packet_t* aql_packet,  // [in] AQL packet
    void* data);                                     // [out] PM4 packet blob

// Method to add a marker (correlation ID) into the ATT buffer.
// The marker commands are delivered through the provided AQL packet.
hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
    hsa_ven_amd_aqlprofile_profile_t* profile,            // [in,out] profile context object
    hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet,      // [out] profile marker AQL packet
    uint32_t data,                                        // [in] Data to be inserted
    hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Communication channel

//
// Get profile info:
// Generic method for getting various profile info, including profile buffer
// attributes such as the command buffer size, and the profiling PMC results.
// It is implied that all counters are 64-bit values.
//
// Profile generic output data.
// The anonymous union holds either a PMC result or a trace data descriptor.
typedef struct {
  // PMC sample or trace buffer index
  uint32_t sample_id;
  union {
    struct {
      // PMC event
      hsa_ven_amd_aqlprofile_event_t event;
      // PMC result
      uint64_t result;
    } pmc_data;
    // Trace output data descriptor
    hsa_ven_amd_aqlprofile_descriptor_t trace_data;
  };
} hsa_ven_amd_aqlprofile_info_data_t;

// ID query type.
// Used with HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID to obtain a block id and
// its instances by name string.
typedef struct {
  // Name string
  const char* name;
  // ID
  uint32_t id;
  // Number of instances
  uint32_t instance_count;
} hsa_ven_amd_aqlprofile_id_query_t;

// Profile attributes that can be requested from get_info
typedef enum {
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0,
  // get_info returns uint32_t value
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1,
  // get_info returns PMC uint64_t value in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2,
  // get_info returns trace buffer ptr/size in info_data object
  HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3,
  // get_info returns number of block counter
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4,
  // get_info returns block id, instances by name string using _id_query_t
  HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5,
  // get_info returns size/pointer for counters enable command buffer
  HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6,
  // get_info returns size/pointer for counters disable command buffer
  HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7
} hsa_ven_amd_aqlprofile_info_type_t;


// Definition of the output data iterator callback.
// A function of this type is passed to hsa_ven_amd_aqlprofile_iterate_data.
typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
    hsa_ven_amd_aqlprofile_info_type_t info_type,   // [in] data type, PMC or trace data
    hsa_ven_amd_aqlprofile_info_data_t* info_data,  // [in] info_data object
    void* callback_data);                           // [in,out] data passed to the callback

// Method for getting the profile info.
// What is stored through 'value' depends on 'attribute'; see the comments on
// the hsa_ven_amd_aqlprofile_info_type_t enumerators.
hsa_status_t hsa_ven_amd_aqlprofile_get_info(
    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
    hsa_ven_amd_aqlprofile_info_type_t attribute,     // [in] requested profile attribute
    void* value);                                     // [in,out] returned value

// Method for iterating over the events output data.
// The output data is handed to 'callback' together with the user-supplied
// 'data' pointer.
hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
    const hsa_ven_amd_aqlprofile_profile_t* profile,  // [in] profile context object
    hsa_ven_amd_aqlprofile_data_callback_t callback,  // [in] callback to iterate the output data
    void* data);                                      // [in,out] data passed to the callback

// Return the error string through 'str'.
// NOTE(review): presumably describes the most recent library error - confirm.
hsa_status_t hsa_ven_amd_aqlprofile_error_string(
    const char** str);  // [out] pointer to the error string

/**
 * @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
 * @param id Coordinate ID.
 * @param name Coordinate name.
 */
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
/**
 * @brief Iterate over all possible event coordinate IDs and their names.
 *
 * The single (unnamed) parameter is the callback that receives each
 * coordinate ID together with its name.
 */
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);

/**
 * @brief Callback type used by hsa_ven_amd_aqlprofile_iterate_event_coord to
 * return the event coordinates for a given agent_t and event_t.
 * @param position A counting sequence indicating callback number.
 * @param id Coordinate ID as in _iterate_event_ids.
 * @param extent Coordinate extent indicating maximum allowed instances.
 * @param coordinate The coordinate, in the range [0,extent-1].
 * @param name Coordinate name as in _iterate_event_ids.
 * @param userdata Userdata pointer that was passed to the _iterate_event_coord function.
 */
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
  int position,
  int id,
  int extent,
  int coordinate,
  const char* name,
  void* userdata
);

/**
 * @brief Iterate over all event coordinates for a given agent_t and event_t.
 * @param[in] agent HSA agent.
 * @param[in] event The event ID and block ID to iterate for.
 * @param[in] sample_id The aqlprofile_info_data_t.sample_id value returned from
 * _aqlprofile_iterate_data.
 * @param[in] callback Callback function to return the coordinates.
 * @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
 */
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
  hsa_agent_t agent,
  hsa_ven_amd_aqlprofile_event_t event,
  uint32_t sample_id,
  hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
  void* userdata
);

/**
 * @brief Extension version and library file name.
 *
 * hsa_ven_amd_aqlprofile_LIB(suff) builds the shared library file name with
 * the given suffix inserted before ".so"; kAqlProfileLib is that name with
 * the suffix "64" when HSA_LARGE_MODEL is defined and no suffix otherwise.
 */
#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"

#if !defined(HSA_LARGE_MODEL)
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
#else
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
#endif

/**
 * @brief Extension function table.
 *
 * Each member points to a function with the same return and parameter types
 * as the library function of the same name declared in this header. The
 * order of the members defines the layout of the table.
 *
 * The two version getters use an explicit (void) parameter list so that C
 * code sees prototyped function pointers instead of pointers to functions
 * with unspecified arguments; for C++ the member types are unchanged.
 */
typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
  /* Library version */
  uint32_t (*hsa_ven_amd_aqlprofile_version_major)(void);
  uint32_t (*hsa_ven_amd_aqlprofile_version_minor)(void);

  /* Error string */
  hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
      const char** str);

  /* Event validation */
  hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
      hsa_agent_t agent,
      const hsa_ven_amd_aqlprofile_event_t* event,
      bool* result);

  /* AQL packet population: start / stop / read */
  hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
      hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);

  hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
      const hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);

  hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
      const hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);

  /* Legacy devices: AQL packet to PM4 packet blob */
  hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
      const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
      void* data);

  /* Profile info and output data */
  hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
      const hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ven_amd_aqlprofile_info_type_t attribute,
      void* value);

  hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
      const hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ven_amd_aqlprofile_data_callback_t callback,
      void* data);

  /* Event coordinate iteration */
  hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
      hsa_ven_amd_aqlprofile_eventname_callback_t
  );

  hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
      hsa_agent_t agent,
      hsa_ven_amd_aqlprofile_event_t event,
      uint32_t sample_id,
      hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
      void* userdata
  );

  /* ATT marker */
  hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
      hsa_ven_amd_aqlprofile_profile_t* profile,
      hsa_ext_amd_aql_pm4_packet_t* aql_packet,
      uint32_t data,
      hsa_ven_amd_aqlprofile_att_marker_channel_t channel
  );
} hsa_ven_amd_aqlprofile_1_00_pfn_t;

// Unversioned name for the extension function table type; an alias of the
// 1.00 table.
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;

#ifdef __cplusplus
}
#endif  // __cplusplus

#endif  // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_


================================================
FILE: runtime/hsa-runtime/inc/hsa_ven_amd_loader.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA AMD extension for additional loader functionality.

#ifndef HSA_VEN_AMD_LOADER_H
#define HSA_VEN_AMD_LOADER_H

#include "hsa.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/**
 * @brief Queries the equivalent host address for the given @p device_address,
 * and records it in @p host_address.
 *
 *
 * @details The contents of the memory pointed to by @p host_address are
 * identical to the contents of the memory pointed to by @p device_address. The
 * only difference between the two is host accessibility: @p host_address is
 * always accessible from the host, whereas @p device_address might not be.
 *
 * If @p device_address already points to host accessible memory, then the value
 * of @p device_address is simply copied into @p host_address.
 *
 * The lifetime of @p host_address is the same as the lifetime of @p
 * device_address, and both lifetimes are limited by the lifetime of the
 * executable that is managing these addresses.
 *
 *
 * @param[in] device_address Device address to query the equivalent host
 * address for.
 *
 * @param[out] host_address Pointer to an application-allocated buffer to record
 * the queried equivalent host address in.
 *
 *
 * @retval HSA_STATUS_SUCCESS Function is executed successfully.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
 * null, or @p host_address is null.
 */
hsa_status_t hsa_ven_amd_loader_query_host_address(
  const void *device_address,
  const void **host_address);

/**
 * @brief The storage type of the code object that is backing a loaded memory
 * segment.
 */
typedef enum {
  /**
   * The loaded memory segment is not backed by any code object (anonymous), as
   * is the case with BSS (uninitialized data).
   */
  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
  /**
   * The loaded memory segment is backed by a code object that is stored in a
   * file.
   */
  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
  /**
   * The loaded memory segment is backed by a code object that is stored in
   * memory.
   */
  HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
} hsa_ven_amd_loader_code_object_storage_type_t;

/**
 * @brief Loaded memory segment descriptor.
 *
 *
 * @details A loaded memory segment descriptor describes an underlying loaded
 * memory segment. The loaded memory segment is created/allocated by the
 * executable during the loading of the code object that is backing the
 * underlying memory segment.
 *
 * The lifetime of the underlying memory segment is limited by the lifetime of
 * the executable that is managing it.
 */
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
  /**
   * Agent the underlying memory segment is allocated on. If the code object
   * that is backing the underlying memory segment is a program code object,
   * then 0.
   */
  hsa_agent_t agent;
  /**
   * Executable that is managing the underlying memory segment.
   */
  hsa_executable_t executable;
  /**
   * Storage type of the code object that is backing the underlying memory
   * segment.
   */
  hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
  /**
   * If the storage type of the code object that is backing the underlying
   * memory segment is:
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the
   *     null-terminated filepath to the code object;
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then a host
   *     accessible pointer to the first byte of the code object.
   */
  const void *code_object_storage_base;
  /**
   * If the storage type of the code object that is backing the underlying
   * memory segment is:
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
   *     the filepath to the code object (including null-terminating character);
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
   *     bytes, of the memory occupied by the code object.
   */
  size_t code_object_storage_size;
  /**
   * If the storage type of the code object that is backing the underlying
   * memory segment is:
   *   - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
   *   - other, then the offset, in bytes, from the beginning of the code object
   *     to the first byte in the code object that data is copied from.
   */
  size_t code_object_storage_offset;
  /**
   * Starting address of the underlying memory segment.
   */
  const void *segment_base;
  /**
   * Size, in bytes, of the underlying memory segment.
   */
  size_t segment_size;
} hsa_ven_amd_loader_segment_descriptor_t;

/**
 * @brief Queries either the loaded memory segment descriptors, or the total
 * number of loaded memory segment descriptors.
 *
 *
 * @details If @p segment_descriptors is not null and @p num_segment_descriptors
 * points to a number that exactly matches the total number of loaded memory
 * segment descriptors, then queries the loaded memory segment descriptors, and
 * records them in @p segment_descriptors. If @p segment_descriptors is null and
 * @p num_segment_descriptors points to zero, then queries the total number of
 * loaded memory segment descriptors, and records it in @p
 * num_segment_descriptors. In all other cases returns an appropriate error code
 * (see below).
 *
 * The caller of this function is responsible for the allocation/deallocation
 * and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
 *
 * The lifetime of the loaded memory segments that are described by the queried
 * loaded memory segment descriptors is limited by the lifetime of the
 * executable that is managing the loaded memory segments.
 *
 * Queried loaded memory segment descriptors are always self-consistent: they
 * describe a complete set of loaded memory segments that are being backed by
 * fully loaded code objects that are present at the time (i.e. this function
 * is blocked until all executable manipulations are fully complete).
 *
 *
 * @param[out] segment_descriptors Pointer to an application-allocated buffer to
 * record the queried loaded memory segment descriptors in. Can be null if @p
 * num_segment_descriptors points to zero.
 *
 * @param[in,out] num_segment_descriptors Pointer to an application-allocated
 * buffer that contains either the total number of loaded memory segment
 * descriptors or zero.
 *
 *
 * @retval HSA_STATUS_SUCCESS Function is executed successfully.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
 * while @p num_segment_descriptors points to a non-zero number, @p
 * segment_descriptors is not null while @p num_segment_descriptors points to
 * zero, or @p num_segment_descriptors is null.
 *
 * @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
 * does not point to a number that exactly matches the total number of loaded
 * memory segment descriptors.
 */
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
  hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
  size_t *num_segment_descriptors);

/**
 * @brief Obtains the handle of the executable to which the device address
 * belongs.
 *
 * @details This function should not be used to obtain an executable handle by
 * using a host address. The executable returned is expected to be alive until
 * it is destroyed by the user.
 *
 * @param[in] device_address Device address to look up the owning executable
 * for.
 *
 * @param[out] executable Memory location where the handle of the executable is
 * stored.
 *
 * @retval HSA_STATUS_SUCCESS Function is executed successfully.
 *
 * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
 *
 * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
 * is no executable found for this kernel code object.
 */
hsa_status_t hsa_ven_amd_loader_query_executable(
  const void *device_address,
  hsa_executable_t *executable);

//===----------------------------------------------------------------------===//

/**
 * @brief Iterates over the loaded code objects in an executable, and invokes
 * an application-defined callback on every iteration.
 *
 * @param[in] executable Executable.
 *
 * @param[in] callback Callback to be invoked once per loaded code object. The
 * HSA runtime passes three arguments to the callback: the executable, a
 * loaded code object, and the application data. If @p callback returns a
 * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
 * traversal stops and
 * ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
 * status value.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
  hsa_executable_t executable,
  hsa_status_t (*callback)(
    hsa_executable_t executable,
    hsa_loaded_code_object_t loaded_code_object,
    void *data),
  void *data);

/**
 * @brief Loaded code object kind.
 *
 * These are the values reported for
 * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND. Note that numbering
 * starts at 1; no enumerator has the value 0.
 */
typedef enum {
  /**
   * Program code object.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
  /**
   * Agent code object.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
} hsa_ven_amd_loader_loaded_code_object_kind_t;

/**
 * @brief Loaded code object attributes.
 */
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
  /**
   * The executable in which this loaded code object is loaded. The
   * type of this attribute is ::hsa_executable_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
  /**
   * The kind of this loaded code object. The type of this attribute is
   * ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
  /**
   * The agent on which this loaded code object is loaded. The
   * value of this attribute is only defined if
   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
   * attribute is ::hsa_agent_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
  /**
   * The storage type of the code object reader used to load the loaded code
   * object. The type of this attribute is ::uint32_t interpreted as a
   * ::hsa_ven_amd_loader_code_object_storage_type_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
  /**
   * The memory address of the first byte of the code object that was loaded.
   * The value of this attribute is only defined if
   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
   * attribute is ::uint64_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
  /**
   * The memory size in bytes of the code object that was loaded.
   * The value of this attribute is only defined if
   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
   * attribute is ::uint64_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
  /**
   * The file descriptor of the code object that was loaded.
   * The value of this attribute is only defined if
   * ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
   * ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
   * attribute is ::int.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
  /**
   * The signed byte address difference of the memory address at which the code
   * object is loaded minus the virtual address specified in the code object
   * that is loaded. The value of this attribute is only defined if the
   * executable in which the code object is loaded is frozen. The type of this
   * attribute is ::int64_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
  /**
   * The base memory address at which the code object is loaded. This is the
   * base address of the allocation for the lowest addressed segment of the code
   * object that is loaded. Note that any non-loaded segments before the first
   * loaded segment are ignored. The value of this attribute is only defined if
   * the executable in which the code object is loaded is frozen. The type of
   * this attribute is ::uint64_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
  /**
   * The byte size of the loaded code object's contiguous memory allocation. The
   * value of this attribute is only defined if the executable in which the code
   * object is loaded is frozen. The type of this attribute is ::uint64_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
  /**
   * The length of the URI in bytes, not including the NUL terminator. The type
   * of this attribute is uint32_t.
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
  /**
   * The URI name from which the code object was loaded. The type of this
   * attribute is a NUL terminated \p char* with the length equal to the value
   * of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
   * The URI name syntax is defined by the following BNF syntax:
   *
   *     code_object_uri ::== file_uri | memory_uri
   *     file_uri        ::== "file://" file_path [ range_specifier ]
   *     memory_uri      ::== "memory://" process_id range_specifier
   *     range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
   *     file_path       ::== URI_ENCODED_OS_FILE_PATH
   *     process_id      ::== DECIMAL_NUMBER
   *     number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
   *
   * ``number`` is a C integral literal where hexadecimal values are prefixed by
   * "0x" or "0X", and octal values by "0".
   *
   * ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
   * In URI encoding, every character that is not in the regular expression
   * ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexadecimal digits
   * preceded by "%".  Directories in the path are separated by "/".
   *
   * ``offset`` is a 0-based byte offset to the start of the code object.  For a
   * file URI, it is from the start of the file specified by the ``file_path``,
   * and if omitted defaults to 0. For a memory URI, it is the memory address
   * and is required.
   *
   * ``size`` is the number of bytes in the code object.  For a file URI, if
   * omitted it defaults to the size of the file.  It is required for a memory
   * URI.
   *
   * ``process_id`` is the identity of the process owning the memory.  For Linux
   * it is the C unsigned integral decimal literal for the process ID (PID).
   *
   * For example:
   *
   *     file:///dir1/dir2/file1
   *     file:///dir3/dir4/file2#offset=0x2000&size=3000
   *     memory://1234#offset=0x20000&size=3000
   */
  HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
} hsa_ven_amd_loader_loaded_code_object_info_t;

/**
 * @brief Gets the current value of an attribute for a given loaded code
 * object.
 *
 * @param[in] loaded_code_object Loaded code object.
 *
 * @param[in] attribute Attribute to query.
 *
 * @param[out] value Pointer to an application-allocated buffer in which to
 * store the value of the attribute. If the buffer passed by the application is
 * not large enough to hold the value of @p attribute, the behavior is
 * undefined. The value type of each attribute is documented on
 * ::hsa_ven_amd_loader_loaded_code_object_info_t.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
 * invalid.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
 * loaded code object attribute, or @p value is NULL.
 */
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
  hsa_loaded_code_object_t loaded_code_object,
  hsa_ven_amd_loader_loaded_code_object_info_t attribute,
  void *value);

//===----------------------------------------------------------------------===//

/**
 * @brief Creates a code object reader to operate on a file with an offset and
 * a size.
 *
 * @param[in] file File descriptor. The file must have been opened by the
 * application with at least read permissions prior to calling this function.
 * The file must contain a vendor-specific code object.
 *
 * The file is owned and managed by the application; the lifetime of the file
 * descriptor must exceed that of any associated code object reader.
 *
 * @param[in] offset 0-based offset relative to the beginning of the @p file
 * that denotes the beginning of the code object embedded within the @p file.
 * Note that @p offset precedes @p size in the parameter list.
 *
 * @param[in] size Size of the code object embedded in @p file.
 *
 * @param[out] code_object_reader Memory location to store the newly created
 * code object reader handle. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
 * read permissions. This condition may also be reported as
 * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
 * ::hsa_executable_load_agent_code_object function.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at
 * @p offset do not form a valid code object, the file size is 0, or @p offset
 * is greater than the file size. This condition may also be reported as
 * ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
 * ::hsa_executable_load_agent_code_object function.
 *
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
 * allocate the required resources.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
 */
hsa_status_t
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
    hsa_file_t file,
    size_t offset,
    size_t size,
    hsa_code_object_reader_t *code_object_reader);

//===----------------------------------------------------------------------===//

/**
 * @brief Iterates over the available executables, and invokes an
 * application-defined callback on every iteration. While
 * ::hsa_ven_amd_loader_iterate_executables is executing, any calls to
 * ::hsa_executable_create, ::hsa_executable_create_alt, or
 * ::hsa_executable_destroy will be blocked.
 *
 * @param[in] callback Callback to be invoked once per executable. The HSA
 * runtime passes two arguments to the callback: the executable and the
 * application data. If @p callback returns a status other than
 * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
 * ::hsa_ven_amd_loader_iterate_executables returns that status value. If
 * @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
 * ::hsa_executable_destroy, then the behavior is undefined.
 *
 * @param[in] data Application data that is passed to @p callback on every
 * iteration. May be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
 * initialized.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
 */
hsa_status_t
hsa_ven_amd_loader_iterate_executables(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      void *data),
    void *data);

//===----------------------------------------------------------------------===//

/**
 * @brief Extension version.
 *
 * NOTE(review): because of the leading zeros, C and C++ parse 001003 as an
 * octal integer constant (decimal 515), not as decimal 1003. Presumably the
 * digits are meant to read as major 001 / minor 003, matching the newest
 * function table declared in this header (1.03) - confirm before comparing
 * this macro numerically.
 */
#define hsa_ven_amd_loader 001003

/**
 * @brief Extension function table version 1.00.
 *
 * Each member is a function pointer whose name and signature match the
 * extension function of the same name declared in this header.
 */
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
  /** Entry for ::hsa_ven_amd_loader_query_host_address. */
  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
    const void *device_address,
    const void **host_address);

  /** Entry for ::hsa_ven_amd_loader_query_segment_descriptors. */
  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors);

  /** Entry for ::hsa_ven_amd_loader_query_executable. */
  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
    const void *device_address,
    hsa_executable_t *executable);
} hsa_ven_amd_loader_1_00_pfn_t;

/**
 * @brief Extension function table version 1.01.
 *
 * Declares the same three entries as the 1.00 table, in the same order,
 * followed by two additional entries for loaded code object iteration and
 * attribute queries.
 */
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
  /* Entries also present in the 1.00 table. */
  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
    const void *device_address,
    const void **host_address);

  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors);

  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
    const void *device_address,
    hsa_executable_t *executable);

  /* Entries appended in the 1.01 table. */
  hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
    hsa_executable_t executable,
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      hsa_loaded_code_object_t loaded_code_object,
      void *data),
    void *data);

  hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
    hsa_loaded_code_object_t loaded_code_object,
    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
    void *value);
} hsa_ven_amd_loader_1_01_pfn_t;

/**
 * @brief Extension function table version 1.02.
 *
 * Declares the same five entries as the 1.01 table, in the same order,
 * followed by one additional entry for creating a code object reader from a
 * file with an offset and a size.
 */
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
  /* Entries also present in the 1.01 table. */
  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
    const void *device_address,
    const void **host_address);

  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors);

  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
    const void *device_address,
    hsa_executable_t *executable);

  hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
    hsa_executable_t executable,
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      hsa_loaded_code_object_t loaded_code_object,
      void *data),
    void *data);

  hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
    hsa_loaded_code_object_t loaded_code_object,
    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
    void *value);

  /* Entry appended in the 1.02 table. */
  hsa_status_t
    (*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
      hsa_file_t file,
      size_t offset,
      size_t size,
      hsa_code_object_reader_t *code_object_reader);
} hsa_ven_amd_loader_1_02_pfn_t;

/**
 * @brief Extension function table version 1.03.
 *
 * Declares the same six entries as the 1.02 table, in the same order,
 * followed by one additional entry for iterating over executables.
 */
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
  /* Entries also present in the 1.02 table. */
  hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
    const void *device_address,
    const void **host_address);

  hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors);

  hsa_status_t (*hsa_ven_amd_loader_query_executable)(
    const void *device_address,
    hsa_executable_t *executable);

  hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
    hsa_executable_t executable,
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      hsa_loaded_code_object_t loaded_code_object,
      void *data),
    void *data);

  hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
    hsa_loaded_code_object_t loaded_code_object,
    hsa_ven_amd_loader_loaded_code_object_info_t attribute,
    void *value);

  hsa_status_t
    (*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
      hsa_file_t file,
      size_t offset,
      size_t size,
      hsa_code_object_reader_t *code_object_reader);

  /* Entry appended in the 1.03 table. */
  hsa_status_t
    (*hsa_ven_amd_loader_iterate_executables)(
      hsa_status_t (*callback)(
        hsa_executable_t executable,
        void *data),
      void *data);
} hsa_ven_amd_loader_1_03_pfn_t;

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* HSA_VEN_AMD_LOADER_H */


================================================
FILE: runtime/hsa-runtime/inc/hsa_ven_amd_pc_sampling.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_VEN_AMD_PC_SAMPLING_H
#define HSA_VEN_AMD_PC_SAMPLING_H

#include "hsa.h"

#ifdef __cplusplus
extern "C" {
#endif /*__cplusplus*/


/**
 * @brief HSA AMD Vendor PC Sampling APIs
 * EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
 * modified extensively in the future
 */

/**
 * @brief PC Sampling sample data for hosttrap sampling method
 *
 * The three bit-fields (6 + 3 + 23 bits) add up to exactly one 32-bit word.
 * reserved0 and reserved1 together span 12 bytes, the same amount that
 * perf_sample_snapshot_v1_t spends on its three perf_snapshot_data* members.
 * NOTE(review): presumably this keeps both record types the same size with
 * timestamp/correlation_id at the same position - confirm before relying on it.
 */
typedef struct {
  uint64_t pc;
  uint64_t exec_mask;
  uint32_t workgroup_id_x;
  uint32_t workgroup_id_y;
  uint32_t workgroup_id_z;
  uint32_t wave_in_wg : 6;
  uint32_t chiplet    : 3;   // Currently not used
  uint32_t reserved   : 23;
  uint32_t hw_id;
  uint32_t reserved0;
  uint64_t reserved1;
  uint64_t timestamp;
  uint64_t correlation_id;
} perf_sample_hosttrap_v1_t;

/**
 * @brief PC Sampling sample data for stochastic sampling method
 *
 * Declares the same members as perf_sample_hosttrap_v1_t up to and including hw_id.
 * The three 32-bit perf_snapshot_data* members that follow take the place of the
 * reserved0/reserved1 members of the hosttrap record (12 bytes in both cases).
 * The three bit-fields (6 + 3 + 23 bits) add up to exactly one 32-bit word.
 */
typedef struct {
  uint64_t pc;
  uint64_t exec_mask;
  uint32_t workgroup_id_x;
  uint32_t workgroup_id_y;
  uint32_t workgroup_id_z;
  uint32_t wave_in_wg : 6;
  uint32_t chiplet    : 3;   // Currently not used
  uint32_t reserved   : 23;
  uint32_t hw_id;
  uint32_t perf_snapshot_data;
  uint32_t perf_snapshot_data1;
  uint32_t perf_snapshot_data2;
  uint64_t timestamp;
  uint64_t correlation_id;
} perf_sample_snapshot_v1_t;

/**
 * @brief PC Sampling method kinds
 *
 * No explicit values are assigned, so the enumerators are 0 and 1 in declaration order.
 * Sample records for the hosttrap method are described by perf_sample_hosttrap_v1_t and
 * those for the stochastic method by perf_sample_snapshot_v1_t.
 */
typedef enum {
  HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
  HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
} hsa_ven_amd_pcs_method_kind_t;

/**
 * @brief PC Sampling interval unit type
 *
 * Selects the unit in which the sampling interval passed to hsa_ven_amd_pcs_create /
 * hsa_ven_amd_pcs_create_from_id is expressed. No explicit values are assigned, so the
 * enumerators are 0, 1 and 2 in declaration order.
 */
typedef enum {
  HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
  HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
  HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
} hsa_ven_amd_pcs_units_t;

/**
 * @brief HSA callback function to perform the copy onto a destination buffer
 *
 * If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
 * buffers. Remaining contents of HSA internal buffers will be included in next
 * hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
 * hsa_ven_amd_pcs_flush.
 *
 * @param[in] hsa_callback_data private data to pass back to HSA. Provided in
 * hsa_ven_amd_pcs_data_ready_callback_t
 *
 * @param[in] data_size size of destination buffer in bytes.
 * @param[in] destination destination buffer
 * @retval    TBD: but could be used to indicate that there is no more data to be read.
 * Or indicate an error and abort of current copy operations
 */
typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
                                                             size_t data_size, void* destination);

/**
 * @brief HSA callback function to indicate that there is data ready to be copied
 *
 * When the client receives this callback, the client should call back @p data_copy_callback for HSA
 * to perform the copy operation into an available buffer. @p data_copy_callback can be called back
 * multiple times with smaller @p data_size to split the copy operation.
 *
 * This callback must not call ::hsa_ven_amd_pcs_flush.
 *
 * @param[in] client_callback_data client private data passed in via
 * hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
 * @param[in] data_size size of data available to be copied
 * @param[in] lost_sample_count number of lost samples since last call to
 * hsa_ven_amd_pcs_data_ready_callback_t.
 * @param[in] data_copy_callback callback function for HSA to perform the actual copy
 * @param[in] hsa_callback_data private data to pass back to HSA
 */
typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
    void* client_callback_data, size_t data_size, size_t lost_sample_count,
    hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);

/**
 * @brief Opaque handle representing a sampling session.
 * Two sessions having same handle value represent the same session
 *
 * The handle is produced through the pc_sampling output parameter of
 * hsa_ven_amd_pcs_create / hsa_ven_amd_pcs_create_from_id and is passed by value to
 * hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_flush and
 * hsa_ven_amd_pcs_destroy.
 */
typedef struct {
  uint64_t handle;
} hsa_ven_amd_pcs_t;

/**
 * @brief PC Sampling configuration flag options
 *
 * Each enumerator is a single bit.
 * NOTE(review): presumably these bits are OR-ed into hsa_ven_amd_pcs_configuration_t::flags -
 * confirm against the implementation.
 */
typedef enum {
  /* The interval for this sampling method has to be a power of 2 */
  HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
} hsa_ven_amd_pcs_configuration_flags_t;

/**
 * @brief PC Sampling method information
 * Used to provide client with list of supported PC Sampling methods
 *
 * One instance is handed to each invocation of
 * hsa_ven_amd_pcs_iterate_configuration_callback_t.
 */
typedef struct {
  /* Sampling method this entry describes. */
  hsa_ven_amd_pcs_method_kind_t method;
  /* Interval unit for this entry. */
  hsa_ven_amd_pcs_units_t units;
  /* Interval bounds; presumably expressed in `units` and inclusive - TODO confirm. */
  size_t min_interval;
  size_t max_interval;
  /* Presumably a bit mask of hsa_ven_amd_pcs_configuration_flags_t values - TODO confirm. */
  uint64_t flags;
} hsa_ven_amd_pcs_configuration_t;

/**
 * @brief Callback function to iterate through list of supported PC Sampling configurations
 *
 * @param[in] configuration one entry for supported PC Sampling method and configuration options
 * @param[in] callback_data client private callback data that was passed in when calling
 * hsa_ven_amd_pcs_iterate_configuration
 */
typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
    const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);

/**
 * @brief Iterate through list of current supported PC Sampling configurations for this @p agent
 *
 * HSA will callback @p configuration_callback for each currently available PC Sampling
 * configuration. The list of currently available configurations may not be the complete list of
 * configurations supported on the @p agent. The list of currently available configurations may be
 * reduced if the @p agent is currently handling other PC sampling sessions.
 *
 * @param[in] agent target agent
 * @param[in] configuration_callback callback function to iterate through list of configurations
 * @param[in] callback_data client private callback data
 **/
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
    hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
    void* callback_data);

/**
 * @brief  Create a PC Sampling session on @p agent
 *
 * Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
 * parameters must be a legal configuration value, as described by the
 * hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
 * hsa_ven_amd_pcs_iterate_configuration for this @p agent.
 * A successful call may restrict the list of possible PC sampling methods available to subsequent
 * calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
 * on what types of PC sampling they can perform concurrently.
 * For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
 * The session will be in a stopped/inactive state after this call
 *
 * @param[in] agent target agent
 * @param[in] method method to use
 * @param[in] units sampling units
 * @param[in] interval sampling interval in @p units
 * @param[in] latency expected latency in microseconds for client to provide a buffer for the data
 * copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
 * buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
 * how many samples are received within @p latency and call @p data_ready_callback ahead of time so
 * that the client has @p latency time to allocate the buffer before the HSA-runtime internal
 * buffers are full. The value of latency can be 0.
 * @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
 * HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
 * perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
 * @param[in] data_ready_callback client callback function that will be called when:
 *   1. There are enough samples to fill a buffer of @p buffer_size, minus the estimated number of
 *      samples received within the @p latency period.
 * OR
 *   2. When hsa_ven_amd_pcs_flush is called.
 * @param[in] client_callback_data client private data to be provided back when data_ready_callback
 * is called.
 * @param[out] pc_sampling PC sampling session handle used to reference this session when calling
 * hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
 *
 * @retval ::HSA_STATUS_SUCCESS session created successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
 * @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
 * cannot handle the type requested.
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
 * @retval ::HSA_STATUS_ERROR Unexpected error
 **/
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
                                    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
                                    size_t buffer_size,
                                    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
                                    void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);


/**
 * @brief  Creates a PC Sampling session on @p agent. Assumes that the caller provides the
 * @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
 * on the @p agent.
 *
 * Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
 * PC sampling session that was previously created in the underlying driver.
 *
 * Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
 * parameters must be a legal configuration value, and match the parameters that we used to create
 * the underlying PC Sampling session in the underlying driver.
 * A successful call may restrict the list of possible PC sampling methods available to subsequent
 * calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
 * on what types of PC sampling they can perform concurrently.
 * For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
 * The session will be in a stopped/inactive state after this call
 *
 * @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
 * @param[in] agent target agent
 * @param[in] method method to use
 * @param[in] units sampling units
 * @param[in] interval sampling interval in @p units
 * @param[in] latency expected latency in microseconds for client to provide a buffer for the data
 * copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
 * buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
 * how many samples are received within @p latency and call @p data_ready_callback ahead of time so
 * that the client has @p latency time to allocate the buffer before the HSA-runtime internal
 * buffers are full. The value of latency can be 0.
 * @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
 * HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
 * perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
 * @param[in] data_ready_callback client callback function that will be called when:
 *   1. There are enough samples to fill a buffer of @p buffer_size, minus the estimated number of
 *      samples received within the @p latency period.
 * OR
 *   2. When hsa_ven_amd_pcs_flush is called.
 * @param[in] client_callback_data client private data to be provided back when data_ready_callback
 * is called.
 * @param[out] pc_sampling PC sampling session handle used to reference this session when calling
 * hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
 *
 * @retval ::HSA_STATUS_SUCCESS session created successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
 * @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
 * cannot handle the type requested.
 * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
 * @retval ::HSA_STATUS_ERROR Unexpected error
 **/
hsa_status_t hsa_ven_amd_pcs_create_from_id(
    uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
    hsa_ven_amd_pcs_t* pc_sampling);

/**
 * @brief  Free a PC Sampling session on @p agent
 *
 * Free all the resources allocated for a PC Sampling session on @p agent
 * Internal buffers for this session will be lost.
 * If the session was active, the session will be stopped before it is destroyed.
 *
 * @param[in] pc_sampling PC sampling session handle
 *
 * @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
 * @retval ::HSA_STATUS_ERROR unexpected error
 */
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);

/**
 * @brief  Start a PC Sampling session
 *
 * Activate a PC Sampling session that was previously created.
 * The session will be in an active state after this call.
 * If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
 *
 * @param[in] pc_sampling PC sampling session handle
 *
 * @retval ::HSA_STATUS_SUCCESS Session started successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
 * @retval ::HSA_STATUS_ERROR unexpected error
 */
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);

/**
 * @brief  Stop a PC Sampling session
 *
 * Stop a session that is currently active
 * After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
 * The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
 * buffers are not drained and the session is started again, the internal buffers will be available
 * on the next data_ready_callback.
 * If the session was already inactive, this will result in a no-op and will return
 * HSA_STATUS_SUCCESS
 *
 * @param[in] pc_sampling PC sampling session handle
 *
 * @retval ::HSA_STATUS_SUCCESS Session stopped successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
 */
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);

/**
 * @brief  Flush internal buffers for a PC Sampling session
 *
 * Drain internal buffers for a PC Sampling session. If internal buffers have available data,
 * this triggers a data_ready_callback.
 *
 * The function blocks until all PC samples associated with the @p pc_sampling session
 * generated prior to the function call have been communicated by invocations of
 * @p data_ready_callback having completed execution.
 *
 * @param[in] pc_sampling PC sampling session handle
 *
 * @retval ::HSA_STATUS_SUCCESS Session flushed successfully
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
 */
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);

#define hsa_ven_amd_pc_sampling_1_00

/**
 * @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
 * ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
 *
 * Each member carries the name, return type and parameter list of the extension function of
 * the same name declared earlier in this header. The members appear in this order:
 * iterate_configuration, create, create_from_id, destroy, start, stop, flush.
 */
typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
  hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
      hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
      void* callback_data);

  hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
                                         hsa_ven_amd_pcs_units_t units, size_t interval,
                                         size_t latency, size_t buffer_size,
                                         hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
                                         void* client_callback_data,
                                         hsa_ven_amd_pcs_t* pc_sampling);

  hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
      uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
      hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
      hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
      hsa_ven_amd_pcs_t* pc_sampling);

  hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);

  hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);

  hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);

  hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);

} hsa_ven_amd_pc_sampling_1_00_pfn_t;

#ifdef __cplusplus
}  // end extern "C" block
#endif /*__cplusplus*/

#endif /* HSA_VEN_AMD_PC_SAMPLING_H */


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_core_dump.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <unistd.h>
#include <elf.h>
#include <fcntl.h>
#include <sys/resource.h>
#include <cstring>
#include <vector>
#include <sstream>
#include <fstream>
#include <memory>
#include "core/util/utils.h"
#include "core/inc/runtime.h"
#include "./amd_hsa_code_util.hpp"
#include "core/inc/amd_core_dump.hpp"
#include "hsakmt/hsakmt.h"

/* Every snapshot blob appended to the note payload is zero-padded up to a multiple of this
   many bytes (see NoteSegmentBuilder::PushInfo).  */
constexpr char SNAPSHOT_INFO_ALIGNMENT = 0x8;
/* log2 of the file-offset alignment used for PT_LOAD segments: build_core_dump aligns p_offset
   to (1 << shift).  NOTE(review): the shift value itself, not the byte alignment, is what gets
   stored in p_align - confirm that consumers of the core file expect this.  */
constexpr uint32_t LOAD_ALIGNMENT_SHIFT = 4;
/* log2 of the file-offset alignment used for PT_NOTE segments; same p_align remark applies.  */
constexpr uint32_t NOTE_ALIGNMENT_SHIFT = 2;
/* The core file is named "<PREFIX_FILE_NAME>.<pid>"; the name is passed to open() as a
   relative path.  */
const std::string PREFIX_FILE_NAME = "gpucore";
/* Size in bytes of the intermediate buffer used to copy segment contents into the core file.  */
constexpr size_t MAX_BUFFER_SIZE = 4 * 1024 * 1024;

namespace rocr {
namespace amd {
namespace coredump {
/* Implementation details */
namespace impl {
/* Accumulates raw bytes in an in-memory binary stream so that a payload can be assembled
   field by field and then copied out in one piece.  */
class PackageBuilder {
 public:
  PackageBuilder() : st_(std::stringstream::out | std::stringstream::binary) {}

  /* Number of bytes written so far.  This materialises a copy of the stream contents, so it
     is not meant to be called in a tight loop.  */
  size_t Size() const { return st_.str().size(); }

  /* Append the object representation (sizeof(T) bytes) of a non-pointer value.  */
  template <typename T, typename = typename std::enable_if<!std::is_pointer<T>::value>::type>
  void Write(const T& v) {
    st_.write(reinterpret_cast<const char*>(&v), sizeof(T));
  }

  /* Append the elements of a byte vector.  */
  void Write(const std::vector<uint8_t>& v) {
    st_.write(reinterpret_cast<const char*>(v.data()), v.size());
  }

  /* Append `size` bytes starting at `data`.  */
  void Write(void* data, uint32_t size) { st_.write(static_cast<const char*>(data), size); }

  /* Copy everything written so far into `out`, which must have room for Size() bytes.
     Returns false (and leaves `out` untouched) when nothing has been written.  */
  bool GetBuffer(void* out) {
    /* Take a single snapshot of the stream instead of copying it once for the size and a
       second time for the data.  */
    const std::string bytes = st_.str();

    if (bytes.empty()) return false;
    std::memcpy(out, bytes.data(), bytes.size());
    return true;
  }

  /* Debug helper: hex-dump `size` bytes of `buf` through debug_print.  */
  void Print(void* buf, uint64_t size) {
    const uint8_t* bytes = static_cast<const uint8_t*>(buf);
    /* The index has the same type as `size`; an int index would overflow for large sizes.  */
    for (uint64_t i = 0; i < size; i++) debug_print("%02x ", 0xFF & bytes[i]);
    debug_print("\n");
  }
 private:
  std::stringstream st_;
};

/* Kind of ELF program header a segment is emitted as.  */
enum SegmentType { LOAD, NOTE };
struct SegmentBuilder;

/* Description of one segment of the core file.  Every member has a default so that a
   default-constructed SegmentInfo never carries an indeterminate type or builder pointer.  */
struct SegmentInfo {
  /* Program header type to emit for this segment.  */
  SegmentType stype = LOAD;
  /* Value stored in p_vaddr; also the base offset handed to the builder's Read().  */
  uint64_t vaddr = 0;
  /* Segment size in bytes (stored in both p_filesz and p_memsz).  */
  uint64_t size = 0;
  /* Value stored in p_flags.  */
  uint32_t flags = 0;
  /* Non-owning pointer to the builder that can read this segment's contents.  */
  SegmentBuilder* builder = nullptr;
};

/* Ordered list of segments; build_core_dump emits one program header per entry, in order.  */
using SegmentsInfo = std::vector<SegmentInfo>;
using rocr::amd::hsa::alignUp;
/* Interface implemented by every source of core-file segments.  A builder first describes its
   segments through Collect() and is later asked for their contents through Read().  */
struct SegmentBuilder {
  virtual ~SegmentBuilder() = default;
  /* Find which segments need to be created and append a SegmentInfo for each of them to
     `segments`.  Returns HSA_STATUS_SUCCESS on success.  */
  virtual hsa_status_t Collect(SegmentsInfo& segments) = 0;
  /* Called to read a given SegmentInfo's data: fill `buf` with `buf_size` bytes starting at
     `offset`.  build_core_dump passes the segment's vaddr plus the number of bytes already
     copied as `offset`, so its meaning is builder specific.  */
  virtual hsa_status_t Read(void* buf, size_t buf_size, off_t offset) = 0;
};

/* Builds the PT_NOTE segment that carries the NT_AMDGPU_CORE_STATE note.

   The note descriptor is laid out as follows (in write order):
     uint64  note version (1)
     uint32  KFD interface major version
     uint32  KFD interface minor version
     uint64  size of the runtime info blob
     uint32  number of agent entries,  uint32  size of one agent entry
     uint32  number of queue entries,  uint32  size of one queue entry
     runtime info blob, agent entries, queue entries - each zero-padded to a multiple of
     SNAPSHOT_INFO_ALIGNMENT bytes.  */
struct NoteSegmentBuilder : public SegmentBuilder {
  /* Snapshot the runtime/agent/queue state through the KFD debug interface, assemble the note
     and append one NOTE SegmentInfo to `segments`.  The debug interface is disabled again on
     every path after it was successfully enabled.  */
  hsa_status_t Collect(SegmentsInfo& segments) override {
    void *runtime_ptr = nullptr, *agents_ptr = nullptr, *queues_ptr = nullptr;
    uint32_t runtime_size = 0, agents_size, queue_size, n_entries, entry_size;
    HsaVersionInfo versionInfo = {0};

    if (HSAKMT_CALL(hsaKmtDbgEnable(&runtime_ptr, &runtime_size))) {
      fprintf(stderr, "Failed to enable debug interface, "
              "debugger might be already attached.\n");
      return HSA_STATUS_ERROR;
    }
    /* The buffers handed back by the thunk are released with free() when leaving scope.  */
    std::unique_ptr<void, decltype(std::free) *> runtime_info(runtime_ptr, std::free);

    if (HSAKMT_CALL(hsaKmtGetVersion(&versionInfo))) {
      HSAKMT_CALL(hsaKmtDbgDisable());
      fprintf(stderr, "Failed to fetch driver ABI version.\n");
      return HSA_STATUS_ERROR;
    }
    /* Note version */
    note_package_builder_.Write<uint64_t>(1);
    /* Store version_major in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(versionInfo.KernelInterfaceMajorVersion);
    /* Store version_minor in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(versionInfo.KernelInterfaceMinorVersion);
    /* Store runtime_info_size in PT_NOTE package */
    note_package_builder_.Write<uint64_t>(runtime_size);

    if (HSAKMT_CALL(hsaKmtDbgGetDeviceData(&agents_ptr, &n_entries, &entry_size))) {
      HSAKMT_CALL(hsaKmtDbgDisable());
      fprintf(stderr, "Failed to fetch agents snapshot.\n");
      return HSA_STATUS_ERROR;
    }
    agents_size = n_entries * entry_size;
    std::unique_ptr<void, decltype(std::free) *> agents_info(agents_ptr, std::free);
    /* Store n_agents in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(n_entries);
    /* Store agent_info_entry_size in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(entry_size);

    if (HSAKMT_CALL(hsaKmtDbgGetQueueData(&queues_ptr, &n_entries, &entry_size, true))) {
      HSAKMT_CALL(hsaKmtDbgDisable());
      fprintf(stderr, "Failed to fetch queues snapshot.\n");
      return HSA_STATUS_ERROR;
    }
    queue_size = n_entries * entry_size;
    std::unique_ptr<void, decltype(std::free) *> queues_info(queues_ptr, std::free);
    /* Store n_queues in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(n_entries);
    /* Store queue_info_entry_size in PT_NOTE package */
    note_package_builder_.Write<uint32_t>(entry_size);

    PushInfo(runtime_info.get(), runtime_size);
    PushInfo(agents_info.get(), agents_size);
    PushInfo(queues_info.get(), queue_size);
    if (HSAKMT_CALL(hsaKmtDbgDisable())) {
      fprintf(stderr, "Failed to disable debug interface.\n");
      return HSA_STATUS_ERROR;
    }

    /* With note content, package this in the PT_NOTE.  The name is written as 8 bytes:
       the 7 bytes announced by namesz ("AMDGPU" plus its terminator) and one padding byte.  */
    PackageBuilder noteHeaderBuilder;
    noteHeaderBuilder.Write<uint32_t> (7);  /* namesz */
    noteHeaderBuilder.Write<uint32_t> (note_package_builder_.Size());  /* descsz */
    noteHeaderBuilder.Write<uint32_t> (NT_AMDGPU_CORE_STATE);  /* type.  */
    noteHeaderBuilder.Write<char[8]> ("AMDGPU\0");

    raw_.resize(noteHeaderBuilder.Size() + note_package_builder_.Size());
    if (!(noteHeaderBuilder.GetBuffer(raw_.data())
          && note_package_builder_.GetBuffer(&raw_[noteHeaderBuilder.Size()]))) {
      fprintf(stderr, "Failed to build the NT_AMDGPU_CORE_STATE note.\n");
      return HSA_STATUS_ERROR;
    }

    SegmentInfo s;
    s.stype = NOTE;
    s.vaddr = 0;
    s.size = raw_.size();
    s.flags = 0;
    s.builder = this;
    segments.push_back(s);

    return HSA_STATUS_SUCCESS;
  }

  /* Copy `buf_size` bytes of the assembled note, starting at byte `offset`, into `buf`.
     Returns HSA_STATUS_ERROR when the requested range is not fully inside the note.  */
  hsa_status_t Read(void* buf, size_t buf_size, off_t offset) override {
    /* Reject negative offsets explicitly and compare against the remaining length, so that
       neither the signed-to-unsigned conversion nor an addition can wrap around and let an
       out-of-range request through.  */
    if (offset < 0) return HSA_STATUS_ERROR;
    const size_t start = static_cast<size_t>(offset);
    if (start > raw_.size() || buf_size > raw_.size() - start) return HSA_STATUS_ERROR;
    memcpy(buf, raw_.data() + start, buf_size);
    return HSA_STATUS_SUCCESS;
  }

 private:
  /* Note descriptor under construction.  */
  PackageBuilder note_package_builder_;
  /* Complete note (header, name and descriptor) as served by Read().  */
  std::vector<unsigned char> raw_;

  /* Append `size` bytes of `data` to the descriptor, followed by the zero bytes needed to
     reach the next multiple of SNAPSHOT_INFO_ALIGNMENT.  */
  void PushInfo(void *data, uint32_t size) {
    note_package_builder_.Write(data, size);
    const uint32_t padding = alignUp(size, SNAPSHOT_INFO_ALIGNMENT) - size;
    for (uint32_t i = 0; i < padding; i++)
      note_package_builder_.Write<uint8_t>(0);
  }
};

struct LoadSegmentBuilder : public SegmentBuilder {
  LoadSegmentBuilder() : fd_(open("/proc/self/mem", O_RDONLY)) {}

  ~LoadSegmentBuilder() {
    if (fd_ != -1) close(fd_);
  }

  hsa_status_t Collect(SegmentsInfo& segments) override {
    const std::string maps_path = "/proc/self/maps";
    std::ifstream maps(maps_path);
    if (!maps.is_open()) {
      fprintf(stderr, "Could not open '%s'", maps_path.c_str());
      return HSA_STATUS_ERROR;
    }

    std::string line;
    while (std::getline(maps, line)) {
      std::istringstream isl{ line };
      std::string address, perms, offset, dev, inode, path;
      if (!(isl >> address >> perms >> offset >> dev >> inode)) {
        fprintf(stderr, "Failed to parse '%s'", maps_path.c_str());
        return HSA_STATUS_ERROR;
      }

      std::getline(isl >> std::ws, path);

      /* Look for the /dev/dri/renderD* files.  */
      if (path.rfind("/dev/dri/renderD", 0) == 0) {
        uint64_t start, end;
        if (sscanf(address.c_str(), "%lx-%lx", &start, &end) != 2) {
          fprintf(stderr, "Failed to parse '%s'", maps_path.c_str());
          return HSA_STATUS_ERROR;
        }
        uint32_t flags = SHF_ALLOC;
        flags |= (perms.find('w', 0) != std::string::npos) ? SHF_WRITE : 0;
        flags |= (perms.find('x', 0) != std::string::npos) ? SHF_EXECINSTR : 0;
        uint64_t size = end - start;

        debug_print("LOAD 0x%lx size: %ld\n", start, size);
        SegmentInfo s;
        s.stype = LOAD;
        s.vaddr = start;
        s.size = size;
        s.flags = flags;
        s.builder = this;
        segments.push_back(s);
       }
     }
     return HSA_STATUS_SUCCESS;
  }

  hsa_status_t Read(void* buf, size_t buf_size, off_t offset) override {
    if (fd_ == -1) return HSA_STATUS_ERROR;

    size_t done = 0;
    ssize_t read;
    do {
      read = pread(fd_, static_cast<char *>(buf) + done, buf_size - done,
                   offset + done);

      if (read == -1 && errno != EINTR) {
        perror("Failed to read GPU memory");
        return HSA_STATUS_ERROR;
      }
      else if (read > 0)
        done += read;
    } while (read != 0 && done < buf_size);

    if (read == 0 && done < buf_size) {
      fprintf(stderr, "Reached unexpected EOF while reading VRAM.\n");
      return HSA_STATUS_ERROR;
    }

    return HSA_STATUS_SUCCESS;
  }

 private:
  int fd_ = -1;
};

/* Write exactly `size` bytes from `data` to `fd` at file position `offset`, retrying after
   EINTR and after short writes.  Returns false with errno set on failure.  */
static bool pwrite_fully(int fd, const void* data, size_t size, off_t offset) {
  const char* cursor = static_cast<const char*>(data);
  while (size > 0) {
    const ssize_t written = pwrite(fd, cursor, size, offset);
    if (written == -1) {
      if (errno == EINTR) continue;
      return false;
    }
    if (written == 0) {
      /* No progress and no error: report it instead of spinning forever.  */
      errno = EIO;
      return false;
    }
    cursor += written;
    offset += written;
    size -= static_cast<size_t>(written);
  }
  return true;
}

/* Create the ELF core file `filename` from `segments`.

   The file consists of the ELF header, one program header per segment, and then the segment
   contents in order, each placed at an offset aligned to (1 << p_align).  The contents are
   obtained from each segment's builder in chunks of at most MAX_BUFFER_SIZE bytes.

   `size_limit` is the maximum file size in bytes; (size_t)-1 means unlimited.  When the first
   segment does not fit, no file is created.  When a later segment does not fit, the file is
   left truncated at that point.  Both cases return HSA_STATUS_SUCCESS.

   Returns HSA_STATUS_SUCCESS when there is nothing to do or the file was written, the
   builder's status if reading a segment fails, and HSA_STATUS_ERROR on any file error.  The
   file is created with O_EXCL, so an already existing file is an error.

   NOTE(review): e_phnum is a 16-bit field; a segment count that does not fit in it is not
   handled here.  */
hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& segments, size_t size_limit) {
  if (segments.empty()) return HSA_STATUS_SUCCESS;

  const bool limited = size_limit != static_cast<size_t>(-1);
  off_t offset = sizeof(Elf64_Ehdr) + segments.size() * sizeof(Elf64_Phdr);

  if (limited && (offset + segments.front().size > size_limit)) {
    debug_print("Core file size over limit\n");
    return HSA_STATUS_SUCCESS;
  }

  /* Only allocate the copy buffer once it is known that a file will be written.  */
  std::unique_ptr<unsigned char[]> copy_buffer(new unsigned char[MAX_BUFFER_SIZE]);

  const int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
  if (fd == -1) {
    perror("Failed to create GPU coredump");
    return HSA_STATUS_ERROR;
  }
  /* Closes the file on every exit path below.  */
  struct FdCloser {
    int fd_;
    ~FdCloser() { close(fd_); }
  } fd_closer{fd};

  Elf64_Ehdr ehdr{};
  ehdr.e_ident[EI_MAG0] = ELFMAG0;
  ehdr.e_ident[EI_MAG1] = ELFMAG1;
  ehdr.e_ident[EI_MAG2] = ELFMAG2;
  ehdr.e_ident[EI_MAG3] = ELFMAG3;
  ehdr.e_ident[EI_CLASS] = ELFCLASS64;
  ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
  ehdr.e_ident[EI_VERSION] = EV_CURRENT;
  ehdr.e_ident[EI_OSABI] = ELF::ELFOSABI_AMDGPU_HSA;
  ehdr.e_ident[EI_ABIVERSION] = 0;
  ehdr.e_type = ET_CORE;
  ehdr.e_machine = ELF::EM_AMDGPU;
  ehdr.e_version = EV_CURRENT;
  ehdr.e_entry = 0;
  ehdr.e_phoff = sizeof(Elf64_Ehdr);
  ehdr.e_shoff = 0;
  ehdr.e_flags = 0;
  ehdr.e_ehsize = sizeof(Elf64_Ehdr);
  ehdr.e_phentsize = sizeof(Elf64_Phdr);
  ehdr.e_phnum = segments.size();
  ehdr.e_shentsize = 0;
  ehdr.e_shnum = 0;
  ehdr.e_shstrndx = 0;

  if (!pwrite_fully(fd, &ehdr, sizeof(ehdr), 0)) {
    perror("Failed to write ELF header");
    return HSA_STATUS_ERROR;
  }

  /* Make sure that the underlying file has enough space for the file headers. */
  int error = posix_fallocate(fd, sizeof(Elf64_Ehdr), segments.size() * sizeof(Elf64_Phdr));
  if (error != 0) {
    fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
    return HSA_STATUS_ERROR;
  }

  size_t idx = 0;
  for (const SegmentInfo& seg : segments) {
    if (seg.builder == nullptr) return HSA_STATUS_ERROR;

    Elf64_Phdr phdr{};
    switch (seg.stype) {
      case LOAD:
        phdr.p_type = PT_LOAD;
        phdr.p_align = LOAD_ALIGNMENT_SHIFT;
        break;
      case NOTE:
        phdr.p_type = PT_NOTE;
        phdr.p_align = NOTE_ALIGNMENT_SHIFT;
        break;
      default:
        assert(false);
        phdr.p_type = PT_NULL;
        phdr.p_align = 0;
        break;
    }
    phdr.p_flags = seg.flags;
    phdr.p_vaddr = seg.vaddr;
    phdr.p_paddr = 0;
    phdr.p_memsz = seg.size;
    phdr.p_filesz = seg.size;
    /* p_align holds the alignment shift; the file offset is aligned to (1 << shift).  */
    phdr.p_offset = alignUp(offset, (uint64_t)1 << phdr.p_align);

    /* Check the limit against where the segment would really end in the file.  */
    if (limited && (phdr.p_offset + seg.size > size_limit)) {
      printf("Core limit file reached. GPU core dump created: %s\n", filename.c_str());
      return HSA_STATUS_SUCCESS;
    }

    if (!pwrite_fully(fd, &phdr, sizeof(phdr), sizeof(Elf64_Ehdr) + idx * sizeof(Elf64_Phdr))) {
      perror("Failed to write ELF header");
      return HSA_STATUS_ERROR;
    }
    /* Allocate space for the segment on the file, and write the segment
       content.  */
    error = posix_fallocate(fd, phdr.p_offset, phdr.p_filesz);
    if (error != 0) {
      fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
      return HSA_STATUS_ERROR;
    }

    size_t remaining = phdr.p_filesz;
    while (remaining > 0) {
      const size_t curr_chunk = std::min(remaining, MAX_BUFFER_SIZE);
      const uint64_t copied = phdr.p_filesz - remaining;
      try {
        const hsa_status_t st =
            seg.builder->Read(copy_buffer.get(), curr_chunk, phdr.p_vaddr + copied);
        if (st != HSA_STATUS_SUCCESS) return st;

        if (!pwrite_fully(fd, copy_buffer.get(), curr_chunk, phdr.p_offset + copied)) {
          perror("Failed to write core dump");
          return HSA_STATUS_ERROR;
        }
      } catch (...) {
        return HSA_STATUS_ERROR;
      }
      remaining -= curr_chunk;
    }

    /* Continue after the real end of this segment.  Advancing the unaligned offset by
       p_filesz would forget the alignment padding and could make the next segment overlap
       this one whenever a segment size is not a multiple of the alignment.  */
    offset = phdr.p_offset + phdr.p_filesz;
    idx++;
  }
  printf("GPU core dump created: %s\n", filename.c_str());
  return HSA_STATUS_SUCCESS;
}
}   //  namespace impl

/* Write a GPU core file named "<PREFIX_FILE_NAME>.<pid>" for the current process.

   The soft RLIMIT_CORE limit is honoured: a limit of 0 disables the dump (the function
   returns HSA_STATUS_SUCCESS without doing anything) and any other value is passed to
   build_core_dump as the maximum file size.  The note segment is collected first, followed
   by the load segments.  Returns the first non-success status encountered.  */
hsa_status_t dump_gpu_core() {
  struct rlimit core_limit;

  if (getrlimit(RLIMIT_CORE, &core_limit)) {
    /* perror() appends ": <error>\n" itself, so the message must not end in a newline.  */
    perror("Could not get core file size");
    return HSA_STATUS_ERROR;
  }
  debug_print("core file size: %ld\n", core_limit.rlim_cur);

  if (core_limit.rlim_cur == 0)
    return HSA_STATUS_SUCCESS;

  /* The builders are only created once it is known that a dump is wanted; constructing
     the load builder opens /proc/self/mem.  */
  impl::NoteSegmentBuilder nbuilder;
  impl::LoadSegmentBuilder lbuilder;
  impl::SegmentsInfo segments;

  hsa_status_t status = nbuilder.Collect(segments);
  if (status != HSA_STATUS_SUCCESS) return status;

  status = lbuilder.Collect(segments);
  if (status != HSA_STATUS_SUCCESS) return status;

  std::stringstream core_name;
  core_name << PREFIX_FILE_NAME << "." << getpid();
  return build_core_dump(core_name.str(), segments, core_limit.rlim_cur);
}
}   //  namespace coredump
}   //  namespace amd
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_elf_image.hpp"
#include "amd_hsa_code_util.hpp"
#include <gelf.h>
#include <errno.h>
#include <cstring>
#include <cerrno>
#include <fstream>
#include <memory>
#include <cassert>
#include <cstdlib>
#include <algorithm>
#ifdef _WIN32
#include <Windows.h>
#define alignof __alignof
#endif // _WIN32
#include <libelf.h>

#ifndef _WIN32
#define _open open
#define _close close
#define _tempnam tempnam
#include <fcntl.h>
#include <unistd.h>
#endif

#if defined(USE_MEMFILE)

#include "memfile.h"
#define OpenTemp(f)           mem_open(NULL, 0, 0)
#define CloseTemp(f)          mem_close(f)
#define _read(f, b, l)        mem_read((f), (b), (l))
#define _write(f, b, l)       mem_write((f), (b), (l))
#define _lseek(f, l, w)       mem_lseek((f), (l), (w))
#define _ftruncate(f, l)      mem_ftruncate((f), (size_t)(l))
#define sendfile(o, i, p, s)  mem_sendfile((o), (i), (p), (s))

#else // USE_MEMFILE

#define OpenTemp(f) amd::hsa::OpenTempFile(f);
#define CloseTemp(f) amd::hsa::CloseTempFile(f);

#ifndef _WIN32
#define _read read
#define _write write
#define _lseek lseek
#define _ftruncate ftruncate
#include <sys/sendfile.h>
#else
#define _ftruncate _chsize
#endif // !_WIN32

#endif // !USE_MEMFILE

#if !defined(BSD_LIBELF)
  #define elf_setshstrndx elfx_update_shstrndx
#endif

#define NOTE_RECORD_ALIGNMENT 4

using rocr::amd::hsa::alignUp;

namespace rocr {
namespace amd {
namespace elf {

    // Wraps the temporary file (or in-memory file when USE_MEMFILE is defined)
    // that backs an ELF image, and collects human-readable error messages.
    class FileImage {
    public:
      // Starts without a backing file (descriptor is -1).
      FileImage();
      // Closes the backing file if one is open.
      ~FileImage();
      // Opens the temporary backing file. Returns false and logs on failure.
      bool create();
      // Copies the contents of `filename` into the backing file.
      bool readFrom(const std::string& filename);
      // Replaces the backing file contents with `size` bytes from `data`.
      bool copyFrom(const void* data, size_t size);
      // Writes the backing file contents to `filename`.
      bool writeTo(const std::string& filename);
      // Copies the contents into a malloc()-allocated buffer returned through
      // `buffer`; the byte count is stored in `size` when it is non-null.
      bool copyTo(void** buffer, size_t* size = 0);
      // Copies the contents into a caller-provided buffer of `size` bytes.
      bool copyTo(void* buffer, size_t size);
      // Returns the size of the backing file and rewinds it to offset 0.
      size_t getSize();

      // Returns all error messages accumulated so far.
      std::string output() { return out.str(); }

      // Returns the descriptor of the backing file (-1 when not open).
      int fd() { return d; }

    private:
      // Descriptor of the backing file; -1 means "not open".
      int d;
      // Accumulated error messages.
      std::ostringstream out;

      // Logs `msg` and returns false.
      bool error(const char* msg);
      // Logs `msg` together with strerror(errno) and returns false.
      bool perror(const char *msg);
      // Only defined when building for _WIN32.
      std::string werror();
    };

    // Constructs an image with no backing file: the descriptor is set to -1.
    FileImage::FileImage() : d(-1) {}

    // Closes the backing file when one is open; otherwise does nothing.
    FileImage::~FileImage()
    {
      if (d == -1) { return; }
      CloseTemp(d);
    }

    bool FileImage::error(const char* msg)
    {
      out << "Error: " << msg << std::endl;
      return false;
    }

    bool FileImage::perror(const char* msg)
    {
      out << "Error: " << msg << ": " << strerror(errno) << std::endl;
      return false;
    }

#ifdef _WIN32
    std::string FileImage::werror()
    {
      LPVOID lpMsgBuf;
      DWORD dw = GetLastError();

      FormatMessage(
        FORMAT_MESSAGE_ALLOCATE_BUFFER |
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPTSTR)&lpMsgBuf,
        0, NULL);
      std::string result((LPTSTR)lpMsgBuf);
      LocalFree(lpMsgBuf);
      return result;
    }
#endif // _WIN32

    bool FileImage::create()
    {
      d = OpenTemp("amdelf");
      if (d == -1) { return error("Failed to open temporary file for elf image"); }
      return true;
    }

    bool FileImage::readFrom(const std::string& filename)
    {
#ifdef _WIN32
      std::unique_ptr<char> buffer(new char[32 * 1024 * 1024]);
      HANDLE in = CreateFile(filename.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
      if (in == INVALID_HANDLE_VALUE) { out << "Failed to open " << filename << ": " << werror() << std::endl; return false; }
      DWORD read;
      unsigned write;
      int written;
      do {
        if (!ReadFile(in, buffer.get(), sizeof(buffer), &read, NULL)) {
          out << "Failed to read " << filename << ": " << werror() << std::endl;
          CloseHandle(in);
          return false;
        }
        if (read > 0) {
          write = read;
          do {
            written = _write(d, buffer.get(), write);
            if (written < 0) {
              out << "Failed to write image file: " << werror() << std::endl;
              CloseHandle(in);
            }
            write -= written;
          } while (write > 0);
        }
      } while (read > 0);
      if (_lseek(d, 0L, SEEK_SET) < 0) { return perror("lseek(0) failed"); }
      CloseHandle(in);
      return true;
#else // _WIN32
      int in = _open(filename.c_str(), O_RDONLY);
      if (in < 0) { return perror("open failed"); }
      if (_lseek(in, 0L, SEEK_END) < 0) { 
        _close(in);
        return perror("lseek failed"); 
      }
      off_t size;
      if ((size = _lseek(in, 0L, SEEK_CUR)) < 0) { 
        _close(in);
        return perror("lseek(2) failed"); 
      }
      if (_lseek(in, 0L, SEEK_SET) < 0) { 
        _close(in);
        return perror("lseek(3) failed"); 
      }
      if (_lseek(d, 0L, SEEK_SET) < 0) { return perror("lseek(3) failed"); }
      ssize_t written;
      do {
        written = sendfile(d, in, NULL, size);
        if (written < 0) {
          _close(in);
          return perror("sendfile failed");
        }
        size -= written;
      } while (size > 0);
      _close(in);
      if (_lseek(d, 0L, SEEK_SET) < 0) { return perror("lseek(0) failed"); }
      return true;
#endif // _WIN32
    }

    bool FileImage::copyFrom(const void* data, size_t size)
    {
      assert(d != -1);
      if (_lseek(d, 0L, SEEK_SET) < 0) { return perror("lseek failed"); }
      if (_ftruncate(d, 0) < 0) { return perror("ftruncate failed"); }
      int written, offset = 0;
      while (size > 0) {
        written = _write(d, (const char*) data + offset, size);
        if (written < 0) {
          return perror("write failed");
        }
        size -= written;
        offset += written;
      }
      if (_lseek(d, 0L, SEEK_SET) < 0) { return perror("lseek failed"); }
      return true;
    }

    // Returns the size of the backing file in bytes and leaves the file
    // positioned at offset 0. When any seek fails, the failure is logged and
    // 0 is returned.
    size_t FileImage::getSize()
    {
      assert(d != -1);
      if (_lseek(d, 0L, SEEK_END) < 0) {
        perror("lseek failed");
        return 0;
      }
      long seek = _lseek(d, 0L, SEEK_CUR);
      if (seek < 0) {
        perror("lseek(2) failed");
        return 0;
      }
      if (_lseek(d, 0L, SEEK_SET) < 0) {
        perror("lseek(3) failed");
        return 0;
      }
      return seek;
    }

    bool FileImage::copyTo(void** buffer, size_t* size)
    {
      size_t size1 = getSize();
      void* buffer1 = malloc(size1);
      if (_read(d, buffer1, size1) < 0) { free(buffer1); return perror("read failed"); }
      *buffer = buffer1;
      if (size) { *size = size1; }
      return true;
    }

    bool FileImage::copyTo(void* buffer, size_t size)
    {
      size_t size1 = getSize();
      if (size < size1) { return error("Buffer size is not enough"); }
      if (_read(d, buffer, size1) < 0) { return perror("read failed"); }
      return true;
    }

    bool FileImage::writeTo(const std::string& filename)
    {
      bool res = false;
      size_t size = 0;
      void *buffer = nullptr;
      if (copyTo(&buffer, &size)) {
        res = true;
        std::ofstream out(filename.c_str(), std::ios::binary);
        out.write((char*)buffer, size);
      }
      free(buffer);
      return res;
    }

    // Byte buffer with two modes:
    //  - "const": a view over external memory (ptr_/size_), selected whenever
    //    size_ is non-zero; the mutating operations assert against this mode;
    //  - growable: bytes are stored in data_ and can be appended to.
    // Note that an external view of size 0 is indistinguishable from an empty
    // growable buffer, because the mode is derived from size_ alone.
    class Buffer {
    public:
      typedef unsigned char byte_type;
      typedef size_t size_type;

      // Creates an empty growable buffer.
      Buffer();
      // Creates a view over `size` bytes at `src` with the given alignment.
      Buffer(const byte_type *src, size_type size, size_type align = 0);
      virtual ~Buffer();

      // Pointer to the first byte of the active storage.
      const byte_type* raw() const
        { return this->isConst() ? ptr_ : data_.data(); }
      // Largest alignment recorded for this buffer.
      size_type align() const
        { return align_; }
      // Number of bytes in the active storage.
      size_type size() const
        { return this->isConst() ? size_ : data_.size(); }
      // True when the buffer is a view over external memory.
      bool isConst() const
        { return 0 != size_; }
      bool isEmpty()
        { return size() == 0; }
      // True when `src` points inside [raw(), raw() + size()).
      bool hasRaw(const byte_type *src) const
        { return (src >= this->raw()) && (src < this->raw() + this->size()); }
      template<typename T>
      bool has(const T *src) const
        { return this->hasRaw((const byte_type*)src); }
      // True when `offset` is a valid byte offset into the buffer.
      bool has(size_type offset) const
        { return offset < this->size(); }

      // Byte offset of `src` from the start of the buffer.
      template<typename T>
      size_type getOffset(const T *src) const
        { return this->getRawOffset((const byte_type*)src); }
      // Address at byte `offset`, cast to T (T is expected to be a pointer type).
      template<typename T>
      T get(size_type offset) const
        { return (T)this->getRaw(offset); }
      // Appends the string including its terminating NUL; returns its offset.
      size_type addString(const std::string &str, size_type align = 0);
      // Appends length + 1 of the string as a uint32_t; returns its offset.
      size_type addStringLength(const std::string &str, size_type align = 0);
      // Offset at which the next item with alignment `align` would be placed.
      size_type nextOffset(size_type align) const { return alignUp(this->size(), align); }
      // Appends `size` bytes from `src` after aligning; returns the offset.
      template<typename T>
      size_type add(const T *src, size_type size, size_type align)
        { return this->addRaw((const byte_type*)src, size, align); }
      // Appends the object representation of `src`; an alignment of 0 means
      // alignof(T).
      template<typename T>
      size_type add(const T &src, size_type align = 0)
        { return this->addRaw((const byte_type*)&src, sizeof(T), align == 0 ? alignof(T) : align); }
      // Pads the buffer with zero bytes up to `align`; returns the new size.
      size_type align(size_type align);

      // Appends sizeof(T) zero bytes aligned for T; returns their offset.
      template<typename T>
      size_type reserve()
      {
        Buffer::size_type offset = this->align(alignof(T));
        data_.insert(data_.end(), sizeof(T), 0x0);
        return offset;
      }

    private:
      size_type getRawOffset(const byte_type *src) const;
      const byte_type* getRaw(size_type offset) const;
      size_type addRaw(const byte_type *src, size_type size, size_type align);

      // Owned storage, used in growable mode.
      std::vector<byte_type> data_;
      // External storage, used in const mode.
      const byte_type *ptr_;
      // Size of the external storage; non-zero selects const mode.
      size_type size_;
      // Largest alignment requested so far (or the one given at construction).
      size_type align_;
    };

    // Creates an empty growable buffer: no external storage, no alignment.
    Buffer::Buffer()
      : ptr_(nullptr), size_(0), align_(0)
    {}

    // Creates a view over `size` bytes of external memory at `src`, recording
    // `align` as the buffer alignment. The bytes are not copied.
    Buffer::Buffer(const Buffer::byte_type *src, Buffer::size_type size, Buffer::size_type align)
      : ptr_(src), size_(size), align_(align)
    {}

    // Nothing to release explicitly; members clean up after themselves.
    Buffer::~Buffer() = default;

    // Returns the byte offset of `src` from the start of the buffer.
    // `src` must point inside the buffer (checked with assert).
    Buffer::size_type Buffer::getRawOffset(const Buffer::byte_type *src) const
    {
      assert(this->has(src));
      const Buffer::byte_type* const base = this->raw();
      return src - base;
    }

    // Returns the address of the byte at `offset`.
    // `offset` must be smaller than size() (checked with assert).
    const Buffer::byte_type* Buffer::getRaw(Buffer::size_type offset) const
    {
      assert(this->has(offset));
      const Buffer::byte_type* const base = this->raw();
      return base + offset;
    }

    // Pads the buffer to `align`, appends `size` bytes copied from `src`, and
    // returns the offset at which the copied bytes start. Only valid on a
    // growable buffer with non-null `src` and non-zero `size` and `align`.
    Buffer::size_type Buffer::addRaw(const Buffer::byte_type *src, Buffer::size_type size, Buffer::size_type align)
    {
      assert(!this->isConst());
      assert(nullptr != src);
      assert(0 != size);
      assert(0 != align);
      const Buffer::size_type start = this->align(align);
      const Buffer::byte_type* const last = src + size;
      data_.insert(data_.end(), src, last);
      return start;
    }

    // Appends `str` including its terminating NUL and returns its offset.
    // An `align` of 0 selects the alignment of char.
    Buffer::size_type Buffer::addString(const std::string &str, size_type align)
    {
      const size_type effectiveAlign = (align != 0) ? align : alignof(char);
      const size_type byteCount = str.length() + 1;
      return this->add(str.c_str(), byteCount, effectiveAlign);
    }

    // Appends the length of `str` plus one (for the terminating NUL) as a
    // uint32_t and returns its offset. An `align` of 0 selects the alignment
    // of uint32_t.
    Buffer::size_type Buffer::addStringLength(const std::string &str, size_type align)
    {
      const uint32_t lengthWithNul = (uint32_t)(str.length() + 1);
      const size_type effectiveAlign = (align != 0) ? align : alignof(uint32_t);
      return this->add(lengthWithNul, effectiveAlign);
    }

    // Pads a growable buffer with zero bytes until its size is a multiple of
    // `align`, raises the recorded buffer alignment to at least `align`, and
    // returns the padded size.
    Buffer::size_type Buffer::align(Buffer::size_type align)
    {
      assert(!this->isConst());
      assert(0 != align);
      const Buffer::size_type current = this->size();
      const Buffer::size_type padded = alignUp(current, align);
      if (align_ < align) { align_ = align; }
      // Growing with resize() zero-fills the gap between the old and new size.
      data_.resize(padded, 0x0);
      return padded;
    }

    class GElfImage;
    class GElfSegment;

    // Section implementation backed by a gelf section header (GElf_Shdr) and
    // two byte buffers. Size and alignment are taken from data0 when it is
    // non-empty and from data otherwise.
    // NOTE(review): presumably data0 holds bytes pulled from an existing
    // image and data holds bytes added while building - confirm against the
    // push/pull implementations.
    class GElfSection : public virtual Section {
    public:
      GElfSection(GElfImage* elf);

      bool push(const char* name, uint32_t shtype, uint64_t shflags, uint16_t shlink, uint32_t info, uint32_t align, uint64_t entsize = 0);
      bool pull0();
      bool pull(uint16_t ndx);
      // Default implementation has no extra data to pull and reports success.
      virtual bool pullData() { return true; }
      bool push();
      uint16_t getSectionIndex() const override;
      uint32_t type() const override { return hdr.sh_type; }
      std::string Name() const override;
      uint64_t offset() const override { return hdr.sh_offset; }
      uint64_t addr() const override { return hdr.sh_addr; }
      bool updateAddr(uint64_t addr) override;
      uint64_t addralign() const override { return data0.size() == 0 ? data.align() : data0.align(); }
      uint64_t flags() const override { return hdr.sh_flags; }
      uint64_t size() const override { return data0.size() == 0 ? data.size() : data0.size(); }
      uint64_t nextDataOffset(uint64_t align) const override;
      uint64_t addData(const void *src, uint64_t size, uint64_t align) override;
      bool getData(uint64_t offset, void* dest, uint64_t size) override;
      bool hasRelocationSection() const override { return reloc_sec != 0; }
      RelocationSection* relocationSection(SymbolTable* symtab = 0) override;
      Segment* segment() override { return seg; }
      // A plain section is not a relocation section.
      RelocationSection* asRelocationSection() override { return 0; }
      // Overrides the in-memory size; 0 means "use size()".
      bool setMemSize(uint64_t s) override { memsize_ = s; return true; }
      uint64_t memSize() const override { return memsize_ ? memsize_ : size(); }
      // Overrides the in-memory alignment; 0 means "use addralign()".
      bool setAlign(uint64_t a) override { align_ = a; return true; }
      uint64_t memAlign() const override { return align_ ? align_ : addralign(); }

    protected:
      // Owning image.
      GElfImage* elf;
      // Segment returned by segment().
      Segment* seg;
      // Section header backing type(), offset(), addr() and flags().
      GElf_Shdr hdr;
      Buffer data0, data;
      // Explicit in-memory size override (0 = none).
      uint64_t memsize_;
      // Explicit in-memory alignment override (0 = none).
      uint64_t align_;
      // Associated relocation section, or null when there is none.
      RelocationSection *reloc_sec;

      size_t ndxscn;

      friend class GElfSymbol;
      friend class GElfSegment;
      friend class GElfImage;
    };

    // Segment implementation backed by a gelf program header (GElf_Phdr).
    // All accessors read directly from that header.
    class GElfSegment : public Segment {
    public:
      // Creates a segment with a zeroed program header.
      GElfSegment(GElfImage* elf, uint16_t index);
      // Creates a segment with a zeroed program header, then sets its type,
      // flags and physical address.
      GElfSegment(GElfImage* elf, uint16_t index, uint32_t type, uint32_t flags, uint64_t paddr = 0);
      bool push(uint64_t vaddr);
      bool pull();
      uint64_t type() const override { return phdr.p_type; }
      uint64_t memSize() const override { return phdr.p_memsz; }
      uint64_t align() const override { return phdr.p_align; }
      uint64_t imageSize() const override { return phdr.p_filesz; }
      uint64_t vaddr() const override { return phdr.p_vaddr; }
      uint64_t flags() const override { return phdr.p_flags; }
      uint64_t offset() const override { return phdr.p_offset; }
      // Pointer to the segment contents inside the image data.
      const char* data() const override;
      uint16_t getSegmentIndex() override;
      bool updateAddSection(Section *section) override;

    private:
      // Owning image.
      GElfImage* elf;
      // Index of this segment's program header.
      uint16_t index;
      GElf_Phdr phdr;
      // Sections that belong to this segment.
      std::vector<Section*> sections;
    };

    // String table section. The Section interface is inherited through both
    // GElfSection and StringTable, so every Section method is overridden here
    // and forwarded to the GElfSection implementation.
    class GElfStringTable : public GElfSection, public StringTable {
    public:
      GElfStringTable(GElfImage* elf);
      bool push(const char* name, uint32_t shtype, uint64_t shflags);
      bool pullData() override;
      const char* addString(const std::string& s) override;
      size_t addString1(const std::string& s) override;
      const char* getString(size_t ndx) override;
      size_t getStringIndex(const char* name) override;

      uint16_t getSectionIndex() const override { return GElfSection::getSectionIndex(); }
      uint32_t type() const override { return GElfSection::type(); }
      std::string Name() const override { return GElfSection::Name(); }
      uint64_t addr() const override { return GElfSection::addr(); }
      uint64_t offset() const override { return GElfSection::offset(); }
      bool updateAddr(uint64_t addr) override { return GElfSection::updateAddr(addr); }
      uint64_t addralign() const override { return GElfSection::addralign(); }
      uint64_t flags() const override { return GElfSection::flags(); }
      uint64_t size() const override { return GElfSection::size(); }
      Segment* segment() override { return GElfSection::segment(); }
      uint64_t nextDataOffset(uint64_t align) const override { return GElfSection::nextDataOffset(align); }
      uint64_t addData(const void *src, uint64_t size, uint64_t align) override { return GElfSection::addData(src, size, align); }
      bool getData(uint64_t offset, void* dest, uint64_t size) override { return GElfSection::getData(offset, dest, size); }
      bool hasRelocationSection() const override { return GElfSection::hasRelocationSection(); }
      // Forward the caller's symbol table instead of silently replacing it
      // with the base class default argument.
      RelocationSection* relocationSection(SymbolTable* symtab) override { return GElfSection::relocationSection(symtab); }
      RelocationSection* asRelocationSection() override { return 0; }
      uint64_t memSize() const override { return GElfSection::memSize(); }
      bool setMemSize(uint64_t s) override { return GElfSection::setMemSize(s); }
      uint64_t memAlign() const override { return GElfSection::memAlign(); }
      bool setAlign(uint64_t a) override { return GElfSection::setAlign(a); }
    };

    class GElfSymbolTable;

    class GElfSymbol : public Symbol {
    public:
      GElfSymbol(GElfSymbolTable* symtab, Buffer &data, size_t index);

      bool push(const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, uint16_t shndx, unsigned char other);

      uint32_t index() override { return eindex / sizeof(GElf_Rela); }
      uint32_t type() override { return GELF_ST_TYPE(Sym()->st_info); }
      uint32_t binding() override { return GELF_ST_BIND(Sym()->st_info); }
      uint64_t size() override { return Sym()->st_size; }
      uint64_t value() override { return Sym()->st_value; }
      unsigned char other() override { return Sym()->st_other; }
      std::string name() override;
      Section* section() override;

      void setValue(uint64_t value) override { Sym()->st_value = value; }
      void setSize(uint64_t size) override { Sym()->st_size = size; }

    private:
      GElf_Sym* Sym() { return edata.get<GElf_Sym*>(eindex); }
      GElfSymbolTable* symtab;
      Buffer &edata;
      size_t eindex;
      friend class GElfSymbolTable;
    };

    // Symbol table section. The Section interface is inherited through both
    // GElfSection and SymbolTable, so every Section method is overridden here
    // and forwarded to the GElfSection implementation.
    class GElfSymbolTable : public GElfSection, public SymbolTable {
    private:
      Symbol* addSymbolInternal(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0);

      // String table used with this symbol table (passed to push()).
      GElfStringTable* strtab;
      // Symbol objects owned by this table.
      std::vector<std::unique_ptr<GElfSymbol>> symbols;
      friend class GElfSymbol;

    public:
      GElfSymbolTable(GElfImage* elf);
      bool push(const char* name, GElfStringTable* strtab);
      bool pullData() override;
      uint16_t getSectionIndex() const override { return GElfSection::getSectionIndex(); }
      uint32_t type() const override { return GElfSection::type(); }
      std::string Name() const override { return GElfSection::Name(); }
      uint64_t offset() const override { return GElfSection::offset(); }
      uint64_t addr() const override { return GElfSection::addr(); }
      bool updateAddr(uint64_t addr) override { return GElfSection::updateAddr(addr); }
      uint64_t addralign() const override { return GElfSection::addralign(); }
      uint64_t flags() const override { return GElfSection::flags(); }
      uint64_t size() const override { return GElfSection::size(); }
      Segment* segment() override { return GElfSection::segment(); }
      uint64_t nextDataOffset(uint64_t align) const override { return GElfSection::nextDataOffset(align); }
      uint64_t addData(const void *src, uint64_t size, uint64_t align) override { return GElfSection::addData(src, size, align); }
      bool getData(uint64_t offset, void* dest, uint64_t size) override { return GElfSection::getData(offset, dest, size); }
      bool hasRelocationSection() const override { return GElfSection::hasRelocationSection(); }
      // Forward the caller's symbol table instead of silently replacing it
      // with the base class default argument.
      RelocationSection* relocationSection(SymbolTable* symtab) override { return GElfSection::relocationSection(symtab); }
      Symbol* addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other = 0) override;
      size_t symbolCount() override;
      Symbol* symbol(size_t i) override;
      RelocationSection* asRelocationSection() override { return 0; }
      uint64_t memSize() const override { return GElfSection::memSize(); }
      bool setMemSize(uint64_t s) override { return GElfSection::setMemSize(s); }
      uint64_t memAlign() const override { return GElfSection::memAlign(); }
      bool setAlign(uint64_t a) override { return GElfSection::setAlign(a); }
    };

    // Note section. The Section interface is inherited through both
    // GElfSection and NoteSection, so every Section method is overridden here
    // and forwarded to the GElfSection implementation.
    class GElfNoteSection : public GElfSection, public NoteSection {
    public:
      GElfNoteSection(GElfImage* elf);
      bool push(const std::string& name);
      uint16_t getSectionIndex() const override { return GElfSection::getSectionIndex(); }
      uint32_t type() const override { return GElfSection::type(); }
      std::string Name() const override { return GElfSection::Name(); }
      uint64_t addr() const override { return GElfSection::addr(); }
      bool updateAddr(uint64_t addr) override { return GElfSection::updateAddr(addr); }
      uint64_t offset() const override { return GElfSection::offset(); }
      uint64_t addralign() const override { return GElfSection::addralign(); }
      uint64_t flags() const override { return GElfSection::flags(); }
      uint64_t size() const override { return GElfSection::size(); }
      Segment* segment() override { return GElfSection::segment(); }
      uint64_t nextDataOffset(uint64_t align) const override { return GElfSection::nextDataOffset(align); }
      uint64_t addData(const void *src, uint64_t size, uint64_t align) override { return GElfSection::addData(src, size, align); }
      bool getData(uint64_t offset, void* dest, uint64_t size) override { return GElfSection::getData(offset, dest, size); }
      bool hasRelocationSection() const override { return GElfSection::hasRelocationSection(); }
      // Forward the caller's symbol table instead of silently replacing it
      // with the base class default argument.
      RelocationSection* relocationSection(SymbolTable* symtab) override { return GElfSection::relocationSection(symtab); }
      // Adds a note record with the given owner name, type and descriptor.
      bool addNote(const std::string& name, uint32_t type, const void* desc, uint32_t desc_size) override;
      // Looks up a note by owner name and type; results are returned through
      // `desc` and `desc_size`.
      bool getNote(const std::string& name, uint32_t type, void** desc, uint32_t* desc_size) override;
      RelocationSection* asRelocationSection() override { return 0; }
      uint64_t memSize() const override { return GElfSection::memSize(); }
      bool setMemSize(uint64_t s) override { return GElfSection::setMemSize(s); }
      uint64_t memAlign() const override { return GElfSection::memAlign(); }
      bool setAlign(uint64_t a) override { return GElfSection::setAlign(a); }
    };

    class GElfRelocationSection;

    // Relocation backed by a GElf_Rela record stored inside a Buffer. The
    // record is located by its byte offset (eindex) within that buffer.
    class GElfRelocation : public Relocation {
    private:
      // Looks the record up on every call rather than caching a pointer.
      GElf_Rela *Rela() { return edata.get<GElf_Rela*>(eindex); }

      // Relocation section this entry belongs to.
      GElfRelocationSection* rsection;
      // Buffer holding the relocation records.
      Buffer &edata;
      // Byte offset of this relocation's record within edata.
      size_t eindex;

    public:
      GElfRelocation(GElfRelocationSection* rsection_, Buffer &edata_, size_t eindex_)
        : rsection(rsection_),
          edata(edata_), eindex(eindex_)
      {
      }

      bool push(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend);

      RelocationSection* section() override;
      // Relocation type and symbol index are both decoded from r_info.
      uint32_t type() override { return GELF_R_TYPE(Rela()->r_info); }
      uint32_t symbolIndex() override { return GELF_R_SYM(Rela()->r_info); }
      Symbol* symbol() override;
      uint64_t offset() override { return Rela()->r_offset; }
      int64_t addend() override { return Rela()->r_addend; }
    };

    // Relocation section. The Section interface is inherited through both
    // GElfSection and RelocationSection, so every Section method is
    // overridden here and forwarded to the GElfSection implementation.
    class GElfRelocationSection : public GElfSection, public RelocationSection {
    private:
      // Section the relocations apply to (returned by targetSection()).
      Section* section;
      // Symbol table supplied at construction.
      GElfSymbolTable* symtab;
      // Relocation objects owned by this section.
      std::vector<std::unique_ptr<GElfRelocation>> relocations;

    public:
      GElfRelocationSection(GElfImage* elf, Section* targetSection = 0, GElfSymbolTable* symtab_ = 0);
      bool push(const std::string& name);
      bool pullData() override;
      uint16_t getSectionIndex() const override { return GElfSection::getSectionIndex(); }
      uint32_t type() const override { return GElfSection::type(); }
      std::string Name() const override { return GElfSection::Name(); }
      uint64_t addr() const override { return GElfSection::addr(); }
      uint64_t offset() const override { return GElfSection::offset(); }
      bool updateAddr(uint64_t addr) override { return GElfSection::updateAddr(addr); }
      uint64_t addralign() const override { return GElfSection::addralign(); }
      uint64_t flags() const override { return GElfSection::flags(); }
      uint64_t size() const override { return GElfSection::size(); }
      Segment* segment() override { return GElfSection::segment(); }
      uint64_t nextDataOffset(uint64_t align) const override { return GElfSection::nextDataOffset(align); }
      uint64_t addData(const void *src, uint64_t size, uint64_t align) override { return GElfSection::addData(src, size, align); }
      bool getData(uint64_t offset, void* dest, uint64_t size) override { return GElfSection::getData(offset, dest, size); }
      bool hasRelocationSection() const override { return GElfSection::hasRelocationSection(); }
      // Forward the caller's symbol table instead of silently replacing it
      // with the base class default argument. The parameter intentionally
      // hides the member of the same name inside this function.
      RelocationSection* relocationSection(SymbolTable* symtab) override { return GElfSection::relocationSection(symtab); }
      RelocationSection* asRelocationSection() override { return this; }

      size_t relocationCount() const override { return relocations.size(); }
      // No bounds check: `i` must be smaller than relocationCount().
      Relocation* relocation(size_t i) override { return relocations[i].get(); }
      Relocation* addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend) override;
      Section* targetSection() override { return section; }
      uint64_t memSize() const override { return GElfSection::memSize(); }
      bool setMemSize(uint64_t s) override { return GElfSection::setMemSize(s); }
      uint64_t memAlign() const override { return GElfSection::memAlign(); }
      bool setAlign(uint64_t a) override { return GElfSection::setAlign(a); }
      friend class GElfRelocation;
    };

    // Image implementation built on libelf/gelf. Owns the segment and section
    // objects and keeps direct pointers to the well-known sections.
    class GElfImage : public Image {
    public:
      GElfImage(int elfclass);
      ~GElfImage();
      bool initNew(uint16_t machine, uint16_t type, uint8_t os_abi = 0, uint8_t abi_version = 0, uint32_t e_flags = 0) override;
      bool loadFromFile(const std::string& filename) override;
      bool saveToFile(const std::string& filename) override;
      bool initFromBuffer(const void* buffer, size_t size) override;
      bool initAsBuffer(const void* buffer, size_t size) override;
      bool close();
      bool writeTo(const std::string& filename) override;
      bool copyToBuffer(void** buf, size_t* size = 0) override;
      bool copyToBuffer(void* buf, size_t size) override;

      // Raw image bytes; only valid once `buffer` has been set.
      const char* data() override { assert(buffer); return buffer; }
      uint64_t size() override;

      bool push();

      // Pushes the image and marks it frozen. Must not already be frozen.
      bool Freeze() override;
      // Checks the ELF magic bytes and version stored in the ELF header.
      bool Validate() override;

      // Accessors for fields of the ELF header.
      uint16_t Machine() override { return ehdr.e_machine; }
      uint16_t Type() override { return ehdr.e_type; }
      uint32_t EFlags() override { return ehdr.e_flags; }
      uint32_t ABIVersion() override { return (uint32_t)(ehdr.e_ident[EI_ABIVERSION]); }
      uint32_t EClass() override { return (uint32_t)(ehdr.e_ident[EI_CLASS]); }
      uint32_t OsAbi() override { return (uint32_t)(ehdr.e_ident[EI_OSABI]); }

      GElfStringTable* shstrtab() override;
      GElfStringTable* strtab() override;
      // Returns section `index` as a symbol table without checking its type;
      // the caller must know the section really is a symbol table.
      GElfSymbolTable* getReferencedSymbolTable(uint16_t index)
      {
        return static_cast<GElfSymbolTable*>(section(index));
      }
      // Returns section `index` if its type is SHT_SYMTAB, otherwise null.
      // `index` is not range-checked.
      GElfSymbolTable* getSymtab(uint16_t index) override
      {
        if (section(index)->type() == SHT_SYMTAB)
          return static_cast<GElfSymbolTable*>(section(index));
        return nullptr;
      }
      // Returns section `index` if its type is SHT_DYNSYM, otherwise null.
      // `index` is not range-checked.
      GElfSymbolTable* getDynsym(uint16_t index) override
      {
        if (section(index)->type() == SHT_DYNSYM)
          return static_cast<GElfSymbolTable*>(section(index));
        return nullptr;
      }

      GElfSymbolTable* getSymbolTable() override;
      // Uses getDynsym() when the LOADER_USE_DYNSYM environment variable is
      // set and does not start with '0'; uses getSymtab() otherwise.
      GElfSymbolTable* getSymbolTable(uint16_t index) override
      {
        const char *UseDynsym = getenv("LOADER_USE_DYNSYM");
        if (UseDynsym && std::strncmp(UseDynsym, "0", 1) != 0)
          return getDynsym(index);
        return getSymtab(index);
      }

      GElfStringTable* addStringTable(const std::string& name) override;
      GElfStringTable* getStringTable(uint16_t index) override;

      GElfSymbolTable* addSymbolTable(const std::string& name, StringTable* stab = 0) override;
      GElfSymbolTable* symtab() override;
      GElfSymbolTable* dynsym() override;

      // Element accessors are not range-checked.
      GElfSegment* segment(size_t i) override { return segments[i].get(); }
      Segment* segmentByVAddr(uint64_t vaddr) override;
      size_t sectionCount() override { return sections.size(); }
      GElfSection* section(size_t i) override { return sections[i].get(); }
      Section* sectionByVAddr(uint64_t vaddr) override;
      uint16_t machine() const;
      uint16_t etype() const;
      int eclass() const { return elfclass; }
      bool elfError(const char* msg);

      GElfNoteSection* note() override;
      GElfNoteSection* addNoteSection(const std::string& name) override;

      size_t segmentCount() override { return segments.size(); }
      Segment* initSegment(uint32_t type, uint32_t flags, uint64_t paddr = 0) override;
      bool addSegments() override;

      Section* addSection(const std::string &name,
                          uint32_t type,
                          uint64_t flags = 0,
                          uint64_t entsize = 0,
                          Segment* segment = 0) override;

      RelocationSection* addRelocationSection(Section* sec, SymbolTable* symtab);
      RelocationSection* relocationSection(Section* sec, SymbolTable* symtab = 0) override;

    private:
      // Set by Freeze() after a successful push().
      bool frozen;
      int elfclass;
      // Backing file for the image.
      FileImage img;
      // Raw image bytes returned by data().
      const char* buffer;
      size_t bufferSize;
      // libelf handle.
      Elf* e;
      // ELF header.
      GElf_Ehdr ehdr;
      GElfStringTable* shstrtabSection;
      GElfStringTable* strtabSection;
      GElfSymbolTable* symtabSection;
      GElfSymbolTable* dynsymSection;
      GElfNoteSection* noteSection;
      // Owned segment and section objects.
      std::vector<std::unique_ptr<GElfSegment>> segments;
      std::vector<std::unique_ptr<GElfSection>> sections;

      bool imgError();
      const char *elfError();
      bool elfBegin(Elf_Cmd cmd);
      bool elfEnd();
      bool push0();
      bool pullElf();

      friend class GElfSection;
      friend class GElfSymbolTable;
      friend class GElfNoteSection;
      friend class GElfRelocationSection;
      friend class GElfSegment;
      friend class GElfSymbol;
    };

    // Creates segment `index_` of image `elf_` with an all-zero program header.
    GElfSegment::GElfSegment(GElfImage* elf_, uint16_t index_)
      : elf(elf_), index(index_)
    {
      memset(&phdr, 0, sizeof phdr);
    }

    // Creates the descriptor for program header number `index_` of image
    // `elf_`. The cached program header is zeroed and then seeded with the
    // requested type, flags and physical address.
    GElfSegment::GElfSegment(GElfImage* elf_, uint16_t index_,
                             uint32_t type, uint32_t flags, uint64_t paddr)
      : elf(elf_), index(index_)
    {
      memset(&phdr, 0, sizeof phdr);
      phdr.p_paddr = paddr;
      phdr.p_flags = flags;
      phdr.p_type = type;
    }

    // Returns the address of this segment's bytes: the owning image's data
    // pointer advanced by the segment's file offset (p_offset).
    const char* GElfSegment::data() const
    {
      const char* const imageBase = (const char*) elf->data();
      return imageBase + phdr.p_offset;
    }

    bool GElfImage::Freeze()
    {
      assert(!frozen);
      if (!push()) { return false; }
      frozen = true;
      return true;
    }

    // Checks the cached ELF header: the four magic bytes must match and
    // e_version must be EV_CURRENT. Writes a diagnostic to `out` and returns
    // false on the first mismatch.
    bool GElfImage::Validate()
    {
      const bool magicOk = ehdr.e_ident[EI_MAG0] == ELFMAG0 &&
                           ehdr.e_ident[EI_MAG1] == ELFMAG1 &&
                           ehdr.e_ident[EI_MAG2] == ELFMAG2 &&
                           ehdr.e_ident[EI_MAG3] == ELFMAG3;
      if (!magicOk) {
        out << "Invalid ELF magic" << std::endl;
        return false;
      }
      if (ehdr.e_version != EV_CURRENT) {
        out << "Invalid ELF version" << std::endl;
        return false;
      }
      return true;
    }

    bool GElfSegment::push(uint64_t vaddr)
    {
      phdr.p_align = 0;
      phdr.p_offset = 0;
      if (!sections.empty()) {
        phdr.p_offset = sections[0]->offset();
      }
      for (Section* section : sections) {
        phdr.p_align = (std::max)(phdr.p_align, section->memAlign());
      }
      phdr.p_vaddr = alignUp(vaddr, (std::max)(phdr.p_align, (uint64_t) 1));
      phdr.p_filesz = 0;
      phdr.p_memsz = 0;
      for (Section* section : sections) {
        phdr.p_memsz = alignUp(phdr.p_memsz, (std::max)(section->memAlign(), (uint64_t) 1));
        phdr.p_filesz = alignUp(phdr.p_filesz, (std::max)(section->memAlign(), (uint64_t) 1));
        if (!section->updateAddr(phdr.p_vaddr + phdr.p_memsz)) { return false; }
        phdr.p_filesz += (section->type() == SHT_NOBITS) ? 0 : section->size();
        phdr.p_memsz += section->memSize();
      }
      if (!gelf_update_phdr(elf->e, index, &phdr)) { return elf->elfError("gelf_update_phdr failed"); }
      return true;
    }

    // Refreshes the cached program header from libelf for this segment's index.
    // Reports the failure through the image's elfError() and returns false if
    // gelf_getphdr fails.
    bool GElfSegment::pull()
    {
      if (gelf_getphdr(elf->e, index, &phdr)) {
        return true;
      }
      return elf->elfError("gelf_getphdr failed");
    }

    uint16_t GElfSegment::getSegmentIndex()
    {
      return index;
    }

    // Appends `section` to this segment's list of member sections.
    // Always returns true.
    bool GElfSegment::updateAddSection(Section *section)
    {
      sections.push_back(section);
      return true;
    }

    // Creates a section wrapper owned by image `elf_`. The section starts with
    // no segment, a value-initialized header, zero memory size and alignment,
    // no relocation section, and section index 0.
    GElfSection::GElfSection(GElfImage* elf_)
      : elf(elf_),
        seg(nullptr),
        hdr{},
        memsize_(0),
        align_(0),
        reloc_sec(nullptr),
        ndxscn(0)
    {
    }

    // Returns the libelf section index of this section, narrowed to 16 bits.
    uint16_t GElfSection::getSectionIndex() const
    {
      return static_cast<uint16_t>(ndxscn);
    }

    // Returns this section's name, looked up in the image's section-header
    // string table by sh_name. GElfStringTable::getString() returns nullptr for
    // an index it does not contain; in that case an empty string is returned
    // instead of constructing std::string from a null pointer (undefined
    // behaviour).
    std::string GElfSection::Name() const
    {
      const char* str = elf->shstrtab()->getString(hdr.sh_name);
      return str ? std::string(str) : std::string();
    }

    bool GElfSection::updateAddr(uint64_t addr)
    {
      Elf_Scn *scn = elf_getscn(elf->e, ndxscn);
      assert(scn);
      if (!gelf_getshdr(scn, &hdr)) { return elf->elfError("gelf_get_shdr failed"); }
      hdr.sh_addr = addr;
      if (!gelf_update_shdr(scn, &hdr)) { return elf->elfError("gelf_update_shdr failed"); }
      return true;
    }

    bool GElfSection::push(const char* name, uint32_t shtype, uint64_t shflags, uint16_t shlink, uint32_t info, uint32_t align, uint64_t entsize)
    {
      Elf_Scn *scn = elf_newscn(elf->e);
      if (!scn) { return false; }
      ndxscn = elf_ndxscn(scn);
      if (!gelf_getshdr(scn, &hdr)) { return elf->elfError("gelf_get_shdr failed"); }
      align = (std::max)(align, (uint32_t) 8);
      hdr.sh_name = elf->shstrtab()->addString1(name);
      hdr.sh_type = shtype;
      hdr.sh_flags = shflags;
      hdr.sh_link = shlink;
      hdr.sh_addr = 0;
      hdr.sh_info = info;
      hdr.sh_addralign = align;
      hdr.sh_entsize = entsize;
      if (!gelf_update_shdr(scn, &hdr)) { return elf->elfError("gelf_update_shdr failed"); }
      return true;
    }

    bool GElfSection::pull0()
    {
      Elf_Scn *scn = elf_getscn(elf->e, ndxscn);
      if (!scn) { return false; }
      if (!gelf_getshdr(scn, &hdr)) { return elf->elfError("gelf_get_shdr failed"); }
      return true;
    }

    // Binds this object to existing section number `ndx`: reads its header,
    // wraps the first libelf data descriptor (if any) in `data0`, and looks up
    // the segment containing the section's address. Returns false if the
    // section or its header cannot be obtained.
    bool GElfSection::pull(uint16_t ndx)
    {
      ndxscn = (size_t) ndx;
      if (!pull0()) { return false; }

      Elf_Scn* const scn = elf_getscn(elf->e, ndx);
      if (scn == nullptr) { return false; }

      if (Elf_Data* const first = elf_getdata(scn, NULL)) {
        data0 = Buffer((const Buffer::byte_type*) first->d_buf, first->d_size, first->d_align);
      }
      seg = elf->segmentByVAddr(hdr.sh_addr);
      return true;
    }

    bool GElfSection::push()
    {
      Elf_Scn *scn = elf_getscn(elf->e, ndxscn);
      assert(scn);
      Elf_Data *edata = nullptr;
      edata = elf_newdata(scn);
      if (!edata) { return elf->elfError("elf_newdata failed"); }
      if (hdr.sh_type == SHT_NOBITS) {
        edata->d_buf = 0;
        edata->d_size = memsize_;
        if (align_ != 0) {
          edata->d_align = align_;
        }
      } else {
        edata->d_buf = (void*)data.raw();
        edata->d_size = data.size();
        if (data.align() != 0) {
          edata->d_align = data.align();
        }
      }
      edata->d_align = (std::max)(edata->d_align, (uint64_t) 8);
      switch (hdr.sh_type) {
      case SHT_RELA:
        edata->d_type = ELF_T_RELA;
        break;
      case SHT_SYMTAB:
        edata->d_type = ELF_T_SYM;
        break;
      default:
        edata->d_type = ELF_T_BYTE;
        break;
      }
      edata->d_version = EV_CURRENT;
      if (!gelf_getshdr(scn, &hdr)) { return elf->elfError("gelf_get_shdr failed"); }
      hdr.sh_size = edata->d_size;
      hdr.sh_addralign = edata->d_align;
      if (!gelf_update_shdr(scn, &hdr)) { return elf->elfError("gelf_update_shdr failed"); }
      return true;
    }

    // Forwards to the data buffer's nextOffset() for the given alignment.
    uint64_t GElfSection::nextDataOffset(uint64_t align) const
    {
      const uint64_t next = data.nextOffset(align);
      return next;
    }

    // Appends `size` bytes from `src` to the section's data buffer with the
    // requested alignment and returns whatever the buffer's add() reports.
    uint64_t GElfSection::addData(const void *src, uint64_t size, uint64_t align)
    {
      const uint64_t where = data.add(src, size, align);
      return where;
    }

    // Copies `size` bytes starting at byte `offset` of the section's first
    // libelf data descriptor into `dest`.
    // Returns true only if the whole range was available and copied. If the
    // range runs past the end of the descriptor, the available prefix is still
    // copied (as before) and false is returned.
    // Fix: a descriptor without a buffer (d_buf is null, e.g. for SHT_NOBITS
    // contents) or a null `dest` is now rejected instead of being handed to
    // memcpy.
    bool GElfSection::getData(uint64_t offset, void* dest, uint64_t size)
    {
      Elf_Scn* scn = elf_getscn(elf->e, ndxscn);
      assert(scn);
      Elf_Data* edata = elf_getdata(scn, nullptr);
      if (!edata || offset > edata->d_size) { return false; }

      const uint64_t available = edata->d_size - offset;
      const uint64_t csize = (std::min)(size, available);
      if (csize != 0) {
        if (!edata->d_buf || !dest) { return false; }
        memcpy(dest, (const char*) edata->d_buf + offset, csize);
      }
      return csize == size;
    }

    // Returns the relocation section associated with this section, creating it
    // through the image (against `symtab`) on first use and caching it.
    RelocationSection* GElfSection::relocationSection(SymbolTable* symtab)
    {
      if (reloc_sec) { return reloc_sec; }
      reloc_sec = elf->addRelocationSection(this, symtab);
      return reloc_sec;
    }

    // Creates an empty string-table section belonging to image `elf`.
    GElfStringTable::GElfStringTable(GElfImage* elf)
      : GElfSection(elf)
    {
    }

    // Creates the libelf section for this string table with the given name,
    // type and flags (no link, info 0, default alignment).
    bool GElfStringTable::push(const char* name, uint32_t shtype, uint64_t shflags)
    {
      const bool created = GElfSection::push(name, shtype, shflags, SHN_UNDEF, 0, 0);
      return created;
    }

    // String tables need no extra decoding after the section is pulled;
    // always returns true.
    bool GElfStringTable::pullData()
    {
      return true;
    }

    // Adds `s` to the table and returns a pointer to the stored copy.
    // When both the loaded (data0) and new (data) buffers are still empty, a
    // leading '\0' byte is added first.
    const char* GElfStringTable::addString(const std::string& s)
    {
      const bool tableEmpty = (data0.size() == 0 && data.size() == 0);
      if (tableEmpty) {
        data.add('\0');
      }
      const auto where = data.addString(s);
      return data.get<const char*>(where);
    }

    // Adds `s` to the table and returns the value reported by the buffer's
    // addString(). When both the loaded (data0) and new (data) buffers are
    // still empty, a leading '\0' byte is added first.
    size_t GElfStringTable::addString1(const std::string& s)
    {
      const bool tableEmpty = (data0.size() == 0 && data.size() == 0);
      if (tableEmpty) {
        data.add('\0');
      }
      return data.addString(s);
    }

    // Returns the string stored at index `ndx`, looking in the loaded buffer
    // (data0) first and then in the newly added data. Returns nullptr when
    // neither buffer contains that index.
    const char* GElfStringTable::getString(size_t ndx)
    {
      if (data0.has(ndx)) {
        return data0.get<const char*>(ndx);
      }
      if (data.has(ndx)) {
        return data.get<const char*>(ndx);
      }
      return nullptr;
    }

    // Returns the offset of `s` within whichever buffer (data0 first, then
    // data) reports that it holds that pointer. Asserts, and returns 0 in
    // release builds, if neither buffer does.
    size_t GElfStringTable::getStringIndex(const char* s)
    {
      if (data0.has(s)) { return data0.getOffset(s); }
      if (data.has(s)) { return data.getOffset(s); }
      assert(false);
      return 0;
    }

    // Creates a symbol view: `symtab_` is the owning symbol table, `data_` the
    // buffer holding the symbol record and `index_` the position of the record
    // within that buffer.
    GElfSymbol::GElfSymbol(GElfSymbolTable* symtab_, Buffer &data_, size_t index_)
      : symtab(symtab_),
        edata(data_),
        eindex(index_)
    {
    }

    // Returns the section this symbol is defined in, or null when the symbol's
    // section index is SHN_UNDEF.
    Section* GElfSymbol::section()
    {
      const auto shndx = Sym()->st_shndx;
      if (shndx == SHN_UNDEF) {
        return 0;
      }
      return symtab->elf->section(shndx);
    }

    // Fills in this symbol's record.
    //   name          - symbol name; added to the symbol table's string table
    //   value, size   - st_value / st_size
    //   type, binding - combined into st_info with GELF_ST_INFO
    //   shndx         - index of the defining section
    //   other         - st_other
    // Always returns true.
    bool GElfSymbol::push(const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, uint16_t shndx, unsigned char other)
    {
      const auto nameIndex = symtab->strtab->addString1(name.c_str());
      auto entry = Sym();
      entry->st_name = nameIndex;
      entry->st_value = value;
      entry->st_size = size;
      entry->st_info = GELF_ST_INFO(binding, type);
      entry->st_shndx = shndx;
      entry->st_other = other;
      return true;
    }

    // Returns the symbol's name from the symbol table's string table.
    // GElfStringTable::getString() returns nullptr for an index it does not
    // contain; in that case an empty string is returned instead of converting
    // a null pointer to std::string (undefined behaviour).
    std::string GElfSymbol::name()
    {
      const char* str = symtab->strtab->getString(Sym()->st_name);
      return str ? std::string(str) : std::string();
    }

    // Creates an empty symbol table for image `elf`; the associated string
    // table is not yet set.
    GElfSymbolTable::GElfSymbolTable(GElfImage* elf)
      : GElfSection(elf), strtab(nullptr)
    {
    }

    bool GElfSymbolTable::push(const char* name, GElfStringTable* strtab)
    {
      if (!strtab) { strtab = elf->strtab(); }
      this->strtab = strtab;
      if (!GElfSection::push(name, SHT_SYMTAB, 0, strtab->getSectionIndex(), 0, 0, sizeof(Elf64_Sym))) { return false;  }
      return true;
    }

    // Resolves the linked string table (sh_link) and creates one GElfSymbol
    // per GElf_Sym-sized record in the loaded buffer. Always returns true.
    bool GElfSymbolTable::pullData()
    {
      strtab = elf->getStringTable(hdr.sh_link);
      const size_t count = data0.size() / sizeof(GElf_Sym);
      for (size_t idx = 0; idx != count; ++idx) {
        const size_t recordOffset = idx * sizeof(GElf_Sym);
        symbols.push_back(std::unique_ptr<GElfSymbol>(new GElfSymbol(this, data0, recordOffset)));
      }
      return true;
    }

    // Reserves a GElf_Sym record in the section data, fills it in and registers
    // the resulting symbol with this table.
    //   section - defining section, or null for an undefined (SHN_UNDEF) symbol
    // Returns the new symbol, or nullptr if allocation or initialization
    // fails. Fix: the nothrow allocation is now checked before use, matching
    // GElfRelocationSection::addRelocation.
    Symbol* GElfSymbolTable::addSymbolInternal(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other)
    {
      GElfSymbol *sym = new (std::nothrow) GElfSymbol(this, data, data.reserve<GElf_Sym>());
      uint16_t shndx = section ? section->getSectionIndex() : (uint16_t) SHN_UNDEF;
      if (!sym || !sym->push(name, value, size, type, binding, shndx, other)) {
        delete sym;
        return nullptr;
      }
      symbols.push_back(std::unique_ptr<GElfSymbol>(sym));
      return sym;
    }

    // Adds a symbol to the table. If the table is still empty, an all-zero,
    // unnamed, section-less entry is inserted first so that the caller's
    // symbol is never the first entry.
    Symbol* GElfSymbolTable::addSymbol(Section* section, const std::string& name, uint64_t value, uint64_t size, unsigned char type, unsigned char binding, unsigned char other)
    {
      const bool needsLeadingEntry = symbols.empty();
      if (needsLeadingEntry) {
        addSymbolInternal(nullptr, "", 0, 0, 0, 0, 0);
      }
      return addSymbolInternal(section, name, value, size, type, binding, other);
    }

    // Returns the number of symbols currently registered in this table.
    size_t GElfSymbolTable::symbolCount()
    {
      return symbols.size();
    }

    // Returns the i-th symbol of this table. `i` is not range-checked.
    Symbol* GElfSymbolTable::symbol(size_t i)
    {
      return symbols[i].get();
    }

    // Creates an empty note section belonging to image `elf`.
    GElfNoteSection::GElfNoteSection(GElfImage* elf)
      : GElfSection(elf)
    {
    }

    // Creates the libelf SHT_NOTE section called `name` with no flags, link or
    // info and an alignment of 8.
    bool GElfNoteSection::push(const std::string& name)
    {
      const char* const sectionName = name.c_str();
      return GElfSection::push(sectionName, SHT_NOTE, 0, 0, 0, 8);
    }

    // Appends one note record to the section data. In order, the buffer
    // receives: the name length (addStringLength), `desc_size`, `type`, the
    // name string and, when `desc_size` is non-zero, the descriptor bytes.
    // Every piece is added with NOTE_RECORD_ALIGNMENT. `desc` must be non-null
    // when `desc_size` > 0 (asserted). Always returns true.
    bool GElfNoteSection::addNote(const std::string& name, uint32_t type, const void* desc, uint32_t desc_size)
    {
      // Header fields followed by the padded name.
      data.addStringLength(name, NOTE_RECORD_ALIGNMENT);
      data.add(desc_size, NOTE_RECORD_ALIGNMENT);
      data.add(type, NOTE_RECORD_ALIGNMENT);
      data.addString(name, NOTE_RECORD_ALIGNMENT);
      data.align(NOTE_RECORD_ALIGNMENT);

      if (desc_size == 0) {
        return true;
      }

      // Padded descriptor payload.
      assert(desc);
      data.add(desc, desc_size, NOTE_RECORD_ALIGNMENT);
      data.align(NOTE_RECORD_ALIGNMENT);
      return true;
    }

    bool GElfNoteSection::getNote(const std::string& name, uint32_t type, void** desc, uint32_t* desc_size)
    {
      Elf_Data* data = 0;
      Elf_Scn *scn = elf_getscn(elf->e, ndxscn);
      assert(scn);
      while ((data = elf_getdata(scn, data)) != 0) {
        uint32_t note_offset = 0;
        while (note_offset < data->d_size) {
          char* notec = (char *) data->d_buf + note_offset;
          Elf64_Nhdr* note = (Elf64_Nhdr*) notec;
          if (type == note->n_type) {
            std::string note_name = GetNoteString(note->n_namesz, notec + sizeof(Elf64_Nhdr));
            if (name == note_name) {
              *desc = notec + sizeof(Elf64_Nhdr) + alignUp(note->n_namesz, 4);
              *desc_size = note->n_descsz;
              return true;
            }
          }
          note_offset += sizeof(Elf64_Nhdr) + alignUp(note->n_namesz, 4) + alignUp(note->n_descsz, 4);
        }
      }
      return false;
    }

    // Fills in this relocation record: r_info combines the symbol's index with
    // `type` (GELF_R_INFO); r_offset and r_addend are stored as given.
    // Always returns true.
    bool GElfRelocation::push(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend)
    {
      const uint64_t symIndex = (uint64_t) symbol->index();
      auto entry = Rela();
      entry->r_info = GELF_R_INFO(symIndex, type);
      entry->r_offset = offset;
      entry->r_addend = addend;
      return true;
    }

    // Returns the relocation section that owns this relocation.
    RelocationSection* GElfRelocation::section()
    {
      return rsection;
    }

    // Returns the symbol this relocation refers to, looked up by symbolIndex()
    // in the owning relocation section's symbol table.
    Symbol* GElfRelocation::symbol()
    {
      const auto which = symbolIndex();
      return rsection->symtab->symbol(which);
    }

    // Creates a relocation section in image `elf` that targets `section_` and
    // resolves symbols through `symtab_`.
    GElfRelocationSection::GElfRelocationSection(GElfImage* elf, Section* section_, GElfSymbolTable* symtab_)
      : GElfSection(elf),
        section(section_),
        symtab(symtab_)
    {
    }

    // Creates the libelf SHT_RELA section called `name`. sh_link is the symbol
    // table's section index, sh_info the target section's index, and the entry
    // size is sizeof(Elf64_Rela).
    bool GElfRelocationSection::push(const std::string& name)
    {
      const uint16_t linkIndex = symtab->getSectionIndex();
      const uint16_t infoIndex = section->getSectionIndex();
      return GElfSection::push(name.c_str(), SHT_RELA, 0, linkIndex, infoIndex, 0, sizeof(Elf64_Rela));
    }

    // Reserves a GElf_Rela record in the section data, fills it in and
    // registers the relocation with this section. Returns the new relocation,
    // or nullptr if allocation or initialization fails.
    Relocation* GElfRelocationSection::addRelocation(uint32_t type, Symbol* symbol, uint64_t offset, int64_t addend)
    {
      GElfRelocation* const entry = new (std::nothrow) GElfRelocation(this, data, data.reserve<GElf_Rela>());
      const bool ok = entry && entry->push(type, symbol, offset, addend);
      if (!ok) {
        delete entry;
        return nullptr;
      }
      relocations.push_back(std::unique_ptr<GElfRelocation>(entry));
      return entry;
    }

    bool GElfRelocationSection::pullData()
    {
      section = elf->section(hdr.sh_info);
      symtab = elf->getReferencedSymbolTable(hdr.sh_link);
      Elf_Scn *lScn = elf_getscn(elf->e, ndxscn);
      assert(lScn);
      Elf_Data *lData = elf_getdata(lScn, nullptr);
      assert(lData);
      data0 = Buffer((const Buffer::byte_type*)lData->d_buf, lData->d_size, lData->d_align);
      for (size_t i = 0; i < data0.size() / sizeof(GElf_Rela); ++i) {
        relocations.push_back(std::unique_ptr<GElfRelocation>(new GElfRelocation(this, data0, i * sizeof(GElf_Rela))));
      }
      return true;
    }

    // Creates an image object for the given ELF class. The object starts out
    // frozen, with no buffer, no libelf handle and no well-known sections.
    // Also registers the libelf API version in use; a refusal from libelf
    // trips an assertion.
    GElfImage::GElfImage(int elfclass_)
      : frozen(true),
        elfclass(elfclass_),
        buffer(0), bufferSize(0),
        e(0),
        shstrtabSection(0), strtabSection(0),
        symtabSection(0),
        dynsymSection(0),
        noteSection(0)
    {
      const bool libelfReady = (elf_version(EV_CURRENT) != EV_NONE);
      assert(libelfReady);
      (void) libelfReady;
    }

    // Releases the libelf handle held by this image.
    GElfImage::~GElfImage()
    {
      elf_end(e);
    }

    // Copies the file image's accumulated output into `out` and returns false,
    // so callers can write `return imgError();`.
    bool GElfImage::imgError()
    {
      out << img.output();
      return false;
    }

    // Returns libelf's message for its most recent error (elf_errmsg(-1)).
    const char *GElfImage::elfError()
    {
      return elf_errmsg(-1);
    }

    // Opens a libelf handle on the file image's descriptor with command `cmd`
    // and stores it in `e`. The AMD libelf variant takes one extra (null)
    // argument. On failure, logs the libelf error to `out` and returns false.
    bool GElfImage::elfBegin(Elf_Cmd cmd)
    {
      e = elf_begin(img.fd(), cmd, NULL
#ifdef AMD_LIBELF
                    , NULL
#endif
                    );
      if (e != NULL) {
        return true;
      }
      out << "elf_begin failed: " << elfError() << std::endl;
      return false;
    }

    bool GElfImage::initNew(uint16_t machine, uint16_t type, uint8_t os_abi, uint8_t abi_version, uint32_t e_flags)
    {
      if (!img.create()) { return imgError(); }
      if (!elfBegin(ELF_C_WRITE)) { return false; }
      if (!gelf_newehdr(e, elfclass)) { return elfError("gelf_newehdr failed"); }
      if (!gelf_getehdr(e, &ehdr)) { return elfError("gelf_getehdr failed"); }
      ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
      ehdr.e_ident[EI_VERSION] = EV_CURRENT;
      ehdr.e_ident[EI_OSABI] = os_abi;
      ehdr.e_ident[EI_ABIVERSION] = abi_version;
      ehdr.e_machine = machine;
      ehdr.e_type = type;
      ehdr.e_version = EV_CURRENT;
      ehdr.e_flags = e_flags;
      if (!gelf_update_ehdr(e, &ehdr)) { return elfError("gelf_updateehdr failed"); }
      sections.push_back(std::unique_ptr<GElfSection>());
      if (!shstrtab()->push(".shstrtab", SHT_STRTAB, SHF_STRINGS)) { return elfError("Failed to create shstrtab"); }
      ehdr.e_shstrndx = shstrtab()->getSectionIndex();
      if (!gelf_update_ehdr(e, &ehdr)) { return elfError("gelf_updateehdr failed"); }
      if (!strtab()->push(".strtab", SHT_STRTAB, SHF_STRINGS)) { return elfError("Failed to create strtab"); }
      frozen = false;
      return true;
    }

    // Loads an existing ELF file: creates the backing file image, reads
    // `filename` into it, opens it with libelf in read/write mode and decodes
    // it via pullElf(). Returns false on any failure.
    bool GElfImage::loadFromFile(const std::string& filename)
    {
      const bool imageReady = img.create() && img.readFrom(filename);
      if (!imageReady) { return imgError(); }
      return elfBegin(ELF_C_RDWR) && pullElf();
    }

    // Writes the image to `filename`.
    // A buffer-backed image (initAsBuffer) is written verbatim from that
    // buffer; otherwise the image is pushed to libelf and the backing file
    // image is written out. Returns false on any failure.
    bool GElfImage::saveToFile(const std::string& filename)
    {
      if (!buffer) {
        if (!push()) { return false; }
        return img.writeTo(filename);
      }

      std::ofstream file(filename.c_str(), std::ios::binary);
      if (file.fail()) { return false; }
      file.write(buffer, bufferSize);
      return !file.fail();
    }

    // Initializes the image from a copy of `buffer`. A `size` of 0 means "work
    // it out with ElfSize()". The bytes are copied into the backing file
    // image, which is then opened with libelf in read/write mode and decoded
    // via pullElf(). Returns false on any failure.
    bool GElfImage::initFromBuffer(const void* buffer, size_t size)
    {
      const size_t byteCount = (size == 0) ? ElfSize(buffer) : size;
      if (!img.create()) { return imgError(); }
      if (!img.copyFrom(buffer, byteCount)) { return imgError(); }
      return elfBegin(ELF_C_RDWR) && pullElf();
    }

    // Initializes the image directly on top of the caller's memory (no copy).
    // A `size` of 0 means "work it out with ElfSize()". The buffer pointer and
    // size are remembered in the object, so the memory must stay valid while
    // the image is in use. Returns false if libelf rejects the buffer or
    // pullElf() fails.
    bool GElfImage::initAsBuffer(const void* buffer, size_t size)
    {
      if (size == 0) { size = ElfSize(buffer); }
      char* const bytes = static_cast<char*>(const_cast<void*>(buffer));
      e = elf_memory(bytes, size
#ifdef AMD_LIBELF
                     , NULL
#endif
                     );
      if (e == NULL) {
        out << "elf_begin(buffer) failed: " << elfError() << std::endl;
        return false;
      }
      this->buffer = static_cast<const char*>(buffer);
      this->bufferSize = size;
      return pullElf();
    }

    bool GElfImage::pullElf()
    {
      if (!gelf_getehdr(e, &ehdr)) { return elfError("gelf_getehdr failed"); }
      segments.reserve(ehdr.e_phnum);
      for (size_t i = 0; i < ehdr.e_phnum; ++i) {
        GElfSegment* segment = new GElfSegment(this, i);
        segment->pull();
        segments.push_back(std::unique_ptr<GElfSegment>(segment));
      }

      shstrtabSection = new GElfStringTable(this);
      if (!shstrtabSection->pull(ehdr.e_shstrndx)) { return false; }
      Elf_Scn* scn = 0;
      for (unsigned n = 0; n < ehdr.e_shnum; ++n) {
        scn = elf_getscn(e, n);
        if (n == ehdr.e_shstrndx) {
          sections.push_back(std::unique_ptr<GElfSection>(shstrtabSection));
          continue;
        }
        GElf_Shdr shdr;
        if (!gelf_getshdr(scn, &shdr)) { return elfError("Failed to get shdr"); }
        GElfSection* section = 0;
        if (shdr.sh_type == SHT_NOTE) {
          section = new GElfNoteSection(this);
        } else if (shdr.sh_type == SHT_RELA) {
          section = new GElfRelocationSection(this);
        } else if (shdr.sh_type == SHT_STRTAB) {
          section = new GElfStringTable(this);
        } else if (shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM) {
          section = new GElfSymbolTable(this);
        } else if (shdr.sh_type == SHT_NULL) {
          section = 0;
          sections.push_back(std::unique_ptr<GElfSection>());
        } else {
          section = new GElfSection(this);
        }
        if (section) {
          sections.push_back(std::unique_ptr<GElfSection>(section));
          if (!section->pull(n)) { return false; }
        }
      }

      for (size_t n = 1; n < sections.size(); ++n) {
        GElfSection* section = sections[n].get();
        if (section->type() == SHT_STRTAB) {
          if (!section->pullData()) { return false; }
        }
      }

      for (size_t n = 1; n < sections.size(); ++n) {
        GElfSection* section = sections[n].get();
        if (section->type() == SHT_SYMTAB || section->type() == SHT_DYNSYM) {
          if (!section->pullData()) { return false; }
        }
      }

      for (size_t n = 1; n < sections.size(); ++n) {
        GElfSection* section = sections[n].get();
        if (section->type() != SHT_STRTAB && section->type() != SHT_SYMTAB && section->type() != SHT_DYNSYM) {
          if (!section->pullData()) { return false; }
        }
      }

      for (size_t i = 1; i < sections.size(); ++i) {
        if (i == ehdr.e_shstrndx) { continue; }
        std::unique_ptr<GElfSection>& section = sections[i];
        if (section->type() == SHT_STRTAB) { strtabSection = static_cast<GElfStringTable*>(section.get()); }
        if (section->type() == SHT_SYMTAB) { symtabSection = static_cast<GElfSymbolTable*>(section.get()); }
        if (section->type() == SHT_NOTE) { noteSection = static_cast<GElfNoteSection*>(section.get()); }
        if (section->type() == SHT_DYNSYM) { dynsymSection = static_cast<GElfSymbolTable*>(section.get()); }
      }

      size_t phnum;
      if (elf_getphdrnum(e, &phnum) < 0) { return elfError("elf_getphdrnum failed"); }
      for (size_t i = 0; i < phnum; ++i) {
        segments.push_back(std::unique_ptr<GElfSegment>(new GElfSegment(this, i)));
        if (!segments[i]->pull()) { return false; }
      }

      return true;
    }

    // Logs "Error: <msg>: <libelf error text>" to `out` and returns false, so
    // callers can write `return elfError("...");`.
    bool GElfImage::elfError(const char* msg)
    {
      out << "Error: " << msg << ": " << elfError() << std::endl;
      return false;
    }

    // Returns the image size in bytes: computed with ElfSize() for a
    // buffer-backed image, otherwise taken from the backing file image.
    uint64_t GElfImage::size()
    {
      if (!buffer) {
        return img.getSize();
      }
      return ElfSize(buffer);
    }

    bool GElfImage::push0()
    {
      assert(e);
      for (std::unique_ptr<GElfSection>& section : sections) {
        if (section && !section->push()) { return false; }
      }

      for (std::unique_ptr<GElfSection>& section : sections) {
        if (section && !section->pull0()) { return false; }
      }

      if (!segments.empty()) {
        if (!gelf_newphdr(e, segments.size())) { return elfError("gelf_newphdr failed"); }
      }
      if (elf_update(e, ELF_C_NULL) < 0) { return elfError("elf_update (1.1) failed"); }
      if (!segments.empty()) {
        for (std::unique_ptr<GElfSection>& section : sections) {
          // Update section offsets.
          if (section && !section->pull0()) { return false; }
        }
        uint64_t vaddr = 0;
        for (std::unique_ptr<GElfSegment>& segment : segments) {
          if (!segment->push(vaddr)) { return false; }
          vaddr = segment->vaddr() + segment->memSize();
        }
      }
      return true;
    }

    // Pushes all state into libelf (push0) and then has libelf write the
    // image out (elf_update with ELF_C_WRITE). Returns false on failure.
    bool GElfImage::push()
    {
      if (!push0()) { return false; }
      const bool written = (elf_update(e, ELF_C_WRITE) >= 0);
      if (!written) { return elfError("elf_update (2) failed"); }
      return true;
    }

    // Returns the first segment whose memory range
    // [vaddr(), vaddr() + memSize()) contains `vaddr`, or null if none does.
    Segment* GElfImage::segmentByVAddr(uint64_t vaddr)
    {
      for (auto& candidate : segments) {
        const bool atOrAfterStart = candidate->vaddr() <= vaddr;
        if (atOrAfterStart && vaddr < candidate->vaddr() + candidate->memSize()) {
          return candidate.get();
        }
      }
      return 0;
    }

    // Returns the first section (index 1 and up) whose address range
    // [addr(), addr() + size()) contains `vaddr`, or nullptr if none does.
    // `sections` may contain null placeholders (push0() already guards against
    // them); those are skipped here instead of being dereferenced.
    Section* GElfImage::sectionByVAddr(uint64_t vaddr)
    {
      for (size_t n = 1; n < sections.size(); ++n) {
        GElfSection* sec = sections[n].get();
        if (!sec) { continue; }
        if (sec->addr() <= vaddr && vaddr < sec->addr() + sec->size()) {
          return sec;
        }
      }
      return nullptr;
    }

    // Currently does nothing and always returns false.
    bool GElfImage::elfEnd()
    {
      return false;
    }

    // Writes the backing file image to `filename`. On failure, copies the
    // file image's output to `out` and returns false.
    bool GElfImage::writeTo(const std::string& filename)
    {
      if (img.writeTo(filename)) {
        return true;
      }
      return imgError();
    }

    // Produces a copy of the image.
    //   buf  - receives the address of the copy
    //   size - if non-null, receives the size of the copy
    // For a buffer-backed image the copy is allocated here with malloc();
    // otherwise the request is forwarded to the backing file image.
    // Fix: a failed malloc() is now reported by returning false (with *buf set
    // to null) instead of passing a null destination to memcpy.
    bool GElfImage::copyToBuffer(void** buf, size_t* size)
    {
      if (buffer) {
        *buf = malloc(bufferSize);
        if (*buf == nullptr && bufferSize != 0) { return false; }
        if (bufferSize != 0) { memcpy(*buf, buffer, bufferSize); }
        if (size) { *size = bufferSize; }
        return true;
      } else {
        return img.copyTo(buf, size);
      }
    }

    // Copies the image into caller-provided storage `buf` of `size` bytes.
    // For a buffer-backed image, returns false when `size` is too small;
    // otherwise the request is forwarded to the backing file image.
    bool GElfImage::copyToBuffer(void* buf, size_t size)
    {
      if (!buffer) {
        return img.copyTo(buf, size);
      }
      const bool fits = (size >= bufferSize);
      if (fits) {
        memcpy(buf, buffer, bufferSize);
      }
      return fits;
    }

    // Creates a new string-table object, appends it to `sections` (which takes
    // ownership) and returns it. `name` is not used here; the libelf section
    // is created later by the table's push().
    GElfStringTable* GElfImage::addStringTable(const std::string& name)
    {
      std::unique_ptr<GElfStringTable> owned(new GElfStringTable(this));
      GElfStringTable* const table = owned.get();
      sections.push_back(std::move(owned));
      return table;
    }

    // Returns the section at `index`, cast to a string table. Neither the
    // index nor the section's actual type is checked.
    GElfStringTable* GElfImage::getStringTable(uint16_t index)
    {
      return static_cast<GElfStringTable*>(sections[index].get());
    }

    // Creates a symbol-table section called `name` that uses `stab` for its
    // strings (the image's default string table when `stab` is null), appends
    // it to `sections` and returns it. The result of the table's push() is not
    // checked.
    GElfSymbolTable* GElfImage::addSymbolTable(const std::string& name, StringTable* stab)
    {
      StringTable* const strings = stab ? stab : strtab();
      const char* const storedName = shstrtab()->addString(name);
      GElfSymbolTable* const table = new GElfSymbolTable(this);
      table->push(storedName, static_cast<GElfStringTable*>(strings));
      sections.push_back(std::unique_ptr<GElfSection>(table));
      return table;
    }

    // Returns the section-header string table, creating it on first use.
    GElfStringTable* GElfImage::shstrtab() {
      if (shstrtabSection) { return shstrtabSection; }
      shstrtabSection = addStringTable(".shstrtab");
      return shstrtabSection;
    }

    // Returns the general string table, creating it on first use.
    // Fix: the table is requested under its own name ".strtab" (the name
    // initNew() pushes it with) rather than the copy-pasted ".shstrtab".
    GElfStringTable* GElfImage::strtab() {
      if (!strtabSection) {
        strtabSection = addStringTable(".strtab");
      }
      return strtabSection;
    }

    // Returns the ".symtab" symbol table, creating it (backed by the general
    // string table) on first use.
    GElfSymbolTable* GElfImage::symtab()
    {
      if (symtabSection) { return symtabSection; }
      symtabSection = addSymbolTable(".symtab", strtab());
      return symtabSection;
    }

    // Returns the ".dynsym" symbol table, creating it (backed by the general
    // string table) on first use.
    GElfSymbolTable* GElfImage::dynsym()
    {
      if (dynsymSection) { return dynsymSection; }
      dynsymSection = addSymbolTable(".dynsym", strtab());
      return dynsymSection;
    }

    // Chooses the symbol table to use. If the LOADER_USE_DYNSYM environment
    // variable is set and its first character is not '0', the dynamic symbol
    // table is returned; otherwise the regular symbol table.
    GElfSymbolTable* GElfImage::getSymbolTable()
    {
      const char* const setting = getenv("LOADER_USE_DYNSYM");
      const bool useDynsym = (setting != nullptr) && (setting[0] != '0');
      return useDynsym ? dynsym() : symtab();
    }

    // Returns the image's note section, creating one called ".note" on first
    // use.
    GElfNoteSection* GElfImage::note()
    {
      if (noteSection) { return noteSection; }
      noteSection = addNoteSection(".note");
      return noteSection;
    }

    // Creates a note section called `name`, appends it to `sections` (which
    // takes ownership) and returns it. The result of the section's push() is
    // not checked.
    GElfNoteSection* GElfImage::addNoteSection(const std::string& name)
    {
      GElfNoteSection* const created = new GElfNoteSection(this);
      created->push(name);
      sections.push_back(std::unique_ptr<GElfSection>(created));
      return created;
    }

    // Creates a new segment with the given type, flags and physical address.
    // Its index is the current segment count, and the image takes ownership.
    // Returns nullptr if allocation fails. Fix: a failed nothrow allocation is
    // no longer stored in `segments`, where the null entry would later be
    // dereferenced by push0() and segmentByVAddr().
    Segment* GElfImage::initSegment(uint32_t type, uint32_t flags, uint64_t paddr)
    {
      GElfSegment *seg = new (std::nothrow) GElfSegment(this, segments.size(), type, flags, paddr);
      if (!seg) { return nullptr; }
      segments.push_back(std::unique_ptr<GElfSegment>(seg));
      return seg;
    }

    // Nothing to do in this implementation; always returns true.
    bool GElfImage::addSegments()
    {
      return true;
    }

    // Creates a generic section and registers it with the image.
    //   name    - section name
    //   type    - sh_type
    //   flags   - sh_flags
    //   entsize - sh_entsize
    //   segment - optional segment the section should be added to
    // Returns the new section, or nullptr if allocation, section creation or
    // the segment registration fails.
    Section* GElfImage::addSection(const std::string &name,
                                   uint32_t type,
                                   uint64_t flags,
                                   uint64_t entsize, Segment* segment)
    {
      GElfSection* const created = new (std::nothrow) GElfSection(this);
      bool ok = created && created->push(name.c_str(), type, flags, 0, 0, 0, entsize);
      if (ok && segment) {
        ok = segment->updateAddSection(created);
      }
      if (!ok) {
        delete created;
        return nullptr;
      }
      sections.push_back(std::unique_ptr<GElfSection>(created));
      return created;
    }

    // Creates the relocation section for `sec`, named ".rela" followed by the
    // section's name, resolving symbols through `symtab` (the image's default
    // symbol table when null). Returns the new section, or nullptr if its
    // creation fails.
    RelocationSection* GElfImage::addRelocationSection(Section* sec, SymbolTable* symtab)
    {
      const std::string relaName = ".rela" + sec->Name();
      if (!symtab) { symtab = this->symtab(); }

      GElfRelocationSection* const created = new GElfRelocationSection(this, sec, (GElfSymbolTable*) symtab);
      const bool ok = created && created->push(relaName);
      if (!ok) {
        delete created;
        return nullptr;
      }
      sections.push_back(std::unique_ptr<GElfRelocationSection>(created));
      return created;
    }

    // Returns the relocation section for `sec` by delegating to the section
    // itself.
    RelocationSection* GElfImage::relocationSection(Section* sec, SymbolTable* symtab)
    {
      return sec->relocationSection(symtab);
    }

    // Returns e_machine from the cached ELF header.
    uint16_t GElfImage::machine() const
    {
      return ehdr.e_machine;
    }

    // Returns e_type from the cached ELF header.
    uint16_t GElfImage::etype() const
    {
      return ehdr.e_type;
    }

    // Allocates a new ELFCLASS32 image; the caller owns the returned object.
    Image* NewElf32Image() { return new GElfImage(ELFCLASS32); }
    // Allocates a new ELFCLASS64 image; the caller owns the returned object.
    Image* NewElf64Image() { return new GElfImage(ELFCLASS64); }

    // Estimates the size in bytes of the 64-bit ELF image starting at `emi`.
    // Returns 0 when `emi` is null or its e_version is not EV_CURRENT.
    // The estimate starts as the end of the section header table. If some
    // section lies at a file offset beyond e_shoff, the estimate becomes the
    // end of the section with the largest such offset (SHT_NOBITS sections
    // contribute their offset only, not their size).
    uint64_t ElfSize(const void* emi)
    {
      const Elf64_Ehdr* header = static_cast<const Elf64_Ehdr*>(emi);
      if (header == NULL || header->e_version != EV_CURRENT) {
        return 0;
      }

      const Elf64_Shdr* table =
          reinterpret_cast<const Elf64_Shdr*>(static_cast<const char*>(emi) + header->e_shoff);
      if (table == NULL) {
        return 0;
      }

      uint64_t furthest = header->e_shoff;
      uint64_t result = furthest + static_cast<uint64_t>(header->e_shentsize) * static_cast<uint64_t>(header->e_shnum);

      for (uint16_t idx = 0; idx < header->e_shnum; ++idx) {
        const Elf64_Shdr& entry = table[idx];
        const uint64_t start = static_cast<uint64_t>(entry.sh_offset);
        if (start <= furthest) { continue; }
        furthest = start;
        result = start;
        if (entry.sh_type != SHT_NOBITS) {
          result += static_cast<uint64_t>(entry.sh_size);
        }
      }

      return result;
    }

    // Converts a note name field of `s_size` bytes at `s` into a std::string.
    // A single trailing NUL, if present, is dropped; an empty field gives an
    // empty string.
    std::string GetNoteString(uint32_t s_size, const char* s)
    {
      if (s_size == 0) { return ""; }
      const bool nulTerminated = (s[s_size - 1] == '\0');
      const uint32_t length = nulTerminated ? s_size - 1 : s_size;
      return std::string(s, length);
    }

}   //  namespace elf
}   //  namespace amd
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include <assert.h>
#include <cstring>
#include <iomanip>
#include <algorithm>
#include "core/inc/amd_hsa_code.hpp"
#include "amd_hsa_code_util.hpp"
#include <libelf.h>
#include "inc/amd_hsa_elf.h"
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <algorithm>

#ifdef SP3_STATIC_LIB
#include "sp3.h"
#endif // SP3_STATIC_LIB

#ifndef _WIN32
#define _alloca alloca
#endif

namespace rocr {
namespace amd {
namespace hsa {
namespace code {

    using amd::elf::GetNoteString;

    // A symbol is treated as a declaration when its ELF symbol type is
    // STT_COMMON.
    bool Symbol::IsDeclaration() const
    {
      const bool is_common = (elfsym->type() == STT_COMMON);
      return is_common;
    }

    // Every symbol that is not a declaration is a definition.
    bool Symbol::IsDefinition() const
    {
      if (IsDeclaration()) { return false; }
      return true;
    }

    // True when the section holding the symbol carries the
    // SHF_AMDGPU_HSA_AGENT flag.
    bool Symbol::IsAgent() const
    {
      return (elfsym->section()->flags() & SHF_AMDGPU_HSA_AGENT) != 0;
    }

    // Maps the ELF binding to HSA linkage: STB_GLOBAL symbols have program
    // linkage, everything else has module linkage.
    hsa_symbol_linkage_t Symbol::Linkage() const
    {
      if (elfsym->binding() == STB_GLOBAL) {
        return HSA_SYMBOL_LINKAGE_PROGRAM;
      }
      return HSA_SYMBOL_LINKAGE_MODULE;
    }

    // Agent symbols use agent allocation; all others use program allocation.
    hsa_variable_allocation_t Symbol::Allocation() const
    {
      if (IsAgent()) {
        return HSA_VARIABLE_ALLOCATION_AGENT;
      }
      return HSA_VARIABLE_ALLOCATION_PROGRAM;
    }

    // Sections flagged SHF_AMDGPU_HSA_READONLY map to the readonly segment;
    // every other section maps to the global segment.
    hsa_variable_segment_t Symbol::Segment() const
    {
      if (elfsym->section()->flags() & SHF_AMDGPU_HSA_READONLY) {
        return HSA_VARIABLE_SEGMENT_READONLY;
      }
      return HSA_VARIABLE_SEGMENT_GLOBAL;
    }

    // Size of the symbol as recorded in the underlying ELF symbol.
    uint64_t Symbol::Size() const
    {
      const uint64_t byte_size = elfsym->size();
      return byte_size;
    }

    // 32-bit view of Size(). Debug builds assert that the ELF symbol size
    // fits; release builds silently truncate.
    uint32_t Symbol::Size32() const
    {
      assert(elfsym->size() < UINT32_MAX);
      return static_cast<uint32_t>(Size());
    }

    // Alignment of the symbol, taken from the address alignment of the
    // section that contains it. Debug builds assert that it fits in 32 bits.
    uint32_t Symbol::Alignment() const
    {
      assert(elfsym->section()->addralign() < UINT32_MAX);
      return static_cast<uint32_t>(elfsym->section()->addralign());
    }

    // Reports whether the variable is constant, i.e. whether the section that
    // holds it is NOT writable. SHF_WRITE marks a section as writable, so a
    // constant variable is one whose section lacks that flag. (The previous
    // implementation returned the flag itself, which inverted the answer.)
    bool Symbol::IsConst() const
    {
      return (elfsym->section()->flags() & SHF_WRITE) == 0;
    }

    // Answers the code-symbol attributes that are common to every symbol
    // kind. `value` must point to storage of the type matching `attribute`.
    // The name attributes copy exactly <name length> characters and do not
    // append a terminating NUL.
    // Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `value` or for an
    // attribute that is not handled here.
    hsa_status_t Symbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
    {
      if (value == nullptr) {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      }

      switch (attribute) {
        case HSA_CODE_SYMBOL_INFO_TYPE:
          *static_cast<hsa_symbol_kind_t*>(value) = Kind();
          return HSA_STATUS_SUCCESS;

        case HSA_CODE_SYMBOL_INFO_NAME_LENGTH:
          *static_cast<uint32_t*>(value) = static_cast<uint32_t>(GetSymbolName().size());
          return HSA_STATUS_SUCCESS;

        case HSA_CODE_SYMBOL_INFO_NAME: {
          const std::string symbol_name = GetSymbolName();
          memcpy(value, symbol_name.data(), symbol_name.size());
          return HSA_STATUS_SUCCESS;
        }

        case HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH:
          *static_cast<uint32_t*>(value) = static_cast<uint32_t>(GetModuleName().size());
          return HSA_STATUS_SUCCESS;

        case HSA_CODE_SYMBOL_INFO_MODULE_NAME: {
          const std::string module_name = GetModuleName();
          memcpy(value, module_name.data(), module_name.size());
          return HSA_STATUS_SUCCESS;
        }

        case HSA_CODE_SYMBOL_INFO_LINKAGE:
          *static_cast<hsa_symbol_linkage_t*>(value) = Linkage();
          return HSA_STATUS_SUCCESS;

        case HSA_CODE_SYMBOL_INFO_IS_DEFINITION:
          *static_cast<bool*>(value) = IsDefinition();
          return HSA_STATUS_SUCCESS;

        default:
          break;
      }
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    // Module part of the full symbol name: everything before the first ':'.
    // Returns an empty string when the name contains no ':'.
    std::string Symbol::GetModuleName() const {
      const std::string qualified = Name();
      const std::string::size_type first_colon = qualified.find(":");
      if (first_colon == std::string::npos) {
        return "";
      }
      return qualified.substr(0, first_colon);
    }

    // Symbol part of the full name: everything after the last ':'.
    // Returns the full name unchanged when it contains no ':'.
    std::string Symbol::GetSymbolName() const {
      std::string qualified = Name();
      const std::string::size_type last_colon = qualified.rfind(":");
      if (last_colon == std::string::npos) {
        return qualified;
      }
      return qualified.substr(last_colon + 1);
    }

    // Wraps a Symbol pointer in an opaque HSA code-symbol handle by storing
    // the pointer value in the handle field.
    hsa_code_symbol_t Symbol::ToHandle(Symbol* sym)
    {
      hsa_code_symbol_t wrapped;
      wrapped.handle = reinterpret_cast<uint64_t>(sym);
      return wrapped;
    }

    // Inverse of ToHandle(): reinterprets the handle field as a Symbol
    // pointer. No validation is performed.
    Symbol* Symbol::FromHandle(hsa_code_symbol_t s)
    {
      Symbol* const unwrapped = reinterpret_cast<Symbol*>(s.handle);
      return unwrapped;
    }

    // Builds a kernel symbol on top of `elfsym_`. When `akc` is null every
    // kernel attribute stays zero; otherwise the attributes are copied from
    // the AMD kernel code object. The stored kernarg alignment is
    // 1 << akc->kernarg_segment_alignment.
    KernelSymbol::KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc)
        : Symbol(elfsym_)
        , kernarg_segment_size(0)
        , kernarg_segment_alignment(0)
        , group_segment_size(0)
        , private_segment_size(0)
        , is_dynamic_callstack(0)
    {
      if (!akc) {
        return;
      }

      kernarg_segment_size = static_cast<uint32_t>(akc->kernarg_segment_byte_size);
      kernarg_segment_alignment = static_cast<uint32_t>(1 << akc->kernarg_segment_alignment);
      group_segment_size = static_cast<uint32_t>(akc->workgroup_group_segment_byte_size);
      private_segment_size = static_cast<uint32_t>(akc->workitem_private_segment_byte_size);
      if (AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK)) {
        is_dynamic_callstack = true;
      } else {
        is_dynamic_callstack = false;
      }
    }

    // Answers kernel-specific attributes from the values cached by the
    // constructor. Any other attribute is forwarded to Symbol::GetInfo().
    // `value` must be non-null (asserted).
    hsa_status_t KernelSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
    {
      assert(value);
      uint32_t* const as_u32 = static_cast<uint32_t*>(value);

      switch (attribute) {
        case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE:
          *as_u32 = kernarg_segment_size;
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT:
          *as_u32 = kernarg_segment_alignment;
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE:
          *as_u32 = group_segment_size;
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE:
          *as_u32 = private_segment_size;
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK:
          *static_cast<bool*>(value) = is_dynamic_callstack;
          return HSA_STATUS_SUCCESS;
        default:
          break;
      }
      // Not a kernel attribute: let the base class handle it.
      return Symbol::GetInfo(attribute, value);
    }

    // Answers variable-specific attributes. Any other attribute is forwarded
    // to Symbol::GetInfo(). `value` must be non-null (asserted).
    // The variable size is reported as a 32-bit value, i.e. Size() truncated.
    hsa_status_t VariableSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
    {
      assert(value);

      switch (attribute) {
        case HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION:
          *static_cast<hsa_variable_allocation_t*>(value) = Allocation();
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT:
          *static_cast<hsa_variable_segment_t*>(value) = Segment();
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT:
          *static_cast<uint32_t*>(value) = Alignment();
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE:
          *static_cast<uint32_t*>(value) = static_cast<uint32_t>(Size());
          return HSA_STATUS_SUCCESS;
        case HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST:
          *static_cast<bool*>(value) = IsConst();
          return HSA_STATUS_SUCCESS;
        default:
          break;
      }
      // Not a variable attribute: let the base class handle it.
      return Symbol::GetInfo(attribute, value);
    }

    // Creates an empty code object wrapper: no ELF image yet, all cached
    // section pointers cleared, and every entry of the segment and section
    // tables zeroed.
    AmdHsaCode::AmdHsaCode(bool combineDataSegments_)
      : img(nullptr),
        combineDataSegments(combineDataSegments_),
        hsatext(0), imageInit(0), samplerInit(0),
        debugInfo(0), debugLine(0), debugAbbrev(0)
    {
      // Two slots per HSA segment kind.
      for (unsigned seg = 0; seg < AMDGPU_HSA_SEGMENT_LAST; ++seg) {
        hsaSegments[seg][0] = 0;
        hsaSegments[seg][1] = 0;
      }

      for (unsigned sec = 0; sec < AMDGPU_HSA_SECTION_LAST; ++sec) {
        hsaSections[sec] = 0;
      }
    }

    // Deletes every Symbol held in `symbols`.
    AmdHsaCode::~AmdHsaCode()
    {
      std::for_each(symbols.begin(), symbols.end(),
                    [](Symbol* owned) { delete owned; });
    }

    // Populates this object from the loaded ELF image, dispatching on the
    // code object major version: 2 and above use PullElfV2(), anything lower
    // uses PullElfV1(). Fails when the version cannot be determined.
    bool AmdHsaCode::PullElf()
    {
      uint32_t version_major, version_minor;
      if (!GetCodeObjectVersion(&version_major, &version_minor)) {
        return false;
      }
      return (version_major >= 2) ? PullElfV2() : PullElfV1();
    }

    bool AmdHsaCode::PullElfV1()
    {
      for (size_t i = 0; i < img->segmentCount(); ++i) {
        Segment* s = img->segment(i);
        if (s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM ||
            s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT ||
            s->type() == PT_AMDGPU_HSA_LOAD_READONLY_AGENT ||
            s->type() == PT_AMDGPU_HSA_LOAD_CODE_AGENT) {
          dataSegments.push_back(s);
        }
      }
      for (size_t i = 0; i < img->sectionCount(); ++i) {
        Section* sec = img->section(i);
        if (!sec) { continue; }
        if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) &&
            (sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_CODE))) {
          dataSections.push_back(sec);
        } else if (sec->type() == SHT_RELA) {
          relocationSections.push_back(sec->asRelocationSection());
        }
        if (sec->Name() == ".hsatext") {
          hsatext = sec;
        }
      }
      for (size_t i = 0; i < img->symtab()->symbolCount(); ++i) {
        amd::elf::Symbol* elfsym = img->symtab()->symbol(i);
        Symbol* sym = 0;
        switch (elfsym->type()) {
        case STT_AMDGPU_HSA_KERNEL: {
          amd::elf::Section* sec = elfsym->section();
          amd_kernel_code_t akc;
          if (!sec) {
            out << "Failed to find section for symbol " << elfsym->name() << std::endl;
            return false;
          }
          if (!(sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_CODE | SHF_EXECINSTR))) {
            out << "Invalid code section for symbol " << elfsym->name() << std::endl;
            return false;
          }
          if (!sec->getData(elfsym->value(), &akc, sizeof(amd_kernel_code_t))) {
            out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl;
            return false;
          }
          sym = new KernelSymbol(elfsym, &akc);
          break;
        }
        case STT_OBJECT:
        case STT_COMMON:
          sym = new VariableSymbol(elfsym);
          break;
        default:
          break; // Skip unknown symbols.
        }
        if (sym) { symbols.push_back(sym); }
      }

      return true;
    }

    // Loads an ELF image from `filename` (creating the image object on first
    // use) and pulls its contents. On failure of either step the result of
    // ElfImageError() is returned.
    bool AmdHsaCode::LoadFromFile(const std::string& filename)
    {
      if (!img) {
        img.reset(amd::elf::NewElf64Image());
      }
      if (img->loadFromFile(filename) && PullElf()) {
        return true;
      }
      return ElfImageError();
    }

    // Writes the ELF image to `filename`; on failure returns the result of
    // ElfImageError().
    bool AmdHsaCode::SaveToFile(const std::string& filename)
    {
      if (img->saveToFile(filename)) {
        return true;
      }
      return ElfImageError();
    }

    // Copies ElfSize() bytes of the ELF image into `buffer`; on failure
    // returns the result of ElfImageError().
    bool AmdHsaCode::WriteToBuffer(void* buffer)
    {
      if (img->copyToBuffer(buffer, ElfSize())) {
        return true;
      }
      return ElfImageError();
    }


    // Initializes the image via initFromBuffer() (creating the image object
    // on first use) and pulls its contents. On failure of either step the
    // result of ElfImageError() is returned.
    bool AmdHsaCode::InitFromBuffer(const void* buffer, size_t size)
    {
      if (!img) {
        img.reset(amd::elf::NewElf64Image());
      }
      if (img->initFromBuffer(buffer, size) && PullElf()) {
        return true;
      }
      return ElfImageError();
    }

    // Initializes the image via initAsBuffer() (creating the image object on
    // first use) and pulls its contents. On failure of either step the result
    // of ElfImageError() is returned.
    bool AmdHsaCode::InitAsBuffer(const void* buffer, size_t size)
    {
      if (!img) {
        img.reset(amd::elf::NewElf64Image());
      }
      if (img->initAsBuffer(buffer, size) && PullElf()) {
        return true;
      }
      return ElfImageError();
    }

    // Treats the code object handle as a pointer to ELF memory and
    // initializes from it with a size of 0. A null handle is rejected.
    bool AmdHsaCode::InitAsHandle(hsa_code_object_t code_object)
    {
      void* const elf_memory = reinterpret_cast<void*>(code_object.handle);
      if (elf_memory == nullptr) {
        return false;
      }
      return InitAsBuffer(elf_memory, 0);
    }

    // Creates a fresh AMDGPU HSA (ABI v2) ELF image. `xnack` sets the
    // EF_AMDGPU_FEATURE_XNACK_V2 e_flag. Returns false if an image already
    // exists; on initNew() failure returns the result of ElfImageError().
    bool AmdHsaCode::InitNew(bool xnack)
    {
      if (img) {
        return false;
      }

      img.reset(amd::elf::NewElf64Image());
      uint32_t flags = 0;
      if (xnack) { flags |= ELF::EF_AMDGPU_FEATURE_XNACK_V2; }

      // FIXME: elfutils libelf does not allow program headers in ET_REL file type, so change it later in finalizer.
      if (img->initNew(ELF::EM_AMDGPU, ET_EXEC, ELF::ELFOSABI_AMDGPU_HSA, ELF::ELFABIVERSION_AMDGPU_HSA_V2, flags)) {
        return true;
      }
      return ElfImageError();
    }

    // Freezes the ELF image; on failure returns the result of
    // ElfImageError().
    bool AmdHsaCode::Freeze()
    {
      if (img->Freeze()) {
        return true;
      }
      return ElfImageError();
    }

    // Returns a code object handle whose value is the address of the ELF
    // image data.
    hsa_code_object_t AmdHsaCode::GetHandle()
    {
      hsa_code_object_t wrapped;
      wrapped.handle = reinterpret_cast<uint64_t>(img->data());
      return wrapped;
    }

    // Pointer to the raw bytes of the ELF image.
    const char* AmdHsaCode::ElfData()
    {
      const char* const bytes = img->data();
      return bytes;
    }

    // Size of the ELF image as reported by the image object.
    uint64_t AmdHsaCode::ElfSize()
    {
      const uint64_t byte_count = img->size();
      return byte_count;
    }

    // Validates the ELF image and checks that its machine is EM_AMDGPU.
    // A wrong machine is reported on `out`.
    bool AmdHsaCode::Validate()
    {
      if (!img->Validate()) {
        return ElfImageError();
      }
      if (img->Machine() == ELF::EM_AMDGPU) {
        return true;
      }
      out << "ELF error: Invalid machine" << std::endl;
      return false;
    }

    // Appends a note owned by "AMD" with the given type and descriptor bytes
    // to the image's note section.
    void AmdHsaCode::AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size)
    {
      static const char kNoteOwner[] = "AMD";
      img->note()->addNote(kNoteOwner, type, desc, desc_size);
    }

    void AmdHsaCode::AddNoteCodeObjectVersion(uint32_t major, uint32_t minor)
    {
      amdgpu_hsa_note_code_object_version_t desc;
      desc.major_version = major;
      desc.minor_version = minor;
      AddAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc, sizeof(desc));
    }

    bool AmdHsaCode::GetCodeObjectVersion(uint32_t* major, uint32_t* minor)
    {
      switch (img->ABIVersion()) {
      case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
        amdgpu_hsa_note_code_object_version_t* desc;
        if (GetAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc)) {
          *major = desc->major_version;
          *minor = desc->minor_version;
          return *major <= 2;
        }
        return false;
      case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
        *major = 3;
        *minor = 0;
        return true;
      case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
        *major = 4;
        *minor = 0;
        return true;
      case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
        *major = 5;
        *minor = 0;
        return true;
      case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
        *major = 6;
        *minor = 0;
        return true;
      }

      return false;
    }

    bool AmdHsaCode::GetNoteCodeObjectVersion(std::string& version)
    {
      amdgpu_hsa_note_code_object_version_t* desc;
      if (!GetAmdNote(NT_AMD_HSA_CODE_OBJECT_VERSION, &desc)) { return false; }
      version.clear();
      version += std::to_string(desc->major_version);
      version += ".";
      version += std::to_string(desc->minor_version);
      return true;
    }

    void AmdHsaCode::AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode)
    {
      amdgpu_hsa_note_hsail_t desc;
      memset(&desc, 0, sizeof(desc));
      desc.hsail_major_version = hsail_major;
      desc.hsail_minor_version = hsail_minor;
      desc.profile = uint8_t(profile);
      desc.machine_model = uint8_t(machine_model);
      desc.default_float_round = uint8_t(rounding_mode);
      AddAmdNote(NT_AMD_HSA_HSAIL, &desc, sizeof(desc));
    }

    bool AmdHsaCode::GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round)
    {
      amdgpu_hsa_note_hsail_t *desc;
      if (!GetAmdNote(NT_AMD_HSA_HSAIL, &desc)) { return false; }
      *hsail_major = desc->hsail_major_version;
      *hsail_minor = desc->hsail_minor_version;
      *profile = (hsa_profile_t) desc->profile;
      *machine_model = (hsa_machine_model_t) desc->machine_model;
      *default_float_round = (hsa_default_float_rounding_mode_t) desc->default_float_round;
      return true;
    }

    // Records the target ISA as an NT_AMD_HSA_ISA_VERSION note.
    // The descriptor is the fixed amdgpu_hsa_note_isa_t header followed by
    // the NUL-terminated vendor name and the NUL-terminated architecture
    // name; both recorded sizes include the terminating NUL.
    void AmdHsaCode::AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping)
    {
      // Size the buffer from the ISA note header itself. The previous code
      // used sizeof(amdgpu_hsa_note_producer_t), a different note struct
      // (apparently a copy-paste slip). The ISA header carries an extra field
      // (stepping), so sizing from the producer header could leave the buffer
      // too small for the final NUL written below.
      // NOTE(review): struct layouts live in amd_hsa_elf.h - confirm.
      size_t size = sizeof(amdgpu_hsa_note_isa_t) + vendor_name.length() + architecture_name.length() + 1;
      amdgpu_hsa_note_isa_t* desc = (amdgpu_hsa_note_isa_t*) _alloca(size);
      memset(desc, 0, size);
      desc->vendor_name_size = vendor_name.length()+1;
      desc->architecture_name_size = architecture_name.length()+1;
      desc->major = major;
      desc->minor = minor;
      desc->stepping = stepping;
      // Vendor name first, architecture name right after its terminating NUL.
      memcpy(desc->vendor_and_architecture_name, vendor_name.c_str(), vendor_name.length() + 1);
      memcpy(desc->vendor_and_architecture_name + desc->vendor_name_size, architecture_name.c_str(), architecture_name.length() + 1);
      AddAmdNote(NT_AMD_HSA_ISA_VERSION, desc, size);
    }

    bool AmdHsaCode::GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping)
    {
      amdgpu_hsa_note_isa_t *desc;
      if (!GetAmdNote(NT_AMD_HSA_ISA_VERSION, &desc)) { return false; }
      vendor_name = GetNoteString(desc->vendor_name_size, desc->vendor_and_architecture_name);
      architecture_name = GetNoteString(desc->architecture_name_size, desc->vendor_and_architecture_name + vendor_name.length() + 1);
      *major_version = desc->major;
      *minor_version = desc->minor;
      *stepping = desc->stepping;
      return true;
    }

    // Description of one AMDGPU machine: its target name and whether the
    // XNACK and SRAMECC features are supported. Default-constructed as an
    // empty name with both features unsupported.
    struct MachInfo {
      std::string Name{};
      bool XnackSupported{false};
      bool SrameccSupported{false};
    };

    // Looks up the machine description for an EF_AMDGPU_MACH_* value.
    //
    // On a match, fills MI with the target name (e.g. "gfx906") and whether
    // the target supports the XNACK and SRAMECC features, and returns true.
    // Returns false, leaving MI untouched, for any value not listed below.
    //
    // TODO: Move isa registry into the loader.
    static bool GetMachInfo(unsigned Mach, MachInfo &MI) {
      switch (Mach) {
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  MI.Name = "gfx600";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  MI.Name = "gfx601";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602:  MI.Name = "gfx602";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  MI.Name = "gfx700";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  MI.Name = "gfx701";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  MI.Name = "gfx702";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  MI.Name = "gfx703";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  MI.Name = "gfx704";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705:  MI.Name = "gfx705";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  MI.Name = "gfx801";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  MI.Name = "gfx802";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  MI.Name = "gfx803";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805:  MI.Name = "gfx805";  MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  MI.Name = "gfx810";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  MI.Name = "gfx900";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  MI.Name = "gfx902";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  MI.Name = "gfx904";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  MI.Name = "gfx906";  MI.XnackSupported = true;  MI.SrameccSupported = true;  break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  MI.Name = "gfx908";  MI.XnackSupported = true;  MI.SrameccSupported = true;  break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  MI.Name = "gfx909";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A:  MI.Name = "gfx90a";  MI.XnackSupported = true;  MI.SrameccSupported = true;  break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:  MI.Name = "gfx90c";  MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:  MI.Name = "gfx942";  MI.XnackSupported = true;  MI.SrameccSupported = true;  break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:  MI.Name = "gfx950";  MI.XnackSupported = true;  MI.SrameccSupported = true;  break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: MI.Name = "gfx1010"; MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: MI.Name = "gfx1011"; MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: MI.Name = "gfx1012"; MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: MI.Name = "gfx1013"; MI.XnackSupported = true;  MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: MI.Name = "gfx1030"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: MI.Name = "gfx1031"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: MI.Name = "gfx1032"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: MI.Name = "gfx1033"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: MI.Name = "gfx1034"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: MI.Name = "gfx1035"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: MI.Name = "gfx1036"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: MI.Name = "gfx1100"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: MI.Name = "gfx1101"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: MI.Name = "gfx1102"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: MI.Name = "gfx1103"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: MI.Name = "gfx1150"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: MI.Name = "gfx1151"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: MI.Name = "gfx1152"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: MI.Name = "gfx1153"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: MI.Name = "gfx1200"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: MI.Name = "gfx1201"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      // Generic targets.
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:    MI.Name = "gfx9-generic";    MI.XnackSupported = true; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC:  MI.Name = "gfx9-4-generic";  MI.XnackSupported = true;  MI.SrameccSupported = true; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: MI.Name = "gfx10-1-generic"; MI.XnackSupported = true; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: MI.Name = "gfx10-3-generic"; MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:   MI.Name = "gfx11-generic";   MI.XnackSupported = false; MI.SrameccSupported = false; break;
      case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:   MI.Name = "gfx12-generic";   MI.XnackSupported = false; MI.SrameccSupported = false; break;
      default: return false;
      }
      return true;
    }

    // This function is also copied to the Code Object Manager library.
    static std::string ConvertOldTargetNameToNew(const std::string &old_name, bool is_finalizer, uint32_t e_flags) {
      assert(!old_name.empty() && "Expecting non-empty old name");

      unsigned mach = 0;
      if (old_name == "AMD:AMDGPU:6:0:0")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
      else if (old_name == "AMD:AMDGPU:6:0:1")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
      else if (old_name == "AMD:AMDGPU:6:0:2")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
      else if (old_name == "AMD:AMDGPU:7:0:0")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
      else if (old_name == "AMD:AMDGPU:7:0:1")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
      else if (old_name == "AMD:AMDGPU:7:0:2")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
      else if (old_name == "AMD:AMDGPU:7:0:3")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
      else if (old_name == "AMD:AMDGPU:7:0:4")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
      else if (old_name == "AMD:AMDGPU:7:0:5")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
      else if (old_name == "AMD:AMDGPU:8:0:1")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
      else if (old_name == "AMD:AMDGPU:8:0:0" || old_name == "AMD:AMDGPU:8:0:2")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
      else if (old_name == "AMD:AMDGPU:8:0:3" || old_name == "AMD:AMDGPU:8:0:4")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
      else if (old_name == "AMD:AMDGPU:8:0:5")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
      else if (old_name == "AMD:AMDGPU:8:1:0")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
      else if (old_name == "AMD:AMDGPU:9:0:0" || old_name == "AMD:AMDGPU:9:0:1")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
      else if (old_name == "AMD:AMDGPU:9:0:2" || old_name == "AMD:AMDGPU:9:0:3")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
      else if (old_name == "AMD:AMDGPU:9:0:4" || old_name == "AMD:AMDGPU:9:0:5")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
      else if (old_name == "AMD:AMDGPU:9:0:6" || old_name == "AMD:AMDGPU:9:0:7")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
      else if (old_name == "AMD:AMDGPU:9:0:12")
        mach = ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
      else {
        // Code object v2 only supports asics up to gfx906 plus gfx90c. Do NOT
        // add handling of new asics into this if-else-if* block.
        return "";
      }
      MachInfo MI;
      if (!GetMachInfo(mach, MI))
        return "";

      // Only "AMD:AMDGPU:9:0:6" and "AMD:AMDGPU:9:0:7" supports SRAMECC for
      // code object V2, and it must be OFF.
      if (MI.SrameccSupported)
        MI.Name += ":sramecc-";

      if (is_finalizer) {
        if (e_flags & ELF::EF_AMDGPU_FEATURE_XNACK_V2)
          MI.Name += ":xnack+";
        else if (MI.XnackSupported)
          MI.Name += ":xnack-";
      } else {
        if (old_name == "AMD:AMDGPU:8:0:1")
          MI.Name += ":xnack+";
        else if (old_name == "AMD:AMDGPU:8:1:0")
          MI.Name += ":xnack+";
        else if (old_name == "AMD:AMDGPU:9:0:1")
          MI.Name += ":xnack+";
        else if (old_name == "AMD:AMDGPU:9:0:3")
          MI.Name += ":xnack+";
        else if (old_name == "AMD:AMDGPU:9:0:5")
          MI.Name += ":xnack+";
        else if (old_name == "AMD:AMDGPU:9:0:7")
          MI.Name += ":xnack+";
        else if (MI.XnackSupported)
          MI.Name += ":xnack-";
      }

      return MI.Name;
    }

    // Builds the ISA name of the loaded code object into isa_name, in the
    // form "amdgcn-amd-amdhsa--<target>[:sramecc+|-][:xnack+|-]".
    //
    // isa_name is cleared first and may hold a partial name when the call
    // fails. genericVersion is optional; when non-null it is reset to 0 and
    // only set from the e_flags for code object version 6 and above.
    //
    // Returns false when the image is not a 64-bit EM_AMDGPU ELF, the code
    // object version cannot be determined, the OS ABI is not AMDHSA (v3+),
    // the machine is unknown, or (v2 and below) the ISA note is missing or
    // names an unsupported target.
    bool AmdHsaCode::GetIsa(std::string& isa_name, unsigned *genericVersion)
    {
      isa_name.clear();

      uint32_t code_object_major_version = 0;
      uint32_t code_object_minor_version = 0;

      // Generic versioning starts at 1, so zero means no generic version.
      if (genericVersion)
        *genericVersion = 0;

      switch (img->EClass()) {
      case ELFCLASS64:
        // There is no e_machine and/or OS ABI for R600 so rely on checking
        // the ELFCLASS to determine if AMDGCN versus R600. AMDHSA always uses
        // ELFCLASS64 and R600 always uses ELFCLASS32.
        isa_name += "amdgcn";
        break;
      default:
        return false;
      }
      if (img->Machine() != ELF::EM_AMDGPU)
        return false;
      isa_name += "-amd-";

      if (!GetCodeObjectVersion(&code_object_major_version, &code_object_minor_version)) {
        return false;
      }
      // Code object v3 and newer: everything is derived from the ELF header.
      if (code_object_major_version >= 3) {

        switch (img->OsAbi()) {
        case ELF::ELFOSABI_AMDGPU_HSA:
          isa_name += "amdhsa";
          break;
        default:
          // Only support AMDHSA in the ROCm runtime.
          return false;
        }

        isa_name += "--";

        unsigned mach = img->EFlags() & ELF::EF_AMDGPU_MACH;
        MachInfo MI;

        if (!GetMachInfo(mach, MI))
          return false;

        if (code_object_major_version == 3) {
          // v3: a feature flag means "on"; when the flag is clear the
          // feature is reported as "off" only if the target supports it.
          if (img->EFlags() & ELF::EF_AMDGPU_FEATURE_SRAMECC_V3)
            MI.Name += ":sramecc+";
          else if (MI.SrameccSupported)
            MI.Name += ":sramecc-";

          if (img->EFlags() & ELF::EF_AMDGPU_FEATURE_XNACK_V3)
            MI.Name += ":xnack+";
          else if (MI.XnackSupported)
            MI.Name += ":xnack-";
        } else if (code_object_major_version >= 4) {
          // v4+: each feature is a multi-valued field; only the explicit
          // OFF/ON values add a suffix, any other value adds nothing.
          switch (img->EFlags() & ELF::EF_AMDGPU_FEATURE_SRAMECC_V4) {
          case ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
            MI.Name += ":sramecc-";
            break;
          case ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
            MI.Name += ":sramecc+";
            break;
          }

          switch (img->EFlags() & ELF::EF_AMDGPU_FEATURE_XNACK_V4) {
          case ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4:
            MI.Name += ":xnack-";
            break;
          case ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4:
            MI.Name += ":xnack+";
            break;
          }

          // Generic version is not part of the ISA name.
          // Only parse it when the caller wants it.
          if (genericVersion && code_object_major_version >= 6) {
            *genericVersion = (img->EFlags() & ELF::EF_AMDGPU_GENERIC_VERSION) >> ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET;
          }
        } else {
          return false;
        }

        isa_name += MI.Name;

        return true;
      } else {
        // Code object v2 and older: the target comes from the ISA note and
        // is translated from the old "vendor:arch:major:minor:stepping" form.

        std::string vendor_name, architecture_name;
        uint32_t major_version, minor_version, stepping;
        if (!GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping))
          return false;

        isa_name += "amdhsa--";

        std::string target_name = vendor_name + ':' + architecture_name + ':' +
            std::to_string(major_version) + ':' + std::to_string(minor_version) + ':' +
            std::to_string(stepping);

        // The presence of an HSAIL note is taken to mean the code object was
        // produced by the finalizer.
        amdgpu_hsa_note_hsail_t *hsail_note;
        bool is_finalizer = GetAmdNote(NT_AMD_HSA_HSAIL, &hsail_note);
        target_name = ConvertOldTargetNameToNew(target_name, is_finalizer, img->EFlags());
        if (target_name.empty()) return false;

        isa_name += target_name;

        return true;
      }
    }

    void AmdHsaCode::AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer)
    {
      size_t size = sizeof(amdgpu_hsa_note_producer_t) + producer.length();
      amdgpu_hsa_note_producer_t* desc = (amdgpu_hsa_note_producer_t*) _alloca(size);
      memset(desc, 0, size);
      desc->producer_name_size = producer.length();
      desc->producer_major_version = major;
      desc->producer_minor_version = minor;
      memcpy(desc->producer_name, producer.c_str(), producer.length() + 1);
      AddAmdNote(NT_AMD_HSA_PRODUCER, desc, size);
    }

    bool AmdHsaCode::GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name)
    {
      amdgpu_hsa_note_producer_t* desc;
      if (!GetAmdNote(NT_AMD_HSA_PRODUCER, &desc)) { return false; }
      *major = desc->producer_major_version;
      *minor = desc->producer_minor_version;
      producer_name = GetNoteString(desc->producer_name_size, desc->producer_name);
      return true;
    }

    // Records the producer option string as an NT_AMD_HSA_PRODUCER_OPTIONS
    // note. The recorded size excludes the terminating NUL, although the NUL
    // is copied into the descriptor.
    void AmdHsaCode::AddNoteProducerOptions(const std::string& options)
    {
      size_t size = sizeof(amdgpu_hsa_note_producer_options_t) + options.length();
      amdgpu_hsa_note_producer_options_t *desc = (amdgpu_hsa_note_producer_options_t*) _alloca(size);
      // Zero the whole descriptor first, as AddNoteProducer() and
      // AddNoteIsa() do. All `size` bytes are handed to AddAmdNote(), so any
      // byte not overwritten below would otherwise be uninitialized stack
      // memory.
      memset(desc, 0, size);
      desc->producer_options_size = options.length();
      memcpy(desc->producer_options, options.c_str(), options.length() + 1);
      AddAmdNote(NT_AMD_HSA_PRODUCER_OPTIONS, desc, size);
    }

    void AmdHsaCode::AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options)
    {
      using namespace code_options;
      std::ostringstream ss;
      ss <<
        space << "-hsa_call_convention=" << call_convention <<
        control_directives(user_directives);
      if (!user_options.empty()) {
        ss << space << user_options;
      }

      AddNoteProducerOptions(ss.str());
    }

    bool AmdHsaCode::GetNoteProducerOptions(std::string& options)
    {
      amdgpu_hsa_note_producer_options_t* desc;
      if (!GetAmdNote(NT_AMD_HSA_PRODUCER_OPTIONS, &desc)) { return false; }
      options = GetNoteString(desc->producer_options_size, desc->producer_options);
      return true;
    }

    // Queries one attribute of the code object and writes it to `value`.
    //
    // String attributes (version, ISA) are written into a 64-byte buffer that is
    // zero-filled first and receives at most 63 characters. The HSAIL-derived
    // attributes are written as their respective enum types.
    //
    // Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT when the underlying note or
    // ISA cannot be read, and HSA_STATUS_ERROR_INVALID_ARGUMENT (after asserting)
    // for an unhandled attribute.
    hsa_status_t AmdHsaCode::GetInfo(hsa_code_object_info_t attribute, void *value)
    {
      assert(value);

      // Shared writer for the string-valued attributes.
      auto store_string = [value](const std::string& text) {
        char* dest = static_cast<char*>(value);
        const size_t copy_len = (std::min)(size_t(63), text.length());
        memset(dest, 0x0, 64);
        memcpy(dest, text.c_str(), copy_len);
      };

      if (attribute == HSA_CODE_OBJECT_INFO_VERSION) {
        std::string version;
        if (!GetNoteCodeObjectVersion(version)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
        store_string(version);
        return HSA_STATUS_SUCCESS;
      }

      if (attribute == HSA_CODE_OBJECT_INFO_ISA) {
        // TODO: Currently returns string representation instead of hsa_isa_t
        // which is unavailable here.
        std::string isa;
        if (!GetIsa(isa)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
        store_string(isa);
        return HSA_STATUS_SUCCESS;
      }

      const bool from_hsail_note =
        attribute == HSA_CODE_OBJECT_INFO_MACHINE_MODEL ||
        attribute == HSA_CODE_OBJECT_INFO_PROFILE ||
        attribute == HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE;
      if (from_hsail_note) {
        uint32_t hsail_major, hsail_minor;
        hsa_profile_t profile;
        hsa_machine_model_t machine_model;
        hsa_default_float_rounding_mode_t default_float_round;
        if (!GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &default_float_round)) {
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
        }
        if (attribute == HSA_CODE_OBJECT_INFO_MACHINE_MODEL) {
          *static_cast<hsa_machine_model_t*>(value) = machine_model;
        } else if (attribute == HSA_CODE_OBJECT_INFO_PROFILE) {
          *static_cast<hsa_profile_t*>(value) = profile;
        } else {
          *static_cast<hsa_default_float_rounding_mode_t*>(value) = default_float_round;
        }
        return HSA_STATUS_SUCCESS;
      }

      // Any other attribute is not supported here.
      assert(false);
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    // Looks up a symbol by (optional) module name and symbol name.
    //
    // module_name: may be null, in which case it is treated as an empty module.
    // symbol_name: name to look up; a null pointer cannot match any symbol and
    //              yields HSA_STATUS_ERROR_INVALID_SYMBOL_NAME.
    // s:           receives the handle of the first symbol whose name equals the
    //              mangled name.
    //
    // Returns HSA_STATUS_SUCCESS when a symbol is found, otherwise
    // HSA_STATUS_ERROR_INVALID_SYMBOL_NAME.
    hsa_status_t AmdHsaCode::GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *s)
    {
      // Constructing std::string from a null pointer is undefined behaviour, so
      // reject a missing symbol name up front.
      if (!symbol_name) { return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME; }

      std::string mname = MangleSymbolName(
        std::string(module_name ? module_name : ""),
        std::string(symbol_name)
      );
      for (Symbol* sym : symbols) {
        if (sym->Name() == mname) {
          *s = Symbol::ToHandle(sym);
          return HSA_STATUS_SUCCESS;
        }
      }
      return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME;
    }

    // Invokes `callback` once per symbol, in storage order, passing the symbol's
    // handle together with `code_object` and `data`. Iteration stops at the first
    // callback result other than HSA_STATUS_SUCCESS, which is then returned.
    hsa_status_t AmdHsaCode::IterateSymbols(hsa_code_object_t code_object,
                                  hsa_status_t (*callback)(
                                  hsa_code_object_t code_object,
                                  hsa_code_symbol_t symbol,
                                  void* data),
                                void* data)
    {
      for (auto it = symbols.begin(), last = symbols.end(); it != last; ++it) {
        const hsa_code_symbol_t handle = Symbol::ToHandle(*it);
        const hsa_status_t result = callback(code_object, handle, data);
        if (result != HSA_STATUS_SUCCESS) { return result; }
      }
      return HSA_STATUS_SUCCESS;
    }

    // Returns the ".hsaimage_imageinit" section, creating it on first use.
    Section* AmdHsaCode::ImageInitSection()
    {
      if (imageInit) { return imageInit; }

      imageInit = img->addSection(".hsaimage_imageinit", SHT_PROGBITS, SHF_MERGE,
                                  sizeof(amdgpu_hsa_image_descriptor_t));
      return imageInit;
    }

    // Stores an image descriptor in the image-init section and adds an
    // R_AMDGPU_V1_INIT_IMAGE relocation at `destOffset` past the image symbol
    // that refers to the stored descriptor.
    void AmdHsaCode::AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& desc)
    {
      Section* initSection = ImageInitSection();

      // Append the descriptor bytes and create an unnamed local metadata symbol
      // pointing at them.
      const uint64_t descOffset = initSection->addData(&desc, sizeof(desc), 8);
      amd::elf::Symbol* descSymbol =
        img->symtab()->addSymbol(initSection, "", descOffset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL);

      auto imageSym = image->elfSym();
      imageSym->section()->relocationSection()->addRelocation(
        R_AMDGPU_V1_INIT_IMAGE, descSymbol, imageSym->value() + destOffset, 0);
    }

    // Convenience overload: assembles an image descriptor from individual fields
    // and forwards it to the descriptor-based AddImageInitializer.
    //
    // image / destOffset: symbol and offset the initializer applies to.
    // kind, geometry, channel_order, channel_type, width, height, depth, array:
    //   copied verbatim into the corresponding descriptor fields.
    void AmdHsaCode::AddImageInitializer(
      Symbol* image, uint64_t destOffset,
      amdgpu_hsa_metadata_kind16_t kind,
      amdgpu_hsa_image_geometry8_t geometry,
      amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
      uint64_t width, uint64_t height, uint64_t depth, uint64_t array)
    {
      amdgpu_hsa_image_descriptor_t desc;
      // The descriptor's raw bytes are copied into the section, so zero it first:
      // any byte not assigned below (padding, or fields this overload does not
      // set) must not carry stack garbage into the output.
      memset(&desc, 0, sizeof(desc));
      desc.size = (uint16_t) sizeof(amdgpu_hsa_image_descriptor_t);
      desc.kind = kind;
      desc.geometry = geometry;
      desc.channel_order = channel_order;
      desc.channel_type = channel_type;
      desc.width = width;
      desc.height = height;
      desc.depth = depth;
      desc.array = array;
      AddImageInitializer(image, destOffset, desc);
    }


    // Returns the ".hsaimage_samplerinit" section, creating it on first use.
    Section* AmdHsaCode::SamplerInitSection()
    {
      if (samplerInit) { return samplerInit; }

      samplerInit = img->addSection(".hsaimage_samplerinit", SHT_PROGBITS, SHF_MERGE,
                                    sizeof(amdgpu_hsa_sampler_descriptor_t));
      return samplerInit;
    }

    // Stores a sampler descriptor in the sampler-init section and adds an
    // R_AMDGPU_V1_INIT_SAMPLER relocation at `destOffset` past the sampler symbol
    // that refers to the stored descriptor.
    void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& desc)
    {
      Section* initSection = SamplerInitSection();

      // Append the descriptor bytes and create an unnamed local metadata symbol
      // pointing at them.
      const uint64_t descOffset = initSection->addData(&desc, sizeof(desc), 8);
      amd::elf::Symbol* descSymbol =
        img->symtab()->addSymbol(initSection, "", descOffset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL);

      auto samplerSym = sampler->elfSym();
      samplerSym->section()->relocationSection()->addRelocation(
        R_AMDGPU_V1_INIT_SAMPLER, descSymbol, samplerSym->value() + destOffset, 0);
    }

    // Convenience overload: assembles a sampler descriptor (kind
    // AMDGPU_HSA_METADATA_KIND_INIT_SAMP) from individual fields and forwards it
    // to the descriptor-based AddSamplerInitializer.
    //
    // sampler / destOffset: symbol and offset the initializer applies to.
    // coord, filter, addressing: copied verbatim into the descriptor.
    void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
        amdgpu_hsa_sampler_coord8_t coord,
        amdgpu_hsa_sampler_filter8_t filter,
        amdgpu_hsa_sampler_addressing8_t addressing)
    {
      amdgpu_hsa_sampler_descriptor_t desc;
      // The descriptor's raw bytes are copied into the section, so zero it first:
      // any byte not assigned below (padding, or fields this overload does not
      // set) must not carry stack garbage into the output.
      memset(&desc, 0, sizeof(desc));
      desc.size = (uint16_t) sizeof(amdgpu_hsa_sampler_descriptor_t);
      desc.kind = AMDGPU_HSA_METADATA_KIND_INIT_SAMP;
      desc.coord = coord;
      desc.filter = filter;
      desc.addressing = addressing;
      AddSamplerInitializer(sampler, destOffset, desc);
    }

    // Adds a relocation at `destOffset` past `dest` that refers to `addrOf` with
    // addend `addrAddend`. `large` selects R_AMDGPU_V1_64, otherwise
    // R_AMDGPU_V1_32_LOW is used.
    void AmdHsaCode::AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend)
    {
      uint32_t rtype = R_AMDGPU_V1_32_LOW;
      if (large) { rtype = R_AMDGPU_V1_64; }

      auto destSym = dest->elfSym();
      destSym->section()->relocationSection()->addRelocation(
        rtype, addrOf->elfSym(), destSym->value() + destOffset, addrAddend);
    }

    // Returns HsaText()->nextDataOffset(256), i.e. the offset the text section
    // reports for the next piece of data when queried with the value 256 (the
    // same value AddKernelCode passes to addData).
    uint64_t AmdHsaCode::NextKernelCodeOffset() const
    {
      const int kernel_code_align = 256;
      return HsaText()->nextDataOffset(kernel_code_align);
    }

    // Appends `size` bytes of kernel code to the HSA text section and records
    // the resulting offset and size on the kernel symbol. Always returns true.
    bool AmdHsaCode::AddKernelCode(KernelSymbol* sym, const void* code, size_t size)
    {
      assert(nullptr != sym);

      // addData is called with 256 as its third argument and yields the offset
      // at which the code was placed.
      const uint64_t code_offset = HsaText()->addData(code, size, 256);

      sym->setValue(code_offset);
      sym->setSize(size);
      return true;
    }

    // Appends a null placeholder entry to dataSections and returns null.
    Section* AmdHsaCode::AddEmptySection()
    {
      Section* const placeholder = nullptr;
      dataSections.push_back(placeholder);
      return placeholder;
    }

    // Creates the ".hsatext" section in `segment`, registers it in dataSections
    // and remembers it as the HSA text section. Returns null when there is no
    // image.
    Section* AmdHsaCode::AddCodeSection(Segment* segment)
    {
      Section* text = nullptr;
      if (img != nullptr) {
        text = img->addSection(
          ".hsatext",
          SHT_PROGBITS,
          SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_AMDGPU_HSA_CODE | SHF_AMDGPU_HSA_AGENT,
          0,
          segment);
        dataSections.push_back(text);
        hsatext = text;
      }
      return text;
    }

    // Creates a section with the given name, type and flags in `segment` and
    // registers it in dataSections. Returns null when there is no image.
    Section* AmdHsaCode::AddDataSection(const std::string &name,
                                        uint32_t type,
                                        uint64_t flags,
                                        Segment* segment)
    {
      Section* created = nullptr;
      if (img != nullptr) {
        created = img->addSection(name, type, flags, 0, segment);
        dataSections.push_back(created);
      }
      return created;
    }

    // Initializes the segment that backs `section`. The segment is requested as
    // writable when segments are combined or when the section is not read-only
    // data.
    void AmdHsaCode::InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments)
    {
      const amdgpu_hsa_elf_segment_t segment = AmdHsaElfSectionSegment(section);
      const bool writable = combineSegments || !IsAmdHsaElfSectionROData(section);
      InitHsaSegment(segment, writable);
    }

    // Returns the data section for `sec`, creating and caching it on first use.
    // The backing segment must already exist (see InitHsaSegment); the section's
    // name, type and flags are chosen per section kind. An unknown kind asserts
    // and returns null.
    Section* AmdHsaCode::HsaDataSection(amdgpu_hsa_elf_section_t sec, bool combineSegments)
    {
      if (hsaSections[sec]) { return hsaSections[sec]; }

      const bool writable = combineSegments || !IsAmdHsaElfSectionROData(sec);
      Segment* segment = HsaSegment(AmdHsaElfSectionSegment(sec), writable);
      assert(segment); // Expected to be init the segment via InitHsaSegment.

      // Pick the section attributes, then create the section in one place.
      const char* name = nullptr;
      uint32_t type = 0;
      uint64_t flags = 0;
      switch (sec) {
      case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
        name = ".hsarodata_global_program";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL;
        break;
      case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
        name = ".hsarodata_global_agent";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT;
        break;
      case AMDGPU_HSA_RODATA_READONLY_AGENT:
        name = ".hsarodata_readonly_agent";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT;
        break;
      case AMDGPU_HSA_DATA_GLOBAL_PROGRAM:
        name = ".hsadata_global_program";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL;
        break;
      case AMDGPU_HSA_DATA_GLOBAL_AGENT:
        name = ".hsadata_global_agent";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT;
        break;
      case AMDGPU_HSA_DATA_READONLY_AGENT:
        name = ".hsadata_readonly_agent";
        type = SHT_PROGBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT;
        break;
      case AMDGPU_HSA_BSS_GLOBAL_PROGRAM:
        name = ".hsabss_global_program";
        type = SHT_NOBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL;
        break;
      case AMDGPU_HSA_BSS_GLOBAL_AGENT:
        name = ".hsabss_global_agent";
        type = SHT_NOBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT;
        break;
      case AMDGPU_HSA_BSS_READONLY_AGENT:
        name = ".hsabss_readonly_agent";
        type = SHT_NOBITS;
        flags = SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT;
        break;
      default:
        assert(false); return 0;
      }

      hsaSections[sec] = AddDataSection(name, type, flags, segment);
      return hsaSections[sec];
    }

    // Ensures the (segment, writable) slot of hsaSegments holds an initialized
    // segment, creating it through the image on first use.
    //
    // segment:  HSA segment kind; must be below AMDGPU_HSA_SEGMENT_LAST.
    // writable: selects the writable variant of the segment and adds PF_W to
    //           its flags.
    void AmdHsaCode::InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable)
    {
      // Validate the index before it is used to subscript hsaSegments; the
      // original asserted only after the table had already been read.
      assert(segment < AMDGPU_HSA_SEGMENT_LAST);
      if (!hsaSegments[segment][writable]) {
        uint32_t flags = PF_R;
        if (writable) { flags |= PF_W; }
        // Only the agent code segment is executable.
        if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) { flags |= PF_X; }
        // Program header types are allocated consecutively starting at PT_LOOS.
        uint32_t type = PT_LOOS + segment;
        hsaSegments[segment][writable] = img->initSegment(type, flags);
      }
    }

    // Asks the image to add its segments. Returns true on success; otherwise
    // returns whatever ElfImageError() reports.
    bool AmdHsaCode::AddHsaSegments()
    {
      if (img->addSegments()) { return true; }
      return ElfImageError();
    }

    // Returns the hsaSegments entry for the given segment kind and writability.
    // The entry is null until InitHsaSegment has filled it in.
    Segment* AmdHsaCode::HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable)
    {
      Segment* entry = hsaSegments[segment][writable];
      return entry;
    }

    // Adds an ELF symbol (value 0, size 0) for an executable and wraps it in a
    // new KernelSymbol appended to `symbols`. A null `section` selects the HSA
    // text section. Returns null when there is no image.
    Symbol* AmdHsaCode::AddExecutableSymbol(const std::string &name,
                                            unsigned char type,
                                            unsigned char binding,
                                            unsigned char other,
                                            Section *section)
    {
      if (nullptr == img) { return nullptr; }

      Section* owner = section;
      if (!owner) { owner = HsaText(); }

      amd::elf::Symbol* elfSymbol =
        img->symtab()->addSymbol(owner, name, 0, 0, type, binding, other);
      symbols.push_back(new KernelSymbol(elfSymbol, nullptr));
      return symbols.back();
    }

    // Adds an ELF symbol with the given value and size to `section` and wraps it
    // in a new VariableSymbol appended to `symbols`. Returns null when there is
    // no image.
    Symbol* AmdHsaCode::AddVariableSymbol(const std::string &name,
                                          unsigned char type,
                                          unsigned char binding,
                                          unsigned char other,
                                          Section *section,
                                          uint64_t value,
                                          uint64_t size)
    {
      if (nullptr == img) { return nullptr; }

      amd::elf::Symbol* elfSymbol =
        img->symtab()->addSymbol(section, name, value, size, type, binding, other);
      symbols.push_back(new VariableSymbol(elfSymbol));
      return symbols.back();
    }

    void AmdHsaCode::AddSectionSymbols()
    {
      if (nullptr == img) { return; }
      for (size_t i = 0; i < dataSections.size(); ++i) {
        if (dataSections[i] && dataSections[i]->flags() & SHF_ALLOC) {
          symbols.push_back(new VariableSymbol(img->symtab()->addSymbol(dataSections[i], "__hsa_section" + dataSections[i]->Name(), 0, 0, STT_SECTION, STB_LOCAL)));
        }
      }
    }

    // Returns the first non-null symbol whose Index() equals `index`, or null
    // when there is none.
    Symbol* AmdHsaCode::GetSymbolByElfIndex(size_t index)
    {
      for (Symbol* candidate : symbols) {
        if (!candidate) { continue; }
        if (index == candidate->Index()) { return candidate; }
      }
      return nullptr;
    }

    // Returns the first non-null symbol whose Name() equals `n`, or null when
    // there is none.
    Symbol* AmdHsaCode::FindSymbol(const std::string &n)
    {
      for (Symbol* candidate : symbols) {
        if (!candidate) { continue; }
        if (n == candidate->Name()) { return candidate; }
      }
      return nullptr;
    }

    // Currently a no-op: all three parameters are ignored because the only
    // statement in the body is commented out.
    void AmdHsaCode::AddData(amdgpu_hsa_elf_section_t s, const void* data, size_t size)
    {
      // Disabled implementation, kept for reference:
//      getDataSection(s)->addData(data, size);
    }

    // Returns the ".debug_info" section, creating it on first use.
    Section* AmdHsaCode::DebugInfo()
    {
      if (debugInfo) { return debugInfo; }
      debugInfo = img->addSection(".debug_info", SHT_PROGBITS);
      return debugInfo;
    }

    // Returns the ".debug_line" section, creating it on first use.
    Section* AmdHsaCode::DebugLine()
    {
      if (debugLine) { return debugLine; }
      debugLine = img->addSection(".debug_line", SHT_PROGBITS);
      return debugLine;
    }

    // Returns the ".debug_abbrev" section, creating it on first use.
    Section* AmdHsaCode::DebugAbbrev()
    {
      if (debugAbbrev) { return debugAbbrev; }
      debugAbbrev = img->addSection(".debug_abbrev", SHT_PROGBITS);
      return debugAbbrev;
    }

    // Creates a new SHT_PROGBITS section flagged SHF_OS_NONCONFORMING under
    // `name`, stores `size` bytes from `data` in it and returns the section.
    Section* AmdHsaCode::AddHsaHlDebug(const std::string& name, const void* data, size_t size)
    {
      Section* debugSection = img->addSection(name, SHT_PROGBITS, SHF_OS_NONCONFORMING);
      debugSection->addData(data, size, 1);
      return debugSection;
    }

    // Writes the textual dump produced by Print() to `filename`.
    //
    // Returns true when the file could be opened and the stream is still in a
    // good state after printing; false when opening or writing failed.
    bool AmdHsaCode::PrintToFile(const std::string& filename)
    {
      std::ofstream out(filename);
      if (out.fail()) { return false; }
      Print(out);
      // fail() is true on error, so it must be negated to match the "false means
      // failure" convention used for the open check above. The original returned
      // out.fail() directly, reporting success as false and failure as true.
      return !out.fail();
    }

    void AmdHsaCode::Print(std::ostream& out)
    {
      PrintNotes(out);
      out << std::endl;
      PrintSegments(out);
      out << std::endl;
      PrintSections(out);
      out << std::endl;
      PrintSymbols(out);
      out << std::endl;
      PrintMachineCode(out);
      out << std::endl;
      out << "AMD HSA Code Object End" << std::endl;
    }

    // Prints the contents of the notes that can be read from the code object:
    // code object version, HSAIL information, ISA, producer and producer
    // options. A note that cannot be read is silently skipped.
    void AmdHsaCode::PrintNotes(std::ostream& out)
    {
      // Code object version.
      {
        uint32_t major_version, minor_version;
        if (GetCodeObjectVersion(&major_version, &minor_version)) {
          out << "AMD HSA Code Object" << std::endl
              << "  Version " << major_version << "." << minor_version << std::endl;
        }
      }
      // HSAIL version, profile, machine model and default rounding mode.
      {
        uint32_t hsail_major, hsail_minor;
        hsa_profile_t profile;
        hsa_machine_model_t machine_model;
        hsa_default_float_rounding_mode_t rounding_mode;
        if (GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &rounding_mode)) {
          out << "HSAIL " << std::endl
              << "  Version: " << hsail_major << "." << hsail_minor << std::endl
              << "  Profile: " << HsaProfileToString(profile)
              << "  Machine model: " << HsaMachineModelToString(machine_model)
              << "  Default float rounding: " << HsaFloatRoundingModeToString(rounding_mode) << std::endl;
        }
      }
      // Target ISA.
      {
        std::string vendor_name, architecture_name;
        uint32_t major_version, minor_version, stepping;
        if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) {
          out << "ISA" << std::endl
              << "  Vendor " << vendor_name
              << "  Arch " << architecture_name
              << "  Version " << major_version << ":" << minor_version << ":" << stepping << std::endl;
        }
      }
      // Producer name and version. (The options string is printed separately
      // below, so no options variable is needed in this scope.)
      {
        std::string producer_name;
        uint32_t major, minor;
        if (GetNoteProducer(&major, &minor, producer_name)) {
          out << "Producer '" << producer_name << "' " << "Version " << major << ":" << minor << std::endl;
        }
      }
      // Producer options.
      {
        std::string producer_options;
        if (GetNoteProducerOptions(producer_options)) {
          out << "Producer options" << std::endl
              << "  '" << producer_options << "'" << std::endl;
        }
      }
    }

    // Prints a header with the number of data segments, then each segment.
    void AmdHsaCode::PrintSegments(std::ostream& out)
    {
      const auto total = DataSegmentCount();
      out << "Segments (total " << total << "):" << std::endl;

      size_t idx = 0;
      while (idx < total) {
        PrintSegment(out, DataSegment(idx));
        ++idx;
      }
    }

    // Prints all data sections followed, after a blank line, by all relocation
    // sections. Each group is preceded by a header giving its count.
    void AmdHsaCode::PrintSections(std::ostream& out)
    {
      const auto dataTotal = DataSectionCount();
      out << "Data Sections (total " << dataTotal << "):" << std::endl;
      for (size_t idx = 0; idx < dataTotal; ++idx) {
        PrintSection(out, DataSection(idx));
      }

      out << std::endl;

      const auto relocTotal = RelocationSectionCount();
      out << "Relocation Sections (total " << relocTotal << "):" << std::endl;
      for (size_t idx = 0; idx < relocTotal; ++idx) {
        PrintSection(out, GetRelocationSection(idx));
      }
    }

    // Prints a header with the number of symbols, then each symbol.
    void AmdHsaCode::PrintSymbols(std::ostream& out)
    {
      const auto total = SymbolCount();
      out << "Symbols (total " << total << "):" << std::endl;

      size_t idx = 0;
      while (idx < total) {
        PrintSymbol(out, GetSymbol(idx));
        ++idx;
      }
    }

    void AmdHsaCode::PrintMachineCode(std::ostream& out)
    {
      if (HasHsaText()) {
        out << std::dec;
        for (size_t i = 0; i < SymbolCount(); ++i) {
          Symbol* sym = GetSymbol(i);
          if (sym->IsKernelSymbol() && sym->IsDefinition()) {
            amd_kernel_code_t kernel_code;
            HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
            out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec;
            PrintAmdKernelCode(out, &kernel_code);
            out << std::endl;
          }
        }

        std::vector<uint8_t> isa(HsaText()->size(), 0);
        HsaText()->getData(0, isa.data(), HsaText()->size());

        out << "Disassembly:" << std::endl;
        PrintDisassembly(out, isa.data(), HsaText()->size(), 0);
        out << std::endl << std::dec;
      } else {
        out << "Machine code section is not present" << std::endl << std::endl;
      }
    }

    // Prints one segment: its index, type, flags (as 8 hex digits), image and
    // memory sizes, alignment and virtual address.
    void AmdHsaCode::PrintSegment(std::ostream& out, Segment* segment)
    {
      out << "  Segment (" << segment->getSegmentIndex() << ")" << std::endl;

      // First detail line: type and flags.
      out << "    Type: " << AmdPTLoadToString(segment->type()) << " ";
      out << "    Flags: " << "0x";
      out << std::hex << std::setw(8) << std::setfill('0') << segment->flags();
      out << std::dec << std::endl;

      // Second detail line: sizes, alignment and address.
      out << "    Image Size: " << segment->imageSize() << " ";
      out << "    Memory Size: " << segment->memSize() << " ";
      out << "    Align: " << segment->align() << " ";
      out << "    VAddr: " << segment->vaddr() << std::endl;

      out << std::dec;
    }

    // Prints one section's header (name, index, type, flags, size, address,
    // alignment) followed by its contents: relocation entries for SHT_RELA,
    // nothing for SHT_NOBITS or HSA code sections, a raw hex dump otherwise.
    void AmdHsaCode::PrintSection(std::ostream& out, Section* section)
    {
      out << "  Section " << section->Name() << " (Index " << section->getSectionIndex() << ")" << std::endl;

      // First detail line: type and flags.
      out << "    Type: " << section->type() << " ";
      out << "    Flags: " << "0x";
      out << std::hex << std::setw(8) << std::setfill('0') << section->flags();
      out << std::dec << std::endl;

      // Second detail line: size, address and alignment.
      out << "    Size:  " << section->size() << " ";
      out << "    Address: " << section->addr() << " ";
      out << "    Align: " << section->addralign() << std::endl;
      out << std::dec;

      // Code sections are printed separately.
      if (section->flags() & SHF_AMDGPU_HSA_CODE) { return; }

      const auto sectionType = section->type();
      if (sectionType == SHT_NOBITS) {
        return;
      }
      if (sectionType == SHT_RELA) {
        PrintRelocationData(out, section->asRelocationSection());
        return;
      }
      PrintRawData(out, section);
    }

    void AmdHsaCode::PrintRawData(std::ostream& out, Section* section)
    {
      out << "    Data:" << std::endl;
      unsigned char *sdata = (unsigned char*)alloca(section->size());
      section->getData(0, sdata, section->size());
      PrintRawData(out, sdata, section->size());
    }

    // Hex dump of `size` bytes, 16 bytes per row. Each row shows the row offset
    // (hex, width 7), the bytes as two-digit hex values grouped in pairs (bytes
    // past the end of the data are shown as 00), and the printable-ASCII
    // rendering of the bytes actually present ('.' for anything outside 32..126).
    void AmdHsaCode::PrintRawData(std::ostream& out, const unsigned char *data, size_t size)
    {
      out << std::hex << std::setfill('0');

      for (size_t row = 0; row < size; row += 16) {
        out << "      " << std::setw(7) << row << ":";

        // Hex columns: always 16 per row, with a space before every byte pair.
        for (size_t col = 0; col < 16; ++col) {
          uint32_t byte_value = 0;
          if (row + col < size) { byte_value = (uint32_t)data[row + col]; }
          if (col % 2 == 0) { out << ' '; }
          out << std::setw(2) << byte_value;
        }
        out << "  ";

        // ASCII columns: only for bytes that exist in this row.
        for (size_t col = 0; row + col < size && col < 16; ++col) {
          const char ch = (char)data[row + col];
          const bool printable = ch >= 32 && ch <= 126;
          const char shown = printable ? ch : '.';
          out << shown;
        }
        out << std::endl;
      }

      out << std::dec;
    }

    // Prints every entry of a relocation section: index, type, symbol name,
    // offset and addend. The header names the target section when there is one.
    void AmdHsaCode::PrintRelocationData(std::ostream& out, RelocationSection* section)
    {
      if (!section->targetSection()) {
        // Dynamic relocations do not have a target section, they work with
        // virtual addresses.
        out << "    Dynamic Relocation Entries (total " << section->relocationCount() << "):" << std::endl;
      } else {
        out << "    Relocation Entries for " << section->targetSection()->Name() << " Section (total " << section->relocationCount() << "):" << std::endl;
      }

      size_t idx = 0;
      while (idx < section->relocationCount()) {
        out << "      Relocation (Index " << idx << "):" << std::endl
            << "        Type: " << section->relocation(idx)->type() << std::endl
            << "        Symbol: " << section->relocation(idx)->symbol()->name() << std::endl
            << "        Offset: " << section->relocation(idx)->offset()
            << " Addend: " << section->relocation(idx)->addend() << std::endl;
        ++idx;
      }
      out << std::dec;
    }

    // Prints one symbol. Kernel and variable symbols get their section, offsets,
    // size, alignment, kind, linkage and definition flag; variable symbols
    // additionally get allocation, segment and const-ness.
    void AmdHsaCode::PrintSymbol(std::ostream& out, Symbol* sym)
    {
      out << "  Symbol " << sym->Name() << " (Index " << sym->Index() << "):" << std::endl;

      const bool isVariable = sym->IsVariableSymbol();
      if (sym->IsKernelSymbol() || isVariable) {
        out << "    Section: " << sym->GetSection()->Name() << " "
            << "    Section Offset: " << sym->SectionOffset() << std::endl;
        out << "    VAddr: " << sym->VAddr() << " "
            << "    Size: " << sym->Size() << " "
            << "    Alignment: " << sym->Alignment() << std::endl;
        out << "    Kind: " << HsaSymbolKindToString(sym->Kind()) << " "
            << "    Linkage: " << HsaSymbolLinkageToString(sym->Linkage()) << " "
            << "    Definition: " << (sym->IsDefinition() ? "TRUE" : "FALSE") << std::endl;
      }
      if (sym->IsVariableSymbol()) {
        out << "    Allocation: " << HsaVariableAllocationToString(sym->Allocation()) << " "
            << "    Segment: " << HsaVariableSegmentToString(sym->Segment()) << " "
            << "    Constant: " << (sym->IsConst() ? "TRUE" : "FALSE") << std::endl;
      }
      out << std::dec;
    }

    void AmdHsaCode::PrintMachineCode(std::ostream& out, KernelSymbol* sym)
    {
      assert(HsaText());
      amd_kernel_code_t kernel_code;
      HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));

      out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec;
      PrintAmdKernelCode(out, &kernel_code);
      out << std::endl;

      std::vector<uint8_t> isa(HsaText()->size(), 0);
      HsaText()->getData(0, isa.data(), HsaText()->size());
      uint64_t isa_offset = sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset;

      out << "Disassembly for " << sym->Name() << ": " << std::endl;
      PrintDisassembly(out, isa.data(), HsaText()->size(), isa_offset);
      out << std::endl << std::dec;
    }

    // Prints a disassembly of the machine code in `isa`.
    //
    // out:        stream to print to.
    // isa:        bytes to disassemble (callers in this file pass a copy of the
    //             whole HSA text section).
    // size:       number of bytes in `isa`.
    // isa_offset: byte offset to start at; 0 means "disassemble everything".
    //
    // When SP3_STATIC_LIB is defined the sp3 library produces the text, which is
    // then post-processed line by line with a small state machine that replaces
    // amd_kernel_code_t headers and padding runs by short summary comments and
    // labels each kernel's ISA with its symbol name. Without SP3_STATIC_LIB the
    // bytes are simply dumped with PrintRawData.
    void AmdHsaCode::PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset)
    {
    #ifdef SP3_STATIC_LIB
      // Default asic is ci.
      std::string asic = "CI";
      std::string vendor_name, architecture_name;
      uint32_t major_version, minor_version, stepping;
      // Choose the sp3 asic name from the major version in the ISA note.
      if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) {
        if (major_version == 7) {
          asic = "CI";
        } else if (major_version == 8) {
          asic = "VI";
        } else if (major_version == 9) {
          asic = "GFX9";
        } else if (major_version == 10) {
          asic = "GFX10";
        } else {
          assert(!"unknown compute capability");
        }
      }

      struct sp3_context *dis_state = sp3_new();
      sp3_setasic(dis_state, asic.c_str());

      // sp3 is handed the code as an array of 32-bit words, hence size / 4.
      sp3_vma *dis_vma = sp3_vm_new_ptr(0, size / 4, (const uint32_t*)isa);

      // One marker slot per 32-bit word of the text section. For every defined
      // kernel the first word of its header, the word at byte offset 252 of the
      // header and the first word of its ISA are tagged.
      // NOTE(review): the indices are not range-checked against comments.size();
      // this presumably relies on well-formed symbol offsets - confirm.
      std::vector<uint32_t> comments(HsaText()->size() / 4, 0);
      for (size_t i = 0; i < SymbolCount(); ++i) {
        Symbol* sym = GetSymbol(i);
        if (sym->IsKernelSymbol() && sym->IsDefinition()) {
          comments[sym->SectionOffset() / 4] = COMMENT_AMD_KERNEL_CODE_T_BEGIN;
          comments[(sym->SectionOffset() + 252) / 4] = COMMENT_AMD_KERNEL_CODE_T_END;
          amd_kernel_code_t kernel_code;
          HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
          comments[(kernel_code.kernel_code_entry_byte_offset + sym->SectionOffset()) / 4] = COMMENT_KERNEL_ISA_BEGIN;
        }
      }
      sp3_vma *comment_vma = sp3_vm_new_ptr(0, comments.size(), (const uint32_t*)comments.data());
      sp3_setcomments(dis_state, comment_vma, CommentTopCallBack, CommentRightCallBack, this);

      // When isa_offset == 0, disassemble the full hsatext section.
      // Otherwise disassemble only from this offset until the endpgm instruction.
      char *text = sp3_disasm(
        dis_state,
        dis_vma,
        isa_offset / 4,
        nullptr,
        SP3_SHTYPE_CS,
        nullptr,
        (unsigned)(size / 4),
        isa_offset == 0 ? SP3DIS_FORCEVALID | SP3DIS_COMMENTS : SP3DIS_COMMENTS);

      // States of the line-by-line post-processing of the sp3 output.
      enum class IsaState {
        UNKNOWN,                  // Outside of any recognized region.
        AMD_KERNEL_CODE_T_BEGIN,  // Just saw the "amd_kernel_code_t begin" marker.
        AMD_KERNEL_CODE_T,        // Inside a kernel code header (lines are dropped).
        AMD_KERNEL_CODE_T_END,    // Just saw the "amd_kernel_code_t end" marker.
        ISA_BEGIN,                // Just saw the "isa begin" marker.
        ISA,                      // Inside kernel instructions.
        PADDING,                  // Inside a run of candidate padding instructions.
      };

      std::string line;
      char *text_ptr = text;
      IsaState state = IsaState::UNKNOWN;

      // offset: start of the region currently being tracked (header, ISA or
      // padding run). padding_end: offset of the last padding line seen.
      // padding: buffered text of the current padding run.
      uint32_t offset = 0;
      uint32_t padding_end = 0;
      std::string padding;

      while (text_ptr && text_ptr[0] != '\0') {
        // Extract the next line (without its newline) and strip leading
        // whitespace.
        line.clear();
        while (text_ptr[0] != '\0' && text_ptr[0] != '\n') {
          line.push_back(text_ptr[0]);
          ++text_ptr;
        }
        ltrim(line);
        if (text_ptr[0] == '\n') {
          ++text_ptr;
        }
        switch (state) {
        case IsaState::UNKNOWN:
          assert(line != "// amd_kernel_code_t end");
          padding.clear();
          if (line == "// amd_kernel_code_t begin") {
            state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
          } else if (line == "// isa begin") {
            state = IsaState::ISA_BEGIN;
          } else if (line == "end") {
            out << line << std::endl;
          } else if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
            // This instruction text is treated as the start of a padding run
            // (presumably it is how zero-filled words disassemble - confirm).
            padding += "  " + line + "\n";
            offset = ParseInstructionOffset(line);
            padding_end = ParseInstructionOffset(line);
            state = IsaState::PADDING;
          } else if (line != "shader (null)") {
            out << "  " << line << std::endl;
          }
          break;

        case IsaState::AMD_KERNEL_CODE_T_BEGIN:
          assert(line != "// amd_kernel_code_t begin");
          assert(line != "// amd_kernel_code_t end");
          assert(line != "// isa begin");
          assert(line != "end");
          padding.clear();
          // Remember where the header starts; the header lines are not printed.
          offset = ParseInstructionOffset(line);
          state = IsaState::AMD_KERNEL_CODE_T;
          break;

        case IsaState::AMD_KERNEL_CODE_T:
          assert(line != "// amd_kernel_code_t begin");
          assert(line != "// isa begin");
          assert(line != "end");
          assert(padding.empty());
          if (line == "// amd_kernel_code_t end") {
            state = IsaState::AMD_KERNEL_CODE_T_END;
          }
          break;

        case IsaState::AMD_KERNEL_CODE_T_END:
          assert(line != "// amd_kernel_code_t begin");
          assert(line != "// amd_kernel_code_t end");
          assert(line != "// isa begin");
          assert(line != "end");
          assert(padding.empty());
          // Replace the skipped header by a one-line summary naming the kernel
          // whose header starts at `offset`.
          for (size_t i = 0; i < SymbolCount(); ++i) {
            Symbol* sym = GetSymbol(i);
            if (sym->IsKernelSymbol() && sym->IsDefinition() && sym->SectionOffset() == offset) {
              // Save and later restore the stream formatting changed below.
              std::ostream::fmtflags flags = out.flags();
              char fill = out.fill();
              out << "  //" << std::endl;
              out << "  // amd_kernel_code_t for " << sym->Name()
                  << " (" << std::hex << std::setw(12) << std::setfill('0') << std::right << offset
                  << " - " << std::setw(12) << (offset + 256) << ')' << std::endl;
              out << "  //" << std::endl;
              out << std::setfill(fill);
              out.flags(flags);
              break;
            }
          }
          state = IsaState::UNKNOWN;
          break;

        case IsaState::ISA_BEGIN:
          assert(line != "// amd_kernel_code_t begin");
          assert(line != "// amd_kernel_code_t end");
          assert(line != "// isa begin");
          padding.clear();
          offset = ParseInstructionOffset(line);
          // Print a label with the name of the kernel whose entry point is at
          // this offset, if any.
          for (size_t i = 0; i < SymbolCount(); ++i) {
            Symbol* sym = GetSymbol(i);
            if (sym->IsKernelSymbol() && sym->IsDefinition()) {
              amd_kernel_code_t kernel_code;
              HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
              if ((sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset) == offset) {
                out << "  //" << std::endl;
                out << "  // " << sym->Name() << ':' << std::endl;
                out << "  //" << std::endl;
                break;
              }
            }
          }
          if (line == "end") {
            out << line << std::endl;
            state = IsaState::UNKNOWN;
          } else {
            out << "  " << line << std::endl;
            state = IsaState::ISA;
          }
          break;

        case IsaState::ISA:
          assert(line != "// amd_kernel_code_t end");
          // A buffered run that was followed by a regular instruction was real
          // code, not padding: print it verbatim.
          if (!padding.empty()) {
            out << padding;
            out.flush();
            padding.clear();
          }
          if (line == "// amd_kernel_code_t begin") {
            state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
          } else if (line == "// isa begin") {
            state = IsaState::ISA_BEGIN;
          } else if (line == "end") {
            out << line << std::endl;
            state = IsaState::UNKNOWN;
          } else if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
            padding += "  " + line + "\n";
            offset = ParseInstructionOffset(line);
            padding_end = offset;
            state = IsaState::PADDING;
          } else {
            out << "  " << line << std::endl;
          }
          break;

        case IsaState::PADDING:
          assert(line != "// amd_kernel_code_t end");
          if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
            // The run continues.
            padding += "  " + line + "\n";
            padding_end = ParseInstructionOffset(line);
          } else if (line == "// amd_kernel_code_t begin" || line == "// isa begin" || line == "end") {
              // The run ended at a region boundary: drop the buffered lines and
              // print a summary with the covered offset range instead.
              padding.clear();
              std::ostream::fmtflags flags = out.flags();
              char fill = out.fill();
              out << "  //" << std::endl;
              out << "  // padding ("
                  << std::hex << std::setw(12) << std::setfill('0') << std::right << offset
                  << " - " << std::setw(12) << (padding_end + 4) << ')' << std::endl;
              out << "  //" << std::endl;
              out << std::setfill(fill);
              out.flags(flags);
              if (line == "// amd_kernel_code_t begin") {
                state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
              } else if (line == "// isa begin") {
                state = IsaState::ISA_BEGIN;
              } else if (line == "end") {
                out << line << std::endl;
                state = IsaState::UNKNOWN;
              }
          } else {
            // A regular instruction follows: keep the buffered run (it is
            // flushed by the ISA state on the next line) and append this line.
            padding += "  " + line + "\n";
            state = IsaState::ISA;
          }
          break;

        default:
          assert(false);
          break;
        }
      }

      // Release everything obtained from sp3.
      sp3_free(text);
      sp3_close(dis_state);
      sp3_vm_free(dis_vma);
      sp3_vm_free(comment_vma);
    #else
      // No disassembler available: fall back to a raw hex dump.
      PrintRawData(out, isa, size);
    #endif // SP3_STATIC_LIB
      out << std::dec;
    }

    // Builds the qualified name of a symbol: "<module_name>::<symbol_name>".
    // When module_name is empty the symbol name is returned unchanged.
    std::string AmdHsaCode::MangleSymbolName(const std::string& module_name, const std::string& symbol_name)
    {
      if (module_name.empty()) { return symbol_name; }

      std::string qualified(module_name);
      qualified += "::";
      qualified += symbol_name;
      return qualified;
    }

    // Appends whatever the ELF image reports through output() to this
    // object's `out` stream. Always returns false.
    bool AmdHsaCode::ElfImageError()
    {
      out << img->output();

      return false;
    }

      // Returns the AmdHsaCode object associated with code object handle `c`.
      // An already-known handle returns its cached object. Otherwise a new
      // AmdHsaCode is created, initialized from the buffer the handle value
      // points at, cached in codeMap, and returned. Returns 0 if that
      // initialization fails (nothing is cached in that case).
      AmdHsaCode* AmdHsaCodeManager::FromHandle(hsa_code_object_t c)
      {
        CodeMap::iterator cached = codeMap.find(c.handle);
        if (cached != codeMap.end()) {
          return cached->second;
        }

        // First request for this handle: the handle value itself is
        // reinterpreted as the address of the code object buffer.
        AmdHsaCode* fresh = new AmdHsaCode();
        if (!fresh->InitAsBuffer(reinterpret_cast<const void*>(c.handle), 0)) {
          delete fresh;
          return 0;
        }

        codeMap[c.handle] = fresh;
        return fresh;
      }

      // Deletes the AmdHsaCode cached for handle `c` (if any) and removes its
      // codeMap entry. Always returns true.
      bool AmdHsaCodeManager::Destroy(hsa_code_object_t c)
      {
        CodeMap::iterator entry = codeMap.find(c.handle);
        if (entry != codeMap.end()) {
          delete entry->second;
          codeMap.erase(entry);
        }
        // A missing entry is not an error: currently, a map entry is not
        // always created for every code object buffer.
        return true;
      }

    bool AmdHsaCode::PullElfV2()
    {
      for (size_t i = 0; i < img->segmentCount(); ++i) {
        Segment* s = img->segment(i);
        if (s->type() == PT_LOAD) {
          dataSegments.push_back(s);
        }
      }
      for (size_t i = 0; i < img->sectionCount(); ++i) {
        Section* sec = img->section(i);
        if (!sec) { continue; }
        if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) &&
            !(sec->flags() & SHF_EXECINSTR)) {
          dataSections.push_back(sec);
        } else if (sec->type() == SHT_RELA) {
          relocationSections.push_back(sec->asRelocationSection());
        }
        if (sec->Name() == ".text") {
          hsatext = sec;
        }
      }
      for (size_t i = 0; i < img->getSymbolTable()->symbolCount(); ++i) {
        amd::elf::Symbol* elfsym = img->getSymbolTable()->symbol(i);
        Symbol* sym = 0;
        switch (elfsym->type()) {
        case STT_AMDGPU_HSA_KERNEL: {
          amd::elf::Section* sec = elfsym->section();
          amd_kernel_code_t akc;
          if (!sec) {
            out << "Failed to find section for symbol " << elfsym->name() << std::endl;
            return false;
          }
          if (!(sec->flags() & (SHF_ALLOC | SHF_EXECINSTR))) {
            out << "Invalid code section for symbol " << elfsym->name() << std::endl;
            return false;
          }
          if (!sec->getData(elfsym->value() - sec->addr(), &akc, sizeof(amd_kernel_code_t))) {
            out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl;
            return false;
          }
          sym = new KernelSymbolV2(elfsym, &akc);
          break;
        }
        case STT_OBJECT:
        case STT_COMMON:
          sym = new VariableSymbolV2(elfsym);
          break;
        default:
          break; // Skip unknown symbols.
        }
        if (sym) { symbols.push_back(sym); }
      }

      return true;
    }

    // Constructs a V2 kernel symbol by forwarding the ELF symbol and its
    // kernel code header to the KernelSymbol base class.
    KernelSymbolV2::KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc)
      : KernelSymbol(elfsym_, akc)
    {
    }
}   // namespace code
}   // namespace hsa
}   // namespace amd
}   // namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "amd_hsa_code_util.hpp"
#include <libelf.h>
#include <fstream>
#include <cstring>
#include <iomanip>
#include <cassert>
#include <algorithm>
#include <sstream>
#ifdef _WIN32
#include <Windows.h>
#include <io.h>
#include <process.h>
#else // _WIN32
#include <sys/types.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif // _WIN32
#include "inc/Brig.h"

namespace {
// Separator written between an attribute name and its value.
auto eq = " = ";

// Stream manipulator for a first-level attribute name: writes a two-space
// indent, then sets up the stream so the next inserted item is left-aligned
// and space-padded to 60 columns.
std::ostream& attr1(std::ostream& out)
{
  return out << "  " << std::left << std::setw(60) << std::setfill(' ');
}

// Stream manipulator for a second-level attribute name: writes a four-space
// indent, then sets up the stream so the next inserted item is left-aligned
// and space-padded to 58 columns.
std::ostream& attr2(std::ostream& out)
{
  return out << "    " << std::left << std::setw(58) << std::setfill(' ');
}
} // namespace anonymous

namespace rocr {
namespace amd {
namespace hsa {
namespace common {

// Reports whether a single byte at `address` appears to be readable by this
// process. Address 0 is always rejected.
//
// Windows: the protection flags returned by VirtualQuery decide the answer;
// guard and no-access pages are rejected.
// Elsewhere: one byte starting at `address` is passed to write() on
// /dev/random; the address is accepted only if exactly one byte was written
// and the descriptor closed cleanly. If /dev/random cannot be opened the
// check is skipped and true is returned.
bool IsAccessibleMemoryAddress(uint64_t address)
{
  if (address == 0) {
    return false;
  }
#if defined(_WIN32) || defined(_WIN64)
  MEMORY_BASIC_INFORMATION region;
  if (!VirtualQuery(reinterpret_cast<void*>(address), &region, sizeof(region))) {
    return false;
  }
  // Guard and no-access pages override any readable protection bit.
  if ((region.Protect & PAGE_GUARD) || (region.Protect & PAGE_NOACCESS)) {
    return false;
  }
  return (region.Protect & PAGE_READONLY) ||
         (region.Protect & PAGE_READWRITE) ||
         (region.Protect & PAGE_WRITECOPY) ||
         (region.Protect & PAGE_EXECUTE_READ) ||
         (region.Protect & PAGE_EXECUTE_READWRITE) ||
         (region.Protect & PAGE_EXECUTE_WRITECOPY);
#else
  const int32_t probe_fd = open("/dev/random", O_WRONLY);
  if (probe_fd == -1) {
    return true;  // Skip check if /dev/random is not available.
  }
  const ssize_t written = write(probe_fd, reinterpret_cast<void*>(address), 1);
  const bool closed_cleanly = (close(probe_fd) != -1);
  return closed_cleanly && written == 1;
#endif // _WIN32 || _WIN64
}

}   //  namespace common

// Returns the display name of an HSA symbol kind, or "UNKNOWN" for any value
// not listed here.
std::string HsaSymbolKindToString(hsa_symbol_kind_t kind)
{
  if (kind == HSA_SYMBOL_KIND_VARIABLE) { return "VARIABLE"; }
  if (kind == HSA_SYMBOL_KIND_INDIRECT_FUNCTION) { return "INDIRECT_FUNCTION"; }
  if (kind == HSA_SYMBOL_KIND_KERNEL) { return "KERNEL"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA symbol linkage, or "UNKNOWN" for any
// value not listed here.
std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage)
{
  if (linkage == HSA_SYMBOL_LINKAGE_MODULE) { return "MODULE"; }
  if (linkage == HSA_SYMBOL_LINKAGE_PROGRAM) { return "PROGRAM"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA variable allocation, or "UNKNOWN" for
// any value not listed here.
std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation)
{
  if (allocation == HSA_VARIABLE_ALLOCATION_AGENT) { return "AGENT"; }
  if (allocation == HSA_VARIABLE_ALLOCATION_PROGRAM) { return "PROGRAM"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA variable segment, or "UNKNOWN" for any
// value not listed here.
std::string HsaVariableSegmentToString(hsa_variable_segment_t segment)
{
  if (segment == HSA_VARIABLE_SEGMENT_GLOBAL) { return "GLOBAL"; }
  if (segment == HSA_VARIABLE_SEGMENT_READONLY) { return "READONLY"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA profile, or "UNKNOWN" for any value not
// listed here.
std::string HsaProfileToString(hsa_profile_t profile)
{
  if (profile == HSA_PROFILE_BASE) { return "BASE"; }
  if (profile == HSA_PROFILE_FULL) { return "FULL"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA machine model, or "UNKNOWN" for any
// value not listed here.
std::string HsaMachineModelToString(hsa_machine_model_t model)
{
  if (model == HSA_MACHINE_MODEL_SMALL) { return "SMALL"; }
  if (model == HSA_MACHINE_MODEL_LARGE) { return "LARGE"; }
  return "UNKNOWN";
}

// Returns the display name of an HSA default float rounding mode, or
// "UNKNOWN" for any value not listed here.
std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode)
{
  if (mode == HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) { return "DEFAULT"; }
  if (mode == HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO) { return "ZERO"; }
  if (mode == HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR) { return "NEAR"; }
  return "UNKNOWN";
}

// Returns the display name of an AMD machine kind, or "UNKNOWN" for any
// value not listed here.
std::string AmdMachineKindToString(amd_machine_kind16_t machine)
{
  if (machine == AMD_MACHINE_KIND_UNDEFINED) { return "UNDEFINED"; }
  if (machine == AMD_MACHINE_KIND_AMDGPU) { return "AMDGPU"; }
  return "UNKNOWN";
}

// Returns the display name of an AMD float rounding mode, or "UNKNOWN" for
// any value not listed here.
std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode)
{
  if (round_mode == AMD_FLOAT_ROUND_MODE_NEAREST_EVEN) { return "NEAREST_EVEN"; }
  if (round_mode == AMD_FLOAT_ROUND_MODE_PLUS_INFINITY) { return "PLUS_INFINITY"; }
  if (round_mode == AMD_FLOAT_ROUND_MODE_MINUS_INFINITY) { return "MINUS_INFINITY"; }
  if (round_mode == AMD_FLOAT_ROUND_MODE_ZERO) { return "ZERO"; }
  return "UNKNOWN";
}

// Returns the display name of an AMD float denormal mode, or "UNKNOWN" for
// any value not listed here. Note that AMD_FLOAT_DENORM_MODE_NO_FLUSH is
// rendered as "FLUSH_NONE".
std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode)
{
  if (denorm_mode == AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT) { return "FLUSH_SOURCE_OUTPUT"; }
  if (denorm_mode == AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT) { return "FLUSH_OUTPUT"; }
  if (denorm_mode == AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE) { return "FLUSH_SOURCE"; }
  if (denorm_mode == AMD_FLOAT_DENORM_MODE_NO_FLUSH) { return "FLUSH_NONE"; }
  return "UNKNOWN";
}

// Returns which work-item id components a system VGPR work-item id setting
// names ("X", "X, Y" or "X, Y, Z"), or "UNKNOWN" for any value not listed
// here.
std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id)
{
  if (system_vgpr_workitem_id == AMD_SYSTEM_VGPR_WORKITEM_ID_X) { return "X"; }
  if (system_vgpr_workitem_id == AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y) { return "X, Y"; }
  if (system_vgpr_workitem_id == AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z) { return "X, Y, Z"; }
  return "UNKNOWN";
}

// Returns the display name of an AMD element byte size, or "UNKNOWN" for any
// value not listed here.
std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size)
{
  if (element_byte_size == AMD_ELEMENT_BYTE_SIZE_2) { return "WORD (2 bytes)"; }
  if (element_byte_size == AMD_ELEMENT_BYTE_SIZE_4) { return "DWORD (4 bytes)"; }
  if (element_byte_size == AMD_ELEMENT_BYTE_SIZE_8) { return "QWORD (8 bytes)"; }
  if (element_byte_size == AMD_ELEMENT_BYTE_SIZE_16) { return "16 bytes"; }
  return "UNKNOWN";
}

// Renders an exception-kind bit mask as a bracketed, comma-separated list,
// e.g. "[INVALID_OPERATION, OVERFLOW]". Known bits are listed in a fixed
// order; if any bit remains after the known ones are removed, "UNKNOWN" is
// appended. Returns an empty string when no bit is set.
std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions)
{
  std::string e;
  if (exceptions & AMD_EXCEPTION_KIND_INVALID_OPERATION) {
    // The label was previously misspelled as "INVALID_OPERATON".
    e += ", INVALID_OPERATION";
    exceptions &= ~AMD_EXCEPTION_KIND_INVALID_OPERATION;
  }
  if (exceptions & AMD_EXCEPTION_KIND_DIVISION_BY_ZERO) {
    e += ", DIVISION_BY_ZERO";
    exceptions &= ~AMD_EXCEPTION_KIND_DIVISION_BY_ZERO;
  }
  if (exceptions & AMD_EXCEPTION_KIND_OVERFLOW) {
    e += ", OVERFLOW";
    exceptions &= ~AMD_EXCEPTION_KIND_OVERFLOW;
  }
  if (exceptions & AMD_EXCEPTION_KIND_UNDERFLOW) {
    e += ", UNDERFLOW";
    exceptions &= ~AMD_EXCEPTION_KIND_UNDERFLOW;
  }
  if (exceptions & AMD_EXCEPTION_KIND_INEXACT) {
    e += ", INEXACT";
    exceptions &= ~AMD_EXCEPTION_KIND_INEXACT;
  }
  // Anything still set was not recognized above.
  if (exceptions) {
    e += ", UNKNOWN";
  }
  if (!e.empty()) {
    // Drop the leading ", " before wrapping the list in brackets.
    e = "[" + e.erase(0, 2) + "]";
  }
  return e;
}

// Returns the decimal text of 2^p.
// The shift is done in 64 bits so exponents 31..63 no longer overflow the
// `int` shift the function used before. An exponent of 64 or more cannot be
// represented and is rendered symbolically as "2^<p>".
std::string AmdPowerTwoToString(amd_powertwo8_t p)
{
  const unsigned exponent = static_cast<unsigned>(p);
  if (exponent >= 64) {
    return "2^" + std::to_string(exponent);
  }
  return std::to_string(1ULL << exponent);
}

// Maps an AMD HSA ELF section to the segment it belongs to. The rodata, data
// and bss sections of each of the three groups (global program, global agent,
// readonly agent) map to that group's segment. Any other value trips an
// assertion and yields AMDGPU_HSA_SEGMENT_LAST.
amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec)
{
  if (sec == AMDGPU_HSA_RODATA_GLOBAL_PROGRAM ||
      sec == AMDGPU_HSA_DATA_GLOBAL_PROGRAM ||
      sec == AMDGPU_HSA_BSS_GLOBAL_PROGRAM) {
    return AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM;
  }
  if (sec == AMDGPU_HSA_RODATA_GLOBAL_AGENT ||
      sec == AMDGPU_HSA_DATA_GLOBAL_AGENT ||
      sec == AMDGPU_HSA_BSS_GLOBAL_AGENT) {
    return AMDGPU_HSA_SEGMENT_GLOBAL_AGENT;
  }
  if (sec == AMDGPU_HSA_RODATA_READONLY_AGENT ||
      sec == AMDGPU_HSA_DATA_READONLY_AGENT ||
      sec == AMDGPU_HSA_BSS_READONLY_AGENT) {
    return AMDGPU_HSA_SEGMENT_READONLY_AGENT;
  }
  assert(false);
  return AMDGPU_HSA_SEGMENT_LAST;
}

// Returns true when `sec` is one of the three read-only data (rodata)
// sections, false for every other section.
bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec)
{
  switch (sec) {
  case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
  case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
  case AMDGPU_HSA_RODATA_READONLY_AGENT:
    // These cases previously fell through to `default`, so the function
    // could never report a rodata section.
    return true;
  default:
    return false;
  }
}

// Returns the display name of an AMD HSA ELF segment, or "UNKNOWN" for any
// value not listed here.
std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg)
{
  if (seg == AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM) { return "GLOBAL_PROGRAM"; }
  if (seg == AMDGPU_HSA_SEGMENT_GLOBAL_AGENT) { return "GLOBAL_AGENT"; }
  if (seg == AMDGPU_HSA_SEGMENT_READONLY_AGENT) { return "READONLY_AGENT"; }
  if (seg == AMDGPU_HSA_SEGMENT_CODE_AGENT) { return "CODE_AGENT"; }
  return "UNKNOWN";
}

// Names an ELF program header type that encodes an AMD HSA segment.
// Types in [PT_LOOS, PT_LOOS + AMDGPU_HSA_SEGMENT_LAST) are translated via
// AmdHsaElfSegmentToString using their offset from PT_LOOS; anything else is
// rendered as "UNKNOWN (<decimal type>)".
std::string AmdPTLoadToString(uint64_t type)
{
  const bool is_hsa_segment = PT_LOOS <= type && type < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST;
  if (!is_hsa_segment) {
    return "UNKNOWN (" + std::to_string(type) + ")";
  }
  return AmdHsaElfSegmentToString((amdgpu_hsa_elf_segment_t) (type - PT_LOOS));
}

// Writes a human-readable dump of an amd_kernel_code_t header to `out`, one
// "name = value" line per field.
//
// Version, machine, entry offset, scratch size, register counts and wavefront
// size are always printed. Most other fields are printed only when non-zero.
// The reserved register ranges are printed only when their count is positive,
// and the two debug SGPR fields only when the debug-enabled property bit is
// set and the field is not 0xFFFF. The packed register words, the kernel code
// properties and the control directives are delegated to their own printers.
void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc)
{
  const uint32_t debug_enabled = AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED);

  // Starts a first-level attribute line: padded name followed by " = ".
  auto field = [&out](const char* name) -> std::ostream& {
    return out << attr1 << name << eq;
  };

  field("amd_kernel_code_version_major") << akc->amd_kernel_code_version_major << std::endl;
  field("amd_kernel_code_version_minor") << akc->amd_kernel_code_version_minor << std::endl;
  field("amd_machine_kind") << AmdMachineKindToString(akc->amd_machine_kind) << std::endl;
  // 8-bit fields are widened so they print as numbers, not characters.
  field("amd_machine_version_major") << (uint32_t)akc->amd_machine_version_major << std::endl;
  field("amd_machine_version_minor") << (uint32_t)akc->amd_machine_version_minor << std::endl;
  field("amd_machine_version_stepping") << (uint32_t)akc->amd_machine_version_stepping << std::endl;
  field("kernel_code_entry_byte_offset") << akc->kernel_code_entry_byte_offset << std::endl;
  if (akc->kernel_code_prefetch_byte_offset) {
    field("kernel_code_prefetch_byte_offset") << akc->kernel_code_prefetch_byte_offset << std::endl;
  }
  if (akc->kernel_code_prefetch_byte_size) {
    field("kernel_code_prefetch_byte_size") << akc->kernel_code_prefetch_byte_size << std::endl;
  }
  field("max_scratch_backing_memory_byte_size") << akc->max_scratch_backing_memory_byte_size << std::endl;

  PrintAmdComputePgmRsrcOne(out, akc->compute_pgm_rsrc1);
  PrintAmdComputePgmRsrcTwo(out, akc->compute_pgm_rsrc2);
  PrintAmdKernelCodeProperties(out, akc->kernel_code_properties);

  if (akc->workitem_private_segment_byte_size) {
    field("workitem_private_segment_byte_size") << akc->workitem_private_segment_byte_size << std::endl;
  }
  if (akc->workgroup_group_segment_byte_size) {
    field("workgroup_group_segment_byte_size") << akc->workgroup_group_segment_byte_size << std::endl;
  }
  if (akc->gds_segment_byte_size) {
    field("gds_segment_byte_size") << akc->gds_segment_byte_size << std::endl;
  }
  if (akc->kernarg_segment_byte_size) {
    field("kernarg_segment_byte_size") << akc->kernarg_segment_byte_size << std::endl;
  }
  if (akc->workgroup_fbarrier_count) {
    field("workgroup_fbarrier_count") << akc->workgroup_fbarrier_count << std::endl;
  }
  field("wavefront_sgpr_count") << (uint32_t)akc->wavefront_sgpr_count << std::endl;
  field("workitem_vgpr_count") << (uint32_t)akc->workitem_vgpr_count << std::endl;
  if (akc->reserved_vgpr_count > 0) {
    field("reserved_vgpr_first") << (uint32_t)akc->reserved_vgpr_first << std::endl;
    field("reserved_vgpr_count") << (uint32_t)akc->reserved_vgpr_count << std::endl;
  }
  if (akc->reserved_sgpr_count > 0) {
    field("reserved_sgpr_first") << (uint32_t)akc->reserved_sgpr_first << std::endl;
    field("reserved_sgpr_count") << (uint32_t)akc->reserved_sgpr_count << std::endl;
  }
  if (debug_enabled && (akc->debug_wavefront_private_segment_offset_sgpr != uint16_t(-1))) {
    field("debug_wavefront_private_segment_offset_sgpr")
        << (uint32_t)akc->debug_wavefront_private_segment_offset_sgpr << std::endl;
  }
  if (debug_enabled && (akc->debug_private_segment_buffer_sgpr != uint16_t(-1))) {
    // Printed as a range "first:first+3".
    field("debug_private_segment_buffer_sgpr")
        << (uint32_t)akc->debug_private_segment_buffer_sgpr
        << ":"
        << (uint32_t)(akc->debug_private_segment_buffer_sgpr + 3)
        << std::endl;
  }
  // Alignments and wavefront size are stored as power-of-two exponents; both
  // the expanded value and the raw exponent are shown.
  if (akc->kernarg_segment_alignment) {
    field("kernarg_segment_alignment")
        << AmdPowerTwoToString(akc->kernarg_segment_alignment)
        << " (" << (uint32_t) akc->kernarg_segment_alignment << ")"
        << std::endl;
  }
  if (akc->group_segment_alignment) {
    field("group_segment_alignment")
        << AmdPowerTwoToString(akc->group_segment_alignment)
        << " (" << (uint32_t) akc->group_segment_alignment << ")"
        << std::endl;
  }
  if (akc->private_segment_alignment) {
    field("private_segment_alignment")
        << AmdPowerTwoToString(akc->private_segment_alignment)
        << " (" << (uint32_t) akc->private_segment_alignment << ")"
        << std::endl;
  }
  field("wavefront_size")
      << AmdPowerTwoToString(akc->wavefront_size)
      << " (" << (uint32_t) akc->wavefront_size << ")"
      << std::endl;

  PrintAmdControlDirectives(out, akc->control_directives);
}

// Writes a decoded view of a COMPUTE_PGM_RSRC1 value to `out`: a header line
// with the raw value as 8 zero-padded hex digits, then one second-level
// "name = value" line per field. Numeric and mode fields are always printed;
// single-bit flags are printed (as TRUE) only when set.
void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1)
{
  // std::right is required here: the attr1/attr2 manipulators leave the
  // stream left-adjusted, and with left adjustment the '0' fill would be
  // appended after the digits, printing a wrong hex value.
  out << "  COMPUTE_PGM_RSRC1 (0x" << std::hex << std::right << std::setw(8) << std::setfill('0') << compute_pgm_rsrc1 << "):" << std::endl;
  out << std::dec;

  // Prints "<name> = TRUE" for a flag that is set; prints nothing otherwise.
  auto print_if_set = [&out](bool is_set, const char* name) {
    if (is_set) {
      out << attr2 << name << eq << "TRUE"
          << std::endl;
    }
  };

  uint32_t granulated_workitem_vgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT);
  out << attr2 << "granulated_workitem_vgpr_count" << eq
      << granulated_workitem_vgpr_count
      << std::endl;
  uint32_t granulated_wavefront_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);
  out << attr2 << "granulated_wavefront_sgpr_count" << eq
      << granulated_wavefront_sgpr_count
      << std::endl;
  uint32_t priority = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY);
  out << attr2 << "priority" << eq
      << priority
      << std::endl;
  uint32_t float_round_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32);
  out << attr2 << "float_round_mode_32" << eq
      << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_32)
      << std::endl;
  uint32_t float_round_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64);
  out << attr2 << "float_round_mode_16_64" << eq
      << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_16_64)
      << std::endl;
  uint32_t float_denorm_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32);
  out << attr2 << "float_denorm_mode_32" << eq
      << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_32)
      << std::endl;
  uint32_t float_denorm_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64);
  out << attr2 << "float_denorm_mode_16_64" << eq
      << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_16_64)
      << std::endl;

  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIV) != 0, "priv");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP) != 0, "enable_dx10_clamp");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE) != 0, "debug_mode");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE) != 0, "enable_ieee_mode");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_BULKY) != 0, "bulky");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER) != 0, "cdbg_user");
}

// Writes a decoded view of a COMPUTE_PGM_RSRC2 value to `out`: a header line
// with the raw value as 8 zero-padded hex digits, then one second-level
// "name = value" line per field. user_sgpr_count, enable_vgpr_workitem_id and
// granulated_lds_size are always printed; single-bit flags are printed (as
// TRUE) only when set.
void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2)
{
  // std::right is required here: the attr1/attr2 manipulators leave the
  // stream left-adjusted, and with left adjustment the '0' fill would be
  // appended after the digits, printing a wrong hex value.
  out << "  COMPUTE_PGM_RSRC2 (0x" << std::hex << std::right << std::setw(8) << std::setfill('0') << compute_pgm_rsrc2 << "):" << std::endl;
  out << std::dec;

  // Prints "<name> = TRUE" for a flag that is set; prints nothing otherwise.
  auto print_if_set = [&out](bool is_set, const char* name) {
    if (is_set) {
      out << attr2 << name << eq << "TRUE"
          << std::endl;
    }
  };

  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET) != 0, "enable_sgpr_private_segment_wave_byte_offset");
  uint32_t user_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT);
  out << attr2 << "user_sgpr_count" << eq
      << user_sgpr_count
      << std::endl;
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER) != 0, "enable_trap_handler");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X) != 0, "enable_sgpr_workgroup_id_x");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y) != 0, "enable_sgpr_workgroup_id_y");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z) != 0, "enable_sgpr_workgroup_id_z");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO) != 0, "enable_sgpr_workgroup_info");
  uint32_t enable_vgpr_workitem_id = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID);
  out << attr2 << "enable_vgpr_workitem_id" << eq
      << AmdSystemVgprWorkitemIdToString((amd_system_vgpr_workitem_id_t)enable_vgpr_workitem_id)
      << std::endl;
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH) != 0, "enable_exception_address_watch");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION) != 0, "enable_exception_memory_violation");
  uint32_t granulated_lds_size = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE);
  out << attr2 << "granulated_lds_size" << eq
      << granulated_lds_size
      << std::endl;
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION) != 0, "enable_exception_ieee_754_fp_invalid_operation");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE) != 0, "enable_exception_fp_denormal_source");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO) != 0, "enable_exception_ieee_754_fp_division_by_zero");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW) != 0, "enable_exception_ieee_754_fp_overflow");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW) != 0, "enable_exception_ieee_754_fp_underflow");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT) != 0, "enable_exception_ieee_754_fp_inexact");
  print_if_set(AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO) != 0, "enable_exception_int_division_by_zero");
}

void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties)
{
  out << "  KERNEL_CODE_PROPERTIES (0x" << std::hex << std::setw(8) << std::setfill('0') << kernel_code_properties << "):" << std::endl;
  out << std::dec;

  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER)) {
    out << attr2 << "enable_sgpr_private_segment_buffer" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR)) {
    out << attr2 << "enable_sgpr_dispatch_ptr" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
    out << attr2 << "enable_sgpr_queue_ptr" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR)) {
    out << attr2 << "enable_sgpr_kernarg_segment_ptr" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID)) {
    out << attr2 << "enable_sgpr_dispatch_id" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT)) {
    out << attr2 << "enable_sgpr_flat_scratch_init" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)) {
    out << attr2 << "enable_sgpr_private_segment_size" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X)) {
    out << attr2 << "enable_sgpr_grid_workgroup_count_x" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y)) {
    out << attr2 << "enable_sgpr_grid_workgroup_count_y" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z)) {
    out << attr2 << "enable_sgpr_grid_workgroup_count_z" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS)) {
    out << attr2 << "enable_ordered_append_gds" << eq << "TRUE"
        << std::endl;
  }
  uint32_t private_element_size = AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE);
  out << attr2 << "private_element_size" << eq
      << AmdElementByteSizeToString((amd_element_byte_size_t)private_element_size)
      << std::endl;
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_PTR64)) {
    out << attr2 << "is_ptr64" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK)) {
    out << attr2 << "is_dynamic_callstack" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED)) {
    out << attr2 << "is_debug_enabled" << eq << "TRUE"
        << std::endl;
  }
  if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED)) {
    out << attr2 << "is_xnack_enabled" << eq << "TRUE"
        << std::endl;
  }
}

void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives)
{
  if (!control_directives.enabled_control_directives) {
    return;
  }

  out << "  CONTROL_DIRECTIVES:" << std::endl;

  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS) {
    out << attr2 << "enable_break_exceptions" << eq
        << AmdExceptionKindToString(control_directives.enable_break_exceptions).c_str()
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS) {
    out << attr2 << "enable_detect_exceptions" << eq
        << AmdExceptionKindToString(control_directives.enable_detect_exceptions).c_str()
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE) {
    out << attr2 << "max_dynamic_group_size" << eq
        << control_directives.max_dynamic_group_size
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE) {
    out << attr2 << "max_flat_grid_size" << eq
        << control_directives.max_flat_grid_size
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE) {
    out << attr2 << "max_flat_workgroup_size" << eq
        << control_directives.max_flat_workgroup_size
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM) {
    out << attr2 << "required_dim" << eq
        << (uint32_t)control_directives.required_dim
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE) {
    out << attr2 << "required_grid_size" << eq
        << "("
        << control_directives.required_grid_size[0]
        << ", "
        << control_directives.required_grid_size[1]
        << ", "
        << control_directives.required_grid_size[2]
        << ")"
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE) {
    out << attr2 << "required_workgroup_size" << eq
        << "("
        << control_directives.required_workgroup_size[0]
        << ", "
        << control_directives.required_workgroup_size[1]
        << ", "
        << control_directives.required_workgroup_size[2]
        << ")"
        << std::endl;
  }
  if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS) {
    out << attr2 << "require_no_partial_workgroups" << eq << "TRUE"
        << std::endl;
  }
}

namespace code_options {

  // Stream manipulator that separates options: writes one space unless the
  // stream's put position converts to zero (nothing has been written yet).
  std::ostream& space(std::ostream& out)
  {
    const std::streamoff position = out.tellp();
    if (position != 0) {
      out << " ";
    }
    return out;
  }

  // Writes the option separator followed by "-hsa_control_directive:<name>=",
  // leaving the stream positioned for the directive's value.
  std::ostream& operator<<(std::ostream& out, const control_directive& d)
  {
    space(out);
    out << "-hsa_control_directive:";
    out << d.name;
    out << "=";
    return out;
  }

  // Maps a single BRIG exception value to its option-string spelling.
  // Values without a known spelling trip an assertion and, when assertions
  // are compiled out, yield "<unknown_BRIG_exception>".
  const char *BrigExceptionString(BrigExceptions32_t e)
  {
    if (e == BRIG_EXCEPTIONS_INVALID_OPERATION) { return "INVALID_OPERATION"; }
    if (e == BRIG_EXCEPTIONS_DIVIDE_BY_ZERO) { return "DIVIDE_BY_ZERO"; }
    if (e == BRIG_EXCEPTIONS_OVERFLOW) { return "OVERFLOW"; }
    if (e == BRIG_EXCEPTIONS_INEXACT) { return "INEXACT"; }

    assert(false);
    return "<unknown_BRIG_exception>";
  }

  // Writes the exceptions selected in `e.mask` as a comma-separated list of
  // names (no spaces); writes nothing when no bit below
  // BRIG_EXCEPTIONS_FIRST_USER_DEFINED is set.
  //
  // The candidate value is advanced by shifting so that only single-bit
  // values are tested. Stepping by one would also test combined values such
  // as 3 or 5, which match whenever any of their bits is set and have no name
  // in BrigExceptionString.
  // NOTE(review): this presumes the BRIG exception constants are single-bit
  // flags starting at BRIG_EXCEPTIONS_INVALID_OPERATION (as in the HSA PRM);
  // a set bit that BrigExceptionString does not know still reaches its
  // default branch.
  std::ostream& operator<<(std::ostream& out, const exceptions_mask& e)
  {
    bool first = true;
    for (BrigExceptions32_t be = BRIG_EXCEPTIONS_INVALID_OPERATION;
         be != 0 && be < BRIG_EXCEPTIONS_FIRST_USER_DEFINED;
         be <<= 1) {
      if (e.mask & be) {
        if (first) { first = false; } else { out << ","; }
        out << BrigExceptionString(be);
      }
    }
    return out;
  }

  // Writes one "-hsa_control_directive:<NAME>=<value>" option for every
  // directive enabled in `control_directives_mask`; writes nothing when the
  // mask is zero. Options are separated by the `space` manipulator, and
  // multi-component values are joined with commas.
  std::ostream& operator<<(std::ostream& out, const control_directives& cd)
  {
    const hsa_ext_control_directives_t& d = cd.d;
    uint64_t mask = d.control_directives_mask;
    if (!mask) { return out; }

    if (mask & BRIG_CONTROL_ENABLEBREAKEXCEPTIONS) {
      out <<
        control_directive("ENABLEBREAKEXCEPTIONS") <<
        exceptions_mask(d.break_exceptions_mask);
    }
    if (mask & BRIG_CONTROL_ENABLEDETECTEXCEPTIONS) {
      out <<
        control_directive("ENABLEDETECTEXCEPTIONS") <<
        exceptions_mask(d.detect_exceptions_mask);
    }
    if (mask & BRIG_CONTROL_MAXDYNAMICGROUPSIZE) {
      out <<
        control_directive("MAXDYNAMICGROUPSIZE") <<
        d.max_dynamic_group_size;
    }
    if (mask & BRIG_CONTROL_MAXFLATGRIDSIZE) {
      out <<
        control_directive("MAXFLATGRIDSIZE") <<
        d.max_flat_grid_size;
    }
    if (mask & BRIG_CONTROL_MAXFLATWORKGROUPSIZE) {
      out <<
        control_directive("MAXFLATWORKGROUPSIZE") <<
        d.max_flat_workgroup_size;
    }
    if (mask & BRIG_CONTROL_REQUIREDDIM) {
      // Widen before streaming: a byte-sized field would otherwise be written
      // as a raw character instead of a number (PrintAmdControlDirectives
      // casts its required_dim the same way).
      out <<
        control_directive("REQUIREDDIM") <<
        static_cast<uint32_t>(d.required_dim);
    }
    if (mask & BRIG_CONTROL_REQUIREDGRIDSIZE) {
      out <<
        control_directive("REQUIREDGRIDSIZE") <<
        d.required_grid_size[0] << "," <<
        d.required_grid_size[1] << "," <<
        d.required_grid_size[2];
    }
    if (mask & BRIG_CONTROL_REQUIREDWORKGROUPSIZE) {
      out <<
        control_directive("REQUIREDWORKGROUPSIZE") <<
        d.required_workgroup_size.x << "," <<
        d.required_workgroup_size.y << "," <<
        d.required_workgroup_size.z;
    }
    return out;
  }
}

// Returns a static, human-readable description of `status`.
// Every known code is reported as "<SYMBOLIC_NAME>: <explanation>"; any other
// value yields "Unknown HSA status". The returned pointer refers to a string
// literal and must not be freed.
const char* hsaerr2str(hsa_status_t status) {
  // Switch on the numeric value: the HSA_EXT_* codes handled below are not
  // enumerators of hsa_status_t.
  switch ((unsigned) status) {
    case HSA_STATUS_SUCCESS:
      return
          "HSA_STATUS_SUCCESS: The function has been executed successfully.";
    case HSA_STATUS_INFO_BREAK:
      return
          "HSA_STATUS_INFO_BREAK: A traversal over a list of "
          "elements has been interrupted by the application before "
          "completing.";
    case HSA_STATUS_ERROR:
      return "HSA_STATUS_ERROR: A generic error has occurred.";
    case HSA_STATUS_ERROR_INVALID_ARGUMENT:
      return
          "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual "
          "arguments does not meet a precondition stated in the "
          "documentation of the corresponding formal argument.";
    case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION:
      return
          "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested "
          "queue creation is not valid.";
    case HSA_STATUS_ERROR_INVALID_ALLOCATION:
      return
          "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested "
          "allocation is not valid.";
    case HSA_STATUS_ERROR_INVALID_AGENT:
      return
          "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid.";
    case HSA_STATUS_ERROR_INVALID_REGION:
      return
          "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid.";
    case HSA_STATUS_ERROR_INVALID_SIGNAL:
      return
          "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid.";
    case HSA_STATUS_ERROR_INVALID_QUEUE:
      return
          "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid.";
    case HSA_STATUS_ERROR_OUT_OF_RESOURCES:
      return
          "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to "
          "allocate the necessary resources. This error may also "
          "occur when the core runtime library needs to spawn "
          "threads or create internal OS-specific events.";
    case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT:
      return
          "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet "
          "is malformed.";
    case HSA_STATUS_ERROR_RESOURCE_FREE:
      return
          "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been "
          "detected while releasing a resource.";
    case HSA_STATUS_ERROR_NOT_INITIALIZED:
      return
          "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than "
          "hsa_init has been invoked while the reference count of "
          "the HSA runtime is zero.";
    case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW:
      return
          "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum "
          "reference count for the object has been reached.";
    case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS:
      return
          "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to "
          "a function are not compatible.";
    case HSA_STATUS_ERROR_INVALID_INDEX:
      return
          "HSA_STATUS_ERROR_INVALID_INDEX: The index is invalid.";
    case HSA_STATUS_ERROR_INVALID_ISA:
      return
          "HSA_STATUS_ERROR_INVALID_ISA: The instruction set architecture "
          "is invalid.";
    case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
      return
          "HSA_STATUS_ERROR_INVALID_CODE_OBJECT: The code object is invalid.";
    case HSA_STATUS_ERROR_INVALID_EXECUTABLE:
      return
          "HSA_STATUS_ERROR_INVALID_EXECUTABLE: The executable is invalid.";
    case HSA_STATUS_ERROR_FROZEN_EXECUTABLE:
      return
          "HSA_STATUS_ERROR_FROZEN_EXECUTABLE: The executable is frozen.";
    case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
      return
          "HSA_STATUS_ERROR_INVALID_SYMBOL_NAME: There is no symbol with "
          "the given name.";
    case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED:
      return
          "HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED: The variable is "
          "already defined.";
    case HSA_STATUS_ERROR_VARIABLE_UNDEFINED:
      return
          "HSA_STATUS_ERROR_VARIABLE_UNDEFINED: The variable is undefined.";
    case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM:
      return
          "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program";
    case HSA_EXT_STATUS_ERROR_INVALID_MODULE:
      return "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module";
    case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE:
      return
          "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module";
    case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED:
      return
          "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already "
          "included";
    case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH:
      return
          "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch";
    case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED:
      return
          "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed";
    case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH:
      return
          "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch";
    default:
      return
          "Unknown HSA status";
  }
}

// Reads the entire contents of `filename` (opened in binary mode) into
// `buffer`, resizing it to the file size.
// Returns true on success and false when the file cannot be opened, its size
// cannot be determined, or the read fails.
bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer)
{
  std::ifstream file(filename, std::ios::binary);
  if (!file) { return false; }

  // Determine the size by seeking to the end.
  file.seekg(0, std::ios::end);
  const std::streamsize size = file.tellg();
  // tellg() reports failure as -1; converting that to size_t would request an
  // enormous allocation, so bail out before touching the buffer.
  if (!file || size < 0) { return false; }
  file.seekg(0, std::ios::beg);

  buffer.resize(static_cast<size_t>(size));
  if (!file.read(buffer.data(), size)) { return false; }
  return true;
}

// On non-Windows platforms, map the underscore-prefixed CRT names used in this
// file onto their POSIX counterparts.
#ifndef _WIN32
#define _tempnam tempnam
#define _close close
#define _getpid getpid
#define _open open
#endif // _WIN32

// Creates and opens a new, empty temporary file and returns its descriptor,
// or -1 on failure.
//
// The file name starts with "<prefix>_<pid>_". On Windows the file is created
// in the directory reported by GetTempPath with delete-on-close semantics;
// elsewhere it is created by mkstemp from "<prefix>_<pid>_XXXXXX" and unlinked
// immediately, so it disappears once the descriptor is closed.
// Up to 20 attempts are made before giving up.
int OpenTempFile(const char* prefix)
{
  std::string tname = prefix;
  tname += "_";
  tname += std::to_string(_getpid());
  tname += "_";
  // Loop because several threads can generate same filename.
  for (unsigned attempt = 0; attempt < 20; ++attempt) {
#ifdef _WIN32
    char dir[MAX_PATH+1];
    if (!GetTempPath(sizeof(dir), dir)) { return -1; }
    char *name = _tempnam(dir, tname.c_str());
    if (!name) { return -1; }
    HANDLE h = CreateFile(
      name,
      GENERIC_READ | GENERIC_WRITE,
      0, // No sharing
      NULL,
      CREATE_NEW,
      FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE,
      NULL);
    free(name);
    if (h == INVALID_HANDLE_VALUE) { continue; }
    return _open_osfhandle((intptr_t)h, 0);
#else // _WIN32
    // mkstemp rewrites the trailing "XXXXXX" in place, so give it a fresh,
    // writable template on every attempt instead of writing through c_str()
    // and growing the shared base name on each retry.
    std::string name_template = tname + "XXXXXX";
    int d = mkstemp(&name_template[0]);
    if (d < 0) { continue; }
    if (unlink(name_template.c_str()) < 0) { _close(d); return -1; }
    return d;
#endif // _WIN32
  }
  return -1;
}

// Closes a descriptor previously returned by OpenTempFile.
// The result of the underlying close call is ignored.
void CloseTempFile(int fd)
{
  _close(fd);
}

// Returns the marker text for the given DumpIsaCommentType value.
// `ctx` is not used. An unrecognised `type` trips an assertion and, when
// assertions are compiled out, yields an empty string.
const char * CommentTopCallBack(void *ctx, int type) {
  switch (type) {
  case COMMENT_AMD_KERNEL_CODE_T_BEGIN: return "amd_kernel_code_t begin";
  case COMMENT_AMD_KERNEL_CODE_T_END:   return "amd_kernel_code_t end";
  case COMMENT_KERNEL_ISA_BEGIN:        return "isa begin";
  default:
    break;
  }
  assert(false);
  return "";
}
// Counterpart of CommentTopCallBack that never supplies a comment:
// always returns nullptr and ignores both arguments.
const char * CommentRightCallBack(void *ctx, int type) {
  return nullptr;
}

// Extracts the hexadecimal offset from a disassembled instruction line of the
// form "opcode op1, op2 ... // offset: binopcode".
// The line must contain "//" followed, more than three characters later, by
// ':'; these preconditions are checked with assert only.
uint32_t ParseInstructionOffset(const std::string& instruction) {
  const std::string::size_type marker = instruction.find("//");
  assert(marker != std::string::npos);

  // Everything from the comment marker onwards: "// offset: binopcode".
  std::string text(instruction, marker);
  const std::string::size_type colon = text.find(':');
  assert(colon != std::string::npos);

  // Cut at the colon, then strip the three leading characters ("// ").
  text.erase(colon);
  assert(text.size() > 3);
  text.erase(0, 3);

  return static_cast<uint32_t>(strtoul(text.c_str(), nullptr, 16));
}

// Returns true when `c` is not a whitespace character according to isspace.
bool IsNotSpace(char c) {
  // isspace requires a value representable as unsigned char (or EOF); a plain
  // char may be negative, so convert through unsigned char first.
  return !isspace(static_cast<unsigned char>(c));
}

// Removes, in place, every character of `str` that precedes the first
// character accepted by IsNotSpace; a string with no such character is
// emptied.
void ltrim(std::string &str) {
  const std::string::iterator first_kept =
      std::find_if(str.begin(), str.end(), IsNotSpace);
  str.erase(str.begin(), first_kept);
}

// Builds a dump file name of the form "[dir/]<prefix><nnn>[_<i>][.<ext>]".
// `n` is zero-padded to at least three digits, "_<i>" is appended only when
// `i` is non-zero, and ".<ext>" only when `ext` is non-null.
std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i)
{
  std::ostringstream name;

  if (!dir.empty()) {
    name << dir << "/";
  }

  name << prefix;
  name << std::setw(3) << std::setfill('0') << n;

  if (i != 0) {
    name << "_" << i;
  }
  if (ext != nullptr) {
    name << "." << ext;
  }

  return name.str();
}


}   //  namespace hsa
}   //  namespace amd
}   //  namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_CODE_UTIL_HPP_
#define AMD_HSA_CODE_UTIL_HPP_

#include <cassert>
#include <string>
#include <vector>
#include <iostream>
#ifdef _WIN32
#include <malloc.h>
#else // _WIN32
#include <cstdlib>
#endif // _WIN32
#include "inc/amd_hsa_kernel_code.h"
#include "inc/amd_hsa_elf.h"
#include "inc/hsa.h"
#include "inc/hsa_ext_finalize.h"

// Converts an arbitrary status value to hsa_status_t with a static_cast.
#define hsa_error(e) static_cast<hsa_status_t>(e)

// Assertion that does not depend on NDEBUG: when `e` evaluates to false it
// prints "<file>:<line>: Assertion `e' failed." to std::cerr and aborts.
// NOTE(review): the expansion is a bare `if` block rather than a
// do { ... } while (0) statement, so it should not be used as the unbraced
// body of an if/else.
#define release_assert(e)                                                      \
  if (!(e)) {                                                                  \
    std::cerr << __FILE__ << ":";                                              \
    std::cerr << __LINE__ << ":";                                              \
    std::cerr << " Assertion `" << #e << "' failed." << std::endl;             \
    std::abort();                                                              \
  }                                                                            \

namespace rocr {
namespace amd {
namespace hsa {

// Conversions from HSA / AMD enumeration and bit-field values to strings.
// NOTE(review): the definitions are not in this header; the exact text
// produced for each value should be checked against the implementation file.
std::string HsaSymbolKindToString(hsa_symbol_kind_t kind);
std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage);
std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation);
std::string HsaVariableSegmentToString(hsa_variable_segment_t segment);
std::string HsaProfileToString(hsa_profile_t profile);
std::string HsaMachineModelToString(hsa_machine_model_t model);
std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode);
std::string AmdMachineKindToString(amd_machine_kind16_t machine);
std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode);
std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode);
std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id);
std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size);
std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions);
std::string AmdPowerTwoToString(amd_powertwo8_t p);
// Helpers for AMD HSA ELF sections, segments and program-header types.
amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec);
bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec);
std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg);
std::string AmdPTLoadToString(uint64_t type);

// Printers that write a textual dump of the given kernel code structure or
// register / property word to `out`.
void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc);
void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1);
void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2);
void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties);
void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives);

namespace code_options {
  // Manipulator that puts a space between options (not at the beginning).
  std::ostream& space(std::ostream& out);

  // Name of a control directive option. Streaming it writes the option
  // prefix and name; the value is streamed separately by the caller.
  struct control_directive {
    const char *name;
    control_directive(const char* directive_name) : name(directive_name) { }
  };
  std::ostream& operator<<(std::ostream& out, const control_directive& d);

  // Wrapper that streams an exceptions bit mask as a string.
  struct exceptions_mask {
    uint16_t mask;
    exceptions_mask(uint16_t mask_bits) : mask(mask_bits) { }
  };
  std::ostream& operator<<(std::ostream& out, const exceptions_mask& e);

  // Wrapper that streams a set of control directives as options.
  // Holds a reference only: the wrapped structure must outlive the wrapper.
  struct control_directives {
    const hsa_ext_control_directives_t& d;
    control_directives(const hsa_ext_control_directives_t& directives) : d(directives) { }
  };
  std::ostream& operator<<(std::ostream& out, const control_directives& cd);
}

// Returns a static description string for `status`
// ("Unknown HSA status" for unrecognised values).
const char* hsaerr2str(hsa_status_t status);
// Reads the whole of `filename` (binary mode) into `buffer`.
// Returns false if the file cannot be opened or read.
bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer);

// Create a new, empty temporary file that will be deleted when closed.
// Returns a file descriptor, or -1 on failure.
int OpenTempFile(const char* prefix);
// Close a descriptor returned by OpenTempFile.
void CloseTempFile(int fd);

// Helper comment types for the ISA disassembler; values start at 1.
enum DumpIsaCommentType  {
  COMMENT_AMD_KERNEL_CODE_T_BEGIN = 1,
  COMMENT_AMD_KERNEL_CODE_T_END,
  COMMENT_KERNEL_ISA_BEGIN,
};

// Callbacks that create helper comments for the ISA disassembler.
// `type` is a DumpIsaCommentType value. CommentTopCallBack returns the marker
// text for it; CommentRightCallBack always returns nullptr.
const char * CommentTopCallBack(void *ctx, int type);
const char * CommentRightCallBack(void *ctx, int type);

// Parse a disassembler instruction line ("... // offset: binopcode") and
// return the hexadecimal offset it carries.
uint32_t ParseInstructionOffset(const std::string& instruction);

// Trim whitespace from the start of the string, in place.
void ltrim(std::string &str);


// Helper function that allocates `size` bytes of memory aligned to
// `alignment` and returns it, or a null pointer on failure.
// On non-Windows platforms the alignment is raised to at least sizeof(void*)
// before calling posix_memalign. Release the memory with alignedFree.
inline void*
alignedMalloc(size_t size, size_t alignment)
{
#if defined(_WIN32)
  return ::_aligned_malloc(size, alignment);
#else
  // Plain comparison instead of std::max, so that this header does not depend
  // on <algorithm>, which it does not include.
  if (alignment < sizeof(void*)) {
    alignment = sizeof(void*);
  }
  void* ptr = nullptr;
  if (0 != ::posix_memalign(&ptr, alignment, size)) {
    return nullptr;
  }
  return ptr;
#endif
}

// Helper function that frees memory obtained from alignedMalloc:
// _aligned_free on Windows, free elsewhere.
inline void
alignedFree(void *ptr)
{
#if defined(_WIN32)
  ::_aligned_free(ptr);
#else
  free(ptr);
#endif
}

// Rounds `num` up to the next multiple of `align` (64-bit overload).
// `align` must be a non-zero power of two; this is checked with assert only.
inline uint64_t alignUp(uint64_t num, uint64_t align)
{
  assert(align);
  const uint64_t low_bits = align - 1;
  assert((align & low_bits) == 0);
  return (num + low_bits) & ~low_bits;
}

// Rounds `num` up to the next multiple of `align` (32-bit overload).
// `align` must be a non-zero power of two; this is checked with assert only.
inline uint32_t alignUp(uint32_t num, uint32_t align)
{
  assert(align);
  const uint32_t low_bits = align - 1;
  assert((align & low_bits) == 0);
  return (num + low_bits) & ~low_bits;
}

std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i = 0);

}   //  namespace hsa
}   //  namespace amd
}   //  namespace rocr

#endif // AMD_HSA_CODE_UTIL_HPP_


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "amd_hsa_locks.hpp"

namespace rocr {
namespace amd {
namespace hsa {
namespace common {

void ReaderWriterLock::ReaderLock()
{
  internal_lock_.lock();
  while (0 < writers_count_) {
    readers_condition_.wait(internal_lock_);
  }
  readers_count_ += 1;
  internal_lock_.unlock();
}

// Releases a read acquisition. When the last reader leaves and at least one
// writer is waiting, one waiting writer is woken.
void ReaderWriterLock::ReaderUnlock()
{
  std::lock_guard<std::mutex> guard(internal_lock_);
  --readers_count_;
  const bool last_reader_left = (readers_count_ == 0);
  if (last_reader_left && writers_waiting_ > 0) {
    writers_condition_.notify_one();
  }
}

void ReaderWriterLock::WriterLock()
{
  internal_lock_.lock();
  writers_waiting_ += 1;
  while (0 < readers_count_ || 0 < writers_count_) {
    writers_condition_.wait(internal_lock_);
  }
  writers_count_ += 1;
  writers_waiting_ -= 1;
  internal_lock_.unlock();
}

// Releases a write acquisition: wakes one waiting writer (if any) and all
// waiting readers.
void ReaderWriterLock::WriterUnlock()
{
  std::lock_guard<std::mutex> guard(internal_lock_);
  --writers_count_;
  if (writers_waiting_ > 0) {
    writers_condition_.notify_one();
  }
  readers_condition_.notify_all();
}

} // namespace common
} // namespace hsa
} // namespace amd
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_HSA_LOCKS_HPP
#define AMD_HSA_LOCKS_HPP

#include <condition_variable>
#include <cstddef>
#include <mutex>

namespace rocr {
namespace amd {
namespace hsa {
namespace common {

// Scoped read lock: calls ReaderLock() on construction and ReaderUnlock() on
// destruction. LockType must provide both member functions. The guard stores
// a reference, so the lock must outlive it. Not copyable.
template<typename LockType>
class ReaderLockGuard final {
public:
  explicit ReaderLockGuard(LockType &lock) : lock_(lock) { lock_.ReaderLock(); }

  ~ReaderLockGuard() { lock_.ReaderUnlock(); }

  ReaderLockGuard(const ReaderLockGuard&) = delete;
  ReaderLockGuard& operator=(const ReaderLockGuard&) = delete;

private:
  LockType &lock_;
};

// Scoped write lock: calls WriterLock() on construction and WriterUnlock() on
// destruction. LockType must provide both member functions. The guard stores
// a reference, so the lock must outlive it. Not copyable.
template<typename LockType>
class WriterLockGuard final {
public:
  explicit WriterLockGuard(LockType &lock) : lock_(lock) { lock_.WriterLock(); }

  ~WriterLockGuard() { lock_.WriterUnlock(); }

  WriterLockGuard(const WriterLockGuard&) = delete;
  WriterLockGuard& operator=(const WriterLockGuard&) = delete;

private:
  LockType &lock_;
};

// Reader/writer lock built from a mutex and two condition variables.
// Readers are admitted while no writer holds the lock; a writer is admitted
// when there are no active readers and no active writer. Not copyable.
class ReaderWriterLock final {
public:
  ReaderWriterLock()
      : readers_count_(0),
        writers_count_(0),
        writers_waiting_(0) {}

  ~ReaderWriterLock() = default;

  ReaderWriterLock(const ReaderWriterLock&) = delete;
  ReaderWriterLock& operator=(const ReaderWriterLock&) = delete;

  // Shared (read) acquisition and release.
  void ReaderLock();
  void ReaderUnlock();

  // Exclusive (write) acquisition and release.
  void WriterLock();
  void WriterUnlock();

private:
  size_t readers_count_;    // readers currently holding the lock
  size_t writers_count_;    // writers currently holding the lock
  size_t writers_waiting_;  // writers blocked in WriterLock()
  std::mutex internal_lock_;                       // guards the counters above
  std::condition_variable_any readers_condition_;  // waited on by readers
  std::condition_variable_any writers_condition_;  // waited on by writers
};

} // namespace common
} // namespace hsa
} // namespace amd
} // namespace rocr

#endif // AMD_HSA_LOCKS_HPP


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_options.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "amd_options.hpp"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <list>
#include <string>

#include <cstddef>

namespace rocr {
namespace amd {
namespace options {

//===----------------------------------------------------------------------===//
// StringFactory.                                                             //
//===----------------------------------------------------------------------===//

// Joins `cstrs_count` C strings into one string, inserting `spacer` between
// consecutive entries. Returns an empty string when `cstrs` is null, the
// count is zero, or any entry is null.
std::string StringFactory::Flatten(const char **cstrs,
                                   const uint32_t &cstrs_count,
                                   const char &spacer) {
  std::string joined;
  if (cstrs == NULL) {
    return joined;
  }

  for (uint32_t idx = 0; idx < cstrs_count; ++idx) {
    const char *piece = cstrs[idx];
    if (piece == NULL) {
      // A single null entry invalidates the whole result.
      return std::string();
    }
    if (idx > 0) {
      joined += spacer;
    }
    joined += piece;
  }
  return joined;
}

// Splits `cstr` at every occurrence of `delim` and returns the non-empty
// pieces in order. A null `cstr` yields an empty list.
std::list<std::string> StringFactory::Tokenize(const char *cstr,
                                               const char &delim) {
  std::list<std::string> tokens;
  if (cstr == NULL) {
    return tokens;
  }

  const std::string text(cstr);
  std::string::size_type begin = 0;
  for (std::string::size_type hit = text.find(delim, begin);
       hit != std::string::npos;
       hit = text.find(delim, begin)) {
    // Adjacent delimiters produce an empty piece, which is skipped.
    if (hit > begin) {
      tokens.push_back(text.substr(begin, hit - begin));
    }
    begin = hit + 1;
  }

  // Whatever follows the last delimiter is the final token.
  if (begin < text.size()) {
    tokens.push_back(text.substr(begin));
  }
  return tokens;
}

// Returns a copy of `str` with every character converted by tolower.
std::string StringFactory::ToLower(const std::string& str) {
  std::string lower(str.length(), ' ');
  // tolower requires a value representable as unsigned char (or EOF); plain
  // char may be negative, so each character is converted through unsigned
  // char before the call.
  std::transform(str.begin(), str.end(), lower.begin(),
                 [](unsigned char c) { return static_cast<char>(::tolower(c)); });
  return lower;
}

// Returns a copy of `str` with every character converted by toupper.
std::string StringFactory::ToUpper(const std::string& str) {
  std::string upper(str.length(), ' ');
  // toupper requires a value representable as unsigned char (or EOF); plain
  // char may be negative, so each character is converted through unsigned
  // char before the call.
  std::transform(str.begin(), str.end(), upper.begin(),
                 [](unsigned char c) { return static_cast<char>(::toupper(c)); });
  return upper;
}

//===----------------------------------------------------------------------===//
// HelpPrinter, HelpStreambuf.                                                //
//===----------------------------------------------------------------------===//

/// @brief Hooks this buffer into @p stream.
///
/// The stream's current buffer is remembered so that characters can be
/// forwarded to it and so the destructor can put it back.
HelpStreambuf::HelpStreambuf(std::ostream& stream)
    : basicStream_(&stream),
      basicBuf_(stream.rdbuf()),
      wrapWidth_(0u),
      indentSize_(0u),
      atLineStart_(true),
      lineWidth_(0u) {
  // From now on everything written to `stream` passes through overflow().
  stream.rdbuf(this);
}

/// @brief Forwards one character to the wrapped buffer, adding indentation
/// and line wrapping.
///
/// - The first character of a line (other than '\n') is preceded by
///   indentSize_ spaces.
/// - When wrapping is enabled (wrapWidth_ > 0) and the current line already
///   holds wrapWidth_ characters, a newline plus indentation is emitted before
///   the character.
///
/// @param ch Character to write, or traits_type::eof().
/// @return Result of writing @p ch to the wrapped buffer; for eof() nothing is
/// written and a non-eof value is returned.
HelpStreambuf::int_type HelpStreambuf::overflow(HelpStreambuf::int_type ch) {
  // overflow(eof) carries no character; do not indent or forward anything.
  if (traits_type::eq_int_type(ch, traits_type::eof())) {
    return traits_type::not_eof(ch);
  }

  if (atLineStart_ && ch != '\n') {
    std::string indent(indentSize_, ' ');
    basicBuf_->sputn(indent.data(), indent.size());
    lineWidth_ = indentSize_;
    atLineStart_ = false;
  } else if (ch == '\n') {
    atLineStart_ = true;
    lineWidth_ = 0;
  }

  // Line is full: break it and re-indent before emitting the character.
  if (wrapWidth_ > 0 && lineWidth_ == wrapWidth_) {
    basicBuf_->sputc('\n');
    std::string indent(indentSize_, ' ');
    basicBuf_->sputn(indent.data(), indent.size());
    lineWidth_ = indentSize_;
    atLineStart_ = false;
  }

  lineWidth_++;
  return basicBuf_->sputc(traits_type::to_char_type(ch));
}

/// @brief Prints the usage column of a help line.
///
/// The text is written without indentation or wrapping, padded with spaces up
/// to USAGE_WIDTH (longer text is not truncated), then followed by
/// PADDING_WIDTH spaces.
///
/// @return *this, to allow chaining with PrintDescription().
HelpPrinter& HelpPrinter::PrintUsage(const std::string& usage) {
  sbuf_.IndentSize(0);
  sbuf_.WrapWidth(0);

  std::ostream& out = Stream();
  out << usage;

  const std::string::size_type used = usage.length();
  if (used < USAGE_WIDTH) {
    out << std::string(USAGE_WIDTH - used, ' ');
  }
  out << std::string(PADDING_WIDTH, ' ');
  return *this;
}

/// @brief Prints the description column of a help line and ends the line.
///
/// While the description is written, continuation lines are indented to the
/// description column and wrapped at the right edge of that column. Indent and
/// wrap are switched off again afterwards.
///
/// @return *this.
HelpPrinter& HelpPrinter::PrintDescription(const std::string& description) {
  const unsigned column = USAGE_WIDTH + PADDING_WIDTH;

  // Wrap width is set first so the indent stays below it at every step.
  sbuf_.WrapWidth(column + DESCRIPTION_WIDTH);
  sbuf_.IndentSize(column);

  std::ostream& out = Stream();
  out << description;
  out << std::endl;

  sbuf_.IndentSize(0);
  sbuf_.WrapWidth(0);
  return *this;
}

//===----------------------------------------------------------------------===//
// ChoiceOption.                                                              //
//===----------------------------------------------------------------------===//
/// @brief Creates an option named @p name that accepts exactly one value out
/// of @p choices.
///
/// @param name Option name (without the leading '-').
/// @param choices Accepted values; duplicates collapse into one entry.
/// @param help Description printed by PrintHelp().
/// @param error Stream that receives diagnostics.
ChoiceOption::ChoiceOption(const std::string& name,
                           const std::vector<std::string>& choices,
                           const std::string& help,
                           std::ostream& error)
    : OptionBase(name, help, error) {
  typedef std::vector<std::string>::const_iterator ChoiceIter;
  for (ChoiceIter it = choices.begin(); it != choices.end(); ++it) {
    choices_.insert(*it);
  }
}

/// @brief Consumes "<name> <value>" and stores the value if it is one of the
/// registered choices.
///
/// @param tokens Option name followed by exactly one value. On success both
/// tokens are removed. When the value is not a known choice, the name token
/// has already been removed and the value token is left in place.
/// @return true on success; false (after writing a diagnostic) otherwise.
bool ChoiceOption::ProcessTokens(std::list<std::string> &tokens) {
  assert(0 == name_.compare(tokens.front()) && "option name is mismatched");

  bool accepted = (tokens.size() == 2);
  if (accepted) {
    tokens.pop_front();
    accepted = (choices_.find(tokens.front()) != choices_.end());
  }

  if (!accepted) {
    error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
    return false;
  }

  value_ = tokens.front();
  is_set_ = true;
  tokens.pop_front();
  return true;
}

/// @brief Prints "-<name>=[c1|c2|...]" followed by the help text.
///
/// Choices appear in the iteration order of the underlying unordered set.
void ChoiceOption::PrintHelp(HelpPrinter& printer) const {
  typedef std::unordered_set<std::string>::const_iterator ChoiceIter;

  std::string alternatives;
  for (ChoiceIter it = choices_.begin(); it != choices_.end(); ++it) {
    if (it != choices_.begin()) {
      alternatives += '|';
    }
    alternatives += *it;
  }

  printer.PrintUsage("-" + name_ + "=[" + alternatives + "]");
  printer.PrintDescription(help_);
}

//===----------------------------------------------------------------------===//
// PrefixOption.                                                             //
//===----------------------------------------------------------------------===//
/// @brief A prefix option needs a non-empty name that contains no ':' (the
/// colon is what separates the prefix from its value).
bool PrefixOption::IsValid() const {
  if (name_.empty()) {
    return false;
  }
  return std::string::npos == name_.find(':');
}

/// @brief Returns the position of the first "<name>:" inside @p token, or
/// std::string::npos when it does not occur.
std::string::size_type PrefixOption::FindPrefix(const std::string& token) const {
  std::string needle(name_);
  needle.push_back(':');
  return token.find(needle);
}

/// @brief Tells whether @p token has the form "<name>:<rest>", i.e. starts
/// with the option name immediately followed by ':'.
bool PrefixOption::Accept(const std::string& token) const {
  const std::string::size_type prefix_len = name_.length();
  if (token.length() <= prefix_len) {
    return false;
  }
  if (token[prefix_len] != ':') {
    return false;
  }
  return 0 == token.compare(0, prefix_len, name_);
}

/// @brief Records one value for this prefix option.
///
/// The first token has the form "<name>:<value-head>". Any further tokens are
/// appended to the value, each preceded by '='. All tokens are consumed.
///
/// @return Always true.
bool PrefixOption::ProcessTokens(std::list<std::string> &tokens) {
  assert(1 <= tokens.size());
  assert(Accept(tokens.front()) && "option name is mismatched");

  const std::string head = tokens.front();
  tokens.pop_front();

  // Strip "<name>:" to obtain the beginning of the value.
  std::string value = head.substr(name_.length() + 1);

  while (!tokens.empty()) {
    value.push_back('=');
    value.append(tokens.front());
    tokens.pop_front();
  }

  values_.push_back(value);
  is_set_ = true;
  return true;
}

/// @brief Prints "-<name>:[value]" followed by the help text.
void PrefixOption::PrintHelp(HelpPrinter& printer) const {
  const std::string usage = "-" + name_ + ":[value]";
  printer.PrintUsage(usage);
  printer.PrintDescription(help_);
}

//===----------------------------------------------------------------------===//
// OptionParser.                                                              //
//===----------------------------------------------------------------------===//
/// @brief Finds the first registered option whose Accept() returns true for
/// @p name.
///
/// @return Iterator to that option, or options_.end() when none accepts.
std::vector<OptionBase*>::iterator
OptionParser::FindOption(const std::string& name) {
  std::vector<OptionBase*>::iterator cursor = options_.begin();
  const std::vector<OptionBase*>::iterator stop = options_.end();
  while (cursor != stop && !(*cursor)->Accept(name)) {
    ++cursor;
  }
  return cursor;
}

/// @brief Registers @p option with the parser. The parser stores the pointer
/// only.
///
/// @return false when @p option is NULL, fails its own IsValid() check, or
/// when an already registered option accepts its name; true otherwise.
bool OptionParser::AddOption(OptionBase *option) {
  const bool usable = (option != NULL) && option->IsValid();
  const bool fresh =
      usable && (FindOption(option->name()) == options_.end());
  if (fresh) {
    options_.push_back(option);
  }
  return fresh;
}

/// @brief Returns the unrecognised tokens gathered so far, each followed by a
/// single space. Only meaningful while unknown-collection is enabled, which is
/// asserted.
const std::string& OptionParser::Unknown() const {
  assert(collectUnknown_);
  const std::string& collected = unknownOptions_;
  return collected;
}

bool OptionParser::ParseOptions(const char *options) {
  std::list<std::string> tokens_l1 = StringFactory::Tokenize(options, ' ');
  if (0 == tokens_l1.size()) {
    return true;
  }

  std::list<std::string>::iterator tokens_l1i = tokens_l1.begin();
  while (tokens_l1i != tokens_l1.end()) {
    if ('-' == tokens_l1i->at(0)) {
      std::list<std::string>::iterator option_begin = tokens_l1i;
      std::list<std::string> tokens_l2;
      do {
        tokens_l2.push_back(*tokens_l1i);
        tokens_l1i++;
      } while (tokens_l1i != tokens_l1.end() && '-' != tokens_l1i->at(0));
      std::list<std::string>::iterator option_end = tokens_l1i;
      tokens_l2.front().erase(0, 1);

      if (1 == tokens_l2.size()) {
        tokens_l2 = StringFactory::Tokenize(tokens_l2.front().c_str(), '=');
        if (2 < tokens_l2.size()) {
          if (collectUnknown_) {
            unknownOptions_ += *tokens_l1i + " ";
            continue;
          } else {
            error() << "error: invalid option format: \'"
                    << tokens_l2.front() << '\'' << std::endl;
            Reset();
            return false;
          }
        }
      }

      auto find_status = FindOption(tokens_l2.front());
      if (find_status == options_.end()) {
        if (collectUnknown_) {
          for (; option_begin != option_end; ++option_begin) {
            unknownOptions_ += *option_begin + " ";
          }
          continue;
        } else {
          error() << "error: unknown option: \'"
                  << tokens_l2.front() << '\'' << std::endl;
          Reset();
          return false;
        }
      }

      if (!(*find_status)->ProcessTokens(tokens_l2)) {
        Reset();
        return false;
      }
      assert(0 == tokens_l2.size());
    } else {
      if (collectUnknown_) {
        unknownOptions_ += *tokens_l1i + " ";
      } else {
        error() << "error: unknown option: \'"
                << *tokens_l1i << '\'' << std::endl;
        Reset();
        return false;
      }
    }
  }

  return true;
}

void OptionParser::PrintHelp(std::ostream& out, const std::string& addition) const {
  HelpPrinter printer(out);
  for (const auto& option: options_) {
    option->PrintHelp(printer);
  }
  out << addition << std::endl;
}

void OptionParser::Reset() {
  unknownOptions_.clear();
  for (auto &option : options_) {
    option->Reset();
  }
}

} // namespace options
} // namespace amd
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/libamdhsacode/amd_options.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef AMD_OPTIONS_HPP
#define AMD_OPTIONS_HPP

#include <cstdlib>
#include <iostream>
#include <list>
#include <vector>
#include <cstdint>

#include <cassert>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>

namespace rocr {
namespace amd {
namespace options {

//===----------------------------------------------------------------------===//
// StringFactory.                                                             //
//===----------------------------------------------------------------------===//

/// @brief Collection of static string helpers used by the option parser.
class StringFactory final {
public:
  /// @brief Concatenates @p cstrs_count C strings, inserting @p spacer between
  /// consecutive entries.
  /// @return Empty string if @p cstrs is NULL, @p cstrs_count is 0, or any
  /// entry is NULL.
  /// @note With the default spacer a NUL character is inserted between
  /// entries.
  static std::string Flatten(const char **cstrs,
                             const uint32_t &cstrs_count,
                             const char &spacer = '\0');

  /// @brief Splits @p cstr at @p delim, dropping empty tokens. A NULL input
  /// yields an empty list.
  static std::list<std::string> Tokenize(const char *cstr, const char &delim);

  /// @brief Returns a lower-cased copy of @p str (per-character ::tolower).
  static std::string ToLower(const std::string& str);
  /// @brief Returns an upper-cased copy of @p str (per-character ::toupper).
  static std::string ToUpper(const std::string& str);
};

//===----------------------------------------------------------------------===//
// HelpPrinter, HelpStreambuf.                                                //
//===----------------------------------------------------------------------===//

/// @brief Stream buffer that sits in front of another stream's buffer and
/// adds per-line indentation and optional line wrapping.
///
/// Construction installs this buffer into the given stream; destruction puts
/// the stream's original buffer back.
class HelpStreambuf : public std::streambuf {
public:
  explicit HelpStreambuf(std::ostream& stream);

  virtual ~HelpStreambuf() {
    basicStream_->rdbuf(basicBuf_);
  }

  /// @brief Sets the number of spaces emitted at the start of each line.
  ///
  /// The assertion runs after the assignment so that it validates the value
  /// just set; checking before the assignment only ever validated the
  /// previous state and could never catch a bad final value.
  void IndentSize(unsigned indent) {
    indentSize_ = indent;
    assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
  }

  /// @brief Sets the column at which lines are wrapped; 0 disables wrapping.
  ///
  /// When wrapping is enabled the indent must stay below the wrap width,
  /// which is asserted on the resulting state.
  void WrapWidth(unsigned wrap) {
    wrapWidth_ = wrap;
    assert(wrapWidth_ == 0 || indentSize_ < wrapWidth_);
  }

protected:
  /// @brief Receives every character written to the stream.
  virtual int_type overflow(int_type ch) override;

private:
  std::ostream* basicStream_;  ///< Stream this buffer is installed into.
  std::streambuf* basicBuf_;   ///< The stream's original buffer.

  unsigned wrapWidth_;         ///< Wrap column; 0 means no wrapping.
  unsigned indentSize_;        ///< Spaces emitted at each line start.

  bool atLineStart_;           ///< True until a line's first character.
  unsigned lineWidth_;         ///< Characters emitted on the current line.
};


/// @brief Formats help output as two columns: usage text on the left and a
/// wrapped description on the right.
///
/// Instances can only be created by OptionParser (private constructor plus
/// friendship).
class HelpPrinter {
private:
  // Column layout, in characters.
  static const unsigned USAGE_WIDTH = 30;
  static const unsigned PADDING_WIDTH = 2;
  static const unsigned DESCRIPTION_WIDTH = 50;

public:
  /// @brief Prints the usage column, padded to USAGE_WIDTH, plus the gap.
  HelpPrinter& PrintUsage(const std::string& usage);
  /// @brief Prints the description column and terminates the line.
  HelpPrinter& PrintDescription(const std::string& description);

  /// @brief The stream being written to.
  std::ostream& Stream() { return *out_; }

private:
  /// @brief Attaches the printer (and its HelpStreambuf) to @p out.
  explicit HelpPrinter(std::ostream& out = std::cout) : out_(&out), sbuf_(*out_) {}

  /// @brief Not copy-constructible.
  HelpPrinter(const HelpPrinter&);
  /// @brief Not copy-assignable.
  HelpPrinter& operator =(const HelpPrinter&);

  friend class OptionParser;

  // out_ is declared before sbuf_ because sbuf_ is initialised from *out_.
  std::ostream *out_;
  HelpStreambuf sbuf_;
};

//===----------------------------------------------------------------------===//
// OptionBase.                                                                //
//===----------------------------------------------------------------------===//

/// @brief Common base of all command line option kinds.
///
/// Holds the option's name, help text, "was it seen" flag and the stream for
/// diagnostics. Parsing and resetting are private and driven by OptionParser,
/// which is a friend.
class OptionBase {
public:
  virtual ~OptionBase() {}

  /// @brief Option name as registered (without the leading '-').
  const std::string& name() const { return name_; }

  /// @brief Whether the option was set by the most recent parse.
  const bool& is_set() const { return is_set_; }

  /// @brief An option is valid when it has a non-empty name.
  virtual bool IsValid() const { return !name_.empty(); }

protected:
  explicit OptionBase(const std::string& name,
                      const std::string& help = "",
                      std::ostream &error = std::cerr)
    : name_(name), help_(help), is_set_(false), error_(&error) {}

  /// @brief Prints this option's help line through @p printer.
  virtual void PrintHelp(HelpPrinter& printer) const = 0;

  /// @brief Whether this option handles the token @p name; by default an
  /// exact match with the option name.
  virtual bool Accept(const std::string& name) const { return name == name_; }

  const std::string name_;
  const std::string help_;
  bool is_set_;

  /// @brief Stream for diagnostics.
  std::ostream &error() const { return *error_; }

private:
  /// @brief Not copy-constructible.
  OptionBase(const OptionBase &ob);
  /// @brief Not copy-assignable.
  OptionBase& operator=(const OptionBase &ob);

  /// @brief Clears the "set" flag; called by OptionParser::Reset().
  void Reset() { is_set_ = false; }

  /// @brief Consumes the option's tokens (name first); implemented by each
  /// option kind.
  virtual bool ProcessTokens(std::list<std::string> &tokens) = 0;

  friend class OptionParser;

  mutable std::ostream *error_;
};


//===----------------------------------------------------------------------===//
// Option<T>.                                                                 //
//===----------------------------------------------------------------------===//

/// @brief Option that takes one or more values of type @p T
/// ("-name v1 v2 ..." or "-name=v").
///
/// Values are parsed with operator>> from a string stream and accumulate in
/// values().
template<typename T>
class Option final: public OptionBase {
public:
  explicit Option(const std::string& name,
                  const std::string& help = "",
                  std::ostream& error = std::cerr):
    OptionBase(name, help, error) {}

  ~Option() {}

  /// @brief All values parsed so far, in the order they were given.
  const std::list<T>& values() const {
    return values_;
  }

protected:
  virtual void PrintHelp(HelpPrinter& printer) const override;

private:
  /// @brief Not copy-constructible.
  Option(const Option &o);
  /// @brief Not copy-assignable.
  Option& operator=(const Option &o);

  // Marked override like PrintHelp above and like the sibling option classes,
  // so a signature drift from OptionBase is a compile error.
  bool ProcessTokens(std::list<std::string> &tokens) override;

  std::list<T> values_;
};

/// @brief Consumes "<name> <v1> [<v2> ...]" and appends the parsed values.
///
/// Each value token is converted with operator>>. A token that cannot be
/// converted is reported as an error; previously only the state of a freshly
/// constructed stream was checked, which never fails, so bad input was stored
/// silently. Values are committed (and the option marked as set) only when
/// every token converts.
///
/// @param tokens Option name followed by at least one value token.
/// @return true on success; false after writing a diagnostic otherwise.
template<typename T>
bool Option<T>::ProcessTokens(std::list<std::string> &tokens) {
  assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
  if (2 > tokens.size()) {
    error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
    return false;
  }

  tokens.pop_front();

  std::list<T> parsed;
  while (!tokens.empty()) {
    std::istringstream token_stream(tokens.front());

    T value;
    if (!(token_stream >> value)) {
      error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
      return false;
    }

    parsed.push_back(value);
    tokens.pop_front();
  }

  values_.splice(values_.end(), parsed);
  is_set_ = true;
  return true;
}

/// @brief Prints "-<name> [<NAME>s]" followed by the help text.
template<typename T>
void Option<T>::PrintHelp(HelpPrinter& printer) const {
  std::string usage("-");
  usage += name_;
  usage += " [";
  usage += StringFactory::ToUpper(name_);
  usage += "s]";

  printer.PrintUsage(usage);
  printer.PrintDescription(help_);
}

//===----------------------------------------------------------------------===//
// ValueOption<T>.                                                            //
//===----------------------------------------------------------------------===//

template<typename T>
class ValueOption final: public OptionBase {
public:
  explicit ValueOption(const std::string& name,
                       const std::string& help = "",
                       std::ostream& error = std::cerr):
    OptionBase(name, help, error) {}

  ~ValueOption() {}

  const T& value() const {
    return value_;
  }

protected:
  void PrintHelp(HelpPrinter& printer) const override;

private:
  /// @brief Not copy-constructible.
  ValueOption(const ValueOption &o);
  /// @brief Not copy-assignable.
  ValueOption& operator=(const ValueOption &o);

  bool ProcessTokens(std::list<std::string> &tokens) override;

  T value_;
};

/// @brief Consumes "<name> <value>" and stores the converted value.
///
/// The value token is converted with operator>>. A token that cannot be
/// converted is reported as an error; previously only the state of a freshly
/// constructed stream was checked, which never fails, so bad input was
/// accepted silently. The option is marked as set only on success.
///
/// @param tokens Option name followed by exactly one value token.
/// @return true on success; false after writing a diagnostic otherwise.
template<typename T>
bool ValueOption<T>::ProcessTokens(std::list<std::string> &tokens) {
  assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
  if (2 != tokens.size()) {
    error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
    return false;
  }

  tokens.pop_front();

  std::istringstream token_stream(tokens.front());
  if (!(token_stream >> value_)) {
    error() << "error: invalid option: \'" << name_ << '\'' << std::endl;
    return false;
  }

  is_set_ = true;
  tokens.pop_front();
  return true;
}

/// @brief Prints "-<name>=[VAL]" followed by the help text.
template<typename T>
void ValueOption<T>::PrintHelp(HelpPrinter& printer) const {
  const std::string usage = "-" + name_ + "=[VAL]";
  printer.PrintUsage(usage);
  printer.PrintDescription(help_);
}

//===----------------------------------------------------------------------===//
// ChoiceOption.                                                              //
//===----------------------------------------------------------------------===//
/// @brief Option whose single value must be one of a fixed set of strings
/// ("-name=choice" or "-name choice").
class ChoiceOption final: public OptionBase {
public:
  /// @param name Option name (without the leading '-').
  /// @param choices Accepted values.
  /// @param help Description shown in the help output.
  /// @param error Stream for diagnostics.
  ChoiceOption(const std::string& name,
               const std::vector<std::string>& choices,
               const std::string& help = "",
               std::ostream& error = std::cerr);

  ~ChoiceOption() {}

  /// @brief The chosen value; empty until a value has been parsed.
  const std::string& value() const {
    return value_;
  }

protected:
  void PrintHelp(HelpPrinter& printer) const override;

private:
  /// @brief Not copy-constructible.
  ChoiceOption(const ChoiceOption&);
  /// @brief Not copy-assignable.
  ChoiceOption& operator =(const ChoiceOption&);

  bool ProcessTokens(std::list<std::string> &tokens) override;

  // Unordered: the order in which choices are listed by PrintHelp follows the
  // set's iteration order, not the order given to the constructor.
  std::unordered_set<std::string> choices_;
  std::string value_;
};

//===----------------------------------------------------------------------===//
// NoArgOption.                                                               //
//===----------------------------------------------------------------------===//

class NoArgOption final: public OptionBase {
public:
  explicit NoArgOption(const std::string& name,
                       const std::string& help = "",
                       std::ostream& error = std::cerr):
    OptionBase(name, help, error) {}

  ~NoArgOption() {}

protected:
  void PrintHelp(HelpPrinter& printer) const override {
    printer.PrintUsage("-" + name_).PrintDescription(help_);
  }

private:
  /// @brief Not copy-constructible.
  NoArgOption(const NoArgOption &o);
  /// @brief Not copy-assignable.
  NoArgOption& operator=(const NoArgOption &o);

  bool ProcessTokens(std::list<std::string> &tokens) override {
    assert(0 == name_.compare(tokens.front()) && "option name is mismatched");
    if (1 == tokens.size()) {
      tokens.pop_front();
      is_set_ = true;
      return true;
    } else if (2 == tokens.size()) {
      tokens.pop_front();
      if (tokens.front() == "1") {
        is_set_ = true;
        tokens.pop_front();
        return true;
      } else if (tokens.front() == "0") {
        is_set_ = false;
        tokens.pop_front();
        return true;
      }
    }
    error() << "error: invalid option: '" << name_ << "'" << std::endl;
    return false;
  }
};

//===----------------------------------------------------------------------===//
// PrefixOption.                                                              //
//===----------------------------------------------------------------------===//
/// @brief Option of the form "-prefix:value". It may be given several times;
/// every occurrence adds one entry to values().
class PrefixOption final: public OptionBase {
public:
  /// @param prefix Text before the ':'; must be non-empty and contain no ':'
  /// for the option to be valid.
  PrefixOption(const std::string& prefix,
               const std::string& help = "",
               std::ostream& error = std::cerr)
    : OptionBase(prefix, help, error) {}

  ~PrefixOption() {}

  /// @brief Collected values, one per occurrence, in parse order.
  const std::vector<std::string>& values() const {
    return values_;
  }

  /// @brief True when the prefix is non-empty and has no ':' in it.
  bool IsValid() const override;

protected:
  void PrintHelp(HelpPrinter& printer) const override;
  /// @brief True when @p token starts with "<prefix>:".
  bool Accept(const std::string& token) const override;

private:
  /// @brief Not copy-constructible.
  PrefixOption(const PrefixOption&);
  /// @brief Not copy-assignable.
  PrefixOption& operator =(const PrefixOption&);

  bool ProcessTokens(std::list<std::string>& tokens) override;

  /// @brief Position of "<prefix>:" within @p token, or npos.
  std::string::size_type FindPrefix(const std::string& token) const;

  std::vector<std::string> values_;
};

//===----------------------------------------------------------------------===//
// OptionParser.                                                              //
//===----------------------------------------------------------------------===//

/// @brief Parses a space separated option string against a set of registered
/// options.
///
/// Options are registered by pointer and are not owned by the parser (the
/// destructor is empty).
class OptionParser final {
public:
  /// @param collectUnknown When true, unrecognised tokens are gathered
  /// (see Unknown()) instead of failing the parse.
  /// @param error Stream for diagnostics.
  explicit OptionParser(bool collectUnknown = false, std::ostream& error = std::cerr)
    : collectUnknown_(collectUnknown),
      error_(&error) {}

  ~OptionParser() {}

  /// @brief Registers @p option; returns false if it is NULL, invalid, or its
  /// name is already accepted by a registered option.
  bool AddOption(OptionBase *option);

  /// @brief Parses @p options; returns false (after resetting all options) on
  /// error.
  bool ParseOptions(const char *options);

  /// @brief Unrecognised tokens gathered so far, each followed by a space.
  /// Asserts that collection is enabled.
  const std::string& Unknown() const;
  /// @brief Enables or disables gathering of unrecognised tokens.
  void CollectUnknown(bool b) { collectUnknown_ = b; }

  /// @brief Writes help for every registered option, then @p addition.
  void PrintHelp(std::ostream& out, const std::string& addition = "") const;

  /// @brief Clears gathered unknown tokens and resets every option.
  void Reset();

private:
  /// @brief Not copy-constructible.
  OptionParser(const OptionParser &op);
  /// @brief Not copy-assignable.
  OptionParser& operator=(const OptionParser &op);

  std::ostream& error() { return *error_; }

  /// @brief First registered option that accepts @p name, or options_.end().
  std::vector<OptionBase*>::iterator FindOption(const std::string& name);

  std::vector<OptionBase*> options_;

  std::string unknownOptions_;
  bool collectUnknown_;

  std::ostream *error_;
};

} // namespace options
} // namespace amd
} // namespace rocr

#endif // AMD_OPTIONS_HPP


================================================
FILE: runtime/hsa-runtime/loader/AMDHSAKernelDescriptor.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H

#include <cstddef>
#include <cstdint>

// Gets offset of specified member in specified type.
// Fallback only: it is skipped whenever offsetof is already defined as a
// macro (the standard <cstddef> included above normally provides it).
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif // offsetof

// Creates enumeration entries used for packing bits into integers. Enumeration
// entries include bit shift amount, bit width, and bit mask.
// For a field NAME this yields NAME_SHIFT, NAME_WIDTH and NAME (the mask).
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
  NAME ## _SHIFT = (SHIFT),                        \
  NAME ## _WIDTH = (WIDTH),                        \
  NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // AMDHSA_BITS_ENUM_ENTRY

// Gets bits for specified bit mask from specified source.
// SRC is parenthesized so that compound expressions such as `a | b` are
// masked as a whole. MSK must be the bare mask name because it is pasted with
// _SHIFT.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET

// Sets bits for specified bit mask in specified destination.
// Wrapped in do { } while (0) so the two assignments form one statement (an
// unbraced `if (c) AMDHSA_BITS_SET(...);` guards both), and VAL is
// parenthesized so compound expressions are shifted as a whole. Use it as a
// statement terminated by ';'. DST is evaluated twice.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)            \
  do {                                            \
    (DST) &= ~(MSK);                              \
    (DST) |= (((VAL) << MSK ## _SHIFT) & (MSK));  \
  } while (0)
#endif // AMDHSA_BITS_SET

namespace rocr {
namespace llvm {
namespace amdhsa {

// Floating point rounding modes. Must match hardware definition.
// The four values fit the 2-bit FLOAT_ROUND_MODE_* fields of
// COMPUTE_PGM_RSRC1 declared further down in this header.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO = 3,
};

// Floating point denorm modes. Must match hardware definition.
// The four values fit the 2-bit FLOAT_DENORM_MODE_* fields of
// COMPUTE_PGM_RSRC1 declared further down in this header.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};

// System VGPR workitem IDs. Must match hardware definition.
// The four values fit the 2-bit ENABLE_VGPR_WORKITEM_ID field of
// COMPUTE_PGM_RSRC2 declared further down in this header.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};

// Compute program resource register 1. Must match hardware definition.
// Each entry is (NAME, SHIFT, WIDTH) and expands, via AMDHSA_BITS_ENUM_ENTRY,
// to COMPUTE_PGM_RSRC1_<NAME>_SHIFT, _WIDTH and the mask
// COMPUTE_PGM_RSRC1_<NAME>. The entries cover bits 0 through 31 without gaps.
// The helper macro is local to this enum and undefined right after it.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
  COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
  COMPUTE_PGM_RSRC1(PRIV, 20, 1),
  COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
  COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
  COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
  COMPUTE_PGM_RSRC1(BULKY, 24, 1),
  COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
  COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
  COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
  COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
  COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1

// Compute program resource register 2. Must match hardware definition.
// Each entry is (NAME, SHIFT, WIDTH) and expands, via AMDHSA_BITS_ENUM_ENTRY,
// to COMPUTE_PGM_RSRC2_<NAME>_SHIFT, _WIDTH and the mask
// COMPUTE_PGM_RSRC2_<NAME>. The entries cover bits 0 through 31 without gaps.
// The helper macro is local to this enum and undefined right after it.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
  COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
  COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
  COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
  COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
  COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
};
#undef COMPUTE_PGM_RSRC2

// Compute program resource register 3 for GFX90A+. Must match hardware
// definition.
// Each entry is (NAME, SHIFT, WIDTH) and expands to
// COMPUTE_PGM_RSRC3_GFX90A_<NAME>_SHIFT, _WIDTH and the mask itself.
// This is one of two layouts declared in this header for the same
// compute_pgm_rsrc3 word (see also the GFX10+ layout).
#define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
  COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
  COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
  COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
};
#undef COMPUTE_PGM_RSRC3_GFX90A

// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
// Each entry is (NAME, SHIFT, WIDTH) and expands to
// COMPUTE_PGM_RSRC3_GFX10_PLUS_<NAME>_SHIFT, _WIDTH and the mask itself.
// This is one of two layouts declared in this header for the same
// compute_pgm_rsrc3 word (see also the GFX90A layout).
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
  COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS

// Kernel code properties. Must be kept backwards compatible.
// Each entry is (NAME, SHIFT, WIDTH) and expands to
// KERNEL_CODE_PROPERTY_<NAME>_SHIFT, _WIDTH and the mask itself. The entries
// cover bits 0 through 15, matching the 16-bit kernel_code_properties member
// of kernel_descriptor_t.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
  KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
  KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
  KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
  KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
};
#undef KERNEL_CODE_PROPERTY

// Kernel descriptor. Must be kept backwards compatible.
// The layout is fixed at 64 bytes; the size and every member offset are
// verified by the static_asserts that follow in this header. The
// compute_pgm_rsrc* members and kernel_code_properties are bit-packed
// according to the COMPUTE_PGM_RSRC* and KERNEL_CODE_PROPERTY enums above.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;
  uint32_t private_segment_fixed_size;
  uint32_t kernarg_size;
  uint8_t reserved0[4];
  int64_t kernel_code_entry_byte_offset;
  uint8_t reserved1[20];
  uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
  uint32_t compute_pgm_rsrc1;
  uint32_t compute_pgm_rsrc2;
  uint16_t kernel_code_properties;
  uint8_t reserved2[6];
};

// Byte offsets of the kernel_descriptor_t members, as named constants. The
// static_asserts below check each one against offsetof() on the struct.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  KERNARG_SIZE_OFFSET = 8,
  RESERVED0_OFFSET = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET = 24,
  COMPUTE_PGM_RSRC3_OFFSET = 44,
  COMPUTE_PGM_RSRC1_OFFSET = 48,
  COMPUTE_PGM_RSRC2_OFFSET = 52,
  KERNEL_CODE_PROPERTIES_OFFSET = 56,
  RESERVED2_OFFSET = 58,
};

// Compile-time layout checks: the descriptor must be exactly 64 bytes and
// every member must sit at the offset published in the *_OFFSET enum, so any
// accidental change to the struct breaks the build.
static_assert(
    sizeof(kernel_descriptor_t) == 64,
    "invalid size for kernel_descriptor_t");
static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
                  GROUP_SEGMENT_FIXED_SIZE_OFFSET,
              "invalid offset for group_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
                  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
              "invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
                  KERNARG_SIZE_OFFSET,
              "invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
              "invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
                  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
              "invalid offset for kernel_code_entry_byte_offset");
static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
              "invalid offset for reserved1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
                  COMPUTE_PGM_RSRC3_OFFSET,
              "invalid offset for compute_pgm_rsrc3");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
                  COMPUTE_PGM_RSRC1_OFFSET,
              "invalid offset for compute_pgm_rsrc1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
                  COMPUTE_PGM_RSRC2_OFFSET,
              "invalid offset for compute_pgm_rsrc2");
static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
                  KERNEL_CODE_PROPERTIES_OFFSET,
              "invalid offset for kernel_code_properties");
static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
              "invalid offset for reserved2");

} // end namespace amdhsa
} // end namespace llvm
} // end namespace rocr

#endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H


================================================
FILE: runtime/hsa-runtime/loader/executable.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "executable.hpp"

#include <libelf.h>
#include <limits.h>
#include <link.h>
#include <unistd.h>

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <atomic>
#include <fstream>
#include "inc/amd_hsa_elf.h"
#include "inc/amd_hsa_kernel_code.h"
#include "core/inc/amd_hsa_code.hpp"
#include "amd_hsa_code_util.hpp"
#include "amd_options.hpp"
#include "core/util/utils.h"

#include "AMDHSAKernelDescriptor.h"

using namespace rocr::amd::hsa;
using namespace rocr::amd::hsa::common;

// r_version history:
// 1: Initial debug protocol
// 2: New trap handler ABI. The reason for halting a wave is recorded in ttmp11[8:7].
// 3: New trap handler ABI. A wave halted at S_ENDPGM rewinds its PC by 8 bytes, and sets ttmp11[9]=1.
// 4: New trap handler ABI. Save the trap id in ttmp11[16:9]
// 5: New trap handler ABI. Save the PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped
// 6: New trap handler ABI. ttmp6[25:0] contains dispatch index modulo queue size
// 7: New trap handler ABI. Send interrupts as a bitmask, coalescing concurrent exceptions.
// 8: New trap handler ABI. For gfx942: Initialize ttmp[4:5] if ttmp11[31] == 0.
// 9: New trap handler ABI. For gfx11: Save PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped.
// 10: New trap handler ABI. Set status.skip_export when halting the wave.
//                           For gfx942, set ttmp6[31] = 0 if ttmp11[31] == 0.

HSA_API r_debug _amdgpu_r_debug;
// Returns a reference to the loader's cached pointer to the last link_map
// entry, so callers can both read and reassign the tail. The pointer starts out
// as nullptr.
static __forceinline link_map*& r_debug_tail() {
  static link_map* r_debug_tail_ = nullptr;
  return r_debug_tail_;
}

namespace rocr {
  // Having a side effect prevents call site optimization that allows removal of a noinline function call
  // with no side effect.
__attribute__((noinline)) void _loader_debug_state() {
  // Toggling a volatile gives the function an observable effect so calls to it
  // are not optimized away.
  // NOTE(review): presumably a debugger places a breakpoint here to observe
  // _amdgpu_r_debug.r_state transitions - confirm against the debug protocol.
  static volatile int function_needs_a_side_effect = 0;
  function_needs_a_side_effect ^= 1;
}

namespace amd {
namespace hsa {
namespace loader {

/// @brief Bundle of the options understood by the loader together with the
/// parser that fills them in from an option string.
class LoaderOptions {
public:
  /// @brief Registers every option with the parser.
  /// @param error Stream handed to the option parser (defaults to std::cerr).
  explicit LoaderOptions(std::ostream &error = std::cerr);

  /// @name Read-only access to the individual options.
  /// @{
  const amd::options::NoArgOption* Help() const { return &help; }
  const amd::options::NoArgOption* DumpCode() const { return &dump_code; }
  const amd::options::NoArgOption* DumpIsa() const { return &dump_isa; }
  const amd::options::NoArgOption* DumpExec() const { return &dump_exec; }
  const amd::options::NoArgOption* DumpAll() const { return &dump_all; }
  const amd::options::ValueOption<std::string>* DumpDir() const { return &dump_dir; }
  const amd::options::PrefixOption* Substitute() const { return &substitute; }
  /// @}

  /// @brief Parses @p options; returns the parser's result.
  bool ParseOptions(const std::string& options);
  /// @brief Resets the underlying option parser.
  void Reset();
  /// @brief Prints the parser's help text to @p out.
  void PrintHelp(std::ostream& out) const;

private:
  /// @brief Copy constructor - not available.
  /// Deleted so that any attempted copy is rejected at compile time instead of
  /// surfacing as an unresolved symbol at link time.
  LoaderOptions(const LoaderOptions&) = delete;

  /// @brief Assignment operator - not available.
  LoaderOptions& operator=(const LoaderOptions&) = delete;

  amd::options::NoArgOption help;
  amd::options::NoArgOption dump_code;
  amd::options::NoArgOption dump_isa;
  amd::options::NoArgOption dump_exec;
  amd::options::NoArgOption dump_all;
  amd::options::ValueOption<std::string> dump_dir;
  amd::options::PrefixOption substitute;
  amd::options::OptionParser option_parser;
};

// Builds each option with its name and help text, then registers all of them
// with option_parser in declaration order. `error` is forwarded to the parser.
LoaderOptions::LoaderOptions(std::ostream& error) :
  help("help", "print help"),
  dump_code("dump-code", "Dump finalizer output code object"),
  dump_isa("dump-isa", "Dump finalizer output to ISA text file"),
  dump_exec("dump-exec", "Dump executable to text file"),
  dump_all("dump-all", "Dump all finalizer input and output (as above)"),
  dump_dir("dump-dir", "Dump directory"),
  substitute("substitute", "Substitute code object with given index or index range on loading from file"),
  option_parser(false, error)
{
  option_parser.AddOption(&help);
  option_parser.AddOption(&dump_code);
  option_parser.AddOption(&dump_isa);
  option_parser.AddOption(&dump_exec);
  option_parser.AddOption(&dump_all);
  option_parser.AddOption(&dump_dir);
  option_parser.AddOption(&substitute);
}

// Hands the option string to the parser and reports the parser's verdict.
bool LoaderOptions::ParseOptions(const std::string& options)
{
  const char* const option_text = options.c_str();
  return option_parser.ParseOptions(option_text);
}

// Forwards to OptionParser::Reset() on the owned parser.
void LoaderOptions::Reset()
{
  option_parser.Reset();
}

// Writes the parser's help text for the registered options to `out`.
void LoaderOptions::PrintHelp(std::ostream& out) const
{
  option_parser.PrintHelp(out);
}

// NOTE(review): presumably the file-name prefix used for loader dump files -
// confirm at the use sites, which are outside this section.
static const char *LOADER_DUMP_PREFIX = "amdcode";

// Factory: allocates an AmdHsaCodeLoader bound to `context`. The returned
// pointer is heap-allocated; Loader::Destroy() deletes it.
Loader* Loader::Create(Context* context)
{
  return new AmdHsaCodeLoader(context);
}

// Clears the global debug link_map state (_amdgpu_r_debug.r_map, r_state and
// the cached tail pointer) and then deletes `loader`.
void Loader::Destroy(Loader *loader)
{
  // Loader resets the link_map, but the executables and loaded code objects are not deleted.
  _amdgpu_r_debug.r_map = nullptr;
  _amdgpu_r_debug.r_state = r_debug::RT_CONSISTENT;
  r_debug_tail() = nullptr;
  delete loader;
}

// Creates an executable that uses the loader's own context and records it in
// `executables`. The new executable's id is its index in that list.
Executable* AmdHsaCodeLoader::CreateExecutable(
  hsa_profile_t profile, const char *options, hsa_default_float_rounding_mode_t default_float_rounding_mode)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);

  ExecutableImpl* const created =
      new ExecutableImpl(profile, context, executables.size(), default_float_rounding_mode);
  executables.push_back(created);
  return created;
}

// Creates an executable that takes ownership of `isolated_context` and records
// it in `executables`. The new executable's id is its index in that list.
Executable* AmdHsaCodeLoader::CreateExecutable(
      std::unique_ptr<Context> isolated_context,
      hsa_profile_t profile,
      const char *options,
      hsa_default_float_rounding_mode_t default_float_rounding_mode)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);

  ExecutableImpl* const created = new ExecutableImpl(
      profile, std::move(isolated_context), executables.size(), default_float_rounding_mode);
  executables.push_back(created);
  return created;
}

// Appends `map` to the end of the doubly linked list rooted at
// _amdgpu_r_debug.r_map and makes it the new cached tail.
static void AddCodeObjectInfoIntoDebugMap(link_map* map) {
  link_map*& tail = r_debug_tail();

  if (tail != nullptr) {
    // Non-empty list: hook the new entry behind the current tail.
    tail->l_next = map;
  } else {
    // Empty list: the new entry also becomes the head.
    _amdgpu_r_debug.r_map = map;
  }

  map->l_prev = tail;
  map->l_next = nullptr;
  tail = map;
}

// Unlinks `map` from the list rooted at _amdgpu_r_debug.r_map, fixing up the
// head and cached tail when `map` was at either end, then frees its l_name and
// zeroes the entry.
static void RemoveCodeObjectInfoFromDebugMap(link_map* map) {
  link_map* const before = map->l_prev;
  link_map* const after = map->l_next;

  link_map*& tail = r_debug_tail();
  if (tail == map) {
    tail = before;
  }
  if (_amdgpu_r_debug.r_map == map) {
    _amdgpu_r_debug.r_map = after;
  }

  // Stitch the neighbours together around the removed entry.
  if (before != nullptr) {
    before->l_next = after;
  }
  if (after != nullptr) {
    after->l_prev = before;
  }

  free(map->l_name);
  memset(map, 0, sizeof(link_map));
}

// Freezes `executable` and, on success, publishes the r_debug_info of each of
// its loaded code objects in the _amdgpu_r_debug link_map list.
//
// While the list is being extended r_state is RT_ADD; it returns to
// RT_CONSISTENT afterwards, and _loader_debug_state() is called after each of
// the two state changes. The statement order (store, fence, notification,
// fence, list update, store, notification) is deliberate and must be kept.
//
// Returns the status from Executable::Freeze() if it fails (the debug list is
// then left untouched), otherwise HSA_STATUS_SUCCESS.
hsa_status_t AmdHsaCodeLoader::FreezeExecutable(Executable *executable, const char *options) {
  hsa_status_t  status = executable->Freeze(options);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Assuming runtime atomic implements C++ std::memory_order
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_ADD, std::memory_order_relaxed);
  atomic::Fence(std::memory_order_acq_rel);
  _loader_debug_state();
  atomic::Fence(std::memory_order_acq_rel);
  for (auto &lco : reinterpret_cast<ExecutableImpl*>(executable)->loaded_code_objects) {
    AddCodeObjectInfoIntoDebugMap(&(lco->r_debug_info));
  }
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
  _loader_debug_state();

  return HSA_STATUS_SUCCESS;
}

// Removes the r_debug_info of every loaded code object of `executable` from the
// _amdgpu_r_debug link_map list, clears the executable's slot in `executables`
// (the slot is kept as nullptr, so the indices of other executables do not
// change) and deletes the executable.
//
// While the list is being shortened r_state is RT_DELETE; it returns to
// RT_CONSISTENT afterwards, and _loader_debug_state() is called after each of
// the two state changes. The statement order is deliberate and must be kept.
void AmdHsaCodeLoader::DestroyExecutable(Executable *executable) {
  // Assuming runtime atomic implements C++ std::memory_order
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_DELETE, std::memory_order_relaxed);
  atomic::Fence(std::memory_order_acq_rel);
  _loader_debug_state();
  atomic::Fence(std::memory_order_acq_rel);
  for (auto &lco : reinterpret_cast<ExecutableImpl*>(executable)->loaded_code_objects) {
    RemoveCodeObjectInfoFromDebugMap(&(lco->r_debug_info));
  }
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
  _loader_debug_state();

  executables[((ExecutableImpl*)executable)->id()] = nullptr;
  delete executable;
}

// Invokes `callback` once for every live (non-null) executable, in list order.
// Stops at, and returns, the first status other than HSA_STATUS_SUCCESS.
// NOTE(review): the writer lock is held for the whole walk although the list is
// only read here - confirm whether that is intentional.
hsa_status_t AmdHsaCodeLoader::IterateExecutables(
  hsa_status_t (*callback)(
    hsa_executable_t executable,
    void *data),
  void *data)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(callback);

  for (auto &candidate : executables) {
    // Slots of destroyed executables are left as nullptr.
    if (candidate == nullptr) {
      continue;
    }

    const hsa_status_t outcome = callback(Executable::Handle(candidate), data);
    if (outcome != HSA_STATUS_SUCCESS) {
      return outcome;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Two-phase query of the segment descriptors of all live executables.
//
//  - Size query: *num_segment_descriptors == 0 and segment_descriptors == null.
//    The total number of descriptors is written to *num_segment_descriptors.
//  - Fill: *num_segment_descriptors must equal that total and
//    segment_descriptors must be non-null; the array is filled in list order.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null count or a count/array
// combination matching neither phase, HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS
// when the supplied count differs from the actual total, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t AmdHsaCodeLoader::QuerySegmentDescriptors(
  hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
  size_t *num_segment_descriptors)
{
  if (num_segment_descriptors == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // A zero count must come with a null array, and a non-zero count with a
  // non-null array; anything else is malformed.
  const bool is_size_query = (*num_segment_descriptors == 0);
  if (is_size_query != (segment_descriptors == nullptr)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  this->EnableReadOnlyMode();

  size_t available = 0;
  for (auto &candidate : executables) {
    if (candidate) {
      available += candidate->GetNumSegmentDescriptors();
    }
  }

  hsa_status_t result = HSA_STATUS_SUCCESS;
  if (is_size_query) {
    *num_segment_descriptors = available;
  } else if (*num_segment_descriptors != available) {
    result = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  } else {
    size_t filled = 0;
    for (auto &candidate : executables) {
      if (candidate) {
        filled += candidate->QuerySegmentDescriptors(segment_descriptors, available, filled);
      }
    }
  }

  this->DisableReadOnlyMode();
  return result;
}

// Asks each live executable in turn to translate `device_address` and returns
// the first non-zero host address. Returns 0 for a zero input or when no
// executable reports a match.
uint64_t AmdHsaCodeLoader::FindHostAddress(uint64_t device_address)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  if (device_address == 0) {
    return 0;
  }

  for (auto &candidate : executables) {
    if (candidate == nullptr) {
      continue;
    }
    const uint64_t translated = candidate->FindHostAddress(device_address);
    if (translated != 0) {
      return translated;
    }
  }
  return 0;
}

void AmdHsaCodeLoader::PrintHelp(std::ostream& out)
{
  LoaderOptions().PrintHelp(out);
}

void AmdHsaCodeLoader::EnableReadOnlyMode()
{
  rw_lock_.ReaderLock();
  for (auto &executable : executables) {
    if (executable) {
      ((ExecutableImpl*)executable)->EnableReadOnlyMode();
    }
  }
}

// Undoes EnableReadOnlyMode(): takes every live executable out of read-only
// mode and then releases the loader's reader lock.
//
// The loader lock is released last on purpose. It is what keeps `executables`
// stable; dropping it before the walk would let a concurrent writer grow the
// list (invalidating the iteration) or add an executable that was never put
// into read-only mode and would then be unlocked without having been locked.
void AmdHsaCodeLoader::DisableReadOnlyMode()
{
  for (auto &executable : executables) {
    if (executable) {
      ((ExecutableImpl*)executable)->DisableReadOnlyMode();
    }
  }
  rw_lock_.ReaderUnlock();
}

//===----------------------------------------------------------------------===//
// SymbolImpl.                                                                    //
//===----------------------------------------------------------------------===//

/// @brief Writes the attribute selected by @p symbol_info into @p value.
///
/// Handles the attributes common to every symbol kind. The caller must supply
/// storage of the type that belongs to the attribute; for the two name
/// attributes exactly `size()` bytes are written and no terminator is
/// appended, so the buffer must be sized via the matching *_NAME_LENGTH query.
///
/// @param symbol_info Attribute to query.
/// @param value Destination storage; must not be null.
/// @return true if the attribute was written; false for an unknown attribute,
/// or for an address/agent query on a symbol that is not loaded.
bool SymbolImpl::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  // The code-symbol and executable-symbol attribute enumerations are used
  // interchangeably below, so their values must agree.
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_TYPE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_TYPE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_NAME_LENGTH) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_NAME) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_NAME)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_MODULE_NAME) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_LINKAGE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_IS_DEFINITION) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION)),
    "attributes are not compatible"
  );

  assert(value);

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_TYPE: {
      *((hsa_symbol_kind_t*)value) = kind;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_NAME_LENGTH: {
      *((uint32_t*)value) = symbol_name.size();
      break;
    }
    case HSA_CODE_SYMBOL_INFO_NAME: {
      // The copy covers the whole destination range, so no prior clearing is
      // needed.
      memcpy(value, symbol_name.c_str(), symbol_name.size());
      break;
    }
    case HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH: {
      *((uint32_t*)value) = module_name.size();
      break;
    }
    case HSA_CODE_SYMBOL_INFO_MODULE_NAME: {
      memcpy(value, module_name.c_str(), module_name.size());
      break;
    }
    case HSA_CODE_SYMBOL_INFO_LINKAGE: {
      *((hsa_symbol_linkage_t*)value) = linkage;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_IS_DEFINITION: {
      *((bool*)value) = is_definition;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_CALL_CONVENTION: {
      *((uint32_t*)value) = 0;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT:
    case HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS: {
      // An address only exists once the symbol has been loaded.
      if (!is_loaded) {
        return false;
      }
      *((uint64_t*)value) = address;
      break;
    }
    case HSA_EXECUTABLE_SYMBOL_INFO_AGENT: {
      if (!is_loaded) {
        return false;
      }
      *((hsa_agent_t*)value) = agent;
      break;
    }
    default: {
      return false;
    }
  }

  return true;
}

//===----------------------------------------------------------------------===//
// KernelSymbol.                                                              //
//===----------------------------------------------------------------------===//

// Answers the kernel-specific attribute queries; every other attribute is
// delegated to SymbolImpl::GetInfo(). All kernel attributes handled here are
// written as uint32_t, except the dynamic-callstack flag, which is a bool.
// Returns true when the attribute was written.
bool KernelSymbol::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  // Code-symbol and executable-symbol attribute values are used
  // interchangeably, so they must agree.
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK)),
    "attributes are not compatible"
  );

  assert(value);

  // Stores a 32-bit result into the caller's buffer.
  const auto put_u32 = [value](uint32_t field) {
    *static_cast<uint32_t*>(value) = field;
  };

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE:
      put_u32(kernarg_segment_size);
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT:
      put_u32(kernarg_segment_alignment);
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE:
      put_u32(group_segment_size);
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE:
      put_u32(private_segment_size);
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK:
      *static_cast<bool*>(value) = is_dynamic_callstack;
      return true;
    case HSA_CODE_SYMBOL_INFO_KERNEL_WAVEFRONT_SIZE:
      put_u32(wavefront_size);
      return true;
    case HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE:
      put_u32(size);
      return true;
    case HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_ALIGN:
      put_u32(alignment);
      return true;
    default:
      // Not kernel-specific: let the base class decide.
      return SymbolImpl::GetInfo(symbol_info, value);
  }
}

//===----------------------------------------------------------------------===//
// VariableSymbol.                                                            //
//===----------------------------------------------------------------------===//

/// @brief Answers the variable-specific attribute queries; every other
/// attribute is delegated to SymbolImpl::GetInfo().
///
/// @param symbol_info Attribute to query.
/// @param value Destination storage of the type that belongs to the
/// attribute; must not be null.
/// @return true if the attribute was written, otherwise the result of the
/// base-class query.
bool VariableSymbol::GetInfo(hsa_symbol_info32_t symbol_info, void *value) {
  // Code-symbol and executable-symbol attribute values are used
  // interchangeably, so they must agree.
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE)),
    "attributes are not compatible"
  );
  static_assert(
    (symbol_attribute32_t(HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST) ==
     symbol_attribute32_t(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST)),
    "attributes are not compatible"
  );

  // Same precondition as the other GetInfo() implementations in this file:
  // the cases below write through `value` unconditionally.
  assert(value);

  switch (symbol_info) {
    case HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION: {
      *((hsa_variable_allocation_t*)value) = allocation;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT: {
      *((hsa_variable_segment_t*)value) = segment;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT: {
      *((uint32_t*)value) = alignment;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE: {
      *((uint32_t*)value) = size;
      break;
    }
    case HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST: {
      *((bool*)value) = is_constant;
      break;
    }
    default: {
      return SymbolImpl::GetInfo(symbol_info, value);
    }
  }

  return true;
}

// Reports the ELF image backing this loaded code object: either its address
// (as an hsa_code_object_t handle) or its size (as size_t). Returns false for
// any other attribute.
bool LoadedCodeObjectImpl::GetInfo(amd_loaded_code_object_info_t attribute, void *value)
{
  assert(value);

  if (attribute == AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE) {
    hsa_code_object_t* const image = static_cast<hsa_code_object_t*>(value);
    image->handle = reinterpret_cast<uint64_t>(elf_data);
    return true;
  }

  if (attribute == AMD_LOADED_CODE_OBJECT_INFO_ELF_IMAGE_SIZE) {
    *static_cast<size_t*>(value) = elf_size;
    return true;
  }

  return false;
}

// Calls `callback` for each loaded segment in order. Stops at, and returns, the
// first status other than HSA_STATUS_SUCCESS.
hsa_status_t LoadedCodeObjectImpl::IterateLoadedSegments(
  hsa_status_t (*callback)(
    amd_loaded_segment_t loaded_segment,
    void *data),
  void *data)
{
  assert(callback);

  for (auto &entry : loaded_segments) {
    const hsa_status_t outcome = callback(LoadedSegment::Handle(entry), data);
    if (HSA_STATUS_SUCCESS != outcome) {
      return outcome;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Writes the fixed heading "Code Object" followed by a newline (and flush) to
// `out`; no per-object details are printed.
void LoadedCodeObjectImpl::Print(std::ostream& out)
{
  out << "Code Object" << std::endl;
}

// Writes the requested segment attribute into `value`:
//   TYPE              -> amdgpu_hsa_elf_segment_t
//   ELF_BASE_ADDRESS  -> uint64_t, the segment's ELF virtual address
//   LOAD_BASE_ADDRESS -> uint64_t, the address the segment start resolves to
//   SIZE              -> size_t
// Returns false for any other attribute.
bool Segment::GetInfo(amd_loaded_segment_info_t attribute, void *value)
{
  assert(value);

  switch (attribute) {
    case AMD_LOADED_SEGMENT_INFO_TYPE:
      *static_cast<amdgpu_hsa_elf_segment_t*>(value) = segment;
      return true;
    case AMD_LOADED_SEGMENT_INFO_ELF_BASE_ADDRESS:
      *static_cast<uint64_t*>(value) = vaddr;
      return true;
    case AMD_LOADED_SEGMENT_INFO_LOAD_BASE_ADDRESS: {
      void* const load_base = this->Address(this->VAddr());
      *static_cast<uint64_t*>(value) = reinterpret_cast<uint64_t>(load_base);
      return true;
    }
    case AMD_LOADED_SEGMENT_INFO_SIZE:
      *static_cast<size_t*>(value) = size;
      return true;
    default:
      return false;
  }
}

// Converts an ELF virtual address into a byte offset from the start of this
// segment. `addr` is expected to lie inside the segment (checked only by the
// assertion).
uint64_t Segment::Offset(uint64_t addr)
{
  assert(IsAddressInSegment(addr));
  return addr - vaddr;
}

// Resolves the ELF virtual address `addr` through the owning executable's
// context, passing the segment-relative offset of `addr`.
void* Segment::Address(uint64_t addr)
{
  const uint64_t offset_in_segment = Offset(addr);
  return owner->context()->SegmentAddress(segment, agent, ptr, offset_in_segment);
}

// Freezes the segment through the owning context unless it is already frozen.
// Returns true if the segment is frozen on return.
bool Segment::Freeze()
{
  if (frozen) {
    return true;
  }

  // Remember the outcome so a successful freeze is not repeated.
  frozen = owner->context()->SegmentFreeze(segment, agent, ptr, size);
  return frozen;
}

// Returns true when `addr` lies in the half-open range [vaddr, vaddr + size).
//
// The upper bound is tested as a distance from vaddr instead of computing
// vaddr + size, which would wrap around for a segment ending at the top of the
// address space and wrongly reject every address in it.
bool Segment::IsAddressInSegment(uint64_t addr)
{
  return vaddr <= addr && (addr - vaddr) < size;
}

// Copies `size` bytes from `src` into the segment at ELF virtual address
// `addr`, via the owning context. A zero-length copy does nothing.
void Segment::Copy(uint64_t addr, const void* src, size_t size)
{
  // loader must do copies before freezing.
  assert(!frozen);

  if (size == 0) {
    return;
  }

  owner->context()->SegmentCopy(segment, agent, ptr, Offset(addr), src, size);
}

// Dumps a three-line description of the segment to `out`: a heading, then the
// type, size and virtual address, then the backing pointer printed with the
// stream in hex mode.
void Segment::Print(std::ostream& out)
{
  out << "Segment" << std::endl;

  out << "    Type: " << AmdHsaElfSegmentToString(segment)
      << "    Size: " << size
      << "    VAddr: " << vaddr << std::endl;

  out << "    Ptr: " << std::hex << ptr << std::dec << std::endl;
}

// Releases the segment's backing storage through the owning executable's
// context (Context::SegmentFree).
void Segment::Destroy()
{
  owner->context()->SegmentFree(segment, agent, ptr, size);
}

//===----------------------------------------------------------------------===//
// ExecutableImpl.                                                                //
//===----------------------------------------------------------------------===//

// Constructs an unfrozen executable that borrows `context` (not owned here).
// `id` is stored as given; the loader passes the executable's index in its
// list. No program allocation segment exists yet.
ExecutableImpl::ExecutableImpl(
    const hsa_profile_t &_profile,
    Context *context,
    size_t id,
    hsa_default_float_rounding_mode_t default_float_rounding_mode)
  : Executable()
  , profile_(_profile)
  , context_(context)
  , id_(id)
  , default_float_rounding_mode_(default_float_rounding_mode)
  , state_(HSA_EXECUTABLE_STATE_UNFROZEN)
  , program_allocation_segment(nullptr)
{
}

// Constructs an unfrozen executable that takes ownership of `unique_context`.
// `id` is stored as given; the loader passes the executable's index in its
// list. No program allocation segment exists yet.
ExecutableImpl::ExecutableImpl(
    const hsa_profile_t &_profile,
    std::unique_ptr<Context> unique_context,
    size_t id,
    hsa_default_float_rounding_mode_t default_float_rounding_mode)
  : Executable()
  , profile_(_profile)
  , unique_context_(std::move(unique_context))
  , id_(id)
  , default_float_rounding_mode_(default_float_rounding_mode)
  , state_(HSA_EXECUTABLE_STATE_UNFROZEN)
  , program_allocation_segment(nullptr)
{
  // context_ is the pointer the rest of the class uses; here it aliases the
  // owned context.
  context_ = unique_context_.get();
}

// Destroys and deletes every executable object, then deletes all program and
// agent symbols owned by this executable.
ExecutableImpl::~ExecutableImpl() {
  for (auto it = objects.begin(), last = objects.end(); it != last; ++it) {
    ExecutableObject* const object = *it;
    object->Destroy();
    delete object;
  }
  objects.clear();

  for (auto &entry : program_symbols_) {
    delete entry.second;
  }

  for (auto &entry : agent_symbols_) {
    delete entry.second;
  }
}

// Registers an externally provided program-allocation global variable called
// `name`, located at `address`.
// Returns HSA_STATUS_ERROR_FROZEN_EXECUTABLE once the executable is frozen,
// HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED if a program symbol of that name
// already exists, and HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::DefineProgramExternalVariable(
  const char *name, void *address)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(name);

  if (state_ == HSA_EXECUTABLE_STATE_FROZEN) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  const std::string variable_name(name);
  if (program_symbols_.find(variable_name) != program_symbols_.end()) {
    return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
  }

  VariableSymbol* const definition =
      new VariableSymbol(true,
                         "", // Only program linkage symbols can be
                             // defined.
                         std::string(name),
                         HSA_SYMBOL_LINKAGE_PROGRAM,
                         true,
                         HSA_VARIABLE_ALLOCATION_PROGRAM,
                         HSA_VARIABLE_SEGMENT_GLOBAL,
                         0,     // TODO: size.
                         0,     // TODO: align.
                         false, // TODO: const.
                         true,
                         reinterpret_cast<uint64_t>(address));
  program_symbols_.insert(std::make_pair(variable_name, definition));

  return HSA_STATUS_SUCCESS;
}

// Registers an externally provided agent-allocation variable called `name` for
// `agent`, living in `segment` at `address`.
// Returns HSA_STATUS_ERROR_FROZEN_EXECUTABLE once the executable is frozen,
// HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED if that (name, agent) pair already
// has a symbol, and HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::DefineAgentExternalVariable(
  const char *name,
  hsa_agent_t agent,
  hsa_variable_segment_t segment,
  void *address)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  assert(name);

  if (state_ == HSA_EXECUTABLE_STATE_FROZEN) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  // Agent symbols are keyed by the (name, agent) pair.
  const auto symbol_key = std::make_pair(std::string(name), agent);
  if (agent_symbols_.find(symbol_key) != agent_symbols_.end()) {
    return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
  }

  VariableSymbol* const definition =
      new VariableSymbol(true,
                         "", // Only program linkage symbols can be
                             // defined.
                         std::string(name),
                         HSA_SYMBOL_LINKAGE_PROGRAM,
                         true,
                         HSA_VARIABLE_ALLOCATION_AGENT,
                         segment,
                         0,     // TODO: size.
                         0,     // TODO: align.
                         false, // TODO: const.
                         true,
                         reinterpret_cast<uint64_t>(address));

  auto inserted = agent_symbols_.insert(std::make_pair(symbol_key, definition));
  assert(inserted.second);
  inserted.first->second->agent = agent;

  return HSA_STATUS_SUCCESS;
}

// Returns true if a program symbol named `symbol_name` is defined.
bool ExecutableImpl::IsProgramSymbol(const char *symbol_name) {
  assert(symbol_name);

  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  const auto found = program_symbols_.find(std::string(symbol_name));
  return found != program_symbols_.end();
}

// Locked wrapper around GetSymbolInternal(): takes the reader lock and looks up
// `symbol_name` among the program symbols (when `agent` is null) or among the
// symbols of `*agent`. Returns nullptr when nothing matches.
Symbol* ExecutableImpl::GetSymbol(
  const char *symbol_name,
  const hsa_agent_t *agent)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  return this->GetSymbolInternal(symbol_name, agent);
}

// Lookup without locking. With a null `agent` the program symbols are
// searched; otherwise the agent symbols are searched using the
// (name, *agent) pair. Returns nullptr for an empty name or when nothing
// matches.
Symbol* ExecutableImpl::GetSymbolInternal(
  const char *symbol_name,
  const hsa_agent_t *agent)
{
  assert(symbol_name);

  const std::string mangled_name(symbol_name);
  if (mangled_name.empty()) {
    return nullptr;
  }

  if (agent == nullptr) {
    auto program_hit = program_symbols_.find(mangled_name);
    if (program_hit == program_symbols_.end()) {
      return nullptr;
    }
    return program_hit->second;
  }

  auto agent_hit = agent_symbols_.find(std::make_pair(mangled_name, *agent));
  if (agent_hit == agent_symbols_.end()) {
    return nullptr;
  }
  return agent_hit->second;
}

// Calls `callback` for every program symbol and then for every agent symbol.
// Stops at, and returns, the first status other than HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateSymbols(
  iterate_symbols_f callback, void *data)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  // Program-scope symbols come first.
  for (auto &entry : program_symbols_) {
    const hsa_status_t outcome =
      callback(Executable::Handle(this), Symbol::Handle(entry.second), data);
    if (outcome != HSA_STATUS_SUCCESS) {
      return outcome;
    }
  }

  // Agent-scope symbols follow.
  for (auto &entry : agent_symbols_) {
    const hsa_status_t outcome =
      callback(Executable::Handle(this), Symbol::Handle(entry.second), data);
    if (outcome != HSA_STATUS_SUCCESS) {
      return outcome;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Calls `callback` for every agent symbol whose agent handle equals
// `agent.handle`. Stops at, and returns, the first status other than
// HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateAgentSymbols(
    hsa_agent_t agent,
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_agent_t agent,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &entry : agent_symbols_) {
    if (entry.second->GetAgent().handle == agent.handle) {
      const hsa_status_t outcome = callback(
          Executable::Handle(this), agent, Symbol::Handle(entry.second),
          data);
      if (outcome != HSA_STATUS_SUCCESS) {
        return outcome;
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Calls `callback` for every program symbol. Stops at, and returns, the first
// status other than HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateProgramSymbols(
    hsa_status_t (*callback)(hsa_executable_t exec,
                             hsa_executable_symbol_t symbol,
                             void *data),
    void *data) {
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &entry : program_symbols_) {
    const hsa_status_t outcome = callback(
        Executable::Handle(this), Symbol::Handle(entry.second), data);
    if (HSA_STATUS_SUCCESS != outcome) {
      return outcome;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Calls `callback` for every loaded code object of this executable. Stops at,
// and returns, the first status other than HSA_STATUS_SUCCESS.
hsa_status_t ExecutableImpl::IterateLoadedCodeObjects(
  hsa_status_t (*callback)(
    hsa_executable_t executable,
    hsa_loaded_code_object_t loaded_code_object,
    void *data),
  void *data)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  assert(callback);

  for (auto &entry : loaded_code_objects) {
    const hsa_status_t outcome = callback(
        Executable::Handle(this),
        LoadedCodeObject::Handle(entry),
        data);
    if (HSA_STATUS_SUCCESS != outcome) {
      return outcome;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Returns the total number of loaded segments across all loaded code objects.
// No lock is taken here; the caller is expected to have enabled read-only mode.
size_t ExecutableImpl::GetNumSegmentDescriptors()
{
  // assuming we are in readonly mode.
  size_t segment_total = 0;
  for (auto &code_object : loaded_code_objects) {
    segment_total += code_object->LoadedSegments().size();
  }
  return segment_total;
}

// Fills `segment_descriptors` with one entry per loaded segment of this
// executable, starting at index `first_empty_segment_descriptor`.
//
// `total_num_segment_descriptors` is the capacity of the whole array and is
// used for bounds assertions only. Returns the number of entries written,
// which is 0 for an executable without loaded segments.
// No lock is taken here; the caller is expected to have enabled read-only mode.
size_t ExecutableImpl::QuerySegmentDescriptors(
  hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
  size_t total_num_segment_descriptors,
  size_t first_empty_segment_descriptor)
{
  // assuming we are in readonly mode.
  assert(segment_descriptors);
  // The start index may equal the capacity: an executable with no segments can
  // be visited after the array has already been filled by earlier ones.
  assert(first_empty_segment_descriptor <= total_num_segment_descriptors);

  size_t i = first_empty_segment_descriptor;
  for (auto &obj : loaded_code_objects) {
    for (auto &seg : obj->LoadedSegments()) {
      // Checked per write, so segment-less code objects never trip it.
      assert(i < total_num_segment_descriptors);
      segment_descriptors[i].agent = seg->Agent();
      segment_descriptors[i].executable = Executable::Handle(seg->Owner());
      segment_descriptors[i].code_object_storage_type = HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY;
      segment_descriptors[i].code_object_storage_base = obj->ElfData();
      segment_descriptors[i].code_object_storage_size = obj->ElfSize();
      segment_descriptors[i].code_object_storage_offset = seg->StorageOffset();
      segment_descriptors[i].segment_base = seg->Address(seg->VAddr());
      segment_descriptors[i].segment_size = seg->Size();
      ++i;
    }
  }

  return i - first_empty_segment_descriptor;
}

// Agent of the code object's single loaded segment.
hsa_agent_t LoadedCodeObjectImpl::getAgent() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto& only_segment = loaded_segments.front();
  return only_segment->Agent();
}
// Handle of the executable that owns the code object's single loaded segment.
hsa_executable_t LoadedCodeObjectImpl::getExecutable() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto& only_segment = loaded_segments.front();
  return Executable::Handle(only_segment->Owner());
}
// Address of the ELF image, as an integer.
uint64_t LoadedCodeObjectImpl::getElfData() const {
  const uint64_t image_address = reinterpret_cast<uint64_t>(elf_data);
  return image_address;
}
// Size of the ELF image, widened to 64 bits.
uint64_t LoadedCodeObjectImpl::getElfSize() const {
  return static_cast<uint64_t>(elf_size);
}
// Storage offset of the code object's single loaded segment.
uint64_t LoadedCodeObjectImpl::getStorageOffset() const {
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto& only_segment = loaded_segments.front();
  return static_cast<uint64_t>(only_segment->StorageOffset());
}
// Address that ELF virtual address 0 resolves to in the single loaded segment.
uint64_t LoadedCodeObjectImpl::getLoadBase() const {
  // TODO Add support for code objects with 0 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto& only_segment = loaded_segments.front();
  return reinterpret_cast<uint64_t>(only_segment->Address(0));
}
// Size of the code object's single loaded segment.
uint64_t LoadedCodeObjectImpl::getLoadSize() const {
  // TODO Add support for code objects with 0 or >1 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto& only_segment = loaded_segments.front();
  return static_cast<uint64_t>(only_segment->Size());
}
// Difference between the load base and the single segment's ELF virtual
// address.
int64_t LoadedCodeObjectImpl::getDelta() const {
  // TODO Add support for code objects with 0 segments.
  assert(loaded_segments.size() == 1 && "Only supports code objects v2+");
  const auto load_base = getLoadBase();
  const auto elf_vaddr = loaded_segments.front()->VAddr();
  return load_base - elf_vaddr;
}

// Returns the URI recorded in the debug link_map entry (l_name), or an empty
// string when no name is set. Constructing std::string from a null pointer is
// undefined behaviour, hence the explicit check.
std::string LoadedCodeObjectImpl::getUri() const {
  const char* const uri = r_debug_info.l_name;
  return uri != nullptr ? std::string(uri) : std::string();
}

// Returns the handle of the first live executable that can translate
// `device_address` to a non-zero host address. Returns a zero handle for a
// zero input or when no executable matches.
hsa_executable_t AmdHsaCodeLoader::FindExecutable(uint64_t device_address)
{
  hsa_executable_t not_found = {0};
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  if (device_address == 0) {
    return not_found;
  }

  for (auto &candidate : executables) {
    if (candidate == nullptr) {
      continue;
    }
    if (candidate->FindHostAddress(device_address) != 0) {
      return Executable::Handle(candidate);
    }
  }
  return not_found;
}

// Finds the loaded segment whose address range contains `device_address` and
// asks the context for the host address at the same offset into that segment.
// Returns 0 when no segment contains the address or the context reports none.
uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);
  for (auto &code_object : loaded_code_objects) {
    assert(code_object);
    for (auto &segment : code_object->LoadedSegments()) {
      assert(segment);
      const uint64_t segment_start = (uint64_t)(uintptr_t)segment->Address(segment->VAddr());
      if (device_address < segment_start ||
          device_address >= segment_start + segment->Size()) {
        continue;  // address lies outside this segment
      }

      // Only the first containing segment is consulted.
      void *host = context_->SegmentHostAddress(
        segment->ElfSegment(), segment->Agent(), segment->Ptr(), device_address - segment_start);
      if (host == nullptr) {
        return 0;
      }
      return (uint64_t)(uintptr_t)host;
    }
  }
  return 0;
}

// Acquires the reader side of rw_lock_ and leaves it held; the matching
// release is done by DisableReadOnlyMode().
void ExecutableImpl::EnableReadOnlyMode() {
  rw_lock_.ReaderLock();
}

// Releases the reader side of rw_lock_ that EnableReadOnlyMode() acquired.
void ExecutableImpl::DisableReadOnlyMode() {
  rw_lock_.ReaderUnlock();
}

// Early-return helper: when `hsc` is not HSA_STATUS_SUCCESS, trips an assert
// (in builds where assert is active) and returns `hsc` from the enclosing
// function.
//
// Caveats visible from the expansion:
//  - `hsc` is expanded twice (once in the comparison, once in the return), so
//    pass a variable rather than an expression with side effects.
//  - The expansion is a bare `if` statement, not a do/while(0) wrapper, so it
//    is not safe directly before an `else`.
#define HSAERRCHECK(hsc)                                                       \
  if (hsc != HSA_STATUS_SUCCESS) {                                             \
    assert(false);                                                             \
    return hsc;                                                                \
  }                                                                            \


// Copies one attribute of this executable into the caller-provided storage.
//
// `value` must be non-null and point to an object of the type that matches
// `executable_info` (hsa_profile_t, hsa_executable_state_t or
// hsa_default_float_rounding_mode_t). Runs under the reader lock.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for any other attribute.
hsa_status_t ExecutableImpl::GetInfo(
    hsa_executable_info_t executable_info, void *value)
{
  ReaderLockGuard<ReaderWriterLock> reader_lock(rw_lock_);

  assert(value);

  switch (executable_info) {
    case HSA_EXECUTABLE_INFO_PROFILE:
      *static_cast<hsa_profile_t*>(value) = profile_;
      return HSA_STATUS_SUCCESS;

    case HSA_EXECUTABLE_INFO_STATE:
      *static_cast<hsa_executable_state_t*>(value) = state_;
      return HSA_STATUS_SUCCESS;

    case HSA_EXECUTABLE_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
      *static_cast<hsa_default_float_rounding_mode_t*>(value) =
          default_float_rounding_mode_;
      return HSA_STATUS_SUCCESS;

    default:
      break;
  }

  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

// Hands out a process-wide sequence number for loaded code objects.
// The first call returns 1 and every later call returns the next integer;
// the counter is atomic, so concurrent callers get distinct values.
static uint32_t NextCodeObjectNum()
{
  static std::atomic_uint_fast32_t next_number(1);
  const auto issued = next_number.fetch_add(1);
  return issued;
}

// Convenience overload for callers that do not supply a code object size:
// forwards to the size-taking overload with a size of 0.
hsa_status_t ExecutableImpl::LoadCodeObject(
  hsa_agent_t agent,
  hsa_code_object_t code_object,
  const char *options,
  const std::string &uri,
  hsa_loaded_code_object_t *loaded_code_object)
{
  const size_t no_size = 0;
  return LoadCodeObject(agent, code_object, no_size, options, uri, loaded_code_object);
}

// Loads `code_object` into this executable on behalf of `agent`.
//
// The work is done under the writer lock, in this order:
//   1. Fail with HSA_STATUS_ERROR_FROZEN_EXECUTABLE if already frozen.
//   2. Parse loader options from `options` and then from the
//      LOADER_OPTIONS_APPEND environment variable (HSA_STATUS_ERROR on a
//      parse failure).
//   3. Parse the "substitute" option values. Each value has the form
//      "N=file" or "N1-N2=file"; a value without '=' is an error.
//   4. Draw a process-wide sequence number for this load. If a substitute
//      range covers the number, the named file is read and loaded instead of
//      `code_object`.
//   5. Optionally dump the code object and its ISA, then validate ISA name,
//      code object version (1..6), profile and agent compatibility.
//   6. Create the LoadedCodeObjectImpl, load segments, symbols (local
//      non-kernel symbols are skipped for v2+) and apply relocations.
//   7. Optionally dump the executable and fill in r_debug_info.
//
// Parameters:
//   agent              - target agent; may have a zero handle except for v1
//                        code objects.
//   code_object        - handle of the code object to load.
//   code_object_size   - not referenced by this function.
//   options            - optional loader option string (may be null).
//   uri                - stored (via strdup) as r_debug_info.l_name.
//   loaded_code_object - optional out-parameter receiving the new handle.
//
// Returns HSA_STATUS_SUCCESS or the first error encountered. Note that on a
// failure after step 6 has started, the partially loaded code object stays
// registered in `objects` / `loaded_code_objects`.
hsa_status_t ExecutableImpl::LoadCodeObject(
  hsa_agent_t agent,
  hsa_code_object_t code_object,
  size_t code_object_size,
  const char *options,
  const std::string &uri,
  hsa_loaded_code_object_t *loaded_code_object)
{
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
  if (HSA_EXECUTABLE_STATE_FROZEN == state_) {
    logger_ << "LoaderError: executable is already frozen\n";
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  LoaderOptions loaderOptions;
  if (options && !loaderOptions.ParseOptions(options)) {
    return HSA_STATUS_ERROR;
  }

  const char *options_append = getenv("LOADER_OPTIONS_APPEND");
  if (options_append && !loaderOptions.ParseOptions(options_append)) {
    return HSA_STATUS_ERROR;
  }

  // (first code object number, last code object number, replacement file).
  typedef std::tuple<uint32_t, uint32_t, std::string> Substitute;
  std::vector<Substitute> substitutes;

  for (const std::string& s : loaderOptions.Substitute()->values()) {
    std::string::size_type vi = s.find('=');
    if (vi == std::string::npos) { return HSA_STATUS_ERROR; }
    std::string value = s.substr(vi + 1);
    std::string range = s.substr(0, vi);
    std::string::size_type mi = range.find('-');
    uint32_t n1 = UINT32_MAX, n2 = UINT32_MAX;
    if (mi != std::string::npos) {
      // "N1-N2": everything before the dash is the lower bound. The second
      // argument of substr is a length, so it must be `mi`, not `mi - 1`
      // (which dropped the last digit and wrapped around when mi == 0).
      std::string s1, s2;
      s1 = range.substr(0, mi);
      s2 = range.substr(mi + 1);
      std::istringstream is1(s1); is1 >> n1;
      std::istringstream is2(s2); is2 >> n2;
    } else {
      // Single number: the range covers exactly that code object.
      std::istringstream is(range); is >> n1;
      n2 = n1;
    }
    substitutes.push_back(std::make_tuple(n1, n2, value));
  }

  uint32_t codeNum = NextCodeObjectNum();

  code.reset(new code::AmdHsaCode());

  // The first substitute whose inclusive range contains codeNum wins.
  std::string substituteFileName;
  for (const Substitute& ss : substitutes) {
    if (codeNum >= std::get<0>(ss) && codeNum <= std::get<1>(ss)) {
      substituteFileName = std::get<2>(ss);
      break;
    }
  }
  std::vector<char> buffer;
  if (substituteFileName.empty()) {
    if (!code->InitAsHandle(code_object)) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
  } else {
    if (!ReadFileIntoBuffer(substituteFileName, buffer)) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
    // data() is well defined even when the file was empty, unlike &buffer[0].
    if (!code->InitAsBuffer(buffer.data(), buffer.size())) {
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
    }
  }

  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpCode()->is_set()) {
    if (!code->SaveToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "hsaco", codeNum))) {
      // Ignore error.
    }
  }
  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpIsa()->is_set()) {
    if (!code->PrintToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "isa", codeNum))) {
      // Ignore error.
    }
  }

  std::string codeIsa;
  unsigned genericVersion;
  if (!code->GetIsa(codeIsa, &genericVersion)) {
    logger_ << "LoaderError: failed to determine code object's ISA\n";
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  uint32_t majorVersion, minorVersion;
  if (!code->GetCodeObjectVersion(&majorVersion, &minorVersion)) {
    logger_ << "LoaderError: failed to determine code object's version\n";
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  if (majorVersion < 1 || majorVersion > 6) {
    logger_ << "LoaderError: unsupported code object version: " << majorVersion << "\n";
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  if (agent.handle == 0 && majorVersion == 1) {
    logger_ << "LoaderError: code object v1 requires non-null agent\n";
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  uint32_t codeHsailMajor;
  uint32_t codeHsailMinor;
  hsa_profile_t codeProfile;
  hsa_machine_model_t codeMachineModel;
  hsa_default_float_rounding_mode_t codeRoundingMode;
  if (!code->GetNoteHsail(&codeHsailMajor, &codeHsailMinor, &codeProfile, &codeMachineModel, &codeRoundingMode)) {
    // No HSAIL note: assume the code object matches the executable's profile.
    codeProfile = profile_;
  }
  if (profile_ != codeProfile) {
    logger_ << "LoaderError: mismatched profiles\n";
    return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  }

  hsa_isa_t objectsIsa = context_->IsaFromName(codeIsa.c_str());
  if (!objectsIsa.handle) {
    logger_ << "LoaderError: code object's ISA (" << codeIsa.c_str() << ") is invalid\n";
    return HSA_STATUS_ERROR_INVALID_ISA_NAME;
  }

  if (agent.handle != 0 && !context_->IsaSupportedByAgent(agent, objectsIsa, genericVersion)) {
    logger_ << "LoaderError: code object's ISA (" << codeIsa.c_str() << ") is not supported by the agent\n";
    return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
  }

  hsa_status_t status;

  // Register the new loaded code object first: the segment/symbol helpers
  // below operate on loaded_code_objects.back().
  objects.push_back(new LoadedCodeObjectImpl(this, agent, code->ElfData(), code->ElfSize()));
  loaded_code_objects.push_back((LoadedCodeObjectImpl*)objects.back());

  status = LoadSegments(agent, code.get(), majorVersion);
  if (status != HSA_STATUS_SUCCESS) return status;

  for (size_t i = 0; i < code->SymbolCount(); ++i) {
    // For v2+ only kernels and non-local symbols are loaded.
    if (majorVersion >= 2 &&
        code->GetSymbol(i)->elfSym()->type() != STT_AMDGPU_HSA_KERNEL &&
        code->GetSymbol(i)->elfSym()->binding() == STB_LOCAL)
      continue;

    status = LoadSymbol(agent, code->GetSymbol(i), majorVersion);
    if (status != HSA_STATUS_SUCCESS) { return status; }
  }

  status = ApplyRelocations(agent, code.get());
  if (status != HSA_STATUS_SUCCESS) { return status; }

  code.reset();

  if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpExec()->is_set()) {
    if (!PrintToFile(amd::hsa::DumpFileName(loaderOptions.DumpDir()->value(), LOADER_DUMP_PREFIX, "exec", codeNum))) {
      // Ignore error.
    }
  }

  loaded_code_objects.back()->r_debug_info.l_addr = loaded_code_objects.back()->getDelta();
  loaded_code_objects.back()->r_debug_info.l_name = strdup(uri.c_str());
  loaded_code_objects.back()->r_debug_info.l_prev = nullptr;
  loaded_code_objects.back()->r_debug_info.l_next = nullptr;

  if (nullptr != loaded_code_object) { *loaded_code_object = LoadedCodeObject::Handle(loaded_code_objects.back()); }
  return HSA_STATUS_SUCCESS;
}

// Dispatches segment loading by code object major version: version 1 uses
// the per-segment V1 path, everything newer uses the V2 path.
hsa_status_t ExecutableImpl::LoadSegments(hsa_agent_t agent,
                                          const code::AmdHsaCode *c,
                                          uint32_t majorVersion) {
  return majorVersion < 2 ? LoadSegmentsV1(agent, c)
                          : LoadSegmentsV2(agent, c);
}

// Loads every data segment of a v1 code object, one at a time, stopping at
// and returning the first failure.
hsa_status_t ExecutableImpl::LoadSegmentsV1(hsa_agent_t agent,
                                            const code::AmdHsaCode *c) {
  for (size_t index = 0; index != c->DataSegmentCount(); ++index) {
    const hsa_status_t result = LoadSegmentV1(agent, c->DataSegment(index));
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }

  return HSA_STATUS_SUCCESS;
}

// Loads all data segments of a v2+ code object into one allocation.
//
// A single AMDGPU_HSA_SEGMENT_CODE_AGENT allocation is requested whose size is
// the last data segment's vaddr plus its memSize; its recorded vaddr and file
// offset are taken from the first data segment. Every data segment is then
// copied into it, and the resulting loader segment is registered with the
// executable and with the most recently added loaded code object.
//
// Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT when there are no data
// segments and HSA_STATUS_ERROR_OUT_OF_RESOURCES when allocation fails.
hsa_status_t ExecutableImpl::LoadSegmentsV2(hsa_agent_t agent,
                                            const code::AmdHsaCode *c) {
  assert(c->Machine() == ELF::EM_AMDGPU && "Program code objects are not supported");

  const size_t segment_count = c->DataSegmentCount();
  if (segment_count == 0) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  const code::Segment *first = c->DataSegment(0);
  const code::Segment *last = c->DataSegment(segment_count - 1);
  const uint64_t base_vaddr = first->vaddr();
  const uint64_t total_size = last->vaddr() + last->memSize();

  void *memory = context_->SegmentAlloc(AMDGPU_HSA_SEGMENT_CODE_AGENT, agent,
                                        total_size, AMD_ISA_ALIGN_BYTES, true);
  if (memory == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  Segment *combined = new Segment(this, agent, AMDGPU_HSA_SEGMENT_CODE_AGENT,
                                  memory, total_size, base_vaddr, first->offset());
  if (combined == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  for (size_t index = 0; index < c->DataSegmentCount(); ++index) {
    const hsa_status_t result = LoadSegmentV2(c->DataSegment(index), combined);
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }

  objects.push_back(combined);
  loaded_code_objects.back()->LoadedSegments().push_back(combined);

  return HSA_STATUS_SUCCESS;
}

// Loads one data segment of a v1 code object.
//
// Zero-sized segments are ignored. A global-program segment is allocated only
// once per executable: when program_allocation_segment already exists it is
// reused as is (nothing is allocated or copied). Otherwise memory is
// allocated through the context, the segment image is copied in, and the new
// segment is registered in `objects`. In both cases the segment is appended to
// the most recently added loaded code object.
hsa_status_t ExecutableImpl::LoadSegmentV1(hsa_agent_t agent,
                                           const code::Segment *s) {
  assert(s->type() < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST);
  if (s->memSize() == 0) {
    return HSA_STATUS_SUCCESS;
  }

  const amdgpu_hsa_elf_segment_t kind =
      static_cast<amdgpu_hsa_elf_segment_t>(s->type() - PT_LOOS);
  const bool is_program_global = (kind == AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM);

  Segment *target = nullptr;
  if (is_program_global && program_allocation_segment != nullptr) {
    target = program_allocation_segment;
  } else {
    void *memory = context_->SegmentAlloc(kind, agent, s->memSize(), s->align(), true);
    if (memory == nullptr) {
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }
    target = new Segment(this, agent, kind, memory, s->memSize(), s->vaddr(), s->offset());
    target->Copy(s->vaddr(), s->data(), s->imageSize());
    objects.push_back(target);

    if (is_program_global) {
      program_allocation_segment = target;
    }
  }

  assert(target);
  loaded_code_objects.back()->LoadedSegments().push_back(target);
  return HSA_STATUS_SUCCESS;
}

// Copies the file image of one ELF data segment into `load_segment` at the
// data segment's virtual address. Always reports success.
hsa_status_t ExecutableImpl::LoadSegmentV2(const code::Segment *data_segment,
                                           loader::Segment *load_segment) {
  assert(data_segment && load_segment);

  const auto destination_vaddr = data_segment->vaddr();
  load_segment->Copy(destination_vaddr, data_segment->data(),
                     data_segment->imageSize());
  return HSA_STATUS_SUCCESS;
}

// Routes a symbol to the declaration or definition loader depending on
// sym->IsDeclaration().
hsa_status_t ExecutableImpl::LoadSymbol(hsa_agent_t agent,
                                        code::Symbol* sym,
                                        uint32_t majorVersion)
{
  return sym->IsDeclaration()
             ? LoadDeclarationSymbol(agent, sym, majorVersion)
             : LoadDefinitionSymbol(agent, sym, majorVersion);
}

namespace {

// Returns true when `suf` is a suffix of `str`. An empty suffix matches every
// string; a suffix longer than `str` never matches.
bool string_ends_with(const std::string &str, const std::string &suf) {
  if (str.size() < suf.size()) {
    return false;
  }
  const std::string::size_type tail_start = str.size() - suf.size();
  return str.compare(tail_start, suf.size(), suf) == 0;
}

}

// Creates the loader-side symbol object for a symbol that is defined in the
// code object currently being loaded and records it in the symbol tables.
//
// Scope selection: for majorVersion >= 2 the symbol is agent-scoped exactly
// when `agent` has a non-zero handle; for older code objects sym->IsAgent()
// decides. Agent-scoped symbols go to agent_symbols_ (keyed by name + agent),
// all others to program_symbols_ (keyed by name).
//
// Three kinds of symbols are handled:
//   - names ending in ".kd": the section data at the symbol is read as an
//     llvm::amdhsa::kernel_descriptor_t and a KernelSymbol is built from it;
//   - variable symbols: a VariableSymbol is built from the ELF symbol;
//   - kernel symbols: the section data is read as an amd_kernel_code_t, a
//     KernelSymbol is built and its debug_info is filled in and published
//     into the loaded kernel code (see the "debugger backdoor" note below).
//
// Returns HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED when a symbol with the
// same key already exists, HSA_STATUS_ERROR for an unexpected symbol kind, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t ExecutableImpl::LoadDefinitionSymbol(hsa_agent_t agent,
                                                  code::Symbol* sym,
                                                  uint32_t majorVersion)
{
  bool isAgent = sym->IsAgent();
  if (majorVersion >= 2) {
    // v2+ code objects: scope follows the agent argument, not the symbol.
    isAgent = agent.handle != 0;
  }
  if (isAgent) {
    auto agent_symbol = agent_symbols_.find(std::make_pair(sym->Name(), agent));
    if (agent_symbol != agent_symbols_.end()) {
      // TODO(spec): this is not spec compliant.
      return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
    }
  } else {
    auto program_symbol = program_symbols_.find(sym->Name());
    if (program_symbol != program_symbols_.end()) {
      // TODO(spec): this is not spec compliant.
      return HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED;
    }
  }

  // Loaded address of the symbol; 0 when its section is in no loaded segment.
  uint64_t address = SymbolAddress(agent, sym);
  SymbolImpl *symbol = nullptr;
  if (string_ends_with(sym->GetSymbolName(), ".kd")) {
    // V3.
    // NOTE(review): the return value of getData() is not checked here, so a
    // truncated section would leave `kd` partially uninitialized — confirm.
    llvm::amdhsa::kernel_descriptor_t kd;
    sym->GetSection()->getData(sym->SectionOffset(), &kd, sizeof(kd));

    uint32_t kernarg_segment_size = kd.kernarg_size; // FIXME: If 0 then the compiler is not specifying the size.
    uint32_t kernarg_segment_alignment = 16;         // FIXME: Use the minimum HSA required alignment.
    uint32_t group_segment_size = kd.group_segment_fixed_size;
    uint32_t private_segment_size = kd.private_segment_fixed_size;
    bool is_dynamic_callstack = AMDHSA_BITS_GET(kd.kernel_code_properties, rocr::llvm::amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
    bool uses_wave32 = AMDHSA_BITS_GET( kd.kernel_code_properties, rocr::llvm::amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

    uint64_t size = sym->Size();

    KernelSymbol *kernel_symbol = new KernelSymbol(true,
                                    sym->GetModuleName(),
                                    sym->GetSymbolName(),
                                    sym->Linkage(),
                                    true, // sym->IsDefinition()
                                    kernarg_segment_size,
                                    kernarg_segment_alignment,
                                    group_segment_size,
                                    private_segment_size,
                                    is_dynamic_callstack,
                                    size,
                                    64,
                                    uses_wave32 ? 32 : 64,
                                    address);
    symbol = kernel_symbol;
  } else if (sym->IsVariableSymbol()) {
    symbol = new VariableSymbol(true,
                       sym->GetModuleName(),
                       sym->GetSymbolName(),
                       sym->Linkage(),
                       true, // sym->IsDefinition()
                       sym->Allocation(),
                       sym->Segment(),
                       sym->Size(),
                       sym->Alignment(),
                       sym->IsConst(),
                       false,
                       address);
  } else if (sym->IsKernelSymbol()) {
      // NOTE(review): as above, the getData() result is not checked.
      amd_kernel_code_t akc;
      sym->GetSection()->getData(sym->SectionOffset(), &akc, sizeof(akc));

      uint32_t kernarg_segment_size =
        uint32_t(akc.kernarg_segment_byte_size);
      // The field holds the alignment as a power-of-two exponent.
      uint32_t kernarg_segment_alignment =
        uint32_t(1 << akc.kernarg_segment_alignment);
      uint32_t group_segment_size =
        uint32_t(akc.workgroup_group_segment_byte_size);
      uint32_t private_segment_size =
        uint32_t(akc.workitem_private_segment_byte_size);
      bool is_dynamic_callstack =
        AMD_HSA_BITS_GET(akc.kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK) ? true : false;
      bool uses_wave32 = akc.wavefront_size == AMD_POWERTWO_32;

      uint64_t size = sym->Size();

      if (!size && sym->SectionOffset() < sym->GetSection()->size()) {
        // ORCA Runtime relies on symbol size equal to size of kernel ISA. If symbol size is 0 in ELF,
        // calculate end of segment - symbol value.
        size = sym->GetSection()->size() - sym->SectionOffset();
      }
      KernelSymbol *kernel_symbol = new KernelSymbol(true,
                                      sym->GetModuleName(),
                                      sym->GetSymbolName(),
                                      sym->Linkage(),
                                      true, // sym->IsDefinition()
                                      kernarg_segment_size,
                                      kernarg_segment_alignment,
                                      group_segment_size,
                                      private_segment_size,
                                      is_dynamic_callstack,
                                      size,
                                      256,
                                      uses_wave32 ? 32 : 64,
                                      address);
      // Debug record handed to the debugger: raw ELF image, kernel name and
      // the loaded address of the section that owns the kernel.
      // NOTE(review): SymbolSegment() can return null (see SectionSegment);
      // it is dereferenced here without a check — confirm this cannot happen.
      kernel_symbol->debug_info.elf_raw = code->ElfData();
      kernel_symbol->debug_info.elf_size = code->ElfSize();
      kernel_symbol->debug_info.kernel_name = kernel_symbol->full_name.c_str();
      kernel_symbol->debug_info.owning_segment = (void*)SymbolSegment(agent, sym)->Address(sym->GetSection()->addr());
      symbol = kernel_symbol;

      // \todo kzhuravl 10/15/15 This is a debugger backdoor: needs to be
      // removed.
      // Writes the host address of debug_info into the
      // runtime_loader_kernel_symbol field of the loaded amd_kernel_code_t.
      uint64_t target_address = sym->GetSection()->addr() + sym->SectionOffset() + ((size_t)(&((amd_kernel_code_t*)0)->runtime_loader_kernel_symbol));
      uint64_t source_value = (uint64_t) (uintptr_t) &kernel_symbol->debug_info;
      SymbolSegment(agent, sym)->Copy(target_address, &source_value, sizeof(source_value));
  } else {
    assert(!"Unexpected symbol type in LoadDefinitionSymbol");
    return HSA_STATUS_ERROR;
  }

  assert(symbol);
  if (isAgent) {
    symbol->agent = agent;
    agent_symbols_.insert(std::make_pair(std::make_pair(sym->Name(), agent), symbol));
  } else {
    program_symbols_.insert(std::make_pair(sym->Name(), symbol));
  }
  return HSA_STATUS_SUCCESS;
}

// Checks that a symbol which is only declared in the code object has already
// been defined in this executable, either as a program symbol or as an agent
// symbol for `agent`. Logs and returns HSA_STATUS_ERROR_VARIABLE_UNDEFINED
// when neither table contains it. `majorVersion` is not used.
hsa_status_t ExecutableImpl::LoadDeclarationSymbol(hsa_agent_t agent,
                                                   code::Symbol* sym,
                                                   uint32_t majorVersion)
{
  if (program_symbols_.find(sym->Name()) != program_symbols_.end()) {
    return HSA_STATUS_SUCCESS;
  }

  const auto agent_key = std::make_pair(sym->Name(), agent);
  if (agent_symbols_.find(agent_key) != agent_symbols_.end()) {
    return HSA_STATUS_SUCCESS;
  }

  logger_ << "LoaderError: symbol \"" << sym->Name() << "\" is undefined\n";

  // TODO(spec): this is not spec compliant.
  return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
}

// Finds, among the segments of the most recently added loaded code object,
// the first one that contains virtual address `vaddr`. Returns null when no
// segment contains it.
Segment* ExecutableImpl::VirtualAddressSegment(uint64_t vaddr)
{
  auto &segments = loaded_code_objects.back()->LoadedSegments();
  for (auto &candidate : segments) {
    if (!candidate->IsAddressInSegment(vaddr)) {
      continue;
    }
    return candidate;
  }
  return nullptr;
}

// Returns the loaded address of `sym` (the owning segment's Address() of the
// symbol's virtual address), or 0 when the symbol's section is not covered by
// any loaded segment.
uint64_t ExecutableImpl::SymbolAddress(hsa_agent_t agent, code::Symbol* sym)
{
  Segment* owner = SectionSegment(agent, sym->GetSection());
  if (owner == nullptr) {
    return 0;
  }
  return (uint64_t) (uintptr_t) owner->Address(sym->VAddr());
}

// ELF-symbol flavour of SymbolAddress: the symbol's virtual address is its
// section's address plus the symbol value. Returns the loaded address, or 0
// when the section is not covered by any loaded segment.
uint64_t ExecutableImpl::SymbolAddress(hsa_agent_t agent, elf::Symbol* sym)
{
  elf::Section* owning_section = sym->section();
  Segment* owner = SectionSegment(agent, owning_section);
  const uint64_t symbol_vaddr = owning_section->addr() + sym->value();
  if (owner == nullptr) {
    return 0;
  }
  return (uint64_t) (uintptr_t) owner->Address(symbol_vaddr);
}

// Returns the loaded segment that holds the section `sym` belongs to, or
// null when there is none (see SectionSegment).
Segment* ExecutableImpl::SymbolSegment(hsa_agent_t agent, code::Symbol* sym)
{
  code::Section* owning_section = sym->GetSection();
  return SectionSegment(agent, owning_section);
}

// Finds, among the segments of the most recently added loaded code object,
// the first one that contains the start address of `sec`. Returns null when
// none does. `agent` is not used for the lookup.
Segment* ExecutableImpl::SectionSegment(hsa_agent_t agent, code::Section* sec)
{
  auto &segments = loaded_code_objects.back()->LoadedSegments();
  for (Segment* candidate : segments) {
    if (!candidate->IsAddressInSegment(sec->addr())) {
      continue;
    }
    return candidate;
  }
  return nullptr;
}

// Applies every relocation section of code object `c`.
//
// A relocation section with a target section is a static one: it is applied
// for v1 code objects and skipped for v2+. A relocation section without a
// target section is a dynamic one, which is only accepted for code object
// version 2.1 and later. Returns the first error encountered.
hsa_status_t ExecutableImpl::ApplyRelocations(hsa_agent_t agent, amd::hsa::code::AmdHsaCode *c)
{
  uint32_t majorVersion, minorVersion;
  if (!c->GetCodeObjectVersion(&majorVersion, &minorVersion)) {
    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }

  // Dynamic relocations are supported starting code object v2.1.
  const bool dynamic_supported =
      majorVersion > 2 || (majorVersion == 2 && minorVersion >= 1);

  for (size_t index = 0; index < c->RelocationSectionCount(); ++index) {
    amd::hsa::code::RelocationSection *section = c->GetRelocationSection(index);
    hsa_status_t result = HSA_STATUS_SUCCESS;

    if (section->targetSection()) {
      // Static relocations may be present if --emit-relocs
      // option was passed to lld, but they cannot be applied
      // again, so skip it for code object v2 and up.
      if (majorVersion >= 2) {
        continue;
      }
      result = ApplyStaticRelocationSection(agent, section);
    } else {
      if (!dynamic_supported) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      result = ApplyDynamicRelocationSection(agent, section);
    }

    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Applies all relocations of one static relocation section, stopping at the
// first failure. Sections whose target is not SHF_ALLOC are link-time only
// and are skipped.
hsa_status_t ExecutableImpl::ApplyStaticRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec)
{
  // Skip link-time relocations (if any).
  const bool target_is_loaded = (sec->targetSection()->flags() & SHF_ALLOC) != 0;
  if (!target_is_loaded) {
    return HSA_STATUS_SUCCESS;
  }

  for (size_t index = 0; index != sec->relocationCount(); ++index) {
    const hsa_status_t result = ApplyStaticRelocation(agent, sec->relocation(index));
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Applies a single static (v1) relocation.
//
// The patch location is the target section's address plus the relocation
// offset, written through the loaded segment that holds the target section.
// Handled relocation types:
//   - R_AMDGPU_V1_32_LOW / _32_HIGH / _64: writes the low 32 bits, high 32
//     bits or all 64 bits of (symbol address + addend). STT_COMMON symbols
//     are resolved by name among the symbols already defined in this
//     executable; other supported symbol types use SymbolAddress().
//   - R_AMDGPU_V1_INIT_SAMPLER: creates a sampler through the context from
//     the metadata descriptor at the symbol and writes its handle.
//   - R_AMDGPU_V1_INIT_IMAGE: same, for an image.
// Any other relocation type is silently ignored.
//
// Returns HSA_STATUS_ERROR_INVALID_CODE_OBJECT for malformed input,
// HSA_STATUS_ERROR_VARIABLE_UNDEFINED for an unresolved STT_COMMON symbol, or
// whatever the context's create call reports on failure.
//
// NOTE(review): `rseg` comes from SectionSegment(), which can return null; it
// is dereferenced below without a check — confirm callers guarantee the
// target section is always inside a loaded segment.
hsa_status_t ExecutableImpl::ApplyStaticRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel)
{
  hsa_status_t status = HSA_STATUS_SUCCESS;
  amd::elf::Symbol* sym = rel->symbol();
  code::RelocationSection* rsec = rel->section();
  code::Section* sec = rsec->targetSection();
  Segment* rseg = SectionSegment(agent, sec);
  // Virtual address of the location to patch.
  size_t reladdr = sec->addr() + rel->offset();
  switch (rel->type()) {
    case R_AMDGPU_V1_32_LOW:
    case R_AMDGPU_V1_32_HIGH:
    case R_AMDGPU_V1_64:
    {
      // Step 1: resolve the symbol to a loaded address.
      uint64_t addr;
      switch (sym->type()) {
        case STT_OBJECT:
        case STT_SECTION:
        case STT_AMDGPU_HSA_KERNEL:
        case STT_AMDGPU_HSA_INDIRECT_FUNCTION:
          addr = SymbolAddress(agent, sym);
          if (!addr) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
          break;
        case STT_COMMON: {
          // Program-allocation symbols are looked up without an agent.
          hsa_agent_t *sagent = &agent;
          if (STA_AMDGPU_HSA_GLOBAL_PROGRAM == ELF64_ST_AMDGPU_ALLOCATION(sym->other())) {
            sagent = nullptr;
          }
          SymbolImpl* esym = (SymbolImpl*) GetSymbolInternal(sym->name().c_str(), sagent);
          if (!esym) {
            logger_ << "LoaderError: symbol \"" << sym->name() << "\" is undefined\n";
            return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
          }
          addr = esym->address;
          break;
        }
        default:
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      addr += rel->addend();

      // Step 2: write the requested part of the address.
      uint32_t addr32 = 0;
      switch (rel->type()) {
        case R_AMDGPU_V1_32_HIGH:
          addr32 = uint32_t((addr >> 32) & 0xFFFFFFFF);
          rseg->Copy(reladdr, &addr32, sizeof(addr32));
          break;
        case R_AMDGPU_V1_32_LOW:
          addr32 = uint32_t(addr & 0xFFFFFFFF);
          rseg->Copy(reladdr, &addr32, sizeof(addr32));
          break;
        case R_AMDGPU_V1_64:
          rseg->Copy(reladdr, &addr, sizeof(addr));
          break;
        default:
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      break;
    }

    case R_AMDGPU_V1_INIT_SAMPLER:
    {
      // The symbol must be metadata living in a mergeable PROGBITS section.
      if (STT_AMDGPU_HSA_METADATA != sym->type() ||
          SHT_PROGBITS != sym->section()->type() ||
          !(sym->section()->flags() & SHF_MERGE)) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      amdgpu_hsa_sampler_descriptor_t desc;
      if (!sym->section()->getData(sym->value(), &desc, sizeof(desc))) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      if (AMDGPU_HSA_METADATA_KIND_INIT_SAMP != desc.kind) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      // Translate the ELF descriptor into the HSA extension descriptor.
      hsa_ext_sampler_descriptor_t hsa_sampler_descriptor;
      hsa_sampler_descriptor.coordinate_mode =
        hsa_ext_sampler_coordinate_mode_t(desc.coord);
      hsa_sampler_descriptor.filter_mode =
        hsa_ext_sampler_filter_mode_t(desc.filter);
      hsa_sampler_descriptor.address_mode =
        hsa_ext_sampler_addressing_mode_t(desc.addressing);

      hsa_ext_sampler_t hsa_sampler = {0};
      status = context_->SamplerCreate(agent, &hsa_sampler_descriptor, &hsa_sampler);
      if (status != HSA_STATUS_SUCCESS) { return status; }
      assert(hsa_sampler.handle);
      rseg->Copy(reladdr, &hsa_sampler, sizeof(hsa_sampler));
      break;
    }

    case R_AMDGPU_V1_INIT_IMAGE:
    {
      // The symbol must be metadata living in a mergeable PROGBITS section.
      if (STT_AMDGPU_HSA_METADATA != sym->type() ||
          SHT_PROGBITS != sym->section()->type() ||
          !(sym->section()->flags() & SHF_MERGE)) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      amdgpu_hsa_image_descriptor_t desc;
      if (!sym->section()->getData(sym->value(), &desc, sizeof(desc))) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      if (AMDGPU_HSA_METADATA_KIND_INIT_ROIMG != desc.kind &&
          AMDGPU_HSA_METADATA_KIND_INIT_WOIMG != desc.kind &&
          AMDGPU_HSA_METADATA_KIND_INIT_RWIMG != desc.kind) {
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      // Translate the ELF descriptor into the HSA extension descriptors.
      hsa_ext_image_format_t hsa_image_format;
      hsa_image_format.channel_order =
        hsa_ext_image_channel_order_t(desc.channel_order);
      hsa_image_format.channel_type =
        hsa_ext_image_channel_type_t(desc.channel_type);

      hsa_ext_image_descriptor_t hsa_image_descriptor;
      hsa_image_descriptor.geometry =
        hsa_ext_image_geometry_t(desc.geometry);
      hsa_image_descriptor.width = size_t(desc.width);
      hsa_image_descriptor.height = size_t(desc.height);
      hsa_image_descriptor.depth = size_t(desc.depth);
      hsa_image_descriptor.array_size = size_t(desc.array);
      hsa_image_descriptor.format = hsa_image_format;

      // The metadata kind selects the access permission of the image.
      hsa_access_permission_t hsa_image_permission = HSA_ACCESS_PERMISSION_RO;
      switch (desc.kind) {
        case AMDGPU_HSA_METADATA_KIND_INIT_ROIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_RO;
          break;
        }
        case AMDGPU_HSA_METADATA_KIND_INIT_WOIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_WO;
          break;
        }
        case AMDGPU_HSA_METADATA_KIND_INIT_RWIMG: {
          hsa_image_permission = HSA_ACCESS_PERMISSION_RW;
          break;
        }
        default: {
          // Unreachable: desc.kind was validated above.
          assert(false);
          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
        }
      }

      hsa_ext_image_t hsa_image = {0};
      status = context_->ImageCreate(agent, hsa_image_permission,
                                  &hsa_image_descriptor,
                                  NULL, // TODO: image_data?
                                  &hsa_image);
      if (status != HSA_STATUS_SUCCESS) { return status; }
      rseg->Copy(reladdr, &hsa_image, sizeof(hsa_image));
      break;
    }

    default:
      // Ignore.
      break;
  }
  return HSA_STATUS_SUCCESS;
}

// Applies all relocations of one dynamic relocation section in order,
// stopping at and returning the first failure.
hsa_status_t ExecutableImpl::ApplyDynamicRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec)
{
  for (size_t index = 0; index != sec->relocationCount(); ++index) {
    const hsa_status_t result = ApplyDynamicRelocation(agent, sec->relocation(index));
    if (result != HSA_STATUS_SUCCESS) {
      return result;
    }
  }
  return HSA_STATUS_SUCCESS;
}

// Applies a single dynamic relocation (code object v2.1+).
//
// The symbol is resolved first:
//   - STT_OBJECT / STT_AMDGPU_HSA_KERNEL / STT_FUNC: the symbol lives in this
//     code object; its address is taken from the loaded segment that contains
//     the symbol value.
//   - STT_NOTYPE: external symbol; looked up among the agent symbols already
//     defined in this executable. An unresolved symbol leaves the address 0.
//   - any other type is rejected.
// The addend is added, and the result is written at rel->offset() according
// to the relocation type (ABS32_HI, ABS32_LO, ABS32, ABS64). RELATIVE64
// ignores the symbol and writes (segment load delta + addend).
//
// Returns:
//   HSA_STATUS_ERROR_INVALID_CODE_OBJECT - unsupported symbol or relocation
//     type, or the relocation offset / symbol value is not inside any loaded
//     segment (previously this dereferenced a null segment pointer).
//   HSA_STATUS_ERROR_VARIABLE_UNDEFINED  - the resolved address is 0.
//   HSA_STATUS_SUCCESS                   - relocation written.
hsa_status_t ExecutableImpl::ApplyDynamicRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel)
{
  // May be null for a malformed code object; checked right before each write
  // so that the "undefined symbol" diagnostics keep their precedence.
  Segment* relSeg = VirtualAddressSegment(rel->offset());
  uint64_t symAddr = 0;
  switch (rel->symbol()->type()) {
    case STT_OBJECT:
    case STT_AMDGPU_HSA_KERNEL:
    case STT_FUNC:
    {
      Segment* symSeg = VirtualAddressSegment(rel->symbol()->value());
      if (!symSeg) {
        logger_ << "LoaderError: symbol \"" << rel->symbol()->name() << "\" is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }
      symAddr = reinterpret_cast<uint64_t>(symSeg->Address(rel->symbol()->value()));
      break;
    }

    // External symbols, they must be defined prior loading.
    case STT_NOTYPE:
    {
      // TODO: Only agent allocation variables are supported in v2.1. How will
      // we distinguish between program allocation and agent allocation
      // variables?
      auto agent_symbol = agent_symbols_.find(std::make_pair(rel->symbol()->name(), agent));
      if (agent_symbol != agent_symbols_.end())
        symAddr = agent_symbol->second->address;
      break;
    }

    default:
      // Only objects and kernels are supported in v2.1.
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  symAddr += rel->addend();

  switch (rel->type()) {
    case ELF::R_AMDGPU_ABS32_HI:
    {
      if (!symAddr) {
        logger_ << "LoaderError: symbol \"" << rel->symbol()->name() << "\" is undefined\n";
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      }
      if (!relSeg) {
        logger_ << "LoaderError: relocation offset is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      uint32_t symAddr32 = uint32_t((symAddr >> 32) & 0xFFFFFFFF);
      relSeg->Copy(rel->offset(), &symAddr32, sizeof(symAddr32));
      break;
    }

    case ELF::R_AMDGPU_ABS32_LO:
    {
      if (!symAddr) {
        logger_ << "LoaderError: symbol \"" << rel->symbol()->name() << "\" is undefined\n";
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      }
      if (!relSeg) {
        logger_ << "LoaderError: relocation offset is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      uint32_t symAddr32 = uint32_t(symAddr & 0xFFFFFFFF);
      relSeg->Copy(rel->offset(), &symAddr32, sizeof(symAddr32));
      break;
    }

    case ELF::R_AMDGPU_ABS32:
    {
      if (!symAddr) {
        logger_ << "LoaderError: symbol \"" << rel->symbol()->name() << "\" is undefined\n";
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      }
      if (!relSeg) {
        logger_ << "LoaderError: relocation offset is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      uint32_t symAddr32 = uint32_t(symAddr);
      relSeg->Copy(rel->offset(), &symAddr32, sizeof(symAddr32));
      break;
    }

    case ELF::R_AMDGPU_ABS64:
    {
      if (!symAddr) {
        logger_ << "LoaderError: symbol \"" << rel->symbol()->name() << "\" is undefined\n";
        return HSA_STATUS_ERROR_VARIABLE_UNDEFINED;
      }
      if (!relSeg) {
        logger_ << "LoaderError: relocation offset is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      relSeg->Copy(rel->offset(), &symAddr, sizeof(symAddr));
      break;
    }

    case ELF::R_AMDGPU_RELATIVE64:
    {
      if (!relSeg) {
        logger_ << "LoaderError: relocation offset is outside of loaded segments\n";
        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
      }

      // Difference between where the segment was loaded and its link-time
      // virtual address.
      int64_t baseDelta = reinterpret_cast<uint64_t>(relSeg->Address(0)) - relSeg->VAddr();
      uint64_t relocatedAddr = baseDelta + rel->addend();
      relSeg->Copy(rel->offset(), &relocatedAddr, sizeof(relocatedAddr));
      break;
    }

    default:
      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
  }
  return HSA_STATUS_SUCCESS;
}

// Freezes the executable: calls Freeze() on every segment of every loaded
// code object and switches the state to HSA_EXECUTABLE_STATE_FROZEN. Runs
// under the writer lock. Freezing an already frozen executable fails with
// HSA_STATUS_ERROR_FROZEN_EXECUTABLE. `options` is not used.
hsa_status_t ExecutableImpl::Freeze(const char *options) {
  amd::hsa::common::WriterLockGuard<amd::hsa::common::ReaderWriterLock> writer_lock(rw_lock_);

  const bool already_frozen = (state_ == HSA_EXECUTABLE_STATE_FROZEN);
  if (already_frozen) {
    return HSA_STATUS_ERROR_FROZEN_EXECUTABLE;
  }

  for (auto &code_object : loaded_code_objects) {
    auto &segments = code_object->LoadedSegments();
    for (auto &segment : segments) {
      segment->Freeze();
    }
  }

  state_ = HSA_EXECUTABLE_STATE_FROZEN;
  return HSA_STATUS_SUCCESS;
}

void ExecutableImpl::Print(std::ostream& out)
{
  out << "AMD Executable" << std::endl;
  out << "  Id: " << id()
      << "  Profile: " << HsaProfileToString(profile())
      << std::endl << std::endl;
  out << "Loaded Objects (total " << objects.size() << ")" << std::endl;
  size_t i = 0;
  for (ExecutableObject* o : objects) {
    out << "Loaded Object " << i++ << ": ";
    o->Print(out);
    out << std::endl;
  }
  out << "End AMD Executable" << std::endl;
}

// Writes the description produced by Print() to the file `filename`.
//
// Returns true when the file could be opened and the stream is still in a
// good state after printing, false otherwise. The final check used to return
// out.fail() directly, which reported success as false and contradicted the
// open-failure path above it.
bool ExecutableImpl::PrintToFile(const std::string& filename)
{
  std::ofstream out(filename);
  if (out.fail()) { return false; }
  Print(out);
  return !out.fail();
}

} // namespace loader
} // namespace hsa
} // namespace amd
} // namespace rocr


================================================
FILE: runtime/hsa-runtime/loader/executable.hpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_
#define HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_

#include <array>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <libelf.h>
#include <link.h>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cstring>
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "core/inc/amd_hsa_loader.hpp"
#include "core/inc/amd_hsa_code.hpp"
#include "inc/amd_hsa_kernel_code.h"
#include "amd_hsa_locks.hpp"

namespace rocr {
namespace amd {
namespace hsa {
namespace loader {

class MemoryAddress;
class SymbolImpl;
class KernelSymbol;
class VariableSymbol;
class ExecutableImpl;

//===----------------------------------------------------------------------===//
// SymbolImpl.                                                                //
//===----------------------------------------------------------------------===//

typedef uint32_t symbol_attribute32_t;

class SymbolImpl: public Symbol {
public:
  virtual ~SymbolImpl() {}

  bool IsKernel() const {
    return HSA_SYMBOL_KIND_KERNEL == kind;
  }
  bool IsVariable() const {
    return HSA_SYMBOL_KIND_VARIABLE == kind;
  }

  bool is_loaded;
  hsa_symbol_kind_t kind;
  std::string module_name;
  std::string symbol_name;
  hsa_symbol_linkage_t linkage;
  bool is_definition;
  uint64_t address;
  hsa_agent_t agent;

  hsa_agent_t GetAgent() override {
    return agent;
  }

protected:
  SymbolImpl(const bool &_is_loaded,
             const hsa_symbol_kind_t &_kind,
             const std::string &_module_name,
             const std::string &_symbol_name,
             const hsa_symbol_linkage_t &_linkage,
             const bool &_is_definition,
             const uint64_t &_address = 0)
    : is_loaded(_is_loaded)
    , kind(_kind)
    , module_name(_module_name)
    , symbol_name(_symbol_name)
    , linkage(_linkage)
    , is_definition(_is_definition)
    , address(_address) {}

  virtual bool GetInfo(hsa_symbol_info32_t symbol_info, void* value) override;

 private:
  SymbolImpl(const SymbolImpl &s);
  SymbolImpl& operator=(const SymbolImpl &s);
};

//===----------------------------------------------------------------------===//
// KernelSymbol.                                                              //
//===----------------------------------------------------------------------===//

class KernelSymbol final: public SymbolImpl {
public:
  KernelSymbol(const bool &_is_loaded,
               const std::string &_module_name,
               const std::string &_symbol_name,
               const hsa_symbol_linkage_t &_linkage,
               const bool &_is_definition,
               const uint32_t &_kernarg_segment_size,
               const uint32_t &_kernarg_segment_alignment,
               const uint32_t &_group_segment_size,
               const uint32_t &_private_segment_size,
               const bool &_is_dynamic_callstack,
               const uint32_t &_size,
               const uint32_t &_alignment,
               const uint32_t &_wavefront_size,
               const uint64_t &_address = 0)
    : SymbolImpl(_is_loaded,
                 HSA_SYMBOL_KIND_KERNEL,
                 _module_name,
                 _symbol_name,
                 _linkage,
                 _is_definition,
                 _address)
    , full_name(_module_name.empty() ? _symbol_name : _module_name + "::" + _symbol_name)
    , kernarg_segment_size(_kernarg_segment_size)
    , kernarg_segment_alignment(_kernarg_segment_alignment)
    , group_segment_size(_group_segment_size)
    , private_segment_size(_private_segment_size)
    , is_dynamic_callstack(_is_dynamic_callstack)
    , size(_size)
    , alignment(_alignment)
    , wavefront_size(_wavefront_size)
    , debug_info{} {}

  ~KernelSymbol() {}

  bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);

  std::string full_name;
  uint32_t kernarg_segment_size;
  uint32_t kernarg_segment_alignment;
  uint32_t group_segment_size;
  uint32_t private_segment_size;
  bool is_dynamic_callstack;
  uint32_t size;
  uint32_t alignment;
  uint32_t wavefront_size;
  amd_runtime_loader_debug_info_t debug_info;

private:
  KernelSymbol(const KernelSymbol &ks);
  KernelSymbol& operator=(const KernelSymbol &ks);
};

//===----------------------------------------------------------------------===//
// VariableSymbol.                                                            //
//===----------------------------------------------------------------------===//

class VariableSymbol final: public SymbolImpl {
public:
  VariableSymbol(const bool &_is_loaded,
                 const std::string &_module_name,
                 const std::string &_symbol_name,
                 const hsa_symbol_linkage_t &_linkage,
                 const bool &_is_definition,
                 const hsa_variable_allocation_t &_allocation,
                 const hsa_variable_segment_t &_segment,
                 const uint32_t &_size,
                 const uint32_t &_alignment,
                 const bool &_is_constant,
                 const bool &_is_external = false,
                 const uint64_t &_address = 0)
    : SymbolImpl(_is_loaded,
                 HSA_SYMBOL_KIND_VARIABLE,
                 _module_name,
                 _symbol_name,
                 _linkage,
                 _is_definition,
                 _address)
    , allocation(_allocation)
    , segment(_segment)
    , size(_size)
    , alignment(_alignment)
    , is_constant(_is_constant)
    , is_external(_is_external) {}

  ~VariableSymbol() {}

  bool GetInfo(hsa_symbol_info32_t symbol_info, void *value);

  hsa_variable_allocation_t allocation;
  hsa_variable_segment_t segment;
  uint32_t size;
  uint32_t alignment;
  bool is_constant;
  bool is_external;

private:
  VariableSymbol(const VariableSymbol &vs);
  VariableSymbol& operator=(const VariableSymbol &vs);
};

//===----------------------------------------------------------------------===//
// Logger.                                                                    //
//===----------------------------------------------------------------------===//

/// Minimal stream wrapper whose output is gated by the LOADER_ENABLE_LOGGING
/// environment variable. The variable is re-read on every insertion.
class Logger final {
public:
  /// Wraps `Stream` (std::cerr by default); the stream is held by reference.
  Logger(std::ostream &Stream = std::cerr) : OutStream(Stream) {}

  /// Forwards `Data` to the wrapped stream when logging is enabled;
  /// otherwise does nothing. Always returns *this for chaining.
  template <typename T>
  Logger &operator<<(const T &Data) {
    if (IsLoggingEnabled()) {
      OutStream << Data;
    }
    return *this;
  }

private:
  // Declared private to block copying.
  Logger(const Logger &L);
  Logger& operator=(const Logger &L);

  /// Logging is on when LOADER_ENABLE_LOGGING is set to anything except "0".
  bool IsLoggingEnabled() const {
    const char *setting = getenv("LOADER_ENABLE_LOGGING");
    return setting != nullptr && std::string(setting) != "0";
  }

  std::ostream &OutStream;
};

//===----------------------------------------------------------------------===//
// Executable.                                                                //
//===----------------------------------------------------------------------===//

class ExecutableImpl;
class LoadedCodeObjectImpl;
class Segment;

/// Base for everything an executable keeps track of. Each object remembers
/// the executable that owns it and the agent it is associated with.
class ExecutableObject {
public:
  ExecutableObject(ExecutableImpl *owner_, hsa_agent_t agent_)
    : owner(owner_)
    , agent(agent_) {}

  /// Executable this object belongs to.
  ExecutableImpl* Owner() const {
    return owner;
  }

  /// Agent this object is associated with.
  hsa_agent_t Agent() const {
    return agent;
  }

  /// Writes a description of the object to `out`.
  virtual void Print(std::ostream& out) = 0;

  /// Releases whatever the concrete object holds.
  virtual void Destroy() = 0;

  virtual ~ExecutableObject() {}

protected:
  ExecutableImpl *owner;
  hsa_agent_t agent;
};

/// A code object that has been loaded into an executable for one agent.
/// Keeps the ELF image pointer and size it was created with, plus the list of
/// segments produced while loading.
class LoadedCodeObjectImpl : public LoadedCodeObject, public ExecutableObject {
friend class AmdHsaCodeLoader;
private:
  // Declared private to block copying.
  LoadedCodeObjectImpl(const LoadedCodeObjectImpl&);
  LoadedCodeObjectImpl& operator=(const LoadedCodeObjectImpl&);

  const void *elf_data;
  const size_t elf_size;
  std::vector<Segment*> loaded_segments;

public:
  /// Records the ELF image and zero-fills `r_debug_info`.
  LoadedCodeObjectImpl(ExecutableImpl *owner_,
                       hsa_agent_t agent_,
                       const void *elf_data_,
                       size_t elf_size_)
    : ExecutableObject(owner_, agent_)
    , elf_data(elf_data_)
    , elf_size(elf_size_)
  {
    memset(&r_debug_info, 0, sizeof(r_debug_info));
  }

  /// Pointer to the ELF image given at construction.
  const void* ElfData() const {
    return elf_data;
  }

  /// Size in bytes of the ELF image given at construction.
  size_t ElfSize() const {
    return elf_size;
  }

  /// Mutable access to the segment list.
  std::vector<Segment*>& LoadedSegments() {
    return loaded_segments;
  }

  bool GetInfo(amd_loaded_code_object_info_t attribute, void *value) override;

  hsa_status_t IterateLoadedSegments(
    hsa_status_t (*callback)(amd_loaded_segment_t loaded_segment, void *data),
    void *data) override;

  void Print(std::ostream& out) override;

  /// Intentionally empty for loaded code objects.
  void Destroy() override {}

  hsa_agent_t getAgent() const override;
  hsa_executable_t getExecutable() const override;
  uint64_t getElfData() const override;
  uint64_t getElfSize() const override;
  uint64_t getStorageOffset() const override;
  uint64_t getLoadBase() const override;
  uint64_t getLoadSize() const override;
  int64_t getDelta() const override;
  std::string getUri() const override;

  // Zeroed by the constructor.
  link_map r_debug_info;
};

/// One loaded segment of an executable: the ELF segment kind, the backing
/// pointer, its size, virtual address and storage offset. A segment starts
/// out not frozen.
class Segment : public LoadedSegment, public ExecutableObject {
private:
  amdgpu_hsa_elf_segment_t segment;
  void *ptr;
  size_t size;
  uint64_t vaddr;
  bool frozen;
  size_t storage_offset;

public:
  Segment(ExecutableImpl *owner_,
          hsa_agent_t agent_,
          amdgpu_hsa_elf_segment_t segment_,
          void* ptr_,
          size_t size_,
          uint64_t vaddr_,
          size_t storage_offset_)
    : ExecutableObject(owner_, agent_)
    , segment(segment_)
    , ptr(ptr_)
    , size(size_)
    , vaddr(vaddr_)
    , frozen(false)
    , storage_offset(storage_offset_) {}

  amdgpu_hsa_elf_segment_t ElfSegment() const {
    return segment;
  }
  void* Ptr() const {
    return ptr;
  }
  size_t Size() const {
    return size;
  }
  uint64_t VAddr() const {
    return vaddr;
  }
  size_t StorageOffset() const {
    return storage_offset;
  }

  bool GetInfo(amd_loaded_segment_info_t attribute, void *value) override;

  // Offset inside the segment; paired with ptr when calling loader context
  // functions.
  uint64_t Offset(uint64_t addr);

  // Address inside the segment; used for relocations and valid on the agent.
  void* Address(uint64_t addr);

  bool Freeze();

  bool IsAddressInSegment(uint64_t addr);
  void Copy(uint64_t addr, const void* src, size_t size);
  void Print(std::ostream& out) override;
  void Destroy() override;
};

/// Executable-owned wrapper around an hsa_ext_sampler_t handle.
class Sampler : public ExecutableObject {
private:
  hsa_ext_sampler_t samp;

public:
  Sampler(ExecutableImpl *owner_, hsa_agent_t agent_, hsa_ext_sampler_t samp_)
    : ExecutableObject(owner_, agent_)
    , samp(samp_) {}

  void Print(std::ostream& out) override;
  void Destroy() override;
};

/// Executable-owned wrapper around an hsa_ext_image_t handle.
class Image : public ExecutableObject {
private:
  hsa_ext_image_t img;

public:
  Image(ExecutableImpl *owner_, hsa_agent_t agent_, hsa_ext_image_t img_)
    : ExecutableObject(owner_, agent_)
    , img(img_) {}

  void Print(std::ostream& out) override;
  void Destroy() override;
};

typedef std::string ProgramSymbol;
typedef std::unordered_map<ProgramSymbol, SymbolImpl*> ProgramSymbolMap;

typedef std::pair<std::string, hsa_agent_t> AgentSymbol;
struct ASC {
  bool operator()(const AgentSymbol &las, const AgentSymbol &ras) const {
    return las.first == ras.first && las.second.handle == ras.second.handle;
  }
};
struct ASH {
  size_t operator()(const AgentSymbol &as) const {
    size_t h = std::hash<std::string>()(as.first);
    size_t i = std::hash<uint64_t>()(as.second.handle);
    return h ^ (i << 1);
  }
};
typedef std::unordered_map<AgentSymbol, SymbolImpl*, ASH, ASC> AgentSymbolMap;

/// Concrete Executable used by the loader. Holds the program and agent symbol
/// tables, the objects created while loading, and the loaded code objects.
/// Not copyable. AmdHsaCodeLoader is a friend.
class ExecutableImpl final: public Executable {
friend class AmdHsaCodeLoader;
public:
  /// Profile the executable was created with.
  const hsa_profile_t& profile() const {
    return profile_;
  }
  /// Current executable state.
  const hsa_executable_state_t& state() const {
    return state_;
  }

  // Two construction forms: one takes a raw Context pointer, the other a
  // std::unique_ptr<Context>.
  // NOTE(review): presumably the second form transfers ownership of the
  // context to the executable (see unique_context_) -- confirm in the .cpp.
  ExecutableImpl(
      const hsa_profile_t &_profile,
      Context *context,
      size_t id,
      hsa_default_float_rounding_mode_t default_float_rounding_mode);

  ExecutableImpl(
      const hsa_profile_t &_profile,
      std::unique_ptr<Context> unique_context,
      size_t id,
      hsa_default_float_rounding_mode_t default_float_rounding_mode);

  ~ExecutableImpl();

  hsa_status_t GetInfo(hsa_executable_info_t executable_info, void *value) override;

  hsa_status_t DefineProgramExternalVariable(
    const char *name, void *address) override;

  hsa_status_t DefineAgentExternalVariable(
    const char *name,
    hsa_agent_t agent,
    hsa_variable_segment_t segment,
    void *address) override;

  // Overload without an explicit code object size.
  hsa_status_t LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    const char *options,
    const std::string &uri,
    hsa_loaded_code_object_t *loaded_code_object) override;

  // Overload that additionally receives the code object size.
  hsa_status_t LoadCodeObject(
    hsa_agent_t agent,
    hsa_code_object_t code_object,
    size_t code_object_size,
    const char *options,
    const std::string &uri,
    hsa_loaded_code_object_t *loaded_code_object) override;

  hsa_status_t Freeze(const char *options) override;

  /// Takes the reader lock, asserts that `result` is non-null, stores 0 in
  /// `*result` and returns HSA_STATUS_SUCCESS; no other checks are performed.
  hsa_status_t Validate(uint32_t *result) override {
    amd::hsa::common::ReaderLockGuard<amd::hsa::common::ReaderWriterLock> reader_lock(rw_lock_);
    assert(result);
    *result = 0;
    return HSA_STATUS_SUCCESS;
  }

  /// @note needed for hsa v1.0.
  /// @todo remove during loader refactoring.
  bool IsProgramSymbol(const char *symbol_name) override;

  Symbol* GetSymbol(
    const char *symbol_name,
    const hsa_agent_t *agent) override;

  hsa_status_t IterateSymbols(
    iterate_symbols_f callback, void *data) override;

  /// @since hsa v1.1.
  hsa_status_t IterateAgentSymbols(
      hsa_agent_t agent,
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_agent_t agent,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data) override;

  /// @since hsa v1.1.
  hsa_status_t IterateProgramSymbols(
      hsa_status_t (*callback)(hsa_executable_t exec,
                               hsa_executable_symbol_t symbol,
                               void *data),
      void *data) override;

  hsa_status_t IterateLoadedCodeObjects(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      hsa_loaded_code_object_t loaded_code_object,
      void *data),
    void *data) override;

  size_t GetNumSegmentDescriptors() override;

  size_t QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t total_num_segment_descriptors,
    size_t first_empty_segment_descriptor) override;

  uint64_t FindHostAddress(uint64_t device_address) override;

  // NOTE(review): these are not marked override; their effect is defined in
  // the .cpp, which is not visible here.
  void EnableReadOnlyMode();
  void DisableReadOnlyMode();

  void Print(std::ostream& out) override;
  bool PrintToFile(const std::string& filename) override;

  /// Raw pointer to the loader context in use.
  Context* context() { return context_; }
  /// Identifier supplied at construction.
  size_t id() { return id_; }

private:
  // Declared private to block copying.
  ExecutableImpl(const ExecutableImpl &e);
  ExecutableImpl& operator=(const ExecutableImpl &e);

  std::unique_ptr<amd::hsa::code::AmdHsaCode> code;

  Symbol* GetSymbolInternal(
    const char *symbol_name,
    const hsa_agent_t *agent);

  // Segment loading helpers; the V1/V2 suffixes presumably correspond to the
  // code object major version passed to LoadSegments -- TODO confirm.
  hsa_status_t LoadSegments(hsa_agent_t agent, const code::AmdHsaCode *c,
                            uint32_t majorVersion);
  hsa_status_t LoadSegmentsV1(hsa_agent_t agent, const code::AmdHsaCode *c);
  hsa_status_t LoadSegmentsV2(hsa_agent_t agent, const code::AmdHsaCode *c);
  hsa_status_t LoadSegmentV1(hsa_agent_t agent, const code::Segment *s);
  hsa_status_t LoadSegmentV2(const code::Segment *data_segment,
                             loader::Segment *load_segment);

  // Symbol loading helpers.
  hsa_status_t LoadSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
  hsa_status_t LoadDefinitionSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);
  hsa_status_t LoadDeclarationSymbol(hsa_agent_t agent, amd::hsa::code::Symbol* sym, uint32_t majorVersion);

  // Relocation helpers.
  hsa_status_t ApplyRelocations(hsa_agent_t agent, amd::hsa::code::AmdHsaCode *c);
  hsa_status_t ApplyStaticRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
  hsa_status_t ApplyStaticRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);
  hsa_status_t ApplyDynamicRelocationSection(hsa_agent_t agent, amd::hsa::code::RelocationSection* sec);
  hsa_status_t ApplyDynamicRelocation(hsa_agent_t agent, amd::hsa::code::Relocation *rel);

  // Lookup helpers mapping addresses, symbols and sections to segments.
  Segment* VirtualAddressSegment(uint64_t vaddr);
  uint64_t SymbolAddress(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
  uint64_t SymbolAddress(hsa_agent_t agent, amd::elf::Symbol* sym);
  Segment* SymbolSegment(hsa_agent_t agent, amd::hsa::code::Symbol* sym);
  Segment* SectionSegment(hsa_agent_t agent, amd::hsa::code::Section* sec);

  // Reader/writer lock taken by Validate (reader) and Freeze (writer).
  amd::hsa::common::ReaderWriterLock rw_lock_;
  hsa_profile_t profile_;
  Context *context_;
  std::unique_ptr<Context> unique_context_;
  Logger logger_;
  const size_t id_;
  hsa_default_float_rounding_mode_t default_float_rounding_mode_;
  hsa_executable_state_t state_;

  ProgramSymbolMap program_symbols_;
  AgentSymbolMap agent_symbols_;
  // Objects listed by Print().
  std::vector<ExecutableObject*> objects;
  Segment *program_allocation_segment;
  // Code objects whose segments Freeze() walks.
  std::vector<LoadedCodeObjectImpl*> loaded_code_objects;
};

/// Loader implementation that creates, tracks and destroys executables on
/// top of a caller-supplied Context.
class AmdHsaCodeLoader : public Loader {
private:
  // Context passed at construction; stored as a raw pointer.
  Context* context;
  // Executables tracked by this loader.
  std::vector<Executable*> executables;
  amd::hsa::common::ReaderWriterLock rw_lock_;

public:
  /// Stores `context_`; a null context trips the assert.
  AmdHsaCodeLoader(Context* context_)
    : context(context_) { assert(context); }

  /// Returns the context passed at construction.
  Context* GetContext() const override { return context; }

  // Creates an executable without a dedicated context argument.
  Executable* CreateExecutable(
      hsa_profile_t profile,
      const char *options,
      hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;

  // Creates an executable that is handed its own context.
  Executable* CreateExecutable(
      std::unique_ptr<Context> isolated_context,
      hsa_profile_t profile,
      const char *options,
      hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;

  hsa_status_t FreezeExecutable(Executable *executable, const char *options) override;
  void DestroyExecutable(Executable *executable) override;

  hsa_status_t IterateExecutables(
    hsa_status_t (*callback)(
      hsa_executable_t executable,
      void *data),
    void *data) override;

  hsa_status_t QuerySegmentDescriptors(
    hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
    size_t *num_segment_descriptors) override;

  hsa_executable_t FindExecutable(uint64_t device_address) override;

  uint64_t FindHostAddress(uint64_t device_address) override;

  void PrintHelp(std::ostream& out) override;

  // NOTE(review): not marked override; behaviour is defined in the .cpp,
  // which is not visible here.
  void EnableReadOnlyMode();
  void DisableReadOnlyMode();
};

} // namespace loader
} // namespace hsa
} // namespace amd
} // namespace rocr

#endif // HSA_RUNTIME_CORE_LOADER_EXECUTABLE_HPP_


================================================
FILE: runtime/hsa-runtime/pcs/hsa_ven_amd_pc_sampling.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "pcs_runtime.h"
#include "core/inc/agent.h"
#include "core/inc/amd_gpu_agent.h"
#include "core/inc/exceptions.h"

namespace rocr {
namespace AMD {
hsa_status_t handleException();

// Variant of handleException() for entry points that return a T: the
// exception is reported, then the process is aborted. The status returned by
// handleException() is deliberately dropped, and the trailing return only
// satisfies the signature.
template <typename T>
static __forceinline T handleExceptionT() {
  static_cast<void>(handleException());
  abort();
  return T();
}
}  // namespace AMD

// Guard for API entry points: makes the enclosing function return
// HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime singleton reports that it
// is not open. Wrapped in do/while(false) so it behaves as one statement.
#define IS_OPEN()                                                                                  \
  do {                                                                                             \
    if (!core::Runtime::runtime_singleton_->IsOpen()) return HSA_STATUS_ERROR_NOT_INITIALIZED;     \
  } while (false)

// Null-safe validity check: false for a null pointer, otherwise whatever
// ptr->IsValid() reports.
template <typename T>
static __forceinline bool IsValid(T* ptr) {
  return ptr != nullptr && ptr->IsValid();
}

// Exception firewall for API entry points. Usage: `TRY; ...body...; CATCH;`.
// TRY opens a try block that CATCH or CATCHRET must close.
#define TRY try {
// Closes the try block; any exception is converted to the status returned by
// AMD::handleException().
#define CATCH                                                                                      \
  }                                                                                                \
  catch (...) {                                                                                    \
    return AMD::handleException();                                                                 \
  }
// Same as CATCH for functions that return RETURN_TYPE; routes through
// AMD::handleExceptionT<RETURN_TYPE>().
#define CATCHRET(RETURN_TYPE)                                                                      \
  }                                                                                                \
  catch (...) {                                                                                    \
    return AMD::handleExceptionT<RETURN_TYPE>();                                                   \
  }

namespace pcs {

// Validates that the runtime is open and that `hsa_agent` is a valid AMD GPU
// agent, then forwards to PcsRuntime::PcSamplingIterateConfig.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a null, invalid or non-GPU agent.
hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
    hsa_agent_t hsa_agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
    void* callback_data) {
  TRY;
  IS_OPEN();

  core::Agent* const target = core::Agent::Convert(hsa_agent);
  const bool is_gpu_agent = target != NULL && target->IsValid() &&
      target->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingIterateConfig(target, configuration_callback, callback_data);
  CATCH;
}

// Validates that the runtime is open and that `hsa_agent` is a valid AMD GPU
// agent, then forwards every argument to PcsRuntime::PcSamplingCreate.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a null, invalid or non-GPU agent.
hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t hsa_agent, hsa_ven_amd_pcs_method_kind_t method,
                                    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
                                    size_t buffer_size,
                                    hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                    void* client_cb_data, hsa_ven_amd_pcs_t* handle) {
  TRY;
  IS_OPEN();

  core::Agent* const target = core::Agent::Convert(hsa_agent);
  const bool is_gpu_agent = target != NULL && target->IsValid() &&
      target->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingCreate(target, method, units, interval, latency, buffer_size,
                                   data_ready_cb, client_cb_data, handle);
  CATCH;
}

// Same validation as hsa_ven_amd_pcs_create, but forwards to
// PcsRuntime::PcSamplingCreateFromId with the caller-supplied `pcs_id`.
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a null, invalid or non-GPU agent.
hsa_status_t hsa_ven_amd_pcs_create_from_id(uint32_t pcs_id, hsa_agent_t hsa_agent,
                                            hsa_ven_amd_pcs_method_kind_t method,
                                            hsa_ven_amd_pcs_units_t units, size_t interval,
                                            size_t latency, size_t buffer_size,
                                            hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                            void* client_cb_data, hsa_ven_amd_pcs_t* handle) {
  TRY;
  IS_OPEN();

  core::Agent* const target = core::Agent::Convert(hsa_agent);
  const bool is_gpu_agent = target != NULL && target->IsValid() &&
      target->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingCreateFromId(pcs_id, target, method, units, interval, latency,
                                         buffer_size, data_ready_cb, client_cb_data, handle);
  CATCH;
}

// Forwards to PcsRuntime::PcSamplingDestroy; exceptions become a status code.
hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t handle) {
  TRY;
  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingDestroy(handle);
  CATCH;
}

// Forwards to PcsRuntime::PcSamplingStart; exceptions become a status code.
hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t handle) {
  TRY;
  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingStart(handle);
  CATCH;
}

// Forwards to PcsRuntime::PcSamplingStop; exceptions become a status code.
hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t handle) {
  TRY;
  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingStop(handle);
  CATCH;
}

// Forwards to PcsRuntime::PcSamplingFlush; exceptions become a status code.
hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t handle) {
  TRY;
  PcsRuntime* const runtime = PcsRuntime::instance();
  return runtime->PcSamplingFlush(handle);
  CATCH;
}

// Populates the PC sampling extension table with the entry points defined in
// this file.
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api) {
  core::PcSamplingExtTableInternal& table = *pcs_api;

  // Configuration discovery.
  table.hsa_ven_amd_pcs_iterate_configuration_fn = hsa_ven_amd_pcs_iterate_configuration;

  // Session creation and teardown.
  table.hsa_ven_amd_pcs_create_fn = hsa_ven_amd_pcs_create;
  table.hsa_ven_amd_pcs_create_from_id_fn = hsa_ven_amd_pcs_create_from_id;
  table.hsa_ven_amd_pcs_destroy_fn = hsa_ven_amd_pcs_destroy;

  // Session control.
  table.hsa_ven_amd_pcs_start_fn = hsa_ven_amd_pcs_start;
  table.hsa_ven_amd_pcs_stop_fn = hsa_ven_amd_pcs_stop;
  table.hsa_ven_amd_pcs_flush_fn = hsa_ven_amd_pcs_flush;
}

}  //  namespace pcs
}  //  namespace rocr


================================================
FILE: runtime/hsa-runtime/pcs/inc/hsa_ven_amd_pc_sampling_impl.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_VEN_AMD_PC_SAMPLING_IMPL_H
#define HSA_VEN_AMD_PC_SAMPLING_IMPL_H

#include "inc/hsa.h"
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ven_amd_pc_sampling.h"
#include "core/inc/hsa_ext_interface.h"

//---------------------------------------------------------------------------//
//  APIs that implement PC Sampling functionality
//---------------------------------------------------------------------------//

namespace rocr {
namespace pcs {

hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
    hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
    void* callback_data);

hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
                                    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
                                    size_t buffer_size,
                                    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
                                    void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);

hsa_status_t hsa_ven_amd_pcs_create_from_id(
    uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
    hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
    hsa_ven_amd_pcs_t* pc_sampling);

hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);

hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);

hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);

hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);

// Update Api table with func pointers that implement functionality
void LoadPcSampling(core::PcSamplingExtTableInternal* pcs_api);

// Release resources acquired by the PC Sampling implementation
void ReleasePcSamplingRsrcs();

}  // namespace pcs
}  // namespace rocr

#endif  //  HSA_VEN_AMD_PC_SAMPLING_IMPL_H


================================================
FILE: runtime/hsa-runtime/pcs/pcs_runtime.cpp
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#include "pcs_runtime.h"

#include <assert.h>
#include <mutex>

#include "core/inc/runtime.h"

#include "core/inc/amd_gpu_agent.h"

namespace rocr {
namespace pcs {

// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT from the enclosing function when
// `ptr` is null. The do/while wrapper makes the macro expand to one statement.
#define IS_BAD_PTR(ptr)                          \
  do {                                           \
    if (!(ptr)) {                                \
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;  \
    }                                            \
  } while (0)


/// Returns the PcsRuntime singleton, creating it on the first call.
PcsRuntime* PcsRuntime::instance() {
  // Fast path: the singleton has already been published.
  PcsRuntime* runtime = get_instance().load(std::memory_order_acquire);
  if (runtime != NULL) {
    return runtime;
  }

  // Slow path: serialize creation across threads.
  std::lock_guard<std::mutex> guard(instance_mutex());

  // Re-read under the mutex so the object is not created twice.
  runtime = get_instance().load(std::memory_order_relaxed);
  if (runtime == NULL) {
    runtime = CreateSingleton();
  }
  return runtime;
}

/// Allocates a new PcsRuntime and publishes it through the singleton slot.
PcsRuntime* PcsRuntime::CreateSingleton() {
  PcsRuntime* const created = new PcsRuntime();
  // Release store pairs with the acquire load in instance().
  get_instance().store(created, std::memory_order_release);
  return created;
}

void PcsRuntime::DestroySingleton() {
  PcsRuntime* instance = get_instance().load(std::memory_order_acquire);
  if (instance == NULL) {
    return;
  }

  get_instance().store(NULL, std::memory_order_release);
  delete instance;
}

/// Releases PC sampling resources by destroying the PcsRuntime singleton.
void ReleasePcSamplingRsrcs() {
  PcsRuntime::DestroySingleton();
}

/// Returns true when at least one PC sampling session is registered.
bool PcsRuntime::SessionsActive() const {
  return !pc_sampling_.empty();
}

/// Builds a session from the client's configuration and validates it.
///
/// The configuration is recorded unconditionally so every member holds a
/// defined value even when validation fails; callers must check isValid().
///
/// @param _agent               Agent the session is bound to.
/// @param method               Sampling method; selects the per-sample record size.
/// @param units                Units in which @p interval is expressed.
/// @param interval             Sampling interval; must be non-zero.
/// @param latency              Latency value supplied by the client (stored as-is).
/// @param buffer_size          Buffer size in bytes; must be a non-zero multiple of
///                             twice the sample size.
/// @param data_ready_callback  Client callback invoked by HandleSampleData().
/// @param client_callback_data Opaque pointer passed back to the client callback.
PcsRuntime::PcSamplingSession::PcSamplingSession(
    core::Agent* _agent, hsa_ven_amd_pcs_method_kind_t method, hsa_ven_amd_pcs_units_t units,
    size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data)
    : agent(_agent), thunkId_(0), active_(false), valid_(false), sample_size_(0) {
  // Store the client configuration before validating so no member is left
  // indeterminate on the early-exit paths below.
  csd.method = method;
  csd.units = units;
  csd.interval = interval;
  csd.latency = latency;
  csd.buffer_size = buffer_size;
  csd.data_ready_callback = data_ready_callback;
  csd.client_callback_data = client_callback_data;

  data_rdy.buf1 = nullptr;
  data_rdy.buf1_sz = 0;
  data_rdy.buf2 = nullptr;
  data_rdy.buf2_sz = 0;

  switch (method) {
    case HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1:
      sample_size_ = sizeof(perf_sample_hosttrap_v1_t);
      break;
    case HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1:
      sample_size_ = sizeof(perf_sample_snapshot_v1_t);
      break;
    default:
      // Unknown sampling method: the session stays invalid.
      return;
  }

  // The buffer must hold a whole number of sample pairs.
  valid_ = (interval != 0) && (buffer_size != 0) && ((buffer_size % (2 * sample_size_)) == 0);
}

/// Fills @p sampleInfo from this session's client configuration.
///
/// The method and units fields are only written for the enumerators handled
/// below; any other stored value leaves the corresponding field untouched.
void PcsRuntime::PcSamplingSession::GetHsaKmtSamplingInfo(HsaPcSamplingInfo* sampleInfo) {
  // Translate the HSA units enumerator to its thunk counterpart.
  switch (csd.units) {
    case HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS: {
      sampleInfo->units = HSA_PC_SAMPLING_UNIT_INTERVAL_MICROSECONDS;
      break;
    }
    case HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES: {
      sampleInfo->units = HSA_PC_SAMPLING_UNIT_INTERVAL_CYCLES;
      break;
    }
    case HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS: {
      sampleInfo->units = HSA_PC_SAMPLING_UNIT_INTERVAL_INSTRUCTIONS;
      break;
    }
  }

  // Translate the HSA method enumerator to its thunk counterpart.
  switch (csd.method) {
    case HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1: {
      sampleInfo->method = HSA_PC_SAMPLING_METHOD_KIND_HOSTTRAP_V1;
      break;
    }
    case HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1: {
      sampleInfo->method = HSA_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1;
      break;
    }
  }

  // Scalar fields: the requested interval, with the range and flags zeroed.
  sampleInfo->value = csd.interval;
  sampleInfo->flags = 0;
  sampleInfo->value_max = 0;
  sampleInfo->value_min = 0;
}

/// C-style trampoline handed to the client's data-ready callback: forwards the
/// copy request to the session object passed as @p _session.
hsa_status_t PcSamplingDataCopyCallback(void* _session, size_t bytes_to_copy, void* destination) {
  assert(_session);
  assert(destination);

  auto* const owner = static_cast<PcsRuntime::PcSamplingSession*>(_session);
  auto* const out = static_cast<uint8_t*>(destination);

  return owner->DataCopyCallback(out, bytes_to_copy);
}

/// Copies the two pending sample regions back-to-back into @p buffer.
///
/// @return HSA_STATUS_ERROR_EXCEPTION unless @p bytes_to_copy equals the
///         combined size of both regions; HSA_STATUS_SUCCESS otherwise.
hsa_status_t PcsRuntime::PcSamplingSession::DataCopyCallback(uint8_t* buffer,
                                                             size_t bytes_to_copy) {
  const size_t first_len = data_rdy.buf1_sz;
  const size_t second_len = data_rdy.buf2_sz;

  if (first_len + second_len != bytes_to_copy) {
    return HSA_STATUS_ERROR_EXCEPTION;
  }

  if (first_len != 0) {
    memcpy(buffer, data_rdy.buf1, first_len);
  }
  if (second_len != 0) {
    // The second region lands immediately after the first.
    memcpy(buffer + first_len, data_rdy.buf2, second_len);
  }

  return HSA_STATUS_SUCCESS;
}

hsa_status_t PcsRuntime::PcSamplingSession::HandleSampleData(uint8_t* buf1, size_t buf1_sz,
                                                             uint8_t* buf2, size_t buf2_sz,
                                                             size_t lost_sample_count) {
  data_rdy.buf1 = buf1;
  data_rdy.buf1_sz = buf1_sz;
  data_rdy.buf2 = buf2;
  data_rdy.buf2_sz = buf2_sz;

  AMD::GpuAgent* gpuAgent = static_cast<AMD::GpuAgent*>(agent);

  switch (csd.method) {
    case HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1: {
      size_t buf_samples = buf1_sz / sizeof(perf_sample_hosttrap_v1_t);
      perf_sample_hosttrap_v1_t* samples = reinterpret_cast<perf_sample_hosttrap_v1_t*>(buf1);
      while (buf_samples--) {
        samples->timestamp = gpuAgent->TranslateTime(samples->timestamp);
        samples++;
      }

      buf_samples = buf2_sz / sizeof(perf_sample_hosttrap_v1_t);
      samples = reinterpret_cast<perf_sample_hosttrap_v1_t*>(buf2);
      while (buf_samples--) {
        samples->timestamp = gpuAgent->TranslateTime(samples->timestamp);
        samples++;
      }
    }
    break;
    case HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1: {
      size_t buf_samples = buf1_sz / sizeof(perf_sample_snapshot_v1_t);
      perf_sample_snapshot_v1_t* samples = reinterpret_cast<perf_sample_snapshot_v1_t*>(buf1);
      while (buf_samples--) {
        samples->timestamp = gpuAgent->TranslateTime(samples->timestamp);
        samples++;
      }

      buf_samples = buf2_sz / sizeof(perf_sample_snapshot_v1_t);
      samples = reinterpret_cast<perf_sample_snapshot_v1_t*>(buf2);
      while (buf_samples--) {
        samples->timestamp = gpuAgent->TranslateTime(samples->timestamp);
        samples++;
      }
    }
    break;
  }

  csd.data_ready_callback(csd.client_callback_data, buf1_sz + buf2_sz, lost_sample_count,
                          &PcSamplingDataCopyCallback,
                          /* hsa_callback_data*/ this);
  return HSA_STATUS_SUCCESS;
}

/// Forwards the configuration iteration request to the GPU agent.
hsa_status_t PcsRuntime::PcSamplingIterateConfig(
    core::Agent* agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
    void* callback_data) {
  auto* const gpu = static_cast<AMD::GpuAgentInt*>(agent);
  const hsa_status_t status = gpu->PcSamplingIterateConfig(configuration_callback, callback_data);
  return status;
}

/// Creates a new PC sampling session on @p agent and returns its handle.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when @p handle or
///         @p data_ready_cb is null; otherwise the status reported by
///         PcSamplingCreateInternal().
hsa_status_t PcsRuntime::PcSamplingCreate(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
                                          hsa_ven_amd_pcs_units_t units, size_t interval,
                                          size_t latency, size_t buffer_size,
                                          hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                          void* client_cb_data, hsa_ven_amd_pcs_t* handle) {
  if (handle == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (data_ready_cb == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Agent-side step: ask the GPU agent to create the session.
  auto create_on_agent = [](core::Agent* target, PcSamplingSession& session) {
    return static_cast<AMD::GpuAgentInt*>(target)->PcSamplingCreate(session);
  };

  return PcSamplingCreateInternal(agent, method, units, interval, latency, buffer_size,
                                  data_ready_cb, client_cb_data, handle, create_on_agent);
}

/// Creates a PC sampling session on @p agent from the id @p ioctl_pcs_id and
/// returns its handle.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when @p handle or
///         @p data_ready_cb is null; otherwise the status reported by
///         PcSamplingCreateInternal().
hsa_status_t PcsRuntime::PcSamplingCreateFromId(uint32_t ioctl_pcs_id, core::Agent* agent,
                                                hsa_ven_amd_pcs_method_kind_t method,
                                                hsa_ven_amd_pcs_units_t units, size_t interval,
                                                size_t latency, size_t buffer_size,
                                                hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                                void* client_cb_data, hsa_ven_amd_pcs_t* handle) {
  if (handle == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  if (data_ready_cb == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Agent-side step: hand the id to the GPU agent together with the session.
  auto create_on_agent = [ioctl_pcs_id](core::Agent* target, PcSamplingSession& session) {
    return static_cast<AMD::GpuAgentInt*>(target)->PcSamplingCreateFromId(ioctl_pcs_id, session);
  };

  return PcSamplingCreateInternal(agent, method, units, interval, latency, buffer_size,
                                  data_ready_cb, client_cb_data, handle, create_on_agent);
}

/// Shared implementation of PcSamplingCreate() and PcSamplingCreateFromId().
///
/// Under pc_sampling_lock_, allocates the next session id, stores it in
/// @p handle, constructs the session in place inside pc_sampling_ and runs
/// @p agent_pcs_create_fn on it. The session is removed again if it fails
/// validation or if the agent-side creation fails.
///
/// @param handle               Receives the new session id; written before
///                             validation, so it is also set on failure.
/// @param agent_pcs_create_fn  Agent-side creation step, called with @p agent
///                             and the newly inserted session.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the session parameters are
///         invalid, HSA_STATUS_ERROR if the id is already registered,
///         otherwise the status returned by @p agent_pcs_create_fn.
hsa_status_t PcsRuntime::PcSamplingCreateInternal(
    core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method, hsa_ven_amd_pcs_units_t units,
    size_t interval, size_t latency, size_t buffer_size,
    hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb, void* client_cb_data,
    hsa_ven_amd_pcs_t* handle, agent_pcs_create_fn_t agent_pcs_create_fn) {
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  handle->handle = ++pc_sampling_id_;

  // Construct the session in place and keep the iterator so the entry is not
  // looked up again (operator[] would default-insert on a miss).
  auto inserted =
      pc_sampling_.emplace(std::piecewise_construct, std::forward_as_tuple(handle->handle),
                           std::forward_as_tuple(agent, method, units, interval, latency,
                                                 buffer_size, data_ready_cb, client_cb_data));
  if (!inserted.second) {
    // The id is already in use; do not touch the existing session.
    return HSA_STATUS_ERROR;
  }

  auto session_it = inserted.first;
  if (!session_it->second.isValid()) {
    pc_sampling_.erase(session_it);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_status_t ret = agent_pcs_create_fn(agent, session_it->second);
  if (ret != HSA_STATUS_SUCCESS) {
    pc_sampling_.erase(session_it);
  }
  return ret;
}

/// Destroys the session identified by @p handle and removes it from the map.
///
/// The entry is erased regardless of the status the agent returns.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle is unknown,
///         otherwise the agent's PcSamplingDestroy() status.
hsa_status_t PcsRuntime::PcSamplingDestroy(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  const auto entry = pc_sampling_.find(handle.handle);
  if (entry != pc_sampling_.end()) {
    PcSamplingSession& session = entry->second;
    auto* const gpu = static_cast<AMD::GpuAgentInt*>(session.agent);

    const hsa_status_t status = gpu->PcSamplingDestroy(session);
    pc_sampling_.erase(entry);
    return status;
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// Starts the session identified by @p handle on its agent.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle is unknown,
///         otherwise the agent's PcSamplingStart() status.
hsa_status_t PcsRuntime::PcSamplingStart(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  const auto entry = pc_sampling_.find(handle.handle);
  if (entry != pc_sampling_.end()) {
    PcSamplingSession& session = entry->second;
    return static_cast<AMD::GpuAgentInt*>(session.agent)->PcSamplingStart(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// Stops the session identified by @p handle on its agent.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle is unknown,
///         otherwise the agent's PcSamplingStop() status.
hsa_status_t PcsRuntime::PcSamplingStop(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  const auto entry = pc_sampling_.find(handle.handle);
  if (entry != pc_sampling_.end()) {
    PcSamplingSession& session = entry->second;
    return static_cast<AMD::GpuAgentInt*>(session.agent)->PcSamplingStop(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

/// Flushes the session identified by @p handle on its agent.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if the handle is unknown,
///         otherwise the agent's PcSamplingFlush() status.
hsa_status_t PcsRuntime::PcSamplingFlush(hsa_ven_amd_pcs_t handle) {
  ScopedAcquire<KernelMutex> lock(&pc_sampling_lock_);

  const auto entry = pc_sampling_.find(handle.handle);
  if (entry != pc_sampling_.end()) {
    PcSamplingSession& session = entry->second;
    return static_cast<AMD::GpuAgentInt*>(session.agent)->PcSamplingFlush(session);
  }

  debug_warning(false && "Cannot find PcSampling session");
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}

}  // namespace pcs
}  // namespace rocr


================================================
FILE: runtime/hsa-runtime/pcs/pcs_runtime.h
================================================
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_PCS_RUNTIME_H
#define HSA_RUNTIME_PCS_RUNTIME_H

#include <atomic>
#include <map>
#include <mutex>

#include "hsakmt/hsakmt.h"

#include "hsa_ven_amd_pc_sampling.h"
#include "core/inc/agent.h"
#include "core/inc/exceptions.h"


namespace rocr {
namespace pcs {

/// @brief Singleton that owns all PC sampling sessions.
///
/// Sessions live in a map keyed by the integer stored in hsa_ven_amd_pcs_t;
/// the map is guarded by pc_sampling_lock_.
class PcsRuntime {
 public:
  PcsRuntime() : pc_sampling_id_(0) {}
  ~PcsRuntime() {}

  /// @brief Getter for the PcsRuntime singleton object.
  static PcsRuntime* instance();

  /// @brief Returns true when at least one session is registered.
  bool SessionsActive() const;

  /// @brief Destroy singleton object.
  static void DestroySingleton();

  /// @brief State of one PC sampling session: the client's configuration,
  /// the thunk trace id and the most recent pair of sample buffers.
  class PcSamplingSession {
   public:
    /// @brief Creates an empty, invalid session with every member zeroed.
    PcSamplingSession()
        : agent(NULL),
          thunkId_(0),
          active_(false),
          valid_(false),
          sample_size_(0),
          csd(),
          data_rdy() {}

    /// @brief Creates a session from client parameters; check isValid()
    /// afterwards to learn whether the parameters were accepted.
    PcSamplingSession(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
                      hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
                      size_t buffer_size, hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
                      void* client_callback_data);
    ~PcSamplingSession() {}

    // Accessors for the validated configuration.
    bool isValid() const { return valid_; }
    size_t buffer_size() const { return csd.buffer_size; }
    hsa_ven_amd_pcs_method_kind_t method() const { return csd.method; }
    size_t latency() const { return csd.latency; }
    size_t sample_size() const { return sample_size_; }

    /// @brief Fills @p sampleInfo from the stored client configuration.
    void GetHsaKmtSamplingInfo(HsaPcSamplingInfo* sampleInfo);

    /// @brief Records the two sample regions and notifies the client through
    /// its data-ready callback.
    hsa_status_t HandleSampleData(uint8_t* buf1, size_t buf1_sz, uint8_t* buf2, size_t buf2_sz,
                                  size_t lost_sample_count);

    /// @brief Copies the recorded sample regions into @p buffer.
    hsa_status_t DataCopyCallback(uint8_t* buffer, size_t buffer_size);

    /// Agent this session was created for.
    core::Agent* agent;

    void SetThunkId(HsaPcSamplingTraceId thunkId) { thunkId_ = thunkId; }
    HsaPcSamplingTraceId ThunkId() { return thunkId_; }
    bool isActive() { return active_; }
    void start() { active_ = true; }
    void stop() { active_ = false; }

   private:
    HsaPcSamplingTraceId thunkId_;

    bool active_;  // Set to true when the session is started
    bool valid_;   // Whether configuration parameters are valid
    size_t sample_size_;  // Size in bytes of one sample record for the method

    // Parameters supplied by the client when the session was created.
    struct client_session_data_t {
      hsa_ven_amd_pcs_method_kind_t method;
      hsa_ven_amd_pcs_units_t units;
      size_t interval;
      size_t latency;
      size_t buffer_size;
      hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback;
      void* client_callback_data;
    };
    struct client_session_data_t csd;

    // Sample regions recorded by HandleSampleData() for DataCopyCallback().
    struct data_ready_info_t {
      uint8_t* buf1;
      size_t buf1_sz;
      uint8_t* buf2;
      size_t buf2_sz;
    };
    struct data_ready_info_t data_rdy;
  };  // class PcSamplingSession

  /// @brief Forwards configuration iteration to the agent.
  hsa_status_t PcSamplingIterateConfig(
      core::Agent* agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
      void* callback_data);

  /// @brief Creates a session on @p agent and returns its id in @p handle.
  hsa_status_t PcSamplingCreate(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
                                hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
                                size_t buffer_size,
                                hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                void* client_cb_data, hsa_ven_amd_pcs_t* handle);

  /// @brief Same as PcSamplingCreate(), but passes @p ioctl_pcs_id to the
  /// agent's PcSamplingCreateFromId().
  hsa_status_t PcSamplingCreateFromId(uint32_t ioctl_pcs_id, core::Agent* agent,
                                      hsa_ven_amd_pcs_method_kind_t method,
                                      hsa_ven_amd_pcs_units_t units, size_t interval,
                                      size_t latency, size_t buffer_size,
                                      hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                      void* client_cb_data, hsa_ven_amd_pcs_t* handle);

  // Session control; each looks the session up by handle under pc_sampling_lock_.
  hsa_status_t PcSamplingDestroy(hsa_ven_amd_pcs_t handle);
  hsa_status_t PcSamplingStart(hsa_ven_amd_pcs_t handle);
  hsa_status_t PcSamplingStop(hsa_ven_amd_pcs_t handle);
  hsa_status_t PcSamplingFlush(hsa_ven_amd_pcs_t handle);

 private:
  /// @brief Initialize singleton object, must be called once.
  static PcsRuntime* CreateSingleton();

  /// Pointer to singleton object.
  static __forceinline std::atomic<PcsRuntime*>& get_instance() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::atomic<PcsRuntime*>* instance_ = new std::atomic<PcsRuntime*>();
    return *instance_;
  }

  /// Mutex serializing creation of the singleton.
  static __forceinline std::mutex& instance_mutex() {
    // This allocation is meant to last until the last thread has exited.
    // It is intentionally not freed.
    static std::mutex* instance_mutex_ = new std::mutex();
    return *instance_mutex_;
  }

  // Map of pc sampling sessions indexed by hsa_ven_amd_pcs_t handle
  std::map<uint64_t, PcSamplingSession> pc_sampling_;
  // Guards pc_sampling_ and pc_sampling_id_.
  KernelMutex pc_sampling_lock_;
  // Last session id handed out; incremented for every create request.
  uint64_t pc_sampling_id_;

  DISALLOW_COPY_AND_ASSIGN(PcsRuntime);

  /// Agent-side creation step run by PcSamplingCreateInternal().
  using agent_pcs_create_fn_t = std::function<hsa_status_t(core::Agent*, PcSamplingSession&)>;

  /// @brief Common body of PcSamplingCreate() and PcSamplingCreateFromId().
  hsa_status_t PcSamplingCreateInternal(core::Agent* agent, hsa_ven_amd_pcs_method_kind_t method,
                                        hsa_ven_amd_pcs_units_t units, size_t interval,
                                        size_t latency, size_t buffer_size,
                                        hsa_ven_amd_pcs_data_ready_callback_t data_ready_cb,
                                        void* client_cb_data, hsa_ven_amd_pcs_t* handle,
                                        agent_pcs_create_fn_t agent_pcs_create_fn);
};

}  // namespace pcs
}  // namespace rocr
#endif  // HSA_RUNTIME_PCS_RUNTIME_H


================================================
FILE: runtime/hsa-runtime-tools/CMakeLists.txt
================================================
# Build script for the hsa-runtime-tools shared library: project naming,
# optional ccache, preprocessor definitions and version strings.
cmake_minimum_required ( VERSION 3.5.0 )

# Set the tools module name, target name, library name and project name.
set ( TOOLS_NAME "hsa-runtime-tools" )
set ( TOOLS_TARGET "${TOOLS_NAME}64" )
set ( TOOLS_LIBRARY "lib${TOOLS_TARGET}" )
project ( ${TOOLS_TARGET} )

# Optionally, build with ccache.
set(ROCM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if (ROCM_CCACHE_BUILD)
  find_program(CCACHE_PROGRAM ccache)
  if (CCACHE_PROGRAM)
    # Prefix every compile command with the ccache executable.
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM})
  else()
    message(WARNING "Unable to find ccache. Falling back to real compiler")
  endif() # if (CCACHE_PROGRAM)
endif() # if (ROCM_CCACHE_BUILD)

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules" )
include ( utils )

## Compiler preproc definitions.
#add_definitions ( -D__linux__ )
add_definitions ( -DUNIX_OS )
add_definitions ( -DLINUX )
add_definitions ( -D__AMD64__ )
add_definitions ( -D__x86_64__ )
add_definitions ( -DAMD_INTERNAL_BUILD )
add_definitions ( -DLITTLEENDIAN_CPU=1 )
# Defines HSA_DEPRECATED as empty.
add_definitions ( -D HSA_DEPRECATED= )

## Get the package version. The version string passed to get_version is 1.1.9
## (presumably the fallback; get_version comes from the included utils module).
get_version( "1.1.9")
set(SO_MAJOR 1)
set(SO_MINOR 1)
# ROCM_PATCH_VERSION, when set, overrides the patch number of both the
# shared-object version and the package version.
if ( ${ROCM_PATCH_VERSION} )
    set ( SO_PATCH ${ROCM_PATCH_VERSION})
    set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
else ()
    set(SO_PATCH 9)
endif ()

set( SO_VERSION_STRING "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}" )
set( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_HASH}" )

## Find the hsakmt library and include files
## (get_include_path / get_library_path are not defined in this file;
## presumably they come from the utils module included earlier.)
get_include_path( HSAKMT_INC_PATH "libhsakmt include path" NAMES "hsakmt.h" "libhsakmt/hsakmt.h" HINTS "${CMAKE_BINARY_DIR}/../../include" "${CMAKE_CURRENT_SOURCE_DIR}/../../../../libhsakmt/include" PATHS "/opt/rocm/include")
get_library_path( HSAKMT_LIB_PATH "libhsakmt library path" NAMES "libhsakmt.so" HINTS "${CMAKE_BINARY_DIR}/../../lib" "${CMAKE_BINARY_DIR}/../roct" PATHS "/opt/rocm/lib")
include_directories( ${HSAKMT_INC_PATH} )
link_directories( ${HSAKMT_LIB_PATH} )

## Find the hsa-runtime and include files
get_include_path( HSA_INC_PATH "ROCr include path" NAMES "hsa.h" "hsa/hsa.h" HINTS "${CMAKE_BINARY_DIR}/../../include" "${CMAKE_CURRENT_SOURCE_DIR}/../hsa-runtime/inc" PATHS "/opt/rocm/include")
get_library_path( HSA_LIB_PATH "ROCr library path" NAMES "libhsa-runtime64.so" HINTS "${CMAKE_BINARY_DIR}/../../lib" "${CMAKE_BINARY_DIR}/../hsa-core" "${CMAKE_CURRENT_SOURCE_DIR}/../hsa-runtime/build" PATHS "/opt/rocm/lib")
include_directories( ${HSA_INC_PATH} )
link_directories( ${HSA_LIB_PATH} )

## External dependencies
## Directory containing si_id.h (ASIC register headers).
get_include_path( REG_INCLUDE "ASIC register directory" NAMES "si_id.h" HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../p4/driver/drivers/inc/asic_reg" "${HSA_CLOSED_SOURCE_DIR}/drivers/inc/asic_reg" "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../drivers/inc/asic_reg" )

## Find self
## Only search for the tools source tree when TOOLS_SOURCE_DIR was not supplied;
## the tree is located through inc/amd_hsa_tools_interfaces.h.
if( "${TOOLS_SOURCE_DIR}" STREQUAL "" )
    get_include_path( TOOLS_SOURCE_FILE null NAMES "inc/amd_hsa_tools_interfaces.h" HINTS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../../tools/" )
    get_filename_component( TOOLS_SOURCE_DIR "${TOOLS_SOURCE_FILE}/.." ABSOLUTE )
    unset( TOOLS_SOURCE_FILE CACHE )
endif()
set( TOOLS_SOURCE_DIR ${TOOLS_SOURCE_DIR} CACHE PATH "Tools lib source dir" FORCE )

## The open source root is the parent of this directory.
get_filename_component( OPEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE CACHE )
set( OPEN_SOURCE_DIR ${OPEN_SOURCE_DIR} CACHE PATH "Open source root dir" FORCE )

## Set RUNPATH - ../../lib covers use of the legacy symlink in /hsa/lib/
set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64")

## ------------------------- Linux Compiler and Linker options -------------------------
## NOTE: this assignment replaces any CMAKE_CXX_FLAGS value set before this point.
set ( CMAKE_CXX_FLAGS "-std=c++11 ")

## Warnings are errors (-Werror), with selected warnings downgraded or disabled.
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type -Werror -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=sign-compare -Wno-error=enum-compare -Wno-sign-compare -Wno-write-strings -Wno-deprecated-declarations -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -fPIC" )

## Architecture-specific flags.
if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64  -msse -msse2" )
elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
endif ()

## Add gdb debug information to Debug builds.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug )
    set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
endif ()

## Linker version script used for the shared library.
set ( DRVDEF "${TOOLS_SOURCE_DIR}/make/hsatools.so.def" )

set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF} -Wl,--enable-new-dtags" )

## Include search path(s): the ASIC register headers, the tools source tree
## and its sub-components, and the open-source hsa-runtime headers.
include_directories ( ${REG_INCLUDE} )
include_directories ( ${TOOLS_SOURCE_DIR} )
include_directories ( ${TOOLS_SOURCE_DIR}/.. )
include_directories ( ${OPEN_SOURCE_DIR}/hsa-runtime )
include_directories ( ${OPEN_SOURCE_DIR}/hsa-runtime/inc )
include_directories ( ${OPEN_SOURCE_DIR}/hsa-runtime/core/inc )
include_directories ( ${TOOLS_SOURCE_DIR}/inc )
include_directories ( ${TOOLS_SOURCE_DIR}/commandwriter )
include_directories ( ${TOOLS_SOURCE_DIR}/commandwriter/include/si )
include_directories ( ${TOOLS_SOURCE_DIR}/common )
include_directories ( ${TOOLS_SOURCE_DIR}/debugger )
include_directories ( ${TOOLS_SOURCE_DIR}/intercept )
include_directories ( ${TOOLS_SOURCE_DIR}/profiler )
include_directories ( ${TOOLS_SOURCE_DIR}/threadtrace )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000 )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gen )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8 )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81 )
include_directories ( ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9 )

set ( CORE_SRC   ${OPEN_SOURCE_DIR}/hsa-runtime/core/common/shared.cpp
                 ${OPEN_SOURCE_DIR}/hsa-runtime/core/common/hsa_table_interface.cpp )

set ( CMDWRITER_SRC   ${TOOLS_SOURCE_DIR}/commandwriter/aql_hw_cmdwriter.cpp
                      ${TOOLS_SOURCE_DIR}/commandwriter/ci_aql_common.cpp
                      ${TOOLS_SOURCE_DIR}/commandwriter/gfx9_cmdwriter.cpp
                      ${TOOLS_SOURCE_DIR}/commandwriter/gfx9_factory.cpp
                      ${TOOLS_SOURCE_DIR}/commandwriter/pre_gfx9_factory.cpp
                      ${TOOLS_SOURCE_DIR}/commandwriter/cmdwriter.cpp )

set ( COMMON_SRC      ${TOOLS_SOURCE_DIR}/common/amd_asic_type.cpp
                      ${TOOLS_SOURCE_DIR}/common/amd_tools_interface.cpp )

set ( DEBUGGER_SRC    ${TOOLS_SOURCE_DIR}/debugger/cwsr_trapstring_perf.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/gpu_trap_event.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/hsa_ext_debugger.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/kfd_event.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/pm4_queue.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/runtime_trapstring.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/shader_event.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/trap_finalizer.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/trap_handler.cpp
                      ${TOOLS_SOURCE_DIR}/debugger/trap_manager.cpp )

set ( INTERCEPT_SRC   ${TOOLS_SOURCE_DIR}/intercept/amd_sw_aql_command_processor.cpp
                      ${TOOLS_SOURCE_DIR}/intercept/hsa_amd_tools.cpp
                      ${TOOLS_SOURCE_DIR}/intercept/aql_pm4_factory.cpp
                      ${TOOLS_SOURCE_DIR}/intercept/aql_proxy_queue.cpp
                      ${TOOLS_SOURCE_DIR}/intercept/profiler.cpp )

set ( PROFILER_SRC    ${TOOLS_SOURCE_DIR}/profiler/gpu_countergroup.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/gpu_counter.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/var_data.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/info_set.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/parameter_set.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/ci_blockinfo.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/ci_pmu.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/vi_blockinfo.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/vi_pmu.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/ai_blockinfo.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/ai_pmu.cpp
                      ${TOOLS_SOURCE_DIR}/profiler/hsa_ext_profiler.cpp )

set ( THREAD_TRACE_SRC    ${TOOLS_SOURCE_DIR}/threadtrace/thread_trace.cpp
                          ${TOOLS_SOURCE_DIR}/threadtrace/gfx9_factory.cpp
                          ${TOOLS_SOURCE_DIR}/threadtrace/gfx9_thread_trace.cpp
                          ${TOOLS_SOURCE_DIR}/threadtrace/pre_gfx9_factory.cpp
                          ${TOOLS_SOURCE_DIR}/threadtrace/pre_gfx9_thread_trace.cpp )

## C sources found under sp3/Chip/R1000 of the tools tree: the common files,
## the generated parser/lexer in gen/, and one group per ASIC subdirectory
## (ci, si, gfx81, gfx8, gfx9). The list is compiled into the shared library.
## NOTE(review): the gfx81 and gfx9 groups list no *-tables.c file, unlike
## ci, si and gfx8 -- confirm that this is intentional.
set ( SP3_R1000_SRC   ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-dispatch.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-eval.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-gc.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-int.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-lib.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-main.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-merge-shaders.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-native.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/sp3-vm.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gen/sp3-parse.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gen/sp3-lex.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-dis.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-gen.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-inst-info.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-regs.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/ci/sp3-ci-tables.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-dis.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-gen.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-inst-info.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-regs.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/si/sp3-si-tables.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81/sp3-gfx81-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81/sp3-gfx81-dis.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81/sp3-gfx81-gen.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81/sp3-gfx81-inst-info.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx81/sp3-gfx81-regs.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-dis.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-gen.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-inst-info.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-regs.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx8/sp3-gfx8-tables.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9/sp3-gfx9-asic.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9/sp3-gfx9-dis.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9/sp3-gfx9-gen.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9/sp3-gfx9-inst-info.c
                      ${TOOLS_SOURCE_DIR}/sp3/Chip/R1000/gfx9/sp3-gfx9-regs.c )

## Utility sources compiled straight from the runtime tree rooted at
## OPEN_SOURCE_DIR (not TOOLS_SOURCE_DIR): timer, small heap and the Linux
## OS abstraction.
set ( UTIL_SRC   ${OPEN_SOURCE_DIR}/hsa-runtime/core/util/timer.cpp
                 ${OPEN_SOURCE_DIR}/hsa-runtime/core/util/small_heap.cpp
                 ${OPEN_SOURCE_DIR}/hsa-runtime/core/util/lnx/os_linux.cpp )


## This is the main shared library.
## Every source list is compiled into the single SHARED target.
## NOTE(review): both CMD_WRITER_SRC and CMDWRITER_SRC are referenced; a
## variable that is not set expands to nothing, so verify which of the two
## spellings is actually defined earlier in this file.
add_library ( ${TOOLS_TARGET} SHARED ${CORE_SRC}
                                     ${COMMON_SRC}
                                     ${CMD_WRITER_SRC}
                                     ${CMDWRITER_SRC}
                                     ${DEBUGGER_SRC}
                                     ${INTERCEPT_SRC}
                                     ${PROFILER_SRC}
                                     ${THREAD_TRACE_SRC}
                                     ${SP3_R1000_SRC}
                                     ${UTIL_SRC} )

## Set the VERSION and SOVERSION values
## (VERSION comes from SO_VERSION_STRING, SOVERSION from SO_MAJOR.)
set_property ( TARGET ${TOOLS_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}" )
set_property ( TARGET ${TOOLS_TARGET} PROPERTY SOVERSION "${SO_MAJOR}" )

## Add the required link libraries
## All items are linked PRIVATE: the trailing "c dl pthread rt" entries follow
## the last PRIVATE keyword and therefore belong to it as well.
target_link_libraries (
    ${TOOLS_TARGET}
    PRIVATE hsa-runtime64
    PRIVATE hsakmt
    c dl pthread rt
)

## If the build is Release, strip the target library
## The strip command receives only the library's file name (no directory), so
## it depends on the post-build step running in the directory that holds it.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
    add_custom_command ( TARGET ${TOOLS_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE_NAME:${TOOLS_TARGET}> )
endif ()

## Create symlinks for legacy packaging and install
## The link is created in the current binary directory under the name
## ${TOOLS_LIBRARY}-link.so and points at the relative path
## ../hsa/lib/${TOOLS_LIBRARY}.so, which only resolves once it is installed
## into "lib" next to "hsa/lib".
add_custom_target ( hsa_tools_lib_link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E create_symlink ../hsa/lib/${TOOLS_LIBRARY}.so ${TOOLS_LIBRARY}-link.so )

## Set install information
## - the library itself goes to hsa/lib
## - public headers (*.h under inc/) go to hsa/include/hsa
## - the link created above is installed into lib and renamed to
##   ${TOOLS_LIBRARY}.so
install ( TARGETS ${TOOLS_TARGET} LIBRARY DESTINATION hsa/lib )
install ( DIRECTORY ${TOOLS_SOURCE_DIR}/inc/ DESTINATION hsa/include/hsa FILES_MATCHING PATTERN "*.h" )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/${TOOLS_LIBRARY}-link.so DESTINATION lib PERMISSIONS OWNER_WRITE OWNER_READ RENAME ${TOOLS_LIBRARY}.so )


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.5.0 )

## Set the name and project name.
set ( PROJECT_STRING hsa-ext-rocr-dev )
project ( ${PROJECT_STRING} )

## Build the image extension as part of this package. The second argument is
## a relative binary directory, resolved against the current binary directory.
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../hsa-ext-image" "../hsa-ext-image")

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake_modules" )
include ( utils )

## Get the package version.
get_version ( "1.1.9" )

## Let the caller override the patch component. The value is compared as a
## string so that an explicit patch level of 0 is honoured as well
## (a bare "if ( 0 )" evaluates to false and would silently drop it).
if ( NOT "${ROCM_PATCH_VERSION}" STREQUAL "" )
    set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
endif ()

set( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_HASH}" )

## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")

set ( CPACK_PACKAGE_NAME ${PROJECT_NAME} )
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_VERSION ${PACKAGE_VERSION_STRING} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime extensions for ROCm platforms" )
set ( CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/description" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/copyright" )

# Debian package specific variables
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsakmt-roct, hsa-rocr-dev" )
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/prerm" )

# RPM package specific variables
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
# The RPM generator reads its dependency list from CPACK_RPM_PACKAGE_REQUIRES;
# a *_DEPENDS variable exists only for the Debian generator.
set ( CPACK_RPM_PACKAGE_REQUIRES "hsakmt-roct, hsa-rocr-dev" )
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_postun" )

include ( CPack )


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/Old CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.5.0 )

## Set the name and project name.
set ( PROJECT_STRING hsa-ext-rocr-dev )
project ( ${PROJECT_STRING} )

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake_modules" )
include ( utils )

## Get the package version.
get_version ( "1.2.0" )

## Unlike a plain major.minor.patch string, this one also embeds the commit
## count, job, build id and hash variables.
set( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_BUILD_ID}-${VERSION_HASH}" )

## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")

## Staging area: hsa/lib is created inside the current binary directory and
## later installed as a whole.
set ( PACKAGE_DIRECTORIES "hsa/lib")
add_custom_command ( OUTPUT ${PACKAGE_DIRECTORIES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E make_directory "hsa/lib" ) 
add_custom_target (create_dirs DEPENDS ${PACKAGE_DIRECTORIES} )

set ( TOOLS_NAME     "libhsa-runtime-tools64" )
set ( IMAGE_NAME     "libhsa-ext-image64" )

## Prebuilt libraries are picked up from ${OUT_DIR}/lib.
## NOTE(review): OUT_DIR is not set in this file -- presumably supplied by the
## caller; confirm. The "*" wildcard is passed through to the copy command as
## written, so it relies on being expanded when that command runs.
set ( TOOLS_LIBRARY_SOURCE     "${OUT_DIR}/lib/${TOOLS_NAME}.so*" )
set ( IMAGE_LIBRARY_SOURCE     "${OUT_DIR}/lib/${IMAGE_NAME}.so*" )

set ( TOOLS_LIBRARY_TARGET     "${CMAKE_CURRENT_BINARY_DIR}/hsa/lib/${TOOLS_NAME}.so" )
set ( IMAGE_LIBRARY_TARGET     "${CMAKE_CURRENT_BINARY_DIR}/hsa/lib/${IMAGE_NAME}.so" )

## Copy each prebuilt library into the staging area.
add_custom_command ( OUTPUT ${TOOLS_LIBRARY_TARGET} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                     COMMAND ${CMAKE_COMMAND} -E copy ${TOOLS_LIBRARY_SOURCE} "${CMAKE_CURRENT_BINARY_DIR}/hsa/lib/" )

add_custom_command ( OUTPUT ${IMAGE_LIBRARY_TARGET} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                     COMMAND ${CMAKE_COMMAND} -E copy ${IMAGE_LIBRARY_SOURCE} "${CMAKE_CURRENT_BINARY_DIR}/hsa/lib/" )

## Part of the default build (ALL): create the directory, then copy both
## libraries.
add_custom_target ( copy_targets ALL DEPENDS create_dirs
                                        ${TOOLS_LIBRARY_TARGET}
                                        ${IMAGE_LIBRARY_TARGET}
                                        )

## Install the staged tree under "hsa", keeping the permissions of the copies.
install ( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/hsa/ DESTINATION hsa USE_SOURCE_PERMISSIONS )

set ( CPACK_PACKAGE_NAME ${PROJECT_NAME} )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION ${PACKAGE_VERSION_STRING} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime extensions for ROCm platforms" )
set ( CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/description" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/copyright" )

# Debian package specific variables
# (postinst/prerm are shipped as extra control files of the .deb)
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/prerm" )

# RPM package specific variables
# (rpm_post/rpm_postun become the post-install / post-uninstall scriptlets)
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_postun" )

include ( CPack )


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/copyright
================================================
The University of Illinois/NCSA
Open Source License (NCSA)

Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.

Developed by:

                AMD Research and AMD HSA Software Development

                Advanced Micro Devices, Inc.

                www.amd.com

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal with the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

 - Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimers.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimers in
   the documentation and/or other materials provided with the distribution.
 - Neither the names of Advanced Micro Devices, Inc,
   nor the names of its contributors may be used to endorse or promote
   products derived from this Software without specific prior written
   permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/description
================================================
This package includes the user-mode runtime necessary for host applications to launch compute kernels to available HSA and ROCm components. This version is consistent with the 1.0 Final HSA Runtime Programmer's Reference Manual and targets AMD Fiji ASICs on supported platforms.


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/postinst
================================================
#!/bin/bash
#
# Debian post-installation script for the hsa-ext-rocr-dev package.
# The first argument ($1) names the action being performed.
#
# Fix: the interpreter line previously read "#/bin/bash", which is an ordinary
# comment rather than a shebang, so the script had no declared interpreter.

# Abort on the first failing command.
set -e

# Register /opt/rocm/hsa/lib with the dynamic linker, then refresh its cache.
# ldconfig only runs when the configuration file was written successfully.
do_ldconfig() {
    echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/hsa-ext-rocr-dev.conf && ldconfig
}

case "$1" in
   configure)
       do_ldconfig
   ;;
   abort-upgrade|abort-remove|abort-deconfigure)
       # Nothing to undo; just report which abort action was requested.
       echo "$1"
   ;;
   *)
       # Any other action is not handled here.
       exit 0
   ;;
esac


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/prerm
================================================
#!/bin/bash
#
# Debian pre-removal script for the hsa-ext-rocr-dev package.
# The first argument ($1) names the action being performed.

# Abort on the first failing command.
set -e

# Delete this package's dynamic-linker configuration entry and, if that
# succeeded, refresh the linker cache.
remove_linker_conf() {
    rm -f /etc/ld.so.conf.d/hsa-ext-rocr-dev.conf && ldconfig
}

# Only "remove" needs any work. "purge" and every other action fall through
# and the script finishes successfully.
if [ "$1" = "remove" ]; then
    remove_linker_conf
fi


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/rpm_post
================================================
# RPM post-install scriptlet for hsa-ext-rocr-dev: register /opt/rocm/hsa/lib
# with the dynamic linker and refresh its cache (ldconfig runs only if the
# configuration file was written).
echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/hsa-ext-rocr-dev.conf && ldconfig


================================================
FILE: runtime/packages/hsa-ext-rocr-dev/rpm_postun
================================================
# RPM post-uninstall scriptlet for hsa-ext-rocr-dev.
# The linker configuration is removed only when $1 equals 0 -- presumably the
# count of package instances left after the operation (0 = full erase, not an
# upgrade); confirm against the RPM scriptlet documentation.
if [ $1 -eq 0 ]; then
    rm -f /etc/ld.so.conf.d/hsa-ext-rocr-dev.conf && ldconfig
fi


================================================
FILE: runtime/packages/rocr_tools_legacy/CMakeLists.txt
================================================
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
##                 AMD Research and AMD HSA Software Development
##
##                 Advanced Micro Devices, Inc.
##
##                 www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
##  - Redistributions of source code must retain the above copyright notice,
##    this list of conditions and the following disclaimers.
##  - Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimers in
##    the documentation and/or other materials provided with the distribution.
##  - Neither the names of Advanced Micro Devices, Inc,
##    nor the names of its contributors may be used to endorse or promote
##    products derived from this Software without specific prior written
##    permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################

cmake_minimum_required ( VERSION 3.5.0 )

## Set the name and project name.
set ( PROJECT_STRING rocr-tools-legacy )
project ( ${PROJECT_STRING} )

#
# The second argument ("tools_objs") names the folder where the build
# products of hsa-runtime-tools accumulate. It is placed directly under
# the current cmake binary directory.
#
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../hsa-runtime-tools" "${CMAKE_CURRENT_BINARY_DIR}/tools_objs")

## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake_modules" )
include ( utils )

## Get the package version.
get_version ( "1.1.9" )

set( PACKAGE_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}.${VERSION_COMMIT_COUNT}-${VERSION_JOB}-${VERSION_HASH}" )

## Packaging directives
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Package types to build")

set ( CPACK_PACKAGE_NAME ${PROJECT_NAME} )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION ${PACKAGE_VERSION_STRING} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD Heterogeneous System Architecture HSA - Linux HSA Runtime extensions for ROCm platforms" )
set ( CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/description" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/copyright" )

# Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsakmt-roct, hsa-rocr-dev" )
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/ROCR-Runtime" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/postinst;${CMAKE_CURRENT_SOURCE_DIR}/prerm" )

# RPM package specific variables
# The RPM generator reads its dependency list from CPACK_RPM_PACKAGE_REQUIRES;
# a *_DEPENDS variable exists only for the Debian generator.
set ( CPACK_RPM_PACKAGE_REQUIRES "hsakmt-roct, hsa-rocr-dev" )
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/rpm_postun" )

include ( CPack )


================================================
FILE: runtime/packages/rocr_tools_legacy/copyright
================================================
The University of Illinois/NCSA
Open Source License (NCSA)

Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.

Developed by:

                AMD Research and AMD HSA Software Development

                Advanced Micro Devices, Inc.

                www.amd.com

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal with the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

 - Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimers.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimers in
   the documentation and/or other materials provided with the distribution.
 - Neither the names of Advanced Micro Devices, Inc,
   nor the names of its contributors may be used to endorse or promote
   products derived from this Software without specific prior written
   permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.


================================================
FILE: runtime/packages/rocr_tools_legacy/description
================================================
This package includes the legacy version of the ROCr Tools library. The use
of this library is deprecated and no longer supported.

This library provides the following services:

  - Kernel dispatches via PM4 path
  - Collection of Performance Counters
    - The last ROCm release that supported CodeXL was 2.6
    - Submission of Pm4 commands via AQL packets enables this functionality
  - Support various Debugger related activities
    - The last ROCm release that supported Debugger was 2.6

Services other than kernel dispatches are AMD private i.e. are
not defined by any specification.


================================================
FILE: runtime/packages/rocr_tools_legacy/postinst
================================================
#!/bin/bash
#
# Debian post-installation script for the rocr-tools-legacy package.
# The first argument ($1) names the action being performed.
#
# Fix: the interpreter line previously read "#/bin/bash", which is an ordinary
# comment rather than a shebang, so the script had no declared interpreter.

# Abort on the first failing command.
set -e

# Register /opt/rocm/hsa/lib with the dynamic linker, then refresh its cache.
# ldconfig only runs when the configuration file was written successfully.
do_ldconfig() {
    echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/rocr_tools_legacy.conf && ldconfig
}

case "$1" in
   configure)
       do_ldconfig
   ;;
   abort-upgrade|abort-remove|abort-deconfigure)
       # Nothing to undo; just report which abort action was requested.
       echo "$1"
   ;;
   *)
       # Any other action is not handled here.
       exit 0
   ;;
esac


================================================
FILE: runtime/packages/rocr_tools_legacy/prerm
================================================
#!/bin/bash
#
# Debian pre-removal script for the rocr-tools-legacy package.
# The first argument ($1) names the action being performed.

# Abort on the first failing command.
set -e

# Delete this package's dynamic-linker configuration entry and, if that
# succeeded, refresh the linker cache.
remove_linker_conf() {
    rm -f /etc/ld.so.conf.d/rocr_tools_legacy.conf && ldconfig
}

# Only "remove" needs any work. "purge" and every other action fall through
# and the script finishes successfully.
if [ "$1" = "remove" ]; then
    remove_linker_conf
fi


================================================
FILE: runtime/packages/rocr_tools_legacy/rpm_post
================================================
# RPM post-install scriptlet for rocr-tools-legacy: register /opt/rocm/hsa/lib
# with the dynamic linker and refresh its cache (ldconfig runs only if the
# configuration file was written).
echo /opt/rocm/hsa/lib > /etc/ld.so.conf.d/rocr_tools_legacy.conf && ldconfig


================================================
FILE: runtime/packages/rocr_tools_legacy/rpm_postun
================================================
# RPM post-uninstall scriptlet for rocr-tools-legacy.
# The linker configuration is removed only when $1 equals 0 -- presumably the
# count of package instances left after the operation (0 = full erase, not an
# upgrade); confirm against the RPM scriptlet documentation.
#
# Fix: the closing "]" must be a separate word. The previous "0]" made the
# test command fail with a syntax error every time, so the configuration file
# was never removed.
if [ "$1" -eq 0 ]; then
    rm -f /etc/ld.so.conf.d/rocr_tools_legacy.conf && ldconfig
fi


================================================
FILE: samples/GetInfo/get_info.cpp
================================================
#include "get_info.h"

#include <iostream>

// Constructs the sample and hands the label "HSA Info" to the HsaTest base.
GetInfo::GetInfo() : HsaTest("HSA Info") {}

// Empty destructor body: this class adds nothing of its own to release.
GetInfo::~GetInfo() {}

void GetInfo::Run() {
  std::cout << std::endl;
  std::cout << "Num CPUs in platform: " << cpus_.size() << std::endl;
  std::cout << "------------------------------------------------\n";

  for (size_t i = 0; i < cpus_.size(); ++i) {
    hsa_agent_t cpu = cpus_[i];
    std::cout << "CPU[" << i << "] properties:" << std::endl;
    std::cout << "------------------------------------------------\n";
    AgentProps prop(cpu);
    PrintAgentInfo(prop);
    PrintPeers(cpu);
    std::cout << "------------------------------------------------\n";

    hsa_amd_memory_pool_t global_fine = global_fine_[cpu.handle];
    if (global_fine.handle != 0) {
      std::cout << "CPU[" << i << "] system fine grain pool properties:\n";
      std::cout << "------------------------------------------------\n";
      PoolProps prop(global_fine);
      PrintPoolInfo(prop);
      std::cout << "------------------------------------------------\n";
    }

    hsa_amd_memory_pool_t global_coarse = global_coarse_[cpu.handle];
    if (global_coarse.handle != 0) {
      std::cout << "CPU[" << i << "] system coarse grain pool properties:\n";
      std::cout << "------------------------------------------------\n";
      PoolProps prop(global_coarse);
      PrintPoolInfo(prop);
      std::cout << "------------------------------------------------\n";
    }
  }

  std::cout << std::endl;
  std::cout << "Num GPUs in platform: " << gpus_.size() << std::endl;
  std::cout << "------------------------------------------------\n";

  for (size_t i = 0; i < gpus_.size(); ++i) {
    hsa_agent_t gpu = gpus_[i];
    std::cout << "GPU[" << i << "] properties:" << std::endl;
    std::cout << "------------------------------------------------\n";
    AgentProps prop(gpu);
    PrintAgentInfo(prop);
    PrintPeers(gpu);
    std::cout << "------------------------------------------------\n";

    hsa_amd_memory_pool_t global_coarse = global_coarse_[gpu.handle];
    if (global_coarse.handle != 0) {
      std::cout << "GPU[" << i << "] local memory pool properties:\n";
      std::cout << "------------------------------------------------\n";
      PoolProps prop(global_coarse);
      PrintPoolInfo(prop);
      std::cout << "------------------------------------------------\n";
    }

    hsa_amd_memory_pool_t group = group_[gpu.handle];
    if (group.handle != 0) {
      std::cout << "GPU[" << i << "] group memory pool properties:\n";
      std::cout << "------------------------------------------------\n";
      PoolProps prop(group);
      PrintPoolInfo(prop);
      std::cout << "------------------------------------------------\n";
    }
  }
}

int main(int argc, char* argv[]) {
  GetInfo get_info;

  get_info.Init();
  get_info.Run();
  get_info.Cleanup();

  return 0;
}

================================================
FILE: samples/GetInfo/get_info.h
================================================
#ifndef GET_INFO_H
#define GET_INFO_H

#include "samples/common/hsa_test.h"

// Sample built on the HsaTest base class; its Run() override prints agent
// and memory pool properties to stdout.
class GetInfo : public HsaTest {
 public:
  GetInfo();
  ~GetInfo();

  // Replaces the base-class Run() with the reporting logic of this sample.
  void Run() override;
};

#endif // GET_INFO_H


================================================
FILE: samples/common/common.cpp
================================================
#include "common.hpp"

// Returns normally when the status is HSA_STATUS_SUCCESS. Any other status
// prints a fixed message to stderr and terminates the process with
// EXIT_FAILURE.
void ErrorCheck(hsa_status_t hsa_error_code) {
  if (hsa_error_code == HSA_STATUS_SUCCESS) {
    return;
  }

  std::cerr << "HSA reported error!" << std::endl;
  exit(EXIT_FAILURE);
}

// Stores `agent` into the hsa_agent_t that `data` points to when the agent's
// device type is GPU.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `data`, the status of
// the device-type query if that query fails, and HSA_STATUS_SUCCESS
// otherwise (whether or not the agent was a GPU).
// NOTE(review): if this is used as an agent-iteration callback, returning
// success for every agent means a later GPU overwrites an earlier one.
hsa_status_t FindGpuDevice(hsa_agent_t agent, void *data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_kind;
  const hsa_status_t query_status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_kind);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  if (device_kind == HSA_DEVICE_TYPE_GPU) {
    hsa_agent_t* found_agent = static_cast<hsa_agent_t*>(data);
    *found_agent = agent;
  }

  return HSA_STATUS_SUCCESS;
}

// Stores `region` into the hsa_region_t that `data` points to when the
// region reports itself as host accessible.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null `data`, the status of
// the region query if that query fails, and HSA_STATUS_SUCCESS otherwise
// (whether or not the region was host accessible).
// NOTE(review): if this is used as a region-iteration callback, returning
// success for every region means a later match overwrites an earlier one.
hsa_status_t FindHostRegion(hsa_region_t region, void *data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The AMD-specific attribute id is cast to the core enum type expected by
  // hsa_region_get_info.
  const hsa_region_info_t host_access_attr =
      static_cast<hsa_region_info_t>(HSA_AMD_REGION_INFO_HOST_ACCESSIBLE);

  bool host_accessible = false;
  const hsa_status_t query_status =
      hsa_region_get_info(region, host_access_attr, &host_accessible);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  if (host_accessible) {
    hsa_region_t* found_region = static_cast<hsa_region_t*>(data);
    *found_region = region;
  }

  return HSA_STATUS_SUCCESS;
}


================================================
FILE: samples/common/common.hpp
================================================
#ifndef COMMON_COMMON_HPP
#define COMMON_COMMON_HPP

#include <cstdlib>
#include <iostream>

#include "hsa.h"
#include "hsa_ext_finalize.h"
#include "hsa_ext_amd.h"

// ALIGNED_(x): compiler-specific spelling of an x-byte alignment request.
// It stays undefined on compilers that define neither _MSC_VER nor __GNUC__.
#if defined(_MSC_VER)
  #define ALIGNED_(x) __declspec(align(x))
#else
  #if defined(__GNUC__)
    #define ALIGNED_(x) __attribute__ ((aligned(x)))
  #endif // __GNUC__
#endif // _MSC_VER

// MULTILINE(...): turns its arguments into a string literal via the
// preprocessor's stringizing operator.
#define MULTILINE(...) # __VA_ARGS__

// Prints a message to stderr and exits the process unless the status is
// HSA_STATUS_SUCCESS.
void ErrorCheck(hsa_status_t hsa_error_code);

// Writes `agent` to the hsa_agent_t pointed to by `data` if it is a GPU.
hsa_status_t FindGpuDevice(hsa_agent_t agent, void *data);

// Writes `region` to the hsa_region_t pointed to by `data` if it is host
// accessible.
hsa_status_t FindHostRegion(hsa_region_t region, void *data);

#endif // COMMON_COMMON_HPP


================================================
FILE: samples/common/common_utility.cpp
================================================
#include "common_utility.h"


// Returns the median of `scores`.
//
// The values do not have to be ordered: the function sorts its own by-value
// copy, so the caller's container is never modified. For an even number of
// values the result is the average of the two middle values. An empty input
// yields 0.0 instead of reading out of bounds.
double CalcMedian(vector<double> scores)
{
	if (scores.empty())
		return 0.0;

	// The median is only defined on ordered data.
	std::sort(scores.begin(), scores.end());

	const size_t size = scores.size();
	const size_t mid = size / 2;

	if (size % 2 == 0)
		return (scores[mid - 1] + scores[mid]) / 2;

	return scores[mid];
}

// Returns the arithmetic mean of `scores`: the sum of all values, added in
// container order, divided by the number of values.
double CalcMean(vector<double> scores)
{
	double total = 0;

	for (double value : scores)
		total += value;

	return total / scores.size();
}


// Returns the square root of the average squared distance between each value
// in `scores` and `score_mean` (the sum is divided by the number of values).
//
// `score_mean` is an int, so any fractional part of the mean is already lost
// before this function sees it.
double CalcStdDeviation(vector<double> scores, int score_mean)
{
	double squared_sum = 0.0;

	for (double value : scores)
	{
		const double delta = value - score_mean;
		squared_sum += delta * delta;
	}

	return sqrt(squared_sum / scores.size());
}

// Counts how many entries of `scores` (from index 1 on) satisfy
// (expected - measured) < 10% of expected, where the expected value for
// index k is scores[0] / 2^k.
//
// Every expected value is also printed to stdout, one per line, before the
// counting starts.
int CalcConcurrentQueues(vector<double> scores)
{
    const size_t count = scores.size();

    // Expected time for index k: the first score halved k times.
    vector<double> ideal_times;
    for (size_t k = 0; k < count; ++k)
        ideal_times.push_back(scores[0] / (1 << k));

    for (double ideal : ideal_times)
        cout << "expected exe time = " << ideal << endl;

    int matching = 0;
    for (size_t k = 1; k < count; ++k)
    {
        const double shortfall = ideal_times[k] - scores[k];
        if (shortfall < 0.1 * ideal_times[k])
            ++matching;
    }

    return matching;
}


================================================
FILE: samples/common/common_utility.h
================================================
#include <iostream>
#include <algorithm>
#include <cmath>
#include <vector>
using namespace std;

// Statistics helpers for vectors of scores. Every function takes its vector
// by value, i.e. works on a copy of the caller's data.

// Sum of the values divided by their count.
double CalcMean(vector<double> scores);
// Middle value (or the average of the two middle values for an even count).
double CalcMedian(vector<double> scores);
// Square root of the mean squared distance to score_mean (an int).
double CalcStdDeviation(vector<double> scores, int score_mean);
// Number of scores close to their expected value scores[0] / 2^index.
int CalcConcurrentQueues(vector<double> scores);


================================================
FILE: samples/common/helper_funcs.cpp
================================================
/**********************************************************************
Copyright 2013 Advanced Micro Devices, Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

	Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
	Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
 other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************/

#include "helper_funcs.hpp"

#ifndef _WIN32
#include <unistd.h>
#endif


/*
 * Writes a row-major 2-D array to std::cout.
 * Output layout: an empty line, the header on its own line, then `height`
 * rows each holding `width` elements followed by a single space, and finally
 * an empty line. Every element is printed; no truncation is applied.
 */
template<typename T>
void printArray(
    const std::string header,
    const T * data,
    const int width,
    const int height)
{
    std::cout << "\n" << header << "\n";

    // Elements are visited in storage order, so a running offset is enough.
    int offset = 0;
    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            std::cout << data[offset] << " ";
            ++offset;
        }
        std::cout << "\n";
    }

    std::cout << "\n";
}

/*
 * Fills a width x height array with pseudo-random values derived from rand().
 * Each element is rangeMin plus T(x), where x is a double in
 * [0, (rangeMax - rangeMin) + 1).
 * A seed of 0 selects the current time as seed. Note that this reseeds the
 * global rand() generator on every call.
 * Returns HSA_SDK_FAILURE (after printing an error) when arrayPtr is NULL,
 * HSA_SDK_SUCCESS otherwise.
 */
template<typename T>
int fillRandom(
         T * arrayPtr,
         const int width,
         const int height,
         const T rangeMin,
         const T rangeMax,
         unsigned int seed)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    if(seed == 0)
    {
        seed = (unsigned int)time(NULL);
    }
    srand(seed);

    const double span = double(rangeMax - rangeMin) + 1.0;

    /* random initialisation of input */
    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            const int slot = row*width + col;
            const double scaled = span*rand()/(RAND_MAX + 1.0);
            arrayPtr[slot] = rangeMin + T(scaled);
        }
    }

    return HSA_SDK_SUCCESS;
}

/*
 * Stores in every element of a width x height array its own row-major
 * position (row * width + column).
 * The loop counters and the position are of type T, so the position is
 * subject to T's range.
 * Returns HSA_SDK_FAILURE (after printing an error) when arrayPtr is NULL,
 * HSA_SDK_SUCCESS otherwise.
 */
template<typename T>
int fillPos(
         T * arrayPtr,
         const int width,
         const int height)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    /* initialisation of input with positions*/
    for(T row = 0; row < height; row++)
    {
        for(T col = 0; col < width; col++)
        {
            const T position = row*width + col;
            arrayPtr[position] = position;
        }
    }

    return HSA_SDK_SUCCESS;
}

/*
 * Assigns `val` to every element of a width x height array.
 * Returns HSA_SDK_FAILURE (after printing an error) when arrayPtr is NULL,
 * HSA_SDK_SUCCESS otherwise.
 */
template<typename T>
int fillConstant(
         T * arrayPtr,
         const int width,
         const int height,
         const T val)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    /* initialisation of input with constant value*/
    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            arrayPtr[row*width + col] = val;
        }
    }

    return HSA_SDK_SUCCESS;
}

/*
 * Rounds `val` up to the next power of two; a value that already is a power
 * of two is returned unchanged.
 *
 * Method: decrement, copy the highest set bit into every lower bit position
 * by OR-ing the value with itself shifted right by 1, 2, 4, ... bits, then
 * increment again.
 *
 * The shift distances must cover the full bit width of T. The loop therefore
 * doubles the shift until it reaches the width, instead of iterating
 * sizeof(T) times (which skipped the larger shifts for short/int and shifted
 * by more than the width for 8-byte types).
 *
 * The result is only meaningful when the next power of two is representable
 * in T.
 */
template<typename T>
T roundToPowerOf2(T val)
{
    // Bit width of T; 8 bits per byte is assumed.
    const unsigned int bitWidth = static_cast<unsigned int>(sizeof(T) * 8);

    val--;
    for(unsigned int shift = 1; shift < bitWidth; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}

/*
 * Reports whether `val` is a power of two.
 * Returns HSA_SDK_SUCCESS when the value is non-zero and equal to its own
 * lowest set bit, HSA_SDK_FAILURE otherwise. Note the status-code convention:
 * success is 0, so the result must not be used directly as a boolean.
 */
template<typename T>
int isPowerOf2(T val)
{
    const long long wide = val;
    // x & -x isolates the lowest set bit of x.
    const long long lowestSetBit = wide & (-wide);
    const bool singleBit = (lowestSetBit - wide == 0) && (wide != 0);

    return singleBit ? HSA_SDK_SUCCESS : HSA_SDK_FAILURE;
}


/*
 * Compares `input` with `reference` using operator==.
 * Returns true on equality; otherwise prints `message` through error() and
 * returns false. The `isAPIerror` flag is accepted but not used by this
 * implementation.
 */
template<typename T>
bool checkVal(
    T input,
    T reference,
    std::string message,
    bool isAPIerror)
{
    const bool matches = (input==reference);
    if(!matches)
    {
        error(message);
    }
    return matches;
}


/*
 * Formats `t` as a string after applying the stream manipulator `r`
 * (for example std::dec or std::hex) to a fresh output string stream.
 */
template<typename T>
std::string toString(T t, std::ios_base &(*r)(std::ios_base&))
{
  std::ostringstream formatted;
  // Inserting an ios_base manipulator into a stream simply invokes it on the stream.
  r(formatted);
  formatted << t;
  return formatted.str();
}


/*
 * Compares `data` against `refData` using the relative L2 error
 *     sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * over all `length` elements and returns true when it is below `epsilon`.
 *
 * Returns false when the reference's sum of squares is below 1e-7 (including
 * length <= 0), because the relative error is not defined in that case.
 *
 * The accumulation starts at index 0; starting at 1 left the first element
 * of both arrays unchecked.
 */
bool
compare(const float *refData, const float *data, 
                        const int length, const float epsilon)
{
    float errorSq = 0.0f;
    float refSq = 0.0f;

    for(int i = 0; i < length; ++i) 
    {
        const float diff = refData[i] - data[i];
        errorSq += diff * diff;
        refSq += refData[i] * refData[i];
    }

    // A (near-)zero reference cannot be used as the denominator.
    if (::fabs((float) refSq) < 1e-7f) {
        return false;
    }

    const float normRef = ::sqrtf((float) refSq);
    const float normError = ::sqrtf((float) errorSq);

    return (normError / normRef) < epsilon;
}

/*
 * Double-precision variant: compares `data` against `refData` using the
 * relative L2 error
 *     sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * over all `length` elements and returns true when it is below `epsilon`.
 *
 * Returns false when the reference's sum of squares is below 1e-7 (including
 * length <= 0), because the relative error is not defined in that case.
 *
 * The accumulation starts at index 0; starting at 1 left the first element
 * of both arrays unchecked.
 */
bool
compare(const double *refData, const double *data, 
                        const int length, const double epsilon)
{
    double errorSq = 0.0;
    double refSq = 0.0;

    for(int i = 0; i < length; ++i) 
    {
        const double diff = refData[i] - data[i];
        errorSq += diff * diff;
        refSq += refData[i] * refData[i];
    }

    // A (near-)zero reference cannot be used as the denominator.
    if (::fabs((double) refSq) < 1e-7) {
        return false;
    }

    const double normRef = ::sqrt((double) refSq);
    const double normError = ::sqrt((double) errorSq);

    return (normError / normRef) < epsilon;
}

/* Prints "Error: <errorMsg>" on its own line to std::cout and flushes. */
void
error(const char* errorMsg)
{
    std::ostream& out = std::cout;
    out << "Error: ";
    out << errorMsg;
    out << std::endl;
}

/* Prints "Error: <errorMsg>" on its own line to std::cout and flushes. */
void
error(std::string errorMsg)
{
    std::ostream& out = std::cout;
    out << "Error: ";
    out << errorMsg;
    out << std::endl;
}

/* Prints "Expected Error: <errorMsg>" on its own line to std::cout and flushes. */
void
expectedError(const char* errorMsg)
{
    std::ostream& out = std::cout;
    out << "Expected Error: ";
    out << errorMsg;
    out << std::endl;
}

/* Prints "Expected Error: <errorMsg>" on its own line to std::cout and flushes. */
void
expectedError(std::string errorMsg)
{
    std::ostream& out = std::cout;
    out << "Expected Error: ";
    out << errorMsg;
    out << std::endl;
}


/////////////////////////////////////////////////////////////////
// Template Instantiations 
//
// The templates above are defined in this translation unit only, so each
// element type that other files may use has to be instantiated explicitly
// here. A type that is not listed below has no definition to link against.
/////////////////////////////////////////////////////////////////

// printArray: short, unsigned char, unsigned int, int, long, float, double
template 
void printArray<short>(const std::string, 
        const short*, int, int);
template 
void printArray<unsigned char>(const std::string, 
        const unsigned char *, int, int);
template 
void printArray<unsigned int>(const std::string, 
        const unsigned int *, int, int);
template 
void printArray<int>(const std::string, 
        const int *, int, int);
template 
void printArray<long>(const std::string, 
        const long*, int, int);
template 
void printArray<float>(const std::string, 
        const float*, int, int);
template 
void printArray<double>(const std::string, 
        const double*, int, int);

// fillRandom: unsigned char, unsigned int, int, long, float, double
template 
int fillRandom<unsigned char>(unsigned char* arrayPtr, 
        const int width, const int height, 
        unsigned char rangeMin, unsigned char rangeMax, unsigned int seed);	
template 
int fillRandom<unsigned int>(unsigned int* arrayPtr, 
        const int width, const int height, 
        unsigned int rangeMin, unsigned int rangeMax, unsigned int seed);	
template 
int fillRandom<int>(int* arrayPtr, 
        const int width, const int height, 
        int rangeMin, int rangeMax, unsigned int seed);	
template 
int fillRandom<long>(long* arrayPtr, 
        const int width, const int height, 
        long rangeMin, long rangeMax, unsigned int seed);	
template 
int fillRandom<float>(float* arrayPtr, 
        const int width, const int height, 
        float rangeMin, float rangeMax, unsigned int seed);	
template 
int fillRandom<double>(double* arrayPtr, 
        const int width, const int height, 
        double rangeMin, double rangeMax, unsigned int seed);	

// roundToPowerOf2: short, unsigned int, int, long
template 
short roundToPowerOf2<short>(short val);
template 
unsigned int roundToPowerOf2<unsigned int>(unsigned int val);
template 
int roundToPowerOf2<int>(int val);
template 
long roundToPowerOf2<long>(long val);

// isPowerOf2: short, unsigned int, int, long
template
int isPowerOf2<short>(short val);
template
int isPowerOf2<unsigned int>(unsigned int val);
template
int isPowerOf2<int>(int val);
template
int isPowerOf2<long>(long val);

template<> 
int fillPos<short>(short * arrayPtr, const int width, const int height);
template<> 
int fillPos<unsigned int>(unsigned int * arrayPtr, const int width, const int height);
template<> 
int fillPos<int>(int * arrayPtr, const int width, const int height);
template<> 
int fillPos<long>(long * arrayPtr, const int width, const int height);

template<> 
int fillConstant<short>(short * arrayPtr, 
        const int width, const int height, 
        const short val);
template<> 
int fillConstant(unsigned int * arrayPtr, 
        const int width, const int height, 
        const unsigned int val);
template<> 
int fillConstant(int * arrayPtr, 
        const int width, const int height, 
        const int val);
template<> 
int fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);
template<> 
int fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);
template<> 
int fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);


// Explicit instantiations of checkVal:
// char, bool, std::string, short, unsigned int, int, long
template
bool checkVal<char>(char input, char reference, std::string message, bool isAPIerror);
template
bool checkVal<bool>(bool input, bool reference, std::string message, bool isAPIerror);
template
bool checkVal<std::string>(std::string input, std::string reference, std::string message, bool isAPIerror);
template
bool checkVal<short>(short input, short reference, std::string message, bool isAPIerror);
template
bool checkVal<unsigned int>(unsigned int  input, unsigned int  reference, std::string message, bool isAPIerror);
template
bool checkVal<int>(int input, int reference, std::string message, bool isAPIerror);
template
bool checkVal<long>(long input, long reference, std::string message, bool isAPIerror);


// Explicit instantiations of toString:
// char, short, unsigned int, int, long, float, double
template
std::string toString<char>(char t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<short>(short t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<unsigned int>(unsigned int t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<int>(int t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<long>(long t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<float>(float t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<double>(double t, std::ios_base &(*r)(std::ios_base&));


================================================
FILE: samples/common/helper_funcs.hpp
================================================
/**********************************************************************
Copyright 2013 Advanced Micro Devices, Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

	Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
	Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
 other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef HELPER_FUNCS_HPP_
#define HELPER_FUNCS_HPP_

#define HSA_SDK_SUCCESS 0
#define HSA_SDK_FAILURE 1
#define HSA_SDK_EXPECTED_FAILURE 2

#include <iostream>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>
#include <ctime>
#include <cmath>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <malloc.h>

/**
 * error
 * Prints "Error: <errorMsg>" followed by a newline to std::cout.
 * @param errorMsg message as a C string
 */
void error(const char* errorMsg);	

/**
 * error
 * Prints "Error: <errorMsg>" followed by a newline to std::cout.
 * @param errorMsg message as a std::string
 */
void error(std::string errorMsg);

/**
 * expectedError
 * Prints "Expected Error: <errorMsg>" followed by a newline to std::cout.
 * @param errorMsg message as a C string
 */
void expectedError(const char* errorMsg);	

/**
 * expectedError
 * Prints "Expected Error: <errorMsg>" followed by a newline to std::cout.
 * @param errorMsg message as a std::string
 */
void expectedError(std::string errorMsg);

/**
 * compare (float and double overloads)
 * Checks computed data against reference data using the relative L2 error
 * sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
 * @param refData reference values
 * @param data values to check
 * @param length number of values in each array
 * @param epsilon error threshold; the relative error must be below it
 * @return true when the relative error is below epsilon; false otherwise,
 *         and also false when the reference's sum of squares is below 1e-7
 */
bool compare(const float *refData, const float *data, 
        const int length, const float epsilon = 1e-6f); 
bool compare(const double *refData, const double *data, 
        const int length, const double epsilon = 1e-6); 

/**
 * printArray
 * Prints the header and then a width x height array, one row per line, to
 * std::cout.
 *
 * Like every template in this header, it is only declared here; the
 * definition and the explicit instantiations live in helper_funcs.cpp, so
 * only the element types instantiated there can be used.
 */
template<typename T> 
void printArray(
     const std::string header,
     const T * data, 
     const int width,
     const int height);


/**
 * fillRandom
 * Fills a width x height array with rand()-based values starting at rangeMin.
 * A seed of 0 selects the current time as seed.
 * @return HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is NULL
 */
template<typename T> 
int fillRandom(
     T * arrayPtr, 
     const int width,
     const int height,
     const T rangeMin,
     const T rangeMax,
     unsigned int seed=123);	
  
/**
 * fillPos
 * Sets every element of a width x height array to its own row-major
 * position (row * width + column).
 * @return HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is NULL
 */
template<typename T> 
int fillPos(
     T * arrayPtr, 
     const int width,
     const int height);
  
/**
 * fillConstant
 * Sets every element of a width x height array to val.
 * @return HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is NULL
 */
template<typename T> 
int fillConstant(
     T * arrayPtr, 
     const int width,
     const int height,
     const T val);

  
/**
 * roundToPowerOf2
 * Rounds val up to a power of 2.
 */
template<typename T>
T roundToPowerOf2(T val);

/**
 * isPowerOf2
 * Checks whether val is a power of 2.
 * @return HSA_SDK_SUCCESS (0) when it is, HSA_SDK_FAILURE (1) otherwise;
 *         the result is a status code, not a boolean
 */
template<typename T>
int isPowerOf2(T val);
  
/**
 * checkVal
 * Compares input with reference; on mismatch prints message via error().
 * isAPIerror defaults to true and is meant to be set to false when checkVal
 * is used to check something other than an OpenCL API error code; the
 * implementation in helper_funcs.cpp does not read it.
 * @return true when input equals reference, false otherwise
 */
template<typename T> 
bool checkVal(
  T input, 
  T reference, 
  std::string message, bool isAPIerror = true);

/**
 * toString
 * Converts a value of type T to a string, applying the stream manipulator r
 * (for example std::dec or std::hex) before formatting.
 */
template<typename T>
std::string toString(T t, std::ios_base & (*r)(std::ios_base&)); 


#endif


================================================
FILE: samples/common/hsa_base_util.cpp
================================================
#include "hsa_base_util.h"
#include "HSAILAmdExt.h"


/*
 * Stores the two HSAIL file names (full-profile and base-profile variant)
 * and the kernel name in the object's fixed-size character buffers, from
 * where HsaInit() reads them.
 *
 * The copies are bounded by the destination size and always terminated, so
 * an over-long argument is truncated instead of overrunning the member
 * arrays (the previous strcpy calls had no bound).
 */
void HSA_UTIL::GetHsailNameAndKernelName(char * file_name_full, char *file_name_base, char *kernel_name)
{
	strncpy(hail_file_name_full, file_name_full, sizeof(hail_file_name_full) - 1);
	hail_file_name_full[sizeof(hail_file_name_full) - 1] = '\0';

	strncpy(hail_file_name_base, file_name_base, sizeof(hail_file_name_base) - 1);
	hail_file_name_base[sizeof(hail_file_name_base) - 1] = '\0';

	strncpy(hsa_kernel_name, kernel_name, sizeof(hsa_kernel_name) - 1);
	hsa_kernel_name[sizeof(hsa_kernel_name) - 1] = '\0';
}

// Creates the two timers used for kernel and setup timing, but only when the
// build defines TIME.
// NOTE(review): without TIME the two index members stay uninitialized, while
// GetKernelTime()/GetSetupTime() read them unconditionally -- confirm those
// accessors are only used in TIME builds.
HSA_UTIL::HSA_UTIL()
{
#ifdef TIME
    	base_kernel_time_idx = base_timer.CreateTimer();
	base_setup_time_idx = base_timer.CreateTimer();
#endif
}

// Intentionally empty: the HSA objects created in HsaInit() are released by
// Close(), not by the destructor.
HSA_UTIL::~HSA_UTIL()
{

}


/*
 * Brings the HSA runtime up and prepares the kernel named by
 * GetHsailNameAndKernelName() for dispatch. In order:
 *   1. initialise the runtime and pick a GPU agent (find_gpu callback),
 *   2. create a multi-producer queue of the agent's maximum size,
 *   3. assemble the HSAIL file (the base-profile file when the agent reports
 *      HSA_PROFILE_BASE) into a BRIG module,
 *   4. create a program, add the module and finalize it for the agent's ISA,
 *   5. create an executable, load the code object, freeze it,
 *   6. look up the kernel symbol and its kernel object handle,
 *   7. find the memory regions used for later allocations.
 *
 * Returns false when no GPU agent was found or the HSAIL file could not be
 * assembled, true otherwise.
 *
 * NOTE(review): `check` is a macro defined outside this file; how it reacts
 * to a failing status (print only, or abort) is not visible here -- confirm
 * before relying on execution continuing or stopping after an error.
 *
 * With TIME defined, the setup timer covers steps 1-6.
 */
bool HSA_UTIL::HsaInit()
{
#ifdef TIME
       base_timer.StartTimer(base_setup_time_idx);
#endif

 	err = hsa_init();
 	check(Initializing the hsa runtime, err);

	/* 
	 * Iterate over the agents and pick the gpu agent using 
	 * the find_gpu callback.
	 */
	err = hsa_iterate_agents(find_gpu, &device);
	check(Calling hsa_iterate_agents, err);

	err = (device.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
	check(Checking if the GPU device is non-zero, err);

	if (err == HSA_STATUS_ERROR)
		return false;

	/*
	 * Query the maximum size of the queue.
	 */
	err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
	check(Querying the device maximum queue size, err);

	/*  
	 * Create a queue using the maximum size.
	 */
	err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &command_queue);
	check(Creating the queue, err);

	// 108 is a placeholder written before the query below overwrites it.
	// NOTE(review): the status of this hsa_agent_get_info call is not checked.
	profile = hsa_profile_t(108);
       hsa_agent_get_info(device, HSA_AGENT_INFO_PROFILE, &profile);

       if (profile == HSA_PROFILE_BASE) 
	{
	    memset(hail_file_name_full, 0, sizeof(char)*128);
           cout << "Loading base profile!!!" << endl;
           strcpy(hail_file_name_full, hail_file_name_base); //overwrite full hsail file name with base 
       } 
   
        // Assemble the HSAIL text file into a BRIG module.
        amd::hsail::registerExtensions();
        if (!tool.assembleFromFile(hail_file_name_full)) 
	{
          std::cout << tool.output();
          return false;
        }
        module = tool.brigModule();

	// Create hsail program.
	err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, profile, HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, NULL, &hsa_program);
	check("Error in creating program object", err);

	// Add hsail module.
	//cout << "hsail file name = " << hail_file_name_full << endl;
	err = hsa_ext_program_add_module(hsa_program, module);
	check("Error in adding module to program object", err);

	// Finalize hsail program.
        hsa_isa_t isa = {0};
        err = hsa_agent_get_info(device, HSA_AGENT_INFO_ISA, &isa);
        check("Get hsa agent info isa", err);

	hsa_ext_control_directives_t control_directives;
	memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));

	err = hsa_ext_program_finalize(hsa_program,
			isa,
			0,
			control_directives,
			NULL, //"-g -O0 -dump-isa",
			HSA_CODE_OBJECT_TYPE_PROGRAM,
			&code_object);
	check("Error in finalizing program object", err);

	// Create executable.
	err = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
	check("Error in creating executable object", err);

	// Load code object.
	err = hsa_executable_load_code_object(hsaExecutable, device, code_object, "");
	check("Error in loading executable object", err);

	// Freeze executable.
	err = hsa_executable_freeze(hsaExecutable, "");
	check("Error in freezing executable object", err);

	// Get symbol handle.
	err = hsa_executable_get_symbol(hsaExecutable, NULL,  hsa_kernel_name, device, 0, &kernelSymbol);
	check("get symbol handle", err);

	// Get code handle.
	
	err = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle);
	check("Get code handle", err);

#ifdef TIME
	base_timer.StopTimer(base_setup_time_idx);
#endif


	// Reset both region handles, then let the get_memory_region callback
	// fill them in; only the kernarg region is mandatory.
	//hsa_region_t local_kernarg_region;
	mem_region.kernarg_region.handle = 0;
	mem_region.coarse_region.handle = 0;

	hsa_agent_iterate_regions(device, get_memory_region, &mem_region);
	err = (mem_region.kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
	check(Finding a kernarg memory region, err);

	return true;
}

/*
 * Dispatches the kernel prepared by HsaInit() once and blocks until it has
 * finished.
 *
 * @param dim               value written into the dimensions field of the
 *                          packet's setup word
 * @param group_x/y/z       work-group size per dimension
 * @param s_size            group segment size in bytes
 * @param grid_x/y/z        grid size per dimension
 * @param kernel_args       host buffer holding the kernel arguments
 * @param kernel_args_size  size of that buffer in bytes
 * @return always 0; with TIME defined the elapsed time is recorded in the
 *         kernel timer instead
 *
 * The arguments are copied into a freshly allocated kernarg buffer, which is
 * freed again together with the completion signal before returning.
 */
double HSA_UTIL::Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size)
{
#ifdef TIME
		base_timer.StartTimer(base_kernel_time_idx);
#endif

	/*
	 * Create a signal to wait for the dispatch to finish.
	 */
	hsa_signal_t local_signal;
	err=hsa_signal_create(1, 0, NULL, &local_signal);
	check(Creating a HSA_UTIL signal, err);

	/* Initialize the dispatch packet */
	hsa_kernel_dispatch_packet_t local_dispatch_packet;
	memset(&local_dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
	/*
	 * Setup the dispatch information.
	 */
	local_dispatch_packet.completion_signal=local_signal;
	local_dispatch_packet.setup |=  dim<< HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
	local_dispatch_packet.workgroup_size_x = group_x;
	local_dispatch_packet.workgroup_size_y = group_y;
	local_dispatch_packet.workgroup_size_z = group_z;
	local_dispatch_packet.group_segment_size = s_size;
	local_dispatch_packet.grid_size_x = grid_x;
	local_dispatch_packet.grid_size_y = grid_y;
	local_dispatch_packet.grid_size_z = grid_z;
	local_dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH;
	//local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
	//local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
	local_dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
	local_dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
	local_dispatch_packet.kernel_object = codeHandle;

  // Specify amount of private segment size (in bytes) that is needed per work-item
  // Retrieve the amount of private memory needed
  uint32_t private_mem_size = 0;
  hsa_executable_symbol_get_info(kernelSymbol,
                        HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &private_mem_size);
  local_dispatch_packet.private_segment_size = private_mem_size;

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/*
	 * Find a memory region that supports kernel arguments.
	 * (Disabled: the region is now looked up once in HsaInit() and kept
	 * in mem_region.)
	 */


/*
	kernarg_region.handle = 0;

	hsa_agent_iterate_regions(device, get_kernarg, &kernarg_region);
	err = (kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
	check(Finding a kernarg memory region, err);
	
*/

        void* local_kernel_arg_buffer = NULL;
	/*
	 * Allocate the kernel argument buffer from the correct region.
	 */
	err = hsa_memory_allocate(mem_region.kernarg_region, kernel_args_size, &local_kernel_arg_buffer);
	check(Allocating kernel argument memory buffer, err);
	memcpy(local_kernel_arg_buffer, kernel_args, kernel_args_size);
	local_dispatch_packet.kernarg_address = local_kernel_arg_buffer;

	/*	
	 * Obtain the current queue write index.
	 */
	uint64_t index = hsa_queue_load_write_index_relaxed(command_queue);

	/*	
	 * Write the aql packet at the calculated queue index address.
	 * The whole packet, header included, is stored with one struct
	 * assignment.
	 * NOTE(review): the index is read and stored non-atomically, which
	 * presumably assumes a single thread submits to this queue -- confirm.
	 */
	const uint32_t queueMask = command_queue->size - 1;
	((hsa_kernel_dispatch_packet_t*)(command_queue->base_address))[index&queueMask]=local_dispatch_packet;

	/*	
	 * Increment the write index and ring the doorbell to dispatch the kernel.
	 */
	hsa_queue_store_write_index_relaxed(command_queue, index+1);
	hsa_signal_store_release(command_queue->doorbell_signal, index);

	/*	
	 * Wait on the dispatch signal until all kernel are finished.
	 * The wait is repeated until it reports the signal value 0.
	 */
	while (hsa_signal_wait_acquire(local_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE)  != 0);

#ifdef TIME
	base_timer.StopTimer(base_kernel_time_idx);
#endif

	/*
	 * Cleanup all allocated resources.
	 */

        err = hsa_memory_free(local_kernel_arg_buffer);
        check(Deallocate memory, err);

	err=hsa_signal_destroy(local_signal);
	check(Destroying the local_signal, err);

	return 0;
}

// Returns the current reading of the kernel timer (started and stopped
// around each dispatch in Run() when TIME is defined).
double HSA_UTIL::GetKernelTime()
{
    const double kernelTime = base_timer.ReadTimer(base_kernel_time_idx);
    return kernelTime;
}

// Returns the current reading of the setup timer (started and stopped
// inside HsaInit() when TIME is defined).
double HSA_UTIL::GetSetupTime()
{
    const double setupTime = base_timer.ReadTimer(base_setup_time_idx);
    return setupTime;
}

void HSA_UTIL::Close()
{
	err = hsa_executable_destroy(hsaExecutable); 
	check(Destroying the hsaExecutable, err)

	err = hsa_code_object_destroy(code_object);
	check(Destroying the code_object, err);

	err=hsa_queue_destroy(command_queue);
	check(Destroying the queue, err);

	err=hsa_shut_down();
	check(Shutting down the runtime, err);
}

void* HSA_UTIL::AllocateLocalMemory(size_t size) 
{
  void *buffer = NULL;

  // Allocate in local memory only if it is available
  if (mem_region.coarse_region.handle != 0) 
  {
      cout << "Allocating in local memory" << endl;
      err = hsa_memory_allocate(mem_region.coarse_region, size, (void **)&buffer);
      check(hsa memory allocation in local memory, err);

      // register agent
      err = hsa_memory_assign_agent(buffer, device, HSA_ACCESS_PERMISSION_RW);
      return (err == HSA_STATUS_SUCCESS) ? buffer : NULL;
  }

  // Allocate in system memory if local memory is not available
  cout << "Allocating in system memory" << endl;
  err = hsa_memory_allocate(mem_region.kernarg_region, size, (void **)&buffer);
  return (err == HSA_STATUS_SUCCESS) ? buffer : NULL;
}

// Allocates `size` bytes from the kernarg region found by HsaInit().
// Returns the buffer, or NULL when the allocation fails (`err` holds the status).
void* HSA_UTIL::AllocateSysMemory( size_t size)
{
    void *sysBuffer = NULL;
    err = hsa_memory_allocate(mem_region.kernarg_region, size, (void **)&sysBuffer);
    if (err != HSA_STATUS_SUCCESS)
    {
        return NULL;
    }
    return sysBuffer;
}

/*
 * Copies `length` bytes from `src` to `dest`.
 *
 * Before the copy, the device-side buffer (dest for a host-to-device
 * transfer, src otherwise) is assigned to the GPU agent with read/write
 * access.
 *
 * Returns true when both the assignment and the copy succeed; `err` holds
 * the status of the last HSA call. (An unused local status variable was
 * removed.)
 */
bool HSA_UTIL::TransferData(void *dest, void *src, uint length, bool host_to_dev) 
{
  void *deviceBuffer = (host_to_dev) ? dest : src;
  err = hsa_memory_assign_agent(deviceBuffer, device, HSA_ACCESS_PERMISSION_RW);
  if (err != HSA_STATUS_SUCCESS) 
  {
      return false;
  }

  err = hsa_memory_copy(dest, src, length);  // first is dest, second is src 
  return (err == HSA_STATUS_SUCCESS);
}


================================================
FILE: samples/common/hsa_base_util.h
================================================
#ifndef __HSA_BASE__
#define __HSA_BASE__


#include <vector>
#include "hsa.h"
#include "hsa_ext_finalize.h"
#include "hsa_ext_amd.h"
#include "hsatimer.h"
#include "utilities.h"
#include "common.hpp"
#include "HSAILTool.h"

// Helper that wraps the HSA runtime boilerplate used by the samples: it
// initialises the runtime, builds one kernel from an HSAIL file, dispatches
// it, and provides simple memory allocation and transfer helpers.
//
// Usage order, as implemented in hsa_base_util.cpp:
// GetHsailNameAndKernelName() -> HsaInit() -> Run() (any number of times)
// -> Close(). All state is public.
class HSA_UTIL{
    public:
	    HSA_UTIL();
	    ~HSA_UTIL();

	public:
	    // Stores the HSAIL file names (full / base profile) and the kernel name.
	    void GetHsailNameAndKernelName(char *hail_file_name_full, char *hail_file_name_base, char *kernel_name);
	    // Initialises the runtime and prepares the kernel; false on failure.
	    bool HsaInit();
        // Destroys the executable, code object and queue, then shuts the runtime down.
        void Close();
	// Timer readings; the timers are only driven when TIME is defined.
	double GetKernelTime();
	double GetSetupTime();
	// Allocate from the coarse-grained region if present, else from the kernarg region.
	void* AllocateLocalMemory(size_t size) ;
	// Allocate from the kernarg region.
	void* AllocateSysMemory(size_t size);
	// Assigns the device-side buffer to the agent, then copies length bytes src -> dest.
	bool TransferData(void *dest, void *src, uint length, bool host_to_dev) ;
	
	// Dispatches the kernel once and waits for completion; always returns 0.
	double Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size);

	public:
		// Status of the most recent HSA call made by this object.
		hsa_status_t err;
		// Maximum queue size reported by the agent.
		uint32_t queue_size;
		// GPU agent selected in HsaInit().
		hsa_agent_t device;
		// Kernarg and coarse-grained regions found in HsaInit().
		MemRegion mem_region;
              //hsa_region_t kernarg_region;
		// Memory region supporting kernel parameters
             // hsa_region_t coarse_region;
		// Hsail profile supported by agent
              hsa_profile_t profile;

		// Fixed-size name buffers filled by GetHsailNameAndKernelName().
		char hail_file_name_full[128];
		char hail_file_name_base[128];
		char hsa_kernel_name[128];

		// Objects created in HsaInit().
		hsa_queue_t* command_queue;
		HSAIL_ASM::Tool tool;
		hsa_ext_module_t module;
		hsa_ext_program_t hsa_program;
		hsa_executable_t hsaExecutable;
	  hsa_executable_symbol_t kernelSymbol;
		hsa_code_object_t code_object;
		// Kernel object handle written into each dispatch packet.
		uint64_t codeHandle;
		// NOTE(review): the next three members are not referenced by the
		// member functions in hsa_base_util.cpp -- possibly leftovers.
		hsa_signal_t hsa_signal;
		hsa_kernel_dispatch_packet_t dispatch_packet; 	
		hsa_region_t hsa_kernarg_region;

		// Timer and the indices of its kernel / setup timers (set only when
		// TIME is defined).
		PerfTimer base_timer;
		int base_kernel_time_idx;
		int base_setup_time_idx;
};


#endif


================================================
FILE: samples/common/hsa_perf_cntrs.cpp
================================================
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <cassert>

#include <iostream>
#include <vector>
#include <string>

#include <stdlib.h>

#include "hsa.h"
#include "tools/inc/hsa_ext_profiler.h"
#include "tools/inc/amd_hsa_tools_interfaces.h"

#include "hsa_perf_cntrs.hpp"

using namespace std;

void PreDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
  assert((dispParam->pre_dispatch) && "Pre Dispatch Callback Param is Malformed");

  hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
  hsa_status_t status = hsa_ext_tools_pmu_begin(*perfMgr, dispParam->queue,
                                                dispParam->aql_translation_handle, true);
  assert((status == HSA_STATUS_SUCCESS) && "Error in beginning Perf Cntr Session");
}

void PostDispatchCallback(const hsa_dispatch_callback_t* dispParam, void* usrArg) {
  assert((!dispParam->pre_dispatch) && "Post Dispatch Callback Param is Malformed");

  hsa_ext_tools_pmu_t* perfMgr = reinterpret_cast<hsa_ext_tools_pmu_t*>(usrArg);
  hsa_status_t status = hsa_ext_tools_pmu_end(*perfMgr, dispParam->queue,
                                              dispParam->aql_translation_handle);
  assert((status == HSA_STATUS_SUCCESS) && "Error in endning Perf Cntr Session");
}

// Constructor of the class. Only clears the PMU handle; the counter list and
// the PMU itself are set up later by Init().
RocrPerfCntrApp::RocrPerfCntrApp( ) : perfMgr_(NULL) {

}

// Destructor of the class. Frees the CntrInfo records that Init() allocated
// with new and stored in cntrList_; previously they were never released.
// The PMU handle and the counter handles created through the tools API are
// not released here.
// NOTE(review): this assumes the object is never copied (a copy would share
// the same CntrInfo pointers) -- confirm.
RocrPerfCntrApp::~RocrPerfCntrApp( ) {
  for (size_t idx = 0; idx < cntrList_.size(); idx++) {
    delete cntrList_[idx];
  }
  cntrList_.clear();
}

// Number of perf counters currently held in the counter list
uint32_t RocrPerfCntrApp::GetNumPerfCntrs( ) {
  const uint32_t numCntrs = static_cast<uint32_t>(cntrList_.size());
  return numCntrs;
}

// Returns the counter record stored at position idx of the counter list.
// The index is not range-checked; it must be below GetNumPerfCntrs().
CntrInfo* RocrPerfCntrApp::GetPerfCntr(uint32_t idx) {
  CntrInfo* entry = cntrList_[idx];
  return entry;
}

// Print the various fields of Perf Cntrs being programmed
bool RocrPerfCntrApp::PrintCntrs( ) {

  CntrInfo *info;
  int size = uint32_t(cntrList_.size());
  for (int idx = 0; idx < size; idx++) {
    info = cntrList_[idx];
    std::cout << std::endl;
    std::cout << "Rocr Perf Cntr Id: " << info->cntrId << std::endl;
    std::cout << "Rocr Perf Cntr Name: " << info->cntrName << std::endl;
    std::cout << "Rocr Perf Cntr Blk Id: " << info->blkId << std::endl;
    std::cout << "Rocr Perf Cntr Value: " << info->cntrResult << std::endl;
    std::cout << "Rocr Perf Cntr Validation: " << info->cnfType << std::endl;
    std::cout << std::endl;
  }
  return true;
}

// Initialize the list of perf counters
// block id of kHsaAiCounterBlockSQ = 14 == 0x0E
//
// Steps:
//   1. Build the counter list. Without the IOMMU environment variable, two
//      counters in block 0x0E (events 0x4 and 0xE) are added; with it set to
//      any value, two counters in block 0x63 (events 0x4 and 0x6) are added.
//   2. Create the PMU for the given agent.
//   3. For every listed counter: resolve its block handle, create the
//      counter in that block, set its event index and enable it.
//
// Returns the status of the last tools call. Failures are only detected
// through assert, so they go unnoticed in builds that define NDEBUG.
hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) {

  // Initialize the list of Perf Cntrs
  // Add SQ counter for number of waves
  CntrInfo* info = NULL;
  cntrList_.reserve(23);
  
  // Only the presence of the variable matters, not its value
  char *cntrChoice = getenv("IOMMU");
  if (cntrChoice == NULL) {
    // Event for number of Waves
    info = new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL,
                                  0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact);
    cntrList_.push_back(info);
    
    // Event for number of Threads
    info = new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL,
                                  0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact);
    cntrList_.push_back(info);
  
  } else {

    // Program to collect event number 4
    info = new CntrInfo(0x4, "Iommu_Cntr_4", NULL,
                        0x63, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_None);
    cntrList_.push_back(info);
  
    // Program to collect event number 6
    info = new CntrInfo(0x6, "Iommu_Cntr_6", NULL,
                        0x63, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_None);
    cntrList_.push_back(info);
  }
  

  // Create an instance of Perf Mgr
  hsa_status_t status;
  status = hsa_ext_tools_create_pmu(agent, &perfMgr_);
  assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr Mgr");

  // Process each counter from the list as necessary
  // each counter descriptor with its perf block handle
  // and create an instance of counter in that block
  uint32_t size = GetNumPerfCntrs();
  for (uint32_t idx = 0; idx < size; idx++) {
    info = GetPerfCntr(idx);
    
    // Obtain the handle of perf block
    if (info->blkHndl == NULL) {
      status = hsa_ext_tools_get_counter_block_by_id(perfMgr_, info->blkId, &info->blkHndl);
      assert((status == HSA_STATUS_SUCCESS) && "Error in getting Perf Cntr Blk Hndl");
    }

    // Create an instance of counter in the perf block
    status = hsa_ext_tools_create_counter(info->blkHndl, &info->cntrHndl);
    assert((status == HSA_STATUS_SUCCESS) && "Error in creating Perf Cntr in Perf Blk");

    // Update the Event Index property of counter
    uint32_t cntrProp = HSA_EXT_TOOLS_COUNTER_PARAMETER_EVENT_INDEX;
    status = hsa_ext_tools_set_counter_parameter(info->cntrHndl, cntrProp,
                                                 sizeof(uint32_t), (void*)&info->cntrId);
    assert((status == HSA_STATUS_SUCCESS) && "Error in updating Perf Cntr Property Event Index");

    // Enable the updated perf counter
    status = hsa_ext_tools_set_counter_enabled(info->cntrHndl, true);
    assert((status == HSA_STATUS_SUCCESS) && "Error in enabing Perf Cntr");
  }

  return status;
}

// Installs PreDispatchCallback / PostDispatchCallback on the given queue and
// passes the address of this object's PMU handle as the user argument of
// both. Failures are only detected through assert.
void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t *queue){

  hsa_status_t status =
      hsa_ext_tools_set_callback_functions(queue, PreDispatchCallback, PostDispatchCallback);
  assert((status == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callbacks");

  status = hsa_ext_tools_set_callback_arguments(queue, &perfMgr_, &perfMgr_);
  assert((status == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callback Params");
}

// Blocks until perf counter collection on the PMU completes, passing 5000 as
// the timeout argument, and returns the resulting status.
hsa_status_t RocrPerfCntrApp::Wait() {

  const hsa_status_t waitStatus = hsa_ext_tools_pmu_wait_for_completion(perfMgr_, 5000);
  assert((waitStatus == HSA_STATUS_SUCCESS) && "Error in Waiting for Perf Cntr Completion");
  return waitStatus;
}

// Reads back the result of every perf counter into its CntrInfo record and
// prints it.
//
// Returns HSA_STATUS_SUCCESS when every counter could be read, otherwise the
// status of the first failing read. Previously the status was overwritten on
// each iteration, so only a failure on the last counter was reported.
//
// Note: despite its name, this function does not compare the values against
// expectedResult / cnfType; it only retrieves and prints them.
hsa_status_t RocrPerfCntrApp::Validate() {

  hsa_status_t firstFailure = HSA_STATUS_SUCCESS;
  const uint32_t size = GetNumPerfCntrs();
  for (uint32_t idx = 0; idx < size; idx++) {
    CntrInfo* info = GetPerfCntr(idx);
    const hsa_status_t status =
        hsa_ext_tools_get_counter_result(info->cntrHndl, &info->cntrResult);

    if (status != HSA_STATUS_SUCCESS) {
      // Remember the first failure and do not present a stale value as a result
      if (firstFailure == HSA_STATUS_SUCCESS) {
        firstFailure = status;
      }
      std::cout << "Error in reading value of Perf Cntr: " << info->cntrName << std::endl;
      continue;
    }

    std::cout << "Value of Perf Cntr is: " << info->cntrResult << std::endl;
  }

  return firstFailure;
}


================================================
FILE: samples/common/hsa_perf_cntrs.hpp
================================================
#ifndef ROCR_PERF_CNTR_APP_H_
#define ROCR_PERF_CNTR_APP_H_

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <iostream>
#include <vector>
#include <string>

#include "hsa.h"
#include "tools/inc/hsa_ext_profiler.h"

/// Kind of check requested for a counter value relative to its
/// expectedResult (see CntrInfo::cnfType)
typedef enum CntrValCnfType {
  
  /// no counter value validation should be performed
  CntrValCnf_None,

  /// counter value should be an exact match to expectedResult
  CntrValCnf_Exact,

  /// counter value should be greater than expectedResult
  CntrValCnf_GreaterThan,
  
  /// counter value should be less than expectedResult
  CntrValCnf_LessThan

} CntrValCnfType;

/// Struct used to encapsulate Counter Info
typedef struct CntrInfo {
  
  /// Id of counter in hardware block
  uint32_t cntrId;
  
  /// Name of counter, always null terminated (at most 71 characters)
  char cntrName[72];
  
  /// Handle of perf counter
  hsa_ext_tools_counter_t cntrHndl;
  
  /// Id of hardware block containing the counter
  uint32_t blkId;
  
  /// Handle of counter block
  hsa_ext_tools_counter_block_t blkHndl;
  
  /// Expected value of perf counter
  uint64_t  expectedResult;

  /// Value of perf counter as collected
  uint64_t cntrResult;
  
  /// Type of validation upon completion of dispatch
  CntrValCnfType cnfType;

  /// Build a counter descriptor from its individual fields.
  ///
  /// @param cntrName Null terminated name of the counter. Names that do not
  /// fit into the cntrName array are truncated; a NULL pointer yields an
  /// empty name.
  CntrInfo(uint32_t cntrId, char* cntrName, void* cntrHndl,
           uint32_t blkId, void* blkHndl,
           uint64_t expResult, uint64_t result, CntrValCnfType cnfType) {
    this->cntrId = cntrId;
    this->cntrHndl = cntrHndl;
    this->blkId = blkId;
    this->blkHndl = blkHndl;
    this->expectedResult = expResult;
    this->cntrResult = result;
    this->cnfType = cnfType;

    // Bounded copy: never write past the array and always leave a
    // terminating null (strncpy zero-fills the unused tail)
    this->cntrName[0] = '\0';
    if (cntrName != NULL) {
      strncpy(this->cntrName, cntrName, sizeof(this->cntrName) - 1);
      this->cntrName[sizeof(this->cntrName) - 1] = '\0';
    }
  }
  
} CntrInfo;

// Sample helper that owns a list of perf counter descriptors and a
// perf counter manager handle, and exposes the steps used to program
// counters, hook dispatch callbacks, wait for collection and read results.
class RocrPerfCntrApp {

 public:

  // Constructor of the class. Will initialize the list of perf counters
  // that will be used to program the device
  RocrPerfCntrApp( );

  // Destructor of the class
  ~RocrPerfCntrApp( );

  // Return the number of perf counters
  uint32_t GetNumPerfCntrs();

  // Return the handle of perf counter at specified index
  CntrInfo* GetPerfCntr(uint32_t idx);

  // Print the list of perf counters
  bool PrintCntrs();

  // Initialize the list of perf counters
  hsa_status_t Init(hsa_agent_t agent);

  // Register Pre and Post dispatch callbacks
  void RegisterCallbacks(hsa_queue_t *queue);

  // Wait for perf counter collection to complete
  hsa_status_t Wait();

  // Retrieve and print the perf counter values
  hsa_status_t Validate();
 
 private:
 
  // List of perf counter descriptors managed by this instance
  std::vector<CntrInfo *> cntrList_;

  // Handle of Perf Cntr Manager
  hsa_ext_tools_pmu_t perfMgr_;
};

#endif  //  ROCR_PERF_CNTR_APP_H_


================================================
FILE: samples/common/hsa_rsrc_factory.cpp
================================================
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <cassert>

#include <iostream>
#include <vector>
#include <string>

#include "hsa.h"
#include "hsa_rsrc_factory.hpp"
#include "hsa_ext_finalize.h"
#include "tools/inc/hsa_ext_profiler.h"
#include "HSAILAmdExt.h"

#include "common.hpp"

using namespace std;

// Provide access to command line arguments passed in by user.
// hsa_cmdline_arg_cnt is the number of entries in hsa_cmdline_arg_list;
// both are read by HsaRsrcFactory::ProcessCmdline, which starts at index 1.
uint32_t hsa_cmdline_arg_cnt;
char **hsa_cmdline_arg_list;

// Callback function to find and bind the memory regions of an agent.
//
// @param region Region currently being enumerated
//
// @param data Pointer to the AgentInfo whose coarse_region and
// kernarg_region fields are updated
//
// Only regions of the global segment are considered. A region whose
// properties cannot be queried is skipped so that enumeration of the
// remaining regions continues.
//
// @return hsa_status_t Always HSA_STATUS_SUCCESS
static hsa_status_t find_memregions(hsa_region_t region, void *data) {

  // Query the segment; do not inspect the output if the query failed
  hsa_region_segment_t segment_id;
  if (hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id) != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }
  if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Query the global flags; again skip the region on failure
  hsa_region_global_flag_t flags;
  if (hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags) != HSA_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }

  AgentInfo *agent_info = (AgentInfo *)data;
  if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
    agent_info->coarse_region = region;
  }

  if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
    agent_info->kernarg_region = region;
  }

  return HSA_STATUS_SUCCESS;
}

// Callback function invoked for every agent reported by the runtime.
//
// @param agent Agent currently being enumerated
//
// @param data Pointer to the HsaRsrcFactory that collects the agents
//
// Dsp agents are ignored. For every other agent a zero-initialized
// AgentInfo is allocated and handed to the factory: Cpu agents only get
// their handle and type recorded, all remaining agents are treated as Gpu
// agents and additionally get their name, wavefront size, queue size,
// profile and memory regions queried.
//
// @return hsa_status_t HSA_STATUS_SUCCESS on success, the failing status if
// the device type cannot be queried, or HSA_STATUS_ERROR_OUT_OF_RESOURCES
// if the AgentInfo cannot be allocated
static hsa_status_t get_hsa_agents(hsa_agent_t agent, void *data) {

  HsaRsrcFactory *rsrcFactory = reinterpret_cast<HsaRsrcFactory *>(data);

  // Determine the type of device; the output is meaningless on failure
  hsa_device_type_t type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  if (type == HSA_DEVICE_TYPE_DSP) {
    return HSA_STATUS_SUCCESS;
  }

  // Zero-initialize so that no field (name, sizes, region handles) is ever
  // left indeterminate, including for Cpu agents
  AgentInfo *agent_info = reinterpret_cast<AgentInfo *>(calloc(1, sizeof(AgentInfo)));
  if (agent_info == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  agent_info->dev_id = agent;

  if (type == HSA_DEVICE_TYPE_CPU) {
    agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
    rsrcFactory->AddAgentInfo(agent_info, false);
    return HSA_STATUS_SUCCESS;
  }
  
  // Device is treated as a Gpu agent, fill in the remaining fields
  agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
  hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
  agent_info->max_wave_size = 0;
  hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
  agent_info->max_queue_size = 0;
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);

  // Placeholder profile value that stays in place if the query below fails
  agent_info->profile = hsa_profile_t(108);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);

  // Memory regions start out as "not found" (handle zero)
  agent_info->kernarg_region.handle = 0;
  agent_info->coarse_region.handle = 0;
  
  // Find and Bind Memory regions of the Gpu agent
  hsa_agent_iterate_regions(agent, find_memregions, agent_info);

  // Save the instance of AgentInfo
  rsrcFactory->AddAgentInfo(agent_info, true);
  return HSA_STATUS_SUCCESS;
}

// Definitions for Static Data members of the class.
// The values below are updated by ProcessCmdline from the user's
// command line arguments.
char* HsaRsrcFactory::brig_path_ = NULL;
uint32_t HsaRsrcFactory::num_cus_;
uint32_t HsaRsrcFactory::num_waves_;
uint32_t HsaRsrcFactory::num_workitems_;
uint32_t HsaRsrcFactory::kernel_loop_count_;
bool HsaRsrcFactory::print_debug_info_ = false;

// Command line keys that GetArgIndex compares user arguments against
char* HsaRsrcFactory::num_cus_key_ = "num_cus";
char* HsaRsrcFactory::brig_path_key_ = "brig_path";
char* HsaRsrcFactory::num_waves_key_ = "waves_per_cu";
char* HsaRsrcFactory::num_workitems_key_ = "workitems_per_wave";
char* HsaRsrcFactory::print_debug_key_ = "print_debug";
char* HsaRsrcFactory::kernel_loop_count_key_ = "kernel_loop_count";

// Constructor of the class. Brings up the Hsa Runtime, enumerates the
// agents of the platform into the Cpu and Gpu lists and finally captures
// the user's command line parameters.
HsaRsrcFactory::HsaRsrcFactory( ) {

  // Bring up the runtime
  hsa_status_t rc = hsa_init();
  assert(rc == HSA_STATUS_SUCCESS);

  // Enumerate agents; get_hsa_agents records them in this instance
  rc = hsa_iterate_agents(get_hsa_agents, this);
  check("Error Calling hsa_iterate_agents", rc);

  // Capture user supplied parameters
  ProcessCmdline( );
}

// Destructor of the class. The body is empty: it neither shuts down the
// Hsa Runtime nor releases the AgentInfo entries held in the agent lists.
HsaRsrcFactory::~HsaRsrcFactory( ) {

}

// Report how many Gpu agents were recorded for the platform
//
// @return uint32_t Number of entries in the Gpu agent list
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents( ) {
  return static_cast<uint32_t>(gpu_list_.size());
}

// Report how many Cpu agents were recorded for the platform
//
// @return uint32_t Number of entries in the Cpu agent list
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents( ) {
  return static_cast<uint32_t>(cpu_list_.size());
}

// Look up the AgentInfo of a Gpu device by its position in the Gpu list
//
// @param idx Index of the Gpu agent
//
// @param agent_info Output parameter, written only when idx is valid
//
// @return bool true if idx refers to an existing entry, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info) {

  const uint32_t gpu_count = uint32_t(gpu_list_.size());
  const bool in_range = (idx < gpu_count);

  // Hand out the entry only for a valid index
  if (in_range) {
    *agent_info = gpu_list_[idx];
  }
  return in_range;
}

// Look up the AgentInfo of a Cpu device by its position in the Cpu list
//
// @param idx Index of the Cpu agent
//
// @param agent_info Output parameter, written only when idx is valid
//
// @return bool true if idx refers to an existing entry, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, AgentInfo **agent_info) {

  const uint32_t cpu_count = uint32_t(cpu_list_.size());
  const bool in_range = (idx < cpu_count);

  // Hand out the entry only for a valid index
  if (in_range) {
    *agent_info = cpu_list_[idx];
  }
  return in_range;
}

// Create a Queue object on an agent and return its handle.
//
// @param agent_info Agent on which to create the queue object
//
// @param num_pkts Number of packets to be held by the queue. The special
// value UINT32_MAX requests a profiled queue instead, which is created
// with a fixed size of 512 packets.
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info,
                                 uint32_t num_pkts, hsa_queue_t **queue) {

  const bool want_profiled = (num_pkts == UINT32_MAX);
  hsa_status_t rc;

  if (want_profiled) {
    // Profiled queue: single producer, fixed size
    rc = hsa_ext_tools_queue_create_profiled(agent_info->dev_id,
                                  512, HSA_QUEUE_TYPE_SINGLE, NULL,
                                  NULL, UINT32_MAX, UINT32_MAX, queue);
  } else {
    // Regular queue: multi producer, caller supplied size
    rc = hsa_queue_create(agent_info->dev_id, num_pkts,
                          HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                          UINT32_MAX, UINT32_MAX, queue);
  }

  return (rc == HSA_STATUS_SUCCESS);
}

// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t *signal) {

  hsa_status_t status;
  status = hsa_signal_create(value, 0, NULL, signal);
  return (status == HSA_STATUS_SUCCESS);
}

// Allocate memory for use by a kernel. The buffer comes from the agent's
// coarse grained region when one was found (and is then assigned to the
// agent with read/write permission); otherwise it comes from the agent's
// kernarg region.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation or assignment
// to the agent fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(AgentInfo *agent_info, size_t size) {

  hsa_status_t status;
  uint8_t *buffer = NULL;

  // Allocate in local memory only if it is available
  if (agent_info->coarse_region.handle != 0) {
    std::cout << "Allocating in local memory" << std::endl;
    status = hsa_memory_allocate(agent_info->coarse_region, size, (void **)&buffer);
    if (status != HSA_STATUS_SUCCESS) {
      return NULL;
    }

    status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
    if (status != HSA_STATUS_SUCCESS) {
      // Do not leak the buffer when it cannot be handed to the agent
      hsa_memory_free(buffer);
      return NULL;
    }
    return buffer;
  }

  // Allocate in system memory if local memory is not available
  std::cout << "Allocating in system memory" << std::endl;
  status = hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&buffer);
  return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}

// Allocate memory to pass kernel parameters. The buffer is taken from
// the agent's kernarg region.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(AgentInfo *agent_info, size_t size) {

  uint8_t *mem = NULL;
  const hsa_status_t rc =
      hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&mem);
  if (rc != HSA_STATUS_SUCCESS) {
    return NULL;
  }
  return mem;
}

bool HsaRsrcFactory::TransferData(uint8_t *dest_buff, uint8_t *src_buff,
                                  uint32_t length, bool host_to_dev) {

  hsa_status_t status;
  status = hsa_memory_copy(dest_buff, src_buff, length);
  return (status == HSA_STATUS_SUCCESS);

}

// Fake method for compilation steps only. Allocates size bytes from the
// agent's kernarg region.
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateMemory(AgentInfo *agent_info, size_t size) {

  uint8_t *mem = NULL;
  const hsa_status_t rc =
      hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&mem);
  if (rc != HSA_STATUS_SUCCESS) {
    return NULL;
  }
  return mem;
}

// Assembles the file at brig_path into a Brig module, finalizes it into
// Device Isa for the given agent, loads it into a frozen executable and
// looks up the requested kernel symbol.
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path handed to the assembler tool
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Output parameter updated with the executable symbol
// of the kernel, which could be used to submit for execution
//
// @return bool true if successful, false if the file could not be
// assembled. Any failing runtime call (including the symbol lookup)
// prints a message and terminates the process via check().
//
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info,
                                     const char *brig_path, char *kernel_name,
                                     hsa_executable_symbol_t *code_desc) {

  hsa_status_t status;

  amd::hsail::registerExtensions();

  // Assemble the input and copy the handle of the Brig object
  hsa_ext_module_t brig_module_v3;
  if (!tool.assembleFromFile(brig_path)) {
    std::cout << tool.output();
    return false;
  }
  brig_module_v3 = tool.brigModule();
  
  // Create hsail program.
  hsa_ext_program_t hsailProgram;
  status = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE,
                                  agent_info->profile,
                                  HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
                                  NULL, &hsailProgram);
  check("Error in creating program object", status);

  // Add hsail module.
  status = hsa_ext_program_add_module(hsailProgram, brig_module_v3);
  check("Error in adding module to program object", status);

  // Finalize hsail program.
  hsa_isa_t isa = {0};
  status = hsa_agent_get_info(agent_info->dev_id, HSA_AGENT_INFO_ISA, &isa);
  check("Error in getting Id of Isa supported by agent", status);

  hsa_ext_control_directives_t control_directives;
  memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));

  hsa_code_object_t code_object;
  status = hsa_ext_program_finalize(hsailProgram,
                                           isa,
                                           0,
                                           control_directives,
                                           NULL, //"-g -O0 -dump-isa",
                                           HSA_CODE_OBJECT_TYPE_PROGRAM,
                                           &code_object);
  check("Error in finalizing program object", status);

  // Create executable.
  hsa_executable_t hsaExecutable;
  status = hsa_executable_create(agent_info->profile,
                                 HSA_EXECUTABLE_STATE_UNFROZEN,
                                 "", &hsaExecutable);
  check("Error in creating executable object", status);

  // Load code object.
  status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
  check("Error in loading executable object", status);

  // Freeze executable.
  status = hsa_executable_freeze(hsaExecutable, "");
  check("Error in freezing executable object", status);

  // Get symbol handle. The lookup must be verified like every other call,
  // otherwise an indeterminate symbol would be reported as success.
  hsa_executable_symbol_t kernelSymbol;
  status = hsa_executable_get_symbol(hsaExecutable, NULL,
                             kernel_name, agent_info->dev_id, 0, &kernelSymbol);
  check("Error in getting symbol handle of kernel", status);
  
  // Update output parameter
  *code_desc = kernelSymbol;
  return true;
}

// Record an AgentInfo instance in the Gpu list (gpu == true) or in the
// Cpu list (gpu == false)
void HsaRsrcFactory::AddAgentInfo(AgentInfo *agent_info, bool gpu) {
  std::vector<AgentInfo *> &target = gpu ? gpu_list_ : cpu_list_;
  target.push_back(agent_info);
}

// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents( ) {

  AgentInfo *agent_info;
  int size = uint32_t(gpu_list_.size());
  for (int idx = 0; idx < size; idx++) {
    agent_info = gpu_list_[idx];
    std::cout << std::endl;
    std::cout << "Hsa Gpu Agent Id: " << agent_info->dev_id.handle << std::endl;
    std::cout << "Hsa Gpu Agent Name: " << agent_info->name << std::endl;
    std::cout << "Hsa Gpu Agent Max Wave Size: " << agent_info->max_wave_size << std::endl;
    std::cout << "Hsa Gpu Agent Max Queue Size: " << agent_info->max_queue_size << std::endl;
    std::cout << "Hsa Gpu Agent Kernarg Region Id: " << agent_info->coarse_region.handle << std::endl;
    std::cout << std::endl;
  }
  return true;
}

// Returns the file path where brig files is located. Value is
// available only after an instance has been built.
char* HsaRsrcFactory::GetBrigPath( ) {
  return HsaRsrcFactory::brig_path_;
}

// Returns the number of compute units present on platform
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfCUs( ) {
  return HsaRsrcFactory::num_cus_;
}

// Returns the maximum number of waves that can be launched
// per compute unit. The actual number that can be launched
// is affected by resource availability
//
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfWavesPerCU( ) {
  return HsaRsrcFactory::num_waves_;
}

// Returns the number of work-items that can execute per wave
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfWorkItemsPerWave( ) {
  return HsaRsrcFactory::num_workitems_;
}

// Returns the number of times kernel loop body should execute.
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetKernelLoopCount() {
  return HsaRsrcFactory::kernel_loop_count_;
}

// Returns boolean flag to indicate if debug info should be printed
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetPrintDebugInfo() {
  return HsaRsrcFactory::print_debug_info_;
}

// Process command line arguments. The method will capture
// various user command line parameters for tests to use.
//
// Arguments are consumed as (key, value) pairs starting at index 1 of
// hsa_cmdline_arg_list. Unknown keys are skipped together with the
// following entry. The print_debug key is a pure flag whose value entry
// is not read. A key that requires a value but is the last argument ends
// processing instead of reading past the argument list.
void HsaRsrcFactory::ProcessCmdline( ) {

  // Nothing to do without an argument list
  if (hsa_cmdline_arg_list == NULL) {
    return;
  }

  for (uint32_t idx = 1; idx < hsa_cmdline_arg_cnt; idx += 2) {
    const uint32_t arg_idx = GetArgIndex((char *)hsa_cmdline_arg_list[idx]);

    // Flag without a value
    if (arg_idx == 5) {
      HsaRsrcFactory::print_debug_info_ = true;
      continue;
    }

    // Unrecognized key: skip the pair
    if (arg_idx > 4) {
      continue;
    }

    // Every remaining key needs a value entry to exist
    if (idx + 1 >= hsa_cmdline_arg_cnt) {
      break;
    }
    char *arg_value = hsa_cmdline_arg_list[idx + 1];

    switch(arg_idx) {
      case 0:
        HsaRsrcFactory::brig_path_ = arg_value;
        break;
      case 1:
        HsaRsrcFactory::num_cus_ = atoi(arg_value);
        break;
      case 2:
        HsaRsrcFactory::num_waves_ = atoi(arg_value);
        break;
      case 3:
        HsaRsrcFactory::num_workitems_ = atoi(arg_value);
        break;
      case 4:
        HsaRsrcFactory::kernel_loop_count_ = atoi(arg_value);
        break;
    }
  }

}

// Translate a command line key into the index used by ProcessCmdline:
//   0 brig path, 1 number of compute units, 2 waves per compute unit,
//   3 work-items per wave, 4 kernel loop count, 5 print debug info.
// Any other string maps to 108.
uint32_t HsaRsrcFactory::GetArgIndex(char *arg_value ) {

  // Keys listed in the order of the index they map to
  const char* const known_keys[] = {
    HsaRsrcFactory::brig_path_key_,
    HsaRsrcFactory::num_cus_key_,
    HsaRsrcFactory::num_waves_key_,
    HsaRsrcFactory::num_workitems_key_,
    HsaRsrcFactory::kernel_loop_count_key_,
    HsaRsrcFactory::print_debug_key_
  };
  const uint32_t key_count = sizeof(known_keys) / sizeof(known_keys[0]);

  for (uint32_t slot = 0; slot < key_count; ++slot) {
    if (strcmp(known_keys[slot], arg_value) == 0) {
      return slot;
    }
  }

  return 108;

}

// Prints the help banner listing every key accepted on the command line,
// including the print debug key
void HsaRsrcFactory::PrintHelpMsg( ) {

  std::cout << "Key for passing Brig filepath: " << HsaRsrcFactory::brig_path_key_ << std::endl;
  std::cout << "Key for passing Number of Compute Units: " << HsaRsrcFactory::num_cus_key_ << std::endl;
  std::cout << "Key for passing Number of Waves per CU: " << HsaRsrcFactory::num_waves_key_ << std::endl;
  std::cout << "Key for passing Number of Workitems per Wave: " << HsaRsrcFactory::num_workitems_key_ << std::endl;
  std::cout << "Key for passing Kernel Loop Count: " << HsaRsrcFactory::kernel_loop_count_key_ << std::endl;
  std::cout << "Key for enabling printing of Debug Info: " << HsaRsrcFactory::print_debug_key_ << std::endl;

}


================================================
FILE: samples/common/hsa_rsrc_factory.hpp
================================================
#ifndef HSA_RSRC_FACTORY_H_
#define HSA_RSRC_FACTORY_H_

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <iostream>
#include <vector>
#include <string>

#include "hsatimer.h"
#include "hsa.h"
#include "hsa_ext_finalize.h"
#include "HSAILTool.h"


// Byte alignment constants for kernel arguments, queues and packets
#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64

// Terminate the process with a diagnostic if an Hsa call did not succeed.
//
// msg    Text printed in front of the runtime's description of the error
// status Expression of type hsa_status_t; evaluated exactly once
//
// Wrapped in do { } while (0) so that the macro behaves like a single
// statement (e.g. inside an un-braced if/else) and must be followed by ';'.
#define check(msg, status) \
do { \
    const hsa_status_t check_status_ = (status); \
    if (check_status_ != HSA_STATUS_SUCCESS) { \
        const char *emsg = 0; \
        hsa_status_string(check_status_, &emsg); \
        printf("%s: %s\n", (msg), emsg ? emsg : "<unknown error>"); \
        exit(1); \
    } \
} while (0)

// Terminate the process with a message if a build step did not report
// STATUS_SUCCESS.
//
// Wrapped in do { } while (0) so that the macro behaves like a single
// statement (e.g. inside an un-braced if/else) and must be followed by ';'.
#define check_build(msg, status) \
do { \
    if ((status) != STATUS_SUCCESS) { \
        printf("%s\n", (msg)); \
        exit(1); \
    } \
} while (0)

// Define required BRIG data structures.
// Fixed-width aliases; the numeric suffix of each name matches the bit
// width of the underlying integer type.
typedef uint32_t BrigCodeOffset32_t;
typedef uint32_t BrigDataOffset32_t;
typedef uint16_t BrigKinds16_t;
typedef uint8_t BrigLinkage8_t;
typedef uint8_t BrigExecutableModifier8_t;
typedef BrigDataOffset32_t BrigDataOffsetString32_t;

/*
enum BrigKinds {
  BRIG_KIND_NONE = 0x0000,
  BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
  BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
};

typedef struct BrigBase BrigBase;
struct BrigBase {
  uint16_t byteCount;
  BrigKinds16_t kind;
};

typedef struct BrigExecutableModifier BrigExecutableModifier;
struct BrigExecutableModifier {
  BrigExecutableModifier8_t allBits;
};

typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
struct BrigDirectiveExecutable {
  uint16_t byteCount;
  BrigKinds16_t kind;
  BrigDataOffsetString32_t name;
  uint16_t outArgCount;
  uint16_t inArgCount;
  BrigCodeOffset32_t firstInArg;
  BrigCodeOffset32_t firstCodeBlockEntry;
  BrigCodeOffset32_t nextModuleEntry;
  uint32_t codeBlockEntryCount;
  BrigExecutableModifier modifier;
  BrigLinkage8_t linkage;
  uint16_t reserved;
};

typedef struct BrigData BrigData;
struct BrigData {
  uint32_t byteCount;
  uint8_t bytes[1];
};
*/

// Provide access to command line arguments passed in by user:
// the number of arguments and the argument strings themselves
extern uint32_t hsa_cmdline_arg_cnt;
extern char **hsa_cmdline_arg_list;

// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
typedef struct {

  // Handle of Agent
  hsa_agent_t dev_id;
  
  // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
  uint32_t dev_type;

  // Name of Agent whose length is less than 64
  char name[64];

  // Max size of Wavefront size
  uint32_t max_wave_size;

  // Max size of Queue buffer
  uint32_t max_queue_size;

  // Hsail profile supported by agent
  hsa_profile_t profile;

  // Global memory region flagged as coarse grained; a handle of zero
  // means that no such region was found
  hsa_region_t coarse_region;

  // Global memory region flagged as supporting kernel arguments; a
  // handle of zero means that no such region was found
  hsa_region_t kernarg_region;

} AgentInfo;

// Helper used by the samples to initialize the Hsa Runtime, discover the
// Cpu and Gpu agents of the platform, create queues and signals, allocate
// memory, finalize kernels and expose the user's command line parameters.
class HsaRsrcFactory {

 public:

  // Constructor of the class. Will initialize the Hsa Runtime and
  // query the system topology to get the list of Cpu and Gpu devices
  HsaRsrcFactory( );

  // Destructor of the class
  ~HsaRsrcFactory( );

  // Get the count of Hsa Gpu Agents available on the platform
  //
  // @return uint32_t Number of Gpu agents on platform
  //
  uint32_t GetCountOfGpuAgents( );

  // Get the count of Hsa Cpu Agents available on the platform
  //
  // @return uint32_t Number of Cpu agents on platform
  //
  uint32_t GetCountOfCpuAgents( );

  // Get the AgentInfo handle of a Gpu device
  //
  // @param idx Gpu Agent at specified index
  //
  // @param agent_info Output parameter updated with AgentInfo
  //
  // @return bool true if successful, false otherwise
  //
  bool GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info);

  // Get the AgentInfo handle of a Cpu device
  //
  // @param idx Cpu Agent at specified index
  //
  // @param agent_info Output parameter updated with AgentInfo
  //
  // @return bool true if successful, false otherwise
  //
  bool GetCpuAgentInfo(uint32_t idx, AgentInfo **agent_info);

  // Create a Queue object and return its handle. The queue object is expected
  // to support user requested number of Aql dispatch packets.
  //
  // @param agent_info Gpu Agent on which to create a queue object
  //
  // @param num_pkts Number of packets to be held by queue
  //
  // @param queue Output parameter updated with handle of queue object
  //
  // @return bool true if successful, false otherwise
  //
  bool CreateQueue(AgentInfo *agent_info,
                   uint32_t num_pkts, hsa_queue_t **queue);

  // Create a Signal object and return its handle.
  //
  // @param value Initial value of signal object
  //
  // @param signal Output parameter updated with handle of signal object
  //
  // @return bool true if successful, false otherwise
  //
  bool CreateSignal(uint32_t value, hsa_signal_t *signal);

  // Allocate memory for use by a kernel of specified size in specified
  // agent's memory region.
  //
  // @param agent_info Agent from whose memory region to allocate
  //
  // @param size Size of memory in terms of bytes
  //
  // @return uint8_t* Pointer to buffer, null if allocation fails.
  //
  uint8_t* AllocateLocalMemory(AgentInfo *agent_info, size_t size);

  // Same parameters and return value as AllocateLocalMemory
  uint8_t* AllocateMemory(AgentInfo *agent_info, size_t size);

  // Copy length bytes from src_buff to dest_buff.
  //
  // @return bool true if successful, false otherwise
  //
  bool TransferData(uint8_t *dest_buff, uint8_t *src_buff,
                    uint32_t length, bool host_to_dev);

  // Allocate memory to pass kernel parameters.
  //
  // @param agent_info Agent from whose memory region to allocate
  //
  // @param size Size of memory in terms of bytes
  //
  // @return uint8_t* Pointer to buffer, null if allocation fails.
  //
  uint8_t* AllocateSysMemory(AgentInfo *agent_info, size_t size);

  // Loads an Assembled Brig file and Finalizes it into Device Isa
  //
  // @param agent_info Gpu device for which to finalize
  //
  // @param brig_path File path of the Assembled Brig file
  //
  // @param kernel_name Name of the kernel to finalize
  //
  // @param code_desc Handle of finalized Code Descriptor that could
  // be used to submit for execution
  //
  // @return bool true if successful, false otherwise
  //
  bool LoadAndFinalize(AgentInfo *agent_info,
                       const char *brig_path, char *kernel_name,
                       hsa_executable_symbol_t *code_desc);

  // Add an instance of AgentInfo representing a Hsa agent; the gpu flag
  // selects whether it is recorded as a Gpu (true) or a Cpu (false) agent
  void AddAgentInfo(AgentInfo *agent_info, bool gpu);

  // Returns the file path where brig files is located
  static char* GetBrigPath( );

  // Returns the number of compute units present on platform
  static uint32_t GetNumOfCUs( );

  // Returns the maximum number of waves that can be launched
  // per compute unit. The actual number that can be launched
  // is affected by resource availability
  static uint32_t GetNumOfWavesPerCU( );

  // Returns the number of work-items that can execute per wave
  static uint32_t GetNumOfWorkItemsPerWave( );
  
  // Returns the number of times kernel loop body should execute.
  static uint32_t GetKernelLoopCount();
  
  // Returns boolean flag to indicate if debug info should be printed
  static uint32_t GetPrintDebugInfo();

 private:
 
  // Number of queues to create
  uint32_t num_queues_;

  // Used to maintain a list of Hsa Queue handles
  std::vector<hsa_queue_t *> queue_list_;
 
  // Number of Signals to create
  uint32_t num_signals_;
 
  // Used to maintain a list of Hsa Signal handles
  std::vector<hsa_signal_t *> signal_list_;
 
  // Number of agents reported by platform
  uint32_t num_agents_;
 
  // Used to maintain a list of Hsa Gpu Agent Info
  std::vector<AgentInfo *> gpu_list_;
 
  // Used to maintain a list of Hsa Cpu Agent Info
  std::vector<AgentInfo *> cpu_list_;

  // Records the file path where Brig file is located.
  // Value is available only after an instance has been built.
  static char* brig_path_;
  static char* brig_path_key_;

  // Records the number of Compute units present on system.
  // Value is available only after an instance has been built.
  static uint32_t num_cus_;
  static char* num_cus_key_;

  // Records the number of waves that can be launched per Compute unit
  // Value is available only after an instance has been built.
  static uint32_t num_waves_;
  static char* num_waves_key_;

  // Records the number of work-items that can be packed into a wave
  // Value is available only after an instance has been built.
  static uint32_t num_workitems_;
  static char* num_workitems_key_;

  // Records the number of times kernel loop body should run. Value
  // is available only after an instance has been built.
  static uint32_t kernel_loop_count_;
  static char* kernel_loop_count_key_;

  // Records whether debug info should be printed. Value
  // is available only after an instance has been built.
  static bool print_debug_info_;
  static char* print_debug_key_;

  // Print the various fields of Hsa Gpu Agents
  bool PrintGpuAgents( );
  
  // Process command line arguments. The method will capture
  // various user command line parameters for tests to use
  static void ProcessCmdline( );
  
  // Prints the help banner on user arg keys
  static void PrintHelpMsg( );

  // Maps an index for the user argument
  static uint32_t GetArgIndex(char *arg_value);

  // Assembler tool used by LoadAndFinalize to produce the Brig module
  HSAIL_ASM::Tool tool;
};

#endif  //  HSA_RSRC_FACTORY_H_


================================================
FILE: samples/common/hsa_test.cpp
================================================
#include "hsa_test.h"

#include <atomic>
#include <iostream>

// Print one line of the form "<attribute> = <value> <metric>", where the
// attribute name is stringified from the first macro argument.
// Note: the expansion already ends with a semicolon.
#define PRINT_ATTRIBUTE(attribute, value, metric) \
  std::cout << #attribute " = " << value << " " << metric << std::endl;

// Converts a byte count into whole mebibytes; any remainder is discarded.
static size_t ToMB(size_t size) {
  const size_t bytes_per_mb = 1024 * 1024;
  return size / bytes_per_mb;
}

// Remembers the test name and prints a banner announcing the test.
HsaTest::HsaTest(const char* test_name) : test_name_(test_name) {
  std::cout << "Running " << test_name_ << std::endl
            << "------------------------------------------------\n";
}

// The destructor itself releases nothing.
HsaTest::~HsaTest() {
}

// Initializes the HSA runtime and enumerates its agents.
//
// hsa_iterate_agents calls IterateAgents with this object as the callback
// data. Failures are reported on std::cerr. When hsa_init fails the agent
// enumeration is skipped, since the runtime is not usable at that point.
void HsaTest::Init() {
  hsa_status_t stat = hsa_init();
  if (stat != HSA_STATUS_SUCCESS) {
    std::cerr << "hsa_init fail with status " << stat << std::endl;
    return;
  }

  stat = hsa_iterate_agents(IterateAgents, (void*)this);
  if (stat != HSA_STATUS_SUCCESS) {
    std::cerr << "hsa_iterate_agents fail with status " << stat << std::endl;
  }
}

// Shuts the HSA runtime down; the returned status is not inspected.
void HsaTest::Cleanup() {
  hsa_shut_down();
}

// Agent-iteration callback. `data` is the HsaTest instance being populated.
//
// Files the agent under cpus_ or gpus_ according to its device type, then
// walks the agent's memory pools and stores the fine-grained global,
// coarse-grained global and group pool (zero handles when absent) keyed by
// the agent handle. Always returns HSA_STATUS_SUCCESS.
hsa_status_t HsaTest::IterateAgents(hsa_agent_t agent, void* data) {
  HsaTest* self = (HsaTest*)data;

  const AgentProps props(agent);
  switch (props.device_type) {
    case HSA_DEVICE_TYPE_CPU:
      self->cpus_.push_back(agent);
      break;
    case HSA_DEVICE_TYPE_GPU:
      self->gpus_.push_back(agent);
      break;
    default:
      break;
  }

  // Slot layout is shared with IteratePools: [0] fine, [1] coarse, [2] group.
  hsa_amd_memory_pool_t found[3] = {{0}, {0}, {0}};
  hsa_amd_agent_iterate_memory_pools(agent, IteratePools, found);

  const uint64_t key = agent.handle;
  self->global_fine_[key] = found[0];
  self->global_coarse_[key] = found[1];
  self->group_[key] = found[2];

  return HSA_STATUS_SUCCESS;
}

// Pool-iteration callback. `data` points at an array of three pools:
// [0] fine-grained global, [1] other global, [2] group. The matching slot is
// overwritten with this pool's handle; pools of other segments are ignored.
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t HsaTest::IteratePools(hsa_amd_memory_pool_t pool, void* data) {
  hsa_amd_memory_pool_t* slots = (hsa_amd_memory_pool_t*)data;

  const PoolProps props(pool);

  int slot = -1;
  if (props.segment == HSA_AMD_SEGMENT_GLOBAL) {
    const bool fine =
        (props.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0;
    slot = fine ? 0 : 1;
  } else if (props.segment == HSA_AMD_SEGMENT_GROUP) {
    slot = 2;
  }

  if (slot >= 0) {
    slots[slot].handle = pool.handle;
  }

  return HSA_STATUS_SUCCESS;
}

// Snapshots the attributes of `agent` through hsa_agent_get_info.
//
// Every member is value-initialized (zeroed) first, so a null agent handle or
// a failed query leaves a well-defined zero instead of an indeterminate value
// that callers would later read. The initializer list follows the member
// declaration order. Individual query statuses are not inspected.
HsaTest::AgentProps::AgentProps(hsa_agent_t agent)
    : name(),
      vendor_name(),
      feature(),
      machine_model(),
      profile(),
      default_float_rounding_mode(),
      base_profile_float_rounding_mode(),
      fast_f16_operation(),
      wavefront_size(),
      workgroup_max_dim(),
      workgroup_max_size(),
      grid_max_dim(),
      grid_max_size(),
      fbarrier_max_size(),
      queue_max(),
      queue_min_size(),
      queue_max_size(),
      queue_type(),
      node(),
      device_type(),
      cache_size(),
      isa(),
      extensions(),
      version_major(),
      version_minor() {
  if (agent.handle == 0) {
    return;
  }

  hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, (void*)name);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, (void*)vendor_name);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, (void*)&feature);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_MACHINE_MODEL,
                     (void*)&machine_model);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, (void*)&profile);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
                     (void*)&default_float_rounding_mode);
  hsa_agent_get_info(agent,
                     HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES,
                     (void*)&base_profile_float_rounding_mode);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION,
                     (void*)&fast_f16_operation);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
                     (void*)&wavefront_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
                     (void*)workgroup_max_dim);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE,
                     (void*)&workgroup_max_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, (void*)&grid_max_dim);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE,
                     (void*)&grid_max_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE,
                     (void*)&fbarrier_max_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, (void*)&queue_max);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE,
                     (void*)&queue_min_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
                     (void*)&queue_max_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, (void*)&queue_type);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, (void*)&node);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, (void*)&device_type);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, (void*)cache_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, (void*)&isa);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_EXTENSIONS, (void*)extensions);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_VERSION_MAJOR,
                     (void*)&version_major);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_VERSION_MINOR,
                     (void*)&version_minor);
}

// Snapshots the attributes of `pool` through hsa_amd_memory_pool_get_info.
//
// Every member is value-initialized (zeroed) first, so a null pool handle or
// a failed query leaves a well-defined zero rather than an indeterminate
// value. The initializer list follows the member declaration order.
// Individual query statuses are not inspected.
HsaTest::PoolProps::PoolProps(hsa_amd_memory_pool_t pool)
    : segment(),
      global_flag(),
      size(),
      alloc_allowed(),
      alloc_granule(),
      alloc_alignment(),
      all_accessible() {
  if (pool.handle == 0) {
    return;
  }

  hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
                               (void*)&segment);
  hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
                               (void*)&global_flag);
  hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
                               (void*)&size);
  hsa_amd_memory_pool_get_info(pool,
                               HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
                               (void*)&alloc_allowed);
  hsa_amd_memory_pool_get_info(pool,
                               HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
                               (void*)&alloc_granule);
  hsa_amd_memory_pool_get_info(pool,
                               HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
                               (void*)&alloc_alignment);
  hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
                               (void*)&all_accessible);
}

// Builds a kernel container for `agent` from the HSAIL file at `hsail_text`.
//
// All runtime handles start out as 0 so that Cleanup() and GetScratchSize()
// never operate on indeterminate values; this includes kernel_symbol_, which
// is otherwise only assigned by GetCodeHandle(). The agent's profile is
// cached and Initialize() is then run. Because this happens inside the
// constructor, the call resolves to Kernel::Initialize even when a derived
// class overrides it.
HsaTest::Kernel::Kernel(hsa_agent_t agent, std::string hsail_text)
    : agent_(agent), hsail_file_(hsail_text) {
  program_.handle = 0;
  code_object_.handle = 0;
  executable_.handle = 0;
  kernel_symbol_.handle = 0;

  AgentProps prop(agent_);
  profile_ = prop.profile;

  Initialize();
}

// Releases whatever runtime objects the kernel still holds via Cleanup().
HsaTest::Kernel::~Kernel() {
  Cleanup();
}

uint64_t HsaTest::Kernel::GetCodeHandle(const char* kernel_name) {
  kernel_symbol_ = {0};
  if (HSA_STATUS_SUCCESS != hsa_executable_get_symbol(executable_, NULL,
                                                      kernel_name, agent_, 0,
                                                      &kernel_symbol_)) {
    return 0;
  }

  uint64_t code_handle = 0;
  if (HSA_STATUS_SUCCESS !=
      hsa_executable_symbol_get_info(kernel_symbol_,
                                     HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                     &code_handle)) {
    return 0;
  }

  return code_handle;
}

// Queries the private segment size of the symbol stored in kernel_symbol_
// and writes it to `*size`. Returns the status of the runtime query.
hsa_status_t HsaTest::Kernel::GetScratchSize(uint32_t* size) {
  return hsa_executable_symbol_get_info(
      kernel_symbol_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
      size);
}

// Builds the program from the HSAIL file and, only if that succeeded,
// finalizes it into a code object and a frozen executable. Skipping the
// second stage after a failure avoids finalizing a program that has no
// module. On failure the affected handles stay 0, so GetCodeHandle() returns 0.
void HsaTest::Kernel::Initialize() {
  if (!CreateProgramFromHsailFile()) {
    return;
  }
  CreateCodeObjectAndExecutable();
}

// Destroys the executable, the code object and the program, in that order,
// skipping any whose handle is 0. Each handle is reset to 0 afterwards so a
// repeated call does nothing.
void HsaTest::Kernel::Cleanup() {
  if (executable_.handle) {
    hsa_executable_destroy(executable_);
  }
  executable_.handle = 0;

  if (code_object_.handle) {
    hsa_code_object_destroy(code_object_);
  }
  code_object_.handle = 0;

  if (program_.handle) {
    hsa_ext_program_destroy(program_);
  }
  program_.handle = 0;
}

bool HsaTest::Kernel::CreateProgramFromHsailFile() {
  if (HSA_STATUS_SUCCESS !=
      hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, profile_,
                             HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, NULL,
                             &program_)) {
    return false;
  }

  if (!tool_.assembleFromFile(hsail_file_.c_str())) {
    return false;
  }

  hsa_ext_module_t module = tool_.brigModule();
  if (HSA_STATUS_SUCCESS != hsa_ext_program_add_module(program_, module)) {
    return false;
  }

  return true;
}

bool HsaTest::Kernel::CreateCodeObjectAndExecutable() {
  hsa_isa_t isa = {0};
  if (HSA_STATUS_SUCCESS !=
      hsa_agent_get_info(agent_, HSA_AGENT_INFO_ISA, &isa)) {
    return false;
  }

  hsa_ext_control_directives_t control_directives = {0};
  if (HSA_STATUS_SUCCESS !=
      hsa_ext_program_finalize(program_, isa, 0, control_directives, "",
                               HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object_)) {
    return false;
  }

  if (HSA_STATUS_SUCCESS != hsa_executable_create(profile_,
                                                  HSA_EXECUTABLE_STATE_UNFROZEN,
                                                  "", &executable_)) {
    return false;
  }

  if (HSA_STATUS_SUCCESS !=
      hsa_executable_load_code_object(executable_, agent_, code_object_, "")) {
    return false;
  }

  if (HSA_STATUS_SUCCESS != hsa_executable_freeze(executable_, "")) {
    return false;
  }

  return true;
}

// Appends to `gpu_peers` every GPU in gpus_, other than `master`, whose
// coarse-grained global pool `master` is allowed to access.
//
// The pool is looked up with find() so that the query does not insert new
// entries into global_coarse_. An agent with no recorded pool is skipped.
void HsaTest::GetGpuPeer(hsa_agent_t master,
                         std::vector<hsa_agent_t>& gpu_peers) {
  for (hsa_agent_t agent : gpus_) {
    if (agent.handle == master.handle) {
      continue;
    }

    AgentProps agent_prop(agent);
    if (agent_prop.device_type != HSA_DEVICE_TYPE_GPU) {
      continue;
    }

    auto pool_entry = global_coarse_.find(agent.handle);
    if (pool_entry == global_coarse_.end()) {
      continue;
    }

    hsa_amd_memory_pool_access_t access =
        HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
    const hsa_status_t status = hsa_amd_agent_memory_pool_get_info(
        master, pool_entry->second, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
        (void*)&access);

    if (status == HSA_STATUS_SUCCESS &&
        access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      gpu_peers.push_back(agent);
    }
  }
}

void* HsaTest::AllocateSystemMemory(bool fine_grain, size_t size) {
  if (cpus_.size() == 0) {
    return NULL;
  }

  hsa_amd_memory_pool_t pool = (fine_grain) ? global_fine_[cpus_[0].handle]
                                            : global_coarse_[cpus_[0].handle];

  void* ptr = NULL;
  if (HSA_STATUS_SUCCESS != hsa_amd_memory_pool_allocate(pool, size, 0, &ptr)) {
    return NULL;
  }

  return ptr;
}

// Allocates `size` bytes from the coarse-grained global pool recorded for
// `agent`. Returns NULL when no GPU agent is known or allocation fails.
void* HsaTest::AllocateLocalMemory(hsa_agent_t agent, size_t size) {
  if (gpus_.empty()) {
    return NULL;
  }

  hsa_amd_memory_pool_t pool = global_coarse_[agent.handle];

  void* buffer = NULL;
  const hsa_status_t status =
      hsa_amd_memory_pool_allocate(pool, size, 0, &buffer);
  return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}

// Returns `ptr` to the memory pool it was allocated from; the returned status
// is not inspected.
void HsaTest::FreeMemory(void* ptr) {
  hsa_amd_memory_pool_free(ptr);
}

void HsaTest::LaunchPacket(hsa_queue_t& queue, hsa_packet_type_t type,
                           void* packet) {
  uint32_t queue_bitmask = queue.size - 1;
  const uint64_t write_index = hsa_queue_add_write_index_acq_rel(&queue, 1);

  static const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;

  if (type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
    hsa_kernel_dispatch_packet_t* dispatch_packet =
        reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet);
    const uint16_t temp_header = dispatch_packet->header;
    dispatch_packet->header = kInvalidPacketHeader;

    // Populate queue buffer.
    hsa_kernel_dispatch_packet_t* queue_buffer =
        reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue.base_address);
    queue_buffer[write_index & queue_bitmask] = *dispatch_packet;

    // Enable packet.
    std::atomic_thread_fence(std::memory_order_release);
    queue_buffer[write_index & queue_bitmask].header = temp_header;
    dispatch_packet->header = temp_header;
  } else if (type == HSA_PACKET_TYPE_BARRIER_AND) {
    hsa_barrier_and_packet_t* barrier_and_packet =
        reinterpret_cast<hsa_barrier_and_packet_t*>(packet);
    const uint16_t temp_header = barrier_and_packet->header;
    barrier_and_packet->header = kInvalidPacketHeader;

    // Populate queue buffer.
    hsa_barrier_and_packet_t* queue_buffer =
        reinterpret_cast<hsa_barrier_and_packet_t*>(queue.base_address);
    queue_buffer[write_index & queue_bitmask] = *barrier_and_packet;

    // Enable packet.
    std::atomic_thread_fence(std::memory_order_release);
    queue_buffer[write_index & queue_bitmask].header = temp_header;
    barrier_and_packet->header = temp_header;
  } else if (type == HSA_PACKET_TYPE_BARRIER_OR) {
    hsa_barrier_or_packet_t* barrier_or_packet =
        reinterpret_cast<hsa_barrier_or_packet_t*>(packet);
    const uint16_t temp_header = barrier_or_packet->header;
    barrier_or_packet->header = kInvalidPacketHeader;

    // Populate queue buffer.
    hsa_barrier_or_packet_t* queue_buffer =
        reinterpret_cast<hsa_barrier_or_packet_t*>(queue.base_address);
    queue_buffer[write_index & queue_bitmask] = *barrier_or_packet;

    // Enable packet.
    std::atomic_thread_fence(std::memory_order_release);
    queue_buffer[write_index & queue_bitmask].header = temp_header;
    barrier_or_packet->header = temp_header;
  }

  hsa_signal_store_release(queue.doorbell_signal, write_index);
}

namespace {

// Bounds-checked lookup into a table of attribute names. Returns "UNKNOWN"
// for an index outside the table instead of reading past its end.
template <size_t N>
const char* AttributeName(const char* const (&table)[N], size_t index) {
  return (index < N) ? table[index] : "UNKNOWN";
}

}  // namespace

// Prints every field of `prop` to std::cout, one attribute per line.
//
// Enumerated attributes are printed by name through a bounds-checked lookup,
// so a value the tables do not cover prints "UNKNOWN". The feature attribute
// is a bit-mask, so its table also covers the combination of both bits. The
// extensions attribute is treated as a bit-mask in which bit i marks
// extension id i.
void HsaTest::PrintAgentInfo(AgentProps& prop) {
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_NAME, prop.name, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_VENDOR_NAME, prop.vendor_name, "");

  static const char* const feature_strings[] = {
      "NONE", "HSA_AGENT_FEATURE_DISPATCH", "HSA_AGENT_FEATURE_AGENT_DISPATCH",
      "HSA_AGENT_FEATURE_DISPATCH | HSA_AGENT_FEATURE_AGENT_DISPATCH"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_FEATURE,
                  AttributeName(feature_strings, prop.feature), "");

  static const char* const model_strings[] = {"HSA_MACHINE_MODEL_SMALL",
                                              "HSA_MACHINE_MODEL_LARGE"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_MACHINE_MODEL,
                  AttributeName(model_strings, prop.machine_model), "");

  static const char* const profile_strings[] = {"HSA_PROFILE_BASE",
                                                "HSA_PROFILE_FULL"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_PROFILE,
                  AttributeName(profile_strings, prop.profile), "");

  static const char* const default_float_rounding_strings[] = {
      "HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT",
      "HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO",
      "HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE,
                  AttributeName(default_float_rounding_strings,
                                prop.default_float_rounding_mode),
                  "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES,
                  AttributeName(default_float_rounding_strings,
                                prop.base_profile_float_rounding_mode),
                  "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_FAST_F16_OPERATION, prop.fast_f16_operation,
                  "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_WAVEFRONT_SIZE, prop.wavefront_size, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_WORKGROUP_MAX_DIM[0],
                  prop.workgroup_max_dim[0], "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_WORKGROUP_MAX_DIM[1],
                  prop.workgroup_max_dim[1], "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_WORKGROUP_MAX_DIM[2],
                  prop.workgroup_max_dim[2], "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, prop.workgroup_max_size,
                  "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_GRID_MAX_DIM.x, prop.grid_max_dim.x, "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_GRID_MAX_DIM.y, prop.grid_max_dim.y, "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_GRID_MAX_DIM.z, prop.grid_max_dim.z, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_GRID_MAX_SIZE, prop.grid_max_size, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, prop.fbarrier_max_size, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_QUEUES_MAX, prop.queue_max, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_QUEUE_MIN_SIZE, prop.queue_min_size, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_QUEUE_MAX_SIZE, prop.queue_max_size, "");

  static const char* const queue_type_strings[] = {"HSA_QUEUE_TYPE_MULTI",
                                                   "HSA_QUEUE_TYPE_SINGLE"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_QUEUE_TYPE,
                  AttributeName(queue_type_strings, prop.queue_type), "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_NODE, prop.node, "");

  static const char* const device_type_strings[] = {
      "HSA_DEVICE_TYPE_CPU", "HSA_DEVICE_TYPE_GPU", "HSA_DEVICE_TYPE_DSP"};
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_DEVICE,
                  AttributeName(device_type_strings, prop.device_type), "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_CACHE_SIZE[0], prop.cache_size[0], "bytes");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_CACHE_SIZE[1], prop.cache_size[1], "bytes");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_CACHE_SIZE[2], prop.cache_size[2], "bytes");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_CACHE_SIZE[3], prop.cache_size[3], "bytes");

  // Tests bit `id` of the extension mask; ids beyond the mask read as unset.
  auto has_extension = [&prop](size_t id) -> bool {
    const size_t byte_index = id / 8;
    if (byte_index >= sizeof(prop.extensions)) {
      return false;
    }
    return ((prop.extensions[byte_index] >> (id % 8)) & 1u) != 0;
  };

  std::string extensions = "";
  extensions +=
      has_extension(HSA_EXTENSION_FINALIZER) ? "HSA_EXTENSION_FINALIZER | " : "";
  extensions +=
      has_extension(HSA_EXTENSION_IMAGES) ? "HSA_EXTENSION_IMAGES | " : "";
  extensions += has_extension(HSA_EXTENSION_AMD_PROFILER)
                    ? "HSA_EXTENSION_AMD_PROFILER "
                    : "";
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_EXTENSIONS, extensions, "");

  PRINT_ATTRIBUTE(HSA_AGENT_INFO_VERSION_MAJOR, prop.version_major, "");
  PRINT_ATTRIBUTE(HSA_AGENT_INFO_VERSION_MINOR, prop.version_minor, "");
}

// Prints the peer GPUs of `agent` (as reported by GetGpuPeer) using their
// positions in gpus_, or "No peer GPUs" when there are none.
void HsaTest::PrintPeers(hsa_agent_t agent) {
  std::cout << "Peer GPUs: ";

  std::vector<hsa_agent_t> peers;
  GetGpuPeer(agent, peers);

  if (peers.empty()) {
    std::cout << "No peer GPUs\n";
    return;
  }

  for (size_t p = 0; p < peers.size(); ++p) {
    const uint64_t wanted = peers[p].handle;

    // Locate the peer's position in gpus_.
    size_t position = 0;
    while (position < gpus_.size() && gpus_[position].handle != wanted) {
      ++position;
    }

    if (position < gpus_.size()) {
      std::cout << "GPU[" << position << "] ";
    }
  }
  std::cout << std::endl;
}

// Prints every field of `prop` to std::cout, one attribute per line. The pool
// size is shown in MB once it reaches one megabyte and in bytes below that.
void HsaTest::PrintPoolInfo(PoolProps& prop) {
  static const char* const kSegmentNames[] = {
      "HSA_SEGMENT_GLOBAL", "HSA_AMD_SEGMENT_READONLY",
      "HSA_AMD_SEGMENT_PRIVATE", "HSA_AMD_SEGMENT_GROUP"};
  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
                  kSegmentNames[prop.segment], "");

  // Build the flag list in a fixed order: kernarg, fine, coarse.
  std::string flag_text;
  if (prop.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) {
    flag_text += "HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT | ";
  }
  if (prop.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) {
    flag_text += "HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED | ";
  }
  if (prop.global_flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
    flag_text += "HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED ";
  }
  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, flag_text, "");

  const size_t whole_mb = ToMB(prop.size);
  if (whole_mb == 0) {
    PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_SIZE, prop.size, "bytes");
  } else {
    PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_SIZE, whole_mb, "MB");
  }

  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
                  prop.alloc_allowed, "");
  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
                  prop.alloc_granule, "bytes");
  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
                  prop.alloc_alignment, "bytes");
  PRINT_ATTRIBUTE(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
                  prop.all_accessible, "");
}


================================================
FILE: samples/common/hsa_test.h
================================================
#ifndef HSA_TEST_H
#define HSA_TEST_H

#include <map>
#include <string>
#include <vector>

#include "hsa.h"
#include "hsa_ext_amd.h"
#include "hsa_ext_finalize.h"

#include "HSAILTool.h"

// Base class for the HSA sample tests. It initializes the runtime, collects
// the CPU/GPU agents with their memory pools, and offers helpers for memory
// allocation, packet submission and attribute printing. Concrete tests
// implement Run().
class HsaTest {
 public:
  // Stores the test name and prints a banner.
  HsaTest(const char* test_name);
  virtual ~HsaTest();

  // Initializes the runtime and enumerates agents and their pools.
  void Init();
  // Shuts the runtime down.
  void Cleanup();

  // Body of the test; supplied by the derived class.
  virtual void Run() = 0;

 protected:
  // Runtime iteration callbacks; `data` carries the state being filled in.
  static hsa_status_t IterateAgents(hsa_agent_t agent, void* data);
  static hsa_status_t IteratePools(hsa_amd_memory_pool_t pool, void* data);

  // Snapshot of the attributes queried for one agent.
  struct AgentProps {
    AgentProps(hsa_agent_t);

    char name[64];
    char vendor_name[64];
    hsa_agent_feature_t feature;
    hsa_machine_model_t machine_model;
    hsa_profile_t profile;
    hsa_default_float_rounding_mode_t default_float_rounding_mode;
    hsa_default_float_rounding_mode_t base_profile_float_rounding_mode;
    bool fast_f16_operation;
    uint32_t wavefront_size;
    uint16_t workgroup_max_dim[3];
    uint32_t workgroup_max_size;
    hsa_dim3_t grid_max_dim;
    uint32_t grid_max_size;
    uint32_t fbarrier_max_size;
    uint32_t queue_max;
    uint32_t queue_min_size;
    uint32_t queue_max_size;
    hsa_queue_type_t queue_type;
    uint32_t node;
    hsa_device_type_t device_type;
    uint32_t cache_size[4];
    hsa_isa_t isa;
    uint8_t extensions[128];
    uint16_t version_major;
    uint16_t version_minor;
  };

  // Snapshot of the attributes queried for one memory pool.
  struct PoolProps {
    PoolProps(hsa_amd_memory_pool_t pool);

    hsa_amd_segment_t segment;
    hsa_amd_memory_pool_global_flag_t global_flag;
    size_t size;
    bool alloc_allowed;
    size_t alloc_granule;
    size_t alloc_alignment;
    bool all_accessible;
  };

  // Owns the program, code object and executable built from one HSAIL file
  // for one agent.
  class Kernel {
   public:
    Kernel(hsa_agent_t agent, std::string hsail_file);

    virtual ~Kernel();

    // Returns the kernel object handle for `kernel_name`, or 0 on failure.
    uint64_t GetCodeHandle(const char* kernel_name);
    // Queries the private segment size of the last looked-up kernel symbol.
    hsa_status_t GetScratchSize(uint32_t* size);

   protected:
    virtual void Initialize();

    virtual void Cleanup();

    bool CreateProgramFromHsailFile();

    bool CreateCodeObjectAndExecutable();

    HSAIL_ASM::Tool tool_;

    hsa_agent_t agent_;
    hsa_profile_t profile_;

    hsa_ext_program_t program_;
    hsa_code_object_t code_object_;
    hsa_executable_t executable_;
    hsa_executable_symbol_t kernel_symbol_;

    std::string hsail_file_;
  };

  // Collects the GPUs whose coarse-grained pool `master` may access.
  virtual void GetGpuPeer(hsa_agent_t master,
                          std::vector<hsa_agent_t>& gpu_peers);
  // Allocation helpers; each returns NULL on failure.
  virtual void* AllocateSystemMemory(bool fine_grain, size_t size);
  virtual void* AllocateLocalMemory(hsa_agent_t agent, size_t size);
  virtual void FreeMemory(void* ptr);

  // Writes `packet` into `queue` and rings the doorbell.
  virtual void LaunchPacket(hsa_queue_t& queue, hsa_packet_type_t type,
                            void* packet);

  // Printing helpers; all write to std::cout.
  virtual void PrintAgentInfo(AgentProps& prop);
  virtual void PrintPeers(hsa_agent_t agent);
  virtual void PrintPoolInfo(PoolProps& prop);

  std::string test_name_;

  // Agents found by Init(), split by device type.
  std::vector<hsa_agent_t> cpus_;
  std::vector<hsa_agent_t> gpus_;

  // Memory pools per agent, keyed by agent handle.
  std::map<uint64_t, hsa_amd_memory_pool_t> global_fine_;
  std::map<uint64_t, hsa_amd_memory_pool_t> global_coarse_;
  std::map<uint64_t, hsa_amd_memory_pool_t> group_;
};

#endif  // HSA_TEST_H


================================================
FILE: samples/common/hsatimer.cpp
================================================
#include "hsatimer.h"

// Measures the TSC frequency once, at construction, and caches the result.
PerfTimer::PerfTimer()
	: freq_in_100mhz(MeasureTSCFreqHz())
{
}

// Deletes every Timer created by CreateTimer(), newest first, and leaves the
// list empty.
PerfTimer::~PerfTimer()
{
	for (std::vector<Timer*>::reverse_iterator it = _timers.rbegin();
	     it != _timers.rend(); ++it)
	{
		delete *it;
	}
	_timers.clear();
}

//a new cretaed timer instantance index will be returned
int PerfTimer::CreateTimer()
{
    Timer *newTimer = new Timer;
	newTimer->_start = 0;
	newTimer->_clocks = 0;

#ifdef _WIN32
    QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq);       
#else
	newTimer->_freq = (long long)1.0E3;
#endif

	/* Push back the address of new Timer instance created */
	_timers.push_back(newTimer);
	return (int)(_timers.size() - 1);
}

// Records the current time as the start point of timer `index`.
//
// Returns HSA_SUCCESS, or HSA_FAILURE (after printing a message) when `index`
// is not a valid timer index. Negative indices are rejected as well, since
// they would otherwise index in front of the timer list.
int PerfTimer::StartTimer(int index)
{
	if(index < 0 || index >= (int)_timers.size())
	{
		Error("Cannot reset timer. Invalid handle.");
		return HSA_FAILURE;
	}

#ifdef _WIN32
	// General Windows timing method
	#ifndef _AMD
	long long tmpStart;
	QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
	_timers[index]->_start = (double)tmpStart;
	#else
	// AMD Windows timing method: nothing is recorded here.

	#endif

#else
	// General Linux timing method: wall-clock time in milliseconds
	#ifndef _AMD
	struct timeval s;
	gettimeofday(&s, 0);
	_timers[index]->_start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
	#else

	// AMD timing method: raw TSC ticks
	unsigned int unused;
	_timers[index]->_start = __rdtscp(&unused);

	#endif

#endif

	return HSA_SUCCESS;
}


// Stops timer `index` and adds the time elapsed since the matching
// StartTimer() call to the timer's accumulated total.
//
// Returns HSA_SUCCESS, or HSA_FAILURE (after printing a message) when `index`
// is not a valid timer index. Negative indices are rejected as well, since
// they would otherwise index in front of the timer list.
int PerfTimer::StopTimer(int index)
{
	double n=0;
	if(index < 0 || index >= (int)_timers.size())
	{
		Error("Cannot reset timer. Invalid handle.");
		return HSA_FAILURE;
	}
#ifdef _WIN32
	#ifndef _AMD
	long long n1;
	QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
	n = (double) n1;
	#else

	// AMD Windows timing: nothing is sampled here, n stays 0.

	#endif

#else
	// General Linux timing method: wall-clock time in milliseconds
	#ifndef _AMD
	struct timeval s;
	gettimeofday(&s, 0);
	n = s.tv_sec * 1.0E3+ (double)(s.tv_usec/1.0E3);
	#else
	// AMD Linux timing: raw TSC ticks

	unsigned int unused;
	n = __rdtscp(&unused);
	#endif

#endif

	// Turn the end sample into an elapsed amount and clear the start point.
	n -= _timers[index]->_start;
	_timers[index]->_start = 0;

	#ifndef _AMD
	_timers[index]->_clocks += n;
	#else
	//_timers[index]->_clocks += 10 * n /freq_in_100mhz;      // unit is ns
	_timers[index]->_clocks += 1.0E-6 * 10  * n /freq_in_100mhz;  // convert to ms
	cout << "_AMD is enabled!!!" << endl;
	#endif

	return HSA_SUCCESS;
}

// Prints `str` followed by a newline on standard output.
void PerfTimer::Error(string str)
{
	std::cout << str << std::endl;
}


double PerfTimer::ReadTimer(int index)
{

	if(index >= (int)_timers.size())
	{
		Error("Cannot read timer. Invalid handle.");
		return HSA_FAILURE;
	}
	
	double reading = double(_timers[index]->_clocks);
	
	reading = double(reading / _timers[index]->_freq);
	
	return reading;
}


// Returns a timestamp in microseconds: from the performance counter on
// Windows, from CLOCK_MONOTONIC_RAW elsewhere.
uint64_t PerfTimer::CoarseTimestampUs()
{
#ifdef _WIN32
	uint64_t counterHz, counter;
	QueryPerformanceFrequency((LARGE_INTEGER *)&counterHz);
	QueryPerformanceCounter((LARGE_INTEGER *)&counter);

	// Shrink counter and frequency together until (counter * 1000000) fits
	// in uint64_t.
	for (; counter > (1ULL << 44); counter /= 16, counterHz /= 16)
	{
	}

	return (counter * 1000000) / counterHz;
#else
	struct timespec now;
	clock_gettime(CLOCK_MONOTONIC_RAW, &now);

	const uint64_t wholeSecondsUs = uint64_t(now.tv_sec) * 1000000;
	return wholeSecondsUs + now.tv_nsec / 1000;
#endif
}

// Estimates the TSC frequency by spinning for one billion TSC ticks and
// timing that interval with CoarseTimestampUs().
//
// The result is ticks * 10 / nanoseconds, rounded to nearest, i.e. the
// frequency expressed in units of 100 MHz.
uint64_t PerfTimer::MeasureTSCFreqHz()
{
	const uint64_t kTargetTicks = 1000000000;
	unsigned int aux;

	const uint64_t startUs = CoarseTimestampUs();
	const uint64_t firstTick = __rdtscp(&aux);

	// Busy-wait until the TSC has advanced by the target amount.
	uint64_t lastTick = __rdtscp(&aux);
	while (lastTick - firstTick < kTargetTicks)
	{
		lastTick = __rdtscp(&aux);
	}

	const uint64_t stopUs = CoarseTimestampUs();

	const uint64_t elapsedNs = (stopUs - startUs) * 1000;
	const uint64_t elapsedTicks = lastTick - firstTick;

	// Adding half the divisor rounds the quotient to nearest.
	return (elapsedTicks * 10 + (elapsedNs / 2)) / elapsedNs;
}


================================================
FILE: samples/common/hsatimer.h
================================================
#ifndef __MYTIME__
#define __MYTIME__

// Will use AMD timer and general Linux timer based on users' need --> compilation flag
// need to consider platform is Windows or Linux

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;

#if defined(_MSC_VER)
  #include <time.h>
  #include <windows.h>
  #include <intrin.h>
#else
  #if defined(__GNUC__)
    #include <sys/time.h>
    #include <x86intrin.h>
  #endif // __GNUC__
#endif //_MSC_VER

#define HSA_FAILURE  1
#define HSA_SUCCESS 0

// A collection of stopwatch-style timers addressed by integer index.
class PerfTimer {
	private:
		// State of a single timer.
		struct Timer
		{
			string name;          /* name of the timer object */
			long long _freq;      /* divisor applied by ReadTimer() */
			double _clocks;       /* time accumulated over start/stop pairs */
			double _start;        /* sample taken by the last StartTimer() */
		};

		std::vector<Timer*> _timers;  /* timers created so far */
		double freq_in_100mhz;        /* value measured by MeasureTSCFreqHz() */

		// Helpers for the TSC-based (AMD) timing method.
		uint64_t CoarseTimestampUs();
		uint64_t MeasureTSCFreqHz();

	public:
		PerfTimer();
		~PerfTimer();

		// Timer life cycle; each int is a timer index or a status code.
		int CreateTimer();
		int StartTimer(int index);
		int StopTimer(int index);

		// retrieve time
		double ReadTimer(int index);
		// write into a file
		double WriteTimer(int index);

		// Prints an error message.
		void Error(string str);
};

#endif


================================================
FILE: samples/common/os.cpp
================================================
#ifdef _WIN32 // Compiling for Windows Platform

#include <stdlib.h>
#include <Windows.h>
#include "os.h"
#include <stdio.h>

void SetEnv(const char* env_var_name, const char* env_var_value) {
  BOOL err = SetEnvironmentVariable(env_var_name, env_var_value);
  if(FALSE == err){
	  printf("Set environment variable failed!\n");
	  exit(1);
  }
  return;
}

char* GetEnv(const char* env_var_name){
  char* buff;
  DWORD char_count = GetEnvironmentVariable(env_var_name, NULL, 0);
  if (char_count == 0) return NULL;
  buff = (char*)malloc(sizeof(char) * char_count);
  GetEnvironmentVariable(env_var_name, buff, char_count);
  buff[char_count - 1] = '\0';
  return buff;
}

#elif defined(__linux__)

#include "os.h"
#include <stdlib.h>

// Sets the environment variable `env_var_name` to `env_var_value`, replacing
// any existing value. On failure a message is printed and the process exits
// with status 1.
void SetEnv(const char* env_var_name, const char* env_var_value){
	const int overwrite = 1;
	if(setenv(env_var_name, env_var_value, overwrite) == 0){
		return;
	}

	printf("Set environment variable failed!\n");
	exit(1);
}

// Returns the value of environment variable `env_var_name` as reported by
// getenv(): a pointer into the environment, or NULL when it is not set.
char* GetEnv(const char* env_var_name) {
  char* value = getenv(env_var_name);
  return value;
}

#endif


================================================
FILE: samples/common/os.h
================================================
#ifndef HSA_PERF_SRC_UTILS_OS_H_
#define HSA_PERF_SRC_UTILS_OS_H_

#include <stdio.h>

// Set an environment variable
void SetEnv(const char* env_var_name, const char* env_var_value);

// Get the value of an environment variable
char* GetEnv(const char* env_var_name);

#endif


================================================
FILE: samples/common/utilities.cpp
================================================
#include "utilities.h"

/*
 * Prints the header on its own line, then the array as `height` rows of
 * `width` elements. Each element is followed by a space and each row by a
 * newline. Every element is printed; there is no size limit.
 */
template<typename T>
void PrintArray(
    std::string header,
    const T * data,
    const int width,
    const int height)
{
    std::cout << "\n" << header << "\n";

    for(int row = 0; row < height; row++)
    {
        const T * rowStart = data + row * width;
        for(int col = 0; col < width; col++)
        {
            std::cout << rowStart[col] << " ";
        }
        std::cout << "\n";
    }

    std::cout << "\n";
}

/*
 * Returns 0 when val is a power of two and -1 otherwise (including zero and
 * negative values).
 *
 * The sign is checked on the original type and the bit test is done on an
 * unsigned value, so the most negative signed value does not overflow and
 * 2^63 held in an unsigned 64-bit type is recognised.
 */
template<typename T>
int IsPowerOf2(T val)
{
    if(!(val > T(0)))
        return -1;

    const unsigned long long bits = (unsigned long long)val;

    /* A power of two has exactly one bit set, so clearing the lowest set
     * bit must leave zero. */
    if(bits != 0 && (bits & (bits - 1)) == 0)
        return 0;
    else
        return -1;
}


/*
 * Rounds val up to the next power of two; a value that already is a power of
 * two is returned unchanged.
 *
 * After the decrement the highest set bit is smeared into every lower bit by
 * OR-ing in right shifts of 1, 2, 4, ... up to half the bit width of T, and
 * the final increment yields the power of two. The shift amount is bounded
 * by the bit width of T, so all bits are covered and no shift reaches or
 * exceeds the width of the type.
 */
template<typename T>
T RoundToPowerOf2(T val)
{
    const unsigned int bits = (unsigned int)(sizeof(T) * 8);

    val--;
    for(unsigned int shift = 1; shift < bits; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}

/*
 * Fills a width x height array with pseudo-random values derived from
 * rangeMin and the span (rangeMax - rangeMin + 1).
 *
 * The generator is seeded with `seed`, or with the current time when `seed`
 * is 0. Returns 0 on success and -1 (after printing a message) when
 * arrayPtr is NULL.
 */
template<typename T>
int FillRandom(
         T * arrayPtr,
         const int width,
         const int height,
         const T rangeMin,
         const T rangeMax,
         unsigned int seed)
{
    if(arrayPtr == NULL)
    {
        printf("Cannot fill array. NULL pointer.");
        return -1;
    }

    const unsigned int actualSeed = seed ? seed : (unsigned int)time(NULL);
    srand(actualSeed);

    const double span = double(rangeMax - rangeMin) + 1.0;

    /* Row-major fill; one rand() call per element. */
    for(int row = 0; row < height; row++)
    {
        T * rowStart = arrayPtr + row * width;
        for(int col = 0; col < width; col++)
        {
            rowStart[col] = rangeMin + T(span*rand()/(RAND_MAX + 1.0));
        }
    }

    return 0;
}

#if 0
// Region-iteration callback (currently compiled out by the enclosing #if 0).
// Stores `region` into the hsa_region_t that `data` points to when the
// region's segment is HSA_REGION_SEGMENT_GLOBAL. Always returns
// HSA_STATUS_SUCCESS; the status of the segment query is not checked.
hsa_status_t get_global_region(hsa_region_t region, void* data) 
{
	hsa_region_segment_t segment;
	hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);
	if (HSA_REGION_SEGMENT_GLOBAL == segment) 
	{
		hsa_region_t* ret = (hsa_region_t*) data;
		*ret = region;
	}
	return HSA_STATUS_SUCCESS;
}


/*
 * Finds the code-section offset of the kernel directive named symbol_name
 * in the given brig_module (currently compiled out by the enclosing #if 0).
 * If the symbol is found, *offset receives the offset and the function
 * returns HSA_STATUS_SUCCESS; otherwise it returns HSA_STATUS_ERROR.
 *
 * NOTE(review): names are compared with strncmp over strlen(symbol_name)
 * characters, so a kernel whose name merely starts with symbol_name also
 * matches.
 */
hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, 
		char* symbol_name,
		hsa_ext_brig_code_section_offset32_t* offset) 
{

	/*  
	 * Get the data section 
	 */
	hsa_ext_brig_section_header_t* data_section_header = 
		brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
	/*  
	 * Get the code section
	 */
	hsa_ext_brig_section_header_t* code_section_header =
		brig_module->section[HSA_EXT_BRIG_SECTION_CODE];

	/*  
	 * First entry into the BRIG code section; entries are walked until the
	 * offset reaches the section's byte count.
	 */
	BrigCodeOffset32_t code_offset = code_section_header->header_byte_count;
	BrigBase* code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
	while (code_offset != code_section_header->byte_count) 
	{
		if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) 
		{
			/* 
			 * Now find the kernel's name in the data section
			 */
			BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry);
			BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
			BrigData* data_entry = (BrigData*)((char*) data_section_header + data_name_offset);
			if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) 
			{
				*offset = code_offset;
				return HSA_STATUS_SUCCESS;
			}
		}
		/* Advance to the next entry using the current entry's size. */
		code_offset += code_entry->byteCount;
		code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
	}   
	return HSA_STATUS_ERROR;
}
#endif

/*
 * Queries the device type of `agent`; when it is HSA_DEVICE_TYPE_GPU the
 * agent handle is copied into the hsa_agent_t that `data` points to.
 * Agents of any other type leave *data untouched.
 *
 * Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a NULL data pointer, the
 * failing status of hsa_agent_get_info if the query fails, and
 * HSA_STATUS_SUCCESS in every other case.
 */
hsa_status_t find_gpu(hsa_agent_t agent, void *data) 
{
	if (NULL == data) 
		return HSA_STATUS_ERROR_INVALID_ARGUMENT;

	hsa_device_type_t kind;
	const hsa_status_t query = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &kind);
	if (HSA_STATUS_SUCCESS != query) 
		return query;

	if (HSA_DEVICE_TYPE_GPU == kind) 
	{
		hsa_agent_t *out = static_cast<hsa_agent_t *>(data);
		*out = agent;
	}
	return HSA_STATUS_SUCCESS;
}


/*
 * Records `region` in the MemRegion that `data` points to, according to
 * the region's global flags:
 *   - HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED -> coarse_region
 *   - HSA_REGION_GLOBAL_FLAG_KERNARG        -> kernarg_region
 * A region carrying both flags is stored in both fields. Regions outside
 * the global segment are skipped, because the global-flags attribute is
 * only meaningful for global-segment regions.
 *
 * Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when data is NULL, the status
 * of a failing hsa_region_get_info query, and HSA_STATUS_SUCCESS otherwise.
 */
hsa_status_t get_memory_region(hsa_region_t region, void* data) 
{
	if (data == NULL) 
	{
		return HSA_STATUS_ERROR_INVALID_ARGUMENT;
	}
	MemRegion *my_mem_region = (MemRegion *)data;

	hsa_region_segment_t segment;
	hsa_status_t stat = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);
	if (stat != HSA_STATUS_SUCCESS) 
	{
		return stat;
	}
	if (segment != HSA_REGION_SEGMENT_GLOBAL) 
	{
		/* Nothing to record; keep iterating over the remaining regions. */
		return HSA_STATUS_SUCCESS;
	}

	hsa_region_global_flag_t flags;
	stat = hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
	if (stat != HSA_STATUS_SUCCESS) 
	{
		/* flags was not written; do not inspect it. */
		return stat;
	}

	if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) 
	{
		my_mem_region->coarse_region = region;
	}

	if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) 
	{
		my_mem_region->kernarg_region = region;
	}   

	return HSA_STATUS_SUCCESS;
}


================================================
FILE: samples/common/utilities.h
================================================
#ifndef __HSA_UTILITY__
#define __HSA_UTILITY__

#include <vector>
#include <thread>

#include "hsa.h"
#include "hsa_ext_finalize.h"

#include <string.h>
#include<iostream>
using namespace std;


// Byte alignment constant for HSA arguments (value 16).
#define HSA_ARGUMENT_ALIGN_BYTES 16

// ALIGNED_(x): compiler-specific spelling of "align to x bytes".
// It is defined for MSVC and for GNU-compatible compilers only; with any
// other compiler the macro is left undefined.
#if defined(_MSC_VER)
  #define ALIGNED_(x) __declspec(align(x))

// Silence MSVC conversion warnings for code that includes this header.
#pragma warning(disable: 4800)
#pragma warning(disable: 4305) // truncation from 'double' to 'const float'
#pragma warning(disable: 4267) // conversion from 'size_t' to 'int', possible loss of data

// `uint` is only declared here for MSVC; the non-MSVC branch below does not
// declare it, so other toolchains must get it from a system header.
typedef unsigned int uint;

#else
  #if defined(__GNUC__)
    #define ALIGNED_(x) __attribute__ ((aligned(x)))
  #endif // __GNUC__
#endif // _MSC_VER

// Generic failure / success codes (1 / 0).
#define SDK_FAILURE 1
#define SDK_SUCCESS 0

/*
#define check(msg, status) \
if (status != HSA_STATUS_SUCCESS) { \
	printf("%s failed.\n", #msg); \
	exit(1); \
} else { \
	printf("%s succeeded.\n", #msg); \
}
*/
/*
 * check(msg, status): if `status` is not HSA_STATUS_SUCCESS, prints
 * "<msg> failed." and terminates the process with exit(1); otherwise
 * does nothing. `msg` is stringified, so it is written without quotes.
 *
 * `status` is parenthesised so that an expression argument such as
 * `a & b` is compared as a whole instead of being regrouped by operator
 * precedence. The trailing else-branch keeps a following `else` in the
 * caller's code from attaching to the macro's own `if`.
 */
#define check(msg, status) \
if ((status) != HSA_STATUS_SUCCESS) { \
	printf("%s failed.\n", #msg); \
	exit(1); \
} else { \
	; \
}

/*
 * Define required BRIG data structures.
 */

// Fixed-width scalar aliases used by the BRIG helpers.

// 32-bit offsets (code section / data section).
using BrigCodeOffset32_t = uint32_t;
using BrigDataOffset32_t = uint32_t;

// 16-bit entry kind.
using BrigKinds16_t = uint16_t;

// 8-bit linkage and executable-modifier values.
using BrigLinkage8_t = uint8_t;
using BrigExecutableModifier8_t = uint8_t;

// Data-section offset that refers to a string entry.
using BrigDataOffsetString32_t = BrigDataOffset32_t;

// Pair of regions collected by get_memory_region().
struct MemRegion {
  // Region whose global flags include HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED.
  hsa_region_t coarse_region;

  // Region whose global flags include HSA_REGION_GLOBAL_FLAG_KERNARG.
  hsa_region_t kernarg_region;
};


/*
enum BrigKinds {
	BRIG_KIND_NONE = 0x0000,
	BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
	BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
};

typedef struct BrigBase BrigBase;
struct BrigBase {
	uint16_t byteCount;
	BrigKinds16_t kind;
};

typedef struct BrigExecutableModifier BrigExecutableModifier;
struct BrigExecutableModifier {
	BrigExecutableModifier8_t allBits;
};

typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
struct BrigDirectiveExecutable {
	uint16_t byteCount;
	BrigKinds16_t kind;
	BrigDataOffsetString32_t name;
	uint16_t outArgCount;
	uint16_t inArgCount;
	BrigCodeOffset32_t firstInArg;
	BrigCodeOffset32_t firstCodeBlockEntry;
	BrigCodeOffset32_t nextModuleEntry;
	uint32_t codeBlockEntryCount;
	BrigExecutableModifier modifier;
	BrigLinkage8_t linkage;
	uint16_t reserved;
};

typedef struct BrigData BrigData;
struct BrigData {
	uint32_t byteCount;
	uint8_t bytes[1];
};
*/

/*
 * Two-component float vector with element-wise arithmetic.
 *
 * The type stays an aggregate (no constructors), so `float2 v = {a, b};`
 * keeps working. All operators are const and take their vector operand by
 * const reference, so they can be applied to const objects and temporaries.
 */
struct float2
{
    float s0;
    float s1;

    // Element-wise product of two vectors.
    float2 operator * (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 * fl.s0;
        temp.s1 = s1 * fl.s1;
        return temp;
    }

    // Scales both components by `scalar`.
    float2 operator * (float scalar) const
    {
        float2 temp;
        temp.s0 = s0 * scalar;
        temp.s1 = s1 * scalar;
        return temp;
    }

    // Element-wise sum of two vectors.
    float2 operator + (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 + fl.s0;
        temp.s1 = s1 + fl.s1;
        return temp;
    }

    // Element-wise difference (this - fl).
    float2 operator - (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 - fl.s0;
        temp.s1 = s1 - fl.s1;
        return temp;
    }
};


/*
 * Two-component unsigned-int vector with element-wise arithmetic.
 *
 * The type stays an aggregate (no constructors). Components are spelled
 * `unsigned int` (the same type as the `uint` alias) so the struct does not
 * depend on a platform header providing `uint`. All operators are const and
 * take their vector operand by const reference, so they can be applied to
 * const objects and temporaries.
 */
struct uint2
{
    unsigned int s0;
    unsigned int s1;

    // Element-wise product of two vectors.
    uint2 operator * (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 * fl.s0;
        temp.s1 = s1 * fl.s1;
        return temp;
    }

    // Scales both components by a float; each product is computed in float
    // and then converted back to unsigned int (fractional part dropped).
    uint2 operator * (float scalar) const
    {
        uint2 temp;
        temp.s0 = static_cast<unsigned int>(s0 * scalar);
        temp.s1 = static_cast<unsigned int>(s1 * scalar);
        return temp;
    }

    // Element-wise sum of two vectors.
    uint2 operator + (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 + fl.s0;
        temp.s1 = s1 + fl.s1;
        return temp;
    }

    // Element-wise difference (this - fl), with unsigned wrap-around.
    uint2 operator - (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 - fl.s0;
        temp.s1 = s1 - fl.s1;
        return temp;
    }
};


/*
 * Prints no more than 256 elements of the given array.
 * Prints full array if length is less than 256.
 * Prints Array name followed by elements.
 *
 * `string` here is std::string, reached through the `using namespace std;`
 * at the top of this header.
 */
template<typename T> void PrintArray(string header, const T * data, const int width, const int height);

// Declaration only; the definition is not visible from this header.
// Presumably returns non-zero when val is a power of two - confirm against
// the implementation.
template<typename T> int IsPowerOf2(T val);

// Declaration only; the definition is not visible from this header.
// Presumably rounds val to a power of two - rounding direction to be
// confirmed against the implementation.
template<typename T> T RoundToPowerOf2(T val);

// Fills the array behind arrayPtr using rangeMin/rangeMax; `seed` defaults
// to 123 when omitted.
// NOTE(review): presumably width * height elements with values derived from
// rand() - only part of the definition was reviewed, confirm there.
template<typename T> int FillRandom(T * arrayPtr, const int width, const int height, const T rangeMin, const T rangeMax, unsigned int seed=123);

//get a memory region that can be used for global memory allocations.
// NOTE(review): the definition of this function seen next to this header is
// wrapped in `#if 0`; unless another definition exists, calling it will
// fail at link time - confirm before use.
hsa_status_t get_global_region(hsa_region_t region, void* data); 

/*
 * Finds the specified symbols offset in the specified brig_module.
 * If the symbol is found the function returns HSA_STATUS_SUCCESS, 
 * otherwise it returns HSA_STATUS_ERROR.
 *
 * The declaration below is intentionally commented out.
 */
 
//hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, char* symbol_name, hsa_ext_brig_code_section_offset32_t* offset);

/*
 * Determines if the given agent is of type HSA_DEVICE_TYPE_GPU
 * and sets the value of data to the agent handle if it is.
 * data must point to an hsa_agent_t.
 */
hsa_status_t find_gpu(hsa_agent_t agent, void *data);

/*
 * Records the given region in the MemRegion that data points to:
 * a region flagged coarse-grained is stored in coarse_region and a
 * region flagged for kernarg allocations is stored in kernarg_region.
 */
hsa_status_t get_memory_region(hsa_region_t region, void* data);

#endif